From 73111e76d24f00d0065306a1a56344528e22548b Mon Sep 17 00:00:00 2001 From: ojwg Date: Thu, 11 Nov 2021 14:53:02 +0100 Subject: [PATCH 001/694] initial disk on GPU commit --- src/particles/particles_3D.cpp | 139 ++++++++++++++++++++---------- src/particles/particles_3D.h | 3 + src/particles/particles_3D_gpu.cu | 8 ++ 3 files changed, 105 insertions(+), 45 deletions(-) diff --git a/src/particles/particles_3D.cpp b/src/particles/particles_3D.cpp index 6778fe0a4..139150eec 100644 --- a/src/particles/particles_3D.cpp +++ b/src/particles/particles_3D.cpp @@ -511,7 +511,7 @@ void Particles_3D::Initialize_Sphere( void ){ #ifdef PARTICLES_CPU n_local = pos_x.size(); #endif //PARTICLES_CPU - + #if defined(PARTICLE_IDS) && defined(MPI_CHOLLA) // Get global IDs: Offset the local IDs to get unique global IDs across the MPI ranks chprintf( " Computing Global Particles IDs offset \n" ); @@ -557,13 +557,9 @@ void Particles_3D::Initialize_Sphere( void ){ /** - * Initializes a disk population of uniform mass (\f$(10^4 M_\odot)\f$) stellar clusters + * Initializes a disk population of uniform mass stellar clusters */ void Particles_3D::Initialize_Disk_Stellar_Clusters(struct parameters *P) { - #ifdef PARTICLES_GPU - chprintf( " Initialize_Disk_Stellar_Clusters: PARTICLES_GPU not currently supported\n"); - chexit(-1); - #endif #ifndef SINGLE_PARTICLE_MASS chprintf( " Initialize_Disk_Stellar_Clusters: only SINGLE_PARTICLE_MASS currently supported\n"); chexit(-1); @@ -583,17 +579,34 @@ void Particles_3D::Initialize_Disk_Stellar_Clusters(struct parameters *P) { Real R_max = sqrt(P->xlen*P->xlen + P->ylen*P->ylen)/2; R_max = P->xlen / 2.0; + real_vector_t temp_pos_x; + real_vector_t temp_pos_y; + real_vector_t temp_pos_z; + real_vector_t temp_vel_x; + real_vector_t temp_vel_y; + real_vector_t temp_vel_z; + real_vector_t temp_grav_x; + real_vector_t temp_grav_y; + real_vector_t temp_grav_z; + #ifndef SINGLE_PARTICLE_MASS + real_vector_t temp_mass; + #endif + #ifdef PARTICLE_IDS + int_vector_t temp_ids; + #endif + #ifdef PARTICLE_AGE + real_vector_t temp_age; + #endif + Real x, y, z, R, phi; Real vx, vy, vz, vel, ac; Real expFactor, vR_rms, vR, vPhi_str, vPhi, v_c2, vPhi_rand_rms, kappa2; - #ifdef PARTICLE_IDS - part_int_t id; - #endif - particle_mass = 1e4; //solar masses + + particle_mass = 1e5; //solar masses //unsigned long int N = (long int)(6.5e6 * 0.11258580827352116); //2kpc radius - unsigned long int N = (long int)(6.5e6 * 0.9272485558395908); // 15kpc radius + unsigned long int N = 38;//(long int)(6.5e6 * 0.9272485558395908); // 15kpc radius long lost_particles = 0; - for ( unsigned long int i = 0; i < N; i++ ){ + for ( part_int_t i = 0; i < N; i++ ){ do { R = R_d*radialDist(generator); } while (R > R_max); @@ -614,59 +627,95 @@ void Particles_3D::Initialize_Disk_Stellar_Clusters(struct parameters *P) { vy = vPhi*cos(phi); vz = 0; - #ifdef PARTICLES_CPU - //Copy the particle data to the particles vectors - pos_x.push_back(x); - pos_y.push_back(y); - pos_z.push_back(z); - vel_x.push_back(vx); - vel_y.push_back(vy); - vel_z.push_back(vz); - grav_x.push_back(0.0); - grav_y.push_back(0.0); - grav_z.push_back(0.0); + //add particle data to the particles vectors + temp_pos_x.push_back(x); + temp_pos_y.push_back(y); + temp_pos_z.push_back(z); + temp_vel_x.push_back(vx); + temp_vel_y.push_back(vy); + temp_vel_z.push_back(vz); + temp_grav_x.push_back(0.0); + temp_grav_y.push_back(0.0); + temp_grav_z.push_back(0.0); #ifdef PARTICLE_IDS - id = i; - #ifdef PARALLEL_OMP - #pragma omp parallel num_threads( N_OMP_THREADS ) - { - id += 1.0*omp_get_thread_num()/omp_get_num_threads(); - } - #endif //PARALLEL_OMP - partIDs.push_back(id); + temp_ids.push_back(i); #endif //PARTICLE_IDS #ifdef PARTICLE_AGE //if (fabs(z) >= Z_d) age.push_back(1.1e4); //else age.push_back(0.0); - age.push_back(0.0); + temp_age.push_back(0.0); #endif - - #endif//PARTICLES_CPU } + n_local = temp_pos_x.size(); - - #ifdef PARTICLES_CPU - n_local = pos_x.size(); - #endif - #if defined(PARTICLE_IDS) && defined(MPI_CHOLLA) // Get global IDs: Offset the local IDs to get unique global IDs across the MPI ranks chprintf( " Computing Global Particles IDs offset \n" ); part_int_t global_id_offset; global_id_offset = Get_Particles_IDs_Global_MPI_Offset( n_local ); - #ifdef PARTICLES_CPU for ( int p_indx=0; p_indx 0) chprintf(" lost %lu particles\n", lost_particles); chprintf( " Stellar Disk Particles Initialized, n_local: %lu\n", n_local); } diff --git a/src/particles/particles_3D.h b/src/particles/particles_3D.h index 846c4760c..4c454fa3a 100644 --- a/src/particles/particles_3D.h +++ b/src/particles/particles_3D.h @@ -71,6 +71,9 @@ class Particles_3D #ifdef PARTICLE_IDS part_int_t *partIDs_dev; #endif + #ifdef PARTICLE_AGE + Real *age_dev; + #endif Real *mass_dev; Real *pos_x_dev; Real *pos_y_dev; diff --git a/src/particles/particles_3D_gpu.cu b/src/particles/particles_3D_gpu.cu index e8b8da37d..a3c314ce0 100644 --- a/src/particles/particles_3D_gpu.cu +++ b/src/particles/particles_3D_gpu.cu @@ -140,7 +140,15 @@ void Particles_3D::Copy_Particles_Array_Real_Device_to_Host( Real *array_dev, Re cudaDeviceSynchronize(); } +void Particles_3D::Copy_Particles_Array_Int_Host_to_Device( part_int_t *array_host, part_int_t *array_dev, part_int_t size) { + CudaSafeCall( cudaMemcpy(array_dev, array_host, size*sizeof(part_int_t), cudaMemcpyHostToDevice) ); + cudaDeviceSynchronize(); +} +void Particles_3D::Copy_Particles_Array_Int_Device_to_Host( part_int_t *array_dev, part_int_t *array_host, part_int_t size) { + CudaSafeCall( cudaMemcpy(array_host, array_dev, size*sizeof(part_int_t), cudaMemcpyDeviceToHost) ); + cudaDeviceSynchronize(); +} __global__ void Set_Particles_Array_Real_Kernel( Real value, Real *array_dev, part_int_t size ){ int tid = blockIdx.x * blockDim.x + threadIdx.x ; From 2afef0b9d145a599a86ccb37933fbc197763b310 Mon Sep 17 00:00:00 2001 From: ojwg Date: Thu, 11 Nov 2021 14:53:02 +0100 Subject: [PATCH 002/694] initial disk on GPU commit --- src/particles/particles_3D.cpp | 137 ++++++++++++++++++++---------- src/particles/particles_3D.h | 3 + src/particles/particles_3D_gpu.cu | 8 ++ 3 files changed, 104 insertions(+), 44 deletions(-) diff --git a/src/particles/particles_3D.cpp b/src/particles/particles_3D.cpp index 9973e449f..c0ec13c09 100644 --- a/src/particles/particles_3D.cpp +++ b/src/particles/particles_3D.cpp @@ -563,13 +563,9 @@ void Particles_3D::Initialize_Sphere(struct parameters *P){ /** - * Initializes a disk population of uniform mass (\f$(10^4 M_\odot)\f$) stellar clusters + * Initializes a disk population of uniform mass stellar clusters */ void Particles_3D::Initialize_Disk_Stellar_Clusters(struct parameters *P) { - #ifdef PARTICLES_GPU - chprintf( " Initialize_Disk_Stellar_Clusters: PARTICLES_GPU not currently supported\n"); - chexit(-1); - #endif #ifndef SINGLE_PARTICLE_MASS chprintf( " Initialize_Disk_Stellar_Clusters: only SINGLE_PARTICLE_MASS currently supported\n"); chexit(-1); @@ -590,17 +586,34 @@ void Particles_3D::Initialize_Disk_Stellar_Clusters(struct parameters *P) { Real R_max = sqrt(P->xlen*P->xlen + P->ylen*P->ylen)/2; R_max = P->xlen / 2.0; + real_vector_t temp_pos_x; + real_vector_t temp_pos_y; + real_vector_t temp_pos_z; + real_vector_t temp_vel_x; + real_vector_t temp_vel_y; + real_vector_t temp_vel_z; + real_vector_t temp_grav_x; + real_vector_t temp_grav_y; + real_vector_t temp_grav_z; + #ifndef SINGLE_PARTICLE_MASS + real_vector_t temp_mass; + #endif + #ifdef PARTICLE_IDS + int_vector_t temp_ids; + #endif + #ifdef PARTICLE_AGE + real_vector_t temp_age; + #endif + Real x, y, z, R, phi; Real vx, vy, vz, vel, ac; Real expFactor, vR_rms, vR, vPhi_str, vPhi, v_c2, vPhi_rand_rms, kappa2; - #ifdef PARTICLE_IDS - part_int_t id; - #endif - particle_mass = 1e4; //solar masses + + particle_mass = 1e5; //solar masses //unsigned long int N = (long int)(6.5e6 * 0.11258580827352116); //2kpc radius - unsigned long int N = (long int)(6.5e6 * 0.9272485558395908); // 15kpc radius + unsigned long int N = 38;//(long int)(6.5e6 * 0.9272485558395908); // 15kpc radius long lost_particles = 0; - for ( unsigned long int i = 0; i < N; i++ ){ + for ( part_int_t i = 0; i < N; i++ ){ do { R = R_d*radialDist(prng.generator); } while (R > R_max); @@ -621,59 +634,95 @@ void Particles_3D::Initialize_Disk_Stellar_Clusters(struct parameters *P) { vy = vPhi*cos(phi); vz = 0; - #ifdef PARTICLES_CPU - //Copy the particle data to the particles vectors - pos_x.push_back(x); - pos_y.push_back(y); - pos_z.push_back(z); - vel_x.push_back(vx); - vel_y.push_back(vy); - vel_z.push_back(vz); - grav_x.push_back(0.0); - grav_y.push_back(0.0); - grav_z.push_back(0.0); + //add particle data to the particles vectors + temp_pos_x.push_back(x); + temp_pos_y.push_back(y); + temp_pos_z.push_back(z); + temp_vel_x.push_back(vx); + temp_vel_y.push_back(vy); + temp_vel_z.push_back(vz); + temp_grav_x.push_back(0.0); + temp_grav_y.push_back(0.0); + temp_grav_z.push_back(0.0); #ifdef PARTICLE_IDS - id = i; - #ifdef PARALLEL_OMP - #pragma omp parallel num_threads( N_OMP_THREADS ) - { - id += 1.0*omp_get_thread_num()/omp_get_num_threads(); - } - #endif //PARALLEL_OMP - partIDs.push_back(id); + temp_ids.push_back(i); #endif //PARTICLE_IDS #ifdef PARTICLE_AGE //if (fabs(z) >= Z_d) age.push_back(1.1e4); //else age.push_back(0.0); - age.push_back(0.0); + temp_age.push_back(0.0); #endif - - #endif//PARTICLES_CPU } - - - #ifdef PARTICLES_CPU - n_local = pos_x.size(); - #endif + n_local = temp_pos_x.size(); #if defined(PARTICLE_IDS) && defined(MPI_CHOLLA) // Get global IDs: Offset the local IDs to get unique global IDs across the MPI ranks chprintf( " Computing Global Particles IDs offset \n" ); part_int_t global_id_offset; global_id_offset = Get_Particles_IDs_Global_MPI_Offset( n_local ); - #ifdef PARTICLES_CPU for ( int p_indx=0; p_indx 0) chprintf(" lost %lu particles\n", lost_particles); chprintf( " Stellar Disk Particles Initialized, n_local: %lu\n", n_local); } diff --git a/src/particles/particles_3D.h b/src/particles/particles_3D.h index 1acff51c9..128249d34 100644 --- a/src/particles/particles_3D.h +++ b/src/particles/particles_3D.h @@ -71,6 +71,9 @@ class Particles_3D #ifdef PARTICLE_IDS part_int_t *partIDs_dev; #endif + #ifdef PARTICLE_AGE + Real *age_dev; + #endif Real *mass_dev; Real *pos_x_dev; Real *pos_y_dev; diff --git a/src/particles/particles_3D_gpu.cu b/src/particles/particles_3D_gpu.cu index e8b8da37d..a3c314ce0 100644 --- a/src/particles/particles_3D_gpu.cu +++ b/src/particles/particles_3D_gpu.cu @@ -140,7 +140,15 @@ void Particles_3D::Copy_Particles_Array_Real_Device_to_Host( Real *array_dev, Re cudaDeviceSynchronize(); } +void Particles_3D::Copy_Particles_Array_Int_Host_to_Device( part_int_t *array_host, part_int_t *array_dev, part_int_t size) { + CudaSafeCall( cudaMemcpy(array_dev, array_host, size*sizeof(part_int_t), cudaMemcpyHostToDevice) ); + cudaDeviceSynchronize(); +} +void Particles_3D::Copy_Particles_Array_Int_Device_to_Host( part_int_t *array_dev, part_int_t *array_host, part_int_t size) { + CudaSafeCall( cudaMemcpy(array_host, array_dev, size*sizeof(part_int_t), cudaMemcpyDeviceToHost) ); + cudaDeviceSynchronize(); +} __global__ void Set_Particles_Array_Real_Kernel( Real value, Real *array_dev, part_int_t size ){ int tid = blockIdx.x * blockDim.x + threadIdx.x ; From 2c4c5750f58badcccf565ab20109bf1bd40f6fb3 Mon Sep 17 00:00:00 2001 From: ojwg Date: Tue, 1 Feb 2022 07:37:19 -0500 Subject: [PATCH 003/694] changes to support some particle attributes on GPU (age, id and, in some cases, mass), as well as GPU feedback. --- builds/make.type.disk | 13 +- examples/scripts/parameter_file.txt | 2 +- src/analysis/analysis.cpp | 113 +++++++ src/global/global.h | 4 +- src/global/global_cuda.h | 16 + src/gravity/grav3D.cpp | 8 + src/gravity/grav3D.h | 8 + src/gravity/gravity_boundaries.cpp | 6 +- src/gravity/gravity_functions.cpp | 79 ++++- src/gravity/gravity_functions_gpu.cu | 61 +++- src/gravity/potential_SOR_3D_gpu.cu | 8 +- src/gravity/potential_paris_galactic.cu | 2 +- src/grid/boundary_conditions.cpp | 23 +- src/grid/grid3D.h | 35 ++- src/grid/mpi_boundaries.cpp | 56 +--- src/io/io.cpp | 2 + src/main.cpp | 55 +++- src/model/disk_ICs.cpp | 27 +- src/model/disk_galaxy.h | 4 + src/particles/density_CIC_gpu.cu | 21 +- src/particles/density_boundaries.cpp | 2 +- src/particles/density_boundaries_gpu.cu | 4 +- src/particles/feeback_CIC.h | 7 +- src/particles/feedback_CIC.cpp | 350 ++++++++++++++++----- src/particles/feedback_CIC.h | 3 +- src/particles/feedback_CIC_gpu.cu | 322 +++++++++++++++++++ src/particles/gravity_CIC.cpp | 6 +- src/particles/gravity_CIC_gpu.cu | 27 +- src/particles/io_particles.cpp | 12 +- src/particles/particles_3D.cpp | 65 ++-- src/particles/particles_3D.h | 10 +- src/particles/particles_3D_gpu.cu | 60 +++- src/particles/particles_boundaries.cpp | 129 +++++--- src/particles/particles_boundaries_cpu.cpp | 20 +- src/particles/particles_boundaries_gpu.cu | 103 ++++-- src/particles/particles_boundaries_gpu.h | 4 +- src/particles/particles_dynamics.cpp | 2 +- src/particles/particles_dynamics_gpu.cu | 4 +- src/particles/supernova.h | 37 +++ src/utils/prng_utilities.h | 6 +- 40 files changed, 1368 insertions(+), 348 deletions(-) create mode 100644 src/particles/feedback_CIC_gpu.cu create mode 100644 src/particles/supernova.h diff --git a/builds/make.type.disk b/builds/make.type.disk index a142a4756..c77137b38 100644 --- a/builds/make.type.disk +++ b/builds/make.type.disk @@ -1,15 +1,17 @@ MPI_GPU = DFLAGS += -DPARTICLES DFLAGS += -DPARTICLES_CPU -DFLAGS += -DONLY_PARTICLES +#DFLAGS += -DPARTICLES_GPU +#DFLAGS += -DONLY_PARTICLES DFLAGS += -DPARTICLE_IDS DFLAGS += -DSINGLE_PARTICLE_MASS DFLAGS += -DGRAVITY +#DFLAGS += -DGRAVITY_GPU # Use both -DSOR and -DPARIS_GALACTIC to run analytic test and compare solutions DFLAGS += -DSOR -DFLAGS += -DPARIS_GALACTIC +#DFLAGS += -DPARIS_GALACTIC DFLAGS += -DGRAVITY_ANALYTIC_COMP @@ -21,17 +23,20 @@ DFLAGS += -DPPMP DFLAGS += -DHLLC DFLAGS += -DVL -#DFLAGS += -DDISK_ICS +DFLAGS += -DDISK_ICS DFLAGS += -DDENSITY_FLOOR DFLAGS += -DTEMPERATURE_FLOOR DFLAGS += -DDE DFLAGS += -DCPU_TIME +DFLAGS += -DAVERAGE_SLOW_CELLS -OUTPUT ?= -DOUTPUT -DHDF5 +OUTPUT ?= -DOUTPUT -DHDF5 -DSLICES DFLAGS += $(OUTPUT) DFLAGS += $(MPI_GPU) DFLAGS += -DPARALLEL_OMP DFLAGS += -DN_OMP_THREADS=$(OMP_NUM_THREADS) + +DFLAGS += -DHIDE_CIC_ERRORS diff --git a/examples/scripts/parameter_file.txt b/examples/scripts/parameter_file.txt index 175d3a461..c6ed505ce 100644 --- a/examples/scripts/parameter_file.txt +++ b/examples/scripts/parameter_file.txt @@ -10,7 +10,7 @@ ny=256 # number of grid cells in the z dimension nz=256 # final output time -tout=3000 +tout=10000 #tout=3 # time interval for output outstep=100 diff --git a/src/analysis/analysis.cpp b/src/analysis/analysis.cpp index a508e4312..03d3e2c6e 100644 --- a/src/analysis/analysis.cpp +++ b/src/analysis/analysis.cpp @@ -108,6 +108,117 @@ void Grid3D::Compute_Lya_Statistics( ){ #endif //LYA_STATISTICS +#ifdef FEEDBACK +void Grid3D::Compute_Gas_Velocity_Dispersion() { + #ifdef PARTICLES_CPU + int i, j, k, id, idm, idp; + int id_grav; + Real x, y, z, r, xpm, xpp, ypm, ypp, zpm, zpp; + Real Pm, Pp; + Real dPdx, dPdy, dPdr; + Real vx, vy, vz, vrms_poisson, vrms_analytic, vcp, vca, vcxp, vcyp, vcxa, vcya; + Real total_mass, partial_mass = 0, total_var_analytic = 0, total_var_poisson = 0, partial_var_poisson = 0, partial_var_analytic = 0; + + int n_ghost_grav = Particles.G.n_ghost_particles_grid; + int ghost_diff = n_ghost_grav - H.n_ghost; + int nx_grav = Particles.G.nx_local + 2*n_ghost_grav; + int ny_grav = Particles.G.ny_local + 2*n_ghost_grav; + + for (k=0; k %E, %E DIFF %E%% \n", i, j, fabs(gravAnalytic), fabs(gravCalc), fabs((gravAnalytic-gravCalc)/gravAnalytic*100)); + //vc = sqrt(r*fabs(Particles.G.gravity_x[id_grav]*x/r + Particles.G.gravity_y[id_grav]*y/r - dPdr/C.density[id])); + vcp = sqrt(r*fabs(Particles.G.gravity_x[id_grav]*x/r + Particles.G.gravity_y[id_grav]*y/r)); + vcxp = -y/r * vcp; + vcyp = x/r * vcp; + //auto [vcx, vcy] = Galaxies::MW.rotation_velocity(x, y); + vx = C.momentum_x[id]/ C.density[id]; + vy = C.momentum_y[id]/ C.density[id]; + vz = C.momentum_z[id]/ C.density[id]; + + partial_var_poisson += ((vx - vcxp)*(vx - vcxp) + (vy - vcyp)*(vy - vcyp) + vz*vz)* C.density[id]; + partial_var_analytic += ((vx - vcxa)*(vx - vcxa) + (vy - vcya)*(vy - vcya) + vz*vz)* C.density[id]; + } + } + } + partial_var_poisson /= total_mass; + partial_var_analytic /= total_mass; + + #ifdef MPI_CHOLLA + MPI_Reduce(&partial_var_poisson, &total_var_poisson, 1, MPI_CHREAL, MPI_SUM, root, world); + MPI_Reduce(&partial_var_analytic, &total_var_analytic, 1, MPI_CHREAL, MPI_SUM, root, world); + + #else + total_var_poisson = partial_var_poisson; + total_var_analytic = partial_var_analytic; + #endif + + vrms_poisson = sqrt(total_var_poisson)*VELOCITY_UNIT/1e5; // output in km/s + vrms_analytic = sqrt(total_var_analytic)*VELOCITY_UNIT/1e5; + + chprintf("feedback: time %f, dt=%f, vrms_p = %f km/s, vrms_a = %f km/s\n", H.t, H.dt, vrms_poisson, vrms_analytic); + #endif // PARTICLES_CPU +} +#endif // FEEDBACK + + void Grid3D::Compute_and_Output_Analysis( struct parameters *P ){ #ifdef COSMOLOGY @@ -125,11 +236,13 @@ void Grid3D::Compute_and_Output_Analysis( struct parameters *P ){ #endif //Write to HDF5 file + #if defined(COSMOLOGY) || defined(PHASE_DIAGRAM) || defined(LYA_STATISTICS) #ifdef MPI_CHOLLA if ( procID == 0 ) Output_Analysis(P); #else Output_Analysis(P); #endif + #endif #ifdef LYA_STATISTICS diff --git a/src/global/global.h b/src/global/global.h index ff6c7f5e3..977288952 100644 --- a/src/global/global.h +++ b/src/global/global.h @@ -211,14 +211,14 @@ struct parameters int yu_bcnd; int zl_bcnd; int zu_bcnd; -#ifdef MPI_CHOLLA +//#ifdef MPI_CHOLLA FIXME: verify this change is needed. int xlg_bcnd; int xug_bcnd; int ylg_bcnd; int yug_bcnd; int zlg_bcnd; int zug_bcnd; -#endif /*MPI_CHOLLA*/ +//#endif /*MPI_CHOLLA*/ char custom_bcnd[MAXLEN]; char outdir[MAXLEN]; char indir[MAXLEN]; //Folder to load Initial conditions from diff --git a/src/global/global_cuda.h b/src/global/global_cuda.h index cba4b18cc..c14aa872d 100644 --- a/src/global/global_cuda.h +++ b/src/global/global_cuda.h @@ -143,6 +143,22 @@ __device__ inline int sgn_CUDA(Real x) __global__ void test_function(); +//Define atomic_add if it's not supported +#if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 600 +#else +__device__ double atomicAdd(double* address, double val) +{ + unsigned long long int* address_as_ull = (unsigned long long int*)address; + unsigned long long int old = *address_as_ull, assumed; + do { + assumed = old; + old = atomicCAS(address_as_ull, assumed, + __double_as_longlong(val + __longlong_as_double(assumed))); + } while (assumed != old); + return __longlong_as_double(old); +} +#endif + #endif //GLOBAL_CUDA_H diff --git a/src/gravity/grav3D.cpp b/src/gravity/grav3D.cpp index 9960feb18..a82513c7d 100644 --- a/src/gravity/grav3D.cpp +++ b/src/gravity/grav3D.cpp @@ -126,6 +126,10 @@ void Grav3D::AllocateMemory_CPU(void) F.pot_boundary_z0 = (Real *) malloc(N_GHOST_POTENTIAL*nx_local*ny_local*sizeof(Real)); //array for the potential isolated boundary F.pot_boundary_z1 = (Real *) malloc(N_GHOST_POTENTIAL*nx_local*ny_local*sizeof(Real)); #endif + + #ifdef GRAVITY_ANALYTIC_COMP + F.analytic_potential_h = (Real *) malloc(n_cells_potential*sizeof(Real)); + #endif } void Grav3D::Set_Boundary_Flags( int *flags ){ @@ -169,6 +173,10 @@ void Grav3D::FreeMemory_CPU(void) #if defined(PARIS_TEST) || defined(PARIS_GALACTIC_TEST) Poisson_solver_test.Reset(); #endif + + #ifdef GRAVITY_ANALYTIC_COMP + free(F.analytic_potential_h); + #endif } #endif //GRAVITY diff --git a/src/gravity/grav3D.h b/src/gravity/grav3D.h index 15eec6873..d02508040 100644 --- a/src/gravity/grav3D.h +++ b/src/gravity/grav3D.h @@ -150,6 +150,10 @@ class Grav3D * \brief Array containing the gravitational potential of each cell in the grid at the previous time step */ Real *potential_1_h; + #ifdef GRAVITY_ANALYTIC_COMP + Real *analytic_potential_h; + #endif + #ifdef GRAVITY_GPU /*! \var density_d @@ -164,6 +168,10 @@ class Grav3D * \brief Device Array containing the gravitational potential of each cell in the grid at the previous time step */ Real *potential_1_d; + #ifdef GRAVITY_ANALYTIC_COMP + Real *analytic_potential_d; + #endif + #endif //GRAVITY_GPU // Arrays for computing the potential values in isolated boundaries diff --git a/src/gravity/gravity_boundaries.cpp b/src/gravity/gravity_boundaries.cpp index 43aba3088..244f7564b 100644 --- a/src/gravity/gravity_boundaries.cpp +++ b/src/gravity/gravity_boundaries.cpp @@ -142,9 +142,9 @@ void Grid3D::Compute_Potential_Isolated_Boundary( int direction, int side, int cm_pos_z = H.sphere_center_z; } - // for bc_pontential_type = 1 the mod_frac is - // the disk mass fraction being modelled. - Real mod_frac = 1; + // for bc_pontential_type = 1 the mod_frac is the fraction + // of the disk mass contributed by the simulated particles + Real mod_frac = 0; //1.0; //0; Real pot_val; int i, j, k, id; for ( k=0; kbc_potential_type == 1) { @@ -578,16 +578,20 @@ void Grid3D::Compute_Gravitational_Potential( struct parameters *P ){ printDiff(p.data(),Grav.F.potential_h,Grav.nx_local,Grav.ny_local,Grav.nz_local); #endif + #ifdef GRAVITY_ANALYTIC_COMP + Add_Analytic_Potential(); + #endif + #ifdef CPU_TIME Timer.End_and_Record_Time( 3 ); #endif - } + #ifdef GRAVITY_ANALYTIC_COMP -void Grid3D::Add_Analytic_Potential(struct parameters *P) { +void Grid3D::Setup_Analytic_Potential(struct parameters *P) { #ifndef PARALLEL_OMP - Add_Analytic_Galaxy_Potential(0, Grav.nz_local, Galaxies::MW); + Setup_Analytic_Galaxy_Potential(0, Grav.nz_local + 2*N_GHOST_POTENTIAL, Galaxies::MW); #else #pragma omp parallel num_threads( N_OMP_THREADS ) { @@ -596,13 +600,40 @@ void Grid3D::Add_Analytic_Potential(struct parameters *P) { omp_id = omp_get_thread_num(); n_omp_procs = omp_get_num_threads(); - Get_OMP_Grid_Indxs( Grav.nz_local, n_omp_procs, omp_id, &g_start, &g_end ); + Get_OMP_Grid_Indxs( Grav.nz_local + 2*N_GHOST_POTENTIAL, n_omp_procs, omp_id, &g_start, &g_end ); - Add_Analytic_Galaxy_Potential(g_start, g_end, Galaxies::MW); + Setup_Analytic_Galaxy_Potential(g_start, g_end, Galaxies::MW); } #endif + + #ifdef GRAVITY_GPU + CudaSafeCall( cudaMemcpy(Grav.F.analytic_potential_d, Grav.F.analytic_potential_h, Grav.n_cells_potential*sizeof(Real), cudaMemcpyHostToDevice) ); + #endif } -#endif + + +void Grid3D::Add_Analytic_Potential() { + #ifdef GRAVITY_GPU + Add_Analytic_Potential_GPU(); + #else + #ifndef PARALLEL_OMP + Add_Analytic_Potential(0, Grav.nz_local); + #else + #pragma omp parallel num_threads( N_OMP_THREADS ) + { + int omp_id, n_omp_procs; + int g_start, g_end; + + omp_id = omp_get_thread_num(); + n_omp_procs = omp_get_num_threads(); + Get_OMP_Grid_Indxs( Grav.nz_local, n_omp_procs, omp_id, &g_start, &g_end ); + + Add_Analytic_Potential(g_start, g_end); + } + #endif //PARALLEL_OMP + #endif // GRAVITY_GPU else +} +#endif //GRAVITY_ANALYTIC_COMP void Grid3D::Copy_Hydro_Density_to_Gravity_Function( int g_start, int g_end){ @@ -663,17 +694,13 @@ void Grid3D::Copy_Hydro_Density_to_Gravity(){ #ifdef GRAVITY_ANALYTIC_COMP -/** - * Adds a specified potential function to the potential calculated from solving the Poisson equation. - * The raison d'etre is to solve the evolution of a system where not all particles are simulated. - */ -void Grid3D::Add_Analytic_Galaxy_Potential(int g_start, int g_end, DiskGalaxy& gal) { +void Grid3D::Setup_Analytic_Galaxy_Potential(int g_start, int g_end, DiskGalaxy& gal) { int nx = Grav.nx_local + 2*N_GHOST_POTENTIAL; int ny = Grav.ny_local + 2*N_GHOST_POTENTIAL; int nz = Grav.nz_local + 2*N_GHOST_POTENTIAL; // the fraction of the disk that's not modelled (and so its analytic contribution must be added) - //Real non_mod_frac = 0.0; + Real non_mod_frac = 1; //0.0; //1.0; int k, j, i, id; Real x_pos, y_pos, z_pos, R; @@ -681,13 +708,33 @@ void Grid3D::Add_Analytic_Galaxy_Potential(int g_start, int g_end, DiskGalaxy& g for ( j=0; j= nx_pot || tid_y >= ny_pot || tid_z >= nz_pot ) return; + + tid= tid_x + tid_y*nx_pot + tid_z*nx_pot*ny_pot; + /* + if (tid_x < nx_pot && tid_y == 0 && tid_z == (nz_pot/2)) { + printf("potential_d[%d, %d, %d] = %.4e\n", tid_x, tid_y, tid_z, potential_d[tid]); + printf("analytic_d[%d, %d, %d] = %.4e\n", tid_x, tid_y, tid_z, analytic_d[tid]); + }*/ + //potential_d[tid] += analytic_d[tid]; + potential_d[tid] = analytic_d[tid]; // FIXME debug only } + +void Grid3D::Add_Analytic_Potential_GPU() { + int nx_pot, ny_pot, nz_pot; + nx_pot = Grav.nx_local + 2*N_GHOST_POTENTIAL; + ny_pot = Grav.ny_local + 2*N_GHOST_POTENTIAL; + nz_pot = Grav.nz_local + 2*N_GHOST_POTENTIAL; + + // set values for GPU kernels + int tpb_x = TPBX_GRAV; + int tpb_y = TPBY_GRAV; + int tpb_z = TPBZ_GRAV; + + int ngrid_x = (nx_pot - 1) / tpb_x + 1; + int ngrid_y = (ny_pot - 1) / tpb_y + 1; + int ngrid_z = (nz_pot - 1) / tpb_z + 1; + + // number of blocks per 1D grid + dim3 dim3dGrid(ngrid_x, ngrid_y, ngrid_z); + // number of threads per 1D block + dim3 dim3dBlock(tpb_x, tpb_y, tpb_z); + + //Copy the analytic potential from the device array to the device potential array + hipLaunchKernelGGL(Add_Analytic_Potential_Kernel, dim3dGrid, dim3dBlock, 0, 0, Grav.F.analytic_potential_d, Grav.F.potential_d, nx_pot, ny_pot, nz_pot); + cudaDeviceSynchronize(); +} +#endif //GRAVITY_ANALYTIC_COMP && GRAVITY_GPU + + + void __global__ Extrapolate_Grav_Potential_Kernel( Real *dst_potential, Real *src_potential_0, Real *src_potential_1, int nx_pot, int ny_pot, int nz_pot, int nx_grid, int ny_grid, int nz_grid, int n_offset, Real dt_now, Real dt_prev, bool INITIAL, Real cosmo_factor ){ @@ -177,6 +231,7 @@ void __global__ Extrapolate_Grav_Potential_Kernel( Real *dst_potential, Real *sr src_potential_1[tid_pot] = pot_now; } + void Grid3D::Extrapolate_Grav_Potential_GPU(){ int nx_pot, ny_pot, nz_pot; diff --git a/src/gravity/potential_SOR_3D_gpu.cu b/src/gravity/potential_SOR_3D_gpu.cu index 47d680077..e9b921e98 100644 --- a/src/gravity/potential_SOR_3D_gpu.cu +++ b/src/gravity/potential_SOR_3D_gpu.cu @@ -355,7 +355,7 @@ void Potential_SOR_3D::Copy_Potential_From_Host( Real *output_potential ){ -__global__ void Load_Transfer_Buffer_GPU_kernel( int direction, int side, int size_buffer, int n_i, int n_j, int nx, int ny, int nz, int n_ghost_transfer, int n_ghost_potential, Real *potential_d, Real *transfer_buffer_d ){ +__global__ void Load_Transfer_Buffer_GPU_kernel_SOR( int direction, int side, int size_buffer, int n_i, int n_j, int nx, int ny, int nz, int n_ghost_transfer, int n_ghost_potential, Real *potential_d, Real *transfer_buffer_d ){ // get a global thread ID int tid, tid_i, tid_j, tid_k, tid_buffer, tid_pot; @@ -435,7 +435,7 @@ __global__ void Load_Transfer_Buffer_GPU_Half_kernel( int direction, int side, i -__global__ void Unload_Transfer_Buffer_GPU_kernel( int direction, int side, int size_buffer, int n_i, int n_j, int nx, int ny, int nz, int n_ghost_transfer, int n_ghost_potential, Real *potential_d, Real *transfer_buffer_d ){ +__global__ void Unload_Transfer_Buffer_GPU_kernel_SOR( int direction, int side, int size_buffer, int n_i, int n_j, int nx, int ny, int nz, int n_ghost_transfer, int n_ghost_potential, Real *potential_d, Real *transfer_buffer_d ){ // get a global thread ID int tid, tid_i, tid_j, tid_k, tid_buffer, tid_pot; @@ -544,7 +544,7 @@ void Potential_SOR_3D::Load_Transfer_Buffer_GPU( int direction, int side, int nx // Load_Transfer_Buffer_GPU_kernel<<>>( direction, side, size_buffer, n_i, n_j, nx_pot, ny_pot, nz_pot, n_ghost_transfer, n_ghost_potential, potential_d, transfer_buffer_d ); - hipLaunchKernelGGL( Load_Transfer_Buffer_GPU_kernel, dim1dGrid, dim1dBlock, 0, 0, direction, side, size_buffer, n_i, n_j, nx_pot, ny_pot, nz_pot, n_ghost_transfer, n_ghost_potential, potential_d, transfer_buffer_d ); + hipLaunchKernelGGL( Load_Transfer_Buffer_GPU_kernel_SOR, dim1dGrid, dim1dBlock, 0, 0, direction, side, size_buffer, n_i, n_j, nx_pot, ny_pot, nz_pot, n_ghost_transfer, n_ghost_potential, potential_d, transfer_buffer_d ); } @@ -614,7 +614,7 @@ void Potential_SOR_3D::Unload_Transfer_Buffer_GPU( int direction, int side, int // Unload_Transfer_Buffer_GPU_kernel<<>>( direction, side, size_buffer, n_i, n_j, nx_pot, ny_pot, nz_pot, n_ghost_transfer, n_ghost_potential, potential_d, transfer_buffer_d ); - hipLaunchKernelGGL(Unload_Transfer_Buffer_GPU_kernel,dim1dGrid, dim1dBlock, 0, 0, direction, side, size_buffer, n_i, n_j, nx_pot, ny_pot, nz_pot, n_ghost_transfer, n_ghost_potential, potential_d, transfer_buffer_d ); + hipLaunchKernelGGL(Unload_Transfer_Buffer_GPU_kernel_SOR,dim1dGrid, dim1dBlock, 0, 0, direction, side, size_buffer, n_i, n_j, nx_pot, ny_pot, nz_pot, n_ghost_transfer, n_ghost_potential, potential_d, transfer_buffer_d ); } diff --git a/src/gravity/potential_paris_galactic.cu b/src/gravity/potential_paris_galactic.cu index db53ea31a..2eb1fea04 100644 --- a/src/gravity/potential_paris_galactic.cu +++ b/src/gravity/potential_paris_galactic.cu @@ -58,7 +58,7 @@ void Potential_Paris_Galactic::Get_Potential(const Real *const density, Real *co const Real dy = dr_[1]; const Real dz = dr_[0]; - const Real md = galaxy.getM_d(); + const Real md = 0; //galaxy.getM_d(); //FIXME temporary change for debugging small # clusters const Real rd = galaxy.getR_d(); const Real zd = galaxy.getZ_d(); diff --git a/src/grid/boundary_conditions.cpp b/src/grid/boundary_conditions.cpp index bbb824f61..8d438d935 100644 --- a/src/grid/boundary_conditions.cpp +++ b/src/grid/boundary_conditions.cpp @@ -187,16 +187,12 @@ void Grid3D::Set_Boundaries(int dir, int flags[]) int nPB, nBoundaries; int *iaBoundary, *iaCell; -#ifdef MPI_CHOLLA + #ifdef MPI_CHOLLA /*if the cell face is an mpi boundary, exit */ - if(flags[dir]==5) - return; -#endif /*MPI_CHOLLA*/ - - + if(flags[dir]==5) return; + #endif /*MPI_CHOLLA*/ #ifdef GRAVITY - if ( Grav.TRANSFER_POTENTIAL_BOUNDARIES ){ if ( flags[dir] == 1 ){ // Set Periodic Boundaries for the ghost cells. @@ -254,9 +250,7 @@ void Grid3D::Set_Boundaries(int dir, int flags[]) } return; } - #endif - #ifdef PARTICLES if ( Particles.TRANSFER_PARTICLES_BOUNDARIES ){ if ( flags[dir] ==1 ){ #ifdef PARTICLES_CPU @@ -278,10 +272,13 @@ void Grid3D::Set_Boundaries(int dir, int flags[]) #endif//PARTICLES_GPU - } else if (flags[dir] == 3) { - #ifdef PARTICLES_CPU - Set_Particles_Open_Boundary(dir/2, dir%2); - #endif + } else if (flags[dir] == 3) { + #ifdef PARTICLES_CPU + Set_Particles_Open_Boundary_CPU(dir/2, dir%2); + #endif + #ifdef PARTICLES_GPU + Particles.Set_Particles_Open_Boundary_GPU(dir/2, dir%2); + #endif } return; } diff --git a/src/grid/grid3D.h b/src/grid/grid3D.h index e88d25529..2c2d8d122 100644 --- a/src/grid/grid3D.h +++ b/src/grid/grid3D.h @@ -322,6 +322,13 @@ class Grid3D Analysis_Module Analysis; #endif + #ifdef FEEDBACK //TODO refactor this into Analysis module + Real countSN; + Real countResolved; + Real countUnresolved; + Real totalEnergy; + Real totalMomentum; + #endif struct Conserved { /*! \var density @@ -629,12 +636,10 @@ class Grid3D void Set_Boundaries_MPI_BLOCK(int *flags, struct parameters P); void Set_Edge_Boundaries(int dir, int *flags); void Set_Edge_Boundary_Extents(int dir, int edge, int *imin, int *imax); - void Load_and_Send_MPI_Comm_Buffers(int dir, int *flags); void Load_and_Send_MPI_Comm_Buffers_SLAB(int *flags); void Load_and_Send_MPI_Comm_Buffers_BLOCK(int dir, int *flags); void Wait_and_Unload_MPI_Comm_Buffers_SLAB(int *flags); void Wait_and_Unload_MPI_Comm_Buffers_BLOCK(int dir, int *flags); - void Unload_MPI_Comm_Buffers(int index); void Unload_MPI_Comm_Buffers_SLAB(int index); void Unload_MPI_Comm_Buffers_BLOCK(int index); @@ -699,8 +704,13 @@ class Grid3D #endif//GRAVITY #ifdef GRAVITY_ANALYTIC_COMP - void Add_Analytic_Potential(struct parameters *P); - void Add_Analytic_Galaxy_Potential(int g_start, int g_end, DiskGalaxy& gal); + void Add_Analytic_Potential(); + void Add_Analytic_Potential(int g_start, int g_end); + void Setup_Analytic_Potential(struct parameters *P); + void Setup_Analytic_Galaxy_Potential(int g_start, int g_end, DiskGalaxy& gal); + #ifdef GRAVITY_GPU + void Add_Analytic_Potential_GPU(); + #endif #endif //GRAVITY_ANALYTIC_COMP #ifdef PARTICLES @@ -713,7 +723,9 @@ class Grid3D void Transfer_Particles_Boundaries( struct parameters P ); Real Update_Grid_and_Particles_KDK( struct parameters P ); void Set_Particles_Boundary( int dir, int side); - void Set_Particles_Open_Boundary(int dir, int side); + #ifdef PARTICLES_CPU + void Set_Particles_Open_Boundary_CPU(int dir, int side); + #endif #ifdef MPI_CHOLLA int Load_Particles_Density_Boundary_to_Buffer( int direction, int side, Real *buffer ); void Unload_Particles_Density_Boundary_From_Buffer( int direction, int side, Real *buffer ); @@ -762,11 +774,11 @@ class Grid3D void Advance_Particles_KDK_Step1_GPU(); void Advance_Particles_KDK_Step2_GPU(); void Set_Particles_Boundary_GPU( int dir, int side); + int Load_Particles_Density_Boundary_to_Buffer_GPU( int direction, int side, Real *buffer ); + void Unload_Particles_Density_Boundary_From_Buffer_GPU( int direction, int side, Real *buffer ); #endif//PARTICLES_GPU #ifdef GRAVITY_GPU void Copy_Particles_Density_GPU(); - int Load_Particles_Density_Boundary_to_Buffer_GPU( int direction, int side, Real *buffer ); - void Unload_Particles_Density_Boundary_From_Buffer_GPU( int direction, int side, Real *buffer ); #endif//GRAVITY_GPU #endif//PARTICLES @@ -821,8 +833,13 @@ class Grid3D #ifdef PARTICLES #ifdef DE #ifdef PARTICLE_AGE - void Cluster_Feedback(); - void Cluster_Feedback_Function(part_int_t p_start, part_int_t p_end); + #ifdef FEEDBACK + Real Cluster_Feedback(); + Real Cluster_Feedback_GPU(); + void Cluster_Feedback_Function(part_int_t p_start, part_int_t p_end, Real* info, int thread_id, Real* dti); + void Compute_Gas_Velocity_Dispersion(); + Real Calc_Timestep(int index); + #endif #endif #endif #endif diff --git a/src/grid/mpi_boundaries.cpp b/src/grid/mpi_boundaries.cpp index 721724808..d4aaddf65 100644 --- a/src/grid/mpi_boundaries.cpp +++ b/src/grid/mpi_boundaries.cpp @@ -1,4 +1,4 @@ -#include "../grid/grid3D.h" +#include "grid3D.h" #include "../mpi/mpi_routines.h" #include "../io/io.h" #include "../utils/error_handling.h" @@ -48,7 +48,7 @@ void Grid3D::Set_Boundaries_MPI_SLAB(int *flags, struct parameters P) Set_Edge_Boundaries(1,flags); //1) load and post comm for buffers - Load_and_Send_MPI_Comm_Buffers(0, flags); + Load_and_Send_MPI_Comm_Buffers_SLAB(flags); //2) perform any additional boundary conditions //including whether the x face is non-MPI @@ -89,7 +89,7 @@ void Grid3D::Set_Boundaries_MPI_BLOCK(int *flags, struct parameters P) /* Step 1 - Send MPI x-boundaries */ if (flags[0]==5 || flags[1]==5) { - Load_and_Send_MPI_Comm_Buffers(0, flags); + Load_and_Send_MPI_Comm_Buffers_BLOCK(0, flags); } /* Step 2 - Set non-MPI x-boundaries */ @@ -112,7 +112,7 @@ void Grid3D::Set_Boundaries_MPI_BLOCK(int *flags, struct parameters P) /* Step 4 - Send MPI y-boundaries */ if (flags[2]==5 || flags[3]==5) { - Load_and_Send_MPI_Comm_Buffers(1, flags); + Load_and_Send_MPI_Comm_Buffers_BLOCK(1, flags); } /* Step 5 - Set non-MPI y-boundaries */ @@ -133,7 +133,7 @@ void Grid3D::Set_Boundaries_MPI_BLOCK(int *flags, struct parameters P) /* Step 7 - Send MPI z-boundaries */ if (flags[4]==5 || flags[5]==5) { - Load_and_Send_MPI_Comm_Buffers(2, flags); + Load_and_Send_MPI_Comm_Buffers_BLOCK(2, flags); } /* Step 8 - Set non-MPI z-boundaries */ @@ -399,25 +399,6 @@ void Grid3D::Set_Edge_Boundary_Extents(int dir, int edge, int *imin, int *imax) } - - -void Grid3D::Load_and_Send_MPI_Comm_Buffers(int dir, int *flags) -{ - - switch(flag_decomp) - { - case SLAB_DECOMP: - /*load communication buffers*/ - Load_and_Send_MPI_Comm_Buffers_SLAB(flags); - break; - case BLOCK_DECOMP: - /*load communication buffers*/ - Load_and_Send_MPI_Comm_Buffers_BLOCK(dir, flags); - break; - } - -} - void Grid3D::Load_and_Send_MPI_Comm_Buffers_SLAB(int *flags) { @@ -991,7 +972,7 @@ void Grid3D::Load_and_Send_MPI_Comm_Buffers_BLOCK(int dir, int *flags) // load left x communication buffer if ( H.TRANSFER_HYDRO_BOUNDARIES ) - { + { #ifdef HYDRO_GPU buffer_length = Load_Hydro_DeviceBuffer_X0(d_send_buffer_x0); #ifndef MPI_GPU @@ -1001,7 +982,7 @@ void Grid3D::Load_and_Send_MPI_Comm_Buffers_BLOCK(int dir, int *flags) #else buffer_length = Load_Hydro_Buffer_X0(h_send_buffer_x0); #endif - } + } #ifdef GRAVITY if ( Grav.TRANSFER_POTENTIAL_BOUNDARIES ){ @@ -1121,7 +1102,7 @@ void Grid3D::Load_and_Send_MPI_Comm_Buffers_BLOCK(int dir, int *flags) #endif if ( transfer_main_buffer ){ - #if defined(MPI_GPU) && defined(HYDRO_GPU) + #if defined(MPI_GPU) && defined(HYDRO_GPU) //post non-blocking receive right x communication buffer MPI_Irecv(d_recv_buffer_x1, buffer_length, MPI_CHREAL, source[1], 1, world, &recv_request[ireq]); @@ -1205,7 +1186,7 @@ void Grid3D::Load_and_Send_MPI_Comm_Buffers_BLOCK(int dir, int *flags) #endif if ( transfer_main_buffer ){ - #if defined(MPI_GPU) && defined(HYDRO_GPU) + #if defined(MPI_GPU) && defined(HYDRO_GPU) //post non-blocking receive left y communication buffer MPI_Irecv(d_recv_buffer_y0, buffer_length, MPI_CHREAL, source[2], 2, world, &recv_request[ireq]); @@ -1488,7 +1469,7 @@ void Grid3D::Wait_and_Unload_MPI_Comm_Buffers_SLAB(int *flags) MPI_Waitany(wait_max,recv_request,&index,&status); //depending on which face arrived, load the buffer into the ghost grid - Unload_MPI_Comm_Buffers(status.MPI_TAG); + Unload_MPI_Comm_Buffers_SLAB(status.MPI_TAG); } } @@ -1535,22 +1516,7 @@ void Grid3D::Wait_and_Unload_MPI_Comm_Buffers_BLOCK(int dir, int *flags) //if (procID==1) MPI_Get_count(&status, MPI_CHREAL, &count); //if (procID==1) printf("Process 1 unloading direction %d, source %d, index %d, length %d.\n", status.MPI_TAG, status.MPI_SOURCE, index, count); //depending on which face arrived, load the buffer into the ghost grid - Unload_MPI_Comm_Buffers(status.MPI_TAG); - } -} - - - -void Grid3D::Unload_MPI_Comm_Buffers(int index) -{ - switch(flag_decomp) - { - case SLAB_DECOMP: - Unload_MPI_Comm_Buffers_SLAB(index); - break; - case BLOCK_DECOMP: - Unload_MPI_Comm_Buffers_BLOCK(index); - break; + Unload_MPI_Comm_Buffers_BLOCK(status.MPI_TAG); } } diff --git a/src/io/io.cpp b/src/io/io.cpp index 5162e4517..6725336cc 100644 --- a/src/io/io.cpp +++ b/src/io/io.cpp @@ -136,7 +136,9 @@ void WriteData(Grid3D &G, struct parameters P, int nfile) G.H.Output_Now = false; #endif + #ifdef MPI_CHOLLA MPI_Barrier(world); + #endif } diff --git a/src/main.cpp b/src/main.cpp index 8faa0263a..5cc91e67a 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -13,6 +13,9 @@ #include "grid/grid3D.h" #include "io/io.h" #include "utils/error_handling.h" +#ifdef FEEDBACK +#include "particles/supernova.h" +#endif int main(int argc, char *argv[]) @@ -118,6 +121,25 @@ int main(int argc, char *argv[]) if ( G.Analysis.Output_Now ) G.Compute_and_Output_Analysis(&P); #endif + #ifdef FEEDBACK + G.countSN = 0; + G.countResolved = 0; + G.countUnresolved = 0; + G.totalEnergy = 0; + G.totalMomentum = 0; + #ifdef PARTICLES_GPU + #ifdef MPI_CHOLLA + Supernova::initState(&P, G.Particles.n_local, 4); + #else + Supernova::initState(&P, G.Particles.n_local); + #endif // MPI_CHOLLA + #endif // PARTICLES_GPU + #endif // FEEDBACK + + #ifdef GRAVITY_ANALYTIC_COMP + G.Setup_Analytic_Potential(&P); + #endif + #ifdef GRAVITY // Get the gravitational potential for the first timestep G.Compute_Gravitational_Potential( &P); @@ -128,11 +150,6 @@ int main(int argc, char *argv[]) G.Set_Boundary_Conditions_Grid(P); chprintf("Boundary conditions set.\n"); - #ifdef GRAVITY_ANALYTIC_COMP - // add analytic component to gravity potential. - G.Add_Analytic_Potential(&P); - #endif - #ifdef PARTICLES // Get the particles acceleration for the first timestep G.Get_Particles_Acceleration(); @@ -211,9 +228,9 @@ int main(int argc, char *argv[]) //Set the Grid boundary conditions for next time step G.Set_Boundary_Conditions_Grid(P); - #ifdef GRAVITY_ANALYTIC_COMP + #if defined(GRAVITY_ANALYTIC_COMP) && !defined(GRAVITY_GPU) // add analytic component to gravity potential. - G.Add_Analytic_Potential(&P); + G.Add_Analytic_Potential(); #endif #ifdef PARTICLES @@ -221,8 +238,22 @@ int main(int argc, char *argv[]) G.Advance_Particles( 2 ); #endif - #ifdef PARTICLE_AGE - //G.Cluster_Feedback(); + #ifdef FEEDBACK + Real fdti = G.Cluster_Feedback(); + if (fdti != 0 && dti != 0) { + printf("DTI COMP: returned: %.4e [%.4e kyr]\n", fdti, 1/fdti); + printf(" current: %.4e [ %.4e kyr ] \n", dti, 1/dti); + + } else { + printf("DTI COMP: returned: %.4e, current: %.4e\n", fdti, dti); + } + if (fdti > dti) { + printf(" CHANGING\n"); + dti = fdti; + } + #ifdef ANALYSIS + G.Compute_Gas_Velocity_Dispersion(); + #endif #endif #ifdef CPU_TIME @@ -257,6 +288,12 @@ int main(int argc, char *argv[]) cudaMemcpy(G.C.density, G.C.device, G.H.n_fields*G.H.n_cells*sizeof(Real), cudaMemcpyDeviceToHost); #endif + int my_y = G.H.ny/2; + int my_z = G.H.nz/2; + for (int i = 0; i < 10; i++) { + chprintf("density[%d, %d, %d] = %.4e\n", i, my_y, my_z, G.C.density[i + my_y*G.H.nx + my_z*G.H.nx*G.H.ny]); + } + WriteData(G, P, nfile); // add one to the output file count nfile++; diff --git a/src/model/disk_ICs.cpp b/src/model/disk_ICs.cpp index f6ada7002..858eb874a 100644 --- a/src/model/disk_ICs.cpp +++ b/src/model/disk_ICs.cpp @@ -13,6 +13,7 @@ #include "../mpi/mpi_routines.h" #include "../io/io.h" #include "../utils/error_handling.h" +#include "disk_galaxy.h" // #define DISK_ICS @@ -771,22 +772,16 @@ void Grid3D::Disk_3D(parameters p) Real r_cool; // MW model - M_vir = 1.0e12; // viral mass of MW in M_sun - M_d = 6.5e10; // mass of disk in M_sun (assume all stars) - R_d = 3.5; // MW stellar disk scale length in kpc - z_d = 3.5/5.0; // MW stellar disk scale height in kpc - R_vir = 261; // MW viral radius in kpc - c_vir = 20; // MW halo concentration (to account for adiabatic contraction) - r_cool = 157.0; // cooling radius in kpc (MW) - - // M82 model - //M_vir = 5.0e10; // viral mass of M82 in M_sun (guess) - //M_d = 1.0e10; // mass of M82 disk in M_sun (Greco 2012) - //R_d = 0.8; // M82 stellar disk scale length in kpc (Mayya 2009) - //z_d = 0.15; // M82 stellar thin disk scale height in kpc (Lim 2013) - //R_vir = R_d/0.015; // M82 viral radius in kpc from R_(1/2) = 0.015 R_200 (Kravtsov 2013) - //c_vir = 10; // M82 halo concentration - //r_cool = 100.0; // cooling in kpc (M82, guess) + DiskGalaxy galaxy = Galaxies::MW; + // M82 model Galaxies::M82; + + M_vir = galaxy.getM_vir(); // viral mass in M_sun + M_d = galaxy.getM_d(); // mass of disk in M_sun (assume all stars) + R_d = galaxy.getR_d(); // stellar disk scale length in kpc + z_d = galaxy.getZ_d(); // stellar disk scale height in kpc + R_vir = galaxy.getR_vir(); // viral radius in kpc + c_vir = galaxy.getC_vir(); // halo concentration (to account for adiabatic contraction) + r_cool = galaxy.getR_cool(); // cooling radius in kpc (MW) M_h = M_vir - M_d; // halo mass in M_sun R_s = R_vir / c_vir; // halo scale length in kpc diff --git a/src/model/disk_galaxy.h b/src/model/disk_galaxy.h index d21b7ed98..ba4909604 100644 --- a/src/model/disk_galaxy.h +++ b/src/model/disk_galaxy.h @@ -142,6 +142,10 @@ class DiskGalaxy { Real getM_d() const { return M_d; }; Real getR_d() const { return R_d; }; Real getZ_d() const { return Z_d; }; + Real getM_vir() { return M_vir; }; + Real getR_vir() { return R_vir; }; + Real getC_vir() { return c_vir; }; + Real getR_cool() { return r_cool; }; }; diff --git a/src/particles/density_CIC_gpu.cu b/src/particles/density_CIC_gpu.cu index 931cdc9e3..cb4e19bfa 100644 --- a/src/particles/density_CIC_gpu.cu +++ b/src/particles/density_CIC_gpu.cu @@ -7,24 +7,9 @@ #include "../utils/gpu.hpp" #include "../global/global.h" #include "../global/global_cuda.h" -#include "../particles/particles_3D.h" - - -//Define atomic_add if it's not supported -#if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 600 -#else -__device__ double atomicAdd(double* address, double val) -{ - unsigned long long int* address_as_ull = (unsigned long long int*)address; - unsigned long long int old = *address_as_ull, assumed; - do { - assumed = old; - old = atomicCAS(address_as_ull, assumed, - __double_as_longlong(val + __longlong_as_double(assumed))); - } while (assumed != old); - return __longlong_as_double(old); -} -#endif +#include "particles_3D.h" + + //Get the CIC index from the particle position ( device function ) __device__ void Get_Indexes_CIC( Real xMin, Real yMin, Real zMin, Real dx, Real dy, Real dz, Real pos_x, Real pos_y, Real pos_z, int &indx_x, int &indx_y, int &indx_z ){ diff --git a/src/particles/density_boundaries.cpp b/src/particles/density_boundaries.cpp index ae509d537..5aacda6ce 100644 --- a/src/particles/density_boundaries.cpp +++ b/src/particles/density_boundaries.cpp @@ -2,7 +2,7 @@ #include "../io/io.h" #include "../grid/grid3D.h" -#include "../particles/particles_3D.h" +#include "particles_3D.h" #include //Copy the particles density boundaries for non-MPI PERIODIC transfers diff --git a/src/particles/density_boundaries_gpu.cu b/src/particles/density_boundaries_gpu.cu index 531a7c8e8..85f453dc3 100644 --- a/src/particles/density_boundaries_gpu.cu +++ b/src/particles/density_boundaries_gpu.cu @@ -2,7 +2,7 @@ #include "../io/io.h" #include "../grid/grid3D.h" -#include "../particles/particles_3D.h" +#include "particles_3D.h" #include @@ -168,4 +168,4 @@ void Grid3D::Unload_Particles_Density_Boundary_From_Buffer_GPU( int direction, i #endif//MPI_CHOLLA -#endif//PARTICLES_GPU +#endif//PARTICLES_GPU & GRAVITY_GPU diff --git a/src/particles/feeback_CIC.h b/src/particles/feeback_CIC.h index a2d99ad02..a2cf33adb 100644 --- a/src/particles/feeback_CIC.h +++ b/src/particles/feeback_CIC.h @@ -1,7 +1,4 @@ -#ifdef PARTICLES -#ifdef DE -#ifdef PARTICLE_AGE - +#if defined(PARTICLES) && defined(DE) && defined(PARTICLE_AGE) && defined(PARTICLE_CPU) && defined(FEEDBACK) #ifndef FEEDBACK_CIC_H #define FEEDBACK_CIC_H @@ -11,7 +8,5 @@ Real getClusterEnergyFeedback(Real t, Real dt, Real age); Real getClusterMassFeedback(Real t, Real dt, Real age); -#endif -#endif #endif #endif \ No newline at end of file diff --git a/src/particles/feedback_CIC.cpp b/src/particles/feedback_CIC.cpp index d7d36dde8..2f61e17b6 100644 --- a/src/particles/feedback_CIC.cpp +++ b/src/particles/feedback_CIC.cpp @@ -1,39 +1,79 @@ #ifdef PARTICLES #ifdef DE #ifdef PARTICLE_AGE +#ifdef FEEDBACK #include -#include "../particles/feedback_CIC.h" -#include "../particles/particles_3D.h" +#include "feedback_CIC.h" +#include "particles_3D.h" #include "../grid/grid3D.h" -#include "../particles/density_CIC.h" +#include "../io/io.h" +#include "supernova.h" +#include +#include +#include +#include +#ifdef MPI_CHOLLA +#include "../mpi/mpi_routines.h" +#endif #ifdef PARALLEL_OMP -#include "../utils/parallel_omp.h" +#include"../utils/parallel_omp.h" #endif -// simple energy feedback prescription -Real getClusterEnergyFeedback(Real t, Real dt, Real age) { - if (t + age <= 1.0e4) return ENERGY_FEEDBACK_RATE * dt; - else return 0; -} +std::random_device rd; +//std::mt19937_64 gen(rd()); +std::mt19937_64 generator(42); //FIXME read this in from init params or ChollaPrngGenerator + -// simple feedback prescription -Real getClusterMassFeedback(Real t, Real dt, Real age) { - //if (t + age <= 1.0e4) return 0.1 * dt; // 0.01 SN/ky/cluster * 10 solar mass ejected/SN - //if (t + age <= 1.0e4) return 10 * dt; // 1 SN/ky/cluster * 10 solar mass ejected/SN - //else return 0; - return 0; +std::tuple getClusterFeedback(Real t, Real dt, Real age, Real density) { + int N = 0; + if (t + age <= 1.0e4) { + std::poisson_distribution distribution(Supernova::SNR * dt); + N = distribution(generator); + } + Real n_0 = density * DENSITY_UNIT / (Supernova::MU*MP); // in cm^{-3} + //std::cout << "n_0 is " << n_0 << std::endl; + //if (N > 0) std::cout << "MOMENTUM: " << FINAL_MOMENTUM * pow(n_0, -0.17) * pow(N, 0.93) * VELOCITY_UNIT/1e10 << std::endl; + + return { /* number of SN */ N, + /* total energy given off */ N * Supernova::ENERGY_PER_SN, + /* total mass */ N * Supernova::MASS_PER_SN, + /* final momentum */ Supernova::FINAL_MOMENTUM * pow(n_0, -0.17) * pow(N, 0.93), + /* shell formation radius */ Supernova::R_SH * pow(n_0, -0.46) * pow(N, 0.29) + }; } -void Grid3D::Cluster_Feedback(){ - #ifdef PARTICLES_CPU +Real Grid3D::Cluster_Feedback() { + Real max_sn_dti = 0; + #ifdef PARTICLES_GPU + max_sn_dti = Cluster_Feedback_GPU(); + #ifdef MPI_CHOLLA + max_sn_dti = ReduceRealMax(max_sn_dti); + #endif // MPI_CHOLLA + #else + Real* feedbackInfo; + Real* thread_dti; + int totalThreads = 1; + Real partiallyReducedInfo[N_INFO] = {0, 0, 0, 0, 0}; + Real reducedInfo[N_INFO] = {0, 0, 0, 0, 0}; + const int SN = 0, RESOLVED = 1, NOT_RESOLVED = 2, ENERGY = 3, MOMENTUM = 4; + #ifndef PARALLEL_OMP - Cluster_Feedback_Function( 0, Particles.n_local ); + + feedbackInfo = (Real*)calloc(N_INFO, sizeof(Real)); + sn_thread_dti = (Real*)calloc(1, sizeof(Real)); + Cluster_Feedback_Function( 0, Particles.n_local, feedbackInfo, 0, thread_dti); + #else + + totalThreads = N_OMP_THREADS; + feedbackInfo = (Real*)calloc(N_INFO*totalThreads, sizeof(Real)); + thread_dti = (Real*)calloc(totalThreads, sizeof(Real)); + // malloc array of size N_OMP_THREADS to take the feedback info #pragma omp parallel num_threads( N_OMP_THREADS ) { int omp_id, n_omp_procs; @@ -43,16 +83,74 @@ void Grid3D::Cluster_Feedback(){ n_omp_procs = omp_get_num_threads(); Get_OMP_Particles_Indxs( Particles.n_local, N_OMP_THREADS, omp_id, &p_start, &p_end ); + Cluster_Feedback_Function( p_start, p_end, feedbackInfo, omp_id, thread_dti); + } + #endif //PARALLEL_OMP - Cluster_Feedback_Function( p_start, p_end ); + for (int i = 0; i < totalThreads; i++) { + partiallyReducedInfo[SN] += feedbackInfo[i*N_INFO + SN]; + partiallyReducedInfo[RESOLVED] += feedbackInfo[i*N_INFO + RESOLVED]; + partiallyReducedInfo[NOT_RESOLVED] += feedbackInfo[i*N_INFO + NOT_RESOLVED]; + partiallyReducedInfo[ENERGY] += feedbackInfo[i*N_INFO + ENERGY]; + partiallyReducedInfo[MOMENTUM] += feedbackInfo[i*N_INFO + MOMENTUM]; + max_sn_dti = fmax(max_sn_dti, thread_dti[i]); } - #endif //PARALLEL_OMP - #endif //PARTICLES_CPU + + #ifdef MPI_CHOLLA + max_sn_dti = ReduceRealMax(max_sn_dti); + MPI_Reduce(&partiallyReducedInfo, &reducedInfo, N_INFO, MPI_CHREAL, MPI_SUM, root, world); + if (procID==root) { + #else + reducedInfo = partiallyReducedInfo; + #endif //MPI_CHOLLA + + countSN += reducedInfo[SN]; + countResolved += reducedInfo[RESOLVED]; + countUnresolved += reducedInfo[NOT_RESOLVED]; + totalEnergy += reducedInfo[ENERGY]; + totalMomentum += reducedInfo[MOMENTUM]; + + Real resolved_ratio = 0.0; + if (reducedInfo[RESOLVED] > 0 || reducedInfo[NOT_RESOLVED] > 0) { + resolved_ratio = reducedInfo[RESOLVED]*1.0/(reducedInfo[RESOLVED] + reducedInfo[NOT_RESOLVED]); + } + Real global_resolved_ratio = 0.0; + if (countResolved > 0 || countUnresolved > 0) { + global_resolved_ratio = countResolved / (countResolved + countUnresolved); + } + /*chprintf("iteration %d: number of SN: %d, ratio of resolved %f\n", H.n_step, (long)reducedInfo[SN], resolved_ratio); + chprintf(" this iteration: energy: %e erg. x-momentum: %e S.M. km/s\n", + reducedInfo[ENERGY]*MASS_UNIT*LENGTH_UNIT*LENGTH_UNIT/TIME_UNIT/TIME_UNIT, reducedInfo[MOMENTUM]*VELOCITY_UNIT/1e5); + chprintf(" cummulative: #SN: %d, ratio of resolved (R: %d, UR: %d) = %f\n", (long)countSN, (long)countResolved, (long)countUnresolved, global_resolved_ratio); + chprintf(" energy: %e erg. Total x-momentum: %e S.M. km/s\n", totalEnergy*MASS_UNIT*LENGTH_UNIT*LENGTH_UNIT/TIME_UNIT/TIME_UNIT, totalMomentum*VELOCITY_UNIT/1e5); + */ + + #ifdef MPI_CHOLLA + } + #endif /*MPI_CHOLLA*/ + + free(feedbackInfo); + free(thread_dti); + + #endif //PARTICLES_GPU + return max_sn_dti; +} + + +// returns the largest 1/dt for the cell with the given index +Real Grid3D::Calc_Timestep(int index) { + Real density = fmax(C.density[index], DENS_FLOOR); + Real vx = C.momentum_x[index] / density; + Real vy = C.momentum_y[index] / density; + Real vz = C.momentum_z[index] / density; + Real cs = sqrt(gama * fmax( (C.Energy[index]- 0.5*density*(vx*vx + vy*vy + vz*vz))*(gama-1.0), TINY_NUMBER ) / density); + return fmax( fmax((fabs(vx) + cs)/H.dx, (fabs(vy) + cs)/H.dy), (fabs(vz) + cs)/H.dz ) ; } //Compute the CIC feedback -void Grid3D::Cluster_Feedback_Function(part_int_t p_start, part_int_t p_end) { +void Grid3D::Cluster_Feedback_Function(part_int_t p_start, part_int_t p_end, Real* info, int threadId, Real* max_dti) { + #ifdef PARTICLES_CPU int nx_g, ny_g, nz_g; nx_g = H.nx; ny_g = H.ny; @@ -62,112 +160,204 @@ void Grid3D::Cluster_Feedback_Function(part_int_t p_start, part_int_t p_end) { xMin = H.xblocal; //TODO: make sure this is correct (and not H.xbound) (local min vs. global min) yMin = H.yblocal; zMin = H.zblocal; - - + part_int_t pIndx; int indx_x, indx_y, indx_z, indx; - Real x_pos, y_pos, z_pos; + int pcell_x, pcell_y, pcell_z, pcell_index; + Real pos_x, pos_y, pos_z; Real cell_center_x, cell_center_y, cell_center_z; Real delta_x, delta_y, delta_z; - Real dV_inv = 1./(H.dx*H.dy*H.dz); - Real feedback_energy, feedback_density; + Real dV = H.dx*H.dy*H.dz; + Real feedback_energy, feedback_density, feedback_momentum; + bool ignore, in_local, is_resolved; - bool ignore, in_local; for ( pIndx=p_start; pIndx < p_end; pIndx++ ){ - ignore = false; - in_local = true; - // pMass = Particles.mass[pIndx] * dV_inv; - x_pos = Particles.pos_x[pIndx]; - y_pos = Particles.pos_y[pIndx]; - z_pos = Particles.pos_z[pIndx]; - Get_Indexes_CIC( xMin, yMin, zMin, H.dx, H.dy, H.dz, x_pos, y_pos, z_pos, indx_x, indx_y, indx_z ); - if ( indx_x < -1 ) ignore = true; - if ( indx_y < -1 ) ignore = true; - if ( indx_z < -1 ) ignore = true; - if ( indx_x > nx_g-3 ) ignore = true; - if ( indx_y > ny_g-3 ) ignore = true; - if ( indx_y > nz_g-3 ) ignore = true; - if ( x_pos < H.xblocal || x_pos >= H.xblocal + H.domlen_x ) in_local = false; - if ( y_pos < H.yblocal || y_pos >= H.yblocal + H.domlen_y ) in_local = false; - if ( z_pos < H.zblocal || z_pos >= H.zblocal + H.domlen_z ) in_local = false; - if ( ! in_local ) { + pos_x = Particles.pos_x[pIndx]; + pos_y = Particles.pos_y[pIndx]; + pos_z = Particles.pos_z[pIndx]; + + pcell_x = (int) floor( ( pos_x - xMin ) / H.dx ) + H.n_ghost; + pcell_y = (int) floor( ( pos_y - yMin ) / H.dy ) + H.n_ghost; + pcell_z = (int) floor( ( pos_z - zMin ) / H.dz ) + H.n_ghost; + pcell_index = pcell_x + pcell_y*nx_g + pcell_z*nx_g*ny_g; + + auto [N, energy, mass, momentum, r_sf] = getClusterFeedback(H.t, H.dt, Particles.age[pIndx], C.density[pcell_index]); + if (N == 0) continue; + + feedback_energy = energy / dV; + feedback_density = mass / dV; + feedback_momentum = momentum / sqrt(3) / dV; + is_resolved = 3 * std::max({H.dx, H.dy, H.dz}) <= r_sf; + // now fill in 'info' for logging + info[threadId*N_INFO] += N*1.0; + if (is_resolved) info[threadId*N_INFO + 1] += 1.0; + else info[threadId*N_INFO + 2] += 1.0; + + indx_x = (int) floor( ( pos_x - xMin - 0.5*H.dx ) / H.dx ); + indx_y = (int) floor( ( pos_y - yMin - 0.5*H.dy ) / H.dy ); + indx_z = (int) floor( ( pos_z - zMin - 0.5*H.dz ) / H.dz ); + + in_local = (pos_x >= H.xblocal && pos_x < H.xblocal + H.domlen_x) && + (pos_y >= H.yblocal && pos_y < H.yblocal + H.domlen_y) && + (pos_z >= H.zblocal && pos_z < H.zblocal + H.domlen_z); + if (!in_local) { std::cout << " Cluster_FeedbackError:" << std::endl; #ifdef PARTICLE_IDS - std::cout << " Particle outside Local domain pID: " << Particles.partIDs[pIndx] << std::endl; + std::cout << " Particle outside local domain pID: " << Particles.partIDs[pIndx] << std::endl; #else - std::cout << " Particle outside Local domain " << std::endl; + std::cout << " Particle outside local domain " << std::endl; #endif std::cout << " Domain X: " << xMin << " " << H.xblocal + H.domlen_x << std::endl; std::cout << " Domain Y: " << yMin << " " << H.yblocal + H.domlen_y << std::endl; std::cout << " Domain Z: " << zMin << " " << H.zblocal + H.domlen_z << std::endl; - std::cout << " Particle X: " << x_pos << std::endl; - std::cout << " Particle Y: " << y_pos << std::endl; - std::cout << " Particle Z: " << z_pos << std::endl; + std::cout << " Particle X: " << pos_x << std::endl; + std::cout << " Particle Y: " << pos_y << std::endl; + std::cout << " Particle Z: " << pos_z << std::endl; continue; } - if ( ignore ){ + + ignore = indx_x < -1 || indx_y < -1 || indx_z < -1 || indx_x > nx_g-3 || indx_y > ny_g-3 || indx_y > nz_g-3; + if (ignore){ #ifdef PARTICLE_IDS std::cout << "ERROR Cluster_Feedback Index pID: " << Particles.partIDs[pIndx] << std::endl; #else std::cout << "ERROR Cluster_Feedback Index " << std::endl; #endif - std::cout << "Negative xIndx: " << x_pos << " " << indx_x << std::endl; - std::cout << "Negative zIndx: " << z_pos << " " << indx_z << std::endl; - std::cout << "Negative yIndx: " << y_pos << " " << indx_y << std::endl; - std::cout << "Excess xIndx: " << x_pos << " " << indx_x << std::endl; - std::cout << "Excess yIndx: " << y_pos << " " << indx_y << std::endl; - std::cout << "Excess zIndx: " << z_pos << " " << indx_z << std::endl; + std::cout << "xIndx: " << pos_x << " " << indx_x << std::endl; + std::cout << "zIndx: " << pos_y << " " << indx_z << std::endl; + std::cout << "yIndx: " << pos_z << " " << indx_y << std::endl; std::cout << std::endl; continue; } - + cell_center_x = xMin + indx_x*H.dx + 0.5*H.dx; cell_center_y = yMin + indx_y*H.dy + 0.5*H.dy; cell_center_z = zMin + indx_z*H.dz + 0.5*H.dz; - delta_x = 1 - ( x_pos - cell_center_x ) / H.dx; - delta_y = 1 - ( y_pos - cell_center_y ) / H.dy; - delta_z = 1 - ( z_pos - cell_center_z ) / H.dz; + delta_x = 1 - ( pos_x - cell_center_x ) / H.dx; + delta_y = 1 - ( pos_y - cell_center_y ) / H.dy; + delta_z = 1 - ( pos_z - cell_center_z ) / H.dz; indx_x += H.n_ghost; indx_y += H.n_ghost; indx_z += H.n_ghost; - feedback_energy = getClusterEnergyFeedback(H.t, H.dt, Particles.age[pIndx]) * dV_inv; - feedback_density = getClusterMassFeedback(H.t, H.dt, Particles.age[pIndx]) * dV_inv; + //std::cout << "delta (x, y, z): (" << delta_x << ", " << delta_y << ", " << delta_z << ")" << std::endl; + //std::cout << "cell center (x, y, z): (" << cell_center_x << ", " << cell_center_y << ", " << cell_center_z << ")" << std::endl; indx = indx_x + indx_y*nx_g + indx_z*nx_g*ny_g; - C.density[indx] += feedback_density * delta_x * delta_y * delta_z; - C.GasEnergy[indx] += feedback_energy * delta_x * delta_y * delta_z; + if (is_resolved) { + C.density[indx] += feedback_density * delta_x * delta_y * delta_z; + C.GasEnergy[indx] += feedback_energy * delta_x * delta_y * delta_z; + C.Energy[indx] += feedback_energy * delta_x * delta_y * delta_z; + info[threadId*N_INFO + 3] += feedback_energy * fabs(delta_x * delta_y * delta_z) * dV; + } else { + C.momentum_x[indx] += -delta_x * feedback_momentum; + C.momentum_y[indx] += -delta_y * feedback_momentum; + C.momentum_z[indx] += -delta_z * feedback_momentum; + info[threadId*N_INFO + 4] += (fabs(delta_x) /*+ fabs(delta_y) + fabs(delta_z)*/)*feedback_momentum * dV; + } + max_dti[threadId] = fmax(max_dti[threadId], Calc_Timestep(indx)); indx = (indx_x+1) + indx_y*nx_g + indx_z*nx_g*ny_g; - C.density[indx] += feedback_density * (1-delta_x) * delta_y * delta_z; - C.GasEnergy[indx] += feedback_energy * (1-delta_x) * delta_y * delta_z; + if (is_resolved) { + C.density[indx] += feedback_density * (1-delta_x) * delta_y * delta_z; + C.GasEnergy[indx] += feedback_energy * (1-delta_x) * delta_y * delta_z; + C.Energy[indx] += feedback_energy * (1-delta_x) * delta_y * delta_z; + info[threadId*N_INFO + 3] += feedback_energy * fabs((1-delta_x) * delta_y * delta_z) * dV; + } else { + C.momentum_x[indx] += delta_x * feedback_momentum; + C.momentum_y[indx] += -delta_y * feedback_momentum; + C.momentum_z[indx] += -delta_z * feedback_momentum; + info[threadId*N_INFO + 4] += (fabs(delta_x) /*+ fabs(delta_y) + fabs(delta_z)*/)*feedback_momentum * dV; + } + max_dti[threadId] = fmax(max_dti[threadId], Calc_Timestep(indx)); indx = indx_x + (indx_y+1)*nx_g + indx_z*nx_g*ny_g; - C.density[indx] += feedback_density * delta_x * (1-delta_y) * delta_z; - C.GasEnergy[indx] += feedback_energy * delta_x * (1-delta_y) * delta_z; + if (is_resolved) { + C.density[indx] += feedback_density * delta_x * (1-delta_y) * delta_z; + C.GasEnergy[indx] += feedback_energy * delta_x * (1-delta_y) * delta_z; + C.Energy[indx] += feedback_energy * delta_x * (1-delta_y) * delta_z; + info[threadId*N_INFO + 3] += feedback_energy * fabs(delta_x * (1-delta_y )* delta_z) * dV; + } else { + C.momentum_x[indx] += -delta_x * feedback_momentum; + C.momentum_y[indx] += delta_y * feedback_momentum; + C.momentum_z[indx] += -delta_z * feedback_momentum; + info[threadId*N_INFO + 4] += (fabs(delta_x) /*+ fabs(delta_y) + fabs(delta_z)*/)*feedback_momentum * dV; + } + max_dti[threadId] = fmax(max_dti[threadId], Calc_Timestep(indx)); indx = indx_x + indx_y*nx_g + (indx_z+1)*nx_g*ny_g; - C.density[indx] += feedback_density * delta_x * delta_y * (1-delta_z); - C.GasEnergy[indx] += feedback_energy * delta_x * delta_y * (1-delta_z); + if (is_resolved) { + C.density[indx] += feedback_density * delta_x * delta_y * (1-delta_z); + C.GasEnergy[indx] += feedback_energy * delta_x * delta_y * (1-delta_z); + C.Energy[indx] += feedback_energy * delta_x * delta_y * (1-delta_z); + info[threadId*N_INFO + 3] += feedback_energy * fabs(delta_x * delta_y * (1 - delta_z)) * dV; + } else { + C.momentum_x[indx] += -delta_x * feedback_momentum; + C.momentum_y[indx] += -delta_y * feedback_momentum; + C.momentum_z[indx] += delta_z * feedback_momentum; + info[threadId*N_INFO + 4] += (fabs(delta_x) /*+ fabs(delta_y) + fabs(delta_z)*/)*feedback_momentum * dV; + } + max_dti[threadId] = fmax(max_dti[threadId], Calc_Timestep(indx)); indx = (indx_x+1) + (indx_y+1)*nx_g + indx_z*nx_g*ny_g; - C.density[indx] += feedback_density * (1-delta_x) * (1-delta_y) * delta_z; - C.GasEnergy[indx] += feedback_energy * (1-delta_x) * (1-delta_y) * delta_z; + if (is_resolved) { + C.density[indx] += feedback_density * (1-delta_x) * (1-delta_y) * delta_z; + C.GasEnergy[indx] += feedback_energy * (1-delta_x) * (1-delta_y) * delta_z; + C.Energy[indx] += feedback_energy * (1-delta_x) * (1-delta_y) * delta_z; + info[threadId*N_INFO + 3] += feedback_energy * fabs((1-delta_x) * (1-delta_y) * delta_z) * dV; + } else { + C.momentum_x[indx] += delta_x * feedback_momentum; + C.momentum_y[indx] += delta_y * feedback_momentum; + C.momentum_z[indx] += -delta_z * feedback_momentum; + info[threadId*N_INFO + 4] += (fabs(delta_x) /*+ fabs(delta_y) + fabs(delta_z)*/)*feedback_momentum * dV; + } + max_dti[threadId] = fmax(max_dti[threadId], Calc_Timestep(indx)); indx = (indx_x+1) + indx_y*nx_g + (indx_z+1)*nx_g*ny_g; - C.density[indx] += feedback_density * (1-delta_x) * delta_y * (1-delta_z); - C.GasEnergy[indx] += feedback_energy * (1-delta_x) * delta_y * (1-delta_z); + if (is_resolved) { + C.density[indx] += feedback_density * (1-delta_x) * delta_y * (1-delta_z); + C.GasEnergy[indx] += feedback_energy * (1-delta_x) * delta_y * (1-delta_z); + C.Energy[indx] += feedback_energy * (1-delta_x) * delta_y * (1-delta_z); + info[threadId*N_INFO + 3] += feedback_energy * fabs((1-delta_x) * delta_y * (1-delta_z)) * dV; + } else { + C.momentum_x[indx] += delta_x * feedback_momentum; + C.momentum_y[indx] += -delta_y * feedback_momentum; + C.momentum_z[indx] += delta_z * feedback_momentum; + info[threadId*N_INFO + 4] += (fabs(delta_x) /*+ fabs(delta_y) + fabs(delta_z)*/)*feedback_momentum * dV; + } + max_dti[threadId] = fmax(max_dti[threadId], Calc_Timestep(indx)); indx = indx_x + (indx_y+1)*nx_g + (indx_z+1)*nx_g*ny_g; - C.density[indx] += feedback_density * delta_x * (1-delta_y) * (1-delta_z); - C.GasEnergy[indx] += feedback_energy * delta_x * (1-delta_y) * (1-delta_z); + if (is_resolved) { + C.density[indx] += feedback_density * delta_x * (1-delta_y) * (1-delta_z); + C.GasEnergy[indx] += feedback_energy * delta_x * (1-delta_y) * (1-delta_z); + C.Energy[indx] += feedback_energy * delta_x * (1-delta_y) * (1-delta_z); + info[threadId*N_INFO + 3] += feedback_energy * fabs(delta_x * (1-delta_y) * (1-delta_z)) * dV; + } else { + C.momentum_x[indx] += -delta_x * feedback_momentum; + C.momentum_y[indx] += delta_y * feedback_momentum; + C.momentum_z[indx] += delta_z * feedback_momentum; + info[threadId*N_INFO + 4] += (fabs(delta_x) /*+ fabs(delta_y) + fabs(delta_z)*/)*feedback_momentum * dV; + } + max_dti[threadId] = fmax(max_dti[threadId], Calc_Timestep(indx)); indx = (indx_x+1) + (indx_y+1)*nx_g + (indx_z+1)*nx_g*ny_g; - C.density[indx] += feedback_density * (1-delta_x) * (1-delta_y) * (1-delta_z); - C.GasEnergy[indx] += feedback_energy * (1-delta_x) * (1-delta_y) * (1-delta_z); + if (is_resolved) { + C.density[indx] += feedback_density * (1-delta_x) * (1-delta_y) * (1-delta_z); + C.GasEnergy[indx] += feedback_energy * (1-delta_x) * (1-delta_y) * (1-delta_z); + C.Energy[indx] += feedback_energy * (1-delta_x) * (1-delta_y) * (1-delta_z); + info[threadId*N_INFO + 3] += feedback_energy * fabs((1-delta_x) * (1-delta_y) * (1-delta_z)) * dV; + } else { + C.momentum_x[indx] += delta_x * feedback_momentum; + C.momentum_y[indx] += delta_y * feedback_momentum; + C.momentum_z[indx] += delta_z * feedback_momentum; + info[threadId*N_INFO + 4] += (fabs(delta_x) /*+ fabs(delta_y) + fabs(delta_z)*/)*feedback_momentum * dV; + } + max_dti[threadId] = fmax(max_dti[threadId], Calc_Timestep(indx)); } + #endif //PARTICLES_CPU } - #endif //PARTICLE_AGE #endif //DE #endif //PARTICLES +#endif //FEEDBACK diff --git a/src/particles/feedback_CIC.h b/src/particles/feedback_CIC.h index 1775cb898..7e5006043 100644 --- a/src/particles/feedback_CIC.h +++ b/src/particles/feedback_CIC.h @@ -7,10 +7,11 @@ #define FEEDBACK_CIC_H #include "../global/global.h" -#define ENERGY_FEEDBACK_RATE 5.25958e-07 //Rate is 1e51 erg/100M_solar spread out over 10Myr +const int N_INFO = 5; Real getClusterEnergyFeedback(Real t, Real dt, Real age); Real getClusterMassFeedback(Real t, Real dt, Real age); +std::tuple getClusterFeedback(Real t, Real dt, Real age, Real density); #endif #endif diff --git a/src/particles/feedback_CIC_gpu.cu b/src/particles/feedback_CIC_gpu.cu new file mode 100644 index 000000000..097b99f9c --- /dev/null +++ b/src/particles/feedback_CIC_gpu.cu @@ -0,0 +1,322 @@ +#if defined(FEEDBACK) && defined(PARTICLES_GPU) + +#include +#include +#include +#include +#include "../grid/grid3D.h" +#include "../global/global_cuda.h" +#include "../global/global.h" +#include "supernova.h" + + +namespace Supernova { + curandStateMRG32k3a_t* curandStates; + part_int_t n_states; +} + + +__device__ double atomicMax(double* address, double val) +{ + unsigned long long int* address_as_ull = (unsigned long long int*)address; + unsigned long long int old = *address_as_ull, assumed; + do { + assumed = old; + old = atomicCAS(address_as_ull, assumed, + __double_as_longlong(fmax(val, __longlong_as_double(assumed))) + ); + } while (assumed != old); + return __longlong_as_double(old); +} + + +__global__ void initState_kernel(unsigned int seed, curandStateMRG32k3a_t* states) { + int id = blockIdx.x*blockDim.x + threadIdx.x; + curand_init(seed, id, 0, &states[id]); +} + + +/** + * @brief Initialize the cuRAND state, which is analogous to the concept of generators in CPU code. + * The state object maintains configuration and status the cuRAND context for each thread on the GPU. + * Initialize more than the number of local particles since the latter will change through MPI transfers. + * + * @param n_local + * @param allocation_factor + */ +void Supernova::initState(struct parameters *P, part_int_t n_local, Real allocation_factor) { + printf("Supernova::initState start\n"); + n_states = n_local*allocation_factor; + cudaMalloc((void**) &curandStates, n_states*sizeof(curandStateMRG32k3a_t)); + + //int ngrid = (n_states + TPB_PARTICLES - 1) / TPB_PARTICLES; + int ngrid = (n_states + 64- 1) / 64; + + dim3 grid(ngrid); + //dim3 block(TPB_PARTICLES); + dim3 block(64); + + printf("Supernova::initState: n_states=%d, ngrid=%d, threads=%d\n", n_states, ngrid, 64); + hipLaunchKernelGGL(initState_kernel, grid, block, 0, 0, P->prng_seed, curandStates); + CHECK(cudaDeviceSynchronize()); + printf("Supernova::initState end\n"); +} + + +/* +__device__ void Single_Cluster_Feedback(Real t, Real dt, Real age, Real density, Real* feedback, curandStateMRG32k3a_t* state) { + int N = 0; + if (t + age <= Supernova::SN_ERA) { + N = curand_poisson (state, Supernova::SNR * dt); + } + Real n_0 = density * DENSITY_UNIT / (Supernova::MU*MP); // in cm^{-3} + + feedback[Supernova::NUMBER] = N * 1.0; // number of SN + feedback[Supernova::ENERGY] = N * Supernova::ENERGY_PER_SN; // total energy + feedback[Supernova::MASS] = N * Supernova::MASS_PER_SN; // total mass + feedback[Supernova::MOMENTUM] = Supernova::FINAL_MOMENTUM * pow(n_0, -0.17) * pow(N, 0.93); // final momentum + feedback[Supernova::SHELL_RADIUS] = Supernova::R_SH * pow(n_0, -0.46) * pow(N, 0.29); // shell formation radius +} +*/ + +__device__ Real Calc_Timestep(Real gamma, Real *density, Real *momentum_x, Real *momentum_y, Real *momentum_z, Real *energy, int index, Real dx, Real dy, Real dz){ + Real dens = fmax(density[index], DENS_FLOOR); + Real d_inv = 1.0 / dens; + Real vx = momentum_x[index] * d_inv; + Real vy = momentum_y[index] * d_inv; + Real vz = momentum_z[index] * d_inv; + Real P = fmax((energy[index]- 0.5*dens*(vx*vx + vy*vy + vz*vz))*(gamma-1.0), TINY_NUMBER); + Real cs = sqrt(gamma * P * d_inv); + return fmax( fmax((fabs(vx) + cs)/dx, (fabs(vy) + cs)/dy), (fabs(vz) + cs)/dz ); +} + + +__global__ void Cluster_Feedback_Kernel(part_int_t n_local, Real* pos_x_dev, Real* pos_y_dev, Real* pos_z_dev, + Real mass, Real* age_dev, Real xMin, Real yMin, Real zMin, Real xLen, Real yLen, Real zLen, + Real dx, Real dy, Real dz, int nx_g, int ny_g, int nz_g, int n_ghost, Real t, Real dt, Real* dti, + Real* density, Real* gasEnergy, Real* energy, Real* momentum_x, Real* momentum_y, Real* momentum_z, Real gamma, curandStateMRG32k3a_t* states){ + + int tid = blockIdx.x * blockDim.x + threadIdx.x ; + if ( tid >= n_local) return; + + Real xMax, yMax, zMax; + xMax = xMin + xLen; + yMax = yMin + yLen; + zMax = zMin + zLen; + + Real pos_x, pos_y, pos_z; + Real cell_center_x, cell_center_y, cell_center_z; + Real delta_x, delta_y, delta_z; + Real feedback_energy, feedback_density, feedback_momentum, n_0, shell_radius; + bool is_resolved; + int pcell_x, pcell_y, pcell_z, pcell_index; + Real dV = dx*dy*dz; + Real local_dti = 0.0; + + pos_x = pos_x_dev[tid]; + pos_y = pos_y_dev[tid]; + pos_z = pos_z_dev[tid]; + + bool in_local = (pos_x >= xMin && pos_x < zMax) && + (pos_y >= yMin && pos_y < yMax) && + (pos_z >= zMin && pos_z < zMax); + if (!in_local) { + printf(" Feedback GPU: Particle outside local domain [%f %f %f] [%f %f] [%f %f] [%f %f]\n ", pos_x, pos_y, pos_z, xMin, xMax, yMin, yMax, zMin, zMax); + return; + } + + int indx_x = (int) floor( ( pos_x - xMin - 0.5*dx ) / dx ); + int indx_y = (int) floor( ( pos_y - yMin - 0.5*dy ) / dy ); + int indx_z = (int) floor( ( pos_z - zMin - 0.5*dz ) / dz ); + + bool ignore = indx_x < -1 || indx_y < -1 || indx_z < -1 || indx_x > nx_g-3 || indx_y > ny_g-3 || indx_y > nz_g-3; + if (ignore) { + printf(" Feedback GPU: Particle CIC index err [%f %f %f] [%d %d %d] [%d %d %d] \n ", pos_x, pos_y, pos_z, indx_x, indx_y, indx_z, nx_g, ny_g, nz_g); + } + + pcell_x = (int) floor( ( pos_x - xMin ) / dx ) + n_ghost; + pcell_y = (int) floor( ( pos_y - yMin ) / dy ) + n_ghost; + pcell_z = (int) floor( ( pos_z - zMin ) / dz ) + n_ghost; + pcell_index = pcell_x + pcell_y*nx_g + pcell_z*nx_g*ny_g; + + if (t + age_dev[tid] > Supernova::SN_ERA) return; + + curandStateMRG32k3a_t state = states[tid]; // <- more efficient? + unsigned int N = curand_poisson (&state, Supernova::SNR * dt); + states[tid] = state; + + if (N == 0) return; + + feedback_energy = N * Supernova::ENERGY_PER_SN / dV; + feedback_density = N * Supernova::MASS_PER_SN / dV; + n_0 = density[pcell_index] * DENSITY_UNIT / (Supernova::MU*MP); + feedback_momentum = Supernova::FINAL_MOMENTUM * pow(n_0, -0.17) * pow(N, 0.93) / sqrt(3.0) / dV; + shell_radius = Supernova::R_SH * pow(n_0, -0.46) * pow(N, 0.29); + is_resolved = 3 * max(dx, max(dy, dz)) <= shell_radius; + + /*printf(" [%d]: got %d SN\n", tid, N); + if (is_resolved) printf(" [%d] resolved\n", tid); + else printf(" [%d] NOT resolved\n", tid); + printf(" [%d] E=%.3e, D=%.3e, P=%.3e, S_r=%.3e\n", tid, + feedback_energy*dV*MASS_UNIT*LENGTH_UNIT*LENGTH_UNIT/TIME_UNIT/TIME_UNIT, + feedback_density*DENSITY_UNIT / (Supernova::MU*MP), + feedback_momentum*dV*VELOCITY_UNIT/1e5, shell_radius); + */ + cell_center_x = xMin + indx_x*dx + 0.5*dx; + cell_center_y = yMin + indx_y*dy + 0.5*dy; + cell_center_z = zMin + indx_z*dz + 0.5*dz; + delta_x = 1 - ( pos_x - cell_center_x ) / dx; + delta_y = 1 - ( pos_y - cell_center_y ) / dy; + delta_z = 1 - ( pos_z - cell_center_z ) / dz; + indx_x += n_ghost; + indx_y += n_ghost; + indx_z += n_ghost; + + int indx = indx_x + indx_y*nx_g + indx_z*nx_g*ny_g; + if (is_resolved) { + atomicAdd(&density[indx], feedback_density * delta_x * delta_y * delta_z); + atomicAdd(&gasEnergy[indx], feedback_energy * delta_x * delta_y * delta_z); + atomicAdd(&energy[indx], feedback_energy * delta_x * delta_y * delta_z); + //info[threadId*N_INFO + 3] += feedback_energy * fabs(delta_x * delta_y * delta_z) * dV; + } else { + atomicAdd(&momentum_x[indx], -delta_x * feedback_momentum); + atomicAdd(&momentum_y[indx], -delta_y * feedback_momentum); + atomicAdd(&momentum_z[indx], -delta_z * feedback_momentum); + //info[threadId*N_INFO + 4] += (fabs(delta_x) /*+ fabs(delta_y) + fabs(delta_z)*/)*feedback_momentum * dV; + } + local_dti = fmax(local_dti, Calc_Timestep(gamma, density, momentum_x, momentum_y, momentum_z, energy, indx, dx, dy, dz)); + + indx = (indx_x+1) + indx_y*nx_g + indx_z*nx_g*ny_g; + if (is_resolved) { + atomicAdd(&density[indx], feedback_density * (1-delta_x) * delta_y * delta_z); + atomicAdd(&gasEnergy[indx], feedback_energy * (1-delta_x) * delta_y * delta_z); + atomicAdd(&energy[indx], feedback_energy * (1-delta_x) * delta_y * delta_z); + //info[threadId*N_INFO + 3] += feedback_energy * fabs((1-delta_x) * delta_y * delta_z) * dV; + } else { + atomicAdd(&momentum_x[indx], delta_x * feedback_momentum); + atomicAdd(&momentum_y[indx], -delta_y * feedback_momentum); + atomicAdd(&momentum_z[indx], -delta_z * feedback_momentum); + //info[threadId*N_INFO + 4] += (fabs(delta_x) /*+ fabs(delta_y) + fabs(delta_z)*/)*feedback_momentum * dV; + } + local_dti = fmax(local_dti, Calc_Timestep(gamma, density, momentum_x, momentum_y, momentum_z, energy, indx, dx, dy, dz)); + + indx = indx_x + (indx_y+1)*nx_g + indx_z*nx_g*ny_g; + if (is_resolved) { + atomicAdd(&density[indx], feedback_density * delta_x * (1-delta_y) * delta_z); + atomicAdd(&gasEnergy[indx], feedback_energy * delta_x * (1-delta_y) * delta_z); + atomicAdd(&energy[indx], feedback_energy * delta_x * (1-delta_y) * delta_z); + //info[threadId*N_INFO + 3] += feedback_energy * fabs(delta_x * (1-delta_y )* delta_z) * dV; + } else { + atomicAdd(&momentum_x[indx], -delta_x * feedback_momentum); + atomicAdd(&momentum_y[indx], delta_y * feedback_momentum); + atomicAdd(&momentum_z[indx], -delta_z * feedback_momentum); + //info[threadId*N_INFO + 4] += (fabs(delta_x) /*+ fabs(delta_y) + fabs(delta_z)*/)*feedback_momentum * dV; + } + local_dti = fmax(local_dti, Calc_Timestep(gamma, density, momentum_x, momentum_y, momentum_z, energy, indx, dx, dy, dz)); + + indx = indx_x + indx_y*nx_g + (indx_z+1)*nx_g*ny_g; + if (is_resolved) { + atomicAdd(&density[indx], feedback_density * delta_x * delta_y * (1-delta_z)); + atomicAdd(&gasEnergy[indx], feedback_energy * delta_x * delta_y * (1-delta_z)); + atomicAdd(&energy[indx], feedback_energy * delta_x * delta_y * (1-delta_z)); + //info[threadId*N_INFO + 3] += feedback_energy * fabs(delta_x * delta_y * (1 - delta_z)) * dV; + } else { + atomicAdd(&momentum_x[indx], -delta_x * feedback_momentum); + atomicAdd(&momentum_y[indx], -delta_y * feedback_momentum); + atomicAdd(&momentum_z[indx], delta_z * feedback_momentum); + //info[threadId*N_INFO + 4] += (fabs(delta_x) /*+ fabs(delta_y) + fabs(delta_z)*/)*feedback_momentum * dV; + } + local_dti = fmax(local_dti, Calc_Timestep(gamma, density, momentum_x, momentum_y, momentum_z, energy, indx, dx, dy, dz)); + + indx = (indx_x+1) + (indx_y+1)*nx_g + indx_z*nx_g*ny_g; + if (is_resolved) { + atomicAdd(&density[indx], feedback_density * (1-delta_x) * (1-delta_y) * delta_z); + atomicAdd(&gasEnergy[indx], feedback_energy * (1-delta_x) * (1-delta_y) * delta_z); + atomicAdd(&energy[indx], feedback_energy * (1-delta_x) * (1-delta_y) * delta_z); + //info[threadId*N_INFO + 3] += feedback_energy * fabs((1-delta_x) * (1-delta_y) * delta_z) * dV; + } else { + atomicAdd(&momentum_x[indx], delta_x * feedback_momentum); + atomicAdd(&momentum_y[indx], delta_y * feedback_momentum); + atomicAdd(&momentum_z[indx], -delta_z * feedback_momentum); + //info[threadId*N_INFO + 4] += (fabs(delta_x) /*+ fabs(delta_y) + fabs(delta_z)*/)*feedback_momentum * dV; + } + local_dti = fmax(local_dti, Calc_Timestep(gamma, density, momentum_x, momentum_y, momentum_z, energy, indx, dx, dy, dz)); + + indx = (indx_x+1) + indx_y*nx_g + (indx_z+1)*nx_g*ny_g; + if (is_resolved) { + atomicAdd(&density[indx], feedback_density * (1-delta_x) * delta_y * (1-delta_z)); + atomicAdd(&gasEnergy[indx], feedback_energy * (1-delta_x) * delta_y * (1-delta_z)); + atomicAdd(&energy[indx], feedback_energy * (1-delta_x) * delta_y * (1-delta_z)); + //info[threadId*N_INFO + 3] += feedback_energy * fabs((1-delta_x) * delta_y * (1-delta_z)) * dV; + } else { + atomicAdd(&momentum_x[indx], delta_x * feedback_momentum); + atomicAdd(&momentum_y[indx], -delta_y * feedback_momentum); + atomicAdd(&momentum_z[indx], delta_z * feedback_momentum); + //info[threadId*N_INFO + 4] += (fabs(delta_x) /*+ fabs(delta_y) + fabs(delta_z)*/)*feedback_momentum * dV; + } + local_dti = fmax(local_dti, Calc_Timestep(gamma, density, momentum_x, momentum_y, momentum_z, energy, indx, dx, dy, dz)); + + indx = indx_x + (indx_y+1)*nx_g + (indx_z+1)*nx_g*ny_g; + if (is_resolved) { + atomicAdd(&density[indx], feedback_density * delta_x * (1-delta_y) * (1-delta_z)); + atomicAdd(&gasEnergy[indx], feedback_energy * delta_x * (1-delta_y) * (1-delta_z)); + atomicAdd(&energy[indx], feedback_energy * delta_x * (1-delta_y) * (1-delta_z)); + //info[threadId*N_INFO + 3], feedback_energy * fabs(delta_x * (1-delta_y) * (1-delta_z)) * dV; + } else { + atomicAdd(&momentum_x[indx], -delta_x * feedback_momentum); + atomicAdd(&momentum_y[indx], delta_y * feedback_momentum); + atomicAdd(&momentum_z[indx], delta_z * feedback_momentum); + //info[threadId*N_INFO + 4] += (fabs(delta_x) /*+ fabs(delta_y) + fabs(delta_z)*/)*feedback_momentum * dV; + } + local_dti = fmax(local_dti, Calc_Timestep(gamma, density, momentum_x, momentum_y, momentum_z, energy, indx, dx, dy, dz)); + + indx = (indx_x+1) + (indx_y+1)*nx_g + (indx_z+1)*nx_g*ny_g; + if (is_resolved) { + atomicAdd(&density[indx], feedback_density * (1-delta_x) * (1-delta_y) * (1-delta_z)); + atomicAdd(&gasEnergy[indx], feedback_energy * (1-delta_x) * (1-delta_y) * (1-delta_z)); + atomicAdd(&energy[indx], feedback_energy * (1-delta_x) * (1-delta_y) * (1-delta_z)); + //info[threadId*N_INFO + 3] += feedback_energy * fabs((1-delta_x) * (1-delta_y) * (1-delta_z)) * dV; + } else { + atomicAdd(&momentum_x[indx], delta_x * feedback_momentum); + atomicAdd(&momentum_y[indx], delta_y * feedback_momentum); + atomicAdd(&momentum_z[indx], delta_z * feedback_momentum); + //info[threadId*N_INFO + 4] += (fabs(delta_x) /*+ fabs(delta_y) + fabs(delta_z)*/)*feedback_momentum * dV; + } + local_dti = fmax(local_dti, Calc_Timestep(gamma, density, momentum_x, momentum_y, momentum_z, energy, indx, dx, dy, dz)); + atomicMax(dti, local_dti); +} + + + +Real Grid3D::Cluster_Feedback_GPU() { + if (H.dt == 0) return 0.0; + + if (Particles.n_local > Supernova::n_states) { + printf("ERROR: not enough cuRAND states (%d) for %f local particles\n", Supernova::n_states, Particles.n_local ); + exit(-1); + } + + printf("Cluster_Feedback_GPU: start. dt=%.4e\n", H.dt); + Real h_dti = 0.0; + Real* d_dti; + cudaMalloc(&d_dti, sizeof(Real)); + cudaMemcpy(d_dti, &h_dti, sizeof(Real), cudaMemcpyHostToDevice); + + int ngrid = (Particles.n_local + 64 - 1) / 64; + dim3 grid(ngrid); + dim3 block(64); + + hipLaunchKernelGGL(Cluster_Feedback_Kernel, grid, block, 0, 0, Particles.n_local, Particles.pos_x_dev, Particles.pos_y_dev, Particles.pos_z_dev, + Particles.particle_mass, Particles.age_dev, H.xblocal, H.yblocal, H.zblocal, H.domlen_x, H.domlen_y, H.domlen_z, + H.dx, H.dy, H.dz, H.nx, H.ny, H.nz, H.n_ghost, H.t, H.dt, d_dti, + C.d_density, C.d_GasEnergy, C.d_Energy, C.d_momentum_x, C.d_momentum_y, C.d_momentum_z, gama, Supernova::curandStates); + + cudaMemcpy(&h_dti, d_dti, sizeof(Real), cudaMemcpyDeviceToHost); + cudaFree(d_dti); + printf("Cluster_Feedback_GPU: end. calc dti=%.4e\n", h_dti); + + return h_dti; +} + + +#endif //FEEDBACK & PARTICLES_GPU diff --git a/src/particles/gravity_CIC.cpp b/src/particles/gravity_CIC.cpp index a367f9bd3..cf6ce45c5 100644 --- a/src/particles/gravity_CIC.cpp +++ b/src/particles/gravity_CIC.cpp @@ -7,8 +7,8 @@ #include "../global/global.h" #include "../grid/grid3D.h" #include "../io/io.h" -#include "../particles/particles_3D.h" -#include "../particles/density_CIC.h" +#include "particles_3D.h" +#include "density_CIC.h" #include "../model/disk_galaxy.h" @@ -86,7 +86,7 @@ void Particles_3D::Get_Gravity_Field_Particles_GPU( Real *potential_host ){ void Particles_3D::Get_Gravity_CIC_GPU(){ - Get_Gravity_CIC_GPU_function( n_local, G.nx_local, G.ny_local, G.nz_local, G.n_ghost_particles_grid, G.xMin, G.xMax, G.yMin, G.yMax, G.zMin, G.zMax, G.dx, G.dy, G.dz, pos_x_dev, pos_y_dev, pos_z_dev, grav_x_dev, grav_y_dev, grav_z_dev, G.gravity_x_dev, G.gravity_y_dev, G.gravity_z_dev ); + Get_Gravity_CIC_GPU_function( n_local, G.nx_local, G.ny_local, G.nz_local, G.n_ghost_particles_grid, G.xMin, G.xMax, G.yMin, G.yMax, G.zMin, G.zMax, G.dx, G.dy, G.dz, pos_x_dev, pos_y_dev, pos_z_dev, grav_x_dev, grav_y_dev, grav_z_dev, G.gravity_x_dev, G.gravity_y_dev, G.gravity_z_dev, partIDs_dev ); } #endif //PARTICLES_GPU diff --git a/src/particles/gravity_CIC_gpu.cu b/src/particles/gravity_CIC_gpu.cu index 62928c354..88fadf095 100644 --- a/src/particles/gravity_CIC_gpu.cu +++ b/src/particles/gravity_CIC_gpu.cu @@ -7,7 +7,7 @@ #include "../utils/gpu.hpp" #include "../global/global.h" #include "../global/global_cuda.h" -#include "../particles/particles_3D.h" +#include "particles_3D.h" #ifdef GRAVITY_GPU #include "../grid/grid3D.h" @@ -136,7 +136,7 @@ __device__ void Get_Indexes_CIC_Gravity( Real xMin, Real yMin, Real zMin, Real d } //Kernel to compute the gravitational field at the particles positions via Cloud-In-Cell -__global__ void Get_Gravity_CIC_Kernel( part_int_t n_local, Real *gravity_x_dev, Real *gravity_y_dev, Real *gravity_z_dev, Real *pos_x_dev, Real *pos_y_dev, Real *pos_z_dev, Real *grav_x_dev, Real *grav_y_dev, Real *grav_z_dev, Real xMin, Real yMin, Real zMin, Real xMax, Real yMax, Real zMax, Real dx, Real dy, Real dz, int nx, int ny, int nz, int n_ghost ){ +__global__ void Get_Gravity_CIC_Kernel( part_int_t n_local, Real *gravity_x_dev, Real *gravity_y_dev, Real *gravity_z_dev, Real *pos_x_dev, Real *pos_y_dev, Real *pos_z_dev, Real *grav_x_dev, Real *grav_y_dev, Real *grav_z_dev, Real xMin, Real yMin, Real zMin, Real xMax, Real yMax, Real zMax, Real dx, Real dy, Real dz, int nx, int ny, int nz, int n_ghost, part_int_t *partIDs_dev ){ part_int_t tid = blockIdx.x * blockDim.x + threadIdx.x ; @@ -168,7 +168,9 @@ __global__ void Get_Gravity_CIC_Kernel( part_int_t n_local, Real *gravity_x_dev, if ( pos_y < yMin || pos_y >= yMax ) in_local = false; if ( pos_z < zMin || pos_z >= zMax ) in_local = false; if ( ! in_local ) { + #ifndef HIDE_CIC_ERRORS printf(" Gravity CIC Error: Particle outside local domain"); + #endif return; } @@ -241,11 +243,24 @@ __global__ void Get_Gravity_CIC_Kernel( part_int_t n_local, Real *gravity_x_dev, grav_y_dev[tid] = g_y; grav_z_dev[tid] = g_z; + /* + if (partIDs_dev[tid] == 15) { + //printf(" (g_x_bl, g_y_bl) (%.4e, %.4e)\n", g_x_bl, g_y_bl); + //printf(" (g_x_br, g_y_br) (%.4e, %.4e)\n", g_x_br, g_y_br); + //printf(" (g_x_bu, g_y_bu) (%.4e, %.4e)\n", g_x_bu, g_y_bu); + //printf(" (g_x_bru, g_y_bru) (%.4e, %.4e)\n", g_x_bru, g_y_bru); + //printf(" (g_x_tl, g_y_tl) (%.4e, %.4e)\n", g_x_tl, g_y_tl); + //printf(" (g_x_tr, g_y_tr) (%.4e, %.4e)\n", g_x_tr, g_y_tr); + //printf(" (g_x_tu, g_y_tu) (%.4e, %.4e)\n", g_x_tu, g_y_tu); + //printf(" (x, y) -> (%f, %f)\n", pos_x, pos_y); + //printf(" (g_x_tru, g_y_tru) (%.4e, %.4e)\n", g_x_tru, g_y_tru); + printf(" -------->ID 15: pos (%f, %f), grav (%.4e, %.4e)\n", pos_x, pos_y, g_x, g_y); + }*/ } //Call the kernel to compote the gravitational field at the particles positions ( CIC ) -void Particles_3D::Get_Gravity_CIC_GPU_function( part_int_t n_local, int nx_local, int ny_local, int nz_local, int n_ghost_particles_grid, Real xMin, Real xMax, Real yMin, Real yMax, Real zMin, Real zMax, Real dx, Real dy, Real dz, Real *pos_x_dev, Real *pos_y_dev, Real *pos_z_dev, Real *grav_x_dev, Real *grav_y_dev, Real *grav_z_dev, Real *gravity_x_dev, Real *gravity_y_dev, Real *gravity_z_dev ){ +void Particles_3D::Get_Gravity_CIC_GPU_function( part_int_t n_local, int nx_local, int ny_local, int nz_local, int n_ghost_particles_grid, Real xMin, Real xMax, Real yMin, Real yMax, Real zMin, Real zMax, Real dx, Real dy, Real dz, Real *pos_x_dev, Real *pos_y_dev, Real *pos_z_dev, Real *grav_x_dev, Real *grav_y_dev, Real *grav_z_dev, Real *gravity_x_dev, Real *gravity_y_dev, Real *gravity_z_dev, part_int_t *partIDs_dev ){ // set values for GPU kernels int ngrid = (n_local + TPB_PARTICLES - 1) / TPB_PARTICLES; @@ -254,8 +269,10 @@ void Particles_3D::Get_Gravity_CIC_GPU_function( part_int_t n_local, int nx_loca // number of threads per 1D block dim3 dim1dBlock(TPB_PARTICLES, 1, 1); - hipLaunchKernelGGL(Get_Gravity_CIC_Kernel, dim1dGrid, dim1dBlock, 0, 0, n_local, gravity_x_dev, gravity_y_dev, gravity_z_dev, pos_x_dev, pos_y_dev, pos_z_dev, grav_x_dev, grav_y_dev, grav_z_dev, xMin, yMin, zMin, xMax, yMax, zMax, dx, dy, dz, nx_local, ny_local, nz_local, n_ghost_particles_grid ); - CudaCheckError(); + hipLaunchKernelGGL(Get_Gravity_CIC_Kernel, dim1dGrid, dim1dBlock, 0, 0, n_local, gravity_x_dev, gravity_y_dev, gravity_z_dev, pos_x_dev, pos_y_dev, pos_z_dev, grav_x_dev, grav_y_dev, grav_z_dev, xMin, yMin, zMin, xMax, yMax, zMax, dx, dy, dz, nx_local, ny_local, nz_local, n_ghost_particles_grid, partIDs_dev ); + CHECK(cudaDeviceSynchronize()); + + //CudaCheckError(); } diff --git a/src/particles/io_particles.cpp b/src/particles/io_particles.cpp index b9a2b396c..f49cbf5bf 100644 --- a/src/particles/io_particles.cpp +++ b/src/particles/io_particles.cpp @@ -8,7 +8,7 @@ #include "../global/global.h" #include "../grid/grid3D.h" #include "../io/io.h" -#include "../particles/particles_3D.h" +#include "particles_3D.h" #ifdef HDF5 #include @@ -359,7 +359,7 @@ void Particles_3D::Load_Particles_Data_HDF5(hid_t file_id, int nfile, struct par chprintf( " Allocated GPU memory for particle data\n"); // printf( " Loaded %ld particles ", n_to_load); - //Copyt the particle data to GPU memory + //Copy the particle data to GPU memory Copy_Particles_Array_Real_Host_to_Device( dataset_buffer_px, pos_x_dev, n_local); Copy_Particles_Array_Real_Host_to_Device( dataset_buffer_py, pos_y_dev, n_local); Copy_Particles_Array_Real_Host_to_Device( dataset_buffer_pz, pos_z_dev, n_local); @@ -479,7 +479,9 @@ void Grid3D::Write_Particles_Data_HDF5( hid_t file_id){ part_int_t i, j, k, id, buf_id; hid_t dataset_id, dataspace_id; Real *dataset_buffer; + #ifdef PARTICLE_IDS part_int_t *dataset_buffer_IDs; + #endif herr_t status; part_int_t n_local = Particles.n_local; hsize_t dims[1]; @@ -621,6 +623,9 @@ void Grid3D::Write_Particles_Data_HDF5( hid_t file_id){ #ifdef PARTICLES_CPU for ( i=0; i= Z_d) age.push_back(1.1e4); //else age.push_back(0.0); temp_age.push_back(0.0); #endif + //} } n_local = temp_pos_x.size(); - #if defined(PARTICLE_IDS) && defined(MPI_CHOLLA) + #if defined(PARTICLE_IDS) + part_int_t global_id_offset = 0; + #ifdef MPI_CHOLLA // Get global IDs: Offset the local IDs to get unique global IDs across the MPI ranks chprintf( " Computing Global Particles IDs offset \n" ); - part_int_t global_id_offset; global_id_offset = Get_Particles_IDs_Global_MPI_Offset( n_local ); - for ( int p_indx=0; p_indx void Free_GPU_Array( T *array ){ cudaFree(array); } //TODO remove the Free_GPU_Array_ functions void Allocate_Memory_GPU(); void Allocate_Particles_GPU_Array_Real( Real **array_dev, part_int_t size ); void Allocate_Particles_GPU_Array_bool( bool **array_dev, part_int_t size ); void Allocate_Particles_GPU_Array_int( int **array_dev, part_int_t size ); + void Allocate_Particles_GPU_Array_Part_Int( part_int_t **array_dev, part_int_t size ); void Allocate_Particles_Grid_Field_Real( Real **array_dev, int size ); - void Reallocate_and_Copy_Partciles_Array_Real( Real **src_array_dev, part_int_t size_initial, part_int_t size_end ); + void Reallocate_and_Copy_Particles_Array_Real( Real **src_array_dev, part_int_t size_initial, part_int_t size_end ); + void Reallocate_and_Copy_Particles_Array_Int( part_int_t **src_array_dev, part_int_t size_initial, part_int_t size_end ); void Copy_Particles_Array_Real_Host_to_Device( Real *array_host, Real *array_dev, part_int_t size); void Copy_Particles_Array_Real_Device_to_Host( Real *array_dev, Real *array_host, part_int_t size); + void Copy_Particles_Array_Int_Host_to_Device( part_int_t *array_host, part_int_t *array_dev, part_int_t size); + void Copy_Particles_Array_Int_Device_to_Host( part_int_t *array_dev, part_int_t *array_host, part_int_t size); void Set_Particles_Array_Real( Real value, Real *array_dev, part_int_t size); void Free_Memory_GPU(); void Initialize_Grid_Values_GPU(); @@ -248,7 +253,7 @@ class Particles_3D void Get_Gravity_Field_Particles_GPU( Real *potential_host ); void Get_Gravity_Field_Particles_GPU_function( int nx_local, int ny_local, int nz_local, int n_ghost_particles_grid, int n_cells_potential, Real dx, Real dy, Real dz, Real *potential_host, Real *potential_dev, Real *gravity_x_dev, Real *gravity_y_dev, Real *gravity_z_dev ); void Get_Gravity_CIC_GPU(); - void Get_Gravity_CIC_GPU_function( part_int_t n_local, int nx_local, int ny_local, int nz_local, int n_ghost_particles_grid, Real xMin, Real xMax, Real yMin, Real yMax, Real zMin, Real zMax, Real dx, Real dy, Real dz, Real *pos_x_dev, Real *pos_y_dev, Real *pos_z_dev, Real *grav_x_dev, Real *grav_y_dev, Real *grav_z_dev, Real *gravity_x_dev, Real *gravity_y_dev, Real *gravity_z_dev ); + void Get_Gravity_CIC_GPU_function( part_int_t n_local, int nx_local, int ny_local, int nz_local, int n_ghost_particles_grid, Real xMin, Real xMax, Real yMin, Real yMax, Real zMin, Real zMax, Real dx, Real dy, Real dz, Real *pos_x_dev, Real *pos_y_dev, Real *pos_z_dev, Real *grav_x_dev, Real *grav_y_dev, Real *grav_z_dev, Real *gravity_x_dev, Real *gravity_y_dev, Real *gravity_z_dev, part_int_t *partIDs_dev ); Real Calc_Particles_dt_GPU_function( int ngrid, part_int_t n_local, Real dx, Real dy, Real dz, Real *vel_x_dev, Real *vel_y_dev, Real *vel_z_dev, Real *dti_array_host, Real *dti_array_dev ); void Advance_Particles_KDK_Step1_GPU_function( part_int_t n_local, Real dt, Real *pos_x_dev, Real *pos_y_dev, Real *pos_z_dev, Real *vel_x_dev, Real *vel_y_dev, Real *vel_z_dev, Real *grav_x_dev, Real *grav_y_dev, Real *grav_z_dev ); void Advance_Particles_KDK_Step1_Cosmo_GPU_function( part_int_t n_local, Real delta_a, Real *pos_x_dev, Real *pos_y_dev, Real *pos_z_dev, Real *vel_x_dev, Real *vel_y_dev, Real *vel_z_dev, Real *grav_x_dev, Real *grav_y_dev, Real *grav_z_dev, Real current_a, Real H0, Real cosmo_h, Real Omega_M, Real Omega_L, Real Omega_K ); @@ -260,6 +265,7 @@ class Particles_3D void Replace_Tranfered_Particles_GPU( int n_transfer ); void Unload_Particles_from_Buffer_GPU( int direction, int side , Real *recv_buffer_h, int n_recv ); void Copy_Transfer_Particles_from_Buffer_GPU(int n_recv, Real *recv_buffer_d ); + void Set_Particles_Open_Boundary_GPU( int dir, int side ); #endif //PARTICLES_GPU diff --git a/src/particles/particles_3D_gpu.cu b/src/particles/particles_3D_gpu.cu index a3c314ce0..3cd66e38e 100644 --- a/src/particles/particles_3D_gpu.cu +++ b/src/particles/particles_3D_gpu.cu @@ -8,7 +8,7 @@ #include "../io/io.h" #include "../global/global.h" #include "../global/global_cuda.h" -#include "../particles/particles_3D.h" +#include "particles_3D.h" @@ -17,12 +17,14 @@ void Particles_3D::Free_GPU_Array_int( int *array ) { cudaFree(array); } void Particles_3D::Free_GPU_Array_bool( bool *array ){ cudaFree(array); } -void __global__ Copy_Device_to_Device_Kernel( Real *src_array_dev, Real *dst_array_dev, part_int_t size ){ +template< typename T > +void __global__ Copy_Device_to_Device_Kernel( T *src_array_dev, T *dst_array_dev, part_int_t size ){ int tid = blockIdx.x * blockDim.x + threadIdx.x ; if ( tid < size ) dst_array_dev[tid] = src_array_dev[tid]; } -void Copy_Device_to_Device( Real *src_array_dev, Real *dst_array_dev, part_int_t size ){ +template< typename T > +void Copy_Device_to_Device( T *src_array_dev, T *dst_array_dev, part_int_t size ){ int ngrid = (size + TPB_PARTICLES - 1) / TPB_PARTICLES; dim3 dim1dGrid(ngrid, 1, 1); dim3 dim1dBlock(TPB_PARTICLES, 1, 1); @@ -31,7 +33,8 @@ void Copy_Device_to_Device( Real *src_array_dev, Real *dst_array_dev, part_int_t } -void Particles_3D::Reallocate_and_Copy_Partciles_Array_Real( Real **src_array_dev, part_int_t size_initial, part_int_t size_end ){ + +void Particles_3D::Reallocate_and_Copy_Particles_Array_Real( Real **src_array_dev, part_int_t size_initial, part_int_t size_end ){ size_t global_free, global_total; CudaSafeCall( cudaMemGetInfo( &global_free, &global_total ) ); cudaDeviceSynchronize(); @@ -60,11 +63,38 @@ void Particles_3D::Reallocate_and_Copy_Partciles_Array_Real( Real **src_array_de CudaSafeCall( cudaFree( *src_array_dev )); cudaDeviceSynchronize(); *src_array_dev = temp_array_dev; - } - - +void Particles_3D::Reallocate_and_Copy_Particles_Array_Int( part_int_t **src_array_dev, part_int_t size_initial, part_int_t size_end ){ + size_t global_free, global_total; + CudaSafeCall( cudaMemGetInfo( &global_free, &global_total ) ); + cudaDeviceSynchronize(); + #ifdef PRINT_GPU_MEMORY + printf( "ReAllocating GPU Memory: %ld MB free \n", global_free/1000000); + #endif + if ( global_free < size_end*sizeof(Real) ){ + printf( "ERROR: Not enough global device memory \n" ); + printf( " Available Memory: %ld MB \n", global_free/1000000 ); + printf( " Requested Memory: %ld MB \n", size_end*sizeof(part_int_t)/1000000 ); + exit(-1); + } + part_int_t *temp_array_dev; + CudaSafeCall( cudaMalloc((void**)&temp_array_dev, size_end*sizeof(part_int_t)) ); + cudaDeviceSynchronize(); + // printf( " Allocated GPU Memory: %ld MB \n", size_end*sizeof(Real)/1000000 ); + if ( size_initial*sizeof(part_int_t) > size_end*sizeof(part_int_t) ){ + printf("ERROR: Memory to copy larger than array size\n" ); + exit(-1); + } + // printf( " Copying: %ld -> %ld \n", size_initial*sizeof(Real), size_end*sizeof(Real) ); + // CudaSafeCall( cudaMemcpy(temp_array_dev, *src_array_dev, size_initial*sizeof(Real), cudaMemcpyDeviceToDevice) ); + // NOTE: cudaMemcpy is not working! made kernel to do the device to device copy + Copy_Device_to_Device( *src_array_dev, temp_array_dev, size_initial ); + cudaDeviceSynchronize(); + CudaSafeCall( cudaFree( *src_array_dev )); + cudaDeviceSynchronize(); + *src_array_dev = temp_array_dev; +} void Particles_3D::Allocate_Particles_GPU_Array_Real( Real **array_dev, part_int_t size ){ size_t global_free, global_total; @@ -114,6 +144,22 @@ void Particles_3D::Allocate_Particles_GPU_Array_int( int **array_dev, part_int_t cudaDeviceSynchronize(); } +void Particles_3D::Allocate_Particles_GPU_Array_Part_Int( part_int_t **array_dev, part_int_t size ){ + size_t global_free, global_total; + CudaSafeCall( cudaMemGetInfo( &global_free, &global_total ) ); + #ifdef PRINT_GPU_MEMORY + chprintf( "Allocating GPU Memory: %ld MB free \n", global_free/1000000); + #endif + if ( global_free < size*sizeof(part_int_t) ){ + printf( "ERROR: Not enough global device memory \n" ); + printf( " Available Memory: %ld MB \n", global_free/1000000 ); + printf( " Requested Memory: %ld MB \n", size*sizeof(part_int_t)/1000000 ); + exit(-1); + } + CudaSafeCall( cudaMalloc((void**)array_dev, size*sizeof(part_int_t)) ); + cudaDeviceSynchronize(); +} + void Particles_3D::Allocate_Particles_GPU_Array_bool( bool **array_dev, part_int_t size ){ size_t global_free, global_total; CudaSafeCall( cudaMemGetInfo( &global_free, &global_total ) ); diff --git a/src/particles/particles_boundaries.cpp b/src/particles/particles_boundaries.cpp index ef6e7cdc3..030e48d89 100644 --- a/src/particles/particles_boundaries.cpp +++ b/src/particles/particles_boundaries.cpp @@ -5,12 +5,12 @@ #include #include "../grid/grid3D.h" #include "../io/io.h" -#include "../particles/particles_3D.h" +#include "particles_3D.h" #ifdef MPI_CHOLLA #include "../mpi/mpi_routines.h" #ifdef PARTICLES_GPU -#include "../particles/particles_boundaries_gpu.h" +#include "particles_boundaries_gpu.h" #endif//PARTICLES_GPU #endif//MPI_CHOLLA @@ -579,7 +579,7 @@ int Particles_3D::Select_Particles_to_Transfer_GPU( int direction, int side ){ domainMax = G.zMax; domainMin = G.zMin; } - + //chprintf("n_local=%d SELECT PARTICLES: %d dir, %d side. Max/Min %.4e/%.4e \n", n_local, direction, side, domainMax, domainMin); //Set the number of particles that will be sent and load the particles data into the transfer buffers n_transfer = Select_Particles_to_Transfer_GPU_function( n_local, side, domainMin, domainMax, pos, G.n_transfer_d, G.n_transfer_h, G.transfer_particles_flags_d, G.transfer_particles_indices_d, G.replace_particles_indices_d, G.transfer_particles_prefix_sum_d, G.transfer_particles_prefix_sum_blocks_d ); CHECK(cudaDeviceSynchronize()); @@ -594,6 +594,7 @@ void Particles_3D::Copy_Transfer_Particles_to_Buffer_GPU(int n_transfer, int dir Real *pos, *send_buffer_d; Real domainMin, domainMax; int bt_pos_x, bt_pos_y, bt_pos_z, bt_non_pos; + int field_id = -1; bt_pos_x = -1; bt_pos_y = -1; @@ -652,9 +653,6 @@ void Particles_3D::Copy_Transfer_Particles_to_Buffer_GPU(int n_transfer, int dir } } - - - if ( (*n_send + n_transfer)*N_DATA_PER_PARTICLE_TRANSFER > buffer_size ){ printf("ERROR:Transfer Buffer is not large enough\n" ); exit(-1); @@ -662,22 +660,29 @@ void Particles_3D::Copy_Transfer_Particles_to_Buffer_GPU(int n_transfer, int dir // Load the particles that will be transferred into the buffers n_fields_to_transfer = N_DATA_PER_PARTICLE_TRANSFER; - Load_Particles_to_Transfer_GPU_function( n_transfer, 0, n_fields_to_transfer, pos_x_dev, G.transfer_particles_indices_d, send_buffer_d, domainMin, domainMax, bt_pos_x ); - Load_Particles_to_Transfer_GPU_function( n_transfer, 1, n_fields_to_transfer, pos_y_dev, G.transfer_particles_indices_d, send_buffer_d, domainMin, domainMax, bt_pos_y ); - Load_Particles_to_Transfer_GPU_function( n_transfer, 2, n_fields_to_transfer, pos_z_dev, G.transfer_particles_indices_d, send_buffer_d, domainMin, domainMax, bt_pos_z ); - Load_Particles_to_Transfer_GPU_function( n_transfer, 3, n_fields_to_transfer, vel_x_dev, G.transfer_particles_indices_d, send_buffer_d, domainMin, domainMax, bt_non_pos ); - Load_Particles_to_Transfer_GPU_function( n_transfer, 4, n_fields_to_transfer, vel_y_dev, G.transfer_particles_indices_d, send_buffer_d, domainMin, domainMax, bt_non_pos ); - Load_Particles_to_Transfer_GPU_function( n_transfer, 5, n_fields_to_transfer, vel_z_dev, G.transfer_particles_indices_d, send_buffer_d, domainMin, domainMax, bt_non_pos ); - + Load_Particles_to_Transfer_GPU_function( n_transfer, ++field_id, n_fields_to_transfer, pos_x_dev, G.transfer_particles_indices_d, send_buffer_d, domainMin, domainMax, bt_pos_x ); + Load_Particles_to_Transfer_GPU_function( n_transfer, ++field_id, n_fields_to_transfer, pos_y_dev, G.transfer_particles_indices_d, send_buffer_d, domainMin, domainMax, bt_pos_y ); + Load_Particles_to_Transfer_GPU_function( n_transfer, ++field_id, n_fields_to_transfer, pos_z_dev, G.transfer_particles_indices_d, send_buffer_d, domainMin, domainMax, bt_pos_z ); + Load_Particles_to_Transfer_GPU_function( n_transfer, ++field_id, n_fields_to_transfer, vel_x_dev, G.transfer_particles_indices_d, send_buffer_d, domainMin, domainMax, bt_non_pos ); + Load_Particles_to_Transfer_GPU_function( n_transfer, ++field_id, n_fields_to_transfer, vel_y_dev, G.transfer_particles_indices_d, send_buffer_d, domainMin, domainMax, bt_non_pos ); + Load_Particles_to_Transfer_GPU_function( n_transfer, ++field_id, n_fields_to_transfer, vel_z_dev, G.transfer_particles_indices_d, send_buffer_d, domainMin, domainMax, bt_non_pos ); + #ifndef SINGLE_PARTICLE_MASS + Load_Particles_to_Transfer_GPU_function( n_transfer, ++field_id, n_fields_to_transfer, mass_dev, G.transfer_particles_indices_d, send_buffer_d, domainMin, domainMax, bt_non_pos ); + #endif + #ifdef PARTICLE_IDS + Load_Particles_to_Transfer_Int_GPU_function( n_transfer, ++field_id, n_fields_to_transfer, partIDs_dev, G.transfer_particles_indices_d, send_buffer_d, domainMin, domainMax, bt_non_pos ); + #endif + #ifdef PARTICLE_AGE + Load_Particles_to_Transfer_GPU_function( n_transfer, ++field_id, n_fields_to_transfer, age_dev, G.transfer_particles_indices_d, send_buffer_d, domainMin, domainMax, bt_non_pos ); + #endif CHECK(cudaDeviceSynchronize()); *n_send += n_transfer; // if ( *n_send > 0 ) printf( "###Transfered %ld particles\n", *n_send); - - } + void Particles_3D::Replace_Tranfered_Particles_GPU( int n_transfer ){ // Replace the particles that were transferred @@ -687,6 +692,15 @@ void Particles_3D::Replace_Tranfered_Particles_GPU( int n_transfer ){ Replace_Transfered_Particles_GPU_function( n_transfer, vel_x_dev, G.transfer_particles_indices_d, G.replace_particles_indices_d, false ); Replace_Transfered_Particles_GPU_function( n_transfer, vel_y_dev, G.transfer_particles_indices_d, G.replace_particles_indices_d, false ); Replace_Transfered_Particles_GPU_function( n_transfer, vel_z_dev, G.transfer_particles_indices_d, G.replace_particles_indices_d, false ); + #ifndef SINGLE_PARTICLE_MASS + Replace_Transfered_Particles_GPU_function( n_transfer, mass_dev, G.transfer_particles_indices_d, G.replace_particles_indices_d, false ); + #endif + #ifdef PARTICLE_IDS + Replace_Transfered_Particles_Int_GPU_function( n_transfer, partIDs_dev, G.transfer_particles_indices_d, G.replace_particles_indices_d, false ); + #endif + #ifdef PARTICLE_AGE + Replace_Transfered_Particles_GPU_function( n_transfer, age_dev, G.transfer_particles_indices_d, G.replace_particles_indices_d, false ); + #endif CHECK(cudaDeviceSynchronize()); // Update the local number of particles @@ -696,47 +710,89 @@ void Particles_3D::Replace_Tranfered_Particles_GPU( int n_transfer ){ void Particles_3D::Load_Particles_to_Buffer_GPU( int direction, int side, Real *send_buffer_h, int buffer_length ){ - int n_transfer; - n_transfer = Select_Particles_to_Transfer_GPU( direction, side ); Copy_Transfer_Particles_to_Buffer_GPU( n_transfer, direction, side, send_buffer_h, buffer_length ); Replace_Tranfered_Particles_GPU( n_transfer ); +} + +/** + * Open boundary conditions follows the same logic as Load_Particles_to_Buffer_GPU, except that the particles that are selected for transfer are + * not moved into any buffer (Copy_Transfer_Particles_to_Buffer_GPU step is skipped). Also the domainMix/domainMax are the global min/max values. + */ +void Particles_3D::Set_Particles_Open_Boundary_GPU( int dir, int side ){ + int n_transfer; + /*Real *pos; + Real domainMin, domainMax; + + if ( dir == 0 ){ + domainMin = G.domainMin_x; + domainMax = G.domainMax_x; + } + if ( dir == 1 ){ + domainMin = G.domainMin_y; + domainMax = G.domainMax_y; + } + if ( dir == 2 ){ + domainMin = G.domainMin_z; + domainMax = G.domainMax_z; + }*/ + n_transfer = Select_Particles_to_Transfer_GPU(dir, side); + //n_transfer = Select_Particles_to_Transfer_GPU_function( n_local, side, domainMin, domainMax, pos, G.n_transfer_d, G.n_transfer_h, G.transfer_particles_flags_d, G.transfer_particles_indices_d, G.replace_particles_indices_d, G.transfer_particles_prefix_sum_d, G.transfer_particles_prefix_sum_blocks_d ); + //CHECK(cudaDeviceSynchronize()); + //chprintf("OPEN condition: removing %d\n", n_transfer); + Replace_Tranfered_Particles_GPU( n_transfer ); } void Particles_3D::Copy_Transfer_Particles_from_Buffer_GPU(int n_recv, Real *recv_buffer_d ){ - int n_fields_to_transfer; part_int_t n_local_after = n_local + n_recv; if ( n_local_after > particles_array_size ){ particles_array_size = Compute_Particles_GPU_Array_Size( n_local_after ); printf("Reallocating GPU particles arrays \n" ); - Reallocate_and_Copy_Partciles_Array_Real( &pos_x_dev, n_local, particles_array_size ); - Reallocate_and_Copy_Partciles_Array_Real( &pos_y_dev, n_local, particles_array_size ); - Reallocate_and_Copy_Partciles_Array_Real( &pos_z_dev, n_local, particles_array_size ); - Reallocate_and_Copy_Partciles_Array_Real( &vel_x_dev, n_local, particles_array_size ); - Reallocate_and_Copy_Partciles_Array_Real( &vel_y_dev, n_local, particles_array_size ); - Reallocate_and_Copy_Partciles_Array_Real( &vel_z_dev, n_local, particles_array_size ); + Reallocate_and_Copy_Particles_Array_Real( &pos_x_dev, n_local, particles_array_size ); + Reallocate_and_Copy_Particles_Array_Real( &pos_y_dev, n_local, particles_array_size ); + Reallocate_and_Copy_Particles_Array_Real( &pos_z_dev, n_local, particles_array_size ); + Reallocate_and_Copy_Particles_Array_Real( &vel_x_dev, n_local, particles_array_size ); + Reallocate_and_Copy_Particles_Array_Real( &vel_y_dev, n_local, particles_array_size ); + Reallocate_and_Copy_Particles_Array_Real( &vel_z_dev, n_local, particles_array_size ); + #ifndef SINGLE_PARTICLE_MASS + Reallocate_and_Copy_Particles_Array_Real( &mass_dev, n_local, particles_array_size ); + #endif + #ifdef PARTICLE_IDS + Reallocate_and_Copy_Particles_Array_Int( &partIDs_dev, n_local, particles_array_size ); + #endif + #ifdef PARTICLE_AGE + Reallocate_and_Copy_Particles_Array_Real( &age_dev, n_local, particles_array_size ); + #endif } // Unload the particles that were transferred from the buffers + int field_id = -1; n_fields_to_transfer = N_DATA_PER_PARTICLE_TRANSFER; - Unload_Particles_to_Transfer_GPU_function( n_local, n_recv, 0, n_fields_to_transfer, pos_x_dev, recv_buffer_d ); - Unload_Particles_to_Transfer_GPU_function( n_local, n_recv, 1, n_fields_to_transfer, pos_y_dev, recv_buffer_d ); - Unload_Particles_to_Transfer_GPU_function( n_local, n_recv, 2, n_fields_to_transfer, pos_z_dev, recv_buffer_d ); - Unload_Particles_to_Transfer_GPU_function( n_local, n_recv, 3, n_fields_to_transfer, vel_x_dev, recv_buffer_d ); - Unload_Particles_to_Transfer_GPU_function( n_local, n_recv, 4, n_fields_to_transfer, vel_y_dev, recv_buffer_d ); - Unload_Particles_to_Transfer_GPU_function( n_local, n_recv, 5, n_fields_to_transfer, vel_z_dev, recv_buffer_d ); - // + Unload_Particles_to_Transfer_GPU_function( n_local, n_recv, ++field_id, n_fields_to_transfer, pos_x_dev, recv_buffer_d ); + Unload_Particles_to_Transfer_GPU_function( n_local, n_recv, ++field_id, n_fields_to_transfer, pos_y_dev, recv_buffer_d ); + Unload_Particles_to_Transfer_GPU_function( n_local, n_recv, ++field_id, n_fields_to_transfer, pos_z_dev, recv_buffer_d ); + Unload_Particles_to_Transfer_GPU_function( n_local, n_recv, ++field_id, n_fields_to_transfer, vel_x_dev, recv_buffer_d ); + Unload_Particles_to_Transfer_GPU_function( n_local, n_recv, ++field_id, n_fields_to_transfer, vel_y_dev, recv_buffer_d ); + Unload_Particles_to_Transfer_GPU_function( n_local, n_recv, ++field_id, n_fields_to_transfer, vel_z_dev, recv_buffer_d ); + #ifndef SINGLE_PARTICLE_MASS + Unload_Particles_to_Transfer_GPU_function( n_local, n_recv, ++field_id, n_fields_to_transfer, mass_dev, recv_buffer_d ); + #endif + #ifdef PARTICLE_IDS + Unload_Particles_Int_to_Transfer_GPU_function( n_local, n_recv, ++field_id, n_fields_to_transfer, partIDs_dev, recv_buffer_d ); + #endif + #ifdef PARTICLE_AGE + Unload_Particles_to_Transfer_GPU_function( n_local, n_recv, ++field_id, n_fields_to_transfer, age_dev, recv_buffer_d ); + #endif + n_local += n_recv; // if ( n_recv > 0 ) printf( "###Unloaded %d particles\n", n_recv ); - - } @@ -790,15 +846,8 @@ void Particles_3D::Unload_Particles_from_Buffer_GPU( int direction, int side , R } - - - #endif //PARTICLES_GPU - - - - #endif //MPI_CHOLLA #endif //PARTICLES diff --git a/src/particles/particles_boundaries_cpu.cpp b/src/particles/particles_boundaries_cpu.cpp index b90963b05..7e6eb3372 100644 --- a/src/particles/particles_boundaries_cpu.cpp +++ b/src/particles/particles_boundaries_cpu.cpp @@ -5,7 +5,7 @@ #include #include "../grid/grid3D.h" #include "../io/io.h" -#include "../particles/particles_3D.h" +#include "particles_3D.h" #ifdef MPI_CHOLLA #include "../mpi/mpi_routines.h" @@ -100,24 +100,22 @@ void Grid3D::Set_Particles_Boundary( int dir, int side ){ //Set open boundaries for particles when not using MPI -void Grid3D::Set_Particles_Open_Boundary( int dir, int side ){ - Real d_min, d_max, L; +void Grid3D::Set_Particles_Open_Boundary_CPU( int dir, int side ){ + Real d_min, d_max; if ( dir == 0 ){ - d_min = Particles.G.xMin; - d_max = Particles.G.xMax; + d_min = Particles.G.domainMin_x; + d_max = Particles.G.domainMax_x; } if ( dir == 1 ){ - d_min = Particles.G.yMin; - d_max = Particles.G.yMax; + d_min = Particles.G.domainMin_y; + d_max = Particles.G.domainMax_y; } if ( dir == 2 ){ - d_min = Particles.G.zMin; - d_max = Particles.G.zMax; + d_min = Particles.G.domainMin_z; + d_max = Particles.G.domainMax_z; } - L = d_max - d_min; - Real pos; int_vector_t removed_indices; diff --git a/src/particles/particles_boundaries_gpu.cu b/src/particles/particles_boundaries_gpu.cu index 0dbda2332..fbb68cf94 100644 --- a/src/particles/particles_boundaries_gpu.cu +++ b/src/particles/particles_boundaries_gpu.cu @@ -9,9 +9,9 @@ #include "../io/io.h" #include "../global/global.h" #include "../global/global_cuda.h" -#include "../particles/particles_3D.h" #include "../grid/grid3D.h" -#include "../particles/particles_boundaries_gpu.h" +#include "particles_boundaries_gpu.h" +#include "particles_3D.h" #define SCAN_SHARED_SIZE 2*TPB_PARTICLES @@ -82,17 +82,10 @@ __global__ void Get_Transfer_Flags_Kernel( part_int_t n_total, int side, Real d bool transfer = 0; Real pos = pos_d[tid]; - // if ( tid < 1 ) printf( "%f\n", pos); - if ( side == 0 ){ - if ( pos < d_min ) transfer = 1; - } - - if ( side == 1 ){ - if ( pos >= d_max ) transfer = 1; - } + if ( side == 0 && pos < d_min) transfer = 1; + if ( side == 1 && pos >= d_max) transfer = 1; - // if ( transfer ) printf( "##Thread particles transfer\n"); transfer_flags_d[tid] = transfer; } @@ -229,7 +222,7 @@ __global__ void Get_Transfer_Indices_Kernel( part_int_t n_total, bool *transfer_ } -__global__ void Select_Indices_to_Replace_Tranfered_Kernel( part_int_t n_total, int n_transfer, bool *transfer_flags_d, int *prefix_sum_d, int *replace_indices_d ){ +__global__ void Select_Indices_to_Replace_Transfered_Kernel( part_int_t n_total, int n_transfer, bool *transfer_flags_d, int *prefix_sum_d, int *replace_indices_d ){ int tid, tid_inv; tid = threadIdx.x + blockIdx.x * blockDim.x; @@ -248,9 +241,8 @@ __global__ void Select_Indices_to_Replace_Tranfered_Kernel( part_int_t n_total, } - -__global__ void Replace_Transfered_Particles_Kernel( int n_transfer, Real *field_d, int *transfer_indices_d, int *replace_indices_d, bool print_replace ){ - +template< typename T> +__global__ void Replace_Transfered_Particles_Kernel( int n_transfer, T *field_d, int *transfer_indices_d, int *replace_indices_d, bool print_replace ){ int tid; tid = threadIdx.x + blockIdx.x * blockDim.x; if ( tid >= n_transfer ) return; @@ -260,7 +252,7 @@ __global__ void Replace_Transfered_Particles_Kernel( int n_transfer, Real *field src_id = replace_indices_d[tid]; if ( dst_id < src_id ){ - if (print_replace) printf("Replacing: %f \n", field_d[dst_id] ); + if (print_replace) printf("Replacing: %f \n", field_d[dst_id]*1.0 ); field_d[dst_id] = field_d[src_id]; } @@ -281,6 +273,19 @@ void Replace_Transfered_Particles_GPU_function( int n_transfer, Real *field_d, } +void Replace_Transfered_Particles_Int_GPU_function( int n_transfer, part_int_t *field_d, int *transfer_indices_d, int *replace_indices_d, bool print_replace ){ + int grid_size; + grid_size = (n_transfer - 1) / TPB_PARTICLES + 1; + // number of blocks per 1D grid + dim3 dim1dGrid(grid_size, 1, 1); + // number of threads per 1D block + dim3 dim1dBlock(TPB_PARTICLES, 1, 1); + + hipLaunchKernelGGL( Replace_Transfered_Particles_Kernel, dim1dGrid, dim1dBlock, 0, 0, n_transfer, field_d, transfer_indices_d, replace_indices_d, print_replace ); + CudaCheckError(); +} + + part_int_t Select_Particles_to_Transfer_GPU_function( part_int_t n_local, int side, Real domainMin, Real domainMax, Real *pos_d, int *n_transfer_d, int *n_transfer_h, bool *transfer_flags_d, int *transfer_indices_d, int *replace_indices_d, int *transfer_prefix_sum_d, int *transfer_prefix_sum_blocks_d ){ // set values for GPU kernels int grid_size, grid_size_half; @@ -316,7 +321,7 @@ part_int_t Select_Particles_to_Transfer_GPU_function( part_int_t n_local, int si hipLaunchKernelGGL( Get_Transfer_Indices_Kernel, dim1dGrid, dim1dBlock, 0, 0, n_local , transfer_flags_d, transfer_prefix_sum_d, transfer_indices_d ); CudaCheckError(); - hipLaunchKernelGGL( Select_Indices_to_Replace_Tranfered_Kernel, dim1dGrid, dim1dBlock , 0, 0, n_local, n_transfer_h[0], transfer_flags_d, transfer_prefix_sum_d, replace_indices_d ); + hipLaunchKernelGGL( Select_Indices_to_Replace_Transfered_Kernel, dim1dGrid, dim1dBlock , 0, 0, n_local, n_transfer_h[0], transfer_flags_d, transfer_prefix_sum_d, replace_indices_d ); CudaCheckError(); // if ( n_transfer_h[0] > 0 )printf( "N transfer: %d\n", n_transfer_h[0]); @@ -360,7 +365,41 @@ void Load_Particles_to_Transfer_GPU_function( int n_transfer, int field_id, int } +__global__ void Load_Transfered_Particles_Ints_to_Buffer_Kernel( int n_transfer, int field_id, int n_fields_to_transfer, part_int_t *field_d, int *transfer_indices_d, Real *send_buffer_d, Real domainMin, Real domainMax, int boundary_type ){ + + int tid; + tid = threadIdx.x + blockIdx.x * blockDim.x; + if ( tid >= n_transfer ) return; + + int src_id, dst_id; + part_int_t field_val; + src_id = transfer_indices_d[tid]; + dst_id = tid * n_fields_to_transfer + field_id; + field_val = field_d[src_id]; + + // Set global periodic boundary conditions + if ( boundary_type == 1 && field_val < domainMin ) field_val += ( domainMax - domainMin ); + if ( boundary_type == 1 && field_val >= domainMax ) field_val -= ( domainMax - domainMin ); + send_buffer_d[dst_id] = __longlong_as_double(field_val); + +} + + +void Load_Particles_to_Transfer_Int_GPU_function( int n_transfer, int field_id, int n_fields_to_transfer, part_int_t *field_d, int *transfer_indices_d, Real *send_buffer_d, Real domainMin, Real domainMax, int boundary_type ){ + // set values for GPU kernels + int grid_size; + grid_size = (n_transfer - 1) / TPB_PARTICLES + 1; + // number of blocks per 1D grid + dim3 dim1dGrid(grid_size, 1, 1); + // number of threads per 1D block + dim3 dim1dBlock(TPB_PARTICLES, 1, 1); + + hipLaunchKernelGGL( Load_Transfered_Particles_Ints_to_Buffer_Kernel, dim1dGrid, dim1dBlock , 0, 0, n_transfer, field_id, n_fields_to_transfer, field_d, transfer_indices_d, send_buffer_d, domainMin, domainMax, boundary_type ); + CudaCheckError(); +} + +#ifdef MPI_CHOLLA void Copy_Particles_GPU_Buffer_to_Host_Buffer( int n_transfer, Real *buffer_h, Real *buffer_d ){ int transfer_size; @@ -380,7 +419,7 @@ void Copy_Particles_Host_Buffer_to_GPU_Buffer( int n_transfer, Real *buffer_h, R CudaCheckError(); } - +#endif //MPI_CHOLLA __global__ void Unload_Transfered_Particles_from_Buffer_Kernel( int n_local, int n_transfer, int field_id, int n_fields_to_transfer, Real *field_d, Real *recv_buffer_d ){ @@ -410,6 +449,34 @@ void Unload_Particles_to_Transfer_GPU_function( int n_local, int n_transfer, int } +__global__ void Unload_Transfered_Particles_Int_from_Buffer_Kernel( int n_local, int n_transfer, int field_id, int n_fields_to_transfer, part_int_t *field_d, Real *recv_buffer_d ){ + + int tid; + tid = threadIdx.x + blockIdx.x * blockDim.x; + if ( tid >= n_transfer ) return; + + int src_id, dst_id; + src_id = tid * n_fields_to_transfer + field_id; + dst_id = n_local + tid; + field_d[dst_id] = __double_as_longlong(recv_buffer_d[src_id]); + +} + +void Unload_Particles_Int_to_Transfer_GPU_function( int n_local, int n_transfer, int field_id, int n_fields_to_transfer, part_int_t *field_d, Real *recv_buffer_d ){ + + // set values for GPU kernels + int grid_size; + grid_size = (n_transfer - 1) / TPB_PARTICLES + 1; + // number of blocks per 1D grid + dim3 dim1dGrid(grid_size, 1, 1); + // number of threads per 1D block + dim3 dim1dBlock(TPB_PARTICLES, 1, 1); + + hipLaunchKernelGGL( Unload_Transfered_Particles_Int_from_Buffer_Kernel, dim1dGrid, dim1dBlock , 0, 0, n_local, n_transfer, field_id, n_fields_to_transfer, field_d, recv_buffer_d ); + CudaCheckError(); + +} + // #endif//MPI_CHOLLA diff --git a/src/particles/particles_boundaries_gpu.h b/src/particles/particles_boundaries_gpu.h index d10fb3428..e99a5ddc1 100644 --- a/src/particles/particles_boundaries_gpu.h +++ b/src/particles/particles_boundaries_gpu.h @@ -6,15 +6,17 @@ part_int_t Select_Particles_to_Transfer_GPU_function( part_int_t n_local, int side, Real domainMin, Real domainMax, Real *pos_d, int *n_transfer_d, int *n_transfer_h, bool *transfer_flags_d, int *transfer_indices_d, int *replace_indices_d, int *transfer_prefix_sum_d, int *transfer_prefix_sum_blocks_d ); void Load_Particles_to_Transfer_GPU_function( int n_transfer, int field_id, int n_fields_to_transfer, Real *field_d, int *transfer_indices_d, Real *send_buffer_d, Real domainMin, Real domainMax, int boundary_type ); +void Load_Particles_to_Transfer_Int_GPU_function( int n_transfer, int field_id, int n_fields_to_transfer, part_int_t *field_d, int *transfer_indices_d, Real *send_buffer_d, Real domainMin, Real domainMax, int boundary_type ); void Replace_Transfered_Particles_GPU_function( int n_transfer, Real *field_d, int *transfer_indices_d, int *replace_indices_d, bool print_replace ); +void Replace_Transfered_Particles_Int_GPU_function( int n_transfer, part_int_t *field_d, int *transfer_indices_d, int *replace_indices_d, bool print_replace ); void Copy_Particles_GPU_Buffer_to_Host_Buffer( int n_transfer, Real *buffer_h, Real *buffer_d ); void Copy_Particles_Host_Buffer_to_GPU_Buffer( int n_transfer, Real *buffer_h, Real *buffer_d ); void Unload_Particles_to_Transfer_GPU_function( int n_local, int n_transfer, int field_id, int n_fields_to_transfer, Real *field_d, Real *recv_buffer_d ); - +void Unload_Particles_Int_to_Transfer_GPU_function( int n_local, int n_transfer, int field_id, int n_fields_to_transfer, part_int_t *field_d, Real *recv_buffer_d ); #endif //PARTICLES_H diff --git a/src/particles/particles_dynamics.cpp b/src/particles/particles_dynamics.cpp index 1ea2e2647..1c5f38726 100644 --- a/src/particles/particles_dynamics.cpp +++ b/src/particles/particles_dynamics.cpp @@ -7,7 +7,7 @@ #include #include "../global/global.h" #include "../grid/grid3D.h" -#include "../particles/particles_3D.h" +#include "particles_3D.h" #include "../io/io.h" #ifdef PARALLEL_OMP diff --git a/src/particles/particles_dynamics_gpu.cu b/src/particles/particles_dynamics_gpu.cu index fb605135b..037cb33b7 100644 --- a/src/particles/particles_dynamics_gpu.cu +++ b/src/particles/particles_dynamics_gpu.cu @@ -9,7 +9,7 @@ #include "../global/global_cuda.h" #include "../grid/grid3D.h" #include "../io/io.h" -#include "../particles/particles_3D.h" +#include "particles_3D.h" #ifdef COSMOLOGY #include "../cosmology/cosmology.h" @@ -104,8 +104,6 @@ Real Particles_3D::Calc_Particles_dt_GPU_function( int ngrid, part_int_t n_parti return max_dti; - - } diff --git a/src/particles/supernova.h b/src/particles/supernova.h new file mode 100644 index 000000000..c876abb94 --- /dev/null +++ b/src/particles/supernova.h @@ -0,0 +1,37 @@ +#ifndef SUPERNOVA_H +#define SUPERNOVA_H + +#include "../global/global.h" +#ifdef PARTICLES_GPU +#include +#include +#endif + + +namespace Supernova { + static const int NUMBER = 0; + static const int ENERGY = 1; + static const int MASS = 2; + static const int MOMENTUM = 3; + static const int SHELL_RADIUS = 4; + + // supernova rate: 1SN / 100 solar masses, with 10^5 solar masses per cluster, spread over 10^4 kyr + static const Real SNR=0.1; + static const Real ENERGY_PER_SN = 5.3e-05; // 1e51 ergs/SN in solarMass*(kpc/kyr)**2 + static const Real MASS_PER_SN = 10.0; // 10 solarMasses per SN + static const Real FINAL_MOMENTUM = 0.29; // 2.8e5 solarMasses km/s * n_0^{-0.17} -> eq.(34) Kim & Ostriker (2015) + static const Real MU = 0.6; + static const Real R_SH = 0.0302; // 30.2 pc * n_0^{-0.46} -> eq.(31) Kim & Ostriker (2015) + static const Real SN_ERA = 1.0e4; // assume SN occur during first 10 Myr after cluster formation. + + #ifdef PARTICLES_GPU + extern curandStateMRG32k3a_t* curandStates; + extern part_int_t n_states; + + void initState(struct parameters *P, part_int_t n_local, Real allocation_factor = 1); + + #endif //PARTICLES_GPU +} + + +#endif diff --git a/src/utils/prng_utilities.h b/src/utils/prng_utilities.h index 47e628a77..6f89a9c1b 100644 --- a/src/utils/prng_utilities.h +++ b/src/utils/prng_utilities.h @@ -26,8 +26,10 @@ class ChollaPrngGenerator // This should give a fairly random seed even if std::random_device // isn't random std::string hashString = std::to_string(std::random_device{}()) - + std::to_string(std::chrono::high_resolution_clock::now().time_since_epoch().count()) - + std::to_string(static_cast(procID)); + #ifdef MPI_CHOLLA + + std::to_string(static_cast(procID)) + #endif + + std::to_string(std::chrono::high_resolution_clock::now().time_since_epoch().count()); std::size_t hashedSeed = std::hash{}(hashString); P->prng_seed = static_cast(hashedSeed); } From be373a4536516d8f08e0f07c914d18a8d268da39 Mon Sep 17 00:00:00 2001 From: helenarichie Date: Fri, 25 Feb 2022 13:53:21 -0500 Subject: [PATCH 004/694] add cooling type for tests --- src/system_tests/hydro_system_tests.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/system_tests/hydro_system_tests.cpp b/src/system_tests/hydro_system_tests.cpp index 32244e6aa..9b816a23f 100644 --- a/src/system_tests/hydro_system_tests.cpp +++ b/src/system_tests/hydro_system_tests.cpp @@ -22,7 +22,7 @@ * */ /// @{ -class tHYDROSYSTEMSodShockTubeParameterizedMpi +class tHYDROtCOOLINGSYSTEMSodShockTubeParameterizedMpi :public ::testing::TestWithParam { @@ -30,7 +30,7 @@ class tHYDROSYSTEMSodShockTubeParameterizedMpi systemTest::SystemTestRunner sodTest; }; -TEST_P(tHYDROSYSTEMSodShockTubeParameterizedMpi, +TEST_P(tHYDROtCOOLINGSYSTEMSodShockTubeParameterizedMpi, CorrectInputExpectCorrectOutput) { sodTest.numMpiRanks = GetParam(); @@ -38,7 +38,7 @@ TEST_P(tHYDROSYSTEMSodShockTubeParameterizedMpi, } INSTANTIATE_TEST_SUITE_P(CorrectInputExpectCorrectOutput, - tHYDROSYSTEMSodShockTubeParameterizedMpi, + tHYDROtCOOLINGSYSTEMSodShockTubeParameterizedMpi, ::testing::Values(1, 2, 4)); /// @} // ============================================================================= \ No newline at end of file From 3b6905caa21e823c8ddcbf47b89bb65ddcaa2741 Mon Sep 17 00:00:00 2001 From: helenarichie Date: Fri, 25 Feb 2022 14:07:54 -0500 Subject: [PATCH 005/694] update file names for cooling test --- ...izedMpi_CorrectInputExpectCorrectOutput.h5 | Bin 12587920 -> 0 bytes ...edMpi_CorrectInputExpectCorrectOutput.txt} | 0 2 files changed, 0 insertions(+), 0 deletions(-) delete mode 100644 src/system_tests/fiducial_data/tHYDROSYSTEMSodShockTubeParameterizedMpi_CorrectInputExpectCorrectOutput.h5 rename src/system_tests/input_files/{tHYDROSYSTEMSodShockTubeParameterizedMpi_CorrectInputExpectCorrectOutput.txt => tHYDROtCOOLINGSYSTEMSodShockTubeParameterizedMpi_CorrectInputExpectCorrectOutput.txt} (100%) diff --git a/src/system_tests/fiducial_data/tHYDROSYSTEMSodShockTubeParameterizedMpi_CorrectInputExpectCorrectOutput.h5 b/src/system_tests/fiducial_data/tHYDROSYSTEMSodShockTubeParameterizedMpi_CorrectInputExpectCorrectOutput.h5 deleted file mode 100644 index 9152a0663c3659c10a0d8a56905d78fec315168f..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12587920 zcmeF)3;b>6oImj2>F9pC9G66xjQeG%VaBy~&M1ZGrrbuQiyB>w)BQq7E+vFgk}iY@ zNeKTgGNlp?MmhP9WYGMnNQLS4Uu*C6{hhO)Ilr^d$*uK#-@RVbx3$;)ZNK$=p6~lv zd)1uh9h*LPlhsyv@G4X7Un{LNb)Tsm{_);9l`5n4{nAO#{cl8s? zjs9WQ)cvR0b8C06ui1V6y0b?wti8ghZnM?K8&5UwjL(nS{n*r|qu+39y?8-QH3FM% z{G2V?3w~*L)iK|`+0C;UjGcGcYyR&095}V--h1w{*FO91x%~m7Gt+mKWl1*W`?gl^_m^`+;hj~mAMZeOszCLv(eO3 zSDkw5-Q7H&HEZ-6$L}3=aNDlKzZQPpa>LKtXZRWS;PWm$zWwWjg`YQj_<1{bAGplF z-zyH7_rc@uJZ#R?M#JA|*3^nqbDHbjYy0{8?DD#)sWn%gYRqW=T6g&OnXa=rxl{Xw zg}>Lz-RJDO`!0LzJb(DZ&EJ;c^M-QFnHo-uMvhfR;+&y;L>oxoAw(ygebsV3&^Y!0)*bDc&cH!rae&0QJoIiizr!ITmn_T_o z3xD=+*MDT;=dM2defQgCkG*$l8X-P=S%2So|M-n(oOIng?z8Z7R~yZvQGdn9F7teU z#=f`x_Dj$Fmx~vE?r3~>8TQ}w$1d~dZn(m;pMT7eKX}r@`L+CTe(k*bp2N8{)4b~1 zb1G^x1vgsc{}!1Ct4@F3_RY}lxJUD@=C6DHx#7pCpZ4B$*ZI3NlXe+CZ}bave_wdr zU4Opr^?UEsT=_l!ypgnRzqh`~4?_xVL_-5(m>K|X8h>BBe9nmu}9qtTCbYK|sV`_fFW z&u+%J{mGerzvY`%r9Hpga?1_>2Ucp{-~Kgw)_YbR{zLP(b>knJdf#ppYTx|0&mOqr z!j*H8H~k6!aqAbo__^(vY}9?`1*@;S#7`g1{;8=e*I4rVMr-NR)YF@FW=Vgo*`0jP zs_QQ4ZPR0P@|-2VZ@unQKDXME-?zm7v3qcGa8rBdd)gsg-L|VQ=<4&;;rP@t4w382 zx?JDT)yVyc-SOXc^$}ftc=39iS4aKRmvM+ZpWNjcxyQVS>v&Li{()UxOC37U*Kvp( zBiBcDc}DKxTg;F0{qb{acR#0&`o3@DuuhlXBfGkJS0n#$KIVDkUe1TUU3d21H*$<| z2#i4v}Bf$a!h=k9p9S>-Bq%&(Fv`s{F^tr}p_0xkr`1 z{M+X~{Le8DqVJ>X{~Wo;x)!-dmB0M?fA?w8AGO!$`>6U~|LcGK@BTlB1L1yD{^!U& z?rS3VsPdP;_22q${mgrt9J$ARVB{WE{_?l}TmP;9IUET0qw+sT?$-ZYIY-`QjlPen|8vcY$UUn3o%zx6+d1L1yD{^!Wu`kyQ3$h)l3_fhqKu6Yr; zN0q<)-T%A)cmJQmfp9-6|8wLX_kodnRQb!_`fvTW{^xKY+>grt9JyQnbLAX)mo@r6 zs{YS4FCzD-@|VB+fA|0H|8qDH?nmW+j@;uuFmjJ7fB9Sgt^d~l91ev0Q78Ey>qhMR z6+fqr=1bqlq0B$_0ge4X>xkpg z{lJ|IVjifY-1{~Tkz?dKQ~q&X@&1?(YCroua*JFi-*cOL+VADh>3{vN|MkE7|C|o+KWFZ7 zA0U7ETmP;9*8iLi@IPnn*8kkO%U}NOeINgG&jb0(-}gV?|9t<;=>Y$8=I;JKckc3+ zzwdu}@B8$>{@4Hd-~E4X2jnk*&;RnS1NvY8>wo?4`F~Ca_@6U(&wp~~E`RxZ{+IW@ zPyg$G{jdMs|L1l<{_^+yFYh{_|MkEA*Z-dX=X8MoIdk{?CwK1hm%rzKdGGu5zy8<% z`rrM3ZU^Kqf6xE&t^@jC|LcGKUq1hj6wb|eM=4)yjhsgKKMed*P zj@7pRZ@#A1aX72XIp#ssxZX3n^JV=^_qsZoAAKE%&viMLu9f+Jx_h5G%Da|vh&>n*YG^@kKCh1{&BwidB^>pAM)>aet&HL$zT5M_nPoM+>grty61=fuRGsK z>x2B|&;OgRnbaq>p4b2SU;pcW_y0K^;D65C<9Qy9pHb?+^zq)bCR4)8x`?wzE1 z{jdK$|Ig_F|8wT<`A_cL3{vN|MkE7|J)A9U;du|<=67Q%AYiG7g{Z_VqodtFaGw%!Akut~`#{)zN(D z+c-p?k^7g1`JXlCxn13)tDCCL{_khascjrK>vD}cugE=Ke?j-U+V=Tbb7~uh=Xd!= z-^VwKa7tIU1#F4q@zHFA#{`A2RV+e38HW+wU{{&p8kDf6m-veUrcZ`G3}&=!a}o z|LcGKum9cu=X8MoIdhNu9Qn)N`fvTW{^xXn|2cEF{^!nJ{_=0{`}m)G9>`z*zW@3D z=lfqy2l$^eclZCfbC3iezy8<%?*DT;Ab^7()40~q^0#y+9pg*tMh zmT`#uV;|tiKaOL6*m$2h%DvG_Ww-z$LqCRf7a`W93%g-mglpMQ?1us<}3fYT%R;=<)16>az4mk z{_XRg()~Ct=SMkz9QS+vl=(*PQOoo4?swj0er3Lqd(`rL`SXtZJs-+^BloD~d3pCc z?=rtK-^e{`dA|I4$NioUWxkPn)bhN%`<-{0Uzu;@9<@AQ{=DOU&xbPK$USO#Uf%uA zyUef5H*$|!o-cpialhw7nQ!DCwLCBHe&=20SLPeJM=j5nKkvBT^P$W)a*tY`mv_JO zF7qq%johP_=gXgW-0%5N<{P<3Ezirl-+7n$mH9^QQOon?&pYn-d?@pc+@qG~<=yYR z%lyiGBloD~`SRx-_j^8+`9|(h%k%Q?civ@wWxkPn)bf1!^N#yHAIf|q_o(H0dG|Z- zGQTq4$USO#zWjN|{hkkHzL9&>^1QtJop+gEnQ!DCwLD+`yyJe)hce&DJ!*Mg-u=$I z%&*Kha*tY`FMr-~zvn}lZ{!}eJTLEl=UwJk<{P<3Ezg%f@3`Odq0BdOk6NCWcfa#4 z^DFa>+@qG~%b$1L@A**X8@Wd<&&#{td6)T>`9|(h%k$;WJMQ;@_hO8j{7|y%6udDsO5Qi_dD-0 zzcSy*J!*Nr{CUUyo)2Zdk$cqgyuACJcbQ+AZ{!}eJYW92<9^SFGT+EOYI$DX{m#40 zugo`ck6NBDf8KGw=R=uq%xZm@k%r|n6TAr78zw<8h zEAx%qqn78(pLg8v`B3H?xkoL}%e&usm-&_XM($C|^X1Pw?)Q8s^Nrl2mgnW&@4UR z+eU-tdYU@uYIz*5tK;L;_wm@E$UTmqQ_O>y59%oYzK=u9+h-R!NA4RI^P)VCpI7_k zLF5?oH~KwtjMw8>?N`2kY+jLn%#-r-;&pX2FZwzTk!$2V({a4s*X#6qkI&P{bEaC( zhj^cH93RKNjz{DF4?a#*YcSEOCGu@z zxBgrIOBed?p5K4uTA6Ek%zys#pZ}!`{H();GS~8$|8?X%Q@@sZm&fv#zxChxU%D`p zCtmND7iF&HG5`CO=d$)+nRj_CfB9Sgt^cJ9%j%5x-#Z;Cb1jegfA7rgPt{Ll-sQ3U zOde_db7T+3tr|8?{EbN5G?cX=#-`CI?3|D_9mZkK%iU&fs>*YcSE zf0-QqTJyckyF8Y^{H_1i|I&rOmRJ6F|Fj;Jxt7QL|I?25y>0%Md6&oXm%sJj`d_+m zZ*$D=bI+VAb1jegf6vCXFY~d?yF8Y^{H_1i|I&rN_~!RqvR`Gc&SVgel7DZkL53a>%a9s z{0JY)<9J;idC`|~SlS$CS_k@aoqo@yo0pMu>;oM8LC0~7W4tep)ukKXd+RtwoQo>)%u^q z1^(yA+4`S5XZg#&z0WJ>0snK)3;E06_rL$`V(Db9$NFFY>wo^}qht|MA>Es^|YXT;P9>oXhWhaV&rNTmP;9vF=B;{^xLk|2cAw^}ej~m%sJj z`XB3lRO^2Z7x;L!8*Z-qF;D0U;_@689(mDCd-}-O; zk99w4>4f7to#20+xh`$L%3uE0f9rp&`%#zH5AW-@AN=oket&HL$zT4~f9rp&`%(W` zSNwcGT;YE|a{6=illHd(v{H_1i|5*2<{-quA zzxUU1i2r|`alg0BclpcT`fvS@bwBF8#V5bVpVue;|9Qvv-adcjFMsR5^*`4AsP}fa z{C4R+78GRl#_5qJ^h}Yvd=7T!&q3`3cXuh{^ zx7zDJ-|Ppj<{O8zirmjGj?Ig@`=ZQ0@{ZTlQU0}$L*yQ{_UmpCk9^ z@5sBXabB%^K9sq~vE1v&`v$jl9bm=heFBL796T%f0U0 zBj3ootZ`ngdmfaz$Fbb&&OP#tyvrKr)w<_FnR^_|z3$v2-^jbHabB%^9+bJqvE1v< zJ@Spb%Npm^y5~WedmPKX?%X5a$h)j@Uafl`l)1;T-0RLg@{PR98t2uz=Rui!9Lv4# z+#}z}yR30ut$QAnxyP~G>&`v$jl9bm=heFBL796T%f0U0Bj3ootZ`ngdmfaz$Fbb& z&OP#tyvrKr)w<_FnR^_|z3$v2-^jbHabB%^9+bJqvE1v&`v$jl9bm=heFBL796T%f0U0Bj3ootZ`ngdmfaz$Fbb&&OP#tyvrKr)w<_FnR^_| zz3$v2-^jbHabB%^9+bJqvE1v8HS(QQZ=Q&<7mg00%h00S<70103K02ROh14sd`2 z9N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8 zaDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0W zzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G;6QE%?)uV7a}Qo` zaFc2r%maPE0S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h0 z0S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K0 z2ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`2 z9N+*4IKTl8aDW3G-~b0WzyS_qcHs8;I~;ZSlLt2q>w0RcagclNc|TMB@wz&iAGMCd zOy0-!uHUT(>S!L+G7gci{OihfY3HZ>bLJlNL;mt_zxVM!=RDB=IdkWK?%d@s|D}1} z8|y>N7qzZ=6ZyLTt1H)~?Nj;Z%su9Z{N?Za|52AmKV+-=U;pcW{qO!irvv=YnS0#l z$Y1`}f9t>XKc@ry&zZaRKX>l(m%o4ioAeCW^}qht z|DONnbb$XkbNBowckc3+zvq8>@B8$>{@4Hd-~E4X2jnk*&;RnS1NvY8>wo?4`F~Ca z_@6U(&wp~~E`RxZ{+IW@Pyg$G{jdMs|L1l<{_^+yFYh{_|MkEA*Z-dX=X8MoIdk{? zCwK1hm;bUn|BH1(9nG8C#v$(i%Ik{P)lu%Xjzg3CZ{|P#yRUqpY8=c1eZT<@aDW3G z-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$ zfCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h0 z0S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K0 z2ROh14&-*=2lxNX&(2tXaMQ4^r=}VQx#ynu^3R<=@8#dF|K@>tU>@iL4sd`29N+*4 zIKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G z-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$ zfCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS^{-htQ$I;wG6 z{5aJ%E=}IYUVGj3k2+E{4(5SA-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`2 z9N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8 zaDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0W zzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4ay#(AW4C(fVdo8QQjLRopbt2} z0S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K0 z2ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`2 z9N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8 zaDW3G-~b0Wz=6aL+_>5H$6tKk69=ksFc0(r2ROh14sd`29N+*4IKTl8aDW3G-~b0W zzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h z00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70 z103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%gb*?}9L@Xmc7yThgf z)i{_3`hWu*-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G z-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$ zfCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h0 z0S<70103K02ROh14sd`29LVgzFE3gD;oth+o&(i5mwo?9)8{?nDFf9wm#J9N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h0 z0S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K0 z2ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`2 z9N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<701DPH8`LjN|^{(3;K2VK=d7uwC zzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h z00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70 z103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh1 z4sd`29N+*4IKY9-4*cv9k9xvwD}QI88VB=0A8>#J9N+*4IKTl8aDW3G-~b0WzyS_$ zfCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h0 z0S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K0 z2ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<701DPH8>7!4&?c3YD zd!QNz^FSYPfCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$ zfCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h0 z0S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K0 z2ROh14sd`29N+*4IKTl8aDW4u9k_1W8y@tofBM8gH4f&1KHvZcIKTl8aDW3G-~b0W zzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h z00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70 z103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02QoYG zlk3jd^=m6%Jy4B@JkSRm-~b0WzyS_$fCC)h00%h00S<70103K02ROh1 z4sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4 zIKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G z-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N<7|2M*ikfj9qhkHL>r<6s`> z0}gP2103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K0 z2ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`2 z9N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8 zaDW3G-~b0WzyS_$AhQFftbO*0^A-$#q#6hFKp${`103K02ROh14sd`29N+*4IKTl8 zaDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0W zzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h z00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCGsgSa9fy7u|mP z?15?=%maPE0S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h0 z0S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K0 z2ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`2 z9N+*4IKTl8aDW3G-~b0WzyS_qcHoDvU+bu!oc*YQY8=c1eZT<@aDW3G-~b0WzyS_$ zfCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h0 z0S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K0 z2ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14y1Nq zubUS9daXkTKU9r_d7uwCzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8 zaDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0W zzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h z00%h00S<70103K02ROh14sd`29N+*4IKY9#4lH=mh36i8_a+0?IG6|efCC)h00%h0 z0S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K0 z2ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`2 z9N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8 zaDW3GNbSI``<(gH$L~E@pc)7BKp${`103K02ROh14sd`29N+*4IKTl8aDW3G-~b0W zzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h z00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70 z103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCHHw*yM3fyy$7m4Hl@z!936h z9N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8 zaDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0W zzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h z00%h00S<70103K$Vh4VB+lfzp+B)|gsK&uO&<7mg00%h00S<70103K02ROh14sd`2 z9N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8 zaDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0W zzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G;6Q2zZol>W&wJL3 z2R~GegL$A2IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4 zIKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G z-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$ zfCC)h00%h00S<70103K02ROii#11Uj=7yag^1KZPs&Oz6^Z^GrzyS_$fCC)h00%h0 z0S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K0 z2ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`2 z9N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IFQ#J9N+*4 zIKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G z-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$ zfCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h0 z0S<701Bo5@!38h<$`7{q-9R-C=7B!o00%h00S<70103K02ROh14sd`29N+*4IKTl8 zaDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0W zzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h z00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0QJMiyUulSf9k6Uk`8VB=0 zA8>#J9N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4 zIKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G z-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$ zfCC)h00%h00S<701DPGT;*1lXa>hHi8mPv>JkSRm-~b0WzyS_$fCC)h00%h00S<70 z103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh1 z4sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4 zIKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N<7^2QHtwb+vtd zv-UtW4(5SA-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G z-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$ zfCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h0 z0S<70103K02ROh14sd`29N+*4GCOeDYEQmu%gvrPP>q9mpbt2}0S<70103K02ROh1 z4sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4 zIKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G z-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0Wz=6yT zTzb=;7vJ@v6$h$uFc0(r2ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h0 z0S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K0 z2ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`2 z9N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%gb+JRfHnf>C&?mxIxH4f&1KHvZcIKTl8 zaDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0W zzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h z00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70 z103K02NFAQ*-FQq`JoFwI8cp)d7uwCzyS_$fCC)h00%h00S<70103K02ROh14sd`2 z9N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8 zaDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0W zzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKY9_4y^UVBVV(^HwKrf#=$($ z2OQu42ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh1 z4sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4 zIKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G z-~b0WzyS_$fCC)hKxPNlx%SGh?{>!EGSxVk2l{{m9N+*4IKTl8aDW3G-~b0WzyS_$ zfCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h0 z0S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K0 z2ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70102Zgz`C<D^t*pDP>q9mpbt2}0S<70 z103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh1 z4sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4 zIKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G z-~b0Wz=6yTeD#=ztoOpJR~e|r!936h9N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h0 z0S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K0 z2ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`2 z9N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K$W(Usw_EGC>yvw5os&Oz6 z^Z^GrzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$ zfCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h0 z0S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K0 z2ROh14sd`29N+*4IFQm#J9N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh1 z4sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4 zIKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G z-~b0WzyS_$fCC)h00%h00S<701DPE-{^_X;N0U?<6s`>0}gP2103K0 z2ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`2 z9N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8 zaDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0W zzyS_$Agu$lrdF7mHP!qz{99X|-X2ZOnmu}9qv6ri)Y8`3Q)>@DYVN}aQ!5RBcB84M zt~&M9yJwF+X!+*V_LG-eZn@$Az)H>g+rMVdde8mV9sbkQRMhtHzTKy^Z+_fo58QET z>ZR?=f40BwP2;gqca`fdaVwy5>N9sd@9Y)SYR2I)*B|ixZ#H>v@q)E(esEJi z|C3*Q?pX_SzvGy%e(n1QZgA)w7p=V0o*!uH{u}-3^qso;YIQgseIAFC_ub;i=QlZ? zx78k}p4jC6#r-Z^Z`Y=F{XU)hG2i^iDI0EZ=Bd@cxspbc1`X(9dq3(Xa4W}r~5zpebL1UyJZwouuIUV4C&fKm4xpSAl{M&yI z&i~x=K>qUg{m=J5-~Vzt!2g`NyZ_IfyZq(v`(NJsKK-x%^}qgi|DW3d`ODw)zr5>! z{@4HdU;lglpVI;U=gi&npWM02U;du|<-PCI|N3A5>wowExgC(d{5}86yAJ4o{jdM^ zzvurs9pHb?+&%xvoxA+y@A+Te`#$}z|MkEAcmJQ;0r|_{^S`|7fd1G2_q6|G-@o=g zGQLl!{hCiR^?$c7@Ex%~Y`0J7?cIKnv2Wn;&yQ>NkBdJ?P95#1)|YX(J`+&!O(6KLY><=6JgtoQaFLKf2cK=YvqkUBG?c>n(`*iN7UcU8x9=yR9 zwz}UL>;Cw%4Zg7DiU0hapYGJumtDBxB_}j>^K%}4=3Qzv+zx(2czvADUdi3RYAMtfAo9g{;vD~^zde#*y2s^-SOP!enIW&IK21D zTX+5WBKdE#&*i5)Y%%#q|3|;ey^eW*^tWE~nf>LT)-USl|C|o+KWFZ7A0U7ETmP;9*8iLi@IPnn*8kkO z%U}NOf49f~-19*G^7sAE_dnnNayr2OoVmOI&z-ycwx~(|N3A5d;Xu(0siOA-SeN^xyxVvp8w^&@6-SKU;pcW_y4&akiYyr|I51$ z=zsmM|MkD;|2ZAtf6m-J|H+-Z{N?ZYU*7vZ{jdM^zy5dspW6ZX%ir_Ayz9VB{om~i zJiY&C>=PRMMaI5?@$>3vAHBYf!!Zx|hm#I#{v5EYKflcvcWL&0-1dntzU&iC?e>F? zeSu?t*w`mj9qpIe`*DceSAEN)U;phU_mc+)uX@rV`+&!O(6KLY><=6J#HgeFQu{g% z(eIJ_+&6t-{(Uz1@sp2u`@Tm!x2Z>-a`bzzZtDA)N01z{cqUq zwcl^@Jm#f8UwQSWzUc)AE%>*lM()R6_>=P=+8iJM|E_x2rcFKJoP%F-sanrCJo`C! z|3{POv0vWoKXz#9(KoKS>O-0u{T{iWeAeNY-qE~%(uGfX#sN+J$hS8C>C3z0{ng>P z^l=D?^U?2-`)PCk@vWOQ=TH00t6%ajP5tO$ zpMK#h)!vT7X)C|y->z!%J8_Q_e)zJczW>&*ZQABp<{$kYxu5oh^-e#jdEaSs2hY7s z?e93O@tV(^+2nWPX-B^5olQM^}qht|N4KC z=l`DnF2nuvz1{aozaRhk&wu{&Kc@ry&zXDN7sy}!)_?22^*^Tr{Lh)Y^*?v+@|S;e z-xvQKzyJQrfBzN#{lDk`p8xy*|48D2`~Tec0rHo>=YO96dH$Ev0siOA-SeN^xyxVv zp8w^&@6-SKU;pcW_y4&akiYyr|I51$=zsmM|MkD;|2ZAtf6m-J|H+-Z{N?ZYU*7vZ z{jdM^zy5dspW6ZX%ir_Ayz7Ae*Z=xo|9k$Q(*geH%-!>!+__K4KlTMaq1zue_6d#s zB2Vk~4U9iOLhbg$>^$wuc$~V)2ma&e_Wp0bl|FQOQ``L?+duyy_JfXnfn$Hz*e5jp zybyJ?Z)R`Dq09ZaUw`7oCpNji`xU=`PFrIi@YoMJ_63gpVPl`rm@n$6Z~8h8k^5V( zJm>b`ZMfnqZaH|@?XPX?2~R!u@7wzI*WY+kTi?0W@7}J~GY+T!&!@L-^W5y_N4~PH z@40#9?b_Pqe(Qt&eXU{MUwh~5)7sZB+U@?Esr8J*x32yBb=o{{dfg|kY3r{Z_}G7F zYxH}U`>nGUto_{f_5VI=yQjAGqAPxX$)d;cx;h%4zKp{Sv#&j&&GEVqANTF1-n!=d zSNVQ>9L`6-$Mqck;TOKXJ%9YuU-9y`9(l+!S6BNx4w2*M-*)^H+Z@08zK8#~t!4hv z@A3XuAGXU;?fb4+|UHR4)8x`?vtMX#J<3>KWyw18v8%SzJW1+)X{#JeH({&{os{nx9k7= z&fD@KZH@gOV?XHF7r5IW_LkTuG}a4sv~Ol#$02gx{D6NP?&G@k?wu@7ns?1zZ23t&#h!8(#Xo_V|vU%>8&<@A}{e{#~tR9JZac{OuoX z^4xa$<>wFQ!EIN4=TmLI(eGXE+fF^PcWqnmyXzI3E_xiVtE2Jh$vC{^ zfBy5TOWM5e`q`RqXzNWo-u30ShVxy&Pv^eoPS4zPnEUp(zC>;Mp^|Y}`F=+n-{yYv zYhE;Tpv-@|-@EI*|5_Jr+~&W=Z!SGt?e93Ob=AwSYx7(ArcFQ7=KqIT4?A>NyZgMd z|6{((y^eVweJuYv@?6%wlz-0LW1h%g{?`Bg-oHg3-_xr9^}qht|L*^DI>7&&xySv2 z{N-=`xBgrIb2`BPoVi>7bLTF9`M39d{LeiPwx~(|N3A5d;Xu(0siOA-SeN^xyxVvp8w^&@6-SKU;pcW z_y4&akiYyr|I51$=zsmM|MkD;|2ZAtf6m+|J^$(U1&;k;W1rCJ{VrqQz?eVkXg|!p zjl%=}<_G7t>;Ebrd;ANUI=%nn???MVPwxxd?GL+cw@+y77pac+&Ft+sbh&T4#@n9# z(Ki3zZT6(K+8XBZ$g?OXJ@y!)AVnOoP#@=xlR{OjoBvagr+SpM?2{$G{${#F0$fBmojyYK(L|0Vk# zz<>Vppa1;N=>Y$8<{r<7wnMxb2`BPoVk1c zlRJ0$%ir_Ay!U*MX()|Iz-B_J6C(i<$E8_63gpKVzTJ z>HR;ueFNjq4^zASFuU>W&3MHAk8eG2ui?IrvHxSYA9U;s9Q(t@K7s0JztrB3L*%~V zMK`qhAG+nf+YIMHw-5OAe$d^%z|;G~#yn9+`=$1L93uBOuJT_yZ}^5syl(Cj4mf34 zKl$_P+q&iz^DY?H8}`3RZN{UPahT3^pFx}Jeb;!}R9ojp?*I1{Ck*r6@dc|4>()pA zx7y@i+c>=Oq3;~#{-dML80H)EqRV~m!8d+=c>byX^#HZ$k6OlI_x+9;=DEeI?mw*W zID3P(PW#r)^XcoIck;)F?^{?u(H*O;oAXZY=Q!-J@5hF@e`M>&FID~<%{zIRyIRLM zME+;4@xlrDkLI`B>&QKFlz&}$My^X+! z+_}qN{+|Ekz3}Qa-TUIt3sgt@ruKduwp;nq(f+U7UEkK}eZaf@py$TEz_CAU zIH8XAOYQqOd}NgS+J4w(~zVd|j<|9F802yZyZVhBf9z8FV_M6umAObzVChEfd1G2`d|Nh{*%)I{^!iy z`ky;@`OClk_Z|GtJrCqBf8YOn|MUGXrvv=YnY;V{+_}qN{=Wa^z3!{@4HdU;lglpVI;U=gi&npWM02U;du|<-PCI|N3A5>wowExgC(d{5}86yAJ4o z{jdM^zvurs9pHb?+&%xvoxA+yznJHL<^Hd+PiVS*5u-n9uf_Tw`$50${L#L^u|I75 zIl<~^ztmdBVV9|Eh7KNnpNoez_5qLmpkrU)*dI3b2~$V=rS^RsBKO_)e%FQ=^=6U43 z-*H4&eIWmH%?JITD{ubi&RhQSZ`b>n2mH@HALK88_x}q$3>ULh{jdM^ zzyA09C#M7a&zXDN-^gG7)_?22^*^Tr{Lh)Y^*?v+@|S;m-^c&l^FaRc_x;cJKi~gy zI>7&&xx4?*oxA+y@B3fg`#$}z|MkEAcmJQ;0r|_{^S`|7fd1G2`d|Nh{-4tU{^!iy z^Pk+g%U}MU|K+{!)BpNk|LcGE|G6EIzx+M_%exNffBmoj^}px;IUV4C&fGo!$(_6W z%gS`k9|Vpeo?Ki*OF0>}Qa>iBxu@9~J-_rLZJ8(zQHmfySalI4c=rf(jj zHsewKIPA31DEA{iy!gD&eDLUXwd!%W@raYg^Y8M$@yKn5ud9`g!|C51<^Gm`U2^`b zynQqe)VjtY@-Nr>vj62?SMHIg{Oim$@|{%WFaO1T&*6WLd7=MvolCf6m<9|L4wK{_^+zFYkSy{@4HdU;n%R&+UNxwo>P z|2_ZD=>Y$8=I;4V?%d@sf6xE&-uLN${jdM^zx)5(4#;2rp8w@t2lT)G*Z=z8^Z%R< z@IPnnp8w>|UH9Tr}OUi0iWIvy4)AITqhh4{pjsJ@zLi;oHST@ z^Rw1j?}GD2^{DIBc06hyhm+Qqdxo4R&EuK+{`~ih=7(DMI2`@J#d{yQANx0>^J-n= z5c$sJL!6g;UAae|k+1yg%r|nLsmecR?lB+aFaITd|B3k#^F^&|zC^y(|GIKr+CG(k z&fH^u$Y1`}|8>@je#ln!zy8<%`rrM3P6zm(Gxxa9k-z+{|JHx&e@+MZpEGysf9~An zFaP$wkN>&nf&As~`=9TBzW?QPfd4sjcmJO|clpcT_rJXNefnSj>wo?4{y(<^@|VBo ze|gsd{jdM^zyA09Kc@ry&zZaDKe=<4zx+M_%X{Ca|MkEA*Z=PSb2}h^`FsACcOB6G z`d|O+f6xDOI>7&&xqJSTJ9qiZ-}Aq`_kH?b|LcGK@BTlx1M-)@=YM(E0sXK4^}qg) z=l`)EbR35d>d1*&$072LeW0Vxv@dYHUmeYhzK=tb_hVoF!0n!Q4lu8Y-nJgOgu zgXEqf=L!8hQ{TVu>7)6f);$jIe#7FukKB)Y=jgmz*EmGJGp%!RUhZ|}9(hK-@~<=B z$a$tJ|D3tUe2~BVm-PK7=10sIwXXRR`C9+$%5`b`RQ@@0kNF{g`CI?DdtUTIwyOX2 zzy8<%?*DT-!2g`N$9<0c7&&xm*8p=PrNwxA%Sg&pi+1FMr?veE;+P zFQ)_i&zZaX|J=FDU;e)T<-PCI|N3A5>wowExgC(d{5}86yAJ4o{jdM^zvurs9pHb? z+&%xvoxA+y@A+Te`#$}z|MkEAcmJQ;0r|_{^S`|7fd1G2`d|Nh{-4tU{^!iy^Pk+g z%U}MU|K+{!)BpNk|LcGE|G6EIzx+M_%exNffBmoj_5bwq|876%I1V4wkrTC!L*yO% zKu4WvU*LGZI+_=KABQIIjSqV5C$GC;R#%^?4#y+QILLi8|Fh&>Ugr$?pMT$x18Uvl za6vgAy5s5GAKhKQI&|XvTE?O4Z}}&k-}0}kpO@7o`OClk-e>*KIS=%I&fH_2kiY!- ze_b`-=c2Edw(5WVumAPG`~RE{@IPnnaep9x`CI?3|JMJU4)8x`?$-a@xyxVv&3#|| z_dWjopa1@2asRz*%#XCs$HU25tNXv)>wx^_@A;4CKc4^Obb$XkbC3I3`ODw>Z~eFa z=X8MoIdixE=gwXJ^7rq5^4|C9fBmoj^}qZ7+z!ZJ{+|EkT?h2P{@4Hd-}C>R4)8x` z?wzE1{jdK$|Ig_F|8wT<`A_cL z3{vN|MkE7|J)8NZT{W+rvE%Z$D?^u$v8~^`Jb^5bkrH13r)Y@`Qbe1 z{dmk8?D(APANPP$o;Io{|E{aIsqMI&T=_W2J@-7H&cB@Jw|C>B4t+ei)^R9vkKk23c-uJw9d z*Z91Syvu5w#>c7F@hEeT<65uRb&b#K$h)k@X?&b&9gi~iIIi`2UDx=$j=alioW{qg z*6}EFkK&UyT#%X+ zk23c-uJw9d*Z91Syvu5w#>c7F@hEeT<65uRb&b#K$h)k@X?&b&9gi~iIIi`2UDx=$ zj=alioW{qg*6}EFkK&UyT#%X+k23c-uJw9d*Z91Syvu5w#>c7F@hEeT<65uRb&b#K$h)k@X?&b&9gi~i zIIi`2UDx=$j=alioW{qg*6}EFkK&UyT#%X+k23c-uJw9d*Z91Syvu5w#>c7F@#u1oeW0V(zVEr~8}0Ai z0}gP2103K02ROh14sd`29N+*4IKTl8aDW3G-~b0W zzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h z00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70 z103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$AhiQ0pa12%KC;Oz1JyW~2l{{m z9N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8 zaDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0W zzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h z00%h00S<70102Zgz-JHGZ?~^}>f-~|IG6|efCC)h00%h00S<70103K02ROh14sd`2 z9N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8 zaDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0W zzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G$n3zmZ#nhl=U)4T zfodGg1AV{&4sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh1 z4sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4 zIKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G z-~b0WzyS_$fCC)h00%h00S;t#;KJYBc=ahyIc=aC2lGH5aDW3G-~b0WzyS_$fCC)h z00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70 z103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh1 z4sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`2nH{*~ zmY+Z1gl!HSsK&uO&<7mg00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0W zzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h z00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70 z103K02ROh14sd`29N+*4IKTl8aDW3G;6P>vzV)@={B+;tHyNnL!936h9N+*4IKTl8 zaDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0W zzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h z00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70 z103K$W(Tf($~o`<$ZyUcsK&uO&<7mg00%h00S<70103K02ROh14sd`29N+*4IKTl8 zaDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0W zzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h z00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G;6P>vzWYCO*8lV;4j!n+!936h z9N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8 zaDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0W zzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h z00%h00S<70103K$W(U51(TmRe?W-;ysK&uO&<7mg00%h00S<70103K02ROh14sd`2 z9N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8 zaDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0W zzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G;6Q2z-n7-$C;!_) zgYT=x!936h9N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`2 z9N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8 zaDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0W zzyS_$fCC)h00%h00S<70103K$Vh4V3`pwJz{vFp1RO4VC=mQRLfCC)h00%h00S<70 z103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh1 z4sd`29N+*4IKTl8aDW3G-~b0WzyS{YfA;P?%;$RH|F}fbDmk{Ym0gyx&WtaE7E+c_ zAtXe|ntiE3wvc_xP9e&^&G@2}P(=16k(81wQAqLo&Wt(N@0{OtU-$jHuKS$(p0CIG z=XJhvG4p(XKaZL5&zl1r-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4 zIKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`2H?so=+r?&CP-8=k z)xrC~bHD)(aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8 zaDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0W zzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h z00%h00S<70103K02ROh14&2NR9BBPk-RT8R#8@4?4?G7P-~b0WzyS_$fCC)h00%h0 z0S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K0 z2ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`2 z9N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N@ss?7+U3 zTRJa&V{wet!TZ2-zyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G z-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$ zfCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h0 z0S<70103K02ROh14sd`29N+*4IKTl8+{_Lf%(1a)gDnSRtPb7>o&yeWfCC)h00%h0 z0S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K0 z2ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`2 z9N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8 zaNuTk;BZ{CAuqNZ8Dn+uKJXlHfCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8 zaDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0W zzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h z00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3hvjYd8`f}^o6W_#G9lQ@b2OQu4 z2ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`2 z9N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8 zaDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0W zzyS_$fCC)hz|HKyfkIzTtaeZF7^{Q#f#-k&9N+*4IKTl8aDW3G-~b0WzyS_$fCC)h z00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70 z103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh1 z4sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70101-i9T=Kx$+^DuVh&gx zybnAF9N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4 zIKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G z-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$ zfCC)h00%h00S<70103MMP3*wI(NAZ+aJWZ|)xrC~bHD)(aDW3G-~b0WzyS_$fCC)h z00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70 z103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh1 z4sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14&2lZ>_6G+ z#QlR}4q6?&4?G7P-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8 zaDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0W zzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h z00%h00S<70103K02ROh14sd`29N@ss?7--=R~n={5OdJ#;C4sd`29N+*4IKTl8 zaDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0W zzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h z00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S?^E z4s4rqyj#_W55-s=ybnAF9N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K0 z2ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`2 z9N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8 zaDW3G-~b0WzyS_$fCC)h00%h00S<70103MM&FsMEneyHCx4SFHSRK3%JO>=$00%h0 z0S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K0 z2ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`2 z9N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8 zaDW3G;K0r7z}6ZQ&(uHmag5c$`@nO+0S<70103K02ROh14sd`29N+*4IKTl8aDW3G z-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$ zfCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h0 z0S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS{2)DG19YFMU)`(w6R9lQ@b z2OQu42ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh1 z4sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4 zIKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G z-~b0WzyS_$fCC)hz|HJHv&Wz8J|I43tJT5#z;nO>4sd`29N+*4IKTl8aDW3G-~b0W zzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h z00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70 z103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S?^E4s@Mgs>IC2 zFfn9gIp6>XIKTl8aDW3G-~b0W zzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h z00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70 z103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02X1Bu zo;mUA=AQ<}Y_&RgA9xNpzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8 zaDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0W zzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h z00%h00S<70103K02ROh14sd`29N+*4IKY9M*@4<+zCTm4sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh1 z4sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4 zIKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G z-~b0WzyS_$fCC)h00%h00S?^E4%8Y}rO<`Ay)jk??*q>P2ROh14sd`29N+*4IKTl8 zaDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0W zzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h z00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%g5GdqxZ z`jdsXXPF;kb?`p$9B_aG9N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K0 z2ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`2 z9N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8 zaDW3G-~b0WzyS_$fCC)h00%h00S<7012?e)6_2HV@s5+v4z@aYA9xNpzyS_$fCC)h z00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70 z103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh1 z4sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4 zIKY9M+JU)?pK6z{O|Obp2k!&V0S7q10S<70103K02ROh14sd`29N+*4IKTl8aDW3G z-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$ zfCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h0 z0S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0Wa5Fp5@!<*^3mxuO(dyuR;5py` z2ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`2 z9N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8 zaDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0W zzyS_$fCC)h00(Yn2TG;M{CT=7BP&`RybnAF9N+*4IKTl8aDW3G-~b0WzyS_$fCC)h z00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70 z103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh1 z4sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103MMP3%C0G><;k>8XbA zR!Ea!+LmA1q^*$VmOee}Kf1R#n^HTpc{q0b z?)xjG8Jhpq*QPhEkmj}e=SQE54IIvLvEap@S_clz+}7*Bz25{5Ej^uPLA~1ohbC6; zefsRdz~SJDXNI&~A2`&s_U~)ghW@)vnH_pv{Ogv@)dGj^zI(;^1rG%dFFcX+^QTh> zR#fbEs#io*U`@``e`{6a)4<~IR}bxxX;)xnwt@2w#V-#mu37KU_$4ucWtpbmx^TiB zHbr*0Gq7UM&!yLQT^jhX`jyR}{PU{iJV8ot3b9R_gI?%gEl}VGoDHRyM zwaf>ff6yQ>wa8uH>|Xv>U~1vG<~jNm2rRky*Xdg>$J-RzVOHe!-6yL)9vF0aah8ZH zy91-LEcmubsj+d-|4`}5TaDM3ZT>-%<#XRT66oBcM49)GObGPd+b&C&?;Z(^Irqn) z!b^V(4BWi!sRox@*p%5J*ztkNlgi${CN8Q#`K*8Dem&}F-iMa8-+W@(OU>uq-!J~% zHOI0S{OD@`tHD8+qgv(pu})m6frHP_YfvmOcF4xbd4HQ9_qXZ;M_j2qGnN0Pr~*;h zpRD*zm8DApL-NIyj;dcVa`NeYF|(HqUfDF&q1*3`jYv_Sj1N`;1r$&~0RnJ#ywq zw{>a8{n$DEu=VTGOqtYo+sG;F(mdPiiHL^1)}`q(u+si{=hmeuKK)4dS*O>fiCHso z-SYW1rFO_yD*M9p&DW)wRPm8HAEsHCrsbA#&FYm7Oj_Hu;L%U^21e|wes|L+JBIFi zy|VAU9DfDIt=e93Qu>jBN%{7UUiiWzNv|L1V-vchzz&J$*kNK|)|*!|E*!HvFmTIj zRla$(TVPDXx8Dv{`y()+>FS}Emy`}n+uQa)>rqPr@6JDS>w%aJfw@gP7k%mY>wkZp z{OdNUPfF>Kc#eC13XC~lY3QfH){lAvv#dab!i6eH@44xYjoLad3 z?`QY#3{G0xa?aQ9-4~oOxqJ0n%eGH?y=bXT=#m0EB%b5_?Sh>KxfGaiY39WR6Q2kSx^#R0(y7)4VzXZQ`tFbJPkP;Bi%sZ~ z0y`w07>3Mim}0tzUgfC36Apnw7jD4>7>3Mim}0tzUgfC36Apnw7jD4>7>3Mim} z0tzUgfC36Apnw7jD4>7>3Mim}0tzUgfC36Apnw7jD4>7>3Mim}0tzUgfC36Apnw7j zD4>7>3Mim}0tzUgfC36Apnw7jD4>7>3Mim}0tzS)DiC*hQQd0mYGg>d=dE9v#GQ0# zNZj{_7R_GQGU-0RcdF(~x)-j`gErxw#FW<|@mwblC>`vW?Wr#cKAbjg@8R}E3rs%~ zjLlx~qpK%Z1mF2{|E!&lmJamp{!VbKD{H@CTV-*O4A6Goz77nn)9I_8TSo`p8&)?u>Qc@?-@Wa!^tHU5To%j7`KCBrTm@{P3;V1gO8kih;{rA2l zehW;>zwDLjS=T3Ro-zOUI{DXais|xB%Qrq5y)Zfuo4st!rKL{?#+=)f{i)JF2PW?r zGGo+t#Y3+zj9nf1d~npF!bhI_s7K(f8t-Q7mhrQs&D%b4yUw3!LtRp0htZp!9)IuG z4+LU!{?+)q`b`2;i`;W$ra>RwA&qM_|7ZMo2Dulc&Fv=m?z4Ax}wg?ZS&Ja zWmuYbOtpgVZ2u$vmU(a9AGhzP#P1I#4Zk}$B!AN)GmkzWj17JL%iE-;O`#p?gns_} zBy+*|!b6|?CU^F0fkBs}TIH(t@`qImSITU2#v7 zmW{VR`bEi%d#ZUHuUcnlmCice&xY0PCh)S z$C8&GeeA8yeY>Qr&&EfqfC36Apnw7jD4>7>3Mim}0tzUgfC36Apnw7jD4>7>3Mim} z0tzUgfC36Apnw7jD4>7>3Mim}0tzUgfC36Apnw7jD4>7>3Mim}0tzUgfC36Apnw7j zD4>7>3Mim}0tzUgfC36Apnw7jD4>7>3Mim}0tzUgfC36Apnw7jD4@XqY=Ol4-9q<* z1v~dBQD$y~PZRI?efWz$k(YMdzG`#*%d5M*JRn`mUOyz>A2w;|F&nx^Ed_RnzGdE^ z4&`2+8=pRJ-|Cfiyjgb8EiF3yxV1^JWA=g{U2VBK*tX}NZ$6oRWUy`T0ehY+*ejTD zKVXhVD}ux4JyGY(kMo07&Xs?!T87*f32tbP7f|vx_0Br>2-rs#>NiHxAahO!td>Km-{00b>NMz zh2~eiC(!Zk?jO99_OalwH_}gA@x>p(-UD~e-cj?iO`#n|7aNi1m&tbrd*&Vb(VaEN z1!rcBtC4rtpNak-Ejv5dJM?v}_n<$66W=~^@8gB@1c&5{T|K3(AR^uwda1A>F=+Tf88eaNhuu?&UMybse|Kky*4=No%f@AJz2I^?H3jY2mStY zk9=E3CH@@Hx8SJYsBD{uoqlUfaB{i#M$~<$Sa8s#+xy@BS$xv#7ME;7mlW7x%=wzP ze0geVpzpp+zpa}1M&fzbY`!Y`(UWP@bQ&@)x_P_qe?W-+_sNp?`zG$f z44G```=pfGVQ%`{qH904>%J$}EsuWg<>?a(4$S+{|N0b2_&#v_;`f)X4Q=S(g>JkL zswThw=il%3|Ly`2yPNiY_Mf4 zqfk3U*iKdRK2m5(;<{oJXK*F`7Y2Uyi6-iw&xI;?Eo>011eK*2SKdo-=^ z(B0csJlApJff+xQ33To{?)mliKN1~%ZsfMP=~@QHoUgR@=l*{L+J^4`YrS%RVEDWz z>YQlzc;dZ){nECwp)M)0L&wl_eek89g9Epg`Cw(E_KD})|F)+Rw|Gd=vN!+k`g-%$+vsMw~Y%1rnjpcQEcdjz=(a-@6Mj{!}$4k=I#+cuzLK! z(chJOt6!)1&qh`5R`i>F!H^dp*H8794Wkb2H%ErbH#OZ@tXxN=<9yn_M;jlZkI!A{^51kG8ylHSqzSva$K1?Hur9;`e=_`@L7bFnD^7;@Q{7kGk{oSx-#85WnD?2Y&CE zvs!%Jc0HczF?e>umKvIOcYO19JxZ^tQPZZp4kZ_y%NM`!&iB*poLoB?oAuCF;}$KA z%bUJtsvZ|UUVI{Bs>%1=_2dV$S2p~xeTnYHpT4W{#mS%jT6*lfR4cl^{l}WXXOpIF zE&bq{azl$Zd}2j*n?gI3IsL>PcjxSv;Qpl6*S=qKwfmx?8M}1(=YNfg4n6O$?x~hK z`PWMY$`yJf!((ZZU)z*bp!k4%jUOB|_KQ;a(&pUQJM9mP`)t|SJ;%{2A7m=qu|n}i zAEd0$#z(7w0tzUgfC36Apnw7jD4>7>3Mim}0tzUgfC36Apnw7jD4>7>3Mim}0tzUg zfC36Apnw7jD4>7>3Mim}0tzUgfC36Apnw7jD4>7>3Mim}0tzUgfC36Apnw7jD4>7> z3Mim}0tzUgfC36Apnw7jD4>7>3Mim}0tzUgfC36ApuqoZfif?I?g35PdCZz)4`euc zruA*n6OPm#Jhga>6%Dpz$yV+5yvr`+YkbRn!xt`paLt{w2iLyWrpykZbA0Bny^Wg0 z_YRbc$~|~S!>~F%Q_uK%WBDD`_Wal;y3U&4i~c#b=2wqJ z&;01f*PGf^iC$g(c(-P&UJ2d*e&n8GyAK8iZrNP2aOn(zzI)qcP1R*e;$~d%hYj^e zp&hcEF5fFKe17KH&veP3aIgDEB_6sMy=X(3Vi{&+<_q3)YK15=CS>+#K|AEM_za(2{R?RN!+6nJRt zmrGU$I%YkzeRq$dfl2ws4=evcY_RYCQy)CjJ9*!@@!BSJNwFQ`M<4!fR_EVm1Seej z{8XM9l|uKq$Cb#L<3wOwuHKpNe7tHP4{G&6MGzfO?Q6jo` zi~8|Dz5eo^rUQp0Zkqga6Ss4^B{tM0rFAHurAWfLme255v0&)AzIWv5(!naD^k+NYP= zgdQoUL)2}TKL5Gaq1O}7wM_Qh369@UuJx|Kj4AamzrW&@55K?oT$UU|rY)F0B**>n z53EoA*NBosXBDc|`q1)K)gKsh@=&K`tDd+rDfjI&3fUCe;nDrm+TFQyO~TLrC98D1 z62Gi`gDcNp`8@f&|D%mab-BUVQq>pzNB`Zg|L-nPbkyaB_w4M}I3h=x^vxmzw|*FV z`S|`H?*IF9{=fU9_t7b!fC36Apnw7jD4>7>3Mim}0tzUgfC36Apnw7jD4>7>3Mim} z0tzUgfC36Apnw7jD4>7>3Mim}0tzUgfC36Apnw7jD4>7>3Mim}0tzUgfC36Apnw7j zD4>7>3Mim}0tzUgfC36Apnw7jD4>7>3Mim}0tzUgfC36Apnw7jDDc0l!1A_%az$Fy zJs5d7bpPXv=s#}c0JNd+c0OnlC}RXr9Ql7B+}%IDGrM^{T-f5pYx!BN@LjxHCMJ9=}w zQvD`IY>ED)Z)DHRf7OZpq~_ehgWnxvQ)q{+5AJ%b`|tGQ)Jc?t|X&?4I($ z*lbU2IrrWC;PANvewdjqbFgF9EO`eEnGo!|=l$kK$E``+DZ6UeP>&Sep+$j8V?yUU zB-@cUjwamyoFTYkX2N~Yw>-9GdT`*D&3T94GCSdY@&2C0$D@~B8~9qC44V_S)9Gv7Yr77uE>Yppi}T-Y6FsEXL-R97%#Hr)t55IP zJ780wZ|Lhn=^xM7@ab^pp;;#q&hxHP&oxT$eqm&r`U@}A%JB7z1JeHUze+md4 zJkQDZI({;zb$EVuJ}U(jP(T3%6i`3`1r$&~0ROK zOtGOZDY3&lE#EI!{l)pg(EY!k)*5yp{@9Ywa_4H%JK_HC>zxWl2F9H4e&(4o&nDap zT%^#o)4{PrHom>R@GFVCv(HHz>XFhrB%EjHUiZbHeq22;cF3gB_a5CJ7?S_RVD959 z0~1F+-Dky>`vZe6HOe=+Wx=RQ2da%LJ*Y}lrB81?eBr|bHidVnnDu&#JrAY|j@bL^ z{&Tl>Pq+tudg}_^6TU7SdHH&_gx5t2_86CNFLeLERcW8FZ@#wfs12hIF{^tAVza(G zBuA6A3FqBm$~RpD1Gkpx*FA0ixI9B^b1qCFtnepIk#?rO~D=ZK3=JCh3WBk zH%m9>!*SW3D)8#?UjNzo`|k=Upnw7jD4>7>3Mim}0tzUgfC36Apnw7jD4>7>3Mim} z0tzUgfC36Apnw7jD4>7>3Mim}0tzUgfC36Apnw7jD4>7>3Mim}0tzUgfC36Apnw7j zD4>7>3Mim}0tzUgfC36Apnw7jD4>7>3Mim}0tzUgfC36Apnw7jD4>7>3Mim}0!alv zY_n}~m#k|RN341;cgx{*A6=bh%-DYE-d!Eh`;J--kG{N2r|>Qi?7Qdcf+C;hjLT5D zbp6@Y0&$`HKYMk|cOm|8#Rg-d3g@ul(V=|mXD>|KIjU0p@#l}#{@V-puWnxa=!}u+ zss}^&e)sJ=_s?MH9{4ZnjJ_}a$hfbHMK3s-a6fFdUB`0S@aa(H)+ggb_k9=m{o&CY z63%6jka#cfc{m70h)KVEUjE*PCrwE+tZ3xb9#s}DpEWHIa^T$^Q6B|H-Wpx~x46e`m~}`x z-=7nn`|8KrPboLz+;8u;jv2r5g&tKVP5vvk+{7g{XLr3nBG55gwfdiQn`^_XLsa8h zy-Qy#aW47m{D1b-?IYEHy!Ex3v13D%9YP(T3% z6i`3`1r$&~0R&QD(XU;EvDS4uCiHb;i*73&Ro zZ1w1S9;-9!m2^?5vKK6IVrH6P$81mLPE~lD4WAAbzV1CBu5q` zbB56U&NJ6-d1>*Dd!8HLxEFXsFm(TK%%;y5*s$ubI^mx8>s{7Xzj5A;LiarvuAP0( zJAv4&2UgE(H~hxEz~d7(W3F85ZTNL~vdK4TZ`|{}A@n{Nw>(F|p8_NH>@MA{^6P7J z-~Q6vZC`#_w*KbzwO;%Dc-i`?53G4((Ks6(9paY!F>-f?@q5>t%eJ!Ik(jKZbKkS+ z@I!w_bwAOn?L&tfmv1(HO8+}c-etqIL(^H6zx?q;nxyAG_cu|akL`&$`EJCj`fnD^ z^=Qt>^358xI9C7nUqko4*LmX0a=+N{>X3N84Q_uf^xW@F`{$yWWwRVATBB9zYQ-+) zTXtdqdIRX*_B?ZF}Wzu1xg^vgD^I=m8f*Q|rZKdD;1%+-sxZz^6PZ*at(eIFfZIV5SD zHCg-8vc=0DZl5+7n=Lq?-_8>@ygDSD?}C_tsgut;*fHyF5jqmg@O-WXu7uH zxO!*XU5;McJf`;4Hj8W9uih zyUb;7srt(uztp^5CUuRq9mYOaz=lVMH61#qf4%g@;Oc!po&2#&t;ZA3z4O)XkxzA9 z-u>BGU4pS$uRm66(^(t79n#(M(zWF0K62`Y2}PsdUAy}GMZZk%)Z(^a$86=7*029@ zXd|!fSY5-0U5CW;4V{0VuRCAu7V%iy9Br&Z zzUv1j+3@O6?x}Mn>UOF!Epp=cH^&Uf@Xo*b=SzRS0tzUgfC36Apnw7jD4>7>3Mim} z0tzUgfC36Apnw7jD4>7>3Mim}0tzUgfC36Apnw7jD4>7>3Mim}0tzUgfC36Apnw7j zD4>7>3Mim}0tzUgfC36Apnw7jD4>7>3Mim}0tzUgfC36Apnw7jD4>7>3Mim}0tzUg zfC36Apnw7jD4>7>3Mi1$3#{q>@E!YCZmzH*zQEt&^FQ|W>W_=1zwhzs8P_&le(r^e z>89H7>acoZ@om>%t@A?ZAAZVo=i=cNL-+eWU$5)=Taq_)FKp<3*cQ!0TlV|n%{$w$ z>yU81_a>irXq$gqdqVHE9bQRYZ^ou!f!M6q(iK>}J+z^Fz}Ez9ICh9ze&gKx1lsm! zQoY}*eZkIMzU|jIs!!BCQMHe5{rFm`FP?0E$7lUYm)hR3^{Yi|oV4N5VRgx$?<=;f z{N&ZgvfuvEz2DrHbnf#ul{*ssSNw@7nS*V6%zCnGp6xb_J9L>`@y2t%H}b%r*?X64 zo7>3Mim}0tzUgfC36Apnw7jD4>7>3Mim}0tzUgfC36Apnw7j zD4>7>3Mim}0tzUgfC36Apnw7jD4>7>3Mim}0tzUgfC36Apnw7jD4>7>3Mim}0tzUg zfC36Apnw7jD4>7>3Mim}0tzUgfC36Apnw7j{HqJBuG09zch3YHmpU@M&w{kkD;Ixu za?aRmlfDey|666!l)vt^;niVfw}`<9eyDq>)Qa>2*3GSTEp)!M9{%Ic{>d9?+oMg9 zmmcq0{;m`^X}Yb=i`lENfWx~x#x_J2SkMK1)ebEjZL#{cy|ch z^PPC^(Jy|#^qsG&zrObUFN0t7toK=D*}$xrm09jsF(u^QBc&fL8Jtsh$E@LLYfc*Q#G9HTkyplP@Qodz<5tj~pC4Wcb4?;`_YOwrh5(By>rdW2ht(D^1m_r;%yEAZi4 zS+_6zqGHQWhVD2&@8e+PZ>RPbjJo&xiA&x-6=}n(L-BggY>I0A#?_Jsa`fNw`l+n{ z>Yp$D`3fkYfC36Apnw7jD4>7>3Mim}0tzUgfC36Apnw7jD4>7>3Mim}0tzUgfC36A zpnw7jD4>7>3Mim}0tzUgfC36Apnw7jD4>7>3Mim}0tzUgfC36Apnw7jD4>7>3Mim} z0tzUgfC36Apnw7jD4>7>3Mim}0tzUgfC36Apnw7jD4>7>3MimJN-wagR7{CALtgr% z{?7$%U#V%stHY9I}59WIwYQN@_C2ucaAIhLfu>mTX{{7gnQuU96!(| zqYdv4$>$!`w#O?co@q8UK6p9m?2mJMmV5qCxBD~p=oPW*ZyESp{M4@wth#0S@*4A_QzxDK>gaOMp1+#v?8+i>@4R+sLzmbuZ8&$>RWk9p zul{!Ja#jAiQn&7uN6MwEw&~31z8^&gFGpC{{Md#(bT6w1`-hs>-n)y+)?= zpI?3a3Mim}0tzUgfC36Apnw7jD4>7>3Mim}0tzUgfC36Apnw7jD4>7>3Mim}0tzUg zfC36Apnw7jD4>7>3Mim}0tzUgfC36Apnw7jD4>7>3Mim}0tzUgfC36Apnw7jD4>7> z3Mim}0tzUgfC36Apnw7jD4>7>3Mim}0tzUgfC36Apnw7jD4@Wdp2O7rK&#+|v z;*wcTcmDHY;nSnm9NzXovo(1FN7trK_nHl_4vUx7TDqra$Gab?`M~upM?ZZeuGPIw zA9(JCxTI}ZFsje@h5i^==HAfe-;k}}d>eKh63;jJyhHapms>Z#L-x>?KHg*CFQI#Z z3m)!YxxWqX4$0>pId*@^A6D-f829^?A2zjIw6<*BYN`MB#lWT|zO1mW^VCAEO6;iJ zKlPu7E7d{gM51z?$$MF+M=f3i2*-qn9%{%$msx|Sgi`;uh(Ze>J zJCr(-_}s6(wMf}czb(Di{9uh$&5youa9`Woqkg~g-PLnHoKM=5FWQ72;nv}Y^U2SB z(c)uAEl%^?szOt0?LQfDu}bzY8$5GIsV6J{tAFnB=l^>OWV=|t#N(0gH+g^GGc)QR zd-3C<&utl-YTXZ0OYGYG^zMB3Jn--NDEuu7D4>7>3Mim}0tzUgfC36Apnw7jD4>7> z3Mim}0tzUgfC36Apnw7jD4>7>3Mim}0tzUgfC36Apnw7jD4>7>3Mim}0tzUgfC36A zpnw7jD4>7>3Mim}0tzUgfC36Apnw7jD4>7>3Mim}0tzUgfC36Apnw7jD4>7>3Mim} z0tzUgfC36Apg_tkFz@GC$9}xBe&W2^cg_0rwOXrZl{;}VZ`_bADfct-k^kQX-mCxB z&gygG9(q6X*36A=z5Rtn%fE}Lv*gbu??;}WIH1YtB5TI(f4p0Pt@kGFqHiO|?z?vE z*?b=*?h6Of+fa{i>kvBMQ@BBlLIx8YV`~87T70QP01wPWIWZeQbygMYHd)bfL zG|TnYnOSiWz56uy^u)7e;$BSMp#Fi;#ZP`xVdlNxeqa3L#+pA@ZM4CLM~6jwW4?|p z`}(X>ot8X&>W;zPlFt2yrDfK{b;#djClZ2etq2?;ZXB@F_5L z&RwTl4%)Hrk)GN57w*^Kc=2;3&X#?;Rps#c41FdFD4>7>3Mim}0tzUgfC36Apnw7j zD4>7>3Mim}0tzUgfC36Apnw7jD4>7>3Mim}0tzUgfC36Apnw7jD4>7>3Mim}0tzUg zfC36Apnw7jD4>7>3Mim}0tzUgfC36Apnw7jD4>7>3Mim}0tzUgfC36Apnw7jD4>7> z3Mim}0tzUgfC36ApuoSTz}$D|PXGBpf!ujhZ+rRt#&OO{UcE!j8I~&>*-(gYLPN$9)J6R`7y1bo7mK^j({Fse7 zR{s6m@4dcw&Z)kStS+|WZzWpI-DtzRL(+49`xgn%`>hJ&e6v#EPdzbQC*R(4hab-mIpME>~@A+H(Eea^0 zfC36Apnw7jD4>7>3Mim}0tzUgfC36Apnw7jD4>7>3Mim}0tzUgfC36Apnw7jD4>7> z3Mim}0tzUgfC36Apnw7jD4>7>3Mim}0tzUgfC36Apnw7jD4>7>3Mim}0tzUgfC36A zpnw7jD4>7>3Mim}0tzUgfC36Apnw7jD4>7>3MimJ$}RBTCqJhC^waHAQ|@QrBme&? zuwdWuVx>++KmXo>w?3^nDr4SN_be}8>9=%QlXu0ysUw^bAJz_)2LZfn* zpZ-j?htpnvdi({^v06kbLf?SC(%4%jbg*mx=ghY4Nd#pLuBX_SoMx*U6XX&sNWr`sl!^ zJb$(xc<|JTNj4lh9B&dj*CNkn%Go^6pEYW}zdhfle?Rxm`-+r_jh>e0$bqWI?&-L~ zhG&PoPdw3m($PA3lArsWo3_3D=p*M2<}H3|_3$ARUYK__V&A}nuk5km-68q8|L^~f z(x0e+0tzUgfC36Apnw7jD4>7>3Mim}0tzUgfC36Apnw7jD4>7>3Mim}0tzUgfC36A zpnw7jD4>7>3Mim}0tzUgfC36Apnw7jD4>7>3Mim}0tzUgfC36Apnw7jD4>7>3Mim} z0tzUgfC36Apnw7jD4>7>3Mim}0tzUgfC36Apnw7jD4>7>3Mim}0t$pjffWh&e{Y># zFL`GdYu$c+hhU@7evoBdt`}_Bbx1tlE$es`N` zq1{yI!L7S(*mc-(b@!`#lg~X*?ZeqJ@2*-iPwhhm%T#ODJx}eG0|%tbcQ#M$xP@)k zm&kHsAGC=*!k|NZ=1&X%JUCD7_;>%h?^M}5C9Xb^^>1G^3vHFSXBuzJQ{sAy_w&~+ zXv4L`)haC>D}Fgo#N!_w>XbchN~MR!tef5DkLmN$pRf7*XJ_YjZ1~LizD>Tc;oTwk zuEy8Lp5FOKczhS@vrs?*1r$&~0R^dZ!Z}PdhqY}1u-YP%Xu5x5Opv!EtTg zPuS@{pG*Vvd3Mim}0tzUgfC36Apnw7jD4>7>3Mim}0tzUgfC36Apnw7jD4>7>3Mim} z0tzUgfC36Apnw7jD4>7>3Mim}0tzUgfC36Apnw7jD4>7>3Mim}0tzUgfC36Apnw7j zD4>7>3Mim}0tzUgfC36Apnw7jD4>7>3Mim}0tzUgfC36Apnw7jD4>7>3Mim}0{>WG zX4@v2YX`nMG4qw`Yv-3}Fkxn!DOdWuGHh$Y_U?T((uQA$#Pdx)_ji~5Q6cuR3^U(* z?M%IDfBtQD>iz}K_Z^xeVgD@mMrs>w9r^`_W`3{m%%pR_J$JeS1Ey^*z9DzI{J-po zj(as>x6e6QI&t57GrJA-2(J!jTcs~QBjMbKwS1ybu455d<~;G%+QhA}rFr639BUK0 zglmTio2!J*IcvG!E^qqB?*>w(%Jp`$U3tn;a!#|(9*9s`0fC36Apnw7jD4>7> z3Mim}0tzUgfC36Apnw7jD4>7>3Mim}0tzUgfC36Apnw7jD4>7>3Mim}0tzUgfC36A zpnw7jD4>7>3Mim}0tzUgfC36Apnw7jD4>7>3Mim}0tzUgfC36Apnw7jD4>7>3Mim} z0tzUgfC36Apnw7jD4>7>3Mim}0tzUgfCB$mV8&<7p1uFchVy4^tx`Q-r-ydT_@eOb z52dMaJ!u<7+JqkA)**DhH_mM2!4rpoi*Y^8)MvLm#()JrYD74ofto7NE8+*nk_6VmAC*Rof(4`qS z&OPVO!Tax+m;c_-F3R!viRh#~d5=x#5uP0iWv-L?#<}mvIdN{fx?de?nzUa(dt;Md z+r&O!hh>L47>3Mim}0tzUgfC36Apnw7jD4>7>3Mim}0tzUgfC36A zpnw7jD4>7>3Mim}0tzUgfC36Apnw7jD4>7>3Mim}0tzUgfC36Apnw7jD4>7>3Mim} z0tzUgfC36Apnw7jD4>7>3Mim}0tzUgfC36Apnw7jD4>7>3Mim}0tzUgfC36Apnw7j zD4>7>3jDtXruQ0M`L%`U*uSQ`=+y( zLc6TN*X4e(H$jKKtKR(kx%aEssblY&1#%>99-G+XCh72WR^I_Bpnw7jD4>7>3Mim} z0tzUgfC36Apnw7jD4>7>3Mim}0tzUgfC36Apnw7jD4>7>3Mim}0tzUgfC36Apnw7j zD4>7>3Mim}0tzUgfC36Apnw7jD4>7>3Mim}0tzUgfC36Apnw7jD4>7>3Mim}0tzUg zfC36Apnw7jD4>7>3Mim}0tzUgfC36Apnw7jD4@W9TwvPxxl=E<$nsLcUixwMuQt3p zB)=1;Z7OpnSNk>b(>~qMGp_%(Gl@H(c0(KL5w0EjA6b!n?l~%qJ-GOehZcmk)cu2A z+i!1%4y$tg{oHRou_&%@-o`oXP1zy&>wr~20R|*btB8s?*;;L&|d;ez!XMY?2GZ{0P zd{1(p>rr0G$vNl#T<;``E_5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&U zAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C7 z2oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5;& zbb+z+_E@;#qjwBWJ^uR9W53_$F*Xk6-8bs9?#uJ9A2y`y@s(w_nc^`%4rSfbe+@h% zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF z5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk z1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oN9;Z-KFAbUyL;HGUdh_Soa4U$^!c zABVE;8?Cg;?5}_N?ey};pZA>XG35~#hgA1?|N5Dp009C72oNAZfB*pk1PBlyK!5-N z0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+ z009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly zK!5-N0t5&UAV7dXy1>{vi~2X7-DaW3csgu2^4~r0UHrwH9^>hd{%hbF0RjXF5FkK+ z009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF z5FkK+009C72oNAZfB*pk1PBlyK!5;&cnjP(;+uVD%)R;=kMVTaaDyJLzZ&-0X&&S0 z5bs|<(-R;-fB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF z5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5J(reaf2S)^jSD$zQ=ev zG?;w$-V?e_800aY4(Y!Jo)I8GfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C7 z2oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N z0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5Qw+H z4U^B_@tiM5?CLR|4h?o0b>7A&e>2cyJRRcw>t}ib1PBlyK!5-N0t5&UAV7cs0RjXF z5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk z1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs z0RjXF5FkK+009E&0ymWJ|NhZDk7fJ6$IqeulW*4F^w6i9c#Nk*`mcd!1PBlyK!5-N z0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+ z009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly zK!5-N0t5&UAV7cs0RjXF5FkK+009C7;w>=dkNw|0F5UkDduU?61JD&T=1uaSv zL+Aan$r(4co_+X7JO6m)F%L~Ww#S|8UR-*6&F|#QCpis0 z%E`B{={~<<&s!d>GxFm;t6cR^$2xUydividuaH?STn5ES&XTgQk!Ce96yI%;26i7id54;gvgle&_zl3HOY@@~nm{ruv`X z<-w;@{cjxI|M_cf>{w^n3(xPm>+6XHQ$DM6_aQ4LCVe>bioWZ;nz-(n*7rVp?-7YU zeVezwYvJ|DK|i!VvC+o|)qEw+j6xt;_Wd6-{jB6YBR_chr^h}_F8I%(Pb_+<`Ga*1 zy+bi3Ou}>G#|h=>v(e+pPZXYY$wR=>Pg@8=U>Y zwzFFfy!@*>kJvtn8Qinx0__*SwED4&|9(gE&H-0+c&*v1$#?!ealKYU-+8dk*#mce z^Ug0j)@ilXeXU!)oOnCA_`tzEA4*Kx?~n!8P2MRncI(+aHokbv#PBbF8npG4ZxbU1 zKQ^ZEw-al=5@$vskSzZ@zTl!{>1JCtdjG^!|EFGVb=CRbJy@sN%?r;P|L?Nz|5YC^ zc_p!+`vKq0ne%R9>UuXXf6w)+CMJLL>u1aUP?{L{)bo2E@x$@6|JG&T&>wdh7sU+j zS#yE^_Mg4!@DrLXc{=&d6{@(gW-yq5r=pUti^!L_3`UX+1K>sN9qrbQQ(Km>41^P#+ zAN{@ckG?^aE6_hm{pjzlfAkHaT!H>k>PLTX{iAOXmPlCC|96=l={)% zTmR@AM7aX}qtuW7-ug%1Aj%c!AEkcu_tro922rj+|0wmNzqkI;H;8ft`bViB{k`>% zzCn~L&_7E3== z*B!IDxA#Ubuyp_D?Qc%bxaZ~#UcGwtvr3PSb*|Xr)U$5x zo>;KP8q==3d-KG7U4DG(@sE#94EkZx1}%PkZFc)DJJkDS(|@!Z^Vu6a-8%L1=qIv* zYy^^>-aoZ-;_Ua5Q@*&NahuCNO)l8==aEwfZ1G^7(K~K?+kMM-taDb^uZDd&Ao2D+ zi#K_8pXCyhkJ-4xtnSSD@Q+(6=Wk&-hcFh!HIt7-B5a5x9^hUy4LMFxAVivq4Vy(?~0$-OpKiI!j7X? zxh8S@#Lt(Xztcm>(Q|s<_Rbw^CkB1L@+Ga7nNza-T$?JdIao5HBytcG|--b)S{~KQT$M^r?rQiSir!4*c_kFsgO^dD1IPRZSf8n%< zVkP=VpP zR-%6t>V6f4m`3iZOdr~ZZ0B8rvhABB41+*AL;X%WRr^p8TlaPFyp;k1Zi zCHhC9UO4yEzi?Vau@e2GP%oT&>R&i5qF9OkQK%QrJ@qe~7E!E3|0vW8=bri(PKzj3 zqJI?Xg>z5+3#UaCE73m+^}@NQ{)N*bik0Xeg?i!KQ~$zg5yeXMk3zk0?x}y_w1{FQ z`bVK&IQP`Qa9TvM68)o4FPwYoUpOtISc(2os29#X^)H+jQLIG&DAWt*p86L~izrs2 ze-!G4b5H#Xr$rPi(LW0H!nvpZh0`L6mFORZdg0u&w)(djdj5lt*1gpJ--VO*FRkQH z{g3(o?f+K3|Kl0sm+t>K@8hNWKdyV)()}MFeY5=h(X(zSS?`&bk9qlu3;)thy{TXX z5~q~y+jnWJWT$@}{N0E3TP4S|pK|z>JGDxV82s3n7I!vFjKA#Xb?zBHFERY9o#*^~ z_t0dYKF7WB`|Gn4gSU8b!ZkZ@)2?j)=aCKfpRxOS#~t!+C;NXFOxeG-kYw-rdq3B7 z^A*a!$2Wc7E_vAvE&kKD*WlzGZ$5p?dk@V@41D^mPVb(2Zen!f&%WOLvb{@oS$V~C zXMNo&*?axzJ$Ksk^U}4>yz$`ftA9H6(>uQ(-|NOMf9-cK|<(9A!#g*< z;f0^3t=;^XCBJPs_OIR5y9z}h*{5&w))S6+B6-UBzg)K0D%T`$d+pVdUhf{By#Bk> zH*Nde#i{T8!6og#-y|`n-S6M^Z~k5K+WWU&c-SL-lRY1LX~-viyO%C|=Vj~8e&b)I zo1gM%!y9)UQ7Cg+$6pgjl>PnR{J9I0r}VpY-eFBINKWW}@bMq-^JTKnMdxfaanHJm z{;!|5!NqH@oEW{u)RVf@dnj?`zPHyucbz8_*MHw)LGt|-%YQ#c4_RZ{n%#OH^TMvr z{k6M#SD^^B`~IbsXFS#Qhvc9i+Ml@b(bJOm_G;2`ovThy^j_n=Q0sspkbBC5PO($3dTe zJTWBz2M~JRTr&t)3oP)O7H*2_y3%U;}X{_zvKOHPVSc+kox^-+-<)HzP+t= z&o5d(_}A|0U4RtNjaQ z9_z?XK>zH#LVLIVp%u+;9{scP3hmwchgLMZdGycDE3|j(A6n7u=FvYpuh8DDe`rOs zn@9ibyh3}o{-G7kZXW%!^9t?V`iE9DyLt4_&MUNc>mORt?B>xwJFn2*t$%1mvztf% z?7TvIxBj6O&2Aq3v-1k=-TH@CG`o59&(15fck3To(d_2YKRd6`-mQOVMYEen|LnX% zd$<0f70qrQ{j>85?cMr^Ry4bL^v}*Kw0G+tTG8z0(LXz{(B7?oXhpM|NB``+LVLIV zp%u+;9{scP3hmwchgLMZdGycDE3|j(A6n7u=FvYpuh8DDe`rOsn@9ibyh3}o{-G7k zZXW%!^9t?V`iE9DyLt4_&MUNc>mORt?B>xwJFn2*t$%1mvztf%?7TvIxBj6O&2Aq3 zv-1k=-TH@CG`o59&(15fch^?`mS1oA+{K4%FgY}z(C4YPe`;U&?{`1GcjAVg7d<}g zozxe~*Vg;j_l-__HF;2K-}uMZZ(4qx)IP$YdxbV%$+CA2I-~6g6Oz|G z)B4`0-+C`Oy781&r<^l0Ic)o(js87mjl`H2pZ}$2X_v%+25t76((1FsfK7fq=7X-? z5=Y)}QjhM-G%wxyio@RBw83u={J7C`OEw?WH#DEn=P6yKasMtmzjRrm(+6#4-1^YA z$y>j=WdFVU-<&+Z@fDA(w%^Fq_k3#on=k%2weR=b279;qG%_fxx%Z@vGYW9B@w(*6g<#VfQqOZVSskF8H!tzk*Y zm)AT$VWpmlLEk_7*;@UFC$8^acX+Q+kC%7fee2DV1D`&t)8vMOlGoOM>Z;GK9G<-H z>8Dz}eERSQR_oj8;Puv@mMHt2TsP^i(0oFlC-v{TrjPuq(T|DYU+p~S(@TCz44wDh zJy$n-BQfggmkwL*mTt+xO$N*yvelC0Ep?_Jy4*AMl4IK5G4$XYwlBHhq3xf2Ysvws zfB&8M=w4gyK4ng-zkmN1-N&WPqmSIvwSCLs<-hyArq55@J@wGD8{V=wG3dLOSGan~ zRZIKNz9%`M`zZ%ZxNp7Wpzl|{WQ`|FQ~jImdc!UC%KsirzN`NqpMyP%4j|?Wp1y1M zeusVX=!1zNckXe};?s6d-2Tm;4UXLV=)^V4FYdY0Isf=?|FrM_@!yy9YrM-D&pbDv zy#GCQ*Yo`^&JR+_(%#x7x|M-3eeE;LyzHD;+_4oT9-_L;We|+1QO|HNG ze*fe98Swp&Z~L;z_1E9;e|$d!zW?!UUpBe^`uqKl?`OdGKfdkDCf8qozyI<54EX-X zw|&{<`s?raKfa#<-~afwFPmI{{r&#O_cP%8AK&(6lk2a)-~af227LeH+rDga{q^_z zAK%Y_?|*#Tmrbs}{(k@C`x)^4k8k_3$@SOY?|*zh1HS+9ZC^II{`&jB zzbo7S?ZN>&wyW6xtHo>G`yKkoWjDuLv-qa3_WM7w;k@LIm+o4=|Lgc)mrLyfz2CAg zeljIF^!3v=*n9si%KJ}PWyRKAUwHq(aXU>YK7n|zczTaBe=A*>XqxDq+6VTnV?RyY z-QczBzkl+;#NZ~cJ^asOnkH}B@rdd5PWmD_vf){mx4+@8ME^I|`*y~ti;@HC@BQ3* zE4^ECz;3Uuy~l$Ow0OVQH><65$_4S(EWYVyz1_Ea$IaGwJ~{B2*7r_7`t-!jr|x;h zt@r+(xcRhQT5NyCL&@uZSZ=G~TTU*yV6Qeu?LYn2^6&e8J!X`gb;4^SS30Ui$zdnl zJgVuy#`o`f=BXWfPk5yG1meBoX@~V5HUE}>wN8}n>wd|KdnX3eFWvpUXTMGieWS}7 z4K}&2bmD58_AMRQrS$5j+P?S6J=>Q)JngJ!>yB$Yy=%Rldi8tuqDhO_S!T@ItM3(W z#p0d5^-+C0jai}3@RA{KFTQET^>!{9_sGI`=APE6oKudBrOb@0HJbZ^OEaKJT^S ztgm~0(c}1Cb|{(I<^6Myn{iUf^>2Aa-o*aGCGp#minP|J!wsm)_L$$aBrheVzm}S=R**iJYLs79-XV2Qva%C;_^%@HKqPl z$;9Jz{o~QOswwraN+uq!>mQHKRZXdXRWk8-UH^D=u4+pCtCESw>-xu|b5&F7UzJQe zUe`Y!ovWHs|Egr-@w)!;=v>v5`d1|rkJt5&N9U@h)W0g3c)YHEJUUl3rT$gP#N&1S zPCLXWrACJyeO{srXGVyp_|9Et+YD)d9l8MLb`p2VlRa5F;l}tQd*FPSe ztC~{(s$}Bvy8iL#T-B8NS0xjV*Y%G_=c=aEzbct{ysm#dI#)HN{#D7u<8}Sx(YdNA z^{+}M9kkIq$1nM?m2?*6h##r|*2E^TtmMuSE?SJi#IUYG)ZssHIuwJY2I^PnG} zKdgNJ#}D2PF$c_Q{n;L?uCd9*Et{;c$z%Oi_^2?aw5qBEQu{w1`0MLO zcPqK-p!1iz<(KgYWg*;z<`i{qlJmFZ!{n`*^)D1xouR+P2^H@>@E*)Op^=Czl+Q z7&3AFTMs;Yi^SM%n)H2l?njB#{=bhLzfHTc&SO?vah39YV#oYCV0PnG_FJa!*N@Kn z`MmqLTj%Vv?_IA=?~cumzPB)^w5qBE+I`x$)96&sS>J8<%ZUA6*{y89&qtbX_D#uz zZNIp7_0wNX4Bm3H5id@jnfl$I-F%ac7biwGOzg4t@87py{XZwKGUlY-_kH-yVH<2b z_@=6^@AbkKc;J*o+ii|N_^ozV{%gDI8jl>1`hCBCi(S54DfPR*_tRf~Ffq})-hvli z>$`2Ld%yKp+kMZ(fcmAoj+=Z$qF?HBbIY4|-F@@mjn^&R@S(z<((0-bX!mxi<7Ly@ zCR2a+|F+@_`y_{TUasr3Zw^QfnQ-Yx<5pTfF}lfhS9Q2|K-u4chdbVWQL<0p<|X}( z@0u9&L;Dk2eAlt6>w3L71j_!dk6LiZ@yS%ro^|eRpS-sI3d7pY-Za^}-Uf{?9Xu-8 z|IJVCJ+R+^#LzdothU!Wqf7twj}zu~A2@i{Cwnaze#zlm?{LNVt@iJ=;DO?p)++xj zQ2zZ-{rxY!aMFO17cTg*}f$*L?ZhV@j|3_U%dk*=9(| z&tH+u76cxVZE+@SUt0yM*nQZ!g{~{VfD;*8vU~s3+w&*ht)INY4p!lEUfqI zA6Cz7r_n!Kv9R8+e^@=Uokss`#lm{O{$cgZb{hS&6$|VA`iIps+iCRARxGUd>mOFn zY^TvbTd}a-uYXuQvzL{rZR1GuvtO&sHp~_v;^4&upjB zKU=Y|-miaHJ+qxg|7^v=dcXc*^~`n}{j(Jd>;3wN)ic{^^v_l-toQ35R?lpw(LYnKSUt0yM*nQZ!g{~{VfD;*8vU~s3+w&*ht)INY4p!lEUfqIA6Cz7r_n!Kv9R8+ ze^@=Uokss`#lm{O{$cgZb{hS&6$|VA`iIps+iCRARxGUd>mOFnY^TvbTd}a-uYXuQ zvzL{rZR1Guvru)W5?9+q7Tt`)5Dhf5AU0_J7;%+NJx* zWqW_`k5=F>_aD%p=|kVmJF#s4*A8`e+rE7N*VYX-ow4hiH=VZHlvyn{ZoSHJ?GnpJ zJB_<%E0F4c#K7~mI<9R0&+fxkd#~h~jb7dDyzdSv+yC$QEq5MXI_&Ka#`pQ|mc;O{ z_FQ9V%N>%bePGwR=Cf91`+=_7w%4m=`^WFl>D(Xh8}P&XZ+^6{eW0@q5b^s<{^(b> z5Af(MM;%$V5Abny4(^oN|95cxg%>SKUjO~GpESB<=S1%f?;CZ(3xkrKK7793D`)mi z+;U^rAMToVQt8wVNBnsBkIzOthTGRr;J%Mu88iHi9WO242m0JS-%F-`=N~?FUWc;I zb(7P^m3`lr-R4 zw1@>JwcGRU!M(m-d-gK*OZT{Ia{bcJUte{`(-$vW*8BG}o3=|1eEO-zLzca@>~nE% zV!fwIUYq;++nsMew`B1Cn@wE5#e|5L@Af$gOy97_8+Yur^A4qt_qp(lW3Sw#^rz>C zw(s=(zf*tj&-?tq&krr@e%HK7@0VONceep+%ovxr{`+wsE`IU7l2=}NV%66#otxwB zogT43>8+{1-+PbSsPySNlW&=Of_Iwzu)koGwR*)Tf}3yeGLU#Hk$DHZ{zRzEZMt3 z)6#7|_$)d6h>;7LZZIx6V(?>Ezp(X5W&O|m?!-Y=^`G|lY1`fN!LX-lczdTsEHG(K z>filiejSk3f3?nsl=px6!XR-#CMRqy;i!4|zFIWFs1}(D7>0e~QYI(W(*D`34T~7ZZ3s%d^)xVZOi|lgx7g?}c zUatPN3|eHD)4#}q)$(%nuVv68yPW<-7Oa++tA8zn7TM+WFS1~@yj=Zj8MMeQr+<+J ztL5eDU(29Hb~*ivELbfsSN~cDEwan$Uu3~*dAa)6GH8)qPX8hcR?ExPzm`FZ>~i`S zS+H7OuKu+QT4a~gzsQ2s@^baBWzZtKoc=`?td^Ine=UO++2!;vvS78mT>Wbqw8$=} ze~|^N<>l&M%b-PeIsJ<)SS>GC|5^quvdigTWWj2Ax%$^KXpvn`{~`-k%gfcjmO+c` za{3opuv%WO{{zVq7mY1u4ErS-><^D?l2}2ftP`3Z) z6Wv$pS+@V@Y753MF5Ca{kpUMx)U45xJ9%V%-!;YRjXVJ>*Z`JH1ki{*KO!Zu$?Xb0y zW#8}pull-dKln*cJldtK`z6<{b7a}RzzgnOWpvrTz)gR9bwSy_(D$@ExogSP4oCdd za?YaSo<6>-zvr7ie>?T<>vt*J2l|KZEeDjoH~Gm;8y?>;F=FuOVQVdVGckCJzb7|+ ze|p*P!@?7Oub&wBOzVr!TVacKKYjMoj6wZZjIUf6lizpp^OMf5H)~R||Gb~?-sXu- zlB2HKr+@zmza)CsTX58thaZ|qJ-=|;kL#BG4s6%7?{%{`J7MjXbK7m+`ijoIZ~pPj zo5E0t?-VU&+_7oTAqVd|yY4Qny58D3ksSKQUe6sgZ0%&~dq4Zwi#jF-eSdV5F+H1> z{oY^lw@#-ghkv#61v~XwuKn68%sPF!tDcXqTo{wjtTTMX#Vg%Db9Sod1-+YmUAl1O zE8Cacy(n?b3i}>8X5R4jJJ0VtvelqYW&JNZ>%m23{rmiS^kXF-Z@K9@UoO9M7z**7 zqD8+|_djyc2~$ho-tmax?YG{ztb6BI-)mU*y{~ubqZ_r~`HAJ<{`$W8RrQ~G+oHKs z?wL3+zJg&)oK{PF97fPT2fP4I)jxoUIgFrx4tN2as(%0x za~MJY9Pk1-RsR4Y<}iZ(Ip76ws{R2)%wYumbHEGWRQ&^pn8OJA=YSW$srm;HF^3WK z&jBxhQ}quZVh$tdp95Y1r|KU-#2iM@KL@-3PSrnvh&ha)e-3y7oT`5S5px(p{~YiF zI92}uBIYoH{yE?UaH{?RM9g6X{d2$z;8gtsh?v6&`saWbz^VEN5HW`l^v?k=fK&Ak zAYu+9=$`{#0H^97K*StI&_4&f08Z6EfQUJapnnc{0i3FT01R!M7r?3d2M{rb5%kXiFMw04?7w5Z-;bZS z(K6-xKkhWM-yi#bK3?+X6`OW@_3mR+`#(N0>-F7t4uBxmBXn5q(64WL`P!Y!_W$g( z{9dJh?EkrOa_H-)zWL?y=OqTzyM6G+-R6|-3w&MuW~-Lx%JK%3et()5Cear7QANFu!=o{;Pe(vo9%l7@gxmU;Y${x?S@!fOE_XTb+ zWB)()h2G-6k@ff9^3)+01V9k$5oY#iw&<2?R_ao=|M&Ax9=m$QKHtf~O}2Wm?pHe` zd)FI3`pM^?@37Y9qi?Kp)pl)%e);X~y>_}T)@T6@KWF2*zwda+;N23v>y3Nx>0>7) z1~=LIfCjhSP}aZSnip|KAaO_$mF`DCi^PII5VyR82@4I4hx zcKTMEe75MrFRSXmZI_RiKWo1MBOn&HKo0$vd-<5Vh8FkqRR!M7r?3d2M{rb z5%kXiFMw0^4K{PF97fPT2fP4I)jxoUIgFrx4tN2as(%0xa~MJY9Pk1-RsR4Y z<}iZ(Ip76ws{R2)%wYumbHEGWRQ&^pn8OJA=YSW$srm;HF^3WK&jBxhQ}quZVh$td zp95Y1r`Ejxl8-(b+U(8%2x2`#S^tjj^t<$r{Xb_`?Eg8Z;bTKid%MTJsr^51{k7B1 zm;JH-@2BmL*}q?_Z<>D@YqWrdpM6Z{|D5sdc4w6B_x=0XbGnrE->ciZE3{v3<6VB= zt>IdK>|L>#!_?|zUdOq@jjhp@} zF?8O%xo>awOZ#E(&D&|o?Je4Duwc7=Pi)mW)@T6@KYP-PQ;*sGhFr(E$;}dXyO} z{O8{DHacUYrTvfV*5T+L9h+|Sb@w@&u6*o-=U;rW!*SQ$c*$xztlr_cqmO^D>v#W| zy;|Q+v+jN@)@C6M-+r~eyX`Xn+m+`WJ9X03%}-xp&XFSz`Qo%6O4@XN>Y@G5diMQ_ z{)yi858C6!w*O9CTmOrR1AhJ?B!f85k{rCnb1!Z2*$K%3^^=!g{PN;CJ$5=^?8%c3 zXw!AM`Liy%f296le>(IJyKi;V>0g~t*vIt`yKi;V>0g~t*vIt`yKi;V>0g~t*vIt` zyKi;V>0g~t*vIt`yKi;V>0g~t*vIt`yKi;V>0g~t*vIt`yKi;V>0g~t*vIt`yKi;V z>0g~t*vIt`yKi;V>0g~t*vIt`yKi;V>0g~t*vIt`yKi;V>0g~t*vIt`yKi;V>0g~t z*vIt`yKi;V>0g~t*vIt`yKi;V>0g~t*vIt`yKi;V>0g~t*vIt`yKi;V>0g~t*vIt` zyKi;V>0g~t*vIt`yKi;V>0g~t*vIt`yKi;V>0g~t*vIt`yKi;V>0g~t*vIt`yKi;V z>0g~t*vIt`yKi;V>0g~t*vIt`yKi;V>0g~t*vIt`yKi;V>0g~t*vIt`yKi;V>0g~t z*vIt`yKi;V>0g~t*vIt`yKi;V>0g~t*vIt`yKi;V>0g~t*vIt`yKi;V{n7uVKkxrJ zd-6}$yjJ(R*E@6{zsrjsFRoL*|L+kiAC}txw`Bbe>s?pfbsaAbf%dD9f2-N!dv+M{|8?I@#?Kiu>&yDn+b-Ox-qL;GH+?s?ANcDl{Br-e@_nISIBG_5OlOtV z3AFzCo0aD_>)5zr|L>*yzwbUVwcm5+e}cFM+A z9y}>o-E|!=4uM(cZNBQ8)1T^?>|OuxL(chX+0!+%os7cBATz zeRrMH)=v(o_xo=9&;25Gyu*~PT_0_8@ph&6|Ms6-s=Kb^#UU{B(d}1Rd-t2yNqyf_ zeUI;v7*OxGmA0JuLhEJs{P(K=-tYJS>EG_RmuBwU;e_NjYyUofba6~)mDLH%>Hff% zNA0-M*i`>EA2#dJC)GdoeV>(Rz1+TY&KPmnmikvGSlq|;FYbQ1u3rCK1&jN9{foO_ zuB+ESSHa>wU;pCnm+R{F&sDIv&)2`W`{lZN{c{y8?(_98?tZzhUjJMLi~D^2i@RU0 ztJgnQ!Qwt&|KjeK>+1E-Rj|0v*T1;?<+^(Pa}_M^^Yt(8ez~q*|6B!&`+WV2yI-!W z*FRUm;yz#h;_jF0>h;f6u(;3HzqtG5x_bR{6)f)a^)K#zxvpOSTm_5!eEo~NU#_dy zKUcxxK41Uh?w9N8_0LtXxX;(WxclY0di`@1EbjC5FYbQ1u3rCK1&jN9{foO_uB+ES zSHa>wU;pCnm+R{F&sDIv&)2`W`{lZN{c{y8?(_98?tZzhUjJMLi~D^2i@RU0tJgnQ z!Qwt&|KjeK>+1E-Rj|0v*T1;?<+^(Pa}_M^^Yt(8ez~q*|6B!&`+WV2yI-!W*FRUm z;yz#h;_jF0>h;f6u(;3HzqtG5x_bR{6)f)a^)K#zxvsvd{wTSH7y>g#HGBT|Qx7`okNtnwTDt%1><4DP@vkKxpS}O? zsn`E^ANX_Ijk@l#;g{CEp)gi&6}bwu`siQltu|@Mj;ZhW|L*^OVd9j&-MU;nMZTv{=&8kOMWd~eB=7)}ji}q`_KVZ=_kN%c=ef(bMGc9O5)ay0n z;hR@Wy|2^gvF99F7^}C6Tm@2nJB_~a*R?vF^-Q-xZ94qpzx`k6-Rsot_FbZXuEK?V zzW#;XC*P~nKi|TIeSQ54yHCDXr+>bM3;X)|7j~a~uTKAb3m5kF^)Ku``Cgs=`4%qh z>+4_Gee%6J{qrqc*w@#;u>0hDb^7O9xUjFUe_{8@_v-Y|w{T%!U;o1Hlke5(pKsy9 zzP|p2-6!9x(?8$Bg?)Yf3%gIgSEqlzg$w)o`WJSee6LRbd7Q@m z!oI%#h21CLtJ6Q`uZ1kpM0-Q|9lG<_Vx8I>^}Kko&Nb2 zF6`^;U)X)}y*mB#EnL{w*T1m)4c=Uce2udjb$_sRF_^v}0&VP9YW!tRsr)#;ya z;ljSY{)OEq->cI<-@=7`eftEP?^1ZsM`p?|q)3vvs+F?xU`1rlf9<$^B?El-h!HIoW$oINj|Nll{`nfGG zpa0Rp3le|Y|99ql4OV_+t#?;Uy=YF1+~b4tKpcdxxVAZn(7jAy2m3_N~hY57~TLyDj@| zaKvW~H@oM#9eX~0#YlAE@-48#xKnrdI(fxIOZ%VFrL6xhv-5(PX=CKY6ohzSrgY|2G2LZn4}h=WaUegOcM1-g181 zbskN9?{9m5{>kf1yD_yd{I;!jP3!;CHdnS^>%&o#ul}Un|IO;$CEo(GQ$6oK<)r;m z{cn14=Ub;%^xq+Q+rcl5A5usEd>4rJ`ua!fANAe!kGep#_t!sK|ETY-f7At{y}$m^ z`bT|t{i7}r?fvzS)<5dI>mPN2Xz#CowEj`wUH_;HM0L5KkB>dA9aCf@2`Kf{!!mu|ELQ@dw>0-^^f}Q`bS+L+WYGtt$);a*FWk4 z(cWMGX#JzUyZ%uZi1z;aN9!N;-Sv;UK(zPQKU)8&@2-E;1){yb{?YnJeRut%E)ebg z^^ev+>bvV7b%AK_uYa`uQQuwvs0&1UfBmENkNWQVM_nM=`|BUAf7Ex^Kk5R}-e3P{ z{iD9S{!tf*_Wt@u>mT*q^^dwhwD;FPTK}l;u7A`8qP@TV(fUVycm1O-5bgc-kJdlx zyXzlyfoSiqf3*Hl-@U5-Q#alF@y}nq;@7hMKf4`u#t9Ys|28}1z`5;vUG#I*>vsRz z5tux+?Wk9Kyt2=q_y3*opV8g^*>A5CmhJ=Js%2_l*z$egcbLENB~Sn3lX|stcUOs8 zpnU)DBY)ZZfA{|`+yA}&DSb~`eBg0oOOrpe|Kj7BC2c$X`s2X|-?;7e2Q?bCXq8`H zje6bgUpoTRx0-+CfUb2X&pvUj?i(I<)a;Vv4{aZuct(Sz-6x#i_Rz0>UgnGG+q7M& z^W4ve^?zjRaU0)#>RYoL)Xv>qC2E0*lXlqPvp&Ckv$X$_O;i0_z2A4>zuO+Z;M{fB z>3hNK8_rtr@Yw@iOugRXg)^2pY{EYq4}9axwfBp9-R@sI0$YD}?X0(--|)Eh7k;?a zPXk-8mHOT{efyGKE@?4i$^D&{{dBwZ{;jWV*C_E`^9$x~S37rim8b<~HA*a+v)oE& zrTTC2aQ%xX{`SmUItLtCrh4Q;T z{qyTv=vUXj&7QTULchBHg(f8KzJ-2u{R_QNe%Ggeetir5>iQRYq5Q5-|NQzE`qlL>^g{VvpZ@vvE%dAFU+9JM zyFUH%>s#no*T2vU<#&Dh=hwH;udaWg7s~JY^v|zvp^)2+P>tE=F z^1D9$^XpsaSJ%JL3*~ox`sdfT(66q4p%=>U`t;AQZ=qkks{T`!9e7yj*jMH+-T(8| z9V+(!-M-Dnk2N{7@9~8`m-YQMfk{*MpEB?K-fR4M|KAzQKJdW46HAsW-v_?S)d!^Z zg)QF)zQv4XI=%PO1=swwt9w_W3v9dJhQB|(=!tFrXaDc={omV-cyou3X3iTp`@0`M zn%i^wqBduKJLR_?Z|%EXvk})$s&m+kLZ8d}{+hs@pN@a;;~yUyHEYD1yPvz|Gi$X^ z_3nGnv=&Rd-#@3#nIo6EbIGqWX7pHe(OYkvvhr3p?*HbJ7TGh`UhZ$t4eUOvwAVIkt-9UYsn(3 zuU!g#F6;Yi0^{2*Ueb8>;HTS-c;mPC4sG&C>U+QNshup_HuI9QxPH zEtIqL52a+y=Fq=pZlRo|e<&quHi!N-a|`7x{X;2PvpMvynOi7l=^skTn$4kq&D=sc zOaD+x)@%;_YvvZpS^9@kvSxGWUo*E*&eA`Wk~Nz{|C+gla+dy~l&skt`q#`Yl(X~? zrDVTS zY!3Zv<`&9X`iD}oW^?FYGq+IA(m#}vHJd~Knz@B?mj0oXtl1p;*UT-Hv-A(8WX*%Wbna`>Xx$dgH)by1w`KN8|1m%KUe& z|IGSFFTe6pf9^l|tl_(DxYeGEmhS)Aa$&{(zuO)5^oV61{Ag?_^T&OT`ycqA#X*yQ znOxrgfA;@v`PN&XH(K@m#pV0J&slz()V{Fg`@lDU^{IVcYqI8sad!)4{=3?JHR{rC zpKbF$`+t}3|K8@2laFh7<>%vPx9E4_5l?(HrS+_-eNS$??>gI_-{8h`ZWuB;l=SC4(MkRnjxKQRcN4miDF=q@IyJ7O6)c3yc6>kk(rP-3lJC0lHpC9UaVwraum0&CaeJnJ+={2qtADyj+@9$lx8mvZ>Ywfrw`cmtt$6yp`loxu z?V0{@E1o{D{^=fZd!~Qfil@)3f4WE9p6MUA;_371pY9R2XZpvjc>28hr+dWhnf`Gr zo<6Vs=^k-=rhnXur_ZZ@x<}le=^wY^>GSHJ?h&_V`p2z!`n>w5d&KRT{&6dwKCk}i z9&vl7f82_v&#QmBN8Fz2AGhM^^Xi}O5w~ah$E|q!y!xkm#O;~>aVwraum0&CaeJnJ z+={2qtADyj+@9$lx8mvZ>Ywfrw`cmtt$6yp`loxu?V0{@E1o{D{^=fZd!~Qfil@)3 zf4WE9p6MUA;_371pY9R2XZpvjc>28hr+dWhnf`Gro<8qi?LYRm)j#Vw=%qK(&-ScX z1t#{~YT1?tZ}r5V```ciPVWq8^N;a=-v4vk^Xp%-^~FcLU##<5ce=nmQ?}@~{($)# zmG`edqkYBxuPx7t&A(`~&xyOFU&XUx6`0tw`T1wu z{@Z!~v;TMb{_kzpd|<-pFIO2d>yieE`G=lAX?oqm?w+;G&ucfFbbPC)>a2N0vCeDV z=>m5={_%28Jn_QV8EZb!VdrmGs@JZp_c5E*`EU2NexG#PC;hjbzTV|Sx4q!8C(_UM zEEIvX{x|jLTK0XP`TM+|W^Vt|-pRRj%~iqHe2qQ`rfzy z=G&*Pp0xG5v-Ur!+n4&MJB9kJynn-^dsXz`{G0*Pu9^Sxk@|;PJbf>z$O#iqQ zPoG!+bdR__(?4#-)92Mc-6L+#^p9Kd^m+A9_lVmw{o__VeO~?3J>vFE|F{)TpI85M zkGMV4KW@d-=hZ*mBW}<1k6ZEddG$~Ch}$#$<5oO>z$O#iqQPoG!+bdR__(?4#-)92Mc-6L+# z^p9Kd^m+A9_lVmw{o__VeO~?3J>vFE|F{)TpI85MkGMV4KW@d-=hZ*mBW}<1k6ZEd zdG$~Ch}$#$<5oOV#4{`KvmxH*M7 z?;Uqv`NWRbuG2Ve|L@fP@2!v6e9djP{^awSFZ_P_zk#Jut6pk3R1=YFYlh|NgeuHeccQeXiTE#cle>_{a1Qzj3As_0Oae{xSW-Z=7jD{WIx=e@y@I z8)uqO|4cgJAJael#+fG6Ka)=Q$Mg@sai$6N&!iLnG5y1DoM}S+GwFnXO#kp3XPQv| zOgiBo(?9&inI_aflTP@@^bfyrrU~`Wq!a!z{ljmZX+r%o>4bkw|L_}Uno$2tI^iGF zKm5j-Ce%NZPWZ?455IAy3H8sU6aF#%!*85vLj5!8gnvx`@Ed2EQ2$Ii;UCjK{KlCk z)IXC>_{a1Qzj3As_0Oae{xSW-Z=7jD{WIx=e@y@I8)uqO|4cgJAJael#+fG6Ka)=Q z$Mg@sai$6N&!iLnG5y1DoM}S+GwFnXO#kp3XPU6m{&NQmvHxQxwQ!F8xqp)t&Tsa^ zIhTzt@857nJ^TNL)4H-rmiBMhwqpO+7OyrL`#{}2XZ(2|_{}=?eBjj85Bj9CYj~}A z1=9Ba-e>5kt){m+=HJf_db@Rtcc(NP@%Dk6%zgfZUAO)6t>T@~$}pgPJ`4!za>+{Aotp}9c)VSFvGlvYm{-{jXaIAO*D*E4f)#WSon0Cnhn+W-VGjXcvPzY+^sfx=Cww@^cwiqCyPhxA5P;;ljxsGC;Vgjhu=8U zg!*UF3ICY>;Wy4Sq5he4!at^e_>D77sDCD%@Q>*qe&b9N>Yqs`{A2ou-#F8R`e)J! z|Cs*aH_kMn{+V>bKc;{9jWbQCe;Wy4Sq5he4!at^e z_>D77sDCD%@Q>*qe&b9N>Yqs`{A2ou-#F8R`e)J!|Cs*aH_kMn{+V>bKc;{9jWbQC zeg*Qoi=d(`{ebI0Ad z^Jc>zy!GUtihoiQR3)6jENI%g^Ks{;`gfXr;4zQg zeCH}(?f2c2UsQIk*NR_Y{08f+u+0g(-8lY$#Kw0Hc&clo1EvqWbfqR=G;03)@po-; zaMKCBk38_J{@tD{{z*+xnZWq^LyqWn_mIREy^q{`m33OSYjnWudro<%`3Cw|rWM|6 zss2Yl`ug}2QvEwVc2%#jfArr-|L|H@Hj)07sl?|s{o~WRvPt!?OeH?A=^vlgl})OD zWh(J`P5=0`u542MD^rQjYx>8hb!C(4UztjLUeiB5tt*>U|H@S2^P2wgX8_`Ifnd|FpFss5Fz#OF2rn*Q-=UD>4iSEdr5*YuB1 z>&hn8zcQ8hyrzGAT30ry{*|f3=QaJ~)4H-r^{-4NKCkH?pVpO4s()oF@p(=E__VHU zQvEAaiO*~L$ES5=lj>iYN_<|^KR&H1n^gbGRO0iR{_$yD*`)ecrV^jm^p8*L$|lvn zGL`terhj}|S2n5sm8rz%HT~n$y0S_2uS_LAujwD3)|E}Fe`PB1c}@TLw61JY{VP+6 z&ujX}r*&nM>R*{kd|vzi_b*-liLF1qylnsH<9BQJgZ+QQ{w$>W|Ihxv7v0k6fYRH4 zUHg+Zjauyf$bvH)%-;0XV}2j;*~|8S5A_Eph)*k}nPU*J(0f|=@kGp;9PaOyBRyWnZSKZYw+Ow?xTO&`t>!#(OTX_8g^`-#} zQ()XlUFJ+|H~YJBcbxV4A)SBuTdMyB6TcjBZCU@DKV7n1qiJs~`>)*|`e&Han`+$2 zH~x6xNmo5{>vESg{i^>J8<+JzpmC><^pD49qpW|wem}P>>;FsJi$FqCI)cRLQ z#Pd=Acy_OtTK@`(cs}YM&+Zjd>t7)e&qw{^*}Y|Qao{uL7OeAGXl-7BWnzd|COkNU^6d&SiHS4hP3QU7>$ub5i@3W<0=>L1VU6;tb9 zAra3<{o~oaVru;>B;xs4N&ibmEk60I^8KHWnA^|(zhQqa%K9(e|M%kL73<%z^`>`j zzf9-1?|u&FFNegclp6*e&)lsIP%t)yyKo{ z|IoqxPk75ob5xl3Ri5$Z_qqSA9{)AZc+ug%|HJ2g-L{x=S{ED{+r6o@pb#p zvGb-^Y5z@S=J>k(=h%7EtF-^7GIM<0{&VcS=~dc)Q<*uwZvQ!U-t;Q%zp2a|U$_4p zJ8ycG_TN-yj<4H)j-5BXO8ajrGsoBMKgZ6SUZwpvm6_w~_Mc@@2kv*<1N-0f_)mZ4&11YuuVPby;`hJ*-Ek*hddt7M>^=`Z?rxv?$J1Yb$q#P& zbG|-CfisTz`RhFA^yfV0jFaB>iql{F(|>>5$sd2;S@(GVKkx1T$^SCOtMn>16*%Me z54q!+ue<;Ep7H4WUvb1wKld4X`~S=%_xFFqra$NFV-z^!_7DD(mp$o%7aZ9C%45F$ z-1F@}#;(`&RTTSw-2T_J+!)_y|1oyl_Ep$_+j3+4Is1>XED{@a!t5p>_5hi+rA3>Z(DARKWG0jcHH(=*nit{WBfV$kFn#nufqP@mK)>G z*?){3w|y1%-?rQsf6o46?6~c#u>ZE@#`tsgA7jUDUxod*EjPxWv;P=7Zu=_iziqiO z{+#{C*m2ueVgGH*jq&H~KgN#Rz6$$qTW*X$Xa6yF-1b%2f7^0n{5kuNvE#O{!v5Qq z8{^N}e~cZseHHfKw%izh&i-TUxb3U3|F-4E_;dCjW5;b@#Ub|p$PsV+(wFW3{@+J# zdtHAnM}aTc|6@;m?=3Gs`1{{4|FH`X?EjgQf9$a@$nh#X<+cUN-~WE|cRurd*ZQHeU}6KpLOgvfB2qz z`~S^DbG!;qxov^dpZClg-RA87_U)&?^#j-X%ZEJv6MOrA?1uaMKXTja`g1u79N7P; zXTJQDhaTMj9zT9N`{&qgsi!RVzmxr!Dx2dv_Rq0Xt*5kqt+F|;YyTWO)p|<%*D9Oi zy7tepQ>~}8f331Pu514sJJoth``0R)~}8f331Pu514sJJoth``0R)~}8 zf331Pu514sJJoth``0R)~}8f331P zu514sJJoth``0R)R2E?*9Jw zckJ%};Nze99eq8mr&+qd7ybVCyB+a{Z~Tti-s9L$J?iN^>&?N{EhxBm;T-PhB4nxzYzcIEp&ef+ymJn!^7 z-Tljd@z4I}gZB4-ul<)U-RJcV?0@-tp7^_8xx4?fKK++h*uT%6YCVGQQR@ofawSSJCYCWa>Yn9D$UHj+Qsn%24 zzgF2C*R_9+ooYR${cDxYab5f8*s0c2+P_xW9M`pfj-6^frTuG_&2e4(=h&&%Q`)~) z*&Nrke~z7MJ*E9?mCbQo`{&rH)>GQQR@ofawSSJCYCWa>Yn9D$UHj+Qsn%24zgF2C z*R_9+ooYR${cDxYab5f8*s0c2+P_xW9M`pfj-6^frTuG_&2e4(=h&&%Q`)~)*&Nrk ze~z7MJ*E9?mCbQo`{&rH)>GQQR@ofawSSJCYCWa>Yn9D$UHj+Qsn%24zgF2C*R_9+ zooYR${cDxYab5f8*s0c2+P_xW9M`pfj-6^frTuG_&2e4(=h&&%QyyynxBj0GJkIa` ztyR3Q>we+>FMja1zy84Q|K0ul@3+|9|I40y`UCoUT2Hfdf%5miAN#grUvu3zUwZkm zKYj1V|HzXcbepB`=X!k>IPJ~f@bTjw`^V=T*#8f|`)`iFWPks!>GNql)lvmc`?WJ) z_?buFw;f?tl3Ouldx${eR{=9&Y~}yDjyU_Ft-G&g)1c% zZc9D2{g*14^E&p=x!Y1tZU3c8=Dd#mbMCg(Q`>*3k~yzq|D3xm_0;xXs$|aV*gxlP zOFgyymnxa_I`+@G+fq+$|D{UiypH{I?zYrZ+kdH&Ij>{?oVzXc)b?MhWX|i@Kj&^s zJ+=LpDw*>-_RqQ7QcrFFrAp?!j{S4)w$xMGf2oo=uVeq5yDjz9_Ft-G&g)1c%Zc9D2 z{g*14^E&p=x!Y1tZU3c8=Dd#mbMCg(Q`>*3k~yzq|D3xm_0;xXs$|aV*gxlPOFgyy zmnxa_I`+@G+fq+$|D{UiypH{I?zYrZ+kdH&Ij>{?oVzXc)b?MhWX|i@Kj&^sJ@p~? zf6A+W*6)8_s&L=exyt@eKmS!%{r&F`ylLN0@9CB*@cG~We!{7L{Rh{-$`g(~;ncev^^7OH&;EVywe-`ScKpc?y!gF8b&~y; zE}P?e`};q{{yFwq`YG+dbkSVbvwyC=mVR3MFI_a(_3WQ(uce>X{!16lbv^s%+H2{j zwg1vZb6wB=x%OK6Y3;vs(OlQFf3Cfjep>r4T{PGA?4N6|rJvUROBc;`J^Sa{Yw4%8 z|I$TsUC;ix_FDRB?Z0%8G{- z(nWJ!&;GggTKZ}2zjV=D*Ry}Fy_SAj`!8KI*Y)h5YpHd=h|!Or?vmm zMRQ%x{<-#A`f2UIbkSVbvwyC=mVR3MFI_a(_3WQ(uce>X{!16lbv^s%+H2{jwg1vZ zb6wB=x%OK6Y3;vs(OlQFf3Cfjep>r4T{PGA?4N6|rJvUROBc;`J^Sa{Yw4%8|I$Ts zUC;ix_FDRB?Z0%8G{-(nWJ! z&;GggTKZ}2zjV=D*Ry}Fy_SC3tM32mT7`!~tl?M>|8?KDju%>GR>cY722cRNj!2eW^Z%-!C^{@qT~dlUP2J57@ZvwxG!-QL9h-A>cw!R+58bGJ9K zf49>#c`*Ao$=vNt?BDG)O&-ktO)__T6Z>~NO_K++f0NAJ-o*ahPSfPU?B67Fw>PnW zx6?FvF#9*j-0e;5-|aL_9?bqtGIx6u`*%A{lLxbZlg!=T#Qxn*)8xVI-z0OlH?e=W z(=>T7`!~tl?M>|8?KDju%>GR>cY722cRNj!2eW^Z%-!C^{@qT~dlUP2J57@ZvwxG!-QL9h-A>cw!R+58bGJ9Kf49># zc`*Ao$=vNt?BDG)O&-ktO)__T6Z>~NO_K++f0NAJ-sE5R_xsG_rL$> zQSW^JuRr(M6>sIz9Ts@u?O)m9BYLEY1$Os8=^g)aJNs9R z-0`LD-|;vN9?kv@B6oZX`*%D}gGaM}gUB7uu@f5+oAcr^Prh}`il z?BDS?4Ia(@4I+1Z3;TCGPJ>6Ye}l*!-@^VKkJI4M?B5`A$G5P5$Ky13H2XJ*-0>~! z-|;vN9?kv@B6oZX`*%D}gGaM}gUB7uu@f5+oAcr^Prh}`il?BDS? z4Ia(@4I+1Z3;TCGPJ>6Ye}l*!-{K4R_xs;F9;(u#?e2f@_rD)`y|e$}vsXN}(rsL} z(*iF%{DV*V+OzL={xQ4z|MR!JqSMFpSd|Lczf#yFm$iSAp(;J5{VRn{a#{N)8LHA_ z+P_lRB$u^+lA$U+ru{30O>$ZLCmE{JW7@w`*d&*=f0CgpJ*NFDg-vo<`zINy(qr1c zQrIMywSSVKDm|wCD}_yRS^Fm$s?uZHzf#yFm$iSAp(;J5{VRn{a#{N)8LHA_+P_lR zB$u^+lA$U+ru{30O>$ZLCmE{JW7@w`*d&*=f0CgpJ*NFDg-vo<`zINy(qr1cQrIMy zwSSVKDm|wCD}_yRS^Fm$s?uZHzf#yFm$iSAp(;J5{VRn{a#{N)8LHA_+P_lRB$u^+ zlA$U+ru{30O>$ZLCmE{JW7@w`*d&*=f0CgpJ*NFDg-vo<`zINy(qr1cQrIMywSSVK zDm|wCD}_yRS^Fm$s?uZHzf#yFm$iSAp(;J5{VRn{a#{N)8LHA_+P_lRB$u^+lA$U+ zru{30O>$ZLCmE{JW7@w`*d&*=f0CgpJ*NFDg-vo<`zINy(qr1cQrIMyy~_T6|9g_v zmV35BHxwAS*^YX9Y%X1b#NGp)7!v)X_8 zrkSp2|4eHw|E%_3zG5BHxwAS*^YX9Y%X1b#NGp)7!v)X_8rkSp2 z|4eHw|E%_3zG|ZTyiVNF6#Z1*6(*D)brns>EQ_NKDA?;r+ZHf!qKgCSd9@75R(x$ku{Zq_T z?IG=7Ep3Vm+dsuj)gIFR)zYT8u>DiaRP7<{UoCBl3)?@%Ow}IJ{?*c^xUl_G%v9|m z?O!cziVNF6#Z1*6(*D)brns>EQ_NKDA?;r+ZHf!qKgCSd9@75R(x$ku{Zq_T?IG=7 zEp3Vm+dsuj)gIFR)zYT8u>DiaRP7<{UoCBl3)?@%Ow}IJ{?*c^xUl_G%v9|m?O!cz ziVNF6#Z1*6(*D)brns>EQ_NKDA?;r+ZHf!qKgCSd9@75R(x$ku{Zq_T?IG=7Ep3Vm z+dsuj)gIFR)zYT8u>DiaRP7<{UoCBl3)?@%Ow}IJ{?*c^xUl_G%v9|m?O!cziVNF6 z#Z1*6(*D)brns>EQ_NKDA?;r+ZHf!qKgCSd9@75R(x$ku{Zq_T?IG=7Ep3Vm+dsuj z)gIFR)zYT8u>DiaRP7<{UoCBl3)?@%Ow}GT_W$ZvJn;c%e(#yjsCFY4?y|u1U-86y zKj{;v-q-$J&QtxN>|Z@`*Eg_#*Yi|=IQv&m-1QCY-}OAzAI|>O6L)6 z^~7D@!2Vs&Q~lxWUp;ZxH?V)#^HhI0`&Uog^$qOb^*q%d&i>UCcYOonSBPu%ql?BDe~)gR9O)f0Dp1N(P9PxXhhfAz#& z-@yJ|&r|*3>|Z@`*Eg_#*Yi|=IQv&m-1QCY-}OAzAI|>O6L)6^~7D@ z!2Vs&Q~lxWUp;ZxH?V)#^HhI0`&Uog^$qOb^*q%d&i>UCcYOonSBPu%ql?BDe~)gR9O)f0Dp1N(P9PxXiU{Qlqg8$W*R zV=nu->Nj+Qt_nQ=;O-xN?^oNut6f?>jQv}c?(;tO?{k+{Pi6mBrTe^({rlXd)l=EO zRp~zOWB)#PY4ue0Z&kX_``EwFU0OYr{acmp^FH?PbC*_6W&c*C`@E0+``o3~Q`x^& z=|1mc|2}tV^;Gt6Rl3jn*uT$RT0NEhTb1tfKKAc(msU?@|5l~@ypR3++@;l1*}qlk zKJR1yK6h#LRQ7LGy3hOAzt3G-J(c}imG1LC_V06-R!?RBR;ByAkNx}HrPWi}zg6iz z?_>WycWL!h_HR|X&->WF&s|zQmHk_l?(;tO?{k+{Pi6mBrTe^({rlXd)l=EORp~zO zWB)#PY4ue0Z&kX_``EwFU0OYr{acmp^FH?PbC*_6W&c*C`@E0+``o3~Q`x^&=|1mc z|2}tV^;Gt6Rl3jn*uT$RT0NEhTb1tfKKAc(msU?@|5l~@ypR3++@;l1*}qlkKJR1y zK6h#LRQ7LGy3hOAzt3G-J(c}imG1LC_V06-R!?RBR;ByAkNx}HrPWi}zg6iz?_>Wy zcWL!hvH#7^ec#P)@zA&2s?~knr;h@^xxc&p``DxPQ`oMSH1=;@ys!J& zzpp)7KaKrc7w_wS_U~(t)=y*q*2Vj}pZ)vVqxI9+zjg7x?q~nL_GtYy_HSLhulw1* zuRU5njs05}@9Tc{?`x0NPhwfm{Yme4XWB=C0`?{a~``V-R)7Zat@xJb7|GxHU{WSJ(UA(XR*}tzn zT0f2bTNm%^e)jKckJe9P|JKF(x}W{~+N1T;*uQo0zV2uLzV>MSH1=;@ys!J&zpp)7 zKaKrc7w_wS_U~(t)=y*q*2Vj}pZ)vVqxI9+zjg7x?q~nL_GtYy_HSLhulw1*uRU5n zjs05}@9Tc{?`x0NPhwfm{Yme4X^ZEVn_9Htdo$yQVZhe3ETerY(?(cpx`>#91N_V&aO5r=Xt^GS0 zVx`Bh|4QLIxvl*>8Dgc!u>VToJGrg>I~ii7$FToO;XApl{W}?ArN^-UO5r=Xt^GS0 zVx`Bh|4QLIxvl*>8Dgc!u>VToJGrg>I~ii7$FToO;XApl{W}?ArN^-UO5r=Xt^GS0 zVx`Bh|4QLIxvl*>8Dgc!u>VToJGrg>I~ii7$FToO;XApl{W}?ArN^-UO5r=Xt^GS0 zVx`Bh|4QLIxvl*>8Dgc!u>VToJGrg>I~ii7$FToO;XApl{W}?ArN^-UO5r=Xt^GS0 zVx`Bh|4QLIxvl*>8Dgc!u>VToJGrg>I~ii7$FToO;XApl{W}?ArN^-UO5r=Xt^GS0 zVx`Bh|4QLIxvl*>8Dgc!u>VToJGrg>I~ii7$FToO;XApl{W}?ArN^-UO5r=Xt^GS0 zVx`Bh|4QLIxvl*>8Dgc!u>VToJGrg>I~ii7$FToO;XApl{W}?ArN^-UO5r=Xt^GS0 zVx`Bh|4QLIxvl*>8Dgc!u>VToJGrg>I~ii7$B6xJ`pUn2;k#aW*mYKV47XjW!1MNZ zxBp55tbAMhuROrYx3~Yw1FU>|`>#B}%D1=w$^)!?d;70Ez{uROrYx3~Yw1FU>|`>#B}%D1=w$^)!?d;70Ez{uROrYx3~Yw1FU>|`>#B}%D1=w$^)!?d;70Ez{uROrYx3~Yw1FU>|`>#B}%D1=w$^)!?d;70Ez{uROrYx3~Yw1FU>|`>#B}%D1=w$^)!?d;70Ez{uROrYx3~Yw1FU>|`>#B}%D1=w$^)!?d;70Ez{uROrYx3~Yw1FU>|`>#B}%D1=w$^)!?d;70Ez{uROrYx3~Yw1FU>|`>#B}%D1=w$^)!?d;70Ez{-F4MeZSiOsy$|Vf7svtyZ?KZ|8u?%eE#R$J$}yF-~Rl+@~rqg%y#to_xbnv z_xbnt|2ZAt|D3zW-*fD5e?R~I{P**JP6zlu=k9*~&%L|-?Z5l?KK{@BKCr+2{qH~j z`_KRWo6`aQ&$+w5|IfX<{q666|IPdNKA(S|f1iJ!e}Dg<+X4IA-@pIO`*XnO-{;@w z-{;@I|Ig_F|L5G@zyHj=yZ!C&-~Z5a>Xm) z|F!@7T+81B9={JdelKvi;JCMc_71V@Rd>JQxFZj~AFep=I{U{X91r|8>5Ah>kJH65m%Iuf6|% zaNP4CTZh;)c8=ZS7+;U?E06Kt9mn{25WB~*+%?|c<=36tf26+;&H8?h&wU)r4nF^5 ze9p^V<2-hcV|+c1_P0Oh$NAs=@wtv;eD32IUyq~Dzt6wVzt4Yo;5e2C;eH&;efdAu z-sQfrdmPLEZh!mx`S0g{>4NhyTnP8$SnkUIF?KF@jq}(&j`8(4+TZ?u{`>hK9ypHW zLAW2sa$o+BwRgF1>>kJRzuVvbe*XLUU%KFY3>U)vIF`Hee~g{WUE@4e+>c|q zEC0vXx!g6*WA`}5*W+k^`}_It=YM$MIF<+DejLkv`9Id)<-W0d9LxW1fBXCS@8^H% zg7Yz42>0Vy?#ll$b}n~~^VmI(@%1>`-~N97`}rRpIF98(xF5%IU;dA^ce!uu9>?;( z+u#0v{`>h~y5M{a7sCBGmb>zQjGfC}<2-hcV|+c1_P4*E|9<|52aaQT5bnn@_Vs%o z$J)8vHO^!A_&v~Z9{=5O%=ba;9J|M{{C&{zeU5wYgHbxf-h0Q(o_3#uzt{P)r=7Te z+}S^#;CSHozF+pVQ99gvf6qAHcmF(gkFWpd{jWRje(o=O+9)0FRqPs{^Ek%qyvP3c zIqvO0T8DcUpZoBw-2eFf?{nOHAB@r=_7CsvKi0mR-lumjT#wJY{pZ*}-WT!nEOw7$ zydUDf$M-vq_5O%`WA`}Pf2^H1eQxbP$IkYjduRLGfA`<7%J%{P=YC(<-~NaC|7-aB z{~YfNpZ_^_E`RRFx&7_W|1bV~aeO{TKl=Rp{QLa-{Kvoh$I;*a=Wv1lbL_n7?=kkb zzn}kp{>RV#IQscNhYS3lV`o4A=ib@=_TT+`Uim)Y|J?5j``h3D{`+DVOD9J?_xbnv z_xbnvkAL@xqksRI!v+4&vGb^T?x>HhK0_K$CHJn(zp&$(#S z4sYDB`}{o{4LWq)_ay}h${ zh|h8CylMaVzU;5p^B?+sdv*K!{2#j8DDS8Ed)}tczy0mM`}dns-tS(=Ro~A(|5xoX z+xx@*_CM6WZ^hpW{rn&M{o(UJ_P$s5^UVJC=l{I_`^M+r=ilew=ik5o&FKLD=iEL1 zcY*!w@8`dt|9<|@=>Y%d+}+Rrxp%j}{dfP~$N#zC2lltW|NZBG|M}m4b2`BPId}K> z|G9U!zy1C1zj^=O=kxFL@AL2T@9+O}J79nN`}ezxVn4`~3U-`~3U+|J)AP-~RsnZ{D8+*Yxx6_dt&6MEstg`1jxV zebDjs`29a|9>;j!IPSeKvUP|Z9g6qC z*Y2M??tMN+>2P(Q_gnAp?zp%6=pEwy5by8NU$579=%4qS_P-4e4*vaUl=n^S8K3(& zZrXp8*FF2|h~48DyT{S~WBA4$j`6vVqtCz3zt6wV|EPcenf-Nz`*95S5BK93?#Ge;{O3Ra zNA+*^*A?!^G2D+M|M|~<{?G2@sINcVk7KwWNB;Al|NI}-zu8|`xF5%GKaTw8KmYkZ zyOX27{%}8z;eH(X&wu{&e^mcwe_i2z9K-!M@}K|w=l|?Zj{5q;{WymEapXV$`Op7R z{hR%Dh5K;~_v6Ta{_~&zvpYHJ>ks$i81BcB|NQ4a|3~$2_SY5e$1&WGBmeo&fBw(z zc|pA4mT4pa1+H)xX(aSGXU?a6gXx=Rg1XKf9BozW#7Oj^TbB`Okm;^M6$T zW`AAbejLO7IP#zW{OAAdPLBHe!~Hmh`*Gwy|M}1VP5y-&j(bjI>rn0*?w7~%`?9@G z&wJ?iXY3k3=gZyWJiaf^9rt>U*5TmJm!I<5yYK#fU&rY>+~aC?KYsteJMMixM(c1* zKlkyvuj%*uzMP(xJIA@>n4kaHH9oI#?l|_|<-X4?p>bSd#t_7J-|vfU7p)}ti8)U%e~9<^81~S^?oS#F3;^f*52iw<=*9a`TfqvdOwtVm*@5# zYwvQ;a_{oI{C?+Sy&uZG%X52=wRgE^xp#S9e!ugv-Vf#8<+;7b+PmDd+`Bw4zu);- z?}u{l^4#8I?OpC!?p>aj-|u{^_d~gNd2a8q_Ad7<_b$)N?{_}d`=Q*sJh%5)dzX8b zdza_s_d6f!{ZQ^*p4)q@y~{nzz033R`<;*Vekk`Y&+R?d-sPU<-sO4u{m#dFKa_iy z=k^|J?{d#_@AAC-e&=JoAIiPUb9;}qce!V|cX?iZzw@!)59QwFxxL5QyWF$fyF4$y z-}zYYhjQ=o+}>mDUG7=#U7nZU?|iKHL%DZ(ZtpSoj^FbOr z`pFmX{+{;(#~1A%LV7I=!w#zmGTV9$$C7s*9ucC_f+ZI?veudEmJB zJ{YY-xqF=3f6Tqh_iyZ79_?@c-TysMzF(Y=^}Z-S_i^s?Ki1BhKDV)Vd9=U%`9J>8 zzw&b*=RW^F|33dd|C|2a;Ok@f5Z;$Z{*STm)qPIO-Q(Q;_V@GO&;P6Q!++21M0j5w z`Ja2&Yx=n?caL-X+uzTBKmV_(Bfc-E7vX(*WS~q zjAlryf=rzhckdE|eedoBGrDtC`_``h2o ze?R}1_Qv)4awWVkkNodzr&^zza`!m5zy1CE_w&D&JFeS@FX4T8f+lcwZj*zs?@5KL_RR zac+P6`}yzZe`}B2udO@beR<@6+a1<=-}_8)uy*tgs}z8=T;?~Z%#hao%Ma_3wBX!rL#f5Gup`{#~(T}JB=d%xQ52jBOj z?R!mMPwZcQe|+6>@AE%;2m8;lbNN0lcaQV(`<##Y{)oNHqrJ!6yWF+hJ3?=kl-cP)31^YZ(gkNJLxz00G$$K1QzwcI_<%kOhO=KCS` zE|2yebMJE3a`!kdzt8!Y?}ymCJlcEAy~|z8-Q&FcKIdb;A7bzFXzwxiE_W?=kMr{T zoR9f_h`r0By~o_U+_l_2&dcv}KIZ!&_AZb19&_(<*K+qbFTc3?=kl-cP)31^YZ(gkNJLxz00G$$K1QzwcI_<%kOhO=KCS`E|2ye zbMJE3a`!kdzt8!Y?}ymCJlcEAy~|z8-Q&FcKIdb;A7bzFXzwxiE_W?=kMr{ToR9f_ zh`r0By~o_U+%d;zV3L{ zPT70J&gC&)SNV14d;5>lq1?GVALVs=9smFL?dgU0Wv}?}j(Z(P=}_*x>GMCz>-9RW z`u?^5ReQ|#{;Z|oaK{_~&z{0|S}IE#m~z0TM-j{N68 z|M?#t#BnwkXL-G`Zyfp0fBy47Jc#2g9?teUW8XOPpa1;le|Qkb*<76E^~Sz&j4r7iW3Bv2Pst&wu{& zKRk%zEFRAGI%D5B@}K|w=YMz*$Jtz*<@LtCapXV$`Op9GAda(mINR%tedEY~{_~&z z;Xxc{b8(i}8~etQ|NQ4a|HFeg&f?*0uQT?IBmd|2|LWcc@!uWy-Y27X2oFbponGHz zS6=douX^0y9{4@bj`MZ+klnrSeg65!0S<70103K02ROh14sd`29N+*4IKTl8aDW3G z-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$ zfCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h0 z0S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b1{$N|3x{IDrM7rVRv(a|6Ls>eC% z;CpZk4p z|GD?)z5RE8{(B#IA9x@59B_aG9N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70 z103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh1 z4sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4 zIKTl8aDW3G-~b0WzyS_$;Lr}l?}3h^PKWL^T9*TRf8gYoUvc~|IO^bi;B&wM4sd`2 z9N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8 zaDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0W zzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h z00%h00S?UV!0-IRTW^2ayG}gJQ3vk>p92nXfCC)h00%h00S<70103K02ROh14sd`2 z9N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8 zaDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0W zzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aA0N!j{l0ky5OeAoOqa{ z4&Dbo2OQu42ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K0 z2ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`2 z9N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8 zaDW3G-~b0WzyS_$fCC)hz{C!G$fn9gbHD)(aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8 zaDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0W zzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h z00%h00S<70103K02ROh14$SPpKb-T2N8aGI-@oIigZF{Y0S7q10S<70103K02ROh1 z4sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4 zIKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G z-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WFtY=n zeAzkoJmLI5+i}#v`@rXb103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h z00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70 z103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh1 z4sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC(u*?~*%{HfD#@azZeIO^bi;B&wM4sd`2 z9N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8 zaDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0W zzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h z00%h00S-*fn9gbHD)(aDW3G-~b0WzyS_$fCC)h00%h00S<70 z103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh1 z4sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4 zIKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14ovL8CvNmz|M8pu>&XIKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4 zIKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G z-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$ zfCC)h00%h00S<70103K02WEEQl55}T&98a)b$1+f@ILT4-~b0WzyS_$fCC)h00%h0 z0S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K0 z2ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`2 z9N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4W_IA> zZ~E2`-S)d5u;ZwM_kqs=2ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h0 z0S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K0 z2ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`2 z9N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%fQwF5u)sOx;})=%BJ*ii@X1D^v9aDW3G z-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$ zfCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h0 z0S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K0 z2ROh14sc*%2QInq!!P{K^PjlmsDt-`&jANGzyS_$fCC)h00%h00S<70103K02ROh1 z4sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4 zIKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G z-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4I54#X4}JS%uJx>&>|Elg zgZF{Y0S7q10S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h0 z0S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K0 z2ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`2 z9N+*4IKTl8aDW3G-~b0WFtY<+deMCkd)QrfE^*Ys`@rXb103K02ROh14sd`29N+*4 zIKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G z-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$ zfCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC(u*nx}x z_}A}v!7CoJXIKTl8 zaDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0W zzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h z00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70 z103K02PSslk{dqqQ}_DO7w%)g_EXpY-e+FAfn9gbHD)(aDW3G-~b0WzyS_$fCC)h z00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70 z103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh1 z4sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14$SPpTYu?k zA9~&=cP@0)!TZ4HfCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0W zzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h z00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70 z103K02ROh14sd`29N+*4IKTl8aDW3GnAw3hJmsOM-0<-`7dqA6liB=uXXunWu5a1zR$hZ_3SzR*c{*h2ROh14sd`29N+*4IKTl8aDW3G-~b0W zzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h z00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70 z103K02ROh14sd`29N+*4COfd}{F`ol-r37u)j600d%yt>aDW3G-~b0WzyS_$fCC)h z00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70 z103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh1 z4sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14vco-!7q6B z_g-?q0o6H}1AD*$4sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K0 z2ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`2 z9N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8 zaDW3G-~b0WzyS_$fCC)h00%h00S-)dVAluk_`_dX_U=-hgE_DV9N+*4IKTl8aDW3G z-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$ zfCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h0 z0S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103MM zXa^p8%C7HO4mhAX2XkN#IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh1 z4sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4 zIKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G z-~b0WzyS_$fCC)h00%h00S<70103K02ROiisSfPfeb0ga`P=Va)hlmb*IU;0W_9~~ zuIzFS$#tFcy>kD=I`5CH>xb2Y`FA^qkFRt6?{&>wB=`5NfBxQe{fE-$=juT}uiTV# zxO$yqa^70^@hx+n_et)l?^!>uu5#bBb4ae&taHrX*Zh-r>buv^tE=2M^&FDtk@C-+ zWWG1`ef{5k=)B7R(DRr!AM%&~_P=NOKXVT3|IFNT&&gl@{J-bQ?8CHb|Lwp1xBou> z&vbzQGjq@99Qn)N{qO#F|Ic)Q|1)!U|If}{{_@}c+{gdfb0B~D`}d!J|M~adOb7Ts zGk2f=XXh?|`TO_Z+|PaX-~QWw`|tDrYzO2oe}Dg*dk@%u`)~j4zrX*_bb$XebNBb3 z*}2PK{{H?q_j8~9xBvFv{`>qt+X4B@-{1e{-UIgE{@Z{1@9+OJ9pL}W-2MG$cJA_* zzrX*@{oH5&?Z5rE|33fEc0m5}_xHcK_kjJk|MuVh*T4Vgdm!`upZT89d=FsUP*;9z z>N(_lee3tX=6k>My}=0siPzUk+XT(4VypKJXd@O&S1z85$ysH^-p{T!0(p}Frr z?K=<7!~WC0V_i>D4}478e_GdbNZzT*{Xy&8Q+Ka>b$kE!pVs9ZGS|sHea-#Eb^fVc z-hb+Q)Etw0YR)Ck)q28u9@Lcwn|2P#GdU;s)cicp)tcX{J??|#o?3IweAl1r<=^S^ z(A0U(-lx`cu>U>wyyludxu@pmsq&XU=llG-KYN{;y-&^0Q|-U~xBvD(9;m%Mi2JEE zU;g*XyXKqRQ|s^LFMs#H`@g!N@8Lq+Pp!G~zemnB*YwFfH9t?4zx>_*?*Dk8_VOU^ zr`CM=-z)E$Z*otqzn8!K-T&_Y>Vm$93voZS=F0ybIoDj%C->C+JXQYkcmKQp7I36*7Y3q!1uo2d1;q(c=|fe)Sq1U$vr=R+WP0}_TJxlX_s?2yX2ZZ zPtAKib^V;W%D>w=oK^NdzSaDnvVKlo%|VxQNdEC&{=M?uX->~9u4munKO_IlMeeiY zo|<{c@AJIcYd(^1a!-|iubg+aV zVgG03T<`t#@|QpV@BO(r`_aAHfBSF$?SKC6pX&4f3>Wx6Bj=qy$H-s)?tk}x?)_Bv z{|p!SKO<-N|LmOQFaPb&d36r>KYK3ZFMt33ySIzg$*#TqxBvFv{^##Lss8>m!v+4& z$a$yFFY=ea```Vadq36vKf?w7&&b*RKRajn%isV1XYS`b`)~j4zx}Tc`20WH1^LV0 z-~Z;`3-;gs+kg9?zx${9`~M6V_&+1(`tQE<@|VB+-~FF^Kh^y|!v+4&$T|0Yt@4+@ z```Vadq36vKf?w7&&avn`|0H`fA_!pKlgsB`+tTD)&G1CWWFCZzt4SS_))^&FD(GY)-FqB5 z&-VY&^O!at@|XYN{=SvZh3@~}^I`vc=X<34O#brc|J;Ay*nj(P|LwoO|IKuO|1)#X zzYF9qfA_!p-~B(+0sha--Tgm1clpbI`*R=vXU~EB7fBWzA|7-{3FMogkn|lw~fBSF$?Z3bO&vbzQGjsR%pV_&~U;h67H}`X& z{kQ-2-~RjjKidKM%irJs=H3JMb^m=2WKSpZJwN&TZ@v#YKhO97q)*MP@O}WgGaA>b+sQ|&f!S={+M;{>MHl{=a6~G ze0P6e?|1n7ey98&%Y%cTkGjlF^32|+?v#I*_n!V9$vri>r^>(geV^P@lY6TC<-h%R zfay+legEuzYW6FirKmYmP)xYWQ757u)ek%X@&wu_;ce3mI$Nki}pUQv!^Pm4+{hR(?aX&Tg zr}Cfw{OA94C%e9X+)s`Bsr=_Z|M}n5zv=H4_fzA3D*ySB@}K|w=l^skyS{(iPmTMj{O3Ra`QO#Q>F*WyQ{#Rr|M|~<{!e$Z z>-)$3)VQC@fBy5I|6To?{$6oEHSVYKpa1;l|8yt2zJJ_Ljr*zm=Rg1X-_^hA?-ln` z<9;gt`Okm;Pj|BG`^Wv%xSz^@{_~&zJNXwk)RhyPb`CYyxL<31Zqwe=|GkHw&*YkW zzUH1j&!tya=h^KX4(5E<%is5u?eF(h$DPBON67uj>))%Z{pfZM_jT{{-uLx+&uyls zHRtqd&;3uX+1K=H@4RciHUHYz=k>kjqvl9Vylb8{ z@7mYr^}Xhy=3RSv_sYBGS@W)aeO}*d9%|mTmv^tcYo0al+Sljxz2>3jU3+==%Dd)S z^R9h;Uf*jTYTmV%cdxu_o;B~<*XQ-U=Aq_YdwKWDyXIN*u6=!8-)kOf-nEx^ue@uX zHSgNj=k>kjq2^tCdH2e@=2`QueSKcvYaVLewU>9Vylb8{@7mYr^}Xhy=3RSv_sYBG zS@W)aeO}*d9%|mTmv^tcYo0al+Sljxz2>3jU3+==%Dd)S^R9h;Uf*jTYTmV%cdxu_ zo;B~<*XQ-U=Aq_YdwKWDyXIN*u6=!8-)kOf-nEx^ue@uXHSgNj=k>kjq2^tCdH2e@ z=2`QueSKcvYaVLewU>9Vylb8{@7mYr^}Xhy=3RSv_sYBGS@W)aeO}*d9%|mTmv^tc zYo0al+Sljxz2>3jU3+==%Dd)S^R9h;Uf*jTYTmV%cdxu_o;B~<*XQ-U=Aq_YdwKWD zyXIN*u6=!8-)kOf-nEx^ue@uXHSgNj=k>kjq2^tCdH2e@=2`QueSKcvYaVLewU>9V zylb8{@7mYr^}Xhy=3RSv_sYBGS@W)aeO}*d9%|mTmv^tcYo0al+Sljxz2>3jU3+== z%sb!zotoTJ<=!jz;(e|B-^@I}**&berWcrp&F~}M*UJCR$nV?TvzmK)`ODw^@BaUG zeeu74d^-~FYvup(o!kAjS2g$a@|VB+-~E4oIpaU)`1B;+*UJCnGq3x5k81Ae+8_V<$JUv6Un~EQZ7xS^FKX`TV76 zLCrnA{N?ZdcmE%4kNo!?U3cPrt^7Z_IUK9Gueqm}zx>_*?*C)ulKfa~_g+t@7@fcg?ltp1wY(?>P_2yHFaa)p7W5rYn6A;ylbvC_w@BSeb0GF-nGiRXWli}ntS^CoWAEgB=1_~-81i+Yt21< zeNNwV9+G#h^6r^;&9&y9zCNe#ISts z>3hyY@~&0hJ@c-)*4)$A=kz`2A$iv-@1A+rTx;&>>vQ^^^N_r2m3Pm)Ypyl-^z}J? z&v{7RwaU9^-Zj^nd;0pEzUMq7?^@;EGw+&f%{_g6PTzALl6S50?wNPZwdS6_KBwBP$v2h%{O3Ra<3Z{)7pJ^$@=fJG|M}1Vc#t~9!)foCd{g<)fBy47 z9;8llamxE9-&Fqdpa1-i2dPs$oc5l{HNFRpyl?VN zp0j@0om4`Okm;^FJP>PIGa}`zGI1{_~&z{Er8zQ#_pZp2;_r|NQ4a z|KmaGG#975Z}Ls$KmYm9|9Fr(#lvaunS4|EKimH!%|U*zuI8lsImE;6@9F(^-*emT z4}a9I1K$I!&O3(_We%85<{-e+C*?q8HuZ}tgxzC<^`OnUu_wwJq|IL9p zFbDR4103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K0 z2ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`2 z9N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8 zaDW4ccOc&bo$8zpe@@-bKx30J>UQb zIKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G z-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$ zfCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h0 z0S<70103K02PQkP>o+d``pcgFUwc&NU=Hj72ROh14sd`29N+*4IKTl8aDW3G-~b0W zzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h z00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70 z103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%fQ*nwsLTQ0iooG1N` z>Kx30J>UQbIKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4 zIKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G z-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$ zfCC)h00%h00S<70103K02Sz*awX@#%_UoT=`=UArb6^iRzyS_$fCC)h00%h00S<70 z103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh1 z4sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4 zIKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKY9)4t(X> zb3XH$2mk4!ItO!L4>-U94sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70 z103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh1 z4sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4 zIKTl8aDW3G-~b0WzyS_$fCC)h00%h0fyoZs{kZF{ed9^Lx2Vp+9M}U6aDW3G-~b0W zzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h z00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70 z103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh1 z4sc+y1OIdC1)u)x$KSrF&cPhm0}gP2103K02ROh14sd`29N+*4IKTl8aDW3G-~b0W zzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h z00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70 z103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$V6p>We%FWgJ^$VBSXAd=4(tI3 zIKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G z-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$ zfCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h0 z0S<70103K02RJaFA00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0W zzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h z00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70 z103K02ROh14sd`29N+*4IKTl8aDW3G;J|1Hmgm0jvLATcvQKpm=D;3sfCC)h00%h0 z0S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K0 z2ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`2 z9N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8 zaDW4Y9r(`+FFNo0f8t*k)j600d%yt>aDW3G-~b0WzyS_$fCC)h00%h00S<70103K0 z2ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`2 z9N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8 zaDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14or4n>#kEDcGl~jv8c|$9M}U6 zaDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0W zzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h z00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70 z103K02ROh14sc+!1Fw4jHGg&8Wy_Z89L#||-~b0WzyS_$fCC)h00%h00S<70103K0 z2ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`2 z9N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8 zaDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4COhyauiACp30Evz zs&g<0_J9K%-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G z-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$ zfCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h0 z0S<70103K02ROh14sd`292o4tttVY_`KM32dQqK&Ij{#D-~b0WzyS_$fCC)h00%h0 z0S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K0 z2ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`2 z9N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N@rc2Tr`| z;?MlpSC(5<=U@)(0S7q10S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$ zfCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h0 z0S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K0 z2ROh14sd`29N+*4IKTl8aDW3G-~b0WFxY{uPd)pxS3migi|QQAfj!^=2ROh14sd`2 z9N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8 zaDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0W zzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h z00%}paK&S9{`w0qU$#`|U=Hj72ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h z00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70 z103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh1 z4sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%fQ*@3sb^~y7@e#5e*ItO!L4>-U9 z4sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4 zIKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G z-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$ zfCC)h00%h0fyoYB_tLxfo_z7Lr8)<5U=KLJ0S<70103K02ROh14sd`29N+*4IKTl8 zaDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0W zzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h z00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0Wz=6pQ-2CG=Kk+xex@@V= z!5r8F4sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`2 z9N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8 zaDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0W zzyS_$fCC)h00%h00S<6rvIGBd`h~Zg{FP-(bq?mh9&msI9N+*4IKTl8aDW3G-~b0W zzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h z00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70 z103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<701Ct&2#~=Ll zzd7%n%a-aK%z-`N00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$ zfCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h0 z0S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K0 z2ROh14sd`29N+*4IKTl8aDW3G-~b0EJMiY8e!?$5_1tAkbq?mh9&msI9N+*4IKTl8 zaDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0W zzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h z00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70 z1Ct$i(>p%*$d~-qvZXo)b6^iRzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4 zIKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G z-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$ zfCC)h00%h00S<70103K02ROh14sd`29N+*4IKY9)4!q?xFZ_#JpR;VK&cPhm0}gP2 z103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh1 z4sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4 zIKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G z-~b0WzyS_$V6p=r{q0v?_NB{~E!8=g1AD*$4sd`29N+*4IKTl8aDW3G-~b0WzyS_$ zfCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h0 z0S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K0 z2ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S-)dVC!LL-SCM2UA9!` zU=Hj72ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh1 z4sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4 zIKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G z-~b0WzyS_$fCC)h00%fQ*?})!{_>CA_{C*Qbq?mh9&msI9N+*4IKTl8aDW3G-~b0W zzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h z00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70 z103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<701Ct%N?Gb1H z{vTYvY^lz{9M}U6aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4 zIKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G z-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$ zfCC)h00%h00S<70103K02ROh14sc+y12woPpPyUx>OLY$Bz#edb103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$ zfCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h0 z0S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K0 z2ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCG~qc<(!Jz4PqPFI%c}FbDR4103K0 z2ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`2 z9N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8 zaDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0W zzyS_$fCC(u?7+LOc-jRIyL;JEor5{B2OQu42ROh14sd`29N+*4IKTl8aDW3G-~b0W zzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h z00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70 z103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)hz+eY%`Pw7D@b}+!-l94O zb6^iRzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$ zfCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h0 z0S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K0 z2ROh14sd`29N+*4IKY9)4%~R*KfmOmzk2baItO!L4>-U94sd`29N+*4IKTl8aDW3G z-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$ zfCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h0 z0S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h0fyoYh@{^x< z>*>4qFRF7e2ljvi9N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh1 z4sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4 zIKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G z-~b0WzyS_$fCC)h00%h00S<70100y_z}_dk@u$A;vfCEbIhX@`zyS_$fCC)h00%h0 z0S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K0 z2ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`2 z9N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8 zjCSBbPq^@mbN+7Gt2zgBU=KLJ0S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0W zzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h z00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70 z103K02ROh14sd`29N+*4IKTl8aDW3G-~b0Wz=6pQ>^lFZ*WdO2Wv}WS%z-`N00%h0 z0S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K0 z2ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`2 z9N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8 zaDW3G-~b0kJMiEaJnN-@eZT?LIhX@`zyS_$fCC)h00%h00S<70103K02ROh14sd`2 z9N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8 zaDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0W zzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8Om<+`2kv;q!;VTjzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$ zfCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h0 z0S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K0 z2ROh14sd`29N+*4I566Qhn}+Q;QOo2VL5Ogm;-ZQ4(tI3IKTl8aDW3G-~b0WzyS_$ zfCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h0 z0S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K0 z2ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103MML Date: Fri, 25 Feb 2022 16:27:31 -0500 Subject: [PATCH 006/694] add cooling test --- src/cooling/cooling_cuda_tests.cu | 44 +++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100644 src/cooling/cooling_cuda_tests.cu diff --git a/src/cooling/cooling_cuda_tests.cu b/src/cooling/cooling_cuda_tests.cu new file mode 100644 index 000000000..812788dac --- /dev/null +++ b/src/cooling/cooling_cuda_tests.cu @@ -0,0 +1,44 @@ +/*! +* \file hllc_cuda_tests.cpp +* \author Robert 'Bob' Caddy (rvc@pitt.edu) +* \brief Test the code units within hllc_cuda.cu +* +*/ + +// STL Includes +#include + +// External Includes +#include // Include GoogleTest and related libraries/headers + +// Local Includes +#include "../global/global_cuda.h" +#include "../utils/gpu.hpp" +#include "../utils/testing_utilities.h" +#include "../cooling/cooling_cuda.h" // Include code to test + +#ifdef COOLING + +TEST(tCOOLINGPracticeTest, PracticeTestExpectCorrectOutput) // test suite name, test name +{ + Real const testn = 5; + Real const testT = 5; + Real const testNumber = CIE_cool(testn, testT); + + Real const fiducialNumber = 100; + + Real absoluteDiff; + Real ulpsDiff; + + Bool istrue; + + istrue = nearlyEqualDbl(fiducialNumber, testNumber, absoluteDiff, ulpsDiff); + + EXPECT_TRUE(istrue) + << “The fiducial value is: ” << fiducialNumber << std::endl + << “The test value is: ” << testNumber << std::endl + << “The absolute difference is: ” << absoluteDiff << std::endl + << “The ULP difference is: ” << ulpsDiff << std::endl; +} + +#endif // COOLING \ No newline at end of file From 2f7d03efc21a2e2516478d7ddb4c6ad335c0e974 Mon Sep 17 00:00:00 2001 From: helenarichie Date: Fri, 25 Feb 2022 16:31:53 -0500 Subject: [PATCH 007/694] add cooling test --- src/cooling/cooling_cuda_tests.cu | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/cooling/cooling_cuda_tests.cu b/src/cooling/cooling_cuda_tests.cu index 812788dac..bfcaa0fe0 100644 --- a/src/cooling/cooling_cuda_tests.cu +++ b/src/cooling/cooling_cuda_tests.cu @@ -7,6 +7,7 @@ // STL Includes #include +#include // External Includes #include // Include GoogleTest and related libraries/headers @@ -30,13 +31,13 @@ TEST(tCOOLINGPracticeTest, PracticeTestExpectCorrectOutput) // test suite name, Real absoluteDiff; Real ulpsDiff; - Bool istrue; + Bool isTrue; istrue = nearlyEqualDbl(fiducialNumber, testNumber, absoluteDiff, ulpsDiff); - EXPECT_TRUE(istrue) - << “The fiducial value is: ” << fiducialNumber << std::endl - << “The test value is: ” << testNumber << std::endl + EXPECT_TRUE(isTrue) + << “The fiducial value is: “ << fiducialNumber << std::endl + << “The test value is: “ << testNumber << std::endl << “The absolute difference is: ” << absoluteDiff << std::endl << “The ULP difference is: ” << ulpsDiff << std::endl; } From 01f3f014058003dec8abe95dd6e24e669c3ba2f6 Mon Sep 17 00:00:00 2001 From: helenarichie Date: Fri, 25 Feb 2022 16:32:51 -0500 Subject: [PATCH 008/694] add cooling test --- src/cooling/cooling_cuda_tests.cu | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/cooling/cooling_cuda_tests.cu b/src/cooling/cooling_cuda_tests.cu index bfcaa0fe0..faa54b1ea 100644 --- a/src/cooling/cooling_cuda_tests.cu +++ b/src/cooling/cooling_cuda_tests.cu @@ -36,10 +36,10 @@ TEST(tCOOLINGPracticeTest, PracticeTestExpectCorrectOutput) // test suite name, istrue = nearlyEqualDbl(fiducialNumber, testNumber, absoluteDiff, ulpsDiff); EXPECT_TRUE(isTrue) - << “The fiducial value is: “ << fiducialNumber << std::endl - << “The test value is: “ << testNumber << std::endl - << “The absolute difference is: ” << absoluteDiff << std::endl - << “The ULP difference is: ” << ulpsDiff << std::endl; + << "The fiducial value is: " << fiducialNumber << std::endl + << "The test value is: " << testNumber << std::endl + << "The absolute difference is: " << absoluteDiff << std::endl + << "The ULP difference is: " << ulpsDiff << std::endl; } #endif // COOLING \ No newline at end of file From 7ab4c35023c08b0498e3147de6781c8d969db958 Mon Sep 17 00:00:00 2001 From: helenarichie Date: Mon, 28 Feb 2022 12:53:50 -0500 Subject: [PATCH 009/694] fix cooling test --- src/cooling/cooling_cuda_tests.cu | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/cooling/cooling_cuda_tests.cu b/src/cooling/cooling_cuda_tests.cu index faa54b1ea..fd0dbddd3 100644 --- a/src/cooling/cooling_cuda_tests.cu +++ b/src/cooling/cooling_cuda_tests.cu @@ -1,7 +1,7 @@ /*! -* \file hllc_cuda_tests.cpp -* \author Robert 'Bob' Caddy (rvc@pitt.edu) -* \brief Test the code units within hllc_cuda.cu +* \file cooling_cuda_tests.cpp +* \author Helena Richie (helenarichie@pitt.edu) +* \brief Testing the CIE cooling rate function in cooling_cuda.cu * */ @@ -10,15 +10,15 @@ #include // External Includes -#include // Include GoogleTest and related libraries/headers +#include // Include GoogleTest and related libraries/headers // Local Includes #include "../global/global_cuda.h" #include "../utils/gpu.hpp" #include "../utils/testing_utilities.h" -#include "../cooling/cooling_cuda.h" // Include code to test +#include "../cooling/cooling_cuda.h" // Include code to test -#ifdef COOLING +#ifdef COOLING_GPU TEST(tCOOLINGPracticeTest, PracticeTestExpectCorrectOutput) // test suite name, test name { From 36870aadb2d227ff011d1d831151f54763b7028f Mon Sep 17 00:00:00 2001 From: helenarichie Date: Mon, 28 Feb 2022 12:58:17 -0500 Subject: [PATCH 010/694] fix cooling test --- src/cooling/cooling_cuda_tests.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cooling/cooling_cuda_tests.cu b/src/cooling/cooling_cuda_tests.cu index fd0dbddd3..cd72ea8ac 100644 --- a/src/cooling/cooling_cuda_tests.cu +++ b/src/cooling/cooling_cuda_tests.cu @@ -33,7 +33,7 @@ TEST(tCOOLINGPracticeTest, PracticeTestExpectCorrectOutput) // test suite name, Bool isTrue; - istrue = nearlyEqualDbl(fiducialNumber, testNumber, absoluteDiff, ulpsDiff); + isTrue = nearlyEqualDbl(fiducialNumber, testNumber, absoluteDiff, ulpsDiff); EXPECT_TRUE(isTrue) << "The fiducial value is: " << fiducialNumber << std::endl From 000ca5eef8a1dc13f2188b8c2afab26d82e06646 Mon Sep 17 00:00:00 2001 From: helenarichie Date: Mon, 28 Feb 2022 13:03:06 -0500 Subject: [PATCH 011/694] fix cooling test --- src/cooling/cooling_cuda_tests.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cooling/cooling_cuda_tests.cu b/src/cooling/cooling_cuda_tests.cu index cd72ea8ac..aaeb75818 100644 --- a/src/cooling/cooling_cuda_tests.cu +++ b/src/cooling/cooling_cuda_tests.cu @@ -31,7 +31,7 @@ TEST(tCOOLINGPracticeTest, PracticeTestExpectCorrectOutput) // test suite name, Real absoluteDiff; Real ulpsDiff; - Bool isTrue; + bool isTrue; isTrue = nearlyEqualDbl(fiducialNumber, testNumber, absoluteDiff, ulpsDiff); From 8cdc94af2d0e37b42060919569d761d6b325d718 Mon Sep 17 00:00:00 2001 From: helenarichie Date: Mon, 28 Feb 2022 15:30:26 -0500 Subject: [PATCH 012/694] fix cooling test --- src/cooling/cooling_cuda_tests.cu | 1 + 1 file changed, 1 insertion(+) diff --git a/src/cooling/cooling_cuda_tests.cu b/src/cooling/cooling_cuda_tests.cu index aaeb75818..c657a0d9f 100644 --- a/src/cooling/cooling_cuda_tests.cu +++ b/src/cooling/cooling_cuda_tests.cu @@ -17,6 +17,7 @@ #include "../utils/gpu.hpp" #include "../utils/testing_utilities.h" #include "../cooling/cooling_cuda.h" // Include code to test +#include "../utils/testing_utilities.h" #ifdef COOLING_GPU From 7176c965ee18e3418adae9741225fc74f3cbf707 Mon Sep 17 00:00:00 2001 From: helenarichie Date: Mon, 28 Feb 2022 15:36:53 -0500 Subject: [PATCH 013/694] fix cooling test --- src/cooling/cooling_cuda_tests.cu | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/cooling/cooling_cuda_tests.cu b/src/cooling/cooling_cuda_tests.cu index c657a0d9f..4c18ae097 100644 --- a/src/cooling/cooling_cuda_tests.cu +++ b/src/cooling/cooling_cuda_tests.cu @@ -17,7 +17,6 @@ #include "../utils/gpu.hpp" #include "../utils/testing_utilities.h" #include "../cooling/cooling_cuda.h" // Include code to test -#include "../utils/testing_utilities.h" #ifdef COOLING_GPU @@ -34,7 +33,7 @@ TEST(tCOOLINGPracticeTest, PracticeTestExpectCorrectOutput) // test suite name, bool isTrue; - isTrue = nearlyEqualDbl(fiducialNumber, testNumber, absoluteDiff, ulpsDiff); + isTrue = testingUtilities::nearlyEqualDbl(fiducialNumber, testNumber, absoluteDiff, ulpsDiff); EXPECT_TRUE(isTrue) << "The fiducial value is: " << fiducialNumber << std::endl From cd5b712dc429ef2eaf461a28c4f85d2bb04b3871 Mon Sep 17 00:00:00 2001 From: helenarichie Date: Mon, 28 Feb 2022 15:46:43 -0500 Subject: [PATCH 014/694] fix cooling test --- src/cooling/cooling_cuda_tests.cu | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/cooling/cooling_cuda_tests.cu b/src/cooling/cooling_cuda_tests.cu index 4c18ae097..fcc6dca85 100644 --- a/src/cooling/cooling_cuda_tests.cu +++ b/src/cooling/cooling_cuda_tests.cu @@ -28,8 +28,8 @@ TEST(tCOOLINGPracticeTest, PracticeTestExpectCorrectOutput) // test suite name, Real const fiducialNumber = 100; - Real absoluteDiff; - Real ulpsDiff; + double absoluteDiff; + int64_t ulpsDiff; bool isTrue; From 721cc38ea1701531706b5685faa95297d3c306d8 Mon Sep 17 00:00:00 2001 From: helenarichie Date: Tue, 1 Mar 2022 12:09:29 -0500 Subject: [PATCH 015/694] develop cooling tests --- src/cooling/cooling_cuda.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cooling/cooling_cuda.cu b/src/cooling/cooling_cuda.cu index 9182313de..d9d71fd87 100644 --- a/src/cooling/cooling_cuda.cu +++ b/src/cooling/cooling_cuda.cu @@ -329,7 +329,7 @@ __device__ Real primordial_cool(Real n, Real T) /* \fn __device__ Real CIE_cool(Real n, Real T) * \brief Analytic fit to a solar metallicity CIE cooling curve calculated using Cloudy. */ -__device__ __host__ Real CIE_cool(Real n, Real T) +__host__ __device__ Real CIE_cool(Real n, Real T) { Real lambda = 0.0; //cooling rate, erg s^-1 cm^3 Real cool = 0.0; //cooling per unit volume, erg /s / cm^3 From 1869a95b7bd8de075daada99b83dbe9448e4ba95 Mon Sep 17 00:00:00 2001 From: helenarichie Date: Tue, 1 Mar 2022 12:11:00 -0500 Subject: [PATCH 016/694] develop cooling tests --- src/cooling/cooling_cuda.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cooling/cooling_cuda.cu b/src/cooling/cooling_cuda.cu index d9d71fd87..9182313de 100644 --- a/src/cooling/cooling_cuda.cu +++ b/src/cooling/cooling_cuda.cu @@ -329,7 +329,7 @@ __device__ Real primordial_cool(Real n, Real T) /* \fn __device__ Real CIE_cool(Real n, Real T) * \brief Analytic fit to a solar metallicity CIE cooling curve calculated using Cloudy. */ -__host__ __device__ Real CIE_cool(Real n, Real T) +__device__ __host__ Real CIE_cool(Real n, Real T) { Real lambda = 0.0; //cooling rate, erg s^-1 cm^3 Real cool = 0.0; //cooling per unit volume, erg /s / cm^3 From 2a12da313ee54a2b5dbb9e1ea91cfccc66686b69 Mon Sep 17 00:00:00 2001 From: helenarichie Date: Tue, 1 Mar 2022 12:29:48 -0500 Subject: [PATCH 017/694] develop cooling tests --- src/cooling/cooling_cuda.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cooling/cooling_cuda.h b/src/cooling/cooling_cuda.h index af00de1ae..ce8e79ff5 100644 --- a/src/cooling/cooling_cuda.h +++ b/src/cooling/cooling_cuda.h @@ -45,7 +45,7 @@ __device__ Real primordial_cool(Real n, Real T); /* \fn __device__ Real CIE_cool(Real n, Real T) * \brief Analytic fit to a solar metallicity CIE cooling curve calculated using Cloudy. */ -__device__ Real CIE_cool(Real n, Real T); +__device__ __host__ Real CIE_cool(Real n, Real T); /* \fn __device__ Real Cloudy_cool(Real n, Real T) From 3e503985254333028f8dc72b658dd9b39c1b3b4a Mon Sep 17 00:00:00 2001 From: helenarichie Date: Tue, 1 Mar 2022 12:34:39 -0500 Subject: [PATCH 018/694] develop cooling tests --- src/cooling/cooling_cuda_tests.cu | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/cooling/cooling_cuda_tests.cu b/src/cooling/cooling_cuda_tests.cu index fcc6dca85..d1721fa8c 100644 --- a/src/cooling/cooling_cuda_tests.cu +++ b/src/cooling/cooling_cuda_tests.cu @@ -22,8 +22,8 @@ TEST(tCOOLINGPracticeTest, PracticeTestExpectCorrectOutput) // test suite name, test name { - Real const testn = 5; - Real const testT = 5; + Real const testn = 1.0; + Real const testT = 5.0; Real const testNumber = CIE_cool(testn, testT); Real const fiducialNumber = 100; From 31725a1527eb4d500cfa14d83f95cd8f6d98910c Mon Sep 17 00:00:00 2001 From: helenarichie Date: Tue, 1 Mar 2022 12:41:59 -0500 Subject: [PATCH 019/694] develop cooling tests --- src/cooling/cooling_cuda_tests.cu | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/cooling/cooling_cuda_tests.cu b/src/cooling/cooling_cuda_tests.cu index d1721fa8c..bc6a2169d 100644 --- a/src/cooling/cooling_cuda_tests.cu +++ b/src/cooling/cooling_cuda_tests.cu @@ -8,6 +8,7 @@ // STL Includes #include #include +#include // External Includes #include // Include GoogleTest and related libraries/headers @@ -20,10 +21,10 @@ #ifdef COOLING_GPU -TEST(tCOOLINGPracticeTest, PracticeTestExpectCorrectOutput) // test suite name, test name +TEST(tCOOLINGTestCIECool, TestCIECoolExpectCorrectOutput) // test suite name, test name { Real const testn = 1.0; - Real const testT = 5.0; + Real const testT = pow(10, 5.0); Real const testNumber = CIE_cool(testn, testT); Real const fiducialNumber = 100; From 4135bbdced1963189a1f17a39c961b8faca59a60 Mon Sep 17 00:00:00 2001 From: helenarichie Date: Tue, 1 Mar 2022 12:52:57 -0500 Subject: [PATCH 020/694] develop cooling tests --- src/cooling/cooling_cuda_tests.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cooling/cooling_cuda_tests.cu b/src/cooling/cooling_cuda_tests.cu index bc6a2169d..0de5c3bc1 100644 --- a/src/cooling/cooling_cuda_tests.cu +++ b/src/cooling/cooling_cuda_tests.cu @@ -23,7 +23,7 @@ TEST(tCOOLINGTestCIECool, TestCIECoolExpectCorrectOutput) // test suite name, test name { - Real const testn = 1.0; + Real const testn = pow(10, -8); Real const testT = pow(10, 5.0); Real const testNumber = CIE_cool(testn, testT); From eee6d46ea81cd13a46f1f1ee69722ef1df4af004 Mon Sep 17 00:00:00 2001 From: helenarichie Date: Tue, 1 Mar 2022 12:54:27 -0500 Subject: [PATCH 021/694] develop cooling tests --- src/cooling/cooling_cuda_tests.cu | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/cooling/cooling_cuda_tests.cu b/src/cooling/cooling_cuda_tests.cu index 0de5c3bc1..9e9db0a60 100644 --- a/src/cooling/cooling_cuda_tests.cu +++ b/src/cooling/cooling_cuda_tests.cu @@ -23,11 +23,11 @@ TEST(tCOOLINGTestCIECool, TestCIECoolExpectCorrectOutput) // test suite name, test name { - Real const testn = pow(10, -8); + Real const testn = 1; Real const testT = pow(10, 5.0); Real const testNumber = CIE_cool(testn, testT); - Real const fiducialNumber = 100; + Real const fiducialNumber = 4.6639082688443984*pow(10, -22); double absoluteDiff; int64_t ulpsDiff; From 7ff4c4c2744504a7f7b4ee967d3f16edb2815800 Mon Sep 17 00:00:00 2001 From: helenarichie Date: Tue, 1 Mar 2022 14:35:26 -0500 Subject: [PATCH 022/694] develop cooling tests --- src/cooling/cooling_cuda_tests.cu | 2 +- src/dust/dust_cuda.cu | 195 ++++++++++++++++++++++++++++ src/dust/dust_cuda.h | 17 +++ src/dust/dust_cuda_updated.cu | 10 ++ src/dust/dust_cuda_updated_tests.cu | 46 +++++++ 5 files changed, 269 insertions(+), 1 deletion(-) create mode 100644 src/dust/dust_cuda.cu create mode 100644 src/dust/dust_cuda.h create mode 100644 src/dust/dust_cuda_updated.cu create mode 100644 src/dust/dust_cuda_updated_tests.cu diff --git a/src/cooling/cooling_cuda_tests.cu b/src/cooling/cooling_cuda_tests.cu index 9e9db0a60..7b2ef41db 100644 --- a/src/cooling/cooling_cuda_tests.cu +++ b/src/cooling/cooling_cuda_tests.cu @@ -43,4 +43,4 @@ TEST(tCOOLINGTestCIECool, TestCIECoolExpectCorrectOutput) // test suite name, te << "The ULP difference is: " << ulpsDiff << std::endl; } -#endif // COOLING \ No newline at end of file +#endif // COOLING_GPU \ No newline at end of file diff --git a/src/dust/dust_cuda.cu b/src/dust/dust_cuda.cu new file mode 100644 index 000000000..015e4f9f2 --- /dev/null +++ b/src/dust/dust_cuda.cu @@ -0,0 +1,195 @@ +#ifdef CUDA +#ifdef DUST_GPU + +#include"dust_cuda.h" +#include +#include +#include"global.h" +#include"global_cuda.h" +#include"gpu.hpp" + +__global__ void dust_kernel(Real *dev_conserved, int nx, int ny, int nz, + int n_ghost, int n_fields, Real dt, Real gamma, Real *dt_array) { + __shared__ Real min_dt[TPB]; // TPB = threads per block + + int n_cells = nx * ny * nz; + int is, ie, js, je, ks, ke; + is = n_ghost; + ie = nx - n_ghost; + if (ny == 1) { + js = 0; + je = 1; + } else { + js = n_ghost; + je = ny - n_ghost; + } + if (nz == 1) { + ks = 0; + ke = 1; + } else { + ks = n_ghost; + ke = nz - n_ghost; + } + + Real d_gas, E; // gas density, energy + Real n, T; // number density, temperature, initial temperature + // dust density rate of change, change in dust density, refined timestep + Real dd_dt, dd, dt_sub; + Real mu; // mean molecular weight + Real d_dust; // dust density + Real d_metal; // metal density + Real vx, vy, vz, p; + #ifdef DE + Real ge; + #endif + + mu = 0.6; + + // get a global thread ID + int blockId = blockIdx.x + blockIdx.y * gridDim.x; + int id = threadIdx.x + blockId * blockDim.x; + int zid = id / (nx * ny); + int yid = (id - zid * nx * ny) / nx; + int xid = id - zid * nx * ny - yid * nx; + // add a thread id within the block + int tid = threadIdx.x; + + _syncthreads(); + + // only threads corresponding to real cells do the calculation + if (xid >= is && xid < ie && yid >= js && yid < je && zid >= ks && zid < ke) { + d_gas = dev_conserved[id]; + E = dev_conserved[4*n_cells + id]; + // make sure thread hasn't crashed + if (E < 0.0 || E != E) return; + + vx = dev_conserved[1*n_cells + id] / d_gas; + vy = dev_conserved[2*n_cells + id] / d_gas; + vz = dev_conserved[3*n_cells + id] / d_gas; + + p = (E - 0.5 * d_gas * (vx*vx + vy*vy + vz*vz)) * (gamma - 1.0); + p = fmax(p, (Real) TINY_NUMBER); + + #ifdef DE + ge = dev_conserved[(n_fields-1)*n_cells + id] / d_gas; + ge = fmax(ge, (Real) TINY_NUMBER); + #endif + + n = d_gas * DENSITY_UNIT / (mu * MP); // number density of gas (in cgs) + + // calculate the temperature of the gas + T_init = p * PRESSURE_UNIT / (n * KB); + + #ifdef DE + T_init = d_gas * ge * (gamma - 1.0) * PRESSURE_UNIT / (n * KB); + #endif + + T = T_init; + + // dust density + d_dust = dev_conserved[5*n_cells + id]; + + // dust mass rate of change + dd_dt = d_gas_accretion(T, d_gas, d_dust, d_metal) + + d_thermal_sputtering(T, d_gas, d_dust); + + // Calculate change in dust density during simulation timestep + dd = dt * dd_dt; + + // if change in dust density is greater than 1% then refine timestep + while (dd/d_dust > 0.01) { + // what dt gives dd = 0.01*d_dust? + dt_sub = 0.01 * d_dust / dd_dt; + // use dt_sub in forward Euler update + d_dust += dd_dt * dt_sub; + + // how much time is left from the original timestep? + dt -= dt_sub; + + // update dust density rate of change + dd_dt = gas_accretion(T, d_gas, d_dust, d_metal) + + thermal_sputtering(T, d_gas, d_dust); + + /* calculate new change in density at this rate and repeat if greater + than 1% change */ + dd = dt * dd_dt; + } + + d_dust += dt * dd_dt; + + dev_conserved[5*n_cells + id] = d_dust; + + if (n > 0 && T > 0 && dd_dt > 0.0) { + // limit the timestep such that delta_T is 10% + min_dt[tid] = 0.01 * d_dust / dd_dt; + } + } + __syncthreads() + + // do the reduction in shared memory (find the min timestep in the block) + for (unsigned int s=1; s metals = {0.97, 0.40, 0.096, 0.099, 0.079, 0.058, 0.14, 0.040}; + Real metallicity + std::for_each(metals.begin(), metals.end(), [&] (int n) { + metallicity += n; + }); + + Real initialize_densities() { + Real d0_gas = MP * n; // g/cm^3 + Real d0_metal = metallicity * d0_gas; + Real d0_dust = d0_gas / 100 // assume 1% dust-to-gas fraction + + return d0_gas, d0_metal, d0_dust; + } + + Real calc_tau_g() { + Real tau_g_ref = 0.2*pow(10, 9); // 0.2 Gyr in s + Real d_ref = MP; // 1 H atom per cubic centimeter + Real T_ref = 20.0; // 20 K + Real tau_g; + tau_g = tau_g_ref * (d_ref/d0_gas)) * pow(T_ref/T,1/2); + + return tau_g; + } + + Real calc_tau_sp() { + Real a1 = 1; // dust grain size in units of 0.1 micrometers + Real d0 = n/(6*pow(10,-4)); // gas density in units of 10^-27 g/cm^3 + Real T_0 = 2*pow(10,6); // K + Real omega = 2.5; + Real A = 0.17*pow(10,9) * YR_IN_S; // 0.17 Gyr in s + + return A * (a1/d0) * (pow(T_0/self.T, omega) + 1); // s + } + +}; \ No newline at end of file diff --git a/src/dust/dust_cuda.h b/src/dust/dust_cuda.h new file mode 100644 index 000000000..34852adb2 --- /dev/null +++ b/src/dust/dust_cuda.h @@ -0,0 +1,17 @@ +#ifdef CUDA +#ifdef DUST_GPU + +#ifndef DUST_CUDA_H +#define DUST_CUDA_H + +#include"gpu.hpp" +#include +#include"global.h" + +__global__ void dust_kernel(Real *dev_conserved, int nx, int ny, int nz, + int n_ghost, int n_fields, Real dt, Real gamma, Real *dt_array); + +__device__ Real d_gas_accretion(Real T, Real d_gas, Real d_dust, + Real d_metal); + +__device__ Real thermal_sputtering(Real T, Real d_dust); diff --git a/src/dust/dust_cuda_updated.cu b/src/dust/dust_cuda_updated.cu new file mode 100644 index 000000000..f39c04114 --- /dev/null +++ b/src/dust/dust_cuda_updated.cu @@ -0,0 +1,10 @@ +#ifdef CUDA +#ifdef DUST_GPU + +#include"dust_cuda_updated.h" +#include +#include +#include"global.h" +#include"global_cuda.h" +#include"gpu.hpp" + diff --git a/src/dust/dust_cuda_updated_tests.cu b/src/dust/dust_cuda_updated_tests.cu new file mode 100644 index 000000000..9682dd326 --- /dev/null +++ b/src/dust/dust_cuda_updated_tests.cu @@ -0,0 +1,46 @@ +/*! +* \file dust_cuda_tests.cu +* \author Helena Richie (helenarichie@pitt.edu) +* \brief Test dust model functions +* +*/ + +// STL Includes +#include +#include +#include + +// External Includes +#include // Include GoogleTest and related libraries/headers + +// Local Includes +#include "../global/global_cuda.h" +#include "../utils/gpu.hpp" +#include "../utils/testing_utilities.h" +#include "../cooling/cooling_cuda.h" // Include code to test + +#ifdef DUST_GPU + +TEST(tCOOLINGTestCIECool, TestCIECoolExpectCorrectOutput) // test suite name, test name +{ + Real const testn = 1; + Real const testT = pow(10, 5.0); + Real const testNumber = CIE_cool(testn, testT); + + Real const fiducialNumber = 4.6639082688443984*pow(10, -22); + + double absoluteDiff; + int64_t ulpsDiff; + + bool isTrue; + + isTrue = testingUtilities::nearlyEqualDbl(fiducialNumber, testNumber, absoluteDiff, ulpsDiff); + + EXPECT_TRUE(isTrue) + << "The fiducial value is: " << fiducialNumber << std::endl + << "The test value is: " << testNumber << std::endl + << "The absolute difference is: " << absoluteDiff << std::endl + << "The ULP difference is: " << ulpsDiff << std::endl; +} + +#endif // DUST_GPU \ No newline at end of file From bb88d0bddd1e544a904a986bb5d930fdfb3f4b85 Mon Sep 17 00:00:00 2001 From: helenarichie Date: Thu, 3 Mar 2022 17:12:25 -0500 Subject: [PATCH 023/694] add dust model --- src/dust/dust_cuda.cu | 315 ++++++++---------- src/dust/dust_cuda.h | 46 ++- ...da_updated_tests.cu => dust_cuda_tests.cu} | 4 +- src/dust/dust_cuda_updated.cu | 10 - 4 files changed, 189 insertions(+), 186 deletions(-) rename src/dust/{dust_cuda_updated_tests.cu => dust_cuda_tests.cu} (88%) delete mode 100644 src/dust/dust_cuda_updated.cu diff --git a/src/dust/dust_cuda.cu b/src/dust/dust_cuda.cu index 015e4f9f2..59bf79295 100644 --- a/src/dust/dust_cuda.cu +++ b/src/dust/dust_cuda.cu @@ -1,195 +1,170 @@ #ifdef CUDA #ifdef DUST_GPU -#include"dust_cuda.h" +#include"dust_cuda_updated.h" #include -#include +#include #include"global.h" #include"global_cuda.h" #include"gpu.hpp" -__global__ void dust_kernel(Real *dev_conserved, int nx, int ny, int nz, - int n_ghost, int n_fields, Real dt, Real gamma, Real *dt_array) { - __shared__ Real min_dt[TPB]; // TPB = threads per block - - int n_cells = nx * ny * nz; - int is, ie, js, je, ks, ke; - is = n_ghost; - ie = nx - n_ghost; - if (ny == 1) { - js = 0; - je = 1; - } else { - js = n_ghost; - je = ny - n_ghost; - } - if (nz == 1) { - ks = 0; - ke = 1; - } else { - ks = n_ghost; - ke = nz - n_ghost; - } - - Real d_gas, E; // gas density, energy - Real n, T; // number density, temperature, initial temperature - // dust density rate of change, change in dust density, refined timestep - Real dd_dt, dd, dt_sub; - Real mu; // mean molecular weight - Real d_dust; // dust density - Real d_metal; // metal density - Real vx, vy, vz, p; - #ifdef DE - Real ge; - #endif - - mu = 0.6; - - // get a global thread ID - int blockId = blockIdx.x + blockIdx.y * gridDim.x; - int id = threadIdx.x + blockId * blockDim.x; - int zid = id / (nx * ny); - int yid = (id - zid * nx * ny) / nx; - int xid = id - zid * nx * ny - yid * nx; - // add a thread id within the block - int tid = threadIdx.x; - - _syncthreads(); - - // only threads corresponding to real cells do the calculation - if (xid >= is && xid < ie && yid >= js && yid < je && zid >= ks && zid < ke) { - d_gas = dev_conserved[id]; - E = dev_conserved[4*n_cells + id]; - // make sure thread hasn't crashed - if (E < 0.0 || E != E) return; - - vx = dev_conserved[1*n_cells + id] / d_gas; - vy = dev_conserved[2*n_cells + id] / d_gas; - vz = dev_conserved[3*n_cells + id] / d_gas; - - p = (E - 0.5 * d_gas * (vx*vx + vy*vy + vz*vz)) * (gamma - 1.0); - p = fmax(p, (Real) TINY_NUMBER); - - #ifdef DE - ge = dev_conserved[(n_fields-1)*n_cells + id] / d_gas; - ge = fmax(ge, (Real) TINY_NUMBER); - #endif - - n = d_gas * DENSITY_UNIT / (mu * MP); // number density of gas (in cgs) - - // calculate the temperature of the gas - T_init = p * PRESSURE_UNIT / (n * KB); - +__global__ void dust_kernel(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, +int n_fields, Real dt, Real gamma, Real *dt_array) { + __shared__ Real min_dt[TPB]; // TPB = threads per block + + // get grid inidices + int n_cells = nx * ny * nz; + int is, ie, js, je, ks, ke; + Get_Indices(nx, ny, nz, is, ie, js, je, ks, ke); + + // get a global thread ID + int id; + int xid, yid, zid; + int tid; + Get_GTID(id, xid, yid, zid, tid); + + // define physics variables + Real d_gas, d_dust; // fluid mass densities + Real n; // gas number density + Real T, E, p; // temperature, energy, pressure + Real mu = 0.6; // mean molecular weight + Real vx, vy, vz; // velocities #ifdef DE - T_init = d_gas * ge * (gamma - 1.0) * PRESSURE_UNIT / (n * KB); - #endif - - T = T_init; + Real ge; + #endif // DE - // dust density - d_dust = dev_conserved[5*n_cells + id]; + // define integration variables + Real dd_dt; // instantaneous rate of change in dust density + Real dd; // change in dust density at current time-step + Real dd_max = 0.01; // allowable percentage of dust density increase - // dust mass rate of change - dd_dt = d_gas_accretion(T, d_gas, d_dust, d_metal) + - d_thermal_sputtering(T, d_gas, d_dust); - - // Calculate change in dust density during simulation timestep - dd = dt * dd_dt; - - // if change in dust density is greater than 1% then refine timestep - while (dd/d_dust > 0.01) { - // what dt gives dd = 0.01*d_dust? - dt_sub = 0.01 * d_dust / dd_dt; - // use dt_sub in forward Euler update - d_dust += dd_dt * dt_sub; + _syncthreads(); + + if (xid >= is && xid < ie && yid >= js && yid < je && zid >= ks && zid < ke) { + // get quantities from dev_conserved + d_gas = dev_conserved[id]; + d_dust = dev_conserved[5*n_cells + id]; + E = dev_conserved[4*n_cells + id]; + // make sure thread hasn't crashed + if (E < 0.0 || E != E) return; + + vx = dev_conserved[1*n_cells + id] / d_gas; + vy = dev_conserved[2*n_cells + id] / d_gas; + vz = dev_conserved[3*n_cells + id] / d_gas; + + #ifdef DE + ge = dev_conserved[(n_fields-1)*n_cells + id] / d_gas; + ge = fmax(ge, (Real) TINY_NUMBER); + #endif // DE + + // calculate physical quantities + p = Calc_Pressure(E, d_gas, vx, vy, vz, gamma); + + Real T_init; + T_init = Calc_Temp(p, n); + + #ifdef DE + T_init = Calc_Temp_DE(d_gas, ge, gamma, n); + #endif // DE + + T = T_init; + + // calculate change in dust density + Dust dustObj(T, n, dt, d_gas, d_dust); + dustObj.calc_tau_sp(); + + dd_dt = dustObj.calc_dd_dt(); + dd = dd_dt * dt; + + // ensure that dust density is not changing too rapidly + while (d_dust/dd > dd_max) { + dt_sub = dd_max * d_dust / dd_dt; + dustObj.d_dust += dt_sub * dd_dt; + dustObj.dt -= dt_sub; + dt = dustObj.dt; + dd_dt = dustObj.calc_dd_dt(); + dd = dt * dd_dt; + } + + // update dust and gas densities + dev_conserved[5*n_cells + id] = dustObj.d_dust; + dev_conserved[id] += dd; + } + __syncthreads(); + + // do the reduction in shared memory (find the min timestep in the block) + for (unsigned int s=1; s 0 && T > 0 && dd_dt > 0.0) { - // limit the timestep such that delta_T is 10% - min_dt[tid] = 0.01 * d_dust / dd_dt; +__device__ void Get_Indices(int nx, int ny, int nz, int is, int ie, int js, int je, int ks, int ke) { + is = n_ghost; + ie = nx - n_ghost; + if (ny == 1) { + js = 0; + je = 1; + } else { + js = n_ghost; + je = ny - n_ghost; } - } - __syncthreads() - - // do the reduction in shared memory (find the min timestep in the block) - for (unsigned int s=1; s metals = {0.97, 0.40, 0.096, 0.099, 0.079, 0.058, 0.14, 0.040}; - Real metallicity - std::for_each(metals.begin(), metals.end(), [&] (int n) { - metallicity += n; - }); - - Real initialize_densities() { - Real d0_gas = MP * n; // g/cm^3 - Real d0_metal = metallicity * d0_gas; - Real d0_dust = d0_gas / 100 // assume 1% dust-to-gas fraction - - return d0_gas, d0_metal, d0_dust; - } - - Real calc_tau_g() { - Real tau_g_ref = 0.2*pow(10, 9); // 0.2 Gyr in s - Real d_ref = MP; // 1 H atom per cubic centimeter - Real T_ref = 20.0; // 20 K - Real tau_g; - tau_g = tau_g_ref * (d_ref/d0_gas)) * pow(T_ref/T,1/2); - - return tau_g; - } - - Real calc_tau_sp() { - Real a1 = 1; // dust grain size in units of 0.1 micrometers - Real d0 = n/(6*pow(10,-4)); // gas density in units of 10^-27 g/cm^3 - Real T_0 = 2*pow(10,6); // K - Real omega = 2.5; - Real A = 0.17*pow(10,9) * YR_IN_S; // 0.17 Gyr in s +__device__ Real Calc_Temp(Real p, Real n) { + Real T = p * PRESSURE_UNIT / (n * KB); + return T; +} - return A * (a1/d0) * (pow(T_0/self.T, omega) + 1); // s - } +__device__ Real Calc_Temp_DE(Real d_gas, Real ge, Real gamma, Real n) { + Real T = d_gas * ge * (gamma - 1.0) * PRESSURE_UNIT / (n * KB); + return T; +} -}; \ No newline at end of file +#endif // DUST_GPU +#endif // CUDA \ No newline at end of file diff --git a/src/dust/dust_cuda.h b/src/dust/dust_cuda.h index 34852adb2..78a5f8c26 100644 --- a/src/dust/dust_cuda.h +++ b/src/dust/dust_cuda.h @@ -1,3 +1,6 @@ +/*! \file dust_cuda.h + * \brief Declarations of dust functions. */ + #ifdef CUDA #ifdef DUST_GPU @@ -9,9 +12,44 @@ #include"global.h" __global__ void dust_kernel(Real *dev_conserved, int nx, int ny, int nz, - int n_ghost, int n_fields, Real dt, Real gamma, Real *dt_array); +int n_ghost, int n_fields, Real dt, Real gamma, Real *dt_array); + +// general purpose functions: +__device__ void Get_Indices(int nx, int ny, int nz, int is, int ie, int js, +int je, int ks, int ke); + +__device__ void Get_GTID(int id, int xid, int yid, int zid, int tid); + +__device__ Real Calc_Pressure(Real E, Real d_gas, Real vx, Real vy, Real vz, +Real gamma); + +__device__ Real Calc_Temp(Real p, Real n); + +#ifdef DE +__device__ Real Calc_Temp_DE(Real d_gas, Real ge, Real gamma, Real n); +#endif // DE + +class Dust: { + + public: + Real T, n, dt, d_gas, d_dust; + Real tau_sp; + Dust(Real T_in, Real n_in, Real dt_in, Real d_gas_in, Real d_dust_in) { + T = T_in; + n = n_in; + dt = dt_in; + d_gas = d_gas_in; + d_dust = d_dust_in; + } + void calc_tau_sp(); + Real calc_dd_dt(); + + private: + Real MP = 1.6726*pow(10,-24); // proton mass in g + Real YR_IN_S = 3.154*pow(10,7); // one year in s -__device__ Real d_gas_accretion(Real T, Real d_gas, Real d_dust, - Real d_metal); +}; -__device__ Real thermal_sputtering(Real T, Real d_dust); +#endif // DUST_CUDA_H +#endif // DUST_GPU +#endif // CUDA \ No newline at end of file diff --git a/src/dust/dust_cuda_updated_tests.cu b/src/dust/dust_cuda_tests.cu similarity index 88% rename from src/dust/dust_cuda_updated_tests.cu rename to src/dust/dust_cuda_tests.cu index 9682dd326..fa27f2474 100644 --- a/src/dust/dust_cuda_updated_tests.cu +++ b/src/dust/dust_cuda_tests.cu @@ -17,11 +17,11 @@ #include "../global/global_cuda.h" #include "../utils/gpu.hpp" #include "../utils/testing_utilities.h" -#include "../cooling/cooling_cuda.h" // Include code to test +#include "../dust/dust_cuda_updated.h" // Include code to test #ifdef DUST_GPU -TEST(tCOOLINGTestCIECool, TestCIECoolExpectCorrectOutput) // test suite name, test name +TEST(tDUSTAccretionTest, AccretionTestExpectCorrectOutput) // test suite name, test name { Real const testn = 1; Real const testT = pow(10, 5.0); diff --git a/src/dust/dust_cuda_updated.cu b/src/dust/dust_cuda_updated.cu deleted file mode 100644 index f39c04114..000000000 --- a/src/dust/dust_cuda_updated.cu +++ /dev/null @@ -1,10 +0,0 @@ -#ifdef CUDA -#ifdef DUST_GPU - -#include"dust_cuda_updated.h" -#include -#include -#include"global.h" -#include"global_cuda.h" -#include"gpu.hpp" - From 52efddf6d56cf0d7e25e78626eef03df498efe6e Mon Sep 17 00:00:00 2001 From: helenarichie Date: Fri, 4 Mar 2022 16:44:48 -0500 Subject: [PATCH 024/694] develop dust model --- src/dust/dust_cuda.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/dust/dust_cuda.cu b/src/dust/dust_cuda.cu index 59bf79295..a927d403a 100644 --- a/src/dust/dust_cuda.cu +++ b/src/dust/dust_cuda.cu @@ -1,7 +1,7 @@ #ifdef CUDA #ifdef DUST_GPU -#include"dust_cuda_updated.h" +#include"dust_cuda.h" #include #include #include"global.h" From 1c2753b26e076cd57a6aa9335b3abc00289e4766 Mon Sep 17 00:00:00 2001 From: helenarichie Date: Fri, 25 Feb 2022 13:53:21 -0500 Subject: [PATCH 025/694] add cooling type for tests --- src/system_tests/hydro_system_tests.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/system_tests/hydro_system_tests.cpp b/src/system_tests/hydro_system_tests.cpp index 72a6dc349..22f1dd6d4 100644 --- a/src/system_tests/hydro_system_tests.cpp +++ b/src/system_tests/hydro_system_tests.cpp @@ -31,7 +31,7 @@ * */ /// @{ -class tHYDROSYSTEMSodShockTubeParameterizedMpi +class tHYDROtCOOLINGSYSTEMSodShockTubeParameterizedMpi :public ::testing::TestWithParam { @@ -39,7 +39,7 @@ class tHYDROSYSTEMSodShockTubeParameterizedMpi systemTest::SystemTestRunner sodTest; }; -TEST_P(tHYDROSYSTEMSodShockTubeParameterizedMpi, +TEST_P(tHYDROtCOOLINGSYSTEMSodShockTubeParameterizedMpi, CorrectInputExpectCorrectOutput) { sodTest.numMpiRanks = GetParam(); @@ -47,7 +47,7 @@ TEST_P(tHYDROSYSTEMSodShockTubeParameterizedMpi, } INSTANTIATE_TEST_SUITE_P(CorrectInputExpectCorrectOutput, - tHYDROSYSTEMSodShockTubeParameterizedMpi, + tHYDROtCOOLINGSYSTEMSodShockTubeParameterizedMpi, ::testing::Values(1, 2, 4)); /// @} // ============================================================================= From 43877606de8b4c4e3fa13a60d22ab616f28fb1aa Mon Sep 17 00:00:00 2001 From: helenarichie Date: Fri, 25 Feb 2022 14:07:54 -0500 Subject: [PATCH 026/694] update file names for cooling test --- ...izedMpi_CorrectInputExpectCorrectOutput.h5 | Bin 12587920 -> 0 bytes ...edMpi_CorrectInputExpectCorrectOutput.txt} | 0 2 files changed, 0 insertions(+), 0 deletions(-) delete mode 100644 src/system_tests/fiducial_data/tHYDROSYSTEMSodShockTubeParameterizedMpi_CorrectInputExpectCorrectOutput.h5 rename src/system_tests/input_files/{tHYDROSYSTEMSodShockTubeParameterizedMpi_CorrectInputExpectCorrectOutput.txt => tHYDROtCOOLINGSYSTEMSodShockTubeParameterizedMpi_CorrectInputExpectCorrectOutput.txt} (100%) diff --git a/src/system_tests/fiducial_data/tHYDROSYSTEMSodShockTubeParameterizedMpi_CorrectInputExpectCorrectOutput.h5 b/src/system_tests/fiducial_data/tHYDROSYSTEMSodShockTubeParameterizedMpi_CorrectInputExpectCorrectOutput.h5 deleted file mode 100644 index 9152a0663c3659c10a0d8a56905d78fec315168f..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12587920 zcmeF)3;b>6oImj2>F9pC9G66xjQeG%VaBy~&M1ZGrrbuQiyB>w)BQq7E+vFgk}iY@ zNeKTgGNlp?MmhP9WYGMnNQLS4Uu*C6{hhO)Ilr^d$*uK#-@RVbx3$;)ZNK$=p6~lv zd)1uh9h*LPlhsyv@G4X7Un{LNb)Tsm{_);9l`5n4{nAO#{cl8s? zjs9WQ)cvR0b8C06ui1V6y0b?wti8ghZnM?K8&5UwjL(nS{n*r|qu+39y?8-QH3FM% z{G2V?3w~*L)iK|`+0C;UjGcGcYyR&095}V--h1w{*FO91x%~m7Gt+mKWl1*W`?gl^_m^`+;hj~mAMZeOszCLv(eO3 zSDkw5-Q7H&HEZ-6$L}3=aNDlKzZQPpa>LKtXZRWS;PWm$zWwWjg`YQj_<1{bAGplF z-zyH7_rc@uJZ#R?M#JA|*3^nqbDHbjYy0{8?DD#)sWn%gYRqW=T6g&OnXa=rxl{Xw zg}>Lz-RJDO`!0LzJb(DZ&EJ;c^M-QFnHo-uMvhfR;+&y;L>oxoAw(ygebsV3&^Y!0)*bDc&cH!rae&0QJoIiizr!ITmn_T_o z3xD=+*MDT;=dM2defQgCkG*$l8X-P=S%2So|M-n(oOIng?z8Z7R~yZvQGdn9F7teU z#=f`x_Dj$Fmx~vE?r3~>8TQ}w$1d~dZn(m;pMT7eKX}r@`L+CTe(k*bp2N8{)4b~1 zb1G^x1vgsc{}!1Ct4@F3_RY}lxJUD@=C6DHx#7pCpZ4B$*ZI3NlXe+CZ}bave_wdr zU4Opr^?UEsT=_l!ypgnRzqh`~4?_xVL_-5(m>K|X8h>BBe9nmu}9qtTCbYK|sV`_fFW z&u+%J{mGerzvY`%r9Hpga?1_>2Ucp{-~Kgw)_YbR{zLP(b>knJdf#ppYTx|0&mOqr z!j*H8H~k6!aqAbo__^(vY}9?`1*@;S#7`g1{;8=e*I4rVMr-NR)YF@FW=Vgo*`0jP zs_QQ4ZPR0P@|-2VZ@unQKDXME-?zm7v3qcGa8rBdd)gsg-L|VQ=<4&;;rP@t4w382 zx?JDT)yVyc-SOXc^$}ftc=39iS4aKRmvM+ZpWNjcxyQVS>v&Li{()UxOC37U*Kvp( zBiBcDc}DKxTg;F0{qb{acR#0&`o3@DuuhlXBfGkJS0n#$KIVDkUe1TUU3d21H*$<| z2#i4v}Bf$a!h=k9p9S>-Bq%&(Fv`s{F^tr}p_0xkr`1 z{M+X~{Le8DqVJ>X{~Wo;x)!-dmB0M?fA?w8AGO!$`>6U~|LcGK@BTlB1L1yD{^!U& z?rS3VsPdP;_22q${mgrt9J$ARVB{WE{_?l}TmP;9IUET0qw+sT?$-ZYIY-`QjlPen|8vcY$UUn3o%zx6+d1L1yD{^!Wu`kyQ3$h)l3_fhqKu6Yr; zN0q<)-T%A)cmJQmfp9-6|8wLX_kodnRQb!_`fvTW{^xKY+>grt9JyQnbLAX)mo@r6 zs{YS4FCzD-@|VB+fA|0H|8qDH?nmW+j@;uuFmjJ7fB9Sgt^d~l91ev0Q78Ey>qhMR z6+fqr=1bqlq0B$_0ge4X>xkpg z{lJ|IVjifY-1{~Tkz?dKQ~q&X@&1?(YCroua*JFi-*cOL+VADh>3{vN|MkE7|C|o+KWFZ7 zA0U7ETmP;9*8iLi@IPnn*8kkO%U}NOeINgG&jb0(-}gV?|9t<;=>Y$8=I;JKckc3+ zzwdu}@B8$>{@4Hd-~E4X2jnk*&;RnS1NvY8>wo?4`F~Ca_@6U(&wp~~E`RxZ{+IW@ zPyg$G{jdMs|L1l<{_^+yFYh{_|MkEA*Z-dX=X8MoIdk{?CwK1hm%rzKdGGu5zy8<% z`rrM3ZU^Kqf6xE&t^@jC|LcGKUq1hj6wb|eM=4)yjhsgKKMed*P zj@7pRZ@#A1aX72XIp#ssxZX3n^JV=^_qsZoAAKE%&viMLu9f+Jx_h5G%Da|vh&>n*YG^@kKCh1{&BwidB^>pAM)>aet&HL$zT5M_nPoM+>grty61=fuRGsK z>x2B|&;OgRnbaq>p4b2SU;pcW_y0K^;D65C<9Qy9pHb?+^zq)bCR4)8x`?wzE1 z{jdK$|Ig_F|8wT<`A_cL3{vN|MkE7|J)A9U;du|<=67Q%AYiG7g{Z_VqodtFaGw%!Akut~`#{)zN(D z+c-p?k^7g1`JXlCxn13)tDCCL{_khascjrK>vD}cugE=Ke?j-U+V=Tbb7~uh=Xd!= z-^VwKa7tIU1#F4q@zHFA#{`A2RV+e38HW+wU{{&p8kDf6m-veUrcZ`G3}&=!a}o z|LcGKum9cu=X8MoIdhNu9Qn)N`fvTW{^xXn|2cEF{^!nJ{_=0{`}m)G9>`z*zW@3D z=lfqy2l$^eclZCfbC3iezy8<%?*DT;Ab^7()40~q^0#y+9pg*tMh zmT`#uV;|tiKaOL6*m$2h%DvG_Ww-z$LqCRf7a`W93%g-mglpMQ?1us<}3fYT%R;=<)16>az4mk z{_XRg()~Ct=SMkz9QS+vl=(*PQOoo4?swj0er3Lqd(`rL`SXtZJs-+^BloD~d3pCc z?=rtK-^e{`dA|I4$NioUWxkPn)bhN%`<-{0Uzu;@9<@AQ{=DOU&xbPK$USO#Uf%uA zyUef5H*$|!o-cpialhw7nQ!DCwLCBHe&=20SLPeJM=j5nKkvBT^P$W)a*tY`mv_JO zF7qq%johP_=gXgW-0%5N<{P<3Ezirl-+7n$mH9^QQOon?&pYn-d?@pc+@qG~<=yYR z%lyiGBloD~`SRx-_j^8+`9|(h%k%Q?civ@wWxkPn)bf1!^N#yHAIf|q_o(H0dG|Z- zGQTq4$USO#zWjN|{hkkHzL9&>^1QtJop+gEnQ!DCwLD+`yyJe)hce&DJ!*Mg-u=$I z%&*Kha*tY`FMr-~zvn}lZ{!}eJTLEl=UwJk<{P<3Ezg%f@3`Odq0BdOk6NCWcfa#4 z^DFa>+@qG~%b$1L@A**X8@Wd<&&#{td6)T>`9|(h%k$;WJMQ;@_hO8j{7|y%6udDsO5Qi_dD-0 zzcSy*J!*Nr{CUUyo)2Zdk$cqgyuACJcbQ+AZ{!}eJYW92<9^SFGT+EOYI$DX{m#40 zugo`ck6NBDf8KGw=R=uq%xZm@k%r|n6TAr78zw<8h zEAx%qqn78(pLg8v`B3H?xkoL}%e&usm-&_XM($C|^X1Pw?)Q8s^Nrl2mgnW&@4UR z+eU-tdYU@uYIz*5tK;L;_wm@E$UTmqQ_O>y59%oYzK=u9+h-R!NA4RI^P)VCpI7_k zLF5?oH~KwtjMw8>?N`2kY+jLn%#-r-;&pX2FZwzTk!$2V({a4s*X#6qkI&P{bEaC( zhj^cH93RKNjz{DF4?a#*YcSEOCGu@z zxBgrIOBed?p5K4uTA6Ek%zys#pZ}!`{H();GS~8$|8?X%Q@@sZm&fv#zxChxU%D`p zCtmND7iF&HG5`CO=d$)+nRj_CfB9Sgt^cJ9%j%5x-#Z;Cb1jegfA7rgPt{Ll-sQ3U zOde_db7T+3tr|8?{EbN5G?cX=#-`CI?3|D_9mZkK%iU&fs>*YcSE zf0-QqTJyckyF8Y^{H_1i|I&rOmRJ6F|Fj;Jxt7QL|I?25y>0%Md6&oXm%sJj`d_+m zZ*$D=bI+VAb1jegf6vCXFY~d?yF8Y^{H_1i|I&rN_~!RqvR`Gc&SVgel7DZkL53a>%a9s z{0JY)<9J;idC`|~SlS$CS_k@aoqo@yo0pMu>;oM8LC0~7W4tep)ukKXd+RtwoQo>)%u^q z1^(yA+4`S5XZg#&z0WJ>0snK)3;E06_rL$`V(Db9$NFFY>wo^}qht|MA>Es^|YXT;P9>oXhWhaV&rNTmP;9vF=B;{^xLk|2cAw^}ej~m%sJj z`XB3lRO^2Z7x;L!8*Z-qF;D0U;_@689(mDCd-}-O; zk99w4>4f7to#20+xh`$L%3uE0f9rp&`%#zH5AW-@AN=oket&HL$zT4~f9rp&`%(W` zSNwcGT;YE|a{6=illHd(v{H_1i|5*2<{-quA zzxUU1i2r|`alg0BclpcT`fvS@bwBF8#V5bVpVue;|9Qvv-adcjFMsR5^*`4AsP}fa z{C4R+78GRl#_5qJ^h}Yvd=7T!&q3`3cXuh{^ zx7zDJ-|Ppj<{O8zirmjGj?Ig@`=ZQ0@{ZTlQU0}$L*yQ{_UmpCk9^ z@5sBXabB%^K9sq~vE1v&`v$jl9bm=heFBL796T%f0U0 zBj3ootZ`ngdmfaz$Fbb&&OP#tyvrKr)w<_FnR^_|z3$v2-^jbHabB%^9+bJqvE1v< zJ@Spb%Npm^y5~WedmPKX?%X5a$h)j@Uafl`l)1;T-0RLg@{PR98t2uz=Rui!9Lv4# z+#}z}yR30ut$QAnxyP~G>&`v$jl9bm=heFBL796T%f0U0Bj3ootZ`ngdmfaz$Fbb& z&OP#tyvrKr)w<_FnR^_|z3$v2-^jbHabB%^9+bJqvE1v&`v$jl9bm=heFBL796T%f0U0Bj3ootZ`ngdmfaz$Fbb&&OP#tyvrKr)w<_FnR^_| zz3$v2-^jbHabB%^9+bJqvE1v8HS(QQZ=Q&<7mg00%h00S<70103K02ROh14sd`2 z9N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8 zaDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0W zzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G;6QE%?)uV7a}Qo` zaFc2r%maPE0S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h0 z0S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K0 z2ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`2 z9N+*4IKTl8aDW3G-~b0WzyS_qcHs8;I~;ZSlLt2q>w0RcagclNc|TMB@wz&iAGMCd zOy0-!uHUT(>S!L+G7gci{OihfY3HZ>bLJlNL;mt_zxVM!=RDB=IdkWK?%d@s|D}1} z8|y>N7qzZ=6ZyLTt1H)~?Nj;Z%su9Z{N?Za|52AmKV+-=U;pcW{qO!irvv=YnS0#l z$Y1`}f9t>XKc@ry&zZaRKX>l(m%o4ioAeCW^}qht z|DONnbb$XkbNBowckc3+zvq8>@B8$>{@4Hd-~E4X2jnk*&;RnS1NvY8>wo?4`F~Ca z_@6U(&wp~~E`RxZ{+IW@Pyg$G{jdMs|L1l<{_^+yFYh{_|MkEA*Z-dX=X8MoIdk{? zCwK1hm;bUn|BH1(9nG8C#v$(i%Ik{P)lu%Xjzg3CZ{|P#yRUqpY8=c1eZT<@aDW3G z-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$ zfCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h0 z0S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K0 z2ROh14&-*=2lxNX&(2tXaMQ4^r=}VQx#ynu^3R<=@8#dF|K@>tU>@iL4sd`29N+*4 zIKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G z-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$ zfCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS^{-htQ$I;wG6 z{5aJ%E=}IYUVGj3k2+E{4(5SA-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`2 z9N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8 zaDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0W zzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4ay#(AW4C(fVdo8QQjLRopbt2} z0S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K0 z2ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`2 z9N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8 zaDW3G-~b0Wz=6aL+_>5H$6tKk69=ksFc0(r2ROh14sd`29N+*4IKTl8aDW3G-~b0W zzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h z00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70 z103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%gb*?}9L@Xmc7yThgf z)i{_3`hWu*-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G z-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$ zfCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h0 z0S<70103K02ROh14sd`29LVgzFE3gD;oth+o&(i5mwo?9)8{?nDFf9wm#J9N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h0 z0S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K0 z2ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`2 z9N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<701DPH8`LjN|^{(3;K2VK=d7uwC zzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h z00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70 z103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh1 z4sd`29N+*4IKY9-4*cv9k9xvwD}QI88VB=0A8>#J9N+*4IKTl8aDW3G-~b0WzyS_$ zfCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h0 z0S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K0 z2ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<701DPH8>7!4&?c3YD zd!QNz^FSYPfCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$ zfCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h0 z0S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K0 z2ROh14sd`29N+*4IKTl8aDW4u9k_1W8y@tofBM8gH4f&1KHvZcIKTl8aDW3G-~b0W zzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h z00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70 z103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02QoYG zlk3jd^=m6%Jy4B@JkSRm-~b0WzyS_$fCC)h00%h00S<70103K02ROh1 z4sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4 zIKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G z-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N<7|2M*ikfj9qhkHL>r<6s`> z0}gP2103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K0 z2ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`2 z9N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8 zaDW3G-~b0WzyS_$AhQFftbO*0^A-$#q#6hFKp${`103K02ROh14sd`29N+*4IKTl8 zaDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0W zzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h z00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCGsgSa9fy7u|mP z?15?=%maPE0S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h0 z0S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K0 z2ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`2 z9N+*4IKTl8aDW3G-~b0WzyS_qcHoDvU+bu!oc*YQY8=c1eZT<@aDW3G-~b0WzyS_$ zfCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h0 z0S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K0 z2ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14y1Nq zubUS9daXkTKU9r_d7uwCzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8 zaDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0W zzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h z00%h00S<70103K02ROh14sd`29N+*4IKY9#4lH=mh36i8_a+0?IG6|efCC)h00%h0 z0S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K0 z2ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`2 z9N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8 zaDW3GNbSI``<(gH$L~E@pc)7BKp${`103K02ROh14sd`29N+*4IKTl8aDW3G-~b0W zzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h z00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70 z103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCHHw*yM3fyy$7m4Hl@z!936h z9N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8 zaDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0W zzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h z00%h00S<70103K$Vh4VB+lfzp+B)|gsK&uO&<7mg00%h00S<70103K02ROh14sd`2 z9N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8 zaDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0W zzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G;6Q2zZol>W&wJL3 z2R~GegL$A2IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4 zIKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G z-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$ zfCC)h00%h00S<70103K02ROii#11Uj=7yag^1KZPs&Oz6^Z^GrzyS_$fCC)h00%h0 z0S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K0 z2ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`2 z9N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IFQ#J9N+*4 zIKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G z-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$ zfCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h0 z0S<701Bo5@!38h<$`7{q-9R-C=7B!o00%h00S<70103K02ROh14sd`29N+*4IKTl8 zaDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0W zzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h z00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0QJMiyUulSf9k6Uk`8VB=0 zA8>#J9N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4 zIKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G z-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$ zfCC)h00%h00S<701DPGT;*1lXa>hHi8mPv>JkSRm-~b0WzyS_$fCC)h00%h00S<70 z103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh1 z4sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4 zIKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N<7^2QHtwb+vtd zv-UtW4(5SA-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G z-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$ zfCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h0 z0S<70103K02ROh14sd`29N+*4GCOeDYEQmu%gvrPP>q9mpbt2}0S<70103K02ROh1 z4sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4 zIKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G z-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0Wz=6yT zTzb=;7vJ@v6$h$uFc0(r2ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h0 z0S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K0 z2ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`2 z9N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%gb+JRfHnf>C&?mxIxH4f&1KHvZcIKTl8 zaDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0W zzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h z00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70 z103K02NFAQ*-FQq`JoFwI8cp)d7uwCzyS_$fCC)h00%h00S<70103K02ROh14sd`2 z9N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8 zaDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0W zzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKY9_4y^UVBVV(^HwKrf#=$($ z2OQu42ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh1 z4sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4 zIKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G z-~b0WzyS_$fCC)hKxPNlx%SGh?{>!EGSxVk2l{{m9N+*4IKTl8aDW3G-~b0WzyS_$ zfCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h0 z0S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K0 z2ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70102Zgz`C<D^t*pDP>q9mpbt2}0S<70 z103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh1 z4sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4 zIKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G z-~b0Wz=6yTeD#=ztoOpJR~e|r!936h9N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h0 z0S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K0 z2ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`2 z9N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K$W(Usw_EGC>yvw5os&Oz6 z^Z^GrzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$ zfCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h0 z0S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K0 z2ROh14sd`29N+*4IFQm#J9N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh1 z4sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4 zIKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G z-~b0WzyS_$fCC)h00%h00S<701DPE-{^_X;N0U?<6s`>0}gP2103K0 z2ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`2 z9N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8 zaDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0W zzyS_$Agu$lrdF7mHP!qz{99X|-X2ZOnmu}9qv6ri)Y8`3Q)>@DYVN}aQ!5RBcB84M zt~&M9yJwF+X!+*V_LG-eZn@$Az)H>g+rMVdde8mV9sbkQRMhtHzTKy^Z+_fo58QET z>ZR?=f40BwP2;gqca`fdaVwy5>N9sd@9Y)SYR2I)*B|ixZ#H>v@q)E(esEJi z|C3*Q?pX_SzvGy%e(n1QZgA)w7p=V0o*!uH{u}-3^qso;YIQgseIAFC_ub;i=QlZ? zx78k}p4jC6#r-Z^Z`Y=F{XU)hG2i^iDI0EZ=Bd@cxspbc1`X(9dq3(Xa4W}r~5zpebL1UyJZwouuIUV4C&fKm4xpSAl{M&yI z&i~x=K>qUg{m=J5-~Vzt!2g`NyZ_IfyZq(v`(NJsKK-x%^}qgi|DW3d`ODw)zr5>! z{@4HdU;lglpVI;U=gi&npWM02U;du|<-PCI|N3A5>wowExgC(d{5}86yAJ4o{jdM^ zzvurs9pHb?+&%xvoxA+y@A+Te`#$}z|MkEAcmJQ;0r|_{^S`|7fd1G2_q6|G-@o=g zGQLl!{hCiR^?$c7@Ex%~Y`0J7?cIKnv2Wn;&yQ>NkBdJ?P95#1)|YX(J`+&!O(6KLY><=6JgtoQaFLKf2cK=YvqkUBG?c>n(`*iN7UcU8x9=yR9 zwz}UL>;Cw%4Zg7DiU0hapYGJumtDBxB_}j>^K%}4=3Qzv+zx(2czvADUdi3RYAMtfAo9g{;vD~^zde#*y2s^-SOP!enIW&IK21D zTX+5WBKdE#&*i5)Y%%#q|3|;ey^eW*^tWE~nf>LT)-USl|C|o+KWFZ7A0U7ETmP;9*8iLi@IPnn*8kkO z%U}NOf49f~-19*G^7sAE_dnnNayr2OoVmOI&z-ycwx~(|N3A5d;Xu(0siOA-SeN^xyxVvp8w^&@6-SKU;pcW_y4&akiYyr|I51$ z=zsmM|MkD;|2ZAtf6m-J|H+-Z{N?ZYU*7vZ{jdM^zy5dspW6ZX%ir_Ayz9VB{om~i zJiY&C>=PRMMaI5?@$>3vAHBYf!!Zx|hm#I#{v5EYKflcvcWL&0-1dntzU&iC?e>F? zeSu?t*w`mj9qpIe`*DceSAEN)U;phU_mc+)uX@rV`+&!O(6KLY><=6J#HgeFQu{g% z(eIJ_+&6t-{(Uz1@sp2u`@Tm!x2Z>-a`bzzZtDA)N01z{cqUq zwcl^@Jm#f8UwQSWzUc)AE%>*lM()R6_>=P=+8iJM|E_x2rcFKJoP%F-sanrCJo`C! z|3{POv0vWoKXz#9(KoKS>O-0u{T{iWeAeNY-qE~%(uGfX#sN+J$hS8C>C3z0{ng>P z^l=D?^U?2-`)PCk@vWOQ=TH00t6%ajP5tO$ zpMK#h)!vT7X)C|y->z!%J8_Q_e)zJczW>&*ZQABp<{$kYxu5oh^-e#jdEaSs2hY7s z?e93O@tV(^+2nWPX-B^5olQM^}qht|N4KC z=l`DnF2nuvz1{aozaRhk&wu{&Kc@ry&zXDN7sy}!)_?22^*^Tr{Lh)Y^*?v+@|S;e z-xvQKzyJQrfBzN#{lDk`p8xy*|48D2`~Tec0rHo>=YO96dH$Ev0siOA-SeN^xyxVv zp8w^&@6-SKU;pcW_y4&akiYyr|I51$=zsmM|MkD;|2ZAtf6m-J|H+-Z{N?ZYU*7vZ z{jdM^zy5dspW6ZX%ir_Ayz7Ae*Z=xo|9k$Q(*geH%-!>!+__K4KlTMaq1zue_6d#s zB2Vk~4U9iOLhbg$>^$wuc$~V)2ma&e_Wp0bl|FQOQ``L?+duyy_JfXnfn$Hz*e5jp zybyJ?Z)R`Dq09ZaUw`7oCpNji`xU=`PFrIi@YoMJ_63gpVPl`rm@n$6Z~8h8k^5V( zJm>b`ZMfnqZaH|@?XPX?2~R!u@7wzI*WY+kTi?0W@7}J~GY+T!&!@L-^W5y_N4~PH z@40#9?b_Pqe(Qt&eXU{MUwh~5)7sZB+U@?Esr8J*x32yBb=o{{dfg|kY3r{Z_}G7F zYxH}U`>nGUto_{f_5VI=yQjAGqAPxX$)d;cx;h%4zKp{Sv#&j&&GEVqANTF1-n!=d zSNVQ>9L`6-$Mqck;TOKXJ%9YuU-9y`9(l+!S6BNx4w2*M-*)^H+Z@08zK8#~t!4hv z@A3XuAGXU;?fb4+|UHR4)8x`?vtMX#J<3>KWyw18v8%SzJW1+)X{#JeH({&{os{nx9k7= z&fD@KZH@gOV?XHF7r5IW_LkTuG}a4sv~Ol#$02gx{D6NP?&G@k?wu@7ns?1zZ23t&#h!8(#Xo_V|vU%>8&<@A}{e{#~tR9JZac{OuoX z^4xa$<>wFQ!EIN4=TmLI(eGXE+fF^PcWqnmyXzI3E_xiVtE2Jh$vC{^ zfBy5TOWM5e`q`RqXzNWo-u30ShVxy&Pv^eoPS4zPnEUp(zC>;Mp^|Y}`F=+n-{yYv zYhE;Tpv-@|-@EI*|5_Jr+~&W=Z!SGt?e93Ob=AwSYx7(ArcFQ7=KqIT4?A>NyZgMd z|6{((y^eVweJuYv@?6%wlz-0LW1h%g{?`Bg-oHg3-_xr9^}qht|L*^DI>7&&xySv2 z{N-=`xBgrIb2`BPoVi>7bLTF9`M39d{LeiPwx~(|N3A5d;Xu(0siOA-SeN^xyxVvp8w^&@6-SKU;pcW z_y4&akiYyr|I51$=zsmM|MkD;|2ZAtf6m+|J^$(U1&;k;W1rCJ{VrqQz?eVkXg|!p zjl%=}<_G7t>;Ebrd;ANUI=%nn???MVPwxxd?GL+cw@+y77pac+&Ft+sbh&T4#@n9# z(Ki3zZT6(K+8XBZ$g?OXJ@y!)AVnOoP#@=xlR{OjoBvagr+SpM?2{$G{${#F0$fBmojyYK(L|0Vk# zz<>Vppa1;N=>Y$8<{r<7wnMxb2`BPoVk1c zlRJ0$%ir_Ay!U*MX()|Iz-B_J6C(i<$E8_63gpKVzTJ z>HR;ueFNjq4^zASFuU>W&3MHAk8eG2ui?IrvHxSYA9U;s9Q(t@K7s0JztrB3L*%~V zMK`qhAG+nf+YIMHw-5OAe$d^%z|;G~#yn9+`=$1L93uBOuJT_yZ}^5syl(Cj4mf34 zKl$_P+q&iz^DY?H8}`3RZN{UPahT3^pFx}Jeb;!}R9ojp?*I1{Ck*r6@dc|4>()pA zx7y@i+c>=Oq3;~#{-dML80H)EqRV~m!8d+=c>byX^#HZ$k6OlI_x+9;=DEeI?mw*W zID3P(PW#r)^XcoIck;)F?^{?u(H*O;oAXZY=Q!-J@5hF@e`M>&FID~<%{zIRyIRLM zME+;4@xlrDkLI`B>&QKFlz&}$My^X+! z+_}qN{+|Ekz3}Qa-TUIt3sgt@ruKduwp;nq(f+U7UEkK}eZaf@py$TEz_CAU zIH8XAOYQqOd}NgS+J4w(~zVd|j<|9F802yZyZVhBf9z8FV_M6umAObzVChEfd1G2`d|Nh{*%)I{^!iy z`ky;@`OClk_Z|GtJrCqBf8YOn|MUGXrvv=YnY;V{+_}qN{=Wa^z3!{@4HdU;lglpVI;U=gi&npWM02U;du|<-PCI|N3A5>wowExgC(d{5}86yAJ4o z{jdM^zvurs9pHb?+&%xvoxA+yznJHL<^Hd+PiVS*5u-n9uf_Tw`$50${L#L^u|I75 zIl<~^ztmdBVV9|Eh7KNnpNoez_5qLmpkrU)*dI3b2~$V=rS^RsBKO_)e%FQ=^=6U43 z-*H4&eIWmH%?JITD{ubi&RhQSZ`b>n2mH@HALK88_x}q$3>ULh{jdM^ zzyA09C#M7a&zXDN-^gG7)_?22^*^Tr{Lh)Y^*?v+@|S;m-^c&l^FaRc_x;cJKi~gy zI>7&&xx4?*oxA+y@B3fg`#$}z|MkEAcmJQ;0r|_{^S`|7fd1G2`d|Nh{-4tU{^!iy z^Pk+g%U}MU|K+{!)BpNk|LcGE|G6EIzx+M_%exNffBmoj^}px;IUV4C&fGo!$(_6W z%gS`k9|Vpeo?Ki*OF0>}Qa>iBxu@9~J-_rLZJ8(zQHmfySalI4c=rf(jj zHsewKIPA31DEA{iy!gD&eDLUXwd!%W@raYg^Y8M$@yKn5ud9`g!|C51<^Gm`U2^`b zynQqe)VjtY@-Nr>vj62?SMHIg{Oim$@|{%WFaO1T&*6WLd7=MvolCf6m<9|L4wK{_^+zFYkSy{@4HdU;n%R&+UNxwo>P z|2_ZD=>Y$8=I;4V?%d@sf6xE&-uLN${jdM^zx)5(4#;2rp8w@t2lT)G*Z=z8^Z%R< z@IPnnp8w>|UH9Tr}OUi0iWIvy4)AITqhh4{pjsJ@zLi;oHST@ z^Rw1j?}GD2^{DIBc06hyhm+Qqdxo4R&EuK+{`~ih=7(DMI2`@J#d{yQANx0>^J-n= z5c$sJL!6g;UAae|k+1yg%r|nLsmecR?lB+aFaITd|B3k#^F^&|zC^y(|GIKr+CG(k z&fH^u$Y1`}|8>@je#ln!zy8<%`rrM3P6zm(Gxxa9k-z+{|JHx&e@+MZpEGysf9~An zFaP$wkN>&nf&As~`=9TBzW?QPfd4sjcmJO|clpcT_rJXNefnSj>wo?4{y(<^@|VBo ze|gsd{jdM^zyA09Kc@ry&zZaDKe=<4zx+M_%X{Ca|MkEA*Z=PSb2}h^`FsACcOB6G z`d|O+f6xDOI>7&&xqJSTJ9qiZ-}Aq`_kH?b|LcGK@BTlx1M-)@=YM(E0sXK4^}qg) z=l`)EbR35d>d1*&$072LeW0Vxv@dYHUmeYhzK=tb_hVoF!0n!Q4lu8Y-nJgOgu zgXEqf=L!8hQ{TVu>7)6f);$jIe#7FukKB)Y=jgmz*EmGJGp%!RUhZ|}9(hK-@~<=B z$a$tJ|D3tUe2~BVm-PK7=10sIwXXRR`C9+$%5`b`RQ@@0kNF{g`CI?DdtUTIwyOX2 zzy8<%?*DT-!2g`N$9<0c7&&xm*8p=PrNwxA%Sg&pi+1FMr?veE;+P zFQ)_i&zZaX|J=FDU;e)T<-PCI|N3A5>wowExgC(d{5}86yAJ4o{jdM^zvurs9pHb? z+&%xvoxA+y@A+Te`#$}z|MkEAcmJQ;0r|_{^S`|7fd1G2`d|Nh{-4tU{^!iy^Pk+g z%U}MU|K+{!)BpNk|LcGE|G6EIzx+M_%exNffBmoj_5bwq|876%I1V4wkrTC!L*yO% zKu4WvU*LGZI+_=KABQIIjSqV5C$GC;R#%^?4#y+QILLi8|Fh&>Ugr$?pMT$x18Uvl za6vgAy5s5GAKhKQI&|XvTE?O4Z}}&k-}0}kpO@7o`OClk-e>*KIS=%I&fH_2kiY!- ze_b`-=c2Edw(5WVumAPG`~RE{@IPnnaep9x`CI?3|JMJU4)8x`?$-a@xyxVv&3#|| z_dWjopa1@2asRz*%#XCs$HU25tNXv)>wx^_@A;4CKc4^Obb$XkbC3I3`ODw>Z~eFa z=X8MoIdixE=gwXJ^7rq5^4|C9fBmoj^}qZ7+z!ZJ{+|EkT?h2P{@4Hd-}C>R4)8x` z?wzE1{jdK$|Ig_F|8wT<`A_cL z3{vN|MkE7|J)8NZT{W+rvE%Z$D?^u$v8~^`Jb^5bkrH13r)Y@`Qbe1 z{dmk8?D(APANPP$o;Io{|E{aIsqMI&T=_W2J@-7H&cB@Jw|C>B4t+ei)^R9vkKk23c-uJw9d z*Z91Syvu5w#>c7F@hEeT<65uRb&b#K$h)k@X?&b&9gi~iIIi`2UDx=$j=alioW{qg z*6}EFkK&UyT#%X+ zk23c-uJw9d*Z91Syvu5w#>c7F@hEeT<65uRb&b#K$h)k@X?&b&9gi~iIIi`2UDx=$ zj=alioW{qg*6}EFkK&UyT#%X+k23c-uJw9d*Z91Syvu5w#>c7F@hEeT<65uRb&b#K$h)k@X?&b&9gi~i zIIi`2UDx=$j=alioW{qg*6}EFkK&UyT#%X+k23c-uJw9d*Z91Syvu5w#>c7F@#u1oeW0V(zVEr~8}0Ai z0}gP2103K02ROh14sd`29N+*4IKTl8aDW3G-~b0W zzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h z00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70 z103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$AhiQ0pa12%KC;Oz1JyW~2l{{m z9N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8 zaDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0W zzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h z00%h00S<70102Zgz-JHGZ?~^}>f-~|IG6|efCC)h00%h00S<70103K02ROh14sd`2 z9N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8 zaDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0W zzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G$n3zmZ#nhl=U)4T zfodGg1AV{&4sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh1 z4sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4 zIKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G z-~b0WzyS_$fCC)h00%h00S;t#;KJYBc=ahyIc=aC2lGH5aDW3G-~b0WzyS_$fCC)h z00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70 z103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh1 z4sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`2nH{*~ zmY+Z1gl!HSsK&uO&<7mg00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0W zzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h z00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70 z103K02ROh14sd`29N+*4IKTl8aDW3G;6P>vzV)@={B+;tHyNnL!936h9N+*4IKTl8 zaDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0W zzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h z00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70 z103K$W(Tf($~o`<$ZyUcsK&uO&<7mg00%h00S<70103K02ROh14sd`29N+*4IKTl8 zaDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0W zzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h z00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G;6P>vzWYCO*8lV;4j!n+!936h z9N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8 zaDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0W zzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h z00%h00S<70103K$W(U51(TmRe?W-;ysK&uO&<7mg00%h00S<70103K02ROh14sd`2 z9N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8 zaDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0W zzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G;6Q2z-n7-$C;!_) zgYT=x!936h9N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`2 z9N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8 zaDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0W zzyS_$fCC)h00%h00S<70103K$Vh4V3`pwJz{vFp1RO4VC=mQRLfCC)h00%h00S<70 z103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh1 z4sd`29N+*4IKTl8aDW3G-~b0WzyS{YfA;P?%;$RH|F}fbDmk{Ym0gyx&WtaE7E+c_ zAtXe|ntiE3wvc_xP9e&^&G@2}P(=16k(81wQAqLo&Wt(N@0{OtU-$jHuKS$(p0CIG z=XJhvG4p(XKaZL5&zl1r-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4 zIKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`2H?so=+r?&CP-8=k z)xrC~bHD)(aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8 zaDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0W zzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h z00%h00S<70103K02ROh14&2NR9BBPk-RT8R#8@4?4?G7P-~b0WzyS_$fCC)h00%h0 z0S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K0 z2ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`2 z9N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N@ss?7+U3 zTRJa&V{wet!TZ2-zyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G z-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$ zfCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h0 z0S<70103K02ROh14sd`29N+*4IKTl8+{_Lf%(1a)gDnSRtPb7>o&yeWfCC)h00%h0 z0S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K0 z2ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`2 z9N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8 zaNuTk;BZ{CAuqNZ8Dn+uKJXlHfCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8 zaDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0W zzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h z00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3hvjYd8`f}^o6W_#G9lQ@b2OQu4 z2ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`2 z9N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8 zaDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0W zzyS_$fCC)hz|HKyfkIzTtaeZF7^{Q#f#-k&9N+*4IKTl8aDW3G-~b0WzyS_$fCC)h z00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70 z103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh1 z4sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70101-i9T=Kx$+^DuVh&gx zybnAF9N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4 zIKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G z-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$ zfCC)h00%h00S<70103MMP3*wI(NAZ+aJWZ|)xrC~bHD)(aDW3G-~b0WzyS_$fCC)h z00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70 z103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh1 z4sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14&2lZ>_6G+ z#QlR}4q6?&4?G7P-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8 zaDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0W zzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h z00%h00S<70103K02ROh14sd`29N@ss?7--=R~n={5OdJ#;C4sd`29N+*4IKTl8 zaDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0W zzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h z00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S?^E z4s4rqyj#_W55-s=ybnAF9N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K0 z2ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`2 z9N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8 zaDW3G-~b0WzyS_$fCC)h00%h00S<70103MM&FsMEneyHCx4SFHSRK3%JO>=$00%h0 z0S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K0 z2ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`2 z9N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8 zaDW3G;K0r7z}6ZQ&(uHmag5c$`@nO+0S<70103K02ROh14sd`29N+*4IKTl8aDW3G z-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$ zfCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h0 z0S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS{2)DG19YFMU)`(w6R9lQ@b z2OQu42ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh1 z4sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4 zIKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G z-~b0WzyS_$fCC)hz|HJHv&Wz8J|I43tJT5#z;nO>4sd`29N+*4IKTl8aDW3G-~b0W zzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h z00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70 z103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S?^E4s@Mgs>IC2 zFfn9gIp6>XIKTl8aDW3G-~b0W zzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h z00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70 z103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02X1Bu zo;mUA=AQ<}Y_&RgA9xNpzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8 zaDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0W zzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h z00%h00S<70103K02ROh14sd`29N+*4IKY9M*@4<+zCTm4sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh1 z4sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4 zIKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G z-~b0WzyS_$fCC)h00%h00S?^E4%8Y}rO<`Ay)jk??*q>P2ROh14sd`29N+*4IKTl8 zaDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0W zzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h z00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%g5GdqxZ z`jdsXXPF;kb?`p$9B_aG9N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K0 z2ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`2 z9N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8 zaDW3G-~b0WzyS_$fCC)h00%h00S<7012?e)6_2HV@s5+v4z@aYA9xNpzyS_$fCC)h z00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70 z103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh1 z4sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4 zIKY9M+JU)?pK6z{O|Obp2k!&V0S7q10S<70103K02ROh14sd`29N+*4IKTl8aDW3G z-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$ zfCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h0 z0S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0Wa5Fp5@!<*^3mxuO(dyuR;5py` z2ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`2 z9N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8 zaDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0W zzyS_$fCC)h00(Yn2TG;M{CT=7BP&`RybnAF9N+*4IKTl8aDW3G-~b0WzyS_$fCC)h z00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70 z103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh1 z4sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103MMP3%C0G><;k>8XbA zR!Ea!+LmA1q^*$VmOee}Kf1R#n^HTpc{q0b z?)xjG8Jhpq*QPhEkmj}e=SQE54IIvLvEap@S_clz+}7*Bz25{5Ej^uPLA~1ohbC6; zefsRdz~SJDXNI&~A2`&s_U~)ghW@)vnH_pv{Ogv@)dGj^zI(;^1rG%dFFcX+^QTh> zR#fbEs#io*U`@``e`{6a)4<~IR}bxxX;)xnwt@2w#V-#mu37KU_$4ucWtpbmx^TiB zHbr*0Gq7UM&!yLQT^jhX`jyR}{PU{iJV8ot3b9R_gI?%gEl}VGoDHRyM zwaf>ff6yQ>wa8uH>|Xv>U~1vG<~jNm2rRky*Xdg>$J-RzVOHe!-6yL)9vF0aah8ZH zy91-LEcmubsj+d-|4`}5TaDM3ZT>-%<#XRT66oBcM49)GObGPd+b&C&?;Z(^Irqn) z!b^V(4BWi!sRox@*p%5J*ztkNlgi${CN8Q#`K*8Dem&}F-iMa8-+W@(OU>uq-!J~% zHOI0S{OD@`tHD8+qgv(pu})m6frHP_YfvmOcF4xbd4HQ9_qXZ;M_j2qGnN0Pr~*;h zpRD*zm8DApL-NIyj;dcVa`NeYF|(HqUfDF&q1*3`jYv_Sj1N`;1r$&~0RnJ#ywq zw{>a8{n$DEu=VTGOqtYo+sG;F(mdPiiHL^1)}`q(u+si{=hmeuKK)4dS*O>fiCHso z-SYW1rFO_yD*M9p&DW)wRPm8HAEsHCrsbA#&FYm7Oj_Hu;L%U^21e|wes|L+JBIFi zy|VAU9DfDIt=e93Qu>jBN%{7UUiiWzNv|L1V-vchzz&J$*kNK|)|*!|E*!HvFmTIj zRla$(TVPDXx8Dv{`y()+>FS}Emy`}n+uQa)>rqPr@6JDS>w%aJfw@gP7k%mY>wkZp z{OdNUPfF>Kc#eC13XC~lY3QfH){lAvv#dab!i6eH@44xYjoLad3 z?`QY#3{G0xa?aQ9-4~oOxqJ0n%eGH?y=bXT=#m0EB%b5_?Sh>KxfGaiY39WR6Q2kSx^#R0(y7)4VzXZQ`tFbJPkP;Bi%sZ~ z0y`w07>3Mim}0tzUgfC36Apnw7jD4>7>3Mim}0tzUgfC36Apnw7jD4>7>3Mim} z0tzUgfC36Apnw7jD4>7>3Mim}0tzUgfC36Apnw7jD4>7>3Mim}0tzUgfC36Apnw7j zD4>7>3Mim}0tzUgfC36Apnw7jD4>7>3Mim}0tzS)DiC*hQQd0mYGg>d=dE9v#GQ0# zNZj{_7R_GQGU-0RcdF(~x)-j`gErxw#FW<|@mwblC>`vW?Wr#cKAbjg@8R}E3rs%~ zjLlx~qpK%Z1mF2{|E!&lmJamp{!VbKD{H@CTV-*O4A6Goz77nn)9I_8TSo`p8&)?u>Qc@?-@Wa!^tHU5To%j7`KCBrTm@{P3;V1gO8kih;{rA2l zehW;>zwDLjS=T3Ro-zOUI{DXais|xB%Qrq5y)Zfuo4st!rKL{?#+=)f{i)JF2PW?r zGGo+t#Y3+zj9nf1d~npF!bhI_s7K(f8t-Q7mhrQs&D%b4yUw3!LtRp0htZp!9)IuG z4+LU!{?+)q`b`2;i`;W$ra>RwA&qM_|7ZMo2Dulc&Fv=m?z4Ax}wg?ZS&Ja zWmuYbOtpgVZ2u$vmU(a9AGhzP#P1I#4Zk}$B!AN)GmkzWj17JL%iE-;O`#p?gns_} zBy+*|!b6|?CU^F0fkBs}TIH(t@`qImSITU2#v7 zmW{VR`bEi%d#ZUHuUcnlmCice&xY0PCh)S z$C8&GeeA8yeY>Qr&&EfqfC36Apnw7jD4>7>3Mim}0tzUgfC36Apnw7jD4>7>3Mim} z0tzUgfC36Apnw7jD4>7>3Mim}0tzUgfC36Apnw7jD4>7>3Mim}0tzUgfC36Apnw7j zD4>7>3Mim}0tzUgfC36Apnw7jD4>7>3Mim}0tzUgfC36Apnw7jD4@XqY=Ol4-9q<* z1v~dBQD$y~PZRI?efWz$k(YMdzG`#*%d5M*JRn`mUOyz>A2w;|F&nx^Ed_RnzGdE^ z4&`2+8=pRJ-|Cfiyjgb8EiF3yxV1^JWA=g{U2VBK*tX}NZ$6oRWUy`T0ehY+*ejTD zKVXhVD}ux4JyGY(kMo07&Xs?!T87*f32tbP7f|vx_0Br>2-rs#>NiHxAahO!td>Km-{00b>NMz zh2~eiC(!Zk?jO99_OalwH_}gA@x>p(-UD~e-cj?iO`#n|7aNi1m&tbrd*&Vb(VaEN z1!rcBtC4rtpNak-Ejv5dJM?v}_n<$66W=~^@8gB@1c&5{T|K3(AR^uwda1A>F=+Tf88eaNhuu?&UMybse|Kky*4=No%f@AJz2I^?H3jY2mStY zk9=E3CH@@Hx8SJYsBD{uoqlUfaB{i#M$~<$Sa8s#+xy@BS$xv#7ME;7mlW7x%=wzP ze0geVpzpp+zpa}1M&fzbY`!Y`(UWP@bQ&@)x_P_qe?W-+_sNp?`zG$f z44G```=pfGVQ%`{qH904>%J$}EsuWg<>?a(4$S+{|N0b2_&#v_;`f)X4Q=S(g>JkL zswThw=il%3|Ly`2yPNiY_Mf4 zqfk3U*iKdRK2m5(;<{oJXK*F`7Y2Uyi6-iw&xI;?Eo>011eK*2SKdo-=^ z(B0csJlApJff+xQ33To{?)mliKN1~%ZsfMP=~@QHoUgR@=l*{L+J^4`YrS%RVEDWz z>YQlzc;dZ){nECwp)M)0L&wl_eek89g9Epg`Cw(E_KD})|F)+Rw|Gd=vN!+k`g-%$+vsMw~Y%1rnjpcQEcdjz=(a-@6Mj{!}$4k=I#+cuzLK! z(chJOt6!)1&qh`5R`i>F!H^dp*H8794Wkb2H%ErbH#OZ@tXxN=<9yn_M;jlZkI!A{^51kG8ylHSqzSva$K1?Hur9;`e=_`@L7bFnD^7;@Q{7kGk{oSx-#85WnD?2Y&CE zvs!%Jc0HczF?e>umKvIOcYO19JxZ^tQPZZp4kZ_y%NM`!&iB*poLoB?oAuCF;}$KA z%bUJtsvZ|UUVI{Bs>%1=_2dV$S2p~xeTnYHpT4W{#mS%jT6*lfR4cl^{l}WXXOpIF zE&bq{azl$Zd}2j*n?gI3IsL>PcjxSv;Qpl6*S=qKwfmx?8M}1(=YNfg4n6O$?x~hK z`PWMY$`yJf!((ZZU)z*bp!k4%jUOB|_KQ;a(&pUQJM9mP`)t|SJ;%{2A7m=qu|n}i zAEd0$#z(7w0tzUgfC36Apnw7jD4>7>3Mim}0tzUgfC36Apnw7jD4>7>3Mim}0tzUg zfC36Apnw7jD4>7>3Mim}0tzUgfC36Apnw7jD4>7>3Mim}0tzUgfC36Apnw7jD4>7> z3Mim}0tzUgfC36Apnw7jD4>7>3Mim}0tzUgfC36ApuqoZfif?I?g35PdCZz)4`euc zruA*n6OPm#Jhga>6%Dpz$yV+5yvr`+YkbRn!xt`paLt{w2iLyWrpykZbA0Bny^Wg0 z_YRbc$~|~S!>~F%Q_uK%WBDD`_Wal;y3U&4i~c#b=2wqJ z&;01f*PGf^iC$g(c(-P&UJ2d*e&n8GyAK8iZrNP2aOn(zzI)qcP1R*e;$~d%hYj^e zp&hcEF5fFKe17KH&veP3aIgDEB_6sMy=X(3Vi{&+<_q3)YK15=CS>+#K|AEM_za(2{R?RN!+6nJRt zmrGU$I%YkzeRq$dfl2ws4=evcY_RYCQy)CjJ9*!@@!BSJNwFQ`M<4!fR_EVm1Seej z{8XM9l|uKq$Cb#L<3wOwuHKpNe7tHP4{G&6MGzfO?Q6jo` zi~8|Dz5eo^rUQp0Zkqga6Ss4^B{tM0rFAHurAWfLme255v0&)AzIWv5(!naD^k+NYP= zgdQoUL)2}TKL5Gaq1O}7wM_Qh369@UuJx|Kj4AamzrW&@55K?oT$UU|rY)F0B**>n z53EoA*NBosXBDc|`q1)K)gKsh@=&K`tDd+rDfjI&3fUCe;nDrm+TFQyO~TLrC98D1 z62Gi`gDcNp`8@f&|D%mab-BUVQq>pzNB`Zg|L-nPbkyaB_w4M}I3h=x^vxmzw|*FV z`S|`H?*IF9{=fU9_t7b!fC36Apnw7jD4>7>3Mim}0tzUgfC36Apnw7jD4>7>3Mim} z0tzUgfC36Apnw7jD4>7>3Mim}0tzUgfC36Apnw7jD4>7>3Mim}0tzUgfC36Apnw7j zD4>7>3Mim}0tzUgfC36Apnw7jD4>7>3Mim}0tzUgfC36Apnw7jDDc0l!1A_%az$Fy zJs5d7bpPXv=s#}c0JNd+c0OnlC}RXr9Ql7B+}%IDGrM^{T-f5pYx!BN@LjxHCMJ9=}w zQvD`IY>ED)Z)DHRf7OZpq~_ehgWnxvQ)q{+5AJ%b`|tGQ)Jc?t|X&?4I($ z*lbU2IrrWC;PANvewdjqbFgF9EO`eEnGo!|=l$kK$E``+DZ6UeP>&Sep+$j8V?yUU zB-@cUjwamyoFTYkX2N~Yw>-9GdT`*D&3T94GCSdY@&2C0$D@~B8~9qC44V_S)9Gv7Yr77uE>Yppi}T-Y6FsEXL-R97%#Hr)t55IP zJ780wZ|Lhn=^xM7@ab^pp;;#q&hxHP&oxT$eqm&r`U@}A%JB7z1JeHUze+md4 zJkQDZI({;zb$EVuJ}U(jP(T3%6i`3`1r$&~0ROK zOtGOZDY3&lE#EI!{l)pg(EY!k)*5yp{@9Ywa_4H%JK_HC>zxWl2F9H4e&(4o&nDap zT%^#o)4{PrHom>R@GFVCv(HHz>XFhrB%EjHUiZbHeq22;cF3gB_a5CJ7?S_RVD959 z0~1F+-Dky>`vZe6HOe=+Wx=RQ2da%LJ*Y}lrB81?eBr|bHidVnnDu&#JrAY|j@bL^ z{&Tl>Pq+tudg}_^6TU7SdHH&_gx5t2_86CNFLeLERcW8FZ@#wfs12hIF{^tAVza(G zBuA6A3FqBm$~RpD1Gkpx*FA0ixI9B^b1qCFtnepIk#?rO~D=ZK3=JCh3WBk zH%m9>!*SW3D)8#?UjNzo`|k=Upnw7jD4>7>3Mim}0tzUgfC36Apnw7jD4>7>3Mim} z0tzUgfC36Apnw7jD4>7>3Mim}0tzUgfC36Apnw7jD4>7>3Mim}0tzUgfC36Apnw7j zD4>7>3Mim}0tzUgfC36Apnw7jD4>7>3Mim}0tzUgfC36Apnw7jD4>7>3Mim}0!alv zY_n}~m#k|RN341;cgx{*A6=bh%-DYE-d!Eh`;J--kG{N2r|>Qi?7Qdcf+C;hjLT5D zbp6@Y0&$`HKYMk|cOm|8#Rg-d3g@ul(V=|mXD>|KIjU0p@#l}#{@V-puWnxa=!}u+ zss}^&e)sJ=_s?MH9{4ZnjJ_}a$hfbHMK3s-a6fFdUB`0S@aa(H)+ggb_k9=m{o&CY z63%6jka#cfc{m70h)KVEUjE*PCrwE+tZ3xb9#s}DpEWHIa^T$^Q6B|H-Wpx~x46e`m~}`x z-=7nn`|8KrPboLz+;8u;jv2r5g&tKVP5vvk+{7g{XLr3nBG55gwfdiQn`^_XLsa8h zy-Qy#aW47m{D1b-?IYEHy!Ex3v13D%9YP(T3% z6i`3`1r$&~0R&QD(XU;EvDS4uCiHb;i*73&Ro zZ1w1S9;-9!m2^?5vKK6IVrH6P$81mLPE~lD4WAAbzV1CBu5q` zbB56U&NJ6-d1>*Dd!8HLxEFXsFm(TK%%;y5*s$ubI^mx8>s{7Xzj5A;LiarvuAP0( zJAv4&2UgE(H~hxEz~d7(W3F85ZTNL~vdK4TZ`|{}A@n{Nw>(F|p8_NH>@MA{^6P7J z-~Q6vZC`#_w*KbzwO;%Dc-i`?53G4((Ks6(9paY!F>-f?@q5>t%eJ!Ik(jKZbKkS+ z@I!w_bwAOn?L&tfmv1(HO8+}c-etqIL(^H6zx?q;nxyAG_cu|akL`&$`EJCj`fnD^ z^=Qt>^358xI9C7nUqko4*LmX0a=+N{>X3N84Q_uf^xW@F`{$yWWwRVATBB9zYQ-+) zTXtdqdIRX*_B?ZF}Wzu1xg^vgD^I=m8f*Q|rZKdD;1%+-sxZz^6PZ*at(eIFfZIV5SD zHCg-8vc=0DZl5+7n=Lq?-_8>@ygDSD?}C_tsgut;*fHyF5jqmg@O-WXu7uH zxO!*XU5;McJf`;4Hj8W9uih zyUb;7srt(uztp^5CUuRq9mYOaz=lVMH61#qf4%g@;Oc!po&2#&t;ZA3z4O)XkxzA9 z-u>BGU4pS$uRm66(^(t79n#(M(zWF0K62`Y2}PsdUAy}GMZZk%)Z(^a$86=7*029@ zXd|!fSY5-0U5CW;4V{0VuRCAu7V%iy9Br&Z zzUv1j+3@O6?x}Mn>UOF!Epp=cH^&Uf@Xo*b=SzRS0tzUgfC36Apnw7jD4>7>3Mim} z0tzUgfC36Apnw7jD4>7>3Mim}0tzUgfC36Apnw7jD4>7>3Mim}0tzUgfC36Apnw7j zD4>7>3Mim}0tzUgfC36Apnw7jD4>7>3Mim}0tzUgfC36Apnw7jD4>7>3Mim}0tzUg zfC36Apnw7jD4>7>3Mi1$3#{q>@E!YCZmzH*zQEt&^FQ|W>W_=1zwhzs8P_&le(r^e z>89H7>acoZ@om>%t@A?ZAAZVo=i=cNL-+eWU$5)=Taq_)FKp<3*cQ!0TlV|n%{$w$ z>yU81_a>irXq$gqdqVHE9bQRYZ^ou!f!M6q(iK>}J+z^Fz}Ez9ICh9ze&gKx1lsm! zQoY}*eZkIMzU|jIs!!BCQMHe5{rFm`FP?0E$7lUYm)hR3^{Yi|oV4N5VRgx$?<=;f z{N&ZgvfuvEz2DrHbnf#ul{*ssSNw@7nS*V6%zCnGp6xb_J9L>`@y2t%H}b%r*?X64 zo7>3Mim}0tzUgfC36Apnw7jD4>7>3Mim}0tzUgfC36Apnw7j zD4>7>3Mim}0tzUgfC36Apnw7jD4>7>3Mim}0tzUgfC36Apnw7jD4>7>3Mim}0tzUg zfC36Apnw7jD4>7>3Mim}0tzUgfC36Apnw7j{HqJBuG09zch3YHmpU@M&w{kkD;Ixu za?aRmlfDey|666!l)vt^;niVfw}`<9eyDq>)Qa>2*3GSTEp)!M9{%Ic{>d9?+oMg9 zmmcq0{;m`^X}Yb=i`lENfWx~x#x_J2SkMK1)ebEjZL#{cy|ch z^PPC^(Jy|#^qsG&zrObUFN0t7toK=D*}$xrm09jsF(u^QBc&fL8Jtsh$E@LLYfc*Q#G9HTkyplP@Qodz<5tj~pC4Wcb4?;`_YOwrh5(By>rdW2ht(D^1m_r;%yEAZi4 zS+_6zqGHQWhVD2&@8e+PZ>RPbjJo&xiA&x-6=}n(L-BggY>I0A#?_Jsa`fNw`l+n{ z>Yp$D`3fkYfC36Apnw7jD4>7>3Mim}0tzUgfC36Apnw7jD4>7>3Mim}0tzUgfC36A zpnw7jD4>7>3Mim}0tzUgfC36Apnw7jD4>7>3Mim}0tzUgfC36Apnw7jD4>7>3Mim} z0tzUgfC36Apnw7jD4>7>3Mim}0tzUgfC36Apnw7jD4>7>3MimJN-wagR7{CALtgr% z{?7$%U#V%stHY9I}59WIwYQN@_C2ucaAIhLfu>mTX{{7gnQuU96!(| zqYdv4$>$!`w#O?co@q8UK6p9m?2mJMmV5qCxBD~p=oPW*ZyESp{M4@wth#0S@*4A_QzxDK>gaOMp1+#v?8+i>@4R+sLzmbuZ8&$>RWk9p zul{!Ja#jAiQn&7uN6MwEw&~31z8^&gFGpC{{Md#(bT6w1`-hs>-n)y+)?= zpI?3a3Mim}0tzUgfC36Apnw7jD4>7>3Mim}0tzUgfC36Apnw7jD4>7>3Mim}0tzUg zfC36Apnw7jD4>7>3Mim}0tzUgfC36Apnw7jD4>7>3Mim}0tzUgfC36Apnw7jD4>7> z3Mim}0tzUgfC36Apnw7jD4>7>3Mim}0tzUgfC36Apnw7jD4@Wdp2O7rK&#+|v z;*wcTcmDHY;nSnm9NzXovo(1FN7trK_nHl_4vUx7TDqra$Gab?`M~upM?ZZeuGPIw zA9(JCxTI}ZFsje@h5i^==HAfe-;k}}d>eKh63;jJyhHapms>Z#L-x>?KHg*CFQI#Z z3m)!YxxWqX4$0>pId*@^A6D-f829^?A2zjIw6<*BYN`MB#lWT|zO1mW^VCAEO6;iJ zKlPu7E7d{gM51z?$$MF+M=f3i2*-qn9%{%$msx|Sgi`;uh(Ze>J zJCr(-_}s6(wMf}czb(Di{9uh$&5youa9`Woqkg~g-PLnHoKM=5FWQ72;nv}Y^U2SB z(c)uAEl%^?szOt0?LQfDu}bzY8$5GIsV6J{tAFnB=l^>OWV=|t#N(0gH+g^GGc)QR zd-3C<&utl-YTXZ0OYGYG^zMB3Jn--NDEuu7D4>7>3Mim}0tzUgfC36Apnw7jD4>7> z3Mim}0tzUgfC36Apnw7jD4>7>3Mim}0tzUgfC36Apnw7jD4>7>3Mim}0tzUgfC36A zpnw7jD4>7>3Mim}0tzUgfC36Apnw7jD4>7>3Mim}0tzUgfC36Apnw7jD4>7>3Mim} z0tzUgfC36Apg_tkFz@GC$9}xBe&W2^cg_0rwOXrZl{;}VZ`_bADfct-k^kQX-mCxB z&gygG9(q6X*36A=z5Rtn%fE}Lv*gbu??;}WIH1YtB5TI(f4p0Pt@kGFqHiO|?z?vE z*?b=*?h6Of+fa{i>kvBMQ@BBlLIx8YV`~87T70QP01wPWIWZeQbygMYHd)bfL zG|TnYnOSiWz56uy^u)7e;$BSMp#Fi;#ZP`xVdlNxeqa3L#+pA@ZM4CLM~6jwW4?|p z`}(X>ot8X&>W;zPlFt2yrDfK{b;#djClZ2etq2?;ZXB@F_5L z&RwTl4%)Hrk)GN57w*^Kc=2;3&X#?;Rps#c41FdFD4>7>3Mim}0tzUgfC36Apnw7j zD4>7>3Mim}0tzUgfC36Apnw7jD4>7>3Mim}0tzUgfC36Apnw7jD4>7>3Mim}0tzUg zfC36Apnw7jD4>7>3Mim}0tzUgfC36Apnw7jD4>7>3Mim}0tzUgfC36Apnw7jD4>7> z3Mim}0tzUgfC36ApuoSTz}$D|PXGBpf!ujhZ+rRt#&OO{UcE!j8I~&>*-(gYLPN$9)J6R`7y1bo7mK^j({Fse7 zR{s6m@4dcw&Z)kStS+|WZzWpI-DtzRL(+49`xgn%`>hJ&e6v#EPdzbQC*R(4hab-mIpME>~@A+H(Eea^0 zfC36Apnw7jD4>7>3Mim}0tzUgfC36Apnw7jD4>7>3Mim}0tzUgfC36Apnw7jD4>7> z3Mim}0tzUgfC36Apnw7jD4>7>3Mim}0tzUgfC36Apnw7jD4>7>3Mim}0tzUgfC36A zpnw7jD4>7>3Mim}0tzUgfC36Apnw7jD4>7>3MimJ$}RBTCqJhC^waHAQ|@QrBme&? zuwdWuVx>++KmXo>w?3^nDr4SN_be}8>9=%QlXu0ysUw^bAJz_)2LZfn* zpZ-j?htpnvdi({^v06kbLf?SC(%4%jbg*mx=ghY4Nd#pLuBX_SoMx*U6XX&sNWr`sl!^ zJb$(xc<|JTNj4lh9B&dj*CNkn%Go^6pEYW}zdhfle?Rxm`-+r_jh>e0$bqWI?&-L~ zhG&PoPdw3m($PA3lArsWo3_3D=p*M2<}H3|_3$ARUYK__V&A}nuk5km-68q8|L^~f z(x0e+0tzUgfC36Apnw7jD4>7>3Mim}0tzUgfC36Apnw7jD4>7>3Mim}0tzUgfC36A zpnw7jD4>7>3Mim}0tzUgfC36Apnw7jD4>7>3Mim}0tzUgfC36Apnw7jD4>7>3Mim} z0tzUgfC36Apnw7jD4>7>3Mim}0tzUgfC36Apnw7jD4>7>3Mim}0t$pjffWh&e{Y># zFL`GdYu$c+hhU@7evoBdt`}_Bbx1tlE$es`N` zq1{yI!L7S(*mc-(b@!`#lg~X*?ZeqJ@2*-iPwhhm%T#ODJx}eG0|%tbcQ#M$xP@)k zm&kHsAGC=*!k|NZ=1&X%JUCD7_;>%h?^M}5C9Xb^^>1G^3vHFSXBuzJQ{sAy_w&~+ zXv4L`)haC>D}Fgo#N!_w>XbchN~MR!tef5DkLmN$pRf7*XJ_YjZ1~LizD>Tc;oTwk zuEy8Lp5FOKczhS@vrs?*1r$&~0R^dZ!Z}PdhqY}1u-YP%Xu5x5Opv!EtTg zPuS@{pG*Vvd3Mim}0tzUgfC36Apnw7jD4>7>3Mim}0tzUgfC36Apnw7jD4>7>3Mim} z0tzUgfC36Apnw7jD4>7>3Mim}0tzUgfC36Apnw7jD4>7>3Mim}0tzUgfC36Apnw7j zD4>7>3Mim}0tzUgfC36Apnw7jD4>7>3Mim}0tzUgfC36Apnw7jD4>7>3Mim}0{>WG zX4@v2YX`nMG4qw`Yv-3}Fkxn!DOdWuGHh$Y_U?T((uQA$#Pdx)_ji~5Q6cuR3^U(* z?M%IDfBtQD>iz}K_Z^xeVgD@mMrs>w9r^`_W`3{m%%pR_J$JeS1Ey^*z9DzI{J-po zj(as>x6e6QI&t57GrJA-2(J!jTcs~QBjMbKwS1ybu455d<~;G%+QhA}rFr639BUK0 zglmTio2!J*IcvG!E^qqB?*>w(%Jp`$U3tn;a!#|(9*9s`0fC36Apnw7jD4>7> z3Mim}0tzUgfC36Apnw7jD4>7>3Mim}0tzUgfC36Apnw7jD4>7>3Mim}0tzUgfC36A zpnw7jD4>7>3Mim}0tzUgfC36Apnw7jD4>7>3Mim}0tzUgfC36Apnw7jD4>7>3Mim} z0tzUgfC36Apnw7jD4>7>3Mim}0tzUgfCB$mV8&<7p1uFchVy4^tx`Q-r-ydT_@eOb z52dMaJ!u<7+JqkA)**DhH_mM2!4rpoi*Y^8)MvLm#()JrYD74ofto7NE8+*nk_6VmAC*Rof(4`qS z&OPVO!Tax+m;c_-F3R!viRh#~d5=x#5uP0iWv-L?#<}mvIdN{fx?de?nzUa(dt;Md z+r&O!hh>L47>3Mim}0tzUgfC36Apnw7jD4>7>3Mim}0tzUgfC36A zpnw7jD4>7>3Mim}0tzUgfC36Apnw7jD4>7>3Mim}0tzUgfC36Apnw7jD4>7>3Mim} z0tzUgfC36Apnw7jD4>7>3Mim}0tzUgfC36Apnw7jD4>7>3Mim}0tzUgfC36Apnw7j zD4>7>3jDtXruQ0M`L%`U*uSQ`=+y( zLc6TN*X4e(H$jKKtKR(kx%aEssblY&1#%>99-G+XCh72WR^I_Bpnw7jD4>7>3Mim} z0tzUgfC36Apnw7jD4>7>3Mim}0tzUgfC36Apnw7jD4>7>3Mim}0tzUgfC36Apnw7j zD4>7>3Mim}0tzUgfC36Apnw7jD4>7>3Mim}0tzUgfC36Apnw7jD4>7>3Mim}0tzUg zfC36Apnw7jD4>7>3Mim}0tzUgfC36Apnw7jD4@W9TwvPxxl=E<$nsLcUixwMuQt3p zB)=1;Z7OpnSNk>b(>~qMGp_%(Gl@H(c0(KL5w0EjA6b!n?l~%qJ-GOehZcmk)cu2A z+i!1%4y$tg{oHRou_&%@-o`oXP1zy&>wr~20R|*btB8s?*;;L&|d;ez!XMY?2GZ{0P zd{1(p>rr0G$vNl#T<;``E_5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&U zAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C7 z2oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5;& zbb+z+_E@;#qjwBWJ^uR9W53_$F*Xk6-8bs9?#uJ9A2y`y@s(w_nc^`%4rSfbe+@h% zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF z5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk z1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oN9;Z-KFAbUyL;HGUdh_Soa4U$^!c zABVE;8?Cg;?5}_N?ey};pZA>XG35~#hgA1?|N5Dp009C72oNAZfB*pk1PBlyK!5-N z0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+ z009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly zK!5-N0t5&UAV7dXy1>{vi~2X7-DaW3csgu2^4~r0UHrwH9^>hd{%hbF0RjXF5FkK+ z009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF z5FkK+009C72oNAZfB*pk1PBlyK!5;&cnjP(;+uVD%)R;=kMVTaaDyJLzZ&-0X&&S0 z5bs|<(-R;-fB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF z5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5J(reaf2S)^jSD$zQ=ev zG?;w$-V?e_800aY4(Y!Jo)I8GfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C7 z2oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N z0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5Qw+H z4U^B_@tiM5?CLR|4h?o0b>7A&e>2cyJRRcw>t}ib1PBlyK!5-N0t5&UAV7cs0RjXF z5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk z1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs z0RjXF5FkK+009E&0ymWJ|NhZDk7fJ6$IqeulW*4F^w6i9c#Nk*`mcd!1PBlyK!5-N z0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+ z009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly zK!5-N0t5&UAV7cs0RjXF5FkK+009C7;w>=dkNw|0F5UkDduU?61JD&T=1uaSv zL+Aan$r(4co_+X7JO6m)F%L~Ww#S|8UR-*6&F|#QCpis0 z%E`B{={~<<&s!d>GxFm;t6cR^$2xUydividuaH?STn5ES&XTgQk!Ce96yI%;26i7id54;gvgle&_zl3HOY@@~nm{ruv`X z<-w;@{cjxI|M_cf>{w^n3(xPm>+6XHQ$DM6_aQ4LCVe>bioWZ;nz-(n*7rVp?-7YU zeVezwYvJ|DK|i!VvC+o|)qEw+j6xt;_Wd6-{jB6YBR_chr^h}_F8I%(Pb_+<`Ga*1 zy+bi3Ou}>G#|h=>v(e+pPZXYY$wR=>Pg@8=U>Y zwzFFfy!@*>kJvtn8Qinx0__*SwED4&|9(gE&H-0+c&*v1$#?!ealKYU-+8dk*#mce z^Ug0j)@ilXeXU!)oOnCA_`tzEA4*Kx?~n!8P2MRncI(+aHokbv#PBbF8npG4ZxbU1 zKQ^ZEw-al=5@$vskSzZ@zTl!{>1JCtdjG^!|EFGVb=CRbJy@sN%?r;P|L?Nz|5YC^ zc_p!+`vKq0ne%R9>UuXXf6w)+CMJLL>u1aUP?{L{)bo2E@x$@6|JG&T&>wdh7sU+j zS#yE^_Mg4!@DrLXc{=&d6{@(gW-yq5r=pUti^!L_3`UX+1K>sN9qrbQQ(Km>41^P#+ zAN{@ckG?^aE6_hm{pjzlfAkHaT!H>k>PLTX{iAOXmPlCC|96=l={)% zTmR@AM7aX}qtuW7-ug%1Aj%c!AEkcu_tro922rj+|0wmNzqkI;H;8ft`bViB{k`>% zzCn~L&_7E3== z*B!IDxA#Ubuyp_D?Qc%bxaZ~#UcGwtvr3PSb*|Xr)U$5x zo>;KP8q==3d-KG7U4DG(@sE#94EkZx1}%PkZFc)DJJkDS(|@!Z^Vu6a-8%L1=qIv* zYy^^>-aoZ-;_Ua5Q@*&NahuCNO)l8==aEwfZ1G^7(K~K?+kMM-taDb^uZDd&Ao2D+ zi#K_8pXCyhkJ-4xtnSSD@Q+(6=Wk&-hcFh!HIt7-B5a5x9^hUy4LMFxAVivq4Vy(?~0$-OpKiI!j7X? zxh8S@#Lt(Xztcm>(Q|s<_Rbw^CkB1L@+Ga7nNza-T$?JdIao5HBytcG|--b)S{~KQT$M^r?rQiSir!4*c_kFsgO^dD1IPRZSf8n%< zVkP=VpP zR-%6t>V6f4m`3iZOdr~ZZ0B8rvhABB41+*AL;X%WRr^p8TlaPFyp;k1Zi zCHhC9UO4yEzi?Vau@e2GP%oT&>R&i5qF9OkQK%QrJ@qe~7E!E3|0vW8=bri(PKzj3 zqJI?Xg>z5+3#UaCE73m+^}@NQ{)N*bik0Xeg?i!KQ~$zg5yeXMk3zk0?x}y_w1{FQ z`bVK&IQP`Qa9TvM68)o4FPwYoUpOtISc(2os29#X^)H+jQLIG&DAWt*p86L~izrs2 ze-!G4b5H#Xr$rPi(LW0H!nvpZh0`L6mFORZdg0u&w)(djdj5lt*1gpJ--VO*FRkQH z{g3(o?f+K3|Kl0sm+t>K@8hNWKdyV)()}MFeY5=h(X(zSS?`&bk9qlu3;)thy{TXX z5~q~y+jnWJWT$@}{N0E3TP4S|pK|z>JGDxV82s3n7I!vFjKA#Xb?zBHFERY9o#*^~ z_t0dYKF7WB`|Gn4gSU8b!ZkZ@)2?j)=aCKfpRxOS#~t!+C;NXFOxeG-kYw-rdq3B7 z^A*a!$2Wc7E_vAvE&kKD*WlzGZ$5p?dk@V@41D^mPVb(2Zen!f&%WOLvb{@oS$V~C zXMNo&*?axzJ$Ksk^U}4>yz$`ftA9H6(>uQ(-|NOMf9-cK|<(9A!#g*< z;f0^3t=;^XCBJPs_OIR5y9z}h*{5&w))S6+B6-UBzg)K0D%T`$d+pVdUhf{By#Bk> zH*Nde#i{T8!6og#-y|`n-S6M^Z~k5K+WWU&c-SL-lRY1LX~-viyO%C|=Vj~8e&b)I zo1gM%!y9)UQ7Cg+$6pgjl>PnR{J9I0r}VpY-eFBINKWW}@bMq-^JTKnMdxfaanHJm z{;!|5!NqH@oEW{u)RVf@dnj?`zPHyucbz8_*MHw)LGt|-%YQ#c4_RZ{n%#OH^TMvr z{k6M#SD^^B`~IbsXFS#Qhvc9i+Ml@b(bJOm_G;2`ovThy^j_n=Q0sspkbBC5PO($3dTe zJTWBz2M~JRTr&t)3oP)O7H*2_y3%U;}X{_zvKOHPVSc+kox^-+-<)HzP+t= z&o5d(_}A|0U4RtNjaQ z9_z?XK>zH#LVLIVp%u+;9{scP3hmwchgLMZdGycDE3|j(A6n7u=FvYpuh8DDe`rOs zn@9ibyh3}o{-G7kZXW%!^9t?V`iE9DyLt4_&MUNc>mORt?B>xwJFn2*t$%1mvztf% z?7TvIxBj6O&2Aq3v-1k=-TH@CG`o59&(15fck3To(d_2YKRd6`-mQOVMYEen|LnX% zd$<0f70qrQ{j>85?cMr^Ry4bL^v}*Kw0G+tTG8z0(LXz{(B7?oXhpM|NB``+LVLIV zp%u+;9{scP3hmwchgLMZdGycDE3|j(A6n7u=FvYpuh8DDe`rOsn@9ibyh3}o{-G7k zZXW%!^9t?V`iE9DyLt4_&MUNc>mORt?B>xwJFn2*t$%1mvztf%?7TvIxBj6O&2Aq3 zv-1k=-TH@CG`o59&(15fch^?`mS1oA+{K4%FgY}z(C4YPe`;U&?{`1GcjAVg7d<}g zozxe~*Vg;j_l-__HF;2K-}uMZZ(4qx)IP$YdxbV%$+CA2I-~6g6Oz|G z)B4`0-+C`Oy781&r<^l0Ic)o(js87mjl`H2pZ}$2X_v%+25t76((1FsfK7fq=7X-? z5=Y)}QjhM-G%wxyio@RBw83u={J7C`OEw?WH#DEn=P6yKasMtmzjRrm(+6#4-1^YA z$y>j=WdFVU-<&+Z@fDA(w%^Fq_k3#on=k%2weR=b279;qG%_fxx%Z@vGYW9B@w(*6g<#VfQqOZVSskF8H!tzk*Y zm)AT$VWpmlLEk_7*;@UFC$8^acX+Q+kC%7fee2DV1D`&t)8vMOlGoOM>Z;GK9G<-H z>8Dz}eERSQR_oj8;Puv@mMHt2TsP^i(0oFlC-v{TrjPuq(T|DYU+p~S(@TCz44wDh zJy$n-BQfggmkwL*mTt+xO$N*yvelC0Ep?_Jy4*AMl4IK5G4$XYwlBHhq3xf2Ysvws zfB&8M=w4gyK4ng-zkmN1-N&WPqmSIvwSCLs<-hyArq55@J@wGD8{V=wG3dLOSGan~ zRZIKNz9%`M`zZ%ZxNp7Wpzl|{WQ`|FQ~jImdc!UC%KsirzN`NqpMyP%4j|?Wp1y1M zeusVX=!1zNckXe};?s6d-2Tm;4UXLV=)^V4FYdY0Isf=?|FrM_@!yy9YrM-D&pbDv zy#GCQ*Yo`^&JR+_(%#x7x|M-3eeE;LyzHD;+_4oT9-_L;We|+1QO|HNG ze*fe98Swp&Z~L;z_1E9;e|$d!zW?!UUpBe^`uqKl?`OdGKfdkDCf8qozyI<54EX-X zw|&{<`s?raKfa#<-~afwFPmI{{r&#O_cP%8AK&(6lk2a)-~af227LeH+rDga{q^_z zAK%Y_?|*#Tmrbs}{(k@C`x)^4k8k_3$@SOY?|*zh1HS+9ZC^II{`&jB zzbo7S?ZN>&wyW6xtHo>G`yKkoWjDuLv-qa3_WM7w;k@LIm+o4=|Lgc)mrLyfz2CAg zeljIF^!3v=*n9si%KJ}PWyRKAUwHq(aXU>YK7n|zczTaBe=A*>XqxDq+6VTnV?RyY z-QczBzkl+;#NZ~cJ^asOnkH}B@rdd5PWmD_vf){mx4+@8ME^I|`*y~ti;@HC@BQ3* zE4^ECz;3Uuy~l$Ow0OVQH><65$_4S(EWYVyz1_Ea$IaGwJ~{B2*7r_7`t-!jr|x;h zt@r+(xcRhQT5NyCL&@uZSZ=G~TTU*yV6Qeu?LYn2^6&e8J!X`gb;4^SS30Ui$zdnl zJgVuy#`o`f=BXWfPk5yG1meBoX@~V5HUE}>wN8}n>wd|KdnX3eFWvpUXTMGieWS}7 z4K}&2bmD58_AMRQrS$5j+P?S6J=>Q)JngJ!>yB$Yy=%Rldi8tuqDhO_S!T@ItM3(W z#p0d5^-+C0jai}3@RA{KFTQET^>!{9_sGI`=APE6oKudBrOb@0HJbZ^OEaKJT^S ztgm~0(c}1Cb|{(I<^6Myn{iUf^>2Aa-o*aGCGp#minP|J!wsm)_L$$aBrheVzm}S=R**iJYLs79-XV2Qva%C;_^%@HKqPl z$;9Jz{o~QOswwraN+uq!>mQHKRZXdXRWk8-UH^D=u4+pCtCESw>-xu|b5&F7UzJQe zUe`Y!ovWHs|Egr-@w)!;=v>v5`d1|rkJt5&N9U@h)W0g3c)YHEJUUl3rT$gP#N&1S zPCLXWrACJyeO{srXGVyp_|9Et+YD)d9l8MLb`p2VlRa5F;l}tQd*FPSe ztC~{(s$}Bvy8iL#T-B8NS0xjV*Y%G_=c=aEzbct{ysm#dI#)HN{#D7u<8}Sx(YdNA z^{+}M9kkIq$1nM?m2?*6h##r|*2E^TtmMuSE?SJi#IUYG)ZssHIuwJY2I^PnG} zKdgNJ#}D2PF$c_Q{n;L?uCd9*Et{;c$z%Oi_^2?aw5qBEQu{w1`0MLO zcPqK-p!1iz<(KgYWg*;z<`i{qlJmFZ!{n`*^)D1xouR+P2^H@>@E*)Op^=Czl+Q z7&3AFTMs;Yi^SM%n)H2l?njB#{=bhLzfHTc&SO?vah39YV#oYCV0PnG_FJa!*N@Kn z`MmqLTj%Vv?_IA=?~cumzPB)^w5qBE+I`x$)96&sS>J8<%ZUA6*{y89&qtbX_D#uz zZNIp7_0wNX4Bm3H5id@jnfl$I-F%ac7biwGOzg4t@87py{XZwKGUlY-_kH-yVH<2b z_@=6^@AbkKc;J*o+ii|N_^ozV{%gDI8jl>1`hCBCi(S54DfPR*_tRf~Ffq})-hvli z>$`2Ld%yKp+kMZ(fcmAoj+=Z$qF?HBbIY4|-F@@mjn^&R@S(z<((0-bX!mxi<7Ly@ zCR2a+|F+@_`y_{TUasr3Zw^QfnQ-Yx<5pTfF}lfhS9Q2|K-u4chdbVWQL<0p<|X}( z@0u9&L;Dk2eAlt6>w3L71j_!dk6LiZ@yS%ro^|eRpS-sI3d7pY-Za^}-Uf{?9Xu-8 z|IJVCJ+R+^#LzdothU!Wqf7twj}zu~A2@i{Cwnaze#zlm?{LNVt@iJ=;DO?p)++xj zQ2zZ-{rxY!aMFO17cTg*}f$*L?ZhV@j|3_U%dk*=9(| z&tH+u76cxVZE+@SUt0yM*nQZ!g{~{VfD;*8vU~s3+w&*ht)INY4p!lEUfqI zA6Cz7r_n!Kv9R8+e^@=Uokss`#lm{O{$cgZb{hS&6$|VA`iIps+iCRARxGUd>mOFn zY^TvbTd}a-uYXuQvzL{rZR1GuvtO&sHp~_v;^4&upjB zKU=Y|-miaHJ+qxg|7^v=dcXc*^~`n}{j(Jd>;3wN)ic{^^v_l-toQ35R?lpw(LYnKSUt0yM*nQZ!g{~{VfD;*8vU~s3+w&*ht)INY4p!lEUfqIA6Cz7r_n!Kv9R8+ ze^@=Uokss`#lm{O{$cgZb{hS&6$|VA`iIps+iCRARxGUd>mOFnY^TvbTd}a-uYXuQ zvzL{rZR1Guvru)W5?9+q7Tt`)5Dhf5AU0_J7;%+NJx* zWqW_`k5=F>_aD%p=|kVmJF#s4*A8`e+rE7N*VYX-ow4hiH=VZHlvyn{ZoSHJ?GnpJ zJB_<%E0F4c#K7~mI<9R0&+fxkd#~h~jb7dDyzdSv+yC$QEq5MXI_&Ka#`pQ|mc;O{ z_FQ9V%N>%bePGwR=Cf91`+=_7w%4m=`^WFl>D(Xh8}P&XZ+^6{eW0@q5b^s<{^(b> z5Af(MM;%$V5Abny4(^oN|95cxg%>SKUjO~GpESB<=S1%f?;CZ(3xkrKK7793D`)mi z+;U^rAMToVQt8wVNBnsBkIzOthTGRr;J%Mu88iHi9WO242m0JS-%F-`=N~?FUWc;I zb(7P^m3`lr-R4 zw1@>JwcGRU!M(m-d-gK*OZT{Ia{bcJUte{`(-$vW*8BG}o3=|1eEO-zLzca@>~nE% zV!fwIUYq;++nsMew`B1Cn@wE5#e|5L@Af$gOy97_8+Yur^A4qt_qp(lW3Sw#^rz>C zw(s=(zf*tj&-?tq&krr@e%HK7@0VONceep+%ovxr{`+wsE`IU7l2=}NV%66#otxwB zogT43>8+{1-+PbSsPySNlW&=Of_Iwzu)koGwR*)Tf}3yeGLU#Hk$DHZ{zRzEZMt3 z)6#7|_$)d6h>;7LZZIx6V(?>Ezp(X5W&O|m?!-Y=^`G|lY1`fN!LX-lczdTsEHG(K z>filiejSk3f3?nsl=px6!XR-#CMRqy;i!4|zFIWFs1}(D7>0e~QYI(W(*D`34T~7ZZ3s%d^)xVZOi|lgx7g?}c zUatPN3|eHD)4#}q)$(%nuVv68yPW<-7Oa++tA8zn7TM+WFS1~@yj=Zj8MMeQr+<+J ztL5eDU(29Hb~*ivELbfsSN~cDEwan$Uu3~*dAa)6GH8)qPX8hcR?ExPzm`FZ>~i`S zS+H7OuKu+QT4a~gzsQ2s@^baBWzZtKoc=`?td^Ine=UO++2!;vvS78mT>Wbqw8$=} ze~|^N<>l&M%b-PeIsJ<)SS>GC|5^quvdigTWWj2Ax%$^KXpvn`{~`-k%gfcjmO+c` za{3opuv%WO{{zVq7mY1u4ErS-><^D?l2}2ftP`3Z) z6Wv$pS+@V@Y753MF5Ca{kpUMx)U45xJ9%V%-!;YRjXVJ>*Z`JH1ki{*KO!Zu$?Xb0y zW#8}pull-dKln*cJldtK`z6<{b7a}RzzgnOWpvrTz)gR9bwSy_(D$@ExogSP4oCdd za?YaSo<6>-zvr7ie>?T<>vt*J2l|KZEeDjoH~Gm;8y?>;F=FuOVQVdVGckCJzb7|+ ze|p*P!@?7Oub&wBOzVr!TVacKKYjMoj6wZZjIUf6lizpp^OMf5H)~R||Gb~?-sXu- zlB2HKr+@zmza)CsTX58thaZ|qJ-=|;kL#BG4s6%7?{%{`J7MjXbK7m+`ijoIZ~pPj zo5E0t?-VU&+_7oTAqVd|yY4Qny58D3ksSKQUe6sgZ0%&~dq4Zwi#jF-eSdV5F+H1> z{oY^lw@#-ghkv#61v~XwuKn68%sPF!tDcXqTo{wjtTTMX#Vg%Db9Sod1-+YmUAl1O zE8Cacy(n?b3i}>8X5R4jJJ0VtvelqYW&JNZ>%m23{rmiS^kXF-Z@K9@UoO9M7z**7 zqD8+|_djyc2~$ho-tmax?YG{ztb6BI-)mU*y{~ubqZ_r~`HAJ<{`$W8RrQ~G+oHKs z?wL3+zJg&)oK{PF97fPT2fP4I)jxoUIgFrx4tN2as(%0x za~MJY9Pk1-RsR4Y<}iZ(Ip76ws{R2)%wYumbHEGWRQ&^pn8OJA=YSW$srm;HF^3WK z&jBxhQ}quZVh$tdp95Y1r|KU-#2iM@KL@-3PSrnvh&ha)e-3y7oT`5S5px(p{~YiF zI92}uBIYoH{yE?UaH{?RM9g6X{d2$z;8gtsh?v6&`saWbz^VEN5HW`l^v?k=fK&Ak zAYu+9=$`{#0H^97K*StI&_4&f08Z6EfQUJapnnc{0i3FT01R!M7r?3d2M{rb5%kXiFMw04?7w5Z-;bZS z(K6-xKkhWM-yi#bK3?+X6`OW@_3mR+`#(N0>-F7t4uBxmBXn5q(64WL`P!Y!_W$g( z{9dJh?EkrOa_H-)zWL?y=OqTzyM6G+-R6|-3w&MuW~-Lx%JK%3et()5Cear7QANFu!=o{;Pe(vo9%l7@gxmU;Y${x?S@!fOE_XTb+ zWB)()h2G-6k@ff9^3)+01V9k$5oY#iw&<2?R_ao=|M&Ax9=m$QKHtf~O}2Wm?pHe` zd)FI3`pM^?@37Y9qi?Kp)pl)%e);X~y>_}T)@T6@KWF2*zwda+;N23v>y3Nx>0>7) z1~=LIfCjhSP}aZSnip|KAaO_$mF`DCi^PII5VyR82@4I4hx zcKTMEe75MrFRSXmZI_RiKWo1MBOn&HKo0$vd-<5Vh8FkqRR!M7r?3d2M{rb z5%kXiFMw0^4K{PF97fPT2fP4I)jxoUIgFrx4tN2as(%0xa~MJY9Pk1-RsR4Y z<}iZ(Ip76ws{R2)%wYumbHEGWRQ&^pn8OJA=YSW$srm;HF^3WK&jBxhQ}quZVh$td zp95Y1r`Ejxl8-(b+U(8%2x2`#S^tjj^t<$r{Xb_`?Eg8Z;bTKid%MTJsr^51{k7B1 zm;JH-@2BmL*}q?_Z<>D@YqWrdpM6Z{|D5sdc4w6B_x=0XbGnrE->ciZE3{v3<6VB= zt>IdK>|L>#!_?|zUdOq@jjhp@} zF?8O%xo>awOZ#E(&D&|o?Je4Duwc7=Pi)mW)@T6@KYP-PQ;*sGhFr(E$;}dXyO} z{O8{DHacUYrTvfV*5T+L9h+|Sb@w@&u6*o-=U;rW!*SQ$c*$xztlr_cqmO^D>v#W| zy;|Q+v+jN@)@C6M-+r~eyX`Xn+m+`WJ9X03%}-xp&XFSz`Qo%6O4@XN>Y@G5diMQ_ z{)yi858C6!w*O9CTmOrR1AhJ?B!f85k{rCnb1!Z2*$K%3^^=!g{PN;CJ$5=^?8%c3 zXw!AM`Liy%f296le>(IJyKi;V>0g~t*vIt`yKi;V>0g~t*vIt`yKi;V>0g~t*vIt` zyKi;V>0g~t*vIt`yKi;V>0g~t*vIt`yKi;V>0g~t*vIt`yKi;V>0g~t*vIt`yKi;V z>0g~t*vIt`yKi;V>0g~t*vIt`yKi;V>0g~t*vIt`yKi;V>0g~t*vIt`yKi;V>0g~t z*vIt`yKi;V>0g~t*vIt`yKi;V>0g~t*vIt`yKi;V>0g~t*vIt`yKi;V>0g~t*vIt` zyKi;V>0g~t*vIt`yKi;V>0g~t*vIt`yKi;V>0g~t*vIt`yKi;V>0g~t*vIt`yKi;V z>0g~t*vIt`yKi;V>0g~t*vIt`yKi;V>0g~t*vIt`yKi;V>0g~t*vIt`yKi;V>0g~t z*vIt`yKi;V>0g~t*vIt`yKi;V>0g~t*vIt`yKi;V>0g~t*vIt`yKi;V{n7uVKkxrJ zd-6}$yjJ(R*E@6{zsrjsFRoL*|L+kiAC}txw`Bbe>s?pfbsaAbf%dD9f2-N!dv+M{|8?I@#?Kiu>&yDn+b-Ox-qL;GH+?s?ANcDl{Br-e@_nISIBG_5OlOtV z3AFzCo0aD_>)5zr|L>*yzwbUVwcm5+e}cFM+A z9y}>o-E|!=4uM(cZNBQ8)1T^?>|OuxL(chX+0!+%os7cBATz zeRrMH)=v(o_xo=9&;25Gyu*~PT_0_8@ph&6|Ms6-s=Kb^#UU{B(d}1Rd-t2yNqyf_ zeUI;v7*OxGmA0JuLhEJs{P(K=-tYJS>EG_RmuBwU;e_NjYyUofba6~)mDLH%>Hff% zNA0-M*i`>EA2#dJC)GdoeV>(Rz1+TY&KPmnmikvGSlq|;FYbQ1u3rCK1&jN9{foO_ zuB+ESSHa>wU;pCnm+R{F&sDIv&)2`W`{lZN{c{y8?(_98?tZzhUjJMLi~D^2i@RU0 ztJgnQ!Qwt&|KjeK>+1E-Rj|0v*T1;?<+^(Pa}_M^^Yt(8ez~q*|6B!&`+WV2yI-!W z*FRUm;yz#h;_jF0>h;f6u(;3HzqtG5x_bR{6)f)a^)K#zxvpOSTm_5!eEo~NU#_dy zKUcxxK41Uh?w9N8_0LtXxX;(WxclY0di`@1EbjC5FYbQ1u3rCK1&jN9{foO_uB+ES zSHa>wU;pCnm+R{F&sDIv&)2`W`{lZN{c{y8?(_98?tZzhUjJMLi~D^2i@RU0tJgnQ z!Qwt&|KjeK>+1E-Rj|0v*T1;?<+^(Pa}_M^^Yt(8ez~q*|6B!&`+WV2yI-!W*FRUm z;yz#h;_jF0>h;f6u(;3HzqtG5x_bR{6)f)a^)K#zxvsvd{wTSH7y>g#HGBT|Qx7`okNtnwTDt%1><4DP@vkKxpS}O? zsn`E^ANX_Ijk@l#;g{CEp)gi&6}bwu`siQltu|@Mj;ZhW|L*^OVd9j&-MU;nMZTv{=&8kOMWd~eB=7)}ji}q`_KVZ=_kN%c=ef(bMGc9O5)ay0n z;hR@Wy|2^gvF99F7^}C6Tm@2nJB_~a*R?vF^-Q-xZ94qpzx`k6-Rsot_FbZXuEK?V zzW#;XC*P~nKi|TIeSQ54yHCDXr+>bM3;X)|7j~a~uTKAb3m5kF^)Ku``Cgs=`4%qh z>+4_Gee%6J{qrqc*w@#;u>0hDb^7O9xUjFUe_{8@_v-Y|w{T%!U;o1Hlke5(pKsy9 zzP|p2-6!9x(?8$Bg?)Yf3%gIgSEqlzg$w)o`WJSee6LRbd7Q@m z!oI%#h21CLtJ6Q`uZ1kpM0-Q|9lG<_Vx8I>^}Kko&Nb2 zF6`^;U)X)}y*mB#EnL{w*T1m)4c=Uce2udjb$_sRF_^v}0&VP9YW!tRsr)#;ya z;ljSY{)OEq->cI<-@=7`eftEP?^1ZsM`p?|q)3vvs+F?xU`1rlf9<$^B?El-h!HIoW$oINj|Nll{`nfGG zpa0Rp3le|Y|99ql4OV_+t#?;Uy=YF1+~b4tKpcdxxVAZn(7jAy2m3_N~hY57~TLyDj@| zaKvW~H@oM#9eX~0#YlAE@-48#xKnrdI(fxIOZ%VFrL6xhv-5(PX=CKY6ohzSrgY|2G2LZn4}h=WaUegOcM1-g181 zbskN9?{9m5{>kf1yD_yd{I;!jP3!;CHdnS^>%&o#ul}Un|IO;$CEo(GQ$6oK<)r;m z{cn14=Ub;%^xq+Q+rcl5A5usEd>4rJ`ua!fANAe!kGep#_t!sK|ETY-f7At{y}$m^ z`bT|t{i7}r?fvzS)<5dI>mPN2Xz#CowEj`wUH_;HM0L5KkB>dA9aCf@2`Kf{!!mu|ELQ@dw>0-^^f}Q`bS+L+WYGtt$);a*FWk4 z(cWMGX#JzUyZ%uZi1z;aN9!N;-Sv;UK(zPQKU)8&@2-E;1){yb{?YnJeRut%E)ebg z^^ev+>bvV7b%AK_uYa`uQQuwvs0&1UfBmENkNWQVM_nM=`|BUAf7Ex^Kk5R}-e3P{ z{iD9S{!tf*_Wt@u>mT*q^^dwhwD;FPTK}l;u7A`8qP@TV(fUVycm1O-5bgc-kJdlx zyXzlyfoSiqf3*Hl-@U5-Q#alF@y}nq;@7hMKf4`u#t9Ys|28}1z`5;vUG#I*>vsRz z5tux+?Wk9Kyt2=q_y3*opV8g^*>A5CmhJ=Js%2_l*z$egcbLENB~Sn3lX|stcUOs8 zpnU)DBY)ZZfA{|`+yA}&DSb~`eBg0oOOrpe|Kj7BC2c$X`s2X|-?;7e2Q?bCXq8`H zje6bgUpoTRx0-+CfUb2X&pvUj?i(I<)a;Vv4{aZuct(Sz-6x#i_Rz0>UgnGG+q7M& z^W4ve^?zjRaU0)#>RYoL)Xv>qC2E0*lXlqPvp&Ckv$X$_O;i0_z2A4>zuO+Z;M{fB z>3hNK8_rtr@Yw@iOugRXg)^2pY{EYq4}9axwfBp9-R@sI0$YD}?X0(--|)Eh7k;?a zPXk-8mHOT{efyGKE@?4i$^D&{{dBwZ{;jWV*C_E`^9$x~S37rim8b<~HA*a+v)oE& zrTTC2aQ%xX{`SmUItLtCrh4Q;T z{qyTv=vUXj&7QTULchBHg(f8KzJ-2u{R_QNe%Ggeetir5>iQRYq5Q5-|NQzE`qlL>^g{VvpZ@vvE%dAFU+9JM zyFUH%>s#no*T2vU<#&Dh=hwH;udaWg7s~JY^v|zvp^)2+P>tE=F z^1D9$^XpsaSJ%JL3*~ox`sdfT(66q4p%=>U`t;AQZ=qkks{T`!9e7yj*jMH+-T(8| z9V+(!-M-Dnk2N{7@9~8`m-YQMfk{*MpEB?K-fR4M|KAzQKJdW46HAsW-v_?S)d!^Z zg)QF)zQv4XI=%PO1=swwt9w_W3v9dJhQB|(=!tFrXaDc={omV-cyou3X3iTp`@0`M zn%i^wqBduKJLR_?Z|%EXvk})$s&m+kLZ8d}{+hs@pN@a;;~yUyHEYD1yPvz|Gi$X^ z_3nGnv=&Rd-#@3#nIo6EbIGqWX7pHe(OYkvvhr3p?*HbJ7TGh`UhZ$t4eUOvwAVIkt-9UYsn(3 zuU!g#F6;Yi0^{2*Ueb8>;HTS-c;mPC4sG&C>U+QNshup_HuI9QxPH zEtIqL52a+y=Fq=pZlRo|e<&quHi!N-a|`7x{X;2PvpMvynOi7l=^skTn$4kq&D=sc zOaD+x)@%;_YvvZpS^9@kvSxGWUo*E*&eA`Wk~Nz{|C+gla+dy~l&skt`q#`Yl(X~? zrDVTS zY!3Zv<`&9X`iD}oW^?FYGq+IA(m#}vHJd~Knz@B?mj0oXtl1p;*UT-Hv-A(8WX*%Wbna`>Xx$dgH)by1w`KN8|1m%KUe& z|IGSFFTe6pf9^l|tl_(DxYeGEmhS)Aa$&{(zuO)5^oV61{Ag?_^T&OT`ycqA#X*yQ znOxrgfA;@v`PN&XH(K@m#pV0J&slz()V{Fg`@lDU^{IVcYqI8sad!)4{=3?JHR{rC zpKbF$`+t}3|K8@2laFh7<>%vPx9E4_5l?(HrS+_-eNS$??>gI_-{8h`ZWuB;l=SC4(MkRnjxKQRcN4miDF=q@IyJ7O6)c3yc6>kk(rP-3lJC0lHpC9UaVwraum0&CaeJnJ+={2qtADyj+@9$lx8mvZ>Ywfrw`cmtt$6yp`loxu z?V0{@E1o{D{^=fZd!~Qfil@)3f4WE9p6MUA;_371pY9R2XZpvjc>28hr+dWhnf`Gr zo<6Vs=^k-=rhnXur_ZZ@x<}le=^wY^>GSHJ?h&_V`p2z!`n>w5d&KRT{&6dwKCk}i z9&vl7f82_v&#QmBN8Fz2AGhM^^Xi}O5w~ah$E|q!y!xkm#O;~>aVwraum0&CaeJnJ z+={2qtADyj+@9$lx8mvZ>Ywfrw`cmtt$6yp`loxu?V0{@E1o{D{^=fZd!~Qfil@)3 zf4WE9p6MUA;_371pY9R2XZpvjc>28hr+dWhnf`Gro<8qi?LYRm)j#Vw=%qK(&-ScX z1t#{~YT1?tZ}r5V```ciPVWq8^N;a=-v4vk^Xp%-^~FcLU##<5ce=nmQ?}@~{($)# zmG`edqkYBxuPx7t&A(`~&xyOFU&XUx6`0tw`T1wu z{@Z!~v;TMb{_kzpd|<-pFIO2d>yieE`G=lAX?oqm?w+;G&ucfFbbPC)>a2N0vCeDV z=>m5={_%28Jn_QV8EZb!VdrmGs@JZp_c5E*`EU2NexG#PC;hjbzTV|Sx4q!8C(_UM zEEIvX{x|jLTK0XP`TM+|W^Vt|-pRRj%~iqHe2qQ`rfzy z=G&*Pp0xG5v-Ur!+n4&MJB9kJynn-^dsXz`{G0*Pu9^Sxk@|;PJbf>z$O#iqQ zPoG!+bdR__(?4#-)92Mc-6L+#^p9Kd^m+A9_lVmw{o__VeO~?3J>vFE|F{)TpI85M zkGMV4KW@d-=hZ*mBW}<1k6ZEddG$~Ch}$#$<5oO>z$O#iqQPoG!+bdR__(?4#-)92Mc-6L+# z^p9Kd^m+A9_lVmw{o__VeO~?3J>vFE|F{)TpI85MkGMV4KW@d-=hZ*mBW}<1k6ZEd zdG$~Ch}$#$<5oOV#4{`KvmxH*M7 z?;Uqv`NWRbuG2Ve|L@fP@2!v6e9djP{^awSFZ_P_zk#Jut6pk3R1=YFYlh|NgeuHeccQeXiTE#cle>_{a1Qzj3As_0Oae{xSW-Z=7jD{WIx=e@y@I z8)uqO|4cgJAJael#+fG6Ka)=Q$Mg@sai$6N&!iLnG5y1DoM}S+GwFnXO#kp3XPQv| zOgiBo(?9&inI_aflTP@@^bfyrrU~`Wq!a!z{ljmZX+r%o>4bkw|L_}Uno$2tI^iGF zKm5j-Ce%NZPWZ?455IAy3H8sU6aF#%!*85vLj5!8gnvx`@Ed2EQ2$Ii;UCjK{KlCk z)IXC>_{a1Qzj3As_0Oae{xSW-Z=7jD{WIx=e@y@I8)uqO|4cgJAJael#+fG6Ka)=Q z$Mg@sai$6N&!iLnG5y1DoM}S+GwFnXO#kp3XPU6m{&NQmvHxQxwQ!F8xqp)t&Tsa^ zIhTzt@857nJ^TNL)4H-rmiBMhwqpO+7OyrL`#{}2XZ(2|_{}=?eBjj85Bj9CYj~}A z1=9Ba-e>5kt){m+=HJf_db@Rtcc(NP@%Dk6%zgfZUAO)6t>T@~$}pgPJ`4!za>+{Aotp}9c)VSFvGlvYm{-{jXaIAO*D*E4f)#WSon0Cnhn+W-VGjXcvPzY+^sfx=Cww@^cwiqCyPhxA5P;;ljxsGC;Vgjhu=8U zg!*UF3ICY>;Wy4Sq5he4!at^e_>D77sDCD%@Q>*qe&b9N>Yqs`{A2ou-#F8R`e)J! z|Cs*aH_kMn{+V>bKc;{9jWbQCe;Wy4Sq5he4!at^e z_>D77sDCD%@Q>*qe&b9N>Yqs`{A2ou-#F8R`e)J!|Cs*aH_kMn{+V>bKc;{9jWbQC zeg*Qoi=d(`{ebI0Ad z^Jc>zy!GUtihoiQR3)6jENI%g^Ks{;`gfXr;4zQg zeCH}(?f2c2UsQIk*NR_Y{08f+u+0g(-8lY$#Kw0Hc&clo1EvqWbfqR=G;03)@po-; zaMKCBk38_J{@tD{{z*+xnZWq^LyqWn_mIREy^q{`m33OSYjnWudro<%`3Cw|rWM|6 zss2Yl`ug}2QvEwVc2%#jfArr-|L|H@Hj)07sl?|s{o~WRvPt!?OeH?A=^vlgl})OD zWh(J`P5=0`u542MD^rQjYx>8hb!C(4UztjLUeiB5tt*>U|H@S2^P2wgX8_`Ifnd|FpFss5Fz#OF2rn*Q-=UD>4iSEdr5*YuB1 z>&hn8zcQ8hyrzGAT30ry{*|f3=QaJ~)4H-r^{-4NKCkH?pVpO4s()oF@p(=E__VHU zQvEAaiO*~L$ES5=lj>iYN_<|^KR&H1n^gbGRO0iR{_$yD*`)ecrV^jm^p8*L$|lvn zGL`terhj}|S2n5sm8rz%HT~n$y0S_2uS_LAujwD3)|E}Fe`PB1c}@TLw61JY{VP+6 z&ujX}r*&nM>R*{kd|vzi_b*-liLF1qylnsH<9BQJgZ+QQ{w$>W|Ihxv7v0k6fYRH4 zUHg+Zjauyf$bvH)%-;0XV}2j;*~|8S5A_Eph)*k}nPU*J(0f|=@kGp;9PaOyBRyWnZSKZYw+Ow?xTO&`t>!#(OTX_8g^`-#} zQ()XlUFJ+|H~YJBcbxV4A)SBuTdMyB6TcjBZCU@DKV7n1qiJs~`>)*|`e&Han`+$2 zH~x6xNmo5{>vESg{i^>J8<+JzpmC><^pD49qpW|wem}P>>;FsJi$FqCI)cRLQ z#Pd=Acy_OtTK@`(cs}YM&+Zjd>t7)e&qw{^*}Y|Qao{uL7OeAGXl-7BWnzd|COkNU^6d&SiHS4hP3QU7>$ub5i@3W<0=>L1VU6;tb9 zAra3<{o~oaVru;>B;xs4N&ibmEk60I^8KHWnA^|(zhQqa%K9(e|M%kL73<%z^`>`j zzf9-1?|u&FFNegclp6*e&)lsIP%t)yyKo{ z|IoqxPk75ob5xl3Ri5$Z_qqSA9{)AZc+ug%|HJ2g-L{x=S{ED{+r6o@pb#p zvGb-^Y5z@S=J>k(=h%7EtF-^7GIM<0{&VcS=~dc)Q<*uwZvQ!U-t;Q%zp2a|U$_4p zJ8ycG_TN-yj<4H)j-5BXO8ajrGsoBMKgZ6SUZwpvm6_w~_Mc@@2kv*<1N-0f_)mZ4&11YuuVPby;`hJ*-Ek*hddt7M>^=`Z?rxv?$J1Yb$q#P& zbG|-CfisTz`RhFA^yfV0jFaB>iql{F(|>>5$sd2;S@(GVKkx1T$^SCOtMn>16*%Me z54q!+ue<;Ep7H4WUvb1wKld4X`~S=%_xFFqra$NFV-z^!_7DD(mp$o%7aZ9C%45F$ z-1F@}#;(`&RTTSw-2T_J+!)_y|1oyl_Ep$_+j3+4Is1>XED{@a!t5p>_5hi+rA3>Z(DARKWG0jcHH(=*nit{WBfV$kFn#nufqP@mK)>G z*?){3w|y1%-?rQsf6o46?6~c#u>ZE@#`tsgA7jUDUxod*EjPxWv;P=7Zu=_iziqiO z{+#{C*m2ueVgGH*jq&H~KgN#Rz6$$qTW*X$Xa6yF-1b%2f7^0n{5kuNvE#O{!v5Qq z8{^N}e~cZseHHfKw%izh&i-TUxb3U3|F-4E_;dCjW5;b@#Ub|p$PsV+(wFW3{@+J# zdtHAnM}aTc|6@;m?=3Gs`1{{4|FH`X?EjgQf9$a@$nh#X<+cUN-~WE|cRurd*ZQHeU}6KpLOgvfB2qz z`~S^DbG!;qxov^dpZClg-RA87_U)&?^#j-X%ZEJv6MOrA?1uaMKXTja`g1u79N7P; zXTJQDhaTMj9zT9N`{&qgsi!RVzmxr!Dx2dv_Rq0Xt*5kqt+F|;YyTWO)p|<%*D9Oi zy7tepQ>~}8f331Pu514sJJoth``0R)~}8f331Pu514sJJoth``0R)~}8 zf331Pu514sJJoth``0R)~}8f331P zu514sJJoth``0R)R2E?*9Jw zckJ%};Nze99eq8mr&+qd7ybVCyB+a{Z~Tti-s9L$J?iN^>&?N{EhxBm;T-PhB4nxzYzcIEp&ef+ymJn!^7 z-Tljd@z4I}gZB4-ul<)U-RJcV?0@-tp7^_8xx4?fKK++h*uT%6YCVGQQR@ofawSSJCYCWa>Yn9D$UHj+Qsn%24 zzgF2C*R_9+ooYR${cDxYab5f8*s0c2+P_xW9M`pfj-6^frTuG_&2e4(=h&&%Q`)~) z*&Nrke~z7MJ*E9?mCbQo`{&rH)>GQQR@ofawSSJCYCWa>Yn9D$UHj+Qsn%24zgF2C z*R_9+ooYR${cDxYab5f8*s0c2+P_xW9M`pfj-6^frTuG_&2e4(=h&&%Q`)~)*&Nrk ze~z7MJ*E9?mCbQo`{&rH)>GQQR@ofawSSJCYCWa>Yn9D$UHj+Qsn%24zgF2C*R_9+ zooYR${cDxYab5f8*s0c2+P_xW9M`pfj-6^frTuG_&2e4(=h&&%QyyynxBj0GJkIa` ztyR3Q>we+>FMja1zy84Q|K0ul@3+|9|I40y`UCoUT2Hfdf%5miAN#grUvu3zUwZkm zKYj1V|HzXcbepB`=X!k>IPJ~f@bTjw`^V=T*#8f|`)`iFWPks!>GNql)lvmc`?WJ) z_?buFw;f?tl3Ouldx${eR{=9&Y~}yDjyU_Ft-G&g)1c% zZc9D2{g*14^E&p=x!Y1tZU3c8=Dd#mbMCg(Q`>*3k~yzq|D3xm_0;xXs$|aV*gxlP zOFgyymnxa_I`+@G+fq+$|D{UiypH{I?zYrZ+kdH&Ij>{?oVzXc)b?MhWX|i@Kj&^s zJ+=LpDw*>-_RqQ7QcrFFrAp?!j{S4)w$xMGf2oo=uVeq5yDjz9_Ft-G&g)1c%Zc9D2 z{g*14^E&p=x!Y1tZU3c8=Dd#mbMCg(Q`>*3k~yzq|D3xm_0;xXs$|aV*gxlPOFgyy zmnxa_I`+@G+fq+$|D{UiypH{I?zYrZ+kdH&Ij>{?oVzXc)b?MhWX|i@Kj&^sJ@p~? zf6A+W*6)8_s&L=exyt@eKmS!%{r&F`ylLN0@9CB*@cG~We!{7L{Rh{-$`g(~;ncev^^7OH&;EVywe-`ScKpc?y!gF8b&~y; zE}P?e`};q{{yFwq`YG+dbkSVbvwyC=mVR3MFI_a(_3WQ(uce>X{!16lbv^s%+H2{j zwg1vZb6wB=x%OK6Y3;vs(OlQFf3Cfjep>r4T{PGA?4N6|rJvUROBc;`J^Sa{Yw4%8 z|I$TsUC;ix_FDRB?Z0%8G{- z(nWJ!&;GggTKZ}2zjV=D*Ry}Fy_SAj`!8KI*Y)h5YpHd=h|!Or?vmm zMRQ%x{<-#A`f2UIbkSVbvwyC=mVR3MFI_a(_3WQ(uce>X{!16lbv^s%+H2{jwg1vZ zb6wB=x%OK6Y3;vs(OlQFf3Cfjep>r4T{PGA?4N6|rJvUROBc;`J^Sa{Yw4%8|I$Ts zUC;ix_FDRB?Z0%8G{-(nWJ! z&;GggTKZ}2zjV=D*Ry}Fy_SC3tM32mT7`!~tl?M>|8?KDju%>GR>cY722cRNj!2eW^Z%-!C^{@qT~dlUP2J57@ZvwxG!-QL9h-A>cw!R+58bGJ9K zf49>#c`*Ao$=vNt?BDG)O&-ktO)__T6Z>~NO_K++f0NAJ-o*ahPSfPU?B67Fw>PnW zx6?FvF#9*j-0e;5-|aL_9?bqtGIx6u`*%A{lLxbZlg!=T#Qxn*)8xVI-z0OlH?e=W z(=>T7`!~tl?M>|8?KDju%>GR>cY722cRNj!2eW^Z%-!C^{@qT~dlUP2J57@ZvwxG!-QL9h-A>cw!R+58bGJ9Kf49># zc`*Ao$=vNt?BDG)O&-ktO)__T6Z>~NO_K++f0NAJ-sE5R_xsG_rL$> zQSW^JuRr(M6>sIz9Ts@u?O)m9BYLEY1$Os8=^g)aJNs9R z-0`LD-|;vN9?kv@B6oZX`*%D}gGaM}gUB7uu@f5+oAcr^Prh}`il z?BDS?4Ia(@4I+1Z3;TCGPJ>6Ye}l*!-@^VKkJI4M?B5`A$G5P5$Ky13H2XJ*-0>~! z-|;vN9?kv@B6oZX`*%D}gGaM}gUB7uu@f5+oAcr^Prh}`il?BDS? z4Ia(@4I+1Z3;TCGPJ>6Ye}l*!-{K4R_xs;F9;(u#?e2f@_rD)`y|e$}vsXN}(rsL} z(*iF%{DV*V+OzL={xQ4z|MR!JqSMFpSd|Lczf#yFm$iSAp(;J5{VRn{a#{N)8LHA_ z+P_lRB$u^+lA$U+ru{30O>$ZLCmE{JW7@w`*d&*=f0CgpJ*NFDg-vo<`zINy(qr1c zQrIMywSSVKDm|wCD}_yRS^Fm$s?uZHzf#yFm$iSAp(;J5{VRn{a#{N)8LHA_+P_lR zB$u^+lA$U+ru{30O>$ZLCmE{JW7@w`*d&*=f0CgpJ*NFDg-vo<`zINy(qr1cQrIMy zwSSVKDm|wCD}_yRS^Fm$s?uZHzf#yFm$iSAp(;J5{VRn{a#{N)8LHA_+P_lRB$u^+ zlA$U+ru{30O>$ZLCmE{JW7@w`*d&*=f0CgpJ*NFDg-vo<`zINy(qr1cQrIMywSSVK zDm|wCD}_yRS^Fm$s?uZHzf#yFm$iSAp(;J5{VRn{a#{N)8LHA_+P_lRB$u^+lA$U+ zru{30O>$ZLCmE{JW7@w`*d&*=f0CgpJ*NFDg-vo<`zINy(qr1cQrIMyy~_T6|9g_v zmV35BHxwAS*^YX9Y%X1b#NGp)7!v)X_8 zrkSp2|4eHw|E%_3zG5BHxwAS*^YX9Y%X1b#NGp)7!v)X_8rkSp2 z|4eHw|E%_3zG|ZTyiVNF6#Z1*6(*D)brns>EQ_NKDA?;r+ZHf!qKgCSd9@75R(x$ku{Zq_T z?IG=7Ep3Vm+dsuj)gIFR)zYT8u>DiaRP7<{UoCBl3)?@%Ow}IJ{?*c^xUl_G%v9|m z?O!cziVNF6#Z1*6(*D)brns>EQ_NKDA?;r+ZHf!qKgCSd9@75R(x$ku{Zq_T?IG=7 zEp3Vm+dsuj)gIFR)zYT8u>DiaRP7<{UoCBl3)?@%Ow}IJ{?*c^xUl_G%v9|m?O!cz ziVNF6#Z1*6(*D)brns>EQ_NKDA?;r+ZHf!qKgCSd9@75R(x$ku{Zq_T?IG=7Ep3Vm z+dsuj)gIFR)zYT8u>DiaRP7<{UoCBl3)?@%Ow}IJ{?*c^xUl_G%v9|m?O!cziVNF6 z#Z1*6(*D)brns>EQ_NKDA?;r+ZHf!qKgCSd9@75R(x$ku{Zq_T?IG=7Ep3Vm+dsuj z)gIFR)zYT8u>DiaRP7<{UoCBl3)?@%Ow}GT_W$ZvJn;c%e(#yjsCFY4?y|u1U-86y zKj{;v-q-$J&QtxN>|Z@`*Eg_#*Yi|=IQv&m-1QCY-}OAzAI|>O6L)6 z^~7D@!2Vs&Q~lxWUp;ZxH?V)#^HhI0`&Uog^$qOb^*q%d&i>UCcYOonSBPu%ql?BDe~)gR9O)f0Dp1N(P9PxXhhfAz#& z-@yJ|&r|*3>|Z@`*Eg_#*Yi|=IQv&m-1QCY-}OAzAI|>O6L)6^~7D@ z!2Vs&Q~lxWUp;ZxH?V)#^HhI0`&Uog^$qOb^*q%d&i>UCcYOonSBPu%ql?BDe~)gR9O)f0Dp1N(P9PxXiU{Qlqg8$W*R zV=nu->Nj+Qt_nQ=;O-xN?^oNut6f?>jQv}c?(;tO?{k+{Pi6mBrTe^({rlXd)l=EO zRp~zOWB)#PY4ue0Z&kX_``EwFU0OYr{acmp^FH?PbC*_6W&c*C`@E0+``o3~Q`x^& z=|1mc|2}tV^;Gt6Rl3jn*uT$RT0NEhTb1tfKKAc(msU?@|5l~@ypR3++@;l1*}qlk zKJR1yK6h#LRQ7LGy3hOAzt3G-J(c}imG1LC_V06-R!?RBR;ByAkNx}HrPWi}zg6iz z?_>WycWL!h_HR|X&->WF&s|zQmHk_l?(;tO?{k+{Pi6mBrTe^({rlXd)l=EORp~zO zWB)#PY4ue0Z&kX_``EwFU0OYr{acmp^FH?PbC*_6W&c*C`@E0+``o3~Q`x^&=|1mc z|2}tV^;Gt6Rl3jn*uT$RT0NEhTb1tfKKAc(msU?@|5l~@ypR3++@;l1*}qlkKJR1y zK6h#LRQ7LGy3hOAzt3G-J(c}imG1LC_V06-R!?RBR;ByAkNx}HrPWi}zg6iz?_>Wy zcWL!hvH#7^ec#P)@zA&2s?~knr;h@^xxc&p``DxPQ`oMSH1=;@ys!J& zzpp)7KaKrc7w_wS_U~(t)=y*q*2Vj}pZ)vVqxI9+zjg7x?q~nL_GtYy_HSLhulw1* zuRU5njs05}@9Tc{?`x0NPhwfm{Yme4XWB=C0`?{a~``V-R)7Zat@xJb7|GxHU{WSJ(UA(XR*}tzn zT0f2bTNm%^e)jKckJe9P|JKF(x}W{~+N1T;*uQo0zV2uLzV>MSH1=;@ys!J&zpp)7 zKaKrc7w_wS_U~(t)=y*q*2Vj}pZ)vVqxI9+zjg7x?q~nL_GtYy_HSLhulw1*uRU5n zjs05}@9Tc{?`x0NPhwfm{Yme4X^ZEVn_9Htdo$yQVZhe3ETerY(?(cpx`>#91N_V&aO5r=Xt^GS0 zVx`Bh|4QLIxvl*>8Dgc!u>VToJGrg>I~ii7$FToO;XApl{W}?ArN^-UO5r=Xt^GS0 zVx`Bh|4QLIxvl*>8Dgc!u>VToJGrg>I~ii7$FToO;XApl{W}?ArN^-UO5r=Xt^GS0 zVx`Bh|4QLIxvl*>8Dgc!u>VToJGrg>I~ii7$FToO;XApl{W}?ArN^-UO5r=Xt^GS0 zVx`Bh|4QLIxvl*>8Dgc!u>VToJGrg>I~ii7$FToO;XApl{W}?ArN^-UO5r=Xt^GS0 zVx`Bh|4QLIxvl*>8Dgc!u>VToJGrg>I~ii7$FToO;XApl{W}?ArN^-UO5r=Xt^GS0 zVx`Bh|4QLIxvl*>8Dgc!u>VToJGrg>I~ii7$FToO;XApl{W}?ArN^-UO5r=Xt^GS0 zVx`Bh|4QLIxvl*>8Dgc!u>VToJGrg>I~ii7$FToO;XApl{W}?ArN^-UO5r=Xt^GS0 zVx`Bh|4QLIxvl*>8Dgc!u>VToJGrg>I~ii7$B6xJ`pUn2;k#aW*mYKV47XjW!1MNZ zxBp55tbAMhuROrYx3~Yw1FU>|`>#B}%D1=w$^)!?d;70Ez{uROrYx3~Yw1FU>|`>#B}%D1=w$^)!?d;70Ez{uROrYx3~Yw1FU>|`>#B}%D1=w$^)!?d;70Ez{uROrYx3~Yw1FU>|`>#B}%D1=w$^)!?d;70Ez{uROrYx3~Yw1FU>|`>#B}%D1=w$^)!?d;70Ez{uROrYx3~Yw1FU>|`>#B}%D1=w$^)!?d;70Ez{uROrYx3~Yw1FU>|`>#B}%D1=w$^)!?d;70Ez{uROrYx3~Yw1FU>|`>#B}%D1=w$^)!?d;70Ez{-F4MeZSiOsy$|Vf7svtyZ?KZ|8u?%eE#R$J$}yF-~Rl+@~rqg%y#to_xbnv z_xbnt|2ZAt|D3zW-*fD5e?R~I{P**JP6zlu=k9*~&%L|-?Z5l?KK{@BKCr+2{qH~j z`_KRWo6`aQ&$+w5|IfX<{q666|IPdNKA(S|f1iJ!e}Dg<+X4IA-@pIO`*XnO-{;@w z-{;@I|Ig_F|L5G@zyHj=yZ!C&-~Z5a>Xm) z|F!@7T+81B9={JdelKvi;JCMc_71V@Rd>JQxFZj~AFep=I{U{X91r|8>5Ah>kJH65m%Iuf6|% zaNP4CTZh;)c8=ZS7+;U?E06Kt9mn{25WB~*+%?|c<=36tf26+;&H8?h&wU)r4nF^5 ze9p^V<2-hcV|+c1_P0Oh$NAs=@wtv;eD32IUyq~Dzt6wVzt4Yo;5e2C;eH&;efdAu z-sQfrdmPLEZh!mx`S0g{>4NhyTnP8$SnkUIF?KF@jq}(&j`8(4+TZ?u{`>hK9ypHW zLAW2sa$o+BwRgF1>>kJRzuVvbe*XLUU%KFY3>U)vIF`Hee~g{WUE@4e+>c|q zEC0vXx!g6*WA`}5*W+k^`}_It=YM$MIF<+DejLkv`9Id)<-W0d9LxW1fBXCS@8^H% zg7Yz42>0Vy?#ll$b}n~~^VmI(@%1>`-~N97`}rRpIF98(xF5%IU;dA^ce!uu9>?;( z+u#0v{`>h~y5M{a7sCBGmb>zQjGfC}<2-hcV|+c1_P4*E|9<|52aaQT5bnn@_Vs%o z$J)8vHO^!A_&v~Z9{=5O%=ba;9J|M{{C&{zeU5wYgHbxf-h0Q(o_3#uzt{P)r=7Te z+}S^#;CSHozF+pVQ99gvf6qAHcmF(gkFWpd{jWRje(o=O+9)0FRqPs{^Ek%qyvP3c zIqvO0T8DcUpZoBw-2eFf?{nOHAB@r=_7CsvKi0mR-lumjT#wJY{pZ*}-WT!nEOw7$ zydUDf$M-vq_5O%`WA`}Pf2^H1eQxbP$IkYjduRLGfA`<7%J%{P=YC(<-~NaC|7-aB z{~YfNpZ_^_E`RRFx&7_W|1bV~aeO{TKl=Rp{QLa-{Kvoh$I;*a=Wv1lbL_n7?=kkb zzn}kp{>RV#IQscNhYS3lV`o4A=ib@=_TT+`Uim)Y|J?5j``h3D{`+DVOD9J?_xbnv z_xbnvkAL@xqksRI!v+4&vGb^T?x>HhK0_K$CHJn(zp&$(#S z4sYDB`}{o{4LWq)_ay}h${ zh|h8CylMaVzU;5p^B?+sdv*K!{2#j8DDS8Ed)}tczy0mM`}dns-tS(=Ro~A(|5xoX z+xx@*_CM6WZ^hpW{rn&M{o(UJ_P$s5^UVJC=l{I_`^M+r=ilew=ik5o&FKLD=iEL1 zcY*!w@8`dt|9<|@=>Y%d+}+Rrxp%j}{dfP~$N#zC2lltW|NZBG|M}m4b2`BPId}K> z|G9U!zy1C1zj^=O=kxFL@AL2T@9+O}J79nN`}ezxVn4`~3U-`~3U+|J)AP-~RsnZ{D8+*Yxx6_dt&6MEstg`1jxV zebDjs`29a|9>;j!IPSeKvUP|Z9g6qC z*Y2M??tMN+>2P(Q_gnAp?zp%6=pEwy5by8NU$579=%4qS_P-4e4*vaUl=n^S8K3(& zZrXp8*FF2|h~48DyT{S~WBA4$j`6vVqtCz3zt6wV|EPcenf-Nz`*95S5BK93?#Ge;{O3Ra zNA+*^*A?!^G2D+M|M|~<{?G2@sINcVk7KwWNB;Al|NI}-zu8|`xF5%GKaTw8KmYkZ zyOX27{%}8z;eH(X&wu{&e^mcwe_i2z9K-!M@}K|w=l|?Zj{5q;{WymEapXV$`Op7R z{hR%Dh5K;~_v6Ta{_~&zvpYHJ>ks$i81BcB|NQ4a|3~$2_SY5e$1&WGBmeo&fBw(z zc|pA4mT4pa1+H)xX(aSGXU?a6gXx=Rg1XKf9BozW#7Oj^TbB`Okm;^M6$T zW`AAbejLO7IP#zW{OAAdPLBHe!~Hmh`*Gwy|M}1VP5y-&j(bjI>rn0*?w7~%`?9@G z&wJ?iXY3k3=gZyWJiaf^9rt>U*5TmJm!I<5yYK#fU&rY>+~aC?KYsteJMMixM(c1* zKlkyvuj%*uzMP(xJIA@>n4kaHH9oI#?l|_|<-X4?p>bSd#t_7J-|vfU7p)}ti8)U%e~9<^81~S^?oS#F3;^f*52iw<=*9a`TfqvdOwtVm*@5# zYwvQ;a_{oI{C?+Sy&uZG%X52=wRgE^xp#S9e!ugv-Vf#8<+;7b+PmDd+`Bw4zu);- z?}u{l^4#8I?OpC!?p>aj-|u{^_d~gNd2a8q_Ad7<_b$)N?{_}d`=Q*sJh%5)dzX8b zdza_s_d6f!{ZQ^*p4)q@y~{nzz033R`<;*Vekk`Y&+R?d-sPU<-sO4u{m#dFKa_iy z=k^|J?{d#_@AAC-e&=JoAIiPUb9;}qce!V|cX?iZzw@!)59QwFxxL5QyWF$fyF4$y z-}zYYhjQ=o+}>mDUG7=#U7nZU?|iKHL%DZ(ZtpSoj^FbOr z`pFmX{+{;(#~1A%LV7I=!w#zmGTV9$$C7s*9ucC_f+ZI?veudEmJB zJ{YY-xqF=3f6Tqh_iyZ79_?@c-TysMzF(Y=^}Z-S_i^s?Ki1BhKDV)Vd9=U%`9J>8 zzw&b*=RW^F|33dd|C|2a;Ok@f5Z;$Z{*STm)qPIO-Q(Q;_V@GO&;P6Q!++21M0j5w z`Ja2&Yx=n?caL-X+uzTBKmV_(Bfc-E7vX(*WS~q zjAlryf=rzhckdE|eedoBGrDtC`_``h2o ze?R}1_Qv)4awWVkkNodzr&^zza`!m5zy1CE_w&D&JFeS@FX4T8f+lcwZj*zs?@5KL_RR zac+P6`}yzZe`}B2udO@beR<@6+a1<=-}_8)uy*tgs}z8=T;?~Z%#hao%Ma_3wBX!rL#f5Gup`{#~(T}JB=d%xQ52jBOj z?R!mMPwZcQe|+6>@AE%;2m8;lbNN0lcaQV(`<##Y{)oNHqrJ!6yWF+hJ3?=kl-cP)31^YZ(gkNJLxz00G$$K1QzwcI_<%kOhO=KCS` zE|2yebMJE3a`!kdzt8!Y?}ymCJlcEAy~|z8-Q&FcKIdb;A7bzFXzwxiE_W?=kMr{T zoR9f_h`r0By~o_U+_l_2&dcv}KIZ!&_AZb19&_(<*K+qbFTc3?=kl-cP)31^YZ(gkNJLxz00G$$K1QzwcI_<%kOhO=KCS`E|2ye zbMJE3a`!kdzt8!Y?}ymCJlcEAy~|z8-Q&FcKIdb;A7bzFXzwxiE_W?=kMr{ToR9f_ zh`r0By~o_U+%d;zV3L{ zPT70J&gC&)SNV14d;5>lq1?GVALVs=9smFL?dgU0Wv}?}j(Z(P=}_*x>GMCz>-9RW z`u?^5ReQ|#{;Z|oaK{_~&z{0|S}IE#m~z0TM-j{N68 z|M?#t#BnwkXL-G`Zyfp0fBy47Jc#2g9?teUW8XOPpa1;le|Qkb*<76E^~Sz&j4r7iW3Bv2Pst&wu{& zKRk%zEFRAGI%D5B@}K|w=YMz*$Jtz*<@LtCapXV$`Op9GAda(mINR%tedEY~{_~&z z;Xxc{b8(i}8~etQ|NQ4a|HFeg&f?*0uQT?IBmd|2|LWcc@!uWy-Y27X2oFbponGHz zS6=douX^0y9{4@bj`MZ+klnrSeg65!0S<70103K02ROh14sd`29N+*4IKTl8aDW3G z-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$ zfCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h0 z0S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b1{$N|3x{IDrM7rVRv(a|6Ls>eC% z;CpZk4p z|GD?)z5RE8{(B#IA9x@59B_aG9N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70 z103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh1 z4sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4 zIKTl8aDW3G-~b0WzyS_$;Lr}l?}3h^PKWL^T9*TRf8gYoUvc~|IO^bi;B&wM4sd`2 z9N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8 zaDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0W zzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h z00%h00S?UV!0-IRTW^2ayG}gJQ3vk>p92nXfCC)h00%h00S<70103K02ROh14sd`2 z9N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8 zaDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0W zzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aA0N!j{l0ky5OeAoOqa{ z4&Dbo2OQu42ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K0 z2ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`2 z9N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8 zaDW3G-~b0WzyS_$fCC)hz{C!G$fn9gbHD)(aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8 zaDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0W zzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h z00%h00S<70103K02ROh14$SPpKb-T2N8aGI-@oIigZF{Y0S7q10S<70103K02ROh1 z4sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4 zIKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G z-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WFtY=n zeAzkoJmLI5+i}#v`@rXb103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h z00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70 z103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh1 z4sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC(u*?~*%{HfD#@azZeIO^bi;B&wM4sd`2 z9N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8 zaDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0W zzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h z00%h00S-*fn9gbHD)(aDW3G-~b0WzyS_$fCC)h00%h00S<70 z103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh1 z4sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4 zIKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14ovL8CvNmz|M8pu>&XIKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4 zIKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G z-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$ zfCC)h00%h00S<70103K02WEEQl55}T&98a)b$1+f@ILT4-~b0WzyS_$fCC)h00%h0 z0S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K0 z2ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`2 z9N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4W_IA> zZ~E2`-S)d5u;ZwM_kqs=2ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h0 z0S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K0 z2ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`2 z9N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%fQwF5u)sOx;})=%BJ*ii@X1D^v9aDW3G z-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$ zfCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h0 z0S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K0 z2ROh14sc*%2QInq!!P{K^PjlmsDt-`&jANGzyS_$fCC)h00%h00S<70103K02ROh1 z4sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4 zIKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G z-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4I54#X4}JS%uJx>&>|Elg zgZF{Y0S7q10S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h0 z0S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K0 z2ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`2 z9N+*4IKTl8aDW3G-~b0WFtY<+deMCkd)QrfE^*Ys`@rXb103K02ROh14sd`29N+*4 zIKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G z-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$ zfCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC(u*nx}x z_}A}v!7CoJXIKTl8 zaDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0W zzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h z00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70 z103K02PSslk{dqqQ}_DO7w%)g_EXpY-e+FAfn9gbHD)(aDW3G-~b0WzyS_$fCC)h z00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70 z103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh1 z4sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14$SPpTYu?k zA9~&=cP@0)!TZ4HfCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0W zzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h z00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70 z103K02ROh14sd`29N+*4IKTl8aDW3GnAw3hJmsOM-0<-`7dqA6liB=uXXunWu5a1zR$hZ_3SzR*c{*h2ROh14sd`29N+*4IKTl8aDW3G-~b0W zzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h z00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70 z103K02ROh14sd`29N+*4COfd}{F`ol-r37u)j600d%yt>aDW3G-~b0WzyS_$fCC)h z00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70 z103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh1 z4sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14vco-!7q6B z_g-?q0o6H}1AD*$4sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K0 z2ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`2 z9N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8 zaDW3G-~b0WzyS_$fCC)h00%h00S-)dVAluk_`_dX_U=-hgE_DV9N+*4IKTl8aDW3G z-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$ zfCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h0 z0S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103MM zXa^p8%C7HO4mhAX2XkN#IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh1 z4sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4 zIKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G z-~b0WzyS_$fCC)h00%h00S<70103K02ROiisSfPfeb0ga`P=Va)hlmb*IU;0W_9~~ zuIzFS$#tFcy>kD=I`5CH>xb2Y`FA^qkFRt6?{&>wB=`5NfBxQe{fE-$=juT}uiTV# zxO$yqa^70^@hx+n_et)l?^!>uu5#bBb4ae&taHrX*Zh-r>buv^tE=2M^&FDtk@C-+ zWWG1`ef{5k=)B7R(DRr!AM%&~_P=NOKXVT3|IFNT&&gl@{J-bQ?8CHb|Lwp1xBou> z&vbzQGjq@99Qn)N{qO#F|Ic)Q|1)!U|If}{{_@}c+{gdfb0B~D`}d!J|M~adOb7Ts zGk2f=XXh?|`TO_Z+|PaX-~QWw`|tDrYzO2oe}Dg*dk@%u`)~j4zrX*_bb$XebNBb3 z*}2PK{{H?q_j8~9xBvFv{`>qt+X4B@-{1e{-UIgE{@Z{1@9+OJ9pL}W-2MG$cJA_* zzrX*@{oH5&?Z5rE|33fEc0m5}_xHcK_kjJk|MuVh*T4Vgdm!`upZT89d=FsUP*;9z z>N(_lee3tX=6k>My}=0siPzUk+XT(4VypKJXd@O&S1z85$ysH^-p{T!0(p}Frr z?K=<7!~WC0V_i>D4}478e_GdbNZzT*{Xy&8Q+Ka>b$kE!pVs9ZGS|sHea-#Eb^fVc z-hb+Q)Etw0YR)Ck)q28u9@Lcwn|2P#GdU;s)cicp)tcX{J??|#o?3IweAl1r<=^S^ z(A0U(-lx`cu>U>wyyludxu@pmsq&XU=llG-KYN{;y-&^0Q|-U~xBvD(9;m%Mi2JEE zU;g*XyXKqRQ|s^LFMs#H`@g!N@8Lq+Pp!G~zemnB*YwFfH9t?4zx>_*?*Dk8_VOU^ zr`CM=-z)E$Z*otqzn8!K-T&_Y>Vm$93voZS=F0ybIoDj%C->C+JXQYkcmKQp7I36*7Y3q!1uo2d1;q(c=|fe)Sq1U$vr=R+WP0}_TJxlX_s?2yX2ZZ zPtAKib^V;W%D>w=oK^NdzSaDnvVKlo%|VxQNdEC&{=M?uX->~9u4munKO_IlMeeiY zo|<{c@AJIcYd(^1a!-|iubg+aV zVgG03T<`t#@|QpV@BO(r`_aAHfBSF$?SKC6pX&4f3>Wx6Bj=qy$H-s)?tk}x?)_Bv z{|p!SKO<-N|LmOQFaPb&d36r>KYK3ZFMt33ySIzg$*#TqxBvFv{^##Lss8>m!v+4& z$a$yFFY=ea```Vadq36vKf?w7&&b*RKRajn%isV1XYS`b`)~j4zx}Tc`20WH1^LV0 z-~Z;`3-;gs+kg9?zx${9`~M6V_&+1(`tQE<@|VB+-~FF^Kh^y|!v+4&$T|0Yt@4+@ z```Vadq36vKf?w7&&avn`|0H`fA_!pKlgsB`+tTD)&G1CWWFCZzt4SS_))^&FD(GY)-FqB5 z&-VY&^O!at@|XYN{=SvZh3@~}^I`vc=X<34O#brc|J;Ay*nj(P|LwoO|IKuO|1)#X zzYF9qfA_!p-~B(+0sha--Tgm1clpbI`*R=vXU~EB7fBWzA|7-{3FMogkn|lw~fBSF$?Z3bO&vbzQGjsR%pV_&~U;h67H}`X& z{kQ-2-~RjjKidKM%irJs=H3JMb^m=2WKSpZJwN&TZ@v#YKhO97q)*MP@O}WgGaA>b+sQ|&f!S={+M;{>MHl{=a6~G ze0P6e?|1n7ey98&%Y%cTkGjlF^32|+?v#I*_n!V9$vri>r^>(geV^P@lY6TC<-h%R zfay+legEuzYW6FirKmYmP)xYWQ757u)ek%X@&wu_;ce3mI$Nki}pUQv!^Pm4+{hR(?aX&Tg zr}Cfw{OA94C%e9X+)s`Bsr=_Z|M}n5zv=H4_fzA3D*ySB@}K|w=l^skyS{(iPmTMj{O3Ra`QO#Q>F*WyQ{#Rr|M|~<{!e$Z z>-)$3)VQC@fBy5I|6To?{$6oEHSVYKpa1;l|8yt2zJJ_Ljr*zm=Rg1X-_^hA?-ln` z<9;gt`Okm;Pj|BG`^Wv%xSz^@{_~&zJNXwk)RhyPb`CYyxL<31Zqwe=|GkHw&*YkW zzUH1j&!tya=h^KX4(5E<%is5u?eF(h$DPBON67uj>))%Z{pfZM_jT{{-uLx+&uyls zHRtqd&;3uX+1K=H@4RciHUHYz=k>kjqvl9Vylb8{ z@7mYr^}Xhy=3RSv_sYBGS@W)aeO}*d9%|mTmv^tcYo0al+Sljxz2>3jU3+==%Dd)S z^R9h;Uf*jTYTmV%cdxu_o;B~<*XQ-U=Aq_YdwKWDyXIN*u6=!8-)kOf-nEx^ue@uX zHSgNj=k>kjq2^tCdH2e@=2`QueSKcvYaVLewU>9Vylb8{@7mYr^}Xhy=3RSv_sYBG zS@W)aeO}*d9%|mTmv^tcYo0al+Sljxz2>3jU3+==%Dd)S^R9h;Uf*jTYTmV%cdxu_ zo;B~<*XQ-U=Aq_YdwKWDyXIN*u6=!8-)kOf-nEx^ue@uXHSgNj=k>kjq2^tCdH2e@ z=2`QueSKcvYaVLewU>9Vylb8{@7mYr^}Xhy=3RSv_sYBGS@W)aeO}*d9%|mTmv^tc zYo0al+Sljxz2>3jU3+==%Dd)S^R9h;Uf*jTYTmV%cdxu_o;B~<*XQ-U=Aq_YdwKWD zyXIN*u6=!8-)kOf-nEx^ue@uXHSgNj=k>kjq2^tCdH2e@=2`QueSKcvYaVLewU>9V zylb8{@7mYr^}Xhy=3RSv_sYBGS@W)aeO}*d9%|mTmv^tcYo0al+Sljxz2>3jU3+== z%sb!zotoTJ<=!jz;(e|B-^@I}**&berWcrp&F~}M*UJCR$nV?TvzmK)`ODw^@BaUG zeeu74d^-~FYvup(o!kAjS2g$a@|VB+-~E4oIpaU)`1B;+*UJCnGq3x5k81Ae+8_V<$JUv6Un~EQZ7xS^FKX`TV76 zLCrnA{N?ZdcmE%4kNo!?U3cPrt^7Z_IUK9Gueqm}zx>_*?*C)ulKfa~_g+t@7@fcg?ltp1wY(?>P_2yHFaa)p7W5rYn6A;ylbvC_w@BSeb0GF-nGiRXWli}ntS^CoWAEgB=1_~-81i+Yt21< zeNNwV9+G#h^6r^;&9&y9zCNe#ISts z>3hyY@~&0hJ@c-)*4)$A=kz`2A$iv-@1A+rTx;&>>vQ^^^N_r2m3Pm)Ypyl-^z}J? z&v{7RwaU9^-Zj^nd;0pEzUMq7?^@;EGw+&f%{_g6PTzALl6S50?wNPZwdS6_KBwBP$v2h%{O3Ra<3Z{)7pJ^$@=fJG|M}1Vc#t~9!)foCd{g<)fBy47 z9;8llamxE9-&Fqdpa1-i2dPs$oc5l{HNFRpyl?VN zp0j@0om4`Okm;^FJP>PIGa}`zGI1{_~&z{Er8zQ#_pZp2;_r|NQ4a z|KmaGG#975Z}Ls$KmYm9|9Fr(#lvaunS4|EKimH!%|U*zuI8lsImE;6@9F(^-*emT z4}a9I1K$I!&O3(_We%85<{-e+C*?q8HuZ}tgxzC<^`OnUu_wwJq|IL9p zFbDR4103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K0 z2ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`2 z9N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8 zaDW4ccOc&bo$8zpe@@-bKx30J>UQb zIKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G z-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$ zfCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h0 z0S<70103K02PQkP>o+d``pcgFUwc&NU=Hj72ROh14sd`29N+*4IKTl8aDW3G-~b0W zzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h z00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70 z103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%fQ*nwsLTQ0iooG1N` z>Kx30J>UQbIKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4 zIKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G z-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$ zfCC)h00%h00S<70103K02Sz*awX@#%_UoT=`=UArb6^iRzyS_$fCC)h00%h00S<70 z103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh1 z4sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4 zIKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKY9)4t(X> zb3XH$2mk4!ItO!L4>-U94sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70 z103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh1 z4sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4 zIKTl8aDW3G-~b0WzyS_$fCC)h00%h0fyoZs{kZF{ed9^Lx2Vp+9M}U6aDW3G-~b0W zzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h z00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70 z103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh1 z4sc+y1OIdC1)u)x$KSrF&cPhm0}gP2103K02ROh14sd`29N+*4IKTl8aDW3G-~b0W zzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h z00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70 z103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$V6p>We%FWgJ^$VBSXAd=4(tI3 zIKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G z-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$ zfCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h0 z0S<70103K02RJaFA00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0W zzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h z00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70 z103K02ROh14sd`29N+*4IKTl8aDW3G;J|1Hmgm0jvLATcvQKpm=D;3sfCC)h00%h0 z0S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K0 z2ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`2 z9N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8 zaDW4Y9r(`+FFNo0f8t*k)j600d%yt>aDW3G-~b0WzyS_$fCC)h00%h00S<70103K0 z2ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`2 z9N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8 zaDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14or4n>#kEDcGl~jv8c|$9M}U6 zaDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0W zzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h z00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70 z103K02ROh14sc+!1Fw4jHGg&8Wy_Z89L#||-~b0WzyS_$fCC)h00%h00S<70103K0 z2ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`2 z9N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8 zaDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4COhyauiACp30Evz zs&g<0_J9K%-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G z-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$ zfCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h0 z0S<70103K02ROh14sd`292o4tttVY_`KM32dQqK&Ij{#D-~b0WzyS_$fCC)h00%h0 z0S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K0 z2ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`2 z9N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N@rc2Tr`| z;?MlpSC(5<=U@)(0S7q10S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$ zfCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h0 z0S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K0 z2ROh14sd`29N+*4IKTl8aDW3G-~b0WFxY{uPd)pxS3migi|QQAfj!^=2ROh14sd`2 z9N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8 zaDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0W zzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h z00%}paK&S9{`w0qU$#`|U=Hj72ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h z00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70 z103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh1 z4sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%fQ*@3sb^~y7@e#5e*ItO!L4>-U9 z4sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4 zIKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G z-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$ zfCC)h00%h0fyoYB_tLxfo_z7Lr8)<5U=KLJ0S<70103K02ROh14sd`29N+*4IKTl8 zaDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0W zzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h z00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0Wz=6pQ-2CG=Kk+xex@@V= z!5r8F4sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`2 z9N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8 zaDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0W zzyS_$fCC)h00%h00S<6rvIGBd`h~Zg{FP-(bq?mh9&msI9N+*4IKTl8aDW3G-~b0W zzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h z00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70 z103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<701Ct&2#~=Ll zzd7%n%a-aK%z-`N00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$ zfCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h0 z0S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K0 z2ROh14sd`29N+*4IKTl8aDW3G-~b0EJMiY8e!?$5_1tAkbq?mh9&msI9N+*4IKTl8 zaDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0W zzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h z00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70 z1Ct$i(>p%*$d~-qvZXo)b6^iRzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4 zIKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G z-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$ zfCC)h00%h00S<70103K02ROh14sd`29N+*4IKY9)4!q?xFZ_#JpR;VK&cPhm0}gP2 z103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh1 z4sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4 zIKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G z-~b0WzyS_$V6p=r{q0v?_NB{~E!8=g1AD*$4sd`29N+*4IKTl8aDW3G-~b0WzyS_$ zfCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h0 z0S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K0 z2ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S-)dVC!LL-SCM2UA9!` zU=Hj72ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh1 z4sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4 zIKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G z-~b0WzyS_$fCC)h00%fQ*?})!{_>CA_{C*Qbq?mh9&msI9N+*4IKTl8aDW3G-~b0W zzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h z00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70 z103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<701Ct%N?Gb1H z{vTYvY^lz{9M}U6aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4 zIKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G z-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$ zfCC)h00%h00S<70103K02ROh14sc+y12woPpPyUx>OLY$Bz#edb103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$ zfCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h0 z0S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K0 z2ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCG~qc<(!Jz4PqPFI%c}FbDR4103K0 z2ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`2 z9N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8 zaDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0W zzyS_$fCC(u?7+LOc-jRIyL;JEor5{B2OQu42ROh14sd`29N+*4IKTl8aDW3G-~b0W zzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h z00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70 z103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)hz+eY%`Pw7D@b}+!-l94O zb6^iRzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$ zfCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h0 z0S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K0 z2ROh14sd`29N+*4IKY9)4%~R*KfmOmzk2baItO!L4>-U94sd`29N+*4IKTl8aDW3G z-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$ zfCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h0 z0S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h0fyoYh@{^x< z>*>4qFRF7e2ljvi9N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh1 z4sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4 zIKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G z-~b0WzyS_$fCC)h00%h00S<70100y_z}_dk@u$A;vfCEbIhX@`zyS_$fCC)h00%h0 z0S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K0 z2ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`2 z9N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8 zjCSBbPq^@mbN+7Gt2zgBU=KLJ0S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0W zzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h z00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70 z103K02ROh14sd`29N+*4IKTl8aDW3G-~b0Wz=6pQ>^lFZ*WdO2Wv}WS%z-`N00%h0 z0S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K0 z2ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`2 z9N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8 zaDW3G-~b0kJMiEaJnN-@eZT?LIhX@`zyS_$fCC)h00%h00S<70103K02ROh14sd`2 z9N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8 zaDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0W zzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8Om<+`2kv;q!;VTjzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$ zfCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h0 z0S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K0 z2ROh14sd`29N+*4I566Qhn}+Q;QOo2VL5Ogm;-ZQ4(tI3IKTl8aDW3G-~b0WzyS_$ zfCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h0 z0S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K0 z2ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103MML Date: Fri, 25 Feb 2022 16:27:31 -0500 Subject: [PATCH 027/694] add cooling test --- src/cooling/cooling_cuda_tests.cu | 44 +++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100644 src/cooling/cooling_cuda_tests.cu diff --git a/src/cooling/cooling_cuda_tests.cu b/src/cooling/cooling_cuda_tests.cu new file mode 100644 index 000000000..812788dac --- /dev/null +++ b/src/cooling/cooling_cuda_tests.cu @@ -0,0 +1,44 @@ +/*! +* \file hllc_cuda_tests.cpp +* \author Robert 'Bob' Caddy (rvc@pitt.edu) +* \brief Test the code units within hllc_cuda.cu +* +*/ + +// STL Includes +#include + +// External Includes +#include // Include GoogleTest and related libraries/headers + +// Local Includes +#include "../global/global_cuda.h" +#include "../utils/gpu.hpp" +#include "../utils/testing_utilities.h" +#include "../cooling/cooling_cuda.h" // Include code to test + +#ifdef COOLING + +TEST(tCOOLINGPracticeTest, PracticeTestExpectCorrectOutput) // test suite name, test name +{ + Real const testn = 5; + Real const testT = 5; + Real const testNumber = CIE_cool(testn, testT); + + Real const fiducialNumber = 100; + + Real absoluteDiff; + Real ulpsDiff; + + Bool istrue; + + istrue = nearlyEqualDbl(fiducialNumber, testNumber, absoluteDiff, ulpsDiff); + + EXPECT_TRUE(istrue) + << “The fiducial value is: ” << fiducialNumber << std::endl + << “The test value is: ” << testNumber << std::endl + << “The absolute difference is: ” << absoluteDiff << std::endl + << “The ULP difference is: ” << ulpsDiff << std::endl; +} + +#endif // COOLING \ No newline at end of file From 05b746b55bb894d32ed35750074602c256f5d2e9 Mon Sep 17 00:00:00 2001 From: helenarichie Date: Fri, 25 Feb 2022 16:31:53 -0500 Subject: [PATCH 028/694] add cooling test --- src/cooling/cooling_cuda_tests.cu | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/cooling/cooling_cuda_tests.cu b/src/cooling/cooling_cuda_tests.cu index 812788dac..bfcaa0fe0 100644 --- a/src/cooling/cooling_cuda_tests.cu +++ b/src/cooling/cooling_cuda_tests.cu @@ -7,6 +7,7 @@ // STL Includes #include +#include // External Includes #include // Include GoogleTest and related libraries/headers @@ -30,13 +31,13 @@ TEST(tCOOLINGPracticeTest, PracticeTestExpectCorrectOutput) // test suite name, Real absoluteDiff; Real ulpsDiff; - Bool istrue; + Bool isTrue; istrue = nearlyEqualDbl(fiducialNumber, testNumber, absoluteDiff, ulpsDiff); - EXPECT_TRUE(istrue) - << “The fiducial value is: ” << fiducialNumber << std::endl - << “The test value is: ” << testNumber << std::endl + EXPECT_TRUE(isTrue) + << “The fiducial value is: “ << fiducialNumber << std::endl + << “The test value is: “ << testNumber << std::endl << “The absolute difference is: ” << absoluteDiff << std::endl << “The ULP difference is: ” << ulpsDiff << std::endl; } From f5fdc6d46a41f342f5e0e45d954f9a54c20465ad Mon Sep 17 00:00:00 2001 From: helenarichie Date: Fri, 25 Feb 2022 16:32:51 -0500 Subject: [PATCH 029/694] add cooling test --- src/cooling/cooling_cuda_tests.cu | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/cooling/cooling_cuda_tests.cu b/src/cooling/cooling_cuda_tests.cu index bfcaa0fe0..faa54b1ea 100644 --- a/src/cooling/cooling_cuda_tests.cu +++ b/src/cooling/cooling_cuda_tests.cu @@ -36,10 +36,10 @@ TEST(tCOOLINGPracticeTest, PracticeTestExpectCorrectOutput) // test suite name, istrue = nearlyEqualDbl(fiducialNumber, testNumber, absoluteDiff, ulpsDiff); EXPECT_TRUE(isTrue) - << “The fiducial value is: “ << fiducialNumber << std::endl - << “The test value is: “ << testNumber << std::endl - << “The absolute difference is: ” << absoluteDiff << std::endl - << “The ULP difference is: ” << ulpsDiff << std::endl; + << "The fiducial value is: " << fiducialNumber << std::endl + << "The test value is: " << testNumber << std::endl + << "The absolute difference is: " << absoluteDiff << std::endl + << "The ULP difference is: " << ulpsDiff << std::endl; } #endif // COOLING \ No newline at end of file From a4450cd018d9d82a55dc1df263f595fe7ceb8024 Mon Sep 17 00:00:00 2001 From: helenarichie Date: Mon, 28 Feb 2022 12:53:50 -0500 Subject: [PATCH 030/694] fix cooling test --- src/cooling/cooling_cuda_tests.cu | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/cooling/cooling_cuda_tests.cu b/src/cooling/cooling_cuda_tests.cu index faa54b1ea..fd0dbddd3 100644 --- a/src/cooling/cooling_cuda_tests.cu +++ b/src/cooling/cooling_cuda_tests.cu @@ -1,7 +1,7 @@ /*! -* \file hllc_cuda_tests.cpp -* \author Robert 'Bob' Caddy (rvc@pitt.edu) -* \brief Test the code units within hllc_cuda.cu +* \file cooling_cuda_tests.cpp +* \author Helena Richie (helenarichie@pitt.edu) +* \brief Testing the CIE cooling rate function in cooling_cuda.cu * */ @@ -10,15 +10,15 @@ #include // External Includes -#include // Include GoogleTest and related libraries/headers +#include // Include GoogleTest and related libraries/headers // Local Includes #include "../global/global_cuda.h" #include "../utils/gpu.hpp" #include "../utils/testing_utilities.h" -#include "../cooling/cooling_cuda.h" // Include code to test +#include "../cooling/cooling_cuda.h" // Include code to test -#ifdef COOLING +#ifdef COOLING_GPU TEST(tCOOLINGPracticeTest, PracticeTestExpectCorrectOutput) // test suite name, test name { From ba12db1e3aa881683fb60a55bcdd29ecb051fb1f Mon Sep 17 00:00:00 2001 From: helenarichie Date: Mon, 28 Feb 2022 12:58:17 -0500 Subject: [PATCH 031/694] fix cooling test --- src/cooling/cooling_cuda_tests.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cooling/cooling_cuda_tests.cu b/src/cooling/cooling_cuda_tests.cu index fd0dbddd3..cd72ea8ac 100644 --- a/src/cooling/cooling_cuda_tests.cu +++ b/src/cooling/cooling_cuda_tests.cu @@ -33,7 +33,7 @@ TEST(tCOOLINGPracticeTest, PracticeTestExpectCorrectOutput) // test suite name, Bool isTrue; - istrue = nearlyEqualDbl(fiducialNumber, testNumber, absoluteDiff, ulpsDiff); + isTrue = nearlyEqualDbl(fiducialNumber, testNumber, absoluteDiff, ulpsDiff); EXPECT_TRUE(isTrue) << "The fiducial value is: " << fiducialNumber << std::endl From edd13316d177af5c9114797da5fe43937f85d982 Mon Sep 17 00:00:00 2001 From: helenarichie Date: Mon, 28 Feb 2022 13:03:06 -0500 Subject: [PATCH 032/694] fix cooling test --- src/cooling/cooling_cuda_tests.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cooling/cooling_cuda_tests.cu b/src/cooling/cooling_cuda_tests.cu index cd72ea8ac..aaeb75818 100644 --- a/src/cooling/cooling_cuda_tests.cu +++ b/src/cooling/cooling_cuda_tests.cu @@ -31,7 +31,7 @@ TEST(tCOOLINGPracticeTest, PracticeTestExpectCorrectOutput) // test suite name, Real absoluteDiff; Real ulpsDiff; - Bool isTrue; + bool isTrue; isTrue = nearlyEqualDbl(fiducialNumber, testNumber, absoluteDiff, ulpsDiff); From 7e958fa82a0bcea8a0df5d14e94717d7e51a3e40 Mon Sep 17 00:00:00 2001 From: helenarichie Date: Mon, 28 Feb 2022 15:30:26 -0500 Subject: [PATCH 033/694] fix cooling test --- src/cooling/cooling_cuda_tests.cu | 1 + 1 file changed, 1 insertion(+) diff --git a/src/cooling/cooling_cuda_tests.cu b/src/cooling/cooling_cuda_tests.cu index aaeb75818..c657a0d9f 100644 --- a/src/cooling/cooling_cuda_tests.cu +++ b/src/cooling/cooling_cuda_tests.cu @@ -17,6 +17,7 @@ #include "../utils/gpu.hpp" #include "../utils/testing_utilities.h" #include "../cooling/cooling_cuda.h" // Include code to test +#include "../utils/testing_utilities.h" #ifdef COOLING_GPU From c1193539d0b7271ae50e5114ae930d4a801bbf0b Mon Sep 17 00:00:00 2001 From: helenarichie Date: Mon, 28 Feb 2022 15:36:53 -0500 Subject: [PATCH 034/694] fix cooling test --- src/cooling/cooling_cuda_tests.cu | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/cooling/cooling_cuda_tests.cu b/src/cooling/cooling_cuda_tests.cu index c657a0d9f..4c18ae097 100644 --- a/src/cooling/cooling_cuda_tests.cu +++ b/src/cooling/cooling_cuda_tests.cu @@ -17,7 +17,6 @@ #include "../utils/gpu.hpp" #include "../utils/testing_utilities.h" #include "../cooling/cooling_cuda.h" // Include code to test -#include "../utils/testing_utilities.h" #ifdef COOLING_GPU @@ -34,7 +33,7 @@ TEST(tCOOLINGPracticeTest, PracticeTestExpectCorrectOutput) // test suite name, bool isTrue; - isTrue = nearlyEqualDbl(fiducialNumber, testNumber, absoluteDiff, ulpsDiff); + isTrue = testingUtilities::nearlyEqualDbl(fiducialNumber, testNumber, absoluteDiff, ulpsDiff); EXPECT_TRUE(isTrue) << "The fiducial value is: " << fiducialNumber << std::endl From a507a2c1acbf51268591ceacb48a27f102d9232a Mon Sep 17 00:00:00 2001 From: helenarichie Date: Mon, 28 Feb 2022 15:46:43 -0500 Subject: [PATCH 035/694] fix cooling test --- src/cooling/cooling_cuda_tests.cu | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/cooling/cooling_cuda_tests.cu b/src/cooling/cooling_cuda_tests.cu index 4c18ae097..fcc6dca85 100644 --- a/src/cooling/cooling_cuda_tests.cu +++ b/src/cooling/cooling_cuda_tests.cu @@ -28,8 +28,8 @@ TEST(tCOOLINGPracticeTest, PracticeTestExpectCorrectOutput) // test suite name, Real const fiducialNumber = 100; - Real absoluteDiff; - Real ulpsDiff; + double absoluteDiff; + int64_t ulpsDiff; bool isTrue; From 0dcd6cca4052455dc4c0c284df9efb661d4c5df2 Mon Sep 17 00:00:00 2001 From: helenarichie Date: Tue, 1 Mar 2022 12:09:29 -0500 Subject: [PATCH 036/694] develop cooling tests --- src/cooling/cooling_cuda.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cooling/cooling_cuda.cu b/src/cooling/cooling_cuda.cu index 9b60d1ecb..d9d71fd87 100644 --- a/src/cooling/cooling_cuda.cu +++ b/src/cooling/cooling_cuda.cu @@ -329,7 +329,7 @@ __device__ Real primordial_cool(Real n, Real T) /* \fn __device__ Real CIE_cool(Real n, Real T) * \brief Analytic fit to a solar metallicity CIE cooling curve calculated using Cloudy. */ -__device__ Real CIE_cool(Real n, Real T) +__host__ __device__ Real CIE_cool(Real n, Real T) { Real lambda = 0.0; //cooling rate, erg s^-1 cm^3 Real cool = 0.0; //cooling per unit volume, erg /s / cm^3 From 60261c81884a909be64be5d16ab101ac907ee985 Mon Sep 17 00:00:00 2001 From: helenarichie Date: Tue, 1 Mar 2022 12:11:00 -0500 Subject: [PATCH 037/694] develop cooling tests --- src/cooling/cooling_cuda.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cooling/cooling_cuda.cu b/src/cooling/cooling_cuda.cu index d9d71fd87..9182313de 100644 --- a/src/cooling/cooling_cuda.cu +++ b/src/cooling/cooling_cuda.cu @@ -329,7 +329,7 @@ __device__ Real primordial_cool(Real n, Real T) /* \fn __device__ Real CIE_cool(Real n, Real T) * \brief Analytic fit to a solar metallicity CIE cooling curve calculated using Cloudy. */ -__host__ __device__ Real CIE_cool(Real n, Real T) +__device__ __host__ Real CIE_cool(Real n, Real T) { Real lambda = 0.0; //cooling rate, erg s^-1 cm^3 Real cool = 0.0; //cooling per unit volume, erg /s / cm^3 From 7cd95139d6365bcf6915892bf8835ed239686a85 Mon Sep 17 00:00:00 2001 From: helenarichie Date: Tue, 1 Mar 2022 12:29:48 -0500 Subject: [PATCH 038/694] develop cooling tests --- src/cooling/cooling_cuda.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cooling/cooling_cuda.h b/src/cooling/cooling_cuda.h index af00de1ae..ce8e79ff5 100644 --- a/src/cooling/cooling_cuda.h +++ b/src/cooling/cooling_cuda.h @@ -45,7 +45,7 @@ __device__ Real primordial_cool(Real n, Real T); /* \fn __device__ Real CIE_cool(Real n, Real T) * \brief Analytic fit to a solar metallicity CIE cooling curve calculated using Cloudy. */ -__device__ Real CIE_cool(Real n, Real T); +__device__ __host__ Real CIE_cool(Real n, Real T); /* \fn __device__ Real Cloudy_cool(Real n, Real T) From b300d8e6515bdf8dbbfe603173693c5011ec47a9 Mon Sep 17 00:00:00 2001 From: helenarichie Date: Tue, 1 Mar 2022 12:34:39 -0500 Subject: [PATCH 039/694] develop cooling tests --- src/cooling/cooling_cuda_tests.cu | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/cooling/cooling_cuda_tests.cu b/src/cooling/cooling_cuda_tests.cu index fcc6dca85..d1721fa8c 100644 --- a/src/cooling/cooling_cuda_tests.cu +++ b/src/cooling/cooling_cuda_tests.cu @@ -22,8 +22,8 @@ TEST(tCOOLINGPracticeTest, PracticeTestExpectCorrectOutput) // test suite name, test name { - Real const testn = 5; - Real const testT = 5; + Real const testn = 1.0; + Real const testT = 5.0; Real const testNumber = CIE_cool(testn, testT); Real const fiducialNumber = 100; From 3e17fa993f615cfaa0a2352729f9d3ec79e97890 Mon Sep 17 00:00:00 2001 From: helenarichie Date: Tue, 1 Mar 2022 12:41:59 -0500 Subject: [PATCH 040/694] develop cooling tests --- src/cooling/cooling_cuda_tests.cu | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/cooling/cooling_cuda_tests.cu b/src/cooling/cooling_cuda_tests.cu index d1721fa8c..bc6a2169d 100644 --- a/src/cooling/cooling_cuda_tests.cu +++ b/src/cooling/cooling_cuda_tests.cu @@ -8,6 +8,7 @@ // STL Includes #include #include +#include // External Includes #include // Include GoogleTest and related libraries/headers @@ -20,10 +21,10 @@ #ifdef COOLING_GPU -TEST(tCOOLINGPracticeTest, PracticeTestExpectCorrectOutput) // test suite name, test name +TEST(tCOOLINGTestCIECool, TestCIECoolExpectCorrectOutput) // test suite name, test name { Real const testn = 1.0; - Real const testT = 5.0; + Real const testT = pow(10, 5.0); Real const testNumber = CIE_cool(testn, testT); Real const fiducialNumber = 100; From 221b763a7eddb8750509653cc7a752af10925e23 Mon Sep 17 00:00:00 2001 From: helenarichie Date: Tue, 1 Mar 2022 12:52:57 -0500 Subject: [PATCH 041/694] develop cooling tests --- src/cooling/cooling_cuda_tests.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cooling/cooling_cuda_tests.cu b/src/cooling/cooling_cuda_tests.cu index bc6a2169d..0de5c3bc1 100644 --- a/src/cooling/cooling_cuda_tests.cu +++ b/src/cooling/cooling_cuda_tests.cu @@ -23,7 +23,7 @@ TEST(tCOOLINGTestCIECool, TestCIECoolExpectCorrectOutput) // test suite name, test name { - Real const testn = 1.0; + Real const testn = pow(10, -8); Real const testT = pow(10, 5.0); Real const testNumber = CIE_cool(testn, testT); From ee6bc2b056077b76d442fc1e8ab8e6a7924eaeb1 Mon Sep 17 00:00:00 2001 From: helenarichie Date: Tue, 1 Mar 2022 12:54:27 -0500 Subject: [PATCH 042/694] develop cooling tests --- src/cooling/cooling_cuda_tests.cu | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/cooling/cooling_cuda_tests.cu b/src/cooling/cooling_cuda_tests.cu index 0de5c3bc1..9e9db0a60 100644 --- a/src/cooling/cooling_cuda_tests.cu +++ b/src/cooling/cooling_cuda_tests.cu @@ -23,11 +23,11 @@ TEST(tCOOLINGTestCIECool, TestCIECoolExpectCorrectOutput) // test suite name, test name { - Real const testn = pow(10, -8); + Real const testn = 1; Real const testT = pow(10, 5.0); Real const testNumber = CIE_cool(testn, testT); - Real const fiducialNumber = 100; + Real const fiducialNumber = 4.6639082688443984*pow(10, -22); double absoluteDiff; int64_t ulpsDiff; From 7cc68b82c0175b7eac6fe1dac2990ec7faa1c52a Mon Sep 17 00:00:00 2001 From: helenarichie Date: Tue, 1 Mar 2022 14:35:26 -0500 Subject: [PATCH 043/694] develop cooling tests --- src/cooling/cooling_cuda_tests.cu | 2 +- src/dust/dust_cuda.cu | 195 ++++++++++++++++++++++++++++ src/dust/dust_cuda.h | 17 +++ src/dust/dust_cuda_updated.cu | 10 ++ src/dust/dust_cuda_updated_tests.cu | 46 +++++++ 5 files changed, 269 insertions(+), 1 deletion(-) create mode 100644 src/dust/dust_cuda.cu create mode 100644 src/dust/dust_cuda.h create mode 100644 src/dust/dust_cuda_updated.cu create mode 100644 src/dust/dust_cuda_updated_tests.cu diff --git a/src/cooling/cooling_cuda_tests.cu b/src/cooling/cooling_cuda_tests.cu index 9e9db0a60..7b2ef41db 100644 --- a/src/cooling/cooling_cuda_tests.cu +++ b/src/cooling/cooling_cuda_tests.cu @@ -43,4 +43,4 @@ TEST(tCOOLINGTestCIECool, TestCIECoolExpectCorrectOutput) // test suite name, te << "The ULP difference is: " << ulpsDiff << std::endl; } -#endif // COOLING \ No newline at end of file +#endif // COOLING_GPU \ No newline at end of file diff --git a/src/dust/dust_cuda.cu b/src/dust/dust_cuda.cu new file mode 100644 index 000000000..015e4f9f2 --- /dev/null +++ b/src/dust/dust_cuda.cu @@ -0,0 +1,195 @@ +#ifdef CUDA +#ifdef DUST_GPU + +#include"dust_cuda.h" +#include +#include +#include"global.h" +#include"global_cuda.h" +#include"gpu.hpp" + +__global__ void dust_kernel(Real *dev_conserved, int nx, int ny, int nz, + int n_ghost, int n_fields, Real dt, Real gamma, Real *dt_array) { + __shared__ Real min_dt[TPB]; // TPB = threads per block + + int n_cells = nx * ny * nz; + int is, ie, js, je, ks, ke; + is = n_ghost; + ie = nx - n_ghost; + if (ny == 1) { + js = 0; + je = 1; + } else { + js = n_ghost; + je = ny - n_ghost; + } + if (nz == 1) { + ks = 0; + ke = 1; + } else { + ks = n_ghost; + ke = nz - n_ghost; + } + + Real d_gas, E; // gas density, energy + Real n, T; // number density, temperature, initial temperature + // dust density rate of change, change in dust density, refined timestep + Real dd_dt, dd, dt_sub; + Real mu; // mean molecular weight + Real d_dust; // dust density + Real d_metal; // metal density + Real vx, vy, vz, p; + #ifdef DE + Real ge; + #endif + + mu = 0.6; + + // get a global thread ID + int blockId = blockIdx.x + blockIdx.y * gridDim.x; + int id = threadIdx.x + blockId * blockDim.x; + int zid = id / (nx * ny); + int yid = (id - zid * nx * ny) / nx; + int xid = id - zid * nx * ny - yid * nx; + // add a thread id within the block + int tid = threadIdx.x; + + _syncthreads(); + + // only threads corresponding to real cells do the calculation + if (xid >= is && xid < ie && yid >= js && yid < je && zid >= ks && zid < ke) { + d_gas = dev_conserved[id]; + E = dev_conserved[4*n_cells + id]; + // make sure thread hasn't crashed + if (E < 0.0 || E != E) return; + + vx = dev_conserved[1*n_cells + id] / d_gas; + vy = dev_conserved[2*n_cells + id] / d_gas; + vz = dev_conserved[3*n_cells + id] / d_gas; + + p = (E - 0.5 * d_gas * (vx*vx + vy*vy + vz*vz)) * (gamma - 1.0); + p = fmax(p, (Real) TINY_NUMBER); + + #ifdef DE + ge = dev_conserved[(n_fields-1)*n_cells + id] / d_gas; + ge = fmax(ge, (Real) TINY_NUMBER); + #endif + + n = d_gas * DENSITY_UNIT / (mu * MP); // number density of gas (in cgs) + + // calculate the temperature of the gas + T_init = p * PRESSURE_UNIT / (n * KB); + + #ifdef DE + T_init = d_gas * ge * (gamma - 1.0) * PRESSURE_UNIT / (n * KB); + #endif + + T = T_init; + + // dust density + d_dust = dev_conserved[5*n_cells + id]; + + // dust mass rate of change + dd_dt = d_gas_accretion(T, d_gas, d_dust, d_metal) + + d_thermal_sputtering(T, d_gas, d_dust); + + // Calculate change in dust density during simulation timestep + dd = dt * dd_dt; + + // if change in dust density is greater than 1% then refine timestep + while (dd/d_dust > 0.01) { + // what dt gives dd = 0.01*d_dust? + dt_sub = 0.01 * d_dust / dd_dt; + // use dt_sub in forward Euler update + d_dust += dd_dt * dt_sub; + + // how much time is left from the original timestep? + dt -= dt_sub; + + // update dust density rate of change + dd_dt = gas_accretion(T, d_gas, d_dust, d_metal) + + thermal_sputtering(T, d_gas, d_dust); + + /* calculate new change in density at this rate and repeat if greater + than 1% change */ + dd = dt * dd_dt; + } + + d_dust += dt * dd_dt; + + dev_conserved[5*n_cells + id] = d_dust; + + if (n > 0 && T > 0 && dd_dt > 0.0) { + // limit the timestep such that delta_T is 10% + min_dt[tid] = 0.01 * d_dust / dd_dt; + } + } + __syncthreads() + + // do the reduction in shared memory (find the min timestep in the block) + for (unsigned int s=1; s metals = {0.97, 0.40, 0.096, 0.099, 0.079, 0.058, 0.14, 0.040}; + Real metallicity + std::for_each(metals.begin(), metals.end(), [&] (int n) { + metallicity += n; + }); + + Real initialize_densities() { + Real d0_gas = MP * n; // g/cm^3 + Real d0_metal = metallicity * d0_gas; + Real d0_dust = d0_gas / 100 // assume 1% dust-to-gas fraction + + return d0_gas, d0_metal, d0_dust; + } + + Real calc_tau_g() { + Real tau_g_ref = 0.2*pow(10, 9); // 0.2 Gyr in s + Real d_ref = MP; // 1 H atom per cubic centimeter + Real T_ref = 20.0; // 20 K + Real tau_g; + tau_g = tau_g_ref * (d_ref/d0_gas)) * pow(T_ref/T,1/2); + + return tau_g; + } + + Real calc_tau_sp() { + Real a1 = 1; // dust grain size in units of 0.1 micrometers + Real d0 = n/(6*pow(10,-4)); // gas density in units of 10^-27 g/cm^3 + Real T_0 = 2*pow(10,6); // K + Real omega = 2.5; + Real A = 0.17*pow(10,9) * YR_IN_S; // 0.17 Gyr in s + + return A * (a1/d0) * (pow(T_0/self.T, omega) + 1); // s + } + +}; \ No newline at end of file diff --git a/src/dust/dust_cuda.h b/src/dust/dust_cuda.h new file mode 100644 index 000000000..34852adb2 --- /dev/null +++ b/src/dust/dust_cuda.h @@ -0,0 +1,17 @@ +#ifdef CUDA +#ifdef DUST_GPU + +#ifndef DUST_CUDA_H +#define DUST_CUDA_H + +#include"gpu.hpp" +#include +#include"global.h" + +__global__ void dust_kernel(Real *dev_conserved, int nx, int ny, int nz, + int n_ghost, int n_fields, Real dt, Real gamma, Real *dt_array); + +__device__ Real d_gas_accretion(Real T, Real d_gas, Real d_dust, + Real d_metal); + +__device__ Real thermal_sputtering(Real T, Real d_dust); diff --git a/src/dust/dust_cuda_updated.cu b/src/dust/dust_cuda_updated.cu new file mode 100644 index 000000000..f39c04114 --- /dev/null +++ b/src/dust/dust_cuda_updated.cu @@ -0,0 +1,10 @@ +#ifdef CUDA +#ifdef DUST_GPU + +#include"dust_cuda_updated.h" +#include +#include +#include"global.h" +#include"global_cuda.h" +#include"gpu.hpp" + diff --git a/src/dust/dust_cuda_updated_tests.cu b/src/dust/dust_cuda_updated_tests.cu new file mode 100644 index 000000000..9682dd326 --- /dev/null +++ b/src/dust/dust_cuda_updated_tests.cu @@ -0,0 +1,46 @@ +/*! +* \file dust_cuda_tests.cu +* \author Helena Richie (helenarichie@pitt.edu) +* \brief Test dust model functions +* +*/ + +// STL Includes +#include +#include +#include + +// External Includes +#include // Include GoogleTest and related libraries/headers + +// Local Includes +#include "../global/global_cuda.h" +#include "../utils/gpu.hpp" +#include "../utils/testing_utilities.h" +#include "../cooling/cooling_cuda.h" // Include code to test + +#ifdef DUST_GPU + +TEST(tCOOLINGTestCIECool, TestCIECoolExpectCorrectOutput) // test suite name, test name +{ + Real const testn = 1; + Real const testT = pow(10, 5.0); + Real const testNumber = CIE_cool(testn, testT); + + Real const fiducialNumber = 4.6639082688443984*pow(10, -22); + + double absoluteDiff; + int64_t ulpsDiff; + + bool isTrue; + + isTrue = testingUtilities::nearlyEqualDbl(fiducialNumber, testNumber, absoluteDiff, ulpsDiff); + + EXPECT_TRUE(isTrue) + << "The fiducial value is: " << fiducialNumber << std::endl + << "The test value is: " << testNumber << std::endl + << "The absolute difference is: " << absoluteDiff << std::endl + << "The ULP difference is: " << ulpsDiff << std::endl; +} + +#endif // DUST_GPU \ No newline at end of file From c376aec7cb2d429c6778be22c9552b5dae1af71f Mon Sep 17 00:00:00 2001 From: helenarichie Date: Thu, 3 Mar 2022 17:12:25 -0500 Subject: [PATCH 044/694] add dust model --- src/dust/dust_cuda.cu | 315 ++++++++---------- src/dust/dust_cuda.h | 46 ++- ...da_updated_tests.cu => dust_cuda_tests.cu} | 4 +- src/dust/dust_cuda_updated.cu | 10 - 4 files changed, 189 insertions(+), 186 deletions(-) rename src/dust/{dust_cuda_updated_tests.cu => dust_cuda_tests.cu} (88%) delete mode 100644 src/dust/dust_cuda_updated.cu diff --git a/src/dust/dust_cuda.cu b/src/dust/dust_cuda.cu index 015e4f9f2..59bf79295 100644 --- a/src/dust/dust_cuda.cu +++ b/src/dust/dust_cuda.cu @@ -1,195 +1,170 @@ #ifdef CUDA #ifdef DUST_GPU -#include"dust_cuda.h" +#include"dust_cuda_updated.h" #include -#include +#include #include"global.h" #include"global_cuda.h" #include"gpu.hpp" -__global__ void dust_kernel(Real *dev_conserved, int nx, int ny, int nz, - int n_ghost, int n_fields, Real dt, Real gamma, Real *dt_array) { - __shared__ Real min_dt[TPB]; // TPB = threads per block - - int n_cells = nx * ny * nz; - int is, ie, js, je, ks, ke; - is = n_ghost; - ie = nx - n_ghost; - if (ny == 1) { - js = 0; - je = 1; - } else { - js = n_ghost; - je = ny - n_ghost; - } - if (nz == 1) { - ks = 0; - ke = 1; - } else { - ks = n_ghost; - ke = nz - n_ghost; - } - - Real d_gas, E; // gas density, energy - Real n, T; // number density, temperature, initial temperature - // dust density rate of change, change in dust density, refined timestep - Real dd_dt, dd, dt_sub; - Real mu; // mean molecular weight - Real d_dust; // dust density - Real d_metal; // metal density - Real vx, vy, vz, p; - #ifdef DE - Real ge; - #endif - - mu = 0.6; - - // get a global thread ID - int blockId = blockIdx.x + blockIdx.y * gridDim.x; - int id = threadIdx.x + blockId * blockDim.x; - int zid = id / (nx * ny); - int yid = (id - zid * nx * ny) / nx; - int xid = id - zid * nx * ny - yid * nx; - // add a thread id within the block - int tid = threadIdx.x; - - _syncthreads(); - - // only threads corresponding to real cells do the calculation - if (xid >= is && xid < ie && yid >= js && yid < je && zid >= ks && zid < ke) { - d_gas = dev_conserved[id]; - E = dev_conserved[4*n_cells + id]; - // make sure thread hasn't crashed - if (E < 0.0 || E != E) return; - - vx = dev_conserved[1*n_cells + id] / d_gas; - vy = dev_conserved[2*n_cells + id] / d_gas; - vz = dev_conserved[3*n_cells + id] / d_gas; - - p = (E - 0.5 * d_gas * (vx*vx + vy*vy + vz*vz)) * (gamma - 1.0); - p = fmax(p, (Real) TINY_NUMBER); - - #ifdef DE - ge = dev_conserved[(n_fields-1)*n_cells + id] / d_gas; - ge = fmax(ge, (Real) TINY_NUMBER); - #endif - - n = d_gas * DENSITY_UNIT / (mu * MP); // number density of gas (in cgs) - - // calculate the temperature of the gas - T_init = p * PRESSURE_UNIT / (n * KB); - +__global__ void dust_kernel(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, +int n_fields, Real dt, Real gamma, Real *dt_array) { + __shared__ Real min_dt[TPB]; // TPB = threads per block + + // get grid inidices + int n_cells = nx * ny * nz; + int is, ie, js, je, ks, ke; + Get_Indices(nx, ny, nz, is, ie, js, je, ks, ke); + + // get a global thread ID + int id; + int xid, yid, zid; + int tid; + Get_GTID(id, xid, yid, zid, tid); + + // define physics variables + Real d_gas, d_dust; // fluid mass densities + Real n; // gas number density + Real T, E, p; // temperature, energy, pressure + Real mu = 0.6; // mean molecular weight + Real vx, vy, vz; // velocities #ifdef DE - T_init = d_gas * ge * (gamma - 1.0) * PRESSURE_UNIT / (n * KB); - #endif - - T = T_init; + Real ge; + #endif // DE - // dust density - d_dust = dev_conserved[5*n_cells + id]; + // define integration variables + Real dd_dt; // instantaneous rate of change in dust density + Real dd; // change in dust density at current time-step + Real dd_max = 0.01; // allowable percentage of dust density increase - // dust mass rate of change - dd_dt = d_gas_accretion(T, d_gas, d_dust, d_metal) + - d_thermal_sputtering(T, d_gas, d_dust); - - // Calculate change in dust density during simulation timestep - dd = dt * dd_dt; - - // if change in dust density is greater than 1% then refine timestep - while (dd/d_dust > 0.01) { - // what dt gives dd = 0.01*d_dust? - dt_sub = 0.01 * d_dust / dd_dt; - // use dt_sub in forward Euler update - d_dust += dd_dt * dt_sub; + _syncthreads(); + + if (xid >= is && xid < ie && yid >= js && yid < je && zid >= ks && zid < ke) { + // get quantities from dev_conserved + d_gas = dev_conserved[id]; + d_dust = dev_conserved[5*n_cells + id]; + E = dev_conserved[4*n_cells + id]; + // make sure thread hasn't crashed + if (E < 0.0 || E != E) return; + + vx = dev_conserved[1*n_cells + id] / d_gas; + vy = dev_conserved[2*n_cells + id] / d_gas; + vz = dev_conserved[3*n_cells + id] / d_gas; + + #ifdef DE + ge = dev_conserved[(n_fields-1)*n_cells + id] / d_gas; + ge = fmax(ge, (Real) TINY_NUMBER); + #endif // DE + + // calculate physical quantities + p = Calc_Pressure(E, d_gas, vx, vy, vz, gamma); + + Real T_init; + T_init = Calc_Temp(p, n); + + #ifdef DE + T_init = Calc_Temp_DE(d_gas, ge, gamma, n); + #endif // DE + + T = T_init; + + // calculate change in dust density + Dust dustObj(T, n, dt, d_gas, d_dust); + dustObj.calc_tau_sp(); + + dd_dt = dustObj.calc_dd_dt(); + dd = dd_dt * dt; + + // ensure that dust density is not changing too rapidly + while (d_dust/dd > dd_max) { + dt_sub = dd_max * d_dust / dd_dt; + dustObj.d_dust += dt_sub * dd_dt; + dustObj.dt -= dt_sub; + dt = dustObj.dt; + dd_dt = dustObj.calc_dd_dt(); + dd = dt * dd_dt; + } + + // update dust and gas densities + dev_conserved[5*n_cells + id] = dustObj.d_dust; + dev_conserved[id] += dd; + } + __syncthreads(); + + // do the reduction in shared memory (find the min timestep in the block) + for (unsigned int s=1; s 0 && T > 0 && dd_dt > 0.0) { - // limit the timestep such that delta_T is 10% - min_dt[tid] = 0.01 * d_dust / dd_dt; +__device__ void Get_Indices(int nx, int ny, int nz, int is, int ie, int js, int je, int ks, int ke) { + is = n_ghost; + ie = nx - n_ghost; + if (ny == 1) { + js = 0; + je = 1; + } else { + js = n_ghost; + je = ny - n_ghost; } - } - __syncthreads() - - // do the reduction in shared memory (find the min timestep in the block) - for (unsigned int s=1; s metals = {0.97, 0.40, 0.096, 0.099, 0.079, 0.058, 0.14, 0.040}; - Real metallicity - std::for_each(metals.begin(), metals.end(), [&] (int n) { - metallicity += n; - }); - - Real initialize_densities() { - Real d0_gas = MP * n; // g/cm^3 - Real d0_metal = metallicity * d0_gas; - Real d0_dust = d0_gas / 100 // assume 1% dust-to-gas fraction - - return d0_gas, d0_metal, d0_dust; - } - - Real calc_tau_g() { - Real tau_g_ref = 0.2*pow(10, 9); // 0.2 Gyr in s - Real d_ref = MP; // 1 H atom per cubic centimeter - Real T_ref = 20.0; // 20 K - Real tau_g; - tau_g = tau_g_ref * (d_ref/d0_gas)) * pow(T_ref/T,1/2); - - return tau_g; - } - - Real calc_tau_sp() { - Real a1 = 1; // dust grain size in units of 0.1 micrometers - Real d0 = n/(6*pow(10,-4)); // gas density in units of 10^-27 g/cm^3 - Real T_0 = 2*pow(10,6); // K - Real omega = 2.5; - Real A = 0.17*pow(10,9) * YR_IN_S; // 0.17 Gyr in s +__device__ Real Calc_Temp(Real p, Real n) { + Real T = p * PRESSURE_UNIT / (n * KB); + return T; +} - return A * (a1/d0) * (pow(T_0/self.T, omega) + 1); // s - } +__device__ Real Calc_Temp_DE(Real d_gas, Real ge, Real gamma, Real n) { + Real T = d_gas * ge * (gamma - 1.0) * PRESSURE_UNIT / (n * KB); + return T; +} -}; \ No newline at end of file +#endif // DUST_GPU +#endif // CUDA \ No newline at end of file diff --git a/src/dust/dust_cuda.h b/src/dust/dust_cuda.h index 34852adb2..78a5f8c26 100644 --- a/src/dust/dust_cuda.h +++ b/src/dust/dust_cuda.h @@ -1,3 +1,6 @@ +/*! \file dust_cuda.h + * \brief Declarations of dust functions. */ + #ifdef CUDA #ifdef DUST_GPU @@ -9,9 +12,44 @@ #include"global.h" __global__ void dust_kernel(Real *dev_conserved, int nx, int ny, int nz, - int n_ghost, int n_fields, Real dt, Real gamma, Real *dt_array); +int n_ghost, int n_fields, Real dt, Real gamma, Real *dt_array); + +// general purpose functions: +__device__ void Get_Indices(int nx, int ny, int nz, int is, int ie, int js, +int je, int ks, int ke); + +__device__ void Get_GTID(int id, int xid, int yid, int zid, int tid); + +__device__ Real Calc_Pressure(Real E, Real d_gas, Real vx, Real vy, Real vz, +Real gamma); + +__device__ Real Calc_Temp(Real p, Real n); + +#ifdef DE +__device__ Real Calc_Temp_DE(Real d_gas, Real ge, Real gamma, Real n); +#endif // DE + +class Dust: { + + public: + Real T, n, dt, d_gas, d_dust; + Real tau_sp; + Dust(Real T_in, Real n_in, Real dt_in, Real d_gas_in, Real d_dust_in) { + T = T_in; + n = n_in; + dt = dt_in; + d_gas = d_gas_in; + d_dust = d_dust_in; + } + void calc_tau_sp(); + Real calc_dd_dt(); + + private: + Real MP = 1.6726*pow(10,-24); // proton mass in g + Real YR_IN_S = 3.154*pow(10,7); // one year in s -__device__ Real d_gas_accretion(Real T, Real d_gas, Real d_dust, - Real d_metal); +}; -__device__ Real thermal_sputtering(Real T, Real d_dust); +#endif // DUST_CUDA_H +#endif // DUST_GPU +#endif // CUDA \ No newline at end of file diff --git a/src/dust/dust_cuda_updated_tests.cu b/src/dust/dust_cuda_tests.cu similarity index 88% rename from src/dust/dust_cuda_updated_tests.cu rename to src/dust/dust_cuda_tests.cu index 9682dd326..fa27f2474 100644 --- a/src/dust/dust_cuda_updated_tests.cu +++ b/src/dust/dust_cuda_tests.cu @@ -17,11 +17,11 @@ #include "../global/global_cuda.h" #include "../utils/gpu.hpp" #include "../utils/testing_utilities.h" -#include "../cooling/cooling_cuda.h" // Include code to test +#include "../dust/dust_cuda_updated.h" // Include code to test #ifdef DUST_GPU -TEST(tCOOLINGTestCIECool, TestCIECoolExpectCorrectOutput) // test suite name, test name +TEST(tDUSTAccretionTest, AccretionTestExpectCorrectOutput) // test suite name, test name { Real const testn = 1; Real const testT = pow(10, 5.0); diff --git a/src/dust/dust_cuda_updated.cu b/src/dust/dust_cuda_updated.cu deleted file mode 100644 index f39c04114..000000000 --- a/src/dust/dust_cuda_updated.cu +++ /dev/null @@ -1,10 +0,0 @@ -#ifdef CUDA -#ifdef DUST_GPU - -#include"dust_cuda_updated.h" -#include -#include -#include"global.h" -#include"global_cuda.h" -#include"gpu.hpp" - From e200119b1d0792f44eedaebc1425ed405619cb55 Mon Sep 17 00:00:00 2001 From: helenarichie Date: Fri, 4 Mar 2022 16:44:48 -0500 Subject: [PATCH 045/694] develop dust model --- src/dust/dust_cuda.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/dust/dust_cuda.cu b/src/dust/dust_cuda.cu index 59bf79295..a927d403a 100644 --- a/src/dust/dust_cuda.cu +++ b/src/dust/dust_cuda.cu @@ -1,7 +1,7 @@ #ifdef CUDA #ifdef DUST_GPU -#include"dust_cuda_updated.h" +#include"dust_cuda.h" #include #include #include"global.h" From 137eccf030ee0fa186388d282f5b0ad38e5380b9 Mon Sep 17 00:00:00 2001 From: helenarichie Date: Wed, 9 Mar 2022 11:18:07 -0500 Subject: [PATCH 046/694] fix bugs in dust code --- src/dust/dust_cuda.cu | 9 +++++---- src/dust/dust_cuda.h | 5 ++--- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/dust/dust_cuda.cu b/src/dust/dust_cuda.cu index a927d403a..5e4757a61 100644 --- a/src/dust/dust_cuda.cu +++ b/src/dust/dust_cuda.cu @@ -15,13 +15,13 @@ int n_fields, Real dt, Real gamma, Real *dt_array) { // get grid inidices int n_cells = nx * ny * nz; int is, ie, js, je, ks, ke; - Get_Indices(nx, ny, nz, is, ie, js, je, ks, ke); + Get_Indices(n_ghost, nx, ny, nz, is, ie, js, je, ks, ke); // get a global thread ID int id; int xid, yid, zid; int tid; - Get_GTID(id, xid, yid, zid, tid); + Get_GTID(id, xid, yid, zid, tid, nx, ny, nz); // define physics variables Real d_gas, d_dust; // fluid mass densities @@ -37,6 +37,7 @@ int n_fields, Real dt, Real gamma, Real *dt_array) { Real dd_dt; // instantaneous rate of change in dust density Real dd; // change in dust density at current time-step Real dd_max = 0.01; // allowable percentage of dust density increase + Real dt_sub; //refined timestep _syncthreads(); @@ -120,7 +121,7 @@ Real Dust::calc_dd_dt() { // forward-Euler methods: -__device__ void Get_Indices(int nx, int ny, int nz, int is, int ie, int js, int je, int ks, int ke) { +__device__ void Get_Indices(int n_ghost, int nx, int ny, int nz, int &is, int &ie, int &js, int &je, int &ks, int &ke) { is = n_ghost; ie = nx - n_ghost; if (ny == 1) { @@ -139,7 +140,7 @@ __device__ void Get_Indices(int nx, int ny, int nz, int is, int ie, int js, int } } -__device__ void Get_GTID(int id, int xid, int yid, int zid, int tid) { +__device__ void Get_GTID(int &id, int &xid, int &yid, int &zid, int &tid, int nx, int ny, int nz) { int blockId = blockIdx.x + blockIdx.y * gridDim.x; int id = threadIdx.x + blockId * blockDim.x; int zid = id / (nx * ny); diff --git a/src/dust/dust_cuda.h b/src/dust/dust_cuda.h index 78a5f8c26..fe7a6c87c 100644 --- a/src/dust/dust_cuda.h +++ b/src/dust/dust_cuda.h @@ -15,10 +15,9 @@ __global__ void dust_kernel(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dt, Real gamma, Real *dt_array); // general purpose functions: -__device__ void Get_Indices(int nx, int ny, int nz, int is, int ie, int js, -int je, int ks, int ke); +__device__ void Get_Indices(int n_ghost, int nx, int ny, int nz, int &is, int &ie, int &js, int &je, int &ks, int &ke); -__device__ void Get_GTID(int id, int xid, int yid, int zid, int tid); +__device__ void Get_GTID(int &id, int &xid, int &yid, int &zid, int &tid, int nx, int ny, int nz); __device__ Real Calc_Pressure(Real E, Real d_gas, Real vx, Real vy, Real vz, Real gamma); From 3cfedcd872e77c9ca7d815cd7e9730a31d2f6440 Mon Sep 17 00:00:00 2001 From: helenarichie Date: Tue, 29 Mar 2022 14:30:15 -0400 Subject: [PATCH 047/694] use const variable names that indicate they are constant --- src/utils/hydro_utilities.h | 34 ++++++++++++++++++---------------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/src/utils/hydro_utilities.h b/src/utils/hydro_utilities.h index 51439ac29..c1d6bf214 100644 --- a/src/utils/hydro_utilities.h +++ b/src/utils/hydro_utilities.h @@ -25,48 +25,50 @@ * T : temperature * mx, my, mz : x, y, and z momentum * n : number density + +* "k" prefix in a variable indicates that it is a const. */ namespace hydro_utilities { - inline __host__ __device__ Real Calc_Pressure_Primitive(Real const &E, Real const &d, Real const &vx, Real const &vy, Real const &vz, Real const &gamma) { + inline __host__ __device__ Real Calc_Pressure_Primitive(Real const &k_E, Real const &k_d, Real const &k_vx, Real const &k_vy, Real const &k_vz, Real const &k_gamma) { Real P; - P = (E - 0.5 * d * (vx*vx + vy*vy + vz*vz)) * (gamma - 1.0); + P = (k_E - 0.5 * k_d * (k_vx*k_vx + k_vy*k_vy + k_vz*k_vz)) * (k_gamma - 1.0); P = fmax(P, TINY_NUMBER); return P; } - inline __host__ __device__ Real Calc_Pressure_Conserved(Real const &E, Real const &d, Real const &mx, Real const &my, Real const &mz, Real const &gamma) { - Real P= (E - 0.5 * (mx*mx + my*my + mz*mz) / d) * (gamma - 1.); + inline __host__ __device__ Real Calc_Pressure_Conserved(Real const &k_E, Real const &k_d, Real const &k_mx, Real const &k_my, Real const &k_mz, Real const &k_gamma) { + Real P = (k_E - 0.5 * (k_mx*k_mx + k_my*k_my + k_mz*k_mz) / k_d) * (k_gamma - 1.); return fmax(P, TINY_NUMBER); } - inline __host__ __device__ Real Calc_Temp(Real const &P, Real const &n) { - Real T = P * PRESSURE_UNIT / (n * KB); + inline __host__ __device__ Real Calc_Temp(Real const &k_P, Real const &k_n) { + Real T = k_P * PRESSURE_UNIT / (k_n * KB); return T; } #ifdef DE - inline __host__ __device__ Real Calc_Temp_DE(Real const &d, Real const &ge, Real const &gamma, Real const&n) { - Real T = d * ge * (gamma - 1.0) * PRESSURE_UNIT / (n * KB); + inline __host__ __device__ Real Calc_Temp_DE(Real const &k_d, Real const &kge, Real const &k_gamma, Real const&k_n) { + Real T = k_d * kge * (k_gamma - 1.0) * PRESSURE_UNIT / (k_n * KB); return T; } #endif // DE - inline __host__ __device__ Real Calc_Energy_Primitive(Real const &P, Real const &d, Real const &vx, Real const &vy, Real const &vz, Real const &gamma) { + inline __host__ __device__ Real Calc_Energy_Primitive(Real const &k_P, Real const &k_d, Real const &k_vx, Real const &k_vy, Real const &k_vz, Real const &k_gamma) { // Compute and return energy - return (fmax(P, TINY_NUMBER)/(gamma - 1.)) + 0.5 * d * (vx*vx + vy*vy + vz*vz); + return (fmax(P, TINY_NUMBER)/(k_gamma - 1.)) + 0.5 * k_d * (k_vx*k_vx + k_vy*k_vy + k_vz*k_vz); } - inline __host__ __device__ Real Get_Pressure_From_DE(Real const &E, Real const &U_total, Real const &U_advected, Real const &gamma) { + inline __host__ __device__ Real Get_Pressure_From_DE(Real const &k_E, Real const &k_U_total, Real const &k_U_advected, Real const &k_gamma) { Real U, P; - Real eta = DE_ETA_1; + Real const k_eta = DE_ETA_1; // Apply same condition as Byan+2013 to select the internal energy from which compute pressure. - if (U_total/E > eta) { - U = U_total; + if (k_U_total/k_E > k_eta) { + U = k_U_total; } else { - U = U_advected; + U = k_U_advected; } - P = U * (gamma - 1.0); + P = U * (k_gamma - 1.0); return P; } From 2c9ac5c06b82a280a0d9ee81e3bcdbecc29788ea Mon Sep 17 00:00:00 2001 From: helenarichie Date: Mon, 11 Apr 2022 13:02:52 -0400 Subject: [PATCH 048/694] fix bug in hydro utils variable name --- src/utils/hydro_utilities.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/utils/hydro_utilities.h b/src/utils/hydro_utilities.h index c1d6bf214..8df9c4013 100644 --- a/src/utils/hydro_utilities.h +++ b/src/utils/hydro_utilities.h @@ -56,7 +56,7 @@ namespace hydro_utilities { inline __host__ __device__ Real Calc_Energy_Primitive(Real const &k_P, Real const &k_d, Real const &k_vx, Real const &k_vy, Real const &k_vz, Real const &k_gamma) { // Compute and return energy - return (fmax(P, TINY_NUMBER)/(k_gamma - 1.)) + 0.5 * k_d * (k_vx*k_vx + k_vy*k_vy + k_vz*k_vz); + return (fmax(k_P, TINY_NUMBER)/(k_gamma - 1.)) + 0.5 * k_d * (k_vx*k_vx + k_vy*k_vy + k_vz*k_vz); } inline __host__ __device__ Real Get_Pressure_From_DE(Real const &k_E, Real const &k_U_total, Real const &k_U_advected, Real const &k_gamma) { From d667bca9c5f7cf0ca8a97ce0bc1c77561f5fb314 Mon Sep 17 00:00:00 2001 From: helenarichie Date: Mon, 11 Apr 2022 16:14:33 -0400 Subject: [PATCH 049/694] undo merge w/. dust branch --- src/dust/dust_cuda.cu | 171 ------------------------------------ src/dust/dust_cuda.h | 54 ------------ src/dust/dust_cuda_tests.cu | 46 ---------- 3 files changed, 271 deletions(-) delete mode 100644 src/dust/dust_cuda.cu delete mode 100644 src/dust/dust_cuda.h delete mode 100644 src/dust/dust_cuda_tests.cu diff --git a/src/dust/dust_cuda.cu b/src/dust/dust_cuda.cu deleted file mode 100644 index 5e4757a61..000000000 --- a/src/dust/dust_cuda.cu +++ /dev/null @@ -1,171 +0,0 @@ -#ifdef CUDA -#ifdef DUST_GPU - -#include"dust_cuda.h" -#include -#include -#include"global.h" -#include"global_cuda.h" -#include"gpu.hpp" - -__global__ void dust_kernel(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, -int n_fields, Real dt, Real gamma, Real *dt_array) { - __shared__ Real min_dt[TPB]; // TPB = threads per block - - // get grid inidices - int n_cells = nx * ny * nz; - int is, ie, js, je, ks, ke; - Get_Indices(n_ghost, nx, ny, nz, is, ie, js, je, ks, ke); - - // get a global thread ID - int id; - int xid, yid, zid; - int tid; - Get_GTID(id, xid, yid, zid, tid, nx, ny, nz); - - // define physics variables - Real d_gas, d_dust; // fluid mass densities - Real n; // gas number density - Real T, E, p; // temperature, energy, pressure - Real mu = 0.6; // mean molecular weight - Real vx, vy, vz; // velocities - #ifdef DE - Real ge; - #endif // DE - - // define integration variables - Real dd_dt; // instantaneous rate of change in dust density - Real dd; // change in dust density at current time-step - Real dd_max = 0.01; // allowable percentage of dust density increase - Real dt_sub; //refined timestep - - _syncthreads(); - - if (xid >= is && xid < ie && yid >= js && yid < je && zid >= ks && zid < ke) { - // get quantities from dev_conserved - d_gas = dev_conserved[id]; - d_dust = dev_conserved[5*n_cells + id]; - E = dev_conserved[4*n_cells + id]; - // make sure thread hasn't crashed - if (E < 0.0 || E != E) return; - - vx = dev_conserved[1*n_cells + id] / d_gas; - vy = dev_conserved[2*n_cells + id] / d_gas; - vz = dev_conserved[3*n_cells + id] / d_gas; - - #ifdef DE - ge = dev_conserved[(n_fields-1)*n_cells + id] / d_gas; - ge = fmax(ge, (Real) TINY_NUMBER); - #endif // DE - - // calculate physical quantities - p = Calc_Pressure(E, d_gas, vx, vy, vz, gamma); - - Real T_init; - T_init = Calc_Temp(p, n); - - #ifdef DE - T_init = Calc_Temp_DE(d_gas, ge, gamma, n); - #endif // DE - - T = T_init; - - // calculate change in dust density - Dust dustObj(T, n, dt, d_gas, d_dust); - dustObj.calc_tau_sp(); - - dd_dt = dustObj.calc_dd_dt(); - dd = dd_dt * dt; - - // ensure that dust density is not changing too rapidly - while (d_dust/dd > dd_max) { - dt_sub = dd_max * d_dust / dd_dt; - dustObj.d_dust += dt_sub * dd_dt; - dustObj.dt -= dt_sub; - dt = dustObj.dt; - dd_dt = dustObj.calc_dd_dt(); - dd = dt * dd_dt; - } - - // update dust and gas densities - dev_conserved[5*n_cells + id] = dustObj.d_dust; - dev_conserved[id] += dd; - } - __syncthreads(); - - // do the reduction in shared memory (find the min timestep in the block) - for (unsigned int s=1; s -#include"global.h" - -__global__ void dust_kernel(Real *dev_conserved, int nx, int ny, int nz, -int n_ghost, int n_fields, Real dt, Real gamma, Real *dt_array); - -// general purpose functions: -__device__ void Get_Indices(int n_ghost, int nx, int ny, int nz, int &is, int &ie, int &js, int &je, int &ks, int &ke); - -__device__ void Get_GTID(int &id, int &xid, int &yid, int &zid, int &tid, int nx, int ny, int nz); - -__device__ Real Calc_Pressure(Real E, Real d_gas, Real vx, Real vy, Real vz, -Real gamma); - -__device__ Real Calc_Temp(Real p, Real n); - -#ifdef DE -__device__ Real Calc_Temp_DE(Real d_gas, Real ge, Real gamma, Real n); -#endif // DE - -class Dust: { - - public: - Real T, n, dt, d_gas, d_dust; - Real tau_sp; - Dust(Real T_in, Real n_in, Real dt_in, Real d_gas_in, Real d_dust_in) { - T = T_in; - n = n_in; - dt = dt_in; - d_gas = d_gas_in; - d_dust = d_dust_in; - } - void calc_tau_sp(); - Real calc_dd_dt(); - - private: - Real MP = 1.6726*pow(10,-24); // proton mass in g - Real YR_IN_S = 3.154*pow(10,7); // one year in s - -}; - -#endif // DUST_CUDA_H -#endif // DUST_GPU -#endif // CUDA \ No newline at end of file diff --git a/src/dust/dust_cuda_tests.cu b/src/dust/dust_cuda_tests.cu deleted file mode 100644 index fa27f2474..000000000 --- a/src/dust/dust_cuda_tests.cu +++ /dev/null @@ -1,46 +0,0 @@ -/*! -* \file dust_cuda_tests.cu -* \author Helena Richie (helenarichie@pitt.edu) -* \brief Test dust model functions -* -*/ - -// STL Includes -#include -#include -#include - -// External Includes -#include // Include GoogleTest and related libraries/headers - -// Local Includes -#include "../global/global_cuda.h" -#include "../utils/gpu.hpp" -#include "../utils/testing_utilities.h" -#include "../dust/dust_cuda_updated.h" // Include code to test - -#ifdef DUST_GPU - -TEST(tDUSTAccretionTest, AccretionTestExpectCorrectOutput) // test suite name, test name -{ - Real const testn = 1; - Real const testT = pow(10, 5.0); - Real const testNumber = CIE_cool(testn, testT); - - Real const fiducialNumber = 4.6639082688443984*pow(10, -22); - - double absoluteDiff; - int64_t ulpsDiff; - - bool isTrue; - - isTrue = testingUtilities::nearlyEqualDbl(fiducialNumber, testNumber, absoluteDiff, ulpsDiff); - - EXPECT_TRUE(isTrue) - << "The fiducial value is: " << fiducialNumber << std::endl - << "The test value is: " << testNumber << std::endl - << "The absolute difference is: " << absoluteDiff << std::endl - << "The ULP difference is: " << ulpsDiff << std::endl; -} - -#endif // DUST_GPU \ No newline at end of file From c36ffd23d892b64c4d73348b82f1fb54abdcf159 Mon Sep 17 00:00:00 2001 From: helenarichie Date: Mon, 11 Apr 2022 16:18:05 -0400 Subject: [PATCH 050/694] undo merge w/. dust branch --- src/cooling/cooling_cuda_tests.cu | 46 ------------------------------- 1 file changed, 46 deletions(-) delete mode 100644 src/cooling/cooling_cuda_tests.cu diff --git a/src/cooling/cooling_cuda_tests.cu b/src/cooling/cooling_cuda_tests.cu deleted file mode 100644 index 7b2ef41db..000000000 --- a/src/cooling/cooling_cuda_tests.cu +++ /dev/null @@ -1,46 +0,0 @@ -/*! -* \file cooling_cuda_tests.cpp -* \author Helena Richie (helenarichie@pitt.edu) -* \brief Testing the CIE cooling rate function in cooling_cuda.cu -* -*/ - -// STL Includes -#include -#include -#include - -// External Includes -#include // Include GoogleTest and related libraries/headers - -// Local Includes -#include "../global/global_cuda.h" -#include "../utils/gpu.hpp" -#include "../utils/testing_utilities.h" -#include "../cooling/cooling_cuda.h" // Include code to test - -#ifdef COOLING_GPU - -TEST(tCOOLINGTestCIECool, TestCIECoolExpectCorrectOutput) // test suite name, test name -{ - Real const testn = 1; - Real const testT = pow(10, 5.0); - Real const testNumber = CIE_cool(testn, testT); - - Real const fiducialNumber = 4.6639082688443984*pow(10, -22); - - double absoluteDiff; - int64_t ulpsDiff; - - bool isTrue; - - isTrue = testingUtilities::nearlyEqualDbl(fiducialNumber, testNumber, absoluteDiff, ulpsDiff); - - EXPECT_TRUE(isTrue) - << "The fiducial value is: " << fiducialNumber << std::endl - << "The test value is: " << testNumber << std::endl - << "The absolute difference is: " << absoluteDiff << std::endl - << "The ULP difference is: " << ulpsDiff << std::endl; -} - -#endif // COOLING_GPU \ No newline at end of file From 933c79142d6d7debc841af71c3c111ab8341edd7 Mon Sep 17 00:00:00 2001 From: helenarichie Date: Tue, 12 Apr 2022 11:26:29 -0400 Subject: [PATCH 051/694] Revert "undo merge w/. dust branch" This reverts commit c36ffd23d892b64c4d73348b82f1fb54abdcf159. --- src/cooling/cooling_cuda_tests.cu | 46 +++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) create mode 100644 src/cooling/cooling_cuda_tests.cu diff --git a/src/cooling/cooling_cuda_tests.cu b/src/cooling/cooling_cuda_tests.cu new file mode 100644 index 000000000..7b2ef41db --- /dev/null +++ b/src/cooling/cooling_cuda_tests.cu @@ -0,0 +1,46 @@ +/*! +* \file cooling_cuda_tests.cpp +* \author Helena Richie (helenarichie@pitt.edu) +* \brief Testing the CIE cooling rate function in cooling_cuda.cu +* +*/ + +// STL Includes +#include +#include +#include + +// External Includes +#include // Include GoogleTest and related libraries/headers + +// Local Includes +#include "../global/global_cuda.h" +#include "../utils/gpu.hpp" +#include "../utils/testing_utilities.h" +#include "../cooling/cooling_cuda.h" // Include code to test + +#ifdef COOLING_GPU + +TEST(tCOOLINGTestCIECool, TestCIECoolExpectCorrectOutput) // test suite name, test name +{ + Real const testn = 1; + Real const testT = pow(10, 5.0); + Real const testNumber = CIE_cool(testn, testT); + + Real const fiducialNumber = 4.6639082688443984*pow(10, -22); + + double absoluteDiff; + int64_t ulpsDiff; + + bool isTrue; + + isTrue = testingUtilities::nearlyEqualDbl(fiducialNumber, testNumber, absoluteDiff, ulpsDiff); + + EXPECT_TRUE(isTrue) + << "The fiducial value is: " << fiducialNumber << std::endl + << "The test value is: " << testNumber << std::endl + << "The absolute difference is: " << absoluteDiff << std::endl + << "The ULP difference is: " << ulpsDiff << std::endl; +} + +#endif // COOLING_GPU \ No newline at end of file From 917dbbc2d0d93f9d74ac8c2f3bad2de92394282e Mon Sep 17 00:00:00 2001 From: helenarichie Date: Tue, 12 Apr 2022 11:29:21 -0400 Subject: [PATCH 052/694] Revert "undo merge w/. dust branch" This reverts commit d667bca9c5f7cf0ca8a97ce0bc1c77561f5fb314. --- src/dust/dust_cuda.cu | 171 ++++++++++++++++++++++++++++++++++++ src/dust/dust_cuda.h | 54 ++++++++++++ src/dust/dust_cuda_tests.cu | 46 ++++++++++ 3 files changed, 271 insertions(+) create mode 100644 src/dust/dust_cuda.cu create mode 100644 src/dust/dust_cuda.h create mode 100644 src/dust/dust_cuda_tests.cu diff --git a/src/dust/dust_cuda.cu b/src/dust/dust_cuda.cu new file mode 100644 index 000000000..5e4757a61 --- /dev/null +++ b/src/dust/dust_cuda.cu @@ -0,0 +1,171 @@ +#ifdef CUDA +#ifdef DUST_GPU + +#include"dust_cuda.h" +#include +#include +#include"global.h" +#include"global_cuda.h" +#include"gpu.hpp" + +__global__ void dust_kernel(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, +int n_fields, Real dt, Real gamma, Real *dt_array) { + __shared__ Real min_dt[TPB]; // TPB = threads per block + + // get grid inidices + int n_cells = nx * ny * nz; + int is, ie, js, je, ks, ke; + Get_Indices(n_ghost, nx, ny, nz, is, ie, js, je, ks, ke); + + // get a global thread ID + int id; + int xid, yid, zid; + int tid; + Get_GTID(id, xid, yid, zid, tid, nx, ny, nz); + + // define physics variables + Real d_gas, d_dust; // fluid mass densities + Real n; // gas number density + Real T, E, p; // temperature, energy, pressure + Real mu = 0.6; // mean molecular weight + Real vx, vy, vz; // velocities + #ifdef DE + Real ge; + #endif // DE + + // define integration variables + Real dd_dt; // instantaneous rate of change in dust density + Real dd; // change in dust density at current time-step + Real dd_max = 0.01; // allowable percentage of dust density increase + Real dt_sub; //refined timestep + + _syncthreads(); + + if (xid >= is && xid < ie && yid >= js && yid < je && zid >= ks && zid < ke) { + // get quantities from dev_conserved + d_gas = dev_conserved[id]; + d_dust = dev_conserved[5*n_cells + id]; + E = dev_conserved[4*n_cells + id]; + // make sure thread hasn't crashed + if (E < 0.0 || E != E) return; + + vx = dev_conserved[1*n_cells + id] / d_gas; + vy = dev_conserved[2*n_cells + id] / d_gas; + vz = dev_conserved[3*n_cells + id] / d_gas; + + #ifdef DE + ge = dev_conserved[(n_fields-1)*n_cells + id] / d_gas; + ge = fmax(ge, (Real) TINY_NUMBER); + #endif // DE + + // calculate physical quantities + p = Calc_Pressure(E, d_gas, vx, vy, vz, gamma); + + Real T_init; + T_init = Calc_Temp(p, n); + + #ifdef DE + T_init = Calc_Temp_DE(d_gas, ge, gamma, n); + #endif // DE + + T = T_init; + + // calculate change in dust density + Dust dustObj(T, n, dt, d_gas, d_dust); + dustObj.calc_tau_sp(); + + dd_dt = dustObj.calc_dd_dt(); + dd = dd_dt * dt; + + // ensure that dust density is not changing too rapidly + while (d_dust/dd > dd_max) { + dt_sub = dd_max * d_dust / dd_dt; + dustObj.d_dust += dt_sub * dd_dt; + dustObj.dt -= dt_sub; + dt = dustObj.dt; + dd_dt = dustObj.calc_dd_dt(); + dd = dt * dd_dt; + } + + // update dust and gas densities + dev_conserved[5*n_cells + id] = dustObj.d_dust; + dev_conserved[id] += dd; + } + __syncthreads(); + + // do the reduction in shared memory (find the min timestep in the block) + for (unsigned int s=1; s +#include"global.h" + +__global__ void dust_kernel(Real *dev_conserved, int nx, int ny, int nz, +int n_ghost, int n_fields, Real dt, Real gamma, Real *dt_array); + +// general purpose functions: +__device__ void Get_Indices(int n_ghost, int nx, int ny, int nz, int &is, int &ie, int &js, int &je, int &ks, int &ke); + +__device__ void Get_GTID(int &id, int &xid, int &yid, int &zid, int &tid, int nx, int ny, int nz); + +__device__ Real Calc_Pressure(Real E, Real d_gas, Real vx, Real vy, Real vz, +Real gamma); + +__device__ Real Calc_Temp(Real p, Real n); + +#ifdef DE +__device__ Real Calc_Temp_DE(Real d_gas, Real ge, Real gamma, Real n); +#endif // DE + +class Dust: { + + public: + Real T, n, dt, d_gas, d_dust; + Real tau_sp; + Dust(Real T_in, Real n_in, Real dt_in, Real d_gas_in, Real d_dust_in) { + T = T_in; + n = n_in; + dt = dt_in; + d_gas = d_gas_in; + d_dust = d_dust_in; + } + void calc_tau_sp(); + Real calc_dd_dt(); + + private: + Real MP = 1.6726*pow(10,-24); // proton mass in g + Real YR_IN_S = 3.154*pow(10,7); // one year in s + +}; + +#endif // DUST_CUDA_H +#endif // DUST_GPU +#endif // CUDA \ No newline at end of file diff --git a/src/dust/dust_cuda_tests.cu b/src/dust/dust_cuda_tests.cu new file mode 100644 index 000000000..fa27f2474 --- /dev/null +++ b/src/dust/dust_cuda_tests.cu @@ -0,0 +1,46 @@ +/*! +* \file dust_cuda_tests.cu +* \author Helena Richie (helenarichie@pitt.edu) +* \brief Test dust model functions +* +*/ + +// STL Includes +#include +#include +#include + +// External Includes +#include // Include GoogleTest and related libraries/headers + +// Local Includes +#include "../global/global_cuda.h" +#include "../utils/gpu.hpp" +#include "../utils/testing_utilities.h" +#include "../dust/dust_cuda_updated.h" // Include code to test + +#ifdef DUST_GPU + +TEST(tDUSTAccretionTest, AccretionTestExpectCorrectOutput) // test suite name, test name +{ + Real const testn = 1; + Real const testT = pow(10, 5.0); + Real const testNumber = CIE_cool(testn, testT); + + Real const fiducialNumber = 4.6639082688443984*pow(10, -22); + + double absoluteDiff; + int64_t ulpsDiff; + + bool isTrue; + + isTrue = testingUtilities::nearlyEqualDbl(fiducialNumber, testNumber, absoluteDiff, ulpsDiff); + + EXPECT_TRUE(isTrue) + << "The fiducial value is: " << fiducialNumber << std::endl + << "The test value is: " << testNumber << std::endl + << "The absolute difference is: " << absoluteDiff << std::endl + << "The ULP difference is: " << ulpsDiff << std::endl; +} + +#endif // DUST_GPU \ No newline at end of file From eac4b7a547563b5b999b40d246e3d1abdac08da6 Mon Sep 17 00:00:00 2001 From: helenarichie Date: Tue, 12 Apr 2022 11:29:46 -0400 Subject: [PATCH 053/694] Revert "fix bug in hydro utils variable name" This reverts commit 2c9ac5c06b82a280a0d9ee81e3bcdbecc29788ea. --- src/utils/hydro_utilities.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/utils/hydro_utilities.h b/src/utils/hydro_utilities.h index 8df9c4013..c1d6bf214 100644 --- a/src/utils/hydro_utilities.h +++ b/src/utils/hydro_utilities.h @@ -56,7 +56,7 @@ namespace hydro_utilities { inline __host__ __device__ Real Calc_Energy_Primitive(Real const &k_P, Real const &k_d, Real const &k_vx, Real const &k_vy, Real const &k_vz, Real const &k_gamma) { // Compute and return energy - return (fmax(k_P, TINY_NUMBER)/(k_gamma - 1.)) + 0.5 * k_d * (k_vx*k_vx + k_vy*k_vy + k_vz*k_vz); + return (fmax(P, TINY_NUMBER)/(k_gamma - 1.)) + 0.5 * k_d * (k_vx*k_vx + k_vy*k_vy + k_vz*k_vz); } inline __host__ __device__ Real Get_Pressure_From_DE(Real const &k_E, Real const &k_U_total, Real const &k_U_advected, Real const &k_gamma) { From 14bdfecc0264da5b2782b6c3dd0cbd2c78ac09be Mon Sep 17 00:00:00 2001 From: helenarichie Date: Tue, 12 Apr 2022 12:03:51 -0400 Subject: [PATCH 054/694] undo merges by recloning CAAR branch of cholla --- src/cooling/cooling_cuda.cu | 2 +- src/cooling/cooling_cuda.h | 2 +- src/cooling/cooling_cuda_tests.cu | 46 ------------------- src/system_tests/hydro_system_tests.cpp | 6 +-- ...edMpi_CorrectInputExpectCorrectOutput.txt} | 0 src/utils/hydro_utilities.h | 34 +++++++------- 6 files changed, 21 insertions(+), 69 deletions(-) delete mode 100644 src/cooling/cooling_cuda_tests.cu rename src/system_tests/input_files/{tHYDROtCOOLINGSYSTEMSodShockTubeParameterizedMpi_CorrectInputExpectCorrectOutput.txt => tHYDROSYSTEMSodShockTubeParameterizedMpi_CorrectInputExpectCorrectOutput.txt} (100%) diff --git a/src/cooling/cooling_cuda.cu b/src/cooling/cooling_cuda.cu index 71cab2eb4..14528b370 100644 --- a/src/cooling/cooling_cuda.cu +++ b/src/cooling/cooling_cuda.cu @@ -327,7 +327,7 @@ __device__ Real primordial_cool(Real n, Real T) /* \fn __device__ Real CIE_cool(Real n, Real T) * \brief Analytic fit to a solar metallicity CIE cooling curve calculated using Cloudy. */ -__device__ __host__ Real CIE_cool(Real n, Real T) +__device__ Real CIE_cool(Real n, Real T) { Real lambda = 0.0; //cooling rate, erg s^-1 cm^3 Real cool = 0.0; //cooling per unit volume, erg /s / cm^3 diff --git a/src/cooling/cooling_cuda.h b/src/cooling/cooling_cuda.h index ce8e79ff5..af00de1ae 100644 --- a/src/cooling/cooling_cuda.h +++ b/src/cooling/cooling_cuda.h @@ -45,7 +45,7 @@ __device__ Real primordial_cool(Real n, Real T); /* \fn __device__ Real CIE_cool(Real n, Real T) * \brief Analytic fit to a solar metallicity CIE cooling curve calculated using Cloudy. */ -__device__ __host__ Real CIE_cool(Real n, Real T); +__device__ Real CIE_cool(Real n, Real T); /* \fn __device__ Real Cloudy_cool(Real n, Real T) diff --git a/src/cooling/cooling_cuda_tests.cu b/src/cooling/cooling_cuda_tests.cu deleted file mode 100644 index 7b2ef41db..000000000 --- a/src/cooling/cooling_cuda_tests.cu +++ /dev/null @@ -1,46 +0,0 @@ -/*! -* \file cooling_cuda_tests.cpp -* \author Helena Richie (helenarichie@pitt.edu) -* \brief Testing the CIE cooling rate function in cooling_cuda.cu -* -*/ - -// STL Includes -#include -#include -#include - -// External Includes -#include // Include GoogleTest and related libraries/headers - -// Local Includes -#include "../global/global_cuda.h" -#include "../utils/gpu.hpp" -#include "../utils/testing_utilities.h" -#include "../cooling/cooling_cuda.h" // Include code to test - -#ifdef COOLING_GPU - -TEST(tCOOLINGTestCIECool, TestCIECoolExpectCorrectOutput) // test suite name, test name -{ - Real const testn = 1; - Real const testT = pow(10, 5.0); - Real const testNumber = CIE_cool(testn, testT); - - Real const fiducialNumber = 4.6639082688443984*pow(10, -22); - - double absoluteDiff; - int64_t ulpsDiff; - - bool isTrue; - - isTrue = testingUtilities::nearlyEqualDbl(fiducialNumber, testNumber, absoluteDiff, ulpsDiff); - - EXPECT_TRUE(isTrue) - << "The fiducial value is: " << fiducialNumber << std::endl - << "The test value is: " << testNumber << std::endl - << "The absolute difference is: " << absoluteDiff << std::endl - << "The ULP difference is: " << ulpsDiff << std::endl; -} - -#endif // COOLING_GPU \ No newline at end of file diff --git a/src/system_tests/hydro_system_tests.cpp b/src/system_tests/hydro_system_tests.cpp index 22f1dd6d4..72a6dc349 100644 --- a/src/system_tests/hydro_system_tests.cpp +++ b/src/system_tests/hydro_system_tests.cpp @@ -31,7 +31,7 @@ * */ /// @{ -class tHYDROtCOOLINGSYSTEMSodShockTubeParameterizedMpi +class tHYDROSYSTEMSodShockTubeParameterizedMpi :public ::testing::TestWithParam { @@ -39,7 +39,7 @@ class tHYDROtCOOLINGSYSTEMSodShockTubeParameterizedMpi systemTest::SystemTestRunner sodTest; }; -TEST_P(tHYDROtCOOLINGSYSTEMSodShockTubeParameterizedMpi, +TEST_P(tHYDROSYSTEMSodShockTubeParameterizedMpi, CorrectInputExpectCorrectOutput) { sodTest.numMpiRanks = GetParam(); @@ -47,7 +47,7 @@ TEST_P(tHYDROtCOOLINGSYSTEMSodShockTubeParameterizedMpi, } INSTANTIATE_TEST_SUITE_P(CorrectInputExpectCorrectOutput, - tHYDROtCOOLINGSYSTEMSodShockTubeParameterizedMpi, + tHYDROSYSTEMSodShockTubeParameterizedMpi, ::testing::Values(1, 2, 4)); /// @} // ============================================================================= diff --git a/src/system_tests/input_files/tHYDROtCOOLINGSYSTEMSodShockTubeParameterizedMpi_CorrectInputExpectCorrectOutput.txt b/src/system_tests/input_files/tHYDROSYSTEMSodShockTubeParameterizedMpi_CorrectInputExpectCorrectOutput.txt similarity index 100% rename from src/system_tests/input_files/tHYDROtCOOLINGSYSTEMSodShockTubeParameterizedMpi_CorrectInputExpectCorrectOutput.txt rename to src/system_tests/input_files/tHYDROSYSTEMSodShockTubeParameterizedMpi_CorrectInputExpectCorrectOutput.txt diff --git a/src/utils/hydro_utilities.h b/src/utils/hydro_utilities.h index 8df9c4013..51439ac29 100644 --- a/src/utils/hydro_utilities.h +++ b/src/utils/hydro_utilities.h @@ -25,50 +25,48 @@ * T : temperature * mx, my, mz : x, y, and z momentum * n : number density - -* "k" prefix in a variable indicates that it is a const. */ namespace hydro_utilities { - inline __host__ __device__ Real Calc_Pressure_Primitive(Real const &k_E, Real const &k_d, Real const &k_vx, Real const &k_vy, Real const &k_vz, Real const &k_gamma) { + inline __host__ __device__ Real Calc_Pressure_Primitive(Real const &E, Real const &d, Real const &vx, Real const &vy, Real const &vz, Real const &gamma) { Real P; - P = (k_E - 0.5 * k_d * (k_vx*k_vx + k_vy*k_vy + k_vz*k_vz)) * (k_gamma - 1.0); + P = (E - 0.5 * d * (vx*vx + vy*vy + vz*vz)) * (gamma - 1.0); P = fmax(P, TINY_NUMBER); return P; } - inline __host__ __device__ Real Calc_Pressure_Conserved(Real const &k_E, Real const &k_d, Real const &k_mx, Real const &k_my, Real const &k_mz, Real const &k_gamma) { - Real P = (k_E - 0.5 * (k_mx*k_mx + k_my*k_my + k_mz*k_mz) / k_d) * (k_gamma - 1.); + inline __host__ __device__ Real Calc_Pressure_Conserved(Real const &E, Real const &d, Real const &mx, Real const &my, Real const &mz, Real const &gamma) { + Real P= (E - 0.5 * (mx*mx + my*my + mz*mz) / d) * (gamma - 1.); return fmax(P, TINY_NUMBER); } - inline __host__ __device__ Real Calc_Temp(Real const &k_P, Real const &k_n) { - Real T = k_P * PRESSURE_UNIT / (k_n * KB); + inline __host__ __device__ Real Calc_Temp(Real const &P, Real const &n) { + Real T = P * PRESSURE_UNIT / (n * KB); return T; } #ifdef DE - inline __host__ __device__ Real Calc_Temp_DE(Real const &k_d, Real const &kge, Real const &k_gamma, Real const&k_n) { - Real T = k_d * kge * (k_gamma - 1.0) * PRESSURE_UNIT / (k_n * KB); + inline __host__ __device__ Real Calc_Temp_DE(Real const &d, Real const &ge, Real const &gamma, Real const&n) { + Real T = d * ge * (gamma - 1.0) * PRESSURE_UNIT / (n * KB); return T; } #endif // DE - inline __host__ __device__ Real Calc_Energy_Primitive(Real const &k_P, Real const &k_d, Real const &k_vx, Real const &k_vy, Real const &k_vz, Real const &k_gamma) { + inline __host__ __device__ Real Calc_Energy_Primitive(Real const &P, Real const &d, Real const &vx, Real const &vy, Real const &vz, Real const &gamma) { // Compute and return energy - return (fmax(k_P, TINY_NUMBER)/(k_gamma - 1.)) + 0.5 * k_d * (k_vx*k_vx + k_vy*k_vy + k_vz*k_vz); + return (fmax(P, TINY_NUMBER)/(gamma - 1.)) + 0.5 * d * (vx*vx + vy*vy + vz*vz); } - inline __host__ __device__ Real Get_Pressure_From_DE(Real const &k_E, Real const &k_U_total, Real const &k_U_advected, Real const &k_gamma) { + inline __host__ __device__ Real Get_Pressure_From_DE(Real const &E, Real const &U_total, Real const &U_advected, Real const &gamma) { Real U, P; - Real const k_eta = DE_ETA_1; + Real eta = DE_ETA_1; // Apply same condition as Byan+2013 to select the internal energy from which compute pressure. - if (k_U_total/k_E > k_eta) { - U = k_U_total; + if (U_total/E > eta) { + U = U_total; } else { - U = k_U_advected; + U = U_advected; } - P = U * (k_gamma - 1.0); + P = U * (gamma - 1.0); return P; } From e7bb2165f145caa2ef437886de33071ab7fe8d94 Mon Sep 17 00:00:00 2001 From: helenarichie Date: Tue, 12 Apr 2022 12:08:37 -0400 Subject: [PATCH 055/694] resolve conflict --- src/dust/dust_cuda.cu | 171 ------------------------------------ src/dust/dust_cuda.h | 54 ------------ src/dust/dust_cuda_tests.cu | 46 ---------- 3 files changed, 271 deletions(-) delete mode 100644 src/dust/dust_cuda.cu delete mode 100644 src/dust/dust_cuda.h delete mode 100644 src/dust/dust_cuda_tests.cu diff --git a/src/dust/dust_cuda.cu b/src/dust/dust_cuda.cu deleted file mode 100644 index 5e4757a61..000000000 --- a/src/dust/dust_cuda.cu +++ /dev/null @@ -1,171 +0,0 @@ -#ifdef CUDA -#ifdef DUST_GPU - -#include"dust_cuda.h" -#include -#include -#include"global.h" -#include"global_cuda.h" -#include"gpu.hpp" - -__global__ void dust_kernel(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, -int n_fields, Real dt, Real gamma, Real *dt_array) { - __shared__ Real min_dt[TPB]; // TPB = threads per block - - // get grid inidices - int n_cells = nx * ny * nz; - int is, ie, js, je, ks, ke; - Get_Indices(n_ghost, nx, ny, nz, is, ie, js, je, ks, ke); - - // get a global thread ID - int id; - int xid, yid, zid; - int tid; - Get_GTID(id, xid, yid, zid, tid, nx, ny, nz); - - // define physics variables - Real d_gas, d_dust; // fluid mass densities - Real n; // gas number density - Real T, E, p; // temperature, energy, pressure - Real mu = 0.6; // mean molecular weight - Real vx, vy, vz; // velocities - #ifdef DE - Real ge; - #endif // DE - - // define integration variables - Real dd_dt; // instantaneous rate of change in dust density - Real dd; // change in dust density at current time-step - Real dd_max = 0.01; // allowable percentage of dust density increase - Real dt_sub; //refined timestep - - _syncthreads(); - - if (xid >= is && xid < ie && yid >= js && yid < je && zid >= ks && zid < ke) { - // get quantities from dev_conserved - d_gas = dev_conserved[id]; - d_dust = dev_conserved[5*n_cells + id]; - E = dev_conserved[4*n_cells + id]; - // make sure thread hasn't crashed - if (E < 0.0 || E != E) return; - - vx = dev_conserved[1*n_cells + id] / d_gas; - vy = dev_conserved[2*n_cells + id] / d_gas; - vz = dev_conserved[3*n_cells + id] / d_gas; - - #ifdef DE - ge = dev_conserved[(n_fields-1)*n_cells + id] / d_gas; - ge = fmax(ge, (Real) TINY_NUMBER); - #endif // DE - - // calculate physical quantities - p = Calc_Pressure(E, d_gas, vx, vy, vz, gamma); - - Real T_init; - T_init = Calc_Temp(p, n); - - #ifdef DE - T_init = Calc_Temp_DE(d_gas, ge, gamma, n); - #endif // DE - - T = T_init; - - // calculate change in dust density - Dust dustObj(T, n, dt, d_gas, d_dust); - dustObj.calc_tau_sp(); - - dd_dt = dustObj.calc_dd_dt(); - dd = dd_dt * dt; - - // ensure that dust density is not changing too rapidly - while (d_dust/dd > dd_max) { - dt_sub = dd_max * d_dust / dd_dt; - dustObj.d_dust += dt_sub * dd_dt; - dustObj.dt -= dt_sub; - dt = dustObj.dt; - dd_dt = dustObj.calc_dd_dt(); - dd = dt * dd_dt; - } - - // update dust and gas densities - dev_conserved[5*n_cells + id] = dustObj.d_dust; - dev_conserved[id] += dd; - } - __syncthreads(); - - // do the reduction in shared memory (find the min timestep in the block) - for (unsigned int s=1; s -#include"global.h" - -__global__ void dust_kernel(Real *dev_conserved, int nx, int ny, int nz, -int n_ghost, int n_fields, Real dt, Real gamma, Real *dt_array); - -// general purpose functions: -__device__ void Get_Indices(int n_ghost, int nx, int ny, int nz, int &is, int &ie, int &js, int &je, int &ks, int &ke); - -__device__ void Get_GTID(int &id, int &xid, int &yid, int &zid, int &tid, int nx, int ny, int nz); - -__device__ Real Calc_Pressure(Real E, Real d_gas, Real vx, Real vy, Real vz, -Real gamma); - -__device__ Real Calc_Temp(Real p, Real n); - -#ifdef DE -__device__ Real Calc_Temp_DE(Real d_gas, Real ge, Real gamma, Real n); -#endif // DE - -class Dust: { - - public: - Real T, n, dt, d_gas, d_dust; - Real tau_sp; - Dust(Real T_in, Real n_in, Real dt_in, Real d_gas_in, Real d_dust_in) { - T = T_in; - n = n_in; - dt = dt_in; - d_gas = d_gas_in; - d_dust = d_dust_in; - } - void calc_tau_sp(); - Real calc_dd_dt(); - - private: - Real MP = 1.6726*pow(10,-24); // proton mass in g - Real YR_IN_S = 3.154*pow(10,7); // one year in s - -}; - -#endif // DUST_CUDA_H -#endif // DUST_GPU -#endif // CUDA \ No newline at end of file diff --git a/src/dust/dust_cuda_tests.cu b/src/dust/dust_cuda_tests.cu deleted file mode 100644 index fa27f2474..000000000 --- a/src/dust/dust_cuda_tests.cu +++ /dev/null @@ -1,46 +0,0 @@ -/*! -* \file dust_cuda_tests.cu -* \author Helena Richie (helenarichie@pitt.edu) -* \brief Test dust model functions -* -*/ - -// STL Includes -#include -#include -#include - -// External Includes -#include // Include GoogleTest and related libraries/headers - -// Local Includes -#include "../global/global_cuda.h" -#include "../utils/gpu.hpp" -#include "../utils/testing_utilities.h" -#include "../dust/dust_cuda_updated.h" // Include code to test - -#ifdef DUST_GPU - -TEST(tDUSTAccretionTest, AccretionTestExpectCorrectOutput) // test suite name, test name -{ - Real const testn = 1; - Real const testT = pow(10, 5.0); - Real const testNumber = CIE_cool(testn, testT); - - Real const fiducialNumber = 4.6639082688443984*pow(10, -22); - - double absoluteDiff; - int64_t ulpsDiff; - - bool isTrue; - - isTrue = testingUtilities::nearlyEqualDbl(fiducialNumber, testNumber, absoluteDiff, ulpsDiff); - - EXPECT_TRUE(isTrue) - << "The fiducial value is: " << fiducialNumber << std::endl - << "The test value is: " << testNumber << std::endl - << "The absolute difference is: " << absoluteDiff << std::endl - << "The ULP difference is: " << ulpsDiff << std::endl; -} - -#endif // DUST_GPU \ No newline at end of file From 7a364ffe31b5dc12de75d84ab52dca56dc91d845 Mon Sep 17 00:00:00 2001 From: helenarichie Date: Tue, 12 Apr 2022 13:02:52 -0400 Subject: [PATCH 056/694] fix syntax bug --- src/utils/hydro_utilities.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/utils/hydro_utilities.h b/src/utils/hydro_utilities.h index 17b093d95..f40517a7a 100644 --- a/src/utils/hydro_utilities.h +++ b/src/utils/hydro_utilities.h @@ -54,7 +54,7 @@ namespace hydro_utilities { inline __host__ __device__ Real Calc_Energy_Primitive(Real const &P, Real const &d, Real const &vx, Real const &vy, Real const &vz, Real const &gamma) { // Compute and return energy - return (fmax(P, TINY_NUMBER)/gamma - 1.)) + 0.5 *d * (vx*k_vx + vy*vy + vz*vz); + return (fmax(P, TINY_NUMBER)/gamma - 1.) + 0.5 * d * (vx*vx + vy*vy + vz*vz); } inline __host__ __device__ Real Get_Pressure_From_DE(Real const &E, Real const &U_total, Real const &U_advected, Real const &gamma) { From 22df94cf9376551bb6097e69a5f60c90ec7d67da Mon Sep 17 00:00:00 2001 From: helenarichie Date: Tue, 12 Apr 2022 13:06:56 -0400 Subject: [PATCH 057/694] re-add dust coda in dust branch after it was overwritten --- src/dust/dust_cuda.cu | 171 ++++++++++++++++++++++++++++++++++++ src/dust/dust_cuda.h | 54 ++++++++++++ src/dust/dust_cuda_tests.cu | 46 ++++++++++ 3 files changed, 271 insertions(+) create mode 100644 src/dust/dust_cuda.cu create mode 100644 src/dust/dust_cuda.h create mode 100644 src/dust/dust_cuda_tests.cu diff --git a/src/dust/dust_cuda.cu b/src/dust/dust_cuda.cu new file mode 100644 index 000000000..f47da06a8 --- /dev/null +++ b/src/dust/dust_cuda.cu @@ -0,0 +1,171 @@ +#ifdef CUDA +#ifdef DUST_GPU + +#include"dust_cuda.h" +#include +#include +#include"global.h" +#include"global_cuda.h" +#include"gpu.hpp" + +__global__ void dust_kernel(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, +int n_fields, Real dt, Real gamma, Real *dt_array) { + __shared__ Real min_dt[TPB]; // TPB = threads per block + + // get grid inidices + int n_cells = nx * ny * nz; + int is, ie, js, je, ks, ke; + Get_Indices(n_ghost, nx, ny, nz, is, ie, js, je, ks, ke); + + // get a global thread ID + int id; + int xid, yid, zid; + int tid; + Get_GTID(id, xid, yid, zid, tid, nx, ny, nz); + + // define physics variables + Real d_gas, d_dust; // fluid mass densities + Real n; // gas number density + Real T, E, p; // temperature, energy, pressure + Real mu = 0.6; // mean molecular weight + Real vx, vy, vz; // velocities + #ifdef DE + Real ge; + #endif // DE + + // define integration variables + Real dd_dt; // instantaneous rate of change in dust density + Real dd; // change in dust density at current time-step + Real dd_max = 0.01; // allowable percentage of dust density increase + Real dt_sub; //refined timestep + + _syncthreads(); + + if (xid >= is && xid < ie && yid >= js && yid < je && zid >= ks && zid < ke) { + // get quantities from dev_conserved + d_gas = dev_conserved[id]; + d_dust = dev_conserved[5*n_cells + id]; + E = dev_conserved[4*n_cells + id]; + // make sure thread hasn't crashed + if (E < 0.0 || E != E) return; + + vx = dev_conserved[1*n_cells + id] / d_gas; + vy = dev_conserved[2*n_cells + id] / d_gas; + vz = dev_conserved[3*n_cells + id] / d_gas; + + #ifdef DE + ge = dev_conserved[(n_fields-1)*n_cells + id] / d_gas; + ge = fmax(ge, (Real) TINY_NUMBER); + #endif // DE + + // calculate physical quantities + p = Calc_Pressure(E, d_gas, vx, vy, vz, gamma); + + Real T_init; + T_init = Calc_Temp(p, n); + + #ifdef DE + T_init = Calc_Temp_DE(d_gas, ge, gamma, n); + #endif // DE + + T = T_init; + + // calculate change in dust density + Dust dustObj(T, n, dt, d_gas, d_dust); + dustObj.calc_tau_sp(); + + dd_dt = dustObj.calc_dd_dt(); + dd = dd_dt * dt; + + // ensure that dust density is not changing too rapidly + while (d_dust/dd > dd_max) { + dt_sub = dd_max * d_dust / dd_dt; + dustObj.d_dust += dt_sub * dd_dt; + dustObj.dt -= dt_sub; + dt = dustObj.dt; + dd_dt = dustObj.calc_dd_dt(); + dd = dt * dd_dt; + } + + // update dust and gas densities + dev_conserved[5*n_cells + id] = dustObj.d_dust; + dev_conserved[id] += dd; + } + __syncthreads(); + + // do the reduction in shared memory (find the min timestep in the block) + for (unsigned int s=1; s +#include"global.h" + +__global__ void dust_kernel(Real *dev_conserved, int nx, int ny, int nz, +int n_ghost, int n_fields, Real dt, Real gamma, Real *dt_array); + +// general purpose functions: +__device__ void Get_Indices(int n_ghost, int nx, int ny, int nz, int &is, int &ie, int &js, int &je, int &ks, int &ke); + +__device__ void Get_GTID(int &id, int &xid, int &yid, int &zid, int &tid, int nx, int ny, int nz); + +__device__ Real Calc_Pressure(Real E, Real d_gas, Real vx, Real vy, Real vz, +Real gamma); + +__device__ Real Calc_Temp(Real p, Real n); + +#ifdef DE +__device__ Real Calc_Temp_DE(Real d_gas, Real ge, Real gamma, Real n); +#endif // DE + +class Dust: { + + public: + Real T, n, dt, d_gas, d_dust; + Real tau_sp; + Dust(Real T_in, Real n_in, Real dt_in, Real d_gas_in, Real d_dust_in) { + T = T_in; + n = n_in; + dt = dt_in; + d_gas = d_gas_in; + d_dust = d_dust_in; + } + void calc_tau_sp(); + Real calc_dd_dt(); + + private: + Real MP = 1.6726*pow(10,-24); // proton mass in g + Real YR_IN_S = 3.154*pow(10,7); // one year in s + +}; + +#endif // DUST_CUDA_H +#endif // DUST_GPU +#endif // CUDA diff --git a/src/dust/dust_cuda_tests.cu b/src/dust/dust_cuda_tests.cu new file mode 100644 index 000000000..784f37e5f --- /dev/null +++ b/src/dust/dust_cuda_tests.cu @@ -0,0 +1,46 @@ +/*! +* \file dust_cuda_tests.cu +* \author Helena Richie (helenarichie@pitt.edu) +* \brief Test dust model functions +* +*/ + +// STL Includes +#include +#include +#include + +// External Includes +#include // Include GoogleTest and related libraries/headers + +// Local Includes +#include "../global/global_cuda.h" +#include "../utils/gpu.hpp" +#include "../utils/testing_utilities.h" +#include "../dust/dust_cuda_updated.h" // Include code to test + +#ifdef DUST_GPU + +TEST(tDUSTAccretionTest, AccretionTestExpectCorrectOutput) // test suite name, test name +{ + Real const testn = 1; + Real const testT = pow(10, 5.0); + Real const testNumber = CIE_cool(testn, testT); + + Real const fiducialNumber = 4.6639082688443984*pow(10, -22); + + double absoluteDiff; + int64_t ulpsDiff; + + bool isTrue; + + isTrue = testingUtilities::nearlyEqualDbl(fiducialNumber, testNumber, absoluteDiff, ulpsDiff); + + EXPECT_TRUE(isTrue) + << "The fiducial value is: " << fiducialNumber << std::endl + << "The test value is: " << testNumber << std::endl + << "The absolute difference is: " << absoluteDiff << std::endl + << "The ULP difference is: " << ulpsDiff << std::endl; +} + +#endif // DUST_GPU From 23ccb169763182b8586f1ce1ffe630d7f8042fe8 Mon Sep 17 00:00:00 2001 From: helenarichie Date: Fri, 29 Apr 2022 14:09:17 -0400 Subject: [PATCH 058/694] integrate dust kernel into cholla --- src/dust/dust_cuda.cu | 247 ++++++++++++++++++++++++------------------ src/dust/dust_cuda.h | 50 +-------- 2 files changed, 148 insertions(+), 149 deletions(-) diff --git a/src/dust/dust_cuda.cu b/src/dust/dust_cuda.cu index f47da06a8..434e5406e 100644 --- a/src/dust/dust_cuda.cu +++ b/src/dust/dust_cuda.cu @@ -1,57 +1,83 @@ #ifdef CUDA -#ifdef DUST_GPU +#ifdef SCALAR -#include"dust_cuda.h" -#include -#include -#include"global.h" -#include"global_cuda.h" -#include"gpu.hpp" +#include "dust_model.h" -__global__ void dust_kernel(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, -int n_fields, Real dt, Real gamma, Real *dt_array) { - __shared__ Real min_dt[TPB]; // TPB = threads per block +#include +#include +#include - // get grid inidices +#include + +#include "../global/global.h" +#include "../global/global_cuda.h" +#include "../utils/gpu.hpp" +#include "../utils/hydro_utilities.h" +#include "../utils/cuda_utilities.h" +#include "../grid/grid3D.h" + +int main() { + Conserved_Init(host_conserved, rho, vx, vy, vz, P, rho_d, gamma, k_n_cells, k_nx, k_ny, k_nz, k_n_ghost, k_n_fields); +} + + +void Dust_Update(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dt, Real gamma) { + dim3 dim1dGrid(k_ngrid, 1, 1); + dim3 dim1dBlock(TPB, 1, 1); + hipLaunchKernelGGL(Dust_Kernel, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, nz, n_ghost, n_fields, dt, gamma, params_dev); + CudaCheckError(); +} + +__global__ void Dust_Kernel(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dt, Real gamma) { + //__shared__ Real min_dt[TPB]; + // get grid indices + Real const K = 1e30; int n_cells = nx * ny * nz; int is, ie, js, je, ks, ke; - Get_Indices(n_ghost, nx, ny, nz, is, ie, js, je, ks, ke); - + cuda_utilities::Get_Real_Indices(n_ghost, nx, ny, nz, is, ie, js, je, ks, ke); // get a global thread ID - int id; - int xid, yid, zid; - int tid; - Get_GTID(id, xid, yid, zid, tid, nx, ny, nz); + int blockId = blockIdx.x + blockIdx.y * gridDim.x; + int id = threadIdx.x + blockId * blockDim.x; + int zid = id / (nx * ny); + int yid = (id - zid * nx * ny) / nx; + int xid = id - zid * nx * ny - yid * nx; + // add a thread id within the block // define physics variables Real d_gas, d_dust; // fluid mass densities - Real n; // gas number density - Real T, E, p; // temperature, energy, pressure - Real mu = 0.6; // mean molecular weight + Real n = 1; // gas number density + Real T, E, P; // temperature, energy, pressure Real vx, vy, vz; // velocities #ifdef DE Real ge; #endif // DE + dt *= 3.154e7; // in seconds + // define integration variables Real dd_dt; // instantaneous rate of change in dust density Real dd; // change in dust density at current time-step Real dd_max = 0.01; // allowable percentage of dust density increase Real dt_sub; //refined timestep - _syncthreads(); - if (xid >= is && xid < ie && yid >= js && yid < je && zid >= ks && zid < ke) { // get quantities from dev_conserved d_gas = dev_conserved[id]; + //d_dust = dev_conserved[5*n_cells + id]; d_dust = dev_conserved[5*n_cells + id]; E = dev_conserved[4*n_cells + id]; + //printf("kernel: %7.4e\n", d_dust); // make sure thread hasn't crashed - if (E < 0.0 || E != E) return; - vx = dev_conserved[1*n_cells + id] / d_gas; - vy = dev_conserved[2*n_cells + id] / d_gas; - vz = dev_conserved[3*n_cells + id] / d_gas; + // multiply small values by arbitrary constant to preserve precision + d_gas *= K; + d_dust *= K; + + if (E < 0.0 || E != E) return; + + vx = dev_conserved[1*n_cells + id] / d_gas; + vy = dev_conserved[2*n_cells + id] / d_gas; + vz = dev_conserved[3*n_cells + id] / d_gas; #ifdef DE ge = dev_conserved[(n_fields-1)*n_cells + id] / d_gas; @@ -59,113 +85,124 @@ int n_fields, Real dt, Real gamma, Real *dt_array) { #endif // DE // calculate physical quantities - p = Calc_Pressure(E, d_gas, vx, vy, vz, gamma); + P = hydro_utilities::Calc_Pressure_Primitive(E, d_gas, vx, vy, vz, gamma); Real T_init; - T_init = Calc_Temp(p, n); + T_init = hydro_utilities::Calc_Temp(P, n); #ifdef DE - T_init = Calc_Temp_DE(d_gas, ge, gamma, n); + T_init = hydro_utilities::Calc_Temp_DE(d_gas, ge, gamma, n); #endif // DE T = T_init; - // calculate change in dust density - Dust dustObj(T, n, dt, d_gas, d_dust); - dustObj.calc_tau_sp(); + Real tau_sp = calc_tau_sp(n, T); - dd_dt = dustObj.calc_dd_dt(); + dd_dt = calc_dd_dt(d_dust, tau_sp); dd = dd_dt * dt; + params_dev[0] = T; + params_dev[1] = n; + params_dev[2] = tau_sp/3.154e7; + params_dev[3] = dd_dt; + params_dev[4] = dd; + // ensure that dust density is not changing too rapidly - while (d_dust/dd > dd_max) { + bool time_refine = false; + while (dd/d_dust > dd_max) { + time_refine = true; dt_sub = dd_max * d_dust / dd_dt; - dustObj.d_dust += dt_sub * dd_dt; - dustObj.dt -= dt_sub; - dt = dustObj.dt; - dd_dt = dustObj.calc_dd_dt(); + d_dust += dt_sub * dd_dt; + dt -= dt_sub; + dd_dt = calc_dd_dt(d_dust, tau_sp); dd = dt * dd_dt; } - // update dust and gas densities - dev_conserved[5*n_cells + id] = dustObj.d_dust; - dev_conserved[id] += dd; - } - __syncthreads(); - - // do the reduction in shared memory (find the min timestep in the block) - for (unsigned int s=1; s 1) { + jstart = H.n_ghost; + jend = H.ny-H.n_ghost; + } + else { + jstart = 0; + jend = H.ny; + } + if (H.nz > 1) { + kstart = H.n_ghost; + kend = H.nz-H.n_ghost; + } + else { + kstart = 0; + kend = H.nz; + } + + // set initial values of conserved variables + for(k=kstart-1; k= kstart) and (j >= jstart) and (i >= istart)) + { + // set constant initial states + host_conserved[id] = rho; + host_conserved[1*n_cells+id] = rho*vx; + host_conserved[2*n_cells+id] = rho*vy; + host_conserved[3*n_cells+id] = rho*vz; + host_conserved[4*n_cells+id] = P/(gamma-1.0) + 0.5*rho*(vx*vx + vy*vy + vz*vz); + #ifdef DE + host_conserved[(n_fields-1)*n_cells+id] = P/(gamma-1.0); + #endif // DE + #ifdef SCALAR + host_conserved[5*n_cells+id] = rho_d; + #endif // SCALAR + } + } } + } } -__device__ void Get_GTID(int &id, int &xid, int &yid, int &zid, int &tid, int nx, int ny, int nz) { - int blockId = blockIdx.x + blockIdx.y * gridDim.x; - int id = threadIdx.x + blockId * blockDim.x; - int zid = id / (nx * ny); - int yid = (id - zid * nx * ny) / nx; - int xid = id - zid * nx * ny - yid * nx; - // add a thread id within the block - int tid = threadIdx.x; -} - -__device__ Real Calc_Pressure(Real E, Real d_gas, Real vx, Real vy, Real vz, Real gamma) { - Real p; - p = (E - 0.5 * d_gas * (vx*vx + vy*vy + vz*vz)) * (gamma - 1.0); - p = fmax(p, (Real) TINY_NUMBER); - return p; -} - -__device__ Real Calc_Temp(Real p, Real n) { - Real T = p * PRESSURE_UNIT / (n * KB); - return T; -} - -__device__ Real Calc_Temp_DE(Real d_gas, Real ge, Real gamma, Real n) { - Real T = d_gas * ge * (gamma - 1.0) * PRESSURE_UNIT / (n * KB); - return T; -} -#endif // DUST_GPU -#endif // CUDA +#endif // SCALAR +#endif // CUDA \ No newline at end of file diff --git a/src/dust/dust_cuda.h b/src/dust/dust_cuda.h index 04b9cafc8..94584f2bb 100644 --- a/src/dust/dust_cuda.h +++ b/src/dust/dust_cuda.h @@ -1,54 +1,16 @@ -/*! \file dust_cuda.h - * \brief Declarations of dust functions. */ - -#ifdef CUDA -#ifdef DUST_GPU - #ifndef DUST_CUDA_H #define DUST_CUDA_H -#include"gpu.hpp" #include -#include"global.h" - -__global__ void dust_kernel(Real *dev_conserved, int nx, int ny, int nz, -int n_ghost, int n_fields, Real dt, Real gamma, Real *dt_array); - -// general purpose functions: -__device__ void Get_Indices(int n_ghost, int nx, int ny, int nz, int &is, int &ie, int &js, int &je, int &ks, int &ke); - -__device__ void Get_GTID(int &id, int &xid, int &yid, int &zid, int &tid, int nx, int ny, int nz); - -__device__ Real Calc_Pressure(Real E, Real d_gas, Real vx, Real vy, Real vz, -Real gamma); - -__device__ Real Calc_Temp(Real p, Real n); -#ifdef DE -__device__ Real Calc_Temp_DE(Real d_gas, Real ge, Real gamma, Real n); -#endif // DE +void Dust_Update(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dt, Real gamma, Real *params_dev); -class Dust: { +__global__ void Dust_Kernel(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dt, Real gamma, Real *params_dev); - public: - Real T, n, dt, d_gas, d_dust; - Real tau_sp; - Dust(Real T_in, Real n_in, Real dt_in, Real d_gas_in, Real d_dust_in) { - T = T_in; - n = n_in; - dt = dt_in; - d_gas = d_gas_in; - d_dust = d_dust_in; - } - void calc_tau_sp(); - Real calc_dd_dt(); +void Conserved_Init(Real *host_conserved, Real rho, Real vx, Real vy, Real vz, Real P, Real rho_dust, Real gamma, int n_cells, int nx, int ny, int nz, int n_ghost, int n_fields); - private: - Real MP = 1.6726*pow(10,-24); // proton mass in g - Real YR_IN_S = 3.154*pow(10,7); // one year in s +__device__ Real calc_tau_sp(Real n, Real T); -}; +__device__ Real calc_dd_dt(Real d_dust, Real tau_sp); -#endif // DUST_CUDA_H -#endif // DUST_GPU -#endif // CUDA +#endif // DUST_CUDA_H \ No newline at end of file From 9f1d50786a774d46fe293cd99c479a3dfade6053 Mon Sep 17 00:00:00 2001 From: helenarichie Date: Tue, 3 May 2022 12:03:19 -0400 Subject: [PATCH 059/694] add dust build --- Makefile | 2 +- builds/make.type.dust | 50 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 51 insertions(+), 1 deletion(-) create mode 100644 builds/make.type.dust diff --git a/Makefile b/Makefile index aa5192cc5..16dc995e0 100644 --- a/Makefile +++ b/Makefile @@ -12,7 +12,7 @@ DIRS := src src/analysis src/chemistry_gpu src/cooling src/cooling_grackle s src/cpu src/global src/gravity src/gravity/paris src/grid src/hydro \ src/integrators src/io src/main.cpp src/main_tests.cpp \ src/model src/mpi src/old_cholla src/particles src/reconstruction \ - src/riemann_solvers src/system_tests src/utils + src/riemann_solvers src/system_tests src/utils src/dust SUFFIX ?= .$(TYPE).$(MACHINE) diff --git a/builds/make.type.dust b/builds/make.type.dust new file mode 100644 index 000000000..90cc371c5 --- /dev/null +++ b/builds/make.type.dust @@ -0,0 +1,50 @@ +#-- Default hydro + dust_gpu + +#-- separated output flag so that it can be overriden in target-specific +# for make check +OUTPUT ?= -DOUTPUT -DHDF5 + +MPI_GPU ?= + +DFLAGS += -DCUDA +DFLAGS += -DMPI_CHOLLA +DFLAGS += -DBLOCK +DFLAGS += -DPRECISION=2 +DFLAGS += -DPPMP +DFLAGS += -DHLLC + +ifeq ($(findstring cosmology,$(TYPE)),cosmology) +DFLAGS += -DSIMPLE +else +DFLAGS += -DVL +endif + +# need this if using Disk_3D +# DFLAGS += -DDISK_ICS + +# Apply a density and temperature floor +DFLAGS += -DDENSITY_FLOOR +DFLAGS += -DTEMPERATURE_FLOOR + +# Solve the Gas Internal Energy usisng a Dual Energy Formalism +DFLAGS += -DDE + +# Evolve additional scalars +# DFLAGS += -DSCALAR + +# Apply the cooling in the GPU from precomputed tables +DFLAGS += -DCOOLING_GPU + +#Measure the Timing of the different stages +DFLAGS += -DCPU_TIME + +DFLAGS += $(OUTPUT) + +#Select if the Hydro Conserved data will reside in the GPU +#and the MPI transfers are done from the GPU +#If not specified, MPI_GPU is off by default +#This is set in the system make.host file +DFLAGS += $(MPI_GPU) + +DFLAGS += -DPARALLEL_OMP +DFLAGS += -DN_OMP_THREADS=$(OMP_NUM_THREADS) \ No newline at end of file From 0caead4d1a3be3eb6ad95ac60effe252a3e1b333 Mon Sep 17 00:00:00 2001 From: ojwg Date: Tue, 3 May 2022 17:18:22 -0400 Subject: [PATCH 060/694] saving interim work --- builds/make.type.disk | 31 +- examples/scripts/parameter_file.txt | 20 +- src/cooling/cooling_wrapper.cu | 1 + src/cosmology/cosmology_functions_gpu.cu | 4 +- src/gravity/gravity_boundaries.cpp | 2 +- src/gravity/gravity_boundaries_gpu.cu | 7 + src/gravity/gravity_functions.cpp | 18 +- src/gravity/gravity_functions_gpu.cu | 26 +- src/gravity/potential_paris_galactic.cu | 2 +- src/grid/grid3D.h | 2 + src/hydro/hydro_cuda.cu | 24 +- src/main.cpp | 33 +- src/model/disk_galaxy.h | 44 ++- src/particles/feedback_CIC.cpp | 26 +- src/particles/feedback_CIC.h | 13 +- src/particles/feedback_CIC_gpu.cu | 469 +++++++++++++---------- src/particles/gravity_CIC_gpu.cu | 15 +- src/particles/particles_3D.cpp | 143 +++---- src/particles/particles_boundaries.cpp | 14 +- src/particles/supernova.h | 11 +- src/utils/timing_functions.cpp | 6 + src/utils/timing_functions.h | 2 + 22 files changed, 542 insertions(+), 371 deletions(-) diff --git a/builds/make.type.disk b/builds/make.type.disk index c77137b38..737447b35 100644 --- a/builds/make.type.disk +++ b/builds/make.type.disk @@ -1,23 +1,29 @@ -MPI_GPU = +MPI_GPU = -DMPI_GPU DFLAGS += -DPARTICLES -DFLAGS += -DPARTICLES_CPU -#DFLAGS += -DPARTICLES_GPU +#DFLAGS += -DPARTICLES_CPU +DFLAGS += -DPARTICLES_GPU #DFLAGS += -DONLY_PARTICLES DFLAGS += -DPARTICLE_IDS -DFLAGS += -DSINGLE_PARTICLE_MASS +#DFLAGS += -DSINGLE_PARTICLE_MASS +DFLAGS += -DPARTICLE_AGE +#DFLAGS += -DFEEDBACK +#DFLAGS += -DANALYSIS +#DFLAGS += -DPARTICLES_KDK DFLAGS += -DGRAVITY -#DFLAGS += -DGRAVITY_GPU +DFLAGS += -DGRAVITY_GPU # Use both -DSOR and -DPARIS_GALACTIC to run analytic test and compare solutions -DFLAGS += -DSOR -#DFLAGS += -DPARIS_GALACTIC +#DFLAGS += -DSOR +DFLAGS += -DPARIS_GALACTIC DFLAGS += -DGRAVITY_ANALYTIC_COMP +DFLAGS += -DGRAVITY_5_POINTS_GRADIENT +#DFLAGS += -DSTATIC_GRAV +#DFLAGS += -DOUTPUT_ALWAYS DFLAGS += -DCUDA DFLAGS += -DMPI_CHOLLA -DFLAGS += -DBLOCK DFLAGS += -DPRECISION=2 DFLAGS += -DPPMP DFLAGS += -DHLLC @@ -27,9 +33,12 @@ DFLAGS += -DDISK_ICS DFLAGS += -DDENSITY_FLOOR DFLAGS += -DTEMPERATURE_FLOOR +DFLAGS += -DCOOLING_GPU +#DFLAGS += -DCLOUDY_COOL DFLAGS += -DDE -DFLAGS += -DCPU_TIME -DFLAGS += -DAVERAGE_SLOW_CELLS +#DFLAGS += -DCPU_TIME +#DFLAGS += -DAVERAGE_SLOW_CELLS +DFLAGS += -DHYDRO_GPU OUTPUT ?= -DOUTPUT -DHDF5 -DSLICES DFLAGS += $(OUTPUT) @@ -39,4 +48,4 @@ DFLAGS += $(MPI_GPU) DFLAGS += -DPARALLEL_OMP DFLAGS += -DN_OMP_THREADS=$(OMP_NUM_THREADS) -DFLAGS += -DHIDE_CIC_ERRORS +#DFLAGS += -DHIDE_CIC_ERRORS diff --git a/examples/scripts/parameter_file.txt b/examples/scripts/parameter_file.txt index c6ed505ce..46f8bab6d 100644 --- a/examples/scripts/parameter_file.txt +++ b/examples/scripts/parameter_file.txt @@ -10,24 +10,23 @@ ny=256 # number of grid cells in the z dimension nz=256 # final output time -tout=10000 -#tout=3 +tout=200 # time interval for output -outstep=100 +outstep=20 # value of gamma gamma=1.66666667 # name of initial conditions init=Disk_3D_particles -bc_potential_type=1 #init=Disk_3D +bc_potential_type=1 #nfull=100 # domain properties -xmin=-15 -ymin=-15 -zmin=-15 -xlen=30 -ylen=30 -zlen=30 +xmin=-2 +ymin=-2 +zmin=-2 +xlen=4 +ylen=4 +zlen=4 # type of boundary conditions xl_bcnd=3 xu_bcnd=3 @@ -47,3 +46,4 @@ flag_delta=2 ddelta_dt=-0.001 # path to output directory outdir=./raw/ +prng_seed=42 diff --git a/src/cooling/cooling_wrapper.cu b/src/cooling/cooling_wrapper.cu index eea38d344..75b32ebc7 100644 --- a/src/cooling/cooling_wrapper.cu +++ b/src/cooling/cooling_wrapper.cu @@ -7,6 +7,7 @@ #include #include #include "../global/global.h" +#include "../global/global_cuda.h" #include "../cooling/cooling_wrapper.h" #include "../cooling/cooling_cuda.h" diff --git a/src/cosmology/cosmology_functions_gpu.cu b/src/cosmology/cosmology_functions_gpu.cu index b1162c3ca..9dfb2ef34 100644 --- a/src/cosmology/cosmology_functions_gpu.cu +++ b/src/cosmology/cosmology_functions_gpu.cu @@ -22,9 +22,9 @@ void __global__ Change_GAS_Frame_System_kernel( Real dens_factor, Real momentum_ tid_y = blockIdx.y * blockDim.y + threadIdx.y; tid_z = blockIdx.z * blockDim.z + threadIdx.z; - if (tid_x >= nx || tid_y >= ny || tid_z >= nz ) return; +if (tid_x >= nx || tid_y >= ny || tid_z >= nz ) return; - tid_grid = tid_x + tid_y*nx + tid_z*nx*ny; + tid_grid = tid_x + tid_y*nx + tid_z*nx*ny; density_d[tid_grid] = density_d[tid_grid] * dens_factor; momentum_x_d[tid_grid] = momentum_x_d[tid_grid] * momentum_factor; diff --git a/src/gravity/gravity_boundaries.cpp b/src/gravity/gravity_boundaries.cpp index caa8f4d6a..967ee6817 100644 --- a/src/gravity/gravity_boundaries.cpp +++ b/src/gravity/gravity_boundaries.cpp @@ -144,7 +144,7 @@ void Grid3D::Compute_Potential_Isolated_Boundary( int direction, int side, int // for bc_pontential_type = 1 the mod_frac is the fraction // of the disk mass contributed by the simulated particles - Real mod_frac = 0; //1.0; //0; + Real mod_frac = 0.1; //1.0; //0; Real pot_val; int i, j, k, id; for ( k=0; k #endif -#ifdef PARTICLES +//#ifdef PARTICLES #include "../model/disk_galaxy.h" -#endif +//#endif -//Set delta_t when using gravity +//Set delta_t when usi#ng gravity void Grid3D::set_dt_Gravity(){ //Delta_t for the hydro @@ -589,11 +589,7 @@ void Grid3D::Compute_Gravitational_Potential( struct parameters *P ){ #endif printDiff(p.data(),Grav.F.potential_h,Grav.nx_local,Grav.ny_local,Grav.nz_local); #endif - - #ifdef GRAVITY_ANALYTIC_COMP - Add_Analytic_Potential(); - #endif - + #ifdef CPU_TIME Timer.Grav_Potential.End(); #endif @@ -629,7 +625,7 @@ void Grid3D::Add_Analytic_Potential() { Add_Analytic_Potential_GPU(); #else #ifndef PARALLEL_OMP - Add_Analytic_Potential(0, Grav.nz_local); + Add_Analytic_Potential(0, Grav.nz_local + 2*N_GHOST_POTENTIAL ); #else #pragma omp parallel num_threads( N_OMP_THREADS ) { @@ -638,7 +634,7 @@ void Grid3D::Add_Analytic_Potential() { omp_id = omp_get_thread_num(); n_omp_procs = omp_get_num_threads(); - Get_OMP_Grid_Indxs( Grav.nz_local, n_omp_procs, omp_id, &g_start, &g_end ); + Get_OMP_Grid_Indxs( Grav.nz_local+ 2*N_GHOST_POTENTIAL, n_omp_procs, omp_id, &g_start, &g_end ); Add_Analytic_Potential(g_start, g_end); } @@ -712,7 +708,7 @@ void Grid3D::Setup_Analytic_Galaxy_Potential(int g_start, int g_end, DiskGalaxy& int nz = Grav.nz_local + 2*N_GHOST_POTENTIAL; // the fraction of the disk that's not modelled (and so its analytic contribution must be added) - Real non_mod_frac = 1; //0.0; //1.0; + Real non_mod_frac = 0.9; //0.0; //1.0; int k, j, i, id; Real x_pos, y_pos, z_pos, R; diff --git a/src/gravity/gravity_functions_gpu.cu b/src/gravity/gravity_functions_gpu.cu index 269bea75b..ff2c7f446 100644 --- a/src/gravity/gravity_functions_gpu.cu +++ b/src/gravity/gravity_functions_gpu.cu @@ -137,7 +137,7 @@ void Grid3D::Copy_Hydro_Density_to_Gravity_GPU(){ -#if defined(GRAVITY_ANALYTIC_COMP) && defined(GRAVITY_GPU) +#if defined(GRAVITY_ANALYTIC_COMP) void __global__ Add_Analytic_Potential_Kernel( Real *analytic_d, Real *potential_d, int nx_pot, int ny_pot, int nz_pot) { int tid_x, tid_y, tid_z, tid; tid_x = blockIdx.x * blockDim.x + threadIdx.x; @@ -147,13 +147,15 @@ void __global__ Add_Analytic_Potential_Kernel( Real *analytic_d, Real *potential if (tid_x >= nx_pot || tid_y >= ny_pot || tid_z >= nz_pot ) return; tid= tid_x + tid_y*nx_pot + tid_z*nx_pot*ny_pot; + + potential_d[tid] += analytic_d[tid]; /* - if (tid_x < nx_pot && tid_y == 0 && tid_z == (nz_pot/2)) { - printf("potential_d[%d, %d, %d] = %.4e\n", tid_x, tid_y, tid_z, potential_d[tid]); + if (tid_x < 10 && tid_y == (ny_pot/2) && tid_z == (nz_pot/2)) { + //printf("potential_d[%d, %d, %d] = %.4e\n", tid_x, tid_y, tid_z, potential_d[tid]); printf("analytic_d[%d, %d, %d] = %.4e\n", tid_x, tid_y, tid_z, analytic_d[tid]); - }*/ - //potential_d[tid] += analytic_d[tid]; - potential_d[tid] = analytic_d[tid]; // FIXME debug only + } + */ + } @@ -180,8 +182,13 @@ void Grid3D::Add_Analytic_Potential_GPU() { //Copy the analytic potential from the device array to the device potential array hipLaunchKernelGGL(Add_Analytic_Potential_Kernel, dim3dGrid, dim3dBlock, 0, 0, Grav.F.analytic_potential_d, Grav.F.potential_d, nx_pot, ny_pot, nz_pot); cudaDeviceSynchronize(); + /*gpuFor(10, + GPU_LAMBDA(const int i) { + printf("potential_after_analytic[%d, %d, %d] = %.4e\n", i, ny_pot/2, nz_pot/2, Grav.F.potential_d[i + nx_pot*ny_pot/2 + nx_pot*ny_pot*nz_pot/2]); + } + );*/ } -#endif //GRAVITY_ANALYTIC_COMP && GRAVITY_GPU +#endif //GRAVITY_ANALYTIC_COMP @@ -265,6 +272,11 @@ void Grid3D::Extrapolate_Grav_Potential_GPU(){ dim3 dim3dBlock(tpb_x, tpb_y, tpb_z); hipLaunchKernelGGL(Extrapolate_Grav_Potential_Kernel, dim3dGrid, dim3dBlock, 0, 0, C.d_Grav_potential, Grav.F.potential_d, Grav.F.potential_1_d, nx_pot, ny_pot, nz_pot, nx_grid, ny_grid, nz_grid, n_offset, dt_now, dt_prev, Grav.INITIAL, cosmo_factor ); + /*gpuFor(10, + GPU_LAMBDA(const int i) { + printf("extrapolated potential[%d, %d, %d] = %.4e\n", i, ny_pot/2, nz_pot/2, C.d_Grav_potential[i + nx_pot*ny_pot/2 + nx_pot*ny_pot*nz_pot/2]); + } + );*/ } diff --git a/src/gravity/potential_paris_galactic.cu b/src/gravity/potential_paris_galactic.cu index 2eb1fea04..8a88275fe 100644 --- a/src/gravity/potential_paris_galactic.cu +++ b/src/gravity/potential_paris_galactic.cu @@ -58,7 +58,7 @@ void Potential_Paris_Galactic::Get_Potential(const Real *const density, Real *co const Real dy = dr_[1]; const Real dz = dr_[0]; - const Real md = 0; //galaxy.getM_d(); //FIXME temporary change for debugging small # clusters + const Real md = 0.1*galaxy.getM_d(); //FIXME temporary change for debugging small # clusters const Real rd = galaxy.getR_d(); const Real zd = galaxy.getZ_d(); diff --git a/src/grid/grid3D.h b/src/grid/grid3D.h index 4732cb845..426f4d329 100644 --- a/src/grid/grid3D.h +++ b/src/grid/grid3D.h @@ -780,6 +780,8 @@ class Grid3D #endif//PARTICLES_GPU #ifdef GRAVITY_GPU void Copy_Particles_Density_GPU(); + int Load_Particles_Density_Boundary_to_Buffer_GPU( int direction, int side, Real *buffer ); + void Unload_Particles_Density_Boundary_From_Buffer_GPU( int direction, int side, Real *buffer ); #endif//GRAVITY_GPU #endif//PARTICLES diff --git a/src/hydro/hydro_cuda.cu b/src/hydro/hydro_cuda.cu index 185d467d7..62c1e0c37 100644 --- a/src/hydro/hydro_cuda.cu +++ b/src/hydro/hydro_cuda.cu @@ -380,8 +380,28 @@ __global__ void Update_Conserved_Variables_3D(Real *dev_conserved, dev_conserved[4*n_cells + id] += Ekin_1 - Ekin_0; #endif - - #endif +/* + //if (xid > n_ghost-1 && xid < 10 && yid == 128 && zid == 137) { + //if (xid == 50 && yid == 118 && zid < 128) { + if (xid == 50 && yid == 118) { + d = dev_conserved[ id]; + d_inv = 1.0 / d; + vx = dev_conserved[1*n_cells + id] * d_inv; + vy = dev_conserved[2*n_cells + id] * d_inv; + vz = dev_conserved[3*n_cells + id] * d_inv; + + //Real P = (dev_conserved[4*n_cells + id] - 0.5*d*(vx*vx + vy*vy + vz*vz)) * (gamma - 1.0); + //printf("gx,gy,gz[%d, %d, %d] = [%.4e, %.4e, %4e]\n", xid, yid, zid, gx, gy, gz); + //printf("vx, vy, vz[%d, %d, %d] = [%.4e, %.4e, %4e]\n", xid, yid, zid, vx_n, vy_n, vz_n); + //printf("P[%d, %d, %d] = %4e\n", xid, yid, zid, P); + //printf("d[%d, %d, %d] = %4e\n", xid, yid, zid, d); + // printf("vx,vy,vz[%d, %d, %d] = [%.4e, %.4e, %4e]\n", xid, yid, zid, vx, vy, vz); + #ifdef DE + //printf("U[%d, %d, %d] = %4e\n", xid, yid, zid, dev_conserved[5*n_cells + id] * d_inv); + #endif + }*/ + + #endif //GRAVITY #if !( defined(DENSITY_FLOOR) && defined(TEMPERATURE_FLOOR) ) diff --git a/src/main.cpp b/src/main.cpp index eb5f3d51c..a47b509d1 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -15,7 +15,10 @@ #include "utils/error_handling.h" #ifdef FEEDBACK #include "particles/supernova.h" -#endif +#ifdef ANALYSIS +#include "analysis/feedback_analysis.h" +#endif +#endif //FEEDBACK int main(int argc, char *argv[]) @@ -122,9 +125,12 @@ int main(int argc, char *argv[]) #ifdef ANALYSIS G.Initialize_Analysis_Module(&P); if ( G.Analysis.Output_Now ) G.Compute_and_Output_Analysis(&P); + #ifdef FEEDBACK + FeedbackAnalysis feedback_analysis(G); + #endif #endif - #ifdef FEEDBACK + #ifdef FEEDBACK //TODO: refactor this: encapsulate init in a method G.countSN = 0; G.countResolved = 0; G.countUnresolved = 0; @@ -136,6 +142,8 @@ int main(int argc, char *argv[]) #else Supernova::initState(&P, G.Particles.n_local); #endif // MPI_CHOLLA + #else // else we have PARTICLES_CPU + //Supernova::initState(&P); #endif // PARTICLES_GPU #endif // FEEDBACK @@ -153,6 +161,10 @@ int main(int argc, char *argv[]) G.Set_Boundary_Conditions_Grid(P); chprintf("Boundary conditions set.\n"); + #ifdef GRAVITY_ANALYTIC_COMP + G.Add_Analytic_Potential(); + #endif + #ifdef PARTICLES // Get the particles acceleration for the first timestep G.Get_Particles_Acceleration(); @@ -231,9 +243,8 @@ int main(int argc, char *argv[]) //Set the Grid boundary conditions for next time step G.Set_Boundary_Conditions_Grid(P); - - #if defined(GRAVITY_ANALYTIC_COMP) && !defined(GRAVITY_GPU) - // add analytic component to gravity potential. + + #ifdef GRAVITY_ANALYTIC_COMP G.Add_Analytic_Potential(); #endif @@ -245,18 +256,16 @@ int main(int argc, char *argv[]) #ifdef FEEDBACK Real fdti = G.Cluster_Feedback(); if (fdti != 0 && dti != 0) { - printf("DTI COMP: returned: %.4e [%.4e kyr]\n", fdti, 1/fdti); - printf(" current: %.4e [ %.4e kyr ] \n", dti, 1/dti); + chprintf("DTI COMP: returned: %.4e [%.4e kyr]\n", fdti, 1/fdti); + chprintf(" current: %.4e [ %.4e kyr ] \n", dti, 1/dti); - } else { - printf("DTI COMP: returned: %.4e, current: %.4e\n", fdti, dti); - } + } if (fdti > dti) { - printf(" CHANGING\n"); + chprintf(" UPDATING dti\n"); dti = fdti; } #ifdef ANALYSIS - G.Compute_Gas_Velocity_Dispersion(); + feedback_analysis.Compute_Gas_Velocity_Dispersion(G); #endif #endif diff --git a/src/model/disk_galaxy.h b/src/model/disk_galaxy.h index ba4909604..fd1e33c4f 100644 --- a/src/model/disk_galaxy.h +++ b/src/model/disk_galaxy.h @@ -2,6 +2,8 @@ #define DISK_GALAXY #include +#include +#include #include "../global/global.h" class DiskGalaxy { @@ -142,17 +144,49 @@ class DiskGalaxy { Real getM_d() const { return M_d; }; Real getR_d() const { return R_d; }; Real getZ_d() const { return Z_d; }; - Real getM_vir() { return M_vir; }; - Real getR_vir() { return R_vir; }; - Real getC_vir() { return c_vir; }; - Real getR_cool() { return r_cool; }; + Real getM_vir() const { return M_vir; }; + Real getR_vir() const { return R_vir; }; + Real getC_vir() const { return c_vir; }; + Real getR_cool() const { return r_cool; }; + +}; + +class ClusteredDiskGalaxy: public DiskGalaxy { + private: + Real lower_cluster_mass, higher_cluster_mass; + Real normalization; + + public: + ClusteredDiskGalaxy(Real lm, Real hm, Real md, Real rd, Real zd, Real mvir, Real rvir, Real cvir, Real rcool) + : DiskGalaxy {md, rd, zd, mvir, rvir, cvir, rcool}, lower_cluster_mass {lm}, higher_cluster_mass {hm} { + //if (lower_cluster_mass >= higher_cluster_mass) + normalization = 1/log(higher_cluster_mass/lower_cluster_mass); + }; + + Real getLowerClusterMass() const {return lower_cluster_mass;} + Real getHigherClusterMass() const {return higher_cluster_mass;} + Real getNormalization() const {return normalization;} + + + std::vector generateClusterPopulationMasses(int N, std::mt19937_64 generator) { + std::vector population; + for (int i = 0; i < N; i++) { + population.push_back(singleClusterMass(generator)); + } + return population; + } + + Real singleClusterMass(std::mt19937_64 generator) { + std::uniform_real_distribution uniform_distro(0, 1); + return lower_cluster_mass * exp(uniform_distro(generator)/normalization); + } }; namespace Galaxies { // all masses in M_sun and all distances in kpc //static DiskGalaxy MW(6.5e10, 3.5, (3.5/5.0), 1.0e12, 261, 20, 157.0); - static DiskGalaxy MW(6.5e10, 2.7, 0.7, 1.077e12, 261, 18, 157.0); + static ClusteredDiskGalaxy MW(1e3, 5e5, 6.5e10, 2.7, 0.7, 1.077e12, 261, 18, 157.0); static DiskGalaxy M82(1.0e10, 0.8, 0.15, 5.0e10, 0.8/0.015, 10, 100.0); }; diff --git a/src/particles/feedback_CIC.cpp b/src/particles/feedback_CIC.cpp index 2f61e17b6..fe6fda41c 100644 --- a/src/particles/feedback_CIC.cpp +++ b/src/particles/feedback_CIC.cpp @@ -4,10 +4,12 @@ #ifdef FEEDBACK #include +#include #include "feedback_CIC.h" #include "particles_3D.h" #include "../grid/grid3D.h" #include "../io/io.h" +#include "../global/global.h" #include "supernova.h" #include #include @@ -26,12 +28,16 @@ std::random_device rd; //std::mt19937_64 gen(rd()); std::mt19937_64 generator(42); //FIXME read this in from init params or ChollaPrngGenerator +/* +void Supernova::initState(struct parameters *P) { + generator.seed(P->prng_seed); +}*/ -std::tuple getClusterFeedback(Real t, Real dt, Real age, Real density) { +std::tuple getClusterFeedback(Real t, Real dt, Real mass, Real age, Real density) { int N = 0; - if (t + age <= 1.0e4) { - std::poisson_distribution distribution(Supernova::SNR * dt); + if (t - age <= 1.0e4) { + std::poisson_distribution distribution(Supernova::SNR * mass * dt); N = distribution(generator); } Real n_0 = density * DENSITY_UNIT / (Supernova::MU*MP); // in cm^{-3} @@ -48,6 +54,10 @@ std::tuple getClusterFeedback(Real t, Real dt, Real Real Grid3D::Cluster_Feedback() { + #ifdef CPU_TIME + Timer.Feedback.Start(); + #endif + Real max_sn_dti = 0; #ifdef PARTICLES_GPU max_sn_dti = Cluster_Feedback_GPU(); @@ -101,7 +111,7 @@ Real Grid3D::Cluster_Feedback() { MPI_Reduce(&partiallyReducedInfo, &reducedInfo, N_INFO, MPI_CHREAL, MPI_SUM, root, world); if (procID==root) { #else - reducedInfo = partiallyReducedInfo; + memcpy(reducedInfo, partiallyReducedInfo, sizeof(partiallyReducedInfo)); #endif //MPI_CHOLLA countSN += reducedInfo[SN]; @@ -133,6 +143,11 @@ Real Grid3D::Cluster_Feedback() { free(thread_dti); #endif //PARTICLES_GPU + + #ifdef CPU_TIME + Timer.Feedback.End(); + #endif + return max_sn_dti; } @@ -181,9 +196,10 @@ void Grid3D::Cluster_Feedback_Function(part_int_t p_start, part_int_t p_end, Rea pcell_z = (int) floor( ( pos_z - zMin ) / H.dz ) + H.n_ghost; pcell_index = pcell_x + pcell_y*nx_g + pcell_z*nx_g*ny_g; - auto [N, energy, mass, momentum, r_sf] = getClusterFeedback(H.t, H.dt, Particles.age[pIndx], C.density[pcell_index]); + auto [N, energy, mass, momentum, r_sf] = getClusterFeedback(H.t, H.dt, Particles.mass[pIndx], Particles.age[pIndx], C.density[pcell_index]); if (N == 0) continue; + Particles.mass[pIndx] -= mass; feedback_energy = energy / dV; feedback_density = mass / dV; feedback_momentum = momentum / sqrt(3) / dV; diff --git a/src/particles/feedback_CIC.h b/src/particles/feedback_CIC.h index 7e5006043..5f89886ae 100644 --- a/src/particles/feedback_CIC.h +++ b/src/particles/feedback_CIC.h @@ -1,10 +1,6 @@ -#ifdef PARTICLES -#ifdef DE -#ifdef PARTICLE_AGE +#if defined(PARTICLES) && defined(DE) && defined(PARTICLE_AGE) +#pragma once - -#ifndef FEEDBACK_CIC_H -#define FEEDBACK_CIC_H #include "../global/global.h" const int N_INFO = 5; @@ -13,7 +9,4 @@ Real getClusterEnergyFeedback(Real t, Real dt, Real age); Real getClusterMassFeedback(Real t, Real dt, Real age); std::tuple getClusterFeedback(Real t, Real dt, Real age, Real density); -#endif -#endif -#endif -#endif +#endif // PARTICLES et. al diff --git a/src/particles/feedback_CIC_gpu.cu b/src/particles/feedback_CIC_gpu.cu index 097b99f9c..6e9596c43 100644 --- a/src/particles/feedback_CIC_gpu.cu +++ b/src/particles/feedback_CIC_gpu.cu @@ -7,8 +7,11 @@ #include "../grid/grid3D.h" #include "../global/global_cuda.h" #include "../global/global.h" +#include "../io/io.h" #include "supernova.h" +#define TPB_FEEDBACK 64 +#define FEED_INFO_N 5 namespace Supernova { curandStateMRG32k3a_t* curandStates; @@ -92,198 +95,233 @@ __device__ Real Calc_Timestep(Real gamma, Real *density, Real *momentum_x, Real __global__ void Cluster_Feedback_Kernel(part_int_t n_local, Real* pos_x_dev, Real* pos_y_dev, Real* pos_z_dev, - Real mass, Real* age_dev, Real xMin, Real yMin, Real zMin, Real xLen, Real yLen, Real zLen, - Real dx, Real dy, Real dz, int nx_g, int ny_g, int nz_g, int n_ghost, Real t, Real dt, Real* dti, + Real* mass_dev, Real* age_dev, Real xMin, Real yMin, Real zMin, Real xLen, Real yLen, Real zLen, + Real dx, Real dy, Real dz, int nx_g, int ny_g, int nz_g, int n_ghost, Real t, Real dt, Real* dti, Real* info, Real* density, Real* gasEnergy, Real* energy, Real* momentum_x, Real* momentum_y, Real* momentum_z, Real gamma, curandStateMRG32k3a_t* states){ - int tid = blockIdx.x * blockDim.x + threadIdx.x ; - if ( tid >= n_local) return; - - Real xMax, yMax, zMax; - xMax = xMin + xLen; - yMax = yMin + yLen; - zMax = zMin + zLen; - - Real pos_x, pos_y, pos_z; - Real cell_center_x, cell_center_y, cell_center_z; - Real delta_x, delta_y, delta_z; - Real feedback_energy, feedback_density, feedback_momentum, n_0, shell_radius; - bool is_resolved; - int pcell_x, pcell_y, pcell_z, pcell_index; - Real dV = dx*dy*dz; - Real local_dti = 0.0; - - pos_x = pos_x_dev[tid]; - pos_y = pos_y_dev[tid]; - pos_z = pos_z_dev[tid]; - - bool in_local = (pos_x >= xMin && pos_x < zMax) && - (pos_y >= yMin && pos_y < yMax) && - (pos_z >= zMin && pos_z < zMax); - if (!in_local) { - printf(" Feedback GPU: Particle outside local domain [%f %f %f] [%f %f] [%f %f] [%f %f]\n ", pos_x, pos_y, pos_z, xMin, xMax, yMin, yMax, zMin, zMax); - return; + __shared__ Real s_info[FEED_INFO_N*TPB_FEEDBACK]; // for collecting SN feedback information, like # of SNe or # resolved. + int tid = threadIdx.x; + int gtid = blockIdx.x * blockDim.x + tid ; + + s_info[FEED_INFO_N*tid] = 0; + s_info[FEED_INFO_N*tid + 1] = 0; + s_info[FEED_INFO_N*tid + 2] = 0; + s_info[FEED_INFO_N*tid + 3] = 0; + s_info[FEED_INFO_N*tid + 4] = 0; + + if ( gtid < n_local) { + Real xMax, yMax, zMax; + xMax = xMin + xLen; + yMax = yMin + yLen; + zMax = zMin + zLen; + + Real pos_x, pos_y, pos_z; + Real cell_center_x, cell_center_y, cell_center_z; + Real delta_x, delta_y, delta_z; + Real feedback_energy = 0, feedback_density=0, feedback_momentum=0, n_0, shell_radius; + bool is_resolved = false; + int pcell_x, pcell_y, pcell_z, pcell_index; + Real dV = dx*dy*dz; + Real local_dti = 0.0; + + pos_x = pos_x_dev[gtid]; + pos_y = pos_y_dev[gtid]; + pos_z = pos_z_dev[gtid]; + + bool in_local = (pos_x >= xMin && pos_x < zMax) && + (pos_y >= yMin && pos_y < yMax) && + (pos_z >= zMin && pos_z < zMax); + if (!in_local) { + printf(" Feedback GPU: Particle outside local domain [%f %f %f] [%f %f] [%f %f] [%f %f]\n ", + pos_x, pos_y, pos_z, xMin, xMax, yMin, yMax, zMin, zMax); + } + + int indx_x = (int) floor( ( pos_x - xMin - 0.5*dx ) / dx ); + int indx_y = (int) floor( ( pos_y - yMin - 0.5*dy ) / dy ); + int indx_z = (int) floor( ( pos_z - zMin - 0.5*dz ) / dz ); + + bool ignore = indx_x < -1 || indx_y < -1 || indx_z < -1 || indx_x > nx_g-3 || indx_y > ny_g-3 || indx_y > nz_g-3; + if (ignore) { + printf(" Feedback GPU: Particle CIC index err [%f %f %f] [%d %d %d] [%d %d %d] \n ", + pos_x, pos_y, pos_z, indx_x, indx_y, indx_z, nx_g, ny_g, nz_g); + } + + if (!ignore && in_local) { + pcell_x = (int) floor( ( pos_x - xMin ) / dx ) + n_ghost; + pcell_y = (int) floor( ( pos_y - yMin ) / dy ) + n_ghost; + pcell_z = (int) floor( ( pos_z - zMin ) / dz ) + n_ghost; + pcell_index = pcell_x + pcell_y*nx_g + pcell_z*nx_g*ny_g; + + unsigned int N = 0; + if ((t - age_dev[gtid]) <= Supernova::SN_ERA) { + curandStateMRG32k3a_t state = states[gtid]; + N = curand_poisson (&state, Supernova::SNR * mass_dev[gtid] * dt); + states[gtid] = state; + + if (N > 0) { + // first subtract ejected mass from particle + mass_dev[gtid] -= N * Supernova::MASS_PER_SN; + feedback_energy = N * Supernova::ENERGY_PER_SN / dV; + feedback_density = N * Supernova::MASS_PER_SN / dV; + n_0 = density[pcell_index] * DENSITY_UNIT / (Supernova::MU*MP); + feedback_momentum = Supernova::FINAL_MOMENTUM * pow(n_0, -0.17) * pow(N, 0.93) / sqrt(3.0) / dV; + shell_radius = Supernova::R_SH * pow(n_0, -0.46) * pow(N, 0.29); + is_resolved = 3 * max(dx, max(dy, dz)) <= shell_radius; + + s_info[FEED_INFO_N*tid] = 1.*N; + if (is_resolved) s_info[FEED_INFO_N*tid + 1] = 1.0; + else s_info[FEED_INFO_N*tid + 2] = 1.0; + + cell_center_x = xMin + indx_x*dx + 0.5*dx; + cell_center_y = yMin + indx_y*dy + 0.5*dy; + cell_center_z = zMin + indx_z*dz + 0.5*dz; + delta_x = 1 - ( pos_x - cell_center_x ) / dx; + delta_y = 1 - ( pos_y - cell_center_y ) / dy; + delta_z = 1 - ( pos_z - cell_center_z ) / dz; + indx_x += n_ghost; + indx_y += n_ghost; + indx_z += n_ghost; + + int indx = indx_x + indx_y*nx_g + indx_z*nx_g*ny_g; + + if (!is_resolved) s_info[FEED_INFO_N*tid + 4] = feedback_momentum * dV; + if (is_resolved) { + atomicAdd(&density[indx], feedback_density * delta_x * delta_y * delta_z); + atomicAdd(&gasEnergy[indx], feedback_energy * delta_x * delta_y * delta_z); + atomicAdd(&energy[indx], feedback_energy * delta_x * delta_y * delta_z); + s_info[FEED_INFO_N*tid + 3] = feedback_energy * fabs(delta_x * delta_y * delta_z) * dV; + } else { + atomicAdd(&momentum_x[indx], -delta_x * feedback_momentum); + atomicAdd(&momentum_y[indx], -delta_y * feedback_momentum); + atomicAdd(&momentum_z[indx], -delta_z * feedback_momentum); + //s_info[FEED_INFO_N*tid + 4] = (fabs(delta_x) /*+ fabs(delta_y) + fabs(delta_z)*/)*feedback_momentum * dV; + } + local_dti = fmax(local_dti, Calc_Timestep(gamma, density, momentum_x, momentum_y, momentum_z, energy, indx, dx, dy, dz)); + + indx = (indx_x+1) + indx_y*nx_g + indx_z*nx_g*ny_g; + if (is_resolved) { + atomicAdd(&density[indx], feedback_density * (1-delta_x) * delta_y * delta_z); + atomicAdd(&gasEnergy[indx], feedback_energy * (1-delta_x) * delta_y * delta_z); + atomicAdd(&energy[indx], feedback_energy * (1-delta_x) * delta_y * delta_z); + s_info[FEED_INFO_N*tid + 3] += feedback_energy * fabs((1-delta_x) * delta_y * delta_z) * dV; + } else { + atomicAdd(&momentum_x[indx], delta_x * feedback_momentum); + atomicAdd(&momentum_y[indx], -delta_y * feedback_momentum); + atomicAdd(&momentum_z[indx], -delta_z * feedback_momentum); + // s_info[FEED_INFO_N*tid + 4] += (fabs(delta_x) /*+ fabs(delta_y) + fabs(delta_z)*/)*feedback_momentum * dV; + } + local_dti = fmax(local_dti, Calc_Timestep(gamma, density, momentum_x, momentum_y, momentum_z, energy, indx, dx, dy, dz)); + + indx = indx_x + (indx_y+1)*nx_g + indx_z*nx_g*ny_g; + if (is_resolved) { + atomicAdd(&density[indx], feedback_density * delta_x * (1-delta_y) * delta_z); + atomicAdd(&gasEnergy[indx], feedback_energy * delta_x * (1-delta_y) * delta_z); + atomicAdd(&energy[indx], feedback_energy * delta_x * (1-delta_y) * delta_z); + s_info[FEED_INFO_N*tid + 3] += feedback_energy * fabs(delta_x * (1-delta_y )* delta_z) * dV; + } else { + atomicAdd(&momentum_x[indx], -delta_x * feedback_momentum); + atomicAdd(&momentum_y[indx], delta_y * feedback_momentum); + atomicAdd(&momentum_z[indx], -delta_z * feedback_momentum); + //s_info[FEED_INFO_N*tid + 4] += (fabs(delta_x) /*+ fabs(delta_y) + fabs(delta_z)*/)*feedback_momentum * dV; + } + local_dti = fmax(local_dti, Calc_Timestep(gamma, density, momentum_x, momentum_y, momentum_z, energy, indx, dx, dy, dz)); + + indx = indx_x + indx_y*nx_g + (indx_z+1)*nx_g*ny_g; + if (is_resolved) { + atomicAdd(&density[indx], feedback_density * delta_x * delta_y * (1-delta_z)); + atomicAdd(&gasEnergy[indx], feedback_energy * delta_x * delta_y * (1-delta_z)); + atomicAdd(&energy[indx], feedback_energy * delta_x * delta_y * (1-delta_z)); + s_info[FEED_INFO_N*tid + 3] += feedback_energy * fabs(delta_x * delta_y * (1 - delta_z)) * dV; + } else { + atomicAdd(&momentum_x[indx], -delta_x * feedback_momentum); + atomicAdd(&momentum_y[indx], -delta_y * feedback_momentum); + atomicAdd(&momentum_z[indx], delta_z * feedback_momentum); + //s_info[FEED_INFO_N*tid + 4] += (fabs(delta_x) /*+ fabs(delta_y) + fabs(delta_z)*/)*feedback_momentum * dV; + } + local_dti = fmax(local_dti, Calc_Timestep(gamma, density, momentum_x, momentum_y, momentum_z, energy, indx, dx, dy, dz)); + + indx = (indx_x+1) + (indx_y+1)*nx_g + indx_z*nx_g*ny_g; + if (is_resolved) { + atomicAdd(&density[indx], feedback_density * (1-delta_x) * (1-delta_y) * delta_z); + atomicAdd(&gasEnergy[indx], feedback_energy * (1-delta_x) * (1-delta_y) * delta_z); + atomicAdd(&energy[indx], feedback_energy * (1-delta_x) * (1-delta_y) * delta_z); + s_info[FEED_INFO_N*tid + 3] += feedback_energy * fabs((1-delta_x) * (1-delta_y) * delta_z) * dV; + } else { + atomicAdd(&momentum_x[indx], delta_x * feedback_momentum); + atomicAdd(&momentum_y[indx], delta_y * feedback_momentum); + atomicAdd(&momentum_z[indx], -delta_z * feedback_momentum); + //s_info[FEED_INFO_N*tid + 4] += (fabs(delta_x) /*+ fabs(delta_y) + fabs(delta_z)*/)*feedback_momentum * dV; + } + local_dti = fmax(local_dti, Calc_Timestep(gamma, density, momentum_x, momentum_y, momentum_z, energy, indx, dx, dy, dz)); + + indx = (indx_x+1) + indx_y*nx_g + (indx_z+1)*nx_g*ny_g; + if (is_resolved) { + atomicAdd(&density[indx], feedback_density * (1-delta_x) * delta_y * (1-delta_z)); + atomicAdd(&gasEnergy[indx], feedback_energy * (1-delta_x) * delta_y * (1-delta_z)); + atomicAdd(&energy[indx], feedback_energy * (1-delta_x) * delta_y * (1-delta_z)); + s_info[FEED_INFO_N*tid + 3] += feedback_energy * fabs((1-delta_x) * delta_y * (1-delta_z)) * dV; + } else { + atomicAdd(&momentum_x[indx], delta_x * feedback_momentum); + atomicAdd(&momentum_y[indx], -delta_y * feedback_momentum); + atomicAdd(&momentum_z[indx], delta_z * feedback_momentum); + //s_info[FEED_INFO_N*tid + 4] += (fabs(delta_x) /*+ fabs(delta_y) + fabs(delta_z)*/)*feedback_momentum * dV; + } + local_dti = fmax(local_dti, Calc_Timestep(gamma, density, momentum_x, momentum_y, momentum_z, energy, indx, dx, dy, dz)); + + indx = indx_x + (indx_y+1)*nx_g + (indx_z+1)*nx_g*ny_g; + if (is_resolved) { + atomicAdd(&density[indx], feedback_density * delta_x * (1-delta_y) * (1-delta_z)); + atomicAdd(&gasEnergy[indx], feedback_energy * delta_x * (1-delta_y) * (1-delta_z)); + atomicAdd(&energy[indx], feedback_energy * delta_x * (1-delta_y) * (1-delta_z)); + s_info[FEED_INFO_N*tid + 3] += feedback_energy * fabs(delta_x * (1-delta_y) * (1-delta_z)) * dV; + } else { + atomicAdd(&momentum_x[indx], -delta_x * feedback_momentum); + atomicAdd(&momentum_y[indx], delta_y * feedback_momentum); + atomicAdd(&momentum_z[indx], delta_z * feedback_momentum); + //s_info[FEED_INFO_N*tid + 4] += (fabs(delta_x) /*+ fabs(delta_y) + fabs(delta_z)*/)*feedback_momentum * dV; + } + local_dti = fmax(local_dti, Calc_Timestep(gamma, density, momentum_x, momentum_y, momentum_z, energy, indx, dx, dy, dz)); + + indx = (indx_x+1) + (indx_y+1)*nx_g + (indx_z+1)*nx_g*ny_g; + if (is_resolved) { + atomicAdd(&density[indx], feedback_density * (1-delta_x) * (1-delta_y) * (1-delta_z)); + atomicAdd(&gasEnergy[indx], feedback_energy * (1-delta_x) * (1-delta_y) * (1-delta_z)); + atomicAdd(&energy[indx], feedback_energy * (1-delta_x) * (1-delta_y) * (1-delta_z)); + s_info[FEED_INFO_N*tid + 3] += feedback_energy * fabs((1-delta_x) * (1-delta_y) * (1-delta_z)) * dV; + } else { + atomicAdd(&momentum_x[indx], delta_x * feedback_momentum); + atomicAdd(&momentum_y[indx], delta_y * feedback_momentum); + atomicAdd(&momentum_z[indx], delta_z * feedback_momentum); + //s_info[FEED_INFO_N*tid + 4] += (fabs(delta_x) /*+ fabs(delta_y) + fabs(delta_z)*/)*feedback_momentum * dV; + } + local_dti = fmax(local_dti, Calc_Timestep(gamma, density, momentum_x, momentum_y, momentum_z, energy, indx, dx, dy, dz)); + atomicMax(dti, local_dti); + } + } + } } - int indx_x = (int) floor( ( pos_x - xMin - 0.5*dx ) / dx ); - int indx_y = (int) floor( ( pos_y - yMin - 0.5*dy ) / dy ); - int indx_z = (int) floor( ( pos_z - zMin - 0.5*dz ) / dz ); - - bool ignore = indx_x < -1 || indx_y < -1 || indx_z < -1 || indx_x > nx_g-3 || indx_y > ny_g-3 || indx_y > nz_g-3; - if (ignore) { - printf(" Feedback GPU: Particle CIC index err [%f %f %f] [%d %d %d] [%d %d %d] \n ", pos_x, pos_y, pos_z, indx_x, indx_y, indx_z, nx_g, ny_g, nz_g); + __syncthreads(); + + //reduce the info from all the threads in the block + for (unsigned int s = blockDim.x/2; s > 0; s>>=1) { + if(tid < s) { + s_info[FEED_INFO_N*tid] += s_info[FEED_INFO_N*(tid + s)]; + s_info[FEED_INFO_N*tid + 1] += s_info[FEED_INFO_N*(tid + s) + 1]; + s_info[FEED_INFO_N*tid + 2] += s_info[FEED_INFO_N*(tid + s) + 2]; + s_info[FEED_INFO_N*tid + 3] += s_info[FEED_INFO_N*(tid + s) + 3]; + s_info[FEED_INFO_N*tid + 4] += s_info[FEED_INFO_N*(tid + s) + 4]; + } + __syncthreads(); } - pcell_x = (int) floor( ( pos_x - xMin ) / dx ) + n_ghost; - pcell_y = (int) floor( ( pos_y - yMin ) / dy ) + n_ghost; - pcell_z = (int) floor( ( pos_z - zMin ) / dz ) + n_ghost; - pcell_index = pcell_x + pcell_y*nx_g + pcell_z*nx_g*ny_g; - - if (t + age_dev[tid] > Supernova::SN_ERA) return; - - curandStateMRG32k3a_t state = states[tid]; // <- more efficient? - unsigned int N = curand_poisson (&state, Supernova::SNR * dt); - states[tid] = state; - - if (N == 0) return; - - feedback_energy = N * Supernova::ENERGY_PER_SN / dV; - feedback_density = N * Supernova::MASS_PER_SN / dV; - n_0 = density[pcell_index] * DENSITY_UNIT / (Supernova::MU*MP); - feedback_momentum = Supernova::FINAL_MOMENTUM * pow(n_0, -0.17) * pow(N, 0.93) / sqrt(3.0) / dV; - shell_radius = Supernova::R_SH * pow(n_0, -0.46) * pow(N, 0.29); - is_resolved = 3 * max(dx, max(dy, dz)) <= shell_radius; - - /*printf(" [%d]: got %d SN\n", tid, N); - if (is_resolved) printf(" [%d] resolved\n", tid); - else printf(" [%d] NOT resolved\n", tid); - printf(" [%d] E=%.3e, D=%.3e, P=%.3e, S_r=%.3e\n", tid, - feedback_energy*dV*MASS_UNIT*LENGTH_UNIT*LENGTH_UNIT/TIME_UNIT/TIME_UNIT, - feedback_density*DENSITY_UNIT / (Supernova::MU*MP), - feedback_momentum*dV*VELOCITY_UNIT/1e5, shell_radius); - */ - cell_center_x = xMin + indx_x*dx + 0.5*dx; - cell_center_y = yMin + indx_y*dy + 0.5*dy; - cell_center_z = zMin + indx_z*dz + 0.5*dz; - delta_x = 1 - ( pos_x - cell_center_x ) / dx; - delta_y = 1 - ( pos_y - cell_center_y ) / dy; - delta_z = 1 - ( pos_z - cell_center_z ) / dz; - indx_x += n_ghost; - indx_y += n_ghost; - indx_z += n_ghost; - - int indx = indx_x + indx_y*nx_g + indx_z*nx_g*ny_g; - if (is_resolved) { - atomicAdd(&density[indx], feedback_density * delta_x * delta_y * delta_z); - atomicAdd(&gasEnergy[indx], feedback_energy * delta_x * delta_y * delta_z); - atomicAdd(&energy[indx], feedback_energy * delta_x * delta_y * delta_z); - //info[threadId*N_INFO + 3] += feedback_energy * fabs(delta_x * delta_y * delta_z) * dV; - } else { - atomicAdd(&momentum_x[indx], -delta_x * feedback_momentum); - atomicAdd(&momentum_y[indx], -delta_y * feedback_momentum); - atomicAdd(&momentum_z[indx], -delta_z * feedback_momentum); - //info[threadId*N_INFO + 4] += (fabs(delta_x) /*+ fabs(delta_y) + fabs(delta_z)*/)*feedback_momentum * dV; - } - local_dti = fmax(local_dti, Calc_Timestep(gamma, density, momentum_x, momentum_y, momentum_z, energy, indx, dx, dy, dz)); - - indx = (indx_x+1) + indx_y*nx_g + indx_z*nx_g*ny_g; - if (is_resolved) { - atomicAdd(&density[indx], feedback_density * (1-delta_x) * delta_y * delta_z); - atomicAdd(&gasEnergy[indx], feedback_energy * (1-delta_x) * delta_y * delta_z); - atomicAdd(&energy[indx], feedback_energy * (1-delta_x) * delta_y * delta_z); - //info[threadId*N_INFO + 3] += feedback_energy * fabs((1-delta_x) * delta_y * delta_z) * dV; - } else { - atomicAdd(&momentum_x[indx], delta_x * feedback_momentum); - atomicAdd(&momentum_y[indx], -delta_y * feedback_momentum); - atomicAdd(&momentum_z[indx], -delta_z * feedback_momentum); - //info[threadId*N_INFO + 4] += (fabs(delta_x) /*+ fabs(delta_y) + fabs(delta_z)*/)*feedback_momentum * dV; - } - local_dti = fmax(local_dti, Calc_Timestep(gamma, density, momentum_x, momentum_y, momentum_z, energy, indx, dx, dy, dz)); - - indx = indx_x + (indx_y+1)*nx_g + indx_z*nx_g*ny_g; - if (is_resolved) { - atomicAdd(&density[indx], feedback_density * delta_x * (1-delta_y) * delta_z); - atomicAdd(&gasEnergy[indx], feedback_energy * delta_x * (1-delta_y) * delta_z); - atomicAdd(&energy[indx], feedback_energy * delta_x * (1-delta_y) * delta_z); - //info[threadId*N_INFO + 3] += feedback_energy * fabs(delta_x * (1-delta_y )* delta_z) * dV; - } else { - atomicAdd(&momentum_x[indx], -delta_x * feedback_momentum); - atomicAdd(&momentum_y[indx], delta_y * feedback_momentum); - atomicAdd(&momentum_z[indx], -delta_z * feedback_momentum); - //info[threadId*N_INFO + 4] += (fabs(delta_x) /*+ fabs(delta_y) + fabs(delta_z)*/)*feedback_momentum * dV; - } - local_dti = fmax(local_dti, Calc_Timestep(gamma, density, momentum_x, momentum_y, momentum_z, energy, indx, dx, dy, dz)); - - indx = indx_x + indx_y*nx_g + (indx_z+1)*nx_g*ny_g; - if (is_resolved) { - atomicAdd(&density[indx], feedback_density * delta_x * delta_y * (1-delta_z)); - atomicAdd(&gasEnergy[indx], feedback_energy * delta_x * delta_y * (1-delta_z)); - atomicAdd(&energy[indx], feedback_energy * delta_x * delta_y * (1-delta_z)); - //info[threadId*N_INFO + 3] += feedback_energy * fabs(delta_x * delta_y * (1 - delta_z)) * dV; - } else { - atomicAdd(&momentum_x[indx], -delta_x * feedback_momentum); - atomicAdd(&momentum_y[indx], -delta_y * feedback_momentum); - atomicAdd(&momentum_z[indx], delta_z * feedback_momentum); - //info[threadId*N_INFO + 4] += (fabs(delta_x) /*+ fabs(delta_y) + fabs(delta_z)*/)*feedback_momentum * dV; - } - local_dti = fmax(local_dti, Calc_Timestep(gamma, density, momentum_x, momentum_y, momentum_z, energy, indx, dx, dy, dz)); - - indx = (indx_x+1) + (indx_y+1)*nx_g + indx_z*nx_g*ny_g; - if (is_resolved) { - atomicAdd(&density[indx], feedback_density * (1-delta_x) * (1-delta_y) * delta_z); - atomicAdd(&gasEnergy[indx], feedback_energy * (1-delta_x) * (1-delta_y) * delta_z); - atomicAdd(&energy[indx], feedback_energy * (1-delta_x) * (1-delta_y) * delta_z); - //info[threadId*N_INFO + 3] += feedback_energy * fabs((1-delta_x) * (1-delta_y) * delta_z) * dV; - } else { - atomicAdd(&momentum_x[indx], delta_x * feedback_momentum); - atomicAdd(&momentum_y[indx], delta_y * feedback_momentum); - atomicAdd(&momentum_z[indx], -delta_z * feedback_momentum); - //info[threadId*N_INFO + 4] += (fabs(delta_x) /*+ fabs(delta_y) + fabs(delta_z)*/)*feedback_momentum * dV; - } - local_dti = fmax(local_dti, Calc_Timestep(gamma, density, momentum_x, momentum_y, momentum_z, energy, indx, dx, dy, dz)); - - indx = (indx_x+1) + indx_y*nx_g + (indx_z+1)*nx_g*ny_g; - if (is_resolved) { - atomicAdd(&density[indx], feedback_density * (1-delta_x) * delta_y * (1-delta_z)); - atomicAdd(&gasEnergy[indx], feedback_energy * (1-delta_x) * delta_y * (1-delta_z)); - atomicAdd(&energy[indx], feedback_energy * (1-delta_x) * delta_y * (1-delta_z)); - //info[threadId*N_INFO + 3] += feedback_energy * fabs((1-delta_x) * delta_y * (1-delta_z)) * dV; - } else { - atomicAdd(&momentum_x[indx], delta_x * feedback_momentum); - atomicAdd(&momentum_y[indx], -delta_y * feedback_momentum); - atomicAdd(&momentum_z[indx], delta_z * feedback_momentum); - //info[threadId*N_INFO + 4] += (fabs(delta_x) /*+ fabs(delta_y) + fabs(delta_z)*/)*feedback_momentum * dV; - } - local_dti = fmax(local_dti, Calc_Timestep(gamma, density, momentum_x, momentum_y, momentum_z, energy, indx, dx, dy, dz)); - - indx = indx_x + (indx_y+1)*nx_g + (indx_z+1)*nx_g*ny_g; - if (is_resolved) { - atomicAdd(&density[indx], feedback_density * delta_x * (1-delta_y) * (1-delta_z)); - atomicAdd(&gasEnergy[indx], feedback_energy * delta_x * (1-delta_y) * (1-delta_z)); - atomicAdd(&energy[indx], feedback_energy * delta_x * (1-delta_y) * (1-delta_z)); - //info[threadId*N_INFO + 3], feedback_energy * fabs(delta_x * (1-delta_y) * (1-delta_z)) * dV; - } else { - atomicAdd(&momentum_x[indx], -delta_x * feedback_momentum); - atomicAdd(&momentum_y[indx], delta_y * feedback_momentum); - atomicAdd(&momentum_z[indx], delta_z * feedback_momentum); - //info[threadId*N_INFO + 4] += (fabs(delta_x) /*+ fabs(delta_y) + fabs(delta_z)*/)*feedback_momentum * dV; - } - local_dti = fmax(local_dti, Calc_Timestep(gamma, density, momentum_x, momentum_y, momentum_z, energy, indx, dx, dy, dz)); - - indx = (indx_x+1) + (indx_y+1)*nx_g + (indx_z+1)*nx_g*ny_g; - if (is_resolved) { - atomicAdd(&density[indx], feedback_density * (1-delta_x) * (1-delta_y) * (1-delta_z)); - atomicAdd(&gasEnergy[indx], feedback_energy * (1-delta_x) * (1-delta_y) * (1-delta_z)); - atomicAdd(&energy[indx], feedback_energy * (1-delta_x) * (1-delta_y) * (1-delta_z)); - //info[threadId*N_INFO + 3] += feedback_energy * fabs((1-delta_x) * (1-delta_y) * (1-delta_z)) * dV; - } else { - atomicAdd(&momentum_x[indx], delta_x * feedback_momentum); - atomicAdd(&momentum_y[indx], delta_y * feedback_momentum); - atomicAdd(&momentum_z[indx], delta_z * feedback_momentum); - //info[threadId*N_INFO + 4] += (fabs(delta_x) /*+ fabs(delta_y) + fabs(delta_z)*/)*feedback_momentum * dV; + if (tid == 0) { + info[FEED_INFO_N*blockIdx.x] = s_info[0]; + info[FEED_INFO_N*blockIdx.x + 1] = s_info[1]; + info[FEED_INFO_N*blockIdx.x + 2] = s_info[2]; + info[FEED_INFO_N*blockIdx.x + 3] = s_info[3]; + info[FEED_INFO_N*blockIdx.x + 4] = s_info[4]; } - local_dti = fmax(local_dti, Calc_Timestep(gamma, density, momentum_x, momentum_y, momentum_z, energy, indx, dx, dy, dz)); - atomicMax(dti, local_dti); } @@ -296,24 +334,57 @@ Real Grid3D::Cluster_Feedback_GPU() { exit(-1); } - printf("Cluster_Feedback_GPU: start. dt=%.4e\n", H.dt); Real h_dti = 0.0; Real* d_dti; - cudaMalloc(&d_dti, sizeof(Real)); - cudaMemcpy(d_dti, &h_dti, sizeof(Real), cudaMemcpyHostToDevice); - - int ngrid = (Particles.n_local + 64 - 1) / 64; - dim3 grid(ngrid); - dim3 block(64); - - hipLaunchKernelGGL(Cluster_Feedback_Kernel, grid, block, 0, 0, Particles.n_local, Particles.pos_x_dev, Particles.pos_y_dev, Particles.pos_z_dev, - Particles.particle_mass, Particles.age_dev, H.xblocal, H.yblocal, H.zblocal, H.domlen_x, H.domlen_y, H.domlen_z, - H.dx, H.dy, H.dz, H.nx, H.ny, H.nz, H.n_ghost, H.t, H.dt, d_dti, + CHECK(cudaMalloc(&d_dti, sizeof(Real))); + CHECK(cudaMemcpy(d_dti, &h_dti, sizeof(Real), cudaMemcpyHostToDevice)); + + int ngrid = std::ceil((1.*Particles.n_local)/TPB_FEEDBACK); + Real h_info[5] = {0, 0, 0, 0, 0}; + Real info[5]; + Real* d_info; + CHECK(cudaMalloc((void**)&d_info, FEED_INFO_N*ngrid*sizeof(Real))); + //FIXME info collection only works if ngrid is 1. The reason being that reduction of + // d_info is currently done on each block. Only the first block reduction + // is used + + hipLaunchKernelGGL(Cluster_Feedback_Kernel, ngrid, TPB_FEEDBACK, 0, 0, Particles.n_local, Particles.pos_x_dev, Particles.pos_y_dev, Particles.pos_z_dev, + Particles.mass_dev, Particles.age_dev, H.xblocal, H.yblocal, H.zblocal, H.domlen_x, H.domlen_y, H.domlen_z, + H.dx, H.dy, H.dz, H.nx, H.ny, H.nz, H.n_ghost, H.t, H.dt, d_dti, d_info, C.d_density, C.d_GasEnergy, C.d_Energy, C.d_momentum_x, C.d_momentum_y, C.d_momentum_z, gama, Supernova::curandStates); - cudaMemcpy(&h_dti, d_dti, sizeof(Real), cudaMemcpyDeviceToHost); - cudaFree(d_dti); - printf("Cluster_Feedback_GPU: end. calc dti=%.4e\n", h_dti); + CHECK(cudaMemcpy(&h_dti, d_dti, sizeof(Real), cudaMemcpyDeviceToHost)); + CHECK(cudaMemcpy(&h_info, d_info, FEED_INFO_N*sizeof(Real), cudaMemcpyDeviceToHost)); + CHECK(cudaFree(d_dti)); + CHECK(cudaFree(d_info)); + + #ifdef MPI_CHOLLA + MPI_Reduce(&h_info, &info, 5, MPI_CHREAL, MPI_SUM, root, world); + #else + info = h_info; + #endif + + countSN += (int)info[Supernova::SN]; + countResolved += (int)info[Supernova::RESOLVED]; + countUnresolved += (int)info[Supernova::NOT_RESOLVED]; + totalEnergy += info[Supernova::ENERGY]; + totalMomentum += info[Supernova::MOMENTUM]; + + Real resolved_ratio = 0.0; + if (info[Supernova::RESOLVED] > 0 || info[Supernova::NOT_RESOLVED] > 0) { + resolved_ratio = info[Supernova::RESOLVED]/(info[Supernova::RESOLVED] + info[Supernova::NOT_RESOLVED]); + } + Real global_resolved_ratio = 0.0; + if (countResolved > 0 || countUnresolved > 0) { + global_resolved_ratio = countResolved / (countResolved + countUnresolved); + } + + chprintf("iteration %d: number of SN: %d, ratio of resolved %.3e\n", H.n_step, (long)info[Supernova::SN], resolved_ratio); + chprintf(" this iteration: energy: %.5e erg. x-momentum: %.5e S.M. km/s\n", + info[Supernova::ENERGY]*MASS_UNIT*LENGTH_UNIT*LENGTH_UNIT/TIME_UNIT/TIME_UNIT, info[Supernova::MOMENTUM]*VELOCITY_UNIT/1e5); + chprintf(" cummulative: #SN: %d, ratio of resolved (R: %d, UR: %d) = %.3e\n", (long)countSN, (long)countResolved, (long)countUnresolved, global_resolved_ratio); + chprintf(" energy: %.5e erg. Total x-momentum: %.5e S.M. km/s\n", totalEnergy*MASS_UNIT*LENGTH_UNIT*LENGTH_UNIT/TIME_UNIT/TIME_UNIT, totalMomentum*VELOCITY_UNIT/1e5); + return h_dti; } diff --git a/src/particles/gravity_CIC_gpu.cu b/src/particles/gravity_CIC_gpu.cu index 88fadf095..4c9600e05 100644 --- a/src/particles/gravity_CIC_gpu.cu +++ b/src/particles/gravity_CIC_gpu.cu @@ -94,7 +94,13 @@ __global__ void Get_Gravity_Field_Particles_Kernel( Real *potential_dev, Real * #else gravity_z_dev[tid] = -0.5 * ( phi_r - phi_l ) / dz; #endif - + + /* + if (tid_x < 10 && tid_y == (ny_grav/2) && tid_z == (nz_grav/2)) { + printf("gravity_x_dev[%d, %d, %d] = %.4e\n", tid_x, tid_y, tid_z, gravity_x_dev[tid]); + //printf("analytic_d[%d, %d, %d] = %.4e\n", tid_x, tid_y, tid_z, analytic_d[tid]); + } + */ } @@ -125,6 +131,13 @@ void Particles_3D::Get_Gravity_Field_Particles_GPU_function( int nx_local, int n hipLaunchKernelGGL(Get_Gravity_Field_Particles_Kernel, dim3dGrid, dim3dBlock, 0, 0, potential_dev, gravity_x_dev, gravity_y_dev, gravity_z_dev, nx_local, ny_local, nz_local, n_ghost_particles_grid, N_GHOST_POTENTIAL, dx, dy, dz ); CudaCheckError(); + + /* + gpuFor(10, + GPU_LAMBDA(const int i) { + printf("potential_final[%d, %d, %d] = %.4e\n", i, ny_g/2, nz_g/2, potential_dev[i + nx_g*ny_g/2 + nx_g*ny_g*nz_g/2]); + } + );*/ } diff --git a/src/particles/particles_3D.cpp b/src/particles/particles_3D.cpp index 33ebf50d4..d5c0765ab 100644 --- a/src/particles/particles_3D.cpp +++ b/src/particles/particles_3D.cpp @@ -624,17 +624,14 @@ void Particles_3D::Initialize_Sphere(struct parameters *P){ * Initializes a disk population of uniform mass stellar clusters */ void Particles_3D::Initialize_Disk_Stellar_Clusters(struct parameters *P) { - #ifndef SINGLE_PARTICLE_MASS - chprintf( " Initialize_Disk_Stellar_Clusters: only SINGLE_PARTICLE_MASS currently supported\n"); - chexit(-1); - #endif chprintf( " Initializing Particles Stellar Disk\n"); // Set up the PRNG - ChollaPrngGenerator prng(P); + std::mt19937_64 generator {P->prng_seed}; std::gamma_distribution radialDist(2,1); //for generating cyclindrical radii - std::uniform_real_distribution zDist(0, 1); //for generating height above/below the disk. + std::uniform_real_distribution zDist(-0.2, 0.2); + std::uniform_real_distribution vzDist(-1e-8, 1e-8); std::uniform_real_distribution phiDist(0, 2*M_PI); //for generating phi std::normal_distribution speedDist(0, 1); //for generating random speeds. @@ -653,66 +650,65 @@ void Particles_3D::Initialize_Disk_Stellar_Clusters(struct parameters *P) { real_vector_t temp_grav_x; real_vector_t temp_grav_y; real_vector_t temp_grav_z; - #ifndef SINGLE_PARTICLE_MASS real_vector_t temp_mass; - #endif - #ifdef PARTICLE_IDS - int_vector_t temp_ids; - #endif - #ifdef PARTICLE_AGE + int_vector_t temp_ids; real_vector_t temp_age; - #endif Real x, y, z, R, phi; Real vx, vy, vz, vel, ac; Real expFactor, vR_rms, vR, vPhi_str, vPhi, v_c2, vPhi_rand_rms, kappa2; - particle_mass = 1e5; //solar masses //unsigned long int N = (long int)(6.5e6 * 0.11258580827352116); //2kpc radius - unsigned long int N = 38; //(long int)(6.5e6 * 0.9272485558395908); // 15kpc radius + //unsigned long int N = 13; //(long int)(6.5e6 * 0.9272485558395908); // 15kpc radius + Real total_mass = 0; + Real upper_limit_cluster_mass = 1e7; long lost_particles = 0; - for ( part_int_t i = 0; i < N; i++ ){ - do { - R = R_d*radialDist(prng.generator); - } while (R > R_max); - - phi = phiDist(prng.generator); - x = R * cos(phi); - y = R * sin(phi); - z = 0; - - if (x < G.xMin || x > G.xMax) continue; - if (y < G.yMin || y > G.yMax) continue; - if (z < G.zMin || z > G.zMax) continue; - - ac = fabs(Galaxies::MW.gr_disk_D3D(R, 0) + Galaxies::MW.gr_halo_D3D(R, 0)); - vPhi = sqrt(R*ac); - - vx = -vPhi*sin(phi); - vy = vPhi*cos(phi); - vz = 0; - - //add particle data to the particles vectors - temp_pos_x.push_back(x); - temp_pos_y.push_back(y); - temp_pos_z.push_back(z); - temp_vel_x.push_back(vx); - temp_vel_y.push_back(vy); - temp_vel_z.push_back(vz); - temp_grav_x.push_back(0.0); - temp_grav_y.push_back(0.0); - temp_grav_z.push_back(0.0); - - #ifdef PARTICLE_AGE - //if (fabs(z) >= Z_d) age.push_back(1.1e4); - //else age.push_back(0.0); - temp_age.push_back(0.0); - #endif + part_int_t id = -1; + while (total_mass < upper_limit_cluster_mass) { + Real cluster_mass = Galaxies::MW.singleClusterMass(generator); + total_mass += cluster_mass; + id += 1; // do this here before we check whether the particle is in the MPI domain, otherwise + // could end up with duplicated IDs + do { + R = R_d*radialDist(generator); + } while (R > R_max); + + phi = phiDist(generator); + x = R * cos(phi); + y = R * sin(phi); + z = 0.0; //zDist(generator); + + if (x < G.xMin || x > G.xMax) continue; + if (y < G.yMin || y > G.yMax) continue; + if (z < G.zMin || z > G.zMax) continue; + + ac = fabs(Galaxies::MW.gr_disk_D3D(R, 0) + Galaxies::MW.gr_halo_D3D(R, 0)); + vPhi = sqrt(R*ac); + + vx = -vPhi*sin(phi); + vy = vPhi*cos(phi); + vz = 0.0; //vzDist(generator); + + //add particle data to the particles vectors + temp_pos_x.push_back(x); + temp_pos_y.push_back(y); + temp_pos_z.push_back(z); + temp_vel_x.push_back(vx); + temp_vel_y.push_back(vy); + temp_vel_z.push_back(vz); + temp_grav_x.push_back(0.0); + temp_grav_y.push_back(0.0); + temp_grav_z.push_back(0.0); + temp_mass.push_back(cluster_mass); + //if (fabs(z) >= Z_d) age.push_back(1.1e4); + //else age.push_back(0.0); + temp_age.push_back(0.0); + temp_ids.push_back(id); } n_local = temp_pos_x.size(); - #if defined(PARTICLE_IDS) +/* part_int_t global_id_offset = 0; #ifdef MPI_CHOLLA // Get global IDs: Offset the local IDs to get unique global IDs across the MPI ranks @@ -722,27 +718,21 @@ void Particles_3D::Initialize_Disk_Stellar_Clusters(struct parameters *P) { for ( int i=0; i 0) chprintf(" lost %lu particles\n", lost_particles); - chprintf( " Stellar Disk Particles Initialized, n_local: %lu\n", n_local); + chprintf( "Stellar Disk Particles Initialized, n_total: %lu, n_local: %lu, total_mass: %.3e s.m.\n", id, n_local, total_mass); } void Particles_3D::Initialize_Zeldovich_Pancake( struct parameters *P ){ - //No partidcles for the Zeldovich Pancake problem. n_local=0 + //No particles for the Zeldovich Pancake problem. n_local=0 chprintf("Setting Zeldovich Pancake initial conditions...\n"); diff --git a/src/particles/particles_boundaries.cpp b/src/particles/particles_boundaries.cpp index f67c1fd64..432903b2c 100644 --- a/src/particles/particles_boundaries.cpp +++ b/src/particles/particles_boundaries.cpp @@ -182,7 +182,7 @@ void Grid3D::Load_NTtransfer_and_Request_Receive_Particles_Transfer(int index, i #ifdef MPI_GPU if ( buffer_length > Particles.G.recv_buffer_size_x0 ){ printf( "Extending Particles Transfer Buffer "); - Extend_GPU_Array_Real( &recv_buffer_x0_particles, Particles.G.recv_buffer_size_x0, Particles.G.gpu_allocation_factor*buffer_length, true ); + Extend_GPU_Array( &recv_buffer_x0_particles, Particles.G.recv_buffer_size_x0, Particles.G.gpu_allocation_factor*buffer_length, true ); Particles.G.recv_buffer_size_x0 = (part_int_t) Particles.G.gpu_allocation_factor*buffer_length; } #else @@ -201,7 +201,7 @@ void Grid3D::Load_NTtransfer_and_Request_Receive_Particles_Transfer(int index, i #ifdef MPI_GPU if ( buffer_length > Particles.G.recv_buffer_size_x1 ){ printf( "Extending Particles Transfer Buffer "); - Extend_GPU_Array_Real( &recv_buffer_x1_particles, Particles.G.recv_buffer_size_x1, Particles.G.gpu_allocation_factor*buffer_length, true ); + Extend_GPU_Array( &recv_buffer_x1_particles, Particles.G.recv_buffer_size_x1, Particles.G.gpu_allocation_factor*buffer_length, true ); Particles.G.recv_buffer_size_x1 = (part_int_t) Particles.G.gpu_allocation_factor*buffer_length; } #else @@ -220,7 +220,7 @@ void Grid3D::Load_NTtransfer_and_Request_Receive_Particles_Transfer(int index, i #ifdef MPI_GPU if ( buffer_length > Particles.G.recv_buffer_size_y0 ){ printf( "Extending Particles Transfer Buffer "); - Extend_GPU_Array_Real( &recv_buffer_y0_particles, Particles.G.recv_buffer_size_y0, Particles.G.gpu_allocation_factor*buffer_length, true ); + Extend_GPU_Array( &recv_buffer_y0_particles, Particles.G.recv_buffer_size_y0, Particles.G.gpu_allocation_factor*buffer_length, true ); Particles.G.recv_buffer_size_y0 = (part_int_t) Particles.G.gpu_allocation_factor*buffer_length; } #else @@ -239,7 +239,7 @@ void Grid3D::Load_NTtransfer_and_Request_Receive_Particles_Transfer(int index, i #ifdef MPI_GPU if ( buffer_length > Particles.G.recv_buffer_size_y1 ){ printf( "Extending Particles Transfer Buffer "); - Extend_GPU_Array_Real( &recv_buffer_y1_particles, Particles.G.recv_buffer_size_y1, Particles.G.gpu_allocation_factor*buffer_length, true ); + Extend_GPU_Array( &recv_buffer_y1_particles, Particles.G.recv_buffer_size_y1, Particles.G.gpu_allocation_factor*buffer_length, true ); Particles.G.recv_buffer_size_y1 = (part_int_t) Particles.G.gpu_allocation_factor*buffer_length; } #else @@ -258,7 +258,7 @@ void Grid3D::Load_NTtransfer_and_Request_Receive_Particles_Transfer(int index, i #ifdef MPI_GPU if ( buffer_length > Particles.G.recv_buffer_size_z0 ){ printf( "Extending Particles Transfer Buffer "); - Extend_GPU_Array_Real( &recv_buffer_z0_particles, Particles.G.recv_buffer_size_z0, Particles.G.gpu_allocation_factor*buffer_length, true ); + Extend_GPU_Array( &recv_buffer_z0_particles, Particles.G.recv_buffer_size_z0, Particles.G.gpu_allocation_factor*buffer_length, true ); Particles.G.recv_buffer_size_z0 = (part_int_t) Particles.G.gpu_allocation_factor*buffer_length; } #else @@ -277,7 +277,7 @@ void Grid3D::Load_NTtransfer_and_Request_Receive_Particles_Transfer(int index, i #ifdef MPI_GPU if ( buffer_length > Particles.G.recv_buffer_size_z1 ){ printf( "Extending Particles Transfer Buffer "); - Extend_GPU_Array_Real( &recv_buffer_z1_particles, Particles.G.recv_buffer_size_z1, Particles.G.gpu_allocation_factor*buffer_length, true ); + Extend_GPU_Array( &recv_buffer_z1_particles, Particles.G.recv_buffer_size_z1, Particles.G.gpu_allocation_factor*buffer_length, true ); Particles.G.recv_buffer_size_z1 = (part_int_t) Particles.G.gpu_allocation_factor*buffer_length; } #else @@ -736,7 +736,7 @@ void Particles_3D::Copy_Transfer_Particles_to_Buffer_GPU(int n_transfer, int dir // If the number of particles in the array exceeds the size of the array, extend the array if ( (*n_send + n_transfer)*N_DATA_PER_PARTICLE_TRANSFER > *buffer_size ){ printf( "Extending Particles Transfer Buffer "); - Extend_GPU_Array_Real( &send_buffer_d, *buffer_size, G.gpu_allocation_factor*(*n_send + n_transfer)*N_DATA_PER_PARTICLE_TRANSFER, true ); + Extend_GPU_Array( &send_buffer_d, *buffer_size, G.gpu_allocation_factor*(*n_send + n_transfer)*N_DATA_PER_PARTICLE_TRANSFER, true ); *buffer_size = (part_int_t) G.gpu_allocation_factor*(*n_send + n_transfer)*N_DATA_PER_PARTICLE_TRANSFER; } diff --git a/src/particles/supernova.h b/src/particles/supernova.h index c876abb94..9b1eebc0d 100644 --- a/src/particles/supernova.h +++ b/src/particles/supernova.h @@ -9,14 +9,10 @@ namespace Supernova { - static const int NUMBER = 0; - static const int ENERGY = 1; - static const int MASS = 2; - static const int MOMENTUM = 3; - static const int SHELL_RADIUS = 4; + const int SN = 0, RESOLVED = 1, NOT_RESOLVED = 2, ENERGY = 3, MOMENTUM = 4; - // supernova rate: 1SN / 100 solar masses, with 10^5 solar masses per cluster, spread over 10^4 kyr - static const Real SNR=0.1; + // supernova rate: 1SN / 100 solar masses per 10^4 kyr + static const Real SNR=1e-6; static const Real ENERGY_PER_SN = 5.3e-05; // 1e51 ergs/SN in solarMass*(kpc/kyr)**2 static const Real MASS_PER_SN = 10.0; // 10 solarMasses per SN static const Real FINAL_MOMENTUM = 0.29; // 2.8e5 solarMasses km/s * n_0^{-0.17} -> eq.(34) Kim & Ostriker (2015) @@ -29,6 +25,7 @@ namespace Supernova { extern part_int_t n_states; void initState(struct parameters *P, part_int_t n_local, Real allocation_factor = 1); + //void initState(struct parameters *P); #endif //PARTICLES_GPU } diff --git a/src/utils/timing_functions.cpp b/src/utils/timing_functions.cpp index 51c0285a6..61b73b468 100644 --- a/src/utils/timing_functions.cpp +++ b/src/utils/timing_functions.cpp @@ -84,6 +84,12 @@ void Time::Initialize(){ #ifdef CHEMISTRY_GPU &(Chemistry = OneTime("Chemistry")), #endif + #ifdef FEEDBACK + &(Feedback = OneTime("Feedback")), + #ifdef ANALYSIS + &(FeedbackAnalysis = OneTime("FeedbackAnalysis")), + #endif + #endif // FEEDBACK &(Total = OneTime("Total")), }; diff --git a/src/utils/timing_functions.h b/src/utils/timing_functions.h index 02fe5db4b..32c1909e0 100644 --- a/src/utils/timing_functions.h +++ b/src/utils/timing_functions.h @@ -53,6 +53,8 @@ class Time OneTime Advance_Part_2; OneTime Cooling; OneTime Chemistry; + OneTime Feedback; + OneTime FeedbackAnalysis; std::vector onetimes; From 6f0d1f0a11a5785096f18b4d9669b6d20645db45 Mon Sep 17 00:00:00 2001 From: ojwg Date: Tue, 3 May 2022 17:22:01 -0400 Subject: [PATCH 061/694] saving interim work, part2 --- src/analysis/feedback_analysis.cpp | 142 +++++++++++++++++++++++ src/analysis/feedback_analysis.h | 32 ++++++ src/analysis/feedback_analysis_gpu.cu | 159 ++++++++++++++++++++++++++ 3 files changed, 333 insertions(+) create mode 100644 src/analysis/feedback_analysis.cpp create mode 100644 src/analysis/feedback_analysis.h create mode 100644 src/analysis/feedback_analysis_gpu.cu diff --git a/src/analysis/feedback_analysis.cpp b/src/analysis/feedback_analysis.cpp new file mode 100644 index 000000000..d1ff8af56 --- /dev/null +++ b/src/analysis/feedback_analysis.cpp @@ -0,0 +1,142 @@ +#include "feedback_analysis.h" +#include "../io/io.h" +#include "../model/disk_galaxy.h" + +#ifdef MPI_CHOLLA +#include "../mpi/mpi_routines.h" +#endif + + +FeedbackAnalysis::FeedbackAnalysis(Grid3D& G) { + // allocate arrays + h_circ_vel_x = (Real *) malloc(G.H.n_cells*sizeof(Real)); + h_circ_vel_y = (Real *) malloc(G.H.n_cells*sizeof(Real)); + + #ifdef PARTICLES_GPU + CHECK( cudaMalloc((void**)&d_circ_vel_x, G.H.n_cells*sizeof(Real)) ); + CHECK( cudaMalloc((void**)&d_circ_vel_y, G.H.n_cells*sizeof(Real)) ); + #endif + + + //setup the (constant) circular speed arrays + int id; + Real vca, r, x, y, z; + + for (int k=G.H.n_ghost; k + + +class FeedbackAnalysis { + + Real *h_circ_vel_x, *h_circ_vel_y; + #ifdef PARTICLES_GPU + Real *d_circ_vel_x, *d_circ_vel_y; + #endif + + #ifdef PARTICLES_GPU + void Compute_Gas_Velocity_Dispersion_GPU(Grid3D& G); + #endif + + public: + int countSN; + int countResolved; + int countUnresolved; + Real totalEnergy; + Real totalMomentum; + + FeedbackAnalysis(Grid3D& G); + ~FeedbackAnalysis(); + + void Compute_Gas_Velocity_Dispersion(Grid3D& G); + void Reset(); + +}; \ No newline at end of file diff --git a/src/analysis/feedback_analysis_gpu.cu b/src/analysis/feedback_analysis_gpu.cu new file mode 100644 index 000000000..c3cfcf8dd --- /dev/null +++ b/src/analysis/feedback_analysis_gpu.cu @@ -0,0 +1,159 @@ + + +#include "feedback_analysis.h" +#include "../io/io.h" +#include +#ifdef PARTICLES_GPU + +// in cgs, this is 0.01 cm^{-3} +#define MIN_DENSITY 148273.7 +#define TPB_ANALYSIS 1024 + + +__device__ void warpReduce(volatile Real *buff, size_t tid) +{ + if (TPB_ANALYSIS >= 64) buff[tid] += buff[tid + 32]; + if (TPB_ANALYSIS >= 32) buff[tid] += buff[tid + 16]; + if (TPB_ANALYSIS >= 16) buff[tid] += buff[tid + 8]; + if (TPB_ANALYSIS >= 8) buff[tid] += buff[tid + 4]; + if (TPB_ANALYSIS >= 4) buff[tid] += buff[tid + 2]; + if (TPB_ANALYSIS >= 2) buff[tid] += buff[tid + 1]; +} + + +void __global__ Reduce_Tubulence_kernel(int nx, int ny, int nz, int n_ghost, Real *density, Real *momentum_x, Real *momentum_y, + Real *momentum_z, Real *circ_vel_x, Real *circ_vel_y, Real *partial_mass, Real *partial_vel) { + __shared__ Real s_mass[TPB_ANALYSIS]; + __shared__ Real s_vel[TPB_ANALYSIS]; + int id, zid, yid, xid, tid; + + id = threadIdx.x + blockIdx.x * blockDim.x; + zid = id / (nx*ny); + yid = (id - zid*nx*ny) / nx; + xid = id - zid*nx*ny - yid*nx; + tid = threadIdx.x; + + s_mass[tid] = 0; + s_vel[tid] = 0; + Real vx, vy, vz; + if (xid > n_ghost-1 && xid < nx-n_ghost && yid > n_ghost-1 && yid < ny-n_ghost && zid > n_ghost-1 && zid < nz-n_ghost && density[id] > MIN_DENSITY) { + s_mass[tid] = density[id]; + vx = momentum_x[id]/ density[id]; + vy = momentum_y[id]/ density[id]; + vz = momentum_z[id]/ density[id]; + s_vel[tid] += ((vx - circ_vel_x[id])*(vx - circ_vel_x[id]) + + (vy - circ_vel_y[id])*(vy - circ_vel_y[id]) + + (vz*vz) + )*density[id]; + } + __syncthreads(); + + for (unsigned int s=blockDim.x/2; s>0; s>>=1) { + if (tid < s) { + s_mass[tid] += s_mass[tid + s]; + s_vel[tid] += s_vel[tid + s]; + } + __syncthreads(); + } + if (tid == 0) { + //printf("ReduceKernel 1: blockIdx.x = %d -> s_mass[0] = %.5e, s_vel[0] = %.5e\n", blockIdx.x, s_mass[0], s_vel[0]); + partial_mass[blockIdx.x] = s_mass[0]; + partial_vel[blockIdx.x] = s_vel[0]; + } +} + + +void __global__ Reduce_Tubulence_kernel_2(Real *input_m, Real *input_v, Real *output_m, Real *output_v, int n) { + __shared__ Real s_mass[TPB_ANALYSIS]; + __shared__ Real s_vel[TPB_ANALYSIS]; + + size_t tid = threadIdx.x; + //size_t i = blockIdx.x*(TPB_ANALYSIS*2) + tid; + //size_t gridSize = TPB_ANALYSIS*2*gridDim.x; + size_t i = blockIdx.x*(TPB_ANALYSIS) + tid; + size_t gridSize = TPB_ANALYSIS*gridDim.x; + s_mass[tid] = 0; + s_vel[tid] = 0; + + while (i < n) { + s_mass[tid] += input_m[i]; + s_vel[tid] += input_v[i]; + i += gridSize; + } + //while (i < n) { s_mass[tid] += input[i] + input[i+TPB_ANALYSIS]; i += gridSize; } + __syncthreads(); + + if (TPB_ANALYSIS >= 1024) { if (tid < 512) { s_mass[tid] += s_mass[tid + 512]; s_vel[tid] += s_vel[tid + 512]; } __syncthreads(); } + if (TPB_ANALYSIS >= 512) { if (tid < 256) { s_mass[tid] += s_mass[tid + 256]; s_vel[tid] += s_vel[tid + 256]; } __syncthreads(); } + if (TPB_ANALYSIS >= 256) { if (tid < 128) { s_mass[tid] += s_mass[tid + 128]; s_vel[tid] += s_vel[tid + 128]; } __syncthreads(); } + if (TPB_ANALYSIS >= 128) { if (tid < 64) { s_mass[tid] += s_mass[tid + 64]; s_vel[tid] += s_vel[tid + 64]; } __syncthreads(); } + + if (tid < 32) { warpReduce(s_mass, tid); warpReduce(s_vel, tid); } + __syncthreads(); + + if (tid == 0) { + //printf("Reduce_Tubulence_kernel 2: n = %d/%d, blockIdx.x = %d -> s_mass[0] = %.5e, s_vel[0] = %.5e\n", + // n, gridDim.x, blockIdx.x, s_mass[0], s_vel[0]); + output_m[blockIdx.x] = s_mass[0]; + output_v[blockIdx.x] = s_vel[0]; + } +} + + +void FeedbackAnalysis::Compute_Gas_Velocity_Dispersion_GPU(Grid3D& G) { + size_t ngrid = std::ceil((1.*G.H.nx*G.H.ny*G.H.nz)/TPB_ANALYSIS); + + Real* d_partial_mass; + Real* d_partial_vel; + Real* h_partial_mass = (Real *) malloc(ngrid*sizeof(Real)); + Real* h_partial_vel = (Real *) malloc(ngrid*sizeof(Real)); + CHECK(cudaMalloc((void**)&d_partial_mass, ngrid*sizeof(Real))); + CHECK(cudaMalloc((void**)&d_partial_vel, ngrid*sizeof(Real))); + + Real total_mass = 0; + Real total_vel = 0; + + hipLaunchKernelGGL(Reduce_Tubulence_kernel, ngrid, TPB_ANALYSIS, 0, 0, G.H.nx, G.H.ny, G.H.nz, G.H.n_ghost, + G.C.d_density, G.C.d_momentum_x, G.C.d_momentum_y, G.C.d_momentum_z, + d_circ_vel_x, d_circ_vel_y, d_partial_mass, d_partial_vel); + + size_t n = ngrid; + Real *mass_input = d_partial_mass; + Real *vel_input = d_partial_vel; + while (n > TPB_ANALYSIS) { + ngrid = std::ceil( (n*1.)/TPB_ANALYSIS ); + //printf("Reduce_Tubulence: Next kernel call grid size is %d\n", ngrid); + hipLaunchKernelGGL(Reduce_Tubulence_kernel_2, ngrid, TPB_ANALYSIS, 0, 0, mass_input, vel_input, d_partial_mass, d_partial_vel, n); + mass_input = d_partial_mass; + vel_input = d_partial_vel; + n = ngrid; + } + + if (n > 1) { + hipLaunchKernelGGL(Reduce_Tubulence_kernel_2, 1, TPB_ANALYSIS, 0, 0, d_partial_mass, d_partial_vel, d_partial_mass, d_partial_vel, n); + } + + //cudaDeviceSynchronize(); + + CHECK(cudaMemcpy(h_partial_mass, d_partial_mass, ngrid*sizeof(Real), cudaMemcpyDeviceToHost)); + CHECK(cudaMemcpy(h_partial_vel, d_partial_vel, ngrid*sizeof(Real), cudaMemcpyDeviceToHost)); + + #ifdef MPI_CHOLLA + MPI_Allreduce(h_partial_mass, &total_mass, 1, MPI_CHREAL, MPI_SUM, world); + MPI_Allreduce(h_partial_vel, &total_vel, 1, MPI_CHREAL, MPI_SUM, world); + #else + total_mass = h_partial_mass[0]; + total_vel = h_partial_vel[0]; + #endif + + chprintf("sum(density): %.5e, sum(|v-v_circ|^2*dens): %.5e\n", total_mass, total_vel); //FIXME remove debug printout + chprintf("feedback: time %f, dt=%f, vrms = %f km/s\n", G.H.t, G.H.dt, sqrt(total_vel/total_mass)*VELOCITY_UNIT/1e5); + + CHECK(cudaFree(d_partial_vel)); + CHECK(cudaFree(d_partial_mass)); + + free(h_partial_mass); + free(h_partial_vel); +} + + #endif // PARTICLES_GPU From f2b9b8649a30cab26ed6003ed0fb85df141fda79 Mon Sep 17 00:00:00 2001 From: helenarichie Date: Wed, 4 May 2022 15:29:54 -0400 Subject: [PATCH 062/694] add dust build type --- builds/make.type.dust | 20 ++------------------ 1 file changed, 2 insertions(+), 18 deletions(-) diff --git a/builds/make.type.dust b/builds/make.type.dust index 90cc371c5..06c54eda1 100644 --- a/builds/make.type.dust +++ b/builds/make.type.dust @@ -19,24 +19,11 @@ else DFLAGS += -DVL endif -# need this if using Disk_3D -# DFLAGS += -DDISK_ICS - -# Apply a density and temperature floor -DFLAGS += -DDENSITY_FLOOR -DFLAGS += -DTEMPERATURE_FLOOR - -# Solve the Gas Internal Energy usisng a Dual Energy Formalism -DFLAGS += -DDE - # Evolve additional scalars # DFLAGS += -DSCALAR -# Apply the cooling in the GPU from precomputed tables -DFLAGS += -DCOOLING_GPU - #Measure the Timing of the different stages -DFLAGS += -DCPU_TIME +#DFLAGS += -DCPU_TIME DFLAGS += $(OUTPUT) @@ -44,7 +31,4 @@ DFLAGS += $(OUTPUT) #and the MPI transfers are done from the GPU #If not specified, MPI_GPU is off by default #This is set in the system make.host file -DFLAGS += $(MPI_GPU) - -DFLAGS += -DPARALLEL_OMP -DFLAGS += -DN_OMP_THREADS=$(OMP_NUM_THREADS) \ No newline at end of file +DFLAGS += $(MPI_GPU) \ No newline at end of file From 25d06734806bc9c96bcfa21c5a86d43d290c00d7 Mon Sep 17 00:00:00 2001 From: helenarichie Date: Wed, 4 May 2022 15:32:58 -0400 Subject: [PATCH 063/694] add dust build type --- builds/make.type.dust | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/builds/make.type.dust b/builds/make.type.dust index 06c54eda1..7b9f0c1ee 100644 --- a/builds/make.type.dust +++ b/builds/make.type.dust @@ -20,7 +20,10 @@ DFLAGS += -DVL endif # Evolve additional scalars -# DFLAGS += -DSCALAR +DFLAGS += -DSCALAR + +# Define dust macro +DFLAGS += -DDUST #Measure the Timing of the different stages #DFLAGS += -DCPU_TIME From e78e33223fcd7a2e5fe7eca2623b509c140eed59 Mon Sep 17 00:00:00 2001 From: helenarichie Date: Wed, 4 May 2022 16:21:16 -0400 Subject: [PATCH 064/694] add dust build type --- src/grid/initial_conditions.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/grid/initial_conditions.cpp b/src/grid/initial_conditions.cpp index da9c88c0d..345421fbb 100644 --- a/src/grid/initial_conditions.cpp +++ b/src/grid/initial_conditions.cpp @@ -230,6 +230,12 @@ void Grid3D::Constant(Real rho, Real vx, Real vy, Real vz, Real P, Real Bx, Real #ifdef DE C.GasEnergy[id] = P/(gama-1.0); #endif // DE + + #ifdef SCALAR + #ifdef DUST + C.scalar[id] = rho*1e-2; + #endif // DUST + #endif // SCALAR } /* if (i==istart && j==jstart && k==kstart) { From a0d6340455a916ced36e21112b2bb13d24c59316 Mon Sep 17 00:00:00 2001 From: helenarichie Date: Wed, 4 May 2022 16:29:34 -0400 Subject: [PATCH 065/694] add dust_update to grid3d --- src/grid/grid3D.cpp | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/grid/grid3D.cpp b/src/grid/grid3D.cpp index 04a6aa52a..75d3f0219 100644 --- a/src/grid/grid3D.cpp +++ b/src/grid/grid3D.cpp @@ -40,6 +40,10 @@ #include "../cooling/cooling_cuda.h" // provides Cooling_Update #endif +#ifdef DUST +#include "../dust/dust_cuda.h" // provides dust_update +#endif + /*! \fn Grid3D(void) * \brief Constructor for the Grid. */ @@ -493,6 +497,11 @@ Real Grid3D::Update_Grid(void) Real cooling_max_dti = Cooling_Calc_dt(dev_dti_array, host_dti_array, H.nx, H.ny, H.nz); #endif //COOLING_GPU + #ifdef DUST + // ==Apply dust from dust/dust_cuda.h== + dust_update(C.device, H.nx, H.ny, H.nz, H.n_ghost, H.n_fields, H.dt, gama, dev_dti_array); + #endif // DUST + // Update the H and He ionization fractions and apply cooling and photoheating #ifdef CHEMISTRY_GPU #ifdef CPU_TIMER From 03f740769d1b6ef2a0818a2d8e63205d9a8469ed Mon Sep 17 00:00:00 2001 From: helenarichie Date: Wed, 4 May 2022 16:39:28 -0400 Subject: [PATCH 066/694] add compilable version of implementation of dust build type --- src/dust/dust_cuda.cu | 75 +++---------------------------------------- src/dust/dust_cuda.h | 15 ++++++--- src/grid/grid3D.cpp | 4 +-- 3 files changed, 17 insertions(+), 77 deletions(-) diff --git a/src/dust/dust_cuda.cu b/src/dust/dust_cuda.cu index 434e5406e..a6f7d93e1 100644 --- a/src/dust/dust_cuda.cu +++ b/src/dust/dust_cuda.cu @@ -1,7 +1,8 @@ #ifdef CUDA +#ifdef DUST #ifdef SCALAR -#include "dust_model.h" +#include "dust_cuda.h" #include #include @@ -16,15 +17,10 @@ #include "../utils/cuda_utilities.h" #include "../grid/grid3D.h" -int main() { - Conserved_Init(host_conserved, rho, vx, vy, vz, P, rho_d, gamma, k_n_cells, k_nx, k_ny, k_nz, k_n_ghost, k_n_fields); -} - - void Dust_Update(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dt, Real gamma) { - dim3 dim1dGrid(k_ngrid, 1, 1); + dim3 dim1dGrid(ngrid, 1, 1); dim3 dim1dBlock(TPB, 1, 1); - hipLaunchKernelGGL(Dust_Kernel, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, nz, n_ghost, n_fields, dt, gamma, params_dev); + hipLaunchKernelGGL(Dust_Kernel, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, nz, n_ghost, n_fields, dt, gamma); CudaCheckError(); } @@ -101,12 +97,6 @@ __global__ void Dust_Kernel(Real *dev_conserved, int nx, int ny, int nz, int n_g dd_dt = calc_dd_dt(d_dust, tau_sp); dd = dd_dt * dt; - params_dev[0] = T; - params_dev[1] = n; - params_dev[2] = tau_sp/3.154e7; - params_dev[3] = dd_dt; - params_dev[4] = dd; - // ensure that dust density is not changing too rapidly bool time_refine = false; while (dd/d_dust > dd_max) { @@ -118,8 +108,6 @@ __global__ void Dust_Kernel(Real *dev_conserved, int nx, int ny, int nz, int n_g dd = dt * dd_dt; } - params_dev[5] = time_refine; - // update dust density d_dust += dd; @@ -149,60 +137,7 @@ __device__ Real calc_dd_dt(Real d_dust, Real tau_sp) { return -d_dust / (tau_sp/3); } -// function to initialize conserved variable array, similar to Grid3D::Constant in grid/initial_conditions.cpp -void Conserved_Init(Real *host_conserved, Real rho, Real vx, Real vy, Real vz, Real P, Real rho_d, Real gamma, int n_cells, int nx, int ny, int nz, int n_ghost, int n_fields) -{ - int i, j, k, id; - int istart, jstart, kstart, iend, jend, kend; - - istart = H.n_ghost; - iend = H.nx-H.n_ghost; - if (H.ny > 1) { - jstart = H.n_ghost; - jend = H.ny-H.n_ghost; - } - else { - jstart = 0; - jend = H.ny; - } - if (H.nz > 1) { - kstart = H.n_ghost; - kend = H.nz-H.n_ghost; - } - else { - kstart = 0; - kend = H.nz; - } - - // set initial values of conserved variables - for(k=kstart-1; k= kstart) and (j >= jstart) and (i >= istart)) - { - // set constant initial states - host_conserved[id] = rho; - host_conserved[1*n_cells+id] = rho*vx; - host_conserved[2*n_cells+id] = rho*vy; - host_conserved[3*n_cells+id] = rho*vz; - host_conserved[4*n_cells+id] = P/(gamma-1.0) + 0.5*rho*(vx*vx + vy*vy + vz*vz); - #ifdef DE - host_conserved[(n_fields-1)*n_cells+id] = P/(gamma-1.0); - #endif // DE - #ifdef SCALAR - host_conserved[5*n_cells+id] = rho_d; - #endif // SCALAR - } - } - } - } -} - #endif // SCALAR +#endif // DUST #endif // CUDA \ No newline at end of file diff --git a/src/dust/dust_cuda.h b/src/dust/dust_cuda.h index 94584f2bb..ac993a24c 100644 --- a/src/dust/dust_cuda.h +++ b/src/dust/dust_cuda.h @@ -1,16 +1,21 @@ +#ifdef CUDA +#ifdef DUST + #ifndef DUST_CUDA_H #define DUST_CUDA_H -#include - -void Dust_Update(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dt, Real gamma, Real *params_dev); +#include "../utils/gpu.hpp" +#include +#include "../global/global.h" -__global__ void Dust_Kernel(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dt, Real gamma, Real *params_dev); +void Dust_Update(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dt, Real gamma); -void Conserved_Init(Real *host_conserved, Real rho, Real vx, Real vy, Real vz, Real P, Real rho_dust, Real gamma, int n_cells, int nx, int ny, int nz, int n_ghost, int n_fields); +__global__ void Dust_Kernel(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dt, Real gamma); __device__ Real calc_tau_sp(Real n, Real T); __device__ Real calc_dd_dt(Real d_dust, Real tau_sp); +#endif // DUST +#endif // CUDA #endif // DUST_CUDA_H \ No newline at end of file diff --git a/src/grid/grid3D.cpp b/src/grid/grid3D.cpp index 75d3f0219..9db20c824 100644 --- a/src/grid/grid3D.cpp +++ b/src/grid/grid3D.cpp @@ -41,7 +41,7 @@ #endif #ifdef DUST -#include "../dust/dust_cuda.h" // provides dust_update +#include "../dust/dust_cuda.h" // provides Dust_Update #endif @@ -499,7 +499,7 @@ Real Grid3D::Update_Grid(void) #ifdef DUST // ==Apply dust from dust/dust_cuda.h== - dust_update(C.device, H.nx, H.ny, H.nz, H.n_ghost, H.n_fields, H.dt, gama, dev_dti_array); + Dust_Update(C.device, H.nx, H.ny, H.nz, H.n_ghost, H.n_fields, H.dt, gama); #endif // DUST // Update the H and He ionization fractions and apply cooling and photoheating From 34434eb0ccb0300858d76f1f685d4984a595a84d Mon Sep 17 00:00:00 2001 From: helenarichie Date: Fri, 6 May 2022 14:48:58 -0400 Subject: [PATCH 067/694] add working dust model --- src/dust/dust_cuda.cu | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/src/dust/dust_cuda.cu b/src/dust/dust_cuda.cu index a6f7d93e1..4a0f523ce 100644 --- a/src/dust/dust_cuda.cu +++ b/src/dust/dust_cuda.cu @@ -41,15 +41,14 @@ __global__ void Dust_Kernel(Real *dev_conserved, int nx, int ny, int nz, int n_g // define physics variables Real d_gas, d_dust; // fluid mass densities - Real n = 1; // gas number density + Real n; // gas number density + Real mu = 0.6; // mean molecular weight Real T, E, P; // temperature, energy, pressure Real vx, vy, vz; // velocities #ifdef DE Real ge; #endif // DE - dt *= 3.154e7; // in seconds - // define integration variables Real dd_dt; // instantaneous rate of change in dust density Real dd; // change in dust density at current time-step @@ -65,9 +64,7 @@ __global__ void Dust_Kernel(Real *dev_conserved, int nx, int ny, int nz, int n_g //printf("kernel: %7.4e\n", d_dust); // make sure thread hasn't crashed - // multiply small values by arbitrary constant to preserve precision - d_gas *= K; - d_dust *= K; + n = d_gas*DENSITY_UNIT / (mu * MP); if (E < 0.0 || E != E) return; @@ -92,7 +89,7 @@ __global__ void Dust_Kernel(Real *dev_conserved, int nx, int ny, int nz, int n_g T = T_init; - Real tau_sp = calc_tau_sp(n, T); + Real tau_sp = calc_tau_sp(n, T) / TIME_UNIT; // s dd_dt = calc_dd_dt(d_dust, tau_sp); dd = dd_dt * dt; @@ -111,9 +108,6 @@ __global__ void Dust_Kernel(Real *dev_conserved, int nx, int ny, int nz, int n_g // update dust density d_dust += dd; - // remove scaling constant - d_gas /= K; - d_dust /= K; dev_conserved[5*n_cells + id] = d_dust; #ifdef DE @@ -130,14 +124,15 @@ __device__ Real calc_tau_sp(Real n, Real T) { Real omega = 2.5; Real A = 0.17e9 * YR_IN_S; // 0.17 Gyr in s - return A * (a1/d0) * (pow(T_0/T, omega) + 1); // s + Real tau_sp = A * (a1/d0) * (pow(T_0/T, omega) + 1); // s + + return tau_sp; } __device__ Real calc_dd_dt(Real d_dust, Real tau_sp) { return -d_dust / (tau_sp/3); } - #endif // SCALAR #endif // DUST #endif // CUDA \ No newline at end of file From 36dfa0ed496d2ab1a2dc58a18cc454962728d6a5 Mon Sep 17 00:00:00 2001 From: helenarichie Date: Wed, 11 May 2022 14:06:32 -0400 Subject: [PATCH 068/694] add tests for sputtering growth rate and timescales, also correct timescale units in dust kernel to be in simulation units --- src/dust/dust_cuda.cu | 11 ++---- src/dust/dust_cuda.h | 4 +- src/dust/dust_cuda_tests.cpp | 74 ++++++++++++++++++++++++++++++++++++ src/dust/dust_cuda_tests.cu | 46 ---------------------- 4 files changed, 79 insertions(+), 56 deletions(-) create mode 100644 src/dust/dust_cuda_tests.cpp delete mode 100644 src/dust/dust_cuda_tests.cu diff --git a/src/dust/dust_cuda.cu b/src/dust/dust_cuda.cu index 4a0f523ce..b18ec90e1 100644 --- a/src/dust/dust_cuda.cu +++ b/src/dust/dust_cuda.cu @@ -25,9 +25,7 @@ void Dust_Update(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n } __global__ void Dust_Kernel(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dt, Real gamma) { - //__shared__ Real min_dt[TPB]; // get grid indices - Real const K = 1e30; int n_cells = nx * ny * nz; int is, ie, js, je, ks, ke; cuda_utilities::Get_Real_Indices(n_ghost, nx, ny, nz, is, ie, js, je, ks, ke); @@ -58,13 +56,12 @@ __global__ void Dust_Kernel(Real *dev_conserved, int nx, int ny, int nz, int n_g if (xid >= is && xid < ie && yid >= js && yid < je && zid >= ks && zid < ke) { // get quantities from dev_conserved d_gas = dev_conserved[id]; - //d_dust = dev_conserved[5*n_cells + id]; d_dust = dev_conserved[5*n_cells + id]; E = dev_conserved[4*n_cells + id]; //printf("kernel: %7.4e\n", d_dust); // make sure thread hasn't crashed - n = d_gas*DENSITY_UNIT / (mu * MP); + n = d_gas*DENSITY_UNIT / (mu*MP); if (E < 0.0 || E != E) return; @@ -89,15 +86,13 @@ __global__ void Dust_Kernel(Real *dev_conserved, int nx, int ny, int nz, int n_g T = T_init; - Real tau_sp = calc_tau_sp(n, T) / TIME_UNIT; // s + Real tau_sp = calc_tau_sp(n, T) / TIME_UNIT; // kyr, sim units - dd_dt = calc_dd_dt(d_dust, tau_sp); + dd_dt = calc_dd_dt(d_dust, tau_sp); dd = dd_dt * dt; // ensure that dust density is not changing too rapidly - bool time_refine = false; while (dd/d_dust > dd_max) { - time_refine = true; dt_sub = dd_max * d_dust / dd_dt; d_dust += dt_sub * dd_dt; dt -= dt_sub; diff --git a/src/dust/dust_cuda.h b/src/dust/dust_cuda.h index ac993a24c..ac2f02c50 100644 --- a/src/dust/dust_cuda.h +++ b/src/dust/dust_cuda.h @@ -12,9 +12,9 @@ void Dust_Update(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n __global__ void Dust_Kernel(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dt, Real gamma); -__device__ Real calc_tau_sp(Real n, Real T); +__device__ __host__ Real calc_tau_sp(Real n, Real T); -__device__ Real calc_dd_dt(Real d_dust, Real tau_sp); +__device__ __host__ Real calc_dd_dt(Real d_dust, Real tau_sp); #endif // DUST #endif // CUDA diff --git a/src/dust/dust_cuda_tests.cpp b/src/dust/dust_cuda_tests.cpp new file mode 100644 index 000000000..2f2742cc4 --- /dev/null +++ b/src/dust/dust_cuda_tests.cpp @@ -0,0 +1,74 @@ +/*! +* \file dust_cuda_tests.cpp +* \author Helena Richie (helenarichie@pitt.edu) +* \brief Test dust model functions +* +*/ + +// STL Includes +#include +#include +#include + +// External Includes +#include // Include GoogleTest and related libraries/headers + +// Local Includes +#include "../global/global_cuda.h" +#include "../utils/gpu.hpp" +#include "../utils/testing_utilities.h" +#include "../dust/dust_cuda.h" // Include code to test + +#ifdef DUST + +TEST(tDUSTTestSputteringTimescale, CorrectInputExpectCorrectOutput) // test suite name, test name +{ + // Parameters + Real YR_IN_S = 3.154e7; + Real const k_test_n = 1; + Real const k_test_T = pow(10, 5.0); + + Real const k_fiducial_num = 182565146.96398282; + + Real test_num = calc_tau_sp(k_test_n, k_test_T) / YR_IN_S; // yr + + double abs_diff; + int64_t ulps_diff; + + bool is_true; + + is_true = testingUtilities::nearlyEqualDbl(k_fiducial_num, test_num, abs_diff, ulps_diff); + + EXPECT_TRUE(is_true) + << "The fiducial value is: " << k_fiducial_num << std::endl + << "The test value is: " << test_num << std::endl + << "The absolute difference is: " << abs_diff << std::endl + << "The ULP difference is: " << ulps_diff << std::endl; +} + +TEST(tDUSTTestSputteringGrowthRate, CorrectInputExpectCorrectOutput) // test suite name, test name +{ + // Parameters + Real YR_IN_S = 3.154e7; + Real const k_test_tau_sp = 0.17e6; // kyr + Real const k_test_d_dust = 1e-26 / DENSITY_UNIT; // sim units + + Real const k_fiducial_num = -2.6073835738056728; + + Real test_num = calc_dd_dt(k_test_d_dust, k_test_tau_sp); + + double abs_diff; + int64_t ulps_diff; + + bool is_true; + + is_true = testingUtilities::nearlyEqualDbl(k_fiducial_num, test_num, abs_diff, ulps_diff); + + EXPECT_TRUE(is_true) + << "The fiducial value is: " << k_fiducial_num << std::endl + << "The test value is: " << test_num << std::endl + << "The absolute difference is: " << abs_diff << std::endl + << "The ULP difference is: " << ulps_diff << std::endl; +} + +#endif // DUST \ No newline at end of file diff --git a/src/dust/dust_cuda_tests.cu b/src/dust/dust_cuda_tests.cu deleted file mode 100644 index 784f37e5f..000000000 --- a/src/dust/dust_cuda_tests.cu +++ /dev/null @@ -1,46 +0,0 @@ -/*! -* \file dust_cuda_tests.cu -* \author Helena Richie (helenarichie@pitt.edu) -* \brief Test dust model functions -* -*/ - -// STL Includes -#include -#include -#include - -// External Includes -#include // Include GoogleTest and related libraries/headers - -// Local Includes -#include "../global/global_cuda.h" -#include "../utils/gpu.hpp" -#include "../utils/testing_utilities.h" -#include "../dust/dust_cuda_updated.h" // Include code to test - -#ifdef DUST_GPU - -TEST(tDUSTAccretionTest, AccretionTestExpectCorrectOutput) // test suite name, test name -{ - Real const testn = 1; - Real const testT = pow(10, 5.0); - Real const testNumber = CIE_cool(testn, testT); - - Real const fiducialNumber = 4.6639082688443984*pow(10, -22); - - double absoluteDiff; - int64_t ulpsDiff; - - bool isTrue; - - isTrue = testingUtilities::nearlyEqualDbl(fiducialNumber, testNumber, absoluteDiff, ulpsDiff); - - EXPECT_TRUE(isTrue) - << "The fiducial value is: " << fiducialNumber << std::endl - << "The test value is: " << testNumber << std::endl - << "The absolute difference is: " << absoluteDiff << std::endl - << "The ULP difference is: " << ulpsDiff << std::endl; -} - -#endif // DUST_GPU From 87a4da18f3eb2ccd65b54bc4c0f4d993a8847aee Mon Sep 17 00:00:00 2001 From: helenarichie Date: Wed, 18 May 2022 11:23:09 -0400 Subject: [PATCH 069/694] add cooling to dust build type --- builds/make.type.dust | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/builds/make.type.dust b/builds/make.type.dust index 7b9f0c1ee..d24512619 100644 --- a/builds/make.type.dust +++ b/builds/make.type.dust @@ -1,4 +1,4 @@ -#-- Default hydro + dust_gpu +#-- Default hydro + dust #-- separated output flag so that it can be overriden in target-specific # for make check @@ -25,6 +25,9 @@ DFLAGS += -DSCALAR # Define dust macro DFLAGS += -DDUST +# Apply the cooling in the GPU from precomputed tables +DFLAGS += -DCOOLING_GPU + #Measure the Timing of the different stages #DFLAGS += -DCPU_TIME From 56f8dd335c218c57b17e12c2c339407d4cf4ff2e Mon Sep 17 00:00:00 2001 From: helenarichie Date: Wed, 18 May 2022 12:53:57 -0400 Subject: [PATCH 070/694] add cloud-wind init code --- cloud-wind/cloud-wind.txt | 44 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100644 cloud-wind/cloud-wind.txt diff --git a/cloud-wind/cloud-wind.txt b/cloud-wind/cloud-wind.txt new file mode 100644 index 000000000..5d15115b8 --- /dev/null +++ b/cloud-wind/cloud-wind.txt @@ -0,0 +1,44 @@ +# +# Sample Parameter File +# + +################################################ +# Parameters required for all problems +################################################ +# number of grid cells in the x dimension +nx=2048 +# number of grid cells in the y dimension +ny=512 +# number of grid cells in the z dimension +nz=512 +# x direction lower domain boundary +xmin=-0.5 +# x direction domain length +xlen=1.0 +# y direction lower domain boundary +ymin=-0.5 +# y direction domain length +ylen=1.0 +# z direction lower domain boundary +zmin=-0.5 +# y direction domain length +ylen=1.0 +# final output time +tout=0.2 +# time interval for output +outstep=0.01 +# ratio of specific heats +gamma=1.66666667 +# name of initial conditions +init=Clouds +# type of boundary condition, options include 1 (periodic), 2 (reflective), 3 (transmissive), 4 (custom) +xl_bcnd=1 +xu_bcnd=1 +yl_bcnd=3 +yu_bcnd=3 +zl_bcnd=3 +zu_bcnd=3 +# path to output directory +outdir=./ + + From b3af6d3c37733dde244492f06b03cd7697abdbc3 Mon Sep 17 00:00:00 2001 From: helenarichie Date: Thu, 19 May 2022 12:00:19 -0400 Subject: [PATCH 071/694] update cloud-wind input file --- cloud-wind/cloud-wind.txt | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/cloud-wind/cloud-wind.txt b/cloud-wind/cloud-wind.txt index 5d15115b8..caf5950c9 100644 --- a/cloud-wind/cloud-wind.txt +++ b/cloud-wind/cloud-wind.txt @@ -6,15 +6,15 @@ # Parameters required for all problems ################################################ # number of grid cells in the x dimension -nx=2048 +nx=512 # number of grid cells in the y dimension -ny=512 +ny=128 # number of grid cells in the z dimension -nz=512 +nz=128 # x direction lower domain boundary -xmin=-0.5 +xmin=-1.0 # x direction domain length -xlen=1.0 +xlen=2.0 # y direction lower domain boundary ymin=-0.5 # y direction domain length @@ -22,11 +22,11 @@ ylen=1.0 # z direction lower domain boundary zmin=-0.5 # y direction domain length -ylen=1.0 +zlen=1.0 # final output time tout=0.2 # time interval for output -outstep=0.01 +outstep=0.2 # ratio of specific heats gamma=1.66666667 # name of initial conditions From 909e9dc88b7bdd6fc209e2d9ed0415f8d6699e2a Mon Sep 17 00:00:00 2001 From: helenarichie Date: Thu, 19 May 2022 12:11:34 -0400 Subject: [PATCH 072/694] add wind build type --- builds/make.type.dust | 2 ++ cloud-wind/cloud-wind.txt | 44 ---------------------------------- constant.txt | 50 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 52 insertions(+), 44 deletions(-) delete mode 100644 cloud-wind/cloud-wind.txt create mode 100644 constant.txt diff --git a/builds/make.type.dust b/builds/make.type.dust index d24512619..9b20ea4b1 100644 --- a/builds/make.type.dust +++ b/builds/make.type.dust @@ -31,6 +31,8 @@ DFLAGS += -DCOOLING_GPU #Measure the Timing of the different stages #DFLAGS += -DCPU_TIME +#DFLAGS += -DSLICES + DFLAGS += $(OUTPUT) #Select if the Hydro Conserved data will reside in the GPU diff --git a/cloud-wind/cloud-wind.txt b/cloud-wind/cloud-wind.txt deleted file mode 100644 index 5d15115b8..000000000 --- a/cloud-wind/cloud-wind.txt +++ /dev/null @@ -1,44 +0,0 @@ -# -# Sample Parameter File -# - -################################################ -# Parameters required for all problems -################################################ -# number of grid cells in the x dimension -nx=2048 -# number of grid cells in the y dimension -ny=512 -# number of grid cells in the z dimension -nz=512 -# x direction lower domain boundary -xmin=-0.5 -# x direction domain length -xlen=1.0 -# y direction lower domain boundary -ymin=-0.5 -# y direction domain length -ylen=1.0 -# z direction lower domain boundary -zmin=-0.5 -# y direction domain length -ylen=1.0 -# final output time -tout=0.2 -# time interval for output -outstep=0.01 -# ratio of specific heats -gamma=1.66666667 -# name of initial conditions -init=Clouds -# type of boundary condition, options include 1 (periodic), 2 (reflective), 3 (transmissive), 4 (custom) -xl_bcnd=1 -xu_bcnd=1 -yl_bcnd=3 -yu_bcnd=3 -zl_bcnd=3 -zu_bcnd=3 -# path to output directory -outdir=./ - - diff --git a/constant.txt b/constant.txt new file mode 100644 index 000000000..af1610674 --- /dev/null +++ b/constant.txt @@ -0,0 +1,50 @@ +# +# Parameter File for box filled with gas +# + +################################################ +# number of grid cells in the x dimension +nx=10 +# number of grid cells in the y dimension +ny=1 +# number of grid cells in the z dimension +nz=1 +# final output time (kyr) +tout=100000.0 +# time interval for output +outstep=100 +# name of initial conditions +init=Constant +# domain properties +xmin=0.0 +ymin=0.0 +zmin=0.0 +xlen=1.0 +ylen=1.0 +zlen=1.0 +# type of boundary conditions +xl_bcnd=1 +xu_bcnd=1 +yl_bcnd=1 +yu_bcnd=1 +zl_bcnd=1 +zu_bcnd=1 +# path to output directory +outdir=./ + +################################################# +# density +rho=14827371.70126647 +# velocity +vx=0 +vy=0 +vz=0 +# pressure +P=1e-2 +# Magnetic Field +Bx=0.0 +By=0.0 +Bz=0.0 +# value of gamma +gamma=1.666666667 + From 5b237c5bc898b3420602489f7ad15378ef96430b Mon Sep 17 00:00:00 2001 From: helenarichie Date: Fri, 20 May 2022 11:35:20 -0400 Subject: [PATCH 073/694] change sim boundaries --- cloud-wind/cloud-wind.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cloud-wind/cloud-wind.txt b/cloud-wind/cloud-wind.txt index caf5950c9..5eb8d0ada 100644 --- a/cloud-wind/cloud-wind.txt +++ b/cloud-wind/cloud-wind.txt @@ -12,15 +12,15 @@ ny=128 # number of grid cells in the z dimension nz=128 # x direction lower domain boundary -xmin=-1.0 +xmin=0.0 # x direction domain length xlen=2.0 # y direction lower domain boundary -ymin=-0.5 +ymin=0.0 # y direction domain length ylen=1.0 # z direction lower domain boundary -zmin=-0.5 +zmin=0.0 # y direction domain length zlen=1.0 # final output time From 16ba335a187ce0896772845a4c80fa12a2576f9c Mon Sep 17 00:00:00 2001 From: helenarichie Date: Fri, 20 May 2022 11:43:12 -0400 Subject: [PATCH 074/694] change sim boundaries --- cloud-wind/cloud-wind.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cloud-wind/cloud-wind.txt b/cloud-wind/cloud-wind.txt index 5eb8d0ada..296e7820b 100644 --- a/cloud-wind/cloud-wind.txt +++ b/cloud-wind/cloud-wind.txt @@ -6,7 +6,7 @@ # Parameters required for all problems ################################################ # number of grid cells in the x dimension -nx=512 +nx=256 # number of grid cells in the y dimension ny=128 # number of grid cells in the z dimension From 0002f5a477dc57f31ee608ab2e6c2f721d794a51 Mon Sep 17 00:00:00 2001 From: ojwg Date: Sun, 8 May 2022 13:52:40 -0400 Subject: [PATCH 075/694] SN feedback improvements bug fix SN feedback interim saving work before refreshing from CAAR branch --- src/analysis/feedback_analysis_gpu.cu | 4 +- src/cooling/cooling_cuda.cu | 17 +- src/particles/feedback_CIC_gpu.cu | 234 +++++++++++--------------- src/particles/particles_3D.cpp | 2 +- src/particles/supernova.h | 14 +- 5 files changed, 124 insertions(+), 147 deletions(-) diff --git a/src/analysis/feedback_analysis_gpu.cu b/src/analysis/feedback_analysis_gpu.cu index c3cfcf8dd..37af2ff4f 100644 --- a/src/analysis/feedback_analysis_gpu.cu +++ b/src/analysis/feedback_analysis_gpu.cu @@ -5,8 +5,9 @@ #include #ifdef PARTICLES_GPU +#define MU 0.6 // in cgs, this is 0.01 cm^{-3} -#define MIN_DENSITY 148273.7 +#define MIN_DENSITY 0.01 * MP * MU *LENGTH_UNIT * LENGTH_UNIT * LENGTH_UNIT / MASS_UNIT // 148279.7 #define TPB_ANALYSIS 1024 @@ -146,7 +147,6 @@ void FeedbackAnalysis::Compute_Gas_Velocity_Dispersion_GPU(Grid3D& G) { total_vel = h_partial_vel[0]; #endif - chprintf("sum(density): %.5e, sum(|v-v_circ|^2*dens): %.5e\n", total_mass, total_vel); //FIXME remove debug printout chprintf("feedback: time %f, dt=%f, vrms = %f km/s\n", G.H.t, G.H.dt, sqrt(total_vel/total_mass)*VELOCITY_UNIT/1e5); CHECK(cudaFree(d_partial_vel)); diff --git a/src/cooling/cooling_cuda.cu b/src/cooling/cooling_cuda.cu index 14528b370..855b8b6cc 100644 --- a/src/cooling/cooling_cuda.cu +++ b/src/cooling/cooling_cuda.cu @@ -124,8 +124,11 @@ __global__ void cooling_kernel(Real *dev_conserved, int nx, int ny, int nz, int T = T_init; //if (T > T_max) printf("%3d %3d %3d High T cell. n: %e T: %e\n", xid, yid, zid, n, T); // call the cooling function + #ifdef CLOUDY_COOL + cool = Cloudy_cool(n, T); + #else cool = CIE_cool(n, T); - //cool = Cloudy_cool(n, T); + #endif // calculate change in temperature given dt del_T = cool*dt*TIME_UNIT*(gamma-1.0)/(n*KB); @@ -139,8 +142,11 @@ __global__ void cooling_kernel(Real *dev_conserved, int nx, int ny, int nz, int // how much time is left from the original timestep? dt -= dt_sub; // calculate cooling again + #ifdef CLOUDY_COOL + cool = Cloudy_cool(n, T); + #else cool = CIE_cool(n, T); - //cool = Cloudy_cool(n, T); + #endif // calculate new change in temperature del_T = cool*dt*TIME_UNIT*(gamma-1.0)/(n*KB); } @@ -160,9 +166,14 @@ __global__ void cooling_kernel(Real *dev_conserved, int nx, int ny, int nz, int #ifdef DE ge -= KB*del_T / (mu*MP*(gamma-1.0)*SP_ENERGY_UNIT); #endif + // calculate cooling rate for new T + #ifdef CLOUDY_COOL + cool = Cloudy_cool(n, T); + #else cool = CIE_cool(n, T); - //cool = Cloudy_cool(n, T); + #endif + //printf("%d %d %d %e %e %e\n", xid, yid, zid, n, T, cool); // only use good cells in timestep calculation (in case some have crashed) if (n > 0 && T > 0 && cool > 0.0) { diff --git a/src/particles/feedback_CIC_gpu.cu b/src/particles/feedback_CIC_gpu.cu index 6e9596c43..8fbf507b1 100644 --- a/src/particles/feedback_CIC_gpu.cu +++ b/src/particles/feedback_CIC_gpu.cu @@ -35,7 +35,7 @@ __device__ double atomicMax(double* address, double val) __global__ void initState_kernel(unsigned int seed, curandStateMRG32k3a_t* states) { int id = blockIdx.x*blockDim.x + threadIdx.x; - curand_init(seed, id, 0, &states[id]); + curand_init(seed + id, id, 0, &states[id]); } @@ -50,6 +50,7 @@ __global__ void initState_kernel(unsigned int seed, curandStateMRG32k3a_t* state void Supernova::initState(struct parameters *P, part_int_t n_local, Real allocation_factor) { printf("Supernova::initState start\n"); n_states = n_local*allocation_factor; + //n_states = 10; cudaMalloc((void**) &curandStates, n_states*sizeof(curandStateMRG32k3a_t)); //int ngrid = (n_states + TPB_PARTICLES - 1) / TPB_PARTICLES; @@ -94,10 +95,19 @@ __device__ Real Calc_Timestep(Real gamma, Real *density, Real *momentum_x, Real } + +__device__ Real frac(int i, Real dx) { + return (-0.5*i*i -0.5*i + 1 + i*dx)*0.5; +} + +__device__ Real d_fr(int i, Real dx) { + return (dx > 0.5)*i*(1-2*dx) + ((i+1)*dx + 0.5*(i - 1)) -3*(i-1)*(i+1)*(0.5 - dx); +} + __global__ void Cluster_Feedback_Kernel(part_int_t n_local, Real* pos_x_dev, Real* pos_y_dev, Real* pos_z_dev, - Real* mass_dev, Real* age_dev, Real xMin, Real yMin, Real zMin, Real xLen, Real yLen, Real zLen, - Real dx, Real dy, Real dz, int nx_g, int ny_g, int nz_g, int n_ghost, Real t, Real dt, Real* dti, Real* info, - Real* density, Real* gasEnergy, Real* energy, Real* momentum_x, Real* momentum_y, Real* momentum_z, Real gamma, curandStateMRG32k3a_t* states){ + Real* mass_dev, Real* age_dev, Real xMin, Real yMin, Real zMin, Real xLen, Real yLen, Real zLen, + Real dx, Real dy, Real dz, int nx_g, int ny_g, int nz_g, int n_ghost, Real t, Real dt, Real* dti, Real* info, + Real* density, Real* gasEnergy, Real* energy, Real* momentum_x, Real* momentum_y, Real* momentum_z, Real gamma, curandStateMRG32k3a_t* states){ __shared__ Real s_info[FEED_INFO_N*TPB_FEEDBACK]; // for collecting SN feedback information, like # of SNe or # resolved. int tid = threadIdx.x; @@ -109,7 +119,7 @@ __global__ void Cluster_Feedback_Kernel(part_int_t n_local, Real* pos_x_dev, Rea s_info[FEED_INFO_N*tid + 3] = 0; s_info[FEED_INFO_N*tid + 4] = 0; - if ( gtid < n_local) { + if (gtid < n_local) { Real xMax, yMax, zMax; xMax = xMin + xLen; yMax = yMin + yLen; @@ -118,7 +128,9 @@ __global__ void Cluster_Feedback_Kernel(part_int_t n_local, Real* pos_x_dev, Rea Real pos_x, pos_y, pos_z; Real cell_center_x, cell_center_y, cell_center_z; Real delta_x, delta_y, delta_z; - Real feedback_energy = 0, feedback_density=0, feedback_momentum=0, n_0, shell_radius; + Real x_frac, y_frac, z_frac; + Real px, py, pz, ek, d; + Real feedback_energy=0, feedback_density=0, feedback_momentum=0, n_0, shell_radius; bool is_resolved = false; int pcell_x, pcell_y, pcell_z, pcell_index; Real dV = dx*dy*dz; @@ -128,7 +140,7 @@ __global__ void Cluster_Feedback_Kernel(part_int_t n_local, Real* pos_x_dev, Rea pos_y = pos_y_dev[gtid]; pos_z = pos_z_dev[gtid]; - bool in_local = (pos_x >= xMin && pos_x < zMax) && + bool in_local = (pos_x >= xMin && pos_x < xMax) && (pos_y >= yMin && pos_y < yMax) && (pos_z >= zMin && pos_z < zMax); if (!in_local) { @@ -136,11 +148,11 @@ __global__ void Cluster_Feedback_Kernel(part_int_t n_local, Real* pos_x_dev, Rea pos_x, pos_y, pos_z, xMin, xMax, yMin, yMax, zMin, zMax); } - int indx_x = (int) floor( ( pos_x - xMin - 0.5*dx ) / dx ); - int indx_y = (int) floor( ( pos_y - yMin - 0.5*dy ) / dy ); - int indx_z = (int) floor( ( pos_z - zMin - 0.5*dz ) / dz ); + int indx_x = (int) floor( ( pos_x - xMin ) / dx ); + int indx_y = (int) floor( ( pos_y - yMin ) / dy ); + int indx_z = (int) floor( ( pos_z - zMin ) / dz ); - bool ignore = indx_x < -1 || indx_y < -1 || indx_z < -1 || indx_x > nx_g-3 || indx_y > ny_g-3 || indx_y > nz_g-3; + bool ignore = indx_x < 0 || indx_y < 0 || indx_z < 0 || indx_x > nx_g-2 || indx_y > ny_g-2 || indx_z > nz_g-2; if (ignore) { printf(" Feedback GPU: Particle CIC index err [%f %f %f] [%d %d %d] [%d %d %d] \n ", pos_x, pos_y, pos_z, indx_x, indx_y, indx_z, nx_g, ny_g, nz_g); @@ -159,13 +171,13 @@ __global__ void Cluster_Feedback_Kernel(part_int_t n_local, Real* pos_x_dev, Rea states[gtid] = state; if (N > 0) { - // first subtract ejected mass from particle mass_dev[gtid] -= N * Supernova::MASS_PER_SN; feedback_energy = N * Supernova::ENERGY_PER_SN / dV; feedback_density = N * Supernova::MASS_PER_SN / dV; n_0 = density[pcell_index] * DENSITY_UNIT / (Supernova::MU*MP); - feedback_momentum = Supernova::FINAL_MOMENTUM * pow(n_0, -0.17) * pow(N, 0.93) / sqrt(3.0) / dV; + feedback_momentum = Supernova::FINAL_MOMENTUM * pow(n_0, -0.17) * pow(N, 0.93); shell_radius = Supernova::R_SH * pow(n_0, -0.46) * pow(N, 0.29); + //printf(" N=%d, shell_rad=%0.4e, n_0=%0.4e\n", N, shell_radius, n_0); is_resolved = 3 * max(dx, max(dy, dz)) <= shell_radius; s_info[FEED_INFO_N*tid] = 1.*N; @@ -175,126 +187,81 @@ __global__ void Cluster_Feedback_Kernel(part_int_t n_local, Real* pos_x_dev, Rea cell_center_x = xMin + indx_x*dx + 0.5*dx; cell_center_y = yMin + indx_y*dy + 0.5*dy; cell_center_z = zMin + indx_z*dz + 0.5*dz; - delta_x = 1 - ( pos_x - cell_center_x ) / dx; - delta_y = 1 - ( pos_y - cell_center_y ) / dy; - delta_z = 1 - ( pos_z - cell_center_z ) / dz; - indx_x += n_ghost; - indx_y += n_ghost; - indx_z += n_ghost; - - int indx = indx_x + indx_y*nx_g + indx_z*nx_g*ny_g; - - if (!is_resolved) s_info[FEED_INFO_N*tid + 4] = feedback_momentum * dV; - if (is_resolved) { - atomicAdd(&density[indx], feedback_density * delta_x * delta_y * delta_z); - atomicAdd(&gasEnergy[indx], feedback_energy * delta_x * delta_y * delta_z); - atomicAdd(&energy[indx], feedback_energy * delta_x * delta_y * delta_z); - s_info[FEED_INFO_N*tid + 3] = feedback_energy * fabs(delta_x * delta_y * delta_z) * dV; - } else { - atomicAdd(&momentum_x[indx], -delta_x * feedback_momentum); - atomicAdd(&momentum_y[indx], -delta_y * feedback_momentum); - atomicAdd(&momentum_z[indx], -delta_z * feedback_momentum); - //s_info[FEED_INFO_N*tid + 4] = (fabs(delta_x) /*+ fabs(delta_y) + fabs(delta_z)*/)*feedback_momentum * dV; - } - local_dti = fmax(local_dti, Calc_Timestep(gamma, density, momentum_x, momentum_y, momentum_z, energy, indx, dx, dy, dz)); - - indx = (indx_x+1) + indx_y*nx_g + indx_z*nx_g*ny_g; - if (is_resolved) { - atomicAdd(&density[indx], feedback_density * (1-delta_x) * delta_y * delta_z); - atomicAdd(&gasEnergy[indx], feedback_energy * (1-delta_x) * delta_y * delta_z); - atomicAdd(&energy[indx], feedback_energy * (1-delta_x) * delta_y * delta_z); - s_info[FEED_INFO_N*tid + 3] += feedback_energy * fabs((1-delta_x) * delta_y * delta_z) * dV; - } else { - atomicAdd(&momentum_x[indx], delta_x * feedback_momentum); - atomicAdd(&momentum_y[indx], -delta_y * feedback_momentum); - atomicAdd(&momentum_z[indx], -delta_z * feedback_momentum); - // s_info[FEED_INFO_N*tid + 4] += (fabs(delta_x) /*+ fabs(delta_y) + fabs(delta_z)*/)*feedback_momentum * dV; - } - local_dti = fmax(local_dti, Calc_Timestep(gamma, density, momentum_x, momentum_y, momentum_z, energy, indx, dx, dy, dz)); - - indx = indx_x + (indx_y+1)*nx_g + indx_z*nx_g*ny_g; - if (is_resolved) { - atomicAdd(&density[indx], feedback_density * delta_x * (1-delta_y) * delta_z); - atomicAdd(&gasEnergy[indx], feedback_energy * delta_x * (1-delta_y) * delta_z); - atomicAdd(&energy[indx], feedback_energy * delta_x * (1-delta_y) * delta_z); - s_info[FEED_INFO_N*tid + 3] += feedback_energy * fabs(delta_x * (1-delta_y )* delta_z) * dV; - } else { - atomicAdd(&momentum_x[indx], -delta_x * feedback_momentum); - atomicAdd(&momentum_y[indx], delta_y * feedback_momentum); - atomicAdd(&momentum_z[indx], -delta_z * feedback_momentum); - //s_info[FEED_INFO_N*tid + 4] += (fabs(delta_x) /*+ fabs(delta_y) + fabs(delta_z)*/)*feedback_momentum * dV; - } - local_dti = fmax(local_dti, Calc_Timestep(gamma, density, momentum_x, momentum_y, momentum_z, energy, indx, dx, dy, dz)); - - indx = indx_x + indx_y*nx_g + (indx_z+1)*nx_g*ny_g; - if (is_resolved) { - atomicAdd(&density[indx], feedback_density * delta_x * delta_y * (1-delta_z)); - atomicAdd(&gasEnergy[indx], feedback_energy * delta_x * delta_y * (1-delta_z)); - atomicAdd(&energy[indx], feedback_energy * delta_x * delta_y * (1-delta_z)); - s_info[FEED_INFO_N*tid + 3] += feedback_energy * fabs(delta_x * delta_y * (1 - delta_z)) * dV; - } else { - atomicAdd(&momentum_x[indx], -delta_x * feedback_momentum); - atomicAdd(&momentum_y[indx], -delta_y * feedback_momentum); - atomicAdd(&momentum_z[indx], delta_z * feedback_momentum); - //s_info[FEED_INFO_N*tid + 4] += (fabs(delta_x) /*+ fabs(delta_y) + fabs(delta_z)*/)*feedback_momentum * dV; - } - local_dti = fmax(local_dti, Calc_Timestep(gamma, density, momentum_x, momentum_y, momentum_z, energy, indx, dx, dy, dz)); - - indx = (indx_x+1) + (indx_y+1)*nx_g + indx_z*nx_g*ny_g; - if (is_resolved) { - atomicAdd(&density[indx], feedback_density * (1-delta_x) * (1-delta_y) * delta_z); - atomicAdd(&gasEnergy[indx], feedback_energy * (1-delta_x) * (1-delta_y) * delta_z); - atomicAdd(&energy[indx], feedback_energy * (1-delta_x) * (1-delta_y) * delta_z); - s_info[FEED_INFO_N*tid + 3] += feedback_energy * fabs((1-delta_x) * (1-delta_y) * delta_z) * dV; - } else { - atomicAdd(&momentum_x[indx], delta_x * feedback_momentum); - atomicAdd(&momentum_y[indx], delta_y * feedback_momentum); - atomicAdd(&momentum_z[indx], -delta_z * feedback_momentum); - //s_info[FEED_INFO_N*tid + 4] += (fabs(delta_x) /*+ fabs(delta_y) + fabs(delta_z)*/)*feedback_momentum * dV; - } - local_dti = fmax(local_dti, Calc_Timestep(gamma, density, momentum_x, momentum_y, momentum_z, energy, indx, dx, dy, dz)); - - indx = (indx_x+1) + indx_y*nx_g + (indx_z+1)*nx_g*ny_g; - if (is_resolved) { - atomicAdd(&density[indx], feedback_density * (1-delta_x) * delta_y * (1-delta_z)); - atomicAdd(&gasEnergy[indx], feedback_energy * (1-delta_x) * delta_y * (1-delta_z)); - atomicAdd(&energy[indx], feedback_energy * (1-delta_x) * delta_y * (1-delta_z)); - s_info[FEED_INFO_N*tid + 3] += feedback_energy * fabs((1-delta_x) * delta_y * (1-delta_z)) * dV; - } else { - atomicAdd(&momentum_x[indx], delta_x * feedback_momentum); - atomicAdd(&momentum_y[indx], -delta_y * feedback_momentum); - atomicAdd(&momentum_z[indx], delta_z * feedback_momentum); - //s_info[FEED_INFO_N*tid + 4] += (fabs(delta_x) /*+ fabs(delta_y) + fabs(delta_z)*/)*feedback_momentum * dV; - } - local_dti = fmax(local_dti, Calc_Timestep(gamma, density, momentum_x, momentum_y, momentum_z, energy, indx, dx, dy, dz)); - - indx = indx_x + (indx_y+1)*nx_g + (indx_z+1)*nx_g*ny_g; - if (is_resolved) { - atomicAdd(&density[indx], feedback_density * delta_x * (1-delta_y) * (1-delta_z)); - atomicAdd(&gasEnergy[indx], feedback_energy * delta_x * (1-delta_y) * (1-delta_z)); - atomicAdd(&energy[indx], feedback_energy * delta_x * (1-delta_y) * (1-delta_z)); - s_info[FEED_INFO_N*tid + 3] += feedback_energy * fabs(delta_x * (1-delta_y) * (1-delta_z)) * dV; - } else { - atomicAdd(&momentum_x[indx], -delta_x * feedback_momentum); - atomicAdd(&momentum_y[indx], delta_y * feedback_momentum); - atomicAdd(&momentum_z[indx], delta_z * feedback_momentum); - //s_info[FEED_INFO_N*tid + 4] += (fabs(delta_x) /*+ fabs(delta_y) + fabs(delta_z)*/)*feedback_momentum * dV; - } - local_dti = fmax(local_dti, Calc_Timestep(gamma, density, momentum_x, momentum_y, momentum_z, energy, indx, dx, dy, dz)); - - indx = (indx_x+1) + (indx_y+1)*nx_g + (indx_z+1)*nx_g*ny_g; - if (is_resolved) { - atomicAdd(&density[indx], feedback_density * (1-delta_x) * (1-delta_y) * (1-delta_z)); - atomicAdd(&gasEnergy[indx], feedback_energy * (1-delta_x) * (1-delta_y) * (1-delta_z)); - atomicAdd(&energy[indx], feedback_energy * (1-delta_x) * (1-delta_y) * (1-delta_z)); - s_info[FEED_INFO_N*tid + 3] += feedback_energy * fabs((1-delta_x) * (1-delta_y) * (1-delta_z)) * dV; - } else { - atomicAdd(&momentum_x[indx], delta_x * feedback_momentum); - atomicAdd(&momentum_y[indx], delta_y * feedback_momentum); - atomicAdd(&momentum_z[indx], delta_z * feedback_momentum); - //s_info[FEED_INFO_N*tid + 4] += (fabs(delta_x) /*+ fabs(delta_y) + fabs(delta_z)*/)*feedback_momentum * dV; + + + int indx; + + if (is_resolved) { //if resolved inject energy and density + s_info[FEED_INFO_N*tid + 3] = feedback_energy *dV; + + indx_x = (int) floor( ( pos_x - xMin - 0.5*dx ) / dx ); + indx_y = (int) floor( ( pos_y - yMin - 0.5*dy ) / dy ); + indx_z = (int) floor( ( pos_z - zMin - 0.5*dz ) / dz ); + + cell_center_x = xMin + indx_x*dx + 0.5*dx; + cell_center_y = yMin + indx_y*dy + 0.5*dy; + cell_center_z = zMin + indx_z*dz + 0.5*dz; + + delta_x = 1 - ( pos_x - cell_center_x ) / dx; + delta_y = 1 - ( pos_y - cell_center_y ) / dy; + delta_z = 1 - ( pos_z - cell_center_z ) / dz; + indx_x += n_ghost; + indx_y += n_ghost; + indx_z += n_ghost; + + for (int i = 0; i < 2; i++) { + for (int j = 0; j < 2; j++) { + for (int k = 0; k < 2; k++) { + indx = (indx_x+i) + (indx_y+j)*nx_g + (indx_z+k)*nx_g*ny_g; + + // i_frac are the fractions of energy/density to be allocated + // to each of the 8 cells. + x_frac = i*(1-delta_x) + (1-i)*delta_x; + y_frac = j*(1-delta_y) + (1-j)*delta_y; + z_frac = k*(1-delta_z) + (1-k)*delta_z; + + atomicAdd(&density[indx], x_frac * y_frac * z_frac * feedback_density); + atomicAdd(&gasEnergy[indx], x_frac * y_frac * z_frac * feedback_energy ); + atomicAdd(&energy[indx], x_frac * y_frac * z_frac * feedback_energy ); + + local_dti = fmax(local_dti, Calc_Timestep(gamma, density, momentum_x, momentum_y, momentum_z, energy, indx, dx, dy, dz)); + } + } + } + } else { //if not resolved, inject momentum and density + s_info[FEED_INFO_N*tid + 4] = feedback_momentum; + feedback_momentum /= sqrt(3.0); + + delta_x = ( pos_x - indx_x*dx ) / dx; + delta_y = ( pos_y - indx_y*dy ) / dy; + delta_z = ( pos_z - indx_z*dz ) / dz; + indx_x += n_ghost; + indx_y += n_ghost; + indx_z += n_ghost; + + for (int i = -1; i < 2; i++) { + for (int j = -1; j < 2; j++) { + for (int k = -1; k < 2; k++) { + // index in array of conserved quantities + indx = (indx_x+i) + (indx_y+j)*nx_g + (indx_z+k)*nx_g*ny_g; + + px = d_fr(i, delta_x) * frac(j, delta_y) * frac(k, delta_z) * feedback_momentum; + py = frac(i, delta_x) * d_fr(j, delta_y) * frac(k, delta_z) * feedback_momentum; + pz = frac(i, delta_x) * frac(j, delta_y) * d_fr(k, delta_z) * feedback_momentum; + d = frac(i, delta_x) * frac(j, delta_y) * frac(k, delta_z) * feedback_density; + ek = (px*px + py+py + pz*pz)/2/d; + + atomicAdd(&momentum_x[indx], px); + atomicAdd(&momentum_y[indx], py); + atomicAdd(&momentum_z[indx], pz); + atomicAdd( &density[indx], d ); + atomicAdd( &energy[indx], ek); + + local_dti = fmax(local_dti, Calc_Timestep(gamma, density, momentum_x, momentum_y, momentum_z, energy, indx, dx, dy, dz)); + } + } + } } - local_dti = fmax(local_dti, Calc_Timestep(gamma, density, momentum_x, momentum_y, momentum_z, energy, indx, dx, dy, dz)); atomicMax(dti, local_dti); } } @@ -325,7 +292,6 @@ __global__ void Cluster_Feedback_Kernel(part_int_t n_local, Real* pos_x_dev, Rea } - Real Grid3D::Cluster_Feedback_GPU() { if (H.dt == 0) return 0.0; @@ -348,7 +314,7 @@ Real Grid3D::Cluster_Feedback_GPU() { // d_info is currently done on each block. Only the first block reduction // is used - hipLaunchKernelGGL(Cluster_Feedback_Kernel, ngrid, TPB_FEEDBACK, 0, 0, Particles.n_local, Particles.pos_x_dev, Particles.pos_y_dev, Particles.pos_z_dev, + hipLaunchKernelGGL(Cluster_Feedback_Kernel, ngrid, TPB_FEEDBACK, 0, 0, Particles.n_local, Particles.pos_x_dev, Particles.pos_y_dev, Particles.pos_z_dev, Particles.mass_dev, Particles.age_dev, H.xblocal, H.yblocal, H.zblocal, H.domlen_x, H.domlen_y, H.domlen_z, H.dx, H.dy, H.dz, H.nx, H.ny, H.nz, H.n_ghost, H.t, H.dt, d_dti, d_info, C.d_density, C.d_GasEnergy, C.d_Energy, C.d_momentum_x, C.d_momentum_y, C.d_momentum_z, gama, Supernova::curandStates); diff --git a/src/particles/particles_3D.cpp b/src/particles/particles_3D.cpp index d5c0765ab..d53b2cc46 100644 --- a/src/particles/particles_3D.cpp +++ b/src/particles/particles_3D.cpp @@ -661,7 +661,7 @@ void Particles_3D::Initialize_Disk_Stellar_Clusters(struct parameters *P) { //unsigned long int N = (long int)(6.5e6 * 0.11258580827352116); //2kpc radius //unsigned long int N = 13; //(long int)(6.5e6 * 0.9272485558395908); // 15kpc radius Real total_mass = 0; - Real upper_limit_cluster_mass = 1e7; + Real upper_limit_cluster_mass = 3e7; long lost_particles = 0; part_int_t id = -1; while (total_mass < upper_limit_cluster_mass) { diff --git a/src/particles/supernova.h b/src/particles/supernova.h index 9b1eebc0d..fa0e4250a 100644 --- a/src/particles/supernova.h +++ b/src/particles/supernova.h @@ -13,13 +13,13 @@ namespace Supernova { // supernova rate: 1SN / 100 solar masses per 10^4 kyr static const Real SNR=1e-6; - static const Real ENERGY_PER_SN = 5.3e-05; // 1e51 ergs/SN in solarMass*(kpc/kyr)**2 - static const Real MASS_PER_SN = 10.0; // 10 solarMasses per SN - static const Real FINAL_MOMENTUM = 0.29; // 2.8e5 solarMasses km/s * n_0^{-0.17} -> eq.(34) Kim & Ostriker (2015) - static const Real MU = 0.6; - static const Real R_SH = 0.0302; // 30.2 pc * n_0^{-0.46} -> eq.(31) Kim & Ostriker (2015) - static const Real SN_ERA = 1.0e4; // assume SN occur during first 10 Myr after cluster formation. - + static const Real ENERGY_PER_SN = 1e51 / MASS_UNIT*TIME_UNIT*TIME_UNIT/LENGTH_UNIT/LENGTH_UNIT; + static const Real MASS_PER_SN = 10.0; // 10 solarMasses per SN + static const Real FINAL_MOMENTUM = 2.8e5 / LENGTH_UNIT* 1e5 *TIME_UNIT; // 2.8e5 M_s km/s * n_0^{-0.17} -> eq.(34) Kim & Ostriker (2015) + static const Real MU = 0.6; + static const Real R_SH = 0.0302; // 30.2 pc * n_0^{-0.46} -> eq.(31) Kim & Ostriker (2015) + static const Real SN_ERA = 1.0e4; // assume SN occur during first 10 Myr after cluster formation. + #ifdef PARTICLES_GPU extern curandStateMRG32k3a_t* curandStates; extern part_int_t n_states; From 0272513d1e65a51e6e6891f2cfc06b314f27355b Mon Sep 17 00:00:00 2001 From: helenarichie Date: Wed, 8 Jun 2022 11:18:53 -0400 Subject: [PATCH 076/694] add work for cloud wind sims --- builds/make.type.dust | 6 +++--- cloud-wind/cloud-wind.txt | 4 ++-- src/grid/initial_conditions.cpp | 6 +++--- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/builds/make.type.dust b/builds/make.type.dust index 9b20ea4b1..370621d3c 100644 --- a/builds/make.type.dust +++ b/builds/make.type.dust @@ -26,12 +26,12 @@ DFLAGS += -DSCALAR DFLAGS += -DDUST # Apply the cooling in the GPU from precomputed tables -DFLAGS += -DCOOLING_GPU +# DFLAGS += -DCOOLING_GPU #Measure the Timing of the different stages #DFLAGS += -DCPU_TIME -#DFLAGS += -DSLICES +DFLAGS += -DSLICES DFLAGS += $(OUTPUT) @@ -39,4 +39,4 @@ DFLAGS += $(OUTPUT) #and the MPI transfers are done from the GPU #If not specified, MPI_GPU is off by default #This is set in the system make.host file -DFLAGS += $(MPI_GPU) \ No newline at end of file +DFLAGS += $(MPI_GPU) diff --git a/cloud-wind/cloud-wind.txt b/cloud-wind/cloud-wind.txt index 296e7820b..f1e32b2f5 100644 --- a/cloud-wind/cloud-wind.txt +++ b/cloud-wind/cloud-wind.txt @@ -24,9 +24,9 @@ zmin=0.0 # y direction domain length zlen=1.0 # final output time -tout=0.2 +tout=1e6 # time interval for output -outstep=0.2 +outstep=1e5 # ratio of specific heats gamma=1.66666667 # name of initial conditions diff --git a/src/grid/initial_conditions.cpp b/src/grid/initial_conditions.cpp index 611aa7b16..582390487 100644 --- a/src/grid/initial_conditions.cpp +++ b/src/grid/initial_conditions.cpp @@ -1255,7 +1255,7 @@ void Grid3D::Clouds() Real p_bg, p_cl; // background and cloud pressure Real mu = 0.6; // mean atomic weight int N_cl = 1; // number of clouds - Real R_cl = 2.5; // cloud radius in code units (kpc) + Real R_cl = 0.1; // cloud radius in code units (kpc) Real cl_pos[N_cl][3]; // array of cloud positions Real r; @@ -1269,7 +1269,7 @@ void Grid3D::Clouds() // single centered cloud setup for (int nn=0; nn Date: Fri, 10 Jun 2022 11:35:15 -0400 Subject: [PATCH 077/694] remove old comments --- cholla-tests-data | 2 +- src/dust/dust_cuda.cu | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/cholla-tests-data b/cholla-tests-data index 34577601f..5a3443034 160000 --- a/cholla-tests-data +++ b/cholla-tests-data @@ -1 +1 @@ -Subproject commit 34577601fd4abbdead625b1ea5cfb802a6325f9c +Subproject commit 5a34430345d7dc746637364e8613642ebbbbc5c4 diff --git a/src/dust/dust_cuda.cu b/src/dust/dust_cuda.cu index b18ec90e1..1d92297fc 100644 --- a/src/dust/dust_cuda.cu +++ b/src/dust/dust_cuda.cu @@ -58,8 +58,6 @@ __global__ void Dust_Kernel(Real *dev_conserved, int nx, int ny, int nz, int n_g d_gas = dev_conserved[id]; d_dust = dev_conserved[5*n_cells + id]; E = dev_conserved[4*n_cells + id]; - //printf("kernel: %7.4e\n", d_dust); - // make sure thread hasn't crashed n = d_gas*DENSITY_UNIT / (mu*MP); From 8d2fa8f957bbf2a45c9bef814f1cf2762d93a659 Mon Sep 17 00:00:00 2001 From: helenarichie Date: Fri, 10 Jun 2022 11:37:44 -0400 Subject: [PATCH 078/694] change cloud dust density initial condition to 1:100 dust-to-gas ratio --- src/grid/initial_conditions.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/grid/initial_conditions.cpp b/src/grid/initial_conditions.cpp index 582390487..741813202 100644 --- a/src/grid/initial_conditions.cpp +++ b/src/grid/initial_conditions.cpp @@ -1344,7 +1344,7 @@ void Grid3D::Clouds() C.GasEnergy[id] = p_cl/(gama-1.0); #endif #ifdef SCALAR - C.scalar[id] = C.density[id]*0.3; + C.scalar[id] = C.density[id]*0.01; #endif } } From 408116048e9ff0742a9a972f1d081cc1dc6eaa0b Mon Sep 17 00:00:00 2001 From: helenarichie Date: Mon, 27 Jun 2022 13:55:18 -0400 Subject: [PATCH 079/694] tweak initial conditions --- builds/make.type.dust | 2 +- src/dust/dust_cuda.cu | 5 +++++ src/grid/initial_conditions.cpp | 2 +- 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/builds/make.type.dust b/builds/make.type.dust index 370621d3c..0edeea49b 100644 --- a/builds/make.type.dust +++ b/builds/make.type.dust @@ -26,7 +26,7 @@ DFLAGS += -DSCALAR DFLAGS += -DDUST # Apply the cooling in the GPU from precomputed tables -# DFLAGS += -DCOOLING_GPU +DFLAGS += -DCOOLING_GPU #Measure the Timing of the different stages #DFLAGS += -DCPU_TIME diff --git a/src/dust/dust_cuda.cu b/src/dust/dust_cuda.cu index 1d92297fc..6572501af 100644 --- a/src/dust/dust_cuda.cu +++ b/src/dust/dust_cuda.cu @@ -18,6 +18,8 @@ #include "../grid/grid3D.h" void Dust_Update(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dt, Real gamma) { + int n_cells = nx * ny * nz; + int ngrid = (n_cells + TPB - 1) / TPB; dim3 dim1dGrid(ngrid, 1, 1); dim3 dim1dBlock(TPB, 1, 1); hipLaunchKernelGGL(Dust_Kernel, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, nz, n_ghost, n_fields, dt, gamma); @@ -27,6 +29,7 @@ void Dust_Update(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n __global__ void Dust_Kernel(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dt, Real gamma) { // get grid indices int n_cells = nx * ny * nz; + int ngrid = (n_cells + TPB - 1) / TPB; int is, ie, js, je, ks, ke; cuda_utilities::Get_Real_Indices(n_ghost, nx, ny, nz, is, ie, js, je, ks, ke); // get a global thread ID @@ -58,6 +61,8 @@ __global__ void Dust_Kernel(Real *dev_conserved, int nx, int ny, int nz, int n_g d_gas = dev_conserved[id]; d_dust = dev_conserved[5*n_cells + id]; E = dev_conserved[4*n_cells + id]; + //printf("kernel: %7.4e\n", d_dust); + // make sure thread hasn't crashed n = d_gas*DENSITY_UNIT / (mu*MP); diff --git a/src/grid/initial_conditions.cpp b/src/grid/initial_conditions.cpp index 741813202..e04c84a83 100644 --- a/src/grid/initial_conditions.cpp +++ b/src/grid/initial_conditions.cpp @@ -1279,7 +1279,7 @@ void Grid3D::Clouds() n_cl = 5.4e-2; rho_bg = n_bg*mu*MP/DENSITY_UNIT; rho_cl = n_cl*mu*MP/DENSITY_UNIT; - vx_bg = 10*TIME_UNIT/KPC; + vx_bg = 1000*TIME_UNIT/KPC; //vx_c = -200*TIME_UNIT/KPC; // convert from km/s to kpc/kyr vx_cl = 0.0; vy_bg = vy_cl = 0.0; From 5e5400cdcbe853454684b24fa53c04c1be16b7e6 Mon Sep 17 00:00:00 2001 From: helenarichie Date: Tue, 28 Jun 2022 09:51:12 -0400 Subject: [PATCH 080/694] add dust density floor --- builds/make.type.dust | 2 +- src/dust/dust_cuda.cu | 5 +++++ src/grid/initial_conditions.cpp | 4 ++-- 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/builds/make.type.dust b/builds/make.type.dust index 0edeea49b..e30d0ae60 100644 --- a/builds/make.type.dust +++ b/builds/make.type.dust @@ -10,7 +10,7 @@ DFLAGS += -DCUDA DFLAGS += -DMPI_CHOLLA DFLAGS += -DBLOCK DFLAGS += -DPRECISION=2 -DFLAGS += -DPPMP +DFLAGS += -DPPMC DFLAGS += -DHLLC ifeq ($(findstring cosmology,$(TYPE)),cosmology) diff --git a/src/dust/dust_cuda.cu b/src/dust/dust_cuda.cu index 6572501af..4fe04ec06 100644 --- a/src/dust/dust_cuda.cu +++ b/src/dust/dust_cuda.cu @@ -106,6 +106,11 @@ __global__ void Dust_Kernel(Real *dev_conserved, int nx, int ny, int nz, int n_g // update dust density d_dust += dd; + Real dust_floor = 1e-35 + if (d_dust < dust_floor) { + d_dust = dust_floor + } + dev_conserved[5*n_cells + id] = d_dust; #ifdef DE diff --git a/src/grid/initial_conditions.cpp b/src/grid/initial_conditions.cpp index e04c84a83..f7e13c2df 100644 --- a/src/grid/initial_conditions.cpp +++ b/src/grid/initial_conditions.cpp @@ -1276,10 +1276,10 @@ void Grid3D::Clouds() } n_bg = 1.68e-4; - n_cl = 5.4e-2; + n_cl = 1; rho_bg = n_bg*mu*MP/DENSITY_UNIT; rho_cl = n_cl*mu*MP/DENSITY_UNIT; - vx_bg = 1000*TIME_UNIT/KPC; + vx_bg = 10*TIME_UNIT/KPC; //vx_c = -200*TIME_UNIT/KPC; // convert from km/s to kpc/kyr vx_cl = 0.0; vy_bg = vy_cl = 0.0; From e9297a679496c85f754e35efd96b8a8e137cca60 Mon Sep 17 00:00:00 2001 From: helenarichie Date: Fri, 1 Jul 2022 12:33:17 -0400 Subject: [PATCH 081/694] tweak initial conditions --- builds/make.type.dust | 2 +- src/dust/dust_cuda.cu | 5 ----- src/grid/initial_conditions.cpp | 4 ++-- 3 files changed, 3 insertions(+), 8 deletions(-) diff --git a/builds/make.type.dust b/builds/make.type.dust index e30d0ae60..0edeea49b 100644 --- a/builds/make.type.dust +++ b/builds/make.type.dust @@ -10,7 +10,7 @@ DFLAGS += -DCUDA DFLAGS += -DMPI_CHOLLA DFLAGS += -DBLOCK DFLAGS += -DPRECISION=2 -DFLAGS += -DPPMC +DFLAGS += -DPPMP DFLAGS += -DHLLC ifeq ($(findstring cosmology,$(TYPE)),cosmology) diff --git a/src/dust/dust_cuda.cu b/src/dust/dust_cuda.cu index 4fe04ec06..6572501af 100644 --- a/src/dust/dust_cuda.cu +++ b/src/dust/dust_cuda.cu @@ -106,11 +106,6 @@ __global__ void Dust_Kernel(Real *dev_conserved, int nx, int ny, int nz, int n_g // update dust density d_dust += dd; - Real dust_floor = 1e-35 - if (d_dust < dust_floor) { - d_dust = dust_floor - } - dev_conserved[5*n_cells + id] = d_dust; #ifdef DE diff --git a/src/grid/initial_conditions.cpp b/src/grid/initial_conditions.cpp index f7e13c2df..f10ad027b 100644 --- a/src/grid/initial_conditions.cpp +++ b/src/grid/initial_conditions.cpp @@ -1275,11 +1275,11 @@ void Grid3D::Clouds() printf("Cloud positions: %f %f %f\n", cl_pos[nn][0], cl_pos[nn][1], cl_pos[nn][2]); } - n_bg = 1.68e-4; + n_bg = 1e-2; n_cl = 1; rho_bg = n_bg*mu*MP/DENSITY_UNIT; rho_cl = n_cl*mu*MP/DENSITY_UNIT; - vx_bg = 10*TIME_UNIT/KPC; + vx_bg = 100*TIME_UNIT/KPC; //vx_c = -200*TIME_UNIT/KPC; // convert from km/s to kpc/kyr vx_cl = 0.0; vy_bg = vy_cl = 0.0; From af94b049e8f15c26c287fd41ba0dddb238b7d1fb Mon Sep 17 00:00:00 2001 From: helenarichie Date: Wed, 13 Jul 2022 10:30:29 -0400 Subject: [PATCH 082/694] add correctly-named function calls and updated input file for wind boundary --- cloud-wind/cloud-wind.txt | 4 +- src/grid/boundary_conditions.cpp | 110 +++-------------------------- src/grid/cuda_boundaries.cu | 114 +++++-------------------------- src/grid/cuda_boundaries.h | 6 +- 4 files changed, 29 insertions(+), 205 deletions(-) diff --git a/cloud-wind/cloud-wind.txt b/cloud-wind/cloud-wind.txt index f1e32b2f5..9690c4192 100644 --- a/cloud-wind/cloud-wind.txt +++ b/cloud-wind/cloud-wind.txt @@ -32,8 +32,8 @@ gamma=1.66666667 # name of initial conditions init=Clouds # type of boundary condition, options include 1 (periodic), 2 (reflective), 3 (transmissive), 4 (custom) -xl_bcnd=1 -xu_bcnd=1 +xl_bcnd=wind +xu_bcnd=3 yl_bcnd=3 yu_bcnd=3 zl_bcnd=3 diff --git a/src/grid/boundary_conditions.cpp b/src/grid/boundary_conditions.cpp index 341360eba..36443ad3c 100644 --- a/src/grid/boundary_conditions.cpp +++ b/src/grid/boundary_conditions.cpp @@ -421,9 +421,9 @@ void Grid3D::Set_Boundary_Extents(int dir, int *imin, int *imax) * \brief Select appropriate custom boundary function. */ void Grid3D::Custom_Boundary(char bcnd[MAXLEN]) { - if (strcmp(bcnd, "noh")==0) { + if (strcmp(bcnd, "wind")==0) { // from grid/cuda_boundaries.cu - Noh_Boundary(); + Wind_Boundary(); } else { printf("ABORT: %s -> Unknown custom boundary condition.\n", bcnd); @@ -433,13 +433,10 @@ void Grid3D::Custom_Boundary(char bcnd[MAXLEN]) -/*! \fn void Noh_Boundary() - * \brief Apply analytic boundary conditions to +x, +y (and +z) faces, - as per the Noh problem in Liska, 2003, or in Stone, 2008. */ -void Grid3D::Noh_Boundary() +/*! \fn void Wind_Boundary() + * \brief Apply wind boundary */ +void Grid3D::Wind_Boundary() { - // This is now a wrapper function -- the actual boundary setting - // functions are in grid/cuda_boundaries.cu int x_off, y_off, z_off; // set x, y, & z offsets of local CPU volume to pass to GPU @@ -451,100 +448,9 @@ void Grid3D::Noh_Boundary() z_off = nz_local_start; #endif - Noh_Boundary_CUDA(C.device, H.nx, H.ny, H.nz, H.n_cells, H.n_ghost, - x_off, y_off, z_off, H.dx, H.dy, H.dz, - H.xbound, H.ybound, H.zbound, gama, H.t); - -/* - int i, j, k, id; - Real x_pos, y_pos, z_pos, r; - Real vx, vy, vz, d_0, P_0, P; - - d_0 = 1.0; - P_0 = 1.0e-6; - - // set exact boundaries on the +x face - for (k=0; k 1) r = sqrt(x_pos*x_pos + y_pos*y_pos+ z_pos*z_pos); - else r = sqrt(x_pos*x_pos + y_pos*y_pos); - // set the velocities - vx = -x_pos / r; - vy = -y_pos / r; - if (H.nz > 1) vz = -z_pos / r; - else vz = 0; - // set the conserved quantities - if (H.nz > 1) C.density[id] = d_0*(1.0 + H.t/r)*(1.0 + H.t/r); - else C.density[id] = d_0*(1.0 + H.t/r); - C.momentum_x[id] = vx*C.density[id]; - C.momentum_y[id] = vy*C.density[id]; - C.momentum_z[id] = vz*C.density[id]; - C.Energy[id] = P_0/(gama-1.0) + 0.5*C.density[id]; - - } - } - } - - // set exact boundaries on the +y face - for (k=0; k 1) r = sqrt(x_pos*x_pos + y_pos*y_pos+ z_pos*z_pos); - else r = sqrt(x_pos*x_pos + y_pos*y_pos); - // set the velocities - vx = -x_pos / r; - vy = -y_pos / r; - if (H.nz > 1) vz = -z_pos / r; - else vz = 0; - // set the conserved quantities - if (H.nz > 1) C.density[id] = d_0*(1.0 + H.t/r)*(1.0 + H.t/r); - else C.density[id] = d_0*(1.0 + H.t/r); - C.momentum_x[id] = vx*C.density[id]; - C.momentum_y[id] = vy*C.density[id]; - C.momentum_z[id] = vz*C.density[id]; - C.Energy[id] = P_0/(gama-1.0) + 0.5*C.density[id]; - - } - } - } - - // set exact boundaries on the +z face - if (H.nz > 1) { - - for (k=H.nz-H.n_ghost; k= nx-n_ghost && xid < nx && yid < ny && zid < nz) { @@ -407,102 +407,20 @@ __global__ void Noh_Boundary_kernel(Real * c_device, else vz = 0; // set the conserved quantities if (nz > 1) c_device[gid] = d_0*(1.0 + t/r)*(1.0 + t/r); - else c_device[gid] = d_0*(1.0 + t/r); - c_device[gid+1*n_cells] = vx*c_device[gid]; - c_device[gid+2*n_cells] = vy*c_device[gid]; - c_device[gid+3*n_cells] = vz*c_device[gid]; - c_device[gid+4*n_cells] = P_0/(gamma-1.0) + 0.5*c_device[gid]; - } - __syncthreads(); - - // +y boundary next - isize = nx; - jsize = n_ghost; - ksize = nz; - - // not true i,j,k but relative i,j,k - zid = id/(isize*jsize); - yid = (id - zid*isize*jsize)/isize; - xid = id - zid*isize*jsize - yid*isize; - - // map thread id to ghost cell id - yid += ny-n_ghost; // +y boundary - gid = xid + yid*nx + zid*nx*ny; - - if (xid < nx && yid >= ny-n_ghost && yid < ny && zid < nz) { - - // use the subgrid offset and global boundaries to calculate absolute positions on the grid - x_pos = (x_off + xid - n_ghost + 0.5)*dx + xbound; - y_pos = (y_off + yid - n_ghost + 0.5)*dy + ybound; - z_pos = (z_off + zid - n_ghost + 0.5)*dz + zbound; - - // for 2D calculate polar r - if (nz == 1) r = sqrt(x_pos*x_pos + y_pos*y_pos); - // for 3D, calculate spherical r - else r = sqrt(x_pos*x_pos + y_pos*y_pos + z_pos*z_pos); + else c_device[gid] = d_0*(1.0 + t/r); - // calculate the velocities - vx = -x_pos / r; - vy = -y_pos / r; - if (nz > 1) vz = -z_pos / r; - else vz = 0; - // set the conserved quantities - if (nz > 1) c_device[gid] = d_0*(1.0 + t/r)*(1.0 + t/r); - else c_device[gid] = d_0*(1.0 + t/r); c_device[gid+1*n_cells] = vx*c_device[gid]; c_device[gid+2*n_cells] = vy*c_device[gid]; c_device[gid+3*n_cells] = vz*c_device[gid]; c_device[gid+4*n_cells] = P_0/(gamma-1.0) + 0.5*c_device[gid]; - } - __syncthreads(); - - // +z boundary last (only if 3D) - if (nz == 1) return; - - isize = nx; - jsize = ny; - ksize = n_ghost; - - // not true i,j,k but relative i,j,k - zid = id/(isize*jsize); - yid = (id - zid*isize*jsize)/isize; - xid = id - zid*isize*jsize - yid*isize; - - // map thread id to ghost cell id - zid += nz-n_ghost; // +z boundary - gid = xid + yid*nx + zid*nx*ny; - - if (xid < nx && yid < ny && zid >= nz-n_ghost && zid < nz) { - - // use the subgrid offset and global boundaries to calculate absolute positions on the grid - x_pos = (x_off + xid - n_ghost + 0.5)*dx + xbound; - y_pos = (y_off + yid - n_ghost + 0.5)*dy + ybound; - z_pos = (z_off + zid - n_ghost + 0.5)*dz + zbound; - - // for 2D calculate polar r - if (nz == 1) r = sqrt(x_pos*x_pos + y_pos*y_pos); - // for 3D, calculate spherical r - else r = sqrt(x_pos*x_pos + y_pos*y_pos + z_pos*z_pos); - - // calculate the velocities - vx = -x_pos / r; - vy = -y_pos / r; - if (nz > 1) vz = -z_pos / r; - else vz = 0; - // set the conserved quantities - if (nz > 1) c_device[gid] = d_0*(1.0 + t/r)*(1.0 + t/r); - else c_device[gid] = d_0*(1.0 + t/r); - c_device[gid+1*n_cells] = vx*c_device[gid]; - c_device[gid+2*n_cells] = vy*c_device[gid]; - c_device[gid+3*n_cells] = vz*c_device[gid]; - c_device[gid+4*n_cells] = P_0/(gamma-1.0) + 0.5*c_device[gid]; - } + } + __syncthreads(); } -void Noh_Boundary_CUDA(Real * c_device, int nx, int ny, int nz, int n_cells, int n_ghost, - int x_off, int y_off, int z_off, Real dx, Real dy, Real dz, - Real xbound, Real ybound, Real zbound, Real gamma, Real t) +void Wind_Boundary_CUDA(Real * c_device, int nx, int ny, int nz, int n_cells, int n_ghost, + int x_off, int y_off, int z_off, Real dx, Real dy, Real dz, + Real xbound, Real ybound, Real zbound, Real gamma, Real t) { // determine the size of the grid to launch @@ -513,13 +431,13 @@ void Noh_Boundary_CUDA(Real * c_device, int nx, int ny, int nz, int n_cells, int jsize = ny; ksize = nz; - dim3 dim1dGrid((isize*jsize*ksize+TPB-1)/TPB, 1, 1); + dim3 dim1dGrid((isize*jsize*ksize + TPB-1) / TPB, 1, 1); dim3 dim1dBlock(TPB, 1, 1); // launch the boundary kernel - hipLaunchKernelGGL(Noh_Boundary_kernel,dim1dGrid,dim1dBlock,0,0,c_device, - nx,ny,nz,n_cells,n_ghost, - x_off,y_off,z_off,dx,dy,dz,xbound,ybound,zbound,gamma,t); + hipLaunchKernelGGL(Wind_Boundary_kernel, dim1dGrid, dim1dBlock, 0, 0, + c_device, nx, ny, nz, n_cells, n_ghost, x_off, y_off, z_off, dx, dy, dz, + xbound, ybound, zbound, gamma, t); diff --git a/src/grid/cuda_boundaries.h b/src/grid/cuda_boundaries.h index f7212401a..d307daf15 100644 --- a/src/grid/cuda_boundaries.h +++ b/src/grid/cuda_boundaries.h @@ -14,8 +14,8 @@ void SetGhostCells(Real * c_head, int isize, int jsize, int ksize, int imin, int jmin, int kmin, int dir); -void Noh_Boundary_CUDA(Real * c_device, int nx, int ny, int nz, int n_cells, int n_ghost, - int x_off, int y_off, int z_off, Real dx, Real dy, Real dz, - Real xbound, Real ybound, Real zbound, Real gamma, Real t); +void Wind_Boundary_CUDA(Real * c_device, int nx, int ny, int nz, int n_cells, int n_ghost, + int x_off, int y_off, int z_off, Real dx, Real dy, Real dz, + Real xbound, Real ybound, Real zbound, Real gamma, Real t); #endif From d9dff10d77a60891394c90b9a321232ff913f3aa Mon Sep 17 00:00:00 2001 From: helenarichie Date: Thu, 14 Jul 2022 15:35:15 -0400 Subject: [PATCH 083/694] add caar merge --- src/grid/cuda_boundaries.cu | 27 +++++---------------------- src/grid/grid3D.h | 2 +- src/grid/initial_conditions.cpp | 5 +++-- 3 files changed, 9 insertions(+), 25 deletions(-) diff --git a/src/grid/cuda_boundaries.cu b/src/grid/cuda_boundaries.cu index d68477503..2495b8049 100644 --- a/src/grid/cuda_boundaries.cu +++ b/src/grid/cuda_boundaries.cu @@ -364,17 +364,14 @@ __global__ void Wind_Boundary_kernel(Real * c_device, { int id, xid, yid, zid, gid; Real x_pos, y_pos, z_pos, r; - Real vx, vy, vz, d_0, P_0; - - d_0 = 1.0; - P_0 = 1.0e-6; + Real vx; // calculate ghost cell ID and i,j,k in GPU grid id = threadIdx.x + blockIdx.x * blockDim.x; int isize, jsize, ksize; - // +x boundary first + // -x boundary isize = n_ghost; jsize = ny; ksize = nz; @@ -385,7 +382,7 @@ __global__ void Wind_Boundary_kernel(Real * c_device, xid = id - zid*isize*jsize - yid*isize; // map thread id to ghost cell id - xid += nx - n_ghost; // +x boundary + xid += 0; // +x boundary gid = xid + yid*nx + zid*nx*ny; if (xid >= nx-n_ghost && xid < nx && yid < ny && zid < nz) { @@ -395,20 +392,9 @@ __global__ void Wind_Boundary_kernel(Real * c_device, y_pos = (y_off + yid - n_ghost + 0.5)*dy + ybound; z_pos = (z_off + zid - n_ghost + 0.5)*dz + zbound; - // for 2D calculate polar r - if (nz == 1) r = sqrt(x_pos*x_pos + y_pos*y_pos); - // for 3D calculate spherical r - else r = sqrt(x_pos*x_pos + y_pos*y_pos + z_pos*z_pos); - - // calculate the velocities - vx = -x_pos / r; - vy = -y_pos / r; - if (nz > 1) vz = -z_pos / r; - else vz = 0; - // set the conserved quantities - if (nz > 1) c_device[gid] = d_0*(1.0 + t/r)*(1.0 + t/r); - else c_device[gid] = d_0*(1.0 + t/r); + vx = -10*TIME_UNIT/KPC; // km/s + // set conserved variables c_device[gid+1*n_cells] = vx*c_device[gid]; c_device[gid+2*n_cells] = vy*c_device[gid]; c_device[gid+3*n_cells] = vz*c_device[gid]; @@ -422,7 +408,6 @@ void Wind_Boundary_CUDA(Real * c_device, int nx, int ny, int nz, int n_cells, in int x_off, int y_off, int z_off, Real dx, Real dy, Real dz, Real xbound, Real ybound, Real zbound, Real gamma, Real t) { - // determine the size of the grid to launch // need at least as many threads as the largest boundary face // current implementation assumes the test is run on a cube... @@ -439,8 +424,6 @@ void Wind_Boundary_CUDA(Real * c_device, int nx, int ny, int nz, int n_cells, in c_device, nx, ny, nz, n_cells, n_ghost, x_off, y_off, z_off, dx, dy, dz, xbound, ybound, zbound, gamma, t); - - } diff --git a/src/grid/grid3D.h b/src/grid/grid3D.h index ec48c27be..51040153e 100644 --- a/src/grid/grid3D.h +++ b/src/grid/grid3D.h @@ -626,7 +626,7 @@ class Grid3D /*! \fn void Noh_Boundary() * \brief Apply analytic boundary conditions to +x, +y (and +z) faces, as per the Noh problem in Liska, 2003, or in Stone, 2008. */ - void Noh_Boundary(); + void Wind_Boundary(); /*! \fn void Spherical_Overpressure_3D() * \brief Initialize the grid with a 3D spherical overdensity and overpressue. */ diff --git a/src/grid/initial_conditions.cpp b/src/grid/initial_conditions.cpp index f10ad027b..eb79d6bcd 100644 --- a/src/grid/initial_conditions.cpp +++ b/src/grid/initial_conditions.cpp @@ -1275,11 +1275,12 @@ void Grid3D::Clouds() printf("Cloud positions: %f %f %f\n", cl_pos[nn][0], cl_pos[nn][1], cl_pos[nn][2]); } - n_bg = 1e-2; + n_bg = 1e-4; n_cl = 1; rho_bg = n_bg*mu*MP/DENSITY_UNIT; rho_cl = n_cl*mu*MP/DENSITY_UNIT; - vx_bg = 100*TIME_UNIT/KPC; + // vx_bg = 100*TIME_UNIT/KPC; + vx_bg = 0.0; //vx_c = -200*TIME_UNIT/KPC; // convert from km/s to kpc/kyr vx_cl = 0.0; vy_bg = vy_cl = 0.0; From 7662f0b576e515201d8e67f06404fe76d89d8ace Mon Sep 17 00:00:00 2001 From: helenarichie Date: Fri, 15 Jul 2022 10:56:23 -0400 Subject: [PATCH 084/694] set all values of converved variables --- src/grid/cuda_boundaries.cu | 25 ++++++++++++++++++++----- src/grid/initial_conditions.cpp | 2 +- 2 files changed, 21 insertions(+), 6 deletions(-) diff --git a/src/grid/cuda_boundaries.cu b/src/grid/cuda_boundaries.cu index 2495b8049..87a9c4a14 100644 --- a/src/grid/cuda_boundaries.cu +++ b/src/grid/cuda_boundaries.cu @@ -382,11 +382,25 @@ __global__ void Wind_Boundary_kernel(Real * c_device, xid = id - zid*isize*jsize - yid*isize; // map thread id to ghost cell id - xid += 0; // +x boundary + xid += 0; // -x boundary gid = xid + yid*nx + zid*nx*ny; if (xid >= nx-n_ghost && xid < nx && yid < ny && zid < nz) { + + Real n_0; + Real vx, vy, vz, d_0, P_0; + + n_0 = 1e-2; // same value as n_bg in cloud initial condition function + + // same values as rho_bg and p_bg in cloud initial condition function + d_0 = n_bg*mu*MP/DENSITY_UNIT; + P_0 = n_bg*KB*T_bg / PRESSURE_UNIT; + + vx = 100*TIME_UNIT/KPC; // km/s + vy = 0.0; // km/s + vz = 0.0; // km/s + // use the subgrid offset and global boundaries to calculate absolute positions on the grid x_pos = (x_off + xid - n_ghost + 0.5)*dx + xbound; y_pos = (y_off + yid - n_ghost + 0.5)*dy + ybound; @@ -395,10 +409,11 @@ __global__ void Wind_Boundary_kernel(Real * c_device, vx = -10*TIME_UNIT/KPC; // km/s // set conserved variables - c_device[gid+1*n_cells] = vx*c_device[gid]; - c_device[gid+2*n_cells] = vy*c_device[gid]; - c_device[gid+3*n_cells] = vz*c_device[gid]; - c_device[gid+4*n_cells] = P_0/(gamma-1.0) + 0.5*c_device[gid]; + c_device[gid] = d_0; + c_device[gid+1*n_cells] = vx*d_0; + c_device[gid+2*n_cells] = vy*d_0; + c_device[gid+3*n_cells] = vz*d_0; + c_device[gid+4*n_cells] = P_0/(gama-1.0) + 0.5*d_0*(vx*vx + vy*vy + vz*vz); } __syncthreads(); } diff --git a/src/grid/initial_conditions.cpp b/src/grid/initial_conditions.cpp index eb79d6bcd..10ae22ea8 100644 --- a/src/grid/initial_conditions.cpp +++ b/src/grid/initial_conditions.cpp @@ -1275,7 +1275,7 @@ void Grid3D::Clouds() printf("Cloud positions: %f %f %f\n", cl_pos[nn][0], cl_pos[nn][1], cl_pos[nn][2]); } - n_bg = 1e-4; + n_bg = 1e-2; n_cl = 1; rho_bg = n_bg*mu*MP/DENSITY_UNIT; rho_cl = n_cl*mu*MP/DENSITY_UNIT; From 00e114e8562cc37c1358213506aabc0fd51600d5 Mon Sep 17 00:00:00 2001 From: helenarichie Date: Fri, 15 Jul 2022 11:02:29 -0400 Subject: [PATCH 085/694] add back in noh custom boundary functions --- src/grid/boundary_conditions.cpp | 104 ++++++++++++++++++ src/grid/cuda_boundaries.cu | 175 ++++++++++++++++++++++++++++++- src/grid/cuda_boundaries.h | 6 +- src/grid/grid3D.h | 6 +- 4 files changed, 285 insertions(+), 6 deletions(-) diff --git a/src/grid/boundary_conditions.cpp b/src/grid/boundary_conditions.cpp index 36443ad3c..d747c9941 100644 --- a/src/grid/boundary_conditions.cpp +++ b/src/grid/boundary_conditions.cpp @@ -421,6 +421,10 @@ void Grid3D::Set_Boundary_Extents(int dir, int *imin, int *imax) * \brief Select appropriate custom boundary function. */ void Grid3D::Custom_Boundary(char bcnd[MAXLEN]) { + if (strcmp(bcnd, "noh")==0) { + // from grid/cuda_boundaries.cu + Noh_Boundary(); + } if (strcmp(bcnd, "wind")==0) { // from grid/cuda_boundaries.cu Wind_Boundary(); @@ -453,4 +457,104 @@ void Grid3D::Wind_Boundary() H.xbound, H.ybound, H.zbound, gama, H.t); } +/*! \fn void Noh_Boundary() + * \brief Apply analytic boundary conditions to +x, +y (and +z) faces, + as per the Noh problem in Liska, 2003, or in Stone, 2008. */ +void Grid3D::Noh_Boundary() +{ + // This is now a wrapper function -- the actual boundary setting + // functions are in grid/cuda_boundaries.cu + int x_off, y_off, z_off; + // set x, y, & z offsets of local CPU volume to pass to GPU + // so global position on the grid is known + x_off = y_off = z_off = 0; + #ifdef MPI_CHOLLA + x_off = nx_local_start; + y_off = ny_local_start; + z_off = nz_local_start; + #endif + + Noh_Boundary_CUDA(C.device, H.nx, H.ny, H.nz, H.n_cells, H.n_ghost, + x_off, y_off, z_off, H.dx, H.dy, H.dz, + H.xbound, H.ybound, H.zbound, gama, H.t); + +/* + int i, j, k, id; + Real x_pos, y_pos, z_pos, r; + Real vx, vy, vz, d_0, P_0, P; + d_0 = 1.0; + P_0 = 1.0e-6; + // set exact boundaries on the +x face + for (k=0; k 1) r = sqrt(x_pos*x_pos + y_pos*y_pos+ z_pos*z_pos); + else r = sqrt(x_pos*x_pos + y_pos*y_pos); + // set the velocities + vx = -x_pos / r; + vy = -y_pos / r; + if (H.nz > 1) vz = -z_pos / r; + else vz = 0; + // set the conserved quantities + if (H.nz > 1) C.density[id] = d_0*(1.0 + H.t/r)*(1.0 + H.t/r); + else C.density[id] = d_0*(1.0 + H.t/r); + C.momentum_x[id] = vx*C.density[id]; + C.momentum_y[id] = vy*C.density[id]; + C.momentum_z[id] = vz*C.density[id]; + C.Energy[id] = P_0/(gama-1.0) + 0.5*C.density[id]; + } + } + } + // set exact boundaries on the +y face + for (k=0; k 1) r = sqrt(x_pos*x_pos + y_pos*y_pos+ z_pos*z_pos); + else r = sqrt(x_pos*x_pos + y_pos*y_pos); + // set the velocities + vx = -x_pos / r; + vy = -y_pos / r; + if (H.nz > 1) vz = -z_pos / r; + else vz = 0; + // set the conserved quantities + if (H.nz > 1) C.density[id] = d_0*(1.0 + H.t/r)*(1.0 + H.t/r); + else C.density[id] = d_0*(1.0 + H.t/r); + C.momentum_x[id] = vx*C.density[id]; + C.momentum_y[id] = vy*C.density[id]; + C.momentum_z[id] = vz*C.density[id]; + C.Energy[id] = P_0/(gama-1.0) + 0.5*C.density[id]; + } + } + } + // set exact boundaries on the +z face + if (H.nz > 1) { + for (k=H.nz-H.n_ghost; k= nx-n_ghost && xid < nx && yid < ny && zid < nz) { + + // use the subgrid offset and global boundaries to calculate absolute positions on the grid + x_pos = (x_off + xid - n_ghost + 0.5)*dx + xbound; + y_pos = (y_off + yid - n_ghost + 0.5)*dy + ybound; + z_pos = (z_off + zid - n_ghost + 0.5)*dz + zbound; + + // for 2D calculate polar r + if (nz == 1) r = sqrt(x_pos*x_pos + y_pos*y_pos); + // for 3D calculate spherical r + else r = sqrt(x_pos*x_pos + y_pos*y_pos + z_pos*z_pos); + + // calculate the velocities + vx = -x_pos / r; + vy = -y_pos / r; + if (nz > 1) vz = -z_pos / r; + else vz = 0; + // set the conserved quantities + if (nz > 1) c_device[gid] = d_0*(1.0 + t/r)*(1.0 + t/r); + else c_device[gid] = d_0*(1.0 + t/r); + c_device[gid+1*n_cells] = vx*c_device[gid]; + c_device[gid+2*n_cells] = vy*c_device[gid]; + c_device[gid+3*n_cells] = vz*c_device[gid]; + c_device[gid+4*n_cells] = P_0/(gamma-1.0) + 0.5*c_device[gid]; + } + __syncthreads(); + + // +y boundary next + isize = nx; + jsize = n_ghost; + ksize = nz; + + // not true i,j,k but relative i,j,k + zid = id/(isize*jsize); + yid = (id - zid*isize*jsize)/isize; + xid = id - zid*isize*jsize - yid*isize; + + // map thread id to ghost cell id + yid += ny-n_ghost; // +y boundary + gid = xid + yid*nx + zid*nx*ny; + + if (xid < nx && yid >= ny-n_ghost && yid < ny && zid < nz) { + + // use the subgrid offset and global boundaries to calculate absolute positions on the grid + x_pos = (x_off + xid - n_ghost + 0.5)*dx + xbound; + y_pos = (y_off + yid - n_ghost + 0.5)*dy + ybound; + z_pos = (z_off + zid - n_ghost + 0.5)*dz + zbound; + + // for 2D calculate polar r + if (nz == 1) r = sqrt(x_pos*x_pos + y_pos*y_pos); + // for 3D, calculate spherical r + else r = sqrt(x_pos*x_pos + y_pos*y_pos + z_pos*z_pos); + + // calculate the velocities + vx = -x_pos / r; + vy = -y_pos / r; + if (nz > 1) vz = -z_pos / r; + else vz = 0; + // set the conserved quantities + if (nz > 1) c_device[gid] = d_0*(1.0 + t/r)*(1.0 + t/r); + else c_device[gid] = d_0*(1.0 + t/r); + c_device[gid+1*n_cells] = vx*c_device[gid]; + c_device[gid+2*n_cells] = vy*c_device[gid]; + c_device[gid+3*n_cells] = vz*c_device[gid]; + c_device[gid+4*n_cells] = P_0/(gamma-1.0) + 0.5*c_device[gid]; + } + __syncthreads(); + + // +z boundary last (only if 3D) + if (nz == 1) return; + + isize = nx; + jsize = ny; + ksize = n_ghost; + + // not true i,j,k but relative i,j,k + zid = id/(isize*jsize); + yid = (id - zid*isize*jsize)/isize; + xid = id - zid*isize*jsize - yid*isize; + + // map thread id to ghost cell id + zid += nz-n_ghost; // +z boundary + gid = xid + yid*nx + zid*nx*ny; + + if (xid < nx && yid < ny && zid >= nz-n_ghost && zid < nz) { + + // use the subgrid offset and global boundaries to calculate absolute positions on the grid + x_pos = (x_off + xid - n_ghost + 0.5)*dx + xbound; + y_pos = (y_off + yid - n_ghost + 0.5)*dy + ybound; + z_pos = (z_off + zid - n_ghost + 0.5)*dz + zbound; + + // for 2D calculate polar r + if (nz == 1) r = sqrt(x_pos*x_pos + y_pos*y_pos); + // for 3D, calculate spherical r + else r = sqrt(x_pos*x_pos + y_pos*y_pos + z_pos*z_pos); + + // calculate the velocities + vx = -x_pos / r; + vy = -y_pos / r; + if (nz > 1) vz = -z_pos / r; + else vz = 0; + // set the conserved quantities + if (nz > 1) c_device[gid] = d_0*(1.0 + t/r)*(1.0 + t/r); + else c_device[gid] = d_0*(1.0 + t/r); + c_device[gid+1*n_cells] = vx*c_device[gid]; + c_device[gid+2*n_cells] = vy*c_device[gid]; + c_device[gid+3*n_cells] = vz*c_device[gid]; + c_device[gid+4*n_cells] = P_0/(gamma-1.0) + 0.5*c_device[gid]; + } +} + + void Wind_Boundary_CUDA(Real * c_device, int nx, int ny, int nz, int n_cells, int n_ghost, int x_off, int y_off, int z_off, Real dx, Real dy, Real dz, Real xbound, Real ybound, Real zbound, Real gamma, Real t) @@ -442,3 +585,27 @@ void Wind_Boundary_CUDA(Real * c_device, int nx, int ny, int nz, int n_cells, in } + + +void Noh_Boundary_CUDA(Real * c_device, int nx, int ny, int nz, int n_cells, int n_ghost, + int x_off, int y_off, int z_off, Real dx, Real dy, Real dz, + Real xbound, Real ybound, Real zbound, Real gamma, Real t) +{ + + // determine the size of the grid to launch + // need at least as many threads as the largest boundary face + // current implementation assumes the test is run on a cube... + int isize, jsize, ksize; + isize = n_ghost; + jsize = ny; + ksize = nz; + + dim3 dim1dGrid((isize*jsize*ksize+TPB-1)/TPB, 1, 1); + dim3 dim1dBlock(TPB, 1, 1); + + // launch the boundary kernel + hipLaunchKernelGGL(Noh_Boundary_kernel,dim1dGrid,dim1dBlock,0,0,c_device, + nx,ny,nz,n_cells,n_ghost, + x_off,y_off,z_off,dx,dy,dz,xbound,ybound,zbound,gamma,t); + +} \ No newline at end of file diff --git a/src/grid/cuda_boundaries.h b/src/grid/cuda_boundaries.h index d307daf15..2c2b20753 100644 --- a/src/grid/cuda_boundaries.h +++ b/src/grid/cuda_boundaries.h @@ -16,6 +16,10 @@ void SetGhostCells(Real * c_head, void Wind_Boundary_CUDA(Real * c_device, int nx, int ny, int nz, int n_cells, int n_ghost, int x_off, int y_off, int z_off, Real dx, Real dy, Real dz, - Real xbound, Real ybound, Real zbound, Real gamma, Real t); + Real xbound, Real ybound, Real zbound, Real gamma, Real t); + +void Noh_Boundary_CUDA(Real * c_device, int nx, int ny, int nz, int n_cells, int n_ghost, + int x_off, int y_off, int z_off, Real dx, Real dy, Real dz, + Real xbound, Real ybound, Real zbound, Real gamma, Real t); #endif diff --git a/src/grid/grid3D.h b/src/grid/grid3D.h index 51040153e..fa3fc16fa 100644 --- a/src/grid/grid3D.h +++ b/src/grid/grid3D.h @@ -623,10 +623,14 @@ class Grid3D * \brief Select appropriate custom boundary function. */ void Custom_Boundary(char bcnd[MAXLEN]); + /*! \fn void Wind_Boundary() + * \brief Apply a constant wind to the -x boundary. */ + void Wind_Boundary(); + /*! \fn void Noh_Boundary() * \brief Apply analytic boundary conditions to +x, +y (and +z) faces, as per the Noh problem in Liska, 2003, or in Stone, 2008. */ - void Wind_Boundary(); + void Noh_Boundary(); /*! \fn void Spherical_Overpressure_3D() * \brief Initialize the grid with a 3D spherical overdensity and overpressue. */ From 4bdb8d7d5dbf27ad10dcbab5e20cd12df53a50b6 Mon Sep 17 00:00:00 2001 From: helenarichie Date: Fri, 15 Jul 2022 11:10:51 -0400 Subject: [PATCH 086/694] fix bugs and clean up code in wind boundary kernel --- src/grid/cuda_boundaries.cu | 40 ++++++++++++++----------------------- 1 file changed, 15 insertions(+), 25 deletions(-) diff --git a/src/grid/cuda_boundaries.cu b/src/grid/cuda_boundaries.cu index c836ab519..b455617e7 100644 --- a/src/grid/cuda_boundaries.cu +++ b/src/grid/cuda_boundaries.cu @@ -363,8 +363,20 @@ __global__ void Wind_Boundary_kernel(Real * c_device, Real dx, Real dy, Real dz, Real xbound, Real ybound, Real zbound, Real gamma, Real t) { int id, xid, yid, zid, gid; - Real x_pos, y_pos, z_pos, r; - Real vx; + Real n_0, T_0; + Real mu = 0.6; + Real vx, vy, vz, d_0, P_0; + + n_0 = 1e-2; // same value as n_bg in cloud initial condition function (cm^-3) + T_0 = 3e6; // same value as T_bg in cloud initial condition function (K) + + // same values as rho_bg and p_bg in cloud initial condition function + d_0 = n_0*mu*MP/DENSITY_UNIT; + P_0 = n_0*KB*T_0 / PRESSURE_UNIT; + + vx = 100*TIME_UNIT/KPC; // km/s * (cholla unit conversion) + vy = 0.0; + vz = 0.0; // calculate ghost cell ID and i,j,k in GPU grid id = threadIdx.x + blockIdx.x * blockDim.x; @@ -386,34 +398,12 @@ __global__ void Wind_Boundary_kernel(Real * c_device, gid = xid + yid*nx + zid*nx*ny; if (xid >= nx-n_ghost && xid < nx && yid < ny && zid < nz) { - - - Real n_0; - Real vx, vy, vz, d_0, P_0; - - n_0 = 1e-2; // same value as n_bg in cloud initial condition function (cm^-3) - - // same values as rho_bg and p_bg in cloud initial condition function - d_0 = n_bg*mu*MP/DENSITY_UNIT; - P_0 = n_bg*KB*T_bg / PRESSURE_UNIT; - - vx = 100*TIME_UNIT/KPC; // km/s * (cholla unit conversion) - vy = 0.0; - vz = 0.0; - - // use the subgrid offset and global boundaries to calculate absolute positions on the grid - x_pos = (x_off + xid - n_ghost + 0.5)*dx + xbound; - y_pos = (y_off + yid - n_ghost + 0.5)*dy + ybound; - z_pos = (z_off + zid - n_ghost + 0.5)*dz + zbound; - - vx = -10*TIME_UNIT/KPC; // km/s - // set conserved variables c_device[gid] = d_0; c_device[gid+1*n_cells] = vx*d_0; c_device[gid+2*n_cells] = vy*d_0; c_device[gid+3*n_cells] = vz*d_0; - c_device[gid+4*n_cells] = P_0/(gama-1.0) + 0.5*d_0*(vx*vx + vy*vy + vz*vz); + c_device[gid+4*n_cells] = P_0/(gamma-1.0) + 0.5*d_0*(vx*vx + vy*vy + vz*vz); } __syncthreads(); } From 41da028cd8897783a6359ce5614c067591e43efa Mon Sep 17 00:00:00 2001 From: helenarichie Date: Fri, 22 Jul 2022 14:44:44 -0400 Subject: [PATCH 087/694] add working wind boundary --- builds/make.type.dust | 1 + cloud-wind/cloud-wind.txt | 10 +++++----- src/grid/cuda_boundaries.cu | 11 +++++++++-- 3 files changed, 15 insertions(+), 7 deletions(-) diff --git a/builds/make.type.dust b/builds/make.type.dust index 0edeea49b..691349d8a 100644 --- a/builds/make.type.dust +++ b/builds/make.type.dust @@ -32,6 +32,7 @@ DFLAGS += -DCOOLING_GPU #DFLAGS += -DCPU_TIME DFLAGS += -DSLICES +DFLAGS += -DPROJECTION DFLAGS += $(OUTPUT) diff --git a/cloud-wind/cloud-wind.txt b/cloud-wind/cloud-wind.txt index 9690c4192..01fd42404 100644 --- a/cloud-wind/cloud-wind.txt +++ b/cloud-wind/cloud-wind.txt @@ -24,15 +24,15 @@ zmin=0.0 # y direction domain length zlen=1.0 # final output time -tout=1e6 +tout=1e5 # time interval for output -outstep=1e5 +outstep=1e3 # ratio of specific heats gamma=1.66666667 # name of initial conditions init=Clouds # type of boundary condition, options include 1 (periodic), 2 (reflective), 3 (transmissive), 4 (custom) -xl_bcnd=wind +xl_bcnd=4 xu_bcnd=3 yl_bcnd=3 yu_bcnd=3 @@ -40,5 +40,5 @@ zl_bcnd=3 zu_bcnd=3 # path to output directory outdir=./ - - +nfile=1e10 +custom_bcnd=wind \ No newline at end of file diff --git a/src/grid/cuda_boundaries.cu b/src/grid/cuda_boundaries.cu index b455617e7..9140cabd1 100644 --- a/src/grid/cuda_boundaries.cu +++ b/src/grid/cuda_boundaries.cu @@ -372,7 +372,7 @@ __global__ void Wind_Boundary_kernel(Real * c_device, // same values as rho_bg and p_bg in cloud initial condition function d_0 = n_0*mu*MP/DENSITY_UNIT; - P_0 = n_0*KB*T_0 / PRESSURE_UNIT; + P_0 = n_0*KB*T_0/PRESSURE_UNIT; vx = 100*TIME_UNIT/KPC; // km/s * (cholla unit conversion) vy = 0.0; @@ -397,14 +397,21 @@ __global__ void Wind_Boundary_kernel(Real * c_device, xid += 0; // -x boundary gid = xid + yid*nx + zid*nx*ny; - if (xid >= nx-n_ghost && xid < nx && yid < ny && zid < nz) { + // printf("xid: %d\n", xid); + // printf("yid: %d\n", yid); + // printf("zid: %d\n", zid); + // printf("gid: %d\n", gid); + + if (xid <= n_ghost && xid < nx && yid < ny && zid < nz) { // set conserved variables + // printf("hello\n"); c_device[gid] = d_0; c_device[gid+1*n_cells] = vx*d_0; c_device[gid+2*n_cells] = vy*d_0; c_device[gid+3*n_cells] = vz*d_0; c_device[gid+4*n_cells] = P_0/(gamma-1.0) + 0.5*d_0*(vx*vx + vy*vy + vz*vz); } + // printf("oh no\n"); __syncthreads(); } From 4d4c47187f91f21f2a10f9bdca8ea15efc9d322c Mon Sep 17 00:00:00 2001 From: helenarichie Date: Mon, 1 Aug 2022 14:36:11 -0400 Subject: [PATCH 088/694] develop wind boundary --- src/grid/cuda_boundaries.cu | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/src/grid/cuda_boundaries.cu b/src/grid/cuda_boundaries.cu index 9140cabd1..284cd9671 100644 --- a/src/grid/cuda_boundaries.cu +++ b/src/grid/cuda_boundaries.cu @@ -395,23 +395,16 @@ __global__ void Wind_Boundary_kernel(Real * c_device, // map thread id to ghost cell id xid += 0; // -x boundary - gid = xid + yid*nx + zid*nx*ny; - - // printf("xid: %d\n", xid); - // printf("yid: %d\n", yid); - // printf("zid: %d\n", zid); - // printf("gid: %d\n", gid); + gid = xid + yid*nx + zid*nx*ny; if (xid <= n_ghost && xid < nx && yid < ny && zid < nz) { // set conserved variables - // printf("hello\n"); c_device[gid] = d_0; c_device[gid+1*n_cells] = vx*d_0; c_device[gid+2*n_cells] = vy*d_0; c_device[gid+3*n_cells] = vz*d_0; c_device[gid+4*n_cells] = P_0/(gamma-1.0) + 0.5*d_0*(vx*vx + vy*vy + vz*vz); } - // printf("oh no\n"); __syncthreads(); } From 7b71e46c2f67094fcd61eb1bb0450ecc50500d76 Mon Sep 17 00:00:00 2001 From: helenarichie Date: Tue, 2 Aug 2022 13:27:00 -0400 Subject: [PATCH 089/694] work on wind boundary --- builds/make.type.dust | 2 ++ cloud-wind/cloud-wind.txt | 11 ++++++----- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/builds/make.type.dust b/builds/make.type.dust index 691349d8a..5e9efd950 100644 --- a/builds/make.type.dust +++ b/builds/make.type.dust @@ -13,6 +13,8 @@ DFLAGS += -DPRECISION=2 DFLAGS += -DPPMP DFLAGS += -DHLLC +DFLAGS += -DE + ifeq ($(findstring cosmology,$(TYPE)),cosmology) DFLAGS += -DSIMPLE else diff --git a/cloud-wind/cloud-wind.txt b/cloud-wind/cloud-wind.txt index 01fd42404..05c1b10f8 100644 --- a/cloud-wind/cloud-wind.txt +++ b/cloud-wind/cloud-wind.txt @@ -6,7 +6,7 @@ # Parameters required for all problems ################################################ # number of grid cells in the x dimension -nx=256 +nx=512 # number of grid cells in the y dimension ny=128 # number of grid cells in the z dimension @@ -14,7 +14,7 @@ nz=128 # x direction lower domain boundary xmin=0.0 # x direction domain length -xlen=2.0 +xlen=4.0 # y direction lower domain boundary ymin=0.0 # y direction domain length @@ -24,9 +24,9 @@ zmin=0.0 # y direction domain length zlen=1.0 # final output time -tout=1e5 +tout=9e4 # time interval for output -outstep=1e3 +outstep=1e2 # ratio of specific heats gamma=1.66666667 # name of initial conditions @@ -41,4 +41,5 @@ zu_bcnd=3 # path to output directory outdir=./ nfile=1e10 -custom_bcnd=wind \ No newline at end of file +custom_bcnd=wind +n_hydro=3728643 \ No newline at end of file From 16de385f44ddabce3cce2e6929a52e1dfa178dc4 Mon Sep 17 00:00:00 2001 From: helenarichie Date: Tue, 2 Aug 2022 13:48:34 -0400 Subject: [PATCH 090/694] add dual energy flag to build for wind boundary simulations --- builds/make.type.dust | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/builds/make.type.dust b/builds/make.type.dust index 5e9efd950..f22bf1f67 100644 --- a/builds/make.type.dust +++ b/builds/make.type.dust @@ -13,7 +13,7 @@ DFLAGS += -DPRECISION=2 DFLAGS += -DPPMP DFLAGS += -DHLLC -DFLAGS += -DE +DFLAGS += -DDE ifeq ($(findstring cosmology,$(TYPE)),cosmology) DFLAGS += -DSIMPLE From 0d5d593a56c9f733a2e76b9b312da6190402aeee Mon Sep 17 00:00:00 2001 From: helenarichie Date: Tue, 2 Aug 2022 13:48:34 -0400 Subject: [PATCH 091/694] add DE flag to build for wind boundary sims in dev-dust --- builds/make.type.dust | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/builds/make.type.dust b/builds/make.type.dust index 0edeea49b..6aa6ea436 100644 --- a/builds/make.type.dust +++ b/builds/make.type.dust @@ -13,6 +13,11 @@ DFLAGS += -DPRECISION=2 DFLAGS += -DPPMP DFLAGS += -DHLLC +<<<<<<< HEAD +======= +DFLAGS += -DDE + +>>>>>>> 16de385... add dual energy flag to build for wind boundary simulations ifeq ($(findstring cosmology,$(TYPE)),cosmology) DFLAGS += -DSIMPLE else From 7a1255e58fb8fc41e22b9693165a5b64190de526 Mon Sep 17 00:00:00 2001 From: helenarichie Date: Tue, 2 Aug 2022 13:48:34 -0400 Subject: [PATCH 092/694] add DE flag to build for wind boundary sims in dev-dust --- builds/make.type.dust | 3 +++ 1 file changed, 3 insertions(+) diff --git a/builds/make.type.dust b/builds/make.type.dust index 6aa6ea436..1f7cc0d4b 100644 --- a/builds/make.type.dust +++ b/builds/make.type.dust @@ -14,7 +14,10 @@ DFLAGS += -DPPMP DFLAGS += -DHLLC <<<<<<< HEAD +<<<<<<< HEAD +======= ======= +>>>>>>> 16de385... add dual energy flag to build for wind boundary simulations DFLAGS += -DDE >>>>>>> 16de385... add dual energy flag to build for wind boundary simulations From c9cd34dd6333e06b7fa95c7359ef05925a64bf0e Mon Sep 17 00:00:00 2001 From: helenarichie Date: Tue, 2 Aug 2022 13:48:34 -0400 Subject: [PATCH 093/694] add DE flag to build for wind boundary sims in dev-dust --- builds/make.type.dust | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/builds/make.type.dust b/builds/make.type.dust index 1f7cc0d4b..f22bf1f67 100644 --- a/builds/make.type.dust +++ b/builds/make.type.dust @@ -13,14 +13,8 @@ DFLAGS += -DPRECISION=2 DFLAGS += -DPPMP DFLAGS += -DHLLC -<<<<<<< HEAD -<<<<<<< HEAD -======= -======= ->>>>>>> 16de385... add dual energy flag to build for wind boundary simulations DFLAGS += -DDE ->>>>>>> 16de385... add dual energy flag to build for wind boundary simulations ifeq ($(findstring cosmology,$(TYPE)),cosmology) DFLAGS += -DSIMPLE else @@ -40,6 +34,7 @@ DFLAGS += -DCOOLING_GPU #DFLAGS += -DCPU_TIME DFLAGS += -DSLICES +DFLAGS += -DPROJECTION DFLAGS += $(OUTPUT) From c53db9f3bbac59b13644887c25dd54d03755b873 Mon Sep 17 00:00:00 2001 From: helenarichie Date: Tue, 2 Aug 2022 13:48:34 -0400 Subject: [PATCH 094/694] add dual energy flag to build for wind boundary simulations From c9977cb1a9d165d15d0ff1199b2904eecad153f7 Mon Sep 17 00:00:00 2001 From: helenarichie Date: Wed, 3 Aug 2022 10:34:17 -0400 Subject: [PATCH 095/694] add temperature floor --- builds/make.type.dust | 3 ++- src/dust/dust_cuda.cu | 2 +- src/global/global.h | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/builds/make.type.dust b/builds/make.type.dust index f22bf1f67..be2762b8c 100644 --- a/builds/make.type.dust +++ b/builds/make.type.dust @@ -13,7 +13,8 @@ DFLAGS += -DPRECISION=2 DFLAGS += -DPPMP DFLAGS += -DHLLC -DFLAGS += -DDE +# DFLAGS += -DDE +DFLAGS += -DTEMPERATURE_FLOOR ifeq ($(findstring cosmology,$(TYPE)),cosmology) DFLAGS += -DSIMPLE diff --git a/src/dust/dust_cuda.cu b/src/dust/dust_cuda.cu index 6572501af..ba14a8415 100644 --- a/src/dust/dust_cuda.cu +++ b/src/dust/dust_cuda.cu @@ -109,7 +109,7 @@ __global__ void Dust_Kernel(Real *dev_conserved, int nx, int ny, int nz, int n_g dev_conserved[5*n_cells + id] = d_dust; #ifdef DE - dev_conserved[(n_fields-1)*n_cells + id] = d*ge; + dev_conserved[(n_fields-1)*n_cells + id] = d_dust*ge; #endif } } diff --git a/src/global/global.h b/src/global/global.h index 560ddb5f4..dfc25faa7 100644 --- a/src/global/global.h +++ b/src/global/global.h @@ -54,7 +54,7 @@ typedef double Real; #define LOG_FILE_NAME "run_output.log" //Conserved Floor Values -#define TEMP_FLOOR 1e-3 // in Kelvin +#define TEMP_FLOOR 1e4 // in Kelvin #define DENS_FLOOR 1e-5 // in code units //Parameter for Enzo dual Energy Condition From 7615e6eae51f53ad95b6e6bb71dec246ed0115b7 Mon Sep 17 00:00:00 2001 From: helenarichie Date: Mon, 8 Aug 2022 10:25:38 -0400 Subject: [PATCH 096/694] work on setup for wind tunnel simulations --- builds/make.type.dust | 5 ++-- constant.txt | 50 --------------------------------- src/grid/initial_conditions.cpp | 4 +-- src/hydro/hydro_cuda.cu | 15 ++++++++++ 4 files changed, 20 insertions(+), 54 deletions(-) delete mode 100644 constant.txt diff --git a/builds/make.type.dust b/builds/make.type.dust index be2762b8c..fdd24e116 100644 --- a/builds/make.type.dust +++ b/builds/make.type.dust @@ -13,8 +13,9 @@ DFLAGS += -DPRECISION=2 DFLAGS += -DPPMP DFLAGS += -DHLLC -# DFLAGS += -DDE -DFLAGS += -DTEMPERATURE_FLOOR +DFLAGS += -DDE +# DFLAGS += -DAVERAGE_SLOW_CELLS +# DFLAGS += -DTEMPERATURE_FLOOR ifeq ($(findstring cosmology,$(TYPE)),cosmology) DFLAGS += -DSIMPLE diff --git a/constant.txt b/constant.txt deleted file mode 100644 index af1610674..000000000 --- a/constant.txt +++ /dev/null @@ -1,50 +0,0 @@ -# -# Parameter File for box filled with gas -# - -################################################ -# number of grid cells in the x dimension -nx=10 -# number of grid cells in the y dimension -ny=1 -# number of grid cells in the z dimension -nz=1 -# final output time (kyr) -tout=100000.0 -# time interval for output -outstep=100 -# name of initial conditions -init=Constant -# domain properties -xmin=0.0 -ymin=0.0 -zmin=0.0 -xlen=1.0 -ylen=1.0 -zlen=1.0 -# type of boundary conditions -xl_bcnd=1 -xu_bcnd=1 -yl_bcnd=1 -yu_bcnd=1 -zl_bcnd=1 -zu_bcnd=1 -# path to output directory -outdir=./ - -################################################# -# density -rho=14827371.70126647 -# velocity -vx=0 -vy=0 -vz=0 -# pressure -P=1e-2 -# Magnetic Field -Bx=0.0 -By=0.0 -Bz=0.0 -# value of gamma -gamma=1.666666667 - diff --git a/src/grid/initial_conditions.cpp b/src/grid/initial_conditions.cpp index 97c8d34d2..6695e0898 100644 --- a/src/grid/initial_conditions.cpp +++ b/src/grid/initial_conditions.cpp @@ -1279,8 +1279,8 @@ void Grid3D::Clouds() n_cl = 1; rho_bg = n_bg*mu*MP/DENSITY_UNIT; rho_cl = n_cl*mu*MP/DENSITY_UNIT; - // vx_bg = 100*TIME_UNIT/KPC; - vx_bg = 0.0; + vx_bg = 100*TIME_UNIT/KPC; + // vx_bg = 0.0; //vx_c = -200*TIME_UNIT/KPC; // convert from km/s to kpc/kyr vx_cl = 0.0; vy_bg = vy_cl = 0.0; diff --git a/src/hydro/hydro_cuda.cu b/src/hydro/hydro_cuda.cu index bf385d25f..61522b1b1 100644 --- a/src/hydro/hydro_cuda.cu +++ b/src/hydro/hydro_cuda.cu @@ -566,6 +566,21 @@ __global__ void Calc_dt_3D(Real *dev_conserved, Real *dev_dti, Real gamma, int n max_dti = fmax(max_dti,hydroInverseCrossingTime(E, d, d_inv, vx, vy, vz, dx, dy, dz, gamma)); #endif //MHD + Real P = (E - 0.5*d*(vx*vx + vy*vy + vz*vz)) * (gamma - 1.0); + Real cs = sqrt(d_inv * gamma * P); + Real n = d*DENSITY_UNIT/(0.6*MP); + Real T = hydro_utilities::Calc_Temp(P, n); + + if (max_dti > 1) { + printf("\nmax_dti: %e\n", max_dti); + printf("E: %e g/(cm^2⋅s^2)\n", E*ENERGY_UNIT); + printf("P: %e g/(cm⋅s^2)\n", P*PRESSURE_UNIT); + printf("T: %e K\n", T); + printf("cs: %e km/s\n", cs*1e-5*VELOCITY_UNIT); + printf("d: %e g/cm^3\n", d*DENSITY_UNIT); + printf("vx: %e km/s\n", vx*1e-5*VELOCITY_UNIT); + } + } } From 1ba4d89813c259faf63f89c8502e2afd652c2a0f Mon Sep 17 00:00:00 2001 From: helenarichie Date: Mon, 8 Aug 2022 14:55:23 -0400 Subject: [PATCH 097/694] update dust build type by adding a 10 K temperature floor and changing intregator and reconstruction. --- builds/make.type.dust | 8 ++++---- src/global/global.h | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/builds/make.type.dust b/builds/make.type.dust index fdd24e116..261548768 100644 --- a/builds/make.type.dust +++ b/builds/make.type.dust @@ -10,17 +10,17 @@ DFLAGS += -DCUDA DFLAGS += -DMPI_CHOLLA DFLAGS += -DBLOCK DFLAGS += -DPRECISION=2 -DFLAGS += -DPPMP +DFLAGS += -DPPMC DFLAGS += -DHLLC -DFLAGS += -DDE +# DFLAGS += -DDE # DFLAGS += -DAVERAGE_SLOW_CELLS -# DFLAGS += -DTEMPERATURE_FLOOR +DFLAGS += -DTEMPERATURE_FLOOR ifeq ($(findstring cosmology,$(TYPE)),cosmology) DFLAGS += -DSIMPLE else -DFLAGS += -DVL +DFLAGS += -DSIMPLE endif # Evolve additional scalars diff --git a/src/global/global.h b/src/global/global.h index dfc25faa7..68744ba4f 100644 --- a/src/global/global.h +++ b/src/global/global.h @@ -54,7 +54,7 @@ typedef double Real; #define LOG_FILE_NAME "run_output.log" //Conserved Floor Values -#define TEMP_FLOOR 1e4 // in Kelvin +#define TEMP_FLOOR 10 // in Kelvin #define DENS_FLOOR 1e-5 // in code units //Parameter for Enzo dual Energy Condition From 8fba3d74dc8423f32f4ebc9f784a4d4ca9704c5e Mon Sep 17 00:00:00 2001 From: helenarichie Date: Tue, 9 Aug 2022 16:46:33 -0400 Subject: [PATCH 098/694] change VL integrator for dust build --- builds/make.type.dust | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/builds/make.type.dust b/builds/make.type.dust index 261548768..5b54e21ad 100644 --- a/builds/make.type.dust +++ b/builds/make.type.dust @@ -20,7 +20,7 @@ DFLAGS += -DTEMPERATURE_FLOOR ifeq ($(findstring cosmology,$(TYPE)),cosmology) DFLAGS += -DSIMPLE else -DFLAGS += -DSIMPLE +DFLAGS += -DVL endif # Evolve additional scalars From d33da57875adbdb56ee188eaeac4b7a0e313f6e7 Mon Sep 17 00:00:00 2001 From: helenarichie Date: Tue, 16 Aug 2022 10:34:47 -0400 Subject: [PATCH 099/694] merge in bug fix --- src/utils/hydro_utilities.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/utils/hydro_utilities.h b/src/utils/hydro_utilities.h index f40517a7a..51439ac29 100644 --- a/src/utils/hydro_utilities.h +++ b/src/utils/hydro_utilities.h @@ -54,7 +54,7 @@ namespace hydro_utilities { inline __host__ __device__ Real Calc_Energy_Primitive(Real const &P, Real const &d, Real const &vx, Real const &vy, Real const &vz, Real const &gamma) { // Compute and return energy - return (fmax(P, TINY_NUMBER)/gamma - 1.) + 0.5 * d * (vx*vx + vy*vy + vz*vz); + return (fmax(P, TINY_NUMBER)/(gamma - 1.)) + 0.5 * d * (vx*vx + vy*vy + vz*vz); } inline __host__ __device__ Real Get_Pressure_From_DE(Real const &E, Real const &U_total, Real const &U_advected, Real const &gamma) { From 60b1272eb1737ad32d87c92e625bb85840d481a2 Mon Sep 17 00:00:00 2001 From: helenarichie Date: Tue, 16 Aug 2022 10:35:52 -0400 Subject: [PATCH 100/694] develop cloud wind build --- builds/make.type.dust | 6 +++--- cloud-wind/cloud-wind.txt | 4 ++-- src/global/global.h | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/builds/make.type.dust b/builds/make.type.dust index fdd24e116..5b54e21ad 100644 --- a/builds/make.type.dust +++ b/builds/make.type.dust @@ -10,12 +10,12 @@ DFLAGS += -DCUDA DFLAGS += -DMPI_CHOLLA DFLAGS += -DBLOCK DFLAGS += -DPRECISION=2 -DFLAGS += -DPPMP +DFLAGS += -DPPMC DFLAGS += -DHLLC -DFLAGS += -DDE +# DFLAGS += -DDE # DFLAGS += -DAVERAGE_SLOW_CELLS -# DFLAGS += -DTEMPERATURE_FLOOR +DFLAGS += -DTEMPERATURE_FLOOR ifeq ($(findstring cosmology,$(TYPE)),cosmology) DFLAGS += -DSIMPLE diff --git a/cloud-wind/cloud-wind.txt b/cloud-wind/cloud-wind.txt index 05c1b10f8..4f08bbeb2 100644 --- a/cloud-wind/cloud-wind.txt +++ b/cloud-wind/cloud-wind.txt @@ -40,6 +40,6 @@ zl_bcnd=3 zu_bcnd=3 # path to output directory outdir=./ -nfile=1e10 +# nfile=1e10 custom_bcnd=wind -n_hydro=3728643 \ No newline at end of file +# n_hydro=3728643 \ No newline at end of file diff --git a/src/global/global.h b/src/global/global.h index dfc25faa7..68744ba4f 100644 --- a/src/global/global.h +++ b/src/global/global.h @@ -54,7 +54,7 @@ typedef double Real; #define LOG_FILE_NAME "run_output.log" //Conserved Floor Values -#define TEMP_FLOOR 1e4 // in Kelvin +#define TEMP_FLOOR 10 // in Kelvin #define DENS_FLOOR 1e-5 // in code units //Parameter for Enzo dual Energy Condition From d81a3701fe28e8c579803beb7fe7e01e56c1352a Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Thu, 8 Sep 2022 15:47:43 -0400 Subject: [PATCH 101/694] Update all make host files to use C++17 --- Makefile | 4 ++-- builds/make.host.frontier | 4 ++-- builds/make.host.github | 8 ++++---- builds/make.host.lux | 10 +++++----- builds/make.host.poplar | 4 ++-- builds/make.host.poplar.aomp | 4 ++-- builds/make.host.poplar.cce+hip | 4 ++-- builds/make.host.shamrock | 6 +++--- builds/make.host.spock | 4 ++-- builds/make.host.summit | 8 ++++---- builds/make.host.tornado | 4 ++-- 11 files changed, 30 insertions(+), 30 deletions(-) diff --git a/Makefile b/Makefile index b4975b1ea..457e82530 100644 --- a/Makefile +++ b/Makefile @@ -60,8 +60,8 @@ CC ?= cc CXX ?= CC CFLAGS_OPTIMIZE ?= -g -Ofast -CXXFLAGS_OPTIMIZE ?= -g -Ofast -std=c++14 -GPUFLAGS_OPTIMIZE ?= -g -O3 -std=c++14 +CXXFLAGS_OPTIMIZE ?= -g -Ofast -std=c++17 +GPUFLAGS_OPTIMIZE ?= -g -O3 -std=c++17 BUILD ?= OPTIMIZE CFLAGS += $(CFLAGS_$(BUILD)) diff --git a/builds/make.host.frontier b/builds/make.host.frontier index 14aae5d38..44b3b2378 100644 --- a/builds/make.host.frontier +++ b/builds/make.host.frontier @@ -8,8 +8,8 @@ GPUCXX ?= hipcc CFLAGS_DEBUG = -g -O0 CFLAGS_OPTIMIZE = -g -O2 -CXXFLAGS_DEBUG = -g -O0 -std=c++14 -CXXFLAGS_OPTIMIZE = -g -Ofast -std=c++14 -Wno-unused-result +CXXFLAGS_DEBUG = -g -O0 -std=c++17 +CXXFLAGS_OPTIMIZE = -g -Ofast -std=c++17 -Wno-unused-result GPUFLAGS = --offload-arch=gfx90a -Wno-unused-result HIPCONFIG = -I$(ROCM_PATH)/include $(shell hipconfig -C) # workaround for Rocm 5.2 warnings diff --git a/builds/make.host.github b/builds/make.host.github index acc003aad..23b9eed64 100644 --- a/builds/make.host.github +++ b/builds/make.host.github @@ -3,10 +3,10 @@ CC = mpicc CXX = mpicxx CFLAGS_DEBUG = -g -O0 CFLAGS_OPTIMIZE = -g -O2 -CXXFLAGS_DEBUG = -g -O0 -std=c++14 ${F_OFFLOAD} -CXXFLAGS_OPTIMIZE = -Ofast -std=c++14 ${F_OFFLOAD} -GPUFLAGS_DEBUG = -std=c++14 -GPUFLAGS_OPTIMIZE = -std=c++14 +CXXFLAGS_DEBUG = -g -O0 -std=c++17 ${F_OFFLOAD} +CXXFLAGS_OPTIMIZE = -Ofast -std=c++17 ${F_OFFLOAD} +GPUFLAGS_DEBUG = -std=c++17 +GPUFLAGS_OPTIMIZE = -std=c++17 OMP_NUM_THREADS = 7 diff --git a/builds/make.host.lux b/builds/make.host.lux index 5dbe561f4..d5193c88c 100644 --- a/builds/make.host.lux +++ b/builds/make.host.lux @@ -1,13 +1,13 @@ -#-- make.inc for the Shamrock Server +#-- make.inc for the Shamrock Server #-- Compiler and flags for different build type CC = mpicc CXX = mpicxx CFLAGS_DEBUG = -g -O0 CFLAGS_OPTIMIZE = -g -O2 -CXXFLAGS_DEBUG = -g -O0 -std=c++11 -CXXFLAGS_OPTIMIZE = -Ofast -std=c++11 -GPUFLAGS = -std=c++11 +CXXFLAGS_DEBUG = -g -O0 -std=c++17 +CXXFLAGS_OPTIMIZE = -Ofast -std=c++17 +GPUFLAGS = -std=c++17 OMP_NUM_THREADS = 10 @@ -19,5 +19,5 @@ FFTW_ROOT = /home/brvillas/code/fftw-3.3.8 PFFT_ROOT = /data/groups/comp-astro/bruno/code_mpi_local/pfft GRACKLE_ROOT = /home/brvillas/code/grackle -#Paris does not do GPU_MPI transfers +#Paris does not do GPU_MPI transfers PARIS_MPI_GPU = -DPARIS_NO_GPU_MPI \ No newline at end of file diff --git a/builds/make.host.poplar b/builds/make.host.poplar index f65743075..4c062f87a 100644 --- a/builds/make.host.poplar +++ b/builds/make.host.poplar @@ -5,8 +5,8 @@ CC = cc CXX = CC CFLAGS_DEBUG = -g -O0 ${F_OFFLOAD} CFLAGS_OPTIMIZE = -Ofast ${F_OFFLOAD} -CXXFLAGS_DEBUG = -g -O0 -std=c++11 ${F_OFFLOAD} -CXXFLAGS_OPTIMIZE = -Ofast -std=c++11 ${F_OFFLOAD} +CXXFLAGS_DEBUG = -g -O0 -std=c++17 ${F_OFFLOAD} +CXXFLAGS_OPTIMIZE = -Ofast -std=c++17 ${F_OFFLOAD} GPUFLAGS = --offload-arch=gfx906,gfx908 HIPCONFIG = $(shell hipconfig -C) diff --git a/builds/make.host.poplar.aomp b/builds/make.host.poplar.aomp index 5ffb3de45..63ff512af 100644 --- a/builds/make.host.poplar.aomp +++ b/builds/make.host.poplar.aomp @@ -6,8 +6,8 @@ CXX = mpicxx HIPCONFIG = $(shell hipconfig -C) CFLAGS_DEBUG = -g -O0 CFLAGS_OPTIMIZE = -Ofast -CXXFLAGS_DEBUG = -g -O0 -std=c++11 -CXXFLAGS_OPTIMIZE = -Ofast -std=c++11 +CXXFLAGS_DEBUG = -g -O0 -std=c++17 +CXXFLAGS_OPTIMIZE = -Ofast -std=c++17 GPUFLAGS = --offload-arch=gfx906,gfx908 LIBS = -lm -lstdc++ diff --git a/builds/make.host.poplar.cce+hip b/builds/make.host.poplar.cce+hip index 3309109d1..6ba1cfb9f 100644 --- a/builds/make.host.poplar.cce+hip +++ b/builds/make.host.poplar.cce+hip @@ -6,8 +6,8 @@ CXX = CC HIPCONFIG = $(shell hipconfig -C) CFLAGS_DEBUG = -g -O0 CFLAGS_OPTIMIZE = -Ofast -CXXFLAGS_DEBUG = -g -O0 -std=c++11 -CXXFLAGS_OPTIMIZE = -Ofast -std=c++11 +CXXFLAGS_DEBUG = -g -O0 -std=c++17 +CXXFLAGS_OPTIMIZE = -Ofast -std=c++17 GPUFLAGS = --offload-arch=gfx906,gfx908 #-- How to launch job diff --git a/builds/make.host.shamrock b/builds/make.host.shamrock index 223f913d5..cc849b051 100644 --- a/builds/make.host.shamrock +++ b/builds/make.host.shamrock @@ -7,8 +7,8 @@ CXX = mpicxx #CXX = g++ CFLAGS_DEBUG = -g -O0 CFLAGS_OPTIMIZE = -g -O2 -CXXFLAGS_DEBUG = -g -O0 -std=c++11 -CXXFLAGS_OPTIMIZE = -Ofast -std=c++11 +CXXFLAGS_DEBUG = -g -O0 -std=c++17 +CXXFLAGS_OPTIMIZE = -Ofast -std=c++17 OMP_NUM_THREADS = 10 @@ -22,5 +22,5 @@ PFFT_ROOT = /home/bruno/code/pfft #GRACKLE_ROOT = /home/bruno/code/grackle_modified GRACKLE_ROOT = /home/bruno/local -#Paris does not do GPU_MPI transfers +#Paris does not do GPU_MPI transfers PARIS_MPI_GPU = -DPARIS_NO_GPU_MPI diff --git a/builds/make.host.spock b/builds/make.host.spock index 758ee9a6e..8cac7c086 100644 --- a/builds/make.host.spock +++ b/builds/make.host.spock @@ -7,8 +7,8 @@ CXX = CC CFLAGS_DEBUG = -g -O0 CFLAGS_OPTIMIZE = -g -O2 -CXXFLAGS_DEBUG = -g -O0 -std=c++11 -CXXFLAGS_OPTIMIZE = -g -Ofast -std=c++11 +CXXFLAGS_DEBUG = -g -O0 -std=c++17 +CXXFLAGS_OPTIMIZE = -g -Ofast -std=c++17 GPUFLAGS = --offload-arch=gfx908 HIPCONFIG = $(shell hipconfig -C) diff --git a/builds/make.host.summit b/builds/make.host.summit index 2d557be26..83236db06 100644 --- a/builds/make.host.summit +++ b/builds/make.host.summit @@ -6,10 +6,10 @@ CC = mpicc CXX = mpicxx CFLAGS_DEBUG = -g -O0 CFLAGS_OPTIMIZE = -g -O2 -CXXFLAGS_DEBUG = -g -O0 -std=c++14 ${F_OFFLOAD} -CXXFLAGS_OPTIMIZE = -Ofast -std=c++14 ${F_OFFLOAD} -GPUFLAGS_DEBUG = -g -O0 -std=c++14 -ccbin=mpicxx -GPUFLAGS_OPTIMIZE = -g -O3 -std=c++14 -ccbin=mpicxx +CXXFLAGS_DEBUG = -g -O0 -std=c++17 ${F_OFFLOAD} +CXXFLAGS_OPTIMIZE = -Ofast -std=c++17 ${F_OFFLOAD} +GPUFLAGS_DEBUG = -g -O0 -std=c++17 -ccbin=mpicxx +GPUFLAGS_OPTIMIZE = -g -O3 -std=c++17 -ccbin=mpicxx OMP_NUM_THREADS = 7 diff --git a/builds/make.host.tornado b/builds/make.host.tornado index 9af6d5107..e8cf09a62 100644 --- a/builds/make.host.tornado +++ b/builds/make.host.tornado @@ -5,8 +5,8 @@ CC = gcc CXX = g++ CFLAGS_DEBUG = -g -O0 CFLAGS_OPTIMIZE = -g -O2 -CXXFLAGS_DEBUG = -g -O0 -std=c++11 -CXXFLAGS_OPTIMIZE = -Ofast -std=c++11 +CXXFLAGS_DEBUG = -g -O0 -std=c++17 +CXXFLAGS_OPTIMIZE = -Ofast -std=c++17 OMP_NUM_THREADS = 10 From 33e4eade06afa9ef63a3fa4fff119a010c1c6729 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Thu, 8 Sep 2022 16:28:55 -0400 Subject: [PATCH 102/694] Add debug builds that work with Arm DDT --- Makefile | 12 +++++++++--- builds/make.host.c3po | 2 +- builds/make.host.frontier | 3 ++- builds/make.host.github | 4 ++-- builds/make.host.summit | 2 +- 5 files changed, 15 insertions(+), 8 deletions(-) diff --git a/Makefile b/Makefile index 457e82530..7d58f01a7 100644 --- a/Makefile +++ b/Makefile @@ -35,10 +35,11 @@ ifeq ($(TEST), true) CPPFILES := $(filter-out src/main.cpp,$(CPPFILES)) LIBS += -L$(GOOGLETEST_ROOT)/lib64 -pthread -lgtest -lhdf5_cpp TEST_FLAGS = -I$(GOOGLETEST_ROOT)/include - CFLAGS = $(TEST_FLAGS) - CXXFLAGS = $(TEST_FLAGS) - GPUFLAGS = $(TEST_FLAGS) + CFLAGS += $(TEST_FLAGS) + CXXFLAGS += $(TEST_FLAGS) + GPUFLAGS += $(TEST_FLAGS) + # HACK # Set the build flags to debug. This is mostly to avoid the approximations # made by Ofast which break std::isnan and std::isinf which are required for # the testing @@ -62,6 +63,11 @@ CXX ?= CC CFLAGS_OPTIMIZE ?= -g -Ofast CXXFLAGS_OPTIMIZE ?= -g -Ofast -std=c++17 GPUFLAGS_OPTIMIZE ?= -g -O3 -std=c++17 + +CFLAGS_DEBUG ?= -g -O0 +CXXFLAGS_DEBUG ?= -g -O0 -std=c++17 +GPUFLAGS_DEBUG ?= -g -G -cudart shared -O0 -std=c++17 -ccbin=mpicxx + BUILD ?= OPTIMIZE CFLAGS += $(CFLAGS_$(BUILD)) diff --git a/builds/make.host.c3po b/builds/make.host.c3po index c45d193b8..6c45dbbc2 100644 --- a/builds/make.host.c3po +++ b/builds/make.host.c3po @@ -5,7 +5,7 @@ CFLAGS_DEBUG = -g -O0 CFLAGS_OPTIMIZE = -g -O2 CXXFLAGS_DEBUG = -g -O0 -std=c++17 ${F_OFFLOAD} CXXFLAGS_OPTIMIZE = -g -Ofast -std=c++17 ${F_OFFLOAD} -GPUFLAGS_DEBUG = -g -O0 -std=c++17 -ccbin=mpicxx +GPUFLAGS_DEBUG = -g -G -cudart shared -O0 -std=c++17 -ccbin=mpicxx GPUFLAGS_OPTIMIZE = -g -O3 -std=c++17 -ccbin=mpicxx OMP_NUM_THREADS = 7 diff --git a/builds/make.host.frontier b/builds/make.host.frontier index 44b3b2378..c225b3655 100644 --- a/builds/make.host.frontier +++ b/builds/make.host.frontier @@ -11,7 +11,8 @@ CFLAGS_OPTIMIZE = -g -O2 CXXFLAGS_DEBUG = -g -O0 -std=c++17 CXXFLAGS_OPTIMIZE = -g -Ofast -std=c++17 -Wno-unused-result -GPUFLAGS = --offload-arch=gfx90a -Wno-unused-result +GPUFLAGS_OPTIMIZE = -std=c++17 --offload-arch=gfx90a -Wno-unused-result +GPUFLAGS_DEBUG = -g -O0 -std=c++17 --offload-arch=gfx90a -Wno-unused-result HIPCONFIG = -I$(ROCM_PATH)/include $(shell hipconfig -C) # workaround for Rocm 5.2 warnings #HIPCONFIG = $(shell hipconfig -C) diff --git a/builds/make.host.github b/builds/make.host.github index 23b9eed64..aea03c28c 100644 --- a/builds/make.host.github +++ b/builds/make.host.github @@ -5,8 +5,8 @@ CFLAGS_DEBUG = -g -O0 CFLAGS_OPTIMIZE = -g -O2 CXXFLAGS_DEBUG = -g -O0 -std=c++17 ${F_OFFLOAD} CXXFLAGS_OPTIMIZE = -Ofast -std=c++17 ${F_OFFLOAD} -GPUFLAGS_DEBUG = -std=c++17 -GPUFLAGS_OPTIMIZE = -std=c++17 +GPUFLAGS_DEBUG = -g -G -cudart shared -O0 -std=c++17 +GPUFLAGS_OPTIMIZE = -g -O3 -std=c++17 OMP_NUM_THREADS = 7 diff --git a/builds/make.host.summit b/builds/make.host.summit index 83236db06..ab1feda42 100644 --- a/builds/make.host.summit +++ b/builds/make.host.summit @@ -8,7 +8,7 @@ CFLAGS_DEBUG = -g -O0 CFLAGS_OPTIMIZE = -g -O2 CXXFLAGS_DEBUG = -g -O0 -std=c++17 ${F_OFFLOAD} CXXFLAGS_OPTIMIZE = -Ofast -std=c++17 ${F_OFFLOAD} -GPUFLAGS_DEBUG = -g -O0 -std=c++17 -ccbin=mpicxx +GPUFLAGS_DEBUG = -g -O0 -std=c++17 -ccbin=mpicxx -G -cudart shared GPUFLAGS_OPTIMIZE = -g -O3 -std=c++17 -ccbin=mpicxx OMP_NUM_THREADS = 7 From eafe5d4932e55d164bc2374b9c70304b42f53159 Mon Sep 17 00:00:00 2001 From: ojwg Date: Wed, 27 Jul 2022 10:11:23 -0400 Subject: [PATCH 103/694] saving interim work --- Makefile | 11 + builds/make.host.c3po | 2 +- builds/make.host.crc | 4 +- builds/make.host.spock | 4 +- builds/make.type.cooling | 1 + builds/make.type.hydro | 8 +- builds/make.type.particles | 2 +- builds/setup.crc.gcc.sh | 2 +- builds/setup.frontier.cce.sh | 3 +- builds/setup.spock.cce.sh | 7 +- builds/setup.summit.gcc.sh | 6 +- builds/setup.summit.xl.sh | 2 +- cholla-tests-data | 2 +- scale_output_files/outputs_cosmo_2048_z0.txt | 340 ++++++++++++++++ scale_output_files/outputs_cosmo_pk_boera.txt | 3 + src/analysis/analysis.cpp | 4 +- src/analysis/feedback_analysis.h | 1 + src/analysis/feedback_analysis_gpu.cu | 15 +- src/chemistry_gpu/chemistry_functions.cpp | 2 + src/chemistry_gpu/chemistry_functions_gpu.cu | 19 +- src/cooling/cooling_cuda.cu | 57 +-- src/cooling/cooling_cuda.h | 11 +- src/cooling/cooling_wrapper.cu | 2 +- src/cosmology/cosmology_functions_gpu.cu | 2 +- src/cosmology/cosmology_functions_gpu.h | 2 +- src/global/global.h | 2 + src/global/global_cuda.cu | 8 +- src/global/global_cuda.h | 10 +- src/gravity/grav3D.cpp | 12 +- src/gravity/grav3D.h | 5 +- src/gravity/gravity_functions.cpp | 15 +- src/gravity/gravity_functions_gpu.cu | 8 + src/grid/boundary_conditions.cpp | 29 +- src/grid/cuda_boundaries.cu | 211 +++++++++- src/grid/cuda_boundaries.h | 4 + src/grid/grid3D.cpp | 78 ++-- src/grid/grid3D.h | 66 ++-- src/grid/initial_conditions.cpp | 308 +++++++++++---- src/grid/mpi_boundaries.cpp | 61 ++- src/hydro/hydro_cuda.cu | 366 ++++++++++-------- src/hydro/hydro_cuda.h | 32 +- src/hydro/hydro_cuda_tests.cu | 4 +- src/integrators/CTU_1D_cuda.cu | 1 + src/integrators/CTU_2D_cuda.cu | 2 + src/integrators/CTU_3D_cuda.cu | 3 +- src/integrators/CTU_3D_cuda.h | 2 +- src/integrators/VL_1D_cuda.cu | 1 + src/integrators/VL_2D_cuda.cu | 1 + src/integrators/VL_3D_cuda.cu | 8 +- src/integrators/VL_3D_cuda.h | 2 +- src/integrators/simple_3D_cuda.cu | 25 +- src/integrators/simple_3D_cuda.h | 2 +- src/io/io.cpp | 351 +++++------------ src/main.cpp | 76 ++-- src/model/disk_galaxy.h | 14 +- src/mpi/cuda_pack_buffers.h | 33 -- src/mpi/mpi_routines.cpp | 76 +--- src/mpi/mpi_routines.h | 3 - src/particles/density_CIC.cpp | 3 + src/particles/density_CIC_gpu.cu | 31 +- src/particles/density_boundaries.cpp | 18 + src/particles/feeback_CIC.h | 2 +- src/particles/feedback_CIC.cpp | 7 +- src/particles/feedback_CIC_gpu.cu | 350 +++++++++++++---- src/particles/gravity_CIC.cpp | 4 + src/particles/gravity_CIC_gpu.cu | 5 +- src/particles/io_particles.cpp | 6 +- src/particles/particles_3D.cpp | 39 +- src/particles/particles_3D.h | 12 +- src/particles/particles_3D_gpu.cu | 72 +++- src/particles/particles_boundaries.cpp | 10 +- src/particles/particles_dynamics.cpp | 2 + src/particles/supernova.h | 12 +- src/reconstruction/plmp_cuda.cu | 8 +- src/reconstruction/ppmc_cuda.cu | 11 +- src/reconstruction/ppmp_cuda.cu | 12 +- src/riemann_solvers/exact_cuda.cu | 6 +- src/riemann_solvers/hll_cuda.cu | 7 +- src/riemann_solvers/hllc_cuda.cu | 6 +- src/riemann_solvers/hlld_cuda.cu | 6 +- src/riemann_solvers/roe_cuda.cu | 6 +- src/system_tests/cooling_system_tests.cpp | 91 +++++ src/system_tests/gravity_system_tests.cpp | 30 ++ ...stant5_CorrectInputExpectCorrectOutput.txt | 56 +++ ...stant7_CorrectInputExpectCorrectOutput.txt | 56 +++ ...stant8_CorrectInputExpectCorrectOutput.txt | 56 +++ ...llapse_CorrectInputExpectCorrectOutput.txt | 34 ++ src/utils/cuda_utilities.cpp | 5 + src/utils/cuda_utilities.h | 77 ++++ src/utils/cuda_utilities_tests.cpp | 122 ++++++ src/utils/gpu.hpp | 16 +- src/utils/gpu_arrays_functions.cu | 75 ++++ src/utils/hydro_utilities.cpp | 5 + src/utils/hydro_utilities.h | 80 ++++ src/utils/hydro_utilities_tests.cpp | 129 ++++++ src/utils/mhd_utilities.h | 6 +- src/utils/mhd_utilities_tests.cpp | 38 +- src/utils/reduction_utilities.cu | 57 +++ src/utils/reduction_utilities.h | 205 ++++++++++ src/utils/reduction_utilities_tests.cu | 96 +++++ src/utils/testing_utilities.cpp | 23 +- src/utils/timing_functions.cpp | 35 +- src/utils/timing_functions.h | 1 + 103 files changed, 3145 insertions(+), 1114 deletions(-) create mode 100644 scale_output_files/outputs_cosmo_2048_z0.txt create mode 100644 scale_output_files/outputs_cosmo_pk_boera.txt delete mode 100644 src/mpi/cuda_pack_buffers.h create mode 100644 src/system_tests/cooling_system_tests.cpp create mode 100644 src/system_tests/gravity_system_tests.cpp create mode 100644 src/system_tests/input_files/tCOOLINGSYSTEMConstant5_CorrectInputExpectCorrectOutput.txt create mode 100644 src/system_tests/input_files/tCOOLINGSYSTEMConstant7_CorrectInputExpectCorrectOutput.txt create mode 100644 src/system_tests/input_files/tCOOLINGSYSTEMConstant8_CorrectInputExpectCorrectOutput.txt create mode 100644 src/system_tests/input_files/tGRAVITYSYSTEMSphericalCollapse_CorrectInputExpectCorrectOutput.txt create mode 100644 src/utils/cuda_utilities.cpp create mode 100644 src/utils/cuda_utilities.h create mode 100644 src/utils/cuda_utilities_tests.cpp create mode 100644 src/utils/gpu_arrays_functions.cu create mode 100644 src/utils/hydro_utilities.cpp create mode 100644 src/utils/hydro_utilities.h create mode 100644 src/utils/hydro_utilities_tests.cpp create mode 100644 src/utils/reduction_utilities.cu create mode 100644 src/utils/reduction_utilities.h create mode 100644 src/utils/reduction_utilities_tests.cu diff --git a/Makefile b/Makefile index b0dba3254..6a2abd480 100644 --- a/Makefile +++ b/Makefile @@ -38,6 +38,11 @@ ifeq ($(TEST), true) CFLAGS = $(TEST_FLAGS) CXXFLAGS = $(TEST_FLAGS) GPUFLAGS = $(TEST_FLAGS) + + # Set the build flags to debug. This is mostly to avoid the approximations + # made by Ofast which break std::isnan and std::isinf which are required for + # the testing + BUILD = DEBUG else # This isn't a test build so clear out testing related files CFILES := $(filter-out src/system_tests/% %_tests.c,$(CFILES)) @@ -161,6 +166,12 @@ endif EXEC := bin/cholla$(SUFFIX) +# Get the git hash and setup macro to store a string of all the other macros so +# that they can be written to the save files +DFLAGS += -DGIT_HASH='"$(shell git rev-parse --verify HEAD)"' +MACRO_FLAGS := -DMACRO_FLAGS='"$(DFLAGS)"' +DFLAGS += $(MACRO_FLAGS) + $(EXEC): prereq-build $(OBJS) mkdir -p bin/ && $(LD) $(LDFLAGS) $(OBJS) -o $(EXEC) $(LIBS) eval $(EXTRA_COMMANDS) diff --git a/builds/make.host.c3po b/builds/make.host.c3po index 99d621719..d11274e64 100644 --- a/builds/make.host.c3po +++ b/builds/make.host.c3po @@ -4,7 +4,7 @@ CXX = mpicxx CFLAGS_DEBUG = -g -O0 CFLAGS_OPTIMIZE = -g -O2 CXXFLAGS_DEBUG = -g -O0 -std=c++11 ${F_OFFLOAD} -CXXFLAGS_OPTIMIZE = -Ofast -std=c++11 ${F_OFFLOAD} +CXXFLAGS_OPTIMIZE = -g -Ofast -std=c++11 ${F_OFFLOAD} GPUFLAGS_DEBUG = -g -O0 -std=c++11 -ccbin=mpicxx GPUFLAGS_OPTIMIZE = -g -O3 -std=c++11 -ccbin=mpicxx diff --git a/builds/make.host.crc b/builds/make.host.crc index cdb78fdf3..6a90e69fc 100644 --- a/builds/make.host.crc +++ b/builds/make.host.crc @@ -4,8 +4,8 @@ CC = mpicc CXX = mpicxx CFLAGS_DEBUG = -g -O0 CFLAGS_OPTIMIZE = -Ofast -CXXFLAGS_DEBUG = -g -O0 -std=c++11 -CXXFLAGS_OPTIMIZE = -Ofast -std=c++11 +CXXFLAGS_DEBUG = -g -O0 -std=c++17 +CXXFLAGS_OPTIMIZE = -Ofast -std=c++17 CHOLLA_ARCH = sm_70 OMP_NUM_THREADS = 16 #-- How to launch job diff --git a/builds/make.host.spock b/builds/make.host.spock index 157577c1f..a3fcb0ef5 100644 --- a/builds/make.host.spock +++ b/builds/make.host.spock @@ -7,8 +7,8 @@ CXX = CC CFLAGS_DEBUG = -g -O0 CFLAGS_OPTIMIZE = -g -O2 -CXXFLAGS_DEBUG = -g -O0 -std=c++11 ${F_OFFLOAD} -CXXFLAGS_OPTIMIZE = -Ofast -std=c++11 ${F_OFFLOAD} +CXXFLAGS_DEBUG = -g -O0 -std=c++11 +CXXFLAGS_OPTIMIZE = -g -Ofast -std=c++11 GPUFLAGS = --offload-arch=gfx908 HIPCONFIG = $(shell hipconfig -C) diff --git a/builds/make.type.cooling b/builds/make.type.cooling index 7bfa66513..baf4ed0e9 100644 --- a/builds/make.type.cooling +++ b/builds/make.type.cooling @@ -31,6 +31,7 @@ DFLAGS += -DDE # Apply the cooling in the GPU from precomputed tables DFLAGS += -DCOOLING_GPU +#DFLAGS += -DCLOUDY_COOL #Measure the Timing of the different stages DFLAGS += -DCPU_TIME diff --git a/builds/make.type.hydro b/builds/make.type.hydro index fd4e7e3b6..6e9629c04 100644 --- a/builds/make.type.hydro +++ b/builds/make.type.hydro @@ -1,13 +1,13 @@ #-- Default hydro only build -#-- separated output flag so that it can be overriden in target-specific +#-- separated output flag so that it can be overriden in target-specific # for make check OUTPUT ?= -DOUTPUT -DHDF5 MPI_GPU ?= DFLAGS += -DCUDA -DFLAGS += -DMPI_CHOLLA +DFLAGS += -DMPI_CHOLLA DFLAGS += -DPRECISION=2 DFLAGS += -DPPMP DFLAGS += -DHLLC @@ -38,9 +38,9 @@ DFLAGS += $(OUTPUT) #Select if the Hydro Conserved data will reside in the GPU #and the MPI transfers are done from the GPU -#If not specified, MPI_GPU is off by default +#If not specified, MPI_GPU is off by default #This is set in the system make.host file -DFLAGS += $(MPI_GPU) +DFLAGS += $(MPI_GPU) DFLAGS += -DPARALLEL_OMP DFLAGS += -DN_OMP_THREADS=$(OMP_NUM_THREADS) diff --git a/builds/make.type.particles b/builds/make.type.particles index a0601f88e..24de6f9c2 100644 --- a/builds/make.type.particles +++ b/builds/make.type.particles @@ -6,7 +6,7 @@ DFLAGS += -DPARTICLES #Solve the particles in the GPU or CPU #NOTE: If using PARTICLES and MPI_GPU is turned on, then PARTICLES_GPU has to be turned on -# DFLAGS += -DPARTICLES_CPU +#DFLAGS += -DPARTICLES_CPU DFLAGS += -DPARTICLES_GPU diff --git a/builds/setup.crc.gcc.sh b/builds/setup.crc.gcc.sh index ca316a8bc..586dcbd00 100755 --- a/builds/setup.crc.gcc.sh +++ b/builds/setup.crc.gcc.sh @@ -3,7 +3,7 @@ #-- This script needs to be sourced in the terminal, e.g. # source ./setup.crc.gcc.sh -module load python/anaconda3-2020.11 gcc/10.1.0 cuda/11.1.0 openmpi/4.0.5 hdf5/1.12.0 +module load python/anaconda3-2020.11 gcc/10.1.0 cuda/11.1.0 openmpi/4.0.5 hdf5/1.12.0 googletest/1.11.0 echo "mpicxx --version is: " mpicxx --version diff --git a/builds/setup.frontier.cce.sh b/builds/setup.frontier.cce.sh index 0e8da6ea4..4a22344d2 100755 --- a/builds/setup.frontier.cce.sh +++ b/builds/setup.frontier.cce.sh @@ -1,12 +1,13 @@ #!/bin/bash #-- This script needs to be source-d in the terminal, e.g. -# source ./setup.frontier.cce.sh +# source ./setup.frontier.cce.sh module load cray-python module load rocm module load craype-accel-amd-gfx90a module load cray-hdf5 cray-fftw +module load googletest/1.10.0 #-- GPU-aware MPI export MPICH_GPU_SUPPORT_ENABLED=1 diff --git a/builds/setup.spock.cce.sh b/builds/setup.spock.cce.sh index b17e226fe..9f7956600 100755 --- a/builds/setup.spock.cce.sh +++ b/builds/setup.spock.cce.sh @@ -5,10 +5,9 @@ module load PrgEnv-cray module load cray-python -module load rocm +module load rocm/4.5.0 module load craype-accel-amd-gfx908 module load cray-hdf5 cray-fftw -unset GPUCXX #-- GPU-aware MPI export MPICH_GPU_SUPPORT_ENABLED=1 @@ -16,8 +15,6 @@ export MPICH_GPU_SUPPORT_ENABLED=1 export LD_LIBRARY_PATH=${CRAY_LD_LIBRARY_PATH}:${LD_LIBRARY_PATH} export MPI_GPU="-DMPI_GPU" -export F_OFFLOAD="-fopenmp" +#export F_OFFLOAD="-fopenmp" export CHOLLA_ENVSET=1 - - diff --git a/builds/setup.summit.gcc.sh b/builds/setup.summit.gcc.sh index 523c4ce6b..81a99dd36 100755 --- a/builds/setup.summit.gcc.sh +++ b/builds/setup.summit.gcc.sh @@ -3,7 +3,9 @@ #-- This script needs to be source-d in the terminal, e.g. # source ./setup.summit.gcc.sh -module load gcc/10.2.0 cuda/11.4.0 fftw hdf5 python +#module load gcc/10.2.0 cuda/11.4.0 fftw hdf5 python +module load gcc cuda fftw hdf5 python googletest/1.11.0 -export F_OFFLOAD="-fopenmp -foffload=nvptx-none='-lm -Ofast'" +#export F_OFFLOAD="-fopenmp -foffload=nvptx-none='-lm -Ofast'" +export F_OFFLOAD="-fopenmp -foffload=disable" export CHOLLA_ENVSET=1 diff --git a/builds/setup.summit.xl.sh b/builds/setup.summit.xl.sh index fdba703aa..036b34791 100755 --- a/builds/setup.summit.xl.sh +++ b/builds/setup.summit.xl.sh @@ -3,7 +3,7 @@ #-- This script needs to be source-d in the terminal, e.g. # source ./setup.summit.xl.sh -module load xl cuda fftw hdf5 python +module load xl cuda fftw hdf5 python googletest/1.11.0 export F_OFFLOAD="-qsmp=omp -qoffload" export CHOLLA_ENVSET=1 diff --git a/cholla-tests-data b/cholla-tests-data index 5a3443034..34577601f 160000 --- a/cholla-tests-data +++ b/cholla-tests-data @@ -1 +1 @@ -Subproject commit 5a34430345d7dc746637364e8613642ebbbbc5c4 +Subproject commit 34577601fd4abbdead625b1ea5cfb802a6325f9c diff --git a/scale_output_files/outputs_cosmo_2048_z0.txt b/scale_output_files/outputs_cosmo_2048_z0.txt new file mode 100644 index 000000000..9f747a771 --- /dev/null +++ b/scale_output_files/outputs_cosmo_2048_z0.txt @@ -0,0 +1,340 @@ +9.900990099009901110e-03 +1.234711706464764087e-02 +1.479324403028538237e-02 +1.723937099592312214e-02 +1.968549796156086190e-02 +2.213162492719860167e-02 +2.457775189283634143e-02 +2.702387885847408466e-02 +2.947000582411182096e-02 +3.191613278974956419e-02 +3.436225975538730049e-02 +3.680838672102504372e-02 +3.925451368666278695e-02 +4.170064065230052325e-02 +4.414676761793826648e-02 +4.659289458357600278e-02 +4.903902154921374601e-02 +5.148514851485148924e-02 +5.393127548048922554e-02 +5.637740244612696877e-02 +5.882352941176470507e-02 +6.042780748663101331e-02 +6.203208556149732850e-02 +6.363636363636362980e-02 +6.524064171122995193e-02 +6.684491978609626017e-02 +6.844919786096256842e-02 +7.005347593582887666e-02 +7.165775401069518491e-02 +7.326203208556149316e-02 +7.486631016042780140e-02 +7.647058823529412352e-02 +7.807486631016043177e-02 +7.967914438502674002e-02 +8.128342245989304826e-02 +8.288770053475935651e-02 +8.449197860962567863e-02 +8.609625668449198688e-02 +8.770053475935829512e-02 +8.930481283422460337e-02 +9.090909090909091161e-02 +9.242424242424242264e-02 +9.393939393939394755e-02 +9.545454545454545858e-02 +9.696969696969696961e-02 +9.848484848484848064e-02 +1.000000000000000056e-01 +1.015151515151515166e-01 +1.030303030303030276e-01 +1.045454545454545525e-01 +1.060606060606060635e-01 +1.075757575757575746e-01 +1.090909090909090995e-01 +1.106060606060606105e-01 +1.121212121212121215e-01 +1.136363636363636326e-01 +1.151515151515151436e-01 +1.166666666666666685e-01 +1.181818181818181795e-01 +1.196969696969696906e-01 +1.212121212121212155e-01 +1.227272727272727265e-01 +1.242424242424242375e-01 +1.257575757575757625e-01 +1.272727272727272596e-01 +1.287878787878787845e-01 +1.303030303030303094e-01 +1.318181818181818066e-01 +1.333333333333333315e-01 +1.348484848484848286e-01 +1.363636363636363535e-01 +1.378787878787878785e-01 +1.393939393939394034e-01 +1.409090909090909005e-01 +1.424242424242424254e-01 +1.439393939393939226e-01 +1.454545454545454475e-01 +1.469696969696969724e-01 +1.484848484848484695e-01 +1.499999999999999944e-01 +1.515151515151514916e-01 +1.530303030303030165e-01 +1.545454545454545414e-01 +1.560606060606060663e-01 +1.575757575757575635e-01 +1.590909090909090884e-01 +1.606060606060605855e-01 +1.621212121212121104e-01 +1.636363636363636354e-01 +1.651515151515151603e-01 +1.666666666666666574e-01 +1.687499999999999833e-01 +1.708333333333333370e-01 +1.729166666666666630e-01 +1.749999999999999889e-01 +1.770833333333333148e-01 +1.791666666666666685e-01 +1.812499999999999944e-01 +1.833333333333333204e-01 +1.854166666666666741e-01 +1.875000000000000000e-01 +1.895833333333333259e-01 +1.916666666666666519e-01 +1.937500000000000056e-01 +1.958333333333333315e-01 +1.979166666666666574e-01 +2.000000000000000111e-01 +2.020833333333333370e-01 +2.041666666666666630e-01 +2.062499999999999889e-01 +2.083333333333333148e-01 +2.104166666666666685e-01 +2.124999999999999944e-01 +2.145833333333333481e-01 +2.166666666666666741e-01 +2.187500000000000000e-01 +2.208333333333333259e-01 +2.229166666666666519e-01 +2.250000000000000056e-01 +2.270833333333333315e-01 +2.291666666666666852e-01 +2.312500000000000111e-01 +2.333333333333333370e-01 +2.354166666666666630e-01 +2.374999999999999889e-01 +2.395833333333333426e-01 +2.416666666666666685e-01 +2.437500000000000222e-01 +2.458333333333333481e-01 +2.479166666666666741e-01 +2.500000000000000000e-01 +2.521367521367521292e-01 +2.542735042735042583e-01 +2.564102564102563875e-01 +2.585470085470085166e-01 +2.606837606837607013e-01 +2.628205128205128305e-01 +2.649572649572649596e-01 +2.670940170940170888e-01 +2.692307692307692180e-01 +2.713675213675213471e-01 +2.735042735042734763e-01 +2.756410256410256610e-01 +2.777777777777777901e-01 +2.799145299145299193e-01 +2.820512820512820484e-01 +2.841880341880341776e-01 +2.863247863247863068e-01 +2.884615384615384359e-01 +2.905982905982905651e-01 +2.927350427350426942e-01 +2.948717948717948789e-01 +2.970085470085470081e-01 +2.991452991452991372e-01 +3.012820512820512664e-01 +3.034188034188033956e-01 +3.055555555555555247e-01 +3.076923076923076539e-01 +3.098290598290598385e-01 +3.119658119658119677e-01 +3.141025641025640969e-01 +3.162393162393162260e-01 +3.183760683760683552e-01 +3.205128205128204844e-01 +3.226495726495726135e-01 +3.247863247863247427e-01 +3.269230769230768718e-01 +3.290598290598290010e-01 +3.311965811965811857e-01 +3.333333333333333148e-01 +3.372549019607842813e-01 +3.411764705882353033e-01 +3.450980392156862697e-01 +3.490196078431372362e-01 +3.529411764705882026e-01 +3.568627450980392246e-01 +3.607843137254901911e-01 +3.647058823529411575e-01 +3.686274509803921240e-01 +3.725490196078431460e-01 +3.764705882352941124e-01 +3.803921568627450789e-01 +3.843137254901960453e-01 +3.882352941176470673e-01 +3.921568627450980338e-01 +3.960784313725490002e-01 +4.000000000000000222e-01 +4.039215686274509887e-01 +4.078431372549019551e-01 +4.117647058823529216e-01 +4.156862745098038880e-01 +4.196078431372549100e-01 +4.235294117647058765e-01 +4.274509803921568984e-01 +4.313725490196078649e-01 +4.352941176470588314e-01 +4.392156862745097978e-01 +4.431372549019607643e-01 +4.470588235294117863e-01 +4.509803921568627527e-01 +4.549019607843137192e-01 +4.588235294117647411e-01 +4.627450980392157076e-01 +4.666666666666666741e-01 +4.705882352941176405e-01 +4.745098039215686070e-01 +4.784313725490196290e-01 +4.823529411764705954e-01 +4.862745098039216174e-01 +4.901960784313725839e-01 +4.941176470588235503e-01 +4.980392156862745168e-01 +5.019607843137254832e-01 +5.058823529411764497e-01 +5.098039215686274161e-01 +5.137254901960783826e-01 +5.176470588235294601e-01 +5.215686274509804266e-01 +5.254901960784313930e-01 +5.294117647058823595e-01 +5.333333333333333259e-01 +5.372549019607844034e-01 +5.411764705882353699e-01 +5.450980392156863363e-01 +5.490196078431373028e-01 +5.529411764705882693e-01 +5.568627450980392357e-01 +5.607843137254902022e-01 +5.647058823529411686e-01 +5.686274509803921351e-01 +5.725490196078431016e-01 +5.764705882352941790e-01 +5.803921568627451455e-01 +5.843137254901961120e-01 +5.882352941176470784e-01 +5.921568627450981559e-01 +5.960784313725491224e-01 +6.000000000000000888e-01 +6.039215686274510553e-01 +6.078431372549020217e-01 +6.117647058823529882e-01 +6.156862745098039547e-01 +6.196078431372549211e-01 +6.235294117647058876e-01 +6.274509803921568540e-01 +6.313725490196078205e-01 +6.352941176470587870e-01 +6.392156862745098644e-01 +6.431372549019608309e-01 +6.470588235294117974e-01 +6.509803921568628748e-01 +6.549019607843138413e-01 +6.588235294117648078e-01 +6.627450980392157742e-01 +6.666666666666667407e-01 +6.705882352941177071e-01 +6.745098039215686736e-01 +6.784313725490196401e-01 +6.823529411764706065e-01 +6.862745098039215730e-01 +6.901960784313725394e-01 +6.941176470588235059e-01 +6.980392156862745834e-01 +7.019607843137255498e-01 +7.058823529411765163e-01 +7.098039215686275938e-01 +7.137254901960785602e-01 +7.176470588235295267e-01 +7.215686274509804932e-01 +7.254901960784314596e-01 +7.294117647058824261e-01 +7.333333333333333925e-01 +7.372549019607843590e-01 +7.411764705882353255e-01 +7.450980392156862919e-01 +7.490196078431372584e-01 +7.529411764705882248e-01 +7.568627450980393023e-01 +7.607843137254902688e-01 +7.647058823529412352e-01 +7.686274509803922017e-01 +7.725490196078432792e-01 +7.764705882352942456e-01 +7.803921568627452121e-01 +7.843137254901961786e-01 +7.882352941176471450e-01 +7.921568627450981115e-01 +7.960784313725490779e-01 +8.000000000000000444e-01 +8.039215686274510109e-01 +8.078431372549019773e-01 +8.117647058823529438e-01 +8.156862745098040213e-01 +8.196078431372549877e-01 +8.235294117647059542e-01 +8.274509803921569207e-01 +8.313725490196079981e-01 +8.352941176470589646e-01 +8.392156862745099311e-01 +8.431372549019608975e-01 +8.470588235294118640e-01 +8.509803921568628304e-01 +8.549019607843137969e-01 +8.588235294117647634e-01 +8.627450980392157298e-01 +8.666666666666666963e-01 +8.705882352941176627e-01 +8.745098039215686292e-01 +8.784313725490195957e-01 +8.823529411764707842e-01 +8.862745098039217506e-01 +8.901960784313727171e-01 +8.941176470588236835e-01 +8.980392156862746500e-01 +9.019607843137256165e-01 +9.058823529411765829e-01 +9.098039215686275494e-01 +9.137254901960785158e-01 +9.176470588235294823e-01 +9.215686274509804488e-01 +9.254901960784314152e-01 +9.294117647058823817e-01 +9.333333333333333481e-01 +9.372549019607843146e-01 +9.411764705882355031e-01 +9.450980392156864696e-01 +9.490196078431374360e-01 +9.529411764705884025e-01 +9.568627450980393689e-01 +9.607843137254903354e-01 +9.647058823529413019e-01 +9.686274509803922683e-01 +9.725490196078432348e-01 +9.764705882352942012e-01 +9.803921568627451677e-01 +9.843137254901961342e-01 +9.882352941176471006e-01 +9.921568627450980671e-01 +9.960784313725490335e-01 +1.000000000000000000e+00 \ No newline at end of file diff --git a/scale_output_files/outputs_cosmo_pk_boera.txt b/scale_output_files/outputs_cosmo_pk_boera.txt new file mode 100644 index 000000000..9bb91d117 --- /dev/null +++ b/scale_output_files/outputs_cosmo_pk_boera.txt @@ -0,0 +1,3 @@ +1.666666666666666574e-01 +1.785714285714285754e-01 +1.923076923076922906e-01 diff --git a/src/analysis/analysis.cpp b/src/analysis/analysis.cpp index a6d764cfc..651c8e37e 100644 --- a/src/analysis/analysis.cpp +++ b/src/analysis/analysis.cpp @@ -80,7 +80,7 @@ void Grid3D::Compute_Lya_Statistics( ){ #endif //LYA_STATISTICS -#ifdef FEEDBACK +#ifdef SUPERNOVA void Grid3D::Compute_Gas_Velocity_Dispersion() { #ifdef PARTICLES_CPU int i, j, k, id, idm, idp; @@ -188,7 +188,7 @@ void Grid3D::Compute_Gas_Velocity_Dispersion() { chprintf("feedback: time %f, dt=%f, vrms_p = %f km/s, vrms_a = %f km/s\n", H.t, H.dt, vrms_poisson, vrms_analytic); #endif // PARTICLES_CPU } -#endif // FEEDBACK +#endif // SUPERNOVA void Grid3D::Compute_and_Output_Analysis( struct parameters *P ){ diff --git a/src/analysis/feedback_analysis.h b/src/analysis/feedback_analysis.h index be52dc03d..32771019b 100644 --- a/src/analysis/feedback_analysis.h +++ b/src/analysis/feedback_analysis.h @@ -22,6 +22,7 @@ class FeedbackAnalysis { int countUnresolved; Real totalEnergy; Real totalMomentum; + Real totalUnresEnergy; FeedbackAnalysis(Grid3D& G); ~FeedbackAnalysis(); diff --git a/src/analysis/feedback_analysis_gpu.cu b/src/analysis/feedback_analysis_gpu.cu index 37af2ff4f..d8e34d155 100644 --- a/src/analysis/feedback_analysis_gpu.cu +++ b/src/analysis/feedback_analysis_gpu.cu @@ -7,7 +7,7 @@ #define MU 0.6 // in cgs, this is 0.01 cm^{-3} -#define MIN_DENSITY 0.01 * MP * MU *LENGTH_UNIT * LENGTH_UNIT * LENGTH_UNIT / MASS_UNIT // 148279.7 +#define MIN_DENSITY 0.01 * MP * MU * LENGTH_UNIT * LENGTH_UNIT * LENGTH_UNIT / MASS_UNIT // 148279.7 #define TPB_ANALYSIS 1024 @@ -42,10 +42,10 @@ void __global__ Reduce_Tubulence_kernel(int nx, int ny, int nz, int n_ghost, Rea vx = momentum_x[id]/ density[id]; vy = momentum_y[id]/ density[id]; vz = momentum_z[id]/ density[id]; - s_vel[tid] += ((vx - circ_vel_x[id])*(vx - circ_vel_x[id]) + - (vy - circ_vel_y[id])*(vy - circ_vel_y[id]) + + s_vel[tid] = ( (vx - circ_vel_x[id])*(vx - circ_vel_x[id]) + + (vy - circ_vel_y[id])*(vy - circ_vel_y[id]) + (vz*vz) - )*density[id]; + )*density[id]; } __syncthreads(); @@ -69,8 +69,6 @@ void __global__ Reduce_Tubulence_kernel_2(Real *input_m, Real *input_v, Real *ou __shared__ Real s_vel[TPB_ANALYSIS]; size_t tid = threadIdx.x; - //size_t i = blockIdx.x*(TPB_ANALYSIS*2) + tid; - //size_t gridSize = TPB_ANALYSIS*2*gridDim.x; size_t i = blockIdx.x*(TPB_ANALYSIS) + tid; size_t gridSize = TPB_ANALYSIS*gridDim.x; s_mass[tid] = 0; @@ -81,7 +79,6 @@ void __global__ Reduce_Tubulence_kernel_2(Real *input_m, Real *input_v, Real *ou s_vel[tid] += input_v[i]; i += gridSize; } - //while (i < n) { s_mass[tid] += input[i] + input[i+TPB_ANALYSIS]; i += gridSize; } __syncthreads(); if (TPB_ANALYSIS >= 1024) { if (tid < 512) { s_mass[tid] += s_mass[tid + 512]; s_vel[tid] += s_vel[tid + 512]; } __syncthreads(); } @@ -147,6 +144,10 @@ void FeedbackAnalysis::Compute_Gas_Velocity_Dispersion_GPU(Grid3D& G) { total_vel = h_partial_vel[0]; #endif + if (total_vel < 0 || total_mass < 0) { + chprintf("feedback trouble. total_vel = %.3e, total_mass = %.3e\n", total_vel, total_mass); + } + chprintf("feedback: time %f, dt=%f, vrms = %f km/s\n", G.H.t, G.H.dt, sqrt(total_vel/total_mass)*VELOCITY_UNIT/1e5); CHECK(cudaFree(d_partial_vel)); diff --git a/src/chemistry_gpu/chemistry_functions.cpp b/src/chemistry_gpu/chemistry_functions.cpp index 3ea58326e..2564d2591 100644 --- a/src/chemistry_gpu/chemistry_functions.cpp +++ b/src/chemistry_gpu/chemistry_functions.cpp @@ -20,6 +20,8 @@ void Grid3D::Initialize_Chemistry( struct parameters *P ){ Chem.ny = H.ny; Chem.nz = H.nz; + Chem.H.runtime_chemistry_step = 0; + Chem.use_case_B_recombination = false; // Initialize the Chemistry Header diff --git a/src/chemistry_gpu/chemistry_functions_gpu.cu b/src/chemistry_gpu/chemistry_functions_gpu.cu index 4556a573a..3d6e0052f 100644 --- a/src/chemistry_gpu/chemistry_functions_gpu.cu +++ b/src/chemistry_gpu/chemistry_functions_gpu.cu @@ -73,6 +73,13 @@ __host__ __device__ Real get_temperature( Real gamma ){ return temp; } +__host__ __device__ Real compute_U( Real temp, Real gamma ){ + Real mu, U_local; + mu = get_MMW(); + U_local = temp / ( gamma - 1 ) / mu / MP * KB / 1e10; + return U_local; +} + }; __device__ void get_temperature_indx( Real T, Chemistry_Header &Chem_H, int &temp_indx, Real &delta_T, Real temp_old, bool print ){ @@ -307,6 +314,10 @@ __device__ Real Get_Chemistry_dt( Thermal_State &TS, Chemistry_Header &Chem_H, R printf( "#### Equlibrium \n" ); } + #ifdef TEMPERATURE_FLOOR + if ( TS.get_temperature( Chem_H.gamma ) < TEMP_FLOOR ) TS.U = TS.compute_U( TEMP_FLOOR, Chem_H.gamma ); + #endif + energy = fmax( TS.U * TS.d, tiny ); dt = fmin( fabs( 0.1 * TS.d_HI / HI_dot ), fabs( 0.1 * TS.d_e / e_dot ) ); dt = fmin( fabs( 0.1 * energy / U_dot ), dt ); @@ -393,6 +404,9 @@ __device__ void Update_Step( Thermal_State &TS, Chemistry_Header &Chem_H, Real d // Update internal energy TS.U += U_dot / TS.d * dt; + #ifdef TEMPERATURE_FLOOR + if ( TS.get_temperature( Chem_H.gamma ) < TEMP_FLOOR ) TS.U = TS.compute_U( TEMP_FLOOR, Chem_H.gamma ); + #endif if ( print ) printf("Updated U: %e \n", TS.U); @@ -579,7 +593,7 @@ void Do_Chemistry_Update(Real *dev_conserved, int nx, int ny, int nz, int n_ghos cudaEventCreate(&stop); cudaEventRecord(start, 0); - int ngrid = (nx*ny*nz + TPB_CHEM - 1) / TPB_CHEM; + int ngrid = (nx*ny*nz - 1) / TPB_CHEM + 1; dim3 dim1dGrid(ngrid, 1, 1); dim3 dim1dBlock(TPB_CHEM, 1, 1); hipLaunchKernelGGL(Update_Chemistry_kernel, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, nz, n_ghost, n_fields, dt, Chem_H ); @@ -588,7 +602,8 @@ void Do_Chemistry_Update(Real *dev_conserved, int nx, int ny, int nz, int n_ghos cudaEventRecord(stop, 0); cudaEventSynchronize(stop); cudaEventElapsedTime(&time, start, stop); - Chem_H.runtime_chemistry_step = (Real) time; + Chem_H.runtime_chemistry_step = (Real) time/1000; // (Convert ms to secs ) + } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/cooling/cooling_cuda.cu b/src/cooling/cooling_cuda.cu index 855b8b6cc..9d1ccef3e 100644 --- a/src/cooling/cooling_cuda.cu +++ b/src/cooling/cooling_cuda.cu @@ -13,30 +13,22 @@ extern texture coolTexObj; extern texture heatTexObj; -void Cooling_Update(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dt, Real gamma, Real *dt_array){ +void Cooling_Update(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dt, Real gamma){ + + int n_cells = nx*ny*nz; + int ngrid = (n_cells + TPB - 1) / TPB; dim3 dim1dGrid(ngrid, 1, 1); dim3 dim1dBlock(TPB, 1, 1); - hipLaunchKernelGGL(cooling_kernel, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, nz, n_ghost, n_fields, dt, gama, dt_array); + hipLaunchKernelGGL(cooling_kernel, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, nz, n_ghost, n_fields, dt, gama); CudaCheckError(); } -Real Cooling_Calc_dt(Real *d_dt_array, Real *h_dt_array, int nx, int ny, int nz){ - int ngrid = (nx*ny*nz + TPB - 1) / TPB; - Real min_dt = 1e10; - CudaSafeCall( cudaMemcpy(h_dt_array, d_dt_array, ngrid*sizeof(Real), cudaMemcpyDeviceToHost) ); - for (int i=0; i= is && xid < ie && yid >= js && yid < je && zid >= ks && zid < ke) { @@ -112,17 +98,13 @@ __global__ void cooling_kernel(Real *dev_conserved, int nx, int ny, int nz, int n = d*DENSITY_UNIT / (mu * MP); // calculate the temperature of the gas - //#ifndef DE T_init = p*PRESSURE_UNIT/ (n*KB); - //#endif #ifdef DE - //T_init = ge*(gamma-1.0)*SP_ENERGY_UNIT*mu*MP/KB; T_init = d*ge*(gamma-1.0)*PRESSURE_UNIT/(n*KB); #endif // calculate cooling rate per volume T = T_init; - //if (T > T_max) printf("%3d %3d %3d High T cell. n: %e T: %e\n", xid, yid, zid, n, T); // call the cooling function #ifdef CLOUDY_COOL cool = Cloudy_cool(n, T); @@ -154,12 +136,6 @@ __global__ void cooling_kernel(Real *dev_conserved, int nx, int ny, int nz, int // calculate final temperature T -= del_T; - // set a temperature floor - // (don't change this cell if the thread crashed) - //if (T > 0.0 && E > 0.0) T = fmax(T, T_min); - // set a temperature ceiling - //T = fmin(T, T_max); - // adjust value of energy based on total change in temperature del_T = T_init - T; // total change in T E -= n*KB*del_T / ((gamma-1.0)*ENERGY_UNIT); @@ -172,14 +148,8 @@ __global__ void cooling_kernel(Real *dev_conserved, int nx, int ny, int nz, int cool = Cloudy_cool(n, T); #else cool = CIE_cool(n, T); - #endif - //printf("%d %d %d %e %e %e\n", xid, yid, zid, n, T, cool); - // only use good cells in timestep calculation (in case some have crashed) - if (n > 0 && T > 0 && cool > 0.0) { - // limit the timestep such that delta_T is 10% - min_dt[tid] = 0.1*T*n*KB/(cool*TIME_UNIT*(gamma-1.0)); - } + #endif // and send back from kernel dev_conserved[4*n_cells + id] = E; @@ -188,19 +158,6 @@ __global__ void cooling_kernel(Real *dev_conserved, int nx, int ny, int nz, int #endif } - __syncthreads(); - - // do the reduction in shared memory (find the min timestep in the block) - for (unsigned int s=1; sinit, "Spherical_Overdensity_3D")==0){ Gconst = 1; chprintf(" WARNING: Using Gravitational Constant G=1.\n"); - } + }*/ //Flag to transfer the Potential boundaries TRANSFER_POTENTIAL_BOUNDARIES = false; diff --git a/src/gravity/grav3D.h b/src/gravity/grav3D.h index d02508040..c5c9db173 100644 --- a/src/gravity/grav3D.h +++ b/src/gravity/grav3D.h @@ -50,6 +50,9 @@ class Grav3D Real xMin; Real yMin; Real zMin; + Real xMax; + Real yMax; + Real zMax; /*! \var nx * \brief Total number of cells in the x-dimension */ int nx_total; @@ -211,7 +214,7 @@ class Grav3D /*! \fn void Initialize(int nx_in, int ny_in, int nz_in) * \brief Initialize the grid. */ - void Initialize( Real x_min, Real y_min, Real z_min, Real Lx, Real Ly, Real Lz, int nx_total, int ny_total, int nz_total, int nx_real, int ny_real, int nz_real, Real dx_real, Real dy_real, Real dz_real, int n_ghost_pot_offset, struct parameters *P); + void Initialize( Real x_min, Real y_min, Real z_min, Real x_max, Real y_max, Real z_max, Real Lx, Real Ly, Real Lz, int nx_total, int ny_total, int nz_total, int nx_real, int ny_real, int nz_real, Real dx_real, Real dy_real, Real dz_real, int n_ghost_pot_offset, struct parameters *P); void AllocateMemory_CPU(void); void Initialize_values_CPU(); diff --git a/src/gravity/gravity_functions.cpp b/src/gravity/gravity_functions.cpp index ffb8f4e3f..44f0bd097 100644 --- a/src/gravity/gravity_functions.cpp +++ b/src/gravity/gravity_functions.cpp @@ -115,7 +115,8 @@ void Grid3D::set_dt_Gravity(){ #ifdef AVERAGE_SLOW_CELLS //Set the min_delta_t for averaging a slow cell - min_dt_slow = Particles.dt / Particles.C_cfl * Cosmo.H0 / ( Cosmo.current_a * Cosmo.current_a ) / SLOW_FACTOR; + da_particles = fmin( da_particles, Cosmo.max_delta_a ); + min_dt_slow = Cosmo.Get_dt_from_da( da_particles ) / Particles.C_cfl * Cosmo.H0 / ( Cosmo.current_a * Cosmo.current_a ) / SLOW_FACTOR; H.min_dt_slow = min_dt_slow; #endif @@ -143,7 +144,8 @@ void Grid3D::set_dt_Gravity(){ #ifdef AVERAGE_SLOW_CELLS //Set the min_delta_t for averaging a slow cell - min_dt_slow = dt_particles / Particles.C_cfl / SLOW_FACTOR; + //min_dt_slow = dt_particles / Particles.C_cfl / SLOW_FACTOR; + min_dt_slow = 3*H.dx; H.min_dt_slow = min_dt_slow; #endif @@ -155,7 +157,8 @@ void Grid3D::set_dt_Gravity(){ #if defined( AVERAGE_SLOW_CELLS) && !defined( PARTICLES ) //Set the min_delta_t for averaging a slow cell ( for now the min_dt_slow is set to a large value, change this with your condition ) - min_dt_slow = H.dt / C_cfl * 100 ; + //min_dt_slow = H.dt / C_cfl * 100 ; + min_dt_slow = 3*H.dx; H.min_dt_slow = min_dt_slow; #endif @@ -167,6 +170,10 @@ void Grid3D::set_dt_Gravity(){ Grav.dt_prev = Grav.dt_now; Grav.dt_now = H.dt; } + + #if defined(PARTICLES_GPU) && defined(PRINT_MAX_MEMORY_USAGE) + Particles.Print_Max_Memory_Usage(); + #endif } //NOT USED: Get Average density on the Global dommain @@ -349,7 +356,7 @@ static void printDiff(const Real *p, const Real *q, const int nx, const int ny, //Initialize the Grav Object at the beginning of the simulation void Grid3D::Initialize_Gravity( struct parameters *P ){ chprintf( "\nInitializing Gravity... \n"); - Grav.Initialize( H.xblocal, H.yblocal, H.zblocal, H.xdglobal, H.ydglobal, H.zdglobal, P->nx, P->ny, P->nz, H.nx_real, H.ny_real, H.nz_real, H.dx, H.dy, H.dz, H.n_ghost_potential_offset, P ); + Grav.Initialize( H.xblocal, H.yblocal, H.zblocal, H.xblocal_max, H.yblocal_max, H.zblocal_max, H.xdglobal, H.ydglobal, H.zdglobal, P->nx, P->ny, P->nz, H.nx_real, H.ny_real, H.nz_real, H.dx, H.dy, H.dz, H.n_ghost_potential_offset, P ); chprintf( "Gravity Successfully Initialized. \n\n"); #ifdef PARIS_TEST diff --git a/src/gravity/gravity_functions_gpu.cu b/src/gravity/gravity_functions_gpu.cu index ff2c7f446..0e78720e1 100644 --- a/src/gravity/gravity_functions_gpu.cu +++ b/src/gravity/gravity_functions_gpu.cu @@ -280,6 +280,14 @@ void Grid3D::Extrapolate_Grav_Potential_GPU(){ } +#ifdef PARTICLES_CPU +void Grid3D::Copy_Potential_From_GPU(){ + CudaSafeCall( cudaMemcpy(Grav.F.potential_h, Grav.F.potential_d, Grav.n_cells_potential*sizeof(Real), cudaMemcpyDeviceToHost) ); + cudaDeviceSynchronize(); +} +#endif //PARTICLES_CPU + + diff --git a/src/grid/boundary_conditions.cpp b/src/grid/boundary_conditions.cpp index 0c56eb55c..36f860317 100644 --- a/src/grid/boundary_conditions.cpp +++ b/src/grid/boundary_conditions.cpp @@ -185,9 +185,14 @@ void Grid3D::Set_Boundaries(int dir, int flags[]) int nPB, nBoundaries; int *iaBoundary, *iaCell; + /*if the cell face is an custom boundary, exit */ + if(flags[dir]==4) + return; + #ifdef MPI_CHOLLA /*if the cell face is an mpi boundary, exit */ - if(flags[dir]==5) return; + if(flags[dir]==5) + return; #endif /*MPI_CHOLLA*/ #ifdef GRAVITY @@ -312,7 +317,6 @@ void Grid3D::Set_Boundaries(int dir, int flags[]) H.nx, H.ny, H.nz, H.n_fields, H.n_cells, H.n_ghost, flags, imax[0]-imin[0], imax[1]-imin[1], imax[2]-imin[2], imin[0], imin[1], imin[2], dir); - } /*! \fn Set_Boundary_Extents(int dir, int *imin, int *imax) @@ -418,6 +422,7 @@ void Grid3D::Set_Boundary_Extents(int dir, int *imin, int *imax) void Grid3D::Custom_Boundary(char bcnd[MAXLEN]) { if (strcmp(bcnd, "noh")==0) { + // from grid/cuda_boundaries.cu Noh_Boundary(); } else { @@ -433,6 +438,24 @@ void Grid3D::Custom_Boundary(char bcnd[MAXLEN]) as per the Noh problem in Liska, 2003, or in Stone, 2008. */ void Grid3D::Noh_Boundary() { + // This is now a wrapper function -- the actual boundary setting + // functions are in grid/cuda_boundaries.cu + + int x_off, y_off, z_off; + // set x, y, & z offsets of local CPU volume to pass to GPU + // so global position on the grid is known + x_off = y_off = z_off = 0; + #ifdef MPI_CHOLLA + x_off = nx_local_start; + y_off = ny_local_start; + z_off = nz_local_start; + #endif + + Noh_Boundary_CUDA(C.device, H.nx, H.ny, H.nz, H.n_cells, H.n_ghost, + x_off, y_off, z_off, H.dx, H.dy, H.dz, + H.xbound, H.ybound, H.zbound, gama, H.t); + +/* int i, j, k, id; Real x_pos, y_pos, z_pos, r; Real vx, vy, vz, d_0, P_0, P; @@ -521,7 +544,7 @@ void Grid3D::Noh_Boundary() } } - +*/ } diff --git a/src/grid/cuda_boundaries.cu b/src/grid/cuda_boundaries.cu index 2953252ee..5ff5a4d2c 100644 --- a/src/grid/cuda_boundaries.cu +++ b/src/grid/cuda_boundaries.cu @@ -278,31 +278,46 @@ __device__ int FindIndex(int ig, int nx, int flag, int face, int n_ghost, Real * // periodic case 1: id = ig+nx-2*n_ghost; + #ifdef MHD + idMag = id; + #endif //MHD break; // reflective case 2: id = 2*n_ghost-ig-1; *(a) = -1.0; + #ifdef MHD + idMag = id - 1; + #endif //MHD break; // transmissive case 3: id = n_ghost; + #ifdef MHD + idMag = id - 1; + #endif //MHD break; // custom case 4: id = -1; + #ifdef MHD + idMag = -1; + #endif //MHD break; // MPI case 5: id = ig; + #ifdef MHD + idMag = id; + #endif //MHD break; // default is periodic default: id = ig+nx-2*n_ghost; + #ifdef MHD + idMag = id; + #endif //MHD } - #ifdef MHD - idMag = id; - #endif //MHD } // upper face else @@ -312,48 +327,202 @@ __device__ int FindIndex(int ig, int nx, int flag, int face, int n_ghost, Real * // periodic case 1: id = ig-nx+2*n_ghost; - #ifdef MHD - idMag = id; - #endif //MHD break; // reflective case 2: id = 2*(nx-n_ghost)-ig-1; *(a) = -1.0; - #ifdef MHD - idMag = id + 1; - #endif //MHD break; // transmissive case 3: id = nx-n_ghost-1; - #ifdef MHD - idMag = id + 1; - #endif //MHD break; // custom case 4: id = -1; - #ifdef MHD - idMag = -1; - #endif //MHD break; // MPI case 5: id = ig; - #ifdef MHD - idMag = id; - #endif //MHD break; // default is periodic default: id = ig-nx+2*n_ghost; - #ifdef MHD - idMag = id; - #endif //MHD } + #ifdef MHD + idMag = id; + #endif //MHD } return id; } +__global__ void Noh_Boundary_kernel(Real * c_device, + int nx, int ny, int nz, int n_cells, int n_ghost, + int x_off, int y_off, int z_off, + Real dx, Real dy, Real dz, Real xbound, Real ybound, Real zbound, Real gamma, Real t) +{ + int id,xid,yid,zid,gid; + Real x_pos, y_pos, z_pos, r; + Real vx, vy, vz, d_0, P_0; + + d_0 = 1.0; + P_0 = 1.0e-6; + + // calculate ghost cell ID and i,j,k in GPU grid + id = threadIdx.x + blockIdx.x * blockDim.x; + + int isize, jsize, ksize; + + // +x boundary first + isize = n_ghost; + jsize = ny; + ksize = nz; + + // not true i,j,k but relative i,j,k in the GPU grid + zid = id/(isize*jsize); + yid = (id - zid*isize*jsize)/isize; + xid = id - zid*isize*jsize - yid*isize; + + // map thread id to ghost cell id + xid += nx-n_ghost; // +x boundary + gid = xid + yid*nx + zid*nx*ny; + + if (xid >= nx-n_ghost && xid < nx && yid < ny && zid < nz) { + + // use the subgrid offset and global boundaries to calculate absolute positions on the grid + x_pos = (x_off + xid - n_ghost + 0.5)*dx + xbound; + y_pos = (y_off + yid - n_ghost + 0.5)*dy + ybound; + z_pos = (z_off + zid - n_ghost + 0.5)*dz + zbound; + + // for 2D calculate polar r + if (nz == 1) r = sqrt(x_pos*x_pos + y_pos*y_pos); + // for 3D calculate spherical r + else r = sqrt(x_pos*x_pos + y_pos*y_pos + z_pos*z_pos); + + // calculate the velocities + vx = -x_pos / r; + vy = -y_pos / r; + if (nz > 1) vz = -z_pos / r; + else vz = 0; + // set the conserved quantities + if (nz > 1) c_device[gid] = d_0*(1.0 + t/r)*(1.0 + t/r); + else c_device[gid] = d_0*(1.0 + t/r); + c_device[gid+1*n_cells] = vx*c_device[gid]; + c_device[gid+2*n_cells] = vy*c_device[gid]; + c_device[gid+3*n_cells] = vz*c_device[gid]; + c_device[gid+4*n_cells] = P_0/(gamma-1.0) + 0.5*c_device[gid]; + } + __syncthreads(); + + // +y boundary next + isize = nx; + jsize = n_ghost; + ksize = nz; + + // not true i,j,k but relative i,j,k + zid = id/(isize*jsize); + yid = (id - zid*isize*jsize)/isize; + xid = id - zid*isize*jsize - yid*isize; + + // map thread id to ghost cell id + yid += ny-n_ghost; // +y boundary + gid = xid + yid*nx + zid*nx*ny; + + if (xid < nx && yid >= ny-n_ghost && yid < ny && zid < nz) { + + // use the subgrid offset and global boundaries to calculate absolute positions on the grid + x_pos = (x_off + xid - n_ghost + 0.5)*dx + xbound; + y_pos = (y_off + yid - n_ghost + 0.5)*dy + ybound; + z_pos = (z_off + zid - n_ghost + 0.5)*dz + zbound; + + // for 2D calculate polar r + if (nz == 1) r = sqrt(x_pos*x_pos + y_pos*y_pos); + // for 3D, calculate spherical r + else r = sqrt(x_pos*x_pos + y_pos*y_pos + z_pos*z_pos); + + // calculate the velocities + vx = -x_pos / r; + vy = -y_pos / r; + if (nz > 1) vz = -z_pos / r; + else vz = 0; + // set the conserved quantities + if (nz > 1) c_device[gid] = d_0*(1.0 + t/r)*(1.0 + t/r); + else c_device[gid] = d_0*(1.0 + t/r); + c_device[gid+1*n_cells] = vx*c_device[gid]; + c_device[gid+2*n_cells] = vy*c_device[gid]; + c_device[gid+3*n_cells] = vz*c_device[gid]; + c_device[gid+4*n_cells] = P_0/(gamma-1.0) + 0.5*c_device[gid]; + } + __syncthreads(); + + // +z boundary last (only if 3D) + if (nz == 1) return; + + isize = nx; + jsize = ny; + ksize = n_ghost; + + // not true i,j,k but relative i,j,k + zid = id/(isize*jsize); + yid = (id - zid*isize*jsize)/isize; + xid = id - zid*isize*jsize - yid*isize; + + // map thread id to ghost cell id + zid += nz-n_ghost; // +z boundary + gid = xid + yid*nx + zid*nx*ny; + + if (xid < nx && yid < ny && zid >= nz-n_ghost && zid < nz) { + + // use the subgrid offset and global boundaries to calculate absolute positions on the grid + x_pos = (x_off + xid - n_ghost + 0.5)*dx + xbound; + y_pos = (y_off + yid - n_ghost + 0.5)*dy + ybound; + z_pos = (z_off + zid - n_ghost + 0.5)*dz + zbound; + + // for 2D calculate polar r + if (nz == 1) r = sqrt(x_pos*x_pos + y_pos*y_pos); + // for 3D, calculate spherical r + else r = sqrt(x_pos*x_pos + y_pos*y_pos + z_pos*z_pos); + + // calculate the velocities + vx = -x_pos / r; + vy = -y_pos / r; + if (nz > 1) vz = -z_pos / r; + else vz = 0; + // set the conserved quantities + if (nz > 1) c_device[gid] = d_0*(1.0 + t/r)*(1.0 + t/r); + else c_device[gid] = d_0*(1.0 + t/r); + c_device[gid+1*n_cells] = vx*c_device[gid]; + c_device[gid+2*n_cells] = vy*c_device[gid]; + c_device[gid+3*n_cells] = vz*c_device[gid]; + c_device[gid+4*n_cells] = P_0/(gamma-1.0) + 0.5*c_device[gid]; + } +} + + +void Noh_Boundary_CUDA(Real * c_device, int nx, int ny, int nz, int n_cells, int n_ghost, + int x_off, int y_off, int z_off, Real dx, Real dy, Real dz, + Real xbound, Real ybound, Real zbound, Real gamma, Real t) +{ + + // determine the size of the grid to launch + // need at least as many threads as the largest boundary face + // current implementation assumes the test is run on a cube... + int isize, jsize, ksize; + isize = n_ghost; + jsize = ny; + ksize = nz; + + dim3 dim1dGrid((isize*jsize*ksize+TPB-1)/TPB, 1, 1); + dim3 dim1dBlock(TPB, 1, 1); + + // launch the boundary kernel + hipLaunchKernelGGL(Noh_Boundary_kernel,dim1dGrid,dim1dBlock,0,0,c_device, + nx,ny,nz,n_cells,n_ghost, + x_off,y_off,z_off,dx,dy,dz,xbound,ybound,zbound,gamma,t); + + + +} + + diff --git a/src/grid/cuda_boundaries.h b/src/grid/cuda_boundaries.h index 568ca7b97..f7212401a 100644 --- a/src/grid/cuda_boundaries.h +++ b/src/grid/cuda_boundaries.h @@ -14,4 +14,8 @@ void SetGhostCells(Real * c_head, int isize, int jsize, int ksize, int imin, int jmin, int kmin, int dir); +void Noh_Boundary_CUDA(Real * c_device, int nx, int ny, int nz, int n_cells, int n_ghost, + int x_off, int y_off, int z_off, Real dx, Real dy, Real dz, + Real xbound, Real ybound, Real zbound, Real gamma, Real t); + #endif diff --git a/src/grid/grid3D.cpp b/src/grid/grid3D.cpp index 5fd5e1176..41d9d37df 100644 --- a/src/grid/grid3D.cpp +++ b/src/grid/grid3D.cpp @@ -85,10 +85,23 @@ void Grid3D::Get_Position(long i, long j, long k, Real *x_pos, Real *y_pos, Real #else /*MPI_CHOLLA*/ /* position relative to local xyz bounds */ + /* This approach was replaced because it is less consistent for multiple cores. + Since distributive property does not perfectly hold for floating point operations + + > Global_bound + global_i * dx + + is more consistent than + + >local_bound + local_i*dx = (global_bound + (global_i-local_i)*dx) + local_i*dx. + *x_pos = H.xblocal + H.dx*(i-H.n_ghost) + 0.5*H.dx; *y_pos = H.yblocal + H.dy*(j-H.n_ghost) + 0.5*H.dy; *z_pos = H.zblocal + H.dz*(k-H.n_ghost) + 0.5*H.dz; + */ + *x_pos = H.xbound + (nx_local_start+i-H.n_ghost)*H.dx + 0.5*H.dx; + *y_pos = H.ybound + (ny_local_start+j-H.n_ghost)*H.dy + 0.5*H.dy; + *z_pos = H.zbound + (nz_local_start+k-H.n_ghost)*H.dz + 0.5*H.dz; #endif /*MPI_CHOLLA*/ @@ -123,6 +136,10 @@ void Grid3D::Initialize(struct parameters *P) // Set the CFL coefficient (a global variable) C_cfl = 0.3; + + #ifdef AVERAGE_SLOW_CELLS + H.min_dt_slow = 1e-100; //Initialize the minumum dt to a tiny number + #endif #ifndef MPI_CHOLLA @@ -167,9 +184,6 @@ void Grid3D::Initialize(struct parameters *P) flag_init = 1; } - // Set the flag that tells Update_Grid which buffer to read from - gflag = 0; - // Set header variables for time within the simulation H.t = 0.0; // and the number of timesteps taken @@ -293,12 +307,12 @@ void Grid3D::AllocateMemory(void) C.d_GasEnergy = &(C.device[(H.n_fields-1)*H.n_cells]); #endif // DE - // set the number of thread blocks for the GPU grid (declared in global_cuda) - ngrid = (H.n_cells + TPB - 1) / TPB; // arrays that hold the max_dti calculation for hydro for each thread block (pre reduction) + int ngrid = (H.n_cells + TPB - 1) / TPB; CudaSafeCall( cudaHostAlloc(&host_dti_array, ngrid*sizeof(Real), cudaHostAllocDefault) ); CudaSafeCall( cudaMalloc((void**)&dev_dti_array, ngrid*sizeof(Real)) ); + CudaSafeCall( cudaMalloc((void**)&dev_dti, sizeof(Real)) ); #if defined( GRAVITY ) @@ -352,15 +366,8 @@ void Grid3D::AllocateMemory(void) //Compute the hydro delta_t ( H.dt ) if (H.n_step == 0) { - //Set the min_delta_t for averaging a slow cell - #ifdef AVERAGE_SLOW_CELLS - Real max_dti_slow = 1 / H.min_dt_slow; - #else // NOT AVERAGE_SLOW_CELLS - Real max_dti_slow = 0; // max_dti_slow is not used if NOT AVERAGE_SLOW_CELLS - #endif //max_dti_slow - // Compute the time step - max_dti = Calc_dt_GPU(C.device, H.nx, H.ny, H.nz, H.n_ghost, H.n_cells, H.dx, H.dy, H.dz, gama, max_dti_slow); + max_dti = Calc_dt_GPU(C.device, H.nx, H.ny, H.nz, H.n_ghost, H.n_cells, H.dx, H.dy, H.dz, gama ); } else { max_dti = dti; @@ -416,14 +423,6 @@ Real Grid3D::Update_Grid(void) U_floor /= Cosmo.v_0_gas * Cosmo.v_0_gas / Cosmo.current_a / Cosmo.current_a; #endif - //Set the min_delta_t for averaging a slow cell - Real max_dti_slow; - #ifdef AVERAGE_SLOW_CELLS - max_dti_slow = 1 / H.min_dt_slow; - #else // NOT AVERAGE_SLOW_CELLS - max_dti_slow = 0; // max_dti_slow is not used if NOT AVERAGE_SLOW_CELLS - #endif //max_dti_slow - // Run the hydro integrator on the grid if (H.nx > 1 && H.ny == 1 && H.nz == 1) //1D @@ -452,13 +451,13 @@ Real Grid3D::Update_Grid(void) { #ifdef CUDA #ifdef CTU - CTU_Algorithm_3D_CUDA(C.device, H.nx, H.ny, H.nz, x_off, y_off, z_off, H.n_ghost, H.dx, H.dy, H.dz, H.xbound, H.ybound, H.zbound, H.dt, H.n_fields, density_floor, U_floor, C.Grav_potential, max_dti_slow ); + CTU_Algorithm_3D_CUDA(C.device, H.nx, H.ny, H.nz, x_off, y_off, z_off, H.n_ghost, H.dx, H.dy, H.dz, H.xbound, H.ybound, H.zbound, H.dt, H.n_fields, density_floor, U_floor, C.Grav_potential ); #endif //not_VL #ifdef VL - VL_Algorithm_3D_CUDA(C.device, C.d_Grav_potential, H.nx, H.ny, H.nz, x_off, y_off, z_off, H.n_ghost, H.dx, H.dy, H.dz, H.xbound, H.ybound, H.zbound, H.dt, H.n_fields, density_floor, U_floor, C.Grav_potential, max_dti_slow ); + VL_Algorithm_3D_CUDA(C.device, C.d_Grav_potential, H.nx, H.ny, H.nz, x_off, y_off, z_off, H.n_ghost, H.dx, H.dy, H.dz, H.xbound, H.ybound, H.zbound, H.dt, H.n_fields, density_floor, U_floor, C.Grav_potential ); #endif //VL #ifdef SIMPLE - Simple_Algorithm_3D_CUDA(C.device, C.d_Grav_potential, H.nx, H.ny, H.nz, x_off, y_off, z_off, H.n_ghost, H.dx, H.dy, H.dz, H.xbound, H.ybound, H.zbound, H.dt, H.n_fields, density_floor, U_floor, C.Grav_potential, max_dti_slow ); + Simple_Algorithm_3D_CUDA(C.device, C.d_Grav_potential, H.nx, H.ny, H.nz, x_off, y_off, z_off, H.n_ghost, H.dx, H.dy, H.dz, H.xbound, H.ybound, H.zbound, H.dt, H.n_fields, density_floor, U_floor, C.Grav_potential ); #endif//SIMPLE #endif } @@ -473,28 +472,26 @@ Real Grid3D::Update_Grid(void) #ifdef COOLING_GPU // ==Apply Cooling from cooling/cooling_cuda.h== - Cooling_Update(C.device, H.nx, H.ny, H.nz, H.n_ghost, H.n_fields, H.dt, gama, dev_dti_array); - // ==Calculate cooling dt from cooling/cooling_cuda.h== - // dev_dti_array and host_dti_array are global variables declared in global/global_cuda.h and allocated in Allocate_Memory - Real cooling_max_dti = Cooling_Calc_dt(dev_dti_array, host_dti_array, H.nx, H.ny, H.nz); + Cooling_Update(C.device, H.nx, H.ny, H.nz, H.n_ghost, H.n_fields, H.dt, gama); #endif //COOLING_GPU // Update the H and He ionization fractions and apply cooling and photoheating #ifdef CHEMISTRY_GPU - #ifdef CPU_TIMER - Timer.Chemistry.Start(); - #endif Update_Chemistry(); - #ifdef CPU_TIMER - Timer.Chemistry.End(); + #ifdef CPU_TIME + Timer.Chemistry.RecordTime( Chem.H.runtime_chemistry_step ); #endif #endif + + #ifdef AVERAGE_SLOW_CELLS + //Set the min_delta_t for averaging a slow cell + Real max_dti_slow; + max_dti_slow = 1 / H.min_dt_slow; + Average_Slow_Cells( C.device, H.nx, H.ny, H.nz, H.n_ghost, H.n_fields, H.dx, H.dy, H.dz, gama, max_dti_slow ); + #endif //AVERAGE_SLOW_CELLS // ==Calculate the next time step with Calc_dt_GPU from hydro/hydro_cuda.h== - max_dti = Calc_dt_GPU(C.device, H.nx, H.ny, H.nz, H.n_ghost, H.n_cells, H.dx, H.dy, H.dz, gama, max_dti_slow); - #ifdef COOLING_GPU - max_dti = fmax(max_dti, cooling_max_dti); - #endif // COOLING_GPU + max_dti = Calc_dt_GPU(C.device, H.nx, H.ny, H.nz, H.n_ghost, H.n_cells, H.dx, H.dy, H.dz, gama ); #endif // CUDA #ifdef COOLING_GRACKLE @@ -519,8 +516,6 @@ Real Grid3D::Update_Grid(void) C.e_density = &C.scalar[ 5*H.n_cells ]; #endif - // reset the grid flag to swap buffers - gflag = (gflag+1)%2; return max_dti; @@ -550,8 +545,8 @@ Real Grid3D::Update_Hydro_Grid( ){ #ifdef CPU_TIME #ifdef CHEMISTRY_GPU - Timer.Hydro.Subtract(Chem.H.runtime_chemistry_step / 1000); - //Subtract the time spent on the Chemical Update (Chem runtime was measured in ms, while the timer is on secs ) + Timer.Hydro.Subtract(Chem.H.runtime_chemistry_step); + //Subtract the time spent on the Chemical Update #endif Timer.Hydro.End(); #endif //CPU_TIME @@ -619,6 +614,7 @@ void Grid3D::FreeMemory(void) // free the timestep arrays CudaSafeCall( cudaFreeHost(host_dti_array) ); cudaFree(dev_dti_array); + cudaFree(dev_dti); #ifdef GRAVITY CudaSafeCall( cudaFreeHost(C.Grav_potential) ); diff --git a/src/grid/grid3D.h b/src/grid/grid3D.h index 426f4d329..03fbd1b64 100644 --- a/src/grid/grid3D.h +++ b/src/grid/grid3D.h @@ -40,7 +40,7 @@ #ifdef CHEMISTRY_GPU #include "chemistry_gpu/chemistry_gpu.h" -#endif +#endif #ifdef ANALYSIS #include "../analysis/analysis.h" @@ -156,19 +156,7 @@ struct Header /* \brief Global domain z-direction minimum */ Real zbound; - /*! \var domlen_x */ - /* \brief Local domain length in x-direction */ - Real domlen_x; - - /*! \var domlen_y */ - /* \brief Local domain length in y-direction */ - Real domlen_y; - - /*! \var domlen_z */ - /* \brief Local domain length in z-direction */ - Real domlen_z; - - /*! \var xblocal */ + /*! \var xblocal */ /* \brief Local domain x-direction minimum */ Real xblocal; @@ -176,10 +164,22 @@ struct Header /* \brief Local domain y-direction minimum */ Real yblocal; - /*! \var zblocal*/ + /*! \var zblocal */ /* \brief Local domain z-direction minimum */ Real zblocal; + /*! \var xblocal_max */ + /* \brief Local domain x-direction maximum */ + Real xblocal_max; + + /*! \var yblocal_max */ + /* \brief Local domain y-direction maximum */ + Real yblocal_max; + + /*! \var zblocal_max */ + /* \brief Local domain z-direction maximum */ + Real zblocal_max; + /*! \var xdglobal */ /* \brief Global domain length in x-direction */ Real xdglobal; @@ -278,10 +278,6 @@ class Grid3D * \brief Initialization flag */ int flag_init; - /*! \var gflag - * \brief Flag that determines which buffer contains updated conserved variables */ - int gflag; - /*! \var struct Header H * \brief Header for the grid */ struct Header H; @@ -309,11 +305,11 @@ class Grid3D // Object that contains data for Grackle cooling Cool_GK Cool; #endif - + #ifdef CPU_TIME Time Timer; #endif - + #ifdef CHEMISTRY_GPU // Object that contains data for the GPU chemistry solver Chem_GPU Chem; @@ -323,12 +319,13 @@ class Grid3D Analysis_Module Analysis; #endif - #ifdef FEEDBACK //TODO refactor this into Analysis module + #ifdef SUPERNOVA //TODO refactor this into Analysis module Real countSN; Real countResolved; Real countUnresolved; Real totalEnergy; Real totalMomentum; + Real totalUnresEnergy; #endif struct Conserved { @@ -364,19 +361,19 @@ class Grid3D #ifdef MHD /*! \var magnetic_x \brief Array containing the magnetic field in the x * direction of each cell in the grid. Note that this is the magnetic - * field at the x-1/2 face of the cell since constrained transport + * field at the x+1/2 face of the cell since constrained transport * requires face centered, not cell centered, magnetic fields */ Real *magnetic_x; /*! \var magnetic_y \brief Array containing the magnetic field in the y * direction of each cell in the grid. Note that this is the magnetic - * field at the y-1/2 face of the cell since constrained transport + * field at the y+1/2 face of the cell since constrained transport * requires face centered, not cell centered, magnetic fields */ Real *magnetic_y; /*! \var magnetic_z \brief Array containing the magnetic field in the z * direction of each cell in the grid. Note that this is the magnetic - * field at the z-1/2 face of the cell since constrained transport + * field at the z+1/2 face of the cell since constrained transport * requires face centered, not cell centered, magnetic fields */ Real *magnetic_z; #endif // MHD @@ -391,7 +388,7 @@ class Grid3D /*! \var grav_potential * \brief Array containing the gravitational potential of each cell, only tracked separately when using GRAVITY. */ Real *Grav_potential; - + #ifdef CHEMISTRY_GPU Real *HI_density; Real *HII_density; @@ -399,9 +396,9 @@ class Grid3D Real *HeII_density; Real *HeIII_density; Real *e_density; - #endif + #endif + - /*! pointer to conserved variable on device */ Real *device; Real *d_density, *d_momentum_x, *d_momentum_y, *d_momentum_z, @@ -647,10 +644,12 @@ class Grid3D * \brief Initialize the grid with a 3D spherical overdensity for gravitational collapse */ void Spherical_Overdensity_3D(); + void Clouds(); + void Uniform_Grid(); void Zeldovich_Pancake( struct parameters P ); - + void Chemistry_Test( struct parameters P ); @@ -750,6 +749,7 @@ class Grid3D void Finish_Particles_Transfer(); #endif//MPI_CHOLLA void Transfer_Particles_Density_Boundaries( struct parameters P ); + void Copy_Particles_Density_Buffer_Device_to_Host( int direction, int side, Real *buffer_d, Real *buffer_h ); // void Transfer_Particles_Boundaries( struct parameters P ); void WriteData_Particles( struct parameters P, int nfile); void OutputData_Particles( struct parameters P, int nfile); @@ -779,6 +779,8 @@ class Grid3D void Set_Particles_Density_Boundaries_Periodic_GPU( int direction, int side ); #endif//PARTICLES_GPU #ifdef GRAVITY_GPU + void Copy_Potential_From_GPU(); + void Copy_Particles_Density_to_GPU(); void Copy_Particles_Density_GPU(); int Load_Particles_Density_Boundary_to_Buffer_GPU( int direction, int side, Real *buffer ); void Unload_Particles_Density_Boundary_From_Buffer_GPU( int direction, int side, Real *buffer ); @@ -811,13 +813,13 @@ class Grid3D void Update_Internal_Energy(); void Do_Cooling_Step_Grackle(); #endif - + #ifdef CHEMISTRY_GPU void Initialize_Chemistry( struct parameters *P ); void Compute_Gas_Temperature( Real *temperature, bool convert_cosmo_units ); void Update_Chemistry(); #endif - + #ifdef ANALYSIS void Initialize_Analysis_Module( struct parameters *P ); void Compute_and_Output_Analysis( struct parameters *P ); @@ -846,7 +848,7 @@ class Grid3D #ifdef PARTICLES #ifdef DE #ifdef PARTICLE_AGE - #ifdef FEEDBACK + #ifdef SUPERNOVA Real Cluster_Feedback(); Real Cluster_Feedback_GPU(); void Cluster_Feedback_Function(part_int_t p_start, part_int_t p_end, Real* info, int thread_id, Real* dti); diff --git a/src/grid/initial_conditions.cpp b/src/grid/initial_conditions.cpp index 37759f5fc..4de96bcde 100644 --- a/src/grid/initial_conditions.cpp +++ b/src/grid/initial_conditions.cpp @@ -1,5 +1,5 @@ /*! \file initial_conditions.cpp - * \brief Definitions of initial conditions for different tests. +/* \brief Definitions of initial conditions for different tests. Note that the grid is mapped to 1D as i + (x_dim)*j + (x_dim*y_dim)*k. Functions are members of the Grid3D class. */ @@ -69,7 +69,9 @@ void Grid3D::Set_Initial_Conditions(parameters P) { } else if (strcmp(P.init, "Spherical_Overpressure_3D")==0) { Spherical_Overpressure_3D(); } else if (strcmp(P.init, "Spherical_Overdensity_3D")==0) { - Spherical_Overdensity_3D(); + Spherical_Overdensity_3D(); + } else if (strcmp(P.init, "Clouds")==0) { + Clouds(); } else if (strcmp(P.init, "Read_Grid")==0) { #ifndef ONLY_PARTICLES Read_Grid(P); @@ -120,30 +122,37 @@ void Grid3D::Set_Domain_Properties(struct parameters P) H.yblocal = H.ybound; H.zblocal = H.zbound; + H.xblocal_max = H.xblocal + P.xlen; + H.yblocal_max = H.yblocal + P.ylen; + H.zblocal_max = H.zblocal + P.zlen; + #else Real nx_param = (Real) nx_global; Real ny_param = (Real) ny_global; Real nz_param = (Real) nz_global; // Local Boundary Coordinates + /* H.xblocal = H.xbound + P.xlen * ((Real) nx_local_start) / nx_param; H.yblocal = H.ybound + P.ylen * ((Real) ny_local_start) / ny_param; H.zblocal = H.zbound + P.zlen * ((Real) nz_local_start) / nz_param; + */ + H.xblocal = H.xbound + ((Real) nx_local_start) * (P.xlen / nx_param); + H.yblocal = H.ybound + ((Real) ny_local_start) * (P.ylen / ny_param); + H.zblocal = H.zbound + ((Real) nz_local_start) * (P.zlen / nz_param); + + H.xblocal_max = H.xbound + ((Real) (nx_local_start + H.nx - 2*H.n_ghost)) * (P.xlen / nx_param); + H.yblocal_max = H.ybound + ((Real) (ny_local_start + H.ny - 2*H.n_ghost)) * (P.ylen / ny_param); + H.zblocal_max = H.zbound + ((Real) (nz_local_start + H.nz - 2*H.n_ghost)) * (P.zlen / nz_param); #endif -#ifndef MPI_CHOLLA /*perform 1-D first*/ if(H.nx > 1 && H.ny==1 && H.nz==1) { H.dx = P.xlen / nx_param; - - H.domlen_x = P.xlen; // ifdef MPI_CHOLLA this is H.dx * nx_param - H.domlen_y = P.ylen / nx_param; - H.domlen_z = P.zlen / nx_param; - - H.dy = H.domlen_y; - H.dz = H.domlen_z; + H.dy = P.ylen; + H.dz = P.zlen; } /*perform 2-D next*/ @@ -151,12 +160,7 @@ void Grid3D::Set_Domain_Properties(struct parameters P) { H.dx = P.xlen / nx_param; H.dy = P.ylen / ny_param; - - H.domlen_x = P.xlen; // ifdef MPI_CHOLLA this is H.dx * nx_param - H.domlen_y = P.ylen; // ifdef MPI_CHOLLA this is H.dy * ny_param - H.domlen_z = P.zlen / nx_param; - - H.dz = H.domlen_z; + H.dz = P.zlen; } /*perform 3-D last*/ @@ -166,19 +170,7 @@ void Grid3D::Set_Domain_Properties(struct parameters P) H.dy = P.ylen / ny_param; H.dz = P.zlen / nz_param; - H.domlen_x = P.xlen; // ifdef MPI_CHOLLA this could be H.dx * nx_param - H.domlen_y = P.ylen; // ifdef MPI_CHOLLA this could be H.dy * ny_param - H.domlen_z = P.zlen; // ifdef MPI_CHOLLA this could be H.dz * nz_param } - - - -#else /*MPI_CHOLLA*/ - - /* set the local domains on each process */ - Set_Parallel_Domain(P.xmin, P.ymin, P.zmin, P.xlen, P.ylen, P.zlen, &H); - -#endif /*MPI_CHOLLA*/ } @@ -213,23 +205,23 @@ void Grid3D::Constant(Real rho, Real vx, Real vy, Real vz, Real P, Real Bx, Real } // set initial values of conserved variables - for(k=kstart; k= kstart) and (j >= jstart) and (i >= istart)) { // set constant initial states C.density[id] = rho; @@ -305,6 +297,9 @@ void Grid3D::Sound_Wave(Real rho, Real vx, Real vy, Real vz, Real P, Real A) C.momentum_y[id] = C.momentum_y[id] + A * sin(2.0*PI*x_pos); C.momentum_z[id] = C.momentum_z[id] + A * sin(2.0*PI*x_pos); C.Energy[id] = C.Energy[id] + A * (1.5) * sin(2*PI*x_pos); + #ifdef DE + C.GasEnergy[id] = P/(gama-1.0); + #endif //DE } } } @@ -418,7 +413,7 @@ void Grid3D::Riemann(Real rho_l, Real vx_l, Real vy_l, Real vz_l, Real P_l, Real #ifdef MHD auto setMagnetFields = [&] () { - Real x_pos_face = x_pos - 0.5 * H.dx; + Real x_pos_face = x_pos + 0.5 * H.dx; if (x_pos_face < diaph) { @@ -436,9 +431,9 @@ void Grid3D::Riemann(Real rho_l, Real vx_l, Real vy_l, Real vz_l, Real P_l, Real #endif // MHD // set initial values of conserved variables - for(k=kstart; k= kstart) and (j >= jstart) and (i >= istart)) { if (x_pos < diaph) { @@ -524,6 +519,11 @@ void Grid3D::Shu_Osher() Real P = 1.0; C.Energy[id] = P/(gama-1.0) + 0.5*C.density[id]*vx*vx; } + #ifdef DE + C.GasEnergy[id] = P/(gama-1.0); + #endif //DE + + } } @@ -549,7 +549,6 @@ void Grid3D::Blast_1D() C.momentum_y[id] = 0.0; C.momentum_z[id] = 0.0; P = 1000.0; - C.Energy[id] = P/(gama-1.0); } else if (x_pos > 0.9) { @@ -558,7 +557,6 @@ void Grid3D::Blast_1D() C.momentum_y[id] = 0.0; C.momentum_z[id] = 0.0; P = 100; - C.Energy[id] = P/(gama-1.0); } else { @@ -567,8 +565,12 @@ void Grid3D::Blast_1D() C.momentum_y[id] = 0.0; C.momentum_z[id] = 0.0; P = 0.01; - C.Energy[id] = P/(gama-1.0); } + C.Energy[id] = P/(gama-1.0); + #ifdef DE + C.GasEnergy[id] = P/(gama-1.0); + #endif //DE + } } @@ -620,7 +622,6 @@ void Grid3D::KH() C.momentum_x[id] = v2*C.density[id]; C.momentum_y[id] = C.density[id]*A*sin(4*PI*x_pos); C.momentum_z[id] = 0.0; - C.Energy[id] = P/(gama-1.0) + 0.5*(C.momentum_x[id]*C.momentum_x[id] + C.momentum_y[id]*C.momentum_y[id])/C.density[id]; #ifdef SCALAR C.scalar[id] = 0.0; #endif @@ -631,7 +632,7 @@ void Grid3D::KH() C.momentum_x[id] = v2*C.density[id]; C.momentum_y[id] = C.density[id]*A*sin(4*PI*x_pos); C.momentum_z[id] = 0.0; - C.Energy[id] = P/(gama-1.0) + 0.5*(C.momentum_x[id]*C.momentum_x[id] + C.momentum_y[id]*C.momentum_y[id])/C.density[id]; + #ifdef SCALAR C.scalar[id] = 0.0; #endif @@ -643,11 +644,17 @@ void Grid3D::KH() C.momentum_x[id] = v1*C.density[id]; C.momentum_y[id] = C.density[id]*A*sin(4*PI*x_pos); C.momentum_z[id] = 0.0; - C.Energy[id] = P/(gama-1.0) + 0.5*(C.momentum_x[id]*C.momentum_x[id] + C.momentum_y[id]*C.momentum_y[id])/C.density[id]; + #ifdef SCALAR C.scalar[id] = 1.0*d1; #endif } + C.Energy[id] = P/(gama-1.0) + 0.5*(C.momentum_x[id]*C.momentum_x[id] + C.momentum_y[id]*C.momentum_y[id])/C.density[id]; + #ifdef DE + C.GasEnergy[id] = P/(gama-1.0); + #endif //DE + + } } } @@ -665,7 +672,6 @@ void Grid3D::KH_res_ind() Real mx, my, mz; Real r, yc, zc, phi; Real d1, d2, v1, v2, P, dy, A; - istart = H.n_ghost; iend = H.nx-H.n_ghost; jstart = H.n_ghost; @@ -691,6 +697,8 @@ void Grid3D::KH_res_ind() dy = 0.05; // width of ramp function (see Robertson 2009) A = 0.1; // amplitude of the perturbation + // Note: ramp function from Robertson 2009 is 1/Ramp(y) = (1 + exp(2*(y-0.25)/dy))*(1 + exp(2*(0.75 - y)/dy)); + // set the initial values of the conserved variables for (k=kstart; k 1) { + jstart = H.n_ghost; + jend = H.ny-H.n_ghost; + } + else { + jstart = 0; + jend = H.ny; + } + if (H.nz > 1) { + kstart = H.n_ghost; + kend = H.nz-H.n_ghost; + } + else { + kstart = 0; + kend = H.nz; + } + + // set initial values of conserved variables + for(k=kstart; k= kstart) and (j >= jstart) and (i >= istart)) { C.density[id] = 0; C.momentum_x[id] = 0; diff --git a/src/grid/mpi_boundaries.cpp b/src/grid/mpi_boundaries.cpp index dfd384f8e..d58f2c6a5 100644 --- a/src/grid/mpi_boundaries.cpp +++ b/src/grid/mpi_boundaries.cpp @@ -363,7 +363,12 @@ void Grid3D::Load_and_Send_MPI_Comm_Buffers(int dir, int *flags) cudaMemcpyDeviceToHost); #endif #else - buffer_length = Load_Particles_Density_Boundary_to_Buffer( 0, 0, h_send_buffer_x0 ); + #ifndef MPI_GPU + buffer_length = Load_Particles_Density_Boundary_to_Buffer( 0, 0, h_send_buffer_x0 ); + #else + buffer_length = Load_Particles_Density_Boundary_to_Buffer( 0, 0, h_send_buffer_x0_particles ); + cudaMemcpy(d_send_buffer_x0, h_send_buffer_x0_particles, buffer_length*sizeof(Real), cudaMemcpyHostToDevice); + #endif #endif } else if ( Particles.TRANSFER_PARTICLES_BOUNDARIES ){ @@ -438,7 +443,12 @@ void Grid3D::Load_and_Send_MPI_Comm_Buffers(int dir, int *flags) cudaMemcpyDeviceToHost); #endif #else - buffer_length = Load_Particles_Density_Boundary_to_Buffer( 0, 1, h_send_buffer_x1 ); + #ifndef MPI_GPU + buffer_length = Load_Particles_Density_Boundary_to_Buffer( 0, 1, h_send_buffer_x1 ); + #else + buffer_length = Load_Particles_Density_Boundary_to_Buffer( 0, 1, h_send_buffer_x1_particles ); + cudaMemcpy(d_send_buffer_x1, h_send_buffer_x1_particles, buffer_length*sizeof(Real), cudaMemcpyHostToDevice); + #endif #endif } else if ( Particles.TRANSFER_PARTICLES_BOUNDARIES ){ @@ -518,7 +528,12 @@ void Grid3D::Load_and_Send_MPI_Comm_Buffers(int dir, int *flags) cudaMemcpyDeviceToHost); #endif #else - buffer_length = Load_Particles_Density_Boundary_to_Buffer( 1, 0, h_send_buffer_y0 ); + #ifndef MPI_GPU + buffer_length = Load_Particles_Density_Boundary_to_Buffer( 1, 0, h_send_buffer_y0 ); + #else + buffer_length = Load_Particles_Density_Boundary_to_Buffer( 1, 0, h_send_buffer_y0_particles ); + cudaMemcpy(d_send_buffer_y0, h_send_buffer_y0_particles, buffer_length*sizeof(Real), cudaMemcpyHostToDevice); + #endif #endif } else if ( Particles.TRANSFER_PARTICLES_BOUNDARIES ){ @@ -591,7 +606,12 @@ void Grid3D::Load_and_Send_MPI_Comm_Buffers(int dir, int *flags) cudaMemcpyDeviceToHost); #endif #else - buffer_length = Load_Particles_Density_Boundary_to_Buffer( 1, 1, h_send_buffer_y1 ); + #ifndef MPI_GPU + buffer_length = Load_Particles_Density_Boundary_to_Buffer( 1, 1, h_send_buffer_y1 ); + #else + buffer_length = Load_Particles_Density_Boundary_to_Buffer( 1, 1, h_send_buffer_y1_particles ); + cudaMemcpy(d_send_buffer_y1, h_send_buffer_y1_particles, buffer_length*sizeof(Real), cudaMemcpyHostToDevice); + #endif #endif } else if ( Particles.TRANSFER_PARTICLES_BOUNDARIES ){ @@ -671,7 +691,12 @@ void Grid3D::Load_and_Send_MPI_Comm_Buffers(int dir, int *flags) cudaMemcpyDeviceToHost); #endif #else - buffer_length = Load_Particles_Density_Boundary_to_Buffer( 2, 0, h_send_buffer_z0 ); + #ifndef MPI_GPU + buffer_length = Load_Particles_Density_Boundary_to_Buffer( 2, 0, h_send_buffer_z0 ); + #else + buffer_length = Load_Particles_Density_Boundary_to_Buffer( 2, 0, h_send_buffer_z0_particles ); + cudaMemcpy(d_send_buffer_z0, h_send_buffer_z0_particles, buffer_length*sizeof(Real), cudaMemcpyHostToDevice); + #endif #endif } else if ( Particles.TRANSFER_PARTICLES_BOUNDARIES ){ @@ -742,7 +767,12 @@ void Grid3D::Load_and_Send_MPI_Comm_Buffers(int dir, int *flags) cudaMemcpyDeviceToHost); #endif #else - buffer_length = Load_Particles_Density_Boundary_to_Buffer( 2, 1, h_send_buffer_z1 ); + #ifndef MPI_GPU + buffer_length = Load_Particles_Density_Boundary_to_Buffer( 2, 1, h_send_buffer_z1 ); + #else + buffer_length = Load_Particles_Density_Boundary_to_Buffer( 2, 1, h_send_buffer_z1_particles ); + cudaMemcpy(d_send_buffer_z1, h_send_buffer_z1_particles, buffer_length*sizeof(Real), cudaMemcpyHostToDevice); + #endif #endif } else if ( Particles.TRANSFER_PARTICLES_BOUNDARIES ){ @@ -950,14 +980,29 @@ void Grid3D::Unload_MPI_Comm_Buffers(int index) = &Grid3D::Unload_Particles_Density_Boundary_From_Buffer_GPU; #else - + + #ifdef MPI_GPU + if ( index == 0 ) Copy_Particles_Density_Buffer_Device_to_Host( 0, 0, d_recv_buffer_x0, h_recv_buffer_x0_particles ); + if ( index == 1 ) Copy_Particles_Density_Buffer_Device_to_Host( 0, 1, d_recv_buffer_x1, h_recv_buffer_x1_particles ); + if ( index == 2 ) Copy_Particles_Density_Buffer_Device_to_Host( 1, 0, d_recv_buffer_y0, h_recv_buffer_y0_particles ); + if ( index == 3 ) Copy_Particles_Density_Buffer_Device_to_Host( 1, 1, d_recv_buffer_y1, h_recv_buffer_y1_particles ); + if ( index == 4 ) Copy_Particles_Density_Buffer_Device_to_Host( 2, 0, d_recv_buffer_z0, h_recv_buffer_z0_particles ); + if ( index == 5 ) Copy_Particles_Density_Buffer_Device_to_Host( 2, 1, d_recv_buffer_z1, h_recv_buffer_z1_particles ); + l_recv_buffer_x0 = h_recv_buffer_x0_particles; + l_recv_buffer_x1 = h_recv_buffer_x1_particles; + l_recv_buffer_y0 = h_recv_buffer_y0_particles; + l_recv_buffer_y1 = h_recv_buffer_y1_particles; + l_recv_buffer_z0 = h_recv_buffer_z0_particles; + l_recv_buffer_z1 = h_recv_buffer_z1_particles; + #else l_recv_buffer_x0 = h_recv_buffer_x0; l_recv_buffer_x1 = h_recv_buffer_x1; l_recv_buffer_y0 = h_recv_buffer_y0; l_recv_buffer_y1 = h_recv_buffer_y1; l_recv_buffer_z0 = h_recv_buffer_z0; l_recv_buffer_z1 = h_recv_buffer_z1; - + #endif //MPI_GPU + Fptr_Unload_Particle_Density = &Grid3D::Unload_Particles_Density_Boundary_From_Buffer; diff --git a/src/hydro/hydro_cuda.cu b/src/hydro/hydro_cuda.cu index 62c1e0c37..535f0e437 100644 --- a/src/hydro/hydro_cuda.cu +++ b/src/hydro/hydro_cuda.cu @@ -4,11 +4,16 @@ #include #include +#include + #include "../utils/gpu.hpp" #include "../global/global.h" #include "../global/global_cuda.h" #include "../hydro/hydro_cuda.h" #include "../gravity/gravity_cuda.h" +#include "../utils/hydro_utilities.h" +#include "../utils/cuda_utilities.h" +#include "../utils/reduction_utilities.h" __global__ void Update_Conserved_Variables_1D(Real *dev_conserved, Real *dev_F, int n_cells, int x_off, int n_ghost, Real dx, Real xbound, Real dt, Real gamma, int n_fields) @@ -473,168 +478,117 @@ __device__ __host__ Real mhdInverseCrossingTime(Real const &E, -__global__ void Calc_dt_1D(Real *dev_conserved, int n_cells, int n_ghost, Real dx, Real *dti_array, Real gamma) +__global__ void Calc_dt_1D(Real *dev_conserved, Real *dev_dti, Real gamma, int n_ghost, int nx, Real dx) { - __shared__ Real max_dti[TPB]; + Real max_dti = -DBL_MAX; Real d, d_inv, vx, vy, vz, P, cs; - int id, tid; - - // get a global thread ID - id = threadIdx.x + blockIdx.x * blockDim.x; - // and a thread id within the block - tid = threadIdx.x; - - // set shared memory to 0 - max_dti[tid] = 0; - __syncthreads(); - - - // threads corresponding to real cells do the calculation - if (id > n_ghost - 1 && id < n_cells-n_ghost) + int n_cells = nx; + + // Grid stride loop to perform as much of the reduction as possible. The + // fact that `id` has type `size_t` is important. I'm not totally sure why + // but setting it to int results in some kind of silent over/underflow issue + // even though we're not hitting those kinds of numbers. Setting it to type + // uint or size_t fixes them + for(size_t id = threadIdx.x + blockIdx.x * blockDim.x; id < n_cells; id += blockDim.x * gridDim.x) { - // start timestep calculation here - // every thread collects the conserved variables it needs from global memory - d = dev_conserved[ id]; - d_inv = 1.0 / d; - vx = dev_conserved[1*n_cells + id] * d_inv; - vy = dev_conserved[2*n_cells + id] * d_inv; - vz = dev_conserved[3*n_cells + id] * d_inv; - P = (dev_conserved[4*n_cells + id] - 0.5*d*(vx*vx + vy*vy + vz*vz)) * (gamma - 1.0); - P = fmax(P, (Real) TINY_NUMBER); - // find the max wavespeed in that cell, use it to calculate the inverse timestep - cs = sqrt(d_inv * gamma * P); - max_dti[tid] = (fabs(vx)+cs)/dx; - } - __syncthreads(); - - // do the reduction in shared memory (find the max inverse timestep in the block) - for (unsigned int s=1; s n_ghost - 1 && id < n_cells-n_ghost) + { + // start timestep calculation here + // every thread collects the conserved variables it needs from global memory + d = dev_conserved[ id]; + d_inv = 1.0 / d; + vx = dev_conserved[1*n_cells + id] * d_inv; + vy = dev_conserved[2*n_cells + id] * d_inv; + vz = dev_conserved[3*n_cells + id] * d_inv; + P = (dev_conserved[4*n_cells + id] - 0.5*d*(vx*vx + vy*vy + vz*vz)) * (gamma - 1.0); + P = fmax(P, (Real) TINY_NUMBER); + // find the max wavespeed in that cell, use it to calculate the inverse timestep + cs = sqrt(d_inv * gamma * P); + max_dti = fmax(max_dti,(fabs(vx)+cs)/dx); } - __syncthreads(); } - // write the result for this block to global memory - if (tid == 0) dti_array[blockIdx.x] = max_dti[0]; - - + // do the grid wide reduction (find the max inverse timestep in the grid) + reduction_utilities::gridReduceMax(max_dti, dev_dti); } -__global__ void Calc_dt_2D(Real *dev_conserved, int nx, int ny, int n_ghost, Real dx, Real dy, Real *dti_array, Real gamma) +__global__ void Calc_dt_2D(Real *dev_conserved, Real *dev_dti, Real gamma, int n_ghost, int nx, int ny, Real dx, Real dy) { - __shared__ Real max_dti[TPB]; + Real max_dti = -DBL_MAX; Real d, d_inv, vx, vy, vz, P, cs; - int id, tid, xid, yid, n_cells; + int xid, yid, n_cells; n_cells = nx*ny; - // get a global thread ID - int blockId = blockIdx.x + blockIdx.y*gridDim.x; - id = threadIdx.x + blockId * blockDim.x; - yid = id / nx; - xid = id - yid*nx; - // and a thread id within the block - tid = threadIdx.x; - - // set shared memory to 0 - max_dti[tid] = 0; - __syncthreads(); - - // threads corresponding to real cells do the calculation - if (xid > n_ghost-1 && xid < nx-n_ghost && yid > n_ghost-1 && yid < ny-n_ghost) + // Grid stride loop to perform as much of the reduction as possible. The + // fact that `id` has type `size_t` is important. I'm not totally sure why + // but setting it to int results in some kind of silent over/underflow issue + // even though we're not hitting those kinds of numbers. Setting it to type + // uint or size_t fixes them + for(size_t id = threadIdx.x + blockIdx.x * blockDim.x; id < n_cells; id += blockDim.x * gridDim.x) { - // every thread collects the conserved variables it needs from global memory - d = dev_conserved[ id]; - d_inv = 1.0 / d; - vx = dev_conserved[1*n_cells + id] * d_inv; - vy = dev_conserved[2*n_cells + id] * d_inv; - vz = dev_conserved[3*n_cells + id] * d_inv; - P = (dev_conserved[4*n_cells + id] - 0.5*d*(vx*vx + vy*vy + vz*vz)) * (gamma - 1.0); - P = fmax(P, (Real) 1.0e-20); - // find the max wavespeed in that cell, use it to calculate the inverse timestep - cs = sqrt(d_inv * gamma * P); - max_dti[tid] = fmax((fabs(vx)+cs)/dx, (fabs(vy)+cs)/dy); - } - __syncthreads(); - - // do the reduction in shared memory (find the max inverse timestep in the block) - for (unsigned int s=1; s n_ghost-1 && xid < nx-n_ghost && yid > n_ghost-1 && yid < ny-n_ghost) + { + // every thread collects the conserved variables it needs from global memory + d = dev_conserved[ id]; + d_inv = 1.0 / d; + vx = dev_conserved[1*n_cells + id] * d_inv; + vy = dev_conserved[2*n_cells + id] * d_inv; + vz = dev_conserved[3*n_cells + id] * d_inv; + P = (dev_conserved[4*n_cells + id] - 0.5*d*(vx*vx + vy*vy + vz*vz)) * (gamma - 1.0); + P = fmax(P, (Real) 1.0e-20); + // find the max wavespeed in that cell, use it to calculate the inverse timestep + cs = sqrt(d_inv * gamma * P); + max_dti = fmax(max_dti,fmax((fabs(vx)+cs)/dx, (fabs(vy)+cs)/dy)); } - __syncthreads(); } - // write the result for this block to global memory - if (tid == 0) dti_array[blockId] = max_dti[0]; - + // do the grid wide reduction (find the max inverse timestep in the grid) + reduction_utilities::gridReduceMax(max_dti, dev_dti); } -__global__ void Calc_dt_3D(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dx, Real dy, Real dz, Real *dti_array, Real gamma, Real max_dti_slow) +__global__ void Calc_dt_3D(Real *dev_conserved, Real *dev_dti, Real gamma, int n_ghost, int n_fields, int nx, int ny, int nz, Real dx, Real dy, Real dz) { - __shared__ Real max_dti[TPB]; + Real max_dti = -DBL_MAX; Real d, d_inv, vx, vy, vz, E; - int id, xid, yid, zid, n_cells; - int tid; + #ifdef MHD + Real avgBx, avgBy, avgBz; + #endif //MHD + int xid, yid, zid, n_cells; n_cells = nx*ny*nz; - // get a global thread ID - id = threadIdx.x + blockIdx.x * blockDim.x; - zid = id / (nx*ny); - yid = (id - zid*nx*ny) / nx; - xid = id - zid*nx*ny - yid*nx; - // and a thread id within the block - tid = threadIdx.x; - - // set shared memory to 0 - max_dti[tid] = 0; - __syncthreads(); - - // threads corresponding to real cells do the calculation - if (xid > n_ghost-1 && xid < nx-n_ghost && yid > n_ghost-1 && yid < ny-n_ghost && zid > n_ghost-1 && zid < nz-n_ghost) + // Grid stride loop to perform as much of the reduction as possible. The + // fact that `id` has type `size_t` is important. I'm not totally sure why + // but setting it to int results in some kind of silent over/underflow issue + // even though we're not hitting those kinds of numbers. Setting it to type + // uint or size_t fixes them + for(size_t id = threadIdx.x + blockIdx.x * blockDim.x; id < n_cells; id += blockDim.x * gridDim.x) { - // every thread collects the conserved variables it needs from global memory - d = dev_conserved[ id]; - d_inv = 1.0 / d; - vx = dev_conserved[1*n_cells + id] * d_inv; - vy = dev_conserved[2*n_cells + id] * d_inv; - vz = dev_conserved[3*n_cells + id] * d_inv; - E = dev_conserved[4*n_cells + id]; - #ifdef MHD - // Compute the cell centered magnetic field using a straight average of - // the faces - Real avgBx, avgBy, avgBz; - mhdUtils::cellCenteredMagneticFields(dev_conserved, id, xid, yid, zid, n_cells, nx, ny, avgBx, avgBy, avgBz); - #endif //MHD - - // Compute the maximum inverse crossing time in the cell - #ifdef MHD - max_dti[tid] = mhdInverseCrossingTime(E, d, d_inv, vx, vy, vz, avgBx, avgBy, avgBz, dx, dy, dz, gamma); - #else // not MHD - max_dti[tid] = hydroInverseCrossingTime(E, d, d_inv, vx, vy, vz, dx, dy, dz, gamma); - #endif //MHD - - #ifdef AVERAGE_SLOW_CELLS - // If the cell delta_t is smaller than the min_delta_t, then the cell is averaged over its neighbors - if (max_dti[tid] > max_dti_slow){ - // Average this cell - printf(" Average Slow Cell [ %d %d %d ] -> dt_cell=%f dt_min=%f\n", xid, yid, zid, 1./max_dti[tid], 1./max_dti_slow ); - Average_Cell_All_Fields( xid, yid, zid, nx, ny, nz, n_cells, n_fields, dev_conserved ); - - // Recompute max_dti for this cell - d = dev_conserved[ id]; + // get a global thread ID + cuda_utilities::compute3DIndices(id, nx, ny, xid, yid, zid); + + // threads corresponding to real cells do the calculation + if (xid > n_ghost-1 && xid < nx-n_ghost && yid > n_ghost-1 && yid < ny-n_ghost && zid > n_ghost-1 && zid < nz-n_ghost) + { + // every thread collects the conserved variables it needs from global memory + d = dev_conserved[ id]; d_inv = 1.0 / d; - vx = dev_conserved[1*n_cells + id] * d_inv; - vy = dev_conserved[2*n_cells + id] * d_inv; - vz = dev_conserved[3*n_cells + id] * d_inv; - E = dev_conserved[4*n_cells + id]; + vx = dev_conserved[1*n_cells + id] * d_inv; + vy = dev_conserved[2*n_cells + id] * d_inv; + vz = dev_conserved[3*n_cells + id] * d_inv; + E = dev_conserved[4*n_cells + id]; #ifdef MHD // Compute the cell centered magnetic field using a straight average of // the faces @@ -643,64 +597,141 @@ __global__ void Calc_dt_3D(Real *dev_conserved, int nx, int ny, int nz, int n_gh // Compute the maximum inverse crossing time in the cell #ifdef MHD - max_dti[tid] = mhdInverseCrossingTime(E, d, d_inv, vx, vy, vz, avgBx, avgBy, avgBz, dx, dy, dz, gamma); + max_dti = fmax(max_dti,mhdInverseCrossingTime(E, d, d_inv, vx, vy, vz, avgBx, avgBy, avgBz, dx, dy, dz, gamma)); #else // not MHD - max_dti[tid] = hydroInverseCrossingTime(E, d, d_inv, vx, vy, vz, dx, dy, dz, gamma); + max_dti = fmax(max_dti,hydroInverseCrossingTime(E, d, d_inv, vx, vy, vz, dx, dy, dz, gamma)); #endif //MHD - } - #endif - } - __syncthreads(); - - // do the reduction in shared memory (find the max inverse timestep in the block) - for (unsigned int s=1; s 1 && ny == 1 && nz == 1) //1D { - hipLaunchKernelGGL(Calc_dt_1D, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, nx, n_ghost, dx, dev_dti_array, gamma); + hipLaunchKernelGGL(Calc_dt_1D, numBlocks, threadsPerBlock, 0, 0, dev_conserved, dev_dti, gamma, n_ghost, nx, dx); } else if (nx > 1 && ny > 1 && nz == 1) //2D { - hipLaunchKernelGGL(Calc_dt_2D, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, n_ghost, dx, dy, dev_dti_array, gamma); + hipLaunchKernelGGL(Calc_dt_2D, numBlocks, threadsPerBlock, 0, 0, dev_conserved, dev_dti, gamma, n_ghost, nx, ny, dx, dy); } else if (nx > 1 && ny > 1 && nz > 1) //3D { - hipLaunchKernelGGL(Calc_dt_3D, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, nz, n_ghost, n_fields, dx, dy, dz, dev_dti_array, gamma, max_dti_slow); + hipLaunchKernelGGL(Calc_dt_3D, numBlocks, threadsPerBlock, 0, 0, dev_conserved, dev_dti, gamma, n_ghost, n_fields, nx, ny, nz, dx, dy, dz); } CudaCheckError(); - // copy dev_dti_array to host_dti_array - CudaSafeCall( cudaMemcpy(host_dti_array, dev_dti_array, ngrid*sizeof(Real), cudaMemcpyDeviceToHost) ); + Real max_dti=0; + + /* Uncomment the below if we fix the AtomicDouble bug - Alwin + // copy device side max_dti to host side max_dti - Real max_dti = 0.0; - for (int i=0; i 1 && ny > 1 && nz > 1){ //3D + hipLaunchKernelGGL(Average_Slow_Cells_3D, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, nz, n_ghost, n_fields, dx, dy, dz, gamma, max_dti_slow ); + } } +__global__ void Average_Slow_Cells_3D(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dx, Real dy, Real dz, Real gamma, Real max_dti_slow ){ + + int id, xid, yid, zid, n_cells; + Real d, d_inv, vx, vy, vz, E, max_dti; + Real speed, temp, P, cs; + #ifdef MHD + Real avgBx, avgBy, avgBz; + #endif //MHD + + // get a global thread ID + id = threadIdx.x + blockIdx.x * blockDim.x; + n_cells = nx*ny*nz; + + cuda_utilities::compute3DIndices(id, nx, ny, xid, yid, zid); + + + // threads corresponding to real cells do the calculation + if (xid > n_ghost-1 && xid < nx-n_ghost && yid > n_ghost-1 && yid < ny-n_ghost && zid > n_ghost-1 && zid < nz-n_ghost) + { + d = dev_conserved[ id]; + d_inv = 1.0 / d; + vx = dev_conserved[1*n_cells + id] * d_inv; + vy = dev_conserved[2*n_cells + id] * d_inv; + vz = dev_conserved[3*n_cells + id] * d_inv; + E = dev_conserved[4*n_cells + id]; + + #ifdef MHD + // Compute the cell centered magnetic field using a straight average of the faces + mhdUtils::cellCenteredMagneticFields(dev_conserved, id, xid, yid, zid, n_cells, nx, ny, avgBx, avgBy, avgBz); + #endif //MHD + + // Compute the maximum inverse crossing time in the cell + #ifdef MHD + max_dti = mhdInverseCrossingTime(E, d, d_inv, vx, vy, vz, avgBx, avgBy, avgBz, dx, dy, dz, gamma); + #else // not MHD + max_dti = hydroInverseCrossingTime(E, d, d_inv, vx, vy, vz, dx, dy, dz, gamma); + #endif //MHD + + if (max_dti > max_dti_slow){ + speed = sqrt(vx*vx + vy*vy + vz*vz); + temp = (gamma - 1)*(E - 0.5*(speed*speed)*d)*ENERGY_UNIT/(d*DENSITY_UNIT/0.6/MP)/KB; + P = (E - 0.5*d*(vx*vx + vy*vy + vz*vz)) * (gamma - 1.0); + cs = sqrt(d_inv * gamma * P)*VELOCITY_UNIT*1e-5; + // Average this cell + printf(" Average Slow Cell [ %d %d %d ] -> dt_cell=%f dt_min=%f, n=%.3e, T=%.3e, v=%.3e (%.3e, %.3e, %.3e), cs=%.3e\n", xid, yid, zid, 1./max_dti, 1./max_dti_slow, + dev_conserved[id]*DENSITY_UNIT/0.6/MP, temp, speed*VELOCITY_UNIT*1e-5, vx*VELOCITY_UNIT*1e-5, vy*VELOCITY_UNIT*1e-5, vz*VELOCITY_UNIT*1e-5, cs); + Average_Cell_All_Fields( xid, yid, zid, nx, ny, nz, n_cells, n_fields, dev_conserved ); + } + } +} +#endif //AVERAGE_SLOW_CELLS + + #ifdef DE __global__ void Partial_Update_Advected_Internal_Energy_1D( Real *dev_conserved, Real *Q_Lx, Real *Q_Rx, int nx, int n_ghost, Real dx, Real dt, Real gamma, int n_fields ){ @@ -731,7 +762,7 @@ __global__ void Partial_Update_Advected_Internal_Energy_1D( Real *dev_conserved, E = dev_conserved[4*n_cells + id]; GE = dev_conserved[(n_fields-1)*n_cells + id]; E_kin = 0.5 * d * ( vx*vx + vy*vy + vz*vz ); - P = Get_Pressure_From_DE( E, E - E_kin, GE, gamma ); + P = hydro_utilities::Get_Pressure_From_DE( E, E - E_kin, GE, gamma ); P = fmax(P, (Real) TINY_NUMBER); imo = xid-1; @@ -780,7 +811,7 @@ __global__ void Partial_Update_Advected_Internal_Energy_2D( Real *dev_conserved, E = dev_conserved[4*n_cells + id]; GE = dev_conserved[(n_fields-1)*n_cells + id]; E_kin = 0.5 * d * ( vx*vx + vy*vy + vz*vz ); - P = Get_Pressure_From_DE( E, E - E_kin, GE, gamma ); + P = hydro_utilities::Get_Pressure_From_DE( E, E - E_kin, GE, gamma ); P = fmax(P, (Real) TINY_NUMBER); imo = xid-1 + yid*nx; @@ -833,7 +864,7 @@ __global__ void Partial_Update_Advected_Internal_Energy_3D( Real *dev_conserved, E = dev_conserved[4*n_cells + id]; GE = dev_conserved[(n_fields-1)*n_cells + id]; E_kin = 0.5 * d * ( vx*vx + vy*vy + vz*vz ); - P = Get_Pressure_From_DE( E, E - E_kin, GE, gamma ); + P = hydro_utilities::Get_Pressure_From_DE( E, E - E_kin, GE, gamma ); P = fmax(P, (Real) TINY_NUMBER); imo = xid-1 + yid*nx + zid*nx*ny; @@ -1210,9 +1241,12 @@ __device__ void Average_Cell_All_Fields( int i, int j, int k, int nx, int ny, in Average_Cell_Single_Field( 4, i, j, k, nx, ny, nz, ncells, conserved ); #ifdef MHD // Average MHD - Average_Cell_Single_Field( 5+NSCALARS, i, j, k, nx, ny, nz, ncells, conserved ); - Average_Cell_Single_Field( 6+NSCALARS, i, j, k, nx, ny, nz, ncells, conserved ); - Average_Cell_Single_Field( 7+NSCALARS, i, j, k, nx, ny, nz, ncells, conserved ); + Average_Cell_Single_Field( 5+NSCALARS, i, j, k, nx, ny, nz, ncells, conserved ); + Average_Cell_Single_Field( 6+NSCALARS, i, j, k, nx, ny, nz, ncells, conserved ); + Average_Cell_Single_Field( 7+NSCALARS, i, j, k, nx, ny, nz, ncells, conserved ); + Average_Cell_Single_Field( 5+NSCALARS, i-1, j, k, nx, ny, nz, ncells, conserved ); + Average_Cell_Single_Field( 6+NSCALARS, i, j-1, k, nx, ny, nz, ncells, conserved ); + Average_Cell_Single_Field( 7+NSCALARS, i, j, k-1, nx, ny, nz, ncells, conserved ); #endif //MHD #ifdef DE // Average GasEnergy diff --git a/src/hydro/hydro_cuda.h b/src/hydro/hydro_cuda.h index acdaf58c4..c801882d3 100644 --- a/src/hydro/hydro_cuda.h +++ b/src/hydro/hydro_cuda.h @@ -8,7 +8,6 @@ #include "../global/global.h" #include "../utils/mhd_utilities.h" - __global__ void Update_Conserved_Variables_1D(Real *dev_conserved, Real *dev_F, int n_cells, int x_off, int n_ghost, Real dx, Real xbound, Real dt, Real gamma, int n_fields); @@ -76,15 +75,9 @@ __device__ __host__ Real mhdInverseCrossingTime(Real const &E, Real const &dz, Real const &gamma); -__global__ void Calc_dt_1D(Real *dev_conserved, int n_cells, int n_ghost, Real dx, Real *dti_array, Real gamma); - - -__global__ void Calc_dt_2D(Real *dev_conserved, int nx, int ny, int n_ghost, Real dx, Real dy, Real *dti_array, Real gamma); - +__global__ void Calc_dt_3D(Real *dev_conserved, Real *dev_dti, Real gamma, int n_ghost, int n_fields, int nx, int ny, int nz, Real dx, Real dy, Real dz); -__global__ void Calc_dt_3D(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dx, Real dy, Real dz, Real *dti_array, Real gamma, Real max_dti_slow); - -Real Calc_dt_GPU(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dx, Real dy, Real dz, Real gamma, Real max_dti_slow); +Real Calc_dt_GPU(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dx, Real dy, Real dz, Real gamma ); __global__ void Sync_Energies_1D(Real *dev_conserved, int nx, int n_ghost, Real gamma, int n_fields); @@ -94,23 +87,16 @@ __global__ void Sync_Energies_2D(Real *dev_conserved, int nx, int ny, int n_ghos __global__ void Sync_Energies_3D(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, Real gamma, int n_fields); -#ifdef TEMPERATURE_FLOOR -__global__ void Apply_Temperature_Floor(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real U_floor ); -#endif - -static inline __host__ __device__ Real Get_Pressure_From_DE( Real E, Real U_total, Real U_advected, Real gamma ){ +#ifdef AVERAGE_SLOW_CELLS - Real U, P; - Real eta = DE_ETA_1; +void Average_Slow_Cells(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dx, Real dy, Real dz, Real gamma, Real max_dti_slow ); - // Apply same condition as Byan+2013 to select the internal energy from which compute pressure. - if( U_total / E > eta ) U = U_total; - else U = U_advected; - - P = U * (gamma - 1.0); - return P; -} +__global__ void Average_Slow_Cells_3D(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dx, Real dy, Real dz, Real gamma, Real max_dti_slow ); +#endif +#ifdef TEMPERATURE_FLOOR +__global__ void Apply_Temperature_Floor(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real U_floor ); +#endif __global__ void Partial_Update_Advected_Internal_Energy_1D( Real *dev_conserved, Real *Q_Lx, Real *Q_Rx, int nx, int n_ghost, Real dx, Real dt, Real gamma, int n_fields ); diff --git a/src/hydro/hydro_cuda_tests.cu b/src/hydro/hydro_cuda_tests.cu index fdada1db8..a6d00e96b 100644 --- a/src/hydro/hydro_cuda_tests.cu +++ b/src/hydro/hydro_cuda_tests.cu @@ -48,7 +48,6 @@ TEST(tHYDROCalcDt3D, CorrectInputExpectCorrectOutput) Real *dev_conserved; Real *dev_dti_array; Real gamma = 5.0/3.0; - Real max_dti_slow = 1e10; // Allocate host and device arrays and copy data cudaHostAlloc(&host_conserved, n_fields*sizeof(Real), cudaHostAllocDefault); @@ -64,9 +63,10 @@ TEST(tHYDROCalcDt3D, CorrectInputExpectCorrectOutput) // Copy host data to device arrray CudaSafeCall(cudaMemcpy(dev_conserved, host_conserved, n_fields*sizeof(Real), cudaMemcpyHostToDevice)); + //__global__ void Calc_dt_3D(Real *dev_conserved, Real *dev_dti, Real gamma, int n_ghost, int n_fields, int nx, int ny, int nz, Real dx, Real dy, Real dz) // Run the kernel - hipLaunchKernelGGL(Calc_dt_3D, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, nz, n_ghost, n_fields, dx, dy, dz, dev_dti_array, gamma, max_dti_slow); + hipLaunchKernelGGL(Calc_dt_3D, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, dev_dti_array, gamma, n_ghost, n_fields, nx, ny, nz, dx, dy, dz); CudaCheckError(); // Copy the dt value back from the GPU diff --git a/src/integrators/CTU_1D_cuda.cu b/src/integrators/CTU_1D_cuda.cu index 11e9d7abf..2c25cbdd1 100644 --- a/src/integrators/CTU_1D_cuda.cu +++ b/src/integrators/CTU_1D_cuda.cu @@ -32,6 +32,7 @@ void CTU_Algorithm_1D_CUDA(Real *d_conserved, int nx, int x_off, int n_ghost, Re int n_cells = nx; int ny = 1; int nz = 1; + int ngrid = (n_cells + TPB - 1) / TPB; // set the dimensions of the cuda grid dim3 dimGrid(ngrid, 1, 1); diff --git a/src/integrators/CTU_2D_cuda.cu b/src/integrators/CTU_2D_cuda.cu index c9ba084f6..865e25d85 100644 --- a/src/integrators/CTU_2D_cuda.cu +++ b/src/integrators/CTU_2D_cuda.cu @@ -35,6 +35,8 @@ void CTU_Algorithm_2D_CUDA(Real *d_conserved, int nx, int ny, int x_off, int y_o //concatenated into a 1-d array int n_cells = nx*ny; int nz = 1; + int ngrid = (n_cells + TPB - 1) / TPB; + // set values for GPU kernels // number of blocks per 1D grid diff --git a/src/integrators/CTU_3D_cuda.cu b/src/integrators/CTU_3D_cuda.cu index 32f8dc28c..7ef65cdd6 100644 --- a/src/integrators/CTU_3D_cuda.cu +++ b/src/integrators/CTU_3D_cuda.cu @@ -30,13 +30,14 @@ __global__ void Evolve_Interface_States_3D(Real *dev_conserved, Real *dev_Q_Lx, Real dx, Real dy, Real dz, Real dt, int n_fields); -void CTU_Algorithm_3D_CUDA(Real *d_conserved, int nx, int ny, int nz, int x_off, int y_off, int z_off, int n_ghost, Real dx, Real dy, Real dz, Real xbound, Real ybound, Real zbound, Real dt, int n_fields , Real density_floor, Real U_floor, Real *host_grav_potential, Real max_dti_slow ) +void CTU_Algorithm_3D_CUDA(Real *d_conserved, int nx, int ny, int nz, int x_off, int y_off, int z_off, int n_ghost, Real dx, Real dy, Real dz, Real xbound, Real ybound, Real zbound, Real dt, int n_fields , Real density_floor, Real U_floor, Real *host_grav_potential ) { //Here, *dev_conserved contains the entire //set of conserved variables on the grid //concatenated into a 1-d array int n_cells = nx*ny*nz; + int ngrid = (n_cells + TPB - 1) / TPB; // set values for GPU kernels // number of blocks per 1D grid diff --git a/src/integrators/CTU_3D_cuda.h b/src/integrators/CTU_3D_cuda.h index 3dec12a60..5d875eeae 100644 --- a/src/integrators/CTU_3D_cuda.h +++ b/src/integrators/CTU_3D_cuda.h @@ -8,7 +8,7 @@ #include "../global/global.h" -void CTU_Algorithm_3D_CUDA(Real *d_conserved, int nx, int ny, int nz, int x_off, int y_off, int z_off, int n_ghost, Real dx, Real dy, Real dz, Real xbound, Real ybound, Real zbound, Real dt, int n_fields, Real density_floor, Real U_floor, Real *host_grav_potential, Real max_dti_slow ); +void CTU_Algorithm_3D_CUDA(Real *d_conserved, int nx, int ny, int nz, int x_off, int y_off, int z_off, int n_ghost, Real dx, Real dy, Real dz, Real xbound, Real ybound, Real zbound, Real dt, int n_fields, Real density_floor, Real U_floor, Real *host_grav_potential ); void Free_Memory_CTU_3D(); diff --git a/src/integrators/VL_1D_cuda.cu b/src/integrators/VL_1D_cuda.cu index e2a540c75..0eaecc899 100644 --- a/src/integrators/VL_1D_cuda.cu +++ b/src/integrators/VL_1D_cuda.cu @@ -37,6 +37,7 @@ void VL_Algorithm_1D_CUDA(Real *d_conserved, int nx, int x_off, int n_ghost, Rea int n_cells = nx; int ny = 1; int nz = 1; + int ngrid = (n_cells + TPB - 1) / TPB; // set the dimensions of the cuda grid dim3 dimGrid(ngrid, 1, 1); diff --git a/src/integrators/VL_2D_cuda.cu b/src/integrators/VL_2D_cuda.cu index 8d22c86be..28e7046d5 100644 --- a/src/integrators/VL_2D_cuda.cu +++ b/src/integrators/VL_2D_cuda.cu @@ -37,6 +37,7 @@ void VL_Algorithm_2D_CUDA ( Real *d_conserved, int nx, int ny, int x_off, int y_ int n_cells = nx*ny; int nz = 1; + int ngrid = (n_cells + TPB - 1) / TPB; // set values for GPU kernels // number of blocks per 1D grid diff --git a/src/integrators/VL_3D_cuda.cu b/src/integrators/VL_3D_cuda.cu index 9a65a62d0..097708ede 100644 --- a/src/integrators/VL_3D_cuda.cu +++ b/src/integrators/VL_3D_cuda.cu @@ -8,10 +8,11 @@ #include #include #include "../utils/gpu.hpp" +#include "../utils/hydro_utilities.h" #include "../global/global.h" #include "../global/global_cuda.h" -#include "../hydro/hydro_cuda.h" #include "../integrators/VL_3D_cuda.h" +#include "../hydro/hydro_cuda.h" #include "../reconstruction/pcm_cuda.h" #include "../reconstruction/plmp_cuda.h" #include "../reconstruction/plmc_cuda.h" @@ -30,7 +31,7 @@ __global__ void Update_Conserved_Variables_3D_half(Real *dev_conserved, Real *de void VL_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, int ny, int nz, int x_off, int y_off, int z_off, int n_ghost, Real dx, Real dy, Real dz, Real xbound, Real ybound, Real zbound, Real dt, int n_fields, Real density_floor, - Real U_floor, Real *host_grav_potential, Real max_dti_slow) + Real U_floor, Real *host_grav_potential ) { //Here, *dev_conserved contains the entire @@ -38,6 +39,7 @@ void VL_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, int //concatenated into a 1-d array int n_cells = nx*ny*nz; + int ngrid = (n_cells + TPB - 1) / TPB; // set values for GPU kernels // number of blocks per 1D grid @@ -250,7 +252,7 @@ __global__ void Update_Conserved_Variables_3D_half(Real *dev_conserved, Real *de E = dev_conserved[4*n_cells + id]; GE = dev_conserved[(n_fields-1)*n_cells + id]; E_kin = 0.5 * d * ( vx*vx + vy*vy + vz*vz ); - P = Get_Pressure_From_DE( E, E - E_kin, GE, gamma ); + P = hydro_utilities::Get_Pressure_From_DE( E, E - E_kin, GE, gamma ); P = fmax(P, (Real) TINY_NUMBER); // P = (dev_conserved[4*n_cells + id] - 0.5*d*(vx*vx + vy*vy + vz*vz)) * (gamma - 1.0); //if (d < 0.0 || d != d) printf("Negative density before half step update.\n"); diff --git a/src/integrators/VL_3D_cuda.h b/src/integrators/VL_3D_cuda.h index 250b9e609..0d28710ab 100644 --- a/src/integrators/VL_3D_cuda.h +++ b/src/integrators/VL_3D_cuda.h @@ -12,7 +12,7 @@ void VL_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, int ny, int nz, int x_off, int y_off, int z_off, int n_ghost, Real dx, Real dy, Real dz, Real xbound, Real ybound, Real zbound, Real dt, int n_fields, Real density_floor, - Real U_floor, Real *host_grav_potential, Real max_dti_slow ); + Real U_floor, Real *host_grav_potential ); void Free_Memory_VL_3D(); diff --git a/src/integrators/simple_3D_cuda.cu b/src/integrators/simple_3D_cuda.cu index 58ea62e9e..cee7daeaa 100644 --- a/src/integrators/simple_3D_cuda.cu +++ b/src/integrators/simple_3D_cuda.cu @@ -30,12 +30,13 @@ void Simple_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, int ny, int nz, int x_off, int y_off, int z_off, int n_ghost, Real dx, Real dy, Real dz, Real xbound, Real ybound, Real zbound, Real dt, int n_fields, Real density_floor, - Real U_floor, Real *host_grav_potential, Real max_dti_slow ) + Real U_floor, Real *host_grav_potential ) { //Here, *dev_conserved contains the entire //set of conserved variables on the grid //concatenated into a 1-d array int n_cells = nx*ny*nz; + int ngrid = (n_cells + TPB - 1) / TPB; // set values for GPU kernels // number of blocks per 1D grid @@ -47,9 +48,12 @@ void Simple_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, temp_potential = host_grav_potential; if ( !memory_allocated ){ - + size_t global_free, global_total; + CudaSafeCall( cudaMemGetInfo( &global_free, &global_total ) ); + // allocate memory on the GPU - // CudaSafeCall( cudaMalloc((void**)&dev_conserved, n_fields*n_cells*sizeof(Real)) ); + chprintf( " Allocating Hydro Memory: nfields: %d n_cells: %d nx: %d ny: %d nz: %d \n", n_fields, n_cells, nx, ny, nz ); + chprintf( " Memory needed: %f GB Free: %f GB Total: %f GB \n", n_fields*n_cells*sizeof(Real)/1e9, global_free/1e9, global_total/1e9 ); dev_conserved = d_conserved; CudaSafeCall( cudaMalloc((void**)&Q_Lx, n_fields*n_cells*sizeof(Real)) ); CudaSafeCall( cudaMalloc((void**)&Q_Rx, n_fields*n_cells*sizeof(Real)) ); @@ -71,6 +75,7 @@ void Simple_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, // If memory is single allocated: memory_allocated becomes true and successive timesteps won't allocate memory. // If the memory is not single allocated: memory_allocated remains Null and memory is allocated every timestep. memory_allocated = true; + chprintf( " Memory allocated \n" ); } @@ -78,7 +83,7 @@ void Simple_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, CudaSafeCall( cudaMemcpy(dev_grav_potential, temp_potential, n_cells*sizeof(Real), cudaMemcpyHostToDevice) ); #endif - + // Step 1: Construct left and right interface values using updated conserved variables #ifdef PCM hipLaunchKernelGGL(PCM_Reconstruction_3D, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, Q_Ly, Q_Ry, Q_Lz, Q_Rz, nx, ny, nz, n_ghost, gama, n_fields); @@ -104,8 +109,8 @@ void Simple_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, hipLaunchKernelGGL(PPMC_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lz, Q_Rz, nx, ny, nz, n_ghost, dz, dt, gama, 2, n_fields); CudaCheckError(); #endif //PPMC - - + + // Step 2: Calculate the fluxes again #ifdef EXACT hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); @@ -128,23 +133,23 @@ void Simple_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, hipLaunchKernelGGL(Calculate_HLL_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lz, Q_Rz, F_z, nx, ny, nz, n_ghost, gama, 2, n_fields); #endif //HLL CudaCheckError(); - + #ifdef DE // Compute the divergence of Vel before updating the conserved array, this solves synchronization issues when adding this term on Update_Conserved_Variables_3D hipLaunchKernelGGL(Partial_Update_Advected_Internal_Energy_3D, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, Q_Ly, Q_Ry, Q_Lz, Q_Rz, nx, ny, nz, n_ghost, dx, dy, dz, dt, gama, n_fields ); CudaCheckError(); #endif - + // Step 3: Update the conserved variable array hipLaunchKernelGGL(Update_Conserved_Variables_3D, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, Q_Ly, Q_Ry, Q_Lz, Q_Rz, F_x, F_y, F_z, nx, ny, nz, x_off, y_off, z_off, n_ghost, dx, dy, dz, xbound, ybound, zbound, dt, gama, n_fields, density_floor, dev_grav_potential); CudaCheckError(); - + #ifdef DE hipLaunchKernelGGL(Select_Internal_Energy_3D, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, nz, n_ghost, n_fields); hipLaunchKernelGGL(Sync_Energies_3D, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, nz, n_ghost, gama, n_fields); CudaCheckError(); #endif - + #ifdef TEMPERATURE_FLOOR hipLaunchKernelGGL(Apply_Temperature_Floor, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, nz, n_ghost, n_fields, U_floor ); CudaCheckError(); diff --git a/src/integrators/simple_3D_cuda.h b/src/integrators/simple_3D_cuda.h index 54300fc2c..53789e5a1 100644 --- a/src/integrators/simple_3D_cuda.h +++ b/src/integrators/simple_3D_cuda.h @@ -13,7 +13,7 @@ void Simple_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, int ny, int nz, int x_off, int y_off, int z_off, int n_ghost, Real dx, Real dy, Real dz, Real xbound, Real ybound, Real zbound, Real dt, int n_fields, Real density_floor, - Real U_floor, Real *host_grav_potential, Real max_dti_slow ); + Real U_floor, Real *host_grav_potential ); void Free_Memory_Simple_3D(); diff --git a/src/io/io.cpp b/src/io/io.cpp index 72b1d2279..49b9d2964 100644 --- a/src/io/io.cpp +++ b/src/io/io.cpp @@ -79,6 +79,11 @@ void WriteData(Grid3D &G, struct parameters P, int nfile) cudaMemcpy(G.C.density, G.C.device, G.H.n_fields*G.H.n_cells*sizeof(Real), cudaMemcpyDeviceToHost); chprintf( "\nSaving Snapshot: %d \n", nfile ); + + #ifdef HDF5 + // Initialize HDF5 interface + H5open(); + #endif #ifdef N_OUTPUT_COMPLETE //If nfile is multiple of N_OUTPUT_COMPLETE then output all data @@ -137,6 +142,11 @@ void WriteData(Grid3D &G, struct parameters P, int nfile) chprintf( "\n" ); G.H.Output_Now = false; #endif + + #ifdef HDF5 + // Cleanup HDF5 + H5close(); + #endif #ifdef MPI_CHOLLA MPI_Barrier(world); @@ -185,7 +195,7 @@ void OutputData(Grid3D &G, struct parameters P, int nfile) #elif defined HDF5 hid_t file_id; /* file identifier */ herr_t status; - + // Create a new file using default properties. file_id = H5Fcreate(filename, H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT); @@ -194,10 +204,10 @@ void OutputData(Grid3D &G, struct parameters P, int nfile) // write the conserved variables to the output file G.Write_Grid_HDF5(file_id); - + // close the file status = H5Fclose(file_id); - + if (status < 0) {printf("File write failed.\n"); exit(-1); } #else @@ -409,16 +419,15 @@ void OutputSlices(Grid3D &G, struct parameters P, int nfile) * \brief Write some relevant header info to a text output file. */ void Grid3D::Write_Header_Text(FILE *fp) { - // Write the header info to the output file fprintf(fp, "Header Information\n"); + fprintf(fp, "Git Commit Hash = %s\n", GIT_HASH); + fprintf(fp, "Macro Flags = %s\n", MACRO_FLAGS); fprintf(fp, "n_step: %d sim t: %f sim dt: %f\n", H.n_step, H.t, H.dt); fprintf(fp, "mass unit: %e length unit: %e time unit: %e\n", MASS_UNIT, LENGTH_UNIT, TIME_UNIT); fprintf(fp, "nx: %d ny: %d nz: %d\n", H.nx, H.ny, H.nz); fprintf(fp, "xmin: %f ymin: %f zmin: %f\n", H.xbound, H.ybound, H.zbound); - fprintf(fp, "xlen: %f ylen: %f zlen: %f\n", H.domlen_x, H.domlen_y, H.domlen_z); fprintf(fp, "t: %f\n", H.t); - } @@ -440,9 +449,6 @@ void Grid3D::Write_Header_Binary(FILE *fp) fwrite(&H.xbound, sizeof(Real), 1, fp); fwrite(&H.ybound, sizeof(Real), 1, fp); fwrite(&H.zbound, sizeof(Real), 1, fp); - fwrite(&H.domlen_x, sizeof(Real), 1, fp); - fwrite(&H.domlen_y, sizeof(Real), 1, fp); - fwrite(&H.domlen_z, sizeof(Real), 1, fp); fwrite(&H.xblocal, sizeof(Real), 1, fp); fwrite(&H.yblocal, sizeof(Real), 1, fp); fwrite(&H.zblocal, sizeof(Real), 1, fp); @@ -481,6 +487,22 @@ void Grid3D::Write_Header_HDF5(hid_t file_id) status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &gama); // Close the attribute status = H5Aclose(attribute_id); + + // String attributes + hid_t stringType = H5Tcopy(H5T_C_S1); + H5Tset_size(stringType, H5T_VARIABLE); + + attribute_id = H5Acreate(file_id, "Git Commit Hash", stringType, dataspace_id, H5P_DEFAULT, H5P_DEFAULT); + const char * gitHash = GIT_HASH; + status = H5Awrite(attribute_id, stringType, &gitHash); + H5Aclose(attribute_id); + + attribute_id = H5Acreate(file_id, "Macro Flags", stringType, dataspace_id, H5P_DEFAULT, H5P_DEFAULT); + const char * macroFlags = MACRO_FLAGS; + status = H5Awrite(attribute_id, stringType, ¯oFlags); + H5Aclose(attribute_id); + + // Numeric Attributes attribute_id = H5Acreate(file_id, "t", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT); status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &H.t); status = H5Aclose(attribute_id); @@ -701,6 +723,22 @@ void Grid3D::Write_Header_Rotated_HDF5(hid_t file_id) status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &gama); // Close the attribute status = H5Aclose(attribute_id); + + // String attributes + hid_t stringType = H5Tcopy(H5T_C_S1); + H5Tset_size(stringType, H5T_VARIABLE); + + attribute_id = H5Acreate(file_id, "Git Commit Hash", stringType, dataspace_id, H5P_DEFAULT, H5P_DEFAULT); + const char * gitHash = GIT_HASH; + status = H5Awrite(attribute_id, stringType, &gitHash); + H5Aclose(attribute_id); + + attribute_id = H5Acreate(file_id, "Macro Flags", stringType, dataspace_id, H5P_DEFAULT, H5P_DEFAULT); + const char * macroFlags = MACRO_FLAGS; + status = H5Awrite(attribute_id, stringType, ¯oFlags); + H5Aclose(attribute_id); + + // Numeric Attributes attribute_id = H5Acreate(file_id, "t", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT); status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &H.t); status = H5Aclose(attribute_id); @@ -911,57 +949,42 @@ void Grid3D::Write_Grid_Text(FILE *fp) // 3D case else { fprintf(fp, "idx\tidy\tidz\trho\tmx\tmy\tmz\tE"); - #ifdef MHD - fprintf(fp, "\tmagX\tmagY\tmagZ"); - #endif //MHD #ifdef DE fprintf(fp, "\tge"); #endif + #ifdef MHD + fprintf(fp, "\tmagX\tmagY\tmagZ"); + #endif //MHD fprintf(fp, "\n"); - for (i=H.n_ghost; i < H.nx-H.n_ghost; i++) { - for (j=H.n_ghost; j < H.ny-H.n_ghost; j++) { - for (k=H.n_ghost; k < H.nz-H.n_ghost; k++) { + for (i=H.n_ghost-1; i < H.nx-H.n_ghost; i++) { + for (j=H.n_ghost-1; j < H.ny-H.n_ghost; j++) { + for (k=H.n_ghost-1; k < H.nz-H.n_ghost; k++) { id = i + j*H.nx + k*H.nx*H.ny; - fprintf(fp, "%d\t%d\t%d\t%f\t%f\t%f\t%f\t%f", i-H.n_ghost, j-H.n_ghost, k-H.n_ghost, C.density[id], C.momentum_x[id], C.momentum_y[id], C.momentum_z[id], C.Energy[id]); + + // Exclude the rightmost ghost cell on the "left" side for the hydro + // variables + if ((i >= H.n_ghost) and (j >= H.n_ghost) and (k >= H.n_ghost)) + { + fprintf(fp, "%d\t%d\t%d\t%f\t%f\t%f\t%f\t%f", i-H.n_ghost, j-H.n_ghost, k-H.n_ghost, C.density[id], C.momentum_x[id], C.momentum_y[id], C.momentum_z[id], C.Energy[id]); + #ifdef DE + fprintf(fp, "\t%f", C.GasEnergy[id]); + #endif //DE + } + else + { + fprintf(fp, "%d\t%d\t%d\tn/a\tn/a\tn/a\tn/a\tn/a", i-H.n_ghost, j-H.n_ghost, k-H.n_ghost); + #ifdef DE + fprintf(fp, "\tn/a"); + #endif //DE + } #ifdef MHD fprintf(fp, "\t%f\t%f\t%f", C.magnetic_x[id], C.magnetic_y[id], C.magnetic_z[id]); #endif //MHD - #ifdef DE - fprintf(fp, "\t%f", C.GasEnergy[id]); - #endif //DE fprintf(fp, "\n"); } - #ifdef MHD - // Save the last line of magnetic fields - id = i + j*H.nx + (H.nz-H.n_ghost)*H.nx*H.ny; - fprintf(fp, "%d\t%d\t%d\tNan\tNan\tNan\tNan\tNan\t%f\t%f\t%f", i-H.n_ghost, j-H.n_ghost, H.nz-2*H.n_ghost, C.magnetic_x[id], C.magnetic_y[id], C.magnetic_z[id]); - #ifdef DE - fprintf(fp, "\tNan"); - #endif //DE - fprintf(fp, "\n"); - #endif //MHD } - #ifdef MHD - // Save the last line of magnetic fields - id = i + (H.ny-H.n_ghost)*H.nx + (H.nz-H.n_ghost)*H.nx*H.ny; - fprintf(fp, "%d\t%d\t%d\tNan\tNan\tNan\tNan\tNan\t%f\t%f\t%f", i-H.n_ghost, H.ny-2*H.n_ghost, H.nz-2*H.n_ghost, C.magnetic_x[id], C.magnetic_y[id], C.magnetic_z[id]); - #ifdef DE - fprintf(fp, "\tNan"); - #endif //DE - fprintf(fp, "\n"); - #endif //MHD } - #ifdef MHD - // Save the last line of magnetic fields - id = (H.nx-H.n_ghost) + (H.ny-H.n_ghost)*H.nx + (H.nz-H.n_ghost)*H.nx*H.ny; - fprintf(fp, "%d\t%d\t%d\tNan\tNan\tNan\tNan\tNan\t%f\t%f\t%f", H.nx-2*H.n_ghost, H.ny-2*H.n_ghost, H.nz-2*H.n_ghost, C.magnetic_x[id], C.magnetic_y[id], C.magnetic_z[id]); - #ifdef DE - fprintf(fp, "\tNan"); - #endif //DE - fprintf(fp, "\n"); - #endif //MHD } - } @@ -1222,47 +1245,6 @@ void Grid3D::Write_Grid_HDF5(hid_t file_id) status = H5Dclose(dataset_id); #endif //DE - #ifdef MHD - // Start by creating a dataspace and buffer that is large enough for the - // magnetic field since it's one larger than the rest - free(dataset_buffer); - dataset_buffer = (Real *) malloc((H.nx_real+1)*sizeof(Real)); - - // Create the data space for the datasets - dims[0]++; - dataspace_id = H5Screate_simple(1, dims, NULL); - - // Copy the x magnetic field array to the memory buffer - memcpy(&dataset_buffer[0], &(C.magnetic_x[H.n_ghost]), (H.nx_real+1)*sizeof(Real)); - - // Create a dataset id for x magnetic field - dataset_id = H5Dcreate(file_id, "/magnetic_x", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); - // Write the x magnetic field array to file // NOTE: NEED TO FIX FOR FLOAT REAL!!! - status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer); - // Free the dataset id - status = H5Dclose(dataset_id); - - // Copy the y magnetic field array to the memory buffer - memcpy(&dataset_buffer[0], &(C.magnetic_y[H.n_ghost]), (H.nx_real+1)*sizeof(Real)); - - // Create a dataset id for y magnetic field - dataset_id = H5Dcreate(file_id, "/magnetic_y", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); - // Write the y magnetic field array to file // NOTE: NEED TO FIX FOR FLOAT REAL!!! - status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer); - // Free the dataset id - status = H5Dclose(dataset_id); - - // Copy the x magnetic field array to the memory buffer - memcpy(&dataset_buffer[0], &(C.magnetic_z[H.n_ghost]), (H.nx_real+1)*sizeof(Real)); - - // Create a dataset id for z magnetic field - dataset_id = H5Dcreate(file_id, "/magnetic_z", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); - // Write the z magnetic field array to file // NOTE: NEED TO FIX FOR FLOAT REAL!!! - status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer); - // Free the dataset id - status = H5Dclose(dataset_id); - #endif //MHD - // Free the dataspace id status = H5Sclose(dataspace_id); } @@ -1405,63 +1387,6 @@ void Grid3D::Write_Grid_HDF5(hid_t file_id) status = H5Dclose(dataset_id); #endif //DE - #ifdef MHD - // Start by creating a dataspace and buffer that is large enough for the - // magnetic field since it's one larger than the rest - free(dataset_buffer); - dataset_buffer = (Real *) malloc((H.ny_real+1)*(H.nx_real+1)*sizeof(Real)); - - // Create the data space for the datasets - dims[0]++; - dims[1]++; - dataspace_id = H5Screate_simple(2, dims, NULL); - - // Copy the x magnetic array to the memory buffer - for (j=0; j max_l ) max_l = fabs(C.magnetic_x[id]); @@ -3629,11 +3455,11 @@ void Grid3D::Read_Grid_HDF5(hid_t file_id, struct parameters P) min_l = 1e65; max_l = -1; // Copy the y magnetic field array to the grid - for (k=0; k max_l ) max_l = fabs(C.magnetic_y[id]); @@ -3667,11 +3493,11 @@ void Grid3D::Read_Grid_HDF5(hid_t file_id, struct parameters P) min_l = 1e65; max_l = -1; // Copy the z magnetic field array to the grid - for (k=0; k max_l ) max_l = fabs(C.magnetic_z[id]); @@ -3696,7 +3522,6 @@ void Grid3D::Read_Grid_HDF5(hid_t file_id, struct parameters P) #endif //MHD } free(dataset_buffer); - } #endif diff --git a/src/main.cpp b/src/main.cpp index a47b509d1..ded863dc2 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -13,12 +13,15 @@ #include "grid/grid3D.h" #include "io/io.h" #include "utils/error_handling.h" -#ifdef FEEDBACK +#ifdef SUPERNOVA #include "particles/supernova.h" #ifdef ANALYSIS #include "analysis/feedback_analysis.h" #endif -#endif //FEEDBACK +#endif //SUPERNOVA +#ifdef STAR_FORMATION +#include "particles/star_formation.h" +#endif int main(int argc, char *argv[]) @@ -54,6 +57,8 @@ int main(int argc, char *argv[]) if (argc < 2) { chprintf("usage: %s \n", argv[0]); + chprintf("Git Commit Hash = %s\n", GIT_HASH); + chprintf("Macro Flags = %s\n", MACRO_FLAGS); chexit(-1); } else { param_file = argv[1]; @@ -65,21 +70,29 @@ int main(int argc, char *argv[]) // read in the parameters parse_params (param_file, &P, argc, argv); // and output to screen + chprintf("Git Commit Hash = %s\n", GIT_HASH); + chprintf("Macro Flags = %s\n", MACRO_FLAGS); chprintf ("Parameter values: nx = %d, ny = %d, nz = %d, tout = %f, init = %s, boundaries = %d %d %d %d %d %d\n", P.nx, P.ny, P.nz, P.tout, P.init, P.xl_bcnd, P.xu_bcnd, P.yl_bcnd, P.yu_bcnd, P.zl_bcnd, P.zu_bcnd); if (strcmp(P.init, "Read_Grid") == 0 ) chprintf ("Input directory: %s\n", P.indir); chprintf ("Output directory: %s\n", P.outdir); - //Create a Log file to output run-time messages + //Create a Log file to output run-time messages and output the git hash and + //macro flags used Create_Log_File(P); + std::string message = "Git Commit Hash = " + std::string(GIT_HASH); + Write_Message_To_Log_File( message.c_str() ); + message = "Macro Flags = " + std::string(MACRO_FLAGS); + Write_Message_To_Log_File( message.c_str() ); + + // initialize the grid G.Initialize(&P); chprintf("Local number of grid cells: %d %d %d %d\n", G.H.nx_real, G.H.ny_real, G.H.nz_real, G.H.n_cells); - char *message = (char*)malloc(50 * sizeof(char)); - sprintf(message, "Initializing Simulation" ); - Write_Message_To_Log_File( message ); + message = "Initializing Simulation"; + Write_Message_To_Log_File( message.c_str() ); // Set initial conditions and calculate first dt chprintf("Setting initial conditions...\n"); @@ -94,8 +107,8 @@ int main(int argc, char *argv[]) #ifdef DE chprintf("\nUsing Dual Energy Formalism:\n eta_1: %0.3f eta_2: %0.4f\n", DE_ETA_1, DE_ETA_2 ); - sprintf(message, " eta_1: %0.3f eta_2: %0.3f ", DE_ETA_1, DE_ETA_2 ); - Write_Message_To_Log_File( message ); + message = " eta_1: " + std::to_string(DE_ETA_1) + " eta_2: " + std::to_string(DE_ETA_2); + Write_Message_To_Log_File( message.c_str() ); #endif #ifdef CPU_TIME @@ -125,27 +138,32 @@ int main(int argc, char *argv[]) #ifdef ANALYSIS G.Initialize_Analysis_Module(&P); if ( G.Analysis.Output_Now ) G.Compute_and_Output_Analysis(&P); - #ifdef FEEDBACK + #ifdef SUPERNOVA FeedbackAnalysis feedback_analysis(G); #endif #endif - #ifdef FEEDBACK //TODO: refactor this: encapsulate init in a method + #ifdef SUPERNOVA //TODO: refactor this: encapsulate init in a method G.countSN = 0; G.countResolved = 0; G.countUnresolved = 0; G.totalEnergy = 0; G.totalMomentum = 0; + G.totalUnresEnergy = 0; #ifdef PARTICLES_GPU #ifdef MPI_CHOLLA - Supernova::initState(&P, G.Particles.n_local, 4); + Supernova::initState(&P, G.Particles.n_total_initial); #else Supernova::initState(&P, G.Particles.n_local); #endif // MPI_CHOLLA #else // else we have PARTICLES_CPU //Supernova::initState(&P); #endif // PARTICLES_GPU - #endif // FEEDBACK + #endif // SUPERNOVA + + #ifdef STAR_FORMATION + star_formation::Initialize(G); + #endif #ifdef GRAVITY_ANALYTIC_COMP G.Setup_Analytic_Potential(&P); @@ -202,8 +220,8 @@ int main(int argc, char *argv[]) // Evolve the grid, one timestep at a time chprintf("Starting calculations.\n"); - sprintf(message, "Starting calculations." ); - Write_Message_To_Log_File( message ); + message = "Starting calculations."; + Write_Message_To_Log_File( message.c_str() ); while (G.H.t < P.tout) { // get the start time @@ -219,6 +237,10 @@ int main(int argc, char *argv[]) if (G.H.t + G.H.dt > outtime) G.H.dt = outtime - G.H.t; + #ifdef SUPERNOVA + G.Cluster_Feedback(); + #endif //SUPERNOVA + #ifdef PARTICLES //Advance the particles KDK( first step ): Velocities are updated by 0.5*dt and positions are updated by dt G.Advance_Particles( 1 ); @@ -253,21 +275,9 @@ int main(int argc, char *argv[]) G.Advance_Particles( 2 ); #endif - #ifdef FEEDBACK - Real fdti = G.Cluster_Feedback(); - if (fdti != 0 && dti != 0) { - chprintf("DTI COMP: returned: %.4e [%.4e kyr]\n", fdti, 1/fdti); - chprintf(" current: %.4e [ %.4e kyr ] \n", dti, 1/dti); - - } - if (fdti > dti) { - chprintf(" UPDATING dti\n"); - dti = fdti; - } - #ifdef ANALYSIS - feedback_analysis.Compute_Gas_Velocity_Dispersion(G); - #endif - #endif + #ifdef STAR_FORMATION + star_formation::Star_Formation(G); + #endif #ifdef CPU_TIME G.Timer.Total.End(); @@ -293,6 +303,9 @@ int main(int argc, char *argv[]) #ifdef ANALYSIS if ( G.Analysis.Output_Now ) G.Compute_and_Output_Analysis(&P); + #ifdef SUPERNOVA + feedback_analysis.Compute_Gas_Velocity_Dispersion(G); + #endif #endif // if ( P.n_steps_output > 0 && G.H.n_step % P.n_steps_output == 0) G.H.Output_Now = true; @@ -330,7 +343,6 @@ int main(int argc, char *argv[]) } #endif - } /*end loop over timesteps*/ @@ -339,8 +351,8 @@ int main(int argc, char *argv[]) G.Timer.Print_Average_Times( P ); #endif - sprintf(message, "Simulation completed successfully." ); - Write_Message_To_Log_File( message ); + message = "Simulation completed successfully."; + Write_Message_To_Log_File( message.c_str() ); // free the grid G.Reset(); diff --git a/src/model/disk_galaxy.h b/src/model/disk_galaxy.h index fd1e33c4f..d32099b2f 100644 --- a/src/model/disk_galaxy.h +++ b/src/model/disk_galaxy.h @@ -141,12 +141,12 @@ class DiskGalaxy { }; - Real getM_d() const { return M_d; }; - Real getR_d() const { return R_d; }; - Real getZ_d() const { return Z_d; }; - Real getM_vir() const { return M_vir; }; - Real getR_vir() const { return R_vir; }; - Real getC_vir() const { return c_vir; }; + Real getM_d() const { return M_d; }; + Real getR_d() const { return R_d; }; + Real getZ_d() const { return Z_d; }; + Real getM_vir() const { return M_vir; }; + Real getR_vir() const { return R_vir; }; + Real getC_vir() const { return c_vir; }; Real getR_cool() const { return r_cool; }; }; @@ -186,7 +186,7 @@ class ClusteredDiskGalaxy: public DiskGalaxy { namespace Galaxies { // all masses in M_sun and all distances in kpc //static DiskGalaxy MW(6.5e10, 3.5, (3.5/5.0), 1.0e12, 261, 20, 157.0); - static ClusteredDiskGalaxy MW(1e3, 5e5, 6.5e10, 2.7, 0.7, 1.077e12, 261, 18, 157.0); + static ClusteredDiskGalaxy MW(1e4, 5e5, 6.5e10, 2.7, 0.7, 1.077e12, 261, 18, 157.0); static DiskGalaxy M82(1.0e10, 0.8, 0.15, 5.0e10, 0.8/0.015, 10, 100.0); }; diff --git a/src/mpi/cuda_pack_buffers.h b/src/mpi/cuda_pack_buffers.h deleted file mode 100644 index 40c51a87b..000000000 --- a/src/mpi/cuda_pack_buffers.h +++ /dev/null @@ -1,33 +0,0 @@ -#ifdef CUDA -#include "../utils/gpu.hpp" -#include "../global/global.h" -#include "../global/global_cuda.h" - - - - -__global__ void PackBuffers3DKernel(Real * buffer, Real * c_head, int isize, int jsize, int ksize, int nx, int ny, int idxoffset, int offset, int n_fields, int n_cells); - -void PackBuffers3D(Real * buffer, Real * c_head, int isize, int jsize, int ksize, int nx, int ny, int idxoffset, int offset, int n_fields, int n_cells); - -__global__ void UnpackBuffers3DKernel(Real * buffer, Real * c_head, int isize, int jsize, int ksize, int nx, int ny, int idxoffset, int offset, int n_fields, int n_cells); - -void UnpackBuffers3D(Real * buffer, Real * c_head, int isize, int jsize, int ksize, int nx, int ny, int idxoffset, int offset, int n_fields, int n_cells); - -void PackGhostCells(Real * c_head, - int nx, int ny, int nz, int n_fields, int n_cells, int n_ghost, int flags[], - int isize, int jsize, int ksize, - int imin, int jmin, int kmin, int dir); - -__global__ void PackGhostCellsKernel(Real * c_head, - int nx, int ny, int nz, int n_fields, int n_cells, int n_ghost, - int f0, int f1, int f2, int f3, int f4, int f5, - int isize, int jsize, int ksize, - int imin, int jmin, int kmin, int dir); - -__device__ int SetBoundaryMapping(int ig, int jg, int kg, Real *a, int flags[],int nx, int ny, int nz, int n_ghost, int &magneticIdx); - -__device__ int FindIndex(int ig, int nx, int flag, int face, int n_ghost, Real *a, int &mr); - - -#endif diff --git a/src/mpi/mpi_routines.cpp b/src/mpi/mpi_routines.cpp index 7251e8fdb..6d51ee643 100644 --- a/src/mpi/mpi_routines.cpp +++ b/src/mpi/mpi_routines.cpp @@ -622,6 +622,12 @@ void Allocate_MPI_DeviceBuffers(struct Header *H) h_recv_buffer_z1 = (Real *) malloc ( zbsize*sizeof(Real) ); #endif + // NOTE: When changing this ifdef check for compatibility with + // Grid3D::Load_NTtransfer_and_Request_Receive_Particles_Transfer + // in particles/particles_boundaries.cpp + + // Whether or not MPI_GPU is on, the device has transfer buffers for PARTICLES_GPU + #if defined(PARTICLES) && defined(PARTICLES_GPU) chprintf("Allocating MPI communication buffers on GPU for particle transfers ( N_Particles: %d ).\n", N_PARTICLES_TRANSFER ); CudaSafeCall ( cudaMalloc (&d_send_buffer_x0_particles, buffer_length_particles_x0_send*sizeof(Real)) ); @@ -636,9 +642,13 @@ void Allocate_MPI_DeviceBuffers(struct Header *H) CudaSafeCall ( cudaMalloc (&d_recv_buffer_y1_particles, buffer_length_particles_y1_recv*sizeof(Real)) ); CudaSafeCall ( cudaMalloc (&d_recv_buffer_z0_particles, buffer_length_particles_z0_recv*sizeof(Real)) ); CudaSafeCall ( cudaMalloc (&d_recv_buffer_z1_particles, buffer_length_particles_z1_recv*sizeof(Real)) ); + #endif // PARTICLES && PARTICLES_GPU - #if !defined(MPI_GPU) - chprintf("Allocating MPI communication buffers on GPU for particle transfers ( N_Particles: %d ).\n", N_PARTICLES_TRANSFER ); + // CPU relies on host buffers, GPU without MPI_GPU relies on host buffers + + #ifdef PARTICLES + #if (defined(PARTICLES_GPU) && !defined(MPI_GPU)) || defined(PARTICLES_CPU) + chprintf("Allocating MPI communication buffers on Host for particle transfers ( N_Particles: %d ).\n", N_PARTICLES_TRANSFER ); h_send_buffer_x0_particles = (Real *) malloc ( buffer_length_particles_x0_send*sizeof(Real) ); h_send_buffer_x1_particles = (Real *) malloc ( buffer_length_particles_x1_send*sizeof(Real) ); h_send_buffer_y0_particles = (Real *) malloc ( buffer_length_particles_y0_send*sizeof(Real) ); @@ -651,9 +661,8 @@ void Allocate_MPI_DeviceBuffers(struct Header *H) h_recv_buffer_y1_particles = (Real *) malloc ( buffer_length_particles_y1_recv*sizeof(Real) ); h_recv_buffer_z0_particles = (Real *) malloc ( buffer_length_particles_z0_recv*sizeof(Real) ); h_recv_buffer_z1_particles = (Real *) malloc ( buffer_length_particles_z1_recv*sizeof(Real) ); - #endif - - #endif//PARTICLES_GPU + #endif // (defined(PARTICLES_GPU) && !defined(MPI_GPU)) || defined(PARTICLES_CPU) + #endif //PARTICLES } @@ -736,62 +745,6 @@ part_int_t Get_Particles_IDs_Global_MPI_Offset( part_int_t n_local ){ #endif - -/* Set the domain properties used in initial_conditions.cpp Grid3D::Set_Domain_Properties */ -void Set_Parallel_Domain(Real xmin_global, Real ymin_global, Real zmin_global, Real xlen_global, Real ylen_global, Real zlen_global, struct Header *H) -{ - Real xlen, ylen, zlen; - - /*the local domain will be xlen_global * nx_local / nx_global */ - xlen = xlen_global * ((Real) nx_local)/((Real) nx_global); - - /*the local domain will be ylen_global * ny_local / ny_global */ - ylen = ylen_global * ((Real) ny_local)/((Real) ny_global); - - /*the local domain will be zlen_global * nz_local / nz_global */ - zlen = zlen_global * ((Real) nz_local)/((Real) nz_global); - - /* 1-D case */ - if(H->nx > 1 && H->ny==1 && H->nz==1) - { - H->dx = xlen_global / ((Real) nx_global); - - H->domlen_x = H->dx * (H->nx - 2*H->n_ghost); - H->domlen_y = ylen / ((Real) nx_global); - H->domlen_z = zlen / ((Real) nx_global); - - H->dy = H->domlen_y; - H->dz = H->domlen_z; - } - - /* 2-D case */ - if(H->nx > 1 && H->ny>1 && H->nz==1) - { - H->dx = xlen_global / ((Real) nx_global); - H->dy = ylen_global / ((Real) ny_global); - - H->domlen_x = H->dx * (H->nx - 2*H->n_ghost); - H->domlen_y = H->dy * (H->ny - 2*H->n_ghost); - H->domlen_z = zlen / ((Real) nx_global); - - H->dz = H->domlen_z; - } - - /* 3-D case */ - if(H->nx>1 && H->ny>1 && H->nz>1) - { - H->domlen_x = xlen; - H->domlen_y = ylen; - H->domlen_z = zlen; - H->dx = H->domlen_x / (H->nx - 2*H->n_ghost); - H->dy = H->domlen_y / (H->ny - 2*H->n_ghost); - H->dz = H->domlen_z / (H->nz - 2*H->n_ghost); - } - -} - - - /* Print information about the domain properties */ void Print_Domain_Properties(struct Header H) { @@ -804,7 +757,6 @@ void Print_Domain_Properties(struct Header H) { printf("procID %d nxl %ld nxls %ld\n",procID,nx_local,nx_local_start); printf("xb %e yb %e zb %e xbl %e ybl %e zbl %e\n",H.xbound,H.ybound,H.zbound,H.xblocal,H.yblocal,H.zblocal); - printf("xd %e yd %e zd %e xdl %e ydl %e zdl %e\n",H.xdglobal,H.ydglobal,H.zdglobal,H.domlen_x,H.domlen_y,H.domlen_z); printf("dx %e\n",H.dx); printf("dy %e\n",H.dy); printf("dz %e\n",H.dz); diff --git a/src/mpi/mpi_routines.h b/src/mpi/mpi_routines.h index 3098bf7a9..b94e8595c 100644 --- a/src/mpi/mpi_routines.h +++ b/src/mpi/mpi_routines.h @@ -176,9 +176,6 @@ part_int_t Get_Particles_IDs_Global_MPI_Offset( part_int_t n_local ); void Check_and_Grow_Particles_Buffer( Real **part_buffer, int *current_size_ptr, int new_size ); #endif -/* Set the domain properties */ -void Set_Parallel_Domain(Real xmin_global, Real ymin_global, Real zmin_global, Real xlen_global, Real ylen_global, Real zlen_global, struct Header *H); - /* Print information about the domain properties */ void Print_Domain_Properties(struct Header H); diff --git a/src/particles/density_CIC.cpp b/src/particles/density_CIC.cpp index 485d51e8d..c907e64eb 100644 --- a/src/particles/density_CIC.cpp +++ b/src/particles/density_CIC.cpp @@ -68,6 +68,9 @@ void Grid3D::Copy_Particles_Density_to_Gravity(struct parameters P){ void Grid3D::Copy_Particles_Density(){ #ifdef GRAVITY_GPU + #ifdef PARTICLES_CPU + Copy_Particles_Density_to_GPU(); + #endif Copy_Particles_Density_GPU(); #else diff --git a/src/particles/density_CIC_gpu.cu b/src/particles/density_CIC_gpu.cu index cb4e19bfa..64cfa9692 100644 --- a/src/particles/density_CIC_gpu.cu +++ b/src/particles/density_CIC_gpu.cu @@ -1,4 +1,4 @@ -#if defined(PARTICLES) && defined(PARTICLES_GPU) +#ifdef PARTICLES #include #include @@ -7,9 +7,33 @@ #include "../utils/gpu.hpp" #include "../global/global.h" #include "../global/global_cuda.h" -#include "particles_3D.h" +#include "../particles/particles_3D.h" +#include "../grid/grid3D.h" +#ifdef GRAVITY_GPU +void Grid3D::Copy_Particles_Density_to_GPU(){ + CudaSafeCall( cudaMemcpy(Particles.G.density_dev, Particles.G.density, Particles.G.n_cells*sizeof(Real), cudaMemcpyHostToDevice) ); +} + +#endif +#ifdef PARTICLES_GPU + +//Define atomic_add if it's not supported +#if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 600 +#else +__device__ double atomicAdd(double* address, double val) +{ + unsigned long long int* address_as_ull = (unsigned long long int*)address; + unsigned long long int old = *address_as_ull, assumed; + do { + assumed = old; + old = atomicCAS(address_as_ull, assumed, + __double_as_longlong(val + __longlong_as_double(assumed))); + } while (assumed != old); + return __longlong_as_double(old); +} +#endif //Get the CIC index from the particle position ( device function ) __device__ void Get_Indexes_CIC( Real xMin, Real yMin, Real zMin, Real dx, Real dy, Real dz, Real pos_x, Real pos_y, Real pos_z, int &indx_x, int &indx_y, int &indx_z ){ @@ -129,4 +153,5 @@ void Particles_3D::Get_Density_CIC_GPU_function(part_int_t n_local, Real particl #endif } -#endif +#endif//PARTICLES_GPU +#endif//PARTICLES diff --git a/src/particles/density_boundaries.cpp b/src/particles/density_boundaries.cpp index 56fda387e..5e5cab8b8 100644 --- a/src/particles/density_boundaries.cpp +++ b/src/particles/density_boundaries.cpp @@ -87,6 +87,24 @@ void Grid3D::Transfer_Particles_Density_Boundaries( struct parameters P ){ #ifdef MPI_CHOLLA + +void Grid3D::Copy_Particles_Density_Buffer_Device_to_Host( int direction, int side, Real *buffer_d, Real *buffer_h ){ + + int nGHST, nx_g, ny_g, nz_g, buffer_length; + nGHST = Particles.G.n_ghost_particles_grid; + nx_g = Particles.G.nx_local + 2*nGHST; + ny_g = Particles.G.ny_local + 2*nGHST; + nz_g = Particles.G.nz_local + 2*nGHST; + + if ( direction == 0 ) buffer_length = nGHST * ny_g * nz_g; + if ( direction == 1 ) buffer_length = nGHST * nx_g * nz_g; + if ( direction == 2 ) buffer_length = nGHST * nx_g * ny_g; + + cudaMemcpy( buffer_h, buffer_d, buffer_length*sizeof(Real), cudaMemcpyDeviceToHost); + +} + + //Load the particles density boundaries to the MPI buffers for transfer, return the size of the transfer buffer int Grid3D::Load_Particles_Density_Boundary_to_Buffer( int direction, int side, Real *buffer ){ diff --git a/src/particles/feeback_CIC.h b/src/particles/feeback_CIC.h index a2cf33adb..f655c82a7 100644 --- a/src/particles/feeback_CIC.h +++ b/src/particles/feeback_CIC.h @@ -1,4 +1,4 @@ -#if defined(PARTICLES) && defined(DE) && defined(PARTICLE_AGE) && defined(PARTICLE_CPU) && defined(FEEDBACK) +#if defined(PARTICLES) && defined(DE) && defined(PARTICLE_AGE) && defined(PARTICLE_CPU) && defined(SUPERNOVA) #ifndef FEEDBACK_CIC_H #define FEEDBACK_CIC_H diff --git a/src/particles/feedback_CIC.cpp b/src/particles/feedback_CIC.cpp index fe6fda41c..4ab8aa86f 100644 --- a/src/particles/feedback_CIC.cpp +++ b/src/particles/feedback_CIC.cpp @@ -1,7 +1,7 @@ #ifdef PARTICLES #ifdef DE #ifdef PARTICLE_AGE -#ifdef FEEDBACK +#ifdef SUPERNOVA #include #include @@ -61,9 +61,6 @@ Real Grid3D::Cluster_Feedback() { Real max_sn_dti = 0; #ifdef PARTICLES_GPU max_sn_dti = Cluster_Feedback_GPU(); - #ifdef MPI_CHOLLA - max_sn_dti = ReduceRealMax(max_sn_dti); - #endif // MPI_CHOLLA #else Real* feedbackInfo; Real* thread_dti; @@ -376,4 +373,4 @@ void Grid3D::Cluster_Feedback_Function(part_int_t p_start, part_int_t p_end, Rea #endif //PARTICLE_AGE #endif //DE #endif //PARTICLES -#endif //FEEDBACK +#endif //SUPERNOVA diff --git a/src/particles/feedback_CIC_gpu.cu b/src/particles/feedback_CIC_gpu.cu index 8fbf507b1..e42a86d59 100644 --- a/src/particles/feedback_CIC_gpu.cu +++ b/src/particles/feedback_CIC_gpu.cu @@ -1,4 +1,4 @@ -#if defined(FEEDBACK) && defined(PARTICLES_GPU) +#if defined(SUPERNOVA) && defined(PARTICLES_GPU) #include #include @@ -10,12 +10,18 @@ #include "../io/io.h" #include "supernova.h" -#define TPB_FEEDBACK 64 -#define FEED_INFO_N 5 +#define TPB_FEEDBACK 256 +#define FEED_INFO_N 6 +#define i_RES 1 +#define i_UNRES 2 +#define i_ENERGY 3 +#define i_MOMENTUM 4 +#define i_UNRES_ENERGY 5 namespace Supernova { curandStateMRG32k3a_t* curandStates; part_int_t n_states; + Real t_buff, dt_buff; } @@ -35,7 +41,8 @@ __device__ double atomicMax(double* address, double val) __global__ void initState_kernel(unsigned int seed, curandStateMRG32k3a_t* states) { int id = blockIdx.x*blockDim.x + threadIdx.x; - curand_init(seed + id, id, 0, &states[id]); + curand_init(seed, id, 0, &states[id]); + } @@ -49,6 +56,8 @@ __global__ void initState_kernel(unsigned int seed, curandStateMRG32k3a_t* state */ void Supernova::initState(struct parameters *P, part_int_t n_local, Real allocation_factor) { printf("Supernova::initState start\n"); + t_buff = 0; + dt_buff = 0; n_states = n_local*allocation_factor; //n_states = 10; cudaMalloc((void**) &curandStates, n_states*sizeof(curandStateMRG32k3a_t)); @@ -83,6 +92,7 @@ __device__ void Single_Cluster_Feedback(Real t, Real dt, Real age, Real density, } */ + __device__ Real Calc_Timestep(Real gamma, Real *density, Real *momentum_x, Real *momentum_y, Real *momentum_z, Real *energy, int index, Real dx, Real dy, Real dz){ Real dens = fmax(density[index], DENS_FLOOR); Real d_inv = 1.0 / dens; @@ -95,50 +105,76 @@ __device__ Real Calc_Timestep(Real gamma, Real *density, Real *momentum_x, Real } - +/** the prescription for dividing a scalar quantity between 3x3x3 cells is done by imagining a + 2x2x2 cell volume around the SN. These fractions, then, represent the linear extent of this + volume into the cell in question. + For i=0 this should be 1*1/2. + For i=-1 this should be (1-dx)*1/2. + For i=+1 this should be dx*1/2. + In the above the 1/2 factor is normalize over 2 cells/direction. + */ __device__ Real frac(int i, Real dx) { - return (-0.5*i*i -0.5*i + 1 + i*dx)*0.5; + return (-0.5*i*i - 0.5*i + 1 + i*dx)*0.5; } + __device__ Real d_fr(int i, Real dx) { - return (dx > 0.5)*i*(1-2*dx) + ((i+1)*dx + 0.5*(i - 1)) -3*(i-1)*(i+1)*(0.5 - dx); + return (dx > 0.5)*i*(1-2*dx) + ((i+1)*dx + 0.5*(i-1)) - 3*(i-1)*(i+1)*(0.5 - dx); +} + + +__device__ Real GetAverageDensity(Real *density, int xi, int yi, int zi, int nxg, int nyg, int ng) { + Real d_average = 0.0; + for (int i = -1; i < 2; i++) { + for (int j = -1; j < 2; j++) { + for (int k = -1; k < 2; k++) { + d_average += density[(xi + ng + i) + (yi + ng + j)*nxg + (zi + ng + k)*nxg*nyg]; + } + } + } + return d_average / 27; } -__global__ void Cluster_Feedback_Kernel(part_int_t n_local, Real* pos_x_dev, Real* pos_y_dev, Real* pos_z_dev, - Real* mass_dev, Real* age_dev, Real xMin, Real yMin, Real zMin, Real xLen, Real yLen, Real zLen, + +__device__ Real GetAverageNumberDensity_CGS(Real *density, int xi, int yi, int zi, int nxg, int nyg, int ng) { + return GetAverageDensity(density, xi, yi, zi, nxg, nyg, ng) * DENSITY_UNIT / (Supernova::MU*MP); +} + + +__global__ void Cluster_Feedback_Kernel(part_int_t n_local, part_int_t *id, Real* pos_x_dev, Real* pos_y_dev, Real* pos_z_dev, + Real* mass_dev, Real* age_dev, Real xMin, Real yMin, Real zMin, Real xMax, Real yMax, Real zMax, Real dx, Real dy, Real dz, int nx_g, int ny_g, int nz_g, int n_ghost, Real t, Real dt, Real* dti, Real* info, - Real* density, Real* gasEnergy, Real* energy, Real* momentum_x, Real* momentum_y, Real* momentum_z, Real gamma, curandStateMRG32k3a_t* states){ + Real* density, Real* gasEnergy, Real* energy, Real* momentum_x, Real* momentum_y, Real* momentum_z, Real gamma, curandStateMRG32k3a_t* states, + Real* prev_dens, int* prev_N, short direction){ __shared__ Real s_info[FEED_INFO_N*TPB_FEEDBACK]; // for collecting SN feedback information, like # of SNe or # resolved. int tid = threadIdx.x; int gtid = blockIdx.x * blockDim.x + tid ; - s_info[FEED_INFO_N*tid] = 0; - s_info[FEED_INFO_N*tid + 1] = 0; - s_info[FEED_INFO_N*tid + 2] = 0; - s_info[FEED_INFO_N*tid + 3] = 0; - s_info[FEED_INFO_N*tid + 4] = 0; + s_info[FEED_INFO_N*tid] = 0; // number of supernovae + s_info[FEED_INFO_N*tid + 1] = 0; // number of resolved events + s_info[FEED_INFO_N*tid + 2] = 0; // number of unresolved events + s_info[FEED_INFO_N*tid + 3] = 0; // resolved energy + s_info[FEED_INFO_N*tid + 4] = 0; // unresolved momentum + s_info[FEED_INFO_N*tid + 5] = 0; // unresolved KE added via momentum injection if (gtid < n_local) { - Real xMax, yMax, zMax; - xMax = xMin + xLen; - yMax = yMin + yLen; - zMax = zMin + zLen; - Real pos_x, pos_y, pos_z; Real cell_center_x, cell_center_y, cell_center_z; Real delta_x, delta_y, delta_z; Real x_frac, y_frac, z_frac; - Real px, py, pz, ek, d; + Real px, py, pz, eg, t_b, t_a, v_1, v_2, d, d_b, d_a, p_b, p_a, e; Real feedback_energy=0, feedback_density=0, feedback_momentum=0, n_0, shell_radius; bool is_resolved = false; - int pcell_x, pcell_y, pcell_z, pcell_index; Real dV = dx*dy*dz; Real local_dti = 0.0; pos_x = pos_x_dev[gtid]; pos_y = pos_y_dev[gtid]; pos_z = pos_z_dev[gtid]; + //printf("(%d): pos:(%.4e, %.4e, %.4e)\n", gtid, pos_x, pos_y, pos_z); + //printf("(%d): MIN:(%.4e, %.4e, %.4e)\n", gtid, xMin, yMin, xMin); + bool in_local = (pos_x >= xMin && pos_x < xMax) && (pos_y >= yMin && pos_y < yMax) && @@ -151,48 +187,55 @@ __global__ void Cluster_Feedback_Kernel(part_int_t n_local, Real* pos_x_dev, Rea int indx_x = (int) floor( ( pos_x - xMin ) / dx ); int indx_y = (int) floor( ( pos_y - yMin ) / dy ); int indx_z = (int) floor( ( pos_z - zMin ) / dz ); + //printf("(%d): indx:(%d, %d, %d)\n", gtid, indx_x, indx_y, indx_z); + - bool ignore = indx_x < 0 || indx_y < 0 || indx_z < 0 || indx_x > nx_g-2 || indx_y > ny_g-2 || indx_z > nz_g-2; + bool ignore = indx_x < 0 || indx_y < 0 || indx_z < 0 || indx_x >= nx_g-2*n_ghost || indx_y >= ny_g-2*n_ghost || indx_z >= nz_g-2*n_ghost; if (ignore) { printf(" Feedback GPU: Particle CIC index err [%f %f %f] [%d %d %d] [%d %d %d] \n ", pos_x, pos_y, pos_z, indx_x, indx_y, indx_z, nx_g, ny_g, nz_g); } if (!ignore && in_local) { - pcell_x = (int) floor( ( pos_x - xMin ) / dx ) + n_ghost; - pcell_y = (int) floor( ( pos_y - yMin ) / dy ) + n_ghost; - pcell_z = (int) floor( ( pos_z - zMin ) / dz ) + n_ghost; - pcell_index = pcell_x + pcell_y*nx_g + pcell_z*nx_g*ny_g; - unsigned int N = 0; + int N = 0; if ((t - age_dev[gtid]) <= Supernova::SN_ERA) { - curandStateMRG32k3a_t state = states[gtid]; - N = curand_poisson (&state, Supernova::SNR * mass_dev[gtid] * dt); - states[gtid] = state; - - if (N > 0) { + if (direction == -1) N = -prev_N[gtid]; + else { + curandStateMRG32k3a_t state = states[gtid]; + N = curand_poisson (&state, Supernova::SNR * mass_dev[gtid] * dt); + states[gtid] = state; + prev_N[gtid] = N; + } + if (N != 0) { mass_dev[gtid] -= N * Supernova::MASS_PER_SN; feedback_energy = N * Supernova::ENERGY_PER_SN / dV; feedback_density = N * Supernova::MASS_PER_SN / dV; - n_0 = density[pcell_index] * DENSITY_UNIT / (Supernova::MU*MP); - feedback_momentum = Supernova::FINAL_MOMENTUM * pow(n_0, -0.17) * pow(N, 0.93); - shell_radius = Supernova::R_SH * pow(n_0, -0.46) * pow(N, 0.29); - //printf(" N=%d, shell_rad=%0.4e, n_0=%0.4e\n", N, shell_radius, n_0); + if (direction == -1) n_0 = prev_dens[gtid]; + else { + n_0 = GetAverageNumberDensity_CGS(density, indx_x, indx_y, indx_z, nx_g, ny_g, n_ghost); + prev_dens[gtid] = n_0; + } + //int devcount; + //cudaGetDeviceCount(&devcount); + //int devId; + //cudaGetDevice(&devId); + //printf("[%d: %d] N: %d, time: %.4e, dt: %.4e, e: %.4e, n_0: %.4e\n", devId, gtid, N, t, dt, feedback_energy, n_0); + + feedback_momentum = direction*Supernova::FINAL_MOMENTUM * pow(n_0, -0.17) * pow(fabsf(N), 0.93) / dV; + shell_radius = Supernova::R_SH * pow(n_0, -0.46) * pow(fabsf(N), 0.29); is_resolved = 3 * max(dx, max(dy, dz)) <= shell_radius; + if (!is_resolved) printf("UR[%f] at (%d, %d, %d) id=%d, N=%d, shell_rad=%0.4e, n_0=%0.4e\n", + t, indx_x + n_ghost, indx_y + n_ghost, indx_z + n_ghost, (int)id[gtid], N, shell_radius, n_0); s_info[FEED_INFO_N*tid] = 1.*N; - if (is_resolved) s_info[FEED_INFO_N*tid + 1] = 1.0; - else s_info[FEED_INFO_N*tid + 2] = 1.0; - - cell_center_x = xMin + indx_x*dx + 0.5*dx; - cell_center_y = yMin + indx_y*dy + 0.5*dy; - cell_center_z = zMin + indx_z*dz + 0.5*dz; - + if (is_resolved) s_info[FEED_INFO_N*tid + 1] = direction * 1.0; + else s_info[FEED_INFO_N*tid + 2] = direction * 1.0; int indx; if (is_resolved) { //if resolved inject energy and density - s_info[FEED_INFO_N*tid + 3] = feedback_energy *dV; + s_info[FEED_INFO_N*tid + 3] = feedback_energy * dV; indx_x = (int) floor( ( pos_x - xMin - 0.5*dx ) / dx ); indx_y = (int) floor( ( pos_y - yMin - 0.5*dy ) / dy ); @@ -214,6 +257,19 @@ __global__ void Cluster_Feedback_Kernel(part_int_t n_local, Real* pos_x_dev, Rea for (int k = 0; k < 2; k++) { indx = (indx_x+i) + (indx_y+j)*nx_g + (indx_z+k)*nx_g*ny_g; + if (abs(momentum_x[indx]/density[indx]) >= C_L) { + printf("%d, Rb: (%d, %d, %d) vx = %.3e, d = %.3e, n_0 = %.3e\n", direction, indx_x + i, indx_y + j, indx_z + k, + momentum_x[indx]/density[indx]*VELOCITY_UNIT*1e-5, density[indx]*DENSITY_UNIT/0.6/MP, n_0); + } + if (abs(momentum_y[indx]/density[indx]) >= C_L) { + printf("%d, Rb: (%d, %d, %d) vy = %.3e, d = %.3e, n_0 = %.3e\n", direction, indx_x + i, indx_y + j, indx_z + k, + momentum_y[indx]/density[indx]*VELOCITY_UNIT*1e-5, density[indx]*DENSITY_UNIT/0.6/MP, n_0); + } + if (abs(momentum_z[indx]/density[indx]) >= C_L) { + printf("%d, Rb: (%d, %d, %d) vz = %.3e, d = %.3e, n_0 = %.3e\n", direction, indx_x + i, indx_y + j, indx_z + k, + momentum_z[indx]/density[indx]*VELOCITY_UNIT*1e-5, density[indx]*DENSITY_UNIT/0.6/MP, n_0); + } + // i_frac are the fractions of energy/density to be allocated // to each of the 8 cells. x_frac = i*(1-delta_x) + (1-i)*delta_x; @@ -224,45 +280,126 @@ __global__ void Cluster_Feedback_Kernel(part_int_t n_local, Real* pos_x_dev, Rea atomicAdd(&gasEnergy[indx], x_frac * y_frac * z_frac * feedback_energy ); atomicAdd(&energy[indx], x_frac * y_frac * z_frac * feedback_energy ); - local_dti = fmax(local_dti, Calc_Timestep(gamma, density, momentum_x, momentum_y, momentum_z, energy, indx, dx, dy, dz)); + if (abs(momentum_x[indx]/density[indx]) >= C_L) { + printf("%d, Ra: (%d, %d, %d) vx = %.3e, d = %.3e, n_0 = %.3e\n", direction, indx_x + i, indx_y + j, indx_z + k, + momentum_x[indx]/density[indx]*VELOCITY_UNIT*1e-5, density[indx]*DENSITY_UNIT/0.6/MP, n_0); + } + if (abs(momentum_y[indx]/density[indx]) >= C_L) { + printf("%d, Ra: (%d, %d, %d) vy = %.3e, d = %.3e, n_0 = %.3e\n", direction, indx_x + i, indx_y + j, indx_z + k, + momentum_y[indx]/density[indx]*VELOCITY_UNIT*1e-5, density[indx]*DENSITY_UNIT/0.6/MP, n_0); + } + if (abs(momentum_z[indx]/density[indx]) >= C_L) { + printf("%d, Ra: (%d, %d, %d) vz = %.3e, d = %.3e, n_0 = %.3e\n", direction, indx_x + i, indx_y + j, indx_z + k, + momentum_z[indx]/density[indx]*VELOCITY_UNIT*1e-5, density[indx]*DENSITY_UNIT/0.6/MP, n_0); + } + + if (direction > 0) local_dti = fmax(local_dti, Calc_Timestep(gamma, density, momentum_x, momentum_y, momentum_z, energy, indx, dx, dy, dz)); } } } } else { //if not resolved, inject momentum and density - s_info[FEED_INFO_N*tid + 4] = feedback_momentum; - feedback_momentum /= sqrt(3.0); + s_info[FEED_INFO_N*tid + 4] = feedback_momentum * dV; + + delta_x = ( pos_x - xMin - indx_x*dx ) / dx; + delta_y = ( pos_y - yMin - indx_y*dy ) / dy; + delta_z = ( pos_z - zMin - indx_z*dz ) / dz; + //printf("(%d):indx:(%d, %d, %d)\n", gtid, indx_x, indx_y, indx_z); + //printf("(%d): pos:(%.4e, %.4e, %.4e), delta_x (%.2e, %.2e, %.2e)\n", gtid, pos_x, pos_y, pos_z, delta_x, delta_y, delta_z); - delta_x = ( pos_x - indx_x*dx ) / dx; - delta_y = ( pos_y - indx_y*dy ) / dy; - delta_z = ( pos_z - indx_z*dz ) / dz; indx_x += n_ghost; indx_y += n_ghost; indx_z += n_ghost; + if (abs(feedback_momentum/feedback_density*VELOCITY_UNIT*1e-5) > 40000) { // injected speeds are greater than 4e4 km/s + printf("**** (%d, %d, %d) injected speeds are %.3e km/s\n", indx_x, indx_y, indx_z, feedback_momentum/feedback_density*VELOCITY_UNIT*1e-5); + } + feedback_momentum /= sqrt(3.0); + for (int i = -1; i < 2; i++) { for (int j = -1; j < 2; j++) { for (int k = -1; k < 2; k++) { // index in array of conserved quantities indx = (indx_x+i) + (indx_y+j)*nx_g + (indx_z+k)*nx_g*ny_g; - px = d_fr(i, delta_x) * frac(j, delta_y) * frac(k, delta_z) * feedback_momentum; - py = frac(i, delta_x) * d_fr(j, delta_y) * frac(k, delta_z) * feedback_momentum; - pz = frac(i, delta_x) * frac(j, delta_y) * d_fr(k, delta_z) * feedback_momentum; - d = frac(i, delta_x) * frac(j, delta_y) * frac(k, delta_z) * feedback_density; - ek = (px*px + py+py + pz*pz)/2/d; + x_frac = d_fr(i, delta_x) * frac(j, delta_y) * frac(k, delta_z); + y_frac = frac(i, delta_x) * d_fr(j, delta_y) * frac(k, delta_z); + z_frac = frac(i, delta_x) * frac(j, delta_y) * d_fr(k, delta_z); + + px = x_frac * feedback_momentum; + py = y_frac * feedback_momentum; + pz = z_frac * feedback_momentum; + //d = (abs(x_frac) + abs(y_frac) + abs(z_frac)) / 6 * (feedback_density + n_0*Supernova::MU*MP/DENSITY_UNIT); + d = (abs(x_frac) + abs(y_frac) + abs(z_frac)) / 6 * feedback_density + n_0*Supernova::MU*MP/DENSITY_UNIT; + + //d = frac(i, delta_x) * frac(j, delta_y) * frac(k, delta_z) * feedback_density; + //e = frac(i, delta_x) * frac(j, delta_y) * frac(k, delta_z) * feedback_energy; + //printf("(%d, %d, %d): delta:(%.4e, %.4e, %.4e), frac: %.4e\n", indx_x, indx_y, indx_z, delta_x, delta_y, delta_z, frac(i, delta_x)*frac(j, delta_y)*frac(k, delta_z)); + //printf("(%d, %d, %d):(%d SN) (i:%d, j:%d, k:%d) before: %.4e\n", indx_x, indx_y, indx_z, N, i, j, k, density[indx]*DENSITY_UNIT/0.6/MP); + + + //v_1 = sqrt((momentum_x[indx]*momentum_x[indx] + momentum_y[indx]*momentum_y[indx] + momentum_z[indx]*momentum_z[indx])/density[indx]/density[indx])*VELOCITY_UNIT/1e5; + //t_b = gasEnergy[indx]*ENERGY_UNIT*(gamma - 1)/(density[indx]*DENSITY_UNIT/0.6/MP*KB); + //p_b = sqrt(momentum_x[indx]*momentum_x[indx] + momentum_y[indx]*momentum_y[indx] + momentum_z[indx]*momentum_z[indx])*VELOCITY_UNIT/1e5; + //d_b = density[indx]*DENSITY_UNIT/0.6/MP; + + if (abs(momentum_x[indx]/density[indx]) >= C_L) { + printf("%d, Ub: (%d, %d, %d) vx = %.3e, d = %.3e, n_0 = %.3e\n", direction, indx_x + i, indx_y + j, indx_z + k, + momentum_x[indx]/density[indx]*VELOCITY_UNIT*1e-5, density[indx]*DENSITY_UNIT/0.6/MP, n_0); + } + if (abs(momentum_y[indx]/density[indx]) >= C_L) { + printf("%d, Ub: (%d, %d, %d) vy = %.3e, d = %.3e, n_0 = %.3e\n", direction, indx_x + i, indx_y + j, indx_z + k, + momentum_y[indx]/density[indx]*VELOCITY_UNIT*1e-5, density[indx]*DENSITY_UNIT/0.6/MP, n_0); + } + if (abs(momentum_z[indx]/density[indx]) >= C_L) { + printf("%d, Ub: (%d, %d, %d) vz = %.3e, d = %.3e, n_0 = %.3e\n", direction, indx_x + i, indx_y + j, indx_z + k, + momentum_z[indx]/density[indx]*VELOCITY_UNIT*1e-5, density[indx]*DENSITY_UNIT/0.6/MP, n_0); + } atomicAdd(&momentum_x[indx], px); atomicAdd(&momentum_y[indx], py); atomicAdd(&momentum_z[indx], pz); - atomicAdd( &density[indx], d ); - atomicAdd( &energy[indx], ek); - - local_dti = fmax(local_dti, Calc_Timestep(gamma, density, momentum_x, momentum_y, momentum_z, energy, indx, dx, dy, dz)); + density[indx] = d; + energy[indx] = (momentum_x[indx]*momentum_x[indx] + momentum_y[indx]*momentum_y[indx] + momentum_z[indx]*momentum_z[indx])/2/density[indx] + gasEnergy[indx]; + + + // atomicAdd( &energy[indx], e ); + //atomicAdd( &density[indx], d ); + + s_info[FEED_INFO_N*tid + i_UNRES_ENERGY] += direction*(px*px + py*py + pz*pz)/2/density[indx]*dV; + + if (abs(momentum_x[indx]/density[indx]) >= C_L) { + printf("%d, Ua: (%d, %d, %d) vx = %.3e, d = %.3e, n_0 = %.3e\n", direction, indx_x + i, indx_y + j, indx_z + k, + momentum_x[indx]/density[indx]*VELOCITY_UNIT*1e-5, density[indx]*DENSITY_UNIT/0.6/MP, n_0); + } + if (abs(momentum_y[indx]/density[indx]) >= C_L) { + printf("%d, Ua: (%d, %d, %d) vy = %.3e, d = %.3e, n_0 = %.3e\n", direction, indx_x + i, indx_y + j, indx_z + k, + momentum_y[indx]/density[indx]*VELOCITY_UNIT*1e-5, density[indx]*DENSITY_UNIT/0.6/MP, n_0); + } + if (abs(momentum_z[indx]/density[indx]) >= C_L) { + printf("%d, Ua: (%d, %d, %d) vz = %.3e, d = %.3e, n_0 = %.3e\n", direction, indx_x + i, indx_y + j, indx_z + k, + momentum_z[indx]/density[indx]*VELOCITY_UNIT*1e-5, density[indx]*DENSITY_UNIT/0.6/MP, n_0); + } + //gasEnergy[indx] = energy[indx] - (momentum_x[indx]*momentum_x[indx] + momentum_y[indx]*momentum_y[indx] + momentum_z[indx]*momentum_z[indx])/2/density[indx]; + //v_2 = sqrt((momentum_x[indx]*momentum_x[indx] + momentum_y[indx]*momentum_y[indx] + momentum_z[indx]*momentum_z[indx])/density[indx]/density[indx]) * VELOCITY_UNIT/1e5; + //t_a = gasEnergy[indx]*ENERGY_UNIT*(gamma - 1)/(density[indx]*DENSITY_UNIT/0.6/MP*KB); + //d_a = density[indx]*DENSITY_UNIT/0.6/MP; + //p_a = sqrt(momentum_x[indx]*momentum_x[indx] + momentum_y[indx]*momentum_y[indx] + momentum_z[indx]*momentum_z[indx])*VELOCITY_UNIT/1e5; + + + //printf("(%d, %d, %d):(CM: %.2e, SN: %d) (i:%d, j:%d, k:%d) v_1: %.5e v_2: %.5e V_DIFF-> %.4f %%\n", indx_x, indx_y, indx_z, mass_dev[gtid], N, i, j, k, v_1, v_2, (v_2-v_1)/v_1*100); + //printf(" (%d, %d, %d):(%d SN) (i:%d, j:%d, k:%d) T_b: %.5e T_a: %.5e T_DIFF-> %.4f %%\n", indx_x, indx_y, indx_z, N, i, j, k, t_b, t_a, (t_a-t_b)/t_b*100); + //printf(" (%d, %d, %d):(%d SN) (i:%d, j:%d, k:%d) d_b: %.5e d_a: %.5e D_DIFF-> %.1f %%\n", indx_x, indx_y, indx_z, N, i, j, k, d_b, d_a, (d_a-d_b)/d_b*100); + //printf(" (%d, %d, %d):(%d SN) (i:%d, j:%d, k:%d) p_b: %.5e p_a: %.5e P_DIFF-> %.4f %%\n", indx_x, indx_y, indx_z, N, i, j, k, p_b, p_a, (p_a-p_b)/p_b*100); + + if (direction > 0) { + //printf("urs time:%.3e id:%d N:%d d:%.5e\n", t, id[gtid], N, n_0); + local_dti = fmax(local_dti, Calc_Timestep(gamma, density, momentum_x, momentum_y, momentum_z, energy, indx, dx, dy, dz)); + } } } } } - atomicMax(dti, local_dti); + if (direction > 0) atomicMax(dti, local_dti); } } } @@ -278,6 +415,7 @@ __global__ void Cluster_Feedback_Kernel(part_int_t n_local, Real* pos_x_dev, Rea s_info[FEED_INFO_N*tid + 2] += s_info[FEED_INFO_N*(tid + s) + 2]; s_info[FEED_INFO_N*tid + 3] += s_info[FEED_INFO_N*(tid + s) + 3]; s_info[FEED_INFO_N*tid + 4] += s_info[FEED_INFO_N*(tid + s) + 4]; + s_info[FEED_INFO_N*tid + 5] += s_info[FEED_INFO_N*(tid + s) + 5]; } __syncthreads(); } @@ -288,10 +426,12 @@ __global__ void Cluster_Feedback_Kernel(part_int_t n_local, Real* pos_x_dev, Rea info[FEED_INFO_N*blockIdx.x + 2] = s_info[2]; info[FEED_INFO_N*blockIdx.x + 3] = s_info[3]; info[FEED_INFO_N*blockIdx.x + 4] = s_info[4]; + info[FEED_INFO_N*blockIdx.x + 5] = s_info[5]; } } + Real Grid3D::Cluster_Feedback_GPU() { if (H.dt == 0) return 0.0; @@ -301,31 +441,72 @@ Real Grid3D::Cluster_Feedback_GPU() { } Real h_dti = 0.0; - Real* d_dti; - CHECK(cudaMalloc(&d_dti, sizeof(Real))); - CHECK(cudaMemcpy(d_dti, &h_dti, sizeof(Real), cudaMemcpyHostToDevice)); + int direction, ngrid; + Real h_info[6] = {0, 0, 0, 0, 0, 0}; + Real info[6]; + Real *d_dti, *d_info; + // require d_prev_dens & d_prev_N in case we have to undo feedback if the time step is too large. + Real* d_prev_dens; + int* d_prev_N; + + + if (Particles.n_local > 0) { + CHECK(cudaMalloc(&d_dti, sizeof(Real))); + CHECK(cudaMemcpy(d_dti, &h_dti, sizeof(Real), cudaMemcpyHostToDevice)); + CHECK(cudaMalloc(&d_prev_dens, Particles.n_local*sizeof(Real))); + CHECK(cudaMalloc(&d_prev_N, Particles.n_local*sizeof(int))); + CHECK(cudaMemset(d_prev_dens, 0, Particles.n_local*sizeof(Real))); + CHECK(cudaMemset(d_prev_N, 0, Particles.n_local*sizeof(int))); - int ngrid = std::ceil((1.*Particles.n_local)/TPB_FEEDBACK); - Real h_info[5] = {0, 0, 0, 0, 0}; - Real info[5]; - Real* d_info; - CHECK(cudaMalloc((void**)&d_info, FEED_INFO_N*ngrid*sizeof(Real))); - //FIXME info collection only works if ngrid is 1. The reason being that reduction of + ngrid = std::ceil((1.*Particles.n_local)/TPB_FEEDBACK); + CHECK(cudaMalloc((void**)&d_info, FEED_INFO_N*ngrid*sizeof(Real))); + } + //FIXME info collection and max dti calculation + // only works if ngrid is 1. The reason being that reduction of // d_info is currently done on each block. Only the first block reduction // is used - hipLaunchKernelGGL(Cluster_Feedback_Kernel, ngrid, TPB_FEEDBACK, 0, 0, Particles.n_local, Particles.pos_x_dev, Particles.pos_y_dev, Particles.pos_z_dev, - Particles.mass_dev, Particles.age_dev, H.xblocal, H.yblocal, H.zblocal, H.domlen_x, H.domlen_y, H.domlen_z, - H.dx, H.dy, H.dz, H.nx, H.ny, H.nz, H.n_ghost, H.t, H.dt, d_dti, d_info, - C.d_density, C.d_GasEnergy, C.d_Energy, C.d_momentum_x, C.d_momentum_y, C.d_momentum_z, gama, Supernova::curandStates); + do { + direction = 1; + if (Particles.n_local > 0) { + hipLaunchKernelGGL(Cluster_Feedback_Kernel, ngrid, TPB_FEEDBACK, 0, 0, Particles.n_local, Particles.partIDs_dev, Particles.pos_x_dev, Particles.pos_y_dev, Particles.pos_z_dev, + Particles.mass_dev, Particles.age_dev, H.xblocal, H.yblocal, H.zblocal, H.xblocal_max, H.yblocal_max, H.zblocal_max, + H.dx, H.dy, H.dz, H.nx, H.ny, H.nz, H.n_ghost, H.t, H.dt, d_dti, d_info, + C.d_density, C.d_GasEnergy, C.d_Energy, C.d_momentum_x, C.d_momentum_y, C.d_momentum_z, gama, Supernova::curandStates, d_prev_dens, d_prev_N, direction); + + CHECK(cudaMemcpy(&h_dti, d_dti, sizeof(Real), cudaMemcpyDeviceToHost)); + } - CHECK(cudaMemcpy(&h_dti, d_dti, sizeof(Real), cudaMemcpyDeviceToHost)); - CHECK(cudaMemcpy(&h_info, d_info, FEED_INFO_N*sizeof(Real), cudaMemcpyDeviceToHost)); - CHECK(cudaFree(d_dti)); - CHECK(cudaFree(d_info)); + #ifdef MPI_CHOLLA + h_dti = ReduceRealMax(h_dti); + MPI_Barrier(world); + #endif // MPI_CHOLLA + + if (h_dti != 0 && (C_cfl/h_dti < H.dt)) { // timestep too big: need to undo the last operation + direction = -1; + if (Particles.n_local > 0) { + hipLaunchKernelGGL(Cluster_Feedback_Kernel, ngrid, TPB_FEEDBACK, 0, 0, Particles.n_local, Particles.partIDs_dev, Particles.pos_x_dev, Particles.pos_y_dev, Particles.pos_z_dev, + Particles.mass_dev, Particles.age_dev, H.xblocal, H.yblocal, H.zblocal, H.xblocal_max, H.yblocal_max, H.zblocal_max, + H.dx, H.dy, H.dz, H.nx, H.ny, H.nz, H.n_ghost, H.t, H.dt, d_dti, d_info, + C.d_density, C.d_GasEnergy, C.d_Energy, C.d_momentum_x, C.d_momentum_y, C.d_momentum_z, gama, Supernova::curandStates, d_prev_dens, d_prev_N, direction); + + CHECK(cudaDeviceSynchronize()); + } + H.dt = C_cfl/h_dti; + } + + } while (direction == -1); + + if (Particles.n_local > 0) { + CHECK(cudaMemcpy(&h_info, d_info, FEED_INFO_N*sizeof(Real), cudaMemcpyDeviceToHost)); + CHECK(cudaFree(d_dti)); + CHECK(cudaFree(d_info)); + CHECK(cudaFree(d_prev_dens)); + CHECK(cudaFree(d_prev_N)); + } #ifdef MPI_CHOLLA - MPI_Reduce(&h_info, &info, 5, MPI_CHREAL, MPI_SUM, root, world); + MPI_Reduce(&h_info, &info, FEED_INFO_N, MPI_CHREAL, MPI_SUM, root, world); #else info = h_info; #endif @@ -335,6 +516,7 @@ Real Grid3D::Cluster_Feedback_GPU() { countUnresolved += (int)info[Supernova::NOT_RESOLVED]; totalEnergy += info[Supernova::ENERGY]; totalMomentum += info[Supernova::MOMENTUM]; + totalUnresEnergy += info[Supernova::UNRES_ENERGY]; Real resolved_ratio = 0.0; if (info[Supernova::RESOLVED] > 0 || info[Supernova::NOT_RESOLVED] > 0) { @@ -346,14 +528,16 @@ Real Grid3D::Cluster_Feedback_GPU() { } chprintf("iteration %d: number of SN: %d, ratio of resolved %.3e\n", H.n_step, (long)info[Supernova::SN], resolved_ratio); - chprintf(" this iteration: energy: %.5e erg. x-momentum: %.5e S.M. km/s\n", - info[Supernova::ENERGY]*MASS_UNIT*LENGTH_UNIT*LENGTH_UNIT/TIME_UNIT/TIME_UNIT, info[Supernova::MOMENTUM]*VELOCITY_UNIT/1e5); + chprintf(" this iteration: energy: %.5e erg. momentum: %.5e S.M. km/s unres_energy: %.5e erg\n", + info[Supernova::ENERGY]*MASS_UNIT*LENGTH_UNIT*LENGTH_UNIT/TIME_UNIT/TIME_UNIT, info[Supernova::MOMENTUM]*VELOCITY_UNIT/1e5, + info[Supernova::UNRES_ENERGY]*MASS_UNIT*LENGTH_UNIT*LENGTH_UNIT/TIME_UNIT/TIME_UNIT); chprintf(" cummulative: #SN: %d, ratio of resolved (R: %d, UR: %d) = %.3e\n", (long)countSN, (long)countResolved, (long)countUnresolved, global_resolved_ratio); - chprintf(" energy: %.5e erg. Total x-momentum: %.5e S.M. km/s\n", totalEnergy*MASS_UNIT*LENGTH_UNIT*LENGTH_UNIT/TIME_UNIT/TIME_UNIT, totalMomentum*VELOCITY_UNIT/1e5); + chprintf(" energy: %.5e erg. Total momentum: %.5e S.M. km/s, Total unres energy: %.5e\n", totalEnergy*MASS_UNIT*LENGTH_UNIT*LENGTH_UNIT/TIME_UNIT/TIME_UNIT, + totalMomentum*VELOCITY_UNIT/1e5, totalUnresEnergy*MASS_UNIT*LENGTH_UNIT*LENGTH_UNIT/TIME_UNIT/TIME_UNIT); return h_dti; } -#endif //FEEDBACK & PARTICLES_GPU +#endif //SUPERNOVA & PARTICLES_GPU diff --git a/src/particles/gravity_CIC.cpp b/src/particles/gravity_CIC.cpp index cf6ce45c5..ec753ba58 100644 --- a/src/particles/gravity_CIC.cpp +++ b/src/particles/gravity_CIC.cpp @@ -20,6 +20,10 @@ void Grid3D::Get_Gravity_Field_Particles(){ #ifdef PARTICLES_CPU + + #ifdef GRAVITY_GPU + Copy_Potential_From_GPU(); + #endif #ifndef PARALLEL_OMP Get_Gravity_Field_Particles_function( 0, Particles.G.nz_local + 2*Particles.G.n_ghost_particles_grid); diff --git a/src/particles/gravity_CIC_gpu.cu b/src/particles/gravity_CIC_gpu.cu index 4c9600e05..97108dc3b 100644 --- a/src/particles/gravity_CIC_gpu.cu +++ b/src/particles/gravity_CIC_gpu.cu @@ -1,4 +1,4 @@ -#if defined(PARTICLES) && defined(PARTICLES_GPU) +#ifdef PARTICLES #include #include @@ -13,6 +13,7 @@ #include "../grid/grid3D.h" #endif +#ifdef PARTICLES_GPU //Copy the potential from host to device void Particles_3D::Copy_Potential_To_GPU( Real *potential_host, Real *potential_dev, int n_cells_potential ){ @@ -289,6 +290,8 @@ void Particles_3D::Get_Gravity_CIC_GPU_function( part_int_t n_local, int nx_loca } +#endif //PARTICLES_GPU + #ifdef GRAVITY_GPU void __global__ Copy_Particles_Density_Kernel( Real *dst_density, Real *src_density, int nx_local, int ny_local, int nz_local, int n_ghost ){ diff --git a/src/particles/io_particles.cpp b/src/particles/io_particles.cpp index 4240e1850..41bee206d 100644 --- a/src/particles/io_particles.cpp +++ b/src/particles/io_particles.cpp @@ -505,13 +505,13 @@ void Grid3D::Write_Particles_Data_HDF5( hid_t file_id){ output_particle_data = false; #endif - #ifdef GRAVITY_GPU + #ifdef PARTICLES_GPU //Copy the device arrays from the device to the host CudaSafeCall( cudaMemcpy(Particles.G.density, Particles.G.density_dev, Particles.G.n_cells*sizeof(Real), cudaMemcpyDeviceToHost) ); - #if defined(OUTPUT_POTENTIAL) && defined(ONLY_PARTICLES) + #endif//PARTICLES_GPU + #if defined(OUTPUT_POTENTIAL) && defined(ONLY_PARTICLES) && defined(GRAVITY_GPU) CudaSafeCall( cudaMemcpy(Grav.F.potential_h, Grav.F.potential_d, Grav.n_cells_potential*sizeof(Real), cudaMemcpyDeviceToHost) ); #endif//OUTPUT_POTENTIAL - #endif//GRAVITY_GPU diff --git a/src/particles/particles_3D.cpp b/src/particles/particles_3D.cpp index d53b2cc46..b52215ec2 100644 --- a/src/particles/particles_3D.cpp +++ b/src/particles/particles_3D.cpp @@ -31,7 +31,7 @@ void Grid3D::Initialize_Particles( struct parameters *P ){ Particles.Initialize( P, Grav, H.xbound, H.ybound, H.zbound, H.xdglobal, H.ydglobal, H.zdglobal ); - #ifdef GRAVITY_GPU + #if defined (PARTICLES_GPU) && defined (GRAVITY_GPU) // Set the GPU array for the particles potential equal to the Gravity GPU array for the potential Particles.G.potential_dev = Grav.F.potential_d; #endif @@ -122,9 +122,9 @@ void Particles_3D::Initialize( struct parameters *P, Grav3D &Grav, Real xbound, G.zMin = Grav.zMin; //Right boundaries of the local domain - G.xMax = G.xMin + G.nx_local*G.dx; - G.yMax = G.yMin + G.ny_local*G.dy; - G.zMax = G.zMin + G.nz_local*G.dz; + G.xMax = Grav.xMax; + G.yMax = Grav.yMax; + G.zMax = Grav.zMax; //Left boundaries of the global domain G.domainMin_x = xbound; @@ -163,7 +163,7 @@ void Particles_3D::Initialize( struct parameters *P, Grav3D &Grav, Real xbound, //Factor to allocate the particles data arrays on the GPU. //When using MPI particles will be transferred to other GPU, for that reason we need extra memory allocated #ifdef MPI_CHOLLA - G.gpu_allocation_factor = 1.5; + G.gpu_allocation_factor = 1.25; #else G.gpu_allocation_factor = 1.0; #endif @@ -261,6 +261,10 @@ void Particles_3D::Allocate_Memory( void ){ G.gravity_x = (Real *) malloc(G.n_cells*sizeof(Real)); G.gravity_y = (Real *) malloc(G.n_cells*sizeof(Real)); G.gravity_z = (Real *) malloc(G.n_cells*sizeof(Real)); + #ifdef GRAVITY_GPU + // Array to copy the particles density to the device for computing the potential in the device + Allocate_Particles_Grid_Field_Real( &G.density_dev, G.n_cells); + #endif #endif #ifdef PARTICLES_GPU @@ -514,14 +518,14 @@ void Particles_3D::Initialize_Sphere(struct parameters *P){ part_int_t pID = 0; Real pPos_x, pPos_y, pPos_z, r; - ChollaPrngGenerator prng(P); + std::mt19937_64 generator(P->prng_seed); std::uniform_real_distribution xPositionPrng(G.xMin, G.xMax ); std::uniform_real_distribution yPositionPrng(G.yMin, G.yMax ); std::uniform_real_distribution zPositionPrng(G.zMin, G.zMax ); while ( pID < n_particles_local ){ - pPos_x = xPositionPrng(prng.generator); - pPos_y = yPositionPrng(prng.generator); - pPos_z = zPositionPrng(prng.generator); + pPos_x = xPositionPrng(generator); + pPos_y = yPositionPrng(generator); + pPos_z = zPositionPrng(generator); r = sqrt( (pPos_x-center_x)*(pPos_x-center_x) + (pPos_y-center_y)*(pPos_y-center_y) + (pPos_z-center_z)*(pPos_z-center_z) ); if ( r > sphereR ) continue; @@ -627,10 +631,10 @@ void Particles_3D::Initialize_Disk_Stellar_Clusters(struct parameters *P) { chprintf( " Initializing Particles Stellar Disk\n"); // Set up the PRNG - std::mt19937_64 generator {P->prng_seed}; + std::mt19937_64 generator(P->prng_seed); std::gamma_distribution radialDist(2,1); //for generating cyclindrical radii - std::uniform_real_distribution zDist(-0.2, 0.2); + std::uniform_real_distribution zDist(-0.005, 0.005); std::uniform_real_distribution vzDist(-1e-8, 1e-8); std::uniform_real_distribution phiDist(0, 2*M_PI); //for generating phi std::normal_distribution speedDist(0, 1); //for generating random speeds. @@ -657,11 +661,10 @@ void Particles_3D::Initialize_Disk_Stellar_Clusters(struct parameters *P) { Real x, y, z, R, phi; Real vx, vy, vz, vel, ac; Real expFactor, vR_rms, vR, vPhi_str, vPhi, v_c2, vPhi_rand_rms, kappa2; - //unsigned long int N = (long int)(6.5e6 * 0.11258580827352116); //2kpc radius //unsigned long int N = 13; //(long int)(6.5e6 * 0.9272485558395908); // 15kpc radius Real total_mass = 0; - Real upper_limit_cluster_mass = 3e7; + Real upper_limit_cluster_mass = 1e7; long lost_particles = 0; part_int_t id = -1; while (total_mass < upper_limit_cluster_mass) { @@ -676,11 +679,11 @@ void Particles_3D::Initialize_Disk_Stellar_Clusters(struct parameters *P) { phi = phiDist(generator); x = R * cos(phi); y = R * sin(phi); - z = 0.0; //zDist(generator); + z = zDist(generator); - if (x < G.xMin || x > G.xMax) continue; - if (y < G.yMin || y > G.yMax) continue; - if (z < G.zMin || z > G.zMax) continue; + if (x < G.xMin || x >= G.xMax) continue; + if (y < G.yMin || y >= G.yMax) continue; + if (z < G.zMin || z >= G.zMax) continue; ac = fabs(Galaxies::MW.gr_disk_D3D(R, 0) + Galaxies::MW.gr_halo_D3D(R, 0)); vPhi = sqrt(R*ac); @@ -764,7 +767,7 @@ void Particles_3D::Initialize_Disk_Stellar_Clusters(struct parameters *P) { #endif //PARTICLES_GPU if (lost_particles > 0) chprintf(" lost %lu particles\n", lost_particles); - chprintf( "Stellar Disk Particles Initialized, n_total: %lu, n_local: %lu, total_mass: %.3e s.m.\n", id, n_local, total_mass); + chprintf( "Stellar Disk Particles Initialized, n_total: %lu, n_local: %lu, total_mass: %.3e s.m.\n", id+1, n_local, total_mass); } diff --git a/src/particles/particles_3D.h b/src/particles/particles_3D.h index 19085b14c..424f15fec 100644 --- a/src/particles/particles_3D.h +++ b/src/particles/particles_3D.h @@ -14,6 +14,7 @@ #ifdef PARTICLES_GPU #define TPB_PARTICLES 1024 // #define PRINT_GPU_MEMORY +#define PRINT_MAX_MEMORY_USAGE #endif @@ -167,7 +168,11 @@ class Particles_3D Real *gravity_x; Real *gravity_y; Real *gravity_z; + #ifdef GRAVITY_GPU + Real *density_dev; + #endif #endif + #ifdef PARTICLES_GPU Real *density_dev; @@ -224,9 +229,10 @@ class Particles_3D void Initialize( struct parameters *P, Grav3D &Grav, Real xbound, Real ybound, Real zbound, Real xdglobal, Real ydglobal, Real zdglobal ); + void Free_GPU_Array_Real( Real *array ); + #ifdef PARTICLES_GPU - void Free_GPU_Array_Real( Real *array ); void Free_GPU_Array_int( int *array ); void Free_GPU_Array_bool( bool *array ); template< typename T > void Free_GPU_Array( T *array ){ cudaFree(array); } //TODO remove the Free_GPU_Array_ functions @@ -264,6 +270,10 @@ class Particles_3D void Unload_Particles_from_Buffer_GPU( int direction, int side , Real *recv_buffer_h, int n_recv ); void Copy_Transfer_Particles_from_Buffer_GPU(int n_recv, Real *recv_buffer_d ); void Set_Particles_Open_Boundary_GPU( int dir, int side ); + #ifdef PRINT_MAX_MEMORY_USAGE + void Print_Max_Memory_Usage(); + #endif + #endif //PARTICLES_GPU diff --git a/src/particles/particles_3D_gpu.cu b/src/particles/particles_3D_gpu.cu index 40d1064d4..68159cee0 100644 --- a/src/particles/particles_3D_gpu.cu +++ b/src/particles/particles_3D_gpu.cu @@ -1,4 +1,4 @@ -#if defined(PARTICLES) && defined(PARTICLES_GPU) +#if defined(PARTICLES) #include #include @@ -12,7 +12,59 @@ + + void Particles_3D::Free_GPU_Array_Real( Real *array ){ cudaFree(array); } + + +void Particles_3D::Allocate_Particles_Grid_Field_Real( Real **array_dev, int size ){ + size_t global_free, global_total; + CudaSafeCall( cudaMemGetInfo( &global_free, &global_total ) ); + #ifdef PRINT_GPU_MEMORY + chprintf( "Allocating GPU Memory: %ld MB free \n", global_free/1000000); + #endif + if ( global_free < size*sizeof(Real) ){ + printf( "ERROR: Not enough global device memory \n" ); + printf( " Available Memory: %ld MB \n", global_free/1000000 ); + printf( " Requested Memory: %ld MB \n", size*sizeof(Real)/1000000 ); + exit(-1); + } + CudaSafeCall( cudaMalloc((void**)array_dev, size*sizeof(Real)) ); + cudaDeviceSynchronize(); +} + + + +#ifdef PARTICLES_GPU + +#ifdef PRINT_MAX_MEMORY_USAGE +#include "../mpi/mpi_routines.h" + +void Particles_3D::Print_Max_Memory_Usage(){ + + size_t global_free, global_total; + CudaSafeCall( cudaMemGetInfo( &global_free, &global_total ) ); + cudaDeviceSynchronize(); + + part_int_t n_local_max, n_total, mem_usage; + Real fraction_max, global_free_min; + + n_local_max = (part_int_t) ReduceRealMax( (Real) n_local ); + n_total = ReducePartIntSum( n_local ); + fraction_max = (Real) n_local_max / (Real) n_total; + mem_usage = n_local_max * 9 * sizeof(Real); //Usage for pos, vel ans accel. + + global_free_min = ReduceRealMin( (Real) global_free ); + + chprintf( " Particles GPU Memory: N_local_max: %ld (%.1f %) mem_usage: %ld MB global_free_min: %.1f MB \n", n_local_max, fraction_max*100, mem_usage/1000000, global_free_min/1000000 ); + + +} + +#endif + + + void Particles_3D::Free_GPU_Array_int( int *array ) { cudaFree(array); } void Particles_3D::Free_GPU_Array_bool( bool *array ){ cudaFree(array); } @@ -50,22 +102,6 @@ void Particles_3D::Allocate_Particles_GPU_Array_Real( Real **array_dev, part_int cudaDeviceSynchronize(); } -void Particles_3D::Allocate_Particles_Grid_Field_Real( Real **array_dev, int size ){ - size_t global_free, global_total; - CudaSafeCall( cudaMemGetInfo( &global_free, &global_total ) ); - #ifdef PRINT_GPU_MEMORY - chprintf( "Allocating GPU Memory: %ld MB free \n", global_free/1000000); - #endif - if ( global_free < size*sizeof(Real) ){ - printf( "ERROR: Not enough global device memory \n" ); - printf( " Available Memory: %ld MB \n", global_free/1000000 ); - printf( " Requested Memory: %ld MB \n", size*sizeof(Real)/1000000 ); - exit(-1); - } - CudaSafeCall( cudaMalloc((void**)array_dev, size*sizeof(Real)) ); - cudaDeviceSynchronize(); -} - void Particles_3D::Allocate_Particles_GPU_Array_int( int **array_dev, part_int_t size ){ size_t global_free, global_total; CudaSafeCall( cudaMemGetInfo( &global_free, &global_total ) ); @@ -159,5 +195,5 @@ void Particles_3D::Set_Particles_Array_Real( Real value, Real *array_dev, part_i - +#endif //PARTICLES_GPU #endif//PARTICLES diff --git a/src/particles/particles_boundaries.cpp b/src/particles/particles_boundaries.cpp index 432903b2c..6b26b2121 100644 --- a/src/particles/particles_boundaries.cpp +++ b/src/particles/particles_boundaries.cpp @@ -19,7 +19,7 @@ //Transfer the particles that moved outside the local domain void Grid3D::Transfer_Particles_Boundaries( struct parameters P ){ - + CudaCheckError(); //Transfer Particles Boundaries Particles.TRANSFER_PARTICLES_BOUNDARIES = true; #ifdef CPU_TIME @@ -30,7 +30,7 @@ void Grid3D::Transfer_Particles_Boundaries( struct parameters P ){ Timer.Part_Boundaries.End(); #endif Particles.TRANSFER_PARTICLES_BOUNDARIES = false; - + CudaCheckError(); } #ifdef MPI_CHOLLA @@ -455,9 +455,9 @@ void Grid3D::Load_and_Send_Particles_Z1( int ireq_n_particles, int ireq_particle Particles.Load_Particles_to_Buffer_GPU(2, 1, send_buffer_z1_particles, buffer_length_particles_z1_send ); #endif //PARTICLES_GPU - MPI_Isend(&Particles.n_send_z1, 1, MPI_CHREAL, dest[5], 4, world, &send_request_n_particles[1]); + MPI_Isend(&Particles.n_send_z1, 1, MPI_PART_INT, dest[5], 4, world, &send_request_n_particles[1]); MPI_Request_free(send_request_n_particles+1); - MPI_Irecv(&Particles.n_recv_z1, 1, MPI_CHREAL, source[5], 5, world, &recv_request_n_particles[ireq_n_particles]); + MPI_Irecv(&Particles.n_recv_z1, 1, MPI_PART_INT, source[5], 5, world, &recv_request_n_particles[ireq_n_particles]); // if ( Particles.n_send_z1 > 0 ) std::cout << " Sent Z1: " << Particles.n_send_z1 << std::endl; buffer_length = Particles.n_send_z1 * N_DATA_PER_PARTICLE_TRANSFER; #ifdef PARTICLES_CPU @@ -835,7 +835,7 @@ void Particles_3D::Copy_Transfer_Particles_from_Buffer_GPU(int n_recv, Real *rec part_int_t n_local_after = n_local + n_recv; if ( n_local_after > particles_array_size ){ - printf(" Reallocating GPU particles arrays \n" ); + printf(" Reallocating GPU particles arrays. N local particles: %d \n", n_local_after ); int new_size = G.gpu_allocation_factor * n_local_after; Extend_GPU_Array( &pos_x_dev, (int) particles_array_size, new_size, true ); Extend_GPU_Array( &pos_y_dev, (int) particles_array_size, new_size, false ); diff --git a/src/particles/particles_dynamics.cpp b/src/particles/particles_dynamics.cpp index 8215e3135..a979565a2 100644 --- a/src/particles/particles_dynamics.cpp +++ b/src/particles/particles_dynamics.cpp @@ -154,6 +154,7 @@ Real Grid3D::Calc_Particles_dt_function( part_int_t p_start, part_int_t p_end ){ //Update the particles positions and velocities void Grid3D::Advance_Particles( int N_step ){ + CudaCheckError(); #ifdef CPU_TIME if ( N_step == 1) Timer.Advance_Part_1.Start(); if ( N_step == 2) Timer.Advance_Part_2.Start(); @@ -179,6 +180,7 @@ void Grid3D::Advance_Particles( int N_step ){ if ( N_step == 1) Timer.Advance_Part_1.End(); if ( N_step == 2) Timer.Advance_Part_2.End(); #endif + CudaCheckError(); } diff --git a/src/particles/supernova.h b/src/particles/supernova.h index fa0e4250a..c780eac95 100644 --- a/src/particles/supernova.h +++ b/src/particles/supernova.h @@ -9,16 +9,16 @@ namespace Supernova { - const int SN = 0, RESOLVED = 1, NOT_RESOLVED = 2, ENERGY = 3, MOMENTUM = 4; + const int SN = 0, RESOLVED = 1, NOT_RESOLVED = 2, ENERGY = 3, MOMENTUM = 4, UNRES_ENERGY = 5; - // supernova rate: 1SN / 100 solar masses per 10^4 kyr - static const Real SNR=1e-6; + // supernova rate: 1SN / 100 solar masses per 40^4 kyr + static const Real SNR=2.5e-7; static const Real ENERGY_PER_SN = 1e51 / MASS_UNIT*TIME_UNIT*TIME_UNIT/LENGTH_UNIT/LENGTH_UNIT; static const Real MASS_PER_SN = 10.0; // 10 solarMasses per SN - static const Real FINAL_MOMENTUM = 2.8e5 / LENGTH_UNIT* 1e5 *TIME_UNIT; // 2.8e5 M_s km/s * n_0^{-0.17} -> eq.(34) Kim & Ostriker (2015) + static const Real FINAL_MOMENTUM = 2.8e5 / LENGTH_UNIT * 1e5 * TIME_UNIT; // 2.8e5 M_s km/s * n_0^{-0.17} -> eq.(34) Kim & Ostriker (2015) static const Real MU = 0.6; static const Real R_SH = 0.0302; // 30.2 pc * n_0^{-0.46} -> eq.(31) Kim & Ostriker (2015) - static const Real SN_ERA = 1.0e4; // assume SN occur during first 10 Myr after cluster formation. + static const Real SN_ERA = 4.0e4; // assume SN occur during first 40 Myr after cluster formation. #ifdef PARTICLES_GPU extern curandStateMRG32k3a_t* curandStates; @@ -27,6 +27,8 @@ namespace Supernova { void initState(struct parameters *P, part_int_t n_local, Real allocation_factor = 1); //void initState(struct parameters *P); + extern Real t_buff, dt_buff; + #endif //PARTICLES_GPU } diff --git a/src/reconstruction/plmp_cuda.cu b/src/reconstruction/plmp_cuda.cu index 33d609182..2a6b637f7 100644 --- a/src/reconstruction/plmp_cuda.cu +++ b/src/reconstruction/plmp_cuda.cu @@ -10,7 +10,7 @@ #include "../reconstruction/plmp_cuda.h" #ifdef DE //PRESSURE_DE -#include "../hydro/hydro_cuda.h" +#include "../utils/hydro_utilities.h" #endif @@ -102,7 +102,7 @@ __global__ void PLMP_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou E = dev_conserved[4*n_cells + id]; E_kin = 0.5 * d_i * ( vx_i*vx_i + vy_i*vy_i + vz_i*vz_i ); dge = dev_conserved[(n_fields-1)*n_cells + id]; - p_i = Get_Pressure_From_DE( E, E - E_kin, dge, gamma ); + p_i = hydro_utilities::Get_Pressure_From_DE( E, E - E_kin, dge, gamma ); #else p_i = (dev_conserved[4*n_cells + id] - 0.5*d_i*(vx_i*vx_i + vy_i*vy_i + vz_i*vz_i)) * (gamma - 1.0); #endif //PRESSURE_DE @@ -127,7 +127,7 @@ __global__ void PLMP_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou E = dev_conserved[4*n_cells + id]; E_kin = 0.5 * d_imo * ( vx_imo*vx_imo + vy_imo*vy_imo + vz_imo*vz_imo ); dge = dev_conserved[(n_fields-1)*n_cells + id]; - p_imo = Get_Pressure_From_DE( E, E - E_kin, dge, gamma ); + p_imo = hydro_utilities::Get_Pressure_From_DE( E, E - E_kin, dge, gamma ); #else p_imo = (dev_conserved[4*n_cells + id] - 0.5*d_imo*(vx_imo*vx_imo + vy_imo*vy_imo + vz_imo*vz_imo)) * (gamma - 1.0); #endif //PRESSURE_DE @@ -152,7 +152,7 @@ __global__ void PLMP_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou E = dev_conserved[4*n_cells + id]; E_kin = 0.5 * d_ipo * ( vx_ipo*vx_ipo + vy_ipo*vy_ipo + vz_ipo*vz_ipo ); dge = dev_conserved[(n_fields-1)*n_cells + id]; - p_ipo = Get_Pressure_From_DE( E, E - E_kin, dge, gamma ); + p_ipo = hydro_utilities::Get_Pressure_From_DE( E, E - E_kin, dge, gamma ); #else p_ipo = (dev_conserved[4*n_cells + id] - 0.5*d_ipo*(vx_ipo*vx_ipo + vy_ipo*vy_ipo + vz_ipo*vz_ipo)) * (gamma - 1.0); #endif //PRESSURE_DE diff --git a/src/reconstruction/ppmc_cuda.cu b/src/reconstruction/ppmc_cuda.cu index 5464149cd..a52d49b48 100644 --- a/src/reconstruction/ppmc_cuda.cu +++ b/src/reconstruction/ppmc_cuda.cu @@ -9,6 +9,7 @@ #include "../global/global.h" #include "../global/global_cuda.h" #include "../reconstruction/ppmc_cuda.h" +#include "../utils/hydro_utilities.h" #ifdef DE //PRESSURE_DE #include "../hydro/hydro_cuda.h" @@ -130,7 +131,7 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou E = dev_conserved[4*n_cells + id]; E_kin = 0.5 * d_i * ( vx_i*vx_i + vy_i*vy_i + vz_i*vz_i ); dge = dev_conserved[(n_fields-1)*n_cells + id]; - p_i = Get_Pressure_From_DE( E, E - E_kin, dge, gamma ); + p_i = hydro_utilities::Get_Pressure_From_DE( E, E - E_kin, dge, gamma ); #else p_i = (dev_conserved[4*n_cells + id] - 0.5*d_i*(vx_i*vx_i + vy_i*vy_i + vz_i*vz_i)) * (gamma - 1.0); #endif //PRESSURE_DE @@ -155,7 +156,7 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou E = dev_conserved[4*n_cells + id]; E_kin = 0.5 * d_imo * ( vx_imo*vx_imo + vy_imo*vy_imo + vz_imo*vz_imo ); dge = dev_conserved[(n_fields-1)*n_cells + id]; - p_imo = Get_Pressure_From_DE( E, E - E_kin, dge, gamma ); + p_imo = hydro_utilities::Get_Pressure_From_DE( E, E - E_kin, dge, gamma ); #else p_imo = (dev_conserved[4*n_cells + id] - 0.5*d_imo*(vx_imo*vx_imo + vy_imo*vy_imo + vz_imo*vz_imo)) * (gamma - 1.0); #endif //PRESSURE_DE @@ -180,7 +181,7 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou E = dev_conserved[4*n_cells + id]; E_kin = 0.5 * d_ipo * ( vx_ipo*vx_ipo + vy_ipo*vy_ipo + vz_ipo*vz_ipo ); dge = dev_conserved[(n_fields-1)*n_cells + id]; - p_ipo = Get_Pressure_From_DE( E, E - E_kin, dge, gamma ); + p_ipo = hydro_utilities::Get_Pressure_From_DE( E, E - E_kin, dge, gamma ); #else p_ipo = (dev_conserved[4*n_cells + id] - 0.5*d_ipo*(vx_ipo*vx_ipo + vy_ipo*vy_ipo + vz_ipo*vz_ipo)) * (gamma - 1.0); #endif //PRESSURE_DE @@ -205,7 +206,7 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou E = dev_conserved[4*n_cells + id]; E_kin = 0.5 * d_imt * ( vx_imt*vx_imt + vy_imt*vy_imt + vz_imt*vz_imt ); dge = dev_conserved[(n_fields-1)*n_cells + id]; - p_imt = Get_Pressure_From_DE( E, E - E_kin, dge, gamma ); + p_imt = hydro_utilities::Get_Pressure_From_DE( E, E - E_kin, dge, gamma ); #else p_imt = (dev_conserved[4*n_cells + id] - 0.5*d_imt*(vx_imt*vx_imt + vy_imt*vy_imt + vz_imt*vz_imt)) * (gamma - 1.0); #endif //PRESSURE_DE @@ -230,7 +231,7 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou E = dev_conserved[4*n_cells + id]; E_kin = 0.5 * d_ipt * ( vx_ipt*vx_ipt + vy_ipt*vy_ipt + vz_ipt*vz_ipt ); dge = dev_conserved[(n_fields-1)*n_cells + id]; - p_ipt = Get_Pressure_From_DE( E, E - E_kin, dge, gamma ); + p_ipt = hydro_utilities::Get_Pressure_From_DE( E, E - E_kin, dge, gamma ); #else p_ipt = (dev_conserved[4*n_cells + id] - 0.5*d_ipt*(vx_ipt*vx_ipt + vy_ipt*vy_ipt + vz_ipt*vz_ipt)) * (gamma - 1.0); #endif //PRESSURE_DE diff --git a/src/reconstruction/ppmp_cuda.cu b/src/reconstruction/ppmp_cuda.cu index 75569dca4..dee4739f8 100644 --- a/src/reconstruction/ppmp_cuda.cu +++ b/src/reconstruction/ppmp_cuda.cu @@ -11,7 +11,7 @@ #include "../reconstruction/ppmp_cuda.h" #ifdef DE //PRESSURE_DE -#include "../hydro/hydro_cuda.h" +#include "../utils/hydro_utilities.h" #endif // #define STEEPENING @@ -143,7 +143,7 @@ __global__ void PPMP_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou E = dev_conserved[4*n_cells + id]; E_kin = 0.5 * d_i * ( vx_i*vx_i + vy_i*vy_i + vz_i*vz_i ); dge = dev_conserved[(n_fields-1)*n_cells + id]; - p_i = Get_Pressure_From_DE( E, E - E_kin, dge, gamma ); + p_i = hydro_utilities::Get_Pressure_From_DE( E, E - E_kin, dge, gamma ); #else p_i = (dev_conserved[4*n_cells + id] - 0.5*d_i*(vx_i*vx_i + vy_i*vy_i + vz_i*vz_i)) * (gamma - 1.0); #endif //PRESSURE_DE @@ -168,7 +168,7 @@ __global__ void PPMP_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou E = dev_conserved[4*n_cells + id]; E_kin = 0.5 * d_imo * ( vx_imo*vx_imo + vy_imo*vy_imo + vz_imo*vz_imo ); dge = dev_conserved[(n_fields-1)*n_cells + id]; - p_imo = Get_Pressure_From_DE( E, E - E_kin, dge, gamma ); + p_imo = hydro_utilities::Get_Pressure_From_DE( E, E - E_kin, dge, gamma ); #else p_imo = (dev_conserved[4*n_cells + id] - 0.5*d_imo*(vx_imo*vx_imo + vy_imo*vy_imo + vz_imo*vz_imo)) * (gamma - 1.0); #endif //PRESSURE_DE @@ -193,7 +193,7 @@ __global__ void PPMP_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou E = dev_conserved[4*n_cells + id]; E_kin = 0.5 * d_ipo * ( vx_ipo*vx_ipo + vy_ipo*vy_ipo + vz_ipo*vz_ipo ); dge = dev_conserved[(n_fields-1)*n_cells + id]; - p_ipo = Get_Pressure_From_DE( E, E - E_kin, dge, gamma ); + p_ipo = hydro_utilities::Get_Pressure_From_DE( E, E - E_kin, dge, gamma ); #else p_ipo = (dev_conserved[4*n_cells + id] - 0.5*d_ipo*(vx_ipo*vx_ipo + vy_ipo*vy_ipo + vz_ipo*vz_ipo)) * (gamma - 1.0); #endif //PRESSURE_DE @@ -218,7 +218,7 @@ __global__ void PPMP_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou E = dev_conserved[4*n_cells + id]; E_kin = 0.5 * d_imt * ( vx_imt*vx_imt + vy_imt*vy_imt + vz_imt*vz_imt ); dge = dev_conserved[(n_fields-1)*n_cells + id]; - p_imt = Get_Pressure_From_DE( E, E - E_kin, dge, gamma ); + p_imt = hydro_utilities::Get_Pressure_From_DE( E, E - E_kin, dge, gamma ); #else p_imt = (dev_conserved[4*n_cells + id] - 0.5*d_imt*(vx_imt*vx_imt + vy_imt*vy_imt + vz_imt*vz_imt)) * (gamma - 1.0); #endif //PRESSURE_DE @@ -243,7 +243,7 @@ __global__ void PPMP_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou E = dev_conserved[4*n_cells + id]; E_kin = 0.5 * d_ipt * ( vx_ipt*vx_ipt + vy_ipt*vy_ipt + vz_ipt*vz_ipt ); dge = dev_conserved[(n_fields-1)*n_cells + id]; - p_ipt = Get_Pressure_From_DE( E, E - E_kin, dge, gamma ); + p_ipt = hydro_utilities::Get_Pressure_From_DE( E, E - E_kin, dge, gamma ); #else p_ipt = (dev_conserved[4*n_cells + id] - 0.5*d_ipt*(vx_ipt*vx_ipt + vy_ipt*vy_ipt + vz_ipt*vz_ipt)) * (gamma - 1.0); #endif //PRESSURE_DE diff --git a/src/riemann_solvers/exact_cuda.cu b/src/riemann_solvers/exact_cuda.cu index 0705b2f14..d84464828 100644 --- a/src/riemann_solvers/exact_cuda.cu +++ b/src/riemann_solvers/exact_cuda.cu @@ -11,7 +11,7 @@ #include "../riemann_solvers/exact_cuda.h" #ifdef DE //PRESSURE_DE -#include "../hydro/hydro_cuda.h" +#include "../utils/hydro_utilities.h" #endif @@ -66,7 +66,7 @@ __global__ void Calculate_Exact_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds E = dev_bounds_L[4*n_cells + tid]; E_kin = 0.5 * dl * ( vxl*vxl + vyl*vyl + vzl*vzl ); dge = dev_bounds_L[(n_fields-1)*n_cells + tid]; - pl = Get_Pressure_From_DE( E, E - E_kin, dge, gamma ); + pl = hydro_utilities::Get_Pressure_From_DE( E, E - E_kin, dge, gamma ); #else pl = (dev_bounds_L[4*n_cells + tid] - 0.5*dl*(vxl*vxl + vyl*vyl + vzl*vzl)) * (gamma - 1.0); #endif //PRESSURE_DE @@ -87,7 +87,7 @@ __global__ void Calculate_Exact_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds E = dev_bounds_R[4*n_cells + tid]; E_kin = 0.5 * dr * ( vxr*vxr + vyr*vyr + vzr*vzr ); dge = dev_bounds_R[(n_fields-1)*n_cells + tid]; - pr = Get_Pressure_From_DE( E, E - E_kin, dge, gamma ); + pr = hydro_utilities::Get_Pressure_From_DE( E, E - E_kin, dge, gamma ); #else pr = (dev_bounds_R[4*n_cells + tid] - 0.5*dr*(vxr*vxr + vyr*vyr + vzr*vzr)) * (gamma - 1.0); #endif //PRESSURE_DE diff --git a/src/riemann_solvers/hll_cuda.cu b/src/riemann_solvers/hll_cuda.cu index 07369a95e..a69cf9d0f 100644 --- a/src/riemann_solvers/hll_cuda.cu +++ b/src/riemann_solvers/hll_cuda.cu @@ -10,11 +10,10 @@ #include "../riemann_solvers/hll_cuda.h" #ifdef DE //PRESSURE_DE -#include "../hydro/hydro_cuda.h" +#include "../utils/hydro_utilities.h" #endif - /*! \fn Calculate_HLLC_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_R, Real *dev_flux, int nx, int ny, int nz, int n_ghost, Real gamma, int dir, int n_fields) * \brief HLLC Riemann solver based on the version described in Toro (2006), Sec. 10.4. */ __global__ void Calculate_HLL_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_R, Real *dev_flux, int nx, int ny, int nz, int n_ghost, Real gamma, int dir, int n_fields) @@ -100,7 +99,7 @@ __global__ void Calculate_HLL_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_R vzl = mzl / dl; #ifdef DE //PRESSURE_DE E_kin = 0.5 * dl * ( vxl*vxl + vyl*vyl + vzl*vzl ); - pl = Get_Pressure_From_DE( El, El - E_kin, dgel, gamma ); + pl = hydro_utilities::Get_Pressure_From_DE( El, El - E_kin, dgel, gamma ); #else pl = (El - 0.5*dl*(vxl*vxl + vyl*vyl + vzl*vzl)) * (gamma - 1.0); #endif//DE @@ -118,7 +117,7 @@ __global__ void Calculate_HLL_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_R vzr = mzr / dr; #ifdef DE //PRESSURE_DE E_kin = 0.5 * dr * ( vxr*vxr + vyr*vyr + vzr*vzr ); - pr = Get_Pressure_From_DE( Er, Er - E_kin, dger, gamma ); + pr = hydro_utilities::Get_Pressure_From_DE( Er, Er - E_kin, dger, gamma ); #else pr = (Er - 0.5*dr*(vxr*vxr + vyr*vyr + vzr*vzr)) * (gamma - 1.0); #endif//DE diff --git a/src/riemann_solvers/hllc_cuda.cu b/src/riemann_solvers/hllc_cuda.cu index 0f15455ce..8a765bb8f 100644 --- a/src/riemann_solvers/hllc_cuda.cu +++ b/src/riemann_solvers/hllc_cuda.cu @@ -10,7 +10,7 @@ #include "../riemann_solvers/hllc_cuda.h" #ifdef DE //PRESSURE_DE -#include "../hydro/hydro_cuda.h" +#include "../utils/hydro_utilities.h" #endif @@ -99,7 +99,7 @@ __global__ void Calculate_HLLC_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_ vzl = mzl / dl; #ifdef DE //PRESSURE_DE E_kin = 0.5 * dl * ( vxl*vxl + vyl*vyl + vzl*vzl ); - pl = Get_Pressure_From_DE( El, El - E_kin, dgel, gamma ); + pl = hydro_utilities::Get_Pressure_From_DE( El, El - E_kin, dgel, gamma ); #else pl = (El - 0.5*dl*(vxl*vxl + vyl*vyl + vzl*vzl)) * (gamma - 1.0); #endif //PRESSURE_DE @@ -117,7 +117,7 @@ __global__ void Calculate_HLLC_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_ vzr = mzr / dr; #ifdef DE //PRESSURE_DE E_kin = 0.5 * dr * ( vxr*vxr + vyr*vyr + vzr*vzr ); - pr = Get_Pressure_From_DE( Er, Er - E_kin, dger, gamma ); + pr = hydro_utilities::Get_Pressure_From_DE( Er, Er - E_kin, dger, gamma ); #else pr = (Er - 0.5*dr*(vxr*vxr + vyr*vyr + vzr*vzr)) * (gamma - 1.0); #endif //PRESSURE_DE diff --git a/src/riemann_solvers/hlld_cuda.cu b/src/riemann_solvers/hlld_cuda.cu index 621c568b0..489647bdb 100644 --- a/src/riemann_solvers/hlld_cuda.cu +++ b/src/riemann_solvers/hlld_cuda.cu @@ -15,7 +15,7 @@ #include "../riemann_solvers/hlld_cuda.h" #ifdef DE //PRESSURE_DE - #include "../hydro/hydro_cuda.h" + #include "../utils/hydro_utilities.h" #endif // DE #ifdef CUDA @@ -118,7 +118,7 @@ * _hlldInternal::_dotProduct(magneticXL, magneticYL, magneticZL, magneticXL, magneticYL, magneticZL); - Real const gasPressureL = fmax(Get_Pressure_From_DE(energyL, + Real const gasPressureL = fmax(hydro_utilities::Get_Pressure_From_DE(energyL, energyL - energyKineticL - energyMagneticL, thermalEnergyConservedL, gamma), @@ -156,7 +156,7 @@ * _hlldInternal::_dotProduct(magneticXR, magneticYR, magneticZR, magneticXR, magneticYR, magneticZR); - Real const gasPressureR = fmax(Get_Pressure_From_DE(energyR, + Real const gasPressureR = fmax(hydro_utilities::Get_Pressure_From_DE(energyR, energyR - energyKineticR - energyMagneticR, thermalEnergyConservedR, gamma), diff --git a/src/riemann_solvers/roe_cuda.cu b/src/riemann_solvers/roe_cuda.cu index 2f8903fe1..88b094468 100644 --- a/src/riemann_solvers/roe_cuda.cu +++ b/src/riemann_solvers/roe_cuda.cu @@ -10,7 +10,7 @@ #include "../riemann_solvers/roe_cuda.h" #ifdef DE //PRESSURE_DE -#include "../hydro/hydro_cuda.h" +#include "../utils/hydro_utilities.h" #endif /*! \fn Calculate_Roe_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_R, Real *dev_flux, int nx, int ny, int nz, int n_ghost, Real gamma, Real *dev_etah, int dir, int n_fields) @@ -100,7 +100,7 @@ __global__ void Calculate_Roe_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_R vzl = mzl / dl; #ifdef DE //PRESSURE_DE E_kin = 0.5 * dl * ( vxl*vxl + vyl*vyl + vzl*vzl ); - pl = Get_Pressure_From_DE( El, El - E_kin, dgel, gamma ); + pl = hydro_utilities::Get_Pressure_From_DE( El, El - E_kin, dgel, gamma ); #else pl = (El - 0.5*dl*(vxl*vxl + vyl*vyl + vzl*vzl)) * (gamma - 1.0); #endif //PRESSURE_DE @@ -118,7 +118,7 @@ __global__ void Calculate_Roe_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_R vzr = mzr / dr; #ifdef DE //PRESSURE_DE E_kin = 0.5 * dr * ( vxr*vxr + vyr*vyr + vzr*vzr ); - pr = Get_Pressure_From_DE( Er, Er - E_kin, dger, gamma ); + pr = hydro_utilities::Get_Pressure_From_DE( Er, Er - E_kin, dger, gamma ); #else pr = (Er - 0.5*dr*(vxr*vxr + vyr*vyr + vzr*vzr)) * (gamma - 1.0); #endif //PRESSURE_DE diff --git a/src/system_tests/cooling_system_tests.cpp b/src/system_tests/cooling_system_tests.cpp new file mode 100644 index 000000000..8b62ef092 --- /dev/null +++ b/src/system_tests/cooling_system_tests.cpp @@ -0,0 +1,91 @@ +/*! + * \file hydro_system_tests.cpp + * \brief Contains all the system tests for the HYDRO build type + * + */ + + +// External Libraries and Headers +#include +#include // provides std:sin + +// Local includes +#include "../system_tests/system_tester.h" +#include "../utils/testing_utilities.h" + + + +#ifndef PI +#define PI 3.141592653589793 +#endif + +#define COOL_RHO 6.9498489284711 + +TEST(tCOOLINGSYSTEMConstant5, + CorrectInputExpectCorrectOutput) +{ + // dt = 0.3 + // rho = COOL_RHO*1e5 + // pressure = 1e-3 + // T = 1e5 + /* + double energy = 0.0014850544057189395;// Python + */ + double energy = 0.00148501098087863;// Cholla + systemTest::SystemTestRunner testObject(false, false, false); + testObject.launchCholla(); + testObject.openHydroTestData(); + + testingUtilities::analyticConstant(testObject,"density",COOL_RHO*1e5); + testingUtilities::analyticConstant(testObject,"momentum_x",0.0); + testingUtilities::analyticConstant(testObject,"momentum_y",0.0); + testingUtilities::analyticConstant(testObject,"momentum_z",0.0); + testingUtilities::analyticConstant(testObject,"Energy",energy); + +} + + +TEST(tCOOLINGSYSTEMConstant7, + CorrectInputExpectCorrectOutput) +{ + // dt = 100 + // rho = COOL_RHO*1e5 + // pressure = 1e-1 + // T = 1e7 + // double energy = 0.14982743570299709; // Python + double energy = 0.14982745510047499; // Cholla + systemTest::SystemTestRunner testObject(false, false, false); + testObject.launchCholla(); + testObject.openHydroTestData(); + + testingUtilities::analyticConstant(testObject,"density",COOL_RHO*1e5); + testingUtilities::analyticConstant(testObject,"momentum_x",0.0); + testingUtilities::analyticConstant(testObject,"momentum_y",0.0); + testingUtilities::analyticConstant(testObject,"momentum_z",0.0); + testingUtilities::analyticConstant(testObject,"Energy",energy); + +} + +TEST(tCOOLINGSYSTEMConstant8, + CorrectInputExpectCorrectOutput) +{ + // dt = 90 + // rho = COOL_RHO*1e5 + // pressure = 1 + // T = 1e8 + + // double energy = 1.499669522009355; // Python + double energy = 1.4996695198095711; // Cholla + systemTest::SystemTestRunner testObject(false, false, false); + testObject.launchCholla(); + testObject.openHydroTestData(); + + testingUtilities::analyticConstant(testObject,"density",COOL_RHO*1e5); + testingUtilities::analyticConstant(testObject,"momentum_x",0.0); + testingUtilities::analyticConstant(testObject,"momentum_y",0.0); + testingUtilities::analyticConstant(testObject,"momentum_z",0.0); + testingUtilities::analyticConstant(testObject,"Energy",energy); + + +} + diff --git a/src/system_tests/gravity_system_tests.cpp b/src/system_tests/gravity_system_tests.cpp new file mode 100644 index 000000000..76cae4d7d --- /dev/null +++ b/src/system_tests/gravity_system_tests.cpp @@ -0,0 +1,30 @@ +/*! + * \file gravity_system_tests.cpp + * \author Evan Schneider (eschneider@pitt.edu) + * \brief Contains all the system tests for the GRAVITY build type + * + */ + +// External Libraries and Headers +#include + +// Local includes +#include "../system_tests/system_tester.h" + +// ============================================================================= +// Test Suite: tGRAVITYSYSTEMSphericalCollapse +// ============================================================================= +/*! + * \defgroup tGRAVITYSYSTEMSphericalCollapse_CorrectInputExpectCorrectOutput + * \brief Test spherical collapse with hydro + FFT gravity initial conditions + * + */ +/// @{ +TEST(tGRAVITYSYSTEMSphericalCollapse, + CorrectInputExpectCorrectOutput) +{ + systemTest::SystemTestRunner collapseTest; + collapseTest.runTest(); +} +/// @} +// ============================================================================= diff --git a/src/system_tests/input_files/tCOOLINGSYSTEMConstant5_CorrectInputExpectCorrectOutput.txt b/src/system_tests/input_files/tCOOLINGSYSTEMConstant5_CorrectInputExpectCorrectOutput.txt new file mode 100644 index 000000000..36a74d88c --- /dev/null +++ b/src/system_tests/input_files/tCOOLINGSYSTEMConstant5_CorrectInputExpectCorrectOutput.txt @@ -0,0 +1,56 @@ +# +# Parameter File for 3D Constant +# + +################################################ +# number of grid cells in the x dimension +nx=2 +# number of grid cells in the y dimension +ny=2 +# number of grid cells in the z dimension +nz=2 +# final output time +tout=0.3 +# time interval for output +outstep=0.3 +# name of initial conditions +init=Constant +# domain properties +xmin=0.0 +ymin=0.0 +zmin=0.0 +xlen=1.0 +ylen=1.0 +zlen=1.0 +# type of boundary conditions +xl_bcnd=1 +xu_bcnd=1 +yl_bcnd=1 +yu_bcnd=1 +zl_bcnd=1 +zu_bcnd=1 +# path to output directory +outdir=./ + +################################################# +rho=6.9498489284711e5 +# velocity +vx=0 +vy=0 +vz=0 +# pressure +P=1e-3 +# 1e5 Kelvin = 1e13*1e-3/1e5 +# value of gamma +gamma=1.666666667 + + +# Choose rho so that pressure (cholla units) = temperature (Kelvin) +# n = d*DENSITY_UNIT / (mu * MP); +# T = p*PRESSURE_UNIT/ (n*KB) = (p/d) * mu*MP * PRESSURE_UNIT / (KB * DENSITY_UNIT) +# Let d = mu*MP * PRESSURE_UNIT / (KB * DENSITY_UNIT) = (mu*MP/KB) * (VELOCITY_UNIT)**2 +# = (0.6 * 1.672622e-24 / 1.380658e-16) * (3.08567758e21 / 3.15569e10)**2 +# = 69498489284711.24 = 6.9498489284711e13 +# Choose rho so that pressure (cholla units) = temperature / e13 (Kelvin) +# T = p * (6.9e13)/d +# T/e13 = p * (6.9)/d diff --git a/src/system_tests/input_files/tCOOLINGSYSTEMConstant7_CorrectInputExpectCorrectOutput.txt b/src/system_tests/input_files/tCOOLINGSYSTEMConstant7_CorrectInputExpectCorrectOutput.txt new file mode 100644 index 000000000..a0b8a2fcb --- /dev/null +++ b/src/system_tests/input_files/tCOOLINGSYSTEMConstant7_CorrectInputExpectCorrectOutput.txt @@ -0,0 +1,56 @@ +# +# Parameter File for 3D Constant +# + +################################################ +# number of grid cells in the x dimension +nx=2 +# number of grid cells in the y dimension +ny=2 +# number of grid cells in the z dimension +nz=2 +# final output time +tout=100 +# time interval for output +outstep=100 +# name of initial conditions +init=Constant +# domain properties +xmin=0.0 +ymin=0.0 +zmin=0.0 +xlen=1.0 +ylen=1.0 +zlen=1.0 +# type of boundary conditions +xl_bcnd=1 +xu_bcnd=1 +yl_bcnd=1 +yu_bcnd=1 +zl_bcnd=1 +zu_bcnd=1 +# path to output directory +outdir=./ + +################################################# +rho=6.9498489284711e5 +# velocity +vx=0 +vy=0 +vz=0 +# pressure +P=1e-1 +# 1e7 Kelvin = 1e13*1e-2/1e4 +# value of gamma +gamma=1.666666667 + + +# Choose rho so that pressure (cholla units) = temperature (Kelvin) +# n = d*DENSITY_UNIT / (mu * MP); +# T = p*PRESSURE_UNIT/ (n*KB) = (p/d) * mu*MP * PRESSURE_UNIT / (KB * DENSITY_UNIT) +# Let d = mu*MP * PRESSURE_UNIT / (KB * DENSITY_UNIT) = (mu*MP/KB) * (VELOCITY_UNIT)**2 +# = (0.6 * 1.672622e-24 / 1.380658e-16) * (3.08567758e21 / 3.15569e10)**2 +# = 69498489284711.24 = 6.9498489284711e13 +# Choose rho so that pressure (cholla units) = temperature / e13 (Kelvin) +# T = p * (6.9e13)/d +# T/e13 = p * (6.9)/d diff --git a/src/system_tests/input_files/tCOOLINGSYSTEMConstant8_CorrectInputExpectCorrectOutput.txt b/src/system_tests/input_files/tCOOLINGSYSTEMConstant8_CorrectInputExpectCorrectOutput.txt new file mode 100644 index 000000000..bb6d618bc --- /dev/null +++ b/src/system_tests/input_files/tCOOLINGSYSTEMConstant8_CorrectInputExpectCorrectOutput.txt @@ -0,0 +1,56 @@ +# +# Parameter File for 3D Constant +# + +################################################ +# number of grid cells in the x dimension +nx=2 +# number of grid cells in the y dimension +ny=2 +# number of grid cells in the z dimension +nz=2 +# final output time +tout=90 +# time interval for output +outstep=90 +# name of initial conditions +init=Constant +# domain properties +xmin=0.0 +ymin=0.0 +zmin=0.0 +xlen=1.0 +ylen=1.0 +zlen=1.0 +# type of boundary conditions +xl_bcnd=1 +xu_bcnd=1 +yl_bcnd=1 +yu_bcnd=1 +zl_bcnd=1 +zu_bcnd=1 +# path to output directory +outdir=./ + +################################################# +rho=6.9498489284711e5 +# velocity +vx=0 +vy=0 +vz=0 +# pressure +P=1 +# 1e8 Kelvin = 1e13*1/1e5 +# value of gamma +gamma=1.666666667 + + +# Choose rho so that pressure (cholla units) = temperature (Kelvin) +# n = d*DENSITY_UNIT / (mu * MP); +# T = p*PRESSURE_UNIT/ (n*KB) = (p/d) * mu*MP * PRESSURE_UNIT / (KB * DENSITY_UNIT) +# Let d = mu*MP * PRESSURE_UNIT / (KB * DENSITY_UNIT) = (mu*MP/KB) * (VELOCITY_UNIT)**2 +# = (0.6 * 1.672622e-24 / 1.380658e-16) * (3.08567758e21 / 3.15569e10)**2 +# = 69498489284711.24 = 6.9498489284711e13 +# Choose rho so that pressure (cholla units) = temperature / e13 (Kelvin) +# T = p * (6.9e13)/d +# T/e13 = p * (6.9)/d diff --git a/src/system_tests/input_files/tGRAVITYSYSTEMSphericalCollapse_CorrectInputExpectCorrectOutput.txt b/src/system_tests/input_files/tGRAVITYSYSTEMSphericalCollapse_CorrectInputExpectCorrectOutput.txt new file mode 100644 index 000000000..b7a9f0c25 --- /dev/null +++ b/src/system_tests/input_files/tGRAVITYSYSTEMSphericalCollapse_CorrectInputExpectCorrectOutput.txt @@ -0,0 +1,34 @@ +# +# Parameter File for the 3D Sphere Collapse. +# +###################################### +# number of grid cells in the x dimension +nx=16 +# number of grid cells in the y dimension +ny=16 +# number of grid cells in the z dimension +nz=16 +# output time +tout=0.2 +# how often to output +outstep=0.2 +# value of gamma +gamma=1.66666667 +# name of initial conditions +init=Spherical_Overdensity_3D +# domain properties +xmin=0.0 +ymin=0.0 +zmin=0.0 +xlen=1.0 +ylen=1.0 +zlen=1.0 +# type of boundary conditions +xl_bcnd=1 +xu_bcnd=1 +yl_bcnd=1 +yu_bcnd=1 +zl_bcnd=1 +zu_bcnd=1 +# path to output directory +outdir=./ diff --git a/src/utils/cuda_utilities.cpp b/src/utils/cuda_utilities.cpp new file mode 100644 index 000000000..a8b608993 --- /dev/null +++ b/src/utils/cuda_utilities.cpp @@ -0,0 +1,5 @@ +#include "../utils/cuda_utilities.h" + +namespace cuda_utilities { + +} // end namespace cuda_utilities \ No newline at end of file diff --git a/src/utils/cuda_utilities.h b/src/utils/cuda_utilities.h new file mode 100644 index 000000000..e0fea66bc --- /dev/null +++ b/src/utils/cuda_utilities.h @@ -0,0 +1,77 @@ +/*! + * \file hydro_utilities.h + * \author Helena Richie (helenarichie@pitt.edu) + * \brief Contains the declaration of various utility functions for CUDA + * + */ + +#pragma once + +// Local Includes +#include "../global/global.h" +#include "../global/global_cuda.h" +#include "../utils/gpu.hpp" + + +namespace cuda_utilities { + + /*! + * \brief Compute the x, y, and z indices based off of the 1D index + * + * \param[in] id The 1D index + * \param[in] nx The total number of cells in the x direction + * \param[in] ny The total number of cells in the y direction + * \param[out] xid The x index + * \param[out] yid The y index + * \param[out] zid The z index + */ + inline __host__ __device__ void compute3DIndices(int const &id, + int const &nx, + int const &ny, + int &xid, + int &yid, + int &zid) + { + zid = id / (nx * ny); + yid = (id - zid * nx * ny) / nx; + xid = id - zid * nx * ny - yid * nx; + } + + /*! + * \brief Compute the 1D index based off of the 3D indices + * + * \param xid The x index + * \param yid The y index + * \param zid The z index + * \param nx The total number of cells in the x direction + * \param ny The total number of cells in the y direction + * \return int The 1D index + */ + inline __host__ __device__ int compute1DIndex(int const &xid, + int const &yid, + int const &zid, + int const &nx, + int const &ny) + { + return xid + yid*nx + zid*nx*ny; + } + + inline __host__ __device__ void Get_Real_Indices(int const &n_ghost, int const &nx, int const &ny, int const &nz, int &is, int &ie, int &js, int &je, int &ks, int &ke) { + is = n_ghost; + ie = nx - n_ghost; + if (ny == 1) { + js = 0; + je = 1; + } else { + js = n_ghost; + je = ny - n_ghost; + } + if (nz == 1) { + ks = 0; + ke = 1; + } else { + ks = n_ghost; + ke = nz - n_ghost; + } + } +} \ No newline at end of file diff --git a/src/utils/cuda_utilities_tests.cpp b/src/utils/cuda_utilities_tests.cpp new file mode 100644 index 000000000..dc2f20066 --- /dev/null +++ b/src/utils/cuda_utilities_tests.cpp @@ -0,0 +1,122 @@ + +/*! + * \file cuda_utilities_tests.cpp + * \author Robert 'Bob' Caddy (rvc@pitt.edu), Helena Richie (helenarichie@pitt.edu) + * \brief Tests for the contents of cuda_utilities.h and cuda_utilities.cpp + * + */ + +// STL Includes +#include +#include +#include + +// External Includes +#include // Include GoogleTest and related libraries/headers + +// Local Includes +#include "../utils/testing_utilities.h" +#include "../utils/cuda_utilities.h" +#include "../global/global.h" + +/* + PCM : n_ghost = 2 + PLMP : n_ghost = 2 + PLMC : n_ghost = 3 + PPMP : n_ghost = 4 + PPMC : n_ghost = 4 +*/ + +// ============================================================================= +// Local helper functions +namespace +{ + struct TestParams + { + std::vector n_ghost {2, 2, 3, 4}; + std::vector nx {100, 2048, 2048, 2048}; + std::vector ny {1, 2048, 2048, 2048}; + std::vector nz {1, 4096, 4096, 4096}; + std::vector names {"Single-cell 3D PCM/PLMP case", "Large 3D PCM/PLMP case", "Large PLMC case", "Large PPMP/PPMC case"}; + + }; +} + +TEST(tHYDROCudaUtilsGetRealIndices, CorrectInputExpectCorrectOutput) { + TestParams parameters; + std::vector> fiducial_indices {{2, 98, 0, 1, 0, 1}, + {2, 2046, 2, 2046, 2, 4094}, + {3, 2045, 3, 2045, 3, 4093}, + {4, 2044, 4, 2044, 4, 4092}}; + + for (size_t i = 0; i < parameters.names.size(); i++) + { + int is; + int ie; + int js; + int je; + int ks; + int ke; + cuda_utilities::Get_Real_Indices(parameters.n_ghost.at(i), parameters.nx.at(i), parameters.ny.at(i), parameters.nz.at(i), is, ie, js, je, ks, ke); + + std::vector index_names {"is", "ie", "js", "je", "ks", "ke"}; + std::vector test_indices {is, ie, js, je, ks, ke}; + + for (size_t j = 0; j < test_indices.size(); j++) + { + testingUtilities::checkResults(fiducial_indices[i][j], test_indices[j], index_names[j] + " " + parameters.names[i]); + } + } +} + +// ============================================================================= +TEST(tALLCompute3DIndices, + CorrectInputExpectCorrectOutput) +{ + // Parameters + int const id = 723; + int const nx = 34; + int const ny = 14; + + // Fiducial Data + int const fiducialXid = 9; + int const fiducialYid = 7; + int const fiducialZid = 1; + + // Test Variables + int testXid; + int testYid; + int testZid; + + // Get test data + cuda_utilities::compute3DIndices(id, nx, ny, testXid, testYid, testZid); + + EXPECT_EQ(fiducialXid, testXid); + EXPECT_EQ(fiducialYid, testYid); + EXPECT_EQ(fiducialZid, testZid); +} +// ============================================================================= + +// ============================================================================= +TEST(tALLCompute1DIndex, + CorrectInputExpectCorrectOutput) +{ + // Parameters + int const xid = 72; + int const yid = 53; + int const zid = 14; + int const nx = 128; + int const ny = 64; + + // Fiducial Data + int const fiducialId = 121544; + + // Test Variable + int testId; + + // Get test data + testId = cuda_utilities::compute1DIndex(xid, yid, zid, nx, ny); + + EXPECT_EQ(fiducialId, testId); +} +// ============================================================================= diff --git a/src/utils/gpu.hpp b/src/utils/gpu.hpp index 3926e3a6e..1adb9fb57 100644 --- a/src/utils/gpu.hpp +++ b/src/utils/gpu.hpp @@ -20,7 +20,10 @@ static void __attribute__((unused)) check(const hipfftResult err, const char *co exit(err); } -#endif +#endif //CUFFT PARIS PARIC_GALACTIC + +#define WARPSIZE 64 +static constexpr int maxWarpsPerBlock = 1024/WARPSIZE; #define CUFFT_D2Z HIPFFT_D2Z #define CUFFT_FORWARD HIPFFT_FORWARD @@ -57,6 +60,8 @@ static void __attribute__((unused)) check(const hipfftResult err, const char *co #define cudaReadModeElementType hipReadModeElementType #define cudaSetDevice hipSetDevice #define cudaSuccess hipSuccess +#define cudaDeviceProp hipDeviceProp_t +#define cudaGetDeviceProperties hipGetDeviceProperties #define cufftDestroy hipfftDestroy #define cufftDoubleComplex hipfftDoubleComplex @@ -76,7 +81,7 @@ static void __attribute__((unused)) check(const hipError_t err, const char *cons exit(err); } -#else +#else // not O_HIP #include @@ -92,7 +97,7 @@ static void check(const cufftResult err, const char *const file, const int line) exit(err); } -#endif +#endif //defined(CUFFT) || defined(PARIS) || defined(PARIS_GALACTIC) static void check(const cudaError_t err, const char *const file, const int line) { @@ -102,9 +107,12 @@ static void check(const cudaError_t err, const char *const file, const int line) exit(err); } +#define WARPSIZE 32 +static constexpr int maxWarpsPerBlock = 1024/WARPSIZE; #define hipLaunchKernelGGL(F,G,B,M,S,...) F<<>>(__VA_ARGS__) +#define __shfl_down(...) __shfl_down_sync(0xFFFFFFFF, __VA_ARGS__) -#endif +#endif //O_HIP #define CHECK(X) check(X,__FILE__,__LINE__) diff --git a/src/utils/gpu_arrays_functions.cu b/src/utils/gpu_arrays_functions.cu new file mode 100644 index 000000000..b2bc7599b --- /dev/null +++ b/src/utils/gpu_arrays_functions.cu @@ -0,0 +1,75 @@ +#include "../utils/error_handling.h" +#include "../utils/gpu.hpp" +#include "../global/global_cuda.h" +#include "../utils/gpu_arrays_functions.h" +#include + + +void Extend_GPU_Array_Real( Real **current_array_d, int current_size, int new_size, bool print_out ){ + + if ( new_size <= current_size ) return; + if ( print_out ) std::cout << " Extending GPU Array, size: " << current_size << " new_size: " << new_size << std::endl; + + size_t global_free, global_total; + CudaSafeCall( cudaMemGetInfo( &global_free, &global_total ) ); + cudaDeviceSynchronize(); + #ifdef PRINT_GPU_MEMORY + printf( "ReAllocating GPU Memory: %d MB free \n", (int) global_free/1000000); + #endif + + if ( global_free < new_size*sizeof(Real) ){ + printf( "ERROR: Not enough global device memory \n" ); + printf( " Available Memory: %d MB \n", (int) global_free/1000000 ); + printf( " Requested Memory: %d MB \n", (int) new_size*sizeof(Real)/1000000 ); + // exit(-1); + } + + Real *new_array_d; + CudaSafeCall( cudaMalloc((void**)&new_array_d, new_size*sizeof(Real)) ); + cudaDeviceSynchronize(); + CudaCheckError(); + if ( new_array_d == NULL ){ + std::cout << " Error When Allocating New GPU Array" << std::endl; + chexit(-1); + } + + // Copy the content of the original array to the new array + CudaSafeCall( cudaMemcpy( new_array_d, *current_array_d, current_size*sizeof(Real), cudaMemcpyDeviceToDevice ) ); + cudaDeviceSynchronize(); + CudaCheckError(); + + // size_t global_free_before, global_free_after; + // CudaSafeCall( cudaMemGetInfo( &global_free_before, &global_total ) ); + // cudaDeviceSynchronize(); + + // Free the original array + cudaFree(*current_array_d); + cudaDeviceSynchronize(); + CudaCheckError(); + + // CudaSafeCall( cudaMemGetInfo( &global_free_after, &global_total ) ); + // cudaDeviceSynchronize(); + // + // printf("Freed Memory: %d MB\n", (int) (global_free_after - global_free_before)/1000000 ); + + // Replace the pointer of the original array with the new one + *current_array_d = new_array_d; + +} + + + + + + + + + + + + + + + + + diff --git a/src/utils/hydro_utilities.cpp b/src/utils/hydro_utilities.cpp new file mode 100644 index 000000000..7fa7c1894 --- /dev/null +++ b/src/utils/hydro_utilities.cpp @@ -0,0 +1,5 @@ +#include "../utils/hydro_utilities.h" + +namespace hydro_utilities { + +} // end namespace hydro_utilities \ No newline at end of file diff --git a/src/utils/hydro_utilities.h b/src/utils/hydro_utilities.h new file mode 100644 index 000000000..990eb2a83 --- /dev/null +++ b/src/utils/hydro_utilities.h @@ -0,0 +1,80 @@ +/*! + * \file hydro_utilities.h + * \author Helena Richie (helenarichie@pitt.edu) + * \brief Contains the declaration of various utility functions for hydro + * + */ + +#pragma once + +#include +#include + +// Local Includes +#include "../global/global.h" +#include "../global/global_cuda.h" +#include "../utils/gpu.hpp" + + +/*! +* INDEX OF VARIABLES +* P : pressure +* vx, vy, vz : x, y, and z velocity +* d : density +* E : energy +* T : temperature +* mx, my, mz : x, y, and z momentum +* n : number density +*/ + +namespace hydro_utilities { + + inline __host__ __device__ Real Calc_Pressure_Primitive(Real const &E, Real const &d, Real const &vx, Real const &vy, Real const &vz, Real const &gamma) { + Real P; + P = (E - 0.5 * d * (vx*vx + vy*vy + vz*vz)) * (gamma - 1.0); + P = fmax(P, TINY_NUMBER); + return P; + } + + inline __host__ __device__ Real Calc_Pressure_Conserved(Real const &E, Real const &d, Real const &mx, Real const &my, Real const &mz, Real const &gamma) { + Real P= (E - 0.5 * (mx*mx + my*my + mz*mz) / d) * (gamma - 1.); + return fmax(P, TINY_NUMBER); + } + + inline __host__ __device__ Real Calc_Temp(Real const &P, Real const &n) { + Real T = P * PRESSURE_UNIT / (n * KB); + return T; + } + + #ifdef DE + inline __host__ __device__ Real Calc_Temp_DE(Real const &d, Real const &ge, Real const &gamma, Real const&n) { + Real T = d * ge * (gamma - 1.0) * PRESSURE_UNIT / (n * KB); + return T; + } + #endif // DE + + inline __host__ __device__ Real Calc_Energy_Primitive(Real const &P, Real const &d, Real const &vx, Real const &vy, Real const &vz, Real const &gamma) { + // Compute and return energy + return (fmax(P, TINY_NUMBER)/(gamma - 1.)) + 0.5 * d * (vx*vx + vy*vy + vz*vz); + } + + inline __host__ __device__ Real Get_Pressure_From_DE(Real const &E, Real const &U_total, Real const &U_advected, Real const &gamma) { + Real U, P; + Real eta = DE_ETA_1; + // Apply same condition as Byan+2013 to select the internal energy from which compute pressure. + if (U_total/E > eta) { + U = U_total; + } else { + U = U_advected; + } + P = U * (gamma - 1.0); + return P; + } + + inline __host__ __device__ Real Calc_Sound_Speed(Real const &E, Real const &d, Real const &mx, Real const &my, Real const &mz, Real const &gamma) { + Real P = Calc_Pressure_Conserved(E, d, mx, my, mz, gamma); + return sqrt(gamma * P / d); + } + + +} diff --git a/src/utils/hydro_utilities_tests.cpp b/src/utils/hydro_utilities_tests.cpp new file mode 100644 index 000000000..e8a066d12 --- /dev/null +++ b/src/utils/hydro_utilities_tests.cpp @@ -0,0 +1,129 @@ +/*! + * \file hyo_utilities_tests.cpp + * \author Robert 'Bob' Caddy (rvc@pitt.edu), Helena Richie (helenarichie@pitt.edu) + * \brief Tests for the contents of hydro_utilities.h and hydro_utilities.cpp + * + */ + +// STL Includes +#include +#include +#include + +// External Includes +#include // Include GoogleTest and related libraries/headers + +// Local Includes +#include "../utils/testing_utilities.h" +#include "../utils/hydro_utilities.h" +#include "../global/global.h" + +/*! +* INDEX OF VARIABLES +* P : pressure +* vx, vy, vz : x, y, and z velocity +* d : density +* E : energy +* T : temperature +* mx, my, mz : x, y, and z momentum +* n : number density +*/ + +// ============================================================================= +// Local helper functions + +namespace +{ + struct TestParams + { + double gamma = 5./3.; + std::vector d {1.0087201154e-15, 1.0756968986e2, 1.0882403847e100}; + std::vector vx {1.0378624601e-100, 1.0829278656e2, 1.0800514112e100}; + std::vector vy {1.0583469014e-100, 1.0283073464e2, 1.0725717864e100}; + std::vector vz {1.0182972216e-100, 1.0417748226e2, 1.0855352639e100}; + std::vector mx {0.2340416681e-100, 0.1019429453e2, 0.5062596954e100}; + std::vector my {0.9924582299e-100, 0.1254780684e2, 0.5939640992e100}; + std::vector mz {0.6703192739e-100, 0.5676716066e2, 0.2115881803e100}; + std::vector E {20.9342082433e-90, 20.9976906577e10, 20.9487120853e300}; + std::vector P {2.2244082909e-10, 8.6772951021e2, 6.7261085663e100}; + std::vector n {3.0087201154e-10, 1.3847303413e2, 1.0882403847e100}; + std::vector ge {4.890374019e-10, 1.0756968986e2, 3.8740982372e100}; + std::vector U_total {2.389074039e-10, 4.890374019e2, 6.8731436293e100}; + std::vector U_advected {1.3847303413e-10, 1.0756968986e2, 1.0882403847e100}; + std::vector names{"Small number case", "Medium number case", "Large number case"}; + }; +} + +TEST(tHYDROHydroUtilsCalcPressurePrimitive, CorrectInputExpectCorrectOutput) { + TestParams parameters; + std::vector fiducial_Ps {1e-20, 139983415580.5549, 1.2697896247496674e+301}; + + for (size_t i = 0; i < parameters.names.size(); i++) + { + Real test_Ps = hydro_utilities::Calc_Pressure_Primitive(parameters.E.at(i), parameters.d.at(i), parameters.vx.at(i), parameters.vy.at(i), parameters.vz.at(i), parameters.gamma); + + testingUtilities::checkResults(fiducial_Ps.at(i), test_Ps, parameters.names.at(i)); + } +} + +TEST(tHYDROHydroUtilsCalcPressureConserved, CorrectInputExpectCorrectOutput) { + TestParams parameters; + std::vector fiducial_Ps {1e-20, 139984604373.87094, 1.3965808056866668e+301}; + + for (size_t i = 0; i < parameters.names.size(); i++) + { + Real test_Ps = hydro_utilities::Calc_Pressure_Conserved(parameters.E.at(i), parameters.d.at(i), parameters.mx.at(i), parameters.my.at(i), parameters.mz.at(i), parameters.gamma); + + testingUtilities::checkResults(fiducial_Ps.at(i), test_Ps, parameters.names.at(i)); + } +} + +TEST(tHYDROHydroUtilsCalcTemp, CorrectInputExpectCorrectOutput) { + TestParams parameters; + std::vector fiducial_Ts {3465185.0560059389, 29370603.906644326, 28968949.83344138}; + + for (size_t i = 0; i < parameters.names.size(); i++) + { + Real test_Ts = hydro_utilities::Calc_Temp(parameters.P.at(i), parameters.n.at(i)); + + testingUtilities::checkResults(fiducial_Ts.at(i), test_Ts, parameters.names.at(i)); + } +} + +#ifdef DE +TEST(tHYDROHydroUtilsCalcTempDE, CorrectInputExpectCorrectOutput) { + TestParams parameters; + std::vector fiducial_Ts {5.123106988008801e-09, 261106139.02514684, 1.2105231166585662e+107}; + + for (size_t i = 0; i < parameters.names.size(); i++) + { + Real test_Ts = hydro_utilities::Calc_Temp_DE(parameters.d.at(i), parameters.ge.at(i), parameters.gamma, parameters.n.at(i)); + + testingUtilities::checkResults(fiducial_Ts.at(i), test_Ts, parameters.names.at(i)); + } +} +#endif // DE + +TEST(tHYDROHydroUtilsCalcEnergyPrimitive, CorrectInputExpectCorrectOutput) { + TestParams parameters; + std::vector fiducial_Es {3.3366124363499997e-10, 1784507.7619407175, 1.9018677140549926e+300}; + + for (size_t i = 0; i < parameters.names.size(); i++) + { + Real test_Es = hydro_utilities::Calc_Energy_Primitive(parameters.P.at(i), parameters.d.at(i), parameters.vx.at(i), parameters.vy.at(i), parameters.vz.at(i), parameters.gamma); + + testingUtilities::checkResults(fiducial_Es.at(i), test_Es, parameters.names.at(i)); + } +} + +TEST(tHYDROHydroUtilsGetPressureFromDE, CorrectInputExpectCorrectOutput) { + TestParams parameters; + std::vector fiducial_Ps {1.5927160260000002e-10, 71.713126573333341, 7.2549358980000001e+99}; + + for (size_t i = 0; i < parameters.names.size(); i++) + { + Real test_Ps = hydro_utilities::Get_Pressure_From_DE(parameters.E.at(i), parameters.U_total.at(i), parameters.U_advected.at(i), parameters.gamma); + + testingUtilities::checkResults(fiducial_Ps.at(i), test_Ps, parameters.names.at(i)); + } +} diff --git a/src/utils/mhd_utilities.h b/src/utils/mhd_utilities.h index b3ce175f6..f28cbb400 100644 --- a/src/utils/mhd_utilities.h +++ b/src/utils/mhd_utilities.h @@ -285,9 +285,9 @@ namespace mhdUtils Real &avgBy, Real &avgBz) { - avgBx = 0.5 * (dev_conserved[(5+NSCALARS)*n_cells + id] + dev_conserved[(5+NSCALARS)*n_cells + ((xid+1) + yid*nx + zid*nx*ny)]); // id+1 in x - avgBy = 0.5 * (dev_conserved[(6+NSCALARS)*n_cells + id] + dev_conserved[(6+NSCALARS)*n_cells + (xid + (yid+1)*nx + zid*nx*ny)]); // id+1 in y - avgBz = 0.5 * (dev_conserved[(7+NSCALARS)*n_cells + id] + dev_conserved[(7+NSCALARS)*n_cells + (xid + yid*nx + (zid+1)*nx*ny)]); // id+1 in z + avgBx = 0.5 * (dev_conserved[(5+NSCALARS)*n_cells + id] + dev_conserved[(5+NSCALARS)*n_cells + ((xid-1) + yid*nx + zid*nx*ny)]); + avgBy = 0.5 * (dev_conserved[(6+NSCALARS)*n_cells + id] + dev_conserved[(6+NSCALARS)*n_cells + (xid + (yid-1)*nx + zid*nx*ny)]); + avgBz = 0.5 * (dev_conserved[(7+NSCALARS)*n_cells + id] + dev_conserved[(7+NSCALARS)*n_cells + (xid + yid*nx + (zid-1)*nx*ny)]); } // ========================================================================= diff --git a/src/utils/mhd_utilities_tests.cpp b/src/utils/mhd_utilities_tests.cpp index 6580c875a..c5cbb25fb 100644 --- a/src/utils/mhd_utilities_tests.cpp +++ b/src/utils/mhd_utilities_tests.cpp @@ -361,16 +361,18 @@ TEST(tMHDSlowMagnetosonicSpeed, testParams parameters; std::vector fiducialSlowMagnetosonicSpeed{0.0, 2.138424778167535, - 0.0}; + 0.26678309355540852}; + // Coefficient to make sure the output is well defined and not nan or inf + double const coef = 1E-95; - for (size_t i = 0; i < parameters.names.size(); i++) + for (size_t i = 2; i < parameters.names.size(); i++) { Real testSlowMagnetosonicSpeed = mhdUtils::slowMagnetosonicSpeed( - parameters.density.at(i), - parameters.pressureGas.at(i), - parameters.magneticX.at(i), - parameters.magneticY.at(i), - parameters.magneticZ.at(i), + parameters.density.at(i) * coef, + parameters.pressureGas.at(i) * coef, + parameters.magneticX.at(i) * coef, + parameters.magneticY.at(i) * coef, + parameters.magneticZ.at(i) * coef, parameters.gamma); testingUtilities::checkResults(fiducialSlowMagnetosonicSpeed.at(i), @@ -391,16 +393,18 @@ TEST(tMHDSlowMagnetosonicSpeed, testParams parameters; std::vector fiducialSlowMagnetosonicSpeed{0.0, 276816332809.37604, - 0.0}; + 1976400098318.3574}; + // Coefficient to make sure the output is well defined and not nan or inf + double const coef = 1E-95; - for (size_t i = 0; i < parameters.names.size(); i++) + for (size_t i = 2; i < parameters.names.size(); i++) { Real testSlowMagnetosonicSpeed = mhdUtils::slowMagnetosonicSpeed( - -parameters.density.at(i), - parameters.pressureGas.at(i), - parameters.magneticX.at(i), - parameters.magneticY.at(i), - parameters.magneticZ.at(i), + -parameters.density.at(i) * coef, + parameters.pressureGas.at(i) * coef, + parameters.magneticX.at(i) * coef, + parameters.magneticY.at(i) * coef, + parameters.magneticZ.at(i) * coef, parameters.gamma); testingUtilities::checkResults(fiducialSlowMagnetosonicSpeed.at(i), @@ -487,9 +491,9 @@ TEST(tMHDCellCenteredMagneticFields, std::iota(std::begin(testGrid), std::end(testGrid), 0.); // Fiducial and test variables - double const fiducialAvgBx = 638.5, - fiducialAvgBy = 764.5, - fiducialAvgBz = 892.5; + double const fiducialAvgBx = 637.5, + fiducialAvgBy = 761.5, + fiducialAvgBz = 883.5; double testAvgBx, testAvgBy, testAvgBz; // Call the function to test diff --git a/src/utils/reduction_utilities.cu b/src/utils/reduction_utilities.cu new file mode 100644 index 000000000..8c21a0cbe --- /dev/null +++ b/src/utils/reduction_utilities.cu @@ -0,0 +1,57 @@ +/*! + * \file reduction_utilities.cpp + * \author Robert 'Bob' Caddy (rvc@pitt.edu) + * \brief Contains the implementation of the GPU resident reduction utilities + * + */ + +// STL Includes +#include + +// External Includes + +// Local Includes +#include "../utils/reduction_utilities.h" + +#ifdef CUDA + namespace reduction_utilities + { + // ===================================================================== + __global__ void kernelReduceMax(Real *in, Real* out, size_t N, Real lowLimit) + { + // Initialize variable to store the max value + Real maxVal = lowLimit; + + // Grid stride loop to perform as much of the reduction as possible + for(size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < N; i += blockDim.x * gridDim.x) + { + // A transformation could go here + + // Grid stride reduction + maxVal = max(maxVal,in[i]); + } + + // Find the maximum val in the grid and write it to `out`. Note that there + // is no execution/memory barrier after this and so the reduced scalar is + // not available for use in this kernel. The grid wide barrier can be + // accomplished by ending this kernel here and then launching a new one or + // by using cooperative groups. If this becomes a need it can be added later + gridReduceMax(maxVal, out); + } + // ===================================================================== + + // ===================================================================== + void reductionLaunchParams(uint &numBlocks, uint &threadsPerBlock, uint const &deviceNum) + { + cudaDeviceProp prop; + cudaGetDeviceProperties(&prop, deviceNum); + + // Divide the total number of allowed threads by the number of + // threads per block + threadsPerBlock = prop.maxThreadsPerBlock; + numBlocks = (prop.maxThreadsPerMultiProcessor * prop.multiProcessorCount) + / threadsPerBlock; + } + // ===================================================================== + }//reduction_utilities +#endif //CUDA \ No newline at end of file diff --git a/src/utils/reduction_utilities.h b/src/utils/reduction_utilities.h new file mode 100644 index 000000000..d9976869c --- /dev/null +++ b/src/utils/reduction_utilities.h @@ -0,0 +1,205 @@ +/*! + * \file reduction_utilities.h + * \author Robert 'Bob' Caddy (rvc@pitt.edu) + * \brief Contains the declaration of the GPU resident reduction utilities + * + */ + +#pragma once + +// STL Includes +#include + +// External Includes + +// Local Includes +#include "../global/global.h" +#include "../global/global_cuda.h" +#include "../utils/gpu.hpp" + +#ifdef CUDA + /*! + * \brief Namespace to contain device resident reduction functions. Includes + * functions and kernels for array reduction, warp level, block level, and + * grid level reductions. + * + */ + namespace reduction_utilities + { + // ===================================================================== + /*! + * \brief Perform a reduction within the warp/wavefront to find the + * maximum value of `val` + * + * \param[in] val The thread local variable to find the maximum of across + * the warp + * \return Real The maximum value of `val` within the warp + */ + __inline__ __device__ Real warpReduceMax(Real val) + { + for (int offset = warpSize/2; offset > 0; offset /= 2) + { + val = fmax(val, __shfl_down(val, offset)); + } + return val; + } + // ===================================================================== + + // ===================================================================== + /*! + * \brief Perform a reduction within the block to find the maximum value + * of `val` + * + * \param[in] val The thread local variable to find the maximum of across + * the block + * \return Real The maximum value of `val` within the block + */ + __inline__ __device__ Real blockReduceMax(Real val) + { + // Shared memory for storing the results of each warp-wise partial + // reduction + __shared__ Real shared[::maxWarpsPerBlock]; + + int lane = threadIdx.x % warpSize; // thread ID within the warp, + int warpId = threadIdx.x / warpSize; // ID of the warp itself + + val = warpReduceMax(val); // Each warp performs partial reduction + + if (lane==0) shared[warpId]=val; // Write reduced value to shared memory + + __syncthreads(); // Wait for all partial reductions + + //read from shared memory only if that warp existed + val = (threadIdx.x < blockDim.x / warpSize) ? shared[lane] : 0; + + if (warpId==0) val = warpReduceMax(val); //Final reduce within first warp + + return val; + } + // ===================================================================== + + // ===================================================================== + /*! + * \brief Perform an atomic reduction to find the maximum value of `val` + * + * \param[out] address The pointer to where to store the reduced scalar + * value in device memory + * \param[in] val The thread local variable to find the maximum of across + * the grid. Typically this should be a partial reduction that has + * already been reduced to the block level + */ + __inline__ __device__ double atomicMax_double(double* address, double val) + { + unsigned long long int* address_as_ull = (unsigned long long int*) address; + unsigned long long int old = *address_as_ull, assumed; + // Explanation of loop here: + // https://stackoverflow.com/questions/16077464/atomicadd-for-double-on-gpu + // The loop is to make sure the value at address doesn't change + // between the load at the atomic since the entire operation isn't + // atomic + + // While it appears that this could result in many times more atomic + // operations than required, in practice it's only a handful of + // extra operation even in the worst case. Running with 16,000 + // blocks gives ~8-37 atomics after brief testing + do { + assumed = old; + old = atomicCAS(address_as_ull, + assumed, + __double_as_longlong(fmax(__longlong_as_double(assumed),val))); + } while (assumed != old); + return __longlong_as_double(old); + } + // ===================================================================== + + // ===================================================================== + /*! + * \brief Perform a reduction within the grid to find the maximum value + * of `val`. Note that this will overwrite the value in `out` with + * the value of the optional argument `lowLimit` which defaults to + * `-DBL_MAX` + * + * \details This function can perform a reduction to find the maximum of + * the thread local variable `val` across the entire grid. It relies on a + * warp-wise reduction using registers followed by a block-wise reduction + * using shared memory, and finally a grid-wise reduction using atomics. + * As a result the performance of this function is substantally improved + * by using as many threads per block as possible and as few blocks as + * possible since each block has to perform an atomic operation. To + * accomplish this it is recommened that you use the + * `reductionLaunchParams` functions to get the optimal number of blocks + * and threads per block to launch rather than relying on Cholla defaults + * and then within the kernel using a grid-stride loop to make sure the + * kernel works with any combination of threads and blocks. Note that + * after this function call you cannot use the reduced value in global + * memory since there is no grid wide sync. You can get around this by + * either launching a second kernel to do the next steps or by using + * cooperative groups to perform a grid wide sync. During it's execution + * it also calls multiple __synchThreads and so cannot be called from + * within any kind of thread guard. + * + * \param[in] val The thread local variable to find the maximum of across + * the grid + * \param[out] out The pointer to where to store the reduced scalar value + * in device memory + * \param[in] lowLimit (optional)What value to initilize global memory + * with. Defaults to -DBL_MAX. This value will be the lowest possible + * value that the reduction can produce since all other values are + * compared against it + */ + __inline__ __device__ void gridReduceMax(Real val, Real* out, Real lowLimit = -DBL_MAX) + { + __syncthreads(); // Wait for all threads to calculate val; + + // Set the value in global memory so meaningful comparisons can be + // performed + if (threadIdx.x + blockIdx.x * blockDim.x == 0) *out = lowLimit; + + // Reduce the entire block in parallel + val = blockReduceMax(val); + + // Write block level reduced value to the output scalar atomically + // if (threadIdx.x == 0) atomicMax_double(out, val); + if (threadIdx.x == 0) out[blockIdx.x] = val; + } + // ===================================================================== + + // ===================================================================== + /*! + * \brief Find the maximum value in the array. Note that this will + * overwrite the value in `out` with the value of the optional argument + * `lowLimit` which defaults to `-DBL_MAX`. If `in` and `out` are the + * same array that's ok, all the loads are completed before the + * overwrite occurs + * + * \param[in] in The pointer to the array to reduce in device memory + * \param[out] out The pointer to where to store the reduced scalar + * value in device memory + * \param[in] N The size of the `in` array + * \param[in] lowLimit (optional) What value to initilize global memory + * with. Defaults to -DBL_MAX. This value will be the lowest possible + * value that the reduction can produce since all other values are + * compared against it + */ + __global__ void kernelReduceMax(Real *in, Real* out, size_t N, Real lowLimit = -DBL_MAX); + // ===================================================================== + + // ===================================================================== + /*! + * \brief Determine the optimal number of blocks and threads per block to + * use when launching a reduction kernel + * + * \param[out] numBlocks The maximum number of blocks that are + * scheduleable by the device in use when each block has the maximum + * number of threads + * \param[out] threadsPerBlock The maximum threads per block supported by + * the device in use + * \param[in] deviceNum optional: which device is being targeted. + * Defaults to zero + */ + void reductionLaunchParams(uint &numBlocks, + uint &threadsPerBlock, + uint const &deviceNum=0); + // ===================================================================== + } // namespace reduction_utilities +#endif //CUDA diff --git a/src/utils/reduction_utilities_tests.cu b/src/utils/reduction_utilities_tests.cu new file mode 100644 index 000000000..2314b33be --- /dev/null +++ b/src/utils/reduction_utilities_tests.cu @@ -0,0 +1,96 @@ +/*! + * \file reduction_utilities_tests.cpp + * \author Robert 'Bob' Caddy (rvc@pitt.edu) + * \brief Tests for the contents of reduction_utilities.h and reduction_utilities.cpp + * + */ + +// STL Includes +#include +#include +#include +#include + +// External Includes +#include // Include GoogleTest and related libraries/headers + +// Local Includes +#include "../utils/testing_utilities.h" +#include "../utils/reduction_utilities.h" +#include "../global/global.h" + + + +// ============================================================================= +// Tests for divergence max reduction +// ============================================================================= +TEST(tALLKernelReduceMax, CorrectInputExpectCorrectOutput) +{ + // Launch parameters + // ================= + uint numBlocks, threadsPerBlock; + reduction_utilities::reductionLaunchParams(numBlocks, threadsPerBlock); + + // Grid Parameters & testing parameters + // ==================================== + size_t const gridSize = 64; + size_t const size = std::pow(gridSize, 3);; + Real const maxValue = 4; + std::vector host_grid(size); + Real host_max = -DBL_MAX; + + // Fill grid with random values and assign maximum value + std::mt19937 prng(1); + std::uniform_real_distribution doubleRand(-std::abs(maxValue)-1, std::abs(maxValue) - 1); + std::uniform_int_distribution intRand(0, host_grid.size()-1); + for (size_t i = 0; i < host_grid.size(); i++) + { + host_grid.at(i) = doubleRand(prng); + } + host_grid.at(intRand(prng)) = maxValue; + + + // Allocating and copying to device + // ================================ + Real *dev_grid; + CudaSafeCall(cudaMalloc(&dev_grid, host_grid.size() * sizeof(Real))); + CudaSafeCall(cudaMemcpy(dev_grid, host_grid.data(), host_grid.size() * sizeof(Real), cudaMemcpyHostToDevice)); + + Real *dev_max_array; + CudaSafeCall(cudaMalloc(&dev_max_array, numBlocks*sizeof(Real))); + // Sets all bytes to 0. + cudaMemset(dev_max_array,0,numBlocks*sizeof(Real)); + + Real host_max_array[numBlocks]; + //Real *host_max_array = (Real *) malloc(numBlocks*sizeof(Real)); + //CudaSafeCall( cudaHostAlloc(&host_max_array, numBlocks*sizeof(Real), cudaHostAllocDefault) ); + + + // Do the reduction + // ================ + hipLaunchKernelGGL(reduction_utilities::kernelReduceMax, numBlocks, threadsPerBlock, 0, 0, dev_grid, dev_max_array, host_grid.size()); + CudaCheckError(); + + + // Copy back and sync + // ================== + CudaSafeCall(cudaMemcpy(&host_max_array, dev_max_array, numBlocks*sizeof(Real), cudaMemcpyDeviceToHost)); + cudaDeviceSynchronize(); + + for (int i = 0; i < numBlocks; i++) + { + host_max = fmax(host_max,host_max_array[i]); + } + + //free(host_max_array); + + cudaFree(dev_max_array); + + cudaFree(dev_grid); + + // Perform comparison + testingUtilities::checkResults(maxValue, host_max, "maximum value found"); +} +// ============================================================================= +// Tests for divergence max reduction +// ============================================================================= diff --git a/src/utils/testing_utilities.cpp b/src/utils/testing_utilities.cpp index ba83d05c1..9b8bee948 100644 --- a/src/utils/testing_utilities.cpp +++ b/src/utils/testing_utilities.cpp @@ -58,14 +58,25 @@ namespace testingUtilities double const &fixedEpsilon, // = 1E-14 by default int const &ulpsEpsilon) // = 4 by default { - // Handle the near-zero case and pass back the absolute difference + // Compute differences + ulpsDiff = ulpsDistanceDbl(a, b); absoluteDiff = std::abs(a - b); - if (absoluteDiff <= fixedEpsilon) - return true; - // Handle all other cases and pass back the difference in ULPs - ulpsDiff = ulpsDistanceDbl(a, b); - return ulpsDiff <= ulpsEpsilon; + // Perform the ULP check which is for numbers far from zero + if (ulpsDiff <= ulpsEpsilon) + { + return true; + } + // Perform the absolute check which is for numbers near zero + else if (absoluteDiff <= fixedEpsilon) + { + return true; + } + // if none of the checks have passed indicate test failure + else + { + return false; + } } // ========================================================================= diff --git a/src/utils/timing_functions.cpp b/src/utils/timing_functions.cpp index 61b73b468..9ac6bb4ba 100644 --- a/src/utils/timing_functions.cpp +++ b/src/utils/timing_functions.cpp @@ -39,6 +39,23 @@ void OneTime::End(){ n_steps++; } + +void OneTime::RecordTime( Real time ){ + time *= 1000; //Convert from secs to ms + #ifdef MPI_CHOLLA + t_min = ReduceRealMin(time); + t_max = ReduceRealMax(time); + t_avg = ReduceRealAvg(time); + #else + t_min = time; + t_max = time; + t_avg = time; + #endif + if (n_steps > 0) t_all += t_max; + n_steps++; +} + + void OneTime::PrintStep(){ chprintf(" Time %-19s min: %9.4f max: %9.4f avg: %9.4f ms\n", name, t_min, t_max, t_avg); } @@ -60,7 +77,7 @@ void Time::Initialize(){ // Add or remove timers by editing this list, keep TOTAL at the end // add NAME to timing_functions.h // add Timer.NAME.Start() and Timer.NAME.End() where appropriate. - + onetimes = { #ifdef PARTICLES &(Calc_dt = OneTime("Calc_dt")), @@ -84,15 +101,15 @@ void Time::Initialize(){ #ifdef CHEMISTRY_GPU &(Chemistry = OneTime("Chemistry")), #endif - #ifdef FEEDBACK + #ifdef SUPERNOVA &(Feedback = OneTime("Feedback")), #ifdef ANALYSIS &(FeedbackAnalysis = OneTime("FeedbackAnalysis")), #endif - #endif // FEEDBACK + #endif // SUPERNOVA &(Total = OneTime("Total")), }; - + chprintf( "\nTiming Functions is ON \n"); @@ -118,6 +135,9 @@ void Time::Print_Average_Times( struct parameters P ){ chprintf( "Writing timing values to file: %s \n", file_name.c_str()); + std::string gitHash = "Git Commit Hash = " + std::string(GIT_HASH) + std::string("\n"); + std::string macroFlags = "Macro Flags = " + std::string(MACRO_FLAGS) + std::string("\n\n"); + header = "#n_proc nx ny nz n_omp n_steps "; for (OneTime* x : onetimes){ @@ -144,7 +164,12 @@ void Time::Print_Average_Times( struct parameters P ){ // Output timing values out_file.open(file_name.c_str(), std::ios::app); - if ( !file_exists ) out_file << header; + if ( !file_exists ) + { + out_file << gitHash; + out_file << macroFlags; + out_file << header; + } #ifdef MPI_CHOLLA out_file << nproc << " "; #else diff --git a/src/utils/timing_functions.h b/src/utils/timing_functions.h index 32c1909e0..84a1520d7 100644 --- a/src/utils/timing_functions.h +++ b/src/utils/timing_functions.h @@ -30,6 +30,7 @@ class OneTime void PrintStep(); void PrintAverage(); void PrintAll(); + void RecordTime( Real time ); }; // Time loops through instances of OneTime. onetimes is initialized with pointers to each timer. From 22206fef63b5cd6646725d1533afdb53ffa9f77d Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Thu, 8 Sep 2022 16:45:47 -0400 Subject: [PATCH 104/694] Add option to use the debug build with run_tests.sh --- .github/workflows/build_tests.yml | 2 +- Makefile | 6 +++++- builds/make.host.github | 6 +----- builds/run_tests.sh | 13 +++++++++---- 4 files changed, 16 insertions(+), 11 deletions(-) diff --git a/.github/workflows/build_tests.yml b/.github/workflows/build_tests.yml index 8fb88c7ba..f357286a2 100644 --- a/.github/workflows/build_tests.yml +++ b/.github/workflows/build_tests.yml @@ -158,7 +158,7 @@ jobs: - name: Build Cholla run: | source builds/run_tests.sh - buildCholla + buildCholla OPTIMIZE - name: Build Tests run: | source builds/run_tests.sh diff --git a/Makefile b/Makefile index 7d58f01a7..d706803a8 100644 --- a/Makefile +++ b/Makefile @@ -66,7 +66,11 @@ GPUFLAGS_OPTIMIZE ?= -g -O3 -std=c++17 CFLAGS_DEBUG ?= -g -O0 CXXFLAGS_DEBUG ?= -g -O0 -std=c++17 -GPUFLAGS_DEBUG ?= -g -G -cudart shared -O0 -std=c++17 -ccbin=mpicxx +ifdef HIPCONFIG + GPUFLAGS_DEBUG ?= -g -O0 -std=c++17 +else + GPUFLAGS_DEBUG ?= -g -G -cudart shared -O0 -std=c++17 -ccbin=mpicxx +endif BUILD ?= OPTIMIZE diff --git a/builds/make.host.github b/builds/make.host.github index aea03c28c..0dea956fc 100644 --- a/builds/make.host.github +++ b/builds/make.host.github @@ -13,6 +13,7 @@ OMP_NUM_THREADS = 7 #-- Library ifdef HIPCONFIG HIPCONFIG := -I$(shell hipconfig -R)/include $(shell hipconfig -C) + GPUFLAGS_DEBUG = -g -O0 -std=c++17 endif CUDA_ROOT := $(CUDA_ROOT) HDF5_ROOT := $(HDF5_ROOT) @@ -24,11 +25,6 @@ GOOGLETEST_ROOT := ${GOOGLETEST_ROOT} #-- MPI calls accept GPU buffers (requires GPU-aware MPI) # MPI_GPU = -DMPI_GPU -ifndef HIPCONFIG - GPUFLAGS_DEBUG += -g -O0 -ccbin=mpicxx - GPUFLAGS_OPTIMIZE += -g -O3 -ccbin=mpicxx -endif - ifdef HIPCONFIG MPI_ROOT := ${MPI_ROOT} CFLAGS_DEBUG += -fPIE diff --git a/builds/run_tests.sh b/builds/run_tests.sh index bca41e411..519c9d928 100755 --- a/builds/run_tests.sh +++ b/builds/run_tests.sh @@ -110,7 +110,7 @@ buildCholla () { echo -e "\nBuilding Cholla...\n" builtin cd $CHOLLA_ROOT - make -j TYPE=${CHOLLA_MAKE_TYPE} + make -j TYPE=${CHOLLA_MAKE_TYPE} BUILD=${1} } # ============================================================================== @@ -220,9 +220,11 @@ buildAndRunTests () # did unset BUILD_GTEST + BUILD_MODE='OPTIMIZE' + # Check arguments local OPTIND - while getopts "t:c:g" opt; do + while getopts "t:c:g:d" opt; do case $opt in t) # Set the make type MAKE_TYPE_ARG="-t ${OPTARG}" @@ -233,6 +235,9 @@ buildAndRunTests () g) # Build GoogleTest locally? BUILD_GTEST=true ;; + d) # Build the debug version of Cholla? + BUILD_MODE='DEBUG' + ;; \?) echo "Invalid option: -${OPTARG}" >&2 return 1 @@ -249,8 +254,8 @@ buildAndRunTests () if [[ -n $BUILD_GTEST ]]; then buildGoogleTest fi - buildCholla && \ - buildChollaTests && \ + buildCholla $BUILD_MODE && \ + buildChollaTests && \ runTests } # ============================================================================== From 61e0b3a1cb0beb1903014062116b8703d7433802 Mon Sep 17 00:00:00 2001 From: helenarichie Date: Thu, 15 Sep 2022 12:25:22 -0400 Subject: [PATCH 105/694] add small changes --- builds/make.type.dust | 3 ++- cloud-wind/cloud-wind.txt | 2 +- src/gravity/gravity_functions.cpp | 20 +++++++++++++++++--- src/grid/grid3D.cpp | 3 ++- src/grid/initial_conditions.cpp | 4 +--- 5 files changed, 23 insertions(+), 9 deletions(-) diff --git a/builds/make.type.dust b/builds/make.type.dust index 5b54e21ad..0be259763 100644 --- a/builds/make.type.dust +++ b/builds/make.type.dust @@ -14,13 +14,14 @@ DFLAGS += -DPPMC DFLAGS += -DHLLC # DFLAGS += -DDE -# DFLAGS += -DAVERAGE_SLOW_CELLS +DFLAGS += -DAVERAGE_SLOW_CELLS DFLAGS += -DTEMPERATURE_FLOOR ifeq ($(findstring cosmology,$(TYPE)),cosmology) DFLAGS += -DSIMPLE else DFLAGS += -DVL +# DFLAGS += -DSIMPLE endif # Evolve additional scalars diff --git a/cloud-wind/cloud-wind.txt b/cloud-wind/cloud-wind.txt index 4f08bbeb2..a60b53d61 100644 --- a/cloud-wind/cloud-wind.txt +++ b/cloud-wind/cloud-wind.txt @@ -42,4 +42,4 @@ zu_bcnd=3 outdir=./ # nfile=1e10 custom_bcnd=wind -# n_hydro=3728643 \ No newline at end of file +n_hydro=3728643 \ No newline at end of file diff --git a/src/gravity/gravity_functions.cpp b/src/gravity/gravity_functions.cpp index ed5b0ba87..8b86c30d3 100644 --- a/src/gravity/gravity_functions.cpp +++ b/src/gravity/gravity_functions.cpp @@ -116,7 +116,8 @@ void Grid3D::set_dt_Gravity(){ #ifdef AVERAGE_SLOW_CELLS //Set the min_delta_t for averaging a slow cell da_particles = fmin( da_particles, Cosmo.max_delta_a ); - min_dt_slow = Cosmo.Get_dt_from_da( da_particles ) / Particles.C_cfl * Cosmo.H0 / ( Cosmo.current_a * Cosmo.current_a ) / SLOW_FACTOR; + // min_dt_slow = Cosmo.Get_dt_from_da( da_particles ) / Particles.C_cfl * Cosmo.H0 / ( Cosmo.current_a * Cosmo.current_a ) / SLOW_FACTOR; + min_dt_slow = 1e-2 / TIME_UNIT; H.min_dt_slow = min_dt_slow; #endif @@ -144,7 +145,9 @@ void Grid3D::set_dt_Gravity(){ #ifdef AVERAGE_SLOW_CELLS //Set the min_delta_t for averaging a slow cell - min_dt_slow = dt_particles / Particles.C_cfl / SLOW_FACTOR; + //min_dt_slow = dt_particles / Particles.C_cfl / SLOW_FACTOR; + printf("/nhell0!!/n"); + min_dt_slow = 1e10; H.min_dt_slow = min_dt_slow; #endif @@ -156,7 +159,18 @@ void Grid3D::set_dt_Gravity(){ #if defined( AVERAGE_SLOW_CELLS) && !defined( PARTICLES ) //Set the min_delta_t for averaging a slow cell ( for now the min_dt_slow is set to a large value, change this with your condition ) - min_dt_slow = H.dt / C_cfl * 100 ; + // min_dt_slow = H.dt / C_cfl * 100 ; + printf("/nhowdy!!/n"); + min_dt_slow = 1e10; + H.min_dt_slow = min_dt_slow; + #endif + + #ifdef AVERAGE_SLOW_CELLS + //Set the min_delta_t for averaging a slow cell ( for now the min_dt_slow is set to a large value, change this with your condition ) + // min_dt_slow = H.dt / C_cfl * 100 ; + printf("/nhi!!/n"); + printf("/nhi!!/n"); + min_dt_slow = 1e10; H.min_dt_slow = min_dt_slow; #endif diff --git a/src/grid/grid3D.cpp b/src/grid/grid3D.cpp index 1af6ab7de..27289ac25 100644 --- a/src/grid/grid3D.cpp +++ b/src/grid/grid3D.cpp @@ -140,7 +140,8 @@ void Grid3D::Initialize(struct parameters *P) C_cfl = 0.3; #ifdef AVERAGE_SLOW_CELLS - H.min_dt_slow = 1e-100; //Initialize the minumum dt to a tiny number + // H.min_dt_slow = 1e-10; //Initialize the minumum dt to a tiny number + H.min_dt_slow = 1e-2; #endif #ifndef MPI_CHOLLA diff --git a/src/grid/initial_conditions.cpp b/src/grid/initial_conditions.cpp index 6695e0898..86c0d9fa8 100644 --- a/src/grid/initial_conditions.cpp +++ b/src/grid/initial_conditions.cpp @@ -239,13 +239,11 @@ void Grid3D::Constant(Real rho, Real vx, Real vy, Real vz, Real P, Real Bx, Real #endif // DUST #endif // SCALAR } -/* if (i==istart && j==jstart && k==kstart) { n = rho*DENSITY_UNIT / (mu*MP); T = P*PRESSURE_UNIT / (n*KB); printf("Initial n = %e, T = %e\n", n, T); } -*/ } } } @@ -1255,7 +1253,7 @@ void Grid3D::Clouds() Real p_bg, p_cl; // background and cloud pressure Real mu = 0.6; // mean atomic weight int N_cl = 1; // number of clouds - Real R_cl = 0.1; // cloud radius in code units (kpc) + Real R_cl = 0.01; // cloud radius in code units (kpc) Real cl_pos[N_cl][3]; // array of cloud positions Real r; From 3c07846ae008145c9a5ee558b1e2596e161dc559 Mon Sep 17 00:00:00 2001 From: bcaddy <41171425+bcaddy@users.noreply.github.com> Date: Wed, 28 Sep 2022 14:47:07 -0400 Subject: [PATCH 106/694] `make clobber` to only delete in `bin/` Previously `make clobber` deleted any executable that matched the pattern `cholla*` in the entire repo. Now it only deletes executables that follow this pattern and are in `bin`. The reason is that I accidentally deleted an executable script who's name started with `cholla` and that seems like something that others will do as well --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index d706803a8..635de4fff 100644 --- a/Makefile +++ b/Makefile @@ -185,7 +185,7 @@ clean: -find bin/ -type f -executable -name "cholla.*.$(MACHINE)*" -exec rm -f '{}' \; clobber: clean - find . -type f -executable -name "cholla*" -exec rm -f '{}' \; + find bin/ -type f -executable -name "cholla*" -exec rm -f '{}' \; -find bin/ -type d -name "t*" -prune -exec rm -rf '{}' \; rm -rf bin/cholla.*tests*.xml From 3a99f1cf2c8ef1f479799b6fd4b594b3226adf97 Mon Sep 17 00:00:00 2001 From: bcaddy <41171425+bcaddy@users.noreply.github.com> Date: Wed, 28 Sep 2022 15:04:10 -0400 Subject: [PATCH 107/694] Stop failure if there is no bin directory --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 635de4fff..0657c0ac6 100644 --- a/Makefile +++ b/Makefile @@ -185,7 +185,7 @@ clean: -find bin/ -type f -executable -name "cholla.*.$(MACHINE)*" -exec rm -f '{}' \; clobber: clean - find bin/ -type f -executable -name "cholla*" -exec rm -f '{}' \; + -find bin/ -type f -executable -name "cholla*" -exec rm -f '{}' \; -find bin/ -type d -name "t*" -prune -exec rm -rf '{}' \; rm -rf bin/cholla.*tests*.xml From efbe7514172195bb8be37765f3733f92754c0d99 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Wed, 28 Sep 2022 15:55:37 -0400 Subject: [PATCH 108/694] Compute Sanitizer script Add a script to quickly and easily run the NVIDIA compute sanitizer Also, loosen gitignore to only ignore cholla.* not chola* --- .gitignore | 3 +- tools/cholla-nv-compute-sanitizer.sh | 105 +++++++++++++++++++++++++++ 2 files changed, 106 insertions(+), 2 deletions(-) create mode 100755 tools/cholla-nv-compute-sanitizer.sh diff --git a/.gitignore b/.gitignore index 936f8ebbb..ba64b82f1 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,5 @@ # Files specific to this repo # ############################## -cholla* googletest* # Compiled source # @@ -24,7 +23,7 @@ makefile.summit *.o ##executable -cholla* +cholla.* ## input files #parameter_file.txt diff --git a/tools/cholla-nv-compute-sanitizer.sh b/tools/cholla-nv-compute-sanitizer.sh new file mode 100755 index 000000000..73afddfc9 --- /dev/null +++ b/tools/cholla-nv-compute-sanitizer.sh @@ -0,0 +1,105 @@ +#!/usr/bin/env bash + +# Utility script for running the NVIDIA Compute Sanitizer. +# The Compute Sanitizer provides 4 tool: +# - Memcheck: The memory access error and leak detection tool. +# - Racecheck: The shared memory data access hazard detection tool. +# - Initcheck: The uninitialized device global memory access detection tool. +# - Synccheck: The thread synchronization hazard detection tool. +# +# See the NVIDIA docs for more detail: +# https://docs.nvidia.com/compute-sanitizer/ComputeSanitizer/index.html +# +# Syntax: compute-sanitizer [options] app_name [app_options] +# +# Compilation: Benefits from -G and -lineinfo. -Xcompiler -rdynamic for backtraces + +# Memcheck args +# --leak-check full/no (default: no) full = info about memory leaks +# --padding NUM, puts padding around arrays to improve out-of-bounds checking. +# NUM is The size of the pad in bytes, we should probably pad at least a couple +# of doubles, say 8 so pad=8*8=64 +# +# initcheck args +# --track-unused-memory yes/no (default: no) Check for unused memory allocations. +# +# Racecheck args +# - --print-level info + + +#set -x #echo all commands +while getopts "t:h" opt; do + case $opt in + t) # Set the tool to use + case ${OPTARG} in + m) + tool="memcheck" + tool_args="--leak-check full --padding 64 --report-api-errors all" + ;; + r) + tool="racecheck" + tool_args="--print-level info" + ;; + i) + tool="initcheck" + tool_args="--track-unused-memory yes" + ;; + s) + tool="synccheck" + tool_args="" + ;; + esac + ;; + h) # Print help + echo -e " +While not required the following compile flags can help: -G for debug builds, +-lineinfo for performance builds (can't be used with -G) and -Xcompiler -rdynamic +is useful for backtraces in all builds. + +Options: +-t m/r/i/s: Selects the tool to use. + m: runs the memcheck tool + r: runs the racecheck tool + i: runs the initcheck tool + s: runs the synccheck tool +-h: This dialogue" + exit 0 + ;; + \?) + echo "Invalid option: -${OPTARG}" >&2 + exit 1 + ;; + :) + echo "Option -${OPTARG} requires an argument." >&2 + exit 1 + ;; + esac +done + +# Exit if no tool was selected +if [ -z "$tool" ]; then + echo 'Missing tool argument' >&2 + exit 1 +fi + +# Get Paths +cholla_root=$(git rev-parse --show-toplevel) +cholla_exe=$(find "${cholla_root}" -name cholla.*) +cholla_parameter_file="${cholla_root}/examples/3D/sod.txt" +COMPUTE_SANITIZER=$(which compute-sanitizer) +sanitizer_log_file="${cholla_root}/bin/compute-sanitizer-${tool}.log" + +# Echo Paths +echo -e "cholla_root = ${cholla_root}" +echo -e "cholla_exe = ${cholla_exe}" +echo -e "cholla_parameter_file = ${cholla_parameter_file}" +echo -e "COMPUTE_SANITIZER = ${COMPUTE_SANITIZER}" +echo -e "sanitizer_log_file = ${sanitizer_log_file}" +echo -e "" +echo -e "tool = ${tool}" +echo -e "tool_args = ${tool_args}" + +# Execute Sanitizer +COMMAND="${COMPUTE_SANITIZER} --log-file ${sanitizer_log_file} --tool ${tool} ${tool_args} ${cholla_exe} ${cholla_parameter_file}" +echo -e "Launch Command = ${COMMAND}" +$COMMAND \ No newline at end of file From d7d1dc71062153109ee9271b98204061420730da Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Thu, 29 Sep 2022 12:26:48 -0400 Subject: [PATCH 109/694] Removed `extras` directory --- extras/submit_job_lux | 27 --------------------------- extras/submit_job_summit.lsf | 19 ------------------- 2 files changed, 46 deletions(-) delete mode 100644 extras/submit_job_lux delete mode 100644 extras/submit_job_summit.lsf diff --git a/extras/submit_job_lux b/extras/submit_job_lux deleted file mode 100644 index 39c919f2d..000000000 --- a/extras/submit_job_lux +++ /dev/null @@ -1,27 +0,0 @@ -#!/bin/bash -#SBATCH --job-name=cholla_test # Job name -#SBATCH --partition=gpuq # queue for job submission -#SBATCH --mail-type=END,FAIL # Mail events (NONE, BEGIN, END, FAIL, ALL) -#SBATCH --mail-user=brvillas@ucsc.edu # Where to send mail -#SBATCH --ntasks=8 # Number of MPI ranks -#SBATCH --nodes=1 # Number of nodes -#SBATCH --ntasks-per-node=8 # How many tasks on each node -#SBATCH --time=00:10:00 # Time limit hrs:min:sec -#SBATCH --output=cuda_test_%j.log # Standard output and error log - -pwd; hostname; date - -echo "Running program on $SLURM_JOB_NUM_NODES nodes with $SLURM_NTASKS total tasks, with each node getting $SLURM_NTASKS_PER_NODE running on cores." - -module load hdf5 -module load openmpi/4.0.1-cuda -module load cuda10.1/10.1.168 - -export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/home/brvillas/code/grackle/lib - - -cd /home/brvillas/cholla - -mpirun -N 1 --map-by ppr:8:node ./cholla examples/3D/Spherical_Collapse.txt - -date \ No newline at end of file diff --git a/extras/submit_job_summit.lsf b/extras/submit_job_summit.lsf deleted file mode 100644 index 5c2552c0a..000000000 --- a/extras/submit_job_summit.lsf +++ /dev/null @@ -1,19 +0,0 @@ -#!/bin/bash -# Begin LSF Directives -#BSUB -P AST149 -#BSUB -W 0:10 -#BSUB -nnodes 2 -#BSUB -J sphere_256 -#BSUB -o sphere_256.o%J -#BSUB -e sphere_256.e%J -#BSUB -alloc_flags "smt4" - -module load hdf5 -module load cuda - -export WORK_DIR=$MEMBERWORK/ast149/sphere_explosion - -cd $MEMBERWORK/ast149/cholla -date -#export OMP_NUM_THREADS=10 -jsrun -n 8 -a 1 -c 7 -g 1 -r 4 -l CPU-CPU -d packed -b packed:7 ./cholla examples/3D/Spherical_Overpressure.txt > $WORK_DIR/run_output.log |sort From d37e462749d1a60726111605f68d4de2ff7f2a20 Mon Sep 17 00:00:00 2001 From: Alwin Date: Sun, 2 Oct 2022 01:38:50 -0700 Subject: [PATCH 110/694] add viz --- src/global/global.cpp | 6 +- src/global/global.h | 2 + src/io/io.cpp | 7 ++- src/io/io.h | 7 ++- src/io/viz.cu | 124 ++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 141 insertions(+), 5 deletions(-) create mode 100644 src/io/viz.cu diff --git a/src/global/global.cpp b/src/global/global.cpp index a99c1360e..d84a04a68 100644 --- a/src/global/global.cpp +++ b/src/global/global.cpp @@ -223,6 +223,10 @@ void parse_param(char *name,char *value, struct parameters *parms){ parms->n_rotated_projection = atoi(value); else if (strcmp(name, "n_slice")==0) parms->n_slice = atoi(value); + else if (strcmp(name, "n_outviz")==0) + parms->n_outviz = atoi(value); + else if (strcmp(name, "outviz_density")==0) + parms->outviz_density = atoi(value); else if (strcmp(name, "xmin")==0) parms->xmin = atof(value); else if (strcmp(name, "ymin")==0) @@ -366,7 +370,7 @@ void parse_param(char *name,char *value, struct parameters *parms){ #ifdef CHEMISTRY_GPU else if (strcmp(name, "UVB_rates_file")==0) strncpy (parms->UVB_rates_file, value, MAXLEN); -#endif +#endif #ifdef COOLING_GRACKLE else if (strcmp(name, "UVB_rates_file")==0) strncpy (parms->UVB_rates_file, value, MAXLEN); diff --git a/src/global/global.h b/src/global/global.h index 560ddb5f4..0fc1ff47b 100644 --- a/src/global/global.h +++ b/src/global/global.h @@ -204,6 +204,8 @@ struct parameters int n_projection; int n_rotated_projection; int n_slice; + int n_outviz=0; + int outviz_density=0; Real xmin; Real ymin; Real zmin; diff --git a/src/io/io.cpp b/src/io/io.cpp index 46ee71916..7d51c030c 100644 --- a/src/io/io.cpp +++ b/src/io/io.cpp @@ -72,7 +72,7 @@ void Write_Message_To_Log_File( const char* message ){ out_file.close(); } -/* Write the initial conditions */ +/* Write Cholla Output Data */ void WriteData(Grid3D &G, struct parameters P, int nfile) { @@ -109,6 +109,11 @@ void WriteData(Grid3D &G, struct parameters P, int nfile) if (nfile % P.n_hydro == 0) OutputData(G,P,nfile); #endif + // This function does other checks to make sure it is valid (3D only) + #ifdef HDF5 + if (P.n_outviz && nfile % P.n_outviz == 0) OutputViz3D(G,P,nfile); + #endif + #ifdef PROJECTION if (nfile % P.n_projection == 0) OutputProjectedData(G,P,nfile); #endif /*PROJECTION*/ diff --git a/src/io/io.h b/src/io/io.h index deccec5ce..2e07bf083 100644 --- a/src/io/io.h +++ b/src/io/io.h @@ -1,5 +1,4 @@ -#ifndef IO_CHOLLA_H -#define IO_CHOLLA_H +#pragma once #include "../global/global.h" #include "../grid/grid3D.h" @@ -29,4 +28,6 @@ void Create_Log_File( struct parameters P ); void Write_Message_To_Log_File( const char* message ); void write_debug ( Real *Value, const char *fname, int nValues, int iProc ); -#endif /*IO_CHOLLA_H*/ + +// From io/viz.cu +void OutputViz3D(Grid3D &G, struct parameters P, int nfile); diff --git a/src/io/viz.cu b/src/io/viz.cu new file mode 100644 index 000000000..bfbf549e2 --- /dev/null +++ b/src/io/viz.cu @@ -0,0 +1,124 @@ +// Require HDF5 +#ifdef HDF5 + +#include + +#include "../grid/grid3D.h" + +#include "../io/io.h" // To provide io.h with OutputViz3D + + +void CopyReal3D_CPU(Real* source, Real* destination, Header H) +{ + int i,j,k,id,buf_id; + + for (k=0; k 0) { + WriteVizField(H, file_id, dataspace_id, dataset_buffer, G.C.density, "/density"); + } + /* + // Just an example of extending this function to include other fields. + // Not implemented yet + if (P.outviz_energy > 0) { + WriteVizField(H, file_id, dataspace_id, dataset_buffer, C.Energy, "/energy"); + } + */ + + + free(dataset_buffer); + herr_t status = H5Sclose(dataspace_id); +} + + + +void OutputViz3D(Grid3D &G, struct parameters P, int nfile) +{ + Header H = G.H; + // Do nothing in 1-D and 2-D case + if (H.ny == 1) { + return; + } + if (H.nz == 1) { + return; + } + // Do nothing if nfile is not multiple of n_outviz + if (nfile % P.n_outviz != 0) { + return; + } + + char filename[MAXLEN]; + char timestep[20]; + + // create the filename + sprintf(timestep, "%d", nfile); + strcpy(filename, P.outdir); + strcat(filename, timestep); + strcat(filename, ".viz3d.h5"); + #ifdef MPI_CHOLLA + sprintf(filename,"%s.%d",filename,procID); + #endif + + // create hdf5 file + hid_t file_id; /* file identifier */ + herr_t status; + + // Create a new file using default properties. + file_id = H5Fcreate(filename, H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT); + + // Write the header (file attributes) + G.Write_Header_HDF5(file_id); + + // write the conserved variables to the output file + WriteViz3D(G, P, file_id); + + // close the file + status = H5Fclose(file_id); + + if (status < 0) {printf("File write failed.\n"); exit(-1); } + +} + +#endif From 5ded79cb955e58e621a3dc23fef80479deac263d Mon Sep 17 00:00:00 2001 From: Alwin Date: Mon, 3 Oct 2022 13:13:20 -0700 Subject: [PATCH 111/694] fix make.host.crc to use c++17 again --- builds/make.host.crc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/builds/make.host.crc b/builds/make.host.crc index 6378cc80e..6960724cb 100644 --- a/builds/make.host.crc +++ b/builds/make.host.crc @@ -4,8 +4,8 @@ CC = mpicc CXX = mpicxx CFLAGS_DEBUG = -g -O0 CFLAGS_OPTIMIZE = -Ofast -CXXFLAGS_DEBUG = -g -O0 -std=c++14 -CXXFLAGS_OPTIMIZE = -Ofast -std=c++14 +CXXFLAGS_DEBUG = -g -O0 -std=c++17 +CXXFLAGS_OPTIMIZE = -Ofast -std=c++17 CUDA_ARCH = sm_70 OMP_NUM_THREADS = 16 From cfc92d5d3bd8d75f60df697dc97da8f3eee3dbd6 Mon Sep 17 00:00:00 2001 From: ojwg Date: Tue, 27 Sep 2022 00:15:23 -0400 Subject: [PATCH 112/694] hip-ify poisson distribution code and cleanup unused CPU feedback code. --- Makefile | 7 + src/analysis/analysis.cpp | 111 ------- src/analysis/feedback_analysis.cpp | 5 +- src/analysis/feedback_analysis.h | 16 +- src/global/global.h | 4 +- src/gravity/gravity_boundaries.cpp | 2 +- src/gravity/gravity_functions.cpp | 4 +- src/gravity/potential_paris_galactic.cu | 2 +- src/grid/grid3D.h | 14 - src/main.cpp | 24 +- src/model/disk_galaxy.h | 2 + src/particles/feedback_CIC.cpp | 376 ------------------------ src/particles/feedback_CIC.h | 12 - src/particles/feedback_CIC_gpu.cu | 195 ++++++------ src/particles/gravity_CIC_gpu.cu | 29 -- src/particles/particles_3D.cpp | 2 - src/particles/supernova.h | 33 +-- src/utils/gpu.hpp | 4 + 18 files changed, 142 insertions(+), 700 deletions(-) delete mode 100644 src/particles/feedback_CIC.cpp delete mode 100644 src/particles/feedback_CIC.h diff --git a/Makefile b/Makefile index 2ff88352d..fa5c222ff 100644 --- a/Makefile +++ b/Makefile @@ -89,6 +89,13 @@ ifeq ($(findstring -DPARIS,$(DFLAGS)),-DPARIS) endif endif +ifeq ($(findstring -DSUPERNOVA,$(DFLAGS)),-DSUPERNOVA) + ifdef HIPCONFIG + CXXFLAGS += -I$(ROCM_PATH)/include/hiprand -I$(ROCM_PATH)/hiprand/include + GPUFLAGS += -I$(ROCM_PATH)/include/hiprand -I$(ROCM_PATH)/hiprand/include + endif +endif + ifeq ($(findstring -DHDF5,$(DFLAGS)),-DHDF5) CXXFLAGS += -I$(HDF5_ROOT)/include GPUFLAGS += -I$(HDF5_ROOT)/include diff --git a/src/analysis/analysis.cpp b/src/analysis/analysis.cpp index 651c8e37e..0e4de3b17 100644 --- a/src/analysis/analysis.cpp +++ b/src/analysis/analysis.cpp @@ -80,117 +80,6 @@ void Grid3D::Compute_Lya_Statistics( ){ #endif //LYA_STATISTICS -#ifdef SUPERNOVA -void Grid3D::Compute_Gas_Velocity_Dispersion() { - #ifdef PARTICLES_CPU - int i, j, k, id, idm, idp; - int id_grav; - Real x, y, z, r, xpm, xpp, ypm, ypp, zpm, zpp; - Real Pm, Pp; - Real dPdx, dPdy, dPdr; - Real vx, vy, vz, vrms_poisson, vrms_analytic, vcp, vca, vcxp, vcyp, vcxa, vcya; - Real total_mass, partial_mass = 0, total_var_analytic = 0, total_var_poisson = 0, partial_var_poisson = 0, partial_var_analytic = 0; - - int n_ghost_grav = Particles.G.n_ghost_particles_grid; - int ghost_diff = n_ghost_grav - H.n_ghost; - int nx_grav = Particles.G.nx_local + 2*n_ghost_grav; - int ny_grav = Particles.G.ny_local + 2*n_ghost_grav; - - for (k=0; k %E, %E DIFF %E%% \n", i, j, fabs(gravAnalytic), fabs(gravCalc), fabs((gravAnalytic-gravCalc)/gravAnalytic*100)); - //vc = sqrt(r*fabs(Particles.G.gravity_x[id_grav]*x/r + Particles.G.gravity_y[id_grav]*y/r - dPdr/C.density[id])); - vcp = sqrt(r*fabs(Particles.G.gravity_x[id_grav]*x/r + Particles.G.gravity_y[id_grav]*y/r)); - vcxp = -y/r * vcp; - vcyp = x/r * vcp; - //auto [vcx, vcy] = Galaxies::MW.rotation_velocity(x, y); - vx = C.momentum_x[id]/ C.density[id]; - vy = C.momentum_y[id]/ C.density[id]; - vz = C.momentum_z[id]/ C.density[id]; - - partial_var_poisson += ((vx - vcxp)*(vx - vcxp) + (vy - vcyp)*(vy - vcyp) + vz*vz)* C.density[id]; - partial_var_analytic += ((vx - vcxa)*(vx - vcxa) + (vy - vcya)*(vy - vcya) + vz*vz)* C.density[id]; - } - } - } - partial_var_poisson /= total_mass; - partial_var_analytic /= total_mass; - - #ifdef MPI_CHOLLA - MPI_Reduce(&partial_var_poisson, &total_var_poisson, 1, MPI_CHREAL, MPI_SUM, root, world); - MPI_Reduce(&partial_var_analytic, &total_var_analytic, 1, MPI_CHREAL, MPI_SUM, root, world); - - #else - total_var_poisson = partial_var_poisson; - total_var_analytic = partial_var_analytic; - #endif - - vrms_poisson = sqrt(total_var_poisson)*VELOCITY_UNIT/1e5; // output in km/s - vrms_analytic = sqrt(total_var_analytic)*VELOCITY_UNIT/1e5; - - chprintf("feedback: time %f, dt=%f, vrms_p = %f km/s, vrms_a = %f km/s\n", H.t, H.dt, vrms_poisson, vrms_analytic); - #endif // PARTICLES_CPU -} -#endif // SUPERNOVA - - void Grid3D::Compute_and_Output_Analysis( struct parameters *P ){ #ifdef COSMOLOGY diff --git a/src/analysis/feedback_analysis.cpp b/src/analysis/feedback_analysis.cpp index d1ff8af56..8df88910b 100644 --- a/src/analysis/feedback_analysis.cpp +++ b/src/analysis/feedback_analysis.cpp @@ -6,6 +6,7 @@ #include "../mpi/mpi_routines.h" #endif +#define VRMS_CUTOFF_DENSITY 0.01*0.6*MP/DENSITY_UNIT FeedbackAnalysis::FeedbackAnalysis(Grid3D& G) { // allocate arrays @@ -94,7 +95,7 @@ void FeedbackAnalysis::Compute_Gas_Velocity_Dispersion(Grid3D& G) { id = i + j*G.H.nx + k*G.H.nx*G.H.ny; id_grav = (i + ghost_diff) + (j + ghost_diff)*nx_grav + (k + ghost_diff)*nx_grav*ny_grav; - if (G.C.density[id] < 148273.7) continue; // in cgs, this is 0.01 cm^{-3} FIXME put into a constant + if (G.C.density[id] < VRMS_CUTOFF_DENSITY) continue; // in cgs, this is 0.01 cm^{-3} G.Get_Position(i, j, k, &x, &y, &z); r = sqrt(x*x + y*y); @@ -139,4 +140,4 @@ void FeedbackAnalysis::Compute_Gas_Velocity_Dispersion(Grid3D& G) { #ifdef CPU_TIME G.Timer.FeedbackAnalysis.End(); #endif -} \ No newline at end of file +} diff --git a/src/analysis/feedback_analysis.h b/src/analysis/feedback_analysis.h index 32771019b..c66a1a31d 100644 --- a/src/analysis/feedback_analysis.h +++ b/src/analysis/feedback_analysis.h @@ -8,21 +8,19 @@ class FeedbackAnalysis { Real *h_circ_vel_x, *h_circ_vel_y; - #ifdef PARTICLES_GPU - Real *d_circ_vel_x, *d_circ_vel_y; - #endif #ifdef PARTICLES_GPU + Real *d_circ_vel_x, *d_circ_vel_y; void Compute_Gas_Velocity_Dispersion_GPU(Grid3D& G); #endif public: - int countSN; - int countResolved; - int countUnresolved; - Real totalEnergy; - Real totalMomentum; - Real totalUnresEnergy; + int countSN {0}; + int countResolved {0}; + int countUnresolved {0}; + Real totalEnergy {0}; + Real totalMomentum {0}; + Real totalUnresEnergy {0}; FeedbackAnalysis(Grid3D& G); ~FeedbackAnalysis(); diff --git a/src/global/global.h b/src/global/global.h index df9a76efc..784d09505 100644 --- a/src/global/global.h +++ b/src/global/global.h @@ -217,14 +217,14 @@ struct parameters int yu_bcnd; int zl_bcnd; int zu_bcnd; -//#ifdef MPI_CHOLLA FIXME: verify this change is needed. +#ifdef MPI_CHOLLA int xlg_bcnd; int xug_bcnd; int ylg_bcnd; int yug_bcnd; int zlg_bcnd; int zug_bcnd; -//#endif /*MPI_CHOLLA*/ +#endif /*MPI_CHOLLA*/ char custom_bcnd[MAXLEN]; char outdir[MAXLEN]; char indir[MAXLEN]; //Folder to load Initial conditions from diff --git a/src/gravity/gravity_boundaries.cpp b/src/gravity/gravity_boundaries.cpp index 967ee6817..1eaec0380 100644 --- a/src/gravity/gravity_boundaries.cpp +++ b/src/gravity/gravity_boundaries.cpp @@ -144,7 +144,7 @@ void Grid3D::Compute_Potential_Isolated_Boundary( int direction, int side, int // for bc_pontential_type = 1 the mod_frac is the fraction // of the disk mass contributed by the simulated particles - Real mod_frac = 0.1; //1.0; //0; + Real mod_frac = SIMULATED_FRACTION; Real pot_val; int i, j, k, id; for ( k=0; k outtime) G.H.dt = outtime - G.H.t; #ifdef SUPERNOVA - G.Cluster_Feedback(); + supernova::Cluster_Feedback(G, sn_analysis); #endif //SUPERNOVA #ifdef PARTICLES @@ -304,7 +292,7 @@ int main(int argc, char *argv[]) #ifdef ANALYSIS if ( G.Analysis.Output_Now ) G.Compute_and_Output_Analysis(&P); #ifdef SUPERNOVA - feedback_analysis.Compute_Gas_Velocity_Dispersion(G); + sn_analysis.Compute_Gas_Velocity_Dispersion(G); #endif #endif diff --git a/src/model/disk_galaxy.h b/src/model/disk_galaxy.h index d32099b2f..a83b350f3 100644 --- a/src/model/disk_galaxy.h +++ b/src/model/disk_galaxy.h @@ -1,6 +1,8 @@ #ifndef DISK_GALAXY #define DISK_GALAXY +#define SIMULATED_FRACTION 0.1 + #include #include #include diff --git a/src/particles/feedback_CIC.cpp b/src/particles/feedback_CIC.cpp deleted file mode 100644 index 580146f9d..000000000 --- a/src/particles/feedback_CIC.cpp +++ /dev/null @@ -1,376 +0,0 @@ -#ifdef PARTICLES -#ifdef DE -#ifdef PARTICLE_AGE -#ifdef SUPERNOVA - -#include -#include -#include "feedback_CIC.h" -#include "particles_3D.h" -#include "../grid/grid3D.h" -#include "../io/io.h" -#include "../global/global.h" -#include "supernova.h" -#include -#include -#include -#include - -#ifdef MPI_CHOLLA -#include "../mpi/mpi_routines.h" -#endif - -#ifdef PARALLEL_OMP -#include"../utils/parallel_omp.h" -#endif - - -std::random_device rd; -//std::mt19937_64 gen(rd()); -std::mt19937_64 generator(42); //FIXME read this in from init params or ChollaPrngGenerator -/* -void Supernova::initState(struct parameters *P) { - generator.seed(P->prng_seed); -}*/ - - -std::tuple getClusterFeedback(Real t, Real dt, Real mass, Real age, Real density) { - int N = 0; - if (t - age <= 1.0e4) { - std::poisson_distribution distribution(Supernova::SNR * mass * dt); - N = distribution(generator); - } - Real n_0 = density * DENSITY_UNIT / (Supernova::MU*MP); // in cm^{-3} - //std::cout << "n_0 is " << n_0 << std::endl; - //if (N > 0) std::cout << "MOMENTUM: " << FINAL_MOMENTUM * pow(n_0, -0.17) * pow(N, 0.93) * VELOCITY_UNIT/1e10 << std::endl; - - return { /* number of SN */ N, - /* total energy given off */ N * Supernova::ENERGY_PER_SN, - /* total mass */ N * Supernova::MASS_PER_SN, - /* final momentum */ Supernova::FINAL_MOMENTUM * pow(n_0, -0.17) * pow(N, 0.93), - /* shell formation radius */ Supernova::R_SH * pow(n_0, -0.46) * pow(N, 0.29) - }; -} - - -Real Grid3D::Cluster_Feedback() { - #ifdef CPU_TIME - Timer.Feedback.Start(); - #endif - - Real max_sn_dti = 0; - #ifdef PARTICLES_GPU - max_sn_dti = Cluster_Feedback_GPU(); - #else - Real* feedbackInfo; - Real* thread_dti; - int totalThreads = 1; - Real partiallyReducedInfo[N_INFO] = {0, 0, 0, 0, 0}; - Real reducedInfo[N_INFO] = {0, 0, 0, 0, 0}; - const int SN = 0, RESOLVED = 1, NOT_RESOLVED = 2, ENERGY = 3, MOMENTUM = 4; - - #ifndef PARALLEL_OMP - - feedbackInfo = (Real*)calloc(N_INFO, sizeof(Real)); - sn_thread_dti = (Real*)calloc(1, sizeof(Real)); - Cluster_Feedback_Function( 0, Particles.n_local, feedbackInfo, 0, thread_dti); - - #else - - totalThreads = N_OMP_THREADS; - feedbackInfo = (Real*)calloc(N_INFO*totalThreads, sizeof(Real)); - thread_dti = (Real*)calloc(totalThreads, sizeof(Real)); - // malloc array of size N_OMP_THREADS to take the feedback info - #pragma omp parallel num_threads( N_OMP_THREADS ) - { - int omp_id, n_omp_procs; - part_int_t p_start, p_end; - - omp_id = omp_get_thread_num(); - n_omp_procs = omp_get_num_threads(); - - Get_OMP_Particles_Indxs( Particles.n_local, N_OMP_THREADS, omp_id, &p_start, &p_end ); - Cluster_Feedback_Function( p_start, p_end, feedbackInfo, omp_id, thread_dti); - } - #endif //PARALLEL_OMP - - for (int i = 0; i < totalThreads; i++) { - partiallyReducedInfo[SN] += feedbackInfo[i*N_INFO + SN]; - partiallyReducedInfo[RESOLVED] += feedbackInfo[i*N_INFO + RESOLVED]; - partiallyReducedInfo[NOT_RESOLVED] += feedbackInfo[i*N_INFO + NOT_RESOLVED]; - partiallyReducedInfo[ENERGY] += feedbackInfo[i*N_INFO + ENERGY]; - partiallyReducedInfo[MOMENTUM] += feedbackInfo[i*N_INFO + MOMENTUM]; - max_sn_dti = fmax(max_sn_dti, thread_dti[i]); - } - - #ifdef MPI_CHOLLA - max_sn_dti = ReduceRealMax(max_sn_dti); - MPI_Reduce(&partiallyReducedInfo, &reducedInfo, N_INFO, MPI_CHREAL, MPI_SUM, root, world); - if (procID==root) { - #else - memcpy(reducedInfo, partiallyReducedInfo, sizeof(partiallyReducedInfo)); - #endif //MPI_CHOLLA - - countSN += reducedInfo[SN]; - countResolved += reducedInfo[RESOLVED]; - countUnresolved += reducedInfo[NOT_RESOLVED]; - totalEnergy += reducedInfo[ENERGY]; - totalMomentum += reducedInfo[MOMENTUM]; - - Real resolved_ratio = 0.0; - if (reducedInfo[RESOLVED] > 0 || reducedInfo[NOT_RESOLVED] > 0) { - resolved_ratio = reducedInfo[RESOLVED]*1.0/(reducedInfo[RESOLVED] + reducedInfo[NOT_RESOLVED]); - } - Real global_resolved_ratio = 0.0; - if (countResolved > 0 || countUnresolved > 0) { - global_resolved_ratio = countResolved / (countResolved + countUnresolved); - } - /*chprintf("iteration %d: number of SN: %d, ratio of resolved %f\n", H.n_step, (long)reducedInfo[SN], resolved_ratio); - chprintf(" this iteration: energy: %e erg. x-momentum: %e S.M. km/s\n", - reducedInfo[ENERGY]*MASS_UNIT*LENGTH_UNIT*LENGTH_UNIT/TIME_UNIT/TIME_UNIT, reducedInfo[MOMENTUM]*VELOCITY_UNIT/1e5); - chprintf(" cummulative: #SN: %d, ratio of resolved (R: %d, UR: %d) = %f\n", (long)countSN, (long)countResolved, (long)countUnresolved, global_resolved_ratio); - chprintf(" energy: %e erg. Total x-momentum: %e S.M. km/s\n", totalEnergy*MASS_UNIT*LENGTH_UNIT*LENGTH_UNIT/TIME_UNIT/TIME_UNIT, totalMomentum*VELOCITY_UNIT/1e5); - */ - - #ifdef MPI_CHOLLA - } - #endif /*MPI_CHOLLA*/ - - free(feedbackInfo); - free(thread_dti); - - #endif //PARTICLES_GPU - - #ifdef CPU_TIME - Timer.Feedback.End(); - #endif - - return max_sn_dti; -} - - -// returns the largest 1/dt for the cell with the given index -Real Grid3D::Calc_Timestep(int index) { - Real density = fmax(C.density[index], DENS_FLOOR); - Real vx = C.momentum_x[index] / density; - Real vy = C.momentum_y[index] / density; - Real vz = C.momentum_z[index] / density; - Real cs = sqrt(gama * fmax( (C.Energy[index]- 0.5*density*(vx*vx + vy*vy + vz*vz))*(gama-1.0), TINY_NUMBER ) / density); - return fmax( fmax((fabs(vx) + cs)/H.dx, (fabs(vy) + cs)/H.dy), (fabs(vz) + cs)/H.dz ) ; -} - - -//Compute the CIC feedback -void Grid3D::Cluster_Feedback_Function(part_int_t p_start, part_int_t p_end, Real* info, int threadId, Real* max_dti) { - #ifdef PARTICLES_CPU - int nx_g, ny_g, nz_g; - nx_g = H.nx; - ny_g = H.ny; - nz_g = H.nz; - - Real xMin, yMin, zMin; - xMin = H.xblocal; //TODO: make sure this is correct (and not H.xbound) (local min vs. global min) - yMin = H.yblocal; - zMin = H.zblocal; - - part_int_t pIndx; - int indx_x, indx_y, indx_z, indx; - int pcell_x, pcell_y, pcell_z, pcell_index; - Real pos_x, pos_y, pos_z; - Real cell_center_x, cell_center_y, cell_center_z; - Real delta_x, delta_y, delta_z; - Real dV = H.dx*H.dy*H.dz; - Real feedback_energy, feedback_density, feedback_momentum; - bool ignore, in_local, is_resolved; - - for ( pIndx=p_start; pIndx < p_end; pIndx++ ){ - pos_x = Particles.pos_x[pIndx]; - pos_y = Particles.pos_y[pIndx]; - pos_z = Particles.pos_z[pIndx]; - - pcell_x = (int) floor( ( pos_x - xMin ) / H.dx ) + H.n_ghost; - pcell_y = (int) floor( ( pos_y - yMin ) / H.dy ) + H.n_ghost; - pcell_z = (int) floor( ( pos_z - zMin ) / H.dz ) + H.n_ghost; - pcell_index = pcell_x + pcell_y*nx_g + pcell_z*nx_g*ny_g; - - auto [N, energy, mass, momentum, r_sf] = getClusterFeedback(H.t, H.dt, Particles.mass[pIndx], Particles.age[pIndx], C.density[pcell_index]); - if (N == 0) continue; - - Particles.mass[pIndx] -= mass; - feedback_energy = energy / dV; - feedback_density = mass / dV; - feedback_momentum = momentum / sqrt(3) / dV; - is_resolved = 3 * std::max({H.dx, H.dy, H.dz}) <= r_sf; - // now fill in 'info' for logging - info[threadId*N_INFO] += N*1.0; - if (is_resolved) info[threadId*N_INFO + 1] += 1.0; - else info[threadId*N_INFO + 2] += 1.0; - - indx_x = (int) floor( ( pos_x - xMin - 0.5*H.dx ) / H.dx ); - indx_y = (int) floor( ( pos_y - yMin - 0.5*H.dy ) / H.dy ); - indx_z = (int) floor( ( pos_z - zMin - 0.5*H.dz ) / H.dz ); - - in_local = (pos_x >= H.xblocal && pos_x < H.xblocal_max) && - (pos_y >= H.yblocal && pos_y < H.yblocal_max) && - (pos_z >= H.zblocal && pos_z < H.zblocal_max); - if (!in_local) { - std::cout << " Cluster_FeedbackError:" << std::endl; - #ifdef PARTICLE_IDS - std::cout << " Particle outside local domain pID: " << Particles.partIDs[pIndx] << std::endl; - #else - std::cout << " Particle outside local domain " << std::endl; - #endif - std::cout << " Domain X: " << xMin << " " << H.xblocal_max << std::endl; - std::cout << " Domain Y: " << yMin << " " << H.yblocal_max << std::endl; - std::cout << " Domain Z: " << zMin << " " << H.zblocal_max << std::endl; - std::cout << " Particle X: " << pos_x << std::endl; - std::cout << " Particle Y: " << pos_y << std::endl; - std::cout << " Particle Z: " << pos_z << std::endl; - continue; - } - - ignore = indx_x < -1 || indx_y < -1 || indx_z < -1 || indx_x > nx_g-3 || indx_y > ny_g-3 || indx_y > nz_g-3; - if (ignore){ - #ifdef PARTICLE_IDS - std::cout << "ERROR Cluster_Feedback Index pID: " << Particles.partIDs[pIndx] << std::endl; - #else - std::cout << "ERROR Cluster_Feedback Index " << std::endl; - #endif - std::cout << "xIndx: " << pos_x << " " << indx_x << std::endl; - std::cout << "zIndx: " << pos_y << " " << indx_z << std::endl; - std::cout << "yIndx: " << pos_z << " " << indx_y << std::endl; - std::cout << std::endl; - continue; - } - - cell_center_x = xMin + indx_x*H.dx + 0.5*H.dx; - cell_center_y = yMin + indx_y*H.dy + 0.5*H.dy; - cell_center_z = zMin + indx_z*H.dz + 0.5*H.dz; - delta_x = 1 - ( pos_x - cell_center_x ) / H.dx; - delta_y = 1 - ( pos_y - cell_center_y ) / H.dy; - delta_z = 1 - ( pos_z - cell_center_z ) / H.dz; - indx_x += H.n_ghost; - indx_y += H.n_ghost; - indx_z += H.n_ghost; - - //std::cout << "delta (x, y, z): (" << delta_x << ", " << delta_y << ", " << delta_z << ")" << std::endl; - //std::cout << "cell center (x, y, z): (" << cell_center_x << ", " << cell_center_y << ", " << cell_center_z << ")" << std::endl; - - indx = indx_x + indx_y*nx_g + indx_z*nx_g*ny_g; - if (is_resolved) { - C.density[indx] += feedback_density * delta_x * delta_y * delta_z; - C.GasEnergy[indx] += feedback_energy * delta_x * delta_y * delta_z; - C.Energy[indx] += feedback_energy * delta_x * delta_y * delta_z; - info[threadId*N_INFO + 3] += feedback_energy * fabs(delta_x * delta_y * delta_z) * dV; - } else { - C.momentum_x[indx] += -delta_x * feedback_momentum; - C.momentum_y[indx] += -delta_y * feedback_momentum; - C.momentum_z[indx] += -delta_z * feedback_momentum; - info[threadId*N_INFO + 4] += (fabs(delta_x) /*+ fabs(delta_y) + fabs(delta_z)*/)*feedback_momentum * dV; - } - max_dti[threadId] = fmax(max_dti[threadId], Calc_Timestep(indx)); - - indx = (indx_x+1) + indx_y*nx_g + indx_z*nx_g*ny_g; - if (is_resolved) { - C.density[indx] += feedback_density * (1-delta_x) * delta_y * delta_z; - C.GasEnergy[indx] += feedback_energy * (1-delta_x) * delta_y * delta_z; - C.Energy[indx] += feedback_energy * (1-delta_x) * delta_y * delta_z; - info[threadId*N_INFO + 3] += feedback_energy * fabs((1-delta_x) * delta_y * delta_z) * dV; - } else { - C.momentum_x[indx] += delta_x * feedback_momentum; - C.momentum_y[indx] += -delta_y * feedback_momentum; - C.momentum_z[indx] += -delta_z * feedback_momentum; - info[threadId*N_INFO + 4] += (fabs(delta_x) /*+ fabs(delta_y) + fabs(delta_z)*/)*feedback_momentum * dV; - } - max_dti[threadId] = fmax(max_dti[threadId], Calc_Timestep(indx)); - - indx = indx_x + (indx_y+1)*nx_g + indx_z*nx_g*ny_g; - if (is_resolved) { - C.density[indx] += feedback_density * delta_x * (1-delta_y) * delta_z; - C.GasEnergy[indx] += feedback_energy * delta_x * (1-delta_y) * delta_z; - C.Energy[indx] += feedback_energy * delta_x * (1-delta_y) * delta_z; - info[threadId*N_INFO + 3] += feedback_energy * fabs(delta_x * (1-delta_y )* delta_z) * dV; - } else { - C.momentum_x[indx] += -delta_x * feedback_momentum; - C.momentum_y[indx] += delta_y * feedback_momentum; - C.momentum_z[indx] += -delta_z * feedback_momentum; - info[threadId*N_INFO + 4] += (fabs(delta_x) /*+ fabs(delta_y) + fabs(delta_z)*/)*feedback_momentum * dV; - } - max_dti[threadId] = fmax(max_dti[threadId], Calc_Timestep(indx)); - - indx = indx_x + indx_y*nx_g + (indx_z+1)*nx_g*ny_g; - if (is_resolved) { - C.density[indx] += feedback_density * delta_x * delta_y * (1-delta_z); - C.GasEnergy[indx] += feedback_energy * delta_x * delta_y * (1-delta_z); - C.Energy[indx] += feedback_energy * delta_x * delta_y * (1-delta_z); - info[threadId*N_INFO + 3] += feedback_energy * fabs(delta_x * delta_y * (1 - delta_z)) * dV; - } else { - C.momentum_x[indx] += -delta_x * feedback_momentum; - C.momentum_y[indx] += -delta_y * feedback_momentum; - C.momentum_z[indx] += delta_z * feedback_momentum; - info[threadId*N_INFO + 4] += (fabs(delta_x) /*+ fabs(delta_y) + fabs(delta_z)*/)*feedback_momentum * dV; - } - max_dti[threadId] = fmax(max_dti[threadId], Calc_Timestep(indx)); - - indx = (indx_x+1) + (indx_y+1)*nx_g + indx_z*nx_g*ny_g; - if (is_resolved) { - C.density[indx] += feedback_density * (1-delta_x) * (1-delta_y) * delta_z; - C.GasEnergy[indx] += feedback_energy * (1-delta_x) * (1-delta_y) * delta_z; - C.Energy[indx] += feedback_energy * (1-delta_x) * (1-delta_y) * delta_z; - info[threadId*N_INFO + 3] += feedback_energy * fabs((1-delta_x) * (1-delta_y) * delta_z) * dV; - } else { - C.momentum_x[indx] += delta_x * feedback_momentum; - C.momentum_y[indx] += delta_y * feedback_momentum; - C.momentum_z[indx] += -delta_z * feedback_momentum; - info[threadId*N_INFO + 4] += (fabs(delta_x) /*+ fabs(delta_y) + fabs(delta_z)*/)*feedback_momentum * dV; - } - max_dti[threadId] = fmax(max_dti[threadId], Calc_Timestep(indx)); - - indx = (indx_x+1) + indx_y*nx_g + (indx_z+1)*nx_g*ny_g; - if (is_resolved) { - C.density[indx] += feedback_density * (1-delta_x) * delta_y * (1-delta_z); - C.GasEnergy[indx] += feedback_energy * (1-delta_x) * delta_y * (1-delta_z); - C.Energy[indx] += feedback_energy * (1-delta_x) * delta_y * (1-delta_z); - info[threadId*N_INFO + 3] += feedback_energy * fabs((1-delta_x) * delta_y * (1-delta_z)) * dV; - } else { - C.momentum_x[indx] += delta_x * feedback_momentum; - C.momentum_y[indx] += -delta_y * feedback_momentum; - C.momentum_z[indx] += delta_z * feedback_momentum; - info[threadId*N_INFO + 4] += (fabs(delta_x) /*+ fabs(delta_y) + fabs(delta_z)*/)*feedback_momentum * dV; - } - max_dti[threadId] = fmax(max_dti[threadId], Calc_Timestep(indx)); - - indx = indx_x + (indx_y+1)*nx_g + (indx_z+1)*nx_g*ny_g; - if (is_resolved) { - C.density[indx] += feedback_density * delta_x * (1-delta_y) * (1-delta_z); - C.GasEnergy[indx] += feedback_energy * delta_x * (1-delta_y) * (1-delta_z); - C.Energy[indx] += feedback_energy * delta_x * (1-delta_y) * (1-delta_z); - info[threadId*N_INFO + 3] += feedback_energy * fabs(delta_x * (1-delta_y) * (1-delta_z)) * dV; - } else { - C.momentum_x[indx] += -delta_x * feedback_momentum; - C.momentum_y[indx] += delta_y * feedback_momentum; - C.momentum_z[indx] += delta_z * feedback_momentum; - info[threadId*N_INFO + 4] += (fabs(delta_x) /*+ fabs(delta_y) + fabs(delta_z)*/)*feedback_momentum * dV; - } - max_dti[threadId] = fmax(max_dti[threadId], Calc_Timestep(indx)); - - indx = (indx_x+1) + (indx_y+1)*nx_g + (indx_z+1)*nx_g*ny_g; - if (is_resolved) { - C.density[indx] += feedback_density * (1-delta_x) * (1-delta_y) * (1-delta_z); - C.GasEnergy[indx] += feedback_energy * (1-delta_x) * (1-delta_y) * (1-delta_z); - C.Energy[indx] += feedback_energy * (1-delta_x) * (1-delta_y) * (1-delta_z); - info[threadId*N_INFO + 3] += feedback_energy * fabs((1-delta_x) * (1-delta_y) * (1-delta_z)) * dV; - } else { - C.momentum_x[indx] += delta_x * feedback_momentum; - C.momentum_y[indx] += delta_y * feedback_momentum; - C.momentum_z[indx] += delta_z * feedback_momentum; - info[threadId*N_INFO + 4] += (fabs(delta_x) /*+ fabs(delta_y) + fabs(delta_z)*/)*feedback_momentum * dV; - } - max_dti[threadId] = fmax(max_dti[threadId], Calc_Timestep(indx)); - } - #endif //PARTICLES_CPU -} -#endif //PARTICLE_AGE -#endif //DE -#endif //PARTICLES -#endif //SUPERNOVA diff --git a/src/particles/feedback_CIC.h b/src/particles/feedback_CIC.h deleted file mode 100644 index 5f89886ae..000000000 --- a/src/particles/feedback_CIC.h +++ /dev/null @@ -1,12 +0,0 @@ -#if defined(PARTICLES) && defined(DE) && defined(PARTICLE_AGE) -#pragma once - -#include "../global/global.h" - -const int N_INFO = 5; - -Real getClusterEnergyFeedback(Real t, Real dt, Real age); -Real getClusterMassFeedback(Real t, Real dt, Real age); -std::tuple getClusterFeedback(Real t, Real dt, Real age, Real density); - -#endif // PARTICLES et. al diff --git a/src/particles/feedback_CIC_gpu.cu b/src/particles/feedback_CIC_gpu.cu index 8d5889537..6bb043ae4 100644 --- a/src/particles/feedback_CIC_gpu.cu +++ b/src/particles/feedback_CIC_gpu.cu @@ -18,13 +18,14 @@ #define i_MOMENTUM 4 #define i_UNRES_ENERGY 5 -namespace Supernova { - curandStateMRG32k3a_t* curandStates; +namespace supernova { + curandStateMRG32k3a_t* randStates; part_int_t n_states; Real t_buff, dt_buff; } +#ifndef O_HIP __device__ double atomicMax(double* address, double val) { unsigned long long int* address_as_ull = (unsigned long long int*)address; @@ -37,7 +38,7 @@ __device__ double atomicMax(double* address, double val) } while (assumed != old); return __longlong_as_double(old); } - +#endif //O_HIP __global__ void initState_kernel(unsigned int seed, curandStateMRG32k3a_t* states) { int id = blockIdx.x*blockDim.x + threadIdx.x; @@ -50,54 +51,33 @@ __global__ void initState_kernel(unsigned int seed, curandStateMRG32k3a_t* state * @brief Initialize the cuRAND state, which is analogous to the concept of generators in CPU code. * The state object maintains configuration and status the cuRAND context for each thread on the GPU. * Initialize more than the number of local particles since the latter will change through MPI transfers. - * - * @param n_local - * @param allocation_factor + * + * @param n_local + * @param allocation_factor */ -void Supernova::initState(struct parameters *P, part_int_t n_local, Real allocation_factor) { - printf("Supernova::initState start\n"); +void supernova::initState(struct parameters *P, part_int_t n_local, Real allocation_factor) { + printf("supernova::initState start\n"); t_buff = 0; dt_buff = 0; n_states = n_local*allocation_factor; - //n_states = 10; - cudaMalloc((void**) &curandStates, n_states*sizeof(curandStateMRG32k3a_t)); - //int ngrid = (n_states + TPB_PARTICLES - 1) / TPB_PARTICLES; - int ngrid = (n_states + 64- 1) / 64; + cudaMalloc((void**) &randStates, n_states*sizeof(curandStateMRG32k3a_t)); + int ngrid = (n_states + TPB_FEEDBACK- 1) / TPB_FEEDBACK; dim3 grid(ngrid); - //dim3 block(TPB_PARTICLES); - dim3 block(64); + dim3 block(TPB_FEEDBACK); - printf("Supernova::initState: n_states=%d, ngrid=%d, threads=%d\n", n_states, ngrid, 64); - hipLaunchKernelGGL(initState_kernel, grid, block, 0, 0, P->prng_seed, curandStates); + hipLaunchKernelGGL(initState_kernel, grid, block, 0, 0, P->prng_seed, randStates); CHECK(cudaDeviceSynchronize()); - printf("Supernova::initState end\n"); -} - - -/* -__device__ void Single_Cluster_Feedback(Real t, Real dt, Real age, Real density, Real* feedback, curandStateMRG32k3a_t* state) { - int N = 0; - if (t + age <= Supernova::SN_ERA) { - N = curand_poisson (state, Supernova::SNR * dt); - } - Real n_0 = density * DENSITY_UNIT / (Supernova::MU*MP); // in cm^{-3} - - feedback[Supernova::NUMBER] = N * 1.0; // number of SN - feedback[Supernova::ENERGY] = N * Supernova::ENERGY_PER_SN; // total energy - feedback[Supernova::MASS] = N * Supernova::MASS_PER_SN; // total mass - feedback[Supernova::MOMENTUM] = Supernova::FINAL_MOMENTUM * pow(n_0, -0.17) * pow(N, 0.93); // final momentum - feedback[Supernova::SHELL_RADIUS] = Supernova::R_SH * pow(n_0, -0.46) * pow(N, 0.29); // shell formation radius + printf("supernova::initState end: n_states=%d, ngrid=%d, threads=%d\n", n_states, ngrid, TPB_FEEDBACK); } -*/ __device__ Real Calc_Timestep(Real gamma, Real *density, Real *momentum_x, Real *momentum_y, Real *momentum_z, Real *energy, int index, Real dx, Real dy, Real dz){ Real dens = fmax(density[index], DENS_FLOOR); Real d_inv = 1.0 / dens; Real vx = momentum_x[index] * d_inv; - Real vy = momentum_y[index] * d_inv; + Real vy = momentum_y[index] * d_inv; Real vz = momentum_z[index] * d_inv; Real P = fmax((energy[index]- 0.5*dens*(vx*vx + vy*vy + vz*vz))*(gamma-1.0), TINY_NUMBER); Real cs = sqrt(gamma * P * d_inv); @@ -137,12 +117,12 @@ __device__ Real GetAverageDensity(Real *density, int xi, int yi, int zi, int nxg __device__ Real GetAverageNumberDensity_CGS(Real *density, int xi, int yi, int zi, int nxg, int nyg, int ng) { - return GetAverageDensity(density, xi, yi, zi, nxg, nyg, ng) * DENSITY_UNIT / (Supernova::MU*MP); + return GetAverageDensity(density, xi, yi, zi, nxg, nyg, ng) * DENSITY_UNIT / (supernova::MU*MP); } -__global__ void Cluster_Feedback_Kernel(part_int_t n_local, part_int_t *id, Real* pos_x_dev, Real* pos_y_dev, Real* pos_z_dev, - Real* mass_dev, Real* age_dev, Real xMin, Real yMin, Real zMin, Real xMax, Real yMax, Real zMax, +__global__ void Cluster_Feedback_Kernel(part_int_t n_local, part_int_t *id, Real* pos_x_dev, Real* pos_y_dev, Real* pos_z_dev, + Real* mass_dev, Real* age_dev, Real xMin, Real yMin, Real zMin, Real xMax, Real yMax, Real zMax, Real dx, Real dy, Real dz, int nx_g, int ny_g, int nz_g, int n_ghost, Real t, Real dt, Real* dti, Real* info, Real* density, Real* gasEnergy, Real* energy, Real* momentum_x, Real* momentum_y, Real* momentum_z, Real gamma, curandStateMRG32k3a_t* states, Real* prev_dens, int* prev_N, short direction){ @@ -163,7 +143,7 @@ __global__ void Cluster_Feedback_Kernel(part_int_t n_local, part_int_t *id, Real Real cell_center_x, cell_center_y, cell_center_z; Real delta_x, delta_y, delta_z; Real x_frac, y_frac, z_frac; - Real px, py, pz, d; + Real px, py, pz, d; //Real t_b, t_a, v_1, v_2, d_b, d_a, p_b, p_a, e; Real feedback_energy=0, feedback_density=0, feedback_momentum=0, n_0, shell_radius; bool is_resolved = false; @@ -181,7 +161,7 @@ __global__ void Cluster_Feedback_Kernel(part_int_t n_local, part_int_t *id, Real (pos_y >= yMin && pos_y < yMax) && (pos_z >= zMin && pos_z < zMax); if (!in_local) { - printf(" Feedback GPU: Particle outside local domain [%f %f %f] [%f %f] [%f %f] [%f %f]\n ", + printf(" Feedback GPU: Particle outside local domain [%f %f %f] [%f %f] [%f %f] [%f %f]\n ", pos_x, pos_y, pos_z, xMin, xMax, yMin, yMax, zMin, zMax); } @@ -193,25 +173,25 @@ __global__ void Cluster_Feedback_Kernel(part_int_t n_local, part_int_t *id, Real bool ignore = indx_x < 0 || indx_y < 0 || indx_z < 0 || indx_x >= nx_g-2*n_ghost || indx_y >= ny_g-2*n_ghost || indx_z >= nz_g-2*n_ghost; if (ignore) { - printf(" Feedback GPU: Particle CIC index err [%f %f %f] [%d %d %d] [%d %d %d] \n ", + printf(" Feedback GPU: Particle CIC index err [%f %f %f] [%d %d %d] [%d %d %d] \n ", pos_x, pos_y, pos_z, indx_x, indx_y, indx_z, nx_g, ny_g, nz_g); } if (!ignore && in_local) { int N = 0; - if ((t - age_dev[gtid]) <= Supernova::SN_ERA) { - if (direction == -1) N = -prev_N[gtid]; + if ((t - age_dev[gtid]) <= supernova::SN_ERA) { + if (direction == -1) N = -prev_N[gtid]; else { curandStateMRG32k3a_t state = states[gtid]; - N = curand_poisson (&state, Supernova::SNR * mass_dev[gtid] * dt); + N = curand_poisson (&state, supernova::SNR * mass_dev[gtid] * dt); states[gtid] = state; prev_N[gtid] = N; } if (N != 0) { - mass_dev[gtid] -= N * Supernova::MASS_PER_SN; - feedback_energy = N * Supernova::ENERGY_PER_SN / dV; - feedback_density = N * Supernova::MASS_PER_SN / dV; + mass_dev[gtid] -= N * supernova::MASS_PER_SN; + feedback_energy = N * supernova::ENERGY_PER_SN / dV; + feedback_density = N * supernova::MASS_PER_SN / dV; if (direction == -1) n_0 = prev_dens[gtid]; else { n_0 = GetAverageNumberDensity_CGS(density, indx_x, indx_y, indx_z, nx_g, ny_g, n_ghost); @@ -223,15 +203,15 @@ __global__ void Cluster_Feedback_Kernel(part_int_t n_local, part_int_t *id, Real //cudaGetDevice(&devId); //printf("[%d: %d] N: %d, time: %.4e, dt: %.4e, e: %.4e, n_0: %.4e\n", devId, gtid, N, t, dt, feedback_energy, n_0); - feedback_momentum = direction*Supernova::FINAL_MOMENTUM * pow(n_0, -0.17) * pow(fabsf(N), 0.93) / dV; - shell_radius = Supernova::R_SH * pow(n_0, -0.46) * pow(fabsf(N), 0.29); - is_resolved = 3 * max(dx, max(dy, dz)) <= shell_radius; + feedback_momentum = direction*supernova::FINAL_MOMENTUM * pow(n_0, -0.17) * pow(fabsf(N), 0.93) / dV; + shell_radius = supernova::R_SH * pow(n_0, -0.46) * pow(fabsf(N), 0.29); + is_resolved = 3 * max(dx, max(dy, dz)) <= shell_radius; if (!is_resolved) printf("UR[%f] at (%d, %d, %d) id=%d, N=%d, shell_rad=%0.4e, n_0=%0.4e\n", t, indx_x + n_ghost, indx_y + n_ghost, indx_z + n_ghost, (int)id[gtid], N, shell_radius, n_0); - + s_info[FEED_INFO_N*tid] = 1.*N; if (is_resolved) s_info[FEED_INFO_N*tid + 1] = direction * 1.0; - else s_info[FEED_INFO_N*tid + 2] = direction * 1.0; + else s_info[FEED_INFO_N*tid + 2] = direction * 1.0; int indx; @@ -329,8 +309,7 @@ __global__ void Cluster_Feedback_Kernel(part_int_t n_local, part_int_t *id, Real px = x_frac * feedback_momentum; py = y_frac * feedback_momentum; pz = z_frac * feedback_momentum; - //d = (abs(x_frac) + abs(y_frac) + abs(z_frac)) / 6 * (feedback_density + n_0*Supernova::MU*MP/DENSITY_UNIT); - d = (abs(x_frac) + abs(y_frac) + abs(z_frac)) / 6 * feedback_density + n_0*Supernova::MU*MP/DENSITY_UNIT; + d = (abs(x_frac) + abs(y_frac) + abs(z_frac)) / 6 * feedback_density + n_0*supernova::MU*MP/DENSITY_UNIT; //d = frac(i, delta_x) * frac(j, delta_y) * frac(k, delta_z) * feedback_density; //e = frac(i, delta_x) * frac(j, delta_y) * frac(k, delta_z) * feedback_energy; @@ -401,7 +380,7 @@ __global__ void Cluster_Feedback_Kernel(part_int_t n_local, part_int_t *id, Real } } if (direction > 0) atomicMax(dti, local_dti); - } + } } } } @@ -432,12 +411,15 @@ __global__ void Cluster_Feedback_Kernel(part_int_t n_local, part_int_t *id, Real } +Real supernova::Cluster_Feedback(Grid3D& G, FeedbackAnalysis& analysis) { + #ifdef CPU_TIME + G.Timer.Feedback.Start(); + #endif -Real Grid3D::Cluster_Feedback_GPU() { - if (H.dt == 0) return 0.0; + if (G.H.dt == 0) return 0.0; - if (Particles.n_local > Supernova::n_states) { - printf("ERROR: not enough cuRAND states (%d) for %f local particles\n", Supernova::n_states, Particles.n_local ); + if (G.Particles.n_local > supernova::n_states) { + printf("ERROR: not enough cuRAND states (%d) for %d local particles\n", supernova::n_states, G.Particles.n_local ); exit(-1); } @@ -451,29 +433,29 @@ Real Grid3D::Cluster_Feedback_GPU() { int* d_prev_N; - if (Particles.n_local > 0) { + if (G.Particles.n_local > 0) { CHECK(cudaMalloc(&d_dti, sizeof(Real))); CHECK(cudaMemcpy(d_dti, &h_dti, sizeof(Real), cudaMemcpyHostToDevice)); - CHECK(cudaMalloc(&d_prev_dens, Particles.n_local*sizeof(Real))); - CHECK(cudaMalloc(&d_prev_N, Particles.n_local*sizeof(int))); - CHECK(cudaMemset(d_prev_dens, 0, Particles.n_local*sizeof(Real))); - CHECK(cudaMemset(d_prev_N, 0, Particles.n_local*sizeof(int))); - - ngrid = std::ceil((1.*Particles.n_local)/TPB_FEEDBACK); + CHECK(cudaMalloc(&d_prev_dens, G.Particles.n_local*sizeof(Real))); + CHECK(cudaMalloc(&d_prev_N, G.Particles.n_local*sizeof(int))); + CHECK(cudaMemset(d_prev_dens, 0, G.Particles.n_local*sizeof(Real))); + CHECK(cudaMemset(d_prev_N, 0, G.Particles.n_local*sizeof(int))); + + ngrid = std::ceil((1.*G.Particles.n_local)/TPB_FEEDBACK); CHECK(cudaMalloc((void**)&d_info, FEED_INFO_N*ngrid*sizeof(Real))); } - //FIXME info collection and max dti calculation - // only works if ngrid is 1. The reason being that reduction of - // d_info is currently done on each block. Only the first block reduction - // is used + // TODO: info collection and max dti calculation + // assumes ngrid is 1. The reason being that reduction of + // d_info is currently done on each block. Only the first block reduction + // is used do { - direction = 1; - if (Particles.n_local > 0) { - hipLaunchKernelGGL(Cluster_Feedback_Kernel, ngrid, TPB_FEEDBACK, 0, 0, Particles.n_local, Particles.partIDs_dev, Particles.pos_x_dev, Particles.pos_y_dev, Particles.pos_z_dev, - Particles.mass_dev, Particles.age_dev, H.xblocal, H.yblocal, H.zblocal, H.xblocal_max, H.yblocal_max, H.zblocal_max, - H.dx, H.dy, H.dz, H.nx, H.ny, H.nz, H.n_ghost, H.t, H.dt, d_dti, d_info, - C.d_density, C.d_GasEnergy, C.d_Energy, C.d_momentum_x, C.d_momentum_y, C.d_momentum_z, gama, Supernova::curandStates, d_prev_dens, d_prev_N, direction); + direction = 1; + if (G.Particles.n_local > 0) { + hipLaunchKernelGGL(Cluster_Feedback_Kernel, ngrid, TPB_FEEDBACK, 0, 0, G.Particles.n_local, G.Particles.partIDs_dev, G.Particles.pos_x_dev, G.Particles.pos_y_dev, G.Particles.pos_z_dev, + G.Particles.mass_dev, G.Particles.age_dev, G.H.xblocal, G.H.yblocal, G.H.zblocal, G.H.xblocal_max, G.H.yblocal_max, G.H.zblocal_max, + G.H.dx, G.H.dy, G.H.dz, G.H.nx, G.H.ny, G.H.nz, G.H.n_ghost, G.H.t, G.H.dt, d_dti, d_info, + G.C.d_density, G.C.d_GasEnergy, G.C.d_Energy, G.C.d_momentum_x, G.C.d_momentum_y, G.C.d_momentum_z, gama, supernova::randStates, d_prev_dens, d_prev_N, direction); CHECK(cudaMemcpy(&h_dti, d_dti, sizeof(Real), cudaMemcpyDeviceToHost)); } @@ -483,22 +465,22 @@ Real Grid3D::Cluster_Feedback_GPU() { MPI_Barrier(world); #endif // MPI_CHOLLA - if (h_dti != 0 && (C_cfl/h_dti < H.dt)) { // timestep too big: need to undo the last operation + if (h_dti != 0 && (C_cfl/h_dti < G.H.dt)) { // timestep too big: need to undo the last operation direction = -1; - if (Particles.n_local > 0) { - hipLaunchKernelGGL(Cluster_Feedback_Kernel, ngrid, TPB_FEEDBACK, 0, 0, Particles.n_local, Particles.partIDs_dev, Particles.pos_x_dev, Particles.pos_y_dev, Particles.pos_z_dev, - Particles.mass_dev, Particles.age_dev, H.xblocal, H.yblocal, H.zblocal, H.xblocal_max, H.yblocal_max, H.zblocal_max, - H.dx, H.dy, H.dz, H.nx, H.ny, H.nz, H.n_ghost, H.t, H.dt, d_dti, d_info, - C.d_density, C.d_GasEnergy, C.d_Energy, C.d_momentum_x, C.d_momentum_y, C.d_momentum_z, gama, Supernova::curandStates, d_prev_dens, d_prev_N, direction); - - CHECK(cudaDeviceSynchronize()); + if (G.Particles.n_local > 0) { + hipLaunchKernelGGL(Cluster_Feedback_Kernel, ngrid, TPB_FEEDBACK, 0, 0, G.Particles.n_local, G.Particles.partIDs_dev, G.Particles.pos_x_dev, G.Particles.pos_y_dev, G.Particles.pos_z_dev, + G.Particles.mass_dev, G.Particles.age_dev, G.H.xblocal, G.H.yblocal, G.H.zblocal, G.H.xblocal_max, G.H.yblocal_max, G.H.zblocal_max, + G.H.dx, G.H.dy, G.H.dz, G.H.nx, G.H.ny, G.H.nz, G.H.n_ghost, G.H.t, G.H.dt, d_dti, d_info, + G.C.d_density, G.C.d_GasEnergy, G.C.d_Energy, G.C.d_momentum_x, G.C.d_momentum_y, G.C.d_momentum_z, gama, supernova::randStates, d_prev_dens, d_prev_N, direction); + + CHECK(cudaDeviceSynchronize()); } - H.dt = C_cfl/h_dti; + G.H.dt = C_cfl/h_dti; } } while (direction == -1); - if (Particles.n_local > 0) { + if (G.Particles.n_local > 0) { CHECK(cudaMemcpy(&h_info, d_info, FEED_INFO_N*sizeof(Real), cudaMemcpyDeviceToHost)); CHECK(cudaFree(d_dti)); CHECK(cudaFree(d_info)); @@ -512,30 +494,35 @@ Real Grid3D::Cluster_Feedback_GPU() { info = h_info; #endif - countSN += (int)info[Supernova::SN]; - countResolved += (int)info[Supernova::RESOLVED]; - countUnresolved += (int)info[Supernova::NOT_RESOLVED]; - totalEnergy += info[Supernova::ENERGY]; - totalMomentum += info[Supernova::MOMENTUM]; - totalUnresEnergy += info[Supernova::UNRES_ENERGY]; + analysis.countSN += (int)info[supernova::SN]; + analysis.countResolved += (int)info[supernova::RESOLVED]; + analysis.countUnresolved += (int)info[supernova::NOT_RESOLVED]; + analysis.totalEnergy += info[supernova::ENERGY]; + analysis.totalMomentum += info[supernova::MOMENTUM]; + analysis.totalUnresEnergy += info[supernova::UNRES_ENERGY]; Real resolved_ratio = 0.0; - if (info[Supernova::RESOLVED] > 0 || info[Supernova::NOT_RESOLVED] > 0) { - resolved_ratio = info[Supernova::RESOLVED]/(info[Supernova::RESOLVED] + info[Supernova::NOT_RESOLVED]); + if (info[supernova::RESOLVED] > 0 || info[supernova::NOT_RESOLVED] > 0) { + resolved_ratio = info[supernova::RESOLVED]/(info[supernova::RESOLVED] + info[supernova::NOT_RESOLVED]); } Real global_resolved_ratio = 0.0; - if (countResolved > 0 || countUnresolved > 0) { - global_resolved_ratio = countResolved / (countResolved + countUnresolved); + if (analysis.countResolved > 0 || analysis.countUnresolved > 0) { + global_resolved_ratio = analysis.countResolved / (analysis.countResolved + analysis.countUnresolved); } - chprintf("iteration %d: number of SN: %d, ratio of resolved %.3e\n", H.n_step, (long)info[Supernova::SN], resolved_ratio); - chprintf(" this iteration: energy: %.5e erg. momentum: %.5e S.M. km/s unres_energy: %.5e erg\n", - info[Supernova::ENERGY]*MASS_UNIT*LENGTH_UNIT*LENGTH_UNIT/TIME_UNIT/TIME_UNIT, info[Supernova::MOMENTUM]*VELOCITY_UNIT/1e5, - info[Supernova::UNRES_ENERGY]*MASS_UNIT*LENGTH_UNIT*LENGTH_UNIT/TIME_UNIT/TIME_UNIT); - chprintf(" cummulative: #SN: %d, ratio of resolved (R: %d, UR: %d) = %.3e\n", (long)countSN, (long)countResolved, (long)countUnresolved, global_resolved_ratio); - chprintf(" energy: %.5e erg. Total momentum: %.5e S.M. km/s, Total unres energy: %.5e\n", totalEnergy*MASS_UNIT*LENGTH_UNIT*LENGTH_UNIT/TIME_UNIT/TIME_UNIT, - totalMomentum*VELOCITY_UNIT/1e5, totalUnresEnergy*MASS_UNIT*LENGTH_UNIT*LENGTH_UNIT/TIME_UNIT/TIME_UNIT); - + chprintf("iteration %d: number of SN: %d, ratio of resolved %.3e\n", G.H.n_step, (long)info[supernova::SN], resolved_ratio); + chprintf(" this iteration: energy: %.5e erg. momentum: %.5e S.M. km/s unres_energy: %.5e erg\n", + info[supernova::ENERGY]*MASS_UNIT*LENGTH_UNIT*LENGTH_UNIT/TIME_UNIT/TIME_UNIT, info[supernova::MOMENTUM]*VELOCITY_UNIT/1e5, + info[supernova::UNRES_ENERGY]*MASS_UNIT*LENGTH_UNIT*LENGTH_UNIT/TIME_UNIT/TIME_UNIT); + chprintf(" cummulative: #SN: %d, ratio of resolved (R: %d, UR: %d) = %.3e\n", (long)analysis.countSN, (long)analysis.countResolved, + (long)analysis.countUnresolved, global_resolved_ratio); + chprintf(" energy: %.5e erg. Total momentum: %.5e S.M. km/s, Total unres energy: %.5e\n", + analysis.totalEnergy*MASS_UNIT*LENGTH_UNIT*LENGTH_UNIT/TIME_UNIT/TIME_UNIT, analysis.totalMomentum*VELOCITY_UNIT/1e5, + analysis.totalUnresEnergy*MASS_UNIT*LENGTH_UNIT*LENGTH_UNIT/TIME_UNIT/TIME_UNIT); + + #ifdef CPU_TIME + G.Timer.Feedback.End(); + #endif return h_dti; } diff --git a/src/particles/gravity_CIC_gpu.cu b/src/particles/gravity_CIC_gpu.cu index f5e013ef0..f1290776d 100644 --- a/src/particles/gravity_CIC_gpu.cu +++ b/src/particles/gravity_CIC_gpu.cu @@ -40,8 +40,6 @@ __global__ void Get_Gravity_Field_Particles_Kernel( Real *potential_dev, Real * nx_pot = nx + 2*n_ghost_potential; ny_pot = ny + 2*n_ghost_potential; - // if (tid == 0) printf( "potential: %f\n", potential_dev[tid] ); - int nGHST = n_ghost_potential - n_ghost_particles_grid; Real phi_l, phi_r; @@ -95,13 +93,6 @@ __global__ void Get_Gravity_Field_Particles_Kernel( Real *potential_dev, Real * #else gravity_z_dev[tid] = -0.5 * ( phi_r - phi_l ) / dz; #endif - - /* - if (tid_x < 10 && tid_y == (ny_grav/2) && tid_z == (nz_grav/2)) { - printf("gravity_x_dev[%d, %d, %d] = %.4e\n", tid_x, tid_y, tid_z, gravity_x_dev[tid]); - //printf("analytic_d[%d, %d, %d] = %.4e\n", tid_x, tid_y, tid_z, analytic_d[tid]); - } - */ } @@ -132,13 +123,6 @@ void Particles_3D::Get_Gravity_Field_Particles_GPU_function( int nx_local, int n hipLaunchKernelGGL(Get_Gravity_Field_Particles_Kernel, dim3dGrid, dim3dBlock, 0, 0, potential_dev, gravity_x_dev, gravity_y_dev, gravity_z_dev, nx_local, ny_local, nz_local, n_ghost_particles_grid, N_GHOST_POTENTIAL, dx, dy, dz ); CudaCheckError(); - - /* - gpuFor(10, - GPU_LAMBDA(const int i) { - printf("potential_final[%d, %d, %d] = %.4e\n", i, ny_g/2, nz_g/2, potential_dev[i + nx_g*ny_g/2 + nx_g*ny_g*nz_g/2]); - } - );*/ } @@ -255,19 +239,6 @@ __global__ void Get_Gravity_CIC_Kernel( part_int_t n_local, Real *gravity_x_dev, grav_y_dev[tid] = g_y; grav_z_dev[tid] = g_z; - /* - if (partIDs_dev[tid] == 15) { - //printf(" (g_x_bl, g_y_bl) (%.4e, %.4e)\n", g_x_bl, g_y_bl); - //printf(" (g_x_br, g_y_br) (%.4e, %.4e)\n", g_x_br, g_y_br); - //printf(" (g_x_bu, g_y_bu) (%.4e, %.4e)\n", g_x_bu, g_y_bu); - //printf(" (g_x_bru, g_y_bru) (%.4e, %.4e)\n", g_x_bru, g_y_bru); - //printf(" (g_x_tl, g_y_tl) (%.4e, %.4e)\n", g_x_tl, g_y_tl); - //printf(" (g_x_tr, g_y_tr) (%.4e, %.4e)\n", g_x_tr, g_y_tr); - //printf(" (g_x_tu, g_y_tu) (%.4e, %.4e)\n", g_x_tu, g_y_tu); - //printf(" (x, y) -> (%f, %f)\n", pos_x, pos_y); - //printf(" (g_x_tru, g_y_tru) (%.4e, %.4e)\n", g_x_tru, g_y_tru); - printf(" -------->ID 15: pos (%f, %f), grav (%.4e, %.4e)\n", pos_x, pos_y, g_x, g_y); - }*/ } diff --git a/src/particles/particles_3D.cpp b/src/particles/particles_3D.cpp index b52215ec2..e2e6c353e 100644 --- a/src/particles/particles_3D.cpp +++ b/src/particles/particles_3D.cpp @@ -703,8 +703,6 @@ void Particles_3D::Initialize_Disk_Stellar_Clusters(struct parameters *P) { temp_grav_y.push_back(0.0); temp_grav_z.push_back(0.0); temp_mass.push_back(cluster_mass); - //if (fabs(z) >= Z_d) age.push_back(1.1e4); - //else age.push_back(0.0); temp_age.push_back(0.0); temp_ids.push_back(id); } diff --git a/src/particles/supernova.h b/src/particles/supernova.h index c780eac95..d3c884282 100644 --- a/src/particles/supernova.h +++ b/src/particles/supernova.h @@ -1,16 +1,20 @@ -#ifndef SUPERNOVA_H -#define SUPERNOVA_H - -#include "../global/global.h" +#pragma once #ifdef PARTICLES_GPU + +#include "../global/global.h" +#include "../analysis/feedback_analysis.h" +#ifdef O_HIP +#include +#include +#else #include #include -#endif +#endif //O_HIP -namespace Supernova { +namespace supernova { const int SN = 0, RESOLVED = 1, NOT_RESOLVED = 2, ENERGY = 3, MOMENTUM = 4, UNRES_ENERGY = 5; - + // supernova rate: 1SN / 100 solar masses per 40^4 kyr static const Real SNR=2.5e-7; static const Real ENERGY_PER_SN = 1e51 / MASS_UNIT*TIME_UNIT*TIME_UNIT/LENGTH_UNIT/LENGTH_UNIT; @@ -20,17 +24,12 @@ namespace Supernova { static const Real R_SH = 0.0302; // 30.2 pc * n_0^{-0.46} -> eq.(31) Kim & Ostriker (2015) static const Real SN_ERA = 4.0e4; // assume SN occur during first 40 Myr after cluster formation. - #ifdef PARTICLES_GPU - extern curandStateMRG32k3a_t* curandStates; - extern part_int_t n_states; - - void initState(struct parameters *P, part_int_t n_local, Real allocation_factor = 1); - //void initState(struct parameters *P); + extern curandStateMRG32k3a_t* randStates; + extern part_int_t n_states; extern Real t_buff, dt_buff; - #endif //PARTICLES_GPU + void initState(struct parameters *P, part_int_t n_local, Real allocation_factor = 1); + Real Cluster_Feedback(Grid3D& G, FeedbackAnalysis& sn_analysis); } - - -#endif +#endif //PARTICLES_GPU diff --git a/src/utils/gpu.hpp b/src/utils/gpu.hpp index 792ae69a9..237290744 100644 --- a/src/utils/gpu.hpp +++ b/src/utils/gpu.hpp @@ -76,6 +76,10 @@ static constexpr int maxWarpsPerBlock = 1024/WARPSIZE; #define cufftPlan3d hipfftPlan3d #define cufftPlanMany hipfftPlanMany +#define curandStateMRG32k3a_t hiprandStateMRG32k3a_t +#define curand_init hiprand_init +#define curand_poisson hiprand_poisson + static void __attribute__((unused)) check(const hipError_t err, const char *const file, const int line) { if (err == hipSuccess) return; From 72335a47b3318cc6b5e96014e1b9e69b943c7133 Mon Sep 17 00:00:00 2001 From: Alwin Date: Thu, 6 Oct 2022 03:41:31 -0400 Subject: [PATCH 113/694] hdf5 output refactor passes regression tests --- src/io/io.cpp | 404 ++++++++++---------------------------------------- src/io/io.h | 5 + src/io/viz.cu | 133 ++++++++++++----- 3 files changed, 182 insertions(+), 360 deletions(-) diff --git a/src/io/io.cpp b/src/io/io.cpp index 7d51c030c..d9ea04548 100644 --- a/src/io/io.cpp +++ b/src/io/io.cpp @@ -113,7 +113,7 @@ void WriteData(Grid3D &G, struct parameters P, int nfile) #ifdef HDF5 if (P.n_outviz && nfile % P.n_outviz == 0) OutputViz3D(G,P,nfile); #endif - + #ifdef PROJECTION if (nfile % P.n_projection == 0) OutputProjectedData(G,P,nfile); #endif /*PROJECTION*/ @@ -1098,6 +1098,42 @@ void Grid3D::Write_Grid_Binary(FILE *fp) #ifdef HDF5 + +void Write_HDF5_Field_1D_CPU(Header H, hid_t file_id, hid_t dataspace_id, Real* dataset_buffer, Real* source, const char* name) +{ + int id = H.n_ghost; + memcpy(&dataset_buffer[0], &(source[id]), H.nx_real*sizeof(Real)); + + // Create a dataset id for density + hid_t dataset_id = H5Dcreate(file_id, name, H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + // Write the density array to file // NOTE: NEED TO FIX FOR FLOAT REAL!!! + herr_t status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer); + // Free the dataset id + status = H5Dclose(dataset_id); +} + +void Write_HDF5_Field_2D_CPU(Header H, hid_t file_id, hid_t dataspace_id, Real* dataset_buffer, Real* source, const char* name) +{ + int i,j,id,buf_id; + // Copy the density array to the memory buffer + for (j=0; j 1 this substitution can be attempted. + // Write_HDF5_Field_1D_CPU(H, file_id, dataspace_id, dataset_buffer, &(C.scalar[s*H.n_cells]), dataset); + id = H.n_ghost; memcpy(&dataset_buffer[0], &(C.scalar[id+s*H.n_cells]), H.nx_real*sizeof(Real)); @@ -1234,20 +1224,12 @@ void Grid3D::Write_Grid_HDF5(hid_t file_id) status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer); // Free the dataset id status = H5Dclose(dataset_id); + } #endif //SCALAR #ifdef DE - // Copy the internal energy array to the memory buffer - id = H.n_ghost; - memcpy(&dataset_buffer[0], &(C.GasEnergy[id]), H.nx_real*sizeof(Real)); - - // Create a dataset id for internal energy - dataset_id = H5Dcreate(file_id, "/GasEnergy", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); - // Write the internal energy array to file // NOTE: NEED TO FIX FOR FLOAT REAL!!! - status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer); - // Free the dataset id - status = H5Dclose(dataset_id); + Write_HDF5_Field_1D_CPU(H, file_id, dataspace_id, dataset_buffer, C.GasEnergy, "/GasEnergy"); #endif //DE // Free the dataspace id @@ -1268,85 +1250,11 @@ void Grid3D::Write_Grid_HDF5(hid_t file_id) dims[1] = ny_dset; dataspace_id = H5Screate_simple(2, dims, NULL); - // Copy the density array to the memory buffer - for (j=0; j 1 this substitution can be attempted. + // Write_HDF5_Field_1D_CPU(H, file_id, dataspace_id, dataset_buffer, &(C.scalar[s*H.n_cells]), dataset); + // Copy the scalar array to the memory buffer for (j=0; j device_buffer, then copy device_buffer -> buffer, then write HDF5 field +void WriteHDF5Field3D(int nx, int ny, int nx_real, int ny_real, int nz_real, int n_ghost, hid_t file_id, float* buffer, float* device_buffer, Real* source, const char* name); +void WriteHDF5Field3D(int nx, int ny, int nx_real, int ny_real, int nz_real, int n_ghost, hid_t file_id, double* buffer, double* device_buffer, Real* source, const char* name); diff --git a/src/io/viz.cu b/src/io/viz.cu index bfbf549e2..a8cbaf0a3 100644 --- a/src/io/viz.cu +++ b/src/io/viz.cu @@ -7,69 +7,136 @@ #include "../io/io.h" // To provide io.h with OutputViz3D +// Copy Real (non-ghost) cells from source to a double destination (for writing HDF5 in double precision) +__global__ void CopyReal3D_GPU_Kernel(int nx, int ny, int nx_real, int ny_real, int nz_real, int n_ghost, double* destination, Real* source) +{ + + int dest_id,source_id,id,i,j,k; + id = threadIdx.x + blockIdx.x * blockDim.x; + + k = id/(nx_real*ny_real); + j = (id - k*nx_real*ny_real)/nx_real; + i = id - j*nx_real - k*nx_real*ny_real; + + if (k >= nz_real) { + return; + } + + // This converts into HDF5 indexing that plays well with Python + dest_id = k + j*nz_real + i*ny_real*nz_real; + source_id = (i+n_ghost) + (j+n_ghost)*nx + (k+n_ghost)*nx*ny; + + destination[dest_id] = (double) source[source_id]; +} -void CopyReal3D_CPU(Real* source, Real* destination, Header H) +// Copy Real (non-ghost) cells from source to a float destination (for writing HDF5 in float precision) +__global__ void CopyReal3D_GPU_Kernel(int nx, int ny, int nx_real, int ny_real, int nz_real, int n_ghost, float* destination, Real* source) { - int i,j,k,id,buf_id; + + int dest_id,source_id,id,i,j,k; + id = threadIdx.x + blockIdx.x * blockDim.x; - for (k=0; k= nz_real) { + return; } + + // This converts into HDF5 indexing that plays well with Python + dest_id = k + j*nz_real + i*ny_real*nz_real; + source_id = (i+n_ghost) + (j+n_ghost)*nx + (k+n_ghost)*nx*ny; + + destination[dest_id] = (float) source[source_id]; } -void WriteVizField(Header H, hid_t file_id, hid_t dataspace_id, Real* buffer, Real* source, const char* name) +// When buffer is double, automatically use the double version of everything using function overloading +void WriteHDF5Field3D(int nx, int ny, int nx_real, int ny_real, int nz_real, int n_ghost, hid_t file_id, double* buffer, double* device_buffer, Real* device_source, const char* name) { hid_t dataset_id; herr_t status; - // Copy non-ghost parts of source to buffer - CopyReal3D_CPU(source, buffer, H); + hsize_t dims[3]; + dims[0] = nx_real; + dims[1] = ny_real; + dims[2] = nz_real; + hid_t dataspace_id = H5Screate_simple(3, dims, NULL); + + //Copy non-ghost parts of source to buffer + dim3 dim1dGrid((nx_real*ny_real*nz_real+TPB-1)/TPB, 1, 1); + dim3 dim1dBlock(TPB, 1, 1); + hipLaunchKernelGGL(CopyReal3D_GPU_Kernel,dim1dGrid,dim1dBlock,0,0,nx,ny,nx_real,ny_real,nz_real,n_ghost,device_buffer,device_source); + CudaSafeCall(cudaMemcpy( buffer, device_buffer, nx_real*ny_real*nz_real*sizeof(double), cudaMemcpyDeviceToHost)); // Create a dataset id dataset_id = H5Dcreate(file_id, name, H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); // Write the buffer to file status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, buffer); - // Free the dataset id - status = H5Dclose(dataset_id); + // Free the dataset id and dataspace id + status = H5Dclose(dataset_id); + status = H5Sclose(dataspace_id); +} + + +// When buffer is float, automatically use the float version of everything using function overloading +void WriteHDF5Field3D(int nx, int ny, int nx_real, int ny_real, int nz_real, int n_ghost, hid_t file_id, float* buffer, float* device_buffer, Real* device_source, const char* name) +{ + + hid_t dataset_id; + herr_t status; + hsize_t dims[3]; + dims[0] = nx_real; + dims[1] = ny_real; + dims[2] = nz_real; + hid_t dataspace_id = H5Screate_simple(3, dims, NULL); + //Copy non-ghost parts of source to buffer + dim3 dim1dGrid((nx_real*ny_real*nz_real+TPB-1)/TPB, 1, 1); + dim3 dim1dBlock(TPB, 1, 1); + hipLaunchKernelGGL(CopyReal3D_GPU_Kernel,dim1dGrid,dim1dBlock,0,0,nx,ny,nx_real,ny_real,nz_real,n_ghost,device_buffer,device_source); + CudaSafeCall(cudaMemcpy( buffer, device_buffer, nx_real*ny_real*nz_real*sizeof(float), cudaMemcpyDeviceToHost)); + + // Create a dataset id + dataset_id = H5Dcreate(file_id, name, H5T_IEEE_F32BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + // Write the buffer to file + status = H5Dwrite(dataset_id, H5T_NATIVE_FLOAT, H5S_ALL, H5S_ALL, H5P_DEFAULT, buffer); + // Free the dataset id and dataspace id + status = H5Dclose(dataset_id); + status = H5Sclose(dataspace_id); } + void WriteViz3D(Grid3D &G, struct parameters P, hid_t file_id) { Header H = G.H; - int nx_dset = H.nx_real; - int ny_dset = H.ny_real; - int nz_dset = H.nz_real; - hsize_t dims[3]; - // Create the data space for the datasets - dims[0] = nx_dset; - dims[1] = ny_dset; - dims[2] = nz_dset; - hid_t dataspace_id = H5Screate_simple(3, dims, NULL); - - - Real* dataset_buffer = (Real *) malloc(H.nx_real*H.ny_real*H.nz_real*sizeof(Real)); + int nx_real = H.nx_real; + int ny_real = H.ny_real; + int nz_real = H.nz_real; + int n_ghost = H.n_ghost; + int nx = H.nx; + int ny = H.ny; + float* dataset_buffer = (float *) malloc(H.nx_real*H.ny_real*H.nz_real*sizeof(Real)); + float* device_buffer; + CudaSafeCall(cudaMalloc(&device_buffer, nx_real*ny_real*nz_real*sizeof(float))); if (P.outviz_density > 0) { - WriteVizField(H, file_id, dataspace_id, dataset_buffer, G.C.density, "/density"); + WriteHDF5Field3D(nx, ny, nx_real, ny_real, nz_real, n_ghost, file_id, dataset_buffer, device_buffer, G.C.d_density, "/density"); } + + + /* // Just an example of extending this function to include other fields. // Not implemented yet if (P.outviz_energy > 0) { - WriteVizField(H, file_id, dataspace_id, dataset_buffer, C.Energy, "/energy"); + WriteHDF5Field(H, file_id, dataspace_id, dataset_buffer, C.Energy, "/energy"); } */ - + CudaSafeCall(cudaFree(device_buffer)); free(dataset_buffer); - herr_t status = H5Sclose(dataspace_id); + } @@ -78,10 +145,10 @@ void OutputViz3D(Grid3D &G, struct parameters P, int nfile) { Header H = G.H; // Do nothing in 1-D and 2-D case - if (H.ny == 1) { + if (H.ny_real == 1) { return; } - if (H.nz == 1) { + if (H.nz_real == 1) { return; } // Do nothing if nfile is not multiple of n_outviz From a02d520975aa5ae511cfce3901e261a6cefcc658 Mon Sep 17 00:00:00 2001 From: helenarichie Date: Mon, 10 Oct 2022 11:51:20 -0400 Subject: [PATCH 114/694] tweak cloud setup --- cholla-tests-data | 2 +- src/grid/initial_conditions.cpp | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/cholla-tests-data b/cholla-tests-data index 66d592821..5a3443034 160000 --- a/cholla-tests-data +++ b/cholla-tests-data @@ -1 +1 @@ -Subproject commit 66d5928213b495c2fef61b0653b90a25ae3aa7cf +Subproject commit 5a34430345d7dc746637364e8613642ebbbbc5c4 diff --git a/src/grid/initial_conditions.cpp b/src/grid/initial_conditions.cpp index 1a0a03381..ccb0b3169 100644 --- a/src/grid/initial_conditions.cpp +++ b/src/grid/initial_conditions.cpp @@ -1269,8 +1269,8 @@ void Grid3D::Clouds() printf("Cloud positions: %f %f %f\n", cl_pos[nn][0], cl_pos[nn][1], cl_pos[nn][2]); } - n_bg = 1.68e-4; - n_cl = 5.4e-2; + n_bg = 1e-4; + n_cl = 1e-2; rho_bg = n_bg*mu*MP/DENSITY_UNIT; rho_cl = n_cl*mu*MP/DENSITY_UNIT; vx_bg = 0.0; From 099a7cd30b25ce89d0467629498cdbdb24f7d32c Mon Sep 17 00:00:00 2001 From: Alwin Date: Mon, 10 Oct 2022 13:54:38 -0700 Subject: [PATCH 115/694] new build-tests.yml for Docker container --- .github/workflows/build_tests.yml | 275 ++++++++++++------------------ 1 file changed, 110 insertions(+), 165 deletions(-) diff --git a/.github/workflows/build_tests.yml b/.github/workflows/build_tests.yml index f357286a2..b183b3188 100644 --- a/.github/workflows/build_tests.yml +++ b/.github/workflows/build_tests.yml @@ -1,165 +1,110 @@ -name: Cholla Compile - -on: - pull_request: -# branches: [ CAAR ] - schedule: - - cron: "37 07 * * 1" # run every Monday at 07:37UTC. Crontab computed with crontab.guru - workflow_dispatch: - -jobs: - Build: - name: > - Build - API:${{ matrix.gpu-api }} - Make-Type:${{ matrix.make-type }} - Cuda-toolkit:v${{ matrix.cuda-toolkit-version }} - GCC:v${{ matrix.gcc-version }} - ROCm:v${{ matrix.rocm-version }} - # if: ${{ false }} # If uncommented this line will disable this job - - # Choose OS/Runner - runs-on: ubuntu-latest - - # Matrix for different make types - strategy: - fail-fast: false - matrix: - make-type: [hydro, gravity, disk, particles, cosmology, mhd] - gpu-api: [HIP, CUDA] - # NOTE: if more than one parameter is in any of these three variables - # you need to manually exclude it for the GPU API that doesn't use it. - # An example exclude is shown below but commented out. Uncomment and - # tweak it to fit your needs - # CUDA uses the cuda-toolkit-version and gcc-version - # HIP uses the clang-version - cuda-toolkit-version: ['11.2.2'] - gcc-version: [9] - rocm-version: ['5.1.0'] - mpi: ['openmpi'] #Can use mpich and/or openmpi - # exclude: - # - gpu-api: HIP - # make-type: hydro - - # Setup environment variables - env: - CHOLLA_MACHINE: github - CHOLLA_MAKE_TYPE: ${{ matrix.make-type }} - CUDA_ROOT: /usr/local/cuda - HDF5_ROOT: /usr/lib/x86_64-linux-gnu/hdf5/serial - MPI_ROOT: /usr/lib/x86_64-linux-gnu/openmpi - - # Run the job itself - steps: - - # Install required Tools - - uses: actions/checkout@v2 - - name: Setup MPI - uses: mpi4py/setup-mpi@v1 - with: - mpi: ${{ matrix.mpi }} - - name: Show MPI version - run: mpirun --version - - name: Install HDF5 Serial - run: sudo apt-get install libhdf5-serial-dev - - name: Show HDF5 config - run: | - h5cc -showconfig - - # Install CUDA and dependencies if this is a CUDA build - - uses: Jimver/cuda-toolkit@v0.2.8 - if: matrix.gpu-api == 'CUDA' - id: cuda-toolkit - with: - cuda: ${{ matrix.cuda-toolkit-version }} - - name: NVCC version & set CUDA_ROOT - if: matrix.gpu-api == 'CUDA' - run: | - nvcc -V - - name: Set up GCC - if: matrix.gpu-api == 'CUDA' - uses: egor-tensin/setup-gcc@v1 - with: - version: ${{ matrix.gcc-version }} - - name: GCC Version - if: matrix.gpu-api == 'CUDA' - run: | - cc --version - c++ --version - - # Install HIP and dependencies if this is a HIP build - - name: Setup ROCm - if: matrix.gpu-api == 'HIP' - run: | - # Download and install the installation script - sudo apt-get update - wget https://repo.radeon.com/amdgpu-install/22.20.1/ubuntu/focal/amdgpu-install_22.20.50201-1_all.deb - sudo apt-get install -y ./amdgpu-install_22.20.50201-1_all.deb - - # Get names correct by stripping out the last ".0" if it exists - ROCM_VERSION=${{ matrix.rocm-version }} - if [ "${ROCM_VERSION:0-1}" = "0" ] - then - # If the last character is a "0" then trim the last ".0" - ROCM_REPO_VERSION="${ROCM_VERSION:0:3}" - else - ROCM_REPO_VERSION=$ROCM_VERSION - fi - - # Add the repo for the version of ROCm that we want - echo "deb [arch=amd64] https://repo.radeon.com/rocm/apt/${ROCM_REPO_VERSION} ubuntu main" | sudo tee /etc/apt/sources.list.d/rocm.list - sudo apt update - - name: Install ROCm - if: matrix.gpu-api == 'HIP' - run: | - # Install ROCm - sudo amdgpu-install -y --usecase=rocm --rocmrelease=${{ matrix.rocm-version }} - - name: Install hipFFT and RocFFT - if: matrix.gpu-api == 'HIP' - run: | - sudo apt install -y hipfft${{ matrix.rocm-version }} rocfft${{ matrix.rocm-version }} - - name: Verify HIP install - if: matrix.gpu-api == 'HIP' - run: | - hipconfig --full - - name: Set Environment Variables and Files - if: matrix.gpu-api == 'HIP' - run: | - echo "HIPCONFIG=$(hipconfig -C)" >> $GITHUB_ENV - echo "ROCM_PATH=$(hipconfig -R)" >> $GITHUB_ENV - echo "gfx90a" | sudo tee --append $(hipconfig -R)/bin/target.lst # trick ROCm into thinking there's a GPU - - name: Echo Environment Variables and Files - if: matrix.gpu-api == 'HIP' - run: | - echo "HIPCONFIG = ${HIPCONFIG}" - echo "ROCM_PATH = ${ROCM_PATH}" - echo "HIPFFT_PATH = ${HIPFFT_PATH}" - - echo "The contents of $(hipconfig -R)/bin/target.lst are:" - sudo cat $(hipconfig -R)/bin/target.lst - - # Perform Build - - name: Cholla setup - run: | - source builds/run_tests.sh - setupTests -c gcc - echo "CHOLLA_ROOT = ${CHOLLA_ROOT}" - echo "CHOLLA_LAUNCH_COMMAND = ${CHOLLA_LAUNCH_COMMAND}" - - echo "CHOLLA_ROOT=${CHOLLA_ROOT}" >> $GITHUB_ENV - echo "CHOLLA_LAUNCH_COMMAND=${CHOLLA_LAUNCH_COMMAND}" >> $GITHUB_ENV - echo "F_OFFLOAD=${F_OFFLOAD} >> $GITHUB_ENV - echo "CHOLLA_ENVSET=${CHOLLA_ENVSET} >> $GITHUB_ENV - - name: Build GoogleTest - run: | - source builds/run_tests.sh - buildGoogleTest - echo "GOOGLETEST_ROOT=${GOOGLETEST_ROOT}" >> $GITHUB_ENV - - name: Build Cholla - run: | - source builds/run_tests.sh - buildCholla OPTIMIZE - - name: Build Tests - run: | - source builds/run_tests.sh - buildChollaTests +name: Cholla Compile + +on: + pull_request: + schedule: + - cron: "37 07 * * 1" # run every Monday at 07:37UTC. Crontab computed with crontab.guru + workflow_dispatch: + +jobs: + Build: + name: > + Build + ${{ matrix.container.name }} + TYPE=${{ matrix.make-type }} + + # Cuda-toolkit:v${{ matrix.cuda-toolkit-version }} + # GCC:v${{ matrix.gcc-version }} + # ROCm:v${{ matrix.rocm-version }} + # if: ${{ false }} # If uncommented this line will disable this job + + # Choose OS/Runner + runs-on: ubuntu-latest + container: + image: ${{matrix.container.link}} + defaults: + run: + shell: bash + # Matrix for different make types + strategy: + fail-fast: false + matrix: + make-type: [hydro, gravity, disk, particles, cosmology, mhd] + container: [{name: "CUDA", link: "docker://alwinm/cholla:cuda_github"}, {name: "HIP",link: "docker://alwinm/cholla:hip_github"},] + # gpu-api: [CUDA] + # NOTE: if more than one parameter is in any of these three variables + # you need to manually exclude it for the GPU API that doesn't use it. + # An example exclude is shown below but commented out. Uncomment and + # tweak it to fit your needs + # CUDA uses the cuda-toolkit-version and gcc-version + # HIP uses the clang-version + # cuda-toolkit-version: ['11.2.2'] + # gcc-version: [9] + # rocm-version: ['5.1.0'] + mpi: ['openmpi'] #Can use mpich and/or openmpi + # exclude: + # - gpu-api: HIP + # make-type: hydro + + # Setup environment variables + env: + CHOLLA_MACHINE: github + CHOLLA_MAKE_TYPE: ${{ matrix.make-type }} + CUDA_ROOT: /usr/local/cuda + HDF5_ROOT: /usr/lib/x86_64-linux-gnu/hdf5/serial + MPI_ROOT: /usr/lib/x86_64-linux-gnu/openmpi + + # Run the job itself + steps: + + # Install required Tools + - uses: actions/checkout@v2 + + # Show versions + - name: Show MPI version + run: mpirun --version + - name: Show HDF5 config + run: | + h5cc -showconfig + - name: Git Safe Directory + run: | + git --version + git config --global --add safe.directory /__w/cholla/cholla + git config --global --add safe.directory '*' + - name: Show CUDA and gcc version + if: matrix.container.name == 'CUDA' + run: | + cc --version + c++ --version + nvcc -V + - name: Show HIP and hipcc version + if: matrix.container.name == 'HIP' + run: | + hipcc --version + hipconfig --full + + + # Perform Build + - name: Cholla setup + run: | + source builds/run_tests.sh + setupTests -c gcc + echo "CHOLLA_ROOT = ${CHOLLA_ROOT}" + echo "CHOLLA_LAUNCH_COMMAND = ${CHOLLA_LAUNCH_COMMAND}" + echo "CHOLLA_ROOT=${CHOLLA_ROOT}" >> $GITHUB_ENV + echo "CHOLLA_LAUNCH_COMMAND=${CHOLLA_LAUNCH_COMMAND}" >> $GITHUB_ENV + echo "F_OFFLOAD=${F_OFFLOAD} >> $GITHUB_ENV + echo "CHOLLA_ENVSET=${CHOLLA_ENVSET} >> $GITHUB_ENV + - name: Build GoogleTest + run: | + source builds/run_tests.sh + buildGoogleTest + echo "GOOGLETEST_ROOT=${GOOGLETEST_ROOT}" >> $GITHUB_ENV + - name: Build Cholla + run: | + source builds/run_tests.sh + buildCholla OPTIMIZE + - name: Build Tests + run: | + source builds/run_tests.sh + buildChollaTests From 72f367ef6ab21a66a09ed8333f740c5dd1cfd8f3 Mon Sep 17 00:00:00 2001 From: alwinm Date: Mon, 10 Oct 2022 16:15:28 -0700 Subject: [PATCH 116/694] Update build_tests.yml Use checkout v3 instead of v2 in preparation for github deprecating node 12 for node 16 --- .github/workflows/build_tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build_tests.yml b/.github/workflows/build_tests.yml index b183b3188..a21b7c950 100644 --- a/.github/workflows/build_tests.yml +++ b/.github/workflows/build_tests.yml @@ -58,7 +58,7 @@ jobs: steps: # Install required Tools - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 # Show versions - name: Show MPI version From 3b0301be524d95f3c5cd73651fa69c4c875c8c0b Mon Sep 17 00:00:00 2001 From: alwinm Date: Tue, 11 Oct 2022 07:38:56 -0700 Subject: [PATCH 117/694] Update build_tests.yml --- .github/workflows/build_tests.yml | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/.github/workflows/build_tests.yml b/.github/workflows/build_tests.yml index a21b7c950..19bdabb44 100644 --- a/.github/workflows/build_tests.yml +++ b/.github/workflows/build_tests.yml @@ -12,10 +12,6 @@ jobs: Build ${{ matrix.container.name }} TYPE=${{ matrix.make-type }} - - # Cuda-toolkit:v${{ matrix.cuda-toolkit-version }} - # GCC:v${{ matrix.gcc-version }} - # ROCm:v${{ matrix.rocm-version }} # if: ${{ false }} # If uncommented this line will disable this job # Choose OS/Runner @@ -31,20 +27,6 @@ jobs: matrix: make-type: [hydro, gravity, disk, particles, cosmology, mhd] container: [{name: "CUDA", link: "docker://alwinm/cholla:cuda_github"}, {name: "HIP",link: "docker://alwinm/cholla:hip_github"},] - # gpu-api: [CUDA] - # NOTE: if more than one parameter is in any of these three variables - # you need to manually exclude it for the GPU API that doesn't use it. - # An example exclude is shown below but commented out. Uncomment and - # tweak it to fit your needs - # CUDA uses the cuda-toolkit-version and gcc-version - # HIP uses the clang-version - # cuda-toolkit-version: ['11.2.2'] - # gcc-version: [9] - # rocm-version: ['5.1.0'] - mpi: ['openmpi'] #Can use mpich and/or openmpi - # exclude: - # - gpu-api: HIP - # make-type: hydro # Setup environment variables env: From 37399cfff1361db601206710d590434c4f6ecfaa Mon Sep 17 00:00:00 2001 From: Alwin Date: Tue, 11 Oct 2022 12:18:16 -0400 Subject: [PATCH 118/694] hdf5 refactor part 2 (this part also passes tests) --- examples/3D/float32_sound_wave.txt | 59 ++++ src/global/global.cpp | 28 +- src/global/global.h | 16 +- src/io/io.cpp | 437 ++++++++++++++--------------- src/io/io.h | 12 +- src/io/{viz.cu => io_gpu.cu} | 125 ++------- 6 files changed, 346 insertions(+), 331 deletions(-) create mode 100644 examples/3D/float32_sound_wave.txt rename src/io/{viz.cu => io_gpu.cu} (50%) diff --git a/examples/3D/float32_sound_wave.txt b/examples/3D/float32_sound_wave.txt new file mode 100644 index 000000000..68b3d4952 --- /dev/null +++ b/examples/3D/float32_sound_wave.txt @@ -0,0 +1,59 @@ +# +# Parameter File for sound wave test with float32 output +# + +################################################ +# number of grid cells in the x dimension +nx=256 +# number of grid cells in the y dimension +ny=256 +# number of grid cells in the z dimension +nz=256 +# final output time +tout=0.05 +# time interval for output +outstep=0.05 +# name of initial conditions +init=Sound_Wave +# domain properties +xmin=0.0 +ymin=0.0 +zmin=0.0 +xlen=4.0 +ylen=4.0 +zlen=4.0 +# type of boundary conditions +xl_bcnd=1 +xu_bcnd=1 +yl_bcnd=1 +yu_bcnd=1 +zl_bcnd=1 +zu_bcnd=1 +# path to output directory +outdir=./ + +# Enable float32 output +# Enable float32 density field +n_out_float32=1 +out_float32_density=1 + +# Uncomment this to enable momentum_x +# out_float32_momentum_x=1 + +################################################# +# Parameters for linear wave problems +# initial density +rho=1.0 +# velocity in the x direction +vx=0 +# velocity in the y direction +vy=0 +# velocity in the z direction +vz=0 +# initial pressure +P=0.6 +# amplitude of perturbing oscillations +A=1e-4 +# value of gamma +gamma=1.666666666666667 + diff --git a/src/global/global.cpp b/src/global/global.cpp index d84a04a68..1f6a5cbfa 100644 --- a/src/global/global.cpp +++ b/src/global/global.cpp @@ -223,10 +223,30 @@ void parse_param(char *name,char *value, struct parameters *parms){ parms->n_rotated_projection = atoi(value); else if (strcmp(name, "n_slice")==0) parms->n_slice = atoi(value); - else if (strcmp(name, "n_outviz")==0) - parms->n_outviz = atoi(value); - else if (strcmp(name, "outviz_density")==0) - parms->outviz_density = atoi(value); + else if (strcmp(name, "n_out_float32")==0) + parms->n_out_float32 = atoi(value); + else if (strcmp(name, "out_float32_density")==0) + parms->out_float32_density = atoi(value); + else if (strcmp(name, "out_float32_momentum_x")==0) + parms->out_float32_momentum_x = atoi(value); + else if (strcmp(name, "out_float32_momentum_y")==0) + parms->out_float32_momentum_y = atoi(value); + else if (strcmp(name, "out_float32_momentum_z")==0) + parms->out_float32_momentum_z = atoi(value); + else if (strcmp(name, "out_float32_Energy")==0) + parms->out_float32_Energy = atoi(value); +#ifdef DE + else if (strcmp(name, "out_float32_GasEnergy")==0) + parms->out_float32_GasEnergy = atoi(value); +#endif // DE +#ifdef MHD + else if (strcmp(name, "out_float32_magnetic_x")==0) + parms->out_float32_magnetic_x = atoi(value); + else if (strcmp(name, "out_float32_magnetic_y")==0) + parms->out_float32_magnetic_y = atoi(value); + else if (strcmp(name, "out_float32_magnetic_z")==0) + parms->out_float32_magnetic_z = atoi(value); +#endif // MHD else if (strcmp(name, "xmin")==0) parms->xmin = atof(value); else if (strcmp(name, "ymin")==0) diff --git a/src/global/global.h b/src/global/global.h index 0fc1ff47b..4e6d8eeb9 100644 --- a/src/global/global.h +++ b/src/global/global.h @@ -204,8 +204,20 @@ struct parameters int n_projection; int n_rotated_projection; int n_slice; - int n_outviz=0; - int outviz_density=0; + int n_out_float32=0; + int out_float32_density=0; + int out_float32_momentum_x=0; + int out_float32_momentum_y=0; + int out_float32_momentum_z=0; + int out_float32_Energy=0; +#ifdef DE + int out_float32_GasEnergy=0; +#endif +#ifdef MHD + int out_float32_magnetic_x=0; + int out_float32_magnetic_y=0; + int out_float32_magnetic_z=0; +#endif Real xmin; Real ymin; Real zmin; diff --git a/src/io/io.cpp b/src/io/io.cpp index d9ea04548..fc6c52eb7 100644 --- a/src/io/io.cpp +++ b/src/io/io.cpp @@ -16,6 +16,7 @@ #include "../mpi/mpi_routines.h" #endif //MPI_CHOLLA #include "../utils/error_handling.h" +#include "../utils/DeviceVector.h" #ifdef COSMOLOGY #include "../cosmology/cosmology.h" @@ -111,7 +112,7 @@ void WriteData(Grid3D &G, struct parameters P, int nfile) // This function does other checks to make sure it is valid (3D only) #ifdef HDF5 - if (P.n_outviz && nfile % P.n_outviz == 0) OutputViz3D(G,P,nfile); + if (P.n_out_float32 && nfile % P.n_out_float32 == 0) OutputFloat32(G,P,nfile); #endif #ifdef PROJECTION @@ -232,6 +233,92 @@ void OutputData(Grid3D &G, struct parameters P, int nfile) #endif } +void OutputFloat32(Grid3D &G, struct parameters P, int nfile) +{ + + Header H = G.H; + // Do nothing in 1-D and 2-D case + if (H.ny_real == 1) { + return; + } + if (H.nz_real == 1) { + return; + } + // Do nothing if nfile is not multiple of n_out_float32 + if (nfile % P.n_out_float32 != 0) { + return; + } + + char filename[MAXLEN]; + char timestep[20]; + + // create the filename + sprintf(timestep, "%d", nfile); + strcpy(filename, P.outdir); + strcat(filename, timestep); + strcat(filename, ".float32.h5"); + #ifdef MPI_CHOLLA + sprintf(filename,"%s.%d",filename,procID); + #endif + + // create hdf5 file + hid_t file_id; /* file identifier */ + herr_t status; + + // Create a new file using default properties. + file_id = H5Fcreate(filename, H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT); + + // Write the header (file attributes) + G.Write_Header_HDF5(file_id); + + // write the conserved variables to the output file + + // 3-D Case + if (H.nx>1 && H.ny>1 && H.nz>1) { + int nx_dset = H.nx_real; + int ny_dset = H.ny_real; + int nz_dset = H.nz_real; + size_t buffer_size; + // Need a larger device buffer for MHD. In the future, if other fields need a larger device buffer, choose the maximum of the sizes. + // If the buffer is too large, it does not cause bugs (Oct 6 2022) +#ifdef MHD + buffer_size = (nx_dset+1)*(ny_dset+1)*(nz_dset+1); +#else + buffer_size = nx_dset*ny_dset*nz_dset; +#endif + + // Using static DeviceVector here automatically allocates the buffer the first time it is needed + // It persists until program exit, and then calls Free upon destruction + cuda_utilities::DeviceVector static device_dataset_vector{buffer_size}; + float* device_dataset_buffer = device_dataset_vector.data(); + float* dataset_buffer = (float *) malloc(buffer_size*sizeof(float)); + + if (P.out_float32_density > 0) WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, device_dataset_buffer, G.C.d_density, "/density"); + if (P.out_float32_momentum_x > 0) WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, device_dataset_buffer, G.C.d_momentum_x, "/momentum_x"); + if (P.out_float32_momentum_y > 0) WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, device_dataset_buffer, G.C.d_momentum_y, "/momentum_y"); + if (P.out_float32_momentum_z > 0) WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, device_dataset_buffer, G.C.d_momentum_z, "/momentum_z"); + if (P.out_float32_Energy > 0) WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, device_dataset_buffer, G.C.d_Energy, "/Energy"); +#ifdef DE + if (P.out_float32_GasEnergy > 0) WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, device_dataset_buffer, G.C.d_GasEnergy, "/GasEnergy"); +#endif //DE +#ifdef MHD + if (P.out_float32_magnetic_x > 0) WriteHDF5Field3D(H.nx, H.ny, nx_dset+1, ny_dset+1, nz_dset+1, H.n_ghost-1, file_id, dataset_buffer, device_dataset_buffer, G.C.d_magnetic_x, "/magnetic_x"); + if (P.out_float32_magnetic_y > 0) WriteHDF5Field3D(H.nx, H.ny, nx_dset+1, ny_dset+1, nz_dset+1, H.n_ghost-1, file_id, dataset_buffer, device_dataset_buffer, G.C.d_magnetic_y, "/magnetic_y"); + if (P.out_float32_magnetic_z > 0) WriteHDF5Field3D(H.nx, H.ny, nx_dset+1, ny_dset+1, nz_dset+1, H.n_ghost-1, file_id, dataset_buffer, device_dataset_buffer, G.C.d_magnetic_z, "/magnetic_z"); +#endif + + + free(dataset_buffer); + + if (status < 0) {printf("File write failed.\n"); exit(-1); } + } // 3-D case + + // close the file + status = H5Fclose(file_id); + + +} + /* Output a projection of the grid data to file. */ void OutputProjectedData(Grid3D &G, struct parameters P, int nfile) @@ -1099,23 +1186,54 @@ void Grid3D::Write_Grid_Binary(FILE *fp) #ifdef HDF5 -void Write_HDF5_Field_1D_CPU(Header H, hid_t file_id, hid_t dataspace_id, Real* dataset_buffer, Real* source, const char* name) +// Helper function which uses the correct HDF5 arguments based on the type of dataset_buffer to avoid writing garbage +herr_t HDF5_Dataset(hid_t file_id, hid_t dataspace_id, double* dataset_buffer, const char* name) { - int id = H.n_ghost; - memcpy(&dataset_buffer[0], &(source[id]), H.nx_real*sizeof(Real)); - // Create a dataset id for density hid_t dataset_id = H5Dcreate(file_id, name, H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); - // Write the density array to file // NOTE: NEED TO FIX FOR FLOAT REAL!!! + // Write the density array to file herr_t status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer); // Free the dataset id status = H5Dclose(dataset_id); + return status; +} + +herr_t HDF5_Dataset(hid_t file_id, hid_t dataspace_id, float* dataset_buffer, const char* name) +{ + // Create a dataset id for density + hid_t dataset_id = H5Dcreate(file_id, name, H5T_IEEE_F32BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + // Write the density array to file + herr_t status = H5Dwrite(dataset_id, H5T_NATIVE_FLOAT, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer); + // Free the dataset id + status = H5Dclose(dataset_id); + return status; +} + + +void Write_HDF5_Field_1D_CPU(Header H, hid_t file_id, hid_t dataspace_id, Real* dataset_buffer, Real* source, const char* name) +{ + // Copy non-ghost source to Buffer + int id = H.n_ghost; + memcpy(&dataset_buffer[0], &(source[id]), H.nx_real*sizeof(Real)); + // Buffer write to HDF5 Dataset + herr_t status = HDF5_Dataset(file_id, dataspace_id, dataset_buffer, name); +} + +void Write_HDF5_Field_1D_CPU(Header H, hid_t file_id, hid_t dataspace_id, float* dataset_buffer, double* source, const char* name) +{ + // Copy non-ghost source to Buffer with conversion from double to float + int i; + for (i=0; i 1 this substitution can be attempted. + // TODO: If there is a test case for regression testing NSCALARS > 1 this substitution can be attempted. // Write_HDF5_Field_1D_CPU(H, file_id, dataspace_id, dataset_buffer, &(C.scalar[s*H.n_cells]), dataset); - + id = H.n_ghost; memcpy(&dataset_buffer[0], &(C.scalar[id+s*H.n_cells]), H.nx_real*sizeof(Real)); - - // Create a dataset id for the scalar - dataset_id = H5Dcreate(file_id, dataset, H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); - // Write the scalar array to file // NOTE: NEED TO FIX FOR FLOAT REAL!!! - status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer); - // Free the dataset id - status = H5Dclose(dataset_id); - + // dataset here is just a name + status = HDF5_Dataset(file_id, dataspace_id, dataset_buffer, dataset); } + #endif //SCALAR #ifdef DE @@ -1265,7 +1388,7 @@ void Grid3D::Write_Grid_HDF5(hid_t file_id) sprintf(number, "%d", s); strcat(dataset,number); - // TODO: If there is a test case for regression testing NSCALARS > 1 this substitution can be attempted. + // TODO: If there is a test case for regression testing NSCALARS > 1 this substitution can be attempted. // Write_HDF5_Field_1D_CPU(H, file_id, dataspace_id, dataset_buffer, &(C.scalar[s*H.n_cells]), dataset); // Copy the scalar array to the memory buffer @@ -1276,12 +1399,8 @@ void Grid3D::Write_Grid_HDF5(hid_t file_id) dataset_buffer[buf_id] = C.scalar[id+s*H.n_cells]; } } - // Create a dataset id for the scalar - dataset_id = H5Dcreate(file_id, dataset, H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); - // Write the scalar array to file // NOTE: NEED TO FIX FOR FLOAT REAL!!! - status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer); - // Free the dataset id - status = H5Dclose(dataset_id); + + status = HDF5_Dataset(file_id, dataspace_id, dataset_buffer, dataset); } #endif //SCALAR @@ -1303,17 +1422,27 @@ void Grid3D::Write_Grid_HDF5(hid_t file_id) hsize_t dims[3]; hsize_t dims_full[3]; - double* device_dataset_buffer; - CudaSafeCall(cudaMalloc(&device_dataset_buffer,nx_dset*ny_dset*nz_dset*sizeof(double))); + size_t buffer_size; + // Need a larger device buffer for MHD. In the future, if other fields need a larger device buffer, choose the maximum of the sizes. + // If the buffer is too large, it does not cause bugs (Oct 6 2022) + #ifdef MHD + buffer_size = (nx_dset+1)*(ny_dset+1)*(nz_dset+1); + #else + buffer_size = nx_dset*ny_dset*nz_dset; + #endif + // Using static DeviceVector here automatically allocates the buffer the first time it is needed + // It persists until program exit, and then calls Free upon destruction + cuda_utilities::DeviceVector static device_dataset_vector{buffer_size}; + double* device_dataset_buffer = device_dataset_vector.data(); + dataset_buffer = (Real*) malloc(buffer_size*sizeof(Real)); + //CudaSafeCall(cudaMalloc(&device_dataset_buffer,nx_dset*ny_dset*nz_dset*sizeof(double))); - dataset_buffer = (Real *) malloc(H.nx_real*H.ny_real*H.nz_real*sizeof(Real)); - // Create the data space for the datasets + // Create the data space for the datasets (note: WriteHDF5Field3D creates its own dataspace, does not use the shared one) dims[0] = nx_dset; dims[1] = ny_dset; dims[2] = nz_dset; dataspace_id = H5Screate_simple(3, dims, NULL); - WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, device_dataset_buffer, C.d_density, "/density"); if ( output_momentum || H.Output_Complete_Data ) { WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, device_dataset_buffer, C.d_momentum_x, "/momentum_x"); @@ -1325,7 +1454,6 @@ void Grid3D::Write_Grid_HDF5(hid_t file_id) WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, device_dataset_buffer, C.d_Energy, "/Energy"); } - #ifdef SCALAR #if !defined(COOLING_GRACKLE) && !defined(CHEMISTRY_GPU) // Dont write scalars when using grackle for (int s=0; s device_buffer, then copy device_buffer -> buffer, then write HDF5 field void WriteHDF5Field3D(int nx, int ny, int nx_real, int ny_real, int nz_real, int n_ghost, hid_t file_id, float* buffer, float* device_buffer, Real* source, const char* name); void WriteHDF5Field3D(int nx, int ny, int nx_real, int ny_real, int nz_real, int n_ghost, hid_t file_id, double* buffer, double* device_buffer, Real* source, const char* name); +#endif diff --git a/src/io/viz.cu b/src/io/io_gpu.cu similarity index 50% rename from src/io/viz.cu rename to src/io/io_gpu.cu index a8cbaf0a3..c6cab6e8a 100644 --- a/src/io/viz.cu +++ b/src/io/io_gpu.cu @@ -7,13 +7,17 @@ #include "../io/io.h" // To provide io.h with OutputViz3D +// Note that the HDF5 file and buffer will have size nx_real * ny_real * nz_real whereas the conserved variables have size nx,ny,nz +// Note that magnetic fields add +1 to nx_real ny_real nz_real since an extra face needs to be output, but also has the same size nx ny nz +// For the magnetic field case, a different nx_real+1 ny_real+1 nz_real+1 n_ghost-1 are provided as inputs. + // Copy Real (non-ghost) cells from source to a double destination (for writing HDF5 in double precision) __global__ void CopyReal3D_GPU_Kernel(int nx, int ny, int nx_real, int ny_real, int nz_real, int n_ghost, double* destination, Real* source) { int dest_id,source_id,id,i,j,k; id = threadIdx.x + blockIdx.x * blockDim.x; - + k = id/(nx_real*ny_real); j = (id - k*nx_real*ny_real)/nx_real; i = id - j*nx_real - k*nx_real*ny_real; @@ -35,7 +39,7 @@ __global__ void CopyReal3D_GPU_Kernel(int nx, int ny, int nx_real, int ny_real, int dest_id,source_id,id,i,j,k; id = threadIdx.x + blockIdx.x * blockDim.x; - + k = id/(nx_real*ny_real); j = (id - k*nx_real*ny_real)/nx_real; i = id - j*nx_real - k*nx_real*ny_real; @@ -54,27 +58,26 @@ __global__ void CopyReal3D_GPU_Kernel(int nx, int ny, int nx_real, int ny_real, // When buffer is double, automatically use the double version of everything using function overloading void WriteHDF5Field3D(int nx, int ny, int nx_real, int ny_real, int nz_real, int n_ghost, hid_t file_id, double* buffer, double* device_buffer, Real* device_source, const char* name) { - hid_t dataset_id; herr_t status; hsize_t dims[3]; dims[0] = nx_real; dims[1] = ny_real; dims[2] = nz_real; hid_t dataspace_id = H5Screate_simple(3, dims, NULL); - + //Copy non-ghost parts of source to buffer dim3 dim1dGrid((nx_real*ny_real*nz_real+TPB-1)/TPB, 1, 1); - dim3 dim1dBlock(TPB, 1, 1); + dim3 dim1dBlock(TPB, 1, 1); hipLaunchKernelGGL(CopyReal3D_GPU_Kernel,dim1dGrid,dim1dBlock,0,0,nx,ny,nx_real,ny_real,nz_real,n_ghost,device_buffer,device_source); CudaSafeCall(cudaMemcpy( buffer, device_buffer, nx_real*ny_real*nz_real*sizeof(double), cudaMemcpyDeviceToHost)); - // Create a dataset id - dataset_id = H5Dcreate(file_id, name, H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); - // Write the buffer to file - status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, buffer); - // Free the dataset id and dataspace id - status = H5Dclose(dataset_id); - status = H5Sclose(dataspace_id); + // Write Buffer to HDF5 + status = HDF5_Dataset(file_id, dataspace_id, buffer, name); + + status = H5Sclose(dataspace_id); + if (status < 0) {printf("File write failed.\n");} + + } @@ -82,110 +85,26 @@ void WriteHDF5Field3D(int nx, int ny, int nx_real, int ny_real, int nz_real, int void WriteHDF5Field3D(int nx, int ny, int nx_real, int ny_real, int nz_real, int n_ghost, hid_t file_id, float* buffer, float* device_buffer, Real* device_source, const char* name) { - hid_t dataset_id; herr_t status; hsize_t dims[3]; dims[0] = nx_real; dims[1] = ny_real; dims[2] = nz_real; hid_t dataspace_id = H5Screate_simple(3, dims, NULL); - + //Copy non-ghost parts of source to buffer dim3 dim1dGrid((nx_real*ny_real*nz_real+TPB-1)/TPB, 1, 1); - dim3 dim1dBlock(TPB, 1, 1); + dim3 dim1dBlock(TPB, 1, 1); hipLaunchKernelGGL(CopyReal3D_GPU_Kernel,dim1dGrid,dim1dBlock,0,0,nx,ny,nx_real,ny_real,nz_real,n_ghost,device_buffer,device_source); CudaSafeCall(cudaMemcpy( buffer, device_buffer, nx_real*ny_real*nz_real*sizeof(float), cudaMemcpyDeviceToHost)); - // Create a dataset id - dataset_id = H5Dcreate(file_id, name, H5T_IEEE_F32BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); - // Write the buffer to file - status = H5Dwrite(dataset_id, H5T_NATIVE_FLOAT, H5S_ALL, H5S_ALL, H5P_DEFAULT, buffer); - // Free the dataset id and dataspace id - status = H5Dclose(dataset_id); - status = H5Sclose(dataspace_id); -} - - -void WriteViz3D(Grid3D &G, struct parameters P, hid_t file_id) -{ - Header H = G.H; - int nx_real = H.nx_real; - int ny_real = H.ny_real; - int nz_real = H.nz_real; - int n_ghost = H.n_ghost; - int nx = H.nx; - int ny = H.ny; - - float* dataset_buffer = (float *) malloc(H.nx_real*H.ny_real*H.nz_real*sizeof(Real)); - float* device_buffer; - CudaSafeCall(cudaMalloc(&device_buffer, nx_real*ny_real*nz_real*sizeof(float))); - - if (P.outviz_density > 0) { - WriteHDF5Field3D(nx, ny, nx_real, ny_real, nz_real, n_ghost, file_id, dataset_buffer, device_buffer, G.C.d_density, "/density"); - } - + // Write Buffer to HDF5 + status = HDF5_Dataset(file_id, dataspace_id, buffer, name); - - /* - // Just an example of extending this function to include other fields. - // Not implemented yet - if (P.outviz_energy > 0) { - WriteHDF5Field(H, file_id, dataspace_id, dataset_buffer, C.Energy, "/energy"); - } - */ - CudaSafeCall(cudaFree(device_buffer)); - - free(dataset_buffer); + status = H5Sclose(dataspace_id); + if (status < 0) {printf("File write failed.\n");} } - -void OutputViz3D(Grid3D &G, struct parameters P, int nfile) -{ - Header H = G.H; - // Do nothing in 1-D and 2-D case - if (H.ny_real == 1) { - return; - } - if (H.nz_real == 1) { - return; - } - // Do nothing if nfile is not multiple of n_outviz - if (nfile % P.n_outviz != 0) { - return; - } - - char filename[MAXLEN]; - char timestep[20]; - - // create the filename - sprintf(timestep, "%d", nfile); - strcpy(filename, P.outdir); - strcat(filename, timestep); - strcat(filename, ".viz3d.h5"); - #ifdef MPI_CHOLLA - sprintf(filename,"%s.%d",filename,procID); - #endif - - // create hdf5 file - hid_t file_id; /* file identifier */ - herr_t status; - - // Create a new file using default properties. - file_id = H5Fcreate(filename, H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT); - - // Write the header (file attributes) - G.Write_Header_HDF5(file_id); - - // write the conserved variables to the output file - WriteViz3D(G, P, file_id); - - // close the file - status = H5Fclose(file_id); - - if (status < 0) {printf("File write failed.\n"); exit(-1); } - -} - -#endif +#endif //HDF5 From 8e73d1c6f5f9b1fa13fc470a4ec5c65c1ff41a1d Mon Sep 17 00:00:00 2001 From: ojwg Date: Thu, 13 Oct 2022 16:35:31 -0400 Subject: [PATCH 119/694] fix gpu_particle_feedback compile issues, plus enable read_grid scenario with particles --- builds/make.type.disk | 5 +++-- src/analysis/feedback_analysis.cpp | 5 ++--- src/gravity/grav3D.cpp | 3 +-- src/gravity/gravity_boundaries_gpu.cu | 9 --------- src/gravity/gravity_functions_gpu.cu | 5 ----- src/grid/initial_conditions.cpp | 9 +-------- src/main.cpp | 10 ++++----- src/particles/feedback_CIC_gpu.cu | 4 ++-- src/particles/gravity_CIC.cpp | 2 +- src/particles/gravity_CIC_gpu.cu | 6 +++--- src/particles/io_particles.cpp | 29 +++++++++++++++++++++------ src/particles/particles_3D.cpp | 4 ++++ src/particles/particles_3D.h | 8 +++++--- src/particles/supernova.h | 4 ++-- 14 files changed, 52 insertions(+), 51 deletions(-) diff --git a/builds/make.type.disk b/builds/make.type.disk index af3c5dbf2..9696c0d03 100644 --- a/builds/make.type.disk +++ b/builds/make.type.disk @@ -6,7 +6,7 @@ DFLAGS += -DPARTICLES_GPU DFLAGS += -DPARTICLE_IDS #DFLAGS += -DSINGLE_PARTICLE_MASS DFLAGS += -DPARTICLE_AGE -DFLAGS += -DSUPERNOVA +DFLAGS += -DSUPERNOVA #this flag requires PARTICLE_AGE, PARTICLE_IDS DFLAGS += -DANALYSIS #DFLAGS += -DPARTICLES_KDK @@ -40,7 +40,8 @@ DFLAGS += -DDE DFLAGS += -DAVERAGE_SLOW_CELLS DFLAGS += -DHYDRO_GPU -OUTPUT ?= -DOUTPUT -DHDF5 -DSLICES +OUTPUT ?= -DOUTPUT -DHDF5 +# -DSLICES DFLAGS += $(OUTPUT) DFLAGS += $(MPI_GPU) diff --git a/src/analysis/feedback_analysis.cpp b/src/analysis/feedback_analysis.cpp index 8df88910b..579979266 100644 --- a/src/analysis/feedback_analysis.cpp +++ b/src/analysis/feedback_analysis.cpp @@ -103,14 +103,13 @@ void FeedbackAnalysis::Compute_Gas_Velocity_Dispersion(Grid3D& G) { vcp = sqrt(r*fabs(G.Particles.G.gravity_x[id_grav]*x/r + G.Particles.G.gravity_y[id_grav]*y/r)); vcxp = -y/r * vcp; vcyp = x/r * vcp; - //auto [vcx, vcy] = Galaxies::MW.rotation_velocity(x, y); vx = G.C.momentum_x[id]/ G.C.density[id]; vy = G.C.momentum_y[id]/ G.C.density[id]; vz = G.C.momentum_z[id]/ G.C.density[id]; partial_var_poisson += ((vx - vcxp)*(vx - vcxp) + (vy - vcyp)*(vy - vcyp) + vz*vz)* G.C.density[id]; - partial_var_analytic += ( (vx - circ_vel_x_h[id])*(vx - circ_vel_x_h[id]) + - (vy - circ_vel_y_h[id])*(vy - circ_vel_y_h[id]) + + partial_var_analytic += ( (vx - h_circ_vel_x[id])*(vx - h_circ_vel_x[id]) + + (vy - h_circ_vel_y[id])*(vy - h_circ_vel_y[id]) + (vz*vz) )* G.C.density[id]; } diff --git a/src/gravity/grav3D.cpp b/src/gravity/grav3D.cpp index a36d9c7be..06fbc8cc2 100644 --- a/src/gravity/grav3D.cpp +++ b/src/gravity/grav3D.cpp @@ -74,11 +74,10 @@ void Grav3D::Initialize( Real x_min, Real y_min, Real z_min, Real x_max, Real y_ //Set the Gravitational Constant ( units must be consistent ) Gconst = GN; - /* if (strcmp(P->init, "Spherical_Overdensity_3D")==0){ Gconst = 1; chprintf(" WARNING: Using Gravitational Constant G=1.\n"); - }*/ + } //Flag to transfer the Potential boundaries TRANSFER_POTENTIAL_BOUNDARIES = false; diff --git a/src/gravity/gravity_boundaries_gpu.cu b/src/gravity/gravity_boundaries_gpu.cu index 34710f752..7b8e5b1db 100644 --- a/src/gravity/gravity_boundaries_gpu.cu +++ b/src/gravity/gravity_boundaries_gpu.cu @@ -96,18 +96,9 @@ void Grid3D::Set_Potential_Boundaries_Isolated_GPU( int direction, int side, int // Copy the potential boundary from buffer to potential array hipLaunchKernelGGL( Set_Potential_Boundaries_Isolated_kernel, dim1dGrid, dim1dBlock, 0, 0, direction, side, size_buffer, n_i, n_j, nx_g, ny_g, nz_g, n_ghost, Grav.F.potential_d, pot_boundary_d ); - /*if (direction == 0 && side == 0) - gpuFor(N_GHOST_POTENTIAL, - GPU_LAMBDA(const int i) { - printf("boundary_potential[%d, %d, %d] = %.4e\n", i, n_i/2, n_j/2, pot_boundary_d[i*n_i*n_j + n_i/2 + n_i*n_j/2]); - } - );*/ - } - - #endif //GRAV_ISOLATED_BOUNDARY diff --git a/src/gravity/gravity_functions_gpu.cu b/src/gravity/gravity_functions_gpu.cu index 0e78720e1..a67978438 100644 --- a/src/gravity/gravity_functions_gpu.cu +++ b/src/gravity/gravity_functions_gpu.cu @@ -272,11 +272,6 @@ void Grid3D::Extrapolate_Grav_Potential_GPU(){ dim3 dim3dBlock(tpb_x, tpb_y, tpb_z); hipLaunchKernelGGL(Extrapolate_Grav_Potential_Kernel, dim3dGrid, dim3dBlock, 0, 0, C.d_Grav_potential, Grav.F.potential_d, Grav.F.potential_1_d, nx_pot, ny_pot, nz_pot, nx_grid, ny_grid, nz_grid, n_offset, dt_now, dt_prev, Grav.INITIAL, cosmo_factor ); - /*gpuFor(10, - GPU_LAMBDA(const int i) { - printf("extrapolated potential[%d, %d, %d] = %.4e\n", i, ny_pot/2, nz_pot/2, C.d_Grav_potential[i + nx_pot*ny_pot/2 + nx_pot*ny_pot*nz_pot/2]); - } - );*/ } diff --git a/src/grid/initial_conditions.cpp b/src/grid/initial_conditions.cpp index 4de96bcde..bbfddf65f 100644 --- a/src/grid/initial_conditions.cpp +++ b/src/grid/initial_conditions.cpp @@ -57,15 +57,8 @@ void Grid3D::Set_Initial_Conditions(parameters P) { Noh_3D(); } else if (strcmp(P.init, "Disk_2D")==0) { Disk_2D(); - } else if (strcmp(P.init, "Disk_3D")==0) { + } else if (strcmp(P.init, "Disk_3D")==0 || strcmp(P.init, "Disk_3D_particles")==0) { Disk_3D(P); - } else if (strcmp(P.init, "Disk_3D_particles")==0) { - #ifndef ONLY_PARTICLES - Disk_3D(P); - #else - // Initialize a m hydro grid when only integrating particles - Uniform_Grid(); - #endif } else if (strcmp(P.init, "Spherical_Overpressure_3D")==0) { Spherical_Overpressure_3D(); } else if (strcmp(P.init, "Spherical_Overdensity_3D")==0) { diff --git a/src/main.cpp b/src/main.cpp index a41f9ebef..11ebb7a64 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -140,14 +140,14 @@ int main(int argc, char *argv[]) if ( G.Analysis.Output_Now ) G.Compute_and_Output_Analysis(&P); #endif - #ifdef SUPERNOVA + #if defined(SUPERNOVA) && defined(PARTICLE_AGE) FeedbackAnalysis sn_analysis(G); #ifdef MPI_CHOLLA supernova::initState(&P, G.Particles.n_total_initial); #else supernova::initState(&P, G.Particles.n_local); #endif // MPI_CHOLLA - #endif // SUPERNOVA + #endif // SUPERNOVA && PARTICLE_AGE #ifdef STAR_FORMATION star_formation::Initialize(G); @@ -225,9 +225,9 @@ int main(int argc, char *argv[]) if (G.H.t + G.H.dt > outtime) G.H.dt = outtime - G.H.t; - #ifdef SUPERNOVA + #if defined(SUPERNOVA) && defined(PARTICLE_AGE) supernova::Cluster_Feedback(G, sn_analysis); - #endif //SUPERNOVA + #endif //SUPERNOVA && PARTICLE_AGE #ifdef PARTICLES //Advance the particles KDK( first step ): Velocities are updated by 0.5*dt and positions are updated by dt @@ -291,7 +291,7 @@ int main(int argc, char *argv[]) #ifdef ANALYSIS if ( G.Analysis.Output_Now ) G.Compute_and_Output_Analysis(&P); - #ifdef SUPERNOVA + #if defined(SUPERNOVA) && defined(PARTICLE_AGE) sn_analysis.Compute_Gas_Velocity_Dispersion(G); #endif #endif diff --git a/src/particles/feedback_CIC_gpu.cu b/src/particles/feedback_CIC_gpu.cu index 6bb043ae4..8cdfbfb23 100644 --- a/src/particles/feedback_CIC_gpu.cu +++ b/src/particles/feedback_CIC_gpu.cu @@ -1,4 +1,4 @@ -#if defined(SUPERNOVA) && defined(PARTICLES_GPU) +#if defined(SUPERNOVA) && defined(PARTICLES_GPU) && defined(PARTICLE_AGE) && defined(PARTICLE_IDS) #include #include @@ -528,4 +528,4 @@ Real supernova::Cluster_Feedback(Grid3D& G, FeedbackAnalysis& analysis) { } -#endif //SUPERNOVA & PARTICLES_GPU +#endif //SUPERNOVA & PARTICLES_GPU & PARTICLE_IDS & PARTICLE_AGE diff --git a/src/particles/gravity_CIC.cpp b/src/particles/gravity_CIC.cpp index ec753ba58..95c5b1213 100644 --- a/src/particles/gravity_CIC.cpp +++ b/src/particles/gravity_CIC.cpp @@ -90,7 +90,7 @@ void Particles_3D::Get_Gravity_Field_Particles_GPU( Real *potential_host ){ void Particles_3D::Get_Gravity_CIC_GPU(){ - Get_Gravity_CIC_GPU_function( n_local, G.nx_local, G.ny_local, G.nz_local, G.n_ghost_particles_grid, G.xMin, G.xMax, G.yMin, G.yMax, G.zMin, G.zMax, G.dx, G.dy, G.dz, pos_x_dev, pos_y_dev, pos_z_dev, grav_x_dev, grav_y_dev, grav_z_dev, G.gravity_x_dev, G.gravity_y_dev, G.gravity_z_dev, partIDs_dev ); + Get_Gravity_CIC_GPU_function( n_local, G.nx_local, G.ny_local, G.nz_local, G.n_ghost_particles_grid, G.xMin, G.xMax, G.yMin, G.yMax, G.zMin, G.zMax, G.dx, G.dy, G.dz, pos_x_dev, pos_y_dev, pos_z_dev, grav_x_dev, grav_y_dev, grav_z_dev, G.gravity_x_dev, G.gravity_y_dev, G.gravity_z_dev); } #endif //PARTICLES_GPU diff --git a/src/particles/gravity_CIC_gpu.cu b/src/particles/gravity_CIC_gpu.cu index f1290776d..aa0965304 100644 --- a/src/particles/gravity_CIC_gpu.cu +++ b/src/particles/gravity_CIC_gpu.cu @@ -134,7 +134,7 @@ __device__ void Get_Indexes_CIC_Gravity( Real xMin, Real yMin, Real zMin, Real d } //Kernel to compute the gravitational field at the particles positions via Cloud-In-Cell -__global__ void Get_Gravity_CIC_Kernel( part_int_t n_local, Real *gravity_x_dev, Real *gravity_y_dev, Real *gravity_z_dev, Real *pos_x_dev, Real *pos_y_dev, Real *pos_z_dev, Real *grav_x_dev, Real *grav_y_dev, Real *grav_z_dev, Real xMin, Real yMin, Real zMin, Real xMax, Real yMax, Real zMax, Real dx, Real dy, Real dz, int nx, int ny, int nz, int n_ghost, part_int_t *partIDs_dev ){ +__global__ void Get_Gravity_CIC_Kernel( part_int_t n_local, Real *gravity_x_dev, Real *gravity_y_dev, Real *gravity_z_dev, Real *pos_x_dev, Real *pos_y_dev, Real *pos_z_dev, Real *grav_x_dev, Real *grav_y_dev, Real *grav_z_dev, Real xMin, Real yMin, Real zMin, Real xMax, Real yMax, Real zMax, Real dx, Real dy, Real dz, int nx, int ny, int nz, int n_ghost){ part_int_t tid = blockIdx.x * blockDim.x + threadIdx.x ; @@ -243,7 +243,7 @@ __global__ void Get_Gravity_CIC_Kernel( part_int_t n_local, Real *gravity_x_dev, //Call the kernel to compote the gravitational field at the particles positions ( CIC ) -void Particles_3D::Get_Gravity_CIC_GPU_function( part_int_t n_local, int nx_local, int ny_local, int nz_local, int n_ghost_particles_grid, Real xMin, Real xMax, Real yMin, Real yMax, Real zMin, Real zMax, Real dx, Real dy, Real dz, Real *pos_x_dev, Real *pos_y_dev, Real *pos_z_dev, Real *grav_x_dev, Real *grav_y_dev, Real *grav_z_dev, Real *gravity_x_dev, Real *gravity_y_dev, Real *gravity_z_dev, part_int_t *partIDs_dev ){ +void Particles_3D::Get_Gravity_CIC_GPU_function( part_int_t n_local, int nx_local, int ny_local, int nz_local, int n_ghost_particles_grid, Real xMin, Real xMax, Real yMin, Real yMax, Real zMin, Real zMax, Real dx, Real dy, Real dz, Real *pos_x_dev, Real *pos_y_dev, Real *pos_z_dev, Real *grav_x_dev, Real *grav_y_dev, Real *grav_z_dev, Real *gravity_x_dev, Real *gravity_y_dev, Real *gravity_z_dev){ // set values for GPU kernels int ngrid = (n_local + TPB_PARTICLES - 1) / TPB_PARTICLES; @@ -257,7 +257,7 @@ void Particles_3D::Get_Gravity_CIC_GPU_function( part_int_t n_local, int nx_loca hipLaunchKernelGGL(Get_Gravity_CIC_Kernel, dim1dGrid, dim1dBlock, 0, 0, n_local, gravity_x_dev, gravity_y_dev, gravity_z_dev, pos_x_dev, pos_y_dev, pos_z_dev, grav_x_dev, grav_y_dev, grav_z_dev, xMin, yMin, zMin, xMax, yMax, zMax, dx, dy, dz, nx_local, ny_local, nz_local, - n_ghost_particles_grid, partIDs_dev ); + n_ghost_particles_grid); CudaCheckError(); } diff --git a/src/particles/io_particles.cpp b/src/particles/io_particles.cpp index 41bee206d..0fb0fa37b 100644 --- a/src/particles/io_particles.cpp +++ b/src/particles/io_particles.cpp @@ -42,7 +42,7 @@ void Particles_3D::Load_Particles_Data( struct parameters *P){ #ifdef TILED_INITIAL_CONDITIONS sprintf(filename,"%sics_%dMpc_%d_particles.h5", P->indir, (int) P->tile_length/1000, G.nx_local); //Everyone reads the same file #else - if (strcmp(P->init, "Disk_3D_particles") != 0) sprintf(filename,"%s.%d",filename,procID); + sprintf(filename,"%s.%d",filename,procID); #endif //TILED_INITIAL_CONDITIONS #endif @@ -116,11 +116,9 @@ void Particles_3D::Load_Particles_Data_HDF5(hid_t file_id, int nfile, struct par #ifndef MPI_CHOLLA chprintf(" Loading %ld particles\n", n_to_load); #else - if (strcmp(P->init, "Disk_3D_particles") != 0) { - part_int_t n_total_load; - n_total_load = ReducePartIntSum( n_to_load ); - chprintf( " Total Particles To Load: %ld\n", n_total_load ); - } + part_int_t n_total_load; + n_total_load = ReducePartIntSum( n_to_load ); + chprintf( " Total Particles To Load: %ld\n", n_total_load ); // Print individual n_to_load // for ( int i=0; iinit, "Spherical_Overdensity_3D")==0) Initialize_Sphere(P); else if (strcmp(P->init, "Zeldovich_Pancake")==0) Initialize_Zeldovich_Pancake( P ); else if (strcmp(P->init, "Read_Grid")==0) Load_Particles_Data( P ); + #if defined(PARTICLE_AGE) && !defined(SINGLE_PARTICLE_MASS) && defined(PARTICLE_IDS) else if (strcmp(P->init, "Disk_3D_particles") == 0) Initialize_Disk_Stellar_Clusters(P); + #endif #ifdef MPI_CHOLLA n_total_initial = ReducePartIntSum(n_local); @@ -624,6 +626,7 @@ void Particles_3D::Initialize_Sphere(struct parameters *P){ } +#if defined(PARTICLE_AGE) && !defined(SINGLE_PARTICLE_MASS) && defined(PARTICLE_IDS) /** * Initializes a disk population of uniform mass stellar clusters */ @@ -767,6 +770,7 @@ void Particles_3D::Initialize_Disk_Stellar_Clusters(struct parameters *P) { if (lost_particles > 0) chprintf(" lost %lu particles\n", lost_particles); chprintf( "Stellar Disk Particles Initialized, n_total: %lu, n_local: %lu, total_mass: %.3e s.m.\n", id+1, n_local, total_mass); } +#endif void Particles_3D::Initialize_Zeldovich_Pancake( struct parameters *P ){ diff --git a/src/particles/particles_3D.h b/src/particles/particles_3D.h index 467e11725..a25e0e21d 100644 --- a/src/particles/particles_3D.h +++ b/src/particles/particles_3D.h @@ -229,10 +229,11 @@ class Particles_3D void Initialize( struct parameters *P, Grav3D &Grav, Real xbound, Real ybound, Real zbound, Real xdglobal, Real ydglobal, Real zdglobal ); + void Allocate_Particles_Grid_Field_Real( Real **array_dev, int size ); + void Free_GPU_Array_Real( Real *array ); #ifdef PARTICLES_GPU - void Free_GPU_Array_Real( Real *array ); void Free_GPU_Array_int( int *array ); void Free_GPU_Array_bool( bool *array ); template< typename T > void Free_GPU_Array( T *array ){ cudaFree(array); } //TODO remove the Free_GPU_Array_ functions @@ -241,7 +242,6 @@ class Particles_3D void Allocate_Particles_GPU_Array_bool( bool **array_dev, part_int_t size ); void Allocate_Particles_GPU_Array_int( int **array_dev, part_int_t size ); void Allocate_Particles_GPU_Array_Part_Int( part_int_t **array_dev, part_int_t size ); - void Allocate_Particles_Grid_Field_Real( Real **array_dev, int size ); void Copy_Particles_Array_Real_Host_to_Device( Real *array_host, Real *array_dev, part_int_t size); void Copy_Particles_Array_Real_Device_to_Host( Real *array_dev, Real *array_host, part_int_t size); void Copy_Particles_Array_Int_Host_to_Device( part_int_t *array_host, part_int_t *array_dev, part_int_t size); @@ -257,7 +257,7 @@ class Particles_3D void Get_Gravity_Field_Particles_GPU( Real *potential_host ); void Get_Gravity_Field_Particles_GPU_function( int nx_local, int ny_local, int nz_local, int n_ghost_particles_grid, int n_cells_potential, Real dx, Real dy, Real dz, Real *potential_host, Real *potential_dev, Real *gravity_x_dev, Real *gravity_y_dev, Real *gravity_z_dev ); void Get_Gravity_CIC_GPU(); - void Get_Gravity_CIC_GPU_function( part_int_t n_local, int nx_local, int ny_local, int nz_local, int n_ghost_particles_grid, Real xMin, Real xMax, Real yMin, Real yMax, Real zMin, Real zMax, Real dx, Real dy, Real dz, Real *pos_x_dev, Real *pos_y_dev, Real *pos_z_dev, Real *grav_x_dev, Real *grav_y_dev, Real *grav_z_dev, Real *gravity_x_dev, Real *gravity_y_dev, Real *gravity_z_dev, part_int_t *partIDs_dev ); + void Get_Gravity_CIC_GPU_function( part_int_t n_local, int nx_local, int ny_local, int nz_local, int n_ghost_particles_grid, Real xMin, Real xMax, Real yMin, Real yMax, Real zMin, Real zMax, Real dx, Real dy, Real dz, Real *pos_x_dev, Real *pos_y_dev, Real *pos_z_dev, Real *grav_x_dev, Real *grav_y_dev, Real *grav_z_dev, Real *gravity_x_dev, Real *gravity_y_dev, Real *gravity_z_dev); Real Calc_Particles_dt_GPU_function( int ngrid, part_int_t n_local, Real dx, Real dy, Real dz, Real *vel_x_dev, Real *vel_y_dev, Real *vel_z_dev, Real *dti_array_host, Real *dti_array_dev ); void Advance_Particles_KDK_Step1_GPU_function( part_int_t n_local, Real dt, Real *pos_x_dev, Real *pos_y_dev, Real *pos_z_dev, Real *vel_x_dev, Real *vel_y_dev, Real *vel_z_dev, Real *grav_x_dev, Real *grav_y_dev, Real *grav_z_dev ); void Advance_Particles_KDK_Step1_Cosmo_GPU_function( part_int_t n_local, Real delta_a, Real *pos_x_dev, Real *pos_y_dev, Real *pos_z_dev, Real *vel_x_dev, Real *vel_y_dev, Real *vel_z_dev, Real *grav_x_dev, Real *grav_y_dev, Real *grav_z_dev, Real current_a, Real H0, Real cosmo_h, Real Omega_M, Real Omega_L, Real Omega_K ); @@ -284,7 +284,9 @@ class Particles_3D void Initialize_Sphere(struct parameters *P); +#if defined(PARTICLE_AGE) && !defined(SINGLE_PARTICLE_MASS) && defined(PARTICLE_IDS) void Initialize_Disk_Stellar_Clusters(struct parameters *P); +#endif void Initialize_Zeldovich_Pancake( struct parameters *P ); diff --git a/src/particles/supernova.h b/src/particles/supernova.h index d3c884282..e55b96bee 100644 --- a/src/particles/supernova.h +++ b/src/particles/supernova.h @@ -1,5 +1,5 @@ #pragma once -#ifdef PARTICLES_GPU +#if defined(PARTICLES_GPU) && defined(SUPERNOVA) #include "../global/global.h" #include "../analysis/feedback_analysis.h" @@ -32,4 +32,4 @@ namespace supernova { void initState(struct parameters *P, part_int_t n_local, Real allocation_factor = 1); Real Cluster_Feedback(Grid3D& G, FeedbackAnalysis& sn_analysis); } -#endif //PARTICLES_GPU +#endif //PARTICLES_GPU && SUPERNOVA From 53abf16b7da6547876e2a532a2af5be3a7a87f22 Mon Sep 17 00:00:00 2001 From: Alwin Date: Tue, 18 Oct 2022 02:10:33 -0400 Subject: [PATCH 120/694] static_grav renamed/fixed, light side fixes --- builds/make.type.rot_proj | 31 ++++++++++++++++++ builds/make.type.static_grav | 32 +++++++++++++++++++ src/global/global_cuda.h | 16 ---------- src/gravity/gravity_cuda.h | 18 ----------- .../{gravity_cuda.cu => static_grav.h} | 15 ++++----- src/hydro/hydro_cuda.cu | 2 +- src/io/io.cpp | 23 ++++++++----- 7 files changed, 86 insertions(+), 51 deletions(-) create mode 100644 builds/make.type.rot_proj create mode 100644 builds/make.type.static_grav delete mode 100644 src/gravity/gravity_cuda.h rename src/gravity/{gravity_cuda.cu => static_grav.h} (90%) diff --git a/builds/make.type.rot_proj b/builds/make.type.rot_proj new file mode 100644 index 000000000..e6faa7514 --- /dev/null +++ b/builds/make.type.rot_proj @@ -0,0 +1,31 @@ +#-- Default hydro only build with rotated projection + +DFLAGS += -DCUDA +DFLAGS += -DMPI_CHOLLA +DFLAGS += -DPRECISION=2 +DFLAGS += -DPPMC +DFLAGS += -DHLLC + +# Integrator +DFLAGS += -DSIMPLE +#DFLAGS += -DVL + +# Apply a density and temperature floor +DFLAGS += -DDENSITY_FLOOR +DFLAGS += -DTEMPERATURE_FLOOR + +# Solve the Gas Internal Energy usisng a Dual Energy Formalism +#DFLAGS += -DDE + +# Apply cooling on the GPU from precomputed tables +#DFLAGS += -DCOOLING_GPU + +# Measure the Timing of the different stages +#DFLAGS += -DCPU_TIME + +# Select output format +# Can also add -DSLICES and -DPROJECTIONS +OUTPUT ?= -DOUTPUT -DHDF5 +DFLAGS += $(OUTPUT) + +DFLAGS += -DROTATED_PROJECTION \ No newline at end of file diff --git a/builds/make.type.static_grav b/builds/make.type.static_grav new file mode 100644 index 000000000..ffa15c4ee --- /dev/null +++ b/builds/make.type.static_grav @@ -0,0 +1,32 @@ +#-- Default hydro only build with static_grav + +DFLAGS += -DCUDA +DFLAGS += -DMPI_CHOLLA +DFLAGS += -DPRECISION=2 +DFLAGS += -DPPMC +DFLAGS += -DHLLC + +# Integrator +DFLAGS += -DSIMPLE +#DFLAGS += -DVL + +# Apply a density and temperature floor +DFLAGS += -DDENSITY_FLOOR +DFLAGS += -DTEMPERATURE_FLOOR + +# Solve the Gas Internal Energy usisng a Dual Energy Formalism +#DFLAGS += -DDE + +DFLAGS += -DSTATIC_GRAV + +# Apply cooling on the GPU from precomputed tables +#DFLAGS += -DCOOLING_GPU + +# Measure the Timing of the different stages +#DFLAGS += -DCPU_TIME + +# Select output format +# Can also add -DSLICES and -DPROJECTIONS +OUTPUT ?= -DOUTPUT -DHDF5 +DFLAGS += $(OUTPUT) + diff --git a/src/global/global_cuda.h b/src/global/global_cuda.h index 7a5beca55..35c0c355f 100644 --- a/src/global/global_cuda.h +++ b/src/global/global_cuda.h @@ -92,17 +92,6 @@ inline void gpuAssert(cudaError_t code, char *file, int line, bool abort=true) } } - - -/*! \fn Real minof3(Real a, Real b, Real c) - * \brief Returns the minimum of three floating point numbers. */ -__device__ inline Real minof3(Real a, Real b, Real c) -{ - return fmin(a, fmin(b,c)); -} - - - /*! \fn int sgn_CUDA * \brief Mathematical sign function. Returns sign of x. */ __device__ inline int sgn_CUDA(Real x) @@ -111,11 +100,6 @@ __device__ inline int sgn_CUDA(Real x) else return 1; } - -__global__ void test_function(); - - - #endif //GLOBAL_CUDA_H #endif //CUDA diff --git a/src/gravity/gravity_cuda.h b/src/gravity/gravity_cuda.h deleted file mode 100644 index b4d885262..000000000 --- a/src/gravity/gravity_cuda.h +++ /dev/null @@ -1,18 +0,0 @@ -/*! \file gravity_cuda.h - * \brief Declarations of functions used to calculate gravitational accelerations. */ - -#ifdef CUDA -#ifndef GRAVITY_CUDA_H -#define GRAVITY_CUDA_H - -#include "../global/global.h" - - -__device__ void calc_g_1D(int xid, int x_off, int n_ghost, Real dx, Real xbound, Real *gx); - -__device__ void calc_g_2D(int xid, int yid, int x_off, int y_off, int n_ghost, Real dx, Real dy, Real xbound, Real ybound, Real *gx, Real *gy); - -__device__ void calc_g_3D(int xid, int yid, int zid, int x_off, int y_off, int z_off, int n_ghost, Real dx, Real dy, Real dz, Real xbound, Real ybound, Real zbound, Real *gx, Real *gy, Real *gz); - -#endif // GRAVITY_CUDA_H -#endif // CUDA diff --git a/src/gravity/gravity_cuda.cu b/src/gravity/static_grav.h similarity index 90% rename from src/gravity/gravity_cuda.cu rename to src/gravity/static_grav.h index 0137c44f1..3ddbb86be 100644 --- a/src/gravity/gravity_cuda.cu +++ b/src/gravity/static_grav.h @@ -4,17 +4,16 @@ functions in hydro_cuda.cu. */ #ifdef CUDA +#pragma once + #include -#include -#include "../utils/gpu.hpp" -#include "../global/global.h" -#include "../global/global_cuda.h" -#include "../gravity/gravity_cuda.h" +#include // provides sqrt log cos sin atan etc. +#include "../global/global.h" // provides GN etc. // Work around lack of pow(Real,int) in Hip Clang for Rocm 3.5 static inline __device__ Real pow2(const Real x) { return x*x; } -__device__ void calc_g_1D(int xid, int x_off, int n_ghost, Real dx, Real xbound, Real *gx) +inline __device__ void calc_g_1D(int xid, int x_off, int n_ghost, Real dx, Real xbound, Real *gx) { Real x_pos, r_disk, r_halo; x_pos = (x_off + xid - n_ghost + 0.5)*dx + xbound; @@ -52,7 +51,7 @@ __device__ void calc_g_1D(int xid, int x_off, int n_ghost, Real dx, Real xbound, } -__device__ void calc_g_2D(int xid, int yid, int x_off, int y_off, int n_ghost, Real dx, Real dy, Real xbound, Real ybound, Real *gx, Real *gy) +inline __device__ void calc_g_2D(int xid, int yid, int x_off, int y_off, int n_ghost, Real dx, Real dy, Real xbound, Real ybound, Real *gx, Real *gy) { Real x_pos, y_pos, r, phi; // use the subgrid offset and global boundaries to calculate absolute positions on the grid @@ -108,7 +107,7 @@ __device__ void calc_g_2D(int xid, int yid, int x_off, int y_off, int n_ghost, R } -__device__ void calc_g_3D(int xid, int yid, int zid, int x_off, int y_off, int z_off, int n_ghost, Real dx, Real dy, Real dz, Real xbound, Real ybound, Real zbound, Real *gx, Real *gy, Real *gz) +inline __device__ void calc_g_3D(int xid, int yid, int zid, int x_off, int y_off, int z_off, int n_ghost, Real dx, Real dy, Real dz, Real xbound, Real ybound, Real zbound, Real *gx, Real *gy, Real *gz) { Real x_pos, y_pos, z_pos, r_disk, r_halo; // use the subgrid offset and global boundaries to calculate absolute positions on the grid diff --git a/src/hydro/hydro_cuda.cu b/src/hydro/hydro_cuda.cu index bf385d25f..ee033e334 100644 --- a/src/hydro/hydro_cuda.cu +++ b/src/hydro/hydro_cuda.cu @@ -10,7 +10,7 @@ #include "../global/global.h" #include "../global/global_cuda.h" #include "../hydro/hydro_cuda.h" -#include "../gravity/gravity_cuda.h" +#include "../gravity/static_grav.h" #include "../utils/hydro_utilities.h" #include "../utils/cuda_utilities.h" #include "../utils/reduction_utilities.h" diff --git a/src/io/io.cpp b/src/io/io.cpp index fc6c52eb7..be0a1b9fa 100644 --- a/src/io/io.cpp +++ b/src/io/io.cpp @@ -1820,6 +1820,11 @@ void Grid3D::Write_Rotated_Projection_HDF5(hid_t file_id) int nx_dset = R.nx; int nz_dset = R.nz; + if (R.nx * R.nz == 0) { + chprintf("WARNING: compiled with -DROTATED_PROJECTION but input parameters nxr or nzr = 0\n"); + return; + } + // set the projected dataset size for this process to capture // this piece of the simulation volume // min and max values were set in the header write @@ -1920,15 +1925,17 @@ void Grid3D::Write_Rotated_Projection_HDF5(hid_t file_id) // Free the dataspace id status = H5Sclose(dataspace_xzr_id); + //free the data + free(dataset_buffer_dxzr); + free(dataset_buffer_Txzr); + free(dataset_buffer_vxxzr); + free(dataset_buffer_vyxzr); + free(dataset_buffer_vzxzr); + } - else printf("Rotated projection write only implemented for 3D data.\n"); - - //free the data - free(dataset_buffer_dxzr); - free(dataset_buffer_Txzr); - free(dataset_buffer_vxxzr); - free(dataset_buffer_vyxzr); - free(dataset_buffer_vzxzr); + else chprintf("Rotated projection write only implemented for 3D data.\n"); + + } #endif //HDF5 From 96faed7369ef405c3853d564a864e80d271c32a0 Mon Sep 17 00:00:00 2001 From: helenarichie Date: Wed, 9 Nov 2022 12:16:24 -0500 Subject: [PATCH 121/694] add recent changes --- cloud-wind/cloud-wind.txt | 2 +- src/dust/dust_cuda.cu | 5 +++-- src/grid/initial_conditions.cpp | 8 +++----- 3 files changed, 7 insertions(+), 8 deletions(-) diff --git a/cloud-wind/cloud-wind.txt b/cloud-wind/cloud-wind.txt index a60b53d61..b3ec521fc 100644 --- a/cloud-wind/cloud-wind.txt +++ b/cloud-wind/cloud-wind.txt @@ -26,7 +26,7 @@ zlen=1.0 # final output time tout=9e4 # time interval for output -outstep=1e2 +outstep=1e3 # ratio of specific heats gamma=1.66666667 # name of initial conditions diff --git a/src/dust/dust_cuda.cu b/src/dust/dust_cuda.cu index ba14a8415..f5287aa66 100644 --- a/src/dust/dust_cuda.cu +++ b/src/dust/dust_cuda.cu @@ -29,7 +29,6 @@ void Dust_Update(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n __global__ void Dust_Kernel(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dt, Real gamma) { // get grid indices int n_cells = nx * ny * nz; - int ngrid = (n_cells + TPB - 1) / TPB; int is, ie, js, je, ks, ke; cuda_utilities::Get_Real_Indices(n_ghost, nx, ny, nz, is, ie, js, je, ks, ke); // get a global thread ID @@ -61,7 +60,9 @@ __global__ void Dust_Kernel(Real *dev_conserved, int nx, int ny, int nz, int n_g d_gas = dev_conserved[id]; d_dust = dev_conserved[5*n_cells + id]; E = dev_conserved[4*n_cells + id]; - //printf("kernel: %7.4e\n", d_dust); + if (id == 0) { + printf("kernel: %7.4e\n", d_dust); + } // make sure thread hasn't crashed n = d_gas*DENSITY_UNIT / (mu*MP); diff --git a/src/grid/initial_conditions.cpp b/src/grid/initial_conditions.cpp index 86c0d9fa8..dd3741d28 100644 --- a/src/grid/initial_conditions.cpp +++ b/src/grid/initial_conditions.cpp @@ -591,7 +591,7 @@ void Grid3D::KH() Real vx, vy, vz; Real d1, d2, v1, v2, P, A; - d1 = 2.0; + d1 = 200.0; d2 = 1.0; v1 = 0.5; v2 = -0.5; @@ -1253,7 +1253,7 @@ void Grid3D::Clouds() Real p_bg, p_cl; // background and cloud pressure Real mu = 0.6; // mean atomic weight int N_cl = 1; // number of clouds - Real R_cl = 0.01; // cloud radius in code units (kpc) + Real R_cl = 0.1; // cloud radius in code units (kpc) Real cl_pos[N_cl][3]; // array of cloud positions Real r; @@ -1278,8 +1278,6 @@ void Grid3D::Clouds() rho_bg = n_bg*mu*MP/DENSITY_UNIT; rho_cl = n_cl*mu*MP/DENSITY_UNIT; vx_bg = 100*TIME_UNIT/KPC; - // vx_bg = 0.0; - //vx_c = -200*TIME_UNIT/KPC; // convert from km/s to kpc/kyr vx_cl = 0.0; vy_bg = vy_cl = 0.0; vz_bg = vz_cl = 0.0; @@ -1343,7 +1341,7 @@ void Grid3D::Clouds() C.GasEnergy[id] = p_cl/(gama-1.0); #endif #ifdef SCALAR - C.scalar[id] = C.density[id]*0.01; + C.scalar[id] = rho_cl*0.01; #endif } } From e456bb38507976ffab006d004a0d214620511cff Mon Sep 17 00:00:00 2001 From: helenarichie Date: Fri, 11 Nov 2022 13:15:22 -0500 Subject: [PATCH 122/694] make changes to allow for additional scalars in dust field --- src/dust/dust_cuda.cu | 8 +------- src/grid/cuda_boundaries.cu | 1 - src/grid/initial_conditions.cpp | 4 ++-- 3 files changed, 3 insertions(+), 10 deletions(-) diff --git a/src/dust/dust_cuda.cu b/src/dust/dust_cuda.cu index f5287aa66..480199874 100644 --- a/src/dust/dust_cuda.cu +++ b/src/dust/dust_cuda.cu @@ -1,6 +1,5 @@ #ifdef CUDA #ifdef DUST -#ifdef SCALAR #include "dust_cuda.h" @@ -56,14 +55,10 @@ __global__ void Dust_Kernel(Real *dev_conserved, int nx, int ny, int nz, int n_g Real dt_sub; //refined timestep if (xid >= is && xid < ie && yid >= js && yid < je && zid >= ks && zid < ke) { - // get quantities from dev_conserved + // get conserved quanitites d_gas = dev_conserved[id]; d_dust = dev_conserved[5*n_cells + id]; E = dev_conserved[4*n_cells + id]; - if (id == 0) { - printf("kernel: %7.4e\n", d_dust); - } - // make sure thread hasn't crashed n = d_gas*DENSITY_UNIT / (mu*MP); @@ -132,6 +127,5 @@ __device__ Real calc_dd_dt(Real d_dust, Real tau_sp) { return -d_dust / (tau_sp/3); } -#endif // SCALAR #endif // DUST #endif // CUDA \ No newline at end of file diff --git a/src/grid/cuda_boundaries.cu b/src/grid/cuda_boundaries.cu index 8c3c40efa..f69ac3c59 100644 --- a/src/grid/cuda_boundaries.cu +++ b/src/grid/cuda_boundaries.cu @@ -466,7 +466,6 @@ __global__ void Noh_Boundary_kernel(Real * c_device, c_device[gid+3*n_cells] = vz*c_device[gid]; c_device[gid+4*n_cells] = P_0/(gamma-1.0) + 0.5*c_device[gid]; } - __syncthreads(); // +y boundary next isize = nx; diff --git a/src/grid/initial_conditions.cpp b/src/grid/initial_conditions.cpp index dd3741d28..bb2483dc5 100644 --- a/src/grid/initial_conditions.cpp +++ b/src/grid/initial_conditions.cpp @@ -1325,7 +1325,7 @@ void Grid3D::Clouds() #ifdef DE C.GasEnergy[id] = p_bg/(gama-1.0); #endif - #ifdef SCALAR + #ifdef DUST C.scalar[id] = C.density[id]*0.0; #endif // add clouds @@ -1340,7 +1340,7 @@ void Grid3D::Clouds() #ifdef DE C.GasEnergy[id] = p_cl/(gama-1.0); #endif - #ifdef SCALAR + #ifdef DUST C.scalar[id] = rho_cl*0.01; #endif } From 9897553eb9ebe6914d410b0f277b2f501e922521 Mon Sep 17 00:00:00 2001 From: helenarichie Date: Fri, 11 Nov 2022 13:16:16 -0500 Subject: [PATCH 123/694] undo changes in KH initial conditions --- src/grid/initial_conditions.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/grid/initial_conditions.cpp b/src/grid/initial_conditions.cpp index bb2483dc5..aea437073 100644 --- a/src/grid/initial_conditions.cpp +++ b/src/grid/initial_conditions.cpp @@ -591,7 +591,7 @@ void Grid3D::KH() Real vx, vy, vz; Real d1, d2, v1, v2, P, A; - d1 = 200.0; + d1 = 2.0; d2 = 1.0; v1 = 0.5; v2 = -0.5; From f671d4b84eb2f98118f34045ad956aa6e315f71f Mon Sep 17 00:00:00 2001 From: helenarichie Date: Fri, 11 Nov 2022 13:17:31 -0500 Subject: [PATCH 124/694] remove hydro timestep debugging print statements --- src/hydro/hydro_cuda.cu | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/src/hydro/hydro_cuda.cu b/src/hydro/hydro_cuda.cu index 61522b1b1..8ea3413da 100644 --- a/src/hydro/hydro_cuda.cu +++ b/src/hydro/hydro_cuda.cu @@ -571,16 +571,6 @@ __global__ void Calc_dt_3D(Real *dev_conserved, Real *dev_dti, Real gamma, int n Real n = d*DENSITY_UNIT/(0.6*MP); Real T = hydro_utilities::Calc_Temp(P, n); - if (max_dti > 1) { - printf("\nmax_dti: %e\n", max_dti); - printf("E: %e g/(cm^2⋅s^2)\n", E*ENERGY_UNIT); - printf("P: %e g/(cm⋅s^2)\n", P*PRESSURE_UNIT); - printf("T: %e K\n", T); - printf("cs: %e km/s\n", cs*1e-5*VELOCITY_UNIT); - printf("d: %e g/cm^3\n", d*DENSITY_UNIT); - printf("vx: %e km/s\n", vx*1e-5*VELOCITY_UNIT); - } - } } From 808336384759ab29fba4902cbcabd6769dc566c4 Mon Sep 17 00:00:00 2001 From: helenarichie Date: Fri, 11 Nov 2022 13:30:58 -0500 Subject: [PATCH 125/694] wrap dust-specific versions of global variables in dust macro --- src/global/global.h | 7 +++++++ src/grid/grid3D.cpp | 7 +++++-- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/src/global/global.h b/src/global/global.h index 68744ba4f..0c96ff488 100644 --- a/src/global/global.h +++ b/src/global/global.h @@ -54,7 +54,14 @@ typedef double Real; #define LOG_FILE_NAME "run_output.log" //Conserved Floor Values +#ifdef DUST #define TEMP_FLOOR 10 // in Kelvin +#else // NOT DUST +#define TEMP_FLOOR 1e-3 +#endif + + + #define DENS_FLOOR 1e-5 // in code units //Parameter for Enzo dual Energy Condition diff --git a/src/grid/grid3D.cpp b/src/grid/grid3D.cpp index 27289ac25..51d9a907a 100644 --- a/src/grid/grid3D.cpp +++ b/src/grid/grid3D.cpp @@ -140,9 +140,12 @@ void Grid3D::Initialize(struct parameters *P) C_cfl = 0.3; #ifdef AVERAGE_SLOW_CELLS - // H.min_dt_slow = 1e-10; //Initialize the minumum dt to a tiny number + #ifndef DUST + H.min_dt_slow = 1e-10; //Initialize the minumum dt to a tiny number + #else H.min_dt_slow = 1e-2; - #endif + #endif // DUST + #endif // AVERAGE_SLOW_CELLS #ifndef MPI_CHOLLA From dec3effcba97226507c5aab2c7a0ff005cfc8ec4 Mon Sep 17 00:00:00 2001 From: helenarichie Date: Fri, 11 Nov 2022 13:49:11 -0500 Subject: [PATCH 126/694] remove input file --- cloud-wind/cloud-wind.txt | 45 --------------------------------------- 1 file changed, 45 deletions(-) delete mode 100644 cloud-wind/cloud-wind.txt diff --git a/cloud-wind/cloud-wind.txt b/cloud-wind/cloud-wind.txt deleted file mode 100644 index b3ec521fc..000000000 --- a/cloud-wind/cloud-wind.txt +++ /dev/null @@ -1,45 +0,0 @@ -# -# Sample Parameter File -# - -################################################ -# Parameters required for all problems -################################################ -# number of grid cells in the x dimension -nx=512 -# number of grid cells in the y dimension -ny=128 -# number of grid cells in the z dimension -nz=128 -# x direction lower domain boundary -xmin=0.0 -# x direction domain length -xlen=4.0 -# y direction lower domain boundary -ymin=0.0 -# y direction domain length -ylen=1.0 -# z direction lower domain boundary -zmin=0.0 -# y direction domain length -zlen=1.0 -# final output time -tout=9e4 -# time interval for output -outstep=1e3 -# ratio of specific heats -gamma=1.66666667 -# name of initial conditions -init=Clouds -# type of boundary condition, options include 1 (periodic), 2 (reflective), 3 (transmissive), 4 (custom) -xl_bcnd=4 -xu_bcnd=3 -yl_bcnd=3 -yu_bcnd=3 -zl_bcnd=3 -zu_bcnd=3 -# path to output directory -outdir=./ -# nfile=1e10 -custom_bcnd=wind -n_hydro=3728643 \ No newline at end of file From ecce542bbc03936cf2cecd1716e9a3f7d150ddb8 Mon Sep 17 00:00:00 2001 From: helenarichie Date: Fri, 11 Nov 2022 14:05:16 -0500 Subject: [PATCH 127/694] remove bad code for defining slow factor in avg_slow_cells --- src/gravity/gravity_functions.cpp | 18 ++++-------------- 1 file changed, 4 insertions(+), 14 deletions(-) diff --git a/src/gravity/gravity_functions.cpp b/src/gravity/gravity_functions.cpp index a29ea6802..171f96b10 100644 --- a/src/gravity/gravity_functions.cpp +++ b/src/gravity/gravity_functions.cpp @@ -116,8 +116,7 @@ void Grid3D::set_dt_Gravity(){ #ifdef AVERAGE_SLOW_CELLS //Set the min_delta_t for averaging a slow cell da_particles = fmin( da_particles, Cosmo.max_delta_a ); - // min_dt_slow = Cosmo.Get_dt_from_da( da_particles ) / Particles.C_cfl * Cosmo.H0 / ( Cosmo.current_a * Cosmo.current_a ) / SLOW_FACTOR; - min_dt_slow = 1e-2 / TIME_UNIT; + min_dt_slow = Cosmo.Get_dt_from_da( da_particles ) / Particles.C_cfl * Cosmo.H0 / ( Cosmo.current_a * Cosmo.current_a ) / SLOW_FACTOR; H.min_dt_slow = min_dt_slow; #endif @@ -146,11 +145,11 @@ void Grid3D::set_dt_Gravity(){ #ifdef AVERAGE_SLOW_CELLS //Set the min_delta_t for averaging a slow cell //min_dt_slow = dt_particles / Particles.C_cfl / SLOW_FACTOR; - #ifndef DUST + #ifndef DUST // Not using dust min_dt_slow = 3*H.dx; - #else // DUST + #else min_dt_slow = 1e10; - #endif + #endif // DUST H.min_dt_slow = min_dt_slow; #endif @@ -166,15 +165,6 @@ void Grid3D::set_dt_Gravity(){ H.min_dt_slow = min_dt_slow; #endif - #ifdef AVERAGE_SLOW_CELLS - //Set the min_delta_t for averaging a slow cell ( for now the min_dt_slow is set to a large value, change this with your condition ) - // min_dt_slow = H.dt / C_cfl * 100 ; - printf("/nhi!!/n"); - printf("/nhi!!/n"); - min_dt_slow = 1e10; - H.min_dt_slow = min_dt_slow; - #endif - // Set current and previous delta_t for the potential extrapolation if ( Grav.INITIAL ){ Grav.dt_prev = H.dt; From 283b16baa164ea4ce79b76a5e18185e1f48cdf62 Mon Sep 17 00:00:00 2001 From: helenarichie Date: Fri, 11 Nov 2022 14:07:55 -0500 Subject: [PATCH 128/694] remove bad code for defining slow factor in avg_slow_cells --- src/gravity/gravity_functions.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/gravity/gravity_functions.cpp b/src/gravity/gravity_functions.cpp index 171f96b10..f99c201d0 100644 --- a/src/gravity/gravity_functions.cpp +++ b/src/gravity/gravity_functions.cpp @@ -161,7 +161,12 @@ void Grid3D::set_dt_Gravity(){ #if defined( AVERAGE_SLOW_CELLS) && !defined( PARTICLES ) //Set the min_delta_t for averaging a slow cell ( for now the min_dt_slow is set to a large value, change this with your condition ) - min_dt_slow = H.dt / C_cfl * 100 ; + //min_dt_slow = H.dt / C_cfl * 100 ; + #ifndef DUST // Not using dust + min_dt_slow = 3*H.dx; + #else + min_dt_slow = 1e10; + #endif // DUST H.min_dt_slow = min_dt_slow; #endif From f8b817483cc3fbce5a2786b125d7e0318ffa3e9d Mon Sep 17 00:00:00 2001 From: helenarichie Date: Fri, 11 Nov 2022 14:10:37 -0500 Subject: [PATCH 129/694] remove accidental changes to existing code --- src/grid/grid3D.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/grid/grid3D.cpp b/src/grid/grid3D.cpp index 6a654a851..c31065012 100644 --- a/src/grid/grid3D.cpp +++ b/src/grid/grid3D.cpp @@ -141,7 +141,7 @@ void Grid3D::Initialize(struct parameters *P) #ifdef AVERAGE_SLOW_CELLS #ifndef DUST - H.min_dt_slow = 1e-10; //Initialize the minumum dt to a tiny number + H.min_dt_slow = 1e-100; //Initialize the minumum dt to a tiny number #else H.min_dt_slow = 1e-2; #endif // DUST From 3c713d22c1abfcb8cbc2f94ff2c54b275b235f9c Mon Sep 17 00:00:00 2001 From: helenarichie Date: Fri, 11 Nov 2022 14:12:17 -0500 Subject: [PATCH 130/694] remove old bad test code --- src/gravity/gravity_functions.cpp | 8 -------- 1 file changed, 8 deletions(-) diff --git a/src/gravity/gravity_functions.cpp b/src/gravity/gravity_functions.cpp index f99c201d0..fa136c659 100644 --- a/src/gravity/gravity_functions.cpp +++ b/src/gravity/gravity_functions.cpp @@ -145,11 +145,7 @@ void Grid3D::set_dt_Gravity(){ #ifdef AVERAGE_SLOW_CELLS //Set the min_delta_t for averaging a slow cell //min_dt_slow = dt_particles / Particles.C_cfl / SLOW_FACTOR; - #ifndef DUST // Not using dust min_dt_slow = 3*H.dx; - #else - min_dt_slow = 1e10; - #endif // DUST H.min_dt_slow = min_dt_slow; #endif @@ -162,11 +158,7 @@ void Grid3D::set_dt_Gravity(){ #if defined( AVERAGE_SLOW_CELLS) && !defined( PARTICLES ) //Set the min_delta_t for averaging a slow cell ( for now the min_dt_slow is set to a large value, change this with your condition ) //min_dt_slow = H.dt / C_cfl * 100 ; - #ifndef DUST // Not using dust min_dt_slow = 3*H.dx; - #else - min_dt_slow = 1e10; - #endif // DUST H.min_dt_slow = min_dt_slow; #endif From 4ea7fa81be3da34c51c01486ef9e6270124a09b9 Mon Sep 17 00:00:00 2001 From: helenarichie Date: Fri, 11 Nov 2022 14:14:59 -0500 Subject: [PATCH 131/694] remove old debugging code --- src/hydro/hydro_cuda.cu | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/hydro/hydro_cuda.cu b/src/hydro/hydro_cuda.cu index ceedc05f5..c07162d7c 100644 --- a/src/hydro/hydro_cuda.cu +++ b/src/hydro/hydro_cuda.cu @@ -566,11 +566,6 @@ __global__ void Calc_dt_3D(Real *dev_conserved, Real *dev_dti, Real gamma, int n max_dti = fmax(max_dti,hydroInverseCrossingTime(E, d, d_inv, vx, vy, vz, dx, dy, dz, gamma)); #endif //MHD - Real P = (E - 0.5*d*(vx*vx + vy*vy + vz*vz)) * (gamma - 1.0); - Real cs = sqrt(d_inv * gamma * P); - Real n = d*DENSITY_UNIT/(0.6*MP); - Real T = hydro_utilities::Calc_Temp(P, n); - } } From 2bdd4d0ebd0cf58b4fa88df68dd78e8dbd0e175d Mon Sep 17 00:00:00 2001 From: Alwin Date: Sat, 12 Nov 2022 03:31:59 -0500 Subject: [PATCH 132/694] refactor scalars to use enum --- src/chemistry_gpu/chemistry_functions.cpp | 13 +- src/chemistry_gpu/chemistry_functions_gpu.cu | 359 ++++++++++--------- src/chemistry_gpu/chemistry_io.cpp | 3 +- src/cooling_grackle/cool_grackle.cpp | 19 +- src/cosmology/cosmology_functions.cpp | 27 +- src/grid/grid3D.cpp | 40 ++- src/grid/grid_enum.h | 54 +++ src/grid/initial_conditions.cpp | 14 +- src/io/io.cpp | 40 +-- src/main.cpp | 2 + 10 files changed, 318 insertions(+), 253 deletions(-) create mode 100644 src/grid/grid_enum.h diff --git a/src/chemistry_gpu/chemistry_functions.cpp b/src/chemistry_gpu/chemistry_functions.cpp index 2564d2591..cbfa23d25 100644 --- a/src/chemistry_gpu/chemistry_functions.cpp +++ b/src/chemistry_gpu/chemistry_functions.cpp @@ -32,16 +32,18 @@ void Grid3D::Initialize_Chemistry( struct parameters *P ){ Chem.H.H_fraction = INITIAL_FRACTION_HI + INITIAL_FRACTION_HII; +#ifdef COSMOLOGY Chem.H.H0 = P->H0; Chem.H.Omega_M = P->Omega_M; Chem.H.Omega_L = P->Omega_L; - +#endif // Set up the units system. Real Msun, kpc_cgs, kpc_km, dens_to_CGS; Msun = MSUN_CGS; kpc_cgs = KPC_CGS; kpc_km = KPC; +#ifdef COSMOLOGY dens_to_CGS = Cosmo.rho_0_gas * Msun / kpc_cgs / kpc_cgs / kpc_cgs * Cosmo.cosmo_h * Cosmo.cosmo_h; // These are conversions from code units to cgs. Following Grackle @@ -51,7 +53,10 @@ void Grid3D::Initialize_Chemistry( struct parameters *P ){ Chem.H.time_units = kpc_km / Cosmo.cosmo_h ; Chem.H.velocity_units = Chem.H.length_units /Chem.H.time_units; Chem.H.dens_number_conv = Chem.H.density_units * pow(Chem.H.a_value, 3) / MH; - +#else + chprintf("EXITING: Chemistry is not supported without Cosmology, see %s::%d\n",__FILE__,__LINE__); + exit(-1); +#endif Real dens_base, length_base, time_base; dens_base = Chem.H.density_units * Chem.H.a_value * Chem.H.a_value * Chem.H.a_value; length_base = Chem.H.length_units / Chem.H.a_value; @@ -254,7 +259,7 @@ void Grid3D::Compute_Gas_Temperature( Real *temperature, bool convert_cosmo_uni #ifdef DE GE = C.GasEnergy[id]; #else - GE = (dev_conserved[4*n_cells + id] - 0.5*d*(vx*vx + vy*vy + vz*vz)); + GE = (E - 0.5*d*(vx*vx + vy*vy + vz*vz)); // TODO: this probably needs to be fixed for MHD #endif dens_HI = C.HI_density[id]; @@ -321,4 +326,4 @@ void Chem_GPU::Reset(){ -#endif \ No newline at end of file +#endif diff --git a/src/chemistry_gpu/chemistry_functions_gpu.cu b/src/chemistry_gpu/chemistry_functions_gpu.cu index 3d6e0052f..defb89c35 100644 --- a/src/chemistry_gpu/chemistry_functions_gpu.cu +++ b/src/chemistry_gpu/chemistry_functions_gpu.cu @@ -6,6 +6,7 @@ #include "../io/io.h" #include "rates.cuh" #include "rates_Katz95.cuh" +#include "../grid/grid_enum.h" #define eV_to_K 1.160451812e4 #define K_to_eV 8.617333263e-5 @@ -45,7 +46,7 @@ CudaCheckError(); } class Thermal_State{ -public: +public: Real U; Real d; @@ -61,7 +62,7 @@ __host__ __device__ Thermal_State( Real U_0=1, Real d_0=1, Real d_HI_0=1, Real d __host__ __device__ Real get_MMW( ){ // Real m_tot = d_HI + d_HII + d_HeI + d_HeII + d_HeIII; - Real n_tot = d_HI + d_HII + 0.25 * ( d_HeI + d_HeII + d_HeIII ) + d_e; + Real n_tot = d_HI + d_HII + 0.25 * ( d_HeI + d_HeII + d_HeIII ) + d_e; return d / n_tot; // return m_tot / n_tot; } @@ -83,12 +84,12 @@ __host__ __device__ Real compute_U( Real temp, Real gamma ){ }; __device__ void get_temperature_indx( Real T, Chemistry_Header &Chem_H, int &temp_indx, Real &delta_T, Real temp_old, bool print ){ - + Real logT, logT_start, d_logT, logT_l, logT_r; logT = log( 0.5 * ( T + temp_old ) ); logT_start = log( Chem_H.Temp_start ); logT = fmax( logT_start, logT ); - logT = fmin( log( Chem_H.Temp_end ), logT ); + logT = fmin( log( Chem_H.Temp_end ), logT ); d_logT = ( log( Chem_H.Temp_end ) - logT_start ) / ( Chem_H.N_Temp_bins - 1 ); temp_indx = (int) floor( (logT - logT_start) / d_logT ); temp_indx = max( 0, temp_indx ); @@ -98,11 +99,11 @@ __device__ void get_temperature_indx( Real T, Chemistry_Header &Chem_H, int &tem delta_T = ( logT - logT_l ) / ( logT_r - logT_l ); // if (print) printf(" logT_start: %f logT_end: %f d_logT: %f \n", logT_start, log( Chem_H.Temp_end ), d_logT ); // if (print) printf(" logT: %f logT_l: %f logT_r: %f \n", logT, logT_l, logT_r ); - + } __device__ Real interpolate_rate( Real *rate_table, int indx, Real delta ){ - + Real rate_val; rate_val = rate_table[indx]; rate_val = rate_val + delta * ( rate_table[indx+1] - rate_val ); @@ -111,63 +112,63 @@ __device__ Real interpolate_rate( Real *rate_table, int indx, Real delta ){ __device__ Real Get_Cooling_Rates( Thermal_State &TS, Chemistry_Header &Chem_H, Real dens_number_conv, Real current_z, Real temp_prev, float photo_h_HI, float photo_h_HeI, float photo_h_HeII, bool print ){ - + int temp_indx; Real temp, delta_T, U_dot; - temp = TS.get_temperature( Chem_H.gamma ); + temp = TS.get_temperature( Chem_H.gamma ); get_temperature_indx( temp, Chem_H, temp_indx, delta_T, temp_prev, print ); - if (print) printf("mu: %f temp: %f temp_indx: %d delta_T: %f \n", TS.get_MMW(), temp, temp_indx, delta_T ); + if (print) printf("mu: %f temp: %f temp_indx: %d delta_T: %f \n", TS.get_MMW(), temp, temp_indx, delta_T ); U_dot = 0.0; - + // Collisional excitation cooling - Real cool_ceHI, cool_ceHeI, cool_ceHeII; + Real cool_ceHI, cool_ceHeI, cool_ceHeII; cool_ceHI = interpolate_rate( Chem_H.cool_ceHI_d, temp_indx, delta_T ) * TS.d_HI * TS.d_e; cool_ceHeI = interpolate_rate( Chem_H.cool_ceHeI_d, temp_indx, delta_T ) * TS.d_HeII * TS.d_e * TS.d_e * dens_number_conv / 4.0 ; cool_ceHeII = interpolate_rate( Chem_H.cool_ceHeII_d, temp_indx, delta_T ) * TS.d_HeII * TS.d_e / 4.0; U_dot -= cool_ceHI + cool_ceHeI + cool_ceHeII; - + // Collisional excitation cooling Real cool_ciHI, cool_ciHeI, cool_ciHeII, cool_ciHeIS; - cool_ciHI = interpolate_rate( Chem_H.cool_ciHI_d, temp_indx, delta_T ) * TS.d_HI * TS.d_e; + cool_ciHI = interpolate_rate( Chem_H.cool_ciHI_d, temp_indx, delta_T ) * TS.d_HI * TS.d_e; cool_ciHeI = interpolate_rate( Chem_H.cool_ciHeI_d, temp_indx, delta_T ) * TS.d_HeI * TS.d_e / 4.0; cool_ciHeII = interpolate_rate( Chem_H.cool_ciHeII_d, temp_indx, delta_T ) * TS.d_HeII * TS.d_e / 4.0; cool_ciHeIS = interpolate_rate( Chem_H.cool_ciHeIS_d, temp_indx, delta_T ) * TS.d_HeII * TS.d_e * TS.d_e * dens_number_conv / 4.0; U_dot -= cool_ciHI + cool_ciHeI + cool_ciHeII + cool_ciHeIS; - + // Recombination cooling Real cool_reHII, cool_reHeII1, cool_reHeII2, cool_reHeIII; - cool_reHII = interpolate_rate( Chem_H.cool_reHII_d, temp_indx, delta_T ) * TS.d_HII * TS.d_e; + cool_reHII = interpolate_rate( Chem_H.cool_reHII_d, temp_indx, delta_T ) * TS.d_HII * TS.d_e; cool_reHeII1 = interpolate_rate( Chem_H.cool_reHeII1_d, temp_indx, delta_T ) * TS.d_HeII * TS.d_e / 4.0; cool_reHeII2 = interpolate_rate( Chem_H.cool_reHeII2_d, temp_indx, delta_T ) * TS.d_HeII * TS.d_e / 4.0; cool_reHeIII = interpolate_rate( Chem_H.cool_reHeIII_d, temp_indx, delta_T ) * TS.d_HeIII * TS.d_e / 4.0; U_dot -= cool_reHII + cool_reHeII1 + cool_reHeII2 + cool_reHeIII; - + // Bremsstrahlung cooling Real cool_brem; cool_brem = interpolate_rate( Chem_H.cool_brem_d, temp_indx, delta_T ) * ( TS.d_HII + TS.d_HeII/4.0 + TS.d_HeIII ) * TS.d_e; U_dot -= cool_brem; - + // Compton cooling or heating Real cool_compton, temp_cmb; temp_cmb = 2.73 * ( 1.0 + current_z ); - cool_compton = Chem_H.cool_compton * pow(1.0 + current_z, 4) * ( temp - temp_cmb ) * TS.d_e / dens_number_conv; + cool_compton = Chem_H.cool_compton * pow(1.0 + current_z, 4) * ( temp - temp_cmb ) * TS.d_e / dens_number_conv; U_dot -= cool_compton; - + // Phothoheating Real photo_heat; - photo_heat = ( photo_h_HI * TS.d_HI + 0.25 * ( photo_h_HeI * TS.d_HeI + photo_h_HeII * TS.d_HeII ) ) / dens_number_conv; + photo_heat = ( photo_h_HI * TS.d_HI + 0.25 * ( photo_h_HeI * TS.d_HeI + photo_h_HeII * TS.d_HeII ) ) / dens_number_conv; U_dot += photo_heat; - - if ( temp <= 1.01* Chem_H.Temp_start && fabs( U_dot ) < 0 ) U_dot = tiny; + + if ( temp <= 1.01* Chem_H.Temp_start && fabs( U_dot ) < 0 ) U_dot = tiny; if ( fabs(U_dot) < tiny ) U_dot = tiny; - - + + if (print) printf("HI: %e \n", TS.d_HI ); if (print) printf("HII: %e \n", TS.d_HII ); if (print) printf("HeI: %e \n", TS.d_HeI ); if (print) printf("HeII: %e \n", TS.d_HeII ); if (print) printf("HeIII: %e \n", TS.d_HeIII ); - if (print) printf("de: %e \n", TS.d_e ); + if (print) printf("de: %e \n", TS.d_e ); if (print) printf("Cooling ceHI: %e \n", cool_ceHI ); if (print) printf("Cooling ceHeI: %e \n", cool_ceHeI ); if (print) printf("Cooling ceHeII: %e \n", cool_ceHeII ); @@ -186,41 +187,41 @@ __device__ Real Get_Cooling_Rates( Thermal_State &TS, Chemistry_Header &Chem_H, if (print) printf("Cooling DOM: %e \n", dens_number_conv ); if (print) printf("Cooling compton: %e \n", cool_compton ); if (print) printf("Cooling U_dot: %e \n", U_dot ); - + return U_dot; - + } __device__ void Get_Reaction_Rates( Thermal_State &TS, Chemistry_Header &Chem_H, Real &k_coll_i_HI, Real &k_coll_i_HeI, Real &k_coll_i_HeII, Real &k_coll_i_HI_HI, Real &k_coll_i_HI_HeI, Real &k_recomb_HII, Real &k_recomb_HeII, Real &k_recomb_HeIII, bool print ){ - + int temp_indx; Real temp, delta_T; - temp = TS.get_temperature( Chem_H.gamma ); + temp = TS.get_temperature( Chem_H.gamma ); get_temperature_indx( temp, Chem_H, temp_indx, delta_T, temp, print ); - + k_coll_i_HI = interpolate_rate( Chem_H.k_coll_i_HI_d, temp_indx, delta_T ); k_coll_i_HeI = interpolate_rate( Chem_H.k_coll_i_HeI_d, temp_indx, delta_T ); k_coll_i_HeII = interpolate_rate( Chem_H.k_coll_i_HeII_d, temp_indx, delta_T ); - + k_coll_i_HI_HI = interpolate_rate( Chem_H.k_coll_i_HI_HI_d, temp_indx, delta_T ); k_coll_i_HI_HeI = interpolate_rate( Chem_H.k_coll_i_HI_HeI_d, temp_indx, delta_T ); - + k_recomb_HII = interpolate_rate( Chem_H.k_recomb_HII_d, temp_indx, delta_T ); k_recomb_HeII = interpolate_rate( Chem_H.k_recomb_HeII_d, temp_indx, delta_T ); k_recomb_HeIII = interpolate_rate( Chem_H.k_recomb_HeIII_d, temp_indx, delta_T ); - + if (print) printf("logT: %f temp_indx: %d\n", log(temp), temp_indx ); if (print) printf("k_coll_i_HI: %e \n", k_coll_i_HI ); if (print) printf("k_coll_i_HeI: %e \n", k_coll_i_HeI ); if (print) printf("k_coll_i_HeII: %e \n", k_coll_i_HeII ); if (print) printf("k_coll_i_HI_HI: %e \n", k_coll_i_HI_HI ); - if (print) printf("k_coll_i_HI_HeI: %e \n", k_coll_i_HI_HeI ); + if (print) printf("k_coll_i_HI_HeI: %e \n", k_coll_i_HI_HeI ); if (print) printf("k_recomb_HII: %e \n", k_recomb_HII ); if (print) printf("k_recomb_HeII: %e \n", k_recomb_HeII ); if (print) printf("k_recomb_HeIII: %e \n", k_recomb_HeIII ); - + } __device__ int Binary_Search( int N, Real val, float *data, int indx_l, int indx_r ){ @@ -236,28 +237,28 @@ __device__ int Binary_Search( int N, Real val, float *data, int indx_l, int indx } __device__ Real linear_interpolation( Real delta_x, int indx_l, int indx_r, float*array ){ - float v_l, v_r; + float v_l, v_r; Real v; v_l = array[indx_l]; v_r = array[indx_r]; v = delta_x * ( v_r - v_l ) + v_l; - return v; + return v; } __device__ void Get_Current_UVB_Rates( Real current_z, Chemistry_Header &Chem_H, - float &photo_i_HI, float &photo_i_HeI, float &photo_i_HeII, + float &photo_i_HI, float &photo_i_HeI, float &photo_i_HeII, float &photo_h_HI, float &photo_h_HeI, float &photo_h_HeII, bool print ){ - + if ( current_z > Chem_H.uvb_rates_redshift_d[Chem_H.n_uvb_rates_samples - 1]){ - photo_h_HI = 0; - photo_h_HeI = 0; - photo_h_HeII = 0; - photo_i_HI = 0; - photo_i_HeI = 0; - photo_i_HeII = 0; + photo_h_HI = 0; + photo_h_HeI = 0; + photo_h_HeII = 0; + photo_i_HI = 0; + photo_i_HeI = 0; + photo_i_HeII = 0; return; - - } + + } // Find closest value of z in rates_z such that z<=current_z int indx_l; Real z_l, z_r, delta_x; @@ -265,184 +266,184 @@ __device__ void Get_Current_UVB_Rates( Real current_z, Chemistry_Header &Chem_H, z_l = Chem_H.uvb_rates_redshift_d[indx_l]; z_r = Chem_H.uvb_rates_redshift_d[indx_l+1]; delta_x = (current_z - z_l) / ( z_r - z_l ); - + photo_i_HI = linear_interpolation( delta_x, indx_l, indx_l+1, Chem_H.photo_ion_HI_rate_d ); photo_i_HeI = linear_interpolation( delta_x, indx_l, indx_l+1, Chem_H.photo_ion_HeI_rate_d ); photo_i_HeII = linear_interpolation( delta_x, indx_l, indx_l+1, Chem_H.photo_ion_HeII_rate_d ); photo_h_HI = linear_interpolation( delta_x, indx_l, indx_l+1, Chem_H.photo_heat_HI_rate_d ); photo_h_HeI = linear_interpolation( delta_x, indx_l, indx_l+1, Chem_H.photo_heat_HeI_rate_d ); photo_h_HeII = linear_interpolation( delta_x, indx_l, indx_l+1, Chem_H.photo_heat_HeII_rate_d ); - + } -__device__ Real Get_Chemistry_dt( Thermal_State &TS, Chemistry_Header &Chem_H, Real &HI_dot, Real &e_dot, Real U_dot, +__device__ Real Get_Chemistry_dt( Thermal_State &TS, Chemistry_Header &Chem_H, Real &HI_dot, Real &e_dot, Real U_dot, Real k_coll_i_HI, Real k_coll_i_HeI, Real k_coll_i_HeII, Real k_coll_i_HI_HI, Real k_coll_i_HI_HeI, Real k_recomb_HII, Real k_recomb_HeII, Real k_recomb_HeIII, - float photo_i_HI, float photo_i_HeI, float photo_i_HeII, - int n_iter, Real HI_dot_prev, Real e_dot_prev, + float photo_i_HI, float photo_i_HeI, float photo_i_HeII, + int n_iter, Real HI_dot_prev, Real e_dot_prev, Real t_chem, Real dt_hydro, bool print ){ - + Real dt, energy; - // Rate of change of HI + // Rate of change of HI HI_dot = k_recomb_HII * TS.d_HII * TS.d_e - k_coll_i_HI * TS.d_HI * TS.d_e - k_coll_i_HI_HI * TS.d_HI * TS.d_HI - k_coll_i_HI_HeI * TS.d_HI * TS.d_HeI/4.0 - photo_i_HI * TS.d_HI; - - // Rate of change of electron + + // Rate of change of electron e_dot = k_coll_i_HI * TS.d_HI * TS.d_e + k_coll_i_HeI * TS.d_HeI/4.0 * TS.d_e + k_coll_i_HeII * TS.d_HeII/4.0 * TS.d_e - + k_coll_i_HI_HI * TS.d_HI * TS.d_HI + + k_coll_i_HI_HeI * TS.d_HI * TS.d_HeI/4.0 - - k_recomb_HII * TS.d_HII * TS.d_e - k_recomb_HeII * TS.d_HeII/4.0 * TS.d_e - k_recomb_HeIII * TS.d_HeIII/4.0 * TS.d_e - + photo_i_HI * TS.d_HI + photo_i_HeI * TS.d_HeI/4.0 + photo_i_HeII * TS.d_HeII/4.0; - + + k_coll_i_HI_HI * TS.d_HI * TS.d_HI + + k_coll_i_HI_HeI * TS.d_HI * TS.d_HeI/4.0 + - k_recomb_HII * TS.d_HII * TS.d_e - k_recomb_HeII * TS.d_HeII/4.0 * TS.d_e - k_recomb_HeIII * TS.d_HeIII/4.0 * TS.d_e + + photo_i_HI * TS.d_HI + photo_i_HeI * TS.d_HeI/4.0 + photo_i_HeII * TS.d_HeII/4.0; + // Bound from below to prevent numerical errors if ( fabs(HI_dot) < tiny ) HI_dot = fmin( tiny, TS.d_HI ); if ( fabs(e_dot) < tiny ) e_dot = fmin( tiny, TS.d_e ); - + // If the net rate is almost perfectly balanced then set // it to zero (since it is zero to available precision) if ( fmin( fabs(k_coll_i_HI * TS.d_HI * TS.d_e), fabs(k_recomb_HII * TS.d_HII * TS.d_e) ) / fmax( fabs(HI_dot), fabs(e_dot) ) > 1e6 ){ HI_dot = tiny; e_dot = tiny; } - + if ( n_iter > 50 ){ HI_dot = fmin( fabs(HI_dot), fabs( HI_dot_prev) ); e_dot = fmin( fabs(e_dot), fabs( e_dot_prev) ); } - + if ( TS.d * Chem_H.dens_number_conv > 1e8 && U_dot > 0 ){ printf( "#### Equlibrium \n" ); } - + #ifdef TEMPERATURE_FLOOR - if ( TS.get_temperature( Chem_H.gamma ) < TEMP_FLOOR ) TS.U = TS.compute_U( TEMP_FLOOR, Chem_H.gamma ); + if ( TS.get_temperature( Chem_H.gamma ) < TEMP_FLOOR ) TS.U = TS.compute_U( TEMP_FLOOR, Chem_H.gamma ); #endif - + energy = fmax( TS.U * TS.d, tiny ); dt = fmin( fabs( 0.1 * TS.d_HI / HI_dot ), fabs( 0.1 * TS.d_e / e_dot ) ); dt = fmin( fabs( 0.1 * energy / U_dot ), dt ); dt = fmin( 0.5 * dt_hydro, dt ); dt = fmin( dt_hydro - t_chem, dt ); - + if ( n_iter == Chem_H.max_iter-1 ){ printf("##### Chem_GPU: dt_hydro: %e t_chem: %e dens: %e temp: %e GE: %e U_dot: %e dt_HI: %e dt_e: %e dt_U: %e \n", dt_hydro, t_chem, TS.d, TS.get_temperature(Chem_H.gamma), energy, U_dot, fabs( 0.1 * TS.d_HI / HI_dot ), fabs( 0.1 * TS.d_e / e_dot ), fabs( 0.1 * TS.U * TS.d / U_dot ) ) ; } - - - if (print) printf("HIdot: %e\n", HI_dot ); - if (print) printf("edot: %e\n", e_dot ); - if (print) printf("energy: %e\n", TS.U * TS.d ); - if (print) printf("Udot: %e\n", U_dot ); + + + if (print) printf("HIdot: %e\n", HI_dot ); + if (print) printf("edot: %e\n", e_dot ); + if (print) printf("energy: %e\n", TS.U * TS.d ); + if (print) printf("Udot: %e\n", U_dot ); if (print) printf("dt_hydro: %e\n", dt_hydro ); - if (print) printf("dt: %e\n", dt ); - - return dt; - + if (print) printf("dt: %e\n", dt ); + + return dt; + } -__device__ void Update_Step( Thermal_State &TS, Chemistry_Header &Chem_H, Real dt, Real U_dot, Real k_coll_i_HI, Real k_coll_i_HeI, +__device__ void Update_Step( Thermal_State &TS, Chemistry_Header &Chem_H, Real dt, Real U_dot, Real k_coll_i_HI, Real k_coll_i_HeI, Real k_coll_i_HeII, Real k_coll_i_HI_HI, Real k_coll_i_HI_HeI, - Real k_recomb_HII, Real k_recomb_HeII, Real k_recomb_HeIII, - float photo_i_HI, float photo_i_HeI, float photo_i_HeII, + Real k_recomb_HII, Real k_recomb_HeII, Real k_recomb_HeIII, + float photo_i_HI, float photo_i_HeI, float photo_i_HeII, Real &HI_dot_prev, Real &e_dot_prev, Real &temp_prev, bool print ){ - + Real d_HI_p, d_HII_p, d_HeI_p, d_HeII_p, d_HeIII_p, d_e_p; Real s_coef, a_coef; - + // Update HI s_coef = k_recomb_HII * TS.d_HII * TS.d_e; a_coef = k_coll_i_HI * TS.d_e + k_coll_i_HI_HI * TS.d_HI + k_coll_i_HI_HeI * TS.d_HeI/4.0 + photo_i_HI; - d_HI_p = ( dt * s_coef + TS.d_HI ) / ( 1.0 + dt*a_coef ); + d_HI_p = ( dt * s_coef + TS.d_HI ) / ( 1.0 + dt*a_coef ); if ( print ) printf("Update HI s_coef: %e a_coef: %e HIp: %e \n", s_coef, a_coef, d_HI_p ); - + // Update HII s_coef = k_coll_i_HI * d_HI_p * TS.d_e + k_coll_i_HI_HI * d_HI_p * d_HI_p + k_coll_i_HI_HeI * d_HI_p * TS.d_HeI/4.0 + photo_i_HI * d_HI_p; a_coef = k_recomb_HII * TS.d_e; - d_HII_p = ( dt * s_coef + TS.d_HII ) / ( 1.0 + dt*a_coef ); + d_HII_p = ( dt * s_coef + TS.d_HII ) / ( 1.0 + dt*a_coef ); if ( print ) printf("Update HII s_coef: %e a_coef: %e HIIp: %e \n", s_coef, a_coef, d_HII_p ); - + // Update electron - s_coef = k_coll_i_HI_HI * d_HI_p * d_HI_p + k_coll_i_HI_HeI * d_HI_p * TS.d_HeI/4.0 + s_coef = k_coll_i_HI_HI * d_HI_p * d_HI_p + k_coll_i_HI_HeI * d_HI_p * TS.d_HeI/4.0 + photo_i_HI * TS.d_HI + photo_i_HeI * TS.d_HeI/4.0 + photo_i_HeII * TS.d_HeII/4.0 ; a_coef = - k_coll_i_HI * TS.d_HI + k_recomb_HII * TS.d_HII - k_coll_i_HeI * TS.d_HeI/4.0 + k_recomb_HeII * TS.d_HeII/4.0 - k_coll_i_HeII * TS.d_HeII/4.0 + k_recomb_HeIII * TS.d_HeIII/4.0; - d_e_p = ( dt * s_coef + TS.d_e ) / ( 1.0 + dt*a_coef ); + d_e_p = ( dt * s_coef + TS.d_e ) / ( 1.0 + dt*a_coef ); if ( print ) printf("Update e s_coef: %e a_coef: %e ep: %e \n", s_coef, a_coef, d_e_p ); - + // Update HeI s_coef = k_recomb_HeII * TS.d_HeII * TS.d_e; a_coef = k_coll_i_HeI * TS.d_e + photo_i_HeI; - d_HeI_p = ( dt * s_coef + TS.d_HeI ) / ( 1.0 + dt*a_coef ); + d_HeI_p = ( dt * s_coef + TS.d_HeI ) / ( 1.0 + dt*a_coef ); if ( print ) printf("Update HeI s_coef: %e a_coef: %e HeIp: %e \n", s_coef, a_coef, d_HeI_p ); - + // Update HeII s_coef = k_coll_i_HeI * d_HeI_p * TS.d_e + k_recomb_HeIII * TS.d_HeIII * TS.d_e + photo_i_HeI * d_HeI_p; a_coef = k_recomb_HeII * TS.d_e + k_coll_i_HeII * TS.d_e + photo_i_HeII; - d_HeII_p = ( dt * s_coef + TS.d_HeII ) / ( 1.0 + dt*a_coef ); + d_HeII_p = ( dt * s_coef + TS.d_HeII ) / ( 1.0 + dt*a_coef ); if ( print ) printf("Update HeII s_coef: %e a_coef: %e HeIIp: %e \n", s_coef, a_coef, d_HeII_p ); - + // Update HeIII s_coef = k_coll_i_HeII * d_HeII_p * TS.d_e + photo_i_HeII * d_HeII_p; a_coef = k_recomb_HeIII * TS.d_e; - d_HeIII_p = ( dt * s_coef + TS.d_HeIII ) / ( 1.0 + dt*a_coef ); + d_HeIII_p = ( dt * s_coef + TS.d_HeIII ) / ( 1.0 + dt*a_coef ); if ( print ) printf("Update HeIII s_coef: %e a_coef: %e HeIIIp: %e \n", s_coef, a_coef, d_HeIII_p ); - + // Record the temperature for the next step - temp_prev = TS.get_temperature( Chem_H.gamma ); - + temp_prev = TS.get_temperature( Chem_H.gamma ); + HI_dot_prev = fabs( TS.d_HI - d_HI_p ) / fmax( dt, tiny ); TS.d_HI = fmax( d_HI_p, tiny ); TS.d_HII = fmax( d_HII_p, tiny ); TS.d_HeI = fmax( d_HeI_p, tiny ); TS.d_HeII = fmax( d_HeII_p, tiny ); TS.d_HeIII = fmax( d_HeIII_p, 1e-5*tiny ); - + // Use charge conservation to determine electron fraction e_dot_prev = TS.d_e; TS.d_e = TS.d_HII + TS.d_HeII/4.0 + TS.d_HeIII/2.0; e_dot_prev = fabs( TS.d_e - e_dot_prev ) / fmax( dt, tiny ); - + // Update internal energy TS.U += U_dot / TS.d * dt; #ifdef TEMPERATURE_FLOOR - if ( TS.get_temperature( Chem_H.gamma ) < TEMP_FLOOR ) TS.U = TS.compute_U( TEMP_FLOOR, Chem_H.gamma ); + if ( TS.get_temperature( Chem_H.gamma ) < TEMP_FLOOR ) TS.U = TS.compute_U( TEMP_FLOOR, Chem_H.gamma ); #endif - if ( print ) printf("Updated U: %e \n", TS.U); - + if ( print ) printf("Updated U: %e \n", TS.U); + } __global__ void Update_Chemistry_kernel( Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dt_hydro, Chemistry_Header Chem_H ){ - - + + int id, xid, yid, zid, n_cells, n_iter; Real d, d_inv, vx, vy, vz; Real GE, E_kin, dt_chem, t_chem; Real current_a, a3, a2; - + Real current_z, density_conv, energy_conv; current_z = Chem_H.current_z; density_conv = Chem_H.density_conversion; energy_conv = Chem_H.energy_conversion; - + Real U_dot, HI_dot, e_dot, HI_dot_prev, e_dot_prev, temp_prev; Real k_coll_i_HI, k_coll_i_HeI, k_coll_i_HeII, k_coll_i_HI_HI, k_coll_i_HI_HeI; Real k_recomb_HII, k_recomb_HeII, k_recomb_HeIII; float photo_i_HI, photo_i_HeI, photo_i_HeII; float photo_h_HI, photo_h_HeI, photo_h_HeII; Real correct_H, correct_He; - - + + n_cells = nx*ny*nz; - + // get a global thread ID id = threadIdx.x + blockIdx.x * blockDim.x; zid = id / (nx*ny); yid = (id - zid*nx*ny) / nx; xid = id - zid*nx*ny - yid*nx; bool print; - + // threads corresponding to real cells do the calculation if (xid > n_ghost-1 && xid < nx-n_ghost && yid > n_ghost-1 && yid < ny-n_ghost && zid > n_ghost-1 && zid < nz-n_ghost) { @@ -454,33 +455,33 @@ __global__ void Update_Chemistry_kernel( Real *dev_conserved, int nx, int ny, in E_kin = 0.5*d*(vx*vx + vy*vy + vz*vz); #ifdef DE GE = dev_conserved[(n_fields-1)*n_cells + id]; - #else + #else GE = dev_conserved[4*n_cells + id] - E_kin; #endif - + print = false; // if ( xid == n_ghost && yid == n_ghost && zid == n_ghost ) print = true; - + // Convert to cgs units current_a = 1 / ( current_z + 1); a2 = current_a * current_a; - a3 = a2 * current_a; + a3 = a2 * current_a; d *= density_conv / a3; - GE *= energy_conv / a2; + GE *= energy_conv / a2; dt_hydro = dt_hydro * current_a * current_a / Chem_H.H0 * 1000 * KPC / Chem_H.time_units; // delta_a = Chem_H.H0 * sqrt( Chem_H.Omega_M/current_a + Chem_H.Omega_L*pow(current_a, 2) ) / ( 1000 * KPC ) * dt_hydro * Chem_H.time_units; - + // Initialize the thermal state - Thermal_State TS; - TS.d = dev_conserved[ id] / a3; - TS.d_HI = dev_conserved[ 5*n_cells + id] / a3; - TS.d_HII = dev_conserved[ 6*n_cells + id] / a3; - TS.d_HeI = dev_conserved[ 7*n_cells + id] / a3; - TS.d_HeII = dev_conserved[ 8*n_cells + id] / a3; - TS.d_HeIII = dev_conserved[ 9*n_cells + id] / a3; - TS.d_e = dev_conserved[10*n_cells + id] / a3; - TS.U = GE * d_inv * 1e-10; - + Thermal_State TS; + TS.d = dev_conserved[ id ] / a3; + TS.d_HI = dev_conserved[ id + n_cells*grid_enum::HI_density ] / a3; + TS.d_HII = dev_conserved[ id + n_cells*grid_enum::HII_density ] / a3; + TS.d_HeI = dev_conserved[ id + n_cells*grid_enum::HeI_density ] / a3; + TS.d_HeII = dev_conserved[ id + n_cells*grid_enum::HeII_density ] / a3; + TS.d_HeIII = dev_conserved[ id + n_cells*grid_enum::HeIII_density ] / a3; + TS.d_e = dev_conserved[ id + n_cells*grid_enum::e_density ] / a3; + TS.U = GE * d_inv * 1e-10; + // Ceiling species TS.d_HI = fmax( TS.d_HI, tiny ); TS.d_HII = fmax( TS.d_HII, tiny ); @@ -488,10 +489,10 @@ __global__ void Update_Chemistry_kernel( Real *dev_conserved, int nx, int ny, in TS.d_HeII = fmax( TS.d_HeII, tiny ); TS.d_HeIII = fmax( TS.d_HeIII, 1e-5*tiny ); TS.d_e = fmax( TS.d_e, tiny ); - + // Compute temperature at first iteration temp_prev = TS.get_temperature( Chem_H.gamma ); - + // if (print){ // printf("current_z: %f\n", current_z ); // printf("density_units: %e\n", Chem_H.density_units ); @@ -510,42 +511,42 @@ __global__ void Update_Chemistry_kernel( Real *dev_conserved, int nx, int ny, in // printf("energy: %e \n", TS.U*TS.d ); // printf("dt_hydro: %e \n", dt_hydro / Chem_H.time_units ); // } - + // Get the photoheating and photoionization rates at z=current_z - Get_Current_UVB_Rates( current_z, Chem_H, photo_i_HI, photo_i_HeI, photo_i_HeII, + Get_Current_UVB_Rates( current_z, Chem_H, photo_i_HI, photo_i_HeI, photo_i_HeII, photo_h_HI, photo_h_HeI, photo_h_HeII, print ); - + HI_dot_prev = 0; - e_dot_prev = 0; + e_dot_prev = 0; n_iter = 0; t_chem = 0; while ( t_chem < dt_hydro ){ - + if (print) printf("########################################## Iter %d \n", n_iter ); - - U_dot = Get_Cooling_Rates( TS, Chem_H, Chem_H.dens_number_conv, current_z, temp_prev, + + U_dot = Get_Cooling_Rates( TS, Chem_H, Chem_H.dens_number_conv, current_z, temp_prev, photo_h_HI, photo_h_HeI, photo_h_HeII, print ); - + Get_Reaction_Rates( TS, Chem_H, k_coll_i_HI, k_coll_i_HeI, k_coll_i_HeII, k_coll_i_HI_HI, k_coll_i_HI_HeI, k_recomb_HII, k_recomb_HeII, k_recomb_HeIII, print ); - - dt_chem = Get_Chemistry_dt( TS, Chem_H, HI_dot, e_dot, U_dot, + + dt_chem = Get_Chemistry_dt( TS, Chem_H, HI_dot, e_dot, U_dot, k_coll_i_HI, k_coll_i_HeI, k_coll_i_HeII, k_coll_i_HI_HI, k_coll_i_HI_HeI, k_recomb_HII, k_recomb_HeII, k_recomb_HeIII, - photo_i_HI, photo_i_HeI, photo_i_HeII, + photo_i_HI, photo_i_HeI, photo_i_HeII, n_iter, HI_dot_prev, e_dot_prev, t_chem, dt_hydro, print ); - + Update_Step( TS, Chem_H, dt_chem, U_dot, k_coll_i_HI, k_coll_i_HeI, k_coll_i_HeII, k_coll_i_HI_HI, k_coll_i_HI_HeI, - k_recomb_HII, k_recomb_HeII, k_recomb_HeIII, photo_i_HI, photo_i_HeI, photo_i_HeII, HI_dot_prev, + k_recomb_HII, k_recomb_HeII, k_recomb_HeIII, photo_i_HI, photo_i_HeI, photo_i_HeII, HI_dot_prev, e_dot_prev, temp_prev, print ); - + t_chem += dt_chem; n_iter += 1; if ( n_iter == Chem_H.max_iter ) break; - + } - if ( print ) printf("Chem_GPU: N Iter: %d\n", n_iter ); - + if ( print ) printf("Chem_GPU: N Iter: %d\n", n_iter ); + // Make consistent abundances with the H and He density correct_H = Chem_H.H_fraction * TS.d / ( TS.d_HI + TS.d_HII ); correct_He = ( 1.0 - Chem_H.H_fraction ) * TS.d / ( TS.d_HeI + TS.d_HeII + TS.d_HeIII ); @@ -554,50 +555,50 @@ __global__ void Update_Chemistry_kernel( Real *dev_conserved, int nx, int ny, in TS.d_HeI *= correct_He; TS.d_HeII *= correct_He; TS.d_HeIII *= correct_He; - + // Use charge conservation to determine electron fractioan TS.d_e = TS.d_HII + TS.d_HeII/4.0 + TS.d_HeIII/2.0; - + // Write the Updated Thermal State - dev_conserved[ 5*n_cells + id] = TS.d_HI * a3; - dev_conserved[ 6*n_cells + id] = TS.d_HII * a3; - dev_conserved[ 7*n_cells + id] = TS.d_HeI * a3; - dev_conserved[ 8*n_cells + id] = TS.d_HeII * a3; - dev_conserved[ 9*n_cells + id] = TS.d_HeIII * a3; - dev_conserved[10*n_cells + id] = TS.d_e * a3; + dev_conserved[id + n_cells*grid_enum::HI_density ] = TS.d_HI * a3; + dev_conserved[id + n_cells*grid_enum::HII_density ] = TS.d_HII * a3; + dev_conserved[id + n_cells*grid_enum::HeI_density ] = TS.d_HeI * a3; + dev_conserved[id + n_cells*grid_enum::HeII_density ] = TS.d_HeII * a3; + dev_conserved[id + n_cells*grid_enum::HeIII_density ] = TS.d_HeIII * a3; + dev_conserved[id + n_cells*grid_enum::e_density ] = TS.d_e * a3; d = d / density_conv * a3; GE = TS.U / d_inv / energy_conv * a2 / 1e-10; - dev_conserved[4*n_cells + id] = GE + E_kin; + dev_conserved[4*n_cells + id] = GE + E_kin; #ifdef DE dev_conserved[(n_fields-1)*n_cells + id] = GE; #endif - + if ( print ) printf("###########################################\n" ); if ( print ) printf("Updated HI: %e\n", TS.d_HI * a3 ); if ( print ) printf("Updated HII: %e\n", TS.d_HII * a3 ); if ( print ) printf("Updated HeI: %e\n", TS.d_HeI * a3 ); if ( print ) printf("Updated HeII: %e\n", TS.d_HeII * a3 ); - if ( print ) printf("Updated HeIII: %e\n", TS.d_HeIII * a3 ); + if ( print ) printf("Updated HeIII: %e\n", TS.d_HeIII * a3 ); if ( print ) printf("Updated e: %e\n", TS.d_e * a3 ); if ( print ) printf("Updated GE: %e\n", dev_conserved[(n_fields-1)*n_cells + id] ); if ( print ) printf("Updated E: %e\n", dev_conserved[4*n_cells + id] ); - + } } void Do_Chemistry_Update(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dt, Chemistry_Header &Chem_H){ - + float time; cudaEvent_t start, stop; cudaEventCreate(&start); cudaEventCreate(&stop); cudaEventRecord(start, 0); - + int ngrid = (nx*ny*nz - 1) / TPB_CHEM + 1; dim3 dim1dGrid(ngrid, 1, 1); - dim3 dim1dBlock(TPB_CHEM, 1, 1); + dim3 dim1dBlock(TPB_CHEM, 1, 1); hipLaunchKernelGGL(Update_Chemistry_kernel, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, nz, n_ghost, n_fields, dt, Chem_H ); - + CudaCheckError(); cudaEventRecord(stop, 0); cudaEventSynchronize(stop); @@ -625,7 +626,7 @@ void Do_Chemistry_Update(Real *dev_conserved, int nx, int ny, int nz, int n_ghos #endif // Boltzmann's constant #ifndef kboltz -#define kboltz 1.3806504e-16 //Boltzmann's constant [cm2gs-2K-1] or [ergK-1] +#define kboltz 1.3806504e-16 //Boltzmann's constant [cm2gs-2K-1] or [ergK-1] #endif @@ -646,7 +647,7 @@ __device__ Real coll_i_HI_rate( Real T, Real units ) + 0.0001119543953861*pow(logT_ev, 7) - 2.039149852002e-6*pow(logT_ev, 8)) / units; if (T_ev <= 0.8){ - k1 = fmax(tiny, k1); + k1 = fmax(tiny, k1); } return k1; } @@ -671,7 +672,7 @@ __device__ Real coll_i_HeI_rate( Real T, Real units ) } else { return tiny; } -} +} //Calculation of k4 (HeII + e --> HeI + photon) // k4_rate @@ -710,7 +711,7 @@ __device__ Real recomb_HeII_rate_case_A( Real T, Real units ) __device__ Real recomb_HeII_rate_case_B( Real T, Real units ) { //If case B recombination on. - return 1.26e-14 * pow(5.7067e5/T, 0.75) / units; + return 1.26e-14 * pow(5.7067e5/T, 0.75) / units; } @@ -724,7 +725,7 @@ __device__ Real recomb_HII_rate( Real T, Real units, bool use_case_B ) * pow((1.0 + 1.14813e2*pow(T, -0.407)), -2.242) / units; } else { return tiny; - } + } } else { if (T > 5500) { //Convert temperature to appropriate form. @@ -777,7 +778,7 @@ __device__ Real recomb_HII_rate_case_B( Real T, Real units ) * pow((1.0 + 1.14813e2*pow(T, -0.407)), -2.242) / units; } else { return tiny; - } + } } @@ -878,7 +879,7 @@ __device__ Real coll_i_HI_HeI_rate( Real T, Real units ) __host__ __device__ Real cool_ceHI_rate( Real T, Real units ) { return 7.5e-19*exp( -fmin(log(dhuge), 118348.0 / T) ) - / ( 1.0 + sqrt(T / 1.0e5) ) / units; + / ( 1.0 + sqrt(T / 1.0e5) ) / units; } //Calculation of ceHeI. @@ -910,7 +911,7 @@ __host__ __device__ Real cool_ciHeIS_rate( Real T, Real units ) __host__ __device__ Real cool_ciHI_rate( Real T, Real units ) { //Collisional ionization. Polynomial fit from Tom Abel. - return 2.18e-11 * coll_i_HI_rate(T, 1) / units; + return 2.18e-11 * coll_i_HI_rate(T, 1) / units; } @@ -927,7 +928,7 @@ __host__ __device__ Real cool_ciHeI_rate( Real T, Real units ) __host__ __device__ Real cool_ciHeII_rate( Real T, Real units ) { //Collisional ionization. Polynomial fit from Tom Abel. - return 8.72e-11 * coll_i_HeII_rate(T, 1) / units; + return 8.72e-11 * coll_i_HeII_rate(T, 1) / units; } //Calculation of reHII. @@ -942,7 +943,7 @@ __host__ __device__ Real cool_reHII_rate( Real T, Real units, bool use_case_B ) } else { return 1.778e-29 * T * pow(lambdaHI, 1.965) / pow(1.0 + pow(lambdaHI/0.541, 0.502), 2.697) - / units; + / units; } } @@ -953,7 +954,7 @@ __host__ __device__ Real cool_reHII_rate_case_A( Real T, Real units ) Real lambdaHI = 2.0 * 157807.0 / T; return 1.778e-29 * T * pow(lambdaHI, 1.965) / pow(1.0 + pow(lambdaHI/0.541, 0.502), 2.697) - / units; + / units; } //Calculation of reHII. @@ -977,7 +978,7 @@ __host__ __device__ Real cool_reHeII1_rate( Real T, Real units, bool use_case_B } else { return 3e-14 * kboltz * T * pow(lambdaHeII, 0.654) / units; - } + } } //Calculation of reHII. @@ -995,7 +996,7 @@ __host__ __device__ Real cool_reHeII1_rate_case_B( Real T, Real units ) { Real lambdaHeII = 2.0 * 285335.0 / T; return 1.26e-14 * kboltz * T * pow(lambdaHeII, 0.75) - / units; + / units; } //Calculation of reHII2. @@ -1005,7 +1006,7 @@ __host__ __device__ Real cool_reHeII2_rate( Real T, Real units ) //Dielectronic recombination (Cen, 1992). return 1.24e-13 * pow(T, -1.5) * exp( -fmin(log(dhuge), 470000.0 / T) ) - * ( 1.0 + 0.3 * exp( -fmin(log(dhuge), 94000.0 / T) ) ) + * ( 1.0 + 0.3 * exp( -fmin(log(dhuge), 94000.0 / T) ) ) / units; } @@ -1016,7 +1017,7 @@ __host__ __device__ Real cool_reHeIII_rate( Real T, Real units, bool use_case_B Real lambdaHeIII = 2.0 * 631515.0 / T; if ( use_case_B ) { return 8.0 * 3.435e-30 * T * pow(lambdaHeIII, 1.970) - / pow(1.0 + pow(lambdaHeIII / 2.25, 0.376), 3.720) + / pow(1.0 + pow(lambdaHeIII / 2.25, 0.376), 3.720) / units; } else { return 8.0 * 1.778e-29 * T * pow(lambdaHeIII, 1.965) @@ -1041,7 +1042,7 @@ __host__ __device__ Real cool_reHeIII_rate_case_B( Real T, Real units ) { Real lambdaHeIII = 2.0 * 631515.0 / T; return 8.0 * 3.435e-30 * T * pow(lambdaHeIII, 1.970) - / pow(1.0 + pow(lambdaHeIII / 2.25, 0.376), 3.720) + / pow(1.0 + pow(lambdaHeIII / 2.25, 0.376), 3.720) / units; } //Calculation of brem. @@ -1050,11 +1051,11 @@ __host__ __device__ Real cool_brem_rate( Real T, Real units ) { return 1.43e-27 * sqrt(T) * ( 1.1 + 0.34 * exp( -pow(5.5 - log10(T), 2) / 3.0) ) - / units; + / units; } -#endif \ No newline at end of file +#endif diff --git a/src/chemistry_gpu/chemistry_io.cpp b/src/chemistry_gpu/chemistry_io.cpp index 20cb53d7b..6f54c3f28 100644 --- a/src/chemistry_gpu/chemistry_io.cpp +++ b/src/chemistry_gpu/chemistry_io.cpp @@ -5,6 +5,7 @@ #include #include #include +#include // provides std::strcpy (strcpy in this file) #include "chemistry_gpu.h" #include "../io/io.h" @@ -107,4 +108,4 @@ void Chem_GPU::Load_UVB_Ionization_and_Heating_Rates( struct parameters *P ){ -#endif \ No newline at end of file +#endif diff --git a/src/cooling_grackle/cool_grackle.cpp b/src/cooling_grackle/cool_grackle.cpp index 4392feefd..929a355eb 100644 --- a/src/cooling_grackle/cool_grackle.cpp +++ b/src/cooling_grackle/cool_grackle.cpp @@ -6,7 +6,7 @@ #include #include "../io/io.h" #include "../cooling_grackle/cool_grackle.h" - +#include "../grid/grid_enum.h" Cool_GK::Cool_GK( void ){} @@ -146,20 +146,19 @@ Cool.fields.x_velocity = NULL; Cool.fields.y_velocity = NULL; Cool.fields.z_velocity = NULL; - chprintf( " Allocating memory for: HI, HII, HeI, HeII, HeIII, e densities\n"); -Cool.fields.HI_density = &C.scalar[ 0*n_cells ]; -Cool.fields.HII_density = &C.scalar[ 1*n_cells ]; -Cool.fields.HeI_density = &C.scalar[ 2*n_cells ]; -Cool.fields.HeII_density = &C.scalar[ 3*n_cells ]; -Cool.fields.HeIII_density = &C.scalar[ 4*n_cells ]; -Cool.fields.e_density = &C.scalar[ 5*n_cells ]; +Cool.fields.HI_density = &C.density[ H.n_cells*grid_enum::HI_density ]; +Cool.fields.HII_density = &C.density[ H.n_cells*grid_enum::HII_density ]; +Cool.fields.HeI_density = &C.density[ H.n_cells*grid_enum::HeI_density ]; +Cool.fields.HeII_density = &C.density[ H.n_cells*grid_enum::HeII_density ]; +Cool.fields.HeIII_density = &C.density[ H.n_cells*grid_enum::HeIII_density ]; +Cool.fields.e_density = &C.density[ H.n_cells*grid_enum::e_density ]; #ifdef GRACKLE_METALS chprintf( " Allocating memory for: metal density\n"); -Cool.fields.metal_density = &C.scalar[ 6*n_cells ]; +Cool.fields.metal_density = &C.density[ H.n_cells*grid_enum::metal_density ]; #else -Cool.fields.metal_density = NULL; +Cool.fields.metal_density = NULL; #endif #ifdef OUTPUT_TEMPERATURE diff --git a/src/cosmology/cosmology_functions.cpp b/src/cosmology/cosmology_functions.cpp index c1ceb8299..6dfcfa7d7 100644 --- a/src/cosmology/cosmology_functions.cpp +++ b/src/cosmology/cosmology_functions.cpp @@ -4,6 +4,7 @@ #include "../grid/grid3D.h" #include "../global/global.h" #include "../io/io.h" +#include "../grid/grid_enum.h" @@ -102,24 +103,24 @@ void Grid3D::Change_GAS_Frame_System( bool forward ){ #endif #ifdef COOLING_GRACKLE - C.scalar[0*H.n_cells + id] *= dens_factor; - C.scalar[1*H.n_cells + id] *= dens_factor; - C.scalar[2*H.n_cells + id] *= dens_factor; - C.scalar[3*H.n_cells + id] *= dens_factor; - C.scalar[4*H.n_cells + id] *= dens_factor; - C.scalar[5*H.n_cells + id] *= dens_factor; + C.HI_density[id] *= dens_factor; + C.HII_density[id] *= dens_factor; + C.HeI_density[id] *= dens_factor; + C.HeII_density[id] *= dens_factor; + C.HeIII_density[id] *= dens_factor; + C.e_density[id] *= dens_factor; #ifdef GRACKLE_METALS - C.scalar[6*H.n_cells + id] *= dens_factor; + C.metal_density[id] *= dens_factor; #endif #endif//COOLING_GRACKLE #ifdef CHEMISTRY_GPU - C.scalar[0*H.n_cells + id] *= dens_factor; - C.scalar[1*H.n_cells + id] *= dens_factor; - C.scalar[2*H.n_cells + id] *= dens_factor; - C.scalar[3*H.n_cells + id] *= dens_factor; - C.scalar[4*H.n_cells + id] *= dens_factor; - C.scalar[5*H.n_cells + id] *= dens_factor; + C.HI_density[id] *= dens_factor; + C.HII_density[id] *= dens_factor; + C.HeI_density[id] *= dens_factor; + C.HeII_density[id] *= dens_factor; + C.HeIII_density[id] *= dens_factor; + C.e_density[id] *= dens_factor; #endif } diff --git a/src/grid/grid3D.cpp b/src/grid/grid3D.cpp index 03c1dc7c1..412570e65 100644 --- a/src/grid/grid3D.cpp +++ b/src/grid/grid3D.cpp @@ -8,6 +8,7 @@ #endif #include "../global/global.h" #include "../grid/grid3D.h" +#include "../grid/grid_enum.h" // provides grid_enum #include "../hydro/hydro_cuda.h" // provides Calc_dt_GPU #include "../integrators/VL_1D_cuda.h" #include "../integrators/VL_2D_cuda.h" @@ -323,12 +324,12 @@ void Grid3D::AllocateMemory(void) #ifdef CHEMISTRY_GPU - C.HI_density = &C.scalar[ 0*H.n_cells ]; - C.HII_density = &C.scalar[ 1*H.n_cells ]; - C.HeI_density = &C.scalar[ 2*H.n_cells ]; - C.HeII_density = &C.scalar[ 3*H.n_cells ]; - C.HeIII_density = &C.scalar[ 4*H.n_cells ]; - C.e_density = &C.scalar[ 5*H.n_cells ]; + C.HI_density = &C.density[ H.n_cells*grid_enum::HI_density ]; + C.HII_density = &C.density[ H.n_cells*grid_enum::HII_density ]; + C.HeI_density = &C.density[ H.n_cells*grid_enum::HeI_density ]; + C.HeII_density = &C.density[ H.n_cells*grid_enum::HeII_density ]; + C.HeIII_density = &C.density[ H.n_cells*grid_enum::HeIII_density ]; + C.e_density = &C.density[ H.n_cells*grid_enum::e_density ]; #endif // initialize host array @@ -491,24 +492,25 @@ Real Grid3D::Update_Grid(void) #ifdef COOLING_GRACKLE Cool.fields.density = C.density; - Cool.fields.HI_density = &C.scalar[ 0*H.n_cells ]; - Cool.fields.HII_density = &C.scalar[ 1*H.n_cells ]; - Cool.fields.HeI_density = &C.scalar[ 2*H.n_cells ]; - Cool.fields.HeII_density = &C.scalar[ 3*H.n_cells ]; - Cool.fields.HeIII_density = &C.scalar[ 4*H.n_cells ]; - Cool.fields.e_density = &C.scalar[ 5*H.n_cells ]; + Cool.fields.HI_density = &C.density[ H.n_cells*grid_enum::HI_density ]; + Cool.fields.HII_density = &C.density[ H.n_cells*grid_enum::HII_density ]; + Cool.fields.HeI_density = &C.density[ H.n_cells*grid_enum::HeI_density ]; + Cool.fields.HeII_density = &C.density[ H.n_cells*grid_enum::HeII_density ]; + Cool.fields.HeIII_density = &C.density[ H.n_cells*grid_enum::HeIII_density ]; + Cool.fields.e_density = &C.density[ H.n_cells*grid_enum::e_density ]; + #ifdef GRACKLE_METALS - Cool.fields.metal_density = &C.scalar[ 6*H.n_cells ]; + Cool.fields.metal_density = &C.density[ H.n_cells*grid_enum::metal_density ]; #endif #endif #ifdef CHEMISTRY_GPU - C.HI_density = &C.scalar[ 0*H.n_cells ]; - C.HII_density = &C.scalar[ 1*H.n_cells ]; - C.HeI_density = &C.scalar[ 2*H.n_cells ]; - C.HeII_density = &C.scalar[ 3*H.n_cells ]; - C.HeIII_density = &C.scalar[ 4*H.n_cells ]; - C.e_density = &C.scalar[ 5*H.n_cells ]; + C.HI_density = &C.density[ H.n_cells*grid_enum::HI_density ]; + C.HII_density = &C.density[ H.n_cells*grid_enum::HII_density ]; + C.HeI_density = &C.density[ H.n_cells*grid_enum::HeI_density ]; + C.HeII_density = &C.density[ H.n_cells*grid_enum::HeII_density ]; + C.HeIII_density = &C.density[ H.n_cells*grid_enum::HeIII_density ]; + C.e_density = &C.density[ H.n_cells*grid_enum::e_density ]; #endif diff --git a/src/grid/grid_enum.h b/src/grid/grid_enum.h new file mode 100644 index 000000000..ece0bfa83 --- /dev/null +++ b/src/grid/grid_enum.h @@ -0,0 +1,54 @@ +#pragma once + +// An experimental enum which holds offsets for grid quantities +// In the final form of this approach, this file will also set nfields and NSCALARS, +// so that adding a field only requires registering it here. + + +// Must be unscoped to be treated as int +// ": int" forces underlying type to be int +enum grid_enum : int { + + // Don't touch hydro quantities until all of hydro is refactored (if ever) + density, + momentum_x, + momentum_y, + momentum_z, + Energy, + + // Code assumes scalars are a contiguous block + #ifdef SCALAR + scalar, + scalar_minus_1 = scalar - 1,// so that next enum item starts at same index as scalar + + // TODO: Add scalars here: + + + finalscalar_plus_1, + // TODO: set finalscalar = finalscalar_plus_1 - 1, and then define NSCALARS equivalent from here. + finalscalar = scalar + NSCALARS - 1, + // so that anything after starts with scalar + NSCALARS + #endif // SCALAR + #ifdef MHD + magnetic_x, + magnetic_y, + magnetic_z, + #endif + #ifdef DE + GasEnergy, + #endif + num_fields, + +//Aliases + #if defined(COOLING_GRACKLE) || defined(CHEMISTRY_GPU) + HI_density = scalar, + HII_density, + HeI_density, + HeII_density, + HeIII_density, + e_density, + #ifdef GRACKLE_METALS + metal_density, + #endif + #endif +}; diff --git a/src/grid/initial_conditions.cpp b/src/grid/initial_conditions.cpp index bbfddf65f..55b9aae41 100644 --- a/src/grid/initial_conditions.cpp +++ b/src/grid/initial_conditions.cpp @@ -1591,14 +1591,14 @@ void Grid3D::Chemistry_Test( struct parameters P ) #ifdef COOLING_GRACKLE - C.scalar[0*H.n_cells + id] = rho_gas_mean * HI_frac; - C.scalar[1*H.n_cells + id] = rho_gas_mean * HII_frac; - C.scalar[2*H.n_cells + id] = rho_gas_mean * HeI_frac; - C.scalar[3*H.n_cells + id] = rho_gas_mean * HeII_frac; - C.scalar[4*H.n_cells + id] = rho_gas_mean * HeIII_frac; - C.scalar[5*H.n_cells + id] = rho_gas_mean * e_frac; + C.HI_density[id] = rho_gas_mean * HI_frac; + C.HII_density[id] = rho_gas_mean * HII_frac; + C.HeI_density[id] = rho_gas_mean * HeI_frac; + C.HeII_density[id] = rho_gas_mean * HeII_frac; + C.HeIII_density[id] = rho_gas_mean * HeIII_frac; + C.e_density[id] = rho_gas_mean * e_frac; #ifdef GRACKLE_METALS - C.scalar[6*H.n_cells + id] = rho_gas_mean * metal_frac; + C.metal_density[id] = rho_gas_mean * metal_frac; #endif #endif diff --git a/src/io/io.cpp b/src/io/io.cpp index 051e9130c..e0c6fc089 100644 --- a/src/io/io.cpp +++ b/src/io/io.cpp @@ -80,7 +80,7 @@ void WriteData(Grid3D &G, struct parameters P, int nfile) cudaMemcpy(G.C.density, G.C.device, G.H.n_fields*G.H.n_cells*sizeof(Real), cudaMemcpyDeviceToHost); chprintf( "\nSaving Snapshot: %d \n", nfile ); - + #ifdef HDF5 // Initialize HDF5 interface H5open(); @@ -153,7 +153,7 @@ void WriteData(Grid3D &G, struct parameters P, int nfile) chprintf( "\n" ); G.H.Output_Now = false; #endif - + #ifdef HDF5 // Cleanup HDF5 H5close(); @@ -211,7 +211,7 @@ void OutputData(Grid3D &G, struct parameters P, int nfile) #elif defined HDF5 hid_t file_id; /* file identifier */ herr_t status; - + // Create a new file using default properties. file_id = H5Fcreate(filename, H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT); @@ -220,10 +220,10 @@ void OutputData(Grid3D &G, struct parameters P, int nfile) // write the conserved variables to the output file G.Write_Grid_HDF5(file_id); - + // close the file status = H5Fclose(file_id); - + if (status < 0) {printf("File write failed.\n"); exit(-1); } #else @@ -245,7 +245,7 @@ void OutputData(Grid3D &G, struct parameters P, int nfile) void OutputFloat32(Grid3D &G, struct parameters P, int nfile) { - + Header H = G.H; // Do nothing in 1-D and 2-D case if (H.ny_real == 1) { @@ -3065,14 +3065,14 @@ void Grid3D::Read_Grid_HDF5(hid_t file_id, struct parameters P) for (i=0; i Date: Wed, 16 Nov 2022 16:27:13 -0500 Subject: [PATCH 133/694] remove dust build-specific definitions of TEMP_FLOOR and min_dt_slow. --- src/global/global.h | 7 ------- src/grid/grid3D.cpp | 4 ---- 2 files changed, 11 deletions(-) diff --git a/src/global/global.h b/src/global/global.h index 382627159..f33a05de3 100644 --- a/src/global/global.h +++ b/src/global/global.h @@ -55,14 +55,7 @@ typedef double Real; #define LOG_FILE_NAME "run_output.log" //Conserved Floor Values -#ifdef DUST -#define TEMP_FLOOR 10 // in Kelvin -#else // NOT DUST #define TEMP_FLOOR 1e-3 -#endif - - - #define DENS_FLOOR 1e-5 // in code units //Parameter for Enzo dual Energy Condition diff --git a/src/grid/grid3D.cpp b/src/grid/grid3D.cpp index c31065012..4018fd7cd 100644 --- a/src/grid/grid3D.cpp +++ b/src/grid/grid3D.cpp @@ -140,11 +140,7 @@ void Grid3D::Initialize(struct parameters *P) C_cfl = 0.3; #ifdef AVERAGE_SLOW_CELLS - #ifndef DUST H.min_dt_slow = 1e-100; //Initialize the minumum dt to a tiny number - #else - H.min_dt_slow = 1e-2; - #endif // DUST #endif // AVERAGE_SLOW_CELLS #ifndef MPI_CHOLLA From 11f247736a7d915595d61fe1e19696c7029d0d2e Mon Sep 17 00:00:00 2001 From: helenarichie Date: Wed, 16 Nov 2022 16:38:23 -0500 Subject: [PATCH 134/694] add citations for sputtering equation and remove unnecessary modifications to the Clouds() initial conditions function. --- src/dust/dust_cuda.cu | 2 ++ src/grid/initial_conditions.cpp | 13 +++++++------ 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/src/dust/dust_cuda.cu b/src/dust/dust_cuda.cu index 480199874..0bcdf5d3f 100644 --- a/src/dust/dust_cuda.cu +++ b/src/dust/dust_cuda.cu @@ -110,6 +110,7 @@ __global__ void Dust_Kernel(Real *dev_conserved, int nx, int ny, int nz, int n_g } } +// McKinnon et al. (2017) __device__ Real calc_tau_sp(Real n, Real T) { Real YR_IN_S = 3.154e7; Real a1 = 1; // dust grain size in units of 0.1 micrometers @@ -123,6 +124,7 @@ __device__ Real calc_tau_sp(Real n, Real T) { return tau_sp; } +// McKinnon et al. (2017) __device__ Real calc_dd_dt(Real d_dust, Real tau_sp) { return -d_dust / (tau_sp/3); } diff --git a/src/grid/initial_conditions.cpp b/src/grid/initial_conditions.cpp index c75e9f3af..e60c6c8c5 100644 --- a/src/grid/initial_conditions.cpp +++ b/src/grid/initial_conditions.cpp @@ -1247,7 +1247,7 @@ void Grid3D::Clouds() Real p_bg, p_cl; // background and cloud pressure Real mu = 0.6; // mean atomic weight int N_cl = 1; // number of clouds - Real R_cl = 0.1; // cloud radius in code units (kpc) + Real R_cl = 2.5; // cloud radius in code units (kpc) Real cl_pos[N_cl][3]; // array of cloud positions Real r; @@ -1261,17 +1261,18 @@ void Grid3D::Clouds() // single centered cloud setup for (int nn=0; nn Date: Tue, 22 Nov 2022 10:12:46 -0500 Subject: [PATCH 135/694] restore deleted cholla-tests-data --- cholla-tests-data | 1 + 1 file changed, 1 insertion(+) create mode 160000 cholla-tests-data diff --git a/cholla-tests-data b/cholla-tests-data new file mode 160000 index 000000000..66d592821 --- /dev/null +++ b/cholla-tests-data @@ -0,0 +1 @@ +Subproject commit 66d5928213b495c2fef61b0653b90a25ae3aa7cf From 0f8a2c255ab7dcb9fb94739146bb71bf917e7ba5 Mon Sep 17 00:00:00 2001 From: Alwin Date: Fri, 2 Dec 2022 00:52:09 -0500 Subject: [PATCH 136/694] add basic_scalar and merge 62ed646105dcb8cce440de32b694f319ab82e37a --- builds/make.type.basic_scalar | 34 ++++++++++++++ src/chemistry_gpu/chemistry_functions.cpp | 38 +++++++++------ src/chemistry_gpu/chemistry_functions_gpu.cu | 9 +++- src/grid/grid3D.cpp | 10 +++- src/grid/grid3D.h | 10 +++- src/grid/grid_enum.h | 49 +++++++++++++------- src/grid/initial_conditions.cpp | 36 ++++++++++---- 7 files changed, 139 insertions(+), 47 deletions(-) create mode 100644 builds/make.type.basic_scalar diff --git a/builds/make.type.basic_scalar b/builds/make.type.basic_scalar new file mode 100644 index 000000000..d2dd75892 --- /dev/null +++ b/builds/make.type.basic_scalar @@ -0,0 +1,34 @@ +#-- Default hydro build with BASIC_SCALAR + +DFLAGS += -DCUDA +DFLAGS += -DMPI_CHOLLA +DFLAGS += -DPRECISION=2 +DFLAGS += -DPPMC +DFLAGS += -DHLLC + +# Integrator +DFLAGS += -DSIMPLE +#DFLAGS += -DVL + +# Apply a density and temperature floor +DFLAGS += -DDENSITY_FLOOR +DFLAGS += -DTEMPERATURE_FLOOR + +# Toggle scalar fields in general +DFLAGS += -DSCALAR +# Toggle Basic scalar field +DFLAGS += -DBASIC_SCALAR + +# Solve the Gas Internal Energy usisng a Dual Energy Formalism +#DFLAGS += -DDE + +# Apply cooling on the GPU from precomputed tables +#DFLAGS += -DCOOLING_GPU + +# Measure the Timing of the different stages +#DFLAGS += -DCPU_TIME + +# Select output format +# Can also add -DSLICES and -DPROJECTIONS +OUTPUT ?= -DOUTPUT -DHDF5 +DFLAGS += $(OUTPUT) diff --git a/src/chemistry_gpu/chemistry_functions.cpp b/src/chemistry_gpu/chemistry_functions.cpp index cbfa23d25..b7925df9d 100644 --- a/src/chemistry_gpu/chemistry_functions.cpp +++ b/src/chemistry_gpu/chemistry_functions.cpp @@ -31,35 +31,45 @@ void Grid3D::Initialize_Chemistry( struct parameters *P ){ Chem.H.Temp_end = 1000000000.0; Chem.H.H_fraction = INITIAL_FRACTION_HI + INITIAL_FRACTION_HII; - -#ifdef COSMOLOGY + +#ifdef COSMOLOGY Chem.H.H0 = P->H0; Chem.H.Omega_M = P->Omega_M; Chem.H.Omega_L = P->Omega_L; -#endif +#endif //COSMOLOGY // Set up the units system. Real Msun, kpc_cgs, kpc_km, dens_to_CGS; Msun = MSUN_CGS; kpc_cgs = KPC_CGS; kpc_km = KPC; + dens_to_CGS = Msun / kpc_cgs / kpc_cgs / kpc_cgs; #ifdef COSMOLOGY - dens_to_CGS = Cosmo.rho_0_gas * Msun / kpc_cgs / kpc_cgs / kpc_cgs * Cosmo.cosmo_h * Cosmo.cosmo_h; + dens_to_CGS = dens_to_CGS * Cosmo.rho_0_gas * Cosmo.cosmo_h * Cosmo.cosmo_h; +#endif //COSMOLOGY // These are conversions from code units to cgs. Following Grackle + Chem.H.density_units = dens_to_CGS; + Chem.H.length_units = kpc_cgs; + Chem.H.time_units = kpc_km; + Chem.H.dens_number_conv = Chem.H.density_units / MH; +#ifdef COSMOLOGY Chem.H.a_value = Cosmo.current_a; - Chem.H.density_units = dens_to_CGS / Chem.H.a_value / Chem.H.a_value / Chem.H.a_value ; - Chem.H.length_units = kpc_cgs / Cosmo.cosmo_h * Chem.H.a_value; - Chem.H.time_units = kpc_km / Cosmo.cosmo_h ; + Chem.H.density_units = Chem.H.density_units / Chem.H.a_value / Chem.H.a_value / Chem.H.a_value ; + Chem.H.length_units = Chem.H.length_units / Cosmo.cosmo_h * Chem.H.a_value; + Chem.H.time_units = Chem.H.time_units / Cosmo.cosmo_h ; + Chem.H.dens_number_conv = Chem.H.density_number_conv * pow(Chem.H.a_value, 3); +#endif //COSMOLOGY Chem.H.velocity_units = Chem.H.length_units /Chem.H.time_units; - Chem.H.dens_number_conv = Chem.H.density_units * pow(Chem.H.a_value, 3) / MH; -#else - chprintf("EXITING: Chemistry is not supported without Cosmology, see %s::%d\n",__FILE__,__LINE__); - exit(-1); -#endif + Real dens_base, length_base, time_base; - dens_base = Chem.H.density_units * Chem.H.a_value * Chem.H.a_value * Chem.H.a_value; - length_base = Chem.H.length_units / Chem.H.a_value; + dens_base = Chem.H.density_units; + length_base = Chem.H.length_units; +#ifdef COSMOLOGY + dens_base = dens_base * Chem.H.a_value * Chem.H.a_value * Chem.H.a_value; + length_base = length_base / Chem.H.a_value; +#endif //COSMOLOGY + time_base = Chem.H.time_units; Chem.H.cooling_units = ( pow(length_base, 2) * pow(MH, 2) ) / ( dens_base * pow(time_base, 3) ); Chem.H.reaction_units = MH / (dens_base * time_base ); diff --git a/src/chemistry_gpu/chemistry_functions_gpu.cu b/src/chemistry_gpu/chemistry_functions_gpu.cu index defb89c35..0f621b7f4 100644 --- a/src/chemistry_gpu/chemistry_functions_gpu.cu +++ b/src/chemistry_gpu/chemistry_functions_gpu.cu @@ -467,8 +467,13 @@ __global__ void Update_Chemistry_kernel( Real *dev_conserved, int nx, int ny, in a2 = current_a * current_a; a3 = a2 * current_a; d *= density_conv / a3; - GE *= energy_conv / a2; - dt_hydro = dt_hydro * current_a * current_a / Chem_H.H0 * 1000 * KPC / Chem_H.time_units; + GE *= energy_conv / a2; + dt_hydro = dt_hydro / Chem_H.time_units; + +#ifdef COSMOLOGY + dt_hydro *= current_a * current_a / Chem_H.H0 * 1000 * KPC +#endif //COSMOLOGY + //dt_hydro = dt_hydro * current_a * current_a / Chem_H.H0 * 1000 * KPC / Chem_H.time_units; // delta_a = Chem_H.H0 * sqrt( Chem_H.Omega_M/current_a + Chem_H.Omega_L*pow(current_a, 2) ) / ( 1000 * KPC ) * dt_hydro * Chem_H.time_units; // Initialize the thermal state diff --git a/src/grid/grid3D.cpp b/src/grid/grid3D.cpp index 412570e65..ab1a73f85 100644 --- a/src/grid/grid3D.cpp +++ b/src/grid/grid3D.cpp @@ -276,7 +276,10 @@ void Grid3D::AllocateMemory(void) C.momentum_z = &(C.host[3*H.n_cells]); C.Energy = &(C.host[4*H.n_cells]); #ifdef SCALAR - C.scalar = &(C.host[5*H.n_cells]); + C.scalar = &(C.host[H.n_cells*grid_enum::scalar]); + #ifdef BASIC_SCALAR + C.basic_scalar = &(C.host[H.n_cells*grid_enum::basic_scalar]); + #endif #endif //SCALAR #ifdef MHD C.magnetic_x = &(C.host[(5 + NSCALARS)*H.n_cells]); @@ -295,7 +298,10 @@ void Grid3D::AllocateMemory(void) C.d_momentum_z = &(C.device[3*H.n_cells]); C.d_Energy = &(C.device[4*H.n_cells]); #ifdef SCALAR - C.d_scalar = &(C.device[5*H.n_cells]); + C.d_scalar = &(C.device[H.n_cells*grid_enum::scalar]); + #ifdef BASIC_SCALAR + C.d_basic_scalar = &(C.device[H.n_cells*grid_enum::basic_scalar]); + #endif #endif // SCALAR #ifdef MHD C.d_magnetic_x = &(C.device[(5 + NSCALARS)*H.n_cells]); diff --git a/src/grid/grid3D.h b/src/grid/grid3D.h index 1dd748f23..7bbe35461 100644 --- a/src/grid/grid3D.h +++ b/src/grid/grid3D.h @@ -354,8 +354,13 @@ class Grid3D #ifdef SCALAR /*! \var scalar - * \brief Array containing the values of the passive scalar variable(s). */ + * \brief Array containing the values of passive scalar variable(s). */ Real *scalar; + #ifdef BASIC_SCALAR + /*! \var basic_scalar + * \brief Array containing the values of a basic passive scalar variable. */ + Real *basic_scalar; + #endif #endif // SCALAR #ifdef MHD @@ -402,7 +407,8 @@ class Grid3D /*! pointer to conserved variable on device */ Real *device; Real *d_density, *d_momentum_x, *d_momentum_y, *d_momentum_z, - *d_Energy, *d_scalar, *d_magnetic_x, *d_magnetic_y, *d_magnetic_z, + *d_Energy, *d_scalar, *d_basic_scalar, + *d_magnetic_x, *d_magnetic_y, *d_magnetic_z, *d_GasEnergy; /*! pointer to gravitational potential on device */ diff --git a/src/grid/grid_enum.h b/src/grid/grid_enum.h index ece0bfa83..8bdb77d11 100644 --- a/src/grid/grid_enum.h +++ b/src/grid/grid_enum.h @@ -5,11 +5,17 @@ // so that adding a field only requires registering it here. -// Must be unscoped to be treated as int +// enum notes: +// Must be "unscoped" to be implicitly treated as int: this means cannot use "enum class" or "enum struct" +// Wrapped in namespace to give it an effective scope to prevent collisions +// enum values (i.e. density) belong to their enclosing scope, which necessitates the namespace wrapping +// --otherwise "density" would be available in global scope // ": int" forces underlying type to be int -enum grid_enum : int { - // Don't touch hydro quantities until all of hydro is refactored (if ever) +namespace grid_enum { +enum : int { + + // Don't touch hydro quantities until all of hydro is made consistent with grid_enum (if ever) density, momentum_x, momentum_y, @@ -21,12 +27,27 @@ enum grid_enum : int { scalar, scalar_minus_1 = scalar - 1,// so that next enum item starts at same index as scalar - // TODO: Add scalars here: + // Add scalars here, wrapped appropriately with ifdefs: + #ifdef BASIC_SCALAR + basic_scalar, + #endif + + #if defined(COOLING_GRACKLE) || defined(CHEMISTRY_GPU) + HI_density, + HII_density, + HeI_density, + HeII_density, + HeIII_density, + e_density, + #ifdef GRACKLE_METALS + metal_density, + #endif + #endif - finalscalar_plus_1, - // TODO: set finalscalar = finalscalar_plus_1 - 1, and then define NSCALARS equivalent from here. - finalscalar = scalar + NSCALARS - 1, + finalscalar_plus_1, // needed to calculate NSCALARS + finalscalar = finalscalar_plus_1 - 1; // resets enum to finalscalar so fields afterwards are correct + // so that anything after starts with scalar + NSCALARS #endif // SCALAR #ifdef MHD @@ -40,15 +61,7 @@ enum grid_enum : int { num_fields, //Aliases - #if defined(COOLING_GRACKLE) || defined(CHEMISTRY_GPU) - HI_density = scalar, - HII_density, - HeI_density, - HeII_density, - HeIII_density, - e_density, - #ifdef GRACKLE_METALS - metal_density, - #endif - #endif + nscalars = finalscalar_plus_1 - scalar, + }; +} diff --git a/src/grid/initial_conditions.cpp b/src/grid/initial_conditions.cpp index 55b9aae41..189121cb3 100644 --- a/src/grid/initial_conditions.cpp +++ b/src/grid/initial_conditions.cpp @@ -349,7 +349,9 @@ void Grid3D::Square_Wave(Real rho, Real vx, Real vy, Real vz, Real P, Real A) C.GasEnergy[id] = P/(gama-1.0); #endif #ifdef SCALAR - C.scalar[id] = C.density[id]*0.0; + #ifdef BASIC_SCALAR + C.basic_scalar[id] = C.density[id]*0.0; + #endif #endif if (x_pos > 0.25*H.xdglobal && x_pos < 0.75*H.xdglobal) { @@ -362,7 +364,9 @@ void Grid3D::Square_Wave(Real rho, Real vx, Real vy, Real vz, Real P, Real A) C.GasEnergy[id] = P/(gama-1.0); #endif #ifdef SCALAR - C.scalar[id] = C.density[id]*1.0; + #ifdef BASIC_SCALAR + C.basic_scalar[id] = C.density[id]*1.0; + #endif #endif } } @@ -451,7 +455,9 @@ void Grid3D::Riemann(Real rho_l, Real vx_l, Real vy_l, Real vz_l, Real P_l, Real C.momentum_z[id] = rho_l * vz_l; C.Energy[id] = P_l/(gama-1.0) + 0.5*rho_l*(vx_l*vx_l + vy_l*vy_l + vz_l*vz_l); #ifdef SCALAR - C.scalar[id] = 1.0*rho_l; + #ifdef BASIC_SCALAR + C.basic_scalar[id] = 1.0*rho_l; + #endif #endif //SCALAR #ifdef DE C.GasEnergy[id] = P_l/(gama-1.0); @@ -465,7 +471,9 @@ void Grid3D::Riemann(Real rho_l, Real vx_l, Real vy_l, Real vz_l, Real P_l, Real C.momentum_z[id] = rho_r * vz_r; C.Energy[id] = P_r/(gama-1.0) + 0.5*rho_r*(vx_r*vx_r + vy_r*vy_r + vz_r*vz_r); #ifdef SCALAR - C.scalar[id] = 0.0*rho_r; + #ifdef BASIC_SCALAR + C.basic_scalar[id] = 0.0*rho_r; + #endif #endif //SCALAR #ifdef DE C.GasEnergy[id] = P_r/(gama-1.0); @@ -616,7 +624,9 @@ void Grid3D::KH() C.momentum_y[id] = C.density[id]*A*sin(4*PI*x_pos); C.momentum_z[id] = 0.0; #ifdef SCALAR - C.scalar[id] = 0.0; + #ifdef BASIC_SCALAR + C.basic_scalar[id] = 0.0; + #endif #endif } else if (y_pos >= 3.0*H.ydglobal/4.0) @@ -627,7 +637,9 @@ void Grid3D::KH() C.momentum_z[id] = 0.0; #ifdef SCALAR - C.scalar[id] = 0.0; + #ifdef BASIC_SCALAR + C.basic_scalar[id] = 0.0; + #endif #endif } // inner half of slab @@ -639,7 +651,9 @@ void Grid3D::KH() C.momentum_z[id] = 0.0; #ifdef SCALAR - C.scalar[id] = 1.0*d1; + #ifdef BASIC_SCALAR + C.basic_scalar[id] = 1.0*d1; + #endif #endif } C.Energy[id] = P/(gama-1.0) + 0.5*(C.momentum_x[id]*C.momentum_x[id] + C.momentum_y[id]*C.momentum_y[id])/C.density[id]; @@ -1317,7 +1331,9 @@ void Grid3D::Clouds() C.GasEnergy[id] = p_bg/(gama-1.0); #endif #ifdef SCALAR - C.scalar[id] = C.density[id]*0.0; + #ifdef BASIC_SCALAR + C.basic_scalar[id] = C.density[id]*0.0; + #endif #endif // add clouds for (int nn = 0; nn Date: Fri, 2 Dec 2022 16:00:36 -0500 Subject: [PATCH 137/694] grid_enum takes over NSCALARS --- src/global/global.h | 19 +++---------------- src/grid/grid_enum.h | 17 +++++++++++------ 2 files changed, 14 insertions(+), 22 deletions(-) diff --git a/src/global/global.h b/src/global/global.h index e3b50818d..10c493fa1 100644 --- a/src/global/global.h +++ b/src/global/global.h @@ -5,6 +5,8 @@ #ifndef GLOBAL_H #define GLOBAL_H +#include "../grid/grid_enum.h" // defines NSCALARS + #ifdef COOLING_CPU #include #include @@ -66,22 +68,7 @@ typedef double Real; #define MAX_DELTA_A 0.001 #define MAX_EXPANSION_RATE 0.01 // Limit delta(a)/a -#ifdef COOLING_GRACKLE - #ifdef GRACKLE_METALS - #define NSCALARS 7 - #else - #define NSCALARS 6 - #endif // GRACKLE_METALS -#elif CHEMISTRY_GPU - #define NSCALARS 6 -#else -#ifdef SCALAR -// Set Number of scalar fields when not using grackle -#define NSCALARS 1 -#else -#define NSCALARS 0 -#endif//SCALAR -#endif//COOLING_GRACKLE + #ifdef MHD #define N_MHD_FIELDS 3 diff --git a/src/grid/grid_enum.h b/src/grid/grid_enum.h index 8bdb77d11..333c9a5cd 100644 --- a/src/grid/grid_enum.h +++ b/src/grid/grid_enum.h @@ -1,12 +1,15 @@ #pragma once -// An experimental enum which holds offsets for grid quantities -// In the final form of this approach, this file will also set nfields and NSCALARS, -// so that adding a field only requires registering it here. +// An enum which holds offsets for grid quantities +// In the final form of this approach, this file will also set nfields (not yet) and NSCALARS (done) +// so that adding a field only requires registering it here: +// grid knows to allocate memory based on nfields and NSCALARS +// and values can be accessed with density[ncells*grid_enum::enum_name + id] +// example: C.device[H.n_cells*grid_enum::basic_scalar + id] // enum notes: -// Must be "unscoped" to be implicitly treated as int: this means cannot use "enum class" or "enum struct" +// For advanced devs: must be "unscoped" to be implicitly treated as int: this means cannot use "enum class" or "enum struct" // Wrapped in namespace to give it an effective scope to prevent collisions // enum values (i.e. density) belong to their enclosing scope, which necessitates the namespace wrapping // --otherwise "density" would be available in global scope @@ -15,7 +18,7 @@ namespace grid_enum { enum : int { - // Don't touch hydro quantities until all of hydro is made consistent with grid_enum (if ever) + // Don't change order of hydro quantities until all of hydro is made consistent with grid_enum (if ever) because enum values depend on order density, momentum_x, momentum_y, @@ -46,7 +49,7 @@ enum : int { finalscalar_plus_1, // needed to calculate NSCALARS - finalscalar = finalscalar_plus_1 - 1; // resets enum to finalscalar so fields afterwards are correct + finalscalar = finalscalar_plus_1 - 1, // resets enum to finalscalar so fields afterwards are correct // so that anything after starts with scalar + NSCALARS #endif // SCALAR @@ -65,3 +68,5 @@ enum : int { }; } + +#define NSCALARS grid_enum::nscalars From 743abe3fc3f6413b8e773db59ed19318dfb00e83 Mon Sep 17 00:00:00 2001 From: Alwin Date: Fri, 2 Dec 2022 16:09:09 -0500 Subject: [PATCH 138/694] delete test garbage --- src/main.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/main.cpp b/src/main.cpp index d3111e851..da2348858 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -40,7 +40,6 @@ int main(int argc, char *argv[]) // start the total time start_total = get_time(); - // TestGridEnum(); /* Initialize MPI communication */ #ifdef MPI_CHOLLA InitializeChollaMPI(&argc, &argv); From fe26ad39484849cc26ddd34bfbf5ee1c32b96fb3 Mon Sep 17 00:00:00 2001 From: Alwin Date: Fri, 2 Dec 2022 16:35:07 -0500 Subject: [PATCH 139/694] always define scalar helper enums to define NSCALARS --- src/grid/grid_enum.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/grid/grid_enum.h b/src/grid/grid_enum.h index 333c9a5cd..84d41b20e 100644 --- a/src/grid/grid_enum.h +++ b/src/grid/grid_enum.h @@ -26,10 +26,11 @@ enum : int { Energy, // Code assumes scalars are a contiguous block - #ifdef SCALAR + // Always define scalar, scalar_minus_1, finalscalar_plus_1, finalscalar to compute NSCALARS scalar, scalar_minus_1 = scalar - 1,// so that next enum item starts at same index as scalar + #ifdef SCALAR // Add scalars here, wrapped appropriately with ifdefs: #ifdef BASIC_SCALAR basic_scalar, @@ -47,12 +48,12 @@ enum : int { #endif #endif + #endif // SCALAR finalscalar_plus_1, // needed to calculate NSCALARS - finalscalar = finalscalar_plus_1 - 1, // resets enum to finalscalar so fields afterwards are correct - + finalscalar = finalscalar_plus_1 - 1, // resets enum to finalscalar so fields afterwards are correct // so that anything after starts with scalar + NSCALARS - #endif // SCALAR + #ifdef MHD magnetic_x, magnetic_y, From 90a3942727a146dc898ef224c7aa64b1a2685ab5 Mon Sep 17 00:00:00 2001 From: helenarichie Date: Tue, 6 Dec 2022 14:09:27 -0500 Subject: [PATCH 140/694] bring grid_enum indexing into DUST --- cloudy_coolingcurve.txt | 9802 +++++++++++++++++++++++++++++++++++++++ src/dust/dust_cuda.cu | 20 +- src/grid/grid_enum.h | 4 + 3 files changed, 9816 insertions(+), 10 deletions(-) create mode 100644 cloudy_coolingcurve.txt diff --git a/cloudy_coolingcurve.txt b/cloudy_coolingcurve.txt new file mode 100644 index 000000000..ac458a6d1 --- /dev/null +++ b/cloudy_coolingcurve.txt @@ -0,0 +1,9802 @@ +#log n log T log cool/n2 log heat/n2 + -6.00 1.00 -25.332 -21.168 + -6.00 1.10 -25.276 -21.209 + -6.00 1.20 -25.219 -21.250 + -6.00 1.30 -25.162 -21.293 + -6.00 1.40 -25.101 -21.336 + -6.00 1.50 -25.035 -21.379 + -6.00 1.60 -24.965 -21.423 + -6.00 1.70 -24.893 -21.468 + -6.00 1.80 -24.819 -21.513 + -6.00 1.90 -24.738 -21.559 + -6.00 2.00 -24.636 -21.605 + -6.00 2.10 -24.504 -21.652 + -6.00 2.20 -24.344 -21.698 + -6.00 2.30 -24.168 -21.742 + -6.00 2.40 -23.990 -21.786 + -6.00 2.50 -23.826 -21.831 + -6.00 2.60 -23.679 -21.875 + -6.00 2.70 -23.552 -21.920 + -6.00 2.80 -23.445 -21.965 + -6.00 2.90 -23.360 -22.011 + -6.00 3.00 -23.299 -22.057 + -6.00 3.10 -23.263 -22.103 + -6.00 3.20 -23.250 -22.148 + -6.00 3.30 -23.257 -22.194 + -6.00 3.40 -23.282 -22.238 + -6.00 3.50 -23.320 -22.282 + -6.00 3.60 -23.366 -22.325 + -6.00 3.70 -23.414 -22.368 + -6.00 3.80 -23.459 -22.409 + -6.00 3.90 -23.494 -22.449 + -6.00 4.00 -23.517 -22.487 + -6.00 4.10 -23.524 -22.525 + -6.00 4.20 -23.510 -22.561 + -6.00 4.30 -23.466 -22.596 + -6.00 4.40 -23.385 -22.630 + -6.00 4.50 -23.273 -22.662 + -6.00 4.60 -23.149 -22.693 + -6.00 4.70 -23.030 -22.723 + -6.00 4.80 -22.924 -22.752 + -6.00 4.90 -22.826 -22.780 + -6.00 5.00 -22.728 -22.808 + -6.00 5.10 -22.630 -22.835 + -6.00 5.20 -22.528 -22.864 + -6.00 5.30 -22.430 -22.893 + -6.00 5.40 -22.344 -22.921 + -6.00 5.50 -22.271 -22.950 + -6.00 5.60 -22.214 -22.983 + -6.00 5.70 -22.174 -23.024 + -6.00 5.80 -22.205 -23.092 + -6.00 5.90 -22.749 -23.478 + -6.00 6.00 -22.847 -23.628 + -6.00 6.10 -22.798 -23.664 + -6.00 6.20 -22.737 -23.692 + -6.00 6.30 -22.664 -23.715 + -6.00 6.40 -22.579 -23.714 + -6.00 6.50 -22.489 -23.719 + -6.00 6.60 -22.398 -23.740 + -6.00 6.70 -22.307 -23.776 + -6.00 6.80 -22.221 -23.753 + -6.00 6.90 -22.202 -23.011 + -6.00 7.00 -22.046 -24.082 + -6.00 7.10 -21.972 -24.212 + -6.00 7.20 -21.897 -24.347 + -6.00 7.30 -21.816 -24.281 + -6.00 7.40 -21.728 -24.271 + -6.00 7.50 -21.637 -24.091 + -6.00 7.60 -21.543 -23.749 + -6.00 7.70 -21.446 -23.747 + -6.00 7.80 -21.350 -23.855 + -6.00 7.90 -21.253 -23.978 + -6.00 8.00 -21.156 -24.093 + -6.00 8.10 -21.060 -24.227 + -6.00 8.20 -20.960 -24.276 + -6.00 8.30 -20.863 -24.361 + -6.00 8.40 -20.765 -24.479 + -6.00 8.50 -20.667 -24.502 + -6.00 8.60 -20.568 -24.587 + -6.00 8.70 -20.472 -24.607 + -6.00 8.80 -20.372 -24.646 + -6.00 8.90 -20.273 -24.677 + -6.00 9.00 -20.176 -24.705 + -5.90 1.00 -25.341 -21.199 + -5.90 1.10 -25.284 -21.239 + -5.90 1.20 -25.227 -21.279 + -5.90 1.30 -25.169 -21.320 + -5.90 1.40 -25.105 -21.362 + -5.90 1.50 -25.035 -21.405 + -5.90 1.60 -24.957 -21.448 + -5.90 1.70 -24.875 -21.492 + -5.90 1.80 -24.791 -21.536 + -5.90 1.90 -24.703 -21.581 + -5.90 2.00 -24.596 -21.627 + -5.90 2.10 -24.460 -21.673 + -5.90 2.20 -24.296 -21.718 + -5.90 2.30 -24.117 -21.762 + -5.90 2.40 -23.938 -21.807 + -5.90 2.50 -23.773 -21.851 + -5.90 2.60 -23.627 -21.895 + -5.90 2.70 -23.501 -21.941 + -5.90 2.80 -23.396 -21.986 + -5.90 2.90 -23.314 -22.033 + -5.90 3.00 -23.255 -22.079 + -5.90 3.10 -23.220 -22.127 + -5.90 3.20 -23.206 -22.174 + -5.90 3.30 -23.212 -22.221 + -5.90 3.40 -23.233 -22.267 + -5.90 3.50 -23.267 -22.313 + -5.90 3.60 -23.309 -22.358 + -5.90 3.70 -23.355 -22.402 + -5.90 3.80 -23.400 -22.444 + -5.90 3.90 -23.438 -22.485 + -5.90 4.00 -23.468 -22.525 + -5.90 4.10 -23.483 -22.564 + -5.90 4.20 -23.477 -22.602 + -5.90 4.30 -23.437 -22.639 + -5.90 4.40 -23.351 -22.674 + -5.90 4.50 -23.228 -22.707 + -5.90 4.60 -23.091 -22.739 + -5.90 4.70 -22.962 -22.770 + -5.90 4.80 -22.847 -22.799 + -5.90 4.90 -22.744 -22.829 + -5.90 5.00 -22.647 -22.858 + -5.90 5.10 -22.550 -22.888 + -5.90 5.20 -22.457 -22.919 + -5.90 5.30 -22.372 -22.949 + -5.90 5.40 -22.295 -22.979 + -5.90 5.50 -22.228 -23.007 + -5.90 5.60 -22.164 -23.038 + -5.90 5.70 -22.118 -23.073 + -5.90 5.80 -22.102 -23.117 + -5.90 5.90 -22.150 -23.188 + -5.90 6.00 -22.325 -23.333 + -5.90 6.10 -22.730 -23.645 + -5.90 6.20 -22.762 -23.714 + -5.90 6.30 -22.700 -23.733 + -5.90 6.40 -22.625 -23.747 + -5.90 6.50 -22.541 -23.757 + -5.90 6.60 -22.454 -23.779 + -5.90 6.70 -22.368 -23.822 + -5.90 6.80 -22.284 -23.865 + -5.90 6.90 -22.284 -23.266 + -5.90 7.00 -22.117 -24.176 + -5.90 7.10 -22.047 -24.293 + -5.90 7.20 -21.978 -24.368 + -5.90 7.30 -21.899 -24.540 + -5.90 7.40 -21.815 -24.417 + -5.90 7.50 -21.726 -24.267 + -5.90 7.60 -21.634 -23.912 + -5.90 7.70 -21.539 -23.757 + -5.90 7.80 -21.443 -23.867 + -5.90 7.90 -21.347 -23.982 + -5.90 8.00 -21.252 -24.105 + -5.90 8.10 -21.153 -24.207 + -5.90 8.20 -21.059 -24.357 + -5.90 8.30 -20.959 -24.403 + -5.90 8.40 -20.861 -24.481 + -5.90 8.50 -20.763 -24.595 + -5.90 8.60 -20.665 -24.617 + -5.90 8.70 -20.567 -24.671 + -5.90 8.80 -20.483 -24.727 + -5.90 8.90 -20.371 -24.755 + -5.90 9.00 -20.273 -24.786 + -5.80 1.00 -25.350 -21.232 + -5.80 1.10 -25.293 -21.270 + -5.80 1.20 -25.236 -21.310 + -5.80 1.30 -25.176 -21.350 + -5.80 1.40 -25.109 -21.391 + -5.80 1.50 -25.032 -21.432 + -5.80 1.60 -24.945 -21.474 + -5.80 1.70 -24.850 -21.517 + -5.80 1.80 -24.755 -21.560 + -5.80 1.90 -24.659 -21.604 + -5.80 2.00 -24.549 -21.649 + -5.80 2.10 -24.414 -21.694 + -5.80 2.20 -24.251 -21.739 + -5.80 2.30 -24.073 -21.784 + -5.80 2.40 -23.896 -21.827 + -5.80 2.50 -23.732 -21.872 + -5.80 2.60 -23.587 -21.916 + -5.80 2.70 -23.461 -21.962 + -5.80 2.80 -23.357 -22.007 + -5.80 2.90 -23.276 -22.054 + -5.80 3.00 -23.219 -22.101 + -5.80 3.10 -23.184 -22.149 + -5.80 3.20 -23.171 -22.198 + -5.80 3.30 -23.175 -22.246 + -5.80 3.40 -23.195 -22.294 + -5.80 3.50 -23.226 -22.342 + -5.80 3.60 -23.265 -22.388 + -5.80 3.70 -23.307 -22.433 + -5.80 3.80 -23.350 -22.477 + -5.80 3.90 -23.388 -22.519 + -5.80 4.00 -23.420 -22.561 + -5.80 4.10 -23.440 -22.601 + -5.80 4.20 -23.437 -22.641 + -5.80 4.30 -23.397 -22.679 + -5.80 4.40 -23.304 -22.715 + -5.80 4.50 -23.169 -22.750 + -5.80 4.60 -23.020 -22.782 + -5.80 4.70 -22.882 -22.813 + -5.80 4.80 -22.759 -22.844 + -5.80 4.90 -22.651 -22.874 + -5.80 5.00 -22.553 -22.905 + -5.80 5.10 -22.461 -22.937 + -5.80 5.20 -22.376 -22.972 + -5.80 5.30 -22.301 -23.006 + -5.80 5.40 -22.239 -23.039 + -5.80 5.50 -22.179 -23.067 + -5.80 5.60 -22.126 -23.098 + -5.80 5.70 -22.079 -23.132 + -5.80 5.80 -22.054 -23.172 + -5.80 5.90 -22.070 -23.223 + -5.80 6.00 -22.154 -23.308 + -5.80 6.10 -22.294 -23.446 + -5.80 6.20 -22.455 -23.601 + -5.80 6.30 -22.646 -23.754 + -5.80 6.40 -22.645 -23.778 + -5.80 6.50 -22.574 -23.801 + -5.80 6.60 -22.497 -23.833 + -5.80 6.70 -22.418 -23.881 + -5.80 6.80 -22.342 -23.934 + -5.80 6.90 -22.270 -23.743 + -5.80 7.00 -22.180 -24.255 + -5.80 7.10 -22.119 -24.422 + -5.80 7.20 -22.053 -24.522 + -5.80 7.30 -21.980 -24.633 + -5.80 7.40 -21.899 -24.550 + -5.80 7.50 -21.813 -24.433 + -5.80 7.60 -21.723 -24.113 + -5.80 7.70 -21.630 -23.851 + -5.80 7.80 -21.535 -23.877 + -5.80 7.90 -21.440 -23.995 + -5.80 8.00 -21.344 -24.114 + -5.80 8.10 -21.248 -24.228 + -5.80 8.20 -21.152 -24.356 + -5.80 8.30 -21.057 -24.485 + -5.80 8.40 -20.957 -24.563 + -5.80 8.50 -20.864 -24.635 + -5.80 8.60 -20.762 -24.670 + -5.80 8.70 -20.664 -24.731 + -5.80 8.80 -20.577 -24.791 + -5.80 8.90 -20.481 -24.838 + -5.80 9.00 -20.382 -24.874 + -5.70 1.00 -25.358 -21.266 + -5.70 1.10 -25.301 -21.304 + -5.70 1.20 -25.244 -21.342 + -5.70 1.30 -25.182 -21.381 + -5.70 1.40 -25.112 -21.421 + -5.70 1.50 -25.028 -21.462 + -5.70 1.60 -24.929 -21.502 + -5.70 1.70 -24.821 -21.544 + -5.70 1.80 -24.712 -21.586 + -5.70 1.90 -24.607 -21.630 + -5.70 2.00 -24.495 -21.673 + -5.70 2.10 -24.364 -21.718 + -5.70 2.20 -24.207 -21.762 + -5.70 2.30 -24.035 -21.806 + -5.70 2.40 -23.863 -21.850 + -5.70 2.50 -23.702 -21.894 + -5.70 2.60 -23.558 -21.938 + -5.70 2.70 -23.432 -21.983 + -5.70 2.80 -23.329 -22.029 + -5.70 2.90 -23.248 -22.076 + -5.70 3.00 -23.191 -22.123 + -5.70 3.10 -23.156 -22.171 + -5.70 3.20 -23.142 -22.221 + -5.70 3.30 -23.146 -22.270 + -5.70 3.40 -23.164 -22.319 + -5.70 3.50 -23.193 -22.368 + -5.70 3.60 -23.229 -22.416 + -5.70 3.70 -23.269 -22.463 + -5.70 3.80 -23.309 -22.508 + -5.70 3.90 -23.346 -22.552 + -5.70 4.00 -23.378 -22.594 + -5.70 4.10 -23.398 -22.636 + -5.70 4.20 -23.395 -22.677 + -5.70 4.30 -23.351 -22.717 + -5.70 4.40 -23.249 -22.754 + -5.70 4.50 -23.102 -22.789 + -5.70 4.60 -22.941 -22.822 + -5.70 4.70 -22.793 -22.854 + -5.70 4.80 -22.663 -22.885 + -5.70 4.90 -22.550 -22.916 + -5.70 5.00 -22.452 -22.949 + -5.70 5.10 -22.364 -22.984 + -5.70 5.20 -22.289 -23.022 + -5.70 5.30 -22.227 -23.061 + -5.70 5.40 -22.180 -23.096 + -5.70 5.50 -22.134 -23.128 + -5.70 5.60 -22.090 -23.162 + -5.70 5.70 -22.046 -23.196 + -5.70 5.80 -22.019 -23.236 + -5.70 5.90 -22.009 -23.281 + -5.70 6.00 -22.047 -23.346 + -5.70 6.10 -22.138 -23.458 + -5.70 6.20 -22.285 -23.597 + -5.70 6.30 -22.410 -23.721 + -5.70 6.40 -22.559 -23.842 + -5.70 6.50 -22.590 -23.855 + -5.70 6.60 -22.527 -23.898 + -5.70 6.70 -22.458 -23.953 + -5.70 6.80 -22.390 -24.015 + -5.70 6.90 -22.324 -23.962 + -5.70 7.00 -22.239 -24.346 + -5.70 7.10 -22.181 -24.469 + -5.70 7.20 -22.125 -24.577 + -5.70 7.30 -22.058 -24.727 + -5.70 7.40 -21.981 -24.677 + -5.70 7.50 -21.897 -24.593 + -5.70 7.60 -21.809 -24.308 + -5.70 7.70 -21.719 -23.940 + -5.70 7.80 -21.625 -23.896 + -5.70 7.90 -21.531 -24.005 + -5.70 8.00 -21.436 -24.128 + -5.70 8.10 -21.341 -24.246 + -5.70 8.20 -21.245 -24.406 + -5.70 8.30 -21.151 -24.493 + -5.70 8.40 -21.052 -24.554 + -5.70 8.50 -20.957 -24.687 + -5.70 8.60 -20.863 -24.754 + -5.70 8.70 -20.761 -24.786 + -5.70 8.80 -20.663 -24.869 + -5.70 8.90 -20.575 -24.904 + -5.70 9.00 -20.467 -24.939 + -5.60 1.00 -25.366 -21.301 + -5.60 1.10 -25.309 -21.338 + -5.60 1.20 -25.251 -21.376 + -5.60 1.30 -25.189 -21.415 + -5.60 1.40 -25.115 -21.453 + -5.60 1.50 -25.022 -21.493 + -5.60 1.60 -24.910 -21.533 + -5.60 1.70 -24.787 -21.573 + -5.60 1.80 -24.665 -21.615 + -5.60 1.90 -24.550 -21.657 + -5.60 2.00 -24.437 -21.699 + -5.60 2.10 -24.311 -21.743 + -5.60 2.20 -24.164 -21.786 + -5.60 2.30 -24.001 -21.830 + -5.60 2.40 -23.836 -21.873 + -5.60 2.50 -23.681 -21.917 + -5.60 2.60 -23.539 -21.962 + -5.60 2.70 -23.415 -22.007 + -5.60 2.80 -23.311 -22.052 + -5.60 2.90 -23.230 -22.098 + -5.60 3.00 -23.171 -22.145 + -5.60 3.10 -23.135 -22.194 + -5.60 3.20 -23.120 -22.243 + -5.60 3.30 -23.122 -22.293 + -5.60 3.40 -23.139 -22.343 + -5.60 3.50 -23.166 -22.393 + -5.60 3.60 -23.201 -22.442 + -5.60 3.70 -23.239 -22.490 + -5.60 3.80 -23.277 -22.537 + -5.60 3.90 -23.312 -22.581 + -5.60 4.00 -23.341 -22.625 + -5.60 4.10 -23.359 -22.668 + -5.60 4.20 -23.354 -22.711 + -5.60 4.30 -23.302 -22.752 + -5.60 4.40 -23.189 -22.790 + -5.60 4.50 -23.029 -22.825 + -5.60 4.60 -22.859 -22.859 + -5.60 4.70 -22.701 -22.890 + -5.60 4.80 -22.563 -22.922 + -5.60 4.90 -22.444 -22.955 + -5.60 5.00 -22.344 -22.989 + -5.60 5.10 -22.260 -23.028 + -5.60 5.20 -22.196 -23.070 + -5.60 5.30 -22.146 -23.113 + -5.60 5.40 -22.112 -23.152 + -5.60 5.50 -22.085 -23.188 + -5.60 5.60 -22.051 -23.225 + -5.60 5.70 -22.015 -23.264 + -5.60 5.80 -21.986 -23.306 + -5.60 5.90 -21.978 -23.350 + -5.60 6.00 -22.005 -23.406 + -5.60 6.10 -22.062 -23.498 + -5.60 6.20 -22.206 -23.634 + -5.60 6.30 -22.344 -23.765 + -5.60 6.40 -22.453 -23.884 + -5.60 6.50 -22.536 -23.974 + -5.60 6.60 -22.538 -23.998 + -5.60 6.70 -22.490 -24.033 + -5.60 6.80 -22.431 -24.102 + -5.60 6.90 -22.371 -24.108 + -5.60 7.00 -22.428 -23.111 + -5.60 7.10 -22.241 -24.561 + -5.60 7.20 -22.192 -24.675 + -5.60 7.30 -22.131 -24.758 + -5.60 7.40 -22.058 -24.797 + -5.60 7.50 -21.978 -24.741 + -5.60 7.60 -21.890 -24.985 + -5.60 7.70 -21.805 -24.137 + -5.60 7.80 -21.713 -23.966 + -5.60 7.90 -21.620 -24.014 + -5.60 8.00 -21.526 -24.139 + -5.60 8.10 -21.432 -24.261 + -5.60 8.20 -21.337 -24.414 + -5.60 8.30 -21.242 -24.483 + -5.60 8.40 -21.148 -24.596 + -5.60 8.50 -21.050 -24.730 + -5.60 8.60 -20.957 -24.787 + -5.60 8.70 -20.856 -24.835 + -5.60 8.80 -20.758 -24.902 + -5.60 8.90 -20.669 -24.965 + -5.60 9.00 -20.563 -25.010 + -5.50 1.00 -25.374 -21.336 + -5.50 1.10 -25.317 -21.374 + -5.50 1.20 -25.259 -21.411 + -5.50 1.30 -25.195 -21.449 + -5.50 1.40 -25.117 -21.487 + -5.50 1.50 -25.015 -21.526 + -5.50 1.60 -24.890 -21.565 + -5.50 1.70 -24.751 -21.604 + -5.50 1.80 -24.616 -21.645 + -5.50 1.90 -24.492 -21.686 + -5.50 2.00 -24.376 -21.728 + -5.50 2.10 -24.256 -21.770 + -5.50 2.20 -24.119 -21.813 + -5.50 2.30 -23.969 -21.855 + -5.50 2.40 -23.815 -21.898 + -5.50 2.50 -23.667 -21.942 + -5.50 2.60 -23.530 -21.986 + -5.50 2.70 -23.408 -22.031 + -5.50 2.80 -23.304 -22.076 + -5.50 2.90 -23.221 -22.122 + -5.50 3.00 -23.161 -22.169 + -5.50 3.10 -23.123 -22.216 + -5.50 3.20 -23.104 -22.265 + -5.50 3.30 -23.104 -22.315 + -5.50 3.40 -23.118 -22.365 + -5.50 3.50 -23.144 -22.416 + -5.50 3.60 -23.177 -22.466 + -5.50 3.70 -23.214 -22.515 + -5.50 3.80 -23.250 -22.563 + -5.50 3.90 -23.283 -22.609 + -5.50 4.00 -23.310 -22.654 + -5.50 4.10 -23.325 -22.698 + -5.50 4.20 -23.314 -22.742 + -5.50 4.30 -23.253 -22.784 + -5.50 4.40 -23.128 -22.823 + -5.50 4.50 -22.956 -22.859 + -5.50 4.60 -22.776 -22.892 + -5.50 4.70 -22.609 -22.924 + -5.50 4.80 -22.462 -22.956 + -5.50 4.90 -22.337 -22.989 + -5.50 5.00 -22.234 -23.025 + -5.50 5.10 -22.153 -23.068 + -5.50 5.20 -22.096 -23.116 + -5.50 5.30 -22.062 -23.164 + -5.50 5.40 -22.043 -23.207 + -5.50 5.50 -22.028 -23.247 + -5.50 5.60 -22.013 -23.289 + -5.50 5.70 -21.988 -23.333 + -5.50 5.80 -21.960 -23.377 + -5.50 5.90 -21.951 -23.423 + -5.50 6.00 -21.933 -23.476 + -5.50 6.10 -21.976 -23.555 + -5.50 6.20 -22.131 -23.683 + -5.50 6.30 -22.301 -23.823 + -5.50 6.40 -22.407 -23.948 + -5.50 6.50 -22.502 -24.053 + -5.50 6.60 -22.521 -24.122 + -5.50 6.70 -22.502 -24.165 + -5.50 6.80 -22.461 -24.214 + -5.50 6.90 -22.412 -24.227 + -5.50 7.00 -22.496 -23.293 + -5.50 7.10 -22.294 -24.654 + -5.50 7.20 -22.253 -24.777 + -5.50 7.30 -22.200 -24.867 + -5.50 7.40 -22.131 -24.996 + -5.50 7.50 -22.055 -24.879 + -5.50 7.60 -21.972 -24.675 + -5.50 7.70 -21.884 -25.111 + -5.50 7.80 -21.798 -24.052 + -5.50 7.90 -21.706 -24.036 + -5.50 8.00 -21.614 -24.148 + -5.50 8.10 -21.522 -24.273 + -5.50 8.20 -21.428 -24.461 + -5.50 8.30 -21.334 -24.503 + -5.50 8.40 -21.239 -24.608 + -5.50 8.50 -21.143 -24.760 + -5.50 8.60 -21.047 -24.796 + -5.50 8.70 -20.950 -24.921 + -5.50 8.80 -20.853 -24.953 + -5.50 8.90 -20.756 -25.018 + -5.50 9.00 -20.658 -25.073 + -5.40 1.00 -25.381 -21.371 + -5.40 1.10 -25.324 -21.409 + -5.40 1.20 -25.266 -21.447 + -5.40 1.30 -25.200 -21.484 + -5.40 1.40 -25.118 -21.522 + -5.40 1.50 -25.007 -21.560 + -5.40 1.60 -24.868 -21.599 + -5.40 1.70 -24.716 -21.637 + -5.40 1.80 -24.568 -21.677 + -5.40 1.90 -24.436 -21.717 + -5.40 2.00 -24.317 -21.758 + -5.40 2.10 -24.201 -21.799 + -5.40 2.20 -24.074 -21.841 + -5.40 2.30 -23.937 -21.883 + -5.40 2.40 -23.796 -21.925 + -5.40 2.50 -23.658 -21.968 + -5.40 2.60 -23.528 -22.012 + -5.40 2.70 -23.409 -22.056 + -5.40 2.80 -23.306 -22.101 + -5.40 2.90 -23.223 -22.147 + -5.40 3.00 -23.161 -22.193 + -5.40 3.10 -23.119 -22.240 + -5.40 3.20 -23.096 -22.288 + -5.40 3.30 -23.092 -22.337 + -5.40 3.40 -23.103 -22.388 + -5.40 3.50 -23.127 -22.438 + -5.40 3.60 -23.158 -22.489 + -5.40 3.70 -23.192 -22.539 + -5.40 3.80 -23.226 -22.587 + -5.40 3.90 -23.257 -22.634 + -5.40 4.00 -23.281 -22.680 + -5.40 4.10 -23.292 -22.726 + -5.40 4.20 -23.275 -22.771 + -5.40 4.30 -23.205 -22.814 + -5.40 4.40 -23.067 -22.853 + -5.40 4.50 -22.884 -22.889 + -5.40 4.60 -22.696 -22.922 + -5.40 4.70 -22.520 -22.954 + -5.40 4.80 -22.364 -22.986 + -5.40 4.90 -22.231 -23.019 + -5.40 5.00 -22.124 -23.058 + -5.40 5.10 -22.045 -23.105 + -5.40 5.20 -21.996 -23.159 + -5.40 5.30 -21.974 -23.213 + -5.40 5.40 -21.971 -23.261 + -5.40 5.50 -21.972 -23.307 + -5.40 5.60 -21.974 -23.354 + -5.40 5.70 -21.962 -23.403 + -5.40 5.80 -21.936 -23.451 + -5.40 5.90 -21.918 -23.499 + -5.40 6.00 -21.919 -23.553 + -5.40 6.10 -21.948 -23.624 + -5.40 6.20 -22.084 -23.741 + -5.40 6.30 -22.268 -23.887 + -5.40 6.40 -22.382 -24.021 + -5.40 6.50 -22.473 -24.137 + -5.40 6.60 -22.511 -24.222 + -5.40 6.70 -22.508 -24.277 + -5.40 6.80 -22.481 -24.337 + -5.40 6.90 -22.442 -24.373 + -5.40 7.00 -22.418 -23.699 + -5.40 7.10 -22.342 -24.748 + -5.40 7.20 -22.309 -24.875 + -5.40 7.30 -22.263 -24.973 + -5.40 7.40 -22.200 -25.027 + -5.40 7.50 -22.128 -25.010 + -5.40 7.60 -22.048 -24.848 + -5.40 7.70 -21.965 -24.543 + -5.40 7.80 -21.879 -24.248 + -5.40 7.90 -21.790 -24.114 + -5.40 8.00 -21.700 -24.166 + -5.40 8.10 -21.609 -24.283 + -5.40 8.20 -21.517 -24.404 + -5.40 8.30 -21.423 -24.519 + -5.40 8.40 -21.329 -24.628 + -5.40 8.50 -21.235 -24.731 + -5.40 8.60 -21.139 -24.827 + -5.40 8.70 -21.043 -24.917 + -5.40 8.80 -20.951 -25.011 + -5.40 8.90 -20.850 -25.070 + -5.40 9.00 -20.753 -25.133 + -5.30 1.00 -25.388 -21.406 + -5.30 1.10 -25.331 -21.444 + -5.30 1.20 -25.272 -21.482 + -5.30 1.30 -25.205 -21.520 + -5.30 1.40 -25.118 -21.558 + -5.30 1.50 -24.998 -21.596 + -5.30 1.60 -24.847 -21.634 + -5.30 1.70 -24.683 -21.672 + -5.30 1.80 -24.524 -21.711 + -5.30 1.90 -24.384 -21.750 + -5.30 2.00 -24.261 -21.790 + -5.30 2.10 -24.147 -21.830 + -5.30 2.20 -24.030 -21.871 + -5.30 2.30 -23.906 -21.912 + -5.30 2.40 -23.779 -21.954 + -5.30 2.50 -23.652 -21.996 + -5.30 2.60 -23.530 -22.040 + -5.30 2.70 -23.416 -22.083 + -5.30 2.80 -23.317 -22.128 + -5.30 2.90 -23.233 -22.173 + -5.30 3.00 -23.169 -22.218 + -5.30 3.10 -23.123 -22.265 + -5.30 3.20 -23.096 -22.312 + -5.30 3.30 -23.087 -22.361 + -5.30 3.40 -23.094 -22.410 + -5.30 3.50 -23.113 -22.461 + -5.30 3.60 -23.142 -22.511 + -5.30 3.70 -23.174 -22.561 + -5.30 3.80 -23.206 -22.611 + -5.30 3.90 -23.234 -22.658 + -5.30 4.00 -23.255 -22.705 + -5.30 4.10 -23.262 -22.752 + -5.30 4.20 -23.237 -22.797 + -5.30 4.30 -23.156 -22.841 + -5.30 4.40 -23.008 -22.881 + -5.30 4.50 -22.816 -22.918 + -5.30 4.60 -22.620 -22.951 + -5.30 4.70 -22.437 -22.982 + -5.30 4.80 -22.272 -23.013 + -5.30 4.90 -22.130 -23.046 + -5.30 5.00 -22.017 -23.086 + -5.30 5.10 -21.937 -23.138 + -5.30 5.20 -21.895 -23.200 + -5.30 5.30 -21.886 -23.261 + -5.30 5.40 -21.900 -23.316 + -5.30 5.50 -21.918 -23.368 + -5.30 5.60 -21.935 -23.421 + -5.30 5.70 -21.934 -23.476 + -5.30 5.80 -21.920 -23.528 + -5.30 5.90 -21.902 -23.578 + -5.30 6.00 -21.905 -23.632 + -5.30 6.10 -21.929 -23.700 + -5.30 6.20 -22.046 -23.807 + -5.30 6.30 -22.233 -23.957 + -5.30 6.40 -22.364 -24.100 + -5.30 6.50 -22.458 -24.225 + -5.30 6.60 -22.507 -24.318 + -5.30 6.70 -22.513 -24.390 + -5.30 6.80 -22.497 -24.450 + -5.30 6.90 -22.467 -24.499 + -5.30 7.00 -22.443 -24.002 + -5.30 7.10 -22.385 -24.846 + -5.30 7.20 -22.359 -24.975 + -5.30 7.30 -22.321 -25.078 + -5.30 7.40 -22.264 -25.137 + -5.30 7.50 -22.196 -25.133 + -5.30 7.60 -22.120 -25.008 + -5.30 7.70 -22.040 -24.739 + -5.30 7.80 -21.956 -24.457 + -5.30 7.90 -21.870 -24.238 + -5.30 8.00 -21.782 -24.245 + -5.30 8.10 -21.693 -24.316 + -5.30 8.20 -21.603 -24.416 + -5.30 8.30 -21.511 -24.534 + -5.30 8.40 -21.418 -24.646 + -5.30 8.50 -21.325 -24.753 + -5.30 8.60 -21.232 -24.855 + -5.30 8.70 -21.135 -24.974 + -5.30 8.80 -21.040 -25.037 + -5.30 8.90 -20.947 -25.143 + -5.30 9.00 -20.847 -25.187 + -5.20 1.00 -25.394 -21.439 + -5.20 1.10 -25.337 -21.478 + -5.20 1.20 -25.278 -21.517 + -5.20 1.30 -25.209 -21.555 + -5.20 1.40 -25.117 -21.593 + -5.20 1.50 -24.990 -21.631 + -5.20 1.60 -24.828 -21.669 + -5.20 1.70 -24.653 -21.707 + -5.20 1.80 -24.485 -21.746 + -5.20 1.90 -24.338 -21.784 + -5.20 2.00 -24.211 -21.823 + -5.20 2.10 -24.098 -21.863 + -5.20 2.20 -23.988 -21.903 + -5.20 2.30 -23.875 -21.944 + -5.20 2.40 -23.761 -21.985 + -5.20 2.50 -23.646 -22.026 + -5.20 2.60 -23.534 -22.069 + -5.20 2.70 -23.427 -22.112 + -5.20 2.80 -23.332 -22.155 + -5.20 2.90 -23.250 -22.200 + -5.20 3.00 -23.184 -22.245 + -5.20 3.10 -23.136 -22.291 + -5.20 3.20 -23.104 -22.337 + -5.20 3.30 -23.089 -22.385 + -5.20 3.40 -23.091 -22.434 + -5.20 3.50 -23.105 -22.484 + -5.20 3.60 -23.130 -22.534 + -5.20 3.70 -23.159 -22.584 + -5.20 3.80 -23.187 -22.633 + -5.20 3.90 -23.212 -22.682 + -5.20 4.00 -23.230 -22.729 + -5.20 4.10 -23.232 -22.776 + -5.20 4.20 -23.200 -22.822 + -5.20 4.30 -23.108 -22.867 + -5.20 4.40 -22.951 -22.908 + -5.20 4.50 -22.752 -22.944 + -5.20 4.60 -22.550 -22.978 + -5.20 4.70 -22.361 -23.009 + -5.20 4.80 -22.188 -23.039 + -5.20 4.90 -22.039 -23.072 + -5.20 5.00 -21.918 -23.113 + -5.20 5.10 -21.835 -23.169 + -5.20 5.20 -21.795 -23.239 + -5.20 5.30 -21.798 -23.309 + -5.20 5.40 -21.827 -23.373 + -5.20 5.50 -21.867 -23.433 + -5.20 5.60 -21.896 -23.491 + -5.20 5.70 -21.909 -23.550 + -5.20 5.80 -21.897 -23.608 + -5.20 5.90 -21.890 -23.659 + -5.20 6.00 -21.889 -23.714 + -5.20 6.10 -21.908 -23.781 + -5.20 6.20 -22.012 -23.881 + -5.20 6.30 -22.208 -24.031 + -5.20 6.40 -22.346 -24.181 + -5.20 6.50 -22.449 -24.315 + -5.20 6.60 -22.505 -24.417 + -5.20 6.70 -22.519 -24.497 + -5.20 6.80 -22.510 -24.561 + -5.20 6.90 -22.487 -24.619 + -5.20 7.00 -22.467 -24.235 + -5.20 7.10 -22.420 -24.938 + -5.20 7.20 -22.403 -25.074 + -5.20 7.30 -22.373 -25.182 + -5.20 7.40 -22.322 -25.246 + -5.20 7.50 -22.259 -25.253 + -5.20 7.60 -22.186 -25.157 + -5.20 7.70 -22.109 -24.928 + -5.20 7.80 -22.029 -24.663 + -5.20 7.90 -21.946 -24.464 + -5.20 8.00 -21.861 -24.339 + -5.20 8.10 -21.774 -24.385 + -5.20 8.20 -21.685 -24.437 + -5.20 8.30 -21.596 -24.563 + -5.20 8.40 -21.505 -24.670 + -5.20 8.50 -21.413 -24.772 + -5.20 8.60 -21.320 -24.883 + -5.20 8.70 -21.226 -24.982 + -5.20 8.80 -21.130 -25.074 + -5.20 8.90 -21.035 -25.160 + -5.20 9.00 -20.940 -25.236 + -5.10 1.00 -25.400 -21.469 + -5.10 1.10 -25.343 -21.510 + -5.10 1.20 -25.283 -21.550 + -5.10 1.30 -25.212 -21.589 + -5.10 1.40 -25.116 -21.628 + -5.10 1.50 -24.982 -21.667 + -5.10 1.60 -24.812 -21.705 + -5.10 1.70 -24.628 -21.743 + -5.10 1.80 -24.453 -21.781 + -5.10 1.90 -24.299 -21.820 + -5.10 2.00 -24.168 -21.858 + -5.10 2.10 -24.054 -21.897 + -5.10 2.20 -23.948 -21.937 + -5.10 2.30 -23.845 -21.977 + -5.10 2.40 -23.742 -22.017 + -5.10 2.50 -23.639 -22.058 + -5.10 2.60 -23.537 -22.099 + -5.10 2.70 -23.439 -22.142 + -5.10 2.80 -23.349 -22.185 + -5.10 2.90 -23.270 -22.228 + -5.10 3.00 -23.205 -22.272 + -5.10 3.10 -23.154 -22.318 + -5.10 3.20 -23.119 -22.364 + -5.10 3.30 -23.099 -22.411 + -5.10 3.40 -23.094 -22.459 + -5.10 3.50 -23.104 -22.508 + -5.10 3.60 -23.123 -22.557 + -5.10 3.70 -23.148 -22.607 + -5.10 3.80 -23.172 -22.656 + -5.10 3.90 -23.194 -22.705 + -5.10 4.00 -23.207 -22.752 + -5.10 4.10 -23.202 -22.800 + -5.10 4.20 -23.162 -22.847 + -5.10 4.30 -23.061 -22.892 + -5.10 4.40 -22.896 -22.933 + -5.10 4.50 -22.692 -22.971 + -5.10 4.60 -22.487 -23.004 + -5.10 4.70 -22.293 -23.036 + -5.10 4.80 -22.115 -23.066 + -5.10 4.90 -21.958 -23.098 + -5.10 5.00 -21.831 -23.140 + -5.10 5.10 -21.743 -23.200 + -5.10 5.20 -21.703 -23.276 + -5.10 5.30 -21.713 -23.356 + -5.10 5.40 -21.758 -23.431 + -5.10 5.50 -21.820 -23.500 + -5.10 5.60 -21.863 -23.563 + -5.10 5.70 -21.869 -23.627 + -5.10 5.80 -21.883 -23.689 + -5.10 5.90 -21.874 -23.742 + -5.10 6.00 -21.880 -23.798 + -5.10 6.10 -21.896 -23.865 + -5.10 6.20 -21.990 -23.960 + -5.10 6.30 -22.187 -24.109 + -5.10 6.40 -22.335 -24.266 + -5.10 6.50 -22.442 -24.408 + -5.10 6.60 -22.504 -24.516 + -5.10 6.70 -22.524 -24.602 + -5.10 6.80 -22.522 -24.671 + -5.10 6.90 -22.505 -24.735 + -5.10 7.00 -22.487 -24.437 + -5.10 7.10 -22.647 -23.418 + -5.10 7.20 -22.441 -25.174 + -5.10 7.30 -22.419 -25.285 + -5.10 7.40 -22.375 -25.352 + -5.10 7.50 -22.317 -25.368 + -5.10 7.60 -22.248 -25.297 + -5.10 7.70 -22.174 -25.107 + -5.10 7.80 -22.097 -24.866 + -5.10 7.90 -22.017 -24.676 + -5.10 8.00 -21.935 -24.544 + -5.10 8.10 -21.851 -24.474 + -5.10 8.20 -21.765 -24.520 + -5.10 8.30 -21.677 -24.608 + -5.10 8.40 -21.588 -24.696 + -5.10 8.50 -21.498 -24.789 + -5.10 8.60 -21.406 -24.945 + -5.10 8.70 -21.314 -25.028 + -5.10 8.80 -21.220 -25.126 + -5.10 8.90 -21.125 -25.226 + -5.10 9.00 -21.032 -25.301 + -5.00 1.00 -25.405 -21.498 + -5.00 1.10 -25.348 -21.540 + -5.00 1.20 -25.287 -21.581 + -5.00 1.30 -25.214 -21.622 + -5.00 1.40 -25.114 -21.662 + -5.00 1.50 -24.974 -21.701 + -5.00 1.60 -24.798 -21.740 + -5.00 1.70 -24.608 -21.779 + -5.00 1.80 -24.427 -21.817 + -5.00 1.90 -24.268 -21.856 + -5.00 2.00 -24.133 -21.894 + -5.00 2.10 -24.017 -21.933 + -5.00 2.20 -23.914 -21.972 + -5.00 2.30 -23.816 -22.011 + -5.00 2.40 -23.723 -22.051 + -5.00 2.50 -23.630 -22.091 + -5.00 2.60 -23.538 -22.132 + -5.00 2.70 -23.449 -22.173 + -5.00 2.80 -23.365 -22.215 + -5.00 2.90 -23.291 -22.258 + -5.00 3.00 -23.228 -22.301 + -5.00 3.10 -23.177 -22.346 + -5.00 3.20 -23.139 -22.391 + -5.00 3.30 -23.114 -22.437 + -5.00 3.40 -23.104 -22.485 + -5.00 3.50 -23.108 -22.533 + -5.00 3.60 -23.122 -22.581 + -5.00 3.70 -23.141 -22.631 + -5.00 3.80 -23.162 -22.680 + -5.00 3.90 -23.178 -22.728 + -5.00 4.00 -23.186 -22.776 + -5.00 4.10 -23.175 -22.824 + -5.00 4.20 -23.126 -22.871 + -5.00 4.30 -23.015 -22.917 + -5.00 4.40 -22.844 -22.959 + -5.00 4.50 -22.637 -22.997 + -5.00 4.60 -22.429 -23.032 + -5.00 4.70 -22.232 -23.064 + -5.00 4.80 -22.050 -23.094 + -5.00 4.90 -21.889 -23.127 + -5.00 5.00 -21.757 -23.171 + -5.00 5.10 -21.665 -23.236 + -5.00 5.20 -21.622 -23.317 + -5.00 5.30 -21.631 -23.403 + -5.00 5.40 -21.690 -23.491 + -5.00 5.50 -21.776 -23.571 + -5.00 5.60 -21.816 -23.640 + -5.00 5.70 -21.847 -23.706 + -5.00 5.80 -21.865 -23.772 + -5.00 5.90 -21.866 -23.828 + -5.00 6.00 -21.869 -23.884 + -5.00 6.10 -21.886 -23.952 + -5.00 6.20 -21.972 -24.043 + -5.00 6.30 -22.169 -24.191 + -5.00 6.40 -22.324 -24.354 + -5.00 6.50 -22.436 -24.502 + -5.00 6.60 -22.504 -24.616 + -5.00 6.70 -22.530 -24.707 + -5.00 6.80 -22.532 -24.780 + -5.00 6.90 -22.520 -24.849 + -5.00 7.00 -22.505 -24.618 + -5.00 7.10 -22.577 -23.658 + -5.00 7.20 -22.475 -25.274 + -5.00 7.30 -22.460 -25.387 + -5.00 7.40 -22.422 -25.458 + -5.00 7.50 -22.369 -25.481 + -5.00 7.60 -22.304 -25.429 + -5.00 7.70 -22.234 -25.275 + -5.00 7.80 -22.160 -25.062 + -5.00 7.90 -22.083 -24.885 + -5.00 8.00 -22.005 -24.766 + -5.00 8.10 -21.924 -24.704 + -5.00 8.20 -21.840 -24.684 + -5.00 8.30 -21.755 -24.719 + -5.00 8.40 -21.668 -24.794 + -5.00 8.50 -21.580 -24.897 + -5.00 8.60 -21.490 -24.991 + -5.00 8.70 -21.399 -25.095 + -5.00 8.80 -21.307 -25.191 + -5.00 8.90 -21.214 -25.278 + -5.00 9.00 -21.121 -25.362 + -4.90 1.00 -25.409 -21.524 + -4.90 1.10 -25.352 -21.568 + -4.90 1.20 -25.290 -21.610 + -4.90 1.30 -25.214 -21.652 + -4.90 1.40 -25.112 -21.693 + -4.90 1.50 -24.968 -21.734 + -4.90 1.60 -24.787 -21.774 + -4.90 1.70 -24.593 -21.814 + -4.90 1.80 -24.409 -21.853 + -4.90 1.90 -24.245 -21.892 + -4.90 2.00 -24.105 -21.931 + -4.90 2.10 -23.987 -21.969 + -4.90 2.20 -23.884 -22.008 + -4.90 2.30 -23.790 -22.047 + -4.90 2.40 -23.704 -22.086 + -4.90 2.50 -23.620 -22.126 + -4.90 2.60 -23.537 -22.166 + -4.90 2.70 -23.456 -22.207 + -4.90 2.80 -23.380 -22.248 + -4.90 2.90 -23.311 -22.290 + -4.90 3.00 -23.251 -22.332 + -4.90 3.10 -23.201 -22.376 + -4.90 3.20 -23.161 -22.420 + -4.90 3.30 -23.134 -22.465 + -4.90 3.40 -23.120 -22.512 + -4.90 3.50 -23.118 -22.559 + -4.90 3.60 -23.126 -22.607 + -4.90 3.70 -23.141 -22.655 + -4.90 3.80 -23.156 -22.704 + -4.90 3.90 -23.167 -22.752 + -4.90 4.00 -23.169 -22.801 + -4.90 4.10 -23.151 -22.849 + -4.90 4.20 -23.092 -22.896 + -4.90 4.30 -22.972 -22.942 + -4.90 4.40 -22.795 -22.985 + -4.90 4.50 -22.585 -23.024 + -4.90 4.60 -22.376 -23.061 + -4.90 4.70 -22.177 -23.094 + -4.90 4.80 -21.993 -23.126 + -4.90 4.90 -21.831 -23.160 + -4.90 5.00 -21.697 -23.207 + -4.90 5.10 -21.603 -23.278 + -4.90 5.20 -21.555 -23.363 + -4.90 5.30 -21.560 -23.454 + -4.90 5.40 -21.627 -23.552 + -4.90 5.50 -21.736 -23.645 + -4.90 5.60 -21.789 -23.720 + -4.90 5.70 -21.825 -23.788 + -4.90 5.80 -21.850 -23.858 + -4.90 5.90 -21.858 -23.915 + -4.90 6.00 -21.861 -23.972 + -4.90 6.10 -21.880 -24.041 + -4.90 6.20 -21.959 -24.129 + -4.90 6.30 -22.152 -24.277 + -4.90 6.40 -22.314 -24.444 + -4.90 6.50 -22.432 -24.597 + -4.90 6.60 -22.504 -24.716 + -4.90 6.70 -22.534 -24.810 + -4.90 6.80 -22.541 -24.887 + -4.90 6.90 -22.533 -24.960 + -4.90 7.00 -22.521 -24.785 + -4.90 7.10 -22.538 -23.947 + -4.90 7.20 -22.503 -25.348 + -4.90 7.30 -22.495 -25.489 + -4.90 7.40 -22.463 -25.563 + -4.90 7.50 -22.415 -25.592 + -4.90 7.60 -22.355 -25.556 + -4.90 7.70 -22.288 -25.432 + -4.90 7.80 -22.218 -25.250 + -4.90 7.90 -22.144 -25.089 + -4.90 8.00 -22.069 -24.979 + -4.90 8.10 -21.991 -24.923 + -4.90 8.20 -21.911 -24.915 + -4.90 8.30 -21.829 -24.943 + -4.90 8.40 -21.744 -24.997 + -4.90 8.50 -21.658 -25.070 + -4.90 8.60 -21.570 -25.157 + -4.90 8.70 -21.480 -25.245 + -4.90 8.80 -21.390 -25.329 + -4.90 8.90 -21.299 -25.417 + -4.90 9.00 -21.207 -25.493 + -4.80 1.00 -25.414 -21.547 + -4.80 1.10 -25.356 -21.592 + -4.80 1.20 -25.292 -21.637 + -4.80 1.30 -25.214 -21.680 + -4.80 1.40 -25.108 -21.723 + -4.80 1.50 -24.962 -21.765 + -4.80 1.60 -24.780 -21.806 + -4.80 1.70 -24.584 -21.847 + -4.80 1.80 -24.396 -21.887 + -4.80 1.90 -24.229 -21.927 + -4.80 2.00 -24.085 -21.966 + -4.80 2.10 -23.963 -22.006 + -4.80 2.20 -23.859 -22.045 + -4.80 2.30 -23.768 -22.084 + -4.80 2.40 -23.685 -22.123 + -4.80 2.50 -23.608 -22.162 + -4.80 2.60 -23.533 -22.201 + -4.80 2.70 -23.460 -22.241 + -4.80 2.80 -23.391 -22.282 + -4.80 2.90 -23.328 -22.323 + -4.80 3.00 -23.272 -22.364 + -4.80 3.10 -23.224 -22.407 + -4.80 3.20 -23.185 -22.450 + -4.80 3.30 -23.156 -22.495 + -4.80 3.40 -23.139 -22.540 + -4.80 3.50 -23.133 -22.587 + -4.80 3.60 -23.136 -22.634 + -4.80 3.70 -23.145 -22.681 + -4.80 3.80 -23.156 -22.729 + -4.80 3.90 -23.161 -22.778 + -4.80 4.00 -23.156 -22.826 + -4.80 4.10 -23.130 -22.874 + -4.80 4.20 -23.061 -22.922 + -4.80 4.30 -22.932 -22.968 + -4.80 4.40 -22.748 -23.012 + -4.80 4.50 -22.536 -23.053 + -4.80 4.60 -22.325 -23.090 + -4.80 4.70 -22.125 -23.126 + -4.80 4.80 -21.942 -23.159 + -4.80 4.90 -21.780 -23.196 + -4.80 5.00 -21.648 -23.250 + -4.80 5.10 -21.553 -23.328 + -4.80 5.20 -21.503 -23.417 + -4.80 5.30 -21.502 -23.510 + -4.80 5.40 -21.570 -23.616 + -4.80 5.50 -21.690 -23.722 + -4.80 5.60 -21.770 -23.803 + -4.80 5.70 -21.804 -23.872 + -4.80 5.80 -21.840 -23.945 + -4.80 5.90 -21.849 -24.004 + -4.80 6.00 -21.854 -24.062 + -4.80 6.10 -21.875 -24.131 + -4.80 6.20 -21.946 -24.218 + -4.80 6.30 -22.137 -24.365 + -4.80 6.40 -22.305 -24.535 + -4.80 6.50 -22.428 -24.693 + -4.80 6.60 -22.505 -24.816 + -4.80 6.70 -22.538 -24.913 + -4.80 6.80 -22.548 -24.994 + -4.80 6.90 -22.544 -25.069 + -4.80 7.00 -22.534 -24.941 + -4.80 7.10 -22.540 -24.200 + -4.80 7.20 -22.551 -24.273 + -4.80 7.30 -22.525 -25.585 + -4.80 7.40 -22.499 -25.665 + -4.80 7.50 -22.456 -25.697 + -4.80 7.60 -22.399 -25.671 + -4.80 7.70 -22.337 -25.568 + -4.80 7.80 -22.270 -25.411 + -4.80 7.90 -22.200 -25.260 + -4.80 8.00 -22.129 -25.156 + -4.80 8.10 -22.054 -25.107 + -4.80 8.20 -21.977 -25.105 + -4.80 8.30 -21.897 -25.138 + -4.80 8.40 -21.815 -25.197 + -4.80 8.50 -21.732 -25.273 + -4.80 8.60 -21.646 -25.356 + -4.80 8.70 -21.558 -25.441 + -4.80 8.80 -21.470 -25.522 + -4.80 8.90 -21.380 -25.599 + -4.80 9.00 -21.289 -25.670 + -4.70 1.00 -25.418 -21.568 + -4.70 1.10 -25.359 -21.615 + -4.70 1.20 -25.293 -21.661 + -4.70 1.30 -25.212 -21.706 + -4.70 1.40 -25.103 -21.750 + -4.70 1.50 -24.956 -21.794 + -4.70 1.60 -24.774 -21.836 + -4.70 1.70 -24.579 -21.879 + -4.70 1.80 -24.390 -21.920 + -4.70 1.90 -24.220 -21.961 + -4.70 2.00 -24.072 -22.001 + -4.70 2.10 -23.946 -22.041 + -4.70 2.20 -23.839 -22.081 + -4.70 2.30 -23.748 -22.120 + -4.70 2.40 -23.668 -22.160 + -4.70 2.50 -23.595 -22.199 + -4.70 2.60 -23.527 -22.238 + -4.70 2.70 -23.461 -22.277 + -4.70 2.80 -23.399 -22.317 + -4.70 2.90 -23.342 -22.358 + -4.70 3.00 -23.290 -22.398 + -4.70 3.10 -23.246 -22.440 + -4.70 3.20 -23.208 -22.483 + -4.70 3.30 -23.179 -22.526 + -4.70 3.40 -23.160 -22.571 + -4.70 3.50 -23.150 -22.616 + -4.70 3.60 -23.150 -22.662 + -4.70 3.70 -23.155 -22.709 + -4.70 3.80 -23.160 -22.756 + -4.70 3.90 -23.160 -22.804 + -4.70 4.00 -23.148 -22.852 + -4.70 4.10 -23.112 -22.901 + -4.70 4.20 -23.033 -22.948 + -4.70 4.30 -22.895 -22.995 + -4.70 4.40 -22.706 -23.039 + -4.70 4.50 -22.491 -23.081 + -4.70 4.60 -22.277 -23.121 + -4.70 4.70 -22.076 -23.158 + -4.70 4.80 -21.893 -23.195 + -4.70 4.90 -21.734 -23.236 + -4.70 5.00 -21.606 -23.298 + -4.70 5.10 -21.516 -23.387 + -4.70 5.20 -21.464 -23.478 + -4.70 5.30 -21.454 -23.572 + -4.70 5.40 -21.518 -23.683 + -4.70 5.50 -21.651 -23.801 + -4.70 5.60 -21.751 -23.889 + -4.70 5.70 -21.784 -23.959 + -4.70 5.80 -21.832 -24.034 + -4.70 5.90 -21.841 -24.095 + -4.70 6.00 -21.850 -24.153 + -4.70 6.10 -21.869 -24.224 + -4.70 6.20 -21.936 -24.310 + -4.70 6.30 -22.127 -24.455 + -4.70 6.40 -22.299 -24.628 + -4.70 6.50 -22.426 -24.790 + -4.70 6.60 -22.506 -24.916 + -4.70 6.70 -22.542 -25.016 + -4.70 6.80 -22.554 -25.099 + -4.70 6.90 -22.552 -25.177 + -4.70 7.00 -22.545 -25.088 + -4.70 7.10 -22.547 -24.424 + -4.70 7.20 -22.617 -23.925 + -4.70 7.30 -22.555 -25.016 + -4.70 7.40 -22.531 -25.422 + -4.70 7.50 -22.492 -25.378 + -4.70 7.60 -22.440 -25.293 + -4.70 7.70 -22.381 -25.242 + -4.70 7.80 -22.317 -25.184 + -4.70 7.90 -22.251 -25.125 + -4.70 8.00 -22.182 -25.119 + -4.70 8.10 -22.111 -25.145 + -4.70 8.20 -22.037 -25.194 + -4.70 8.30 -21.961 -25.262 + -4.70 8.40 -21.882 -25.345 + -4.70 8.50 -21.801 -25.434 + -4.70 8.60 -21.717 -25.526 + -4.70 8.70 -21.632 -25.614 + -4.70 8.80 -21.546 -25.693 + -4.70 8.90 -21.458 -25.766 + -4.70 9.00 -21.369 -25.830 + -4.60 1.00 -25.422 -21.587 + -4.60 1.10 -25.362 -21.635 + -4.60 1.20 -25.293 -21.682 + -4.60 1.30 -25.209 -21.729 + -4.60 1.40 -25.097 -21.775 + -4.60 1.50 -24.949 -21.820 + -4.60 1.60 -24.770 -21.864 + -4.60 1.70 -24.577 -21.908 + -4.60 1.80 -24.389 -21.951 + -4.60 1.90 -24.217 -21.993 + -4.60 2.00 -24.065 -22.035 + -4.60 2.10 -23.935 -22.076 + -4.60 2.20 -23.825 -22.117 + -4.60 2.30 -23.732 -22.157 + -4.60 2.40 -23.653 -22.196 + -4.60 2.50 -23.584 -22.236 + -4.60 2.60 -23.520 -22.275 + -4.60 2.70 -23.460 -22.314 + -4.60 2.80 -23.404 -22.354 + -4.60 2.90 -23.353 -22.394 + -4.60 3.00 -23.306 -22.434 + -4.60 3.10 -23.265 -22.475 + -4.60 3.20 -23.230 -22.516 + -4.60 3.30 -23.202 -22.559 + -4.60 3.40 -23.182 -22.602 + -4.60 3.50 -23.171 -22.647 + -4.60 3.60 -23.167 -22.692 + -4.60 3.70 -23.168 -22.738 + -4.60 3.80 -23.169 -22.784 + -4.60 3.90 -23.162 -22.831 + -4.60 4.00 -23.143 -22.879 + -4.60 4.10 -23.098 -22.927 + -4.60 4.20 -23.009 -22.975 + -4.60 4.30 -22.862 -23.022 + -4.60 4.40 -22.667 -23.067 + -4.60 4.50 -22.448 -23.111 + -4.60 4.60 -22.232 -23.152 + -4.60 4.70 -22.029 -23.192 + -4.60 4.80 -21.846 -23.231 + -4.60 4.90 -21.690 -23.278 + -4.60 5.00 -21.569 -23.351 + -4.60 5.10 -21.486 -23.452 + -4.60 5.20 -21.435 -23.547 + -4.60 5.30 -21.418 -23.641 + -4.60 5.40 -21.475 -23.754 + -4.60 5.50 -21.622 -23.882 + -4.60 5.60 -21.730 -23.976 + -4.60 5.70 -21.773 -24.048 + -4.60 5.80 -21.824 -24.124 + -4.60 5.90 -21.837 -24.187 + -4.60 6.00 -21.845 -24.246 + -4.60 6.10 -21.864 -24.317 + -4.60 6.20 -21.930 -24.403 + -4.60 6.30 -22.119 -24.547 + -4.60 6.40 -22.294 -24.723 + -4.60 6.50 -22.423 -24.887 + -4.60 6.60 -22.505 -25.016 + -4.60 6.70 -22.544 -25.117 + -4.60 6.80 -22.559 -25.203 + -4.60 6.90 -22.560 -25.279 + -4.60 7.00 -22.554 -25.226 + -4.60 7.10 -22.554 -24.631 + -4.60 7.20 -22.605 -24.117 + -4.60 7.30 -22.613 -24.172 + -4.60 7.40 -22.575 -24.466 + -4.60 7.50 -22.535 -24.556 + -4.60 7.60 -22.482 -24.631 + -4.60 7.70 -22.423 -24.727 + -4.60 7.80 -22.360 -24.831 + -4.60 7.90 -22.296 -24.939 + -4.60 8.00 -22.231 -25.055 + -4.60 8.10 -22.162 -25.169 + -4.60 8.20 -22.092 -25.280 + -4.60 8.30 -22.019 -25.390 + -4.60 8.40 -21.943 -25.497 + -4.60 8.50 -21.865 -25.601 + -4.60 8.60 -21.784 -25.697 + -4.60 8.70 -21.702 -25.784 + -4.60 8.80 -21.618 -25.858 + -4.60 8.90 -21.532 -25.924 + -4.60 9.00 -21.444 -25.980 + -4.50 1.00 -25.426 -21.604 + -4.50 1.10 -25.364 -21.653 + -4.50 1.20 -25.293 -21.701 + -4.50 1.30 -25.204 -21.749 + -4.50 1.40 -25.089 -21.797 + -4.50 1.50 -24.942 -21.843 + -4.50 1.60 -24.766 -21.889 + -4.50 1.70 -24.578 -21.935 + -4.50 1.80 -24.392 -21.979 + -4.50 1.90 -24.218 -22.023 + -4.50 2.00 -24.063 -22.066 + -4.50 2.10 -23.929 -22.109 + -4.50 2.20 -23.815 -22.151 + -4.50 2.30 -23.720 -22.192 + -4.50 2.40 -23.641 -22.232 + -4.50 2.50 -23.573 -22.272 + -4.50 2.60 -23.513 -22.312 + -4.50 2.70 -23.458 -22.352 + -4.50 2.80 -23.408 -22.391 + -4.50 2.90 -23.362 -22.431 + -4.50 3.00 -23.320 -22.471 + -4.50 3.10 -23.283 -22.511 + -4.50 3.20 -23.250 -22.552 + -4.50 3.30 -23.224 -22.593 + -4.50 3.40 -23.204 -22.636 + -4.50 3.50 -23.192 -22.679 + -4.50 3.60 -23.187 -22.723 + -4.50 3.70 -23.185 -22.768 + -4.50 3.80 -23.181 -22.814 + -4.50 3.90 -23.169 -22.860 + -4.50 4.00 -23.142 -22.908 + -4.50 4.10 -23.088 -22.955 + -4.50 4.20 -22.988 -23.003 + -4.50 4.30 -22.832 -23.050 + -4.50 4.40 -22.632 -23.096 + -4.50 4.50 -22.410 -23.140 + -4.50 4.60 -22.190 -23.183 + -4.50 4.70 -21.985 -23.226 + -4.50 4.80 -21.803 -23.269 + -4.50 4.90 -21.649 -23.321 + -4.50 5.00 -21.535 -23.407 + -4.50 5.10 -21.462 -23.522 + -4.50 5.20 -21.413 -23.622 + -4.50 5.30 -21.391 -23.716 + -4.50 5.40 -21.436 -23.829 + -4.50 5.50 -21.596 -23.966 + -4.50 5.60 -21.720 -24.066 + -4.50 5.70 -21.764 -24.138 + -4.50 5.80 -21.814 -24.216 + -4.50 5.90 -21.834 -24.280 + -4.50 6.00 -21.838 -24.340 + -4.50 6.10 -21.861 -24.412 + -4.50 6.20 -21.925 -24.497 + -4.50 6.30 -22.112 -24.641 + -4.50 6.40 -22.289 -24.818 + -4.50 6.50 -22.421 -24.985 + -4.50 6.60 -22.506 -25.116 + -4.50 6.70 -22.546 -25.219 + -4.50 6.80 -22.563 -25.307 + -4.50 6.90 -22.566 -25.385 + -4.50 7.00 -22.561 -25.359 + -4.50 7.10 -22.561 -24.828 + -4.50 7.20 -22.601 -24.331 + -4.50 7.30 -22.629 -24.206 + -4.50 7.40 -22.606 -24.312 + -4.50 7.50 -22.565 -24.444 + -4.50 7.60 -22.513 -24.571 + -4.50 7.70 -22.456 -24.699 + -4.50 7.80 -22.396 -24.838 + -4.50 7.90 -22.334 -24.994 + -4.50 8.00 -22.273 -25.149 + -4.50 8.10 -22.208 -25.297 + -4.50 8.20 -22.141 -25.433 + -4.50 8.30 -22.071 -25.559 + -4.50 8.40 -21.998 -25.674 + -4.50 8.50 -21.924 -25.778 + -4.50 8.60 -21.846 -25.870 + -4.50 8.70 -21.766 -25.950 + -4.50 8.80 -21.684 -26.017 + -4.50 8.90 -21.601 -26.074 + -4.50 9.00 -21.516 -26.121 + -4.40 1.00 -25.430 -21.619 + -4.40 1.10 -25.367 -21.669 + -4.40 1.20 -25.292 -21.718 + -4.40 1.30 -25.198 -21.768 + -4.40 1.40 -25.079 -21.816 + -4.40 1.50 -24.932 -21.864 + -4.40 1.60 -24.762 -21.912 + -4.40 1.70 -24.579 -21.959 + -4.40 1.80 -24.397 -22.005 + -4.40 1.90 -24.224 -22.051 + -4.40 2.00 -24.067 -22.095 + -4.40 2.10 -23.929 -22.139 + -4.40 2.20 -23.811 -22.183 + -4.40 2.30 -23.713 -22.225 + -4.40 2.40 -23.632 -22.267 + -4.40 2.50 -23.564 -22.308 + -4.40 2.60 -23.506 -22.349 + -4.40 2.70 -23.455 -22.389 + -4.40 2.80 -23.410 -22.429 + -4.40 2.90 -23.368 -22.469 + -4.40 3.00 -23.331 -22.508 + -4.40 3.10 -23.298 -22.548 + -4.40 3.20 -23.269 -22.588 + -4.40 3.30 -23.245 -22.629 + -4.40 3.40 -23.226 -22.671 + -4.40 3.50 -23.214 -22.713 + -4.40 3.60 -23.208 -22.756 + -4.40 3.70 -23.203 -22.800 + -4.40 3.80 -23.196 -22.845 + -4.40 3.90 -23.178 -22.891 + -4.40 4.00 -23.143 -22.937 + -4.40 4.10 -23.079 -22.984 + -4.40 4.20 -22.970 -23.032 + -4.40 4.30 -22.806 -23.078 + -4.40 4.40 -22.601 -23.124 + -4.40 4.50 -22.375 -23.170 + -4.40 4.60 -22.152 -23.215 + -4.40 4.70 -21.945 -23.260 + -4.40 4.80 -21.762 -23.307 + -4.40 4.90 -21.610 -23.366 + -4.40 5.00 -21.502 -23.467 + -4.40 5.10 -21.442 -23.597 + -4.40 5.20 -21.397 -23.702 + -4.40 5.30 -21.370 -23.795 + -4.40 5.40 -21.404 -23.908 + -4.40 5.50 -21.569 -24.052 + -4.40 5.60 -21.712 -24.158 + -4.40 5.70 -21.757 -24.231 + -4.40 5.80 -21.807 -24.309 + -4.40 5.90 -21.827 -24.374 + -4.40 6.00 -21.835 -24.435 + -4.40 6.10 -21.859 -24.507 + -4.40 6.20 -21.918 -24.592 + -4.40 6.30 -22.106 -24.735 + -4.40 6.40 -22.284 -24.914 + -4.40 6.50 -22.419 -25.083 + -4.40 6.60 -22.506 -25.216 + -4.40 6.70 -22.548 -25.320 + -4.40 6.80 -22.566 -25.409 + -4.40 6.90 -22.570 -25.490 + -4.40 7.00 -22.567 -25.485 + -4.40 7.10 -22.567 -25.016 + -4.40 7.20 -22.603 -24.541 + -4.40 7.30 -22.631 -24.384 + -4.40 7.40 -22.618 -24.443 + -4.40 7.50 -22.583 -24.568 + -4.40 7.60 -22.535 -24.698 + -4.40 7.70 -22.482 -24.829 + -4.40 7.80 -22.426 -24.976 + -4.40 7.90 -22.367 -25.146 + -4.40 8.00 -22.309 -25.315 + -4.40 8.10 -22.248 -25.474 + -4.40 8.20 -22.184 -25.616 + -4.40 8.30 -22.118 -25.743 + -4.40 8.40 -22.048 -25.855 + -4.40 8.50 -21.977 -25.953 + -4.40 8.60 -21.901 -26.038 + -4.40 8.70 -21.825 -26.109 + -4.40 8.80 -21.746 -26.166 + -4.40 8.90 -21.665 -26.215 + -4.40 9.00 -21.582 -26.253 + -4.30 1.00 -25.434 -21.632 + -4.30 1.10 -25.369 -21.683 + -4.30 1.20 -25.291 -21.734 + -4.30 1.30 -25.191 -21.784 + -4.30 1.40 -25.068 -21.834 + -4.30 1.50 -24.921 -21.883 + -4.30 1.60 -24.755 -21.932 + -4.30 1.70 -24.580 -21.981 + -4.30 1.80 -24.403 -22.028 + -4.30 1.90 -24.233 -22.076 + -4.30 2.00 -24.074 -22.122 + -4.30 2.10 -23.933 -22.168 + -4.30 2.20 -23.811 -22.212 + -4.30 2.30 -23.709 -22.256 + -4.30 2.40 -23.626 -22.300 + -4.30 2.50 -23.558 -22.342 + -4.30 2.60 -23.501 -22.384 + -4.30 2.70 -23.453 -22.425 + -4.30 2.80 -23.411 -22.466 + -4.30 2.90 -23.374 -22.506 + -4.30 3.00 -23.341 -22.546 + -4.30 3.10 -23.312 -22.586 + -4.30 3.20 -23.287 -22.626 + -4.30 3.30 -23.265 -22.667 + -4.30 3.40 -23.248 -22.708 + -4.30 3.50 -23.237 -22.749 + -4.30 3.60 -23.230 -22.791 + -4.30 3.70 -23.224 -22.834 + -4.30 3.80 -23.214 -22.878 + -4.30 3.90 -23.190 -22.922 + -4.30 4.00 -23.146 -22.968 + -4.30 4.10 -23.073 -23.014 + -4.30 4.20 -22.954 -23.061 + -4.30 4.30 -22.783 -23.108 + -4.30 4.40 -22.574 -23.154 + -4.30 4.50 -22.346 -23.200 + -4.30 4.60 -22.119 -23.247 + -4.30 4.70 -21.911 -23.295 + -4.30 4.80 -21.727 -23.345 + -4.30 4.90 -21.574 -23.412 + -4.30 5.00 -21.472 -23.529 + -4.30 5.10 -21.424 -23.675 + -4.30 5.20 -21.384 -23.785 + -4.30 5.30 -21.355 -23.879 + -4.30 5.40 -21.381 -23.990 + -4.30 5.50 -21.550 -24.140 + -4.30 5.60 -21.705 -24.251 + -4.30 5.70 -21.750 -24.324 + -4.30 5.80 -21.803 -24.403 + -4.30 5.90 -21.822 -24.470 + -4.30 6.00 -21.832 -24.530 + -4.30 6.10 -21.856 -24.603 + -4.30 6.20 -21.916 -24.688 + -4.30 6.30 -22.099 -24.831 + -4.30 6.40 -22.280 -25.011 + -4.30 6.50 -22.417 -25.181 + -4.30 6.60 -22.506 -25.315 + -4.30 6.70 -22.550 -25.421 + -4.30 6.80 -22.569 -25.511 + -4.30 6.90 -22.574 -25.593 + -4.30 7.00 -22.571 -25.607 + -4.30 7.10 -22.573 -25.199 + -4.30 7.20 -22.606 -24.744 + -4.30 7.30 -22.633 -24.586 + -4.30 7.40 -22.626 -24.632 + -4.30 7.50 -22.597 -24.752 + -4.30 7.60 -22.553 -24.880 + -4.30 7.70 -22.504 -25.011 + -4.30 7.80 -22.451 -25.159 + -4.30 7.90 -22.396 -25.334 + -4.30 8.00 -22.341 -25.505 + -4.30 8.10 -22.282 -25.664 + -4.30 8.20 -22.222 -25.804 + -4.30 8.30 -22.159 -25.926 + -4.30 8.40 -22.092 -26.031 + -4.30 8.50 -22.024 -26.120 + -4.30 8.60 -21.952 -26.195 + -4.30 8.70 -21.878 -26.257 + -4.30 8.80 -21.802 -26.306 + -4.30 8.90 -21.723 -26.347 + -4.30 9.00 -21.643 -26.379 + -4.20 1.00 -25.440 -21.645 + -4.20 1.10 -25.372 -21.697 + -4.20 1.20 -25.289 -21.748 + -4.20 1.30 -25.184 -21.799 + -4.20 1.40 -25.055 -21.850 + -4.20 1.50 -24.907 -21.900 + -4.20 1.60 -24.746 -21.950 + -4.20 1.70 -24.579 -22.000 + -4.20 1.80 -24.409 -22.049 + -4.20 1.90 -24.242 -22.098 + -4.20 2.00 -24.084 -22.146 + -4.20 2.10 -23.940 -22.193 + -4.20 2.20 -23.815 -22.240 + -4.20 2.30 -23.709 -22.285 + -4.20 2.40 -23.623 -22.330 + -4.20 2.50 -23.554 -22.374 + -4.20 2.60 -23.497 -22.417 + -4.20 2.70 -23.451 -22.460 + -4.20 2.80 -23.412 -22.502 + -4.20 2.90 -23.379 -22.543 + -4.20 3.00 -23.350 -22.584 + -4.20 3.10 -23.325 -22.624 + -4.20 3.20 -23.303 -22.664 + -4.20 3.30 -23.284 -22.704 + -4.20 3.40 -23.270 -22.745 + -4.20 3.50 -23.259 -22.786 + -4.20 3.60 -23.253 -22.828 + -4.20 3.70 -23.246 -22.870 + -4.20 3.80 -23.233 -22.912 + -4.20 3.90 -23.203 -22.956 + -4.20 4.00 -23.151 -23.001 + -4.20 4.10 -23.068 -23.046 + -4.20 4.20 -22.940 -23.092 + -4.20 4.30 -22.763 -23.138 + -4.20 4.40 -22.550 -23.184 + -4.20 4.50 -22.319 -23.232 + -4.20 4.60 -22.091 -23.281 + -4.20 4.70 -21.882 -23.331 + -4.20 4.80 -21.696 -23.385 + -4.20 4.90 -21.543 -23.460 + -4.20 5.00 -21.445 -23.595 + -4.20 5.10 -21.409 -23.756 + -4.20 5.20 -21.374 -23.871 + -4.20 5.30 -21.344 -23.965 + -4.20 5.40 -21.362 -24.076 + -4.20 5.50 -21.535 -24.229 + -4.20 5.60 -21.699 -24.345 + -4.20 5.70 -21.744 -24.418 + -4.20 5.80 -21.796 -24.497 + -4.20 5.90 -21.820 -24.566 + -4.20 6.00 -21.830 -24.627 + -4.20 6.10 -21.852 -24.700 + -4.20 6.20 -21.913 -24.785 + -4.20 6.30 -22.093 -24.927 + -4.20 6.40 -22.278 -25.108 + -4.20 6.50 -22.415 -25.279 + -4.20 6.60 -22.506 -25.415 + -4.20 6.70 -22.551 -25.521 + -4.20 6.80 -22.571 -25.612 + -4.20 6.90 -22.576 -25.695 + -4.20 7.00 -22.575 -25.725 + -4.20 7.10 -22.577 -25.375 + -4.20 7.20 -22.609 -24.942 + -4.20 7.30 -22.636 -24.790 + -4.20 7.40 -22.633 -24.836 + -4.20 7.50 -22.608 -24.956 + -4.20 7.60 -22.568 -25.083 + -4.20 7.70 -22.522 -25.213 + -4.20 7.80 -22.471 -25.359 + -4.20 7.90 -22.419 -25.532 + -4.20 8.00 -22.367 -25.700 + -4.20 8.10 -22.312 -25.855 + -4.20 8.20 -22.255 -25.988 + -4.20 8.30 -22.195 -26.101 + -4.20 8.40 -22.131 -26.197 + -4.20 8.50 -22.066 -26.277 + -4.20 8.60 -21.996 -26.343 + -4.20 8.70 -21.926 -26.397 + -4.20 8.80 -21.852 -26.438 + -4.20 8.90 -21.776 -26.472 + -4.20 9.00 -21.699 -26.498 + -4.10 1.00 -25.446 -21.658 + -4.10 1.10 -25.376 -21.709 + -4.10 1.20 -25.288 -21.761 + -4.10 1.30 -25.175 -21.812 + -4.10 1.40 -25.041 -21.864 + -4.10 1.50 -24.892 -21.915 + -4.10 1.60 -24.735 -21.966 + -4.10 1.70 -24.575 -22.017 + -4.10 1.80 -24.413 -22.068 + -4.10 1.90 -24.252 -22.118 + -4.10 2.00 -24.095 -22.167 + -4.10 2.10 -23.950 -22.216 + -4.10 2.20 -23.822 -22.264 + -4.10 2.30 -23.713 -22.311 + -4.10 2.40 -23.624 -22.358 + -4.10 2.50 -23.553 -22.404 + -4.10 2.60 -23.496 -22.449 + -4.10 2.70 -23.450 -22.493 + -4.10 2.80 -23.413 -22.536 + -4.10 2.90 -23.383 -22.578 + -4.10 3.00 -23.358 -22.620 + -4.10 3.10 -23.337 -22.661 + -4.10 3.20 -23.318 -22.702 + -4.10 3.30 -23.303 -22.743 + -4.10 3.40 -23.291 -22.783 + -4.10 3.50 -23.282 -22.824 + -4.10 3.60 -23.277 -22.865 + -4.10 3.70 -23.268 -22.907 + -4.10 3.80 -23.252 -22.949 + -4.10 3.90 -23.217 -22.991 + -4.10 4.00 -23.157 -23.035 + -4.10 4.10 -23.064 -23.079 + -4.10 4.20 -22.927 -23.124 + -4.10 4.30 -22.745 -23.170 + -4.10 4.40 -22.529 -23.216 + -4.10 4.50 -22.296 -23.264 + -4.10 4.60 -22.067 -23.315 + -4.10 4.70 -21.857 -23.369 + -4.10 4.80 -21.671 -23.427 + -4.10 4.90 -21.515 -23.510 + -4.10 5.00 -21.420 -23.664 + -4.10 5.10 -21.395 -23.840 + -4.10 5.20 -21.366 -23.960 + -4.10 5.30 -21.334 -24.055 + -4.10 5.40 -21.347 -24.164 + -4.10 5.50 -21.522 -24.320 + -4.10 5.60 -21.695 -24.440 + -4.10 5.70 -21.741 -24.514 + -4.10 5.80 -21.792 -24.593 + -4.10 5.90 -21.818 -24.662 + -4.10 6.00 -21.829 -24.723 + -4.10 6.10 -21.849 -24.797 + -4.10 6.20 -21.904 -24.882 + -4.10 6.30 -22.091 -25.024 + -4.10 6.40 -22.274 -25.205 + -4.10 6.50 -22.414 -25.378 + -4.10 6.60 -22.506 -25.514 + -4.10 6.70 -22.552 -25.621 + -4.10 6.80 -22.572 -25.713 + -4.10 6.90 -22.578 -25.797 + -4.10 7.00 -22.578 -25.839 + -4.10 7.10 -22.580 -25.545 + -4.10 7.20 -22.612 -25.136 + -4.10 7.30 -22.640 -24.991 + -4.10 7.40 -22.639 -25.040 + -4.10 7.50 -22.617 -25.162 + -4.10 7.60 -22.580 -25.290 + -4.10 7.70 -22.537 -25.418 + -4.10 7.80 -22.489 -25.561 + -4.10 7.90 -22.439 -25.729 + -4.10 8.00 -22.389 -25.892 + -4.10 8.10 -22.337 -26.038 + -4.10 8.20 -22.283 -26.163 + -4.10 8.30 -22.225 -26.268 + -4.10 8.40 -22.164 -26.355 + -4.10 8.50 -22.102 -26.425 + -4.10 8.60 -22.035 -26.483 + -4.10 8.70 -21.967 -26.528 + -4.10 8.80 -21.897 -26.563 + -4.10 8.90 -21.824 -26.591 + -4.10 9.00 -21.749 -26.613 + -4.00 1.00 -25.453 -21.671 + -4.00 1.10 -25.380 -21.722 + -4.00 1.20 -25.287 -21.774 + -4.00 1.30 -25.167 -21.826 + -4.00 1.40 -25.026 -21.877 + -4.00 1.50 -24.874 -21.929 + -4.00 1.60 -24.721 -21.981 + -4.00 1.70 -24.567 -22.033 + -4.00 1.80 -24.414 -22.084 + -4.00 1.90 -24.260 -22.135 + -4.00 2.00 -24.107 -22.186 + -4.00 2.10 -23.962 -22.236 + -4.00 2.20 -23.832 -22.286 + -4.00 2.30 -23.721 -22.335 + -4.00 2.40 -23.629 -22.383 + -4.00 2.50 -23.555 -22.431 + -4.00 2.60 -23.497 -22.477 + -4.00 2.70 -23.451 -22.523 + -4.00 2.80 -23.415 -22.568 + -4.00 2.90 -23.387 -22.612 + -4.00 3.00 -23.365 -22.655 + -4.00 3.10 -23.347 -22.697 + -4.00 3.20 -23.332 -22.739 + -4.00 3.30 -23.321 -22.780 + -4.00 3.40 -23.311 -22.821 + -4.00 3.50 -23.305 -22.862 + -4.00 3.60 -23.300 -22.903 + -4.00 3.70 -23.291 -22.945 + -4.00 3.80 -23.273 -22.986 + -4.00 3.90 -23.232 -23.028 + -4.00 4.00 -23.162 -23.071 + -4.00 4.10 -23.060 -23.114 + -4.00 4.20 -22.915 -23.158 + -4.00 4.30 -22.727 -23.203 + -4.00 4.40 -22.508 -23.249 + -4.00 4.50 -22.274 -23.298 + -4.00 4.60 -22.046 -23.351 + -4.00 4.70 -21.838 -23.408 + -4.00 4.80 -21.651 -23.469 + -4.00 4.90 -21.491 -23.562 + -4.00 5.00 -21.397 -23.736 + -4.00 5.10 -21.383 -23.926 + -4.00 5.20 -21.359 -24.050 + -4.00 5.30 -21.326 -24.146 + -4.00 5.40 -21.335 -24.254 + -4.00 5.50 -21.511 -24.413 + -4.00 5.60 -21.690 -24.535 + -4.00 5.70 -21.737 -24.610 + -4.00 5.80 -21.790 -24.689 + -4.00 5.90 -21.817 -24.759 + -4.00 6.00 -21.823 -24.821 + -4.00 6.10 -21.844 -24.895 + -4.00 6.20 -21.903 -24.980 + -4.00 6.30 -22.088 -25.122 + -4.00 6.40 -22.272 -25.303 + -4.00 6.50 -22.412 -25.476 + -4.00 6.60 -22.505 -25.613 + -4.00 6.70 -22.552 -25.721 + -4.00 6.80 -22.573 -25.813 + -4.00 6.90 -22.580 -25.898 + -4.00 7.00 -22.579 -25.950 + -4.00 7.10 -22.582 -25.709 + -4.00 7.20 -22.615 -25.327 + -4.00 7.30 -22.644 -25.188 + -4.00 7.40 -22.645 -25.241 + -4.00 7.50 -22.625 -25.365 + -4.00 7.60 -22.590 -25.493 + -4.00 7.70 -22.549 -25.620 + -4.00 7.80 -22.503 -25.759 + -4.00 7.90 -22.455 -25.921 + -4.00 8.00 -22.408 -26.076 + -4.00 8.10 -22.358 -26.214 + -4.00 8.20 -22.306 -26.330 + -4.00 8.30 -22.251 -26.425 + -4.00 8.40 -22.193 -26.503 + -4.00 8.50 -22.133 -26.565 + -4.00 8.60 -22.069 -26.615 + -4.00 8.70 -22.004 -26.654 + -4.00 8.80 -21.936 -26.683 + -4.00 8.90 -21.865 -26.707 + -4.00 9.00 -21.793 -26.725 + -3.90 1.00 -25.461 -21.685 + -3.90 1.10 -25.386 -21.735 + -3.90 1.20 -25.286 -21.787 + -3.90 1.30 -25.159 -21.838 + -3.90 1.40 -25.011 -21.890 + -3.90 1.50 -24.856 -21.943 + -3.90 1.60 -24.704 -21.995 + -3.90 1.70 -24.557 -22.047 + -3.90 1.80 -24.412 -22.099 + -3.90 1.90 -24.265 -22.151 + -3.90 2.00 -24.117 -22.203 + -3.90 2.10 -23.975 -22.255 + -3.90 2.20 -23.844 -22.306 + -3.90 2.30 -23.731 -22.356 + -3.90 2.40 -23.637 -22.406 + -3.90 2.50 -23.561 -22.455 + -3.90 2.60 -23.501 -22.503 + -3.90 2.70 -23.454 -22.551 + -3.90 2.80 -23.419 -22.597 + -3.90 2.90 -23.392 -22.643 + -3.90 3.00 -23.371 -22.688 + -3.90 3.10 -23.356 -22.732 + -3.90 3.20 -23.345 -22.775 + -3.90 3.30 -23.337 -22.817 + -3.90 3.40 -23.331 -22.859 + -3.90 3.50 -23.327 -22.901 + -3.90 3.60 -23.323 -22.942 + -3.90 3.70 -23.314 -22.983 + -3.90 3.80 -23.292 -23.024 + -3.90 3.90 -23.246 -23.066 + -3.90 4.00 -23.168 -23.108 + -3.90 4.10 -23.056 -23.151 + -3.90 4.20 -22.903 -23.194 + -3.90 4.30 -22.710 -23.238 + -3.90 4.40 -22.487 -23.284 + -3.90 4.50 -22.253 -23.333 + -3.90 4.60 -22.027 -23.389 + -3.90 4.70 -21.821 -23.449 + -3.90 4.80 -21.633 -23.514 + -3.90 4.90 -21.471 -23.616 + -3.90 5.00 -21.378 -23.812 + -3.90 5.10 -21.373 -24.014 + -3.90 5.20 -21.353 -24.142 + -3.90 5.30 -21.321 -24.239 + -3.90 5.40 -21.326 -24.346 + -3.90 5.50 -21.502 -24.506 + -3.90 5.60 -21.686 -24.631 + -3.90 5.70 -21.734 -24.706 + -3.90 5.80 -21.787 -24.786 + -3.90 5.90 -21.816 -24.856 + -3.90 6.00 -21.821 -24.918 + -3.90 6.10 -21.842 -24.992 + -3.90 6.20 -21.901 -25.077 + -3.90 6.30 -22.085 -25.219 + -3.90 6.40 -22.269 -25.401 + -3.90 6.50 -22.409 -25.574 + -3.90 6.60 -22.504 -25.712 + -3.90 6.70 -22.552 -25.820 + -3.90 6.80 -22.574 -25.912 + -3.90 6.90 -22.580 -25.998 + -3.90 7.00 -22.579 -26.059 + -3.90 7.10 -22.582 -25.867 + -3.90 7.20 -22.617 -25.515 + -3.90 7.30 -22.647 -25.383 + -3.90 7.40 -22.650 -25.438 + -3.90 7.50 -22.632 -25.563 + -3.90 7.60 -22.599 -25.691 + -3.90 7.70 -22.559 -25.815 + -3.90 7.80 -22.515 -25.951 + -3.90 7.90 -22.469 -26.105 + -3.90 8.00 -22.424 -26.252 + -3.90 8.10 -22.375 -26.381 + -3.90 8.20 -22.325 -26.488 + -3.90 8.30 -22.273 -26.574 + -3.90 8.40 -22.217 -26.643 + -3.90 8.50 -22.159 -26.698 + -3.90 8.60 -22.098 -26.741 + -3.90 8.70 -22.035 -26.774 + -3.90 8.80 -21.970 -26.799 + -3.90 8.90 -21.901 -26.819 + -3.90 9.00 -21.832 -26.834 + -3.80 1.00 -25.472 -21.699 + -3.80 1.10 -25.393 -21.749 + -3.80 1.20 -25.288 -21.800 + -3.80 1.30 -25.152 -21.851 + -3.80 1.40 -24.997 -21.903 + -3.80 1.50 -24.838 -21.955 + -3.80 1.60 -24.687 -22.008 + -3.80 1.70 -24.544 -22.060 + -3.80 1.80 -24.406 -22.113 + -3.80 1.90 -24.267 -22.166 + -3.80 2.00 -24.126 -22.218 + -3.80 2.10 -23.987 -22.271 + -3.80 2.20 -23.857 -22.323 + -3.80 2.30 -23.743 -22.375 + -3.80 2.40 -23.647 -22.426 + -3.80 2.50 -23.569 -22.477 + -3.80 2.60 -23.507 -22.527 + -3.80 2.70 -23.460 -22.576 + -3.80 2.80 -23.423 -22.624 + -3.80 2.90 -23.397 -22.672 + -3.80 3.00 -23.378 -22.718 + -3.80 3.10 -23.365 -22.764 + -3.80 3.20 -23.357 -22.808 + -3.80 3.30 -23.352 -22.852 + -3.80 3.40 -23.349 -22.895 + -3.80 3.50 -23.347 -22.938 + -3.80 3.60 -23.345 -22.980 + -3.80 3.70 -23.335 -23.022 + -3.80 3.80 -23.311 -23.063 + -3.80 3.90 -23.259 -23.105 + -3.80 4.00 -23.173 -23.146 + -3.80 4.10 -23.052 -23.189 + -3.80 4.20 -22.891 -23.231 + -3.80 4.30 -22.692 -23.274 + -3.80 4.40 -22.467 -23.320 + -3.80 4.50 -22.232 -23.371 + -3.80 4.60 -22.009 -23.430 + -3.80 4.70 -21.806 -23.493 + -3.80 4.80 -21.619 -23.560 + -3.80 4.90 -21.453 -23.674 + -3.80 5.00 -21.361 -23.891 + -3.80 5.10 -21.364 -24.104 + -3.80 5.20 -21.348 -24.234 + -3.80 5.30 -21.317 -24.333 + -3.80 5.40 -21.319 -24.439 + -3.80 5.50 -21.495 -24.601 + -3.80 5.60 -21.684 -24.728 + -3.80 5.70 -21.732 -24.803 + -3.80 5.80 -21.786 -24.883 + -3.80 5.90 -21.812 -24.954 + -3.80 6.00 -21.819 -25.016 + -3.80 6.10 -21.841 -25.090 + -3.80 6.20 -21.899 -25.175 + -3.80 6.30 -22.081 -25.317 + -3.80 6.40 -22.263 -25.498 + -3.80 6.50 -22.408 -25.672 + -3.80 6.60 -22.504 -25.811 + -3.80 6.70 -22.552 -25.918 + -3.80 6.80 -22.574 -26.011 + -3.80 6.90 -22.580 -26.098 + -3.80 7.00 -22.578 -26.165 + -3.80 7.10 -22.583 -26.018 + -3.80 7.20 -22.619 -25.699 + -3.80 7.30 -22.650 -25.574 + -3.80 7.40 -22.654 -25.631 + -3.80 7.50 -22.638 -25.756 + -3.80 7.60 -22.606 -25.883 + -3.80 7.70 -22.567 -26.005 + -3.80 7.80 -22.524 -26.135 + -3.80 7.90 -22.480 -26.282 + -3.80 8.00 -22.436 -26.420 + -3.80 8.10 -22.389 -26.540 + -3.80 8.20 -22.342 -26.638 + -3.80 8.30 -22.291 -26.716 + -3.80 8.40 -22.237 -26.777 + -3.80 8.50 -22.182 -26.825 + -3.80 8.60 -22.122 -26.862 + -3.80 8.70 -22.061 -26.890 + -3.80 8.80 -21.998 -26.912 + -3.80 8.90 -21.932 -26.928 + -3.80 9.00 -21.865 -26.941 + -3.70 1.00 -25.484 -21.715 + -3.70 1.10 -25.402 -21.764 + -3.70 1.20 -25.291 -21.814 + -3.70 1.30 -25.148 -21.865 + -3.70 1.40 -24.985 -21.917 + -3.70 1.50 -24.821 -21.968 + -3.70 1.60 -24.669 -22.021 + -3.70 1.70 -24.530 -22.073 + -3.70 1.80 -24.398 -22.126 + -3.70 1.90 -24.266 -22.179 + -3.70 2.00 -24.132 -22.233 + -3.70 2.10 -23.997 -22.286 + -3.70 2.20 -23.870 -22.339 + -3.70 2.30 -23.757 -22.391 + -3.70 2.40 -23.660 -22.444 + -3.70 2.50 -23.580 -22.496 + -3.70 2.60 -23.517 -22.547 + -3.70 2.70 -23.467 -22.598 + -3.70 2.80 -23.430 -22.648 + -3.70 2.90 -23.403 -22.697 + -3.70 3.00 -23.385 -22.746 + -3.70 3.10 -23.374 -22.793 + -3.70 3.20 -23.368 -22.840 + -3.70 3.30 -23.365 -22.885 + -3.70 3.40 -23.365 -22.930 + -3.70 3.50 -23.366 -22.974 + -3.70 3.60 -23.366 -23.017 + -3.70 3.70 -23.355 -23.060 + -3.70 3.80 -23.329 -23.102 + -3.70 3.90 -23.271 -23.144 + -3.70 4.00 -23.177 -23.185 + -3.70 4.10 -23.047 -23.227 + -3.70 4.20 -22.879 -23.270 + -3.70 4.30 -22.674 -23.312 + -3.70 4.40 -22.445 -23.358 + -3.70 4.50 -22.211 -23.411 + -3.70 4.60 -21.993 -23.473 + -3.70 4.70 -21.794 -23.539 + -3.70 4.80 -21.607 -23.609 + -3.70 4.90 -21.438 -23.735 + -3.70 5.00 -21.346 -23.972 + -3.70 5.10 -21.356 -24.196 + -3.70 5.20 -21.344 -24.328 + -3.70 5.30 -21.313 -24.427 + -3.70 5.40 -21.313 -24.534 + -3.70 5.50 -21.489 -24.696 + -3.70 5.60 -21.682 -24.824 + -3.70 5.70 -21.731 -24.899 + -3.70 5.80 -21.785 -24.980 + -3.70 5.90 -21.811 -25.051 + -3.70 6.00 -21.817 -25.114 + -3.70 6.10 -21.840 -25.188 + -3.70 6.20 -21.899 -25.273 + -3.70 6.30 -22.076 -25.414 + -3.70 6.40 -22.262 -25.596 + -3.70 6.50 -22.407 -25.770 + -3.70 6.60 -22.503 -25.909 + -3.70 6.70 -22.552 -26.016 + -3.70 6.80 -22.573 -26.110 + -3.70 6.90 -22.579 -26.197 + -3.70 7.00 -22.576 -26.269 + -3.70 7.10 -22.582 -26.163 + -3.70 7.20 -22.620 -25.879 + -3.70 7.30 -22.652 -25.763 + -3.70 7.40 -22.657 -25.821 + -3.70 7.50 -22.642 -25.945 + -3.70 7.60 -22.612 -26.070 + -3.70 7.70 -22.574 -26.188 + -3.70 7.80 -22.532 -26.313 + -3.70 7.90 -22.489 -26.451 + -3.70 8.00 -22.447 -26.580 + -3.70 8.10 -22.401 -26.691 + -3.70 8.20 -22.355 -26.780 + -3.70 8.30 -22.306 -26.850 + -3.70 8.40 -22.253 -26.905 + -3.70 8.50 -22.200 -26.947 + -3.70 8.60 -22.142 -26.979 + -3.70 8.70 -22.084 -27.003 + -3.70 8.80 -22.023 -27.021 + -3.70 8.90 -21.959 -27.036 + -3.70 9.00 -21.894 -27.046 + -3.60 1.00 -25.499 -21.733 + -3.60 1.10 -25.413 -21.781 + -3.60 1.20 -25.296 -21.830 + -3.60 1.30 -25.145 -21.880 + -3.60 1.40 -24.975 -21.931 + -3.60 1.50 -24.807 -21.982 + -3.60 1.60 -24.653 -22.034 + -3.60 1.70 -24.515 -22.086 + -3.60 1.80 -24.388 -22.139 + -3.60 1.90 -24.263 -22.192 + -3.60 2.00 -24.135 -22.246 + -3.60 2.10 -24.006 -22.299 + -3.60 2.20 -23.882 -22.353 + -3.60 2.30 -23.770 -22.407 + -3.60 2.40 -23.674 -22.460 + -3.60 2.50 -23.594 -22.513 + -3.60 2.60 -23.529 -22.566 + -3.60 2.70 -23.478 -22.618 + -3.60 2.80 -23.439 -22.670 + -3.60 2.90 -23.412 -22.721 + -3.60 3.00 -23.393 -22.771 + -3.60 3.10 -23.382 -22.820 + -3.60 3.20 -23.377 -22.868 + -3.60 3.30 -23.377 -22.915 + -3.60 3.40 -23.380 -22.962 + -3.60 3.50 -23.383 -23.008 + -3.60 3.60 -23.384 -23.052 + -3.60 3.70 -23.373 -23.096 + -3.60 3.80 -23.345 -23.139 + -3.60 3.90 -23.281 -23.182 + -3.60 4.00 -23.180 -23.224 + -3.60 4.10 -23.042 -23.267 + -3.60 4.20 -22.867 -23.309 + -3.60 4.30 -22.657 -23.352 + -3.60 4.40 -22.424 -23.398 + -3.60 4.50 -22.191 -23.453 + -3.60 4.60 -21.977 -23.520 + -3.60 4.70 -21.783 -23.589 + -3.60 4.80 -21.596 -23.661 + -3.60 4.90 -21.424 -23.799 + -3.60 5.00 -21.334 -24.057 + -3.60 5.10 -21.350 -24.289 + -3.60 5.20 -21.340 -24.423 + -3.60 5.30 -21.310 -24.523 + -3.60 5.40 -21.308 -24.629 + -3.60 5.50 -21.484 -24.791 + -3.60 5.60 -21.680 -24.921 + -3.60 5.70 -21.730 -24.996 + -3.60 5.80 -21.784 -25.077 + -3.60 5.90 -21.810 -25.149 + -3.60 6.00 -21.817 -25.211 + -3.60 6.10 -21.840 -25.286 + -3.60 6.20 -21.896 -25.371 + -3.60 6.30 -22.075 -25.511 + -3.60 6.40 -22.261 -25.693 + -3.60 6.50 -22.406 -25.868 + -3.60 6.60 -22.502 -26.006 + -3.60 6.70 -22.551 -26.114 + -3.60 6.80 -22.573 -26.207 + -3.60 6.90 -22.576 -26.294 + -3.60 7.00 -22.574 -26.371 + -3.60 7.10 -22.582 -26.301 + -3.60 7.20 -22.620 -26.054 + -3.60 7.30 -22.654 -25.948 + -3.60 7.40 -22.660 -26.008 + -3.60 7.50 -22.646 -26.129 + -3.60 7.60 -22.616 -26.251 + -3.60 7.70 -22.580 -26.365 + -3.60 7.80 -22.539 -26.484 + -3.60 7.90 -22.496 -26.612 + -3.60 8.00 -22.455 -26.732 + -3.60 8.10 -22.411 -26.834 + -3.60 8.20 -22.366 -26.915 + -3.60 8.30 -22.318 -26.978 + -3.60 8.40 -22.267 -27.027 + -3.60 8.50 -22.215 -27.064 + -3.60 8.60 -22.159 -27.092 + -3.60 8.70 -22.102 -27.113 + -3.60 8.80 -22.043 -27.129 + -3.60 8.90 -21.981 -27.141 + -3.60 9.00 -21.918 -27.150 + -3.50 1.00 -25.517 -21.754 + -3.50 1.10 -25.427 -21.800 + -3.50 1.20 -25.305 -21.848 + -3.50 1.30 -25.146 -21.896 + -3.50 1.40 -24.968 -21.946 + -3.50 1.50 -24.795 -21.997 + -3.50 1.60 -24.638 -22.048 + -3.50 1.70 -24.501 -22.100 + -3.50 1.80 -24.377 -22.152 + -3.50 1.90 -24.257 -22.205 + -3.50 2.00 -24.135 -22.259 + -3.50 2.10 -24.012 -22.312 + -3.50 2.20 -23.893 -22.366 + -3.50 2.30 -23.783 -22.420 + -3.50 2.40 -23.688 -22.475 + -3.50 2.50 -23.608 -22.529 + -3.50 2.60 -23.543 -22.582 + -3.50 2.70 -23.491 -22.636 + -3.50 2.80 -23.451 -22.689 + -3.50 2.90 -23.422 -22.741 + -3.50 3.00 -23.402 -22.793 + -3.50 3.10 -23.391 -22.844 + -3.50 3.20 -23.387 -22.894 + -3.50 3.30 -23.388 -22.943 + -3.50 3.40 -23.392 -22.992 + -3.50 3.50 -23.398 -23.039 + -3.50 3.60 -23.400 -23.085 + -3.50 3.70 -23.389 -23.131 + -3.50 3.80 -23.359 -23.176 + -3.50 3.90 -23.291 -23.220 + -3.50 4.00 -23.182 -23.263 + -3.50 4.10 -23.037 -23.306 + -3.50 4.20 -22.855 -23.348 + -3.50 4.30 -22.639 -23.392 + -3.50 4.40 -22.404 -23.439 + -3.50 4.50 -22.171 -23.499 + -3.50 4.60 -21.963 -23.571 + -3.50 4.70 -21.773 -23.642 + -3.50 4.80 -21.586 -23.717 + -3.50 4.90 -21.412 -23.866 + -3.50 5.00 -21.324 -24.144 + -3.50 5.10 -21.345 -24.383 + -3.50 5.20 -21.337 -24.518 + -3.50 5.30 -21.308 -24.619 + -3.50 5.40 -21.305 -24.725 + -3.50 5.50 -21.480 -24.887 + -3.50 5.60 -21.679 -25.018 + -3.50 5.70 -21.729 -25.093 + -3.50 5.80 -21.782 -25.174 + -3.50 5.90 -21.809 -25.246 + -3.50 6.00 -21.816 -25.309 + -3.50 6.10 -21.839 -25.383 + -3.50 6.20 -21.894 -25.468 + -3.50 6.30 -22.074 -25.608 + -3.50 6.40 -22.260 -25.789 + -3.50 6.50 -22.404 -25.965 + -3.50 6.60 -22.500 -26.103 + -3.50 6.70 -22.550 -26.211 + -3.50 6.80 -22.571 -26.303 + -3.50 6.90 -22.573 -26.391 + -3.50 7.00 -22.572 -26.471 + -3.50 7.10 -22.579 -26.432 + -3.50 7.20 -22.621 -26.224 + -3.50 7.30 -22.655 -26.129 + -3.50 7.40 -22.662 -26.189 + -3.50 7.50 -22.649 -26.309 + -3.50 7.60 -22.620 -26.427 + -3.50 7.70 -22.584 -26.535 + -3.50 7.80 -22.544 -26.646 + -3.50 7.90 -22.502 -26.765 + -3.50 8.00 -22.462 -26.876 + -3.50 8.10 -22.418 -26.969 + -3.50 8.20 -22.374 -27.043 + -3.50 8.30 -22.328 -27.100 + -3.50 8.40 -22.278 -27.144 + -3.50 8.50 -22.228 -27.177 + -3.50 8.60 -22.173 -27.202 + -3.50 8.70 -22.118 -27.221 + -3.50 8.80 -22.060 -27.235 + -3.50 8.90 -21.999 -27.245 + -3.50 9.00 -21.938 -27.254 + -3.40 1.00 -25.537 -21.777 + -3.40 1.10 -25.445 -21.822 + -3.40 1.20 -25.316 -21.868 + -3.40 1.30 -25.150 -21.915 + -3.40 1.40 -24.965 -21.963 + -3.40 1.50 -24.786 -22.013 + -3.40 1.60 -24.626 -22.063 + -3.40 1.70 -24.488 -22.114 + -3.40 1.80 -24.366 -22.166 + -3.40 1.90 -24.251 -22.219 + -3.40 2.00 -24.134 -22.272 + -3.40 2.10 -24.017 -22.325 + -3.40 2.20 -23.902 -22.379 + -3.40 2.30 -23.795 -22.434 + -3.40 2.40 -23.702 -22.488 + -3.40 2.50 -23.623 -22.543 + -3.40 2.60 -23.558 -22.597 + -3.40 2.70 -23.505 -22.652 + -3.40 2.80 -23.464 -22.706 + -3.40 2.90 -23.434 -22.759 + -3.40 3.00 -23.414 -22.813 + -3.40 3.10 -23.402 -22.865 + -3.40 3.20 -23.397 -22.917 + -3.40 3.30 -23.399 -22.968 + -3.40 3.40 -23.404 -23.018 + -3.40 3.50 -23.410 -23.068 + -3.40 3.60 -23.414 -23.116 + -3.40 3.70 -23.403 -23.163 + -3.40 3.80 -23.371 -23.210 + -3.40 3.90 -23.299 -23.255 + -3.40 4.00 -23.185 -23.300 + -3.40 4.10 -23.032 -23.344 + -3.40 4.20 -22.843 -23.387 + -3.40 4.30 -22.622 -23.432 + -3.40 4.40 -22.383 -23.482 + -3.40 4.50 -22.153 -23.547 + -3.40 4.60 -21.951 -23.625 + -3.40 4.70 -21.765 -23.699 + -3.40 4.80 -21.577 -23.776 + -3.40 4.90 -21.402 -23.937 + -3.40 5.00 -21.315 -24.232 + -3.40 5.10 -21.341 -24.478 + -3.40 5.20 -21.335 -24.614 + -3.40 5.30 -21.307 -24.715 + -3.40 5.40 -21.302 -24.821 + -3.40 5.50 -21.477 -24.982 + -3.40 5.60 -21.678 -25.114 + -3.40 5.70 -21.728 -25.190 + -3.40 5.80 -21.781 -25.270 + -3.40 5.90 -21.809 -25.343 + -3.40 6.00 -21.816 -25.406 + -3.40 6.10 -21.838 -25.480 + -3.40 6.20 -21.894 -25.565 + -3.40 6.30 -22.074 -25.705 + -3.40 6.40 -22.259 -25.885 + -3.40 6.50 -22.402 -26.061 + -3.40 6.60 -22.499 -26.200 + -3.40 6.70 -22.549 -26.306 + -3.40 6.80 -22.569 -26.399 + -3.40 6.90 -22.570 -26.487 + -3.40 7.00 -22.565 -26.569 + -3.40 7.10 -22.576 -26.556 + -3.40 7.20 -22.620 -26.386 + -3.40 7.30 -22.656 -26.305 + -3.40 7.40 -22.664 -26.366 + -3.40 7.50 -22.651 -26.482 + -3.40 7.60 -22.623 -26.595 + -3.40 7.70 -22.587 -26.698 + -3.40 7.80 -22.548 -26.801 + -3.40 7.90 -22.507 -26.909 + -3.40 8.00 -22.467 -27.011 + -3.40 8.10 -22.425 -27.097 + -3.40 8.20 -22.382 -27.164 + -3.40 8.30 -22.336 -27.217 + -3.40 8.40 -22.287 -27.256 + -3.40 8.50 -22.238 -27.286 + -3.40 8.60 -22.184 -27.309 + -3.40 8.70 -22.130 -27.326 + -3.40 8.80 -22.074 -27.339 + -3.40 8.90 -22.015 -27.348 + -3.40 9.00 -21.954 -27.356 + -3.30 1.00 -25.562 -21.803 + -3.30 1.10 -25.465 -21.846 + -3.30 1.20 -25.332 -21.890 + -3.30 1.30 -25.158 -21.936 + -3.30 1.40 -24.967 -21.983 + -3.30 1.50 -24.782 -22.031 + -3.30 1.60 -24.618 -22.080 + -3.30 1.70 -24.479 -22.130 + -3.30 1.80 -24.357 -22.181 + -3.30 1.90 -24.244 -22.233 + -3.30 2.00 -24.132 -22.285 + -3.30 2.10 -24.020 -22.338 + -3.30 2.20 -23.909 -22.392 + -3.30 2.30 -23.806 -22.447 + -3.30 2.40 -23.715 -22.501 + -3.30 2.50 -23.638 -22.556 + -3.30 2.60 -23.573 -22.611 + -3.30 2.70 -23.520 -22.666 + -3.30 2.80 -23.479 -22.721 + -3.30 2.90 -23.448 -22.776 + -3.30 3.00 -23.426 -22.830 + -3.30 3.10 -23.413 -22.884 + -3.30 3.20 -23.408 -22.938 + -3.30 3.30 -23.410 -22.990 + -3.30 3.40 -23.415 -23.042 + -3.30 3.50 -23.422 -23.094 + -3.30 3.60 -23.425 -23.144 + -3.30 3.70 -23.414 -23.193 + -3.30 3.80 -23.381 -23.242 + -3.30 3.90 -23.306 -23.289 + -3.30 4.00 -23.186 -23.335 + -3.30 4.10 -23.027 -23.381 + -3.30 4.20 -22.832 -23.426 + -3.30 4.30 -22.605 -23.472 + -3.30 4.40 -22.363 -23.526 + -3.30 4.50 -22.136 -23.598 + -3.30 4.60 -21.940 -23.683 + -3.30 4.70 -21.758 -23.760 + -3.30 4.80 -21.570 -23.838 + -3.30 4.90 -21.393 -24.012 + -3.30 5.00 -21.308 -24.323 + -3.30 5.10 -21.337 -24.573 + -3.30 5.20 -21.333 -24.710 + -3.30 5.30 -21.305 -24.812 + -3.30 5.40 -21.300 -24.917 + -3.30 5.50 -21.475 -25.078 + -3.30 5.60 -21.677 -25.209 + -3.30 5.70 -21.728 -25.286 + -3.30 5.80 -21.780 -25.367 + -3.30 5.90 -21.808 -25.440 + -3.30 6.00 -21.815 -25.502 + -3.30 6.10 -21.838 -25.577 + -3.30 6.20 -21.894 -25.662 + -3.30 6.30 -22.073 -25.801 + -3.30 6.40 -22.258 -25.980 + -3.30 6.50 -22.400 -26.156 + -3.30 6.60 -22.499 -26.295 + -3.30 6.70 -22.546 -26.401 + -3.30 6.80 -22.566 -26.493 + -3.30 6.90 -22.563 -26.580 + -3.30 7.00 -22.553 -26.663 + -3.30 7.10 -22.570 -26.672 + -3.30 7.20 -22.619 -26.541 + -3.30 7.30 -22.657 -26.474 + -3.30 7.40 -22.666 -26.536 + -3.30 7.50 -22.654 -26.648 + -3.30 7.60 -22.625 -26.756 + -3.30 7.70 -22.590 -26.852 + -3.30 7.80 -22.551 -26.946 + -3.30 7.90 -22.511 -27.043 + -3.30 8.00 -22.472 -27.137 + -3.30 8.10 -22.430 -27.217 + -3.30 8.20 -22.387 -27.279 + -3.30 8.30 -22.342 -27.328 + -3.30 8.40 -22.295 -27.365 + -3.30 8.50 -22.246 -27.392 + -3.30 8.60 -22.194 -27.413 + -3.30 8.70 -22.141 -27.429 + -3.30 8.80 -22.085 -27.441 + -3.30 8.90 -22.027 -27.450 + -3.30 9.00 -21.968 -27.457 + -3.20 1.00 -25.590 -21.833 + -3.20 1.10 -25.490 -21.874 + -3.20 1.20 -25.351 -21.916 + -3.20 1.30 -25.170 -21.960 + -3.20 1.40 -24.972 -22.005 + -3.20 1.50 -24.782 -22.051 + -3.20 1.60 -24.615 -22.099 + -3.20 1.70 -24.473 -22.147 + -3.20 1.80 -24.351 -22.197 + -3.20 1.90 -24.239 -22.248 + -3.20 2.00 -24.130 -22.300 + -3.20 2.10 -24.022 -22.352 + -3.20 2.20 -23.915 -22.406 + -3.20 2.30 -23.816 -22.460 + -3.20 2.40 -23.727 -22.514 + -3.20 2.50 -23.652 -22.569 + -3.20 2.60 -23.588 -22.624 + -3.20 2.70 -23.536 -22.680 + -3.20 2.80 -23.495 -22.735 + -3.20 2.90 -23.463 -22.791 + -3.20 3.00 -23.441 -22.846 + -3.20 3.10 -23.427 -22.901 + -3.20 3.20 -23.420 -22.956 + -3.20 3.30 -23.421 -23.010 + -3.20 3.40 -23.426 -23.064 + -3.20 3.50 -23.432 -23.117 + -3.20 3.60 -23.435 -23.169 + -3.20 3.70 -23.424 -23.220 + -3.20 3.80 -23.390 -23.271 + -3.20 3.90 -23.313 -23.320 + -3.20 4.00 -23.188 -23.368 + -3.20 4.10 -23.022 -23.416 + -3.20 4.20 -22.821 -23.462 + -3.20 4.30 -22.588 -23.510 + -3.20 4.40 -22.343 -23.569 + -3.20 4.50 -22.119 -23.652 + -3.20 4.60 -21.931 -23.745 + -3.20 4.70 -21.752 -23.825 + -3.20 4.80 -21.563 -23.905 + -3.20 4.90 -21.385 -24.090 + -3.20 5.00 -21.302 -24.415 + -3.20 5.10 -21.334 -24.669 + -3.20 5.20 -21.331 -24.806 + -3.20 5.30 -21.304 -24.909 + -3.20 5.40 -21.298 -25.013 + -3.20 5.50 -21.473 -25.173 + -3.20 5.60 -21.677 -25.305 + -3.20 5.70 -21.728 -25.381 + -3.20 5.80 -21.780 -25.463 + -3.20 5.90 -21.808 -25.536 + -3.20 6.00 -21.815 -25.599 + -3.20 6.10 -21.837 -25.673 + -3.20 6.20 -21.893 -25.758 + -3.20 6.30 -22.073 -25.896 + -3.20 6.40 -22.256 -26.074 + -3.20 6.50 -22.400 -26.250 + -3.20 6.60 -22.497 -26.389 + -3.20 6.70 -22.545 -26.495 + -3.20 6.80 -22.562 -26.586 + -3.20 6.90 -22.552 -26.672 + -3.20 7.00 -22.551 -26.756 + -3.20 7.10 -22.565 -26.780 + -3.20 7.20 -22.618 -26.686 + -3.20 7.30 -22.655 -26.634 + -3.20 7.40 -22.666 -26.698 + -3.20 7.50 -22.655 -26.805 + -3.20 7.60 -22.627 -26.907 + -3.20 7.70 -22.593 -26.996 + -3.20 7.80 -22.554 -27.081 + -3.20 7.90 -22.514 -27.167 + -3.20 8.00 -22.475 -27.254 + -3.20 8.10 -22.434 -27.329 + -3.20 8.20 -22.392 -27.388 + -3.20 8.30 -22.348 -27.434 + -3.20 8.40 -22.301 -27.469 + -3.20 8.50 -22.253 -27.496 + -3.20 8.60 -22.201 -27.516 + -3.20 8.70 -22.149 -27.531 + -3.20 8.80 -22.094 -27.542 + -3.20 8.90 -22.037 -27.551 + -3.20 9.00 -21.979 -27.558 + -3.10 1.00 -25.622 -21.867 + -3.10 1.10 -25.518 -21.905 + -3.10 1.20 -25.374 -21.945 + -3.10 1.30 -25.187 -21.987 + -3.10 1.40 -24.982 -22.030 + -3.10 1.50 -24.786 -22.074 + -3.10 1.60 -24.615 -22.120 + -3.10 1.70 -24.470 -22.167 + -3.10 1.80 -24.347 -22.216 + -3.10 1.90 -24.236 -22.265 + -3.10 2.00 -24.129 -22.316 + -3.10 2.10 -24.024 -22.367 + -3.10 2.20 -23.921 -22.420 + -3.10 2.30 -23.824 -22.473 + -3.10 2.40 -23.738 -22.527 + -3.10 2.50 -23.665 -22.582 + -3.10 2.60 -23.603 -22.637 + -3.10 2.70 -23.552 -22.692 + -3.10 2.80 -23.510 -22.748 + -3.10 2.90 -23.479 -22.804 + -3.10 3.00 -23.456 -22.860 + -3.10 3.10 -23.441 -22.916 + -3.10 3.20 -23.433 -22.972 + -3.10 3.30 -23.432 -23.028 + -3.10 3.40 -23.436 -23.083 + -3.10 3.50 -23.442 -23.137 + -3.10 3.60 -23.444 -23.191 + -3.10 3.70 -23.432 -23.245 + -3.10 3.80 -23.397 -23.297 + -3.10 3.90 -23.318 -23.348 + -3.10 4.00 -23.189 -23.399 + -3.10 4.10 -23.018 -23.448 + -3.10 4.20 -22.809 -23.497 + -3.10 4.30 -22.569 -23.548 + -3.10 4.40 -22.321 -23.613 + -3.10 4.50 -22.103 -23.708 + -3.10 4.60 -21.923 -23.811 + -3.10 4.70 -21.747 -23.893 + -3.10 4.80 -21.557 -23.976 + -3.10 4.90 -21.377 -24.171 + -3.10 5.00 -21.297 -24.509 + -3.10 5.10 -21.332 -24.765 + -3.10 5.20 -21.330 -24.903 + -3.10 5.30 -21.303 -25.005 + -3.10 5.40 -21.297 -25.109 + -3.10 5.50 -21.472 -25.267 + -3.10 5.60 -21.676 -25.399 + -3.10 5.70 -21.727 -25.475 + -3.10 5.80 -21.779 -25.558 + -3.10 5.90 -21.808 -25.631 + -3.10 6.00 -21.815 -25.694 + -3.10 6.10 -21.837 -25.768 + -3.10 6.20 -21.893 -25.853 + -3.10 6.30 -22.072 -25.990 + -3.10 6.40 -22.255 -26.167 + -3.10 6.50 -22.399 -26.343 + -3.10 6.60 -22.496 -26.482 + -3.10 6.70 -22.543 -26.586 + -3.10 6.80 -22.555 -26.676 + -3.10 6.90 -22.551 -26.762 + -3.10 7.00 -22.544 -26.845 + -3.10 7.10 -22.563 -26.883 + -3.10 7.20 -22.615 -26.820 + -3.10 7.30 -22.656 -26.786 + -3.10 7.40 -22.667 -26.850 + -3.10 7.50 -22.656 -26.953 + -3.10 7.60 -22.629 -27.048 + -3.10 7.70 -22.594 -27.130 + -3.10 7.80 -22.556 -27.205 + -3.10 7.90 -22.516 -27.281 + -3.10 8.00 -22.478 -27.362 + -3.10 8.10 -22.437 -27.433 + -3.10 8.20 -22.396 -27.491 + -3.10 8.30 -22.352 -27.536 + -3.10 8.40 -22.305 -27.570 + -3.10 8.50 -22.258 -27.596 + -3.10 8.60 -22.207 -27.615 + -3.10 8.70 -22.156 -27.631 + -3.10 8.80 -22.102 -27.642 + -3.10 8.90 -22.046 -27.651 + -3.10 9.00 -21.988 -27.658 + -3.00 1.00 -25.658 -21.904 + -3.00 1.10 -25.551 -21.940 + -3.00 1.20 -25.401 -21.978 + -3.00 1.30 -25.208 -22.018 + -3.00 1.40 -24.996 -22.058 + -3.00 1.50 -24.795 -22.101 + -3.00 1.60 -24.620 -22.145 + -3.00 1.70 -24.472 -22.190 + -3.00 1.80 -24.347 -22.237 + -3.00 1.90 -24.236 -22.285 + -3.00 2.00 -24.130 -22.334 + -3.00 2.10 -24.027 -22.384 + -3.00 2.20 -23.926 -22.435 + -3.00 2.30 -23.832 -22.488 + -3.00 2.40 -23.748 -22.541 + -3.00 2.50 -23.677 -22.595 + -3.00 2.60 -23.616 -22.650 + -3.00 2.70 -23.566 -22.705 + -3.00 2.80 -23.526 -22.761 + -3.00 2.90 -23.494 -22.817 + -3.00 3.00 -23.471 -22.874 + -3.00 3.10 -23.455 -22.930 + -3.00 3.20 -23.446 -22.987 + -3.00 3.30 -23.444 -23.043 + -3.00 3.40 -23.447 -23.100 + -3.00 3.50 -23.451 -23.156 + -3.00 3.60 -23.451 -23.211 + -3.00 3.70 -23.438 -23.266 + -3.00 3.80 -23.403 -23.320 + -3.00 3.90 -23.323 -23.374 + -3.00 4.00 -23.190 -23.426 + -3.00 4.10 -23.012 -23.478 + -3.00 4.20 -22.797 -23.528 + -3.00 4.30 -22.549 -23.583 + -3.00 4.40 -22.299 -23.657 + -3.00 4.50 -22.088 -23.766 + -3.00 4.60 -21.917 -23.879 + -3.00 4.70 -21.743 -23.966 + -3.00 4.80 -21.551 -24.051 + -3.00 4.90 -21.371 -24.255 + -3.00 5.00 -21.293 -24.603 + -3.00 5.10 -21.330 -24.862 + -3.00 5.20 -21.329 -24.999 + -3.00 5.30 -21.303 -25.101 + -3.00 5.40 -21.295 -25.204 + -3.00 5.50 -21.470 -25.361 + -3.00 5.60 -21.676 -25.492 + -3.00 5.70 -21.726 -25.568 + -3.00 5.80 -21.779 -25.652 + -3.00 5.90 -21.808 -25.725 + -3.00 6.00 -21.815 -25.788 + -3.00 6.10 -21.837 -25.861 + -3.00 6.20 -21.893 -25.947 + -3.00 6.30 -22.071 -26.083 + -3.00 6.40 -22.255 -26.258 + -3.00 6.50 -22.399 -26.435 + -3.00 6.60 -22.495 -26.573 + -3.00 6.70 -22.540 -26.676 + -3.00 6.80 -22.555 -26.765 + -3.00 6.90 -22.545 -26.848 + -3.00 7.00 -22.543 -26.933 + -3.00 7.10 -22.559 -26.977 + -3.00 7.20 -22.614 -26.944 + -3.00 7.30 -22.657 -26.926 + -3.00 7.40 -22.668 -26.991 + -3.00 7.50 -22.657 -27.088 + -3.00 7.60 -22.630 -27.178 + -3.00 7.70 -22.596 -27.251 + -3.00 7.80 -22.558 -27.318 + -3.00 7.90 -22.518 -27.384 + -3.00 8.00 -22.480 -27.461 + -3.00 8.10 -22.440 -27.531 + -3.00 8.20 -22.399 -27.587 + -3.00 8.30 -22.355 -27.633 + -3.00 8.40 -22.309 -27.667 + -3.00 8.50 -22.263 -27.693 + -3.00 8.60 -22.212 -27.713 + -3.00 8.70 -22.161 -27.729 + -3.00 8.80 -22.108 -27.741 + -3.00 8.90 -22.052 -27.750 + -3.00 9.00 -21.996 -27.757 + -2.90 1.00 -25.698 -21.946 + -2.90 1.10 -25.588 -21.980 + -2.90 1.20 -25.433 -22.015 + -2.90 1.30 -25.233 -22.052 + -2.90 1.40 -25.015 -22.091 + -2.90 1.50 -24.809 -22.131 + -2.90 1.60 -24.630 -22.172 + -2.90 1.70 -24.479 -22.216 + -2.90 1.80 -24.352 -22.260 + -2.90 1.90 -24.239 -22.307 + -2.90 2.00 -24.133 -22.354 + -2.90 2.10 -24.031 -22.403 + -2.90 2.20 -23.932 -22.453 + -2.90 2.30 -23.840 -22.504 + -2.90 2.40 -23.758 -22.556 + -2.90 2.50 -23.688 -22.610 + -2.90 2.60 -23.629 -22.664 + -2.90 2.70 -23.580 -22.719 + -2.90 2.80 -23.540 -22.774 + -2.90 2.90 -23.509 -22.830 + -2.90 3.00 -23.486 -22.887 + -2.90 3.10 -23.469 -22.944 + -2.90 3.20 -23.459 -23.001 + -2.90 3.30 -23.456 -23.058 + -2.90 3.40 -23.456 -23.115 + -2.90 3.50 -23.458 -23.172 + -2.90 3.60 -23.456 -23.229 + -2.90 3.70 -23.441 -23.285 + -2.90 3.80 -23.406 -23.341 + -2.90 3.90 -23.326 -23.396 + -2.90 4.00 -23.190 -23.451 + -2.90 4.10 -23.006 -23.504 + -2.90 4.20 -22.783 -23.558 + -2.90 4.30 -22.528 -23.617 + -2.90 4.40 -22.275 -23.701 + -2.90 4.50 -22.073 -23.827 + -2.90 4.60 -21.912 -23.950 + -2.90 4.70 -21.741 -24.042 + -2.90 4.80 -21.547 -24.129 + -2.90 4.90 -21.366 -24.342 + -2.90 5.00 -21.289 -24.698 + -2.90 5.10 -21.328 -24.958 + -2.90 5.20 -21.328 -25.094 + -2.90 5.30 -21.302 -25.196 + -2.90 5.40 -21.295 -25.299 + -2.90 5.50 -21.469 -25.453 + -2.90 5.60 -21.676 -25.583 + -2.90 5.70 -21.726 -25.660 + -2.90 5.80 -21.779 -25.745 + -2.90 5.90 -21.808 -25.818 + -2.90 6.00 -21.814 -25.881 + -2.90 6.10 -21.837 -25.954 + -2.90 6.20 -21.893 -26.039 + -2.90 6.30 -22.071 -26.174 + -2.90 6.40 -22.255 -26.347 + -2.90 6.50 -22.399 -26.524 + -2.90 6.60 -22.494 -26.663 + -2.90 6.70 -22.539 -26.764 + -2.90 6.80 -22.549 -26.847 + -2.90 6.90 -22.544 -26.933 + -2.90 7.00 -22.529 -27.009 + -2.90 7.10 -22.553 -27.063 + -2.90 7.20 -22.614 -27.057 + -2.90 7.30 -22.657 -27.053 + -2.90 7.40 -22.668 -27.119 + -2.90 7.50 -22.658 -27.211 + -2.90 7.60 -22.631 -27.294 + -2.90 7.70 -22.597 -27.361 + -2.90 7.80 -22.559 -27.419 + -2.90 7.90 -22.520 -27.476 + -2.90 8.00 -22.482 -27.551 + -2.90 8.10 -22.442 -27.620 + -2.90 8.20 -22.401 -27.677 + -2.90 8.30 -22.358 -27.724 + -2.90 8.40 -22.312 -27.760 + -2.90 8.50 -22.266 -27.787 + -2.90 8.60 -22.216 -27.808 + -2.90 8.70 -22.165 -27.825 + -2.90 8.80 -22.113 -27.837 + -2.90 8.90 -22.058 -27.848 + -2.90 9.00 -22.002 -27.855 + -2.80 1.00 -25.743 -21.992 + -2.80 1.10 -25.629 -22.023 + -2.80 1.20 -25.469 -22.056 + -2.80 1.30 -25.263 -22.090 + -2.80 1.40 -25.039 -22.127 + -2.80 1.50 -24.828 -22.164 + -2.80 1.60 -24.644 -22.204 + -2.80 1.70 -24.490 -22.245 + -2.80 1.80 -24.360 -22.287 + -2.80 1.90 -24.246 -22.332 + -2.80 2.00 -24.139 -22.377 + -2.80 2.10 -24.038 -22.424 + -2.80 2.20 -23.939 -22.473 + -2.80 2.30 -23.848 -22.523 + -2.80 2.40 -23.768 -22.573 + -2.80 2.50 -23.699 -22.626 + -2.80 2.60 -23.641 -22.679 + -2.80 2.70 -23.593 -22.733 + -2.80 2.80 -23.554 -22.788 + -2.80 2.90 -23.523 -22.843 + -2.80 3.00 -23.500 -22.900 + -2.80 3.10 -23.482 -22.957 + -2.80 3.20 -23.471 -23.014 + -2.80 3.30 -23.467 -23.071 + -2.80 3.40 -23.465 -23.129 + -2.80 3.50 -23.464 -23.187 + -2.80 3.60 -23.458 -23.245 + -2.80 3.70 -23.441 -23.303 + -2.80 3.80 -23.407 -23.360 + -2.80 3.90 -23.327 -23.417 + -2.80 4.00 -23.189 -23.473 + -2.80 4.10 -22.999 -23.528 + -2.80 4.20 -22.767 -23.584 + -2.80 4.30 -22.503 -23.648 + -2.80 4.40 -22.249 -23.745 + -2.80 4.50 -22.059 -23.890 + -2.80 4.60 -21.908 -24.024 + -2.80 4.70 -21.738 -24.121 + -2.80 4.80 -21.543 -24.210 + -2.80 4.90 -21.362 -24.431 + -2.80 5.00 -21.287 -24.794 + -2.80 5.10 -21.327 -25.055 + -2.80 5.20 -21.327 -25.190 + -2.80 5.30 -21.302 -25.291 + -2.80 5.40 -21.294 -25.392 + -2.80 5.50 -21.469 -25.544 + -2.80 5.60 -21.675 -25.673 + -2.80 5.70 -21.726 -25.750 + -2.80 5.80 -21.779 -25.836 + -2.80 5.90 -21.808 -25.910 + -2.80 6.00 -21.814 -25.972 + -2.80 6.10 -21.837 -26.045 + -2.80 6.20 -21.892 -26.130 + -2.80 6.30 -22.071 -26.263 + -2.80 6.40 -22.255 -26.433 + -2.80 6.50 -22.399 -26.611 + -2.80 6.60 -22.494 -26.749 + -2.80 6.70 -22.539 -26.848 + -2.80 6.80 -22.548 -26.930 + -2.80 6.90 -22.522 -26.996 + -2.80 7.00 -22.522 -27.082 + -2.80 7.10 -22.551 -27.145 + -2.80 7.20 -22.614 -27.159 + -2.80 7.30 -22.657 -27.167 + -2.80 7.40 -22.669 -27.234 + -2.80 7.50 -22.658 -27.320 + -2.80 7.60 -22.632 -27.398 + -2.80 7.70 -22.598 -27.458 + -2.80 7.80 -22.560 -27.508 + -2.80 7.90 -22.521 -27.557 + -2.80 8.00 -22.484 -27.632 + -2.80 8.10 -22.443 -27.702 + -2.80 8.20 -22.403 -27.761 + -2.80 8.30 -22.360 -27.811 + -2.80 8.40 -22.315 -27.849 + -2.80 8.50 -22.269 -27.878 + -2.80 8.60 -22.219 -27.901 + -2.80 8.70 -22.169 -27.919 + -2.80 8.80 -22.117 -27.933 + -2.80 8.90 -22.062 -27.944 + -2.80 9.00 -22.007 -27.953 + -2.70 1.00 -25.792 -22.041 + -2.70 1.10 -25.674 -22.070 + -2.70 1.20 -25.509 -22.101 + -2.70 1.30 -25.297 -22.133 + -2.70 1.40 -25.067 -22.167 + -2.70 1.50 -24.851 -22.202 + -2.70 1.60 -24.662 -22.239 + -2.70 1.70 -24.505 -22.278 + -2.70 1.80 -24.372 -22.318 + -2.70 1.90 -24.256 -22.360 + -2.70 2.00 -24.149 -22.403 + -2.70 2.10 -24.046 -22.449 + -2.70 2.20 -23.948 -22.495 + -2.70 2.30 -23.858 -22.543 + -2.70 2.40 -23.778 -22.593 + -2.70 2.50 -23.710 -22.643 + -2.70 2.60 -23.653 -22.695 + -2.70 2.70 -23.605 -22.748 + -2.70 2.80 -23.567 -22.802 + -2.70 2.90 -23.536 -22.857 + -2.70 3.00 -23.512 -22.913 + -2.70 3.10 -23.494 -22.970 + -2.70 3.20 -23.483 -23.027 + -2.70 3.30 -23.476 -23.084 + -2.70 3.40 -23.472 -23.143 + -2.70 3.50 -23.467 -23.201 + -2.70 3.60 -23.458 -23.259 + -2.70 3.70 -23.438 -23.318 + -2.70 3.80 -23.404 -23.377 + -2.70 3.90 -23.325 -23.435 + -2.70 4.00 -23.185 -23.493 + -2.70 4.10 -22.990 -23.550 + -2.70 4.20 -22.749 -23.608 + -2.70 4.30 -22.476 -23.677 + -2.70 4.40 -22.223 -23.790 + -2.70 4.50 -22.047 -23.954 + -2.70 4.60 -21.904 -24.100 + -2.70 4.70 -21.737 -24.203 + -2.70 4.80 -21.540 -24.295 + -2.70 4.90 -21.358 -24.522 + -2.70 5.00 -21.285 -24.891 + -2.70 5.10 -21.326 -25.151 + -2.70 5.20 -21.327 -25.284 + -2.70 5.30 -21.301 -25.385 + -2.70 5.40 -21.293 -25.485 + -2.70 5.50 -21.468 -25.633 + -2.70 5.60 -21.675 -25.760 + -2.70 5.70 -21.726 -25.839 + -2.70 5.80 -21.779 -25.925 + -2.70 5.90 -21.807 -25.999 + -2.70 6.00 -21.814 -26.062 + -2.70 6.10 -21.837 -26.133 + -2.70 6.20 -21.892 -26.218 + -2.70 6.30 -22.071 -26.350 + -2.70 6.40 -22.255 -26.517 + -2.70 6.50 -22.399 -26.696 + -2.70 6.60 -22.493 -26.833 + -2.70 6.70 -22.535 -26.927 + -2.70 6.80 -22.533 -26.986 + -2.70 6.90 -22.516 -27.068 + -2.70 7.00 -22.514 -27.150 + -2.70 7.10 -22.549 -27.220 + -2.70 7.20 -22.610 -27.242 + -2.70 7.30 -22.657 -27.266 + -2.70 7.40 -22.669 -27.333 + -2.70 7.50 -22.659 -27.415 + -2.70 7.60 -22.632 -27.488 + -2.70 7.70 -22.599 -27.542 + -2.70 7.80 -22.561 -27.585 + -2.70 7.90 -22.522 -27.628 + -2.70 8.00 -22.485 -27.704 + -2.70 8.10 -22.445 -27.776 + -2.70 8.20 -22.404 -27.839 + -2.70 8.30 -22.362 -27.892 + -2.70 8.40 -22.317 -27.933 + -2.70 8.50 -22.271 -27.966 + -2.70 8.60 -22.221 -27.991 + -2.70 8.70 -22.172 -28.011 + -2.70 8.80 -22.120 -28.027 + -2.70 8.90 -22.065 -28.040 + -2.70 9.00 -22.010 -28.049 + -2.60 1.00 -25.844 -22.095 + -2.60 1.10 -25.724 -22.122 + -2.60 1.20 -25.554 -22.150 + -2.60 1.30 -25.336 -22.180 + -2.60 1.40 -25.099 -22.211 + -2.60 1.50 -24.878 -22.244 + -2.60 1.60 -24.685 -22.278 + -2.60 1.70 -24.524 -22.315 + -2.60 1.80 -24.389 -22.352 + -2.60 1.90 -24.271 -22.392 + -2.60 2.00 -24.162 -22.433 + -2.60 2.10 -24.058 -22.476 + -2.60 2.20 -23.960 -22.521 + -2.60 2.30 -23.869 -22.567 + -2.60 2.40 -23.789 -22.614 + -2.60 2.50 -23.721 -22.663 + -2.60 2.60 -23.665 -22.714 + -2.60 2.70 -23.618 -22.765 + -2.60 2.80 -23.580 -22.818 + -2.60 2.90 -23.549 -22.872 + -2.60 3.00 -23.525 -22.927 + -2.60 3.10 -23.506 -22.983 + -2.60 3.20 -23.493 -23.040 + -2.60 3.30 -23.485 -23.098 + -2.60 3.40 -23.478 -23.156 + -2.60 3.50 -23.469 -23.214 + -2.60 3.60 -23.455 -23.273 + -2.60 3.70 -23.432 -23.332 + -2.60 3.80 -23.397 -23.392 + -2.60 3.90 -23.321 -23.451 + -2.60 4.00 -23.180 -23.510 + -2.60 4.10 -22.979 -23.569 + -2.60 4.20 -22.728 -23.630 + -2.60 4.30 -22.445 -23.705 + -2.60 4.40 -22.195 -23.835 + -2.60 4.50 -22.035 -24.021 + -2.60 4.60 -21.901 -24.178 + -2.60 4.70 -21.736 -24.287 + -2.60 4.80 -21.537 -24.382 + -2.60 4.90 -21.355 -24.614 + -2.60 5.00 -21.283 -24.987 + -2.60 5.10 -21.325 -25.246 + -2.60 5.20 -21.326 -25.378 + -2.60 5.30 -21.301 -25.477 + -2.60 5.40 -21.293 -25.576 + -2.60 5.50 -21.468 -25.719 + -2.60 5.60 -21.675 -25.845 + -2.60 5.70 -21.726 -25.924 + -2.60 5.80 -21.778 -26.012 + -2.60 5.90 -21.807 -26.086 + -2.60 6.00 -21.814 -26.148 + -2.60 6.10 -21.837 -26.220 + -2.60 6.20 -21.892 -26.305 + -2.60 6.30 -22.071 -26.434 + -2.60 6.40 -22.255 -26.598 + -2.60 6.50 -22.398 -26.776 + -2.60 6.60 -22.493 -26.913 + -2.60 6.70 -22.534 -27.004 + -2.60 6.80 -22.528 -27.055 + -2.60 6.90 -22.501 -27.127 + -2.60 7.00 -22.491 -27.171 + -2.60 7.10 -22.535 -27.247 + -2.60 7.20 -22.610 -27.319 + -2.60 7.30 -22.657 -27.351 + -2.60 7.40 -22.669 -27.418 + -2.60 7.50 -22.659 -27.496 + -2.60 7.60 -22.633 -27.565 + -2.60 7.70 -22.599 -27.615 + -2.60 7.80 -22.562 -27.652 + -2.60 7.90 -22.523 -27.689 + -2.60 8.00 -22.486 -27.767 + -2.60 8.10 -22.446 -27.843 + -2.60 8.20 -22.406 -27.910 + -2.60 8.30 -22.363 -27.968 + -2.60 8.40 -22.318 -28.013 + -2.60 8.50 -22.273 -28.049 + -2.60 8.60 -22.223 -28.078 + -2.60 8.70 -22.174 -28.101 + -2.60 8.80 -22.122 -28.119 + -2.60 8.90 -22.068 -28.134 + -2.60 9.00 -22.013 -28.145 + -2.50 1.00 -25.901 -22.153 + -2.50 1.10 -25.777 -22.178 + -2.50 1.20 -25.602 -22.203 + -2.50 1.30 -25.378 -22.231 + -2.50 1.40 -25.136 -22.259 + -2.50 1.50 -24.909 -22.290 + -2.50 1.60 -24.712 -22.322 + -2.50 1.70 -24.548 -22.355 + -2.50 1.80 -24.410 -22.391 + -2.50 1.90 -24.289 -22.428 + -2.50 2.00 -24.178 -22.467 + -2.50 2.10 -24.073 -22.507 + -2.50 2.20 -23.974 -22.550 + -2.50 2.30 -23.883 -22.594 + -2.50 2.40 -23.802 -22.639 + -2.50 2.50 -23.734 -22.686 + -2.50 2.60 -23.678 -22.735 + -2.50 2.70 -23.631 -22.785 + -2.50 2.80 -23.593 -22.836 + -2.50 2.90 -23.562 -22.889 + -2.50 3.00 -23.537 -22.943 + -2.50 3.10 -23.518 -22.998 + -2.50 3.20 -23.504 -23.054 + -2.50 3.30 -23.494 -23.111 + -2.50 3.40 -23.484 -23.169 + -2.50 3.50 -23.471 -23.227 + -2.50 3.60 -23.452 -23.286 + -2.50 3.70 -23.424 -23.346 + -2.50 3.80 -23.388 -23.406 + -2.50 3.90 -23.313 -23.466 + -2.50 4.00 -23.172 -23.526 + -2.50 4.10 -22.966 -23.586 + -2.50 4.20 -22.704 -23.649 + -2.50 4.30 -22.412 -23.732 + -2.50 4.40 -22.167 -23.881 + -2.50 4.50 -22.024 -24.088 + -2.50 4.60 -21.899 -24.257 + -2.50 4.70 -21.735 -24.374 + -2.50 4.80 -21.535 -24.471 + -2.50 4.90 -21.352 -24.708 + -2.50 5.00 -21.282 -25.084 + -2.50 5.10 -21.325 -25.341 + -2.50 5.20 -21.326 -25.470 + -2.50 5.30 -21.301 -25.568 + -2.50 5.40 -21.293 -25.665 + -2.50 5.50 -21.467 -25.803 + -2.50 5.60 -21.675 -25.926 + -2.50 5.70 -21.726 -26.007 + -2.50 5.80 -21.778 -26.097 + -2.50 5.90 -21.807 -26.171 + -2.50 6.00 -21.814 -26.233 + -2.50 6.10 -21.837 -26.303 + -2.50 6.20 -21.892 -26.388 + -2.50 6.30 -22.070 -26.514 + -2.50 6.40 -22.255 -26.674 + -2.50 6.50 -22.398 -26.854 + -2.50 6.60 -22.491 -26.987 + -2.50 6.70 -22.522 -27.050 + -2.50 6.80 -22.504 -27.103 + -2.50 6.90 -22.492 -27.172 + -2.50 7.00 -22.491 -27.227 + -2.50 7.10 -22.535 -27.303 + -2.50 7.20 -22.610 -27.385 + -2.50 7.30 -22.658 -27.422 + -2.50 7.40 -22.670 -27.489 + -2.50 7.50 -22.660 -27.564 + -2.50 7.60 -22.633 -27.630 + -2.50 7.70 -22.600 -27.676 + -2.50 7.80 -22.562 -27.709 + -2.50 7.90 -22.523 -27.742 + -2.50 8.00 -22.486 -27.822 + -2.50 8.10 -22.446 -27.903 + -2.50 8.20 -22.407 -27.975 + -2.50 8.30 -22.364 -28.038 + -2.50 8.40 -22.320 -28.088 + -2.50 8.50 -22.274 -28.129 + -2.50 8.60 -22.225 -28.161 + -2.50 8.70 -22.176 -28.188 + -2.50 8.80 -22.124 -28.209 + -2.50 8.90 -22.070 -28.226 + -2.50 9.00 -22.016 -28.239 + -2.40 1.00 -25.962 -22.215 + -2.40 1.10 -25.834 -22.237 + -2.40 1.20 -25.655 -22.261 + -2.40 1.30 -25.424 -22.285 + -2.40 1.40 -25.176 -22.312 + -2.40 1.50 -24.944 -22.340 + -2.40 1.60 -24.744 -22.369 + -2.40 1.70 -24.575 -22.401 + -2.40 1.80 -24.434 -22.433 + -2.40 1.90 -24.311 -22.468 + -2.40 2.00 -24.198 -22.504 + -2.40 2.10 -24.091 -22.543 + -2.40 2.20 -23.990 -22.583 + -2.40 2.30 -23.898 -22.624 + -2.40 2.40 -23.818 -22.667 + -2.40 2.50 -23.749 -22.712 + -2.40 2.60 -23.692 -22.759 + -2.40 2.70 -23.645 -22.807 + -2.40 2.80 -23.607 -22.857 + -2.40 2.90 -23.576 -22.908 + -2.40 3.00 -23.551 -22.960 + -2.40 3.10 -23.531 -23.014 + -2.40 3.20 -23.516 -23.069 + -2.40 3.30 -23.504 -23.125 + -2.40 3.40 -23.491 -23.183 + -2.40 3.50 -23.474 -23.241 + -2.40 3.60 -23.449 -23.300 + -2.40 3.70 -23.417 -23.359 + -2.40 3.80 -23.379 -23.419 + -2.40 3.90 -23.304 -23.480 + -2.40 4.00 -23.162 -23.541 + -2.40 4.10 -22.951 -23.602 + -2.40 4.20 -22.678 -23.667 + -2.40 4.30 -22.376 -23.757 + -2.40 4.40 -22.139 -23.929 + -2.40 4.50 -22.014 -24.157 + -2.40 4.60 -21.896 -24.337 + -2.40 4.70 -21.734 -24.463 + -2.40 4.80 -21.534 -24.562 + -2.40 4.90 -21.350 -24.803 + -2.40 5.00 -21.280 -25.180 + -2.40 5.10 -21.324 -25.435 + -2.40 5.20 -21.325 -25.560 + -2.40 5.30 -21.301 -25.657 + -2.40 5.40 -21.292 -25.751 + -2.40 5.50 -21.467 -25.883 + -2.40 5.60 -21.675 -26.004 + -2.40 5.70 -21.726 -26.085 + -2.40 5.80 -21.778 -26.177 + -2.40 5.90 -21.807 -26.252 + -2.40 6.00 -21.814 -26.314 + -2.40 6.10 -21.837 -26.383 + -2.40 6.20 -21.892 -26.468 + -2.40 6.30 -22.070 -26.591 + -2.40 6.40 -22.255 -26.746 + -2.40 6.50 -22.398 -26.926 + -2.40 6.60 -22.491 -27.060 + -2.40 6.70 -22.497 -27.088 + -2.40 6.80 -22.502 -27.160 + -2.40 6.90 -22.492 -27.227 + -2.40 7.00 -22.491 -27.277 + -2.40 7.10 -22.535 -27.352 + -2.40 7.20 -22.610 -27.441 + -2.40 7.30 -22.658 -27.481 + -2.40 7.40 -22.670 -27.547 + -2.40 7.50 -22.660 -27.620 + -2.40 7.60 -22.634 -27.684 + -2.40 7.70 -22.600 -27.727 + -2.40 7.80 -22.563 -27.757 + -2.40 7.90 -22.524 -27.786 + -2.40 8.00 -22.487 -27.870 + -2.40 8.10 -22.447 -27.955 + -2.40 8.20 -22.407 -28.033 + -2.40 8.30 -22.365 -28.103 + -2.40 8.40 -22.320 -28.158 + -2.40 8.50 -22.275 -28.203 + -2.40 8.60 -22.226 -28.241 + -2.40 8.70 -22.177 -28.271 + -2.40 8.80 -22.126 -28.296 + -2.40 8.90 -22.072 -28.316 + -2.40 9.00 -22.018 -28.332 + -2.30 1.00 -26.026 -22.280 + -2.30 1.10 -25.895 -22.300 + -2.30 1.20 -25.711 -22.322 + -2.30 1.30 -25.474 -22.344 + -2.30 1.40 -25.220 -22.368 + -2.30 1.50 -24.983 -22.394 + -2.30 1.60 -24.779 -22.421 + -2.30 1.70 -24.607 -22.450 + -2.30 1.80 -24.463 -22.480 + -2.30 1.90 -24.337 -22.512 + -2.30 2.00 -24.221 -22.546 + -2.30 2.10 -24.113 -22.582 + -2.30 2.20 -24.010 -22.619 + -2.30 2.30 -23.917 -22.658 + -2.30 2.40 -23.835 -22.699 + -2.30 2.50 -23.766 -22.742 + -2.30 2.60 -23.708 -22.786 + -2.30 2.70 -23.661 -22.832 + -2.30 2.80 -23.622 -22.880 + -2.30 2.90 -23.591 -22.929 + -2.30 3.00 -23.565 -22.980 + -2.30 3.10 -23.544 -23.032 + -2.30 3.20 -23.528 -23.086 + -2.30 3.30 -23.515 -23.141 + -2.30 3.40 -23.499 -23.197 + -2.30 3.50 -23.478 -23.255 + -2.30 3.60 -23.449 -23.313 + -2.30 3.70 -23.411 -23.372 + -2.30 3.80 -23.369 -23.433 + -2.30 3.90 -23.293 -23.493 + -2.30 4.00 -23.150 -23.555 + -2.30 4.10 -22.934 -23.617 + -2.30 4.20 -22.648 -23.683 + -2.30 4.30 -22.337 -23.783 + -2.30 4.40 -22.112 -23.980 + -2.30 4.50 -22.006 -24.226 + -2.30 4.60 -21.895 -24.419 + -2.30 4.70 -21.734 -24.554 + -2.30 4.80 -21.532 -24.655 + -2.30 4.90 -21.348 -24.899 + -2.30 5.00 -21.280 -25.277 + -2.30 5.10 -21.324 -25.528 + -2.30 5.20 -21.325 -25.648 + -2.30 5.30 -21.300 -25.744 + -2.30 5.40 -21.292 -25.835 + -2.30 5.50 -21.467 -25.959 + -2.30 5.60 -21.675 -26.077 + -2.30 5.70 -21.726 -26.160 + -2.30 5.80 -21.778 -26.255 + -2.30 5.90 -21.807 -26.330 + -2.30 6.00 -21.814 -26.392 + -2.30 6.10 -21.837 -26.459 + -2.30 6.20 -21.892 -26.544 + -2.30 6.30 -22.070 -26.663 + -2.30 6.40 -22.255 -26.813 + -2.30 6.50 -22.398 -26.994 + -2.30 6.60 -22.484 -27.104 + -2.30 6.70 -22.497 -27.143 + -2.30 6.80 -22.502 -27.211 + -2.30 6.90 -22.492 -27.276 + -2.30 7.00 -22.491 -27.321 + -2.30 7.10 -22.535 -27.394 + -2.30 7.20 -22.610 -27.488 + -2.30 7.30 -22.658 -27.529 + -2.30 7.40 -22.670 -27.594 + -2.30 7.50 -22.660 -27.665 + -2.30 7.60 -22.634 -27.729 + -2.30 7.70 -22.600 -27.770 + -2.30 7.80 -22.563 -27.796 + -2.30 7.90 -22.524 -27.824 + -2.30 8.00 -22.487 -27.910 + -2.30 8.10 -22.448 -28.001 + -2.30 8.20 -22.408 -28.084 + -2.30 8.30 -22.366 -28.161 + -2.30 8.40 -22.321 -28.222 + -2.30 8.50 -22.276 -28.273 + -2.30 8.60 -22.227 -28.316 + -2.30 8.70 -22.178 -28.351 + -2.30 8.80 -22.127 -28.381 + -2.30 8.90 -22.074 -28.404 + -2.30 9.00 -22.019 -28.423 + -2.20 1.00 -26.094 -22.348 + -2.20 1.10 -25.960 -22.367 + -2.20 1.20 -25.770 -22.386 + -2.20 1.30 -25.527 -22.407 + -2.20 1.40 -25.267 -22.429 + -2.20 1.50 -25.026 -22.452 + -2.20 1.60 -24.817 -22.477 + -2.20 1.70 -24.642 -22.503 + -2.20 1.80 -24.495 -22.531 + -2.20 1.90 -24.366 -22.561 + -2.20 2.00 -24.248 -22.592 + -2.20 2.10 -24.137 -22.625 + -2.20 2.20 -24.033 -22.660 + -2.20 2.30 -23.938 -22.696 + -2.20 2.40 -23.855 -22.735 + -2.20 2.50 -23.785 -22.775 + -2.20 2.60 -23.726 -22.817 + -2.20 2.70 -23.679 -22.861 + -2.20 2.80 -23.639 -22.906 + -2.20 2.90 -23.607 -22.954 + -2.20 3.00 -23.581 -23.002 + -2.20 3.10 -23.559 -23.053 + -2.20 3.20 -23.542 -23.105 + -2.20 3.30 -23.527 -23.159 + -2.20 3.40 -23.509 -23.214 + -2.20 3.50 -23.484 -23.270 + -2.20 3.60 -23.451 -23.328 + -2.20 3.70 -23.408 -23.386 + -2.20 3.80 -23.361 -23.446 + -2.20 3.90 -23.282 -23.507 + -2.20 4.00 -23.137 -23.568 + -2.20 4.10 -22.915 -23.630 + -2.20 4.20 -22.616 -23.699 + -2.20 4.30 -22.297 -23.809 + -2.20 4.40 -22.086 -24.032 + -2.20 4.50 -21.998 -24.296 + -2.20 4.60 -21.893 -24.502 + -2.20 4.70 -21.734 -24.646 + -2.20 4.80 -21.531 -24.749 + -2.20 4.90 -21.347 -24.995 + -2.20 5.00 -21.279 -25.373 + -2.20 5.10 -21.324 -25.619 + -2.20 5.20 -21.325 -25.734 + -2.20 5.30 -21.300 -25.827 + -2.20 5.40 -21.292 -25.916 + -2.20 5.50 -21.466 -26.032 + -2.20 5.60 -21.674 -26.146 + -2.20 5.70 -21.726 -26.229 + -2.20 5.80 -21.778 -26.328 + -2.20 5.90 -21.807 -26.403 + -2.20 6.00 -21.814 -26.463 + -2.20 6.10 -21.837 -26.530 + -2.20 6.20 -21.892 -26.615 + -2.20 6.30 -22.070 -26.731 + -2.20 6.40 -22.254 -26.875 + -2.20 6.50 -22.397 -27.056 + -2.20 6.60 -22.467 -27.120 + -2.20 6.70 -22.497 -27.192 + -2.20 6.80 -22.502 -27.257 + -2.20 6.90 -22.492 -27.320 + -2.20 7.00 -22.491 -27.360 + -2.20 7.10 -22.535 -27.431 + -2.20 7.20 -22.610 -27.527 + -2.20 7.30 -22.658 -27.567 + -2.20 7.40 -22.670 -27.632 + -2.20 7.50 -22.660 -27.702 + -2.20 7.60 -22.634 -27.765 + -2.20 7.70 -22.601 -27.805 + -2.20 7.80 -22.563 -27.829 + -2.20 7.90 -22.525 -27.855 + -2.20 8.00 -22.488 -27.945 + -2.20 8.10 -22.448 -28.040 + -2.20 8.20 -22.408 -28.129 + -2.20 8.30 -22.366 -28.213 + -2.20 8.40 -22.322 -28.281 + -2.20 8.50 -22.277 -28.338 + -2.20 8.60 -22.228 -28.386 + -2.20 8.70 -22.179 -28.427 + -2.20 8.80 -22.128 -28.461 + -2.20 8.90 -22.075 -28.489 + -2.20 9.00 -22.021 -28.511 + -2.10 1.00 -26.164 -22.420 + -2.10 1.10 -26.027 -22.436 + -2.10 1.20 -25.832 -22.454 + -2.10 1.30 -25.583 -22.473 + -2.10 1.40 -25.317 -22.493 + -2.10 1.50 -25.071 -22.514 + -2.10 1.60 -24.859 -22.536 + -2.10 1.70 -24.681 -22.560 + -2.10 1.80 -24.531 -22.586 + -2.10 1.90 -24.399 -22.613 + -2.10 2.00 -24.279 -22.642 + -2.10 2.10 -24.165 -22.672 + -2.10 2.20 -24.059 -22.705 + -2.10 2.30 -23.962 -22.739 + -2.10 2.40 -23.877 -22.775 + -2.10 2.50 -23.806 -22.812 + -2.10 2.60 -23.746 -22.852 + -2.10 2.70 -23.698 -22.893 + -2.10 2.80 -23.658 -22.936 + -2.10 2.90 -23.625 -22.981 + -2.10 3.00 -23.598 -23.028 + -2.10 3.10 -23.576 -23.076 + -2.10 3.20 -23.557 -23.127 + -2.10 3.30 -23.540 -23.179 + -2.10 3.40 -23.520 -23.232 + -2.10 3.50 -23.493 -23.287 + -2.10 3.60 -23.455 -23.343 + -2.10 3.70 -23.408 -23.401 + -2.10 3.80 -23.355 -23.460 + -2.10 3.90 -23.271 -23.520 + -2.10 4.00 -23.124 -23.581 + -2.10 4.10 -22.895 -23.644 + -2.10 4.20 -22.581 -23.714 + -2.10 4.30 -22.255 -23.837 + -2.10 4.40 -22.062 -24.086 + -2.10 4.50 -21.992 -24.366 + -2.10 4.60 -21.892 -24.586 + -2.10 4.70 -21.733 -24.740 + -2.10 4.80 -21.530 -24.844 + -2.10 4.90 -21.346 -25.092 + -2.10 5.00 -21.278 -25.468 + -2.10 5.10 -21.323 -25.708 + -2.10 5.20 -21.325 -25.817 + -2.10 5.30 -21.300 -25.908 + -2.10 5.40 -21.292 -25.992 + -2.10 5.50 -21.466 -26.099 + -2.10 5.60 -21.674 -26.210 + -2.10 5.70 -21.726 -26.296 + -2.10 5.80 -21.778 -26.396 + -2.10 5.90 -21.807 -26.472 + -2.10 6.00 -21.814 -26.533 + -2.10 6.10 -21.837 -26.597 + -2.10 6.20 -21.892 -26.682 + -2.10 6.30 -22.070 -26.793 + -2.10 6.40 -22.254 -26.932 + -2.10 6.50 -22.394 -27.105 + -2.10 6.60 -22.467 -27.168 + -2.10 6.70 -22.497 -27.235 + -2.10 6.80 -22.502 -27.298 + -2.10 6.90 -22.492 -27.358 + -2.10 7.00 -22.491 -27.393 + -2.10 7.10 -22.535 -27.462 + -2.10 7.20 -22.610 -27.559 + -2.10 7.30 -22.658 -27.598 + -2.10 7.40 -22.670 -27.662 + -2.10 7.50 -22.661 -27.731 + -2.10 7.60 -22.634 -27.794 + -2.10 7.70 -22.601 -27.833 + -2.10 7.80 -22.564 -27.856 + -2.10 7.90 -22.525 -27.881 + -2.10 8.00 -22.488 -27.974 + -2.10 8.10 -22.448 -28.074 + -2.10 8.20 -22.409 -28.168 + -2.10 8.30 -22.367 -28.258 + -2.10 8.40 -22.322 -28.333 + -2.10 8.50 -22.278 -28.396 + -2.10 8.60 -22.229 -28.451 + -2.10 8.70 -22.180 -28.498 + -2.10 8.80 -22.129 -28.538 + -2.10 8.90 -22.076 -28.571 + -2.10 9.00 -22.022 -28.597 + -2.00 1.00 -26.238 -22.494 + -2.00 1.10 -26.097 -22.509 + -2.00 1.20 -25.897 -22.525 + -2.00 1.30 -25.642 -22.542 + -2.00 1.40 -25.371 -22.560 + -2.00 1.50 -25.120 -22.579 + -2.00 1.60 -24.904 -22.600 + -2.00 1.70 -24.723 -22.621 + -2.00 1.80 -24.570 -22.645 + -2.00 1.90 -24.436 -22.669 + -2.00 2.00 -24.313 -22.696 + -2.00 2.10 -24.197 -22.724 + -2.00 2.20 -24.088 -22.754 + -2.00 2.30 -23.989 -22.785 + -2.00 2.40 -23.903 -22.818 + -2.00 2.50 -23.829 -22.853 + -2.00 2.60 -23.769 -22.890 + -2.00 2.70 -23.719 -22.929 + -2.00 2.80 -23.678 -22.970 + -2.00 2.90 -23.645 -23.012 + -2.00 3.00 -23.617 -23.057 + -2.00 3.10 -23.594 -23.103 + -2.00 3.20 -23.574 -23.151 + -2.00 3.30 -23.556 -23.201 + -2.00 3.40 -23.534 -23.253 + -2.00 3.50 -23.504 -23.306 + -2.00 3.60 -23.463 -23.361 + -2.00 3.70 -23.411 -23.417 + -2.00 3.80 -23.352 -23.475 + -2.00 3.90 -23.262 -23.535 + -2.00 4.00 -23.111 -23.595 + -2.00 4.10 -22.873 -23.657 + -2.00 4.20 -22.544 -23.730 + -2.00 4.30 -22.212 -23.866 + -2.00 4.40 -22.041 -24.143 + -2.00 4.50 -21.986 -24.434 + -2.00 4.60 -21.891 -24.672 + -2.00 4.70 -21.733 -24.835 + -2.00 4.80 -21.530 -24.940 + -2.00 4.90 -21.345 -25.190 + -2.00 5.00 -21.278 -25.563 + -2.00 5.10 -21.323 -25.795 + -2.00 5.20 -21.325 -25.897 + -2.00 5.30 -21.300 -25.984 + -2.00 5.40 -21.292 -26.065 + -2.00 5.50 -21.466 -26.161 + -2.00 5.60 -21.674 -26.268 + -2.00 5.70 -21.725 -26.356 + -2.00 5.80 -21.778 -26.459 + -2.00 5.90 -21.807 -26.535 + -2.00 6.00 -21.814 -26.594 + -2.00 6.10 -21.837 -26.658 + -2.00 6.20 -21.892 -26.743 + -2.00 6.30 -22.070 -26.850 + -2.00 6.40 -22.254 -26.983 + -2.00 6.50 -22.387 -27.112 + -2.00 6.60 -22.467 -27.210 + -2.00 6.70 -22.497 -27.273 + -2.00 6.80 -22.502 -27.333 + -2.00 6.90 -22.492 -27.390 + -2.00 7.00 -22.491 -27.422 + -2.00 7.10 -22.535 -27.488 + -2.00 7.20 -22.610 -27.585 + -2.00 7.30 -22.658 -27.623 + -2.00 7.40 -22.670 -27.685 + -2.00 7.50 -22.661 -27.754 + -2.00 7.60 -22.634 -27.818 + -2.00 7.70 -22.601 -27.856 + -2.00 7.80 -22.564 -27.878 + -2.00 7.90 -22.525 -27.902 + -2.00 8.00 -22.488 -27.998 + -2.00 8.10 -22.449 -28.102 + -2.00 8.20 -22.409 -28.202 + -2.00 8.30 -22.367 -28.298 + -2.00 8.40 -22.323 -28.379 + -2.00 8.50 -22.278 -28.449 + -2.00 8.60 -22.229 -28.511 + -2.00 8.70 -22.180 -28.564 + -2.00 8.80 -22.129 -28.611 + -2.00 8.90 -22.076 -28.649 + -2.00 9.00 -22.022 -28.680 + -1.90 1.00 -26.314 -22.571 + -1.90 1.10 -26.170 -22.585 + -1.90 1.20 -25.965 -22.599 + -1.90 1.30 -25.703 -22.614 + -1.90 1.40 -25.427 -22.630 + -1.90 1.50 -25.172 -22.648 + -1.90 1.60 -24.952 -22.666 + -1.90 1.70 -24.768 -22.686 + -1.90 1.80 -24.612 -22.707 + -1.90 1.90 -24.475 -22.730 + -1.90 2.00 -24.349 -22.754 + -1.90 2.10 -24.231 -22.779 + -1.90 2.20 -24.120 -22.807 + -1.90 2.30 -24.019 -22.836 + -1.90 2.40 -23.931 -22.866 + -1.90 2.50 -23.856 -22.899 + -1.90 2.60 -23.794 -22.933 + -1.90 2.70 -23.743 -22.969 + -1.90 2.80 -23.701 -23.007 + -1.90 2.90 -23.666 -23.047 + -1.90 3.00 -23.637 -23.089 + -1.90 3.10 -23.613 -23.133 + -1.90 3.20 -23.593 -23.179 + -1.90 3.30 -23.573 -23.226 + -1.90 3.40 -23.549 -23.276 + -1.90 3.50 -23.517 -23.327 + -1.90 3.60 -23.473 -23.381 + -1.90 3.70 -23.417 -23.436 + -1.90 3.80 -23.351 -23.492 + -1.90 3.90 -23.255 -23.550 + -1.90 4.00 -23.099 -23.610 + -1.90 4.10 -22.851 -23.672 + -1.90 4.20 -22.504 -23.746 + -1.90 4.30 -22.169 -23.897 + -1.90 4.40 -22.021 -24.201 + -1.90 4.50 -21.982 -24.503 + -1.90 4.60 -21.890 -24.759 + -1.90 4.70 -21.733 -24.931 + -1.90 4.80 -21.529 -25.037 + -1.90 4.90 -21.344 -25.287 + -1.90 5.00 -21.278 -25.656 + -1.90 5.10 -21.323 -25.879 + -1.90 5.20 -21.325 -25.973 + -1.90 5.30 -21.300 -26.056 + -1.90 5.40 -21.292 -26.132 + -1.90 5.50 -21.466 -26.218 + -1.90 5.60 -21.674 -26.321 + -1.90 5.70 -21.725 -26.408 + -1.90 5.80 -21.778 -26.517 + -1.90 5.90 -21.807 -26.592 + -1.90 6.00 -21.814 -26.652 + -1.90 6.10 -21.837 -26.714 + -1.90 6.20 -21.892 -26.798 + -1.90 6.30 -22.070 -26.901 + -1.90 6.40 -22.254 -27.028 + -1.90 6.50 -22.387 -27.152 + -1.90 6.60 -22.467 -27.246 + -1.90 6.70 -22.497 -27.305 + -1.90 6.80 -22.502 -27.363 + -1.90 6.90 -22.492 -27.418 + -1.90 7.00 -22.491 -27.442 + -1.90 7.10 -22.535 -27.510 + -1.90 7.20 -22.610 -27.607 + -1.90 7.30 -22.658 -27.642 + -1.90 7.40 -22.671 -27.704 + -1.90 7.50 -22.661 -27.773 + -1.90 7.60 -22.634 -27.837 + -1.90 7.70 -22.601 -27.875 + -1.90 7.80 -22.564 -27.896 + -1.90 7.90 -22.525 -27.919 + -1.90 8.00 -22.488 -28.018 + -1.90 8.10 -22.449 -28.126 + -1.90 8.20 -22.409 -28.230 + -1.90 8.30 -22.368 -28.333 + -1.90 8.40 -22.323 -28.420 + -1.90 8.50 -22.278 -28.496 + -1.90 8.60 -22.230 -28.565 + -1.90 8.70 -22.181 -28.625 + -1.90 8.80 -22.130 -28.678 + -1.90 8.90 -22.077 -28.723 + -1.90 9.00 -22.023 -28.760 + -1.80 1.00 -26.392 -22.651 + -1.80 1.10 -26.245 -22.662 + -1.80 1.20 -26.035 -22.675 + -1.80 1.30 -25.767 -22.689 + -1.80 1.40 -25.485 -22.704 + -1.80 1.50 -25.226 -22.719 + -1.80 1.60 -25.003 -22.736 + -1.80 1.70 -24.816 -22.754 + -1.80 1.80 -24.657 -22.773 + -1.80 1.90 -24.518 -22.793 + -1.80 2.00 -24.389 -22.815 + -1.80 2.10 -24.268 -22.839 + -1.80 2.20 -24.155 -22.864 + -1.80 2.30 -24.052 -22.890 + -1.80 2.40 -23.961 -22.918 + -1.80 2.50 -23.884 -22.948 + -1.80 2.60 -23.821 -22.980 + -1.80 2.70 -23.769 -23.013 + -1.80 2.80 -23.726 -23.049 + -1.80 2.90 -23.690 -23.086 + -1.80 3.00 -23.660 -23.125 + -1.80 3.10 -23.635 -23.167 + -1.80 3.20 -23.613 -23.210 + -1.80 3.30 -23.592 -23.255 + -1.80 3.40 -23.566 -23.303 + -1.80 3.50 -23.532 -23.352 + -1.80 3.60 -23.485 -23.403 + -1.80 3.70 -23.426 -23.456 + -1.80 3.80 -23.353 -23.511 + -1.80 3.90 -23.251 -23.568 + -1.80 4.00 -23.088 -23.626 + -1.80 4.10 -22.828 -23.687 + -1.80 4.20 -22.463 -23.763 + -1.80 4.30 -22.127 -23.932 + -1.80 4.40 -22.004 -24.261 + -1.80 4.50 -21.978 -24.571 + -1.80 4.60 -21.889 -24.848 + -1.80 4.70 -21.733 -25.027 + -1.80 4.80 -21.529 -25.134 + -1.80 4.90 -21.344 -25.385 + -1.80 5.00 -21.277 -25.748 + -1.80 5.10 -21.323 -25.959 + -1.80 5.20 -21.325 -26.044 + -1.80 5.30 -21.300 -26.124 + -1.80 5.40 -21.292 -26.195 + -1.80 5.50 -21.466 -26.269 + -1.80 5.60 -21.674 -26.368 + -1.80 5.70 -21.725 -26.458 + -1.80 5.80 -21.778 -26.569 + -1.80 5.90 -21.807 -26.644 + -1.80 6.00 -21.814 -26.705 + -1.80 6.10 -21.837 -26.764 + -1.80 6.20 -21.892 -26.848 + -1.80 6.30 -22.070 -26.947 + -1.80 6.40 -22.254 -27.067 + -1.80 6.50 -22.387 -27.187 + -1.80 6.60 -22.467 -27.277 + -1.80 6.70 -22.497 -27.333 + -1.80 6.80 -22.502 -27.388 + -1.80 6.90 -22.492 -27.442 + -1.80 7.00 -22.491 -27.466 + -1.80 7.10 -22.535 -27.528 + -1.80 7.20 -22.610 -27.620 + -1.80 7.30 -22.658 -27.657 + -1.80 7.40 -22.671 -27.719 + -1.80 7.50 -22.661 -27.788 + -1.80 7.60 -22.634 -27.851 + -1.80 7.70 -22.601 -27.889 + -1.80 7.80 -22.564 -27.910 + -1.80 7.90 -22.525 -27.933 + -1.80 8.00 -22.489 -28.034 + -1.80 8.10 -22.449 -28.146 + -1.80 8.20 -22.410 -28.254 + -1.80 8.30 -22.368 -28.362 + -1.80 8.40 -22.323 -28.455 + -1.80 8.50 -22.279 -28.538 + -1.80 8.60 -22.230 -28.613 + -1.80 8.70 -22.181 -28.680 + -1.80 8.80 -22.130 -28.740 + -1.80 8.90 -22.077 -28.792 + -1.80 9.00 -22.024 -28.835 + -1.70 1.00 -26.472 -22.732 + -1.70 1.10 -26.323 -22.743 + -1.70 1.20 -26.107 -22.754 + -1.70 1.30 -25.833 -22.766 + -1.70 1.40 -25.546 -22.780 + -1.70 1.50 -25.282 -22.794 + -1.70 1.60 -25.056 -22.809 + -1.70 1.70 -24.866 -22.825 + -1.70 1.80 -24.705 -22.842 + -1.70 1.90 -24.563 -22.861 + -1.70 2.00 -24.432 -22.881 + -1.70 2.10 -24.308 -22.902 + -1.70 2.20 -24.192 -22.924 + -1.70 2.30 -24.087 -22.949 + -1.70 2.40 -23.994 -22.974 + -1.70 2.50 -23.916 -23.002 + -1.70 2.60 -23.850 -23.031 + -1.70 2.70 -23.797 -23.062 + -1.70 2.80 -23.753 -23.094 + -1.70 2.90 -23.716 -23.129 + -1.70 3.00 -23.685 -23.166 + -1.70 3.10 -23.658 -23.204 + -1.70 3.20 -23.635 -23.245 + -1.70 3.30 -23.612 -23.288 + -1.70 3.40 -23.585 -23.333 + -1.70 3.50 -23.549 -23.380 + -1.70 3.60 -23.499 -23.428 + -1.70 3.70 -23.436 -23.479 + -1.70 3.80 -23.359 -23.532 + -1.70 3.90 -23.249 -23.587 + -1.70 4.00 -23.078 -23.644 + -1.70 4.10 -22.805 -23.704 + -1.70 4.20 -22.421 -23.782 + -1.70 4.30 -22.087 -23.970 + -1.70 4.40 -21.990 -24.323 + -1.70 4.50 -21.975 -24.640 + -1.70 4.60 -21.888 -24.938 + -1.70 4.70 -21.733 -25.124 + -1.70 4.80 -21.528 -25.232 + -1.70 4.90 -21.343 -25.483 + -1.70 5.00 -21.277 -25.839 + -1.70 5.10 -21.323 -26.036 + -1.70 5.20 -21.325 -26.110 + -1.70 5.30 -21.300 -26.186 + -1.70 5.40 -21.291 -26.250 + -1.70 5.50 -21.466 -26.314 + -1.70 5.60 -21.674 -26.409 + -1.70 5.70 -21.725 -26.499 + -1.70 5.80 -21.778 -26.615 + -1.70 5.90 -21.807 -26.692 + -1.70 6.00 -21.814 -26.752 + -1.70 6.10 -21.837 -26.808 + -1.70 6.20 -21.892 -26.893 + -1.70 6.30 -22.070 -26.987 + -1.70 6.40 -22.251 -27.088 + -1.70 6.50 -22.387 -27.217 + -1.70 6.60 -22.467 -27.304 + -1.70 6.70 -22.497 -27.357 + -1.70 6.80 -22.502 -27.409 + -1.70 6.90 -22.493 -27.461 + -1.70 7.00 -22.491 -27.482 + -1.70 7.10 -22.535 -27.542 + -1.70 7.20 -22.610 -27.638 + -1.70 7.30 -22.658 -27.670 + -1.70 7.40 -22.671 -27.730 + -1.70 7.50 -22.661 -27.798 + -1.70 7.60 -22.635 -27.862 + -1.70 7.70 -22.601 -27.900 + -1.70 7.80 -22.564 -27.921 + -1.70 7.90 -22.525 -27.944 + -1.70 8.00 -22.489 -28.047 + -1.70 8.10 -22.449 -28.162 + -1.70 8.20 -22.410 -28.274 + -1.70 8.30 -22.368 -28.387 + -1.70 8.40 -22.324 -28.485 + -1.70 8.50 -22.279 -28.573 + -1.70 8.60 -22.230 -28.655 + -1.70 8.70 -22.181 -28.729 + -1.70 8.80 -22.131 -28.797 + -1.70 8.90 -22.078 -28.856 + -1.70 9.00 -22.024 -28.906 + -1.60 1.00 -26.554 -22.815 + -1.60 1.10 -26.402 -22.825 + -1.60 1.20 -26.181 -22.835 + -1.60 1.30 -25.901 -22.846 + -1.60 1.40 -25.608 -22.858 + -1.60 1.50 -25.341 -22.871 + -1.60 1.60 -25.112 -22.884 + -1.60 1.70 -24.919 -22.899 + -1.60 1.80 -24.755 -22.914 + -1.60 1.90 -24.611 -22.931 + -1.60 2.00 -24.477 -22.949 + -1.60 2.10 -24.351 -22.968 + -1.60 2.20 -24.232 -22.989 + -1.60 2.30 -24.125 -23.011 + -1.60 2.40 -24.030 -23.034 + -1.60 2.50 -23.949 -23.059 + -1.60 2.60 -23.883 -23.086 + -1.60 2.70 -23.828 -23.114 + -1.60 2.80 -23.782 -23.144 + -1.60 2.90 -23.744 -23.176 + -1.60 3.00 -23.711 -23.210 + -1.60 3.10 -23.684 -23.246 + -1.60 3.20 -23.659 -23.284 + -1.60 3.30 -23.635 -23.324 + -1.60 3.40 -23.606 -23.366 + -1.60 3.50 -23.568 -23.411 + -1.60 3.60 -23.516 -23.457 + -1.60 3.70 -23.450 -23.506 + -1.60 3.80 -23.367 -23.557 + -1.60 3.90 -23.250 -23.609 + -1.60 4.00 -23.071 -23.664 + -1.60 4.10 -22.783 -23.722 + -1.60 4.20 -22.378 -23.804 + -1.60 4.30 -22.049 -24.012 + -1.60 4.40 -21.978 -24.386 + -1.60 4.50 -21.972 -24.710 + -1.60 4.60 -21.887 -25.029 + -1.60 4.70 -21.733 -25.222 + -1.60 4.80 -21.528 -25.330 + -1.60 4.90 -21.343 -25.581 + -1.60 5.00 -21.277 -25.927 + -1.60 5.10 -21.323 -26.109 + -1.60 5.20 -21.324 -26.171 + -1.60 5.30 -21.300 -26.243 + -1.60 5.40 -21.291 -26.303 + -1.60 5.50 -21.466 -26.353 + -1.60 5.60 -21.674 -26.445 + -1.60 5.70 -21.725 -26.538 + -1.60 5.80 -21.778 -26.655 + -1.60 5.90 -21.807 -26.731 + -1.60 6.00 -21.814 -26.792 + -1.60 6.10 -21.837 -26.847 + -1.60 6.20 -21.892 -26.931 + -1.60 6.30 -22.070 -27.021 + -1.60 6.40 -22.251 -27.116 + -1.60 6.50 -22.387 -27.242 + -1.60 6.60 -22.467 -27.326 + -1.60 6.70 -22.497 -27.376 + -1.60 6.80 -22.502 -27.427 + -1.60 6.90 -22.493 -27.477 + -1.60 7.00 -22.491 -27.496 + -1.60 7.10 -22.535 -27.554 + -1.60 7.20 -22.610 -27.649 + -1.60 7.30 -22.658 -27.679 + -1.60 7.40 -22.671 -27.738 + -1.60 7.50 -22.661 -27.806 + -1.60 7.60 -22.635 -27.871 + -1.60 7.70 -22.601 -27.909 + -1.60 7.80 -22.564 -27.930 + -1.60 7.90 -22.526 -27.953 + -1.60 8.00 -22.489 -28.058 + -1.60 8.10 -22.449 -28.175 + -1.60 8.20 -22.410 -28.291 + -1.60 8.30 -22.368 -28.407 + -1.60 8.40 -22.324 -28.510 + -1.60 8.50 -22.279 -28.604 + -1.60 8.60 -22.230 -28.692 + -1.60 8.70 -22.182 -28.773 + -1.60 8.80 -22.131 -28.848 + -1.60 8.90 -22.078 -28.914 + -1.60 9.00 -22.024 -28.972 + -1.50 1.00 -26.638 -22.900 + -1.50 1.10 -26.482 -22.909 + -1.50 1.20 -26.256 -22.918 + -1.50 1.30 -25.970 -22.928 + -1.50 1.40 -25.673 -22.939 + -1.50 1.50 -25.402 -22.950 + -1.50 1.60 -25.170 -22.962 + -1.50 1.70 -24.974 -22.975 + -1.50 1.80 -24.808 -22.989 + -1.50 1.90 -24.661 -23.004 + -1.50 2.00 -24.525 -23.020 + -1.50 2.10 -24.396 -23.038 + -1.50 2.20 -24.275 -23.056 + -1.50 2.30 -24.165 -23.076 + -1.50 2.40 -24.068 -23.097 + -1.50 2.50 -23.986 -23.120 + -1.50 2.60 -23.917 -23.144 + -1.50 2.70 -23.860 -23.170 + -1.50 2.80 -23.814 -23.198 + -1.50 2.90 -23.774 -23.227 + -1.50 3.00 -23.740 -23.259 + -1.50 3.10 -23.712 -23.292 + -1.50 3.20 -23.686 -23.327 + -1.50 3.30 -23.660 -23.365 + -1.50 3.40 -23.629 -23.404 + -1.50 3.50 -23.589 -23.446 + -1.50 3.60 -23.535 -23.490 + -1.50 3.70 -23.465 -23.536 + -1.50 3.80 -23.378 -23.584 + -1.50 3.90 -23.254 -23.635 + -1.50 4.00 -23.067 -23.687 + -1.50 4.10 -22.761 -23.744 + -1.50 4.20 -22.336 -23.828 + -1.50 4.30 -22.014 -24.059 + -1.50 4.40 -21.968 -24.450 + -1.50 4.50 -21.969 -24.780 + -1.50 4.60 -21.887 -25.122 + -1.50 4.70 -21.733 -25.320 + -1.50 4.80 -21.528 -25.428 + -1.50 4.90 -21.343 -25.679 + -1.50 5.00 -21.277 -26.013 + -1.50 5.10 -21.323 -26.177 + -1.50 5.20 -21.324 -26.226 + -1.50 5.30 -21.300 -26.293 + -1.50 5.40 -21.291 -26.349 + -1.50 5.50 -21.466 -26.388 + -1.50 5.60 -21.674 -26.476 + -1.50 5.70 -21.725 -26.570 + -1.50 5.80 -21.778 -26.690 + -1.50 5.90 -21.807 -26.768 + -1.50 6.00 -21.814 -26.827 + -1.50 6.10 -21.837 -26.881 + -1.50 6.20 -21.892 -26.965 + -1.50 6.30 -22.070 -27.051 + -1.50 6.40 -22.251 -27.140 + -1.50 6.50 -22.387 -27.263 + -1.50 6.60 -22.467 -27.345 + -1.50 6.70 -22.497 -27.392 + -1.50 6.80 -22.502 -27.442 + -1.50 6.90 -22.493 -27.491 + -1.50 7.00 -22.491 -27.507 + -1.50 7.10 -22.535 -27.564 + -1.50 7.20 -22.611 -27.658 + -1.50 7.30 -22.658 -27.686 + -1.50 7.40 -22.671 -27.744 + -1.50 7.50 -22.661 -27.813 + -1.50 7.60 -22.635 -27.878 + -1.50 7.70 -22.601 -27.917 + -1.50 7.80 -22.564 -27.937 + -1.50 7.90 -22.526 -27.960 + -1.50 8.00 -22.489 -28.066 + -1.50 8.10 -22.449 -28.186 + -1.50 8.20 -22.410 -28.304 + -1.50 8.30 -22.368 -28.424 + -1.50 8.40 -22.324 -28.531 + -1.50 8.50 -22.279 -28.630 + -1.50 8.60 -22.231 -28.723 + -1.50 8.70 -22.182 -28.811 + -1.50 8.80 -22.131 -28.893 + -1.50 8.90 -22.078 -28.967 + -1.50 9.00 -22.025 -29.033 + -1.40 1.00 -26.723 -22.987 + -1.40 1.10 -26.564 -22.995 + -1.40 1.20 -26.333 -23.003 + -1.40 1.30 -26.042 -23.012 + -1.40 1.40 -25.740 -23.021 + -1.40 1.50 -25.465 -23.031 + -1.40 1.60 -25.230 -23.042 + -1.40 1.70 -25.032 -23.054 + -1.40 1.80 -24.863 -23.067 + -1.40 1.90 -24.714 -23.080 + -1.40 2.00 -24.575 -23.095 + -1.40 2.10 -24.444 -23.110 + -1.40 2.20 -24.320 -23.127 + -1.40 2.30 -24.207 -23.145 + -1.40 2.40 -24.108 -23.164 + -1.40 2.50 -24.024 -23.185 + -1.40 2.60 -23.954 -23.207 + -1.40 2.70 -23.896 -23.231 + -1.40 2.80 -23.847 -23.256 + -1.40 2.90 -23.807 -23.283 + -1.40 3.00 -23.772 -23.311 + -1.40 3.10 -23.741 -23.342 + -1.40 3.20 -23.714 -23.375 + -1.40 3.30 -23.687 -23.409 + -1.40 3.40 -23.655 -23.446 + -1.40 3.50 -23.612 -23.485 + -1.40 3.60 -23.555 -23.526 + -1.40 3.70 -23.483 -23.569 + -1.40 3.80 -23.391 -23.615 + -1.40 3.90 -23.262 -23.663 + -1.40 4.00 -23.065 -23.713 + -1.40 4.10 -22.740 -23.768 + -1.40 4.20 -22.295 -23.855 + -1.40 4.30 -21.982 -24.110 + -1.40 4.40 -21.960 -24.515 + -1.40 4.50 -21.967 -24.853 + -1.40 4.60 -21.886 -25.215 + -1.40 4.70 -21.733 -25.418 + -1.40 4.80 -21.528 -25.527 + -1.40 4.90 -21.342 -25.776 + -1.40 5.00 -21.277 -26.096 + -1.40 5.10 -21.323 -26.240 + -1.40 5.20 -21.324 -26.276 + -1.40 5.30 -21.300 -26.339 + -1.40 5.40 -21.291 -26.388 + -1.40 5.50 -21.466 -26.417 + -1.40 5.60 -21.674 -26.502 + -1.40 5.70 -21.725 -26.598 + -1.40 5.80 -21.778 -26.721 + -1.40 5.90 -21.807 -26.799 + -1.40 6.00 -21.814 -26.857 + -1.40 6.10 -21.837 -26.909 + -1.40 6.20 -21.892 -26.993 + -1.40 6.30 -22.070 -27.076 + -1.40 6.40 -22.251 -27.160 + -1.40 6.50 -22.387 -27.281 + -1.40 6.60 -22.467 -27.360 + -1.40 6.70 -22.497 -27.405 + -1.40 6.80 -22.502 -27.454 + -1.40 6.90 -22.493 -27.502 + -1.40 7.00 -22.491 -27.516 + -1.40 7.10 -22.535 -27.572 + -1.40 7.20 -22.611 -27.666 + -1.40 7.30 -22.658 -27.691 + -1.40 7.40 -22.671 -27.750 + -1.40 7.50 -22.661 -27.818 + -1.40 7.60 -22.635 -27.884 + -1.40 7.70 -22.601 -27.922 + -1.40 7.80 -22.564 -27.943 + -1.40 7.90 -22.526 -27.965 + -1.40 8.00 -22.489 -28.073 + -1.40 8.10 -22.449 -28.194 + -1.40 8.20 -22.410 -28.315 + -1.40 8.30 -22.368 -28.438 + -1.40 8.40 -22.324 -28.549 + -1.40 8.50 -22.279 -28.652 + -1.40 8.60 -22.231 -28.750 + -1.40 8.70 -22.182 -28.844 + -1.40 8.80 -22.131 -28.932 + -1.40 8.90 -22.078 -29.014 + -1.40 9.00 -22.025 -29.087 + -1.30 1.00 -26.809 -23.075 + -1.30 1.10 -26.647 -23.082 + -1.30 1.20 -26.412 -23.089 + -1.30 1.30 -26.114 -23.097 + -1.30 1.40 -25.808 -23.106 + -1.30 1.50 -25.530 -23.115 + -1.30 1.60 -25.292 -23.125 + -1.30 1.70 -25.092 -23.135 + -1.30 1.80 -24.921 -23.146 + -1.30 1.90 -24.769 -23.159 + -1.30 2.00 -24.628 -23.172 + -1.30 2.10 -24.493 -23.186 + -1.30 2.20 -24.367 -23.201 + -1.30 2.30 -24.252 -23.217 + -1.30 2.40 -24.151 -23.234 + -1.30 2.50 -24.064 -23.253 + -1.30 2.60 -23.992 -23.273 + -1.30 2.70 -23.933 -23.294 + -1.30 2.80 -23.883 -23.317 + -1.30 2.90 -23.841 -23.342 + -1.30 3.00 -23.805 -23.368 + -1.30 3.10 -23.773 -23.396 + -1.30 3.20 -23.745 -23.426 + -1.30 3.30 -23.716 -23.458 + -1.30 3.40 -23.682 -23.492 + -1.30 3.50 -23.637 -23.528 + -1.30 3.60 -23.579 -23.566 + -1.30 3.70 -23.504 -23.607 + -1.30 3.80 -23.408 -23.650 + -1.30 3.90 -23.272 -23.695 + -1.30 4.00 -23.066 -23.743 + -1.30 4.10 -22.721 -23.795 + -1.30 4.20 -22.255 -23.885 + -1.30 4.30 -21.954 -24.166 + -1.30 4.40 -21.953 -24.580 + -1.30 4.50 -21.965 -24.927 + -1.30 4.60 -21.886 -25.310 + -1.30 4.70 -21.733 -25.517 + -1.30 4.80 -21.528 -25.625 + -1.30 4.90 -21.342 -25.873 + -1.30 5.00 -21.276 -26.175 + -1.30 5.10 -21.323 -26.297 + -1.30 5.20 -21.324 -26.320 + -1.30 5.30 -21.300 -26.378 + -1.30 5.40 -21.291 -26.423 + -1.30 5.50 -21.466 -26.442 + -1.30 5.60 -21.674 -26.524 + -1.30 5.70 -21.725 -26.621 + -1.30 5.80 -21.778 -26.746 + -1.30 5.90 -21.807 -26.825 + -1.30 6.00 -21.814 -26.883 + -1.30 6.10 -21.837 -26.933 + -1.30 6.20 -21.892 -27.017 + -1.30 6.30 -22.070 -27.096 + -1.30 6.40 -22.251 -27.176 + -1.30 6.50 -22.387 -27.295 + -1.30 6.60 -22.467 -27.373 + -1.30 6.70 -22.497 -27.416 + -1.30 6.80 -22.502 -27.463 + -1.30 6.90 -22.493 -27.511 + -1.30 7.00 -22.491 -27.524 + -1.30 7.10 -22.535 -27.579 + -1.30 7.20 -22.611 -27.671 + -1.30 7.30 -22.658 -27.696 + -1.30 7.40 -22.671 -27.754 + -1.30 7.50 -22.661 -27.823 + -1.30 7.60 -22.635 -27.888 + -1.30 7.70 -22.601 -27.927 + -1.30 7.80 -22.564 -27.947 + -1.30 7.90 -22.526 -27.970 + -1.30 8.00 -22.489 -28.079 + -1.30 8.10 -22.450 -28.201 + -1.30 8.20 -22.410 -28.324 + -1.30 8.30 -22.368 -28.450 + -1.30 8.40 -22.324 -28.563 + -1.30 8.50 -22.279 -28.670 + -1.30 8.60 -22.231 -28.772 + -1.30 8.70 -22.182 -28.871 + -1.30 8.80 -22.131 -28.966 + -1.30 8.90 -22.079 -29.055 + -1.30 9.00 -22.025 -29.137 + -1.20 1.00 -26.896 -23.165 + -1.20 1.10 -26.731 -23.171 + -1.20 1.20 -26.491 -23.177 + -1.20 1.30 -26.188 -23.184 + -1.20 1.40 -25.878 -23.192 + -1.20 1.50 -25.597 -23.200 + -1.20 1.60 -25.356 -23.209 + -1.20 1.70 -25.153 -23.218 + -1.20 1.80 -24.980 -23.228 + -1.20 1.90 -24.826 -23.239 + -1.20 2.00 -24.682 -23.251 + -1.20 2.10 -24.545 -23.263 + -1.20 2.20 -24.416 -23.277 + -1.20 2.30 -24.298 -23.292 + -1.20 2.40 -24.195 -23.307 + -1.20 2.50 -24.107 -23.324 + -1.20 2.60 -24.033 -23.342 + -1.20 2.70 -23.972 -23.361 + -1.20 2.80 -23.921 -23.382 + -1.20 2.90 -23.878 -23.404 + -1.20 3.00 -23.840 -23.428 + -1.20 3.10 -23.807 -23.454 + -1.20 3.20 -23.778 -23.481 + -1.20 3.30 -23.747 -23.511 + -1.20 3.40 -23.712 -23.542 + -1.20 3.50 -23.665 -23.575 + -1.20 3.60 -23.604 -23.611 + -1.20 3.70 -23.527 -23.648 + -1.20 3.80 -23.427 -23.689 + -1.20 3.90 -23.286 -23.731 + -1.20 4.00 -23.071 -23.776 + -1.20 4.10 -22.704 -23.826 + -1.20 4.20 -22.218 -23.920 + -1.20 4.30 -21.929 -24.226 + -1.20 4.40 -21.948 -24.648 + -1.20 4.50 -21.964 -25.004 + -1.20 4.60 -21.886 -25.406 + -1.20 4.70 -21.733 -25.615 + -1.20 4.80 -21.528 -25.723 + -1.20 4.90 -21.342 -25.970 + -1.20 5.00 -21.276 -26.251 + -1.20 5.10 -21.323 -26.349 + -1.20 5.20 -21.324 -26.359 + -1.20 5.30 -21.300 -26.412 + -1.20 5.40 -21.291 -26.450 + -1.20 5.50 -21.466 -26.462 + -1.20 5.60 -21.674 -26.542 + -1.20 5.70 -21.725 -26.640 + -1.20 5.80 -21.778 -26.767 + -1.20 5.90 -21.807 -26.846 + -1.20 6.00 -21.814 -26.904 + -1.20 6.10 -21.837 -26.953 + -1.20 6.20 -21.892 -27.037 + -1.20 6.30 -22.070 -27.113 + -1.20 6.40 -22.251 -27.190 + -1.20 6.50 -22.387 -27.307 + -1.20 6.60 -22.467 -27.379 + -1.20 6.70 -22.497 -27.425 + -1.20 6.80 -22.502 -27.471 + -1.20 6.90 -22.493 -27.518 + -1.20 7.00 -22.491 -27.530 + -1.20 7.10 -22.535 -27.584 + -1.20 7.20 -22.611 -27.676 + -1.20 7.30 -22.658 -27.699 + -1.20 7.40 -22.671 -27.757 + -1.20 7.50 -22.661 -27.826 + -1.20 7.60 -22.635 -27.892 + -1.20 7.70 -22.601 -27.931 + -1.20 7.80 -22.564 -27.951 + -1.20 7.90 -22.526 -27.974 + -1.20 8.00 -22.489 -28.083 + -1.20 8.10 -22.450 -28.207 + -1.20 8.20 -22.410 -28.331 + -1.20 8.30 -22.368 -28.459 + -1.20 8.40 -22.324 -28.575 + -1.20 8.50 -22.280 -28.685 + -1.20 8.60 -22.231 -28.791 + -1.20 8.70 -22.182 -28.895 + -1.20 8.80 -22.132 -28.996 + -1.20 8.90 -22.079 -29.091 + -1.20 9.00 -22.025 -29.180 + -1.10 1.00 -26.983 -23.255 + -1.10 1.10 -26.816 -23.260 + -1.10 1.20 -26.571 -23.266 + -1.10 1.30 -26.264 -23.273 + -1.10 1.40 -25.949 -23.279 + -1.10 1.50 -25.665 -23.287 + -1.10 1.60 -25.422 -23.295 + -1.10 1.70 -25.217 -23.303 + -1.10 1.80 -25.041 -23.312 + -1.10 1.90 -24.885 -23.322 + -1.10 2.00 -24.738 -23.332 + -1.10 2.10 -24.598 -23.344 + -1.10 2.20 -24.466 -23.356 + -1.10 2.30 -24.346 -23.369 + -1.10 2.40 -24.241 -23.383 + -1.10 2.50 -24.151 -23.398 + -1.10 2.60 -24.076 -23.414 + -1.10 2.70 -24.013 -23.432 + -1.10 2.80 -23.961 -23.450 + -1.10 2.90 -23.916 -23.471 + -1.10 3.00 -23.877 -23.492 + -1.10 3.10 -23.843 -23.516 + -1.10 3.20 -23.812 -23.540 + -1.10 3.30 -23.781 -23.567 + -1.10 3.40 -23.743 -23.596 + -1.10 3.50 -23.695 -23.626 + -1.10 3.60 -23.632 -23.659 + -1.10 3.70 -23.552 -23.694 + -1.10 3.80 -23.449 -23.731 + -1.10 3.90 -23.303 -23.771 + -1.10 4.00 -23.078 -23.813 + -1.10 4.10 -22.689 -23.861 + -1.10 4.20 -22.183 -23.959 + -1.10 4.30 -21.908 -24.290 + -1.10 4.40 -21.945 -24.717 + -1.10 4.50 -21.963 -25.083 + -1.10 4.60 -21.886 -25.502 + -1.10 4.70 -21.733 -25.714 + -1.10 4.80 -21.527 -25.822 + -1.10 4.90 -21.342 -26.065 + -1.10 5.00 -21.276 -26.322 + -1.10 5.10 -21.323 -26.394 + -1.10 5.20 -21.324 -26.392 + -1.10 5.30 -21.300 -26.442 + -1.10 5.40 -21.291 -26.477 + -1.10 5.50 -21.466 -26.479 + -1.10 5.60 -21.674 -26.557 + -1.10 5.70 -21.725 -26.656 + -1.10 5.80 -21.778 -26.785 + -1.10 5.90 -21.807 -26.864 + -1.10 6.00 -21.814 -26.922 + -1.10 6.10 -21.837 -26.970 + -1.10 6.20 -21.892 -27.054 + -1.10 6.30 -22.070 -27.127 + -1.10 6.40 -22.251 -27.201 + -1.10 6.50 -22.387 -27.317 + -1.10 6.60 -22.467 -27.391 + -1.10 6.70 -22.497 -27.432 + -1.10 6.80 -22.502 -27.477 + -1.10 6.90 -22.493 -27.523 + -1.10 7.00 -22.491 -27.534 + -1.10 7.10 -22.535 -27.584 + -1.10 7.20 -22.611 -27.679 + -1.10 7.30 -22.658 -27.702 + -1.10 7.40 -22.671 -27.760 + -1.10 7.50 -22.661 -27.829 + -1.10 7.60 -22.635 -27.895 + -1.10 7.70 -22.602 -27.934 + -1.10 7.80 -22.564 -27.954 + -1.10 7.90 -22.526 -27.977 + -1.10 8.00 -22.489 -28.087 + -1.10 8.10 -22.450 -28.211 + -1.10 8.20 -22.410 -28.337 + -1.10 8.30 -22.368 -28.467 + -1.10 8.40 -22.324 -28.585 + -1.10 8.50 -22.280 -28.697 + -1.10 8.60 -22.231 -28.807 + -1.10 8.70 -22.182 -28.914 + -1.10 8.80 -22.132 -29.020 + -1.10 8.90 -22.079 -29.122 + -1.10 9.00 -22.025 -29.218 + -1.00 1.00 -27.071 -23.347 + -1.00 1.10 -26.901 -23.351 + -1.00 1.20 -26.652 -23.357 + -1.00 1.30 -26.340 -23.362 + -1.00 1.40 -26.022 -23.368 + -1.00 1.50 -25.735 -23.375 + -1.00 1.60 -25.489 -23.382 + -1.00 1.70 -25.282 -23.389 + -1.00 1.80 -25.105 -23.397 + -1.00 1.90 -24.946 -23.406 + -1.00 2.00 -24.796 -23.416 + -1.00 2.10 -24.653 -23.426 + -1.00 2.20 -24.519 -23.437 + -1.00 2.30 -24.396 -23.448 + -1.00 2.40 -24.289 -23.461 + -1.00 2.50 -24.197 -23.475 + -1.00 2.60 -24.120 -23.489 + -1.00 2.70 -24.056 -23.505 + -1.00 2.80 -24.002 -23.522 + -1.00 2.90 -23.956 -23.540 + -1.00 3.00 -23.916 -23.560 + -1.00 3.10 -23.881 -23.581 + -1.00 3.20 -23.849 -23.603 + -1.00 3.30 -23.816 -23.628 + -1.00 3.40 -23.777 -23.654 + -1.00 3.50 -23.727 -23.682 + -1.00 3.60 -23.662 -23.712 + -1.00 3.70 -23.579 -23.744 + -1.00 3.80 -23.473 -23.778 + -1.00 3.90 -23.323 -23.815 + -1.00 4.00 -23.088 -23.854 + -1.00 4.10 -22.677 -23.899 + -1.00 4.20 -22.151 -24.002 + -1.00 4.30 -21.890 -24.358 + -1.00 4.40 -21.942 -24.788 + -1.00 4.50 -21.962 -25.165 + -1.00 4.60 -21.885 -25.599 + -1.00 4.70 -21.733 -25.812 + -1.00 4.80 -21.527 -25.920 + -1.00 4.90 -21.342 -26.160 + -1.00 5.00 -21.276 -26.388 + -1.00 5.10 -21.323 -26.435 + -1.00 5.20 -21.324 -26.420 + -1.00 5.30 -21.300 -26.466 + -1.00 5.40 -21.291 -26.498 + -1.00 5.50 -21.466 -26.494 + -1.00 5.60 -21.674 -26.569 + -1.00 5.70 -21.725 -26.669 + -1.00 5.80 -21.778 -26.800 + -1.00 5.90 -21.807 -26.879 + -1.00 6.00 -21.814 -26.937 + -1.00 6.10 -21.837 -26.984 + -1.00 6.20 -21.892 -27.067 + -1.00 6.30 -22.070 -27.139 + -1.00 6.40 -22.251 -27.210 + -1.00 6.50 -22.387 -27.324 + -1.00 6.60 -22.467 -27.398 + -1.00 6.70 -22.497 -27.438 + -1.00 6.80 -22.502 -27.483 + -1.00 6.90 -22.493 -27.528 + -1.00 7.00 -22.491 -27.538 + -1.00 7.10 -22.535 -27.591 + -1.00 7.20 -22.611 -27.682 + -1.00 7.30 -22.658 -27.705 + -1.00 7.40 -22.671 -27.762 + -1.00 7.50 -22.661 -27.831 + -1.00 7.60 -22.635 -27.897 + -1.00 7.70 -22.602 -27.936 + -1.00 7.80 -22.564 -27.956 + -1.00 7.90 -22.525 -27.977 + -1.00 8.00 -22.489 -28.089 + -1.00 8.10 -22.450 -28.215 + -1.00 8.20 -22.410 -28.341 + -1.00 8.30 -22.368 -28.473 + -1.00 8.40 -22.324 -28.593 + -1.00 8.50 -22.280 -28.707 + -1.00 8.60 -22.231 -28.819 + -1.00 8.70 -22.182 -28.931 + -1.00 8.80 -22.132 -29.041 + -1.00 8.90 -22.079 -29.148 + -1.00 9.00 -22.025 -29.251 + -0.90 1.00 -27.160 -23.439 + -0.90 1.10 -26.986 -23.443 + -0.90 1.20 -26.733 -23.448 + -0.90 1.30 -26.417 -23.453 + -0.90 1.40 -26.095 -23.458 + -0.90 1.50 -25.806 -23.464 + -0.90 1.60 -25.559 -23.470 + -0.90 1.70 -25.349 -23.477 + -0.90 1.80 -25.170 -23.484 + -0.90 1.90 -25.008 -23.492 + -0.90 2.00 -24.856 -23.501 + -0.90 2.10 -24.710 -23.510 + -0.90 2.20 -24.573 -23.519 + -0.90 2.30 -24.448 -23.530 + -0.90 2.40 -24.338 -23.541 + -0.90 2.50 -24.244 -23.554 + -0.90 2.60 -24.165 -23.567 + -0.90 2.70 -24.100 -23.581 + -0.90 2.80 -24.045 -23.596 + -0.90 2.90 -23.998 -23.612 + -0.90 3.00 -23.957 -23.630 + -0.90 3.10 -23.921 -23.649 + -0.90 3.20 -23.887 -23.670 + -0.90 3.30 -23.853 -23.692 + -0.90 3.40 -23.812 -23.715 + -0.90 3.50 -23.761 -23.741 + -0.90 3.60 -23.694 -23.768 + -0.90 3.70 -23.609 -23.798 + -0.90 3.80 -23.500 -23.829 + -0.90 3.90 -23.346 -23.863 + -0.90 4.00 -23.101 -23.899 + -0.90 4.10 -22.667 -23.942 + -0.90 4.20 -22.123 -24.049 + -0.90 4.30 -21.875 -24.431 + -0.90 4.40 -21.940 -24.861 + -0.90 4.50 -21.961 -25.249 + -0.90 4.60 -21.885 -25.696 + -0.90 4.70 -21.733 -25.911 + -0.90 4.80 -21.527 -26.017 + -0.90 4.90 -21.342 -26.254 + -0.90 5.00 -21.276 -26.449 + -0.90 5.10 -21.323 -26.469 + -0.90 5.20 -21.324 -26.444 + -0.90 5.30 -21.300 -26.487 + -0.90 5.40 -21.291 -26.516 + -0.90 5.50 -21.466 -26.505 + -0.90 5.60 -21.674 -26.579 + -0.90 5.70 -21.725 -26.679 + -0.90 5.80 -21.778 -26.812 + -0.90 5.90 -21.807 -26.891 + -0.90 6.00 -21.814 -26.949 + -0.90 6.10 -21.837 -26.995 + -0.90 6.20 -21.892 -27.079 + -0.90 6.30 -22.070 -27.148 + -0.90 6.40 -22.251 -27.217 + -0.90 6.50 -22.387 -27.331 + -0.90 6.60 -22.467 -27.399 + -0.90 6.70 -22.497 -27.443 + -0.90 6.80 -22.502 -27.487 + -0.90 6.90 -22.493 -27.532 + -0.90 7.00 -22.491 -27.541 + -0.90 7.10 -22.535 -27.594 + -0.90 7.20 -22.611 -27.684 + -0.90 7.30 -22.658 -27.706 + -0.90 7.40 -22.671 -27.764 + -0.90 7.50 -22.661 -27.833 + -0.90 7.60 -22.635 -27.899 + -0.90 7.70 -22.602 -27.938 + -0.90 7.80 -22.564 -27.955 + -0.90 7.90 -22.525 -27.979 + -0.90 8.00 -22.489 -28.091 + -0.90 8.10 -22.450 -28.218 + -0.90 8.20 -22.410 -28.345 + -0.90 8.30 -22.368 -28.477 + -0.90 8.40 -22.324 -28.599 + -0.90 8.50 -22.280 -28.715 + -0.90 8.60 -22.231 -28.830 + -0.90 8.70 -22.182 -28.944 + -0.90 8.80 -22.132 -29.058 + -0.90 8.90 -22.079 -29.170 + -0.90 9.00 -22.025 -29.278 + -0.80 1.00 -27.247 -23.532 + -0.80 1.10 -27.071 -23.536 + -0.80 1.20 -26.814 -23.540 + -0.80 1.30 -26.494 -23.545 + -0.80 1.40 -26.170 -23.550 + -0.80 1.50 -25.879 -23.555 + -0.80 1.60 -25.629 -23.560 + -0.80 1.70 -25.418 -23.566 + -0.80 1.80 -25.236 -23.573 + -0.80 1.90 -25.072 -23.580 + -0.80 2.00 -24.917 -23.587 + -0.80 2.10 -24.768 -23.595 + -0.80 2.20 -24.628 -23.604 + -0.80 2.30 -24.500 -23.614 + -0.80 2.40 -24.388 -23.624 + -0.80 2.50 -24.293 -23.635 + -0.80 2.60 -24.212 -23.646 + -0.80 2.70 -24.146 -23.659 + -0.80 2.80 -24.090 -23.673 + -0.80 2.90 -24.041 -23.688 + -0.80 3.00 -23.999 -23.703 + -0.80 3.10 -23.962 -23.721 + -0.80 3.20 -23.927 -23.739 + -0.80 3.30 -23.892 -23.759 + -0.80 3.40 -23.850 -23.780 + -0.80 3.50 -23.796 -23.804 + -0.80 3.60 -23.728 -23.829 + -0.80 3.70 -23.641 -23.855 + -0.80 3.80 -23.529 -23.884 + -0.80 3.90 -23.371 -23.915 + -0.80 4.00 -23.117 -23.949 + -0.80 4.10 -22.661 -23.989 + -0.80 4.20 -22.098 -24.101 + -0.80 4.30 -21.863 -24.507 + -0.80 4.40 -21.938 -24.937 + -0.80 4.50 -21.960 -25.335 + -0.80 4.60 -21.885 -25.793 + -0.80 4.70 -21.733 -26.009 + -0.80 4.80 -21.527 -26.114 + -0.80 4.90 -21.342 -26.346 + -0.80 5.00 -21.276 -26.504 + -0.80 5.10 -21.322 -26.499 + -0.80 5.20 -21.324 -26.464 + -0.80 5.30 -21.300 -26.504 + -0.80 5.40 -21.291 -26.530 + -0.80 5.50 -21.466 -26.515 + -0.80 5.60 -21.674 -26.587 + -0.80 5.70 -21.725 -26.688 + -0.80 5.80 -21.778 -26.821 + -0.80 5.90 -21.807 -26.901 + -0.80 6.00 -21.814 -26.958 + -0.80 6.10 -21.837 -27.004 + -0.80 6.20 -21.892 -27.088 + -0.80 6.30 -22.070 -27.156 + -0.80 6.40 -22.251 -27.223 + -0.80 6.50 -22.387 -27.336 + -0.80 6.60 -22.467 -27.408 + -0.80 6.70 -22.497 -27.446 + -0.80 6.80 -22.503 -27.490 + -0.80 6.90 -22.493 -27.535 + -0.80 7.00 -22.491 -27.544 + -0.80 7.10 -22.535 -27.592 + -0.80 7.20 -22.611 -27.686 + -0.80 7.30 -22.658 -27.708 + -0.80 7.40 -22.671 -27.765 + -0.80 7.50 -22.661 -27.834 + -0.80 7.60 -22.635 -27.901 + -0.80 7.70 -22.601 -27.934 + -0.80 7.80 -22.564 -27.956 + -0.80 7.90 -22.525 -27.981 + -0.80 8.00 -22.489 -28.093 + -0.80 8.10 -22.450 -28.220 + -0.80 8.20 -22.410 -28.348 + -0.80 8.30 -22.369 -28.481 + -0.80 8.40 -22.324 -28.604 + -0.80 8.50 -22.280 -28.722 + -0.80 8.60 -22.231 -28.838 + -0.80 8.70 -22.182 -28.955 + -0.80 8.80 -22.132 -29.072 + -0.80 8.90 -22.079 -29.188 + -0.80 9.00 -22.025 -29.302 + -0.70 1.00 -27.335 -23.626 + -0.70 1.10 -27.155 -23.630 + -0.70 1.20 -26.895 -23.633 + -0.70 1.30 -26.572 -23.637 + -0.70 1.40 -26.246 -23.642 + -0.70 1.50 -25.953 -23.646 + -0.70 1.60 -25.701 -23.651 + -0.70 1.70 -25.489 -23.657 + -0.70 1.80 -25.305 -23.662 + -0.70 1.90 -25.138 -23.669 + -0.70 2.00 -24.980 -23.675 + -0.70 2.10 -24.828 -23.683 + -0.70 2.20 -24.685 -23.690 + -0.70 2.30 -24.555 -23.699 + -0.70 2.40 -24.440 -23.708 + -0.70 2.50 -24.343 -23.718 + -0.70 2.60 -24.261 -23.728 + -0.70 2.70 -24.193 -23.740 + -0.70 2.80 -24.135 -23.752 + -0.70 2.90 -24.086 -23.765 + -0.70 3.00 -24.043 -23.779 + -0.70 3.10 -24.005 -23.795 + -0.70 3.20 -23.969 -23.812 + -0.70 3.30 -23.932 -23.829 + -0.70 3.40 -23.889 -23.849 + -0.70 3.50 -23.834 -23.870 + -0.70 3.60 -23.763 -23.892 + -0.70 3.70 -23.675 -23.917 + -0.70 3.80 -23.561 -23.943 + -0.70 3.90 -23.399 -23.971 + -0.70 4.00 -23.136 -24.002 + -0.70 4.10 -22.657 -24.040 + -0.70 4.20 -22.077 -24.157 + -0.70 4.30 -21.852 -24.586 + -0.70 4.40 -21.937 -25.015 + -0.70 4.50 -21.959 -25.424 + -0.70 4.60 -21.885 -25.891 + -0.70 4.70 -21.733 -26.107 + -0.70 4.80 -21.527 -26.211 + -0.70 4.90 -21.342 -26.437 + -0.70 5.00 -21.276 -26.554 + -0.70 5.10 -21.322 -26.524 + -0.70 5.20 -21.324 -26.480 + -0.70 5.30 -21.300 -26.518 + -0.70 5.40 -21.291 -26.542 + -0.70 5.50 -21.466 -26.522 + -0.70 5.60 -21.674 -26.594 + -0.70 5.70 -21.725 -26.695 + -0.70 5.80 -21.778 -26.829 + -0.70 5.90 -21.807 -26.909 + -0.70 6.00 -21.814 -26.966 + -0.70 6.10 -21.837 -27.012 + -0.70 6.20 -21.892 -27.095 + -0.70 6.30 -22.070 -27.162 + -0.70 6.40 -22.251 -27.228 + -0.70 6.50 -22.387 -27.340 + -0.70 6.60 -22.467 -27.411 + -0.70 6.70 -22.497 -27.449 + -0.70 6.80 -22.503 -27.493 + -0.70 6.90 -22.493 -27.537 + -0.70 7.00 -22.491 -27.546 + -0.70 7.10 -22.535 -27.597 + -0.70 7.20 -22.611 -27.688 + -0.70 7.30 -22.658 -27.709 + -0.70 7.40 -22.671 -27.766 + -0.70 7.50 -22.661 -27.835 + -0.70 7.60 -22.634 -27.893 + -0.70 7.70 -22.601 -27.935 + -0.70 7.80 -22.564 -27.958 + -0.70 7.90 -22.525 -27.982 + -0.70 8.00 -22.489 -28.095 + -0.70 8.10 -22.450 -28.222 + -0.70 8.20 -22.410 -28.350 + -0.70 8.30 -22.369 -28.484 + -0.70 8.40 -22.324 -28.608 + -0.70 8.50 -22.280 -28.727 + -0.70 8.60 -22.231 -28.845 + -0.70 8.70 -22.182 -28.964 + -0.70 8.80 -22.132 -29.084 + -0.70 8.90 -22.079 -29.203 + -0.70 9.00 -22.025 -29.321 + -0.60 1.00 -27.421 -23.721 + -0.60 1.10 -27.238 -23.724 + -0.60 1.20 -26.975 -23.727 + -0.60 1.30 -26.650 -23.731 + -0.60 1.40 -26.322 -23.735 + -0.60 1.50 -26.028 -23.739 + -0.60 1.60 -25.775 -23.743 + -0.60 1.70 -25.560 -23.748 + -0.60 1.80 -25.375 -23.753 + -0.60 1.90 -25.205 -23.759 + -0.60 2.00 -25.045 -23.765 + -0.60 2.10 -24.889 -23.771 + -0.60 2.20 -24.743 -23.778 + -0.60 2.30 -24.610 -23.786 + -0.60 2.40 -24.493 -23.794 + -0.60 2.50 -24.394 -23.803 + -0.60 2.60 -24.310 -23.812 + -0.60 2.70 -24.241 -23.822 + -0.60 2.80 -24.182 -23.833 + -0.60 2.90 -24.132 -23.845 + -0.60 3.00 -24.088 -23.858 + -0.60 3.10 -24.048 -23.872 + -0.60 3.20 -24.011 -23.887 + -0.60 3.30 -23.974 -23.903 + -0.60 3.40 -23.929 -23.920 + -0.60 3.50 -23.873 -23.939 + -0.60 3.60 -23.801 -23.960 + -0.60 3.70 -23.710 -23.982 + -0.60 3.80 -23.594 -24.006 + -0.60 3.90 -23.429 -24.031 + -0.60 4.00 -23.157 -24.059 + -0.60 4.10 -22.657 -24.094 + -0.60 4.20 -22.059 -24.217 + -0.60 4.30 -21.844 -24.668 + -0.60 4.40 -21.937 -25.096 + -0.60 4.50 -21.959 -25.514 + -0.60 4.60 -21.885 -25.989 + -0.60 4.70 -21.733 -26.205 + -0.60 4.80 -21.527 -26.307 + -0.60 4.90 -21.342 -26.525 + -0.60 5.00 -21.276 -26.597 + -0.60 5.10 -21.322 -26.546 + -0.60 5.20 -21.324 -26.494 + -0.60 5.30 -21.300 -26.530 + -0.60 5.40 -21.291 -26.551 + -0.60 5.50 -21.466 -26.528 + -0.60 5.60 -21.674 -26.599 + -0.60 5.70 -21.725 -26.701 + -0.60 5.80 -21.778 -26.836 + -0.60 5.90 -21.807 -26.916 + -0.60 6.00 -21.814 -26.973 + -0.60 6.10 -21.837 -27.017 + -0.60 6.20 -21.892 -27.101 + -0.60 6.30 -22.070 -27.167 + -0.60 6.40 -22.251 -27.232 + -0.60 6.50 -22.387 -27.343 + -0.60 6.60 -22.467 -27.414 + -0.60 6.70 -22.497 -27.452 + -0.60 6.80 -22.503 -27.495 + -0.60 6.90 -22.493 -27.539 + -0.60 7.00 -22.491 -27.547 + -0.60 7.10 -22.535 -27.599 + -0.60 7.20 -22.611 -27.689 + -0.60 7.30 -22.658 -27.710 + -0.60 7.40 -22.671 -27.767 + -0.60 7.50 -22.661 -27.836 + -0.60 7.60 -22.634 -27.894 + -0.60 7.70 -22.601 -27.936 + -0.60 7.80 -22.564 -27.959 + -0.60 7.90 -22.525 -27.983 + -0.60 8.00 -22.489 -28.096 + -0.60 8.10 -22.450 -28.223 + -0.60 8.20 -22.410 -28.352 + -0.60 8.30 -22.369 -28.487 + -0.60 8.40 -22.324 -28.611 + -0.60 8.50 -22.280 -28.731 + -0.60 8.60 -22.231 -28.850 + -0.60 8.70 -22.182 -28.971 + -0.60 8.80 -22.132 -29.093 + -0.60 8.90 -22.079 -29.216 + -0.60 9.00 -22.025 -29.338 + -0.50 1.00 -27.506 -23.816 + -0.50 1.10 -27.320 -23.819 + -0.50 1.20 -27.054 -23.822 + -0.50 1.30 -26.728 -23.825 + -0.50 1.40 -26.399 -23.828 + -0.50 1.50 -26.104 -23.832 + -0.50 1.60 -25.850 -23.836 + -0.50 1.70 -25.634 -23.840 + -0.50 1.80 -25.446 -23.845 + -0.50 1.90 -25.274 -23.850 + -0.50 2.00 -25.110 -23.855 + -0.50 2.10 -24.952 -23.861 + -0.50 2.20 -24.802 -23.867 + -0.50 2.30 -24.666 -23.874 + -0.50 2.40 -24.547 -23.881 + -0.50 2.50 -24.446 -23.889 + -0.50 2.60 -24.361 -23.897 + -0.50 2.70 -24.290 -23.907 + -0.50 2.80 -24.230 -23.916 + -0.50 2.90 -24.179 -23.927 + -0.50 3.00 -24.134 -23.939 + -0.50 3.10 -24.093 -23.951 + -0.50 3.20 -24.055 -23.964 + -0.50 3.30 -24.017 -23.979 + -0.50 3.40 -23.971 -23.995 + -0.50 3.50 -23.913 -24.012 + -0.50 3.60 -23.840 -24.030 + -0.50 3.70 -23.748 -24.050 + -0.50 3.80 -23.629 -24.072 + -0.50 3.90 -23.462 -24.095 + -0.50 4.00 -23.181 -24.120 + -0.50 4.10 -22.661 -24.153 + -0.50 4.20 -22.045 -24.282 + -0.50 4.30 -21.838 -24.753 + -0.50 4.40 -21.936 -25.179 + -0.50 4.50 -21.958 -25.606 + -0.50 4.60 -21.885 -26.086 + -0.50 4.70 -21.733 -26.302 + -0.50 4.80 -21.527 -26.402 + -0.50 4.90 -21.341 -26.611 + -0.50 5.00 -21.276 -26.636 + -0.50 5.10 -21.322 -26.563 + -0.50 5.20 -21.324 -26.505 + -0.50 5.30 -21.300 -26.539 + -0.50 5.40 -21.291 -26.559 + -0.50 5.50 -21.466 -26.533 + -0.50 5.60 -21.674 -26.603 + -0.50 5.70 -21.725 -26.705 + -0.50 5.80 -21.778 -26.841 + -0.50 5.90 -21.807 -26.921 + -0.50 6.00 -21.814 -26.978 + -0.50 6.10 -21.837 -27.022 + -0.50 6.20 -21.892 -27.106 + -0.50 6.30 -22.070 -27.171 + -0.50 6.40 -22.251 -27.235 + -0.50 6.50 -22.387 -27.346 + -0.50 6.60 -22.467 -27.416 + -0.50 6.70 -22.497 -27.453 + -0.50 6.80 -22.503 -27.496 + -0.50 6.90 -22.493 -27.541 + -0.50 7.00 -22.491 -27.548 + -0.50 7.10 -22.535 -27.596 + -0.50 7.20 -22.611 -27.690 + -0.50 7.30 -22.658 -27.711 + -0.50 7.40 -22.671 -27.768 + -0.50 7.50 -22.660 -27.828 + -0.50 7.60 -22.634 -27.895 + -0.50 7.70 -22.601 -27.936 + -0.50 7.80 -22.564 -27.959 + -0.50 7.90 -22.525 -27.984 + -0.50 8.00 -22.489 -28.097 + -0.50 8.10 -22.450 -28.225 + -0.50 8.20 -22.410 -28.354 + -0.50 8.30 -22.369 -28.489 + -0.50 8.40 -22.324 -28.614 + -0.50 8.50 -22.280 -28.734 + -0.50 8.60 -22.231 -28.855 + -0.50 8.70 -22.182 -28.976 + -0.50 8.80 -22.132 -29.101 + -0.50 8.90 -22.079 -29.226 + -0.50 9.00 -22.025 -29.351 + -0.40 1.00 -27.589 -23.912 + -0.40 1.10 -27.400 -23.914 + -0.40 1.20 -27.132 -23.917 + -0.40 1.30 -26.806 -23.920 + -0.40 1.40 -26.477 -23.923 + -0.40 1.50 -26.180 -23.926 + -0.40 1.60 -25.925 -23.929 + -0.40 1.70 -25.708 -23.933 + -0.40 1.80 -25.518 -23.937 + -0.40 1.90 -25.344 -23.942 + -0.40 2.00 -25.177 -23.947 + -0.40 2.10 -25.015 -23.952 + -0.40 2.20 -24.862 -23.957 + -0.40 2.30 -24.724 -23.963 + -0.40 2.40 -24.602 -23.970 + -0.40 2.50 -24.499 -23.977 + -0.40 2.60 -24.412 -23.984 + -0.40 2.70 -24.340 -23.993 + -0.40 2.80 -24.279 -24.001 + -0.40 2.90 -24.227 -24.011 + -0.40 3.00 -24.181 -24.021 + -0.40 3.10 -24.139 -24.032 + -0.40 3.20 -24.101 -24.044 + -0.40 3.30 -24.061 -24.057 + -0.40 3.40 -24.014 -24.072 + -0.40 3.50 -23.955 -24.087 + -0.40 3.60 -23.880 -24.103 + -0.40 3.70 -23.786 -24.121 + -0.40 3.80 -23.666 -24.141 + -0.40 3.90 -23.496 -24.162 + -0.40 4.00 -23.206 -24.184 + -0.40 4.10 -22.667 -24.215 + -0.40 4.20 -22.033 -24.350 + -0.40 4.30 -21.832 -24.841 + -0.40 4.40 -21.936 -25.265 + -0.40 4.50 -21.958 -25.699 + -0.40 4.60 -21.885 -26.184 + -0.40 4.70 -21.733 -26.399 + -0.40 4.80 -21.527 -26.496 + -0.40 4.90 -21.341 -26.694 + -0.40 5.00 -21.276 -26.669 + -0.40 5.10 -21.322 -26.578 + -0.40 5.20 -21.324 -26.514 + -0.40 5.30 -21.300 -26.547 + -0.40 5.40 -21.291 -26.565 + -0.40 5.50 -21.466 -26.537 + -0.40 5.60 -21.674 -26.607 + -0.40 5.70 -21.725 -26.709 + -0.40 5.80 -21.778 -26.845 + -0.40 5.90 -21.807 -26.925 + -0.40 6.00 -21.814 -26.982 + -0.40 6.10 -21.837 -27.026 + -0.40 6.20 -21.892 -27.110 + -0.40 6.30 -22.070 -27.174 + -0.40 6.40 -22.251 -27.237 + -0.40 6.50 -22.387 -27.348 + -0.40 6.60 -22.467 -27.418 + -0.40 6.70 -22.497 -27.455 + -0.40 6.80 -22.503 -27.498 + -0.40 6.90 -22.493 -27.542 + -0.40 7.00 -22.491 -27.549 + -0.40 7.10 -22.535 -27.601 + -0.40 7.20 -22.611 -27.691 + -0.40 7.30 -22.658 -27.711 + -0.40 7.40 -22.671 -27.768 + -0.40 7.50 -22.660 -27.828 + -0.40 7.60 -22.634 -27.896 + -0.40 7.70 -22.601 -27.937 + -0.40 7.80 -22.564 -27.960 + -0.40 7.90 -22.525 -27.984 + -0.40 8.00 -22.489 -28.097 + -0.40 8.10 -22.450 -28.226 + -0.40 8.20 -22.410 -28.355 + -0.40 8.30 -22.369 -28.491 + -0.40 8.40 -22.324 -28.616 + -0.40 8.50 -22.280 -28.737 + -0.40 8.60 -22.231 -28.858 + -0.40 8.70 -22.183 -28.981 + -0.40 8.80 -22.132 -29.107 + -0.40 8.90 -22.079 -29.234 + -0.40 9.00 -22.025 -29.362 + -0.30 1.00 -27.670 -24.008 + -0.30 1.10 -27.477 -24.010 + -0.30 1.20 -27.208 -24.012 + -0.30 1.30 -26.883 -24.015 + -0.30 1.40 -26.554 -24.018 + -0.30 1.50 -26.257 -24.021 + -0.30 1.60 -26.002 -24.024 + -0.30 1.70 -25.783 -24.027 + -0.30 1.80 -25.592 -24.031 + -0.30 1.90 -25.416 -24.035 + -0.30 2.00 -25.246 -24.039 + -0.30 2.10 -25.080 -24.044 + -0.30 2.20 -24.924 -24.049 + -0.30 2.30 -24.782 -24.054 + -0.30 2.40 -24.658 -24.060 + -0.30 2.50 -24.553 -24.066 + -0.30 2.60 -24.464 -24.073 + -0.30 2.70 -24.391 -24.080 + -0.30 2.80 -24.329 -24.088 + -0.30 2.90 -24.275 -24.097 + -0.30 3.00 -24.228 -24.106 + -0.30 3.10 -24.186 -24.116 + -0.30 3.20 -24.147 -24.126 + -0.30 3.30 -24.106 -24.138 + -0.30 3.40 -24.058 -24.151 + -0.30 3.50 -23.998 -24.164 + -0.30 3.60 -23.921 -24.179 + -0.30 3.70 -23.826 -24.195 + -0.30 3.80 -23.704 -24.213 + -0.30 3.90 -23.532 -24.232 + -0.30 4.00 -23.234 -24.252 + -0.30 4.10 -22.677 -24.281 + -0.30 4.20 -22.024 -24.421 + -0.30 4.30 -21.828 -24.931 + -0.30 4.40 -21.936 -25.353 + -0.30 4.50 -21.957 -25.794 + -0.30 4.60 -21.885 -26.281 + -0.30 4.70 -21.733 -26.494 + -0.30 4.80 -21.527 -26.588 + -0.30 4.90 -21.341 -26.773 + -0.30 5.00 -21.276 -26.697 + -0.30 5.10 -21.322 -26.589 + -0.30 5.20 -21.324 -26.521 + -0.30 5.30 -21.300 -26.553 + -0.30 5.40 -21.291 -26.570 + -0.30 5.50 -21.466 -26.541 + -0.30 5.60 -21.674 -26.610 + -0.30 5.70 -21.725 -26.712 + -0.30 5.80 -21.778 -26.848 + -0.30 5.90 -21.807 -26.928 + -0.30 6.00 -21.814 -26.985 + -0.30 6.10 -21.837 -27.029 + -0.30 6.20 -21.892 -27.113 + -0.30 6.30 -22.070 -27.177 + -0.30 6.40 -22.251 -27.239 + -0.30 6.50 -22.387 -27.350 + -0.30 6.60 -22.467 -27.419 + -0.30 6.70 -22.497 -27.456 + -0.30 6.80 -22.503 -27.499 + -0.30 6.90 -22.493 -27.543 + -0.30 7.00 -22.491 -27.550 + -0.30 7.10 -22.535 -27.601 + -0.30 7.20 -22.611 -27.687 + -0.30 7.30 -22.658 -27.712 + -0.30 7.40 -22.670 -27.761 + -0.30 7.50 -22.660 -27.829 + -0.30 7.60 -22.634 -27.896 + -0.30 7.70 -22.601 -27.938 + -0.30 7.80 -22.564 -27.960 + -0.30 7.90 -22.526 -27.985 + -0.30 8.00 -22.489 -28.098 + -0.30 8.10 -22.450 -28.226 + -0.30 8.20 -22.410 -28.356 + -0.30 8.30 -22.369 -28.492 + -0.30 8.40 -22.324 -28.618 + -0.30 8.50 -22.280 -28.739 + -0.30 8.60 -22.231 -28.861 + -0.30 8.70 -22.183 -28.985 + -0.30 8.80 -22.132 -29.112 + -0.30 8.90 -22.079 -29.240 + -0.30 9.00 -22.025 -29.371 + -0.20 1.00 -27.747 -24.104 + -0.20 1.10 -27.551 -24.106 + -0.20 1.20 -27.282 -24.108 + -0.20 1.30 -26.958 -24.111 + -0.20 1.40 -26.631 -24.113 + -0.20 1.50 -26.335 -24.116 + -0.20 1.60 -26.079 -24.119 + -0.20 1.70 -25.860 -24.122 + -0.20 1.80 -25.667 -24.125 + -0.20 1.90 -25.488 -24.128 + -0.20 2.00 -25.315 -24.132 + -0.20 2.10 -25.145 -24.136 + -0.20 2.20 -24.986 -24.141 + -0.20 2.30 -24.841 -24.146 + -0.20 2.40 -24.715 -24.151 + -0.20 2.50 -24.607 -24.156 + -0.20 2.60 -24.517 -24.162 + -0.20 2.70 -24.442 -24.169 + -0.20 2.80 -24.379 -24.176 + -0.20 2.90 -24.325 -24.184 + -0.20 3.00 -24.277 -24.192 + -0.20 3.10 -24.234 -24.201 + -0.20 3.20 -24.193 -24.210 + -0.20 3.30 -24.152 -24.221 + -0.20 3.40 -24.103 -24.232 + -0.20 3.50 -24.041 -24.244 + -0.20 3.60 -23.964 -24.258 + -0.20 3.70 -23.867 -24.272 + -0.20 3.80 -23.744 -24.288 + -0.20 3.90 -23.569 -24.305 + -0.20 4.00 -23.264 -24.323 + -0.20 4.10 -22.690 -24.350 + -0.20 4.20 -22.017 -24.496 + -0.20 4.30 -21.825 -25.022 + -0.20 4.40 -21.936 -25.443 + -0.20 4.50 -21.957 -25.889 + -0.20 4.60 -21.885 -26.378 + -0.20 4.70 -21.733 -26.589 + -0.20 4.80 -21.527 -26.679 + -0.20 4.90 -21.341 -26.849 + -0.20 5.00 -21.276 -26.720 + -0.20 5.10 -21.322 -26.599 + -0.20 5.20 -21.324 -26.527 + -0.20 5.30 -21.300 -26.558 + -0.20 5.40 -21.291 -26.574 + -0.20 5.50 -21.466 -26.543 + -0.20 5.60 -21.674 -26.612 + -0.20 5.70 -21.725 -26.714 + -0.20 5.80 -21.778 -26.851 + -0.20 5.90 -21.807 -26.931 + -0.20 6.00 -21.814 -26.988 + -0.20 6.10 -21.837 -27.032 + -0.20 6.20 -21.892 -27.115 + -0.20 6.30 -22.070 -27.179 + -0.20 6.40 -22.251 -27.241 + -0.20 6.50 -22.387 -27.351 + -0.20 6.60 -22.467 -27.421 + -0.20 6.70 -22.497 -27.457 + -0.20 6.80 -22.503 -27.500 + -0.20 6.90 -22.493 -27.543 + -0.20 7.00 -22.491 -27.551 + -0.20 7.10 -22.535 -27.598 + -0.20 7.20 -22.611 -27.692 + -0.20 7.30 -22.658 -27.712 + -0.20 7.40 -22.670 -27.761 + -0.20 7.50 -22.660 -27.829 + -0.20 7.60 -22.634 -27.897 + -0.20 7.70 -22.601 -27.938 + -0.20 7.80 -22.564 -27.961 + -0.20 7.90 -22.526 -27.985 + -0.20 8.00 -22.489 -28.098 + -0.20 8.10 -22.450 -28.227 + -0.20 8.20 -22.410 -28.357 + -0.20 8.30 -22.369 -28.493 + -0.20 8.40 -22.324 -28.619 + -0.20 8.50 -22.280 -28.741 + -0.20 8.60 -22.231 -28.863 + -0.20 8.70 -22.183 -28.988 + -0.20 8.80 -22.132 -29.116 + -0.20 8.90 -22.079 -29.246 + -0.20 9.00 -22.025 -29.378 + -0.10 1.00 -27.821 -24.201 + -0.10 1.10 -27.621 -24.203 + -0.10 1.20 -27.353 -24.205 + -0.10 1.30 -27.032 -24.207 + -0.10 1.40 -26.707 -24.209 + -0.10 1.50 -26.412 -24.211 + -0.10 1.60 -26.157 -24.214 + -0.10 1.70 -25.937 -24.217 + -0.10 1.80 -25.742 -24.220 + -0.10 1.90 -25.561 -24.223 + -0.10 2.00 -25.385 -24.226 + -0.10 2.10 -25.212 -24.230 + -0.10 2.20 -25.049 -24.234 + -0.10 2.30 -24.901 -24.238 + -0.10 2.40 -24.772 -24.243 + -0.10 2.50 -24.663 -24.248 + -0.10 2.60 -24.571 -24.253 + -0.10 2.70 -24.494 -24.259 + -0.10 2.80 -24.430 -24.265 + -0.10 2.90 -24.375 -24.272 + -0.10 3.00 -24.326 -24.279 + -0.10 3.10 -24.282 -24.287 + -0.10 3.20 -24.241 -24.296 + -0.10 3.30 -24.198 -24.305 + -0.10 3.40 -24.148 -24.315 + -0.10 3.50 -24.086 -24.326 + -0.10 3.60 -24.007 -24.338 + -0.10 3.70 -23.910 -24.351 + -0.10 3.80 -23.785 -24.365 + -0.10 3.90 -23.608 -24.380 + -0.10 4.00 -23.296 -24.397 + -0.10 4.10 -22.705 -24.422 + -0.10 4.20 -22.012 -24.574 + -0.10 4.30 -21.822 -25.116 + -0.10 4.40 -21.936 -25.535 + -0.10 4.50 -21.957 -25.985 + -0.10 4.60 -21.885 -26.474 + -0.10 4.70 -21.733 -26.683 + -0.10 4.80 -21.527 -26.768 + -0.10 4.90 -21.341 -26.920 + -0.10 5.00 -21.276 -26.740 + -0.10 5.10 -21.322 -26.607 + -0.10 5.20 -21.324 -26.532 + -0.10 5.30 -21.300 -26.562 + -0.10 5.40 -21.291 -26.578 + -0.10 5.50 -21.466 -26.545 + -0.10 5.60 -21.674 -26.613 + -0.10 5.70 -21.725 -26.716 + -0.10 5.80 -21.778 -26.853 + -0.10 5.90 -21.807 -26.933 + -0.10 6.00 -21.814 -26.990 + -0.10 6.10 -21.837 -27.034 + -0.10 6.20 -21.892 -27.117 + -0.10 6.30 -22.070 -27.181 + -0.10 6.40 -22.251 -27.242 + -0.10 6.50 -22.387 -27.352 + -0.10 6.60 -22.467 -27.421 + -0.10 6.70 -22.497 -27.458 + -0.10 6.80 -22.503 -27.500 + -0.10 6.90 -22.493 -27.544 + -0.10 7.00 -22.491 -27.551 + -0.10 7.10 -22.535 -27.602 + -0.10 7.20 -22.611 -27.692 + -0.10 7.30 -22.658 -27.712 + -0.10 7.40 -22.670 -27.761 + -0.10 7.50 -22.660 -27.829 + -0.10 7.60 -22.634 -27.897 + -0.10 7.70 -22.601 -27.938 + -0.10 7.80 -22.564 -27.961 + -0.10 7.90 -22.526 -27.985 + -0.10 8.00 -22.489 -28.099 + -0.10 8.10 -22.450 -28.227 + -0.10 8.20 -22.410 -28.357 + -0.10 8.30 -22.369 -28.494 + -0.10 8.40 -22.324 -28.620 + -0.10 8.50 -22.280 -28.742 + -0.10 8.60 -22.231 -28.865 + -0.10 8.70 -22.183 -28.990 + -0.10 8.80 -22.132 -29.119 + -0.10 8.90 -22.079 -29.250 + -0.10 9.00 -22.026 -29.383 + -0.00 1.00 -27.891 -24.299 + -0.00 1.10 -27.687 -24.300 + -0.00 1.20 -27.420 -24.302 + -0.00 1.30 -27.103 -24.304 + -0.00 1.40 -26.781 -24.306 + -0.00 1.50 -26.489 -24.308 + -0.00 1.60 -26.234 -24.310 + -0.00 1.70 -26.014 -24.312 + -0.00 1.80 -25.819 -24.315 + -0.00 1.90 -25.635 -24.318 + -0.00 2.00 -25.456 -24.321 + -0.00 2.10 -25.279 -24.324 + -0.00 2.20 -25.113 -24.328 + -0.00 2.30 -24.962 -24.331 + -0.00 2.40 -24.830 -24.336 + -0.00 2.50 -24.719 -24.340 + -0.00 2.60 -24.625 -24.345 + -0.00 2.70 -24.547 -24.350 + -0.00 2.80 -24.482 -24.356 + -0.00 2.90 -24.425 -24.362 + -0.00 3.00 -24.375 -24.368 + -0.00 3.10 -24.331 -24.375 + -0.00 3.20 -24.289 -24.383 + -0.00 3.30 -24.245 -24.391 + -0.00 3.40 -24.195 -24.400 + -0.00 3.50 -24.131 -24.410 + -0.00 3.60 -24.051 -24.421 + -0.00 3.70 -23.952 -24.432 + -0.00 3.80 -23.826 -24.445 + -0.00 3.90 -23.647 -24.458 + -0.00 4.00 -23.329 -24.473 + -0.00 4.10 -22.723 -24.497 + -0.00 4.20 -22.008 -24.654 + -0.00 4.30 -21.820 -25.210 + -0.00 4.40 -21.936 -25.628 + -0.00 4.50 -21.957 -26.082 + -0.00 4.60 -21.884 -26.570 + -0.00 4.70 -21.733 -26.775 + -0.00 4.80 -21.527 -26.854 + -0.00 4.90 -21.341 -26.985 + -0.00 5.00 -21.276 -26.756 + -0.00 5.10 -21.322 -26.613 + -0.00 5.20 -21.324 -26.536 + -0.00 5.30 -21.300 -26.565 + -0.00 5.40 -21.291 -26.580 + -0.00 5.50 -21.466 -26.547 + -0.00 5.60 -21.674 -26.615 + -0.00 5.70 -21.725 -26.717 + -0.00 5.80 -21.778 -26.855 + -0.00 5.90 -21.807 -26.935 + -0.00 6.00 -21.814 -26.992 + -0.00 6.10 -21.837 -27.035 + -0.00 6.20 -21.892 -27.119 + -0.00 6.30 -22.070 -27.182 + -0.00 6.40 -22.251 -27.243 + -0.00 6.50 -22.387 -27.353 + -0.00 6.60 -22.467 -27.418 + -0.00 6.70 -22.497 -27.458 + -0.00 6.80 -22.503 -27.501 + -0.00 6.90 -22.493 -27.545 + -0.00 7.00 -22.491 -27.552 + -0.00 7.10 -22.535 -27.603 + -0.00 7.20 -22.611 -27.692 + -0.00 7.30 -22.658 -27.708 + -0.00 7.40 -22.670 -27.761 + -0.00 7.50 -22.660 -27.830 + -0.00 7.60 -22.634 -27.897 + -0.00 7.70 -22.601 -27.938 + -0.00 7.80 -22.564 -27.961 + -0.00 7.90 -22.526 -27.986 + -0.00 8.00 -22.489 -28.099 + -0.00 8.10 -22.450 -28.228 + -0.00 8.20 -22.410 -28.358 + -0.00 8.30 -22.369 -28.494 + -0.00 8.40 -22.324 -28.621 + -0.00 8.50 -22.280 -28.743 + -0.00 8.60 -22.231 -28.867 + -0.00 8.70 -22.183 -28.992 + -0.00 8.80 -22.132 -29.121 + -0.00 8.90 -22.079 -29.253 + -0.00 9.00 -22.026 -29.388 + 0.10 1.00 -27.956 -24.396 + 0.10 1.10 -27.748 -24.398 + 0.10 1.20 -27.483 -24.399 + 0.10 1.30 -27.172 -24.401 + 0.10 1.40 -26.855 -24.402 + 0.10 1.50 -26.565 -24.404 + 0.10 1.60 -26.312 -24.406 + 0.10 1.70 -26.092 -24.408 + 0.10 1.80 -25.896 -24.411 + 0.10 1.90 -25.710 -24.413 + 0.10 2.00 -25.527 -24.416 + 0.10 2.10 -25.347 -24.419 + 0.10 2.20 -25.177 -24.422 + 0.10 2.30 -25.023 -24.425 + 0.10 2.40 -24.889 -24.429 + 0.10 2.50 -24.775 -24.433 + 0.10 2.60 -24.680 -24.437 + 0.10 2.70 -24.600 -24.442 + 0.10 2.80 -24.534 -24.447 + 0.10 2.90 -24.476 -24.453 + 0.10 3.00 -24.426 -24.458 + 0.10 3.10 -24.380 -24.465 + 0.10 3.20 -24.337 -24.472 + 0.10 3.30 -24.293 -24.479 + 0.10 3.40 -24.242 -24.487 + 0.10 3.50 -24.177 -24.496 + 0.10 3.60 -24.096 -24.505 + 0.10 3.70 -23.996 -24.516 + 0.10 3.80 -23.869 -24.527 + 0.10 3.90 -23.688 -24.539 + 0.10 4.00 -23.364 -24.552 + 0.10 4.10 -22.744 -24.574 + 0.10 4.20 -22.006 -24.738 + 0.10 4.30 -21.819 -25.306 + 0.10 4.40 -21.936 -25.722 + 0.10 4.50 -21.957 -26.179 + 0.10 4.60 -21.884 -26.665 + 0.10 4.70 -21.733 -26.865 + 0.10 4.80 -21.527 -26.937 + 0.10 4.90 -21.341 -27.046 + 0.10 5.00 -21.276 -26.770 + 0.10 5.10 -21.322 -26.618 + 0.10 5.20 -21.324 -26.539 + 0.10 5.30 -21.300 -26.568 + 0.10 5.40 -21.291 -26.582 + 0.10 5.50 -21.466 -26.548 + 0.10 5.60 -21.674 -26.616 + 0.10 5.70 -21.725 -26.718 + 0.10 5.80 -21.778 -26.856 + 0.10 5.90 -21.807 -26.936 + 0.10 6.00 -21.814 -26.993 + 0.10 6.10 -21.837 -27.036 + 0.10 6.20 -21.892 -27.120 + 0.10 6.30 -22.070 -27.183 + 0.10 6.40 -22.251 -27.244 + 0.10 6.50 -22.387 -27.353 + 0.10 6.60 -22.467 -27.423 + 0.10 6.70 -22.497 -27.459 + 0.10 6.80 -22.503 -27.501 + 0.10 6.90 -22.493 -27.545 + 0.10 7.00 -22.491 -27.552 + 0.10 7.10 -22.535 -27.603 + 0.10 7.20 -22.611 -27.693 + 0.10 7.30 -22.658 -27.708 + 0.10 7.40 -22.670 -27.762 + 0.10 7.50 -22.660 -27.830 + 0.10 7.60 -22.634 -27.897 + 0.10 7.70 -22.601 -27.939 + 0.10 7.80 -22.564 -27.962 + 0.10 7.90 -22.526 -27.986 + 0.10 8.00 -22.489 -28.099 + 0.10 8.10 -22.450 -28.228 + 0.10 8.20 -22.410 -28.358 + 0.10 8.30 -22.369 -28.495 + 0.10 8.40 -22.324 -28.622 + 0.10 8.50 -22.280 -28.740 + 0.10 8.60 -22.231 -28.868 + 0.10 8.70 -22.183 -28.994 + 0.10 8.80 -22.132 -29.123 + 0.10 8.90 -22.079 -29.256 + 0.10 9.00 -22.026 -29.392 + 0.20 1.00 -28.015 -24.494 + 0.20 1.10 -27.804 -24.495 + 0.20 1.20 -27.542 -24.497 + 0.20 1.30 -27.237 -24.498 + 0.20 1.40 -26.926 -24.499 + 0.20 1.50 -26.640 -24.501 + 0.20 1.60 -26.389 -24.503 + 0.20 1.70 -26.170 -24.505 + 0.20 1.80 -25.973 -24.507 + 0.20 1.90 -25.785 -24.509 + 0.20 2.00 -25.599 -24.512 + 0.20 2.10 -25.415 -24.514 + 0.20 2.20 -25.241 -24.517 + 0.20 2.30 -25.084 -24.520 + 0.20 2.40 -24.948 -24.523 + 0.20 2.50 -24.832 -24.527 + 0.20 2.60 -24.735 -24.531 + 0.20 2.70 -24.654 -24.535 + 0.20 2.80 -24.586 -24.539 + 0.20 2.90 -24.528 -24.544 + 0.20 3.00 -24.476 -24.550 + 0.20 3.10 -24.430 -24.555 + 0.20 3.20 -24.386 -24.561 + 0.20 3.30 -24.341 -24.568 + 0.20 3.40 -24.289 -24.575 + 0.20 3.50 -24.224 -24.583 + 0.20 3.60 -24.142 -24.591 + 0.20 3.70 -24.041 -24.601 + 0.20 3.80 -23.912 -24.610 + 0.20 3.90 -23.730 -24.621 + 0.20 4.00 -23.400 -24.633 + 0.20 4.10 -22.767 -24.653 + 0.20 4.20 -22.005 -24.823 + 0.20 4.30 -21.817 -25.402 + 0.20 4.40 -21.936 -25.817 + 0.20 4.50 -21.956 -26.276 + 0.20 4.60 -21.884 -26.758 + 0.20 4.70 -21.733 -26.953 + 0.20 4.80 -21.527 -27.017 + 0.20 4.90 -21.341 -27.101 + 0.20 5.00 -21.276 -26.781 + 0.20 5.10 -21.322 -26.622 + 0.20 5.20 -21.324 -26.541 + 0.20 5.30 -21.300 -26.570 + 0.20 5.40 -21.291 -26.584 + 0.20 5.50 -21.466 -26.549 + 0.20 5.60 -21.674 -26.617 + 0.20 5.70 -21.725 -26.719 + 0.20 5.80 -21.778 -26.857 + 0.20 5.90 -21.807 -26.937 + 0.20 6.00 -21.814 -26.994 + 0.20 6.10 -21.837 -27.037 + 0.20 6.20 -21.892 -27.121 + 0.20 6.30 -22.070 -27.184 + 0.20 6.40 -22.251 -27.244 + 0.20 6.50 -22.387 -27.354 + 0.20 6.60 -22.467 -27.423 + 0.20 6.70 -22.497 -27.459 + 0.20 6.80 -22.503 -27.502 + 0.20 6.90 -22.493 -27.545 + 0.20 7.00 -22.491 -27.552 + 0.20 7.10 -22.535 -27.603 + 0.20 7.20 -22.611 -27.693 + 0.20 7.30 -22.658 -27.708 + 0.20 7.40 -22.670 -27.762 + 0.20 7.50 -22.660 -27.830 + 0.20 7.60 -22.634 -27.897 + 0.20 7.70 -22.601 -27.939 + 0.20 7.80 -22.564 -27.962 + 0.20 7.90 -22.526 -27.986 + 0.20 8.00 -22.489 -28.099 + 0.20 8.10 -22.450 -28.228 + 0.20 8.20 -22.410 -28.359 + 0.20 8.30 -22.369 -28.495 + 0.20 8.40 -22.324 -28.622 + 0.20 8.50 -22.280 -28.745 + 0.20 8.60 -22.231 -28.869 + 0.20 8.70 -22.183 -28.995 + 0.20 8.80 -22.132 -29.125 + 0.20 8.90 -22.079 -29.258 + 0.20 9.00 -22.026 -29.395 + 0.30 1.00 -28.069 -24.592 + 0.30 1.10 -27.854 -24.593 + 0.30 1.20 -27.596 -24.594 + 0.30 1.30 -27.299 -24.596 + 0.30 1.40 -26.994 -24.597 + 0.30 1.50 -26.714 -24.598 + 0.30 1.60 -26.465 -24.600 + 0.30 1.70 -26.247 -24.602 + 0.30 1.80 -26.049 -24.604 + 0.30 1.90 -25.860 -24.606 + 0.30 2.00 -25.671 -24.608 + 0.30 2.10 -25.483 -24.610 + 0.30 2.20 -25.306 -24.613 + 0.30 2.30 -25.146 -24.615 + 0.30 2.40 -25.007 -24.618 + 0.30 2.50 -24.889 -24.621 + 0.30 2.60 -24.790 -24.625 + 0.30 2.70 -24.708 -24.629 + 0.30 2.80 -24.639 -24.633 + 0.30 2.90 -24.579 -24.637 + 0.30 3.00 -24.527 -24.642 + 0.30 3.10 -24.480 -24.647 + 0.30 3.20 -24.435 -24.652 + 0.30 3.30 -24.390 -24.658 + 0.30 3.40 -24.337 -24.664 + 0.30 3.50 -24.271 -24.671 + 0.30 3.60 -24.188 -24.679 + 0.30 3.70 -24.086 -24.687 + 0.30 3.80 -23.956 -24.696 + 0.30 3.90 -23.772 -24.705 + 0.30 4.00 -23.437 -24.715 + 0.30 4.10 -22.791 -24.735 + 0.30 4.20 -22.004 -24.911 + 0.30 4.30 -21.816 -25.499 + 0.30 4.40 -21.936 -25.914 + 0.30 4.50 -21.956 -26.373 + 0.30 4.60 -21.884 -26.850 + 0.30 4.70 -21.733 -27.039 + 0.30 4.80 -21.527 -27.092 + 0.30 4.90 -21.341 -27.150 + 0.30 5.00 -21.276 -26.790 + 0.30 5.10 -21.322 -26.626 + 0.30 5.20 -21.324 -26.543 + 0.30 5.30 -21.300 -26.571 + 0.30 5.40 -21.291 -26.585 + 0.30 5.50 -21.466 -26.550 + 0.30 5.60 -21.674 -26.618 + 0.30 5.70 -21.725 -26.720 + 0.30 5.80 -21.778 -26.858 + 0.30 5.90 -21.807 -26.938 + 0.30 6.00 -21.814 -26.995 + 0.30 6.10 -21.837 -27.038 + 0.30 6.20 -21.892 -27.122 + 0.30 6.30 -22.070 -27.184 + 0.30 6.40 -22.251 -27.245 + 0.30 6.50 -22.387 -27.354 + 0.30 6.60 -22.467 -27.424 + 0.30 6.70 -22.497 -27.460 + 0.30 6.80 -22.503 -27.502 + 0.30 6.90 -22.493 -27.546 + 0.30 7.00 -22.491 -27.552 + 0.30 7.10 -22.535 -27.603 + 0.30 7.20 -22.611 -27.693 + 0.30 7.30 -22.658 -27.708 + 0.30 7.40 -22.670 -27.762 + 0.30 7.50 -22.660 -27.830 + 0.30 7.60 -22.634 -27.898 + 0.30 7.70 -22.601 -27.939 + 0.30 7.80 -22.564 -27.962 + 0.30 7.90 -22.526 -27.986 + 0.30 8.00 -22.489 -28.100 + 0.30 8.10 -22.450 -28.228 + 0.30 8.20 -22.410 -28.359 + 0.30 8.30 -22.369 -28.496 + 0.30 8.40 -22.324 -28.622 + 0.30 8.50 -22.280 -28.746 + 0.30 8.60 -22.231 -28.869 + 0.30 8.70 -22.183 -28.996 + 0.30 8.80 -22.132 -29.126 + 0.30 8.90 -22.079 -29.260 + 0.30 9.00 -22.026 -29.397 + 0.40 1.00 -28.117 -24.690 + 0.40 1.10 -27.899 -24.691 + 0.40 1.20 -27.645 -24.692 + 0.40 1.30 -27.356 -24.693 + 0.40 1.40 -27.060 -24.695 + 0.40 1.50 -26.785 -24.696 + 0.40 1.60 -26.540 -24.697 + 0.40 1.70 -26.323 -24.699 + 0.40 1.80 -26.126 -24.701 + 0.40 1.90 -25.935 -24.702 + 0.40 2.00 -25.743 -24.704 + 0.40 2.10 -25.552 -24.706 + 0.40 2.20 -25.371 -24.709 + 0.40 2.30 -25.208 -24.711 + 0.40 2.40 -25.066 -24.714 + 0.40 2.50 -24.946 -24.717 + 0.40 2.60 -24.846 -24.720 + 0.40 2.70 -24.762 -24.723 + 0.40 2.80 -24.692 -24.727 + 0.40 2.90 -24.631 -24.730 + 0.40 3.00 -24.578 -24.735 + 0.40 3.10 -24.530 -24.739 + 0.40 3.20 -24.485 -24.744 + 0.40 3.30 -24.439 -24.749 + 0.40 3.40 -24.385 -24.755 + 0.40 3.50 -24.318 -24.761 + 0.40 3.60 -24.234 -24.768 + 0.40 3.70 -24.131 -24.775 + 0.40 3.80 -24.000 -24.783 + 0.40 3.90 -23.815 -24.791 + 0.40 4.00 -23.475 -24.800 + 0.40 4.10 -22.818 -24.818 + 0.40 4.20 -22.004 -25.000 + 0.40 4.30 -21.815 -25.596 + 0.40 4.40 -21.936 -26.010 + 0.40 4.50 -21.956 -26.470 + 0.40 4.60 -21.884 -26.940 + 0.40 4.70 -21.733 -27.121 + 0.40 4.80 -21.527 -27.164 + 0.40 4.90 -21.341 -27.194 + 0.40 5.00 -21.276 -26.797 + 0.40 5.10 -21.322 -26.628 + 0.40 5.20 -21.324 -26.545 + 0.40 5.30 -21.300 -26.573 + 0.40 5.40 -21.291 -26.586 + 0.40 5.50 -21.466 -26.551 + 0.40 5.60 -21.674 -26.618 + 0.40 5.70 -21.725 -26.721 + 0.40 5.80 -21.778 -26.859 + 0.40 5.90 -21.807 -26.939 + 0.40 6.00 -21.814 -26.995 + 0.40 6.10 -21.837 -27.039 + 0.40 6.20 -21.892 -27.122 + 0.40 6.30 -22.070 -27.185 + 0.40 6.40 -22.251 -27.245 + 0.40 6.50 -22.387 -27.355 + 0.40 6.60 -22.467 -27.424 + 0.40 6.70 -22.497 -27.460 + 0.40 6.80 -22.503 -27.502 + 0.40 6.90 -22.493 -27.546 + 0.40 7.00 -22.491 -27.553 + 0.40 7.10 -22.535 -27.603 + 0.40 7.20 -22.610 -27.692 + 0.40 7.30 -22.658 -27.709 + 0.40 7.40 -22.670 -27.762 + 0.40 7.50 -22.660 -27.830 + 0.40 7.60 -22.634 -27.898 + 0.40 7.70 -22.601 -27.939 + 0.40 7.80 -22.564 -27.962 + 0.40 7.90 -22.526 -27.986 + 0.40 8.00 -22.489 -28.100 + 0.40 8.10 -22.450 -28.229 + 0.40 8.20 -22.410 -28.359 + 0.40 8.30 -22.369 -28.496 + 0.40 8.40 -22.324 -28.623 + 0.40 8.50 -22.280 -28.746 + 0.40 8.60 -22.231 -28.870 + 0.40 8.70 -22.183 -28.997 + 0.40 8.80 -22.132 -29.127 + 0.40 8.90 -22.079 -29.261 + 0.40 9.00 -22.026 -29.399 + 0.50 1.00 -28.159 -24.789 + 0.50 1.10 -27.938 -24.790 + 0.50 1.20 -27.688 -24.791 + 0.50 1.30 -27.409 -24.792 + 0.50 1.40 -27.122 -24.793 + 0.50 1.50 -26.854 -24.794 + 0.50 1.60 -26.613 -24.795 + 0.50 1.70 -26.399 -24.796 + 0.50 1.80 -26.202 -24.798 + 0.50 1.90 -26.010 -24.800 + 0.50 2.00 -25.815 -24.801 + 0.50 2.10 -25.620 -24.803 + 0.50 2.20 -25.436 -24.805 + 0.50 2.30 -25.270 -24.807 + 0.50 2.40 -25.125 -24.810 + 0.50 2.50 -25.003 -24.812 + 0.50 2.60 -24.901 -24.815 + 0.50 2.70 -24.816 -24.818 + 0.50 2.80 -24.745 -24.821 + 0.50 2.90 -24.683 -24.825 + 0.50 3.00 -24.629 -24.828 + 0.50 3.10 -24.581 -24.832 + 0.50 3.20 -24.535 -24.837 + 0.50 3.30 -24.488 -24.841 + 0.50 3.40 -24.433 -24.846 + 0.50 3.50 -24.365 -24.852 + 0.50 3.60 -24.281 -24.858 + 0.50 3.70 -24.177 -24.864 + 0.50 3.80 -24.045 -24.871 + 0.50 3.90 -23.858 -24.878 + 0.50 4.00 -23.515 -24.886 + 0.50 4.10 -22.846 -24.903 + 0.50 4.20 -22.004 -25.091 + 0.50 4.30 -21.815 -25.694 + 0.50 4.40 -21.936 -26.108 + 0.50 4.50 -21.956 -26.567 + 0.50 4.60 -21.884 -27.028 + 0.50 4.70 -21.733 -27.200 + 0.50 4.80 -21.527 -27.230 + 0.50 4.90 -21.341 -27.232 + 0.50 5.00 -21.276 -26.803 + 0.50 5.10 -21.322 -26.630 + 0.50 5.20 -21.324 -26.546 + 0.50 5.30 -21.300 -26.574 + 0.50 5.40 -21.291 -26.587 + 0.50 5.50 -21.466 -26.551 + 0.50 5.60 -21.674 -26.619 + 0.50 5.70 -21.725 -26.721 + 0.50 5.80 -21.778 -26.859 + 0.50 5.90 -21.807 -26.939 + 0.50 6.00 -21.814 -26.996 + 0.50 6.10 -21.837 -27.039 + 0.50 6.20 -21.892 -27.123 + 0.50 6.30 -22.070 -27.185 + 0.50 6.40 -22.251 -27.245 + 0.50 6.50 -22.387 -27.355 + 0.50 6.60 -22.467 -27.424 + 0.50 6.70 -22.497 -27.460 + 0.50 6.80 -22.503 -27.502 + 0.50 6.90 -22.493 -27.546 + 0.50 7.00 -22.491 -27.553 + 0.50 7.10 -22.535 -27.604 + 0.50 7.20 -22.610 -27.692 + 0.50 7.30 -22.658 -27.709 + 0.50 7.40 -22.670 -27.762 + 0.50 7.50 -22.660 -27.830 + 0.50 7.60 -22.634 -27.898 + 0.50 7.70 -22.601 -27.939 + 0.50 7.80 -22.564 -27.962 + 0.50 7.90 -22.526 -27.986 + 0.50 8.00 -22.489 -28.100 + 0.50 8.10 -22.450 -28.229 + 0.50 8.20 -22.410 -28.359 + 0.50 8.30 -22.369 -28.496 + 0.50 8.40 -22.324 -28.623 + 0.50 8.50 -22.280 -28.746 + 0.50 8.60 -22.231 -28.870 + 0.50 8.70 -22.183 -28.997 + 0.50 8.80 -22.132 -29.128 + 0.50 8.90 -22.079 -29.262 + 0.50 9.00 -22.026 -29.401 + 0.60 1.00 -28.196 -24.887 + 0.60 1.10 -27.972 -24.888 + 0.60 1.20 -27.727 -24.889 + 0.60 1.30 -27.456 -24.890 + 0.60 1.40 -27.180 -24.891 + 0.60 1.50 -26.919 -24.892 + 0.60 1.60 -26.684 -24.893 + 0.60 1.70 -26.473 -24.894 + 0.60 1.80 -26.277 -24.896 + 0.60 1.90 -26.084 -24.897 + 0.60 2.00 -25.887 -24.899 + 0.60 2.10 -25.689 -24.900 + 0.60 2.20 -25.501 -24.902 + 0.60 2.30 -25.332 -24.904 + 0.60 2.40 -25.185 -24.906 + 0.60 2.50 -25.061 -24.908 + 0.60 2.60 -24.957 -24.911 + 0.60 2.70 -24.871 -24.913 + 0.60 2.80 -24.798 -24.916 + 0.60 2.90 -24.736 -24.919 + 0.60 3.00 -24.681 -24.923 + 0.60 3.10 -24.631 -24.926 + 0.60 3.20 -24.585 -24.930 + 0.60 3.30 -24.537 -24.934 + 0.60 3.40 -24.482 -24.939 + 0.60 3.50 -24.413 -24.943 + 0.60 3.60 -24.328 -24.949 + 0.60 3.70 -24.223 -24.954 + 0.60 3.80 -24.090 -24.960 + 0.60 3.90 -23.902 -24.967 + 0.60 4.00 -23.555 -24.973 + 0.60 4.10 -22.875 -24.990 + 0.60 4.20 -22.005 -25.183 + 0.60 4.30 -21.814 -25.793 + 0.60 4.40 -21.936 -26.206 + 0.60 4.50 -21.956 -26.664 + 0.60 4.60 -21.884 -27.113 + 0.60 4.70 -21.733 -27.275 + 0.60 4.80 -21.527 -27.291 + 0.60 4.90 -21.341 -27.265 + 0.60 5.00 -21.276 -26.808 + 0.60 5.10 -21.322 -26.632 + 0.60 5.20 -21.324 -26.547 + 0.60 5.30 -21.300 -26.574 + 0.60 5.40 -21.291 -26.588 + 0.60 5.50 -21.466 -26.552 + 0.60 5.60 -21.674 -26.619 + 0.60 5.70 -21.725 -26.721 + 0.60 5.80 -21.778 -26.860 + 0.60 5.90 -21.807 -26.940 + 0.60 6.00 -21.814 -26.996 + 0.60 6.10 -21.837 -27.040 + 0.60 6.20 -21.892 -27.123 + 0.60 6.30 -22.070 -27.186 + 0.60 6.40 -22.251 -27.246 + 0.60 6.50 -22.387 -27.355 + 0.60 6.60 -22.467 -27.424 + 0.60 6.70 -22.497 -27.460 + 0.60 6.80 -22.503 -27.502 + 0.60 6.90 -22.493 -27.546 + 0.60 7.00 -22.491 -27.553 + 0.60 7.10 -22.535 -27.604 + 0.60 7.20 -22.610 -27.692 + 0.60 7.30 -22.658 -27.709 + 0.60 7.40 -22.670 -27.762 + 0.60 7.50 -22.660 -27.830 + 0.60 7.60 -22.634 -27.898 + 0.60 7.70 -22.601 -27.939 + 0.60 7.80 -22.564 -27.962 + 0.60 7.90 -22.526 -27.986 + 0.60 8.00 -22.489 -28.100 + 0.60 8.10 -22.450 -28.229 + 0.60 8.20 -22.410 -28.359 + 0.60 8.30 -22.369 -28.496 + 0.60 8.40 -22.324 -28.623 + 0.60 8.50 -22.280 -28.747 + 0.60 8.60 -22.231 -28.871 + 0.60 8.70 -22.183 -28.998 + 0.60 8.80 -22.132 -29.129 + 0.60 8.90 -22.079 -29.263 + 0.60 9.00 -22.026 -29.402 + 0.70 1.00 -28.227 -24.986 + 0.70 1.10 -28.001 -24.987 + 0.70 1.20 -27.760 -24.988 + 0.70 1.30 -27.500 -24.988 + 0.70 1.40 -27.233 -24.989 + 0.70 1.50 -26.981 -24.990 + 0.70 1.60 -26.752 -24.991 + 0.70 1.70 -26.544 -24.992 + 0.70 1.80 -26.350 -24.993 + 0.70 1.90 -26.157 -24.995 + 0.70 2.00 -25.958 -24.996 + 0.70 2.10 -25.756 -24.998 + 0.70 2.20 -25.566 -24.999 + 0.70 2.30 -25.393 -25.001 + 0.70 2.40 -25.244 -25.003 + 0.70 2.50 -25.118 -25.005 + 0.70 2.60 -25.013 -25.007 + 0.70 2.70 -24.925 -25.009 + 0.70 2.80 -24.851 -25.012 + 0.70 2.90 -24.788 -25.015 + 0.70 3.00 -24.732 -25.018 + 0.70 3.10 -24.682 -25.021 + 0.70 3.20 -24.635 -25.024 + 0.70 3.30 -24.586 -25.028 + 0.70 3.40 -24.530 -25.032 + 0.70 3.50 -24.461 -25.036 + 0.70 3.60 -24.375 -25.041 + 0.70 3.70 -24.269 -25.046 + 0.70 3.80 -24.136 -25.051 + 0.70 3.90 -23.947 -25.056 + 0.70 4.00 -23.596 -25.062 + 0.70 4.10 -22.905 -25.078 + 0.70 4.20 -22.005 -25.276 + 0.70 4.30 -21.814 -25.891 + 0.70 4.40 -21.936 -26.304 + 0.70 4.50 -21.956 -26.760 + 0.70 4.60 -21.884 -27.196 + 0.70 4.70 -21.733 -27.345 + 0.70 4.80 -21.527 -27.346 + 0.70 4.90 -21.341 -27.293 + 0.70 5.00 -21.276 -26.811 + 0.70 5.10 -21.322 -26.633 + 0.70 5.20 -21.324 -26.548 + 0.70 5.30 -21.300 -26.575 + 0.70 5.40 -21.291 -26.588 + 0.70 5.50 -21.466 -26.552 + 0.70 5.60 -21.674 -26.619 + 0.70 5.70 -21.725 -26.722 + 0.70 5.80 -21.778 -26.860 + 0.70 5.90 -21.807 -26.940 + 0.70 6.00 -21.814 -26.997 + 0.70 6.10 -21.837 -27.040 + 0.70 6.20 -21.892 -27.123 + 0.70 6.30 -22.070 -27.186 + 0.70 6.40 -22.251 -27.246 + 0.70 6.50 -22.387 -27.355 + 0.70 6.60 -22.467 -27.424 + 0.70 6.70 -22.497 -27.460 + 0.70 6.80 -22.503 -27.503 + 0.70 6.90 -22.493 -27.546 + 0.70 7.00 -22.491 -27.553 + 0.70 7.10 -22.535 -27.604 + 0.70 7.20 -22.610 -27.692 + 0.70 7.30 -22.658 -27.709 + 0.70 7.40 -22.670 -27.762 + 0.70 7.50 -22.660 -27.830 + 0.70 7.60 -22.634 -27.898 + 0.70 7.70 -22.601 -27.939 + 0.70 7.80 -22.564 -27.962 + 0.70 7.90 -22.526 -27.986 + 0.70 8.00 -22.489 -28.100 + 0.70 8.10 -22.450 -28.229 + 0.70 8.20 -22.410 -28.359 + 0.70 8.30 -22.369 -28.496 + 0.70 8.40 -22.324 -28.623 + 0.70 8.50 -22.280 -28.747 + 0.70 8.60 -22.231 -28.871 + 0.70 8.70 -22.183 -28.998 + 0.70 8.80 -22.132 -29.129 + 0.70 8.90 -22.079 -29.264 + 0.70 9.00 -22.026 -29.403 + 0.80 1.00 -28.254 -25.085 + 0.80 1.10 -28.026 -25.086 + 0.80 1.20 -27.789 -25.086 + 0.80 1.30 -27.538 -25.087 + 0.80 1.40 -27.282 -25.088 + 0.80 1.50 -27.040 -25.089 + 0.80 1.60 -26.817 -25.090 + 0.80 1.70 -26.614 -25.090 + 0.80 1.80 -26.422 -25.092 + 0.80 1.90 -26.229 -25.093 + 0.80 2.00 -26.028 -25.094 + 0.80 2.10 -25.824 -25.095 + 0.80 2.20 -25.630 -25.097 + 0.80 2.30 -25.455 -25.098 + 0.80 2.40 -25.303 -25.100 + 0.80 2.50 -25.175 -25.102 + 0.80 2.60 -25.068 -25.104 + 0.80 2.70 -24.979 -25.106 + 0.80 2.80 -24.904 -25.108 + 0.80 2.90 -24.840 -25.110 + 0.80 3.00 -24.783 -25.113 + 0.80 3.10 -24.733 -25.116 + 0.80 3.20 -24.685 -25.119 + 0.80 3.30 -24.636 -25.122 + 0.80 3.40 -24.579 -25.126 + 0.80 3.50 -24.509 -25.129 + 0.80 3.60 -24.422 -25.133 + 0.80 3.70 -24.315 -25.138 + 0.80 3.80 -24.181 -25.142 + 0.80 3.90 -23.991 -25.147 + 0.80 4.00 -23.638 -25.152 + 0.80 4.10 -22.936 -25.167 + 0.80 4.20 -22.006 -25.370 + 0.80 4.30 -21.814 -25.990 + 0.80 4.40 -21.936 -26.402 + 0.80 4.50 -21.956 -26.856 + 0.80 4.60 -21.884 -27.274 + 0.80 4.70 -21.733 -27.411 + 0.80 4.80 -21.527 -27.396 + 0.80 4.90 -21.341 -27.316 + 0.80 5.00 -21.276 -26.814 + 0.80 5.10 -21.322 -26.634 + 0.80 5.20 -21.324 -26.548 + 0.80 5.30 -21.300 -26.576 + 0.80 5.40 -21.291 -26.589 + 0.80 5.50 -21.466 -26.552 + 0.80 5.60 -21.674 -26.619 + 0.80 5.70 -21.725 -26.722 + 0.80 5.80 -21.778 -26.860 + 0.80 5.90 -21.807 -26.940 + 0.80 6.00 -21.814 -26.997 + 0.80 6.10 -21.837 -27.040 + 0.80 6.20 -21.892 -27.124 + 0.80 6.30 -22.070 -27.186 + 0.80 6.40 -22.251 -27.246 + 0.80 6.50 -22.387 -27.355 + 0.80 6.60 -22.467 -27.424 + 0.80 6.70 -22.497 -27.460 + 0.80 6.80 -22.503 -27.503 + 0.80 6.90 -22.493 -27.546 + 0.80 7.00 -22.491 -27.553 + 0.80 7.10 -22.535 -27.604 + 0.80 7.20 -22.610 -27.692 + 0.80 7.30 -22.658 -27.709 + 0.80 7.40 -22.670 -27.762 + 0.80 7.50 -22.660 -27.830 + 0.80 7.60 -22.634 -27.898 + 0.80 7.70 -22.601 -27.939 + 0.80 7.80 -22.564 -27.962 + 0.80 7.90 -22.526 -27.986 + 0.80 8.00 -22.489 -28.100 + 0.80 8.10 -22.450 -28.229 + 0.80 8.20 -22.410 -28.359 + 0.80 8.30 -22.369 -28.496 + 0.80 8.40 -22.324 -28.624 + 0.80 8.50 -22.280 -28.747 + 0.80 8.60 -22.231 -28.871 + 0.80 8.70 -22.183 -28.998 + 0.80 8.80 -22.132 -29.130 + 0.80 8.90 -22.079 -29.265 + 0.80 9.00 -22.026 -29.404 + 0.90 1.00 -28.277 -25.184 + 0.90 1.10 -28.047 -25.185 + 0.90 1.20 -27.814 -25.185 + 0.90 1.30 -27.572 -25.186 + 0.90 1.40 -27.327 -25.187 + 0.90 1.50 -27.094 -25.187 + 0.90 1.60 -26.878 -25.188 + 0.90 1.70 -26.681 -25.189 + 0.90 1.80 -26.492 -25.190 + 0.90 1.90 -26.299 -25.191 + 0.90 2.00 -26.097 -25.192 + 0.90 2.10 -25.890 -25.193 + 0.90 2.20 -25.693 -25.194 + 0.90 2.30 -25.516 -25.196 + 0.90 2.40 -25.362 -25.197 + 0.90 2.50 -25.232 -25.199 + 0.90 2.60 -25.124 -25.201 + 0.90 2.70 -25.033 -25.203 + 0.90 2.80 -24.958 -25.205 + 0.90 2.90 -24.892 -25.207 + 0.90 3.00 -24.835 -25.209 + 0.90 3.10 -24.783 -25.211 + 0.90 3.20 -24.735 -25.214 + 0.90 3.30 -24.685 -25.217 + 0.90 3.40 -24.628 -25.220 + 0.90 3.50 -24.557 -25.223 + 0.90 3.60 -24.469 -25.227 + 0.90 3.70 -24.362 -25.231 + 0.90 3.80 -24.227 -25.235 + 0.90 3.90 -24.036 -25.239 + 0.90 4.00 -23.680 -25.243 + 0.90 4.10 -22.968 -25.257 + 0.90 4.20 -22.006 -25.464 + 0.90 4.30 -21.813 -26.089 + 0.90 4.40 -21.936 -26.501 + 0.90 4.50 -21.956 -26.950 + 0.90 4.60 -21.884 -27.349 + 0.90 4.70 -21.732 -27.470 + 0.90 4.80 -21.527 -27.440 + 0.90 4.90 -21.341 -27.336 + 0.90 5.00 -21.276 -26.816 + 0.90 5.10 -21.322 -26.635 + 0.90 5.20 -21.324 -26.549 + 0.90 5.30 -21.300 -26.576 + 0.90 5.40 -21.291 -26.589 + 0.90 5.50 -21.466 -26.552 + 0.90 5.60 -21.674 -26.620 + 0.90 5.70 -21.725 -26.722 + 0.90 5.80 -21.778 -26.860 + 0.90 5.90 -21.807 -26.940 + 0.90 6.00 -21.814 -26.997 + 0.90 6.10 -21.837 -27.040 + 0.90 6.20 -21.892 -27.124 + 0.90 6.30 -22.070 -27.186 + 0.90 6.40 -22.251 -27.246 + 0.90 6.50 -22.387 -27.356 + 0.90 6.60 -22.467 -27.425 + 0.90 6.70 -22.497 -27.461 + 0.90 6.80 -22.503 -27.503 + 0.90 6.90 -22.493 -27.546 + 0.90 7.00 -22.491 -27.553 + 0.90 7.10 -22.535 -27.604 + 0.90 7.20 -22.610 -27.692 + 0.90 7.30 -22.658 -27.709 + 0.90 7.40 -22.670 -27.762 + 0.90 7.50 -22.660 -27.830 + 0.90 7.60 -22.634 -27.898 + 0.90 7.70 -22.601 -27.939 + 0.90 7.80 -22.564 -27.962 + 0.90 7.90 -22.526 -27.987 + 0.90 8.00 -22.489 -28.100 + 0.90 8.10 -22.450 -28.229 + 0.90 8.20 -22.410 -28.359 + 0.90 8.30 -22.369 -28.496 + 0.90 8.40 -22.324 -28.624 + 0.90 8.50 -22.280 -28.747 + 0.90 8.60 -22.231 -28.871 + 0.90 8.70 -22.183 -28.999 + 0.90 8.80 -22.132 -29.130 + 0.90 8.90 -22.079 -29.265 + 0.90 9.00 -22.026 -29.404 + 1.00 1.00 -28.296 -25.283 + 1.00 1.10 -28.064 -25.284 + 1.00 1.20 -27.836 -25.284 + 1.00 1.30 -27.602 -25.285 + 1.00 1.40 -27.367 -25.285 + 1.00 1.50 -27.144 -25.286 + 1.00 1.60 -26.936 -25.287 + 1.00 1.70 -26.744 -25.287 + 1.00 1.80 -26.559 -25.288 + 1.00 1.90 -26.368 -25.289 + 1.00 2.00 -26.164 -25.290 + 1.00 2.10 -25.956 -25.291 + 1.00 2.20 -25.756 -25.292 + 1.00 2.30 -25.576 -25.294 + 1.00 2.40 -25.420 -25.295 + 1.00 2.50 -25.289 -25.296 + 1.00 2.60 -25.179 -25.298 + 1.00 2.70 -25.087 -25.300 + 1.00 2.80 -25.011 -25.301 + 1.00 2.90 -24.945 -25.303 + 1.00 3.00 -24.886 -25.305 + 1.00 3.10 -24.834 -25.308 + 1.00 3.20 -24.785 -25.310 + 1.00 3.30 -24.734 -25.312 + 1.00 3.40 -24.676 -25.315 + 1.00 3.50 -24.605 -25.318 + 1.00 3.60 -24.517 -25.321 + 1.00 3.70 -24.409 -25.324 + 1.00 3.80 -24.273 -25.328 + 1.00 3.90 -24.081 -25.331 + 1.00 4.00 -23.723 -25.334 + 1.00 4.10 -23.000 -25.348 + 1.00 4.20 -22.006 -25.559 + 1.00 4.30 -21.813 -26.189 + 1.00 4.40 -21.936 -26.600 + 1.00 4.50 -21.955 -27.044 + 1.00 4.60 -21.884 -27.419 + 1.00 4.70 -21.732 -27.525 + 1.00 4.80 -21.527 -27.479 + 1.00 4.90 -21.341 -27.353 + 1.00 5.00 -21.276 -26.818 + 1.00 5.10 -21.322 -26.636 + 1.00 5.20 -21.324 -26.549 + 1.00 5.30 -21.300 -26.576 + 1.00 5.40 -21.291 -26.589 + 1.00 5.50 -21.466 -26.553 + 1.00 5.60 -21.674 -26.620 + 1.00 5.70 -21.725 -26.722 + 1.00 5.80 -21.778 -26.860 + 1.00 5.90 -21.807 -26.941 + 1.00 6.00 -21.814 -26.997 + 1.00 6.10 -21.837 -27.041 + 1.00 6.20 -21.892 -27.124 + 1.00 6.30 -22.070 -27.186 + 1.00 6.40 -22.251 -27.246 + 1.00 6.50 -22.387 -27.356 + 1.00 6.60 -22.467 -27.425 + 1.00 6.70 -22.497 -27.461 + 1.00 6.80 -22.503 -27.503 + 1.00 6.90 -22.493 -27.546 + 1.00 7.00 -22.491 -27.553 + 1.00 7.10 -22.535 -27.604 + 1.00 7.20 -22.610 -27.692 + 1.00 7.30 -22.658 -27.709 + 1.00 7.40 -22.670 -27.762 + 1.00 7.50 -22.660 -27.830 + 1.00 7.60 -22.634 -27.898 + 1.00 7.70 -22.601 -27.939 + 1.00 7.80 -22.564 -27.962 + 1.00 7.90 -22.526 -27.987 + 1.00 8.00 -22.489 -28.100 + 1.00 8.10 -22.450 -28.229 + 1.00 8.20 -22.410 -28.360 + 1.00 8.30 -22.369 -28.497 + 1.00 8.40 -22.324 -28.624 + 1.00 8.50 -22.280 -28.747 + 1.00 8.60 -22.231 -28.872 + 1.00 8.70 -22.183 -28.999 + 1.00 8.80 -22.132 -29.130 + 1.00 8.90 -22.079 -29.265 + 1.00 9.00 -22.026 -29.405 + 1.10 1.00 -28.312 -25.382 + 1.10 1.10 -28.080 -25.383 + 1.10 1.20 -27.854 -25.383 + 1.10 1.30 -27.628 -25.384 + 1.10 1.40 -27.403 -25.384 + 1.10 1.50 -27.190 -25.385 + 1.10 1.60 -26.990 -25.386 + 1.10 1.70 -26.805 -25.386 + 1.10 1.80 -26.624 -25.387 + 1.10 1.90 -26.434 -25.388 + 1.10 2.00 -26.231 -25.389 + 1.10 2.10 -26.020 -25.390 + 1.10 2.20 -25.819 -25.391 + 1.10 2.30 -25.636 -25.392 + 1.10 2.40 -25.478 -25.393 + 1.10 2.50 -25.345 -25.394 + 1.10 2.60 -25.234 -25.396 + 1.10 2.70 -25.141 -25.397 + 1.10 2.80 -25.063 -25.399 + 1.10 2.90 -24.996 -25.400 + 1.10 3.00 -24.937 -25.402 + 1.10 3.10 -24.884 -25.404 + 1.10 3.20 -24.834 -25.406 + 1.10 3.30 -24.783 -25.408 + 1.10 3.40 -24.724 -25.411 + 1.10 3.50 -24.653 -25.413 + 1.10 3.60 -24.564 -25.416 + 1.10 3.70 -24.455 -25.419 + 1.10 3.80 -24.319 -25.422 + 1.10 3.90 -24.126 -25.425 + 1.10 4.00 -23.766 -25.427 + 1.10 4.10 -23.033 -25.440 + 1.10 4.20 -22.007 -25.653 + 1.10 4.30 -21.813 -26.288 + 1.10 4.40 -21.936 -26.699 + 1.10 4.50 -21.955 -27.136 + 1.10 4.60 -21.883 -27.484 + 1.10 4.70 -21.732 -27.573 + 1.10 4.80 -21.527 -27.512 + 1.10 4.90 -21.341 -27.366 + 1.10 5.00 -21.276 -26.820 + 1.10 5.10 -21.322 -26.636 + 1.10 5.20 -21.324 -26.549 + 1.10 5.30 -21.300 -26.577 + 1.10 5.40 -21.291 -26.589 + 1.10 5.50 -21.466 -26.553 + 1.10 5.60 -21.674 -26.620 + 1.10 5.70 -21.725 -26.722 + 1.10 5.80 -21.778 -26.860 + 1.10 5.90 -21.807 -26.941 + 1.10 6.00 -21.814 -26.997 + 1.10 6.10 -21.837 -27.041 + 1.10 6.20 -21.892 -27.124 + 1.10 6.30 -22.070 -27.187 + 1.10 6.40 -22.251 -27.246 + 1.10 6.50 -22.387 -27.356 + 1.10 6.60 -22.467 -27.425 + 1.10 6.70 -22.497 -27.461 + 1.10 6.80 -22.503 -27.503 + 1.10 6.90 -22.493 -27.546 + 1.10 7.00 -22.491 -27.553 + 1.10 7.10 -22.535 -27.604 + 1.10 7.20 -22.610 -27.692 + 1.10 7.30 -22.658 -27.709 + 1.10 7.40 -22.670 -27.762 + 1.10 7.50 -22.660 -27.830 + 1.10 7.60 -22.634 -27.898 + 1.10 7.70 -22.601 -27.939 + 1.10 7.80 -22.564 -27.962 + 1.10 7.90 -22.526 -27.987 + 1.10 8.00 -22.489 -28.100 + 1.10 8.10 -22.450 -28.229 + 1.10 8.20 -22.410 -28.360 + 1.10 8.30 -22.369 -28.497 + 1.10 8.40 -22.324 -28.624 + 1.10 8.50 -22.280 -28.747 + 1.10 8.60 -22.231 -28.872 + 1.10 8.70 -22.183 -28.999 + 1.10 8.80 -22.132 -29.130 + 1.10 8.90 -22.079 -29.266 + 1.10 9.00 -22.026 -29.405 + 1.20 1.00 -28.326 -25.482 + 1.20 1.10 -28.093 -25.482 + 1.20 1.20 -27.871 -25.482 + 1.20 1.30 -27.652 -25.483 + 1.20 1.40 -27.436 -25.483 + 1.20 1.50 -27.232 -25.484 + 1.20 1.60 -27.041 -25.484 + 1.20 1.70 -26.862 -25.485 + 1.20 1.80 -26.686 -25.486 + 1.20 1.90 -26.499 -25.486 + 1.20 2.00 -26.295 -25.487 + 1.20 2.10 -26.084 -25.488 + 1.20 2.20 -25.880 -25.489 + 1.20 2.30 -25.696 -25.490 + 1.20 2.40 -25.536 -25.491 + 1.20 2.50 -25.401 -25.492 + 1.20 2.60 -25.288 -25.493 + 1.20 2.70 -25.195 -25.495 + 1.20 2.80 -25.116 -25.496 + 1.20 2.90 -25.048 -25.498 + 1.20 3.00 -24.988 -25.499 + 1.20 3.10 -24.935 -25.501 + 1.20 3.20 -24.884 -25.503 + 1.20 3.30 -24.832 -25.505 + 1.20 3.40 -24.772 -25.507 + 1.20 3.50 -24.700 -25.509 + 1.20 3.60 -24.611 -25.511 + 1.20 3.70 -24.502 -25.514 + 1.20 3.80 -24.365 -25.516 + 1.20 3.90 -24.171 -25.518 + 1.20 4.00 -23.810 -25.520 + 1.20 4.10 -23.065 -25.532 + 1.20 4.20 -22.007 -25.748 + 1.20 4.30 -21.813 -26.387 + 1.20 4.40 -21.936 -26.797 + 1.20 4.50 -21.955 -27.226 + 1.20 4.60 -21.883 -27.544 + 1.20 4.70 -21.732 -27.616 + 1.20 4.80 -21.527 -27.540 + 1.20 4.90 -21.341 -27.377 + 1.20 5.00 -21.276 -26.821 + 1.20 5.10 -21.322 -26.637 + 1.20 5.20 -21.324 -26.550 + 1.20 5.30 -21.300 -26.577 + 1.20 5.40 -21.291 -26.590 + 1.20 5.50 -21.466 -26.553 + 1.20 5.60 -21.674 -26.620 + 1.20 5.70 -21.725 -26.722 + 1.20 5.80 -21.778 -26.860 + 1.20 5.90 -21.807 -26.940 + 1.20 6.00 -21.814 -26.997 + 1.20 6.10 -21.837 -27.041 + 1.20 6.20 -21.892 -27.124 + 1.20 6.30 -22.070 -27.187 + 1.20 6.40 -22.251 -27.246 + 1.20 6.50 -22.387 -27.356 + 1.20 6.60 -22.467 -27.425 + 1.20 6.70 -22.497 -27.461 + 1.20 6.80 -22.503 -27.503 + 1.20 6.90 -22.493 -27.546 + 1.20 7.00 -22.491 -27.553 + 1.20 7.10 -22.535 -27.604 + 1.20 7.20 -22.610 -27.692 + 1.20 7.30 -22.658 -27.709 + 1.20 7.40 -22.670 -27.762 + 1.20 7.50 -22.660 -27.830 + 1.20 7.60 -22.634 -27.898 + 1.20 7.70 -22.601 -27.939 + 1.20 7.80 -22.564 -27.962 + 1.20 7.90 -22.526 -27.987 + 1.20 8.00 -22.489 -28.100 + 1.20 8.10 -22.450 -28.229 + 1.20 8.20 -22.410 -28.360 + 1.20 8.30 -22.369 -28.497 + 1.20 8.40 -22.324 -28.624 + 1.20 8.50 -22.280 -28.747 + 1.20 8.60 -22.231 -28.872 + 1.20 8.70 -22.183 -28.999 + 1.20 8.80 -22.132 -29.131 + 1.20 8.90 -22.079 -29.266 + 1.20 9.00 -22.026 -29.405 + 1.30 1.00 -28.339 -25.581 + 1.30 1.10 -28.105 -25.581 + 1.30 1.20 -27.886 -25.582 + 1.30 1.30 -27.673 -25.582 + 1.30 1.40 -27.466 -25.583 + 1.30 1.50 -27.270 -25.583 + 1.30 1.60 -27.087 -25.584 + 1.30 1.70 -26.916 -25.584 + 1.30 1.80 -26.745 -25.585 + 1.30 1.90 -26.561 -25.585 + 1.30 2.00 -26.358 -25.586 + 1.30 2.10 -26.146 -25.587 + 1.30 2.20 -25.941 -25.588 + 1.30 2.30 -25.754 -25.588 + 1.30 2.40 -25.593 -25.589 + 1.30 2.50 -25.457 -25.590 + 1.30 2.60 -25.343 -25.592 + 1.30 2.70 -25.248 -25.593 + 1.30 2.80 -25.168 -25.594 + 1.30 2.90 -25.099 -25.595 + 1.30 3.00 -25.039 -25.597 + 1.30 3.10 -24.984 -25.598 + 1.30 3.20 -24.933 -25.600 + 1.30 3.30 -24.880 -25.601 + 1.30 3.40 -24.820 -25.603 + 1.30 3.50 -24.748 -25.605 + 1.30 3.60 -24.658 -25.607 + 1.30 3.70 -24.548 -25.609 + 1.30 3.80 -24.411 -25.611 + 1.30 3.90 -24.217 -25.613 + 1.30 4.00 -23.854 -25.614 + 1.30 4.10 -23.097 -25.626 + 1.30 4.20 -22.008 -25.843 + 1.30 4.30 -21.813 -26.487 + 1.30 4.40 -21.935 -26.896 + 1.30 4.50 -21.955 -27.314 + 1.30 4.60 -21.883 -27.598 + 1.30 4.70 -21.732 -27.654 + 1.30 4.80 -21.527 -27.565 + 1.30 4.90 -21.341 -27.387 + 1.30 5.00 -21.276 -26.821 + 1.30 5.10 -21.323 -26.637 + 1.30 5.20 -21.324 -26.550 + 1.30 5.30 -21.300 -26.577 + 1.30 5.40 -21.291 -26.590 + 1.30 5.50 -21.466 -26.553 + 1.30 5.60 -21.674 -26.620 + 1.30 5.70 -21.725 -26.722 + 1.30 5.80 -21.778 -26.860 + 1.30 5.90 -21.807 -26.940 + 1.30 6.00 -21.814 -26.997 + 1.30 6.10 -21.837 -27.041 + 1.30 6.20 -21.892 -27.124 + 1.30 6.30 -22.070 -27.187 + 1.30 6.40 -22.251 -27.247 + 1.30 6.50 -22.387 -27.356 + 1.30 6.60 -22.467 -27.425 + 1.30 6.70 -22.497 -27.461 + 1.30 6.80 -22.503 -27.503 + 1.30 6.90 -22.493 -27.546 + 1.30 7.00 -22.491 -27.553 + 1.30 7.10 -22.535 -27.604 + 1.30 7.20 -22.610 -27.692 + 1.30 7.30 -22.658 -27.709 + 1.30 7.40 -22.670 -27.762 + 1.30 7.50 -22.660 -27.830 + 1.30 7.60 -22.634 -27.898 + 1.30 7.70 -22.601 -27.939 + 1.30 7.80 -22.564 -27.962 + 1.30 7.90 -22.526 -27.987 + 1.30 8.00 -22.489 -28.100 + 1.30 8.10 -22.450 -28.229 + 1.30 8.20 -22.410 -28.360 + 1.30 8.30 -22.369 -28.497 + 1.30 8.40 -22.324 -28.624 + 1.30 8.50 -22.280 -28.747 + 1.30 8.60 -22.231 -28.872 + 1.30 8.70 -22.183 -28.999 + 1.30 8.80 -22.132 -29.131 + 1.30 8.90 -22.079 -29.266 + 1.30 9.00 -22.026 -29.406 + 1.40 1.00 -28.351 -25.681 + 1.40 1.10 -28.117 -25.681 + 1.40 1.20 -27.900 -25.681 + 1.40 1.30 -27.692 -25.681 + 1.40 1.40 -27.493 -25.682 + 1.40 1.50 -27.306 -25.682 + 1.40 1.60 -27.131 -25.683 + 1.40 1.70 -26.966 -25.683 + 1.40 1.80 -26.801 -25.684 + 1.40 1.90 -26.621 -25.684 + 1.40 2.00 -26.420 -25.685 + 1.40 2.10 -26.207 -25.686 + 1.40 2.20 -26.001 -25.686 + 1.40 2.30 -25.813 -25.687 + 1.40 2.40 -25.649 -25.688 + 1.40 2.50 -25.512 -25.689 + 1.40 2.60 -25.397 -25.690 + 1.40 2.70 -25.301 -25.691 + 1.40 2.80 -25.220 -25.692 + 1.40 2.90 -25.150 -25.693 + 1.40 3.00 -25.089 -25.694 + 1.40 3.10 -25.034 -25.696 + 1.40 3.20 -24.981 -25.697 + 1.40 3.30 -24.928 -25.699 + 1.40 3.40 -24.867 -25.700 + 1.40 3.50 -24.795 -25.702 + 1.40 3.60 -24.705 -25.703 + 1.40 3.70 -24.595 -25.705 + 1.40 3.80 -24.457 -25.707 + 1.40 3.90 -24.262 -25.708 + 1.40 4.00 -23.899 -25.708 + 1.40 4.10 -23.129 -25.720 + 1.40 4.20 -22.008 -25.936 + 1.40 4.30 -21.813 -26.587 + 1.40 4.40 -21.935 -26.994 + 1.40 4.50 -21.955 -27.399 + 1.40 4.60 -21.883 -27.647 + 1.40 4.70 -21.732 -27.686 + 1.40 4.80 -21.527 -27.585 + 1.40 4.90 -21.341 -27.394 + 1.40 5.00 -21.276 -26.822 + 1.40 5.10 -21.323 -26.637 + 1.40 5.20 -21.324 -26.550 + 1.40 5.30 -21.300 -26.577 + 1.40 5.40 -21.291 -26.590 + 1.40 5.50 -21.466 -26.553 + 1.40 5.60 -21.674 -26.620 + 1.40 5.70 -21.725 -26.722 + 1.40 5.80 -21.778 -26.860 + 1.40 5.90 -21.807 -26.940 + 1.40 6.00 -21.814 -26.997 + 1.40 6.10 -21.837 -27.041 + 1.40 6.20 -21.892 -27.124 + 1.40 6.30 -22.070 -27.187 + 1.40 6.40 -22.251 -27.247 + 1.40 6.50 -22.387 -27.356 + 1.40 6.60 -22.467 -27.425 + 1.40 6.70 -22.497 -27.461 + 1.40 6.80 -22.503 -27.503 + 1.40 6.90 -22.493 -27.546 + 1.40 7.00 -22.491 -27.553 + 1.40 7.10 -22.535 -27.604 + 1.40 7.20 -22.610 -27.692 + 1.40 7.30 -22.658 -27.709 + 1.40 7.40 -22.670 -27.762 + 1.40 7.50 -22.660 -27.830 + 1.40 7.60 -22.634 -27.898 + 1.40 7.70 -22.601 -27.939 + 1.40 7.80 -22.564 -27.962 + 1.40 7.90 -22.526 -27.987 + 1.40 8.00 -22.489 -28.100 + 1.40 8.10 -22.450 -28.229 + 1.40 8.20 -22.410 -28.360 + 1.40 8.30 -22.369 -28.497 + 1.40 8.40 -22.324 -28.624 + 1.40 8.50 -22.280 -28.748 + 1.40 8.60 -22.231 -28.872 + 1.40 8.70 -22.183 -28.999 + 1.40 8.80 -22.132 -29.131 + 1.40 8.90 -22.079 -29.266 + 1.40 9.00 -22.026 -29.406 + 1.50 1.00 -28.363 -25.780 + 1.50 1.10 -28.129 -25.780 + 1.50 1.20 -27.914 -25.781 + 1.50 1.30 -27.711 -25.781 + 1.50 1.40 -27.518 -25.781 + 1.50 1.50 -27.339 -25.782 + 1.50 1.60 -27.172 -25.782 + 1.50 1.70 -27.014 -25.782 + 1.50 1.80 -26.855 -25.783 + 1.50 1.90 -26.679 -25.783 + 1.50 2.00 -26.480 -25.784 + 1.50 2.10 -26.267 -25.784 + 1.50 2.20 -26.060 -25.785 + 1.50 2.30 -25.870 -25.786 + 1.50 2.40 -25.706 -25.787 + 1.50 2.50 -25.567 -25.787 + 1.50 2.60 -25.450 -25.788 + 1.50 2.70 -25.353 -25.789 + 1.50 2.80 -25.271 -25.790 + 1.50 2.90 -25.201 -25.791 + 1.50 3.00 -25.139 -25.792 + 1.50 3.10 -25.082 -25.794 + 1.50 3.20 -25.029 -25.795 + 1.50 3.30 -24.975 -25.796 + 1.50 3.40 -24.914 -25.797 + 1.50 3.50 -24.841 -25.798 + 1.50 3.60 -24.752 -25.800 + 1.50 3.70 -24.641 -25.801 + 1.50 3.80 -24.502 -25.803 + 1.50 3.90 -24.308 -25.803 + 1.50 4.00 -23.943 -25.803 + 1.50 4.10 -23.160 -25.814 + 1.50 4.20 -22.008 -26.029 + 1.50 4.30 -21.813 -26.686 + 1.50 4.40 -21.935 -27.093 + 1.50 4.50 -21.955 -27.481 + 1.50 4.60 -21.883 -27.690 + 1.50 4.70 -21.732 -27.714 + 1.50 4.80 -21.527 -27.602 + 1.50 4.90 -21.341 -27.400 + 1.50 5.00 -21.276 -26.822 + 1.50 5.10 -21.323 -26.638 + 1.50 5.20 -21.324 -26.550 + 1.50 5.30 -21.300 -26.577 + 1.50 5.40 -21.291 -26.590 + 1.50 5.50 -21.466 -26.553 + 1.50 5.60 -21.674 -26.620 + 1.50 5.70 -21.725 -26.722 + 1.50 5.80 -21.778 -26.860 + 1.50 5.90 -21.807 -26.940 + 1.50 6.00 -21.814 -26.997 + 1.50 6.10 -21.837 -27.041 + 1.50 6.20 -21.892 -27.124 + 1.50 6.30 -22.070 -27.187 + 1.50 6.40 -22.251 -27.247 + 1.50 6.50 -22.387 -27.356 + 1.50 6.60 -22.467 -27.425 + 1.50 6.70 -22.497 -27.461 + 1.50 6.80 -22.503 -27.503 + 1.50 6.90 -22.493 -27.546 + 1.50 7.00 -22.491 -27.553 + 1.50 7.10 -22.535 -27.604 + 1.50 7.20 -22.610 -27.692 + 1.50 7.30 -22.658 -27.709 + 1.50 7.40 -22.670 -27.762 + 1.50 7.50 -22.660 -27.830 + 1.50 7.60 -22.634 -27.898 + 1.50 7.70 -22.601 -27.939 + 1.50 7.80 -22.564 -27.962 + 1.50 7.90 -22.526 -27.987 + 1.50 8.00 -22.489 -28.100 + 1.50 8.10 -22.450 -28.229 + 1.50 8.20 -22.410 -28.360 + 1.50 8.30 -22.369 -28.497 + 1.50 8.40 -22.324 -28.624 + 1.50 8.50 -22.280 -28.748 + 1.50 8.60 -22.231 -28.872 + 1.50 8.70 -22.183 -28.999 + 1.50 8.80 -22.132 -29.131 + 1.50 8.90 -22.079 -29.266 + 1.50 9.00 -22.026 -29.406 + 1.60 1.00 -28.375 -25.880 + 1.60 1.10 -28.141 -25.880 + 1.60 1.20 -27.928 -25.880 + 1.60 1.30 -27.730 -25.880 + 1.60 1.40 -27.543 -25.881 + 1.60 1.50 -27.371 -25.881 + 1.60 1.60 -27.211 -25.881 + 1.60 1.70 -27.060 -25.882 + 1.60 1.80 -26.906 -25.882 + 1.60 1.90 -26.735 -25.883 + 1.60 2.00 -26.538 -25.883 + 1.60 2.10 -26.326 -25.884 + 1.60 2.20 -26.118 -25.884 + 1.60 2.30 -25.927 -25.885 + 1.60 2.40 -25.761 -25.885 + 1.60 2.50 -25.621 -25.886 + 1.60 2.60 -25.503 -25.887 + 1.60 2.70 -25.405 -25.888 + 1.60 2.80 -25.322 -25.889 + 1.60 2.90 -25.251 -25.890 + 1.60 3.00 -25.188 -25.891 + 1.60 3.10 -25.131 -25.891 + 1.60 3.20 -25.077 -25.893 + 1.60 3.30 -25.022 -25.894 + 1.60 3.40 -24.961 -25.895 + 1.60 3.50 -24.887 -25.896 + 1.60 3.60 -24.798 -25.897 + 1.60 3.70 -24.687 -25.898 + 1.60 3.80 -24.548 -25.899 + 1.60 3.90 -24.353 -25.899 + 1.60 4.00 -23.988 -25.899 + 1.60 4.10 -23.190 -25.909 + 1.60 4.20 -22.008 -26.121 + 1.60 4.30 -21.813 -26.786 + 1.60 4.40 -21.935 -27.191 + 1.60 4.50 -21.955 -27.559 + 1.60 4.60 -21.883 -27.727 + 1.60 4.70 -21.732 -27.737 + 1.60 4.80 -21.527 -27.616 + 1.60 4.90 -21.341 -27.405 + 1.60 5.00 -21.276 -26.822 + 1.60 5.10 -21.323 -26.638 + 1.60 5.20 -21.324 -26.550 + 1.60 5.30 -21.300 -26.577 + 1.60 5.40 -21.291 -26.590 + 1.60 5.50 -21.466 -26.553 + 1.60 5.60 -21.674 -26.620 + 1.60 5.70 -21.725 -26.722 + 1.60 5.80 -21.778 -26.859 + 1.60 5.90 -21.807 -26.940 + 1.60 6.00 -21.814 -26.997 + 1.60 6.10 -21.837 -27.041 + 1.60 6.20 -21.892 -27.124 + 1.60 6.30 -22.070 -27.187 + 1.60 6.40 -22.251 -27.247 + 1.60 6.50 -22.387 -27.356 + 1.60 6.60 -22.467 -27.425 + 1.60 6.70 -22.497 -27.461 + 1.60 6.80 -22.503 -27.503 + 1.60 6.90 -22.493 -27.546 + 1.60 7.00 -22.491 -27.553 + 1.60 7.10 -22.535 -27.604 + 1.60 7.20 -22.610 -27.692 + 1.60 7.30 -22.658 -27.709 + 1.60 7.40 -22.670 -27.762 + 1.60 7.50 -22.660 -27.830 + 1.60 7.60 -22.634 -27.898 + 1.60 7.70 -22.601 -27.939 + 1.60 7.80 -22.564 -27.962 + 1.60 7.90 -22.526 -27.987 + 1.60 8.00 -22.489 -28.100 + 1.60 8.10 -22.450 -28.229 + 1.60 8.20 -22.410 -28.360 + 1.60 8.30 -22.369 -28.497 + 1.60 8.40 -22.324 -28.624 + 1.60 8.50 -22.280 -28.748 + 1.60 8.60 -22.231 -28.872 + 1.60 8.70 -22.183 -28.999 + 1.60 8.80 -22.132 -29.131 + 1.60 8.90 -22.079 -29.266 + 1.60 9.00 -22.026 -29.406 + 1.70 1.00 -28.388 -25.979 + 1.70 1.10 -28.155 -25.979 + 1.70 1.20 -27.944 -25.980 + 1.70 1.30 -27.749 -25.980 + 1.70 1.40 -27.568 -25.980 + 1.70 1.50 -27.402 -25.980 + 1.70 1.60 -27.248 -25.981 + 1.70 1.70 -27.104 -25.981 + 1.70 1.80 -26.956 -25.981 + 1.70 1.90 -26.789 -25.982 + 1.70 2.00 -26.595 -25.982 + 1.70 2.10 -26.384 -25.983 + 1.70 2.20 -26.176 -25.983 + 1.70 2.30 -25.984 -25.984 + 1.70 2.40 -25.817 -25.984 + 1.70 2.50 -25.675 -25.985 + 1.70 2.60 -25.556 -25.986 + 1.70 2.70 -25.457 -25.986 + 1.70 2.80 -25.373 -25.987 + 1.70 2.90 -25.301 -25.988 + 1.70 3.00 -25.236 -25.989 + 1.70 3.10 -25.178 -25.990 + 1.70 3.20 -25.123 -25.991 + 1.70 3.30 -25.068 -25.991 + 1.70 3.40 -25.006 -25.992 + 1.70 3.50 -24.933 -25.993 + 1.70 3.60 -24.843 -25.994 + 1.70 3.70 -24.732 -25.995 + 1.70 3.80 -24.593 -25.996 + 1.70 3.90 -24.398 -25.995 + 1.70 4.00 -24.033 -25.994 + 1.70 4.10 -23.219 -26.004 + 1.70 4.20 -22.008 -26.211 + 1.70 4.30 -21.812 -26.885 + 1.70 4.40 -21.935 -27.288 + 1.70 4.50 -21.955 -27.633 + 1.70 4.60 -21.883 -27.760 + 1.70 4.70 -21.732 -27.757 + 1.70 4.80 -21.527 -27.627 + 1.70 4.90 -21.341 -27.410 + 1.70 5.00 -21.276 -26.822 + 1.70 5.10 -21.323 -26.638 + 1.70 5.20 -21.324 -26.550 + 1.70 5.30 -21.300 -26.577 + 1.70 5.40 -21.291 -26.590 + 1.70 5.50 -21.466 -26.553 + 1.70 5.60 -21.674 -26.620 + 1.70 5.70 -21.725 -26.722 + 1.70 5.80 -21.778 -26.859 + 1.70 5.90 -21.807 -26.939 + 1.70 6.00 -21.814 -26.996 + 1.70 6.10 -21.837 -27.041 + 1.70 6.20 -21.892 -27.124 + 1.70 6.30 -22.070 -27.187 + 1.70 6.40 -22.251 -27.247 + 1.70 6.50 -22.387 -27.356 + 1.70 6.60 -22.467 -27.425 + 1.70 6.70 -22.497 -27.461 + 1.70 6.80 -22.503 -27.503 + 1.70 6.90 -22.493 -27.546 + 1.70 7.00 -22.491 -27.553 + 1.70 7.10 -22.535 -27.604 + 1.70 7.20 -22.610 -27.692 + 1.70 7.30 -22.658 -27.709 + 1.70 7.40 -22.670 -27.762 + 1.70 7.50 -22.660 -27.830 + 1.70 7.60 -22.634 -27.898 + 1.70 7.70 -22.601 -27.939 + 1.70 7.80 -22.564 -27.962 + 1.70 7.90 -22.526 -27.987 + 1.70 8.00 -22.489 -28.100 + 1.70 8.10 -22.450 -28.229 + 1.70 8.20 -22.410 -28.360 + 1.70 8.30 -22.369 -28.497 + 1.70 8.40 -22.324 -28.624 + 1.70 8.50 -22.280 -28.748 + 1.70 8.60 -22.231 -28.872 + 1.70 8.70 -22.183 -28.999 + 1.70 8.80 -22.132 -29.131 + 1.70 8.90 -22.079 -29.266 + 1.70 9.00 -22.026 -29.406 + 1.80 1.00 -28.403 -26.079 + 1.80 1.10 -28.170 -26.079 + 1.80 1.20 -27.961 -26.079 + 1.80 1.30 -27.769 -26.079 + 1.80 1.40 -27.593 -26.080 + 1.80 1.50 -27.432 -26.080 + 1.80 1.60 -27.285 -26.080 + 1.80 1.70 -27.147 -26.080 + 1.80 1.80 -27.005 -26.081 + 1.80 1.90 -26.842 -26.081 + 1.80 2.00 -26.651 -26.081 + 1.80 2.10 -26.441 -26.082 + 1.80 2.20 -26.233 -26.082 + 1.80 2.30 -26.040 -26.083 + 1.80 2.40 -25.872 -26.083 + 1.80 2.50 -25.729 -26.084 + 1.80 2.60 -25.609 -26.085 + 1.80 2.70 -25.508 -26.085 + 1.80 2.80 -25.423 -26.086 + 1.80 2.90 -25.349 -26.087 + 1.80 3.00 -25.284 -26.087 + 1.80 3.10 -25.225 -26.088 + 1.80 3.20 -25.169 -26.089 + 1.80 3.30 -25.113 -26.089 + 1.80 3.40 -25.051 -26.090 + 1.80 3.50 -24.978 -26.091 + 1.80 3.60 -24.888 -26.091 + 1.80 3.70 -24.777 -26.092 + 1.80 3.80 -24.638 -26.092 + 1.80 3.90 -24.443 -26.092 + 1.80 4.00 -24.079 -26.090 + 1.80 4.10 -23.247 -26.099 + 1.80 4.20 -22.008 -26.299 + 1.80 4.30 -21.812 -26.985 + 1.80 4.40 -21.935 -27.385 + 1.80 4.50 -21.955 -27.701 + 1.80 4.60 -21.883 -27.787 + 1.80 4.70 -21.732 -27.774 + 1.80 4.80 -21.527 -27.637 + 1.80 4.90 -21.341 -27.413 + 1.80 5.00 -21.276 -26.822 + 1.80 5.10 -21.323 -26.638 + 1.80 5.20 -21.324 -26.551 + 1.80 5.30 -21.300 -26.578 + 1.80 5.40 -21.291 -26.590 + 1.80 5.50 -21.466 -26.553 + 1.80 5.60 -21.674 -26.620 + 1.80 5.70 -21.725 -26.722 + 1.80 5.80 -21.778 -26.859 + 1.80 5.90 -21.807 -26.938 + 1.80 6.00 -21.814 -26.996 + 1.80 6.10 -21.837 -27.041 + 1.80 6.20 -21.892 -27.124 + 1.80 6.30 -22.070 -27.187 + 1.80 6.40 -22.251 -27.247 + 1.80 6.50 -22.387 -27.356 + 1.80 6.60 -22.467 -27.425 + 1.80 6.70 -22.497 -27.461 + 1.80 6.80 -22.503 -27.503 + 1.80 6.90 -22.493 -27.546 + 1.80 7.00 -22.491 -27.553 + 1.80 7.10 -22.535 -27.604 + 1.80 7.20 -22.610 -27.692 + 1.80 7.30 -22.658 -27.709 + 1.80 7.40 -22.670 -27.762 + 1.80 7.50 -22.660 -27.830 + 1.80 7.60 -22.634 -27.898 + 1.80 7.70 -22.601 -27.939 + 1.80 7.80 -22.564 -27.962 + 1.80 7.90 -22.526 -27.987 + 1.80 8.00 -22.489 -28.100 + 1.80 8.10 -22.450 -28.229 + 1.80 8.20 -22.410 -28.360 + 1.80 8.30 -22.369 -28.497 + 1.80 8.40 -22.324 -28.624 + 1.80 8.50 -22.280 -28.748 + 1.80 8.60 -22.231 -28.872 + 1.80 8.70 -22.183 -28.999 + 1.80 8.80 -22.132 -29.131 + 1.80 8.90 -22.079 -29.267 + 1.80 9.00 -22.026 -29.406 + 1.90 1.00 -28.420 -26.178 + 1.90 1.10 -28.188 -26.179 + 1.90 1.20 -27.981 -26.179 + 1.90 1.30 -27.792 -26.179 + 1.90 1.40 -27.620 -26.179 + 1.90 1.50 -27.464 -26.179 + 1.90 1.60 -27.323 -26.180 + 1.90 1.70 -27.190 -26.180 + 1.90 1.80 -27.053 -26.180 + 1.90 1.90 -26.894 -26.180 + 1.90 2.00 -26.706 -26.181 + 1.90 2.10 -26.498 -26.181 + 1.90 2.20 -26.290 -26.182 + 1.90 2.30 -26.096 -26.182 + 1.90 2.40 -25.927 -26.183 + 1.90 2.50 -25.782 -26.183 + 1.90 2.60 -25.661 -26.184 + 1.90 2.70 -25.559 -26.184 + 1.90 2.80 -25.472 -26.185 + 1.90 2.90 -25.398 -26.185 + 1.90 3.00 -25.331 -26.186 + 1.90 3.10 -25.271 -26.187 + 1.90 3.20 -25.214 -26.187 + 1.90 3.30 -25.158 -26.188 + 1.90 3.40 -25.095 -26.188 + 1.90 3.50 -25.022 -26.189 + 1.90 3.60 -24.933 -26.189 + 1.90 3.70 -24.822 -26.189 + 1.90 3.80 -24.683 -26.190 + 1.90 3.90 -24.488 -26.189 + 1.90 4.00 -24.124 -26.187 + 1.90 4.10 -23.274 -26.195 + 1.90 4.20 -22.009 -26.385 + 1.90 4.30 -21.813 -27.084 + 1.90 4.40 -21.935 -27.481 + 1.90 4.50 -21.955 -27.764 + 1.90 4.60 -21.883 -27.811 + 1.90 4.70 -21.732 -27.787 + 1.90 4.80 -21.527 -27.645 + 1.90 4.90 -21.341 -27.416 + 1.90 5.00 -21.277 -26.822 + 1.90 5.10 -21.323 -26.638 + 1.90 5.20 -21.324 -26.551 + 1.90 5.30 -21.300 -26.578 + 1.90 5.40 -21.291 -26.590 + 1.90 5.50 -21.466 -26.553 + 1.90 5.60 -21.674 -26.620 + 1.90 5.70 -21.725 -26.722 + 1.90 5.80 -21.778 -26.858 + 1.90 5.90 -21.807 -26.938 + 1.90 6.00 -21.814 -26.996 + 1.90 6.10 -21.837 -27.040 + 1.90 6.20 -21.892 -27.124 + 1.90 6.30 -22.070 -27.187 + 1.90 6.40 -22.251 -27.247 + 1.90 6.50 -22.387 -27.356 + 1.90 6.60 -22.467 -27.425 + 1.90 6.70 -22.497 -27.461 + 1.90 6.80 -22.503 -27.503 + 1.90 6.90 -22.493 -27.546 + 1.90 7.00 -22.491 -27.553 + 1.90 7.10 -22.535 -27.604 + 1.90 7.20 -22.610 -27.692 + 1.90 7.30 -22.658 -27.709 + 1.90 7.40 -22.670 -27.762 + 1.90 7.50 -22.660 -27.830 + 1.90 7.60 -22.634 -27.898 + 1.90 7.70 -22.601 -27.939 + 1.90 7.80 -22.564 -27.962 + 1.90 7.90 -22.526 -27.987 + 1.90 8.00 -22.489 -28.100 + 1.90 8.10 -22.450 -28.229 + 1.90 8.20 -22.410 -28.360 + 1.90 8.30 -22.369 -28.497 + 1.90 8.40 -22.324 -28.624 + 1.90 8.50 -22.280 -28.748 + 1.90 8.60 -22.231 -28.872 + 1.90 8.70 -22.183 -28.999 + 1.90 8.80 -22.132 -29.131 + 1.90 8.90 -22.079 -29.267 + 1.90 9.00 -22.026 -29.406 + 2.00 1.00 -28.440 -26.278 + 2.00 1.10 -28.208 -26.278 + 2.00 1.20 -28.003 -26.278 + 2.00 1.30 -27.817 -26.279 + 2.00 1.40 -27.649 -26.279 + 2.00 1.50 -27.498 -26.279 + 2.00 1.60 -27.361 -26.279 + 2.00 1.70 -27.233 -26.279 + 2.00 1.80 -27.100 -26.280 + 2.00 1.90 -26.946 -26.280 + 2.00 2.00 -26.761 -26.280 + 2.00 2.10 -26.554 -26.281 + 2.00 2.20 -26.346 -26.281 + 2.00 2.30 -26.152 -26.281 + 2.00 2.40 -25.981 -26.282 + 2.00 2.50 -25.836 -26.282 + 2.00 2.60 -25.713 -26.283 + 2.00 2.70 -25.609 -26.283 + 2.00 2.80 -25.521 -26.284 + 2.00 2.90 -25.445 -26.284 + 2.00 3.00 -25.378 -26.285 + 2.00 3.10 -25.316 -26.285 + 2.00 3.20 -25.259 -26.286 + 2.00 3.30 -25.202 -26.286 + 2.00 3.40 -25.139 -26.286 + 2.00 3.50 -25.066 -26.287 + 2.00 3.60 -24.977 -26.287 + 2.00 3.70 -24.866 -26.287 + 2.00 3.80 -24.727 -26.287 + 2.00 3.90 -24.533 -26.286 + 2.00 4.00 -24.169 -26.283 + 2.00 4.10 -23.299 -26.291 + 2.00 4.20 -22.009 -26.468 + 2.00 4.30 -21.813 -27.183 + 2.00 4.40 -21.935 -27.575 + 2.00 4.50 -21.955 -27.820 + 2.00 4.60 -21.883 -27.831 + 2.00 4.70 -21.732 -27.799 + 2.00 4.80 -21.527 -27.651 + 2.00 4.90 -21.341 -27.419 + 2.00 5.00 -21.277 -26.822 + 2.00 5.10 -21.323 -26.638 + 2.00 5.20 -21.324 -26.551 + 2.00 5.30 -21.300 -26.578 + 2.00 5.40 -21.291 -26.590 + 2.00 5.50 -21.466 -26.553 + 2.00 5.60 -21.674 -26.620 + 2.00 5.70 -21.725 -26.721 + 2.00 5.80 -21.778 -26.857 + 2.00 5.90 -21.807 -26.937 + 2.00 6.00 -21.814 -26.995 + 2.00 6.10 -21.837 -27.040 + 2.00 6.20 -21.892 -27.124 + 2.00 6.30 -22.070 -27.187 + 2.00 6.40 -22.251 -27.247 + 2.00 6.50 -22.387 -27.356 + 2.00 6.60 -22.467 -27.425 + 2.00 6.70 -22.497 -27.461 + 2.00 6.80 -22.503 -27.503 + 2.00 6.90 -22.493 -27.546 + 2.00 7.00 -22.491 -27.553 + 2.00 7.10 -22.535 -27.604 + 2.00 7.20 -22.610 -27.692 + 2.00 7.30 -22.658 -27.709 + 2.00 7.40 -22.670 -27.762 + 2.00 7.50 -22.660 -27.830 + 2.00 7.60 -22.634 -27.898 + 2.00 7.70 -22.601 -27.939 + 2.00 7.80 -22.564 -27.962 + 2.00 7.90 -22.526 -27.987 + 2.00 8.00 -22.489 -28.100 + 2.00 8.10 -22.450 -28.229 + 2.00 8.20 -22.410 -28.360 + 2.00 8.30 -22.369 -28.497 + 2.00 8.40 -22.324 -28.624 + 2.00 8.50 -22.280 -28.748 + 2.00 8.60 -22.231 -28.872 + 2.00 8.70 -22.183 -28.999 + 2.00 8.80 -22.132 -29.131 + 2.00 8.90 -22.079 -29.267 + 2.00 9.00 -22.026 -29.406 + 2.10 1.00 -28.463 -26.378 + 2.10 1.10 -28.232 -26.378 + 2.10 1.20 -28.029 -26.378 + 2.10 1.30 -27.845 -26.378 + 2.10 1.40 -27.680 -26.378 + 2.10 1.50 -27.533 -26.379 + 2.10 1.60 -27.401 -26.379 + 2.10 1.70 -27.277 -26.379 + 2.10 1.80 -27.148 -26.379 + 2.10 1.90 -26.997 -26.379 + 2.10 2.00 -26.814 -26.380 + 2.10 2.10 -26.609 -26.380 + 2.10 2.20 -26.402 -26.380 + 2.10 2.30 -26.207 -26.381 + 2.10 2.40 -26.036 -26.381 + 2.10 2.50 -25.889 -26.382 + 2.10 2.60 -25.764 -26.382 + 2.10 2.70 -25.659 -26.382 + 2.10 2.80 -25.570 -26.383 + 2.10 2.90 -25.492 -26.383 + 2.10 3.00 -25.423 -26.384 + 2.10 3.10 -25.361 -26.384 + 2.10 3.20 -25.302 -26.384 + 2.10 3.30 -25.244 -26.385 + 2.10 3.40 -25.181 -26.385 + 2.10 3.50 -25.108 -26.385 + 2.10 3.60 -25.020 -26.385 + 2.10 3.70 -24.910 -26.385 + 2.10 3.80 -24.771 -26.385 + 2.10 3.90 -24.578 -26.383 + 2.10 4.00 -24.214 -26.380 + 2.10 4.10 -23.322 -26.386 + 2.10 4.20 -22.009 -26.548 + 2.10 4.30 -21.813 -27.282 + 2.10 4.40 -21.935 -27.669 + 2.10 4.50 -21.955 -27.869 + 2.10 4.60 -21.884 -27.847 + 2.10 4.70 -21.732 -27.809 + 2.10 4.80 -21.527 -27.657 + 2.10 4.90 -21.342 -27.422 + 2.10 5.00 -21.277 -26.822 + 2.10 5.10 -21.323 -26.638 + 2.10 5.20 -21.324 -26.551 + 2.10 5.30 -21.300 -26.578 + 2.10 5.40 -21.291 -26.590 + 2.10 5.50 -21.466 -26.553 + 2.10 5.60 -21.674 -26.620 + 2.10 5.70 -21.725 -26.721 + 2.10 5.80 -21.778 -26.856 + 2.10 5.90 -21.807 -26.935 + 2.10 6.00 -21.814 -26.994 + 2.10 6.10 -21.837 -27.040 + 2.10 6.20 -21.892 -27.124 + 2.10 6.30 -22.070 -27.187 + 2.10 6.40 -22.251 -27.247 + 2.10 6.50 -22.387 -27.356 + 2.10 6.60 -22.467 -27.425 + 2.10 6.70 -22.497 -27.461 + 2.10 6.80 -22.503 -27.503 + 2.10 6.90 -22.493 -27.546 + 2.10 7.00 -22.491 -27.553 + 2.10 7.10 -22.535 -27.604 + 2.10 7.20 -22.610 -27.692 + 2.10 7.30 -22.658 -27.709 + 2.10 7.40 -22.670 -27.762 + 2.10 7.50 -22.660 -27.830 + 2.10 7.60 -22.634 -27.898 + 2.10 7.70 -22.601 -27.939 + 2.10 7.80 -22.564 -27.962 + 2.10 7.90 -22.526 -27.987 + 2.10 8.00 -22.489 -28.100 + 2.10 8.10 -22.450 -28.229 + 2.10 8.20 -22.410 -28.360 + 2.10 8.30 -22.369 -28.497 + 2.10 8.40 -22.324 -28.624 + 2.10 8.50 -22.280 -28.748 + 2.10 8.60 -22.231 -28.872 + 2.10 8.70 -22.183 -28.999 + 2.10 8.80 -22.132 -29.131 + 2.10 8.90 -22.079 -29.267 + 2.10 9.00 -22.026 -29.406 + 2.20 1.00 -28.490 -26.478 + 2.20 1.10 -28.261 -26.478 + 2.20 1.20 -28.058 -26.478 + 2.20 1.30 -27.877 -26.478 + 2.20 1.40 -27.715 -26.478 + 2.20 1.50 -27.571 -26.478 + 2.20 1.60 -27.442 -26.478 + 2.20 1.70 -27.322 -26.479 + 2.20 1.80 -27.196 -26.479 + 2.20 1.90 -27.048 -26.479 + 2.20 2.00 -26.868 -26.479 + 2.20 2.10 -26.664 -26.480 + 2.20 2.20 -26.457 -26.480 + 2.20 2.30 -26.262 -26.480 + 2.20 2.40 -26.089 -26.481 + 2.20 2.50 -25.941 -26.481 + 2.20 2.60 -25.815 -26.481 + 2.20 2.70 -25.708 -26.482 + 2.20 2.80 -25.617 -26.482 + 2.20 2.90 -25.539 -26.482 + 2.20 3.00 -25.468 -26.483 + 2.20 3.10 -25.404 -26.483 + 2.20 3.20 -25.345 -26.483 + 2.20 3.30 -25.286 -26.483 + 2.20 3.40 -25.223 -26.484 + 2.20 3.50 -25.150 -26.483 + 2.20 3.60 -25.062 -26.483 + 2.20 3.70 -24.953 -26.483 + 2.20 3.80 -24.815 -26.483 + 2.20 3.90 -24.622 -26.481 + 2.20 4.00 -24.260 -26.477 + 2.20 4.10 -23.344 -26.482 + 2.20 4.20 -22.009 -26.625 + 2.20 4.30 -21.813 -27.380 + 2.20 4.40 -21.935 -27.761 + 2.20 4.50 -21.955 -27.910 + 2.20 4.60 -21.884 -27.861 + 2.20 4.70 -21.733 -27.817 + 2.20 4.80 -21.527 -27.662 + 2.20 4.90 -21.342 -27.424 + 2.20 5.00 -21.277 -26.821 + 2.20 5.10 -21.323 -26.638 + 2.20 5.20 -21.324 -26.551 + 2.20 5.30 -21.300 -26.578 + 2.20 5.40 -21.291 -26.590 + 2.20 5.50 -21.466 -26.553 + 2.20 5.60 -21.674 -26.620 + 2.20 5.70 -21.725 -26.720 + 2.20 5.80 -21.778 -26.855 + 2.20 5.90 -21.807 -26.934 + 2.20 6.00 -21.814 -26.993 + 2.20 6.10 -21.837 -27.040 + 2.20 6.20 -21.892 -27.124 + 2.20 6.30 -22.070 -27.187 + 2.20 6.40 -22.251 -27.247 + 2.20 6.50 -22.387 -27.356 + 2.20 6.60 -22.467 -27.425 + 2.20 6.70 -22.497 -27.461 + 2.20 6.80 -22.503 -27.503 + 2.20 6.90 -22.493 -27.546 + 2.20 7.00 -22.491 -27.553 + 2.20 7.10 -22.535 -27.604 + 2.20 7.20 -22.610 -27.692 + 2.20 7.30 -22.658 -27.709 + 2.20 7.40 -22.670 -27.762 + 2.20 7.50 -22.660 -27.830 + 2.20 7.60 -22.634 -27.898 + 2.20 7.70 -22.601 -27.939 + 2.20 7.80 -22.564 -27.962 + 2.20 7.90 -22.526 -27.987 + 2.20 8.00 -22.489 -28.100 + 2.20 8.10 -22.450 -28.229 + 2.20 8.20 -22.410 -28.360 + 2.20 8.30 -22.369 -28.497 + 2.20 8.40 -22.324 -28.624 + 2.20 8.50 -22.280 -28.748 + 2.20 8.60 -22.231 -28.872 + 2.20 8.70 -22.183 -28.999 + 2.20 8.80 -22.132 -29.131 + 2.20 8.90 -22.079 -29.267 + 2.20 9.00 -22.026 -29.406 + 2.30 1.00 -28.522 -26.577 + 2.30 1.10 -28.293 -26.577 + 2.30 1.20 -28.092 -26.578 + 2.30 1.30 -27.913 -26.578 + 2.30 1.40 -27.753 -26.578 + 2.30 1.50 -27.613 -26.578 + 2.30 1.60 -27.487 -26.578 + 2.30 1.70 -27.369 -26.578 + 2.30 1.80 -27.245 -26.578 + 2.30 1.90 -27.099 -26.579 + 2.30 2.00 -26.921 -26.579 + 2.30 2.10 -26.718 -26.579 + 2.30 2.20 -26.511 -26.579 + 2.30 2.30 -26.316 -26.580 + 2.30 2.40 -26.143 -26.580 + 2.30 2.50 -25.994 -26.580 + 2.30 2.60 -25.866 -26.581 + 2.30 2.70 -25.758 -26.581 + 2.30 2.80 -25.665 -26.581 + 2.30 2.90 -25.584 -26.582 + 2.30 3.00 -25.512 -26.582 + 2.30 3.10 -25.447 -26.582 + 2.30 3.20 -25.386 -26.582 + 2.30 3.30 -25.327 -26.582 + 2.30 3.40 -25.264 -26.582 + 2.30 3.50 -25.191 -26.582 + 2.30 3.60 -25.104 -26.582 + 2.30 3.70 -24.995 -26.581 + 2.30 3.80 -24.857 -26.581 + 2.30 3.90 -24.666 -26.579 + 2.30 4.00 -24.305 -26.574 + 2.30 4.10 -23.364 -26.578 + 2.30 4.20 -22.009 -26.697 + 2.30 4.30 -21.813 -27.479 + 2.30 4.40 -21.935 -27.850 + 2.30 4.50 -21.956 -27.944 + 2.30 4.60 -21.885 -27.873 + 2.30 4.70 -21.733 -27.824 + 2.30 4.80 -21.527 -27.666 + 2.30 4.90 -21.342 -27.426 + 2.30 5.00 -21.277 -26.821 + 2.30 5.10 -21.323 -26.638 + 2.30 5.20 -21.324 -26.551 + 2.30 5.30 -21.300 -26.578 + 2.30 5.40 -21.291 -26.590 + 2.30 5.50 -21.466 -26.553 + 2.30 5.60 -21.674 -26.619 + 2.30 5.70 -21.725 -26.719 + 2.30 5.80 -21.778 -26.853 + 2.30 5.90 -21.807 -26.932 + 2.30 6.00 -21.814 -26.992 + 2.30 6.10 -21.837 -27.039 + 2.30 6.20 -21.892 -27.124 + 2.30 6.30 -22.070 -27.187 + 2.30 6.40 -22.251 -27.247 + 2.30 6.50 -22.387 -27.356 + 2.30 6.60 -22.467 -27.425 + 2.30 6.70 -22.497 -27.461 + 2.30 6.80 -22.503 -27.503 + 2.30 6.90 -22.493 -27.546 + 2.30 7.00 -22.491 -27.553 + 2.30 7.10 -22.535 -27.604 + 2.30 7.20 -22.610 -27.692 + 2.30 7.30 -22.658 -27.709 + 2.30 7.40 -22.670 -27.762 + 2.30 7.50 -22.660 -27.831 + 2.30 7.60 -22.634 -27.898 + 2.30 7.70 -22.601 -27.939 + 2.30 7.80 -22.564 -27.962 + 2.30 7.90 -22.526 -27.987 + 2.30 8.00 -22.489 -28.100 + 2.30 8.10 -22.450 -28.229 + 2.30 8.20 -22.410 -28.360 + 2.30 8.30 -22.369 -28.497 + 2.30 8.40 -22.324 -28.624 + 2.30 8.50 -22.280 -28.748 + 2.30 8.60 -22.231 -28.872 + 2.30 8.70 -22.183 -28.999 + 2.30 8.80 -22.132 -29.131 + 2.30 8.90 -22.079 -29.267 + 2.30 9.00 -22.026 -29.406 + 2.40 1.00 -28.558 -26.677 + 2.40 1.10 -28.331 -26.677 + 2.40 1.20 -28.131 -26.677 + 2.40 1.30 -27.953 -26.677 + 2.40 1.40 -27.796 -26.678 + 2.40 1.50 -27.657 -26.678 + 2.40 1.60 -27.533 -26.678 + 2.40 1.70 -27.417 -26.678 + 2.40 1.80 -27.296 -26.678 + 2.40 1.90 -27.151 -26.678 + 2.40 2.00 -26.973 -26.678 + 2.40 2.10 -26.771 -26.679 + 2.40 2.20 -26.565 -26.679 + 2.40 2.30 -26.370 -26.679 + 2.40 2.40 -26.196 -26.680 + 2.40 2.50 -26.046 -26.680 + 2.40 2.60 -25.917 -26.680 + 2.40 2.70 -25.806 -26.680 + 2.40 2.80 -25.712 -26.681 + 2.40 2.90 -25.629 -26.681 + 2.40 3.00 -25.556 -26.681 + 2.40 3.10 -25.488 -26.681 + 2.40 3.20 -25.426 -26.681 + 2.40 3.30 -25.367 -26.681 + 2.40 3.40 -25.303 -26.681 + 2.40 3.50 -25.231 -26.681 + 2.40 3.60 -25.144 -26.680 + 2.40 3.70 -25.036 -26.680 + 2.40 3.80 -24.900 -26.679 + 2.40 3.90 -24.709 -26.677 + 2.40 4.00 -24.350 -26.672 + 2.40 4.10 -23.382 -26.673 + 2.40 4.20 -22.009 -26.765 + 2.40 4.30 -21.813 -27.577 + 2.40 4.40 -21.935 -27.937 + 2.40 4.50 -21.956 -27.970 + 2.40 4.60 -21.885 -27.883 + 2.40 4.70 -21.733 -27.830 + 2.40 4.80 -21.527 -27.670 + 2.40 4.90 -21.342 -27.428 + 2.40 5.00 -21.277 -26.820 + 2.40 5.10 -21.324 -26.638 + 2.40 5.20 -21.324 -26.551 + 2.40 5.30 -21.300 -26.578 + 2.40 5.40 -21.291 -26.590 + 2.40 5.50 -21.466 -26.553 + 2.40 5.60 -21.674 -26.619 + 2.40 5.70 -21.725 -26.719 + 2.40 5.80 -21.778 -26.851 + 2.40 5.90 -21.807 -26.929 + 2.40 6.00 -21.814 -26.990 + 2.40 6.10 -21.837 -27.039 + 2.40 6.20 -21.892 -27.124 + 2.40 6.30 -22.070 -27.187 + 2.40 6.40 -22.251 -27.247 + 2.40 6.50 -22.387 -27.356 + 2.40 6.60 -22.467 -27.425 + 2.40 6.70 -22.497 -27.461 + 2.40 6.80 -22.503 -27.503 + 2.40 6.90 -22.493 -27.546 + 2.40 7.00 -22.491 -27.553 + 2.40 7.10 -22.535 -27.604 + 2.40 7.20 -22.610 -27.692 + 2.40 7.30 -22.658 -27.709 + 2.40 7.40 -22.670 -27.762 + 2.40 7.50 -22.660 -27.831 + 2.40 7.60 -22.634 -27.898 + 2.40 7.70 -22.601 -27.939 + 2.40 7.80 -22.564 -27.962 + 2.40 7.90 -22.526 -27.987 + 2.40 8.00 -22.489 -28.100 + 2.40 8.10 -22.450 -28.229 + 2.40 8.20 -22.410 -28.360 + 2.40 8.30 -22.369 -28.497 + 2.40 8.40 -22.324 -28.624 + 2.40 8.50 -22.280 -28.748 + 2.40 8.60 -22.231 -28.872 + 2.40 8.70 -22.183 -28.999 + 2.40 8.80 -22.132 -29.131 + 2.40 8.90 -22.079 -29.267 + 2.40 9.00 -22.026 -29.406 + 2.50 1.00 -28.600 -26.777 + 2.50 1.10 -28.374 -26.777 + 2.50 1.20 -28.175 -26.777 + 2.50 1.30 -27.998 -26.777 + 2.50 1.40 -27.842 -26.777 + 2.50 1.50 -27.706 -26.777 + 2.50 1.60 -27.583 -26.778 + 2.50 1.70 -27.468 -26.778 + 2.50 1.80 -27.347 -26.778 + 2.50 1.90 -27.202 -26.778 + 2.50 2.00 -27.024 -26.778 + 2.50 2.10 -26.823 -26.778 + 2.50 2.20 -26.617 -26.779 + 2.50 2.30 -26.422 -26.779 + 2.50 2.40 -26.248 -26.779 + 2.50 2.50 -26.097 -26.779 + 2.50 2.60 -25.967 -26.780 + 2.50 2.70 -25.855 -26.780 + 2.50 2.80 -25.758 -26.780 + 2.50 2.90 -25.673 -26.780 + 2.50 3.00 -25.598 -26.781 + 2.50 3.10 -25.529 -26.781 + 2.50 3.20 -25.466 -26.781 + 2.50 3.30 -25.406 -26.780 + 2.50 3.40 -25.342 -26.780 + 2.50 3.50 -25.270 -26.780 + 2.50 3.60 -25.184 -26.779 + 2.50 3.70 -25.077 -26.778 + 2.50 3.80 -24.941 -26.778 + 2.50 3.90 -24.753 -26.775 + 2.50 4.00 -24.395 -26.769 + 2.50 4.10 -23.398 -26.767 + 2.50 4.20 -22.009 -26.828 + 2.50 4.30 -21.813 -27.674 + 2.50 4.40 -21.935 -28.019 + 2.50 4.50 -21.957 -27.989 + 2.50 4.60 -21.886 -27.892 + 2.50 4.70 -21.734 -27.835 + 2.50 4.80 -21.527 -27.673 + 2.50 4.90 -21.342 -27.430 + 2.50 5.00 -21.277 -26.820 + 2.50 5.10 -21.324 -26.638 + 2.50 5.20 -21.324 -26.552 + 2.50 5.30 -21.300 -26.578 + 2.50 5.40 -21.291 -26.590 + 2.50 5.50 -21.466 -26.553 + 2.50 5.60 -21.674 -26.619 + 2.50 5.70 -21.725 -26.717 + 2.50 5.80 -21.778 -26.848 + 2.50 5.90 -21.807 -26.926 + 2.50 6.00 -21.814 -26.988 + 2.50 6.10 -21.837 -27.038 + 2.50 6.20 -21.892 -27.124 + 2.50 6.30 -22.070 -27.187 + 2.50 6.40 -22.251 -27.247 + 2.50 6.50 -22.387 -27.356 + 2.50 6.60 -22.467 -27.425 + 2.50 6.70 -22.497 -27.461 + 2.50 6.80 -22.503 -27.503 + 2.50 6.90 -22.493 -27.546 + 2.50 7.00 -22.491 -27.553 + 2.50 7.10 -22.535 -27.604 + 2.50 7.20 -22.610 -27.692 + 2.50 7.30 -22.658 -27.709 + 2.50 7.40 -22.670 -27.762 + 2.50 7.50 -22.660 -27.831 + 2.50 7.60 -22.634 -27.898 + 2.50 7.70 -22.601 -27.939 + 2.50 7.80 -22.564 -27.962 + 2.50 7.90 -22.526 -27.987 + 2.50 8.00 -22.489 -28.100 + 2.50 8.10 -22.450 -28.229 + 2.50 8.20 -22.410 -28.360 + 2.50 8.30 -22.369 -28.497 + 2.50 8.40 -22.324 -28.624 + 2.50 8.50 -22.280 -28.748 + 2.50 8.60 -22.231 -28.872 + 2.50 8.70 -22.183 -28.999 + 2.50 8.80 -22.132 -29.131 + 2.50 8.90 -22.079 -29.267 + 2.50 9.00 -22.026 -29.406 + 2.60 1.00 -28.648 -26.877 + 2.60 1.10 -28.422 -26.877 + 2.60 1.20 -28.224 -26.877 + 2.60 1.30 -28.048 -26.877 + 2.60 1.40 -27.893 -26.877 + 2.60 1.50 -27.757 -26.877 + 2.60 1.60 -27.636 -26.877 + 2.60 1.70 -27.521 -26.877 + 2.60 1.80 -27.398 -26.878 + 2.60 1.90 -27.253 -26.878 + 2.60 2.00 -27.075 -26.878 + 2.60 2.10 -26.873 -26.878 + 2.60 2.20 -26.668 -26.878 + 2.60 2.30 -26.473 -26.878 + 2.60 2.40 -26.299 -26.879 + 2.60 2.50 -26.147 -26.879 + 2.60 2.60 -26.016 -26.879 + 2.60 2.70 -25.902 -26.879 + 2.60 2.80 -25.803 -26.880 + 2.60 2.90 -25.717 -26.880 + 2.60 3.00 -25.640 -26.880 + 2.60 3.10 -25.569 -26.880 + 2.60 3.20 -25.504 -26.880 + 2.60 3.30 -25.444 -26.880 + 2.60 3.40 -25.380 -26.879 + 2.60 3.50 -25.308 -26.879 + 2.60 3.60 -25.223 -26.878 + 2.60 3.70 -25.117 -26.877 + 2.60 3.80 -24.982 -26.876 + 2.60 3.90 -24.795 -26.873 + 2.60 4.00 -24.439 -26.867 + 2.60 4.10 -23.412 -26.861 + 2.60 4.20 -22.010 -26.886 + 2.60 4.30 -21.814 -27.770 + 2.60 4.40 -21.935 -28.097 + 2.60 4.50 -21.958 -28.000 + 2.60 4.60 -21.887 -27.899 + 2.60 4.70 -21.734 -27.840 + 2.60 4.80 -21.528 -27.676 + 2.60 4.90 -21.342 -27.432 + 2.60 5.00 -21.277 -26.819 + 2.60 5.10 -21.324 -26.638 + 2.60 5.20 -21.325 -26.552 + 2.60 5.30 -21.300 -26.579 + 2.60 5.40 -21.291 -26.590 + 2.60 5.50 -21.466 -26.553 + 2.60 5.60 -21.674 -26.619 + 2.60 5.70 -21.725 -26.716 + 2.60 5.80 -21.778 -26.845 + 2.60 5.90 -21.807 -26.922 + 2.60 6.00 -21.814 -26.986 + 2.60 6.10 -21.837 -27.037 + 2.60 6.20 -21.892 -27.124 + 2.60 6.30 -22.070 -27.187 + 2.60 6.40 -22.251 -27.247 + 2.60 6.50 -22.387 -27.356 + 2.60 6.60 -22.467 -27.425 + 2.60 6.70 -22.497 -27.461 + 2.60 6.80 -22.503 -27.503 + 2.60 6.90 -22.493 -27.546 + 2.60 7.00 -22.491 -27.553 + 2.60 7.10 -22.535 -27.604 + 2.60 7.20 -22.610 -27.692 + 2.60 7.30 -22.658 -27.709 + 2.60 7.40 -22.670 -27.762 + 2.60 7.50 -22.660 -27.831 + 2.60 7.60 -22.634 -27.898 + 2.60 7.70 -22.601 -27.939 + 2.60 7.80 -22.564 -27.962 + 2.60 7.90 -22.526 -27.987 + 2.60 8.00 -22.489 -28.100 + 2.60 8.10 -22.450 -28.229 + 2.60 8.20 -22.410 -28.360 + 2.60 8.30 -22.369 -28.493 + 2.60 8.40 -22.324 -28.624 + 2.60 8.50 -22.280 -28.748 + 2.60 8.60 -22.231 -28.872 + 2.60 8.70 -22.183 -28.999 + 2.60 8.80 -22.132 -29.131 + 2.60 8.90 -22.079 -29.267 + 2.60 9.00 -22.026 -29.406 + 2.70 1.00 -28.700 -26.977 + 2.70 1.10 -28.475 -26.977 + 2.70 1.20 -28.278 -26.977 + 2.70 1.30 -28.103 -26.977 + 2.70 1.40 -27.948 -26.977 + 2.70 1.50 -27.813 -26.977 + 2.70 1.60 -27.691 -26.977 + 2.70 1.70 -27.575 -26.977 + 2.70 1.80 -27.451 -26.977 + 2.70 1.90 -27.303 -26.977 + 2.70 2.00 -27.124 -26.978 + 2.70 2.10 -26.922 -26.978 + 2.70 2.20 -26.717 -26.978 + 2.70 2.30 -26.523 -26.978 + 2.70 2.40 -26.348 -26.978 + 2.70 2.50 -26.196 -26.979 + 2.70 2.60 -26.064 -26.979 + 2.70 2.70 -25.949 -26.979 + 2.70 2.80 -25.849 -26.979 + 2.70 2.90 -25.760 -26.979 + 2.70 3.00 -25.681 -26.979 + 2.70 3.10 -25.608 -26.979 + 2.70 3.20 -25.542 -26.979 + 2.70 3.30 -25.481 -26.979 + 2.70 3.40 -25.417 -26.978 + 2.70 3.50 -25.346 -26.978 + 2.70 3.60 -25.262 -26.977 + 2.70 3.70 -25.156 -26.976 + 2.70 3.80 -25.022 -26.975 + 2.70 3.90 -24.837 -26.971 + 2.70 4.00 -24.484 -26.965 + 2.70 4.10 -23.424 -26.954 + 2.70 4.20 -22.010 -26.939 + 2.70 4.30 -21.814 -27.865 + 2.70 4.40 -21.936 -28.170 + 2.70 4.50 -21.959 -28.016 + 2.70 4.60 -21.888 -27.905 + 2.70 4.70 -21.735 -27.844 + 2.70 4.80 -21.528 -27.679 + 2.70 4.90 -21.342 -27.434 + 2.70 5.00 -21.278 -26.819 + 2.70 5.10 -21.324 -26.638 + 2.70 5.20 -21.325 -26.552 + 2.70 5.30 -21.300 -26.579 + 2.70 5.40 -21.291 -26.590 + 2.70 5.50 -21.466 -26.553 + 2.70 5.60 -21.674 -26.618 + 2.70 5.70 -21.725 -26.714 + 2.70 5.80 -21.778 -26.841 + 2.70 5.90 -21.807 -26.918 + 2.70 6.00 -21.814 -26.982 + 2.70 6.10 -21.837 -27.036 + 2.70 6.20 -21.892 -27.124 + 2.70 6.30 -22.070 -27.187 + 2.70 6.40 -22.251 -27.247 + 2.70 6.50 -22.387 -27.356 + 2.70 6.60 -22.467 -27.425 + 2.70 6.70 -22.497 -27.461 + 2.70 6.80 -22.503 -27.503 + 2.70 6.90 -22.493 -27.546 + 2.70 7.00 -22.491 -27.553 + 2.70 7.10 -22.535 -27.604 + 2.70 7.20 -22.610 -27.692 + 2.70 7.30 -22.658 -27.709 + 2.70 7.40 -22.670 -27.762 + 2.70 7.50 -22.660 -27.831 + 2.70 7.60 -22.634 -27.898 + 2.70 7.70 -22.601 -27.939 + 2.70 7.80 -22.564 -27.962 + 2.70 7.90 -22.526 -27.987 + 2.70 8.00 -22.489 -28.100 + 2.70 8.10 -22.450 -28.229 + 2.70 8.20 -22.410 -28.360 + 2.70 8.30 -22.369 -28.497 + 2.70 8.40 -22.324 -28.624 + 2.70 8.50 -22.280 -28.748 + 2.70 8.60 -22.231 -28.872 + 2.70 8.70 -22.183 -28.999 + 2.70 8.80 -22.132 -29.131 + 2.70 8.90 -22.079 -29.267 + 2.70 9.00 -22.026 -29.406 + 2.80 1.00 -28.759 -27.077 + 2.80 1.10 -28.534 -27.077 + 2.80 1.20 -28.336 -27.077 + 2.80 1.30 -28.162 -27.077 + 2.80 1.40 -28.008 -27.077 + 2.80 1.50 -27.872 -27.077 + 2.80 1.60 -27.748 -27.077 + 2.80 1.70 -27.630 -27.077 + 2.80 1.80 -27.503 -27.077 + 2.80 1.90 -27.352 -27.077 + 2.80 2.00 -27.171 -27.077 + 2.80 2.10 -26.969 -27.077 + 2.80 2.20 -26.764 -27.078 + 2.80 2.30 -26.570 -27.078 + 2.80 2.40 -26.396 -27.078 + 2.80 2.50 -26.244 -27.078 + 2.80 2.60 -26.111 -27.078 + 2.80 2.70 -25.995 -27.079 + 2.80 2.80 -25.893 -27.079 + 2.80 2.90 -25.802 -27.079 + 2.80 3.00 -25.721 -27.079 + 2.80 3.10 -25.646 -27.079 + 2.80 3.20 -25.579 -27.079 + 2.80 3.30 -25.517 -27.078 + 2.80 3.40 -25.453 -27.078 + 2.80 3.50 -25.382 -27.077 + 2.80 3.60 -25.299 -27.076 + 2.80 3.70 -25.195 -27.075 + 2.80 3.80 -25.062 -27.074 + 2.80 3.90 -24.879 -27.070 + 2.80 4.00 -24.528 -27.063 + 2.80 4.10 -23.435 -27.045 + 2.80 4.20 -22.010 -26.986 + 2.80 4.30 -21.814 -27.959 + 2.80 4.40 -21.936 -28.235 + 2.80 4.50 -21.960 -28.029 + 2.80 4.60 -21.889 -27.910 + 2.80 4.70 -21.735 -27.847 + 2.80 4.80 -21.528 -27.682 + 2.80 4.90 -21.342 -27.436 + 2.80 5.00 -21.278 -26.818 + 2.80 5.10 -21.324 -26.639 + 2.80 5.20 -21.325 -26.552 + 2.80 5.30 -21.300 -26.579 + 2.80 5.40 -21.291 -26.590 + 2.80 5.50 -21.466 -26.553 + 2.80 5.60 -21.674 -26.618 + 2.80 5.70 -21.725 -26.712 + 2.80 5.80 -21.778 -26.836 + 2.80 5.90 -21.807 -26.912 + 2.80 6.00 -21.814 -26.978 + 2.80 6.10 -21.837 -27.035 + 2.80 6.20 -21.892 -27.124 + 2.80 6.30 -22.070 -27.187 + 2.80 6.40 -22.251 -27.247 + 2.80 6.50 -22.387 -27.356 + 2.80 6.60 -22.467 -27.425 + 2.80 6.70 -22.497 -27.461 + 2.80 6.80 -22.503 -27.503 + 2.80 6.90 -22.493 -27.546 + 2.80 7.00 -22.491 -27.553 + 2.80 7.10 -22.535 -27.604 + 2.80 7.20 -22.610 -27.692 + 2.80 7.30 -22.658 -27.709 + 2.80 7.40 -22.670 -27.762 + 2.80 7.50 -22.660 -27.831 + 2.80 7.60 -22.634 -27.898 + 2.80 7.70 -22.601 -27.939 + 2.80 7.80 -22.564 -27.962 + 2.80 7.90 -22.526 -27.987 + 2.80 8.00 -22.489 -28.100 + 2.80 8.10 -22.450 -28.229 + 2.80 8.20 -22.410 -28.360 + 2.80 8.30 -22.369 -28.497 + 2.80 8.40 -22.324 -28.624 + 2.80 8.50 -22.280 -28.748 + 2.80 8.60 -22.231 -28.872 + 2.80 8.70 -22.183 -28.999 + 2.80 8.80 -22.132 -29.131 + 2.80 8.90 -22.079 -29.267 + 2.80 9.00 -22.026 -29.406 + 2.90 1.00 -28.822 -27.176 + 2.90 1.10 -28.598 -27.176 + 2.90 1.20 -28.400 -27.176 + 2.90 1.30 -28.225 -27.177 + 2.90 1.40 -28.071 -27.177 + 2.90 1.50 -27.933 -27.177 + 2.90 1.60 -27.808 -27.177 + 2.90 1.70 -27.686 -27.177 + 2.90 1.80 -27.555 -27.177 + 2.90 1.90 -27.400 -27.177 + 2.90 2.00 -27.217 -27.177 + 2.90 2.10 -27.013 -27.177 + 2.90 2.20 -26.808 -27.177 + 2.90 2.30 -26.615 -27.178 + 2.90 2.40 -26.441 -27.178 + 2.90 2.50 -26.290 -27.178 + 2.90 2.60 -26.157 -27.178 + 2.90 2.70 -26.040 -27.178 + 2.90 2.80 -25.937 -27.178 + 2.90 2.90 -25.844 -27.179 + 2.90 3.00 -25.761 -27.179 + 2.90 3.10 -25.684 -27.179 + 2.90 3.20 -25.615 -27.178 + 2.90 3.30 -25.552 -27.178 + 2.90 3.40 -25.488 -27.177 + 2.90 3.50 -25.417 -27.176 + 2.90 3.60 -25.335 -27.175 + 2.90 3.70 -25.232 -27.174 + 2.90 3.80 -25.100 -27.173 + 2.90 3.90 -24.921 -27.169 + 2.90 4.00 -24.572 -27.161 + 2.90 4.10 -23.444 -27.136 + 2.90 4.20 -22.010 -27.028 + 2.90 4.30 -21.815 -28.051 + 2.90 4.40 -21.936 -28.293 + 2.90 4.50 -21.962 -28.040 + 2.90 4.60 -21.891 -27.915 + 2.90 4.70 -21.736 -27.850 + 2.90 4.80 -21.528 -27.685 + 2.90 4.90 -21.342 -27.438 + 2.90 5.00 -21.278 -26.818 + 2.90 5.10 -21.324 -26.639 + 2.90 5.20 -21.325 -26.552 + 2.90 5.30 -21.300 -26.579 + 2.90 5.40 -21.291 -26.590 + 2.90 5.50 -21.466 -26.553 + 2.90 5.60 -21.674 -26.617 + 2.90 5.70 -21.725 -26.709 + 2.90 5.80 -21.778 -26.829 + 2.90 5.90 -21.807 -26.904 + 2.90 6.00 -21.814 -26.974 + 2.90 6.10 -21.837 -27.034 + 2.90 6.20 -21.892 -27.124 + 2.90 6.30 -22.070 -27.187 + 2.90 6.40 -22.251 -27.247 + 2.90 6.50 -22.387 -27.356 + 2.90 6.60 -22.467 -27.425 + 2.90 6.70 -22.497 -27.461 + 2.90 6.80 -22.503 -27.503 + 2.90 6.90 -22.493 -27.546 + 2.90 7.00 -22.491 -27.553 + 2.90 7.10 -22.535 -27.604 + 2.90 7.20 -22.610 -27.692 + 2.90 7.30 -22.658 -27.709 + 2.90 7.40 -22.670 -27.762 + 2.90 7.50 -22.660 -27.831 + 2.90 7.60 -22.634 -27.898 + 2.90 7.70 -22.601 -27.939 + 2.90 7.80 -22.564 -27.962 + 2.90 7.90 -22.526 -27.987 + 2.90 8.00 -22.489 -28.100 + 2.90 8.10 -22.450 -28.229 + 2.90 8.20 -22.410 -28.360 + 2.90 8.30 -22.369 -28.497 + 2.90 8.40 -22.324 -28.624 + 2.90 8.50 -22.280 -28.748 + 2.90 8.60 -22.231 -28.872 + 2.90 8.70 -22.183 -28.999 + 2.90 8.80 -22.132 -29.131 + 2.90 8.90 -22.079 -29.267 + 2.90 9.00 -22.026 -29.406 + 3.00 1.00 -28.891 -27.276 + 3.00 1.10 -28.666 -27.276 + 3.00 1.20 -28.469 -27.276 + 3.00 1.30 -28.293 -27.276 + 3.00 1.40 -28.137 -27.276 + 3.00 1.50 -27.998 -27.277 + 3.00 1.60 -27.869 -27.277 + 3.00 1.70 -27.743 -27.277 + 3.00 1.80 -27.606 -27.277 + 3.00 1.90 -27.447 -27.277 + 3.00 2.00 -27.259 -27.277 + 3.00 2.10 -27.055 -27.277 + 3.00 2.20 -26.850 -27.277 + 3.00 2.30 -26.658 -27.277 + 3.00 2.40 -26.484 -27.277 + 3.00 2.50 -26.333 -27.278 + 3.00 2.60 -26.200 -27.278 + 3.00 2.70 -26.083 -27.278 + 3.00 2.80 -25.979 -27.278 + 3.00 2.90 -25.885 -27.278 + 3.00 3.00 -25.800 -27.278 + 3.00 3.10 -25.721 -27.278 + 3.00 3.20 -25.650 -27.278 + 3.00 3.30 -25.587 -27.277 + 3.00 3.40 -25.523 -27.277 + 3.00 3.50 -25.452 -27.276 + 3.00 3.60 -25.371 -27.275 + 3.00 3.70 -25.269 -27.274 + 3.00 3.80 -25.138 -27.272 + 3.00 3.90 -24.961 -27.268 + 3.00 4.00 -24.615 -27.259 + 3.00 4.10 -23.452 -27.224 + 3.00 4.20 -22.011 -27.066 + 3.00 4.30 -21.815 -28.140 + 3.00 4.40 -21.937 -28.343 + 3.00 4.50 -21.963 -28.047 + 3.00 4.60 -21.892 -27.919 + 3.00 4.70 -21.737 -27.853 + 3.00 4.80 -21.529 -27.688 + 3.00 4.90 -21.342 -27.440 + 3.00 5.00 -21.278 -26.817 + 3.00 5.10 -21.325 -26.638 + 3.00 5.20 -21.325 -26.553 + 3.00 5.30 -21.300 -26.579 + 3.00 5.40 -21.291 -26.590 + 3.00 5.50 -21.466 -26.553 + 3.00 5.60 -21.674 -26.616 + 3.00 5.70 -21.725 -26.706 + 3.00 5.80 -21.778 -26.821 + 3.00 5.90 -21.807 -26.895 + 3.00 6.00 -21.814 -26.967 + 3.00 6.10 -21.837 -27.032 + 3.00 6.20 -21.892 -27.123 + 3.00 6.30 -22.070 -27.187 + 3.00 6.40 -22.251 -27.247 + 3.00 6.50 -22.387 -27.356 + 3.00 6.60 -22.467 -27.425 + 3.00 6.70 -22.497 -27.461 + 3.00 6.80 -22.503 -27.503 + 3.00 6.90 -22.493 -27.546 + 3.00 7.00 -22.491 -27.553 + 3.00 7.10 -22.535 -27.604 + 3.00 7.20 -22.610 -27.692 + 3.00 7.30 -22.658 -27.709 + 3.00 7.40 -22.670 -27.762 + 3.00 7.50 -22.660 -27.831 + 3.00 7.60 -22.634 -27.898 + 3.00 7.70 -22.601 -27.939 + 3.00 7.80 -22.564 -27.962 + 3.00 7.90 -22.526 -27.987 + 3.00 8.00 -22.489 -28.100 + 3.00 8.10 -22.450 -28.229 + 3.00 8.20 -22.410 -28.360 + 3.00 8.30 -22.369 -28.497 + 3.00 8.40 -22.324 -28.624 + 3.00 8.50 -22.280 -28.748 + 3.00 8.60 -22.231 -28.872 + 3.00 8.70 -22.183 -28.999 + 3.00 8.80 -22.132 -29.131 + 3.00 8.90 -22.079 -29.267 + 3.00 9.00 -22.026 -29.406 + 3.10 1.00 -28.964 -27.376 + 3.10 1.10 -28.739 -27.376 + 3.10 1.20 -28.541 -27.376 + 3.10 1.30 -28.364 -27.376 + 3.10 1.40 -28.206 -27.376 + 3.10 1.50 -28.064 -27.376 + 3.10 1.60 -27.931 -27.376 + 3.10 1.70 -27.799 -27.377 + 3.10 1.80 -27.656 -27.377 + 3.10 1.90 -27.490 -27.377 + 3.10 2.00 -27.300 -27.377 + 3.10 2.10 -27.094 -27.377 + 3.10 2.20 -26.889 -27.377 + 3.10 2.30 -26.697 -27.377 + 3.10 2.40 -26.524 -27.377 + 3.10 2.50 -26.374 -27.377 + 3.10 2.60 -26.242 -27.378 + 3.10 2.70 -26.125 -27.378 + 3.10 2.80 -26.020 -27.378 + 3.10 2.90 -25.925 -27.378 + 3.10 3.00 -25.838 -27.378 + 3.10 3.10 -25.757 -27.378 + 3.10 3.20 -25.685 -27.378 + 3.10 3.30 -25.621 -27.377 + 3.10 3.40 -25.556 -27.376 + 3.10 3.50 -25.486 -27.376 + 3.10 3.60 -25.406 -27.375 + 3.10 3.70 -25.306 -27.373 + 3.10 3.80 -25.176 -27.371 + 3.10 3.90 -25.002 -27.367 + 3.10 4.00 -24.658 -27.357 + 3.10 4.10 -23.458 -27.310 + 3.10 4.20 -22.011 -27.098 + 3.10 4.30 -21.816 -28.227 + 3.10 4.40 -21.938 -28.383 + 3.10 4.50 -21.965 -28.052 + 3.10 4.60 -21.894 -27.923 + 3.10 4.70 -21.738 -27.856 + 3.10 4.80 -21.529 -27.639 + 3.10 4.90 -21.343 -27.403 + 3.10 5.00 -21.278 -26.812 + 3.10 5.10 -21.325 -26.638 + 3.10 5.20 -21.325 -26.553 + 3.10 5.30 -21.300 -26.580 + 3.10 5.40 -21.291 -26.590 + 3.10 5.50 -21.466 -26.553 + 3.10 5.60 -21.674 -26.615 + 3.10 5.70 -21.725 -26.702 + 3.10 5.80 -21.778 -26.812 + 3.10 5.90 -21.807 -26.884 + 3.10 6.00 -21.814 -26.960 + 3.10 6.10 -21.837 -27.029 + 3.10 6.20 -21.892 -27.123 + 3.10 6.30 -22.070 -27.187 + 3.10 6.40 -22.251 -27.247 + 3.10 6.50 -22.387 -27.356 + 3.10 6.60 -22.467 -27.425 + 3.10 6.70 -22.497 -27.461 + 3.10 6.80 -22.503 -27.503 + 3.10 6.90 -22.493 -27.546 + 3.10 7.00 -22.491 -27.553 + 3.10 7.10 -22.535 -27.604 + 3.10 7.20 -22.610 -27.692 + 3.10 7.30 -22.658 -27.709 + 3.10 7.40 -22.670 -27.762 + 3.10 7.50 -22.660 -27.831 + 3.10 7.60 -22.634 -27.898 + 3.10 7.70 -22.601 -27.939 + 3.10 7.80 -22.564 -27.962 + 3.10 7.90 -22.526 -27.987 + 3.10 8.00 -22.489 -28.100 + 3.10 8.10 -22.450 -28.229 + 3.10 8.20 -22.410 -28.360 + 3.10 8.30 -22.369 -28.497 + 3.10 8.40 -22.324 -28.624 + 3.10 8.50 -22.280 -28.748 + 3.10 8.60 -22.231 -28.872 + 3.10 8.70 -22.183 -28.999 + 3.10 8.80 -22.132 -29.131 + 3.10 8.90 -22.079 -29.267 + 3.10 9.00 -22.026 -29.406 + 3.20 1.00 -29.041 -27.476 + 3.20 1.10 -28.816 -27.476 + 3.20 1.20 -28.617 -27.476 + 3.20 1.30 -28.439 -27.476 + 3.20 1.40 -28.278 -27.476 + 3.20 1.50 -28.131 -27.476 + 3.20 1.60 -27.993 -27.476 + 3.20 1.70 -27.854 -27.476 + 3.20 1.80 -27.704 -27.476 + 3.20 1.90 -27.532 -27.477 + 3.20 2.00 -27.337 -27.477 + 3.20 2.10 -27.130 -27.477 + 3.20 2.20 -26.925 -27.477 + 3.20 2.30 -26.734 -27.477 + 3.20 2.40 -26.562 -27.477 + 3.20 2.50 -26.413 -27.477 + 3.20 2.60 -26.281 -27.477 + 3.20 2.70 -26.164 -27.477 + 3.20 2.80 -26.059 -27.478 + 3.20 2.90 -25.964 -27.478 + 3.20 3.00 -25.876 -27.478 + 3.20 3.10 -25.794 -27.478 + 3.20 3.20 -25.720 -27.477 + 3.20 3.30 -25.654 -27.477 + 3.20 3.40 -25.589 -27.476 + 3.20 3.50 -25.519 -27.475 + 3.20 3.60 -25.441 -27.474 + 3.20 3.70 -25.341 -27.473 + 3.20 3.80 -25.213 -27.471 + 3.20 3.90 -25.042 -27.466 + 3.20 4.00 -24.701 -27.455 + 3.20 4.10 -23.464 -27.393 + 3.20 4.20 -22.011 -27.127 + 3.20 4.30 -21.816 -28.309 + 3.20 4.40 -21.938 -28.414 + 3.20 4.50 -21.967 -28.056 + 3.20 4.60 -21.896 -27.926 + 3.20 4.70 -21.739 -27.579 + 3.20 4.80 -21.529 -27.500 + 3.20 4.90 -21.343 -27.352 + 3.20 5.00 -21.279 -26.809 + 3.20 5.10 -21.325 -26.638 + 3.20 5.20 -21.325 -26.553 + 3.20 5.30 -21.300 -26.580 + 3.20 5.40 -21.292 -26.590 + 3.20 5.50 -21.466 -26.553 + 3.20 5.60 -21.674 -26.614 + 3.20 5.70 -21.725 -26.696 + 3.20 5.80 -21.778 -26.800 + 3.20 5.90 -21.808 -26.871 + 3.20 6.00 -21.814 -26.951 + 3.20 6.10 -21.837 -27.026 + 3.20 6.20 -21.892 -27.123 + 3.20 6.30 -22.070 -27.187 + 3.20 6.40 -22.251 -27.247 + 3.20 6.50 -22.387 -27.356 + 3.20 6.60 -22.467 -27.425 + 3.20 6.70 -22.497 -27.461 + 3.20 6.80 -22.503 -27.503 + 3.20 6.90 -22.493 -27.546 + 3.20 7.00 -22.491 -27.553 + 3.20 7.10 -22.535 -27.604 + 3.20 7.20 -22.610 -27.692 + 3.20 7.30 -22.658 -27.709 + 3.20 7.40 -22.670 -27.762 + 3.20 7.50 -22.660 -27.831 + 3.20 7.60 -22.634 -27.898 + 3.20 7.70 -22.601 -27.939 + 3.20 7.80 -22.564 -27.962 + 3.20 7.90 -22.526 -27.987 + 3.20 8.00 -22.489 -28.100 + 3.20 8.10 -22.450 -28.229 + 3.20 8.20 -22.410 -28.360 + 3.20 8.30 -22.369 -28.497 + 3.20 8.40 -22.324 -28.624 + 3.20 8.50 -22.280 -28.748 + 3.20 8.60 -22.231 -28.872 + 3.20 8.70 -22.183 -28.999 + 3.20 8.80 -22.132 -29.131 + 3.20 8.90 -22.079 -29.267 + 3.20 9.00 -22.026 -29.406 + 3.30 1.00 -29.122 -27.576 + 3.30 1.10 -28.897 -27.576 + 3.30 1.20 -28.697 -27.576 + 3.30 1.30 -28.516 -27.576 + 3.30 1.40 -28.352 -27.576 + 3.30 1.50 -28.200 -27.576 + 3.30 1.60 -28.054 -27.576 + 3.30 1.70 -27.907 -27.576 + 3.30 1.80 -27.749 -27.576 + 3.30 1.90 -27.571 -27.576 + 3.30 2.00 -27.372 -27.576 + 3.30 2.10 -27.163 -27.577 + 3.30 2.20 -26.959 -27.577 + 3.30 2.30 -26.769 -27.577 + 3.30 2.40 -26.597 -27.577 + 3.30 2.50 -26.449 -27.577 + 3.30 2.60 -26.318 -27.577 + 3.30 2.70 -26.202 -27.577 + 3.30 2.80 -26.097 -27.577 + 3.30 2.90 -26.001 -27.578 + 3.30 3.00 -25.913 -27.578 + 3.30 3.10 -25.829 -27.577 + 3.30 3.20 -25.754 -27.577 + 3.30 3.30 -25.688 -27.577 + 3.30 3.40 -25.622 -27.576 + 3.30 3.50 -25.552 -27.575 + 3.30 3.60 -25.475 -27.574 + 3.30 3.70 -25.377 -27.573 + 3.30 3.80 -25.250 -27.571 + 3.30 3.90 -25.082 -27.565 + 3.30 4.00 -24.744 -27.553 + 3.30 4.10 -23.469 -27.473 + 3.30 4.20 -22.012 -27.152 + 3.30 4.30 -21.817 -28.387 + 3.30 4.40 -21.939 -28.437 + 3.30 4.50 -21.969 -28.058 + 3.30 4.60 -21.898 -27.445 + 3.30 4.70 -21.740 -27.367 + 3.30 4.80 -21.530 -27.416 + 3.30 4.90 -21.343 -27.313 + 3.30 5.00 -21.279 -26.806 + 3.30 5.10 -21.325 -26.638 + 3.30 5.20 -21.325 -26.553 + 3.30 5.30 -21.300 -26.580 + 3.30 5.40 -21.292 -26.590 + 3.30 5.50 -21.466 -26.553 + 3.30 5.60 -21.675 -26.613 + 3.30 5.70 -21.726 -26.690 + 3.30 5.80 -21.778 -26.786 + 3.30 5.90 -21.808 -26.854 + 3.30 6.00 -21.814 -26.939 + 3.30 6.10 -21.837 -27.022 + 3.30 6.20 -21.892 -27.122 + 3.30 6.30 -22.070 -27.187 + 3.30 6.40 -22.251 -27.247 + 3.30 6.50 -22.387 -27.356 + 3.30 6.60 -22.467 -27.425 + 3.30 6.70 -22.497 -27.461 + 3.30 6.80 -22.503 -27.503 + 3.30 6.90 -22.493 -27.546 + 3.30 7.00 -22.491 -27.553 + 3.30 7.10 -22.535 -27.604 + 3.30 7.20 -22.610 -27.692 + 3.30 7.30 -22.658 -27.709 + 3.30 7.40 -22.670 -27.762 + 3.30 7.50 -22.660 -27.831 + 3.30 7.60 -22.634 -27.898 + 3.30 7.70 -22.601 -27.939 + 3.30 7.80 -22.564 -27.962 + 3.30 7.90 -22.526 -27.987 + 3.30 8.00 -22.489 -28.100 + 3.30 8.10 -22.450 -28.229 + 3.30 8.20 -22.410 -28.360 + 3.30 8.30 -22.369 -28.497 + 3.30 8.40 -22.324 -28.624 + 3.30 8.50 -22.280 -28.748 + 3.30 8.60 -22.231 -28.872 + 3.30 8.70 -22.183 -28.999 + 3.30 8.80 -22.132 -29.131 + 3.30 8.90 -22.079 -29.267 + 3.30 9.00 -22.026 -29.406 + 3.40 1.00 -29.206 -27.676 + 3.40 1.10 -28.981 -27.676 + 3.40 1.20 -28.779 -27.676 + 3.40 1.30 -28.596 -27.676 + 3.40 1.40 -28.427 -27.676 + 3.40 1.50 -28.268 -27.676 + 3.40 1.60 -28.114 -27.676 + 3.40 1.70 -27.959 -27.676 + 3.40 1.80 -27.792 -27.676 + 3.40 1.90 -27.607 -27.676 + 3.40 2.00 -27.404 -27.676 + 3.40 2.10 -27.194 -27.676 + 3.40 2.20 -26.990 -27.677 + 3.40 2.30 -26.800 -27.677 + 3.40 2.40 -26.630 -27.677 + 3.40 2.50 -26.483 -27.677 + 3.40 2.60 -26.353 -27.677 + 3.40 2.70 -26.238 -27.677 + 3.40 2.80 -26.133 -27.677 + 3.40 2.90 -26.037 -27.677 + 3.40 3.00 -25.949 -27.677 + 3.40 3.10 -25.864 -27.677 + 3.40 3.20 -25.788 -27.677 + 3.40 3.30 -25.720 -27.677 + 3.40 3.40 -25.654 -27.676 + 3.40 3.50 -25.584 -27.675 + 3.40 3.60 -25.508 -27.674 + 3.40 3.70 -25.412 -27.673 + 3.40 3.80 -25.287 -27.670 + 3.40 3.90 -25.122 -27.664 + 3.40 4.00 -24.787 -27.650 + 3.40 4.10 -23.473 -27.549 + 3.40 4.20 -22.012 -27.173 + 3.40 4.30 -21.818 -28.460 + 3.40 4.40 -21.940 -28.453 + 3.40 4.50 -21.972 -27.597 + 3.40 4.60 -21.901 -27.199 + 3.40 4.70 -21.741 -27.252 + 3.40 4.80 -21.530 -27.360 + 3.40 4.90 -21.343 -27.282 + 3.40 5.00 -21.279 -26.804 + 3.40 5.10 -21.326 -26.637 + 3.40 5.20 -21.325 -26.553 + 3.40 5.30 -21.300 -26.581 + 3.40 5.40 -21.292 -26.590 + 3.40 5.50 -21.467 -26.553 + 3.40 5.60 -21.675 -26.611 + 3.40 5.70 -21.726 -26.682 + 3.40 5.80 -21.778 -26.769 + 3.40 5.90 -21.808 -26.835 + 3.40 6.00 -21.814 -26.926 + 3.40 6.10 -21.837 -27.018 + 3.40 6.20 -21.892 -27.122 + 3.40 6.30 -22.070 -27.187 + 3.40 6.40 -22.251 -27.247 + 3.40 6.50 -22.387 -27.356 + 3.40 6.60 -22.467 -27.425 + 3.40 6.70 -22.497 -27.461 + 3.40 6.80 -22.503 -27.503 + 3.40 6.90 -22.493 -27.546 + 3.40 7.00 -22.491 -27.553 + 3.40 7.10 -22.535 -27.604 + 3.40 7.20 -22.610 -27.692 + 3.40 7.30 -22.658 -27.709 + 3.40 7.40 -22.670 -27.762 + 3.40 7.50 -22.660 -27.831 + 3.40 7.60 -22.634 -27.898 + 3.40 7.70 -22.601 -27.939 + 3.40 7.80 -22.564 -27.962 + 3.40 7.90 -22.526 -27.987 + 3.40 8.00 -22.489 -28.100 + 3.40 8.10 -22.450 -28.229 + 3.40 8.20 -22.410 -28.360 + 3.40 8.30 -22.369 -28.497 + 3.40 8.40 -22.324 -28.624 + 3.40 8.50 -22.280 -28.748 + 3.40 8.60 -22.231 -28.872 + 3.40 8.70 -22.183 -28.999 + 3.40 8.80 -22.132 -29.131 + 3.40 8.90 -22.079 -29.267 + 3.40 9.00 -22.026 -29.406 + 3.50 1.00 -29.293 -27.776 + 3.50 1.10 -29.067 -27.776 + 3.50 1.20 -28.864 -27.776 + 3.50 1.30 -28.677 -27.776 + 3.50 1.40 -28.503 -27.776 + 3.50 1.50 -28.336 -27.776 + 3.50 1.60 -28.173 -27.776 + 3.50 1.70 -28.009 -27.776 + 3.50 1.80 -27.834 -27.776 + 3.50 1.90 -27.641 -27.776 + 3.50 2.00 -27.434 -27.776 + 3.50 2.10 -27.223 -27.776 + 3.50 2.20 -27.019 -27.776 + 3.50 2.30 -26.830 -27.776 + 3.50 2.40 -26.661 -27.777 + 3.50 2.50 -26.515 -27.777 + 3.50 2.60 -26.386 -27.777 + 3.50 2.70 -26.271 -27.777 + 3.50 2.80 -26.168 -27.777 + 3.50 2.90 -26.072 -27.777 + 3.50 3.00 -25.984 -27.777 + 3.50 3.10 -25.899 -27.777 + 3.50 3.20 -25.822 -27.777 + 3.50 3.30 -25.753 -27.776 + 3.50 3.40 -25.686 -27.776 + 3.50 3.50 -25.616 -27.775 + 3.50 3.60 -25.542 -27.774 + 3.50 3.70 -25.447 -27.773 + 3.50 3.80 -25.324 -27.770 + 3.50 3.90 -25.163 -27.763 + 3.50 4.00 -24.829 -27.748 + 3.50 4.10 -23.476 -27.621 + 3.50 4.20 -22.013 -27.192 + 3.50 4.30 -21.819 -28.527 + 3.50 4.40 -21.942 -28.463 + 3.50 4.50 -21.974 -27.232 + 3.50 4.60 -21.903 -27.071 + 3.50 4.70 -21.742 -27.180 + 3.50 4.80 -21.530 -27.322 + 3.50 4.90 -21.343 -27.257 + 3.50 5.00 -21.279 -26.801 + 3.50 5.10 -21.326 -26.637 + 3.50 5.20 -21.325 -26.553 + 3.50 5.30 -21.300 -26.581 + 3.50 5.40 -21.292 -26.590 + 3.50 5.50 -21.467 -26.553 + 3.50 5.60 -21.675 -26.608 + 3.50 5.70 -21.726 -26.672 + 3.50 5.80 -21.778 -26.748 + 3.50 5.90 -21.808 -26.811 + 3.50 6.00 -21.815 -26.909 + 3.50 6.10 -21.837 -27.012 + 3.50 6.20 -21.892 -27.121 + 3.50 6.30 -22.070 -27.187 + 3.50 6.40 -22.251 -27.247 + 3.50 6.50 -22.387 -27.356 + 3.50 6.60 -22.467 -27.425 + 3.50 6.70 -22.497 -27.461 + 3.50 6.80 -22.503 -27.503 + 3.50 6.90 -22.493 -27.546 + 3.50 7.00 -22.491 -27.553 + 3.50 7.10 -22.535 -27.604 + 3.50 7.20 -22.610 -27.692 + 3.50 7.30 -22.658 -27.709 + 3.50 7.40 -22.670 -27.762 + 3.50 7.50 -22.660 -27.831 + 3.50 7.60 -22.634 -27.898 + 3.50 7.70 -22.601 -27.939 + 3.50 7.80 -22.564 -27.962 + 3.50 7.90 -22.526 -27.987 + 3.50 8.00 -22.489 -28.100 + 3.50 8.10 -22.450 -28.229 + 3.50 8.20 -22.410 -28.360 + 3.50 8.30 -22.369 -28.497 + 3.50 8.40 -22.324 -28.624 + 3.50 8.50 -22.280 -28.748 + 3.50 8.60 -22.231 -28.872 + 3.50 8.70 -22.183 -28.999 + 3.50 8.80 -22.132 -29.131 + 3.50 8.90 -22.079 -29.267 + 3.50 9.00 -22.026 -29.406 + 3.60 1.00 -29.382 -27.876 + 3.60 1.10 -29.155 -27.876 + 3.60 1.20 -28.950 -27.876 + 3.60 1.30 -28.760 -27.876 + 3.60 1.40 -28.579 -27.876 + 3.60 1.50 -28.403 -27.876 + 3.60 1.60 -28.231 -27.876 + 3.60 1.70 -28.056 -27.876 + 3.60 1.80 -27.873 -27.876 + 3.60 1.90 -27.674 -27.876 + 3.60 2.00 -27.463 -27.876 + 3.60 2.10 -27.250 -27.876 + 3.60 2.20 -27.046 -27.876 + 3.60 2.30 -26.858 -27.876 + 3.60 2.40 -26.690 -27.876 + 3.60 2.50 -26.545 -27.877 + 3.60 2.60 -26.417 -27.877 + 3.60 2.70 -26.303 -27.877 + 3.60 2.80 -26.200 -27.877 + 3.60 2.90 -26.106 -27.877 + 3.60 3.00 -26.017 -27.877 + 3.60 3.10 -25.933 -27.877 + 3.60 3.20 -25.855 -27.877 + 3.60 3.30 -25.785 -27.876 + 3.60 3.40 -25.717 -27.876 + 3.60 3.50 -25.648 -27.875 + 3.60 3.60 -25.575 -27.874 + 3.60 3.70 -25.483 -27.873 + 3.60 3.80 -25.361 -27.870 + 3.60 3.90 -25.203 -27.862 + 3.60 4.00 -24.872 -27.845 + 3.60 4.10 -23.479 -27.689 + 3.60 4.20 -22.013 -27.208 + 3.60 4.30 -21.820 -28.587 + 3.60 4.40 -21.943 -27.750 + 3.60 4.50 -21.977 -27.070 + 3.60 4.60 -21.906 -26.992 + 3.60 4.70 -21.744 -27.131 + 3.60 4.80 -21.531 -27.295 + 3.60 4.90 -21.343 -27.236 + 3.60 5.00 -21.279 -26.800 + 3.60 5.10 -21.326 -26.637 + 3.60 5.20 -21.325 -26.553 + 3.60 5.30 -21.300 -26.581 + 3.60 5.40 -21.292 -26.590 + 3.60 5.50 -21.467 -26.552 + 3.60 5.60 -21.675 -26.605 + 3.60 5.70 -21.726 -26.660 + 3.60 5.80 -21.779 -26.724 + 3.60 5.90 -21.808 -26.784 + 3.60 6.00 -21.815 -26.889 + 3.60 6.10 -21.837 -27.005 + 3.60 6.20 -21.892 -27.120 + 3.60 6.30 -22.070 -27.186 + 3.60 6.40 -22.251 -27.247 + 3.60 6.50 -22.387 -27.356 + 3.60 6.60 -22.467 -27.425 + 3.60 6.70 -22.497 -27.461 + 3.60 6.80 -22.503 -27.503 + 3.60 6.90 -22.493 -27.546 + 3.60 7.00 -22.491 -27.553 + 3.60 7.10 -22.535 -27.604 + 3.60 7.20 -22.610 -27.692 + 3.60 7.30 -22.658 -27.709 + 3.60 7.40 -22.670 -27.762 + 3.60 7.50 -22.660 -27.831 + 3.60 7.60 -22.634 -27.898 + 3.60 7.70 -22.601 -27.939 + 3.60 7.80 -22.564 -27.962 + 3.60 7.90 -22.526 -27.987 + 3.60 8.00 -22.489 -28.100 + 3.60 8.10 -22.450 -28.229 + 3.60 8.20 -22.410 -28.360 + 3.60 8.30 -22.369 -28.497 + 3.60 8.40 -22.324 -28.624 + 3.60 8.50 -22.280 -28.748 + 3.60 8.60 -22.231 -28.872 + 3.60 8.70 -22.183 -28.999 + 3.60 8.80 -22.132 -29.131 + 3.60 8.90 -22.079 -29.267 + 3.60 9.00 -22.026 -29.406 + 3.70 1.00 -29.473 -27.976 + 3.70 1.10 -29.246 -27.976 + 3.70 1.20 -29.038 -27.976 + 3.70 1.30 -28.843 -27.976 + 3.70 1.40 -28.655 -27.976 + 3.70 1.50 -28.469 -27.976 + 3.70 1.60 -28.286 -27.976 + 3.70 1.70 -28.103 -27.976 + 3.70 1.80 -27.911 -27.976 + 3.70 1.90 -27.706 -27.976 + 3.70 2.00 -27.491 -27.976 + 3.70 2.10 -27.276 -27.976 + 3.70 2.20 -27.072 -27.976 + 3.70 2.30 -26.885 -27.976 + 3.70 2.40 -26.718 -27.976 + 3.70 2.50 -26.574 -27.976 + 3.70 2.60 -26.447 -27.976 + 3.70 2.70 -26.334 -27.977 + 3.70 2.80 -26.232 -27.977 + 3.70 2.90 -26.138 -27.977 + 3.70 3.00 -26.050 -27.977 + 3.70 3.10 -25.966 -27.977 + 3.70 3.20 -25.888 -27.976 + 3.70 3.30 -25.817 -27.976 + 3.70 3.40 -25.748 -27.976 + 3.70 3.50 -25.679 -27.975 + 3.70 3.60 -25.607 -27.974 + 3.70 3.70 -25.518 -27.973 + 3.70 3.80 -25.399 -27.969 + 3.70 3.90 -25.245 -27.961 + 3.70 4.00 -24.914 -27.943 + 3.70 4.10 -23.482 -27.752 + 3.70 4.20 -22.013 -27.222 + 3.70 4.30 -21.820 -28.640 + 3.70 4.40 -21.945 -27.375 + 3.70 4.50 -21.980 -26.974 + 3.70 4.60 -21.908 -26.938 + 3.70 4.70 -21.745 -27.097 + 3.70 4.80 -21.531 -27.276 + 3.70 4.90 -21.343 -27.218 + 3.70 5.00 -21.280 -26.798 + 3.70 5.10 -21.326 -26.636 + 3.70 5.20 -21.325 -26.553 + 3.70 5.30 -21.300 -26.581 + 3.70 5.40 -21.292 -26.589 + 3.70 5.50 -21.468 -26.552 + 3.70 5.60 -21.675 -26.601 + 3.70 5.70 -21.726 -26.646 + 3.70 5.80 -21.779 -26.696 + 3.70 5.90 -21.808 -26.752 + 3.70 6.00 -21.815 -26.866 + 3.70 6.10 -21.837 -26.996 + 3.70 6.20 -21.892 -27.118 + 3.70 6.30 -22.070 -27.186 + 3.70 6.40 -22.251 -27.247 + 3.70 6.50 -22.387 -27.356 + 3.70 6.60 -22.467 -27.425 + 3.70 6.70 -22.497 -27.461 + 3.70 6.80 -22.503 -27.503 + 3.70 6.90 -22.493 -27.546 + 3.70 7.00 -22.491 -27.553 + 3.70 7.10 -22.535 -27.604 + 3.70 7.20 -22.610 -27.692 + 3.70 7.30 -22.658 -27.709 + 3.70 7.40 -22.670 -27.762 + 3.70 7.50 -22.660 -27.831 + 3.70 7.60 -22.634 -27.898 + 3.70 7.70 -22.601 -27.939 + 3.70 7.80 -22.564 -27.962 + 3.70 7.90 -22.526 -27.987 + 3.70 8.00 -22.489 -28.100 + 3.70 8.10 -22.450 -28.229 + 3.70 8.20 -22.410 -28.360 + 3.70 8.30 -22.369 -28.497 + 3.70 8.40 -22.324 -28.624 + 3.70 8.50 -22.280 -28.748 + 3.70 8.60 -22.231 -28.872 + 3.70 8.70 -22.183 -28.999 + 3.70 8.80 -22.132 -29.131 + 3.70 8.90 -22.079 -29.267 + 3.70 9.00 -22.026 -29.406 + 3.80 1.00 -29.566 -28.076 + 3.80 1.10 -29.337 -28.076 + 3.80 1.20 -29.128 -28.076 + 3.80 1.30 -28.927 -28.076 + 3.80 1.40 -28.730 -28.076 + 3.80 1.50 -28.534 -28.076 + 3.80 1.60 -28.341 -28.076 + 3.80 1.70 -28.148 -28.076 + 3.80 1.80 -27.948 -28.076 + 3.80 1.90 -27.736 -28.076 + 3.80 2.00 -27.518 -28.076 + 3.80 2.10 -27.301 -28.076 + 3.80 2.20 -27.097 -28.076 + 3.80 2.30 -26.911 -28.076 + 3.80 2.40 -26.744 -28.076 + 3.80 2.50 -26.601 -28.076 + 3.80 2.60 -26.475 -28.076 + 3.80 2.70 -26.363 -28.076 + 3.80 2.80 -26.262 -28.077 + 3.80 2.90 -26.169 -28.077 + 3.80 3.00 -26.083 -28.077 + 3.80 3.10 -25.999 -28.077 + 3.80 3.20 -25.921 -28.076 + 3.80 3.30 -25.849 -28.076 + 3.80 3.40 -25.778 -28.075 + 3.80 3.50 -25.709 -28.075 + 3.80 3.60 -25.639 -28.074 + 3.80 3.70 -25.554 -28.073 + 3.80 3.80 -25.437 -28.069 + 3.80 3.90 -25.287 -28.060 + 3.80 4.00 -24.957 -28.039 + 3.80 4.10 -23.484 -27.810 + 3.80 4.20 -22.014 -27.234 + 3.80 4.30 -21.821 -28.686 + 3.80 4.40 -21.946 -27.210 + 3.80 4.50 -21.983 -26.911 + 3.80 4.60 -21.911 -26.901 + 3.80 4.70 -21.747 -27.073 + 3.80 4.80 -21.532 -27.262 + 3.80 4.90 -21.344 -27.201 + 3.80 5.00 -21.280 -26.796 + 3.80 5.10 -21.326 -26.637 + 3.80 5.20 -21.325 -26.553 + 3.80 5.30 -21.300 -26.581 + 3.80 5.40 -21.292 -26.589 + 3.80 5.50 -21.468 -26.552 + 3.80 5.60 -21.675 -26.597 + 3.80 5.70 -21.726 -26.629 + 3.80 5.80 -21.779 -26.663 + 3.80 5.90 -21.808 -26.716 + 3.80 6.00 -21.815 -26.838 + 3.80 6.10 -21.837 -26.985 + 3.80 6.20 -21.892 -27.117 + 3.80 6.30 -22.070 -27.186 + 3.80 6.40 -22.251 -27.246 + 3.80 6.50 -22.387 -27.356 + 3.80 6.60 -22.467 -27.425 + 3.80 6.70 -22.497 -27.461 + 3.80 6.80 -22.503 -27.503 + 3.80 6.90 -22.493 -27.546 + 3.80 7.00 -22.491 -27.553 + 3.80 7.10 -22.535 -27.604 + 3.80 7.20 -22.610 -27.692 + 3.80 7.30 -22.658 -27.709 + 3.80 7.40 -22.670 -27.762 + 3.80 7.50 -22.660 -27.831 + 3.80 7.60 -22.634 -27.898 + 3.80 7.70 -22.601 -27.939 + 3.80 7.80 -22.564 -27.962 + 3.80 7.90 -22.526 -27.987 + 3.80 8.00 -22.489 -28.100 + 3.80 8.10 -22.450 -28.229 + 3.80 8.20 -22.410 -28.360 + 3.80 8.30 -22.369 -28.497 + 3.80 8.40 -22.324 -28.624 + 3.80 8.50 -22.280 -28.748 + 3.80 8.60 -22.231 -28.872 + 3.80 8.70 -22.183 -28.999 + 3.80 8.80 -22.132 -29.131 + 3.80 8.90 -22.079 -29.267 + 3.80 9.00 -22.026 -29.406 + 3.90 1.00 -29.660 -28.176 + 3.90 1.10 -29.430 -28.176 + 3.90 1.20 -29.218 -28.176 + 3.90 1.30 -29.011 -28.176 + 3.90 1.40 -28.805 -28.176 + 3.90 1.50 -28.598 -28.176 + 3.90 1.60 -28.395 -28.176 + 3.90 1.70 -28.193 -28.176 + 3.90 1.80 -27.985 -28.176 + 3.90 1.90 -27.767 -28.176 + 3.90 2.00 -27.544 -28.176 + 3.90 2.10 -27.326 -28.176 + 3.90 2.20 -27.122 -28.176 + 3.90 2.30 -26.936 -28.176 + 3.90 2.40 -26.770 -28.176 + 3.90 2.50 -26.628 -28.176 + 3.90 2.60 -26.503 -28.176 + 3.90 2.70 -26.392 -28.176 + 3.90 2.80 -26.292 -28.176 + 3.90 2.90 -26.200 -28.176 + 3.90 3.00 -26.114 -28.176 + 3.90 3.10 -26.032 -28.176 + 3.90 3.20 -25.953 -28.176 + 3.90 3.30 -25.880 -28.175 + 3.90 3.40 -25.808 -28.175 + 3.90 3.50 -25.737 -28.174 + 3.90 3.60 -25.669 -28.174 + 3.90 3.70 -25.589 -28.172 + 3.90 3.80 -25.477 -28.168 + 3.90 3.90 -25.330 -28.159 + 3.90 4.00 -24.999 -28.136 + 3.90 4.10 -23.486 -27.862 + 3.90 4.20 -22.014 -27.245 + 3.90 4.30 -21.822 -27.863 + 3.90 4.40 -21.948 -27.113 + 3.90 4.50 -21.986 -26.867 + 3.90 4.60 -21.913 -26.874 + 3.90 4.70 -21.748 -27.056 + 3.90 4.80 -21.532 -27.252 + 3.90 4.90 -21.344 -27.187 + 3.90 5.00 -21.280 -26.795 + 3.90 5.10 -21.327 -26.636 + 3.90 5.20 -21.325 -26.553 + 3.90 5.30 -21.300 -26.581 + 3.90 5.40 -21.293 -26.589 + 3.90 5.50 -21.469 -26.551 + 3.90 5.60 -21.676 -26.591 + 3.90 5.70 -21.726 -26.609 + 3.90 5.80 -21.779 -26.627 + 3.90 5.90 -21.809 -26.675 + 3.90 6.00 -21.815 -26.807 + 3.90 6.10 -21.837 -26.972 + 3.90 6.20 -21.892 -27.115 + 3.90 6.30 -22.070 -27.186 + 3.90 6.40 -22.251 -27.246 + 3.90 6.50 -22.387 -27.356 + 3.90 6.60 -22.467 -27.425 + 3.90 6.70 -22.497 -27.461 + 3.90 6.80 -22.503 -27.503 + 3.90 6.90 -22.493 -27.546 + 3.90 7.00 -22.491 -27.553 + 3.90 7.10 -22.535 -27.604 + 3.90 7.20 -22.610 -27.692 + 3.90 7.30 -22.658 -27.709 + 3.90 7.40 -22.670 -27.762 + 3.90 7.50 -22.660 -27.831 + 3.90 7.60 -22.634 -27.898 + 3.90 7.70 -22.601 -27.939 + 3.90 7.80 -22.564 -27.962 + 3.90 7.90 -22.526 -27.987 + 3.90 8.00 -22.489 -28.100 + 3.90 8.10 -22.450 -28.229 + 3.90 8.20 -22.410 -28.360 + 3.90 8.30 -22.369 -28.497 + 3.90 8.40 -22.324 -28.624 + 3.90 8.50 -22.280 -28.748 + 3.90 8.60 -22.231 -28.872 + 3.90 8.70 -22.183 -28.999 + 3.90 8.80 -22.132 -29.131 + 3.90 8.90 -22.079 -29.267 + 3.90 9.00 -22.026 -29.406 + 4.00 1.00 -29.755 -28.276 + 4.00 1.10 -29.524 -28.276 + 4.00 1.20 -29.309 -28.276 + 4.00 1.30 -29.096 -28.276 + 4.00 1.40 -28.879 -28.276 + 4.00 1.50 -28.662 -28.276 + 4.00 1.60 -28.448 -28.276 + 4.00 1.70 -28.238 -28.276 + 4.00 1.80 -28.022 -28.276 + 4.00 1.90 -27.799 -28.276 + 4.00 2.00 -27.571 -28.276 + 4.00 2.10 -27.351 -28.276 + 4.00 2.20 -27.146 -28.276 + 4.00 2.30 -26.960 -28.276 + 4.00 2.40 -26.795 -28.276 + 4.00 2.50 -26.654 -28.276 + 4.00 2.60 -26.530 -28.276 + 4.00 2.70 -26.420 -28.276 + 4.00 2.80 -26.321 -28.276 + 4.00 2.90 -26.230 -28.276 + 4.00 3.00 -26.146 -28.276 + 4.00 3.10 -26.064 -28.276 + 4.00 3.20 -25.985 -28.276 + 4.00 3.30 -25.910 -28.275 + 4.00 3.40 -25.836 -28.274 + 4.00 3.50 -25.763 -28.274 + 4.00 3.60 -25.696 -28.273 + 4.00 3.70 -25.625 -28.272 + 4.00 3.80 -25.518 -28.268 + 4.00 3.90 -25.374 -28.258 + 4.00 4.00 -25.043 -28.232 + 4.00 4.10 -23.488 -27.909 + 4.00 4.20 -22.015 -27.255 + 4.00 4.30 -21.824 -27.586 + 4.00 4.40 -21.950 -27.049 + 4.00 4.50 -21.989 -26.835 + 4.00 4.60 -21.916 -26.854 + 4.00 4.70 -21.749 -27.043 + 4.00 4.80 -21.533 -27.246 + 4.00 4.90 -21.344 -27.174 + 4.00 5.00 -21.280 -26.794 + 4.00 5.10 -21.327 -26.635 + 4.00 5.20 -21.325 -26.553 + 4.00 5.30 -21.300 -26.581 + 4.00 5.40 -21.293 -26.588 + 4.00 5.50 -21.469 -26.550 + 4.00 5.60 -21.676 -26.585 + 4.00 5.70 -21.726 -26.586 + 4.00 5.80 -21.779 -26.586 + 4.00 5.90 -21.809 -26.630 + 4.00 6.00 -21.816 -26.771 + 4.00 6.10 -21.837 -26.957 + 4.00 6.20 -21.892 -27.113 + 4.00 6.30 -22.070 -27.186 + 4.00 6.40 -22.251 -27.246 + 4.00 6.50 -22.387 -27.356 + 4.00 6.60 -22.467 -27.425 + 4.00 6.70 -22.497 -27.461 + 4.00 6.80 -22.503 -27.503 + 4.00 6.90 -22.493 -27.546 + 4.00 7.00 -22.491 -27.553 + 4.00 7.10 -22.535 -27.604 + 4.00 7.20 -22.610 -27.692 + 4.00 7.30 -22.658 -27.709 + 4.00 7.40 -22.670 -27.762 + 4.00 7.50 -22.660 -27.831 + 4.00 7.60 -22.634 -27.898 + 4.00 7.70 -22.601 -27.939 + 4.00 7.80 -22.564 -27.962 + 4.00 7.90 -22.526 -27.987 + 4.00 8.00 -22.489 -28.100 + 4.00 8.10 -22.450 -28.229 + 4.00 8.20 -22.410 -28.360 + 4.00 8.30 -22.369 -28.497 + 4.00 8.40 -22.324 -28.624 + 4.00 8.50 -22.280 -28.748 + 4.00 8.60 -22.231 -28.872 + 4.00 8.70 -22.183 -28.999 + 4.00 8.80 -22.132 -29.131 + 4.00 8.90 -22.079 -29.267 + 4.00 9.00 -22.026 -29.406 + 4.10 1.00 -29.851 -28.376 + 4.10 1.10 -29.619 -28.376 + 4.10 1.20 -29.400 -28.376 + 4.10 1.30 -29.180 -28.376 + 4.10 1.40 -28.953 -28.376 + 4.10 1.50 -28.725 -28.376 + 4.10 1.60 -28.503 -28.376 + 4.10 1.70 -28.284 -28.376 + 4.10 1.80 -28.061 -28.376 + 4.10 1.90 -27.830 -28.376 + 4.10 2.00 -27.599 -28.376 + 4.10 2.10 -27.376 -28.376 + 4.10 2.20 -27.170 -28.376 + 4.10 2.30 -26.984 -28.376 + 4.10 2.40 -26.819 -28.376 + 4.10 2.50 -26.679 -28.376 + 4.10 2.60 -26.556 -28.376 + 4.10 2.70 -26.447 -28.376 + 4.10 2.80 -26.350 -28.376 + 4.10 2.90 -26.260 -28.376 + 4.10 3.00 -26.177 -28.376 + 4.10 3.10 -26.096 -28.376 + 4.10 3.20 -26.017 -28.375 + 4.10 3.30 -25.940 -28.374 + 4.10 3.40 -25.862 -28.373 + 4.10 3.50 -25.786 -28.372 + 4.10 3.60 -25.721 -28.371 + 4.10 3.70 -25.661 -28.371 + 4.10 3.80 -25.560 -28.367 + 4.10 3.90 -25.420 -28.356 + 4.10 4.00 -25.086 -28.327 + 4.10 4.10 -23.490 -27.951 + 4.10 4.20 -22.016 -27.264 + 4.10 4.30 -21.825 -27.448 + 4.10 4.40 -21.952 -27.003 + 4.10 4.50 -21.991 -26.811 + 4.10 4.60 -21.919 -26.839 + 4.10 4.70 -21.751 -27.035 + 4.10 4.80 -21.533 -27.242 + 4.10 4.90 -21.344 -27.162 + 4.10 5.00 -21.281 -26.792 + 4.10 5.10 -21.327 -26.635 + 4.10 5.20 -21.325 -26.553 + 4.10 5.30 -21.300 -26.581 + 4.10 5.40 -21.293 -26.588 + 4.10 5.50 -21.470 -26.549 + 4.10 5.60 -21.676 -26.577 + 4.10 5.70 -21.727 -26.560 + 4.10 5.80 -21.780 -26.541 + 4.10 5.90 -21.809 -26.581 + 4.10 6.00 -21.816 -26.731 + 4.10 6.10 -21.837 -26.939 + 4.10 6.20 -21.892 -27.110 + 4.10 6.30 -22.070 -27.185 + 4.10 6.40 -22.251 -27.246 + 4.10 6.50 -22.387 -27.356 + 4.10 6.60 -22.467 -27.425 + 4.10 6.70 -22.497 -27.461 + 4.10 6.80 -22.503 -27.503 + 4.10 6.90 -22.493 -27.546 + 4.10 7.00 -22.491 -27.553 + 4.10 7.10 -22.535 -27.604 + 4.10 7.20 -22.610 -27.692 + 4.10 7.30 -22.658 -27.709 + 4.10 7.40 -22.670 -27.762 + 4.10 7.50 -22.660 -27.831 + 4.10 7.60 -22.634 -27.898 + 4.10 7.70 -22.601 -27.939 + 4.10 7.80 -22.564 -27.962 + 4.10 7.90 -22.526 -27.987 + 4.10 8.00 -22.489 -28.100 + 4.10 8.10 -22.450 -28.229 + 4.10 8.20 -22.410 -28.360 + 4.10 8.30 -22.369 -28.497 + 4.10 8.40 -22.324 -28.624 + 4.10 8.50 -22.280 -28.748 + 4.10 8.60 -22.231 -28.872 + 4.10 8.70 -22.183 -28.999 + 4.10 8.80 -22.132 -29.131 + 4.10 8.90 -22.079 -29.267 + 4.10 9.00 -22.026 -29.406 + 4.20 1.00 -29.947 -28.476 + 4.20 1.10 -29.715 -28.476 + 4.20 1.20 -29.492 -28.476 + 4.20 1.30 -29.264 -28.476 + 4.20 1.40 -29.027 -28.476 + 4.20 1.50 -28.789 -28.476 + 4.20 1.60 -28.558 -28.476 + 4.20 1.70 -28.331 -28.476 + 4.20 1.80 -28.101 -28.476 + 4.20 1.90 -27.863 -28.476 + 4.20 2.00 -27.627 -28.476 + 4.20 2.10 -27.402 -28.476 + 4.20 2.20 -27.194 -28.476 + 4.20 2.30 -27.008 -28.476 + 4.20 2.40 -26.844 -28.476 + 4.20 2.50 -26.705 -28.476 + 4.20 2.60 -26.583 -28.476 + 4.20 2.70 -26.475 -28.476 + 4.20 2.80 -26.378 -28.476 + 4.20 2.90 -26.290 -28.476 + 4.20 3.00 -26.208 -28.476 + 4.20 3.10 -26.128 -28.475 + 4.20 3.20 -26.048 -28.474 + 4.20 3.30 -25.967 -28.473 + 4.20 3.40 -25.886 -28.472 + 4.20 3.50 -25.804 -28.470 + 4.20 3.60 -25.741 -28.469 + 4.20 3.70 -25.699 -28.471 + 4.20 3.80 -25.604 -28.466 + 4.20 3.90 -25.467 -28.454 + 4.20 4.00 -25.130 -28.422 + 4.20 4.10 -23.491 -27.989 + 4.20 4.20 -22.016 -27.272 + 4.20 4.30 -21.826 -27.362 + 4.20 4.40 -21.953 -26.971 + 4.20 4.50 -21.994 -26.793 + 4.20 4.60 -21.921 -26.829 + 4.20 4.70 -21.752 -27.029 + 4.20 4.80 -21.533 -27.239 + 4.20 4.90 -21.344 -27.151 + 4.20 5.00 -21.281 -26.791 + 4.20 5.10 -21.327 -26.634 + 4.20 5.20 -21.325 -26.552 + 4.20 5.30 -21.300 -26.581 + 4.20 5.40 -21.293 -26.587 + 4.20 5.50 -21.471 -26.548 + 4.20 5.60 -21.676 -26.567 + 4.20 5.70 -21.727 -26.530 + 4.20 5.80 -21.780 -26.492 + 4.20 5.90 -21.810 -26.528 + 4.20 6.00 -21.816 -26.687 + 4.20 6.10 -21.838 -26.918 + 4.20 6.20 -21.892 -27.106 + 4.20 6.30 -22.070 -27.185 + 4.20 6.40 -22.251 -27.246 + 4.20 6.50 -22.387 -27.356 + 4.20 6.60 -22.467 -27.425 + 4.20 6.70 -22.497 -27.461 + 4.20 6.80 -22.503 -27.503 + 4.20 6.90 -22.493 -27.546 + 4.20 7.00 -22.491 -27.553 + 4.20 7.10 -22.535 -27.604 + 4.20 7.20 -22.610 -27.692 + 4.20 7.30 -22.658 -27.709 + 4.20 7.40 -22.670 -27.762 + 4.20 7.50 -22.660 -27.831 + 4.20 7.60 -22.634 -27.898 + 4.20 7.70 -22.601 -27.939 + 4.20 7.80 -22.564 -27.962 + 4.20 7.90 -22.526 -27.987 + 4.20 8.00 -22.489 -28.100 + 4.20 8.10 -22.450 -28.229 + 4.20 8.20 -22.410 -28.360 + 4.20 8.30 -22.369 -28.497 + 4.20 8.40 -22.324 -28.624 + 4.20 8.50 -22.280 -28.748 + 4.20 8.60 -22.231 -28.872 + 4.20 8.70 -22.183 -28.999 + 4.20 8.80 -22.132 -29.131 + 4.20 8.90 -22.079 -29.267 + 4.20 9.00 -22.026 -29.406 + 4.30 1.00 -30.044 -28.576 + 4.30 1.10 -29.810 -28.576 + 4.30 1.20 -29.584 -28.576 + 4.30 1.30 -29.349 -28.576 + 4.30 1.40 -29.102 -28.576 + 4.30 1.50 -28.854 -28.576 + 4.30 1.60 -28.615 -28.576 + 4.30 1.70 -28.381 -28.576 + 4.30 1.80 -28.142 -28.576 + 4.30 1.90 -27.897 -28.576 + 4.30 2.00 -27.655 -28.576 + 4.30 2.10 -27.427 -28.576 + 4.30 2.20 -27.219 -28.576 + 4.30 2.30 -27.033 -28.576 + 4.30 2.40 -26.869 -28.576 + 4.30 2.50 -26.730 -28.576 + 4.30 2.60 -26.609 -28.576 + 4.30 2.70 -26.503 -28.576 + 4.30 2.80 -26.408 -28.576 + 4.30 2.90 -26.321 -28.576 + 4.30 3.00 -26.240 -28.576 + 4.30 3.10 -26.160 -28.575 + 4.30 3.20 -26.078 -28.573 + 4.30 3.30 -25.993 -28.571 + 4.30 3.40 -25.905 -28.569 + 4.30 3.50 -25.815 -28.566 + 4.30 3.60 -25.761 -28.566 + 4.30 3.70 -25.738 -28.570 + 4.30 3.80 -25.650 -28.565 + 4.30 3.90 -25.517 -28.552 + 4.30 4.00 -25.174 -28.516 + 4.30 4.10 -23.493 -28.021 + 4.30 4.20 -22.017 -27.279 + 4.30 4.30 -21.827 -27.304 + 4.30 4.40 -21.955 -26.946 + 4.30 4.50 -21.997 -26.780 + 4.30 4.60 -21.923 -26.821 + 4.30 4.70 -21.753 -27.025 + 4.30 4.80 -21.534 -27.238 + 4.30 4.90 -21.344 -27.140 + 4.30 5.00 -21.281 -26.789 + 4.30 5.10 -21.328 -26.633 + 4.30 5.20 -21.325 -26.551 + 4.30 5.30 -21.300 -26.581 + 4.30 5.40 -21.293 -26.586 + 4.30 5.50 -21.471 -26.546 + 4.30 5.60 -21.677 -26.556 + 4.30 5.70 -21.727 -26.498 + 4.30 5.80 -21.780 -26.441 + 4.30 5.90 -21.811 -26.473 + 4.30 6.00 -21.817 -26.639 + 4.30 6.10 -21.838 -26.894 + 4.30 6.20 -21.892 -27.102 + 4.30 6.30 -22.070 -27.184 + 4.30 6.40 -22.251 -27.246 + 4.30 6.50 -22.387 -27.356 + 4.30 6.60 -22.467 -27.425 + 4.30 6.70 -22.497 -27.461 + 4.30 6.80 -22.503 -27.503 + 4.30 6.90 -22.493 -27.546 + 4.30 7.00 -22.491 -27.553 + 4.30 7.10 -22.535 -27.604 + 4.30 7.20 -22.610 -27.692 + 4.30 7.30 -22.658 -27.709 + 4.30 7.40 -22.670 -27.762 + 4.30 7.50 -22.660 -27.831 + 4.30 7.60 -22.634 -27.898 + 4.30 7.70 -22.601 -27.939 + 4.30 7.80 -22.564 -27.962 + 4.30 7.90 -22.526 -27.987 + 4.30 8.00 -22.489 -28.100 + 4.30 8.10 -22.450 -28.229 + 4.30 8.20 -22.410 -28.360 + 4.30 8.30 -22.369 -28.497 + 4.30 8.40 -22.324 -28.624 + 4.30 8.50 -22.280 -28.748 + 4.30 8.60 -22.231 -28.872 + 4.30 8.70 -22.183 -28.999 + 4.30 8.80 -22.132 -29.131 + 4.30 8.90 -22.079 -29.267 + 4.30 9.00 -22.026 -29.406 + 4.40 1.00 -30.141 -28.676 + 4.40 1.10 -29.906 -28.676 + 4.40 1.20 -29.676 -28.676 + 4.40 1.30 -29.434 -28.676 + 4.40 1.40 -29.178 -28.676 + 4.40 1.50 -28.921 -28.676 + 4.40 1.60 -28.674 -28.676 + 4.40 1.70 -28.432 -28.676 + 4.40 1.80 -28.184 -28.676 + 4.40 1.90 -27.932 -28.676 + 4.40 2.00 -27.685 -28.676 + 4.40 2.10 -27.454 -28.676 + 4.40 2.20 -27.244 -28.676 + 4.40 2.30 -27.057 -28.676 + 4.40 2.40 -26.894 -28.676 + 4.40 2.50 -26.756 -28.676 + 4.40 2.60 -26.636 -28.676 + 4.40 2.70 -26.531 -28.676 + 4.40 2.80 -26.438 -28.676 + 4.40 2.90 -26.353 -28.676 + 4.40 3.00 -26.273 -28.675 + 4.40 3.10 -26.192 -28.674 + 4.40 3.20 -26.107 -28.672 + 4.40 3.30 -26.014 -28.668 + 4.40 3.40 -25.918 -28.665 + 4.40 3.50 -25.819 -28.661 + 4.40 3.60 -25.783 -28.663 + 4.40 3.70 -25.779 -28.669 + 4.40 3.80 -25.699 -28.664 + 4.40 3.90 -25.568 -28.650 + 4.40 4.00 -25.219 -28.610 + 4.40 4.10 -23.495 -28.049 + 4.40 4.20 -22.017 -27.281 + 4.40 4.30 -21.827 -27.263 + 4.40 4.40 -21.957 -26.928 + 4.40 4.50 -21.999 -26.769 + 4.40 4.60 -21.926 -26.816 + 4.40 4.70 -21.754 -27.023 + 4.40 4.80 -21.534 -27.238 + 4.40 4.90 -21.344 -27.130 + 4.40 5.00 -21.281 -26.788 + 4.40 5.10 -21.328 -26.631 + 4.40 5.20 -21.325 -26.550 + 4.40 5.30 -21.300 -26.580 + 4.40 5.40 -21.294 -26.585 + 4.40 5.50 -21.472 -26.544 + 4.40 5.60 -21.677 -26.544 + 4.40 5.70 -21.727 -26.463 + 4.40 5.80 -21.781 -26.388 + 4.40 5.90 -21.811 -26.415 + 4.40 6.00 -21.817 -26.588 + 4.40 6.10 -21.838 -26.867 + 4.40 6.20 -21.892 -27.097 + 4.40 6.30 -22.070 -27.184 + 4.40 6.40 -22.251 -27.246 + 4.40 6.50 -22.387 -27.356 + 4.40 6.60 -22.467 -27.425 + 4.40 6.70 -22.497 -27.461 + 4.40 6.80 -22.503 -27.503 + 4.40 6.90 -22.493 -27.546 + 4.40 7.00 -22.491 -27.553 + 4.40 7.10 -22.535 -27.604 + 4.40 7.20 -22.610 -27.692 + 4.40 7.30 -22.658 -27.709 + 4.40 7.40 -22.670 -27.762 + 4.40 7.50 -22.660 -27.831 + 4.40 7.60 -22.634 -27.898 + 4.40 7.70 -22.601 -27.939 + 4.40 7.80 -22.564 -27.962 + 4.40 7.90 -22.526 -27.987 + 4.40 8.00 -22.489 -28.100 + 4.40 8.10 -22.450 -28.229 + 4.40 8.20 -22.410 -28.360 + 4.40 8.30 -22.369 -28.497 + 4.40 8.40 -22.324 -28.624 + 4.40 8.50 -22.280 -28.748 + 4.40 8.60 -22.231 -28.872 + 4.40 8.70 -22.183 -28.999 + 4.40 8.80 -22.132 -29.131 + 4.40 8.90 -22.079 -29.267 + 4.40 9.00 -22.026 -29.406 + 4.50 1.00 -30.239 -28.776 + 4.50 1.10 -30.003 -28.776 + 4.50 1.20 -29.769 -28.776 + 4.50 1.30 -29.520 -28.776 + 4.50 1.40 -29.255 -28.776 + 4.50 1.50 -28.990 -28.776 + 4.50 1.60 -28.736 -28.776 + 4.50 1.70 -28.485 -28.776 + 4.50 1.80 -28.228 -28.776 + 4.50 1.90 -27.967 -28.776 + 4.50 2.00 -27.714 -28.776 + 4.50 2.10 -27.480 -28.776 + 4.50 2.20 -27.269 -28.776 + 4.50 2.30 -27.082 -28.776 + 4.50 2.40 -26.919 -28.776 + 4.50 2.50 -26.783 -28.776 + 4.50 2.60 -26.664 -28.776 + 4.50 2.70 -26.560 -28.776 + 4.50 2.80 -26.469 -28.776 + 4.50 2.90 -26.386 -28.776 + 4.50 3.00 -26.307 -28.775 + 4.50 3.10 -26.224 -28.773 + 4.50 3.20 -26.133 -28.769 + 4.50 3.30 -26.031 -28.764 + 4.50 3.40 -25.924 -28.760 + 4.50 3.50 -25.816 -28.754 + 4.50 3.60 -25.810 -28.760 + 4.50 3.70 -25.824 -28.768 + 4.50 3.80 -25.750 -28.763 + 4.50 3.90 -25.621 -28.747 + 4.50 4.00 -25.263 -28.702 + 4.50 4.10 -23.496 -28.074 + 4.50 4.20 -22.018 -27.259 + 4.50 4.30 -21.828 -27.233 + 4.50 4.40 -21.958 -26.913 + 4.50 4.50 -22.001 -26.761 + 4.50 4.60 -21.928 -26.812 + 4.50 4.70 -21.755 -27.022 + 4.50 4.80 -21.535 -27.239 + 4.50 4.90 -21.344 -27.120 + 4.50 5.00 -21.282 -26.769 + 4.50 5.10 -21.328 -26.618 + 4.50 5.20 -21.325 -26.545 + 4.50 5.30 -21.300 -26.579 + 4.50 5.40 -21.294 -26.584 + 4.50 5.50 -21.473 -26.542 + 4.50 5.60 -21.677 -26.530 + 4.50 5.70 -21.728 -26.426 + 4.50 5.80 -21.782 -26.334 + 4.50 5.90 -21.812 -26.357 + 4.50 6.00 -21.818 -26.536 + 4.50 6.10 -21.838 -26.837 + 4.50 6.20 -21.892 -27.091 + 4.50 6.30 -22.070 -27.183 + 4.50 6.40 -22.251 -27.246 + 4.50 6.50 -22.387 -27.356 + 4.50 6.60 -22.467 -27.425 + 4.50 6.70 -22.497 -27.461 + 4.50 6.80 -22.503 -27.503 + 4.50 6.90 -22.493 -27.546 + 4.50 7.00 -22.491 -27.553 + 4.50 7.10 -22.535 -27.604 + 4.50 7.20 -22.610 -27.692 + 4.50 7.30 -22.658 -27.709 + 4.50 7.40 -22.670 -27.762 + 4.50 7.50 -22.660 -27.831 + 4.50 7.60 -22.634 -27.898 + 4.50 7.70 -22.601 -27.939 + 4.50 7.80 -22.564 -27.962 + 4.50 7.90 -22.526 -27.987 + 4.50 8.00 -22.489 -28.100 + 4.50 8.10 -22.450 -28.229 + 4.50 8.20 -22.410 -28.360 + 4.50 8.30 -22.369 -28.497 + 4.50 8.40 -22.324 -28.624 + 4.50 8.50 -22.280 -28.748 + 4.50 8.60 -22.231 -28.872 + 4.50 8.70 -22.183 -28.999 + 4.50 8.80 -22.132 -29.131 + 4.50 8.90 -22.079 -29.267 + 4.50 9.00 -22.026 -29.406 + 4.60 1.00 -30.336 -28.876 + 4.60 1.10 -30.099 -28.876 + 4.60 1.20 -29.862 -28.876 + 4.60 1.30 -29.606 -28.876 + 4.60 1.40 -29.333 -28.876 + 4.60 1.50 -29.061 -28.876 + 4.60 1.60 -28.799 -28.876 + 4.60 1.70 -28.539 -28.876 + 4.60 1.80 -28.272 -28.876 + 4.60 1.90 -28.003 -28.876 + 4.60 2.00 -27.745 -28.876 + 4.60 2.10 -27.507 -28.876 + 4.60 2.20 -27.295 -28.876 + 4.60 2.30 -27.108 -28.876 + 4.60 2.40 -26.946 -28.876 + 4.60 2.50 -26.810 -28.876 + 4.60 2.60 -26.693 -28.876 + 4.60 2.70 -26.591 -28.876 + 4.60 2.80 -26.501 -28.876 + 4.60 2.90 -26.420 -28.876 + 4.60 3.00 -26.341 -28.875 + 4.60 3.10 -26.255 -28.871 + 4.60 3.20 -26.155 -28.866 + 4.60 3.30 -26.041 -28.859 + 4.60 3.40 -25.924 -28.852 + 4.60 3.50 -25.811 -28.846 + 4.60 3.60 -25.843 -28.858 + 4.60 3.70 -25.871 -28.867 + 4.60 3.80 -25.803 -28.861 + 4.60 3.90 -25.676 -28.845 + 4.60 4.00 -25.308 -28.794 + 4.60 4.10 -23.497 -28.095 + 4.60 4.20 -22.018 -27.244 + 4.60 4.30 -21.829 -27.210 + 4.60 4.40 -21.960 -26.902 + 4.60 4.50 -22.003 -26.755 + 4.60 4.60 -21.929 -26.809 + 4.60 4.70 -21.756 -27.023 + 4.60 4.80 -21.535 -27.241 + 4.60 4.90 -21.344 -26.995 + 4.60 5.00 -21.282 -26.714 + 4.60 5.10 -21.329 -26.599 + 4.60 5.20 -21.325 -26.539 + 4.60 5.30 -21.301 -26.577 + 4.60 5.40 -21.294 -26.583 + 4.60 5.50 -21.474 -26.539 + 4.60 5.60 -21.678 -26.516 + 4.60 5.70 -21.728 -26.388 + 4.60 5.80 -21.782 -26.280 + 4.60 5.90 -21.813 -26.298 + 4.60 6.00 -21.819 -26.482 + 4.60 6.10 -21.839 -26.804 + 4.60 6.20 -21.892 -27.084 + 4.60 6.30 -22.070 -27.182 + 4.60 6.40 -22.251 -27.245 + 4.60 6.50 -22.387 -27.356 + 4.60 6.60 -22.467 -27.425 + 4.60 6.70 -22.497 -27.461 + 4.60 6.80 -22.503 -27.503 + 4.60 6.90 -22.493 -27.546 + 4.60 7.00 -22.491 -27.553 + 4.60 7.10 -22.535 -27.604 + 4.60 7.20 -22.610 -27.692 + 4.60 7.30 -22.658 -27.709 + 4.60 7.40 -22.670 -27.762 + 4.60 7.50 -22.660 -27.831 + 4.60 7.60 -22.634 -27.898 + 4.60 7.70 -22.601 -27.939 + 4.60 7.80 -22.564 -27.962 + 4.60 7.90 -22.526 -27.987 + 4.60 8.00 -22.489 -28.100 + 4.60 8.10 -22.450 -28.229 + 4.60 8.20 -22.410 -28.360 + 4.60 8.30 -22.369 -28.497 + 4.60 8.40 -22.324 -28.624 + 4.60 8.50 -22.280 -28.748 + 4.60 8.60 -22.231 -28.872 + 4.60 8.70 -22.183 -28.999 + 4.60 8.80 -22.132 -29.131 + 4.60 8.90 -22.079 -29.267 + 4.60 9.00 -22.026 -29.406 + 4.70 1.00 -30.433 -28.976 + 4.70 1.10 -30.195 -28.976 + 4.70 1.20 -29.954 -28.976 + 4.70 1.30 -29.693 -28.976 + 4.70 1.40 -29.413 -28.976 + 4.70 1.50 -29.134 -28.976 + 4.70 1.60 -28.865 -28.976 + 4.70 1.70 -28.595 -28.976 + 4.70 1.80 -28.317 -28.976 + 4.70 1.90 -28.039 -28.976 + 4.70 2.00 -27.775 -28.976 + 4.70 2.10 -27.535 -28.976 + 4.70 2.20 -27.322 -28.976 + 4.70 2.30 -27.135 -28.976 + 4.70 2.40 -26.973 -28.976 + 4.70 2.50 -26.839 -28.976 + 4.70 2.60 -26.723 -28.976 + 4.70 2.70 -26.623 -28.976 + 4.70 2.80 -26.535 -28.976 + 4.70 2.90 -26.455 -28.975 + 4.70 3.00 -26.376 -28.974 + 4.70 3.10 -26.283 -28.969 + 4.70 3.20 -26.172 -28.961 + 4.70 3.30 -26.046 -28.951 + 4.70 3.40 -25.919 -28.944 + 4.70 3.50 -25.808 -28.937 + 4.70 3.60 -25.881 -28.956 + 4.70 3.70 -25.922 -28.966 + 4.70 3.80 -25.858 -28.960 + 4.70 3.90 -25.733 -28.942 + 4.70 4.00 -25.353 -28.884 + 4.70 4.10 -23.499 -28.114 + 4.70 4.20 -22.019 -27.233 + 4.70 4.30 -21.830 -27.193 + 4.70 4.40 -21.961 -26.894 + 4.70 4.50 -22.005 -26.751 + 4.70 4.60 -21.931 -26.808 + 4.70 4.70 -21.757 -27.024 + 4.70 4.80 -21.535 -27.082 + 4.70 4.90 -21.345 -26.886 + 4.70 5.00 -21.282 -26.675 + 4.70 5.10 -21.329 -26.584 + 4.70 5.20 -21.325 -26.533 + 4.70 5.30 -21.301 -26.574 + 4.70 5.40 -21.295 -26.581 + 4.70 5.50 -21.474 -26.536 + 4.70 5.60 -21.678 -26.500 + 4.70 5.70 -21.729 -26.350 + 4.70 5.80 -21.783 -26.228 + 4.70 5.90 -21.814 -26.241 + 4.70 6.00 -21.820 -26.428 + 4.70 6.10 -21.839 -26.769 + 4.70 6.20 -21.892 -27.076 + 4.70 6.30 -22.070 -27.181 + 4.70 6.40 -22.251 -27.245 + 4.70 6.50 -22.387 -27.356 + 4.70 6.60 -22.467 -27.425 + 4.70 6.70 -22.497 -27.461 + 4.70 6.80 -22.503 -27.503 + 4.70 6.90 -22.493 -27.546 + 4.70 7.00 -22.491 -27.553 + 4.70 7.10 -22.535 -27.604 + 4.70 7.20 -22.610 -27.692 + 4.70 7.30 -22.658 -27.709 + 4.70 7.40 -22.670 -27.762 + 4.70 7.50 -22.660 -27.831 + 4.70 7.60 -22.634 -27.898 + 4.70 7.70 -22.601 -27.939 + 4.70 7.80 -22.564 -27.962 + 4.70 7.90 -22.526 -27.987 + 4.70 8.00 -22.489 -28.100 + 4.70 8.10 -22.450 -28.229 + 4.70 8.20 -22.410 -28.360 + 4.70 8.30 -22.369 -28.497 + 4.70 8.40 -22.324 -28.624 + 4.70 8.50 -22.280 -28.748 + 4.70 8.60 -22.231 -28.872 + 4.70 8.70 -22.183 -28.999 + 4.70 8.80 -22.132 -29.131 + 4.70 8.90 -22.079 -29.267 + 4.70 9.00 -22.026 -29.406 + 4.80 1.00 -30.530 -29.076 + 4.80 1.10 -30.290 -29.076 + 4.80 1.20 -30.047 -29.076 + 4.80 1.30 -29.780 -29.076 + 4.80 1.40 -29.494 -29.076 + 4.80 1.50 -29.209 -29.076 + 4.80 1.60 -28.932 -29.076 + 4.80 1.70 -28.651 -29.076 + 4.80 1.80 -28.362 -29.076 + 4.80 1.90 -28.076 -29.076 + 4.80 2.00 -27.806 -29.076 + 4.80 2.10 -27.564 -29.076 + 4.80 2.20 -27.350 -29.076 + 4.80 2.30 -27.163 -29.076 + 4.80 2.40 -27.002 -29.076 + 4.80 2.50 -26.869 -29.076 + 4.80 2.60 -26.755 -29.076 + 4.80 2.70 -26.657 -29.076 + 4.80 2.80 -26.571 -29.076 + 4.80 2.90 -26.493 -29.075 + 4.80 3.00 -26.411 -29.072 + 4.80 3.10 -26.308 -29.066 + 4.80 3.20 -26.184 -29.054 + 4.80 3.30 -26.045 -29.043 + 4.80 3.40 -25.913 -29.033 + 4.80 3.50 -25.809 -29.029 + 4.80 3.60 -25.920 -29.054 + 4.80 3.70 -25.975 -29.065 + 4.80 3.80 -25.916 -29.058 + 4.80 3.90 -25.791 -29.038 + 4.80 4.00 -25.397 -28.973 + 4.80 4.10 -23.500 -28.130 + 4.80 4.20 -22.019 -27.225 + 4.80 4.30 -21.831 -27.180 + 4.80 4.40 -21.963 -26.887 + 4.80 4.50 -22.007 -26.747 + 4.80 4.60 -21.933 -26.807 + 4.80 4.70 -21.758 -26.955 + 4.80 4.80 -21.535 -26.975 + 4.80 4.90 -21.345 -26.814 + 4.80 5.00 -21.282 -26.646 + 4.80 5.10 -21.329 -26.571 + 4.80 5.20 -21.326 -26.527 + 4.80 5.30 -21.301 -26.571 + 4.80 5.40 -21.295 -26.578 + 4.80 5.50 -21.475 -26.533 + 4.80 5.60 -21.679 -26.484 + 4.80 5.70 -21.729 -26.313 + 4.80 5.80 -21.784 -26.177 + 4.80 5.90 -21.815 -26.186 + 4.80 6.00 -21.821 -26.375 + 4.80 6.10 -21.840 -26.733 + 4.80 6.20 -21.893 -27.066 + 4.80 6.30 -22.070 -27.180 + 4.80 6.40 -22.252 -27.245 + 4.80 6.50 -22.387 -27.356 + 4.80 6.60 -22.467 -27.425 + 4.80 6.70 -22.497 -27.461 + 4.80 6.80 -22.503 -27.503 + 4.80 6.90 -22.493 -27.546 + 4.80 7.00 -22.491 -27.553 + 4.80 7.10 -22.535 -27.604 + 4.80 7.20 -22.610 -27.692 + 4.80 7.30 -22.658 -27.709 + 4.80 7.40 -22.670 -27.762 + 4.80 7.50 -22.660 -27.831 + 4.80 7.60 -22.634 -27.898 + 4.80 7.70 -22.601 -27.939 + 4.80 7.80 -22.564 -27.962 + 4.80 7.90 -22.526 -27.987 + 4.80 8.00 -22.489 -28.100 + 4.80 8.10 -22.450 -28.229 + 4.80 8.20 -22.410 -28.360 + 4.80 8.30 -22.369 -28.497 + 4.80 8.40 -22.324 -28.624 + 4.80 8.50 -22.280 -28.748 + 4.80 8.60 -22.231 -28.872 + 4.80 8.70 -22.183 -28.999 + 4.80 8.80 -22.132 -29.131 + 4.80 8.90 -22.079 -29.267 + 4.80 9.00 -22.026 -29.406 + 4.90 1.00 -30.625 -29.176 + 4.90 1.10 -30.385 -29.176 + 4.90 1.20 -30.139 -29.176 + 4.90 1.30 -29.868 -29.176 + 4.90 1.40 -29.576 -29.176 + 4.90 1.50 -29.285 -29.176 + 4.90 1.60 -28.999 -29.176 + 4.90 1.70 -28.708 -29.176 + 4.90 1.80 -28.407 -29.176 + 4.90 1.90 -28.112 -29.176 + 4.90 2.00 -27.837 -29.176 + 4.90 2.10 -27.592 -29.176 + 4.90 2.20 -27.378 -29.176 + 4.90 2.30 -27.192 -29.176 + 4.90 2.40 -27.032 -29.176 + 4.90 2.50 -26.901 -29.175 + 4.90 2.60 -26.789 -29.175 + 4.90 2.70 -26.693 -29.175 + 4.90 2.80 -26.610 -29.175 + 4.90 2.90 -26.533 -29.175 + 4.90 3.00 -26.445 -29.171 + 4.90 3.10 -26.328 -29.161 + 4.90 3.20 -26.189 -29.148 + 4.90 3.30 -26.043 -29.133 + 4.90 3.40 -25.908 -29.124 + 4.90 3.50 -25.814 -29.120 + 4.90 3.60 -25.958 -29.152 + 4.90 3.70 -26.030 -29.164 + 4.90 3.80 -25.975 -29.156 + 4.90 3.90 -25.851 -29.135 + 4.90 4.00 -25.440 -29.060 + 4.90 4.10 -23.501 -28.144 + 4.90 4.20 -22.020 -27.220 + 4.90 4.30 -21.831 -27.169 + 4.90 4.40 -21.964 -26.882 + 4.90 4.50 -22.009 -26.745 + 4.90 4.60 -21.934 -26.791 + 4.90 4.70 -21.759 -26.900 + 4.90 4.80 -21.536 -26.906 + 4.90 4.90 -21.345 -26.763 + 4.90 5.00 -21.282 -26.624 + 4.90 5.10 -21.330 -26.560 + 4.90 5.20 -21.326 -26.521 + 4.90 5.30 -21.301 -26.568 + 4.90 5.40 -21.296 -26.576 + 4.90 5.50 -21.476 -26.529 + 4.90 5.60 -21.679 -26.467 + 4.90 5.70 -21.729 -26.277 + 4.90 5.80 -21.785 -26.129 + 4.90 5.90 -21.816 -26.134 + 4.90 6.00 -21.822 -26.324 + 4.90 6.10 -21.840 -26.696 + 4.90 6.20 -21.893 -27.056 + 4.90 6.30 -22.070 -27.178 + 4.90 6.40 -22.252 -27.244 + 4.90 6.50 -22.387 -27.356 + 4.90 6.60 -22.467 -27.425 + 4.90 6.70 -22.497 -27.461 + 4.90 6.80 -22.503 -27.503 + 4.90 6.90 -22.493 -27.546 + 4.90 7.00 -22.491 -27.553 + 4.90 7.10 -22.535 -27.604 + 4.90 7.20 -22.610 -27.692 + 4.90 7.30 -22.658 -27.709 + 4.90 7.40 -22.670 -27.762 + 4.90 7.50 -22.660 -27.831 + 4.90 7.60 -22.634 -27.898 + 4.90 7.70 -22.601 -27.939 + 4.90 7.80 -22.564 -27.962 + 4.90 7.90 -22.526 -27.987 + 4.90 8.00 -22.489 -28.100 + 4.90 8.10 -22.450 -28.229 + 4.90 8.20 -22.410 -28.360 + 4.90 8.30 -22.369 -28.497 + 4.90 8.40 -22.324 -28.624 + 4.90 8.50 -22.280 -28.748 + 4.90 8.60 -22.231 -28.872 + 4.90 8.70 -22.183 -28.999 + 4.90 8.80 -22.132 -29.131 + 4.90 8.90 -22.079 -29.267 + 4.90 9.00 -22.026 -29.406 + 5.00 1.00 -30.718 -29.276 + 5.00 1.10 -30.478 -29.276 + 5.00 1.20 -30.230 -29.276 + 5.00 1.30 -29.955 -29.276 + 5.00 1.40 -29.659 -29.276 + 5.00 1.50 -29.362 -29.276 + 5.00 1.60 -29.067 -29.276 + 5.00 1.70 -28.763 -29.276 + 5.00 1.80 -28.451 -29.276 + 5.00 1.90 -28.147 -29.276 + 5.00 2.00 -27.869 -29.276 + 5.00 2.10 -27.622 -29.276 + 5.00 2.20 -27.407 -29.276 + 5.00 2.30 -27.222 -29.276 + 5.00 2.40 -27.064 -29.275 + 5.00 2.50 -26.935 -29.275 + 5.00 2.60 -26.825 -29.275 + 5.00 2.70 -26.731 -29.275 + 5.00 2.80 -26.651 -29.275 + 5.00 2.90 -26.574 -29.274 + 5.00 3.00 -26.477 -29.268 + 5.00 3.10 -26.342 -29.255 + 5.00 3.20 -26.192 -29.239 + 5.00 3.30 -26.040 -29.225 + 5.00 3.40 -25.905 -29.214 + 5.00 3.50 -25.823 -29.212 + 5.00 3.60 -25.993 -29.249 + 5.00 3.70 -26.085 -29.262 + 5.00 3.80 -26.035 -29.255 + 5.00 3.90 -25.911 -29.231 + 5.00 4.00 -25.483 -29.146 + 5.00 4.10 -23.503 -28.156 + 5.00 4.20 -22.020 -27.216 + 5.00 4.30 -21.832 -27.161 + 5.00 4.40 -21.965 -26.878 + 5.00 4.50 -22.011 -26.740 + 5.00 4.60 -21.936 -26.769 + 5.00 4.70 -21.760 -26.861 + 5.00 4.80 -21.536 -26.857 + 5.00 4.90 -21.345 -26.726 + 5.00 5.00 -21.283 -26.607 + 5.00 5.10 -21.330 -26.550 + 5.00 5.20 -21.326 -26.514 + 5.00 5.30 -21.301 -26.563 + 5.00 5.40 -21.296 -26.572 + 5.00 5.50 -21.477 -26.525 + 5.00 5.60 -21.679 -26.451 + 5.00 5.70 -21.730 -26.242 + 5.00 5.80 -21.786 -26.084 + 5.00 5.90 -21.818 -26.085 + 5.00 6.00 -21.823 -26.275 + 5.00 6.10 -21.841 -26.659 + 5.00 6.20 -21.893 -27.045 + 5.00 6.30 -22.070 -27.176 + 5.00 6.40 -22.252 -27.244 + 5.00 6.50 -22.387 -27.356 + 5.00 6.60 -22.467 -27.425 + 5.00 6.70 -22.497 -27.461 + 5.00 6.80 -22.503 -27.503 + 5.00 6.90 -22.493 -27.546 + 5.00 7.00 -22.491 -27.553 + 5.00 7.10 -22.535 -27.604 + 5.00 7.20 -22.610 -27.692 + 5.00 7.30 -22.658 -27.709 + 5.00 7.40 -22.670 -27.762 + 5.00 7.50 -22.660 -27.831 + 5.00 7.60 -22.634 -27.898 + 5.00 7.70 -22.601 -27.939 + 5.00 7.80 -22.564 -27.962 + 5.00 7.90 -22.526 -27.987 + 5.00 8.00 -22.489 -28.100 + 5.00 8.10 -22.450 -28.229 + 5.00 8.20 -22.410 -28.360 + 5.00 8.30 -22.369 -28.497 + 5.00 8.40 -22.324 -28.624 + 5.00 8.50 -22.280 -28.748 + 5.00 8.60 -22.231 -28.872 + 5.00 8.70 -22.183 -28.999 + 5.00 8.80 -22.132 -29.131 + 5.00 8.90 -22.079 -29.267 + 5.00 9.00 -22.026 -29.406 + 5.10 1.00 -30.810 -29.376 + 5.10 1.10 -30.569 -29.376 + 5.10 1.20 -30.319 -29.376 + 5.10 1.30 -30.041 -29.376 + 5.10 1.40 -29.742 -29.376 + 5.10 1.50 -29.439 -29.376 + 5.10 1.60 -29.134 -29.376 + 5.10 1.70 -28.818 -29.376 + 5.10 1.80 -28.494 -29.376 + 5.10 1.90 -28.183 -29.376 + 5.10 2.00 -27.900 -29.376 + 5.10 2.10 -27.653 -29.376 + 5.10 2.20 -27.438 -29.376 + 5.10 2.30 -27.254 -29.375 + 5.10 2.40 -27.098 -29.375 + 5.10 2.50 -26.971 -29.375 + 5.10 2.60 -26.863 -29.375 + 5.10 2.70 -26.772 -29.375 + 5.10 2.80 -26.694 -29.375 + 5.10 2.90 -26.616 -29.373 + 5.10 3.00 -26.505 -29.365 + 5.10 3.10 -26.350 -29.349 + 5.10 3.20 -26.193 -29.331 + 5.10 3.30 -26.041 -29.315 + 5.10 3.40 -25.907 -29.306 + 5.10 3.50 -25.835 -29.305 + 5.10 3.60 -26.024 -29.346 + 5.10 3.70 -26.140 -29.361 + 5.10 3.80 -26.095 -29.352 + 5.10 3.90 -25.973 -29.327 + 5.10 4.00 -25.523 -29.229 + 5.10 4.10 -23.504 -28.167 + 5.10 4.20 -22.021 -27.214 + 5.10 4.30 -21.832 -27.155 + 5.10 4.40 -21.966 -26.875 + 5.10 4.50 -22.012 -26.729 + 5.10 4.60 -21.937 -26.753 + 5.10 4.70 -21.761 -26.833 + 5.10 4.80 -21.536 -26.822 + 5.10 4.90 -21.345 -26.697 + 5.10 5.00 -21.283 -26.593 + 5.10 5.10 -21.331 -26.541 + 5.10 5.20 -21.326 -26.507 + 5.10 5.30 -21.302 -26.558 + 5.10 5.40 -21.297 -26.568 + 5.10 5.50 -21.478 -26.521 + 5.10 5.60 -21.680 -26.436 + 5.10 5.70 -21.731 -26.211 + 5.10 5.80 -21.787 -26.043 + 5.10 5.90 -21.819 -26.040 + 5.10 6.00 -21.824 -26.230 + 5.10 6.10 -21.841 -26.623 + 5.10 6.20 -21.893 -27.033 + 5.10 6.30 -22.070 -27.174 + 5.10 6.40 -22.252 -27.243 + 5.10 6.50 -22.387 -27.356 + 5.10 6.60 -22.467 -27.425 + 5.10 6.70 -22.497 -27.461 + 5.10 6.80 -22.503 -27.503 + 5.10 6.90 -22.493 -27.546 + 5.10 7.00 -22.491 -27.553 + 5.10 7.10 -22.535 -27.604 + 5.10 7.20 -22.610 -27.692 + 5.10 7.30 -22.658 -27.709 + 5.10 7.40 -22.670 -27.762 + 5.10 7.50 -22.660 -27.831 + 5.10 7.60 -22.634 -27.898 + 5.10 7.70 -22.601 -27.939 + 5.10 7.80 -22.564 -27.962 + 5.10 7.90 -22.526 -27.987 + 5.10 8.00 -22.489 -28.100 + 5.10 8.10 -22.450 -28.229 + 5.10 8.20 -22.410 -28.360 + 5.10 8.30 -22.369 -28.497 + 5.10 8.40 -22.324 -28.624 + 5.10 8.50 -22.280 -28.748 + 5.10 8.60 -22.231 -28.872 + 5.10 8.70 -22.183 -28.999 + 5.10 8.80 -22.132 -29.131 + 5.10 8.90 -22.079 -29.267 + 5.10 9.00 -22.026 -29.406 + 5.20 1.00 -30.898 -29.476 + 5.20 1.10 -30.657 -29.476 + 5.20 1.20 -30.406 -29.476 + 5.20 1.30 -30.126 -29.476 + 5.20 1.40 -29.823 -29.476 + 5.20 1.50 -29.515 -29.476 + 5.20 1.60 -29.200 -29.476 + 5.20 1.70 -28.870 -29.476 + 5.20 1.80 -28.535 -29.476 + 5.20 1.90 -28.218 -29.476 + 5.20 2.00 -27.933 -29.476 + 5.20 2.10 -27.684 -29.476 + 5.20 2.20 -27.471 -29.475 + 5.20 2.30 -27.289 -29.475 + 5.20 2.40 -27.135 -29.475 + 5.20 2.50 -27.009 -29.475 + 5.20 2.60 -26.904 -29.475 + 5.20 2.70 -26.816 -29.475 + 5.20 2.80 -26.740 -29.474 + 5.20 2.90 -26.658 -29.472 + 5.20 3.00 -26.526 -29.461 + 5.20 3.10 -26.355 -29.441 + 5.20 3.20 -26.196 -29.424 + 5.20 3.30 -26.044 -29.409 + 5.20 3.40 -25.912 -29.397 + 5.20 3.50 -25.851 -29.400 + 5.20 3.60 -26.050 -29.442 + 5.20 3.70 -26.194 -29.459 + 5.20 3.80 -26.156 -29.450 + 5.20 3.90 -26.035 -29.423 + 5.20 4.00 -25.561 -29.310 + 5.20 4.10 -23.506 -28.177 + 5.20 4.20 -22.021 -27.213 + 5.20 4.30 -21.833 -27.150 + 5.20 4.40 -21.967 -26.862 + 5.20 4.50 -22.013 -26.719 + 5.20 4.60 -21.939 -26.741 + 5.20 4.70 -21.762 -26.812 + 5.20 4.80 -21.537 -26.796 + 5.20 4.90 -21.345 -26.676 + 5.20 5.00 -21.283 -26.582 + 5.20 5.10 -21.331 -26.530 + 5.20 5.20 -21.327 -26.497 + 5.20 5.30 -21.302 -26.551 + 5.20 5.40 -21.297 -26.563 + 5.20 5.50 -21.479 -26.516 + 5.20 5.60 -21.680 -26.421 + 5.20 5.70 -21.731 -26.182 + 5.20 5.80 -21.788 -26.007 + 5.20 5.90 -21.821 -26.000 + 5.20 6.00 -21.826 -26.188 + 5.20 6.10 -21.842 -26.588 + 5.20 6.20 -21.893 -27.020 + 5.20 6.30 -22.070 -27.171 + 5.20 6.40 -22.252 -27.242 + 5.20 6.50 -22.387 -27.355 + 5.20 6.60 -22.467 -27.425 + 5.20 6.70 -22.497 -27.461 + 5.20 6.80 -22.503 -27.503 + 5.20 6.90 -22.493 -27.546 + 5.20 7.00 -22.491 -27.553 + 5.20 7.10 -22.535 -27.604 + 5.20 7.20 -22.610 -27.692 + 5.20 7.30 -22.658 -27.709 + 5.20 7.40 -22.670 -27.762 + 5.20 7.50 -22.660 -27.831 + 5.20 7.60 -22.634 -27.898 + 5.20 7.70 -22.601 -27.939 + 5.20 7.80 -22.564 -27.962 + 5.20 7.90 -22.526 -27.987 + 5.20 8.00 -22.489 -28.100 + 5.20 8.10 -22.450 -28.229 + 5.20 8.20 -22.410 -28.360 + 5.20 8.30 -22.369 -28.497 + 5.20 8.40 -22.324 -28.624 + 5.20 8.50 -22.280 -28.748 + 5.20 8.60 -22.231 -28.872 + 5.20 8.70 -22.183 -28.999 + 5.20 8.80 -22.132 -29.131 + 5.20 8.90 -22.079 -29.267 + 5.20 9.00 -22.026 -29.406 + 5.30 1.00 -30.980 -29.576 + 5.30 1.10 -30.739 -29.576 + 5.30 1.20 -30.488 -29.576 + 5.30 1.30 -30.206 -29.576 + 5.30 1.40 -29.901 -29.576 + 5.30 1.50 -29.588 -29.576 + 5.30 1.60 -29.262 -29.576 + 5.30 1.70 -28.920 -29.576 + 5.30 1.80 -28.575 -29.576 + 5.30 1.90 -28.252 -29.576 + 5.30 2.00 -27.966 -29.576 + 5.30 2.10 -27.718 -29.575 + 5.30 2.20 -27.506 -29.575 + 5.30 2.30 -27.326 -29.575 + 5.30 2.40 -27.174 -29.575 + 5.30 2.50 -27.050 -29.574 + 5.30 2.60 -26.947 -29.574 + 5.30 2.70 -26.862 -29.574 + 5.30 2.80 -26.788 -29.574 + 5.30 2.90 -26.698 -29.570 + 5.30 3.00 -26.540 -29.556 + 5.30 3.10 -26.360 -29.535 + 5.30 3.20 -26.203 -29.516 + 5.30 3.30 -26.052 -29.503 + 5.30 3.40 -25.922 -29.494 + 5.30 3.50 -25.869 -29.497 + 5.30 3.60 -26.072 -29.538 + 5.30 3.70 -26.245 -29.557 + 5.30 3.80 -26.217 -29.548 + 5.30 3.90 -26.096 -29.519 + 5.30 4.00 -25.598 -29.388 + 5.30 4.10 -23.507 -28.186 + 5.30 4.20 -22.021 -27.214 + 5.30 4.30 -21.833 -27.146 + 5.30 4.40 -21.968 -26.845 + 5.30 4.50 -22.015 -26.711 + 5.30 4.60 -21.940 -26.731 + 5.30 4.70 -21.763 -26.795 + 5.30 4.80 -21.537 -26.776 + 5.30 4.90 -21.345 -26.658 + 5.30 5.00 -21.284 -26.572 + 5.30 5.10 -21.332 -26.519 + 5.30 5.20 -21.327 -26.484 + 5.30 5.30 -21.302 -26.541 + 5.30 5.40 -21.298 -26.556 + 5.30 5.50 -21.481 -26.511 + 5.30 5.60 -21.681 -26.407 + 5.30 5.70 -21.732 -26.156 + 5.30 5.80 -21.789 -25.975 + 5.30 5.90 -21.822 -25.964 + 5.30 6.00 -21.827 -26.151 + 5.30 6.10 -21.843 -26.556 + 5.30 6.20 -21.893 -27.007 + 5.30 6.30 -22.070 -27.168 + 5.30 6.40 -22.252 -27.241 + 5.30 6.50 -22.387 -27.355 + 5.30 6.60 -22.467 -27.425 + 5.30 6.70 -22.497 -27.461 + 5.30 6.80 -22.503 -27.503 + 5.30 6.90 -22.493 -27.546 + 5.30 7.00 -22.491 -27.553 + 5.30 7.10 -22.535 -27.604 + 5.30 7.20 -22.610 -27.692 + 5.30 7.30 -22.658 -27.709 + 5.30 7.40 -22.670 -27.762 + 5.30 7.50 -22.660 -27.831 + 5.30 7.60 -22.634 -27.898 + 5.30 7.70 -22.601 -27.939 + 5.30 7.80 -22.564 -27.962 + 5.30 7.90 -22.526 -27.987 + 5.30 8.00 -22.489 -28.100 + 5.30 8.10 -22.450 -28.229 + 5.30 8.20 -22.410 -28.360 + 5.30 8.30 -22.369 -28.497 + 5.30 8.40 -22.324 -28.624 + 5.30 8.50 -22.280 -28.748 + 5.30 8.60 -22.231 -28.872 + 5.30 8.70 -22.183 -28.999 + 5.30 8.80 -22.132 -29.131 + 5.30 8.90 -22.079 -29.267 + 5.30 9.00 -22.026 -29.406 + 5.40 1.00 -31.055 -29.676 + 5.40 1.10 -30.815 -29.676 + 5.40 1.20 -30.563 -29.676 + 5.40 1.30 -30.282 -29.676 + 5.40 1.40 -29.975 -29.676 + 5.40 1.50 -29.657 -29.676 + 5.40 1.60 -29.321 -29.676 + 5.40 1.70 -28.967 -29.676 + 5.40 1.80 -28.614 -29.676 + 5.40 1.90 -28.287 -29.675 + 5.40 2.00 -28.000 -29.675 + 5.40 2.10 -27.753 -29.675 + 5.40 2.20 -27.543 -29.675 + 5.40 2.30 -27.365 -29.675 + 5.40 2.40 -27.215 -29.674 + 5.40 2.50 -27.093 -29.674 + 5.40 2.60 -26.992 -29.673 + 5.40 2.70 -26.911 -29.674 + 5.40 2.80 -26.838 -29.673 + 5.40 2.90 -26.734 -29.668 + 5.40 3.00 -26.549 -29.651 + 5.40 3.10 -26.367 -29.630 + 5.40 3.20 -26.214 -29.612 + 5.40 3.30 -26.064 -29.599 + 5.40 3.40 -25.937 -29.588 + 5.40 3.50 -25.889 -29.592 + 5.40 3.60 -26.088 -29.634 + 5.40 3.70 -26.292 -29.655 + 5.40 3.80 -26.276 -29.645 + 5.40 3.90 -26.158 -29.615 + 5.40 4.00 -25.631 -29.463 + 5.40 4.10 -23.508 -28.194 + 5.40 4.20 -22.022 -27.215 + 5.40 4.30 -21.834 -27.143 + 5.40 4.40 -21.969 -26.832 + 5.40 4.50 -22.016 -26.706 + 5.40 4.60 -21.941 -26.724 + 5.40 4.70 -21.764 -26.783 + 5.40 4.80 -21.538 -26.761 + 5.40 4.90 -21.345 -26.645 + 5.40 5.00 -21.284 -26.564 + 5.40 5.10 -21.332 -26.506 + 5.40 5.20 -21.328 -26.468 + 5.40 5.30 -21.302 -26.527 + 5.40 5.40 -21.298 -26.548 + 5.40 5.50 -21.482 -26.505 + 5.40 5.60 -21.682 -26.394 + 5.40 5.70 -21.732 -26.134 + 5.40 5.80 -21.790 -25.946 + 5.40 5.90 -21.823 -25.933 + 5.40 6.00 -21.828 -26.118 + 5.40 6.10 -21.843 -26.527 + 5.40 6.20 -21.894 -26.994 + 5.40 6.30 -22.070 -27.165 + 5.40 6.40 -22.252 -27.240 + 5.40 6.50 -22.387 -27.355 + 5.40 6.60 -22.467 -27.425 + 5.40 6.70 -22.497 -27.461 + 5.40 6.80 -22.503 -27.503 + 5.40 6.90 -22.493 -27.546 + 5.40 7.00 -22.491 -27.553 + 5.40 7.10 -22.535 -27.604 + 5.40 7.20 -22.610 -27.692 + 5.40 7.30 -22.658 -27.709 + 5.40 7.40 -22.670 -27.762 + 5.40 7.50 -22.660 -27.831 + 5.40 7.60 -22.634 -27.898 + 5.40 7.70 -22.601 -27.939 + 5.40 7.80 -22.564 -27.962 + 5.40 7.90 -22.526 -27.987 + 5.40 8.00 -22.489 -28.100 + 5.40 8.10 -22.450 -28.229 + 5.40 8.20 -22.410 -28.360 + 5.40 8.30 -22.369 -28.497 + 5.40 8.40 -22.324 -28.624 + 5.40 8.50 -22.280 -28.748 + 5.40 8.60 -22.231 -28.872 + 5.40 8.70 -22.183 -28.999 + 5.40 8.80 -22.132 -29.131 + 5.40 8.90 -22.079 -29.267 + 5.40 9.00 -22.026 -29.406 + 5.50 1.00 -31.118 -29.775 + 5.50 1.10 -30.880 -29.776 + 5.50 1.20 -30.629 -29.776 + 5.50 1.30 -30.349 -29.776 + 5.50 1.40 -30.042 -29.776 + 5.50 1.50 -29.720 -29.776 + 5.50 1.60 -29.375 -29.776 + 5.50 1.70 -29.011 -29.775 + 5.50 1.80 -28.652 -29.775 + 5.50 1.90 -28.323 -29.775 + 5.50 2.00 -28.035 -29.775 + 5.50 2.10 -27.790 -29.775 + 5.50 2.20 -27.583 -29.775 + 5.50 2.30 -27.407 -29.775 + 5.50 2.40 -27.259 -29.774 + 5.50 2.50 -27.137 -29.773 + 5.50 2.60 -27.039 -29.773 + 5.50 2.70 -26.962 -29.773 + 5.50 2.80 -26.888 -29.773 + 5.50 2.90 -26.762 -29.764 + 5.50 3.00 -26.556 -29.745 + 5.50 3.10 -26.379 -29.726 + 5.50 3.20 -26.229 -29.710 + 5.50 3.30 -26.081 -29.698 + 5.50 3.40 -25.956 -29.687 + 5.50 3.50 -25.911 -29.691 + 5.50 3.60 -26.099 -29.729 + 5.50 3.70 -26.335 -29.753 + 5.50 3.80 -26.335 -29.743 + 5.50 3.90 -26.219 -29.710 + 5.50 4.00 -25.662 -29.534 + 5.50 4.10 -23.510 -28.201 + 5.50 4.20 -22.022 -27.216 + 5.50 4.30 -21.834 -27.084 + 5.50 4.40 -21.969 -26.822 + 5.50 4.50 -22.017 -26.702 + 5.50 4.60 -21.942 -26.719 + 5.50 4.70 -21.765 -26.774 + 5.50 4.80 -21.538 -26.746 + 5.50 4.90 -21.346 -26.633 + 5.50 5.00 -21.284 -26.556 + 5.50 5.10 -21.333 -26.491 + 5.50 5.20 -21.328 -26.449 + 5.50 5.30 -21.303 -26.510 + 5.50 5.40 -21.299 -26.536 + 5.50 5.50 -21.483 -26.498 + 5.50 5.60 -21.682 -26.382 + 5.50 5.70 -21.733 -26.114 + 5.50 5.80 -21.791 -25.922 + 5.50 5.90 -21.824 -25.906 + 5.50 6.00 -21.830 -26.089 + 5.50 6.10 -21.844 -26.500 + 5.50 6.20 -21.894 -26.981 + 5.50 6.30 -22.071 -27.162 + 5.50 6.40 -22.252 -27.239 + 5.50 6.50 -22.387 -27.355 + 5.50 6.60 -22.467 -27.425 + 5.50 6.70 -22.497 -27.461 + 5.50 6.80 -22.503 -27.503 + 5.50 6.90 -22.493 -27.546 + 5.50 7.00 -22.491 -27.553 + 5.50 7.10 -22.535 -27.604 + 5.50 7.20 -22.610 -27.692 + 5.50 7.30 -22.658 -27.709 + 5.50 7.40 -22.670 -27.762 + 5.50 7.50 -22.660 -27.831 + 5.50 7.60 -22.634 -27.898 + 5.50 7.70 -22.601 -27.939 + 5.50 7.80 -22.564 -27.962 + 5.50 7.90 -22.526 -27.987 + 5.50 8.00 -22.489 -28.100 + 5.50 8.10 -22.450 -28.229 + 5.50 8.20 -22.410 -28.360 + 5.50 8.30 -22.369 -28.497 + 5.50 8.40 -22.324 -28.624 + 5.50 8.50 -22.280 -28.748 + 5.50 8.60 -22.231 -28.872 + 5.50 8.70 -22.183 -28.999 + 5.50 8.80 -22.132 -29.131 + 5.50 8.90 -22.079 -29.267 + 5.50 9.00 -22.026 -29.406 + 5.60 1.00 -31.165 -29.875 + 5.60 1.10 -30.930 -29.875 + 5.60 1.20 -30.681 -29.875 + 5.60 1.30 -30.404 -29.875 + 5.60 1.40 -30.099 -29.876 + 5.60 1.50 -29.774 -29.876 + 5.60 1.60 -29.422 -29.876 + 5.60 1.70 -29.051 -29.875 + 5.60 1.80 -28.688 -29.875 + 5.60 1.90 -28.359 -29.875 + 5.60 2.00 -28.073 -29.875 + 5.60 2.10 -27.831 -29.875 + 5.60 2.20 -27.626 -29.875 + 5.60 2.30 -27.452 -29.874 + 5.60 2.40 -27.305 -29.873 + 5.60 2.50 -27.183 -29.872 + 5.60 2.60 -27.086 -29.872 + 5.60 2.70 -27.013 -29.872 + 5.60 2.80 -26.937 -29.871 + 5.60 2.90 -26.781 -29.860 + 5.60 3.00 -26.566 -29.840 + 5.60 3.10 -26.397 -29.823 + 5.60 3.20 -26.250 -29.808 + 5.60 3.30 -26.102 -29.798 + 5.60 3.40 -25.980 -29.786 + 5.60 3.50 -25.935 -29.790 + 5.60 3.60 -26.108 -29.825 + 5.60 3.70 -26.371 -29.850 + 5.60 3.80 -26.391 -29.840 + 5.60 3.90 -26.280 -29.806 + 5.60 4.00 -25.690 -29.601 + 5.60 4.10 -23.511 -28.208 + 5.60 4.20 -22.023 -27.218 + 5.60 4.30 -21.835 -27.034 + 5.60 4.40 -21.970 -26.814 + 5.60 4.50 -22.018 -26.698 + 5.60 4.60 -21.943 -26.715 + 5.60 4.70 -21.766 -26.766 + 5.60 4.80 -21.539 -26.729 + 5.60 4.90 -21.346 -26.624 + 5.60 5.00 -21.285 -26.549 + 5.60 5.10 -21.334 -26.474 + 5.60 5.20 -21.328 -26.426 + 5.60 5.30 -21.303 -26.489 + 5.60 5.40 -21.300 -26.523 + 5.60 5.50 -21.484 -26.490 + 5.60 5.60 -21.683 -26.370 + 5.60 5.70 -21.733 -26.097 + 5.60 5.80 -21.792 -25.901 + 5.60 5.90 -21.826 -25.882 + 5.60 6.00 -21.831 -26.063 + 5.60 6.10 -21.844 -26.476 + 5.60 6.20 -21.894 -26.968 + 5.60 6.30 -22.071 -27.158 + 5.60 6.40 -22.252 -27.238 + 5.60 6.50 -22.387 -27.355 + 5.60 6.60 -22.467 -27.425 + 5.60 6.70 -22.497 -27.461 + 5.60 6.80 -22.503 -27.503 + 5.60 6.90 -22.493 -27.546 + 5.60 7.00 -22.491 -27.553 + 5.60 7.10 -22.535 -27.604 + 5.60 7.20 -22.610 -27.692 + 5.60 7.30 -22.658 -27.709 + 5.60 7.40 -22.670 -27.762 + 5.60 7.50 -22.660 -27.831 + 5.60 7.60 -22.634 -27.898 + 5.60 7.70 -22.601 -27.939 + 5.60 7.80 -22.564 -27.962 + 5.60 7.90 -22.526 -27.987 + 5.60 8.00 -22.489 -28.100 + 5.60 8.10 -22.450 -28.229 + 5.60 8.20 -22.410 -28.360 + 5.60 8.30 -22.369 -28.497 + 5.60 8.40 -22.324 -28.624 + 5.60 8.50 -22.280 -28.748 + 5.60 8.60 -22.231 -28.872 + 5.60 8.70 -22.183 -28.999 + 5.60 8.80 -22.132 -29.131 + 5.60 8.90 -22.079 -29.267 + 5.60 9.00 -22.026 -29.406 + 5.70 1.00 -31.188 -29.975 + 5.70 1.10 -30.959 -29.975 + 5.70 1.20 -30.715 -29.975 + 5.70 1.30 -30.443 -29.975 + 5.70 1.40 -30.143 -29.975 + 5.70 1.50 -29.818 -29.975 + 5.70 1.60 -29.463 -29.975 + 5.70 1.70 -29.088 -29.975 + 5.70 1.80 -28.724 -29.975 + 5.70 1.90 -28.396 -29.975 + 5.70 2.00 -28.114 -29.975 + 5.70 2.10 -27.874 -29.975 + 5.70 2.20 -27.672 -29.974 + 5.70 2.30 -27.500 -29.974 + 5.70 2.40 -27.352 -29.972 + 5.70 2.50 -27.228 -29.971 + 5.70 2.60 -27.134 -29.970 + 5.70 2.70 -27.065 -29.971 + 5.70 2.80 -26.981 -29.970 + 5.70 2.90 -26.793 -29.956 + 5.70 3.00 -26.581 -29.936 + 5.70 3.10 -26.421 -29.921 + 5.70 3.20 -26.276 -29.907 + 5.70 3.30 -26.128 -29.897 + 5.70 3.40 -26.008 -29.885 + 5.70 3.50 -25.962 -29.889 + 5.70 3.60 -26.114 -29.922 + 5.70 3.70 -26.400 -29.948 + 5.70 3.80 -26.445 -29.937 + 5.70 3.90 -26.340 -29.901 + 5.70 4.00 -25.716 -29.663 + 5.70 4.10 -23.512 -28.215 + 5.70 4.20 -22.023 -27.220 + 5.70 4.30 -21.835 -26.998 + 5.70 4.40 -21.971 -26.808 + 5.70 4.50 -22.019 -26.696 + 5.70 4.60 -21.944 -26.713 + 5.70 4.70 -21.767 -26.761 + 5.70 4.80 -21.540 -26.715 + 5.70 4.90 -21.346 -26.616 + 5.70 5.00 -21.286 -26.542 + 5.70 5.10 -21.334 -26.455 + 5.70 5.20 -21.329 -26.401 + 5.70 5.30 -21.304 -26.465 + 5.70 5.40 -21.301 -26.507 + 5.70 5.50 -21.486 -26.481 + 5.70 5.60 -21.683 -26.360 + 5.70 5.70 -21.734 -26.081 + 5.70 5.80 -21.792 -25.884 + 5.70 5.90 -21.827 -25.863 + 5.70 6.00 -21.832 -26.042 + 5.70 6.10 -21.845 -26.455 + 5.70 6.20 -21.894 -26.956 + 5.70 6.30 -22.071 -27.154 + 5.70 6.40 -22.252 -27.237 + 5.70 6.50 -22.387 -27.355 + 5.70 6.60 -22.467 -27.425 + 5.70 6.70 -22.497 -27.461 + 5.70 6.80 -22.503 -27.503 + 5.70 6.90 -22.493 -27.546 + 5.70 7.00 -22.491 -27.553 + 5.70 7.10 -22.535 -27.604 + 5.70 7.20 -22.610 -27.692 + 5.70 7.30 -22.658 -27.709 + 5.70 7.40 -22.670 -27.762 + 5.70 7.50 -22.660 -27.831 + 5.70 7.60 -22.634 -27.898 + 5.70 7.70 -22.601 -27.939 + 5.70 7.80 -22.564 -27.962 + 5.70 7.90 -22.526 -27.987 + 5.70 8.00 -22.489 -28.100 + 5.70 8.10 -22.450 -28.229 + 5.70 8.20 -22.410 -28.360 + 5.70 8.30 -22.369 -28.497 + 5.70 8.40 -22.324 -28.624 + 5.70 8.50 -22.280 -28.748 + 5.70 8.60 -22.231 -28.872 + 5.70 8.70 -22.183 -28.999 + 5.70 8.80 -22.132 -29.131 + 5.70 8.90 -22.079 -29.267 + 5.70 9.00 -22.026 -29.406 + 5.80 1.00 -31.181 -30.075 + 5.80 1.10 -30.963 -30.075 + 5.80 1.20 -30.726 -30.075 + 5.80 1.30 -30.461 -30.075 + 5.80 1.40 -30.169 -30.075 + 5.80 1.50 -29.849 -30.075 + 5.80 1.60 -29.494 -30.075 + 5.80 1.70 -29.120 -30.075 + 5.80 1.80 -28.759 -30.075 + 5.80 1.90 -28.435 -30.075 + 5.80 2.00 -28.157 -30.075 + 5.80 2.10 -27.921 -30.074 + 5.80 2.20 -27.721 -30.073 + 5.80 2.30 -27.549 -30.073 + 5.80 2.40 -27.400 -30.070 + 5.80 2.50 -27.275 -30.069 + 5.80 2.60 -27.182 -30.069 + 5.80 2.70 -27.117 -30.070 + 5.80 2.80 -27.019 -30.068 + 5.80 2.90 -26.804 -30.051 + 5.80 3.00 -26.604 -30.034 + 5.80 3.10 -26.450 -30.020 + 5.80 3.20 -26.306 -30.006 + 5.80 3.30 -26.158 -29.997 + 5.80 3.40 -26.038 -29.984 + 5.80 3.50 -25.991 -29.991 + 5.80 3.60 -26.120 -30.019 + 5.80 3.70 -26.421 -30.045 + 5.80 3.80 -26.496 -30.034 + 5.80 3.90 -26.399 -29.996 + 5.80 4.00 -25.738 -29.720 + 5.80 4.10 -23.514 -28.219 + 5.80 4.20 -22.023 -27.223 + 5.80 4.30 -21.836 -26.972 + 5.80 4.40 -21.972 -26.804 + 5.80 4.50 -22.020 -26.694 + 5.80 4.60 -21.945 -26.711 + 5.80 4.70 -21.768 -26.757 + 5.80 4.80 -21.540 -26.703 + 5.80 4.90 -21.347 -26.610 + 5.80 5.00 -21.286 -26.503 + 5.80 5.10 -21.335 -26.258 + 5.80 5.20 -21.329 -26.153 + 5.80 5.30 -21.304 -26.248 + 5.80 5.40 -21.301 -26.405 + 5.80 5.50 -21.487 -26.462 + 5.80 5.60 -21.684 -26.349 + 5.80 5.70 -21.735 -26.068 + 5.80 5.80 -21.793 -25.869 + 5.80 5.90 -21.828 -25.846 + 5.80 6.00 -21.833 -26.023 + 5.80 6.10 -21.846 -26.437 + 5.80 6.20 -21.894 -26.944 + 5.80 6.30 -22.071 -27.150 + 5.80 6.40 -22.252 -27.235 + 5.80 6.50 -22.387 -27.354 + 5.80 6.60 -22.467 -27.425 + 5.80 6.70 -22.497 -27.461 + 5.80 6.80 -22.503 -27.503 + 5.80 6.90 -22.493 -27.546 + 5.80 7.00 -22.491 -27.553 + 5.80 7.10 -22.535 -27.604 + 5.80 7.20 -22.610 -27.692 + 5.80 7.30 -22.658 -27.709 + 5.80 7.40 -22.670 -27.762 + 5.80 7.50 -22.660 -27.831 + 5.80 7.60 -22.634 -27.898 + 5.80 7.70 -22.601 -27.939 + 5.80 7.80 -22.564 -27.962 + 5.80 7.90 -22.526 -27.987 + 5.80 8.00 -22.489 -28.100 + 5.80 8.10 -22.450 -28.229 + 5.80 8.20 -22.410 -28.360 + 5.80 8.30 -22.369 -28.497 + 5.80 8.40 -22.324 -28.624 + 5.80 8.50 -22.280 -28.748 + 5.80 8.60 -22.231 -28.872 + 5.80 8.70 -22.183 -28.999 + 5.80 8.80 -22.132 -29.131 + 5.80 8.90 -22.079 -29.267 + 5.80 9.00 -22.026 -29.406 + 5.90 1.00 -31.139 -30.175 + 5.90 1.10 -30.935 -30.175 + 5.90 1.20 -30.710 -30.175 + 5.90 1.30 -30.456 -30.175 + 5.90 1.40 -30.175 -30.175 + 5.90 1.50 -29.864 -30.175 + 5.90 1.60 -29.516 -30.175 + 5.90 1.70 -29.149 -30.175 + 5.90 1.80 -28.794 -30.175 + 5.90 1.90 -28.477 -30.175 + 5.90 2.00 -28.204 -30.175 + 5.90 2.10 -27.972 -30.174 + 5.90 2.20 -27.774 -30.173 + 5.90 2.30 -27.602 -30.172 + 5.90 2.40 -27.450 -30.169 + 5.90 2.50 -27.323 -30.168 + 5.90 2.60 -27.230 -30.167 + 5.90 2.70 -27.167 -30.169 + 5.90 2.80 -27.047 -30.165 + 5.90 2.90 -26.821 -30.147 + 5.90 3.00 -26.635 -30.132 + 5.90 3.10 -26.486 -30.119 + 5.90 3.20 -26.341 -30.106 + 5.90 3.30 -26.183 -30.091 + 5.90 3.40 -26.065 -30.075 + 5.90 3.50 -26.024 -30.092 + 5.90 3.60 -26.127 -30.116 + 5.90 3.70 -26.434 -30.142 + 5.90 3.80 -26.543 -30.131 + 5.90 3.90 -26.457 -30.091 + 5.90 4.00 -25.758 -29.773 + 5.90 4.10 -23.515 -28.221 + 5.90 4.20 -22.024 -27.225 + 5.90 4.30 -21.836 -26.952 + 5.90 4.40 -21.973 -26.800 + 5.90 4.50 -22.022 -26.693 + 5.90 4.60 -21.946 -26.710 + 5.90 4.70 -21.769 -26.753 + 5.90 4.80 -21.541 -26.692 + 5.90 4.90 -21.347 -26.579 + 5.90 5.00 -21.287 -26.319 + 5.90 5.10 -21.336 -26.070 + 5.90 5.20 -21.330 -25.992 + 5.90 5.30 -21.305 -26.112 + 5.90 5.40 -21.302 -26.323 + 5.90 5.50 -21.488 -26.441 + 5.90 5.60 -21.685 -26.339 + 5.90 5.70 -21.735 -26.057 + 5.90 5.80 -21.794 -25.856 + 5.90 5.90 -21.828 -25.832 + 5.90 6.00 -21.834 -26.008 + 5.90 6.10 -21.846 -26.421 + 5.90 6.20 -21.895 -26.934 + 5.90 6.30 -22.071 -27.146 + 5.90 6.40 -22.252 -27.234 + 5.90 6.50 -22.387 -27.354 + 5.90 6.60 -22.467 -27.425 + 5.90 6.70 -22.497 -27.461 + 5.90 6.80 -22.503 -27.503 + 5.90 6.90 -22.493 -27.546 + 5.90 7.00 -22.491 -27.553 + 5.90 7.10 -22.535 -27.604 + 5.90 7.20 -22.610 -27.692 + 5.90 7.30 -22.658 -27.709 + 5.90 7.40 -22.670 -27.762 + 5.90 7.50 -22.660 -27.831 + 5.90 7.60 -22.634 -27.898 + 5.90 7.70 -22.601 -27.939 + 5.90 7.80 -22.564 -27.962 + 5.90 7.90 -22.526 -27.987 + 5.90 8.00 -22.489 -28.100 + 5.90 8.10 -22.450 -28.229 + 5.90 8.20 -22.410 -28.360 + 5.90 8.30 -22.369 -28.497 + 5.90 8.40 -22.324 -28.624 + 5.90 8.50 -22.280 -28.748 + 5.90 8.60 -22.231 -28.872 + 5.90 8.70 -22.183 -28.999 + 5.90 8.80 -22.132 -29.131 + 5.90 8.90 -22.079 -29.267 + 5.90 9.00 -22.026 -29.406 + 6.00 1.00 -31.073 -30.275 + 6.00 1.10 -30.879 -30.275 + 6.00 1.20 -30.666 -30.275 + 6.00 1.30 -30.426 -30.275 + 6.00 1.40 -30.160 -30.275 + 6.00 1.50 -29.863 -30.275 + 6.00 1.60 -29.528 -30.275 + 6.00 1.70 -29.174 -30.275 + 6.00 1.80 -28.831 -30.275 + 6.00 1.90 -28.522 -30.274 + 6.00 2.00 -28.256 -30.274 + 6.00 2.10 -28.028 -30.273 + 6.00 2.20 -27.832 -30.272 + 6.00 2.30 -27.659 -30.271 + 6.00 2.40 -27.505 -30.267 + 6.00 2.50 -27.374 -30.266 + 6.00 2.60 -27.280 -30.266 + 6.00 2.70 -27.214 -30.267 + 6.00 2.80 -27.069 -30.262 + 6.00 2.90 -26.846 -30.244 + 6.00 3.00 -26.673 -30.231 + 6.00 3.10 -26.526 -30.219 + 6.00 3.20 -26.377 -30.203 + 6.00 3.30 -26.121 -30.123 + 6.00 3.40 -26.074 -30.147 + 6.00 3.50 -26.059 -30.194 + 6.00 3.60 -26.138 -30.214 + 6.00 3.70 -26.440 -30.239 + 6.00 3.80 -26.587 -30.227 + 6.00 3.90 -26.514 -30.186 + 6.00 4.00 -25.775 -29.820 + 6.00 4.10 -23.516 -28.223 + 6.00 4.20 -22.024 -27.218 + 6.00 4.30 -21.837 -26.936 + 6.00 4.40 -21.974 -26.797 + 6.00 4.50 -22.023 -26.692 + 6.00 4.60 -21.948 -26.709 + 6.00 4.70 -21.770 -26.751 + 6.00 4.80 -21.542 -26.680 + 6.00 4.90 -21.348 -26.518 + 6.00 5.00 -21.288 -26.211 + 6.00 5.10 -21.336 -25.958 + 6.00 5.20 -21.330 -25.888 + 6.00 5.30 -21.305 -26.021 + 6.00 5.40 -21.303 -26.265 + 6.00 5.50 -21.490 -26.421 + 6.00 5.60 -21.685 -26.329 + 6.00 5.70 -21.736 -26.047 + 6.00 5.80 -21.795 -25.845 + 6.00 5.90 -21.829 -25.820 + 6.00 6.00 -21.834 -25.995 + 6.00 6.10 -21.847 -26.407 + 6.00 6.20 -21.895 -26.924 + 6.00 6.30 -22.071 -27.142 + 6.00 6.40 -22.252 -27.232 + 6.00 6.50 -22.387 -27.354 + 6.00 6.60 -22.467 -27.425 + 6.00 6.70 -22.497 -27.461 + 6.00 6.80 -22.503 -27.503 + 6.00 6.90 -22.493 -27.546 + 6.00 7.00 -22.491 -27.553 + 6.00 7.10 -22.535 -27.604 + 6.00 7.20 -22.610 -27.692 + 6.00 7.30 -22.658 -27.709 + 6.00 7.40 -22.670 -27.762 + 6.00 7.50 -22.660 -27.831 + 6.00 7.60 -22.634 -27.898 + 6.00 7.70 -22.601 -27.939 + 6.00 7.80 -22.564 -27.962 + 6.00 7.90 -22.526 -27.987 + 6.00 8.00 -22.489 -28.100 + 6.00 8.10 -22.450 -28.229 + 6.00 8.20 -22.410 -28.360 + 6.00 8.30 -22.369 -28.497 + 6.00 8.40 -22.324 -28.624 + 6.00 8.50 -22.280 -28.748 + 6.00 8.60 -22.231 -28.872 + 6.00 8.70 -22.183 -28.999 + 6.00 8.80 -22.132 -29.131 + 6.00 8.90 -22.079 -29.267 + 6.00 9.00 -22.026 -29.406 diff --git a/src/dust/dust_cuda.cu b/src/dust/dust_cuda.cu index 0bcdf5d3f..343c3d4d5 100644 --- a/src/dust/dust_cuda.cu +++ b/src/dust/dust_cuda.cu @@ -15,6 +15,7 @@ #include "../utils/hydro_utilities.h" #include "../utils/cuda_utilities.h" #include "../grid/grid3D.h" +#include "../grid/grid_enum.h" void Dust_Update(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dt, Real gamma) { int n_cells = nx * ny * nz; @@ -56,20 +57,19 @@ __global__ void Dust_Kernel(Real *dev_conserved, int nx, int ny, int nz, int n_g if (xid >= is && xid < ie && yid >= js && yid < je && zid >= ks && zid < ke) { // get conserved quanitites - d_gas = dev_conserved[id]; - d_dust = dev_conserved[5*n_cells + id]; - E = dev_conserved[4*n_cells + id]; + d_gas = dev_conserved[n_cells*grid_enum::density + id]; + d_dust = dev_conserved[n_cells*grid_enum::dust_density + id]; + E = dev_conserved[n_cells*grid_enum::Energy + id]; n = d_gas*DENSITY_UNIT / (mu*MP); if (E < 0.0 || E != E) return; - vx = dev_conserved[1*n_cells + id] / d_gas; - vy = dev_conserved[2*n_cells + id] / d_gas; - vz = dev_conserved[3*n_cells + id] / d_gas; - + vx = dev_conserved[n_cells*grid_enum::momentum_x + id] / d_gas; + vy = dev_conserved[n_cells*grid_enum::momentum_y + id] / d_gas; + vz = dev_conserved[n_cells*grid_enum::momentum_z + id] / d_gas; #ifdef DE - ge = dev_conserved[(n_fields-1)*n_cells + id] / d_gas; + ge = dev_conserved[n_cells*grid_enum::GasEnergy + id] / d_gas; ge = fmax(ge, (Real) TINY_NUMBER); #endif // DE @@ -102,10 +102,10 @@ __global__ void Dust_Kernel(Real *dev_conserved, int nx, int ny, int nz, int n_g // update dust density d_dust += dd; - dev_conserved[5*n_cells + id] = d_dust; + dev_conserved[n_cells*grid_enum::dust_density + id] = d_dust; #ifdef DE - dev_conserved[(n_fields-1)*n_cells + id] = d_dust*ge; + dev_conserved[n_cells*grid_enum::GasEnergy + id] = d_dust*ge; #endif } } diff --git a/src/grid/grid_enum.h b/src/grid/grid_enum.h index 84d41b20e..ffcc31a9a 100644 --- a/src/grid/grid_enum.h +++ b/src/grid/grid_enum.h @@ -48,6 +48,10 @@ enum : int { #endif #endif + #ifdef DUST + dust_density, + #endif // DUST + #endif // SCALAR finalscalar_plus_1, // needed to calculate NSCALARS From 89c4c03460d43405deace2e6dd7c81e1e3aa0941 Mon Sep 17 00:00:00 2001 From: helenarichie Date: Tue, 6 Dec 2022 14:09:37 -0500 Subject: [PATCH 141/694] bring grid_enum indexing into DUST --- cloudy_coolingcurve.txt | 9802 --------------------------------------- 1 file changed, 9802 deletions(-) delete mode 100644 cloudy_coolingcurve.txt diff --git a/cloudy_coolingcurve.txt b/cloudy_coolingcurve.txt deleted file mode 100644 index ac458a6d1..000000000 --- a/cloudy_coolingcurve.txt +++ /dev/null @@ -1,9802 +0,0 @@ -#log n log T log cool/n2 log heat/n2 - -6.00 1.00 -25.332 -21.168 - -6.00 1.10 -25.276 -21.209 - -6.00 1.20 -25.219 -21.250 - -6.00 1.30 -25.162 -21.293 - -6.00 1.40 -25.101 -21.336 - -6.00 1.50 -25.035 -21.379 - -6.00 1.60 -24.965 -21.423 - -6.00 1.70 -24.893 -21.468 - -6.00 1.80 -24.819 -21.513 - -6.00 1.90 -24.738 -21.559 - -6.00 2.00 -24.636 -21.605 - -6.00 2.10 -24.504 -21.652 - -6.00 2.20 -24.344 -21.698 - -6.00 2.30 -24.168 -21.742 - -6.00 2.40 -23.990 -21.786 - -6.00 2.50 -23.826 -21.831 - -6.00 2.60 -23.679 -21.875 - -6.00 2.70 -23.552 -21.920 - -6.00 2.80 -23.445 -21.965 - -6.00 2.90 -23.360 -22.011 - -6.00 3.00 -23.299 -22.057 - -6.00 3.10 -23.263 -22.103 - -6.00 3.20 -23.250 -22.148 - -6.00 3.30 -23.257 -22.194 - -6.00 3.40 -23.282 -22.238 - -6.00 3.50 -23.320 -22.282 - -6.00 3.60 -23.366 -22.325 - -6.00 3.70 -23.414 -22.368 - -6.00 3.80 -23.459 -22.409 - -6.00 3.90 -23.494 -22.449 - -6.00 4.00 -23.517 -22.487 - -6.00 4.10 -23.524 -22.525 - -6.00 4.20 -23.510 -22.561 - -6.00 4.30 -23.466 -22.596 - -6.00 4.40 -23.385 -22.630 - -6.00 4.50 -23.273 -22.662 - -6.00 4.60 -23.149 -22.693 - -6.00 4.70 -23.030 -22.723 - -6.00 4.80 -22.924 -22.752 - -6.00 4.90 -22.826 -22.780 - -6.00 5.00 -22.728 -22.808 - -6.00 5.10 -22.630 -22.835 - -6.00 5.20 -22.528 -22.864 - -6.00 5.30 -22.430 -22.893 - -6.00 5.40 -22.344 -22.921 - -6.00 5.50 -22.271 -22.950 - -6.00 5.60 -22.214 -22.983 - -6.00 5.70 -22.174 -23.024 - -6.00 5.80 -22.205 -23.092 - -6.00 5.90 -22.749 -23.478 - -6.00 6.00 -22.847 -23.628 - -6.00 6.10 -22.798 -23.664 - -6.00 6.20 -22.737 -23.692 - -6.00 6.30 -22.664 -23.715 - -6.00 6.40 -22.579 -23.714 - -6.00 6.50 -22.489 -23.719 - -6.00 6.60 -22.398 -23.740 - -6.00 6.70 -22.307 -23.776 - -6.00 6.80 -22.221 -23.753 - -6.00 6.90 -22.202 -23.011 - -6.00 7.00 -22.046 -24.082 - -6.00 7.10 -21.972 -24.212 - -6.00 7.20 -21.897 -24.347 - -6.00 7.30 -21.816 -24.281 - -6.00 7.40 -21.728 -24.271 - -6.00 7.50 -21.637 -24.091 - -6.00 7.60 -21.543 -23.749 - -6.00 7.70 -21.446 -23.747 - -6.00 7.80 -21.350 -23.855 - -6.00 7.90 -21.253 -23.978 - -6.00 8.00 -21.156 -24.093 - -6.00 8.10 -21.060 -24.227 - -6.00 8.20 -20.960 -24.276 - -6.00 8.30 -20.863 -24.361 - -6.00 8.40 -20.765 -24.479 - -6.00 8.50 -20.667 -24.502 - -6.00 8.60 -20.568 -24.587 - -6.00 8.70 -20.472 -24.607 - -6.00 8.80 -20.372 -24.646 - -6.00 8.90 -20.273 -24.677 - -6.00 9.00 -20.176 -24.705 - -5.90 1.00 -25.341 -21.199 - -5.90 1.10 -25.284 -21.239 - -5.90 1.20 -25.227 -21.279 - -5.90 1.30 -25.169 -21.320 - -5.90 1.40 -25.105 -21.362 - -5.90 1.50 -25.035 -21.405 - -5.90 1.60 -24.957 -21.448 - -5.90 1.70 -24.875 -21.492 - -5.90 1.80 -24.791 -21.536 - -5.90 1.90 -24.703 -21.581 - -5.90 2.00 -24.596 -21.627 - -5.90 2.10 -24.460 -21.673 - -5.90 2.20 -24.296 -21.718 - -5.90 2.30 -24.117 -21.762 - -5.90 2.40 -23.938 -21.807 - -5.90 2.50 -23.773 -21.851 - -5.90 2.60 -23.627 -21.895 - -5.90 2.70 -23.501 -21.941 - -5.90 2.80 -23.396 -21.986 - -5.90 2.90 -23.314 -22.033 - -5.90 3.00 -23.255 -22.079 - -5.90 3.10 -23.220 -22.127 - -5.90 3.20 -23.206 -22.174 - -5.90 3.30 -23.212 -22.221 - -5.90 3.40 -23.233 -22.267 - -5.90 3.50 -23.267 -22.313 - -5.90 3.60 -23.309 -22.358 - -5.90 3.70 -23.355 -22.402 - -5.90 3.80 -23.400 -22.444 - -5.90 3.90 -23.438 -22.485 - -5.90 4.00 -23.468 -22.525 - -5.90 4.10 -23.483 -22.564 - -5.90 4.20 -23.477 -22.602 - -5.90 4.30 -23.437 -22.639 - -5.90 4.40 -23.351 -22.674 - -5.90 4.50 -23.228 -22.707 - -5.90 4.60 -23.091 -22.739 - -5.90 4.70 -22.962 -22.770 - -5.90 4.80 -22.847 -22.799 - -5.90 4.90 -22.744 -22.829 - -5.90 5.00 -22.647 -22.858 - -5.90 5.10 -22.550 -22.888 - -5.90 5.20 -22.457 -22.919 - -5.90 5.30 -22.372 -22.949 - -5.90 5.40 -22.295 -22.979 - -5.90 5.50 -22.228 -23.007 - -5.90 5.60 -22.164 -23.038 - -5.90 5.70 -22.118 -23.073 - -5.90 5.80 -22.102 -23.117 - -5.90 5.90 -22.150 -23.188 - -5.90 6.00 -22.325 -23.333 - -5.90 6.10 -22.730 -23.645 - -5.90 6.20 -22.762 -23.714 - -5.90 6.30 -22.700 -23.733 - -5.90 6.40 -22.625 -23.747 - -5.90 6.50 -22.541 -23.757 - -5.90 6.60 -22.454 -23.779 - -5.90 6.70 -22.368 -23.822 - -5.90 6.80 -22.284 -23.865 - -5.90 6.90 -22.284 -23.266 - -5.90 7.00 -22.117 -24.176 - -5.90 7.10 -22.047 -24.293 - -5.90 7.20 -21.978 -24.368 - -5.90 7.30 -21.899 -24.540 - -5.90 7.40 -21.815 -24.417 - -5.90 7.50 -21.726 -24.267 - -5.90 7.60 -21.634 -23.912 - -5.90 7.70 -21.539 -23.757 - -5.90 7.80 -21.443 -23.867 - -5.90 7.90 -21.347 -23.982 - -5.90 8.00 -21.252 -24.105 - -5.90 8.10 -21.153 -24.207 - -5.90 8.20 -21.059 -24.357 - -5.90 8.30 -20.959 -24.403 - -5.90 8.40 -20.861 -24.481 - -5.90 8.50 -20.763 -24.595 - -5.90 8.60 -20.665 -24.617 - -5.90 8.70 -20.567 -24.671 - -5.90 8.80 -20.483 -24.727 - -5.90 8.90 -20.371 -24.755 - -5.90 9.00 -20.273 -24.786 - -5.80 1.00 -25.350 -21.232 - -5.80 1.10 -25.293 -21.270 - -5.80 1.20 -25.236 -21.310 - -5.80 1.30 -25.176 -21.350 - -5.80 1.40 -25.109 -21.391 - -5.80 1.50 -25.032 -21.432 - -5.80 1.60 -24.945 -21.474 - -5.80 1.70 -24.850 -21.517 - -5.80 1.80 -24.755 -21.560 - -5.80 1.90 -24.659 -21.604 - -5.80 2.00 -24.549 -21.649 - -5.80 2.10 -24.414 -21.694 - -5.80 2.20 -24.251 -21.739 - -5.80 2.30 -24.073 -21.784 - -5.80 2.40 -23.896 -21.827 - -5.80 2.50 -23.732 -21.872 - -5.80 2.60 -23.587 -21.916 - -5.80 2.70 -23.461 -21.962 - -5.80 2.80 -23.357 -22.007 - -5.80 2.90 -23.276 -22.054 - -5.80 3.00 -23.219 -22.101 - -5.80 3.10 -23.184 -22.149 - -5.80 3.20 -23.171 -22.198 - -5.80 3.30 -23.175 -22.246 - -5.80 3.40 -23.195 -22.294 - -5.80 3.50 -23.226 -22.342 - -5.80 3.60 -23.265 -22.388 - -5.80 3.70 -23.307 -22.433 - -5.80 3.80 -23.350 -22.477 - -5.80 3.90 -23.388 -22.519 - -5.80 4.00 -23.420 -22.561 - -5.80 4.10 -23.440 -22.601 - -5.80 4.20 -23.437 -22.641 - -5.80 4.30 -23.397 -22.679 - -5.80 4.40 -23.304 -22.715 - -5.80 4.50 -23.169 -22.750 - -5.80 4.60 -23.020 -22.782 - -5.80 4.70 -22.882 -22.813 - -5.80 4.80 -22.759 -22.844 - -5.80 4.90 -22.651 -22.874 - -5.80 5.00 -22.553 -22.905 - -5.80 5.10 -22.461 -22.937 - -5.80 5.20 -22.376 -22.972 - -5.80 5.30 -22.301 -23.006 - -5.80 5.40 -22.239 -23.039 - -5.80 5.50 -22.179 -23.067 - -5.80 5.60 -22.126 -23.098 - -5.80 5.70 -22.079 -23.132 - -5.80 5.80 -22.054 -23.172 - -5.80 5.90 -22.070 -23.223 - -5.80 6.00 -22.154 -23.308 - -5.80 6.10 -22.294 -23.446 - -5.80 6.20 -22.455 -23.601 - -5.80 6.30 -22.646 -23.754 - -5.80 6.40 -22.645 -23.778 - -5.80 6.50 -22.574 -23.801 - -5.80 6.60 -22.497 -23.833 - -5.80 6.70 -22.418 -23.881 - -5.80 6.80 -22.342 -23.934 - -5.80 6.90 -22.270 -23.743 - -5.80 7.00 -22.180 -24.255 - -5.80 7.10 -22.119 -24.422 - -5.80 7.20 -22.053 -24.522 - -5.80 7.30 -21.980 -24.633 - -5.80 7.40 -21.899 -24.550 - -5.80 7.50 -21.813 -24.433 - -5.80 7.60 -21.723 -24.113 - -5.80 7.70 -21.630 -23.851 - -5.80 7.80 -21.535 -23.877 - -5.80 7.90 -21.440 -23.995 - -5.80 8.00 -21.344 -24.114 - -5.80 8.10 -21.248 -24.228 - -5.80 8.20 -21.152 -24.356 - -5.80 8.30 -21.057 -24.485 - -5.80 8.40 -20.957 -24.563 - -5.80 8.50 -20.864 -24.635 - -5.80 8.60 -20.762 -24.670 - -5.80 8.70 -20.664 -24.731 - -5.80 8.80 -20.577 -24.791 - -5.80 8.90 -20.481 -24.838 - -5.80 9.00 -20.382 -24.874 - -5.70 1.00 -25.358 -21.266 - -5.70 1.10 -25.301 -21.304 - -5.70 1.20 -25.244 -21.342 - -5.70 1.30 -25.182 -21.381 - -5.70 1.40 -25.112 -21.421 - -5.70 1.50 -25.028 -21.462 - -5.70 1.60 -24.929 -21.502 - -5.70 1.70 -24.821 -21.544 - -5.70 1.80 -24.712 -21.586 - -5.70 1.90 -24.607 -21.630 - -5.70 2.00 -24.495 -21.673 - -5.70 2.10 -24.364 -21.718 - -5.70 2.20 -24.207 -21.762 - -5.70 2.30 -24.035 -21.806 - -5.70 2.40 -23.863 -21.850 - -5.70 2.50 -23.702 -21.894 - -5.70 2.60 -23.558 -21.938 - -5.70 2.70 -23.432 -21.983 - -5.70 2.80 -23.329 -22.029 - -5.70 2.90 -23.248 -22.076 - -5.70 3.00 -23.191 -22.123 - -5.70 3.10 -23.156 -22.171 - -5.70 3.20 -23.142 -22.221 - -5.70 3.30 -23.146 -22.270 - -5.70 3.40 -23.164 -22.319 - -5.70 3.50 -23.193 -22.368 - -5.70 3.60 -23.229 -22.416 - -5.70 3.70 -23.269 -22.463 - -5.70 3.80 -23.309 -22.508 - -5.70 3.90 -23.346 -22.552 - -5.70 4.00 -23.378 -22.594 - -5.70 4.10 -23.398 -22.636 - -5.70 4.20 -23.395 -22.677 - -5.70 4.30 -23.351 -22.717 - -5.70 4.40 -23.249 -22.754 - -5.70 4.50 -23.102 -22.789 - -5.70 4.60 -22.941 -22.822 - -5.70 4.70 -22.793 -22.854 - -5.70 4.80 -22.663 -22.885 - -5.70 4.90 -22.550 -22.916 - -5.70 5.00 -22.452 -22.949 - -5.70 5.10 -22.364 -22.984 - -5.70 5.20 -22.289 -23.022 - -5.70 5.30 -22.227 -23.061 - -5.70 5.40 -22.180 -23.096 - -5.70 5.50 -22.134 -23.128 - -5.70 5.60 -22.090 -23.162 - -5.70 5.70 -22.046 -23.196 - -5.70 5.80 -22.019 -23.236 - -5.70 5.90 -22.009 -23.281 - -5.70 6.00 -22.047 -23.346 - -5.70 6.10 -22.138 -23.458 - -5.70 6.20 -22.285 -23.597 - -5.70 6.30 -22.410 -23.721 - -5.70 6.40 -22.559 -23.842 - -5.70 6.50 -22.590 -23.855 - -5.70 6.60 -22.527 -23.898 - -5.70 6.70 -22.458 -23.953 - -5.70 6.80 -22.390 -24.015 - -5.70 6.90 -22.324 -23.962 - -5.70 7.00 -22.239 -24.346 - -5.70 7.10 -22.181 -24.469 - -5.70 7.20 -22.125 -24.577 - -5.70 7.30 -22.058 -24.727 - -5.70 7.40 -21.981 -24.677 - -5.70 7.50 -21.897 -24.593 - -5.70 7.60 -21.809 -24.308 - -5.70 7.70 -21.719 -23.940 - -5.70 7.80 -21.625 -23.896 - -5.70 7.90 -21.531 -24.005 - -5.70 8.00 -21.436 -24.128 - -5.70 8.10 -21.341 -24.246 - -5.70 8.20 -21.245 -24.406 - -5.70 8.30 -21.151 -24.493 - -5.70 8.40 -21.052 -24.554 - -5.70 8.50 -20.957 -24.687 - -5.70 8.60 -20.863 -24.754 - -5.70 8.70 -20.761 -24.786 - -5.70 8.80 -20.663 -24.869 - -5.70 8.90 -20.575 -24.904 - -5.70 9.00 -20.467 -24.939 - -5.60 1.00 -25.366 -21.301 - -5.60 1.10 -25.309 -21.338 - -5.60 1.20 -25.251 -21.376 - -5.60 1.30 -25.189 -21.415 - -5.60 1.40 -25.115 -21.453 - -5.60 1.50 -25.022 -21.493 - -5.60 1.60 -24.910 -21.533 - -5.60 1.70 -24.787 -21.573 - -5.60 1.80 -24.665 -21.615 - -5.60 1.90 -24.550 -21.657 - -5.60 2.00 -24.437 -21.699 - -5.60 2.10 -24.311 -21.743 - -5.60 2.20 -24.164 -21.786 - -5.60 2.30 -24.001 -21.830 - -5.60 2.40 -23.836 -21.873 - -5.60 2.50 -23.681 -21.917 - -5.60 2.60 -23.539 -21.962 - -5.60 2.70 -23.415 -22.007 - -5.60 2.80 -23.311 -22.052 - -5.60 2.90 -23.230 -22.098 - -5.60 3.00 -23.171 -22.145 - -5.60 3.10 -23.135 -22.194 - -5.60 3.20 -23.120 -22.243 - -5.60 3.30 -23.122 -22.293 - -5.60 3.40 -23.139 -22.343 - -5.60 3.50 -23.166 -22.393 - -5.60 3.60 -23.201 -22.442 - -5.60 3.70 -23.239 -22.490 - -5.60 3.80 -23.277 -22.537 - -5.60 3.90 -23.312 -22.581 - -5.60 4.00 -23.341 -22.625 - -5.60 4.10 -23.359 -22.668 - -5.60 4.20 -23.354 -22.711 - -5.60 4.30 -23.302 -22.752 - -5.60 4.40 -23.189 -22.790 - -5.60 4.50 -23.029 -22.825 - -5.60 4.60 -22.859 -22.859 - -5.60 4.70 -22.701 -22.890 - -5.60 4.80 -22.563 -22.922 - -5.60 4.90 -22.444 -22.955 - -5.60 5.00 -22.344 -22.989 - -5.60 5.10 -22.260 -23.028 - -5.60 5.20 -22.196 -23.070 - -5.60 5.30 -22.146 -23.113 - -5.60 5.40 -22.112 -23.152 - -5.60 5.50 -22.085 -23.188 - -5.60 5.60 -22.051 -23.225 - -5.60 5.70 -22.015 -23.264 - -5.60 5.80 -21.986 -23.306 - -5.60 5.90 -21.978 -23.350 - -5.60 6.00 -22.005 -23.406 - -5.60 6.10 -22.062 -23.498 - -5.60 6.20 -22.206 -23.634 - -5.60 6.30 -22.344 -23.765 - -5.60 6.40 -22.453 -23.884 - -5.60 6.50 -22.536 -23.974 - -5.60 6.60 -22.538 -23.998 - -5.60 6.70 -22.490 -24.033 - -5.60 6.80 -22.431 -24.102 - -5.60 6.90 -22.371 -24.108 - -5.60 7.00 -22.428 -23.111 - -5.60 7.10 -22.241 -24.561 - -5.60 7.20 -22.192 -24.675 - -5.60 7.30 -22.131 -24.758 - -5.60 7.40 -22.058 -24.797 - -5.60 7.50 -21.978 -24.741 - -5.60 7.60 -21.890 -24.985 - -5.60 7.70 -21.805 -24.137 - -5.60 7.80 -21.713 -23.966 - -5.60 7.90 -21.620 -24.014 - -5.60 8.00 -21.526 -24.139 - -5.60 8.10 -21.432 -24.261 - -5.60 8.20 -21.337 -24.414 - -5.60 8.30 -21.242 -24.483 - -5.60 8.40 -21.148 -24.596 - -5.60 8.50 -21.050 -24.730 - -5.60 8.60 -20.957 -24.787 - -5.60 8.70 -20.856 -24.835 - -5.60 8.80 -20.758 -24.902 - -5.60 8.90 -20.669 -24.965 - -5.60 9.00 -20.563 -25.010 - -5.50 1.00 -25.374 -21.336 - -5.50 1.10 -25.317 -21.374 - -5.50 1.20 -25.259 -21.411 - -5.50 1.30 -25.195 -21.449 - -5.50 1.40 -25.117 -21.487 - -5.50 1.50 -25.015 -21.526 - -5.50 1.60 -24.890 -21.565 - -5.50 1.70 -24.751 -21.604 - -5.50 1.80 -24.616 -21.645 - -5.50 1.90 -24.492 -21.686 - -5.50 2.00 -24.376 -21.728 - -5.50 2.10 -24.256 -21.770 - -5.50 2.20 -24.119 -21.813 - -5.50 2.30 -23.969 -21.855 - -5.50 2.40 -23.815 -21.898 - -5.50 2.50 -23.667 -21.942 - -5.50 2.60 -23.530 -21.986 - -5.50 2.70 -23.408 -22.031 - -5.50 2.80 -23.304 -22.076 - -5.50 2.90 -23.221 -22.122 - -5.50 3.00 -23.161 -22.169 - -5.50 3.10 -23.123 -22.216 - -5.50 3.20 -23.104 -22.265 - -5.50 3.30 -23.104 -22.315 - -5.50 3.40 -23.118 -22.365 - -5.50 3.50 -23.144 -22.416 - -5.50 3.60 -23.177 -22.466 - -5.50 3.70 -23.214 -22.515 - -5.50 3.80 -23.250 -22.563 - -5.50 3.90 -23.283 -22.609 - -5.50 4.00 -23.310 -22.654 - -5.50 4.10 -23.325 -22.698 - -5.50 4.20 -23.314 -22.742 - -5.50 4.30 -23.253 -22.784 - -5.50 4.40 -23.128 -22.823 - -5.50 4.50 -22.956 -22.859 - -5.50 4.60 -22.776 -22.892 - -5.50 4.70 -22.609 -22.924 - -5.50 4.80 -22.462 -22.956 - -5.50 4.90 -22.337 -22.989 - -5.50 5.00 -22.234 -23.025 - -5.50 5.10 -22.153 -23.068 - -5.50 5.20 -22.096 -23.116 - -5.50 5.30 -22.062 -23.164 - -5.50 5.40 -22.043 -23.207 - -5.50 5.50 -22.028 -23.247 - -5.50 5.60 -22.013 -23.289 - -5.50 5.70 -21.988 -23.333 - -5.50 5.80 -21.960 -23.377 - -5.50 5.90 -21.951 -23.423 - -5.50 6.00 -21.933 -23.476 - -5.50 6.10 -21.976 -23.555 - -5.50 6.20 -22.131 -23.683 - -5.50 6.30 -22.301 -23.823 - -5.50 6.40 -22.407 -23.948 - -5.50 6.50 -22.502 -24.053 - -5.50 6.60 -22.521 -24.122 - -5.50 6.70 -22.502 -24.165 - -5.50 6.80 -22.461 -24.214 - -5.50 6.90 -22.412 -24.227 - -5.50 7.00 -22.496 -23.293 - -5.50 7.10 -22.294 -24.654 - -5.50 7.20 -22.253 -24.777 - -5.50 7.30 -22.200 -24.867 - -5.50 7.40 -22.131 -24.996 - -5.50 7.50 -22.055 -24.879 - -5.50 7.60 -21.972 -24.675 - -5.50 7.70 -21.884 -25.111 - -5.50 7.80 -21.798 -24.052 - -5.50 7.90 -21.706 -24.036 - -5.50 8.00 -21.614 -24.148 - -5.50 8.10 -21.522 -24.273 - -5.50 8.20 -21.428 -24.461 - -5.50 8.30 -21.334 -24.503 - -5.50 8.40 -21.239 -24.608 - -5.50 8.50 -21.143 -24.760 - -5.50 8.60 -21.047 -24.796 - -5.50 8.70 -20.950 -24.921 - -5.50 8.80 -20.853 -24.953 - -5.50 8.90 -20.756 -25.018 - -5.50 9.00 -20.658 -25.073 - -5.40 1.00 -25.381 -21.371 - -5.40 1.10 -25.324 -21.409 - -5.40 1.20 -25.266 -21.447 - -5.40 1.30 -25.200 -21.484 - -5.40 1.40 -25.118 -21.522 - -5.40 1.50 -25.007 -21.560 - -5.40 1.60 -24.868 -21.599 - -5.40 1.70 -24.716 -21.637 - -5.40 1.80 -24.568 -21.677 - -5.40 1.90 -24.436 -21.717 - -5.40 2.00 -24.317 -21.758 - -5.40 2.10 -24.201 -21.799 - -5.40 2.20 -24.074 -21.841 - -5.40 2.30 -23.937 -21.883 - -5.40 2.40 -23.796 -21.925 - -5.40 2.50 -23.658 -21.968 - -5.40 2.60 -23.528 -22.012 - -5.40 2.70 -23.409 -22.056 - -5.40 2.80 -23.306 -22.101 - -5.40 2.90 -23.223 -22.147 - -5.40 3.00 -23.161 -22.193 - -5.40 3.10 -23.119 -22.240 - -5.40 3.20 -23.096 -22.288 - -5.40 3.30 -23.092 -22.337 - -5.40 3.40 -23.103 -22.388 - -5.40 3.50 -23.127 -22.438 - -5.40 3.60 -23.158 -22.489 - -5.40 3.70 -23.192 -22.539 - -5.40 3.80 -23.226 -22.587 - -5.40 3.90 -23.257 -22.634 - -5.40 4.00 -23.281 -22.680 - -5.40 4.10 -23.292 -22.726 - -5.40 4.20 -23.275 -22.771 - -5.40 4.30 -23.205 -22.814 - -5.40 4.40 -23.067 -22.853 - -5.40 4.50 -22.884 -22.889 - -5.40 4.60 -22.696 -22.922 - -5.40 4.70 -22.520 -22.954 - -5.40 4.80 -22.364 -22.986 - -5.40 4.90 -22.231 -23.019 - -5.40 5.00 -22.124 -23.058 - -5.40 5.10 -22.045 -23.105 - -5.40 5.20 -21.996 -23.159 - -5.40 5.30 -21.974 -23.213 - -5.40 5.40 -21.971 -23.261 - -5.40 5.50 -21.972 -23.307 - -5.40 5.60 -21.974 -23.354 - -5.40 5.70 -21.962 -23.403 - -5.40 5.80 -21.936 -23.451 - -5.40 5.90 -21.918 -23.499 - -5.40 6.00 -21.919 -23.553 - -5.40 6.10 -21.948 -23.624 - -5.40 6.20 -22.084 -23.741 - -5.40 6.30 -22.268 -23.887 - -5.40 6.40 -22.382 -24.021 - -5.40 6.50 -22.473 -24.137 - -5.40 6.60 -22.511 -24.222 - -5.40 6.70 -22.508 -24.277 - -5.40 6.80 -22.481 -24.337 - -5.40 6.90 -22.442 -24.373 - -5.40 7.00 -22.418 -23.699 - -5.40 7.10 -22.342 -24.748 - -5.40 7.20 -22.309 -24.875 - -5.40 7.30 -22.263 -24.973 - -5.40 7.40 -22.200 -25.027 - -5.40 7.50 -22.128 -25.010 - -5.40 7.60 -22.048 -24.848 - -5.40 7.70 -21.965 -24.543 - -5.40 7.80 -21.879 -24.248 - -5.40 7.90 -21.790 -24.114 - -5.40 8.00 -21.700 -24.166 - -5.40 8.10 -21.609 -24.283 - -5.40 8.20 -21.517 -24.404 - -5.40 8.30 -21.423 -24.519 - -5.40 8.40 -21.329 -24.628 - -5.40 8.50 -21.235 -24.731 - -5.40 8.60 -21.139 -24.827 - -5.40 8.70 -21.043 -24.917 - -5.40 8.80 -20.951 -25.011 - -5.40 8.90 -20.850 -25.070 - -5.40 9.00 -20.753 -25.133 - -5.30 1.00 -25.388 -21.406 - -5.30 1.10 -25.331 -21.444 - -5.30 1.20 -25.272 -21.482 - -5.30 1.30 -25.205 -21.520 - -5.30 1.40 -25.118 -21.558 - -5.30 1.50 -24.998 -21.596 - -5.30 1.60 -24.847 -21.634 - -5.30 1.70 -24.683 -21.672 - -5.30 1.80 -24.524 -21.711 - -5.30 1.90 -24.384 -21.750 - -5.30 2.00 -24.261 -21.790 - -5.30 2.10 -24.147 -21.830 - -5.30 2.20 -24.030 -21.871 - -5.30 2.30 -23.906 -21.912 - -5.30 2.40 -23.779 -21.954 - -5.30 2.50 -23.652 -21.996 - -5.30 2.60 -23.530 -22.040 - -5.30 2.70 -23.416 -22.083 - -5.30 2.80 -23.317 -22.128 - -5.30 2.90 -23.233 -22.173 - -5.30 3.00 -23.169 -22.218 - -5.30 3.10 -23.123 -22.265 - -5.30 3.20 -23.096 -22.312 - -5.30 3.30 -23.087 -22.361 - -5.30 3.40 -23.094 -22.410 - -5.30 3.50 -23.113 -22.461 - -5.30 3.60 -23.142 -22.511 - -5.30 3.70 -23.174 -22.561 - -5.30 3.80 -23.206 -22.611 - -5.30 3.90 -23.234 -22.658 - -5.30 4.00 -23.255 -22.705 - -5.30 4.10 -23.262 -22.752 - -5.30 4.20 -23.237 -22.797 - -5.30 4.30 -23.156 -22.841 - -5.30 4.40 -23.008 -22.881 - -5.30 4.50 -22.816 -22.918 - -5.30 4.60 -22.620 -22.951 - -5.30 4.70 -22.437 -22.982 - -5.30 4.80 -22.272 -23.013 - -5.30 4.90 -22.130 -23.046 - -5.30 5.00 -22.017 -23.086 - -5.30 5.10 -21.937 -23.138 - -5.30 5.20 -21.895 -23.200 - -5.30 5.30 -21.886 -23.261 - -5.30 5.40 -21.900 -23.316 - -5.30 5.50 -21.918 -23.368 - -5.30 5.60 -21.935 -23.421 - -5.30 5.70 -21.934 -23.476 - -5.30 5.80 -21.920 -23.528 - -5.30 5.90 -21.902 -23.578 - -5.30 6.00 -21.905 -23.632 - -5.30 6.10 -21.929 -23.700 - -5.30 6.20 -22.046 -23.807 - -5.30 6.30 -22.233 -23.957 - -5.30 6.40 -22.364 -24.100 - -5.30 6.50 -22.458 -24.225 - -5.30 6.60 -22.507 -24.318 - -5.30 6.70 -22.513 -24.390 - -5.30 6.80 -22.497 -24.450 - -5.30 6.90 -22.467 -24.499 - -5.30 7.00 -22.443 -24.002 - -5.30 7.10 -22.385 -24.846 - -5.30 7.20 -22.359 -24.975 - -5.30 7.30 -22.321 -25.078 - -5.30 7.40 -22.264 -25.137 - -5.30 7.50 -22.196 -25.133 - -5.30 7.60 -22.120 -25.008 - -5.30 7.70 -22.040 -24.739 - -5.30 7.80 -21.956 -24.457 - -5.30 7.90 -21.870 -24.238 - -5.30 8.00 -21.782 -24.245 - -5.30 8.10 -21.693 -24.316 - -5.30 8.20 -21.603 -24.416 - -5.30 8.30 -21.511 -24.534 - -5.30 8.40 -21.418 -24.646 - -5.30 8.50 -21.325 -24.753 - -5.30 8.60 -21.232 -24.855 - -5.30 8.70 -21.135 -24.974 - -5.30 8.80 -21.040 -25.037 - -5.30 8.90 -20.947 -25.143 - -5.30 9.00 -20.847 -25.187 - -5.20 1.00 -25.394 -21.439 - -5.20 1.10 -25.337 -21.478 - -5.20 1.20 -25.278 -21.517 - -5.20 1.30 -25.209 -21.555 - -5.20 1.40 -25.117 -21.593 - -5.20 1.50 -24.990 -21.631 - -5.20 1.60 -24.828 -21.669 - -5.20 1.70 -24.653 -21.707 - -5.20 1.80 -24.485 -21.746 - -5.20 1.90 -24.338 -21.784 - -5.20 2.00 -24.211 -21.823 - -5.20 2.10 -24.098 -21.863 - -5.20 2.20 -23.988 -21.903 - -5.20 2.30 -23.875 -21.944 - -5.20 2.40 -23.761 -21.985 - -5.20 2.50 -23.646 -22.026 - -5.20 2.60 -23.534 -22.069 - -5.20 2.70 -23.427 -22.112 - -5.20 2.80 -23.332 -22.155 - -5.20 2.90 -23.250 -22.200 - -5.20 3.00 -23.184 -22.245 - -5.20 3.10 -23.136 -22.291 - -5.20 3.20 -23.104 -22.337 - -5.20 3.30 -23.089 -22.385 - -5.20 3.40 -23.091 -22.434 - -5.20 3.50 -23.105 -22.484 - -5.20 3.60 -23.130 -22.534 - -5.20 3.70 -23.159 -22.584 - -5.20 3.80 -23.187 -22.633 - -5.20 3.90 -23.212 -22.682 - -5.20 4.00 -23.230 -22.729 - -5.20 4.10 -23.232 -22.776 - -5.20 4.20 -23.200 -22.822 - -5.20 4.30 -23.108 -22.867 - -5.20 4.40 -22.951 -22.908 - -5.20 4.50 -22.752 -22.944 - -5.20 4.60 -22.550 -22.978 - -5.20 4.70 -22.361 -23.009 - -5.20 4.80 -22.188 -23.039 - -5.20 4.90 -22.039 -23.072 - -5.20 5.00 -21.918 -23.113 - -5.20 5.10 -21.835 -23.169 - -5.20 5.20 -21.795 -23.239 - -5.20 5.30 -21.798 -23.309 - -5.20 5.40 -21.827 -23.373 - -5.20 5.50 -21.867 -23.433 - -5.20 5.60 -21.896 -23.491 - -5.20 5.70 -21.909 -23.550 - -5.20 5.80 -21.897 -23.608 - -5.20 5.90 -21.890 -23.659 - -5.20 6.00 -21.889 -23.714 - -5.20 6.10 -21.908 -23.781 - -5.20 6.20 -22.012 -23.881 - -5.20 6.30 -22.208 -24.031 - -5.20 6.40 -22.346 -24.181 - -5.20 6.50 -22.449 -24.315 - -5.20 6.60 -22.505 -24.417 - -5.20 6.70 -22.519 -24.497 - -5.20 6.80 -22.510 -24.561 - -5.20 6.90 -22.487 -24.619 - -5.20 7.00 -22.467 -24.235 - -5.20 7.10 -22.420 -24.938 - -5.20 7.20 -22.403 -25.074 - -5.20 7.30 -22.373 -25.182 - -5.20 7.40 -22.322 -25.246 - -5.20 7.50 -22.259 -25.253 - -5.20 7.60 -22.186 -25.157 - -5.20 7.70 -22.109 -24.928 - -5.20 7.80 -22.029 -24.663 - -5.20 7.90 -21.946 -24.464 - -5.20 8.00 -21.861 -24.339 - -5.20 8.10 -21.774 -24.385 - -5.20 8.20 -21.685 -24.437 - -5.20 8.30 -21.596 -24.563 - -5.20 8.40 -21.505 -24.670 - -5.20 8.50 -21.413 -24.772 - -5.20 8.60 -21.320 -24.883 - -5.20 8.70 -21.226 -24.982 - -5.20 8.80 -21.130 -25.074 - -5.20 8.90 -21.035 -25.160 - -5.20 9.00 -20.940 -25.236 - -5.10 1.00 -25.400 -21.469 - -5.10 1.10 -25.343 -21.510 - -5.10 1.20 -25.283 -21.550 - -5.10 1.30 -25.212 -21.589 - -5.10 1.40 -25.116 -21.628 - -5.10 1.50 -24.982 -21.667 - -5.10 1.60 -24.812 -21.705 - -5.10 1.70 -24.628 -21.743 - -5.10 1.80 -24.453 -21.781 - -5.10 1.90 -24.299 -21.820 - -5.10 2.00 -24.168 -21.858 - -5.10 2.10 -24.054 -21.897 - -5.10 2.20 -23.948 -21.937 - -5.10 2.30 -23.845 -21.977 - -5.10 2.40 -23.742 -22.017 - -5.10 2.50 -23.639 -22.058 - -5.10 2.60 -23.537 -22.099 - -5.10 2.70 -23.439 -22.142 - -5.10 2.80 -23.349 -22.185 - -5.10 2.90 -23.270 -22.228 - -5.10 3.00 -23.205 -22.272 - -5.10 3.10 -23.154 -22.318 - -5.10 3.20 -23.119 -22.364 - -5.10 3.30 -23.099 -22.411 - -5.10 3.40 -23.094 -22.459 - -5.10 3.50 -23.104 -22.508 - -5.10 3.60 -23.123 -22.557 - -5.10 3.70 -23.148 -22.607 - -5.10 3.80 -23.172 -22.656 - -5.10 3.90 -23.194 -22.705 - -5.10 4.00 -23.207 -22.752 - -5.10 4.10 -23.202 -22.800 - -5.10 4.20 -23.162 -22.847 - -5.10 4.30 -23.061 -22.892 - -5.10 4.40 -22.896 -22.933 - -5.10 4.50 -22.692 -22.971 - -5.10 4.60 -22.487 -23.004 - -5.10 4.70 -22.293 -23.036 - -5.10 4.80 -22.115 -23.066 - -5.10 4.90 -21.958 -23.098 - -5.10 5.00 -21.831 -23.140 - -5.10 5.10 -21.743 -23.200 - -5.10 5.20 -21.703 -23.276 - -5.10 5.30 -21.713 -23.356 - -5.10 5.40 -21.758 -23.431 - -5.10 5.50 -21.820 -23.500 - -5.10 5.60 -21.863 -23.563 - -5.10 5.70 -21.869 -23.627 - -5.10 5.80 -21.883 -23.689 - -5.10 5.90 -21.874 -23.742 - -5.10 6.00 -21.880 -23.798 - -5.10 6.10 -21.896 -23.865 - -5.10 6.20 -21.990 -23.960 - -5.10 6.30 -22.187 -24.109 - -5.10 6.40 -22.335 -24.266 - -5.10 6.50 -22.442 -24.408 - -5.10 6.60 -22.504 -24.516 - -5.10 6.70 -22.524 -24.602 - -5.10 6.80 -22.522 -24.671 - -5.10 6.90 -22.505 -24.735 - -5.10 7.00 -22.487 -24.437 - -5.10 7.10 -22.647 -23.418 - -5.10 7.20 -22.441 -25.174 - -5.10 7.30 -22.419 -25.285 - -5.10 7.40 -22.375 -25.352 - -5.10 7.50 -22.317 -25.368 - -5.10 7.60 -22.248 -25.297 - -5.10 7.70 -22.174 -25.107 - -5.10 7.80 -22.097 -24.866 - -5.10 7.90 -22.017 -24.676 - -5.10 8.00 -21.935 -24.544 - -5.10 8.10 -21.851 -24.474 - -5.10 8.20 -21.765 -24.520 - -5.10 8.30 -21.677 -24.608 - -5.10 8.40 -21.588 -24.696 - -5.10 8.50 -21.498 -24.789 - -5.10 8.60 -21.406 -24.945 - -5.10 8.70 -21.314 -25.028 - -5.10 8.80 -21.220 -25.126 - -5.10 8.90 -21.125 -25.226 - -5.10 9.00 -21.032 -25.301 - -5.00 1.00 -25.405 -21.498 - -5.00 1.10 -25.348 -21.540 - -5.00 1.20 -25.287 -21.581 - -5.00 1.30 -25.214 -21.622 - -5.00 1.40 -25.114 -21.662 - -5.00 1.50 -24.974 -21.701 - -5.00 1.60 -24.798 -21.740 - -5.00 1.70 -24.608 -21.779 - -5.00 1.80 -24.427 -21.817 - -5.00 1.90 -24.268 -21.856 - -5.00 2.00 -24.133 -21.894 - -5.00 2.10 -24.017 -21.933 - -5.00 2.20 -23.914 -21.972 - -5.00 2.30 -23.816 -22.011 - -5.00 2.40 -23.723 -22.051 - -5.00 2.50 -23.630 -22.091 - -5.00 2.60 -23.538 -22.132 - -5.00 2.70 -23.449 -22.173 - -5.00 2.80 -23.365 -22.215 - -5.00 2.90 -23.291 -22.258 - -5.00 3.00 -23.228 -22.301 - -5.00 3.10 -23.177 -22.346 - -5.00 3.20 -23.139 -22.391 - -5.00 3.30 -23.114 -22.437 - -5.00 3.40 -23.104 -22.485 - -5.00 3.50 -23.108 -22.533 - -5.00 3.60 -23.122 -22.581 - -5.00 3.70 -23.141 -22.631 - -5.00 3.80 -23.162 -22.680 - -5.00 3.90 -23.178 -22.728 - -5.00 4.00 -23.186 -22.776 - -5.00 4.10 -23.175 -22.824 - -5.00 4.20 -23.126 -22.871 - -5.00 4.30 -23.015 -22.917 - -5.00 4.40 -22.844 -22.959 - -5.00 4.50 -22.637 -22.997 - -5.00 4.60 -22.429 -23.032 - -5.00 4.70 -22.232 -23.064 - -5.00 4.80 -22.050 -23.094 - -5.00 4.90 -21.889 -23.127 - -5.00 5.00 -21.757 -23.171 - -5.00 5.10 -21.665 -23.236 - -5.00 5.20 -21.622 -23.317 - -5.00 5.30 -21.631 -23.403 - -5.00 5.40 -21.690 -23.491 - -5.00 5.50 -21.776 -23.571 - -5.00 5.60 -21.816 -23.640 - -5.00 5.70 -21.847 -23.706 - -5.00 5.80 -21.865 -23.772 - -5.00 5.90 -21.866 -23.828 - -5.00 6.00 -21.869 -23.884 - -5.00 6.10 -21.886 -23.952 - -5.00 6.20 -21.972 -24.043 - -5.00 6.30 -22.169 -24.191 - -5.00 6.40 -22.324 -24.354 - -5.00 6.50 -22.436 -24.502 - -5.00 6.60 -22.504 -24.616 - -5.00 6.70 -22.530 -24.707 - -5.00 6.80 -22.532 -24.780 - -5.00 6.90 -22.520 -24.849 - -5.00 7.00 -22.505 -24.618 - -5.00 7.10 -22.577 -23.658 - -5.00 7.20 -22.475 -25.274 - -5.00 7.30 -22.460 -25.387 - -5.00 7.40 -22.422 -25.458 - -5.00 7.50 -22.369 -25.481 - -5.00 7.60 -22.304 -25.429 - -5.00 7.70 -22.234 -25.275 - -5.00 7.80 -22.160 -25.062 - -5.00 7.90 -22.083 -24.885 - -5.00 8.00 -22.005 -24.766 - -5.00 8.10 -21.924 -24.704 - -5.00 8.20 -21.840 -24.684 - -5.00 8.30 -21.755 -24.719 - -5.00 8.40 -21.668 -24.794 - -5.00 8.50 -21.580 -24.897 - -5.00 8.60 -21.490 -24.991 - -5.00 8.70 -21.399 -25.095 - -5.00 8.80 -21.307 -25.191 - -5.00 8.90 -21.214 -25.278 - -5.00 9.00 -21.121 -25.362 - -4.90 1.00 -25.409 -21.524 - -4.90 1.10 -25.352 -21.568 - -4.90 1.20 -25.290 -21.610 - -4.90 1.30 -25.214 -21.652 - -4.90 1.40 -25.112 -21.693 - -4.90 1.50 -24.968 -21.734 - -4.90 1.60 -24.787 -21.774 - -4.90 1.70 -24.593 -21.814 - -4.90 1.80 -24.409 -21.853 - -4.90 1.90 -24.245 -21.892 - -4.90 2.00 -24.105 -21.931 - -4.90 2.10 -23.987 -21.969 - -4.90 2.20 -23.884 -22.008 - -4.90 2.30 -23.790 -22.047 - -4.90 2.40 -23.704 -22.086 - -4.90 2.50 -23.620 -22.126 - -4.90 2.60 -23.537 -22.166 - -4.90 2.70 -23.456 -22.207 - -4.90 2.80 -23.380 -22.248 - -4.90 2.90 -23.311 -22.290 - -4.90 3.00 -23.251 -22.332 - -4.90 3.10 -23.201 -22.376 - -4.90 3.20 -23.161 -22.420 - -4.90 3.30 -23.134 -22.465 - -4.90 3.40 -23.120 -22.512 - -4.90 3.50 -23.118 -22.559 - -4.90 3.60 -23.126 -22.607 - -4.90 3.70 -23.141 -22.655 - -4.90 3.80 -23.156 -22.704 - -4.90 3.90 -23.167 -22.752 - -4.90 4.00 -23.169 -22.801 - -4.90 4.10 -23.151 -22.849 - -4.90 4.20 -23.092 -22.896 - -4.90 4.30 -22.972 -22.942 - -4.90 4.40 -22.795 -22.985 - -4.90 4.50 -22.585 -23.024 - -4.90 4.60 -22.376 -23.061 - -4.90 4.70 -22.177 -23.094 - -4.90 4.80 -21.993 -23.126 - -4.90 4.90 -21.831 -23.160 - -4.90 5.00 -21.697 -23.207 - -4.90 5.10 -21.603 -23.278 - -4.90 5.20 -21.555 -23.363 - -4.90 5.30 -21.560 -23.454 - -4.90 5.40 -21.627 -23.552 - -4.90 5.50 -21.736 -23.645 - -4.90 5.60 -21.789 -23.720 - -4.90 5.70 -21.825 -23.788 - -4.90 5.80 -21.850 -23.858 - -4.90 5.90 -21.858 -23.915 - -4.90 6.00 -21.861 -23.972 - -4.90 6.10 -21.880 -24.041 - -4.90 6.20 -21.959 -24.129 - -4.90 6.30 -22.152 -24.277 - -4.90 6.40 -22.314 -24.444 - -4.90 6.50 -22.432 -24.597 - -4.90 6.60 -22.504 -24.716 - -4.90 6.70 -22.534 -24.810 - -4.90 6.80 -22.541 -24.887 - -4.90 6.90 -22.533 -24.960 - -4.90 7.00 -22.521 -24.785 - -4.90 7.10 -22.538 -23.947 - -4.90 7.20 -22.503 -25.348 - -4.90 7.30 -22.495 -25.489 - -4.90 7.40 -22.463 -25.563 - -4.90 7.50 -22.415 -25.592 - -4.90 7.60 -22.355 -25.556 - -4.90 7.70 -22.288 -25.432 - -4.90 7.80 -22.218 -25.250 - -4.90 7.90 -22.144 -25.089 - -4.90 8.00 -22.069 -24.979 - -4.90 8.10 -21.991 -24.923 - -4.90 8.20 -21.911 -24.915 - -4.90 8.30 -21.829 -24.943 - -4.90 8.40 -21.744 -24.997 - -4.90 8.50 -21.658 -25.070 - -4.90 8.60 -21.570 -25.157 - -4.90 8.70 -21.480 -25.245 - -4.90 8.80 -21.390 -25.329 - -4.90 8.90 -21.299 -25.417 - -4.90 9.00 -21.207 -25.493 - -4.80 1.00 -25.414 -21.547 - -4.80 1.10 -25.356 -21.592 - -4.80 1.20 -25.292 -21.637 - -4.80 1.30 -25.214 -21.680 - -4.80 1.40 -25.108 -21.723 - -4.80 1.50 -24.962 -21.765 - -4.80 1.60 -24.780 -21.806 - -4.80 1.70 -24.584 -21.847 - -4.80 1.80 -24.396 -21.887 - -4.80 1.90 -24.229 -21.927 - -4.80 2.00 -24.085 -21.966 - -4.80 2.10 -23.963 -22.006 - -4.80 2.20 -23.859 -22.045 - -4.80 2.30 -23.768 -22.084 - -4.80 2.40 -23.685 -22.123 - -4.80 2.50 -23.608 -22.162 - -4.80 2.60 -23.533 -22.201 - -4.80 2.70 -23.460 -22.241 - -4.80 2.80 -23.391 -22.282 - -4.80 2.90 -23.328 -22.323 - -4.80 3.00 -23.272 -22.364 - -4.80 3.10 -23.224 -22.407 - -4.80 3.20 -23.185 -22.450 - -4.80 3.30 -23.156 -22.495 - -4.80 3.40 -23.139 -22.540 - -4.80 3.50 -23.133 -22.587 - -4.80 3.60 -23.136 -22.634 - -4.80 3.70 -23.145 -22.681 - -4.80 3.80 -23.156 -22.729 - -4.80 3.90 -23.161 -22.778 - -4.80 4.00 -23.156 -22.826 - -4.80 4.10 -23.130 -22.874 - -4.80 4.20 -23.061 -22.922 - -4.80 4.30 -22.932 -22.968 - -4.80 4.40 -22.748 -23.012 - -4.80 4.50 -22.536 -23.053 - -4.80 4.60 -22.325 -23.090 - -4.80 4.70 -22.125 -23.126 - -4.80 4.80 -21.942 -23.159 - -4.80 4.90 -21.780 -23.196 - -4.80 5.00 -21.648 -23.250 - -4.80 5.10 -21.553 -23.328 - -4.80 5.20 -21.503 -23.417 - -4.80 5.30 -21.502 -23.510 - -4.80 5.40 -21.570 -23.616 - -4.80 5.50 -21.690 -23.722 - -4.80 5.60 -21.770 -23.803 - -4.80 5.70 -21.804 -23.872 - -4.80 5.80 -21.840 -23.945 - -4.80 5.90 -21.849 -24.004 - -4.80 6.00 -21.854 -24.062 - -4.80 6.10 -21.875 -24.131 - -4.80 6.20 -21.946 -24.218 - -4.80 6.30 -22.137 -24.365 - -4.80 6.40 -22.305 -24.535 - -4.80 6.50 -22.428 -24.693 - -4.80 6.60 -22.505 -24.816 - -4.80 6.70 -22.538 -24.913 - -4.80 6.80 -22.548 -24.994 - -4.80 6.90 -22.544 -25.069 - -4.80 7.00 -22.534 -24.941 - -4.80 7.10 -22.540 -24.200 - -4.80 7.20 -22.551 -24.273 - -4.80 7.30 -22.525 -25.585 - -4.80 7.40 -22.499 -25.665 - -4.80 7.50 -22.456 -25.697 - -4.80 7.60 -22.399 -25.671 - -4.80 7.70 -22.337 -25.568 - -4.80 7.80 -22.270 -25.411 - -4.80 7.90 -22.200 -25.260 - -4.80 8.00 -22.129 -25.156 - -4.80 8.10 -22.054 -25.107 - -4.80 8.20 -21.977 -25.105 - -4.80 8.30 -21.897 -25.138 - -4.80 8.40 -21.815 -25.197 - -4.80 8.50 -21.732 -25.273 - -4.80 8.60 -21.646 -25.356 - -4.80 8.70 -21.558 -25.441 - -4.80 8.80 -21.470 -25.522 - -4.80 8.90 -21.380 -25.599 - -4.80 9.00 -21.289 -25.670 - -4.70 1.00 -25.418 -21.568 - -4.70 1.10 -25.359 -21.615 - -4.70 1.20 -25.293 -21.661 - -4.70 1.30 -25.212 -21.706 - -4.70 1.40 -25.103 -21.750 - -4.70 1.50 -24.956 -21.794 - -4.70 1.60 -24.774 -21.836 - -4.70 1.70 -24.579 -21.879 - -4.70 1.80 -24.390 -21.920 - -4.70 1.90 -24.220 -21.961 - -4.70 2.00 -24.072 -22.001 - -4.70 2.10 -23.946 -22.041 - -4.70 2.20 -23.839 -22.081 - -4.70 2.30 -23.748 -22.120 - -4.70 2.40 -23.668 -22.160 - -4.70 2.50 -23.595 -22.199 - -4.70 2.60 -23.527 -22.238 - -4.70 2.70 -23.461 -22.277 - -4.70 2.80 -23.399 -22.317 - -4.70 2.90 -23.342 -22.358 - -4.70 3.00 -23.290 -22.398 - -4.70 3.10 -23.246 -22.440 - -4.70 3.20 -23.208 -22.483 - -4.70 3.30 -23.179 -22.526 - -4.70 3.40 -23.160 -22.571 - -4.70 3.50 -23.150 -22.616 - -4.70 3.60 -23.150 -22.662 - -4.70 3.70 -23.155 -22.709 - -4.70 3.80 -23.160 -22.756 - -4.70 3.90 -23.160 -22.804 - -4.70 4.00 -23.148 -22.852 - -4.70 4.10 -23.112 -22.901 - -4.70 4.20 -23.033 -22.948 - -4.70 4.30 -22.895 -22.995 - -4.70 4.40 -22.706 -23.039 - -4.70 4.50 -22.491 -23.081 - -4.70 4.60 -22.277 -23.121 - -4.70 4.70 -22.076 -23.158 - -4.70 4.80 -21.893 -23.195 - -4.70 4.90 -21.734 -23.236 - -4.70 5.00 -21.606 -23.298 - -4.70 5.10 -21.516 -23.387 - -4.70 5.20 -21.464 -23.478 - -4.70 5.30 -21.454 -23.572 - -4.70 5.40 -21.518 -23.683 - -4.70 5.50 -21.651 -23.801 - -4.70 5.60 -21.751 -23.889 - -4.70 5.70 -21.784 -23.959 - -4.70 5.80 -21.832 -24.034 - -4.70 5.90 -21.841 -24.095 - -4.70 6.00 -21.850 -24.153 - -4.70 6.10 -21.869 -24.224 - -4.70 6.20 -21.936 -24.310 - -4.70 6.30 -22.127 -24.455 - -4.70 6.40 -22.299 -24.628 - -4.70 6.50 -22.426 -24.790 - -4.70 6.60 -22.506 -24.916 - -4.70 6.70 -22.542 -25.016 - -4.70 6.80 -22.554 -25.099 - -4.70 6.90 -22.552 -25.177 - -4.70 7.00 -22.545 -25.088 - -4.70 7.10 -22.547 -24.424 - -4.70 7.20 -22.617 -23.925 - -4.70 7.30 -22.555 -25.016 - -4.70 7.40 -22.531 -25.422 - -4.70 7.50 -22.492 -25.378 - -4.70 7.60 -22.440 -25.293 - -4.70 7.70 -22.381 -25.242 - -4.70 7.80 -22.317 -25.184 - -4.70 7.90 -22.251 -25.125 - -4.70 8.00 -22.182 -25.119 - -4.70 8.10 -22.111 -25.145 - -4.70 8.20 -22.037 -25.194 - -4.70 8.30 -21.961 -25.262 - -4.70 8.40 -21.882 -25.345 - -4.70 8.50 -21.801 -25.434 - -4.70 8.60 -21.717 -25.526 - -4.70 8.70 -21.632 -25.614 - -4.70 8.80 -21.546 -25.693 - -4.70 8.90 -21.458 -25.766 - -4.70 9.00 -21.369 -25.830 - -4.60 1.00 -25.422 -21.587 - -4.60 1.10 -25.362 -21.635 - -4.60 1.20 -25.293 -21.682 - -4.60 1.30 -25.209 -21.729 - -4.60 1.40 -25.097 -21.775 - -4.60 1.50 -24.949 -21.820 - -4.60 1.60 -24.770 -21.864 - -4.60 1.70 -24.577 -21.908 - -4.60 1.80 -24.389 -21.951 - -4.60 1.90 -24.217 -21.993 - -4.60 2.00 -24.065 -22.035 - -4.60 2.10 -23.935 -22.076 - -4.60 2.20 -23.825 -22.117 - -4.60 2.30 -23.732 -22.157 - -4.60 2.40 -23.653 -22.196 - -4.60 2.50 -23.584 -22.236 - -4.60 2.60 -23.520 -22.275 - -4.60 2.70 -23.460 -22.314 - -4.60 2.80 -23.404 -22.354 - -4.60 2.90 -23.353 -22.394 - -4.60 3.00 -23.306 -22.434 - -4.60 3.10 -23.265 -22.475 - -4.60 3.20 -23.230 -22.516 - -4.60 3.30 -23.202 -22.559 - -4.60 3.40 -23.182 -22.602 - -4.60 3.50 -23.171 -22.647 - -4.60 3.60 -23.167 -22.692 - -4.60 3.70 -23.168 -22.738 - -4.60 3.80 -23.169 -22.784 - -4.60 3.90 -23.162 -22.831 - -4.60 4.00 -23.143 -22.879 - -4.60 4.10 -23.098 -22.927 - -4.60 4.20 -23.009 -22.975 - -4.60 4.30 -22.862 -23.022 - -4.60 4.40 -22.667 -23.067 - -4.60 4.50 -22.448 -23.111 - -4.60 4.60 -22.232 -23.152 - -4.60 4.70 -22.029 -23.192 - -4.60 4.80 -21.846 -23.231 - -4.60 4.90 -21.690 -23.278 - -4.60 5.00 -21.569 -23.351 - -4.60 5.10 -21.486 -23.452 - -4.60 5.20 -21.435 -23.547 - -4.60 5.30 -21.418 -23.641 - -4.60 5.40 -21.475 -23.754 - -4.60 5.50 -21.622 -23.882 - -4.60 5.60 -21.730 -23.976 - -4.60 5.70 -21.773 -24.048 - -4.60 5.80 -21.824 -24.124 - -4.60 5.90 -21.837 -24.187 - -4.60 6.00 -21.845 -24.246 - -4.60 6.10 -21.864 -24.317 - -4.60 6.20 -21.930 -24.403 - -4.60 6.30 -22.119 -24.547 - -4.60 6.40 -22.294 -24.723 - -4.60 6.50 -22.423 -24.887 - -4.60 6.60 -22.505 -25.016 - -4.60 6.70 -22.544 -25.117 - -4.60 6.80 -22.559 -25.203 - -4.60 6.90 -22.560 -25.279 - -4.60 7.00 -22.554 -25.226 - -4.60 7.10 -22.554 -24.631 - -4.60 7.20 -22.605 -24.117 - -4.60 7.30 -22.613 -24.172 - -4.60 7.40 -22.575 -24.466 - -4.60 7.50 -22.535 -24.556 - -4.60 7.60 -22.482 -24.631 - -4.60 7.70 -22.423 -24.727 - -4.60 7.80 -22.360 -24.831 - -4.60 7.90 -22.296 -24.939 - -4.60 8.00 -22.231 -25.055 - -4.60 8.10 -22.162 -25.169 - -4.60 8.20 -22.092 -25.280 - -4.60 8.30 -22.019 -25.390 - -4.60 8.40 -21.943 -25.497 - -4.60 8.50 -21.865 -25.601 - -4.60 8.60 -21.784 -25.697 - -4.60 8.70 -21.702 -25.784 - -4.60 8.80 -21.618 -25.858 - -4.60 8.90 -21.532 -25.924 - -4.60 9.00 -21.444 -25.980 - -4.50 1.00 -25.426 -21.604 - -4.50 1.10 -25.364 -21.653 - -4.50 1.20 -25.293 -21.701 - -4.50 1.30 -25.204 -21.749 - -4.50 1.40 -25.089 -21.797 - -4.50 1.50 -24.942 -21.843 - -4.50 1.60 -24.766 -21.889 - -4.50 1.70 -24.578 -21.935 - -4.50 1.80 -24.392 -21.979 - -4.50 1.90 -24.218 -22.023 - -4.50 2.00 -24.063 -22.066 - -4.50 2.10 -23.929 -22.109 - -4.50 2.20 -23.815 -22.151 - -4.50 2.30 -23.720 -22.192 - -4.50 2.40 -23.641 -22.232 - -4.50 2.50 -23.573 -22.272 - -4.50 2.60 -23.513 -22.312 - -4.50 2.70 -23.458 -22.352 - -4.50 2.80 -23.408 -22.391 - -4.50 2.90 -23.362 -22.431 - -4.50 3.00 -23.320 -22.471 - -4.50 3.10 -23.283 -22.511 - -4.50 3.20 -23.250 -22.552 - -4.50 3.30 -23.224 -22.593 - -4.50 3.40 -23.204 -22.636 - -4.50 3.50 -23.192 -22.679 - -4.50 3.60 -23.187 -22.723 - -4.50 3.70 -23.185 -22.768 - -4.50 3.80 -23.181 -22.814 - -4.50 3.90 -23.169 -22.860 - -4.50 4.00 -23.142 -22.908 - -4.50 4.10 -23.088 -22.955 - -4.50 4.20 -22.988 -23.003 - -4.50 4.30 -22.832 -23.050 - -4.50 4.40 -22.632 -23.096 - -4.50 4.50 -22.410 -23.140 - -4.50 4.60 -22.190 -23.183 - -4.50 4.70 -21.985 -23.226 - -4.50 4.80 -21.803 -23.269 - -4.50 4.90 -21.649 -23.321 - -4.50 5.00 -21.535 -23.407 - -4.50 5.10 -21.462 -23.522 - -4.50 5.20 -21.413 -23.622 - -4.50 5.30 -21.391 -23.716 - -4.50 5.40 -21.436 -23.829 - -4.50 5.50 -21.596 -23.966 - -4.50 5.60 -21.720 -24.066 - -4.50 5.70 -21.764 -24.138 - -4.50 5.80 -21.814 -24.216 - -4.50 5.90 -21.834 -24.280 - -4.50 6.00 -21.838 -24.340 - -4.50 6.10 -21.861 -24.412 - -4.50 6.20 -21.925 -24.497 - -4.50 6.30 -22.112 -24.641 - -4.50 6.40 -22.289 -24.818 - -4.50 6.50 -22.421 -24.985 - -4.50 6.60 -22.506 -25.116 - -4.50 6.70 -22.546 -25.219 - -4.50 6.80 -22.563 -25.307 - -4.50 6.90 -22.566 -25.385 - -4.50 7.00 -22.561 -25.359 - -4.50 7.10 -22.561 -24.828 - -4.50 7.20 -22.601 -24.331 - -4.50 7.30 -22.629 -24.206 - -4.50 7.40 -22.606 -24.312 - -4.50 7.50 -22.565 -24.444 - -4.50 7.60 -22.513 -24.571 - -4.50 7.70 -22.456 -24.699 - -4.50 7.80 -22.396 -24.838 - -4.50 7.90 -22.334 -24.994 - -4.50 8.00 -22.273 -25.149 - -4.50 8.10 -22.208 -25.297 - -4.50 8.20 -22.141 -25.433 - -4.50 8.30 -22.071 -25.559 - -4.50 8.40 -21.998 -25.674 - -4.50 8.50 -21.924 -25.778 - -4.50 8.60 -21.846 -25.870 - -4.50 8.70 -21.766 -25.950 - -4.50 8.80 -21.684 -26.017 - -4.50 8.90 -21.601 -26.074 - -4.50 9.00 -21.516 -26.121 - -4.40 1.00 -25.430 -21.619 - -4.40 1.10 -25.367 -21.669 - -4.40 1.20 -25.292 -21.718 - -4.40 1.30 -25.198 -21.768 - -4.40 1.40 -25.079 -21.816 - -4.40 1.50 -24.932 -21.864 - -4.40 1.60 -24.762 -21.912 - -4.40 1.70 -24.579 -21.959 - -4.40 1.80 -24.397 -22.005 - -4.40 1.90 -24.224 -22.051 - -4.40 2.00 -24.067 -22.095 - -4.40 2.10 -23.929 -22.139 - -4.40 2.20 -23.811 -22.183 - -4.40 2.30 -23.713 -22.225 - -4.40 2.40 -23.632 -22.267 - -4.40 2.50 -23.564 -22.308 - -4.40 2.60 -23.506 -22.349 - -4.40 2.70 -23.455 -22.389 - -4.40 2.80 -23.410 -22.429 - -4.40 2.90 -23.368 -22.469 - -4.40 3.00 -23.331 -22.508 - -4.40 3.10 -23.298 -22.548 - -4.40 3.20 -23.269 -22.588 - -4.40 3.30 -23.245 -22.629 - -4.40 3.40 -23.226 -22.671 - -4.40 3.50 -23.214 -22.713 - -4.40 3.60 -23.208 -22.756 - -4.40 3.70 -23.203 -22.800 - -4.40 3.80 -23.196 -22.845 - -4.40 3.90 -23.178 -22.891 - -4.40 4.00 -23.143 -22.937 - -4.40 4.10 -23.079 -22.984 - -4.40 4.20 -22.970 -23.032 - -4.40 4.30 -22.806 -23.078 - -4.40 4.40 -22.601 -23.124 - -4.40 4.50 -22.375 -23.170 - -4.40 4.60 -22.152 -23.215 - -4.40 4.70 -21.945 -23.260 - -4.40 4.80 -21.762 -23.307 - -4.40 4.90 -21.610 -23.366 - -4.40 5.00 -21.502 -23.467 - -4.40 5.10 -21.442 -23.597 - -4.40 5.20 -21.397 -23.702 - -4.40 5.30 -21.370 -23.795 - -4.40 5.40 -21.404 -23.908 - -4.40 5.50 -21.569 -24.052 - -4.40 5.60 -21.712 -24.158 - -4.40 5.70 -21.757 -24.231 - -4.40 5.80 -21.807 -24.309 - -4.40 5.90 -21.827 -24.374 - -4.40 6.00 -21.835 -24.435 - -4.40 6.10 -21.859 -24.507 - -4.40 6.20 -21.918 -24.592 - -4.40 6.30 -22.106 -24.735 - -4.40 6.40 -22.284 -24.914 - -4.40 6.50 -22.419 -25.083 - -4.40 6.60 -22.506 -25.216 - -4.40 6.70 -22.548 -25.320 - -4.40 6.80 -22.566 -25.409 - -4.40 6.90 -22.570 -25.490 - -4.40 7.00 -22.567 -25.485 - -4.40 7.10 -22.567 -25.016 - -4.40 7.20 -22.603 -24.541 - -4.40 7.30 -22.631 -24.384 - -4.40 7.40 -22.618 -24.443 - -4.40 7.50 -22.583 -24.568 - -4.40 7.60 -22.535 -24.698 - -4.40 7.70 -22.482 -24.829 - -4.40 7.80 -22.426 -24.976 - -4.40 7.90 -22.367 -25.146 - -4.40 8.00 -22.309 -25.315 - -4.40 8.10 -22.248 -25.474 - -4.40 8.20 -22.184 -25.616 - -4.40 8.30 -22.118 -25.743 - -4.40 8.40 -22.048 -25.855 - -4.40 8.50 -21.977 -25.953 - -4.40 8.60 -21.901 -26.038 - -4.40 8.70 -21.825 -26.109 - -4.40 8.80 -21.746 -26.166 - -4.40 8.90 -21.665 -26.215 - -4.40 9.00 -21.582 -26.253 - -4.30 1.00 -25.434 -21.632 - -4.30 1.10 -25.369 -21.683 - -4.30 1.20 -25.291 -21.734 - -4.30 1.30 -25.191 -21.784 - -4.30 1.40 -25.068 -21.834 - -4.30 1.50 -24.921 -21.883 - -4.30 1.60 -24.755 -21.932 - -4.30 1.70 -24.580 -21.981 - -4.30 1.80 -24.403 -22.028 - -4.30 1.90 -24.233 -22.076 - -4.30 2.00 -24.074 -22.122 - -4.30 2.10 -23.933 -22.168 - -4.30 2.20 -23.811 -22.212 - -4.30 2.30 -23.709 -22.256 - -4.30 2.40 -23.626 -22.300 - -4.30 2.50 -23.558 -22.342 - -4.30 2.60 -23.501 -22.384 - -4.30 2.70 -23.453 -22.425 - -4.30 2.80 -23.411 -22.466 - -4.30 2.90 -23.374 -22.506 - -4.30 3.00 -23.341 -22.546 - -4.30 3.10 -23.312 -22.586 - -4.30 3.20 -23.287 -22.626 - -4.30 3.30 -23.265 -22.667 - -4.30 3.40 -23.248 -22.708 - -4.30 3.50 -23.237 -22.749 - -4.30 3.60 -23.230 -22.791 - -4.30 3.70 -23.224 -22.834 - -4.30 3.80 -23.214 -22.878 - -4.30 3.90 -23.190 -22.922 - -4.30 4.00 -23.146 -22.968 - -4.30 4.10 -23.073 -23.014 - -4.30 4.20 -22.954 -23.061 - -4.30 4.30 -22.783 -23.108 - -4.30 4.40 -22.574 -23.154 - -4.30 4.50 -22.346 -23.200 - -4.30 4.60 -22.119 -23.247 - -4.30 4.70 -21.911 -23.295 - -4.30 4.80 -21.727 -23.345 - -4.30 4.90 -21.574 -23.412 - -4.30 5.00 -21.472 -23.529 - -4.30 5.10 -21.424 -23.675 - -4.30 5.20 -21.384 -23.785 - -4.30 5.30 -21.355 -23.879 - -4.30 5.40 -21.381 -23.990 - -4.30 5.50 -21.550 -24.140 - -4.30 5.60 -21.705 -24.251 - -4.30 5.70 -21.750 -24.324 - -4.30 5.80 -21.803 -24.403 - -4.30 5.90 -21.822 -24.470 - -4.30 6.00 -21.832 -24.530 - -4.30 6.10 -21.856 -24.603 - -4.30 6.20 -21.916 -24.688 - -4.30 6.30 -22.099 -24.831 - -4.30 6.40 -22.280 -25.011 - -4.30 6.50 -22.417 -25.181 - -4.30 6.60 -22.506 -25.315 - -4.30 6.70 -22.550 -25.421 - -4.30 6.80 -22.569 -25.511 - -4.30 6.90 -22.574 -25.593 - -4.30 7.00 -22.571 -25.607 - -4.30 7.10 -22.573 -25.199 - -4.30 7.20 -22.606 -24.744 - -4.30 7.30 -22.633 -24.586 - -4.30 7.40 -22.626 -24.632 - -4.30 7.50 -22.597 -24.752 - -4.30 7.60 -22.553 -24.880 - -4.30 7.70 -22.504 -25.011 - -4.30 7.80 -22.451 -25.159 - -4.30 7.90 -22.396 -25.334 - -4.30 8.00 -22.341 -25.505 - -4.30 8.10 -22.282 -25.664 - -4.30 8.20 -22.222 -25.804 - -4.30 8.30 -22.159 -25.926 - -4.30 8.40 -22.092 -26.031 - -4.30 8.50 -22.024 -26.120 - -4.30 8.60 -21.952 -26.195 - -4.30 8.70 -21.878 -26.257 - -4.30 8.80 -21.802 -26.306 - -4.30 8.90 -21.723 -26.347 - -4.30 9.00 -21.643 -26.379 - -4.20 1.00 -25.440 -21.645 - -4.20 1.10 -25.372 -21.697 - -4.20 1.20 -25.289 -21.748 - -4.20 1.30 -25.184 -21.799 - -4.20 1.40 -25.055 -21.850 - -4.20 1.50 -24.907 -21.900 - -4.20 1.60 -24.746 -21.950 - -4.20 1.70 -24.579 -22.000 - -4.20 1.80 -24.409 -22.049 - -4.20 1.90 -24.242 -22.098 - -4.20 2.00 -24.084 -22.146 - -4.20 2.10 -23.940 -22.193 - -4.20 2.20 -23.815 -22.240 - -4.20 2.30 -23.709 -22.285 - -4.20 2.40 -23.623 -22.330 - -4.20 2.50 -23.554 -22.374 - -4.20 2.60 -23.497 -22.417 - -4.20 2.70 -23.451 -22.460 - -4.20 2.80 -23.412 -22.502 - -4.20 2.90 -23.379 -22.543 - -4.20 3.00 -23.350 -22.584 - -4.20 3.10 -23.325 -22.624 - -4.20 3.20 -23.303 -22.664 - -4.20 3.30 -23.284 -22.704 - -4.20 3.40 -23.270 -22.745 - -4.20 3.50 -23.259 -22.786 - -4.20 3.60 -23.253 -22.828 - -4.20 3.70 -23.246 -22.870 - -4.20 3.80 -23.233 -22.912 - -4.20 3.90 -23.203 -22.956 - -4.20 4.00 -23.151 -23.001 - -4.20 4.10 -23.068 -23.046 - -4.20 4.20 -22.940 -23.092 - -4.20 4.30 -22.763 -23.138 - -4.20 4.40 -22.550 -23.184 - -4.20 4.50 -22.319 -23.232 - -4.20 4.60 -22.091 -23.281 - -4.20 4.70 -21.882 -23.331 - -4.20 4.80 -21.696 -23.385 - -4.20 4.90 -21.543 -23.460 - -4.20 5.00 -21.445 -23.595 - -4.20 5.10 -21.409 -23.756 - -4.20 5.20 -21.374 -23.871 - -4.20 5.30 -21.344 -23.965 - -4.20 5.40 -21.362 -24.076 - -4.20 5.50 -21.535 -24.229 - -4.20 5.60 -21.699 -24.345 - -4.20 5.70 -21.744 -24.418 - -4.20 5.80 -21.796 -24.497 - -4.20 5.90 -21.820 -24.566 - -4.20 6.00 -21.830 -24.627 - -4.20 6.10 -21.852 -24.700 - -4.20 6.20 -21.913 -24.785 - -4.20 6.30 -22.093 -24.927 - -4.20 6.40 -22.278 -25.108 - -4.20 6.50 -22.415 -25.279 - -4.20 6.60 -22.506 -25.415 - -4.20 6.70 -22.551 -25.521 - -4.20 6.80 -22.571 -25.612 - -4.20 6.90 -22.576 -25.695 - -4.20 7.00 -22.575 -25.725 - -4.20 7.10 -22.577 -25.375 - -4.20 7.20 -22.609 -24.942 - -4.20 7.30 -22.636 -24.790 - -4.20 7.40 -22.633 -24.836 - -4.20 7.50 -22.608 -24.956 - -4.20 7.60 -22.568 -25.083 - -4.20 7.70 -22.522 -25.213 - -4.20 7.80 -22.471 -25.359 - -4.20 7.90 -22.419 -25.532 - -4.20 8.00 -22.367 -25.700 - -4.20 8.10 -22.312 -25.855 - -4.20 8.20 -22.255 -25.988 - -4.20 8.30 -22.195 -26.101 - -4.20 8.40 -22.131 -26.197 - -4.20 8.50 -22.066 -26.277 - -4.20 8.60 -21.996 -26.343 - -4.20 8.70 -21.926 -26.397 - -4.20 8.80 -21.852 -26.438 - -4.20 8.90 -21.776 -26.472 - -4.20 9.00 -21.699 -26.498 - -4.10 1.00 -25.446 -21.658 - -4.10 1.10 -25.376 -21.709 - -4.10 1.20 -25.288 -21.761 - -4.10 1.30 -25.175 -21.812 - -4.10 1.40 -25.041 -21.864 - -4.10 1.50 -24.892 -21.915 - -4.10 1.60 -24.735 -21.966 - -4.10 1.70 -24.575 -22.017 - -4.10 1.80 -24.413 -22.068 - -4.10 1.90 -24.252 -22.118 - -4.10 2.00 -24.095 -22.167 - -4.10 2.10 -23.950 -22.216 - -4.10 2.20 -23.822 -22.264 - -4.10 2.30 -23.713 -22.311 - -4.10 2.40 -23.624 -22.358 - -4.10 2.50 -23.553 -22.404 - -4.10 2.60 -23.496 -22.449 - -4.10 2.70 -23.450 -22.493 - -4.10 2.80 -23.413 -22.536 - -4.10 2.90 -23.383 -22.578 - -4.10 3.00 -23.358 -22.620 - -4.10 3.10 -23.337 -22.661 - -4.10 3.20 -23.318 -22.702 - -4.10 3.30 -23.303 -22.743 - -4.10 3.40 -23.291 -22.783 - -4.10 3.50 -23.282 -22.824 - -4.10 3.60 -23.277 -22.865 - -4.10 3.70 -23.268 -22.907 - -4.10 3.80 -23.252 -22.949 - -4.10 3.90 -23.217 -22.991 - -4.10 4.00 -23.157 -23.035 - -4.10 4.10 -23.064 -23.079 - -4.10 4.20 -22.927 -23.124 - -4.10 4.30 -22.745 -23.170 - -4.10 4.40 -22.529 -23.216 - -4.10 4.50 -22.296 -23.264 - -4.10 4.60 -22.067 -23.315 - -4.10 4.70 -21.857 -23.369 - -4.10 4.80 -21.671 -23.427 - -4.10 4.90 -21.515 -23.510 - -4.10 5.00 -21.420 -23.664 - -4.10 5.10 -21.395 -23.840 - -4.10 5.20 -21.366 -23.960 - -4.10 5.30 -21.334 -24.055 - -4.10 5.40 -21.347 -24.164 - -4.10 5.50 -21.522 -24.320 - -4.10 5.60 -21.695 -24.440 - -4.10 5.70 -21.741 -24.514 - -4.10 5.80 -21.792 -24.593 - -4.10 5.90 -21.818 -24.662 - -4.10 6.00 -21.829 -24.723 - -4.10 6.10 -21.849 -24.797 - -4.10 6.20 -21.904 -24.882 - -4.10 6.30 -22.091 -25.024 - -4.10 6.40 -22.274 -25.205 - -4.10 6.50 -22.414 -25.378 - -4.10 6.60 -22.506 -25.514 - -4.10 6.70 -22.552 -25.621 - -4.10 6.80 -22.572 -25.713 - -4.10 6.90 -22.578 -25.797 - -4.10 7.00 -22.578 -25.839 - -4.10 7.10 -22.580 -25.545 - -4.10 7.20 -22.612 -25.136 - -4.10 7.30 -22.640 -24.991 - -4.10 7.40 -22.639 -25.040 - -4.10 7.50 -22.617 -25.162 - -4.10 7.60 -22.580 -25.290 - -4.10 7.70 -22.537 -25.418 - -4.10 7.80 -22.489 -25.561 - -4.10 7.90 -22.439 -25.729 - -4.10 8.00 -22.389 -25.892 - -4.10 8.10 -22.337 -26.038 - -4.10 8.20 -22.283 -26.163 - -4.10 8.30 -22.225 -26.268 - -4.10 8.40 -22.164 -26.355 - -4.10 8.50 -22.102 -26.425 - -4.10 8.60 -22.035 -26.483 - -4.10 8.70 -21.967 -26.528 - -4.10 8.80 -21.897 -26.563 - -4.10 8.90 -21.824 -26.591 - -4.10 9.00 -21.749 -26.613 - -4.00 1.00 -25.453 -21.671 - -4.00 1.10 -25.380 -21.722 - -4.00 1.20 -25.287 -21.774 - -4.00 1.30 -25.167 -21.826 - -4.00 1.40 -25.026 -21.877 - -4.00 1.50 -24.874 -21.929 - -4.00 1.60 -24.721 -21.981 - -4.00 1.70 -24.567 -22.033 - -4.00 1.80 -24.414 -22.084 - -4.00 1.90 -24.260 -22.135 - -4.00 2.00 -24.107 -22.186 - -4.00 2.10 -23.962 -22.236 - -4.00 2.20 -23.832 -22.286 - -4.00 2.30 -23.721 -22.335 - -4.00 2.40 -23.629 -22.383 - -4.00 2.50 -23.555 -22.431 - -4.00 2.60 -23.497 -22.477 - -4.00 2.70 -23.451 -22.523 - -4.00 2.80 -23.415 -22.568 - -4.00 2.90 -23.387 -22.612 - -4.00 3.00 -23.365 -22.655 - -4.00 3.10 -23.347 -22.697 - -4.00 3.20 -23.332 -22.739 - -4.00 3.30 -23.321 -22.780 - -4.00 3.40 -23.311 -22.821 - -4.00 3.50 -23.305 -22.862 - -4.00 3.60 -23.300 -22.903 - -4.00 3.70 -23.291 -22.945 - -4.00 3.80 -23.273 -22.986 - -4.00 3.90 -23.232 -23.028 - -4.00 4.00 -23.162 -23.071 - -4.00 4.10 -23.060 -23.114 - -4.00 4.20 -22.915 -23.158 - -4.00 4.30 -22.727 -23.203 - -4.00 4.40 -22.508 -23.249 - -4.00 4.50 -22.274 -23.298 - -4.00 4.60 -22.046 -23.351 - -4.00 4.70 -21.838 -23.408 - -4.00 4.80 -21.651 -23.469 - -4.00 4.90 -21.491 -23.562 - -4.00 5.00 -21.397 -23.736 - -4.00 5.10 -21.383 -23.926 - -4.00 5.20 -21.359 -24.050 - -4.00 5.30 -21.326 -24.146 - -4.00 5.40 -21.335 -24.254 - -4.00 5.50 -21.511 -24.413 - -4.00 5.60 -21.690 -24.535 - -4.00 5.70 -21.737 -24.610 - -4.00 5.80 -21.790 -24.689 - -4.00 5.90 -21.817 -24.759 - -4.00 6.00 -21.823 -24.821 - -4.00 6.10 -21.844 -24.895 - -4.00 6.20 -21.903 -24.980 - -4.00 6.30 -22.088 -25.122 - -4.00 6.40 -22.272 -25.303 - -4.00 6.50 -22.412 -25.476 - -4.00 6.60 -22.505 -25.613 - -4.00 6.70 -22.552 -25.721 - -4.00 6.80 -22.573 -25.813 - -4.00 6.90 -22.580 -25.898 - -4.00 7.00 -22.579 -25.950 - -4.00 7.10 -22.582 -25.709 - -4.00 7.20 -22.615 -25.327 - -4.00 7.30 -22.644 -25.188 - -4.00 7.40 -22.645 -25.241 - -4.00 7.50 -22.625 -25.365 - -4.00 7.60 -22.590 -25.493 - -4.00 7.70 -22.549 -25.620 - -4.00 7.80 -22.503 -25.759 - -4.00 7.90 -22.455 -25.921 - -4.00 8.00 -22.408 -26.076 - -4.00 8.10 -22.358 -26.214 - -4.00 8.20 -22.306 -26.330 - -4.00 8.30 -22.251 -26.425 - -4.00 8.40 -22.193 -26.503 - -4.00 8.50 -22.133 -26.565 - -4.00 8.60 -22.069 -26.615 - -4.00 8.70 -22.004 -26.654 - -4.00 8.80 -21.936 -26.683 - -4.00 8.90 -21.865 -26.707 - -4.00 9.00 -21.793 -26.725 - -3.90 1.00 -25.461 -21.685 - -3.90 1.10 -25.386 -21.735 - -3.90 1.20 -25.286 -21.787 - -3.90 1.30 -25.159 -21.838 - -3.90 1.40 -25.011 -21.890 - -3.90 1.50 -24.856 -21.943 - -3.90 1.60 -24.704 -21.995 - -3.90 1.70 -24.557 -22.047 - -3.90 1.80 -24.412 -22.099 - -3.90 1.90 -24.265 -22.151 - -3.90 2.00 -24.117 -22.203 - -3.90 2.10 -23.975 -22.255 - -3.90 2.20 -23.844 -22.306 - -3.90 2.30 -23.731 -22.356 - -3.90 2.40 -23.637 -22.406 - -3.90 2.50 -23.561 -22.455 - -3.90 2.60 -23.501 -22.503 - -3.90 2.70 -23.454 -22.551 - -3.90 2.80 -23.419 -22.597 - -3.90 2.90 -23.392 -22.643 - -3.90 3.00 -23.371 -22.688 - -3.90 3.10 -23.356 -22.732 - -3.90 3.20 -23.345 -22.775 - -3.90 3.30 -23.337 -22.817 - -3.90 3.40 -23.331 -22.859 - -3.90 3.50 -23.327 -22.901 - -3.90 3.60 -23.323 -22.942 - -3.90 3.70 -23.314 -22.983 - -3.90 3.80 -23.292 -23.024 - -3.90 3.90 -23.246 -23.066 - -3.90 4.00 -23.168 -23.108 - -3.90 4.10 -23.056 -23.151 - -3.90 4.20 -22.903 -23.194 - -3.90 4.30 -22.710 -23.238 - -3.90 4.40 -22.487 -23.284 - -3.90 4.50 -22.253 -23.333 - -3.90 4.60 -22.027 -23.389 - -3.90 4.70 -21.821 -23.449 - -3.90 4.80 -21.633 -23.514 - -3.90 4.90 -21.471 -23.616 - -3.90 5.00 -21.378 -23.812 - -3.90 5.10 -21.373 -24.014 - -3.90 5.20 -21.353 -24.142 - -3.90 5.30 -21.321 -24.239 - -3.90 5.40 -21.326 -24.346 - -3.90 5.50 -21.502 -24.506 - -3.90 5.60 -21.686 -24.631 - -3.90 5.70 -21.734 -24.706 - -3.90 5.80 -21.787 -24.786 - -3.90 5.90 -21.816 -24.856 - -3.90 6.00 -21.821 -24.918 - -3.90 6.10 -21.842 -24.992 - -3.90 6.20 -21.901 -25.077 - -3.90 6.30 -22.085 -25.219 - -3.90 6.40 -22.269 -25.401 - -3.90 6.50 -22.409 -25.574 - -3.90 6.60 -22.504 -25.712 - -3.90 6.70 -22.552 -25.820 - -3.90 6.80 -22.574 -25.912 - -3.90 6.90 -22.580 -25.998 - -3.90 7.00 -22.579 -26.059 - -3.90 7.10 -22.582 -25.867 - -3.90 7.20 -22.617 -25.515 - -3.90 7.30 -22.647 -25.383 - -3.90 7.40 -22.650 -25.438 - -3.90 7.50 -22.632 -25.563 - -3.90 7.60 -22.599 -25.691 - -3.90 7.70 -22.559 -25.815 - -3.90 7.80 -22.515 -25.951 - -3.90 7.90 -22.469 -26.105 - -3.90 8.00 -22.424 -26.252 - -3.90 8.10 -22.375 -26.381 - -3.90 8.20 -22.325 -26.488 - -3.90 8.30 -22.273 -26.574 - -3.90 8.40 -22.217 -26.643 - -3.90 8.50 -22.159 -26.698 - -3.90 8.60 -22.098 -26.741 - -3.90 8.70 -22.035 -26.774 - -3.90 8.80 -21.970 -26.799 - -3.90 8.90 -21.901 -26.819 - -3.90 9.00 -21.832 -26.834 - -3.80 1.00 -25.472 -21.699 - -3.80 1.10 -25.393 -21.749 - -3.80 1.20 -25.288 -21.800 - -3.80 1.30 -25.152 -21.851 - -3.80 1.40 -24.997 -21.903 - -3.80 1.50 -24.838 -21.955 - -3.80 1.60 -24.687 -22.008 - -3.80 1.70 -24.544 -22.060 - -3.80 1.80 -24.406 -22.113 - -3.80 1.90 -24.267 -22.166 - -3.80 2.00 -24.126 -22.218 - -3.80 2.10 -23.987 -22.271 - -3.80 2.20 -23.857 -22.323 - -3.80 2.30 -23.743 -22.375 - -3.80 2.40 -23.647 -22.426 - -3.80 2.50 -23.569 -22.477 - -3.80 2.60 -23.507 -22.527 - -3.80 2.70 -23.460 -22.576 - -3.80 2.80 -23.423 -22.624 - -3.80 2.90 -23.397 -22.672 - -3.80 3.00 -23.378 -22.718 - -3.80 3.10 -23.365 -22.764 - -3.80 3.20 -23.357 -22.808 - -3.80 3.30 -23.352 -22.852 - -3.80 3.40 -23.349 -22.895 - -3.80 3.50 -23.347 -22.938 - -3.80 3.60 -23.345 -22.980 - -3.80 3.70 -23.335 -23.022 - -3.80 3.80 -23.311 -23.063 - -3.80 3.90 -23.259 -23.105 - -3.80 4.00 -23.173 -23.146 - -3.80 4.10 -23.052 -23.189 - -3.80 4.20 -22.891 -23.231 - -3.80 4.30 -22.692 -23.274 - -3.80 4.40 -22.467 -23.320 - -3.80 4.50 -22.232 -23.371 - -3.80 4.60 -22.009 -23.430 - -3.80 4.70 -21.806 -23.493 - -3.80 4.80 -21.619 -23.560 - -3.80 4.90 -21.453 -23.674 - -3.80 5.00 -21.361 -23.891 - -3.80 5.10 -21.364 -24.104 - -3.80 5.20 -21.348 -24.234 - -3.80 5.30 -21.317 -24.333 - -3.80 5.40 -21.319 -24.439 - -3.80 5.50 -21.495 -24.601 - -3.80 5.60 -21.684 -24.728 - -3.80 5.70 -21.732 -24.803 - -3.80 5.80 -21.786 -24.883 - -3.80 5.90 -21.812 -24.954 - -3.80 6.00 -21.819 -25.016 - -3.80 6.10 -21.841 -25.090 - -3.80 6.20 -21.899 -25.175 - -3.80 6.30 -22.081 -25.317 - -3.80 6.40 -22.263 -25.498 - -3.80 6.50 -22.408 -25.672 - -3.80 6.60 -22.504 -25.811 - -3.80 6.70 -22.552 -25.918 - -3.80 6.80 -22.574 -26.011 - -3.80 6.90 -22.580 -26.098 - -3.80 7.00 -22.578 -26.165 - -3.80 7.10 -22.583 -26.018 - -3.80 7.20 -22.619 -25.699 - -3.80 7.30 -22.650 -25.574 - -3.80 7.40 -22.654 -25.631 - -3.80 7.50 -22.638 -25.756 - -3.80 7.60 -22.606 -25.883 - -3.80 7.70 -22.567 -26.005 - -3.80 7.80 -22.524 -26.135 - -3.80 7.90 -22.480 -26.282 - -3.80 8.00 -22.436 -26.420 - -3.80 8.10 -22.389 -26.540 - -3.80 8.20 -22.342 -26.638 - -3.80 8.30 -22.291 -26.716 - -3.80 8.40 -22.237 -26.777 - -3.80 8.50 -22.182 -26.825 - -3.80 8.60 -22.122 -26.862 - -3.80 8.70 -22.061 -26.890 - -3.80 8.80 -21.998 -26.912 - -3.80 8.90 -21.932 -26.928 - -3.80 9.00 -21.865 -26.941 - -3.70 1.00 -25.484 -21.715 - -3.70 1.10 -25.402 -21.764 - -3.70 1.20 -25.291 -21.814 - -3.70 1.30 -25.148 -21.865 - -3.70 1.40 -24.985 -21.917 - -3.70 1.50 -24.821 -21.968 - -3.70 1.60 -24.669 -22.021 - -3.70 1.70 -24.530 -22.073 - -3.70 1.80 -24.398 -22.126 - -3.70 1.90 -24.266 -22.179 - -3.70 2.00 -24.132 -22.233 - -3.70 2.10 -23.997 -22.286 - -3.70 2.20 -23.870 -22.339 - -3.70 2.30 -23.757 -22.391 - -3.70 2.40 -23.660 -22.444 - -3.70 2.50 -23.580 -22.496 - -3.70 2.60 -23.517 -22.547 - -3.70 2.70 -23.467 -22.598 - -3.70 2.80 -23.430 -22.648 - -3.70 2.90 -23.403 -22.697 - -3.70 3.00 -23.385 -22.746 - -3.70 3.10 -23.374 -22.793 - -3.70 3.20 -23.368 -22.840 - -3.70 3.30 -23.365 -22.885 - -3.70 3.40 -23.365 -22.930 - -3.70 3.50 -23.366 -22.974 - -3.70 3.60 -23.366 -23.017 - -3.70 3.70 -23.355 -23.060 - -3.70 3.80 -23.329 -23.102 - -3.70 3.90 -23.271 -23.144 - -3.70 4.00 -23.177 -23.185 - -3.70 4.10 -23.047 -23.227 - -3.70 4.20 -22.879 -23.270 - -3.70 4.30 -22.674 -23.312 - -3.70 4.40 -22.445 -23.358 - -3.70 4.50 -22.211 -23.411 - -3.70 4.60 -21.993 -23.473 - -3.70 4.70 -21.794 -23.539 - -3.70 4.80 -21.607 -23.609 - -3.70 4.90 -21.438 -23.735 - -3.70 5.00 -21.346 -23.972 - -3.70 5.10 -21.356 -24.196 - -3.70 5.20 -21.344 -24.328 - -3.70 5.30 -21.313 -24.427 - -3.70 5.40 -21.313 -24.534 - -3.70 5.50 -21.489 -24.696 - -3.70 5.60 -21.682 -24.824 - -3.70 5.70 -21.731 -24.899 - -3.70 5.80 -21.785 -24.980 - -3.70 5.90 -21.811 -25.051 - -3.70 6.00 -21.817 -25.114 - -3.70 6.10 -21.840 -25.188 - -3.70 6.20 -21.899 -25.273 - -3.70 6.30 -22.076 -25.414 - -3.70 6.40 -22.262 -25.596 - -3.70 6.50 -22.407 -25.770 - -3.70 6.60 -22.503 -25.909 - -3.70 6.70 -22.552 -26.016 - -3.70 6.80 -22.573 -26.110 - -3.70 6.90 -22.579 -26.197 - -3.70 7.00 -22.576 -26.269 - -3.70 7.10 -22.582 -26.163 - -3.70 7.20 -22.620 -25.879 - -3.70 7.30 -22.652 -25.763 - -3.70 7.40 -22.657 -25.821 - -3.70 7.50 -22.642 -25.945 - -3.70 7.60 -22.612 -26.070 - -3.70 7.70 -22.574 -26.188 - -3.70 7.80 -22.532 -26.313 - -3.70 7.90 -22.489 -26.451 - -3.70 8.00 -22.447 -26.580 - -3.70 8.10 -22.401 -26.691 - -3.70 8.20 -22.355 -26.780 - -3.70 8.30 -22.306 -26.850 - -3.70 8.40 -22.253 -26.905 - -3.70 8.50 -22.200 -26.947 - -3.70 8.60 -22.142 -26.979 - -3.70 8.70 -22.084 -27.003 - -3.70 8.80 -22.023 -27.021 - -3.70 8.90 -21.959 -27.036 - -3.70 9.00 -21.894 -27.046 - -3.60 1.00 -25.499 -21.733 - -3.60 1.10 -25.413 -21.781 - -3.60 1.20 -25.296 -21.830 - -3.60 1.30 -25.145 -21.880 - -3.60 1.40 -24.975 -21.931 - -3.60 1.50 -24.807 -21.982 - -3.60 1.60 -24.653 -22.034 - -3.60 1.70 -24.515 -22.086 - -3.60 1.80 -24.388 -22.139 - -3.60 1.90 -24.263 -22.192 - -3.60 2.00 -24.135 -22.246 - -3.60 2.10 -24.006 -22.299 - -3.60 2.20 -23.882 -22.353 - -3.60 2.30 -23.770 -22.407 - -3.60 2.40 -23.674 -22.460 - -3.60 2.50 -23.594 -22.513 - -3.60 2.60 -23.529 -22.566 - -3.60 2.70 -23.478 -22.618 - -3.60 2.80 -23.439 -22.670 - -3.60 2.90 -23.412 -22.721 - -3.60 3.00 -23.393 -22.771 - -3.60 3.10 -23.382 -22.820 - -3.60 3.20 -23.377 -22.868 - -3.60 3.30 -23.377 -22.915 - -3.60 3.40 -23.380 -22.962 - -3.60 3.50 -23.383 -23.008 - -3.60 3.60 -23.384 -23.052 - -3.60 3.70 -23.373 -23.096 - -3.60 3.80 -23.345 -23.139 - -3.60 3.90 -23.281 -23.182 - -3.60 4.00 -23.180 -23.224 - -3.60 4.10 -23.042 -23.267 - -3.60 4.20 -22.867 -23.309 - -3.60 4.30 -22.657 -23.352 - -3.60 4.40 -22.424 -23.398 - -3.60 4.50 -22.191 -23.453 - -3.60 4.60 -21.977 -23.520 - -3.60 4.70 -21.783 -23.589 - -3.60 4.80 -21.596 -23.661 - -3.60 4.90 -21.424 -23.799 - -3.60 5.00 -21.334 -24.057 - -3.60 5.10 -21.350 -24.289 - -3.60 5.20 -21.340 -24.423 - -3.60 5.30 -21.310 -24.523 - -3.60 5.40 -21.308 -24.629 - -3.60 5.50 -21.484 -24.791 - -3.60 5.60 -21.680 -24.921 - -3.60 5.70 -21.730 -24.996 - -3.60 5.80 -21.784 -25.077 - -3.60 5.90 -21.810 -25.149 - -3.60 6.00 -21.817 -25.211 - -3.60 6.10 -21.840 -25.286 - -3.60 6.20 -21.896 -25.371 - -3.60 6.30 -22.075 -25.511 - -3.60 6.40 -22.261 -25.693 - -3.60 6.50 -22.406 -25.868 - -3.60 6.60 -22.502 -26.006 - -3.60 6.70 -22.551 -26.114 - -3.60 6.80 -22.573 -26.207 - -3.60 6.90 -22.576 -26.294 - -3.60 7.00 -22.574 -26.371 - -3.60 7.10 -22.582 -26.301 - -3.60 7.20 -22.620 -26.054 - -3.60 7.30 -22.654 -25.948 - -3.60 7.40 -22.660 -26.008 - -3.60 7.50 -22.646 -26.129 - -3.60 7.60 -22.616 -26.251 - -3.60 7.70 -22.580 -26.365 - -3.60 7.80 -22.539 -26.484 - -3.60 7.90 -22.496 -26.612 - -3.60 8.00 -22.455 -26.732 - -3.60 8.10 -22.411 -26.834 - -3.60 8.20 -22.366 -26.915 - -3.60 8.30 -22.318 -26.978 - -3.60 8.40 -22.267 -27.027 - -3.60 8.50 -22.215 -27.064 - -3.60 8.60 -22.159 -27.092 - -3.60 8.70 -22.102 -27.113 - -3.60 8.80 -22.043 -27.129 - -3.60 8.90 -21.981 -27.141 - -3.60 9.00 -21.918 -27.150 - -3.50 1.00 -25.517 -21.754 - -3.50 1.10 -25.427 -21.800 - -3.50 1.20 -25.305 -21.848 - -3.50 1.30 -25.146 -21.896 - -3.50 1.40 -24.968 -21.946 - -3.50 1.50 -24.795 -21.997 - -3.50 1.60 -24.638 -22.048 - -3.50 1.70 -24.501 -22.100 - -3.50 1.80 -24.377 -22.152 - -3.50 1.90 -24.257 -22.205 - -3.50 2.00 -24.135 -22.259 - -3.50 2.10 -24.012 -22.312 - -3.50 2.20 -23.893 -22.366 - -3.50 2.30 -23.783 -22.420 - -3.50 2.40 -23.688 -22.475 - -3.50 2.50 -23.608 -22.529 - -3.50 2.60 -23.543 -22.582 - -3.50 2.70 -23.491 -22.636 - -3.50 2.80 -23.451 -22.689 - -3.50 2.90 -23.422 -22.741 - -3.50 3.00 -23.402 -22.793 - -3.50 3.10 -23.391 -22.844 - -3.50 3.20 -23.387 -22.894 - -3.50 3.30 -23.388 -22.943 - -3.50 3.40 -23.392 -22.992 - -3.50 3.50 -23.398 -23.039 - -3.50 3.60 -23.400 -23.085 - -3.50 3.70 -23.389 -23.131 - -3.50 3.80 -23.359 -23.176 - -3.50 3.90 -23.291 -23.220 - -3.50 4.00 -23.182 -23.263 - -3.50 4.10 -23.037 -23.306 - -3.50 4.20 -22.855 -23.348 - -3.50 4.30 -22.639 -23.392 - -3.50 4.40 -22.404 -23.439 - -3.50 4.50 -22.171 -23.499 - -3.50 4.60 -21.963 -23.571 - -3.50 4.70 -21.773 -23.642 - -3.50 4.80 -21.586 -23.717 - -3.50 4.90 -21.412 -23.866 - -3.50 5.00 -21.324 -24.144 - -3.50 5.10 -21.345 -24.383 - -3.50 5.20 -21.337 -24.518 - -3.50 5.30 -21.308 -24.619 - -3.50 5.40 -21.305 -24.725 - -3.50 5.50 -21.480 -24.887 - -3.50 5.60 -21.679 -25.018 - -3.50 5.70 -21.729 -25.093 - -3.50 5.80 -21.782 -25.174 - -3.50 5.90 -21.809 -25.246 - -3.50 6.00 -21.816 -25.309 - -3.50 6.10 -21.839 -25.383 - -3.50 6.20 -21.894 -25.468 - -3.50 6.30 -22.074 -25.608 - -3.50 6.40 -22.260 -25.789 - -3.50 6.50 -22.404 -25.965 - -3.50 6.60 -22.500 -26.103 - -3.50 6.70 -22.550 -26.211 - -3.50 6.80 -22.571 -26.303 - -3.50 6.90 -22.573 -26.391 - -3.50 7.00 -22.572 -26.471 - -3.50 7.10 -22.579 -26.432 - -3.50 7.20 -22.621 -26.224 - -3.50 7.30 -22.655 -26.129 - -3.50 7.40 -22.662 -26.189 - -3.50 7.50 -22.649 -26.309 - -3.50 7.60 -22.620 -26.427 - -3.50 7.70 -22.584 -26.535 - -3.50 7.80 -22.544 -26.646 - -3.50 7.90 -22.502 -26.765 - -3.50 8.00 -22.462 -26.876 - -3.50 8.10 -22.418 -26.969 - -3.50 8.20 -22.374 -27.043 - -3.50 8.30 -22.328 -27.100 - -3.50 8.40 -22.278 -27.144 - -3.50 8.50 -22.228 -27.177 - -3.50 8.60 -22.173 -27.202 - -3.50 8.70 -22.118 -27.221 - -3.50 8.80 -22.060 -27.235 - -3.50 8.90 -21.999 -27.245 - -3.50 9.00 -21.938 -27.254 - -3.40 1.00 -25.537 -21.777 - -3.40 1.10 -25.445 -21.822 - -3.40 1.20 -25.316 -21.868 - -3.40 1.30 -25.150 -21.915 - -3.40 1.40 -24.965 -21.963 - -3.40 1.50 -24.786 -22.013 - -3.40 1.60 -24.626 -22.063 - -3.40 1.70 -24.488 -22.114 - -3.40 1.80 -24.366 -22.166 - -3.40 1.90 -24.251 -22.219 - -3.40 2.00 -24.134 -22.272 - -3.40 2.10 -24.017 -22.325 - -3.40 2.20 -23.902 -22.379 - -3.40 2.30 -23.795 -22.434 - -3.40 2.40 -23.702 -22.488 - -3.40 2.50 -23.623 -22.543 - -3.40 2.60 -23.558 -22.597 - -3.40 2.70 -23.505 -22.652 - -3.40 2.80 -23.464 -22.706 - -3.40 2.90 -23.434 -22.759 - -3.40 3.00 -23.414 -22.813 - -3.40 3.10 -23.402 -22.865 - -3.40 3.20 -23.397 -22.917 - -3.40 3.30 -23.399 -22.968 - -3.40 3.40 -23.404 -23.018 - -3.40 3.50 -23.410 -23.068 - -3.40 3.60 -23.414 -23.116 - -3.40 3.70 -23.403 -23.163 - -3.40 3.80 -23.371 -23.210 - -3.40 3.90 -23.299 -23.255 - -3.40 4.00 -23.185 -23.300 - -3.40 4.10 -23.032 -23.344 - -3.40 4.20 -22.843 -23.387 - -3.40 4.30 -22.622 -23.432 - -3.40 4.40 -22.383 -23.482 - -3.40 4.50 -22.153 -23.547 - -3.40 4.60 -21.951 -23.625 - -3.40 4.70 -21.765 -23.699 - -3.40 4.80 -21.577 -23.776 - -3.40 4.90 -21.402 -23.937 - -3.40 5.00 -21.315 -24.232 - -3.40 5.10 -21.341 -24.478 - -3.40 5.20 -21.335 -24.614 - -3.40 5.30 -21.307 -24.715 - -3.40 5.40 -21.302 -24.821 - -3.40 5.50 -21.477 -24.982 - -3.40 5.60 -21.678 -25.114 - -3.40 5.70 -21.728 -25.190 - -3.40 5.80 -21.781 -25.270 - -3.40 5.90 -21.809 -25.343 - -3.40 6.00 -21.816 -25.406 - -3.40 6.10 -21.838 -25.480 - -3.40 6.20 -21.894 -25.565 - -3.40 6.30 -22.074 -25.705 - -3.40 6.40 -22.259 -25.885 - -3.40 6.50 -22.402 -26.061 - -3.40 6.60 -22.499 -26.200 - -3.40 6.70 -22.549 -26.306 - -3.40 6.80 -22.569 -26.399 - -3.40 6.90 -22.570 -26.487 - -3.40 7.00 -22.565 -26.569 - -3.40 7.10 -22.576 -26.556 - -3.40 7.20 -22.620 -26.386 - -3.40 7.30 -22.656 -26.305 - -3.40 7.40 -22.664 -26.366 - -3.40 7.50 -22.651 -26.482 - -3.40 7.60 -22.623 -26.595 - -3.40 7.70 -22.587 -26.698 - -3.40 7.80 -22.548 -26.801 - -3.40 7.90 -22.507 -26.909 - -3.40 8.00 -22.467 -27.011 - -3.40 8.10 -22.425 -27.097 - -3.40 8.20 -22.382 -27.164 - -3.40 8.30 -22.336 -27.217 - -3.40 8.40 -22.287 -27.256 - -3.40 8.50 -22.238 -27.286 - -3.40 8.60 -22.184 -27.309 - -3.40 8.70 -22.130 -27.326 - -3.40 8.80 -22.074 -27.339 - -3.40 8.90 -22.015 -27.348 - -3.40 9.00 -21.954 -27.356 - -3.30 1.00 -25.562 -21.803 - -3.30 1.10 -25.465 -21.846 - -3.30 1.20 -25.332 -21.890 - -3.30 1.30 -25.158 -21.936 - -3.30 1.40 -24.967 -21.983 - -3.30 1.50 -24.782 -22.031 - -3.30 1.60 -24.618 -22.080 - -3.30 1.70 -24.479 -22.130 - -3.30 1.80 -24.357 -22.181 - -3.30 1.90 -24.244 -22.233 - -3.30 2.00 -24.132 -22.285 - -3.30 2.10 -24.020 -22.338 - -3.30 2.20 -23.909 -22.392 - -3.30 2.30 -23.806 -22.447 - -3.30 2.40 -23.715 -22.501 - -3.30 2.50 -23.638 -22.556 - -3.30 2.60 -23.573 -22.611 - -3.30 2.70 -23.520 -22.666 - -3.30 2.80 -23.479 -22.721 - -3.30 2.90 -23.448 -22.776 - -3.30 3.00 -23.426 -22.830 - -3.30 3.10 -23.413 -22.884 - -3.30 3.20 -23.408 -22.938 - -3.30 3.30 -23.410 -22.990 - -3.30 3.40 -23.415 -23.042 - -3.30 3.50 -23.422 -23.094 - -3.30 3.60 -23.425 -23.144 - -3.30 3.70 -23.414 -23.193 - -3.30 3.80 -23.381 -23.242 - -3.30 3.90 -23.306 -23.289 - -3.30 4.00 -23.186 -23.335 - -3.30 4.10 -23.027 -23.381 - -3.30 4.20 -22.832 -23.426 - -3.30 4.30 -22.605 -23.472 - -3.30 4.40 -22.363 -23.526 - -3.30 4.50 -22.136 -23.598 - -3.30 4.60 -21.940 -23.683 - -3.30 4.70 -21.758 -23.760 - -3.30 4.80 -21.570 -23.838 - -3.30 4.90 -21.393 -24.012 - -3.30 5.00 -21.308 -24.323 - -3.30 5.10 -21.337 -24.573 - -3.30 5.20 -21.333 -24.710 - -3.30 5.30 -21.305 -24.812 - -3.30 5.40 -21.300 -24.917 - -3.30 5.50 -21.475 -25.078 - -3.30 5.60 -21.677 -25.209 - -3.30 5.70 -21.728 -25.286 - -3.30 5.80 -21.780 -25.367 - -3.30 5.90 -21.808 -25.440 - -3.30 6.00 -21.815 -25.502 - -3.30 6.10 -21.838 -25.577 - -3.30 6.20 -21.894 -25.662 - -3.30 6.30 -22.073 -25.801 - -3.30 6.40 -22.258 -25.980 - -3.30 6.50 -22.400 -26.156 - -3.30 6.60 -22.499 -26.295 - -3.30 6.70 -22.546 -26.401 - -3.30 6.80 -22.566 -26.493 - -3.30 6.90 -22.563 -26.580 - -3.30 7.00 -22.553 -26.663 - -3.30 7.10 -22.570 -26.672 - -3.30 7.20 -22.619 -26.541 - -3.30 7.30 -22.657 -26.474 - -3.30 7.40 -22.666 -26.536 - -3.30 7.50 -22.654 -26.648 - -3.30 7.60 -22.625 -26.756 - -3.30 7.70 -22.590 -26.852 - -3.30 7.80 -22.551 -26.946 - -3.30 7.90 -22.511 -27.043 - -3.30 8.00 -22.472 -27.137 - -3.30 8.10 -22.430 -27.217 - -3.30 8.20 -22.387 -27.279 - -3.30 8.30 -22.342 -27.328 - -3.30 8.40 -22.295 -27.365 - -3.30 8.50 -22.246 -27.392 - -3.30 8.60 -22.194 -27.413 - -3.30 8.70 -22.141 -27.429 - -3.30 8.80 -22.085 -27.441 - -3.30 8.90 -22.027 -27.450 - -3.30 9.00 -21.968 -27.457 - -3.20 1.00 -25.590 -21.833 - -3.20 1.10 -25.490 -21.874 - -3.20 1.20 -25.351 -21.916 - -3.20 1.30 -25.170 -21.960 - -3.20 1.40 -24.972 -22.005 - -3.20 1.50 -24.782 -22.051 - -3.20 1.60 -24.615 -22.099 - -3.20 1.70 -24.473 -22.147 - -3.20 1.80 -24.351 -22.197 - -3.20 1.90 -24.239 -22.248 - -3.20 2.00 -24.130 -22.300 - -3.20 2.10 -24.022 -22.352 - -3.20 2.20 -23.915 -22.406 - -3.20 2.30 -23.816 -22.460 - -3.20 2.40 -23.727 -22.514 - -3.20 2.50 -23.652 -22.569 - -3.20 2.60 -23.588 -22.624 - -3.20 2.70 -23.536 -22.680 - -3.20 2.80 -23.495 -22.735 - -3.20 2.90 -23.463 -22.791 - -3.20 3.00 -23.441 -22.846 - -3.20 3.10 -23.427 -22.901 - -3.20 3.20 -23.420 -22.956 - -3.20 3.30 -23.421 -23.010 - -3.20 3.40 -23.426 -23.064 - -3.20 3.50 -23.432 -23.117 - -3.20 3.60 -23.435 -23.169 - -3.20 3.70 -23.424 -23.220 - -3.20 3.80 -23.390 -23.271 - -3.20 3.90 -23.313 -23.320 - -3.20 4.00 -23.188 -23.368 - -3.20 4.10 -23.022 -23.416 - -3.20 4.20 -22.821 -23.462 - -3.20 4.30 -22.588 -23.510 - -3.20 4.40 -22.343 -23.569 - -3.20 4.50 -22.119 -23.652 - -3.20 4.60 -21.931 -23.745 - -3.20 4.70 -21.752 -23.825 - -3.20 4.80 -21.563 -23.905 - -3.20 4.90 -21.385 -24.090 - -3.20 5.00 -21.302 -24.415 - -3.20 5.10 -21.334 -24.669 - -3.20 5.20 -21.331 -24.806 - -3.20 5.30 -21.304 -24.909 - -3.20 5.40 -21.298 -25.013 - -3.20 5.50 -21.473 -25.173 - -3.20 5.60 -21.677 -25.305 - -3.20 5.70 -21.728 -25.381 - -3.20 5.80 -21.780 -25.463 - -3.20 5.90 -21.808 -25.536 - -3.20 6.00 -21.815 -25.599 - -3.20 6.10 -21.837 -25.673 - -3.20 6.20 -21.893 -25.758 - -3.20 6.30 -22.073 -25.896 - -3.20 6.40 -22.256 -26.074 - -3.20 6.50 -22.400 -26.250 - -3.20 6.60 -22.497 -26.389 - -3.20 6.70 -22.545 -26.495 - -3.20 6.80 -22.562 -26.586 - -3.20 6.90 -22.552 -26.672 - -3.20 7.00 -22.551 -26.756 - -3.20 7.10 -22.565 -26.780 - -3.20 7.20 -22.618 -26.686 - -3.20 7.30 -22.655 -26.634 - -3.20 7.40 -22.666 -26.698 - -3.20 7.50 -22.655 -26.805 - -3.20 7.60 -22.627 -26.907 - -3.20 7.70 -22.593 -26.996 - -3.20 7.80 -22.554 -27.081 - -3.20 7.90 -22.514 -27.167 - -3.20 8.00 -22.475 -27.254 - -3.20 8.10 -22.434 -27.329 - -3.20 8.20 -22.392 -27.388 - -3.20 8.30 -22.348 -27.434 - -3.20 8.40 -22.301 -27.469 - -3.20 8.50 -22.253 -27.496 - -3.20 8.60 -22.201 -27.516 - -3.20 8.70 -22.149 -27.531 - -3.20 8.80 -22.094 -27.542 - -3.20 8.90 -22.037 -27.551 - -3.20 9.00 -21.979 -27.558 - -3.10 1.00 -25.622 -21.867 - -3.10 1.10 -25.518 -21.905 - -3.10 1.20 -25.374 -21.945 - -3.10 1.30 -25.187 -21.987 - -3.10 1.40 -24.982 -22.030 - -3.10 1.50 -24.786 -22.074 - -3.10 1.60 -24.615 -22.120 - -3.10 1.70 -24.470 -22.167 - -3.10 1.80 -24.347 -22.216 - -3.10 1.90 -24.236 -22.265 - -3.10 2.00 -24.129 -22.316 - -3.10 2.10 -24.024 -22.367 - -3.10 2.20 -23.921 -22.420 - -3.10 2.30 -23.824 -22.473 - -3.10 2.40 -23.738 -22.527 - -3.10 2.50 -23.665 -22.582 - -3.10 2.60 -23.603 -22.637 - -3.10 2.70 -23.552 -22.692 - -3.10 2.80 -23.510 -22.748 - -3.10 2.90 -23.479 -22.804 - -3.10 3.00 -23.456 -22.860 - -3.10 3.10 -23.441 -22.916 - -3.10 3.20 -23.433 -22.972 - -3.10 3.30 -23.432 -23.028 - -3.10 3.40 -23.436 -23.083 - -3.10 3.50 -23.442 -23.137 - -3.10 3.60 -23.444 -23.191 - -3.10 3.70 -23.432 -23.245 - -3.10 3.80 -23.397 -23.297 - -3.10 3.90 -23.318 -23.348 - -3.10 4.00 -23.189 -23.399 - -3.10 4.10 -23.018 -23.448 - -3.10 4.20 -22.809 -23.497 - -3.10 4.30 -22.569 -23.548 - -3.10 4.40 -22.321 -23.613 - -3.10 4.50 -22.103 -23.708 - -3.10 4.60 -21.923 -23.811 - -3.10 4.70 -21.747 -23.893 - -3.10 4.80 -21.557 -23.976 - -3.10 4.90 -21.377 -24.171 - -3.10 5.00 -21.297 -24.509 - -3.10 5.10 -21.332 -24.765 - -3.10 5.20 -21.330 -24.903 - -3.10 5.30 -21.303 -25.005 - -3.10 5.40 -21.297 -25.109 - -3.10 5.50 -21.472 -25.267 - -3.10 5.60 -21.676 -25.399 - -3.10 5.70 -21.727 -25.475 - -3.10 5.80 -21.779 -25.558 - -3.10 5.90 -21.808 -25.631 - -3.10 6.00 -21.815 -25.694 - -3.10 6.10 -21.837 -25.768 - -3.10 6.20 -21.893 -25.853 - -3.10 6.30 -22.072 -25.990 - -3.10 6.40 -22.255 -26.167 - -3.10 6.50 -22.399 -26.343 - -3.10 6.60 -22.496 -26.482 - -3.10 6.70 -22.543 -26.586 - -3.10 6.80 -22.555 -26.676 - -3.10 6.90 -22.551 -26.762 - -3.10 7.00 -22.544 -26.845 - -3.10 7.10 -22.563 -26.883 - -3.10 7.20 -22.615 -26.820 - -3.10 7.30 -22.656 -26.786 - -3.10 7.40 -22.667 -26.850 - -3.10 7.50 -22.656 -26.953 - -3.10 7.60 -22.629 -27.048 - -3.10 7.70 -22.594 -27.130 - -3.10 7.80 -22.556 -27.205 - -3.10 7.90 -22.516 -27.281 - -3.10 8.00 -22.478 -27.362 - -3.10 8.10 -22.437 -27.433 - -3.10 8.20 -22.396 -27.491 - -3.10 8.30 -22.352 -27.536 - -3.10 8.40 -22.305 -27.570 - -3.10 8.50 -22.258 -27.596 - -3.10 8.60 -22.207 -27.615 - -3.10 8.70 -22.156 -27.631 - -3.10 8.80 -22.102 -27.642 - -3.10 8.90 -22.046 -27.651 - -3.10 9.00 -21.988 -27.658 - -3.00 1.00 -25.658 -21.904 - -3.00 1.10 -25.551 -21.940 - -3.00 1.20 -25.401 -21.978 - -3.00 1.30 -25.208 -22.018 - -3.00 1.40 -24.996 -22.058 - -3.00 1.50 -24.795 -22.101 - -3.00 1.60 -24.620 -22.145 - -3.00 1.70 -24.472 -22.190 - -3.00 1.80 -24.347 -22.237 - -3.00 1.90 -24.236 -22.285 - -3.00 2.00 -24.130 -22.334 - -3.00 2.10 -24.027 -22.384 - -3.00 2.20 -23.926 -22.435 - -3.00 2.30 -23.832 -22.488 - -3.00 2.40 -23.748 -22.541 - -3.00 2.50 -23.677 -22.595 - -3.00 2.60 -23.616 -22.650 - -3.00 2.70 -23.566 -22.705 - -3.00 2.80 -23.526 -22.761 - -3.00 2.90 -23.494 -22.817 - -3.00 3.00 -23.471 -22.874 - -3.00 3.10 -23.455 -22.930 - -3.00 3.20 -23.446 -22.987 - -3.00 3.30 -23.444 -23.043 - -3.00 3.40 -23.447 -23.100 - -3.00 3.50 -23.451 -23.156 - -3.00 3.60 -23.451 -23.211 - -3.00 3.70 -23.438 -23.266 - -3.00 3.80 -23.403 -23.320 - -3.00 3.90 -23.323 -23.374 - -3.00 4.00 -23.190 -23.426 - -3.00 4.10 -23.012 -23.478 - -3.00 4.20 -22.797 -23.528 - -3.00 4.30 -22.549 -23.583 - -3.00 4.40 -22.299 -23.657 - -3.00 4.50 -22.088 -23.766 - -3.00 4.60 -21.917 -23.879 - -3.00 4.70 -21.743 -23.966 - -3.00 4.80 -21.551 -24.051 - -3.00 4.90 -21.371 -24.255 - -3.00 5.00 -21.293 -24.603 - -3.00 5.10 -21.330 -24.862 - -3.00 5.20 -21.329 -24.999 - -3.00 5.30 -21.303 -25.101 - -3.00 5.40 -21.295 -25.204 - -3.00 5.50 -21.470 -25.361 - -3.00 5.60 -21.676 -25.492 - -3.00 5.70 -21.726 -25.568 - -3.00 5.80 -21.779 -25.652 - -3.00 5.90 -21.808 -25.725 - -3.00 6.00 -21.815 -25.788 - -3.00 6.10 -21.837 -25.861 - -3.00 6.20 -21.893 -25.947 - -3.00 6.30 -22.071 -26.083 - -3.00 6.40 -22.255 -26.258 - -3.00 6.50 -22.399 -26.435 - -3.00 6.60 -22.495 -26.573 - -3.00 6.70 -22.540 -26.676 - -3.00 6.80 -22.555 -26.765 - -3.00 6.90 -22.545 -26.848 - -3.00 7.00 -22.543 -26.933 - -3.00 7.10 -22.559 -26.977 - -3.00 7.20 -22.614 -26.944 - -3.00 7.30 -22.657 -26.926 - -3.00 7.40 -22.668 -26.991 - -3.00 7.50 -22.657 -27.088 - -3.00 7.60 -22.630 -27.178 - -3.00 7.70 -22.596 -27.251 - -3.00 7.80 -22.558 -27.318 - -3.00 7.90 -22.518 -27.384 - -3.00 8.00 -22.480 -27.461 - -3.00 8.10 -22.440 -27.531 - -3.00 8.20 -22.399 -27.587 - -3.00 8.30 -22.355 -27.633 - -3.00 8.40 -22.309 -27.667 - -3.00 8.50 -22.263 -27.693 - -3.00 8.60 -22.212 -27.713 - -3.00 8.70 -22.161 -27.729 - -3.00 8.80 -22.108 -27.741 - -3.00 8.90 -22.052 -27.750 - -3.00 9.00 -21.996 -27.757 - -2.90 1.00 -25.698 -21.946 - -2.90 1.10 -25.588 -21.980 - -2.90 1.20 -25.433 -22.015 - -2.90 1.30 -25.233 -22.052 - -2.90 1.40 -25.015 -22.091 - -2.90 1.50 -24.809 -22.131 - -2.90 1.60 -24.630 -22.172 - -2.90 1.70 -24.479 -22.216 - -2.90 1.80 -24.352 -22.260 - -2.90 1.90 -24.239 -22.307 - -2.90 2.00 -24.133 -22.354 - -2.90 2.10 -24.031 -22.403 - -2.90 2.20 -23.932 -22.453 - -2.90 2.30 -23.840 -22.504 - -2.90 2.40 -23.758 -22.556 - -2.90 2.50 -23.688 -22.610 - -2.90 2.60 -23.629 -22.664 - -2.90 2.70 -23.580 -22.719 - -2.90 2.80 -23.540 -22.774 - -2.90 2.90 -23.509 -22.830 - -2.90 3.00 -23.486 -22.887 - -2.90 3.10 -23.469 -22.944 - -2.90 3.20 -23.459 -23.001 - -2.90 3.30 -23.456 -23.058 - -2.90 3.40 -23.456 -23.115 - -2.90 3.50 -23.458 -23.172 - -2.90 3.60 -23.456 -23.229 - -2.90 3.70 -23.441 -23.285 - -2.90 3.80 -23.406 -23.341 - -2.90 3.90 -23.326 -23.396 - -2.90 4.00 -23.190 -23.451 - -2.90 4.10 -23.006 -23.504 - -2.90 4.20 -22.783 -23.558 - -2.90 4.30 -22.528 -23.617 - -2.90 4.40 -22.275 -23.701 - -2.90 4.50 -22.073 -23.827 - -2.90 4.60 -21.912 -23.950 - -2.90 4.70 -21.741 -24.042 - -2.90 4.80 -21.547 -24.129 - -2.90 4.90 -21.366 -24.342 - -2.90 5.00 -21.289 -24.698 - -2.90 5.10 -21.328 -24.958 - -2.90 5.20 -21.328 -25.094 - -2.90 5.30 -21.302 -25.196 - -2.90 5.40 -21.295 -25.299 - -2.90 5.50 -21.469 -25.453 - -2.90 5.60 -21.676 -25.583 - -2.90 5.70 -21.726 -25.660 - -2.90 5.80 -21.779 -25.745 - -2.90 5.90 -21.808 -25.818 - -2.90 6.00 -21.814 -25.881 - -2.90 6.10 -21.837 -25.954 - -2.90 6.20 -21.893 -26.039 - -2.90 6.30 -22.071 -26.174 - -2.90 6.40 -22.255 -26.347 - -2.90 6.50 -22.399 -26.524 - -2.90 6.60 -22.494 -26.663 - -2.90 6.70 -22.539 -26.764 - -2.90 6.80 -22.549 -26.847 - -2.90 6.90 -22.544 -26.933 - -2.90 7.00 -22.529 -27.009 - -2.90 7.10 -22.553 -27.063 - -2.90 7.20 -22.614 -27.057 - -2.90 7.30 -22.657 -27.053 - -2.90 7.40 -22.668 -27.119 - -2.90 7.50 -22.658 -27.211 - -2.90 7.60 -22.631 -27.294 - -2.90 7.70 -22.597 -27.361 - -2.90 7.80 -22.559 -27.419 - -2.90 7.90 -22.520 -27.476 - -2.90 8.00 -22.482 -27.551 - -2.90 8.10 -22.442 -27.620 - -2.90 8.20 -22.401 -27.677 - -2.90 8.30 -22.358 -27.724 - -2.90 8.40 -22.312 -27.760 - -2.90 8.50 -22.266 -27.787 - -2.90 8.60 -22.216 -27.808 - -2.90 8.70 -22.165 -27.825 - -2.90 8.80 -22.113 -27.837 - -2.90 8.90 -22.058 -27.848 - -2.90 9.00 -22.002 -27.855 - -2.80 1.00 -25.743 -21.992 - -2.80 1.10 -25.629 -22.023 - -2.80 1.20 -25.469 -22.056 - -2.80 1.30 -25.263 -22.090 - -2.80 1.40 -25.039 -22.127 - -2.80 1.50 -24.828 -22.164 - -2.80 1.60 -24.644 -22.204 - -2.80 1.70 -24.490 -22.245 - -2.80 1.80 -24.360 -22.287 - -2.80 1.90 -24.246 -22.332 - -2.80 2.00 -24.139 -22.377 - -2.80 2.10 -24.038 -22.424 - -2.80 2.20 -23.939 -22.473 - -2.80 2.30 -23.848 -22.523 - -2.80 2.40 -23.768 -22.573 - -2.80 2.50 -23.699 -22.626 - -2.80 2.60 -23.641 -22.679 - -2.80 2.70 -23.593 -22.733 - -2.80 2.80 -23.554 -22.788 - -2.80 2.90 -23.523 -22.843 - -2.80 3.00 -23.500 -22.900 - -2.80 3.10 -23.482 -22.957 - -2.80 3.20 -23.471 -23.014 - -2.80 3.30 -23.467 -23.071 - -2.80 3.40 -23.465 -23.129 - -2.80 3.50 -23.464 -23.187 - -2.80 3.60 -23.458 -23.245 - -2.80 3.70 -23.441 -23.303 - -2.80 3.80 -23.407 -23.360 - -2.80 3.90 -23.327 -23.417 - -2.80 4.00 -23.189 -23.473 - -2.80 4.10 -22.999 -23.528 - -2.80 4.20 -22.767 -23.584 - -2.80 4.30 -22.503 -23.648 - -2.80 4.40 -22.249 -23.745 - -2.80 4.50 -22.059 -23.890 - -2.80 4.60 -21.908 -24.024 - -2.80 4.70 -21.738 -24.121 - -2.80 4.80 -21.543 -24.210 - -2.80 4.90 -21.362 -24.431 - -2.80 5.00 -21.287 -24.794 - -2.80 5.10 -21.327 -25.055 - -2.80 5.20 -21.327 -25.190 - -2.80 5.30 -21.302 -25.291 - -2.80 5.40 -21.294 -25.392 - -2.80 5.50 -21.469 -25.544 - -2.80 5.60 -21.675 -25.673 - -2.80 5.70 -21.726 -25.750 - -2.80 5.80 -21.779 -25.836 - -2.80 5.90 -21.808 -25.910 - -2.80 6.00 -21.814 -25.972 - -2.80 6.10 -21.837 -26.045 - -2.80 6.20 -21.892 -26.130 - -2.80 6.30 -22.071 -26.263 - -2.80 6.40 -22.255 -26.433 - -2.80 6.50 -22.399 -26.611 - -2.80 6.60 -22.494 -26.749 - -2.80 6.70 -22.539 -26.848 - -2.80 6.80 -22.548 -26.930 - -2.80 6.90 -22.522 -26.996 - -2.80 7.00 -22.522 -27.082 - -2.80 7.10 -22.551 -27.145 - -2.80 7.20 -22.614 -27.159 - -2.80 7.30 -22.657 -27.167 - -2.80 7.40 -22.669 -27.234 - -2.80 7.50 -22.658 -27.320 - -2.80 7.60 -22.632 -27.398 - -2.80 7.70 -22.598 -27.458 - -2.80 7.80 -22.560 -27.508 - -2.80 7.90 -22.521 -27.557 - -2.80 8.00 -22.484 -27.632 - -2.80 8.10 -22.443 -27.702 - -2.80 8.20 -22.403 -27.761 - -2.80 8.30 -22.360 -27.811 - -2.80 8.40 -22.315 -27.849 - -2.80 8.50 -22.269 -27.878 - -2.80 8.60 -22.219 -27.901 - -2.80 8.70 -22.169 -27.919 - -2.80 8.80 -22.117 -27.933 - -2.80 8.90 -22.062 -27.944 - -2.80 9.00 -22.007 -27.953 - -2.70 1.00 -25.792 -22.041 - -2.70 1.10 -25.674 -22.070 - -2.70 1.20 -25.509 -22.101 - -2.70 1.30 -25.297 -22.133 - -2.70 1.40 -25.067 -22.167 - -2.70 1.50 -24.851 -22.202 - -2.70 1.60 -24.662 -22.239 - -2.70 1.70 -24.505 -22.278 - -2.70 1.80 -24.372 -22.318 - -2.70 1.90 -24.256 -22.360 - -2.70 2.00 -24.149 -22.403 - -2.70 2.10 -24.046 -22.449 - -2.70 2.20 -23.948 -22.495 - -2.70 2.30 -23.858 -22.543 - -2.70 2.40 -23.778 -22.593 - -2.70 2.50 -23.710 -22.643 - -2.70 2.60 -23.653 -22.695 - -2.70 2.70 -23.605 -22.748 - -2.70 2.80 -23.567 -22.802 - -2.70 2.90 -23.536 -22.857 - -2.70 3.00 -23.512 -22.913 - -2.70 3.10 -23.494 -22.970 - -2.70 3.20 -23.483 -23.027 - -2.70 3.30 -23.476 -23.084 - -2.70 3.40 -23.472 -23.143 - -2.70 3.50 -23.467 -23.201 - -2.70 3.60 -23.458 -23.259 - -2.70 3.70 -23.438 -23.318 - -2.70 3.80 -23.404 -23.377 - -2.70 3.90 -23.325 -23.435 - -2.70 4.00 -23.185 -23.493 - -2.70 4.10 -22.990 -23.550 - -2.70 4.20 -22.749 -23.608 - -2.70 4.30 -22.476 -23.677 - -2.70 4.40 -22.223 -23.790 - -2.70 4.50 -22.047 -23.954 - -2.70 4.60 -21.904 -24.100 - -2.70 4.70 -21.737 -24.203 - -2.70 4.80 -21.540 -24.295 - -2.70 4.90 -21.358 -24.522 - -2.70 5.00 -21.285 -24.891 - -2.70 5.10 -21.326 -25.151 - -2.70 5.20 -21.327 -25.284 - -2.70 5.30 -21.301 -25.385 - -2.70 5.40 -21.293 -25.485 - -2.70 5.50 -21.468 -25.633 - -2.70 5.60 -21.675 -25.760 - -2.70 5.70 -21.726 -25.839 - -2.70 5.80 -21.779 -25.925 - -2.70 5.90 -21.807 -25.999 - -2.70 6.00 -21.814 -26.062 - -2.70 6.10 -21.837 -26.133 - -2.70 6.20 -21.892 -26.218 - -2.70 6.30 -22.071 -26.350 - -2.70 6.40 -22.255 -26.517 - -2.70 6.50 -22.399 -26.696 - -2.70 6.60 -22.493 -26.833 - -2.70 6.70 -22.535 -26.927 - -2.70 6.80 -22.533 -26.986 - -2.70 6.90 -22.516 -27.068 - -2.70 7.00 -22.514 -27.150 - -2.70 7.10 -22.549 -27.220 - -2.70 7.20 -22.610 -27.242 - -2.70 7.30 -22.657 -27.266 - -2.70 7.40 -22.669 -27.333 - -2.70 7.50 -22.659 -27.415 - -2.70 7.60 -22.632 -27.488 - -2.70 7.70 -22.599 -27.542 - -2.70 7.80 -22.561 -27.585 - -2.70 7.90 -22.522 -27.628 - -2.70 8.00 -22.485 -27.704 - -2.70 8.10 -22.445 -27.776 - -2.70 8.20 -22.404 -27.839 - -2.70 8.30 -22.362 -27.892 - -2.70 8.40 -22.317 -27.933 - -2.70 8.50 -22.271 -27.966 - -2.70 8.60 -22.221 -27.991 - -2.70 8.70 -22.172 -28.011 - -2.70 8.80 -22.120 -28.027 - -2.70 8.90 -22.065 -28.040 - -2.70 9.00 -22.010 -28.049 - -2.60 1.00 -25.844 -22.095 - -2.60 1.10 -25.724 -22.122 - -2.60 1.20 -25.554 -22.150 - -2.60 1.30 -25.336 -22.180 - -2.60 1.40 -25.099 -22.211 - -2.60 1.50 -24.878 -22.244 - -2.60 1.60 -24.685 -22.278 - -2.60 1.70 -24.524 -22.315 - -2.60 1.80 -24.389 -22.352 - -2.60 1.90 -24.271 -22.392 - -2.60 2.00 -24.162 -22.433 - -2.60 2.10 -24.058 -22.476 - -2.60 2.20 -23.960 -22.521 - -2.60 2.30 -23.869 -22.567 - -2.60 2.40 -23.789 -22.614 - -2.60 2.50 -23.721 -22.663 - -2.60 2.60 -23.665 -22.714 - -2.60 2.70 -23.618 -22.765 - -2.60 2.80 -23.580 -22.818 - -2.60 2.90 -23.549 -22.872 - -2.60 3.00 -23.525 -22.927 - -2.60 3.10 -23.506 -22.983 - -2.60 3.20 -23.493 -23.040 - -2.60 3.30 -23.485 -23.098 - -2.60 3.40 -23.478 -23.156 - -2.60 3.50 -23.469 -23.214 - -2.60 3.60 -23.455 -23.273 - -2.60 3.70 -23.432 -23.332 - -2.60 3.80 -23.397 -23.392 - -2.60 3.90 -23.321 -23.451 - -2.60 4.00 -23.180 -23.510 - -2.60 4.10 -22.979 -23.569 - -2.60 4.20 -22.728 -23.630 - -2.60 4.30 -22.445 -23.705 - -2.60 4.40 -22.195 -23.835 - -2.60 4.50 -22.035 -24.021 - -2.60 4.60 -21.901 -24.178 - -2.60 4.70 -21.736 -24.287 - -2.60 4.80 -21.537 -24.382 - -2.60 4.90 -21.355 -24.614 - -2.60 5.00 -21.283 -24.987 - -2.60 5.10 -21.325 -25.246 - -2.60 5.20 -21.326 -25.378 - -2.60 5.30 -21.301 -25.477 - -2.60 5.40 -21.293 -25.576 - -2.60 5.50 -21.468 -25.719 - -2.60 5.60 -21.675 -25.845 - -2.60 5.70 -21.726 -25.924 - -2.60 5.80 -21.778 -26.012 - -2.60 5.90 -21.807 -26.086 - -2.60 6.00 -21.814 -26.148 - -2.60 6.10 -21.837 -26.220 - -2.60 6.20 -21.892 -26.305 - -2.60 6.30 -22.071 -26.434 - -2.60 6.40 -22.255 -26.598 - -2.60 6.50 -22.398 -26.776 - -2.60 6.60 -22.493 -26.913 - -2.60 6.70 -22.534 -27.004 - -2.60 6.80 -22.528 -27.055 - -2.60 6.90 -22.501 -27.127 - -2.60 7.00 -22.491 -27.171 - -2.60 7.10 -22.535 -27.247 - -2.60 7.20 -22.610 -27.319 - -2.60 7.30 -22.657 -27.351 - -2.60 7.40 -22.669 -27.418 - -2.60 7.50 -22.659 -27.496 - -2.60 7.60 -22.633 -27.565 - -2.60 7.70 -22.599 -27.615 - -2.60 7.80 -22.562 -27.652 - -2.60 7.90 -22.523 -27.689 - -2.60 8.00 -22.486 -27.767 - -2.60 8.10 -22.446 -27.843 - -2.60 8.20 -22.406 -27.910 - -2.60 8.30 -22.363 -27.968 - -2.60 8.40 -22.318 -28.013 - -2.60 8.50 -22.273 -28.049 - -2.60 8.60 -22.223 -28.078 - -2.60 8.70 -22.174 -28.101 - -2.60 8.80 -22.122 -28.119 - -2.60 8.90 -22.068 -28.134 - -2.60 9.00 -22.013 -28.145 - -2.50 1.00 -25.901 -22.153 - -2.50 1.10 -25.777 -22.178 - -2.50 1.20 -25.602 -22.203 - -2.50 1.30 -25.378 -22.231 - -2.50 1.40 -25.136 -22.259 - -2.50 1.50 -24.909 -22.290 - -2.50 1.60 -24.712 -22.322 - -2.50 1.70 -24.548 -22.355 - -2.50 1.80 -24.410 -22.391 - -2.50 1.90 -24.289 -22.428 - -2.50 2.00 -24.178 -22.467 - -2.50 2.10 -24.073 -22.507 - -2.50 2.20 -23.974 -22.550 - -2.50 2.30 -23.883 -22.594 - -2.50 2.40 -23.802 -22.639 - -2.50 2.50 -23.734 -22.686 - -2.50 2.60 -23.678 -22.735 - -2.50 2.70 -23.631 -22.785 - -2.50 2.80 -23.593 -22.836 - -2.50 2.90 -23.562 -22.889 - -2.50 3.00 -23.537 -22.943 - -2.50 3.10 -23.518 -22.998 - -2.50 3.20 -23.504 -23.054 - -2.50 3.30 -23.494 -23.111 - -2.50 3.40 -23.484 -23.169 - -2.50 3.50 -23.471 -23.227 - -2.50 3.60 -23.452 -23.286 - -2.50 3.70 -23.424 -23.346 - -2.50 3.80 -23.388 -23.406 - -2.50 3.90 -23.313 -23.466 - -2.50 4.00 -23.172 -23.526 - -2.50 4.10 -22.966 -23.586 - -2.50 4.20 -22.704 -23.649 - -2.50 4.30 -22.412 -23.732 - -2.50 4.40 -22.167 -23.881 - -2.50 4.50 -22.024 -24.088 - -2.50 4.60 -21.899 -24.257 - -2.50 4.70 -21.735 -24.374 - -2.50 4.80 -21.535 -24.471 - -2.50 4.90 -21.352 -24.708 - -2.50 5.00 -21.282 -25.084 - -2.50 5.10 -21.325 -25.341 - -2.50 5.20 -21.326 -25.470 - -2.50 5.30 -21.301 -25.568 - -2.50 5.40 -21.293 -25.665 - -2.50 5.50 -21.467 -25.803 - -2.50 5.60 -21.675 -25.926 - -2.50 5.70 -21.726 -26.007 - -2.50 5.80 -21.778 -26.097 - -2.50 5.90 -21.807 -26.171 - -2.50 6.00 -21.814 -26.233 - -2.50 6.10 -21.837 -26.303 - -2.50 6.20 -21.892 -26.388 - -2.50 6.30 -22.070 -26.514 - -2.50 6.40 -22.255 -26.674 - -2.50 6.50 -22.398 -26.854 - -2.50 6.60 -22.491 -26.987 - -2.50 6.70 -22.522 -27.050 - -2.50 6.80 -22.504 -27.103 - -2.50 6.90 -22.492 -27.172 - -2.50 7.00 -22.491 -27.227 - -2.50 7.10 -22.535 -27.303 - -2.50 7.20 -22.610 -27.385 - -2.50 7.30 -22.658 -27.422 - -2.50 7.40 -22.670 -27.489 - -2.50 7.50 -22.660 -27.564 - -2.50 7.60 -22.633 -27.630 - -2.50 7.70 -22.600 -27.676 - -2.50 7.80 -22.562 -27.709 - -2.50 7.90 -22.523 -27.742 - -2.50 8.00 -22.486 -27.822 - -2.50 8.10 -22.446 -27.903 - -2.50 8.20 -22.407 -27.975 - -2.50 8.30 -22.364 -28.038 - -2.50 8.40 -22.320 -28.088 - -2.50 8.50 -22.274 -28.129 - -2.50 8.60 -22.225 -28.161 - -2.50 8.70 -22.176 -28.188 - -2.50 8.80 -22.124 -28.209 - -2.50 8.90 -22.070 -28.226 - -2.50 9.00 -22.016 -28.239 - -2.40 1.00 -25.962 -22.215 - -2.40 1.10 -25.834 -22.237 - -2.40 1.20 -25.655 -22.261 - -2.40 1.30 -25.424 -22.285 - -2.40 1.40 -25.176 -22.312 - -2.40 1.50 -24.944 -22.340 - -2.40 1.60 -24.744 -22.369 - -2.40 1.70 -24.575 -22.401 - -2.40 1.80 -24.434 -22.433 - -2.40 1.90 -24.311 -22.468 - -2.40 2.00 -24.198 -22.504 - -2.40 2.10 -24.091 -22.543 - -2.40 2.20 -23.990 -22.583 - -2.40 2.30 -23.898 -22.624 - -2.40 2.40 -23.818 -22.667 - -2.40 2.50 -23.749 -22.712 - -2.40 2.60 -23.692 -22.759 - -2.40 2.70 -23.645 -22.807 - -2.40 2.80 -23.607 -22.857 - -2.40 2.90 -23.576 -22.908 - -2.40 3.00 -23.551 -22.960 - -2.40 3.10 -23.531 -23.014 - -2.40 3.20 -23.516 -23.069 - -2.40 3.30 -23.504 -23.125 - -2.40 3.40 -23.491 -23.183 - -2.40 3.50 -23.474 -23.241 - -2.40 3.60 -23.449 -23.300 - -2.40 3.70 -23.417 -23.359 - -2.40 3.80 -23.379 -23.419 - -2.40 3.90 -23.304 -23.480 - -2.40 4.00 -23.162 -23.541 - -2.40 4.10 -22.951 -23.602 - -2.40 4.20 -22.678 -23.667 - -2.40 4.30 -22.376 -23.757 - -2.40 4.40 -22.139 -23.929 - -2.40 4.50 -22.014 -24.157 - -2.40 4.60 -21.896 -24.337 - -2.40 4.70 -21.734 -24.463 - -2.40 4.80 -21.534 -24.562 - -2.40 4.90 -21.350 -24.803 - -2.40 5.00 -21.280 -25.180 - -2.40 5.10 -21.324 -25.435 - -2.40 5.20 -21.325 -25.560 - -2.40 5.30 -21.301 -25.657 - -2.40 5.40 -21.292 -25.751 - -2.40 5.50 -21.467 -25.883 - -2.40 5.60 -21.675 -26.004 - -2.40 5.70 -21.726 -26.085 - -2.40 5.80 -21.778 -26.177 - -2.40 5.90 -21.807 -26.252 - -2.40 6.00 -21.814 -26.314 - -2.40 6.10 -21.837 -26.383 - -2.40 6.20 -21.892 -26.468 - -2.40 6.30 -22.070 -26.591 - -2.40 6.40 -22.255 -26.746 - -2.40 6.50 -22.398 -26.926 - -2.40 6.60 -22.491 -27.060 - -2.40 6.70 -22.497 -27.088 - -2.40 6.80 -22.502 -27.160 - -2.40 6.90 -22.492 -27.227 - -2.40 7.00 -22.491 -27.277 - -2.40 7.10 -22.535 -27.352 - -2.40 7.20 -22.610 -27.441 - -2.40 7.30 -22.658 -27.481 - -2.40 7.40 -22.670 -27.547 - -2.40 7.50 -22.660 -27.620 - -2.40 7.60 -22.634 -27.684 - -2.40 7.70 -22.600 -27.727 - -2.40 7.80 -22.563 -27.757 - -2.40 7.90 -22.524 -27.786 - -2.40 8.00 -22.487 -27.870 - -2.40 8.10 -22.447 -27.955 - -2.40 8.20 -22.407 -28.033 - -2.40 8.30 -22.365 -28.103 - -2.40 8.40 -22.320 -28.158 - -2.40 8.50 -22.275 -28.203 - -2.40 8.60 -22.226 -28.241 - -2.40 8.70 -22.177 -28.271 - -2.40 8.80 -22.126 -28.296 - -2.40 8.90 -22.072 -28.316 - -2.40 9.00 -22.018 -28.332 - -2.30 1.00 -26.026 -22.280 - -2.30 1.10 -25.895 -22.300 - -2.30 1.20 -25.711 -22.322 - -2.30 1.30 -25.474 -22.344 - -2.30 1.40 -25.220 -22.368 - -2.30 1.50 -24.983 -22.394 - -2.30 1.60 -24.779 -22.421 - -2.30 1.70 -24.607 -22.450 - -2.30 1.80 -24.463 -22.480 - -2.30 1.90 -24.337 -22.512 - -2.30 2.00 -24.221 -22.546 - -2.30 2.10 -24.113 -22.582 - -2.30 2.20 -24.010 -22.619 - -2.30 2.30 -23.917 -22.658 - -2.30 2.40 -23.835 -22.699 - -2.30 2.50 -23.766 -22.742 - -2.30 2.60 -23.708 -22.786 - -2.30 2.70 -23.661 -22.832 - -2.30 2.80 -23.622 -22.880 - -2.30 2.90 -23.591 -22.929 - -2.30 3.00 -23.565 -22.980 - -2.30 3.10 -23.544 -23.032 - -2.30 3.20 -23.528 -23.086 - -2.30 3.30 -23.515 -23.141 - -2.30 3.40 -23.499 -23.197 - -2.30 3.50 -23.478 -23.255 - -2.30 3.60 -23.449 -23.313 - -2.30 3.70 -23.411 -23.372 - -2.30 3.80 -23.369 -23.433 - -2.30 3.90 -23.293 -23.493 - -2.30 4.00 -23.150 -23.555 - -2.30 4.10 -22.934 -23.617 - -2.30 4.20 -22.648 -23.683 - -2.30 4.30 -22.337 -23.783 - -2.30 4.40 -22.112 -23.980 - -2.30 4.50 -22.006 -24.226 - -2.30 4.60 -21.895 -24.419 - -2.30 4.70 -21.734 -24.554 - -2.30 4.80 -21.532 -24.655 - -2.30 4.90 -21.348 -24.899 - -2.30 5.00 -21.280 -25.277 - -2.30 5.10 -21.324 -25.528 - -2.30 5.20 -21.325 -25.648 - -2.30 5.30 -21.300 -25.744 - -2.30 5.40 -21.292 -25.835 - -2.30 5.50 -21.467 -25.959 - -2.30 5.60 -21.675 -26.077 - -2.30 5.70 -21.726 -26.160 - -2.30 5.80 -21.778 -26.255 - -2.30 5.90 -21.807 -26.330 - -2.30 6.00 -21.814 -26.392 - -2.30 6.10 -21.837 -26.459 - -2.30 6.20 -21.892 -26.544 - -2.30 6.30 -22.070 -26.663 - -2.30 6.40 -22.255 -26.813 - -2.30 6.50 -22.398 -26.994 - -2.30 6.60 -22.484 -27.104 - -2.30 6.70 -22.497 -27.143 - -2.30 6.80 -22.502 -27.211 - -2.30 6.90 -22.492 -27.276 - -2.30 7.00 -22.491 -27.321 - -2.30 7.10 -22.535 -27.394 - -2.30 7.20 -22.610 -27.488 - -2.30 7.30 -22.658 -27.529 - -2.30 7.40 -22.670 -27.594 - -2.30 7.50 -22.660 -27.665 - -2.30 7.60 -22.634 -27.729 - -2.30 7.70 -22.600 -27.770 - -2.30 7.80 -22.563 -27.796 - -2.30 7.90 -22.524 -27.824 - -2.30 8.00 -22.487 -27.910 - -2.30 8.10 -22.448 -28.001 - -2.30 8.20 -22.408 -28.084 - -2.30 8.30 -22.366 -28.161 - -2.30 8.40 -22.321 -28.222 - -2.30 8.50 -22.276 -28.273 - -2.30 8.60 -22.227 -28.316 - -2.30 8.70 -22.178 -28.351 - -2.30 8.80 -22.127 -28.381 - -2.30 8.90 -22.074 -28.404 - -2.30 9.00 -22.019 -28.423 - -2.20 1.00 -26.094 -22.348 - -2.20 1.10 -25.960 -22.367 - -2.20 1.20 -25.770 -22.386 - -2.20 1.30 -25.527 -22.407 - -2.20 1.40 -25.267 -22.429 - -2.20 1.50 -25.026 -22.452 - -2.20 1.60 -24.817 -22.477 - -2.20 1.70 -24.642 -22.503 - -2.20 1.80 -24.495 -22.531 - -2.20 1.90 -24.366 -22.561 - -2.20 2.00 -24.248 -22.592 - -2.20 2.10 -24.137 -22.625 - -2.20 2.20 -24.033 -22.660 - -2.20 2.30 -23.938 -22.696 - -2.20 2.40 -23.855 -22.735 - -2.20 2.50 -23.785 -22.775 - -2.20 2.60 -23.726 -22.817 - -2.20 2.70 -23.679 -22.861 - -2.20 2.80 -23.639 -22.906 - -2.20 2.90 -23.607 -22.954 - -2.20 3.00 -23.581 -23.002 - -2.20 3.10 -23.559 -23.053 - -2.20 3.20 -23.542 -23.105 - -2.20 3.30 -23.527 -23.159 - -2.20 3.40 -23.509 -23.214 - -2.20 3.50 -23.484 -23.270 - -2.20 3.60 -23.451 -23.328 - -2.20 3.70 -23.408 -23.386 - -2.20 3.80 -23.361 -23.446 - -2.20 3.90 -23.282 -23.507 - -2.20 4.00 -23.137 -23.568 - -2.20 4.10 -22.915 -23.630 - -2.20 4.20 -22.616 -23.699 - -2.20 4.30 -22.297 -23.809 - -2.20 4.40 -22.086 -24.032 - -2.20 4.50 -21.998 -24.296 - -2.20 4.60 -21.893 -24.502 - -2.20 4.70 -21.734 -24.646 - -2.20 4.80 -21.531 -24.749 - -2.20 4.90 -21.347 -24.995 - -2.20 5.00 -21.279 -25.373 - -2.20 5.10 -21.324 -25.619 - -2.20 5.20 -21.325 -25.734 - -2.20 5.30 -21.300 -25.827 - -2.20 5.40 -21.292 -25.916 - -2.20 5.50 -21.466 -26.032 - -2.20 5.60 -21.674 -26.146 - -2.20 5.70 -21.726 -26.229 - -2.20 5.80 -21.778 -26.328 - -2.20 5.90 -21.807 -26.403 - -2.20 6.00 -21.814 -26.463 - -2.20 6.10 -21.837 -26.530 - -2.20 6.20 -21.892 -26.615 - -2.20 6.30 -22.070 -26.731 - -2.20 6.40 -22.254 -26.875 - -2.20 6.50 -22.397 -27.056 - -2.20 6.60 -22.467 -27.120 - -2.20 6.70 -22.497 -27.192 - -2.20 6.80 -22.502 -27.257 - -2.20 6.90 -22.492 -27.320 - -2.20 7.00 -22.491 -27.360 - -2.20 7.10 -22.535 -27.431 - -2.20 7.20 -22.610 -27.527 - -2.20 7.30 -22.658 -27.567 - -2.20 7.40 -22.670 -27.632 - -2.20 7.50 -22.660 -27.702 - -2.20 7.60 -22.634 -27.765 - -2.20 7.70 -22.601 -27.805 - -2.20 7.80 -22.563 -27.829 - -2.20 7.90 -22.525 -27.855 - -2.20 8.00 -22.488 -27.945 - -2.20 8.10 -22.448 -28.040 - -2.20 8.20 -22.408 -28.129 - -2.20 8.30 -22.366 -28.213 - -2.20 8.40 -22.322 -28.281 - -2.20 8.50 -22.277 -28.338 - -2.20 8.60 -22.228 -28.386 - -2.20 8.70 -22.179 -28.427 - -2.20 8.80 -22.128 -28.461 - -2.20 8.90 -22.075 -28.489 - -2.20 9.00 -22.021 -28.511 - -2.10 1.00 -26.164 -22.420 - -2.10 1.10 -26.027 -22.436 - -2.10 1.20 -25.832 -22.454 - -2.10 1.30 -25.583 -22.473 - -2.10 1.40 -25.317 -22.493 - -2.10 1.50 -25.071 -22.514 - -2.10 1.60 -24.859 -22.536 - -2.10 1.70 -24.681 -22.560 - -2.10 1.80 -24.531 -22.586 - -2.10 1.90 -24.399 -22.613 - -2.10 2.00 -24.279 -22.642 - -2.10 2.10 -24.165 -22.672 - -2.10 2.20 -24.059 -22.705 - -2.10 2.30 -23.962 -22.739 - -2.10 2.40 -23.877 -22.775 - -2.10 2.50 -23.806 -22.812 - -2.10 2.60 -23.746 -22.852 - -2.10 2.70 -23.698 -22.893 - -2.10 2.80 -23.658 -22.936 - -2.10 2.90 -23.625 -22.981 - -2.10 3.00 -23.598 -23.028 - -2.10 3.10 -23.576 -23.076 - -2.10 3.20 -23.557 -23.127 - -2.10 3.30 -23.540 -23.179 - -2.10 3.40 -23.520 -23.232 - -2.10 3.50 -23.493 -23.287 - -2.10 3.60 -23.455 -23.343 - -2.10 3.70 -23.408 -23.401 - -2.10 3.80 -23.355 -23.460 - -2.10 3.90 -23.271 -23.520 - -2.10 4.00 -23.124 -23.581 - -2.10 4.10 -22.895 -23.644 - -2.10 4.20 -22.581 -23.714 - -2.10 4.30 -22.255 -23.837 - -2.10 4.40 -22.062 -24.086 - -2.10 4.50 -21.992 -24.366 - -2.10 4.60 -21.892 -24.586 - -2.10 4.70 -21.733 -24.740 - -2.10 4.80 -21.530 -24.844 - -2.10 4.90 -21.346 -25.092 - -2.10 5.00 -21.278 -25.468 - -2.10 5.10 -21.323 -25.708 - -2.10 5.20 -21.325 -25.817 - -2.10 5.30 -21.300 -25.908 - -2.10 5.40 -21.292 -25.992 - -2.10 5.50 -21.466 -26.099 - -2.10 5.60 -21.674 -26.210 - -2.10 5.70 -21.726 -26.296 - -2.10 5.80 -21.778 -26.396 - -2.10 5.90 -21.807 -26.472 - -2.10 6.00 -21.814 -26.533 - -2.10 6.10 -21.837 -26.597 - -2.10 6.20 -21.892 -26.682 - -2.10 6.30 -22.070 -26.793 - -2.10 6.40 -22.254 -26.932 - -2.10 6.50 -22.394 -27.105 - -2.10 6.60 -22.467 -27.168 - -2.10 6.70 -22.497 -27.235 - -2.10 6.80 -22.502 -27.298 - -2.10 6.90 -22.492 -27.358 - -2.10 7.00 -22.491 -27.393 - -2.10 7.10 -22.535 -27.462 - -2.10 7.20 -22.610 -27.559 - -2.10 7.30 -22.658 -27.598 - -2.10 7.40 -22.670 -27.662 - -2.10 7.50 -22.661 -27.731 - -2.10 7.60 -22.634 -27.794 - -2.10 7.70 -22.601 -27.833 - -2.10 7.80 -22.564 -27.856 - -2.10 7.90 -22.525 -27.881 - -2.10 8.00 -22.488 -27.974 - -2.10 8.10 -22.448 -28.074 - -2.10 8.20 -22.409 -28.168 - -2.10 8.30 -22.367 -28.258 - -2.10 8.40 -22.322 -28.333 - -2.10 8.50 -22.278 -28.396 - -2.10 8.60 -22.229 -28.451 - -2.10 8.70 -22.180 -28.498 - -2.10 8.80 -22.129 -28.538 - -2.10 8.90 -22.076 -28.571 - -2.10 9.00 -22.022 -28.597 - -2.00 1.00 -26.238 -22.494 - -2.00 1.10 -26.097 -22.509 - -2.00 1.20 -25.897 -22.525 - -2.00 1.30 -25.642 -22.542 - -2.00 1.40 -25.371 -22.560 - -2.00 1.50 -25.120 -22.579 - -2.00 1.60 -24.904 -22.600 - -2.00 1.70 -24.723 -22.621 - -2.00 1.80 -24.570 -22.645 - -2.00 1.90 -24.436 -22.669 - -2.00 2.00 -24.313 -22.696 - -2.00 2.10 -24.197 -22.724 - -2.00 2.20 -24.088 -22.754 - -2.00 2.30 -23.989 -22.785 - -2.00 2.40 -23.903 -22.818 - -2.00 2.50 -23.829 -22.853 - -2.00 2.60 -23.769 -22.890 - -2.00 2.70 -23.719 -22.929 - -2.00 2.80 -23.678 -22.970 - -2.00 2.90 -23.645 -23.012 - -2.00 3.00 -23.617 -23.057 - -2.00 3.10 -23.594 -23.103 - -2.00 3.20 -23.574 -23.151 - -2.00 3.30 -23.556 -23.201 - -2.00 3.40 -23.534 -23.253 - -2.00 3.50 -23.504 -23.306 - -2.00 3.60 -23.463 -23.361 - -2.00 3.70 -23.411 -23.417 - -2.00 3.80 -23.352 -23.475 - -2.00 3.90 -23.262 -23.535 - -2.00 4.00 -23.111 -23.595 - -2.00 4.10 -22.873 -23.657 - -2.00 4.20 -22.544 -23.730 - -2.00 4.30 -22.212 -23.866 - -2.00 4.40 -22.041 -24.143 - -2.00 4.50 -21.986 -24.434 - -2.00 4.60 -21.891 -24.672 - -2.00 4.70 -21.733 -24.835 - -2.00 4.80 -21.530 -24.940 - -2.00 4.90 -21.345 -25.190 - -2.00 5.00 -21.278 -25.563 - -2.00 5.10 -21.323 -25.795 - -2.00 5.20 -21.325 -25.897 - -2.00 5.30 -21.300 -25.984 - -2.00 5.40 -21.292 -26.065 - -2.00 5.50 -21.466 -26.161 - -2.00 5.60 -21.674 -26.268 - -2.00 5.70 -21.725 -26.356 - -2.00 5.80 -21.778 -26.459 - -2.00 5.90 -21.807 -26.535 - -2.00 6.00 -21.814 -26.594 - -2.00 6.10 -21.837 -26.658 - -2.00 6.20 -21.892 -26.743 - -2.00 6.30 -22.070 -26.850 - -2.00 6.40 -22.254 -26.983 - -2.00 6.50 -22.387 -27.112 - -2.00 6.60 -22.467 -27.210 - -2.00 6.70 -22.497 -27.273 - -2.00 6.80 -22.502 -27.333 - -2.00 6.90 -22.492 -27.390 - -2.00 7.00 -22.491 -27.422 - -2.00 7.10 -22.535 -27.488 - -2.00 7.20 -22.610 -27.585 - -2.00 7.30 -22.658 -27.623 - -2.00 7.40 -22.670 -27.685 - -2.00 7.50 -22.661 -27.754 - -2.00 7.60 -22.634 -27.818 - -2.00 7.70 -22.601 -27.856 - -2.00 7.80 -22.564 -27.878 - -2.00 7.90 -22.525 -27.902 - -2.00 8.00 -22.488 -27.998 - -2.00 8.10 -22.449 -28.102 - -2.00 8.20 -22.409 -28.202 - -2.00 8.30 -22.367 -28.298 - -2.00 8.40 -22.323 -28.379 - -2.00 8.50 -22.278 -28.449 - -2.00 8.60 -22.229 -28.511 - -2.00 8.70 -22.180 -28.564 - -2.00 8.80 -22.129 -28.611 - -2.00 8.90 -22.076 -28.649 - -2.00 9.00 -22.022 -28.680 - -1.90 1.00 -26.314 -22.571 - -1.90 1.10 -26.170 -22.585 - -1.90 1.20 -25.965 -22.599 - -1.90 1.30 -25.703 -22.614 - -1.90 1.40 -25.427 -22.630 - -1.90 1.50 -25.172 -22.648 - -1.90 1.60 -24.952 -22.666 - -1.90 1.70 -24.768 -22.686 - -1.90 1.80 -24.612 -22.707 - -1.90 1.90 -24.475 -22.730 - -1.90 2.00 -24.349 -22.754 - -1.90 2.10 -24.231 -22.779 - -1.90 2.20 -24.120 -22.807 - -1.90 2.30 -24.019 -22.836 - -1.90 2.40 -23.931 -22.866 - -1.90 2.50 -23.856 -22.899 - -1.90 2.60 -23.794 -22.933 - -1.90 2.70 -23.743 -22.969 - -1.90 2.80 -23.701 -23.007 - -1.90 2.90 -23.666 -23.047 - -1.90 3.00 -23.637 -23.089 - -1.90 3.10 -23.613 -23.133 - -1.90 3.20 -23.593 -23.179 - -1.90 3.30 -23.573 -23.226 - -1.90 3.40 -23.549 -23.276 - -1.90 3.50 -23.517 -23.327 - -1.90 3.60 -23.473 -23.381 - -1.90 3.70 -23.417 -23.436 - -1.90 3.80 -23.351 -23.492 - -1.90 3.90 -23.255 -23.550 - -1.90 4.00 -23.099 -23.610 - -1.90 4.10 -22.851 -23.672 - -1.90 4.20 -22.504 -23.746 - -1.90 4.30 -22.169 -23.897 - -1.90 4.40 -22.021 -24.201 - -1.90 4.50 -21.982 -24.503 - -1.90 4.60 -21.890 -24.759 - -1.90 4.70 -21.733 -24.931 - -1.90 4.80 -21.529 -25.037 - -1.90 4.90 -21.344 -25.287 - -1.90 5.00 -21.278 -25.656 - -1.90 5.10 -21.323 -25.879 - -1.90 5.20 -21.325 -25.973 - -1.90 5.30 -21.300 -26.056 - -1.90 5.40 -21.292 -26.132 - -1.90 5.50 -21.466 -26.218 - -1.90 5.60 -21.674 -26.321 - -1.90 5.70 -21.725 -26.408 - -1.90 5.80 -21.778 -26.517 - -1.90 5.90 -21.807 -26.592 - -1.90 6.00 -21.814 -26.652 - -1.90 6.10 -21.837 -26.714 - -1.90 6.20 -21.892 -26.798 - -1.90 6.30 -22.070 -26.901 - -1.90 6.40 -22.254 -27.028 - -1.90 6.50 -22.387 -27.152 - -1.90 6.60 -22.467 -27.246 - -1.90 6.70 -22.497 -27.305 - -1.90 6.80 -22.502 -27.363 - -1.90 6.90 -22.492 -27.418 - -1.90 7.00 -22.491 -27.442 - -1.90 7.10 -22.535 -27.510 - -1.90 7.20 -22.610 -27.607 - -1.90 7.30 -22.658 -27.642 - -1.90 7.40 -22.671 -27.704 - -1.90 7.50 -22.661 -27.773 - -1.90 7.60 -22.634 -27.837 - -1.90 7.70 -22.601 -27.875 - -1.90 7.80 -22.564 -27.896 - -1.90 7.90 -22.525 -27.919 - -1.90 8.00 -22.488 -28.018 - -1.90 8.10 -22.449 -28.126 - -1.90 8.20 -22.409 -28.230 - -1.90 8.30 -22.368 -28.333 - -1.90 8.40 -22.323 -28.420 - -1.90 8.50 -22.278 -28.496 - -1.90 8.60 -22.230 -28.565 - -1.90 8.70 -22.181 -28.625 - -1.90 8.80 -22.130 -28.678 - -1.90 8.90 -22.077 -28.723 - -1.90 9.00 -22.023 -28.760 - -1.80 1.00 -26.392 -22.651 - -1.80 1.10 -26.245 -22.662 - -1.80 1.20 -26.035 -22.675 - -1.80 1.30 -25.767 -22.689 - -1.80 1.40 -25.485 -22.704 - -1.80 1.50 -25.226 -22.719 - -1.80 1.60 -25.003 -22.736 - -1.80 1.70 -24.816 -22.754 - -1.80 1.80 -24.657 -22.773 - -1.80 1.90 -24.518 -22.793 - -1.80 2.00 -24.389 -22.815 - -1.80 2.10 -24.268 -22.839 - -1.80 2.20 -24.155 -22.864 - -1.80 2.30 -24.052 -22.890 - -1.80 2.40 -23.961 -22.918 - -1.80 2.50 -23.884 -22.948 - -1.80 2.60 -23.821 -22.980 - -1.80 2.70 -23.769 -23.013 - -1.80 2.80 -23.726 -23.049 - -1.80 2.90 -23.690 -23.086 - -1.80 3.00 -23.660 -23.125 - -1.80 3.10 -23.635 -23.167 - -1.80 3.20 -23.613 -23.210 - -1.80 3.30 -23.592 -23.255 - -1.80 3.40 -23.566 -23.303 - -1.80 3.50 -23.532 -23.352 - -1.80 3.60 -23.485 -23.403 - -1.80 3.70 -23.426 -23.456 - -1.80 3.80 -23.353 -23.511 - -1.80 3.90 -23.251 -23.568 - -1.80 4.00 -23.088 -23.626 - -1.80 4.10 -22.828 -23.687 - -1.80 4.20 -22.463 -23.763 - -1.80 4.30 -22.127 -23.932 - -1.80 4.40 -22.004 -24.261 - -1.80 4.50 -21.978 -24.571 - -1.80 4.60 -21.889 -24.848 - -1.80 4.70 -21.733 -25.027 - -1.80 4.80 -21.529 -25.134 - -1.80 4.90 -21.344 -25.385 - -1.80 5.00 -21.277 -25.748 - -1.80 5.10 -21.323 -25.959 - -1.80 5.20 -21.325 -26.044 - -1.80 5.30 -21.300 -26.124 - -1.80 5.40 -21.292 -26.195 - -1.80 5.50 -21.466 -26.269 - -1.80 5.60 -21.674 -26.368 - -1.80 5.70 -21.725 -26.458 - -1.80 5.80 -21.778 -26.569 - -1.80 5.90 -21.807 -26.644 - -1.80 6.00 -21.814 -26.705 - -1.80 6.10 -21.837 -26.764 - -1.80 6.20 -21.892 -26.848 - -1.80 6.30 -22.070 -26.947 - -1.80 6.40 -22.254 -27.067 - -1.80 6.50 -22.387 -27.187 - -1.80 6.60 -22.467 -27.277 - -1.80 6.70 -22.497 -27.333 - -1.80 6.80 -22.502 -27.388 - -1.80 6.90 -22.492 -27.442 - -1.80 7.00 -22.491 -27.466 - -1.80 7.10 -22.535 -27.528 - -1.80 7.20 -22.610 -27.620 - -1.80 7.30 -22.658 -27.657 - -1.80 7.40 -22.671 -27.719 - -1.80 7.50 -22.661 -27.788 - -1.80 7.60 -22.634 -27.851 - -1.80 7.70 -22.601 -27.889 - -1.80 7.80 -22.564 -27.910 - -1.80 7.90 -22.525 -27.933 - -1.80 8.00 -22.489 -28.034 - -1.80 8.10 -22.449 -28.146 - -1.80 8.20 -22.410 -28.254 - -1.80 8.30 -22.368 -28.362 - -1.80 8.40 -22.323 -28.455 - -1.80 8.50 -22.279 -28.538 - -1.80 8.60 -22.230 -28.613 - -1.80 8.70 -22.181 -28.680 - -1.80 8.80 -22.130 -28.740 - -1.80 8.90 -22.077 -28.792 - -1.80 9.00 -22.024 -28.835 - -1.70 1.00 -26.472 -22.732 - -1.70 1.10 -26.323 -22.743 - -1.70 1.20 -26.107 -22.754 - -1.70 1.30 -25.833 -22.766 - -1.70 1.40 -25.546 -22.780 - -1.70 1.50 -25.282 -22.794 - -1.70 1.60 -25.056 -22.809 - -1.70 1.70 -24.866 -22.825 - -1.70 1.80 -24.705 -22.842 - -1.70 1.90 -24.563 -22.861 - -1.70 2.00 -24.432 -22.881 - -1.70 2.10 -24.308 -22.902 - -1.70 2.20 -24.192 -22.924 - -1.70 2.30 -24.087 -22.949 - -1.70 2.40 -23.994 -22.974 - -1.70 2.50 -23.916 -23.002 - -1.70 2.60 -23.850 -23.031 - -1.70 2.70 -23.797 -23.062 - -1.70 2.80 -23.753 -23.094 - -1.70 2.90 -23.716 -23.129 - -1.70 3.00 -23.685 -23.166 - -1.70 3.10 -23.658 -23.204 - -1.70 3.20 -23.635 -23.245 - -1.70 3.30 -23.612 -23.288 - -1.70 3.40 -23.585 -23.333 - -1.70 3.50 -23.549 -23.380 - -1.70 3.60 -23.499 -23.428 - -1.70 3.70 -23.436 -23.479 - -1.70 3.80 -23.359 -23.532 - -1.70 3.90 -23.249 -23.587 - -1.70 4.00 -23.078 -23.644 - -1.70 4.10 -22.805 -23.704 - -1.70 4.20 -22.421 -23.782 - -1.70 4.30 -22.087 -23.970 - -1.70 4.40 -21.990 -24.323 - -1.70 4.50 -21.975 -24.640 - -1.70 4.60 -21.888 -24.938 - -1.70 4.70 -21.733 -25.124 - -1.70 4.80 -21.528 -25.232 - -1.70 4.90 -21.343 -25.483 - -1.70 5.00 -21.277 -25.839 - -1.70 5.10 -21.323 -26.036 - -1.70 5.20 -21.325 -26.110 - -1.70 5.30 -21.300 -26.186 - -1.70 5.40 -21.291 -26.250 - -1.70 5.50 -21.466 -26.314 - -1.70 5.60 -21.674 -26.409 - -1.70 5.70 -21.725 -26.499 - -1.70 5.80 -21.778 -26.615 - -1.70 5.90 -21.807 -26.692 - -1.70 6.00 -21.814 -26.752 - -1.70 6.10 -21.837 -26.808 - -1.70 6.20 -21.892 -26.893 - -1.70 6.30 -22.070 -26.987 - -1.70 6.40 -22.251 -27.088 - -1.70 6.50 -22.387 -27.217 - -1.70 6.60 -22.467 -27.304 - -1.70 6.70 -22.497 -27.357 - -1.70 6.80 -22.502 -27.409 - -1.70 6.90 -22.493 -27.461 - -1.70 7.00 -22.491 -27.482 - -1.70 7.10 -22.535 -27.542 - -1.70 7.20 -22.610 -27.638 - -1.70 7.30 -22.658 -27.670 - -1.70 7.40 -22.671 -27.730 - -1.70 7.50 -22.661 -27.798 - -1.70 7.60 -22.635 -27.862 - -1.70 7.70 -22.601 -27.900 - -1.70 7.80 -22.564 -27.921 - -1.70 7.90 -22.525 -27.944 - -1.70 8.00 -22.489 -28.047 - -1.70 8.10 -22.449 -28.162 - -1.70 8.20 -22.410 -28.274 - -1.70 8.30 -22.368 -28.387 - -1.70 8.40 -22.324 -28.485 - -1.70 8.50 -22.279 -28.573 - -1.70 8.60 -22.230 -28.655 - -1.70 8.70 -22.181 -28.729 - -1.70 8.80 -22.131 -28.797 - -1.70 8.90 -22.078 -28.856 - -1.70 9.00 -22.024 -28.906 - -1.60 1.00 -26.554 -22.815 - -1.60 1.10 -26.402 -22.825 - -1.60 1.20 -26.181 -22.835 - -1.60 1.30 -25.901 -22.846 - -1.60 1.40 -25.608 -22.858 - -1.60 1.50 -25.341 -22.871 - -1.60 1.60 -25.112 -22.884 - -1.60 1.70 -24.919 -22.899 - -1.60 1.80 -24.755 -22.914 - -1.60 1.90 -24.611 -22.931 - -1.60 2.00 -24.477 -22.949 - -1.60 2.10 -24.351 -22.968 - -1.60 2.20 -24.232 -22.989 - -1.60 2.30 -24.125 -23.011 - -1.60 2.40 -24.030 -23.034 - -1.60 2.50 -23.949 -23.059 - -1.60 2.60 -23.883 -23.086 - -1.60 2.70 -23.828 -23.114 - -1.60 2.80 -23.782 -23.144 - -1.60 2.90 -23.744 -23.176 - -1.60 3.00 -23.711 -23.210 - -1.60 3.10 -23.684 -23.246 - -1.60 3.20 -23.659 -23.284 - -1.60 3.30 -23.635 -23.324 - -1.60 3.40 -23.606 -23.366 - -1.60 3.50 -23.568 -23.411 - -1.60 3.60 -23.516 -23.457 - -1.60 3.70 -23.450 -23.506 - -1.60 3.80 -23.367 -23.557 - -1.60 3.90 -23.250 -23.609 - -1.60 4.00 -23.071 -23.664 - -1.60 4.10 -22.783 -23.722 - -1.60 4.20 -22.378 -23.804 - -1.60 4.30 -22.049 -24.012 - -1.60 4.40 -21.978 -24.386 - -1.60 4.50 -21.972 -24.710 - -1.60 4.60 -21.887 -25.029 - -1.60 4.70 -21.733 -25.222 - -1.60 4.80 -21.528 -25.330 - -1.60 4.90 -21.343 -25.581 - -1.60 5.00 -21.277 -25.927 - -1.60 5.10 -21.323 -26.109 - -1.60 5.20 -21.324 -26.171 - -1.60 5.30 -21.300 -26.243 - -1.60 5.40 -21.291 -26.303 - -1.60 5.50 -21.466 -26.353 - -1.60 5.60 -21.674 -26.445 - -1.60 5.70 -21.725 -26.538 - -1.60 5.80 -21.778 -26.655 - -1.60 5.90 -21.807 -26.731 - -1.60 6.00 -21.814 -26.792 - -1.60 6.10 -21.837 -26.847 - -1.60 6.20 -21.892 -26.931 - -1.60 6.30 -22.070 -27.021 - -1.60 6.40 -22.251 -27.116 - -1.60 6.50 -22.387 -27.242 - -1.60 6.60 -22.467 -27.326 - -1.60 6.70 -22.497 -27.376 - -1.60 6.80 -22.502 -27.427 - -1.60 6.90 -22.493 -27.477 - -1.60 7.00 -22.491 -27.496 - -1.60 7.10 -22.535 -27.554 - -1.60 7.20 -22.610 -27.649 - -1.60 7.30 -22.658 -27.679 - -1.60 7.40 -22.671 -27.738 - -1.60 7.50 -22.661 -27.806 - -1.60 7.60 -22.635 -27.871 - -1.60 7.70 -22.601 -27.909 - -1.60 7.80 -22.564 -27.930 - -1.60 7.90 -22.526 -27.953 - -1.60 8.00 -22.489 -28.058 - -1.60 8.10 -22.449 -28.175 - -1.60 8.20 -22.410 -28.291 - -1.60 8.30 -22.368 -28.407 - -1.60 8.40 -22.324 -28.510 - -1.60 8.50 -22.279 -28.604 - -1.60 8.60 -22.230 -28.692 - -1.60 8.70 -22.182 -28.773 - -1.60 8.80 -22.131 -28.848 - -1.60 8.90 -22.078 -28.914 - -1.60 9.00 -22.024 -28.972 - -1.50 1.00 -26.638 -22.900 - -1.50 1.10 -26.482 -22.909 - -1.50 1.20 -26.256 -22.918 - -1.50 1.30 -25.970 -22.928 - -1.50 1.40 -25.673 -22.939 - -1.50 1.50 -25.402 -22.950 - -1.50 1.60 -25.170 -22.962 - -1.50 1.70 -24.974 -22.975 - -1.50 1.80 -24.808 -22.989 - -1.50 1.90 -24.661 -23.004 - -1.50 2.00 -24.525 -23.020 - -1.50 2.10 -24.396 -23.038 - -1.50 2.20 -24.275 -23.056 - -1.50 2.30 -24.165 -23.076 - -1.50 2.40 -24.068 -23.097 - -1.50 2.50 -23.986 -23.120 - -1.50 2.60 -23.917 -23.144 - -1.50 2.70 -23.860 -23.170 - -1.50 2.80 -23.814 -23.198 - -1.50 2.90 -23.774 -23.227 - -1.50 3.00 -23.740 -23.259 - -1.50 3.10 -23.712 -23.292 - -1.50 3.20 -23.686 -23.327 - -1.50 3.30 -23.660 -23.365 - -1.50 3.40 -23.629 -23.404 - -1.50 3.50 -23.589 -23.446 - -1.50 3.60 -23.535 -23.490 - -1.50 3.70 -23.465 -23.536 - -1.50 3.80 -23.378 -23.584 - -1.50 3.90 -23.254 -23.635 - -1.50 4.00 -23.067 -23.687 - -1.50 4.10 -22.761 -23.744 - -1.50 4.20 -22.336 -23.828 - -1.50 4.30 -22.014 -24.059 - -1.50 4.40 -21.968 -24.450 - -1.50 4.50 -21.969 -24.780 - -1.50 4.60 -21.887 -25.122 - -1.50 4.70 -21.733 -25.320 - -1.50 4.80 -21.528 -25.428 - -1.50 4.90 -21.343 -25.679 - -1.50 5.00 -21.277 -26.013 - -1.50 5.10 -21.323 -26.177 - -1.50 5.20 -21.324 -26.226 - -1.50 5.30 -21.300 -26.293 - -1.50 5.40 -21.291 -26.349 - -1.50 5.50 -21.466 -26.388 - -1.50 5.60 -21.674 -26.476 - -1.50 5.70 -21.725 -26.570 - -1.50 5.80 -21.778 -26.690 - -1.50 5.90 -21.807 -26.768 - -1.50 6.00 -21.814 -26.827 - -1.50 6.10 -21.837 -26.881 - -1.50 6.20 -21.892 -26.965 - -1.50 6.30 -22.070 -27.051 - -1.50 6.40 -22.251 -27.140 - -1.50 6.50 -22.387 -27.263 - -1.50 6.60 -22.467 -27.345 - -1.50 6.70 -22.497 -27.392 - -1.50 6.80 -22.502 -27.442 - -1.50 6.90 -22.493 -27.491 - -1.50 7.00 -22.491 -27.507 - -1.50 7.10 -22.535 -27.564 - -1.50 7.20 -22.611 -27.658 - -1.50 7.30 -22.658 -27.686 - -1.50 7.40 -22.671 -27.744 - -1.50 7.50 -22.661 -27.813 - -1.50 7.60 -22.635 -27.878 - -1.50 7.70 -22.601 -27.917 - -1.50 7.80 -22.564 -27.937 - -1.50 7.90 -22.526 -27.960 - -1.50 8.00 -22.489 -28.066 - -1.50 8.10 -22.449 -28.186 - -1.50 8.20 -22.410 -28.304 - -1.50 8.30 -22.368 -28.424 - -1.50 8.40 -22.324 -28.531 - -1.50 8.50 -22.279 -28.630 - -1.50 8.60 -22.231 -28.723 - -1.50 8.70 -22.182 -28.811 - -1.50 8.80 -22.131 -28.893 - -1.50 8.90 -22.078 -28.967 - -1.50 9.00 -22.025 -29.033 - -1.40 1.00 -26.723 -22.987 - -1.40 1.10 -26.564 -22.995 - -1.40 1.20 -26.333 -23.003 - -1.40 1.30 -26.042 -23.012 - -1.40 1.40 -25.740 -23.021 - -1.40 1.50 -25.465 -23.031 - -1.40 1.60 -25.230 -23.042 - -1.40 1.70 -25.032 -23.054 - -1.40 1.80 -24.863 -23.067 - -1.40 1.90 -24.714 -23.080 - -1.40 2.00 -24.575 -23.095 - -1.40 2.10 -24.444 -23.110 - -1.40 2.20 -24.320 -23.127 - -1.40 2.30 -24.207 -23.145 - -1.40 2.40 -24.108 -23.164 - -1.40 2.50 -24.024 -23.185 - -1.40 2.60 -23.954 -23.207 - -1.40 2.70 -23.896 -23.231 - -1.40 2.80 -23.847 -23.256 - -1.40 2.90 -23.807 -23.283 - -1.40 3.00 -23.772 -23.311 - -1.40 3.10 -23.741 -23.342 - -1.40 3.20 -23.714 -23.375 - -1.40 3.30 -23.687 -23.409 - -1.40 3.40 -23.655 -23.446 - -1.40 3.50 -23.612 -23.485 - -1.40 3.60 -23.555 -23.526 - -1.40 3.70 -23.483 -23.569 - -1.40 3.80 -23.391 -23.615 - -1.40 3.90 -23.262 -23.663 - -1.40 4.00 -23.065 -23.713 - -1.40 4.10 -22.740 -23.768 - -1.40 4.20 -22.295 -23.855 - -1.40 4.30 -21.982 -24.110 - -1.40 4.40 -21.960 -24.515 - -1.40 4.50 -21.967 -24.853 - -1.40 4.60 -21.886 -25.215 - -1.40 4.70 -21.733 -25.418 - -1.40 4.80 -21.528 -25.527 - -1.40 4.90 -21.342 -25.776 - -1.40 5.00 -21.277 -26.096 - -1.40 5.10 -21.323 -26.240 - -1.40 5.20 -21.324 -26.276 - -1.40 5.30 -21.300 -26.339 - -1.40 5.40 -21.291 -26.388 - -1.40 5.50 -21.466 -26.417 - -1.40 5.60 -21.674 -26.502 - -1.40 5.70 -21.725 -26.598 - -1.40 5.80 -21.778 -26.721 - -1.40 5.90 -21.807 -26.799 - -1.40 6.00 -21.814 -26.857 - -1.40 6.10 -21.837 -26.909 - -1.40 6.20 -21.892 -26.993 - -1.40 6.30 -22.070 -27.076 - -1.40 6.40 -22.251 -27.160 - -1.40 6.50 -22.387 -27.281 - -1.40 6.60 -22.467 -27.360 - -1.40 6.70 -22.497 -27.405 - -1.40 6.80 -22.502 -27.454 - -1.40 6.90 -22.493 -27.502 - -1.40 7.00 -22.491 -27.516 - -1.40 7.10 -22.535 -27.572 - -1.40 7.20 -22.611 -27.666 - -1.40 7.30 -22.658 -27.691 - -1.40 7.40 -22.671 -27.750 - -1.40 7.50 -22.661 -27.818 - -1.40 7.60 -22.635 -27.884 - -1.40 7.70 -22.601 -27.922 - -1.40 7.80 -22.564 -27.943 - -1.40 7.90 -22.526 -27.965 - -1.40 8.00 -22.489 -28.073 - -1.40 8.10 -22.449 -28.194 - -1.40 8.20 -22.410 -28.315 - -1.40 8.30 -22.368 -28.438 - -1.40 8.40 -22.324 -28.549 - -1.40 8.50 -22.279 -28.652 - -1.40 8.60 -22.231 -28.750 - -1.40 8.70 -22.182 -28.844 - -1.40 8.80 -22.131 -28.932 - -1.40 8.90 -22.078 -29.014 - -1.40 9.00 -22.025 -29.087 - -1.30 1.00 -26.809 -23.075 - -1.30 1.10 -26.647 -23.082 - -1.30 1.20 -26.412 -23.089 - -1.30 1.30 -26.114 -23.097 - -1.30 1.40 -25.808 -23.106 - -1.30 1.50 -25.530 -23.115 - -1.30 1.60 -25.292 -23.125 - -1.30 1.70 -25.092 -23.135 - -1.30 1.80 -24.921 -23.146 - -1.30 1.90 -24.769 -23.159 - -1.30 2.00 -24.628 -23.172 - -1.30 2.10 -24.493 -23.186 - -1.30 2.20 -24.367 -23.201 - -1.30 2.30 -24.252 -23.217 - -1.30 2.40 -24.151 -23.234 - -1.30 2.50 -24.064 -23.253 - -1.30 2.60 -23.992 -23.273 - -1.30 2.70 -23.933 -23.294 - -1.30 2.80 -23.883 -23.317 - -1.30 2.90 -23.841 -23.342 - -1.30 3.00 -23.805 -23.368 - -1.30 3.10 -23.773 -23.396 - -1.30 3.20 -23.745 -23.426 - -1.30 3.30 -23.716 -23.458 - -1.30 3.40 -23.682 -23.492 - -1.30 3.50 -23.637 -23.528 - -1.30 3.60 -23.579 -23.566 - -1.30 3.70 -23.504 -23.607 - -1.30 3.80 -23.408 -23.650 - -1.30 3.90 -23.272 -23.695 - -1.30 4.00 -23.066 -23.743 - -1.30 4.10 -22.721 -23.795 - -1.30 4.20 -22.255 -23.885 - -1.30 4.30 -21.954 -24.166 - -1.30 4.40 -21.953 -24.580 - -1.30 4.50 -21.965 -24.927 - -1.30 4.60 -21.886 -25.310 - -1.30 4.70 -21.733 -25.517 - -1.30 4.80 -21.528 -25.625 - -1.30 4.90 -21.342 -25.873 - -1.30 5.00 -21.276 -26.175 - -1.30 5.10 -21.323 -26.297 - -1.30 5.20 -21.324 -26.320 - -1.30 5.30 -21.300 -26.378 - -1.30 5.40 -21.291 -26.423 - -1.30 5.50 -21.466 -26.442 - -1.30 5.60 -21.674 -26.524 - -1.30 5.70 -21.725 -26.621 - -1.30 5.80 -21.778 -26.746 - -1.30 5.90 -21.807 -26.825 - -1.30 6.00 -21.814 -26.883 - -1.30 6.10 -21.837 -26.933 - -1.30 6.20 -21.892 -27.017 - -1.30 6.30 -22.070 -27.096 - -1.30 6.40 -22.251 -27.176 - -1.30 6.50 -22.387 -27.295 - -1.30 6.60 -22.467 -27.373 - -1.30 6.70 -22.497 -27.416 - -1.30 6.80 -22.502 -27.463 - -1.30 6.90 -22.493 -27.511 - -1.30 7.00 -22.491 -27.524 - -1.30 7.10 -22.535 -27.579 - -1.30 7.20 -22.611 -27.671 - -1.30 7.30 -22.658 -27.696 - -1.30 7.40 -22.671 -27.754 - -1.30 7.50 -22.661 -27.823 - -1.30 7.60 -22.635 -27.888 - -1.30 7.70 -22.601 -27.927 - -1.30 7.80 -22.564 -27.947 - -1.30 7.90 -22.526 -27.970 - -1.30 8.00 -22.489 -28.079 - -1.30 8.10 -22.450 -28.201 - -1.30 8.20 -22.410 -28.324 - -1.30 8.30 -22.368 -28.450 - -1.30 8.40 -22.324 -28.563 - -1.30 8.50 -22.279 -28.670 - -1.30 8.60 -22.231 -28.772 - -1.30 8.70 -22.182 -28.871 - -1.30 8.80 -22.131 -28.966 - -1.30 8.90 -22.079 -29.055 - -1.30 9.00 -22.025 -29.137 - -1.20 1.00 -26.896 -23.165 - -1.20 1.10 -26.731 -23.171 - -1.20 1.20 -26.491 -23.177 - -1.20 1.30 -26.188 -23.184 - -1.20 1.40 -25.878 -23.192 - -1.20 1.50 -25.597 -23.200 - -1.20 1.60 -25.356 -23.209 - -1.20 1.70 -25.153 -23.218 - -1.20 1.80 -24.980 -23.228 - -1.20 1.90 -24.826 -23.239 - -1.20 2.00 -24.682 -23.251 - -1.20 2.10 -24.545 -23.263 - -1.20 2.20 -24.416 -23.277 - -1.20 2.30 -24.298 -23.292 - -1.20 2.40 -24.195 -23.307 - -1.20 2.50 -24.107 -23.324 - -1.20 2.60 -24.033 -23.342 - -1.20 2.70 -23.972 -23.361 - -1.20 2.80 -23.921 -23.382 - -1.20 2.90 -23.878 -23.404 - -1.20 3.00 -23.840 -23.428 - -1.20 3.10 -23.807 -23.454 - -1.20 3.20 -23.778 -23.481 - -1.20 3.30 -23.747 -23.511 - -1.20 3.40 -23.712 -23.542 - -1.20 3.50 -23.665 -23.575 - -1.20 3.60 -23.604 -23.611 - -1.20 3.70 -23.527 -23.648 - -1.20 3.80 -23.427 -23.689 - -1.20 3.90 -23.286 -23.731 - -1.20 4.00 -23.071 -23.776 - -1.20 4.10 -22.704 -23.826 - -1.20 4.20 -22.218 -23.920 - -1.20 4.30 -21.929 -24.226 - -1.20 4.40 -21.948 -24.648 - -1.20 4.50 -21.964 -25.004 - -1.20 4.60 -21.886 -25.406 - -1.20 4.70 -21.733 -25.615 - -1.20 4.80 -21.528 -25.723 - -1.20 4.90 -21.342 -25.970 - -1.20 5.00 -21.276 -26.251 - -1.20 5.10 -21.323 -26.349 - -1.20 5.20 -21.324 -26.359 - -1.20 5.30 -21.300 -26.412 - -1.20 5.40 -21.291 -26.450 - -1.20 5.50 -21.466 -26.462 - -1.20 5.60 -21.674 -26.542 - -1.20 5.70 -21.725 -26.640 - -1.20 5.80 -21.778 -26.767 - -1.20 5.90 -21.807 -26.846 - -1.20 6.00 -21.814 -26.904 - -1.20 6.10 -21.837 -26.953 - -1.20 6.20 -21.892 -27.037 - -1.20 6.30 -22.070 -27.113 - -1.20 6.40 -22.251 -27.190 - -1.20 6.50 -22.387 -27.307 - -1.20 6.60 -22.467 -27.379 - -1.20 6.70 -22.497 -27.425 - -1.20 6.80 -22.502 -27.471 - -1.20 6.90 -22.493 -27.518 - -1.20 7.00 -22.491 -27.530 - -1.20 7.10 -22.535 -27.584 - -1.20 7.20 -22.611 -27.676 - -1.20 7.30 -22.658 -27.699 - -1.20 7.40 -22.671 -27.757 - -1.20 7.50 -22.661 -27.826 - -1.20 7.60 -22.635 -27.892 - -1.20 7.70 -22.601 -27.931 - -1.20 7.80 -22.564 -27.951 - -1.20 7.90 -22.526 -27.974 - -1.20 8.00 -22.489 -28.083 - -1.20 8.10 -22.450 -28.207 - -1.20 8.20 -22.410 -28.331 - -1.20 8.30 -22.368 -28.459 - -1.20 8.40 -22.324 -28.575 - -1.20 8.50 -22.280 -28.685 - -1.20 8.60 -22.231 -28.791 - -1.20 8.70 -22.182 -28.895 - -1.20 8.80 -22.132 -28.996 - -1.20 8.90 -22.079 -29.091 - -1.20 9.00 -22.025 -29.180 - -1.10 1.00 -26.983 -23.255 - -1.10 1.10 -26.816 -23.260 - -1.10 1.20 -26.571 -23.266 - -1.10 1.30 -26.264 -23.273 - -1.10 1.40 -25.949 -23.279 - -1.10 1.50 -25.665 -23.287 - -1.10 1.60 -25.422 -23.295 - -1.10 1.70 -25.217 -23.303 - -1.10 1.80 -25.041 -23.312 - -1.10 1.90 -24.885 -23.322 - -1.10 2.00 -24.738 -23.332 - -1.10 2.10 -24.598 -23.344 - -1.10 2.20 -24.466 -23.356 - -1.10 2.30 -24.346 -23.369 - -1.10 2.40 -24.241 -23.383 - -1.10 2.50 -24.151 -23.398 - -1.10 2.60 -24.076 -23.414 - -1.10 2.70 -24.013 -23.432 - -1.10 2.80 -23.961 -23.450 - -1.10 2.90 -23.916 -23.471 - -1.10 3.00 -23.877 -23.492 - -1.10 3.10 -23.843 -23.516 - -1.10 3.20 -23.812 -23.540 - -1.10 3.30 -23.781 -23.567 - -1.10 3.40 -23.743 -23.596 - -1.10 3.50 -23.695 -23.626 - -1.10 3.60 -23.632 -23.659 - -1.10 3.70 -23.552 -23.694 - -1.10 3.80 -23.449 -23.731 - -1.10 3.90 -23.303 -23.771 - -1.10 4.00 -23.078 -23.813 - -1.10 4.10 -22.689 -23.861 - -1.10 4.20 -22.183 -23.959 - -1.10 4.30 -21.908 -24.290 - -1.10 4.40 -21.945 -24.717 - -1.10 4.50 -21.963 -25.083 - -1.10 4.60 -21.886 -25.502 - -1.10 4.70 -21.733 -25.714 - -1.10 4.80 -21.527 -25.822 - -1.10 4.90 -21.342 -26.065 - -1.10 5.00 -21.276 -26.322 - -1.10 5.10 -21.323 -26.394 - -1.10 5.20 -21.324 -26.392 - -1.10 5.30 -21.300 -26.442 - -1.10 5.40 -21.291 -26.477 - -1.10 5.50 -21.466 -26.479 - -1.10 5.60 -21.674 -26.557 - -1.10 5.70 -21.725 -26.656 - -1.10 5.80 -21.778 -26.785 - -1.10 5.90 -21.807 -26.864 - -1.10 6.00 -21.814 -26.922 - -1.10 6.10 -21.837 -26.970 - -1.10 6.20 -21.892 -27.054 - -1.10 6.30 -22.070 -27.127 - -1.10 6.40 -22.251 -27.201 - -1.10 6.50 -22.387 -27.317 - -1.10 6.60 -22.467 -27.391 - -1.10 6.70 -22.497 -27.432 - -1.10 6.80 -22.502 -27.477 - -1.10 6.90 -22.493 -27.523 - -1.10 7.00 -22.491 -27.534 - -1.10 7.10 -22.535 -27.584 - -1.10 7.20 -22.611 -27.679 - -1.10 7.30 -22.658 -27.702 - -1.10 7.40 -22.671 -27.760 - -1.10 7.50 -22.661 -27.829 - -1.10 7.60 -22.635 -27.895 - -1.10 7.70 -22.602 -27.934 - -1.10 7.80 -22.564 -27.954 - -1.10 7.90 -22.526 -27.977 - -1.10 8.00 -22.489 -28.087 - -1.10 8.10 -22.450 -28.211 - -1.10 8.20 -22.410 -28.337 - -1.10 8.30 -22.368 -28.467 - -1.10 8.40 -22.324 -28.585 - -1.10 8.50 -22.280 -28.697 - -1.10 8.60 -22.231 -28.807 - -1.10 8.70 -22.182 -28.914 - -1.10 8.80 -22.132 -29.020 - -1.10 8.90 -22.079 -29.122 - -1.10 9.00 -22.025 -29.218 - -1.00 1.00 -27.071 -23.347 - -1.00 1.10 -26.901 -23.351 - -1.00 1.20 -26.652 -23.357 - -1.00 1.30 -26.340 -23.362 - -1.00 1.40 -26.022 -23.368 - -1.00 1.50 -25.735 -23.375 - -1.00 1.60 -25.489 -23.382 - -1.00 1.70 -25.282 -23.389 - -1.00 1.80 -25.105 -23.397 - -1.00 1.90 -24.946 -23.406 - -1.00 2.00 -24.796 -23.416 - -1.00 2.10 -24.653 -23.426 - -1.00 2.20 -24.519 -23.437 - -1.00 2.30 -24.396 -23.448 - -1.00 2.40 -24.289 -23.461 - -1.00 2.50 -24.197 -23.475 - -1.00 2.60 -24.120 -23.489 - -1.00 2.70 -24.056 -23.505 - -1.00 2.80 -24.002 -23.522 - -1.00 2.90 -23.956 -23.540 - -1.00 3.00 -23.916 -23.560 - -1.00 3.10 -23.881 -23.581 - -1.00 3.20 -23.849 -23.603 - -1.00 3.30 -23.816 -23.628 - -1.00 3.40 -23.777 -23.654 - -1.00 3.50 -23.727 -23.682 - -1.00 3.60 -23.662 -23.712 - -1.00 3.70 -23.579 -23.744 - -1.00 3.80 -23.473 -23.778 - -1.00 3.90 -23.323 -23.815 - -1.00 4.00 -23.088 -23.854 - -1.00 4.10 -22.677 -23.899 - -1.00 4.20 -22.151 -24.002 - -1.00 4.30 -21.890 -24.358 - -1.00 4.40 -21.942 -24.788 - -1.00 4.50 -21.962 -25.165 - -1.00 4.60 -21.885 -25.599 - -1.00 4.70 -21.733 -25.812 - -1.00 4.80 -21.527 -25.920 - -1.00 4.90 -21.342 -26.160 - -1.00 5.00 -21.276 -26.388 - -1.00 5.10 -21.323 -26.435 - -1.00 5.20 -21.324 -26.420 - -1.00 5.30 -21.300 -26.466 - -1.00 5.40 -21.291 -26.498 - -1.00 5.50 -21.466 -26.494 - -1.00 5.60 -21.674 -26.569 - -1.00 5.70 -21.725 -26.669 - -1.00 5.80 -21.778 -26.800 - -1.00 5.90 -21.807 -26.879 - -1.00 6.00 -21.814 -26.937 - -1.00 6.10 -21.837 -26.984 - -1.00 6.20 -21.892 -27.067 - -1.00 6.30 -22.070 -27.139 - -1.00 6.40 -22.251 -27.210 - -1.00 6.50 -22.387 -27.324 - -1.00 6.60 -22.467 -27.398 - -1.00 6.70 -22.497 -27.438 - -1.00 6.80 -22.502 -27.483 - -1.00 6.90 -22.493 -27.528 - -1.00 7.00 -22.491 -27.538 - -1.00 7.10 -22.535 -27.591 - -1.00 7.20 -22.611 -27.682 - -1.00 7.30 -22.658 -27.705 - -1.00 7.40 -22.671 -27.762 - -1.00 7.50 -22.661 -27.831 - -1.00 7.60 -22.635 -27.897 - -1.00 7.70 -22.602 -27.936 - -1.00 7.80 -22.564 -27.956 - -1.00 7.90 -22.525 -27.977 - -1.00 8.00 -22.489 -28.089 - -1.00 8.10 -22.450 -28.215 - -1.00 8.20 -22.410 -28.341 - -1.00 8.30 -22.368 -28.473 - -1.00 8.40 -22.324 -28.593 - -1.00 8.50 -22.280 -28.707 - -1.00 8.60 -22.231 -28.819 - -1.00 8.70 -22.182 -28.931 - -1.00 8.80 -22.132 -29.041 - -1.00 8.90 -22.079 -29.148 - -1.00 9.00 -22.025 -29.251 - -0.90 1.00 -27.160 -23.439 - -0.90 1.10 -26.986 -23.443 - -0.90 1.20 -26.733 -23.448 - -0.90 1.30 -26.417 -23.453 - -0.90 1.40 -26.095 -23.458 - -0.90 1.50 -25.806 -23.464 - -0.90 1.60 -25.559 -23.470 - -0.90 1.70 -25.349 -23.477 - -0.90 1.80 -25.170 -23.484 - -0.90 1.90 -25.008 -23.492 - -0.90 2.00 -24.856 -23.501 - -0.90 2.10 -24.710 -23.510 - -0.90 2.20 -24.573 -23.519 - -0.90 2.30 -24.448 -23.530 - -0.90 2.40 -24.338 -23.541 - -0.90 2.50 -24.244 -23.554 - -0.90 2.60 -24.165 -23.567 - -0.90 2.70 -24.100 -23.581 - -0.90 2.80 -24.045 -23.596 - -0.90 2.90 -23.998 -23.612 - -0.90 3.00 -23.957 -23.630 - -0.90 3.10 -23.921 -23.649 - -0.90 3.20 -23.887 -23.670 - -0.90 3.30 -23.853 -23.692 - -0.90 3.40 -23.812 -23.715 - -0.90 3.50 -23.761 -23.741 - -0.90 3.60 -23.694 -23.768 - -0.90 3.70 -23.609 -23.798 - -0.90 3.80 -23.500 -23.829 - -0.90 3.90 -23.346 -23.863 - -0.90 4.00 -23.101 -23.899 - -0.90 4.10 -22.667 -23.942 - -0.90 4.20 -22.123 -24.049 - -0.90 4.30 -21.875 -24.431 - -0.90 4.40 -21.940 -24.861 - -0.90 4.50 -21.961 -25.249 - -0.90 4.60 -21.885 -25.696 - -0.90 4.70 -21.733 -25.911 - -0.90 4.80 -21.527 -26.017 - -0.90 4.90 -21.342 -26.254 - -0.90 5.00 -21.276 -26.449 - -0.90 5.10 -21.323 -26.469 - -0.90 5.20 -21.324 -26.444 - -0.90 5.30 -21.300 -26.487 - -0.90 5.40 -21.291 -26.516 - -0.90 5.50 -21.466 -26.505 - -0.90 5.60 -21.674 -26.579 - -0.90 5.70 -21.725 -26.679 - -0.90 5.80 -21.778 -26.812 - -0.90 5.90 -21.807 -26.891 - -0.90 6.00 -21.814 -26.949 - -0.90 6.10 -21.837 -26.995 - -0.90 6.20 -21.892 -27.079 - -0.90 6.30 -22.070 -27.148 - -0.90 6.40 -22.251 -27.217 - -0.90 6.50 -22.387 -27.331 - -0.90 6.60 -22.467 -27.399 - -0.90 6.70 -22.497 -27.443 - -0.90 6.80 -22.502 -27.487 - -0.90 6.90 -22.493 -27.532 - -0.90 7.00 -22.491 -27.541 - -0.90 7.10 -22.535 -27.594 - -0.90 7.20 -22.611 -27.684 - -0.90 7.30 -22.658 -27.706 - -0.90 7.40 -22.671 -27.764 - -0.90 7.50 -22.661 -27.833 - -0.90 7.60 -22.635 -27.899 - -0.90 7.70 -22.602 -27.938 - -0.90 7.80 -22.564 -27.955 - -0.90 7.90 -22.525 -27.979 - -0.90 8.00 -22.489 -28.091 - -0.90 8.10 -22.450 -28.218 - -0.90 8.20 -22.410 -28.345 - -0.90 8.30 -22.368 -28.477 - -0.90 8.40 -22.324 -28.599 - -0.90 8.50 -22.280 -28.715 - -0.90 8.60 -22.231 -28.830 - -0.90 8.70 -22.182 -28.944 - -0.90 8.80 -22.132 -29.058 - -0.90 8.90 -22.079 -29.170 - -0.90 9.00 -22.025 -29.278 - -0.80 1.00 -27.247 -23.532 - -0.80 1.10 -27.071 -23.536 - -0.80 1.20 -26.814 -23.540 - -0.80 1.30 -26.494 -23.545 - -0.80 1.40 -26.170 -23.550 - -0.80 1.50 -25.879 -23.555 - -0.80 1.60 -25.629 -23.560 - -0.80 1.70 -25.418 -23.566 - -0.80 1.80 -25.236 -23.573 - -0.80 1.90 -25.072 -23.580 - -0.80 2.00 -24.917 -23.587 - -0.80 2.10 -24.768 -23.595 - -0.80 2.20 -24.628 -23.604 - -0.80 2.30 -24.500 -23.614 - -0.80 2.40 -24.388 -23.624 - -0.80 2.50 -24.293 -23.635 - -0.80 2.60 -24.212 -23.646 - -0.80 2.70 -24.146 -23.659 - -0.80 2.80 -24.090 -23.673 - -0.80 2.90 -24.041 -23.688 - -0.80 3.00 -23.999 -23.703 - -0.80 3.10 -23.962 -23.721 - -0.80 3.20 -23.927 -23.739 - -0.80 3.30 -23.892 -23.759 - -0.80 3.40 -23.850 -23.780 - -0.80 3.50 -23.796 -23.804 - -0.80 3.60 -23.728 -23.829 - -0.80 3.70 -23.641 -23.855 - -0.80 3.80 -23.529 -23.884 - -0.80 3.90 -23.371 -23.915 - -0.80 4.00 -23.117 -23.949 - -0.80 4.10 -22.661 -23.989 - -0.80 4.20 -22.098 -24.101 - -0.80 4.30 -21.863 -24.507 - -0.80 4.40 -21.938 -24.937 - -0.80 4.50 -21.960 -25.335 - -0.80 4.60 -21.885 -25.793 - -0.80 4.70 -21.733 -26.009 - -0.80 4.80 -21.527 -26.114 - -0.80 4.90 -21.342 -26.346 - -0.80 5.00 -21.276 -26.504 - -0.80 5.10 -21.322 -26.499 - -0.80 5.20 -21.324 -26.464 - -0.80 5.30 -21.300 -26.504 - -0.80 5.40 -21.291 -26.530 - -0.80 5.50 -21.466 -26.515 - -0.80 5.60 -21.674 -26.587 - -0.80 5.70 -21.725 -26.688 - -0.80 5.80 -21.778 -26.821 - -0.80 5.90 -21.807 -26.901 - -0.80 6.00 -21.814 -26.958 - -0.80 6.10 -21.837 -27.004 - -0.80 6.20 -21.892 -27.088 - -0.80 6.30 -22.070 -27.156 - -0.80 6.40 -22.251 -27.223 - -0.80 6.50 -22.387 -27.336 - -0.80 6.60 -22.467 -27.408 - -0.80 6.70 -22.497 -27.446 - -0.80 6.80 -22.503 -27.490 - -0.80 6.90 -22.493 -27.535 - -0.80 7.00 -22.491 -27.544 - -0.80 7.10 -22.535 -27.592 - -0.80 7.20 -22.611 -27.686 - -0.80 7.30 -22.658 -27.708 - -0.80 7.40 -22.671 -27.765 - -0.80 7.50 -22.661 -27.834 - -0.80 7.60 -22.635 -27.901 - -0.80 7.70 -22.601 -27.934 - -0.80 7.80 -22.564 -27.956 - -0.80 7.90 -22.525 -27.981 - -0.80 8.00 -22.489 -28.093 - -0.80 8.10 -22.450 -28.220 - -0.80 8.20 -22.410 -28.348 - -0.80 8.30 -22.369 -28.481 - -0.80 8.40 -22.324 -28.604 - -0.80 8.50 -22.280 -28.722 - -0.80 8.60 -22.231 -28.838 - -0.80 8.70 -22.182 -28.955 - -0.80 8.80 -22.132 -29.072 - -0.80 8.90 -22.079 -29.188 - -0.80 9.00 -22.025 -29.302 - -0.70 1.00 -27.335 -23.626 - -0.70 1.10 -27.155 -23.630 - -0.70 1.20 -26.895 -23.633 - -0.70 1.30 -26.572 -23.637 - -0.70 1.40 -26.246 -23.642 - -0.70 1.50 -25.953 -23.646 - -0.70 1.60 -25.701 -23.651 - -0.70 1.70 -25.489 -23.657 - -0.70 1.80 -25.305 -23.662 - -0.70 1.90 -25.138 -23.669 - -0.70 2.00 -24.980 -23.675 - -0.70 2.10 -24.828 -23.683 - -0.70 2.20 -24.685 -23.690 - -0.70 2.30 -24.555 -23.699 - -0.70 2.40 -24.440 -23.708 - -0.70 2.50 -24.343 -23.718 - -0.70 2.60 -24.261 -23.728 - -0.70 2.70 -24.193 -23.740 - -0.70 2.80 -24.135 -23.752 - -0.70 2.90 -24.086 -23.765 - -0.70 3.00 -24.043 -23.779 - -0.70 3.10 -24.005 -23.795 - -0.70 3.20 -23.969 -23.812 - -0.70 3.30 -23.932 -23.829 - -0.70 3.40 -23.889 -23.849 - -0.70 3.50 -23.834 -23.870 - -0.70 3.60 -23.763 -23.892 - -0.70 3.70 -23.675 -23.917 - -0.70 3.80 -23.561 -23.943 - -0.70 3.90 -23.399 -23.971 - -0.70 4.00 -23.136 -24.002 - -0.70 4.10 -22.657 -24.040 - -0.70 4.20 -22.077 -24.157 - -0.70 4.30 -21.852 -24.586 - -0.70 4.40 -21.937 -25.015 - -0.70 4.50 -21.959 -25.424 - -0.70 4.60 -21.885 -25.891 - -0.70 4.70 -21.733 -26.107 - -0.70 4.80 -21.527 -26.211 - -0.70 4.90 -21.342 -26.437 - -0.70 5.00 -21.276 -26.554 - -0.70 5.10 -21.322 -26.524 - -0.70 5.20 -21.324 -26.480 - -0.70 5.30 -21.300 -26.518 - -0.70 5.40 -21.291 -26.542 - -0.70 5.50 -21.466 -26.522 - -0.70 5.60 -21.674 -26.594 - -0.70 5.70 -21.725 -26.695 - -0.70 5.80 -21.778 -26.829 - -0.70 5.90 -21.807 -26.909 - -0.70 6.00 -21.814 -26.966 - -0.70 6.10 -21.837 -27.012 - -0.70 6.20 -21.892 -27.095 - -0.70 6.30 -22.070 -27.162 - -0.70 6.40 -22.251 -27.228 - -0.70 6.50 -22.387 -27.340 - -0.70 6.60 -22.467 -27.411 - -0.70 6.70 -22.497 -27.449 - -0.70 6.80 -22.503 -27.493 - -0.70 6.90 -22.493 -27.537 - -0.70 7.00 -22.491 -27.546 - -0.70 7.10 -22.535 -27.597 - -0.70 7.20 -22.611 -27.688 - -0.70 7.30 -22.658 -27.709 - -0.70 7.40 -22.671 -27.766 - -0.70 7.50 -22.661 -27.835 - -0.70 7.60 -22.634 -27.893 - -0.70 7.70 -22.601 -27.935 - -0.70 7.80 -22.564 -27.958 - -0.70 7.90 -22.525 -27.982 - -0.70 8.00 -22.489 -28.095 - -0.70 8.10 -22.450 -28.222 - -0.70 8.20 -22.410 -28.350 - -0.70 8.30 -22.369 -28.484 - -0.70 8.40 -22.324 -28.608 - -0.70 8.50 -22.280 -28.727 - -0.70 8.60 -22.231 -28.845 - -0.70 8.70 -22.182 -28.964 - -0.70 8.80 -22.132 -29.084 - -0.70 8.90 -22.079 -29.203 - -0.70 9.00 -22.025 -29.321 - -0.60 1.00 -27.421 -23.721 - -0.60 1.10 -27.238 -23.724 - -0.60 1.20 -26.975 -23.727 - -0.60 1.30 -26.650 -23.731 - -0.60 1.40 -26.322 -23.735 - -0.60 1.50 -26.028 -23.739 - -0.60 1.60 -25.775 -23.743 - -0.60 1.70 -25.560 -23.748 - -0.60 1.80 -25.375 -23.753 - -0.60 1.90 -25.205 -23.759 - -0.60 2.00 -25.045 -23.765 - -0.60 2.10 -24.889 -23.771 - -0.60 2.20 -24.743 -23.778 - -0.60 2.30 -24.610 -23.786 - -0.60 2.40 -24.493 -23.794 - -0.60 2.50 -24.394 -23.803 - -0.60 2.60 -24.310 -23.812 - -0.60 2.70 -24.241 -23.822 - -0.60 2.80 -24.182 -23.833 - -0.60 2.90 -24.132 -23.845 - -0.60 3.00 -24.088 -23.858 - -0.60 3.10 -24.048 -23.872 - -0.60 3.20 -24.011 -23.887 - -0.60 3.30 -23.974 -23.903 - -0.60 3.40 -23.929 -23.920 - -0.60 3.50 -23.873 -23.939 - -0.60 3.60 -23.801 -23.960 - -0.60 3.70 -23.710 -23.982 - -0.60 3.80 -23.594 -24.006 - -0.60 3.90 -23.429 -24.031 - -0.60 4.00 -23.157 -24.059 - -0.60 4.10 -22.657 -24.094 - -0.60 4.20 -22.059 -24.217 - -0.60 4.30 -21.844 -24.668 - -0.60 4.40 -21.937 -25.096 - -0.60 4.50 -21.959 -25.514 - -0.60 4.60 -21.885 -25.989 - -0.60 4.70 -21.733 -26.205 - -0.60 4.80 -21.527 -26.307 - -0.60 4.90 -21.342 -26.525 - -0.60 5.00 -21.276 -26.597 - -0.60 5.10 -21.322 -26.546 - -0.60 5.20 -21.324 -26.494 - -0.60 5.30 -21.300 -26.530 - -0.60 5.40 -21.291 -26.551 - -0.60 5.50 -21.466 -26.528 - -0.60 5.60 -21.674 -26.599 - -0.60 5.70 -21.725 -26.701 - -0.60 5.80 -21.778 -26.836 - -0.60 5.90 -21.807 -26.916 - -0.60 6.00 -21.814 -26.973 - -0.60 6.10 -21.837 -27.017 - -0.60 6.20 -21.892 -27.101 - -0.60 6.30 -22.070 -27.167 - -0.60 6.40 -22.251 -27.232 - -0.60 6.50 -22.387 -27.343 - -0.60 6.60 -22.467 -27.414 - -0.60 6.70 -22.497 -27.452 - -0.60 6.80 -22.503 -27.495 - -0.60 6.90 -22.493 -27.539 - -0.60 7.00 -22.491 -27.547 - -0.60 7.10 -22.535 -27.599 - -0.60 7.20 -22.611 -27.689 - -0.60 7.30 -22.658 -27.710 - -0.60 7.40 -22.671 -27.767 - -0.60 7.50 -22.661 -27.836 - -0.60 7.60 -22.634 -27.894 - -0.60 7.70 -22.601 -27.936 - -0.60 7.80 -22.564 -27.959 - -0.60 7.90 -22.525 -27.983 - -0.60 8.00 -22.489 -28.096 - -0.60 8.10 -22.450 -28.223 - -0.60 8.20 -22.410 -28.352 - -0.60 8.30 -22.369 -28.487 - -0.60 8.40 -22.324 -28.611 - -0.60 8.50 -22.280 -28.731 - -0.60 8.60 -22.231 -28.850 - -0.60 8.70 -22.182 -28.971 - -0.60 8.80 -22.132 -29.093 - -0.60 8.90 -22.079 -29.216 - -0.60 9.00 -22.025 -29.338 - -0.50 1.00 -27.506 -23.816 - -0.50 1.10 -27.320 -23.819 - -0.50 1.20 -27.054 -23.822 - -0.50 1.30 -26.728 -23.825 - -0.50 1.40 -26.399 -23.828 - -0.50 1.50 -26.104 -23.832 - -0.50 1.60 -25.850 -23.836 - -0.50 1.70 -25.634 -23.840 - -0.50 1.80 -25.446 -23.845 - -0.50 1.90 -25.274 -23.850 - -0.50 2.00 -25.110 -23.855 - -0.50 2.10 -24.952 -23.861 - -0.50 2.20 -24.802 -23.867 - -0.50 2.30 -24.666 -23.874 - -0.50 2.40 -24.547 -23.881 - -0.50 2.50 -24.446 -23.889 - -0.50 2.60 -24.361 -23.897 - -0.50 2.70 -24.290 -23.907 - -0.50 2.80 -24.230 -23.916 - -0.50 2.90 -24.179 -23.927 - -0.50 3.00 -24.134 -23.939 - -0.50 3.10 -24.093 -23.951 - -0.50 3.20 -24.055 -23.964 - -0.50 3.30 -24.017 -23.979 - -0.50 3.40 -23.971 -23.995 - -0.50 3.50 -23.913 -24.012 - -0.50 3.60 -23.840 -24.030 - -0.50 3.70 -23.748 -24.050 - -0.50 3.80 -23.629 -24.072 - -0.50 3.90 -23.462 -24.095 - -0.50 4.00 -23.181 -24.120 - -0.50 4.10 -22.661 -24.153 - -0.50 4.20 -22.045 -24.282 - -0.50 4.30 -21.838 -24.753 - -0.50 4.40 -21.936 -25.179 - -0.50 4.50 -21.958 -25.606 - -0.50 4.60 -21.885 -26.086 - -0.50 4.70 -21.733 -26.302 - -0.50 4.80 -21.527 -26.402 - -0.50 4.90 -21.341 -26.611 - -0.50 5.00 -21.276 -26.636 - -0.50 5.10 -21.322 -26.563 - -0.50 5.20 -21.324 -26.505 - -0.50 5.30 -21.300 -26.539 - -0.50 5.40 -21.291 -26.559 - -0.50 5.50 -21.466 -26.533 - -0.50 5.60 -21.674 -26.603 - -0.50 5.70 -21.725 -26.705 - -0.50 5.80 -21.778 -26.841 - -0.50 5.90 -21.807 -26.921 - -0.50 6.00 -21.814 -26.978 - -0.50 6.10 -21.837 -27.022 - -0.50 6.20 -21.892 -27.106 - -0.50 6.30 -22.070 -27.171 - -0.50 6.40 -22.251 -27.235 - -0.50 6.50 -22.387 -27.346 - -0.50 6.60 -22.467 -27.416 - -0.50 6.70 -22.497 -27.453 - -0.50 6.80 -22.503 -27.496 - -0.50 6.90 -22.493 -27.541 - -0.50 7.00 -22.491 -27.548 - -0.50 7.10 -22.535 -27.596 - -0.50 7.20 -22.611 -27.690 - -0.50 7.30 -22.658 -27.711 - -0.50 7.40 -22.671 -27.768 - -0.50 7.50 -22.660 -27.828 - -0.50 7.60 -22.634 -27.895 - -0.50 7.70 -22.601 -27.936 - -0.50 7.80 -22.564 -27.959 - -0.50 7.90 -22.525 -27.984 - -0.50 8.00 -22.489 -28.097 - -0.50 8.10 -22.450 -28.225 - -0.50 8.20 -22.410 -28.354 - -0.50 8.30 -22.369 -28.489 - -0.50 8.40 -22.324 -28.614 - -0.50 8.50 -22.280 -28.734 - -0.50 8.60 -22.231 -28.855 - -0.50 8.70 -22.182 -28.976 - -0.50 8.80 -22.132 -29.101 - -0.50 8.90 -22.079 -29.226 - -0.50 9.00 -22.025 -29.351 - -0.40 1.00 -27.589 -23.912 - -0.40 1.10 -27.400 -23.914 - -0.40 1.20 -27.132 -23.917 - -0.40 1.30 -26.806 -23.920 - -0.40 1.40 -26.477 -23.923 - -0.40 1.50 -26.180 -23.926 - -0.40 1.60 -25.925 -23.929 - -0.40 1.70 -25.708 -23.933 - -0.40 1.80 -25.518 -23.937 - -0.40 1.90 -25.344 -23.942 - -0.40 2.00 -25.177 -23.947 - -0.40 2.10 -25.015 -23.952 - -0.40 2.20 -24.862 -23.957 - -0.40 2.30 -24.724 -23.963 - -0.40 2.40 -24.602 -23.970 - -0.40 2.50 -24.499 -23.977 - -0.40 2.60 -24.412 -23.984 - -0.40 2.70 -24.340 -23.993 - -0.40 2.80 -24.279 -24.001 - -0.40 2.90 -24.227 -24.011 - -0.40 3.00 -24.181 -24.021 - -0.40 3.10 -24.139 -24.032 - -0.40 3.20 -24.101 -24.044 - -0.40 3.30 -24.061 -24.057 - -0.40 3.40 -24.014 -24.072 - -0.40 3.50 -23.955 -24.087 - -0.40 3.60 -23.880 -24.103 - -0.40 3.70 -23.786 -24.121 - -0.40 3.80 -23.666 -24.141 - -0.40 3.90 -23.496 -24.162 - -0.40 4.00 -23.206 -24.184 - -0.40 4.10 -22.667 -24.215 - -0.40 4.20 -22.033 -24.350 - -0.40 4.30 -21.832 -24.841 - -0.40 4.40 -21.936 -25.265 - -0.40 4.50 -21.958 -25.699 - -0.40 4.60 -21.885 -26.184 - -0.40 4.70 -21.733 -26.399 - -0.40 4.80 -21.527 -26.496 - -0.40 4.90 -21.341 -26.694 - -0.40 5.00 -21.276 -26.669 - -0.40 5.10 -21.322 -26.578 - -0.40 5.20 -21.324 -26.514 - -0.40 5.30 -21.300 -26.547 - -0.40 5.40 -21.291 -26.565 - -0.40 5.50 -21.466 -26.537 - -0.40 5.60 -21.674 -26.607 - -0.40 5.70 -21.725 -26.709 - -0.40 5.80 -21.778 -26.845 - -0.40 5.90 -21.807 -26.925 - -0.40 6.00 -21.814 -26.982 - -0.40 6.10 -21.837 -27.026 - -0.40 6.20 -21.892 -27.110 - -0.40 6.30 -22.070 -27.174 - -0.40 6.40 -22.251 -27.237 - -0.40 6.50 -22.387 -27.348 - -0.40 6.60 -22.467 -27.418 - -0.40 6.70 -22.497 -27.455 - -0.40 6.80 -22.503 -27.498 - -0.40 6.90 -22.493 -27.542 - -0.40 7.00 -22.491 -27.549 - -0.40 7.10 -22.535 -27.601 - -0.40 7.20 -22.611 -27.691 - -0.40 7.30 -22.658 -27.711 - -0.40 7.40 -22.671 -27.768 - -0.40 7.50 -22.660 -27.828 - -0.40 7.60 -22.634 -27.896 - -0.40 7.70 -22.601 -27.937 - -0.40 7.80 -22.564 -27.960 - -0.40 7.90 -22.525 -27.984 - -0.40 8.00 -22.489 -28.097 - -0.40 8.10 -22.450 -28.226 - -0.40 8.20 -22.410 -28.355 - -0.40 8.30 -22.369 -28.491 - -0.40 8.40 -22.324 -28.616 - -0.40 8.50 -22.280 -28.737 - -0.40 8.60 -22.231 -28.858 - -0.40 8.70 -22.183 -28.981 - -0.40 8.80 -22.132 -29.107 - -0.40 8.90 -22.079 -29.234 - -0.40 9.00 -22.025 -29.362 - -0.30 1.00 -27.670 -24.008 - -0.30 1.10 -27.477 -24.010 - -0.30 1.20 -27.208 -24.012 - -0.30 1.30 -26.883 -24.015 - -0.30 1.40 -26.554 -24.018 - -0.30 1.50 -26.257 -24.021 - -0.30 1.60 -26.002 -24.024 - -0.30 1.70 -25.783 -24.027 - -0.30 1.80 -25.592 -24.031 - -0.30 1.90 -25.416 -24.035 - -0.30 2.00 -25.246 -24.039 - -0.30 2.10 -25.080 -24.044 - -0.30 2.20 -24.924 -24.049 - -0.30 2.30 -24.782 -24.054 - -0.30 2.40 -24.658 -24.060 - -0.30 2.50 -24.553 -24.066 - -0.30 2.60 -24.464 -24.073 - -0.30 2.70 -24.391 -24.080 - -0.30 2.80 -24.329 -24.088 - -0.30 2.90 -24.275 -24.097 - -0.30 3.00 -24.228 -24.106 - -0.30 3.10 -24.186 -24.116 - -0.30 3.20 -24.147 -24.126 - -0.30 3.30 -24.106 -24.138 - -0.30 3.40 -24.058 -24.151 - -0.30 3.50 -23.998 -24.164 - -0.30 3.60 -23.921 -24.179 - -0.30 3.70 -23.826 -24.195 - -0.30 3.80 -23.704 -24.213 - -0.30 3.90 -23.532 -24.232 - -0.30 4.00 -23.234 -24.252 - -0.30 4.10 -22.677 -24.281 - -0.30 4.20 -22.024 -24.421 - -0.30 4.30 -21.828 -24.931 - -0.30 4.40 -21.936 -25.353 - -0.30 4.50 -21.957 -25.794 - -0.30 4.60 -21.885 -26.281 - -0.30 4.70 -21.733 -26.494 - -0.30 4.80 -21.527 -26.588 - -0.30 4.90 -21.341 -26.773 - -0.30 5.00 -21.276 -26.697 - -0.30 5.10 -21.322 -26.589 - -0.30 5.20 -21.324 -26.521 - -0.30 5.30 -21.300 -26.553 - -0.30 5.40 -21.291 -26.570 - -0.30 5.50 -21.466 -26.541 - -0.30 5.60 -21.674 -26.610 - -0.30 5.70 -21.725 -26.712 - -0.30 5.80 -21.778 -26.848 - -0.30 5.90 -21.807 -26.928 - -0.30 6.00 -21.814 -26.985 - -0.30 6.10 -21.837 -27.029 - -0.30 6.20 -21.892 -27.113 - -0.30 6.30 -22.070 -27.177 - -0.30 6.40 -22.251 -27.239 - -0.30 6.50 -22.387 -27.350 - -0.30 6.60 -22.467 -27.419 - -0.30 6.70 -22.497 -27.456 - -0.30 6.80 -22.503 -27.499 - -0.30 6.90 -22.493 -27.543 - -0.30 7.00 -22.491 -27.550 - -0.30 7.10 -22.535 -27.601 - -0.30 7.20 -22.611 -27.687 - -0.30 7.30 -22.658 -27.712 - -0.30 7.40 -22.670 -27.761 - -0.30 7.50 -22.660 -27.829 - -0.30 7.60 -22.634 -27.896 - -0.30 7.70 -22.601 -27.938 - -0.30 7.80 -22.564 -27.960 - -0.30 7.90 -22.526 -27.985 - -0.30 8.00 -22.489 -28.098 - -0.30 8.10 -22.450 -28.226 - -0.30 8.20 -22.410 -28.356 - -0.30 8.30 -22.369 -28.492 - -0.30 8.40 -22.324 -28.618 - -0.30 8.50 -22.280 -28.739 - -0.30 8.60 -22.231 -28.861 - -0.30 8.70 -22.183 -28.985 - -0.30 8.80 -22.132 -29.112 - -0.30 8.90 -22.079 -29.240 - -0.30 9.00 -22.025 -29.371 - -0.20 1.00 -27.747 -24.104 - -0.20 1.10 -27.551 -24.106 - -0.20 1.20 -27.282 -24.108 - -0.20 1.30 -26.958 -24.111 - -0.20 1.40 -26.631 -24.113 - -0.20 1.50 -26.335 -24.116 - -0.20 1.60 -26.079 -24.119 - -0.20 1.70 -25.860 -24.122 - -0.20 1.80 -25.667 -24.125 - -0.20 1.90 -25.488 -24.128 - -0.20 2.00 -25.315 -24.132 - -0.20 2.10 -25.145 -24.136 - -0.20 2.20 -24.986 -24.141 - -0.20 2.30 -24.841 -24.146 - -0.20 2.40 -24.715 -24.151 - -0.20 2.50 -24.607 -24.156 - -0.20 2.60 -24.517 -24.162 - -0.20 2.70 -24.442 -24.169 - -0.20 2.80 -24.379 -24.176 - -0.20 2.90 -24.325 -24.184 - -0.20 3.00 -24.277 -24.192 - -0.20 3.10 -24.234 -24.201 - -0.20 3.20 -24.193 -24.210 - -0.20 3.30 -24.152 -24.221 - -0.20 3.40 -24.103 -24.232 - -0.20 3.50 -24.041 -24.244 - -0.20 3.60 -23.964 -24.258 - -0.20 3.70 -23.867 -24.272 - -0.20 3.80 -23.744 -24.288 - -0.20 3.90 -23.569 -24.305 - -0.20 4.00 -23.264 -24.323 - -0.20 4.10 -22.690 -24.350 - -0.20 4.20 -22.017 -24.496 - -0.20 4.30 -21.825 -25.022 - -0.20 4.40 -21.936 -25.443 - -0.20 4.50 -21.957 -25.889 - -0.20 4.60 -21.885 -26.378 - -0.20 4.70 -21.733 -26.589 - -0.20 4.80 -21.527 -26.679 - -0.20 4.90 -21.341 -26.849 - -0.20 5.00 -21.276 -26.720 - -0.20 5.10 -21.322 -26.599 - -0.20 5.20 -21.324 -26.527 - -0.20 5.30 -21.300 -26.558 - -0.20 5.40 -21.291 -26.574 - -0.20 5.50 -21.466 -26.543 - -0.20 5.60 -21.674 -26.612 - -0.20 5.70 -21.725 -26.714 - -0.20 5.80 -21.778 -26.851 - -0.20 5.90 -21.807 -26.931 - -0.20 6.00 -21.814 -26.988 - -0.20 6.10 -21.837 -27.032 - -0.20 6.20 -21.892 -27.115 - -0.20 6.30 -22.070 -27.179 - -0.20 6.40 -22.251 -27.241 - -0.20 6.50 -22.387 -27.351 - -0.20 6.60 -22.467 -27.421 - -0.20 6.70 -22.497 -27.457 - -0.20 6.80 -22.503 -27.500 - -0.20 6.90 -22.493 -27.543 - -0.20 7.00 -22.491 -27.551 - -0.20 7.10 -22.535 -27.598 - -0.20 7.20 -22.611 -27.692 - -0.20 7.30 -22.658 -27.712 - -0.20 7.40 -22.670 -27.761 - -0.20 7.50 -22.660 -27.829 - -0.20 7.60 -22.634 -27.897 - -0.20 7.70 -22.601 -27.938 - -0.20 7.80 -22.564 -27.961 - -0.20 7.90 -22.526 -27.985 - -0.20 8.00 -22.489 -28.098 - -0.20 8.10 -22.450 -28.227 - -0.20 8.20 -22.410 -28.357 - -0.20 8.30 -22.369 -28.493 - -0.20 8.40 -22.324 -28.619 - -0.20 8.50 -22.280 -28.741 - -0.20 8.60 -22.231 -28.863 - -0.20 8.70 -22.183 -28.988 - -0.20 8.80 -22.132 -29.116 - -0.20 8.90 -22.079 -29.246 - -0.20 9.00 -22.025 -29.378 - -0.10 1.00 -27.821 -24.201 - -0.10 1.10 -27.621 -24.203 - -0.10 1.20 -27.353 -24.205 - -0.10 1.30 -27.032 -24.207 - -0.10 1.40 -26.707 -24.209 - -0.10 1.50 -26.412 -24.211 - -0.10 1.60 -26.157 -24.214 - -0.10 1.70 -25.937 -24.217 - -0.10 1.80 -25.742 -24.220 - -0.10 1.90 -25.561 -24.223 - -0.10 2.00 -25.385 -24.226 - -0.10 2.10 -25.212 -24.230 - -0.10 2.20 -25.049 -24.234 - -0.10 2.30 -24.901 -24.238 - -0.10 2.40 -24.772 -24.243 - -0.10 2.50 -24.663 -24.248 - -0.10 2.60 -24.571 -24.253 - -0.10 2.70 -24.494 -24.259 - -0.10 2.80 -24.430 -24.265 - -0.10 2.90 -24.375 -24.272 - -0.10 3.00 -24.326 -24.279 - -0.10 3.10 -24.282 -24.287 - -0.10 3.20 -24.241 -24.296 - -0.10 3.30 -24.198 -24.305 - -0.10 3.40 -24.148 -24.315 - -0.10 3.50 -24.086 -24.326 - -0.10 3.60 -24.007 -24.338 - -0.10 3.70 -23.910 -24.351 - -0.10 3.80 -23.785 -24.365 - -0.10 3.90 -23.608 -24.380 - -0.10 4.00 -23.296 -24.397 - -0.10 4.10 -22.705 -24.422 - -0.10 4.20 -22.012 -24.574 - -0.10 4.30 -21.822 -25.116 - -0.10 4.40 -21.936 -25.535 - -0.10 4.50 -21.957 -25.985 - -0.10 4.60 -21.885 -26.474 - -0.10 4.70 -21.733 -26.683 - -0.10 4.80 -21.527 -26.768 - -0.10 4.90 -21.341 -26.920 - -0.10 5.00 -21.276 -26.740 - -0.10 5.10 -21.322 -26.607 - -0.10 5.20 -21.324 -26.532 - -0.10 5.30 -21.300 -26.562 - -0.10 5.40 -21.291 -26.578 - -0.10 5.50 -21.466 -26.545 - -0.10 5.60 -21.674 -26.613 - -0.10 5.70 -21.725 -26.716 - -0.10 5.80 -21.778 -26.853 - -0.10 5.90 -21.807 -26.933 - -0.10 6.00 -21.814 -26.990 - -0.10 6.10 -21.837 -27.034 - -0.10 6.20 -21.892 -27.117 - -0.10 6.30 -22.070 -27.181 - -0.10 6.40 -22.251 -27.242 - -0.10 6.50 -22.387 -27.352 - -0.10 6.60 -22.467 -27.421 - -0.10 6.70 -22.497 -27.458 - -0.10 6.80 -22.503 -27.500 - -0.10 6.90 -22.493 -27.544 - -0.10 7.00 -22.491 -27.551 - -0.10 7.10 -22.535 -27.602 - -0.10 7.20 -22.611 -27.692 - -0.10 7.30 -22.658 -27.712 - -0.10 7.40 -22.670 -27.761 - -0.10 7.50 -22.660 -27.829 - -0.10 7.60 -22.634 -27.897 - -0.10 7.70 -22.601 -27.938 - -0.10 7.80 -22.564 -27.961 - -0.10 7.90 -22.526 -27.985 - -0.10 8.00 -22.489 -28.099 - -0.10 8.10 -22.450 -28.227 - -0.10 8.20 -22.410 -28.357 - -0.10 8.30 -22.369 -28.494 - -0.10 8.40 -22.324 -28.620 - -0.10 8.50 -22.280 -28.742 - -0.10 8.60 -22.231 -28.865 - -0.10 8.70 -22.183 -28.990 - -0.10 8.80 -22.132 -29.119 - -0.10 8.90 -22.079 -29.250 - -0.10 9.00 -22.026 -29.383 - -0.00 1.00 -27.891 -24.299 - -0.00 1.10 -27.687 -24.300 - -0.00 1.20 -27.420 -24.302 - -0.00 1.30 -27.103 -24.304 - -0.00 1.40 -26.781 -24.306 - -0.00 1.50 -26.489 -24.308 - -0.00 1.60 -26.234 -24.310 - -0.00 1.70 -26.014 -24.312 - -0.00 1.80 -25.819 -24.315 - -0.00 1.90 -25.635 -24.318 - -0.00 2.00 -25.456 -24.321 - -0.00 2.10 -25.279 -24.324 - -0.00 2.20 -25.113 -24.328 - -0.00 2.30 -24.962 -24.331 - -0.00 2.40 -24.830 -24.336 - -0.00 2.50 -24.719 -24.340 - -0.00 2.60 -24.625 -24.345 - -0.00 2.70 -24.547 -24.350 - -0.00 2.80 -24.482 -24.356 - -0.00 2.90 -24.425 -24.362 - -0.00 3.00 -24.375 -24.368 - -0.00 3.10 -24.331 -24.375 - -0.00 3.20 -24.289 -24.383 - -0.00 3.30 -24.245 -24.391 - -0.00 3.40 -24.195 -24.400 - -0.00 3.50 -24.131 -24.410 - -0.00 3.60 -24.051 -24.421 - -0.00 3.70 -23.952 -24.432 - -0.00 3.80 -23.826 -24.445 - -0.00 3.90 -23.647 -24.458 - -0.00 4.00 -23.329 -24.473 - -0.00 4.10 -22.723 -24.497 - -0.00 4.20 -22.008 -24.654 - -0.00 4.30 -21.820 -25.210 - -0.00 4.40 -21.936 -25.628 - -0.00 4.50 -21.957 -26.082 - -0.00 4.60 -21.884 -26.570 - -0.00 4.70 -21.733 -26.775 - -0.00 4.80 -21.527 -26.854 - -0.00 4.90 -21.341 -26.985 - -0.00 5.00 -21.276 -26.756 - -0.00 5.10 -21.322 -26.613 - -0.00 5.20 -21.324 -26.536 - -0.00 5.30 -21.300 -26.565 - -0.00 5.40 -21.291 -26.580 - -0.00 5.50 -21.466 -26.547 - -0.00 5.60 -21.674 -26.615 - -0.00 5.70 -21.725 -26.717 - -0.00 5.80 -21.778 -26.855 - -0.00 5.90 -21.807 -26.935 - -0.00 6.00 -21.814 -26.992 - -0.00 6.10 -21.837 -27.035 - -0.00 6.20 -21.892 -27.119 - -0.00 6.30 -22.070 -27.182 - -0.00 6.40 -22.251 -27.243 - -0.00 6.50 -22.387 -27.353 - -0.00 6.60 -22.467 -27.418 - -0.00 6.70 -22.497 -27.458 - -0.00 6.80 -22.503 -27.501 - -0.00 6.90 -22.493 -27.545 - -0.00 7.00 -22.491 -27.552 - -0.00 7.10 -22.535 -27.603 - -0.00 7.20 -22.611 -27.692 - -0.00 7.30 -22.658 -27.708 - -0.00 7.40 -22.670 -27.761 - -0.00 7.50 -22.660 -27.830 - -0.00 7.60 -22.634 -27.897 - -0.00 7.70 -22.601 -27.938 - -0.00 7.80 -22.564 -27.961 - -0.00 7.90 -22.526 -27.986 - -0.00 8.00 -22.489 -28.099 - -0.00 8.10 -22.450 -28.228 - -0.00 8.20 -22.410 -28.358 - -0.00 8.30 -22.369 -28.494 - -0.00 8.40 -22.324 -28.621 - -0.00 8.50 -22.280 -28.743 - -0.00 8.60 -22.231 -28.867 - -0.00 8.70 -22.183 -28.992 - -0.00 8.80 -22.132 -29.121 - -0.00 8.90 -22.079 -29.253 - -0.00 9.00 -22.026 -29.388 - 0.10 1.00 -27.956 -24.396 - 0.10 1.10 -27.748 -24.398 - 0.10 1.20 -27.483 -24.399 - 0.10 1.30 -27.172 -24.401 - 0.10 1.40 -26.855 -24.402 - 0.10 1.50 -26.565 -24.404 - 0.10 1.60 -26.312 -24.406 - 0.10 1.70 -26.092 -24.408 - 0.10 1.80 -25.896 -24.411 - 0.10 1.90 -25.710 -24.413 - 0.10 2.00 -25.527 -24.416 - 0.10 2.10 -25.347 -24.419 - 0.10 2.20 -25.177 -24.422 - 0.10 2.30 -25.023 -24.425 - 0.10 2.40 -24.889 -24.429 - 0.10 2.50 -24.775 -24.433 - 0.10 2.60 -24.680 -24.437 - 0.10 2.70 -24.600 -24.442 - 0.10 2.80 -24.534 -24.447 - 0.10 2.90 -24.476 -24.453 - 0.10 3.00 -24.426 -24.458 - 0.10 3.10 -24.380 -24.465 - 0.10 3.20 -24.337 -24.472 - 0.10 3.30 -24.293 -24.479 - 0.10 3.40 -24.242 -24.487 - 0.10 3.50 -24.177 -24.496 - 0.10 3.60 -24.096 -24.505 - 0.10 3.70 -23.996 -24.516 - 0.10 3.80 -23.869 -24.527 - 0.10 3.90 -23.688 -24.539 - 0.10 4.00 -23.364 -24.552 - 0.10 4.10 -22.744 -24.574 - 0.10 4.20 -22.006 -24.738 - 0.10 4.30 -21.819 -25.306 - 0.10 4.40 -21.936 -25.722 - 0.10 4.50 -21.957 -26.179 - 0.10 4.60 -21.884 -26.665 - 0.10 4.70 -21.733 -26.865 - 0.10 4.80 -21.527 -26.937 - 0.10 4.90 -21.341 -27.046 - 0.10 5.00 -21.276 -26.770 - 0.10 5.10 -21.322 -26.618 - 0.10 5.20 -21.324 -26.539 - 0.10 5.30 -21.300 -26.568 - 0.10 5.40 -21.291 -26.582 - 0.10 5.50 -21.466 -26.548 - 0.10 5.60 -21.674 -26.616 - 0.10 5.70 -21.725 -26.718 - 0.10 5.80 -21.778 -26.856 - 0.10 5.90 -21.807 -26.936 - 0.10 6.00 -21.814 -26.993 - 0.10 6.10 -21.837 -27.036 - 0.10 6.20 -21.892 -27.120 - 0.10 6.30 -22.070 -27.183 - 0.10 6.40 -22.251 -27.244 - 0.10 6.50 -22.387 -27.353 - 0.10 6.60 -22.467 -27.423 - 0.10 6.70 -22.497 -27.459 - 0.10 6.80 -22.503 -27.501 - 0.10 6.90 -22.493 -27.545 - 0.10 7.00 -22.491 -27.552 - 0.10 7.10 -22.535 -27.603 - 0.10 7.20 -22.611 -27.693 - 0.10 7.30 -22.658 -27.708 - 0.10 7.40 -22.670 -27.762 - 0.10 7.50 -22.660 -27.830 - 0.10 7.60 -22.634 -27.897 - 0.10 7.70 -22.601 -27.939 - 0.10 7.80 -22.564 -27.962 - 0.10 7.90 -22.526 -27.986 - 0.10 8.00 -22.489 -28.099 - 0.10 8.10 -22.450 -28.228 - 0.10 8.20 -22.410 -28.358 - 0.10 8.30 -22.369 -28.495 - 0.10 8.40 -22.324 -28.622 - 0.10 8.50 -22.280 -28.740 - 0.10 8.60 -22.231 -28.868 - 0.10 8.70 -22.183 -28.994 - 0.10 8.80 -22.132 -29.123 - 0.10 8.90 -22.079 -29.256 - 0.10 9.00 -22.026 -29.392 - 0.20 1.00 -28.015 -24.494 - 0.20 1.10 -27.804 -24.495 - 0.20 1.20 -27.542 -24.497 - 0.20 1.30 -27.237 -24.498 - 0.20 1.40 -26.926 -24.499 - 0.20 1.50 -26.640 -24.501 - 0.20 1.60 -26.389 -24.503 - 0.20 1.70 -26.170 -24.505 - 0.20 1.80 -25.973 -24.507 - 0.20 1.90 -25.785 -24.509 - 0.20 2.00 -25.599 -24.512 - 0.20 2.10 -25.415 -24.514 - 0.20 2.20 -25.241 -24.517 - 0.20 2.30 -25.084 -24.520 - 0.20 2.40 -24.948 -24.523 - 0.20 2.50 -24.832 -24.527 - 0.20 2.60 -24.735 -24.531 - 0.20 2.70 -24.654 -24.535 - 0.20 2.80 -24.586 -24.539 - 0.20 2.90 -24.528 -24.544 - 0.20 3.00 -24.476 -24.550 - 0.20 3.10 -24.430 -24.555 - 0.20 3.20 -24.386 -24.561 - 0.20 3.30 -24.341 -24.568 - 0.20 3.40 -24.289 -24.575 - 0.20 3.50 -24.224 -24.583 - 0.20 3.60 -24.142 -24.591 - 0.20 3.70 -24.041 -24.601 - 0.20 3.80 -23.912 -24.610 - 0.20 3.90 -23.730 -24.621 - 0.20 4.00 -23.400 -24.633 - 0.20 4.10 -22.767 -24.653 - 0.20 4.20 -22.005 -24.823 - 0.20 4.30 -21.817 -25.402 - 0.20 4.40 -21.936 -25.817 - 0.20 4.50 -21.956 -26.276 - 0.20 4.60 -21.884 -26.758 - 0.20 4.70 -21.733 -26.953 - 0.20 4.80 -21.527 -27.017 - 0.20 4.90 -21.341 -27.101 - 0.20 5.00 -21.276 -26.781 - 0.20 5.10 -21.322 -26.622 - 0.20 5.20 -21.324 -26.541 - 0.20 5.30 -21.300 -26.570 - 0.20 5.40 -21.291 -26.584 - 0.20 5.50 -21.466 -26.549 - 0.20 5.60 -21.674 -26.617 - 0.20 5.70 -21.725 -26.719 - 0.20 5.80 -21.778 -26.857 - 0.20 5.90 -21.807 -26.937 - 0.20 6.00 -21.814 -26.994 - 0.20 6.10 -21.837 -27.037 - 0.20 6.20 -21.892 -27.121 - 0.20 6.30 -22.070 -27.184 - 0.20 6.40 -22.251 -27.244 - 0.20 6.50 -22.387 -27.354 - 0.20 6.60 -22.467 -27.423 - 0.20 6.70 -22.497 -27.459 - 0.20 6.80 -22.503 -27.502 - 0.20 6.90 -22.493 -27.545 - 0.20 7.00 -22.491 -27.552 - 0.20 7.10 -22.535 -27.603 - 0.20 7.20 -22.611 -27.693 - 0.20 7.30 -22.658 -27.708 - 0.20 7.40 -22.670 -27.762 - 0.20 7.50 -22.660 -27.830 - 0.20 7.60 -22.634 -27.897 - 0.20 7.70 -22.601 -27.939 - 0.20 7.80 -22.564 -27.962 - 0.20 7.90 -22.526 -27.986 - 0.20 8.00 -22.489 -28.099 - 0.20 8.10 -22.450 -28.228 - 0.20 8.20 -22.410 -28.359 - 0.20 8.30 -22.369 -28.495 - 0.20 8.40 -22.324 -28.622 - 0.20 8.50 -22.280 -28.745 - 0.20 8.60 -22.231 -28.869 - 0.20 8.70 -22.183 -28.995 - 0.20 8.80 -22.132 -29.125 - 0.20 8.90 -22.079 -29.258 - 0.20 9.00 -22.026 -29.395 - 0.30 1.00 -28.069 -24.592 - 0.30 1.10 -27.854 -24.593 - 0.30 1.20 -27.596 -24.594 - 0.30 1.30 -27.299 -24.596 - 0.30 1.40 -26.994 -24.597 - 0.30 1.50 -26.714 -24.598 - 0.30 1.60 -26.465 -24.600 - 0.30 1.70 -26.247 -24.602 - 0.30 1.80 -26.049 -24.604 - 0.30 1.90 -25.860 -24.606 - 0.30 2.00 -25.671 -24.608 - 0.30 2.10 -25.483 -24.610 - 0.30 2.20 -25.306 -24.613 - 0.30 2.30 -25.146 -24.615 - 0.30 2.40 -25.007 -24.618 - 0.30 2.50 -24.889 -24.621 - 0.30 2.60 -24.790 -24.625 - 0.30 2.70 -24.708 -24.629 - 0.30 2.80 -24.639 -24.633 - 0.30 2.90 -24.579 -24.637 - 0.30 3.00 -24.527 -24.642 - 0.30 3.10 -24.480 -24.647 - 0.30 3.20 -24.435 -24.652 - 0.30 3.30 -24.390 -24.658 - 0.30 3.40 -24.337 -24.664 - 0.30 3.50 -24.271 -24.671 - 0.30 3.60 -24.188 -24.679 - 0.30 3.70 -24.086 -24.687 - 0.30 3.80 -23.956 -24.696 - 0.30 3.90 -23.772 -24.705 - 0.30 4.00 -23.437 -24.715 - 0.30 4.10 -22.791 -24.735 - 0.30 4.20 -22.004 -24.911 - 0.30 4.30 -21.816 -25.499 - 0.30 4.40 -21.936 -25.914 - 0.30 4.50 -21.956 -26.373 - 0.30 4.60 -21.884 -26.850 - 0.30 4.70 -21.733 -27.039 - 0.30 4.80 -21.527 -27.092 - 0.30 4.90 -21.341 -27.150 - 0.30 5.00 -21.276 -26.790 - 0.30 5.10 -21.322 -26.626 - 0.30 5.20 -21.324 -26.543 - 0.30 5.30 -21.300 -26.571 - 0.30 5.40 -21.291 -26.585 - 0.30 5.50 -21.466 -26.550 - 0.30 5.60 -21.674 -26.618 - 0.30 5.70 -21.725 -26.720 - 0.30 5.80 -21.778 -26.858 - 0.30 5.90 -21.807 -26.938 - 0.30 6.00 -21.814 -26.995 - 0.30 6.10 -21.837 -27.038 - 0.30 6.20 -21.892 -27.122 - 0.30 6.30 -22.070 -27.184 - 0.30 6.40 -22.251 -27.245 - 0.30 6.50 -22.387 -27.354 - 0.30 6.60 -22.467 -27.424 - 0.30 6.70 -22.497 -27.460 - 0.30 6.80 -22.503 -27.502 - 0.30 6.90 -22.493 -27.546 - 0.30 7.00 -22.491 -27.552 - 0.30 7.10 -22.535 -27.603 - 0.30 7.20 -22.611 -27.693 - 0.30 7.30 -22.658 -27.708 - 0.30 7.40 -22.670 -27.762 - 0.30 7.50 -22.660 -27.830 - 0.30 7.60 -22.634 -27.898 - 0.30 7.70 -22.601 -27.939 - 0.30 7.80 -22.564 -27.962 - 0.30 7.90 -22.526 -27.986 - 0.30 8.00 -22.489 -28.100 - 0.30 8.10 -22.450 -28.228 - 0.30 8.20 -22.410 -28.359 - 0.30 8.30 -22.369 -28.496 - 0.30 8.40 -22.324 -28.622 - 0.30 8.50 -22.280 -28.746 - 0.30 8.60 -22.231 -28.869 - 0.30 8.70 -22.183 -28.996 - 0.30 8.80 -22.132 -29.126 - 0.30 8.90 -22.079 -29.260 - 0.30 9.00 -22.026 -29.397 - 0.40 1.00 -28.117 -24.690 - 0.40 1.10 -27.899 -24.691 - 0.40 1.20 -27.645 -24.692 - 0.40 1.30 -27.356 -24.693 - 0.40 1.40 -27.060 -24.695 - 0.40 1.50 -26.785 -24.696 - 0.40 1.60 -26.540 -24.697 - 0.40 1.70 -26.323 -24.699 - 0.40 1.80 -26.126 -24.701 - 0.40 1.90 -25.935 -24.702 - 0.40 2.00 -25.743 -24.704 - 0.40 2.10 -25.552 -24.706 - 0.40 2.20 -25.371 -24.709 - 0.40 2.30 -25.208 -24.711 - 0.40 2.40 -25.066 -24.714 - 0.40 2.50 -24.946 -24.717 - 0.40 2.60 -24.846 -24.720 - 0.40 2.70 -24.762 -24.723 - 0.40 2.80 -24.692 -24.727 - 0.40 2.90 -24.631 -24.730 - 0.40 3.00 -24.578 -24.735 - 0.40 3.10 -24.530 -24.739 - 0.40 3.20 -24.485 -24.744 - 0.40 3.30 -24.439 -24.749 - 0.40 3.40 -24.385 -24.755 - 0.40 3.50 -24.318 -24.761 - 0.40 3.60 -24.234 -24.768 - 0.40 3.70 -24.131 -24.775 - 0.40 3.80 -24.000 -24.783 - 0.40 3.90 -23.815 -24.791 - 0.40 4.00 -23.475 -24.800 - 0.40 4.10 -22.818 -24.818 - 0.40 4.20 -22.004 -25.000 - 0.40 4.30 -21.815 -25.596 - 0.40 4.40 -21.936 -26.010 - 0.40 4.50 -21.956 -26.470 - 0.40 4.60 -21.884 -26.940 - 0.40 4.70 -21.733 -27.121 - 0.40 4.80 -21.527 -27.164 - 0.40 4.90 -21.341 -27.194 - 0.40 5.00 -21.276 -26.797 - 0.40 5.10 -21.322 -26.628 - 0.40 5.20 -21.324 -26.545 - 0.40 5.30 -21.300 -26.573 - 0.40 5.40 -21.291 -26.586 - 0.40 5.50 -21.466 -26.551 - 0.40 5.60 -21.674 -26.618 - 0.40 5.70 -21.725 -26.721 - 0.40 5.80 -21.778 -26.859 - 0.40 5.90 -21.807 -26.939 - 0.40 6.00 -21.814 -26.995 - 0.40 6.10 -21.837 -27.039 - 0.40 6.20 -21.892 -27.122 - 0.40 6.30 -22.070 -27.185 - 0.40 6.40 -22.251 -27.245 - 0.40 6.50 -22.387 -27.355 - 0.40 6.60 -22.467 -27.424 - 0.40 6.70 -22.497 -27.460 - 0.40 6.80 -22.503 -27.502 - 0.40 6.90 -22.493 -27.546 - 0.40 7.00 -22.491 -27.553 - 0.40 7.10 -22.535 -27.603 - 0.40 7.20 -22.610 -27.692 - 0.40 7.30 -22.658 -27.709 - 0.40 7.40 -22.670 -27.762 - 0.40 7.50 -22.660 -27.830 - 0.40 7.60 -22.634 -27.898 - 0.40 7.70 -22.601 -27.939 - 0.40 7.80 -22.564 -27.962 - 0.40 7.90 -22.526 -27.986 - 0.40 8.00 -22.489 -28.100 - 0.40 8.10 -22.450 -28.229 - 0.40 8.20 -22.410 -28.359 - 0.40 8.30 -22.369 -28.496 - 0.40 8.40 -22.324 -28.623 - 0.40 8.50 -22.280 -28.746 - 0.40 8.60 -22.231 -28.870 - 0.40 8.70 -22.183 -28.997 - 0.40 8.80 -22.132 -29.127 - 0.40 8.90 -22.079 -29.261 - 0.40 9.00 -22.026 -29.399 - 0.50 1.00 -28.159 -24.789 - 0.50 1.10 -27.938 -24.790 - 0.50 1.20 -27.688 -24.791 - 0.50 1.30 -27.409 -24.792 - 0.50 1.40 -27.122 -24.793 - 0.50 1.50 -26.854 -24.794 - 0.50 1.60 -26.613 -24.795 - 0.50 1.70 -26.399 -24.796 - 0.50 1.80 -26.202 -24.798 - 0.50 1.90 -26.010 -24.800 - 0.50 2.00 -25.815 -24.801 - 0.50 2.10 -25.620 -24.803 - 0.50 2.20 -25.436 -24.805 - 0.50 2.30 -25.270 -24.807 - 0.50 2.40 -25.125 -24.810 - 0.50 2.50 -25.003 -24.812 - 0.50 2.60 -24.901 -24.815 - 0.50 2.70 -24.816 -24.818 - 0.50 2.80 -24.745 -24.821 - 0.50 2.90 -24.683 -24.825 - 0.50 3.00 -24.629 -24.828 - 0.50 3.10 -24.581 -24.832 - 0.50 3.20 -24.535 -24.837 - 0.50 3.30 -24.488 -24.841 - 0.50 3.40 -24.433 -24.846 - 0.50 3.50 -24.365 -24.852 - 0.50 3.60 -24.281 -24.858 - 0.50 3.70 -24.177 -24.864 - 0.50 3.80 -24.045 -24.871 - 0.50 3.90 -23.858 -24.878 - 0.50 4.00 -23.515 -24.886 - 0.50 4.10 -22.846 -24.903 - 0.50 4.20 -22.004 -25.091 - 0.50 4.30 -21.815 -25.694 - 0.50 4.40 -21.936 -26.108 - 0.50 4.50 -21.956 -26.567 - 0.50 4.60 -21.884 -27.028 - 0.50 4.70 -21.733 -27.200 - 0.50 4.80 -21.527 -27.230 - 0.50 4.90 -21.341 -27.232 - 0.50 5.00 -21.276 -26.803 - 0.50 5.10 -21.322 -26.630 - 0.50 5.20 -21.324 -26.546 - 0.50 5.30 -21.300 -26.574 - 0.50 5.40 -21.291 -26.587 - 0.50 5.50 -21.466 -26.551 - 0.50 5.60 -21.674 -26.619 - 0.50 5.70 -21.725 -26.721 - 0.50 5.80 -21.778 -26.859 - 0.50 5.90 -21.807 -26.939 - 0.50 6.00 -21.814 -26.996 - 0.50 6.10 -21.837 -27.039 - 0.50 6.20 -21.892 -27.123 - 0.50 6.30 -22.070 -27.185 - 0.50 6.40 -22.251 -27.245 - 0.50 6.50 -22.387 -27.355 - 0.50 6.60 -22.467 -27.424 - 0.50 6.70 -22.497 -27.460 - 0.50 6.80 -22.503 -27.502 - 0.50 6.90 -22.493 -27.546 - 0.50 7.00 -22.491 -27.553 - 0.50 7.10 -22.535 -27.604 - 0.50 7.20 -22.610 -27.692 - 0.50 7.30 -22.658 -27.709 - 0.50 7.40 -22.670 -27.762 - 0.50 7.50 -22.660 -27.830 - 0.50 7.60 -22.634 -27.898 - 0.50 7.70 -22.601 -27.939 - 0.50 7.80 -22.564 -27.962 - 0.50 7.90 -22.526 -27.986 - 0.50 8.00 -22.489 -28.100 - 0.50 8.10 -22.450 -28.229 - 0.50 8.20 -22.410 -28.359 - 0.50 8.30 -22.369 -28.496 - 0.50 8.40 -22.324 -28.623 - 0.50 8.50 -22.280 -28.746 - 0.50 8.60 -22.231 -28.870 - 0.50 8.70 -22.183 -28.997 - 0.50 8.80 -22.132 -29.128 - 0.50 8.90 -22.079 -29.262 - 0.50 9.00 -22.026 -29.401 - 0.60 1.00 -28.196 -24.887 - 0.60 1.10 -27.972 -24.888 - 0.60 1.20 -27.727 -24.889 - 0.60 1.30 -27.456 -24.890 - 0.60 1.40 -27.180 -24.891 - 0.60 1.50 -26.919 -24.892 - 0.60 1.60 -26.684 -24.893 - 0.60 1.70 -26.473 -24.894 - 0.60 1.80 -26.277 -24.896 - 0.60 1.90 -26.084 -24.897 - 0.60 2.00 -25.887 -24.899 - 0.60 2.10 -25.689 -24.900 - 0.60 2.20 -25.501 -24.902 - 0.60 2.30 -25.332 -24.904 - 0.60 2.40 -25.185 -24.906 - 0.60 2.50 -25.061 -24.908 - 0.60 2.60 -24.957 -24.911 - 0.60 2.70 -24.871 -24.913 - 0.60 2.80 -24.798 -24.916 - 0.60 2.90 -24.736 -24.919 - 0.60 3.00 -24.681 -24.923 - 0.60 3.10 -24.631 -24.926 - 0.60 3.20 -24.585 -24.930 - 0.60 3.30 -24.537 -24.934 - 0.60 3.40 -24.482 -24.939 - 0.60 3.50 -24.413 -24.943 - 0.60 3.60 -24.328 -24.949 - 0.60 3.70 -24.223 -24.954 - 0.60 3.80 -24.090 -24.960 - 0.60 3.90 -23.902 -24.967 - 0.60 4.00 -23.555 -24.973 - 0.60 4.10 -22.875 -24.990 - 0.60 4.20 -22.005 -25.183 - 0.60 4.30 -21.814 -25.793 - 0.60 4.40 -21.936 -26.206 - 0.60 4.50 -21.956 -26.664 - 0.60 4.60 -21.884 -27.113 - 0.60 4.70 -21.733 -27.275 - 0.60 4.80 -21.527 -27.291 - 0.60 4.90 -21.341 -27.265 - 0.60 5.00 -21.276 -26.808 - 0.60 5.10 -21.322 -26.632 - 0.60 5.20 -21.324 -26.547 - 0.60 5.30 -21.300 -26.574 - 0.60 5.40 -21.291 -26.588 - 0.60 5.50 -21.466 -26.552 - 0.60 5.60 -21.674 -26.619 - 0.60 5.70 -21.725 -26.721 - 0.60 5.80 -21.778 -26.860 - 0.60 5.90 -21.807 -26.940 - 0.60 6.00 -21.814 -26.996 - 0.60 6.10 -21.837 -27.040 - 0.60 6.20 -21.892 -27.123 - 0.60 6.30 -22.070 -27.186 - 0.60 6.40 -22.251 -27.246 - 0.60 6.50 -22.387 -27.355 - 0.60 6.60 -22.467 -27.424 - 0.60 6.70 -22.497 -27.460 - 0.60 6.80 -22.503 -27.502 - 0.60 6.90 -22.493 -27.546 - 0.60 7.00 -22.491 -27.553 - 0.60 7.10 -22.535 -27.604 - 0.60 7.20 -22.610 -27.692 - 0.60 7.30 -22.658 -27.709 - 0.60 7.40 -22.670 -27.762 - 0.60 7.50 -22.660 -27.830 - 0.60 7.60 -22.634 -27.898 - 0.60 7.70 -22.601 -27.939 - 0.60 7.80 -22.564 -27.962 - 0.60 7.90 -22.526 -27.986 - 0.60 8.00 -22.489 -28.100 - 0.60 8.10 -22.450 -28.229 - 0.60 8.20 -22.410 -28.359 - 0.60 8.30 -22.369 -28.496 - 0.60 8.40 -22.324 -28.623 - 0.60 8.50 -22.280 -28.747 - 0.60 8.60 -22.231 -28.871 - 0.60 8.70 -22.183 -28.998 - 0.60 8.80 -22.132 -29.129 - 0.60 8.90 -22.079 -29.263 - 0.60 9.00 -22.026 -29.402 - 0.70 1.00 -28.227 -24.986 - 0.70 1.10 -28.001 -24.987 - 0.70 1.20 -27.760 -24.988 - 0.70 1.30 -27.500 -24.988 - 0.70 1.40 -27.233 -24.989 - 0.70 1.50 -26.981 -24.990 - 0.70 1.60 -26.752 -24.991 - 0.70 1.70 -26.544 -24.992 - 0.70 1.80 -26.350 -24.993 - 0.70 1.90 -26.157 -24.995 - 0.70 2.00 -25.958 -24.996 - 0.70 2.10 -25.756 -24.998 - 0.70 2.20 -25.566 -24.999 - 0.70 2.30 -25.393 -25.001 - 0.70 2.40 -25.244 -25.003 - 0.70 2.50 -25.118 -25.005 - 0.70 2.60 -25.013 -25.007 - 0.70 2.70 -24.925 -25.009 - 0.70 2.80 -24.851 -25.012 - 0.70 2.90 -24.788 -25.015 - 0.70 3.00 -24.732 -25.018 - 0.70 3.10 -24.682 -25.021 - 0.70 3.20 -24.635 -25.024 - 0.70 3.30 -24.586 -25.028 - 0.70 3.40 -24.530 -25.032 - 0.70 3.50 -24.461 -25.036 - 0.70 3.60 -24.375 -25.041 - 0.70 3.70 -24.269 -25.046 - 0.70 3.80 -24.136 -25.051 - 0.70 3.90 -23.947 -25.056 - 0.70 4.00 -23.596 -25.062 - 0.70 4.10 -22.905 -25.078 - 0.70 4.20 -22.005 -25.276 - 0.70 4.30 -21.814 -25.891 - 0.70 4.40 -21.936 -26.304 - 0.70 4.50 -21.956 -26.760 - 0.70 4.60 -21.884 -27.196 - 0.70 4.70 -21.733 -27.345 - 0.70 4.80 -21.527 -27.346 - 0.70 4.90 -21.341 -27.293 - 0.70 5.00 -21.276 -26.811 - 0.70 5.10 -21.322 -26.633 - 0.70 5.20 -21.324 -26.548 - 0.70 5.30 -21.300 -26.575 - 0.70 5.40 -21.291 -26.588 - 0.70 5.50 -21.466 -26.552 - 0.70 5.60 -21.674 -26.619 - 0.70 5.70 -21.725 -26.722 - 0.70 5.80 -21.778 -26.860 - 0.70 5.90 -21.807 -26.940 - 0.70 6.00 -21.814 -26.997 - 0.70 6.10 -21.837 -27.040 - 0.70 6.20 -21.892 -27.123 - 0.70 6.30 -22.070 -27.186 - 0.70 6.40 -22.251 -27.246 - 0.70 6.50 -22.387 -27.355 - 0.70 6.60 -22.467 -27.424 - 0.70 6.70 -22.497 -27.460 - 0.70 6.80 -22.503 -27.503 - 0.70 6.90 -22.493 -27.546 - 0.70 7.00 -22.491 -27.553 - 0.70 7.10 -22.535 -27.604 - 0.70 7.20 -22.610 -27.692 - 0.70 7.30 -22.658 -27.709 - 0.70 7.40 -22.670 -27.762 - 0.70 7.50 -22.660 -27.830 - 0.70 7.60 -22.634 -27.898 - 0.70 7.70 -22.601 -27.939 - 0.70 7.80 -22.564 -27.962 - 0.70 7.90 -22.526 -27.986 - 0.70 8.00 -22.489 -28.100 - 0.70 8.10 -22.450 -28.229 - 0.70 8.20 -22.410 -28.359 - 0.70 8.30 -22.369 -28.496 - 0.70 8.40 -22.324 -28.623 - 0.70 8.50 -22.280 -28.747 - 0.70 8.60 -22.231 -28.871 - 0.70 8.70 -22.183 -28.998 - 0.70 8.80 -22.132 -29.129 - 0.70 8.90 -22.079 -29.264 - 0.70 9.00 -22.026 -29.403 - 0.80 1.00 -28.254 -25.085 - 0.80 1.10 -28.026 -25.086 - 0.80 1.20 -27.789 -25.086 - 0.80 1.30 -27.538 -25.087 - 0.80 1.40 -27.282 -25.088 - 0.80 1.50 -27.040 -25.089 - 0.80 1.60 -26.817 -25.090 - 0.80 1.70 -26.614 -25.090 - 0.80 1.80 -26.422 -25.092 - 0.80 1.90 -26.229 -25.093 - 0.80 2.00 -26.028 -25.094 - 0.80 2.10 -25.824 -25.095 - 0.80 2.20 -25.630 -25.097 - 0.80 2.30 -25.455 -25.098 - 0.80 2.40 -25.303 -25.100 - 0.80 2.50 -25.175 -25.102 - 0.80 2.60 -25.068 -25.104 - 0.80 2.70 -24.979 -25.106 - 0.80 2.80 -24.904 -25.108 - 0.80 2.90 -24.840 -25.110 - 0.80 3.00 -24.783 -25.113 - 0.80 3.10 -24.733 -25.116 - 0.80 3.20 -24.685 -25.119 - 0.80 3.30 -24.636 -25.122 - 0.80 3.40 -24.579 -25.126 - 0.80 3.50 -24.509 -25.129 - 0.80 3.60 -24.422 -25.133 - 0.80 3.70 -24.315 -25.138 - 0.80 3.80 -24.181 -25.142 - 0.80 3.90 -23.991 -25.147 - 0.80 4.00 -23.638 -25.152 - 0.80 4.10 -22.936 -25.167 - 0.80 4.20 -22.006 -25.370 - 0.80 4.30 -21.814 -25.990 - 0.80 4.40 -21.936 -26.402 - 0.80 4.50 -21.956 -26.856 - 0.80 4.60 -21.884 -27.274 - 0.80 4.70 -21.733 -27.411 - 0.80 4.80 -21.527 -27.396 - 0.80 4.90 -21.341 -27.316 - 0.80 5.00 -21.276 -26.814 - 0.80 5.10 -21.322 -26.634 - 0.80 5.20 -21.324 -26.548 - 0.80 5.30 -21.300 -26.576 - 0.80 5.40 -21.291 -26.589 - 0.80 5.50 -21.466 -26.552 - 0.80 5.60 -21.674 -26.619 - 0.80 5.70 -21.725 -26.722 - 0.80 5.80 -21.778 -26.860 - 0.80 5.90 -21.807 -26.940 - 0.80 6.00 -21.814 -26.997 - 0.80 6.10 -21.837 -27.040 - 0.80 6.20 -21.892 -27.124 - 0.80 6.30 -22.070 -27.186 - 0.80 6.40 -22.251 -27.246 - 0.80 6.50 -22.387 -27.355 - 0.80 6.60 -22.467 -27.424 - 0.80 6.70 -22.497 -27.460 - 0.80 6.80 -22.503 -27.503 - 0.80 6.90 -22.493 -27.546 - 0.80 7.00 -22.491 -27.553 - 0.80 7.10 -22.535 -27.604 - 0.80 7.20 -22.610 -27.692 - 0.80 7.30 -22.658 -27.709 - 0.80 7.40 -22.670 -27.762 - 0.80 7.50 -22.660 -27.830 - 0.80 7.60 -22.634 -27.898 - 0.80 7.70 -22.601 -27.939 - 0.80 7.80 -22.564 -27.962 - 0.80 7.90 -22.526 -27.986 - 0.80 8.00 -22.489 -28.100 - 0.80 8.10 -22.450 -28.229 - 0.80 8.20 -22.410 -28.359 - 0.80 8.30 -22.369 -28.496 - 0.80 8.40 -22.324 -28.624 - 0.80 8.50 -22.280 -28.747 - 0.80 8.60 -22.231 -28.871 - 0.80 8.70 -22.183 -28.998 - 0.80 8.80 -22.132 -29.130 - 0.80 8.90 -22.079 -29.265 - 0.80 9.00 -22.026 -29.404 - 0.90 1.00 -28.277 -25.184 - 0.90 1.10 -28.047 -25.185 - 0.90 1.20 -27.814 -25.185 - 0.90 1.30 -27.572 -25.186 - 0.90 1.40 -27.327 -25.187 - 0.90 1.50 -27.094 -25.187 - 0.90 1.60 -26.878 -25.188 - 0.90 1.70 -26.681 -25.189 - 0.90 1.80 -26.492 -25.190 - 0.90 1.90 -26.299 -25.191 - 0.90 2.00 -26.097 -25.192 - 0.90 2.10 -25.890 -25.193 - 0.90 2.20 -25.693 -25.194 - 0.90 2.30 -25.516 -25.196 - 0.90 2.40 -25.362 -25.197 - 0.90 2.50 -25.232 -25.199 - 0.90 2.60 -25.124 -25.201 - 0.90 2.70 -25.033 -25.203 - 0.90 2.80 -24.958 -25.205 - 0.90 2.90 -24.892 -25.207 - 0.90 3.00 -24.835 -25.209 - 0.90 3.10 -24.783 -25.211 - 0.90 3.20 -24.735 -25.214 - 0.90 3.30 -24.685 -25.217 - 0.90 3.40 -24.628 -25.220 - 0.90 3.50 -24.557 -25.223 - 0.90 3.60 -24.469 -25.227 - 0.90 3.70 -24.362 -25.231 - 0.90 3.80 -24.227 -25.235 - 0.90 3.90 -24.036 -25.239 - 0.90 4.00 -23.680 -25.243 - 0.90 4.10 -22.968 -25.257 - 0.90 4.20 -22.006 -25.464 - 0.90 4.30 -21.813 -26.089 - 0.90 4.40 -21.936 -26.501 - 0.90 4.50 -21.956 -26.950 - 0.90 4.60 -21.884 -27.349 - 0.90 4.70 -21.732 -27.470 - 0.90 4.80 -21.527 -27.440 - 0.90 4.90 -21.341 -27.336 - 0.90 5.00 -21.276 -26.816 - 0.90 5.10 -21.322 -26.635 - 0.90 5.20 -21.324 -26.549 - 0.90 5.30 -21.300 -26.576 - 0.90 5.40 -21.291 -26.589 - 0.90 5.50 -21.466 -26.552 - 0.90 5.60 -21.674 -26.620 - 0.90 5.70 -21.725 -26.722 - 0.90 5.80 -21.778 -26.860 - 0.90 5.90 -21.807 -26.940 - 0.90 6.00 -21.814 -26.997 - 0.90 6.10 -21.837 -27.040 - 0.90 6.20 -21.892 -27.124 - 0.90 6.30 -22.070 -27.186 - 0.90 6.40 -22.251 -27.246 - 0.90 6.50 -22.387 -27.356 - 0.90 6.60 -22.467 -27.425 - 0.90 6.70 -22.497 -27.461 - 0.90 6.80 -22.503 -27.503 - 0.90 6.90 -22.493 -27.546 - 0.90 7.00 -22.491 -27.553 - 0.90 7.10 -22.535 -27.604 - 0.90 7.20 -22.610 -27.692 - 0.90 7.30 -22.658 -27.709 - 0.90 7.40 -22.670 -27.762 - 0.90 7.50 -22.660 -27.830 - 0.90 7.60 -22.634 -27.898 - 0.90 7.70 -22.601 -27.939 - 0.90 7.80 -22.564 -27.962 - 0.90 7.90 -22.526 -27.987 - 0.90 8.00 -22.489 -28.100 - 0.90 8.10 -22.450 -28.229 - 0.90 8.20 -22.410 -28.359 - 0.90 8.30 -22.369 -28.496 - 0.90 8.40 -22.324 -28.624 - 0.90 8.50 -22.280 -28.747 - 0.90 8.60 -22.231 -28.871 - 0.90 8.70 -22.183 -28.999 - 0.90 8.80 -22.132 -29.130 - 0.90 8.90 -22.079 -29.265 - 0.90 9.00 -22.026 -29.404 - 1.00 1.00 -28.296 -25.283 - 1.00 1.10 -28.064 -25.284 - 1.00 1.20 -27.836 -25.284 - 1.00 1.30 -27.602 -25.285 - 1.00 1.40 -27.367 -25.285 - 1.00 1.50 -27.144 -25.286 - 1.00 1.60 -26.936 -25.287 - 1.00 1.70 -26.744 -25.287 - 1.00 1.80 -26.559 -25.288 - 1.00 1.90 -26.368 -25.289 - 1.00 2.00 -26.164 -25.290 - 1.00 2.10 -25.956 -25.291 - 1.00 2.20 -25.756 -25.292 - 1.00 2.30 -25.576 -25.294 - 1.00 2.40 -25.420 -25.295 - 1.00 2.50 -25.289 -25.296 - 1.00 2.60 -25.179 -25.298 - 1.00 2.70 -25.087 -25.300 - 1.00 2.80 -25.011 -25.301 - 1.00 2.90 -24.945 -25.303 - 1.00 3.00 -24.886 -25.305 - 1.00 3.10 -24.834 -25.308 - 1.00 3.20 -24.785 -25.310 - 1.00 3.30 -24.734 -25.312 - 1.00 3.40 -24.676 -25.315 - 1.00 3.50 -24.605 -25.318 - 1.00 3.60 -24.517 -25.321 - 1.00 3.70 -24.409 -25.324 - 1.00 3.80 -24.273 -25.328 - 1.00 3.90 -24.081 -25.331 - 1.00 4.00 -23.723 -25.334 - 1.00 4.10 -23.000 -25.348 - 1.00 4.20 -22.006 -25.559 - 1.00 4.30 -21.813 -26.189 - 1.00 4.40 -21.936 -26.600 - 1.00 4.50 -21.955 -27.044 - 1.00 4.60 -21.884 -27.419 - 1.00 4.70 -21.732 -27.525 - 1.00 4.80 -21.527 -27.479 - 1.00 4.90 -21.341 -27.353 - 1.00 5.00 -21.276 -26.818 - 1.00 5.10 -21.322 -26.636 - 1.00 5.20 -21.324 -26.549 - 1.00 5.30 -21.300 -26.576 - 1.00 5.40 -21.291 -26.589 - 1.00 5.50 -21.466 -26.553 - 1.00 5.60 -21.674 -26.620 - 1.00 5.70 -21.725 -26.722 - 1.00 5.80 -21.778 -26.860 - 1.00 5.90 -21.807 -26.941 - 1.00 6.00 -21.814 -26.997 - 1.00 6.10 -21.837 -27.041 - 1.00 6.20 -21.892 -27.124 - 1.00 6.30 -22.070 -27.186 - 1.00 6.40 -22.251 -27.246 - 1.00 6.50 -22.387 -27.356 - 1.00 6.60 -22.467 -27.425 - 1.00 6.70 -22.497 -27.461 - 1.00 6.80 -22.503 -27.503 - 1.00 6.90 -22.493 -27.546 - 1.00 7.00 -22.491 -27.553 - 1.00 7.10 -22.535 -27.604 - 1.00 7.20 -22.610 -27.692 - 1.00 7.30 -22.658 -27.709 - 1.00 7.40 -22.670 -27.762 - 1.00 7.50 -22.660 -27.830 - 1.00 7.60 -22.634 -27.898 - 1.00 7.70 -22.601 -27.939 - 1.00 7.80 -22.564 -27.962 - 1.00 7.90 -22.526 -27.987 - 1.00 8.00 -22.489 -28.100 - 1.00 8.10 -22.450 -28.229 - 1.00 8.20 -22.410 -28.360 - 1.00 8.30 -22.369 -28.497 - 1.00 8.40 -22.324 -28.624 - 1.00 8.50 -22.280 -28.747 - 1.00 8.60 -22.231 -28.872 - 1.00 8.70 -22.183 -28.999 - 1.00 8.80 -22.132 -29.130 - 1.00 8.90 -22.079 -29.265 - 1.00 9.00 -22.026 -29.405 - 1.10 1.00 -28.312 -25.382 - 1.10 1.10 -28.080 -25.383 - 1.10 1.20 -27.854 -25.383 - 1.10 1.30 -27.628 -25.384 - 1.10 1.40 -27.403 -25.384 - 1.10 1.50 -27.190 -25.385 - 1.10 1.60 -26.990 -25.386 - 1.10 1.70 -26.805 -25.386 - 1.10 1.80 -26.624 -25.387 - 1.10 1.90 -26.434 -25.388 - 1.10 2.00 -26.231 -25.389 - 1.10 2.10 -26.020 -25.390 - 1.10 2.20 -25.819 -25.391 - 1.10 2.30 -25.636 -25.392 - 1.10 2.40 -25.478 -25.393 - 1.10 2.50 -25.345 -25.394 - 1.10 2.60 -25.234 -25.396 - 1.10 2.70 -25.141 -25.397 - 1.10 2.80 -25.063 -25.399 - 1.10 2.90 -24.996 -25.400 - 1.10 3.00 -24.937 -25.402 - 1.10 3.10 -24.884 -25.404 - 1.10 3.20 -24.834 -25.406 - 1.10 3.30 -24.783 -25.408 - 1.10 3.40 -24.724 -25.411 - 1.10 3.50 -24.653 -25.413 - 1.10 3.60 -24.564 -25.416 - 1.10 3.70 -24.455 -25.419 - 1.10 3.80 -24.319 -25.422 - 1.10 3.90 -24.126 -25.425 - 1.10 4.00 -23.766 -25.427 - 1.10 4.10 -23.033 -25.440 - 1.10 4.20 -22.007 -25.653 - 1.10 4.30 -21.813 -26.288 - 1.10 4.40 -21.936 -26.699 - 1.10 4.50 -21.955 -27.136 - 1.10 4.60 -21.883 -27.484 - 1.10 4.70 -21.732 -27.573 - 1.10 4.80 -21.527 -27.512 - 1.10 4.90 -21.341 -27.366 - 1.10 5.00 -21.276 -26.820 - 1.10 5.10 -21.322 -26.636 - 1.10 5.20 -21.324 -26.549 - 1.10 5.30 -21.300 -26.577 - 1.10 5.40 -21.291 -26.589 - 1.10 5.50 -21.466 -26.553 - 1.10 5.60 -21.674 -26.620 - 1.10 5.70 -21.725 -26.722 - 1.10 5.80 -21.778 -26.860 - 1.10 5.90 -21.807 -26.941 - 1.10 6.00 -21.814 -26.997 - 1.10 6.10 -21.837 -27.041 - 1.10 6.20 -21.892 -27.124 - 1.10 6.30 -22.070 -27.187 - 1.10 6.40 -22.251 -27.246 - 1.10 6.50 -22.387 -27.356 - 1.10 6.60 -22.467 -27.425 - 1.10 6.70 -22.497 -27.461 - 1.10 6.80 -22.503 -27.503 - 1.10 6.90 -22.493 -27.546 - 1.10 7.00 -22.491 -27.553 - 1.10 7.10 -22.535 -27.604 - 1.10 7.20 -22.610 -27.692 - 1.10 7.30 -22.658 -27.709 - 1.10 7.40 -22.670 -27.762 - 1.10 7.50 -22.660 -27.830 - 1.10 7.60 -22.634 -27.898 - 1.10 7.70 -22.601 -27.939 - 1.10 7.80 -22.564 -27.962 - 1.10 7.90 -22.526 -27.987 - 1.10 8.00 -22.489 -28.100 - 1.10 8.10 -22.450 -28.229 - 1.10 8.20 -22.410 -28.360 - 1.10 8.30 -22.369 -28.497 - 1.10 8.40 -22.324 -28.624 - 1.10 8.50 -22.280 -28.747 - 1.10 8.60 -22.231 -28.872 - 1.10 8.70 -22.183 -28.999 - 1.10 8.80 -22.132 -29.130 - 1.10 8.90 -22.079 -29.266 - 1.10 9.00 -22.026 -29.405 - 1.20 1.00 -28.326 -25.482 - 1.20 1.10 -28.093 -25.482 - 1.20 1.20 -27.871 -25.482 - 1.20 1.30 -27.652 -25.483 - 1.20 1.40 -27.436 -25.483 - 1.20 1.50 -27.232 -25.484 - 1.20 1.60 -27.041 -25.484 - 1.20 1.70 -26.862 -25.485 - 1.20 1.80 -26.686 -25.486 - 1.20 1.90 -26.499 -25.486 - 1.20 2.00 -26.295 -25.487 - 1.20 2.10 -26.084 -25.488 - 1.20 2.20 -25.880 -25.489 - 1.20 2.30 -25.696 -25.490 - 1.20 2.40 -25.536 -25.491 - 1.20 2.50 -25.401 -25.492 - 1.20 2.60 -25.288 -25.493 - 1.20 2.70 -25.195 -25.495 - 1.20 2.80 -25.116 -25.496 - 1.20 2.90 -25.048 -25.498 - 1.20 3.00 -24.988 -25.499 - 1.20 3.10 -24.935 -25.501 - 1.20 3.20 -24.884 -25.503 - 1.20 3.30 -24.832 -25.505 - 1.20 3.40 -24.772 -25.507 - 1.20 3.50 -24.700 -25.509 - 1.20 3.60 -24.611 -25.511 - 1.20 3.70 -24.502 -25.514 - 1.20 3.80 -24.365 -25.516 - 1.20 3.90 -24.171 -25.518 - 1.20 4.00 -23.810 -25.520 - 1.20 4.10 -23.065 -25.532 - 1.20 4.20 -22.007 -25.748 - 1.20 4.30 -21.813 -26.387 - 1.20 4.40 -21.936 -26.797 - 1.20 4.50 -21.955 -27.226 - 1.20 4.60 -21.883 -27.544 - 1.20 4.70 -21.732 -27.616 - 1.20 4.80 -21.527 -27.540 - 1.20 4.90 -21.341 -27.377 - 1.20 5.00 -21.276 -26.821 - 1.20 5.10 -21.322 -26.637 - 1.20 5.20 -21.324 -26.550 - 1.20 5.30 -21.300 -26.577 - 1.20 5.40 -21.291 -26.590 - 1.20 5.50 -21.466 -26.553 - 1.20 5.60 -21.674 -26.620 - 1.20 5.70 -21.725 -26.722 - 1.20 5.80 -21.778 -26.860 - 1.20 5.90 -21.807 -26.940 - 1.20 6.00 -21.814 -26.997 - 1.20 6.10 -21.837 -27.041 - 1.20 6.20 -21.892 -27.124 - 1.20 6.30 -22.070 -27.187 - 1.20 6.40 -22.251 -27.246 - 1.20 6.50 -22.387 -27.356 - 1.20 6.60 -22.467 -27.425 - 1.20 6.70 -22.497 -27.461 - 1.20 6.80 -22.503 -27.503 - 1.20 6.90 -22.493 -27.546 - 1.20 7.00 -22.491 -27.553 - 1.20 7.10 -22.535 -27.604 - 1.20 7.20 -22.610 -27.692 - 1.20 7.30 -22.658 -27.709 - 1.20 7.40 -22.670 -27.762 - 1.20 7.50 -22.660 -27.830 - 1.20 7.60 -22.634 -27.898 - 1.20 7.70 -22.601 -27.939 - 1.20 7.80 -22.564 -27.962 - 1.20 7.90 -22.526 -27.987 - 1.20 8.00 -22.489 -28.100 - 1.20 8.10 -22.450 -28.229 - 1.20 8.20 -22.410 -28.360 - 1.20 8.30 -22.369 -28.497 - 1.20 8.40 -22.324 -28.624 - 1.20 8.50 -22.280 -28.747 - 1.20 8.60 -22.231 -28.872 - 1.20 8.70 -22.183 -28.999 - 1.20 8.80 -22.132 -29.131 - 1.20 8.90 -22.079 -29.266 - 1.20 9.00 -22.026 -29.405 - 1.30 1.00 -28.339 -25.581 - 1.30 1.10 -28.105 -25.581 - 1.30 1.20 -27.886 -25.582 - 1.30 1.30 -27.673 -25.582 - 1.30 1.40 -27.466 -25.583 - 1.30 1.50 -27.270 -25.583 - 1.30 1.60 -27.087 -25.584 - 1.30 1.70 -26.916 -25.584 - 1.30 1.80 -26.745 -25.585 - 1.30 1.90 -26.561 -25.585 - 1.30 2.00 -26.358 -25.586 - 1.30 2.10 -26.146 -25.587 - 1.30 2.20 -25.941 -25.588 - 1.30 2.30 -25.754 -25.588 - 1.30 2.40 -25.593 -25.589 - 1.30 2.50 -25.457 -25.590 - 1.30 2.60 -25.343 -25.592 - 1.30 2.70 -25.248 -25.593 - 1.30 2.80 -25.168 -25.594 - 1.30 2.90 -25.099 -25.595 - 1.30 3.00 -25.039 -25.597 - 1.30 3.10 -24.984 -25.598 - 1.30 3.20 -24.933 -25.600 - 1.30 3.30 -24.880 -25.601 - 1.30 3.40 -24.820 -25.603 - 1.30 3.50 -24.748 -25.605 - 1.30 3.60 -24.658 -25.607 - 1.30 3.70 -24.548 -25.609 - 1.30 3.80 -24.411 -25.611 - 1.30 3.90 -24.217 -25.613 - 1.30 4.00 -23.854 -25.614 - 1.30 4.10 -23.097 -25.626 - 1.30 4.20 -22.008 -25.843 - 1.30 4.30 -21.813 -26.487 - 1.30 4.40 -21.935 -26.896 - 1.30 4.50 -21.955 -27.314 - 1.30 4.60 -21.883 -27.598 - 1.30 4.70 -21.732 -27.654 - 1.30 4.80 -21.527 -27.565 - 1.30 4.90 -21.341 -27.387 - 1.30 5.00 -21.276 -26.821 - 1.30 5.10 -21.323 -26.637 - 1.30 5.20 -21.324 -26.550 - 1.30 5.30 -21.300 -26.577 - 1.30 5.40 -21.291 -26.590 - 1.30 5.50 -21.466 -26.553 - 1.30 5.60 -21.674 -26.620 - 1.30 5.70 -21.725 -26.722 - 1.30 5.80 -21.778 -26.860 - 1.30 5.90 -21.807 -26.940 - 1.30 6.00 -21.814 -26.997 - 1.30 6.10 -21.837 -27.041 - 1.30 6.20 -21.892 -27.124 - 1.30 6.30 -22.070 -27.187 - 1.30 6.40 -22.251 -27.247 - 1.30 6.50 -22.387 -27.356 - 1.30 6.60 -22.467 -27.425 - 1.30 6.70 -22.497 -27.461 - 1.30 6.80 -22.503 -27.503 - 1.30 6.90 -22.493 -27.546 - 1.30 7.00 -22.491 -27.553 - 1.30 7.10 -22.535 -27.604 - 1.30 7.20 -22.610 -27.692 - 1.30 7.30 -22.658 -27.709 - 1.30 7.40 -22.670 -27.762 - 1.30 7.50 -22.660 -27.830 - 1.30 7.60 -22.634 -27.898 - 1.30 7.70 -22.601 -27.939 - 1.30 7.80 -22.564 -27.962 - 1.30 7.90 -22.526 -27.987 - 1.30 8.00 -22.489 -28.100 - 1.30 8.10 -22.450 -28.229 - 1.30 8.20 -22.410 -28.360 - 1.30 8.30 -22.369 -28.497 - 1.30 8.40 -22.324 -28.624 - 1.30 8.50 -22.280 -28.747 - 1.30 8.60 -22.231 -28.872 - 1.30 8.70 -22.183 -28.999 - 1.30 8.80 -22.132 -29.131 - 1.30 8.90 -22.079 -29.266 - 1.30 9.00 -22.026 -29.406 - 1.40 1.00 -28.351 -25.681 - 1.40 1.10 -28.117 -25.681 - 1.40 1.20 -27.900 -25.681 - 1.40 1.30 -27.692 -25.681 - 1.40 1.40 -27.493 -25.682 - 1.40 1.50 -27.306 -25.682 - 1.40 1.60 -27.131 -25.683 - 1.40 1.70 -26.966 -25.683 - 1.40 1.80 -26.801 -25.684 - 1.40 1.90 -26.621 -25.684 - 1.40 2.00 -26.420 -25.685 - 1.40 2.10 -26.207 -25.686 - 1.40 2.20 -26.001 -25.686 - 1.40 2.30 -25.813 -25.687 - 1.40 2.40 -25.649 -25.688 - 1.40 2.50 -25.512 -25.689 - 1.40 2.60 -25.397 -25.690 - 1.40 2.70 -25.301 -25.691 - 1.40 2.80 -25.220 -25.692 - 1.40 2.90 -25.150 -25.693 - 1.40 3.00 -25.089 -25.694 - 1.40 3.10 -25.034 -25.696 - 1.40 3.20 -24.981 -25.697 - 1.40 3.30 -24.928 -25.699 - 1.40 3.40 -24.867 -25.700 - 1.40 3.50 -24.795 -25.702 - 1.40 3.60 -24.705 -25.703 - 1.40 3.70 -24.595 -25.705 - 1.40 3.80 -24.457 -25.707 - 1.40 3.90 -24.262 -25.708 - 1.40 4.00 -23.899 -25.708 - 1.40 4.10 -23.129 -25.720 - 1.40 4.20 -22.008 -25.936 - 1.40 4.30 -21.813 -26.587 - 1.40 4.40 -21.935 -26.994 - 1.40 4.50 -21.955 -27.399 - 1.40 4.60 -21.883 -27.647 - 1.40 4.70 -21.732 -27.686 - 1.40 4.80 -21.527 -27.585 - 1.40 4.90 -21.341 -27.394 - 1.40 5.00 -21.276 -26.822 - 1.40 5.10 -21.323 -26.637 - 1.40 5.20 -21.324 -26.550 - 1.40 5.30 -21.300 -26.577 - 1.40 5.40 -21.291 -26.590 - 1.40 5.50 -21.466 -26.553 - 1.40 5.60 -21.674 -26.620 - 1.40 5.70 -21.725 -26.722 - 1.40 5.80 -21.778 -26.860 - 1.40 5.90 -21.807 -26.940 - 1.40 6.00 -21.814 -26.997 - 1.40 6.10 -21.837 -27.041 - 1.40 6.20 -21.892 -27.124 - 1.40 6.30 -22.070 -27.187 - 1.40 6.40 -22.251 -27.247 - 1.40 6.50 -22.387 -27.356 - 1.40 6.60 -22.467 -27.425 - 1.40 6.70 -22.497 -27.461 - 1.40 6.80 -22.503 -27.503 - 1.40 6.90 -22.493 -27.546 - 1.40 7.00 -22.491 -27.553 - 1.40 7.10 -22.535 -27.604 - 1.40 7.20 -22.610 -27.692 - 1.40 7.30 -22.658 -27.709 - 1.40 7.40 -22.670 -27.762 - 1.40 7.50 -22.660 -27.830 - 1.40 7.60 -22.634 -27.898 - 1.40 7.70 -22.601 -27.939 - 1.40 7.80 -22.564 -27.962 - 1.40 7.90 -22.526 -27.987 - 1.40 8.00 -22.489 -28.100 - 1.40 8.10 -22.450 -28.229 - 1.40 8.20 -22.410 -28.360 - 1.40 8.30 -22.369 -28.497 - 1.40 8.40 -22.324 -28.624 - 1.40 8.50 -22.280 -28.748 - 1.40 8.60 -22.231 -28.872 - 1.40 8.70 -22.183 -28.999 - 1.40 8.80 -22.132 -29.131 - 1.40 8.90 -22.079 -29.266 - 1.40 9.00 -22.026 -29.406 - 1.50 1.00 -28.363 -25.780 - 1.50 1.10 -28.129 -25.780 - 1.50 1.20 -27.914 -25.781 - 1.50 1.30 -27.711 -25.781 - 1.50 1.40 -27.518 -25.781 - 1.50 1.50 -27.339 -25.782 - 1.50 1.60 -27.172 -25.782 - 1.50 1.70 -27.014 -25.782 - 1.50 1.80 -26.855 -25.783 - 1.50 1.90 -26.679 -25.783 - 1.50 2.00 -26.480 -25.784 - 1.50 2.10 -26.267 -25.784 - 1.50 2.20 -26.060 -25.785 - 1.50 2.30 -25.870 -25.786 - 1.50 2.40 -25.706 -25.787 - 1.50 2.50 -25.567 -25.787 - 1.50 2.60 -25.450 -25.788 - 1.50 2.70 -25.353 -25.789 - 1.50 2.80 -25.271 -25.790 - 1.50 2.90 -25.201 -25.791 - 1.50 3.00 -25.139 -25.792 - 1.50 3.10 -25.082 -25.794 - 1.50 3.20 -25.029 -25.795 - 1.50 3.30 -24.975 -25.796 - 1.50 3.40 -24.914 -25.797 - 1.50 3.50 -24.841 -25.798 - 1.50 3.60 -24.752 -25.800 - 1.50 3.70 -24.641 -25.801 - 1.50 3.80 -24.502 -25.803 - 1.50 3.90 -24.308 -25.803 - 1.50 4.00 -23.943 -25.803 - 1.50 4.10 -23.160 -25.814 - 1.50 4.20 -22.008 -26.029 - 1.50 4.30 -21.813 -26.686 - 1.50 4.40 -21.935 -27.093 - 1.50 4.50 -21.955 -27.481 - 1.50 4.60 -21.883 -27.690 - 1.50 4.70 -21.732 -27.714 - 1.50 4.80 -21.527 -27.602 - 1.50 4.90 -21.341 -27.400 - 1.50 5.00 -21.276 -26.822 - 1.50 5.10 -21.323 -26.638 - 1.50 5.20 -21.324 -26.550 - 1.50 5.30 -21.300 -26.577 - 1.50 5.40 -21.291 -26.590 - 1.50 5.50 -21.466 -26.553 - 1.50 5.60 -21.674 -26.620 - 1.50 5.70 -21.725 -26.722 - 1.50 5.80 -21.778 -26.860 - 1.50 5.90 -21.807 -26.940 - 1.50 6.00 -21.814 -26.997 - 1.50 6.10 -21.837 -27.041 - 1.50 6.20 -21.892 -27.124 - 1.50 6.30 -22.070 -27.187 - 1.50 6.40 -22.251 -27.247 - 1.50 6.50 -22.387 -27.356 - 1.50 6.60 -22.467 -27.425 - 1.50 6.70 -22.497 -27.461 - 1.50 6.80 -22.503 -27.503 - 1.50 6.90 -22.493 -27.546 - 1.50 7.00 -22.491 -27.553 - 1.50 7.10 -22.535 -27.604 - 1.50 7.20 -22.610 -27.692 - 1.50 7.30 -22.658 -27.709 - 1.50 7.40 -22.670 -27.762 - 1.50 7.50 -22.660 -27.830 - 1.50 7.60 -22.634 -27.898 - 1.50 7.70 -22.601 -27.939 - 1.50 7.80 -22.564 -27.962 - 1.50 7.90 -22.526 -27.987 - 1.50 8.00 -22.489 -28.100 - 1.50 8.10 -22.450 -28.229 - 1.50 8.20 -22.410 -28.360 - 1.50 8.30 -22.369 -28.497 - 1.50 8.40 -22.324 -28.624 - 1.50 8.50 -22.280 -28.748 - 1.50 8.60 -22.231 -28.872 - 1.50 8.70 -22.183 -28.999 - 1.50 8.80 -22.132 -29.131 - 1.50 8.90 -22.079 -29.266 - 1.50 9.00 -22.026 -29.406 - 1.60 1.00 -28.375 -25.880 - 1.60 1.10 -28.141 -25.880 - 1.60 1.20 -27.928 -25.880 - 1.60 1.30 -27.730 -25.880 - 1.60 1.40 -27.543 -25.881 - 1.60 1.50 -27.371 -25.881 - 1.60 1.60 -27.211 -25.881 - 1.60 1.70 -27.060 -25.882 - 1.60 1.80 -26.906 -25.882 - 1.60 1.90 -26.735 -25.883 - 1.60 2.00 -26.538 -25.883 - 1.60 2.10 -26.326 -25.884 - 1.60 2.20 -26.118 -25.884 - 1.60 2.30 -25.927 -25.885 - 1.60 2.40 -25.761 -25.885 - 1.60 2.50 -25.621 -25.886 - 1.60 2.60 -25.503 -25.887 - 1.60 2.70 -25.405 -25.888 - 1.60 2.80 -25.322 -25.889 - 1.60 2.90 -25.251 -25.890 - 1.60 3.00 -25.188 -25.891 - 1.60 3.10 -25.131 -25.891 - 1.60 3.20 -25.077 -25.893 - 1.60 3.30 -25.022 -25.894 - 1.60 3.40 -24.961 -25.895 - 1.60 3.50 -24.887 -25.896 - 1.60 3.60 -24.798 -25.897 - 1.60 3.70 -24.687 -25.898 - 1.60 3.80 -24.548 -25.899 - 1.60 3.90 -24.353 -25.899 - 1.60 4.00 -23.988 -25.899 - 1.60 4.10 -23.190 -25.909 - 1.60 4.20 -22.008 -26.121 - 1.60 4.30 -21.813 -26.786 - 1.60 4.40 -21.935 -27.191 - 1.60 4.50 -21.955 -27.559 - 1.60 4.60 -21.883 -27.727 - 1.60 4.70 -21.732 -27.737 - 1.60 4.80 -21.527 -27.616 - 1.60 4.90 -21.341 -27.405 - 1.60 5.00 -21.276 -26.822 - 1.60 5.10 -21.323 -26.638 - 1.60 5.20 -21.324 -26.550 - 1.60 5.30 -21.300 -26.577 - 1.60 5.40 -21.291 -26.590 - 1.60 5.50 -21.466 -26.553 - 1.60 5.60 -21.674 -26.620 - 1.60 5.70 -21.725 -26.722 - 1.60 5.80 -21.778 -26.859 - 1.60 5.90 -21.807 -26.940 - 1.60 6.00 -21.814 -26.997 - 1.60 6.10 -21.837 -27.041 - 1.60 6.20 -21.892 -27.124 - 1.60 6.30 -22.070 -27.187 - 1.60 6.40 -22.251 -27.247 - 1.60 6.50 -22.387 -27.356 - 1.60 6.60 -22.467 -27.425 - 1.60 6.70 -22.497 -27.461 - 1.60 6.80 -22.503 -27.503 - 1.60 6.90 -22.493 -27.546 - 1.60 7.00 -22.491 -27.553 - 1.60 7.10 -22.535 -27.604 - 1.60 7.20 -22.610 -27.692 - 1.60 7.30 -22.658 -27.709 - 1.60 7.40 -22.670 -27.762 - 1.60 7.50 -22.660 -27.830 - 1.60 7.60 -22.634 -27.898 - 1.60 7.70 -22.601 -27.939 - 1.60 7.80 -22.564 -27.962 - 1.60 7.90 -22.526 -27.987 - 1.60 8.00 -22.489 -28.100 - 1.60 8.10 -22.450 -28.229 - 1.60 8.20 -22.410 -28.360 - 1.60 8.30 -22.369 -28.497 - 1.60 8.40 -22.324 -28.624 - 1.60 8.50 -22.280 -28.748 - 1.60 8.60 -22.231 -28.872 - 1.60 8.70 -22.183 -28.999 - 1.60 8.80 -22.132 -29.131 - 1.60 8.90 -22.079 -29.266 - 1.60 9.00 -22.026 -29.406 - 1.70 1.00 -28.388 -25.979 - 1.70 1.10 -28.155 -25.979 - 1.70 1.20 -27.944 -25.980 - 1.70 1.30 -27.749 -25.980 - 1.70 1.40 -27.568 -25.980 - 1.70 1.50 -27.402 -25.980 - 1.70 1.60 -27.248 -25.981 - 1.70 1.70 -27.104 -25.981 - 1.70 1.80 -26.956 -25.981 - 1.70 1.90 -26.789 -25.982 - 1.70 2.00 -26.595 -25.982 - 1.70 2.10 -26.384 -25.983 - 1.70 2.20 -26.176 -25.983 - 1.70 2.30 -25.984 -25.984 - 1.70 2.40 -25.817 -25.984 - 1.70 2.50 -25.675 -25.985 - 1.70 2.60 -25.556 -25.986 - 1.70 2.70 -25.457 -25.986 - 1.70 2.80 -25.373 -25.987 - 1.70 2.90 -25.301 -25.988 - 1.70 3.00 -25.236 -25.989 - 1.70 3.10 -25.178 -25.990 - 1.70 3.20 -25.123 -25.991 - 1.70 3.30 -25.068 -25.991 - 1.70 3.40 -25.006 -25.992 - 1.70 3.50 -24.933 -25.993 - 1.70 3.60 -24.843 -25.994 - 1.70 3.70 -24.732 -25.995 - 1.70 3.80 -24.593 -25.996 - 1.70 3.90 -24.398 -25.995 - 1.70 4.00 -24.033 -25.994 - 1.70 4.10 -23.219 -26.004 - 1.70 4.20 -22.008 -26.211 - 1.70 4.30 -21.812 -26.885 - 1.70 4.40 -21.935 -27.288 - 1.70 4.50 -21.955 -27.633 - 1.70 4.60 -21.883 -27.760 - 1.70 4.70 -21.732 -27.757 - 1.70 4.80 -21.527 -27.627 - 1.70 4.90 -21.341 -27.410 - 1.70 5.00 -21.276 -26.822 - 1.70 5.10 -21.323 -26.638 - 1.70 5.20 -21.324 -26.550 - 1.70 5.30 -21.300 -26.577 - 1.70 5.40 -21.291 -26.590 - 1.70 5.50 -21.466 -26.553 - 1.70 5.60 -21.674 -26.620 - 1.70 5.70 -21.725 -26.722 - 1.70 5.80 -21.778 -26.859 - 1.70 5.90 -21.807 -26.939 - 1.70 6.00 -21.814 -26.996 - 1.70 6.10 -21.837 -27.041 - 1.70 6.20 -21.892 -27.124 - 1.70 6.30 -22.070 -27.187 - 1.70 6.40 -22.251 -27.247 - 1.70 6.50 -22.387 -27.356 - 1.70 6.60 -22.467 -27.425 - 1.70 6.70 -22.497 -27.461 - 1.70 6.80 -22.503 -27.503 - 1.70 6.90 -22.493 -27.546 - 1.70 7.00 -22.491 -27.553 - 1.70 7.10 -22.535 -27.604 - 1.70 7.20 -22.610 -27.692 - 1.70 7.30 -22.658 -27.709 - 1.70 7.40 -22.670 -27.762 - 1.70 7.50 -22.660 -27.830 - 1.70 7.60 -22.634 -27.898 - 1.70 7.70 -22.601 -27.939 - 1.70 7.80 -22.564 -27.962 - 1.70 7.90 -22.526 -27.987 - 1.70 8.00 -22.489 -28.100 - 1.70 8.10 -22.450 -28.229 - 1.70 8.20 -22.410 -28.360 - 1.70 8.30 -22.369 -28.497 - 1.70 8.40 -22.324 -28.624 - 1.70 8.50 -22.280 -28.748 - 1.70 8.60 -22.231 -28.872 - 1.70 8.70 -22.183 -28.999 - 1.70 8.80 -22.132 -29.131 - 1.70 8.90 -22.079 -29.266 - 1.70 9.00 -22.026 -29.406 - 1.80 1.00 -28.403 -26.079 - 1.80 1.10 -28.170 -26.079 - 1.80 1.20 -27.961 -26.079 - 1.80 1.30 -27.769 -26.079 - 1.80 1.40 -27.593 -26.080 - 1.80 1.50 -27.432 -26.080 - 1.80 1.60 -27.285 -26.080 - 1.80 1.70 -27.147 -26.080 - 1.80 1.80 -27.005 -26.081 - 1.80 1.90 -26.842 -26.081 - 1.80 2.00 -26.651 -26.081 - 1.80 2.10 -26.441 -26.082 - 1.80 2.20 -26.233 -26.082 - 1.80 2.30 -26.040 -26.083 - 1.80 2.40 -25.872 -26.083 - 1.80 2.50 -25.729 -26.084 - 1.80 2.60 -25.609 -26.085 - 1.80 2.70 -25.508 -26.085 - 1.80 2.80 -25.423 -26.086 - 1.80 2.90 -25.349 -26.087 - 1.80 3.00 -25.284 -26.087 - 1.80 3.10 -25.225 -26.088 - 1.80 3.20 -25.169 -26.089 - 1.80 3.30 -25.113 -26.089 - 1.80 3.40 -25.051 -26.090 - 1.80 3.50 -24.978 -26.091 - 1.80 3.60 -24.888 -26.091 - 1.80 3.70 -24.777 -26.092 - 1.80 3.80 -24.638 -26.092 - 1.80 3.90 -24.443 -26.092 - 1.80 4.00 -24.079 -26.090 - 1.80 4.10 -23.247 -26.099 - 1.80 4.20 -22.008 -26.299 - 1.80 4.30 -21.812 -26.985 - 1.80 4.40 -21.935 -27.385 - 1.80 4.50 -21.955 -27.701 - 1.80 4.60 -21.883 -27.787 - 1.80 4.70 -21.732 -27.774 - 1.80 4.80 -21.527 -27.637 - 1.80 4.90 -21.341 -27.413 - 1.80 5.00 -21.276 -26.822 - 1.80 5.10 -21.323 -26.638 - 1.80 5.20 -21.324 -26.551 - 1.80 5.30 -21.300 -26.578 - 1.80 5.40 -21.291 -26.590 - 1.80 5.50 -21.466 -26.553 - 1.80 5.60 -21.674 -26.620 - 1.80 5.70 -21.725 -26.722 - 1.80 5.80 -21.778 -26.859 - 1.80 5.90 -21.807 -26.938 - 1.80 6.00 -21.814 -26.996 - 1.80 6.10 -21.837 -27.041 - 1.80 6.20 -21.892 -27.124 - 1.80 6.30 -22.070 -27.187 - 1.80 6.40 -22.251 -27.247 - 1.80 6.50 -22.387 -27.356 - 1.80 6.60 -22.467 -27.425 - 1.80 6.70 -22.497 -27.461 - 1.80 6.80 -22.503 -27.503 - 1.80 6.90 -22.493 -27.546 - 1.80 7.00 -22.491 -27.553 - 1.80 7.10 -22.535 -27.604 - 1.80 7.20 -22.610 -27.692 - 1.80 7.30 -22.658 -27.709 - 1.80 7.40 -22.670 -27.762 - 1.80 7.50 -22.660 -27.830 - 1.80 7.60 -22.634 -27.898 - 1.80 7.70 -22.601 -27.939 - 1.80 7.80 -22.564 -27.962 - 1.80 7.90 -22.526 -27.987 - 1.80 8.00 -22.489 -28.100 - 1.80 8.10 -22.450 -28.229 - 1.80 8.20 -22.410 -28.360 - 1.80 8.30 -22.369 -28.497 - 1.80 8.40 -22.324 -28.624 - 1.80 8.50 -22.280 -28.748 - 1.80 8.60 -22.231 -28.872 - 1.80 8.70 -22.183 -28.999 - 1.80 8.80 -22.132 -29.131 - 1.80 8.90 -22.079 -29.267 - 1.80 9.00 -22.026 -29.406 - 1.90 1.00 -28.420 -26.178 - 1.90 1.10 -28.188 -26.179 - 1.90 1.20 -27.981 -26.179 - 1.90 1.30 -27.792 -26.179 - 1.90 1.40 -27.620 -26.179 - 1.90 1.50 -27.464 -26.179 - 1.90 1.60 -27.323 -26.180 - 1.90 1.70 -27.190 -26.180 - 1.90 1.80 -27.053 -26.180 - 1.90 1.90 -26.894 -26.180 - 1.90 2.00 -26.706 -26.181 - 1.90 2.10 -26.498 -26.181 - 1.90 2.20 -26.290 -26.182 - 1.90 2.30 -26.096 -26.182 - 1.90 2.40 -25.927 -26.183 - 1.90 2.50 -25.782 -26.183 - 1.90 2.60 -25.661 -26.184 - 1.90 2.70 -25.559 -26.184 - 1.90 2.80 -25.472 -26.185 - 1.90 2.90 -25.398 -26.185 - 1.90 3.00 -25.331 -26.186 - 1.90 3.10 -25.271 -26.187 - 1.90 3.20 -25.214 -26.187 - 1.90 3.30 -25.158 -26.188 - 1.90 3.40 -25.095 -26.188 - 1.90 3.50 -25.022 -26.189 - 1.90 3.60 -24.933 -26.189 - 1.90 3.70 -24.822 -26.189 - 1.90 3.80 -24.683 -26.190 - 1.90 3.90 -24.488 -26.189 - 1.90 4.00 -24.124 -26.187 - 1.90 4.10 -23.274 -26.195 - 1.90 4.20 -22.009 -26.385 - 1.90 4.30 -21.813 -27.084 - 1.90 4.40 -21.935 -27.481 - 1.90 4.50 -21.955 -27.764 - 1.90 4.60 -21.883 -27.811 - 1.90 4.70 -21.732 -27.787 - 1.90 4.80 -21.527 -27.645 - 1.90 4.90 -21.341 -27.416 - 1.90 5.00 -21.277 -26.822 - 1.90 5.10 -21.323 -26.638 - 1.90 5.20 -21.324 -26.551 - 1.90 5.30 -21.300 -26.578 - 1.90 5.40 -21.291 -26.590 - 1.90 5.50 -21.466 -26.553 - 1.90 5.60 -21.674 -26.620 - 1.90 5.70 -21.725 -26.722 - 1.90 5.80 -21.778 -26.858 - 1.90 5.90 -21.807 -26.938 - 1.90 6.00 -21.814 -26.996 - 1.90 6.10 -21.837 -27.040 - 1.90 6.20 -21.892 -27.124 - 1.90 6.30 -22.070 -27.187 - 1.90 6.40 -22.251 -27.247 - 1.90 6.50 -22.387 -27.356 - 1.90 6.60 -22.467 -27.425 - 1.90 6.70 -22.497 -27.461 - 1.90 6.80 -22.503 -27.503 - 1.90 6.90 -22.493 -27.546 - 1.90 7.00 -22.491 -27.553 - 1.90 7.10 -22.535 -27.604 - 1.90 7.20 -22.610 -27.692 - 1.90 7.30 -22.658 -27.709 - 1.90 7.40 -22.670 -27.762 - 1.90 7.50 -22.660 -27.830 - 1.90 7.60 -22.634 -27.898 - 1.90 7.70 -22.601 -27.939 - 1.90 7.80 -22.564 -27.962 - 1.90 7.90 -22.526 -27.987 - 1.90 8.00 -22.489 -28.100 - 1.90 8.10 -22.450 -28.229 - 1.90 8.20 -22.410 -28.360 - 1.90 8.30 -22.369 -28.497 - 1.90 8.40 -22.324 -28.624 - 1.90 8.50 -22.280 -28.748 - 1.90 8.60 -22.231 -28.872 - 1.90 8.70 -22.183 -28.999 - 1.90 8.80 -22.132 -29.131 - 1.90 8.90 -22.079 -29.267 - 1.90 9.00 -22.026 -29.406 - 2.00 1.00 -28.440 -26.278 - 2.00 1.10 -28.208 -26.278 - 2.00 1.20 -28.003 -26.278 - 2.00 1.30 -27.817 -26.279 - 2.00 1.40 -27.649 -26.279 - 2.00 1.50 -27.498 -26.279 - 2.00 1.60 -27.361 -26.279 - 2.00 1.70 -27.233 -26.279 - 2.00 1.80 -27.100 -26.280 - 2.00 1.90 -26.946 -26.280 - 2.00 2.00 -26.761 -26.280 - 2.00 2.10 -26.554 -26.281 - 2.00 2.20 -26.346 -26.281 - 2.00 2.30 -26.152 -26.281 - 2.00 2.40 -25.981 -26.282 - 2.00 2.50 -25.836 -26.282 - 2.00 2.60 -25.713 -26.283 - 2.00 2.70 -25.609 -26.283 - 2.00 2.80 -25.521 -26.284 - 2.00 2.90 -25.445 -26.284 - 2.00 3.00 -25.378 -26.285 - 2.00 3.10 -25.316 -26.285 - 2.00 3.20 -25.259 -26.286 - 2.00 3.30 -25.202 -26.286 - 2.00 3.40 -25.139 -26.286 - 2.00 3.50 -25.066 -26.287 - 2.00 3.60 -24.977 -26.287 - 2.00 3.70 -24.866 -26.287 - 2.00 3.80 -24.727 -26.287 - 2.00 3.90 -24.533 -26.286 - 2.00 4.00 -24.169 -26.283 - 2.00 4.10 -23.299 -26.291 - 2.00 4.20 -22.009 -26.468 - 2.00 4.30 -21.813 -27.183 - 2.00 4.40 -21.935 -27.575 - 2.00 4.50 -21.955 -27.820 - 2.00 4.60 -21.883 -27.831 - 2.00 4.70 -21.732 -27.799 - 2.00 4.80 -21.527 -27.651 - 2.00 4.90 -21.341 -27.419 - 2.00 5.00 -21.277 -26.822 - 2.00 5.10 -21.323 -26.638 - 2.00 5.20 -21.324 -26.551 - 2.00 5.30 -21.300 -26.578 - 2.00 5.40 -21.291 -26.590 - 2.00 5.50 -21.466 -26.553 - 2.00 5.60 -21.674 -26.620 - 2.00 5.70 -21.725 -26.721 - 2.00 5.80 -21.778 -26.857 - 2.00 5.90 -21.807 -26.937 - 2.00 6.00 -21.814 -26.995 - 2.00 6.10 -21.837 -27.040 - 2.00 6.20 -21.892 -27.124 - 2.00 6.30 -22.070 -27.187 - 2.00 6.40 -22.251 -27.247 - 2.00 6.50 -22.387 -27.356 - 2.00 6.60 -22.467 -27.425 - 2.00 6.70 -22.497 -27.461 - 2.00 6.80 -22.503 -27.503 - 2.00 6.90 -22.493 -27.546 - 2.00 7.00 -22.491 -27.553 - 2.00 7.10 -22.535 -27.604 - 2.00 7.20 -22.610 -27.692 - 2.00 7.30 -22.658 -27.709 - 2.00 7.40 -22.670 -27.762 - 2.00 7.50 -22.660 -27.830 - 2.00 7.60 -22.634 -27.898 - 2.00 7.70 -22.601 -27.939 - 2.00 7.80 -22.564 -27.962 - 2.00 7.90 -22.526 -27.987 - 2.00 8.00 -22.489 -28.100 - 2.00 8.10 -22.450 -28.229 - 2.00 8.20 -22.410 -28.360 - 2.00 8.30 -22.369 -28.497 - 2.00 8.40 -22.324 -28.624 - 2.00 8.50 -22.280 -28.748 - 2.00 8.60 -22.231 -28.872 - 2.00 8.70 -22.183 -28.999 - 2.00 8.80 -22.132 -29.131 - 2.00 8.90 -22.079 -29.267 - 2.00 9.00 -22.026 -29.406 - 2.10 1.00 -28.463 -26.378 - 2.10 1.10 -28.232 -26.378 - 2.10 1.20 -28.029 -26.378 - 2.10 1.30 -27.845 -26.378 - 2.10 1.40 -27.680 -26.378 - 2.10 1.50 -27.533 -26.379 - 2.10 1.60 -27.401 -26.379 - 2.10 1.70 -27.277 -26.379 - 2.10 1.80 -27.148 -26.379 - 2.10 1.90 -26.997 -26.379 - 2.10 2.00 -26.814 -26.380 - 2.10 2.10 -26.609 -26.380 - 2.10 2.20 -26.402 -26.380 - 2.10 2.30 -26.207 -26.381 - 2.10 2.40 -26.036 -26.381 - 2.10 2.50 -25.889 -26.382 - 2.10 2.60 -25.764 -26.382 - 2.10 2.70 -25.659 -26.382 - 2.10 2.80 -25.570 -26.383 - 2.10 2.90 -25.492 -26.383 - 2.10 3.00 -25.423 -26.384 - 2.10 3.10 -25.361 -26.384 - 2.10 3.20 -25.302 -26.384 - 2.10 3.30 -25.244 -26.385 - 2.10 3.40 -25.181 -26.385 - 2.10 3.50 -25.108 -26.385 - 2.10 3.60 -25.020 -26.385 - 2.10 3.70 -24.910 -26.385 - 2.10 3.80 -24.771 -26.385 - 2.10 3.90 -24.578 -26.383 - 2.10 4.00 -24.214 -26.380 - 2.10 4.10 -23.322 -26.386 - 2.10 4.20 -22.009 -26.548 - 2.10 4.30 -21.813 -27.282 - 2.10 4.40 -21.935 -27.669 - 2.10 4.50 -21.955 -27.869 - 2.10 4.60 -21.884 -27.847 - 2.10 4.70 -21.732 -27.809 - 2.10 4.80 -21.527 -27.657 - 2.10 4.90 -21.342 -27.422 - 2.10 5.00 -21.277 -26.822 - 2.10 5.10 -21.323 -26.638 - 2.10 5.20 -21.324 -26.551 - 2.10 5.30 -21.300 -26.578 - 2.10 5.40 -21.291 -26.590 - 2.10 5.50 -21.466 -26.553 - 2.10 5.60 -21.674 -26.620 - 2.10 5.70 -21.725 -26.721 - 2.10 5.80 -21.778 -26.856 - 2.10 5.90 -21.807 -26.935 - 2.10 6.00 -21.814 -26.994 - 2.10 6.10 -21.837 -27.040 - 2.10 6.20 -21.892 -27.124 - 2.10 6.30 -22.070 -27.187 - 2.10 6.40 -22.251 -27.247 - 2.10 6.50 -22.387 -27.356 - 2.10 6.60 -22.467 -27.425 - 2.10 6.70 -22.497 -27.461 - 2.10 6.80 -22.503 -27.503 - 2.10 6.90 -22.493 -27.546 - 2.10 7.00 -22.491 -27.553 - 2.10 7.10 -22.535 -27.604 - 2.10 7.20 -22.610 -27.692 - 2.10 7.30 -22.658 -27.709 - 2.10 7.40 -22.670 -27.762 - 2.10 7.50 -22.660 -27.830 - 2.10 7.60 -22.634 -27.898 - 2.10 7.70 -22.601 -27.939 - 2.10 7.80 -22.564 -27.962 - 2.10 7.90 -22.526 -27.987 - 2.10 8.00 -22.489 -28.100 - 2.10 8.10 -22.450 -28.229 - 2.10 8.20 -22.410 -28.360 - 2.10 8.30 -22.369 -28.497 - 2.10 8.40 -22.324 -28.624 - 2.10 8.50 -22.280 -28.748 - 2.10 8.60 -22.231 -28.872 - 2.10 8.70 -22.183 -28.999 - 2.10 8.80 -22.132 -29.131 - 2.10 8.90 -22.079 -29.267 - 2.10 9.00 -22.026 -29.406 - 2.20 1.00 -28.490 -26.478 - 2.20 1.10 -28.261 -26.478 - 2.20 1.20 -28.058 -26.478 - 2.20 1.30 -27.877 -26.478 - 2.20 1.40 -27.715 -26.478 - 2.20 1.50 -27.571 -26.478 - 2.20 1.60 -27.442 -26.478 - 2.20 1.70 -27.322 -26.479 - 2.20 1.80 -27.196 -26.479 - 2.20 1.90 -27.048 -26.479 - 2.20 2.00 -26.868 -26.479 - 2.20 2.10 -26.664 -26.480 - 2.20 2.20 -26.457 -26.480 - 2.20 2.30 -26.262 -26.480 - 2.20 2.40 -26.089 -26.481 - 2.20 2.50 -25.941 -26.481 - 2.20 2.60 -25.815 -26.481 - 2.20 2.70 -25.708 -26.482 - 2.20 2.80 -25.617 -26.482 - 2.20 2.90 -25.539 -26.482 - 2.20 3.00 -25.468 -26.483 - 2.20 3.10 -25.404 -26.483 - 2.20 3.20 -25.345 -26.483 - 2.20 3.30 -25.286 -26.483 - 2.20 3.40 -25.223 -26.484 - 2.20 3.50 -25.150 -26.483 - 2.20 3.60 -25.062 -26.483 - 2.20 3.70 -24.953 -26.483 - 2.20 3.80 -24.815 -26.483 - 2.20 3.90 -24.622 -26.481 - 2.20 4.00 -24.260 -26.477 - 2.20 4.10 -23.344 -26.482 - 2.20 4.20 -22.009 -26.625 - 2.20 4.30 -21.813 -27.380 - 2.20 4.40 -21.935 -27.761 - 2.20 4.50 -21.955 -27.910 - 2.20 4.60 -21.884 -27.861 - 2.20 4.70 -21.733 -27.817 - 2.20 4.80 -21.527 -27.662 - 2.20 4.90 -21.342 -27.424 - 2.20 5.00 -21.277 -26.821 - 2.20 5.10 -21.323 -26.638 - 2.20 5.20 -21.324 -26.551 - 2.20 5.30 -21.300 -26.578 - 2.20 5.40 -21.291 -26.590 - 2.20 5.50 -21.466 -26.553 - 2.20 5.60 -21.674 -26.620 - 2.20 5.70 -21.725 -26.720 - 2.20 5.80 -21.778 -26.855 - 2.20 5.90 -21.807 -26.934 - 2.20 6.00 -21.814 -26.993 - 2.20 6.10 -21.837 -27.040 - 2.20 6.20 -21.892 -27.124 - 2.20 6.30 -22.070 -27.187 - 2.20 6.40 -22.251 -27.247 - 2.20 6.50 -22.387 -27.356 - 2.20 6.60 -22.467 -27.425 - 2.20 6.70 -22.497 -27.461 - 2.20 6.80 -22.503 -27.503 - 2.20 6.90 -22.493 -27.546 - 2.20 7.00 -22.491 -27.553 - 2.20 7.10 -22.535 -27.604 - 2.20 7.20 -22.610 -27.692 - 2.20 7.30 -22.658 -27.709 - 2.20 7.40 -22.670 -27.762 - 2.20 7.50 -22.660 -27.830 - 2.20 7.60 -22.634 -27.898 - 2.20 7.70 -22.601 -27.939 - 2.20 7.80 -22.564 -27.962 - 2.20 7.90 -22.526 -27.987 - 2.20 8.00 -22.489 -28.100 - 2.20 8.10 -22.450 -28.229 - 2.20 8.20 -22.410 -28.360 - 2.20 8.30 -22.369 -28.497 - 2.20 8.40 -22.324 -28.624 - 2.20 8.50 -22.280 -28.748 - 2.20 8.60 -22.231 -28.872 - 2.20 8.70 -22.183 -28.999 - 2.20 8.80 -22.132 -29.131 - 2.20 8.90 -22.079 -29.267 - 2.20 9.00 -22.026 -29.406 - 2.30 1.00 -28.522 -26.577 - 2.30 1.10 -28.293 -26.577 - 2.30 1.20 -28.092 -26.578 - 2.30 1.30 -27.913 -26.578 - 2.30 1.40 -27.753 -26.578 - 2.30 1.50 -27.613 -26.578 - 2.30 1.60 -27.487 -26.578 - 2.30 1.70 -27.369 -26.578 - 2.30 1.80 -27.245 -26.578 - 2.30 1.90 -27.099 -26.579 - 2.30 2.00 -26.921 -26.579 - 2.30 2.10 -26.718 -26.579 - 2.30 2.20 -26.511 -26.579 - 2.30 2.30 -26.316 -26.580 - 2.30 2.40 -26.143 -26.580 - 2.30 2.50 -25.994 -26.580 - 2.30 2.60 -25.866 -26.581 - 2.30 2.70 -25.758 -26.581 - 2.30 2.80 -25.665 -26.581 - 2.30 2.90 -25.584 -26.582 - 2.30 3.00 -25.512 -26.582 - 2.30 3.10 -25.447 -26.582 - 2.30 3.20 -25.386 -26.582 - 2.30 3.30 -25.327 -26.582 - 2.30 3.40 -25.264 -26.582 - 2.30 3.50 -25.191 -26.582 - 2.30 3.60 -25.104 -26.582 - 2.30 3.70 -24.995 -26.581 - 2.30 3.80 -24.857 -26.581 - 2.30 3.90 -24.666 -26.579 - 2.30 4.00 -24.305 -26.574 - 2.30 4.10 -23.364 -26.578 - 2.30 4.20 -22.009 -26.697 - 2.30 4.30 -21.813 -27.479 - 2.30 4.40 -21.935 -27.850 - 2.30 4.50 -21.956 -27.944 - 2.30 4.60 -21.885 -27.873 - 2.30 4.70 -21.733 -27.824 - 2.30 4.80 -21.527 -27.666 - 2.30 4.90 -21.342 -27.426 - 2.30 5.00 -21.277 -26.821 - 2.30 5.10 -21.323 -26.638 - 2.30 5.20 -21.324 -26.551 - 2.30 5.30 -21.300 -26.578 - 2.30 5.40 -21.291 -26.590 - 2.30 5.50 -21.466 -26.553 - 2.30 5.60 -21.674 -26.619 - 2.30 5.70 -21.725 -26.719 - 2.30 5.80 -21.778 -26.853 - 2.30 5.90 -21.807 -26.932 - 2.30 6.00 -21.814 -26.992 - 2.30 6.10 -21.837 -27.039 - 2.30 6.20 -21.892 -27.124 - 2.30 6.30 -22.070 -27.187 - 2.30 6.40 -22.251 -27.247 - 2.30 6.50 -22.387 -27.356 - 2.30 6.60 -22.467 -27.425 - 2.30 6.70 -22.497 -27.461 - 2.30 6.80 -22.503 -27.503 - 2.30 6.90 -22.493 -27.546 - 2.30 7.00 -22.491 -27.553 - 2.30 7.10 -22.535 -27.604 - 2.30 7.20 -22.610 -27.692 - 2.30 7.30 -22.658 -27.709 - 2.30 7.40 -22.670 -27.762 - 2.30 7.50 -22.660 -27.831 - 2.30 7.60 -22.634 -27.898 - 2.30 7.70 -22.601 -27.939 - 2.30 7.80 -22.564 -27.962 - 2.30 7.90 -22.526 -27.987 - 2.30 8.00 -22.489 -28.100 - 2.30 8.10 -22.450 -28.229 - 2.30 8.20 -22.410 -28.360 - 2.30 8.30 -22.369 -28.497 - 2.30 8.40 -22.324 -28.624 - 2.30 8.50 -22.280 -28.748 - 2.30 8.60 -22.231 -28.872 - 2.30 8.70 -22.183 -28.999 - 2.30 8.80 -22.132 -29.131 - 2.30 8.90 -22.079 -29.267 - 2.30 9.00 -22.026 -29.406 - 2.40 1.00 -28.558 -26.677 - 2.40 1.10 -28.331 -26.677 - 2.40 1.20 -28.131 -26.677 - 2.40 1.30 -27.953 -26.677 - 2.40 1.40 -27.796 -26.678 - 2.40 1.50 -27.657 -26.678 - 2.40 1.60 -27.533 -26.678 - 2.40 1.70 -27.417 -26.678 - 2.40 1.80 -27.296 -26.678 - 2.40 1.90 -27.151 -26.678 - 2.40 2.00 -26.973 -26.678 - 2.40 2.10 -26.771 -26.679 - 2.40 2.20 -26.565 -26.679 - 2.40 2.30 -26.370 -26.679 - 2.40 2.40 -26.196 -26.680 - 2.40 2.50 -26.046 -26.680 - 2.40 2.60 -25.917 -26.680 - 2.40 2.70 -25.806 -26.680 - 2.40 2.80 -25.712 -26.681 - 2.40 2.90 -25.629 -26.681 - 2.40 3.00 -25.556 -26.681 - 2.40 3.10 -25.488 -26.681 - 2.40 3.20 -25.426 -26.681 - 2.40 3.30 -25.367 -26.681 - 2.40 3.40 -25.303 -26.681 - 2.40 3.50 -25.231 -26.681 - 2.40 3.60 -25.144 -26.680 - 2.40 3.70 -25.036 -26.680 - 2.40 3.80 -24.900 -26.679 - 2.40 3.90 -24.709 -26.677 - 2.40 4.00 -24.350 -26.672 - 2.40 4.10 -23.382 -26.673 - 2.40 4.20 -22.009 -26.765 - 2.40 4.30 -21.813 -27.577 - 2.40 4.40 -21.935 -27.937 - 2.40 4.50 -21.956 -27.970 - 2.40 4.60 -21.885 -27.883 - 2.40 4.70 -21.733 -27.830 - 2.40 4.80 -21.527 -27.670 - 2.40 4.90 -21.342 -27.428 - 2.40 5.00 -21.277 -26.820 - 2.40 5.10 -21.324 -26.638 - 2.40 5.20 -21.324 -26.551 - 2.40 5.30 -21.300 -26.578 - 2.40 5.40 -21.291 -26.590 - 2.40 5.50 -21.466 -26.553 - 2.40 5.60 -21.674 -26.619 - 2.40 5.70 -21.725 -26.719 - 2.40 5.80 -21.778 -26.851 - 2.40 5.90 -21.807 -26.929 - 2.40 6.00 -21.814 -26.990 - 2.40 6.10 -21.837 -27.039 - 2.40 6.20 -21.892 -27.124 - 2.40 6.30 -22.070 -27.187 - 2.40 6.40 -22.251 -27.247 - 2.40 6.50 -22.387 -27.356 - 2.40 6.60 -22.467 -27.425 - 2.40 6.70 -22.497 -27.461 - 2.40 6.80 -22.503 -27.503 - 2.40 6.90 -22.493 -27.546 - 2.40 7.00 -22.491 -27.553 - 2.40 7.10 -22.535 -27.604 - 2.40 7.20 -22.610 -27.692 - 2.40 7.30 -22.658 -27.709 - 2.40 7.40 -22.670 -27.762 - 2.40 7.50 -22.660 -27.831 - 2.40 7.60 -22.634 -27.898 - 2.40 7.70 -22.601 -27.939 - 2.40 7.80 -22.564 -27.962 - 2.40 7.90 -22.526 -27.987 - 2.40 8.00 -22.489 -28.100 - 2.40 8.10 -22.450 -28.229 - 2.40 8.20 -22.410 -28.360 - 2.40 8.30 -22.369 -28.497 - 2.40 8.40 -22.324 -28.624 - 2.40 8.50 -22.280 -28.748 - 2.40 8.60 -22.231 -28.872 - 2.40 8.70 -22.183 -28.999 - 2.40 8.80 -22.132 -29.131 - 2.40 8.90 -22.079 -29.267 - 2.40 9.00 -22.026 -29.406 - 2.50 1.00 -28.600 -26.777 - 2.50 1.10 -28.374 -26.777 - 2.50 1.20 -28.175 -26.777 - 2.50 1.30 -27.998 -26.777 - 2.50 1.40 -27.842 -26.777 - 2.50 1.50 -27.706 -26.777 - 2.50 1.60 -27.583 -26.778 - 2.50 1.70 -27.468 -26.778 - 2.50 1.80 -27.347 -26.778 - 2.50 1.90 -27.202 -26.778 - 2.50 2.00 -27.024 -26.778 - 2.50 2.10 -26.823 -26.778 - 2.50 2.20 -26.617 -26.779 - 2.50 2.30 -26.422 -26.779 - 2.50 2.40 -26.248 -26.779 - 2.50 2.50 -26.097 -26.779 - 2.50 2.60 -25.967 -26.780 - 2.50 2.70 -25.855 -26.780 - 2.50 2.80 -25.758 -26.780 - 2.50 2.90 -25.673 -26.780 - 2.50 3.00 -25.598 -26.781 - 2.50 3.10 -25.529 -26.781 - 2.50 3.20 -25.466 -26.781 - 2.50 3.30 -25.406 -26.780 - 2.50 3.40 -25.342 -26.780 - 2.50 3.50 -25.270 -26.780 - 2.50 3.60 -25.184 -26.779 - 2.50 3.70 -25.077 -26.778 - 2.50 3.80 -24.941 -26.778 - 2.50 3.90 -24.753 -26.775 - 2.50 4.00 -24.395 -26.769 - 2.50 4.10 -23.398 -26.767 - 2.50 4.20 -22.009 -26.828 - 2.50 4.30 -21.813 -27.674 - 2.50 4.40 -21.935 -28.019 - 2.50 4.50 -21.957 -27.989 - 2.50 4.60 -21.886 -27.892 - 2.50 4.70 -21.734 -27.835 - 2.50 4.80 -21.527 -27.673 - 2.50 4.90 -21.342 -27.430 - 2.50 5.00 -21.277 -26.820 - 2.50 5.10 -21.324 -26.638 - 2.50 5.20 -21.324 -26.552 - 2.50 5.30 -21.300 -26.578 - 2.50 5.40 -21.291 -26.590 - 2.50 5.50 -21.466 -26.553 - 2.50 5.60 -21.674 -26.619 - 2.50 5.70 -21.725 -26.717 - 2.50 5.80 -21.778 -26.848 - 2.50 5.90 -21.807 -26.926 - 2.50 6.00 -21.814 -26.988 - 2.50 6.10 -21.837 -27.038 - 2.50 6.20 -21.892 -27.124 - 2.50 6.30 -22.070 -27.187 - 2.50 6.40 -22.251 -27.247 - 2.50 6.50 -22.387 -27.356 - 2.50 6.60 -22.467 -27.425 - 2.50 6.70 -22.497 -27.461 - 2.50 6.80 -22.503 -27.503 - 2.50 6.90 -22.493 -27.546 - 2.50 7.00 -22.491 -27.553 - 2.50 7.10 -22.535 -27.604 - 2.50 7.20 -22.610 -27.692 - 2.50 7.30 -22.658 -27.709 - 2.50 7.40 -22.670 -27.762 - 2.50 7.50 -22.660 -27.831 - 2.50 7.60 -22.634 -27.898 - 2.50 7.70 -22.601 -27.939 - 2.50 7.80 -22.564 -27.962 - 2.50 7.90 -22.526 -27.987 - 2.50 8.00 -22.489 -28.100 - 2.50 8.10 -22.450 -28.229 - 2.50 8.20 -22.410 -28.360 - 2.50 8.30 -22.369 -28.497 - 2.50 8.40 -22.324 -28.624 - 2.50 8.50 -22.280 -28.748 - 2.50 8.60 -22.231 -28.872 - 2.50 8.70 -22.183 -28.999 - 2.50 8.80 -22.132 -29.131 - 2.50 8.90 -22.079 -29.267 - 2.50 9.00 -22.026 -29.406 - 2.60 1.00 -28.648 -26.877 - 2.60 1.10 -28.422 -26.877 - 2.60 1.20 -28.224 -26.877 - 2.60 1.30 -28.048 -26.877 - 2.60 1.40 -27.893 -26.877 - 2.60 1.50 -27.757 -26.877 - 2.60 1.60 -27.636 -26.877 - 2.60 1.70 -27.521 -26.877 - 2.60 1.80 -27.398 -26.878 - 2.60 1.90 -27.253 -26.878 - 2.60 2.00 -27.075 -26.878 - 2.60 2.10 -26.873 -26.878 - 2.60 2.20 -26.668 -26.878 - 2.60 2.30 -26.473 -26.878 - 2.60 2.40 -26.299 -26.879 - 2.60 2.50 -26.147 -26.879 - 2.60 2.60 -26.016 -26.879 - 2.60 2.70 -25.902 -26.879 - 2.60 2.80 -25.803 -26.880 - 2.60 2.90 -25.717 -26.880 - 2.60 3.00 -25.640 -26.880 - 2.60 3.10 -25.569 -26.880 - 2.60 3.20 -25.504 -26.880 - 2.60 3.30 -25.444 -26.880 - 2.60 3.40 -25.380 -26.879 - 2.60 3.50 -25.308 -26.879 - 2.60 3.60 -25.223 -26.878 - 2.60 3.70 -25.117 -26.877 - 2.60 3.80 -24.982 -26.876 - 2.60 3.90 -24.795 -26.873 - 2.60 4.00 -24.439 -26.867 - 2.60 4.10 -23.412 -26.861 - 2.60 4.20 -22.010 -26.886 - 2.60 4.30 -21.814 -27.770 - 2.60 4.40 -21.935 -28.097 - 2.60 4.50 -21.958 -28.000 - 2.60 4.60 -21.887 -27.899 - 2.60 4.70 -21.734 -27.840 - 2.60 4.80 -21.528 -27.676 - 2.60 4.90 -21.342 -27.432 - 2.60 5.00 -21.277 -26.819 - 2.60 5.10 -21.324 -26.638 - 2.60 5.20 -21.325 -26.552 - 2.60 5.30 -21.300 -26.579 - 2.60 5.40 -21.291 -26.590 - 2.60 5.50 -21.466 -26.553 - 2.60 5.60 -21.674 -26.619 - 2.60 5.70 -21.725 -26.716 - 2.60 5.80 -21.778 -26.845 - 2.60 5.90 -21.807 -26.922 - 2.60 6.00 -21.814 -26.986 - 2.60 6.10 -21.837 -27.037 - 2.60 6.20 -21.892 -27.124 - 2.60 6.30 -22.070 -27.187 - 2.60 6.40 -22.251 -27.247 - 2.60 6.50 -22.387 -27.356 - 2.60 6.60 -22.467 -27.425 - 2.60 6.70 -22.497 -27.461 - 2.60 6.80 -22.503 -27.503 - 2.60 6.90 -22.493 -27.546 - 2.60 7.00 -22.491 -27.553 - 2.60 7.10 -22.535 -27.604 - 2.60 7.20 -22.610 -27.692 - 2.60 7.30 -22.658 -27.709 - 2.60 7.40 -22.670 -27.762 - 2.60 7.50 -22.660 -27.831 - 2.60 7.60 -22.634 -27.898 - 2.60 7.70 -22.601 -27.939 - 2.60 7.80 -22.564 -27.962 - 2.60 7.90 -22.526 -27.987 - 2.60 8.00 -22.489 -28.100 - 2.60 8.10 -22.450 -28.229 - 2.60 8.20 -22.410 -28.360 - 2.60 8.30 -22.369 -28.493 - 2.60 8.40 -22.324 -28.624 - 2.60 8.50 -22.280 -28.748 - 2.60 8.60 -22.231 -28.872 - 2.60 8.70 -22.183 -28.999 - 2.60 8.80 -22.132 -29.131 - 2.60 8.90 -22.079 -29.267 - 2.60 9.00 -22.026 -29.406 - 2.70 1.00 -28.700 -26.977 - 2.70 1.10 -28.475 -26.977 - 2.70 1.20 -28.278 -26.977 - 2.70 1.30 -28.103 -26.977 - 2.70 1.40 -27.948 -26.977 - 2.70 1.50 -27.813 -26.977 - 2.70 1.60 -27.691 -26.977 - 2.70 1.70 -27.575 -26.977 - 2.70 1.80 -27.451 -26.977 - 2.70 1.90 -27.303 -26.977 - 2.70 2.00 -27.124 -26.978 - 2.70 2.10 -26.922 -26.978 - 2.70 2.20 -26.717 -26.978 - 2.70 2.30 -26.523 -26.978 - 2.70 2.40 -26.348 -26.978 - 2.70 2.50 -26.196 -26.979 - 2.70 2.60 -26.064 -26.979 - 2.70 2.70 -25.949 -26.979 - 2.70 2.80 -25.849 -26.979 - 2.70 2.90 -25.760 -26.979 - 2.70 3.00 -25.681 -26.979 - 2.70 3.10 -25.608 -26.979 - 2.70 3.20 -25.542 -26.979 - 2.70 3.30 -25.481 -26.979 - 2.70 3.40 -25.417 -26.978 - 2.70 3.50 -25.346 -26.978 - 2.70 3.60 -25.262 -26.977 - 2.70 3.70 -25.156 -26.976 - 2.70 3.80 -25.022 -26.975 - 2.70 3.90 -24.837 -26.971 - 2.70 4.00 -24.484 -26.965 - 2.70 4.10 -23.424 -26.954 - 2.70 4.20 -22.010 -26.939 - 2.70 4.30 -21.814 -27.865 - 2.70 4.40 -21.936 -28.170 - 2.70 4.50 -21.959 -28.016 - 2.70 4.60 -21.888 -27.905 - 2.70 4.70 -21.735 -27.844 - 2.70 4.80 -21.528 -27.679 - 2.70 4.90 -21.342 -27.434 - 2.70 5.00 -21.278 -26.819 - 2.70 5.10 -21.324 -26.638 - 2.70 5.20 -21.325 -26.552 - 2.70 5.30 -21.300 -26.579 - 2.70 5.40 -21.291 -26.590 - 2.70 5.50 -21.466 -26.553 - 2.70 5.60 -21.674 -26.618 - 2.70 5.70 -21.725 -26.714 - 2.70 5.80 -21.778 -26.841 - 2.70 5.90 -21.807 -26.918 - 2.70 6.00 -21.814 -26.982 - 2.70 6.10 -21.837 -27.036 - 2.70 6.20 -21.892 -27.124 - 2.70 6.30 -22.070 -27.187 - 2.70 6.40 -22.251 -27.247 - 2.70 6.50 -22.387 -27.356 - 2.70 6.60 -22.467 -27.425 - 2.70 6.70 -22.497 -27.461 - 2.70 6.80 -22.503 -27.503 - 2.70 6.90 -22.493 -27.546 - 2.70 7.00 -22.491 -27.553 - 2.70 7.10 -22.535 -27.604 - 2.70 7.20 -22.610 -27.692 - 2.70 7.30 -22.658 -27.709 - 2.70 7.40 -22.670 -27.762 - 2.70 7.50 -22.660 -27.831 - 2.70 7.60 -22.634 -27.898 - 2.70 7.70 -22.601 -27.939 - 2.70 7.80 -22.564 -27.962 - 2.70 7.90 -22.526 -27.987 - 2.70 8.00 -22.489 -28.100 - 2.70 8.10 -22.450 -28.229 - 2.70 8.20 -22.410 -28.360 - 2.70 8.30 -22.369 -28.497 - 2.70 8.40 -22.324 -28.624 - 2.70 8.50 -22.280 -28.748 - 2.70 8.60 -22.231 -28.872 - 2.70 8.70 -22.183 -28.999 - 2.70 8.80 -22.132 -29.131 - 2.70 8.90 -22.079 -29.267 - 2.70 9.00 -22.026 -29.406 - 2.80 1.00 -28.759 -27.077 - 2.80 1.10 -28.534 -27.077 - 2.80 1.20 -28.336 -27.077 - 2.80 1.30 -28.162 -27.077 - 2.80 1.40 -28.008 -27.077 - 2.80 1.50 -27.872 -27.077 - 2.80 1.60 -27.748 -27.077 - 2.80 1.70 -27.630 -27.077 - 2.80 1.80 -27.503 -27.077 - 2.80 1.90 -27.352 -27.077 - 2.80 2.00 -27.171 -27.077 - 2.80 2.10 -26.969 -27.077 - 2.80 2.20 -26.764 -27.078 - 2.80 2.30 -26.570 -27.078 - 2.80 2.40 -26.396 -27.078 - 2.80 2.50 -26.244 -27.078 - 2.80 2.60 -26.111 -27.078 - 2.80 2.70 -25.995 -27.079 - 2.80 2.80 -25.893 -27.079 - 2.80 2.90 -25.802 -27.079 - 2.80 3.00 -25.721 -27.079 - 2.80 3.10 -25.646 -27.079 - 2.80 3.20 -25.579 -27.079 - 2.80 3.30 -25.517 -27.078 - 2.80 3.40 -25.453 -27.078 - 2.80 3.50 -25.382 -27.077 - 2.80 3.60 -25.299 -27.076 - 2.80 3.70 -25.195 -27.075 - 2.80 3.80 -25.062 -27.074 - 2.80 3.90 -24.879 -27.070 - 2.80 4.00 -24.528 -27.063 - 2.80 4.10 -23.435 -27.045 - 2.80 4.20 -22.010 -26.986 - 2.80 4.30 -21.814 -27.959 - 2.80 4.40 -21.936 -28.235 - 2.80 4.50 -21.960 -28.029 - 2.80 4.60 -21.889 -27.910 - 2.80 4.70 -21.735 -27.847 - 2.80 4.80 -21.528 -27.682 - 2.80 4.90 -21.342 -27.436 - 2.80 5.00 -21.278 -26.818 - 2.80 5.10 -21.324 -26.639 - 2.80 5.20 -21.325 -26.552 - 2.80 5.30 -21.300 -26.579 - 2.80 5.40 -21.291 -26.590 - 2.80 5.50 -21.466 -26.553 - 2.80 5.60 -21.674 -26.618 - 2.80 5.70 -21.725 -26.712 - 2.80 5.80 -21.778 -26.836 - 2.80 5.90 -21.807 -26.912 - 2.80 6.00 -21.814 -26.978 - 2.80 6.10 -21.837 -27.035 - 2.80 6.20 -21.892 -27.124 - 2.80 6.30 -22.070 -27.187 - 2.80 6.40 -22.251 -27.247 - 2.80 6.50 -22.387 -27.356 - 2.80 6.60 -22.467 -27.425 - 2.80 6.70 -22.497 -27.461 - 2.80 6.80 -22.503 -27.503 - 2.80 6.90 -22.493 -27.546 - 2.80 7.00 -22.491 -27.553 - 2.80 7.10 -22.535 -27.604 - 2.80 7.20 -22.610 -27.692 - 2.80 7.30 -22.658 -27.709 - 2.80 7.40 -22.670 -27.762 - 2.80 7.50 -22.660 -27.831 - 2.80 7.60 -22.634 -27.898 - 2.80 7.70 -22.601 -27.939 - 2.80 7.80 -22.564 -27.962 - 2.80 7.90 -22.526 -27.987 - 2.80 8.00 -22.489 -28.100 - 2.80 8.10 -22.450 -28.229 - 2.80 8.20 -22.410 -28.360 - 2.80 8.30 -22.369 -28.497 - 2.80 8.40 -22.324 -28.624 - 2.80 8.50 -22.280 -28.748 - 2.80 8.60 -22.231 -28.872 - 2.80 8.70 -22.183 -28.999 - 2.80 8.80 -22.132 -29.131 - 2.80 8.90 -22.079 -29.267 - 2.80 9.00 -22.026 -29.406 - 2.90 1.00 -28.822 -27.176 - 2.90 1.10 -28.598 -27.176 - 2.90 1.20 -28.400 -27.176 - 2.90 1.30 -28.225 -27.177 - 2.90 1.40 -28.071 -27.177 - 2.90 1.50 -27.933 -27.177 - 2.90 1.60 -27.808 -27.177 - 2.90 1.70 -27.686 -27.177 - 2.90 1.80 -27.555 -27.177 - 2.90 1.90 -27.400 -27.177 - 2.90 2.00 -27.217 -27.177 - 2.90 2.10 -27.013 -27.177 - 2.90 2.20 -26.808 -27.177 - 2.90 2.30 -26.615 -27.178 - 2.90 2.40 -26.441 -27.178 - 2.90 2.50 -26.290 -27.178 - 2.90 2.60 -26.157 -27.178 - 2.90 2.70 -26.040 -27.178 - 2.90 2.80 -25.937 -27.178 - 2.90 2.90 -25.844 -27.179 - 2.90 3.00 -25.761 -27.179 - 2.90 3.10 -25.684 -27.179 - 2.90 3.20 -25.615 -27.178 - 2.90 3.30 -25.552 -27.178 - 2.90 3.40 -25.488 -27.177 - 2.90 3.50 -25.417 -27.176 - 2.90 3.60 -25.335 -27.175 - 2.90 3.70 -25.232 -27.174 - 2.90 3.80 -25.100 -27.173 - 2.90 3.90 -24.921 -27.169 - 2.90 4.00 -24.572 -27.161 - 2.90 4.10 -23.444 -27.136 - 2.90 4.20 -22.010 -27.028 - 2.90 4.30 -21.815 -28.051 - 2.90 4.40 -21.936 -28.293 - 2.90 4.50 -21.962 -28.040 - 2.90 4.60 -21.891 -27.915 - 2.90 4.70 -21.736 -27.850 - 2.90 4.80 -21.528 -27.685 - 2.90 4.90 -21.342 -27.438 - 2.90 5.00 -21.278 -26.818 - 2.90 5.10 -21.324 -26.639 - 2.90 5.20 -21.325 -26.552 - 2.90 5.30 -21.300 -26.579 - 2.90 5.40 -21.291 -26.590 - 2.90 5.50 -21.466 -26.553 - 2.90 5.60 -21.674 -26.617 - 2.90 5.70 -21.725 -26.709 - 2.90 5.80 -21.778 -26.829 - 2.90 5.90 -21.807 -26.904 - 2.90 6.00 -21.814 -26.974 - 2.90 6.10 -21.837 -27.034 - 2.90 6.20 -21.892 -27.124 - 2.90 6.30 -22.070 -27.187 - 2.90 6.40 -22.251 -27.247 - 2.90 6.50 -22.387 -27.356 - 2.90 6.60 -22.467 -27.425 - 2.90 6.70 -22.497 -27.461 - 2.90 6.80 -22.503 -27.503 - 2.90 6.90 -22.493 -27.546 - 2.90 7.00 -22.491 -27.553 - 2.90 7.10 -22.535 -27.604 - 2.90 7.20 -22.610 -27.692 - 2.90 7.30 -22.658 -27.709 - 2.90 7.40 -22.670 -27.762 - 2.90 7.50 -22.660 -27.831 - 2.90 7.60 -22.634 -27.898 - 2.90 7.70 -22.601 -27.939 - 2.90 7.80 -22.564 -27.962 - 2.90 7.90 -22.526 -27.987 - 2.90 8.00 -22.489 -28.100 - 2.90 8.10 -22.450 -28.229 - 2.90 8.20 -22.410 -28.360 - 2.90 8.30 -22.369 -28.497 - 2.90 8.40 -22.324 -28.624 - 2.90 8.50 -22.280 -28.748 - 2.90 8.60 -22.231 -28.872 - 2.90 8.70 -22.183 -28.999 - 2.90 8.80 -22.132 -29.131 - 2.90 8.90 -22.079 -29.267 - 2.90 9.00 -22.026 -29.406 - 3.00 1.00 -28.891 -27.276 - 3.00 1.10 -28.666 -27.276 - 3.00 1.20 -28.469 -27.276 - 3.00 1.30 -28.293 -27.276 - 3.00 1.40 -28.137 -27.276 - 3.00 1.50 -27.998 -27.277 - 3.00 1.60 -27.869 -27.277 - 3.00 1.70 -27.743 -27.277 - 3.00 1.80 -27.606 -27.277 - 3.00 1.90 -27.447 -27.277 - 3.00 2.00 -27.259 -27.277 - 3.00 2.10 -27.055 -27.277 - 3.00 2.20 -26.850 -27.277 - 3.00 2.30 -26.658 -27.277 - 3.00 2.40 -26.484 -27.277 - 3.00 2.50 -26.333 -27.278 - 3.00 2.60 -26.200 -27.278 - 3.00 2.70 -26.083 -27.278 - 3.00 2.80 -25.979 -27.278 - 3.00 2.90 -25.885 -27.278 - 3.00 3.00 -25.800 -27.278 - 3.00 3.10 -25.721 -27.278 - 3.00 3.20 -25.650 -27.278 - 3.00 3.30 -25.587 -27.277 - 3.00 3.40 -25.523 -27.277 - 3.00 3.50 -25.452 -27.276 - 3.00 3.60 -25.371 -27.275 - 3.00 3.70 -25.269 -27.274 - 3.00 3.80 -25.138 -27.272 - 3.00 3.90 -24.961 -27.268 - 3.00 4.00 -24.615 -27.259 - 3.00 4.10 -23.452 -27.224 - 3.00 4.20 -22.011 -27.066 - 3.00 4.30 -21.815 -28.140 - 3.00 4.40 -21.937 -28.343 - 3.00 4.50 -21.963 -28.047 - 3.00 4.60 -21.892 -27.919 - 3.00 4.70 -21.737 -27.853 - 3.00 4.80 -21.529 -27.688 - 3.00 4.90 -21.342 -27.440 - 3.00 5.00 -21.278 -26.817 - 3.00 5.10 -21.325 -26.638 - 3.00 5.20 -21.325 -26.553 - 3.00 5.30 -21.300 -26.579 - 3.00 5.40 -21.291 -26.590 - 3.00 5.50 -21.466 -26.553 - 3.00 5.60 -21.674 -26.616 - 3.00 5.70 -21.725 -26.706 - 3.00 5.80 -21.778 -26.821 - 3.00 5.90 -21.807 -26.895 - 3.00 6.00 -21.814 -26.967 - 3.00 6.10 -21.837 -27.032 - 3.00 6.20 -21.892 -27.123 - 3.00 6.30 -22.070 -27.187 - 3.00 6.40 -22.251 -27.247 - 3.00 6.50 -22.387 -27.356 - 3.00 6.60 -22.467 -27.425 - 3.00 6.70 -22.497 -27.461 - 3.00 6.80 -22.503 -27.503 - 3.00 6.90 -22.493 -27.546 - 3.00 7.00 -22.491 -27.553 - 3.00 7.10 -22.535 -27.604 - 3.00 7.20 -22.610 -27.692 - 3.00 7.30 -22.658 -27.709 - 3.00 7.40 -22.670 -27.762 - 3.00 7.50 -22.660 -27.831 - 3.00 7.60 -22.634 -27.898 - 3.00 7.70 -22.601 -27.939 - 3.00 7.80 -22.564 -27.962 - 3.00 7.90 -22.526 -27.987 - 3.00 8.00 -22.489 -28.100 - 3.00 8.10 -22.450 -28.229 - 3.00 8.20 -22.410 -28.360 - 3.00 8.30 -22.369 -28.497 - 3.00 8.40 -22.324 -28.624 - 3.00 8.50 -22.280 -28.748 - 3.00 8.60 -22.231 -28.872 - 3.00 8.70 -22.183 -28.999 - 3.00 8.80 -22.132 -29.131 - 3.00 8.90 -22.079 -29.267 - 3.00 9.00 -22.026 -29.406 - 3.10 1.00 -28.964 -27.376 - 3.10 1.10 -28.739 -27.376 - 3.10 1.20 -28.541 -27.376 - 3.10 1.30 -28.364 -27.376 - 3.10 1.40 -28.206 -27.376 - 3.10 1.50 -28.064 -27.376 - 3.10 1.60 -27.931 -27.376 - 3.10 1.70 -27.799 -27.377 - 3.10 1.80 -27.656 -27.377 - 3.10 1.90 -27.490 -27.377 - 3.10 2.00 -27.300 -27.377 - 3.10 2.10 -27.094 -27.377 - 3.10 2.20 -26.889 -27.377 - 3.10 2.30 -26.697 -27.377 - 3.10 2.40 -26.524 -27.377 - 3.10 2.50 -26.374 -27.377 - 3.10 2.60 -26.242 -27.378 - 3.10 2.70 -26.125 -27.378 - 3.10 2.80 -26.020 -27.378 - 3.10 2.90 -25.925 -27.378 - 3.10 3.00 -25.838 -27.378 - 3.10 3.10 -25.757 -27.378 - 3.10 3.20 -25.685 -27.378 - 3.10 3.30 -25.621 -27.377 - 3.10 3.40 -25.556 -27.376 - 3.10 3.50 -25.486 -27.376 - 3.10 3.60 -25.406 -27.375 - 3.10 3.70 -25.306 -27.373 - 3.10 3.80 -25.176 -27.371 - 3.10 3.90 -25.002 -27.367 - 3.10 4.00 -24.658 -27.357 - 3.10 4.10 -23.458 -27.310 - 3.10 4.20 -22.011 -27.098 - 3.10 4.30 -21.816 -28.227 - 3.10 4.40 -21.938 -28.383 - 3.10 4.50 -21.965 -28.052 - 3.10 4.60 -21.894 -27.923 - 3.10 4.70 -21.738 -27.856 - 3.10 4.80 -21.529 -27.639 - 3.10 4.90 -21.343 -27.403 - 3.10 5.00 -21.278 -26.812 - 3.10 5.10 -21.325 -26.638 - 3.10 5.20 -21.325 -26.553 - 3.10 5.30 -21.300 -26.580 - 3.10 5.40 -21.291 -26.590 - 3.10 5.50 -21.466 -26.553 - 3.10 5.60 -21.674 -26.615 - 3.10 5.70 -21.725 -26.702 - 3.10 5.80 -21.778 -26.812 - 3.10 5.90 -21.807 -26.884 - 3.10 6.00 -21.814 -26.960 - 3.10 6.10 -21.837 -27.029 - 3.10 6.20 -21.892 -27.123 - 3.10 6.30 -22.070 -27.187 - 3.10 6.40 -22.251 -27.247 - 3.10 6.50 -22.387 -27.356 - 3.10 6.60 -22.467 -27.425 - 3.10 6.70 -22.497 -27.461 - 3.10 6.80 -22.503 -27.503 - 3.10 6.90 -22.493 -27.546 - 3.10 7.00 -22.491 -27.553 - 3.10 7.10 -22.535 -27.604 - 3.10 7.20 -22.610 -27.692 - 3.10 7.30 -22.658 -27.709 - 3.10 7.40 -22.670 -27.762 - 3.10 7.50 -22.660 -27.831 - 3.10 7.60 -22.634 -27.898 - 3.10 7.70 -22.601 -27.939 - 3.10 7.80 -22.564 -27.962 - 3.10 7.90 -22.526 -27.987 - 3.10 8.00 -22.489 -28.100 - 3.10 8.10 -22.450 -28.229 - 3.10 8.20 -22.410 -28.360 - 3.10 8.30 -22.369 -28.497 - 3.10 8.40 -22.324 -28.624 - 3.10 8.50 -22.280 -28.748 - 3.10 8.60 -22.231 -28.872 - 3.10 8.70 -22.183 -28.999 - 3.10 8.80 -22.132 -29.131 - 3.10 8.90 -22.079 -29.267 - 3.10 9.00 -22.026 -29.406 - 3.20 1.00 -29.041 -27.476 - 3.20 1.10 -28.816 -27.476 - 3.20 1.20 -28.617 -27.476 - 3.20 1.30 -28.439 -27.476 - 3.20 1.40 -28.278 -27.476 - 3.20 1.50 -28.131 -27.476 - 3.20 1.60 -27.993 -27.476 - 3.20 1.70 -27.854 -27.476 - 3.20 1.80 -27.704 -27.476 - 3.20 1.90 -27.532 -27.477 - 3.20 2.00 -27.337 -27.477 - 3.20 2.10 -27.130 -27.477 - 3.20 2.20 -26.925 -27.477 - 3.20 2.30 -26.734 -27.477 - 3.20 2.40 -26.562 -27.477 - 3.20 2.50 -26.413 -27.477 - 3.20 2.60 -26.281 -27.477 - 3.20 2.70 -26.164 -27.477 - 3.20 2.80 -26.059 -27.478 - 3.20 2.90 -25.964 -27.478 - 3.20 3.00 -25.876 -27.478 - 3.20 3.10 -25.794 -27.478 - 3.20 3.20 -25.720 -27.477 - 3.20 3.30 -25.654 -27.477 - 3.20 3.40 -25.589 -27.476 - 3.20 3.50 -25.519 -27.475 - 3.20 3.60 -25.441 -27.474 - 3.20 3.70 -25.341 -27.473 - 3.20 3.80 -25.213 -27.471 - 3.20 3.90 -25.042 -27.466 - 3.20 4.00 -24.701 -27.455 - 3.20 4.10 -23.464 -27.393 - 3.20 4.20 -22.011 -27.127 - 3.20 4.30 -21.816 -28.309 - 3.20 4.40 -21.938 -28.414 - 3.20 4.50 -21.967 -28.056 - 3.20 4.60 -21.896 -27.926 - 3.20 4.70 -21.739 -27.579 - 3.20 4.80 -21.529 -27.500 - 3.20 4.90 -21.343 -27.352 - 3.20 5.00 -21.279 -26.809 - 3.20 5.10 -21.325 -26.638 - 3.20 5.20 -21.325 -26.553 - 3.20 5.30 -21.300 -26.580 - 3.20 5.40 -21.292 -26.590 - 3.20 5.50 -21.466 -26.553 - 3.20 5.60 -21.674 -26.614 - 3.20 5.70 -21.725 -26.696 - 3.20 5.80 -21.778 -26.800 - 3.20 5.90 -21.808 -26.871 - 3.20 6.00 -21.814 -26.951 - 3.20 6.10 -21.837 -27.026 - 3.20 6.20 -21.892 -27.123 - 3.20 6.30 -22.070 -27.187 - 3.20 6.40 -22.251 -27.247 - 3.20 6.50 -22.387 -27.356 - 3.20 6.60 -22.467 -27.425 - 3.20 6.70 -22.497 -27.461 - 3.20 6.80 -22.503 -27.503 - 3.20 6.90 -22.493 -27.546 - 3.20 7.00 -22.491 -27.553 - 3.20 7.10 -22.535 -27.604 - 3.20 7.20 -22.610 -27.692 - 3.20 7.30 -22.658 -27.709 - 3.20 7.40 -22.670 -27.762 - 3.20 7.50 -22.660 -27.831 - 3.20 7.60 -22.634 -27.898 - 3.20 7.70 -22.601 -27.939 - 3.20 7.80 -22.564 -27.962 - 3.20 7.90 -22.526 -27.987 - 3.20 8.00 -22.489 -28.100 - 3.20 8.10 -22.450 -28.229 - 3.20 8.20 -22.410 -28.360 - 3.20 8.30 -22.369 -28.497 - 3.20 8.40 -22.324 -28.624 - 3.20 8.50 -22.280 -28.748 - 3.20 8.60 -22.231 -28.872 - 3.20 8.70 -22.183 -28.999 - 3.20 8.80 -22.132 -29.131 - 3.20 8.90 -22.079 -29.267 - 3.20 9.00 -22.026 -29.406 - 3.30 1.00 -29.122 -27.576 - 3.30 1.10 -28.897 -27.576 - 3.30 1.20 -28.697 -27.576 - 3.30 1.30 -28.516 -27.576 - 3.30 1.40 -28.352 -27.576 - 3.30 1.50 -28.200 -27.576 - 3.30 1.60 -28.054 -27.576 - 3.30 1.70 -27.907 -27.576 - 3.30 1.80 -27.749 -27.576 - 3.30 1.90 -27.571 -27.576 - 3.30 2.00 -27.372 -27.576 - 3.30 2.10 -27.163 -27.577 - 3.30 2.20 -26.959 -27.577 - 3.30 2.30 -26.769 -27.577 - 3.30 2.40 -26.597 -27.577 - 3.30 2.50 -26.449 -27.577 - 3.30 2.60 -26.318 -27.577 - 3.30 2.70 -26.202 -27.577 - 3.30 2.80 -26.097 -27.577 - 3.30 2.90 -26.001 -27.578 - 3.30 3.00 -25.913 -27.578 - 3.30 3.10 -25.829 -27.577 - 3.30 3.20 -25.754 -27.577 - 3.30 3.30 -25.688 -27.577 - 3.30 3.40 -25.622 -27.576 - 3.30 3.50 -25.552 -27.575 - 3.30 3.60 -25.475 -27.574 - 3.30 3.70 -25.377 -27.573 - 3.30 3.80 -25.250 -27.571 - 3.30 3.90 -25.082 -27.565 - 3.30 4.00 -24.744 -27.553 - 3.30 4.10 -23.469 -27.473 - 3.30 4.20 -22.012 -27.152 - 3.30 4.30 -21.817 -28.387 - 3.30 4.40 -21.939 -28.437 - 3.30 4.50 -21.969 -28.058 - 3.30 4.60 -21.898 -27.445 - 3.30 4.70 -21.740 -27.367 - 3.30 4.80 -21.530 -27.416 - 3.30 4.90 -21.343 -27.313 - 3.30 5.00 -21.279 -26.806 - 3.30 5.10 -21.325 -26.638 - 3.30 5.20 -21.325 -26.553 - 3.30 5.30 -21.300 -26.580 - 3.30 5.40 -21.292 -26.590 - 3.30 5.50 -21.466 -26.553 - 3.30 5.60 -21.675 -26.613 - 3.30 5.70 -21.726 -26.690 - 3.30 5.80 -21.778 -26.786 - 3.30 5.90 -21.808 -26.854 - 3.30 6.00 -21.814 -26.939 - 3.30 6.10 -21.837 -27.022 - 3.30 6.20 -21.892 -27.122 - 3.30 6.30 -22.070 -27.187 - 3.30 6.40 -22.251 -27.247 - 3.30 6.50 -22.387 -27.356 - 3.30 6.60 -22.467 -27.425 - 3.30 6.70 -22.497 -27.461 - 3.30 6.80 -22.503 -27.503 - 3.30 6.90 -22.493 -27.546 - 3.30 7.00 -22.491 -27.553 - 3.30 7.10 -22.535 -27.604 - 3.30 7.20 -22.610 -27.692 - 3.30 7.30 -22.658 -27.709 - 3.30 7.40 -22.670 -27.762 - 3.30 7.50 -22.660 -27.831 - 3.30 7.60 -22.634 -27.898 - 3.30 7.70 -22.601 -27.939 - 3.30 7.80 -22.564 -27.962 - 3.30 7.90 -22.526 -27.987 - 3.30 8.00 -22.489 -28.100 - 3.30 8.10 -22.450 -28.229 - 3.30 8.20 -22.410 -28.360 - 3.30 8.30 -22.369 -28.497 - 3.30 8.40 -22.324 -28.624 - 3.30 8.50 -22.280 -28.748 - 3.30 8.60 -22.231 -28.872 - 3.30 8.70 -22.183 -28.999 - 3.30 8.80 -22.132 -29.131 - 3.30 8.90 -22.079 -29.267 - 3.30 9.00 -22.026 -29.406 - 3.40 1.00 -29.206 -27.676 - 3.40 1.10 -28.981 -27.676 - 3.40 1.20 -28.779 -27.676 - 3.40 1.30 -28.596 -27.676 - 3.40 1.40 -28.427 -27.676 - 3.40 1.50 -28.268 -27.676 - 3.40 1.60 -28.114 -27.676 - 3.40 1.70 -27.959 -27.676 - 3.40 1.80 -27.792 -27.676 - 3.40 1.90 -27.607 -27.676 - 3.40 2.00 -27.404 -27.676 - 3.40 2.10 -27.194 -27.676 - 3.40 2.20 -26.990 -27.677 - 3.40 2.30 -26.800 -27.677 - 3.40 2.40 -26.630 -27.677 - 3.40 2.50 -26.483 -27.677 - 3.40 2.60 -26.353 -27.677 - 3.40 2.70 -26.238 -27.677 - 3.40 2.80 -26.133 -27.677 - 3.40 2.90 -26.037 -27.677 - 3.40 3.00 -25.949 -27.677 - 3.40 3.10 -25.864 -27.677 - 3.40 3.20 -25.788 -27.677 - 3.40 3.30 -25.720 -27.677 - 3.40 3.40 -25.654 -27.676 - 3.40 3.50 -25.584 -27.675 - 3.40 3.60 -25.508 -27.674 - 3.40 3.70 -25.412 -27.673 - 3.40 3.80 -25.287 -27.670 - 3.40 3.90 -25.122 -27.664 - 3.40 4.00 -24.787 -27.650 - 3.40 4.10 -23.473 -27.549 - 3.40 4.20 -22.012 -27.173 - 3.40 4.30 -21.818 -28.460 - 3.40 4.40 -21.940 -28.453 - 3.40 4.50 -21.972 -27.597 - 3.40 4.60 -21.901 -27.199 - 3.40 4.70 -21.741 -27.252 - 3.40 4.80 -21.530 -27.360 - 3.40 4.90 -21.343 -27.282 - 3.40 5.00 -21.279 -26.804 - 3.40 5.10 -21.326 -26.637 - 3.40 5.20 -21.325 -26.553 - 3.40 5.30 -21.300 -26.581 - 3.40 5.40 -21.292 -26.590 - 3.40 5.50 -21.467 -26.553 - 3.40 5.60 -21.675 -26.611 - 3.40 5.70 -21.726 -26.682 - 3.40 5.80 -21.778 -26.769 - 3.40 5.90 -21.808 -26.835 - 3.40 6.00 -21.814 -26.926 - 3.40 6.10 -21.837 -27.018 - 3.40 6.20 -21.892 -27.122 - 3.40 6.30 -22.070 -27.187 - 3.40 6.40 -22.251 -27.247 - 3.40 6.50 -22.387 -27.356 - 3.40 6.60 -22.467 -27.425 - 3.40 6.70 -22.497 -27.461 - 3.40 6.80 -22.503 -27.503 - 3.40 6.90 -22.493 -27.546 - 3.40 7.00 -22.491 -27.553 - 3.40 7.10 -22.535 -27.604 - 3.40 7.20 -22.610 -27.692 - 3.40 7.30 -22.658 -27.709 - 3.40 7.40 -22.670 -27.762 - 3.40 7.50 -22.660 -27.831 - 3.40 7.60 -22.634 -27.898 - 3.40 7.70 -22.601 -27.939 - 3.40 7.80 -22.564 -27.962 - 3.40 7.90 -22.526 -27.987 - 3.40 8.00 -22.489 -28.100 - 3.40 8.10 -22.450 -28.229 - 3.40 8.20 -22.410 -28.360 - 3.40 8.30 -22.369 -28.497 - 3.40 8.40 -22.324 -28.624 - 3.40 8.50 -22.280 -28.748 - 3.40 8.60 -22.231 -28.872 - 3.40 8.70 -22.183 -28.999 - 3.40 8.80 -22.132 -29.131 - 3.40 8.90 -22.079 -29.267 - 3.40 9.00 -22.026 -29.406 - 3.50 1.00 -29.293 -27.776 - 3.50 1.10 -29.067 -27.776 - 3.50 1.20 -28.864 -27.776 - 3.50 1.30 -28.677 -27.776 - 3.50 1.40 -28.503 -27.776 - 3.50 1.50 -28.336 -27.776 - 3.50 1.60 -28.173 -27.776 - 3.50 1.70 -28.009 -27.776 - 3.50 1.80 -27.834 -27.776 - 3.50 1.90 -27.641 -27.776 - 3.50 2.00 -27.434 -27.776 - 3.50 2.10 -27.223 -27.776 - 3.50 2.20 -27.019 -27.776 - 3.50 2.30 -26.830 -27.776 - 3.50 2.40 -26.661 -27.777 - 3.50 2.50 -26.515 -27.777 - 3.50 2.60 -26.386 -27.777 - 3.50 2.70 -26.271 -27.777 - 3.50 2.80 -26.168 -27.777 - 3.50 2.90 -26.072 -27.777 - 3.50 3.00 -25.984 -27.777 - 3.50 3.10 -25.899 -27.777 - 3.50 3.20 -25.822 -27.777 - 3.50 3.30 -25.753 -27.776 - 3.50 3.40 -25.686 -27.776 - 3.50 3.50 -25.616 -27.775 - 3.50 3.60 -25.542 -27.774 - 3.50 3.70 -25.447 -27.773 - 3.50 3.80 -25.324 -27.770 - 3.50 3.90 -25.163 -27.763 - 3.50 4.00 -24.829 -27.748 - 3.50 4.10 -23.476 -27.621 - 3.50 4.20 -22.013 -27.192 - 3.50 4.30 -21.819 -28.527 - 3.50 4.40 -21.942 -28.463 - 3.50 4.50 -21.974 -27.232 - 3.50 4.60 -21.903 -27.071 - 3.50 4.70 -21.742 -27.180 - 3.50 4.80 -21.530 -27.322 - 3.50 4.90 -21.343 -27.257 - 3.50 5.00 -21.279 -26.801 - 3.50 5.10 -21.326 -26.637 - 3.50 5.20 -21.325 -26.553 - 3.50 5.30 -21.300 -26.581 - 3.50 5.40 -21.292 -26.590 - 3.50 5.50 -21.467 -26.553 - 3.50 5.60 -21.675 -26.608 - 3.50 5.70 -21.726 -26.672 - 3.50 5.80 -21.778 -26.748 - 3.50 5.90 -21.808 -26.811 - 3.50 6.00 -21.815 -26.909 - 3.50 6.10 -21.837 -27.012 - 3.50 6.20 -21.892 -27.121 - 3.50 6.30 -22.070 -27.187 - 3.50 6.40 -22.251 -27.247 - 3.50 6.50 -22.387 -27.356 - 3.50 6.60 -22.467 -27.425 - 3.50 6.70 -22.497 -27.461 - 3.50 6.80 -22.503 -27.503 - 3.50 6.90 -22.493 -27.546 - 3.50 7.00 -22.491 -27.553 - 3.50 7.10 -22.535 -27.604 - 3.50 7.20 -22.610 -27.692 - 3.50 7.30 -22.658 -27.709 - 3.50 7.40 -22.670 -27.762 - 3.50 7.50 -22.660 -27.831 - 3.50 7.60 -22.634 -27.898 - 3.50 7.70 -22.601 -27.939 - 3.50 7.80 -22.564 -27.962 - 3.50 7.90 -22.526 -27.987 - 3.50 8.00 -22.489 -28.100 - 3.50 8.10 -22.450 -28.229 - 3.50 8.20 -22.410 -28.360 - 3.50 8.30 -22.369 -28.497 - 3.50 8.40 -22.324 -28.624 - 3.50 8.50 -22.280 -28.748 - 3.50 8.60 -22.231 -28.872 - 3.50 8.70 -22.183 -28.999 - 3.50 8.80 -22.132 -29.131 - 3.50 8.90 -22.079 -29.267 - 3.50 9.00 -22.026 -29.406 - 3.60 1.00 -29.382 -27.876 - 3.60 1.10 -29.155 -27.876 - 3.60 1.20 -28.950 -27.876 - 3.60 1.30 -28.760 -27.876 - 3.60 1.40 -28.579 -27.876 - 3.60 1.50 -28.403 -27.876 - 3.60 1.60 -28.231 -27.876 - 3.60 1.70 -28.056 -27.876 - 3.60 1.80 -27.873 -27.876 - 3.60 1.90 -27.674 -27.876 - 3.60 2.00 -27.463 -27.876 - 3.60 2.10 -27.250 -27.876 - 3.60 2.20 -27.046 -27.876 - 3.60 2.30 -26.858 -27.876 - 3.60 2.40 -26.690 -27.876 - 3.60 2.50 -26.545 -27.877 - 3.60 2.60 -26.417 -27.877 - 3.60 2.70 -26.303 -27.877 - 3.60 2.80 -26.200 -27.877 - 3.60 2.90 -26.106 -27.877 - 3.60 3.00 -26.017 -27.877 - 3.60 3.10 -25.933 -27.877 - 3.60 3.20 -25.855 -27.877 - 3.60 3.30 -25.785 -27.876 - 3.60 3.40 -25.717 -27.876 - 3.60 3.50 -25.648 -27.875 - 3.60 3.60 -25.575 -27.874 - 3.60 3.70 -25.483 -27.873 - 3.60 3.80 -25.361 -27.870 - 3.60 3.90 -25.203 -27.862 - 3.60 4.00 -24.872 -27.845 - 3.60 4.10 -23.479 -27.689 - 3.60 4.20 -22.013 -27.208 - 3.60 4.30 -21.820 -28.587 - 3.60 4.40 -21.943 -27.750 - 3.60 4.50 -21.977 -27.070 - 3.60 4.60 -21.906 -26.992 - 3.60 4.70 -21.744 -27.131 - 3.60 4.80 -21.531 -27.295 - 3.60 4.90 -21.343 -27.236 - 3.60 5.00 -21.279 -26.800 - 3.60 5.10 -21.326 -26.637 - 3.60 5.20 -21.325 -26.553 - 3.60 5.30 -21.300 -26.581 - 3.60 5.40 -21.292 -26.590 - 3.60 5.50 -21.467 -26.552 - 3.60 5.60 -21.675 -26.605 - 3.60 5.70 -21.726 -26.660 - 3.60 5.80 -21.779 -26.724 - 3.60 5.90 -21.808 -26.784 - 3.60 6.00 -21.815 -26.889 - 3.60 6.10 -21.837 -27.005 - 3.60 6.20 -21.892 -27.120 - 3.60 6.30 -22.070 -27.186 - 3.60 6.40 -22.251 -27.247 - 3.60 6.50 -22.387 -27.356 - 3.60 6.60 -22.467 -27.425 - 3.60 6.70 -22.497 -27.461 - 3.60 6.80 -22.503 -27.503 - 3.60 6.90 -22.493 -27.546 - 3.60 7.00 -22.491 -27.553 - 3.60 7.10 -22.535 -27.604 - 3.60 7.20 -22.610 -27.692 - 3.60 7.30 -22.658 -27.709 - 3.60 7.40 -22.670 -27.762 - 3.60 7.50 -22.660 -27.831 - 3.60 7.60 -22.634 -27.898 - 3.60 7.70 -22.601 -27.939 - 3.60 7.80 -22.564 -27.962 - 3.60 7.90 -22.526 -27.987 - 3.60 8.00 -22.489 -28.100 - 3.60 8.10 -22.450 -28.229 - 3.60 8.20 -22.410 -28.360 - 3.60 8.30 -22.369 -28.497 - 3.60 8.40 -22.324 -28.624 - 3.60 8.50 -22.280 -28.748 - 3.60 8.60 -22.231 -28.872 - 3.60 8.70 -22.183 -28.999 - 3.60 8.80 -22.132 -29.131 - 3.60 8.90 -22.079 -29.267 - 3.60 9.00 -22.026 -29.406 - 3.70 1.00 -29.473 -27.976 - 3.70 1.10 -29.246 -27.976 - 3.70 1.20 -29.038 -27.976 - 3.70 1.30 -28.843 -27.976 - 3.70 1.40 -28.655 -27.976 - 3.70 1.50 -28.469 -27.976 - 3.70 1.60 -28.286 -27.976 - 3.70 1.70 -28.103 -27.976 - 3.70 1.80 -27.911 -27.976 - 3.70 1.90 -27.706 -27.976 - 3.70 2.00 -27.491 -27.976 - 3.70 2.10 -27.276 -27.976 - 3.70 2.20 -27.072 -27.976 - 3.70 2.30 -26.885 -27.976 - 3.70 2.40 -26.718 -27.976 - 3.70 2.50 -26.574 -27.976 - 3.70 2.60 -26.447 -27.976 - 3.70 2.70 -26.334 -27.977 - 3.70 2.80 -26.232 -27.977 - 3.70 2.90 -26.138 -27.977 - 3.70 3.00 -26.050 -27.977 - 3.70 3.10 -25.966 -27.977 - 3.70 3.20 -25.888 -27.976 - 3.70 3.30 -25.817 -27.976 - 3.70 3.40 -25.748 -27.976 - 3.70 3.50 -25.679 -27.975 - 3.70 3.60 -25.607 -27.974 - 3.70 3.70 -25.518 -27.973 - 3.70 3.80 -25.399 -27.969 - 3.70 3.90 -25.245 -27.961 - 3.70 4.00 -24.914 -27.943 - 3.70 4.10 -23.482 -27.752 - 3.70 4.20 -22.013 -27.222 - 3.70 4.30 -21.820 -28.640 - 3.70 4.40 -21.945 -27.375 - 3.70 4.50 -21.980 -26.974 - 3.70 4.60 -21.908 -26.938 - 3.70 4.70 -21.745 -27.097 - 3.70 4.80 -21.531 -27.276 - 3.70 4.90 -21.343 -27.218 - 3.70 5.00 -21.280 -26.798 - 3.70 5.10 -21.326 -26.636 - 3.70 5.20 -21.325 -26.553 - 3.70 5.30 -21.300 -26.581 - 3.70 5.40 -21.292 -26.589 - 3.70 5.50 -21.468 -26.552 - 3.70 5.60 -21.675 -26.601 - 3.70 5.70 -21.726 -26.646 - 3.70 5.80 -21.779 -26.696 - 3.70 5.90 -21.808 -26.752 - 3.70 6.00 -21.815 -26.866 - 3.70 6.10 -21.837 -26.996 - 3.70 6.20 -21.892 -27.118 - 3.70 6.30 -22.070 -27.186 - 3.70 6.40 -22.251 -27.247 - 3.70 6.50 -22.387 -27.356 - 3.70 6.60 -22.467 -27.425 - 3.70 6.70 -22.497 -27.461 - 3.70 6.80 -22.503 -27.503 - 3.70 6.90 -22.493 -27.546 - 3.70 7.00 -22.491 -27.553 - 3.70 7.10 -22.535 -27.604 - 3.70 7.20 -22.610 -27.692 - 3.70 7.30 -22.658 -27.709 - 3.70 7.40 -22.670 -27.762 - 3.70 7.50 -22.660 -27.831 - 3.70 7.60 -22.634 -27.898 - 3.70 7.70 -22.601 -27.939 - 3.70 7.80 -22.564 -27.962 - 3.70 7.90 -22.526 -27.987 - 3.70 8.00 -22.489 -28.100 - 3.70 8.10 -22.450 -28.229 - 3.70 8.20 -22.410 -28.360 - 3.70 8.30 -22.369 -28.497 - 3.70 8.40 -22.324 -28.624 - 3.70 8.50 -22.280 -28.748 - 3.70 8.60 -22.231 -28.872 - 3.70 8.70 -22.183 -28.999 - 3.70 8.80 -22.132 -29.131 - 3.70 8.90 -22.079 -29.267 - 3.70 9.00 -22.026 -29.406 - 3.80 1.00 -29.566 -28.076 - 3.80 1.10 -29.337 -28.076 - 3.80 1.20 -29.128 -28.076 - 3.80 1.30 -28.927 -28.076 - 3.80 1.40 -28.730 -28.076 - 3.80 1.50 -28.534 -28.076 - 3.80 1.60 -28.341 -28.076 - 3.80 1.70 -28.148 -28.076 - 3.80 1.80 -27.948 -28.076 - 3.80 1.90 -27.736 -28.076 - 3.80 2.00 -27.518 -28.076 - 3.80 2.10 -27.301 -28.076 - 3.80 2.20 -27.097 -28.076 - 3.80 2.30 -26.911 -28.076 - 3.80 2.40 -26.744 -28.076 - 3.80 2.50 -26.601 -28.076 - 3.80 2.60 -26.475 -28.076 - 3.80 2.70 -26.363 -28.076 - 3.80 2.80 -26.262 -28.077 - 3.80 2.90 -26.169 -28.077 - 3.80 3.00 -26.083 -28.077 - 3.80 3.10 -25.999 -28.077 - 3.80 3.20 -25.921 -28.076 - 3.80 3.30 -25.849 -28.076 - 3.80 3.40 -25.778 -28.075 - 3.80 3.50 -25.709 -28.075 - 3.80 3.60 -25.639 -28.074 - 3.80 3.70 -25.554 -28.073 - 3.80 3.80 -25.437 -28.069 - 3.80 3.90 -25.287 -28.060 - 3.80 4.00 -24.957 -28.039 - 3.80 4.10 -23.484 -27.810 - 3.80 4.20 -22.014 -27.234 - 3.80 4.30 -21.821 -28.686 - 3.80 4.40 -21.946 -27.210 - 3.80 4.50 -21.983 -26.911 - 3.80 4.60 -21.911 -26.901 - 3.80 4.70 -21.747 -27.073 - 3.80 4.80 -21.532 -27.262 - 3.80 4.90 -21.344 -27.201 - 3.80 5.00 -21.280 -26.796 - 3.80 5.10 -21.326 -26.637 - 3.80 5.20 -21.325 -26.553 - 3.80 5.30 -21.300 -26.581 - 3.80 5.40 -21.292 -26.589 - 3.80 5.50 -21.468 -26.552 - 3.80 5.60 -21.675 -26.597 - 3.80 5.70 -21.726 -26.629 - 3.80 5.80 -21.779 -26.663 - 3.80 5.90 -21.808 -26.716 - 3.80 6.00 -21.815 -26.838 - 3.80 6.10 -21.837 -26.985 - 3.80 6.20 -21.892 -27.117 - 3.80 6.30 -22.070 -27.186 - 3.80 6.40 -22.251 -27.246 - 3.80 6.50 -22.387 -27.356 - 3.80 6.60 -22.467 -27.425 - 3.80 6.70 -22.497 -27.461 - 3.80 6.80 -22.503 -27.503 - 3.80 6.90 -22.493 -27.546 - 3.80 7.00 -22.491 -27.553 - 3.80 7.10 -22.535 -27.604 - 3.80 7.20 -22.610 -27.692 - 3.80 7.30 -22.658 -27.709 - 3.80 7.40 -22.670 -27.762 - 3.80 7.50 -22.660 -27.831 - 3.80 7.60 -22.634 -27.898 - 3.80 7.70 -22.601 -27.939 - 3.80 7.80 -22.564 -27.962 - 3.80 7.90 -22.526 -27.987 - 3.80 8.00 -22.489 -28.100 - 3.80 8.10 -22.450 -28.229 - 3.80 8.20 -22.410 -28.360 - 3.80 8.30 -22.369 -28.497 - 3.80 8.40 -22.324 -28.624 - 3.80 8.50 -22.280 -28.748 - 3.80 8.60 -22.231 -28.872 - 3.80 8.70 -22.183 -28.999 - 3.80 8.80 -22.132 -29.131 - 3.80 8.90 -22.079 -29.267 - 3.80 9.00 -22.026 -29.406 - 3.90 1.00 -29.660 -28.176 - 3.90 1.10 -29.430 -28.176 - 3.90 1.20 -29.218 -28.176 - 3.90 1.30 -29.011 -28.176 - 3.90 1.40 -28.805 -28.176 - 3.90 1.50 -28.598 -28.176 - 3.90 1.60 -28.395 -28.176 - 3.90 1.70 -28.193 -28.176 - 3.90 1.80 -27.985 -28.176 - 3.90 1.90 -27.767 -28.176 - 3.90 2.00 -27.544 -28.176 - 3.90 2.10 -27.326 -28.176 - 3.90 2.20 -27.122 -28.176 - 3.90 2.30 -26.936 -28.176 - 3.90 2.40 -26.770 -28.176 - 3.90 2.50 -26.628 -28.176 - 3.90 2.60 -26.503 -28.176 - 3.90 2.70 -26.392 -28.176 - 3.90 2.80 -26.292 -28.176 - 3.90 2.90 -26.200 -28.176 - 3.90 3.00 -26.114 -28.176 - 3.90 3.10 -26.032 -28.176 - 3.90 3.20 -25.953 -28.176 - 3.90 3.30 -25.880 -28.175 - 3.90 3.40 -25.808 -28.175 - 3.90 3.50 -25.737 -28.174 - 3.90 3.60 -25.669 -28.174 - 3.90 3.70 -25.589 -28.172 - 3.90 3.80 -25.477 -28.168 - 3.90 3.90 -25.330 -28.159 - 3.90 4.00 -24.999 -28.136 - 3.90 4.10 -23.486 -27.862 - 3.90 4.20 -22.014 -27.245 - 3.90 4.30 -21.822 -27.863 - 3.90 4.40 -21.948 -27.113 - 3.90 4.50 -21.986 -26.867 - 3.90 4.60 -21.913 -26.874 - 3.90 4.70 -21.748 -27.056 - 3.90 4.80 -21.532 -27.252 - 3.90 4.90 -21.344 -27.187 - 3.90 5.00 -21.280 -26.795 - 3.90 5.10 -21.327 -26.636 - 3.90 5.20 -21.325 -26.553 - 3.90 5.30 -21.300 -26.581 - 3.90 5.40 -21.293 -26.589 - 3.90 5.50 -21.469 -26.551 - 3.90 5.60 -21.676 -26.591 - 3.90 5.70 -21.726 -26.609 - 3.90 5.80 -21.779 -26.627 - 3.90 5.90 -21.809 -26.675 - 3.90 6.00 -21.815 -26.807 - 3.90 6.10 -21.837 -26.972 - 3.90 6.20 -21.892 -27.115 - 3.90 6.30 -22.070 -27.186 - 3.90 6.40 -22.251 -27.246 - 3.90 6.50 -22.387 -27.356 - 3.90 6.60 -22.467 -27.425 - 3.90 6.70 -22.497 -27.461 - 3.90 6.80 -22.503 -27.503 - 3.90 6.90 -22.493 -27.546 - 3.90 7.00 -22.491 -27.553 - 3.90 7.10 -22.535 -27.604 - 3.90 7.20 -22.610 -27.692 - 3.90 7.30 -22.658 -27.709 - 3.90 7.40 -22.670 -27.762 - 3.90 7.50 -22.660 -27.831 - 3.90 7.60 -22.634 -27.898 - 3.90 7.70 -22.601 -27.939 - 3.90 7.80 -22.564 -27.962 - 3.90 7.90 -22.526 -27.987 - 3.90 8.00 -22.489 -28.100 - 3.90 8.10 -22.450 -28.229 - 3.90 8.20 -22.410 -28.360 - 3.90 8.30 -22.369 -28.497 - 3.90 8.40 -22.324 -28.624 - 3.90 8.50 -22.280 -28.748 - 3.90 8.60 -22.231 -28.872 - 3.90 8.70 -22.183 -28.999 - 3.90 8.80 -22.132 -29.131 - 3.90 8.90 -22.079 -29.267 - 3.90 9.00 -22.026 -29.406 - 4.00 1.00 -29.755 -28.276 - 4.00 1.10 -29.524 -28.276 - 4.00 1.20 -29.309 -28.276 - 4.00 1.30 -29.096 -28.276 - 4.00 1.40 -28.879 -28.276 - 4.00 1.50 -28.662 -28.276 - 4.00 1.60 -28.448 -28.276 - 4.00 1.70 -28.238 -28.276 - 4.00 1.80 -28.022 -28.276 - 4.00 1.90 -27.799 -28.276 - 4.00 2.00 -27.571 -28.276 - 4.00 2.10 -27.351 -28.276 - 4.00 2.20 -27.146 -28.276 - 4.00 2.30 -26.960 -28.276 - 4.00 2.40 -26.795 -28.276 - 4.00 2.50 -26.654 -28.276 - 4.00 2.60 -26.530 -28.276 - 4.00 2.70 -26.420 -28.276 - 4.00 2.80 -26.321 -28.276 - 4.00 2.90 -26.230 -28.276 - 4.00 3.00 -26.146 -28.276 - 4.00 3.10 -26.064 -28.276 - 4.00 3.20 -25.985 -28.276 - 4.00 3.30 -25.910 -28.275 - 4.00 3.40 -25.836 -28.274 - 4.00 3.50 -25.763 -28.274 - 4.00 3.60 -25.696 -28.273 - 4.00 3.70 -25.625 -28.272 - 4.00 3.80 -25.518 -28.268 - 4.00 3.90 -25.374 -28.258 - 4.00 4.00 -25.043 -28.232 - 4.00 4.10 -23.488 -27.909 - 4.00 4.20 -22.015 -27.255 - 4.00 4.30 -21.824 -27.586 - 4.00 4.40 -21.950 -27.049 - 4.00 4.50 -21.989 -26.835 - 4.00 4.60 -21.916 -26.854 - 4.00 4.70 -21.749 -27.043 - 4.00 4.80 -21.533 -27.246 - 4.00 4.90 -21.344 -27.174 - 4.00 5.00 -21.280 -26.794 - 4.00 5.10 -21.327 -26.635 - 4.00 5.20 -21.325 -26.553 - 4.00 5.30 -21.300 -26.581 - 4.00 5.40 -21.293 -26.588 - 4.00 5.50 -21.469 -26.550 - 4.00 5.60 -21.676 -26.585 - 4.00 5.70 -21.726 -26.586 - 4.00 5.80 -21.779 -26.586 - 4.00 5.90 -21.809 -26.630 - 4.00 6.00 -21.816 -26.771 - 4.00 6.10 -21.837 -26.957 - 4.00 6.20 -21.892 -27.113 - 4.00 6.30 -22.070 -27.186 - 4.00 6.40 -22.251 -27.246 - 4.00 6.50 -22.387 -27.356 - 4.00 6.60 -22.467 -27.425 - 4.00 6.70 -22.497 -27.461 - 4.00 6.80 -22.503 -27.503 - 4.00 6.90 -22.493 -27.546 - 4.00 7.00 -22.491 -27.553 - 4.00 7.10 -22.535 -27.604 - 4.00 7.20 -22.610 -27.692 - 4.00 7.30 -22.658 -27.709 - 4.00 7.40 -22.670 -27.762 - 4.00 7.50 -22.660 -27.831 - 4.00 7.60 -22.634 -27.898 - 4.00 7.70 -22.601 -27.939 - 4.00 7.80 -22.564 -27.962 - 4.00 7.90 -22.526 -27.987 - 4.00 8.00 -22.489 -28.100 - 4.00 8.10 -22.450 -28.229 - 4.00 8.20 -22.410 -28.360 - 4.00 8.30 -22.369 -28.497 - 4.00 8.40 -22.324 -28.624 - 4.00 8.50 -22.280 -28.748 - 4.00 8.60 -22.231 -28.872 - 4.00 8.70 -22.183 -28.999 - 4.00 8.80 -22.132 -29.131 - 4.00 8.90 -22.079 -29.267 - 4.00 9.00 -22.026 -29.406 - 4.10 1.00 -29.851 -28.376 - 4.10 1.10 -29.619 -28.376 - 4.10 1.20 -29.400 -28.376 - 4.10 1.30 -29.180 -28.376 - 4.10 1.40 -28.953 -28.376 - 4.10 1.50 -28.725 -28.376 - 4.10 1.60 -28.503 -28.376 - 4.10 1.70 -28.284 -28.376 - 4.10 1.80 -28.061 -28.376 - 4.10 1.90 -27.830 -28.376 - 4.10 2.00 -27.599 -28.376 - 4.10 2.10 -27.376 -28.376 - 4.10 2.20 -27.170 -28.376 - 4.10 2.30 -26.984 -28.376 - 4.10 2.40 -26.819 -28.376 - 4.10 2.50 -26.679 -28.376 - 4.10 2.60 -26.556 -28.376 - 4.10 2.70 -26.447 -28.376 - 4.10 2.80 -26.350 -28.376 - 4.10 2.90 -26.260 -28.376 - 4.10 3.00 -26.177 -28.376 - 4.10 3.10 -26.096 -28.376 - 4.10 3.20 -26.017 -28.375 - 4.10 3.30 -25.940 -28.374 - 4.10 3.40 -25.862 -28.373 - 4.10 3.50 -25.786 -28.372 - 4.10 3.60 -25.721 -28.371 - 4.10 3.70 -25.661 -28.371 - 4.10 3.80 -25.560 -28.367 - 4.10 3.90 -25.420 -28.356 - 4.10 4.00 -25.086 -28.327 - 4.10 4.10 -23.490 -27.951 - 4.10 4.20 -22.016 -27.264 - 4.10 4.30 -21.825 -27.448 - 4.10 4.40 -21.952 -27.003 - 4.10 4.50 -21.991 -26.811 - 4.10 4.60 -21.919 -26.839 - 4.10 4.70 -21.751 -27.035 - 4.10 4.80 -21.533 -27.242 - 4.10 4.90 -21.344 -27.162 - 4.10 5.00 -21.281 -26.792 - 4.10 5.10 -21.327 -26.635 - 4.10 5.20 -21.325 -26.553 - 4.10 5.30 -21.300 -26.581 - 4.10 5.40 -21.293 -26.588 - 4.10 5.50 -21.470 -26.549 - 4.10 5.60 -21.676 -26.577 - 4.10 5.70 -21.727 -26.560 - 4.10 5.80 -21.780 -26.541 - 4.10 5.90 -21.809 -26.581 - 4.10 6.00 -21.816 -26.731 - 4.10 6.10 -21.837 -26.939 - 4.10 6.20 -21.892 -27.110 - 4.10 6.30 -22.070 -27.185 - 4.10 6.40 -22.251 -27.246 - 4.10 6.50 -22.387 -27.356 - 4.10 6.60 -22.467 -27.425 - 4.10 6.70 -22.497 -27.461 - 4.10 6.80 -22.503 -27.503 - 4.10 6.90 -22.493 -27.546 - 4.10 7.00 -22.491 -27.553 - 4.10 7.10 -22.535 -27.604 - 4.10 7.20 -22.610 -27.692 - 4.10 7.30 -22.658 -27.709 - 4.10 7.40 -22.670 -27.762 - 4.10 7.50 -22.660 -27.831 - 4.10 7.60 -22.634 -27.898 - 4.10 7.70 -22.601 -27.939 - 4.10 7.80 -22.564 -27.962 - 4.10 7.90 -22.526 -27.987 - 4.10 8.00 -22.489 -28.100 - 4.10 8.10 -22.450 -28.229 - 4.10 8.20 -22.410 -28.360 - 4.10 8.30 -22.369 -28.497 - 4.10 8.40 -22.324 -28.624 - 4.10 8.50 -22.280 -28.748 - 4.10 8.60 -22.231 -28.872 - 4.10 8.70 -22.183 -28.999 - 4.10 8.80 -22.132 -29.131 - 4.10 8.90 -22.079 -29.267 - 4.10 9.00 -22.026 -29.406 - 4.20 1.00 -29.947 -28.476 - 4.20 1.10 -29.715 -28.476 - 4.20 1.20 -29.492 -28.476 - 4.20 1.30 -29.264 -28.476 - 4.20 1.40 -29.027 -28.476 - 4.20 1.50 -28.789 -28.476 - 4.20 1.60 -28.558 -28.476 - 4.20 1.70 -28.331 -28.476 - 4.20 1.80 -28.101 -28.476 - 4.20 1.90 -27.863 -28.476 - 4.20 2.00 -27.627 -28.476 - 4.20 2.10 -27.402 -28.476 - 4.20 2.20 -27.194 -28.476 - 4.20 2.30 -27.008 -28.476 - 4.20 2.40 -26.844 -28.476 - 4.20 2.50 -26.705 -28.476 - 4.20 2.60 -26.583 -28.476 - 4.20 2.70 -26.475 -28.476 - 4.20 2.80 -26.378 -28.476 - 4.20 2.90 -26.290 -28.476 - 4.20 3.00 -26.208 -28.476 - 4.20 3.10 -26.128 -28.475 - 4.20 3.20 -26.048 -28.474 - 4.20 3.30 -25.967 -28.473 - 4.20 3.40 -25.886 -28.472 - 4.20 3.50 -25.804 -28.470 - 4.20 3.60 -25.741 -28.469 - 4.20 3.70 -25.699 -28.471 - 4.20 3.80 -25.604 -28.466 - 4.20 3.90 -25.467 -28.454 - 4.20 4.00 -25.130 -28.422 - 4.20 4.10 -23.491 -27.989 - 4.20 4.20 -22.016 -27.272 - 4.20 4.30 -21.826 -27.362 - 4.20 4.40 -21.953 -26.971 - 4.20 4.50 -21.994 -26.793 - 4.20 4.60 -21.921 -26.829 - 4.20 4.70 -21.752 -27.029 - 4.20 4.80 -21.533 -27.239 - 4.20 4.90 -21.344 -27.151 - 4.20 5.00 -21.281 -26.791 - 4.20 5.10 -21.327 -26.634 - 4.20 5.20 -21.325 -26.552 - 4.20 5.30 -21.300 -26.581 - 4.20 5.40 -21.293 -26.587 - 4.20 5.50 -21.471 -26.548 - 4.20 5.60 -21.676 -26.567 - 4.20 5.70 -21.727 -26.530 - 4.20 5.80 -21.780 -26.492 - 4.20 5.90 -21.810 -26.528 - 4.20 6.00 -21.816 -26.687 - 4.20 6.10 -21.838 -26.918 - 4.20 6.20 -21.892 -27.106 - 4.20 6.30 -22.070 -27.185 - 4.20 6.40 -22.251 -27.246 - 4.20 6.50 -22.387 -27.356 - 4.20 6.60 -22.467 -27.425 - 4.20 6.70 -22.497 -27.461 - 4.20 6.80 -22.503 -27.503 - 4.20 6.90 -22.493 -27.546 - 4.20 7.00 -22.491 -27.553 - 4.20 7.10 -22.535 -27.604 - 4.20 7.20 -22.610 -27.692 - 4.20 7.30 -22.658 -27.709 - 4.20 7.40 -22.670 -27.762 - 4.20 7.50 -22.660 -27.831 - 4.20 7.60 -22.634 -27.898 - 4.20 7.70 -22.601 -27.939 - 4.20 7.80 -22.564 -27.962 - 4.20 7.90 -22.526 -27.987 - 4.20 8.00 -22.489 -28.100 - 4.20 8.10 -22.450 -28.229 - 4.20 8.20 -22.410 -28.360 - 4.20 8.30 -22.369 -28.497 - 4.20 8.40 -22.324 -28.624 - 4.20 8.50 -22.280 -28.748 - 4.20 8.60 -22.231 -28.872 - 4.20 8.70 -22.183 -28.999 - 4.20 8.80 -22.132 -29.131 - 4.20 8.90 -22.079 -29.267 - 4.20 9.00 -22.026 -29.406 - 4.30 1.00 -30.044 -28.576 - 4.30 1.10 -29.810 -28.576 - 4.30 1.20 -29.584 -28.576 - 4.30 1.30 -29.349 -28.576 - 4.30 1.40 -29.102 -28.576 - 4.30 1.50 -28.854 -28.576 - 4.30 1.60 -28.615 -28.576 - 4.30 1.70 -28.381 -28.576 - 4.30 1.80 -28.142 -28.576 - 4.30 1.90 -27.897 -28.576 - 4.30 2.00 -27.655 -28.576 - 4.30 2.10 -27.427 -28.576 - 4.30 2.20 -27.219 -28.576 - 4.30 2.30 -27.033 -28.576 - 4.30 2.40 -26.869 -28.576 - 4.30 2.50 -26.730 -28.576 - 4.30 2.60 -26.609 -28.576 - 4.30 2.70 -26.503 -28.576 - 4.30 2.80 -26.408 -28.576 - 4.30 2.90 -26.321 -28.576 - 4.30 3.00 -26.240 -28.576 - 4.30 3.10 -26.160 -28.575 - 4.30 3.20 -26.078 -28.573 - 4.30 3.30 -25.993 -28.571 - 4.30 3.40 -25.905 -28.569 - 4.30 3.50 -25.815 -28.566 - 4.30 3.60 -25.761 -28.566 - 4.30 3.70 -25.738 -28.570 - 4.30 3.80 -25.650 -28.565 - 4.30 3.90 -25.517 -28.552 - 4.30 4.00 -25.174 -28.516 - 4.30 4.10 -23.493 -28.021 - 4.30 4.20 -22.017 -27.279 - 4.30 4.30 -21.827 -27.304 - 4.30 4.40 -21.955 -26.946 - 4.30 4.50 -21.997 -26.780 - 4.30 4.60 -21.923 -26.821 - 4.30 4.70 -21.753 -27.025 - 4.30 4.80 -21.534 -27.238 - 4.30 4.90 -21.344 -27.140 - 4.30 5.00 -21.281 -26.789 - 4.30 5.10 -21.328 -26.633 - 4.30 5.20 -21.325 -26.551 - 4.30 5.30 -21.300 -26.581 - 4.30 5.40 -21.293 -26.586 - 4.30 5.50 -21.471 -26.546 - 4.30 5.60 -21.677 -26.556 - 4.30 5.70 -21.727 -26.498 - 4.30 5.80 -21.780 -26.441 - 4.30 5.90 -21.811 -26.473 - 4.30 6.00 -21.817 -26.639 - 4.30 6.10 -21.838 -26.894 - 4.30 6.20 -21.892 -27.102 - 4.30 6.30 -22.070 -27.184 - 4.30 6.40 -22.251 -27.246 - 4.30 6.50 -22.387 -27.356 - 4.30 6.60 -22.467 -27.425 - 4.30 6.70 -22.497 -27.461 - 4.30 6.80 -22.503 -27.503 - 4.30 6.90 -22.493 -27.546 - 4.30 7.00 -22.491 -27.553 - 4.30 7.10 -22.535 -27.604 - 4.30 7.20 -22.610 -27.692 - 4.30 7.30 -22.658 -27.709 - 4.30 7.40 -22.670 -27.762 - 4.30 7.50 -22.660 -27.831 - 4.30 7.60 -22.634 -27.898 - 4.30 7.70 -22.601 -27.939 - 4.30 7.80 -22.564 -27.962 - 4.30 7.90 -22.526 -27.987 - 4.30 8.00 -22.489 -28.100 - 4.30 8.10 -22.450 -28.229 - 4.30 8.20 -22.410 -28.360 - 4.30 8.30 -22.369 -28.497 - 4.30 8.40 -22.324 -28.624 - 4.30 8.50 -22.280 -28.748 - 4.30 8.60 -22.231 -28.872 - 4.30 8.70 -22.183 -28.999 - 4.30 8.80 -22.132 -29.131 - 4.30 8.90 -22.079 -29.267 - 4.30 9.00 -22.026 -29.406 - 4.40 1.00 -30.141 -28.676 - 4.40 1.10 -29.906 -28.676 - 4.40 1.20 -29.676 -28.676 - 4.40 1.30 -29.434 -28.676 - 4.40 1.40 -29.178 -28.676 - 4.40 1.50 -28.921 -28.676 - 4.40 1.60 -28.674 -28.676 - 4.40 1.70 -28.432 -28.676 - 4.40 1.80 -28.184 -28.676 - 4.40 1.90 -27.932 -28.676 - 4.40 2.00 -27.685 -28.676 - 4.40 2.10 -27.454 -28.676 - 4.40 2.20 -27.244 -28.676 - 4.40 2.30 -27.057 -28.676 - 4.40 2.40 -26.894 -28.676 - 4.40 2.50 -26.756 -28.676 - 4.40 2.60 -26.636 -28.676 - 4.40 2.70 -26.531 -28.676 - 4.40 2.80 -26.438 -28.676 - 4.40 2.90 -26.353 -28.676 - 4.40 3.00 -26.273 -28.675 - 4.40 3.10 -26.192 -28.674 - 4.40 3.20 -26.107 -28.672 - 4.40 3.30 -26.014 -28.668 - 4.40 3.40 -25.918 -28.665 - 4.40 3.50 -25.819 -28.661 - 4.40 3.60 -25.783 -28.663 - 4.40 3.70 -25.779 -28.669 - 4.40 3.80 -25.699 -28.664 - 4.40 3.90 -25.568 -28.650 - 4.40 4.00 -25.219 -28.610 - 4.40 4.10 -23.495 -28.049 - 4.40 4.20 -22.017 -27.281 - 4.40 4.30 -21.827 -27.263 - 4.40 4.40 -21.957 -26.928 - 4.40 4.50 -21.999 -26.769 - 4.40 4.60 -21.926 -26.816 - 4.40 4.70 -21.754 -27.023 - 4.40 4.80 -21.534 -27.238 - 4.40 4.90 -21.344 -27.130 - 4.40 5.00 -21.281 -26.788 - 4.40 5.10 -21.328 -26.631 - 4.40 5.20 -21.325 -26.550 - 4.40 5.30 -21.300 -26.580 - 4.40 5.40 -21.294 -26.585 - 4.40 5.50 -21.472 -26.544 - 4.40 5.60 -21.677 -26.544 - 4.40 5.70 -21.727 -26.463 - 4.40 5.80 -21.781 -26.388 - 4.40 5.90 -21.811 -26.415 - 4.40 6.00 -21.817 -26.588 - 4.40 6.10 -21.838 -26.867 - 4.40 6.20 -21.892 -27.097 - 4.40 6.30 -22.070 -27.184 - 4.40 6.40 -22.251 -27.246 - 4.40 6.50 -22.387 -27.356 - 4.40 6.60 -22.467 -27.425 - 4.40 6.70 -22.497 -27.461 - 4.40 6.80 -22.503 -27.503 - 4.40 6.90 -22.493 -27.546 - 4.40 7.00 -22.491 -27.553 - 4.40 7.10 -22.535 -27.604 - 4.40 7.20 -22.610 -27.692 - 4.40 7.30 -22.658 -27.709 - 4.40 7.40 -22.670 -27.762 - 4.40 7.50 -22.660 -27.831 - 4.40 7.60 -22.634 -27.898 - 4.40 7.70 -22.601 -27.939 - 4.40 7.80 -22.564 -27.962 - 4.40 7.90 -22.526 -27.987 - 4.40 8.00 -22.489 -28.100 - 4.40 8.10 -22.450 -28.229 - 4.40 8.20 -22.410 -28.360 - 4.40 8.30 -22.369 -28.497 - 4.40 8.40 -22.324 -28.624 - 4.40 8.50 -22.280 -28.748 - 4.40 8.60 -22.231 -28.872 - 4.40 8.70 -22.183 -28.999 - 4.40 8.80 -22.132 -29.131 - 4.40 8.90 -22.079 -29.267 - 4.40 9.00 -22.026 -29.406 - 4.50 1.00 -30.239 -28.776 - 4.50 1.10 -30.003 -28.776 - 4.50 1.20 -29.769 -28.776 - 4.50 1.30 -29.520 -28.776 - 4.50 1.40 -29.255 -28.776 - 4.50 1.50 -28.990 -28.776 - 4.50 1.60 -28.736 -28.776 - 4.50 1.70 -28.485 -28.776 - 4.50 1.80 -28.228 -28.776 - 4.50 1.90 -27.967 -28.776 - 4.50 2.00 -27.714 -28.776 - 4.50 2.10 -27.480 -28.776 - 4.50 2.20 -27.269 -28.776 - 4.50 2.30 -27.082 -28.776 - 4.50 2.40 -26.919 -28.776 - 4.50 2.50 -26.783 -28.776 - 4.50 2.60 -26.664 -28.776 - 4.50 2.70 -26.560 -28.776 - 4.50 2.80 -26.469 -28.776 - 4.50 2.90 -26.386 -28.776 - 4.50 3.00 -26.307 -28.775 - 4.50 3.10 -26.224 -28.773 - 4.50 3.20 -26.133 -28.769 - 4.50 3.30 -26.031 -28.764 - 4.50 3.40 -25.924 -28.760 - 4.50 3.50 -25.816 -28.754 - 4.50 3.60 -25.810 -28.760 - 4.50 3.70 -25.824 -28.768 - 4.50 3.80 -25.750 -28.763 - 4.50 3.90 -25.621 -28.747 - 4.50 4.00 -25.263 -28.702 - 4.50 4.10 -23.496 -28.074 - 4.50 4.20 -22.018 -27.259 - 4.50 4.30 -21.828 -27.233 - 4.50 4.40 -21.958 -26.913 - 4.50 4.50 -22.001 -26.761 - 4.50 4.60 -21.928 -26.812 - 4.50 4.70 -21.755 -27.022 - 4.50 4.80 -21.535 -27.239 - 4.50 4.90 -21.344 -27.120 - 4.50 5.00 -21.282 -26.769 - 4.50 5.10 -21.328 -26.618 - 4.50 5.20 -21.325 -26.545 - 4.50 5.30 -21.300 -26.579 - 4.50 5.40 -21.294 -26.584 - 4.50 5.50 -21.473 -26.542 - 4.50 5.60 -21.677 -26.530 - 4.50 5.70 -21.728 -26.426 - 4.50 5.80 -21.782 -26.334 - 4.50 5.90 -21.812 -26.357 - 4.50 6.00 -21.818 -26.536 - 4.50 6.10 -21.838 -26.837 - 4.50 6.20 -21.892 -27.091 - 4.50 6.30 -22.070 -27.183 - 4.50 6.40 -22.251 -27.246 - 4.50 6.50 -22.387 -27.356 - 4.50 6.60 -22.467 -27.425 - 4.50 6.70 -22.497 -27.461 - 4.50 6.80 -22.503 -27.503 - 4.50 6.90 -22.493 -27.546 - 4.50 7.00 -22.491 -27.553 - 4.50 7.10 -22.535 -27.604 - 4.50 7.20 -22.610 -27.692 - 4.50 7.30 -22.658 -27.709 - 4.50 7.40 -22.670 -27.762 - 4.50 7.50 -22.660 -27.831 - 4.50 7.60 -22.634 -27.898 - 4.50 7.70 -22.601 -27.939 - 4.50 7.80 -22.564 -27.962 - 4.50 7.90 -22.526 -27.987 - 4.50 8.00 -22.489 -28.100 - 4.50 8.10 -22.450 -28.229 - 4.50 8.20 -22.410 -28.360 - 4.50 8.30 -22.369 -28.497 - 4.50 8.40 -22.324 -28.624 - 4.50 8.50 -22.280 -28.748 - 4.50 8.60 -22.231 -28.872 - 4.50 8.70 -22.183 -28.999 - 4.50 8.80 -22.132 -29.131 - 4.50 8.90 -22.079 -29.267 - 4.50 9.00 -22.026 -29.406 - 4.60 1.00 -30.336 -28.876 - 4.60 1.10 -30.099 -28.876 - 4.60 1.20 -29.862 -28.876 - 4.60 1.30 -29.606 -28.876 - 4.60 1.40 -29.333 -28.876 - 4.60 1.50 -29.061 -28.876 - 4.60 1.60 -28.799 -28.876 - 4.60 1.70 -28.539 -28.876 - 4.60 1.80 -28.272 -28.876 - 4.60 1.90 -28.003 -28.876 - 4.60 2.00 -27.745 -28.876 - 4.60 2.10 -27.507 -28.876 - 4.60 2.20 -27.295 -28.876 - 4.60 2.30 -27.108 -28.876 - 4.60 2.40 -26.946 -28.876 - 4.60 2.50 -26.810 -28.876 - 4.60 2.60 -26.693 -28.876 - 4.60 2.70 -26.591 -28.876 - 4.60 2.80 -26.501 -28.876 - 4.60 2.90 -26.420 -28.876 - 4.60 3.00 -26.341 -28.875 - 4.60 3.10 -26.255 -28.871 - 4.60 3.20 -26.155 -28.866 - 4.60 3.30 -26.041 -28.859 - 4.60 3.40 -25.924 -28.852 - 4.60 3.50 -25.811 -28.846 - 4.60 3.60 -25.843 -28.858 - 4.60 3.70 -25.871 -28.867 - 4.60 3.80 -25.803 -28.861 - 4.60 3.90 -25.676 -28.845 - 4.60 4.00 -25.308 -28.794 - 4.60 4.10 -23.497 -28.095 - 4.60 4.20 -22.018 -27.244 - 4.60 4.30 -21.829 -27.210 - 4.60 4.40 -21.960 -26.902 - 4.60 4.50 -22.003 -26.755 - 4.60 4.60 -21.929 -26.809 - 4.60 4.70 -21.756 -27.023 - 4.60 4.80 -21.535 -27.241 - 4.60 4.90 -21.344 -26.995 - 4.60 5.00 -21.282 -26.714 - 4.60 5.10 -21.329 -26.599 - 4.60 5.20 -21.325 -26.539 - 4.60 5.30 -21.301 -26.577 - 4.60 5.40 -21.294 -26.583 - 4.60 5.50 -21.474 -26.539 - 4.60 5.60 -21.678 -26.516 - 4.60 5.70 -21.728 -26.388 - 4.60 5.80 -21.782 -26.280 - 4.60 5.90 -21.813 -26.298 - 4.60 6.00 -21.819 -26.482 - 4.60 6.10 -21.839 -26.804 - 4.60 6.20 -21.892 -27.084 - 4.60 6.30 -22.070 -27.182 - 4.60 6.40 -22.251 -27.245 - 4.60 6.50 -22.387 -27.356 - 4.60 6.60 -22.467 -27.425 - 4.60 6.70 -22.497 -27.461 - 4.60 6.80 -22.503 -27.503 - 4.60 6.90 -22.493 -27.546 - 4.60 7.00 -22.491 -27.553 - 4.60 7.10 -22.535 -27.604 - 4.60 7.20 -22.610 -27.692 - 4.60 7.30 -22.658 -27.709 - 4.60 7.40 -22.670 -27.762 - 4.60 7.50 -22.660 -27.831 - 4.60 7.60 -22.634 -27.898 - 4.60 7.70 -22.601 -27.939 - 4.60 7.80 -22.564 -27.962 - 4.60 7.90 -22.526 -27.987 - 4.60 8.00 -22.489 -28.100 - 4.60 8.10 -22.450 -28.229 - 4.60 8.20 -22.410 -28.360 - 4.60 8.30 -22.369 -28.497 - 4.60 8.40 -22.324 -28.624 - 4.60 8.50 -22.280 -28.748 - 4.60 8.60 -22.231 -28.872 - 4.60 8.70 -22.183 -28.999 - 4.60 8.80 -22.132 -29.131 - 4.60 8.90 -22.079 -29.267 - 4.60 9.00 -22.026 -29.406 - 4.70 1.00 -30.433 -28.976 - 4.70 1.10 -30.195 -28.976 - 4.70 1.20 -29.954 -28.976 - 4.70 1.30 -29.693 -28.976 - 4.70 1.40 -29.413 -28.976 - 4.70 1.50 -29.134 -28.976 - 4.70 1.60 -28.865 -28.976 - 4.70 1.70 -28.595 -28.976 - 4.70 1.80 -28.317 -28.976 - 4.70 1.90 -28.039 -28.976 - 4.70 2.00 -27.775 -28.976 - 4.70 2.10 -27.535 -28.976 - 4.70 2.20 -27.322 -28.976 - 4.70 2.30 -27.135 -28.976 - 4.70 2.40 -26.973 -28.976 - 4.70 2.50 -26.839 -28.976 - 4.70 2.60 -26.723 -28.976 - 4.70 2.70 -26.623 -28.976 - 4.70 2.80 -26.535 -28.976 - 4.70 2.90 -26.455 -28.975 - 4.70 3.00 -26.376 -28.974 - 4.70 3.10 -26.283 -28.969 - 4.70 3.20 -26.172 -28.961 - 4.70 3.30 -26.046 -28.951 - 4.70 3.40 -25.919 -28.944 - 4.70 3.50 -25.808 -28.937 - 4.70 3.60 -25.881 -28.956 - 4.70 3.70 -25.922 -28.966 - 4.70 3.80 -25.858 -28.960 - 4.70 3.90 -25.733 -28.942 - 4.70 4.00 -25.353 -28.884 - 4.70 4.10 -23.499 -28.114 - 4.70 4.20 -22.019 -27.233 - 4.70 4.30 -21.830 -27.193 - 4.70 4.40 -21.961 -26.894 - 4.70 4.50 -22.005 -26.751 - 4.70 4.60 -21.931 -26.808 - 4.70 4.70 -21.757 -27.024 - 4.70 4.80 -21.535 -27.082 - 4.70 4.90 -21.345 -26.886 - 4.70 5.00 -21.282 -26.675 - 4.70 5.10 -21.329 -26.584 - 4.70 5.20 -21.325 -26.533 - 4.70 5.30 -21.301 -26.574 - 4.70 5.40 -21.295 -26.581 - 4.70 5.50 -21.474 -26.536 - 4.70 5.60 -21.678 -26.500 - 4.70 5.70 -21.729 -26.350 - 4.70 5.80 -21.783 -26.228 - 4.70 5.90 -21.814 -26.241 - 4.70 6.00 -21.820 -26.428 - 4.70 6.10 -21.839 -26.769 - 4.70 6.20 -21.892 -27.076 - 4.70 6.30 -22.070 -27.181 - 4.70 6.40 -22.251 -27.245 - 4.70 6.50 -22.387 -27.356 - 4.70 6.60 -22.467 -27.425 - 4.70 6.70 -22.497 -27.461 - 4.70 6.80 -22.503 -27.503 - 4.70 6.90 -22.493 -27.546 - 4.70 7.00 -22.491 -27.553 - 4.70 7.10 -22.535 -27.604 - 4.70 7.20 -22.610 -27.692 - 4.70 7.30 -22.658 -27.709 - 4.70 7.40 -22.670 -27.762 - 4.70 7.50 -22.660 -27.831 - 4.70 7.60 -22.634 -27.898 - 4.70 7.70 -22.601 -27.939 - 4.70 7.80 -22.564 -27.962 - 4.70 7.90 -22.526 -27.987 - 4.70 8.00 -22.489 -28.100 - 4.70 8.10 -22.450 -28.229 - 4.70 8.20 -22.410 -28.360 - 4.70 8.30 -22.369 -28.497 - 4.70 8.40 -22.324 -28.624 - 4.70 8.50 -22.280 -28.748 - 4.70 8.60 -22.231 -28.872 - 4.70 8.70 -22.183 -28.999 - 4.70 8.80 -22.132 -29.131 - 4.70 8.90 -22.079 -29.267 - 4.70 9.00 -22.026 -29.406 - 4.80 1.00 -30.530 -29.076 - 4.80 1.10 -30.290 -29.076 - 4.80 1.20 -30.047 -29.076 - 4.80 1.30 -29.780 -29.076 - 4.80 1.40 -29.494 -29.076 - 4.80 1.50 -29.209 -29.076 - 4.80 1.60 -28.932 -29.076 - 4.80 1.70 -28.651 -29.076 - 4.80 1.80 -28.362 -29.076 - 4.80 1.90 -28.076 -29.076 - 4.80 2.00 -27.806 -29.076 - 4.80 2.10 -27.564 -29.076 - 4.80 2.20 -27.350 -29.076 - 4.80 2.30 -27.163 -29.076 - 4.80 2.40 -27.002 -29.076 - 4.80 2.50 -26.869 -29.076 - 4.80 2.60 -26.755 -29.076 - 4.80 2.70 -26.657 -29.076 - 4.80 2.80 -26.571 -29.076 - 4.80 2.90 -26.493 -29.075 - 4.80 3.00 -26.411 -29.072 - 4.80 3.10 -26.308 -29.066 - 4.80 3.20 -26.184 -29.054 - 4.80 3.30 -26.045 -29.043 - 4.80 3.40 -25.913 -29.033 - 4.80 3.50 -25.809 -29.029 - 4.80 3.60 -25.920 -29.054 - 4.80 3.70 -25.975 -29.065 - 4.80 3.80 -25.916 -29.058 - 4.80 3.90 -25.791 -29.038 - 4.80 4.00 -25.397 -28.973 - 4.80 4.10 -23.500 -28.130 - 4.80 4.20 -22.019 -27.225 - 4.80 4.30 -21.831 -27.180 - 4.80 4.40 -21.963 -26.887 - 4.80 4.50 -22.007 -26.747 - 4.80 4.60 -21.933 -26.807 - 4.80 4.70 -21.758 -26.955 - 4.80 4.80 -21.535 -26.975 - 4.80 4.90 -21.345 -26.814 - 4.80 5.00 -21.282 -26.646 - 4.80 5.10 -21.329 -26.571 - 4.80 5.20 -21.326 -26.527 - 4.80 5.30 -21.301 -26.571 - 4.80 5.40 -21.295 -26.578 - 4.80 5.50 -21.475 -26.533 - 4.80 5.60 -21.679 -26.484 - 4.80 5.70 -21.729 -26.313 - 4.80 5.80 -21.784 -26.177 - 4.80 5.90 -21.815 -26.186 - 4.80 6.00 -21.821 -26.375 - 4.80 6.10 -21.840 -26.733 - 4.80 6.20 -21.893 -27.066 - 4.80 6.30 -22.070 -27.180 - 4.80 6.40 -22.252 -27.245 - 4.80 6.50 -22.387 -27.356 - 4.80 6.60 -22.467 -27.425 - 4.80 6.70 -22.497 -27.461 - 4.80 6.80 -22.503 -27.503 - 4.80 6.90 -22.493 -27.546 - 4.80 7.00 -22.491 -27.553 - 4.80 7.10 -22.535 -27.604 - 4.80 7.20 -22.610 -27.692 - 4.80 7.30 -22.658 -27.709 - 4.80 7.40 -22.670 -27.762 - 4.80 7.50 -22.660 -27.831 - 4.80 7.60 -22.634 -27.898 - 4.80 7.70 -22.601 -27.939 - 4.80 7.80 -22.564 -27.962 - 4.80 7.90 -22.526 -27.987 - 4.80 8.00 -22.489 -28.100 - 4.80 8.10 -22.450 -28.229 - 4.80 8.20 -22.410 -28.360 - 4.80 8.30 -22.369 -28.497 - 4.80 8.40 -22.324 -28.624 - 4.80 8.50 -22.280 -28.748 - 4.80 8.60 -22.231 -28.872 - 4.80 8.70 -22.183 -28.999 - 4.80 8.80 -22.132 -29.131 - 4.80 8.90 -22.079 -29.267 - 4.80 9.00 -22.026 -29.406 - 4.90 1.00 -30.625 -29.176 - 4.90 1.10 -30.385 -29.176 - 4.90 1.20 -30.139 -29.176 - 4.90 1.30 -29.868 -29.176 - 4.90 1.40 -29.576 -29.176 - 4.90 1.50 -29.285 -29.176 - 4.90 1.60 -28.999 -29.176 - 4.90 1.70 -28.708 -29.176 - 4.90 1.80 -28.407 -29.176 - 4.90 1.90 -28.112 -29.176 - 4.90 2.00 -27.837 -29.176 - 4.90 2.10 -27.592 -29.176 - 4.90 2.20 -27.378 -29.176 - 4.90 2.30 -27.192 -29.176 - 4.90 2.40 -27.032 -29.176 - 4.90 2.50 -26.901 -29.175 - 4.90 2.60 -26.789 -29.175 - 4.90 2.70 -26.693 -29.175 - 4.90 2.80 -26.610 -29.175 - 4.90 2.90 -26.533 -29.175 - 4.90 3.00 -26.445 -29.171 - 4.90 3.10 -26.328 -29.161 - 4.90 3.20 -26.189 -29.148 - 4.90 3.30 -26.043 -29.133 - 4.90 3.40 -25.908 -29.124 - 4.90 3.50 -25.814 -29.120 - 4.90 3.60 -25.958 -29.152 - 4.90 3.70 -26.030 -29.164 - 4.90 3.80 -25.975 -29.156 - 4.90 3.90 -25.851 -29.135 - 4.90 4.00 -25.440 -29.060 - 4.90 4.10 -23.501 -28.144 - 4.90 4.20 -22.020 -27.220 - 4.90 4.30 -21.831 -27.169 - 4.90 4.40 -21.964 -26.882 - 4.90 4.50 -22.009 -26.745 - 4.90 4.60 -21.934 -26.791 - 4.90 4.70 -21.759 -26.900 - 4.90 4.80 -21.536 -26.906 - 4.90 4.90 -21.345 -26.763 - 4.90 5.00 -21.282 -26.624 - 4.90 5.10 -21.330 -26.560 - 4.90 5.20 -21.326 -26.521 - 4.90 5.30 -21.301 -26.568 - 4.90 5.40 -21.296 -26.576 - 4.90 5.50 -21.476 -26.529 - 4.90 5.60 -21.679 -26.467 - 4.90 5.70 -21.729 -26.277 - 4.90 5.80 -21.785 -26.129 - 4.90 5.90 -21.816 -26.134 - 4.90 6.00 -21.822 -26.324 - 4.90 6.10 -21.840 -26.696 - 4.90 6.20 -21.893 -27.056 - 4.90 6.30 -22.070 -27.178 - 4.90 6.40 -22.252 -27.244 - 4.90 6.50 -22.387 -27.356 - 4.90 6.60 -22.467 -27.425 - 4.90 6.70 -22.497 -27.461 - 4.90 6.80 -22.503 -27.503 - 4.90 6.90 -22.493 -27.546 - 4.90 7.00 -22.491 -27.553 - 4.90 7.10 -22.535 -27.604 - 4.90 7.20 -22.610 -27.692 - 4.90 7.30 -22.658 -27.709 - 4.90 7.40 -22.670 -27.762 - 4.90 7.50 -22.660 -27.831 - 4.90 7.60 -22.634 -27.898 - 4.90 7.70 -22.601 -27.939 - 4.90 7.80 -22.564 -27.962 - 4.90 7.90 -22.526 -27.987 - 4.90 8.00 -22.489 -28.100 - 4.90 8.10 -22.450 -28.229 - 4.90 8.20 -22.410 -28.360 - 4.90 8.30 -22.369 -28.497 - 4.90 8.40 -22.324 -28.624 - 4.90 8.50 -22.280 -28.748 - 4.90 8.60 -22.231 -28.872 - 4.90 8.70 -22.183 -28.999 - 4.90 8.80 -22.132 -29.131 - 4.90 8.90 -22.079 -29.267 - 4.90 9.00 -22.026 -29.406 - 5.00 1.00 -30.718 -29.276 - 5.00 1.10 -30.478 -29.276 - 5.00 1.20 -30.230 -29.276 - 5.00 1.30 -29.955 -29.276 - 5.00 1.40 -29.659 -29.276 - 5.00 1.50 -29.362 -29.276 - 5.00 1.60 -29.067 -29.276 - 5.00 1.70 -28.763 -29.276 - 5.00 1.80 -28.451 -29.276 - 5.00 1.90 -28.147 -29.276 - 5.00 2.00 -27.869 -29.276 - 5.00 2.10 -27.622 -29.276 - 5.00 2.20 -27.407 -29.276 - 5.00 2.30 -27.222 -29.276 - 5.00 2.40 -27.064 -29.275 - 5.00 2.50 -26.935 -29.275 - 5.00 2.60 -26.825 -29.275 - 5.00 2.70 -26.731 -29.275 - 5.00 2.80 -26.651 -29.275 - 5.00 2.90 -26.574 -29.274 - 5.00 3.00 -26.477 -29.268 - 5.00 3.10 -26.342 -29.255 - 5.00 3.20 -26.192 -29.239 - 5.00 3.30 -26.040 -29.225 - 5.00 3.40 -25.905 -29.214 - 5.00 3.50 -25.823 -29.212 - 5.00 3.60 -25.993 -29.249 - 5.00 3.70 -26.085 -29.262 - 5.00 3.80 -26.035 -29.255 - 5.00 3.90 -25.911 -29.231 - 5.00 4.00 -25.483 -29.146 - 5.00 4.10 -23.503 -28.156 - 5.00 4.20 -22.020 -27.216 - 5.00 4.30 -21.832 -27.161 - 5.00 4.40 -21.965 -26.878 - 5.00 4.50 -22.011 -26.740 - 5.00 4.60 -21.936 -26.769 - 5.00 4.70 -21.760 -26.861 - 5.00 4.80 -21.536 -26.857 - 5.00 4.90 -21.345 -26.726 - 5.00 5.00 -21.283 -26.607 - 5.00 5.10 -21.330 -26.550 - 5.00 5.20 -21.326 -26.514 - 5.00 5.30 -21.301 -26.563 - 5.00 5.40 -21.296 -26.572 - 5.00 5.50 -21.477 -26.525 - 5.00 5.60 -21.679 -26.451 - 5.00 5.70 -21.730 -26.242 - 5.00 5.80 -21.786 -26.084 - 5.00 5.90 -21.818 -26.085 - 5.00 6.00 -21.823 -26.275 - 5.00 6.10 -21.841 -26.659 - 5.00 6.20 -21.893 -27.045 - 5.00 6.30 -22.070 -27.176 - 5.00 6.40 -22.252 -27.244 - 5.00 6.50 -22.387 -27.356 - 5.00 6.60 -22.467 -27.425 - 5.00 6.70 -22.497 -27.461 - 5.00 6.80 -22.503 -27.503 - 5.00 6.90 -22.493 -27.546 - 5.00 7.00 -22.491 -27.553 - 5.00 7.10 -22.535 -27.604 - 5.00 7.20 -22.610 -27.692 - 5.00 7.30 -22.658 -27.709 - 5.00 7.40 -22.670 -27.762 - 5.00 7.50 -22.660 -27.831 - 5.00 7.60 -22.634 -27.898 - 5.00 7.70 -22.601 -27.939 - 5.00 7.80 -22.564 -27.962 - 5.00 7.90 -22.526 -27.987 - 5.00 8.00 -22.489 -28.100 - 5.00 8.10 -22.450 -28.229 - 5.00 8.20 -22.410 -28.360 - 5.00 8.30 -22.369 -28.497 - 5.00 8.40 -22.324 -28.624 - 5.00 8.50 -22.280 -28.748 - 5.00 8.60 -22.231 -28.872 - 5.00 8.70 -22.183 -28.999 - 5.00 8.80 -22.132 -29.131 - 5.00 8.90 -22.079 -29.267 - 5.00 9.00 -22.026 -29.406 - 5.10 1.00 -30.810 -29.376 - 5.10 1.10 -30.569 -29.376 - 5.10 1.20 -30.319 -29.376 - 5.10 1.30 -30.041 -29.376 - 5.10 1.40 -29.742 -29.376 - 5.10 1.50 -29.439 -29.376 - 5.10 1.60 -29.134 -29.376 - 5.10 1.70 -28.818 -29.376 - 5.10 1.80 -28.494 -29.376 - 5.10 1.90 -28.183 -29.376 - 5.10 2.00 -27.900 -29.376 - 5.10 2.10 -27.653 -29.376 - 5.10 2.20 -27.438 -29.376 - 5.10 2.30 -27.254 -29.375 - 5.10 2.40 -27.098 -29.375 - 5.10 2.50 -26.971 -29.375 - 5.10 2.60 -26.863 -29.375 - 5.10 2.70 -26.772 -29.375 - 5.10 2.80 -26.694 -29.375 - 5.10 2.90 -26.616 -29.373 - 5.10 3.00 -26.505 -29.365 - 5.10 3.10 -26.350 -29.349 - 5.10 3.20 -26.193 -29.331 - 5.10 3.30 -26.041 -29.315 - 5.10 3.40 -25.907 -29.306 - 5.10 3.50 -25.835 -29.305 - 5.10 3.60 -26.024 -29.346 - 5.10 3.70 -26.140 -29.361 - 5.10 3.80 -26.095 -29.352 - 5.10 3.90 -25.973 -29.327 - 5.10 4.00 -25.523 -29.229 - 5.10 4.10 -23.504 -28.167 - 5.10 4.20 -22.021 -27.214 - 5.10 4.30 -21.832 -27.155 - 5.10 4.40 -21.966 -26.875 - 5.10 4.50 -22.012 -26.729 - 5.10 4.60 -21.937 -26.753 - 5.10 4.70 -21.761 -26.833 - 5.10 4.80 -21.536 -26.822 - 5.10 4.90 -21.345 -26.697 - 5.10 5.00 -21.283 -26.593 - 5.10 5.10 -21.331 -26.541 - 5.10 5.20 -21.326 -26.507 - 5.10 5.30 -21.302 -26.558 - 5.10 5.40 -21.297 -26.568 - 5.10 5.50 -21.478 -26.521 - 5.10 5.60 -21.680 -26.436 - 5.10 5.70 -21.731 -26.211 - 5.10 5.80 -21.787 -26.043 - 5.10 5.90 -21.819 -26.040 - 5.10 6.00 -21.824 -26.230 - 5.10 6.10 -21.841 -26.623 - 5.10 6.20 -21.893 -27.033 - 5.10 6.30 -22.070 -27.174 - 5.10 6.40 -22.252 -27.243 - 5.10 6.50 -22.387 -27.356 - 5.10 6.60 -22.467 -27.425 - 5.10 6.70 -22.497 -27.461 - 5.10 6.80 -22.503 -27.503 - 5.10 6.90 -22.493 -27.546 - 5.10 7.00 -22.491 -27.553 - 5.10 7.10 -22.535 -27.604 - 5.10 7.20 -22.610 -27.692 - 5.10 7.30 -22.658 -27.709 - 5.10 7.40 -22.670 -27.762 - 5.10 7.50 -22.660 -27.831 - 5.10 7.60 -22.634 -27.898 - 5.10 7.70 -22.601 -27.939 - 5.10 7.80 -22.564 -27.962 - 5.10 7.90 -22.526 -27.987 - 5.10 8.00 -22.489 -28.100 - 5.10 8.10 -22.450 -28.229 - 5.10 8.20 -22.410 -28.360 - 5.10 8.30 -22.369 -28.497 - 5.10 8.40 -22.324 -28.624 - 5.10 8.50 -22.280 -28.748 - 5.10 8.60 -22.231 -28.872 - 5.10 8.70 -22.183 -28.999 - 5.10 8.80 -22.132 -29.131 - 5.10 8.90 -22.079 -29.267 - 5.10 9.00 -22.026 -29.406 - 5.20 1.00 -30.898 -29.476 - 5.20 1.10 -30.657 -29.476 - 5.20 1.20 -30.406 -29.476 - 5.20 1.30 -30.126 -29.476 - 5.20 1.40 -29.823 -29.476 - 5.20 1.50 -29.515 -29.476 - 5.20 1.60 -29.200 -29.476 - 5.20 1.70 -28.870 -29.476 - 5.20 1.80 -28.535 -29.476 - 5.20 1.90 -28.218 -29.476 - 5.20 2.00 -27.933 -29.476 - 5.20 2.10 -27.684 -29.476 - 5.20 2.20 -27.471 -29.475 - 5.20 2.30 -27.289 -29.475 - 5.20 2.40 -27.135 -29.475 - 5.20 2.50 -27.009 -29.475 - 5.20 2.60 -26.904 -29.475 - 5.20 2.70 -26.816 -29.475 - 5.20 2.80 -26.740 -29.474 - 5.20 2.90 -26.658 -29.472 - 5.20 3.00 -26.526 -29.461 - 5.20 3.10 -26.355 -29.441 - 5.20 3.20 -26.196 -29.424 - 5.20 3.30 -26.044 -29.409 - 5.20 3.40 -25.912 -29.397 - 5.20 3.50 -25.851 -29.400 - 5.20 3.60 -26.050 -29.442 - 5.20 3.70 -26.194 -29.459 - 5.20 3.80 -26.156 -29.450 - 5.20 3.90 -26.035 -29.423 - 5.20 4.00 -25.561 -29.310 - 5.20 4.10 -23.506 -28.177 - 5.20 4.20 -22.021 -27.213 - 5.20 4.30 -21.833 -27.150 - 5.20 4.40 -21.967 -26.862 - 5.20 4.50 -22.013 -26.719 - 5.20 4.60 -21.939 -26.741 - 5.20 4.70 -21.762 -26.812 - 5.20 4.80 -21.537 -26.796 - 5.20 4.90 -21.345 -26.676 - 5.20 5.00 -21.283 -26.582 - 5.20 5.10 -21.331 -26.530 - 5.20 5.20 -21.327 -26.497 - 5.20 5.30 -21.302 -26.551 - 5.20 5.40 -21.297 -26.563 - 5.20 5.50 -21.479 -26.516 - 5.20 5.60 -21.680 -26.421 - 5.20 5.70 -21.731 -26.182 - 5.20 5.80 -21.788 -26.007 - 5.20 5.90 -21.821 -26.000 - 5.20 6.00 -21.826 -26.188 - 5.20 6.10 -21.842 -26.588 - 5.20 6.20 -21.893 -27.020 - 5.20 6.30 -22.070 -27.171 - 5.20 6.40 -22.252 -27.242 - 5.20 6.50 -22.387 -27.355 - 5.20 6.60 -22.467 -27.425 - 5.20 6.70 -22.497 -27.461 - 5.20 6.80 -22.503 -27.503 - 5.20 6.90 -22.493 -27.546 - 5.20 7.00 -22.491 -27.553 - 5.20 7.10 -22.535 -27.604 - 5.20 7.20 -22.610 -27.692 - 5.20 7.30 -22.658 -27.709 - 5.20 7.40 -22.670 -27.762 - 5.20 7.50 -22.660 -27.831 - 5.20 7.60 -22.634 -27.898 - 5.20 7.70 -22.601 -27.939 - 5.20 7.80 -22.564 -27.962 - 5.20 7.90 -22.526 -27.987 - 5.20 8.00 -22.489 -28.100 - 5.20 8.10 -22.450 -28.229 - 5.20 8.20 -22.410 -28.360 - 5.20 8.30 -22.369 -28.497 - 5.20 8.40 -22.324 -28.624 - 5.20 8.50 -22.280 -28.748 - 5.20 8.60 -22.231 -28.872 - 5.20 8.70 -22.183 -28.999 - 5.20 8.80 -22.132 -29.131 - 5.20 8.90 -22.079 -29.267 - 5.20 9.00 -22.026 -29.406 - 5.30 1.00 -30.980 -29.576 - 5.30 1.10 -30.739 -29.576 - 5.30 1.20 -30.488 -29.576 - 5.30 1.30 -30.206 -29.576 - 5.30 1.40 -29.901 -29.576 - 5.30 1.50 -29.588 -29.576 - 5.30 1.60 -29.262 -29.576 - 5.30 1.70 -28.920 -29.576 - 5.30 1.80 -28.575 -29.576 - 5.30 1.90 -28.252 -29.576 - 5.30 2.00 -27.966 -29.576 - 5.30 2.10 -27.718 -29.575 - 5.30 2.20 -27.506 -29.575 - 5.30 2.30 -27.326 -29.575 - 5.30 2.40 -27.174 -29.575 - 5.30 2.50 -27.050 -29.574 - 5.30 2.60 -26.947 -29.574 - 5.30 2.70 -26.862 -29.574 - 5.30 2.80 -26.788 -29.574 - 5.30 2.90 -26.698 -29.570 - 5.30 3.00 -26.540 -29.556 - 5.30 3.10 -26.360 -29.535 - 5.30 3.20 -26.203 -29.516 - 5.30 3.30 -26.052 -29.503 - 5.30 3.40 -25.922 -29.494 - 5.30 3.50 -25.869 -29.497 - 5.30 3.60 -26.072 -29.538 - 5.30 3.70 -26.245 -29.557 - 5.30 3.80 -26.217 -29.548 - 5.30 3.90 -26.096 -29.519 - 5.30 4.00 -25.598 -29.388 - 5.30 4.10 -23.507 -28.186 - 5.30 4.20 -22.021 -27.214 - 5.30 4.30 -21.833 -27.146 - 5.30 4.40 -21.968 -26.845 - 5.30 4.50 -22.015 -26.711 - 5.30 4.60 -21.940 -26.731 - 5.30 4.70 -21.763 -26.795 - 5.30 4.80 -21.537 -26.776 - 5.30 4.90 -21.345 -26.658 - 5.30 5.00 -21.284 -26.572 - 5.30 5.10 -21.332 -26.519 - 5.30 5.20 -21.327 -26.484 - 5.30 5.30 -21.302 -26.541 - 5.30 5.40 -21.298 -26.556 - 5.30 5.50 -21.481 -26.511 - 5.30 5.60 -21.681 -26.407 - 5.30 5.70 -21.732 -26.156 - 5.30 5.80 -21.789 -25.975 - 5.30 5.90 -21.822 -25.964 - 5.30 6.00 -21.827 -26.151 - 5.30 6.10 -21.843 -26.556 - 5.30 6.20 -21.893 -27.007 - 5.30 6.30 -22.070 -27.168 - 5.30 6.40 -22.252 -27.241 - 5.30 6.50 -22.387 -27.355 - 5.30 6.60 -22.467 -27.425 - 5.30 6.70 -22.497 -27.461 - 5.30 6.80 -22.503 -27.503 - 5.30 6.90 -22.493 -27.546 - 5.30 7.00 -22.491 -27.553 - 5.30 7.10 -22.535 -27.604 - 5.30 7.20 -22.610 -27.692 - 5.30 7.30 -22.658 -27.709 - 5.30 7.40 -22.670 -27.762 - 5.30 7.50 -22.660 -27.831 - 5.30 7.60 -22.634 -27.898 - 5.30 7.70 -22.601 -27.939 - 5.30 7.80 -22.564 -27.962 - 5.30 7.90 -22.526 -27.987 - 5.30 8.00 -22.489 -28.100 - 5.30 8.10 -22.450 -28.229 - 5.30 8.20 -22.410 -28.360 - 5.30 8.30 -22.369 -28.497 - 5.30 8.40 -22.324 -28.624 - 5.30 8.50 -22.280 -28.748 - 5.30 8.60 -22.231 -28.872 - 5.30 8.70 -22.183 -28.999 - 5.30 8.80 -22.132 -29.131 - 5.30 8.90 -22.079 -29.267 - 5.30 9.00 -22.026 -29.406 - 5.40 1.00 -31.055 -29.676 - 5.40 1.10 -30.815 -29.676 - 5.40 1.20 -30.563 -29.676 - 5.40 1.30 -30.282 -29.676 - 5.40 1.40 -29.975 -29.676 - 5.40 1.50 -29.657 -29.676 - 5.40 1.60 -29.321 -29.676 - 5.40 1.70 -28.967 -29.676 - 5.40 1.80 -28.614 -29.676 - 5.40 1.90 -28.287 -29.675 - 5.40 2.00 -28.000 -29.675 - 5.40 2.10 -27.753 -29.675 - 5.40 2.20 -27.543 -29.675 - 5.40 2.30 -27.365 -29.675 - 5.40 2.40 -27.215 -29.674 - 5.40 2.50 -27.093 -29.674 - 5.40 2.60 -26.992 -29.673 - 5.40 2.70 -26.911 -29.674 - 5.40 2.80 -26.838 -29.673 - 5.40 2.90 -26.734 -29.668 - 5.40 3.00 -26.549 -29.651 - 5.40 3.10 -26.367 -29.630 - 5.40 3.20 -26.214 -29.612 - 5.40 3.30 -26.064 -29.599 - 5.40 3.40 -25.937 -29.588 - 5.40 3.50 -25.889 -29.592 - 5.40 3.60 -26.088 -29.634 - 5.40 3.70 -26.292 -29.655 - 5.40 3.80 -26.276 -29.645 - 5.40 3.90 -26.158 -29.615 - 5.40 4.00 -25.631 -29.463 - 5.40 4.10 -23.508 -28.194 - 5.40 4.20 -22.022 -27.215 - 5.40 4.30 -21.834 -27.143 - 5.40 4.40 -21.969 -26.832 - 5.40 4.50 -22.016 -26.706 - 5.40 4.60 -21.941 -26.724 - 5.40 4.70 -21.764 -26.783 - 5.40 4.80 -21.538 -26.761 - 5.40 4.90 -21.345 -26.645 - 5.40 5.00 -21.284 -26.564 - 5.40 5.10 -21.332 -26.506 - 5.40 5.20 -21.328 -26.468 - 5.40 5.30 -21.302 -26.527 - 5.40 5.40 -21.298 -26.548 - 5.40 5.50 -21.482 -26.505 - 5.40 5.60 -21.682 -26.394 - 5.40 5.70 -21.732 -26.134 - 5.40 5.80 -21.790 -25.946 - 5.40 5.90 -21.823 -25.933 - 5.40 6.00 -21.828 -26.118 - 5.40 6.10 -21.843 -26.527 - 5.40 6.20 -21.894 -26.994 - 5.40 6.30 -22.070 -27.165 - 5.40 6.40 -22.252 -27.240 - 5.40 6.50 -22.387 -27.355 - 5.40 6.60 -22.467 -27.425 - 5.40 6.70 -22.497 -27.461 - 5.40 6.80 -22.503 -27.503 - 5.40 6.90 -22.493 -27.546 - 5.40 7.00 -22.491 -27.553 - 5.40 7.10 -22.535 -27.604 - 5.40 7.20 -22.610 -27.692 - 5.40 7.30 -22.658 -27.709 - 5.40 7.40 -22.670 -27.762 - 5.40 7.50 -22.660 -27.831 - 5.40 7.60 -22.634 -27.898 - 5.40 7.70 -22.601 -27.939 - 5.40 7.80 -22.564 -27.962 - 5.40 7.90 -22.526 -27.987 - 5.40 8.00 -22.489 -28.100 - 5.40 8.10 -22.450 -28.229 - 5.40 8.20 -22.410 -28.360 - 5.40 8.30 -22.369 -28.497 - 5.40 8.40 -22.324 -28.624 - 5.40 8.50 -22.280 -28.748 - 5.40 8.60 -22.231 -28.872 - 5.40 8.70 -22.183 -28.999 - 5.40 8.80 -22.132 -29.131 - 5.40 8.90 -22.079 -29.267 - 5.40 9.00 -22.026 -29.406 - 5.50 1.00 -31.118 -29.775 - 5.50 1.10 -30.880 -29.776 - 5.50 1.20 -30.629 -29.776 - 5.50 1.30 -30.349 -29.776 - 5.50 1.40 -30.042 -29.776 - 5.50 1.50 -29.720 -29.776 - 5.50 1.60 -29.375 -29.776 - 5.50 1.70 -29.011 -29.775 - 5.50 1.80 -28.652 -29.775 - 5.50 1.90 -28.323 -29.775 - 5.50 2.00 -28.035 -29.775 - 5.50 2.10 -27.790 -29.775 - 5.50 2.20 -27.583 -29.775 - 5.50 2.30 -27.407 -29.775 - 5.50 2.40 -27.259 -29.774 - 5.50 2.50 -27.137 -29.773 - 5.50 2.60 -27.039 -29.773 - 5.50 2.70 -26.962 -29.773 - 5.50 2.80 -26.888 -29.773 - 5.50 2.90 -26.762 -29.764 - 5.50 3.00 -26.556 -29.745 - 5.50 3.10 -26.379 -29.726 - 5.50 3.20 -26.229 -29.710 - 5.50 3.30 -26.081 -29.698 - 5.50 3.40 -25.956 -29.687 - 5.50 3.50 -25.911 -29.691 - 5.50 3.60 -26.099 -29.729 - 5.50 3.70 -26.335 -29.753 - 5.50 3.80 -26.335 -29.743 - 5.50 3.90 -26.219 -29.710 - 5.50 4.00 -25.662 -29.534 - 5.50 4.10 -23.510 -28.201 - 5.50 4.20 -22.022 -27.216 - 5.50 4.30 -21.834 -27.084 - 5.50 4.40 -21.969 -26.822 - 5.50 4.50 -22.017 -26.702 - 5.50 4.60 -21.942 -26.719 - 5.50 4.70 -21.765 -26.774 - 5.50 4.80 -21.538 -26.746 - 5.50 4.90 -21.346 -26.633 - 5.50 5.00 -21.284 -26.556 - 5.50 5.10 -21.333 -26.491 - 5.50 5.20 -21.328 -26.449 - 5.50 5.30 -21.303 -26.510 - 5.50 5.40 -21.299 -26.536 - 5.50 5.50 -21.483 -26.498 - 5.50 5.60 -21.682 -26.382 - 5.50 5.70 -21.733 -26.114 - 5.50 5.80 -21.791 -25.922 - 5.50 5.90 -21.824 -25.906 - 5.50 6.00 -21.830 -26.089 - 5.50 6.10 -21.844 -26.500 - 5.50 6.20 -21.894 -26.981 - 5.50 6.30 -22.071 -27.162 - 5.50 6.40 -22.252 -27.239 - 5.50 6.50 -22.387 -27.355 - 5.50 6.60 -22.467 -27.425 - 5.50 6.70 -22.497 -27.461 - 5.50 6.80 -22.503 -27.503 - 5.50 6.90 -22.493 -27.546 - 5.50 7.00 -22.491 -27.553 - 5.50 7.10 -22.535 -27.604 - 5.50 7.20 -22.610 -27.692 - 5.50 7.30 -22.658 -27.709 - 5.50 7.40 -22.670 -27.762 - 5.50 7.50 -22.660 -27.831 - 5.50 7.60 -22.634 -27.898 - 5.50 7.70 -22.601 -27.939 - 5.50 7.80 -22.564 -27.962 - 5.50 7.90 -22.526 -27.987 - 5.50 8.00 -22.489 -28.100 - 5.50 8.10 -22.450 -28.229 - 5.50 8.20 -22.410 -28.360 - 5.50 8.30 -22.369 -28.497 - 5.50 8.40 -22.324 -28.624 - 5.50 8.50 -22.280 -28.748 - 5.50 8.60 -22.231 -28.872 - 5.50 8.70 -22.183 -28.999 - 5.50 8.80 -22.132 -29.131 - 5.50 8.90 -22.079 -29.267 - 5.50 9.00 -22.026 -29.406 - 5.60 1.00 -31.165 -29.875 - 5.60 1.10 -30.930 -29.875 - 5.60 1.20 -30.681 -29.875 - 5.60 1.30 -30.404 -29.875 - 5.60 1.40 -30.099 -29.876 - 5.60 1.50 -29.774 -29.876 - 5.60 1.60 -29.422 -29.876 - 5.60 1.70 -29.051 -29.875 - 5.60 1.80 -28.688 -29.875 - 5.60 1.90 -28.359 -29.875 - 5.60 2.00 -28.073 -29.875 - 5.60 2.10 -27.831 -29.875 - 5.60 2.20 -27.626 -29.875 - 5.60 2.30 -27.452 -29.874 - 5.60 2.40 -27.305 -29.873 - 5.60 2.50 -27.183 -29.872 - 5.60 2.60 -27.086 -29.872 - 5.60 2.70 -27.013 -29.872 - 5.60 2.80 -26.937 -29.871 - 5.60 2.90 -26.781 -29.860 - 5.60 3.00 -26.566 -29.840 - 5.60 3.10 -26.397 -29.823 - 5.60 3.20 -26.250 -29.808 - 5.60 3.30 -26.102 -29.798 - 5.60 3.40 -25.980 -29.786 - 5.60 3.50 -25.935 -29.790 - 5.60 3.60 -26.108 -29.825 - 5.60 3.70 -26.371 -29.850 - 5.60 3.80 -26.391 -29.840 - 5.60 3.90 -26.280 -29.806 - 5.60 4.00 -25.690 -29.601 - 5.60 4.10 -23.511 -28.208 - 5.60 4.20 -22.023 -27.218 - 5.60 4.30 -21.835 -27.034 - 5.60 4.40 -21.970 -26.814 - 5.60 4.50 -22.018 -26.698 - 5.60 4.60 -21.943 -26.715 - 5.60 4.70 -21.766 -26.766 - 5.60 4.80 -21.539 -26.729 - 5.60 4.90 -21.346 -26.624 - 5.60 5.00 -21.285 -26.549 - 5.60 5.10 -21.334 -26.474 - 5.60 5.20 -21.328 -26.426 - 5.60 5.30 -21.303 -26.489 - 5.60 5.40 -21.300 -26.523 - 5.60 5.50 -21.484 -26.490 - 5.60 5.60 -21.683 -26.370 - 5.60 5.70 -21.733 -26.097 - 5.60 5.80 -21.792 -25.901 - 5.60 5.90 -21.826 -25.882 - 5.60 6.00 -21.831 -26.063 - 5.60 6.10 -21.844 -26.476 - 5.60 6.20 -21.894 -26.968 - 5.60 6.30 -22.071 -27.158 - 5.60 6.40 -22.252 -27.238 - 5.60 6.50 -22.387 -27.355 - 5.60 6.60 -22.467 -27.425 - 5.60 6.70 -22.497 -27.461 - 5.60 6.80 -22.503 -27.503 - 5.60 6.90 -22.493 -27.546 - 5.60 7.00 -22.491 -27.553 - 5.60 7.10 -22.535 -27.604 - 5.60 7.20 -22.610 -27.692 - 5.60 7.30 -22.658 -27.709 - 5.60 7.40 -22.670 -27.762 - 5.60 7.50 -22.660 -27.831 - 5.60 7.60 -22.634 -27.898 - 5.60 7.70 -22.601 -27.939 - 5.60 7.80 -22.564 -27.962 - 5.60 7.90 -22.526 -27.987 - 5.60 8.00 -22.489 -28.100 - 5.60 8.10 -22.450 -28.229 - 5.60 8.20 -22.410 -28.360 - 5.60 8.30 -22.369 -28.497 - 5.60 8.40 -22.324 -28.624 - 5.60 8.50 -22.280 -28.748 - 5.60 8.60 -22.231 -28.872 - 5.60 8.70 -22.183 -28.999 - 5.60 8.80 -22.132 -29.131 - 5.60 8.90 -22.079 -29.267 - 5.60 9.00 -22.026 -29.406 - 5.70 1.00 -31.188 -29.975 - 5.70 1.10 -30.959 -29.975 - 5.70 1.20 -30.715 -29.975 - 5.70 1.30 -30.443 -29.975 - 5.70 1.40 -30.143 -29.975 - 5.70 1.50 -29.818 -29.975 - 5.70 1.60 -29.463 -29.975 - 5.70 1.70 -29.088 -29.975 - 5.70 1.80 -28.724 -29.975 - 5.70 1.90 -28.396 -29.975 - 5.70 2.00 -28.114 -29.975 - 5.70 2.10 -27.874 -29.975 - 5.70 2.20 -27.672 -29.974 - 5.70 2.30 -27.500 -29.974 - 5.70 2.40 -27.352 -29.972 - 5.70 2.50 -27.228 -29.971 - 5.70 2.60 -27.134 -29.970 - 5.70 2.70 -27.065 -29.971 - 5.70 2.80 -26.981 -29.970 - 5.70 2.90 -26.793 -29.956 - 5.70 3.00 -26.581 -29.936 - 5.70 3.10 -26.421 -29.921 - 5.70 3.20 -26.276 -29.907 - 5.70 3.30 -26.128 -29.897 - 5.70 3.40 -26.008 -29.885 - 5.70 3.50 -25.962 -29.889 - 5.70 3.60 -26.114 -29.922 - 5.70 3.70 -26.400 -29.948 - 5.70 3.80 -26.445 -29.937 - 5.70 3.90 -26.340 -29.901 - 5.70 4.00 -25.716 -29.663 - 5.70 4.10 -23.512 -28.215 - 5.70 4.20 -22.023 -27.220 - 5.70 4.30 -21.835 -26.998 - 5.70 4.40 -21.971 -26.808 - 5.70 4.50 -22.019 -26.696 - 5.70 4.60 -21.944 -26.713 - 5.70 4.70 -21.767 -26.761 - 5.70 4.80 -21.540 -26.715 - 5.70 4.90 -21.346 -26.616 - 5.70 5.00 -21.286 -26.542 - 5.70 5.10 -21.334 -26.455 - 5.70 5.20 -21.329 -26.401 - 5.70 5.30 -21.304 -26.465 - 5.70 5.40 -21.301 -26.507 - 5.70 5.50 -21.486 -26.481 - 5.70 5.60 -21.683 -26.360 - 5.70 5.70 -21.734 -26.081 - 5.70 5.80 -21.792 -25.884 - 5.70 5.90 -21.827 -25.863 - 5.70 6.00 -21.832 -26.042 - 5.70 6.10 -21.845 -26.455 - 5.70 6.20 -21.894 -26.956 - 5.70 6.30 -22.071 -27.154 - 5.70 6.40 -22.252 -27.237 - 5.70 6.50 -22.387 -27.355 - 5.70 6.60 -22.467 -27.425 - 5.70 6.70 -22.497 -27.461 - 5.70 6.80 -22.503 -27.503 - 5.70 6.90 -22.493 -27.546 - 5.70 7.00 -22.491 -27.553 - 5.70 7.10 -22.535 -27.604 - 5.70 7.20 -22.610 -27.692 - 5.70 7.30 -22.658 -27.709 - 5.70 7.40 -22.670 -27.762 - 5.70 7.50 -22.660 -27.831 - 5.70 7.60 -22.634 -27.898 - 5.70 7.70 -22.601 -27.939 - 5.70 7.80 -22.564 -27.962 - 5.70 7.90 -22.526 -27.987 - 5.70 8.00 -22.489 -28.100 - 5.70 8.10 -22.450 -28.229 - 5.70 8.20 -22.410 -28.360 - 5.70 8.30 -22.369 -28.497 - 5.70 8.40 -22.324 -28.624 - 5.70 8.50 -22.280 -28.748 - 5.70 8.60 -22.231 -28.872 - 5.70 8.70 -22.183 -28.999 - 5.70 8.80 -22.132 -29.131 - 5.70 8.90 -22.079 -29.267 - 5.70 9.00 -22.026 -29.406 - 5.80 1.00 -31.181 -30.075 - 5.80 1.10 -30.963 -30.075 - 5.80 1.20 -30.726 -30.075 - 5.80 1.30 -30.461 -30.075 - 5.80 1.40 -30.169 -30.075 - 5.80 1.50 -29.849 -30.075 - 5.80 1.60 -29.494 -30.075 - 5.80 1.70 -29.120 -30.075 - 5.80 1.80 -28.759 -30.075 - 5.80 1.90 -28.435 -30.075 - 5.80 2.00 -28.157 -30.075 - 5.80 2.10 -27.921 -30.074 - 5.80 2.20 -27.721 -30.073 - 5.80 2.30 -27.549 -30.073 - 5.80 2.40 -27.400 -30.070 - 5.80 2.50 -27.275 -30.069 - 5.80 2.60 -27.182 -30.069 - 5.80 2.70 -27.117 -30.070 - 5.80 2.80 -27.019 -30.068 - 5.80 2.90 -26.804 -30.051 - 5.80 3.00 -26.604 -30.034 - 5.80 3.10 -26.450 -30.020 - 5.80 3.20 -26.306 -30.006 - 5.80 3.30 -26.158 -29.997 - 5.80 3.40 -26.038 -29.984 - 5.80 3.50 -25.991 -29.991 - 5.80 3.60 -26.120 -30.019 - 5.80 3.70 -26.421 -30.045 - 5.80 3.80 -26.496 -30.034 - 5.80 3.90 -26.399 -29.996 - 5.80 4.00 -25.738 -29.720 - 5.80 4.10 -23.514 -28.219 - 5.80 4.20 -22.023 -27.223 - 5.80 4.30 -21.836 -26.972 - 5.80 4.40 -21.972 -26.804 - 5.80 4.50 -22.020 -26.694 - 5.80 4.60 -21.945 -26.711 - 5.80 4.70 -21.768 -26.757 - 5.80 4.80 -21.540 -26.703 - 5.80 4.90 -21.347 -26.610 - 5.80 5.00 -21.286 -26.503 - 5.80 5.10 -21.335 -26.258 - 5.80 5.20 -21.329 -26.153 - 5.80 5.30 -21.304 -26.248 - 5.80 5.40 -21.301 -26.405 - 5.80 5.50 -21.487 -26.462 - 5.80 5.60 -21.684 -26.349 - 5.80 5.70 -21.735 -26.068 - 5.80 5.80 -21.793 -25.869 - 5.80 5.90 -21.828 -25.846 - 5.80 6.00 -21.833 -26.023 - 5.80 6.10 -21.846 -26.437 - 5.80 6.20 -21.894 -26.944 - 5.80 6.30 -22.071 -27.150 - 5.80 6.40 -22.252 -27.235 - 5.80 6.50 -22.387 -27.354 - 5.80 6.60 -22.467 -27.425 - 5.80 6.70 -22.497 -27.461 - 5.80 6.80 -22.503 -27.503 - 5.80 6.90 -22.493 -27.546 - 5.80 7.00 -22.491 -27.553 - 5.80 7.10 -22.535 -27.604 - 5.80 7.20 -22.610 -27.692 - 5.80 7.30 -22.658 -27.709 - 5.80 7.40 -22.670 -27.762 - 5.80 7.50 -22.660 -27.831 - 5.80 7.60 -22.634 -27.898 - 5.80 7.70 -22.601 -27.939 - 5.80 7.80 -22.564 -27.962 - 5.80 7.90 -22.526 -27.987 - 5.80 8.00 -22.489 -28.100 - 5.80 8.10 -22.450 -28.229 - 5.80 8.20 -22.410 -28.360 - 5.80 8.30 -22.369 -28.497 - 5.80 8.40 -22.324 -28.624 - 5.80 8.50 -22.280 -28.748 - 5.80 8.60 -22.231 -28.872 - 5.80 8.70 -22.183 -28.999 - 5.80 8.80 -22.132 -29.131 - 5.80 8.90 -22.079 -29.267 - 5.80 9.00 -22.026 -29.406 - 5.90 1.00 -31.139 -30.175 - 5.90 1.10 -30.935 -30.175 - 5.90 1.20 -30.710 -30.175 - 5.90 1.30 -30.456 -30.175 - 5.90 1.40 -30.175 -30.175 - 5.90 1.50 -29.864 -30.175 - 5.90 1.60 -29.516 -30.175 - 5.90 1.70 -29.149 -30.175 - 5.90 1.80 -28.794 -30.175 - 5.90 1.90 -28.477 -30.175 - 5.90 2.00 -28.204 -30.175 - 5.90 2.10 -27.972 -30.174 - 5.90 2.20 -27.774 -30.173 - 5.90 2.30 -27.602 -30.172 - 5.90 2.40 -27.450 -30.169 - 5.90 2.50 -27.323 -30.168 - 5.90 2.60 -27.230 -30.167 - 5.90 2.70 -27.167 -30.169 - 5.90 2.80 -27.047 -30.165 - 5.90 2.90 -26.821 -30.147 - 5.90 3.00 -26.635 -30.132 - 5.90 3.10 -26.486 -30.119 - 5.90 3.20 -26.341 -30.106 - 5.90 3.30 -26.183 -30.091 - 5.90 3.40 -26.065 -30.075 - 5.90 3.50 -26.024 -30.092 - 5.90 3.60 -26.127 -30.116 - 5.90 3.70 -26.434 -30.142 - 5.90 3.80 -26.543 -30.131 - 5.90 3.90 -26.457 -30.091 - 5.90 4.00 -25.758 -29.773 - 5.90 4.10 -23.515 -28.221 - 5.90 4.20 -22.024 -27.225 - 5.90 4.30 -21.836 -26.952 - 5.90 4.40 -21.973 -26.800 - 5.90 4.50 -22.022 -26.693 - 5.90 4.60 -21.946 -26.710 - 5.90 4.70 -21.769 -26.753 - 5.90 4.80 -21.541 -26.692 - 5.90 4.90 -21.347 -26.579 - 5.90 5.00 -21.287 -26.319 - 5.90 5.10 -21.336 -26.070 - 5.90 5.20 -21.330 -25.992 - 5.90 5.30 -21.305 -26.112 - 5.90 5.40 -21.302 -26.323 - 5.90 5.50 -21.488 -26.441 - 5.90 5.60 -21.685 -26.339 - 5.90 5.70 -21.735 -26.057 - 5.90 5.80 -21.794 -25.856 - 5.90 5.90 -21.828 -25.832 - 5.90 6.00 -21.834 -26.008 - 5.90 6.10 -21.846 -26.421 - 5.90 6.20 -21.895 -26.934 - 5.90 6.30 -22.071 -27.146 - 5.90 6.40 -22.252 -27.234 - 5.90 6.50 -22.387 -27.354 - 5.90 6.60 -22.467 -27.425 - 5.90 6.70 -22.497 -27.461 - 5.90 6.80 -22.503 -27.503 - 5.90 6.90 -22.493 -27.546 - 5.90 7.00 -22.491 -27.553 - 5.90 7.10 -22.535 -27.604 - 5.90 7.20 -22.610 -27.692 - 5.90 7.30 -22.658 -27.709 - 5.90 7.40 -22.670 -27.762 - 5.90 7.50 -22.660 -27.831 - 5.90 7.60 -22.634 -27.898 - 5.90 7.70 -22.601 -27.939 - 5.90 7.80 -22.564 -27.962 - 5.90 7.90 -22.526 -27.987 - 5.90 8.00 -22.489 -28.100 - 5.90 8.10 -22.450 -28.229 - 5.90 8.20 -22.410 -28.360 - 5.90 8.30 -22.369 -28.497 - 5.90 8.40 -22.324 -28.624 - 5.90 8.50 -22.280 -28.748 - 5.90 8.60 -22.231 -28.872 - 5.90 8.70 -22.183 -28.999 - 5.90 8.80 -22.132 -29.131 - 5.90 8.90 -22.079 -29.267 - 5.90 9.00 -22.026 -29.406 - 6.00 1.00 -31.073 -30.275 - 6.00 1.10 -30.879 -30.275 - 6.00 1.20 -30.666 -30.275 - 6.00 1.30 -30.426 -30.275 - 6.00 1.40 -30.160 -30.275 - 6.00 1.50 -29.863 -30.275 - 6.00 1.60 -29.528 -30.275 - 6.00 1.70 -29.174 -30.275 - 6.00 1.80 -28.831 -30.275 - 6.00 1.90 -28.522 -30.274 - 6.00 2.00 -28.256 -30.274 - 6.00 2.10 -28.028 -30.273 - 6.00 2.20 -27.832 -30.272 - 6.00 2.30 -27.659 -30.271 - 6.00 2.40 -27.505 -30.267 - 6.00 2.50 -27.374 -30.266 - 6.00 2.60 -27.280 -30.266 - 6.00 2.70 -27.214 -30.267 - 6.00 2.80 -27.069 -30.262 - 6.00 2.90 -26.846 -30.244 - 6.00 3.00 -26.673 -30.231 - 6.00 3.10 -26.526 -30.219 - 6.00 3.20 -26.377 -30.203 - 6.00 3.30 -26.121 -30.123 - 6.00 3.40 -26.074 -30.147 - 6.00 3.50 -26.059 -30.194 - 6.00 3.60 -26.138 -30.214 - 6.00 3.70 -26.440 -30.239 - 6.00 3.80 -26.587 -30.227 - 6.00 3.90 -26.514 -30.186 - 6.00 4.00 -25.775 -29.820 - 6.00 4.10 -23.516 -28.223 - 6.00 4.20 -22.024 -27.218 - 6.00 4.30 -21.837 -26.936 - 6.00 4.40 -21.974 -26.797 - 6.00 4.50 -22.023 -26.692 - 6.00 4.60 -21.948 -26.709 - 6.00 4.70 -21.770 -26.751 - 6.00 4.80 -21.542 -26.680 - 6.00 4.90 -21.348 -26.518 - 6.00 5.00 -21.288 -26.211 - 6.00 5.10 -21.336 -25.958 - 6.00 5.20 -21.330 -25.888 - 6.00 5.30 -21.305 -26.021 - 6.00 5.40 -21.303 -26.265 - 6.00 5.50 -21.490 -26.421 - 6.00 5.60 -21.685 -26.329 - 6.00 5.70 -21.736 -26.047 - 6.00 5.80 -21.795 -25.845 - 6.00 5.90 -21.829 -25.820 - 6.00 6.00 -21.834 -25.995 - 6.00 6.10 -21.847 -26.407 - 6.00 6.20 -21.895 -26.924 - 6.00 6.30 -22.071 -27.142 - 6.00 6.40 -22.252 -27.232 - 6.00 6.50 -22.387 -27.354 - 6.00 6.60 -22.467 -27.425 - 6.00 6.70 -22.497 -27.461 - 6.00 6.80 -22.503 -27.503 - 6.00 6.90 -22.493 -27.546 - 6.00 7.00 -22.491 -27.553 - 6.00 7.10 -22.535 -27.604 - 6.00 7.20 -22.610 -27.692 - 6.00 7.30 -22.658 -27.709 - 6.00 7.40 -22.670 -27.762 - 6.00 7.50 -22.660 -27.831 - 6.00 7.60 -22.634 -27.898 - 6.00 7.70 -22.601 -27.939 - 6.00 7.80 -22.564 -27.962 - 6.00 7.90 -22.526 -27.987 - 6.00 8.00 -22.489 -28.100 - 6.00 8.10 -22.450 -28.229 - 6.00 8.20 -22.410 -28.360 - 6.00 8.30 -22.369 -28.497 - 6.00 8.40 -22.324 -28.624 - 6.00 8.50 -22.280 -28.748 - 6.00 8.60 -22.231 -28.872 - 6.00 8.70 -22.183 -28.999 - 6.00 8.80 -22.132 -29.131 - 6.00 8.90 -22.079 -29.267 - 6.00 9.00 -22.026 -29.406 From 916e9d03e16721b4c4728c30932a7140736f7409 Mon Sep 17 00:00:00 2001 From: helenarichie Date: Tue, 6 Dec 2022 15:20:24 -0500 Subject: [PATCH 142/694] add grid_enum indexing for dust initialization in Clouds() initial condition function --- src/grid/initial_conditions.cpp | 25 ++++++------------------- 1 file changed, 6 insertions(+), 19 deletions(-) diff --git a/src/grid/initial_conditions.cpp b/src/grid/initial_conditions.cpp index 040b3d7c5..38bb51883 100644 --- a/src/grid/initial_conditions.cpp +++ b/src/grid/initial_conditions.cpp @@ -225,12 +225,6 @@ void Grid3D::Constant(Real rho, Real vx, Real vy, Real vz, Real P, Real Bx, Real #ifdef DE C.GasEnergy[id] = P/(gama-1.0); #endif // DE - - #ifdef SCALAR - #ifdef DUST - C.scalar[id] = rho*1e-2; - #endif // DUST - #endif // SCALAR } if (i==istart && j==jstart && k==kstart) { n = rho*DENSITY_UNIT / (mu*MP); @@ -1335,9 +1329,9 @@ void Grid3D::Clouds() C.GasEnergy[id] = p_bg/(gama-1.0); #endif #ifdef SCALAR - #ifdef BASIC_SCALAR - C.basic_scalar[id] = C.density[id]*0.0; - #endif + #ifdef BASIC_SCALAR + C.basic_scalar[id] = C.density[id]*0.0; + #endif #endif // add clouds for (int nn = 0; nn Date: Fri, 16 Dec 2022 18:21:15 -0500 Subject: [PATCH 143/694] Change C.density to C.host where grid_enum is called --- src/cooling_grackle/cool_grackle.cpp | 14 +++++----- src/grid/grid3D.cpp | 38 ++++++++++++++-------------- 2 files changed, 26 insertions(+), 26 deletions(-) diff --git a/src/cooling_grackle/cool_grackle.cpp b/src/cooling_grackle/cool_grackle.cpp index 929a355eb..e9ea551ca 100644 --- a/src/cooling_grackle/cool_grackle.cpp +++ b/src/cooling_grackle/cool_grackle.cpp @@ -147,16 +147,16 @@ Cool.fields.y_velocity = NULL; Cool.fields.z_velocity = NULL; chprintf( " Allocating memory for: HI, HII, HeI, HeII, HeIII, e densities\n"); -Cool.fields.HI_density = &C.density[ H.n_cells*grid_enum::HI_density ]; -Cool.fields.HII_density = &C.density[ H.n_cells*grid_enum::HII_density ]; -Cool.fields.HeI_density = &C.density[ H.n_cells*grid_enum::HeI_density ]; -Cool.fields.HeII_density = &C.density[ H.n_cells*grid_enum::HeII_density ]; -Cool.fields.HeIII_density = &C.density[ H.n_cells*grid_enum::HeIII_density ]; -Cool.fields.e_density = &C.density[ H.n_cells*grid_enum::e_density ]; +Cool.fields.HI_density = &C.host[ H.n_cells*grid_enum::HI_density ]; +Cool.fields.HII_density = &C.host[ H.n_cells*grid_enum::HII_density ]; +Cool.fields.HeI_density = &C.host[ H.n_cells*grid_enum::HeI_density ]; +Cool.fields.HeII_density = &C.host[ H.n_cells*grid_enum::HeII_density ]; +Cool.fields.HeIII_density = &C.host[ H.n_cells*grid_enum::HeIII_density ]; +Cool.fields.e_density = &C.host[ H.n_cells*grid_enum::e_density ]; #ifdef GRACKLE_METALS chprintf( " Allocating memory for: metal density\n"); -Cool.fields.metal_density = &C.density[ H.n_cells*grid_enum::metal_density ]; +Cool.fields.metal_density = &C.host[ H.n_cells*grid_enum::metal_density ]; #else Cool.fields.metal_density = NULL; #endif diff --git a/src/grid/grid3D.cpp b/src/grid/grid3D.cpp index ee13187b1..e100def1c 100644 --- a/src/grid/grid3D.cpp +++ b/src/grid/grid3D.cpp @@ -334,12 +334,12 @@ void Grid3D::AllocateMemory(void) #ifdef CHEMISTRY_GPU - C.HI_density = &C.density[ H.n_cells*grid_enum::HI_density ]; - C.HII_density = &C.density[ H.n_cells*grid_enum::HII_density ]; - C.HeI_density = &C.density[ H.n_cells*grid_enum::HeI_density ]; - C.HeII_density = &C.density[ H.n_cells*grid_enum::HeII_density ]; - C.HeIII_density = &C.density[ H.n_cells*grid_enum::HeIII_density ]; - C.e_density = &C.density[ H.n_cells*grid_enum::e_density ]; + C.HI_density = &C.host[ H.n_cells*grid_enum::HI_density ]; + C.HII_density = &C.host[ H.n_cells*grid_enum::HII_density ]; + C.HeI_density = &C.host[ H.n_cells*grid_enum::HeI_density ]; + C.HeII_density = &C.host[ H.n_cells*grid_enum::HeII_density ]; + C.HeIII_density = &C.host[ H.n_cells*grid_enum::HeIII_density ]; + C.e_density = &C.host[ H.n_cells*grid_enum::e_density ]; #endif // initialize host array @@ -507,25 +507,25 @@ Real Grid3D::Update_Grid(void) #ifdef COOLING_GRACKLE Cool.fields.density = C.density; - Cool.fields.HI_density = &C.density[ H.n_cells*grid_enum::HI_density ]; - Cool.fields.HII_density = &C.density[ H.n_cells*grid_enum::HII_density ]; - Cool.fields.HeI_density = &C.density[ H.n_cells*grid_enum::HeI_density ]; - Cool.fields.HeII_density = &C.density[ H.n_cells*grid_enum::HeII_density ]; - Cool.fields.HeIII_density = &C.density[ H.n_cells*grid_enum::HeIII_density ]; - Cool.fields.e_density = &C.density[ H.n_cells*grid_enum::e_density ]; + Cool.fields.HI_density = &C.host[ H.n_cells*grid_enum::HI_density ]; + Cool.fields.HII_density = &C.host[ H.n_cells*grid_enum::HII_density ]; + Cool.fields.HeI_density = &C.host[ H.n_cells*grid_enum::HeI_density ]; + Cool.fields.HeII_density = &C.host[ H.n_cells*grid_enum::HeII_density ]; + Cool.fields.HeIII_density = &C.host[ H.n_cells*grid_enum::HeIII_density ]; + Cool.fields.e_density = &C.host[ H.n_cells*grid_enum::e_density ]; #ifdef GRACKLE_METALS - Cool.fields.metal_density = &C.density[ H.n_cells*grid_enum::metal_density ]; + Cool.fields.metal_density = &C.host[ H.n_cells*grid_enum::metal_density ]; #endif #endif #ifdef CHEMISTRY_GPU - C.HI_density = &C.density[ H.n_cells*grid_enum::HI_density ]; - C.HII_density = &C.density[ H.n_cells*grid_enum::HII_density ]; - C.HeI_density = &C.density[ H.n_cells*grid_enum::HeI_density ]; - C.HeII_density = &C.density[ H.n_cells*grid_enum::HeII_density ]; - C.HeIII_density = &C.density[ H.n_cells*grid_enum::HeIII_density ]; - C.e_density = &C.density[ H.n_cells*grid_enum::e_density ]; + C.HI_density = &C.host[ H.n_cells*grid_enum::HI_density ]; + C.HII_density = &C.host[ H.n_cells*grid_enum::HII_density ]; + C.HeI_density = &C.host[ H.n_cells*grid_enum::HeI_density ]; + C.HeII_density = &C.host[ H.n_cells*grid_enum::HeII_density ]; + C.HeIII_density = &C.host[ H.n_cells*grid_enum::HeIII_density ]; + C.e_density = &C.host[ H.n_cells*grid_enum::e_density ]; #endif From 309e9f03518732eceeb6bb95e550fb11568bfc14 Mon Sep 17 00:00:00 2001 From: Alwin Date: Fri, 16 Dec 2022 18:31:33 -0500 Subject: [PATCH 144/694] light enum syntax changes --- src/dust/dust_cuda.cu | 20 ++++++++++---------- src/grid/grid_enum.h | 6 +++--- src/grid/initial_conditions.cpp | 2 +- 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/src/dust/dust_cuda.cu b/src/dust/dust_cuda.cu index 343c3d4d5..524b58cd0 100644 --- a/src/dust/dust_cuda.cu +++ b/src/dust/dust_cuda.cu @@ -57,19 +57,19 @@ __global__ void Dust_Kernel(Real *dev_conserved, int nx, int ny, int nz, int n_g if (xid >= is && xid < ie && yid >= js && yid < je && zid >= ks && zid < ke) { // get conserved quanitites - d_gas = dev_conserved[n_cells*grid_enum::density + id]; - d_dust = dev_conserved[n_cells*grid_enum::dust_density + id]; - E = dev_conserved[n_cells*grid_enum::Energy + id]; + d_gas = dev_conserved[id + n_cells*grid_enum::density]; + d_dust = dev_conserved[id + n_cells*grid_enum::dust_density]; + E = dev_conserved[id + n_cells*grid_enum::Energy]; n = d_gas*DENSITY_UNIT / (mu*MP); if (E < 0.0 || E != E) return; - vx = dev_conserved[n_cells*grid_enum::momentum_x + id] / d_gas; - vy = dev_conserved[n_cells*grid_enum::momentum_y + id] / d_gas; - vz = dev_conserved[n_cells*grid_enum::momentum_z + id] / d_gas; + vx = dev_conserved[id + n_cells*grid_enum::momentum_x ] / d_gas; + vy = dev_conserved[id + n_cells*grid_enum::momentum_y ] / d_gas; + vz = dev_conserved[id + n_cells*grid_enum::momentum_z ] / d_gas; #ifdef DE - ge = dev_conserved[n_cells*grid_enum::GasEnergy + id] / d_gas; + ge = dev_conserved[id + n_cells*grid_enum::GasEnergy ] / d_gas; ge = fmax(ge, (Real) TINY_NUMBER); #endif // DE @@ -102,10 +102,10 @@ __global__ void Dust_Kernel(Real *dev_conserved, int nx, int ny, int nz, int n_g // update dust density d_dust += dd; - dev_conserved[n_cells*grid_enum::dust_density + id] = d_dust; + dev_conserved[id + n_cells*grid_enum::dust_density ] = d_dust; #ifdef DE - dev_conserved[n_cells*grid_enum::GasEnergy + id] = d_dust*ge; + dev_conserved[id + n_cells*grid_enum::GasEnergy ] = d_dust*ge; #endif } } @@ -130,4 +130,4 @@ __device__ Real calc_dd_dt(Real d_dust, Real tau_sp) { } #endif // DUST -#endif // CUDA \ No newline at end of file +#endif // CUDA diff --git a/src/grid/grid_enum.h b/src/grid/grid_enum.h index ffcc31a9a..6f25676dd 100644 --- a/src/grid/grid_enum.h +++ b/src/grid/grid_enum.h @@ -4,8 +4,8 @@ // In the final form of this approach, this file will also set nfields (not yet) and NSCALARS (done) // so that adding a field only requires registering it here: // grid knows to allocate memory based on nfields and NSCALARS -// and values can be accessed with density[ncells*grid_enum::enum_name + id] -// example: C.device[H.n_cells*grid_enum::basic_scalar + id] +// and values can be accessed with density[id + ncells*grid_enum::enum_name] +// example: C.device[id + H.n_cells*grid_enum::basic_scalar] // enum notes: @@ -68,7 +68,7 @@ enum : int { #endif num_fields, -//Aliases +//Aliases and manually computed enums nscalars = finalscalar_plus_1 - scalar, }; diff --git a/src/grid/initial_conditions.cpp b/src/grid/initial_conditions.cpp index 38bb51883..4786fb446 100644 --- a/src/grid/initial_conditions.cpp +++ b/src/grid/initial_conditions.cpp @@ -1347,7 +1347,7 @@ void Grid3D::Clouds() #endif // DE #ifdef DUST - C.density[id+H.n_cells*grid_enum::dust_density] = rho_cl*1e-2; + C.host[id+H.n_cells*grid_enum::dust_density] = rho_cl*1e-2; #endif // DUST } } From 729b44082d80c733916daea5cd42a5eb600af535 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Mon, 19 Dec 2022 14:01:59 -0500 Subject: [PATCH 145/694] Add Preliminary MHD Intergrater This commit primarily adds support for MHD to the Van Leer 3D integrator along with myriad small fixes and modifications. All new features have tests as well. The MHD implementation still has bugs but needs to be merged in before we reformat the code so that the merging doesn't provoke too many merge conflicts. MHD - Add MHD support to Van Leer 3D integrator - All MHD code is in the `mhd` namespace with sub-namespaces when relevant (e.g. `mhd::utils`) - Add kernel to compute the CT electric fields and tests for it - Add kernel to update the magnetic field and tests for it - Add MHD support to the PCM reconstruction - Add new `Linear_Wave` initial condition type - Add `ctElectricFields` global device array - Set all MHD input parameters to default to zero - Remove incorrect MHD specializations in the boundary conditions - Add check to make sure MHD has at least 3 ghost cells - Add functions to compute the maximum magnetic divergence, report it, and exit if it's too large - Clean up make.type.mhd and enable cuda error checking - Add example parameter files for - Alfven wave - Fast magnetosonic wave - Slow magnetosonic wave - Ryu & Jones 1995 shock tubes 1a and 2a - Add the following input parameters for generalized linear waves - EigenVec_rho - EigenVec_MomentumX - EigenVec_MomentumY - EigenVec_MomentumZ - EigenVec_E - EigenVec_Bx - EigenVec_By - EigenVec_Bz - pitch - yaw - Added MHD system tests for: - Hydro only constant state test - MHD constant state - Sod test that works with PCM (this will be merged with the hydro test when MHD supports PPMC) - All 4 MHD linear waves - MHD Einfeldt strong rarefaction Testing - Add new method, `runL1ErrorTest`, to `SystemTestRunner` that computes the L1 error compared to the initial conditions. Ideal for wave tests - Updated hydro wave tests to use new `runL1ErrorTest` method HLLD Riemann Solver - Updated interface state indexing for new format - Added documentation - Moved dot product function to `math_utils` Reductions - Add new FP atomic max that works on CUDA or ROCm - Update time step calculation to use the grid reduction with atomic and DeviceVector - Updated tests to match - Remove dev_dti, host_dti_array, and dev_dti_array global variables Utilities Cuda Utilities - Add function `initGpuMemory` which initializes GPU memory so the CUDA compute sanitizer doesn't complain about it. Used to initialize all GPU arrays that I know of. - Add struct `AutomaticLaunchParams` which is a thin wrapper over the occupancy API. Primarily intended for reductions where performance is sensitive to the number of blocks but could be used for any kernel launch DeviceVector - New option to initialize memory Math Utilities New namespace and file for math utilities. Currently contains a semi-general rotation and dot product functions MHD Utilities - The `mhd::utils::computeEnergy` function now works properly with either MHD or hydro and returns the appropriate energy Other - Add a function `to_string_exact` to convert floating point numbers to a string such that it can be exactly deserialized back from a string to the same floating point number. Used in tests currently --- .gitmodules | 3 + Makefile | 6 +- builds/make.host.c3po | 6 +- builds/make.host.spock | 1 + builds/make.type.mhd | 21 +- cholla-tests-data | 2 +- examples/1D/sound_wave.txt | 7 +- examples/2D/sound_wave.txt | 7 +- examples/3D/Brio_and_Wu.txt | 6 +- examples/3D/Ryu_and_Jones_1a.txt | 74 ++ examples/3D/Ryu_and_Jones_2a.txt | 74 ++ examples/3D/alfven_wave.txt | 71 ++ examples/3D/constant.txt | 6 +- examples/3D/fast_magnetosonic.txt | 71 ++ examples/3D/mhd_contact_wave.txt | 71 ++ examples/3D/slow_magnetosonic.txt | 72 ++ examples/3D/sound_wave.txt | 7 +- src/global/global.cpp | 20 + src/global/global.h | 20 +- src/global/global_cuda.cu | 6 +- src/global/global_cuda.h | 12 +- src/grid/boundary_conditions.cpp | 6 +- src/grid/cuda_boundaries.cu | 85 +-- src/grid/grid3D.cpp | 78 ++- src/grid/grid3D.h | 64 +- src/grid/initial_conditions.cpp | 127 ++-- src/hydro/hydro_cuda.cu | 94 ++- src/hydro/hydro_cuda_tests.cu | 36 +- src/integrators/VL_3D_cuda.cu | 162 +++-- src/io/io.h | 19 + src/main.cpp | 17 +- src/mhd/ct_electric_fields.cu | 340 ++++++++++ src/mhd/ct_electric_fields.h | 155 +++++ src/mhd/ct_electric_fields_tests.cu | 207 ++++++ src/mhd/magnetic_divergence.cu | 112 +++ src/mhd/magnetic_divergence.h | 81 +++ src/mhd/magnetic_divergence_tests.cu | 59 ++ src/mhd/magnetic_update.cu | 80 +++ src/mhd/magnetic_update.h | 57 ++ src/mhd/magnetic_update_tests.cu | 150 +++++ src/reconstruction/pcm_cuda.cu | 378 +++++------ src/reconstruction/plmc_cuda.cu | 84 +-- src/reconstruction/plmp_cuda.cu | 46 +- src/reconstruction/ppmc_cuda.cu | 122 ++-- src/reconstruction/ppmp_cuda.cu | 80 +-- src/riemann_solvers/hlld_cuda.cu | 160 ++--- src/riemann_solvers/hlld_cuda.h | 63 +- src/riemann_solvers/hlld_cuda_tests.cu | 260 +++---- src/system_tests/hydro_system_tests.cpp | 221 +++++- .../input_files/blank_settings_file.txt | 3 + ...stant_CorrectInputExpectCorrectOutput.txt} | 0 ...ave3D_CorrectInputExpectCorrectOutput.txt} | 21 +- ...icFieldCorrectInputExpectCorrectOutput.txt | 50 ++ ...icFieldCorrectInputExpectCorrectOutput.txt | 50 ++ ...action_CorrectInputExpectCorrectOutput.txt | 71 ++ ...zedMpi_CorrectInputExpectCorrectOutput.txt | 57 ++ src/system_tests/mhd_system_tests.cpp | 636 ++++++++++++++++++ src/system_tests/system_tester.cpp | 209 +++++- src/system_tests/system_tester.h | 32 +- src/utils/DeviceVector.h | 22 +- src/utils/DeviceVector_tests.cu | 7 +- src/utils/cuda_utilities.h | 75 ++- src/utils/cuda_utilities_tests.cpp | 20 - src/utils/gpu.hpp | 10 +- src/utils/hydro_utilities.h | 34 + src/utils/hydro_utilities_tests.cpp | 42 ++ src/utils/math_utilities.h | 92 +++ src/utils/math_utilities_tests.cpp | 64 ++ src/utils/mhd_utilities.cpp | 18 - src/utils/mhd_utilities.cu | 25 + src/utils/mhd_utilities.h | 97 ++- ...ities_tests.cpp => mhd_utilities_tests.cu} | 120 ++-- src/utils/reduction_utilities.cu | 14 - src/utils/reduction_utilities.h | 191 ++++-- src/utils/reduction_utilities_tests.cu | 44 +- src/utils/testing_utilities.cpp | 88 +-- src/utils/testing_utilities.h | 63 +- 77 files changed, 4765 insertions(+), 1296 deletions(-) create mode 100644 examples/3D/Ryu_and_Jones_1a.txt create mode 100644 examples/3D/Ryu_and_Jones_2a.txt create mode 100644 examples/3D/alfven_wave.txt create mode 100644 examples/3D/fast_magnetosonic.txt create mode 100644 examples/3D/mhd_contact_wave.txt create mode 100644 examples/3D/slow_magnetosonic.txt create mode 100644 src/mhd/ct_electric_fields.cu create mode 100644 src/mhd/ct_electric_fields.h create mode 100644 src/mhd/ct_electric_fields_tests.cu create mode 100644 src/mhd/magnetic_divergence.cu create mode 100644 src/mhd/magnetic_divergence.h create mode 100644 src/mhd/magnetic_divergence_tests.cu create mode 100644 src/mhd/magnetic_update.cu create mode 100644 src/mhd/magnetic_update.h create mode 100644 src/mhd/magnetic_update_tests.cu create mode 100644 src/system_tests/input_files/blank_settings_file.txt rename src/system_tests/input_files/{tHYDROSYSTEMConstant_CorrectInputExpectCorrectOutput.txt => tHYDROtMHDSYSTEMConstant_CorrectInputExpectCorrectOutput.txt} (100%) rename src/system_tests/input_files/{tHYDROSYSTEMSoundWave3D_CorrectInputExpectCorrectOutput.txt => tHYDROtMHDSYSTEMSoundWave3D_CorrectInputExpectCorrectOutput.txt} (70%) create mode 100644 src/system_tests/input_files/tMHDSYSTEMConstantParameterizedMpi_MagneticFieldCorrectInputExpectCorrectOutput.txt create mode 100644 src/system_tests/input_files/tMHDSYSTEMConstantParameterizedMpi_ZeroMagneticFieldCorrectInputExpectCorrectOutput.txt create mode 100644 src/system_tests/input_files/tMHDSYSTEMEinfeldtStrongRarefaction_CorrectInputExpectCorrectOutput.txt create mode 100644 src/system_tests/input_files/tMHDSYSTEMSodShockTubeParameterizedMpi_CorrectInputExpectCorrectOutput.txt create mode 100644 src/system_tests/mhd_system_tests.cpp create mode 100644 src/utils/math_utilities.h create mode 100644 src/utils/math_utilities_tests.cpp delete mode 100644 src/utils/mhd_utilities.cpp create mode 100644 src/utils/mhd_utilities.cu rename src/utils/{mhd_utilities_tests.cpp => mhd_utilities_tests.cu} (81%) diff --git a/.gitmodules b/.gitmodules index e69de29bb..c9a26c699 100644 --- a/.gitmodules +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "cholla-tests-data"] + path = cholla-tests-data + url = https://github.com/cholla-hydro/cholla-tests-data.git diff --git a/Makefile b/Makefile index dd1915537..dcc6d296c 100644 --- a/Makefile +++ b/Makefile @@ -10,7 +10,7 @@ CUDA_ARCH ?= sm_70 DIRS := src src/analysis src/chemistry_gpu src/cooling src/cooling_grackle src/cosmology \ src/cpu src/global src/gravity src/gravity/paris src/grid src/hydro \ - src/integrators src/io src/main.cpp src/main_tests.cpp \ + src/integrators src/io src/main.cpp src/main_tests.cpp src/mhd\ src/model src/mpi src/old_cholla src/particles src/reconstruction \ src/riemann_solvers src/system_tests src/utils src/dust @@ -38,6 +38,9 @@ ifeq ($(TEST), true) CFLAGS += $(TEST_FLAGS) CXXFLAGS += $(TEST_FLAGS) GPUFLAGS += $(TEST_FLAGS) + CFLAGS += $(TEST_FLAGS) + CXXFLAGS += $(TEST_FLAGS) + GPUFLAGS += $(TEST_FLAGS) # HACK # Set the build flags to debug. This is mostly to avoid the approximations @@ -134,6 +137,7 @@ ifdef HIPCONFIG DFLAGS += -DO_HIP CXXFLAGS += $(HIPCONFIG) GPUCXX ?= hipcc + GPUFLAGS += -Wall -ferror-limit=1 LD := $(CXX) LDFLAGS := $(CXXFLAGS) -L$(ROCM_PATH)/lib LIBS += -lamdhip64 diff --git a/builds/make.host.c3po b/builds/make.host.c3po index 79574201c..02f658896 100644 --- a/builds/make.host.c3po +++ b/builds/make.host.c3po @@ -3,13 +3,9 @@ CC = mpicc CXX = mpicxx CFLAGS_DEBUG = -g -O0 CFLAGS_OPTIMIZE = -g -O2 -CXXFLAGS_DEBUG = -g -O0 -std=c++11 ${F_OFFLOAD} -CXXFLAGS_OPTIMIZE = -g -Ofast -std=c++11 ${F_OFFLOAD} -GPUFLAGS_DEBUG = -g -O0 -std=c++11 -ccbin=mpicxx -GPUFLAGS_OPTIMIZE = -g -O3 -std=c++11 -ccbin=mpicxx CXXFLAGS_DEBUG = -g -O0 -std=c++17 ${F_OFFLOAD} CXXFLAGS_OPTIMIZE = -g -Ofast -std=c++17 ${F_OFFLOAD} -GPUFLAGS_DEBUG = -g -G -cudart shared -O0 -std=c++17 -ccbin=mpicxx +GPUFLAGS_DEBUG = -g -G -cudart shared -O0 -std=c++17 -ccbin=mpicxx -Xcompiler -rdynamic GPUFLAGS_OPTIMIZE = -g -O3 -std=c++17 -ccbin=mpicxx OMP_NUM_THREADS = 7 diff --git a/builds/make.host.spock b/builds/make.host.spock index b839e95ae..8cac7c086 100644 --- a/builds/make.host.spock +++ b/builds/make.host.spock @@ -6,6 +6,7 @@ CXX = CC CFLAGS_DEBUG = -g -O0 CFLAGS_OPTIMIZE = -g -O2 + CXXFLAGS_DEBUG = -g -O0 -std=c++17 CXXFLAGS_OPTIMIZE = -g -Ofast -std=c++17 diff --git a/builds/make.type.mhd b/builds/make.type.mhd index 3f67ea88f..486ba2547 100644 --- a/builds/make.type.mhd +++ b/builds/make.type.mhd @@ -9,15 +9,14 @@ MPI_GPU ?= DFLAGS += -DCUDA DFLAGS += -DMPI_CHOLLA DFLAGS += -DPRECISION=2 -DFLAGS += -DPPMP +# Currently MHD only supports PCM reconstruction. Higher order reconstruction +# methods will be added later +DFLAGS += -DPCM DFLAGS += -DHLLD DFLAGS += -DMHD -ifeq ($(findstring cosmology,$(TYPE)),cosmology) -DFLAGS += -DSIMPLE -else +# MHD only supports the Van Leer integrator DFLAGS += -DVL -endif # need this if using Disk_3D # DFLAGS += -DDISK_ICS @@ -45,3 +44,15 @@ DFLAGS += $(OUTPUT) #If not specified, MPI_GPU is off by default #This is set in the system make.host file DFLAGS += $(MPI_GPU) + +# NOTE: The following macros are to help facilitate debugging and should not be +# used on scientific runs + +# Do CUDA error checking +DFLAGS += -DCUDA_ERROR_CHECK + +# Limit the number of steps to evolve. +# DFLAGS += -DN_STEPS_LIMIT=1000 + +# Output on every time step +# DFLAGS += -DOUTPUT_ALWAYS diff --git a/cholla-tests-data b/cholla-tests-data index 66d592821..c069bb7a6 160000 --- a/cholla-tests-data +++ b/cholla-tests-data @@ -1 +1 @@ -Subproject commit 66d5928213b495c2fef61b0653b90a25ae3aa7cf +Subproject commit c069bb7a6de79546f60d3ea47f6c10ba19df3c76 diff --git a/examples/1D/sound_wave.txt b/examples/1D/sound_wave.txt index c6555c662..97b7c92b1 100644 --- a/examples/1D/sound_wave.txt +++ b/examples/1D/sound_wave.txt @@ -30,18 +30,17 @@ outdir=./ ################################################# # Parameters for linear wave problems -# initial density +# initial density rho=1.0 -# velocity in the x direction +# velocity in the x direction vx=0 # velocity in the y direction vy=0 # velocity in the z direction vz=0 -# initial pressure +# initial pressure P=0.6 # amplitude of perturbing oscillations A=1e-4 # value of gamma gamma=1.666666666666667 - diff --git a/examples/2D/sound_wave.txt b/examples/2D/sound_wave.txt index d69b1270f..58608bac2 100644 --- a/examples/2D/sound_wave.txt +++ b/examples/2D/sound_wave.txt @@ -33,18 +33,17 @@ outdir=./ ################################################# # Parameters for linear wave problems -# initial density +# initial density rho=1.0 -# velocity in the x direction +# velocity in the x direction vx=0 # velocity in the y direction vy=0 # velocity in the z direction vz=0 -# initial pressure +# initial pressure P=0.6 # amplitude of perturbing oscillations A=1e-4 # value of gamma gamma=1.666666666666667 - diff --git a/examples/3D/Brio_and_Wu.txt b/examples/3D/Brio_and_Wu.txt index a742ae207..fba126396 100644 --- a/examples/3D/Brio_and_Wu.txt +++ b/examples/3D/Brio_and_Wu.txt @@ -6,11 +6,11 @@ ################################################ # number of grid cells in the x dimension -nx=32 +nx=256 # number of grid cells in the y dimension -ny=32 +ny=256 # number of grid cells in the z dimension -nz=32 +nz=256 # final output time tout=0.1 # time interval for output diff --git a/examples/3D/Ryu_and_Jones_1a.txt b/examples/3D/Ryu_and_Jones_1a.txt new file mode 100644 index 000000000..168fcdffa --- /dev/null +++ b/examples/3D/Ryu_and_Jones_1a.txt @@ -0,0 +1,74 @@ +# +# Parameter File for 3D Ryu & Jones MHD shock tube 4d. +# Citation: Ryu & Jones 1995 "Numerical Magnetohydrodynamics in Astrophysics: +# Algorithms and Tests for One-Dimensional Flow" +# +# Note: There are many shock tubes in this paper. This settings file is +# specifically for shock tube 4d +# + +################################################ +# number of grid cells in the x dimension +nx=32 +# number of grid cells in the y dimension +ny=32 +# number of grid cells in the z dimension +nz=32 +# final output time +tout=0.08 +# time interval for output +outstep=0.08 +# name of initial conditions +init=Riemann + +# domain properties +xmin=0.0 +ymin=0.0 +zmin=0.0 +xlen=1.0 +ylen=1.0 +zlen=1.0 + +# type of boundary conditions +xl_bcnd=3 +xu_bcnd=3 +yl_bcnd=3 +yu_bcnd=3 +zl_bcnd=3 +zu_bcnd=3 + +# path to output directory +outdir=./ + +################################################# +# Parameters for 1D Riemann problems +# density of left state +rho_l=1.0 +# velocity of left state +vx_l=10.0 +vy_l=0.0 +vz_l=0.0 +# pressure of left state +P_l=20.0 +# Magnetic field of the left state +Bx_l=1.4104739588693909 +By_l=1.4104739588693909 +Bz_l=0.0 + +# density of right state +rho_r=1.0 +# velocity of right state +vx_r=-10.0 +vy_r=0.0 +vz_r=0.0 +# pressure of right state +P_r=1.0 +# Magnetic field of the right state +Bx_r=1.4104739588693909 +By_r=1.4104739588693909 +Bz_r=0.0 + +# location of initial discontinuity +diaph=0.5 +# value of gamma +gamma=1.6666666666666667 diff --git a/examples/3D/Ryu_and_Jones_2a.txt b/examples/3D/Ryu_and_Jones_2a.txt new file mode 100644 index 000000000..f886b4de3 --- /dev/null +++ b/examples/3D/Ryu_and_Jones_2a.txt @@ -0,0 +1,74 @@ +# +# Parameter File for 3D Ryu & Jones MHD shock tube 4d. +# Citation: Ryu & Jones 1995 "Numerical Magnetohydrodynamics in Astrophysics: +# Algorithms and Tests for One-Dimensional Flow" +# +# Note: There are many shock tubes in this paper. This settings file is +# specifically for shock tube 4d +# + +################################################ +# number of grid cells in the x dimension +nx=32 +# number of grid cells in the y dimension +ny=32 +# number of grid cells in the z dimension +nz=32 +# final output time +tout=0.2 +# time interval for output +outstep=0.2 +# name of initial conditions +init=Riemann + +# domain properties +xmin=0.0 +ymin=0.0 +zmin=0.0 +xlen=1.0 +ylen=1.0 +zlen=1.0 + +# type of boundary conditions +xl_bcnd=3 +xu_bcnd=3 +yl_bcnd=3 +yu_bcnd=3 +zl_bcnd=3 +zu_bcnd=3 + +# path to output directory +outdir=./ + +################################################# +# Parameters for 1D Riemann problems +# density of left state +rho_l=1.08 +# velocity of left state +vx_l=1.2 +vy_l=0.01 +vz_l=0.5 +# pressure of left state +P_l=0.95 +# Magnetic field of the left state +Bx_l=7.0898154036220635 +By_l=1.0155412503859613 +Bz_l=0.5641895835477563 + +# density of right state +rho_r=1.0 +# velocity of right state +vx_r=0.0 +vy_r=0.0 +vz_r=0.0 +# pressure of right state +P_r=1.0 +# Magnetic field of the right state +Bx_r=7.0898154036220635 +By_r=1.1283791670955126 +Bz_r=0.5641895835477563 + +# location of initial discontinuity +diaph=0.5 +# value of gamma +gamma=1.6666666666666667 diff --git a/examples/3D/alfven_wave.txt b/examples/3D/alfven_wave.txt new file mode 100644 index 000000000..bfacbc968 --- /dev/null +++ b/examples/3D/alfven_wave.txt @@ -0,0 +1,71 @@ +# +# Parameter File for MHD Alfven Wave +# See [this blog post](https://robertcaddy.com/posts/Classes-and-bugfixing-6/) +# for details on each wave +# The right eigenvector for this wave is: +# (1/3) * [0, 0, +/-1, -/+2*sqrt(2), 0, -1, 2*sqrt(2), 0] +# The terms with two sign options: use the left one for right moving waves and +# the right one for left moving waves +# + +################################################ +# number of grid cells in the x dimension +nx=256 +# number of grid cells in the y dimension +ny=256 +# number of grid cells in the z dimension +nz=256 +# final output time +tout=1.0 +# time interval for output +outstep=1.0 +# name of initial conditions +init=Linear_Wave +# domain properties +xmin=0.0 +ymin=0.0 +zmin=0.0 +xlen=1.0 +ylen=1.0 +zlen=1.0 +# type of boundary conditions +xl_bcnd=1 +xu_bcnd=1 +yl_bcnd=1 +yu_bcnd=1 +zl_bcnd=1 +zu_bcnd=1 +# path to output directory +outdir=./ + +################################################# +# Parameters for linear wave problems +# initial density +rho=1.0 +# velocity in the x direction +vx=0 +# velocity in the y direction +vy=0 +# velocity in the z direction +vz=0 +# initial pressure +P=0.6 +# magnetic field in the x direction +Bx=1 +# magnetic field in the y direction +By=1.5 +# magnetic field in the z direction +Bz=0 +# amplitude of perturbing oscillations +A=1e-6 +# value of gamma +gamma=1.666666666666667 +# The right eigenvectors to set the wave properly +rEigenVec_rho=0 +rEigenVec_MomentumX=0 +rEigenVec_MomentumY=0 +rEigenVec_MomentumZ=-1 +rEigenVec_Bx=0 +rEigenVec_By=0 +rEigenVec_Bz=1 +rEigenVec_E=0 diff --git a/examples/3D/constant.txt b/examples/3D/constant.txt index ca3b411e2..871fbb7b7 100644 --- a/examples/3D/constant.txt +++ b/examples/3D/constant.txt @@ -42,9 +42,9 @@ vz=0 # pressure P=1.380658e-5 # Magnetic Field -Bx=0.0 -By=0.0 -Bz=0.0 +Bx=1.0e-5 +By=2.0e-5 +Bz=3.0e-5 # value of gamma gamma=1.666666667 diff --git a/examples/3D/fast_magnetosonic.txt b/examples/3D/fast_magnetosonic.txt new file mode 100644 index 000000000..bc134a79a --- /dev/null +++ b/examples/3D/fast_magnetosonic.txt @@ -0,0 +1,71 @@ +# +# Parameter File for MHD fast magnetosonic wave +# See [this blog post](https://robertcaddy.com/posts/Classes-and-bugfixing-6/) +# for details on each wave. +# The right eigenvector for this wave is: +# (1/(6*sqrt(5))) * [6, +/-12, -/+4*sqrt(2), -/+2, 0, 8*sqrt(2), 4, 27] +# The terms with two sign options: use the left one for right moving waves and +# the right one for left moving waves +# + +################################################ +# number of grid cells in the x dimension +nx=256 +# number of grid cells in the y dimension +ny=256 +# number of grid cells in the z dimension +nz=256 +# final output time +tout=0.5 +# time interval for output +outstep=0.5 +# name of initial conditions +init=Linear_Wave +# domain properties +xmin=0.0 +ymin=0.0 +zmin=0.0 +xlen=1.0 +ylen=1.0 +zlen=1.0 +# type of boundary conditions +xl_bcnd=1 +xu_bcnd=1 +yl_bcnd=1 +yu_bcnd=1 +zl_bcnd=1 +zu_bcnd=1 +# path to output directory +outdir=./ + +################################################# +# Parameters for linear wave problems +# initial density +rho=1.0 +# velocity in the x direction +vx=0 +# velocity in the y direction +vy=0 +# velocity in the z direction +vz=0 +# initial pressure +P=0.6 +# magnetic field in the x direction +Bx=1 +# magnetic field in the y direction +By=1.5 +# magnetic field in the z direction +Bz=0 +# amplitude of perturbing oscillations +A=1e-6 +# value of gamma +gamma=1.666666666666667 +# The right eigenvectors to set the wave properly +rEigenVec_rho=0.4472135954999579 +rEigenVec_MomentumX=0.8944271909999159 +rEigenVec_MomentumY=-0.4472135954999579 +rEigenVec_MomentumZ=0.0 +rEigenVec_Bx=0.0 +rEigenVec_By=0.8944271909999159 +rEigenVec_Bz=0.0 +rEigenVec_E=2.0124611797498106 diff --git a/examples/3D/mhd_contact_wave.txt b/examples/3D/mhd_contact_wave.txt new file mode 100644 index 000000000..9250bba5a --- /dev/null +++ b/examples/3D/mhd_contact_wave.txt @@ -0,0 +1,71 @@ +# +# Parameter File for MHD contact wave +# See [this blog post](https://robertcaddy.com/posts/Classes-and-bugfixing-6/) +# for details on each wave +# The right eigenvector for this wave is: +# (1/2) * [2, +/-2, 0, 0, 0, 0, 0, 1] +# The terms with two sign options: use the left one for right moving waves and +# the right one for left moving waves +# + +################################################ +# number of grid cells in the x dimension +nx=256 +# number of grid cells in the y dimension +ny=256 +# number of grid cells in the z dimension +nz=256 +# final output time +tout=1.0 +# time interval for output +outstep=1.0 +# name of initial conditions +init=Linear_Wave +# domain properties +xmin=0.0 +ymin=0.0 +zmin=0.0 +xlen=1.0 +ylen=1.0 +zlen=1.0 +# type of boundary conditions +xl_bcnd=1 +xu_bcnd=1 +yl_bcnd=1 +yu_bcnd=1 +zl_bcnd=1 +zu_bcnd=1 +# path to output directory +outdir=./ + +################################################# +# Parameters for linear wave problems +# initial density +rho=1.0 +# velocity in the x direction +vx=1 +# velocity in the y direction +vy=0 +# velocity in the z direction +vz=0 +# initial pressure +P=0.6 +# magnetic field in the x direction +Bx=1 +# magnetic field in the y direction +By=1.5 +# magnetic field in the z direction +Bz=0 +# amplitude of perturbing oscillations +A=1e-1 +# value of gamma +gamma=1.666666666666667 +# The right eigenvectors to set the wave properly +rEigenVec_rho=1.0 +rEigenVec_MomentumX=1.0 +rEigenVec_MomentumY=0.0 +rEigenVec_MomentumZ=0.0 +rEigenVec_Bx=0.0 +rEigenVec_By=0.0 +rEigenVec_Bz=0.0 +rEigenVec_E=0.5 diff --git a/examples/3D/slow_magnetosonic.txt b/examples/3D/slow_magnetosonic.txt new file mode 100644 index 000000000..960952b5f --- /dev/null +++ b/examples/3D/slow_magnetosonic.txt @@ -0,0 +1,72 @@ +# +# Parameter File for MHD slow magnetosonic wave +# See [this blog post](https://robertcaddy.com/posts/Classes-and-bugfixing-6/) +# for details on each wave +# The right eigenvector for this wave is: +# (1/(6*sqrt(5))) * [12, +/-6, +/-8*sqrt(2), +/-4, 0, -4*sqrt(2), -2, 9] +# The terms with two sign options: use the left one for right moving waves and +# the right one for left moving waves +# + +################################################ +# number of grid cells in the x dimension +nx=256 +# number of grid cells in the y dimension +ny=256 +# number of grid cells in the z dimension +nz=256 +# final output time +tout=2.0 +# time interval for output +outstep=2.0 +# name of initial conditions +init=Linear_Wave +# domain properties +xmin=0.0 +ymin=0.0 +zmin=0.0 +xlen=1.0 +ylen=1.0 +zlen=1.0 +# type of boundary conditions +xl_bcnd=1 +xu_bcnd=1 +yl_bcnd=1 +yu_bcnd=1 +zl_bcnd=1 +zu_bcnd=1 +# path to output directory +outdir=./ + +################################################# +# Parameters for linear wave problems +# initial density +rho=1.0 +# velocity in the x direction +vx=0 +# velocity in the y direction +vy=0 +# velocity in the z direction +vz=0 +# initial pressure +P=0.6 +# magnetic field in the x direction +Bx=1 +# magnetic field in the y direction +By=1.5 +# magnetic field in the z direction +Bz=0 +# amplitude of perturbing oscillations +A=1e-6 +# value of gamma +gamma=1.666666666666667 +# The right eigenvectors to set the wave properly +rEigenVec_rho=0.8944271909999159 +rEigenVec_MomentumX=0.4472135954999579 +rEigenVec_MomentumY=0.8944271909999159 +rEigenVec_MomentumZ=0.0 +rEigenVec_Bx=0.0 +rEigenVec_By=-0.4472135954999579 +rEigenVec_Bz=0.0 +rEigenVec_E=0.6708203932499369 + diff --git a/examples/3D/sound_wave.txt b/examples/3D/sound_wave.txt index 0f3866226..6c226c0ab 100644 --- a/examples/3D/sound_wave.txt +++ b/examples/3D/sound_wave.txt @@ -34,18 +34,17 @@ outdir=./ ################################################# # Parameters for linear wave problems -# initial density +# initial density rho=1.0 -# velocity in the x direction +# velocity in the x direction vx=0 # velocity in the y direction vy=0 # velocity in the z direction vz=0 -# initial pressure +# initial pressure P=0.6 # amplitude of perturbing oscillations A=1e-4 # value of gamma gamma=1.666666666666667 - diff --git a/src/global/global.cpp b/src/global/global.cpp index 1f6a5cbfa..ab384144b 100644 --- a/src/global/global.cpp +++ b/src/global/global.cpp @@ -329,6 +329,26 @@ void parse_param(char *name,char *value, struct parameters *parms){ parms->Bz_r = atof(value); else if (strcmp(name, "diaph")==0) parms->diaph = atof(value); + else if (strcmp(name, "rEigenVec_rho")==0) + parms->rEigenVec_rho = atof(value); + else if (strcmp(name, "rEigenVec_MomentumX")==0) + parms->rEigenVec_MomentumX = atof(value); + else if (strcmp(name, "rEigenVec_MomentumY")==0) + parms->rEigenVec_MomentumY = atof(value); + else if (strcmp(name, "rEigenVec_MomentumZ")==0) + parms->rEigenVec_MomentumZ = atof(value); + else if (strcmp(name, "rEigenVec_E")==0) + parms->rEigenVec_E = atof(value); + else if (strcmp(name, "rEigenVec_Bx")==0) + parms->rEigenVec_Bx = atof(value); + else if (strcmp(name, "rEigenVec_By")==0) + parms->rEigenVec_By = atof(value); + else if (strcmp(name, "rEigenVec_Bz")==0) + parms->rEigenVec_Bz = atof(value); + else if (strcmp(name, "pitch")==0) + parms->pitch = atof(value); + else if (strcmp(name, "yaw")==0) + parms->yaw = atof(value); #ifdef PARTICLES else if (strcmp(name, "prng_seed")==0) parms->prng_seed = atoi(value); diff --git a/src/global/global.h b/src/global/global.h index 79d3dbc88..50f0acf03 100644 --- a/src/global/global.h +++ b/src/global/global.h @@ -42,7 +42,7 @@ typedef double Real; #define MSUN_CGS 1.98847e33; //Msun in gr #define KPC_CGS 3.086e21; //kpc in cm #define KM_CGS 1e5; //km in cm -#define MH 1.67262171e-24 //Mass of hydrogen [g] +#define MH 1.67262171e-24 //Mass of hydrogen [g] #define TIME_UNIT 3.15569e10 // 1 kyr in s #define LENGTH_UNIT 3.08567758e21 // 1 kpc in cm @@ -235,9 +235,9 @@ struct parameters Real vz; Real P; Real A; - Real Bx; - Real By; - Real Bz; + Real Bx=0; + Real By=0; + Real Bz=0; Real rho_l; Real vx_l; Real vy_l=0; @@ -255,6 +255,16 @@ struct parameters Real By_r; Real Bz_r; Real diaph; + Real rEigenVec_rho = 0; + Real rEigenVec_MomentumX = 0; + Real rEigenVec_MomentumY = 0; + Real rEigenVec_MomentumZ = 0; + Real rEigenVec_E = 0; + Real rEigenVec_Bx = 0; + Real rEigenVec_By = 0; + Real rEigenVec_Bz = 0; + Real pitch = 0; + Real yaw = 0; #ifdef PARTICLES // The random seed for particle simulations. With the default of 0 then a // machine dependent seed will be generated. @@ -294,7 +304,7 @@ struct parameters int bc_potential_type; #if defined(COOLING_GRACKLE) || defined (CHEMISTRY_GPU) char UVB_rates_file[MAXLEN]; //File for the UVB photoheating and photoionization rates of HI, HeI and HeII -#endif +#endif #ifdef ANALYSIS char analysis_scale_outputs_file[MAXLEN]; //File for the scale_factor output values for cosmological simulations {{}} char analysisdir[MAXLEN]; diff --git a/src/global/global_cuda.cu b/src/global/global_cuda.cu index bd2e235c1..2153b1615 100644 --- a/src/global/global_cuda.cu +++ b/src/global/global_cuda.cu @@ -9,16 +9,12 @@ bool memory_allocated; Real *dev_conserved, *dev_conserved_half; Real *Q_Lx, *Q_Rx, *Q_Ly, *Q_Ry, *Q_Lz, *Q_Rz, *F_x, *F_y, *F_z; +Real *ctElectricFields; Real *eta_x, *eta_y, *eta_z, *etah_x, *etah_y, *etah_z; -Real *dev_dti; //Arrays for potential in GPU: Will be set to NULL if not using GRAVITY Real *dev_grav_potential; Real *temp_potential; Real *buffer_potential; -// Arrays for calc_dt -Real *host_dti_array; -Real *dev_dti_array; - #endif //CUDA diff --git a/src/global/global_cuda.h b/src/global/global_cuda.h index 9150647c0..61cbc0752 100644 --- a/src/global/global_cuda.h +++ b/src/global/global_cuda.h @@ -25,14 +25,12 @@ extern bool memory_allocated; // Flag becomes true after allocating the memory o // conserved variables extern Real *dev_conserved, *dev_conserved_half; // input states and associated interface fluxes (Q* and F* from Stone, 2008) +// Note that for hydro the size of these arrays is n_fields*n_cells*sizeof(Real) +// while for MHD it is (n_fields-1)*n_cells*sizeof(Real), i.e. they has one +// fewer field than you would expect extern Real *Q_Lx, *Q_Rx, *Q_Ly, *Q_Ry, *Q_Lz, *Q_Rz, *F_x, *F_y, *F_z; - -// Scalar for storing device side hydro/MHD time steps -extern Real *dev_dti; - -// array of inverse timesteps for dt calculation (brought back by Alwin May 24 2022) -extern Real *host_dti_array; -extern Real *dev_dti_array; +// Constrained transport electric fields +extern Real *ctElectricFields; //Arrays for potential in GPU: Will be set to NULL if not using GRAVITY extern Real *dev_grav_potential; diff --git a/src/grid/boundary_conditions.cpp b/src/grid/boundary_conditions.cpp index aab040ede..afe0e0a42 100644 --- a/src/grid/boundary_conditions.cpp +++ b/src/grid/boundary_conditions.cpp @@ -38,14 +38,14 @@ void Grid3D::Set_Boundary_Conditions_Grid( parameters P){ #ifdef GRAVITY #ifdef CPU_TIME Timer.Pot_Boundaries.Start(); - #endif + #endif //CPU_TIME Grav.TRANSFER_POTENTIAL_BOUNDARIES = true; Set_Boundary_Conditions(P); Grav.TRANSFER_POTENTIAL_BOUNDARIES = false; #ifdef CPU_TIME Timer.Pot_Boundaries.End(); - #endif - #endif + #endif //CPU_TIME + #endif //GRAVITY } /*! \fn void Set_Boundary_Conditions(parameters P) diff --git a/src/grid/cuda_boundaries.cu b/src/grid/cuda_boundaries.cu index f69ac3c59..8b689601e 100644 --- a/src/grid/cuda_boundaries.cu +++ b/src/grid/cuda_boundaries.cu @@ -3,9 +3,9 @@ #include "../global/global_cuda.h" #include "cuda_boundaries.h" -__device__ int FindIndex(int ig, int nx, int flag, int face, int n_ghost, Real *a, int &idMag); +__device__ int FindIndex(int ig, int nx, int flag, int face, int n_ghost, Real *a); -__device__ int SetBoundaryMapping(int ig, int jg, int kg, Real *a, int flags[],int nx, int ny, int nz, int n_ghost, int &magneticIdx); +__device__ int SetBoundaryMapping(int ig, int jg, int kg, Real *a, int flags[],int nx, int ny, int nz, int n_ghost); __global__ void PackBuffers3DKernel(Real * buffer, Real * c_head, int isize, int jsize, int ksize, int nx, int ny, int idxoffset, int buffer_ncells, int n_fields, int n_cells) { @@ -66,7 +66,7 @@ __global__ void SetGhostCellsKernel(Real * c_head, int f0, int f1, int f2, int f3, int f4, int f5, int isize, int jsize, int ksize, int imin, int jmin, int kmin, int dir){ - int id,i,j,k,gidx,idx,ii, magneticIdx; + int id,i,j,k,gidx,idx,ii; Real a[3] = {1.,1.,1.}; int flags[6] = {f0,f1,f2,f3,f4,f5}; @@ -93,17 +93,11 @@ __global__ void SetGhostCellsKernel(Real * c_head, gidx = i + j*nx + k*nx*ny; // calculate idx (index of real cell) and a[:] for reflection - idx = SetBoundaryMapping(i,j,k,&a[0],flags,nx,ny,nz,n_ghost,magneticIdx); + idx = SetBoundaryMapping(i,j,k,&a[0],flags,nx,ny,nz,n_ghost); if (idx>=0){ for (ii=0; ii1) { // set index on -x face if (ig < n_ghost) { - ir = FindIndex(ig, nx, flags[0], 0, n_ghost, &a[0], irMag); + ir = FindIndex(ig, nx, flags[0], 0, n_ghost, &a[0]); } // set index on +x face else if (ig >= nx-n_ghost) { - ir = FindIndex(ig, nx, flags[1], 1, n_ghost, &a[0], irMag); + ir = FindIndex(ig, nx, flags[1], 1, n_ghost, &a[0]); } // set i index for multi-D problems else { ir = ig; - #ifdef MHD - irMag = ig; - #endif //MHD } // if custom x boundaries are needed, set index to -1 and return if (ir < 0) { - #ifdef MHD - magneticIdx = -1; - #endif //MHD return idx = -1; } // otherwise add i index to ghost cell mapping idx += ir; - #ifdef MHD - magneticIdx += irMag; - #endif //MHD } @@ -201,33 +185,24 @@ __device__ int SetBoundaryMapping(int ig, int jg, int kg, Real *a, int flags[], // set index on -y face if (jg < n_ghost) { - jr = FindIndex(jg, ny, flags[2], 0, n_ghost, &a[1], jrMag); + jr = FindIndex(jg, ny, flags[2], 0, n_ghost, &a[1]); } // set index on +y face else if (jg >= ny-n_ghost) { - jr = FindIndex(jg, ny, flags[3], 1, n_ghost, &a[1], jrMag); + jr = FindIndex(jg, ny, flags[3], 1, n_ghost, &a[1]); } // set j index for multi-D problems else { jr = jg; - #ifdef MHD - jrMag = jg; - #endif //MHD } // if custom y boundaries are needed, set index to -1 and return if (jr < 0) { - #ifdef MHD - magneticIdx = -1; - #endif //MHD return idx = -1; } // otherwise add j index to ghost cell mapping idx += nx*jr; - #ifdef MHD - magneticIdx += nx*jrMag; - #endif //MHD } @@ -236,38 +211,29 @@ __device__ int SetBoundaryMapping(int ig, int jg, int kg, Real *a, int flags[], // set index on -z face if (kg < n_ghost) { - kr = FindIndex(kg, nz, flags[4], 0, n_ghost, &a[2], krMag); + kr = FindIndex(kg, nz, flags[4], 0, n_ghost, &a[2]); } // set index on +z face else if (kg >= nz-n_ghost) { - kr = FindIndex(kg, nz, flags[5], 1, n_ghost, &a[2], krMag); + kr = FindIndex(kg, nz, flags[5], 1, n_ghost, &a[2]); } // set k index for multi-D problems else { kr = kg; - #ifdef MHD - krMag = kg; - #endif //MHD } // if custom z boundaries are needed, set index to -1 and return if (kr < 0) { - #ifdef MHD - magneticIdx = -1; - #endif //MHD return idx = -1; } // otherwise add k index to ghost cell mapping idx += nx*ny*kr; - #ifdef MHD - magneticIdx += nx*ny*krMag; - #endif //MHD } return idx; } -__device__ int FindIndex(int ig, int nx, int flag, int face, int n_ghost, Real *a, int &idMag){ +__device__ int FindIndex(int ig, int nx, int flag, int face, int n_ghost, Real *a){ int id; // lower face @@ -278,45 +244,27 @@ __device__ int FindIndex(int ig, int nx, int flag, int face, int n_ghost, Real * // periodic case 1: id = ig+nx-2*n_ghost; - #ifdef MHD - idMag = id; - #endif //MHD break; // reflective case 2: id = 2*n_ghost-ig-1; *(a) = -1.0; - #ifdef MHD - idMag = id - 1; - #endif //MHD break; // transmissive case 3: id = n_ghost; - #ifdef MHD - idMag = id - 1; - #endif //MHD break; // custom case 4: id = -1; - #ifdef MHD - idMag = -1; - #endif //MHD break; // MPI case 5: id = ig; - #ifdef MHD - idMag = id; - #endif //MHD break; // default is periodic default: id = ig+nx-2*n_ghost; - #ifdef MHD - idMag = id; - #endif //MHD } } // upper face @@ -349,9 +297,6 @@ __device__ int FindIndex(int ig, int nx, int flag, int face, int n_ghost, Real * default: id = ig-nx+2*n_ghost; } - #ifdef MHD - idMag = id; - #endif //MHD } return id; } diff --git a/src/grid/grid3D.cpp b/src/grid/grid3D.cpp index e100def1c..cadcb3653 100644 --- a/src/grid/grid3D.cpp +++ b/src/grid/grid3D.cpp @@ -43,6 +43,10 @@ #include "../dust/dust_cuda.h" // provides Dust_Update #endif +#ifdef MHD + #include "../mhd/magnetic_divergence.h" +#endif //MHD + /*! \fn Grid3D(void) * \brief Constructor for the Grid. */ @@ -72,6 +76,15 @@ Grid3D::Grid3D(void) H.n_ghost_potential_offset = H.n_ghost - N_GHOST_POTENTIAL; #endif + #ifdef MHD + // Set the number of ghost cells high enough for MHD + if (H.n_ghost < 3) + { + chprintf("Insufficient number of ghost cells for MHD. H.n_ghost was %i, setting to 3.\n", H.n_ghost); + H.n_ghost = 3; + } + #endif //MHD + } /*! \fn void Get_Position(long i, long j, long k, Real *xpos, Real *ypos, Real *zpos) @@ -139,7 +152,7 @@ void Grid3D::Initialize(struct parameters *P) // Set the CFL coefficient (a global variable) C_cfl = 0.3; - + #ifdef AVERAGE_SLOW_CELLS H.min_dt_slow = 1e-100; //Initialize the minumum dt to a tiny number #endif // AVERAGE_SLOW_CELLS @@ -296,6 +309,7 @@ void Grid3D::AllocateMemory(void) // allocate memory for the conserved variable arrays on the device CudaSafeCall( cudaMalloc((void**)&C.device, H.n_fields*H.n_cells*sizeof(Real)) ); + cuda_utilities::initGpuMemory(C.device, H.n_fields*H.n_cells*sizeof(Real)); C.d_density = C.device; C.d_momentum_x = &(C.device[H.n_cells]); C.d_momentum_y = &(C.device[2*H.n_cells]); @@ -316,14 +330,6 @@ void Grid3D::AllocateMemory(void) C.d_GasEnergy = &(C.device[(H.n_fields-1)*H.n_cells]); #endif // DE - - // arrays that hold the max_dti calculation for hydro for each thread block (pre reduction) - int ngrid = (H.n_cells + TPB - 1) / TPB; - CudaSafeCall( cudaHostAlloc(&host_dti_array, ngrid*sizeof(Real), cudaHostAllocDefault) ); - CudaSafeCall( cudaMalloc((void**)&dev_dti_array, ngrid*sizeof(Real)) ); - CudaSafeCall( cudaMalloc((void**)&dev_dti, sizeof(Real)) ); - - #if defined( GRAVITY ) CudaSafeCall( cudaHostAlloc(&C.Grav_potential, H.n_cells*sizeof(Real), cudaHostAllocDefault) ); CudaSafeCall( cudaMalloc((void**)&C.d_Grav_potential, H.n_cells*sizeof(Real)) ); @@ -451,7 +457,7 @@ Real Grid3D::Update_Grid(void) #ifdef VL VL_Algorithm_2D_CUDA(C.device, H.nx, H.ny, x_off, y_off, H.n_ghost, H.dx, H.dy, H.xbound, H.ybound, H.dt, H.n_fields); #endif //VL - #ifdef SIMPLE + #ifdef SIMPLE Simple_Algorithm_2D_CUDA(C.device, H.nx, H.ny, x_off, y_off, H.n_ghost, H.dx, H.dy, H.xbound, H.ybound, H.dt, H.n_fields); #endif //SIMPLE #endif //CUDA @@ -493,7 +499,7 @@ Real Grid3D::Update_Grid(void) Timer.Chemistry.RecordTime( Chem.H.runtime_chemistry_step ); #endif #endif - + #ifdef AVERAGE_SLOW_CELLS //Set the min_delta_t for averaging a slow cell Real max_dti_slow; @@ -540,7 +546,7 @@ Real Grid3D::Update_Hydro_Grid( ){ #ifdef ONLY_PARTICLES // Don't integrate the Hydro when only solving for particles return 1e-10; - #endif + #endif //ONLY_PARTICLES Real dti; @@ -551,26 +557,26 @@ Real Grid3D::Update_Hydro_Grid( ){ #ifdef GRAVITY // Extrapolate gravitational potential for hydro step Extrapolate_Grav_Potential(); - #endif + #endif //GRAVITY dti = Update_Grid(); #ifdef CPU_TIME #ifdef CHEMISTRY_GPU Timer.Hydro.Subtract(Chem.H.runtime_chemistry_step); - //Subtract the time spent on the Chemical Update - #endif + //Subtract the time spent on the Chemical Update + #endif //CHEMISTRY_GPU Timer.Hydro.End(); #endif //CPU_TIME #ifdef COOLING_GRACKLE #ifdef CPU_TIME Timer.Cooling.Start(); - #endif + #endif //CPU_TIME Do_Cooling_Step_Grackle( ); #ifdef CPU_TIME Timer.Cooling.End(); - #endif + #endif //CPU_TIME #endif//COOLING_GRACKLE @@ -603,6 +609,39 @@ void Grid3D::Update_Time(){ } +#ifdef MHD + void Grid3D::checkMagneticDivergence(Grid3D &G, struct parameters P, int nfile) + { + // Compute the local value of the divergence + H.max_magnetic_divergence = mhd::launchCalculateMagneticDivergence(C.device, H.dx, H.dy, H.dz, H.nx, H.ny, H.nz, H.n_cells); + + #ifdef MPI_CHOLLA + // Now that we have the local maximum let's get the global maximum + H.max_magnetic_divergence = ReduceRealMax(H.max_magnetic_divergence); + #endif //MPI_CHOLLA + + // If the magnetic divergence is greater than the limit then raise a warning and exit + if (H.max_magnetic_divergence > H.magnetic_divergence_limit) + { + // Report the error and exit + chprintf("The magnetic divergence has exceeded the maximum allowed value. Divergence = %7.4e, the maximum allowed divergence = %7.4e\n", + H.max_magnetic_divergence, H.magnetic_divergence_limit); + chexit(-1); + } + else if (H.max_magnetic_divergence < 0.0) + { + // Report the error and exit + chprintf("The magnetic divergence is negative. Divergence = %7.4e\n", + H.max_magnetic_divergence); + chexit(-1); + } + else // The magnetic divergence is within acceptable bounds + { + chprintf("Global maximum magnetic divergence = %7.4e\n", H.max_magnetic_divergence); + } + } +#endif //MHD + /*! \fn void Reset(void) * \brief Reset the Grid3D class. */ void Grid3D::Reset(void) @@ -623,11 +662,6 @@ void Grid3D::FreeMemory(void) // free the conserved variable arrays CudaSafeCall( cudaFreeHost(C.host) ); - // free the timestep arrays - CudaSafeCall( cudaFreeHost(host_dti_array) ); - cudaFree(dev_dti_array); - cudaFree(dev_dti); - #ifdef GRAVITY CudaSafeCall( cudaFreeHost(C.Grav_potential) ); CudaSafeCall( cudaFree(C.d_Grav_potential) ); diff --git a/src/grid/grid3D.h b/src/grid/grid3D.h index f121bd423..c98971189 100644 --- a/src/grid/grid3D.h +++ b/src/grid/grid3D.h @@ -245,6 +245,11 @@ struct Header Real sphere_center_y; Real sphere_center_z; + #ifdef MHD + Real max_magnetic_divergence; + Real const magnetic_divergence_limit = 1.0E-14; + #endif //MHD + #ifdef GRAVITY /*! \var n_ghost_potential_offset @@ -450,26 +455,6 @@ class Grid3D void set_dt_Gravity(); #endif - /*! \fn Real calc_dti_CPU_1D() - * \brief Calculate the maximum inverse timestep on 1D, according to the CFL condition (Toro 6.17). */ - Real calc_dti_CPU_1D(); - - /*! \fn Real calc_dti_CPU_2D() - * \brief Calculate the maximum inverse timestep on 2D, according to the CFL condition (Toro 6.17). */ - Real calc_dti_CPU_2D(); - - /*! \fn Real calc_dti_CPU_3D_function() - * \brief Calculate the maximum inverse timestep on 3D using openMP, according to the CFL condition (Toro 6.17). */ - Real calc_dti_CPU_3D_function( int g_start, int g_end ); - - /*! \fn Real calc_dti_CPU_3D() - * \brief Calculate the maximum inverse timestep on 3D, according to the CFL condition (Toro 6.17). */ - Real calc_dti_CPU_3D(); - - /*! \fn Real calc_dti_CPU() - * \brief Calculate the maximum inverse timestep, according to the CFL condition (Toro 6.17). */ - Real calc_dti_CPU(); - /*! \fn void Update_Grid(void) * \brief Update the conserved quantities in each cell. */ Real Update_Grid(void); @@ -480,6 +465,10 @@ class Grid3D void Update_Time(); + #ifdef MHD + void checkMagneticDivergence(Grid3D &G, struct parameters P, int nfile); + #endif //MHD + /*! \fn void Write_Header_Text(FILE *fp) * \brief Write the relevant header info to a text output file. */ void Write_Header_Text(FILE *fp); @@ -553,6 +542,41 @@ class Grid3D * \brief Sine wave perturbation. */ void Sound_Wave(Real rho, Real vx, Real vy, Real vz, Real P, Real A); + /*! + * \brief Initialize the grid with a simple linear wave. + * + * \param[in] rho The background density + * \param[in] vx The background velocity in the X-direction + * \param[in] vy The background velocity in the Y-direction + * \param[in] vz The background velocity in the Z-direction + * \param[in] P The background pressure + * \param[in] A The amplitude of the wave + * \param[in] Bx The background magnetic field in the X-direction + * \param[in] By The background magnetic field in the Y-direction + * \param[in] Bz The background magnetic field in the Z-direction + * \param[in] rEigenVec_rho The right eigenvector component for the density + * \param[in] rEigenVec_MomentumX The right eigenvector component for the velocity + * in the X-direction + * \param[in] rEigenVec_MomentumY The right eigenvector component for the velocity + * in the Y-direction + * \param[in] rEigenVec_MomentumZ The right eigenvector component for the velocity + * in the Z-direction + * \param[in] rEigenVec_E The right eigenvector component for the energy + * \param[in] rEigenVec_Bx The right eigenvector component for the magnetic + * field in the X-direction + * \param[in] rEigenVec_By The right eigenvector component for the magnetic + * field in the Y-direction + * \param[in] rEigenVec_Bz The right eigenvector component for the magnetic + * field in the Z-direction + * \param[in] pitch The pitch angle of the linear wave + * \param[in] yaw The yaw angle of the linear wave + */ + void Linear_Wave(Real rho, Real vx, Real vy, Real vz, Real P, Real A, + Real Bx, Real By, Real Bz, Real rEigenVec_rho, + Real rEigenVec_MomentumX, Real rEigenVec_MomentumY, Real rEigenVec_MomentumZ, + Real rEigenVec_E, Real rEigenVec_Bx, Real rEigenVec_By, + Real rEigenVec_Bz, Real pitch, Real yaw); + /*! \fn void Square_Wave(Real rho, Real vx, Real vy, Real vz, Real P, Real A) * \brief Square wave density perturbation with amplitude A*rho in pressure equilibrium. */ void Square_Wave(Real rho, Real vx, Real vy, Real vz, Real P, Real A); diff --git a/src/grid/initial_conditions.cpp b/src/grid/initial_conditions.cpp index 4786fb446..5d98d8367 100644 --- a/src/grid/initial_conditions.cpp +++ b/src/grid/initial_conditions.cpp @@ -13,10 +13,13 @@ #include "../mpi/mpi_routines.h" #include "../io/io.h" #include "../utils/error_handling.h" +#include "../utils/mhd_utilities.h" +#include "../utils/math_utilities.h" #include #include #include #include +#include using namespace std; @@ -31,6 +34,10 @@ void Grid3D::Set_Initial_Conditions(parameters P) { Constant(P.rho, P.vx, P.vy, P.vz, P.P, P.Bx, P.By, P.Bz); } else if (strcmp(P.init, "Sound_Wave")==0) { Sound_Wave(P.rho, P.vx, P.vy, P.vz, P.P, P.A); + } else if (strcmp(P.init, "Linear_Wave")==0) { + Linear_Wave(P.rho, P.vx, P.vy, P.vz, P.P, P.A, P.Bx, P.By, P.Bz, + P.rEigenVec_rho, P.rEigenVec_MomentumX, P.rEigenVec_MomentumY, P.rEigenVec_MomentumZ, + P.rEigenVec_E, P.rEigenVec_Bx, P.rEigenVec_By, P.rEigenVec_Bz, P.pitch, P.yaw); } else if (strcmp(P.init, "Square_Wave")==0) { Square_Wave(P.rho, P.vx, P.vy, P.vz, P.P, P.A); } else if (strcmp(P.init, "Riemann")==0) { @@ -62,9 +69,9 @@ void Grid3D::Set_Initial_Conditions(parameters P) { } else if (strcmp(P.init, "Spherical_Overpressure_3D")==0) { Spherical_Overpressure_3D(); } else if (strcmp(P.init, "Spherical_Overdensity_3D")==0) { - Spherical_Overdensity_3D(); + Spherical_Overdensity_3D(); } else if (strcmp(P.init, "Clouds")==0) { - Clouds(); + Clouds(); } else if (strcmp(P.init, "Read_Grid")==0) { #ifndef ONLY_PARTICLES Read_Grid(P); @@ -116,8 +123,8 @@ void Grid3D::Set_Domain_Properties(struct parameters P) H.zblocal = H.zbound; H.xblocal_max = H.xblocal + P.xlen; - H.yblocal_max = H.yblocal + P.ylen; - H.zblocal_max = H.zblocal + P.zlen; + H.yblocal_max = H.yblocal + P.ylen; + H.zblocal_max = H.zblocal + P.zlen; #else Real nx_param = (Real) nx_global; @@ -237,7 +244,6 @@ void Grid3D::Constant(Real rho, Real vx, Real vy, Real vz, Real P, Real Bx, Real } - /*! \fn void Sound_Wave(Real rho, Real vx, Real vy, Real vz, Real P, Real A) * \brief Sine wave perturbation. */ void Grid3D::Sound_Wave(Real rho, Real vx, Real vy, Real vz, Real P, Real A) @@ -291,6 +297,61 @@ void Grid3D::Sound_Wave(Real rho, Real vx, Real vy, Real vz, Real P, Real A) #ifdef DE C.GasEnergy[id] = P/(gama-1.0); #endif //DE + #ifdef DE + C.GasEnergy[id] = P/(gama-1.0); + #endif //DE + } + } + } + +} + +/*! \fn void Linear_Wave(Real rho, Real vx, Real vy, Real vz, Real P, Real A) + * \brief Sine wave perturbation. */ +void Grid3D::Linear_Wave(Real rho, Real vx, Real vy, Real vz, Real P, Real A, + Real Bx, Real By, Real Bz, Real rEigenVec_rho, + Real rEigenVec_MomentumX, Real rEigenVec_MomentumY, Real rEigenVec_MomentumZ, + Real rEigenVec_E, Real rEigenVec_Bx, Real rEigenVec_By, + Real rEigenVec_Bz, Real pitch, Real yaw) +{ + auto [stagger, junk1, junk2] = math_utils::rotateCoords(H.dx/2, H.dy/2, H.dz/2, pitch, yaw); + + // set initial values of conserved variables + for(int k=H.n_ghost; k(i, j, k, pitch, yaw); + + //get cell index + int id = i + j*H.nx + k*H.nx*H.ny; + + // get cell-centered position + Real x_pos, y_pos, z_pos; + Get_Position(i_rot, j_rot, k_rot, &x_pos, &y_pos, &z_pos); + + // set constant initial states. Note that mhd::utils::computeEnergy + // computes the hydro energy if MHD is turned off + Real sine_wave = std::sin(2.0 * PI * x_pos); + + C.density[id] = rho; + C.momentum_x[id] = rho*vx; + C.momentum_y[id] = rho*vy; + C.momentum_z[id] = rho*vz; + C.Energy[id] = mhd::utils::computeEnergy(P, rho, vx, vy, vz, Bx, By, Bz, gama); + // add small-amplitude perturbations + C.density[id] += A * rEigenVec_rho * sine_wave; + C.momentum_x[id] += A * rEigenVec_MomentumX * sine_wave; + C.momentum_y[id] += A * rEigenVec_MomentumY * sine_wave; + C.momentum_z[id] += A * rEigenVec_MomentumZ * sine_wave; + C.Energy[id] += A * rEigenVec_E * sine_wave; + + #ifdef MHD + sine_wave = std::sin(2.0 * PI * (x_pos+stagger)); + C.magnetic_x[id] = Bx + A * rEigenVec_Bx * sine_wave; + C.magnetic_y[id] = By + A * rEigenVec_By * sine_wave; + C.magnetic_z[id] = Bz + A * rEigenVec_Bz * sine_wave; + #endif //MHD } } } @@ -405,26 +466,6 @@ void Grid3D::Riemann(Real rho_l, Real vx_l, Real vy_l, Real vz_l, Real P_l, Real kend = H.nz; } - #ifdef MHD - auto setMagnetFields = [&] () - { - Real x_pos_face = x_pos + 0.5 * H.dx; - - if (x_pos_face < diaph) - { - C.magnetic_x[id] = Bx_l; - C.magnetic_y[id] = By_l; - C.magnetic_z[id] = Bz_l; - } - else - { - C.magnetic_x[id] = Bx_r; - C.magnetic_y[id] = By_r; - C.magnetic_z[id] = Bz_r; - } - }; - #endif // MHD - // set initial values of conserved variables for(k=kstart-1; k #include #include +#include #include "../utils/gpu.hpp" #include "../global/global.h" @@ -14,6 +15,7 @@ #include "../utils/hydro_utilities.h" #include "../utils/cuda_utilities.h" #include "../utils/reduction_utilities.h" +#include "../utils/DeviceVector.h" __global__ void Update_Conserved_Variables_1D(Real *dev_conserved, Real *dev_F, int n_cells, int x_off, int n_ghost, Real dx, Real xbound, Real dt, Real gamma, int n_fields) @@ -425,8 +427,8 @@ __device__ __host__ Real mhdInverseCrossingTime(Real const &E, Real const &gamma) { // Compute the gas pressure and fast magnetosonic speed - Real gasP = mhdUtils::computeGasPressure(E, d, vx*d, vy*d, vz*d, avgBx, avgBy, avgBz, gamma); - Real cf = mhdUtils::fastMagnetosonicSpeed(d, gasP, avgBx, avgBy, avgBz, gamma); + Real gasP = mhd::utils::computeGasPressure(E, d, vx*d, vy*d, vz*d, avgBx, avgBy, avgBz, gamma); + Real cf = mhd::utils::fastMagnetosonicSpeed(d, gasP, avgBx, avgBy, avgBz, gamma); // Find maximum inverse crossing time in the cell (i.e. minimum crossing time) Real cellMaxInverseDt = fmax((fabs(vx)+cf)/dx, (fabs(vy)+cf)/dy); @@ -470,10 +472,8 @@ __global__ void Calc_dt_1D(Real *dev_conserved, Real *dev_dti, Real gamma, int n } } - // do the block wide reduction (find the max inverse timestep in the block) - // then write it to that block's location in the dev_dti array - max_dti = reduction_utilities::blockReduceMax(max_dti); - if (threadIdx.x == 0) dev_dti[blockIdx.x] = max_dti; + // do the grid wide reduction (find the max inverse timestep in the grid) + reduction_utilities::gridReduceMax(max_dti, dev_dti); } @@ -514,10 +514,8 @@ __global__ void Calc_dt_2D(Real *dev_conserved, Real *dev_dti, Real gamma, int n } } - // do the block wide reduction (find the max inverse timestep in the block) - // then write it to that block's location in the dev_dti array - max_dti = reduction_utilities::blockReduceMax(max_dti); - if (threadIdx.x == 0) dev_dti[blockIdx.x] = max_dti; + // do the grid wide reduction (find the max inverse timestep in the grid) + reduction_utilities::gridReduceMax(max_dti, dev_dti); } @@ -556,7 +554,7 @@ __global__ void Calc_dt_3D(Real *dev_conserved, Real *dev_dti, Real gamma, int n #ifdef MHD // Compute the cell centered magnetic field using a straight average of // the faces - mhdUtils::cellCenteredMagneticFields(dev_conserved, id, xid, yid, zid, n_cells, nx, ny, avgBx, avgBy, avgBz); + mhd::utils::cellCenteredMagneticFields(dev_conserved, id, xid, yid, zid, n_cells, nx, ny, avgBx, avgBy, avgBz); #endif //MHD // Compute the maximum inverse crossing time in the cell @@ -569,62 +567,44 @@ __global__ void Calc_dt_3D(Real *dev_conserved, Real *dev_dti, Real gamma, int n } } - // do the block wide reduction (find the max inverse timestep in the block) - // then write it to that block's location in the dev_dti array - max_dti = reduction_utilities::blockReduceMax(max_dti); - if (threadIdx.x == 0) dev_dti[blockIdx.x] = max_dti; + // do the grid wide reduction (find the max inverse timestep in the grid) + reduction_utilities::gridReduceMax(max_dti, dev_dti); } Real Calc_dt_GPU(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dx, Real dy, Real dz, Real gamma ) { - // set values for GPU kernels - uint threadsPerBlock, numBlocks; - int ngrid = (nx*ny*nz + TPB - 1 )/TPB; - // reduction_utilities::reductionLaunchParams(numBlocks, threadsPerBlock); // Uncomment this if we fix the AtomicDouble bug - Alwin - threadsPerBlock = TPB; - numBlocks = ngrid; - - Real* dev_dti = dev_dti_array; + // Allocate the device memory + cuda_utilities::DeviceVector static dev_dti(1); + // Set the device side inverse time step to the smallest possible double so + // that the reduction isn't using the maximum value of the previous iteration + dev_dti.assign(std::numeric_limits::lowest()); // compute dt and store in dev_dti if (nx > 1 && ny == 1 && nz == 1) //1D { - hipLaunchKernelGGL(Calc_dt_1D, numBlocks, threadsPerBlock, 0, 0, dev_conserved, dev_dti, gamma, n_ghost, nx, dx); + // set launch parameters for GPU kernels. + cuda_utilities::AutomaticLaunchParams static const launchParams(Calc_dt_1D); + hipLaunchKernelGGL(Calc_dt_1D, launchParams.numBlocks, launchParams.threadsPerBlock, 0, 0, + dev_conserved, dev_dti.data(), gamma, n_ghost, nx, dx); } else if (nx > 1 && ny > 1 && nz == 1) //2D { - hipLaunchKernelGGL(Calc_dt_2D, numBlocks, threadsPerBlock, 0, 0, dev_conserved, dev_dti, gamma, n_ghost, nx, ny, dx, dy); + // set launch parameters for GPU kernels. + cuda_utilities::AutomaticLaunchParams static const launchParams(Calc_dt_2D); + hipLaunchKernelGGL(Calc_dt_2D, launchParams.numBlocks, launchParams.threadsPerBlock, 0, 0, + dev_conserved, dev_dti.data(), gamma, n_ghost, nx, ny, dx, dy); } else if (nx > 1 && ny > 1 && nz > 1) //3D { - hipLaunchKernelGGL(Calc_dt_3D, numBlocks, threadsPerBlock, 0, 0, dev_conserved, dev_dti, gamma, n_ghost, n_fields, nx, ny, nz, dx, dy, dz); + // set launch parameters for GPU kernels. + cuda_utilities::AutomaticLaunchParams static const launchParams(Calc_dt_3D); + hipLaunchKernelGGL(Calc_dt_3D, launchParams.numBlocks, launchParams.threadsPerBlock, 0, 0, + dev_conserved, dev_dti.data(), gamma, n_ghost, n_fields, nx, ny, nz, dx, dy, dz); } CudaCheckError(); - Real max_dti=0; - - /* Uncomment the below if we fix the AtomicDouble bug - Alwin - // copy device side max_dti to host side max_dti - - - CudaSafeCall( cudaMemcpy(&max_dti, dev_dti, sizeof(Real), cudaMemcpyDeviceToHost) ); - cudaDeviceSynchronize(); - - return max_dti; - */ - - int dev_dti_length = numBlocks; - CudaSafeCall(cudaMemcpy(host_dti_array,dev_dti, dev_dti_length*sizeof(Real), cudaMemcpyDeviceToHost)); - cudaDeviceSynchronize(); - - for (int i=0;i max_dti_slow){ speed = sqrt(vx*vx + vy*vy + vz*vz); temp = (gamma - 1)*(E - 0.5*(speed*speed)*d)*ENERGY_UNIT/(d*DENSITY_UNIT/0.6/MP)/KB; P = (E - 0.5*d*(vx*vx + vy*vy + vz*vz)) * (gamma - 1.0); cs = sqrt(d_inv * gamma * P)*VELOCITY_UNIT*1e-5; // Average this cell - printf(" Average Slow Cell [ %d %d %d ] -> dt_cell=%f dt_min=%f, n=%.3e, T=%.3e, v=%.3e (%.3e, %.3e, %.3e), cs=%.3e\n", xid, yid, zid, 1./max_dti, 1./max_dti_slow, + printf(" Average Slow Cell [ %d %d %d ] -> dt_cell=%f dt_min=%f, n=%.3e, T=%.3e, v=%.3e (%.3e, %.3e, %.3e), cs=%.3e\n", xid, yid, zid, 1./max_dti, 1./max_dti_slow, dev_conserved[id]*DENSITY_UNIT/0.6/MP, temp, speed*VELOCITY_UNIT*1e-5, vx*VELOCITY_UNIT*1e-5, vy*VELOCITY_UNIT*1e-5, vz*VELOCITY_UNIT*1e-5, cs); Average_Cell_All_Fields( xid, yid, zid, nx, ny, nz, n_cells, n_fields, dev_conserved ); } } } -#endif //AVERAGE_SLOW_CELLS +#endif //AVERAGE_SLOW_CELLS #ifdef DE @@ -829,7 +809,13 @@ __global__ void Partial_Update_Advected_Internal_Energy_3D( Real *dev_conserved, //PRESSURE_DE E = dev_conserved[4*n_cells + id]; GE = dev_conserved[(n_fields-1)*n_cells + id]; - E_kin = 0.5 * d * ( vx*vx + vy*vy + vz*vz ); + E_kin = hydro_utilities::Calc_Kinetic_Energy_From_Velocity(d, vx, vy, vz); + #ifdef MHD + // Add the magnetic energy + Real centeredBx, centeredBy, centeredBz; + mhd::utils::cellCenteredMagneticFields(dev_conserved, id, xid, yid, zid, n_cells, nx, ny, centeredBx, centeredBy, centeredBz) + E_kin += mhd::utils::computeMagneticEnergy(magX, magY, magZ); + #endif //MHD P = hydro_utilities::Get_Pressure_From_DE( E, E - E_kin, GE, gamma ); P = fmax(P, (Real) TINY_NUMBER); diff --git a/src/hydro/hydro_cuda_tests.cu b/src/hydro/hydro_cuda_tests.cu index a6d00e96b..eb9c3f9ed 100644 --- a/src/hydro/hydro_cuda_tests.cu +++ b/src/hydro/hydro_cuda_tests.cu @@ -19,6 +19,7 @@ #include "../global/global_cuda.h" #include "../utils/gpu.hpp" #include "../utils/testing_utilities.h" +#include "../utils/DeviceVector.h" #include "../hydro/hydro_cuda.h" // Include code to test #if defined(CUDA) @@ -44,38 +45,31 @@ TEST(tHYDROCalcDt3D, CorrectInputExpectCorrectOutput) Real dx = 1.0; Real dy = 1.0; Real dz = 1.0; - Real *host_conserved; - Real *dev_conserved; - Real *dev_dti_array; + std::vector host_conserved(n_fields); + cuda_utilities::DeviceVector dev_conserved(n_fields); + cuda_utilities::DeviceVector dev_dti(1); Real gamma = 5.0/3.0; - // Allocate host and device arrays and copy data - cudaHostAlloc(&host_conserved, n_fields*sizeof(Real), cudaHostAllocDefault); - CudaSafeCall(cudaMalloc(&dev_conserved, n_fields*sizeof(Real))); - CudaSafeCall(cudaMalloc(&dev_dti_array, sizeof(Real))); - // Set values of conserved variables for input (host) - host_conserved[0] = 1.0; // density - host_conserved[1] = 0.0; // x momentum - host_conserved[2] = 0.0; // y momentum - host_conserved[3] = 0.0; // z momentum - host_conserved[4] = 1.0; // Energy + host_conserved.at(0) = 1.0; // density + host_conserved.at(1) = 0.0; // x momentum + host_conserved.at(2) = 0.0; // y momentum + host_conserved.at(3) = 0.0; // z momentum + host_conserved.at(4) = 1.0; // Energy // Copy host data to device arrray - CudaSafeCall(cudaMemcpy(dev_conserved, host_conserved, n_fields*sizeof(Real), cudaMemcpyHostToDevice)); - //__global__ void Calc_dt_3D(Real *dev_conserved, Real *dev_dti, Real gamma, int n_ghost, int n_fields, int nx, int ny, int nz, Real dx, Real dy, Real dz) + dev_conserved.cpyHostToDevice(host_conserved); // Run the kernel - hipLaunchKernelGGL(Calc_dt_3D, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, dev_dti_array, gamma, n_ghost, n_fields, nx, ny, nz, dx, dy, dz); + hipLaunchKernelGGL(Calc_dt_3D, dim1dGrid, dim1dBlock, 0, 0, + dev_conserved.data(), dev_dti.data(), gamma, n_ghost, + n_fields, nx, ny, nz, dx, dy, dz); CudaCheckError(); - // Copy the dt value back from the GPU - CudaSafeCall(cudaMemcpy(testDt, dev_dti_array, sizeof(Real), cudaMemcpyDeviceToHost)); - // Compare results // Check for equality and if not equal return difference - double fiducialDt = 1.0540925533894598; - double testData = testDt[0]; + double const fiducialDt = 1.0540925533894598; + double const testData = dev_dti.at(0); double absoluteDiff; int64_t ulpsDiff; bool areEqual; diff --git a/src/integrators/VL_3D_cuda.cu b/src/integrators/VL_3D_cuda.cu index 097708ede..2d23cc636 100644 --- a/src/integrators/VL_3D_cuda.cu +++ b/src/integrators/VL_3D_cuda.cu @@ -1,8 +1,7 @@ /*! \file VL_3D_cuda.cu - * \brief Definitions of the cuda 3D VL algorithm functions. */ + * \brief Definitions of the cuda 3 D VL algorithm functions. */ -#ifdef CUDA -#ifdef VL +#if defined(CUDA) && defined(VL) #include #include @@ -21,8 +20,11 @@ #include "../riemann_solvers/exact_cuda.h" #include "../riemann_solvers/roe_cuda.h" #include "../riemann_solvers/hllc_cuda.h" -#include "../io/io.h" #include "../riemann_solvers/hll_cuda.h" +#include "../riemann_solvers/hlld_cuda.h" +#include "../mhd/ct_electric_fields.h" +#include "../mhd/magnetic_update.h" +#include "../io/io.h" __global__ void Update_Conserved_Variables_3D_half(Real *dev_conserved, Real *dev_conserved_half, Real *dev_F_x, Real *dev_F_y, Real *dev_F_z, int nx, int ny, int nz, int n_ghost, Real dx, Real dy, Real dz, Real dt, Real gamma, int n_fields, Real density_floor); @@ -33,7 +35,6 @@ void VL_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, int Real ybound, Real zbound, Real dt, int n_fields, Real density_floor, Real U_floor, Real *host_grav_potential ) { - //Here, *dev_conserved contains the entire //set of conserved variables on the grid //concatenated into a 1-d array @@ -53,42 +54,87 @@ void VL_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, int if ( !memory_allocated ){ // allocate memory on the GPU - //CudaSafeCall( cudaMalloc((void**)&dev_conserved, n_fields*n_cells*sizeof(Real)) ); dev_conserved = d_conserved; + + // Set the size of the interface and flux arrays + #ifdef MHD + // In MHD/Constrained Transport the interface arrays have one fewer fields + // since the magnetic field that is stored on the face does not require + // reconstructions. Similarly the fluxes have one fewer fields since the + // magnetic field on that face doesn't have an associated flux. Each + // interface array store the magnetic fields on that interface that are + // not perpendicular to the interface and arranged cyclically. I.e. the + // `Q_Lx` interface store the reconstructed Y and Z magnetic fields in + // that order, the `Q_Ly` interface stores the Z and X mangetic fields in + // that order, and the `Q_Lz` interface stores the X and Y magnetic fields + // in that order. These fields start at the (5+NSCALARS)*n_cells and + // (6+NSCALARS)*n_cells locations respectively. The interface state arrays + // store in the interface on the "right" side of the cell, so the flux + // arrays store the fluxes through the right interface + // + // According to the source code of Athena, the following equation relate + // the magnetic flux to the face centered electric fields/EMF. + // -cross(V,B)x is the negative of the x-component of V cross B. Note that + // "X" is the direction the solver is running in this case, not + // necessarily the true "X". + // F_x[(5+NSCALARS)*n_cells] = VxBy - BxVy = -(-cross(V,B))z = -EMF_Z + // F_x[(6+NSCALARS)*n_cells] = VxBz - BxVz = (-cross(V,B))y = EMF_Y + // F_y[(5+NSCALARS)*n_cells] = VxBy - BxVy = -(-cross(V,B))z = -EMF_X + // F_y[(6+NSCALARS)*n_cells] = VxBz - BxVz = (-cross(V,B))y = EMF_Z + // F_z[(5+NSCALARS)*n_cells] = VxBy - BxVy = -(-cross(V,B))z = -EMF_Y + // F_z[(6+NSCALARS)*n_cells] = VxBz - BxVz = (-cross(V,B))y = EMF_X + size_t const arraySize = (n_fields-1) * n_cells * sizeof(Real); + size_t const ctArraySize = 3 * n_cells * sizeof(Real); + #else // not MHD + size_t const arraySize = n_fields*n_cells*sizeof(Real); + #endif //MHD CudaSafeCall( cudaMalloc((void**)&dev_conserved_half, n_fields*n_cells*sizeof(Real)) ); - CudaSafeCall( cudaMalloc((void**)&Q_Lx, n_fields*n_cells*sizeof(Real)) ); - CudaSafeCall( cudaMalloc((void**)&Q_Rx, n_fields*n_cells*sizeof(Real)) ); - CudaSafeCall( cudaMalloc((void**)&Q_Ly, n_fields*n_cells*sizeof(Real)) ); - CudaSafeCall( cudaMalloc((void**)&Q_Ry, n_fields*n_cells*sizeof(Real)) ); - CudaSafeCall( cudaMalloc((void**)&Q_Lz, n_fields*n_cells*sizeof(Real)) ); - CudaSafeCall( cudaMalloc((void**)&Q_Rz, n_fields*n_cells*sizeof(Real)) ); - CudaSafeCall( cudaMalloc((void**)&F_x, n_fields*n_cells*sizeof(Real)) ); - CudaSafeCall( cudaMalloc((void**)&F_y, n_fields*n_cells*sizeof(Real)) ); - CudaSafeCall( cudaMalloc((void**)&F_z, n_fields*n_cells*sizeof(Real)) ); + CudaSafeCall( cudaMalloc((void**)&Q_Lx, arraySize) ); + CudaSafeCall( cudaMalloc((void**)&Q_Rx, arraySize) ); + CudaSafeCall( cudaMalloc((void**)&Q_Ly, arraySize) ); + CudaSafeCall( cudaMalloc((void**)&Q_Ry, arraySize) ); + CudaSafeCall( cudaMalloc((void**)&Q_Lz, arraySize) ); + CudaSafeCall( cudaMalloc((void**)&Q_Rz, arraySize) ); + CudaSafeCall( cudaMalloc((void**)&F_x, arraySize) ); + CudaSafeCall( cudaMalloc((void**)&F_y, arraySize) ); + CudaSafeCall( cudaMalloc((void**)&F_z, arraySize) ); + + cuda_utilities::initGpuMemory(dev_conserved_half, n_fields*n_cells*sizeof(Real)); + cuda_utilities::initGpuMemory(Q_Lx, arraySize); + cuda_utilities::initGpuMemory(Q_Rx, arraySize); + cuda_utilities::initGpuMemory(Q_Ly, arraySize); + cuda_utilities::initGpuMemory(Q_Ry, arraySize); + cuda_utilities::initGpuMemory(Q_Lz, arraySize); + cuda_utilities::initGpuMemory(Q_Rz, arraySize); + cuda_utilities::initGpuMemory(F_x, arraySize); + cuda_utilities::initGpuMemory(F_y, arraySize); + cuda_utilities::initGpuMemory(F_z, arraySize); + + #ifdef MHD + CudaSafeCall( cudaMalloc((void**)&ctElectricFields, ctArraySize) ); + cuda_utilities::initGpuMemory(ctElectricFields, ctArraySize); + #endif //MHD #if defined( GRAVITY ) - // CudaSafeCall( cudaMalloc((void**)&dev_grav_potential, n_cells*sizeof(Real)) ); dev_grav_potential = d_grav_potential; - #else + #else // not GRAVITY dev_grav_potential = NULL; - #endif + #endif //GRAVITY // If memory is single allocated: memory_allocated becomes true and successive timesteps won't allocate memory. // If the memory is not single allocated: memory_allocated remains Null and memory is allocated every timestep. memory_allocated = true; - } #if defined( GRAVITY ) && !defined( GRAVITY_GPU ) CudaSafeCall( cudaMemcpy(dev_grav_potential, temp_potential, n_cells*sizeof(Real), cudaMemcpyHostToDevice) ); - #endif + #endif //GRAVITY and GRAVITY_GPU // Step 1: Use PCM reconstruction to put primitive variables into interface arrays hipLaunchKernelGGL(PCM_Reconstruction_3D, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, Q_Ly, Q_Ry, Q_Lz, Q_Rz, nx, ny, nz, n_ghost, gama, n_fields); CudaCheckError(); - // Step 2: Calculate first-order upwind fluxes #ifdef EXACT hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); @@ -110,18 +156,32 @@ void VL_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, int hipLaunchKernelGGL(Calculate_HLL_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields); hipLaunchKernelGGL(Calculate_HLL_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lz, Q_Rz, F_z, nx, ny, nz, n_ghost, gama, 2, n_fields); #endif //HLL + #ifdef HLLD + hipLaunchKernelGGL(mhd::Calculate_HLLD_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, &(dev_conserved[(5 + NSCALARS) * n_cells]), F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); + hipLaunchKernelGGL(mhd::Calculate_HLLD_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, &(dev_conserved[(6 + NSCALARS) * n_cells]), F_y, nx, ny, nz, n_ghost, gama, 1, n_fields); + hipLaunchKernelGGL(mhd::Calculate_HLLD_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lz, Q_Rz, &(dev_conserved[(7 + NSCALARS) * n_cells]), F_z, nx, ny, nz, n_ghost, gama, 2, n_fields); + #endif //HLLD CudaCheckError(); + #ifdef MHD + // Step 2.5: Compute the Constrained transport electric fields + hipLaunchKernelGGL(mhd::Calculate_CT_Electric_Fields, dim1dGrid, dim1dBlock, 0, 0, F_x, F_y, F_z, dev_conserved, ctElectricFields, nx, ny, nz, n_cells); + CudaCheckError(); + #endif //MHD // Step 3: Update the conserved variables half a timestep hipLaunchKernelGGL(Update_Conserved_Variables_3D_half, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, dev_conserved_half, F_x, F_y, F_z, nx, ny, nz, n_ghost, dx, dy, dz, 0.5*dt, gama, n_fields, density_floor ); CudaCheckError(); - + #ifdef MHD + // Update the magnetic fields + hipLaunchKernelGGL(mhd::Update_Magnetic_Field_3D, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, dev_conserved_half, ctElectricFields, nx, ny, nz, n_cells, 0.5*dt, dx, dy, dz); + CudaCheckError(); + #endif //MHD // Step 4: Construct left and right interface values using updated conserved variables #ifdef PCM hipLaunchKernelGGL(PCM_Reconstruction_3D, dim1dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, Q_Ly, Q_Ry, Q_Lz, Q_Rz, nx, ny, nz, n_ghost, gama, n_fields); - #endif + #endif //PCM #ifdef PLMP hipLaunchKernelGGL(PLMP_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, n_fields); hipLaunchKernelGGL(PLMP_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, dt, gama, 1, n_fields); @@ -131,7 +191,7 @@ void VL_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, int hipLaunchKernelGGL(PLMC_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, n_fields); hipLaunchKernelGGL(PLMC_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, dt, gama, 1, n_fields); hipLaunchKernelGGL(PLMC_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Lz, Q_Rz, nx, ny, nz, n_ghost, dz, dt, gama, 2, n_fields); - #endif + #endif //PLMC #ifdef PPMP hipLaunchKernelGGL(PPMP_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, n_fields); hipLaunchKernelGGL(PPMP_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, dt, gama, 1, n_fields); @@ -166,29 +226,46 @@ void VL_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, int hipLaunchKernelGGL(Calculate_HLL_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields); hipLaunchKernelGGL(Calculate_HLL_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lz, Q_Rz, F_z, nx, ny, nz, n_ghost, gama, 2, n_fields); #endif //HLLC + #ifdef HLLD + hipLaunchKernelGGL(mhd::Calculate_HLLD_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, &(dev_conserved_half[(5 + NSCALARS) * n_cells]), F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); + hipLaunchKernelGGL(mhd::Calculate_HLLD_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, &(dev_conserved_half[(6 + NSCALARS) * n_cells]), F_y, nx, ny, nz, n_ghost, gama, 1, n_fields); + hipLaunchKernelGGL(mhd::Calculate_HLLD_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lz, Q_Rz, &(dev_conserved_half[(7 + NSCALARS) * n_cells]), F_z, nx, ny, nz, n_ghost, gama, 2, n_fields); + #endif //HLLD CudaCheckError(); #ifdef DE // Compute the divergence of Vel before updating the conserved array, this solves synchronization issues when adding this term on Update_Conserved_Variables_3D hipLaunchKernelGGL(Partial_Update_Advected_Internal_Energy_3D, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, Q_Ly, Q_Ry, Q_Lz, Q_Rz, nx, ny, nz, n_ghost, dx, dy, dz, dt, gama, n_fields ); CudaCheckError(); - #endif + #endif //DE + #ifdef MHD + // Step 5.5: Compute the Constrained transport electric fields + hipLaunchKernelGGL(mhd::Calculate_CT_Electric_Fields, dim1dGrid, dim1dBlock, 0, 0, F_x, F_y, F_z, dev_conserved_half, ctElectricFields, nx, ny, nz, n_cells); + CudaCheckError(); + #endif //MHD // Step 6: Update the conserved variable array hipLaunchKernelGGL(Update_Conserved_Variables_3D, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, Q_Ly, Q_Ry, Q_Lz, Q_Rz, F_x, F_y, F_z, nx, ny, nz, x_off, y_off, z_off, n_ghost, dx, dy, dz, xbound, ybound, zbound, dt, gama, n_fields, density_floor, dev_grav_potential); CudaCheckError(); + #ifdef MHD + // Update the magnetic fields + hipLaunchKernelGGL(mhd::Update_Magnetic_Field_3D, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, dev_conserved, ctElectricFields, nx, ny, nz, n_cells, dt, dx, dy, dz); + CudaCheckError(); + #endif //MHD + #ifdef DE hipLaunchKernelGGL(Select_Internal_Energy_3D, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, nz, n_ghost, n_fields); hipLaunchKernelGGL(Sync_Energies_3D, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, nz, n_ghost, gama, n_fields); CudaCheckError(); - #endif + #endif //DE #ifdef TEMPERATURE_FLOOR hipLaunchKernelGGL(Apply_Temperature_Floor, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, nz, n_ghost, n_fields, U_floor ); CudaCheckError(); #endif //TEMPERATURE_FLOOR + return; } @@ -208,6 +285,7 @@ void Free_Memory_VL_3D(){ cudaFree(F_x); cudaFree(F_y); cudaFree(F_z); + cudaFree(ctElectricFields); } @@ -233,11 +311,11 @@ __global__ void Update_Conserved_Variables_3D_half(Real *dev_conserved, Real *de Real d, d_inv, vx, vy, vz; Real vx_imo, vx_ipo, vy_jmo, vy_jpo, vz_kmo, vz_kpo, P, E, E_kin, GE; int ipo, jpo, kpo; - #endif + #endif //DE #ifdef DENSITY_FLOOR Real dens_0; - #endif + #endif //DENSITY_FLOOR // threads corresponding to all cells except outer ring of ghost cells do the calculation if (xid > 0 && xid < nx-1 && yid > 0 && yid < ny-1 && zid > 0 && zid < nz-1) @@ -251,7 +329,13 @@ __global__ void Update_Conserved_Variables_3D_half(Real *dev_conserved, Real *de //PRESSURE_DE E = dev_conserved[4*n_cells + id]; GE = dev_conserved[(n_fields-1)*n_cells + id]; - E_kin = 0.5 * d * ( vx*vx + vy*vy + vz*vz ); + E_kin = hydro_utilities::Calc_Kinetic_Energy_From_Velocity(d, vx, vy, vz); + #ifdef MHD + // Add the magnetic energy + Real centeredBx, centeredBy, centeredBz; + mhd::utils::cellCenteredMagneticFields(dev_conserved, id, xid, yid, zid, n_cells, nx, ny, centeredBx, centeredBy, centeredBz) + E_kin += mhd::utils::computeMagneticEnergy(centeredBx, centeredBy, centeredBz); + #endif //MHD P = hydro_utilities::Get_Pressure_From_DE( E, E - E_kin, GE, gamma ); P = fmax(P, (Real) TINY_NUMBER); // P = (dev_conserved[4*n_cells + id] - 0.5*d*(vx*vx + vy*vy + vz*vz)) * (gamma - 1.0); @@ -266,7 +350,7 @@ __global__ void Update_Conserved_Variables_3D_half(Real *dev_conserved, Real *de vy_jpo = dev_conserved[2*n_cells + jpo] / dev_conserved[jpo]; vz_kmo = dev_conserved[3*n_cells + kmo] / dev_conserved[kmo]; vz_kpo = dev_conserved[3*n_cells + kpo] / dev_conserved[kpo]; - #endif + #endif //DE // update the conserved variable array dev_conserved_half[ id] = dev_conserved[ id] @@ -296,14 +380,14 @@ __global__ void Update_Conserved_Variables_3D_half(Real *dev_conserved, Real *de + dtody * (dev_F_y[(5+i)*n_cells + jmo] - dev_F_y[(5+i)*n_cells + id]) + dtodz * (dev_F_z[(5+i)*n_cells + kmo] - dev_F_z[(5+i)*n_cells + id]); } - #endif + #endif //SCALAR #ifdef DE dev_conserved_half[(n_fields-1)*n_cells + id] = dev_conserved[(n_fields-1)*n_cells + id] + dtodx * (dev_F_x[(n_fields-1)*n_cells + imo] - dev_F_x[(n_fields-1)*n_cells + id]) + dtody * (dev_F_y[(n_fields-1)*n_cells + jmo] - dev_F_y[(n_fields-1)*n_cells + id]) + dtodz * (dev_F_z[(n_fields-1)*n_cells + kmo] - dev_F_z[(n_fields-1)*n_cells + id]) + 0.5*P*(dtodx*(vx_imo-vx_ipo) + dtody*(vy_jmo-vy_jpo) + dtodz*(vz_kmo-vz_kpo)); - #endif + #endif //DE #ifdef DENSITY_FLOOR if ( dev_conserved_half[ id] < density_floor ){ @@ -317,19 +401,11 @@ __global__ void Update_Conserved_Variables_3D_half(Real *dev_conserved, Real *de dev_conserved_half[4*n_cells + id] *= (density_floor / dens_0); #ifdef DE dev_conserved_half[(n_fields-1)*n_cells + id] *= (density_floor / dens_0); - #endif + #endif //DE } - #endif - //if (dev_conserved_half[id] < 0.0 || dev_conserved_half[id] != dev_conserved_half[id] || dev_conserved_half[4*n_cells+id] < 0.0 || dev_conserved_half[4*n_cells+id] != dev_conserved_half[4*n_cells+id]) { - //printf("%3d %3d %3d Thread crashed in half step update. d: %e E: %e\n", xid, yid, zid, dev_conserved_half[id], dev_conserved_half[4*n_cells+id]); - //} - + #endif //DENSITY_FLOOR } } - - - -#endif //VL -#endif //CUDA +#endif //CUDA and VL diff --git a/src/io/io.h b/src/io/io.h index f7dfe6eb7..08489f7da 100644 --- a/src/io/io.h +++ b/src/io/io.h @@ -3,6 +3,8 @@ #include "../global/global.h" #include "../grid/grid3D.h" #include +#include +#include /* Write the data */ @@ -26,6 +28,23 @@ void OutputSlices(Grid3D &G, struct parameters P, int nfile); /* MPI-safe printf routine */ int chprintf(const char * __restrict sdata, ...); +/*! + * \brief Convert a floating point number to a string such that it can be + * exactly deserialized back from a string to the same floating point number. + * + * \tparam T Any floating point type + * \param[in] input The floating point number to convert + * \return std::string The string representation of the input floating point + */ +template +std::string to_string_exact(T const &input) +{ + std::stringstream output; + output << std::setprecision(std::numeric_limits::max_digits10); + output << input; + return output.str(); +} + void Create_Log_File( struct parameters P ); void Write_Message_To_Log_File( const char* message ); diff --git a/src/main.cpp b/src/main.cpp index da2348858..5ee396965 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -17,7 +17,7 @@ #include "particles/supernova.h" #ifdef ANALYSIS #include "analysis/feedback_analysis.h" -#endif +#endif #endif //SUPERNOVA #ifdef STAR_FORMATION #include "particles/star_formation.h" @@ -53,7 +53,6 @@ int main(int argc, char *argv[]) int nfile = 0; // number of output files Real outtime = 0; // current output time - // read in command line arguments if (argc < 2) { @@ -191,6 +190,12 @@ int main(int argc, char *argv[]) // add one to the output file count nfile++; #endif //OUTPUT + + #ifdef MHD + // Check that the initial magnetic field has zero divergence + G.checkMagneticDivergence(G, P, nfile); + #endif //MHD + // increment the next output time outtime += P.outstep; @@ -254,7 +259,7 @@ int main(int argc, char *argv[]) //Set the Grid boundary conditions for next time step G.Set_Boundary_Conditions_Grid(P); - + #ifdef GRAVITY_ANALYTIC_COMP G.Add_Analytic_Potential(); #endif @@ -266,7 +271,7 @@ int main(int argc, char *argv[]) #ifdef STAR_FORMATION star_formation::Star_Formation(G); - #endif + #endif #ifdef CPU_TIME G.Timer.Total.End(); @@ -332,6 +337,10 @@ int main(int argc, char *argv[]) } #endif + #ifdef MHD + // Check that the magnetic field has zero divergence + G.checkMagneticDivergence(G, P, nfile); + #endif //MHD } /*end loop over timesteps*/ diff --git a/src/mhd/ct_electric_fields.cu b/src/mhd/ct_electric_fields.cu new file mode 100644 index 000000000..64a17bf52 --- /dev/null +++ b/src/mhd/ct_electric_fields.cu @@ -0,0 +1,340 @@ +/*! + * \file ct_electric_fields.cu + * \author Robert 'Bob' Caddy (rvc@pitt.edu) + * \brief Contains implementation for the CT electric fields code + * + */ + +// STL Includes + +// External Includes + +// Local Includes +#include "../mhd/ct_electric_fields.h" + +namespace mhd +{ + // ========================================================================= + __global__ void Calculate_CT_Electric_Fields(Real const *fluxX, + Real const *fluxY, + Real const *fluxZ, + Real const *dev_conserved, + Real *ctElectricFields, + int const nx, + int const ny, + int const nz, + int const n_cells) + { + // get a thread index + int const threadId = threadIdx.x + blockIdx.x * blockDim.x; + int xid, yid, zid; + cuda_utilities::compute3DIndices(threadId, nx, ny, xid, yid, zid); + + // Thread guard to avoid overrun and to skip the first two cells since + // those ghost cells can't be reconstructed + if ( xid > 1 + and yid > 1 + and zid > 1 + and xid < nx + and yid < ny + and zid < nz) + { + // According to the source code of Athena, the following equation + // relate the magnetic flux to the face centered electric + // fields/EMF. -cross(V,B)x is the negative of the x-component of V + // cross B. Note that "X" is the direction the solver is running in + // this case, not necessarily the true "X". + // F_x[(5+NSCALARS)*n_cells] = VxBy - BxVy = -(-cross(V,B))z = -EMF_Z + // F_x[(6+NSCALARS)*n_cells] = VxBz - BxVz = (-cross(V,B))y = EMF_Y + // F_y[(5+NSCALARS)*n_cells] = VxBy - BxVy = -(-cross(V,B))z = -EMF_X + // F_y[(6+NSCALARS)*n_cells] = VxBz - BxVz = (-cross(V,B))y = EMF_Z + // F_z[(5+NSCALARS)*n_cells] = VxBy - BxVy = -(-cross(V,B))z = -EMF_Y + // F_z[(6+NSCALARS)*n_cells] = VxBz - BxVz = (-cross(V,B))y = EMF_X + + // Notes on Implementation Details + // - The density flux has the same sign as the velocity on the face + // and we only care about the sign so we're using the density flux + // to perform upwinding checks + // - All slopes are computed without the factor of two shown in + // Stone & Gardiner 2008 eqn. 24. That factor of two is taken care + // of in the final assembly of the electric field + + // Variable to get the sign of the velocity at the interface. + Real signUpwind; + + // Slope and face variables. Format is + // "__". Slope/Face indicates if the + // value is a slope or a face centered EMF, direction indicates the + // direction of the derivative/face and pos/neg indicates if it's + // the slope on the positive or negative side of the edge field + // being computed. Note that the direction for the face is parallel + // to the face and the other direction that is parallel to that face + // is the direction of the electric field being calculated + Real slope_x_pos, slope_x_neg, + slope_y_pos, slope_y_neg, + slope_z_pos, slope_z_neg, + face_x_pos, face_x_neg, + face_y_pos, face_y_neg, + face_z_pos, face_z_neg; + // ================ + // X electric field + // ================ + + // Y-direction slope on the positive Y side + signUpwind = fluxZ[cuda_utilities::compute1DIndex(xid, yid, zid-1, nx, ny)]; + if (signUpwind > 0.0) + { + slope_y_pos = mhd::_internal::_ctSlope(fluxY, dev_conserved, -1, 0, 2, -1, 1, 2, xid, yid, zid, nx, ny, n_cells); + } + else if (signUpwind < 0.0) + { + slope_y_pos = mhd::_internal::_ctSlope(fluxY, dev_conserved, -1, 0, -1, -1, 1, -1, xid, yid, zid, nx, ny, n_cells); + } + else + { + slope_y_pos = 0.5 * (mhd::_internal::_ctSlope(fluxY, dev_conserved, -1, 0, 2, -1, 1, 2, xid, yid, zid, nx, ny, n_cells) + + mhd::_internal::_ctSlope(fluxY, dev_conserved, -1, 0, -1, -1, 1, -1, xid, yid, zid, nx, ny, n_cells)); + } + + // Y-direction slope on the negative Y side + signUpwind = fluxZ[cuda_utilities::compute1DIndex(xid, yid-1, zid-1, nx, ny)]; + if (signUpwind > 0.0) + { + slope_y_neg = mhd::_internal::_ctSlope(fluxY, dev_conserved, -1, 0, 1, 2, 1, 2, xid, yid, zid, nx, ny, n_cells); + } + else if (signUpwind < 0.0) + { + slope_y_neg = mhd::_internal::_ctSlope(fluxY, dev_conserved, -1, 0, 1, -1, 1, -1, xid, yid, zid, nx, ny, n_cells); + } + else + { + slope_y_neg = 0.5 * (mhd::_internal::_ctSlope(fluxY, dev_conserved, -1, 0, 1, 2, 1, 2, xid, yid, zid, nx, ny, n_cells) + + mhd::_internal::_ctSlope(fluxY, dev_conserved, -1, 0, 1, -1, 1, -1, xid, yid, zid, nx, ny, n_cells)); + } + + // Z-direction slope on the positive Z side + signUpwind = fluxY[cuda_utilities::compute1DIndex(xid, yid-1, zid, nx, ny)]; + if (signUpwind > 0.0) + { + slope_z_pos = mhd::_internal::_ctSlope(fluxZ, dev_conserved, 1, 0, 1, -1, 1, 2, xid, yid, zid, nx, ny, n_cells); + } + else if (signUpwind < 0.0) + { + slope_z_pos = mhd::_internal::_ctSlope(fluxZ, dev_conserved, 1, 0, -1, -1, 2, -1, xid, yid, zid, nx, ny, n_cells); + } + else + { + slope_z_pos = 0.5 * (mhd::_internal::_ctSlope(fluxZ, dev_conserved, 1, 0, 1, -1, 1, 2, xid, yid, zid, nx, ny, n_cells) + + mhd::_internal::_ctSlope(fluxZ, dev_conserved, 1, 0, -1, -1, 2, -1, xid, yid, zid, nx, ny, n_cells)); + } + + // Z-direction slope on the negative Z side + signUpwind = fluxY[cuda_utilities::compute1DIndex(xid, yid-1, zid-1, nx, ny)]; + if (signUpwind > 0.0) + { + slope_z_neg = mhd::_internal::_ctSlope(fluxZ, dev_conserved, 1, 0, 1, 2, 1, 2, xid, yid, zid, nx, ny, n_cells); + } + else if (signUpwind < 0.0) + { + slope_z_neg = mhd::_internal::_ctSlope(fluxZ, dev_conserved, 1, 0, 2, -1, -1, 2, xid, yid, zid, nx, ny, n_cells); + } + else + { + slope_z_neg = 0.5 * (mhd::_internal::_ctSlope(fluxZ, dev_conserved, 1, 0, 1, 2, 1, 2, xid, yid, zid, nx, ny, n_cells) + + mhd::_internal::_ctSlope(fluxZ, dev_conserved, 1, 0, 2, -1, -1, 2, xid, yid, zid, nx, ny, n_cells)); + } + + // Load the face centered electric fields Note the negative signs to + // convert from magnetic flux to electric field + + face_y_pos = + fluxZ[cuda_utilities::compute1DIndex(xid , yid , zid-1, nx, ny) + (6+NSCALARS)*n_cells]; + face_y_neg = + fluxZ[cuda_utilities::compute1DIndex(xid , yid-1, zid-1, nx, ny) + (6+NSCALARS)*n_cells]; + face_z_pos = - fluxY[cuda_utilities::compute1DIndex(xid , yid-1, zid , nx, ny) + (5+NSCALARS)*n_cells]; + face_z_neg = - fluxY[cuda_utilities::compute1DIndex(xid , yid-1, zid-1, nx, ny) + (5+NSCALARS)*n_cells]; + + // sum and average face centered electric fields and slopes to get the + // edge averaged electric field. + ctElectricFields[threadId + 0*n_cells] = 0.25 * (+ face_y_pos + + face_y_neg + + face_z_pos + + face_z_neg + + slope_y_pos + + slope_y_neg + + slope_z_pos + + slope_z_neg); + + // ================ + // Y electric field + // ================ + + // X-direction slope on the positive X side + signUpwind = fluxZ[cuda_utilities::compute1DIndex(xid, yid, zid-1, nx, ny)]; + if (signUpwind > 0.0) + { + slope_x_pos = mhd::_internal::_ctSlope(fluxX, dev_conserved, 1, 1, 2, -1, 0, 2, xid, yid, zid, nx, ny, n_cells); + } + else if (signUpwind < 0.0) + { + slope_x_pos = mhd::_internal::_ctSlope(fluxX, dev_conserved, 1, 1, -1, -1, 0, -1, xid, yid, zid, nx, ny, n_cells); + } + else + { + slope_x_pos = 0.5 * (mhd::_internal::_ctSlope(fluxX, dev_conserved, 1, 1, 2, -1, 0, 2, xid, yid, zid, nx, ny, n_cells) + + mhd::_internal::_ctSlope(fluxX, dev_conserved, 1, 1, -1, -1, 0, -1, xid, yid, zid, nx, ny, n_cells)); + } + + // X-direction slope on the negative X side + signUpwind = fluxZ[cuda_utilities::compute1DIndex(xid-1, yid, zid-1, nx, ny)]; + if (signUpwind > 0.0) + { + slope_x_neg = mhd::_internal::_ctSlope(fluxX, dev_conserved, 1, 1, 0, 2, 0, 2, xid, yid, zid, nx, ny, n_cells); + } + else if (signUpwind < 0.0) + { + slope_x_neg = mhd::_internal::_ctSlope(fluxX, dev_conserved, 1, 1, 0, -1, 0, -1, xid, yid, zid, nx, ny, n_cells); + } + else + { + slope_x_neg = 0.5 * (mhd::_internal::_ctSlope(fluxX, dev_conserved, 1, 1, 0, 2, 0, 2, xid, yid, zid, nx, ny, n_cells) + + mhd::_internal::_ctSlope(fluxX, dev_conserved, 1, 1, 0, -1, 0, -1, xid, yid, zid, nx, ny, n_cells)); + } + + // Z-direction slope on the positive Z side + signUpwind = fluxX[cuda_utilities::compute1DIndex(xid-1, yid, zid, nx, ny)]; + if (signUpwind > 0.0) + { + slope_z_pos = mhd::_internal::_ctSlope(fluxZ, dev_conserved, -1, 1, 0, -1, 0, 2, xid, yid, zid, nx, ny, n_cells); + } + else if (signUpwind < 0.0) + { + slope_z_pos = mhd::_internal::_ctSlope(fluxZ, dev_conserved, -1, 1, -1, -1, 2, -1, xid, yid, zid, nx, ny, n_cells); + } + else + { + slope_z_pos = 0.5 * (mhd::_internal::_ctSlope(fluxZ, dev_conserved, -1, 1, 0, -1, 0, 2, xid, yid, zid, nx, ny, n_cells) + + mhd::_internal::_ctSlope(fluxZ, dev_conserved, -1, 1, -1, -1, 2, -1, xid, yid, zid, nx, ny, n_cells)); + } + + // Z-direction slope on the negative Z side + signUpwind = fluxX[cuda_utilities::compute1DIndex(xid-1, yid, zid-1, nx, ny)]; + if (signUpwind > 0.0) + { + slope_z_neg = mhd::_internal::_ctSlope(fluxZ, dev_conserved, -1, 1, 0, 2, 0, 2, xid, yid, zid, nx, ny, n_cells); + } + else if (signUpwind < 0.0) + { + slope_z_neg = mhd::_internal::_ctSlope(fluxZ, dev_conserved, -1, 1, 2, -1, 2, -1, xid, yid, zid, nx, ny, n_cells); + } + else + { + slope_z_neg = 0.5 * (mhd::_internal::_ctSlope(fluxZ, dev_conserved, -1, 1, 0, 2, 0, 2, xid, yid, zid, nx, ny, n_cells) + + mhd::_internal::_ctSlope(fluxZ, dev_conserved, -1, 1, 2, -1, 2, -1, xid, yid, zid, nx, ny, n_cells)); + } + + // Load the face centered electric fields Note the negative signs to + // convert from magnetic flux to electric field + face_x_pos = - fluxZ[cuda_utilities::compute1DIndex(xid , yid, zid-1, nx, ny) + (5+NSCALARS)*n_cells]; + face_x_neg = - fluxZ[cuda_utilities::compute1DIndex(xid-1, yid, zid-1, nx, ny) + (5+NSCALARS)*n_cells]; + face_z_pos = + fluxX[cuda_utilities::compute1DIndex(xid-1, yid, zid , nx, ny) + (6+NSCALARS)*n_cells]; + face_z_neg = + fluxX[cuda_utilities::compute1DIndex(xid-1, yid, zid-1, nx, ny) + (6+NSCALARS)*n_cells]; + + // sum and average face centered electric fields and slopes to get the + // edge averaged electric field. + ctElectricFields[threadId + 1*n_cells] = 0.25 * (+ face_x_pos + + face_x_neg + + face_z_pos + + face_z_neg + + slope_x_pos + + slope_x_neg + + slope_z_pos + + slope_z_neg); + + // ================ + // Z electric field + // ================ + + // Y-direction slope on the positive Y side + signUpwind = fluxX[cuda_utilities::compute1DIndex(xid-1, yid, zid, nx, ny)]; + if (signUpwind > 0.0) + { + slope_y_pos = mhd::_internal::_ctSlope(fluxY, dev_conserved, 1, 2, 0, -1, 0, 1, xid, yid, zid, nx, ny, n_cells); + } + else if (signUpwind < 0.0) + { + slope_y_pos = mhd::_internal::_ctSlope(fluxY, dev_conserved, 1, 2, -1, -1, 1, -1, xid, yid, zid, nx, ny, n_cells); + } + else + { + slope_y_pos = 0.5 * (mhd::_internal::_ctSlope(fluxY, dev_conserved, 1, 2, 0, -1, 0, 1, xid, yid, zid, nx, ny, n_cells) + + mhd::_internal::_ctSlope(fluxY, dev_conserved, 1, 2, -1, -1, 1, -1, xid, yid, zid, nx, ny, n_cells)); + } + + // Y-direction slope on the negative Y side + signUpwind = fluxX[cuda_utilities::compute1DIndex(xid-1, yid-1, zid, nx, ny)]; + if (signUpwind > 0.0) + { + slope_y_neg = mhd::_internal::_ctSlope(fluxY, dev_conserved, 1, 2, 0, 1, 0, 1, xid, yid, zid, nx, ny, n_cells); + } + else if (signUpwind < 0.0) + { + slope_y_neg = mhd::_internal::_ctSlope(fluxY, dev_conserved, 1, 2, 1, -1, 1, -1, xid, yid, zid, nx, ny, n_cells); + } + else + { + slope_y_neg = 0.5 * (mhd::_internal::_ctSlope(fluxY, dev_conserved, 1, 2, 0, 1, 0, 1, xid, yid, zid, nx, ny, n_cells) + + mhd::_internal::_ctSlope(fluxY, dev_conserved, 1, 2, 1, -1, 1, -1, xid, yid, zid, nx, ny, n_cells)); + } + + // X-direction slope on the positive X side + signUpwind = fluxY[cuda_utilities::compute1DIndex(xid, yid-1, zid, nx, ny)]; + if (signUpwind > 0.0) + { + slope_x_pos = mhd::_internal::_ctSlope(fluxX, dev_conserved, -1, 2, 1, -1, 0, 1, xid, yid, zid, nx, ny, n_cells); + } + else if (signUpwind < 0.0) + { + slope_x_pos = mhd::_internal::_ctSlope(fluxX, dev_conserved, -1, 2, -1, -1, 0, -1, xid, yid, zid, nx, ny, n_cells); + } + else + { + slope_x_pos = 0.5 * (mhd::_internal::_ctSlope(fluxX, dev_conserved, -1, 2, 1, -1, 0, 1, xid, yid, zid, nx, ny, n_cells) + + mhd::_internal::_ctSlope(fluxX, dev_conserved, -1, 2, -1, -1, 0, -1, xid, yid, zid, nx, ny, n_cells)); + } + + // X-direction slope on the negative X side + signUpwind = fluxY[cuda_utilities::compute1DIndex(xid-1, yid-1, zid, nx, ny)]; + if (signUpwind > 0.0) + { + slope_x_neg = mhd::_internal::_ctSlope(fluxX, dev_conserved, -1, 2, 0, 1, 0, 1, xid, yid, zid, nx, ny, n_cells); + } + else if (signUpwind < 0.0) + { + slope_x_neg = mhd::_internal::_ctSlope(fluxX, dev_conserved, -1, 2, 0, -1, 0, -1, xid, yid, zid, nx, ny, n_cells); + } + else + { + slope_x_neg = 0.5 * (mhd::_internal::_ctSlope(fluxX, dev_conserved, -1, 2, 0, 1, 0, 1, xid, yid, zid, nx, ny, n_cells) + + mhd::_internal::_ctSlope(fluxX, dev_conserved, -1, 2, 0, -1, 0, -1, xid, yid, zid, nx, ny, n_cells)); + } + + // Load the face centered electric fields Note the negative signs to + // convert from magnetic flux to electric field + face_x_pos = + fluxY[cuda_utilities::compute1DIndex(xid , yid-1, zid, nx, ny) + (6+NSCALARS)*n_cells]; + face_x_neg = + fluxY[cuda_utilities::compute1DIndex(xid-1, yid-1, zid, nx, ny) + (6+NSCALARS)*n_cells]; + face_y_pos = - fluxX[cuda_utilities::compute1DIndex(xid-1, yid , zid, nx, ny) + (5+NSCALARS)*n_cells]; + face_y_neg = - fluxX[cuda_utilities::compute1DIndex(xid-1, yid-1, zid, nx, ny) + (5+NSCALARS)*n_cells]; + + // sum and average face centered electric fields and slopes to get the + // edge averaged electric field. + ctElectricFields[threadId + 2*n_cells] = 0.25 * (+ face_x_pos + + face_x_neg + + face_y_pos + + face_y_neg + + slope_x_pos + + slope_x_neg + + slope_y_pos + + slope_y_neg); + } + } + // ========================================================================= +} // end namespace mhd \ No newline at end of file diff --git a/src/mhd/ct_electric_fields.h b/src/mhd/ct_electric_fields.h new file mode 100644 index 000000000..987633461 --- /dev/null +++ b/src/mhd/ct_electric_fields.h @@ -0,0 +1,155 @@ +/*! + * \file ct_electric_fields.h + * \author Robert 'Bob' Caddy (rvc@pitt.edu) + * \brief Contains the declaration for the kernel that computes the CT electric fields + * + */ + +#pragma once + +// STL Includes + +// External Includes + +// Local Includes +#include "../global/global.h" +#include "../global/global_cuda.h" +#include "../utils/gpu.hpp" +#include "../utils/cuda_utilities.h" + +/*! + * \brief Namespace for MHD code + * + */ +namespace mhd +{ + /*! + * \brief Namespace for functions required by functions within the mhd + * namespace. Everything in this name space should be regarded as private + * but is made accesible for testing + * + */ + namespace _internal + { + // ===================================================================== + /*! + * \brief Compute and return the slope of the electric field used to + compute the CT electric fields + * + * \param[in] flux The flux array + * \param[in] dev_conserved The conserved variable array + * \param[in] fluxSign The sign of the flux to convert it to magnetic + * field. Also serves to choose which magnetic flux is used, i.e. the Y + * or Z field + * \param[in] ctDirection The direction of the CT field that this slope + will be used to calculate + * \param[in] conservedQuadrent1 Which index should be reduced by one to get the correct conserved variables. Options are -1 for no reduction, 0 for reducing xid, 1 for reducing yid, and 2 for reducing zid + * \param[in] conservedQuadrent2 Which index should be reduced by one to get the correct conserved variables. Options are -1 for no reduction, 0 for reducing xid, 1 for reducing yid, and 2 for reducing zid + * \param[in] fluxQuadrent1 Which index should be reduced by one to get the correct flux variable. Options are -1 for no reduction, 0 for reducing xid, 1 for reducing yid, and 2 for reducing zid + * \param[in] fluxQuadrent2 Which index should be reduced by one to get the correct flux variable. Options are -1 for no reduction, 0 for reducing xid, 1 for reducing yid, and 2 for reducing zid + * \param[in] xid The x index + * \param[in] yid The y index + * \param[in] zid The z index + * \param[in] nx The number of cells in the x-direction + * \param[in] ny The number of cells in the y-direction + * \param[in] n_cells The total number of cells + * \return Real The slope of the electric field + */ + inline __host__ __device__ Real _ctSlope(Real const *flux, + Real const *dev_conserved, + Real const &fluxSign, + int const &ctDirection, + int const &conservedQuadrent1, + int const &conservedQuadrent2, + int const &fluxQuadrent1, + int const &fluxQuadrent2, + int const &xid, + int const &yid, + int const &zid, + int const &nx, + int const &ny, + int const &n_cells + ) + { + // Compute the various required indices + + // Get the shifted modulos of the ctDirection. + int const modPlus1 = (ctDirection + 1) % 3; + int const modPlus2 = (ctDirection + 2) % 3; + + // Indices for the cell centered values + int const xidCentered = xid - int(conservedQuadrent1 == 0) - int(conservedQuadrent2 == 0); + int const yidCentered = yid - int(conservedQuadrent1 == 1) - int(conservedQuadrent2 == 1); + int const zidCentered = zid - int(conservedQuadrent1 == 2) - int(conservedQuadrent2 == 2); + int const idxCentered = cuda_utilities::compute1DIndex(xidCentered, yidCentered, zidCentered, nx, ny); + + // Index for the flux + int const idxFlux = cuda_utilities::compute1DIndex(xid - int(fluxQuadrent1 == 0) - int(fluxQuadrent2 == 0), + yid - int(fluxQuadrent1 == 1) - int(fluxQuadrent2 == 1), + zid - int(fluxQuadrent1 == 2) - int(fluxQuadrent2 == 2), + nx, ny); + + // Indices for the face centered magnetic fields that need to be averaged + int const idxB2Shift = cuda_utilities::compute1DIndex(xidCentered - int(modPlus1 == 0), + yidCentered - int(modPlus1 == 1), + zidCentered - int(modPlus1 == 2), + nx, ny); + int const idxB3Shift = cuda_utilities::compute1DIndex(xidCentered - int(modPlus2 == 0), + yidCentered - int(modPlus2 == 1), + zidCentered - int(modPlus2 == 2), + nx, ny); + + // Load values for cell centered electric field. B1 (not present) is + // the magnetic field in the same direction as the `ctDirection` + // variable, B2 and B3 are the next two fields cyclically. i.e. if + // B1=Bx then B2=By and B3=Bz, if B1=By then B2=Bz and B3=Bx. The + // same rules apply for the momentum + Real const density = dev_conserved[idxCentered ]; + Real const Momentum2 = dev_conserved[idxCentered + (modPlus1+1) *n_cells]; + Real const Momentum3 = dev_conserved[idxCentered + (modPlus2+1) *n_cells]; + Real const B2Centered = 0.5 * ( dev_conserved[idxCentered + (modPlus1+5+NSCALARS)*n_cells] + + dev_conserved[idxB2Shift + (modPlus1+5+NSCALARS)*n_cells]); + Real const B3Centered = 0.5 * ( dev_conserved[idxCentered + (modPlus2+5+NSCALARS)*n_cells] + + dev_conserved[idxB3Shift + (modPlus2+5+NSCALARS)*n_cells]); + + // Compute the electric field in the center with a cross product + Real const electric_centered = (Momentum3*B2Centered - Momentum2*B3Centered) / density; + + // Load face centered electric field, note fluxSign to correctly do + // the shift from magnetic flux to EMF/electric field and to choose + // which field to use + Real const electric_face = fluxSign * flux[idxFlux + (int(fluxSign == 1)+5+NSCALARS)*n_cells]; + + // Compute the slope and return it + return electric_face - electric_centered; + } + // ===================================================================== + }// _mhd_internal namespace + + // ========================================================================= + /*! + * \brief Compute the Constrained Transport electric fields used to evolve + * the magnetic field. Note that this function requires that the density be + * non-zero or it will return Nans. + * + * \param[in] fluxX The flux on the x+1/2 face of each cell + * \param[in] fluxY The flux on the y+1/2 face of each cell + * \param[in] fluxZ The flux on the z+1/2 face of each cell + * \param[in] dev_conserved The device resident grid + * \param[out] ctElectricFields The CT electric fields + * \param[in] nx The number of cells in the x-direction + * \param[in] ny The number of cells in the y-direction + * \param[in] nz The number of cells in the z-direction + * \param[in] n_cells The total number of cells + */ + __global__ void Calculate_CT_Electric_Fields(Real const *fluxX, + Real const *fluxY, + Real const *fluxZ, + Real const *dev_conserved, + Real *ctElectricFields, + int const nx, + int const ny, + int const nz, + int const n_cells); + // ========================================================================= +} // end namespace mhd \ No newline at end of file diff --git a/src/mhd/ct_electric_fields_tests.cu b/src/mhd/ct_electric_fields_tests.cu new file mode 100644 index 000000000..55b46f3c8 --- /dev/null +++ b/src/mhd/ct_electric_fields_tests.cu @@ -0,0 +1,207 @@ +/*! + * \file ct_electric_fields_tests.cu + * \author Robert 'Bob' Caddy (rvc@pitt.edu) + * \brief Tests for the CT electric fields + * + */ + +// STL Includes +#include +#include +#include +#include +#include + +// External Includes +#include // Include GoogleTest and related libraries/headers + +// Local Includes +#include "../utils/testing_utilities.h" +#include "../mhd/ct_electric_fields.h" +#include "../global/global.h" + +// ============================================================================= +// Tests for the mhd::Calculate_CT_Electric_Fields kernel +// ============================================================================= + +// ============================================================================= +/*! + * \brief Test fixture for tMHDCalculateCTElectricFields test suite + * + */ +class tMHDCalculateCTElectricFields : public ::testing::Test +{ +public: + + /*! + * \brief Initialize and allocate all the various required variables and + * arrays + * + */ + tMHDCalculateCTElectricFields() + : + nx(3), + ny(nx), + nz(nx), + n_cells(nx*ny*nz), + fluxX(n_cells * (7+NSCALARS)), + fluxY(n_cells * (7+NSCALARS)), + fluxZ(n_cells * (7+NSCALARS)), + grid (n_cells * (8+NSCALARS)), + testCTElectricFields(n_cells * 3, -999.), + fiducialData(n_cells * 3, -999.), + dimGrid((n_cells + TPB - 1),1,1), + dimBlock(TPB,1,1) + { + // Allocate device arrays + CudaSafeCall ( cudaMalloc(&dev_fluxX, fluxX.size()*sizeof(double)) ); + CudaSafeCall ( cudaMalloc(&dev_fluxY, fluxY.size()*sizeof(double)) ); + CudaSafeCall ( cudaMalloc(&dev_fluxZ, fluxZ.size()*sizeof(double)) ); + CudaSafeCall ( cudaMalloc(&dev_grid, grid.size()*sizeof(double)) ); + CudaSafeCall ( cudaMalloc(&dev_testCTElectricFields, testCTElectricFields.size()*sizeof(double)) ); + + // Populate the grids with values where vector.at(i) = double(i). The + // values chosen aren't that important, just that every cell has a unique + // value + std::iota(std::begin(fluxX), std::end(fluxX), 0.); + std::iota(std::begin(fluxY), std::end(fluxY), fluxX.back() + 1); + std::iota(std::begin(fluxZ), std::end(fluxZ), fluxY.back() + 1); + std::iota(std::begin(grid), std::end(grid), fluxZ.back() + 1); + } + ~tMHDCalculateCTElectricFields() = default; +protected: + // Initialize the test grid and other state variables + size_t const nx, ny, nz; + size_t const n_cells; + + // Launch Parameters + dim3 const dimGrid; // How many blocks in the grid + dim3 const dimBlock; // How many threads per block + + // Make sure the vector is large enough that the locations where the + // magnetic field would be in the real grid are filled + std::vector fluxX; + std::vector fluxY; + std::vector fluxZ; + std::vector grid; + std::vector testCTElectricFields; + std::vector fiducialData; + + // device pointers + double *dev_fluxX, *dev_fluxY, *dev_fluxZ, *dev_grid, *dev_testCTElectricFields; + + /*! + * \brief Launch the kernel and check results + * + */ + void runTest() + { + // Copy values to GPU + CudaSafeCall( cudaMemcpy(dev_fluxX, fluxX.data(), fluxX.size()*sizeof(Real), cudaMemcpyHostToDevice) ); + CudaSafeCall( cudaMemcpy(dev_fluxY, fluxY.data(), fluxY.size()*sizeof(Real), cudaMemcpyHostToDevice) ); + CudaSafeCall( cudaMemcpy(dev_fluxZ, fluxZ.data(), fluxZ.size()*sizeof(Real), cudaMemcpyHostToDevice) ); + CudaSafeCall( cudaMemcpy(dev_grid, grid.data(), grid.size()*sizeof(Real), cudaMemcpyHostToDevice) ); + CudaSafeCall( cudaMemcpy(dev_testCTElectricFields, + testCTElectricFields.data(), + testCTElectricFields.size()*sizeof(Real), + cudaMemcpyHostToDevice) ); + + // Call the kernel to test + hipLaunchKernelGGL(mhd::Calculate_CT_Electric_Fields, + dimGrid, + dimBlock, + 0, + 0, + dev_fluxX, + dev_fluxY, + dev_fluxZ, + dev_grid, + dev_testCTElectricFields, + nx, + ny, + nz, + n_cells); + CudaCheckError(); + + // Copy test data back + CudaSafeCall( cudaMemcpy(testCTElectricFields.data(), + dev_testCTElectricFields, + testCTElectricFields.size()*sizeof(Real), + cudaMemcpyDeviceToHost) ); + cudaDeviceSynchronize(); + + // Check the results + for (size_t i = 0; i < fiducialData.size(); i++) + { + int xid, yid, zid; + cuda_utilities::compute3DIndices(i, nx, ny, xid, yid, zid); + testingUtilities::checkResults(fiducialData.at(i), + testCTElectricFields.at(i), + "value at i = " + std::to_string(i) + + ", xid = " + std::to_string(xid) + + ", yid = " + std::to_string(yid) + + ", zid = " + std::to_string(zid)); + } + } +}; +// ============================================================================= + +// ============================================================================= +TEST_F(tMHDCalculateCTElectricFields, + PositiveVelocityExpectCorrectOutput) +{ + // Fiducial values + fiducialData.at(26) = 206.29859653255295; + fiducialData.at(53) = -334.90052254763339; + fiducialData.at(80) = 209.53472440298236; + + // Launch kernel and check results + runTest(); +} +// ============================================================================= + +// ============================================================================= +TEST_F(tMHDCalculateCTElectricFields, + NegativeVelocityExpectCorrectOutput) +{ + // Fiducial values + fiducialData.at(26) = 203.35149422304994; + fiducialData.at(53) = -330.9860399765279; + fiducialData.at(80) = 208.55149905461991; + + // Set the density fluxes to be negative to indicate a negative velocity + // across the face + for (size_t i = 0; i < n_cells; i++) + { + fluxX.at(i) = -fluxX.at(i); + fluxY.at(i) = -fluxY.at(i); + fluxZ.at(i) = -fluxZ.at(i); + } + + // Launch kernel and check results + runTest(); +} +// ============================================================================= + +// ============================================================================= +TEST_F(tMHDCalculateCTElectricFields, + ZeroVelocityExpectCorrectOutput) +{ + // Fiducial values + fiducialData.at(26) = 204.82504537780144; + fiducialData.at(53) = -332.94328126208063; + fiducialData.at(80) = 209.04311172880114; + + // Set the density fluxes to be negative to indicate a negative velocity + // across the face + for (size_t i = 0; i < n_cells; i++) + { + fluxX.at(i) = 0.0; + fluxY.at(i) = 0.0; + fluxZ.at(i) = 0.0; + } + + // Launch kernel and check results + runTest(); +} +// ============================================================================= diff --git a/src/mhd/magnetic_divergence.cu b/src/mhd/magnetic_divergence.cu new file mode 100644 index 000000000..02051e48c --- /dev/null +++ b/src/mhd/magnetic_divergence.cu @@ -0,0 +1,112 @@ +/*! + * \file mhd_utilities.cpp + * \author Robert 'Bob' Caddy (rvc@pitt.edu) + * \brief Contains the implementation of various utility functions for MHD and + * for the various kernels, functions, and tools required for the 3D VL+CT MHD + * integrator. Due to the CUDA/HIP compiler requiring that device functions be + * directly accessible to the file they're used in most device functions will be + * implemented in the header file + * + */ + +// STL Includes +#include +#include + +// External Includes + +// Local Includes +#include "../mhd/magnetic_divergence.h" +#include "../utils/cuda_utilities.h" +#include "../utils/reduction_utilities.h" +#include "../utils/DeviceVector.h" + +namespace mhd +{ + // ========================================================================= + __global__ void calculateMagneticDivergence(Real const *dev_conserved, + Real *dev_maxDivergence, + Real const dx, + Real const dy, + Real const dz, + int const nx, + int const ny, + int const nz, + int const n_cells) + { + // Variables to store the divergence + Real cellDivergence; + Real maxDivergence = 0.0; + + // Index variables + int xid, yid, zid, id_xMin1, id_yMin1, id_zMin1; + + // Grid stride loop to perform as much of the reduction as possible + for(size_t id = threadIdx.x + blockIdx.x * blockDim.x; id < n_cells; id += blockDim.x * gridDim.x) + { + // compute the real indices + cuda_utilities::compute3DIndices(id, nx, ny, xid, yid, zid); + + // Thread guard to avoid overrun and to skip ghost cells that cannot + // have their divergences computed due to a missing face; + if ( xid > 1 and yid > 1 and zid > 1 + and xid < nx and yid < ny and zid < nz) + { + // Compute the various offset indices + id_xMin1 = cuda_utilities::compute1DIndex(xid-1, yid , zid , nx, ny); + id_yMin1 = cuda_utilities::compute1DIndex(xid , yid-1, zid , nx, ny); + id_zMin1 = cuda_utilities::compute1DIndex(xid , yid , zid-1, nx, ny); + + // Compute divergence + cellDivergence = + (( dev_conserved[id + (5+NSCALARS)*n_cells] + - dev_conserved[id_xMin1 + (5+NSCALARS)*n_cells]) + / dx) + + (( dev_conserved[id + (6+NSCALARS)*n_cells] + - dev_conserved[id_yMin1 + (6+NSCALARS)*n_cells]) + / dy) + + (( dev_conserved[id + (7+NSCALARS)*n_cells] + - dev_conserved[id_zMin1 + (7+NSCALARS)*n_cells]) + / dz); + + maxDivergence = max(maxDivergence, fabs(cellDivergence)); + } + } + + // Perform reduction across the entire grid + reduction_utilities::gridReduceMax(maxDivergence, dev_maxDivergence); + } + // ========================================================================= + + // ========================================================================= + Real launchCalculateMagneticDivergence(Real const *dev_conserved, + Real const dx, + Real const dy, + Real const dz, + int const nx, + int const ny, + int const nz, + int const n_cells) + { + // First let's create some variables we'll need. + cuda_utilities::AutomaticLaunchParams static const launchParams(mhd::calculateMagneticDivergence); + cuda_utilities::DeviceVector static dev_maxDivergence(1); + + // Set the device side inverse time step to the smallest possible double + // so that the reduction isn't using the maximum value of the previous + // iteration + dev_maxDivergence.assign(std::numeric_limits::lowest()); + + // Now lets get the local maximum divergence + hipLaunchKernelGGL(mhd::calculateMagneticDivergence, + launchParams.numBlocks, launchParams.threadsPerBlock, 0, 0, + dev_conserved, dev_maxDivergence.data(), + dx, dy, dz, + nx, ny, nz, + n_cells); + CudaCheckError(); + + return dev_maxDivergence[0]; + } + // ========================================================================= +} // end namespace mhd \ No newline at end of file diff --git a/src/mhd/magnetic_divergence.h b/src/mhd/magnetic_divergence.h new file mode 100644 index 000000000..8550591e2 --- /dev/null +++ b/src/mhd/magnetic_divergence.h @@ -0,0 +1,81 @@ +/*! + * \file magnetic_divergence.h + * \author Robert 'Bob' Caddy (rvc@pitt.edu) + * \brief Contains the declaration for the functions that compute the magnetic + * divergence + * + */ + +#pragma once + +// STL Includes + +// External Includes + +// Local Includes +#include "../global/global.h" +#include "../global/global_cuda.h" +#include "../utils/gpu.hpp" + + +/*! + * \brief Namespace for MHD code + * + */ +namespace mhd +{ + // ========================================================================= + /*! + * \brief Kernel to compute the maximum divergence of the magnetic field in + * the grid. Uses `reduction_utilities::gridReduceMax` and as such should be + * called with the minimum number of blocks. Recommend using the occupancy + * API + * + * \param[in] dev_conserved The device array of conserved variables + * \param[out] maxDivergence The device scalar to store the reduced divergence at + * \param[in] dx Cell size in the X-direction + * \param[in] dy Cell size in the Y-direction + * \param[in] dz Cell size in the Z-direction + * \param[in] nx Number of cells in the X-direction + * \param[in] ny Number of cells in the Y-direction + * \param[in] nz Number of cells in the Z-direction + * \param[in] n_cells Total number of cells + */ + __global__ void calculateMagneticDivergence(Real const *dev_conserved, + Real *maxDivergence, + Real const dx, + Real const dy, + Real const dz, + int const nx, + int const ny, + int const nz, + int const n_cells); + // ========================================================================= + + // ========================================================================= + /*! + * \brief Handling launching and returning the value from the + * `mhd::calculateMagneticDivergence` kernel + * + * \param[in] dev_conserved The device array of conserved variables + * \param[in] dx Cell size in the X-direction + * \param[in] dy Cell size in the Y-direction + * \param[in] dz Cell size in the Z-direction + * \param[in] nx Number of cells in the X-direction + * \param[in] ny Number of cells in the Y-direction + * \param[in] nz Number of cells in the Z-direction + * \param[in] n_cells Total number of cells + * \return Real The maximum divergence of the magnetic field in the local + * part of the grid + */ + Real launchCalculateMagneticDivergence(Real const *dev_conserved, + Real const dx, + Real const dy, + Real const dz, + int const nx, + int const ny, + int const nz, + int const n_cells); + // ========================================================================= + +} // end namespace mhd \ No newline at end of file diff --git a/src/mhd/magnetic_divergence_tests.cu b/src/mhd/magnetic_divergence_tests.cu new file mode 100644 index 000000000..ba2695e53 --- /dev/null +++ b/src/mhd/magnetic_divergence_tests.cu @@ -0,0 +1,59 @@ +/*! + * \file magnetic_divergence_tests.cu + * \author Robert 'Bob' Caddy (rvc@pitt.edu) + * \brief Tests for the magnetic divergence code + * + */ + + +// STL Includes +#include +#include +#include +#include + +// External Includes +#include // Include GoogleTest and related libraries/headers + +// Local Includes +#include "../utils/testing_utilities.h" +#include "../mhd/magnetic_divergence.h" +#include "../utils/DeviceVector.h" +#include "../global/global.h" + +// ============================================================================= +// Tests for the magnetic field divergence functions +// ============================================================================= +TEST(tMHDLaunchCalculateMagneticDivergence, CorrectInputExpectCorrectOutput) +{ + // Grid Parameters & testing parameters + size_t const gridSize = 96; // Needs to be at least 64 so that each thread has a value + size_t const n_ghost = 4; + size_t const nx = gridSize+2*n_ghost, ny = nx, nz = nx; + size_t const n_cells = nx*ny*nz; + size_t const n_fields = 8; + Real const dx = 3, dy = dx, dz = dx; + std::vector host_grid(n_cells*n_fields); + + // Fill grid with random values and randomly assign maximum value + std::mt19937 prng(1); + std::uniform_real_distribution doubleRand(1, 5); + for (size_t i = 0; i < host_grid.size(); i++) + { + host_grid.at(i) = doubleRand(prng); + } + + // Allocating and copying to device + cuda_utilities::DeviceVector dev_grid(host_grid.size()); + dev_grid.cpyHostToDevice(host_grid); + + // Get test data + Real testDivergence = mhd::launchCalculateMagneticDivergence(dev_grid.data(), dx, dy, dz, nx, ny, nz, n_cells); + + // Perform Comparison + Real const fiducialDivergence = 3.6318132783263106; + testingUtilities::checkResults(fiducialDivergence, testDivergence, "maximum divergence"); +} +// ============================================================================= +// End of tests for the magnetic field divergence functions +// ============================================================================= diff --git a/src/mhd/magnetic_update.cu b/src/mhd/magnetic_update.cu new file mode 100644 index 000000000..78f298e05 --- /dev/null +++ b/src/mhd/magnetic_update.cu @@ -0,0 +1,80 @@ +/*! + * \file magnetic_update.cu + * \author Robert 'Bob' Caddy (rvc@pitt.edu) + * \brief Contains the definition of the kernel to update the magnetic field + * + */ + +// STL Includes + +// External Includes + +// Local Includes +#include "../mhd/magnetic_update.h" +#include "../utils/cuda_utilities.h" + +namespace mhd +{ + // ========================================================================= + __global__ void Update_Magnetic_Field_3D(Real *sourceGrid, + Real *destinationGrid, + Real *ctElectricFields, + int const nx, + int const ny, + int const nz, + int const n_cells, + Real const dt, + Real const dx, + Real const dy, + Real const dz) + { + // get a thread index + int const blockId = blockIdx.x + blockIdx.y*gridDim.x; + int const threadId = threadIdx.x + blockId * blockDim.x; + int xid, yid, zid; + cuda_utilities::compute3DIndices(threadId, nx, ny, xid, yid, zid); + + // Thread guard to avoid overrun and to skip ghost cells that cannot be + // evolved due to missing electric fields that can't be reconstructed + if ( xid < nx-1 + and yid < ny-1 + and zid < nz-1) + { + // Compute the three dt/dx quantities + Real const dtodx = dt/dx; + Real const dtody = dt/dy; + Real const dtodz = dt/dz; + + // Load the various edge electric fields required. The '1' and '2' + // fields are not shared and the '3' fields are shared by two of the + // updates + Real electric_x_1 = ctElectricFields[(cuda_utilities::compute1DIndex(xid , yid+1, zid , nx, ny))]; + Real electric_x_2 = ctElectricFields[(cuda_utilities::compute1DIndex(xid , yid , zid+1, nx, ny))]; + Real electric_x_3 = ctElectricFields[(cuda_utilities::compute1DIndex(xid , yid+1, zid+1, nx, ny))]; + Real electric_y_1 = ctElectricFields[(cuda_utilities::compute1DIndex(xid+1, yid , zid , nx, ny)) + n_cells]; + Real electric_y_2 = ctElectricFields[(cuda_utilities::compute1DIndex(xid , yid , zid+1, nx, ny)) + n_cells]; + Real electric_y_3 = ctElectricFields[(cuda_utilities::compute1DIndex(xid+1, yid , zid+1, nx, ny)) + n_cells]; + Real electric_z_1 = ctElectricFields[(cuda_utilities::compute1DIndex(xid+1, yid , zid , nx, ny)) + 2 * n_cells]; + Real electric_z_2 = ctElectricFields[(cuda_utilities::compute1DIndex(xid , yid+1, zid , nx, ny)) + 2 * n_cells]; + Real electric_z_3 = ctElectricFields[(cuda_utilities::compute1DIndex(xid+1, yid+1, zid , nx, ny)) + 2 * n_cells]; + + // Perform Updates + + // X field update + destinationGrid[threadId + (5+NSCALARS)*n_cells] = sourceGrid[threadId + (5+NSCALARS)*n_cells] + + dtodz * (electric_y_3 - electric_y_1) + + dtody * (electric_z_1 - electric_z_3); + + // Y field update + destinationGrid[threadId + (6+NSCALARS)*n_cells] = sourceGrid[threadId + (6+NSCALARS)*n_cells] + + dtodx * (electric_z_3 - electric_z_2) + + dtodz * (electric_x_1 - electric_x_3); + + // Z field update + destinationGrid[threadId + (7+NSCALARS)*n_cells] = sourceGrid[threadId + (7+NSCALARS)*n_cells] + + dtody * (electric_x_3 - electric_x_2) + + dtodx * (electric_y_2 - electric_y_3); + } + } + // ========================================================================= +} // end namespace mhd \ No newline at end of file diff --git a/src/mhd/magnetic_update.h b/src/mhd/magnetic_update.h new file mode 100644 index 000000000..2c89e26ba --- /dev/null +++ b/src/mhd/magnetic_update.h @@ -0,0 +1,57 @@ +/*! + * \file magnetic_update.h + * \author Robert 'Bob' Caddy (rvc@pitt.edu) + * \brief Contains the declaration of the kernel to update the magnetic field + * + */ + +#pragma once + +// STL Includes + +// External Includes + +// Local Includes +#include "../global/global.h" +#include "../global/global_cuda.h" +#include "../utils/gpu.hpp" + +/*! + * \brief Namespace for MHD code + * + */ +namespace mhd +{ + // ========================================================================= + /*! + * \brief Update the magnetic field using the CT electric fields + * + * \param[in] sourceGrid The array which holds the old values of the + * magnetic field + * \param[out] destinationGrid The array to hold the updated values of the + * magnetic field + * \param[in] ctElectricFields The array of constrained transport electric + * fields + * \param[in] nx The number of cells in the x-direction + * \param[in] ny The number of cells in the y-direction + * \param[in] nz The number of cells in the z-direction + * \param[in] n_cells The total number of cells + * \param[in] dt The time step. If doing the half time step update make sure + * to divide it by two when passing the time step to this kernel + * \param[in] dx The size of each cell in the x-direction + * \param[in] dy The size of each cell in the y-direction + * \param[in] dz The size of each cell in the z-direction + */ + __global__ void Update_Magnetic_Field_3D(Real *sourceGrid, + Real *destinationGrid, + Real *ctElectricFields, + int const nx, + int const ny, + int const nz, + int const n_cells, + Real const dt, + Real const dx, + Real const dy, + Real const dz); + // ========================================================================= +} // end namespace mhd \ No newline at end of file diff --git a/src/mhd/magnetic_update_tests.cu b/src/mhd/magnetic_update_tests.cu new file mode 100644 index 000000000..7cb4f68f2 --- /dev/null +++ b/src/mhd/magnetic_update_tests.cu @@ -0,0 +1,150 @@ +/*! + * \file magnetic_update_tests.cu + * \author Robert 'Bob' Caddy (rvc@pitt.edu) + * \brief Tests for the magnetic update code + * + */ + +// STL Includes +#include +#include +#include +#include + +// External Includes +#include // Include GoogleTest and related libraries/headers + +// Local Includes +#include "../utils/testing_utilities.h" +#include "../utils/cuda_utilities.h" +#include "../mhd/magnetic_update.h" + +// ============================================================================= +/*! + * \brief Test fixture for tMHDUpdateMagneticField3D test suite + * + */ +class tMHDUpdateMagneticField3D : public ::testing::Test +{ +public: + + /*! + * \brief Initialize and allocate all the various required variables and + * arrays + * + */ + tMHDUpdateMagneticField3D() + : + nx(2), + ny(nx), + nz(nx), + n_cells(nx*ny*nz), + dt(3.2), + dx(2.5), + dy(2.5), + dz(2.5), + sourceGrid (n_cells * (8+NSCALARS)), + destinationGrid (n_cells * (8+NSCALARS), -999.), + ctElectricFields(n_cells * 3), + fiducialData (n_cells * (8+NSCALARS), -999.), + dimGrid((n_cells + TPB - 1),1,1), + dimBlock(TPB,1,1) + { + // Allocate device arrays + CudaSafeCall ( cudaMalloc(&dev_sourceGrid, sourceGrid.size()*sizeof(double)) ); + CudaSafeCall ( cudaMalloc(&dev_destinationGrid, destinationGrid.size()*sizeof(double)) ); + CudaSafeCall ( cudaMalloc(&dev_ctElectricFields, ctElectricFields.size()*sizeof(double)) ); + + // Populate the grids with values where vector.at(i) = double(i). The + // values chosen aren't that important, just that every cell has a unique + // value + std::iota(std::begin(sourceGrid), std::end(sourceGrid), 0.); + std::iota(std::begin(ctElectricFields), std::end(ctElectricFields), sourceGrid.back() + 1); + } + ~tMHDUpdateMagneticField3D() = default; +protected: + // Initialize the test grid and other state variables + size_t const nx, ny, nz; + size_t const n_cells; + Real const dt, dx, dy, dz; + + // Launch Parameters + dim3 const dimGrid; // How many blocks in the grid + dim3 const dimBlock; // How many threads per block + + // Make sure the vector is large enough that the locations where the + // magnetic field would be in the real grid are filled + std::vector sourceGrid; + std::vector destinationGrid; + std::vector ctElectricFields; + std::vector fiducialData; + + // device pointers + double *dev_sourceGrid, *dev_destinationGrid, *dev_ctElectricFields, *dev_fiducialData; + + /*! + * \brief Launch the kernel and check results + * + */ + void runTest() + { + // Copy values to GPU + CudaSafeCall( cudaMemcpy(dev_sourceGrid, sourceGrid.data(), sourceGrid.size()*sizeof(Real), cudaMemcpyHostToDevice) ); + CudaSafeCall( cudaMemcpy(dev_destinationGrid, destinationGrid.data(), destinationGrid.size()*sizeof(Real), cudaMemcpyHostToDevice) ); + CudaSafeCall( cudaMemcpy(dev_ctElectricFields, ctElectricFields.data(), ctElectricFields.size()*sizeof(Real), cudaMemcpyHostToDevice) ); + + // Call the kernel to test + hipLaunchKernelGGL(mhd::Update_Magnetic_Field_3D, + dimGrid, + dimBlock, + 0, + 0, + dev_sourceGrid, + dev_destinationGrid, + dev_ctElectricFields, + nx, + ny, + nz, + n_cells, + dt, + dx, + dy, + dz); + CudaCheckError(); + + // Copy test data back + CudaSafeCall( cudaMemcpy(destinationGrid.data(), + dev_destinationGrid, + destinationGrid.size()*sizeof(Real), + cudaMemcpyDeviceToHost) ); + cudaDeviceSynchronize(); + + // Check the results + for (size_t i = 0; i < fiducialData.size(); i++) + { + int xid, yid, zid; + cuda_utilities::compute3DIndices(i, nx, ny, xid, yid, zid); + testingUtilities::checkResults(fiducialData.at(i), + destinationGrid.at(i), + "value at i = " + std::to_string(i) + + ", xid = " + std::to_string(xid) + + ", yid = " + std::to_string(yid) + + ", zid = " + std::to_string(zid)); + } + } +}; +// ============================================================================= + +// ============================================================================= +TEST_F(tMHDUpdateMagneticField3D, + CorrectInputExpectCorrectOutput) +{ + // Fiducial values + fiducialData.at(40) = 42.559999999999995; + fiducialData.at(48) = 44.160000000000004; + fiducialData.at(56) = 57.280000000000001; + + // Launch kernel and check results + runTest(); +} +// ============================================================================= diff --git a/src/reconstruction/pcm_cuda.cu b/src/reconstruction/pcm_cuda.cu index 1964ddedf..91fb75223 100644 --- a/src/reconstruction/pcm_cuda.cu +++ b/src/reconstruction/pcm_cuda.cu @@ -7,7 +7,8 @@ #include "../global/global.h" #include "../global/global_cuda.h" #include "../reconstruction/pcm_cuda.h" - +#include "../utils/mhd_utilities.h" +#include "../utils/cuda_utilities.h" __global__ void PCM_Reconstruction_1D(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int n_cells, int n_ghost, Real gamma, int n_fields) { @@ -18,11 +19,11 @@ __global__ void PCM_Reconstruction_1D(Real *dev_conserved, Real *dev_bounds_L, R #ifdef DE Real ge; - #endif + #endif //DE #ifdef SCALAR Real scalar[NSCALARS]; - #endif + #endif //SCALAR // get a global thread ID int xid = threadIdx.x + blockIdx.x*blockDim.x; @@ -43,10 +44,10 @@ __global__ void PCM_Reconstruction_1D(Real *dev_conserved, Real *dev_bounds_L, R for (int i=0; i 0) + { + id = cuda_utilities::compute1DIndex(xid-1, yid, zid, nx, ny); + dev_bounds_Rx[ id] = d; + dev_bounds_Rx[ n_cells + id] = mx; + dev_bounds_Rx[2*n_cells + id] = my; + dev_bounds_Rx[3*n_cells + id] = mz; + dev_bounds_Rx[4*n_cells + id] = E; + #ifdef SCALAR + for (int i=0; i 0) + { + // Send the y-1/2 Right interface + id = cuda_utilities::compute1DIndex(xid, yid-1, zid, nx, ny); + dev_bounds_Ry[ id] = d; + dev_bounds_Ry[ n_cells + id] = mx; + dev_bounds_Ry[2*n_cells + id] = my; + dev_bounds_Ry[3*n_cells + id] = mz; + dev_bounds_Ry[4*n_cells + id] = E; + #ifdef SCALAR + for (int i=0; i 0) + { + // Send the z-1/2 Right interface + id = cuda_utilities::compute1DIndex(xid, yid, zid-1, nx, ny); + dev_bounds_Rz[ id] = d; + dev_bounds_Rz[ n_cells + id] = mx; + dev_bounds_Rz[2*n_cells + id] = my; + dev_bounds_Rz[3*n_cells + id] = mz; + dev_bounds_Rz[4*n_cells + id] = E; + #ifdef SCALAR + for (int i=0; i 0.0) { del_ge_G = 2.0*del_ge_L*del_ge_R / (del_ge_L+del_ge_R); } else { del_ge_G = 0.0; } - #endif + #endif //DE #ifdef SCALAR for (int i=0; i 0.0) { del_scalar_G[i] = 2.0*del_scalar_L[i]*del_scalar_R[i] / (del_scalar_L[i]+del_scalar_R[i]); } else { del_scalar_G[i] = 0.0; } } - #endif + #endif //SCALAR // Project the left, right, centered and van Leer differences onto the characteristic variables @@ -316,7 +316,7 @@ __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou lim_slope_b = fmin(fabs(del_ge_C), fabs(del_ge_G)); del_ge_m_i = sgn_CUDA(del_ge_C) * fmin(2.0*lim_slope_a, lim_slope_b); } - #endif + #endif //DE #ifdef SCALAR for (int i=0; i= 0) { lamdiff = lambda_p - lambda_m; @@ -498,12 +498,12 @@ __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou sum_3 += lamdiff * del_vz_m_i; #ifdef DE sum_ge += lamdiff * del_ge_m_i; - #endif + #endif //DE #ifdef SCALAR for (int i=0; i= 0) { @@ -522,24 +522,24 @@ __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou p_L_iph += 0.5*dtodx*sum_4; #ifdef DE ge_L_iph += 0.5*dtodx*sum_ge; - #endif + #endif //DE #ifdef SCALAR for (int i=0; i 0.0) { del_ge_G = 2.0*del_ge_L*del_ge_R / (del_ge_L+del_ge_R); } else { del_ge_G = 0.0; } - #endif + #endif //DE #ifdef SCALAR for (int i=0; i 0.0) { del_scalar_G[i] = 2.0*del_scalar_L[i]*del_scalar_R[i] / (del_scalar_L[i]+del_scalar_R[i]); } else { del_scalar_G[i] = 0.0; } } - #endif + #endif //SCALAR // Step 3 - Project the left, right, centered and van Leer differences onto the characteristic variables @@ -372,7 +372,7 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou del_ge_m_imo = sgn_CUDA(del_ge_C) * fmin((Real) 2.0*lim_slope_a, lim_slope_b); } else del_ge_m_imo = 0.0; - #endif + #endif //DE #ifdef SCALAR for (int i=0; i 0.0) { @@ -382,7 +382,7 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou } else del_scalar_m_imo[i] = 0.0; } - #endif + #endif //SCALAR // Step 5 - Project the monotonized difference in the characteristic variables back onto the @@ -442,7 +442,7 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou del_ge_C = 0.5*(ge_ipo - ge_imo); if (del_ge_L*del_ge_R > 0.0) { del_ge_G = 2.0*del_ge_L*del_ge_R / (del_ge_L+del_ge_R); } else { del_ge_G = 0.0; } - #endif + #endif //DE #ifdef SCALAR for (int i=0; i 0.0) { del_scalar_G[i] = 2.0*del_scalar_L[i]*del_scalar_R[i] / (del_scalar_L[i]+del_scalar_R[i]); } else { del_scalar_G[i] = 0.0; } } - #endif + #endif //SCALAR // Step 3 - Project the left, right, centered, and van Leer differences onto the characteristic variables // Stone Eqn 37 (del_a are differences in characteristic variables, see Stone for notation) @@ -520,7 +520,7 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou del_ge_m_i = sgn_CUDA(del_ge_C) * fmin((Real) 2.0*lim_slope_a, lim_slope_b); } else del_ge_m_i = 0.0; - #endif + #endif //DE #ifdef SCALAR for (int i=0; i 0.0) { @@ -530,7 +530,7 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou } else del_scalar_m_i[i] = 0.0; } - #endif + #endif //SCALAR // Step 5 - Project the monotonized difference in the characteristic variables back onto the @@ -591,7 +591,7 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou del_ge_C = 0.5*(ge_ipt- ge_i); if (del_ge_L*del_ge_R > 0.0) { del_ge_G = 2.0*del_ge_L*del_ge_R / (del_ge_L+del_ge_R); } else { del_ge_G = 0.0; } - #endif + #endif //DE #ifdef SCALAR for (int i=0; i 0.0) { del_scalar_G[i] = 2.0*del_scalar_L[i]*del_scalar_R[i] / (del_scalar_L[i]+del_scalar_R[i]); } else { del_scalar_G[i] = 0.0; } } - #endif + #endif //SCALAR // Step 3 - Project the left, right, centered, and van Leer differences onto the characteristic variables @@ -670,7 +670,7 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou del_ge_m_ipo = sgn_CUDA(del_ge_C) * fmin((Real) 2.0*lim_slope_a, lim_slope_b); } else del_ge_m_ipo = 0.0; - #endif + #endif //DE #ifdef SCALAR for (int i=0; i 0.0) { @@ -680,7 +680,7 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou } else del_scalar_m_ipo[i] = 0.0; } - #endif + #endif //SCALAR // Step 5 - Project the monotonized difference in the characteristic variables back onto the @@ -713,13 +713,13 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou #ifdef DE ge_L = 0.5*(ge_i + ge_imo) - (del_ge_m_i - del_ge_m_imo) / 6.0; ge_R = 0.5*(ge_ipo + ge_i) - (del_ge_m_ipo - del_ge_m_i) / 6.0; - #endif + #endif //DE #ifdef SCALAR for (int i=0; i= 0) { @@ -909,24 +909,24 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou chi_5 = A*(del_p_m_i - p_6) + B*p_6; #ifdef DE chi_ge = A*(del_ge_m_i - ge_6) + B*ge_6; - #endif + #endif //DE #ifdef SCALAR for (int i=0; i= 0) { @@ -952,12 +952,12 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou p_R += sum_5; #ifdef DE ge_R += sum_ge; - #endif + #endif //DE #ifdef SCALAR for (int i=0; i= 0.0) { - _hlldInternal::_returnFluxes(threadId, o1, o2, o3, n_cells, + mhd::_internal::_returnFluxes(threadId, o1, o2, o3, n_cells, dev_flux, densityFluxL, momentumFluxXL, momentumFluxYL, momentumFluxZL, @@ -263,13 +264,13 @@ // Right state Real densityFluxR, momentumFluxXR, momentumFluxYR, momentumFluxZR, magneticFluxYR, magneticFluxZR, energyFluxR; - _hlldInternal::_nonStarFluxes(momentumXR, + mhd::_internal::_nonStarFluxes(momentumXR, velocityXR, velocityYR, velocityZR, totalPressureR, energyR, - magneticXR, + magneticX, magneticYR, magneticZR, densityFluxR, @@ -284,7 +285,7 @@ // In this state the flow is supersonic if (speedR <= 0.0) { - _hlldInternal::_returnFluxes(threadId, o1, o2, o3, n_cells, + mhd::_internal::_returnFluxes(threadId, o1, o2, o3, n_cells, dev_flux, densityFluxR, momentumFluxXR, momentumFluxYR, momentumFluxZR, @@ -317,7 +318,7 @@ densityStarFluxL, momentumStarFluxXL, momentumStarFluxYL, momentumStarFluxZL, magneticStarFluxYL, magneticStarFluxZL, energyStarFluxL; - _hlldInternal::_starFluxes(speedM, + mhd::_internal::_starFluxes(speedM, speedL, densityL, velocityXL, @@ -328,7 +329,7 @@ momentumZL, energyL, totalPressureL, - magneticXL, + magneticX, magneticYL, magneticZL, densityStarL, @@ -357,7 +358,7 @@ // In this state the flow is subsonic if (speedStarL >= 0.0) { - _hlldInternal::_returnFluxes(threadId, o1, o2, o3, n_cells, + mhd::_internal::_returnFluxes(threadId, o1, o2, o3, n_cells, dev_flux, densityStarFluxL, momentumStarFluxXL, momentumStarFluxYL, momentumStarFluxZL, @@ -381,7 +382,7 @@ densityStarFluxR, momentumStarFluxXR, momentumStarFluxYR, momentumStarFluxZR, magneticStarFluxYR, magneticStarFluxZR, energyStarFluxR; - _hlldInternal::_starFluxes(speedM, + mhd::_internal::_starFluxes(speedM, speedR, densityR, velocityXR, @@ -392,7 +393,7 @@ momentumZR, energyR, totalPressureR, - magneticXR, + magneticX, magneticYR, magneticZR, densityStarR, @@ -421,7 +422,7 @@ // In this state the flow is subsonic if (speedStarR <= 0.0) { - _hlldInternal::_returnFluxes(threadId, o1, o2, o3, n_cells, + mhd::_internal::_returnFluxes(threadId, o1, o2, o3, n_cells, dev_flux, densityStarFluxR, momentumStarFluxXR, momentumStarFluxYR, momentumStarFluxZR, @@ -445,8 +446,8 @@ Real velocityDoubleStarY, velocityDoubleStarZ, magneticDoubleStarY, magneticDoubleStarZ, energyDoubleStarL, energyDoubleStarR; - _hlldInternal::_doubleStarState(speedM, - magneticXL, + mhd::_internal::_doubleStarState(speedM, + magneticX, totalPressureStar, densityStarL, velocityStarYL, @@ -473,7 +474,7 @@ Real momentumDoubleStarFluxX, momentumDoubleStarFluxY, momentumDoubleStarFluxZ, energyDoubleStarFlux, magneticDoubleStarFluxY, magneticDoubleStarFluxZ; - _hlldInternal::_doubleStarFluxes(speedStarL, + mhd::_internal::_doubleStarFluxes(speedStarL, momentumStarFluxXL, momentumStarFluxYL, momentumStarFluxZL, @@ -500,7 +501,7 @@ magneticDoubleStarFluxY, magneticDoubleStarFluxZ); - _hlldInternal::_returnFluxes(threadId, o1, o2, o3, n_cells, + mhd::_internal::_returnFluxes(threadId, o1, o2, o3, n_cells, dev_flux, densityStarFluxL, momentumDoubleStarFluxX, momentumDoubleStarFluxY, momentumDoubleStarFluxZ, @@ -525,7 +526,7 @@ Real momentumDoubleStarFluxX, momentumDoubleStarFluxY, momentumDoubleStarFluxZ, energyDoubleStarFlux, magneticDoubleStarFluxY, magneticDoubleStarFluxZ; - _hlldInternal::_doubleStarFluxes(speedStarR, + mhd::_internal::_doubleStarFluxes(speedStarR, momentumStarFluxXR, momentumStarFluxYR, momentumStarFluxZR, @@ -552,7 +553,7 @@ magneticDoubleStarFluxY, magneticDoubleStarFluxZ); - _hlldInternal::_returnFluxes(threadId, o1, o2, o3, n_cells, + mhd::_internal::_returnFluxes(threadId, o1, o2, o3, n_cells, dev_flux, densityStarFluxR, momentumDoubleStarFluxX, momentumDoubleStarFluxY, momentumDoubleStarFluxZ, @@ -575,7 +576,7 @@ }; // ========================================================================= - namespace _hlldInternal + namespace _internal { // ===================================================================== __device__ __host__ void _approximateWaveSpeeds(Real const &densityL, @@ -587,7 +588,7 @@ Real const &velocityZL, Real const &gasPressureL, Real const &totalPressureL, - Real const &magneticXL, + Real const &magneticX, Real const &magneticYL, Real const &magneticZL, Real const &densityR, @@ -599,7 +600,6 @@ Real const &velocityZR, Real const &gasPressureR, Real const &totalPressureR, - Real const &magneticXR, Real const &magneticYR, Real const &magneticZR, Real const &gamma, @@ -612,15 +612,15 @@ Real &densityStarR) { // Get the fast magnetosonic wave speeds - Real magSonicL = mhdUtils::fastMagnetosonicSpeed(densityL, + Real magSonicL = mhd::utils::fastMagnetosonicSpeed(densityL, gasPressureL, - magneticXL, + magneticX, magneticYL, magneticZL, gamma); - Real magSonicR = mhdUtils::fastMagnetosonicSpeed(densityR, + Real magSonicR = mhd::utils::fastMagnetosonicSpeed(densityR, gasPressureR, - magneticXR, + magneticX, magneticYR, magneticZR, gamma); @@ -646,8 +646,8 @@ densityStarR = densityR * (speedR - velocityXR) / (speedR - speedM); // Compute the S_L^* and S_R^* wave speeds - speedStarL = speedM - mhdUtils::alfvenSpeed(magneticXL, densityStarL); - speedStarR = speedM + mhdUtils::alfvenSpeed(magneticXR, densityStarR); + speedStarL = speedM - mhd::utils::alfvenSpeed(magneticX, densityStarL); + speedStarR = speedM + mhd::utils::alfvenSpeed(magneticX, densityStarR); } // ===================================================================== @@ -701,13 +701,13 @@ Real const &magneticFluxY, Real const &magneticFluxZ) { - dev_flux[threadId] = densityFlux; - dev_flux[threadId + n_cells * o1] = momentumFluxX; - dev_flux[threadId + n_cells * o2] = momentumFluxY; - dev_flux[threadId + n_cells * o3] = momentumFluxZ; - dev_flux[threadId + n_cells * 4] = energyFlux; - dev_flux[threadId + n_cells * (o2 + 4 + NSCALARS)] = magneticFluxY; - dev_flux[threadId + n_cells * (o3 + 4 + NSCALARS)] = magneticFluxZ; + dev_flux[threadId] = densityFlux; + dev_flux[threadId + n_cells * o1] = momentumFluxX; + dev_flux[threadId + n_cells * o2] = momentumFluxY; + dev_flux[threadId + n_cells * o3] = momentumFluxZ; + dev_flux[threadId + n_cells * 4] = energyFlux; + dev_flux[threadId + n_cells * (5 + NSCALARS)] = magneticFluxY; + dev_flux[threadId + n_cells * (6 + NSCALARS)] = magneticFluxZ; } // ===================================================================== @@ -752,7 +752,7 @@ if (fabs(density * (speedSide - velocityX) * (speedSide - speedM) - (magneticX * magneticX)) - < totalPressureStar * _hlldInternal::_hlldSmallNumber) + < totalPressureStar * mhd::_internal::_hlldSmallNumber) { velocityStarY = velocityY; velocityStarZ = velocityZ; @@ -780,8 +780,8 @@ energyStar = ( energy * (speedSide - velocityX) - totalPressure * velocityX + totalPressureStar * speedM - + magneticX * (_hlldInternal::_dotProduct(velocityX, velocityY, velocityZ, magneticX, magneticY, magneticZ) - - _hlldInternal::_dotProduct(speedM, velocityStarY, velocityStarZ, magneticX, magneticStarY, magneticStarZ))) + + magneticX * (math_utils::dotProduct(velocityX, velocityY, velocityZ, magneticX, magneticY, magneticZ) + - math_utils::dotProduct(speedM, velocityStarY, velocityStarZ, magneticX, magneticStarY, magneticStarZ))) / (speedSide - speedM); // Now compute the star state fluxes @@ -819,7 +819,7 @@ Real &energyDoubleStarR) { // if Bx is zero then just return the star state - if (magneticX < _hlldInternal::_hlldSmallNumber * totalPressureStar) + if (magneticX < mhd::_internal::_hlldSmallNumber * totalPressureStar) { velocityDoubleStarY = velocityStarYL; velocityDoubleStarZ = velocityStarZL; @@ -856,17 +856,17 @@ + magXSign * (sqrtDL * sqrtDR) * (velocityStarZR - velocityStarZL)); // Double star energy - Real velDblStarDotMagDblStar = _hlldInternal::_dotProduct(speedM, + Real velDblStarDotMagDblStar = math_utils::dotProduct(speedM, velocityDoubleStarY, velocityDoubleStarZ, magneticX, magneticDoubleStarY, magneticDoubleStarZ); energyDoubleStarL = energyStarL - sqrtDL * magXSign - * (_hlldInternal::_dotProduct(speedM, velocityStarYL, velocityStarZL, magneticX, magneticStarYL, magneticStarZL) + * (math_utils::dotProduct(speedM, velocityStarYL, velocityStarZL, magneticX, magneticStarYL, magneticStarZL) - velDblStarDotMagDblStar); energyDoubleStarR = energyStarR + sqrtDR * magXSign - * (_hlldInternal::_dotProduct(speedM, velocityStarYR, velocityStarZR, magneticX, magneticStarYR, magneticStarZR) + * (math_utils::dotProduct(speedM, velocityStarYR, velocityStarZR, magneticX, magneticStarYR, magneticStarZR) - velDblStarDotMagDblStar); } } @@ -909,7 +909,7 @@ } // ===================================================================== - } // _hlldInternal namespace - + } // mhd::_internal namespace +} // end namespace mhd #endif // CUDA \ No newline at end of file diff --git a/src/riemann_solvers/hlld_cuda.h b/src/riemann_solvers/hlld_cuda.h index d8d58dce1..357c850d6 100644 --- a/src/riemann_solvers/hlld_cuda.h +++ b/src/riemann_solvers/hlld_cuda.h @@ -13,23 +13,32 @@ #include "../global/global.h" #ifdef CUDA - +/*! + * \brief Namespace for MHD code + * + */ +namespace mhd +{ /*! * \brief Compute the HLLD fluxes from Miyoshi & Kusano 2005 * - * \param[in] dev_bounds_L - * \param[in] dev_bounds_R - * \param[out] dev_flux - * \param[in] nx - * \param[in] ny - * \param[in] nz - * \param[in] n_ghost - * \param[in] gamma - * \param[in] dir - * \param[in] n_fields + * \param[in] dev_bounds_L The interface states on the left side of the interface + * \param[in] dev_bounds_R The interface states on the right side of the interface + * \param[in] dev_magnetic_face A pointer to the begining of the conserved + * magnetic field array that is stored at the interface. I.e. for the + * X-direction solve this would be the begining of the X-direction fields + * \param[out] dev_flux The output flux + * \param[in] nx Number of cells in the X-direction + * \param[in] ny Number of cells in the Y-direction + * \param[in] nz Number of cells in the Z-direction + * \param[in] n_ghost Number of ghost cells on each side + * \param[in] gamma The adiabatic index + * \param[in] dir The direction that the solve is taking place in. 0=X, 1=Y, 2=Z + * \param[in] n_fields The total number of fields */ __global__ void Calculate_HLLD_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_R, + Real *dev_magnetic_face, Real *dev_flux, int nx, int ny, @@ -44,7 +53,7 @@ * solver * */ - namespace _hlldInternal + namespace _internal { /*! * \brief Used for some comparisons. Value was chosen to match what is @@ -65,7 +74,7 @@ * \param[in] velocityZL Velocity in the Z-direction, left side * \param[in] gasPressureL Gas pressure, left side * \param[in] totalPressureL Total MHD pressure, left side - * \param[in] magneticXL Magnetic field in the X-direction, left side + * \param[in] magneticX Magnetic field in the X-direction, left side * \param[in] magneticYL Magnetic field in the Y-direction, left side * \param[in] magneticZL Magnetic field in the Z-direction, left side * \param[in] densityR Density, right side @@ -77,7 +86,6 @@ * \param[in] velocityZR Velocity in the Z-direction, right side * \param[in] gasPressureR Gas pressure, right side * \param[in] totalPressureR Total MHD pressure, right side - * \param[in] magneticXR Magnetic field in the X-direction, right side * \param[in] magneticYR Magnetic field in the Y-direction, right side * \param[in] magneticZR Magnetic field in the Z-direction, right side * \param[in] gamma Adiabatic index @@ -98,7 +106,7 @@ Real const &velocityZL, Real const &gasPressureL, Real const &totalPressureL, - Real const &magneticXL, + Real const &magneticX, Real const &magneticYL, Real const &magneticZL, Real const &densityR, @@ -110,7 +118,6 @@ Real const &velocityZR, Real const &gasPressureR, Real const &totalPressureR, - Real const &magneticXR, Real const &magneticYR, Real const &magneticZR, Real const &gamma, @@ -266,26 +273,6 @@ Real &magneticStarFluxY, Real &magneticStarFluxZ); - /*! - * \brief Compute the dot product of a and b. - * - * \param[in] a1 The first element of a - * \param[in] a2 The second element of a - * \param[in] a3 The third element of a - * \param[in] b1 The first element of b - * \param[in] b2 The second element of b - * \param[in] b3 The third element of b - * - * \return Real The dot product of a and b - */ - inline __device__ __host__ Real _dotProduct(Real const &a1, - Real const &a2, - Real const &a3, - Real const &b1, - Real const &b2, - Real const &b3) - {return a1*b1 + ((a2*b2) + (a3*b3));}; - /*! * \brief Compute the double star state * @@ -390,6 +377,6 @@ Real &magneticDoubleStarFluxY, Real &magneticDoubleStarFluxZ); - } // _hlldInternal namespace - + } // end namespace mhd::_internal +} // end namespace mhd #endif //CUDA diff --git a/src/riemann_solvers/hlld_cuda_tests.cu b/src/riemann_solvers/hlld_cuda_tests.cu index 754c2dba0..c39116d7a 100644 --- a/src/riemann_solvers/hlld_cuda_tests.cu +++ b/src/riemann_solvers/hlld_cuda_tests.cu @@ -21,7 +21,7 @@ #include "../utils/mhd_utilities.h" #include "../riemann_solvers/hlld_cuda.h" // Include code to test -#if defined(CUDA) && defined(HLLD) +#ifdef CUDA // ========================================================================= // Integration tests for the entire HLLD solver. Unit tests are below // ========================================================================= @@ -59,15 +59,23 @@ int const &direction=0) { - // Rearrange X, Y, and Z values if a different direction is chosen - // besides default - stateLeft = _cycleXYZ(stateLeft, direction); - stateRight = _cycleXYZ(stateRight, direction); + // Rearrange X, Y, and Z values for the chosen direction + std::rotate(stateLeft.begin() + 1, stateLeft.begin() + 4 - direction, stateLeft.begin() + 4); + std::rotate(stateRight.begin()+ 1, stateRight.begin()+ 4 - direction, stateRight.begin()+ 4); + + // Create new vectors that store the values in the way that the HLLD + // solver expects + size_t const magXIndex = 5+NSCALARS; + EXPECT_DOUBLE_EQ(stateLeft.at(magXIndex), stateRight.at(magXIndex)) + << "The left and right magnetic fields are not equal"; + std::vector const magneticX{stateLeft.at(magXIndex)}; + stateLeft.erase(stateLeft.begin() + magXIndex); + stateRight.erase(stateRight.begin() + magXIndex); // Simulation Paramters - int const nx = 1; // Number of cells in the x-direction? - int const ny = 1; // Number of cells in the y-direction? - int const nz = 1; // Number of cells in the z-direction? + int const nx = 1; // Number of cells in the x-direction + int const ny = 1; // Number of cells in the y-direction + int const nz = 1; // Number of cells in the z-direction int const nGhost = 0; // Isn't actually used it appears int nFields = 8; // Total number of conserved fields #ifdef SCALAR @@ -83,33 +91,40 @@ // Create the std::vector to store the fluxes and declare the device // pointers - std::vector testFlux(nFields); + std::vector testFlux(nFields-1, 0); Real *devConservedLeft; Real *devConservedRight; + Real *devConservedMagXFace; Real *devTestFlux; // Allocate device arrays and copy data - CudaSafeCall(cudaMalloc(&devConservedLeft, nFields*sizeof(Real))); - CudaSafeCall(cudaMalloc(&devConservedRight, nFields*sizeof(Real))); - CudaSafeCall(cudaMalloc(&devTestFlux, nFields*sizeof(Real))); + CudaSafeCall(cudaMalloc(&devConservedLeft, stateLeft.size()*sizeof(Real))); + CudaSafeCall(cudaMalloc(&devConservedRight, stateRight.size()*sizeof(Real))); + CudaSafeCall(cudaMalloc(&devConservedMagXFace, magneticX.size()*sizeof(Real))); + CudaSafeCall(cudaMalloc(&devTestFlux, testFlux.size()*sizeof(Real))); CudaSafeCall(cudaMemcpy(devConservedLeft, stateLeft.data(), - nFields*sizeof(Real), + stateLeft.size()*sizeof(Real), cudaMemcpyHostToDevice)); CudaSafeCall(cudaMemcpy(devConservedRight, stateRight.data(), - nFields*sizeof(Real), + stateRight.size()*sizeof(Real), + cudaMemcpyHostToDevice)); + CudaSafeCall(cudaMemcpy(devConservedMagXFace, + magneticX.data(), + magneticX.size()*sizeof(Real), cudaMemcpyHostToDevice)); // Run kernel - hipLaunchKernelGGL(Calculate_HLLD_Fluxes_CUDA, + hipLaunchKernelGGL(mhd::Calculate_HLLD_Fluxes_CUDA, dimGrid, dimBlock, 0, 0, devConservedLeft, // the "left" interface devConservedRight, // the "right" interface + devConservedMagXFace, // the magnetic field at the interface devTestFlux, nx, ny, @@ -122,13 +137,25 @@ CudaCheckError(); CudaSafeCall(cudaMemcpy(testFlux.data(), devTestFlux, - nFields*sizeof(Real), + testFlux.size()*sizeof(Real), cudaMemcpyDeviceToHost)); // Make sure to sync with the device so we have the results cudaDeviceSynchronize(); CudaCheckError(); + // Free device arrays + cudaFree(devConservedLeft); + cudaFree(devConservedRight); + cudaFree(devConservedMagXFace); + cudaFree(devTestFlux); + + // The HLLD solver only writes the the first two "slots" for + // magnetic flux so let's rearrange to make sure we have all the + // magnetic fluxes in the right spots + testFlux.insert(testFlux.begin() + magXIndex, 0.0); + std::rotate(testFlux.begin() + 1, testFlux.begin() + 1 + direction, testFlux.begin() + 4); // Rotate momentum + return testFlux; } // ===================================================================== @@ -185,10 +212,6 @@ scalarFlux.begin() + NSCALARS); #endif //SCALAR - // Rearrange X, Y, and Z values if a different direction is chosen - // besides default - fiducialFlux = _cycleXYZ(fiducialFlux, direction); - ASSERT_TRUE( (fiducialFlux.size() == testFlux.size()) and (fiducialFlux.size() == fieldNames.size())) << "The fiducial flux, test flux, and field name vectors are not all the same length" << std::endl @@ -242,7 +265,7 @@ output.at(1) = input.at(1) * input.at(0); // X Velocity to momentum output.at(2) = input.at(2) * input.at(0); // Y Velocity to momentum output.at(3) = input.at(3) * input.at(0); // Z Velocity to momentum - output.at(4) = mhdUtils::computeEnergy(input.at(4), + output.at(4) = mhd::utils::computeEnergy(input.at(4), input.at(0), input.at(1), input.at(2), @@ -266,7 +289,7 @@ conservedScalar.begin() + NSCALARS); #endif //SCALAR #ifdef DE - output.push_back(mhdUtils::computeThermalEnergy(output.at(4), + output.push_back(mhd::utils::computeThermalEnergy(output.at(4), output.at(0), output.at(1), output.at(2), @@ -294,52 +317,6 @@ } // ===================================================================== private: - // ===================================================================== - /*! - * \brief Cyclically permute the vector quantities in the list of - * conserved variables so that the same interfaces and fluxes can be - * used to test the HLLD solver in all 3 directions. - * - * \param[in,out] conservedVec The std::vector of conserved variables to - * be cyclically permutated - * \param[in] direction Which plane the interface is. 0 = plane normal - * to X, 1 = plane normal to Y, 2 = plane normal to Z - * - * \return std::vector The cyclically permutated list of conserved - * variables - */ - std::vector inline _cycleXYZ(std::vector conservedVec, - int const &direction) - { - switch (direction) - { - case 0: // Plane normal to X. Default case, do nothing - ; - break; - case 1: // Plane normal to Y - case 2: // Plane normal to Z - // Fall through for both Y and Z normal planes - { - size_t shift = 3 - direction; - auto momentumBegin = conservedVec.begin()+1; - auto magneticBegin = conservedVec.begin()+5; - #ifdef SCALAR - magneticBegin += NSCALARS; - #endif //SCALAR - - std::rotate(momentumBegin, momentumBegin+shift, momentumBegin+3); - std::rotate(magneticBegin, magneticBegin+shift, magneticBegin+3); - } - break; - default: - throw std::invalid_argument(("Invalid Value of `direction`" - " passed to `_cycleXYZ`. Value passed was " - + std::to_string(direction) + ", should be 0, 1, or 2.")); - break; - } - return conservedVec; - } - // ===================================================================== }; // ========================================================================= @@ -1525,6 +1502,61 @@ } // ========================================================================= + // ========================================================================= + /*! + * \brief Test the HLLD Riemann Solver using the constant states from the + * examples in cholla/examples/3D + * + */ + TEST_F(tMHDCalculateHLLDFluxesCUDA, + ConstantStatesExpectCorrectFlux) + { + // Constant Values + Real const gamma = 5./3.; + + std::vector const primitiveScalar{1.1069975296, 2.2286185018, 3.3155141875}; + + // States + std::vector const // | Density | X-Velocity | Y-Velocity | Z-Velocity | Pressure | X-Magnetic Field | Y-Magnetic Field | Z-Magnetic Field | Adiabatic Index | Passive Scalars | + zeroMagneticField = primitive2Conserved({1e4, 0.0, 0.0, 0.0, 1.380658E-5, 0.0, 0.0, 0.0}, gamma, primitiveScalar), + onesMagneticField = primitive2Conserved({1e4, 0.0, 0.0, 0.0, 1.380658E-5, 1.0, 1.0, 1.0}, gamma, primitiveScalar); + + for (size_t direction = 2; direction < 3; direction++) + { + { + std::string const outputString {"Left State: Constant state, zero magnetic field\n" + "Right State: Constant state, zero magnetic field\n" + "HLLD State: Left Star"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{0,1.380658e-05,0,0,0,0,0,0}; + std::vector const scalarFlux{0,0,0}; + Real thermalEnergyFlux = 0.; + std::vector const testFluxes = computeFluxes(zeroMagneticField, + zeroMagneticField, + gamma, + direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + } + { + std::string const outputString {"Left State: Constant state, ones magnetic field\n" + "Right State: Constant state, ones magnetic field\n" + "HLLD State: Left Double Star"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{0, 0.50001380657999994, -1, -1, -1.7347234759768071e-18, 0.0, 3.4694469519536142e-18, 3.4694469519536142e-18}; + std::vector const scalarFlux{1.5731381063233131e-14, 3.1670573744690958e-14, 4.7116290424753513e-14}; + Real thermalEnergyFlux = 0.; + std::vector const testFluxes = computeFluxes(onesMagneticField, + onesMagneticField, + gamma, + direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + } + } + } + // ========================================================================= + // ========================================================================= /*! * \brief Test the HLLD Riemann Solver with the degenerate state @@ -1633,11 +1665,11 @@ negativeDensityPressure.insert(negativeDensityPressure.begin()+5, conservedScalar.begin(), conservedScalar.begin() + NSCALARS); #endif // SCALAR #ifdef DE - negativePressure.push_back(mhdUtils::computeThermalEnergy(negativePressure.at(4),negativePressure.at(0),negativePressure.at(1),negativePressure.at(2),negativePressure.at(3),negativePressure.at(5 + NSCALARS),negativePressure.at(6 + NSCALARS),negativePressure.at(7 + NSCALARS),gamma)); - negativeEnergy.push_back(mhdUtils::computeThermalEnergy(negativeEnergy.at(4),negativeEnergy.at(0),negativeEnergy.at(1),negativeEnergy.at(2),negativeEnergy.at(3),negativeEnergy.at(5 + NSCALARS),negativeEnergy.at(6 + NSCALARS),negativeEnergy.at(7 + NSCALARS),gamma)); - negativeDensity.push_back(mhdUtils::computeThermalEnergy(negativeDensity.at(4),negativeDensity.at(0),negativeDensity.at(1),negativeDensity.at(2),negativeDensity.at(3),negativeDensity.at(5 + NSCALARS),negativeDensity.at(6 + NSCALARS),negativeDensity.at(7 + NSCALARS),gamma)); - negativeDensityEnergyPressure.push_back(mhdUtils::computeThermalEnergy(negativeDensityEnergyPressure.at(4),negativeDensityEnergyPressure.at(0),negativeDensityEnergyPressure.at(1),negativeDensityEnergyPressure.at(2),negativeDensityEnergyPressure.at(3),negativeDensityEnergyPressure.at(5 + NSCALARS),negativeDensityEnergyPressure.at(6 + NSCALARS),negativeDensityEnergyPressure.at(7 + NSCALARS),gamma)); - negativeDensityPressure.push_back(mhdUtils::computeThermalEnergy(negativeDensityPressure.at(4),negativeDensityPressure.at(0),negativeDensityPressure.at(1),negativeDensityPressure.at(2),negativeDensityPressure.at(3),negativeDensityPressure.at(5 + NSCALARS),negativeDensityPressure.at(6 + NSCALARS),negativeDensityPressure.at(7 + NSCALARS),gamma)); + negativePressure.push_back(mhd::utils::computeThermalEnergy(negativePressure.at(4),negativePressure.at(0),negativePressure.at(1),negativePressure.at(2),negativePressure.at(3),negativePressure.at(5 + NSCALARS),negativePressure.at(6 + NSCALARS),negativePressure.at(7 + NSCALARS),gamma)); + negativeEnergy.push_back(mhd::utils::computeThermalEnergy(negativeEnergy.at(4),negativeEnergy.at(0),negativeEnergy.at(1),negativeEnergy.at(2),negativeEnergy.at(3),negativeEnergy.at(5 + NSCALARS),negativeEnergy.at(6 + NSCALARS),negativeEnergy.at(7 + NSCALARS),gamma)); + negativeDensity.push_back(mhd::utils::computeThermalEnergy(negativeDensity.at(4),negativeDensity.at(0),negativeDensity.at(1),negativeDensity.at(2),negativeDensity.at(3),negativeDensity.at(5 + NSCALARS),negativeDensity.at(6 + NSCALARS),negativeDensity.at(7 + NSCALARS),gamma)); + negativeDensityEnergyPressure.push_back(mhd::utils::computeThermalEnergy(negativeDensityEnergyPressure.at(4),negativeDensityEnergyPressure.at(0),negativeDensityEnergyPressure.at(1),negativeDensityEnergyPressure.at(2),negativeDensityEnergyPressure.at(3),negativeDensityEnergyPressure.at(5 + NSCALARS),negativeDensityEnergyPressure.at(6 + NSCALARS),negativeDensityEnergyPressure.at(7 + NSCALARS),gamma)); + negativeDensityPressure.push_back(mhd::utils::computeThermalEnergy(negativeDensityPressure.at(4),negativeDensityPressure.at(0),negativeDensityPressure.at(1),negativeDensityPressure.at(2),negativeDensityPressure.at(3),negativeDensityPressure.at(5 + NSCALARS),negativeDensityPressure.at(6 + NSCALARS),negativeDensityPressure.at(7 + NSCALARS),gamma)); #endif //DE for (size_t direction = 0; direction < 3; direction++) @@ -1726,7 +1758,7 @@ // ========================================================================= // ========================================================================= - // Unit tests for the contents of the _hlldInternal namespace + // Unit tests for the contents of the mhd::_internal namespace // ========================================================================= /*! * \brief A struct to hold some basic test values @@ -1827,10 +1859,10 @@ { for (size_t i = 0; i < names.size(); i++) { - gasPressureL.push_back(mhdUtils::computeGasPressure(energyL[i], densityL[i], momentumXL[i], momentumYL[i], momentumZL[i], magneticXL[i], magneticYL[i], magneticZL[i], gamma)); - gasPressureR.push_back(mhdUtils::computeGasPressure(energyR[i], densityR[i], momentumXR[i], momentumYR[i], momentumZR[i], magneticXR[i], magneticYR[i], magneticZR[i], gamma)); - totalPressureL.push_back(mhdUtils::computeTotalPressure(gasPressureL.back(), magneticXL[i], magneticYL[i], magneticZL[i])); - totalPressureR.push_back(mhdUtils::computeTotalPressure(gasPressureL.back(), magneticXR[i], magneticYR[i], magneticZR[i])); + gasPressureL.push_back(mhd::utils::computeGasPressure(energyL[i], densityL[i], momentumXL[i], momentumYL[i], momentumZL[i], magneticXL[i], magneticYL[i], magneticZL[i], gamma)); + gasPressureR.push_back(mhd::utils::computeGasPressure(energyR[i], densityR[i], momentumXR[i], momentumYR[i], momentumZR[i], magneticXR[i], magneticYR[i], magneticZR[i], gamma)); + totalPressureL.push_back(mhd::utils::computeTotalPressure(gasPressureL.back(), magneticXL[i], magneticYL[i], magneticZL[i])); + totalPressureR.push_back(mhd::utils::computeTotalPressure(gasPressureL.back(), magneticXR[i], magneticYR[i], magneticZR[i])); } } }; @@ -1839,7 +1871,7 @@ // ========================================================================= /*! - * \brief Test the _hlldInternal::_approximateWaveSpeeds function + * \brief Test the mhd::_internal::_approximateWaveSpeeds function * */ TEST(tMHDHlldInternalApproximateWaveSpeeds, @@ -1850,7 +1882,7 @@ std::vector const fiducialSpeedR {24.295526347371595, 12.519790189404299}; std::vector const fiducialSpeedM {-0.81760587897407833, -0.026643804611559244}; std::vector const fiducialSpeedStarL {-19.710500632936679, -4.4880642018724357}; - std::vector const fiducialSpeedStarR {9.777062240423124, 9.17474383484066}; + std::vector const fiducialSpeedStarR {9.6740190040662242, 3.4191202933087519}; std::vector const fiducialDensityStarL{24.101290139122913, 50.132466596958501}; std::vector const fiducialDensityStarR{78.154104734671265, 84.041595114910123}; @@ -1864,7 +1896,7 @@ for (size_t i = 0; i < parameters.names.size(); i++) { - _hlldInternal::_approximateWaveSpeeds(parameters.densityL[i], + mhd::_internal::_approximateWaveSpeeds(parameters.densityL[i], parameters.momentumXL[i], parameters.momentumYL[i], parameters.momentumZL[i], @@ -1885,7 +1917,6 @@ parameters.velocityZR[i], parameters.gasPressureR[i], parameters.totalPressureR[i], - parameters.magneticXR[i], parameters.magneticYR[i], parameters.magneticZR[i], parameters.gamma, @@ -1924,7 +1955,7 @@ // ========================================================================= /*! - * \brief Test the _hlldInternal::_starFluxes function in the non-degenerate + * \brief Test the mhd::_internal::_starFluxes function in the non-degenerate * case * */ @@ -1961,7 +1992,7 @@ for (size_t i = 0; i < parameters.names.size(); i++) { - _hlldInternal::_starFluxes(parameters.speedM[i], + mhd::_internal::_starFluxes(parameters.speedM[i], parameters.speedSide[i], parameters.densityL[i], parameters.velocityXL[i], @@ -2038,7 +2069,7 @@ } /*! - * \brief Test the _hlldInternal::_starFluxes function in the degenerate + * \brief Test the mhd::_internal::_starFluxes function in the degenerate * case * */ @@ -2078,7 +2109,7 @@ for (size_t i = 0; i < parameters.names.size(); i++) { - _hlldInternal::_starFluxes(parameters.speedM[i], + mhd::_internal::_starFluxes(parameters.speedM[i], parameters.speedSide[i], parameters.densityL[i], parameters.velocityXL[i], @@ -2157,7 +2188,7 @@ // ========================================================================= /*! - * \brief Test the _hlldInternal::_nonStarFluxes function + * \brief Test the mhd::_internal::_nonStarFluxes function * */ TEST(tMHDHlldInternalNonStarFluxes, @@ -2183,7 +2214,7 @@ for (size_t i = 0; i < parameters.names.size(); i++) { - _hlldInternal::_nonStarFluxes(parameters.momentumXL[i], + mhd::_internal::_nonStarFluxes(parameters.momentumXL[i], parameters.velocityXL[i], parameters.velocityYL[i], parameters.velocityZL[i], @@ -2228,38 +2259,7 @@ // ========================================================================= /*! - * \brief Test the _hlldInternal::_dotProduct function - * - */ - TEST(tMHDHlldInternalDotProduct, - CorrectInputExpectCorrectOutput) - { - testParams const parameters; - - std::vector const fiducialDotProduct{5149.7597411033557,6127.2319832451567}; - - double testDotProduct; - - for (size_t i = 0; i < parameters.names.size(); i++) - { - testDotProduct = _hlldInternal::_dotProduct(parameters.momentumXL[i], - parameters.momentumYL[i], - parameters.momentumZL[i], - parameters.magneticXL[i], - parameters.magneticYL[i], - parameters.magneticZL[i]); - - // Now check results - testingUtilities::checkResults(fiducialDotProduct[i], - testDotProduct, - parameters.names.at(i) + ", DotProduct"); - } - } - // ========================================================================= - - // ========================================================================= - /*! - * \brief Test the _hlldInternal::_doubleStarState function. Non-degenerate + * \brief Test the mhd::_internal::_doubleStarState function. Non-degenerate * state * */ @@ -2286,7 +2286,7 @@ for (size_t i = 0; i < parameters.names.size(); i++) { - _hlldInternal::_doubleStarState(parameters.speedM[i], + mhd::_internal::_doubleStarState(parameters.speedM[i], parameters.magneticXL[i], parameters.totalPressureStarL[i], parameters.densityStarL[i], @@ -2332,7 +2332,7 @@ } /*! - * \brief Test the _hlldInternal::_doubleStarState function in the + * \brief Test the mhd::_internal::_doubleStarState function in the * degenerate state. * */ @@ -2357,7 +2357,7 @@ for (size_t i = 0; i < parameters.names.size(); i++) { - _hlldInternal::_doubleStarState(parameters.speedM[i], + mhd::_internal::_doubleStarState(parameters.speedM[i], 0.0, parameters.totalPressureStarL[i], parameters.densityStarL[i], @@ -2403,7 +2403,7 @@ // ========================================================================= /*! - * \brief Test the _hlldInternal::_doubleStarFluxes function + * \brief Test the mhd::_internal::_doubleStarFluxes function * */ TEST(tMHDHlldInternalDoubleStarFluxes, @@ -2428,7 +2428,7 @@ for (size_t i = 0; i < parameters.names.size(); i++) { - _hlldInternal::_doubleStarFluxes(parameters.speedSide[i], + mhd::_internal::_doubleStarFluxes(parameters.speedSide[i], parameters.momentumStarFluxX[i], parameters.momentumStarFluxY[i], parameters.momentumStarFluxZ[i], @@ -2480,7 +2480,7 @@ // ========================================================================= /*! - * \brief Test the _hlldInternal::_returnFluxes function + * \brief Test the mhd::_internal::_returnFluxes function * */ TEST(tMHDHlldInternalReturnFluxes, @@ -2532,10 +2532,10 @@ int const fiducialMomentumIndexY = threadId + n_cells * o2; int const fiducialMomentumIndexZ = threadId + n_cells * o3; int const fiducialEnergyIndex = threadId + n_cells * 4; - int const fiducialMagneticYIndex = threadId + n_cells * (o2 + 4 + NSCALARS); - int const fiducialMagneticZIndex = threadId + n_cells * (o3 + 4 + NSCALARS); + int const fiducialMagneticYIndex = threadId + n_cells * (5 + NSCALARS); + int const fiducialMagneticZIndex = threadId + n_cells * (6 + NSCALARS); - _hlldInternal::_returnFluxes(threadId, + mhd::_internal::_returnFluxes(threadId, o1, o2, o3, @@ -2578,4 +2578,4 @@ } } // ========================================================================= -#endif // CUDA & HLLD \ No newline at end of file +#endif // CUDA \ No newline at end of file diff --git a/src/system_tests/hydro_system_tests.cpp b/src/system_tests/hydro_system_tests.cpp index 72a6dc349..5ed2b050c 100644 --- a/src/system_tests/hydro_system_tests.cpp +++ b/src/system_tests/hydro_system_tests.cpp @@ -13,19 +13,17 @@ // Local includes #include "../system_tests/system_tester.h" #include "../utils/testing_utilities.h" - - +#include "../io/io.h" #ifndef PI #define PI 3.141592653589793 #endif - // ============================================================================= -// Test Suite: tHYDROSYSTEMSodShockTube +// Test Suite: tHYDROtMHDSYSTEMSodShockTube // ============================================================================= /*! - * \defgroup tHYDROSYSTEMSodShockTubeParameterizedMpi_CorrectInputExpectCorrectOutput + * \defgroup tHYDROtMHDSYSTEMSodShockTubeParameterizedMpi_CorrectInputExpectCorrectOutput * \brief Test the Sod Shock tube initial conditions as a parameterized test * with varying numbers of MPI ranks * @@ -42,6 +40,28 @@ class tHYDROSYSTEMSodShockTubeParameterizedMpi TEST_P(tHYDROSYSTEMSodShockTubeParameterizedMpi, CorrectInputExpectCorrectOutput) { + #ifdef MHD + // Loosen correctness check to account for MHD only having PCM. This is + // about the error between PCM and PPMP in hydro + sodTest.setFixedEpsilon(1E-3); + + // Don't test the gas energy fields + auto datasetNames = sodTest.getDataSetsToTest(); + datasetNames.erase(std::remove(datasetNames.begin(), datasetNames.end(), "GasEnergy"), datasetNames.end()); + + // Set the magnetic fiducial datasets to zero + size_t const size = std::pow(65, 3); + std::vector const magVec(0, size); + + for (auto field: {"magnetic_x","magnetic_y","magnetic_z"}) + { + sodTest.setFiducialData(field, magVec); + datasetNames.push_back(field); + } + + sodTest.setDataSetsToTest(datasetNames); + #endif //MHD + sodTest.numMpiRanks = GetParam(); sodTest.runTest(); } @@ -52,7 +72,7 @@ INSTANTIATE_TEST_SUITE_P(CorrectInputExpectCorrectOutput, /// @} // ============================================================================= -TEST(tHYDROSYSTEMConstant, +TEST(tHYDROtMHDSYSTEMConstant, CorrectInputExpectCorrectOutput) { systemTest::SystemTestRunner testObject(false, false, false); @@ -70,15 +90,15 @@ TEST(tHYDROSYSTEMConstant, } -TEST(tHYDROSYSTEMSoundWave3D, +TEST(tHYDROtMHDSYSTEMSoundWave3D, CorrectInputExpectCorrectOutput) { double time = 0.05; double amplitude = 1e-5; double dx = 1./64.; - + double real_kx = 2*PI;//kx of the physical problem - + double kx = real_kx * dx; double speed = 1;//speed of wave is 1 since P = 0.6 and gamma = 1.666667 double phase = kx*0.5 - speed * time * real_kx; //kx*0.5 for half-cell offset @@ -86,12 +106,191 @@ TEST(tHYDROSYSTEMSoundWave3D, systemTest::SystemTestRunner testObject(false, false, false); + #ifdef MHD + // Loosen correctness check to account for MHD only having PCM. This is + // about the error between PCM and PPMP in hydro + tolerance = 1E-6; + #endif //MHD + testObject.launchCholla(); testObject.openHydroTestData(); - testingUtilities::analyticSine(testObject,"density",1.0,amplitude,kx,0.0,0.0,phase,tolerance); - testingUtilities::analyticSine(testObject,"momentum_x",0.0,amplitude,kx,0.0,0.0,phase,tolerance); + ASSERT_NO_FATAL_FAILURE(testingUtilities::analyticSine(testObject,"density",1.0,amplitude,kx,0.0,0.0,phase,tolerance)); + ASSERT_NO_FATAL_FAILURE(testingUtilities::analyticSine(testObject,"momentum_x",0.0,amplitude,kx,0.0,0.0,phase,tolerance)); //testingUtilities::analyticSine(testObject,"momentum_y",0.0,amplitude,kx,0.0,0.0,0.0,tolerance); //testingUtilities::analyticSine(testObject,"momentum_z",0.0,amplitude,kx,0.0,0.0,0.0,tolerance); } + +// ============================================================================= +// Test Suite: tHYDROtMHDSYSTEMLinearWavesParameterizedMpi +// ============================================================================= +/*! + * \defgroup tHYDROtMHDSYSTEMLinearWavesParameterizedMpi + * \brief Test the linear waves initial conditions as a parameterized test + * with varying numbers of MPI ranks. + * + */ +/// @{ +class tHYDROtMHDSYSTEMLinearWavesParameterizedMpi + :public + ::testing::TestWithParam +{ +public: + tHYDROtMHDSYSTEMLinearWavesParameterizedMpi() + : waveTest(false, true, false, false) + {}; +protected: + systemTest::SystemTestRunner waveTest; + + #ifdef PCM + double const allowedL1Error = 4E-7; // Based on results in Gardiner & Stone 2008 + double const allowedError = 4E-7; + #else //PCM + double const allowedL1Error = 1E-7; // Based on results in Gardiner & Stone 2008 + double const allowedError = 1E-7; + #endif //PCM + + void setLaunchParams(double const &waveSpeed, double const &rEigenVec_rho, + double const &rEigenVec_MomentumX, double const &rEigenVec_MomentumY, + double const &rEigenVec_MomentumZ, double const &rEigenVec_E, + double const &vx=0.0) + { + // Constant for all tests + size_t const N = 32; + double const domain = 0.5; + double const gamma = 5./3.; + double const tOut = 2*domain / waveSpeed; + + // Settings + waveTest.chollaLaunchParams.append(" nx=" + to_string_exact(2*N)); + waveTest.chollaLaunchParams.append(" ny=" + to_string_exact(N)); + waveTest.chollaLaunchParams.append(" nz=" + to_string_exact(N)); + waveTest.chollaLaunchParams.append(" tout=" + to_string_exact(tOut)); + waveTest.chollaLaunchParams.append(" outstep=" + to_string_exact(tOut)); + waveTest.chollaLaunchParams.append(" init=Linear_Wave"); + waveTest.chollaLaunchParams.append(" xmin=0.0"); + waveTest.chollaLaunchParams.append(" ymin=0.0"); + waveTest.chollaLaunchParams.append(" zmin=0.0"); + waveTest.chollaLaunchParams.append(" xlen=" + to_string_exact(2*domain)); + waveTest.chollaLaunchParams.append(" ylen=" + to_string_exact(domain)); + waveTest.chollaLaunchParams.append(" zlen=" + to_string_exact(domain)); + waveTest.chollaLaunchParams.append(" xl_bcnd=1"); + waveTest.chollaLaunchParams.append(" xu_bcnd=1"); + waveTest.chollaLaunchParams.append(" yl_bcnd=1"); + waveTest.chollaLaunchParams.append(" yu_bcnd=1"); + waveTest.chollaLaunchParams.append(" zl_bcnd=1"); + waveTest.chollaLaunchParams.append(" zu_bcnd=1"); + waveTest.chollaLaunchParams.append(" rho=1.0"); + waveTest.chollaLaunchParams.append(" vx=" + to_string_exact(vx)); + waveTest.chollaLaunchParams.append(" vy=0"); + waveTest.chollaLaunchParams.append(" vz=0"); + waveTest.chollaLaunchParams.append(" P=" + to_string_exact(1/gamma)); + waveTest.chollaLaunchParams.append(" Bx=0"); + waveTest.chollaLaunchParams.append(" By=0"); + waveTest.chollaLaunchParams.append(" Bz=0"); + waveTest.chollaLaunchParams.append(" A='1e-6'"); + waveTest.chollaLaunchParams.append(" gamma=" + to_string_exact(gamma)); + waveTest.chollaLaunchParams.append(" rEigenVec_rho=" + to_string_exact(rEigenVec_rho)); + waveTest.chollaLaunchParams.append(" rEigenVec_MomentumX=" + to_string_exact(rEigenVec_MomentumX)); + waveTest.chollaLaunchParams.append(" rEigenVec_MomentumY=" + to_string_exact(rEigenVec_MomentumY)); + waveTest.chollaLaunchParams.append(" rEigenVec_MomentumZ=" + to_string_exact(rEigenVec_MomentumZ)); + waveTest.chollaLaunchParams.append(" rEigenVec_E=" + to_string_exact(rEigenVec_E)); + waveTest.chollaLaunchParams.append(" rEigenVec_Bx=0"); + waveTest.chollaLaunchParams.append(" rEigenVec_By=0"); + waveTest.chollaLaunchParams.append(" rEigenVec_Bz=0"); + } +}; + +// Sound Waves Moving Left and Right +// ================================= +TEST_P(tHYDROtMHDSYSTEMLinearWavesParameterizedMpi, + SoundWaveRightMovingCorrectInputExpectCorrectOutput) +{ + // Specific to this test + double const waveSpeed = 1.; + int const numTimeSteps = 214; + + double const rEigenVec_rho = 1; + double const rEigenVec_MomentumX = 1; + double const rEigenVec_MomentumY = 1; + double const rEigenVec_MomentumZ = 1; + double const rEigenVec_E = 1.5; + + // Set the launch parameters + setLaunchParams(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, + rEigenVec_MomentumY, rEigenVec_MomentumZ, rEigenVec_E); + + // Set the number of MPI ranks + waveTest.numMpiRanks = GetParam(); + + // Set the number of timesteps + waveTest.setFiducialNumTimeSteps(numTimeSteps); + + // Check Results + waveTest.runL1ErrorTest(2*allowedL1Error, allowedError); +} + +TEST_P(tHYDROtMHDSYSTEMLinearWavesParameterizedMpi, + SoundWaveLeftMovingCorrectInputExpectCorrectOutput) +{ + // Specific to this test + double const waveSpeed = 1.; + int const numTimeSteps = 214; + + double const rEigenVec_rho = 1; + double const rEigenVec_MomentumX = -1; + double const rEigenVec_MomentumY = 1; + double const rEigenVec_MomentumZ = 1; + double const rEigenVec_E = 1.5; + + // Set the launch parameters + setLaunchParams(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, + rEigenVec_MomentumY, rEigenVec_MomentumZ, rEigenVec_E); + + // Set the number of MPI ranks + waveTest.numMpiRanks = GetParam(); + + // Set the number of timesteps + waveTest.setFiducialNumTimeSteps(numTimeSteps); + + // Check Results + waveTest.runL1ErrorTest(2*allowedL1Error, allowedError); +} + +// Contact Waves Moving Left and Right +// =================================== +TEST_P(tHYDROtMHDSYSTEMLinearWavesParameterizedMpi, + HydroContactWaveCorrectInputExpectCorrectOutput) +{ + // Specific to this test + double const waveSpeed = 1.0; + int const numTimeSteps = 427; + + double const rEigenVec_rho = 1; + double const rEigenVec_MomentumX = 1; + double const rEigenVec_MomentumY = 0; + double const rEigenVec_MomentumZ = 0; + double const rEigenVec_E = 0.5; + double const velocityX = waveSpeed; + + // Set the launch parameters + setLaunchParams(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, + rEigenVec_MomentumY, rEigenVec_MomentumZ, rEigenVec_E, + velocityX); + + // Set the number of MPI ranks + waveTest.numMpiRanks = GetParam(); + + // Set the number of timesteps + waveTest.setFiducialNumTimeSteps(numTimeSteps); + + // Check Results + waveTest.runL1ErrorTest(allowedL1Error, allowedError); +} + +INSTANTIATE_TEST_SUITE_P(, + tHYDROtMHDSYSTEMLinearWavesParameterizedMpi, + ::testing::Values(1)); +/// @} +// ============================================================================= \ No newline at end of file diff --git a/src/system_tests/input_files/blank_settings_file.txt b/src/system_tests/input_files/blank_settings_file.txt new file mode 100644 index 000000000..e8fbd7e77 --- /dev/null +++ b/src/system_tests/input_files/blank_settings_file.txt @@ -0,0 +1,3 @@ +# This is blank file for system tests that are setting all the parameters +# internally to point at. Without a blank file cholla will crash + diff --git a/src/system_tests/input_files/tHYDROSYSTEMConstant_CorrectInputExpectCorrectOutput.txt b/src/system_tests/input_files/tHYDROtMHDSYSTEMConstant_CorrectInputExpectCorrectOutput.txt similarity index 100% rename from src/system_tests/input_files/tHYDROSYSTEMConstant_CorrectInputExpectCorrectOutput.txt rename to src/system_tests/input_files/tHYDROtMHDSYSTEMConstant_CorrectInputExpectCorrectOutput.txt diff --git a/src/system_tests/input_files/tHYDROSYSTEMSoundWave3D_CorrectInputExpectCorrectOutput.txt b/src/system_tests/input_files/tHYDROtMHDSYSTEMSoundWave3D_CorrectInputExpectCorrectOutput.txt similarity index 70% rename from src/system_tests/input_files/tHYDROSYSTEMSoundWave3D_CorrectInputExpectCorrectOutput.txt rename to src/system_tests/input_files/tHYDROtMHDSYSTEMSoundWave3D_CorrectInputExpectCorrectOutput.txt index f1c23ea6e..efdedaceb 100644 --- a/src/system_tests/input_files/tHYDROSYSTEMSoundWave3D_CorrectInputExpectCorrectOutput.txt +++ b/src/system_tests/input_files/tHYDROtMHDSYSTEMSoundWave3D_CorrectInputExpectCorrectOutput.txt @@ -14,7 +14,7 @@ tout=0.05 # time interval for output outstep=0.05 # name of initial conditions -init=Sound_Wave +init=Linear_Wave # domain properties xmin=0.0 ymin=0.0 @@ -34,18 +34,31 @@ outdir=./ ################################################# # Parameters for linear wave problems -# initial density +# initial density rho=1.0 -# velocity in the x direction +# velocity in the x direction vx=0 # velocity in the y direction vy=0 # velocity in the z direction vz=0 -# initial pressure +# initial pressure P=0.6 # amplitude of perturbing oscillations A=1e-5 # value of gamma gamma=1.666666666666667 +# The right eigenvectors to set the wave properly +rEigenVec_rho=1 +rEigenVec_MomentumX=1 +rEigenVec_MomentumY=1 +rEigenVec_MomentumZ=1 +rEigenVec_E=1.5 +# Set the magnetic field quantities to zero +Bx=0 +By=0 +Bz=0 +rEigenVec_Bx=0 +rEigenVec_By=0 +rEigenVec_Bz=0 \ No newline at end of file diff --git a/src/system_tests/input_files/tMHDSYSTEMConstantParameterizedMpi_MagneticFieldCorrectInputExpectCorrectOutput.txt b/src/system_tests/input_files/tMHDSYSTEMConstantParameterizedMpi_MagneticFieldCorrectInputExpectCorrectOutput.txt new file mode 100644 index 000000000..eabea0e60 --- /dev/null +++ b/src/system_tests/input_files/tMHDSYSTEMConstantParameterizedMpi_MagneticFieldCorrectInputExpectCorrectOutput.txt @@ -0,0 +1,50 @@ +# +# Parameter File for 3D box filled with gas +# + +################################################ +# number of grid cells in the x dimension +nx=16 +# number of grid cells in the y dimension +ny=16 +# number of grid cells in the z dimension +nz=16 +# final output time +tout=100000.0 +# time interval for output +outstep=100000.0 +# name of initial conditions +init=Constant +# domain properties +xmin=0.0 +ymin=0.0 +zmin=0.0 +xlen=1.0 +ylen=1.0 +zlen=1.0 +# type of boundary conditions +xl_bcnd=1 +xu_bcnd=1 +yl_bcnd=1 +yu_bcnd=1 +zl_bcnd=1 +zu_bcnd=1 +# path to output directory +outdir=./ + +################################################# +# density +rho=1e4 +# velocity +vx=0 +vy=0 +vz=0 +# pressure +P=1.380658e-5 +# Magnetic Field +Bx=1.0e-5 +By=2.0e-5 +Bz=3.0e-5 +# value of gamma +gamma=1.666666667 + diff --git a/src/system_tests/input_files/tMHDSYSTEMConstantParameterizedMpi_ZeroMagneticFieldCorrectInputExpectCorrectOutput.txt b/src/system_tests/input_files/tMHDSYSTEMConstantParameterizedMpi_ZeroMagneticFieldCorrectInputExpectCorrectOutput.txt new file mode 100644 index 000000000..4f52b7cd6 --- /dev/null +++ b/src/system_tests/input_files/tMHDSYSTEMConstantParameterizedMpi_ZeroMagneticFieldCorrectInputExpectCorrectOutput.txt @@ -0,0 +1,50 @@ +# +# Parameter File for 3D box filled with gas +# + +################################################ +# number of grid cells in the x dimension +nx=16 +# number of grid cells in the y dimension +ny=16 +# number of grid cells in the z dimension +nz=16 +# final output time +tout=100000.0 +# time interval for output +outstep=100000.0 +# name of initial conditions +init=Constant +# domain properties +xmin=0.0 +ymin=0.0 +zmin=0.0 +xlen=1.0 +ylen=1.0 +zlen=1.0 +# type of boundary conditions +xl_bcnd=1 +xu_bcnd=1 +yl_bcnd=1 +yu_bcnd=1 +zl_bcnd=1 +zu_bcnd=1 +# path to output directory +outdir=./ + +################################################# +# density +rho=1e4 +# velocity +vx=0 +vy=0 +vz=0 +# pressure +P=1.380658e-5 +# Magnetic Field +Bx=0.0 +By=0.0 +Bz=0.0 +# value of gamma +gamma=1.666666667 + diff --git a/src/system_tests/input_files/tMHDSYSTEMEinfeldtStrongRarefaction_CorrectInputExpectCorrectOutput.txt b/src/system_tests/input_files/tMHDSYSTEMEinfeldtStrongRarefaction_CorrectInputExpectCorrectOutput.txt new file mode 100644 index 000000000..3e4747551 --- /dev/null +++ b/src/system_tests/input_files/tMHDSYSTEMEinfeldtStrongRarefaction_CorrectInputExpectCorrectOutput.txt @@ -0,0 +1,71 @@ +# +# Parameter File for 3D Einfeldt Strong Rarefaction MHD test +# Citation: Einfeldt et al. 1991 "On Godunov-Type Methods near Low Densities" +# + +################################################ +# number of grid cells in the x dimension +nx=32 +# number of grid cells in the y dimension +ny=32 +# number of grid cells in the z dimension +nz=32 +# final output time +tout=0.16 +# time interval for output +outstep=0.16 +# name of initial conditions +init=Riemann + +# domain properties +xmin=0.0 +ymin=0.0 +zmin=0.0 +xlen=1.0 +ylen=1.0 +zlen=1.0 + +# type of boundary conditions +xl_bcnd=3 +xu_bcnd=3 +yl_bcnd=3 +yu_bcnd=3 +zl_bcnd=3 +zu_bcnd=3 + +# path to output directory +outdir=./ + +################################################# +# Parameters for 1D Riemann problems +# density of left state +rho_l=1.0 +# velocity of left state +vx_l=-2.0 +vy_l=0.0 +vz_l=0.0 +# pressure of left state +P_l=0.45 +# Magnetic field of the left state +Bx_l=0.0 +By_l=0.5 +Bz_l=0.0 + +# density of right state +rho_r=1.0 +# velocity of right state +vx_r=2.0 +vy_r=0.0 +vz_r=0.0 +# pressure of right state +P_r=0.45 +# Magnetic field of the right state +Bx_r=0.0 +By_r=0.5 +Bz_r=0.0 + +# location of initial discontinuity +diaph=0.5 +# value of gamma +gamma=1.4 + diff --git a/src/system_tests/input_files/tMHDSYSTEMSodShockTubeParameterizedMpi_CorrectInputExpectCorrectOutput.txt b/src/system_tests/input_files/tMHDSYSTEMSodShockTubeParameterizedMpi_CorrectInputExpectCorrectOutput.txt new file mode 100644 index 000000000..6fb66732b --- /dev/null +++ b/src/system_tests/input_files/tMHDSYSTEMSodShockTubeParameterizedMpi_CorrectInputExpectCorrectOutput.txt @@ -0,0 +1,57 @@ +# +# Parameter File for 1D Sod Shock tube +# + +################################################ +# number of grid cells in the x dimension +nx=64 +# number of grid cells in the y dimension +ny=64 +# number of grid cells in the z dimension +nz=64 +# final output time +tout=0.2 +# time interval for output +outstep=0.2 +# name of initial conditions +init=Riemann +# domain properties +xmin=0.0 +ymin=0.0 +zmin=0.0 +xlen=1.0 +ylen=1.0 +zlen=1.0 +# type of boundary conditions +xl_bcnd=3 +xu_bcnd=3 +yl_bcnd=0 +yu_bcnd=0 +zl_bcnd=0 +zu_bcnd=0 +# path to output directory +outdir=./ + +################################################# +# Parameters for 1D Riemann problems +# density of left state +rho_l=1.0 +# velocity of left state +vx_l=0.0 +vy_l=0.0 +vz_l=0.0 +# pressure of left state +P_l=1.0 +# density of right state +rho_r=0.1 +# velocity of right state +vx_r=0.0 +vy_r=0.0 +vz_r=0.0 +# pressure of right state +P_r=0.1 +# location of initial discontinuity +diaph=0.5 +# value of gamma +gamma=1.4 + diff --git a/src/system_tests/mhd_system_tests.cpp b/src/system_tests/mhd_system_tests.cpp new file mode 100644 index 000000000..39cec0b89 --- /dev/null +++ b/src/system_tests/mhd_system_tests.cpp @@ -0,0 +1,636 @@ +/*! + * \file mhd_system_tests.cpp + * \author Robert 'Bob' Caddy (rvc@pitt.edu) + * \brief Contains all the system tests for the MHD build type + * + */ + +// STL includes +#include + +// External Libraries and Headers +#include + +// Local includes +#include "../system_tests/system_tester.h" +#include "../io/io.h" + +// ============================================================================= +// Test Suite: tMHDSYSTEMConstantParameterizedMpi +// ============================================================================= +/*! + * \defgroup tMHDSYSTEMConstantParameterizedMpi + * \brief Test the constant initial conditions as a parameterized test + * with varying numbers of MPI ranks + * + */ +/// @{ +class tMHDSYSTEMConstantParameterizedMpi + :public + ::testing::TestWithParam +{ +protected: + systemTest::SystemTestRunner constantTest; +}; + +// Test with all mangetic fields set to zero +TEST_P(tMHDSYSTEMConstantParameterizedMpi, + ZeroMagneticFieldCorrectInputExpectCorrectOutput) +{ + constantTest.numMpiRanks = GetParam(); + constantTest.runTest(); +} + +// Test with all mangetic fields set to one +TEST_P(tMHDSYSTEMConstantParameterizedMpi, + MagneticFieldCorrectInputExpectCorrectOutput) +{ + constantTest.numMpiRanks = GetParam(); + constantTest.runTest(); +} + +INSTANTIATE_TEST_SUITE_P(, + tMHDSYSTEMConstantParameterizedMpi, + ::testing::Values(1, 2, 4)); +/// @} +// ============================================================================= + +// ============================================================================= +// Test Suite: tMHDSYSTEMLinearWavesParameterizedAngle +// ============================================================================= +/*! + * \defgroup tMHDSYSTEMLinearWavesParameterizedAngle + * \brief Test the linear waves initial conditions as a parameterized test + * with varying angles. Details in Gardiner & Stone 2008 + * + */ +/// @{ +class tMHDSYSTEMLinearWavesParameterizedAngle + :public + ::testing::TestWithParam> +{ +public: + tMHDSYSTEMLinearWavesParameterizedAngle() + : waveTest(false, true, false, false){}; +protected: + systemTest::SystemTestRunner waveTest; + + #ifdef PCM + double const allowedL1Error = 4E-7; // Based on results in Gardiner & Stone 2008 + double const allowedError = 4E-7; + #else //PCM + double const allowedL1Error = 1E-7; // Based on results in Gardiner & Stone 2008 + double const allowedError = 1E-7; + #endif //PCM + + void setLaunchParams(double const &waveSpeed, double const &rEigenVec_rho, + double const &rEigenVec_MomentumX, double const &rEigenVec_MomentumY, + double const &rEigenVec_MomentumZ, double const &rEigenVec_E, + double const &rEigenVec_Bx, double const &rEigenVec_By, + double const &rEigenVec_Bz, double const &pitch, + double const &yaw, double const &domain, + int const &domain_direction, double const &vx=0.0) + { + // Constant for all tests + size_t const N = 32; + double const gamma = 5./3.; + double const tOut = 2*domain / waveSpeed; + + // Define vector values + double x_len=domain, y_len=domain, z_len=domain; + int nx=N, ny=N, nz=N; + double vx_rot=vx, vy_rot=0, vz_rot=0; + double Bx_rot=1, By_rot=1.5, Bz_rot=0; + + double rEigenVec_Bx_rot = rEigenVec_Bx; + double rEigenVec_By_rot = rEigenVec_By; + double rEigenVec_Bz_rot = rEigenVec_Bz; + + double rEigenVec_MomentumX_rot = rEigenVec_MomentumX; + double rEigenVec_MomentumY_rot = rEigenVec_MomentumY; + double rEigenVec_MomentumZ_rot = rEigenVec_MomentumZ; + + switch (domain_direction) + { + case 1: + x_len *= 2; + nx *= 2; + break; + case 2: // swap X and Y + y_len *= 2; + ny *= 2; + std::swap(vx_rot, vy_rot); + std::swap(Bx_rot, By_rot); + std::swap(rEigenVec_Bx_rot, rEigenVec_By_rot); + std::swap(rEigenVec_MomentumX_rot, rEigenVec_MomentumY_rot); + break; + case 3: // swap X and Z + z_len *= 2; + nz *= 2; + std::swap(vx_rot, vz_rot); + std::swap(Bx_rot, Bz_rot); + std::swap(rEigenVec_Bx_rot, rEigenVec_Bz_rot); + std::swap(rEigenVec_MomentumX_rot, rEigenVec_MomentumZ_rot); + break; + default: + throw std::invalid_argument("Invalid value of domain_direction given to setLaunchParams"); + break; + } + + // Settings + waveTest.chollaLaunchParams.append(" nx=" + to_string_exact(nx)); + waveTest.chollaLaunchParams.append(" ny=" + to_string_exact(ny)); + waveTest.chollaLaunchParams.append(" nz=" + to_string_exact(nz)); + waveTest.chollaLaunchParams.append(" tout=" + to_string_exact(tOut)); + waveTest.chollaLaunchParams.append(" outstep=" + to_string_exact(tOut)); + waveTest.chollaLaunchParams.append(" init=Linear_Wave"); + waveTest.chollaLaunchParams.append(" xmin=0.0"); + waveTest.chollaLaunchParams.append(" ymin=0.0"); + waveTest.chollaLaunchParams.append(" zmin=0.0"); + waveTest.chollaLaunchParams.append(" xlen=" + to_string_exact(x_len)); + waveTest.chollaLaunchParams.append(" ylen=" + to_string_exact(y_len)); + waveTest.chollaLaunchParams.append(" zlen=" + to_string_exact(z_len)); + waveTest.chollaLaunchParams.append(" xl_bcnd=1"); + waveTest.chollaLaunchParams.append(" xu_bcnd=1"); + waveTest.chollaLaunchParams.append(" yl_bcnd=1"); + waveTest.chollaLaunchParams.append(" yu_bcnd=1"); + waveTest.chollaLaunchParams.append(" zl_bcnd=1"); + waveTest.chollaLaunchParams.append(" zu_bcnd=1"); + waveTest.chollaLaunchParams.append(" rho=1.0"); + waveTest.chollaLaunchParams.append(" vx=" + to_string_exact(vx_rot)); + waveTest.chollaLaunchParams.append(" vy=" + to_string_exact(vy_rot)); + waveTest.chollaLaunchParams.append(" vz=" + to_string_exact(vz_rot)); + waveTest.chollaLaunchParams.append(" P=" + to_string_exact(1/gamma)); + waveTest.chollaLaunchParams.append(" Bx=" + to_string_exact(Bx_rot)); + waveTest.chollaLaunchParams.append(" By=" + to_string_exact(By_rot)); + waveTest.chollaLaunchParams.append(" Bz=" + to_string_exact(Bz_rot)); + waveTest.chollaLaunchParams.append(" A='1e-6'"); + waveTest.chollaLaunchParams.append(" gamma=" + to_string_exact(gamma)); + waveTest.chollaLaunchParams.append(" rEigenVec_rho=" + to_string_exact(rEigenVec_rho)); + waveTest.chollaLaunchParams.append(" rEigenVec_MomentumX=" + to_string_exact(rEigenVec_MomentumX_rot)); + waveTest.chollaLaunchParams.append(" rEigenVec_MomentumY=" + to_string_exact(rEigenVec_MomentumY_rot)); + waveTest.chollaLaunchParams.append(" rEigenVec_MomentumZ=" + to_string_exact(rEigenVec_MomentumZ_rot)); + waveTest.chollaLaunchParams.append(" rEigenVec_E=" + to_string_exact(rEigenVec_E)); + waveTest.chollaLaunchParams.append(" rEigenVec_Bx=" + to_string_exact(rEigenVec_Bx_rot)); + waveTest.chollaLaunchParams.append(" rEigenVec_By=" + to_string_exact(rEigenVec_By_rot)); + waveTest.chollaLaunchParams.append(" rEigenVec_Bz=" + to_string_exact(rEigenVec_Bz_rot)); + waveTest.chollaLaunchParams.append(" pitch=" + to_string_exact(pitch)); + waveTest.chollaLaunchParams.append(" yaw=" + to_string_exact(yaw)); + } +}; + +// Fast Magnetosonic Waves Moving Left and Right +// ============================================= +TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, + FastMagnetosonicWaveRightMovingCorrectInputExpectCorrectOutput) +{ + // Specific to this test + double const waveSpeed = 2.; + std::vector const numTimeSteps = {214, 204, 220}; + + double const prefix = 1./(2*std::sqrt(5)); + double const rEigenVec_rho = prefix * 2; + double const rEigenVec_MomentumX = prefix * 4; + double const rEigenVec_MomentumY = prefix * -2; // + for left wave + double const rEigenVec_MomentumZ = prefix * 0; + double const rEigenVec_Bx = prefix * 0; + double const rEigenVec_By = prefix * 4; + double const rEigenVec_Bz = prefix * 0; + double const rEigenVec_E = prefix * 9; + + // Get the test parameters + auto [pitch, yaw, domain, domain_direction] = GetParam(); + + // Set the launch parameters + setLaunchParams(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, + rEigenVec_MomentumY, rEigenVec_MomentumZ, rEigenVec_E, + rEigenVec_Bx, rEigenVec_By, rEigenVec_Bz, pitch, yaw, domain, + domain_direction); + + // Set the number of timesteps + waveTest.setFiducialNumTimeSteps(numTimeSteps[domain_direction-1]); + + // Check Results + #ifdef PCM + waveTest.runL1ErrorTest(4.2E-7, 5.4E-7); + #else //PCM + waveTest.runL1ErrorTest(allowedL1Error, allowedError); + #endif //PCM +} + +TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, + FastMagnetosonicWaveLeftMovingCorrectInputExpectCorrectOutput) +{ + // Specific to this test + double const waveSpeed = 2.; + std::vector const numTimeSteps = {214, 204, 220}; + + double const prefix = 1./(2*std::sqrt(5)); + double const rEigenVec_rho = prefix * 2; + double const rEigenVec_MomentumX = prefix * -4; + double const rEigenVec_MomentumY = prefix * 2; + double const rEigenVec_MomentumZ = prefix * 0; + double const rEigenVec_Bx = prefix * 0; + double const rEigenVec_By = prefix * 4; + double const rEigenVec_Bz = prefix * 0; + double const rEigenVec_E = prefix * 9; + + // Get the test parameters + auto [pitch, yaw, domain, domain_direction] = GetParam(); + + // Set the launch parameters + setLaunchParams(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, + rEigenVec_MomentumY, rEigenVec_MomentumZ, rEigenVec_E, + rEigenVec_Bx, rEigenVec_By, rEigenVec_Bz, pitch, yaw, domain, + domain_direction); + + // Set the number of timesteps + waveTest.setFiducialNumTimeSteps(numTimeSteps[domain_direction-1]); + + // Check Results + #ifdef PCM + waveTest.runL1ErrorTest(4.2E-7, 5.4E-7); + #else //PCM + waveTest.runL1ErrorTest(allowedL1Error, allowedError); + #endif //PCM +} + +// Slow Magnetosonic Waves Moving Left and Right +// ============================================= +TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, + SlowMagnetosonicWaveRightMovingCorrectInputExpectCorrectOutput) +{ + // Specific to this test + double const waveSpeed = 0.5; + std::vector const numTimeSteps = {854, 813, 880}; + + double const prefix = 1./(2*std::sqrt(5)); + double const rEigenVec_rho = prefix * 4; + double const rEigenVec_MomentumX = prefix * 2; + double const rEigenVec_MomentumY = prefix * 4; + double const rEigenVec_MomentumZ = prefix * 0; + double const rEigenVec_Bx = prefix * 0; + double const rEigenVec_By = prefix * -2; + double const rEigenVec_Bz = prefix * 0; + double const rEigenVec_E = prefix * 3; + + // Get the test parameters + auto [pitch, yaw, domain, domain_direction] = GetParam(); + + // Set the launch parameters + setLaunchParams(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, + rEigenVec_MomentumY, rEigenVec_MomentumZ, rEigenVec_E, + rEigenVec_Bx, rEigenVec_By, rEigenVec_Bz, pitch, yaw, domain, + domain_direction); + + // Set the number of timesteps + waveTest.setFiducialNumTimeSteps(numTimeSteps[domain_direction-1]); + + // Check Results + waveTest.runL1ErrorTest(allowedL1Error, allowedError); +} + +TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, + SlowMagnetosonicWaveLeftMovingCorrectInputExpectCorrectOutput) +{ + // Specific to this test + double const waveSpeed = 0.5; + std::vector const numTimeSteps = {854, 813, 880}; + + double const prefix = 1./(2*std::sqrt(5)); + double const rEigenVec_rho = prefix * 4; + double const rEigenVec_MomentumX = prefix * -2; + double const rEigenVec_MomentumY = prefix * -4; + double const rEigenVec_MomentumZ = prefix * 0; + double const rEigenVec_Bx = prefix * 0; + double const rEigenVec_By = prefix * -2; + double const rEigenVec_Bz = prefix * 0; + double const rEigenVec_E = prefix * 3; + + // Get the test parameters + auto [pitch, yaw, domain, domain_direction] = GetParam(); + + // Set the launch parameters + setLaunchParams(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, + rEigenVec_MomentumY, rEigenVec_MomentumZ, rEigenVec_E, + rEigenVec_Bx, rEigenVec_By, rEigenVec_Bz, pitch, yaw, domain, + domain_direction); + + // Set the number of timesteps + waveTest.setFiducialNumTimeSteps(numTimeSteps[domain_direction-1]); + + // Check Results + waveTest.runL1ErrorTest(allowedL1Error, allowedError); +} + +// Alfven Waves Moving Left and Right +// ============================================= +TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, + AlfvenWaveRightMovingCorrectInputExpectCorrectOutput) +{ + // Specific to this test + double const waveSpeed = 1.0; + std::vector const numTimeSteps = {427, 407, 440}; + + double const rEigenVec_rho = 0; + double const rEigenVec_MomentumX = 0; + double const rEigenVec_MomentumY = 0; + double const rEigenVec_MomentumZ = -1; + double const rEigenVec_Bx = 0; + double const rEigenVec_By = 0; + double const rEigenVec_Bz = 1; + double const rEigenVec_E = 0; + + // Get the test parameters + auto [pitch, yaw, domain, domain_direction] = GetParam(); + + // Set the launch parameters + setLaunchParams(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, + rEigenVec_MomentumY, rEigenVec_MomentumZ, rEigenVec_E, + rEigenVec_Bx, rEigenVec_By, rEigenVec_Bz, pitch, yaw, domain, + domain_direction); + + // Set the number of timesteps + waveTest.setFiducialNumTimeSteps(numTimeSteps[domain_direction-1]); + + // Check Results + waveTest.runL1ErrorTest(allowedL1Error, allowedError); +} + +TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, + AlfvenWaveLeftMovingCorrectInputExpectCorrectOutput) +{ + // Specific to this test + double const waveSpeed = 1.0; + std::vector const numTimeSteps = {427, 407, 440}; + + double const rEigenVec_rho = 0; + double const rEigenVec_MomentumX = 0; + double const rEigenVec_MomentumY = 0; + double const rEigenVec_MomentumZ = 1; + double const rEigenVec_Bx = 0; + double const rEigenVec_By = 0; + double const rEigenVec_Bz = 1; + double const rEigenVec_E = 0; + + // Get the test parameters + auto [pitch, yaw, domain, domain_direction] = GetParam(); + + // Set the launch parameters + setLaunchParams(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, + rEigenVec_MomentumY, rEigenVec_MomentumZ, rEigenVec_E, + rEigenVec_Bx, rEigenVec_By, rEigenVec_Bz, pitch, yaw, domain, + domain_direction); + + // Set the number of timesteps + waveTest.setFiducialNumTimeSteps(numTimeSteps[domain_direction-1]); + + // Check Results + waveTest.runL1ErrorTest(allowedL1Error, allowedError); +} + +// Contact Wave Moving Right +// =================================== +TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, + MHDContactWaveCorrectInputExpectCorrectOutput) +{ + // Specific to this test + double const waveSpeed = 1.0; + std::vector const numTimeSteps = {641, 620, 654}; + + double const rEigenVec_rho = 1; + double const rEigenVec_MomentumX = 1; + double const rEigenVec_MomentumY = 0; + double const rEigenVec_MomentumZ = 0; + double const rEigenVec_Bx = 0; + double const rEigenVec_By = 0; + double const rEigenVec_Bz = 0; + double const rEigenVec_E = 0.5; + double const velocityX = waveSpeed; + + // Get the test parameters + auto [pitch, yaw, domain, domain_direction] = GetParam(); + + // Set the launch parameters + setLaunchParams(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, + rEigenVec_MomentumY, rEigenVec_MomentumZ, rEigenVec_E, + rEigenVec_Bx, rEigenVec_By, rEigenVec_Bz, pitch, yaw, domain, + domain_direction, velocityX); + + // Set the number of timesteps + waveTest.setFiducialNumTimeSteps(numTimeSteps[domain_direction-1]); + + // Check Results + #ifdef PCM + waveTest.runL1ErrorTest(1.35*allowedL1Error, 1.35*allowedError); + #else //PCM + waveTest.runL1ErrorTest(allowedL1Error, allowedError); + #endif //PCM +} + +INSTANTIATE_TEST_SUITE_P(, + tMHDSYSTEMLinearWavesParameterizedAngle, + ::testing::Values( + std::make_tuple(0.0*M_PI, 0.0*M_PI, 0.5, 1), + std::make_tuple(0.0*M_PI, 0.5*M_PI, 0.5, 2), + std::make_tuple(0.5*M_PI, 0.0*M_PI, 0.5, 3) + //std::make_tuple(std::asin(2./3.), std::asin(2./std::sqrt(5.)), 1.5, 1) + )); +/// @} +// ============================================================================= + +// ============================================================================= +// Test Suite: tMHDSYSTEMSodShockTube +// TODO: This is temporary. Remove once PPMP is implemented for MHD and replace +// with the hydro sod test +// ============================================================================= +/*! + * \defgroup tMHDSYSTEMSodShockTubeParameterizedMpi_CorrectInputExpectCorrectOutput + * \brief Test the Sod Shock tube initial conditions as a parameterized test + * with varying numbers of MPI ranks + * + */ +/// @{ +class tMHDSYSTEMSodShockTubeParameterizedMpi + :public + ::testing::TestWithParam +{ +protected: + systemTest::SystemTestRunner sodTest; +}; + +TEST_P(tMHDSYSTEMSodShockTubeParameterizedMpi, + CorrectInputExpectCorrectOutput) +{ + sodTest.numMpiRanks = GetParam(); + sodTest.runTest(); +} + +INSTANTIATE_TEST_SUITE_P(CorrectInputExpectCorrectOutput, + tMHDSYSTEMSodShockTubeParameterizedMpi, + ::testing::Values(1, 2, 4)); +/// @} +// ============================================================================= + +// ============================================================================= +// Test Suite: tMHDSYSTEMEinfeldtStrongRarefaction +// ============================================================================= +TEST(tMHDSYSTEMEinfeldtStrongRarefaction, + CorrectInputExpectCorrectOutput) +{ + systemTest::SystemTestRunner rarefactionTest; + rarefactionTest.runTest(); +} +// ============================================================================= + +// ============================================================================= +// Test Suite: tMHDSYSTEMLinearWavesParameterizedMpi +// ============================================================================= +/*! + * \defgroup tMHDSYSTEMLinearWavesParameterizedMpi + * \brief Test the linear waves initial conditions as a parameterized test + * with varying numbers of MPI ranks. Details in Gardiner & Stone 2008 + * + */ +/// @{ +class tMHDSYSTEMLinearWavesParameterizedMpi + :public + ::testing::TestWithParam +{ +public: + tMHDSYSTEMLinearWavesParameterizedMpi() + : waveTest(false, true, false, false){}; +protected: + systemTest::SystemTestRunner waveTest; + + #ifdef PCM + double const allowedL1Error = 4E-7; // Based on results in Gardiner & Stone 2008 + double const allowedError = 4E-7; + #else //PCM + double const allowedL1Error = 1E-7; // Based on results in Gardiner & Stone 2008 + double const allowedError = 1E-7; + #endif //PCM + + void setLaunchParams(double const &waveSpeed, double const &rEigenVec_rho, + double const &rEigenVec_MomentumX, double const &rEigenVec_MomentumY, + double const &rEigenVec_MomentumZ, double const &rEigenVec_E, + double const &rEigenVec_Bx, double const &rEigenVec_By, + double const &rEigenVec_Bz) + { + // Constant for all tests + size_t const N = 32; + double const gamma = 5./3.; + double const domain = 0.5; + double const tOut = 2*domain / waveSpeed; + + // Settings + waveTest.chollaLaunchParams.append(" nx=" + to_string_exact(2*N)); + waveTest.chollaLaunchParams.append(" ny=" + to_string_exact(N)); + waveTest.chollaLaunchParams.append(" nz=" + to_string_exact(N)); + waveTest.chollaLaunchParams.append(" tout=" + to_string_exact(tOut)); + waveTest.chollaLaunchParams.append(" outstep=" + to_string_exact(tOut)); + waveTest.chollaLaunchParams.append(" init=Linear_Wave"); + waveTest.chollaLaunchParams.append(" xmin=0.0"); + waveTest.chollaLaunchParams.append(" ymin=0.0"); + waveTest.chollaLaunchParams.append(" zmin=0.0"); + waveTest.chollaLaunchParams.append(" xlen=" + to_string_exact(2*domain)); + waveTest.chollaLaunchParams.append(" ylen=" + to_string_exact(domain)); + waveTest.chollaLaunchParams.append(" zlen=" + to_string_exact(domain)); + waveTest.chollaLaunchParams.append(" xl_bcnd=1"); + waveTest.chollaLaunchParams.append(" xu_bcnd=1"); + waveTest.chollaLaunchParams.append(" yl_bcnd=1"); + waveTest.chollaLaunchParams.append(" yu_bcnd=1"); + waveTest.chollaLaunchParams.append(" zl_bcnd=1"); + waveTest.chollaLaunchParams.append(" zu_bcnd=1"); + waveTest.chollaLaunchParams.append(" rho=1.0"); + waveTest.chollaLaunchParams.append(" vx=0"); + waveTest.chollaLaunchParams.append(" vy=0"); + waveTest.chollaLaunchParams.append(" vz=0"); + waveTest.chollaLaunchParams.append(" P=" + to_string_exact(1/gamma)); + waveTest.chollaLaunchParams.append(" Bx=1"); + waveTest.chollaLaunchParams.append(" By=1.5"); + waveTest.chollaLaunchParams.append(" Bz=0"); + waveTest.chollaLaunchParams.append(" A='1e-6'"); + waveTest.chollaLaunchParams.append(" gamma=" + to_string_exact(gamma)); + waveTest.chollaLaunchParams.append(" rEigenVec_rho=" + to_string_exact(rEigenVec_rho)); + waveTest.chollaLaunchParams.append(" rEigenVec_MomentumX=" + to_string_exact(rEigenVec_MomentumX)); + waveTest.chollaLaunchParams.append(" rEigenVec_MomentumY=" + to_string_exact(rEigenVec_MomentumY)); + waveTest.chollaLaunchParams.append(" rEigenVec_MomentumZ=" + to_string_exact(rEigenVec_MomentumZ)); + waveTest.chollaLaunchParams.append(" rEigenVec_E=" + to_string_exact(rEigenVec_E)); + waveTest.chollaLaunchParams.append(" rEigenVec_Bx=" + to_string_exact(rEigenVec_Bx)); + waveTest.chollaLaunchParams.append(" rEigenVec_By=" + to_string_exact(rEigenVec_By)); + waveTest.chollaLaunchParams.append(" rEigenVec_Bz=" + to_string_exact(rEigenVec_Bz)); + } +}; + +// Slow Magnetosonic Waves Moving Left and Right +// ============================================= +TEST_P(tMHDSYSTEMLinearWavesParameterizedMpi, + SlowMagnetosonicWaveRightMovingCorrectInputExpectCorrectOutput) +{ + // Specific to this test + double const waveSpeed = 0.5; + int const numTimeSteps = 854; + + double const prefix = 1./(2*std::sqrt(5)); + double const rEigenVec_rho = prefix * 4; + double const rEigenVec_MomentumX = prefix * 2; + double const rEigenVec_MomentumY = prefix * 4; + double const rEigenVec_MomentumZ = prefix * 0; + double const rEigenVec_Bx = prefix * 0; + double const rEigenVec_By = prefix * -2; + double const rEigenVec_Bz = prefix * 0; + double const rEigenVec_E = prefix * 3; + + // Get the test parameters + waveTest.numMpiRanks = GetParam(); + + // Set the launch parameters + setLaunchParams(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, + rEigenVec_MomentumY, rEigenVec_MomentumZ, rEigenVec_E, + rEigenVec_Bx, rEigenVec_By, rEigenVec_Bz); + + // Set the number of timesteps + waveTest.setFiducialNumTimeSteps(numTimeSteps); + + // Check Results + waveTest.runL1ErrorTest(allowedL1Error, allowedError); +} + +TEST_P(tMHDSYSTEMLinearWavesParameterizedMpi, + SlowMagnetosonicWaveLeftMovingCorrectInputExpectCorrectOutput) +{ + // Specific to this test + double const waveSpeed = 0.5; + int const numTimeSteps = 854; + + double const prefix = 1./(2*std::sqrt(5)); + double const rEigenVec_rho = prefix * 4; + double const rEigenVec_MomentumX = prefix * -2; + double const rEigenVec_MomentumY = prefix * -4; + double const rEigenVec_MomentumZ = prefix * 0; + double const rEigenVec_Bx = prefix * 0; + double const rEigenVec_By = prefix * -2; + double const rEigenVec_Bz = prefix * 0; + double const rEigenVec_E = prefix * 3; + + // Get the test parameters + waveTest.numMpiRanks = GetParam(); + + // Set the launch parameters + setLaunchParams(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, + rEigenVec_MomentumY, rEigenVec_MomentumZ, rEigenVec_E, + rEigenVec_Bx, rEigenVec_By, rEigenVec_Bz); + + // Set the number of timesteps + waveTest.setFiducialNumTimeSteps(numTimeSteps); + + // Check Results + waveTest.runL1ErrorTest(allowedL1Error, allowedError); +} + +INSTANTIATE_TEST_SUITE_P(, + tMHDSYSTEMLinearWavesParameterizedMpi, + ::testing::Values(1, 2, 4)); +/// @} +// ============================================================================= \ No newline at end of file diff --git a/src/system_tests/system_tester.cpp b/src/system_tests/system_tester.cpp index c59e6e770..a2835ce7c 100644 --- a/src/system_tests/system_tester.cpp +++ b/src/system_tests/system_tester.cpp @@ -21,6 +21,7 @@ // Local includes #include "../system_tests/system_tester.h" // Include the header file #include "../utils/testing_utilities.h" +#include "../io/io.h" // ============================================================================= // Public Members @@ -154,8 +155,7 @@ void systemTest::SystemTestRunner::runTest() else { // This is a field data set - testData = loadTestFieldData(dataSetName, - testDims); + testData = loadTestFieldData(dataSetName, testDims); // Get fiducial data fiducialData = _loadFiducialFieldData(dataSetName); } @@ -178,14 +178,11 @@ void systemTest::SystemTestRunner::runTest() // Check for equality and iff not equal return difference double absoluteDiff; int64_t ulpsDiff; - // Fixed epsilon is changed from the default since AMD/Clang - // appear to differ from NVIDIA/GCC/XL by roughly 1E-12 - double fixedEpsilon = 5.0E-12; bool areEqual = testingUtilities::nearlyEqualDbl(fiducialData.at(index), testData.at(index), absoluteDiff, ulpsDiff, - fixedEpsilon); + _fixedEpsilon); ASSERT_TRUE(areEqual) << std::endl << "Difference in " @@ -203,6 +200,136 @@ void systemTest::SystemTestRunner::runTest() } // ============================================================================= +// ============================================================================= +void systemTest::SystemTestRunner::runL1ErrorTest(double const &maxAllowedL1Error, + double const &maxAllowedError) +{ + /// Only run if this variable is set to `true`. Generally this and + /// globalCompareSystemTestResults should only be used for large MPI / tests + /// where the user wishes to separate the execution of cholla and the / + /// comparison of results onto different machines/jobs + if (globalRunCholla) + { + // Launch Cholla. Note that this dumps all console output to the console + // log file as requested by the user. + launchCholla(); + } + + // Check that there is hydro data and no particle data + if (_particleDataExists) + { + std::string errMessage = "Error: SystemTestRunner::runL1ErrorTest does not support particles"; + throw std::runtime_error(errMessage); + } + if (not _hydroDataExists) + { + std::string errMessage = "Error: SystemTestRunner::runL1ErrorTest requires hydro data"; + throw std::runtime_error(errMessage); + } + + /// If set to false then no comparison will be performed. Generally this and + /// globalRunCholla should only be used for large MPI tests where the user + /// wishes to separate the execution of cholla and the comparison of results + /// onto different machines/jobs + if (not globalCompareSystemTestResults) return; + + // Make sure we have all the required data files and open the data files + _testHydroFieldsFileVec.resize(numMpiRanks); + std::vector initialHydroFieldsFileVec(numMpiRanks); + for (size_t fileIndex = 0; fileIndex < numMpiRanks; fileIndex++) + { + // Initial time data + std::string fileName = "/0.h5." + std::to_string(fileIndex); + _checkFileExists(_outputDirectory + fileName); + initialHydroFieldsFileVec[fileIndex].openFile(_outputDirectory + fileName, + H5F_ACC_RDONLY); + + // Final time data + fileName = "/1.h5." + std::to_string(fileIndex); + _checkFileExists(_outputDirectory + fileName); + _testHydroFieldsFileVec[fileIndex].openFile(_outputDirectory + fileName, + H5F_ACC_RDONLY); + } + + // Get the list of test dataset names + _fiducialDataSetNames = _findDataSetNames(initialHydroFieldsFileVec[0]); + _testDataSetNames = _findDataSetNames(_testHydroFieldsFileVec[0]); + + // Start Performing Checks + // ======================= + // Check the number of time steps + if (_compareNumTimeSteps) _checkNumTimeSteps(); + + // Check that the test file has as many, or more, datasets than the fiducial + // file. Provide a warning if the datasets are not the same size + EXPECT_GE(_testDataSetNames.size(), _fiducialDataSetNames.size()) + << std::endl + << "Warning: The test data has " + << _testDataSetNames.size() + << " datasets and the fiducial data has " + << _fiducialDataSetNames.size() + << " datasets" << std::endl << std::endl; + + // Loop over the datasets to be tested + double L2Norm = 0; + double maxError = 0; + for (auto dataSetName: _fiducialDataSetNames) + { + if (dataSetName == "GasEnergy") + { + continue; + } + + // check that the test data has the dataset in it + ASSERT_EQ(std::count(_testDataSetNames.begin(), _testDataSetNames.end(), dataSetName), 1) + << "The test data does not contain the dataset '" + dataSetName + + "' or contains it more than once."; + + // Get data vectors + std::vector initialDims(3,1); + std::vector initialData; + std::vector finalDims(3,1); + std::vector finalData; + + // This is a field data set + initialData = loadTestFieldData(dataSetName, initialDims, initialHydroFieldsFileVec); + // Get fiducial data + finalData = loadTestFieldData(dataSetName, finalDims, _testHydroFieldsFileVec); + + // Check that they're the same length + ASSERT_EQ(initialData.size(), finalData.size()) + << "The initial and final '" + << dataSetName + << "' datasets are not the same length"; + + // Compute the L1 Error. + double L1Error = 0; + for (size_t i = 0; i < initialData.size(); i++) + { + double const diff = std::abs(initialData.at(i) - finalData.at(i)); + L1Error += diff; + maxError = (diff > maxError)? diff: maxError; + } + + L1Error *= (1./static_cast(initialDims[0]*initialDims[1]*initialDims[2])); + L2Norm += L1Error * L1Error; + + // Perform the correctness check + EXPECT_LT(L1Error, maxAllowedL1Error) << "the L1 error for the " + << dataSetName + << " data has exceeded the allowed value"; + } + + // Check the L1 Norm + L2Norm = std::sqrt(L2Norm); + EXPECT_LT(L2Norm, maxAllowedL1Error) + << "the norm of the L1 error vector has exceeded the allowed value"; + + // Check the Max Error + EXPECT_LT(maxError, maxAllowedError) << "The maximum error has exceeded the allowed value"; +} +// ============================================================================= + // ============================================================================= void systemTest::SystemTestRunner::launchCholla() { @@ -250,11 +377,11 @@ void systemTest::SystemTestRunner::setFiducialData(std::string const &fieldName, std::vector const &dataVec) { // First check if there's a fiducial data file - if (_fiducialFileExists) + if (_fiducialDataSets.count(fieldName) > 0) { - std::string errMessage = "Error: Fiducial data file already exists for test '" - + _fullTestFileName - + "' and cannot be overwritten."; + std::string errMessage = "Error: Fiducial dataset for field '" + + fieldName + + "' already exists and cannot be overwritten"; throw std::runtime_error(errMessage); } @@ -333,17 +460,41 @@ systemTest::SystemTestRunner::SystemTestRunner(bool const &particleData, _fullTestFileName = fullTestName.substr(0, fullTestName.find("/")); // Generate the input paths. Strip out everything after a "/" since that - // probably indicates a parameterized test + // probably indicates a parameterized test. Also, check that the files exist + // and load fiducial HDF5 file if required _chollaPath = ::globalChollaRoot.getString() + "/bin/cholla." + ::globalChollaBuild.getString() + "." + ::globalChollaMachine.getString(); - _chollaSettingsPath = ::globalChollaRoot.getString() - + "/src/system_tests/input_files/" - + _fullTestFileName + ".txt"; + _checkFileExists(_chollaPath); + if (useSettingsFile) + { + _chollaSettingsPath = ::globalChollaRoot.getString() + + "/src/system_tests/input_files/" + + _fullTestFileName + ".txt"; + _checkFileExists(_chollaSettingsPath); + } + else + { + _chollaSettingsPath = ::globalChollaRoot.getString() + + "/src/system_tests/input_files/" + + "blank_settings_file.txt"; + _checkFileExists(_chollaSettingsPath); + } + if (useFiducialFile) + { _fiducialFilePath = ::globalChollaRoot.getString() + "/cholla-tests-data/system_tests/" + _fullTestFileName + ".h5"; + _checkFileExists(_fiducialFilePath); + _fiducialFile.openFile(_fiducialFilePath, H5F_ACC_RDONLY); + _fiducialDataSetNames = _findDataSetNames(_fiducialFile); + _fiducialFileExists = true; + } + else + { + _fiducialFilePath = ""; + } // Generate output paths, these files don't exist yet _outputDirectory = ::globalChollaRoot.getString() + "/bin/" + fullTestName; @@ -360,17 +511,6 @@ systemTest::SystemTestRunner::SystemTestRunner(bool const &particleData, + "' either already exists or could not be created." << std::endl; } - - // Check that the files exist and load fiducial HDF5 file if required - _checkFileExists(_chollaPath); - if (useSettingsFile) _checkFileExists(_chollaSettingsPath); - if (useFiducialFile) - { - _checkFileExists(_fiducialFilePath); - _fiducialFile.openFile(_fiducialFilePath, H5F_ACC_RDONLY); - _fiducialDataSetNames = _findDataSetNames(_fiducialFile); - _fiducialFileExists = true; - }; } // ============================================================================= @@ -463,22 +603,25 @@ void systemTest::SystemTestRunner::_checkNumTimeSteps() // ============================================================================= std::vector systemTest::SystemTestRunner::loadTestFieldData( std::string dataSetName, - std::vector &testDims) + std::vector &testDims, + std::vector file) { - // Get the file we're using - std::vector file; + // Switch which fileset we're using if it's a particle dataset if (dataSetName == "particle_density") { file = _testParticlesFileVec; dataSetName = "density"; } - else + else if (file.size() == 0) { file = _testHydroFieldsFileVec; } - // Get the size of each dimension - H5::Attribute dimensions = file[0].openAttribute("dims"); + // Get the size of each dimension. First check if the field is a magnetic + // field or not to make sure we're retreiving the right dimensions + std::string dimsName = (dataSetName.find("magnetic") != std::string::npos)? + "magnetic_field_dims": "dims"; + H5::Attribute dimensions = file[0].openAttribute(dimsName.c_str()); dimensions.read(H5::PredType::NATIVE_ULONG, testDims.data()); // Allocate the vector @@ -510,7 +653,9 @@ std::vector systemTest::SystemTestRunner::loadTestFieldData( // Get dims_local std::vector dimsLocal(3,1); - H5::Attribute dimsLocalAttr = file[rank].openAttribute("dims_local"); + std::string dimsNameLocal = (dataSetName.find("magnetic") != std::string::npos)? + "magnetic_field_dims_local": "dims_local"; + H5::Attribute dimsLocalAttr = file[rank].openAttribute(dimsNameLocal.c_str()); dimsLocalAttr.read(H5::PredType::NATIVE_INT, dimsLocal.data()); // Now we add the data to the larger vector diff --git a/src/system_tests/system_tester.h b/src/system_tests/system_tester.h index 6d5aa1925..29b8b74d0 100644 --- a/src/system_tests/system_tester.h +++ b/src/system_tests/system_tester.h @@ -72,9 +72,24 @@ class systemTest::SystemTestRunner */ void runTest(); + /*! + * \brief Compute the L1 error for each field compared to the initial + * conditions. Doesn't work with particle data + * + * \param[in] maxAllowedL1Error The maximum allowed L1 error for this test + * \param[in] maxAllowedError The maximum allowed for any value in the test + * + */ + void runL1ErrorTest(double const &maxAllowedL1Error, double const &maxAllowedError=1E-7); + + /*! + * \brief Launch Cholla as it is set up + * + */ void launchCholla(); void openHydroTestData(); + /*! * \brief Get the Cholla Path object * @@ -128,6 +143,13 @@ class systemTest::SystemTestRunner */ std::vector getDataSetsToTest(){return _fiducialDataSetNames;}; + /*! + * \brief Set the Fixed Epsilon value + * + * \param[in] newVal The new value of fixed epsilon + */ + void setFixedEpsilon(double const &newVal){_fixedEpsilon = newVal;}; + /*! * \brief Choose which datasets to test. By default it tests all the * datasets in the fiducial data. A warning will be thrown if not all the @@ -193,10 +215,12 @@ class systemTest::SystemTestRunner * * \param[in] dataSetName The name of the dataset to get * \param[out] testDims An vector with the length of each dimension in it + * \param[in] file (optional) The vector of HDF5 files to load * \return std::vector A vector containing the data */ std::vector loadTestFieldData(std::string dataSetName, - std::vector &testDims); + std::vector &testDims, + std::vector file={}); /*! * \brief Generate a std::vector of the specified size populated by a sine @@ -241,7 +265,7 @@ class systemTest::SystemTestRunner * \param[in] useSettingsFile Indicate if you're using a settings file. If * `true` then the settings file is automatically found based on the naming * convention. If false then the user MUST provide all the required settings - * with the SystemTestRunner::setChollaLaunchParams method + * with the SystemTestRunner::chollaLaunchParams member variable */ SystemTestRunner(bool const &particleData=false, bool const &hydroData=true, @@ -290,6 +314,10 @@ class systemTest::SystemTestRunner /// The total number of particles in the fiducial dataset size_t _fiducialTotalNumParticles=0; + /// Fixed epsilon is changed from the default since AMD/Clang + /// appear to differ from NVIDIA/GCC/XL by roughly 1E-12 + double _fixedEpsilon = 5.0E-12; + /// Flag to indicate if a fiducial HDF5 data file is being used or a /// programmatically generated H5File object. `true` = use a file, `false` = /// use generated H5File object diff --git a/src/utils/DeviceVector.h b/src/utils/DeviceVector.h index 422f3d151..ca0cacba8 100644 --- a/src/utils/DeviceVector.h +++ b/src/utils/DeviceVector.h @@ -1,5 +1,5 @@ /*! - * \file device_vector.h + * \file DeviceVector.h * \author Robert 'Bob' Caddy (rvc@pitt.edu) * \brief Contains the declartion and implementation of the DeviceVector * class. Note that since this is a templated class the implementation must be @@ -48,8 +48,10 @@ namespace cuda_utilities * * \param[in] size The number of elements desired in the array. Can be * any positive integer. + * \param[in] initialize (optional) If true then initialize the GPU + * memory to int(0) */ - DeviceVector(size_t const size) {_allocate(size);} + DeviceVector(size_t const size, bool const initialize=false); /*! * \brief Destroy the Device Vector object by calling the `_deAllocate` @@ -178,7 +180,7 @@ namespace cuda_utilities void _allocate(size_t const size) { _size=size; - CudaSafeCall(cudaMalloc(&_ptr, size*sizeof(T))); + CudaSafeCall(cudaMalloc(&_ptr, _size*sizeof(T))); } /*! @@ -198,11 +200,23 @@ namespace cuda_utilities // ============================================================================= namespace cuda_utilities { - // ========================================================================= // Public Methods // ========================================================================= + // ========================================================================= + template + DeviceVector::DeviceVector(size_t const size, bool const initialize) + { + _allocate(size); + + if (initialize) + { + CudaSafeCall(cudaMemset(_ptr, 0, _size*sizeof(T))); + } + } + // ========================================================================= + // ========================================================================= template void DeviceVector::resize(size_t const newSize) diff --git a/src/utils/DeviceVector_tests.cu b/src/utils/DeviceVector_tests.cu index 26a63dbca..3db21baee 100644 --- a/src/utils/DeviceVector_tests.cu +++ b/src/utils/DeviceVector_tests.cu @@ -1,5 +1,5 @@ /*! - * \file device_vector_tests.cu + * \file DeviceVector_tests.cu * \author Robert 'Bob' Caddy (rvc@pitt.edu) * \brief Tests for the DeviceVector class * @@ -81,7 +81,7 @@ TEST(tALLDeviceVectorDestructor, // Get the pointer information cudaPointerAttributes ptrAttributes; - CudaSafeCall(cudaPointerGetAttributes(&ptrAttributes, devVector.data())); + cudaPointerGetAttributes(&ptrAttributes, devVector.data()); // Warning strings std::string typeMessage = "ptrAttributes.type should be 0 since " @@ -106,6 +106,9 @@ TEST(tALLDeviceVectorDestructor, #endif // O_HIP EXPECT_EQ(nullptr, ptrAttributes.devicePointer) << devPtrMessage; EXPECT_EQ(nullptr, ptrAttributes.hostPointer) << hostPtrMessage; + + // Reconstruct DeviceVector object to avoid error + new (&devVector) cuda_utilities::DeviceVector{vectorSize}; } TEST(tALLDeviceVectorStdVectorHostToDeviceCopyAndIndexing, diff --git a/src/utils/cuda_utilities.h b/src/utils/cuda_utilities.h index 9c07a95a6..3f0ae5fba 100644 --- a/src/utils/cuda_utilities.h +++ b/src/utils/cuda_utilities.h @@ -12,7 +12,6 @@ #include "../global/global_cuda.h" #include "../utils/gpu.hpp" - namespace cuda_utilities { /*! @@ -75,25 +74,61 @@ namespace cuda_utilities } } - // ========================================================================= /*! - * \brief Set the value that `pointer` points at in GPU memory to `value`. - * This only sets the first value in memory so if `pointer` points to an - * array then only `pointer[0]` will be set; i.e. this effectively does - * `pointer = &value` - * - * \tparam T Any scalar type - * \param[in] pointer The location in GPU memory - * \param[in] value The value to set `*pointer` to - */ - template - void setScalarDeviceMemory(T *pointer, T const value) + * \brief Initialize GPU memory + * + * \param[in] ptr The pointer to GPU memory + * \param[in] N The size of the array in bytes + */ + inline void initGpuMemory(Real *ptr, size_t N) { - CudaSafeCall( - cudaMemcpy(pointer, // destination - &value, // source - sizeof(T), - cudaMemcpyHostToDevice)); + CudaSafeCall(cudaMemset(ptr, 0, N)); } - // ========================================================================= -} \ No newline at end of file + + // ===================================================================== + /*! + * \brief Struct to determine the optimal number of blocks and threads + * per block to use when launching a kernel. The member + * variables are `threadsPerBlock` and `numBlocks` which are chosen with + the occupancy API. Can target any device on the system through the + * optional constructor argument. + * NOTE: On AMD there's currently an issue that stops kernels from being + * passed. As a workaround for now this struct just returns the maximum + * number of blocks and threads per block that a MI250X can run at once. + * + */ + template + struct AutomaticLaunchParams + { + public: + /*! + * \brief Construct a new Reduction Launch Params object. By default it + * generates values of numBlocks and threadsPerBlock suitable for a + * kernel with a grid-stride loop. For a kernel with one thread per + * element set the optional `numElements` argument to the number of + * elements + * + * \param[in] kernel The kernel to determine the launch parameters for + * \param[in] numElements The number of elements in the array that + the kernel operates on + */ + AutomaticLaunchParams(T &kernel, size_t numElements=0) + { + cudaOccupancyMaxPotentialBlockSize(&numBlocks, &threadsPerBlock, kernel, 0, 0); + + if (numElements > 0) + { + numBlocks = (numElements + threadsPerBlock - 1) / threadsPerBlock; + } + } + + /// Defaulted Destructor + ~AutomaticLaunchParams()=default; + + /// The maximum number of threads per block that the device supports + int threadsPerBlock; + /// The maximum number of scheduleable blocks on the device + int numBlocks; + }; + // ===================================================================== +} // end namespace cuda_utilities diff --git a/src/utils/cuda_utilities_tests.cpp b/src/utils/cuda_utilities_tests.cpp index ddefebfd7..dc2f20066 100644 --- a/src/utils/cuda_utilities_tests.cpp +++ b/src/utils/cuda_utilities_tests.cpp @@ -120,23 +120,3 @@ TEST(tALLCompute1DIndex, EXPECT_EQ(fiducialId, testId); } // ============================================================================= - -// ============================================================================= -TEST(tALLSetScalarDeviceMemory, - TypeDoubleInputExpectCorrectValueSet) -{ - double value = 173.246; - double *dev_ptr, host_val; - CudaSafeCall(cudaMalloc(&dev_ptr, sizeof(double))); - - cuda_utilities::setScalarDeviceMemory(dev_ptr, value); - - CudaSafeCall( - cudaMemcpy(&host_val, // destination - dev_ptr, // source - sizeof(double), - cudaMemcpyDeviceToHost)); - - EXPECT_EQ(value, host_val); -} -// ============================================================================= diff --git a/src/utils/gpu.hpp b/src/utils/gpu.hpp index 4c285965e..66f2885f2 100644 --- a/src/utils/gpu.hpp +++ b/src/utils/gpu.hpp @@ -20,7 +20,7 @@ static void __attribute__((unused)) check(const hipfftResult err, const char *co exit(err); } -#endif // PARIS PARIC_GALACTIC +#endif //CUFFT PARIS PARIS_GALACTIC #define WARPSIZE 64 static constexpr int maxWarpsPerBlock = 1024/WARPSIZE; @@ -53,6 +53,7 @@ static constexpr int maxWarpsPerBlock = 1024/WARPSIZE; #define cudaMemcpy hipMemcpy #define cudaMemcpyAsync hipMemcpyAsync #define cudaMemcpyPeer hipMemcpyPeer +#define cudaMemcpyPeer hipMemcpyPeer #define cudaMemcpyDeviceToHost hipMemcpyDeviceToHost #define cudaMemcpyDeviceToDevice hipMemcpyDeviceToDevice #define cudaMemcpyHostToDevice hipMemcpyHostToDevice @@ -65,10 +66,11 @@ static constexpr int maxWarpsPerBlock = 1024/WARPSIZE; #define cudaGetDeviceProperties hipGetDeviceProperties #define cudaPointerAttributes hipPointerAttribute_t #define cudaPointerGetAttributes hipPointerGetAttributes +#define cudaOccupancyMaxPotentialBlockSize hipOccupancyMaxPotentialBlockSize // Texture definitions #define cudaArray hipArray -#define cudaMallocArray hipMallocArray +#define cudaMallocArray hipMallocArray #define cudaFreeArray hipFreeArray #define cudaMemcpyToArray hipMemcpyToArray #define cudaMemcpy2DToArray hipMemcpy2DToArray @@ -87,8 +89,10 @@ static constexpr int maxWarpsPerBlock = 1024/WARPSIZE; #define cudaTextureDesc hipTextureDesc #define cudaAddressModeClamp hipAddressModeClamp #define cudaFilterModeLinear hipFilterModeLinear -#define cudaFilterModePoint hipFilterModePoint +#define cudaFilterModePoint hipFilterModePoint // Texture Definitions +#define cudaPointerAttributes hipPointerAttribute_t +#define cudaPointerGetAttributes hipPointerGetAttributes // FFT definitions #define cufftDestroy hipfftDestroy diff --git a/src/utils/hydro_utilities.h b/src/utils/hydro_utilities.h index 990eb2a83..b89175835 100644 --- a/src/utils/hydro_utilities.h +++ b/src/utils/hydro_utilities.h @@ -71,6 +71,40 @@ namespace hydro_utilities { return P; } + /*! + * \brief Compute the kinetic energy from the density and velocities + * + * \param[in] d The density + * \param[in] vx The x velocity + * \param[in] vy The y velocity + * \param[in] vz The z velocity + * \return Real The kinetic energy + */ + inline __host__ __device__ Real Calc_Kinetic_Energy_From_Velocity(Real const &d, + Real const &vx, + Real const &vy, + Real const &vz) + { + return 0.5 * d * (vx*vx + vy*vy * vz*vz); + } + + /*! + * \brief Compute the kinetic energy from the density and momenta + * + * \param[in] d The density + * \param[in] mx The x momentum + * \param[in] my The y momentum + * \param[in] mz The z momentum + * \return Real The kinetic energy + */ + inline __host__ __device__ Real Calc_Kinetic_Energy_From_Momentum(Real const &d, + Real const &mx, + Real const &my, + Real const &mz) + { + return (0.5 / d) * (mx*mx + my*my * mz*mz); + } + inline __host__ __device__ Real Calc_Sound_Speed(Real const &E, Real const &d, Real const &mx, Real const &my, Real const &mz, Real const &gamma) { Real P = Calc_Pressure_Conserved(E, d, mx, my, mz, gamma); return sqrt(gamma * P / d); diff --git a/src/utils/hydro_utilities_tests.cpp b/src/utils/hydro_utilities_tests.cpp index e8a066d12..e0e3cf455 100644 --- a/src/utils/hydro_utilities_tests.cpp +++ b/src/utils/hydro_utilities_tests.cpp @@ -127,3 +127,45 @@ TEST(tHYDROHydroUtilsGetPressureFromDE, CorrectInputExpectCorrectOutput) { testingUtilities::checkResults(fiducial_Ps.at(i), test_Ps, parameters.names.at(i)); } } + +TEST(tHYDROtMHDCalcKineticEnergyFromVelocity, CorrectInputExpectCorrectOutput) { + TestParams parameters; + std::vector fiducialEnergies{0.0, + 6.307524975350106e-145, + 7.3762470327090601e+249}; + double const coef = 1E-50; + + for (size_t i = 0; i < parameters.names.size(); i++) + { + Real testEnergy = hydro_utilities::Calc_Kinetic_Energy_From_Velocity( + coef*parameters.d.at(i), + coef*parameters.vx.at(i), + coef*parameters.vy.at(i), + coef*parameters.vz.at(i)); + + testingUtilities::checkResults(fiducialEnergies.at(i), + testEnergy, + parameters.names.at(i)); + } +} + +TEST(tHYDROtMHDCalcKineticEnergyFromMomentum, CorrectInputExpectCorrectOutput) { + TestParams parameters; + std::vector fiducialEnergies{0.0, + 0.0, + 7.2568536478335773e+147}; + double const coef = 1E-50; + + for (size_t i = 0; i < parameters.names.size(); i++) + { + Real testEnergy = hydro_utilities::Calc_Kinetic_Energy_From_Momentum( + coef*parameters.d.at(i), + coef*parameters.mx.at(i), + coef*parameters.my.at(i), + coef*parameters.mz.at(i)); + + testingUtilities::checkResults(fiducialEnergies.at(i), + testEnergy, + parameters.names.at(i)); + } +} \ No newline at end of file diff --git a/src/utils/math_utilities.h b/src/utils/math_utilities.h new file mode 100644 index 000000000..735cec996 --- /dev/null +++ b/src/utils/math_utilities.h @@ -0,0 +1,92 @@ +/*! + * \file math_utilities.h + * \author Robert 'Bob' Caddy (rvc@pitt.edu) + * \brief Contains various functions for common mathematical operations + * + */ + +#pragma once + +// STL Includes +#include +#include + +// External Includes + +// Local Includes +#include "../global/global.h" +#include "../global/global_cuda.h" +#include "../utils/gpu.hpp" + +namespace math_utils +{ + // ========================================================================= + /*! + * \brief Rotate cartesian coordinates. All arguments are cast to double + * then rotated. If the type is 'int' then the value is rounded to the + * nearest int + * + * \details Rotation such that when pitch=90 and yaw=0 x1_rot = -x3 and when + * pitch=0 and yaw=90 x1_rot = -x2 + * + * \tparam T The return type + * \param[in] x_1 x1 coordinate + * \param[in] x_2 x2 coordinate + * \param[in] x_3 x3 coordinate + * \param[in] pitch Pitch angle in radians + * \param[in] yaw Yaw angle in radians + * \return std::tuple The new, rotated, coordinates in the + * order . Intended to be captured with structured binding + */ + template + inline std::tuple rotateCoords(Real const &x_1, Real const &x_2, + Real const &x_3, Real const &pitch, Real const &yaw) + { + // Compute the sines and cosines. Correct for floating point errors if + // the angle is 0.5*M_PI + Real const sin_yaw = std::sin(yaw); + Real const cos_yaw = (yaw==0.5*M_PI)? 0: std::cos(yaw); + Real const sin_pitch = std::sin(pitch); + Real const cos_pitch = (pitch==0.5*M_PI)? 0: std::cos(pitch); + + // Perform the rotation + Real const x_1_rot = (x_1 * cos_pitch * cos_yaw) + (x_2 * sin_yaw) + (x_3 * sin_pitch * cos_yaw); + Real const x_2_rot = (x_1 * cos_pitch * sin_yaw) + (x_2 * cos_yaw) + (x_3 * sin_pitch * sin_yaw); + Real const x_3_rot = (x_1 * sin_pitch) + (x_3 * cos_pitch); + + if (std::is_same::value) + { + return {round(x_1_rot), + round(x_2_rot), + round(x_3_rot)}; + } + else if (std::is_same::value) + { + return {x_1_rot, x_2_rot, x_3_rot}; + } + } + // ========================================================================= + + // ========================================================================= + /*! + * \brief Compute the dot product of a and b. + * + * \param[in] a1 The first element of a + * \param[in] a2 The second element of a + * \param[in] a3 The third element of a + * \param[in] b1 The first element of b + * \param[in] b2 The second element of b + * \param[in] b3 The third element of b + * + * \return Real The dot product of a and b + */ + inline __device__ __host__ Real dotProduct(Real const &a1, + Real const &a2, + Real const &a3, + Real const &b1, + Real const &b2, + Real const &b3) + {return a1*b1 + ((a2*b2) + (a3*b3));}; + // ========================================================================= + +}//math_utils diff --git a/src/utils/math_utilities_tests.cpp b/src/utils/math_utilities_tests.cpp new file mode 100644 index 000000000..38a2902d6 --- /dev/null +++ b/src/utils/math_utilities_tests.cpp @@ -0,0 +1,64 @@ +/*! + * \file math_utilities_tests.cpp + * \author Robert 'Bob' Caddy (rvc@pitt.edu) + * \brief Tests for the contents of math_utilities.h + * + */ + +// STL Includes +#include + +// External Includes +#include // Include GoogleTest and related libraries/headers + +// Local Includes +#include "../utils/testing_utilities.h" +#include "../utils/math_utilities.h" +#include "../global/global.h" + +// ============================================================================= +TEST(tALLRotateCoords, + CorrectInputExpectCorrectOutput) +{ + // Fiducial values + double const x_1 = 19.2497333410; + double const x_2 = 60.5197699003; + double const x_3 = 86.0613942621; + double const pitch = 1.239 * M_PI; + double const yaw = 0.171 * M_PI; + double const x_1_rot_fid = -31.565679455456568; + double const x_2_rot_fid = 14.745363873361605; + double const x_3_rot_fid = -76.05402749550727; + + auto [x_1_rot, x_2_rot, x_3_rot] = math_utils::rotateCoords(x_1, x_2, x_3, pitch, yaw); + + testingUtilities::checkResults<0>(x_1_rot_fid, x_1_rot, "x_1 rotated values"); + testingUtilities::checkResults<0>(x_2_rot_fid, x_2_rot, "x_2 rotated values"); + testingUtilities::checkResults<0>(x_3_rot_fid, x_3_rot, "x_3 rotated values"); +} +// ============================================================================= + +// ========================================================================= +/*! + * \brief Test the math_utils::dotProduct function + * + */ +TEST(tALLDotProduct, + CorrectInputExpectCorrectOutput) +{ + std::vector a{21.503067766457753, 48.316634031589935, 81.12177317622657}, + b{38.504606872151484, 18.984145880030045, 89.52561861038686}; + + double const fiducialDotProduct = 9007.6941261535867; + + double testDotProduct; + + testDotProduct = math_utils::dotProduct(a.at(0), a.at(1), a.at(2), + b.at(0), b.at(1), b.at(2)); + + // Now check results + testingUtilities::checkResults(fiducialDotProduct, + testDotProduct, + "dot product"); +} +// ========================================================================= \ No newline at end of file diff --git a/src/utils/mhd_utilities.cpp b/src/utils/mhd_utilities.cpp deleted file mode 100644 index c7747830e..000000000 --- a/src/utils/mhd_utilities.cpp +++ /dev/null @@ -1,18 +0,0 @@ -/*! - * \file mhd_utilities.cpp - * \author Robert 'Bob' Caddy (rvc@pitt.edu) - * \brief Contains the implementation of various utility functions for MHD - * - */ - -// STL Includes - -// External Includes - -// Local Includes -#include "../utils/mhd_utilities.h" - -namespace mhdUtils -{ - -} // end namespace mhdUtils \ No newline at end of file diff --git a/src/utils/mhd_utilities.cu b/src/utils/mhd_utilities.cu new file mode 100644 index 000000000..9e947b6c6 --- /dev/null +++ b/src/utils/mhd_utilities.cu @@ -0,0 +1,25 @@ +/*! + * \file mhd_utilities.cpp + * \author Robert 'Bob' Caddy (rvc@pitt.edu) + * \brief Contains the implementation of various utility functions for MHD and + * for the various kernels, functions, and tools required for the 3D VL+CT MHD + * integrator. Due to the CUDA/HIP compiler requiring that device functions be + * directly accessible to the file they're used in most device functions will be + * implemented in the header file + * + */ + +// STL Includes + +// External Includes + +// Local Includes +#include "../utils/mhd_utilities.h" + +namespace mhd{ +namespace utils +{ + +}//utils + +} // end namespace mhd \ No newline at end of file diff --git a/src/utils/mhd_utilities.h b/src/utils/mhd_utilities.h index f28cbb400..d859ab1db 100644 --- a/src/utils/mhd_utilities.h +++ b/src/utils/mhd_utilities.h @@ -15,14 +15,17 @@ #include "../global/global.h" #include "../global/global_cuda.h" #include "../utils/gpu.hpp" +#include "../utils/cuda_utilities.h" -/*! - * \brief Namespace for MHD utilities - * - */ -namespace mhdUtils -{ - namespace // Anonymouse namespace +namespace mhd{ +namespace utils{ + /*! + * \brief Namespace for functions required by functions within the mhd::utils + * namespace. Everything in this name space should be regarded as private + * but is made accesible for testing + * + */ + namespace _internal { // ===================================================================== /*! @@ -59,11 +62,12 @@ namespace mhdUtils return sqrt( (term1 + waveChoice * term2) / (2.0 * fmax(density, TINY_NUMBER)) ); } // ===================================================================== - }// Anonymouse namespace + }// mhd::utils::_internal namespace // ========================================================================= /*! - * \brief Compute the MHD energy in the cell + * \brief Compute the energy in a cell. If MHD is not defined then simply + * return the hydro only energy * * \param[in] pressure The gas pressure * \param[in] density The density @@ -87,9 +91,13 @@ namespace mhdUtils Real const &gamma) { // Compute and return energy - return (fmax(pressure,TINY_NUMBER)/(gamma - 1.)) - + 0.5 * density * (velocityX*velocityX + ((velocityY*velocityY) + (velocityZ*velocityZ))) - + 0.5 * (magneticX*magneticX + ((magneticY*magneticY) + (magneticZ*magneticZ))); + Real energy = (fmax(pressure,TINY_NUMBER)/(gamma - 1.)) + + 0.5 * density * (velocityX*velocityX + ((velocityY*velocityY) + (velocityZ*velocityZ))); + #ifdef MHD + energy += 0.5 * (magneticX*magneticX + ((magneticY*magneticY) + (magneticZ*magneticZ))); + #endif //MHD + + return energy; } // ========================================================================= @@ -157,6 +165,23 @@ namespace mhdUtils } // ========================================================================= + // ========================================================================= + /*! + * \brief Compute the magnetic energy + * + * \param[in] magneticX The magnetic field in the X-direction + * \param[in] magneticY The magnetic field in the Y-direction + * \param[in] magneticZ The magnetic field in the Z-direction + * \return Real The magnetic energy + */ + inline __host__ __device__ Real computeMagneticEnergy(Real const &magneticX, + Real const &magneticY, + Real const &magneticZ) + { + return 0.5 * (magneticX*magneticX + ((magneticY*magneticY) + (magneticZ*magneticZ))); + } + // ========================================================================= + // ========================================================================= /*! * \brief Compute the total MHD pressure. I.e. magnetic pressure + gas @@ -199,13 +224,13 @@ namespace mhdUtils Real const &gamma) { // Compute the sound speed - return _magnetosonicSpeed(density, - pressure, - magneticX, - magneticY, - magneticZ, - gamma, - 1.0); + return mhd::utils::_internal::_magnetosonicSpeed(density, + pressure, + magneticX, + magneticY, + magneticZ, + gamma, + 1.0); } // ========================================================================= @@ -229,13 +254,13 @@ namespace mhdUtils Real const &gamma) { // Compute the sound speed - return _magnetosonicSpeed(density, - pressure, - magneticX, - magneticY, - magneticZ, - gamma, - -1.0); + return mhd::utils::_internal::_magnetosonicSpeed(density, + pressure, + magneticX, + magneticY, + magneticZ, + gamma, + -1.0); } // ========================================================================= @@ -285,10 +310,20 @@ namespace mhdUtils Real &avgBy, Real &avgBz) { - avgBx = 0.5 * (dev_conserved[(5+NSCALARS)*n_cells + id] + dev_conserved[(5+NSCALARS)*n_cells + ((xid-1) + yid*nx + zid*nx*ny)]); - avgBy = 0.5 * (dev_conserved[(6+NSCALARS)*n_cells + id] + dev_conserved[(6+NSCALARS)*n_cells + (xid + (yid-1)*nx + zid*nx*ny)]); - avgBz = 0.5 * (dev_conserved[(7+NSCALARS)*n_cells + id] + dev_conserved[(7+NSCALARS)*n_cells + (xid + yid*nx + (zid-1)*nx*ny)]); + // Ternary operator to check that no values outside of the magnetic field + // arrays are loaded. If the cell is on the edge that doesn't have magnetic + // fields on both sides then instead set the centered magnetic field to be + // equal to the magnetic field of the closest edge. T + avgBx = (xid > 0) ? + /*if true*/ 0.5 * (dev_conserved[(5+NSCALARS)*n_cells + id] + dev_conserved[(5+NSCALARS)*n_cells + cuda_utilities::compute1DIndex(xid-1, yid, zid, nx, ny)]): + /*if false*/ dev_conserved[(5+NSCALARS)*n_cells + id]; + avgBy = (yid > 0) ? + /*if true*/ 0.5 * (dev_conserved[(6+NSCALARS)*n_cells + id] + dev_conserved[(6+NSCALARS)*n_cells + cuda_utilities::compute1DIndex(xid, yid-1, zid, nx, ny)]): + /*if false*/ dev_conserved[(6+NSCALARS)*n_cells + id]; + avgBz = (zid > 0) ? + /*if true*/ 0.5 * (dev_conserved[(7+NSCALARS)*n_cells + id] + dev_conserved[(7+NSCALARS)*n_cells + cuda_utilities::compute1DIndex(xid, yid, zid-1, nx, ny)]): + /*if false*/ dev_conserved[(7+NSCALARS)*n_cells + id]; } // ========================================================================= - -} // end namespace mhdUtils \ No newline at end of file +} // end namespace mhd::utils +} // end namespace mhd \ No newline at end of file diff --git a/src/utils/mhd_utilities_tests.cpp b/src/utils/mhd_utilities_tests.cu similarity index 81% rename from src/utils/mhd_utilities_tests.cpp rename to src/utils/mhd_utilities_tests.cu index c5cbb25fb..d56ae2bad 100644 --- a/src/utils/mhd_utilities_tests.cpp +++ b/src/utils/mhd_utilities_tests.cu @@ -11,6 +11,7 @@ #include #include #include +#include // External Includes #include // Include GoogleTest and related libraries/headers @@ -45,12 +46,11 @@ namespace } // ============================================================================= - // ============================================================================= -// Tests for the mhdUtils::computeEnergy function +// Tests for the mhd::utils::computeEnergy function // ============================================================================= /*! - * \brief Test the mhdUtils::computeEnergy function with the standard set of + * \brief Test the mhd::utils::computeEnergy function with the standard set of * parameters * */ @@ -64,7 +64,7 @@ TEST(tMHDComputeEnergy, for (size_t i = 0; i < parameters.names.size(); i++) { - Real testEnergy = mhdUtils::computeEnergy(parameters.pressureGas.at(i), + Real testEnergy = mhd::utils::computeEnergy(parameters.pressureGas.at(i), parameters.density.at(i), parameters.velocityX.at(i), parameters.velocityY.at(i), @@ -81,7 +81,7 @@ TEST(tMHDComputeEnergy, } /*! - * \brief Test the mhdUtils::computeEnergy function with a the standard set of + * \brief Test the mhd::utils::computeEnergy function with a the standard set of * parameters except pressure is now negative * */ @@ -95,7 +95,7 @@ TEST(tMHDComputeEnergy, for (size_t i = 0; i < parameters.names.size(); i++) { - Real testEnergy = mhdUtils::computeEnergy(-parameters.pressureGas.at(i), + Real testEnergy = mhd::utils::computeEnergy(-parameters.pressureGas.at(i), parameters.density.at(i), parameters.velocityX.at(i), parameters.velocityY.at(i), @@ -111,14 +111,14 @@ TEST(tMHDComputeEnergy, } } // ============================================================================= -// End of tests for the mhdUtils::computeEnergy function +// End of tests for the mhd::utils::computeEnergy function // ============================================================================= // ============================================================================= -// Tests for the mhdUtils::computeGasPressure function +// Tests for the mhd::utils::computeGasPressure function // ============================================================================= /*! - * \brief Test the mhdUtils::computeGasPressure function with the standard set of + * \brief Test the mhd::utils::computeGasPressure function with the standard set of * parameters. Energy has been increased to avoid negative pressures * */ @@ -133,7 +133,7 @@ TEST(tMHDComputeGasPressure, for (size_t i = 0; i < parameters.names.size(); i++) { - Real testGasPressure = mhdUtils::computeGasPressure(energyMultiplier.at(i) * parameters.energy.at(i), + Real testGasPressure = mhd::utils::computeGasPressure(energyMultiplier.at(i) * parameters.energy.at(i), parameters.density.at(i), parameters.momentumX.at(i), parameters.momentumY.at(i), @@ -150,7 +150,7 @@ TEST(tMHDComputeGasPressure, } /*! - * \brief Test the mhdUtils::computeGasPressure function with a the standard set + * \brief Test the mhd::utils::computeGasPressure function with a the standard set * of parameters which produce negative pressures * */ @@ -161,7 +161,7 @@ TEST(tMHDComputeGasPressure, for (size_t i = 0; i < parameters.names.size(); i++) { - Real testGasPressure = mhdUtils::computeGasPressure(parameters.energy.at(i), + Real testGasPressure = mhd::utils::computeGasPressure(parameters.energy.at(i), parameters.density.at(i), parameters.momentumX.at(i), parameters.momentumY.at(i), @@ -178,15 +178,15 @@ TEST(tMHDComputeGasPressure, } } // ============================================================================= -// End of tests for the mhdUtils::computeGasPressure function +// End of tests for the mhd::utils::computeGasPressure function // ============================================================================= // ============================================================================= -// Tests for the mhdUtils::computeThermalEnergy function +// Tests for the mhd::utils::computeThermalEnergy function // ============================================================================= /*! - * \brief Test the mhdUtils::computeThermalEnergy function with the standard set + * \brief Test the mhd::utils::computeThermalEnergy function with the standard set * of parameters. * */ @@ -201,7 +201,7 @@ TEST(tMHDComputeThermalEnergy, for (size_t i = 0; i < parameters.names.size(); i++) { - Real testGasPressure = mhdUtils::computeThermalEnergy(energyMultiplier.at(i) * parameters.energy.at(i), + Real testGasPressure = mhd::utils::computeThermalEnergy(energyMultiplier.at(i) * parameters.energy.at(i), parameters.density.at(i), parameters.momentumX.at(i), parameters.momentumY.at(i), @@ -217,14 +217,46 @@ TEST(tMHDComputeThermalEnergy, } } // ============================================================================= -// End of tests for the mhdUtils::computeThermalEnergyfunction +// End of tests for the mhd::utils::computeThermalEnergy function +// ============================================================================= + +// ============================================================================= +// Tests for the mhd::utils::computeMagneticEnergy function +// ============================================================================= +/*! + * \brief Test the mhd::utils::computeMagneticEnergy function with the standard + * set of parameters. + * + */ +TEST(tMHDcomputeMagneticEnergy, + CorrectInputExpectCorrectOutput) +{ + testParams parameters; + std::vector energyMultiplier{1.0E85, 1.0E4, 1.0E105}; + std::vector fiducialEnergy{0.0, + 805356.08013056568, + 6.7079331637514162e+201}; + + for (size_t i = 0; i < parameters.names.size(); i++) + { + Real testMagneticEnergy = mhd::utils::computeMagneticEnergy(parameters.magneticX.at(i), + parameters.magneticY.at(i), + parameters.magneticZ.at(i)); + + testingUtilities::checkResults(fiducialEnergy.at(i), + testMagneticEnergy, + parameters.names.at(i)); + } +} +// ============================================================================= +// End of tests for the mhd::utils::computeMagneticEnergy function // ============================================================================= // ============================================================================= -// Tests for the mhdUtils::computeTotalPressure function +// Tests for the mhd::utils::computeTotalPressure function // ============================================================================= /*! - * \brief Test the mhdUtils::computeTotalPressure function with the standard set + * \brief Test the mhd::utils::computeTotalPressure function with the standard set * of parameters. * */ @@ -238,7 +270,7 @@ TEST(tMHDComputeTotalPressure, for (size_t i = 0; i < parameters.names.size(); i++) { - Real testTotalPressure = mhdUtils::computeTotalPressure(parameters.pressureGas.at(i), + Real testTotalPressure = mhd::utils::computeTotalPressure(parameters.pressureGas.at(i), parameters.magneticX.at(i), parameters.magneticY.at(i), parameters.magneticZ.at(i)); @@ -250,7 +282,7 @@ TEST(tMHDComputeTotalPressure, } /*! - * \brief Test the mhdUtils::computeTotalPressure function with a the standard + * \brief Test the mhd::utils::computeTotalPressure function with a the standard * set of parameters. Gas pressure has been multiplied and made negative to * generate negative total pressures * @@ -263,7 +295,7 @@ TEST(tMHDComputeTotalPressure, for (size_t i = 0; i < parameters.names.size(); i++) { - Real testTotalPressure = mhdUtils::computeTotalPressure(pressureMultiplier.at(i) * parameters.pressureGas.at(i), + Real testTotalPressure = mhd::utils::computeTotalPressure(pressureMultiplier.at(i) * parameters.pressureGas.at(i), parameters.magneticX.at(i), parameters.magneticY.at(i), parameters.magneticZ.at(i)); @@ -275,14 +307,14 @@ TEST(tMHDComputeTotalPressure, } } // ============================================================================= -// End of tests for the mhdUtils::computeTotalPressure function +// End of tests for the mhd::utils::computeTotalPressure function // ============================================================================= // ============================================================================= -// Tests for the mhdUtils::fastMagnetosonicSpeed function +// Tests for the mhd::utils::fastMagnetosonicSpeed function // ============================================================================= /*! - * \brief Test the mhdUtils::fastMagnetosonicSpeed function with the standard + * \brief Test the mhd::utils::fastMagnetosonicSpeed function with the standard * set of parameters. All values are reduced by 1e-25 in the large number case * to avoid overflow * @@ -298,7 +330,7 @@ TEST(tMHDFastMagnetosonicSpeed, for (size_t i = 0; i < parameters.names.size(); i++) { - Real testFastMagnetosonicSpeed = mhdUtils::fastMagnetosonicSpeed( + Real testFastMagnetosonicSpeed = mhd::utils::fastMagnetosonicSpeed( coef.at(i)*parameters.density.at(i), coef.at(i)*parameters.pressureGas.at(i), coef.at(i)*parameters.magneticX.at(i), @@ -313,7 +345,7 @@ TEST(tMHDFastMagnetosonicSpeed, } /*! - * \brief Test the mhdUtils::fastMagnetosonicSpeed function with the standard + * \brief Test the mhd::utils::fastMagnetosonicSpeed function with the standard * set of parameters, density is negative. All values are reduced by 1e-25 in * the large number case to avoid overflow. * @@ -329,7 +361,7 @@ TEST(tMHDFastMagnetosonicSpeed, for (size_t i = 0; i < parameters.names.size(); i++) { - Real testFastMagnetosonicSpeed = mhdUtils::fastMagnetosonicSpeed( + Real testFastMagnetosonicSpeed = mhd::utils::fastMagnetosonicSpeed( -coef.at(i)*parameters.density.at(i), coef.at(i)*parameters.pressureGas.at(i), coef.at(i)*parameters.magneticX.at(i), @@ -343,14 +375,14 @@ TEST(tMHDFastMagnetosonicSpeed, } } // ============================================================================= -// End of tests for the mhdUtils::fastMagnetosonicSpeed function +// End of tests for the mhd::utils::fastMagnetosonicSpeed function // ============================================================================= // ============================================================================= -// Tests for the mhdUtils::slowMagnetosonicSpeed function +// Tests for the mhd::utils::slowMagnetosonicSpeed function // ============================================================================= /*! - * \brief Test the mhdUtils::slowMagnetosonicSpeed function with the standard + * \brief Test the mhd::utils::slowMagnetosonicSpeed function with the standard * set of parameters. All values are reduced by 1e-25 in the large number case * to avoid overflow * @@ -367,7 +399,7 @@ TEST(tMHDSlowMagnetosonicSpeed, for (size_t i = 2; i < parameters.names.size(); i++) { - Real testSlowMagnetosonicSpeed = mhdUtils::slowMagnetosonicSpeed( + Real testSlowMagnetosonicSpeed = mhd::utils::slowMagnetosonicSpeed( parameters.density.at(i) * coef, parameters.pressureGas.at(i) * coef, parameters.magneticX.at(i) * coef, @@ -382,7 +414,7 @@ TEST(tMHDSlowMagnetosonicSpeed, } /*! - * \brief Test the mhdUtils::slowMagnetosonicSpeed function with the standard + * \brief Test the mhd::utils::slowMagnetosonicSpeed function with the standard * set of parameters, density is negative. All values are reduced by 1e-25 in * the large number case to avoid overflow. * @@ -399,7 +431,7 @@ TEST(tMHDSlowMagnetosonicSpeed, for (size_t i = 2; i < parameters.names.size(); i++) { - Real testSlowMagnetosonicSpeed = mhdUtils::slowMagnetosonicSpeed( + Real testSlowMagnetosonicSpeed = mhd::utils::slowMagnetosonicSpeed( -parameters.density.at(i) * coef, parameters.pressureGas.at(i) * coef, parameters.magneticX.at(i) * coef, @@ -413,14 +445,14 @@ TEST(tMHDSlowMagnetosonicSpeed, } } // ============================================================================= -// End of tests for the mhdUtils::slowMagnetosonicSpeed function +// End of tests for the mhd::utils::slowMagnetosonicSpeed function // ============================================================================= // ============================================================================= -// Tests for the mhdUtils::alfvenSpeed function +// Tests for the mhd::utils::alfvenSpeed function // ============================================================================= /*! - * \brief Test the mhdUtils::alfvenSpeed function with the standard set of + * \brief Test the mhd::utils::alfvenSpeed function with the standard set of * parameters. * */ @@ -434,7 +466,7 @@ TEST(tMHDAlfvenSpeed, for (size_t i = 0; i < parameters.names.size(); i++) { - Real testAlfvenSpeed = mhdUtils::alfvenSpeed(parameters.magneticX.at(i), + Real testAlfvenSpeed = mhd::utils::alfvenSpeed(parameters.magneticX.at(i), parameters.density.at(i)); testingUtilities::checkResults(fiducialAlfvenSpeed.at(i), @@ -444,7 +476,7 @@ TEST(tMHDAlfvenSpeed, } /*! - * \brief Test the mhdUtils::alfvenSpeed function with the standard set of + * \brief Test the mhd::utils::alfvenSpeed function with the standard set of * parameters except density is negative * */ @@ -458,7 +490,7 @@ TEST(tMHDAlfvenSpeed, for (size_t i = 0; i < parameters.names.size(); i++) { - Real testAlfvenSpeed = mhdUtils::alfvenSpeed(parameters.magneticX.at(i), + Real testAlfvenSpeed = mhd::utils::alfvenSpeed(parameters.magneticX.at(i), -parameters.density.at(i)); testingUtilities::checkResults(fiducialAlfvenSpeed.at(i), @@ -467,11 +499,11 @@ TEST(tMHDAlfvenSpeed, } } // ============================================================================= -// End of tests for the mhdUtils::alfvenSpeed function +// End of tests for the mhd::utils::alfvenSpeed function // ============================================================================= // ============================================================================= -// Tests for the mhdUtils::cellCenteredMagneticFields function +// Tests for the mhd::utils::cellCenteredMagneticFields function // ============================================================================= TEST(tMHDCellCenteredMagneticFields, CorrectInputExpectCorrectOutput) @@ -497,7 +529,7 @@ TEST(tMHDCellCenteredMagneticFields, double testAvgBx, testAvgBy, testAvgBz; // Call the function to test - mhdUtils::cellCenteredMagneticFields(testGrid.data(), id, xid, yid, zid, n_cells, nx, ny, testAvgBx, testAvgBy, testAvgBz); + mhd::utils::cellCenteredMagneticFields(testGrid.data(), id, xid, yid, zid, n_cells, nx, ny, testAvgBx, testAvgBy, testAvgBz); // Check the results testingUtilities::checkResults(fiducialAvgBx, testAvgBx, "cell centered Bx value"); @@ -505,5 +537,5 @@ TEST(tMHDCellCenteredMagneticFields, testingUtilities::checkResults(fiducialAvgBz, testAvgBz, "cell centered Bz value"); } // ============================================================================= -// End of tests for the mhdUtils::cellCenteredMagneticFields function +// End of tests for the mhd::utils::cellCenteredMagneticFields function // ============================================================================= diff --git a/src/utils/reduction_utilities.cu b/src/utils/reduction_utilities.cu index 820f27826..65933e42f 100644 --- a/src/utils/reduction_utilities.cu +++ b/src/utils/reduction_utilities.cu @@ -42,19 +42,5 @@ gridReduceMax(maxVal, out); } // ===================================================================== - - // ===================================================================== - void reductionLaunchParams(uint &numBlocks, uint &threadsPerBlock, uint const &deviceNum) - { - cudaDeviceProp prop; - cudaGetDeviceProperties(&prop, deviceNum); - - // Divide the total number of allowed threads by the number of - // threads per block - threadsPerBlock = prop.maxThreadsPerBlock; - numBlocks = (prop.maxThreadsPerMultiProcessor * prop.multiProcessorCount) - / threadsPerBlock; - } - // ===================================================================== }//reduction_utilities #endif //CUDA \ No newline at end of file diff --git a/src/utils/reduction_utilities.h b/src/utils/reduction_utilities.h index 6935d481b..9aef9600d 100644 --- a/src/utils/reduction_utilities.h +++ b/src/utils/reduction_utilities.h @@ -8,7 +8,7 @@ #pragma once // STL Includes -#include +#include // External Includes @@ -78,7 +78,99 @@ } // ===================================================================== + #ifndef O_HIP // ===================================================================== + // This section handles the atomics. It is complicated because CUDA + // doesn't currently support atomics with non-integral types. + // This code is taken from + // https://github.com/rapidsai/cuml/blob/dc14361ba11c41f7a4e1e6a3625bbadd0f52daf7/cpp/src_prims/stats/minmax.cuh + // with slight tweaks for our use case. + // ===================================================================== + /*! + * \brief Do a device side bit cast + * + * \tparam To The output type + * \tparam From The input type + * \param from The input value + * \return To The bit cast version of From as type To + */ + template + __device__ constexpr To bit_cast(const From& from) noexcept + { + // TODO: replace with `std::bitcast` once we adopt C++20 or libcu++ adds it + To to{}; + static_assert(sizeof(To) == sizeof(From)); + memcpy(&to, &from, sizeof(To)); + return to; + } + + /*! + * \brief Encode a float as an int + * + * \param val The float to encode + * \return int The encoded int + */ + inline __device__ int encode(float val) + { + int i = bit_cast(val); + return i >= 0 ? i : (1 << 31) | ~i; + } + + /*! + * \brief Encode a double as a long long int + * + * \param val The double to encode + * \return long long The encoded long long int + */ + inline __device__ long long encode(double val) + { + std::int64_t i = bit_cast(val); + return i >= 0 ? i : (1ULL << 63) | ~i; + } + + /*! + * \brief Decodes an int as a float + * + * \param val The int to decode + * \return float The decoded float + */ + inline __device__ float decode(int val) + { + if (val < 0) val = (1 << 31) | ~val; + return bit_cast(val); + } + + /*! + * \brief Decodes a long long int as a double + * + * \param val The long long to decode + * \return double The decoded double + */ + inline __device__ double decode(long long val) + { + if (val < 0) val = (1ULL << 63) | ~val; + return bit_cast(val); + } + #endif //O_HIP + /*! + * \brief Perform an atomic reduction to find the maximum value of `val` + * + * \param[out] address The pointer to where to store the reduced scalar + * value in device memory + * \param[in] val The thread local variable to find the maximum of across + * the grid. Typically this should be a partial reduction that has + * already been reduced to the block level + */ + inline __device__ float atomicMaxBits(float* address, float val) + { + #ifdef O_HIP + return atomicMax(address, val); + #else //O_HIP + int old = atomicMax((int*)address, encode(val)); + return decode(old); + #endif //O_HIP + } + /*! * \brief Perform an atomic reduction to find the maximum value of `val` * @@ -88,27 +180,52 @@ * the grid. Typically this should be a partial reduction that has * already been reduced to the block level */ - __inline__ __device__ double atomicMax_double(double* address, double val) + inline __device__ double atomicMaxBits(double* address, double val) + { + #ifdef O_HIP + return atomicMax(address, val); + #else //O_HIP + long long old = atomicMax((long long*)address, encode(val)); + return decode(old); + #endif //O_HIP + } + + /*! + * \brief Perform an atomic reduction to find the minimum value of `val` + * + * \param[out] address The pointer to where to store the reduced scalar + * value in device memory + * \param[in] val The thread local variable to find the minimum of across + * the grid. Typically this should be a partial reduction that has + * already been reduced to the block level + */ + inline __device__ float atomicMinBits(float* address, float val) { - unsigned long long int* address_as_ull = (unsigned long long int*) address; - unsigned long long int old = *address_as_ull, assumed; - // Explanation of loop here: - // https://stackoverflow.com/questions/16077464/atomicadd-for-double-on-gpu - // The loop is to make sure the value at address doesn't change - // between the load at the atomic since the entire operation isn't - // atomic - - // While it appears that this could result in many times more atomic - // operations than required, in practice it's only a handful of - // extra operation even in the worst case. Running with 16,000 - // blocks gives ~8-37 atomics after brief testing - do { - assumed = old; - old = atomicCAS(address_as_ull, - assumed, - __double_as_longlong(fmax(__longlong_as_double(assumed),val))); - } while (assumed != old); - return __longlong_as_double(old); + #ifdef O_HIP + return atomicMin(address, val); + #else //O_HIP + int old = atomicMin((int*)address, encode(val)); + return decode(old); + #endif //O_HIP + } + + /*! + * \brief Perform an atomic reduction to find the minimum value of `val` + * + * \param[out] address The pointer to where to store the reduced scalar + * value in device memory + * \param[in] val The thread local variable to find the minimum of across + * the grid. Typically this should be a partial reduction that has + * already been reduced to the block level + */ + inline __device__ double atomicMinBits(double* address, double val) + { + #ifdef O_HIP + return atomicMin(address, val); + #else //O_HIP + long long old = atomicMin((long long*)address, encode(val)); + return decode(old); + #endif //O_HIP } // ===================================================================== @@ -119,6 +236,10 @@ * before the kernel launch that uses this function to avoid any * potential race condition; the `cuda_utilities::setScalarDeviceMemory` * function exists for this purpose. + * of `val`. Note that the value of `out` should be set appropriately + * before the kernel launch that uses this function to avoid any + * potential race condition; the `cuda_utilities::setScalarDeviceMemory` + * function exists for this purpose. * * \details This function can perform a reduction to find the maximum of * the thread local variable `val` across the entire grid. It relies on a @@ -128,7 +249,7 @@ * by using as many threads per block as possible and as few blocks as * possible since each block has to perform an atomic operation. To * accomplish this it is reccommened that you use the - * `reductionLaunchParams` functions to get the optimal number of blocks + * `AutomaticLaunchParams` functions to get the optimal number of blocks * and threads per block to launch rather than relying on Cholla defaults * and then within the kernel using a grid-stride loop to make sure the * kernel works with any combination of threads and blocks. Note that @@ -147,17 +268,23 @@ __inline__ __device__ void gridReduceMax(Real val, Real* out) { // __syncthreads(); // Wait for all threads to calculate val; + // __syncthreads(); // Wait for all threads to calculate val; // Reduce the entire block in parallel val = blockReduceMax(val); // Write block level reduced value to the output scalar atomically - if (threadIdx.x == 0) atomicMax_double(out, val); + if (threadIdx.x == 0) atomicMaxBits(out, val); } // ===================================================================== // ===================================================================== /*! + * \brief Find the maximum value in the array. Make sure to initialize + * `out` correctly before using this kernel; the + * `cuda_utilities::setScalarDeviceMemory` function exists for this + * purpose. If `in` and `out` are the same array that's ok, all the + * loads are completed before the overwrite occurs. * \brief Find the maximum value in the array. Make sure to initialize * `out` correctly before using this kernel; the * `cuda_utilities::setScalarDeviceMemory` function exists for this @@ -171,23 +298,5 @@ */ __global__ void kernelReduceMax(Real *in, Real* out, size_t N); // ===================================================================== - - // ===================================================================== - /*! - * \brief Determine the optimal number of blocks and threads per block to - * use when launching a reduction kernel - * - * \param[out] numBlocks The maximum number of blocks that are - * scheduleable by the device in use when each block has the maximum - * number of threads - * \param[out] threadsPerBlock The maximum threads per block supported by - * the device in use - * \param[in] deviceNum optional: which device is being targeted. - * Defaults to zero - */ - void reductionLaunchParams(uint &numBlocks, - uint &threadsPerBlock, - uint const &deviceNum=0); - // ===================================================================== } // namespace reduction_utilities #endif //CUDA diff --git a/src/utils/reduction_utilities_tests.cu b/src/utils/reduction_utilities_tests.cu index 2314b33be..64613cc5b 100644 --- a/src/utils/reduction_utilities_tests.cu +++ b/src/utils/reduction_utilities_tests.cu @@ -17,6 +17,8 @@ // Local Includes #include "../utils/testing_utilities.h" #include "../utils/reduction_utilities.h" +#include "../utils/cuda_utilities.h" +#include "../utils/DeviceVector.h" #include "../global/global.h" @@ -28,8 +30,7 @@ TEST(tALLKernelReduceMax, CorrectInputExpectCorrectOutput) { // Launch parameters // ================= - uint numBlocks, threadsPerBlock; - reduction_utilities::reductionLaunchParams(numBlocks, threadsPerBlock); + cuda_utilities::AutomaticLaunchParams static const launchParams(reduction_utilities::kernelReduceMax); // Grid Parameters & testing parameters // ==================================== @@ -37,7 +38,6 @@ TEST(tALLKernelReduceMax, CorrectInputExpectCorrectOutput) size_t const size = std::pow(gridSize, 3);; Real const maxValue = 4; std::vector host_grid(size); - Real host_max = -DBL_MAX; // Fill grid with random values and assign maximum value std::mt19937 prng(1); @@ -52,44 +52,22 @@ TEST(tALLKernelReduceMax, CorrectInputExpectCorrectOutput) // Allocating and copying to device // ================================ - Real *dev_grid; - CudaSafeCall(cudaMalloc(&dev_grid, host_grid.size() * sizeof(Real))); - CudaSafeCall(cudaMemcpy(dev_grid, host_grid.data(), host_grid.size() * sizeof(Real), cudaMemcpyHostToDevice)); + cuda_utilities::DeviceVector dev_grid(host_grid.size()); + dev_grid.cpyHostToDevice(host_grid); - Real *dev_max_array; - CudaSafeCall(cudaMalloc(&dev_max_array, numBlocks*sizeof(Real))); - // Sets all bytes to 0. - cudaMemset(dev_max_array,0,numBlocks*sizeof(Real)); - - Real host_max_array[numBlocks]; - //Real *host_max_array = (Real *) malloc(numBlocks*sizeof(Real)); - //CudaSafeCall( cudaHostAlloc(&host_max_array, numBlocks*sizeof(Real), cudaHostAllocDefault) ); + cuda_utilities::DeviceVector static dev_max(1); + dev_max.assign(std::numeric_limits::lowest()); // Do the reduction // ================ - hipLaunchKernelGGL(reduction_utilities::kernelReduceMax, numBlocks, threadsPerBlock, 0, 0, dev_grid, dev_max_array, host_grid.size()); + hipLaunchKernelGGL(reduction_utilities::kernelReduceMax, + launchParams.numBlocks, launchParams.threadsPerBlock, 0, 0, + dev_grid.data(), dev_max.data(), host_grid.size()); CudaCheckError(); - - // Copy back and sync - // ================== - CudaSafeCall(cudaMemcpy(&host_max_array, dev_max_array, numBlocks*sizeof(Real), cudaMemcpyDeviceToHost)); - cudaDeviceSynchronize(); - - for (int i = 0; i < numBlocks; i++) - { - host_max = fmax(host_max,host_max_array[i]); - } - - //free(host_max_array); - - cudaFree(dev_max_array); - - cudaFree(dev_grid); - // Perform comparison - testingUtilities::checkResults(maxValue, host_max, "maximum value found"); + testingUtilities::checkResults(maxValue, dev_max.at(0), "maximum value found"); } // ============================================================================= // Tests for divergence max reduction diff --git a/src/utils/testing_utilities.cpp b/src/utils/testing_utilities.cpp index 9b8bee948..6035b68b5 100644 --- a/src/utils/testing_utilities.cpp +++ b/src/utils/testing_utilities.cpp @@ -80,52 +80,6 @@ namespace testingUtilities } // ========================================================================= - // ========================================================================= - void checkResults(double fiducialNumber, - double testNumber, - std::string outString, - double fixedEpsilon, - int ulpsEpsilon) - { - // Check for equality and if not equal return difference - double absoluteDiff; - int64_t ulpsDiff; - bool areEqual; - - if ((fixedEpsilon < 0) and (ulpsEpsilon < 0)) - { - areEqual = testingUtilities::nearlyEqualDbl(fiducialNumber, - testNumber, - absoluteDiff, - ulpsDiff); - } - else if ((fixedEpsilon > 0) and (ulpsEpsilon < 0)) - { - areEqual = testingUtilities::nearlyEqualDbl(fiducialNumber, - testNumber, - absoluteDiff, - ulpsDiff, - fixedEpsilon); - } - else - { - areEqual = testingUtilities::nearlyEqualDbl(fiducialNumber, - testNumber, - absoluteDiff, - ulpsDiff, - fixedEpsilon, - ulpsEpsilon); - } - - EXPECT_TRUE(areEqual) - << "Difference in " << outString << std::endl - << "The fiducial value is: " << fiducialNumber << std::endl - << "The test value is: " << testNumber << std::endl - << "The absolute difference is: " << absoluteDiff << std::endl - << "The ULP difference is: " << ulpsDiff << std::endl; - } - // ========================================================================= - void wrapperEqual(int i, int j, int k, std::string dataSetName, double test_value, double fid_value, double fixedEpsilon=5.0E-12) { @@ -139,24 +93,24 @@ namespace testingUtilities outString += k; outString += "]"; - checkResults(fid_value,test_value,outString,fixedEpsilon); + ASSERT_NO_FATAL_FAILURE(checkResults<1>(fid_value,test_value,outString,fixedEpsilon)); } void analyticConstant(systemTest::SystemTestRunner testObject, std::string dataSetName, double value) { std::vector testDims(3,1); std::vector testData = testObject.loadTestFieldData(dataSetName,testDims); for (size_t i = 0; i < testDims[0]; i++) + { + for (size_t j = 0; j < testDims[1]; j++) { - for (size_t j = 0; j < testDims[1]; j++) - { - for (size_t k = 0; k < testDims[2]; k++) - { - size_t index = (i * testDims[1] * testDims[2]) + (j * testDims[2]) + k; - - wrapperEqual(i,j,k,dataSetName,testData.at(index),value); - } - } + for (size_t k = 0; k < testDims[2]; k++) + { + size_t index = (i * testDims[1] * testDims[2]) + (j * testDims[2]) + k; + + ASSERT_NO_FATAL_FAILURE(wrapperEqual(i,j,k,dataSetName,testData.at(index),value)); + } } + } } void analyticSine(systemTest::SystemTestRunner testObject, std::string dataSetName, @@ -166,17 +120,17 @@ namespace testingUtilities std::vector testDims(3,1); std::vector testData = testObject.loadTestFieldData(dataSetName,testDims); for (size_t i = 0; i < testDims[0]; i++) - { - for (size_t j = 0; j < testDims[1]; j++) - { - for (size_t k = 0; k < testDims[2]; k++) - { - double value = constant + amplitude*std::sin(kx*i+ky*j+kz*k+phase); - size_t index = (i * testDims[1] * testDims[2]) + (j * testDims[2]) + k; - wrapperEqual(i,j,k,dataSetName,testData.at(index),value,tolerance); - } - } - } + { + for (size_t j = 0; j < testDims[1]; j++) + { + for (size_t k = 0; k < testDims[2]; k++) + { + double value = constant + amplitude*std::sin(kx*i+ky*j+kz*k+phase); + size_t index = (i * testDims[1] * testDims[2]) + (j * testDims[2]) + k; + ASSERT_NO_FATAL_FAILURE(wrapperEqual(i,j,k,dataSetName,testData.at(index),value,tolerance)); + } + } + } } diff --git a/src/utils/testing_utilities.h b/src/utils/testing_utilities.h index 927a61f28..b98780247 100644 --- a/src/utils/testing_utilities.h +++ b/src/utils/testing_utilities.h @@ -10,6 +10,10 @@ // STL includes #include +#include +#include +#include + #include "../system_tests/system_tester.h" // provide systemTest class // ============================================================================= @@ -106,6 +110,8 @@ namespace testingUtilities * \brief A simple function to compare two doubles with the nearlyEqualDbl * function, perform a GTest assert on the result, and print out the values * + * \tparam checkType The type of GTest assertion to use. "0" for and + * "EXPECT" and "1" for an "ASSERT" * \param[in] fiducialNumber The fiducial number to test against * \param[in] testNumber The unverified number to test * \param[in] outString A string to be printed in the first line of the output @@ -115,11 +121,66 @@ namespace testingUtilities * \param[in] ulpsEpsilon The ULP epsilon to use in the comparison. Negative * values are ignored and default behaviour is used */ + template void checkResults(double fiducialNumber, double testNumber, std::string outString, double fixedEpsilon = -999, - int ulpsEpsilon = -999); + int ulpsEpsilon = -999) + { + // Check for equality and if not equal return difference + double absoluteDiff; + int64_t ulpsDiff; + bool areEqual; + + if ((fixedEpsilon < 0) and (ulpsEpsilon < 0)) + { + areEqual = testingUtilities::nearlyEqualDbl(fiducialNumber, + testNumber, + absoluteDiff, + ulpsDiff); + } + else if ((fixedEpsilon > 0) and (ulpsEpsilon < 0)) + { + areEqual = testingUtilities::nearlyEqualDbl(fiducialNumber, + testNumber, + absoluteDiff, + ulpsDiff, + fixedEpsilon); + } + else + { + areEqual = testingUtilities::nearlyEqualDbl(fiducialNumber, + testNumber, + absoluteDiff, + ulpsDiff, + fixedEpsilon, + ulpsEpsilon); + } + + std::stringstream outputMessage; + outputMessage << std::setprecision(std::numeric_limits::max_digits10) + << "Difference in " << outString << std::endl + << "The fiducial value is: " << fiducialNumber << std::endl + << "The test value is: " << testNumber << std::endl + << "The absolute difference is: " << absoluteDiff << std::endl + << "The ULP difference is: " << ulpsDiff << std::endl; + + if (checkType == 0) + { + EXPECT_TRUE(areEqual) << outputMessage.str(); + } + else if (checkType == 1) + { + ASSERT_TRUE(areEqual) << outputMessage.str(); + } + else + { + throw std::runtime_error("Incorrect template argument passed to " + "checkResults. Options are 0 and 1 but " + + std::to_string(checkType) + " was passed"); + } + } // ========================================================================= // ========================================================================= From 0c0952942f539d3b6a56a810cf353f5dbd690bd9 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Tue, 20 Dec 2022 16:08:14 -0500 Subject: [PATCH 146/694] Replace MHD indexing with grid_enums --- src/grid/grid3D.cpp | 12 ++++----- src/grid/grid_enum.h | 28 +++++++++++++++++--- src/hydro/hydro_cuda.cu | 12 ++++----- src/integrators/VL_3D_cuda.cu | 27 ++++++++++--------- src/mhd/ct_electric_fields.cu | 36 +++++++++++++------------- src/mhd/ct_electric_fields.h | 10 +++---- src/mhd/ct_electric_fields_tests.cu | 6 ++--- src/mhd/magnetic_divergence.cu | 12 ++++----- src/mhd/magnetic_update.cu | 6 ++--- src/reconstruction/pcm_cuda.cu | 24 ++++++++--------- src/riemann_solvers/hlld_cuda.cu | 13 +++++----- src/riemann_solvers/hlld_cuda_tests.cu | 32 +++++++++++------------ src/utils/mhd_utilities.h | 12 ++++----- 13 files changed, 126 insertions(+), 104 deletions(-) diff --git a/src/grid/grid3D.cpp b/src/grid/grid3D.cpp index cadcb3653..bd67e87e6 100644 --- a/src/grid/grid3D.cpp +++ b/src/grid/grid3D.cpp @@ -299,9 +299,9 @@ void Grid3D::AllocateMemory(void) #endif #endif //SCALAR #ifdef MHD - C.magnetic_x = &(C.host[(5 + NSCALARS)*H.n_cells]); - C.magnetic_y = &(C.host[(6 + NSCALARS)*H.n_cells]); - C.magnetic_z = &(C.host[(7 + NSCALARS)*H.n_cells]); + C.magnetic_x = &(C.host[(grid_enum::magnetic_x)*H.n_cells]); + C.magnetic_y = &(C.host[(grid_enum::magnetic_y)*H.n_cells]); + C.magnetic_z = &(C.host[(grid_enum::magnetic_z)*H.n_cells]); #endif //MHD #ifdef DE C.GasEnergy = &(C.host[(H.n_fields-1)*H.n_cells]); @@ -322,9 +322,9 @@ void Grid3D::AllocateMemory(void) #endif #endif // SCALAR #ifdef MHD - C.d_magnetic_x = &(C.device[(5 + NSCALARS)*H.n_cells]); - C.d_magnetic_y = &(C.device[(6 + NSCALARS)*H.n_cells]); - C.d_magnetic_z = &(C.device[(7 + NSCALARS)*H.n_cells]); + C.d_magnetic_x = &(C.device[(grid_enum::magnetic_x)*H.n_cells]); + C.d_magnetic_y = &(C.device[(grid_enum::magnetic_y)*H.n_cells]); + C.d_magnetic_z = &(C.device[(grid_enum::magnetic_z)*H.n_cells]); #endif //MHD #ifdef DE C.d_GasEnergy = &(C.device[(H.n_fields-1)*H.n_cells]); diff --git a/src/grid/grid_enum.h b/src/grid/grid_enum.h index 6f25676dd..315533f5d 100644 --- a/src/grid/grid_enum.h +++ b/src/grid/grid_enum.h @@ -2,8 +2,8 @@ // An enum which holds offsets for grid quantities // In the final form of this approach, this file will also set nfields (not yet) and NSCALARS (done) -// so that adding a field only requires registering it here: -// grid knows to allocate memory based on nfields and NSCALARS +// so that adding a field only requires registering it here: +// grid knows to allocate memory based on nfields and NSCALARS // and values can be accessed with density[id + ncells*grid_enum::enum_name] // example: C.device[id + H.n_cells*grid_enum::basic_scalar] @@ -55,7 +55,7 @@ enum : int { #endif // SCALAR finalscalar_plus_1, // needed to calculate NSCALARS - finalscalar = finalscalar_plus_1 - 1, // resets enum to finalscalar so fields afterwards are correct + finalscalar = finalscalar_plus_1 - 1, // resets enum to finalscalar so fields afterwards are correct // so that anything after starts with scalar + NSCALARS #ifdef MHD @@ -69,7 +69,29 @@ enum : int { num_fields, //Aliases and manually computed enums + #ifdef MHD + num_flux_fields = num_fields-1, + num_interface_fields = num_fields-1, + #else + num_flux_fields = num_fields, + num_interface_fields = num_fields, + #endif //MHD nscalars = finalscalar_plus_1 - scalar, + magnetic_start = magnetic_x, + magnetic_end = magnetic_z, + // Note that the direction of the flux, the suffix _? indicates the direction of the electric field, not the magnetic flux + fluxX_magnetic_z = magnetic_start, + fluxX_magnetic_y = magnetic_start+1, + fluxY_magnetic_x = magnetic_start, + fluxY_magnetic_z = magnetic_start+1, + fluxZ_magnetic_y = magnetic_start, + fluxZ_magnetic_x = magnetic_start+1, + Q_x_magnetic_y = magnetic_start, + Q_x_magnetic_z = magnetic_start+1, + Q_y_magnetic_z = magnetic_start, + Q_y_magnetic_x = magnetic_start+1, + Q_z_magnetic_x = magnetic_start, + Q_z_magnetic_y = magnetic_start+1 }; } diff --git a/src/hydro/hydro_cuda.cu b/src/hydro/hydro_cuda.cu index c9fa2b682..add8af96a 100644 --- a/src/hydro/hydro_cuda.cu +++ b/src/hydro/hydro_cuda.cu @@ -1193,12 +1193,12 @@ __device__ void Average_Cell_All_Fields( int i, int j, int k, int nx, int ny, in Average_Cell_Single_Field( 4, i, j, k, nx, ny, nz, ncells, conserved ); #ifdef MHD // Average MHD - Average_Cell_Single_Field( 5+NSCALARS, i, j, k, nx, ny, nz, ncells, conserved ); - Average_Cell_Single_Field( 6+NSCALARS, i, j, k, nx, ny, nz, ncells, conserved ); - Average_Cell_Single_Field( 7+NSCALARS, i, j, k, nx, ny, nz, ncells, conserved ); - Average_Cell_Single_Field( 5+NSCALARS, i-1, j, k, nx, ny, nz, ncells, conserved ); - Average_Cell_Single_Field( 6+NSCALARS, i, j-1, k, nx, ny, nz, ncells, conserved ); - Average_Cell_Single_Field( 7+NSCALARS, i, j, k-1, nx, ny, nz, ncells, conserved ); + Average_Cell_Single_Field( grid_enum::magnetic_x, i, j, k, nx, ny, nz, ncells, conserved ); + Average_Cell_Single_Field( grid_enum::magnetic_y, i, j, k, nx, ny, nz, ncells, conserved ); + Average_Cell_Single_Field( grid_enum::magnetic_z, i, j, k, nx, ny, nz, ncells, conserved ); + Average_Cell_Single_Field( grid_enum::magnetic_x, i-1, j, k, nx, ny, nz, ncells, conserved ); + Average_Cell_Single_Field( grid_enum::magnetic_y, i, j-1, k, nx, ny, nz, ncells, conserved ); + Average_Cell_Single_Field( grid_enum::magnetic_z, i, j, k-1, nx, ny, nz, ncells, conserved ); #endif //MHD #ifdef DE // Average GasEnergy diff --git a/src/integrators/VL_3D_cuda.cu b/src/integrators/VL_3D_cuda.cu index 2d23cc636..1f9a6a459 100644 --- a/src/integrators/VL_3D_cuda.cu +++ b/src/integrators/VL_3D_cuda.cu @@ -67,8 +67,7 @@ void VL_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, int // `Q_Lx` interface store the reconstructed Y and Z magnetic fields in // that order, the `Q_Ly` interface stores the Z and X mangetic fields in // that order, and the `Q_Lz` interface stores the X and Y magnetic fields - // in that order. These fields start at the (5+NSCALARS)*n_cells and - // (6+NSCALARS)*n_cells locations respectively. The interface state arrays + // in that order. These fields can be indexed with the Q_?_dir grid_enums. The interface state arrays // store in the interface on the "right" side of the cell, so the flux // arrays store the fluxes through the right interface // @@ -77,12 +76,12 @@ void VL_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, int // -cross(V,B)x is the negative of the x-component of V cross B. Note that // "X" is the direction the solver is running in this case, not // necessarily the true "X". - // F_x[(5+NSCALARS)*n_cells] = VxBy - BxVy = -(-cross(V,B))z = -EMF_Z - // F_x[(6+NSCALARS)*n_cells] = VxBz - BxVz = (-cross(V,B))y = EMF_Y - // F_y[(5+NSCALARS)*n_cells] = VxBy - BxVy = -(-cross(V,B))z = -EMF_X - // F_y[(6+NSCALARS)*n_cells] = VxBz - BxVz = (-cross(V,B))y = EMF_Z - // F_z[(5+NSCALARS)*n_cells] = VxBy - BxVy = -(-cross(V,B))z = -EMF_Y - // F_z[(6+NSCALARS)*n_cells] = VxBz - BxVz = (-cross(V,B))y = EMF_X + // F_x[(grid_enum::fluxX_magnetic_z)*n_cells] = VxBy - BxVy = -(-cross(V,B))z = -EMF_Z + // F_x[(grid_enum::fluxX_magnetic_y)*n_cells] = VxBz - BxVz = (-cross(V,B))y = EMF_Y + // F_y[(grid_enum::fluxY_magnetic_x)*n_cells] = VxBy - BxVy = -(-cross(V,B))z = -EMF_X + // F_y[(grid_enum::fluxY_magnetic_z)*n_cells] = VxBz - BxVz = (-cross(V,B))y = EMF_Z + // F_z[(grid_enum::fluxZ_magnetic_y)*n_cells] = VxBy - BxVy = -(-cross(V,B))z = -EMF_Y + // F_z[(grid_enum::fluxZ_magnetic_x)*n_cells] = VxBz - BxVz = (-cross(V,B))y = EMF_X size_t const arraySize = (n_fields-1) * n_cells * sizeof(Real); size_t const ctArraySize = 3 * n_cells * sizeof(Real); #else // not MHD @@ -157,9 +156,9 @@ void VL_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, int hipLaunchKernelGGL(Calculate_HLL_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lz, Q_Rz, F_z, nx, ny, nz, n_ghost, gama, 2, n_fields); #endif //HLL #ifdef HLLD - hipLaunchKernelGGL(mhd::Calculate_HLLD_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, &(dev_conserved[(5 + NSCALARS) * n_cells]), F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); - hipLaunchKernelGGL(mhd::Calculate_HLLD_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, &(dev_conserved[(6 + NSCALARS) * n_cells]), F_y, nx, ny, nz, n_ghost, gama, 1, n_fields); - hipLaunchKernelGGL(mhd::Calculate_HLLD_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lz, Q_Rz, &(dev_conserved[(7 + NSCALARS) * n_cells]), F_z, nx, ny, nz, n_ghost, gama, 2, n_fields); + hipLaunchKernelGGL(mhd::Calculate_HLLD_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, &(dev_conserved[(grid_enum::magnetic_x) * n_cells]), F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); + hipLaunchKernelGGL(mhd::Calculate_HLLD_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, &(dev_conserved[(grid_enum::magnetic_y) * n_cells]), F_y, nx, ny, nz, n_ghost, gama, 1, n_fields); + hipLaunchKernelGGL(mhd::Calculate_HLLD_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lz, Q_Rz, &(dev_conserved[(grid_enum::magnetic_z) * n_cells]), F_z, nx, ny, nz, n_ghost, gama, 2, n_fields); #endif //HLLD CudaCheckError(); @@ -227,9 +226,9 @@ void VL_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, int hipLaunchKernelGGL(Calculate_HLL_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lz, Q_Rz, F_z, nx, ny, nz, n_ghost, gama, 2, n_fields); #endif //HLLC #ifdef HLLD - hipLaunchKernelGGL(mhd::Calculate_HLLD_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, &(dev_conserved_half[(5 + NSCALARS) * n_cells]), F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); - hipLaunchKernelGGL(mhd::Calculate_HLLD_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, &(dev_conserved_half[(6 + NSCALARS) * n_cells]), F_y, nx, ny, nz, n_ghost, gama, 1, n_fields); - hipLaunchKernelGGL(mhd::Calculate_HLLD_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lz, Q_Rz, &(dev_conserved_half[(7 + NSCALARS) * n_cells]), F_z, nx, ny, nz, n_ghost, gama, 2, n_fields); + hipLaunchKernelGGL(mhd::Calculate_HLLD_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, &(dev_conserved_half[(grid_enum::magnetic_x) * n_cells]), F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); + hipLaunchKernelGGL(mhd::Calculate_HLLD_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, &(dev_conserved_half[(grid_enum::magnetic_y) * n_cells]), F_y, nx, ny, nz, n_ghost, gama, 1, n_fields); + hipLaunchKernelGGL(mhd::Calculate_HLLD_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lz, Q_Rz, &(dev_conserved_half[(grid_enum::magnetic_z) * n_cells]), F_z, nx, ny, nz, n_ghost, gama, 2, n_fields); #endif //HLLD CudaCheckError(); diff --git a/src/mhd/ct_electric_fields.cu b/src/mhd/ct_electric_fields.cu index 64a17bf52..310da1be2 100644 --- a/src/mhd/ct_electric_fields.cu +++ b/src/mhd/ct_electric_fields.cu @@ -44,12 +44,12 @@ namespace mhd // fields/EMF. -cross(V,B)x is the negative of the x-component of V // cross B. Note that "X" is the direction the solver is running in // this case, not necessarily the true "X". - // F_x[(5+NSCALARS)*n_cells] = VxBy - BxVy = -(-cross(V,B))z = -EMF_Z - // F_x[(6+NSCALARS)*n_cells] = VxBz - BxVz = (-cross(V,B))y = EMF_Y - // F_y[(5+NSCALARS)*n_cells] = VxBy - BxVy = -(-cross(V,B))z = -EMF_X - // F_y[(6+NSCALARS)*n_cells] = VxBz - BxVz = (-cross(V,B))y = EMF_Z - // F_z[(5+NSCALARS)*n_cells] = VxBy - BxVy = -(-cross(V,B))z = -EMF_Y - // F_z[(6+NSCALARS)*n_cells] = VxBz - BxVz = (-cross(V,B))y = EMF_X + // F_x[(grid_enum::fluxX_magnetic_z)*n_cells] = VxBy - BxVy = -(-cross(V,B))z = -EMF_Z + // F_x[(grid_enum::fluxX_magnetic_y)*n_cells] = VxBz - BxVz = (-cross(V,B))y = EMF_Y + // F_y[(grid_enum::fluxY_magnetic_x)*n_cells] = VxBy - BxVy = -(-cross(V,B))z = -EMF_X + // F_y[(grid_enum::fluxY_magnetic_z)*n_cells] = VxBz - BxVz = (-cross(V,B))y = EMF_Z + // F_z[(grid_enum::fluxZ_magnetic_y)*n_cells] = VxBy - BxVy = -(-cross(V,B))z = -EMF_Y + // F_z[(grid_enum::fluxZ_magnetic_x)*n_cells] = VxBz - BxVz = (-cross(V,B))y = EMF_X // Notes on Implementation Details // - The density flux has the same sign as the velocity on the face @@ -147,10 +147,10 @@ namespace mhd // Load the face centered electric fields Note the negative signs to // convert from magnetic flux to electric field - face_y_pos = + fluxZ[cuda_utilities::compute1DIndex(xid , yid , zid-1, nx, ny) + (6+NSCALARS)*n_cells]; - face_y_neg = + fluxZ[cuda_utilities::compute1DIndex(xid , yid-1, zid-1, nx, ny) + (6+NSCALARS)*n_cells]; - face_z_pos = - fluxY[cuda_utilities::compute1DIndex(xid , yid-1, zid , nx, ny) + (5+NSCALARS)*n_cells]; - face_z_neg = - fluxY[cuda_utilities::compute1DIndex(xid , yid-1, zid-1, nx, ny) + (5+NSCALARS)*n_cells]; + face_y_pos = + fluxZ[cuda_utilities::compute1DIndex(xid , yid , zid-1, nx, ny) + (grid_enum::fluxZ_magnetic_x)*n_cells]; + face_y_neg = + fluxZ[cuda_utilities::compute1DIndex(xid , yid-1, zid-1, nx, ny) + (grid_enum::fluxZ_magnetic_x)*n_cells]; + face_z_pos = - fluxY[cuda_utilities::compute1DIndex(xid , yid-1, zid , nx, ny) + (grid_enum::fluxY_magnetic_x)*n_cells]; + face_z_neg = - fluxY[cuda_utilities::compute1DIndex(xid , yid-1, zid-1, nx, ny) + (grid_enum::fluxY_magnetic_x)*n_cells]; // sum and average face centered electric fields and slopes to get the // edge averaged electric field. @@ -233,10 +233,10 @@ namespace mhd // Load the face centered electric fields Note the negative signs to // convert from magnetic flux to electric field - face_x_pos = - fluxZ[cuda_utilities::compute1DIndex(xid , yid, zid-1, nx, ny) + (5+NSCALARS)*n_cells]; - face_x_neg = - fluxZ[cuda_utilities::compute1DIndex(xid-1, yid, zid-1, nx, ny) + (5+NSCALARS)*n_cells]; - face_z_pos = + fluxX[cuda_utilities::compute1DIndex(xid-1, yid, zid , nx, ny) + (6+NSCALARS)*n_cells]; - face_z_neg = + fluxX[cuda_utilities::compute1DIndex(xid-1, yid, zid-1, nx, ny) + (6+NSCALARS)*n_cells]; + face_x_pos = - fluxZ[cuda_utilities::compute1DIndex(xid , yid, zid-1, nx, ny) + (grid_enum::fluxZ_magnetic_y)*n_cells]; + face_x_neg = - fluxZ[cuda_utilities::compute1DIndex(xid-1, yid, zid-1, nx, ny) + (grid_enum::fluxZ_magnetic_y)*n_cells]; + face_z_pos = + fluxX[cuda_utilities::compute1DIndex(xid-1, yid, zid , nx, ny) + (grid_enum::fluxX_magnetic_y)*n_cells]; + face_z_neg = + fluxX[cuda_utilities::compute1DIndex(xid-1, yid, zid-1, nx, ny) + (grid_enum::fluxX_magnetic_y)*n_cells]; // sum and average face centered electric fields and slopes to get the // edge averaged electric field. @@ -319,10 +319,10 @@ namespace mhd // Load the face centered electric fields Note the negative signs to // convert from magnetic flux to electric field - face_x_pos = + fluxY[cuda_utilities::compute1DIndex(xid , yid-1, zid, nx, ny) + (6+NSCALARS)*n_cells]; - face_x_neg = + fluxY[cuda_utilities::compute1DIndex(xid-1, yid-1, zid, nx, ny) + (6+NSCALARS)*n_cells]; - face_y_pos = - fluxX[cuda_utilities::compute1DIndex(xid-1, yid , zid, nx, ny) + (5+NSCALARS)*n_cells]; - face_y_neg = - fluxX[cuda_utilities::compute1DIndex(xid-1, yid-1, zid, nx, ny) + (5+NSCALARS)*n_cells]; + face_x_pos = + fluxY[cuda_utilities::compute1DIndex(xid , yid-1, zid, nx, ny) + (grid_enum::fluxY_magnetic_z)*n_cells]; + face_x_neg = + fluxY[cuda_utilities::compute1DIndex(xid-1, yid-1, zid, nx, ny) + (grid_enum::fluxY_magnetic_z)*n_cells]; + face_y_pos = - fluxX[cuda_utilities::compute1DIndex(xid-1, yid , zid, nx, ny) + (grid_enum::fluxX_magnetic_z)*n_cells]; + face_y_neg = - fluxX[cuda_utilities::compute1DIndex(xid-1, yid-1, zid, nx, ny) + (grid_enum::fluxX_magnetic_z)*n_cells]; // sum and average face centered electric fields and slopes to get the // edge averaged electric field. diff --git a/src/mhd/ct_electric_fields.h b/src/mhd/ct_electric_fields.h index 987633461..9e122d0dd 100644 --- a/src/mhd/ct_electric_fields.h +++ b/src/mhd/ct_electric_fields.h @@ -107,10 +107,10 @@ namespace mhd Real const density = dev_conserved[idxCentered ]; Real const Momentum2 = dev_conserved[idxCentered + (modPlus1+1) *n_cells]; Real const Momentum3 = dev_conserved[idxCentered + (modPlus2+1) *n_cells]; - Real const B2Centered = 0.5 * ( dev_conserved[idxCentered + (modPlus1+5+NSCALARS)*n_cells] - + dev_conserved[idxB2Shift + (modPlus1+5+NSCALARS)*n_cells]); - Real const B3Centered = 0.5 * ( dev_conserved[idxCentered + (modPlus2+5+NSCALARS)*n_cells] - + dev_conserved[idxB3Shift + (modPlus2+5+NSCALARS)*n_cells]); + Real const B2Centered = 0.5 * ( dev_conserved[idxCentered + (modPlus1+grid_enum::magnetic_start)*n_cells] + + dev_conserved[idxB2Shift + (modPlus1+grid_enum::magnetic_start)*n_cells]); + Real const B3Centered = 0.5 * ( dev_conserved[idxCentered + (modPlus2+grid_enum::magnetic_start)*n_cells] + + dev_conserved[idxB3Shift + (modPlus2+grid_enum::magnetic_start)*n_cells]); // Compute the electric field in the center with a cross product Real const electric_centered = (Momentum3*B2Centered - Momentum2*B3Centered) / density; @@ -118,7 +118,7 @@ namespace mhd // Load face centered electric field, note fluxSign to correctly do // the shift from magnetic flux to EMF/electric field and to choose // which field to use - Real const electric_face = fluxSign * flux[idxFlux + (int(fluxSign == 1)+5+NSCALARS)*n_cells]; + Real const electric_face = fluxSign * flux[idxFlux + (int(fluxSign == 1)+grid_enum::magnetic_start)*n_cells]; // Compute the slope and return it return electric_face - electric_centered; diff --git a/src/mhd/ct_electric_fields_tests.cu b/src/mhd/ct_electric_fields_tests.cu index 55b46f3c8..becbe2afa 100644 --- a/src/mhd/ct_electric_fields_tests.cu +++ b/src/mhd/ct_electric_fields_tests.cu @@ -44,9 +44,9 @@ public: ny(nx), nz(nx), n_cells(nx*ny*nz), - fluxX(n_cells * (7+NSCALARS)), - fluxY(n_cells * (7+NSCALARS)), - fluxZ(n_cells * (7+NSCALARS)), + fluxX(n_cells * (grid_enum::num_flux_fields)), + fluxY(n_cells * (grid_enum::num_flux_fields)), + fluxZ(n_cells * (grid_enum::num_flux_fields)), grid (n_cells * (8+NSCALARS)), testCTElectricFields(n_cells * 3, -999.), fiducialData(n_cells * 3, -999.), diff --git a/src/mhd/magnetic_divergence.cu b/src/mhd/magnetic_divergence.cu index 02051e48c..0d8bf90a1 100644 --- a/src/mhd/magnetic_divergence.cu +++ b/src/mhd/magnetic_divergence.cu @@ -59,14 +59,14 @@ namespace mhd // Compute divergence cellDivergence = - (( dev_conserved[id + (5+NSCALARS)*n_cells] - - dev_conserved[id_xMin1 + (5+NSCALARS)*n_cells]) + (( dev_conserved[id + (grid_enum::magnetic_x)*n_cells] + - dev_conserved[id_xMin1 + (grid_enum::magnetic_x)*n_cells]) / dx) - + (( dev_conserved[id + (6+NSCALARS)*n_cells] - - dev_conserved[id_yMin1 + (6+NSCALARS)*n_cells]) + + (( dev_conserved[id + (grid_enum::magnetic_y)*n_cells] + - dev_conserved[id_yMin1 + (grid_enum::magnetic_y)*n_cells]) / dy) - + (( dev_conserved[id + (7+NSCALARS)*n_cells] - - dev_conserved[id_zMin1 + (7+NSCALARS)*n_cells]) + + (( dev_conserved[id + (grid_enum::magnetic_z)*n_cells] + - dev_conserved[id_zMin1 + (grid_enum::magnetic_z)*n_cells]) / dz); maxDivergence = max(maxDivergence, fabs(cellDivergence)); diff --git a/src/mhd/magnetic_update.cu b/src/mhd/magnetic_update.cu index 78f298e05..fb6a89fec 100644 --- a/src/mhd/magnetic_update.cu +++ b/src/mhd/magnetic_update.cu @@ -61,17 +61,17 @@ namespace mhd // Perform Updates // X field update - destinationGrid[threadId + (5+NSCALARS)*n_cells] = sourceGrid[threadId + (5+NSCALARS)*n_cells] + destinationGrid[threadId + (grid_enum::magnetic_x)*n_cells] = sourceGrid[threadId + (grid_enum::magnetic_x)*n_cells] + dtodz * (electric_y_3 - electric_y_1) + dtody * (electric_z_1 - electric_z_3); // Y field update - destinationGrid[threadId + (6+NSCALARS)*n_cells] = sourceGrid[threadId + (6+NSCALARS)*n_cells] + destinationGrid[threadId + (grid_enum::magnetic_y)*n_cells] = sourceGrid[threadId + (grid_enum::magnetic_y)*n_cells] + dtodx * (electric_z_3 - electric_z_2) + dtodz * (electric_x_1 - electric_x_3); // Z field update - destinationGrid[threadId + (7+NSCALARS)*n_cells] = sourceGrid[threadId + (7+NSCALARS)*n_cells] + destinationGrid[threadId + (grid_enum::magnetic_z)*n_cells] = sourceGrid[threadId + (grid_enum::magnetic_z)*n_cells] + dtody * (electric_x_3 - electric_x_2) + dtodx * (electric_y_2 - electric_y_3); } diff --git a/src/reconstruction/pcm_cuda.cu b/src/reconstruction/pcm_cuda.cu index 91fb75223..4998dcd92 100644 --- a/src/reconstruction/pcm_cuda.cu +++ b/src/reconstruction/pcm_cuda.cu @@ -325,8 +325,8 @@ __global__ void PCM_Reconstruction_3D(Real *dev_conserved, } #endif //SCALAR #ifdef MHD - dev_bounds_Lx[(5+NSCALARS)*n_cells + id] = cellCenteredBy; - dev_bounds_Lx[(6+NSCALARS)*n_cells + id] = cellCenteredBz; + dev_bounds_Lx[(grid_enum::Q_x_magnetic_y)*n_cells + id] = cellCenteredBy; + dev_bounds_Lx[(grid_enum::Q_x_magnetic_z)*n_cells + id] = cellCenteredBz; #endif //MHD #ifdef DE dev_bounds_Lx[(n_fields-1)*n_cells + id] = ge; @@ -345,8 +345,8 @@ __global__ void PCM_Reconstruction_3D(Real *dev_conserved, } #endif //SCALAR #ifdef MHD - dev_bounds_Ly[(5+NSCALARS)*n_cells + id] = cellCenteredBz; - dev_bounds_Ly[(6+NSCALARS)*n_cells + id] = cellCenteredBx; + dev_bounds_Ly[(grid_enum::Q_y_magnetic_z)*n_cells + id] = cellCenteredBz; + dev_bounds_Ly[(grid_enum::Q_y_magnetic_x)*n_cells + id] = cellCenteredBx; #endif //MHD #ifdef DE dev_bounds_Ly[(n_fields-1)*n_cells + id] = ge; @@ -365,8 +365,8 @@ __global__ void PCM_Reconstruction_3D(Real *dev_conserved, } #endif //SCALAR #ifdef MHD - dev_bounds_Lz[(5+NSCALARS)*n_cells + id] = cellCenteredBx; - dev_bounds_Lz[(6+NSCALARS)*n_cells + id] = cellCenteredBy; + dev_bounds_Lz[(grid_enum::Q_z_magnetic_x)*n_cells + id] = cellCenteredBx; + dev_bounds_Lz[(grid_enum::Q_z_magnetic_y)*n_cells + id] = cellCenteredBy; #endif //MHD #ifdef DE dev_bounds_Lz[(n_fields-1)*n_cells + id] = ge; @@ -388,8 +388,8 @@ __global__ void PCM_Reconstruction_3D(Real *dev_conserved, } #endif //SCALAR #ifdef MHD - dev_bounds_Rx[(5+NSCALARS)*n_cells + id] = cellCenteredBy; - dev_bounds_Rx[(6+NSCALARS)*n_cells + id] = cellCenteredBz; + dev_bounds_Rx[(grid_enum::Q_x_magnetic_y)*n_cells + id] = cellCenteredBy; + dev_bounds_Rx[(grid_enum::Q_x_magnetic_z)*n_cells + id] = cellCenteredBz; #endif //MHD #ifdef DE dev_bounds_Rx[(n_fields-1)*n_cells + id] = ge; @@ -412,8 +412,8 @@ __global__ void PCM_Reconstruction_3D(Real *dev_conserved, } #endif //SCALAR #ifdef MHD - dev_bounds_Ry[(5+NSCALARS)*n_cells + id] = cellCenteredBz; - dev_bounds_Ry[(6+NSCALARS)*n_cells + id] = cellCenteredBx; + dev_bounds_Ry[(grid_enum::Q_y_magnetic_z)*n_cells + id] = cellCenteredBz; + dev_bounds_Ry[(grid_enum::Q_y_magnetic_x)*n_cells + id] = cellCenteredBx; #endif //MHD #ifdef DE dev_bounds_Ry[(n_fields-1)*n_cells + id] = ge; @@ -436,8 +436,8 @@ __global__ void PCM_Reconstruction_3D(Real *dev_conserved, } #endif //SCALAR #ifdef MHD - dev_bounds_Rz[(5+NSCALARS)*n_cells + id] = cellCenteredBx; - dev_bounds_Rz[(6+NSCALARS)*n_cells + id] = cellCenteredBy; + dev_bounds_Rz[(grid_enum::Q_z_magnetic_x)*n_cells + id] = cellCenteredBx; + dev_bounds_Rz[(grid_enum::Q_z_magnetic_y)*n_cells + id] = cellCenteredBy; #endif //MHD #ifdef DE dev_bounds_Rz[(n_fields-1)*n_cells + id] = ge; diff --git a/src/riemann_solvers/hlld_cuda.cu b/src/riemann_solvers/hlld_cuda.cu index ca20fdb1f..18fe8578e 100644 --- a/src/riemann_solvers/hlld_cuda.cu +++ b/src/riemann_solvers/hlld_cuda.cu @@ -16,6 +16,7 @@ #include "../riemann_solvers/hlld_cuda.h" #include "../utils/cuda_utilities.h" #include "../utils/math_utilities.h" +#include "../grid/grid_enum.h" #ifdef DE //PRESSURE_DE #include "../utils/hydro_utilities.h" @@ -73,8 +74,8 @@ namespace mhd Real momentumYL = dev_bounds_L[threadId + n_cells * o2]; Real momentumZL = dev_bounds_L[threadId + n_cells * o3]; Real energyL = dev_bounds_L[threadId + n_cells * 4]; - Real magneticYL = dev_bounds_L[threadId + n_cells * (5 + NSCALARS)]; - Real magneticZL = dev_bounds_L[threadId + n_cells * (6 + NSCALARS)]; + Real magneticYL = dev_bounds_L[threadId + n_cells * (grid_enum::Q_x_magnetic_y)]; + Real magneticZL = dev_bounds_L[threadId + n_cells * (grid_enum::Q_x_magnetic_z)]; #ifdef SCALAR Real scalarConservedL[NSCALARS]; @@ -93,8 +94,8 @@ namespace mhd Real momentumYR = dev_bounds_R[threadId + n_cells * o2]; Real momentumZR = dev_bounds_R[threadId + n_cells * o3]; Real energyR = dev_bounds_R[threadId + n_cells * 4]; - Real magneticYR = dev_bounds_R[threadId + n_cells * (5 + NSCALARS)]; - Real magneticZR = dev_bounds_R[threadId + n_cells * (6 + NSCALARS)]; + Real magneticYR = dev_bounds_R[threadId + n_cells * (grid_enum::Q_x_magnetic_y)]; + Real magneticZR = dev_bounds_R[threadId + n_cells * (grid_enum::Q_x_magnetic_z)]; #ifdef SCALAR Real scalarConservedR[NSCALARS]; @@ -706,8 +707,8 @@ namespace mhd dev_flux[threadId + n_cells * o2] = momentumFluxY; dev_flux[threadId + n_cells * o3] = momentumFluxZ; dev_flux[threadId + n_cells * 4] = energyFlux; - dev_flux[threadId + n_cells * (5 + NSCALARS)] = magneticFluxY; - dev_flux[threadId + n_cells * (6 + NSCALARS)] = magneticFluxZ; + dev_flux[threadId + n_cells * (grid_enum::fluxX_magnetic_z)] = magneticFluxY; + dev_flux[threadId + n_cells * (grid_enum::fluxX_magnetic_y)] = magneticFluxZ; } // ===================================================================== diff --git a/src/riemann_solvers/hlld_cuda_tests.cu b/src/riemann_solvers/hlld_cuda_tests.cu index c39116d7a..ad4ac1547 100644 --- a/src/riemann_solvers/hlld_cuda_tests.cu +++ b/src/riemann_solvers/hlld_cuda_tests.cu @@ -16,6 +16,7 @@ // Local Includes #include "../global/global_cuda.h" +#include "../grid/grid_enum.h" #include "../utils/gpu.hpp" #include "../utils/testing_utilities.h" #include "../utils/mhd_utilities.h" @@ -65,12 +66,11 @@ // Create new vectors that store the values in the way that the HLLD // solver expects - size_t const magXIndex = 5+NSCALARS; - EXPECT_DOUBLE_EQ(stateLeft.at(magXIndex), stateRight.at(magXIndex)) + EXPECT_DOUBLE_EQ(stateLeft.at(grid_enum::magnetic_x), stateRight.at(grid_enum::magnetic_x)) << "The left and right magnetic fields are not equal"; - std::vector const magneticX{stateLeft.at(magXIndex)}; - stateLeft.erase(stateLeft.begin() + magXIndex); - stateRight.erase(stateRight.begin() + magXIndex); + std::vector const magneticX{stateLeft.at(grid_enum::magnetic_x)}; + stateLeft.erase(stateLeft.begin() + grid_enum::magnetic_x); + stateRight.erase(stateRight.begin() + grid_enum::magnetic_x); // Simulation Paramters int const nx = 1; // Number of cells in the x-direction @@ -153,7 +153,7 @@ // The HLLD solver only writes the the first two "slots" for // magnetic flux so let's rearrange to make sure we have all the // magnetic fluxes in the right spots - testFlux.insert(testFlux.begin() + magXIndex, 0.0); + testFlux.insert(testFlux.begin() + grid_enum::magnetic_x, 0.0); std::rotate(testFlux.begin() + 1, testFlux.begin() + 1 + direction, testFlux.begin() + 4); // Rotate momentum return testFlux; @@ -294,9 +294,9 @@ output.at(1), output.at(2), output.at(3), - output.at(5 + NSCALARS), - output.at(6 + NSCALARS), - output.at(7 + NSCALARS), + output.at(grid_enum::magnetic_x), + output.at(grid_enum::magnetic_y), + output.at(grid_enum::magnetic_z), gamma)); #endif //DE return output; @@ -1665,11 +1665,11 @@ negativeDensityPressure.insert(negativeDensityPressure.begin()+5, conservedScalar.begin(), conservedScalar.begin() + NSCALARS); #endif // SCALAR #ifdef DE - negativePressure.push_back(mhd::utils::computeThermalEnergy(negativePressure.at(4),negativePressure.at(0),negativePressure.at(1),negativePressure.at(2),negativePressure.at(3),negativePressure.at(5 + NSCALARS),negativePressure.at(6 + NSCALARS),negativePressure.at(7 + NSCALARS),gamma)); - negativeEnergy.push_back(mhd::utils::computeThermalEnergy(negativeEnergy.at(4),negativeEnergy.at(0),negativeEnergy.at(1),negativeEnergy.at(2),negativeEnergy.at(3),negativeEnergy.at(5 + NSCALARS),negativeEnergy.at(6 + NSCALARS),negativeEnergy.at(7 + NSCALARS),gamma)); - negativeDensity.push_back(mhd::utils::computeThermalEnergy(negativeDensity.at(4),negativeDensity.at(0),negativeDensity.at(1),negativeDensity.at(2),negativeDensity.at(3),negativeDensity.at(5 + NSCALARS),negativeDensity.at(6 + NSCALARS),negativeDensity.at(7 + NSCALARS),gamma)); - negativeDensityEnergyPressure.push_back(mhd::utils::computeThermalEnergy(negativeDensityEnergyPressure.at(4),negativeDensityEnergyPressure.at(0),negativeDensityEnergyPressure.at(1),negativeDensityEnergyPressure.at(2),negativeDensityEnergyPressure.at(3),negativeDensityEnergyPressure.at(5 + NSCALARS),negativeDensityEnergyPressure.at(6 + NSCALARS),negativeDensityEnergyPressure.at(7 + NSCALARS),gamma)); - negativeDensityPressure.push_back(mhd::utils::computeThermalEnergy(negativeDensityPressure.at(4),negativeDensityPressure.at(0),negativeDensityPressure.at(1),negativeDensityPressure.at(2),negativeDensityPressure.at(3),negativeDensityPressure.at(5 + NSCALARS),negativeDensityPressure.at(6 + NSCALARS),negativeDensityPressure.at(7 + NSCALARS),gamma)); + negativePressure.push_back(mhd::utils::computeThermalEnergy(negativePressure.at(4),negativePressure.at(0),negativePressure.at(1),negativePressure.at(2),negativePressure.at(3),negativePressure.at(grid_enum::magnetic_x),negativePressure.at(grid_enum::magnetic_y),negativePressure.at(grid_enum::magnetic_z),gamma)); + negativeEnergy.push_back(mhd::utils::computeThermalEnergy(negativeEnergy.at(4),negativeEnergy.at(0),negativeEnergy.at(1),negativeEnergy.at(2),negativeEnergy.at(3),negativeEnergy.at(grid_enum::magnetic_x),negativeEnergy.at(grid_enum::magnetic_y),negativeEnergy.at(grid_enum::magnetic_z),gamma)); + negativeDensity.push_back(mhd::utils::computeThermalEnergy(negativeDensity.at(4),negativeDensity.at(0),negativeDensity.at(1),negativeDensity.at(2),negativeDensity.at(3),negativeDensity.at(grid_enum::magnetic_x),negativeDensity.at(grid_enum::magnetic_y),negativeDensity.at(grid_enum::magnetic_z),gamma)); + negativeDensityEnergyPressure.push_back(mhd::utils::computeThermalEnergy(negativeDensityEnergyPressure.at(4),negativeDensityEnergyPressure.at(0),negativeDensityEnergyPressure.at(1),negativeDensityEnergyPressure.at(2),negativeDensityEnergyPressure.at(3),negativeDensityEnergyPressure.at(grid_enum::magnetic_x),negativeDensityEnergyPressure.at(grid_enum::magnetic_y),negativeDensityEnergyPressure.at(grid_enum::magnetic_z),gamma)); + negativeDensityPressure.push_back(mhd::utils::computeThermalEnergy(negativeDensityPressure.at(4),negativeDensityPressure.at(0),negativeDensityPressure.at(1),negativeDensityPressure.at(2),negativeDensityPressure.at(3),negativeDensityPressure.at(grid_enum::magnetic_x),negativeDensityPressure.at(grid_enum::magnetic_y),negativeDensityPressure.at(grid_enum::magnetic_z),gamma)); #endif //DE for (size_t direction = 0; direction < 3; direction++) @@ -2532,8 +2532,8 @@ int const fiducialMomentumIndexY = threadId + n_cells * o2; int const fiducialMomentumIndexZ = threadId + n_cells * o3; int const fiducialEnergyIndex = threadId + n_cells * 4; - int const fiducialMagneticYIndex = threadId + n_cells * (5 + NSCALARS); - int const fiducialMagneticZIndex = threadId + n_cells * (6 + NSCALARS); + int const fiducialMagneticYIndex = threadId + n_cells * (grid_enum::magnetic_x); + int const fiducialMagneticZIndex = threadId + n_cells * (grid_enum::magnetic_y); mhd::_internal::_returnFluxes(threadId, o1, diff --git a/src/utils/mhd_utilities.h b/src/utils/mhd_utilities.h index d859ab1db..abe1c80e9 100644 --- a/src/utils/mhd_utilities.h +++ b/src/utils/mhd_utilities.h @@ -315,14 +315,14 @@ namespace utils{ // fields on both sides then instead set the centered magnetic field to be // equal to the magnetic field of the closest edge. T avgBx = (xid > 0) ? - /*if true*/ 0.5 * (dev_conserved[(5+NSCALARS)*n_cells + id] + dev_conserved[(5+NSCALARS)*n_cells + cuda_utilities::compute1DIndex(xid-1, yid, zid, nx, ny)]): - /*if false*/ dev_conserved[(5+NSCALARS)*n_cells + id]; + /*if true*/ 0.5 * (dev_conserved[(grid_enum::magnetic_x)*n_cells + id] + dev_conserved[(grid_enum::magnetic_x)*n_cells + cuda_utilities::compute1DIndex(xid-1, yid, zid, nx, ny)]): + /*if false*/ dev_conserved[(grid_enum::magnetic_x)*n_cells + id]; avgBy = (yid > 0) ? - /*if true*/ 0.5 * (dev_conserved[(6+NSCALARS)*n_cells + id] + dev_conserved[(6+NSCALARS)*n_cells + cuda_utilities::compute1DIndex(xid, yid-1, zid, nx, ny)]): - /*if false*/ dev_conserved[(6+NSCALARS)*n_cells + id]; + /*if true*/ 0.5 * (dev_conserved[(grid_enum::magnetic_y)*n_cells + id] + dev_conserved[(grid_enum::magnetic_y)*n_cells + cuda_utilities::compute1DIndex(xid, yid-1, zid, nx, ny)]): + /*if false*/ dev_conserved[(grid_enum::magnetic_y)*n_cells + id]; avgBz = (zid > 0) ? - /*if true*/ 0.5 * (dev_conserved[(7+NSCALARS)*n_cells + id] + dev_conserved[(7+NSCALARS)*n_cells + cuda_utilities::compute1DIndex(xid, yid, zid-1, nx, ny)]): - /*if false*/ dev_conserved[(7+NSCALARS)*n_cells + id]; + /*if true*/ 0.5 * (dev_conserved[(grid_enum::magnetic_z)*n_cells + id] + dev_conserved[(grid_enum::magnetic_z)*n_cells + cuda_utilities::compute1DIndex(xid, yid, zid-1, nx, ny)]): + /*if false*/ dev_conserved[(grid_enum::magnetic_z)*n_cells + id]; } // ========================================================================= } // end namespace mhd::utils From 3121feaf3c0cd82c2af518fb5dd9a3661b8376fd Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Tue, 20 Dec 2022 17:21:11 -0500 Subject: [PATCH 147/694] Remove duplicate lines in the makefile --- Makefile | 3 --- 1 file changed, 3 deletions(-) diff --git a/Makefile b/Makefile index dcc6d296c..f03a7478d 100644 --- a/Makefile +++ b/Makefile @@ -38,9 +38,6 @@ ifeq ($(TEST), true) CFLAGS += $(TEST_FLAGS) CXXFLAGS += $(TEST_FLAGS) GPUFLAGS += $(TEST_FLAGS) - CFLAGS += $(TEST_FLAGS) - CXXFLAGS += $(TEST_FLAGS) - GPUFLAGS += $(TEST_FLAGS) # HACK # Set the build flags to debug. This is mostly to avoid the approximations From 27011bf49e9a2c4d001a91474ceb25f429afdcaf Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Tue, 20 Dec 2022 17:23:03 -0500 Subject: [PATCH 148/694] Remove HIP error limit when compiling --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index f03a7478d..9d302d693 100644 --- a/Makefile +++ b/Makefile @@ -134,7 +134,7 @@ ifdef HIPCONFIG DFLAGS += -DO_HIP CXXFLAGS += $(HIPCONFIG) GPUCXX ?= hipcc - GPUFLAGS += -Wall -ferror-limit=1 + GPUFLAGS += -Wall LD := $(CXX) LDFLAGS := $(CXXFLAGS) -L$(ROCM_PATH)/lib LIBS += -lamdhip64 From 49aaef5904612f6896df08ad2449237d738ea09d Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Tue, 20 Dec 2022 17:45:00 -0500 Subject: [PATCH 149/694] Fix MHD usage of grid_enums so other builds will compile --- src/grid/grid_enum.h | 10 ++++++++-- src/mhd/ct_electric_fields.cu | 5 +++-- src/mhd/ct_electric_fields.h | 8 +++----- src/mhd/ct_electric_fields_tests.cu | 2 ++ src/mhd/magnetic_divergence.cu | 5 +++-- src/mhd/magnetic_divergence_tests.cu | 2 ++ src/mhd/magnetic_update.cu | 5 +++-- src/mhd/magnetic_update_tests.cu | 2 ++ src/riemann_solvers/hlld_cuda.cu | 10 ++++------ src/riemann_solvers/hlld_cuda_tests.cu | 4 +++- src/utils/mhd_utilities.h | 2 ++ src/utils/mhd_utilities_tests.cu | 2 ++ 12 files changed, 37 insertions(+), 20 deletions(-) diff --git a/src/grid/grid_enum.h b/src/grid/grid_enum.h index 315533f5d..b96f0f4ca 100644 --- a/src/grid/grid_enum.h +++ b/src/grid/grid_enum.h @@ -68,7 +68,9 @@ enum : int { #endif num_fields, -//Aliases and manually computed enums + //Aliases and manually computed enums + nscalars = finalscalar_plus_1 - scalar, + #ifdef MHD num_flux_fields = num_fields-1, num_interface_fields = num_fields-1, @@ -76,9 +78,11 @@ enum : int { num_flux_fields = num_fields, num_interface_fields = num_fields, #endif //MHD - nscalars = finalscalar_plus_1 - scalar, + + #ifdef MHD magnetic_start = magnetic_x, magnetic_end = magnetic_z, + // Note that the direction of the flux, the suffix _? indicates the direction of the electric field, not the magnetic flux fluxX_magnetic_z = magnetic_start, fluxX_magnetic_y = magnetic_start+1, @@ -86,12 +90,14 @@ enum : int { fluxY_magnetic_z = magnetic_start+1, fluxZ_magnetic_y = magnetic_start, fluxZ_magnetic_x = magnetic_start+1, + Q_x_magnetic_y = magnetic_start, Q_x_magnetic_z = magnetic_start+1, Q_y_magnetic_z = magnetic_start, Q_y_magnetic_x = magnetic_start+1, Q_z_magnetic_x = magnetic_start, Q_z_magnetic_y = magnetic_start+1 + #endif // MHD }; } diff --git a/src/mhd/ct_electric_fields.cu b/src/mhd/ct_electric_fields.cu index 310da1be2..cd90ae2ac 100644 --- a/src/mhd/ct_electric_fields.cu +++ b/src/mhd/ct_electric_fields.cu @@ -11,7 +11,7 @@ // Local Includes #include "../mhd/ct_electric_fields.h" - +#ifdef MHD namespace mhd { // ========================================================================= @@ -337,4 +337,5 @@ namespace mhd } } // ========================================================================= -} // end namespace mhd \ No newline at end of file +} // end namespace mhd +#endif // MHD diff --git a/src/mhd/ct_electric_fields.h b/src/mhd/ct_electric_fields.h index 9e122d0dd..3db5927ad 100644 --- a/src/mhd/ct_electric_fields.h +++ b/src/mhd/ct_electric_fields.h @@ -17,10 +17,7 @@ #include "../utils/gpu.hpp" #include "../utils/cuda_utilities.h" -/*! - * \brief Namespace for MHD code - * - */ +#ifdef MHD namespace mhd { /*! @@ -152,4 +149,5 @@ namespace mhd int const nz, int const n_cells); // ========================================================================= -} // end namespace mhd \ No newline at end of file +} // end namespace mhd +#endif // MHD \ No newline at end of file diff --git a/src/mhd/ct_electric_fields_tests.cu b/src/mhd/ct_electric_fields_tests.cu index becbe2afa..5ba3a999a 100644 --- a/src/mhd/ct_electric_fields_tests.cu +++ b/src/mhd/ct_electric_fields_tests.cu @@ -20,6 +20,7 @@ #include "../mhd/ct_electric_fields.h" #include "../global/global.h" +#ifdef MHD // ============================================================================= // Tests for the mhd::Calculate_CT_Electric_Fields kernel // ============================================================================= @@ -205,3 +206,4 @@ TEST_F(tMHDCalculateCTElectricFields, runTest(); } // ============================================================================= +#endif // MHD diff --git a/src/mhd/magnetic_divergence.cu b/src/mhd/magnetic_divergence.cu index 0d8bf90a1..50aec4f1b 100644 --- a/src/mhd/magnetic_divergence.cu +++ b/src/mhd/magnetic_divergence.cu @@ -20,7 +20,7 @@ #include "../utils/cuda_utilities.h" #include "../utils/reduction_utilities.h" #include "../utils/DeviceVector.h" - +#ifdef MHD namespace mhd { // ========================================================================= @@ -109,4 +109,5 @@ namespace mhd return dev_maxDivergence[0]; } // ========================================================================= -} // end namespace mhd \ No newline at end of file +} // end namespace mhd +#endif // MHD diff --git a/src/mhd/magnetic_divergence_tests.cu b/src/mhd/magnetic_divergence_tests.cu index ba2695e53..9751f6e0b 100644 --- a/src/mhd/magnetic_divergence_tests.cu +++ b/src/mhd/magnetic_divergence_tests.cu @@ -21,6 +21,7 @@ #include "../utils/DeviceVector.h" #include "../global/global.h" +#ifdef MHD // ============================================================================= // Tests for the magnetic field divergence functions // ============================================================================= @@ -57,3 +58,4 @@ TEST(tMHDLaunchCalculateMagneticDivergence, CorrectInputExpectCorrectOutput) // ============================================================================= // End of tests for the magnetic field divergence functions // ============================================================================= +#endif // MHD diff --git a/src/mhd/magnetic_update.cu b/src/mhd/magnetic_update.cu index fb6a89fec..550fb7188 100644 --- a/src/mhd/magnetic_update.cu +++ b/src/mhd/magnetic_update.cu @@ -12,7 +12,7 @@ // Local Includes #include "../mhd/magnetic_update.h" #include "../utils/cuda_utilities.h" - +#ifdef MHD namespace mhd { // ========================================================================= @@ -77,4 +77,5 @@ namespace mhd } } // ========================================================================= -} // end namespace mhd \ No newline at end of file +} // end namespace mhd +#endif // MHD diff --git a/src/mhd/magnetic_update_tests.cu b/src/mhd/magnetic_update_tests.cu index 7cb4f68f2..bebef2621 100644 --- a/src/mhd/magnetic_update_tests.cu +++ b/src/mhd/magnetic_update_tests.cu @@ -19,6 +19,7 @@ #include "../utils/cuda_utilities.h" #include "../mhd/magnetic_update.h" +#ifdef MHD // ============================================================================= /*! * \brief Test fixture for tMHDUpdateMagneticField3D test suite @@ -148,3 +149,4 @@ TEST_F(tMHDUpdateMagneticField3D, runTest(); } // ============================================================================= +#endif // MHD diff --git a/src/riemann_solvers/hlld_cuda.cu b/src/riemann_solvers/hlld_cuda.cu index 18fe8578e..a7793150b 100644 --- a/src/riemann_solvers/hlld_cuda.cu +++ b/src/riemann_solvers/hlld_cuda.cu @@ -23,10 +23,8 @@ #endif // DE #ifdef CUDA -/*! - * \brief Namespace for MHD code - * - */ + +#ifdef MHD namespace mhd { // ========================================================================= @@ -912,5 +910,5 @@ namespace mhd } // mhd::_internal namespace } // end namespace mhd - -#endif // CUDA \ No newline at end of file +#endif // MHD +#endif // CUDA diff --git a/src/riemann_solvers/hlld_cuda_tests.cu b/src/riemann_solvers/hlld_cuda_tests.cu index ad4ac1547..a6ec74358 100644 --- a/src/riemann_solvers/hlld_cuda_tests.cu +++ b/src/riemann_solvers/hlld_cuda_tests.cu @@ -23,6 +23,7 @@ #include "../riemann_solvers/hlld_cuda.h" // Include code to test #ifdef CUDA +#ifdef MHD // ========================================================================= // Integration tests for the entire HLLD solver. Unit tests are below // ========================================================================= @@ -2578,4 +2579,5 @@ } } // ========================================================================= -#endif // CUDA \ No newline at end of file +#endif // MHD +#endif // CUDA diff --git a/src/utils/mhd_utilities.h b/src/utils/mhd_utilities.h index abe1c80e9..6b587e535 100644 --- a/src/utils/mhd_utilities.h +++ b/src/utils/mhd_utilities.h @@ -282,6 +282,7 @@ namespace utils{ // ========================================================================= // ========================================================================= + #ifdef MHD /*! * \brief Compute the cell centered average of the magnetic fields in a * given cell @@ -324,6 +325,7 @@ namespace utils{ /*if true*/ 0.5 * (dev_conserved[(grid_enum::magnetic_z)*n_cells + id] + dev_conserved[(grid_enum::magnetic_z)*n_cells + cuda_utilities::compute1DIndex(xid, yid, zid-1, nx, ny)]): /*if false*/ dev_conserved[(grid_enum::magnetic_z)*n_cells + id]; } + #endif // MHD // ========================================================================= } // end namespace mhd::utils } // end namespace mhd \ No newline at end of file diff --git a/src/utils/mhd_utilities_tests.cu b/src/utils/mhd_utilities_tests.cu index d56ae2bad..dcbbebca8 100644 --- a/src/utils/mhd_utilities_tests.cu +++ b/src/utils/mhd_utilities_tests.cu @@ -505,6 +505,7 @@ TEST(tMHDAlfvenSpeed, // ============================================================================= // Tests for the mhd::utils::cellCenteredMagneticFields function // ============================================================================= +#ifdef MHD TEST(tMHDCellCenteredMagneticFields, CorrectInputExpectCorrectOutput) { @@ -536,6 +537,7 @@ TEST(tMHDCellCenteredMagneticFields, testingUtilities::checkResults(fiducialAvgBy, testAvgBy, "cell centered By value"); testingUtilities::checkResults(fiducialAvgBz, testAvgBz, "cell centered Bz value"); } +#endif // MHD // ============================================================================= // End of tests for the mhd::utils::cellCenteredMagneticFields function // ============================================================================= From 095b619ac4e909fbb937e4e8fa96cb3e2936a749 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Tue, 20 Dec 2022 17:58:32 -0500 Subject: [PATCH 150/694] Finish replacing NSCALARS in MHD test code Note: The scalar loops in PCM and the HLLD solver haven't been touched --- src/mhd/ct_electric_fields_tests.cu | 2 +- src/mhd/magnetic_update_tests.cu | 6 +++--- src/riemann_solvers/hlld_cuda_tests.cu | 22 +++++++++++----------- src/utils/mhd_utilities_tests.cu | 2 +- 4 files changed, 16 insertions(+), 16 deletions(-) diff --git a/src/mhd/ct_electric_fields_tests.cu b/src/mhd/ct_electric_fields_tests.cu index 5ba3a999a..b526ab7e0 100644 --- a/src/mhd/ct_electric_fields_tests.cu +++ b/src/mhd/ct_electric_fields_tests.cu @@ -48,7 +48,7 @@ public: fluxX(n_cells * (grid_enum::num_flux_fields)), fluxY(n_cells * (grid_enum::num_flux_fields)), fluxZ(n_cells * (grid_enum::num_flux_fields)), - grid (n_cells * (8+NSCALARS)), + grid (n_cells * (grid_enum::num_fields)), testCTElectricFields(n_cells * 3, -999.), fiducialData(n_cells * 3, -999.), dimGrid((n_cells + TPB - 1),1,1), diff --git a/src/mhd/magnetic_update_tests.cu b/src/mhd/magnetic_update_tests.cu index bebef2621..501803e1c 100644 --- a/src/mhd/magnetic_update_tests.cu +++ b/src/mhd/magnetic_update_tests.cu @@ -44,10 +44,10 @@ public: dx(2.5), dy(2.5), dz(2.5), - sourceGrid (n_cells * (8+NSCALARS)), - destinationGrid (n_cells * (8+NSCALARS), -999.), + sourceGrid (n_cells * (grid_enum::num_fields)), + destinationGrid (n_cells * (grid_enum::num_fields), -999.), ctElectricFields(n_cells * 3), - fiducialData (n_cells * (8+NSCALARS), -999.), + fiducialData (n_cells * (grid_enum::num_fields), -999.), dimGrid((n_cells + TPB - 1),1,1), dimBlock(TPB,1,1) { diff --git a/src/riemann_solvers/hlld_cuda_tests.cu b/src/riemann_solvers/hlld_cuda_tests.cu index a6ec74358..0de90e6f9 100644 --- a/src/riemann_solvers/hlld_cuda_tests.cu +++ b/src/riemann_solvers/hlld_cuda_tests.cu @@ -204,13 +204,13 @@ #endif //DE #ifdef SCALAR std::vector scalarNames{"Scalar 1", "Scalar 2", "Scalar 3"}; - fieldNames.insert(fieldNames.begin()+5, + fieldNames.insert(fieldNames.begin() + grid_enum::magnetic_start, scalarNames.begin(), - scalarNames.begin() + NSCALARS); + scalarNames.begin() + grid_enum::nscalars); - fiducialFlux.insert(fiducialFlux.begin()+5, + fiducialFlux.insert(fiducialFlux.begin() + grid_enum::magnetic_start, scalarFlux.begin(), - scalarFlux.begin() + NSCALARS); + scalarFlux.begin() + grid_enum::nscalars); #endif //SCALAR ASSERT_TRUE( (fiducialFlux.size() == testFlux.size()) @@ -285,9 +285,9 @@ primitiveScalars.end(), conservedScalar.begin(), [&](Real const &c){ return c*output.at(0); }); - output.insert(output.begin()+5, + output.insert(output.begin() + grid_enum::magnetic_start, conservedScalar.begin(), - conservedScalar.begin() + NSCALARS); + conservedScalar.begin() + grid_enum::nscalars); #endif //SCALAR #ifdef DE output.push_back(mhd::utils::computeThermalEnergy(output.at(4), @@ -1659,11 +1659,11 @@ #ifdef SCALAR std::vector const conservedScalar{1.1069975296, 2.2286185018, 3.3155141875}; - negativePressure.insert(negativePressure.begin()+5, conservedScalar.begin(), conservedScalar.begin() + NSCALARS); - negativeEnergy.insert(negativeEnergy.begin()+5, conservedScalar.begin(), conservedScalar.begin() + NSCALARS); - negativeDensity.insert(negativeDensity.begin()+5, conservedScalar.begin(), conservedScalar.begin() + NSCALARS); - negativeDensityEnergyPressure.insert(negativeDensityEnergyPressure.begin()+5, conservedScalar.begin(), conservedScalar.begin() + NSCALARS); - negativeDensityPressure.insert(negativeDensityPressure.begin()+5, conservedScalar.begin(), conservedScalar.begin() + NSCALARS); + negativePressure.insert(negativePressure.begin()+5, conservedScalar.begin(), conservedScalar.begin() + grid_enum::nscalars); + negativeEnergy.insert(negativeEnergy.begin()+5, conservedScalar.begin(), conservedScalar.begin() + grid_enum::nscalars); + negativeDensity.insert(negativeDensity.begin()+5, conservedScalar.begin(), conservedScalar.begin() + grid_enum::nscalars); + negativeDensityEnergyPressure.insert(negativeDensityEnergyPressure.begin()+5, conservedScalar.begin(), conservedScalar.begin() + grid_enum::nscalars); + negativeDensityPressure.insert(negativeDensityPressure.begin()+5, conservedScalar.begin(), conservedScalar.begin() + grid_enum::nscalars); #endif // SCALAR #ifdef DE negativePressure.push_back(mhd::utils::computeThermalEnergy(negativePressure.at(4),negativePressure.at(0),negativePressure.at(1),negativePressure.at(2),negativePressure.at(3),negativePressure.at(grid_enum::magnetic_x),negativePressure.at(grid_enum::magnetic_y),negativePressure.at(grid_enum::magnetic_z),gamma)); diff --git a/src/utils/mhd_utilities_tests.cu b/src/utils/mhd_utilities_tests.cu index dcbbebca8..d07b690d4 100644 --- a/src/utils/mhd_utilities_tests.cu +++ b/src/utils/mhd_utilities_tests.cu @@ -517,7 +517,7 @@ TEST(tMHDCellCenteredMagneticFields, size_t const n_cells = std::pow(5,3); // Make sure the vector is large enough that the locations where the // magnetic field would be in the real grid are filled - std::vector testGrid(n_cells * (8+NSCALARS)); + std::vector testGrid(n_cells * (grid_enum::num_fields)); // Populate the grid with values where testGrid.at(i) = double(i). The // values chosen aren't that important, just that every cell has a unique // value From 379b86e15def7090e5aeceb95bb5c2ea81bd8148 Mon Sep 17 00:00:00 2001 From: Alwin Date: Tue, 20 Dec 2022 17:11:38 -0800 Subject: [PATCH 151/694] Change a few comments and make dust compile on crusher --- src/dust/dust_cuda.cu | 4 ++-- src/hydro/hydro_cuda.cu | 1 + src/utils/reduction_utilities.h | 2 -- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/src/dust/dust_cuda.cu b/src/dust/dust_cuda.cu index 524b58cd0..50356c3c5 100644 --- a/src/dust/dust_cuda.cu +++ b/src/dust/dust_cuda.cu @@ -111,7 +111,7 @@ __global__ void Dust_Kernel(Real *dev_conserved, int nx, int ny, int nz, int n_g } // McKinnon et al. (2017) -__device__ Real calc_tau_sp(Real n, Real T) { +__device__ __host__ Real calc_tau_sp(Real n, Real T) { Real YR_IN_S = 3.154e7; Real a1 = 1; // dust grain size in units of 0.1 micrometers Real d0 = n / (6e-4); // gas density in units of 10^-27 g/cm^3 @@ -125,7 +125,7 @@ __device__ Real calc_tau_sp(Real n, Real T) { } // McKinnon et al. (2017) -__device__ Real calc_dd_dt(Real d_dust, Real tau_sp) { +__device__ __host__ Real calc_dd_dt(Real d_dust, Real tau_sp) { return -d_dust / (tau_sp/3); } diff --git a/src/hydro/hydro_cuda.cu b/src/hydro/hydro_cuda.cu index add8af96a..3b60f97f9 100644 --- a/src/hydro/hydro_cuda.cu +++ b/src/hydro/hydro_cuda.cu @@ -604,6 +604,7 @@ Real Calc_dt_GPU(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n } CudaCheckError(); + // Note: dev_dti[0] is DeviceVector syntactic sugar for returning a value via cudaMemcpy return dev_dti[0]; } diff --git a/src/utils/reduction_utilities.h b/src/utils/reduction_utilities.h index 9aef9600d..39089ac2e 100644 --- a/src/utils/reduction_utilities.h +++ b/src/utils/reduction_utilities.h @@ -267,8 +267,6 @@ */ __inline__ __device__ void gridReduceMax(Real val, Real* out) { - // __syncthreads(); // Wait for all threads to calculate val; - // __syncthreads(); // Wait for all threads to calculate val; // Reduce the entire block in parallel val = blockReduceMax(val); From 9ea73366ac94fa75399d7516cf49c11304ede3c1 Mon Sep 17 00:00:00 2001 From: Alwin Date: Tue, 20 Dec 2022 17:25:13 -0800 Subject: [PATCH 152/694] change -Wall and -Wno-unused-result order for frontier builds to reduce warnings --- Makefile | 2 +- builds/make.host.frontier | 4 ++-- src/grid/initial_conditions.cpp | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index 9d302d693..fa2bc4500 100644 --- a/Makefile +++ b/Makefile @@ -134,7 +134,7 @@ ifdef HIPCONFIG DFLAGS += -DO_HIP CXXFLAGS += $(HIPCONFIG) GPUCXX ?= hipcc - GPUFLAGS += -Wall + #GPUFLAGS += -Wall LD := $(CXX) LDFLAGS := $(CXXFLAGS) -L$(ROCM_PATH)/lib LIBS += -lamdhip64 diff --git a/builds/make.host.frontier b/builds/make.host.frontier index c225b3655..69f715871 100644 --- a/builds/make.host.frontier +++ b/builds/make.host.frontier @@ -11,8 +11,8 @@ CFLAGS_OPTIMIZE = -g -O2 CXXFLAGS_DEBUG = -g -O0 -std=c++17 CXXFLAGS_OPTIMIZE = -g -Ofast -std=c++17 -Wno-unused-result -GPUFLAGS_OPTIMIZE = -std=c++17 --offload-arch=gfx90a -Wno-unused-result -GPUFLAGS_DEBUG = -g -O0 -std=c++17 --offload-arch=gfx90a -Wno-unused-result +GPUFLAGS_OPTIMIZE = -std=c++17 --offload-arch=gfx90a -Wall -Wno-unused-result +GPUFLAGS_DEBUG = -g -O0 -std=c++17 --offload-arch=gfx90a -Wall -Wno-unused-result HIPCONFIG = -I$(ROCM_PATH)/include $(shell hipconfig -C) # workaround for Rocm 5.2 warnings #HIPCONFIG = $(shell hipconfig -C) diff --git a/src/grid/initial_conditions.cpp b/src/grid/initial_conditions.cpp index 5d98d8367..8c8c5b3f9 100644 --- a/src/grid/initial_conditions.cpp +++ b/src/grid/initial_conditions.cpp @@ -1,5 +1,5 @@ /*! \file initial_conditions.cpp -/* \brief Definitions of initial conditions for different tests. + * \brief Definitions of initial conditions for different tests. Note that the grid is mapped to 1D as i + (x_dim)*j + (x_dim*y_dim)*k. Functions are members of the Grid3D class. */ From e9c610526f7bb4e0bd33bb0b35db1734d4158b97 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Mon, 2 Jan 2023 12:47:06 -0500 Subject: [PATCH 153/694] Move magnetic divergence out of grid3D.cpp All the magnetic divergence stuff now lives in the magnetic_divergence* files. I also simplified the functions a bit --- src/grid/grid3D.cpp | 38 -------------- src/grid/grid3D.h | 11 ++-- src/main.cpp | 4 +- src/mhd/magnetic_divergence.cu | 76 ++++++++++++++++++---------- src/mhd/magnetic_divergence.h | 27 ---------- src/mhd/magnetic_divergence_tests.cu | 37 +++++++++----- 6 files changed, 82 insertions(+), 111 deletions(-) diff --git a/src/grid/grid3D.cpp b/src/grid/grid3D.cpp index bd67e87e6..8025f3744 100644 --- a/src/grid/grid3D.cpp +++ b/src/grid/grid3D.cpp @@ -43,11 +43,6 @@ #include "../dust/dust_cuda.h" // provides Dust_Update #endif -#ifdef MHD - #include "../mhd/magnetic_divergence.h" -#endif //MHD - - /*! \fn Grid3D(void) * \brief Constructor for the Grid. */ Grid3D::Grid3D(void) @@ -609,39 +604,6 @@ void Grid3D::Update_Time(){ } -#ifdef MHD - void Grid3D::checkMagneticDivergence(Grid3D &G, struct parameters P, int nfile) - { - // Compute the local value of the divergence - H.max_magnetic_divergence = mhd::launchCalculateMagneticDivergence(C.device, H.dx, H.dy, H.dz, H.nx, H.ny, H.nz, H.n_cells); - - #ifdef MPI_CHOLLA - // Now that we have the local maximum let's get the global maximum - H.max_magnetic_divergence = ReduceRealMax(H.max_magnetic_divergence); - #endif //MPI_CHOLLA - - // If the magnetic divergence is greater than the limit then raise a warning and exit - if (H.max_magnetic_divergence > H.magnetic_divergence_limit) - { - // Report the error and exit - chprintf("The magnetic divergence has exceeded the maximum allowed value. Divergence = %7.4e, the maximum allowed divergence = %7.4e\n", - H.max_magnetic_divergence, H.magnetic_divergence_limit); - chexit(-1); - } - else if (H.max_magnetic_divergence < 0.0) - { - // Report the error and exit - chprintf("The magnetic divergence is negative. Divergence = %7.4e\n", - H.max_magnetic_divergence); - chexit(-1); - } - else // The magnetic divergence is within acceptable bounds - { - chprintf("Global maximum magnetic divergence = %7.4e\n", H.max_magnetic_divergence); - } - } -#endif //MHD - /*! \fn void Reset(void) * \brief Reset the Grid3D class. */ void Grid3D::Reset(void) diff --git a/src/grid/grid3D.h b/src/grid/grid3D.h index c98971189..6e8fb4c94 100644 --- a/src/grid/grid3D.h +++ b/src/grid/grid3D.h @@ -412,7 +412,7 @@ class Grid3D /*! pointer to conserved variable on device */ Real *device; Real *d_density, *d_momentum_x, *d_momentum_y, *d_momentum_z, - *d_Energy, *d_scalar, *d_basic_scalar, + *d_Energy, *d_scalar, *d_basic_scalar, *d_magnetic_x, *d_magnetic_y, *d_magnetic_z, *d_GasEnergy; @@ -466,7 +466,12 @@ class Grid3D void Update_Time(); #ifdef MHD - void checkMagneticDivergence(Grid3D &G, struct parameters P, int nfile); + /*! + * \brief Compute the maximum magnetic divergence in the grid and report + * an error if it exceeds Grid3D::H::magnetic_divergence_limit or is + * negative. + */ + void checkMagneticDivergence(); #endif //MHD /*! \fn void Write_Header_Text(FILE *fp) @@ -679,7 +684,7 @@ class Grid3D void Spherical_Overdensity_3D(); void Clouds(); - + void Uniform_Grid(); void Zeldovich_Pancake( struct parameters P ); diff --git a/src/main.cpp b/src/main.cpp index 5ee396965..48dd32c70 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -193,7 +193,7 @@ int main(int argc, char *argv[]) #ifdef MHD // Check that the initial magnetic field has zero divergence - G.checkMagneticDivergence(G, P, nfile); + G.checkMagneticDivergence(); #endif //MHD // increment the next output time @@ -339,7 +339,7 @@ int main(int argc, char *argv[]) #ifdef MHD // Check that the magnetic field has zero divergence - G.checkMagneticDivergence(G, P, nfile); + G.checkMagneticDivergence(); #endif //MHD } /*end loop over timesteps*/ diff --git a/src/mhd/magnetic_divergence.cu b/src/mhd/magnetic_divergence.cu index 50aec4f1b..642cba86d 100644 --- a/src/mhd/magnetic_divergence.cu +++ b/src/mhd/magnetic_divergence.cu @@ -16,11 +16,15 @@ // External Includes // Local Includes +#include "../grid/grid3D.h" +#include "../io/io.h" #include "../mhd/magnetic_divergence.h" #include "../utils/cuda_utilities.h" #include "../utils/reduction_utilities.h" #include "../utils/DeviceVector.h" +#include "../utils/error_handling.h" #ifdef MHD + namespace mhd { // ========================================================================= @@ -77,37 +81,53 @@ namespace mhd reduction_utilities::gridReduceMax(maxDivergence, dev_maxDivergence); } // ========================================================================= +} // end namespace mhd - // ========================================================================= - Real launchCalculateMagneticDivergence(Real const *dev_conserved, - Real const dx, - Real const dy, - Real const dz, - int const nx, - int const ny, - int const nz, - int const n_cells) - { - // First let's create some variables we'll need. - cuda_utilities::AutomaticLaunchParams static const launchParams(mhd::calculateMagneticDivergence); - cuda_utilities::DeviceVector static dev_maxDivergence(1); +// ============================================================================= +void Grid3D::checkMagneticDivergence() +{ + // Compute the local value of the divergence + // First let's create some variables we'll need. + cuda_utilities::AutomaticLaunchParams static const launchParams(mhd::calculateMagneticDivergence); + cuda_utilities::DeviceVector static dev_maxDivergence(1); + + // Set the device side inverse time step to the smallest possible double + // so that the reduction isn't using the maximum value of the previous + // iteration + dev_maxDivergence.assign(std::numeric_limits::lowest()); - // Set the device side inverse time step to the smallest possible double - // so that the reduction isn't using the maximum value of the previous - // iteration - dev_maxDivergence.assign(std::numeric_limits::lowest()); + // Now lets get the local maximum divergence + hipLaunchKernelGGL(mhd::calculateMagneticDivergence, + launchParams.numBlocks, launchParams.threadsPerBlock, 0, 0, + C.device, dev_maxDivergence.data(), + H.dx, H.dy, H.dz, + H.nx, H.ny, H.nz, + H.n_cells); + CudaCheckError(); + H.max_magnetic_divergence = dev_maxDivergence[0]; - // Now lets get the local maximum divergence - hipLaunchKernelGGL(mhd::calculateMagneticDivergence, - launchParams.numBlocks, launchParams.threadsPerBlock, 0, 0, - dev_conserved, dev_maxDivergence.data(), - dx, dy, dz, - nx, ny, nz, - n_cells); - CudaCheckError(); + #ifdef MPI_CHOLLA + // Now that we have the local maximum let's get the global maximum + H.max_magnetic_divergence = ReduceRealMax(H.max_magnetic_divergence); + #endif //MPI_CHOLLA - return dev_maxDivergence[0]; + // If the magnetic divergence is greater than the limit then raise a warning and exit + if (H.max_magnetic_divergence > H.magnetic_divergence_limit) + { + // Report the error and exit + chprintf("The magnetic divergence has exceeded the maximum allowed value. Divergence = %7.4e, the maximum allowed divergence = %7.4e\n", H.max_magnetic_divergence, H.magnetic_divergence_limit); + chexit(-1); } - // ========================================================================= -} // end namespace mhd + else if (H.max_magnetic_divergence < 0.0) + { + // Report the error and exit + chprintf("The magnetic divergence is negative. Divergence = %7.4e\n", H.max_magnetic_divergence); + chexit(-1); + } + else // The magnetic divergence is within acceptable bounds + { + chprintf("Global maximum magnetic divergence = %7.4e\n", H.max_magnetic_divergence); + } +} +// ============================================================================= #endif // MHD diff --git a/src/mhd/magnetic_divergence.h b/src/mhd/magnetic_divergence.h index 8550591e2..304bf0823 100644 --- a/src/mhd/magnetic_divergence.h +++ b/src/mhd/magnetic_divergence.h @@ -51,31 +51,4 @@ namespace mhd int const nz, int const n_cells); // ========================================================================= - - // ========================================================================= - /*! - * \brief Handling launching and returning the value from the - * `mhd::calculateMagneticDivergence` kernel - * - * \param[in] dev_conserved The device array of conserved variables - * \param[in] dx Cell size in the X-direction - * \param[in] dy Cell size in the Y-direction - * \param[in] dz Cell size in the Z-direction - * \param[in] nx Number of cells in the X-direction - * \param[in] ny Number of cells in the Y-direction - * \param[in] nz Number of cells in the Z-direction - * \param[in] n_cells Total number of cells - * \return Real The maximum divergence of the magnetic field in the local - * part of the grid - */ - Real launchCalculateMagneticDivergence(Real const *dev_conserved, - Real const dx, - Real const dy, - Real const dz, - int const nx, - int const ny, - int const nz, - int const n_cells); - // ========================================================================= - } // end namespace mhd \ No newline at end of file diff --git a/src/mhd/magnetic_divergence_tests.cu b/src/mhd/magnetic_divergence_tests.cu index 9751f6e0b..3b1704aad 100644 --- a/src/mhd/magnetic_divergence_tests.cu +++ b/src/mhd/magnetic_divergence_tests.cu @@ -17,6 +17,7 @@ // Local Includes #include "../utils/testing_utilities.h" +#include "../grid/grid3D.h" #include "../mhd/magnetic_divergence.h" #include "../utils/DeviceVector.h" #include "../global/global.h" @@ -25,35 +26,45 @@ // ============================================================================= // Tests for the magnetic field divergence functions // ============================================================================= -TEST(tMHDLaunchCalculateMagneticDivergence, CorrectInputExpectCorrectOutput) +TEST(tMHDGrid3DcheckMagneticDivergence, CorrectInputExpectCorrectOutput) { // Grid Parameters & testing parameters size_t const gridSize = 96; // Needs to be at least 64 so that each thread has a value size_t const n_ghost = 4; - size_t const nx = gridSize+2*n_ghost, ny = nx, nz = nx; - size_t const n_cells = nx*ny*nz; - size_t const n_fields = 8; - Real const dx = 3, dy = dx, dz = dx; - std::vector host_grid(n_cells*n_fields); - // Fill grid with random values and randomly assign maximum value + // Instantiate Grid3D object + Grid3D G; + G.H.dx = 3; + G.H.dy = G.H.dx; + G.H.dz = G.H.dx; + G.H.nx = gridSize+2*n_ghost; + G.H.ny = G.H.nx; + G.H.nz = G.H.nx; + G.H.n_cells = G.H.nx * G.H.ny * G.H.nz; + G.H.n_fields = 8; + + // Setup host grid. Fill host grid with random values and randomly assign + // maximum value + std::vector host_grid(G.H.n_cells * G.H.n_fields); std::mt19937 prng(1); std::uniform_real_distribution doubleRand(1, 5); for (size_t i = 0; i < host_grid.size(); i++) { - host_grid.at(i) = doubleRand(prng); + host_grid.at(i) = doubleRand(prng) / 1E15; } // Allocating and copying to device cuda_utilities::DeviceVector dev_grid(host_grid.size()); + G.C.device = dev_grid.data(); dev_grid.cpyHostToDevice(host_grid); - // Get test data - Real testDivergence = mhd::launchCalculateMagneticDivergence(dev_grid.data(), dx, dy, dz, nx, ny, nz, n_cells); - + // Perform test + InitializeChollaMPI(NULL, NULL); + G.checkMagneticDivergence(); + MPI_Finalize(); // Perform Comparison - Real const fiducialDivergence = 3.6318132783263106; - testingUtilities::checkResults(fiducialDivergence, testDivergence, "maximum divergence"); + Real const fiducialDivergence = 3.6318132783263106 / 1E15; + testingUtilities::checkResults(fiducialDivergence, G.H.max_magnetic_divergence, "maximum divergence"); } // ============================================================================= // End of tests for the magnetic field divergence functions From 9b17a930653cb3de357de66f49db67ad82824602 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Mon, 2 Jan 2023 14:07:03 -0500 Subject: [PATCH 154/694] Reduce the number of make jobs in run_tests.sh Reduced the number of jobs launched to match the number of threads available. Will hopefully reduce memory pressure in github actions builds --- builds/run_tests.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/builds/run_tests.sh b/builds/run_tests.sh index 519c9d928..80fcab2a1 100755 --- a/builds/run_tests.sh +++ b/builds/run_tests.sh @@ -110,7 +110,7 @@ buildCholla () { echo -e "\nBuilding Cholla...\n" builtin cd $CHOLLA_ROOT - make -j TYPE=${CHOLLA_MAKE_TYPE} BUILD=${1} + make --jobs=$(nproc) TYPE=${CHOLLA_MAKE_TYPE} BUILD=${1} } # ============================================================================== @@ -121,7 +121,7 @@ buildChollaTests () { echo builtin cd $CHOLLA_ROOT - make -j TYPE=${CHOLLA_MAKE_TYPE} TEST=true + make --jobs=$(nproc) TYPE=${CHOLLA_MAKE_TYPE} TEST=true } # ============================================================================== From 7ffad0728cca4fbaf2de0c7859639cf5e8872639 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Thu, 5 Jan 2023 11:08:00 -0500 Subject: [PATCH 155/694] Move magnetic divergence out of Grid3D and into MHD The divergence calculations have all been moved out of Grid3D and into the `mhd` namespace. There wasn't any real reason for it to be in Grid3D and now all the MHD stuff is in the `mhd` namespace --- src/grid/grid3D.h | 18 +---------------- src/main.cpp | 7 +++++-- src/mhd/magnetic_divergence.cu | 29 +++++++++++++++------------- src/mhd/magnetic_divergence.h | 16 +++++++++++++++ src/mhd/magnetic_divergence_tests.cu | 5 ++--- 5 files changed, 40 insertions(+), 35 deletions(-) diff --git a/src/grid/grid3D.h b/src/grid/grid3D.h index 6e8fb4c94..d05b90214 100644 --- a/src/grid/grid3D.h +++ b/src/grid/grid3D.h @@ -245,12 +245,6 @@ struct Header Real sphere_center_y; Real sphere_center_z; - #ifdef MHD - Real max_magnetic_divergence; - Real const magnetic_divergence_limit = 1.0E-14; - #endif //MHD - - #ifdef GRAVITY /*! \var n_ghost_potential_offset * \brief Number of offset betewen hydro_ghost_cells and potential_ghost_cells */ @@ -464,17 +458,7 @@ class Grid3D Real Update_Hydro_Grid(void); void Update_Time(); - - #ifdef MHD - /*! - * \brief Compute the maximum magnetic divergence in the grid and report - * an error if it exceeds Grid3D::H::magnetic_divergence_limit or is - * negative. - */ - void checkMagneticDivergence(); - #endif //MHD - - /*! \fn void Write_Header_Text(FILE *fp) + /*! \fn void Write_Header_Text(FILE *fp) * \brief Write the relevant header info to a text output file. */ void Write_Header_Text(FILE *fp); diff --git a/src/main.cpp b/src/main.cpp index 48dd32c70..9e59bd651 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -22,6 +22,9 @@ #ifdef STAR_FORMATION #include "particles/star_formation.h" #endif +#ifdef MHD +#include "mhd/magnetic_divergence.h" +#endif //MHD #include "grid/grid_enum.h" @@ -193,7 +196,7 @@ int main(int argc, char *argv[]) #ifdef MHD // Check that the initial magnetic field has zero divergence - G.checkMagneticDivergence(); + mhd::checkMagneticDivergence(G); #endif //MHD // increment the next output time @@ -339,7 +342,7 @@ int main(int argc, char *argv[]) #ifdef MHD // Check that the magnetic field has zero divergence - G.checkMagneticDivergence(); + mhd::checkMagneticDivergence(G); #endif //MHD } /*end loop over timesteps*/ diff --git a/src/mhd/magnetic_divergence.cu b/src/mhd/magnetic_divergence.cu index 642cba86d..b41185d43 100644 --- a/src/mhd/magnetic_divergence.cu +++ b/src/mhd/magnetic_divergence.cu @@ -81,10 +81,9 @@ namespace mhd reduction_utilities::gridReduceMax(maxDivergence, dev_maxDivergence); } // ========================================================================= -} // end namespace mhd // ============================================================================= -void Grid3D::checkMagneticDivergence() +Real checkMagneticDivergence(Grid3D const &G) { // Compute the local value of the divergence // First let's create some variables we'll need. @@ -99,35 +98,39 @@ void Grid3D::checkMagneticDivergence() // Now lets get the local maximum divergence hipLaunchKernelGGL(mhd::calculateMagneticDivergence, launchParams.numBlocks, launchParams.threadsPerBlock, 0, 0, - C.device, dev_maxDivergence.data(), - H.dx, H.dy, H.dz, - H.nx, H.ny, H.nz, - H.n_cells); + G.C.device, dev_maxDivergence.data(), + G.H.dx, G.H.dy, G.H.dz, + G.H.nx, G.H.ny, G.H.nz, + G.H.n_cells); CudaCheckError(); - H.max_magnetic_divergence = dev_maxDivergence[0]; + Real max_magnetic_divergence = dev_maxDivergence[0]; #ifdef MPI_CHOLLA // Now that we have the local maximum let's get the global maximum - H.max_magnetic_divergence = ReduceRealMax(H.max_magnetic_divergence); + max_magnetic_divergence = ReduceRealMax(max_magnetic_divergence); #endif //MPI_CHOLLA // If the magnetic divergence is greater than the limit then raise a warning and exit - if (H.max_magnetic_divergence > H.magnetic_divergence_limit) + Real static const magnetic_divergence_limit = 1.0E-14; + if (max_magnetic_divergence > magnetic_divergence_limit) { // Report the error and exit - chprintf("The magnetic divergence has exceeded the maximum allowed value. Divergence = %7.4e, the maximum allowed divergence = %7.4e\n", H.max_magnetic_divergence, H.magnetic_divergence_limit); + chprintf("The magnetic divergence has exceeded the maximum allowed value. Divergence = %7.4e, the maximum allowed divergence = %7.4e\n", max_magnetic_divergence, magnetic_divergence_limit); chexit(-1); } - else if (H.max_magnetic_divergence < 0.0) + else if (max_magnetic_divergence < 0.0) { // Report the error and exit - chprintf("The magnetic divergence is negative. Divergence = %7.4e\n", H.max_magnetic_divergence); + chprintf("The magnetic divergence is negative. Divergence = %7.4e\n", max_magnetic_divergence); chexit(-1); } else // The magnetic divergence is within acceptable bounds { - chprintf("Global maximum magnetic divergence = %7.4e\n", H.max_magnetic_divergence); + chprintf("Global maximum magnetic divergence = %7.4e\n", max_magnetic_divergence); } + + return max_magnetic_divergence; } // ============================================================================= +} // end namespace mhd #endif // MHD diff --git a/src/mhd/magnetic_divergence.h b/src/mhd/magnetic_divergence.h index 304bf0823..a32c7ec74 100644 --- a/src/mhd/magnetic_divergence.h +++ b/src/mhd/magnetic_divergence.h @@ -14,6 +14,7 @@ // Local Includes #include "../global/global.h" +#include "../grid/grid3D.h" #include "../global/global_cuda.h" #include "../utils/gpu.hpp" @@ -51,4 +52,19 @@ namespace mhd int const nz, int const n_cells); // ========================================================================= + + // ========================================================================= + /*! + * \brief Compute the maximum magnetic divergence in the grid and report + * an error if it exceeds the magnetic divergence limit or is negative. The + * magnetic divergence limit is 1E-14 as determined by Athena as a + * reasonable upper bound for correctness. + * + * \param G The grid object + * \return Real The maximum magnetic divergence found in the grid. Can + * usually be ignored since all checking is done in the fucntion, mostly + * this return is for testing. + */ + Real checkMagneticDivergence(Grid3D const &G); + // ========================================================================= } // end namespace mhd \ No newline at end of file diff --git a/src/mhd/magnetic_divergence_tests.cu b/src/mhd/magnetic_divergence_tests.cu index 3b1704aad..509d9af30 100644 --- a/src/mhd/magnetic_divergence_tests.cu +++ b/src/mhd/magnetic_divergence_tests.cu @@ -17,7 +17,6 @@ // Local Includes #include "../utils/testing_utilities.h" -#include "../grid/grid3D.h" #include "../mhd/magnetic_divergence.h" #include "../utils/DeviceVector.h" #include "../global/global.h" @@ -60,11 +59,11 @@ TEST(tMHDGrid3DcheckMagneticDivergence, CorrectInputExpectCorrectOutput) // Perform test InitializeChollaMPI(NULL, NULL); - G.checkMagneticDivergence(); + double max_magnetic_divergence = mhd::checkMagneticDivergence(G); MPI_Finalize(); // Perform Comparison Real const fiducialDivergence = 3.6318132783263106 / 1E15; - testingUtilities::checkResults(fiducialDivergence, G.H.max_magnetic_divergence, "maximum divergence"); + testingUtilities::checkResults(fiducialDivergence, max_magnetic_divergence, "maximum divergence"); } // ============================================================================= // End of tests for the magnetic field divergence functions From bfe2198d54f8aff433e673420e8f1ceb4ec743b9 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Thu, 5 Jan 2023 11:48:38 -0500 Subject: [PATCH 156/694] Cell centered B fields now use structered binding `mhd::utils::cellCenteredMagneticFields` now returns the centered magnetic fields using structured binding so the resultant values can be declared const. --- src/hydro/hydro_cuda.cu | 13 +++-------- src/integrators/VL_3D_cuda.cu | 3 +-- src/reconstruction/pcm_cuda.cu | 6 ++--- ...zedMpi_CorrectInputExpectCorrectOutput.txt | 2 +- src/utils/gpu.hpp | 1 - src/utils/mhd_utilities.h | 23 ++++++++++++------- src/utils/mhd_utilities_tests.cu | 3 +-- 7 files changed, 23 insertions(+), 28 deletions(-) diff --git a/src/hydro/hydro_cuda.cu b/src/hydro/hydro_cuda.cu index 3b60f97f9..e6b110ffc 100644 --- a/src/hydro/hydro_cuda.cu +++ b/src/hydro/hydro_cuda.cu @@ -524,9 +524,6 @@ __global__ void Calc_dt_3D(Real *dev_conserved, Real *dev_dti, Real gamma, int n Real max_dti = -DBL_MAX; Real d, d_inv, vx, vy, vz, E; - #ifdef MHD - Real avgBx, avgBy, avgBz; - #endif //MHD int xid, yid, zid, n_cells; n_cells = nx*ny*nz; @@ -554,7 +551,7 @@ __global__ void Calc_dt_3D(Real *dev_conserved, Real *dev_dti, Real gamma, int n #ifdef MHD // Compute the cell centered magnetic field using a straight average of // the faces - mhd::utils::cellCenteredMagneticFields(dev_conserved, id, xid, yid, zid, n_cells, nx, ny, avgBx, avgBy, avgBz); + auto const [avgBx, avgBy, avgBz] = mhd::utils::cellCenteredMagneticFields(dev_conserved, id, xid, yid, zid, n_cells, nx, ny); #endif //MHD // Compute the maximum inverse crossing time in the cell @@ -631,9 +628,6 @@ __global__ void Average_Slow_Cells_3D(Real *dev_conserved, int nx, int ny, int n int id, xid, yid, zid, n_cells; Real d, d_inv, vx, vy, vz, E, max_dti; Real speed, temp, P, cs; - #ifdef MHD - Real avgBx, avgBy, avgBz; - #endif //MHD // get a global thread ID id = threadIdx.x + blockIdx.x * blockDim.x; @@ -654,7 +648,7 @@ __global__ void Average_Slow_Cells_3D(Real *dev_conserved, int nx, int ny, int n #ifdef MHD // Compute the cell centered magnetic field using a straight average of the faces - mhdUtils::cellCenteredMagneticFields(dev_conserved, id, xid, yid, zid, n_cells, nx, ny, avgBx, avgBy, avgBz); + auto [avgBx, avgBy, avgBz] = mhdUtils::cellCenteredMagneticFields(dev_conserved, id, xid, yid, zid, n_cells, nx, ny); #endif //MHD // Compute the maximum inverse crossing time in the cell @@ -813,8 +807,7 @@ __global__ void Partial_Update_Advected_Internal_Energy_3D( Real *dev_conserved, E_kin = hydro_utilities::Calc_Kinetic_Energy_From_Velocity(d, vx, vy, vz); #ifdef MHD // Add the magnetic energy - Real centeredBx, centeredBy, centeredBz; - mhd::utils::cellCenteredMagneticFields(dev_conserved, id, xid, yid, zid, n_cells, nx, ny, centeredBx, centeredBy, centeredBz) + auto [centeredBx, centeredBy, centeredBz] = mhd::utils::cellCenteredMagneticFields(dev_conserved, id, xid, yid, zid, n_cells, nx, ny) E_kin += mhd::utils::computeMagneticEnergy(magX, magY, magZ); #endif //MHD P = hydro_utilities::Get_Pressure_From_DE( E, E - E_kin, GE, gamma ); diff --git a/src/integrators/VL_3D_cuda.cu b/src/integrators/VL_3D_cuda.cu index 1f9a6a459..f007b6ce3 100644 --- a/src/integrators/VL_3D_cuda.cu +++ b/src/integrators/VL_3D_cuda.cu @@ -331,8 +331,7 @@ __global__ void Update_Conserved_Variables_3D_half(Real *dev_conserved, Real *de E_kin = hydro_utilities::Calc_Kinetic_Energy_From_Velocity(d, vx, vy, vz); #ifdef MHD // Add the magnetic energy - Real centeredBx, centeredBy, centeredBz; - mhd::utils::cellCenteredMagneticFields(dev_conserved, id, xid, yid, zid, n_cells, nx, ny, centeredBx, centeredBy, centeredBz) + auto const [centeredBx, centeredBy, centeredBz] = mhd::utils::cellCenteredMagneticFields(dev_conserved, id, xid, yid, zid, n_cells, nx, ny) E_kin += mhd::utils::computeMagneticEnergy(centeredBx, centeredBy, centeredBz); #endif //MHD P = hydro_utilities::Get_Pressure_From_DE( E, E - E_kin, GE, gamma ); diff --git a/src/reconstruction/pcm_cuda.cu b/src/reconstruction/pcm_cuda.cu index 4998dcd92..e6d48999a 100644 --- a/src/reconstruction/pcm_cuda.cu +++ b/src/reconstruction/pcm_cuda.cu @@ -299,10 +299,8 @@ __global__ void PCM_Reconstruction_3D(Real *dev_conserved, } #endif //SCALAR #ifdef MHD - Real cellCenteredBx, cellCenteredBy, cellCenteredBz; - mhd::utils::cellCenteredMagneticFields(dev_conserved, - id, xid, yid, zid, n_cells, nx, ny, - cellCenteredBx, cellCenteredBy, cellCenteredBz); + auto const [cellCenteredBx, cellCenteredBy, cellCenteredBz] = mhd::utils::cellCenteredMagneticFields(dev_conserved, + id, xid, yid, zid, n_cells, nx, ny); #endif //MHD #ifdef DE Real const ge = dev_conserved[(n_fields-1)*n_cells + id]; diff --git a/src/system_tests/input_files/tMHDSYSTEMSodShockTubeParameterizedMpi_CorrectInputExpectCorrectOutput.txt b/src/system_tests/input_files/tMHDSYSTEMSodShockTubeParameterizedMpi_CorrectInputExpectCorrectOutput.txt index 6fb66732b..71dd9bd91 100644 --- a/src/system_tests/input_files/tMHDSYSTEMSodShockTubeParameterizedMpi_CorrectInputExpectCorrectOutput.txt +++ b/src/system_tests/input_files/tMHDSYSTEMSodShockTubeParameterizedMpi_CorrectInputExpectCorrectOutput.txt @@ -1,5 +1,5 @@ # -# Parameter File for 1D Sod Shock tube +# Parameter File for 3D Sod Shock tube # ################################################ diff --git a/src/utils/gpu.hpp b/src/utils/gpu.hpp index 66f2885f2..461f9821b 100644 --- a/src/utils/gpu.hpp +++ b/src/utils/gpu.hpp @@ -53,7 +53,6 @@ static constexpr int maxWarpsPerBlock = 1024/WARPSIZE; #define cudaMemcpy hipMemcpy #define cudaMemcpyAsync hipMemcpyAsync #define cudaMemcpyPeer hipMemcpyPeer -#define cudaMemcpyPeer hipMemcpyPeer #define cudaMemcpyDeviceToHost hipMemcpyDeviceToHost #define cudaMemcpyDeviceToDevice hipMemcpyDeviceToDevice #define cudaMemcpyHostToDevice hipMemcpyHostToDevice diff --git a/src/utils/mhd_utilities.h b/src/utils/mhd_utilities.h index 6b587e535..ef64b9536 100644 --- a/src/utils/mhd_utilities.h +++ b/src/utils/mhd_utilities.h @@ -298,32 +298,39 @@ namespace utils{ * \param[out] avgBx The cell centered average magnetic field in the x-direction * \param[out] avgBy The cell centered average magnetic field in the y-direction * \param[out] avgBz The cell centered average magnetic field in the z-direction + * + * \return Real local struct with the X, Y, and Z cell centered magnetic + * fields. Intended to be called with structured binding like `auto [x, y, + * z] = mhd::utils::cellCenteredMagneticFields(*args*) */ - inline __host__ __device__ void cellCenteredMagneticFields(Real const *dev_conserved, + inline __host__ __device__ auto cellCenteredMagneticFields(Real const *dev_conserved, size_t const &id, size_t const &xid, size_t const &yid, size_t const &zid, size_t const &n_cells, size_t const &nx, - size_t const &ny, - Real &avgBx, - Real &avgBy, - Real &avgBz) + size_t const &ny) { // Ternary operator to check that no values outside of the magnetic field // arrays are loaded. If the cell is on the edge that doesn't have magnetic // fields on both sides then instead set the centered magnetic field to be // equal to the magnetic field of the closest edge. T - avgBx = (xid > 0) ? + Real avgBx = (xid > 0) ? /*if true*/ 0.5 * (dev_conserved[(grid_enum::magnetic_x)*n_cells + id] + dev_conserved[(grid_enum::magnetic_x)*n_cells + cuda_utilities::compute1DIndex(xid-1, yid, zid, nx, ny)]): /*if false*/ dev_conserved[(grid_enum::magnetic_x)*n_cells + id]; - avgBy = (yid > 0) ? + Real avgBy = (yid > 0) ? /*if true*/ 0.5 * (dev_conserved[(grid_enum::magnetic_y)*n_cells + id] + dev_conserved[(grid_enum::magnetic_y)*n_cells + cuda_utilities::compute1DIndex(xid, yid-1, zid, nx, ny)]): /*if false*/ dev_conserved[(grid_enum::magnetic_y)*n_cells + id]; - avgBz = (zid > 0) ? + Real avgBz = (zid > 0) ? /*if true*/ 0.5 * (dev_conserved[(grid_enum::magnetic_z)*n_cells + id] + dev_conserved[(grid_enum::magnetic_z)*n_cells + cuda_utilities::compute1DIndex(xid, yid, zid-1, nx, ny)]): /*if false*/ dev_conserved[(grid_enum::magnetic_z)*n_cells + id]; + + struct returnStruct + { + Real x, y, z; + }; + return returnStruct{avgBx, avgBy, avgBz}; } #endif // MHD // ========================================================================= diff --git a/src/utils/mhd_utilities_tests.cu b/src/utils/mhd_utilities_tests.cu index d07b690d4..83500c68f 100644 --- a/src/utils/mhd_utilities_tests.cu +++ b/src/utils/mhd_utilities_tests.cu @@ -527,10 +527,9 @@ TEST(tMHDCellCenteredMagneticFields, double const fiducialAvgBx = 637.5, fiducialAvgBy = 761.5, fiducialAvgBz = 883.5; - double testAvgBx, testAvgBy, testAvgBz; // Call the function to test - mhd::utils::cellCenteredMagneticFields(testGrid.data(), id, xid, yid, zid, n_cells, nx, ny, testAvgBx, testAvgBy, testAvgBz); + auto [testAvgBx, testAvgBy, testAvgBz] = mhd::utils::cellCenteredMagneticFields(testGrid.data(), id, xid, yid, zid, n_cells, nx, ny); // Check the results testingUtilities::checkResults(fiducialAvgBx, testAvgBx, "cell centered Bx value"); From 05f086eeac36c03e4fad182b9853e7a698b634c1 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Thu, 5 Jan 2023 12:00:14 -0500 Subject: [PATCH 157/694] Fix threadguard in magnetic field update --- src/mhd/magnetic_update.cu | 6 +++--- src/mhd/magnetic_update_tests.cu | 8 ++++---- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/mhd/magnetic_update.cu b/src/mhd/magnetic_update.cu index 550fb7188..28a1d9465 100644 --- a/src/mhd/magnetic_update.cu +++ b/src/mhd/magnetic_update.cu @@ -36,9 +36,9 @@ namespace mhd // Thread guard to avoid overrun and to skip ghost cells that cannot be // evolved due to missing electric fields that can't be reconstructed - if ( xid < nx-1 - and yid < ny-1 - and zid < nz-1) + if ( xid < nx-2 + and yid < ny-2 + and zid < nz-2) { // Compute the three dt/dx quantities Real const dtodx = dt/dx; diff --git a/src/mhd/magnetic_update_tests.cu b/src/mhd/magnetic_update_tests.cu index 501803e1c..f4d0d44a0 100644 --- a/src/mhd/magnetic_update_tests.cu +++ b/src/mhd/magnetic_update_tests.cu @@ -36,7 +36,7 @@ public: */ tMHDUpdateMagneticField3D() : - nx(2), + nx(3), ny(nx), nz(nx), n_cells(nx*ny*nz), @@ -141,9 +141,9 @@ TEST_F(tMHDUpdateMagneticField3D, CorrectInputExpectCorrectOutput) { // Fiducial values - fiducialData.at(40) = 42.559999999999995; - fiducialData.at(48) = 44.160000000000004; - fiducialData.at(56) = 57.280000000000001; + fiducialData.at(135) = 142.68000000000001; + fiducialData.at(162) = 151.75999999999999; + fiducialData.at(189) = 191.56; // Launch kernel and check results runTest(); From 1c50a705a9e7b2263ed28df17e3259ece782ab92 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Thu, 5 Jan 2023 12:02:43 -0500 Subject: [PATCH 158/694] Fix copy-pasted comment --- src/mhd/magnetic_divergence.cu | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/mhd/magnetic_divergence.cu b/src/mhd/magnetic_divergence.cu index b41185d43..447e11163 100644 --- a/src/mhd/magnetic_divergence.cu +++ b/src/mhd/magnetic_divergence.cu @@ -90,9 +90,8 @@ Real checkMagneticDivergence(Grid3D const &G) cuda_utilities::AutomaticLaunchParams static const launchParams(mhd::calculateMagneticDivergence); cuda_utilities::DeviceVector static dev_maxDivergence(1); - // Set the device side inverse time step to the smallest possible double - // so that the reduction isn't using the maximum value of the previous - // iteration + // Set the device side divergence to the smallest possible double so that + // the reduction isn't using the maximum value of the previous iteration dev_maxDivergence.assign(std::numeric_limits::lowest()); // Now lets get the local maximum divergence From 0be3a500582afec7a193745bddaf186b73c2c1f5 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Thu, 5 Jan 2023 15:44:05 -0500 Subject: [PATCH 159/694] Add citations in comments --- src/integrators/VL_3D_cuda.cu | 5 ++++- src/mhd/ct_electric_fields.cu | 31 +++++++++++++++----------- src/mhd/ct_electric_fields.h | 10 ++++++--- src/mhd/magnetic_divergence.cu | 5 ++++- src/mhd/magnetic_update.cu | 7 +++++- src/mhd/magnetic_update.h | 4 +++- src/riemann_solvers/hlld_cuda.cu | 37 +++++++++++++++++++++++++++----- src/riemann_solvers/hlld_cuda.h | 14 +++++++----- 8 files changed, 83 insertions(+), 30 deletions(-) diff --git a/src/integrators/VL_3D_cuda.cu b/src/integrators/VL_3D_cuda.cu index f007b6ce3..9a9ee6cb1 100644 --- a/src/integrators/VL_3D_cuda.cu +++ b/src/integrators/VL_3D_cuda.cu @@ -1,5 +1,8 @@ /*! \file VL_3D_cuda.cu - * \brief Definitions of the cuda 3 D VL algorithm functions. */ + * \brief Definitions of the cuda 3 D VL algorithm functions. MHD algorithm + * from Stone & Gardiner 2009 "A simple unsplit Godunov method for + * multidimensional MHD" + */ #if defined(CUDA) && defined(VL) diff --git a/src/mhd/ct_electric_fields.cu b/src/mhd/ct_electric_fields.cu index cd90ae2ac..aa0ac3061 100644 --- a/src/mhd/ct_electric_fields.cu +++ b/src/mhd/ct_electric_fields.cu @@ -1,7 +1,9 @@ /*! * \file ct_electric_fields.cu * \author Robert 'Bob' Caddy (rvc@pitt.edu) - * \brief Contains implementation for the CT electric fields code + * \brief Contains implementation for the CT electric fields code. Method from + * Stone & Gardiner 2009 "A simple unsplit Godunov method for multidimensional + * MHD" hereafter referred to as "S&G 2009" * */ @@ -80,7 +82,7 @@ namespace mhd // X electric field // ================ - // Y-direction slope on the positive Y side + // Y-direction slope on the positive Y side. S&G 2009 equation 23 signUpwind = fluxZ[cuda_utilities::compute1DIndex(xid, yid, zid-1, nx, ny)]; if (signUpwind > 0.0) { @@ -96,7 +98,7 @@ namespace mhd + mhd::_internal::_ctSlope(fluxY, dev_conserved, -1, 0, -1, -1, 1, -1, xid, yid, zid, nx, ny, n_cells)); } - // Y-direction slope on the negative Y side + // Y-direction slope on the negative Y side. S&G 2009 equation 23 signUpwind = fluxZ[cuda_utilities::compute1DIndex(xid, yid-1, zid-1, nx, ny)]; if (signUpwind > 0.0) { @@ -112,7 +114,7 @@ namespace mhd + mhd::_internal::_ctSlope(fluxY, dev_conserved, -1, 0, 1, -1, 1, -1, xid, yid, zid, nx, ny, n_cells)); } - // Z-direction slope on the positive Z side + // Z-direction slope on the positive Z side. S&G 2009 equation 23 signUpwind = fluxY[cuda_utilities::compute1DIndex(xid, yid-1, zid, nx, ny)]; if (signUpwind > 0.0) { @@ -128,7 +130,7 @@ namespace mhd + mhd::_internal::_ctSlope(fluxZ, dev_conserved, 1, 0, -1, -1, 2, -1, xid, yid, zid, nx, ny, n_cells)); } - // Z-direction slope on the negative Z side + // Z-direction slope on the negative Z side. S&G 2009 equation 23 signUpwind = fluxY[cuda_utilities::compute1DIndex(xid, yid-1, zid-1, nx, ny)]; if (signUpwind > 0.0) { @@ -154,6 +156,7 @@ namespace mhd // sum and average face centered electric fields and slopes to get the // edge averaged electric field. + // S&G 2009 equation 22 ctElectricFields[threadId + 0*n_cells] = 0.25 * (+ face_y_pos + face_y_neg + face_z_pos @@ -167,7 +170,7 @@ namespace mhd // Y electric field // ================ - // X-direction slope on the positive X side + // X-direction slope on the positive X side. S&G 2009 equation 23 signUpwind = fluxZ[cuda_utilities::compute1DIndex(xid, yid, zid-1, nx, ny)]; if (signUpwind > 0.0) { @@ -183,7 +186,7 @@ namespace mhd + mhd::_internal::_ctSlope(fluxX, dev_conserved, 1, 1, -1, -1, 0, -1, xid, yid, zid, nx, ny, n_cells)); } - // X-direction slope on the negative X side + // X-direction slope on the negative X side. S&G 2009 equation 23 signUpwind = fluxZ[cuda_utilities::compute1DIndex(xid-1, yid, zid-1, nx, ny)]; if (signUpwind > 0.0) { @@ -199,7 +202,7 @@ namespace mhd + mhd::_internal::_ctSlope(fluxX, dev_conserved, 1, 1, 0, -1, 0, -1, xid, yid, zid, nx, ny, n_cells)); } - // Z-direction slope on the positive Z side + // Z-direction slope on the positive Z side. S&G 2009 equation 23 signUpwind = fluxX[cuda_utilities::compute1DIndex(xid-1, yid, zid, nx, ny)]; if (signUpwind > 0.0) { @@ -215,7 +218,7 @@ namespace mhd + mhd::_internal::_ctSlope(fluxZ, dev_conserved, -1, 1, -1, -1, 2, -1, xid, yid, zid, nx, ny, n_cells)); } - // Z-direction slope on the negative Z side + // Z-direction slope on the negative Z side. S&G 2009 equation 23 signUpwind = fluxX[cuda_utilities::compute1DIndex(xid-1, yid, zid-1, nx, ny)]; if (signUpwind > 0.0) { @@ -240,6 +243,7 @@ namespace mhd // sum and average face centered electric fields and slopes to get the // edge averaged electric field. + // S&G 2009 equation 22 ctElectricFields[threadId + 1*n_cells] = 0.25 * (+ face_x_pos + face_x_neg + face_z_pos @@ -253,7 +257,7 @@ namespace mhd // Z electric field // ================ - // Y-direction slope on the positive Y side + // Y-direction slope on the positive Y side. S&G 2009 equation 23 signUpwind = fluxX[cuda_utilities::compute1DIndex(xid-1, yid, zid, nx, ny)]; if (signUpwind > 0.0) { @@ -269,7 +273,7 @@ namespace mhd + mhd::_internal::_ctSlope(fluxY, dev_conserved, 1, 2, -1, -1, 1, -1, xid, yid, zid, nx, ny, n_cells)); } - // Y-direction slope on the negative Y side + // Y-direction slope on the negative Y side. S&G 2009 equation 23 signUpwind = fluxX[cuda_utilities::compute1DIndex(xid-1, yid-1, zid, nx, ny)]; if (signUpwind > 0.0) { @@ -285,7 +289,7 @@ namespace mhd + mhd::_internal::_ctSlope(fluxY, dev_conserved, 1, 2, 1, -1, 1, -1, xid, yid, zid, nx, ny, n_cells)); } - // X-direction slope on the positive X side + // X-direction slope on the positive X side. S&G 2009 equation 23 signUpwind = fluxY[cuda_utilities::compute1DIndex(xid, yid-1, zid, nx, ny)]; if (signUpwind > 0.0) { @@ -301,7 +305,7 @@ namespace mhd + mhd::_internal::_ctSlope(fluxX, dev_conserved, -1, 2, -1, -1, 0, -1, xid, yid, zid, nx, ny, n_cells)); } - // X-direction slope on the negative X side + // X-direction slope on the negative X side. S&G 2009 equation 23 signUpwind = fluxY[cuda_utilities::compute1DIndex(xid-1, yid-1, zid, nx, ny)]; if (signUpwind > 0.0) { @@ -326,6 +330,7 @@ namespace mhd // sum and average face centered electric fields and slopes to get the // edge averaged electric field. + // S&G 2009 equation 22 ctElectricFields[threadId + 2*n_cells] = 0.25 * (+ face_x_pos + face_x_neg + face_y_pos diff --git a/src/mhd/ct_electric_fields.h b/src/mhd/ct_electric_fields.h index 3db5927ad..9ba8780ee 100644 --- a/src/mhd/ct_electric_fields.h +++ b/src/mhd/ct_electric_fields.h @@ -1,7 +1,9 @@ /*! * \file ct_electric_fields.h * \author Robert 'Bob' Caddy (rvc@pitt.edu) - * \brief Contains the declaration for the kernel that computes the CT electric fields + * \brief Contains the declaration for the kernel that computes the CT electric + * fields. Method from Stone & Gardiner 2009 "A simple unsplit Godunov method + * for multidimensional MHD" hereafter referred to as "S&G 2009" * */ @@ -31,7 +33,8 @@ namespace mhd // ===================================================================== /*! * \brief Compute and return the slope of the electric field used to - compute the CT electric fields + * compute the CT electric fields. This function implements S&G 2009 + * equation 24 * * \param[in] flux The flux array * \param[in] dev_conserved The conserved variable array @@ -118,10 +121,11 @@ namespace mhd Real const electric_face = fluxSign * flux[idxFlux + (int(fluxSign == 1)+grid_enum::magnetic_start)*n_cells]; // Compute the slope and return it + // S&G 2009 equation 24 return electric_face - electric_centered; } // ===================================================================== - }// _mhd_internal namespace + }// mhd::_internal namespace // ========================================================================= /*! diff --git a/src/mhd/magnetic_divergence.cu b/src/mhd/magnetic_divergence.cu index 447e11163..fc84cbb3f 100644 --- a/src/mhd/magnetic_divergence.cu +++ b/src/mhd/magnetic_divergence.cu @@ -5,7 +5,9 @@ * for the various kernels, functions, and tools required for the 3D VL+CT MHD * integrator. Due to the CUDA/HIP compiler requiring that device functions be * directly accessible to the file they're used in most device functions will be - * implemented in the header file + * implemented in the header file. Uses the same method described in Stone et + * al. 2008 "ATHENA: A new code for astrophysical MHD", hereafter referred to as + * Stone et al. 2008 * */ @@ -62,6 +64,7 @@ namespace mhd id_zMin1 = cuda_utilities::compute1DIndex(xid , yid , zid-1, nx, ny); // Compute divergence + // Stone et al. 2008 equation 25 cellDivergence = (( dev_conserved[id + (grid_enum::magnetic_x)*n_cells] - dev_conserved[id_xMin1 + (grid_enum::magnetic_x)*n_cells]) diff --git a/src/mhd/magnetic_update.cu b/src/mhd/magnetic_update.cu index 28a1d9465..541fb83ba 100644 --- a/src/mhd/magnetic_update.cu +++ b/src/mhd/magnetic_update.cu @@ -1,7 +1,9 @@ /*! * \file magnetic_update.cu * \author Robert 'Bob' Caddy (rvc@pitt.edu) - * \brief Contains the definition of the kernel to update the magnetic field + * \brief Contains the definition of the kernel to update the magnetic field. + * Method from Stone & Gardiner 2009 "A simple unsplit Godunov method for + * multidimensional MHD" hereafter referred to as "S&G 2009" * */ @@ -61,16 +63,19 @@ namespace mhd // Perform Updates // X field update + // S&G 2009 equation 10 destinationGrid[threadId + (grid_enum::magnetic_x)*n_cells] = sourceGrid[threadId + (grid_enum::magnetic_x)*n_cells] + dtodz * (electric_y_3 - electric_y_1) + dtody * (electric_z_1 - electric_z_3); // Y field update + // S&G 2009 equation 11 destinationGrid[threadId + (grid_enum::magnetic_y)*n_cells] = sourceGrid[threadId + (grid_enum::magnetic_y)*n_cells] + dtodx * (electric_z_3 - electric_z_2) + dtodz * (electric_x_1 - electric_x_3); // Z field update + // S&G 2009 equation 12 destinationGrid[threadId + (grid_enum::magnetic_z)*n_cells] = sourceGrid[threadId + (grid_enum::magnetic_z)*n_cells] + dtody * (electric_x_3 - electric_x_2) + dtodx * (electric_y_2 - electric_y_3); diff --git a/src/mhd/magnetic_update.h b/src/mhd/magnetic_update.h index 2c89e26ba..4b71689b6 100644 --- a/src/mhd/magnetic_update.h +++ b/src/mhd/magnetic_update.h @@ -1,7 +1,9 @@ /*! * \file magnetic_update.h * \author Robert 'Bob' Caddy (rvc@pitt.edu) - * \brief Contains the declaration of the kernel to update the magnetic field + * \brief Contains the declaration of the kernel to update the magnetic field. + * Method from Stone & Gardiner 2009 "A simple unsplit Godunov method for + * multidimensional MHD" hereafter referred to as "S&G 2009" * */ diff --git a/src/riemann_solvers/hlld_cuda.cu b/src/riemann_solvers/hlld_cuda.cu index a7793150b..8b5ac667e 100644 --- a/src/riemann_solvers/hlld_cuda.cu +++ b/src/riemann_solvers/hlld_cuda.cu @@ -1,7 +1,9 @@ /*! * \file hlld_cuda.cu * \author Robert 'Bob' Caddy (rvc@pitt.edu) - * \brief Contains the implementation of the HLLD solver + * \brief Contains the implementation of the HLLD solver from Miyoshi & Kusano 2005 + * "A multi-state HLL approximate Riemann solver for ideal magnetohydrodynamics", + * hereafter referred to as M&K 2005 * */ @@ -241,6 +243,7 @@ namespace mhd // If we're in the L state then assign fluxes and return. // In this state the flow is supersonic + // M&K 2005 equation 66 if (speedL >= 0.0) { mhd::_internal::_returnFluxes(threadId, o1, o2, o3, n_cells, @@ -282,6 +285,7 @@ namespace mhd // If we're in the R state then assign fluxes and return. // In this state the flow is supersonic + // M&K 2005 equation 66 if (speedR <= 0.0) { mhd::_internal::_returnFluxes(threadId, o1, o2, o3, n_cells, @@ -307,6 +311,8 @@ namespace mhd // ================================================================= // Shared quantity // note that velocityStarX = speedM + // M&K 2005 equation 23, might need to switch to eqn. 41 in the + // future though they should produce identical results Real totalPressureStar = totalPressureL + densityL * (speedL - velocityXL) * (speedM - velocityXL); @@ -355,6 +361,7 @@ namespace mhd // If we're in the L* state then assign fluxes and return. // In this state the flow is subsonic + // M&K 2005 equation 66 if (speedStarL >= 0.0) { mhd::_internal::_returnFluxes(threadId, o1, o2, o3, n_cells, @@ -419,6 +426,7 @@ namespace mhd // If we're in the R* state then assign fluxes and return. // In this state the flow is subsonic + // M&K 2005 equation 66 if (speedStarR <= 0.0) { mhd::_internal::_returnFluxes(threadId, o1, o2, o3, n_cells, @@ -468,6 +476,7 @@ namespace mhd energyDoubleStarR); // Compute and return L** fluxes + // M&K 2005 equation 66 if (speedM >= 0.0) { Real momentumDoubleStarFluxX, momentumDoubleStarFluxY, momentumDoubleStarFluxZ, @@ -520,6 +529,7 @@ namespace mhd return; } // Compute and return R** fluxes + // M&K 2005 equation 66 else if (speedStarR >= 0.0) { Real momentumDoubleStarFluxX, momentumDoubleStarFluxY, momentumDoubleStarFluxZ, @@ -626,11 +636,13 @@ namespace mhd // Compute the S_L and S_R wave speeds. // Version suggested by Miyoshi & Kusano 2005 and used in Athena + // M&K 2005 equation 67 Real magSonicMax = fmax(magSonicL, magSonicR); speedL = fmin(velocityXL, velocityXR) - magSonicMax; speedR = fmax(velocityXL, velocityXR) + magSonicMax; // Compute the S_M wave speed + // M&K 2005 equation 38 speedM = // Numerator ( momentumXR * (speedR - velocityXR) - momentumXL * (speedL - velocityXL) @@ -641,10 +653,12 @@ namespace mhd - densityL * (speedL - velocityXL)); // Compute the densities in the star state + // M&K 2005 equation 43 densityStarL = densityL * (speedL - velocityXL) / (speedL - speedM); densityStarR = densityR * (speedR - velocityXR) / (speedR - speedM); // Compute the S_L^* and S_R^* wave speeds + // M&K 2005 equation 51 speedStarL = speedM - mhd::utils::alfvenSpeed(magneticX, densityStarL); speedStarR = speedM + mhd::utils::alfvenSpeed(magneticX, densityStarR); } @@ -668,6 +682,7 @@ namespace mhd Real &magneticFluxZ, Real &energyFlux) { + // M&K 2005 equation 2 densityFlux = momentumX; momentumFluxX = momentumX * velocityX + totalPressure - magneticX * magneticX; @@ -748,6 +763,7 @@ namespace mhd Real &magneticStarFluxZ) { // Check for and handle the degenerate case + // Explained at the top of page 326 in M&K 2005 if (fabs(density * (speedSide - velocityX) * (speedSide - speedM) - (magneticX * magneticX)) @@ -760,22 +776,26 @@ namespace mhd } else { + // Denominator for M&K 2005 equations 44-47 Real const denom = density * (speedSide - velocityX) * (speedSide - speedM) - (magneticX * magneticX); // Compute the velocity and magnetic field in the star state + // M&K 2005 equations 44 & 46 Real coef = magneticX * (speedM - velocityX) / denom; velocityStarY = velocityY - magneticY * coef; velocityStarZ = velocityZ - magneticZ * coef; + // M&K 2005 equations 45 & 47 Real tmpPower = (speedSide - velocityX); - tmpPower = tmpPower * tmpPower; - coef = (density * tmpPower - (magneticX * magneticX)) / denom; + tmpPower = tmpPower * tmpPower; + coef = (density * tmpPower - (magneticX * magneticX)) / denom; magneticStarY = magneticY * coef; magneticStarZ = magneticZ * coef; } + // M&K 2005 equation 48 energyStar = ( energy * (speedSide - velocityX) - totalPressure * velocityX + totalPressureStar * speedM @@ -784,6 +804,7 @@ namespace mhd / (speedSide - speedM); // Now compute the star state fluxes + // M&K 2005 equations 64 densityStarFlux = densityFlux + speedSide * (densityStar - density);; momentumStarFluxX = momentumFluxX + speedSide * (densityStar * speedM - momentumX);; momentumStarFluxY = momentumFluxY + speedSide * (densityStar * velocityStarY - momentumY);; @@ -818,14 +839,16 @@ namespace mhd Real &energyDoubleStarR) { // if Bx is zero then just return the star state + // Explained at the top of page 328 in M&K 2005. Essentially when + // magneticX is 0 this reduces to the HLLC solver if (magneticX < mhd::_internal::_hlldSmallNumber * totalPressureStar) { velocityDoubleStarY = velocityStarYL; velocityDoubleStarZ = velocityStarZL; magneticDoubleStarY = magneticStarYL; magneticDoubleStarZ = magneticStarZL; - energyDoubleStarL = energyStarL; - energyDoubleStarR = energyStarR; + energyDoubleStarL = energyStarL; + energyDoubleStarR = energyStarR; } else { @@ -839,6 +862,7 @@ namespace mhd // and magnetic fields along with the energy // Double Star velocities + // M&K 2005 equations 59 & 60 velocityDoubleStarY = inverseDensities * (sqrtDL * velocityStarYL + sqrtDR * velocityStarYR + magXSign * (magneticStarYR - magneticStarYL)); @@ -847,6 +871,7 @@ namespace mhd + magXSign * (magneticStarZR - magneticStarZL)); // Double star magnetic fields + // M&K 2005 equations 61 & 62 magneticDoubleStarY = inverseDensities * (sqrtDL * magneticStarYR + sqrtDR * magneticStarYL + magXSign * (sqrtDL * sqrtDR) * (velocityStarYR - velocityStarYL)); @@ -861,6 +886,7 @@ namespace mhd magneticX, magneticDoubleStarY, magneticDoubleStarZ); + // M&K 2005 equation 63 energyDoubleStarL = energyStarL - sqrtDL * magXSign * (math_utils::dotProduct(speedM, velocityStarYL, velocityStarZL, magneticX, magneticStarYL, magneticStarZL) - velDblStarDotMagDblStar); @@ -899,6 +925,7 @@ namespace mhd Real &magneticDoubleStarFluxY, Real &magneticDoubleStarFluxZ) { + // M&K 2005 equation 65 momentumDoubleStarFluxX = momentumStarFluxX + speedStarSide * (velocityDoubleStarX - velocityStarX) * densityStar; momentumDoubleStarFluxY = momentumStarFluxY + speedStarSide * (velocityDoubleStarY - velocityStarY) * densityStar; momentumDoubleStarFluxZ = momentumStarFluxZ + speedStarSide * (velocityDoubleStarZ - velocityStarZ) * densityStar; diff --git a/src/riemann_solvers/hlld_cuda.h b/src/riemann_solvers/hlld_cuda.h index 357c850d6..332768f8a 100644 --- a/src/riemann_solvers/hlld_cuda.h +++ b/src/riemann_solvers/hlld_cuda.h @@ -1,7 +1,9 @@ /*! * \file hlld_cuda.cu * \author Robert 'Bob' Caddy (rvc@pitt.edu) - * \brief Contains the declaration of the HLLD solver + * \brief Contains the declaration of the HLLD solver from Miyoshi & Kusano 2005 + * "A multi-state HLL approximate Riemann solver for ideal magnetohydrodynamics", + * hereafter referred to as M&K 2005 * */ @@ -63,7 +65,8 @@ namespace mhd /*! * \brief Compute the left, right, star, and middle wave speeds. Also - * returns the densities in the star states + * returns the densities in the star states. M&K 2005 equations 38, 43, + * 51, and 67 * * \param[in] densityL Density, left side * \param[in] momentumXL Momentum in the X-direction, left side @@ -198,7 +201,8 @@ namespace mhd Real const &energyFlux); /*! - * \brief Compute the fluxes in the left or right star state + * \brief Compute the fluxes in the left or right star state. M&K 2005 + * equations 44-48, 64 * * \param[in] speedM Speed of the central wave * \param[in] speedSide Speed of the non-star wave on the side being computed @@ -274,7 +278,7 @@ namespace mhd Real &magneticStarFluxZ); /*! - * \brief Compute the double star state + * \brief Compute the double star state. M&K 2005 equations 59-63 * * \param[in] speedM * \param[in] magneticX @@ -321,7 +325,7 @@ namespace mhd Real &energyDoubleStarR); /*! - * \brief Compute the double star state fluxes + * \brief Compute the double star state fluxes. M&K 2005 equation 65 * * \param[in] speedStarSide The star speed on the side being computed * \param[in] momentumStarFluxX From ef4f4035db3db7a68df3797364d7410617a64548 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Thu, 5 Jan 2023 15:56:36 -0500 Subject: [PATCH 160/694] More citations in comments --- src/integrators/VL_3D_cuda.cu | 10 +++++----- src/mhd/ct_electric_fields.cu | 10 +++++----- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/integrators/VL_3D_cuda.cu b/src/integrators/VL_3D_cuda.cu index 9a9ee6cb1..2fb413870 100644 --- a/src/integrators/VL_3D_cuda.cu +++ b/src/integrators/VL_3D_cuda.cu @@ -74,11 +74,11 @@ void VL_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, int // store in the interface on the "right" side of the cell, so the flux // arrays store the fluxes through the right interface // - // According to the source code of Athena, the following equation relate - // the magnetic flux to the face centered electric fields/EMF. - // -cross(V,B)x is the negative of the x-component of V cross B. Note that - // "X" is the direction the solver is running in this case, not - // necessarily the true "X". + // According to Stone et al. 2008 section 5.3 and the source code of + // Athena, the following equation relate the magnetic flux to the face + // centered electric fields/EMF. -cross(V,B)x is the negative of the + // x-component of V cross B. Note that "X" is the direction the solver is + // running in this case, not necessarily the true "X". // F_x[(grid_enum::fluxX_magnetic_z)*n_cells] = VxBy - BxVy = -(-cross(V,B))z = -EMF_Z // F_x[(grid_enum::fluxX_magnetic_y)*n_cells] = VxBz - BxVz = (-cross(V,B))y = EMF_Y // F_y[(grid_enum::fluxY_magnetic_x)*n_cells] = VxBy - BxVy = -(-cross(V,B))z = -EMF_X diff --git a/src/mhd/ct_electric_fields.cu b/src/mhd/ct_electric_fields.cu index aa0ac3061..542dda3a7 100644 --- a/src/mhd/ct_electric_fields.cu +++ b/src/mhd/ct_electric_fields.cu @@ -41,11 +41,11 @@ namespace mhd and yid < ny and zid < nz) { - // According to the source code of Athena, the following equation - // relate the magnetic flux to the face centered electric - // fields/EMF. -cross(V,B)x is the negative of the x-component of V - // cross B. Note that "X" is the direction the solver is running in - // this case, not necessarily the true "X". + // According to Stone et al. 2008 section 5.3 and the source code of + // Athena, the following equation relate the magnetic flux to the + // face centered electric fields/EMF. -cross(V,B)x is the negative + // of the x-component of V cross B. Note that "X" is the direction + // the solver is running in this case, not necessarily the true "X". // F_x[(grid_enum::fluxX_magnetic_z)*n_cells] = VxBy - BxVy = -(-cross(V,B))z = -EMF_Z // F_x[(grid_enum::fluxX_magnetic_y)*n_cells] = VxBz - BxVz = (-cross(V,B))y = EMF_Y // F_y[(grid_enum::fluxY_magnetic_x)*n_cells] = VxBy - BxVy = -(-cross(V,B))z = -EMF_X From d05c4e61b215f3511804e62e3ca55355dce79e34 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Tue, 10 Jan 2023 13:56:49 -0500 Subject: [PATCH 161/694] Add MHD support to python_scripts/cat_dset_3D.py --- python_scripts/cat_dset_3D.py | 29 +++++++++++++++++++++++------ 1 file changed, 23 insertions(+), 6 deletions(-) diff --git a/python_scripts/cat_dset_3D.py b/python_scripts/cat_dset_3D.py index 0c6d4b3ad..1a823e268 100755 --- a/python_scripts/cat_dset_3D.py +++ b/python_scripts/cat_dset_3D.py @@ -11,11 +11,10 @@ iend = 1*n_proc dnamein = './hdf5/raw/' dnameout = './hdf5/' -DE = 0 # loop over outputs for n in range(ns, ne+1): - + # loop over files for a given output for i in range(istart, iend): @@ -26,7 +25,7 @@ # read in the header data from the input file head = filein.attrs - # if it's the first input file, write the header attributes + # if it's the first input file, write the header attributes # and create the datasets in the output file if (i == 0): nx = head['dims'][0] @@ -47,8 +46,17 @@ my = fileout.create_dataset("momentum_y", (nx, ny, nz), chunks=True) mz = fileout.create_dataset("momentum_z", (nx, ny, nz), chunks=True) E = fileout.create_dataset("Energy", (nx, ny, nz), chunks=True) - if (DE): + try: GE = fileout.create_dataset("GasEnergy", (nx, ny, nz), chunks=True) + except KeyError: + print('No Dual energy data present'); + try: + [nx_mag, ny_mag, nz_mag] = head['magnetic_field_dims'] + bx = fileout.create_dataset("magnetic_x", (nx_mag, ny_mag, nz_mag), chunks=True) + by = fileout.create_dataset("magnetic_y", (nx_mag, ny_mag, nz_mag), chunks=True) + bz = fileout.create_dataset("magnetic_z", (nx_mag, ny_mag, nz_mag), chunks=True) + except KeyError: + print('No magnetic field data present'); # write data from individual processor file to # correct location in concatenated file @@ -63,9 +71,18 @@ fileout['momentum_y'][xs:xs+nxl,ys:ys+nyl,zs:zs+nzl] = filein['momentum_y'] fileout['momentum_z'][xs:xs+nxl,ys:ys+nyl,zs:zs+nzl] = filein['momentum_z'] fileout['Energy'][xs:xs+nxl,ys:ys+nyl,zs:zs+nzl] = filein['Energy'] - if (DE): + try: fileout['GasEnergy'][xs:xs+nxl,ys:ys+nyl,zs:zs+nzl] = filein['GasEnergy'] - + except KeyError: + print('No Dual energy data present'); + try: + [nxl_mag, nyl_mag, nzl_mag] = head['magnetic_field_dims_local'] + fileout['magnetic_x'][xs:xs+nxl_mag,ys:ys+nyl_mag,zs:zs+nzl_mag] = filein['magnetic_x'] + fileout['magnetic_y'][xs:xs+nxl_mag,ys:ys+nyl_mag,zs:zs+nzl_mag] = filein['magnetic_y'] + fileout['magnetic_z'][xs:xs+nxl_mag,ys:ys+nyl_mag,zs:zs+nzl_mag] = filein['magnetic_z'] + except KeyError: + print('No magnetic field data present'); + filein.close() fileout.close() From 75ff03a5b55cd4a057ff2e5c8e990b4a98fe5835 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Tue, 10 Jan 2023 14:01:12 -0500 Subject: [PATCH 162/694] Remove support for averaging slow cells in MHD We're not entirely sure how to do this and since it probably isn't required we're removing support for now. Also, slightly refactored the MHD part of the timestep calculation to combine two #ifdef statements into one --- src/hydro/hydro_cuda.cu | 27 ++++----------------------- 1 file changed, 4 insertions(+), 23 deletions(-) diff --git a/src/hydro/hydro_cuda.cu b/src/hydro/hydro_cuda.cu index e6b110ffc..7ad6e994a 100644 --- a/src/hydro/hydro_cuda.cu +++ b/src/hydro/hydro_cuda.cu @@ -548,14 +548,12 @@ __global__ void Calc_dt_3D(Real *dev_conserved, Real *dev_dti, Real gamma, int n vy = dev_conserved[2*n_cells + id] * d_inv; vz = dev_conserved[3*n_cells + id] * d_inv; E = dev_conserved[4*n_cells + id]; + + // Compute the maximum inverse crossing time in the cell #ifdef MHD // Compute the cell centered magnetic field using a straight average of // the faces auto const [avgBx, avgBy, avgBz] = mhd::utils::cellCenteredMagneticFields(dev_conserved, id, xid, yid, zid, n_cells, nx, ny); - #endif //MHD - - // Compute the maximum inverse crossing time in the cell - #ifdef MHD max_dti = fmax(max_dti,mhdInverseCrossingTime(E, d, d_inv, vx, vy, vz, avgBx, avgBy, avgBz, dx, dy, dz, gamma)); #else // not MHD max_dti = fmax(max_dti,hydroInverseCrossingTime(E, d, d_inv, vx, vy, vz, dx, dy, dz, gamma)); @@ -646,17 +644,8 @@ __global__ void Average_Slow_Cells_3D(Real *dev_conserved, int nx, int ny, int n vz = dev_conserved[3*n_cells + id] * d_inv; E = dev_conserved[4*n_cells + id]; - #ifdef MHD - // Compute the cell centered magnetic field using a straight average of the faces - auto [avgBx, avgBy, avgBz] = mhdUtils::cellCenteredMagneticFields(dev_conserved, id, xid, yid, zid, n_cells, nx, ny); - #endif //MHD - // Compute the maximum inverse crossing time in the cell - #ifdef MHD - max_dti = mhdInverseCrossingTime(E, d, d_inv, vx, vy, vz, avgBx, avgBy, avgBz, dx, dy, dz, gamma); - #else // not MHD - max_dti = hydroInverseCrossingTime(E, d, d_inv, vx, vy, vz, dx, dy, dz, gamma); - #endif //MHD + max_dti = hydroInverseCrossingTime(E, d, d_inv, vx, vy, vz, dx, dy, dz, gamma); if (max_dti > max_dti_slow){ speed = sqrt(vx*vx + vy*vy + vz*vz); @@ -1185,15 +1174,7 @@ __device__ void Average_Cell_All_Fields( int i, int j, int k, int nx, int ny, in Average_Cell_Single_Field( 3, i, j, k, nx, ny, nz, ncells, conserved ); // Average Energy Average_Cell_Single_Field( 4, i, j, k, nx, ny, nz, ncells, conserved ); - #ifdef MHD - // Average MHD - Average_Cell_Single_Field( grid_enum::magnetic_x, i, j, k, nx, ny, nz, ncells, conserved ); - Average_Cell_Single_Field( grid_enum::magnetic_y, i, j, k, nx, ny, nz, ncells, conserved ); - Average_Cell_Single_Field( grid_enum::magnetic_z, i, j, k, nx, ny, nz, ncells, conserved ); - Average_Cell_Single_Field( grid_enum::magnetic_x, i-1, j, k, nx, ny, nz, ncells, conserved ); - Average_Cell_Single_Field( grid_enum::magnetic_y, i, j-1, k, nx, ny, nz, ncells, conserved ); - Average_Cell_Single_Field( grid_enum::magnetic_z, i, j, k-1, nx, ny, nz, ncells, conserved ); - #endif //MHD + #ifdef DE // Average GasEnergy Average_Cell_Single_Field( n_fields-1, i, j, k, nx, ny, nz, ncells, conserved ); From 0125f6214b81d9b1f693270b3f5507f3fed56707 Mon Sep 17 00:00:00 2001 From: Evan Schneider Date: Wed, 11 Jan 2023 13:21:38 -0500 Subject: [PATCH 163/694] Reset spherical overdensity ICs to old values. Gravity system test now passes. --- src/grid/initial_conditions.cpp | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/src/grid/initial_conditions.cpp b/src/grid/initial_conditions.cpp index 4786fb446..3d6e5355a 100644 --- a/src/grid/initial_conditions.cpp +++ b/src/grid/initial_conditions.cpp @@ -1187,18 +1187,19 @@ void Grid3D::Spherical_Overdensity_3D() int i, j, k, id; Real x_pos, y_pos, z_pos, r, center_x, center_y, center_z; Real density, pressure, overDensity, overPressure, energy, radius, background_density; - Real mu = 0.6; Real vx, vy, vz, v2; - center_x = 0.0; - center_y = 0.0; - center_z = 0.0; - overDensity = 1000 * mu * MP / DENSITY_UNIT; // 100 particles per cm^3 + center_x = 0.5; + center_y = 0.5; + center_z = 0.5; + //overDensity = 1000 * mu * MP / DENSITY_UNIT; // 100 particles per cm^3 + overDensity = 1; overPressure = 0; vx = 0; vy = 0; vz = 0; - radius = 0.02; - background_density = mu * MP / DENSITY_UNIT; // 1 particles per cm^3 + radius = 0.2; + //background_density = mu * MP / DENSITY_UNIT; // 1 particles per cm^3 + background_density = 0.0005; H.sphere_density = overDensity; H.sphere_radius = radius; H.sphere_background_density = background_density; From 93eff660d65c7d91addc69db3307c4886739cd6c Mon Sep 17 00:00:00 2001 From: ojwg Date: Wed, 11 Jan 2023 09:39:18 -0500 Subject: [PATCH 164/694] use starburst 99 SN rate info + requested PR changes. --- src/global/global.cpp | 4 + src/global/global.h | 3 + src/particles/feedback_CIC_gpu.cu | 100 ++- src/particles/starburst99_snr.txt | 1007 +++++++++++++++++++++++++++++ src/particles/supernova.h | 19 +- 5 files changed, 1109 insertions(+), 24 deletions(-) create mode 100755 src/particles/starburst99_snr.txt diff --git a/src/global/global.cpp b/src/global/global.cpp index 1f6a5cbfa..f81931647 100644 --- a/src/global/global.cpp +++ b/src/global/global.cpp @@ -333,6 +333,10 @@ void parse_param(char *name,char *value, struct parameters *parms){ else if (strcmp(name, "prng_seed")==0) parms->prng_seed = atoi(value); #endif // PARTICLES +#ifdef SUPERNOVA + else if (strcmp(name, "snr_filename")==0) + strncpy(parms->snr_filename, value, MAXLEN); +#endif #ifdef ROTATED_PROJECTION else if (strcmp(name, "nxr")==0) parms->nxr = atoi(value); diff --git a/src/global/global.h b/src/global/global.h index 79d3dbc88..8d658f8c1 100644 --- a/src/global/global.h +++ b/src/global/global.h @@ -260,6 +260,9 @@ struct parameters // machine dependent seed will be generated. std::uint_fast64_t prng_seed = 0; #endif // PARTICLES +#ifdef SUPERNOVA + char snr_filename[MAXLEN]; +#endif #ifdef ROTATED_PROJECTION int nxr; int nzr; diff --git a/src/particles/feedback_CIC_gpu.cu b/src/particles/feedback_CIC_gpu.cu index 8cdfbfb23..f5de01463 100644 --- a/src/particles/feedback_CIC_gpu.cu +++ b/src/particles/feedback_CIC_gpu.cu @@ -1,9 +1,13 @@ #if defined(SUPERNOVA) && defined(PARTICLES_GPU) && defined(PARTICLE_AGE) && defined(PARTICLE_IDS) +#include +#include +#include #include #include #include #include +#include #include "../grid/grid3D.h" #include "../global/global_cuda.h" #include "../global/global.h" @@ -21,7 +25,8 @@ namespace supernova { curandStateMRG32k3a_t* randStates; part_int_t n_states; - Real t_buff, dt_buff; + Real *dev_snr, snr_dt, time_sn_start, time_sn_end; + int snr_n; } @@ -43,27 +48,81 @@ __device__ double atomicMax(double* address, double val) __global__ void initState_kernel(unsigned int seed, curandStateMRG32k3a_t* states) { int id = blockIdx.x*blockDim.x + threadIdx.x; curand_init(seed, id, 0, &states[id]); - } /** - * @brief Initialize the cuRAND state, which is analogous to the concept of generators in CPU code. - * The state object maintains configuration and status the cuRAND context for each thread on the GPU. - * Initialize more than the number of local particles since the latter will change through MPI transfers. + * @brief Does 2 things: + * -# Read in SN rate data from Starburst 99. If no file exists, assume a constant rate. + * -# Initialize the cuRAND state, which is analogous to the concept of generators in CPU code. + * The state object maintains configuration and status the cuRAND context for each thread on the GPU. + * Initialize more than the number of local particles since the latter will change through MPI transfers. * - * @param n_local + * @param P pointer to parameters struct. Passes in starburst 99 filename and random number gen seed. + * @param n_local number of local particles on the GPU * @param allocation_factor */ void supernova::initState(struct parameters *P, part_int_t n_local, Real allocation_factor) { printf("supernova::initState start\n"); - t_buff = 0; - dt_buff = 0; - n_states = n_local*allocation_factor; + std::string snr_filename(P->snr_filename); + if (snr_filename.size()) { + chprintf("Specified a SNR filename %s.\n", &snr_filename[0]); + + // read in array of supernova rate values. + std::ifstream snr_in(snr_filename); + if (!snr_in.is_open()) { + chprintf("ERROR: but couldn't read SNR file.\n"); + exit(-1); + } + + std::vector snr_time; + std::vector snr; + + const int N_HEADER = 7; // S'99 has 7 rows of header information + const char* s99_delim = " "; // S'99 data separator + std::string line; + int line_counter = 0; + + while (snr_in.good()) { + std::getline(snr_in, line); + if (line_counter++ < N_HEADER) continue; // skip header processing + + int i = 0; + char *data = strtok(const_cast(line.c_str()), s99_delim); + while (data != nullptr) { + if (i == 0) { + // in the following divide by # years per kyr (1000) + snr_time.push_back(std::stof(std::string(data)) / 1000); + } + else if (i == 1) { + snr.push_back(pow(10, std::stof(std::string(data))) / 1000); + } + if (i > 0) break; // only care about the first 2 items. Once i = 1 can break here. + data = strtok(nullptr, s99_delim); + i++; + } + } + + time_sn_end = snr_time[snr_time.size() - 1]; + time_sn_start = snr_time[0]; + // the following is the time interval between data points + // (i.e. assumes regular temporal spacing) + snr_dt = (time_sn_end - time_sn_start) / (snr.size() - 1); + + CHECK(cudaMalloc((void**) &dev_snr, snr.size() * sizeof(Real))); + CHECK(cudaMemcpy(dev_snr, snr.data(), snr.size() * sizeof(Real), cudaMemcpyHostToDevice)); + + } else { + chprintf("No SN rate file specified. Using constant rate\n"); + time_sn_start = DEFAULT_SN_START; + time_sn_end = DEFAULT_SN_END; + } + // Now ititialize the poisson random number generator state. + n_states = n_local*allocation_factor; cudaMalloc((void**) &randStates, n_states*sizeof(curandStateMRG32k3a_t)); - int ngrid = (n_states + TPB_FEEDBACK- 1) / TPB_FEEDBACK; + int ngrid = (n_states + TPB_FEEDBACK - 1) / TPB_FEEDBACK; dim3 grid(ngrid); dim3 block(TPB_FEEDBACK); @@ -73,6 +132,15 @@ void supernova::initState(struct parameters *P, part_int_t n_local, Real allocat } +__device__ Real GetSNRate(Real t, Real* dev_snr, Real snr_dt, Real t_start, Real t_end) { + if (t < t_start|| t >= t_end) return 0; + if (dev_snr == nullptr) return supernova::DEFAULT_SNR; + + int index = (int)( (t - t_start) / snr_dt); + return dev_snr[index] + (t - index * snr_dt) * (dev_snr[index + 1] - dev_snr[index]) / snr_dt; +} + + __device__ Real Calc_Timestep(Real gamma, Real *density, Real *momentum_x, Real *momentum_y, Real *momentum_z, Real *energy, int index, Real dx, Real dy, Real dz){ Real dens = fmax(density[index], DENS_FLOOR); Real d_inv = 1.0 / dens; @@ -125,7 +193,7 @@ __global__ void Cluster_Feedback_Kernel(part_int_t n_local, part_int_t *id, Real Real* mass_dev, Real* age_dev, Real xMin, Real yMin, Real zMin, Real xMax, Real yMax, Real zMax, Real dx, Real dy, Real dz, int nx_g, int ny_g, int nz_g, int n_ghost, Real t, Real dt, Real* dti, Real* info, Real* density, Real* gasEnergy, Real* energy, Real* momentum_x, Real* momentum_y, Real* momentum_z, Real gamma, curandStateMRG32k3a_t* states, - Real* prev_dens, int* prev_N, short direction){ + Real* prev_dens, int* prev_N, short direction, Real* dev_snr, Real snr_dt, Real time_sn_start, Real time_sn_end) { __shared__ Real s_info[FEED_INFO_N*TPB_FEEDBACK]; // for collecting SN feedback information, like # of SNe or # resolved. int tid = threadIdx.x; @@ -180,11 +248,11 @@ __global__ void Cluster_Feedback_Kernel(part_int_t n_local, part_int_t *id, Real if (!ignore && in_local) { int N = 0; - if ((t - age_dev[gtid]) <= supernova::SN_ERA) { + if ((t - age_dev[gtid]) <= time_sn_end) { // only calculate this if there will be SN feedback if (direction == -1) N = -prev_N[gtid]; else { curandStateMRG32k3a_t state = states[gtid]; - N = curand_poisson (&state, supernova::SNR * mass_dev[gtid] * dt); + N = curand_poisson (&state, GetSNRate(t - age_dev[gtid], dev_snr, snr_dt, time_sn_start, time_sn_end) * mass_dev[gtid] * dt); states[gtid] = state; prev_N[gtid] = N; } @@ -455,7 +523,8 @@ Real supernova::Cluster_Feedback(Grid3D& G, FeedbackAnalysis& analysis) { hipLaunchKernelGGL(Cluster_Feedback_Kernel, ngrid, TPB_FEEDBACK, 0, 0, G.Particles.n_local, G.Particles.partIDs_dev, G.Particles.pos_x_dev, G.Particles.pos_y_dev, G.Particles.pos_z_dev, G.Particles.mass_dev, G.Particles.age_dev, G.H.xblocal, G.H.yblocal, G.H.zblocal, G.H.xblocal_max, G.H.yblocal_max, G.H.zblocal_max, G.H.dx, G.H.dy, G.H.dz, G.H.nx, G.H.ny, G.H.nz, G.H.n_ghost, G.H.t, G.H.dt, d_dti, d_info, - G.C.d_density, G.C.d_GasEnergy, G.C.d_Energy, G.C.d_momentum_x, G.C.d_momentum_y, G.C.d_momentum_z, gama, supernova::randStates, d_prev_dens, d_prev_N, direction); + G.C.d_density, G.C.d_GasEnergy, G.C.d_Energy, G.C.d_momentum_x, G.C.d_momentum_y, G.C.d_momentum_z, gama, supernova::randStates, d_prev_dens, d_prev_N, direction, + dev_snr, snr_dt, time_sn_start, time_sn_end); CHECK(cudaMemcpy(&h_dti, d_dti, sizeof(Real), cudaMemcpyDeviceToHost)); } @@ -471,7 +540,8 @@ Real supernova::Cluster_Feedback(Grid3D& G, FeedbackAnalysis& analysis) { hipLaunchKernelGGL(Cluster_Feedback_Kernel, ngrid, TPB_FEEDBACK, 0, 0, G.Particles.n_local, G.Particles.partIDs_dev, G.Particles.pos_x_dev, G.Particles.pos_y_dev, G.Particles.pos_z_dev, G.Particles.mass_dev, G.Particles.age_dev, G.H.xblocal, G.H.yblocal, G.H.zblocal, G.H.xblocal_max, G.H.yblocal_max, G.H.zblocal_max, G.H.dx, G.H.dy, G.H.dz, G.H.nx, G.H.ny, G.H.nz, G.H.n_ghost, G.H.t, G.H.dt, d_dti, d_info, - G.C.d_density, G.C.d_GasEnergy, G.C.d_Energy, G.C.d_momentum_x, G.C.d_momentum_y, G.C.d_momentum_z, gama, supernova::randStates, d_prev_dens, d_prev_N, direction); + G.C.d_density, G.C.d_GasEnergy, G.C.d_Energy, G.C.d_momentum_x, G.C.d_momentum_y, G.C.d_momentum_z, gama, supernova::randStates, d_prev_dens, d_prev_N, direction, + dev_snr, snr_dt, time_sn_start, time_sn_end); CHECK(cudaDeviceSynchronize()); } diff --git a/src/particles/starburst99_snr.txt b/src/particles/starburst99_snr.txt new file mode 100755 index 000000000..449821025 --- /dev/null +++ b/src/particles/starburst99_snr.txt @@ -0,0 +1,1007 @@ + MODEL DESIGNATION: MW_center + MODEL GENERATED: Mon Nov 28 15:05:08 2022 + + RESULTS FOR THE SUPERNOVA RATE + + ALL SUPERNOVAE TYPE IB SUPERNOVAE ALL SUPERNOVAE STARS + SUPERNOVAE + TIME TOTAL RATE POWER ENERGY TOTAL RATE POWER ENERGY TYPICAL MASS LOWEST PROG. MASS POWER ENERGY + 0.100E+05 -30.000 -30.000 -30.000 -30.000 -30.000 -30.000 0.0 0.0 39.937 51.436 + 0.110E+06 -30.000 -30.000 -30.000 -30.000 -30.000 -30.000 0.0 0.0 39.943 52.483 + 0.210E+06 -30.000 -30.000 -30.000 -30.000 -30.000 -30.000 0.0 0.0 39.950 52.767 + 0.310E+06 -30.000 -30.000 -30.000 -30.000 -30.000 -30.000 0.0 0.0 39.956 52.940 + 0.410E+06 -30.000 -30.000 -30.000 -30.000 -30.000 -30.000 0.0 0.0 39.962 53.064 + 0.510E+06 -30.000 -30.000 -30.000 -30.000 -30.000 -30.000 0.0 0.0 39.968 53.162 + 0.610E+06 -30.000 -30.000 -30.000 -30.000 -30.000 -30.000 0.0 0.0 39.974 53.243 + 0.710E+06 -30.000 -30.000 -30.000 -30.000 -30.000 -30.000 0.0 0.0 39.979 53.312 + 0.810E+06 -30.000 -30.000 -30.000 -30.000 -30.000 -30.000 0.0 0.0 39.985 53.372 + 0.910E+06 -30.000 -30.000 -30.000 -30.000 -30.000 -30.000 0.0 0.0 39.990 53.426 + 0.101E+07 -30.000 -30.000 -30.000 -30.000 -30.000 -30.000 0.0 0.0 39.996 53.474 + 0.111E+07 -30.000 -30.000 -30.000 -30.000 -30.000 -30.000 0.0 0.0 40.001 53.518 + 0.121E+07 -30.000 -30.000 -30.000 -30.000 -30.000 -30.000 0.0 0.0 40.005 53.558 + 0.131E+07 -30.000 -30.000 -30.000 -30.000 -30.000 -30.000 0.0 0.0 40.010 53.595 + 0.141E+07 -30.000 -30.000 -30.000 -30.000 -30.000 -30.000 0.0 0.0 40.013 53.629 + 0.151E+07 -30.000 -30.000 -30.000 -30.000 -30.000 -30.000 0.0 0.0 40.015 53.662 + 0.161E+07 -30.000 -30.000 -30.000 -30.000 -30.000 -30.000 0.0 0.0 40.016 53.691 + 0.171E+07 -30.000 -30.000 -30.000 -30.000 -30.000 -30.000 0.0 0.0 40.016 53.719 + 0.181E+07 -30.000 -30.000 -30.000 -30.000 -30.000 -30.000 0.0 0.0 40.013 53.746 + 0.191E+07 -30.000 -30.000 -30.000 -30.000 -30.000 -30.000 0.0 0.0 40.007 53.770 + 0.201E+07 -30.000 -30.000 -30.000 -30.000 -30.000 -30.000 0.0 0.0 39.997 53.792 + 0.211E+07 -30.000 -30.000 -30.000 -30.000 -30.000 -30.000 0.0 0.0 39.983 53.813 + 0.221E+07 -30.000 -30.000 -30.000 -30.000 -30.000 -30.000 0.0 0.0 39.962 53.832 + 0.231E+07 -30.000 -30.000 -30.000 -30.000 -30.000 -30.000 0.0 0.0 39.932 53.849 + 0.241E+07 -30.000 -30.000 -30.000 -30.000 -30.000 -30.000 0.0 0.0 39.946 53.866 + 0.251E+07 -30.000 -30.000 -30.000 -30.000 -30.000 -30.000 0.0 0.0 39.970 53.883 + 0.261E+07 -30.000 -30.000 -30.000 -30.000 -30.000 -30.000 0.0 0.0 40.016 53.901 + 0.271E+07 -30.000 -30.000 -30.000 -30.000 -30.000 -30.000 0.0 0.0 40.026 53.919 + 0.281E+07 -30.000 -30.000 -30.000 -30.000 -30.000 -30.000 0.0 0.0 40.009 53.936 + 0.291E+07 -30.000 -30.000 -30.000 -30.000 -30.000 -30.000 0.0 0.0 40.094 53.955 + 0.301E+07 -30.000 -30.000 -30.000 -30.000 -30.000 -30.000 0.0 0.0 40.202 53.978 + 0.311E+07 -30.000 -30.000 -30.000 -30.000 -30.000 -30.000 0.0 0.0 40.265 54.004 + 0.321E+07 -4.388 39.113 51.612 -4.388 39.113 51.612 99.7 99.3 40.333 54.032 + 0.331E+07 -3.308 40.193 52.727 -3.308 40.193 52.727 96.2 91.5 40.545 54.075 + 0.341E+07 -3.317 40.184 53.007 -3.317 40.184 53.007 88.8 84.8 40.534 54.113 + 0.351E+07 -3.306 40.195 53.179 -3.306 40.195 53.179 82.5 79.0 40.509 54.145 + 0.361E+07 -3.296 40.205 53.304 -3.296 40.205 53.304 77.0 73.9 40.484 54.174 + 0.371E+07 -3.266 40.235 53.408 -3.266 40.235 53.408 72.1 69.5 40.482 54.201 + 0.381E+07 -3.277 40.223 53.489 -3.277 40.223 53.489 67.9 65.5 40.454 54.225 + 0.391E+07 -3.269 40.232 53.559 -3.269 40.232 53.559 64.1 61.9 40.402 54.245 + 0.401E+07 -3.266 40.235 53.620 -3.266 40.235 53.620 60.7 58.8 40.372 54.263 + 0.411E+07 -3.269 40.232 53.672 -3.269 40.232 53.672 57.7 56.0 40.349 54.280 + 0.421E+07 -3.265 40.236 53.720 -3.265 40.236 53.720 55.0 53.5 40.335 54.295 + 0.431E+07 -3.262 40.239 53.763 -3.262 40.239 53.763 52.5 51.2 40.333 54.310 + 0.441E+07 -3.258 40.243 53.803 -3.258 40.243 53.803 50.3 49.1 40.328 54.324 + 0.451E+07 -3.255 40.246 53.839 -3.255 40.246 53.839 48.3 47.1 40.324 54.337 + 0.461E+07 -3.251 40.249 53.873 -3.251 40.249 53.873 46.4 45.4 40.319 54.350 + 0.471E+07 -3.227 40.274 53.906 -3.495 40.006 53.891 44.7 43.7 40.332 54.363 + 0.481E+07 -3.245 40.255 53.936 -30.000 -30.000 53.891 43.1 42.2 40.312 54.375 + 0.491E+07 -3.243 40.258 53.964 -30.000 -30.000 53.891 41.7 40.8 40.313 54.387 + 0.501E+07 -3.249 40.252 53.989 -30.000 -30.000 53.891 40.3 39.5 40.301 54.398 + 0.511E+07 -3.263 40.238 54.013 -30.000 -30.000 53.891 39.1 38.4 40.283 54.408 + 0.521E+07 -3.264 40.237 54.035 -30.000 -30.000 53.891 37.9 37.3 40.275 54.418 + 0.531E+07 -3.243 40.258 54.058 -30.000 -30.000 53.891 36.9 36.3 40.291 54.428 + 0.541E+07 -3.266 40.235 54.078 -30.000 -30.000 53.891 35.9 35.3 40.269 54.438 + 0.551E+07 -3.267 40.234 54.097 -3.693 39.808 53.902 35.0 34.4 40.266 54.447 + 0.561E+07 -3.267 40.234 54.115 -3.267 40.234 53.931 34.1 33.6 40.263 54.456 + 0.571E+07 -3.268 40.233 54.133 -3.268 40.233 53.957 33.3 32.8 40.258 54.464 + 0.581E+07 -3.269 40.232 54.150 -3.269 40.232 53.982 32.5 32.0 40.255 54.473 + 0.591E+07 -3.267 40.234 54.166 -3.267 40.234 54.006 31.7 31.3 40.254 54.481 + 0.601E+07 -3.267 40.234 54.182 -3.267 40.234 54.029 31.0 30.6 40.250 54.489 + 0.611E+07 -3.268 40.233 54.197 -3.268 40.233 54.050 30.4 30.0 40.250 54.497 + 0.621E+07 -3.268 40.233 54.212 -3.268 40.233 54.071 29.7 29.4 40.250 54.505 + 0.631E+07 -3.268 40.233 54.226 -3.268 40.233 54.090 29.1 28.8 40.250 54.512 + 0.641E+07 -3.269 40.232 54.240 -3.269 40.232 54.109 28.6 28.2 40.246 54.519 + 0.651E+07 -3.269 40.232 54.253 -3.269 40.232 54.126 28.0 27.7 40.245 54.527 + 0.661E+07 -3.269 40.232 54.266 -3.404 40.097 54.139 27.5 27.2 40.244 54.534 + 0.671E+07 -3.270 40.231 54.278 -30.000 -30.000 54.139 27.0 26.7 40.243 54.541 + 0.681E+07 -3.270 40.231 54.290 -30.000 -30.000 54.139 26.5 26.2 40.242 54.548 + 0.691E+07 -3.271 40.230 54.302 -30.000 -30.000 54.139 26.0 25.8 40.241 54.554 + 0.701E+07 -3.271 40.230 54.314 -30.000 -30.000 54.139 25.6 25.3 40.241 54.561 + 0.711E+07 -3.273 40.228 54.325 -30.000 -30.000 54.139 25.2 24.9 40.239 54.567 + 0.721E+07 -3.280 40.221 54.335 -30.000 -30.000 54.139 24.8 24.5 40.232 54.574 + 0.731E+07 -3.282 40.219 54.346 -30.000 -30.000 54.139 24.4 24.1 40.231 54.580 + 0.741E+07 -3.283 40.218 54.356 -30.000 -30.000 54.139 24.0 23.8 40.229 54.586 + 0.751E+07 -3.285 40.216 54.366 -30.000 -30.000 54.139 23.6 23.4 40.228 54.592 + 0.761E+07 -3.286 40.215 54.375 -30.000 -30.000 54.139 23.3 23.1 40.227 54.598 + 0.771E+07 -3.287 40.214 54.385 -30.000 -30.000 54.139 23.0 22.8 40.225 54.604 + 0.781E+07 -3.288 40.212 54.394 -30.000 -30.000 54.139 22.6 22.5 40.224 54.609 + 0.791E+07 -3.290 40.211 54.403 -30.000 -30.000 54.139 22.3 22.2 40.223 54.615 + 0.801E+07 -3.291 40.210 54.411 -30.000 -30.000 54.139 22.0 21.9 40.221 54.620 + 0.811E+07 -3.292 40.208 54.420 -30.000 -30.000 54.139 21.7 21.6 40.219 54.626 + 0.821E+07 -3.294 40.207 54.428 -30.000 -30.000 54.139 21.5 21.3 40.216 54.631 + 0.831E+07 -3.295 40.206 54.436 -30.000 -30.000 54.139 21.2 21.0 40.214 54.636 + 0.841E+07 -3.296 40.204 54.444 -30.000 -30.000 54.139 20.9 20.8 40.212 54.641 + 0.851E+07 -3.298 40.203 54.452 -30.000 -30.000 54.139 20.7 20.5 40.210 54.646 + 0.861E+07 -3.299 40.202 54.460 -30.000 -30.000 54.139 20.4 20.3 40.208 54.651 + 0.871E+07 -3.300 40.201 54.467 -30.000 -30.000 54.139 20.2 20.0 40.207 54.656 + 0.881E+07 -3.328 40.173 54.474 -30.000 -30.000 54.139 20.0 19.8 40.179 54.661 + 0.891E+07 -3.340 40.161 54.481 -30.000 -30.000 54.139 19.8 19.6 40.167 54.665 + 0.901E+07 -3.344 40.157 54.487 -30.000 -30.000 54.139 19.6 19.4 40.162 54.669 + 0.911E+07 -3.348 40.153 54.493 -30.000 -30.000 54.139 19.4 19.2 40.158 54.674 + 0.921E+07 -3.352 40.149 54.500 -30.000 -30.000 54.139 19.2 19.1 40.154 54.678 + 0.931E+07 -3.356 40.145 54.506 -30.000 -30.000 54.139 19.0 18.9 40.149 54.682 + 0.941E+07 -3.360 40.141 54.511 -30.000 -30.000 54.139 18.8 18.7 40.145 54.686 + 0.951E+07 -3.363 40.137 54.517 -30.000 -30.000 54.139 18.6 18.5 40.141 54.690 + 0.961E+07 -3.367 40.134 54.523 -30.000 -30.000 54.139 18.5 18.4 40.137 54.694 + 0.971E+07 -3.371 40.130 54.528 -30.000 -30.000 54.139 18.3 18.2 40.134 54.697 + 0.981E+07 -3.374 40.127 54.534 -30.000 -30.000 54.139 18.1 18.1 40.130 54.701 + 0.991E+07 -3.378 40.123 54.539 -30.000 -30.000 54.139 18.0 17.9 40.126 54.705 + 0.100E+08 -3.381 40.120 54.544 -30.000 -30.000 54.139 17.8 17.8 40.123 54.708 + 0.101E+08 -3.385 40.116 54.549 -30.000 -30.000 54.139 17.7 17.6 40.119 54.712 + 0.102E+08 -3.388 40.113 54.554 -30.000 -30.000 54.139 17.6 17.5 40.116 54.715 + 0.103E+08 -3.391 40.110 54.559 -30.000 -30.000 54.139 17.4 17.3 40.112 54.719 + 0.104E+08 -3.394 40.107 54.564 -30.000 -30.000 54.139 17.3 17.2 40.109 54.722 + 0.105E+08 -3.398 40.103 54.569 -30.000 -30.000 54.139 17.2 17.1 40.106 54.725 + 0.106E+08 -3.401 40.100 54.573 -30.000 -30.000 54.139 17.0 16.9 40.102 54.728 + 0.107E+08 -3.404 40.097 54.578 -30.000 -30.000 54.139 16.9 16.8 40.099 54.732 + 0.108E+08 -3.407 40.094 54.582 -30.000 -30.000 54.139 16.8 16.7 40.096 54.735 + 0.109E+08 -3.410 40.091 54.587 -30.000 -30.000 54.139 16.7 16.6 40.093 54.738 + 0.110E+08 -3.413 40.088 54.591 -30.000 -30.000 54.139 16.5 16.5 40.090 54.741 + 0.111E+08 -3.416 40.085 54.595 -30.000 -30.000 54.139 16.4 16.4 40.087 54.744 + 0.112E+08 -3.418 40.083 54.600 -30.000 -30.000 54.139 16.3 16.3 40.084 54.747 + 0.113E+08 -3.421 40.080 54.604 -30.000 -30.000 54.139 16.2 16.1 40.081 54.750 + 0.114E+08 -3.424 40.077 54.608 -30.000 -30.000 54.139 16.1 16.0 40.078 54.753 + 0.115E+08 -3.427 40.074 54.612 -30.000 -30.000 54.139 16.0 15.9 40.076 54.756 + 0.116E+08 -3.430 40.071 54.616 -30.000 -30.000 54.139 15.9 15.8 40.073 54.759 + 0.117E+08 -3.432 40.069 54.620 -30.000 -30.000 54.139 15.8 15.7 40.070 54.761 + 0.118E+08 -3.435 40.066 54.623 -30.000 -30.000 54.139 15.7 15.6 40.067 54.764 + 0.119E+08 -3.438 40.063 54.627 -30.000 -30.000 54.139 15.6 15.5 40.064 54.767 + 0.120E+08 -3.440 40.061 54.631 -30.000 -30.000 54.139 15.5 15.5 40.062 54.770 + 0.121E+08 -3.443 40.058 54.634 -30.000 -30.000 54.139 15.4 15.4 40.059 54.772 + 0.122E+08 -3.445 40.056 54.638 -30.000 -30.000 54.139 15.3 15.3 40.056 54.775 + 0.123E+08 -3.448 40.053 54.642 -30.000 -30.000 54.139 15.2 15.2 40.054 54.777 + 0.124E+08 -3.450 40.051 54.645 -30.000 -30.000 54.139 15.1 15.1 40.051 54.780 + 0.125E+08 -3.453 40.048 54.649 -30.000 -30.000 54.139 15.1 15.0 40.049 54.783 + 0.126E+08 -3.454 40.047 54.652 -30.000 -30.000 54.139 15.0 14.9 40.048 54.785 + 0.127E+08 -3.456 40.045 54.655 -30.000 -30.000 54.139 14.9 14.8 40.045 54.788 + 0.128E+08 -3.458 40.042 54.659 -30.000 -30.000 54.139 14.8 14.8 40.043 54.790 + 0.129E+08 -3.461 40.040 54.662 -30.000 -30.000 54.139 14.7 14.7 40.041 54.792 + 0.130E+08 -3.463 40.038 54.665 -30.000 -30.000 54.139 14.7 14.6 40.038 54.795 + 0.131E+08 -3.465 40.035 54.668 -30.000 -30.000 54.139 14.6 14.5 40.036 54.797 + 0.132E+08 -3.468 40.033 54.672 -30.000 -30.000 54.139 14.5 14.5 40.034 54.800 + 0.133E+08 -3.470 40.031 54.675 -30.000 -30.000 54.139 14.4 14.4 40.032 54.802 + 0.134E+08 -3.472 40.029 54.678 -30.000 -30.000 54.139 14.4 14.3 40.029 54.804 + 0.135E+08 -3.474 40.027 54.681 -30.000 -30.000 54.139 14.3 14.2 40.027 54.807 + 0.136E+08 -3.477 40.024 54.684 -30.000 -30.000 54.139 14.2 14.2 40.025 54.809 + 0.137E+08 -3.479 40.022 54.687 -30.000 -30.000 54.139 14.1 14.1 40.023 54.811 + 0.138E+08 -3.481 40.020 54.690 -30.000 -30.000 54.139 14.1 14.0 40.021 54.813 + 0.139E+08 -3.483 40.018 54.693 -30.000 -30.000 54.139 14.0 14.0 40.019 54.815 + 0.140E+08 -3.485 40.016 54.696 -30.000 -30.000 54.139 13.9 13.9 40.016 54.818 + 0.141E+08 -3.487 40.014 54.698 -30.000 -30.000 54.139 13.9 13.8 40.014 54.820 + 0.142E+08 -3.489 40.012 54.701 -30.000 -30.000 54.139 13.8 13.8 40.012 54.822 + 0.143E+08 -3.491 40.010 54.704 -30.000 -30.000 54.139 13.7 13.7 40.010 54.824 + 0.144E+08 -3.493 40.008 54.707 -30.000 -30.000 54.139 13.7 13.7 40.008 54.826 + 0.145E+08 -3.495 40.006 54.709 -30.000 -30.000 54.139 13.6 13.6 40.006 54.828 + 0.146E+08 -3.497 40.004 54.712 -30.000 -30.000 54.139 13.6 13.5 40.004 54.830 + 0.147E+08 -3.499 40.002 54.715 -30.000 -30.000 54.139 13.5 13.5 40.002 54.832 + 0.148E+08 -3.501 40.000 54.717 -30.000 -30.000 54.139 13.4 13.4 40.000 54.834 + 0.149E+08 -3.503 39.998 54.720 -30.000 -30.000 54.139 13.4 13.4 39.998 54.836 + 0.150E+08 -3.505 39.996 54.723 -30.000 -30.000 54.139 13.3 13.3 39.996 54.838 + 0.151E+08 -3.507 39.994 54.725 -30.000 -30.000 54.139 13.3 13.2 39.994 54.840 + 0.152E+08 -3.509 39.992 54.728 -30.000 -30.000 54.139 13.2 13.2 39.992 54.842 + 0.153E+08 -3.511 39.990 54.730 -30.000 -30.000 54.139 13.2 13.1 39.990 54.844 + 0.154E+08 -3.513 39.988 54.733 -30.000 -30.000 54.139 13.1 13.1 39.988 54.846 + 0.155E+08 -3.515 39.986 54.735 -30.000 -30.000 54.139 13.1 13.0 39.986 54.848 + 0.156E+08 -3.517 39.984 54.738 -30.000 -30.000 54.139 13.0 13.0 39.985 54.850 + 0.157E+08 -3.518 39.983 54.740 -30.000 -30.000 54.139 13.0 12.9 39.983 54.852 + 0.158E+08 -3.520 39.981 54.742 -30.000 -30.000 54.139 12.9 12.9 39.981 54.853 + 0.159E+08 -3.522 39.979 54.745 -30.000 -30.000 54.139 12.9 12.8 39.979 54.855 + 0.160E+08 -3.524 39.977 54.747 -30.000 -30.000 54.139 12.8 12.8 39.977 54.857 + 0.161E+08 -3.526 39.975 54.749 -30.000 -30.000 54.139 12.8 12.7 39.975 54.859 + 0.162E+08 -3.528 39.973 54.752 -30.000 -30.000 54.139 12.7 12.7 39.974 54.861 + 0.163E+08 -3.529 39.972 54.754 -30.000 -30.000 54.139 12.7 12.6 39.972 54.862 + 0.164E+08 -3.531 39.970 54.756 -30.000 -30.000 54.139 12.6 12.6 39.970 54.864 + 0.165E+08 -3.533 39.968 54.758 -30.000 -30.000 54.139 12.6 12.5 39.968 54.866 + 0.166E+08 -3.535 39.966 54.761 -30.000 -30.000 54.139 12.5 12.5 39.967 54.868 + 0.167E+08 -3.536 39.965 54.763 -30.000 -30.000 54.139 12.5 12.4 39.965 54.869 + 0.168E+08 -3.538 39.963 54.765 -30.000 -30.000 54.139 12.4 12.4 39.963 54.871 + 0.169E+08 -3.540 39.961 54.767 -30.000 -30.000 54.139 12.4 12.4 39.961 54.873 + 0.170E+08 -3.541 39.960 54.769 -30.000 -30.000 54.139 12.3 12.3 39.960 54.874 + 0.171E+08 -3.543 39.958 54.771 -30.000 -30.000 54.139 12.3 12.3 39.958 54.876 + 0.172E+08 -3.545 39.956 54.773 -30.000 -30.000 54.139 12.3 12.2 39.956 54.878 + 0.173E+08 -3.546 39.955 54.775 -30.000 -30.000 54.139 12.2 12.2 39.955 54.879 + 0.174E+08 -3.548 39.953 54.778 -30.000 -30.000 54.139 12.2 12.1 39.953 54.881 + 0.175E+08 -3.550 39.951 54.780 -30.000 -30.000 54.139 12.1 12.1 39.951 54.883 + 0.176E+08 -3.551 39.950 54.782 -30.000 -30.000 54.139 12.1 12.1 39.950 54.884 + 0.177E+08 -3.553 39.948 54.784 -30.000 -30.000 54.139 12.0 12.0 39.948 54.886 + 0.178E+08 -3.554 39.947 54.786 -30.000 -30.000 54.139 12.0 12.0 39.947 54.887 + 0.179E+08 -3.554 39.947 54.788 -30.000 -30.000 54.139 12.0 11.9 39.947 54.889 + 0.180E+08 -3.556 39.945 54.790 -30.000 -30.000 54.139 11.9 11.9 39.945 54.890 + 0.181E+08 -3.557 39.944 54.791 -30.000 -30.000 54.139 11.9 11.9 39.944 54.892 + 0.182E+08 -3.559 39.942 54.793 -30.000 -30.000 54.139 11.8 11.8 39.942 54.894 + 0.183E+08 -3.560 39.941 54.795 -30.000 -30.000 54.139 11.8 11.8 39.941 54.895 + 0.184E+08 -3.562 39.939 54.797 -30.000 -30.000 54.139 11.8 11.8 39.939 54.897 + 0.185E+08 -3.563 39.938 54.799 -30.000 -30.000 54.139 11.7 11.7 39.938 54.898 + 0.186E+08 -3.565 39.936 54.801 -30.000 -30.000 54.139 11.7 11.7 39.936 54.900 + 0.187E+08 -3.566 39.935 54.803 -30.000 -30.000 54.139 11.7 11.6 39.935 54.901 + 0.188E+08 -3.568 39.933 54.805 -30.000 -30.000 54.139 11.6 11.6 39.933 54.903 + 0.189E+08 -3.569 39.932 54.807 -30.000 -30.000 54.139 11.6 11.6 39.932 54.904 + 0.190E+08 -3.571 39.930 54.808 -30.000 -30.000 54.139 11.6 11.5 39.931 54.905 + 0.191E+08 -3.572 39.929 54.810 -30.000 -30.000 54.139 11.5 11.5 39.929 54.907 + 0.192E+08 -3.573 39.928 54.812 -30.000 -30.000 54.139 11.5 11.5 39.928 54.908 + 0.193E+08 -3.575 39.926 54.814 -30.000 -30.000 54.139 11.5 11.4 39.926 54.910 + 0.194E+08 -3.576 39.924 54.816 -30.000 -30.000 54.139 11.4 11.4 39.925 54.911 + 0.195E+08 -3.578 39.923 54.817 -30.000 -30.000 54.139 11.4 11.4 39.923 54.913 + 0.196E+08 -3.579 39.922 54.819 -30.000 -30.000 54.139 11.4 11.3 39.922 54.914 + 0.197E+08 -3.581 39.920 54.821 -30.000 -30.000 54.139 11.3 11.3 39.920 54.915 + 0.198E+08 -3.582 39.919 54.822 -30.000 -30.000 54.139 11.3 11.3 39.919 54.917 + 0.199E+08 -3.583 39.918 54.824 -30.000 -30.000 54.139 11.3 11.2 39.918 54.918 + 0.200E+08 -3.585 39.916 54.826 -30.000 -30.000 54.139 11.2 11.2 39.916 54.919 + 0.201E+08 -3.586 39.915 54.828 -30.000 -30.000 54.139 11.2 11.2 39.915 54.921 + 0.202E+08 -3.587 39.913 54.829 -30.000 -30.000 54.139 11.2 11.1 39.914 54.922 + 0.203E+08 -3.589 39.912 54.831 -30.000 -30.000 54.139 11.1 11.1 39.912 54.924 + 0.204E+08 -3.590 39.911 54.832 -30.000 -30.000 54.139 11.1 11.1 39.911 54.925 + 0.205E+08 -3.592 39.909 54.834 -30.000 -30.000 54.139 11.1 11.1 39.909 54.926 + 0.206E+08 -3.593 39.908 54.836 -30.000 -30.000 54.139 11.0 11.0 39.908 54.927 + 0.207E+08 -3.594 39.907 54.837 -30.000 -30.000 54.139 11.0 11.0 39.907 54.929 + 0.208E+08 -3.595 39.905 54.839 -30.000 -30.000 54.139 11.0 11.0 39.906 54.930 + 0.209E+08 -3.597 39.904 54.841 -30.000 -30.000 54.139 10.9 10.9 39.904 54.931 + 0.210E+08 -3.598 39.903 54.842 -30.000 -30.000 54.139 10.9 10.9 39.903 54.933 + 0.211E+08 -3.599 39.901 54.844 -30.000 -30.000 54.139 10.9 10.9 39.902 54.934 + 0.212E+08 -3.601 39.900 54.845 -30.000 -30.000 54.139 10.9 10.8 39.900 54.935 + 0.213E+08 -3.602 39.899 54.847 -30.000 -30.000 54.139 10.8 10.8 39.899 54.936 + 0.214E+08 -3.603 39.897 54.848 -30.000 -30.000 54.139 10.8 10.8 39.898 54.938 + 0.215E+08 -3.604 39.896 54.850 -30.000 -30.000 54.139 10.8 10.8 39.897 54.939 + 0.216E+08 -3.606 39.895 54.851 -30.000 -30.000 54.139 10.8 10.7 39.895 54.940 + 0.217E+08 -3.607 39.894 54.853 -30.000 -30.000 54.139 10.7 10.7 39.894 54.941 + 0.218E+08 -3.609 39.892 54.854 -30.000 -30.000 54.139 10.7 10.7 39.892 54.943 + 0.219E+08 -3.610 39.891 54.856 -30.000 -30.000 54.139 10.7 10.7 39.891 54.944 + 0.220E+08 -3.611 39.890 54.857 -30.000 -30.000 54.139 10.6 10.6 39.890 54.945 + 0.221E+08 -3.612 39.889 54.859 -30.000 -30.000 54.139 10.6 10.6 39.889 54.946 + 0.222E+08 -3.613 39.887 54.860 -30.000 -30.000 54.139 10.6 10.6 39.887 54.947 + 0.223E+08 -3.615 39.886 54.862 -30.000 -30.000 54.139 10.6 10.6 39.886 54.949 + 0.224E+08 -3.616 39.885 54.863 -30.000 -30.000 54.139 10.5 10.5 39.885 54.950 + 0.225E+08 -3.617 39.884 54.865 -30.000 -30.000 54.139 10.5 10.5 39.884 54.951 + 0.226E+08 -3.618 39.882 54.866 -30.000 -30.000 54.139 10.5 10.5 39.883 54.952 + 0.227E+08 -3.620 39.881 54.867 -30.000 -30.000 54.139 10.5 10.4 39.881 54.953 + 0.228E+08 -3.621 39.880 54.869 -30.000 -30.000 54.139 10.4 10.4 39.880 54.954 + 0.229E+08 -3.622 39.879 54.870 -30.000 -30.000 54.139 10.4 10.4 39.879 54.956 + 0.230E+08 -3.623 39.878 54.872 -30.000 -30.000 54.139 10.4 10.4 39.878 54.957 + 0.231E+08 -3.624 39.876 54.873 -30.000 -30.000 54.139 10.4 10.4 39.876 54.958 + 0.232E+08 -3.626 39.875 54.874 -30.000 -30.000 54.139 10.3 10.3 39.875 54.959 + 0.233E+08 -3.627 39.874 54.876 -30.000 -30.000 54.139 10.3 10.3 39.874 54.960 + 0.234E+08 -3.628 39.873 54.877 -30.000 -30.000 54.139 10.3 10.3 39.873 54.961 + 0.235E+08 -3.629 39.872 54.878 -30.000 -30.000 54.139 10.3 10.3 39.872 54.962 + 0.236E+08 -3.630 39.871 54.880 -30.000 -30.000 54.139 10.2 10.2 39.871 54.964 + 0.237E+08 -3.632 39.869 54.881 -30.000 -30.000 54.139 10.2 10.2 39.869 54.965 + 0.238E+08 -3.632 39.868 54.882 -30.000 -30.000 54.139 10.2 10.2 39.868 54.966 + 0.239E+08 -3.634 39.867 54.884 -30.000 -30.000 54.139 10.2 10.2 39.867 54.967 + 0.240E+08 -3.635 39.866 54.885 -30.000 -30.000 54.139 10.2 10.1 39.866 54.968 + 0.241E+08 -3.636 39.865 54.886 -30.000 -30.000 54.139 10.1 10.1 39.865 54.969 + 0.242E+08 -3.637 39.863 54.888 -30.000 -30.000 54.139 10.1 10.1 39.863 54.970 + 0.243E+08 -3.638 39.863 54.889 -30.000 -30.000 54.139 10.1 10.1 39.863 54.971 + 0.244E+08 -3.640 39.861 54.890 -30.000 -30.000 54.139 10.1 10.1 39.861 54.972 + 0.245E+08 -3.641 39.860 54.892 -30.000 -30.000 54.139 10.0 10.0 39.860 54.973 + 0.246E+08 -3.642 39.859 54.893 -30.000 -30.000 54.139 10.0 10.0 39.859 54.974 + 0.247E+08 -3.643 39.858 54.894 -30.000 -30.000 54.139 10.0 10.0 39.858 54.975 + 0.248E+08 -3.644 39.857 54.895 -30.000 -30.000 54.139 10.0 10.0 39.857 54.976 + 0.249E+08 -3.645 39.856 54.897 -30.000 -30.000 54.139 10.0 9.9 39.856 54.977 + 0.250E+08 -3.646 39.855 54.898 -30.000 -30.000 54.139 9.9 9.9 39.855 54.978 + 0.251E+08 -3.647 39.854 54.899 -30.000 -30.000 54.139 9.9 9.9 39.854 54.979 + 0.252E+08 -3.648 39.852 54.900 -30.000 -30.000 54.139 9.9 9.9 39.852 54.980 + 0.253E+08 -3.650 39.851 54.902 -30.000 -30.000 54.139 9.9 9.9 39.851 54.981 + 0.254E+08 -3.651 39.850 54.903 -30.000 -30.000 54.139 9.9 9.8 39.850 54.983 + 0.255E+08 -3.652 39.849 54.904 -30.000 -30.000 54.139 9.8 9.8 39.849 54.984 + 0.256E+08 -3.653 39.848 54.905 -30.000 -30.000 54.139 9.8 9.8 39.848 54.985 + 0.257E+08 -3.654 39.847 54.906 -30.000 -30.000 54.139 9.8 9.8 39.847 54.986 + 0.258E+08 -3.655 39.846 54.908 -30.000 -30.000 54.139 9.8 9.8 39.846 54.987 + 0.259E+08 -3.656 39.845 54.909 -30.000 -30.000 54.139 9.8 9.7 39.845 54.987 + 0.260E+08 -3.657 39.844 54.910 -30.000 -30.000 54.139 9.7 9.7 39.844 54.988 + 0.261E+08 -3.658 39.843 54.911 -30.000 -30.000 54.139 9.7 9.7 39.843 54.989 + 0.262E+08 -3.659 39.842 54.912 -30.000 -30.000 54.139 9.7 9.7 39.842 54.990 + 0.263E+08 -3.660 39.841 54.913 -30.000 -30.000 54.139 9.7 9.7 39.841 54.991 + 0.264E+08 -3.661 39.839 54.915 -30.000 -30.000 54.139 9.7 9.7 39.840 54.992 + 0.265E+08 -3.662 39.839 54.916 -30.000 -30.000 54.139 9.6 9.6 39.839 54.993 + 0.266E+08 -3.663 39.838 54.917 -30.000 -30.000 54.139 9.6 9.6 39.838 54.994 + 0.267E+08 -3.665 39.836 54.918 -30.000 -30.000 54.139 9.6 9.6 39.836 54.995 + 0.268E+08 -3.665 39.836 54.919 -30.000 -30.000 54.139 9.6 9.6 39.836 54.996 + 0.269E+08 -3.667 39.834 54.920 -30.000 -30.000 54.139 9.6 9.6 39.834 54.997 + 0.270E+08 -3.667 39.833 54.921 -30.000 -30.000 54.139 9.6 9.5 39.834 54.998 + 0.271E+08 -3.669 39.832 54.923 -30.000 -30.000 54.139 9.5 9.5 39.832 54.999 + 0.272E+08 -3.669 39.831 54.924 -30.000 -30.000 54.139 9.5 9.5 39.832 55.000 + 0.273E+08 -3.671 39.830 54.925 -30.000 -30.000 54.139 9.5 9.5 39.830 55.001 + 0.274E+08 -3.671 39.829 54.926 -30.000 -30.000 54.139 9.5 9.5 39.829 55.002 + 0.275E+08 -3.673 39.828 54.927 -30.000 -30.000 54.139 9.5 9.5 39.828 55.003 + 0.276E+08 -3.674 39.827 54.928 -30.000 -30.000 54.139 9.4 9.4 39.827 55.004 + 0.277E+08 -3.674 39.827 54.929 -30.000 -30.000 54.139 9.4 9.4 39.827 55.005 + 0.278E+08 -3.676 39.825 54.930 -30.000 -30.000 54.139 9.4 9.4 39.825 55.005 + 0.279E+08 -3.677 39.824 54.931 -30.000 -30.000 54.139 9.4 9.4 39.824 55.006 + 0.280E+08 -3.677 39.823 54.932 -30.000 -30.000 54.139 9.4 9.4 39.824 55.007 + 0.281E+08 -3.679 39.822 54.933 -30.000 -30.000 54.139 9.4 9.3 39.822 55.008 + 0.282E+08 -3.679 39.821 54.934 -30.000 -30.000 54.139 9.3 9.3 39.821 55.009 + 0.283E+08 -3.681 39.820 54.935 -30.000 -30.000 54.139 9.3 9.3 39.820 55.010 + 0.284E+08 -3.681 39.820 54.937 -30.000 -30.000 54.139 9.3 9.3 39.820 55.011 + 0.285E+08 -3.683 39.818 54.938 -30.000 -30.000 54.139 9.3 9.3 39.818 55.012 + 0.286E+08 -3.684 39.817 54.939 -30.000 -30.000 54.139 9.3 9.3 39.817 55.013 + 0.287E+08 -3.684 39.817 54.940 -30.000 -30.000 54.139 9.3 9.3 39.817 55.013 + 0.288E+08 -3.685 39.815 54.941 -30.000 -30.000 54.139 9.2 9.2 39.815 55.014 + 0.289E+08 -3.686 39.815 54.942 -30.000 -30.000 54.139 9.2 9.2 39.815 55.015 + 0.290E+08 -3.688 39.813 54.943 -30.000 -30.000 54.139 9.2 9.2 39.813 55.016 + 0.291E+08 -3.688 39.813 54.944 -30.000 -30.000 54.139 9.2 9.2 39.813 55.017 + 0.292E+08 -3.689 39.812 54.945 -30.000 -30.000 54.139 9.2 9.2 39.812 55.018 + 0.293E+08 -3.690 39.811 54.946 -30.000 -30.000 54.139 9.2 9.2 39.811 55.019 + 0.294E+08 -3.691 39.810 54.947 -30.000 -30.000 54.139 9.1 9.1 39.810 55.019 + 0.295E+08 -3.692 39.809 54.948 -30.000 -30.000 54.139 9.1 9.1 39.809 55.020 + 0.296E+08 -3.693 39.808 54.949 -30.000 -30.000 54.139 9.1 9.1 39.808 55.021 + 0.297E+08 -3.694 39.807 54.950 -30.000 -30.000 54.139 9.1 9.1 39.807 55.022 + 0.298E+08 -3.695 39.806 54.951 -30.000 -30.000 54.139 9.1 9.1 39.806 55.023 + 0.299E+08 -3.696 39.805 54.952 -30.000 -30.000 54.139 9.1 9.1 39.805 55.024 + 0.300E+08 -3.697 39.804 54.953 -30.000 -30.000 54.139 9.1 9.0 39.804 55.024 + 0.301E+08 -3.697 39.803 54.954 -30.000 -30.000 54.139 9.0 9.0 39.804 55.025 + 0.302E+08 -3.699 39.802 54.955 -30.000 -30.000 54.139 9.0 9.0 39.802 55.026 + 0.303E+08 -3.700 39.801 54.956 -30.000 -30.000 54.139 9.0 9.0 39.801 55.027 + 0.304E+08 -3.685 39.816 54.957 -30.000 -30.000 54.139 9.0 9.0 39.816 55.028 + 0.305E+08 -3.678 39.822 54.958 -30.000 -30.000 54.139 9.0 9.0 39.823 55.029 + 0.306E+08 -3.679 39.822 54.959 -30.000 -30.000 54.139 9.0 9.0 39.822 55.029 + 0.307E+08 -3.679 39.821 54.960 -30.000 -30.000 54.139 9.0 8.9 39.821 55.030 + 0.308E+08 -3.680 39.821 54.961 -30.000 -30.000 54.139 8.9 8.9 39.821 55.031 + 0.309E+08 -3.681 39.820 54.962 -30.000 -30.000 54.139 8.9 8.9 39.820 55.032 + 0.310E+08 -3.681 39.819 54.963 -30.000 -30.000 54.139 8.9 8.9 39.819 55.033 + 0.311E+08 -3.682 39.819 54.964 -30.000 -30.000 54.139 8.9 8.9 39.819 55.034 + 0.312E+08 -3.683 39.818 54.964 -30.000 -30.000 54.139 8.9 8.9 39.818 55.034 + 0.313E+08 -3.683 39.818 54.965 -30.000 -30.000 54.139 8.9 8.9 39.818 55.035 + 0.314E+08 -3.684 39.817 54.966 -30.000 -30.000 54.139 8.8 8.8 39.817 55.036 + 0.315E+08 -3.685 39.816 54.967 -30.000 -30.000 54.139 8.8 8.8 39.816 55.037 + 0.316E+08 -3.685 39.816 54.968 -30.000 -30.000 54.139 8.8 8.8 39.816 55.038 + 0.317E+08 -3.686 39.815 54.969 -30.000 -30.000 54.139 8.8 8.8 39.815 55.039 + 0.318E+08 -3.686 39.814 54.970 -30.000 -30.000 54.139 8.8 8.8 39.814 55.039 + 0.319E+08 -3.687 39.814 54.971 -30.000 -30.000 54.139 8.8 8.8 39.814 55.040 + 0.320E+08 -3.688 39.813 54.972 -30.000 -30.000 54.139 8.8 8.8 39.813 55.041 + 0.321E+08 -3.688 39.813 54.973 -30.000 -30.000 54.139 8.7 8.7 39.813 55.042 + 0.322E+08 -3.689 39.812 54.974 -30.000 -30.000 54.139 8.7 8.7 39.812 55.043 + 0.323E+08 -3.690 39.811 54.975 -30.000 -30.000 54.139 8.7 8.7 39.811 55.043 + 0.324E+08 -3.690 39.811 54.976 -30.000 -30.000 54.139 8.7 8.7 39.811 55.044 + 0.325E+08 -3.691 39.810 54.977 -30.000 -30.000 54.139 8.7 8.7 39.810 55.045 + 0.326E+08 -3.691 39.810 54.978 -30.000 -30.000 54.139 8.7 8.7 39.810 55.046 + 0.327E+08 -3.692 39.809 54.979 -30.000 -30.000 54.139 8.7 8.7 39.809 55.047 + 0.328E+08 -3.692 39.809 54.980 -30.000 -30.000 54.139 8.7 8.6 39.809 55.047 + 0.329E+08 -3.693 39.808 54.981 -30.000 -30.000 54.139 8.6 8.6 39.808 55.048 + 0.330E+08 -3.693 39.808 54.982 -30.000 -30.000 54.139 8.6 8.6 39.808 55.049 + 0.331E+08 -3.694 39.807 54.982 -30.000 -30.000 54.139 8.6 8.6 39.807 55.050 + 0.332E+08 -3.694 39.807 54.983 -30.000 -30.000 54.139 8.6 8.6 39.807 55.051 + 0.333E+08 -3.695 39.806 54.984 -30.000 -30.000 54.139 8.6 8.6 39.806 55.051 + 0.334E+08 -3.696 39.805 54.985 -30.000 -30.000 54.139 8.6 8.6 39.805 55.052 + 0.335E+08 -3.696 39.805 54.986 -30.000 -30.000 54.139 8.6 8.5 39.805 55.053 + 0.336E+08 -3.697 39.804 54.987 -30.000 -30.000 54.139 8.5 8.5 39.804 55.054 + 0.337E+08 -3.697 39.803 54.988 -30.000 -30.000 54.139 8.5 8.5 39.804 55.054 + 0.338E+08 -3.698 39.803 54.989 -30.000 -30.000 54.139 8.5 8.5 39.803 55.055 + 0.339E+08 -3.699 39.802 54.990 -30.000 -30.000 54.139 8.5 8.5 39.802 55.056 + 0.340E+08 -3.699 39.802 54.991 -30.000 -30.000 54.139 8.5 8.5 39.802 55.057 + 0.341E+08 -3.700 39.801 54.991 -30.000 -30.000 54.139 8.5 8.5 39.801 55.058 + 0.342E+08 -3.701 39.800 54.992 -30.000 -30.000 54.139 8.5 8.5 39.800 55.058 + 0.343E+08 -3.701 39.800 54.993 -30.000 -30.000 54.139 8.5 8.4 39.800 55.059 + 0.344E+08 -3.702 39.799 54.994 -30.000 -30.000 54.139 8.4 8.4 39.799 55.060 + 0.345E+08 -3.702 39.799 54.995 -30.000 -30.000 54.139 8.4 8.4 39.799 55.061 + 0.346E+08 -3.703 39.798 54.996 -30.000 -30.000 54.139 8.4 8.4 39.798 55.061 + 0.347E+08 -3.703 39.797 54.997 -30.000 -30.000 54.139 8.4 8.4 39.798 55.062 + 0.348E+08 -3.703 39.798 54.998 -30.000 -30.000 54.139 8.4 8.4 39.798 55.063 + 0.349E+08 -3.705 39.796 54.998 -30.000 -30.000 54.139 8.4 8.4 39.796 55.064 + 0.350E+08 -3.705 39.796 54.999 -30.000 -30.000 54.139 8.4 8.4 39.796 55.064 + 0.351E+08 -3.706 39.795 55.000 -30.000 -30.000 54.139 8.4 8.3 39.795 55.065 + 0.352E+08 -3.706 39.795 55.001 -30.000 -30.000 54.139 8.3 8.3 39.795 55.066 + 0.353E+08 -3.707 39.794 55.002 -30.000 -30.000 54.139 8.3 8.3 39.794 55.066 + 0.354E+08 -3.707 39.794 55.003 -30.000 -30.000 54.139 8.3 8.3 39.794 55.067 + 0.355E+08 -3.708 39.793 55.004 -30.000 -30.000 54.139 8.3 8.3 39.793 55.068 + 0.356E+08 -3.708 39.793 55.004 -30.000 -30.000 54.139 8.3 8.3 39.793 55.069 + 0.357E+08 -3.709 39.792 55.005 -30.000 -30.000 54.139 8.3 8.3 39.792 55.069 + 0.358E+08 -3.709 39.792 55.006 -30.000 -30.000 54.139 8.3 8.3 39.792 55.070 + 0.359E+08 -3.710 39.791 55.007 -30.000 -30.000 54.139 8.3 8.3 39.791 55.071 + 0.360E+08 -3.710 39.791 55.008 -30.000 -30.000 54.139 8.2 8.2 39.791 55.072 + 0.361E+08 -3.711 39.790 55.009 -30.000 -30.000 54.139 8.2 8.2 39.790 55.072 + 0.362E+08 -3.712 39.789 55.009 -30.000 -30.000 54.139 8.2 8.2 39.789 55.073 + 0.363E+08 -3.712 39.789 55.010 -30.000 -30.000 54.139 8.2 8.2 39.789 55.074 + 0.364E+08 -3.713 39.788 55.011 -30.000 -30.000 54.139 8.2 8.2 39.788 55.074 + 0.365E+08 -3.713 39.788 55.012 -30.000 -30.000 54.139 8.2 8.2 39.788 55.075 + 0.366E+08 -3.713 39.788 55.013 -30.000 -30.000 54.139 8.2 8.2 39.788 55.076 + 0.367E+08 -3.714 39.787 55.013 -30.000 -30.000 54.139 8.2 8.2 39.787 55.076 + 0.368E+08 -3.715 39.786 55.014 -30.000 -30.000 54.139 8.2 8.1 39.786 55.077 + 0.369E+08 -3.715 39.786 55.015 -30.000 -30.000 54.139 8.1 8.1 39.786 55.078 + 0.370E+08 -3.716 39.785 55.016 -30.000 -30.000 54.139 8.1 8.1 39.785 55.079 + 0.371E+08 -3.716 39.785 55.017 -30.000 -30.000 54.139 8.1 8.1 39.785 55.079 + 0.372E+08 -3.717 39.784 55.017 -30.000 -30.000 54.139 8.1 8.1 39.784 55.080 + 0.373E+08 -3.717 39.784 55.018 -30.000 -30.000 54.139 8.1 8.1 39.784 55.081 + 0.374E+08 -3.718 39.783 55.019 -30.000 -30.000 54.139 8.1 8.1 39.783 55.081 + 0.375E+08 -3.718 39.783 55.020 -30.000 -30.000 54.139 8.1 8.1 39.783 55.082 + 0.376E+08 -3.719 39.782 55.021 -30.000 -30.000 54.139 8.1 8.1 39.782 55.083 + 0.377E+08 -3.719 39.781 55.021 -30.000 -30.000 54.139 8.1 8.0 39.781 55.083 + 0.378E+08 -3.720 39.781 55.022 -30.000 -30.000 54.139 8.0 8.0 39.781 55.084 + 0.379E+08 -3.720 39.781 55.023 -30.000 -30.000 54.139 8.0 8.0 39.781 55.085 + 0.380E+08 -3.721 39.780 55.024 -30.000 -30.000 54.139 8.0 8.0 39.780 55.085 + 0.381E+08 -3.722 39.779 55.025 -30.000 -30.000 54.139 8.0 8.0 39.779 55.086 + 0.382E+08 -3.922 39.579 55.025 -30.000 -30.000 54.139 8.0 0.0 39.579 55.087 + 0.383E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.745 55.087 + 0.384E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.746 55.087 + 0.385E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.744 55.087 + 0.386E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.736 55.087 + 0.387E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.738 55.087 + 0.388E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.741 55.087 + 0.389E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.739 55.087 + 0.390E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.740 55.087 + 0.391E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.739 55.087 + 0.392E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.731 55.087 + 0.393E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.727 55.087 + 0.394E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.731 55.087 + 0.395E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.728 55.087 + 0.396E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.731 55.087 + 0.397E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.723 55.087 + 0.398E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.724 55.087 + 0.399E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.724 55.087 + 0.400E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.719 55.087 + 0.401E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.719 55.087 + 0.402E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.716 55.087 + 0.403E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.717 55.087 + 0.404E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.716 55.087 + 0.405E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.717 55.087 + 0.406E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.716 55.087 + 0.407E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.705 55.087 + 0.408E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.703 55.087 + 0.409E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.712 55.087 + 0.410E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.710 55.087 + 0.411E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.702 55.087 + 0.412E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.702 55.087 + 0.413E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.697 55.087 + 0.414E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.697 55.087 + 0.415E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.701 55.087 + 0.416E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.696 55.087 + 0.417E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.696 55.087 + 0.418E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.693 55.087 + 0.419E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.693 55.087 + 0.420E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.692 55.087 + 0.421E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.687 55.087 + 0.422E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.684 55.087 + 0.423E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.685 55.087 + 0.424E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.683 55.087 + 0.425E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.685 55.087 + 0.426E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.682 55.087 + 0.427E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.682 55.087 + 0.428E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.679 55.087 + 0.429E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.679 55.087 + 0.430E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.679 55.087 + 0.431E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.680 55.087 + 0.432E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.675 55.087 + 0.433E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.675 55.087 + 0.434E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.669 55.087 + 0.435E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.673 55.087 + 0.436E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.669 55.087 + 0.437E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.661 55.087 + 0.438E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.666 55.087 + 0.439E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.670 55.087 + 0.440E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.668 55.087 + 0.441E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.661 55.087 + 0.442E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.663 55.087 + 0.443E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.668 55.087 + 0.444E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.667 55.087 + 0.445E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.659 55.087 + 0.446E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.657 55.087 + 0.447E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.651 55.087 + 0.448E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.668 55.087 + 0.449E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.666 55.087 + 0.450E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.663 55.087 + 0.451E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.656 55.087 + 0.452E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.646 55.087 + 0.453E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.650 55.087 + 0.454E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.647 55.087 + 0.455E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.658 55.087 + 0.456E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.658 55.087 + 0.457E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.645 55.087 + 0.458E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.645 55.087 + 0.459E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.648 55.087 + 0.460E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.652 55.087 + 0.461E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.652 55.087 + 0.462E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.646 55.087 + 0.463E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.653 55.087 + 0.464E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.643 55.087 + 0.465E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.649 55.087 + 0.466E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.649 55.087 + 0.467E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.650 55.087 + 0.468E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.650 55.087 + 0.469E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.639 55.087 + 0.470E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.643 55.087 + 0.471E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.645 55.087 + 0.472E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.644 55.087 + 0.473E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.643 55.087 + 0.474E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.631 55.087 + 0.475E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.625 55.087 + 0.476E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.644 55.087 + 0.477E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.639 55.087 + 0.478E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.626 55.087 + 0.479E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.633 55.087 + 0.480E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.635 55.087 + 0.481E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.627 55.087 + 0.482E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.628 55.087 + 0.483E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.614 55.087 + 0.484E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.618 55.087 + 0.485E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.606 55.087 + 0.486E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.606 55.087 + 0.487E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.606 55.087 + 0.488E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.605 55.087 + 0.489E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.596 55.087 + 0.490E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.597 55.087 + 0.491E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.598 55.087 + 0.492E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.578 55.087 + 0.493E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.575 55.087 + 0.494E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.576 55.087 + 0.495E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.587 55.087 + 0.496E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.574 55.087 + 0.497E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.573 55.087 + 0.498E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.572 55.087 + 0.499E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.571 55.087 + 0.500E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.562 55.087 + 0.501E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.561 55.087 + 0.502E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.555 55.087 + 0.503E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.554 55.087 + 0.504E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.562 55.087 + 0.505E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.549 55.087 + 0.506E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.547 55.087 + 0.507E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.547 55.087 + 0.508E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.546 55.087 + 0.509E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.534 55.087 + 0.510E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.528 55.087 + 0.511E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.543 55.087 + 0.512E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.526 55.087 + 0.513E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.528 55.087 + 0.514E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.527 55.087 + 0.515E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.526 55.087 + 0.516E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.522 55.087 + 0.517E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.509 55.087 + 0.518E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.507 55.087 + 0.519E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.506 55.087 + 0.520E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.501 55.087 + 0.521E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.500 55.087 + 0.522E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.494 55.087 + 0.523E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.484 55.087 + 0.524E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.492 55.087 + 0.525E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.496 55.087 + 0.526E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.481 55.087 + 0.527E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.466 55.087 + 0.528E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.480 55.087 + 0.529E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.478 55.087 + 0.530E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.478 55.087 + 0.531E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.459 55.087 + 0.532E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.462 55.087 + 0.533E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.475 55.087 + 0.534E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.453 55.087 + 0.535E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.456 55.087 + 0.536E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.451 55.087 + 0.537E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.453 55.087 + 0.538E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.453 55.087 + 0.539E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.452 55.087 + 0.540E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.444 55.087 + 0.541E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.438 55.087 + 0.542E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.434 55.087 + 0.543E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.433 55.087 + 0.544E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.432 55.087 + 0.545E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.431 55.087 + 0.546E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.430 55.087 + 0.547E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.429 55.087 + 0.548E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.409 55.087 + 0.549E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.408 55.087 + 0.550E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.417 55.087 + 0.551E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.406 55.087 + 0.552E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.405 55.087 + 0.553E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.405 55.087 + 0.554E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.404 55.087 + 0.555E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.403 55.087 + 0.556E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.398 55.087 + 0.557E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.401 55.087 + 0.558E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.396 55.087 + 0.559E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.400 55.087 + 0.560E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.388 55.087 + 0.561E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.381 55.087 + 0.562E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.376 55.087 + 0.563E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.375 55.087 + 0.564E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.371 55.087 + 0.565E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.357 55.087 + 0.566E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.356 55.087 + 0.567E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.352 55.087 + 0.568E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.354 55.087 + 0.569E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.354 55.087 + 0.570E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.345 55.087 + 0.571E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.344 55.087 + 0.572E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.343 55.087 + 0.573E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.350 55.087 + 0.574E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.326 55.087 + 0.575E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.345 55.087 + 0.576E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.347 55.087 + 0.577E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.347 55.087 + 0.578E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.328 55.087 + 0.579E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.327 55.087 + 0.580E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.321 55.087 + 0.581E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.326 55.087 + 0.582E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.328 55.087 + 0.583E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.324 55.087 + 0.584E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.315 55.087 + 0.585E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.322 55.087 + 0.586E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.304 55.087 + 0.587E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.303 55.087 + 0.588E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.288 55.087 + 0.589E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.280 55.087 + 0.590E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.276 55.087 + 0.591E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.275 55.087 + 0.592E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.274 55.087 + 0.593E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.273 55.087 + 0.594E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.272 55.087 + 0.595E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.273 55.087 + 0.596E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.272 55.087 + 0.597E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.272 55.087 + 0.598E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.250 55.087 + 0.599E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.267 55.087 + 0.600E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.248 55.087 + 0.601E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.248 55.087 + 0.602E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.247 55.087 + 0.603E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.246 55.087 + 0.604E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.245 55.087 + 0.605E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.244 55.087 + 0.606E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.236 55.087 + 0.607E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.215 55.087 + 0.608E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.198 55.087 + 0.609E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.198 55.087 + 0.610E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.213 55.087 + 0.611E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.212 55.087 + 0.612E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.195 55.087 + 0.613E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.194 55.087 + 0.614E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.188 55.087 + 0.615E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.187 55.087 + 0.616E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.183 55.087 + 0.617E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.182 55.087 + 0.618E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.173 55.087 + 0.619E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.172 55.087 + 0.620E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.166 55.087 + 0.621E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.168 55.087 + 0.622E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.169 55.087 + 0.623E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.167 55.087 + 0.624E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.125 55.087 + 0.625E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.162 55.087 + 0.626E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.151 55.087 + 0.627E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.126 55.087 + 0.628E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.118 55.087 + 0.629E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.080 55.087 + 0.630E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.079 55.087 + 0.631E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.078 55.087 + 0.632E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.078 55.087 + 0.633E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.077 55.087 + 0.634E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.076 55.087 + 0.635E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.101 55.087 + 0.636E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.075 55.087 + 0.637E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.074 55.087 + 0.638E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.074 55.087 + 0.639E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.094 55.087 + 0.640E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.040 55.087 + 0.641E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.039 55.087 + 0.642E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.064 55.087 + 0.643E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.037 55.087 + 0.644E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.062 55.087 + 0.645E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.024 55.087 + 0.646E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.023 55.087 + 0.647E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.022 55.087 + 0.648E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.034 55.087 + 0.649E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 34.988 55.087 + 0.650E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.036 55.087 + 0.651E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 34.987 55.087 + 0.652E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 34.986 55.087 + 0.653E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.003 55.087 + 0.654E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 34.985 55.087 + 0.655E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 34.984 55.087 + 0.656E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 34.983 55.087 + 0.657E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 34.982 55.087 + 0.658E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 34.982 55.087 + 0.659E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 34.981 55.087 + 0.660E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 34.997 55.087 + 0.661E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 34.951 55.087 + 0.662E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 34.895 55.087 + 0.663E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 34.894 55.087 + 0.664E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 34.962 55.087 + 0.665E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 34.849 55.087 + 0.666E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 34.930 55.087 + 0.667E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 34.848 55.087 + 0.668E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 34.890 55.087 + 0.669E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 34.886 55.087 + 0.670E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 34.889 55.087 + 0.671E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 34.888 55.087 + 0.672E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 34.888 55.087 + 0.673E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 34.844 55.087 + 0.674E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 34.796 55.087 + 0.675E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 34.842 55.087 + 0.676E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 34.842 55.087 + 0.677E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 34.791 55.087 + 0.678E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 34.793 55.087 + 0.679E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 34.789 55.087 + 0.680E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 34.839 55.087 + 0.681E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 34.791 55.087 + 0.682E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 34.726 55.087 + 0.683E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 34.725 55.087 + 0.684E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 34.786 55.087 + 0.685E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 34.724 55.087 + 0.686E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 34.723 55.087 + 0.687E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 34.722 55.087 + 0.688E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 34.722 55.087 + 0.689E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 34.640 55.087 + 0.690E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 34.639 55.087 + 0.691E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 34.639 55.087 + 0.692E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 34.642 55.087 + 0.693E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 34.637 55.087 + 0.694E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 34.534 55.087 + 0.695E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 34.614 55.087 + 0.696E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 34.613 55.087 + 0.697E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 34.638 55.087 + 0.698E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 34.634 55.087 + 0.699E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 34.633 55.087 + 0.700E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 34.633 55.087 + 0.701E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 34.533 55.087 + 0.702E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 34.631 55.087 + 0.703E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 34.627 55.087 + 0.704E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 34.527 55.087 + 0.705E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 34.362 55.087 + 0.706E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 34.523 55.087 + 0.707E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 34.427 55.087 + 0.708E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 34.360 55.087 + 0.709E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 34.425 55.087 + 0.710E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 34.263 55.087 + 0.711E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 34.150 55.087 + 0.712E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 34.059 55.087 + 0.713E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.412 55.087 + 0.714E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.411 55.087 + 0.715E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.410 55.087 + 0.716E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.408 55.087 + 0.717E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.407 55.087 + 0.718E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.406 55.087 + 0.719E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.405 55.087 + 0.720E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.404 55.087 + 0.721E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.402 55.087 + 0.722E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.401 55.087 + 0.723E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.401 55.087 + 0.724E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.399 55.087 + 0.725E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.398 55.087 + 0.726E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.397 55.087 + 0.727E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.395 55.087 + 0.728E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.394 55.087 + 0.729E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.393 55.087 + 0.730E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.392 55.087 + 0.731E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.391 55.087 + 0.732E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.390 55.087 + 0.733E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.389 55.087 + 0.734E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.388 55.087 + 0.735E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.387 55.087 + 0.736E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.386 55.087 + 0.737E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.384 55.087 + 0.738E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.383 55.087 + 0.739E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.382 55.087 + 0.740E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.381 55.087 + 0.741E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.379 55.087 + 0.742E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.379 55.087 + 0.743E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.377 55.087 + 0.744E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.377 55.087 + 0.745E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.375 55.087 + 0.746E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.375 55.087 + 0.747E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.373 55.087 + 0.748E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.372 55.087 + 0.749E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.370 55.087 + 0.750E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.370 55.087 + 0.751E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.369 55.087 + 0.752E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.368 55.087 + 0.753E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.367 55.087 + 0.754E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.365 55.087 + 0.755E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.364 55.087 + 0.756E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.363 55.087 + 0.757E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.362 55.087 + 0.758E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.361 55.087 + 0.759E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.360 55.087 + 0.760E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.359 55.087 + 0.761E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.358 55.087 + 0.762E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.357 55.087 + 0.763E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.356 55.087 + 0.764E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.354 55.087 + 0.765E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.354 55.087 + 0.766E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.352 55.087 + 0.767E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.351 55.087 + 0.768E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.350 55.087 + 0.769E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.349 55.087 + 0.770E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.348 55.087 + 0.771E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.347 55.087 + 0.772E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.346 55.087 + 0.773E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.345 55.087 + 0.774E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.344 55.087 + 0.775E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.343 55.087 + 0.776E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.342 55.087 + 0.777E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.341 55.087 + 0.778E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.340 55.087 + 0.779E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.339 55.087 + 0.780E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.337 55.087 + 0.781E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.337 55.087 + 0.782E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.336 55.087 + 0.783E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.335 55.087 + 0.784E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.334 55.087 + 0.785E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.333 55.087 + 0.786E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.331 55.087 + 0.787E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.330 55.087 + 0.788E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.329 55.087 + 0.789E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.329 55.087 + 0.790E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.327 55.087 + 0.791E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.326 55.087 + 0.792E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.325 55.087 + 0.793E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.324 55.087 + 0.794E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.323 55.087 + 0.795E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.322 55.087 + 0.796E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.321 55.087 + 0.797E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.320 55.087 + 0.798E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.319 55.087 + 0.799E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.318 55.087 + 0.800E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.317 55.087 + 0.801E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.316 55.087 + 0.802E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.315 55.087 + 0.803E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.314 55.087 + 0.804E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.313 55.087 + 0.805E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.312 55.087 + 0.806E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.311 55.087 + 0.807E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.310 55.087 + 0.808E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.309 55.087 + 0.809E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.308 55.087 + 0.810E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.307 55.087 + 0.811E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.306 55.087 + 0.812E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.306 55.087 + 0.813E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.304 55.087 + 0.814E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.303 55.087 + 0.815E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.302 55.087 + 0.816E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.301 55.087 + 0.817E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.300 55.087 + 0.818E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.300 55.087 + 0.819E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.299 55.087 + 0.820E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.297 55.087 + 0.821E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.296 55.087 + 0.822E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.296 55.087 + 0.823E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.294 55.087 + 0.824E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.294 55.087 + 0.825E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.292 55.087 + 0.826E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.292 55.087 + 0.827E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.291 55.087 + 0.828E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.290 55.087 + 0.829E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.289 55.087 + 0.830E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.288 55.087 + 0.831E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.287 55.087 + 0.832E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.286 55.087 + 0.833E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.285 55.087 + 0.834E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.284 55.087 + 0.835E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.283 55.087 + 0.836E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.282 55.087 + 0.837E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.282 55.087 + 0.838E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.280 55.087 + 0.839E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.279 55.087 + 0.840E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.278 55.087 + 0.841E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.277 55.087 + 0.842E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.276 55.087 + 0.843E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.275 55.087 + 0.844E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.274 55.087 + 0.845E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.274 55.087 + 0.846E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.273 55.087 + 0.847E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.272 55.087 + 0.848E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.270 55.087 + 0.849E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.270 55.087 + 0.850E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.269 55.087 + 0.851E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.268 55.087 + 0.852E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.267 55.087 + 0.853E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.266 55.087 + 0.854E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.266 55.087 + 0.855E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.264 55.087 + 0.856E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.264 55.087 + 0.857E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.263 55.087 + 0.858E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.262 55.087 + 0.859E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.260 55.087 + 0.860E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.260 55.087 + 0.861E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.259 55.087 + 0.862E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.258 55.087 + 0.863E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.257 55.087 + 0.864E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.256 55.087 + 0.865E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.255 55.087 + 0.866E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.254 55.087 + 0.867E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.253 55.087 + 0.868E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.253 55.087 + 0.869E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.251 55.087 + 0.870E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.251 55.087 + 0.871E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.250 55.087 + 0.872E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.249 55.087 + 0.873E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.248 55.087 + 0.874E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.247 55.087 + 0.875E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.246 55.087 + 0.876E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.245 55.087 + 0.877E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.244 55.087 + 0.878E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.244 55.087 + 0.879E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.243 55.087 + 0.880E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.242 55.087 + 0.881E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.241 55.087 + 0.882E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.240 55.087 + 0.883E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.239 55.087 + 0.884E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.238 55.087 + 0.885E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.237 55.087 + 0.886E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.237 55.087 + 0.887E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.236 55.087 + 0.888E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.235 55.087 + 0.889E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.234 55.087 + 0.890E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.233 55.087 + 0.891E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.232 55.087 + 0.892E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.232 55.087 + 0.893E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.231 55.087 + 0.894E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.230 55.087 + 0.895E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.229 55.087 + 0.896E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.228 55.087 + 0.897E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.227 55.087 + 0.898E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.226 55.087 + 0.899E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.225 55.087 + 0.900E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.225 55.087 + 0.901E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.224 55.087 + 0.902E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.223 55.087 + 0.903E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.222 55.087 + 0.904E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.221 55.087 + 0.905E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.221 55.087 + 0.906E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.220 55.087 + 0.907E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.219 55.087 + 0.908E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.218 55.087 + 0.909E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.218 55.087 + 0.910E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.217 55.087 + 0.911E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.216 55.087 + 0.912E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.216 55.087 + 0.913E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.215 55.087 + 0.914E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.215 55.087 + 0.915E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.214 55.087 + 0.916E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.213 55.087 + 0.917E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.212 55.087 + 0.918E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.212 55.087 + 0.919E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.211 55.087 + 0.920E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.210 55.087 + 0.921E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.210 55.087 + 0.922E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.209 55.087 + 0.923E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.208 55.087 + 0.924E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.208 55.087 + 0.925E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.207 55.087 + 0.926E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.206 55.087 + 0.927E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.206 55.087 + 0.928E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.205 55.087 + 0.929E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.204 55.087 + 0.930E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.204 55.087 + 0.931E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.203 55.087 + 0.932E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.202 55.087 + 0.933E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.202 55.087 + 0.934E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.201 55.087 + 0.935E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.201 55.087 + 0.936E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.200 55.087 + 0.937E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.199 55.087 + 0.938E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.198 55.087 + 0.939E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.198 55.087 + 0.940E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.197 55.087 + 0.941E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.197 55.087 + 0.942E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.196 55.087 + 0.943E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.195 55.087 + 0.944E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.195 55.087 + 0.945E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.194 55.087 + 0.946E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.193 55.087 + 0.947E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.193 55.087 + 0.948E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.192 55.087 + 0.949E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.191 55.087 + 0.950E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.191 55.087 + 0.951E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.190 55.087 + 0.952E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.189 55.087 + 0.953E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.189 55.087 + 0.954E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.188 55.087 + 0.955E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.187 55.087 + 0.956E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.187 55.087 + 0.957E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.186 55.087 + 0.958E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.185 55.087 + 0.959E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.185 55.087 + 0.960E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.184 55.087 + 0.961E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.183 55.087 + 0.962E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.183 55.087 + 0.963E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.182 55.087 + 0.964E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.181 55.087 + 0.965E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.181 55.087 + 0.966E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.180 55.087 + 0.967E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.179 55.087 + 0.968E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.179 55.087 + 0.969E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.178 55.087 + 0.970E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.177 55.087 + 0.971E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.177 55.087 + 0.972E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.176 55.087 + 0.973E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.175 55.087 + 0.974E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.175 55.087 + 0.975E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.174 55.087 + 0.976E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.173 55.087 + 0.977E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.173 55.087 + 0.978E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.172 55.087 + 0.979E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.172 55.087 + 0.980E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.171 55.087 + 0.981E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.170 55.087 + 0.982E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.169 55.087 + 0.983E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.169 55.087 + 0.984E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.168 55.087 + 0.985E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.167 55.087 + 0.986E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.167 55.087 + 0.987E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.166 55.087 + 0.988E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.166 55.087 + 0.989E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.165 55.087 + 0.990E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.164 55.087 + 0.991E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.164 55.087 + 0.992E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.163 55.087 + 0.993E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.162 55.087 + 0.994E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.162 55.087 + 0.995E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.161 55.087 + 0.996E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.160 55.087 + 0.997E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.159 55.087 + 0.998E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.159 55.087 + 0.999E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.158 55.087 diff --git a/src/particles/supernova.h b/src/particles/supernova.h index e55b96bee..5490c44d0 100644 --- a/src/particles/supernova.h +++ b/src/particles/supernova.h @@ -15,19 +15,20 @@ namespace supernova { const int SN = 0, RESOLVED = 1, NOT_RESOLVED = 2, ENERGY = 3, MOMENTUM = 4, UNRES_ENERGY = 5; - // supernova rate: 1SN / 100 solar masses per 40^4 kyr - static const Real SNR=2.5e-7; - static const Real ENERGY_PER_SN = 1e51 / MASS_UNIT*TIME_UNIT*TIME_UNIT/LENGTH_UNIT/LENGTH_UNIT; - static const Real MASS_PER_SN = 10.0; // 10 solarMasses per SN - static const Real FINAL_MOMENTUM = 2.8e5 / LENGTH_UNIT * 1e5 * TIME_UNIT; // 2.8e5 M_s km/s * n_0^{-0.17} -> eq.(34) Kim & Ostriker (2015) - static const Real MU = 0.6; - static const Real R_SH = 0.0302; // 30.2 pc * n_0^{-0.46} -> eq.(31) Kim & Ostriker (2015) - static const Real SN_ERA = 4.0e4; // assume SN occur during first 40 Myr after cluster formation. + // supernova rate: 1SN / 100 solar masses per 36 Myr + static const Real DEFAULT_SNR = 2.8e-7; + static const Real ENERGY_PER_SN = 1e51 / MASS_UNIT*TIME_UNIT*TIME_UNIT/LENGTH_UNIT/LENGTH_UNIT; + static const Real MASS_PER_SN = 10.0; // 10 solarMasses per SN + static const Real FINAL_MOMENTUM = 2.8e5 / LENGTH_UNIT * 1e5 * TIME_UNIT; // 2.8e5 M_s km/s * n_0^{-0.17} -> eq.(34) Kim & Ostriker (2015) + static const Real MU = 0.6; + static const Real R_SH = 0.0302; // 30.2 pc * n_0^{-0.46} -> eq.(31) Kim & Ostriker (2015) + static const Real DEFAULT_SN_END = 40000; // default value for when SNe stop (40 Myr) + static const Real DEFAULT_SN_START = 4000; // default value for when SNe start (4 Myr) extern curandStateMRG32k3a_t* randStates; extern part_int_t n_states; - extern Real t_buff, dt_buff; + extern Real *dev_snr, snr_dt, time_sn_end, time_sn_start; void initState(struct parameters *P, part_int_t n_local, Real allocation_factor = 1); Real Cluster_Feedback(Grid3D& G, FeedbackAnalysis& sn_analysis); From 8e96c35cb4f29ae03f7be56cfa6a3525c628aeac Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Thu, 12 Jan 2023 10:50:55 -0500 Subject: [PATCH 165/694] cat_dset_3D.py now inherits output dataset types from input data resolve #221 --- python_scripts/cat_dset_3D.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/python_scripts/cat_dset_3D.py b/python_scripts/cat_dset_3D.py index 1a823e268..e37525b5b 100755 --- a/python_scripts/cat_dset_3D.py +++ b/python_scripts/cat_dset_3D.py @@ -41,20 +41,20 @@ for unit in units: fileout.attrs[unit] = [head[unit][0]] - d = fileout.create_dataset("density", (nx, ny, nz), chunks=True) - mx = fileout.create_dataset("momentum_x", (nx, ny, nz), chunks=True) - my = fileout.create_dataset("momentum_y", (nx, ny, nz), chunks=True) - mz = fileout.create_dataset("momentum_z", (nx, ny, nz), chunks=True) - E = fileout.create_dataset("Energy", (nx, ny, nz), chunks=True) + d = fileout.create_dataset("density", (nx, ny, nz), chunks=True, dtype=filein['density'].dtype) + mx = fileout.create_dataset("momentum_x", (nx, ny, nz), chunks=True, dtype=filein['momentum_x'].dtype) + my = fileout.create_dataset("momentum_y", (nx, ny, nz), chunks=True, dtype=filein['momentum_y'].dtype) + mz = fileout.create_dataset("momentum_z", (nx, ny, nz), chunks=True, dtype=filein['momentum_z'].dtype) + E = fileout.create_dataset("Energy", (nx, ny, nz), chunks=True, dtype=filein['Energy'].dtype) try: - GE = fileout.create_dataset("GasEnergy", (nx, ny, nz), chunks=True) + GE = fileout.create_dataset("GasEnergy", (nx, ny, nz), chunks=True, dtype=filein['GasEnergy'].dtype) except KeyError: print('No Dual energy data present'); try: [nx_mag, ny_mag, nz_mag] = head['magnetic_field_dims'] - bx = fileout.create_dataset("magnetic_x", (nx_mag, ny_mag, nz_mag), chunks=True) - by = fileout.create_dataset("magnetic_y", (nx_mag, ny_mag, nz_mag), chunks=True) - bz = fileout.create_dataset("magnetic_z", (nx_mag, ny_mag, nz_mag), chunks=True) + bx = fileout.create_dataset("magnetic_x", (nx_mag, ny_mag, nz_mag), chunks=True, dtype=filein['magnetic_x'].dtype) + by = fileout.create_dataset("magnetic_y", (nx_mag, ny_mag, nz_mag), chunks=True, dtype=filein['magnetic_y'].dtype) + bz = fileout.create_dataset("magnetic_z", (nx_mag, ny_mag, nz_mag), chunks=True, dtype=filein['magnetic_z'].dtype) except KeyError: print('No magnetic field data present'); From fc9c48b53fc5ba5d75355a1eec7af71772c620ed Mon Sep 17 00:00:00 2001 From: bcaddy <41171425+bcaddy@users.noreply.github.com> Date: Fri, 11 Nov 2022 14:26:51 -0500 Subject: [PATCH 166/694] Add source code formatting tools Adds the following: - `.clang-format` file with the formatting we decided on based off of Google format - `.git-blame-ignore-revs` file to help us ignore formatting changes - whitespace removal commit to `.git-blame-ignore-revs` - `tools/clang-format_runner.sh` script to run `clang-format` on all the C++ source files in Cholla - `cpp-lint.yml` GitHub Actions workflow automatically check formatting on each push and PR --- .clang-format | 250 +++++++++++++++++++++++++++++++++ .git-blame-rev-ignore-revs | 9 ++ .github/workflows/cpp-lint.yml | 28 ++++ tools/clang-format_runner.sh | 22 +++ 4 files changed, 309 insertions(+) create mode 100644 .clang-format create mode 100644 .git-blame-rev-ignore-revs create mode 100644 .github/workflows/cpp-lint.yml create mode 100755 tools/clang-format_runner.sh diff --git a/.clang-format b/.clang-format new file mode 100644 index 000000000..3aaa1d316 --- /dev/null +++ b/.clang-format @@ -0,0 +1,250 @@ +--- +Language: Cpp +# BasedOnStyle: Google +AccessModifierOffset: -1 +AlignAfterOpenBracket: Align +AlignArrayOfStructures: None +AlignConsecutiveAssignments: + Enabled: true + AcrossEmptyLines: false + AcrossComments: false + AlignCompound: false + PadOperators: true +AlignConsecutiveBitFields: + Enabled: false + AcrossEmptyLines: false + AcrossComments: false + AlignCompound: false + PadOperators: false +AlignConsecutiveDeclarations: + Enabled: false + AcrossEmptyLines: false + AcrossComments: false + AlignCompound: false + PadOperators: false +AlignConsecutiveMacros: + Enabled: true + AcrossEmptyLines: false + AcrossComments: false + AlignCompound: false + PadOperators: false +AlignEscapedNewlines: Left +AlignOperands: Align +AlignTrailingComments: true +AllowAllArgumentsOnNextLine: true +AllowAllParametersOfDeclarationOnNextLine: true +AllowShortEnumsOnASingleLine: true +AllowShortBlocksOnASingleLine: Never +AllowShortCaseLabelsOnASingleLine: false +AllowShortFunctionsOnASingleLine: All +AllowShortLambdasOnASingleLine: All +AllowShortIfStatementsOnASingleLine: WithoutElse +AllowShortLoopsOnASingleLine: true +AlwaysBreakAfterDefinitionReturnType: None +AlwaysBreakAfterReturnType: None +AlwaysBreakBeforeMultilineStrings: true +AlwaysBreakTemplateDeclarations: Yes +AttributeMacros: + - __capability +BinPackArguments: true +BinPackParameters: true +BraceWrapping: + AfterCaseLabel: false + AfterClass: false + AfterControlStatement: Never + AfterEnum: false + AfterFunction: false + AfterNamespace: false + AfterObjCDeclaration: false + AfterStruct: false + AfterUnion: false + AfterExternBlock: false + BeforeCatch: false + BeforeElse: false + BeforeLambdaBody: false + BeforeWhile: false + IndentBraces: false + SplitEmptyFunction: true + SplitEmptyRecord: true + SplitEmptyNamespace: true +BreakBeforeBinaryOperators: None +BreakBeforeConceptDeclarations: Always +BreakBeforeBraces: Linux +BreakBeforeInheritanceComma: false +BreakInheritanceList: BeforeColon +BreakBeforeTernaryOperators: true +BreakConstructorInitializersBeforeComma: false +BreakConstructorInitializers: BeforeColon +BreakAfterJavaFieldAnnotations: false +BreakStringLiterals: true +ColumnLimit: 80 +CommentPragmas: '^ IWYU pragma:' +QualifierAlignment: Leave +CompactNamespaces: false +ConstructorInitializerIndentWidth: 4 +ContinuationIndentWidth: 4 +Cpp11BracedListStyle: true +DeriveLineEnding: true +DerivePointerAlignment: true +DisableFormat: false +EmptyLineAfterAccessModifier: Never +EmptyLineBeforeAccessModifier: LogicalBlock +ExperimentalAutoDetectBinPacking: false +PackConstructorInitializers: NextLine +BasedOnStyle: '' +ConstructorInitializerAllOnOneLineOrOnePerLine: false +AllowAllConstructorInitializersOnNextLine: true +FixNamespaceComments: true +ForEachMacros: + - foreach + - Q_FOREACH + - BOOST_FOREACH +IfMacros: + - KJ_IF_MAYBE +IncludeBlocks: Regroup +IncludeCategories: + - Regex: '^' + Priority: 2 + SortPriority: 0 + CaseSensitive: false + - Regex: '^<.*\.h>' + Priority: 1 + SortPriority: 0 + CaseSensitive: false + - Regex: '^<.*' + Priority: 2 + SortPriority: 0 + CaseSensitive: false + - Regex: '.*' + Priority: 3 + SortPriority: 0 + CaseSensitive: false +IncludeIsMainRegex: '([-_](test|unittest))?$' +IncludeIsMainSourceRegex: '' +IndentAccessModifiers: false +IndentCaseLabels: true +IndentCaseBlocks: false +IndentGotoLabels: true +IndentPPDirectives: None +IndentExternBlock: AfterExternBlock +IndentRequiresClause: true +IndentWidth: 2 +IndentWrappedFunctionNames: false +InsertBraces: false +InsertTrailingCommas: None +JavaScriptQuotes: Leave +JavaScriptWrapImports: true +KeepEmptyLinesAtTheStartOfBlocks: false +LambdaBodyIndentation: Signature +MacroBlockBegin: '' +MacroBlockEnd: '' +MaxEmptyLinesToKeep: 1 +NamespaceIndentation: None +ObjCBinPackProtocolList: Never +ObjCBlockIndentWidth: 2 +ObjCBreakBeforeNestedBlockParam: true +ObjCSpaceAfterProperty: false +ObjCSpaceBeforeProtocolList: true +PenaltyBreakAssignment: 2 +PenaltyBreakBeforeFirstCallParameter: 1 +PenaltyBreakComment: 300 +PenaltyBreakFirstLessLess: 120 +PenaltyBreakOpenParenthesis: 0 +PenaltyBreakString: 1000 +PenaltyBreakTemplateDeclaration: 10 +PenaltyExcessCharacter: 1000000 +PenaltyReturnTypeOnItsOwnLine: 200 +PenaltyIndentedWhitespace: 0 +PointerAlignment: Left +PPIndentWidth: -1 +RawStringFormats: + - Language: Cpp + Delimiters: + - cc + - CC + - cpp + - Cpp + - CPP + - 'c++' + - 'C++' + CanonicalDelimiter: '' + BasedOnStyle: google + - Language: TextProto + Delimiters: + - pb + - PB + - proto + - PROTO + EnclosingFunctions: + - EqualsProto + - EquivToProto + - PARSE_PARTIAL_TEXT_PROTO + - PARSE_TEST_PROTO + - PARSE_TEXT_PROTO + - ParseTextOrDie + - ParseTextProtoOrDie + - ParseTestProto + - ParsePartialTestProto + CanonicalDelimiter: pb + BasedOnStyle: google +ReferenceAlignment: Pointer +ReflowComments: true +RemoveBracesLLVM: false +RequiresClausePosition: OwnLine +SeparateDefinitionBlocks: Leave +ShortNamespaceLines: 1 +SortIncludes: CaseSensitive +SortJavaStaticImport: Before +SortUsingDeclarations: true +SpaceAfterCStyleCast: false +SpaceAfterLogicalNot: false +SpaceAfterTemplateKeyword: true +SpaceBeforeAssignmentOperators: true +SpaceBeforeCaseColon: false +SpaceBeforeCpp11BracedList: false +SpaceBeforeCtorInitializerColon: true +SpaceBeforeInheritanceColon: true +SpaceBeforeParens: ControlStatements +SpaceBeforeParensOptions: + AfterControlStatements: true + AfterForeachMacros: true + AfterFunctionDefinitionName: false + AfterFunctionDeclarationName: false + AfterIfMacros: true + AfterOverloadedOperator: false + AfterRequiresInClause: false + AfterRequiresInExpression: false + BeforeNonEmptyParentheses: false +SpaceAroundPointerQualifiers: Default +SpaceBeforeRangeBasedForLoopColon: true +SpaceInEmptyBlock: false +SpaceInEmptyParentheses: false +SpacesBeforeTrailingComments: 2 +SpacesInAngles: Never +SpacesInConditionalStatement: false +SpacesInContainerLiterals: true +SpacesInCStyleCastParentheses: false +SpacesInLineCommentPrefix: + Minimum: 1 + Maximum: -1 +SpacesInParentheses: false +SpacesInSquareBrackets: false +SpaceBeforeSquareBrackets: false +BitFieldColonSpacing: Both +Standard: c++17 +StatementAttributeLikeMacros: + - Q_EMIT +StatementMacros: + - Q_UNUSED + - QT_REQUIRE_VERSION +TabWidth: 8 +UseCRLF: false +UseTab: Never +WhitespaceSensitiveMacros: + - STRINGIZE + - PP_STRINGIZE + - BOOST_PP_STRINGIZE + - NS_SWIFT_NAME + - CF_SWIFT_NAME +... + diff --git a/.git-blame-rev-ignore-revs b/.git-blame-rev-ignore-revs new file mode 100644 index 000000000..160ec665e --- /dev/null +++ b/.git-blame-rev-ignore-revs @@ -0,0 +1,9 @@ +# To tell git blame to ignore these commits run this command in the repo. +# `git config blame.ignoreRevsFile .git-blame-ignore-revs` +# Requires git v2.23 or greater. Each entry must include the full 40 character +# hash + +# Strip all trailing whitespace +40fcc44334cc92572beb726961e23beb6be8ae2f + +# Reformat Code with clang-format \ No newline at end of file diff --git a/.github/workflows/cpp-lint.yml b/.github/workflows/cpp-lint.yml new file mode 100644 index 000000000..d48f1da60 --- /dev/null +++ b/.github/workflows/cpp-lint.yml @@ -0,0 +1,28 @@ +name: cpp-linter + +on: [pull_request, push] + +jobs: + cpp-format: + runs-on: ubuntu-latest + + # Setup environment variables + env: + CLANG_FORMAT_VERSION: 15 + + steps: + - uses: actions/checkout@v3 + - name: Install clang-format + run: | + wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key|sudo apt-key add - + sudo add-apt-repository "deb http://apt.llvm.org/focal/ llvm-toolchain-focal-15 main" + sudo apt install clang-format-15 + sudo ln --symbolic --force /usr/bin/clang-format-15 /usr/bin/clang-format + - name: Verify clang-format installation + run: | + clang-format-15 --version + which clang-format-15 + clang-format --version + which clang-format + - name: Check if files are properly formatted + run: tools/clang-format_runner.sh --dry-run --Werror \ No newline at end of file diff --git a/tools/clang-format_runner.sh b/tools/clang-format_runner.sh new file mode 100755 index 000000000..52e4a2e18 --- /dev/null +++ b/tools/clang-format_runner.sh @@ -0,0 +1,22 @@ +#!/usr/bin/env bash + +# Description: +# Run clang-format on all the source files in Cholla. Any command line arguments +# provided to this script are passed directly to clang-format +# +# Dependencies: +# - clang-format v15 or greater +# - GNU Find, the default macos version won't work + +# Get the location of Cholla +cholla_root=$(git rev-parse --show-toplevel) + +# Get a list of all the files to format +readarray -t files <<<$(find ${cholla_root} -regex '.*\.\(h\|hpp\|c\|cpp\|cu\|cuh\)$' -print) + +for VAR in $LIST +do + echo "$VAR" +done + +clang-format -i --verbose "$@" -style="file" "${files[@]}" \ No newline at end of file From 3ed7094d6c1bda15c52b2f4f6a7dd4b2ae563015 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Wed, 16 Nov 2022 13:21:25 -0500 Subject: [PATCH 167/694] Add clang-tidy support & DockerFiles Clang Tidy - Add `.clang-tidy` file - Disables checks we probably don't want to run - Set naming convention based on our current discussion - Disables all checks that we do want to run but that aren't passing yet - Integrate clang-tidy into build system - Add clang-tidy run to build workflow and rename it "Build & Lint" - Since it shares most of the setup with the build and there's no point in linting if it doesn't even compile I added the clang-tidy run to the end of the build - Tweak hydro_cuda_tests to work with clang-tidy on ROCm Docker - Add docker files to Cholla repo - Point GHA at new images that contain clang tools and are owned by the "chollahydro" user on DockerHub Other - Remove `-foffload=disable` as it's no longer used - Rename cpp-lint action to code_formatting - Add readme badges for new workflow - Remove extraneous googletest build from GHA builds --- .clang-tidy | 198 ++++++++++++++++++ .../{build_tests.yml => build_and_lint.yml} | 33 +-- .../{cpp-lint.yml => code_formatting.yml} | 12 +- .gitignore | 1 + Makefile | 56 ++++- README.md | 4 +- builds/setup.c3po.gcc.sh | 2 +- builds/setup.crc.gcc.sh | 2 +- builds/setup.github.gcc.sh | 1 - builds/setup.summit.gcc.sh | 3 +- docker/cuda/Dockerfile | 29 +++ docker/rocm/Dockerfile | 43 ++++ src/hydro/hydro_cuda_tests.cu | 7 +- 13 files changed, 347 insertions(+), 44 deletions(-) create mode 100644 .clang-tidy rename .github/workflows/{build_tests.yml => build_and_lint.yml} (76%) rename .github/workflows/{cpp-lint.yml => code_formatting.yml} (61%) create mode 100644 docker/cuda/Dockerfile create mode 100644 docker/rocm/Dockerfile diff --git a/.clang-tidy b/.clang-tidy new file mode 100644 index 000000000..4a5f5703d --- /dev/null +++ b/.clang-tidy @@ -0,0 +1,198 @@ +# This settings file for clang-tidy chooses which checks to run, the settings +# for those checks, etc. It uses as many of the default values as possible and +# runs all checks with some exclusions by default. +# +# The full list of clang-format 15 checks and documentation can be found +# [here](https://releases.llvm.org/15.0.0/tools/clang/tools/extra/docs/clang-tidy/index.html) +# +# The "Checks" command should have 5 sections seperated by a newline: +# 1. Turn on all checks by default. Done with "*" +# 2. Turn off the catagories of checks we don't want +# 3. Turn off specific, individual checks we don't want +# 4. Turn on checks that we do want from from the catagories of checks that we +# didn't want +# 5. Turn off the checks that we do want but that aren't passing yet +--- +Checks: "*, + + -abseil-*, + -altera-*, + -android-*, + -boost-*, + -darwin-*, + -fuchsia-*, + -linuxkernel-*, + -llvmlibc-*, + -*objc*, + -*osx*, + -zircon-*, + + -modernize-use-trailing-return-type, + -readability-avoid-const-params-in-decls, + -readability-static-accessed-through-instance, + + google-readability-avoid-underscore-in-googletest-name, + google-upgrade-googletest-case, + + -bugprone-assignment-in-if-condition, + -bugprone-branch-clone, + -bugprone-easily-swappable-parameters, + -bugprone-implicit-widening-of-multiplication-result, + -bugprone-integer-division, + -bugprone-macro-parentheses, + -bugprone-narrowing-conversions, + -bugprone-reserved-identifier, + -bugprone-signed-char-misuse, + -bugprone-string-integer-assignment, + -cert-dcl37-c, + -cert-dcl50-cpp, + -cert-dcl51-cpp, + -cert-dcl59-cpp, + -cert-env33-c, + -cert-err33-c, + -cert-err34-c, + -cert-err58-cpp, + -cert-msc32-c, + -cert-msc51-cpp, + -cert-str34-c, + -clang-analyzer-core.CallAndMessage, + -clang-analyzer-core.UndefinedBinaryOperatorResult, + -clang-analyzer-core.uninitialized.ArraySubscript, + -clang-analyzer-core.uninitialized.Assign, + -clang-analyzer-deadcode.DeadStores, + -clang-analyzer-optin.performance.Padding, + -clang-analyzer-security.insecureAPI.strcpy, + -clang-analyzer-valist.Uninitialized, + -clang-diagnostic-format, + -clang-diagnostic-macro-redefined, + -clang-diagnostic-unknown-cuda-version, + -clang-diagnostic-unused-command-line-argument, + -clang-diagnostic-unused-result, + -concurrency-mt-unsafe, + -cppcoreguidelines-avoid-c-arrays, + -cppcoreguidelines-avoid-magic-numbers, + -cppcoreguidelines-avoid-non-const-global-variables, + -cppcoreguidelines-explicit-virtual-functions, + -cppcoreguidelines-init-variables, + -cppcoreguidelines-macro-usage, + -cppcoreguidelines-narrowing-conversions, + -cppcoreguidelines-no-malloc, + -cppcoreguidelines-non-private-member-variables-in-classes, + -cppcoreguidelines-owning-memory, + -cppcoreguidelines-prefer-member-initializer, + -cppcoreguidelines-pro-bounds-array-to-pointer-decay, + -cppcoreguidelines-pro-bounds-constant-array-index, + -cppcoreguidelines-pro-bounds-pointer-arithmetic, + -cppcoreguidelines-pro-type-cstyle-cast, + -cppcoreguidelines-pro-type-member-init, + -cppcoreguidelines-pro-type-reinterpret-cast, + -cppcoreguidelines-pro-type-vararg, + -cppcoreguidelines-special-member-functions, + -cppcoreguidelines-virtual-class-destructor, + -google-build-namespaces, + -google-build-using-namespace, + -google-explicit-constructor, + -google-global-names-in-headers, + -google-readability-braces-around-statements, + -google-readability-casting, + -google-readability-namespace-comments, + -google-readability-todo, + -google-runtime-int, + -hicpp-avoid-c-arrays, + -hicpp-braces-around-statements, + -hicpp-deprecated-headers, + -hicpp-explicit-conversions, + -hicpp-member-init, + -hicpp-multiway-paths-covered, + -hicpp-no-array-decay, + -hicpp-no-malloc, + -hicpp-signed-bitwise, + -hicpp-special-member-functions, + -hicpp-use-auto, + -hicpp-use-emplace, + -hicpp-use-equals-default, + -hicpp-use-noexcept, + -hicpp-use-nullptr, + -hicpp-use-override, + -hicpp-vararg, + -llvm-else-after-return, + -llvm-header-guard, + -llvm-include-order, + -llvm-namespace-comment, + -misc-confusable-identifiers, + -misc-const-correctness, + -misc-non-private-member-variables-in-classes, + -misc-unused-parameters, + -modernize-avoid-c-arrays, + -modernize-deprecated-headers, + -modernize-loop-convert, + -modernize-macro-to-enum, + -modernize-redundant-void-arg, + -modernize-use-auto, + -modernize-use-bool-literals, + -modernize-use-default-member-init, + -modernize-use-emplace, + -modernize-use-equals-default, + -modernize-use-nodiscard, + -modernize-use-noexcept, + -modernize-use-nullptr, + -modernize-use-override, + -modernize-use-using, + -performance-faster-string-find, + -performance-for-range-copy, + -performance-inefficient-vector-operation, + -performance-unnecessary-value-param, + -readability-braces-around-statements, + -readability-const-return-type, + -readability-convert-member-functions-to-static, + -readability-delete-null-pointer, + -readability-duplicate-include, + -readability-else-after-return, + -readability-function-cognitive-complexity, + -readability-identifier-length, + -readability-identifier-naming, + -readability-implicit-bool-conversion, + -readability-inconsistent-declaration-parameter-name, + -readability-isolate-declaration, + -readability-magic-numbers, + -readability-make-member-function-const, + -readability-non-const-parameter, + -readability-redundant-control-flow, + -readability-simplify-boolean-expr, + -readability-suspicious-call-argument" +WarningsAsErrors: '' +HeaderFilterRegex: '.*' +AnalyzeTemporaryDtors: false +FormatStyle: 'file' +UseColor: false +CheckOptions: + # readability-identifier-naming allowed casing types + # - lower_case + # - UPPER_CASE + # - camelBack + # - CamelCase + # - camel_Snake_Back + # - Camel_Snake_Case + # - aNy_CasE + # + # Entries that are commented out probably aren't needed but it should be verified + readability-identifier-naming.VariableCase: 'lower_case' + readability-identifier-naming.FunctionCase: 'CamelCase' + readability-identifier-naming.NamespaceCase: 'lower_case' + readability-identifier-naming.MacroDefinitionCase: 'UPPER_CASE' + readability-identifier-naming.TypedefCase: 'CamelCase' + readability-identifier-naming.TypeAliasCase: 'CamelCase' + readability-identifier-naming.EnumCase: 'CamelCase' + readability-identifier-naming.ConstantCase: 'CamelCase' + + readability-identifier-naming.ConstantPrefix: 'k' + readability-identifier-naming.GlobalVariablePrefix: 'g_' + + readability-identifier-naming.ClassCase: 'CamelCase' + # readability-identifier-naming.MemberCase: 'lower_case' + # readability-identifier-naming.MethodCase: 'CamelCase' + readability-identifier-naming.PrivateMemberSuffix: '_' + readability-identifier-naming.PrivateMethodSuffix: '_' + + # readability-identifier-naming.StructCase: 'CamelCase' +... diff --git a/.github/workflows/build_tests.yml b/.github/workflows/build_and_lint.yml similarity index 76% rename from .github/workflows/build_tests.yml rename to .github/workflows/build_and_lint.yml index 19bdabb44..9c3d20502 100644 --- a/.github/workflows/build_tests.yml +++ b/.github/workflows/build_and_lint.yml @@ -1,4 +1,4 @@ -name: Cholla Compile +name: Build & Lint on: pull_request: @@ -9,15 +9,15 @@ on: jobs: Build: name: > - Build + Build & Lint: ${{ matrix.container.name }} TYPE=${{ matrix.make-type }} # if: ${{ false }} # If uncommented this line will disable this job # Choose OS/Runner runs-on: ubuntu-latest - container: - image: ${{matrix.container.link}} + container: + image: ${{matrix.container.link}} defaults: run: shell: bash @@ -26,15 +26,11 @@ jobs: fail-fast: false matrix: make-type: [hydro, gravity, disk, particles, cosmology, mhd] - container: [{name: "CUDA", link: "docker://alwinm/cholla:cuda_github"}, {name: "HIP",link: "docker://alwinm/cholla:hip_github"},] + container: [{name: "CUDA", link: "docker://chollahydro/cholla:cuda_github"}, {name: "HIP",link: "docker://chollahydro/cholla:rocm_github"},] # Setup environment variables env: - CHOLLA_MACHINE: github CHOLLA_MAKE_TYPE: ${{ matrix.make-type }} - CUDA_ROOT: /usr/local/cuda - HDF5_ROOT: /usr/lib/x86_64-linux-gnu/hdf5/serial - MPI_ROOT: /usr/lib/x86_64-linux-gnu/openmpi # Run the job itself steps: @@ -64,7 +60,6 @@ jobs: run: | hipcc --version hipconfig --full - # Perform Build - name: Cholla setup @@ -77,11 +72,6 @@ jobs: echo "CHOLLA_LAUNCH_COMMAND=${CHOLLA_LAUNCH_COMMAND}" >> $GITHUB_ENV echo "F_OFFLOAD=${F_OFFLOAD} >> $GITHUB_ENV echo "CHOLLA_ENVSET=${CHOLLA_ENVSET} >> $GITHUB_ENV - - name: Build GoogleTest - run: | - source builds/run_tests.sh - buildGoogleTest - echo "GOOGLETEST_ROOT=${GOOGLETEST_ROOT}" >> $GITHUB_ENV - name: Build Cholla run: | source builds/run_tests.sh @@ -90,3 +80,16 @@ jobs: run: | source builds/run_tests.sh buildChollaTests + + # Run Clang-tidy + - name: Run clang-tidy + run: make tidy TYPE=${{ matrix.make-type }} CLANG_TIDY_ARGS="--warnings-as-errors=*" + - name: Display tidy_results_cpp.txt + if: always() + run: cat tidy_results_cpp.txt + - name: Display tidy_results_c.txt + if: always() + run: cat tidy_results_c.txt + - name: Display tidy_results_gpu.txt + if: always() + run: cat tidy_results_gpu.txt diff --git a/.github/workflows/cpp-lint.yml b/.github/workflows/code_formatting.yml similarity index 61% rename from .github/workflows/cpp-lint.yml rename to .github/workflows/code_formatting.yml index d48f1da60..f145efdd1 100644 --- a/.github/workflows/cpp-lint.yml +++ b/.github/workflows/code_formatting.yml @@ -1,4 +1,4 @@ -name: cpp-linter +name: Code Formatting on: [pull_request, push] @@ -15,13 +15,13 @@ jobs: - name: Install clang-format run: | wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key|sudo apt-key add - - sudo add-apt-repository "deb http://apt.llvm.org/focal/ llvm-toolchain-focal-15 main" - sudo apt install clang-format-15 - sudo ln --symbolic --force /usr/bin/clang-format-15 /usr/bin/clang-format + sudo add-apt-repository "deb http://apt.llvm.org/focal/ llvm-toolchain-focal-${{ env.CLANG_FORMAT_VERSION }} main" + sudo apt install clang-format-${{ env.CLANG_FORMAT_VERSION }} + sudo ln --symbolic --force /usr/bin/clang-format-${{ env.CLANG_FORMAT_VERSION }} /usr/bin/clang-format - name: Verify clang-format installation run: | - clang-format-15 --version - which clang-format-15 + clang-format-${{ env.CLANG_FORMAT_VERSION }} --version + which clang-format-${{ env.CLANG_FORMAT_VERSION }} clang-format --version which clang-format - name: Check if files are properly formatted diff --git a/.gitignore b/.gitignore index ba64b82f1..72bf7018e 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,7 @@ # Files specific to this repo # ############################## googletest* +tidy_results*.txt # Compiled source # ################### diff --git a/Makefile b/Makefile index fa2bc4500..dc31ff16a 100644 --- a/Makefile +++ b/Makefile @@ -1,3 +1,4 @@ +SHELL = /usr/bin/env bash #-- Set default include makefile MACHINE ?= $(shell builds/machine.sh) TYPE ?= hydro @@ -25,25 +26,32 @@ CLEAN_OBJS := $(subst .c,.o,$(CFILES)) \ $(subst .cpp,.o,$(CPPFILES)) \ $(subst .cu,.o,$(GPUFILES)) -# Set testing related lists and variables +# Check if it should include testing flags ifeq ($(TEST), true) - # This is a test build so lets clear out Cholla's main file and set - # appropriate compiler flags, suffix, etc + ADD_TEST_FLAGS = yes $(info Building Tests...) $(info ) - SUFFIX := $(strip $(SUFFIX)).tests CPPFILES := $(filter-out src/main.cpp,$(CPPFILES)) - LIBS += -L$(GOOGLETEST_ROOT)/lib64 -pthread -lgtest -lhdf5_cpp - TEST_FLAGS = -I$(GOOGLETEST_ROOT)/include - CFLAGS += $(TEST_FLAGS) - CXXFLAGS += $(TEST_FLAGS) - GPUFLAGS += $(TEST_FLAGS) - # HACK # Set the build flags to debug. This is mostly to avoid the approximations # made by Ofast which break std::isnan and std::isinf which are required for # the testing BUILD = DEBUG +endif +ifeq ($(MAKECMDGOALS), tidy) + ADD_TEST_FLAGS = yes +endif + +# Set testing related lists and variables +ifeq ($(ADD_TEST_FLAGS), yes) + # This is a test build so lets clear out Cholla's main file and set + # appropriate compiler flags, suffix, etc + SUFFIX := $(strip $(SUFFIX)).tests + LIBS += -L$(GOOGLETEST_ROOT)/lib64 -pthread -lgtest -lhdf5_cpp + TEST_FLAGS = -I$(GOOGLETEST_ROOT)/include + CFLAGS += $(TEST_FLAGS) + CXXFLAGS += $(TEST_FLAGS) + GPUFLAGS += $(TEST_FLAGS) else # This isn't a test build so clear out testing related files CFILES := $(filter-out src/system_tests/% %_tests.c,$(CFILES)) @@ -172,6 +180,20 @@ DFLAGS += -DGIT_HASH='"$(shell git rev-parse --verify HEAD)"' MACRO_FLAGS := -DMACRO_FLAGS='"$(DFLAGS)"' DFLAGS += $(MACRO_FLAGS) +# Setup variables for clang-tidy +LIBS_CLANG_TIDY := $(subst -I/, -isystem /,$(LIBS)) +LIBS_CLANG_TIDY += -isystem $(MPI_ROOT)/include +CXXFLAGS_CLANG_TIDY := $(subst -I/, -isystem /,$(LDFLAGS)) +CFLAGS_CLANG_TIDY := $(subst -I/, -isystem /,$(CFLAGS)) +GPUFLAGS_CLANG_TIDY := $(subst -I/, -isystem /,$(GPUFLAGS)) +GPUFLAGS_CLANG_TIDY := $(filter-out -ccbin=mpicxx -fmad=false --expt-extended-lambda,$(GPUFLAGS)) +ifdef HIPCONFIG + GPUFLAGS_CLANG_TIDY += --cuda-host-only --rocm-path=$(ROCM_PATH) -isystem /clang/includes -isystem $(ROCM_PATH)/include +else + GPUFLAGS_CLANG_TIDY += --cuda-host-only --cuda-path=$(CUDA_ROOT) -isystem /clang/includes +endif + + $(EXEC): prereq-build $(OBJS) mkdir -p bin/ && $(LD) $(LDFLAGS) $(OBJS) -o $(EXEC) $(LIBS) eval $(EXTRA_COMMANDS) @@ -185,7 +207,19 @@ $(EXEC): prereq-build $(OBJS) %.o: %.cu $(GPUCXX) $(GPUFLAGS) -c $< -o $@ -.PHONY: clean +.PHONY: clean, clobber, tidy, format + +format: + tools/clang-format_runner.sh + +tidy: +# Flags we might want +# - --warnings-as-errors= Upgrade all warnings to error, good for CI + clang-tidy --verify-config + (time clang-tidy $(CLANG_TIDY_ARGS) $(CPPFILES) -- $(DFLAGS) $(CXXFLAGS_CLANG_TIDY) $(LIBS_CLANG_TIDY)) > tidy_results_cpp.txt 2>&1 & \ + (time clang-tidy $(CLANG_TIDY_ARGS) $(CFILES) -- $(DFLAGS) $(CFLAGS_CLANG_TIDY) $(LIBS_CLANG_TIDY)) > tidy_results_c.txt 2>&1 & \ + (time clang-tidy $(CLANG_TIDY_ARGS) $(GPUFILES) -- $(DFLAGS) $(GPUFLAGS_CLANG_TIDY) $(LIBS_CLANG_TIDY)) > tidy_results_gpu.txt 2>&1 & \ + for i in 1 2 3; do wait -n; done clean: rm -f $(CLEAN_OBJS) diff --git a/README.md b/README.md index b372f6b34..610cb1a35 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,5 @@ -![Compile](https://github.com/cholla-hydro/cholla/actions/workflows/build_tests.yml/badge.svg) - +![Build & Lint](https://github.com/cholla-hydro/cholla/actions/workflows/build_and_lint.yml/badge.svg) +![Code Formatting](https://github.com/cholla-hydro/cholla/actions/workflows/code_formatting.yml/badge.svg) CHOLLA ============ A 3D GPU-based hydrodynamics code (Schneider & Robertson, ApJS, 2015). diff --git a/builds/setup.c3po.gcc.sh b/builds/setup.c3po.gcc.sh index 24fc6860d..d08360e6b 100755 --- a/builds/setup.c3po.gcc.sh +++ b/builds/setup.c3po.gcc.sh @@ -7,5 +7,5 @@ echo "mpicxx --version is: " mpicxx --version # export MPI_GPU="-DMPI_GPU" -export F_OFFLOAD="-fopenmp -foffload=disable" +export F_OFFLOAD="-fopenmp" export CHOLLA_ENVSET=1 diff --git a/builds/setup.crc.gcc.sh b/builds/setup.crc.gcc.sh index 586dcbd00..7893b2875 100755 --- a/builds/setup.crc.gcc.sh +++ b/builds/setup.crc.gcc.sh @@ -9,5 +9,5 @@ echo "mpicxx --version is: " mpicxx --version # export MPI_GPU="-DMPI_GPU" -export F_OFFLOAD="-fopenmp -foffload=disable" +export F_OFFLOAD="-fopenmp" export CHOLLA_ENVSET=1 diff --git a/builds/setup.github.gcc.sh b/builds/setup.github.gcc.sh index fd001f23a..a959b3cea 100755 --- a/builds/setup.github.gcc.sh +++ b/builds/setup.github.gcc.sh @@ -4,5 +4,4 @@ # source ./setup.c3po.gcc.sh # export MPI_GPU="-DMPI_GPU" -export F_OFFLOAD="-fopenmp -foffload=disable" export CHOLLA_ENVSET=1 diff --git a/builds/setup.summit.gcc.sh b/builds/setup.summit.gcc.sh index 81a99dd36..0f15f6bfe 100755 --- a/builds/setup.summit.gcc.sh +++ b/builds/setup.summit.gcc.sh @@ -6,6 +6,5 @@ #module load gcc/10.2.0 cuda/11.4.0 fftw hdf5 python module load gcc cuda fftw hdf5 python googletest/1.11.0 -#export F_OFFLOAD="-fopenmp -foffload=nvptx-none='-lm -Ofast'" -export F_OFFLOAD="-fopenmp -foffload=disable" +export F_OFFLOAD="-fopenmp" export CHOLLA_ENVSET=1 diff --git a/docker/cuda/Dockerfile b/docker/cuda/Dockerfile new file mode 100644 index 000000000..abecbe2c3 --- /dev/null +++ b/docker/cuda/Dockerfile @@ -0,0 +1,29 @@ +FROM nvidia/cuda:11.7.1-devel-ubuntu22.04 +# Needs to be devel, not base or runtime, to have nvcc +# Ubuntu 22 is better than 18 because Ubuntu 22 default git is > 2.17 +# Github actions requires git > 2.17 so that cholla is pulled into a git repo +# Which is required for the Makefile +# With ubuntu 22.04 this grabs 2.34.1 + +RUN apt-get -y update && apt install -y \ + cmake \ + git \ + gnupg \ + libgtest-dev \ + libhdf5-serial-dev \ + libopenmpi-dev \ + openmpi-bin \ + software-properties-common \ + wget + +# Install Clang and Tools +RUN wget https://apt.llvm.org/llvm.sh && \ + chmod +x llvm.sh && \ + echo "\n" | ./llvm.sh 15 all && \ + find /usr/bin/ -name 'clang*15' | sed -E 's/^(\/usr\/bin\/.*)(\-[0-9]*)$/ln -s -v \1\2 \1/' | xargs -d '\n' -n 1 bash -c + +# Needed by Cholla Makefile +ENV CHOLLA_MACHINE=github +ENV CUDA_ROOT=/usr/local/cuda-11/ +ENV HDF5_ROOT=/usr/lib/x86_64-linux-gnu/hdf5/serial/ +ENV MPI_ROOT=/usr/lib/x86_64-linux-gnu/openmpi/ diff --git a/docker/rocm/Dockerfile b/docker/rocm/Dockerfile new file mode 100644 index 000000000..5ddaa15ac --- /dev/null +++ b/docker/rocm/Dockerfile @@ -0,0 +1,43 @@ +FROM rocm/dev-ubuntu-20.04:5.2.3 + +# Avoid annoying cmake -> tzdata install prompt +ENV DEBIAN_FRONTEND=noninteractive + +RUN apt-get -y update && apt-get -y install \ + cmake \ + git \ + gnupg \ + hipfft \ + libgtest-dev \ + libhdf5-serial-dev \ + libopenmpi-dev \ + openmpi-bin \ + rocfft \ + software-properties-common \ + wget + +# Needed to trick ROCm into thinking there's a GPU +RUN echo "gfx90a" | sudo tee --append $(hipconfig -R)/bin/target.lst + +# Install rocRand +RUN apt-get -y install rocrand + +# Install Clang and Tools +RUN wget https://apt.llvm.org/llvm.sh && \ + chmod +x llvm.sh && \ + echo "\n" | ./llvm.sh 15 all && \ + find /usr/bin/ -name 'clang*15' | sed -E 's/^(\/usr\/bin\/.*)(\-[0-9]*)$/ln -s -v \1\2 \1/' | xargs -d '\n' -n 1 bash -c + +# Install CUDA since clang-tidy needs it for now +# TODO: Remove this when possible +RUN wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-keyring_1.0-1_all.deb && \ + dpkg -i cuda-keyring_1.0-1_all.deb && \ + apt-get update && \ + apt-get -y install cuda nvidia-gds + +# Needed by Cholla Makefile +ENV CHOLLA_MACHINE=github +ENV HIPCONFIG=/opt/rocm-5.2.3 +ENV ROCM_PATH=/opt/rocm-5.2.3 +ENV HDF5_ROOT=/usr/lib/x86_64-linux-gnu/hdf5/serial +ENV MPI_ROOT=/usr/lib/x86_64-linux-gnu/openmpi diff --git a/src/hydro/hydro_cuda_tests.cu b/src/hydro/hydro_cuda_tests.cu index eb9c3f9ed..490e8eadb 100644 --- a/src/hydro/hydro_cuda_tests.cu +++ b/src/hydro/hydro_cuda_tests.cu @@ -29,10 +29,6 @@ // ============================================================================= TEST(tHYDROCalcDt3D, CorrectInputExpectCorrectOutput) { - - Real* testDt; - cudaHostAlloc(&testDt, sizeof(Real), cudaHostAllocDefault); - // Call the function we are testing int num_blocks = 1; dim3 dim1dGrid(num_blocks, 1, 1); @@ -58,7 +54,8 @@ TEST(tHYDROCalcDt3D, CorrectInputExpectCorrectOutput) host_conserved.at(4) = 1.0; // Energy // Copy host data to device arrray - dev_conserved.cpyHostToDevice(host_conserved); + CudaSafeCall(cudaMemcpy(dev_conserved, host_conserved, n_fields*sizeof(Real), cudaMemcpyHostToDevice)); + //__global__ void Calc_dt_3D(Real *dev_conserved, Real *dev_dti, Real gamma, int n_ghost, int n_fields, int nx, int ny, int nz, Real dx, Real dy, Real dz) // Run the kernel hipLaunchKernelGGL(Calc_dt_3D, dim1dGrid, dim1dBlock, 0, 0, From 6b76b8fb4d73973236b10478ba0b22672c80f4fc Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Tue, 6 Dec 2022 16:21:57 -0500 Subject: [PATCH 168/694] Add source files reorganization to git blame ignore --- .git-blame-rev-ignore-revs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/.git-blame-rev-ignore-revs b/.git-blame-rev-ignore-revs index 160ec665e..8e42d4fda 100644 --- a/.git-blame-rev-ignore-revs +++ b/.git-blame-rev-ignore-revs @@ -6,4 +6,8 @@ # Strip all trailing whitespace 40fcc44334cc92572beb726961e23beb6be8ae2f -# Reformat Code with clang-format \ No newline at end of file +# Source Files Reorganization +50ce61188d43f778e5a31a28b95bbc7312a5bbfb +b78d8c96680c9c2d5a5d41656895cb3795e1e204 + +# Reformat Code with clang-format From d5ac61e6b2c76a9391893556fbb0c26de9ffd979 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Tue, 20 Dec 2022 14:01:39 -0500 Subject: [PATCH 169/694] Add indenting to preprocessor directives --- .clang-format | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.clang-format b/.clang-format index 3aaa1d316..8a9f6dc6f 100644 --- a/.clang-format +++ b/.clang-format @@ -125,7 +125,7 @@ IndentAccessModifiers: false IndentCaseLabels: true IndentCaseBlocks: false IndentGotoLabels: true -IndentPPDirectives: None +IndentPPDirectives: BeforeHash IndentExternBlock: AfterExternBlock IndentRequiresClause: true IndentWidth: 2 From 66f7ae7db98e30e188ed07bb7b0706f76286b291 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Tue, 20 Dec 2022 14:05:35 -0500 Subject: [PATCH 170/694] Only run clang-tidy on CUDA github actions builds --- .github/workflows/build_and_lint.yml | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/.github/workflows/build_and_lint.yml b/.github/workflows/build_and_lint.yml index 9c3d20502..0187b254e 100644 --- a/.github/workflows/build_and_lint.yml +++ b/.github/workflows/build_and_lint.yml @@ -83,13 +83,14 @@ jobs: # Run Clang-tidy - name: Run clang-tidy + if: matrix.container.name == 'CUDA' run: make tidy TYPE=${{ matrix.make-type }} CLANG_TIDY_ARGS="--warnings-as-errors=*" - name: Display tidy_results_cpp.txt - if: always() + if: ${{ (matrix.container.name == 'CUDA') && (always()) }} run: cat tidy_results_cpp.txt - name: Display tidy_results_c.txt - if: always() + if: ${{ (matrix.container.name == 'CUDA') && (always()) }} run: cat tidy_results_c.txt - name: Display tidy_results_gpu.txt - if: always() + if: ${{ (matrix.container.name == 'CUDA') && (always()) }} run: cat tidy_results_gpu.txt From 6dff2096e0c3468913f6eb33d479b4724ea5e5e1 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Tue, 20 Dec 2022 14:51:15 -0500 Subject: [PATCH 171/694] Remove ROCm code from clang-tidy logic in makefile --- Makefile | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/Makefile b/Makefile index dc31ff16a..61e7d9e06 100644 --- a/Makefile +++ b/Makefile @@ -187,11 +187,8 @@ CXXFLAGS_CLANG_TIDY := $(subst -I/, -isystem /,$(LDFLAGS)) CFLAGS_CLANG_TIDY := $(subst -I/, -isystem /,$(CFLAGS)) GPUFLAGS_CLANG_TIDY := $(subst -I/, -isystem /,$(GPUFLAGS)) GPUFLAGS_CLANG_TIDY := $(filter-out -ccbin=mpicxx -fmad=false --expt-extended-lambda,$(GPUFLAGS)) -ifdef HIPCONFIG - GPUFLAGS_CLANG_TIDY += --cuda-host-only --rocm-path=$(ROCM_PATH) -isystem /clang/includes -isystem $(ROCM_PATH)/include -else - GPUFLAGS_CLANG_TIDY += --cuda-host-only --cuda-path=$(CUDA_ROOT) -isystem /clang/includes -endif +GPUFLAGS_CLANG_TIDY += --cuda-host-only --cuda-path=$(CUDA_ROOT) -isystem /clang/includes + $(EXEC): prereq-build $(OBJS) From 0d813ed14fc181766b8b173f0f4efe3c1dedc95b Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Tue, 20 Dec 2022 14:54:39 -0500 Subject: [PATCH 172/694] Rename tidy_results files to .log instead of .txt --- .github/workflows/build_and_lint.yml | 12 ++++++------ Makefile | 6 +++--- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/.github/workflows/build_and_lint.yml b/.github/workflows/build_and_lint.yml index 0187b254e..4c8e374a9 100644 --- a/.github/workflows/build_and_lint.yml +++ b/.github/workflows/build_and_lint.yml @@ -85,12 +85,12 @@ jobs: - name: Run clang-tidy if: matrix.container.name == 'CUDA' run: make tidy TYPE=${{ matrix.make-type }} CLANG_TIDY_ARGS="--warnings-as-errors=*" - - name: Display tidy_results_cpp.txt + - name: Display tidy_results_cpp.log if: ${{ (matrix.container.name == 'CUDA') && (always()) }} - run: cat tidy_results_cpp.txt - - name: Display tidy_results_c.txt + run: cat tidy_results_cpp.log + - name: Display tidy_results_c.log if: ${{ (matrix.container.name == 'CUDA') && (always()) }} - run: cat tidy_results_c.txt - - name: Display tidy_results_gpu.txt + run: cat tidy_results_c.log + - name: Display tidy_results_gpu.log if: ${{ (matrix.container.name == 'CUDA') && (always()) }} - run: cat tidy_results_gpu.txt + run: cat tidy_results_gpu.log diff --git a/Makefile b/Makefile index 61e7d9e06..61d2dda4b 100644 --- a/Makefile +++ b/Makefile @@ -213,9 +213,9 @@ tidy: # Flags we might want # - --warnings-as-errors= Upgrade all warnings to error, good for CI clang-tidy --verify-config - (time clang-tidy $(CLANG_TIDY_ARGS) $(CPPFILES) -- $(DFLAGS) $(CXXFLAGS_CLANG_TIDY) $(LIBS_CLANG_TIDY)) > tidy_results_cpp.txt 2>&1 & \ - (time clang-tidy $(CLANG_TIDY_ARGS) $(CFILES) -- $(DFLAGS) $(CFLAGS_CLANG_TIDY) $(LIBS_CLANG_TIDY)) > tidy_results_c.txt 2>&1 & \ - (time clang-tidy $(CLANG_TIDY_ARGS) $(GPUFILES) -- $(DFLAGS) $(GPUFLAGS_CLANG_TIDY) $(LIBS_CLANG_TIDY)) > tidy_results_gpu.txt 2>&1 & \ + (time clang-tidy $(CLANG_TIDY_ARGS) $(CPPFILES) -- $(DFLAGS) $(CXXFLAGS_CLANG_TIDY) $(LIBS_CLANG_TIDY)) > tidy_results_cpp.log 2>&1 & \ + (time clang-tidy $(CLANG_TIDY_ARGS) $(CFILES) -- $(DFLAGS) $(CFLAGS_CLANG_TIDY) $(LIBS_CLANG_TIDY)) > tidy_results_c.log 2>&1 & \ + (time clang-tidy $(CLANG_TIDY_ARGS) $(GPUFILES) -- $(DFLAGS) $(GPUFLAGS_CLANG_TIDY) $(LIBS_CLANG_TIDY)) > tidy_results_gpu.log 2>&1 & \ for i in 1 2 3; do wait -n; done clean: From 3566a68cdbba24248e99e04da71ae21c76dc7d54 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Tue, 20 Dec 2022 15:42:35 -0500 Subject: [PATCH 173/694] Add the ability to run clang-tidy on specific files --- Makefile | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/Makefile b/Makefile index 61d2dda4b..782eaf71c 100644 --- a/Makefile +++ b/Makefile @@ -188,8 +188,15 @@ CFLAGS_CLANG_TIDY := $(subst -I/, -isystem /,$(CFLAGS)) GPUFLAGS_CLANG_TIDY := $(subst -I/, -isystem /,$(GPUFLAGS)) GPUFLAGS_CLANG_TIDY := $(filter-out -ccbin=mpicxx -fmad=false --expt-extended-lambda,$(GPUFLAGS)) GPUFLAGS_CLANG_TIDY += --cuda-host-only --cuda-path=$(CUDA_ROOT) -isystem /clang/includes - - +CPPFILES_TIDY := $(CPPFILES) +CFILES_TIDY := $(CFILES) +GPUFILES_TIDY := $(GPUFILES) + +ifdef TIDY_FILES + CPPFILES_TIDY := $(filter $(TIDY_FILES), $(CPPFILES_TIDY)) + CFILES_TIDY := $(filter $(TIDY_FILES), $(CFILES_TIDY)) + GPUFILES_TIDY := $(filter $(TIDY_FILES), $(GPUFILES_TIDY)) +endif $(EXEC): prereq-build $(OBJS) mkdir -p bin/ && $(LD) $(LDFLAGS) $(OBJS) -o $(EXEC) $(LIBS) @@ -213,9 +220,9 @@ tidy: # Flags we might want # - --warnings-as-errors= Upgrade all warnings to error, good for CI clang-tidy --verify-config - (time clang-tidy $(CLANG_TIDY_ARGS) $(CPPFILES) -- $(DFLAGS) $(CXXFLAGS_CLANG_TIDY) $(LIBS_CLANG_TIDY)) > tidy_results_cpp.log 2>&1 & \ - (time clang-tidy $(CLANG_TIDY_ARGS) $(CFILES) -- $(DFLAGS) $(CFLAGS_CLANG_TIDY) $(LIBS_CLANG_TIDY)) > tidy_results_c.log 2>&1 & \ - (time clang-tidy $(CLANG_TIDY_ARGS) $(GPUFILES) -- $(DFLAGS) $(GPUFLAGS_CLANG_TIDY) $(LIBS_CLANG_TIDY)) > tidy_results_gpu.log 2>&1 & \ + (time clang-tidy $(CLANG_TIDY_ARGS) $(CPPFILES_TIDY) -- $(DFLAGS) $(CXXFLAGS_CLANG_TIDY) $(LIBS_CLANG_TIDY)) > tidy_results_cpp.log 2>&1 & \ + (time clang-tidy $(CLANG_TIDY_ARGS) $(CFILES_TIDY) -- $(DFLAGS) $(CFLAGS_CLANG_TIDY) $(LIBS_CLANG_TIDY)) > tidy_results_c.log 2>&1 & \ + (time clang-tidy $(CLANG_TIDY_ARGS) $(GPUFILES_TIDY) -- $(DFLAGS) $(GPUFLAGS_CLANG_TIDY) $(LIBS_CLANG_TIDY)) > tidy_results_gpu.log 2>&1 & \ for i in 1 2 3; do wait -n; done clean: From 4f10c55344a552bde11d4d03a663159eca7b1d11 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Tue, 20 Dec 2022 14:13:02 -0500 Subject: [PATCH 174/694] Set clang-tidy naming as decided --- .clang-tidy | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.clang-tidy b/.clang-tidy index 4a5f5703d..120337900 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -177,22 +177,22 @@ CheckOptions: # # Entries that are commented out probably aren't needed but it should be verified readability-identifier-naming.VariableCase: 'lower_case' - readability-identifier-naming.FunctionCase: 'CamelCase' + readability-identifier-naming.FunctionCase: 'Camel_Snake_Case' readability-identifier-naming.NamespaceCase: 'lower_case' readability-identifier-naming.MacroDefinitionCase: 'UPPER_CASE' readability-identifier-naming.TypedefCase: 'CamelCase' readability-identifier-naming.TypeAliasCase: 'CamelCase' readability-identifier-naming.EnumCase: 'CamelCase' - readability-identifier-naming.ConstantCase: 'CamelCase' + readability-identifier-naming.ConstantCase: 'lower_case' - readability-identifier-naming.ConstantPrefix: 'k' + readability-identifier-naming.ConstantPrefix: 'k_' readability-identifier-naming.GlobalVariablePrefix: 'g_' readability-identifier-naming.ClassCase: 'CamelCase' # readability-identifier-naming.MemberCase: 'lower_case' # readability-identifier-naming.MethodCase: 'CamelCase' - readability-identifier-naming.PrivateMemberSuffix: '_' - readability-identifier-naming.PrivateMethodSuffix: '_' + readability-identifier-naming.PrivateMemberPrefix: '_' + readability-identifier-naming.PrivateMethodPrefix: '_' # readability-identifier-naming.StructCase: 'CamelCase' ... From 7ecc3f0fab31e86ae6b780eaedeae90c0d94a202 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Tue, 20 Dec 2022 14:37:21 -0500 Subject: [PATCH 175/694] Remove clang and CUDA from rocm contianer --- docker/rocm/Dockerfile | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/docker/rocm/Dockerfile b/docker/rocm/Dockerfile index 5ddaa15ac..3a7eb66ed 100644 --- a/docker/rocm/Dockerfile +++ b/docker/rocm/Dockerfile @@ -23,17 +23,10 @@ RUN echo "gfx90a" | sudo tee --append $(hipconfig -R)/bin/target.lst RUN apt-get -y install rocrand # Install Clang and Tools -RUN wget https://apt.llvm.org/llvm.sh && \ - chmod +x llvm.sh && \ - echo "\n" | ./llvm.sh 15 all && \ - find /usr/bin/ -name 'clang*15' | sed -E 's/^(\/usr\/bin\/.*)(\-[0-9]*)$/ln -s -v \1\2 \1/' | xargs -d '\n' -n 1 bash -c - -# Install CUDA since clang-tidy needs it for now -# TODO: Remove this when possible -RUN wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-keyring_1.0-1_all.deb && \ - dpkg -i cuda-keyring_1.0-1_all.deb && \ - apt-get update && \ - apt-get -y install cuda nvidia-gds +# RUN wget https://apt.llvm.org/llvm.sh && \ +# chmod +x llvm.sh && \ +# echo "\n" | ./llvm.sh 15 all && \ +# find /usr/bin/ -name 'clang*15' | sed -E 's/^(\/usr\/bin\/.*)(\-[0-9]*)$/ln -s -v \1\2 \1/' | xargs -d '\n' -n 1 bash -c # Needed by Cholla Makefile ENV CHOLLA_MACHINE=github From d10acfaeb3de4d182e1dd78bb49b7e4c8cca2b22 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Wed, 11 Jan 2023 10:41:48 -0500 Subject: [PATCH 176/694] clang-tidy private members to use `_` suffix It was set to require a `_` prefix but that can run afoul of C++ reserved names so instead we're using a `_` suffix. --- .clang-tidy | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.clang-tidy b/.clang-tidy index 120337900..cd7085f4d 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -191,8 +191,8 @@ CheckOptions: readability-identifier-naming.ClassCase: 'CamelCase' # readability-identifier-naming.MemberCase: 'lower_case' # readability-identifier-naming.MethodCase: 'CamelCase' - readability-identifier-naming.PrivateMemberPrefix: '_' - readability-identifier-naming.PrivateMethodPrefix: '_' + readability-identifier-naming.PrivateMemberSuffix: '_' + readability-identifier-naming.PrivateMethodSuffix: '_' # readability-identifier-naming.StructCase: 'CamelCase' ... From f0e19f0f958d7285ee0c77422b64b6945d716a3b Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Mon, 16 Jan 2023 11:13:03 -0500 Subject: [PATCH 177/694] Rename git blame file and fix clang-tidy warnings - Fixed naming of .git-blame-rev-ignore-revs to standard .git-blame-ignore-revs - Fixed a couple of clang-tidy warnings that came with the MHD codes --- .git-blame-rev-ignore-revs => .git-blame-ignore-revs | 0 src/utils/mhd_utilities.cu | 7 ++----- src/utils/mhd_utilities.h | 4 +--- 3 files changed, 3 insertions(+), 8 deletions(-) rename .git-blame-rev-ignore-revs => .git-blame-ignore-revs (100%) diff --git a/.git-blame-rev-ignore-revs b/.git-blame-ignore-revs similarity index 100% rename from .git-blame-rev-ignore-revs rename to .git-blame-ignore-revs diff --git a/src/utils/mhd_utilities.cu b/src/utils/mhd_utilities.cu index 9e947b6c6..b522b61b4 100644 --- a/src/utils/mhd_utilities.cu +++ b/src/utils/mhd_utilities.cu @@ -16,10 +16,7 @@ // Local Includes #include "../utils/mhd_utilities.h" -namespace mhd{ -namespace utils +namespace mhd::utils { -}//utils - -} // end namespace mhd \ No newline at end of file +} // end namespace mhd::utils \ No newline at end of file diff --git a/src/utils/mhd_utilities.h b/src/utils/mhd_utilities.h index ef64b9536..48fb59e0c 100644 --- a/src/utils/mhd_utilities.h +++ b/src/utils/mhd_utilities.h @@ -17,8 +17,7 @@ #include "../utils/gpu.hpp" #include "../utils/cuda_utilities.h" -namespace mhd{ -namespace utils{ +namespace mhd::utils{ /*! * \brief Namespace for functions required by functions within the mhd::utils * namespace. Everything in this name space should be regarded as private @@ -335,4 +334,3 @@ namespace utils{ #endif // MHD // ========================================================================= } // end namespace mhd::utils -} // end namespace mhd \ No newline at end of file From 729ef8ed307eaa2cf42baa1f5af6c389ad614ac4 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Mon, 16 Jan 2023 11:15:03 -0500 Subject: [PATCH 178/694] Format all C++ files --- src/analysis/analysis.cpp | 400 +- src/analysis/analysis.h | 151 +- src/analysis/feedback_analysis.cpp | 171 +- src/analysis/feedback_analysis.h | 41 +- src/analysis/feedback_analysis_gpu.cu | 202 +- src/analysis/io_analysis.cpp | 941 +-- src/analysis/lya_statistics.cpp | 2349 +++---- src/analysis/phase_diagram.cpp | 165 +- src/chemistry_gpu/chemistry_functions.cpp | 472 +- src/chemistry_gpu/chemistry_functions_gpu.cu | 1564 ++--- src/chemistry_gpu/chemistry_gpu.h | 125 +- src/chemistry_gpu/chemistry_io.cpp | 130 +- src/chemistry_gpu/rates.cuh | 219 +- src/chemistry_gpu/rates_Katz95.cuh | 72 +- src/cooling/cooling_cuda.cu | 351 +- src/cooling/cooling_cuda.h | 52 +- src/cooling/load_cloudy_texture.cu | 229 +- src/cooling/load_cloudy_texture.h | 12 +- src/cooling/texture_utilities.h | 36 +- src/cooling_grackle/cool_grackle.cpp | 241 +- src/cooling_grackle/cool_grackle.h | 29 +- src/cooling_grackle/grackle_functions.cpp | 221 +- src/cosmology/cosmology.cpp | 99 +- src/cosmology/cosmology.h | 38 +- src/cosmology/cosmology_functions.cpp | 172 +- src/cosmology/cosmology_functions_gpu.cu | 71 +- src/cosmology/cosmology_functions_gpu.h | 25 +- src/cosmology/io_cosmology.cpp | 96 +- src/dust/dust_cuda.cu | 238 +- src/dust/dust_cuda.h | 25 +- src/dust/dust_cuda_tests.cpp | 97 +- src/global/global.cpp | 446 +- src/global/global.h | 268 +- src/global/global_cuda.cu | 6 +- src/global/global_cuda.h | 150 +- src/gravity/grav3D.cpp | 151 +- src/gravity/grav3D.h | 147 +- src/gravity/gravity_boundaries.cpp | 475 +- src/gravity/gravity_boundaries_gpu.cu | 399 +- src/gravity/gravity_functions.cpp | 882 +-- src/gravity/gravity_functions_gpu.cu | 302 +- src/gravity/paris/HenryPeriodic.cu | 129 +- src/gravity/paris/HenryPeriodic.hpp | 592 +- src/gravity/paris/ParisPeriodic.cu | 126 +- src/gravity/paris/ParisPeriodic.hpp | 76 +- src/gravity/paris/PoissonZero3DBlockedGPU.cu | 919 +-- src/gravity/paris/PoissonZero3DBlockedGPU.hpp | 44 +- src/gravity/potential_SOR_3D.cpp | 758 ++- src/gravity/potential_SOR_3D.h | 219 +- src/gravity/potential_SOR_3D_gpu.cu | 825 ++- src/gravity/potential_paris_3D.cu | 204 +- src/gravity/potential_paris_3D.h | 41 +- src/gravity/potential_paris_galactic.cu | 226 +- src/gravity/potential_paris_galactic.h | 49 +- src/gravity/static_grav.h | 219 +- src/grid/boundary_conditions.cpp | 640 +- src/grid/cuda_boundaries.cu | 479 +- src/grid/cuda_boundaries.h | 40 +- src/grid/grid3D.cpp | 646 +- src/grid/grid3D.h | 1160 ++-- src/grid/grid_enum.h | 87 +- src/grid/initial_conditions.cpp | 1833 +++--- src/grid/mpi_boundaries.cpp | 1309 ++-- src/h_correction/flux_correction.h | 35 +- src/h_correction/h_correction_2D_cuda.cu | 249 +- src/h_correction/h_correction_2D_cuda.h | 79 +- src/h_correction/h_correction_3D_cuda.cu | 348 +- src/h_correction/h_correction_3D_cuda.h | 102 +- src/hydro/hydro_cuda.cu | 1531 ++--- src/hydro/hydro_cuda.h | 143 +- src/hydro/hydro_cuda_tests.cu | 154 +- src/integrators/VL_1D_cuda.cu | 311 +- src/integrators/VL_1D_cuda.h | 13 +- src/integrators/VL_2D_cuda.cu | 391 +- src/integrators/VL_2D_cuda.h | 15 +- src/integrators/VL_3D_cuda.cu | 787 ++- src/integrators/VL_3D_cuda.h | 21 +- src/integrators/simple_1D_cuda.cu | 129 +- src/integrators/simple_1D_cuda.h | 13 +- src/integrators/simple_2D_cuda.cu | 163 +- src/integrators/simple_2D_cuda.h | 14 +- src/integrators/simple_3D_cuda.cu | 318 +- src/integrators/simple_3D_cuda.h | 23 +- src/io/io.cpp | 3704 ++++++----- src/io/io.h | 55 +- src/io/io_gpu.cu | 133 +- src/main.cpp | 328 +- src/main_tests.cpp | 197 +- src/mhd/ct_electric_fields.cu | 643 +- src/mhd/ct_electric_fields.h | 256 +- src/mhd/ct_electric_fields_tests.cu | 298 +- src/mhd/magnetic_divergence.cu | 201 +- src/mhd/magnetic_divergence.h | 81 +- src/mhd/magnetic_divergence_tests.cu | 80 +- src/mhd/magnetic_update.cu | 130 +- src/mhd/magnetic_update.h | 59 +- src/mhd/magnetic_update_tests.cu | 202 +- src/model/disk_ICs.cpp | 1318 ++-- src/model/disk_galaxy.h | 376 +- src/mpi/MPI_Comm_node.c | 53 +- src/mpi/MPI_Comm_node.h | 6 +- src/mpi/cuda_mpi_routines.cu | 54 +- src/mpi/cuda_mpi_routines.h | 7 +- src/mpi/mpi_routines.cpp | 867 ++- src/mpi/mpi_routines.h | 92 +- src/particles/density_CIC.cpp | 423 +- src/particles/density_CIC.h | 10 +- src/particles/density_CIC_gpu.cu | 210 +- src/particles/density_boundaries.cpp | 264 +- src/particles/density_boundaries_gpu.cu | 270 +- src/particles/feedback_CIC_gpu.cu | 1010 +-- src/particles/gravity_CIC.cpp | 386 +- src/particles/gravity_CIC_gpu.cu | 367 +- src/particles/io_particles.cpp | 1012 +-- src/particles/particles_3D.cpp | 959 +-- src/particles/particles_3D.h | 296 +- src/particles/particles_3D_gpu.cu | 262 +- src/particles/particles_boundaries.cpp | 1335 ++-- src/particles/particles_boundaries_cpu.cpp | 702 ++- src/particles/particles_boundaries_gpu.cu | 500 +- src/particles/particles_boundaries_gpu.h | 68 +- src/particles/particles_dynamics.cpp | 504 +- src/particles/particles_dynamics_gpu.cu | 264 +- src/particles/supernova.h | 65 +- src/reconstruction/pcm_cuda.cu | 650 +- src/reconstruction/pcm_cuda.h | 40 +- src/reconstruction/plmc_cuda.cu | 782 +-- src/reconstruction/plmc_cuda.h | 31 +- src/reconstruction/plmp_cuda.cu | 462 +- src/reconstruction/plmp_cuda.h | 45 +- src/reconstruction/ppmc_cuda.cu | 1526 +++-- src/reconstruction/ppmc_cuda.h | 29 +- src/reconstruction/ppmp_cuda.cu | 942 +-- src/reconstruction/ppmp_cuda.h | 47 +- src/riemann_solvers/exact_cuda.cu | 358 +- src/riemann_solvers/exact_cuda.h | 41 +- src/riemann_solvers/hll_cuda.cu | 316 +- src/riemann_solvers/hll_cuda.h | 30 +- src/riemann_solvers/hllc_cuda.cu | 408 +- src/riemann_solvers/hllc_cuda.h | 30 +- src/riemann_solvers/hllc_cuda_tests.cu | 373 +- src/riemann_solvers/hlld_cuda.cu | 1607 ++--- src/riemann_solvers/hlld_cuda.h | 631 +- src/riemann_solvers/hlld_cuda_tests.cu | 5583 +++++++++-------- src/riemann_solvers/roe_cuda.cu | 479 +- src/riemann_solvers/roe_cuda.h | 30 +- src/system_tests/cooling_system_tests.cpp | 61 +- src/system_tests/gravity_system_tests.cpp | 7 +- src/system_tests/hydro_system_tests.cpp | 384 +- src/system_tests/mhd_system_tests.cpp | 984 +-- src/system_tests/particles_system_tests.cpp | 7 +- src/system_tests/system_tester.cpp | 1357 ++-- src/system_tests/system_tester.h | 760 +-- src/utils/DeviceVector.h | 598 +- src/utils/DeviceVector_tests.cu | 501 +- src/utils/cuda_utilities.cpp | 5 +- src/utils/cuda_utilities.h | 221 +- src/utils/cuda_utilities_tests.cpp | 162 +- src/utils/error_check_cuda.cu | 74 +- src/utils/error_check_cuda.h | 25 +- src/utils/error_handling.cpp | 14 +- src/utils/gpu.hpp | 420 +- src/utils/gpu_arrays_functions.cu | 80 +- src/utils/gpu_arrays_functions.h | 54 +- src/utils/hydro_utilities.cpp | 5 +- src/utils/hydro_utilities.h | 191 +- src/utils/hydro_utilities_tests.cpp | 282 +- src/utils/math_utilities.h | 129 +- src/utils/math_utilities_tests.cpp | 69 +- src/utils/mhd_utilities.cu | 2 +- src/utils/mhd_utilities.h | 604 +- src/utils/mhd_utilities_tests.cu | 649 +- src/utils/parallel_omp.cpp | 49 +- src/utils/parallel_omp.h | 31 +- src/utils/prng_utilities.h | 55 +- src/utils/ran.h | 31 +- src/utils/reduction_utilities.cu | 52 +- src/utils/reduction_utilities.h | 520 +- src/utils/reduction_utilities_tests.cu | 88 +- src/utils/testing_utilities.cpp | 206 +- src/utils/testing_utilities.h | 363 +- src/utils/timing_functions.cpp | 171 +- src/utils/timing_functions.h | 48 +- 183 files changed, 36349 insertions(+), 33117 deletions(-) diff --git a/src/analysis/analysis.cpp b/src/analysis/analysis.cpp index 0e4de3b17..ec2eba059 100644 --- a/src/analysis/analysis.cpp +++ b/src/analysis/analysis.cpp @@ -1,71 +1,68 @@ #ifdef ANALYSIS -#include -#include "../analysis/analysis.h" -#include "../io/io.h" + #include "../analysis/analysis.h" + #include -Analysis_Module::Analysis_Module( void ){} + #include "../io/io.h" -#ifdef LYA_STATISTICS -void Grid3D::Compute_Lya_Statistics( ){ +Analysis_Module::Analysis_Module(void) {} + #ifdef LYA_STATISTICS +void Grid3D::Compute_Lya_Statistics() +{ int axis, n_skewers; Real time_start, time_end, time_elapsed; time_start = get_time(); - + // Copmpute Lya Statitics - chprintf( "Computing Lya Absorbiton along skewers \n"); - for ( axis=0; axis<3; axis++ ){ - - if ( axis == 0 ) n_skewers = Analysis.n_skewers_local_x; - if ( axis == 1 ) n_skewers = Analysis.n_skewers_local_y; - if ( axis == 2 ) n_skewers = Analysis.n_skewers_local_z; - - if ( axis == 0 ) chprintf( " Computing Along X axis: "); - if ( axis == 1 ) chprintf( " Computing Along Y axis: "); - if ( axis == 2 ) chprintf( " Computing Along Z axis: "); - - - Populate_Lya_Skewers_Local( axis ); - Analysis.Initialize_Lya_Statistics_Measurements( axis ); - Analysis.Transfer_Skewers_Data( axis ); - - for ( int skewer_id=0; skewer_id< n_skewers; skewer_id++ ){ - Compute_Transmitted_Flux_Skewer( skewer_id, axis ); - Analysis.Compute_Lya_Mean_Flux_Skewer( skewer_id, axis ); + chprintf("Computing Lya Absorbiton along skewers \n"); + for (axis = 0; axis < 3; axis++) { + if (axis == 0) n_skewers = Analysis.n_skewers_local_x; + if (axis == 1) n_skewers = Analysis.n_skewers_local_y; + if (axis == 2) n_skewers = Analysis.n_skewers_local_z; + + if (axis == 0) chprintf(" Computing Along X axis: "); + if (axis == 1) chprintf(" Computing Along Y axis: "); + if (axis == 2) chprintf(" Computing Along Z axis: "); + + Populate_Lya_Skewers_Local(axis); + Analysis.Initialize_Lya_Statistics_Measurements(axis); + Analysis.Transfer_Skewers_Data(axis); + + for (int skewer_id = 0; skewer_id < n_skewers; skewer_id++) { + Compute_Transmitted_Flux_Skewer(skewer_id, axis); + Analysis.Compute_Lya_Mean_Flux_Skewer(skewer_id, axis); } - Analysis.Reduce_Lya_Mean_Flux_Axis( axis ); - + Analysis.Reduce_Lya_Mean_Flux_Axis(axis); + #ifdef OUTPUT_SKEWERS - Analysis.Transfer_Skewers_Global_Axis( axis ); + Analysis.Transfer_Skewers_Global_Axis(axis); #endif - - } + } Analysis.Reduce_Lya_Mean_Flux_Global(); // if( Analysis.Flux_mean_HI > 1e-10 ){ - - // Compute the Flux Power Spectrum after computing the mean transmitted flux - for ( axis=0; axis<3; axis++ ){ - if ( axis == 0 ) n_skewers = Analysis.n_skewers_local_x; - if ( axis == 1 ) n_skewers = Analysis.n_skewers_local_y; - if ( axis == 2 ) n_skewers = Analysis.n_skewers_local_z; + // Compute the Flux Power Spectrum after computing the mean transmitted flux + for (axis = 0; axis < 3; axis++) { + if (axis == 0) n_skewers = Analysis.n_skewers_local_x; + if (axis == 1) n_skewers = Analysis.n_skewers_local_y; + if (axis == 2) n_skewers = Analysis.n_skewers_local_z; - if ( axis == 0 ) chprintf( " Computing P(k) Along X axis\n"); - if ( axis == 1 ) chprintf( " Computing P(k) Along Y axis\n"); - if ( axis == 2 ) chprintf( " Computing P(k) Along Z axis\n"); + if (axis == 0) chprintf(" Computing P(k) Along X axis\n"); + if (axis == 1) chprintf(" Computing P(k) Along Y axis\n"); + if (axis == 2) chprintf(" Computing P(k) Along Z axis\n"); - Initialize_Power_Spectrum_Measurements( axis ); + Initialize_Power_Spectrum_Measurements(axis); - for ( int skewer_id=0; skewer_id< n_skewers; skewer_id++ ){ - Compute_Flux_Power_Spectrum_Skewer( skewer_id, axis ); + for (int skewer_id = 0; skewer_id < n_skewers; skewer_id++) { + Compute_Flux_Power_Spectrum_Skewer(skewer_id, axis); } - - Analysis.Reduce_Power_Spectrum_Axis( axis ); + + Analysis.Reduce_Power_Spectrum_Axis(axis); } - + Analysis.Reduce_Power_Spectrum_Global(); Analysis.Computed_Flux_Power_Spectrum = 1; @@ -73,27 +70,27 @@ void Grid3D::Compute_Lya_Statistics( ){ // Analysis.Computed_Flux_Power_Spectrum = 0; // } - time_end = get_time(); - time_elapsed = (time_end - time_start)*1000; - chprintf( "Analysis Time: %f9.1 ms \n", time_elapsed ); + time_end = get_time(); + time_elapsed = (time_end - time_start) * 1000; + chprintf("Analysis Time: %f9.1 ms \n", time_elapsed); } -#endif //LYA_STATISTICS - - -void Grid3D::Compute_and_Output_Analysis( struct parameters *P ){ + #endif // LYA_STATISTICS +void Grid3D::Compute_and_Output_Analysis(struct parameters *P) +{ #ifdef COSMOLOGY - chprintf("\nComputing Analysis current_z: %f\n", Analysis.current_z ); - #else + chprintf("\nComputing Analysis current_z: %f\n", Analysis.current_z); + #else chprintf("\nComputing Analysis \n"); #endif - - cudaMemcpy( C.density, C.device, H.n_fields*H.n_cells*sizeof(Real), cudaMemcpyDeviceToHost); + + cudaMemcpy(C.density, C.device, H.n_fields * H.n_cells * sizeof(Real), + cudaMemcpyDeviceToHost); #ifdef PHASE_DIAGRAM - #ifdef CHEMISTRY_GPU - Compute_Gas_Temperature( Chem.Fields.temperature_h, true ); - #endif + #ifdef CHEMISTRY_GPU + Compute_Gas_Temperature(Chem.Fields.temperature_h, true); + #endif Compute_Phase_Diagram(); #endif @@ -101,42 +98,40 @@ void Grid3D::Compute_and_Output_Analysis( struct parameters *P ){ Compute_Lya_Statistics(); #endif - //Write to HDF5 file + // Write to HDF5 file #if defined(COSMOLOGY) || defined(PHASE_DIAGRAM) || defined(LYA_STATISTICS) - #ifdef MPI_CHOLLA - if ( procID == 0 ) Output_Analysis(P); - #else + #ifdef MPI_CHOLLA + if (procID == 0) Output_Analysis(P); + #else Output_Analysis(P); - #endif + #endif #endif - #ifdef LYA_STATISTICS - if (Analysis.Computed_Flux_Power_Spectrum == 1) Analysis.Clear_Power_Spectrum_Measurements(); + if (Analysis.Computed_Flux_Power_Spectrum == 1) + Analysis.Clear_Power_Spectrum_Measurements(); #endif #ifdef COSMOLOGY Analysis.Set_Next_Scale_Output(); - #endif + #endif Analysis.Output_Now = false; - // exit(0); } +void Grid3D::Initialize_Analysis_Module(struct parameters *P) +{ + chprintf("\nInitializng Analysis Module...\n"); - -void Grid3D::Initialize_Analysis_Module( struct parameters *P ){ - - chprintf( "\nInitializng Analysis Module...\n"); - #ifndef MPI_CHOLLA - chprintf( "The Analysys Module is implemented for the MPI version only... sorry!\n "); + chprintf( + "The Analysys Module is implemented for the MPI version only... " + "sorry!\n "); exit(-1); #endif - - + Real z_now; #ifdef COSMOLOGY z_now = Cosmo.current_z; @@ -144,41 +139,47 @@ void Grid3D::Initialize_Analysis_Module( struct parameters *P ){ z_now = 0; #endif - Analysis.Initialize( H.xdglobal, H.ydglobal, H.zdglobal, H.xblocal, H.yblocal, H.zblocal, P->nx, P->ny, P->nz, H.nx_real, H.ny_real, H.nz_real, H.dx, H.dy, H.dz, H.n_ghost, z_now, P ); - + Analysis.Initialize(H.xdglobal, H.ydglobal, H.zdglobal, H.xblocal, H.yblocal, + H.zblocal, P->nx, P->ny, P->nz, H.nx_real, H.ny_real, + H.nz_real, H.dx, H.dy, H.dz, H.n_ghost, z_now, P); } -void Analysis_Module::Initialize( Real Lx, Real Ly, Real Lz, Real x_min, Real y_min, Real z_min, int nx, int ny, int nz, int nx_real, int ny_real, int nz_real, Real dx_real, Real dy_real, Real dz_real, int n_ghost_hydro, Real z_now, struct parameters *P ){ - - //Domain Length +void Analysis_Module::Initialize(Real Lx, Real Ly, Real Lz, Real x_min, + Real y_min, Real z_min, int nx, int ny, int nz, + int nx_real, int ny_real, int nz_real, + Real dx_real, Real dy_real, Real dz_real, + int n_ghost_hydro, Real z_now, + struct parameters *P) +{ + // Domain Length Lbox_x = Lx; Lbox_y = Ly; Lbox_z = Lz; - //Left Boundaries of Local domain + // Left Boundaries of Local domain xMin = x_min; yMin = y_min; zMin = z_min; - //Cell sizes + // Cell sizes dx = dx_real; dy = dy_real; dz = dz_real; - //Size of Global Domain + // Size of Global Domain nx_total = nx; ny_total = ny; nz_total = nz; - //Size of Local Domain + // Size of Local Domain nx_local = nx_real; ny_local = ny_real; nz_local = nz_real; - //Number of ghost cells in the conserved arrays + // Number of ghost cells in the conserved arrays n_ghost = n_ghost_hydro; - //Domain Global left Boundary + // Domain Global left Boundary xMin_global = P->xmin; yMin_global = P->ymin; zMin_global = P->zmin; @@ -186,7 +187,7 @@ void Analysis_Module::Initialize( Real Lx, Real Ly, Real Lz, Real x_min, Real y_ #ifdef COSMOLOGY current_z = z_now; - //Load values of scale factor for analysis outputs + // Load values of scale factor for analysis outputs Load_Scale_Outputs(P); #endif @@ -198,139 +199,128 @@ void Analysis_Module::Initialize( Real Lx, Real Ly, Real Lz, Real x_min, Real y_ Initialize_Lya_Statistics(P); #endif - chprintf( "Analysis Module Successfully Initialized.\n\n"); - - + chprintf("Analysis Module Successfully Initialized.\n\n"); } - - - - -void Analysis_Module::Reset(){ - +void Analysis_Module::Reset() +{ #ifdef PHASE_DIAGRAM free(phase_diagram); #endif #ifdef LYA_STATISTICS - free( skewers_HI_density_local_x ); - free( skewers_HI_density_local_y ); - free( skewers_HI_density_local_z ); - free( skewers_HeII_density_local_x ); - free( skewers_HeII_density_local_y ); - free( skewers_HeII_density_local_z ); - free( skewers_velocity_local_x ); - free( skewers_velocity_local_y ); - free( skewers_velocity_local_z ); - free( skewers_temperature_local_x ); - free( skewers_temperature_local_y ); - free( skewers_temperature_local_z ); - #ifdef OUTPUT_SKEWERS - free( skewers_density_local_x ); - free( skewers_density_local_y ); - free( skewers_density_local_z ); - #endif - - #ifdef MPI_CHOLLA - - if ( procID == 0 ){ - free( root_procs_x ); - free( root_procs_y ); - free( root_procs_z ); + free(skewers_HI_density_local_x); + free(skewers_HI_density_local_y); + free(skewers_HI_density_local_z); + free(skewers_HeII_density_local_x); + free(skewers_HeII_density_local_y); + free(skewers_HeII_density_local_z); + free(skewers_velocity_local_x); + free(skewers_velocity_local_y); + free(skewers_velocity_local_z); + free(skewers_temperature_local_x); + free(skewers_temperature_local_y); + free(skewers_temperature_local_z); #ifdef OUTPUT_SKEWERS - free( transfer_buffer_root_x ); - free( transfer_buffer_root_y ); - free( transfer_buffer_root_z ); - free( skewers_transmitted_flux_HI_x_global ); - free( skewers_transmitted_flux_HI_y_global ); - free( skewers_transmitted_flux_HI_z_global ); - free( skewers_transmitted_flux_HeII_x_global ); - free( skewers_transmitted_flux_HeII_y_global ); - free( skewers_transmitted_flux_HeII_z_global ); - free( skewers_density_x_global ); - free( skewers_density_y_global ); - free( skewers_density_z_global ); - free( skewers_HI_density_x_global ); - free( skewers_HI_density_y_global ); - free( skewers_HI_density_z_global ); - free( skewers_HeII_density_x_global ); - free( skewers_HeII_density_y_global ); - free( skewers_HeII_density_z_global ); - free( skewers_temperature_x_global ); - free( skewers_temperature_y_global ); - free( skewers_temperature_z_global ); - free( skewers_los_velocity_x_global ); - free( skewers_los_velocity_y_global ); - free( skewers_los_velocity_z_global ); - - #endif - } - - if ( am_I_root_x ){ - free( skewers_HI_density_root_x ); - free( skewers_HeII_density_root_x ); - free( skewers_velocity_root_x ); - free( skewers_temperature_root_x ); - free( full_HI_density_x ); - free( full_HeII_density_x ); - free( full_velocity_x ); - free( full_temperature_x ); - free( full_optical_depth_HI_x ); - free( full_optical_depth_HeII_x ); - free( full_vel_Hubble_x ); - free( skewers_transmitted_flux_HI_x ); - free( skewers_transmitted_flux_HeII_x ); - #ifdef OUTPUT_SKEWERS - free( skewers_density_root_x ); + free(skewers_density_local_x); + free(skewers_density_local_y); + free(skewers_density_local_z); #endif + + #ifdef MPI_CHOLLA + + if (procID == 0) { + free(root_procs_x); + free(root_procs_y); + free(root_procs_z); + #ifdef OUTPUT_SKEWERS + free(transfer_buffer_root_x); + free(transfer_buffer_root_y); + free(transfer_buffer_root_z); + free(skewers_transmitted_flux_HI_x_global); + free(skewers_transmitted_flux_HI_y_global); + free(skewers_transmitted_flux_HI_z_global); + free(skewers_transmitted_flux_HeII_x_global); + free(skewers_transmitted_flux_HeII_y_global); + free(skewers_transmitted_flux_HeII_z_global); + free(skewers_density_x_global); + free(skewers_density_y_global); + free(skewers_density_z_global); + free(skewers_HI_density_x_global); + free(skewers_HI_density_y_global); + free(skewers_HI_density_z_global); + free(skewers_HeII_density_x_global); + free(skewers_HeII_density_y_global); + free(skewers_HeII_density_z_global); + free(skewers_temperature_x_global); + free(skewers_temperature_y_global); + free(skewers_temperature_z_global); + free(skewers_los_velocity_x_global); + free(skewers_los_velocity_y_global); + free(skewers_los_velocity_z_global); + + #endif } - if ( am_I_root_y ){ - free( skewers_HI_density_root_y ); - free( skewers_HeII_density_root_y ); - free( skewers_velocity_root_y ); - free( skewers_temperature_root_y ); - free( full_HI_density_y ); - free( full_HeII_density_y ); - free( full_velocity_y ); - free( full_temperature_y ); - free( full_optical_depth_HI_y ); - free( full_optical_depth_HeII_y ); - free( full_vel_Hubble_y ); - free( skewers_transmitted_flux_HI_y ); - free( skewers_transmitted_flux_HeII_y ); - #ifdef OUTPUT_SKEWERS - free( skewers_density_root_y ); - #endif + if (am_I_root_x) { + free(skewers_HI_density_root_x); + free(skewers_HeII_density_root_x); + free(skewers_velocity_root_x); + free(skewers_temperature_root_x); + free(full_HI_density_x); + free(full_HeII_density_x); + free(full_velocity_x); + free(full_temperature_x); + free(full_optical_depth_HI_x); + free(full_optical_depth_HeII_x); + free(full_vel_Hubble_x); + free(skewers_transmitted_flux_HI_x); + free(skewers_transmitted_flux_HeII_x); + #ifdef OUTPUT_SKEWERS + free(skewers_density_root_x); + #endif } - if ( am_I_root_z ){ - free( skewers_HI_density_root_z ); - free( skewers_HeII_density_root_z ); - free( skewers_velocity_root_z ); - free( skewers_temperature_root_z ); - free( full_HI_density_z ); - free( full_HeII_density_z ); - free( full_velocity_z ); - free( full_temperature_z ); - free( full_optical_depth_HI_z ); - free( full_optical_depth_HeII_z ); - free( full_vel_Hubble_z ); - free( skewers_transmitted_flux_HI_z ); - free( skewers_transmitted_flux_HeII_z ); - #ifdef OUTPUT_SKEWERS - free( skewers_density_root_z ); - #endif + if (am_I_root_y) { + free(skewers_HI_density_root_y); + free(skewers_HeII_density_root_y); + free(skewers_velocity_root_y); + free(skewers_temperature_root_y); + free(full_HI_density_y); + free(full_HeII_density_y); + free(full_velocity_y); + free(full_temperature_y); + free(full_optical_depth_HI_y); + free(full_optical_depth_HeII_y); + free(full_vel_Hubble_y); + free(skewers_transmitted_flux_HI_y); + free(skewers_transmitted_flux_HeII_y); + #ifdef OUTPUT_SKEWERS + free(skewers_density_root_y); + #endif } + if (am_I_root_z) { + free(skewers_HI_density_root_z); + free(skewers_HeII_density_root_z); + free(skewers_velocity_root_z); + free(skewers_temperature_root_z); + free(full_HI_density_z); + free(full_HeII_density_z); + free(full_velocity_z); + free(full_temperature_z); + free(full_optical_depth_HI_z); + free(full_optical_depth_HeII_z); + free(full_vel_Hubble_z); + free(skewers_transmitted_flux_HI_z); + free(skewers_transmitted_flux_HeII_z); + #ifdef OUTPUT_SKEWERS + free(skewers_density_root_z); + #endif + } + #endif #endif - #endif - - } - - #endif diff --git a/src/analysis/analysis.h b/src/analysis/analysis.h index 096d6b6bd..7c7913b2d 100644 --- a/src/analysis/analysis.h +++ b/src/analysis/analysis.h @@ -1,20 +1,21 @@ #ifdef ANALYSIS -#ifndef ANALYSIS_H -#define ANALYSIS_H + #ifndef ANALYSIS_H + #define ANALYSIS_H -#include "../global/global.h" -#include + #include -#ifdef LYA_STATISTICS -#include -#endif + #include "../global/global.h" -using namespace std; + #ifdef LYA_STATISTICS + #include + #endif -class Analysis_Module{ -public: +using namespace std; +class Analysis_Module +{ + public: Real Lbox_x; Real Lbox_y; Real Lbox_z; @@ -47,13 +48,11 @@ class Analysis_Module{ bool Output_Now; int n_file; - #ifdef COSMOLOGY + #ifdef COSMOLOGY Real current_z; - #endif - - + #endif - #ifdef PHASE_DIAGRAM + #ifdef PHASE_DIAGRAM int n_dens; int n_temp; Real temp_min; @@ -61,13 +60,12 @@ class Analysis_Module{ Real dens_min; Real dens_max; float *phase_diagram; - #ifdef MPI_CHOLLA + #ifdef MPI_CHOLLA float *phase_diagram_global; - #endif - #endif - + #endif + #endif - #ifdef LYA_STATISTICS + #ifdef LYA_STATISTICS int Computed_Flux_Power_Spectrum; int n_stride; int n_skewers_local_x; @@ -128,7 +126,6 @@ class Analysis_Module{ Real *full_HI_density_y; Real *full_HI_density_z; - Real *full_HeII_density_x; Real *full_HeII_density_y; Real *full_HeII_density_z; @@ -140,91 +137,89 @@ class Analysis_Module{ Real *full_temperature_x; Real *full_temperature_y; Real *full_temperature_z; - + Real *full_optical_depth_HI_x; Real *full_optical_depth_HI_y; Real *full_optical_depth_HI_z; - + Real *full_optical_depth_HeII_x; Real *full_optical_depth_HeII_y; Real *full_optical_depth_HeII_z; - + Real *full_vel_Hubble_x; Real *full_vel_Hubble_y; Real *full_vel_Hubble_z; - + Real *skewers_transmitted_flux_HI_x; Real *skewers_transmitted_flux_HI_y; Real *skewers_transmitted_flux_HI_z; - + Real *skewers_transmitted_flux_HeII_x; Real *skewers_transmitted_flux_HeII_y; Real *skewers_transmitted_flux_HeII_z; - - #ifdef OUTPUT_SKEWERS - + + #ifdef OUTPUT_SKEWERS + Real *skewers_density_local_x; Real *skewers_density_local_y; Real *skewers_density_local_z; - + Real *skewers_density_root_x; Real *skewers_density_root_y; Real *skewers_density_root_z; - + Real *skewers_density_x_global; Real *skewers_density_y_global; Real *skewers_density_z_global; - - + Real *skewers_HI_density_x_global; Real *skewers_HI_density_y_global; Real *skewers_HI_density_z_global; - + Real *skewers_HeII_density_x_global; Real *skewers_HeII_density_y_global; Real *skewers_HeII_density_z_global; - + Real *skewers_temperature_x_global; Real *skewers_temperature_y_global; Real *skewers_temperature_z_global; - + Real *skewers_los_velocity_x_global; Real *skewers_los_velocity_y_global; Real *skewers_los_velocity_z_global; - + Real *skewers_transmitted_flux_HI_x_global; Real *skewers_transmitted_flux_HI_y_global; Real *skewers_transmitted_flux_HI_z_global; - + Real *skewers_transmitted_flux_HeII_x_global; Real *skewers_transmitted_flux_HeII_y_global; Real *skewers_transmitted_flux_HeII_z_global; - + Real *transfer_buffer_root_x; Real *transfer_buffer_root_y; Real *transfer_buffer_root_z; - #endif - + #endif + Real Flux_mean_root_HI_x; Real Flux_mean_root_HI_y; Real Flux_mean_root_HI_z; - + Real Flux_mean_root_HeII_x; Real Flux_mean_root_HeII_y; Real Flux_mean_root_HeII_z; - + Real Flux_mean_HI_x; Real Flux_mean_HI_y; Real Flux_mean_HI_z; - + Real Flux_mean_HeII_x; Real Flux_mean_HeII_y; Real Flux_mean_HeII_z; - + Real Flux_mean_HI; Real Flux_mean_HeII; - int n_skewers_processed; int n_ghost_skewer; @@ -281,51 +276,49 @@ class Analysis_Module{ Real *ps_global_z; Real *ps_mean; Real *k_centers; - + bool *root_procs_x; bool *root_procs_y; - bool *root_procs_z; - - #ifdef MPI_CHOLLA + bool *root_procs_z; + + #ifdef MPI_CHOLLA Real *mpi_domain_boundary_x; Real *mpi_domain_boundary_y; Real *mpi_domain_boundary_z; vector mpi_indices_x; vector mpi_indices_y; vector mpi_indices_z; - #endif - - #endif + #endif + #endif - Analysis_Module( void ); - void Initialize( Real Lx, Real Ly, Real Lz, Real x_min, Real y_min, Real z_min, int nx, int ny, int nz, int nx_real, int ny_real, int nz_real, Real dx_real, Real dy_real, Real dz_real, int n_ghost_hydro, Real z_now, struct parameters *P ); + Analysis_Module(void); + void Initialize(Real Lx, Real Ly, Real Lz, Real x_min, Real y_min, Real z_min, + int nx, int ny, int nz, int nx_real, int ny_real, int nz_real, + Real dx_real, Real dy_real, Real dz_real, int n_ghost_hydro, + Real z_now, struct parameters *P); void Reset(void); - void Load_Scale_Outputs( struct parameters *P ); - void Set_Next_Scale_Output( ); - - - - #ifdef PHASE_DIAGRAM - void Initialize_Phase_Diagram( struct parameters *P ); - #endif - - #ifdef LYA_STATISTICS - void Initialize_Lya_Statistics( struct parameters *P ); - void Initialize_Lya_Statistics_Measurements( int axis ); - void Transfer_Skewers_Data( int axis ); - void Compute_Lya_Mean_Flux_Skewer( int skewer_id, int axis ); - void Reduce_Lya_Mean_Flux_Axis( int axis ); - void Reduce_Lya_Mean_Flux_Global( ); - void Clear_Power_Spectrum_Measurements( void ); - void Reduce_Power_Spectrum_Axis( int axis ); - void Reduce_Power_Spectrum_Global( ); - void Transfer_Skewers_Global_Axis( int axis ); - #endif + void Load_Scale_Outputs(struct parameters *P); + void Set_Next_Scale_Output(); + + #ifdef PHASE_DIAGRAM + void Initialize_Phase_Diagram(struct parameters *P); + #endif + + #ifdef LYA_STATISTICS + void Initialize_Lya_Statistics(struct parameters *P); + void Initialize_Lya_Statistics_Measurements(int axis); + void Transfer_Skewers_Data(int axis); + void Compute_Lya_Mean_Flux_Skewer(int skewer_id, int axis); + void Reduce_Lya_Mean_Flux_Axis(int axis); + void Reduce_Lya_Mean_Flux_Global(); + void Clear_Power_Spectrum_Measurements(void); + void Reduce_Power_Spectrum_Axis(int axis); + void Reduce_Power_Spectrum_Global(); + void Transfer_Skewers_Global_Axis(int axis); + #endif }; - - -#endif + #endif #endif diff --git a/src/analysis/feedback_analysis.cpp b/src/analysis/feedback_analysis.cpp index 579979266..0ea688a97 100644 --- a/src/analysis/feedback_analysis.cpp +++ b/src/analysis/feedback_analysis.cpp @@ -1,84 +1,90 @@ #include "feedback_analysis.h" + #include "../io/io.h" #include "../model/disk_galaxy.h" -#ifdef MPI_CHOLLA -#include "../mpi/mpi_routines.h" -#endif +#ifdef MPI_CHOLLA + #include "../mpi/mpi_routines.h" +#endif -#define VRMS_CUTOFF_DENSITY 0.01*0.6*MP/DENSITY_UNIT +#define VRMS_CUTOFF_DENSITY 0.01 * 0.6 * MP / DENSITY_UNIT -FeedbackAnalysis::FeedbackAnalysis(Grid3D& G) { +FeedbackAnalysis::FeedbackAnalysis(Grid3D& G) +{ // allocate arrays - h_circ_vel_x = (Real *) malloc(G.H.n_cells*sizeof(Real)); - h_circ_vel_y = (Real *) malloc(G.H.n_cells*sizeof(Real)); - - #ifdef PARTICLES_GPU - CHECK( cudaMalloc((void**)&d_circ_vel_x, G.H.n_cells*sizeof(Real)) ); - CHECK( cudaMalloc((void**)&d_circ_vel_y, G.H.n_cells*sizeof(Real)) ); - #endif + h_circ_vel_x = (Real*)malloc(G.H.n_cells * sizeof(Real)); + h_circ_vel_y = (Real*)malloc(G.H.n_cells * sizeof(Real)); +#ifdef PARTICLES_GPU + CHECK(cudaMalloc((void**)&d_circ_vel_x, G.H.n_cells * sizeof(Real))); + CHECK(cudaMalloc((void**)&d_circ_vel_y, G.H.n_cells * sizeof(Real))); +#endif - //setup the (constant) circular speed arrays + // setup the (constant) circular speed arrays int id; Real vca, r, x, y, z; - for (int k=G.H.n_ghost; k +#include "../global/global.h" +#include "../grid/grid3D.h" -class FeedbackAnalysis { - - Real *h_circ_vel_x, *h_circ_vel_y; +class FeedbackAnalysis +{ + Real *h_circ_vel_x, *h_circ_vel_y; - #ifdef PARTICLES_GPU - Real *d_circ_vel_x, *d_circ_vel_y; - void Compute_Gas_Velocity_Dispersion_GPU(Grid3D& G); - #endif +#ifdef PARTICLES_GPU + Real *d_circ_vel_x, *d_circ_vel_y; + void Compute_Gas_Velocity_Dispersion_GPU(Grid3D& G); +#endif - public: - int countSN {0}; - int countResolved {0}; - int countUnresolved {0}; - Real totalEnergy {0}; - Real totalMomentum {0}; - Real totalUnresEnergy {0}; + public: + int countSN{0}; + int countResolved{0}; + int countUnresolved{0}; + Real totalEnergy{0}; + Real totalMomentum{0}; + Real totalUnresEnergy{0}; - FeedbackAnalysis(Grid3D& G); - ~FeedbackAnalysis(); + FeedbackAnalysis(Grid3D& G); + ~FeedbackAnalysis(); - void Compute_Gas_Velocity_Dispersion(Grid3D& G); - void Reset(); - + void Compute_Gas_Velocity_Dispersion(Grid3D& G); + void Reset(); }; \ No newline at end of file diff --git a/src/analysis/feedback_analysis_gpu.cu b/src/analysis/feedback_analysis_gpu.cu index d8e34d155..a934e52d0 100644 --- a/src/analysis/feedback_analysis_gpu.cu +++ b/src/analysis/feedback_analysis_gpu.cu @@ -1,55 +1,61 @@ -#include "feedback_analysis.h" -#include "../io/io.h" #include -#ifdef PARTICLES_GPU -#define MU 0.6 -// in cgs, this is 0.01 cm^{-3} -#define MIN_DENSITY 0.01 * MP * MU * LENGTH_UNIT * LENGTH_UNIT * LENGTH_UNIT / MASS_UNIT // 148279.7 -#define TPB_ANALYSIS 1024 +#include "../io/io.h" +#include "feedback_analysis.h" +#ifdef PARTICLES_GPU + #define MU 0.6 + // in cgs, this is 0.01 cm^{-3} + #define MIN_DENSITY \ + 0.01 * MP *MU *LENGTH_UNIT *LENGTH_UNIT *LENGTH_UNIT / \ + MASS_UNIT // 148279.7 + #define TPB_ANALYSIS 1024 __device__ void warpReduce(volatile Real *buff, size_t tid) { - if (TPB_ANALYSIS >= 64) buff[tid] += buff[tid + 32]; - if (TPB_ANALYSIS >= 32) buff[tid] += buff[tid + 16]; - if (TPB_ANALYSIS >= 16) buff[tid] += buff[tid + 8]; - if (TPB_ANALYSIS >= 8) buff[tid] += buff[tid + 4]; - if (TPB_ANALYSIS >= 4) buff[tid] += buff[tid + 2]; - if (TPB_ANALYSIS >= 2) buff[tid] += buff[tid + 1]; + if (TPB_ANALYSIS >= 64) buff[tid] += buff[tid + 32]; + if (TPB_ANALYSIS >= 32) buff[tid] += buff[tid + 16]; + if (TPB_ANALYSIS >= 16) buff[tid] += buff[tid + 8]; + if (TPB_ANALYSIS >= 8) buff[tid] += buff[tid + 4]; + if (TPB_ANALYSIS >= 4) buff[tid] += buff[tid + 2]; + if (TPB_ANALYSIS >= 2) buff[tid] += buff[tid + 1]; } - -void __global__ Reduce_Tubulence_kernel(int nx, int ny, int nz, int n_ghost, Real *density, Real *momentum_x, Real *momentum_y, - Real *momentum_z, Real *circ_vel_x, Real *circ_vel_y, Real *partial_mass, Real *partial_vel) { +void __global__ Reduce_Tubulence_kernel(int nx, int ny, int nz, int n_ghost, + Real *density, Real *momentum_x, + Real *momentum_y, Real *momentum_z, + Real *circ_vel_x, Real *circ_vel_y, + Real *partial_mass, Real *partial_vel) +{ __shared__ Real s_mass[TPB_ANALYSIS]; __shared__ Real s_vel[TPB_ANALYSIS]; int id, zid, yid, xid, tid; - id = threadIdx.x + blockIdx.x * blockDim.x; - zid = id / (nx*ny); - yid = (id - zid*nx*ny) / nx; - xid = id - zid*nx*ny - yid*nx; + id = threadIdx.x + blockIdx.x * blockDim.x; + zid = id / (nx * ny); + yid = (id - zid * nx * ny) / nx; + xid = id - zid * nx * ny - yid * nx; tid = threadIdx.x; s_mass[tid] = 0; - s_vel[tid] = 0; + s_vel[tid] = 0; Real vx, vy, vz; - if (xid > n_ghost-1 && xid < nx-n_ghost && yid > n_ghost-1 && yid < ny-n_ghost && zid > n_ghost-1 && zid < nz-n_ghost && density[id] > MIN_DENSITY) { + if (xid > n_ghost - 1 && xid < nx - n_ghost && yid > n_ghost - 1 && + yid < ny - n_ghost && zid > n_ghost - 1 && zid < nz - n_ghost && + density[id] > MIN_DENSITY) { s_mass[tid] = density[id]; - vx = momentum_x[id]/ density[id]; - vy = momentum_y[id]/ density[id]; - vz = momentum_z[id]/ density[id]; - s_vel[tid] = ( (vx - circ_vel_x[id])*(vx - circ_vel_x[id]) + - (vy - circ_vel_y[id])*(vy - circ_vel_y[id]) + - (vz*vz) - )*density[id]; + vx = momentum_x[id] / density[id]; + vy = momentum_y[id] / density[id]; + vz = momentum_z[id] / density[id]; + s_vel[tid] = ((vx - circ_vel_x[id]) * (vx - circ_vel_x[id]) + + (vy - circ_vel_y[id]) * (vy - circ_vel_y[id]) + (vz * vz)) * + density[id]; } __syncthreads(); - for (unsigned int s=blockDim.x/2; s>0; s>>=1) { + for (unsigned int s = blockDim.x / 2; s > 0; s >>= 1) { if (tid < s) { s_mass[tid] += s_mass[tid + s]; s_vel[tid] += s_vel[tid + s]; @@ -57,104 +63,142 @@ void __global__ Reduce_Tubulence_kernel(int nx, int ny, int nz, int n_ghost, Rea __syncthreads(); } if (tid == 0) { - //printf("ReduceKernel 1: blockIdx.x = %d -> s_mass[0] = %.5e, s_vel[0] = %.5e\n", blockIdx.x, s_mass[0], s_vel[0]); + // printf("ReduceKernel 1: blockIdx.x = %d -> s_mass[0] = %.5e, s_vel[0] = + // %.5e\n", blockIdx.x, s_mass[0], s_vel[0]); partial_mass[blockIdx.x] = s_mass[0]; - partial_vel[blockIdx.x] = s_vel[0]; + partial_vel[blockIdx.x] = s_vel[0]; } } - -void __global__ Reduce_Tubulence_kernel_2(Real *input_m, Real *input_v, Real *output_m, Real *output_v, int n) { +void __global__ Reduce_Tubulence_kernel_2(Real *input_m, Real *input_v, + Real *output_m, Real *output_v, int n) +{ __shared__ Real s_mass[TPB_ANALYSIS]; __shared__ Real s_vel[TPB_ANALYSIS]; - size_t tid = threadIdx.x; - size_t i = blockIdx.x*(TPB_ANALYSIS) + tid; - size_t gridSize = TPB_ANALYSIS*gridDim.x; - s_mass[tid] = 0; - s_vel[tid] = 0; + size_t tid = threadIdx.x; + size_t i = blockIdx.x * (TPB_ANALYSIS) + tid; + size_t gridSize = TPB_ANALYSIS * gridDim.x; + s_mass[tid] = 0; + s_vel[tid] = 0; - while (i < n) { + while (i < n) { s_mass[tid] += input_m[i]; - s_vel[tid] += input_v[i]; + s_vel[tid] += input_v[i]; i += gridSize; } __syncthreads(); - if (TPB_ANALYSIS >= 1024) { if (tid < 512) { s_mass[tid] += s_mass[tid + 512]; s_vel[tid] += s_vel[tid + 512]; } __syncthreads(); } - if (TPB_ANALYSIS >= 512) { if (tid < 256) { s_mass[tid] += s_mass[tid + 256]; s_vel[tid] += s_vel[tid + 256]; } __syncthreads(); } - if (TPB_ANALYSIS >= 256) { if (tid < 128) { s_mass[tid] += s_mass[tid + 128]; s_vel[tid] += s_vel[tid + 128]; } __syncthreads(); } - if (TPB_ANALYSIS >= 128) { if (tid < 64) { s_mass[tid] += s_mass[tid + 64]; s_vel[tid] += s_vel[tid + 64]; } __syncthreads(); } + if (TPB_ANALYSIS >= 1024) { + if (tid < 512) { + s_mass[tid] += s_mass[tid + 512]; + s_vel[tid] += s_vel[tid + 512]; + } + __syncthreads(); + } + if (TPB_ANALYSIS >= 512) { + if (tid < 256) { + s_mass[tid] += s_mass[tid + 256]; + s_vel[tid] += s_vel[tid + 256]; + } + __syncthreads(); + } + if (TPB_ANALYSIS >= 256) { + if (tid < 128) { + s_mass[tid] += s_mass[tid + 128]; + s_vel[tid] += s_vel[tid + 128]; + } + __syncthreads(); + } + if (TPB_ANALYSIS >= 128) { + if (tid < 64) { + s_mass[tid] += s_mass[tid + 64]; + s_vel[tid] += s_vel[tid + 64]; + } + __syncthreads(); + } - if (tid < 32) { warpReduce(s_mass, tid); warpReduce(s_vel, tid); } + if (tid < 32) { + warpReduce(s_mass, tid); + warpReduce(s_vel, tid); + } __syncthreads(); if (tid == 0) { - //printf("Reduce_Tubulence_kernel 2: n = %d/%d, blockIdx.x = %d -> s_mass[0] = %.5e, s_vel[0] = %.5e\n", - // n, gridDim.x, blockIdx.x, s_mass[0], s_vel[0]); + // printf("Reduce_Tubulence_kernel 2: n = %d/%d, blockIdx.x = %d -> + // s_mass[0] = %.5e, s_vel[0] = %.5e\n", + // n, gridDim.x, blockIdx.x, s_mass[0], s_vel[0]); output_m[blockIdx.x] = s_mass[0]; output_v[blockIdx.x] = s_vel[0]; } } +void FeedbackAnalysis::Compute_Gas_Velocity_Dispersion_GPU(Grid3D &G) +{ + size_t ngrid = std::ceil((1. * G.H.nx * G.H.ny * G.H.nz) / TPB_ANALYSIS); + + Real *d_partial_mass; + Real *d_partial_vel; + Real *h_partial_mass = (Real *)malloc(ngrid * sizeof(Real)); + Real *h_partial_vel = (Real *)malloc(ngrid * sizeof(Real)); + CHECK(cudaMalloc((void **)&d_partial_mass, ngrid * sizeof(Real))); + CHECK(cudaMalloc((void **)&d_partial_vel, ngrid * sizeof(Real))); -void FeedbackAnalysis::Compute_Gas_Velocity_Dispersion_GPU(Grid3D& G) { - size_t ngrid = std::ceil((1.*G.H.nx*G.H.ny*G.H.nz)/TPB_ANALYSIS); - - Real* d_partial_mass; - Real* d_partial_vel; - Real* h_partial_mass = (Real *) malloc(ngrid*sizeof(Real)); - Real* h_partial_vel = (Real *) malloc(ngrid*sizeof(Real)); - CHECK(cudaMalloc((void**)&d_partial_mass, ngrid*sizeof(Real))); - CHECK(cudaMalloc((void**)&d_partial_vel, ngrid*sizeof(Real))); - Real total_mass = 0; - Real total_vel = 0; + Real total_vel = 0; - hipLaunchKernelGGL(Reduce_Tubulence_kernel, ngrid, TPB_ANALYSIS, 0, 0, G.H.nx, G.H.ny, G.H.nz, G.H.n_ghost, - G.C.d_density, G.C.d_momentum_x, G.C.d_momentum_y, G.C.d_momentum_z, + hipLaunchKernelGGL(Reduce_Tubulence_kernel, ngrid, TPB_ANALYSIS, 0, 0, G.H.nx, + G.H.ny, G.H.nz, G.H.n_ghost, G.C.d_density, + G.C.d_momentum_x, G.C.d_momentum_y, G.C.d_momentum_z, d_circ_vel_x, d_circ_vel_y, d_partial_mass, d_partial_vel); - - size_t n = ngrid; + + size_t n = ngrid; Real *mass_input = d_partial_mass; - Real *vel_input = d_partial_vel; + Real *vel_input = d_partial_vel; while (n > TPB_ANALYSIS) { - ngrid = std::ceil( (n*1.)/TPB_ANALYSIS ); - //printf("Reduce_Tubulence: Next kernel call grid size is %d\n", ngrid); - hipLaunchKernelGGL(Reduce_Tubulence_kernel_2, ngrid, TPB_ANALYSIS, 0, 0, mass_input, vel_input, d_partial_mass, d_partial_vel, n); + ngrid = std::ceil((n * 1.) / TPB_ANALYSIS); + // printf("Reduce_Tubulence: Next kernel call grid size is %d\n", ngrid); + hipLaunchKernelGGL(Reduce_Tubulence_kernel_2, ngrid, TPB_ANALYSIS, 0, 0, + mass_input, vel_input, d_partial_mass, d_partial_vel, n); mass_input = d_partial_mass; - vel_input = d_partial_vel; - n = ngrid; + vel_input = d_partial_vel; + n = ngrid; } if (n > 1) { - hipLaunchKernelGGL(Reduce_Tubulence_kernel_2, 1, TPB_ANALYSIS, 0, 0, d_partial_mass, d_partial_vel, d_partial_mass, d_partial_vel, n); + hipLaunchKernelGGL(Reduce_Tubulence_kernel_2, 1, TPB_ANALYSIS, 0, 0, + d_partial_mass, d_partial_vel, d_partial_mass, + d_partial_vel, n); } - - //cudaDeviceSynchronize(); - CHECK(cudaMemcpy(h_partial_mass, d_partial_mass, ngrid*sizeof(Real), cudaMemcpyDeviceToHost)); - CHECK(cudaMemcpy(h_partial_vel, d_partial_vel, ngrid*sizeof(Real), cudaMemcpyDeviceToHost)); + // cudaDeviceSynchronize(); + + CHECK(cudaMemcpy(h_partial_mass, d_partial_mass, ngrid * sizeof(Real), + cudaMemcpyDeviceToHost)); + CHECK(cudaMemcpy(h_partial_vel, d_partial_vel, ngrid * sizeof(Real), + cudaMemcpyDeviceToHost)); #ifdef MPI_CHOLLA MPI_Allreduce(h_partial_mass, &total_mass, 1, MPI_CHREAL, MPI_SUM, world); MPI_Allreduce(h_partial_vel, &total_vel, 1, MPI_CHREAL, MPI_SUM, world); #else total_mass = h_partial_mass[0]; - total_vel = h_partial_vel[0]; + total_vel = h_partial_vel[0]; #endif if (total_vel < 0 || total_mass < 0) { - chprintf("feedback trouble. total_vel = %.3e, total_mass = %.3e\n", total_vel, total_mass); + chprintf("feedback trouble. total_vel = %.3e, total_mass = %.3e\n", + total_vel, total_mass); } - chprintf("feedback: time %f, dt=%f, vrms = %f km/s\n", G.H.t, G.H.dt, sqrt(total_vel/total_mass)*VELOCITY_UNIT/1e5); + chprintf("feedback: time %f, dt=%f, vrms = %f km/s\n", G.H.t, G.H.dt, + sqrt(total_vel / total_mass) * VELOCITY_UNIT / 1e5); CHECK(cudaFree(d_partial_vel)); - CHECK(cudaFree(d_partial_mass)); + CHECK(cudaFree(d_partial_mass)); free(h_partial_mass); free(h_partial_vel); } - #endif // PARTICLES_GPU +#endif // PARTICLES_GPU diff --git a/src/analysis/io_analysis.cpp b/src/analysis/io_analysis.cpp index 3f0141c05..2b213aba5 100644 --- a/src/analysis/io_analysis.cpp +++ b/src/analysis/io_analysis.cpp @@ -1,18 +1,19 @@ #ifdef ANALYSIS -#include -#include -#include "../analysis/analysis.h" -#include "../io/io.h" -#include "../grid/grid3D.h" + #include + #include + + #include "../analysis/analysis.h" + #include "../grid/grid3D.h" + #include "../io/io.h" using namespace std; // #define OUTPUT_SKEWERS_TRANSMITTED_FLUX -#ifdef OUTPUT_SKEWERS -void Grid3D::Output_Skewers_File( struct parameters *P ){ - + #ifdef OUTPUT_SKEWERS +void Grid3D::Output_Skewers_File(struct parameters *P) +{ FILE *out; char filename[180]; char timestep[20]; @@ -20,439 +21,530 @@ void Grid3D::Output_Skewers_File( struct parameters *P ){ // create the filename strcpy(filename, P->skewersdir); sprintf(timestep, "%d", Analysis.n_file); - strcat(filename,timestep); + strcat(filename, timestep); // a binary file is created for each process // only one HDF5 file is created - strcat(filename,"_skewers"); - strcat(filename,".h5"); - - + strcat(filename, "_skewers"); + strcat(filename, ".h5"); + chprintf("Writing Skewers File: %d ", Analysis.n_file); - - hid_t file_id; - herr_t status; - + + hid_t file_id; + herr_t status; + // Create a new file collectively file_id = H5Fcreate(filename, H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT); - Write_Skewers_Header_HDF5( file_id ); - Write_Skewers_Data_HDF5( file_id ); - + Write_Skewers_Header_HDF5(file_id); + Write_Skewers_Data_HDF5(file_id); + // Close the file status = H5Fclose(file_id); - - chprintf("Saved Skewers File.\n"); - -} + chprintf("Saved Skewers File.\n"); +} +void Grid3D::Write_Skewers_Header_HDF5(hid_t file_id) +{ + hid_t attribute_id, dataspace_id; + herr_t status; + hsize_t attr_dims; + int int_data[3]; + Real Real_data[3]; -void Grid3D::Write_Skewers_Header_HDF5( hid_t file_id ){ - hid_t attribute_id, dataspace_id; - herr_t status; - hsize_t attr_dims; - int int_data[3]; - Real Real_data[3]; - - Real H0 = Cosmo.cosmo_h*100; + Real H0 = Cosmo.cosmo_h * 100; // Single attributes first attr_dims = 1; // Create the data space for the attribute dataspace_id = H5Screate_simple(1, &attr_dims, NULL); - #ifdef COSMOLOGY - attribute_id = H5Acreate(file_id, "current_a", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT); + #ifdef COSMOLOGY + attribute_id = H5Acreate(file_id, "current_a", H5T_IEEE_F64BE, dataspace_id, + H5P_DEFAULT, H5P_DEFAULT); status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &Particles.current_a); status = H5Aclose(attribute_id); - attribute_id = H5Acreate(file_id, "current_z", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT); + attribute_id = H5Acreate(file_id, "current_z", H5T_IEEE_F64BE, dataspace_id, + H5P_DEFAULT, H5P_DEFAULT); status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &Particles.current_z); status = H5Aclose(attribute_id); - attribute_id = H5Acreate(file_id, "H0", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT); - status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &H0); - status = H5Aclose(attribute_id); - attribute_id = H5Acreate(file_id, "Omega_M", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT); - status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &Cosmo.Omega_M); - status = H5Aclose(attribute_id); - attribute_id = H5Acreate(file_id, "Omega_L", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT); - status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &Cosmo.Omega_L); - status = H5Aclose(attribute_id); - attribute_id = H5Acreate(file_id, "Omega_b", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT); - status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &Cosmo.Omega_b); - status = H5Aclose(attribute_id); - #endif - + attribute_id = H5Acreate(file_id, "H0", H5T_IEEE_F64BE, dataspace_id, + H5P_DEFAULT, H5P_DEFAULT); + status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &H0); + status = H5Aclose(attribute_id); + attribute_id = H5Acreate(file_id, "Omega_M", H5T_IEEE_F64BE, dataspace_id, + H5P_DEFAULT, H5P_DEFAULT); + status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &Cosmo.Omega_M); + status = H5Aclose(attribute_id); + attribute_id = H5Acreate(file_id, "Omega_L", H5T_IEEE_F64BE, dataspace_id, + H5P_DEFAULT, H5P_DEFAULT); + status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &Cosmo.Omega_L); + status = H5Aclose(attribute_id); + attribute_id = H5Acreate(file_id, "Omega_b", H5T_IEEE_F64BE, dataspace_id, + H5P_DEFAULT, H5P_DEFAULT); + status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &Cosmo.Omega_b); + status = H5Aclose(attribute_id); + #endif + status = H5Sclose(dataspace_id); - + // 3D atributes now attr_dims = 3; // Create the data space for the attribute dataspace_id = H5Screate_simple(1, &attr_dims, NULL); - + Real_data[0] = Analysis.Lbox_x; Real_data[1] = Analysis.Lbox_y; Real_data[2] = Analysis.Lbox_z; - - attribute_id = H5Acreate(file_id, "Lbox", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT); - status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, Real_data); - status = H5Aclose(attribute_id); - - status = H5Sclose(dataspace_id); - -} + attribute_id = H5Acreate(file_id, "Lbox", H5T_IEEE_F64BE, dataspace_id, + H5P_DEFAULT, H5P_DEFAULT); + status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, Real_data); + status = H5Aclose(attribute_id); + status = H5Sclose(dataspace_id); +} - -void Grid3D::Write_Skewers_Data_HDF5( hid_t file_id ){ - +void Grid3D::Write_Skewers_Data_HDF5(hid_t file_id) +{ int n_global_x, n_global_y, n_global_z; int n_los_x, n_los_y, n_los_z; n_global_x = Analysis.n_skewers_processed_x; n_global_y = Analysis.n_skewers_processed_y; n_global_z = Analysis.n_skewers_processed_z; - n_los_x = Analysis.nx_total; - n_los_y = Analysis.ny_total; - n_los_z = Analysis.nz_total; - + n_los_x = Analysis.nx_total; + n_los_y = Analysis.ny_total; + n_los_z = Analysis.nz_total; + Real *dataset_buffer_x; Real *dataset_buffer_y; Real *dataset_buffer_z; - + int data_id, buffer_id; - - herr_t status; - hid_t dataset_id; - - //Write Skerwes X - dataset_buffer_x = (Real *) malloc(n_global_x*n_los_x*sizeof(Real)); - hsize_t dims_x[2]; + + herr_t status; + hid_t dataset_id; + + // Write Skerwes X + dataset_buffer_x = (Real *)malloc(n_global_x * n_los_x * sizeof(Real)); + hsize_t dims_x[2]; dims_x[0] = n_global_x; dims_x[1] = n_los_x; hid_t skewers_group_x, dataspace_id_skewers_x; dataspace_id_skewers_x = H5Screate_simple(2, dims_x, NULL); - skewers_group_x = H5Gcreate(file_id, "skewers_x", H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + skewers_group_x = + H5Gcreate(file_id, "skewers_x", H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); - for ( int skewer_id=0; skewer_idanalysisdir); sprintf(timestep, "%d", Analysis.n_file); - strcat(filename,timestep); + strcat(filename, timestep); // a binary file is created for each process // only one HDF5 file is created - strcat(filename,"_analysis"); - strcat(filename,".h5"); - - + strcat(filename, "_analysis"); + strcat(filename, ".h5"); + chprintf("Writing Analysis File: %d ", Analysis.n_file); - - hid_t file_id; - herr_t status; + + hid_t file_id; + herr_t status; // Create a new file collectively file_id = H5Fcreate(filename, H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT); - Write_Analysis_Header_HDF5( file_id ); - Write_Analysis_Data_HDF5( file_id ); + Write_Analysis_Header_HDF5(file_id); + Write_Analysis_Data_HDF5(file_id); // Close the file status = H5Fclose(file_id); chprintf("Saved Analysis File.\n\n"); - } - -void Grid3D::Write_Analysis_Header_HDF5( hid_t file_id ){ - hid_t attribute_id, dataspace_id; - herr_t status; - hsize_t attr_dims; - int int_data[3]; - Real Real_data[3]; - +void Grid3D::Write_Analysis_Header_HDF5(hid_t file_id) +{ + hid_t attribute_id, dataspace_id; + herr_t status; + hsize_t attr_dims; + int int_data[3]; + Real Real_data[3]; // Single attributes first attr_dims = 1; // Create the data space for the attribute dataspace_id = H5Screate_simple(1, &attr_dims, NULL); #ifdef COSMOLOGY - Real H0 = Cosmo.cosmo_h*100; - attribute_id = H5Acreate(file_id, "current_a", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT); + Real H0 = Cosmo.cosmo_h * 100; + attribute_id = H5Acreate(file_id, "current_a", H5T_IEEE_F64BE, dataspace_id, + H5P_DEFAULT, H5P_DEFAULT); status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &Particles.current_a); status = H5Aclose(attribute_id); - attribute_id = H5Acreate(file_id, "current_z", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT); + attribute_id = H5Acreate(file_id, "current_z", H5T_IEEE_F64BE, dataspace_id, + H5P_DEFAULT, H5P_DEFAULT); status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &Particles.current_z); status = H5Aclose(attribute_id); - attribute_id = H5Acreate(file_id, "H0", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT); - status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &H0); - status = H5Aclose(attribute_id); - attribute_id = H5Acreate(file_id, "Omega_M", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT); - status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &Cosmo.Omega_M); - status = H5Aclose(attribute_id); - attribute_id = H5Acreate(file_id, "Omega_L", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT); - status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &Cosmo.Omega_L); - status = H5Aclose(attribute_id); - attribute_id = H5Acreate(file_id, "Omega_b", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT); - status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &Cosmo.Omega_b); - status = H5Aclose(attribute_id); + attribute_id = H5Acreate(file_id, "H0", H5T_IEEE_F64BE, dataspace_id, + H5P_DEFAULT, H5P_DEFAULT); + status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &H0); + status = H5Aclose(attribute_id); + attribute_id = H5Acreate(file_id, "Omega_M", H5T_IEEE_F64BE, dataspace_id, + H5P_DEFAULT, H5P_DEFAULT); + status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &Cosmo.Omega_M); + status = H5Aclose(attribute_id); + attribute_id = H5Acreate(file_id, "Omega_L", H5T_IEEE_F64BE, dataspace_id, + H5P_DEFAULT, H5P_DEFAULT); + status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &Cosmo.Omega_L); + status = H5Aclose(attribute_id); + attribute_id = H5Acreate(file_id, "Omega_b", H5T_IEEE_F64BE, dataspace_id, + H5P_DEFAULT, H5P_DEFAULT); + status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &Cosmo.Omega_b); + status = H5Aclose(attribute_id); #endif - + status = H5Sclose(dataspace_id); // 3D atributes now @@ -530,158 +625,161 @@ void Grid3D::Write_Analysis_Header_HDF5( hid_t file_id ){ Real_data[1] = Analysis.Lbox_y; Real_data[2] = Analysis.Lbox_z; - attribute_id = H5Acreate(file_id, "Lbox", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT); - status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, Real_data); - status = H5Aclose(attribute_id); - + attribute_id = H5Acreate(file_id, "Lbox", H5T_IEEE_F64BE, dataspace_id, + H5P_DEFAULT, H5P_DEFAULT); + status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, Real_data); + status = H5Aclose(attribute_id); + status = H5Sclose(dataspace_id); - } - - -void Grid3D::Write_Analysis_Data_HDF5( hid_t file_id ){ - - - herr_t status; - hid_t dataset_id, dataspace_id, group_id, attribute_id; - hsize_t dims2d[2]; - hsize_t attr_dims; +void Grid3D::Write_Analysis_Data_HDF5(hid_t file_id) +{ + herr_t status; + hid_t dataset_id, dataspace_id, group_id, attribute_id; + hsize_t dims2d[2]; + hsize_t attr_dims; int nx_dset, ny_dset, j, i, id, buf_id; #ifdef PHASE_DIAGRAM - nx_dset = Analysis.n_temp; - ny_dset = Analysis.n_dens; - float *dataset_buffer = (float *) malloc(nx_dset*ny_dset*sizeof(Real)); - + nx_dset = Analysis.n_temp; + ny_dset = Analysis.n_dens; + float *dataset_buffer = (float *)malloc(nx_dset * ny_dset * sizeof(Real)); // Create the data space for the datasets - dims2d[0] = nx_dset; - dims2d[1] = ny_dset; + dims2d[0] = nx_dset; + dims2d[1] = ny_dset; dataspace_id = H5Screate_simple(2, dims2d, NULL); - group_id = H5Gcreate(file_id, "/phase_diagram", H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); - for (j=0; janalysis_scale_outputs_file); - chprintf( " Loading Analysis Scale_Factor Outpus: %s\n", filename_1); + chprintf(" Loading Analysis Scale_Factor Outpus: %s\n", filename_1); - ifstream file_out ( filename_1 ); + ifstream file_out(filename_1); string line; Real a_value, current_a; - if (file_out.is_open()){ - while ( getline (file_out,line) ){ - a_value = atof( line.c_str() ); - scale_outputs.push_back( a_value ); + if (file_out.is_open()) { + while (getline(file_out, line)) { + a_value = atof(line.c_str()); + scale_outputs.push_back(a_value); n_outputs += 1; // chprintf("%f\n", a_value); } file_out.close(); - n_outputs = scale_outputs.size(); + n_outputs = scale_outputs.size(); next_output_indx = 0; chprintf(" Loaded %d scale outputs \n", n_outputs); - } - else{ + } else { chprintf(" Error: Unable to open cosmology outputs file\n"); exit(1); } @@ -689,42 +787,45 @@ void Analysis_Module::Load_Scale_Outputs( struct parameters *P ) { chprintf(" Setting next analysis output\n"); int scale_indx = next_output_indx; - current_a = 1. / ( 1 + current_z ); - a_value = scale_outputs[scale_indx]; + current_a = 1. / (1 + current_z); + a_value = scale_outputs[scale_indx]; - while ( (current_a - a_value) > 1e-4 ){ + while ((current_a - a_value) > 1e-4) { // chprintf( "%f %f\n", a_value, current_a); scale_indx += 1; a_value = scale_outputs[scale_indx]; } next_output_indx = scale_indx; - next_output = a_value; - chprintf(" Next output scale index: %d \n", next_output_indx ); + next_output = a_value; + chprintf(" Next output scale index: %d \n", next_output_indx); chprintf(" Next output scale value: %f \n", next_output); - if ( fabs(current_a - next_output) > 1e-4 ) Output_Now = false; - else Output_Now = true; + if (fabs(current_a - next_output) > 1e-4) + Output_Now = false; + else + Output_Now = true; n_file = next_output_indx; - } -void Analysis_Module::Set_Next_Scale_Output( ){ - +void Analysis_Module::Set_Next_Scale_Output() +{ int scale_indx = next_output_indx; Real a_value, current_a; - current_a = 1. / ( 1 + current_z ); - a_value = scale_outputs[scale_indx]; - if ( ( scale_indx == 0 ) && ( abs(a_value - current_a )<1e-5 ) )scale_indx = 1; - else scale_indx += 1; + current_a = 1. / (1 + current_z); + a_value = scale_outputs[scale_indx]; + if ((scale_indx == 0) && (abs(a_value - current_a) < 1e-5)) + scale_indx = 1; + else + scale_indx += 1; a_value = scale_outputs[scale_indx]; next_output_indx = scale_indx; - next_output = a_value; - n_file = next_output_indx; + next_output = a_value; + n_file = next_output_indx; // chprintf("Next Analysis Output: z=%f \n", 1./next_output - 1); } -#endif //COSMOLOGY + #endif // COSMOLOGY #endif diff --git a/src/analysis/lya_statistics.cpp b/src/analysis/lya_statistics.cpp index 35f8ce337..afea84fa7 100644 --- a/src/analysis/lya_statistics.cpp +++ b/src/analysis/lya_statistics.cpp @@ -1,21 +1,22 @@ #ifdef ANALYSIS -#ifdef LYA_STATISTICS + #ifdef LYA_STATISTICS -#include "../analysis/analysis.h" -#include "../io/io.h" -#include -#include + #include + #include -#ifdef MPI_CHOLLA -#include "../mpi/mpi_routines.h" -#endif + #include "../analysis/analysis.h" + #include "../io/io.h" + + #ifdef MPI_CHOLLA + #include "../mpi/mpi_routines.h" + #endif // #define PRINT_ANALYSIS_LOG -void Analysis_Module::Transfer_Skewers_Global_Axis( int axis ){ - +void Analysis_Module::Transfer_Skewers_Global_Axis(int axis) +{ bool am_I_root; - int n_skewers_root, n_los; + int n_skewers_root, n_los; bool *root_procs; Real *skewers_density_root; Real *skewers_density_global; @@ -32,395 +33,410 @@ void Analysis_Module::Transfer_Skewers_Global_Axis( int axis ){ Real *skewers_F_HeII_global; Real *skewers_F_HeII_root; Real *transfer_buffer; - - + // chprintf( " Transfering Skewers \n" ); - - if ( axis == 0 ){ - am_I_root = am_I_root_x; - n_los = nx_total; - root_procs = root_procs_x; - n_skewers_root = n_skewers_local_x; - skewers_density_root = skewers_density_root_x; - skewers_density_global = skewers_density_x_global; - skewers_HI_density_root = skewers_HI_density_root_x; - skewers_HI_density_global = skewers_HI_density_x_global; + + if (axis == 0) { + am_I_root = am_I_root_x; + n_los = nx_total; + root_procs = root_procs_x; + n_skewers_root = n_skewers_local_x; + skewers_density_root = skewers_density_root_x; + skewers_density_global = skewers_density_x_global; + skewers_HI_density_root = skewers_HI_density_root_x; + skewers_HI_density_global = skewers_HI_density_x_global; skewers_HeII_density_root = skewers_HeII_density_root_x; skewers_HeII_density_global = skewers_HeII_density_x_global; - skewers_temperature_root = skewers_temperature_root_x; - skewers_temperature_global = skewers_temperature_x_global; + skewers_temperature_root = skewers_temperature_root_x; + skewers_temperature_global = skewers_temperature_x_global; skewers_los_velocity_root = skewers_velocity_root_x; skewers_los_velocity_global = skewers_los_velocity_x_global; - skewers_F_HI_global = skewers_transmitted_flux_HI_x_global; - skewers_F_HeII_global = skewers_transmitted_flux_HeII_x_global; - skewers_F_HI_root = skewers_transmitted_flux_HI_x; - skewers_F_HeII_root = skewers_transmitted_flux_HeII_x; - transfer_buffer = transfer_buffer_root_x; - } - if ( axis == 1 ){ - am_I_root = am_I_root_y; - n_los = ny_total; - root_procs = root_procs_y; - n_skewers_root = n_skewers_local_y; - skewers_density_root = skewers_density_root_y; - skewers_density_global = skewers_density_y_global; - skewers_HI_density_root = skewers_HI_density_root_y; - skewers_HI_density_global = skewers_HI_density_y_global; + skewers_F_HI_global = skewers_transmitted_flux_HI_x_global; + skewers_F_HeII_global = skewers_transmitted_flux_HeII_x_global; + skewers_F_HI_root = skewers_transmitted_flux_HI_x; + skewers_F_HeII_root = skewers_transmitted_flux_HeII_x; + transfer_buffer = transfer_buffer_root_x; + } + if (axis == 1) { + am_I_root = am_I_root_y; + n_los = ny_total; + root_procs = root_procs_y; + n_skewers_root = n_skewers_local_y; + skewers_density_root = skewers_density_root_y; + skewers_density_global = skewers_density_y_global; + skewers_HI_density_root = skewers_HI_density_root_y; + skewers_HI_density_global = skewers_HI_density_y_global; skewers_HeII_density_root = skewers_HeII_density_root_y; skewers_HeII_density_global = skewers_HeII_density_y_global; - skewers_temperature_root = skewers_temperature_root_y; - skewers_temperature_global = skewers_temperature_y_global; + skewers_temperature_root = skewers_temperature_root_y; + skewers_temperature_global = skewers_temperature_y_global; skewers_los_velocity_root = skewers_velocity_root_y; skewers_los_velocity_global = skewers_los_velocity_y_global; - skewers_F_HI_global = skewers_transmitted_flux_HI_y_global; - skewers_F_HeII_global = skewers_transmitted_flux_HeII_y_global; - skewers_F_HI_root = skewers_transmitted_flux_HI_y; - skewers_F_HeII_root = skewers_transmitted_flux_HeII_y; - transfer_buffer = transfer_buffer_root_y; - } - if ( axis == 2 ){ - am_I_root = am_I_root_z; - n_los = nz_total; - root_procs = root_procs_z; - n_skewers_root = n_skewers_local_z; - skewers_density_root = skewers_density_root_z; - skewers_density_global = skewers_density_z_global; - skewers_HI_density_root = skewers_HI_density_root_z; - skewers_HI_density_global = skewers_HI_density_z_global; + skewers_F_HI_global = skewers_transmitted_flux_HI_y_global; + skewers_F_HeII_global = skewers_transmitted_flux_HeII_y_global; + skewers_F_HI_root = skewers_transmitted_flux_HI_y; + skewers_F_HeII_root = skewers_transmitted_flux_HeII_y; + transfer_buffer = transfer_buffer_root_y; + } + if (axis == 2) { + am_I_root = am_I_root_z; + n_los = nz_total; + root_procs = root_procs_z; + n_skewers_root = n_skewers_local_z; + skewers_density_root = skewers_density_root_z; + skewers_density_global = skewers_density_z_global; + skewers_HI_density_root = skewers_HI_density_root_z; + skewers_HI_density_global = skewers_HI_density_z_global; skewers_HeII_density_root = skewers_HeII_density_root_z; skewers_HeII_density_global = skewers_HeII_density_z_global; - skewers_temperature_root = skewers_temperature_root_z; - skewers_temperature_global = skewers_temperature_z_global; + skewers_temperature_root = skewers_temperature_root_z; + skewers_temperature_global = skewers_temperature_z_global; skewers_los_velocity_root = skewers_velocity_root_z; skewers_los_velocity_global = skewers_los_velocity_z_global; - skewers_F_HI_global = skewers_transmitted_flux_HI_z_global; - skewers_F_HeII_global = skewers_transmitted_flux_HeII_z_global; - skewers_F_HI_root = skewers_transmitted_flux_HI_z; - skewers_F_HeII_root = skewers_transmitted_flux_HeII_z; - transfer_buffer = transfer_buffer_root_z; + skewers_F_HI_global = skewers_transmitted_flux_HI_z_global; + skewers_F_HeII_global = skewers_transmitted_flux_HeII_z_global; + skewers_F_HI_root = skewers_transmitted_flux_HI_z; + skewers_F_HeII_root = skewers_transmitted_flux_HeII_z; + transfer_buffer = transfer_buffer_root_z; } - - if ( !am_I_root ) return; + + if (!am_I_root) return; MPI_Status mpi_status; int n_added, offset; - #ifdef OUTPUT_SKEWERS + #ifdef OUTPUT_SKEWERS // Set the density array - if ( procID == 0){ + if (procID == 0) { // Write the local data into the global array - for ( int skewer_id=0; skewer_id values[N-1] ) return -1; + if (val < values[0]) return -2; + if (val > values[N - 1]) return -1; int index = 0; - while ( index < N ){ - if ( val < values[index] ) break; + while (index < N) { + if (val < values[index]) break; index += 1; } - if ( val < values[index-1] ){ - chprintf( "ERROR; Value less than left edge: val=%f left=%f \n", val, values[index-1] ); + if (val < values[index - 1]) { + chprintf("ERROR; Value less than left edge: val=%f left=%f \n", val, + values[index - 1]); exit(-1); } - if ( val > values[index] ){ - chprintf( "ERROR; Value grater than right edge: val=%f right=%f \n", val, values[index] ); + if (val > values[index]) { + chprintf("ERROR; Value grater than right edge: val=%f right=%f \n", val, + values[index]); exit(-1); } - // chprintf( " %d: %e %e %e \n ", index, values[index-1], val, values[index]); - return index-1; - + // chprintf( " %d: %e %e %e \n ", index, values[index-1], val, + // values[index]); + return index - 1; } -void Analysis_Module::Clear_Power_Spectrum_Measurements( void ){ - - MPI_Barrier( world ); +void Analysis_Module::Clear_Power_Spectrum_Measurements(void) +{ + MPI_Barrier(world); // chprintf( "Cleared Power Spectrum cache \n "); - free( hist_k_edges_x ); - free( hist_PS_x ); - free( hist_n_x ); - free( ps_root_x ); - free( ps_global_x ); - - free( hist_k_edges_y ); - free( hist_PS_y ); - free( hist_n_y ); - free( ps_root_y ); - free( ps_global_y ); - - free( hist_k_edges_z ); - free( hist_PS_z ); - free( hist_n_z ); - free( ps_root_z ); - free( ps_global_z ); - - free( k_centers ); - free( ps_mean ); - + free(hist_k_edges_x); + free(hist_PS_x); + free(hist_n_x); + free(ps_root_x); + free(ps_global_x); + + free(hist_k_edges_y); + free(hist_PS_y); + free(hist_n_y); + free(ps_root_y); + free(ps_global_y); + + free(hist_k_edges_z); + free(hist_PS_z); + free(hist_n_z); + free(ps_root_z); + free(ps_global_z); + + free(k_centers); + free(ps_mean); } -void Grid3D::Initialize_Power_Spectrum_Measurements( int axis ){ - +void Grid3D::Initialize_Power_Spectrum_Measurements(int axis) +{ int n_los, n_fft; Real Lbox, delta_x; Real *k_vals; - if ( axis == 0 ){ + if (axis == 0) { Analysis.n_PS_processed_x = 0; - n_los = Analysis.nx_total; - n_fft = Analysis.n_fft_x; - Lbox = Analysis.Lbox_x; - delta_x = Analysis.dx; - k_vals = Analysis.k_vals_x; + n_los = Analysis.nx_total; + n_fft = Analysis.n_fft_x; + Lbox = Analysis.Lbox_x; + delta_x = Analysis.dx; + k_vals = Analysis.k_vals_x; } - if ( axis == 1 ){ + if (axis == 1) { Analysis.n_PS_processed_y = 0; - n_los = Analysis.ny_total; - n_fft = Analysis.n_fft_y; - Lbox = Analysis.Lbox_y; - delta_x = Analysis.dy; - k_vals = Analysis.k_vals_y; + n_los = Analysis.ny_total; + n_fft = Analysis.n_fft_y; + Lbox = Analysis.Lbox_y; + delta_x = Analysis.dy; + k_vals = Analysis.k_vals_y; } - if ( axis == 2 ){ + if (axis == 2) { Analysis.n_PS_processed_z = 0; - n_los = Analysis.nz_total; - n_fft = Analysis.n_fft_z; - Lbox = Analysis.Lbox_z; - delta_x = Analysis.dz; - k_vals = Analysis.k_vals_z; + n_los = Analysis.nz_total; + n_fft = Analysis.n_fft_z; + Lbox = Analysis.Lbox_z; + delta_x = Analysis.dz; + k_vals = Analysis.k_vals_z; } - // Get Cosmological variables Real H, current_a, L_proper, dx_proper, dv_Hubble; current_a = Cosmo.current_a; - L_proper = Lbox * current_a / Cosmo.cosmo_h; + L_proper = Lbox * current_a / Cosmo.cosmo_h; dx_proper = delta_x * current_a / Cosmo.cosmo_h; - H = Cosmo.Get_Hubble_Parameter( current_a ); - dv_Hubble = H * dx_proper; // km/s - + H = Cosmo.Get_Hubble_Parameter(current_a); + dv_Hubble = H * dx_proper; // km/s // Compute the K values - for ( int i=0; i= n_bins ) continue; + if (k_val == 0) continue; + bin_id = Locate_Index(k_val, hist_k_edges, n_hist_edges); + if (bin_id < 0) + chprintf(" %d: %e %e %e \n", bin_id, hist_k_edges[0], k_val, + hist_k_edges[1]); + if (bin_id < 0 || bin_id >= n_bins) continue; hist_PS[bin_id] += fft2_delta_F[i]; - hist_n[bin_id] += 1; + hist_n[bin_id] += 1; } int hist_sum = 0; - for ( int i=0; i mpi_indices; MPI_Status mpi_status; - #endif + #endif - if ( axis == 0 ){ - root_id = root_id_x; - am_I_root = am_I_root_x; - n_los_local = nx_local; - n_los_total = nx_total; - n_skewers = n_skewers_local_x; - skewers_HI_density_local = skewers_HI_density_local_x; - skewers_HI_density_root = skewers_HI_density_root_x; + if (axis == 0) { + root_id = root_id_x; + am_I_root = am_I_root_x; + n_los_local = nx_local; + n_los_total = nx_total; + n_skewers = n_skewers_local_x; + skewers_HI_density_local = skewers_HI_density_local_x; + skewers_HI_density_root = skewers_HI_density_root_x; skewers_HeII_density_local = skewers_HeII_density_local_x; skewers_HeII_density_root = skewers_HeII_density_root_x; - skewers_velocity_local = skewers_velocity_local_x; - skewers_temperature_local = skewers_temperature_local_x; - skewers_velocity_root = skewers_velocity_root_x; - skewers_temperature_root = skewers_temperature_root_x; + skewers_velocity_local = skewers_velocity_local_x; + skewers_temperature_local = skewers_temperature_local_x; + skewers_velocity_root = skewers_velocity_root_x; + skewers_temperature_root = skewers_temperature_root_x; #ifdef MPI_CHOLLA mpi_indices = mpi_indices_x; #endif #ifdef OUTPUT_SKEWERS - skewers_density_root = skewers_density_root_x; + skewers_density_root = skewers_density_root_x; skewers_density_local = skewers_density_local_x; #endif } - - if ( axis == 1 ){ - root_id = root_id_y; - am_I_root = am_I_root_y; - n_los_local = ny_local; - n_los_total = ny_total; - n_skewers = n_skewers_local_y; - skewers_HI_density_local = skewers_HI_density_local_y; + if (axis == 1) { + root_id = root_id_y; + am_I_root = am_I_root_y; + n_los_local = ny_local; + n_los_total = ny_total; + n_skewers = n_skewers_local_y; + skewers_HI_density_local = skewers_HI_density_local_y; skewers_HeII_density_local = skewers_HeII_density_local_y; skewers_HeII_density_root = skewers_HeII_density_root_y; - skewers_velocity_local = skewers_velocity_local_y; - skewers_temperature_local = skewers_temperature_local_y; - skewers_HI_density_root = skewers_HI_density_root_y; - skewers_velocity_root = skewers_velocity_root_y; - skewers_temperature_root = skewers_temperature_root_y; + skewers_velocity_local = skewers_velocity_local_y; + skewers_temperature_local = skewers_temperature_local_y; + skewers_HI_density_root = skewers_HI_density_root_y; + skewers_velocity_root = skewers_velocity_root_y; + skewers_temperature_root = skewers_temperature_root_y; #ifdef MPI_CHOLLA mpi_indices = mpi_indices_y; #endif #ifdef OUTPUT_SKEWERS - skewers_density_root = skewers_density_root_y; + skewers_density_root = skewers_density_root_y; skewers_density_local = skewers_density_local_y; #endif } - - if ( axis == 2 ){ - root_id = root_id_z; - am_I_root = am_I_root_z; - n_los_local = nz_local; - n_los_total = nz_total; - n_skewers = n_skewers_local_z; - skewers_HI_density_local = skewers_HI_density_local_z; + if (axis == 2) { + root_id = root_id_z; + am_I_root = am_I_root_z; + n_los_local = nz_local; + n_los_total = nz_total; + n_skewers = n_skewers_local_z; + skewers_HI_density_local = skewers_HI_density_local_z; skewers_HeII_density_local = skewers_HeII_density_local_z; skewers_HeII_density_root = skewers_HeII_density_root_z; - skewers_velocity_local = skewers_velocity_local_z; - skewers_temperature_local = skewers_temperature_local_z; - skewers_HI_density_root = skewers_HI_density_root_z; - skewers_velocity_root = skewers_velocity_root_z; - skewers_temperature_root = skewers_temperature_root_z; + skewers_velocity_local = skewers_velocity_local_z; + skewers_temperature_local = skewers_temperature_local_z; + skewers_HI_density_root = skewers_HI_density_root_z; + skewers_velocity_root = skewers_velocity_root_z; + skewers_temperature_root = skewers_temperature_root_z; #ifdef MPI_CHOLLA mpi_indices = mpi_indices_z; #endif #ifdef OUTPUT_SKEWERS - skewers_density_root = skewers_density_root_z; + skewers_density_root = skewers_density_root_z; skewers_density_local = skewers_density_local_z; #endif } - // Copy Skewers Local Data to Root data Real HI_density, HeII_density, velocity, temperature; Real density; - - #ifdef MPI_CHOLLA - if ( am_I_root ){ - if ( root_id != procID ){ - printf("ERROR: Root ID doesn't match procID\n" ); + #ifdef MPI_CHOLLA + if (am_I_root) { + if (root_id != procID) { + printf("ERROR: Root ID doesn't match procID\n"); exit(-1); } - for ( int skewer_id=0; skewer_idlya_skewers_stride; - chprintf(" Lya Skewers Stride: %d\n", n_stride ); + chprintf(" Lya Skewers Stride: %d\n", n_stride); d_log_k = P->lya_Pk_d_log_k; - chprintf(" Power Spectrum d_log_k: %f\n", d_log_k ); + chprintf(" Power Spectrum d_log_k: %f\n", d_log_k); - n_skewers_local_x = ( ny_local / n_stride ) * ( nz_local / n_stride ); - n_skewers_local_y = ( nx_local / n_stride ) * ( nz_local / n_stride ); - n_skewers_local_z = ( nx_local / n_stride ) * ( ny_local / n_stride ); + n_skewers_local_x = (ny_local / n_stride) * (nz_local / n_stride); + n_skewers_local_y = (nx_local / n_stride) * (nz_local / n_stride); + n_skewers_local_z = (nx_local / n_stride) * (ny_local / n_stride); - #ifdef MPI_CHOLLA - n_skewers_total_x = ( ny_total / n_stride ) * ( nz_total / n_stride ); - n_skewers_total_y = ( nx_total / n_stride ) * ( nz_total / n_stride ); - n_skewers_total_z = ( nx_total / n_stride ) * ( ny_total / n_stride ); - #else + #ifdef MPI_CHOLLA + n_skewers_total_x = (ny_total / n_stride) * (nz_total / n_stride); + n_skewers_total_y = (nx_total / n_stride) * (nz_total / n_stride); + n_skewers_total_z = (nx_total / n_stride) * (ny_total / n_stride); + #else n_skewers_total_x = n_skewers_local_x; n_skewers_total_y = n_skewers_local_y; n_skewers_total_z = n_skewers_local_z; - #endif - + #endif // Alocate Memory For Properties of Local Skewers - skewers_HI_density_local_x = (Real *) malloc(n_skewers_local_x*nx_local*sizeof(Real)); - skewers_HI_density_local_y = (Real *) malloc(n_skewers_local_y*ny_local*sizeof(Real)); - skewers_HI_density_local_z = (Real *) malloc(n_skewers_local_z*nz_local*sizeof(Real)); - - skewers_HeII_density_local_x = (Real *) malloc(n_skewers_local_x*nx_local*sizeof(Real)); - skewers_HeII_density_local_y = (Real *) malloc(n_skewers_local_y*ny_local*sizeof(Real)); - skewers_HeII_density_local_z = (Real *) malloc(n_skewers_local_z*nz_local*sizeof(Real)); - - skewers_velocity_local_x = (Real *) malloc(n_skewers_local_x*nx_local*sizeof(Real)); - skewers_velocity_local_y = (Real *) malloc(n_skewers_local_y*ny_local*sizeof(Real)); - skewers_velocity_local_z = (Real *) malloc(n_skewers_local_z*nz_local*sizeof(Real)); - - skewers_temperature_local_x = (Real *) malloc(n_skewers_local_x*nx_local*sizeof(Real)); - skewers_temperature_local_y = (Real *) malloc(n_skewers_local_y*ny_local*sizeof(Real)); - skewers_temperature_local_z = (Real *) malloc(n_skewers_local_z*nz_local*sizeof(Real)); - - #ifdef OUTPUT_SKEWERS - skewers_density_local_x = (Real *) malloc(n_skewers_local_x*nx_local*sizeof(Real)); - skewers_density_local_y = (Real *) malloc(n_skewers_local_y*ny_local*sizeof(Real)); - skewers_density_local_z = (Real *) malloc(n_skewers_local_z*nz_local*sizeof(Real)); - #endif - - - // for (int i=0; i 0 ){ + if (n_mpi_x > 0) { sorted = true; - while ( !sorted ){ + while (!sorted) { sorted = true; - for (int i=0; i mpi_domain_boundary_x[mpi_indices_x[i+1]] ){ - temp_indx = mpi_indices_x[i]; - mpi_indices_x[i] = mpi_indices_x[i+1]; - mpi_indices_x[i+1] = temp_indx; - sorted = false; + for (int i = 0; i < n_mpi_x - 1; i++) { + if (mpi_domain_boundary_x[mpi_indices_x[i]] > + mpi_domain_boundary_x[mpi_indices_x[i + 1]]) { + temp_indx = mpi_indices_x[i]; + mpi_indices_x[i] = mpi_indices_x[i + 1]; + mpi_indices_x[i + 1] = temp_indx; + sorted = false; } } } } - if ( n_mpi_y > 0 ){ + if (n_mpi_y > 0) { sorted = true; - while ( !sorted ){ + while (!sorted) { sorted = true; - for (int i=0; i mpi_domain_boundary_y[mpi_indices_y[i+1]] ){ - temp_indx = mpi_indices_y[i]; - mpi_indices_y[i] = mpi_indices_y[i+1]; - mpi_indices_y[i+1] = temp_indx; - sorted = false; + for (int i = 0; i < n_mpi_y - 1; i++) { + if (mpi_domain_boundary_y[mpi_indices_y[i]] > + mpi_domain_boundary_y[mpi_indices_y[i + 1]]) { + temp_indx = mpi_indices_y[i]; + mpi_indices_y[i] = mpi_indices_y[i + 1]; + mpi_indices_y[i + 1] = temp_indx; + sorted = false; } } } } - if ( n_mpi_z > 0 ){ + if (n_mpi_z > 0) { sorted = true; - while ( !sorted ){ + while (!sorted) { sorted = true; - for (int i=0; i mpi_domain_boundary_z[mpi_indices_z[i+1]] ){ - temp_indx = mpi_indices_z[i]; - mpi_indices_z[i] = mpi_indices_z[i+1]; - mpi_indices_z[i+1] = temp_indx; - sorted = false; + for (int i = 0; i < n_mpi_z - 1; i++) { + if (mpi_domain_boundary_z[mpi_indices_z[i]] > + mpi_domain_boundary_z[mpi_indices_z[i + 1]]) { + temp_indx = mpi_indices_z[i]; + mpi_indices_z[i] = mpi_indices_z[i + 1]; + mpi_indices_z[i + 1] = temp_indx; + sorted = false; } } } } - - // for (int i=0; i /* printf */ -#include -#include "../analysis/analysis.h" -#include "../io/io.h" + #include + #include /* printf */ -#ifdef MPI_CHOLLA -#include "../mpi/mpi_routines.h" -#endif + #include "../analysis/analysis.h" + #include "../io/io.h" -void Grid3D::Compute_Phase_Diagram(){ + #ifdef MPI_CHOLLA + #include "../mpi/mpi_routines.h" + #endif +void Grid3D::Compute_Phase_Diagram() +{ int n_temp, n_dens; Real temp_min, temp_max, dens_min, dens_max; Real log_temp_min, log_temp_max, log_dens_min, log_dens_max; Real log_delta_dens, log_delta_temp; - n_dens = Analysis.n_dens; - n_temp = Analysis.n_temp; + n_dens = Analysis.n_dens; + n_temp = Analysis.n_temp; dens_min = Analysis.dens_min; dens_max = Analysis.dens_max; temp_min = Analysis.temp_min; temp_max = Analysis.temp_max; - log_dens_min = log10( dens_min ); - log_dens_max = log10( dens_max ); - log_temp_min = log10( temp_min ); - log_temp_max = log10( temp_max ); - - log_delta_dens = ( log_dens_max - log_dens_min ) / n_dens; - log_delta_temp = ( log_temp_max - log_temp_min ) / n_temp; + log_dens_min = log10(dens_min); + log_dens_max = log10(dens_max); + log_temp_min = log10(temp_min); + log_temp_max = log10(temp_max); + log_delta_dens = (log_dens_max - log_dens_min) / n_dens; + log_delta_temp = (log_temp_max - log_temp_min) / n_temp; int nx_local, ny_local, nz_local, n_ghost; int nx_grid, ny_grid, nz_grid; nx_local = Analysis.nx_local; ny_local = Analysis.ny_local; nz_local = Analysis.nz_local; - n_ghost = Analysis.n_ghost; - nx_grid = nx_local + 2*n_ghost; - ny_grid = ny_local + 2*n_ghost; - nz_grid = nz_local + 2*n_ghost; - - + n_ghost = Analysis.n_ghost; + nx_grid = nx_local + 2 * n_ghost; + ny_grid = ny_local + 2 * n_ghost; + nz_grid = nz_local + 2 * n_ghost; Real dens, log_dens, temp, log_temp; int k, j, i, id_grid; int indx_dens, indx_temp, indx_phase; + // Clear Phase Dikagram + for (indx_phase = 0; indx_phase < n_temp * n_dens; indx_phase++) + Analysis.phase_diagram[indx_phase] = 0; + + for (k = 0; k < nz_local; k++) { + for (j = 0; j < ny_local; j++) { + for (i = 0; i < nx_local; i++) { + id_grid = (i + n_ghost) + (j + n_ghost) * nx_grid + + (k + n_ghost) * nx_grid * ny_grid; + dens = C.density[id_grid] * Cosmo.rho_0_gas / + Cosmo.rho_mean_baryon; // Baryonic overdensity + // chprintf( "%f %f \n", dens, temp); + #ifdef COOLING_GRACKLE + temp = Cool.temperature[id_grid]; + #elif defined CHEMISTRY_GPU + temp = Chem.Fields.temperature_h[id_grid]; + #else + chprintf( + "ERROR: Temperature Field is only supported for Grackle Cooling or " + "CHEMISTRY_GPU\n"); + exit(-1); + #endif - //Clear Phase Dikagram - for (indx_phase=0; indx_phase dens_max || temp < temp_min || temp > temp_max ){ - // printf("Outside Phase Diagram: dens:%e temp:%e \n", dens, temp ); - continue; - } - log_dens = log10(dens); - log_temp = log10(temp); - indx_dens = ( log_dens - log_dens_min ) / log_delta_dens; - indx_temp = ( log_temp - log_temp_min ) / log_delta_temp; - - indx_phase = indx_temp + indx_dens*n_temp; - if ( indx_phase >= n_dens*n_temp || indx_phase < 0 ){ - printf("Index outside Phase Diagram: indx:%d N:%d dens:%e temp:%e indx_dens:%d indx_temp:%d \n", indx_phase, n_dens*n_temp, dens, temp, indx_dens, indx_temp ); - continue; - } - Analysis.phase_diagram[indx_phase] += 1; - + if (dens < dens_min || dens > dens_max || temp < temp_min || + temp > temp_max) { + // printf("Outside Phase Diagram: dens:%e temp:%e \n", dens, temp + // ); + continue; + } + log_dens = log10(dens); + log_temp = log10(temp); + indx_dens = (log_dens - log_dens_min) / log_delta_dens; + indx_temp = (log_temp - log_temp_min) / log_delta_temp; + + indx_phase = indx_temp + indx_dens * n_temp; + if (indx_phase >= n_dens * n_temp || indx_phase < 0) { + printf( + "Index outside Phase Diagram: indx:%d N:%d dens:%e temp:%e " + " indx_dens:%d indx_temp:%d \n", + indx_phase, n_dens * n_temp, dens, temp, indx_dens, indx_temp); + continue; + } + Analysis.phase_diagram[indx_phase] += 1; } } } // Real phase_sum_local = 0; - // for (indx_phase=0; indx_phaseH0; + + #ifdef COSMOLOGY + Chem.H.H0 = P->H0; Chem.H.Omega_M = P->Omega_M; Chem.H.Omega_L = P->Omega_L; -#endif //COSMOLOGY - + #endif // COSMOLOGY + // Set up the units system. Real Msun, kpc_cgs, kpc_km, dens_to_CGS; - Msun = MSUN_CGS; - kpc_cgs = KPC_CGS; - kpc_km = KPC; + Msun = MSUN_CGS; + kpc_cgs = KPC_CGS; + kpc_km = KPC; dens_to_CGS = Msun / kpc_cgs / kpc_cgs / kpc_cgs; -#ifdef COSMOLOGY + #ifdef COSMOLOGY dens_to_CGS = dens_to_CGS * Cosmo.rho_0_gas * Cosmo.cosmo_h * Cosmo.cosmo_h; -#endif //COSMOLOGY - + #endif // COSMOLOGY + // These are conversions from code units to cgs. Following Grackle - Chem.H.density_units = dens_to_CGS; - Chem.H.length_units = kpc_cgs; - Chem.H.time_units = kpc_km; + Chem.H.density_units = dens_to_CGS; + Chem.H.length_units = kpc_cgs; + Chem.H.time_units = kpc_km; Chem.H.dens_number_conv = Chem.H.density_units / MH; -#ifdef COSMOLOGY + #ifdef COSMOLOGY Chem.H.a_value = Cosmo.current_a; - Chem.H.density_units = Chem.H.density_units / Chem.H.a_value / Chem.H.a_value / Chem.H.a_value ; - Chem.H.length_units = Chem.H.length_units / Cosmo.cosmo_h * Chem.H.a_value; - Chem.H.time_units = Chem.H.time_units / Cosmo.cosmo_h ; + Chem.H.density_units = + Chem.H.density_units / Chem.H.a_value / Chem.H.a_value / Chem.H.a_value; + Chem.H.length_units = Chem.H.length_units / Cosmo.cosmo_h * Chem.H.a_value; + Chem.H.time_units = Chem.H.time_units / Cosmo.cosmo_h; Chem.H.dens_number_conv = Chem.H.density_number_conv * pow(Chem.H.a_value, 3); -#endif //COSMOLOGY - Chem.H.velocity_units = Chem.H.length_units /Chem.H.time_units; - + #endif // COSMOLOGY + Chem.H.velocity_units = Chem.H.length_units / Chem.H.time_units; + Real dens_base, length_base, time_base; dens_base = Chem.H.density_units; length_base = Chem.H.length_units; -#ifdef COSMOLOGY + #ifdef COSMOLOGY dens_base = dens_base * Chem.H.a_value * Chem.H.a_value * Chem.H.a_value; length_base = length_base / Chem.H.a_value; -#endif //COSMOLOGY + #endif // COSMOLOGY - time_base = Chem.H.time_units; - Chem.H.cooling_units = ( pow(length_base, 2) * pow(MH, 2) ) / ( dens_base * pow(time_base, 3) ); - Chem.H.reaction_units = MH / (dens_base * time_base ); + time_base = Chem.H.time_units; + Chem.H.cooling_units = + (pow(length_base, 2) * pow(MH, 2)) / (dens_base * pow(time_base, 3)); + Chem.H.reaction_units = MH / (dens_base * time_base); // printf(" cooling_units: %e\n", Chem.H.cooling_units ); // printf(" reaction_units: %e\n", Chem.H.reaction_units ); - + Chem.H.max_iter = 10000; - + // Initialize all the rates - Chem.Initialize( P ); - + Chem.Initialize(P); + #ifdef COSMOLOGY // Real kpc_cgs = KPC_CGS; - Chem.H.density_conversion = Cosmo.rho_0_gas * Cosmo.cosmo_h * Cosmo.cosmo_h / pow( kpc_cgs, 3) * MSUN_CGS ; - Chem.H.energy_conversion = Cosmo.v_0_gas * Cosmo.v_0_gas * 1e10; //km^2 -> cm^2 ; - #else // Not COSMOLOGY + Chem.H.density_conversion = Cosmo.rho_0_gas * Cosmo.cosmo_h * Cosmo.cosmo_h / + pow(kpc_cgs, 3) * MSUN_CGS; + Chem.H.energy_conversion = + Cosmo.v_0_gas * Cosmo.v_0_gas * 1e10; // km^2 -> cm^2 ; + #else // Not COSMOLOGY Chem.H.density_conversion = 1.0; Chem.H.energy_conversion = 1.0; #endif - Chem.H.n_uvb_rates_samples = Chem.n_uvb_rates_samples; - Chem.H.uvb_rates_redshift_d = Chem.rates_z_d; - Chem.H.photo_ion_HI_rate_d = Chem.Ion_rates_HI_d; - Chem.H.photo_ion_HeI_rate_d = Chem.Ion_rates_HeI_d; - Chem.H.photo_ion_HeII_rate_d = Chem.Ion_rates_HeII_d; + Chem.H.n_uvb_rates_samples = Chem.n_uvb_rates_samples; + Chem.H.uvb_rates_redshift_d = Chem.rates_z_d; + Chem.H.photo_ion_HI_rate_d = Chem.Ion_rates_HI_d; + Chem.H.photo_ion_HeI_rate_d = Chem.Ion_rates_HeI_d; + Chem.H.photo_ion_HeII_rate_d = Chem.Ion_rates_HeII_d; Chem.H.photo_heat_HI_rate_d = Chem.Heat_rates_HI_d; Chem.H.photo_heat_HeI_rate_d = Chem.Heat_rates_HeI_d; Chem.H.photo_heat_HeII_rate_d = Chem.Heat_rates_HeII_d; - - chprintf( "Allocating Memory. \n\n"); - int n_cells = H.nx * H.ny * H.nz; - Chem.Fields.temperature_h = (Real *) malloc(n_cells * sizeof(Real)); - - chprintf( "Chemistry Solver Successfully Initialized. \n\n"); + chprintf("Allocating Memory. \n\n"); + int n_cells = H.nx * H.ny * H.nz; + Chem.Fields.temperature_h = (Real *)malloc(n_cells * sizeof(Real)); + + chprintf("Chemistry Solver Successfully Initialized. \n\n"); } +void Chem_GPU::Generate_Reaction_Rate_Table(Real **rate_table_array_d, + Rate_Function_T rate_function, + Real units) +{ + // Host array for storing the rates + Real *rate_table_array_h = (Real *)malloc(H.N_Temp_bins * sizeof(Real)); -void Chem_GPU::Generate_Reaction_Rate_Table( Real **rate_table_array_d, Rate_Function_T rate_function, Real units ){ - - // Host array for storing the rates - Real *rate_table_array_h = (Real *) malloc( H.N_Temp_bins * sizeof(Real) ); - - //Get the temperature spacing. + // Get the temperature spacing. Real T, logT, logT_start, d_logT; logT_start = log(H.Temp_start); - d_logT = ( log(H.Temp_end) - logT_start ) / ( H.N_Temp_bins - 1 ); - + d_logT = (log(H.Temp_end) - logT_start) / (H.N_Temp_bins - 1); + // Evaluate the rate at each temperature. - for (int i=0; i 1e7 ) chprintf( "Temperature: %e mu: %e \n", temp, mu ); - + // if ( temp > 1e7 ) chprintf( "Temperature: %e mu: %e \n", temp, mu + // ); } - } - } - + } + } } -void Chem_GPU::Reset(){ - - free( rates_z_h ); - free( Heat_rates_HI_h ); - free( Heat_rates_HeI_h ); - free( Heat_rates_HeII_h ); - free( Ion_rates_HI_h ); - free( Ion_rates_HeI_h ); - free( Ion_rates_HeII_h ); - - Free_Array_GPU_float( rates_z_d ); - Free_Array_GPU_float( Heat_rates_HI_d ); - Free_Array_GPU_float( Heat_rates_HeI_d ); - Free_Array_GPU_float( Heat_rates_HeII_d ); - Free_Array_GPU_float( Ion_rates_HI_d ); - Free_Array_GPU_float( Ion_rates_HeI_d ); - Free_Array_GPU_float( Ion_rates_HeII_d ); - - free( Fields.temperature_h ); - +void Chem_GPU::Reset() +{ + free(rates_z_h); + free(Heat_rates_HI_h); + free(Heat_rates_HeI_h); + free(Heat_rates_HeII_h); + free(Ion_rates_HI_h); + free(Ion_rates_HeI_h); + free(Ion_rates_HeII_h); + + Free_Array_GPU_float(rates_z_d); + Free_Array_GPU_float(Heat_rates_HI_d); + Free_Array_GPU_float(Heat_rates_HeI_d); + Free_Array_GPU_float(Heat_rates_HeII_d); + Free_Array_GPU_float(Ion_rates_HI_d); + Free_Array_GPU_float(Ion_rates_HeI_d); + Free_Array_GPU_float(Ion_rates_HeII_d); + + free(Fields.temperature_h); } - - - - - - - - - - #endif diff --git a/src/chemistry_gpu/chemistry_functions_gpu.cu b/src/chemistry_gpu/chemistry_functions_gpu.cu index 0f621b7f4..b5ecddb45 100644 --- a/src/chemistry_gpu/chemistry_functions_gpu.cu +++ b/src/chemistry_gpu/chemistry_functions_gpu.cu @@ -1,255 +1,318 @@ #ifdef CHEMISTRY_GPU -#include "chemistry_gpu.h" -#include "../hydro/hydro_cuda.h" -#include "../global/global_cuda.h" -#include "../io/io.h" -#include "rates.cuh" -#include "rates_Katz95.cuh" -#include "../grid/grid_enum.h" - -#define eV_to_K 1.160451812e4 -#define K_to_eV 8.617333263e-5 -#define n_min 1e-20 -#define tiny 1e-20 - -#define TPB_CHEM 256 - -void Chem_GPU::Allocate_Array_GPU_float( float **array_dev, int size ){ -cudaMalloc( (void**)array_dev, size*sizeof(float)); -CudaCheckError(); + #include "../global/global_cuda.h" + #include "../grid/grid_enum.h" + #include "../hydro/hydro_cuda.h" + #include "../io/io.h" + #include "chemistry_gpu.h" + #include "rates.cuh" + #include "rates_Katz95.cuh" + + #define eV_to_K 1.160451812e4 + #define K_to_eV 8.617333263e-5 + #define n_min 1e-20 + #define tiny 1e-20 + + #define TPB_CHEM 256 + +void Chem_GPU::Allocate_Array_GPU_float(float **array_dev, int size) +{ + cudaMalloc((void **)array_dev, size * sizeof(float)); + CudaCheckError(); } -void Chem_GPU::Copy_Float_Array_to_Device( int size, float *array_h, float *array_d ){ -CudaSafeCall( cudaMemcpy(array_d, array_h, size*sizeof(float), cudaMemcpyHostToDevice ) ); -cudaDeviceSynchronize(); +void Chem_GPU::Copy_Float_Array_to_Device(int size, float *array_h, + float *array_d) +{ + CudaSafeCall(cudaMemcpy(array_d, array_h, size * sizeof(float), + cudaMemcpyHostToDevice)); + cudaDeviceSynchronize(); } -void Chem_GPU::Free_Array_GPU_float( float *array_dev ){ -cudaFree( array_dev ); -CudaCheckError(); +void Chem_GPU::Free_Array_GPU_float(float *array_dev) +{ + cudaFree(array_dev); + CudaCheckError(); } -void Chem_GPU::Allocate_Array_GPU_Real( Real **array_dev, int size ){ -cudaMalloc( (void**)array_dev, size*sizeof(Real)); -CudaCheckError(); +void Chem_GPU::Allocate_Array_GPU_Real(Real **array_dev, int size) +{ + cudaMalloc((void **)array_dev, size * sizeof(Real)); + CudaCheckError(); } -void Chem_GPU::Copy_Real_Array_to_Device( int size, Real *array_h, Real *array_d ){ -CudaSafeCall( cudaMemcpy(array_d, array_h, size*sizeof(Real), cudaMemcpyHostToDevice ) ); -cudaDeviceSynchronize(); +void Chem_GPU::Copy_Real_Array_to_Device(int size, Real *array_h, Real *array_d) +{ + CudaSafeCall(cudaMemcpy(array_d, array_h, size * sizeof(Real), + cudaMemcpyHostToDevice)); + cudaDeviceSynchronize(); } -void Chem_GPU::Free_Array_GPU_Real( Real *array_dev ){ -cudaFree( array_dev ); -CudaCheckError(); +void Chem_GPU::Free_Array_GPU_Real(Real *array_dev) +{ + cudaFree(array_dev); + CudaCheckError(); } -class Thermal_State{ -public: - -Real U; -Real d; -Real d_HI; -Real d_HII; -Real d_HeI; -Real d_HeII; -Real d_HeIII; -Real d_e; - -// Constructor -__host__ __device__ Thermal_State( Real U_0=1, Real d_0=1, Real d_HI_0=1, Real d_HII_0=0, Real d_HeI_0=1, Real d_HeII_0=0, Real d_HeIII_0=1, Real d_e_0=0 ) : U(U_0), d(d_0), d_HI(d_HI_0), d_HII(d_HII_0), d_HeI(d_HeI_0), d_HeII(d_HeII_0), d_HeIII(d_HeIII_0), d_e(d_e_0) {} - -__host__ __device__ Real get_MMW( ){ - // Real m_tot = d_HI + d_HII + d_HeI + d_HeII + d_HeIII; - Real n_tot = d_HI + d_HII + 0.25 * ( d_HeI + d_HeII + d_HeIII ) + d_e; - return d / n_tot; - // return m_tot / n_tot; -} +class Thermal_State +{ + public: + Real U; + Real d; + Real d_HI; + Real d_HII; + Real d_HeI; + Real d_HeII; + Real d_HeIII; + Real d_e; + + // Constructor + __host__ __device__ Thermal_State(Real U_0 = 1, Real d_0 = 1, Real d_HI_0 = 1, + Real d_HII_0 = 0, Real d_HeI_0 = 1, + Real d_HeII_0 = 0, Real d_HeIII_0 = 1, + Real d_e_0 = 0) + : U(U_0), + d(d_0), + d_HI(d_HI_0), + d_HII(d_HII_0), + d_HeI(d_HeI_0), + d_HeII(d_HeII_0), + d_HeIII(d_HeIII_0), + d_e(d_e_0) + { + } -__host__ __device__ Real get_temperature( Real gamma ){ - Real mu, temp; - mu = get_MMW(); - temp = (gamma - 1) * mu * U * MP / KB * 1e10; - return temp; -} + __host__ __device__ Real get_MMW() + { + // Real m_tot = d_HI + d_HII + d_HeI + d_HeII + d_HeIII; + Real n_tot = d_HI + d_HII + 0.25 * (d_HeI + d_HeII + d_HeIII) + d_e; + return d / n_tot; + // return m_tot / n_tot; + } -__host__ __device__ Real compute_U( Real temp, Real gamma ){ - Real mu, U_local; - mu = get_MMW(); - U_local = temp / ( gamma - 1 ) / mu / MP * KB / 1e10; - return U_local; -} + __host__ __device__ Real get_temperature(Real gamma) + { + Real mu, temp; + mu = get_MMW(); + temp = (gamma - 1) * mu * U * MP / KB * 1e10; + return temp; + } + __host__ __device__ Real compute_U(Real temp, Real gamma) + { + Real mu, U_local; + mu = get_MMW(); + U_local = temp / (gamma - 1) / mu / MP * KB / 1e10; + return U_local; + } }; -__device__ void get_temperature_indx( Real T, Chemistry_Header &Chem_H, int &temp_indx, Real &delta_T, Real temp_old, bool print ){ - +__device__ void get_temperature_indx(Real T, Chemistry_Header &Chem_H, + int &temp_indx, Real &delta_T, + Real temp_old, bool print) +{ Real logT, logT_start, d_logT, logT_l, logT_r; - logT = log( 0.5 * ( T + temp_old ) ); - logT_start = log( Chem_H.Temp_start ); - logT = fmax( logT_start, logT ); - logT = fmin( log( Chem_H.Temp_end ), logT ); - d_logT = ( log( Chem_H.Temp_end ) - logT_start ) / ( Chem_H.N_Temp_bins - 1 ); - temp_indx = (int) floor( (logT - logT_start) / d_logT ); - temp_indx = max( 0, temp_indx ); - temp_indx = min( Chem_H.N_Temp_bins-2, temp_indx ); - logT_l = logT_start + temp_indx * d_logT; - logT_r = logT_start + (temp_indx+1) * d_logT; - delta_T = ( logT - logT_l ) / ( logT_r - logT_l ); - // if (print) printf(" logT_start: %f logT_end: %f d_logT: %f \n", logT_start, log( Chem_H.Temp_end ), d_logT ); - // if (print) printf(" logT: %f logT_l: %f logT_r: %f \n", logT, logT_l, logT_r ); - + logT = log(0.5 * (T + temp_old)); + logT_start = log(Chem_H.Temp_start); + logT = fmax(logT_start, logT); + logT = fmin(log(Chem_H.Temp_end), logT); + d_logT = (log(Chem_H.Temp_end) - logT_start) / (Chem_H.N_Temp_bins - 1); + temp_indx = (int)floor((logT - logT_start) / d_logT); + temp_indx = max(0, temp_indx); + temp_indx = min(Chem_H.N_Temp_bins - 2, temp_indx); + logT_l = logT_start + temp_indx * d_logT; + logT_r = logT_start + (temp_indx + 1) * d_logT; + delta_T = (logT - logT_l) / (logT_r - logT_l); + // if (print) printf(" logT_start: %f logT_end: %f d_logT: %f \n", + // logT_start, log( Chem_H.Temp_end ), d_logT ); if (print) printf(" logT: %f + // logT_l: %f logT_r: %f \n", logT, logT_l, logT_r ); } -__device__ Real interpolate_rate( Real *rate_table, int indx, Real delta ){ - +__device__ Real interpolate_rate(Real *rate_table, int indx, Real delta) +{ Real rate_val; rate_val = rate_table[indx]; - rate_val = rate_val + delta * ( rate_table[indx+1] - rate_val ); + rate_val = rate_val + delta * (rate_table[indx + 1] - rate_val); return rate_val; } -__device__ Real Get_Cooling_Rates( Thermal_State &TS, Chemistry_Header &Chem_H, Real dens_number_conv, Real current_z, Real temp_prev, - float photo_h_HI, float photo_h_HeI, float photo_h_HeII, bool print ){ - +__device__ Real Get_Cooling_Rates(Thermal_State &TS, Chemistry_Header &Chem_H, + Real dens_number_conv, Real current_z, + Real temp_prev, float photo_h_HI, + float photo_h_HeI, float photo_h_HeII, + bool print) +{ int temp_indx; Real temp, delta_T, U_dot; - temp = TS.get_temperature( Chem_H.gamma ); - get_temperature_indx( temp, Chem_H, temp_indx, delta_T, temp_prev, print ); - if (print) printf("mu: %f temp: %f temp_indx: %d delta_T: %f \n", TS.get_MMW(), temp, temp_indx, delta_T ); + temp = TS.get_temperature(Chem_H.gamma); + get_temperature_indx(temp, Chem_H, temp_indx, delta_T, temp_prev, print); + if (print) + printf("mu: %f temp: %f temp_indx: %d delta_T: %f \n", TS.get_MMW(), + temp, temp_indx, delta_T); U_dot = 0.0; // Collisional excitation cooling Real cool_ceHI, cool_ceHeI, cool_ceHeII; - cool_ceHI = interpolate_rate( Chem_H.cool_ceHI_d, temp_indx, delta_T ) * TS.d_HI * TS.d_e; - cool_ceHeI = interpolate_rate( Chem_H.cool_ceHeI_d, temp_indx, delta_T ) * TS.d_HeII * TS.d_e * TS.d_e * dens_number_conv / 4.0 ; - cool_ceHeII = interpolate_rate( Chem_H.cool_ceHeII_d, temp_indx, delta_T ) * TS.d_HeII * TS.d_e / 4.0; + cool_ceHI = interpolate_rate(Chem_H.cool_ceHI_d, temp_indx, delta_T) * + TS.d_HI * TS.d_e; + cool_ceHeI = interpolate_rate(Chem_H.cool_ceHeI_d, temp_indx, delta_T) * + TS.d_HeII * TS.d_e * TS.d_e * dens_number_conv / 4.0; + cool_ceHeII = interpolate_rate(Chem_H.cool_ceHeII_d, temp_indx, delta_T) * + TS.d_HeII * TS.d_e / 4.0; U_dot -= cool_ceHI + cool_ceHeI + cool_ceHeII; // Collisional excitation cooling Real cool_ciHI, cool_ciHeI, cool_ciHeII, cool_ciHeIS; - cool_ciHI = interpolate_rate( Chem_H.cool_ciHI_d, temp_indx, delta_T ) * TS.d_HI * TS.d_e; - cool_ciHeI = interpolate_rate( Chem_H.cool_ciHeI_d, temp_indx, delta_T ) * TS.d_HeI * TS.d_e / 4.0; - cool_ciHeII = interpolate_rate( Chem_H.cool_ciHeII_d, temp_indx, delta_T ) * TS.d_HeII * TS.d_e / 4.0; - cool_ciHeIS = interpolate_rate( Chem_H.cool_ciHeIS_d, temp_indx, delta_T ) * TS.d_HeII * TS.d_e * TS.d_e * dens_number_conv / 4.0; + cool_ciHI = interpolate_rate(Chem_H.cool_ciHI_d, temp_indx, delta_T) * + TS.d_HI * TS.d_e; + cool_ciHeI = interpolate_rate(Chem_H.cool_ciHeI_d, temp_indx, delta_T) * + TS.d_HeI * TS.d_e / 4.0; + cool_ciHeII = interpolate_rate(Chem_H.cool_ciHeII_d, temp_indx, delta_T) * + TS.d_HeII * TS.d_e / 4.0; + cool_ciHeIS = interpolate_rate(Chem_H.cool_ciHeIS_d, temp_indx, delta_T) * + TS.d_HeII * TS.d_e * TS.d_e * dens_number_conv / 4.0; U_dot -= cool_ciHI + cool_ciHeI + cool_ciHeII + cool_ciHeIS; // Recombination cooling Real cool_reHII, cool_reHeII1, cool_reHeII2, cool_reHeIII; - cool_reHII = interpolate_rate( Chem_H.cool_reHII_d, temp_indx, delta_T ) * TS.d_HII * TS.d_e; - cool_reHeII1 = interpolate_rate( Chem_H.cool_reHeII1_d, temp_indx, delta_T ) * TS.d_HeII * TS.d_e / 4.0; - cool_reHeII2 = interpolate_rate( Chem_H.cool_reHeII2_d, temp_indx, delta_T ) * TS.d_HeII * TS.d_e / 4.0; - cool_reHeIII = interpolate_rate( Chem_H.cool_reHeIII_d, temp_indx, delta_T ) * TS.d_HeIII * TS.d_e / 4.0; + cool_reHII = interpolate_rate(Chem_H.cool_reHII_d, temp_indx, delta_T) * + TS.d_HII * TS.d_e; + cool_reHeII1 = interpolate_rate(Chem_H.cool_reHeII1_d, temp_indx, delta_T) * + TS.d_HeII * TS.d_e / 4.0; + cool_reHeII2 = interpolate_rate(Chem_H.cool_reHeII2_d, temp_indx, delta_T) * + TS.d_HeII * TS.d_e / 4.0; + cool_reHeIII = interpolate_rate(Chem_H.cool_reHeIII_d, temp_indx, delta_T) * + TS.d_HeIII * TS.d_e / 4.0; U_dot -= cool_reHII + cool_reHeII1 + cool_reHeII2 + cool_reHeIII; // Bremsstrahlung cooling Real cool_brem; - cool_brem = interpolate_rate( Chem_H.cool_brem_d, temp_indx, delta_T ) * ( TS.d_HII + TS.d_HeII/4.0 + TS.d_HeIII ) * TS.d_e; + cool_brem = interpolate_rate(Chem_H.cool_brem_d, temp_indx, delta_T) * + (TS.d_HII + TS.d_HeII / 4.0 + TS.d_HeIII) * TS.d_e; U_dot -= cool_brem; // Compton cooling or heating Real cool_compton, temp_cmb; - temp_cmb = 2.73 * ( 1.0 + current_z ); - cool_compton = Chem_H.cool_compton * pow(1.0 + current_z, 4) * ( temp - temp_cmb ) * TS.d_e / dens_number_conv; + temp_cmb = 2.73 * (1.0 + current_z); + cool_compton = Chem_H.cool_compton * pow(1.0 + current_z, 4) * + (temp - temp_cmb) * TS.d_e / dens_number_conv; U_dot -= cool_compton; // Phothoheating Real photo_heat; - photo_heat = ( photo_h_HI * TS.d_HI + 0.25 * ( photo_h_HeI * TS.d_HeI + photo_h_HeII * TS.d_HeII ) ) / dens_number_conv; + photo_heat = (photo_h_HI * TS.d_HI + + 0.25 * (photo_h_HeI * TS.d_HeI + photo_h_HeII * TS.d_HeII)) / + dens_number_conv; U_dot += photo_heat; - if ( temp <= 1.01* Chem_H.Temp_start && fabs( U_dot ) < 0 ) U_dot = tiny; - if ( fabs(U_dot) < tiny ) U_dot = tiny; - - - if (print) printf("HI: %e \n", TS.d_HI ); - if (print) printf("HII: %e \n", TS.d_HII ); - if (print) printf("HeI: %e \n", TS.d_HeI ); - if (print) printf("HeII: %e \n", TS.d_HeII ); - if (print) printf("HeIII: %e \n", TS.d_HeIII ); - if (print) printf("de: %e \n", TS.d_e ); - if (print) printf("Cooling ceHI: %e \n", cool_ceHI ); - if (print) printf("Cooling ceHeI: %e \n", cool_ceHeI ); - if (print) printf("Cooling ceHeII: %e \n", cool_ceHeII ); - if (print) printf("Cooling ciHI: %e \n", cool_ciHI ); - if (print) printf("Cooling ciHeI: %e \n", cool_ciHeI ); - if (print) printf("Cooling ciHeII: %e \n", cool_ciHeII ); - if (print) printf("Cooling ciHeIS: %e \n", cool_ciHeIS ); - if (print) printf("Cooling reHII: %e \n", cool_reHII ); - if (print) printf("Cooling reHeII1: %e \n", cool_reHeII1 ); - if (print) printf("Cooling reHeII2: %e \n", cool_reHeII2 ); - if (print) printf("Cooling reHeIII: %e \n", cool_reHeIII ); - if (print) printf("Cooling brem: %e \n", cool_brem ); - if (print) printf("Cooling piHI: %e rate: %e \n", photo_h_HI, photo_h_HI * TS.d_HI / dens_number_conv ); - if (print) printf("Cooling piHeI: %e rate: %e \n", photo_h_HeI, photo_h_HeI * TS.d_HeI / dens_number_conv * 0.25 ); - if (print) printf("Cooling piHeII: %e rate: %e \n", photo_h_HeII, photo_h_HeII * TS.d_HeII / dens_number_conv * 0.25); - if (print) printf("Cooling DOM: %e \n", dens_number_conv ); - if (print) printf("Cooling compton: %e \n", cool_compton ); - if (print) printf("Cooling U_dot: %e \n", U_dot ); - + if (temp <= 1.01 * Chem_H.Temp_start && fabs(U_dot) < 0) U_dot = tiny; + if (fabs(U_dot) < tiny) U_dot = tiny; + + if (print) printf("HI: %e \n", TS.d_HI); + if (print) printf("HII: %e \n", TS.d_HII); + if (print) printf("HeI: %e \n", TS.d_HeI); + if (print) printf("HeII: %e \n", TS.d_HeII); + if (print) printf("HeIII: %e \n", TS.d_HeIII); + if (print) printf("de: %e \n", TS.d_e); + if (print) printf("Cooling ceHI: %e \n", cool_ceHI); + if (print) printf("Cooling ceHeI: %e \n", cool_ceHeI); + if (print) printf("Cooling ceHeII: %e \n", cool_ceHeII); + if (print) printf("Cooling ciHI: %e \n", cool_ciHI); + if (print) printf("Cooling ciHeI: %e \n", cool_ciHeI); + if (print) printf("Cooling ciHeII: %e \n", cool_ciHeII); + if (print) printf("Cooling ciHeIS: %e \n", cool_ciHeIS); + if (print) printf("Cooling reHII: %e \n", cool_reHII); + if (print) printf("Cooling reHeII1: %e \n", cool_reHeII1); + if (print) printf("Cooling reHeII2: %e \n", cool_reHeII2); + if (print) printf("Cooling reHeIII: %e \n", cool_reHeIII); + if (print) printf("Cooling brem: %e \n", cool_brem); + if (print) + printf("Cooling piHI: %e rate: %e \n", photo_h_HI, + photo_h_HI * TS.d_HI / dens_number_conv); + if (print) + printf("Cooling piHeI: %e rate: %e \n", photo_h_HeI, + photo_h_HeI * TS.d_HeI / dens_number_conv * 0.25); + if (print) + printf("Cooling piHeII: %e rate: %e \n", photo_h_HeII, + photo_h_HeII * TS.d_HeII / dens_number_conv * 0.25); + if (print) printf("Cooling DOM: %e \n", dens_number_conv); + if (print) printf("Cooling compton: %e \n", cool_compton); + if (print) printf("Cooling U_dot: %e \n", U_dot); return U_dot; - } -__device__ void Get_Reaction_Rates( Thermal_State &TS, Chemistry_Header &Chem_H, Real &k_coll_i_HI, Real &k_coll_i_HeI, Real &k_coll_i_HeII, - Real &k_coll_i_HI_HI, Real &k_coll_i_HI_HeI, Real &k_recomb_HII, Real &k_recomb_HeII, Real &k_recomb_HeIII, bool print ){ - +__device__ void Get_Reaction_Rates(Thermal_State &TS, Chemistry_Header &Chem_H, + Real &k_coll_i_HI, Real &k_coll_i_HeI, + Real &k_coll_i_HeII, Real &k_coll_i_HI_HI, + Real &k_coll_i_HI_HeI, Real &k_recomb_HII, + Real &k_recomb_HeII, Real &k_recomb_HeIII, + bool print) +{ int temp_indx; Real temp, delta_T; - temp = TS.get_temperature( Chem_H.gamma ); - get_temperature_indx( temp, Chem_H, temp_indx, delta_T, temp, print ); - - k_coll_i_HI = interpolate_rate( Chem_H.k_coll_i_HI_d, temp_indx, delta_T ); - k_coll_i_HeI = interpolate_rate( Chem_H.k_coll_i_HeI_d, temp_indx, delta_T ); - k_coll_i_HeII = interpolate_rate( Chem_H.k_coll_i_HeII_d, temp_indx, delta_T ); - - k_coll_i_HI_HI = interpolate_rate( Chem_H.k_coll_i_HI_HI_d, temp_indx, delta_T ); - k_coll_i_HI_HeI = interpolate_rate( Chem_H.k_coll_i_HI_HeI_d, temp_indx, delta_T ); - - k_recomb_HII = interpolate_rate( Chem_H.k_recomb_HII_d, temp_indx, delta_T ); - k_recomb_HeII = interpolate_rate( Chem_H.k_recomb_HeII_d, temp_indx, delta_T ); - k_recomb_HeIII = interpolate_rate( Chem_H.k_recomb_HeIII_d, temp_indx, delta_T ); - - if (print) printf("logT: %f temp_indx: %d\n", log(temp), temp_indx ); - if (print) printf("k_coll_i_HI: %e \n", k_coll_i_HI ); - if (print) printf("k_coll_i_HeI: %e \n", k_coll_i_HeI ); - if (print) printf("k_coll_i_HeII: %e \n", k_coll_i_HeII ); - if (print) printf("k_coll_i_HI_HI: %e \n", k_coll_i_HI_HI ); - if (print) printf("k_coll_i_HI_HeI: %e \n", k_coll_i_HI_HeI ); - if (print) printf("k_recomb_HII: %e \n", k_recomb_HII ); - if (print) printf("k_recomb_HeII: %e \n", k_recomb_HeII ); - if (print) printf("k_recomb_HeIII: %e \n", k_recomb_HeIII ); - + temp = TS.get_temperature(Chem_H.gamma); + get_temperature_indx(temp, Chem_H, temp_indx, delta_T, temp, print); + + k_coll_i_HI = interpolate_rate(Chem_H.k_coll_i_HI_d, temp_indx, delta_T); + k_coll_i_HeI = interpolate_rate(Chem_H.k_coll_i_HeI_d, temp_indx, delta_T); + k_coll_i_HeII = interpolate_rate(Chem_H.k_coll_i_HeII_d, temp_indx, delta_T); + + k_coll_i_HI_HI = + interpolate_rate(Chem_H.k_coll_i_HI_HI_d, temp_indx, delta_T); + k_coll_i_HI_HeI = + interpolate_rate(Chem_H.k_coll_i_HI_HeI_d, temp_indx, delta_T); + + k_recomb_HII = interpolate_rate(Chem_H.k_recomb_HII_d, temp_indx, delta_T); + k_recomb_HeII = interpolate_rate(Chem_H.k_recomb_HeII_d, temp_indx, delta_T); + k_recomb_HeIII = + interpolate_rate(Chem_H.k_recomb_HeIII_d, temp_indx, delta_T); + + if (print) printf("logT: %f temp_indx: %d\n", log(temp), temp_indx); + if (print) printf("k_coll_i_HI: %e \n", k_coll_i_HI); + if (print) printf("k_coll_i_HeI: %e \n", k_coll_i_HeI); + if (print) printf("k_coll_i_HeII: %e \n", k_coll_i_HeII); + if (print) printf("k_coll_i_HI_HI: %e \n", k_coll_i_HI_HI); + if (print) printf("k_coll_i_HI_HeI: %e \n", k_coll_i_HI_HeI); + if (print) printf("k_recomb_HII: %e \n", k_recomb_HII); + if (print) printf("k_recomb_HeII: %e \n", k_recomb_HeII); + if (print) printf("k_recomb_HeIII: %e \n", k_recomb_HeIII); } -__device__ int Binary_Search( int N, Real val, float *data, int indx_l, int indx_r ){ +__device__ int Binary_Search(int N, Real val, float *data, int indx_l, + int indx_r) +{ int n, indx; - n = indx_r - indx_l; - indx = indx_l + n/2; - if ( val >= data[N-1] ) return indx_r; - if ( val <= data[0] ) return indx_l; - if ( indx_r == indx_l + 1 ) return indx_l; - if ( data[indx] <= val ) indx_l = indx; - else indx_r = indx; - return Binary_Search( N, val, data, indx_l, indx_r ); + n = indx_r - indx_l; + indx = indx_l + n / 2; + if (val >= data[N - 1]) return indx_r; + if (val <= data[0]) return indx_l; + if (indx_r == indx_l + 1) return indx_l; + if (data[indx] <= val) + indx_l = indx; + else + indx_r = indx; + return Binary_Search(N, val, data, indx_l, indx_r); } -__device__ Real linear_interpolation( Real delta_x, int indx_l, int indx_r, float*array ){ +__device__ Real linear_interpolation(Real delta_x, int indx_l, int indx_r, + float *array) +{ float v_l, v_r; Real v; v_l = array[indx_l]; v_r = array[indx_r]; - v = delta_x * ( v_r - v_l ) + v_l; + v = delta_x * (v_r - v_l) + v_l; return v; } -__device__ void Get_Current_UVB_Rates( Real current_z, Chemistry_Header &Chem_H, - float &photo_i_HI, float &photo_i_HeI, float &photo_i_HeII, - float &photo_h_HI, float &photo_h_HeI, float &photo_h_HeII, bool print ){ - - if ( current_z > Chem_H.uvb_rates_redshift_d[Chem_H.n_uvb_rates_samples - 1]){ +__device__ void Get_Current_UVB_Rates(Real current_z, Chemistry_Header &Chem_H, + float &photo_i_HI, float &photo_i_HeI, + float &photo_i_HeII, float &photo_h_HI, + float &photo_h_HeI, float &photo_h_HeII, + bool print) +{ + if (current_z > Chem_H.uvb_rates_redshift_d[Chem_H.n_uvb_rates_samples - 1]) { photo_h_HI = 0; photo_h_HeI = 0; photo_h_HeII = 0; @@ -257,246 +320,289 @@ __device__ void Get_Current_UVB_Rates( Real current_z, Chemistry_Header &Chem_H, photo_i_HeI = 0; photo_i_HeII = 0; return; - } // Find closest value of z in rates_z such that z<=current_z int indx_l; Real z_l, z_r, delta_x; - indx_l = Binary_Search( Chem_H.n_uvb_rates_samples, current_z, Chem_H.uvb_rates_redshift_d, 0, Chem_H.n_uvb_rates_samples-1 ); - z_l = Chem_H.uvb_rates_redshift_d[indx_l]; - z_r = Chem_H.uvb_rates_redshift_d[indx_l+1]; - delta_x = (current_z - z_l) / ( z_r - z_l ); - - photo_i_HI = linear_interpolation( delta_x, indx_l, indx_l+1, Chem_H.photo_ion_HI_rate_d ); - photo_i_HeI = linear_interpolation( delta_x, indx_l, indx_l+1, Chem_H.photo_ion_HeI_rate_d ); - photo_i_HeII = linear_interpolation( delta_x, indx_l, indx_l+1, Chem_H.photo_ion_HeII_rate_d ); - photo_h_HI = linear_interpolation( delta_x, indx_l, indx_l+1, Chem_H.photo_heat_HI_rate_d ); - photo_h_HeI = linear_interpolation( delta_x, indx_l, indx_l+1, Chem_H.photo_heat_HeI_rate_d ); - photo_h_HeII = linear_interpolation( delta_x, indx_l, indx_l+1, Chem_H.photo_heat_HeII_rate_d ); - + indx_l = Binary_Search(Chem_H.n_uvb_rates_samples, current_z, + Chem_H.uvb_rates_redshift_d, 0, + Chem_H.n_uvb_rates_samples - 1); + z_l = Chem_H.uvb_rates_redshift_d[indx_l]; + z_r = Chem_H.uvb_rates_redshift_d[indx_l + 1]; + delta_x = (current_z - z_l) / (z_r - z_l); + + photo_i_HI = linear_interpolation(delta_x, indx_l, indx_l + 1, + Chem_H.photo_ion_HI_rate_d); + photo_i_HeI = linear_interpolation(delta_x, indx_l, indx_l + 1, + Chem_H.photo_ion_HeI_rate_d); + photo_i_HeII = linear_interpolation(delta_x, indx_l, indx_l + 1, + Chem_H.photo_ion_HeII_rate_d); + photo_h_HI = linear_interpolation(delta_x, indx_l, indx_l + 1, + Chem_H.photo_heat_HI_rate_d); + photo_h_HeI = linear_interpolation(delta_x, indx_l, indx_l + 1, + Chem_H.photo_heat_HeI_rate_d); + photo_h_HeII = linear_interpolation(delta_x, indx_l, indx_l + 1, + Chem_H.photo_heat_HeII_rate_d); } -__device__ Real Get_Chemistry_dt( Thermal_State &TS, Chemistry_Header &Chem_H, Real &HI_dot, Real &e_dot, Real U_dot, - Real k_coll_i_HI, Real k_coll_i_HeI, Real k_coll_i_HeII, Real k_coll_i_HI_HI, Real k_coll_i_HI_HeI, - Real k_recomb_HII, Real k_recomb_HeII, Real k_recomb_HeIII, - float photo_i_HI, float photo_i_HeI, float photo_i_HeII, - int n_iter, Real HI_dot_prev, Real e_dot_prev, - Real t_chem, Real dt_hydro, bool print ){ - - Real dt, energy; +__device__ Real Get_Chemistry_dt( + Thermal_State &TS, Chemistry_Header &Chem_H, Real &HI_dot, Real &e_dot, + Real U_dot, Real k_coll_i_HI, Real k_coll_i_HeI, Real k_coll_i_HeII, + Real k_coll_i_HI_HI, Real k_coll_i_HI_HeI, Real k_recomb_HII, + Real k_recomb_HeII, Real k_recomb_HeIII, float photo_i_HI, + float photo_i_HeI, float photo_i_HeII, int n_iter, Real HI_dot_prev, + Real e_dot_prev, Real t_chem, Real dt_hydro, bool print) +{ + Real dt, energy; // Rate of change of HI - HI_dot = k_recomb_HII * TS.d_HII * TS.d_e - k_coll_i_HI * TS.d_HI * TS.d_e - - k_coll_i_HI_HI * TS.d_HI * TS.d_HI - k_coll_i_HI_HeI * TS.d_HI * TS.d_HeI/4.0 - - photo_i_HI * TS.d_HI; + HI_dot = k_recomb_HII * TS.d_HII * TS.d_e - k_coll_i_HI * TS.d_HI * TS.d_e - + k_coll_i_HI_HI * TS.d_HI * TS.d_HI - + k_coll_i_HI_HeI * TS.d_HI * TS.d_HeI / 4.0 - photo_i_HI * TS.d_HI; // Rate of change of electron - e_dot = k_coll_i_HI * TS.d_HI * TS.d_e + k_coll_i_HeI * TS.d_HeI/4.0 * TS.d_e + k_coll_i_HeII * TS.d_HeII/4.0 * TS.d_e - + k_coll_i_HI_HI * TS.d_HI * TS.d_HI + + k_coll_i_HI_HeI * TS.d_HI * TS.d_HeI/4.0 - - k_recomb_HII * TS.d_HII * TS.d_e - k_recomb_HeII * TS.d_HeII/4.0 * TS.d_e - k_recomb_HeIII * TS.d_HeIII/4.0 * TS.d_e - + photo_i_HI * TS.d_HI + photo_i_HeI * TS.d_HeI/4.0 + photo_i_HeII * TS.d_HeII/4.0; + e_dot = k_coll_i_HI * TS.d_HI * TS.d_e + + k_coll_i_HeI * TS.d_HeI / 4.0 * TS.d_e + + k_coll_i_HeII * TS.d_HeII / 4.0 * TS.d_e + + k_coll_i_HI_HI * TS.d_HI * TS.d_HI + + +k_coll_i_HI_HeI * TS.d_HI * TS.d_HeI / 4.0 - + k_recomb_HII * TS.d_HII * TS.d_e - + k_recomb_HeII * TS.d_HeII / 4.0 * TS.d_e - + k_recomb_HeIII * TS.d_HeIII / 4.0 * TS.d_e + photo_i_HI * TS.d_HI + + photo_i_HeI * TS.d_HeI / 4.0 + photo_i_HeII * TS.d_HeII / 4.0; // Bound from below to prevent numerical errors - if ( fabs(HI_dot) < tiny ) HI_dot = fmin( tiny, TS.d_HI ); - if ( fabs(e_dot) < tiny ) e_dot = fmin( tiny, TS.d_e ); + if (fabs(HI_dot) < tiny) HI_dot = fmin(tiny, TS.d_HI); + if (fabs(e_dot) < tiny) e_dot = fmin(tiny, TS.d_e); // If the net rate is almost perfectly balanced then set // it to zero (since it is zero to available precision) - if ( fmin( fabs(k_coll_i_HI * TS.d_HI * TS.d_e), fabs(k_recomb_HII * TS.d_HII * TS.d_e) ) / fmax( fabs(HI_dot), fabs(e_dot) ) > 1e6 ){ + if (fmin(fabs(k_coll_i_HI * TS.d_HI * TS.d_e), + fabs(k_recomb_HII * TS.d_HII * TS.d_e)) / + fmax(fabs(HI_dot), fabs(e_dot)) > + 1e6) { HI_dot = tiny; e_dot = tiny; } - if ( n_iter > 50 ){ - HI_dot = fmin( fabs(HI_dot), fabs( HI_dot_prev) ); - e_dot = fmin( fabs(e_dot), fabs( e_dot_prev) ); + if (n_iter > 50) { + HI_dot = fmin(fabs(HI_dot), fabs(HI_dot_prev)); + e_dot = fmin(fabs(e_dot), fabs(e_dot_prev)); } - if ( TS.d * Chem_H.dens_number_conv > 1e8 && U_dot > 0 ){ - printf( "#### Equlibrium \n" ); + if (TS.d * Chem_H.dens_number_conv > 1e8 && U_dot > 0) { + printf("#### Equlibrium \n"); } #ifdef TEMPERATURE_FLOOR - if ( TS.get_temperature( Chem_H.gamma ) < TEMP_FLOOR ) TS.U = TS.compute_U( TEMP_FLOOR, Chem_H.gamma ); + if (TS.get_temperature(Chem_H.gamma) < TEMP_FLOOR) + TS.U = TS.compute_U(TEMP_FLOOR, Chem_H.gamma); #endif - energy = fmax( TS.U * TS.d, tiny ); - dt = fmin( fabs( 0.1 * TS.d_HI / HI_dot ), fabs( 0.1 * TS.d_e / e_dot ) ); - dt = fmin( fabs( 0.1 * energy / U_dot ), dt ); - dt = fmin( 0.5 * dt_hydro, dt ); - dt = fmin( dt_hydro - t_chem, dt ); - - if ( n_iter == Chem_H.max_iter-1 ){ - printf("##### Chem_GPU: dt_hydro: %e t_chem: %e dens: %e temp: %e GE: %e U_dot: %e dt_HI: %e dt_e: %e dt_U: %e \n", dt_hydro, t_chem, TS.d, TS.get_temperature(Chem_H.gamma), energy, U_dot, fabs( 0.1 * TS.d_HI / HI_dot ), fabs( 0.1 * TS.d_e / e_dot ), fabs( 0.1 * TS.U * TS.d / U_dot ) ) ; + energy = fmax(TS.U * TS.d, tiny); + dt = fmin(fabs(0.1 * TS.d_HI / HI_dot), fabs(0.1 * TS.d_e / e_dot)); + dt = fmin(fabs(0.1 * energy / U_dot), dt); + dt = fmin(0.5 * dt_hydro, dt); + dt = fmin(dt_hydro - t_chem, dt); + + if (n_iter == Chem_H.max_iter - 1) { + printf( + "##### Chem_GPU: dt_hydro: %e t_chem: %e dens: %e temp: %e GE: " + "%e U_dot: %e dt_HI: %e dt_e: %e dt_U: %e \n", + dt_hydro, t_chem, TS.d, TS.get_temperature(Chem_H.gamma), energy, U_dot, + fabs(0.1 * TS.d_HI / HI_dot), fabs(0.1 * TS.d_e / e_dot), + fabs(0.1 * TS.U * TS.d / U_dot)); } - - if (print) printf("HIdot: %e\n", HI_dot ); - if (print) printf("edot: %e\n", e_dot ); - if (print) printf("energy: %e\n", TS.U * TS.d ); - if (print) printf("Udot: %e\n", U_dot ); - if (print) printf("dt_hydro: %e\n", dt_hydro ); - if (print) printf("dt: %e\n", dt ); + if (print) printf("HIdot: %e\n", HI_dot); + if (print) printf("edot: %e\n", e_dot); + if (print) printf("energy: %e\n", TS.U * TS.d); + if (print) printf("Udot: %e\n", U_dot); + if (print) printf("dt_hydro: %e\n", dt_hydro); + if (print) printf("dt: %e\n", dt); return dt; - } -__device__ void Update_Step( Thermal_State &TS, Chemistry_Header &Chem_H, Real dt, Real U_dot, Real k_coll_i_HI, Real k_coll_i_HeI, - Real k_coll_i_HeII, Real k_coll_i_HI_HI, Real k_coll_i_HI_HeI, - Real k_recomb_HII, Real k_recomb_HeII, Real k_recomb_HeIII, - float photo_i_HI, float photo_i_HeI, float photo_i_HeII, - Real &HI_dot_prev, Real &e_dot_prev, Real &temp_prev, bool print ){ - - Real d_HI_p, d_HII_p, d_HeI_p, d_HeII_p, d_HeIII_p, d_e_p; +__device__ void Update_Step(Thermal_State &TS, Chemistry_Header &Chem_H, + Real dt, Real U_dot, Real k_coll_i_HI, + Real k_coll_i_HeI, Real k_coll_i_HeII, + Real k_coll_i_HI_HI, Real k_coll_i_HI_HeI, + Real k_recomb_HII, Real k_recomb_HeII, + Real k_recomb_HeIII, float photo_i_HI, + float photo_i_HeI, float photo_i_HeII, + Real &HI_dot_prev, Real &e_dot_prev, + Real &temp_prev, bool print) +{ + Real d_HI_p, d_HII_p, d_HeI_p, d_HeII_p, d_HeIII_p, d_e_p; Real s_coef, a_coef; // Update HI s_coef = k_recomb_HII * TS.d_HII * TS.d_e; - a_coef = k_coll_i_HI * TS.d_e + k_coll_i_HI_HI * TS.d_HI + k_coll_i_HI_HeI * TS.d_HeI/4.0 + photo_i_HI; - d_HI_p = ( dt * s_coef + TS.d_HI ) / ( 1.0 + dt*a_coef ); - if ( print ) printf("Update HI s_coef: %e a_coef: %e HIp: %e \n", s_coef, a_coef, d_HI_p ); + a_coef = k_coll_i_HI * TS.d_e + k_coll_i_HI_HI * TS.d_HI + + k_coll_i_HI_HeI * TS.d_HeI / 4.0 + photo_i_HI; + d_HI_p = (dt * s_coef + TS.d_HI) / (1.0 + dt * a_coef); + if (print) + printf("Update HI s_coef: %e a_coef: %e HIp: %e \n", s_coef, a_coef, + d_HI_p); // Update HII - s_coef = k_coll_i_HI * d_HI_p * TS.d_e + k_coll_i_HI_HI * d_HI_p * d_HI_p + k_coll_i_HI_HeI * d_HI_p * TS.d_HeI/4.0 + photo_i_HI * d_HI_p; - a_coef = k_recomb_HII * TS.d_e; - d_HII_p = ( dt * s_coef + TS.d_HII ) / ( 1.0 + dt*a_coef ); - if ( print ) printf("Update HII s_coef: %e a_coef: %e HIIp: %e \n", s_coef, a_coef, d_HII_p ); + s_coef = k_coll_i_HI * d_HI_p * TS.d_e + k_coll_i_HI_HI * d_HI_p * d_HI_p + + k_coll_i_HI_HeI * d_HI_p * TS.d_HeI / 4.0 + photo_i_HI * d_HI_p; + a_coef = k_recomb_HII * TS.d_e; + d_HII_p = (dt * s_coef + TS.d_HII) / (1.0 + dt * a_coef); + if (print) + printf("Update HII s_coef: %e a_coef: %e HIIp: %e \n", s_coef, a_coef, + d_HII_p); // Update electron - s_coef = k_coll_i_HI_HI * d_HI_p * d_HI_p + k_coll_i_HI_HeI * d_HI_p * TS.d_HeI/4.0 - + photo_i_HI * TS.d_HI + photo_i_HeI * TS.d_HeI/4.0 + photo_i_HeII * TS.d_HeII/4.0 ; - a_coef = - k_coll_i_HI * TS.d_HI + k_recomb_HII * TS.d_HII - k_coll_i_HeI * TS.d_HeI/4.0 + k_recomb_HeII * TS.d_HeII/4.0 - - k_coll_i_HeII * TS.d_HeII/4.0 + k_recomb_HeIII * TS.d_HeIII/4.0; - d_e_p = ( dt * s_coef + TS.d_e ) / ( 1.0 + dt*a_coef ); - if ( print ) printf("Update e s_coef: %e a_coef: %e ep: %e \n", s_coef, a_coef, d_e_p ); + s_coef = k_coll_i_HI_HI * d_HI_p * d_HI_p + + k_coll_i_HI_HeI * d_HI_p * TS.d_HeI / 4.0 + photo_i_HI * TS.d_HI + + photo_i_HeI * TS.d_HeI / 4.0 + photo_i_HeII * TS.d_HeII / 4.0; + a_coef = -k_coll_i_HI * TS.d_HI + k_recomb_HII * TS.d_HII - + k_coll_i_HeI * TS.d_HeI / 4.0 + k_recomb_HeII * TS.d_HeII / 4.0 - + k_coll_i_HeII * TS.d_HeII / 4.0 + k_recomb_HeIII * TS.d_HeIII / 4.0; + d_e_p = (dt * s_coef + TS.d_e) / (1.0 + dt * a_coef); + if (print) + printf("Update e s_coef: %e a_coef: %e ep: %e \n", s_coef, a_coef, + d_e_p); // Update HeI - s_coef = k_recomb_HeII * TS.d_HeII * TS.d_e; - a_coef = k_coll_i_HeI * TS.d_e + photo_i_HeI; - d_HeI_p = ( dt * s_coef + TS.d_HeI ) / ( 1.0 + dt*a_coef ); - if ( print ) printf("Update HeI s_coef: %e a_coef: %e HeIp: %e \n", s_coef, a_coef, d_HeI_p ); + s_coef = k_recomb_HeII * TS.d_HeII * TS.d_e; + a_coef = k_coll_i_HeI * TS.d_e + photo_i_HeI; + d_HeI_p = (dt * s_coef + TS.d_HeI) / (1.0 + dt * a_coef); + if (print) + printf("Update HeI s_coef: %e a_coef: %e HeIp: %e \n", s_coef, a_coef, + d_HeI_p); // Update HeII - s_coef = k_coll_i_HeI * d_HeI_p * TS.d_e + k_recomb_HeIII * TS.d_HeIII * TS.d_e + photo_i_HeI * d_HeI_p; - a_coef = k_recomb_HeII * TS.d_e + k_coll_i_HeII * TS.d_e + photo_i_HeII; - d_HeII_p = ( dt * s_coef + TS.d_HeII ) / ( 1.0 + dt*a_coef ); - if ( print ) printf("Update HeII s_coef: %e a_coef: %e HeIIp: %e \n", s_coef, a_coef, d_HeII_p ); + s_coef = k_coll_i_HeI * d_HeI_p * TS.d_e + + k_recomb_HeIII * TS.d_HeIII * TS.d_e + photo_i_HeI * d_HeI_p; + a_coef = k_recomb_HeII * TS.d_e + k_coll_i_HeII * TS.d_e + photo_i_HeII; + d_HeII_p = (dt * s_coef + TS.d_HeII) / (1.0 + dt * a_coef); + if (print) + printf("Update HeII s_coef: %e a_coef: %e HeIIp: %e \n", s_coef, + a_coef, d_HeII_p); // Update HeIII - s_coef = k_coll_i_HeII * d_HeII_p * TS.d_e + photo_i_HeII * d_HeII_p; - a_coef = k_recomb_HeIII * TS.d_e; - d_HeIII_p = ( dt * s_coef + TS.d_HeIII ) / ( 1.0 + dt*a_coef ); - if ( print ) printf("Update HeIII s_coef: %e a_coef: %e HeIIIp: %e \n", s_coef, a_coef, d_HeIII_p ); + s_coef = k_coll_i_HeII * d_HeII_p * TS.d_e + photo_i_HeII * d_HeII_p; + a_coef = k_recomb_HeIII * TS.d_e; + d_HeIII_p = (dt * s_coef + TS.d_HeIII) / (1.0 + dt * a_coef); + if (print) + printf("Update HeIII s_coef: %e a_coef: %e HeIIIp: %e \n", s_coef, + a_coef, d_HeIII_p); // Record the temperature for the next step - temp_prev = TS.get_temperature( Chem_H.gamma ); + temp_prev = TS.get_temperature(Chem_H.gamma); - HI_dot_prev = fabs( TS.d_HI - d_HI_p ) / fmax( dt, tiny ); - TS.d_HI = fmax( d_HI_p, tiny ); - TS.d_HII = fmax( d_HII_p, tiny ); - TS.d_HeI = fmax( d_HeI_p, tiny ); - TS.d_HeII = fmax( d_HeII_p, tiny ); - TS.d_HeIII = fmax( d_HeIII_p, 1e-5*tiny ); + HI_dot_prev = fabs(TS.d_HI - d_HI_p) / fmax(dt, tiny); + TS.d_HI = fmax(d_HI_p, tiny); + TS.d_HII = fmax(d_HII_p, tiny); + TS.d_HeI = fmax(d_HeI_p, tiny); + TS.d_HeII = fmax(d_HeII_p, tiny); + TS.d_HeIII = fmax(d_HeIII_p, 1e-5 * tiny); // Use charge conservation to determine electron fraction e_dot_prev = TS.d_e; - TS.d_e = TS.d_HII + TS.d_HeII/4.0 + TS.d_HeIII/2.0; - e_dot_prev = fabs( TS.d_e - e_dot_prev ) / fmax( dt, tiny ); + TS.d_e = TS.d_HII + TS.d_HeII / 4.0 + TS.d_HeIII / 2.0; + e_dot_prev = fabs(TS.d_e - e_dot_prev) / fmax(dt, tiny); // Update internal energy TS.U += U_dot / TS.d * dt; #ifdef TEMPERATURE_FLOOR - if ( TS.get_temperature( Chem_H.gamma ) < TEMP_FLOOR ) TS.U = TS.compute_U( TEMP_FLOOR, Chem_H.gamma ); + if (TS.get_temperature(Chem_H.gamma) < TEMP_FLOOR) + TS.U = TS.compute_U(TEMP_FLOOR, Chem_H.gamma); #endif - if ( print ) printf("Updated U: %e \n", TS.U); - - - } - - -__global__ void Update_Chemistry_kernel( Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dt_hydro, Chemistry_Header Chem_H ){ - + if (print) printf("Updated U: %e \n", TS.U); +} +__global__ void Update_Chemistry_kernel(Real *dev_conserved, int nx, int ny, + int nz, int n_ghost, int n_fields, + Real dt_hydro, Chemistry_Header Chem_H) +{ int id, xid, yid, zid, n_cells, n_iter; Real d, d_inv, vx, vy, vz; Real GE, E_kin, dt_chem, t_chem; Real current_a, a3, a2; Real current_z, density_conv, energy_conv; - current_z = Chem_H.current_z; + current_z = Chem_H.current_z; density_conv = Chem_H.density_conversion; energy_conv = Chem_H.energy_conversion; Real U_dot, HI_dot, e_dot, HI_dot_prev, e_dot_prev, temp_prev; - Real k_coll_i_HI, k_coll_i_HeI, k_coll_i_HeII, k_coll_i_HI_HI, k_coll_i_HI_HeI; + Real k_coll_i_HI, k_coll_i_HeI, k_coll_i_HeII, k_coll_i_HI_HI, + k_coll_i_HI_HeI; Real k_recomb_HII, k_recomb_HeII, k_recomb_HeIII; float photo_i_HI, photo_i_HeI, photo_i_HeII; float photo_h_HI, photo_h_HeI, photo_h_HeII; Real correct_H, correct_He; - - n_cells = nx*ny*nz; + n_cells = nx * ny * nz; // get a global thread ID - id = threadIdx.x + blockIdx.x * blockDim.x; - zid = id / (nx*ny); - yid = (id - zid*nx*ny) / nx; - xid = id - zid*nx*ny - yid*nx; + id = threadIdx.x + blockIdx.x * blockDim.x; + zid = id / (nx * ny); + yid = (id - zid * nx * ny) / nx; + xid = id - zid * nx * ny - yid * nx; bool print; // threads corresponding to real cells do the calculation - if (xid > n_ghost-1 && xid < nx-n_ghost && yid > n_ghost-1 && yid < ny-n_ghost && zid > n_ghost-1 && zid < nz-n_ghost) - { - d = dev_conserved[ id]; + if (xid > n_ghost - 1 && xid < nx - n_ghost && yid > n_ghost - 1 && + yid < ny - n_ghost && zid > n_ghost - 1 && zid < nz - n_ghost) { + d = dev_conserved[id]; d_inv = 1.0 / d; - vx = dev_conserved[1*n_cells + id] * d_inv; - vy = dev_conserved[2*n_cells + id] * d_inv; - vz = dev_conserved[3*n_cells + id] * d_inv; - E_kin = 0.5*d*(vx*vx + vy*vy + vz*vz); - #ifdef DE - GE = dev_conserved[(n_fields-1)*n_cells + id]; - #else - GE = dev_conserved[4*n_cells + id] - E_kin; - #endif + vx = dev_conserved[1 * n_cells + id] * d_inv; + vy = dev_conserved[2 * n_cells + id] * d_inv; + vz = dev_conserved[3 * n_cells + id] * d_inv; + E_kin = 0.5 * d * (vx * vx + vy * vy + vz * vz); + #ifdef DE + GE = dev_conserved[(n_fields - 1) * n_cells + id]; + #else + GE = dev_conserved[4 * n_cells + id] - E_kin; + #endif print = false; // if ( xid == n_ghost && yid == n_ghost && zid == n_ghost ) print = true; // Convert to cgs units - current_a = 1 / ( current_z + 1); - a2 = current_a * current_a; - a3 = a2 * current_a; - d *= density_conv / a3; - GE *= energy_conv / a2; + current_a = 1 / (current_z + 1); + a2 = current_a * current_a; + a3 = a2 * current_a; + d *= density_conv / a3; + GE *= energy_conv / a2; dt_hydro = dt_hydro / Chem_H.time_units; -#ifdef COSMOLOGY - dt_hydro *= current_a * current_a / Chem_H.H0 * 1000 * KPC -#endif //COSMOLOGY - //dt_hydro = dt_hydro * current_a * current_a / Chem_H.H0 * 1000 * KPC / Chem_H.time_units; - // delta_a = Chem_H.H0 * sqrt( Chem_H.Omega_M/current_a + Chem_H.Omega_L*pow(current_a, 2) ) / ( 1000 * KPC ) * dt_hydro * Chem_H.time_units; - - // Initialize the thermal state - Thermal_State TS; - TS.d = dev_conserved[ id ] / a3; - TS.d_HI = dev_conserved[ id + n_cells*grid_enum::HI_density ] / a3; - TS.d_HII = dev_conserved[ id + n_cells*grid_enum::HII_density ] / a3; - TS.d_HeI = dev_conserved[ id + n_cells*grid_enum::HeI_density ] / a3; - TS.d_HeII = dev_conserved[ id + n_cells*grid_enum::HeII_density ] / a3; - TS.d_HeIII = dev_conserved[ id + n_cells*grid_enum::HeIII_density ] / a3; - TS.d_e = dev_conserved[ id + n_cells*grid_enum::e_density ] / a3; + #ifdef COSMOLOGY + dt_hydro *= current_a * current_a / Chem_H.H0 * 1000 * + KPC + #endif // COSMOLOGY + // dt_hydro = dt_hydro * current_a * current_a / Chem_H.H0 * + // 1000 * KPC / Chem_H.time_units; + // delta_a = Chem_H.H0 * sqrt( Chem_H.Omega_M/current_a + + // Chem_H.Omega_L*pow(current_a, 2) ) / ( 1000 * KPC ) * + // dt_hydro * Chem_H.time_units; + + // Initialize the thermal state + Thermal_State TS; + TS.d = dev_conserved[id] / a3; + TS.d_HI = dev_conserved[id + n_cells * grid_enum::HI_density] / a3; + TS.d_HII = dev_conserved[id + n_cells * grid_enum::HII_density] / a3; + TS.d_HeI = dev_conserved[id + n_cells * grid_enum::HeI_density] / a3; + TS.d_HeII = dev_conserved[id + n_cells * grid_enum::HeII_density] / a3; + TS.d_HeIII = dev_conserved[id + n_cells * grid_enum::HeIII_density] / a3; + TS.d_e = dev_conserved[id + n_cells * grid_enum::e_density] / a3; TS.U = GE * d_inv * 1e-10; // Ceiling species - TS.d_HI = fmax( TS.d_HI, tiny ); - TS.d_HII = fmax( TS.d_HII, tiny ); - TS.d_HeI = fmax( TS.d_HeI, tiny ); - TS.d_HeII = fmax( TS.d_HeII, tiny ); - TS.d_HeIII = fmax( TS.d_HeIII, 1e-5*tiny ); - TS.d_e = fmax( TS.d_e, tiny ); + TS.d_HI = fmax(TS.d_HI, tiny); + TS.d_HII = fmax(TS.d_HII, tiny); + TS.d_HeI = fmax(TS.d_HeI, tiny); + TS.d_HeII = fmax(TS.d_HeII, tiny); + TS.d_HeIII = fmax(TS.d_HeIII, 1e-5 * tiny); + TS.d_e = fmax(TS.d_e, tiny); // Compute temperature at first iteration - temp_prev = TS.get_temperature( Chem_H.gamma ); + temp_prev = TS.get_temperature(Chem_H.gamma); // if (print){ // printf("current_z: %f\n", current_z ); @@ -518,549 +624,535 @@ __global__ void Update_Chemistry_kernel( Real *dev_conserved, int nx, int ny, in // } // Get the photoheating and photoionization rates at z=current_z - Get_Current_UVB_Rates( current_z, Chem_H, photo_i_HI, photo_i_HeI, photo_i_HeII, - photo_h_HI, photo_h_HeI, photo_h_HeII, print ); + Get_Current_UVB_Rates(current_z, Chem_H, photo_i_HI, photo_i_HeI, + photo_i_HeII, photo_h_HI, photo_h_HeI, photo_h_HeII, + print); HI_dot_prev = 0; e_dot_prev = 0; - n_iter = 0; - t_chem = 0; - while ( t_chem < dt_hydro ){ - - if (print) printf("########################################## Iter %d \n", n_iter ); - - U_dot = Get_Cooling_Rates( TS, Chem_H, Chem_H.dens_number_conv, current_z, temp_prev, - photo_h_HI, photo_h_HeI, photo_h_HeII, print ); - - Get_Reaction_Rates( TS, Chem_H, k_coll_i_HI, k_coll_i_HeI, k_coll_i_HeII, - k_coll_i_HI_HI, k_coll_i_HI_HeI, k_recomb_HII, k_recomb_HeII, k_recomb_HeIII, print ); - - dt_chem = Get_Chemistry_dt( TS, Chem_H, HI_dot, e_dot, U_dot, - k_coll_i_HI, k_coll_i_HeI, k_coll_i_HeII, k_coll_i_HI_HI, k_coll_i_HI_HeI, - k_recomb_HII, k_recomb_HeII, k_recomb_HeIII, - photo_i_HI, photo_i_HeI, photo_i_HeII, - n_iter, HI_dot_prev, e_dot_prev, t_chem, dt_hydro, print ); - - Update_Step( TS, Chem_H, dt_chem, U_dot, k_coll_i_HI, k_coll_i_HeI, k_coll_i_HeII, k_coll_i_HI_HI, k_coll_i_HI_HeI, - k_recomb_HII, k_recomb_HeII, k_recomb_HeIII, photo_i_HI, photo_i_HeI, photo_i_HeII, HI_dot_prev, - e_dot_prev, temp_prev, print ); + n_iter = 0; + t_chem = 0; + while (t_chem < dt_hydro) { + if (print) + printf("########################################## Iter %d \n", n_iter); + + U_dot = Get_Cooling_Rates(TS, Chem_H, Chem_H.dens_number_conv, current_z, + temp_prev, photo_h_HI, photo_h_HeI, + photo_h_HeII, print); + + Get_Reaction_Rates(TS, Chem_H, k_coll_i_HI, k_coll_i_HeI, k_coll_i_HeII, + k_coll_i_HI_HI, k_coll_i_HI_HeI, k_recomb_HII, + k_recomb_HeII, k_recomb_HeIII, print); + + dt_chem = Get_Chemistry_dt( + TS, Chem_H, HI_dot, e_dot, U_dot, k_coll_i_HI, k_coll_i_HeI, + k_coll_i_HeII, k_coll_i_HI_HI, k_coll_i_HI_HeI, k_recomb_HII, + k_recomb_HeII, k_recomb_HeIII, photo_i_HI, photo_i_HeI, photo_i_HeII, + n_iter, HI_dot_prev, e_dot_prev, t_chem, dt_hydro, print); + + Update_Step(TS, Chem_H, dt_chem, U_dot, k_coll_i_HI, k_coll_i_HeI, + k_coll_i_HeII, k_coll_i_HI_HI, k_coll_i_HI_HeI, k_recomb_HII, + k_recomb_HeII, k_recomb_HeIII, photo_i_HI, photo_i_HeI, + photo_i_HeII, HI_dot_prev, e_dot_prev, temp_prev, print); t_chem += dt_chem; n_iter += 1; - if ( n_iter == Chem_H.max_iter ) break; - + if (n_iter == Chem_H.max_iter) break; } - if ( print ) printf("Chem_GPU: N Iter: %d\n", n_iter ); + if (print) printf("Chem_GPU: N Iter: %d\n", n_iter); // Make consistent abundances with the H and He density - correct_H = Chem_H.H_fraction * TS.d / ( TS.d_HI + TS.d_HII ); - correct_He = ( 1.0 - Chem_H.H_fraction ) * TS.d / ( TS.d_HeI + TS.d_HeII + TS.d_HeIII ); - TS.d_HI *= correct_H; - TS.d_HII *= correct_H; - TS.d_HeI *= correct_He; - TS.d_HeII *= correct_He; + correct_H = Chem_H.H_fraction * TS.d / (TS.d_HI + TS.d_HII); + correct_He = + (1.0 - Chem_H.H_fraction) * TS.d / (TS.d_HeI + TS.d_HeII + TS.d_HeIII); + TS.d_HI *= correct_H; + TS.d_HII *= correct_H; + TS.d_HeI *= correct_He; + TS.d_HeII *= correct_He; TS.d_HeIII *= correct_He; // Use charge conservation to determine electron fractioan - TS.d_e = TS.d_HII + TS.d_HeII/4.0 + TS.d_HeIII/2.0; + TS.d_e = TS.d_HII + TS.d_HeII / 4.0 + TS.d_HeIII / 2.0; // Write the Updated Thermal State - dev_conserved[id + n_cells*grid_enum::HI_density ] = TS.d_HI * a3; - dev_conserved[id + n_cells*grid_enum::HII_density ] = TS.d_HII * a3; - dev_conserved[id + n_cells*grid_enum::HeI_density ] = TS.d_HeI * a3; - dev_conserved[id + n_cells*grid_enum::HeII_density ] = TS.d_HeII * a3; - dev_conserved[id + n_cells*grid_enum::HeIII_density ] = TS.d_HeIII * a3; - dev_conserved[id + n_cells*grid_enum::e_density ] = TS.d_e * a3; - d = d / density_conv * a3; - GE = TS.U / d_inv / energy_conv * a2 / 1e-10; - dev_conserved[4*n_cells + id] = GE + E_kin; - #ifdef DE - dev_conserved[(n_fields-1)*n_cells + id] = GE; - #endif - - if ( print ) printf("###########################################\n" ); - if ( print ) printf("Updated HI: %e\n", TS.d_HI * a3 ); - if ( print ) printf("Updated HII: %e\n", TS.d_HII * a3 ); - if ( print ) printf("Updated HeI: %e\n", TS.d_HeI * a3 ); - if ( print ) printf("Updated HeII: %e\n", TS.d_HeII * a3 ); - if ( print ) printf("Updated HeIII: %e\n", TS.d_HeIII * a3 ); - if ( print ) printf("Updated e: %e\n", TS.d_e * a3 ); - if ( print ) printf("Updated GE: %e\n", dev_conserved[(n_fields-1)*n_cells + id] ); - if ( print ) printf("Updated E: %e\n", dev_conserved[4*n_cells + id] ); + dev_conserved[id + n_cells * grid_enum::HI_density] = TS.d_HI * a3; + dev_conserved[id + n_cells * grid_enum::HII_density] = TS.d_HII * a3; + dev_conserved[id + n_cells * grid_enum::HeI_density] = TS.d_HeI * a3; + dev_conserved[id + n_cells * grid_enum::HeII_density] = TS.d_HeII * a3; + dev_conserved[id + n_cells * grid_enum::HeIII_density] = TS.d_HeIII * a3; + dev_conserved[id + n_cells * grid_enum::e_density] = TS.d_e * a3; + d = d / density_conv * a3; + GE = TS.U / d_inv / energy_conv * a2 / 1e-10; + dev_conserved[4 * n_cells + id] = GE + E_kin; + #ifdef DE + dev_conserved[(n_fields - 1) * n_cells + id] = GE; + #endif + if (print) printf("###########################################\n"); + if (print) printf("Updated HI: %e\n", TS.d_HI * a3); + if (print) printf("Updated HII: %e\n", TS.d_HII * a3); + if (print) printf("Updated HeI: %e\n", TS.d_HeI * a3); + if (print) printf("Updated HeII: %e\n", TS.d_HeII * a3); + if (print) printf("Updated HeIII: %e\n", TS.d_HeIII * a3); + if (print) printf("Updated e: %e\n", TS.d_e * a3); + if (print) + printf("Updated GE: %e\n", dev_conserved[(n_fields - 1) * n_cells + id]); + if (print) printf("Updated E: %e\n", dev_conserved[4 * n_cells + id]); } } -void Do_Chemistry_Update(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dt, Chemistry_Header &Chem_H){ - +void Do_Chemistry_Update(Real *dev_conserved, int nx, int ny, int nz, + int n_ghost, int n_fields, Real dt, + Chemistry_Header &Chem_H) +{ float time; cudaEvent_t start, stop; cudaEventCreate(&start); cudaEventCreate(&stop); cudaEventRecord(start, 0); - int ngrid = (nx*ny*nz - 1) / TPB_CHEM + 1; + int ngrid = (nx * ny * nz - 1) / TPB_CHEM + 1; dim3 dim1dGrid(ngrid, 1, 1); dim3 dim1dBlock(TPB_CHEM, 1, 1); - hipLaunchKernelGGL(Update_Chemistry_kernel, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, nz, n_ghost, n_fields, dt, Chem_H ); + hipLaunchKernelGGL(Update_Chemistry_kernel, dim1dGrid, dim1dBlock, 0, 0, + dev_conserved, nx, ny, nz, n_ghost, n_fields, dt, Chem_H); CudaCheckError(); cudaEventRecord(stop, 0); cudaEventSynchronize(stop); cudaEventElapsedTime(&time, start, stop); - Chem_H.runtime_chemistry_step = (Real) time/1000; // (Convert ms to secs ) - + Chem_H.runtime_chemistry_step = (Real)time / 1000; // (Convert ms to secs ) } -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -// Reaction and cooling rates from Grackle - -//Kelvin to eV conversion factor -#ifndef tevk -#define tevk 1.1605e4 -#endif -//Comparison value -#ifndef dhuge -#define dhuge 1.0e30 -#endif -//Small value -#ifndef tiny -#define tiny 1.0e-20 -#endif -// Boltzmann's constant -#ifndef kboltz -#define kboltz 1.3806504e-16 //Boltzmann's constant [cm2gs-2K-1] or [ergK-1] -#endif + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + // Reaction and cooling rates from Grackle + // Kelvin to eV conversion factor + #ifndef tevk + #define tevk 1.1605e4 + #endif + // Comparison value + #ifndef dhuge + #define dhuge 1.0e30 + #endif + // Small value + #ifndef tiny + #define tiny 1.0e-20 + #endif + // Boltzmann's constant + #ifndef kboltz + #define kboltz \ + 1.3806504e-16 // Boltzmann's constant [cm2gs-2K-1] or [ergK-1] + #endif // Calculation of k1 (HI + e --> HII + 2e) // k1_rate -__device__ Real coll_i_HI_rate( Real T, Real units ) +__device__ Real coll_i_HI_rate(Real T, Real units) { - Real T_ev = T / 11605.0; - Real logT_ev = log(T_ev); - - Real k1 = exp( -32.71396786375 - + 13.53655609057*logT_ev - - 5.739328757388*pow(logT_ev, 2) - + 1.563154982022*pow(logT_ev, 3) - - 0.2877056004391*pow(logT_ev, 4) - + 0.03482559773736999*pow(logT_ev, 5) - - 0.00263197617559*pow(logT_ev, 6) - + 0.0001119543953861*pow(logT_ev, 7) - - 2.039149852002e-6*pow(logT_ev, 8)) / units; - if (T_ev <= 0.8){ - k1 = fmax(tiny, k1); - } - return k1; + Real T_ev = T / 11605.0; + Real logT_ev = log(T_ev); + + Real k1 = + exp(-32.71396786375 + 13.53655609057 * logT_ev - + 5.739328757388 * pow(logT_ev, 2) + 1.563154982022 * pow(logT_ev, 3) - + 0.2877056004391 * pow(logT_ev, 4) + + 0.03482559773736999 * pow(logT_ev, 5) - + 0.00263197617559 * pow(logT_ev, 6) + + 0.0001119543953861 * pow(logT_ev, 7) - + 2.039149852002e-6 * pow(logT_ev, 8)) / + units; + if (T_ev <= 0.8) { + k1 = fmax(tiny, k1); + } + return k1; } -//Calculation of k3 (HeI + e --> HeII + 2e) -// k3_rate -__device__ Real coll_i_HeI_rate( Real T, Real units ) +// Calculation of k3 (HeI + e --> HeII + 2e) +// k3_rate +__device__ Real coll_i_HeI_rate(Real T, Real units) { - Real T_ev = T / 11605.0; - Real logT_ev = log(T_ev); - - if (T_ev > 0.8){ - return exp( -44.09864886561001 - + 23.91596563469*logT_ev - - 10.75323019821*pow(logT_ev, 2) - + 3.058038757198*pow(logT_ev, 3) - - 0.5685118909884001*pow(logT_ev, 4) - + 0.06795391233790001*pow(logT_ev, 5) - - 0.005009056101857001*pow(logT_ev, 6) - + 0.0002067236157507*pow(logT_ev, 7) - - 3.649161410833e-6*pow(logT_ev, 8)) / units; - } else { - return tiny; - } + Real T_ev = T / 11605.0; + Real logT_ev = log(T_ev); + + if (T_ev > 0.8) { + return exp(-44.09864886561001 + 23.91596563469 * logT_ev - + 10.75323019821 * pow(logT_ev, 2) + + 3.058038757198 * pow(logT_ev, 3) - + 0.5685118909884001 * pow(logT_ev, 4) + + 0.06795391233790001 * pow(logT_ev, 5) - + 0.005009056101857001 * pow(logT_ev, 6) + + 0.0002067236157507 * pow(logT_ev, 7) - + 3.649161410833e-6 * pow(logT_ev, 8)) / + units; + } else { + return tiny; + } } -//Calculation of k4 (HeII + e --> HeI + photon) -// k4_rate -__device__ Real recomb_HeII_rate( Real T, Real units, bool use_case_B ) +// Calculation of k4 (HeII + e --> HeI + photon) +// k4_rate +__device__ Real recomb_HeII_rate(Real T, Real units, bool use_case_B) { - Real T_ev = T / 11605.0; - Real logT_ev = log(T_ev); - //If case B recombination on. - if (use_case_B){ - return 1.26e-14 * pow(5.7067e5/T, 0.75) / units; - } + Real T_ev = T / 11605.0; + Real logT_ev = log(T_ev); + // If case B recombination on. + if (use_case_B) { + return 1.26e-14 * pow(5.7067e5 / T, 0.75) / units; + } - //If case B recombination off. - if (T_ev > 0.8){ - return (1.54e-9*(1.0 + 0.3 / exp(8.099328789667/T_ev)) - / (exp(40.49664394833662/T_ev)*pow(T_ev, 1.5)) - + 3.92e-13/pow(T_ev, 0.6353)) / units; - } else { - return 3.92e-13/pow(T_ev, 0.6353) / units; - } + // If case B recombination off. + if (T_ev > 0.8) { + return (1.54e-9 * (1.0 + 0.3 / exp(8.099328789667 / T_ev)) / + (exp(40.49664394833662 / T_ev) * pow(T_ev, 1.5)) + + 3.92e-13 / pow(T_ev, 0.6353)) / + units; + } else { + return 3.92e-13 / pow(T_ev, 0.6353) / units; + } } // k4_rate Case A -__device__ Real recomb_HeII_rate_case_A( Real T, Real units ) +__device__ Real recomb_HeII_rate_case_A(Real T, Real units) { - Real T_ev = T / 11605.0; - Real logT_ev = log(T_ev); - if (T_ev > 0.8){ - return (1.54e-9*(1.0 + 0.3 / exp(8.099328789667/T_ev)) - / (exp(40.49664394833662/T_ev)*pow(T_ev, 1.5)) - + 3.92e-13/pow(T_ev, 0.6353)) / units; - } else { - return 3.92e-13/pow(T_ev, 0.6353) / units; - } + Real T_ev = T / 11605.0; + Real logT_ev = log(T_ev); + if (T_ev > 0.8) { + return (1.54e-9 * (1.0 + 0.3 / exp(8.099328789667 / T_ev)) / + (exp(40.49664394833662 / T_ev) * pow(T_ev, 1.5)) + + 3.92e-13 / pow(T_ev, 0.6353)) / + units; + } else { + return 3.92e-13 / pow(T_ev, 0.6353) / units; + } } // k4_rate Case B -__device__ Real recomb_HeII_rate_case_B( Real T, Real units ) +__device__ Real recomb_HeII_rate_case_B(Real T, Real units) { - //If case B recombination on. - return 1.26e-14 * pow(5.7067e5/T, 0.75) / units; + // If case B recombination on. + return 1.26e-14 * pow(5.7067e5 / T, 0.75) / units; } - -//Calculation of k2 (HII + e --> HI + photon) -// k2_rate -__device__ Real recomb_HII_rate( Real T, Real units, bool use_case_B ) +// Calculation of k2 (HII + e --> HI + photon) +// k2_rate +__device__ Real recomb_HII_rate(Real T, Real units, bool use_case_B) { - if (use_case_B) { - if (T < 1.0e9) { - return 4.881357e-6*pow(T, -1.5) \ - * pow((1.0 + 1.14813e2*pow(T, -0.407)), -2.242) / units; - } else { - return tiny; - } + if (use_case_B) { + if (T < 1.0e9) { + return 4.881357e-6 * pow(T, -1.5) * + pow((1.0 + 1.14813e2 * pow(T, -0.407)), -2.242) / units; } else { - if (T > 5500) { - //Convert temperature to appropriate form. - Real T_ev = T / tevk; - Real logT_ev = log(T_ev); - - return exp( -28.61303380689232 \ - - 0.7241125657826851*logT_ev \ - - 0.02026044731984691*pow(logT_ev, 2) \ - - 0.002380861877349834*pow(logT_ev, 3) \ - - 0.0003212605213188796*pow(logT_ev, 4) \ - - 0.00001421502914054107*pow(logT_ev, 5) \ - + 4.989108920299513e-6*pow(logT_ev, 6) \ - + 5.755614137575758e-7*pow(logT_ev, 7) \ - - 1.856767039775261e-8*pow(logT_ev, 8) \ - - 3.071135243196595e-9*pow(logT_ev, 9)) / units; - } else { - return recomb_HeII_rate(T, units, use_case_B); - } + return tiny; } -} -// k2_rate Case A -__device__ Real recomb_HII_rate_case_A( Real T, Real units ) -{ + } else { if (T > 5500) { - //Convert temperature to appropriate form. - Real T_ev = T / tevk; - Real logT_ev = log(T_ev); - - return exp( -28.61303380689232 \ - - 0.7241125657826851*logT_ev \ - - 0.02026044731984691*pow(logT_ev, 2) \ - - 0.002380861877349834*pow(logT_ev, 3) \ - - 0.0003212605213188796*pow(logT_ev, 4) \ - - 0.00001421502914054107*pow(logT_ev, 5) \ - + 4.989108920299513e-6*pow(logT_ev, 6) \ - + 5.755614137575758e-7*pow(logT_ev, 7) \ - - 1.856767039775261e-8*pow(logT_ev, 8) \ - - 3.071135243196595e-9*pow(logT_ev, 9)) / units; + // Convert temperature to appropriate form. + Real T_ev = T / tevk; + Real logT_ev = log(T_ev); + + return exp(-28.61303380689232 - 0.7241125657826851 * logT_ev - + 0.02026044731984691 * pow(logT_ev, 2) - + 0.002380861877349834 * pow(logT_ev, 3) - + 0.0003212605213188796 * pow(logT_ev, 4) - + 0.00001421502914054107 * pow(logT_ev, 5) + + 4.989108920299513e-6 * pow(logT_ev, 6) + + 5.755614137575758e-7 * pow(logT_ev, 7) - + 1.856767039775261e-8 * pow(logT_ev, 8) - + 3.071135243196595e-9 * pow(logT_ev, 9)) / + units; } else { - return recomb_HeII_rate_case_A(T, units ); + return recomb_HeII_rate(T, units, use_case_B); } + } +} +// k2_rate Case A +__device__ Real recomb_HII_rate_case_A(Real T, Real units) +{ + if (T > 5500) { + // Convert temperature to appropriate form. + Real T_ev = T / tevk; + Real logT_ev = log(T_ev); + + return exp(-28.61303380689232 - 0.7241125657826851 * logT_ev - + 0.02026044731984691 * pow(logT_ev, 2) - + 0.002380861877349834 * pow(logT_ev, 3) - + 0.0003212605213188796 * pow(logT_ev, 4) - + 0.00001421502914054107 * pow(logT_ev, 5) + + 4.989108920299513e-6 * pow(logT_ev, 6) + + 5.755614137575758e-7 * pow(logT_ev, 7) - + 1.856767039775261e-8 * pow(logT_ev, 8) - + 3.071135243196595e-9 * pow(logT_ev, 9)) / + units; + } else { + return recomb_HeII_rate_case_A(T, units); + } } // k2_rate Case B -__device__ Real recomb_HII_rate_case_B( Real T, Real units ) +__device__ Real recomb_HII_rate_case_B(Real T, Real units) { - if (T < 1.0e9) { - return 4.881357e-6*pow(T, -1.5) \ - * pow((1.0 + 1.14813e2*pow(T, -0.407)), -2.242) / units; - } else { - return tiny; - } + if (T < 1.0e9) { + return 4.881357e-6 * pow(T, -1.5) * + pow((1.0 + 1.14813e2 * pow(T, -0.407)), -2.242) / units; + } else { + return tiny; + } } - -//Calculation of k5 (HeII + e --> HeIII + 2e) -// k5_rate -__device__ Real coll_i_HeII_rate( Real T, Real units ) +// Calculation of k5 (HeII + e --> HeIII + 2e) +// k5_rate +__device__ Real coll_i_HeII_rate(Real T, Real units) { - Real T_ev = T / 11605.0; - Real logT_ev = log(T_ev); - - Real k5; - if (T_ev > 0.8){ - k5 = exp(-68.71040990212001 - + 43.93347632635*logT_ev - - 18.48066993568*pow(logT_ev, 2) - + 4.701626486759002*pow(logT_ev, 3) - - 0.7692466334492*pow(logT_ev, 4) - + 0.08113042097303*pow(logT_ev, 5) - - 0.005324020628287001*pow(logT_ev, 6) - + 0.0001975705312221*pow(logT_ev, 7) - - 3.165581065665e-6*pow(logT_ev, 8)) / units; - } else { - k5 = tiny; - } - return k5; + Real T_ev = T / 11605.0; + Real logT_ev = log(T_ev); + + Real k5; + if (T_ev > 0.8) { + k5 = exp(-68.71040990212001 + 43.93347632635 * logT_ev - + 18.48066993568 * pow(logT_ev, 2) + + 4.701626486759002 * pow(logT_ev, 3) - + 0.7692466334492 * pow(logT_ev, 4) + + 0.08113042097303 * pow(logT_ev, 5) - + 0.005324020628287001 * pow(logT_ev, 6) + + 0.0001975705312221 * pow(logT_ev, 7) - + 3.165581065665e-6 * pow(logT_ev, 8)) / + units; + } else { + k5 = tiny; + } + return k5; } -//Calculation of k6 (HeIII + e --> HeII + photon) -// k6_rate -__device__ Real recomb_HeIII_rate( Real T, Real units, bool use_case_B ) +// Calculation of k6 (HeIII + e --> HeII + photon) +// k6_rate +__device__ Real recomb_HeIII_rate(Real T, Real units, bool use_case_B) { - Real k6; - //Has case B recombination setting. - if (use_case_B) { - if (T < 1.0e9) { - k6 = 7.8155e-5*pow(T, -1.5) - * pow((1.0 + 2.0189e2*pow(T, -0.407)), -2.242) / units; - } else { - k6 = tiny; - } + Real k6; + // Has case B recombination setting. + if (use_case_B) { + if (T < 1.0e9) { + k6 = 7.8155e-5 * pow(T, -1.5) * + pow((1.0 + 2.0189e2 * pow(T, -0.407)), -2.242) / units; } else { - k6 = 3.36e-10/sqrt(T)/pow(T/1.0e3, 0.2) - / (1.0 + pow(T/1.0e6, 0.7)) / units; + k6 = tiny; } - return k6; + } else { + k6 = 3.36e-10 / sqrt(T) / pow(T / 1.0e3, 0.2) / + (1.0 + pow(T / 1.0e6, 0.7)) / units; + } + return k6; } // k6_rate Case A -__device__ Real recomb_HeIII_rate_case_A( Real T, Real units ) +__device__ Real recomb_HeIII_rate_case_A(Real T, Real units) { - Real k6; - //Has case B recombination setting. - k6 = 3.36e-10/sqrt(T)/pow(T/1.0e3, 0.2) - / (1.0 + pow(T/1.0e6, 0.7)) / units; - return k6; + Real k6; + // Has case B recombination setting. + k6 = 3.36e-10 / sqrt(T) / pow(T / 1.0e3, 0.2) / (1.0 + pow(T / 1.0e6, 0.7)) / + units; + return k6; } // k6_rate Case B -__device__ Real recomb_HeIII_rate_case_B( Real T, Real units ) +__device__ Real recomb_HeIII_rate_case_B(Real T, Real units) { - Real k6; - //Has case B recombination setting. - if (T < 1.0e9) { - k6 = 7.8155e-5*pow(T, -1.5) - * pow((1.0 + 2.0189e2*pow(T, -0.407)), -2.242) / units; - } else { - k6 = tiny; - } - return k6; + Real k6; + // Has case B recombination setting. + if (T < 1.0e9) { + k6 = 7.8155e-5 * pow(T, -1.5) * + pow((1.0 + 2.0189e2 * pow(T, -0.407)), -2.242) / units; + } else { + k6 = tiny; + } + return k6; } -//Calculation of k57 (HI + HI --> HII + HI + e) -// k57_rate -__device__ Real coll_i_HI_HI_rate( Real T, Real units ) +// Calculation of k57 (HI + HI --> HII + HI + e) +// k57_rate +__device__ Real coll_i_HI_HI_rate(Real T, Real units) { - // These rate coefficients are from Lenzuni, Chernoff & Salpeter (1991). - // k57 value based on experimental cross-sections from Gealy & van Zyl (1987). - if (T > 3.0e3) { - return 1.2e-17 * pow(T, 1.2) * exp(-1.578e5 / T) / units; - } else { - return tiny; - } + // These rate coefficients are from Lenzuni, Chernoff & Salpeter (1991). + // k57 value based on experimental cross-sections from Gealy & van Zyl (1987). + if (T > 3.0e3) { + return 1.2e-17 * pow(T, 1.2) * exp(-1.578e5 / T) / units; + } else { + return tiny; + } } -//Calculation of k58 (HI + HeI --> HII + HeI + e) -// k58_rate -__device__ Real coll_i_HI_HeI_rate( Real T, Real units ) +// Calculation of k58 (HI + HeI --> HII + HeI + e) +// k58_rate +__device__ Real coll_i_HI_HeI_rate(Real T, Real units) { - // These rate coefficients are from Lenzuni, Chernoff & Salpeter (1991). - // k58 value based on cross-sections from van Zyl, Le & Amme (1981). - if (T > 3.0e3) { - return 1.75e-17 * pow(T, 1.3) * exp(-1.578e5 / T) / units; - } else { - return tiny; - } + // These rate coefficients are from Lenzuni, Chernoff & Salpeter (1991). + // k58 value based on cross-sections from van Zyl, Le & Amme (1981). + if (T > 3.0e3) { + return 1.75e-17 * pow(T, 1.3) * exp(-1.578e5 / T) / units; + } else { + return tiny; + } } -//Calculation of ceHI. -// Cooling collisional excitation HI -__host__ __device__ Real cool_ceHI_rate( Real T, Real units ) +// Calculation of ceHI. +// Cooling collisional excitation HI +__host__ __device__ Real cool_ceHI_rate(Real T, Real units) { - return 7.5e-19*exp( -fmin(log(dhuge), 118348.0 / T) ) - / ( 1.0 + sqrt(T / 1.0e5) ) / units; + return 7.5e-19 * exp(-fmin(log(dhuge), 118348.0 / T)) / + (1.0 + sqrt(T / 1.0e5)) / units; } -//Calculation of ceHeI. -// Cooling collisional ionization HeI -__host__ __device__ Real cool_ceHeI_rate( Real T, Real units ) +// Calculation of ceHeI. +// Cooling collisional ionization HeI +__host__ __device__ Real cool_ceHeI_rate(Real T, Real units) { - return 9.1e-27*exp(-fmin(log(dhuge), 13179.0/T)) - * pow(T, -0.1687) / ( 1.0 + sqrt(T/1.0e5) ) / units; + return 9.1e-27 * exp(-fmin(log(dhuge), 13179.0 / T)) * pow(T, -0.1687) / + (1.0 + sqrt(T / 1.0e5)) / units; } -//Calculation of ceHeII. -// Cooling collisional excitation HeII -__host__ __device__ Real cool_ceHeII_rate( Real T, Real units ) +// Calculation of ceHeII. +// Cooling collisional excitation HeII +__host__ __device__ Real cool_ceHeII_rate(Real T, Real units) { - return 5.54e-17*exp(-fmin(log(dhuge), 473638.0/T)) - * pow(T, -0.3970) / ( 1.0 + sqrt(T/1.0e5) ) / units; + return 5.54e-17 * exp(-fmin(log(dhuge), 473638.0 / T)) * pow(T, -0.3970) / + (1.0 + sqrt(T / 1.0e5)) / units; } -//Calculation of ciHeIS. -// Cooling collisional ionization HeIS -__host__ __device__ Real cool_ciHeIS_rate( Real T, Real units ) +// Calculation of ciHeIS. +// Cooling collisional ionization HeIS +__host__ __device__ Real cool_ciHeIS_rate(Real T, Real units) { - return 5.01e-27*pow(T, -0.1687) / ( 1.0 + sqrt(T/1.0e5) ) - * exp(-fmin(log(dhuge), 55338.0/T)) / units; + return 5.01e-27 * pow(T, -0.1687) / (1.0 + sqrt(T / 1.0e5)) * + exp(-fmin(log(dhuge), 55338.0 / T)) / units; } -//Calculation of ciHI. -// Cooling collisional ionization HI -__host__ __device__ Real cool_ciHI_rate( Real T, Real units ) +// Calculation of ciHI. +// Cooling collisional ionization HI +__host__ __device__ Real cool_ciHI_rate(Real T, Real units) { - //Collisional ionization. Polynomial fit from Tom Abel. - return 2.18e-11 * coll_i_HI_rate(T, 1) / units; + // Collisional ionization. Polynomial fit from Tom Abel. + return 2.18e-11 * coll_i_HI_rate(T, 1) / units; } - -//Calculation of ciHeI. -// Cooling collisional ionization HeI -__host__ __device__ Real cool_ciHeI_rate( Real T, Real units ) +// Calculation of ciHeI. +// Cooling collisional ionization HeI +__host__ __device__ Real cool_ciHeI_rate(Real T, Real units) { - //Collisional ionization. Polynomial fit from Tom Abel. - return 3.94e-11 * coll_i_HeI_rate(T, 1) / units; + // Collisional ionization. Polynomial fit from Tom Abel. + return 3.94e-11 * coll_i_HeI_rate(T, 1) / units; } -//Calculation of ciHeII. -// Cooling collisional ionization HeII -__host__ __device__ Real cool_ciHeII_rate( Real T, Real units ) +// Calculation of ciHeII. +// Cooling collisional ionization HeII +__host__ __device__ Real cool_ciHeII_rate(Real T, Real units) { - //Collisional ionization. Polynomial fit from Tom Abel. - return 8.72e-11 * coll_i_HeII_rate(T, 1) / units; + // Collisional ionization. Polynomial fit from Tom Abel. + return 8.72e-11 * coll_i_HeII_rate(T, 1) / units; } -//Calculation of reHII. -// Cooling recombination HII -__host__ __device__ Real cool_reHII_rate( Real T, Real units, bool use_case_B ) +// Calculation of reHII. +// Cooling recombination HII +__host__ __device__ Real cool_reHII_rate(Real T, Real units, bool use_case_B) { - Real lambdaHI = 2.0 * 157807.0 / T; - if (use_case_B) { - return 3.435e-30 * T * pow(lambdaHI, 1.970) - / pow( 1.0 + pow(lambdaHI/2.25, 0.376), 3.720) - / units; - } else { - return 1.778e-29 * T * pow(lambdaHI, 1.965) - / pow(1.0 + pow(lambdaHI/0.541, 0.502), 2.697) - / units; - } + Real lambdaHI = 2.0 * 157807.0 / T; + if (use_case_B) { + return 3.435e-30 * T * pow(lambdaHI, 1.970) / + pow(1.0 + pow(lambdaHI / 2.25, 0.376), 3.720) / units; + } else { + return 1.778e-29 * T * pow(lambdaHI, 1.965) / + pow(1.0 + pow(lambdaHI / 0.541, 0.502), 2.697) / units; + } } -//Calculation of reHII. -// Cooling recombination HII Case A -__host__ __device__ Real cool_reHII_rate_case_A( Real T, Real units ) +// Calculation of reHII. +// Cooling recombination HII Case A +__host__ __device__ Real cool_reHII_rate_case_A(Real T, Real units) { - Real lambdaHI = 2.0 * 157807.0 / T; - return 1.778e-29 * T * pow(lambdaHI, 1.965) - / pow(1.0 + pow(lambdaHI/0.541, 0.502), 2.697) - / units; + Real lambdaHI = 2.0 * 157807.0 / T; + return 1.778e-29 * T * pow(lambdaHI, 1.965) / + pow(1.0 + pow(lambdaHI / 0.541, 0.502), 2.697) / units; } -//Calculation of reHII. -// Cooling recombination HII Case B -__host__ __device__ Real cool_reHII_rate_case_B( Real T, Real units ) +// Calculation of reHII. +// Cooling recombination HII Case B +__host__ __device__ Real cool_reHII_rate_case_B(Real T, Real units) { - Real lambdaHI = 2.0 * 157807.0 / T; - return 3.435e-30 * T * pow(lambdaHI, 1.970) - / pow( 1.0 + pow(lambdaHI/2.25, 0.376), 3.720) - / units; + Real lambdaHI = 2.0 * 157807.0 / T; + return 3.435e-30 * T * pow(lambdaHI, 1.970) / + pow(1.0 + pow(lambdaHI / 2.25, 0.376), 3.720) / units; } -//Calculation of reHII. -// Cooling recombination HeII -__host__ __device__ Real cool_reHeII1_rate( Real T, Real units, bool use_case_B ) +// Calculation of reHII. +// Cooling recombination HeII +__host__ __device__ Real cool_reHeII1_rate(Real T, Real units, bool use_case_B) { - Real lambdaHeII = 2.0 * 285335.0 / T; - if ( use_case_B ) { - return 1.26e-14 * kboltz * T * pow(lambdaHeII, 0.75) - / units; - } else { - return 3e-14 * kboltz * T * pow(lambdaHeII, 0.654) - / units; - } + Real lambdaHeII = 2.0 * 285335.0 / T; + if (use_case_B) { + return 1.26e-14 * kboltz * T * pow(lambdaHeII, 0.75) / units; + } else { + return 3e-14 * kboltz * T * pow(lambdaHeII, 0.654) / units; + } } -//Calculation of reHII. -// Cooling recombination HeII Case A -__host__ __device__ Real cool_reHeII1_rate_case_A( Real T, Real units ) +// Calculation of reHII. +// Cooling recombination HeII Case A +__host__ __device__ Real cool_reHeII1_rate_case_A(Real T, Real units) { - Real lambdaHeII = 2.0 * 285335.0 / T; - return 3e-14 * kboltz * T * pow(lambdaHeII, 0.654) - / units; + Real lambdaHeII = 2.0 * 285335.0 / T; + return 3e-14 * kboltz * T * pow(lambdaHeII, 0.654) / units; } -//Calculation of reHII. -// Cooling recombination HeII Case B -__host__ __device__ Real cool_reHeII1_rate_case_B( Real T, Real units ) +// Calculation of reHII. +// Cooling recombination HeII Case B +__host__ __device__ Real cool_reHeII1_rate_case_B(Real T, Real units) { - Real lambdaHeII = 2.0 * 285335.0 / T; - return 1.26e-14 * kboltz * T * pow(lambdaHeII, 0.75) - / units; + Real lambdaHeII = 2.0 * 285335.0 / T; + return 1.26e-14 * kboltz * T * pow(lambdaHeII, 0.75) / units; } -//Calculation of reHII2. -// Cooling recombination HeII Dielectronic -__host__ __device__ Real cool_reHeII2_rate( Real T, Real units ) +// Calculation of reHII2. +// Cooling recombination HeII Dielectronic +__host__ __device__ Real cool_reHeII2_rate(Real T, Real units) { - //Dielectronic recombination (Cen, 1992). - return 1.24e-13 * pow(T, -1.5) - * exp( -fmin(log(dhuge), 470000.0 / T) ) - * ( 1.0 + 0.3 * exp( -fmin(log(dhuge), 94000.0 / T) ) ) - / units; + // Dielectronic recombination (Cen, 1992). + return 1.24e-13 * pow(T, -1.5) * exp(-fmin(log(dhuge), 470000.0 / T)) * + (1.0 + 0.3 * exp(-fmin(log(dhuge), 94000.0 / T))) / units; } -//Calculation of reHIII. -// Cooling recombination HeIII -__host__ __device__ Real cool_reHeIII_rate( Real T, Real units, bool use_case_B ) +// Calculation of reHIII. +// Cooling recombination HeIII +__host__ __device__ Real cool_reHeIII_rate(Real T, Real units, bool use_case_B) { - Real lambdaHeIII = 2.0 * 631515.0 / T; - if ( use_case_B ) { - return 8.0 * 3.435e-30 * T * pow(lambdaHeIII, 1.970) - / pow(1.0 + pow(lambdaHeIII / 2.25, 0.376), 3.720) - / units; - } else { - return 8.0 * 1.778e-29 * T * pow(lambdaHeIII, 1.965) - / pow(1.0 + pow(lambdaHeIII / 0.541, 0.502), 2.697) - / units; - } + Real lambdaHeIII = 2.0 * 631515.0 / T; + if (use_case_B) { + return 8.0 * 3.435e-30 * T * pow(lambdaHeIII, 1.970) / + pow(1.0 + pow(lambdaHeIII / 2.25, 0.376), 3.720) / units; + } else { + return 8.0 * 1.778e-29 * T * pow(lambdaHeIII, 1.965) / + pow(1.0 + pow(lambdaHeIII / 0.541, 0.502), 2.697) / units; + } } -//Calculation of reHIII. -// Cooling recombination HeIII Case A -__host__ __device__ Real cool_reHeIII_rate_case_A( Real T, Real units ) +// Calculation of reHIII. +// Cooling recombination HeIII Case A +__host__ __device__ Real cool_reHeIII_rate_case_A(Real T, Real units) { - Real lambdaHeIII = 2.0 * 631515.0 / T; - return 8.0 * 1.778e-29 * T * pow(lambdaHeIII, 1.965) - / pow(1.0 + pow(lambdaHeIII / 0.541, 0.502), 2.697) - / units; + Real lambdaHeIII = 2.0 * 631515.0 / T; + return 8.0 * 1.778e-29 * T * pow(lambdaHeIII, 1.965) / + pow(1.0 + pow(lambdaHeIII / 0.541, 0.502), 2.697) / units; } -//Calculation of reHIII. -// Cooling recombination HeIII Case B -__host__ __device__ Real cool_reHeIII_rate_case_B( Real T, Real units ) +// Calculation of reHIII. +// Cooling recombination HeIII Case B +__host__ __device__ Real cool_reHeIII_rate_case_B(Real T, Real units) { - Real lambdaHeIII = 2.0 * 631515.0 / T; - return 8.0 * 3.435e-30 * T * pow(lambdaHeIII, 1.970) - / pow(1.0 + pow(lambdaHeIII / 2.25, 0.376), 3.720) - / units; + Real lambdaHeIII = 2.0 * 631515.0 / T; + return 8.0 * 3.435e-30 * T * pow(lambdaHeIII, 1.970) / + pow(1.0 + pow(lambdaHeIII / 2.25, 0.376), 3.720) / units; } -//Calculation of brem. -// Cooling Bremsstrahlung -__host__ __device__ Real cool_brem_rate( Real T, Real units ) +// Calculation of brem. +// Cooling Bremsstrahlung +__host__ __device__ Real cool_brem_rate(Real T, Real units) { - return 1.43e-27 * sqrt(T) - * ( 1.1 + 0.34 * exp( -pow(5.5 - log10(T), 2) / 3.0) ) - / units; + return 1.43e-27 * sqrt(T) * + (1.1 + 0.34 * exp(-pow(5.5 - log10(T), 2) / 3.0)) / units; } - - - - #endif diff --git a/src/chemistry_gpu/chemistry_gpu.h b/src/chemistry_gpu/chemistry_gpu.h index 751059f07..12b0c0364 100644 --- a/src/chemistry_gpu/chemistry_gpu.h +++ b/src/chemistry_gpu/chemistry_gpu.h @@ -1,26 +1,23 @@ #ifndef CHEMISTRY_GPU_H #define CHEMISTRY_GPU_H -#include"../global/global.h" +#include "../global/global.h" #define CHEM_TINY 1e-20 -//Define the type of a generic rate function. -typedef Real (*Rate_Function_T)( Real, Real ); - +// Define the type of a generic rate function. +typedef Real (*Rate_Function_T)(Real, Real); // #define TEXTURES_UVB_INTERPOLATION -struct Chemistry_Header -{ +struct Chemistry_Header { Real gamma; Real density_conversion; Real energy_conversion; Real current_z; Real runtime_chemistry_step; Real H_fraction; - - + // Units system Real a_value; Real density_units; @@ -30,45 +27,45 @@ struct Chemistry_Header Real cooling_units; Real reaction_units; Real dens_number_conv; - + // Cosmological parameters Real H0; Real Omega_M; Real Omega_L; - + // Interpolation tables for the rates - int N_Temp_bins; + int N_Temp_bins; Real Temp_start; Real Temp_end; - + Real *cool_ceHI_d; Real *cool_ceHeI_d; Real *cool_ceHeII_d; - + Real *cool_ciHI_d; Real *cool_ciHeI_d; Real *cool_ciHeII_d; Real *cool_ciHeIS_d; - + Real *cool_reHII_d; Real *cool_reHeII1_d; Real *cool_reHeII2_d; Real *cool_reHeIII_d; - + Real *cool_brem_d; - + Real cool_compton; - + Real *k_coll_i_HI_d; Real *k_coll_i_HeI_d; Real *k_coll_i_HeII_d; Real *k_coll_i_HI_HI_d; Real *k_coll_i_HI_HeI_d; - + Real *k_recomb_HII_d; Real *k_recomb_HeII_d; Real *k_recomb_HeIII_d; - + int max_iter; int n_uvb_rates_samples; @@ -79,32 +76,26 @@ struct Chemistry_Header float *photo_heat_HI_rate_d; float *photo_heat_HeI_rate_d; float *photo_heat_HeII_rate_d; - }; - - - #ifdef CHEMISTRY_GPU class Chem_GPU { -public: - + public: int nx; int ny; int nz; - - + bool use_case_B_recombination; - + Real scale_factor_UVB_on; float *cosmo_params_h; float *cosmo_params_d; - + int n_uvb_rates_samples; - float *rates_z_h; + float *rates_z_h; float *Heat_rates_HI_h; float *Heat_rates_HeI_h; float *Heat_rates_HeII_h; @@ -119,52 +110,52 @@ class Chem_GPU float *Ion_rates_HI_d; float *Ion_rates_HeI_d; float *Ion_rates_HeII_d; - + struct Chemistry_Header H; - - - struct Fields - { + + struct Fields { Real *temperature_h; } Fields; - - - void Allocate_Array_GPU_Real( Real **array_dev, int size ); - void Copy_Real_Array_to_Device( int size, Real *array_h, Real *array_d ); - void Free_Array_GPU_Real( Real *array_dev ); - void Allocate_Array_GPU_float( float **array_dev, int size ); - void Copy_Float_Array_to_Device( int size, float *array_h, float *array_d ); - void Free_Array_GPU_float( float *array_dev ); - - void Initialize( struct parameters *P ); - - void Generate_Reaction_Rate_Table( Real **rate_table_array_d, Rate_Function_T rate_function, Real units ); - + + void Allocate_Array_GPU_Real(Real **array_dev, int size); + void Copy_Real_Array_to_Device(int size, Real *array_h, Real *array_d); + void Free_Array_GPU_Real(Real *array_dev); + void Allocate_Array_GPU_float(float **array_dev, int size); + void Copy_Float_Array_to_Device(int size, float *array_h, float *array_d); + void Free_Array_GPU_float(float *array_dev); + + void Initialize(struct parameters *P); + + void Generate_Reaction_Rate_Table(Real **rate_table_array_d, + Rate_Function_T rate_function, Real units); + void Initialize_Cooling_Rates(); - - void Initialize_Reaction_Rates(); - - void Initialize_UVB_Ionization_and_Heating_Rates( struct parameters *P ); - - void Load_UVB_Ionization_and_Heating_Rates( struct parameters *P ); - - void Copy_UVB_Rates_to_GPU(); - - void Reset( ); - - #ifdef TEXTURES_UVB_INTERPOLATION - void Bind_GPU_Textures( int size, float *H_HI_h, float *H_HeI_h, float *H_HeII_h , float *I_HI_h, float *I_HeI_h, float *I_HeII_h ); - #endif -}; + void Initialize_Reaction_Rates(); + + void Initialize_UVB_Ionization_and_Heating_Rates(struct parameters *P); + + void Load_UVB_Ionization_and_Heating_Rates(struct parameters *P); + void Copy_UVB_Rates_to_GPU(); -/*! \fn void Cooling_Update(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dt, Real gamma) -* \brief When passed an array of conserved variables and a timestep, update the ionization fractions of H and He and update -the internal energy to account for radiative cooling and photoheating from the UV background. */ -void Do_Chemistry_Update(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dt, Chemistry_Header &Chem_H); + void Reset(); + #ifdef TEXTURES_UVB_INTERPOLATION + void Bind_GPU_Textures(int size, float *H_HI_h, float *H_HeI_h, + float *H_HeII_h, float *I_HI_h, float *I_HeI_h, + float *I_HeII_h); + #endif +}; +/*! \fn void Cooling_Update(Real *dev_conserved, int nx, int ny, int nz, int +n_ghost, int n_fields, Real dt, Real gamma) +* \brief When passed an array of conserved variables and a timestep, update the +ionization fractions of H and He and update the internal energy to account for +radiative cooling and photoheating from the UV background. */ +void Do_Chemistry_Update(Real *dev_conserved, int nx, int ny, int nz, + int n_ghost, int n_fields, Real dt, + Chemistry_Header &Chem_H); #endif #endif \ No newline at end of file diff --git a/src/chemistry_gpu/chemistry_io.cpp b/src/chemistry_gpu/chemistry_io.cpp index 6f54c3f28..915bfcd65 100644 --- a/src/chemistry_gpu/chemistry_io.cpp +++ b/src/chemistry_gpu/chemistry_io.cpp @@ -1,111 +1,95 @@ #ifdef CHEMISTRY_GPU -#include -#include -#include -#include -#include -#include // provides std::strcpy (strcpy in this file) -#include "chemistry_gpu.h" -#include "../io/io.h" + #include // provides std::strcpy (strcpy in this file) + #include + #include + #include + #include + #include + #include "../io/io.h" + #include "chemistry_gpu.h" using namespace std; - -void Chem_GPU::Load_UVB_Ionization_and_Heating_Rates( struct parameters *P ){ - - +void Chem_GPU::Load_UVB_Ionization_and_Heating_Rates(struct parameters *P) +{ char uvb_filename[100]; // create the filename to read from strcpy(uvb_filename, P->UVB_rates_file); - chprintf( " Loading UVB rates: %s\n", uvb_filename); - + chprintf(" Loading UVB rates: %s\n", uvb_filename); - std::fstream in(uvb_filename); std::string line; std::vector> v; int i = 0; - if (in.is_open()){ - while (std::getline(in, line)) - { - if ( line.find("#") == 0 ) continue; - - float value; - std::stringstream ss(line); - // chprintf( "%s \n", line.c_str() ); - v.push_back(std::vector()); - - while (ss >> value){ - v[i].push_back(value); - } - i += 1; + if (in.is_open()) { + while (std::getline(in, line)) { + if (line.find("#") == 0) continue; + + float value; + std::stringstream ss(line); + // chprintf( "%s \n", line.c_str() ); + v.push_back(std::vector()); + + while (ss >> value) { + v[i].push_back(value); + } + i += 1; } in.close(); - } else{ + } else { chprintf(" Error: Unable to open UVB rates file: %s\n", uvb_filename); exit(1); } - + int n_lines = i; - - chprintf( " Loaded %d lines in file\n", n_lines); - - rates_z_h = (float *)malloc(sizeof(float)*n_lines); - Heat_rates_HI_h = (float *)malloc(sizeof(float)*n_lines); - Heat_rates_HeI_h = (float *)malloc(sizeof(float)*n_lines); - Heat_rates_HeII_h = (float *)malloc(sizeof(float)*n_lines); - Ion_rates_HI_h = (float *)malloc(sizeof(float)*n_lines); - Ion_rates_HeI_h = (float *)malloc(sizeof(float)*n_lines); - Ion_rates_HeII_h = (float *)malloc(sizeof(float)*n_lines); - - Real eV_to_ergs, heat_units, ion_units; + + chprintf(" Loaded %d lines in file\n", n_lines); + + rates_z_h = (float *)malloc(sizeof(float) * n_lines); + Heat_rates_HI_h = (float *)malloc(sizeof(float) * n_lines); + Heat_rates_HeI_h = (float *)malloc(sizeof(float) * n_lines); + Heat_rates_HeII_h = (float *)malloc(sizeof(float) * n_lines); + Ion_rates_HI_h = (float *)malloc(sizeof(float) * n_lines); + Ion_rates_HeI_h = (float *)malloc(sizeof(float) * n_lines); + Ion_rates_HeII_h = (float *)malloc(sizeof(float) * n_lines); + + Real eV_to_ergs, heat_units, ion_units; eV_to_ergs = 1.60218e-12; heat_units = eV_to_ergs / H.cooling_units; ion_units = H.time_units; - - for (i=0; i rates_z_h[i+1] ){ - chprintf( " ERROR: UVB rates must be ordered such that redshift is increasing as the rows increase in the file\n", uvb_filename); + + for (i = 0; i < n_lines - 1; i++) { + if (rates_z_h[i] > rates_z_h[i + 1]) { + chprintf( + " ERROR: UVB rates must be ordered such that redshift is increasing " + "as the rows increase in the file\n", + uvb_filename); exit(2); } } - + n_uvb_rates_samples = n_lines; - scale_factor_UVB_on = 1 / (rates_z_h[n_uvb_rates_samples-1] + 1 ); + scale_factor_UVB_on = 1 / (rates_z_h[n_uvb_rates_samples - 1] + 1); chprintf(" Loaded UVB rates: \n"); - chprintf(" N redshift values: %d \n", n_uvb_rates_samples ); - chprintf(" z_min = %f z_max = %f \n", rates_z_h[0], rates_z_h[n_uvb_rates_samples-1] ); - chprintf(" UVB on: a=%f \n", scale_factor_UVB_on ); - - + chprintf(" N redshift values: %d \n", n_uvb_rates_samples); + chprintf(" z_min = %f z_max = %f \n", rates_z_h[0], + rates_z_h[n_uvb_rates_samples - 1]); + chprintf(" UVB on: a=%f \n", scale_factor_UVB_on); } - - - - - - - - - - - - - - #endif diff --git a/src/chemistry_gpu/rates.cuh b/src/chemistry_gpu/rates.cuh index 5a9455824..95886502b 100644 --- a/src/chemistry_gpu/rates.cuh +++ b/src/chemistry_gpu/rates.cuh @@ -1,170 +1,173 @@ #ifdef CHEMISTRY_GPU -#include "chemistry_gpu.h" -#include"../global/global_cuda.h" - - + #include "../global/global_cuda.h" + #include "chemistry_gpu.h" // Calculation of k1 (HI + e --> HII + 2e) // k1_rate -__host__ __device__ Real coll_i_HI_rate(Real T, Real units ); +__host__ __device__ Real coll_i_HI_rate(Real T, Real units); -//Calculation of k3 (HeI + e --> HeII + 2e) -// k3_rate -__host__ __device__ Real coll_i_HeI_rate(Real T, Real units ); +// Calculation of k3 (HeI + e --> HeII + 2e) +// k3_rate +__host__ __device__ Real coll_i_HeI_rate(Real T, Real units); -//Calculation of k4 (HeII + e --> HeI + photon) -// k4_rate -__host__ __device__ Real recomb_HeII_rate(Real T, Real units, bool use_case_B ); +// Calculation of k4 (HeII + e --> HeI + photon) +// k4_rate +__host__ __device__ Real recomb_HeII_rate(Real T, Real units, bool use_case_B); // k4_rate Case A -__host__ __device__ Real recomb_HeII_rate_case_A(Real T, Real units ); +__host__ __device__ Real recomb_HeII_rate_case_A(Real T, Real units); // k4_rate Case B -__host__ __device__ Real recomb_HeII_rate_case_B(Real T, Real units ); +__host__ __device__ Real recomb_HeII_rate_case_B(Real T, Real units); -//Calculation of k2 (HII + e --> HI + photon) -// k2_rate -__host__ __device__ Real recomb_HII_rate(Real T, Real units, bool use_case_B ); +// Calculation of k2 (HII + e --> HI + photon) +// k2_rate +__host__ __device__ Real recomb_HII_rate(Real T, Real units, bool use_case_B); // k2_rate Case A -__host__ __device__ Real recomb_HII_rate_case_A(Real T, Real units ); +__host__ __device__ Real recomb_HII_rate_case_A(Real T, Real units); // k2_rate Case B -__host__ __device__ Real recomb_HII_rate_case_B(Real T, Real units ); +__host__ __device__ Real recomb_HII_rate_case_B(Real T, Real units); -//Calculation of k5 (HeII + e --> HeIII + 2e) -// k5_rate -__host__ __device__ Real coll_i_HeII_rate(Real T, Real units ); +// Calculation of k5 (HeII + e --> HeIII + 2e) +// k5_rate +__host__ __device__ Real coll_i_HeII_rate(Real T, Real units); -//Calculation of k6 (HeIII + e --> HeII + photon) -// k6_rate -__host__ __device__ Real recomb_HeIII_rate(Real T, Real units, bool use_case_B ); +// Calculation of k6 (HeIII + e --> HeII + photon) +// k6_rate +__host__ __device__ Real recomb_HeIII_rate(Real T, Real units, bool use_case_B); // k6_rate Case A -__host__ __device__ Real recomb_HeIII_rate_case_A(Real T, Real units ); +__host__ __device__ Real recomb_HeIII_rate_case_A(Real T, Real units); // k6_rate Case B -__host__ __device__ Real recomb_HeIII_rate_case_B(Real T, Real units ); - -//Calculation of k57 (HI + HI --> HII + HI + e) -// k57_rate -__host__ __device__ Real coll_i_HI_HI_rate(Real T, Real units ); +__host__ __device__ Real recomb_HeIII_rate_case_B(Real T, Real units); -//Calculation of k58 (HI + HeI --> HII + HeI + e) -// k58_rate -__host__ __device__ Real coll_i_HI_HeI_rate(Real T, Real units ); +// Calculation of k57 (HI + HI --> HII + HI + e) +// k57_rate +__host__ __device__ Real coll_i_HI_HI_rate(Real T, Real units); -//Calculation of ceHI. -// Cooling collisional excitation HI -__host__ __device__ Real cool_ceHI_rate(Real T, Real units ); +// Calculation of k58 (HI + HeI --> HII + HeI + e) +// k58_rate +__host__ __device__ Real coll_i_HI_HeI_rate(Real T, Real units); -//Calculation of ceHeI. -// Cooling collisional ionization HeI -__host__ __device__ Real cool_ceHeI_rate(Real T, Real units ); +// Calculation of ceHI. +// Cooling collisional excitation HI +__host__ __device__ Real cool_ceHI_rate(Real T, Real units); -//Calculation of ceHeII. -// Cooling collisional excitation HeII -__host__ __device__ Real cool_ceHeII_rate(Real T, Real units ); +// Calculation of ceHeI. +// Cooling collisional ionization HeI +__host__ __device__ Real cool_ceHeI_rate(Real T, Real units); -//Calculation of ciHeIS. -// Cooling collisional ionization HeIS -__host__ __device__ Real cool_ciHeIS_rate(Real T, Real units ); +// Calculation of ceHeII. +// Cooling collisional excitation HeII +__host__ __device__ Real cool_ceHeII_rate(Real T, Real units); -//Calculation of ciHI. -// Cooling collisional ionization HI -__host__ __device__ Real cool_ciHI_rate(Real T, Real units ); +// Calculation of ciHeIS. +// Cooling collisional ionization HeIS +__host__ __device__ Real cool_ciHeIS_rate(Real T, Real units); +// Calculation of ciHI. +// Cooling collisional ionization HI +__host__ __device__ Real cool_ciHI_rate(Real T, Real units); -//Calculation of ciHeI. -// Cooling collisional ionization HeI -__host__ __device__ Real cool_ciHeI_rate(Real T, Real units ); +// Calculation of ciHeI. +// Cooling collisional ionization HeI +__host__ __device__ Real cool_ciHeI_rate(Real T, Real units); -//Calculation of ciHeII. -// Cooling collisional ionization HeII -__host__ __device__ Real cool_ciHeII_rate(Real T, Real units ); +// Calculation of ciHeII. +// Cooling collisional ionization HeII +__host__ __device__ Real cool_ciHeII_rate(Real T, Real units); - -//Calculation of reHII. -// Cooling recombination HII -__host__ __device__ Real cool_reHII_rate(Real T, Real units, bool use_case_B ); +// Calculation of reHII. +// Cooling recombination HII +__host__ __device__ Real cool_reHII_rate(Real T, Real units, bool use_case_B); // Cooling recombination HII Case A -__host__ __device__ Real cool_reHII_rate_case_A(Real T, Real units ); +__host__ __device__ Real cool_reHII_rate_case_A(Real T, Real units); // Cooling recombination HII Case B -__host__ __device__ Real cool_reHII_rate_case_B(Real T, Real units ); +__host__ __device__ Real cool_reHII_rate_case_B(Real T, Real units); -//Calculation of reHII. -// Cooling recombination HeII -__host__ __device__ Real cool_reHeII1_rate(Real T, Real units, bool use_case_B ); +// Calculation of reHII. +// Cooling recombination HeII +__host__ __device__ Real cool_reHeII1_rate(Real T, Real units, bool use_case_B); // Cooling recombination HeII Case A -__host__ __device__ Real cool_reHeII1_rate_case_A(Real T, Real units ); +__host__ __device__ Real cool_reHeII1_rate_case_A(Real T, Real units); // Cooling recombination HeII Case B -__host__ __device__ Real cool_reHeII1_rate_case_B(Real T, Real units ); +__host__ __device__ Real cool_reHeII1_rate_case_B(Real T, Real units); -//Calculation of reHII2. -// Cooling recombination HeII Dielectronic -__host__ __device__ Real cool_reHeII2_rate(Real T, Real units ); +// Calculation of reHII2. +// Cooling recombination HeII Dielectronic +__host__ __device__ Real cool_reHeII2_rate(Real T, Real units); -//Calculation of reHIII. -// Cooling recombination HeIII -__host__ __device__ Real cool_reHeIII_rate(Real T, Real units, bool use_case_B ); +// Calculation of reHIII. +// Cooling recombination HeIII +__host__ __device__ Real cool_reHeIII_rate(Real T, Real units, bool use_case_B); // Cooling recombination HeIII Case A -__host__ __device__ Real cool_reHeIII_rate_case_A(Real T, Real units ); +__host__ __device__ Real cool_reHeIII_rate_case_A(Real T, Real units); // Cooling recombination HeIII Case B -__host__ __device__ Real cool_reHeIII_rate_case_B(Real T, Real units ); +__host__ __device__ Real cool_reHeIII_rate_case_B(Real T, Real units); -//Calculation of brem. -// Cooling Bremsstrahlung -__host__ __device__ Real cool_brem_rate(Real T, Real units ); +// Calculation of brem. +// Cooling Bremsstrahlung +__host__ __device__ Real cool_brem_rate(Real T, Real units); -//Calculation of comp. -// Compton cooling +// Calculation of comp. +// Compton cooling __host__ __device__ Real comp_rate(Real n_e, Real T, Real zr, Real units); -__host__ __device__ Real cool_compton_rate( Real T, Real units ); - +__host__ __device__ Real cool_compton_rate(Real T, Real units); // X-ray compton heating -__host__ __device__ Real xray_heat_rate( Real n_e, Real T, Real Redshift, Real units ); - - -// Colisional excitation of neutral hydrogen (HI) and singly ionized helium (HeII) -Real __device__ Collisional_Ionization_Rate_e_HI_Abel97( Real temp ); - -Real __device__ Recombination_Rate_HII_Abel97( Real temp ); - -Real __device__ Collisional_Ionization_Rate_e_HeI_Abel97( Real temp ); - -Real __device__ Collisional_Ionization_Rate_e_HeII_Abel97( Real temp ); +__host__ __device__ Real xray_heat_rate(Real n_e, Real T, Real Redshift, + Real units); -Real __device__ Collisional_Ionization_Rate_HI_HI_Lenzuni91( Real temp ); +// Colisional excitation of neutral hydrogen (HI) and singly ionized helium +// (HeII) +Real __device__ Collisional_Ionization_Rate_e_HI_Abel97(Real temp); -Real __device__ Collisional_Ionization_Rate_HII_HI_Lenzuni91( Real temp ); +Real __device__ Recombination_Rate_HII_Abel97(Real temp); -Real __device__ Collisional_Ionization_Rate_HeI_HI_Lenzuni91( Real temp ); +Real __device__ Collisional_Ionization_Rate_e_HeI_Abel97(Real temp); -Real __device__ Recombination_Rate_HII_Hui97( Real temp ); +Real __device__ Collisional_Ionization_Rate_e_HeII_Abel97(Real temp); -Real __device__ Recombination_Rate_HeII_Hui97( Real temp ); +Real __device__ Collisional_Ionization_Rate_HI_HI_Lenzuni91(Real temp); -Real __device__ Recombination_Rate_HeIII_Hui97( Real temp ); +Real __device__ Collisional_Ionization_Rate_HII_HI_Lenzuni91(Real temp); +Real __device__ Collisional_Ionization_Rate_HeI_HI_Lenzuni91(Real temp); -Real __device__ Cooling_Rate_Recombination_HII_Hui97( Real n_e, Real n_HII, Real temp ); +Real __device__ Recombination_Rate_HII_Hui97(Real temp); -Real __device__ Cooling_Rate_Recombination_HeII_Hui97( Real n_e, Real n_HII, Real temp ); +Real __device__ Recombination_Rate_HeII_Hui97(Real temp); -Real __device__ Cooling_Rate_Recombination_HeIII_Hui97( Real n_e, Real n_HII, Real temp ); +Real __device__ Recombination_Rate_HeIII_Hui97(Real temp); -Real __device__ Recombination_Rate_dielectronic_HeII_Hui97( Real temp ); +Real __device__ Cooling_Rate_Recombination_HII_Hui97(Real n_e, Real n_HII, + Real temp); -Real __device__ Cooling_Rate_Recombination_dielectronic_HeII_Hui97( Real n_e, Real n_HeII, Real temp ); +Real __device__ Cooling_Rate_Recombination_HeII_Hui97(Real n_e, Real n_HII, + Real temp); -Real __device__ Collisional_Ionization_Rate_e_HI_Hui97( Real temp ); +Real __device__ Cooling_Rate_Recombination_HeIII_Hui97(Real n_e, Real n_HII, + Real temp); -Real __device__ Cooling_Rate_Collisional_Excitation_e_HI_Hui97( Real n_e, Real n_HI, Real temp ); +Real __device__ Recombination_Rate_dielectronic_HeII_Hui97(Real temp); -Real __device__ Cooling_Rate_Collisional_Excitation_e_HeII_Hui97( Real n_e, Real n_HeII, Real temp ); +Real __device__ Cooling_Rate_Recombination_dielectronic_HeII_Hui97(Real n_e, + Real n_HeII, + Real temp); -// Compton cooling off the CMB -Real __device__ Cooling_Rate_Compton_CMB_MillesOstriker01( Real n_e, Real temp, Real z ); +Real __device__ Collisional_Ionization_Rate_e_HI_Hui97(Real temp); -// Real __device__ Cooling_Rate_Compton_CMB_Peebles93( Real n_e, Real temp, Real current_z, cosmo ); +Real __device__ Cooling_Rate_Collisional_Excitation_e_HI_Hui97(Real n_e, + Real n_HI, + Real temp); +Real __device__ Cooling_Rate_Collisional_Excitation_e_HeII_Hui97(Real n_e, + Real n_HeII, + Real temp); +// Compton cooling off the CMB +Real __device__ Cooling_Rate_Compton_CMB_MillesOstriker01(Real n_e, Real temp, + Real z); +// Real __device__ Cooling_Rate_Compton_CMB_Peebles93( Real n_e, Real temp, Real +// current_z, cosmo ); #endif \ No newline at end of file diff --git a/src/chemistry_gpu/rates_Katz95.cuh b/src/chemistry_gpu/rates_Katz95.cuh index 4942f1558..01a88c12b 100644 --- a/src/chemistry_gpu/rates_Katz95.cuh +++ b/src/chemistry_gpu/rates_Katz95.cuh @@ -1,58 +1,70 @@ #ifdef CHEMISTRY_GPU -#include "chemistry_gpu.h" -#include"../global/global_cuda.h" + #include "../global/global_cuda.h" + #include "chemistry_gpu.h" +// Colisional excitation of neutral hydrogen (HI) and singly ionized helium +// (HeII) -// Colisional excitation of neutral hydrogen (HI) and singly ionized helium (HeII) - -Real __device__ Cooling_Rate_Collisional_Excitation_e_HI_Katz95( Real n_e, Real n_HI, Real temp ); - -Real __device__ Cooling_Rate_Collisional_Excitation_e_HeII_Katz95( Real n_e, Real n_HeII, Real temp ); - +Real __device__ Cooling_Rate_Collisional_Excitation_e_HI_Katz95(Real n_e, + Real n_HI, + Real temp); +Real __device__ Cooling_Rate_Collisional_Excitation_e_HeII_Katz95(Real n_e, + Real n_HeII, + Real temp); // Colisional ionization of HI, HeI and HeII -Real __device__ Cooling_Rate_Collisional_Ionization_e_HI_Katz95( Real n_e, Real n_HI, Real temp ); +Real __device__ Cooling_Rate_Collisional_Ionization_e_HI_Katz95(Real n_e, + Real n_HI, + Real temp); +Real __device__ Cooling_Rate_Collisional_Ionization_e_HeI_Katz95(Real n_e, + Real n_HeI, + Real temp); -Real __device__ Cooling_Rate_Collisional_Ionization_e_HeI_Katz95( Real n_e, Real n_HeI, Real temp ); +Real __device__ Cooling_Rate_Collisional_Ionization_e_HeII_Katz95(Real n_e, + Real n_HeII, + Real temp); -Real __device__ Cooling_Rate_Collisional_Ionization_e_HeII_Katz95( Real n_e, Real n_HeII, Real temp ); +Real __device__ Collisional_Ionization_Rate_e_HI_Katz95(Real temp); -Real __device__ Collisional_Ionization_Rate_e_HI_Katz95( Real temp ); +Real __device__ Collisional_Ionization_Rate_e_HeI_Katz95(Real temp); -Real __device__ Collisional_Ionization_Rate_e_HeI_Katz95( Real temp ); - -Real __device__ Collisional_Ionization_Rate_e_HeII_Katz95( Real temp ); +Real __device__ Collisional_Ionization_Rate_e_HeII_Katz95(Real temp); // Standard Recombination of HII, HeII and HeIII -Real __device__ Cooling_Rate_Recombination_HII_Katz95( Real n_e, Real n_HII, Real temp ); +Real __device__ Cooling_Rate_Recombination_HII_Katz95(Real n_e, Real n_HII, + Real temp); -Real __device__ Cooling_Rate_Recombination_HeII_Katz95( Real n_e, Real n_HeII, Real temp ); +Real __device__ Cooling_Rate_Recombination_HeII_Katz95(Real n_e, Real n_HeII, + Real temp); -Real __device__ Cooling_Rate_Recombination_HeIII_Katz95( Real n_e, Real n_HeIII, Real temp ); +Real __device__ Cooling_Rate_Recombination_HeIII_Katz95(Real n_e, Real n_HeIII, + Real temp); -Real __device__ Recombination_Rate_HII_Katz95( Real temp ); +Real __device__ Recombination_Rate_HII_Katz95(Real temp); -Real __device__ Recombination_Rate_HeII_Katz95( Real temp ); +Real __device__ Recombination_Rate_HeII_Katz95(Real temp); -Real __device__ Recombination_Rate_HeIII_Katz95( Real temp ); +Real __device__ Recombination_Rate_HeIII_Katz95(Real temp); // Dielectronic recombination of HeII -Real __device__ Cooling_Rate_Recombination_dielectronic_HeII_Katz95( Real n_e, Real n_HeII, Real temp ); - -Real __device__ Recombination_Rate_dielectronic_HeII_Katz95( Real temp ); - -// Free-Free emission (Bremsstrahlung) -Real __device__ gaunt_factor( Real log10_T ); +Real __device__ Cooling_Rate_Recombination_dielectronic_HeII_Katz95(Real n_e, + Real n_HeII, + Real temp); -Real __device__ Cooling_Rate_Bremsstrahlung_Katz95( Real n_e, Real n_HII, Real n_HeII, Real n_HeIII, Real temp ); +Real __device__ Recombination_Rate_dielectronic_HeII_Katz95(Real temp); +// Free-Free emission (Bremsstrahlung) +Real __device__ gaunt_factor(Real log10_T); -// Compton cooling off the CMB -Real __device__ Cooling_Rate_Compton_CMB_Katz95( Real n_e, Real temp, Real z ); +Real __device__ Cooling_Rate_Bremsstrahlung_Katz95(Real n_e, Real n_HII, + Real n_HeII, Real n_HeIII, + Real temp); +// Compton cooling off the CMB +Real __device__ Cooling_Rate_Compton_CMB_Katz95(Real n_e, Real temp, Real z); #endif \ No newline at end of file diff --git a/src/cooling/cooling_cuda.cu b/src/cooling/cooling_cuda.cu index cb942da03..0fa3f3c36 100644 --- a/src/cooling/cooling_cuda.cu +++ b/src/cooling/cooling_cuda.cu @@ -2,109 +2,113 @@ * \brief Functions to calculate cooling rate for a given rho, P, dt. */ #ifdef CUDA -#ifdef COOLING_GPU + #ifdef COOLING_GPU -#include "../utils/gpu.hpp" -#include -#include "../global/global.h" -#include "../global/global_cuda.h" -#include "../cooling/cooling_cuda.h" + #include -#ifdef CLOUDY_COOL -#include "../cooling/texture_utilities.h" -#endif + #include "../cooling/cooling_cuda.h" + #include "../global/global.h" + #include "../global/global_cuda.h" + #include "../utils/gpu.hpp" + + #ifdef CLOUDY_COOL + #include "../cooling/texture_utilities.h" + #endif cudaTextureObject_t coolTexObj = 0; cudaTextureObject_t heatTexObj = 0; -void Cooling_Update(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dt, Real gamma){ - - int n_cells = nx*ny*nz; - int ngrid = (n_cells + TPB - 1) / TPB; +void Cooling_Update(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, + int n_fields, Real dt, Real gamma) +{ + int n_cells = nx * ny * nz; + int ngrid = (n_cells + TPB - 1) / TPB; dim3 dim1dGrid(ngrid, 1, 1); dim3 dim1dBlock(TPB, 1, 1); - hipLaunchKernelGGL(cooling_kernel, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, nz, n_ghost, n_fields, dt, gama, coolTexObj, heatTexObj); - CudaCheckError(); + hipLaunchKernelGGL(cooling_kernel, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, + nx, ny, nz, n_ghost, n_fields, dt, gama, coolTexObj, + heatTexObj); + CudaCheckError(); } - -/*! \fn void cooling_kernel(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dt, Real gamma, cudaTextureObject_t coolTexObj, cudaTextureObject_t heatTexObj) - * \brief When passed an array of conserved variables and a timestep, adjust the value - of the total energy for each cell according to the specified cooling function. */ -__global__ void cooling_kernel(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dt, Real gamma, cudaTextureObject_t coolTexObj, cudaTextureObject_t heatTexObj) +/*! \fn void cooling_kernel(Real *dev_conserved, int nx, int ny, int nz, int + n_ghost, int n_fields, Real dt, Real gamma, cudaTextureObject_t coolTexObj, + cudaTextureObject_t heatTexObj) + * \brief When passed an array of conserved variables and a timestep, adjust + the value of the total energy for each cell according to the specified cooling + function. */ +__global__ void cooling_kernel(Real *dev_conserved, int nx, int ny, int nz, + int n_ghost, int n_fields, Real dt, Real gamma, + cudaTextureObject_t coolTexObj, + cudaTextureObject_t heatTexObj) { - - int n_cells = nx*ny*nz; + int n_cells = nx * ny * nz; int is, ie, js, je, ks, ke; is = n_ghost; - ie = nx-n_ghost; + ie = nx - n_ghost; if (ny == 1) { js = 0; je = 1; - } - else { + } else { js = n_ghost; - je = ny-n_ghost; + je = ny - n_ghost; } if (nz == 1) { ks = 0; ke = 1; - } - else { + } else { ks = n_ghost; - ke = nz-n_ghost; + ke = nz - n_ghost; } Real d, E; Real n, T, T_init; Real del_T, dt_sub; - Real mu; // mean molecular weight - Real cool; //cooling rate per volume, erg/s/cm^3 - //#ifndef DE + Real mu; // mean molecular weight + Real cool; // cooling rate per volume, erg/s/cm^3 + // #ifndef DE Real vx, vy, vz, p; - //#endif - #ifdef DE + // #endif + #ifdef DE Real ge; - #endif + #endif mu = 0.6; - //mu = 1.27; + // mu = 1.27; // get a global thread ID - int blockId = blockIdx.x + blockIdx.y*gridDim.x; - int id = threadIdx.x + blockId * blockDim.x; - int zid = id / (nx*ny); - int yid = (id - zid*nx*ny) / nx; - int xid = id - zid*nx*ny - yid*nx; - + int blockId = blockIdx.x + blockIdx.y * gridDim.x; + int id = threadIdx.x + blockId * blockDim.x; + int zid = id / (nx * ny); + int yid = (id - zid * nx * ny) / nx; + int xid = id - zid * nx * ny - yid * nx; // only threads corresponding to real cells do the calculation if (xid >= is && xid < ie && yid >= js && yid < je && zid >= ks && zid < ke) { - // load values of density and pressure - d = dev_conserved[ id]; - E = dev_conserved[4*n_cells + id]; + d = dev_conserved[id]; + E = dev_conserved[4 * n_cells + id]; // don't apply cooling if this thread crashed if (E < 0.0 || E != E) return; - //#ifndef DE - vx = dev_conserved[1*n_cells + id] / d; - vy = dev_conserved[2*n_cells + id] / d; - vz = dev_conserved[3*n_cells + id] / d; - p = (E - 0.5*d*(vx*vx + vy*vy + vz*vz)) * (gamma - 1.0); - p = fmax(p, (Real) TINY_NUMBER); - //#endif + // #ifndef DE + vx = dev_conserved[1 * n_cells + id] / d; + vy = dev_conserved[2 * n_cells + id] / d; + vz = dev_conserved[3 * n_cells + id] / d; + p = (E - 0.5 * d * (vx * vx + vy * vy + vz * vz)) * (gamma - 1.0); + p = fmax(p, (Real)TINY_NUMBER); + // #endif #ifdef DE - ge = dev_conserved[(n_fields-1)*n_cells + id] / d; - ge = fmax(ge, (Real) TINY_NUMBER); + ge = dev_conserved[(n_fields - 1) * n_cells + id] / d; + ge = fmax(ge, (Real)TINY_NUMBER); #endif // calculate the number density of the gas (in cgs) - n = d*DENSITY_UNIT / (mu * MP); + n = d * DENSITY_UNIT / (mu * MP); // calculate the temperature of the gas - T_init = p*PRESSURE_UNIT/ (n*KB); + T_init = p * PRESSURE_UNIT / (n * KB); #ifdef DE - T_init = d*ge*(gamma-1.0)*PRESSURE_UNIT/(n*KB); + T_init = d * ge * (gamma - 1.0) * PRESSURE_UNIT / (n * KB); #endif // calculate cooling rate per volume @@ -117,34 +121,34 @@ __global__ void cooling_kernel(Real *dev_conserved, int nx, int ny, int nz, int #endif // calculate change in temperature given dt - del_T = cool*dt*TIME_UNIT*(gamma-1.0)/(n*KB); + del_T = cool * dt * TIME_UNIT * (gamma - 1.0) / (n * KB); // limit change in temperature to 1% - while (del_T/T > 0.01) { + while (del_T / T > 0.01) { // what dt gives del_T = 0.01*T? - dt_sub = 0.01*T*n*KB/(cool*TIME_UNIT*(gamma-1.0)); + dt_sub = 0.01 * T * n * KB / (cool * TIME_UNIT * (gamma - 1.0)); // apply that dt - T -= cool*dt_sub*TIME_UNIT*(gamma-1.0)/(n*KB); + T -= cool * dt_sub * TIME_UNIT * (gamma - 1.0) / (n * KB); // how much time is left from the original timestep? dt -= dt_sub; - // calculate cooling again - #ifdef CLOUDY_COOL + // calculate cooling again + #ifdef CLOUDY_COOL cool = Cloudy_cool(n, T, coolTexObj, heatTexObj); - #else + #else cool = CIE_cool(n, T); - #endif + #endif // calculate new change in temperature - del_T = cool*dt*TIME_UNIT*(gamma-1.0)/(n*KB); + del_T = cool * dt * TIME_UNIT * (gamma - 1.0) / (n * KB); } // calculate final temperature T -= del_T; // adjust value of energy based on total change in temperature - del_T = T_init - T; // total change in T - E -= n*KB*del_T / ((gamma-1.0)*ENERGY_UNIT); + del_T = T_init - T; // total change in T + E -= n * KB * del_T / ((gamma - 1.0) * ENERGY_UNIT); #ifdef DE - ge -= KB*del_T / (mu*MP*(gamma-1.0)*SP_ENERGY_UNIT); + ge -= KB * del_T / (mu * MP * (gamma - 1.0) * SP_ENERGY_UNIT); #endif // calculate cooling rate for new T @@ -152,49 +156,43 @@ __global__ void cooling_kernel(Real *dev_conserved, int nx, int ny, int nz, int cool = Cloudy_cool(n, T, coolTexObj, heatTexObj); #else cool = CIE_cool(n, T); - //printf("%d %d %d %e %e %e\n", xid, yid, zid, n, T, cool); + // printf("%d %d %d %e %e %e\n", xid, yid, zid, n, T, cool); #endif // and send back from kernel - dev_conserved[4*n_cells + id] = E; + dev_conserved[4 * n_cells + id] = E; #ifdef DE - dev_conserved[(n_fields-1)*n_cells + id] = d*ge; + dev_conserved[(n_fields - 1) * n_cells + id] = d * ge; #endif - } - } - /* \fn __device__ Real test_cool(Real n, Real T) * \brief Cooling function from Creasey 2011. */ __device__ Real test_cool(int tid, Real n, Real T) { Real T0, T1, lambda, cool; - T0 = 10000.0; - T1 = 20*T0; + T0 = 10000.0; + T1 = 20 * T0; cool = 0.0; - //lambda = 5.0e-24; //cooling coefficient, 5e-24 erg cm^3 s^-1 - lambda = 5.0e-20; //cooling coefficient, 5e-24 erg cm^3 s^-1 + // lambda = 5.0e-24; //cooling coefficient, 5e-24 erg cm^3 s^-1 + lambda = 5.0e-20; // cooling coefficient, 5e-24 erg cm^3 s^-1 // constant cooling rate - //cool = n*n*lambda; + // cool = n*n*lambda; // Creasey cooling function - if (T >= T0 && T <= 0.5*(T1+T0)) { - cool = n*n*lambda*(T - T0) / T0; + if (T >= T0 && T <= 0.5 * (T1 + T0)) { + cool = n * n * lambda * (T - T0) / T0; } - if (T >= 0.5*(T1+T0) && T <= T1) { - cool = n*n*lambda*(T1 - T) / T0; + if (T >= 0.5 * (T1 + T0) && T <= T1) { + cool = n * n * lambda * (T1 - T) / T0; } - - //printf("%d %f %f\n", tid, T, cool); + // printf("%d %f %f\n", tid, T, cool); return cool; - } - /* \fn __device__ Real primordial_cool(Real n, Real T) * \brief Primordial hydrogen/helium cooling curve derived according to Katz et al. 1996. */ @@ -202,8 +200,10 @@ __device__ Real primordial_cool(Real n, Real T) { Real n_h, Y, y, g_ff, cool; Real n_h0, n_hp, n_he0, n_hep, n_hepp, n_e, n_e_old; - Real alpha_hp, alpha_hep, alpha_d, alpha_hepp, gamma_eh0, gamma_ehe0, gamma_ehep; - Real le_h0, le_hep, li_h0, li_he0, li_hep, lr_hp, lr_hep, lr_hepp, ld_hep, l_ff; + Real alpha_hp, alpha_hep, alpha_d, alpha_hepp, gamma_eh0, gamma_ehe0, + gamma_ehep; + Real le_h0, le_hep, li_h0, li_he0, li_hep, lr_hp, lr_hep, lr_hepp, ld_hep, + l_ff; Real gamma_lh0, gamma_lhe0, gamma_lhep, e_h0, e_he0, e_hep, H; int heat_flag, n_iter; Real diff, tol; @@ -211,159 +211,170 @@ __device__ Real primordial_cool(Real n, Real T) // set flag to 1 for photoionization & heating heat_flag = 0; - //Real X = 0.76; //hydrogen abundance by mass - Y = 0.24; //helium abundance by mass - y = Y/(4 - 4*Y); + // Real X = 0.76; //hydrogen abundance by mass + Y = 0.24; // helium abundance by mass + y = Y / (4 - 4 * Y); // set the hydrogen number density n_h = n; // calculate the recombination and collisional ionization rates // (Table 2 from Katz 1996) - alpha_hp = (8.4e-11) * (1.0/sqrt(T)) * pow((T/1e3),(-0.2)) * (1.0 / (1.0 + pow((T/1e6),(0.7)))); - alpha_hep = (1.5e-10) * (pow(T,(-0.6353))); - alpha_d = (1.9e-3) * (pow(T,(-1.5))) * exp(-470000.0/T) * (1.0 + 0.3*exp(-94000.0/T)); - alpha_hepp = (3.36e-10)* (1.0/sqrt(T)) * pow((T/1e3),(-0.2)) * (1.0 / (1.0 + pow((T/1e6),(0.7)))); - gamma_eh0 = (5.85e-11)* sqrt(T) * exp(-157809.1/T) * (1.0 / (1.0 + sqrt(T/1e5))); - gamma_ehe0 = (2.38e-11)* sqrt(T) * exp(-285335.4/T) * (1.0 / (1.0 + sqrt(T/1e5))); - gamma_ehep = (5.68e-12)* sqrt(T) * exp(-631515.0/T) * (1.0 / (1.0 + sqrt(T/1e5))); + alpha_hp = (8.4e-11) * (1.0 / sqrt(T)) * pow((T / 1e3), (-0.2)) * + (1.0 / (1.0 + pow((T / 1e6), (0.7)))); + alpha_hep = (1.5e-10) * (pow(T, (-0.6353))); + alpha_d = (1.9e-3) * (pow(T, (-1.5))) * exp(-470000.0 / T) * + (1.0 + 0.3 * exp(-94000.0 / T)); + alpha_hepp = (3.36e-10) * (1.0 / sqrt(T)) * pow((T / 1e3), (-0.2)) * + (1.0 / (1.0 + pow((T / 1e6), (0.7)))); + gamma_eh0 = + (5.85e-11) * sqrt(T) * exp(-157809.1 / T) * (1.0 / (1.0 + sqrt(T / 1e5))); + gamma_ehe0 = + (2.38e-11) * sqrt(T) * exp(-285335.4 / T) * (1.0 / (1.0 + sqrt(T / 1e5))); + gamma_ehep = + (5.68e-12) * sqrt(T) * exp(-631515.0 / T) * (1.0 / (1.0 + sqrt(T / 1e5))); // externally evaluated integrals for photoionization rates // assumed J(nu) = 10^-22 (nu_L/nu) - gamma_lh0 = 3.19851e-13; + gamma_lh0 = 3.19851e-13; gamma_lhe0 = 3.13029e-13; gamma_lhep = 2.00541e-14; // externally evaluated integrals for heating rates - e_h0 = 2.4796e-24; + e_h0 = 2.4796e-24; e_he0 = 6.86167e-24; e_hep = 6.21868e-25; - // assuming no photoionization, solve equations for number density of // each species - n_e = n_h; //as a first guess, use the hydrogen number density + n_e = n_h; // as a first guess, use the hydrogen number density n_iter = 20; - diff = 1.0; - tol = 1.0e-6; + diff = 1.0; + tol = 1.0e-6; if (heat_flag) { - for (int i=0; i= 4.0 && log10(T) < 5.9) { + } else if (log10(T) >= 4.0 && log10(T) < 5.9) { lambda = pow(10.0, (-1.3 * (log10(T) - 5.25) * (log10(T) - 5.25) - 21.25)); - } - else if (log10(T) >= 5.9 && log10(T) < 7.4) { + } else if (log10(T) >= 5.9 && log10(T) < 7.4) { lambda = pow(10.0, (0.7 * (log10(T) - 7.1) * (log10(T) - 7.1) - 22.8)); - } - else { - lambda = pow(10.0, (0.45*log10(T) - 26.065)); + } else { + lambda = pow(10.0, (0.45 * log10(T) - 26.065)); } // cooling rate per unit volume - cool = n*n*lambda; + cool = n * n * lambda; return cool; - } - -#ifdef CLOUDY_COOL -/* \fn __device__ Real Cloudy_cool(Real n, Real T, cudaTextureObject_t coolTexObj, cudaTextureObject_t heatTexObj) + #ifdef CLOUDY_COOL +/* \fn __device__ Real Cloudy_cool(Real n, Real T, cudaTextureObject_t + coolTexObj, cudaTextureObject_t heatTexObj) * \brief Uses texture mapping to interpolate Cloudy cooling/heating tables at z = 0 with solar metallicity and an HM05 UV background. */ -__device__ Real Cloudy_cool(Real n, Real T, cudaTextureObject_t coolTexObj, cudaTextureObject_t heatTexObj) +__device__ Real Cloudy_cool(Real n, Real T, cudaTextureObject_t coolTexObj, + cudaTextureObject_t heatTexObj) { - Real lambda = 0.0; //cooling rate, erg s^-1 cm^3 - Real H = 0.0; //heating rate, erg s^-1 cm^3 - Real cool = 0.0; //cooling per unit volume, erg /s / cm^3 + Real lambda = 0.0; // cooling rate, erg s^-1 cm^3 + Real H = 0.0; // heating rate, erg s^-1 cm^3 + Real cool = 0.0; // cooling per unit volume, erg /s / cm^3 float log_n, log_T; log_n = log10(n); log_T = log10(T); // remap coordinates for texture // remapped = (input - TABLE_MIN_VALUE)*(1/TABLE_SPACING) - // remapped = (input - TABLE_MIN_VALUE)*(NUM_CELLS_PER_DECADE) - log_T = (log_T - 1.0)*10; - log_n = (log_n + 6.0)*10; + // remapped = (input - TABLE_MIN_VALUE)*(NUM_CELLS_PER_DECADE) + log_T = (log_T - 1.0) * 10; + log_n = (log_n + 6.0) * 10; + + // Note: although the cloudy table columns are n,T,L,H , T is the fastest + // variable so it is treated as "x" This is why the Texture calls are T first, + // then n: Bilinear_Texture(tex, log_T, log_n) - // Note: although the cloudy table columns are n,T,L,H , T is the fastest variable so it is treated as "x" - // This is why the Texture calls are T first, then n: Bilinear_Texture(tex, log_T, log_n) - // don't cool below 10 K if (log10(T) > 1.0) { lambda = Bilinear_Texture(coolTexObj, log_T, log_n); - } - else lambda = 0.0; + } else + lambda = 0.0; H = Bilinear_Texture(heatTexObj, log_T, log_n); // cooling rate per unit volume - cool = n*n*(powf(10, lambda) - powf(10, H)); + cool = n * n * (powf(10, lambda) - powf(10, H)); // printf("DEBUG Cloudy L350: %.17e\n",cool); return cool; } -#endif //CLOUDY_COOL - - - + #endif // CLOUDY_COOL -#endif //COOLING_GPU -#endif //CUDA + #endif // COOLING_GPU +#endif // CUDA diff --git a/src/cooling/cooling_cuda.h b/src/cooling/cooling_cuda.h index f8d098e59..68b8d7e04 100644 --- a/src/cooling/cooling_cuda.h +++ b/src/cooling/cooling_cuda.h @@ -2,49 +2,55 @@ * \brief Declarations of cooling functions. */ #ifdef CUDA -#ifdef COOLING_GPU -#pragma once + #ifdef COOLING_GPU + #pragma once -#include "../utils/gpu.hpp" -#include -#include "../global/global.h" + #include + + #include "../global/global.h" + #include "../utils/gpu.hpp" extern cudaTextureObject_t coolTexObj; extern cudaTextureObject_t heatTexObj; -/*! \fn void Cooling_Update(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dt, Real gamma) - * \brief When passed an array of conserved variables and a timestep, adjust the value - of the total energy for each cell according to the specified cooling function. */ -void Cooling_Update(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dt, Real gamma); - - -/*! \fn void cooling_kernel(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, Real dt, Real gamma) - * \brief When passed an array of conserved variables and a timestep, adjust the value - of the total energy for each cell according to the specified cooling function. */ -__global__ void cooling_kernel(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dt, Real gamma, cudaTextureObject_t coolTexObj, cudaTextureObject_t heatTexObj); - +/*! \fn void Cooling_Update(Real *dev_conserved, int nx, int ny, int nz, int + n_ghost, int n_fields, Real dt, Real gamma) + * \brief When passed an array of conserved variables and a timestep, adjust + the value of the total energy for each cell according to the specified cooling + function. */ +void Cooling_Update(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, + int n_fields, Real dt, Real gamma); + +/*! \fn void cooling_kernel(Real *dev_conserved, int nx, int ny, int nz, int + n_ghost, Real dt, Real gamma) + * \brief When passed an array of conserved variables and a timestep, adjust + the value of the total energy for each cell according to the specified cooling + function. */ +__global__ void cooling_kernel(Real *dev_conserved, int nx, int ny, int nz, + int n_ghost, int n_fields, Real dt, Real gamma, + cudaTextureObject_t coolTexObj, + cudaTextureObject_t heatTexObj); /* \fn __device__ Real test_cool(Real n, Real T) * \brief Cooling function from Creasey 2011. */ __device__ Real test_cool(int tid, Real n, Real T); - /* \fn __device__ Real primordial_cool(Real n, Real T) * \brief Primordial hydrogen/helium cooling curve derived according to Katz et al. 1996. */ __device__ Real primordial_cool(Real n, Real T); - /* \fn __device__ Real CIE_cool(Real n, Real T) * \brief Analytic fit to a solar metallicity CIE cooling curve calculated using Cloudy. */ __device__ Real CIE_cool(Real n, Real T); - -/* \fn __device__ Real Cloudy_cool(Real n, Real T, cudaTextureObject_t coolTexObj, cudaTextureObject_t heatTexObj) +/* \fn __device__ Real Cloudy_cool(Real n, Real T, cudaTextureObject_t + coolTexObj, cudaTextureObject_t heatTexObj) * \brief Uses texture mapping to interpolate Cloudy cooling/heating tables at z = 0 with solar metallicity and an HM05 UV background. */ -__device__ Real Cloudy_cool(Real n, Real T, cudaTextureObject_t coolTexObj, cudaTextureObject_t heatTexObj); +__device__ Real Cloudy_cool(Real n, Real T, cudaTextureObject_t coolTexObj, + cudaTextureObject_t heatTexObj); -#endif //COOLING_GPU -#endif //CUDA + #endif // COOLING_GPU +#endif // CUDA diff --git a/src/cooling/load_cloudy_texture.cu b/src/cooling/load_cloudy_texture.cu index 2d5758bbd..ce4839425 100644 --- a/src/cooling/load_cloudy_texture.cu +++ b/src/cooling/load_cloudy_texture.cu @@ -2,28 +2,27 @@ * \brief Wrapper file to load cloudy cooling table as CUDA texture. */ #ifdef CUDA -#ifdef CLOUDY_COOL + #ifdef CLOUDY_COOL -#include -#include -#include "../global/global.h" -#include "../global/global_cuda.h" -#include "../cooling/load_cloudy_texture.h" -#include "../cooling/cooling_cuda.h" -#include "../cooling/texture_utilities.h" + #include + #include -#include "../io/io.h" // provides chprintf + #include "../cooling/cooling_cuda.h" + #include "../cooling/load_cloudy_texture.h" + #include "../cooling/texture_utilities.h" + #include "../global/global.h" + #include "../global/global_cuda.h" + #include "../io/io.h" // provides chprintf -cudaArray* cuCoolArray; -cudaArray* cuHeatArray; +cudaArray *cuCoolArray; +cudaArray *cuHeatArray; void Test_Cloudy_Textures(); void Test_Cloudy_Speed(); - /* \fn void Host_Read_Cooling_Tables(float* cooling_table, float* heating_table) * \brief Load the Cloudy cooling tables into host (CPU) memory. */ -void Host_Read_Cooling_Tables(float* cooling_table, float* heating_table) +void Host_Read_Cooling_Tables(float *cooling_table, float *heating_table) { double *n_arr; double *T_arr; @@ -36,56 +35,51 @@ void Host_Read_Cooling_Tables(float* cooling_table, float* heating_table) FILE *infile; char buffer[0x1000]; - char * pch; + char *pch; // allocate arrays for temperature data - n_arr = (double *) malloc(nx*ny*sizeof(double)); - T_arr = (double *) malloc(nx*ny*sizeof(double)); - L_arr = (double *) malloc(nx*ny*sizeof(double)); - H_arr = (double *) malloc(nx*ny*sizeof(double)); + n_arr = (double *)malloc(nx * ny * sizeof(double)); + T_arr = (double *)malloc(nx * ny * sizeof(double)); + L_arr = (double *)malloc(nx * ny * sizeof(double)); + H_arr = (double *)malloc(nx * ny * sizeof(double)); // Read in cloudy cooling/heating curve (function of density and temperature) - i=0; + i = 0; + + const char *cloudy_filename1 = "./cloudy_coolingcurve.txt"; + const char *cloudy_filename2 = "src/cooling/cloudy_coolingcurve.txt"; + const char *file_in_use; - const char* cloudy_filename1 = "./cloudy_coolingcurve.txt"; - const char* cloudy_filename2 = "src/cooling/cloudy_coolingcurve.txt"; - const char* file_in_use; - - infile = fopen(cloudy_filename1, "r"); + infile = fopen(cloudy_filename1, "r"); file_in_use = cloudy_filename1; if (infile == NULL) { - infile = fopen(cloudy_filename2, "r"); + infile = fopen(cloudy_filename2, "r"); file_in_use = cloudy_filename2; } - if (infile == NULL) { - chprintf("Unable to open Cloudy file with expected relative paths:\n %s \n OR \n %s\n", cloudy_filename1, cloudy_filename2); + chprintf( + "Unable to open Cloudy file with expected relative paths:\n %s \n OR " + "\n %s\n", + cloudy_filename1, cloudy_filename2); exit(1); } else { chprintf("Using Cloudy file at relative path: %s \n", file_in_use); } - - while (fgets(buffer, sizeof(buffer), infile) != NULL) - { + while (fgets(buffer, sizeof(buffer), infile) != NULL) { if (buffer[0] == '#') { continue; - } - else { - pch = strtok(buffer, "\t"); + } else { + pch = strtok(buffer, "\t"); n_arr[i] = atof(pch); - while (pch != NULL) - { + while (pch != NULL) { pch = strtok(NULL, "\t"); - if (pch != NULL) - T_arr[i] = atof(pch); + if (pch != NULL) T_arr[i] = atof(pch); pch = strtok(NULL, "\t"); - if (pch != NULL) - L_arr[i] = atof(pch); + if (pch != NULL) L_arr[i] = atof(pch); pch = strtok(NULL, "\t"); - if (pch != NULL) - H_arr[i] = atof(pch); + if (pch != NULL) H_arr[i] = atof(pch); } i++; } @@ -93,8 +87,7 @@ void Host_Read_Cooling_Tables(float* cooling_table, float* heating_table) fclose(infile); // copy data from cooling array into the table - for (i=0; i(coolTexObj, rlog_T, rlog_n); - float heat = Bilinear_Texture(heatTexObj, rlog_T, rlog_n); // tex2D(heatTexObj, rlog_T, rlog_n); + float lambda = Bilinear_Texture( + coolTexObj, rlog_T, rlog_n); // tex2D(coolTexObj, rlog_T, rlog_n); + float heat = Bilinear_Texture( + heatTexObj, rlog_T, rlog_n); // tex2D(heatTexObj, rlog_T, rlog_n); // Hackfully print it out for processing for correctness - printf("TEST_Cloudy: %.17e %.17e %.17e %.17e \n",log_T, log_n, lambda, heat); - + printf("TEST_Cloudy: %.17e %.17e %.17e %.17e \n", log_T, log_n, lambda, heat); } - -/* Consider this function only to be used at the end of Load_Cuda_Textures when testing - * Evaluate texture on grid of size num_n num_T for variables n,T */ -__global__ void Test_Cloudy_Speed_Kernel(int num_n, int num_T, cudaTextureObject_t coolTexObj, cudaTextureObject_t heatTexObj) +/* Consider this function only to be used at the end of Load_Cuda_Textures when + * testing Evaluate texture on grid of size num_n num_T for variables n,T */ +__global__ void Test_Cloudy_Speed_Kernel(int num_n, int num_T, + cudaTextureObject_t coolTexObj, + cudaTextureObject_t heatTexObj) { - int id,id_n,id_T; + int id, id_n, id_T; id = threadIdx.x + blockIdx.x * blockDim.x; // Calculate log_T and log_n based on id - id_T = id/num_n; - id_n = id%num_n; + id_T = id / num_n; + id_n = id % num_n; - // Min value, but include id=-1 as an outside value to check clamping. Use dx = 0.05 instead of 0.1 to check interpolation - // float log_T = 1.0 + (id_T-1)*0.05; + // Min value, but include id=-1 as an outside value to check clamping. Use dx + // = 0.05 instead of 0.1 to check interpolation float log_T = 1.0 + + // (id_T-1)*0.05; // float log_n = -6.0 + (id_n-1)*0.05; // Remap for texture with normalized coords @@ -245,27 +249,30 @@ __global__ void Test_Cloudy_Speed_Kernel(int num_n, int num_T, cudaTextureObject // float rlog_T = (log_T - 1.0) * 10; // float rlog_n = (log_n + 6.0) * 10; - float rlog_T = (id_T - 1)*0.0125; - float rlog_n = (id_n - 1)*0.0125; + float rlog_T = (id_T - 1) * 0.0125; + float rlog_n = (id_n - 1) * 0.0125; // Evaluate - float lambda = Bilinear_Texture(coolTexObj, rlog_T, rlog_n); // tex2D(coolTexObj, rlog_T, rlog_n); - float heat = Bilinear_Texture(heatTexObj, rlog_T, rlog_n); // tex2D(heatTexObj, rlog_T, rlog_n); + float lambda = Bilinear_Texture( + coolTexObj, rlog_T, rlog_n); // tex2D(coolTexObj, rlog_T, rlog_n); + float heat = Bilinear_Texture( + heatTexObj, rlog_T, rlog_n); // tex2D(heatTexObj, rlog_T, rlog_n); // Hackfully print it out for processing for correctness - // printf("TEST_Cloudy: %.17e %.17e %.17e %.17e \n",log_T, log_n, lambda, heat); - + // printf("TEST_Cloudy: %.17e %.17e %.17e %.17e \n",log_T, log_n, lambda, + // heat); } -/* Consider this function only to be used at the end of Load_Cuda_Textures when testing - * Evaluate texture on grid of size num_n num_T for variables n,T */ +/* Consider this function only to be used at the end of Load_Cuda_Textures when + * testing Evaluate texture on grid of size num_n num_T for variables n,T */ void Test_Cloudy_Textures() { - int num_n = 1+2*121; - int num_T = 1+2*81; - dim3 dim1dGrid((num_n*num_T+TPB-1)/TPB, 1, 1); + int num_n = 1 + 2 * 121; + int num_T = 1 + 2 * 81; + dim3 dim1dGrid((num_n * num_T + TPB - 1) / TPB, 1, 1); dim3 dim1dBlock(TPB, 1, 1); - hipLaunchKernelGGL(Test_Cloudy_Textures_Kernel,dim1dGrid,dim1dBlock,0,0,num_n,num_T,coolTexObj,heatTexObj); + hipLaunchKernelGGL(Test_Cloudy_Textures_Kernel, dim1dGrid, dim1dBlock, 0, 0, + num_n, num_T, coolTexObj, heatTexObj); CHECK(cudaDeviceSynchronize()); printf("Exiting due to Test_Cloudy_Textures() being called \n"); exit(0); @@ -273,14 +280,15 @@ void Test_Cloudy_Textures() void Test_Cloudy_Speed() { - int num_n = 1+80*121; - int num_T = 1+80*81; - dim3 dim1dGrid((num_n*num_T+TPB-1)/TPB, 1, 1); + int num_n = 1 + 80 * 121; + int num_T = 1 + 80 * 81; + dim3 dim1dGrid((num_n * num_T + TPB - 1) / TPB, 1, 1); dim3 dim1dBlock(TPB, 1, 1); CHECK(cudaDeviceSynchronize()); Real time_start = get_time(); - for (int i=0; i<100; i++) { - hipLaunchKernelGGL(Test_Cloudy_Speed_Kernel,dim1dGrid,dim1dBlock,0,0,num_n,num_T,coolTexObj,heatTexObj); + for (int i = 0; i < 100; i++) { + hipLaunchKernelGGL(Test_Cloudy_Speed_Kernel, dim1dGrid, dim1dBlock, 0, 0, + num_n, num_T, coolTexObj, heatTexObj); } CHECK(cudaDeviceSynchronize()); Real time_end = get_time(); @@ -289,12 +297,5 @@ void Test_Cloudy_Speed() exit(0); } - - - - - - - -#endif + #endif #endif diff --git a/src/cooling/load_cloudy_texture.h b/src/cooling/load_cloudy_texture.h index 164125392..3da31e4dd 100644 --- a/src/cooling/load_cloudy_texture.h +++ b/src/cooling/load_cloudy_texture.h @@ -2,20 +2,20 @@ * \brief Wrapper file to load cloudy cooling table as CUDA texture. */ #ifdef CUDA -#ifdef CLOUDY_COOL + #ifdef CLOUDY_COOL -#pragma once + #pragma once -#include "../global/global.h" + #include "../global/global.h" /* \fn void Load_Cuda_Textures() * \brief Load the Cloudy cooling tables into texture memory on the GPU. */ void Load_Cuda_Textures(); /* \fn void Free_Cuda_Textures() - * \brief Unbind the texture memory on the GPU, and free the associated Cuda arrays. */ + * \brief Unbind the texture memory on the GPU, and free the associated Cuda + * arrays. */ void Free_Cuda_Textures(); + #endif #endif -#endif - diff --git a/src/cooling/texture_utilities.h b/src/cooling/texture_utilities.h index 6b271d5a1..70b1baebf 100644 --- a/src/cooling/texture_utilities.h +++ b/src/cooling/texture_utilities.h @@ -1,25 +1,29 @@ /*! \file texture_utilities.h * \brief Declarations of functions needed for textures. */ -// WARNING: do not include this header file in any .cpp file or any .h file that would be included into a .cpp file -// because tex2D is undefined when compiling with gcc. +// WARNING: do not include this header file in any .cpp file or any .h file that +// would be included into a .cpp file because tex2D is undefined when compiling +// with gcc. #ifdef CUDA -#pragma once + #pragma once -#include "../utils/gpu.hpp" -#include -#include "../global/global.h" + #include + + #include "../global/global.h" + #include "../utils/gpu.hpp" inline __device__ float lerp(float v0, float v1, float f) { - return fma(f, v1, fma(-f,v0,v0)); + return fma(f, v1, fma(-f, v0, v0)); } /* \fn float Bilinear_Texture(cudaTextureObject_t tex, float x, float y) - \brief Access texture values from tex at coordinates (x,y) using bilinear interpolation + \brief Access texture values from tex at coordinates (x,y) using bilinear + interpolation */ -inline __device__ float Bilinear_Texture(cudaTextureObject_t tex, float x, float y) +inline __device__ float Bilinear_Texture(cudaTextureObject_t tex, float x, + float y) { // Split coordinates into integer px/py and fractional fx/fy parts float px = floorf(x); @@ -27,18 +31,18 @@ inline __device__ float Bilinear_Texture(cudaTextureObject_t tex, float x, float float fx = x - px; float fy = y - py; - // 0.5 offset is necessary to represent half-pixel offset built into texture coordinates + // 0.5 offset is necessary to represent half-pixel offset built into texture + // coordinates px += 0.5; py += 0.5; - float t00 = tex2D(tex,px,py); - float t01 = tex2D(tex,px,py+1); - float t10 = tex2D(tex,px+1,py); - float t11 = tex2D(tex,px+1,py+1); + float t00 = tex2D(tex, px, py); + float t01 = tex2D(tex, px, py + 1); + float t10 = tex2D(tex, px + 1, py); + float t11 = tex2D(tex, px + 1, py + 1); // The inner lerps interpolate along x // The outer lerp interpolates along y return lerp(lerp(t00, t10, fx), lerp(t01, t11, fx), fy); - } -#endif //CUDA +#endif // CUDA diff --git a/src/cooling_grackle/cool_grackle.cpp b/src/cooling_grackle/cool_grackle.cpp index e9ea551ca..bcb829f58 100644 --- a/src/cooling_grackle/cool_grackle.cpp +++ b/src/cooling_grackle/cool_grackle.cpp @@ -1,183 +1,186 @@ #ifdef COOLING_GRACKLE + #include "../cooling_grackle/cool_grackle.h" -#include -#include -#include -#include "../io/io.h" -#include "../cooling_grackle/cool_grackle.h" -#include "../grid/grid_enum.h" + #include + #include + #include + #include "../grid/grid_enum.h" + #include "../io/io.h" -Cool_GK::Cool_GK( void ){} +Cool_GK::Cool_GK(void) {} -void Grid3D::Initialize_Grackle( struct parameters *P ){ +void Grid3D::Initialize_Grackle(struct parameters *P) +{ + chprintf("Initializing Grackle... \n"); - chprintf( "Initializing Grackle... \n"); - - Cool.Initialize( P, Cosmo ); + Cool.Initialize(P, Cosmo); Allocate_Memory_Grackle(); Initialize_Fields_Grackle(); - chprintf( "Grackle Initialized Successfully. \n\n"); - - + chprintf("Grackle Initialized Successfully. \n\n"); } - -void Cool_GK::Initialize( struct parameters *P, Cosmology &Cosmo ){ - - chprintf( " Using Grackle for chemistry and cooling \n" ); - chprintf( " N scalar fields: %d \n", NSCALARS ); +void Cool_GK::Initialize(struct parameters *P, Cosmology &Cosmo) +{ + chprintf(" Using Grackle for chemistry and cooling \n"); + chprintf(" N scalar fields: %d \n", NSCALARS); grackle_verbose = 1; #ifdef MPI_CHOLLA // Enable output - if (procID != 0 ) grackle_verbose = 0; + if (procID != 0) grackle_verbose = 0; #endif - tiny_number = 1.e-20; - gamma = P->gamma; + gamma = P->gamma; - dens_conv = Cosmo.rho_0_gas; - energy_conv = Cosmo.v_0_gas * Cosmo.v_0_gas ; + dens_conv = Cosmo.rho_0_gas; + energy_conv = Cosmo.v_0_gas * Cosmo.v_0_gas; Real Msun = MSUN_CGS; - Real kpc = KPC_CGS; - Real km = KM_CGS - + Real kpc = KPC_CGS; + Real km = KM_CGS - dens_to_CGS = dens_conv * Msun / kpc / kpc / kpc * Cosmo.cosmo_h * Cosmo.cosmo_h; - vel_to_CGS = km; - energy_to_CGS = km * km; + dens_to_CGS = + dens_conv * Msun / kpc / kpc / kpc * Cosmo.cosmo_h * Cosmo.cosmo_h; + vel_to_CGS = km; + energy_to_CGS = km * km; // First, set up the units system. // These are conversions from code units to cgs. - units.comoving_coordinates = 1; // 1 if cosmological sim, 0 if not - units.a_units = 1.0 ; // units for the expansion factor - units.a_value = Cosmo.current_a / units.a_units; - units.density_units = dens_to_CGS / Cosmo.current_a / Cosmo.current_a / Cosmo.current_a ; - units.length_units = kpc / Cosmo.cosmo_h * Cosmo.current_a; - units.time_units = KPC / Cosmo.cosmo_h ; - units.velocity_units = units.length_units / Cosmo.current_a / units.time_units; // since u = a * dx/dt - - // Second, create a chemistry object for parameters. This needs to be a pointer. + units.comoving_coordinates = 1; // 1 if cosmological sim, 0 if not + units.a_units = 1.0; // units for the expansion factor + units.a_value = Cosmo.current_a / units.a_units; + units.density_units = + dens_to_CGS / Cosmo.current_a / Cosmo.current_a / Cosmo.current_a; + units.length_units = kpc / Cosmo.cosmo_h * Cosmo.current_a; + units.time_units = KPC / Cosmo.cosmo_h; + units.velocity_units = units.length_units / Cosmo.current_a / + units.time_units; // since u = a * dx/dt + + // Second, create a chemistry object for parameters. This needs to be a + // pointer. data = new chemistry_data; if (set_default_chemistry_parameters(data) == 0) { - chprintf( "GRACKLE: Error in set_default_chemistry_parameters.\n"); - exit(-1) ; + chprintf("GRACKLE: Error in set_default_chemistry_parameters.\n"); + exit(-1); } // Set parameter values for chemistry. // Access the parameter storage with the struct you've created // or with the grackle_data pointer declared in grackle.h (see further below). - data->use_grackle = 1; // chemistry on - data->with_radiative_cooling = 1; // Cooling on - data->primordial_chemistry = 1; // molecular network with H, He - data->UVbackground = 1; // UV background on - // data->grackle_data_file = "src/cooling/CloudyData_UVB=HM2012.h5"; // data file - // data->grackle_data_file = "src/cooling/CloudyData_UVB=HM2012_cloudy.h5"; // data file - // data->grackle_data_file = "src/cooling_grackle/CloudyData_UVB=Puchwein2018_cloudy.h5"; // data file - data->grackle_data_file = P->UVB_rates_file; // data file - // data->grackle_data_file = "src/cooling/CloudyData_UVB=FG2011.h5"; // data file - data->use_specific_heating_rate = 0; + data->use_grackle = 1; // chemistry on + data->with_radiative_cooling = 1; // Cooling on + data->primordial_chemistry = 1; // molecular network with H, He + data->UVbackground = 1; // UV background on + // data->grackle_data_file = "src/cooling/CloudyData_UVB=HM2012.h5"; // data + // file data->grackle_data_file = + // "src/cooling/CloudyData_UVB=HM2012_cloudy.h5"; // data file + // data->grackle_data_file = + // "src/cooling_grackle/CloudyData_UVB=Puchwein2018_cloudy.h5"; // data file + data->grackle_data_file = P->UVB_rates_file; // data file + // data->grackle_data_file = "src/cooling/CloudyData_UVB=FG2011.h5"; // data + // file + data->use_specific_heating_rate = 0; data->use_volumetric_heating_rate = 0; - data->cmb_temperature_floor = 1; + data->cmb_temperature_floor = 1; #ifdef GRACKLE_METALS - data->metal_cooling = 1; // metal cooling off + data->metal_cooling = 1; // metal cooling off #else - chprintf( "WARNING: Metal Cooling is Off. \n" ); - data->metal_cooling = 0; // metal cooling off + chprintf("WARNING: Metal Cooling is Off. \n"); + data->metal_cooling = 0; // metal cooling off #endif #ifdef PARALLEL_OMP data->omp_nthreads = N_OMP_THREADS_GRACKLE; #endif - if ( data->UVbackground == 1) chprintf( "GRACKLE: Loading UV Background File: %s\n", data->grackle_data_file ); + if (data->UVbackground == 1) + chprintf("GRACKLE: Loading UV Background File: %s\n", + data->grackle_data_file); // Finally, initialize the chemistry object. if (initialize_chemistry_data(&units) == 0) { - chprintf( "GRACKLE: Error in initialize_chemistry_data.\n"); - exit(-1) ; + chprintf("GRACKLE: Error in initialize_chemistry_data.\n"); + exit(-1); } - if ( data->UVbackground == 1){ - scale_factor_UVB_on = 1 / (data->UVbackground_redshift_on + 1 ); - chprintf( "GRACKLE: UVB on: %f \n", scale_factor_UVB_on ); + if (data->UVbackground == 1) { + scale_factor_UVB_on = 1 / (data->UVbackground_redshift_on + 1); + chprintf("GRACKLE: UVB on: %f \n", scale_factor_UVB_on); } - } -void Grid3D::Allocate_Memory_Grackle( ){ - -int n_cells = H.nx * H.ny * H.nz; -int nx = Grav.nx_local; -int ny = Grav.ny_local; -int nz = Grav.nz_local; -// Set grid dimension and size. -Cool.field_size = n_cells; -Cool.fields.grid_rank = 3; -Cool.fields.grid_dimension = new int[3]; -Cool.fields.grid_start = new int[3]; -Cool.fields.grid_end = new int[3]; -Cool.fields.grid_dimension[0] = H.nx; // the active dimension -Cool.fields.grid_dimension[1] = H.ny; // the active dimension -Cool.fields.grid_dimension[2] = H.nz; // the active dimension -// grid_start and grid_end are used to ignore ghost zones. -Cool.fields.grid_start[0] = H.n_ghost; -Cool.fields.grid_start[1] = H.n_ghost; -Cool.fields.grid_start[2] = H.n_ghost; -Cool.fields.grid_end[0] = H.nx - H.n_ghost - 1 ; -Cool.fields.grid_end[1] = H.ny - H.n_ghost - 1 ; -Cool.fields.grid_end[2] = H.nz - H.n_ghost - 1 ; - -Cool.fields.grid_dx = 0.0; // used only for H2 self-shielding approximation - -Cool.fields.density = C.density; -Cool.fields.internal_energy = (Real *) malloc(Cool.field_size * sizeof(Real)); -// Cool.fields.x_velocity = (Real *) malloc(Cool.field_size * sizeof(Real)); -// Cool.fields.y_velocity = (Real *) malloc(Cool.field_size * sizeof(Real)); -// Cool.fields.z_velocity = (Real *) malloc(Cool.field_size * sizeof(Real)); -Cool.fields.x_velocity = NULL; -Cool.fields.y_velocity = NULL; -Cool.fields.z_velocity = NULL; - -chprintf( " Allocating memory for: HI, HII, HeI, HeII, HeIII, e densities\n"); -Cool.fields.HI_density = &C.host[ H.n_cells*grid_enum::HI_density ]; -Cool.fields.HII_density = &C.host[ H.n_cells*grid_enum::HII_density ]; -Cool.fields.HeI_density = &C.host[ H.n_cells*grid_enum::HeI_density ]; -Cool.fields.HeII_density = &C.host[ H.n_cells*grid_enum::HeII_density ]; -Cool.fields.HeIII_density = &C.host[ H.n_cells*grid_enum::HeIII_density ]; -Cool.fields.e_density = &C.host[ H.n_cells*grid_enum::e_density ]; - -#ifdef GRACKLE_METALS -chprintf( " Allocating memory for: metal density\n"); -Cool.fields.metal_density = &C.host[ H.n_cells*grid_enum::metal_density ]; -#else -Cool.fields.metal_density = NULL; -#endif +void Grid3D::Allocate_Memory_Grackle() +{ + int n_cells = H.nx * H.ny * H.nz; + int nx = Grav.nx_local; + int ny = Grav.ny_local; + int nz = Grav.nz_local; + // Set grid dimension and size. + Cool.field_size = n_cells; + Cool.fields.grid_rank = 3; + Cool.fields.grid_dimension = new int[3]; + Cool.fields.grid_start = new int[3]; + Cool.fields.grid_end = new int[3]; + Cool.fields.grid_dimension[0] = H.nx; // the active dimension + Cool.fields.grid_dimension[1] = H.ny; // the active dimension + Cool.fields.grid_dimension[2] = H.nz; // the active dimension + // grid_start and grid_end are used to ignore ghost zones. + Cool.fields.grid_start[0] = H.n_ghost; + Cool.fields.grid_start[1] = H.n_ghost; + Cool.fields.grid_start[2] = H.n_ghost; + Cool.fields.grid_end[0] = H.nx - H.n_ghost - 1; + Cool.fields.grid_end[1] = H.ny - H.n_ghost - 1; + Cool.fields.grid_end[2] = H.nz - H.n_ghost - 1; + + Cool.fields.grid_dx = 0.0; // used only for H2 self-shielding approximation + + Cool.fields.density = C.density; + Cool.fields.internal_energy = (Real *)malloc(Cool.field_size * sizeof(Real)); + // Cool.fields.x_velocity = (Real *) malloc(Cool.field_size * + // sizeof(Real)); Cool.fields.y_velocity = (Real *) + // malloc(Cool.field_size * sizeof(Real)); Cool.fields.z_velocity = (Real + // *) malloc(Cool.field_size * sizeof(Real)); + Cool.fields.x_velocity = NULL; + Cool.fields.y_velocity = NULL; + Cool.fields.z_velocity = NULL; + + chprintf( + " Allocating memory for: HI, HII, HeI, HeII, HeIII, e densities\n"); + Cool.fields.HI_density = &C.host[H.n_cells * grid_enum::HI_density]; + Cool.fields.HII_density = &C.host[H.n_cells * grid_enum::HII_density]; + Cool.fields.HeI_density = &C.host[H.n_cells * grid_enum::HeI_density]; + Cool.fields.HeII_density = &C.host[H.n_cells * grid_enum::HeII_density]; + Cool.fields.HeIII_density = &C.host[H.n_cells * grid_enum::HeIII_density]; + Cool.fields.e_density = &C.host[H.n_cells * grid_enum::e_density]; -#ifdef OUTPUT_TEMPERATURE -Cool.temperature = (Real *) malloc(Cool.field_size * sizeof(Real)); -#endif -} + #ifdef GRACKLE_METALS + chprintf(" Allocating memory for: metal density\n"); + Cool.fields.metal_density = &C.host[H.n_cells * grid_enum::metal_density]; + #else + Cool.fields.metal_density = NULL; + #endif + #ifdef OUTPUT_TEMPERATURE + Cool.temperature = (Real *)malloc(Cool.field_size * sizeof(Real)); + #endif +} -void Cool_GK::Free_Memory( ){ +void Cool_GK::Free_Memory() +{ // free( fields.x_velocity ); // free( fields.y_velocity ); // free( fields.z_velocity ); - free( fields.internal_energy ); + free(fields.internal_energy); #ifdef OUTPUT_TEMPERATURE - free( temperature ); + free(temperature); #endif - } #endif - diff --git a/src/cooling_grackle/cool_grackle.h b/src/cooling_grackle/cool_grackle.h index c1fab3812..a8e19c338 100644 --- a/src/cooling_grackle/cool_grackle.h +++ b/src/cooling_grackle/cool_grackle.h @@ -1,18 +1,17 @@ #ifdef COOLING_GRACKLE -#ifndef INIT_GRACKLE_H -#define INIT_GRACKLE_H + #ifndef INIT_GRACKLE_H + #define INIT_GRACKLE_H -#include "../global/global.h" + #include "../global/global.h" extern "C" { -#include + #include } class Cool_GK { - public: - + public: code_units units; chemistry_data *data; @@ -27,9 +26,9 @@ class Cool_GK Real temperature_units; - #ifdef OUTPUT_TEMPERATURE + #ifdef OUTPUT_TEMPERATURE Real *temperature; - #endif + #endif Real tiny_number; @@ -39,17 +38,15 @@ class Cool_GK grackle_field_data fields; int field_size; + Cool_GK(void); -Cool_GK( void ); - -void Initialize( struct parameters *P, Cosmology &Cosmo ); + void Initialize(struct parameters *P, Cosmology &Cosmo); -void Free_Memory(); -// void Do_Cooling_Step( Real dt ); - -Real Get_Mean_Molecular_Weight( int cell_id ); + void Free_Memory(); + // void Do_Cooling_Step( Real dt ); + Real Get_Mean_Molecular_Weight(int cell_id); }; -#endif + #endif #endif diff --git a/src/cooling_grackle/grackle_functions.cpp b/src/cooling_grackle/grackle_functions.cpp index 6e1b48ed6..e3ba1213b 100644 --- a/src/cooling_grackle/grackle_functions.cpp +++ b/src/cooling_grackle/grackle_functions.cpp @@ -1,124 +1,121 @@ #ifdef COOLING_GRACKLE -#include -#include -#include -#include "../io/io.h" -#include "../cooling_grackle/cool_grackle.h" - -#ifdef PARALLEL_OMP -#include "../utils/parallel_omp.h" -#endif - - - + #include + #include + #include + #include "../cooling_grackle/cool_grackle.h" + #include "../io/io.h" -void Grid3D::Initialize_Fields_Grackle(){ + #ifdef PARALLEL_OMP + #include "../utils/parallel_omp.h" + #endif +void Grid3D::Initialize_Fields_Grackle() +{ int nx_g, ny_g, nz_g, nx, ny, nz, nGHST; - nx_g = H.nx; - ny_g = H.ny; - nz_g = H.nz; - nx = H.nx_real; - ny = H.ny_real; - nz = H.nz_real; + nx_g = H.nx; + ny_g = H.ny; + nz_g = H.nz; + nx = H.nx_real; + ny = H.ny_real; + nz = H.nz_real; nGHST = H.n_ghost; Real d, vx, vy, vz, E, Ekin, GE, U; bool flag_DE; int i, j, k, i_g, j_g, k_g, id; - for (k=0; kH0; + cosmo_h = H0 / 100; + H0 /= 1000; //[km/s / kpc] + Omega_M = P->Omega_M; + Omega_L = P->Omega_L; + Omega_K = 1 - (Omega_M + Omega_L); + Omega_b = P->Omega_b; - chprintf( "Cosmological Simulation\n"); - - H0 = P-> H0; - cosmo_h = H0/100; - H0 /= 1000; //[km/s / kpc] - Omega_M = P-> Omega_M; - Omega_L = P-> Omega_L; - Omega_K = 1 - ( Omega_M + Omega_L ); - Omega_b = P-> Omega_b; - - if(strcmp(P->init, "Read_Grid")==0){ + if (strcmp(P->init, "Read_Grid") == 0) { // Read scale factor value from Particles current_z = Particles.current_z; current_a = Particles.current_a; - } - else{ - current_z = P->Init_redshift; - current_a = 1. / ( current_z + 1 ); + } else { + current_z = P->Init_redshift; + current_a = 1. / (current_z + 1); Particles.current_z = current_z; Particles.current_a = current_a; } - // Set Scale factor in Gravity Grav.current_a = current_a; @@ -42,53 +40,40 @@ void Cosmology::Initialize( struct parameters *P, Grav3D &Grav, Particles_3D &Pa Grav.Gconst = cosmo_G; max_delta_a = 0.001; - delta_a = max_delta_a; + delta_a = max_delta_a; // Initialize Time and set the time conversion - t_secs = 0; + t_secs = 0; time_conversion = KPC; - // Set Normalization factors - r_0_dm = P->xlen/P->nx; + r_0_dm = P->xlen / P->nx; t_0_dm = 1. / H0; v_0_dm = r_0_dm / t_0_dm / cosmo_h; - rho_0_dm = 3*H0*H0 / ( 8*M_PI*cosmo_G ) * Omega_M /cosmo_h/cosmo_h; - rho_mean_baryon = 3*H0*H0 / ( 8*M_PI*cosmo_G ) * Omega_b /cosmo_h/cosmo_h; + rho_0_dm = 3 * H0 * H0 / (8 * M_PI * cosmo_G) * Omega_M / cosmo_h / cosmo_h; + rho_mean_baryon = + 3 * H0 * H0 / (8 * M_PI * cosmo_G) * Omega_b / cosmo_h / cosmo_h; // dens_avrg = 0; - r_0_gas = 1.0; - rho_0_gas = 3*H0*H0 / ( 8*M_PI*cosmo_G ) * Omega_M /cosmo_h/cosmo_h; - t_0_gas = 1/H0*cosmo_h; - v_0_gas = r_0_gas / t_0_gas; + r_0_gas = 1.0; + rho_0_gas = 3 * H0 * H0 / (8 * M_PI * cosmo_G) * Omega_M / cosmo_h / cosmo_h; + t_0_gas = 1 / H0 * cosmo_h; + v_0_gas = r_0_gas / t_0_gas; phi_0_gas = v_0_gas * v_0_gas; - p_0_gas = rho_0_gas * v_0_gas * v_0_gas; - e_0_gas = v_0_gas * v_0_gas; - - chprintf( " H0: %f\n", H0 * 1000 ); - chprintf( " Omega_L: %f\n", Omega_L ); - chprintf( " Omega_M: %f\n", Omega_M ); - chprintf( " Omega_b: %f\n", Omega_b ); - chprintf( " Current_a: %f\n", current_a ); - chprintf( " Current_z: %f\n", current_z ); - chprintf( " rho_0: %f\n", rho_0_gas ); - chprintf( " v_0: %f \n", v_0_gas ); - chprintf( " Max delta_a: %f \n", MAX_DELTA_A); - - Set_Scale_Outputs( P ); - + p_0_gas = rho_0_gas * v_0_gas * v_0_gas; + e_0_gas = v_0_gas * v_0_gas; + + chprintf(" H0: %f\n", H0 * 1000); + chprintf(" Omega_L: %f\n", Omega_L); + chprintf(" Omega_M: %f\n", Omega_M); + chprintf(" Omega_b: %f\n", Omega_b); + chprintf(" Current_a: %f\n", current_a); + chprintf(" Current_z: %f\n", current_z); + chprintf(" rho_0: %f\n", rho_0_gas); + chprintf(" v_0: %f \n", v_0_gas); + chprintf(" Max delta_a: %f \n", MAX_DELTA_A); + + Set_Scale_Outputs(P); } - - - - - - - - - - - - #endif diff --git a/src/cosmology/cosmology.h b/src/cosmology/cosmology.h index b45e904b1..d9cf14bcf 100644 --- a/src/cosmology/cosmology.h +++ b/src/cosmology/cosmology.h @@ -1,19 +1,19 @@ #ifdef COSMOLOGY -#ifndef COSMOLOGY_H -#define COSMOLOGY_H + #ifndef COSMOLOGY_H + #define COSMOLOGY_H -#include -#include -#include "../global/global.h" -#include "../particles/particles_3D.h" -#include "../gravity/grav3D.h" + #include + #include + + #include "../global/global.h" + #include "../gravity/grav3D.h" + #include "../particles/particles_3D.h" class Cosmology { -public: - + public: Real H0; Real Omega_M; Real Omega_L; @@ -54,21 +54,19 @@ class Cosmology Real next_output; bool exit_now; + Cosmology(void); + void Initialize(struct parameters *P, Grav3D &Grav, Particles_3D &Particles); - Cosmology( void ); - void Initialize( struct parameters *P, Grav3D &Grav, Particles_3D &Particles ); - - void Load_Scale_Outputs( struct parameters *P ); - void Set_Scale_Outputs( struct parameters *P ); + void Load_Scale_Outputs(struct parameters *P); + void Set_Scale_Outputs(struct parameters *P); - void Set_Next_Scale_Output( ); + void Set_Next_Scale_Output(); - Real Get_Hubble_Parameter( Real a ); - - Real Get_da_from_dt( Real dt ); - Real Get_dt_from_da( Real da ); + Real Get_Hubble_Parameter(Real a); + Real Get_da_from_dt(Real dt); + Real Get_dt_from_da(Real da); }; -#endif + #endif #endif diff --git a/src/cosmology/cosmology_functions.cpp b/src/cosmology/cosmology_functions.cpp index 6dfcfa7d7..d58ae9c2d 100644 --- a/src/cosmology/cosmology_functions.cpp +++ b/src/cosmology/cosmology_functions.cpp @@ -1,134 +1,132 @@ #ifdef COSMOLOGY + #include "../global/global.h" + #include "../grid/grid3D.h" + #include "../grid/grid_enum.h" + #include "../io/io.h" -#include "../grid/grid3D.h" -#include "../global/global.h" -#include "../io/io.h" -#include "../grid/grid_enum.h" - - - -void Grid3D::Initialize_Cosmology( struct parameters *P ){ - - chprintf( "Initializing Cosmology... \n"); - Cosmo.Initialize( P, Grav, Particles ); +void Grid3D::Initialize_Cosmology(struct parameters *P) +{ + chprintf("Initializing Cosmology... \n"); + Cosmo.Initialize(P, Grav, Particles); // Change to comoving Cosmological System - Change_Cosmological_Frame_Sytem( true ); + Change_Cosmological_Frame_Sytem(true); - if ( fabs( Cosmo.current_a - Cosmo.next_output ) < 1e-5 ) H.Output_Now = true; - - chprintf( "Cosmology Successfully Initialized. \n\n"); + if (fabs(Cosmo.current_a - Cosmo.next_output) < 1e-5) H.Output_Now = true; + chprintf("Cosmology Successfully Initialized. \n\n"); } -Real Cosmology::Get_da_from_dt( Real dt ){ - Real a2 = current_a * current_a; - Real a_dot = sqrt( Omega_M/current_a + a2*Omega_L + Omega_K ) * H0 ; +Real Cosmology::Get_da_from_dt(Real dt) +{ + Real a2 = current_a * current_a; + Real a_dot = sqrt(Omega_M / current_a + a2 * Omega_L + Omega_K) * H0; return a_dot * dt; } -Real Cosmology::Get_dt_from_da( Real da ){ - Real a2 = current_a * current_a; - Real a_dot = sqrt( Omega_M/current_a + a2*Omega_L + Omega_K ) * H0 ; +Real Cosmology::Get_dt_from_da(Real da) +{ + Real a2 = current_a * current_a; + Real a_dot = sqrt(Omega_M / current_a + a2 * Omega_L + Omega_K) * H0; return da / a_dot; } -Real Cosmology::Get_Hubble_Parameter( Real a ){ - Real a2 = a * a; - Real a3 = a2 * a; - Real factor = ( Omega_M/a3 + Omega_K/a2 + Omega_L ); +Real Cosmology::Get_Hubble_Parameter(Real a) +{ + Real a2 = a * a; + Real a3 = a2 * a; + Real factor = (Omega_M / a3 + Omega_K / a2 + Omega_L); return H0 * sqrt(factor); } -void Grid3D::Change_Cosmological_Frame_Sytem( bool forward ){ - - if (forward) chprintf( " Converting to Cosmological Comoving System\n"); - else chprintf( " Converting to Cosmological Physical System\n"); +void Grid3D::Change_Cosmological_Frame_Sytem(bool forward) +{ + if (forward) + chprintf(" Converting to Cosmological Comoving System\n"); + else + chprintf(" Converting to Cosmological Physical System\n"); - Change_DM_Frame_System( forward ); + Change_DM_Frame_System(forward); #ifndef ONLY_PARTICLES - Change_GAS_Frame_System_GPU( forward ); + Change_GAS_Frame_System_GPU(forward); - Change_GAS_Frame_System( forward ); - #endif//ONLY_PARTICLES + Change_GAS_Frame_System(forward); + #endif // ONLY_PARTICLES } -void Grid3D::Change_DM_Frame_System( bool forward ){ - +void Grid3D::Change_DM_Frame_System(bool forward) +{ #ifdef PARTICLES_CPU part_int_t pIndx; Real vel_factor; vel_factor = 1; - - for ( pIndx=0; pIndx= nx || tid_y >= ny || tid_z >= nz ) return; + if (tid_x >= nx || tid_y >= ny || tid_z >= nz) return; - tid_grid = tid_x + tid_y*nx + tid_z*nx*ny; + tid_grid = tid_x + tid_y * nx + tid_z * nx * ny; - density_d[tid_grid] = density_d[tid_grid] * dens_factor; + density_d[tid_grid] = density_d[tid_grid] * dens_factor; momentum_x_d[tid_grid] = momentum_x_d[tid_grid] * momentum_factor; momentum_y_d[tid_grid] = momentum_y_d[tid_grid] * momentum_factor; momentum_z_d[tid_grid] = momentum_z_d[tid_grid] * momentum_factor; - Energy_d[tid_grid] = Energy_d[tid_grid] * energy_factor; + Energy_d[tid_grid] = Energy_d[tid_grid] * energy_factor; #ifdef DE - GasEnergy_d[tid_grid] = GasEnergy_d[tid_grid] * energy_factor; + GasEnergy_d[tid_grid] = GasEnergy_d[tid_grid] * energy_factor; #endif - //NOTE If CHEMISTRY_GPU I need to add the conversion for the chemical species here - + // NOTE If CHEMISTRY_GPU I need to add the conversion for the chemical species + // here } - -void Grid3D::Change_GAS_Frame_System_GPU( bool forward ){ - +void Grid3D::Change_GAS_Frame_System_GPU(bool forward) +{ Real dens_factor, momentum_factor, energy_factor; - if ( forward ){ - dens_factor = 1 / Cosmo.rho_0_gas; + if (forward) { + dens_factor = 1 / Cosmo.rho_0_gas; momentum_factor = 1 / Cosmo.rho_0_gas / Cosmo.v_0_gas * Cosmo.current_a; - energy_factor = 1 / Cosmo.rho_0_gas / Cosmo.v_0_gas / Cosmo.v_0_gas * Cosmo.current_a * Cosmo.current_a; - } - else{ - dens_factor = Cosmo.rho_0_gas; - momentum_factor = Cosmo.rho_0_gas * Cosmo.v_0_gas / Cosmo.current_a; - energy_factor = Cosmo.rho_0_gas * Cosmo.v_0_gas * Cosmo.v_0_gas / Cosmo.current_a / Cosmo.current_a; + energy_factor = 1 / Cosmo.rho_0_gas / Cosmo.v_0_gas / Cosmo.v_0_gas * + Cosmo.current_a * Cosmo.current_a; + } else { + dens_factor = Cosmo.rho_0_gas; + momentum_factor = Cosmo.rho_0_gas * Cosmo.v_0_gas / Cosmo.current_a; + energy_factor = Cosmo.rho_0_gas * Cosmo.v_0_gas * Cosmo.v_0_gas / + Cosmo.current_a / Cosmo.current_a; } int nx, ny, nz; @@ -60,9 +59,9 @@ void Grid3D::Change_GAS_Frame_System_GPU( bool forward ){ nz = H.nz; // set values for GPU kernels - int tpb_x = TPBX_COSMO; - int tpb_y = TPBY_COSMO; - int tpb_z = TPBZ_COSMO; + int tpb_x = TPBX_COSMO; + int tpb_y = TPBY_COSMO; + int tpb_z = TPBZ_COSMO; int ngrid_x = (nx - 1) / tpb_x + 1; int ngrid_y = (ny - 1) / tpb_y + 1; int ngrid_z = (nz - 1) / tpb_z + 1; @@ -78,12 +77,10 @@ void Grid3D::Change_GAS_Frame_System_GPU( bool forward ){ GasEnergy_d = NULL; #endif - hipLaunchKernelGGL(Change_GAS_Frame_System_kernel, dim3dGrid, dim3dBlock, 0, 0, dens_factor, momentum_factor, energy_factor, nx, ny, nz, - C.d_density, C.d_momentum_x, C.d_momentum_y, C.d_momentum_z, C.d_Energy, GasEnergy_d ); - + hipLaunchKernelGGL(Change_GAS_Frame_System_kernel, dim3dGrid, dim3dBlock, 0, + 0, dens_factor, momentum_factor, energy_factor, nx, ny, nz, + C.d_density, C.d_momentum_x, C.d_momentum_y, + C.d_momentum_z, C.d_Energy, GasEnergy_d); } - - - -#endif //COSMOLOGY +#endif // COSMOLOGY diff --git a/src/cosmology/cosmology_functions_gpu.h b/src/cosmology/cosmology_functions_gpu.h index ced300114..092e13bdf 100644 --- a/src/cosmology/cosmology_functions_gpu.h +++ b/src/cosmology/cosmology_functions_gpu.h @@ -1,18 +1,15 @@ -#if defined(COSMOLOGY) +#if defined(COSMOLOGY) + #include "../global/global.h" + #include "../grid/grid3D.h" + #include "../io/io.h" + #include "../utils/gpu.hpp" -#include "../grid/grid3D.h" -#include "../global/global.h" -#include "../io/io.h" -#include "../utils/gpu.hpp" + #define TPBX_COSMO 16 + #define TPBY_COSMO 8 + #define TPBZ_COSMO 8 -#define TPBX_COSMO 16 -#define TPBY_COSMO 8 -#define TPBZ_COSMO 8 +// __device__ Real Get_Hubble_Parameter_dev( Real a, Real H0, Real Omega_M, Real +// Omega_L, Real Omega_K ); -// __device__ Real Get_Hubble_Parameter_dev( Real a, Real H0, Real Omega_M, Real Omega_L, Real Omega_K ); - - - - -#endif //COSMOLOGY +#endif // COSMOLOGY diff --git a/src/cosmology/io_cosmology.cpp b/src/cosmology/io_cosmology.cpp index c4f9aa029..b20657f7c 100644 --- a/src/cosmology/io_cosmology.cpp +++ b/src/cosmology/io_cosmology.cpp @@ -1,36 +1,35 @@ #ifdef COSMOLOGY -#include -#include -#include "../cosmology/cosmology.h" -#include "../io/io.h" - -using namespace std; + #include + #include + #include "../cosmology/cosmology.h" + #include "../io/io.h" -void Cosmology::Load_Scale_Outputs( struct parameters *P ) { +using namespace std; +void Cosmology::Load_Scale_Outputs(struct parameters *P) +{ char filename_1[100]; // create the filename to read from strcpy(filename_1, P->scale_outputs_file); - chprintf( " Loading Scale_Factor Outpus: %s\n", filename_1); + chprintf(" Loading Scale_Factor Outpus: %s\n", filename_1); - ifstream file_out ( filename_1 ); + ifstream file_out(filename_1); string line; Real a_value; - if (file_out.is_open()){ - while ( getline (file_out,line) ){ - a_value = atof( line.c_str() ); - scale_outputs.push_back( a_value ); + if (file_out.is_open()) { + while (getline(file_out, line)) { + a_value = atof(line.c_str()); + scale_outputs.push_back(a_value); n_outputs += 1; // chprintf("%f\n", a_value); } file_out.close(); - n_outputs = scale_outputs.size(); + n_outputs = scale_outputs.size(); next_output_indx = 0; chprintf(" Loaded %d scale outputs \n", n_outputs); - } - else{ + } else { chprintf(" Error: Unable to open cosmology outputs file\n"); exit(1); } @@ -38,62 +37,55 @@ void Cosmology::Load_Scale_Outputs( struct parameters *P ) { chprintf(" Setting next snapshot output\n"); int scale_indx = next_output_indx; - a_value = scale_outputs[scale_indx]; + a_value = scale_outputs[scale_indx]; - while ( (current_a - a_value) > 1e-3 ){ + while ((current_a - a_value) > 1e-3) { // chprintf( "%f %f\n", a_value, current_a); scale_indx += 1; a_value = scale_outputs[scale_indx]; } next_output_indx = scale_indx; - next_output = a_value; - chprintf(" Next output index: %d \n", next_output_indx ); - chprintf(" Next output z value: %f \n", 1./next_output - 1 ); + next_output = a_value; + chprintf(" Next output index: %d \n", next_output_indx); + chprintf(" Next output z value: %f \n", 1. / next_output - 1); exit_now = false; - } -void Cosmology::Set_Scale_Outputs( struct parameters *P ){ - - if ( P->scale_outputs_file[0] == '\0' ){ - chprintf( " Output every %d timesteps.\n", P->n_steps_output ); - Real scale_end = 1 / ( P->End_redshift + 1); - scale_outputs.push_back( current_a ); - scale_outputs.push_back( scale_end ); - n_outputs = scale_outputs.size(); +void Cosmology::Set_Scale_Outputs(struct parameters *P) +{ + if (P->scale_outputs_file[0] == '\0') { + chprintf(" Output every %d timesteps.\n", P->n_steps_output); + Real scale_end = 1 / (P->End_redshift + 1); + scale_outputs.push_back(current_a); + scale_outputs.push_back(scale_end); + n_outputs = scale_outputs.size(); next_output_indx = 0; - next_output = current_a; - chprintf(" Next output index: %d \n", next_output_indx ); - chprintf(" Next output z value: %f \n", 1./next_output - 1 ); - } - else Load_Scale_Outputs( P ); - - - + next_output = current_a; + chprintf(" Next output index: %d \n", next_output_indx); + chprintf(" Next output z value: %f \n", 1. / next_output - 1); + } else + Load_Scale_Outputs(P); } - -void Cosmology::Set_Next_Scale_Output( ){ - - +void Cosmology::Set_Next_Scale_Output() +{ int scale_indx = next_output_indx; - Real a_value = scale_outputs[scale_indx]; - // chprintf("Setting next output index. Current index: %d n_outputs: %d ", scale_indx, n_outputs); + Real a_value = scale_outputs[scale_indx]; + // chprintf("Setting next output index. Current index: %d n_outputs: %d ", + // scale_indx, n_outputs); - // if ( ( scale_indx == 0 ) && ( abs(a_value - current_a )<1e-5 ) )scale_indx = 1; + // if ( ( scale_indx == 0 ) && ( abs(a_value - current_a )<1e-5 ) )scale_indx + // = 1; scale_indx += 1; - if ( scale_indx < n_outputs ){ - a_value = scale_outputs[scale_indx]; + if (scale_indx < n_outputs) { + a_value = scale_outputs[scale_indx]; next_output_indx = scale_indx; - next_output = a_value; - } - else{ + next_output = a_value; + } else { exit_now = true; } - } - #endif diff --git a/src/dust/dust_cuda.cu b/src/dust/dust_cuda.cu index 50356c3c5..5df251725 100644 --- a/src/dust/dust_cuda.cu +++ b/src/dust/dust_cuda.cu @@ -1,133 +1,139 @@ #ifdef CUDA -#ifdef DUST - -#include "dust_cuda.h" - -#include -#include -#include - -#include - -#include "../global/global.h" -#include "../global/global_cuda.h" -#include "../utils/gpu.hpp" -#include "../utils/hydro_utilities.h" -#include "../utils/cuda_utilities.h" -#include "../grid/grid3D.h" -#include "../grid/grid_enum.h" - -void Dust_Update(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dt, Real gamma) { - int n_cells = nx * ny * nz; - int ngrid = (n_cells + TPB - 1) / TPB; - dim3 dim1dGrid(ngrid, 1, 1); - dim3 dim1dBlock(TPB, 1, 1); - hipLaunchKernelGGL(Dust_Kernel, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, nz, n_ghost, n_fields, dt, gamma); - CudaCheckError(); + #ifdef DUST + + #include + + #include + #include + #include + + #include "../global/global.h" + #include "../global/global_cuda.h" + #include "../grid/grid3D.h" + #include "../grid/grid_enum.h" + #include "../utils/cuda_utilities.h" + #include "../utils/gpu.hpp" + #include "../utils/hydro_utilities.h" + #include "dust_cuda.h" + +void Dust_Update(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, + int n_fields, Real dt, Real gamma) +{ + int n_cells = nx * ny * nz; + int ngrid = (n_cells + TPB - 1) / TPB; + dim3 dim1dGrid(ngrid, 1, 1); + dim3 dim1dBlock(TPB, 1, 1); + hipLaunchKernelGGL(Dust_Kernel, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, + nx, ny, nz, n_ghost, n_fields, dt, gamma); + CudaCheckError(); } -__global__ void Dust_Kernel(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dt, Real gamma) { - // get grid indices - int n_cells = nx * ny * nz; - int is, ie, js, je, ks, ke; - cuda_utilities::Get_Real_Indices(n_ghost, nx, ny, nz, is, ie, js, je, ks, ke); - // get a global thread ID - int blockId = blockIdx.x + blockIdx.y * gridDim.x; - int id = threadIdx.x + blockId * blockDim.x; - int zid = id / (nx * ny); - int yid = (id - zid * nx * ny) / nx; - int xid = id - zid * nx * ny - yid * nx; - // add a thread id within the block - - // define physics variables - Real d_gas, d_dust; // fluid mass densities - Real n; // gas number density - Real mu = 0.6; // mean molecular weight - Real T, E, P; // temperature, energy, pressure - Real vx, vy, vz; // velocities +__global__ void Dust_Kernel(Real *dev_conserved, int nx, int ny, int nz, + int n_ghost, int n_fields, Real dt, Real gamma) +{ + // get grid indices + int n_cells = nx * ny * nz; + int is, ie, js, je, ks, ke; + cuda_utilities::Get_Real_Indices(n_ghost, nx, ny, nz, is, ie, js, je, ks, ke); + // get a global thread ID + int blockId = blockIdx.x + blockIdx.y * gridDim.x; + int id = threadIdx.x + blockId * blockDim.x; + int zid = id / (nx * ny); + int yid = (id - zid * nx * ny) / nx; + int xid = id - zid * nx * ny - yid * nx; + // add a thread id within the block + + // define physics variables + Real d_gas, d_dust; // fluid mass densities + Real n; // gas number density + Real mu = 0.6; // mean molecular weight + Real T, E, P; // temperature, energy, pressure + Real vx, vy, vz; // velocities #ifdef DE - Real ge; - #endif // DE - - // define integration variables - Real dd_dt; // instantaneous rate of change in dust density - Real dd; // change in dust density at current time-step - Real dd_max = 0.01; // allowable percentage of dust density increase - Real dt_sub; //refined timestep - - if (xid >= is && xid < ie && yid >= js && yid < je && zid >= ks && zid < ke) { - // get conserved quanitites - d_gas = dev_conserved[id + n_cells*grid_enum::density]; - d_dust = dev_conserved[id + n_cells*grid_enum::dust_density]; - E = dev_conserved[id + n_cells*grid_enum::Energy]; - - n = d_gas*DENSITY_UNIT / (mu*MP); - - if (E < 0.0 || E != E) return; - - vx = dev_conserved[id + n_cells*grid_enum::momentum_x ] / d_gas; - vy = dev_conserved[id + n_cells*grid_enum::momentum_y ] / d_gas; - vz = dev_conserved[id + n_cells*grid_enum::momentum_z ] / d_gas; - #ifdef DE - ge = dev_conserved[id + n_cells*grid_enum::GasEnergy ] / d_gas; - ge = fmax(ge, (Real) TINY_NUMBER); - #endif // DE - - // calculate physical quantities - P = hydro_utilities::Calc_Pressure_Primitive(E, d_gas, vx, vy, vz, gamma); - - Real T_init; - T_init = hydro_utilities::Calc_Temp(P, n); - - #ifdef DE - T_init = hydro_utilities::Calc_Temp_DE(d_gas, ge, gamma, n); - #endif // DE - - T = T_init; - - Real tau_sp = calc_tau_sp(n, T) / TIME_UNIT; // kyr, sim units - - dd_dt = calc_dd_dt(d_dust, tau_sp); - dd = dd_dt * dt; - - // ensure that dust density is not changing too rapidly - while (dd/d_dust > dd_max) { - dt_sub = dd_max * d_dust / dd_dt; - d_dust += dt_sub * dd_dt; - dt -= dt_sub; - dd_dt = calc_dd_dt(d_dust, tau_sp); - dd = dt * dd_dt; - } - - // update dust density - d_dust += dd; - - dev_conserved[id + n_cells*grid_enum::dust_density ] = d_dust; - - #ifdef DE - dev_conserved[id + n_cells*grid_enum::GasEnergy ] = d_dust*ge; - #endif + Real ge; + #endif // DE + + // define integration variables + Real dd_dt; // instantaneous rate of change in dust density + Real dd; // change in dust density at current time-step + Real dd_max = 0.01; // allowable percentage of dust density increase + Real dt_sub; // refined timestep + + if (xid >= is && xid < ie && yid >= js && yid < je && zid >= ks && zid < ke) { + // get conserved quanitites + d_gas = dev_conserved[id + n_cells * grid_enum::density]; + d_dust = dev_conserved[id + n_cells * grid_enum::dust_density]; + E = dev_conserved[id + n_cells * grid_enum::Energy]; + + n = d_gas * DENSITY_UNIT / (mu * MP); + + if (E < 0.0 || E != E) return; + + vx = dev_conserved[id + n_cells * grid_enum::momentum_x] / d_gas; + vy = dev_conserved[id + n_cells * grid_enum::momentum_y] / d_gas; + vz = dev_conserved[id + n_cells * grid_enum::momentum_z] / d_gas; + #ifdef DE + ge = dev_conserved[id + n_cells * grid_enum::GasEnergy] / d_gas; + ge = fmax(ge, (Real)TINY_NUMBER); + #endif // DE + + // calculate physical quantities + P = hydro_utilities::Calc_Pressure_Primitive(E, d_gas, vx, vy, vz, gamma); + + Real T_init; + T_init = hydro_utilities::Calc_Temp(P, n); + + #ifdef DE + T_init = hydro_utilities::Calc_Temp_DE(d_gas, ge, gamma, n); + #endif // DE + + T = T_init; + + Real tau_sp = calc_tau_sp(n, T) / TIME_UNIT; // kyr, sim units + + dd_dt = calc_dd_dt(d_dust, tau_sp); + dd = dd_dt * dt; + + // ensure that dust density is not changing too rapidly + while (dd / d_dust > dd_max) { + dt_sub = dd_max * d_dust / dd_dt; + d_dust += dt_sub * dd_dt; + dt -= dt_sub; + dd_dt = calc_dd_dt(d_dust, tau_sp); + dd = dt * dd_dt; } + + // update dust density + d_dust += dd; + + dev_conserved[id + n_cells * grid_enum::dust_density] = d_dust; + + #ifdef DE + dev_conserved[id + n_cells * grid_enum::GasEnergy] = d_dust * ge; + #endif + } } // McKinnon et al. (2017) -__device__ __host__ Real calc_tau_sp(Real n, Real T) { +__device__ __host__ Real calc_tau_sp(Real n, Real T) +{ Real YR_IN_S = 3.154e7; - Real a1 = 1; // dust grain size in units of 0.1 micrometers - Real d0 = n / (6e-4); // gas density in units of 10^-27 g/cm^3 - Real T_0 = 2e6; // K - Real omega = 2.5; - Real A = 0.17e9 * YR_IN_S; // 0.17 Gyr in s + Real a1 = 1; // dust grain size in units of 0.1 micrometers + Real d0 = n / (6e-4); // gas density in units of 10^-27 g/cm^3 + Real T_0 = 2e6; // K + Real omega = 2.5; + Real A = 0.17e9 * YR_IN_S; // 0.17 Gyr in s - Real tau_sp = A * (a1/d0) * (pow(T_0/T, omega) + 1); // s + Real tau_sp = A * (a1 / d0) * (pow(T_0 / T, omega) + 1); // s return tau_sp; } // McKinnon et al. (2017) -__device__ __host__ Real calc_dd_dt(Real d_dust, Real tau_sp) { - return -d_dust / (tau_sp/3); +__device__ __host__ Real calc_dd_dt(Real d_dust, Real tau_sp) +{ + return -d_dust / (tau_sp / 3); } -#endif // DUST -#endif // CUDA + #endif // DUST +#endif // CUDA diff --git a/src/dust/dust_cuda.h b/src/dust/dust_cuda.h index ac2f02c50..5fd9ffb33 100644 --- a/src/dust/dust_cuda.h +++ b/src/dust/dust_cuda.h @@ -1,21 +1,24 @@ #ifdef CUDA -#ifdef DUST + #ifdef DUST -#ifndef DUST_CUDA_H -#define DUST_CUDA_H + #ifndef DUST_CUDA_H + #define DUST_CUDA_H -#include "../utils/gpu.hpp" -#include -#include "../global/global.h" + #include -void Dust_Update(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dt, Real gamma); + #include "../global/global.h" + #include "../utils/gpu.hpp" -__global__ void Dust_Kernel(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dt, Real gamma); +void Dust_Update(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, + int n_fields, Real dt, Real gamma); + +__global__ void Dust_Kernel(Real *dev_conserved, int nx, int ny, int nz, + int n_ghost, int n_fields, Real dt, Real gamma); __device__ __host__ Real calc_tau_sp(Real n, Real T); __device__ __host__ Real calc_dd_dt(Real d_dust, Real tau_sp); -#endif // DUST -#endif // CUDA -#endif // DUST_CUDA_H \ No newline at end of file + #endif // DUST + #endif // CUDA +#endif // DUST_CUDA_H \ No newline at end of file diff --git a/src/dust/dust_cuda_tests.cpp b/src/dust/dust_cuda_tests.cpp index 2f2742cc4..b6a56a292 100644 --- a/src/dust/dust_cuda_tests.cpp +++ b/src/dust/dust_cuda_tests.cpp @@ -1,74 +1,85 @@ /*! -* \file dust_cuda_tests.cpp -* \author Helena Richie (helenarichie@pitt.edu) -* \brief Test dust model functions -* -*/ + * \file dust_cuda_tests.cpp + * \author Helena Richie (helenarichie@pitt.edu) + * \brief Test dust model functions + * + */ // STL Includes +#include + #include #include -#include // External Includes -#include // Include GoogleTest and related libraries/headers +#include // Include GoogleTest and related libraries/headers // Local Includes +#include "../dust/dust_cuda.h" // Include code to test #include "../global/global_cuda.h" #include "../utils/gpu.hpp" #include "../utils/testing_utilities.h" -#include "../dust/dust_cuda.h" // Include code to test #ifdef DUST -TEST(tDUSTTestSputteringTimescale, CorrectInputExpectCorrectOutput) // test suite name, test name +TEST(tDUSTTestSputteringTimescale, + CorrectInputExpectCorrectOutput) // test suite name, test name { - // Parameters - Real YR_IN_S = 3.154e7; - Real const k_test_n = 1; - Real const k_test_T = pow(10, 5.0); + // Parameters + Real YR_IN_S = 3.154e7; + Real const k_test_n = 1; + Real const k_test_T = pow(10, 5.0); - Real const k_fiducial_num = 182565146.96398282; + Real const k_fiducial_num = 182565146.96398282; - Real test_num = calc_tau_sp(k_test_n, k_test_T) / YR_IN_S; // yr + Real test_num = calc_tau_sp(k_test_n, k_test_T) / YR_IN_S; // yr - double abs_diff; - int64_t ulps_diff; + double abs_diff; + int64_t ulps_diff; - bool is_true; + bool is_true; - is_true = testingUtilities::nearlyEqualDbl(k_fiducial_num, test_num, abs_diff, ulps_diff); - - EXPECT_TRUE(is_true) - << "The fiducial value is: " << k_fiducial_num << std::endl - << "The test value is: " << test_num << std::endl - << "The absolute difference is: " << abs_diff << std::endl - << "The ULP difference is: " << ulps_diff << std::endl; + is_true = testingUtilities::nearlyEqualDbl(k_fiducial_num, test_num, abs_diff, + ulps_diff); + + EXPECT_TRUE(is_true) << "The fiducial value is: " << k_fiducial_num + << std::endl + << "The test value is: " << test_num + << std::endl + << "The absolute difference is: " << abs_diff + << std::endl + << "The ULP difference is: " << ulps_diff + << std::endl; } -TEST(tDUSTTestSputteringGrowthRate, CorrectInputExpectCorrectOutput) // test suite name, test name +TEST(tDUSTTestSputteringGrowthRate, + CorrectInputExpectCorrectOutput) // test suite name, test name { - // Parameters - Real YR_IN_S = 3.154e7; - Real const k_test_tau_sp = 0.17e6; // kyr - Real const k_test_d_dust = 1e-26 / DENSITY_UNIT; // sim units + // Parameters + Real YR_IN_S = 3.154e7; + Real const k_test_tau_sp = 0.17e6; // kyr + Real const k_test_d_dust = 1e-26 / DENSITY_UNIT; // sim units + + Real const k_fiducial_num = -2.6073835738056728; - Real const k_fiducial_num = -2.6073835738056728; + Real test_num = calc_dd_dt(k_test_d_dust, k_test_tau_sp); - Real test_num = calc_dd_dt(k_test_d_dust, k_test_tau_sp); + double abs_diff; + int64_t ulps_diff; - double abs_diff; - int64_t ulps_diff; + bool is_true; - bool is_true; + is_true = testingUtilities::nearlyEqualDbl(k_fiducial_num, test_num, abs_diff, + ulps_diff); - is_true = testingUtilities::nearlyEqualDbl(k_fiducial_num, test_num, abs_diff, ulps_diff); - - EXPECT_TRUE(is_true) - << "The fiducial value is: " << k_fiducial_num << std::endl - << "The test value is: " << test_num << std::endl - << "The absolute difference is: " << abs_diff << std::endl - << "The ULP difference is: " << ulps_diff << std::endl; + EXPECT_TRUE(is_true) << "The fiducial value is: " << k_fiducial_num + << std::endl + << "The test value is: " << test_num + << std::endl + << "The absolute difference is: " << abs_diff + << std::endl + << "The ULP difference is: " << ulps_diff + << std::endl; } -#endif // DUST \ No newline at end of file +#endif // DUST \ No newline at end of file diff --git a/src/global/global.cpp b/src/global/global.cpp index ced97d788..9447f2548 100644 --- a/src/global/global.cpp +++ b/src/global/global.cpp @@ -1,56 +1,57 @@ /* \file global.cpp * \brief Global function definitions.*/ +#include "../global/global.h" +#include #include -#include #include #include #include +#include + #include -#include -#include "../global/global.h" -#include "../io/io.h" //defines chprintf + +#include "../io/io.h" //defines chprintf /* Global variables */ -Real gama; // Ratio of specific heats -Real C_cfl; // CFL number +Real gama; // Ratio of specific heats +Real C_cfl; // CFL number #ifdef PARTICLES -#ifdef MPI_CHOLLA + #ifdef MPI_CHOLLA // Constants for the inital size of the buffers for particles transfer // and the number of data transferred for each particle int N_PARTICLES_TRANSFER; int N_DATA_PER_PARTICLE_TRANSFER; + #endif #endif -#endif - /*! \fn void Set_Gammas(Real gamma_in) * \brief Set gamma values for Riemann solver */ void Set_Gammas(Real gamma_in) { - //set gamma - gama = gamma_in; - + // set gamma + gama = gamma_in; } - /*! \fn double get_time(void) * \brief Returns the current clock time. */ double get_time(void) { struct timeval timer; - gettimeofday(&timer,NULL); - return timer.tv_sec + 1.0e-6*timer.tv_usec; + gettimeofday(&timer, NULL); + return timer.tv_sec + 1.0e-6 * timer.tv_usec; } /*! \fn int sgn * \brief Mathematical sign function. Returns sign of x. */ int sgn(Real x) { - if (x < 0) return -1; - else return 1; + if (x < 0) + return -1; + else + return 1; } #ifndef CUDA @@ -60,380 +61,379 @@ Real calc_eta(Real cW[], Real gamma) { Real pl, pr, al, ar; - pl = (cW[8] - 0.5*(cW[2]*cW[2] + cW[4]*cW[4] + cW[6]*cW[6])/cW[0]) * (gamma-1.0); + pl = (cW[8] - 0.5 * (cW[2] * cW[2] + cW[4] * cW[4] + cW[6] * cW[6]) / cW[0]) * + (gamma - 1.0); pl = fmax(pl, TINY_NUMBER); - pr = (cW[9] - 0.5*(cW[3]*cW[3] + cW[5]*cW[5] + cW[7]*cW[7])/cW[1]) * (gamma-1.0); + pr = (cW[9] - 0.5 * (cW[3] * cW[3] + cW[5] * cW[5] + cW[7] * cW[7]) / cW[1]) * + (gamma - 1.0); pr = fmax(pr, TINY_NUMBER); - al = sqrt(gamma*pl/cW[0]); - ar = sqrt(gamma*pr/cW[1]); - - return 0.5*fabs((cW[3]/cW[1] + ar) - (cW[2]/cW[0]-al)); + al = sqrt(gamma * pl / cW[0]); + ar = sqrt(gamma * pr / cW[1]); + return 0.5 * fabs((cW[3] / cW[1] + ar) - (cW[2] / cW[0] - al)); } -#endif //NO CUDA - +#endif // NO CUDA /*! \fn char trim(char *s) * \brief Gets rid of trailing and leading whitespace. */ -char *trim (char * s) +char *trim(char *s) { /* Initialize start, end pointers */ - char *s1 = s, *s2 = &s[strlen (s) - 1]; + char *s1 = s, *s2 = &s[strlen(s) - 1]; /* Trim and delimit right side */ - while ( (isspace (*s2)) && (s2 >= s1) ) - s2--; - *(s2+1) = '\0'; + while ((isspace(*s2)) && (s2 >= s1)) s2--; + *(s2 + 1) = '\0'; /* Trim left side */ - while ( (isspace (*s1)) && (s1 < s2) ) - s1++; + while ((isspace(*s1)) && (s1 < s2)) s1++; /* Copy finished string */ - strcpy (s, s1); + strcpy(s, s1); return s; } -const std::set optionalParams = {"flag_delta", "ddelta_dt", "n_delta", - "Lz" , "Lx" , "phi" , "theta", "delta", "nzr", "nxr", "H0", "Omega_M", "Omega_L", - "Init_redshift", "End_redshift", "tile_length", "n_proc_x", "n_proc_y", "n_proc_z" }; +const std::set optionalParams = { + "flag_delta", "ddelta_dt", "n_delta", "Lz", + "Lx", "phi", "theta", "delta", + "nzr", "nxr", "H0", "Omega_M", + "Omega_L", "Init_redshift", "End_redshift", "tile_length", + "n_proc_x", "n_proc_y", "n_proc_z"}; /*! \fn int is_param_valid(char *name); - * \brief Verifies that a param is valid (even if not needed). Avoids "warnings" in output. */ -int is_param_valid(const char* param_name) { - for (auto it=optionalParams.begin(); it != optionalParams.end(); ++it) { - if (strcmp(param_name, *it) == 0) return 1; + * \brief Verifies that a param is valid (even if not needed). Avoids + * "warnings" in output. */ +int is_param_valid(const char *param_name) +{ + for (auto it = optionalParams.begin(); it != optionalParams.end(); ++it) { + if (strcmp(param_name, *it) == 0) return 1; } return 0; } -void parse_param(char *name,char *value, struct parameters *parms); - +void parse_param(char *name, char *value, struct parameters *parms); /*! \fn void parse_params(char *param_file, struct parameters * parms); * \brief Reads the parameters in the given file into a structure. */ -void parse_params (char *param_file, struct parameters * parms, int argc, char** argv) +void parse_params(char *param_file, struct parameters *parms, int argc, + char **argv) { int buf; char *s, buff[256]; - FILE *fp = fopen (param_file, "r"); - if (fp == NULL) - { - chprintf("Exiting at file %s line %d: failed to read param file %s \n", __FILE__, __LINE__, param_file); + FILE *fp = fopen(param_file, "r"); + if (fp == NULL) { + chprintf("Exiting at file %s line %d: failed to read param file %s \n", + __FILE__, __LINE__, param_file); exit(1); return; } // set default hydro file output parameter - parms->n_hydro=1; - parms->n_particle=1; - parms->n_slice=1; - parms->n_projection=1; - parms->n_rotated_projection=1; + parms->n_hydro = 1; + parms->n_particle = 1; + parms->n_slice = 1; + parms->n_projection = 1; + parms->n_rotated_projection = 1; #ifdef ROTATED_PROJECTION - //initialize rotation parameters to zero - parms->delta = 0; - parms->theta = 0; - parms->phi = 0; - parms->n_delta = 0; - parms->ddelta_dt = 0; + // initialize rotation parameters to zero + parms->delta = 0; + parms->theta = 0; + parms->phi = 0; + parms->n_delta = 0; + parms->ddelta_dt = 0; parms->flag_delta = 0; #endif /*ROTATED_PROJECTION*/ #ifdef COSMOLOGY -//Initialize file name as an empty string -parms->scale_outputs_file[0] = '\0'; + // Initialize file name as an empty string + parms->scale_outputs_file[0] = '\0'; #endif - /* Read next line */ - while ((s = fgets (buff, sizeof buff, fp)) != NULL) - { + while ((s = fgets(buff, sizeof buff, fp)) != NULL) { /* Skip blank lines and comments */ - if (buff[0] == '\n' || buff[0] == '#' || buff[0] == ';') - continue; + if (buff[0] == '\n' || buff[0] == '#' || buff[0] == ';') continue; /* Parse name/value pair from line */ char name[MAXLEN], value[MAXLEN]; - s = strtok (buff, "="); - if (s==NULL) + s = strtok(buff, "="); + if (s == NULL) continue; else - strncpy (name, s, MAXLEN); - s = strtok (NULL, "="); - if (s==NULL) + strncpy(name, s, MAXLEN); + s = strtok(NULL, "="); + if (s == NULL) continue; else - strncpy (value, s, MAXLEN); - trim (value); - parse_param(name,value,parms); + strncpy(value, s, MAXLEN); + trim(value); + parse_param(name, value, parms); } /* Close file */ - fclose (fp); + fclose(fp); // Parse overriding args from command line for (int i = 0; i < argc; ++i) { char name[MAXLEN], value[MAXLEN]; - s = strtok (argv[i], "="); - if (s==NULL) + s = strtok(argv[i], "="); + if (s == NULL) continue; else - strncpy (name, s, MAXLEN); - s = strtok (NULL, "="); - if (s==NULL) + strncpy(name, s, MAXLEN); + s = strtok(NULL, "="); + if (s == NULL) continue; else - strncpy (value, s, MAXLEN); - parse_param(name,value,parms); - chprintf("Override with %s=%s\n",name,value); - + strncpy(value, s, MAXLEN); + parse_param(name, value, parms); + chprintf("Override with %s=%s\n", name, value); } } /*! \fn void parse_param(char *name,char *value, struct parameters *parms); * \brief Parses and sets a single param based on name and value. */ -void parse_param(char *name,char *value, struct parameters *parms){ +void parse_param(char *name, char *value, struct parameters *parms) +{ /* Copy into correct entry in parameters struct */ - if (strcmp(name, "nx")==0) + if (strcmp(name, "nx") == 0) parms->nx = atoi(value); - else if (strcmp(name, "ny")==0) + else if (strcmp(name, "ny") == 0) parms->ny = atoi(value); - else if (strcmp(name, "nz")==0) + else if (strcmp(name, "nz") == 0) parms->nz = atoi(value); - else if (strcmp(name, "tout")==0) + else if (strcmp(name, "tout") == 0) parms->tout = atof(value); - else if (strcmp(name, "outstep")==0) + else if (strcmp(name, "outstep") == 0) parms->outstep = atof(value); - else if (strcmp(name, "n_steps_output")==0) + else if (strcmp(name, "n_steps_output") == 0) parms->n_steps_output = atoi(value); - else if (strcmp(name, "gamma")==0) + else if (strcmp(name, "gamma") == 0) parms->gamma = atof(value); - else if (strcmp(name, "init")==0) - strncpy (parms->init, value, MAXLEN); - else if (strcmp(name, "nfile")==0) + else if (strcmp(name, "init") == 0) + strncpy(parms->init, value, MAXLEN); + else if (strcmp(name, "nfile") == 0) parms->nfile = atoi(value); - else if (strcmp(name, "n_hydro")==0) + else if (strcmp(name, "n_hydro") == 0) parms->n_hydro = atoi(value); - else if (strcmp(name, "n_particle")==0) + else if (strcmp(name, "n_particle") == 0) parms->n_particle = atoi(value); - else if (strcmp(name, "n_projection")==0) + else if (strcmp(name, "n_projection") == 0) parms->n_projection = atoi(value); - else if (strcmp(name, "n_rotated_projection")==0) + else if (strcmp(name, "n_rotated_projection") == 0) parms->n_rotated_projection = atoi(value); - else if (strcmp(name, "n_slice")==0) + else if (strcmp(name, "n_slice") == 0) parms->n_slice = atoi(value); - else if (strcmp(name, "n_out_float32")==0) + else if (strcmp(name, "n_out_float32") == 0) parms->n_out_float32 = atoi(value); - else if (strcmp(name, "out_float32_density")==0) + else if (strcmp(name, "out_float32_density") == 0) parms->out_float32_density = atoi(value); - else if (strcmp(name, "out_float32_momentum_x")==0) + else if (strcmp(name, "out_float32_momentum_x") == 0) parms->out_float32_momentum_x = atoi(value); - else if (strcmp(name, "out_float32_momentum_y")==0) + else if (strcmp(name, "out_float32_momentum_y") == 0) parms->out_float32_momentum_y = atoi(value); - else if (strcmp(name, "out_float32_momentum_z")==0) + else if (strcmp(name, "out_float32_momentum_z") == 0) parms->out_float32_momentum_z = atoi(value); - else if (strcmp(name, "out_float32_Energy")==0) + else if (strcmp(name, "out_float32_Energy") == 0) parms->out_float32_Energy = atoi(value); #ifdef DE - else if (strcmp(name, "out_float32_GasEnergy")==0) + else if (strcmp(name, "out_float32_GasEnergy") == 0) parms->out_float32_GasEnergy = atoi(value); -#endif // DE +#endif // DE #ifdef MHD - else if (strcmp(name, "out_float32_magnetic_x")==0) + else if (strcmp(name, "out_float32_magnetic_x") == 0) parms->out_float32_magnetic_x = atoi(value); - else if (strcmp(name, "out_float32_magnetic_y")==0) + else if (strcmp(name, "out_float32_magnetic_y") == 0) parms->out_float32_magnetic_y = atoi(value); - else if (strcmp(name, "out_float32_magnetic_z")==0) + else if (strcmp(name, "out_float32_magnetic_z") == 0) parms->out_float32_magnetic_z = atoi(value); -#endif // MHD - else if (strcmp(name, "xmin")==0) +#endif // MHD + else if (strcmp(name, "xmin") == 0) parms->xmin = atof(value); - else if (strcmp(name, "ymin")==0) + else if (strcmp(name, "ymin") == 0) parms->ymin = atof(value); - else if (strcmp(name, "zmin")==0) + else if (strcmp(name, "zmin") == 0) parms->zmin = atof(value); - else if (strcmp(name, "xlen")==0) + else if (strcmp(name, "xlen") == 0) parms->xlen = atof(value); - else if (strcmp(name, "ylen")==0) + else if (strcmp(name, "ylen") == 0) parms->ylen = atof(value); - else if (strcmp(name, "zlen")==0) + else if (strcmp(name, "zlen") == 0) parms->zlen = atof(value); - else if (strcmp(name, "xl_bcnd")==0) + else if (strcmp(name, "xl_bcnd") == 0) parms->xl_bcnd = atoi(value); - else if (strcmp(name, "xu_bcnd")==0) + else if (strcmp(name, "xu_bcnd") == 0) parms->xu_bcnd = atoi(value); - else if (strcmp(name, "yl_bcnd")==0) + else if (strcmp(name, "yl_bcnd") == 0) parms->yl_bcnd = atoi(value); - else if (strcmp(name, "yu_bcnd")==0) + else if (strcmp(name, "yu_bcnd") == 0) parms->yu_bcnd = atoi(value); - else if (strcmp(name, "zl_bcnd")==0) + else if (strcmp(name, "zl_bcnd") == 0) parms->zl_bcnd = atoi(value); - else if (strcmp(name, "zu_bcnd")==0) + else if (strcmp(name, "zu_bcnd") == 0) parms->zu_bcnd = atoi(value); - else if (strcmp(name, "custom_bcnd")==0) - strncpy (parms->custom_bcnd, value, MAXLEN); - else if (strcmp(name, "outdir")==0) - strncpy (parms->outdir, value, MAXLEN); - else if (strcmp(name, "indir")==0) - strncpy (parms->indir, value, MAXLEN); - else if (strcmp(name, "rho")==0) + else if (strcmp(name, "custom_bcnd") == 0) + strncpy(parms->custom_bcnd, value, MAXLEN); + else if (strcmp(name, "outdir") == 0) + strncpy(parms->outdir, value, MAXLEN); + else if (strcmp(name, "indir") == 0) + strncpy(parms->indir, value, MAXLEN); + else if (strcmp(name, "rho") == 0) parms->rho = atof(value); - else if (strcmp(name, "vx")==0) + else if (strcmp(name, "vx") == 0) parms->vx = atof(value); - else if (strcmp(name, "vy")==0) + else if (strcmp(name, "vy") == 0) parms->vy = atof(value); - else if (strcmp(name, "vz")==0) + else if (strcmp(name, "vz") == 0) parms->vz = atof(value); - else if (strcmp(name, "P")==0) + else if (strcmp(name, "P") == 0) parms->P = atof(value); - else if (strcmp(name, "Bx")==0) + else if (strcmp(name, "Bx") == 0) parms->Bx = atof(value); - else if (strcmp(name, "By")==0) + else if (strcmp(name, "By") == 0) parms->By = atof(value); - else if (strcmp(name, "Bz")==0) + else if (strcmp(name, "Bz") == 0) parms->Bz = atof(value); - else if (strcmp(name, "A")==0) + else if (strcmp(name, "A") == 0) parms->A = atof(value); - else if (strcmp(name, "rho_l")==0) + else if (strcmp(name, "rho_l") == 0) parms->rho_l = atof(value); - else if (strcmp(name, "vx_l")==0) + else if (strcmp(name, "vx_l") == 0) parms->vx_l = atof(value); - else if (strcmp(name, "vy_l")==0) + else if (strcmp(name, "vy_l") == 0) parms->vy_l = atof(value); - else if (strcmp(name, "vz_l")==0) + else if (strcmp(name, "vz_l") == 0) parms->vz_l = atof(value); - else if (strcmp(name, "P_l")==0) + else if (strcmp(name, "P_l") == 0) parms->P_l = atof(value); - else if (strcmp(name, "Bx_l")==0) + else if (strcmp(name, "Bx_l") == 0) parms->Bx_l = atof(value); - else if (strcmp(name, "By_l")==0) + else if (strcmp(name, "By_l") == 0) parms->By_l = atof(value); - else if (strcmp(name, "Bz_l")==0) + else if (strcmp(name, "Bz_l") == 0) parms->Bz_l = atof(value); - else if (strcmp(name, "rho_r")==0) + else if (strcmp(name, "rho_r") == 0) parms->rho_r = atof(value); - else if (strcmp(name, "vx_r")==0) + else if (strcmp(name, "vx_r") == 0) parms->vx_r = atof(value); - else if (strcmp(name, "vy_r")==0) + else if (strcmp(name, "vy_r") == 0) parms->vy_r = atof(value); - else if (strcmp(name, "vz_r")==0) + else if (strcmp(name, "vz_r") == 0) parms->vz_r = atof(value); - else if (strcmp(name, "P_r")==0) + else if (strcmp(name, "P_r") == 0) parms->P_r = atof(value); - else if (strcmp(name, "Bx_r")==0) + else if (strcmp(name, "Bx_r") == 0) parms->Bx_r = atof(value); - else if (strcmp(name, "By_r")==0) + else if (strcmp(name, "By_r") == 0) parms->By_r = atof(value); - else if (strcmp(name, "Bz_r")==0) + else if (strcmp(name, "Bz_r") == 0) parms->Bz_r = atof(value); - else if (strcmp(name, "diaph")==0) + else if (strcmp(name, "diaph") == 0) parms->diaph = atof(value); - else if (strcmp(name, "rEigenVec_rho")==0) + else if (strcmp(name, "rEigenVec_rho") == 0) parms->rEigenVec_rho = atof(value); - else if (strcmp(name, "rEigenVec_MomentumX")==0) + else if (strcmp(name, "rEigenVec_MomentumX") == 0) parms->rEigenVec_MomentumX = atof(value); - else if (strcmp(name, "rEigenVec_MomentumY")==0) + else if (strcmp(name, "rEigenVec_MomentumY") == 0) parms->rEigenVec_MomentumY = atof(value); - else if (strcmp(name, "rEigenVec_MomentumZ")==0) + else if (strcmp(name, "rEigenVec_MomentumZ") == 0) parms->rEigenVec_MomentumZ = atof(value); - else if (strcmp(name, "rEigenVec_E")==0) - parms->rEigenVec_E = atof(value); - else if (strcmp(name, "rEigenVec_Bx")==0) + else if (strcmp(name, "rEigenVec_E") == 0) + parms->rEigenVec_E = atof(value); + else if (strcmp(name, "rEigenVec_Bx") == 0) parms->rEigenVec_Bx = atof(value); - else if (strcmp(name, "rEigenVec_By")==0) + else if (strcmp(name, "rEigenVec_By") == 0) parms->rEigenVec_By = atof(value); - else if (strcmp(name, "rEigenVec_Bz")==0) + else if (strcmp(name, "rEigenVec_Bz") == 0) parms->rEigenVec_Bz = atof(value); - else if (strcmp(name, "pitch")==0) + else if (strcmp(name, "pitch") == 0) parms->pitch = atof(value); - else if (strcmp(name, "yaw")==0) + else if (strcmp(name, "yaw") == 0) parms->yaw = atof(value); #ifdef PARTICLES - else if (strcmp(name, "prng_seed")==0) + else if (strcmp(name, "prng_seed") == 0) parms->prng_seed = atoi(value); -#endif // PARTICLES +#endif // PARTICLES #ifdef SUPERNOVA - else if (strcmp(name, "snr_filename")==0) + else if (strcmp(name, "snr_filename") == 0) strncpy(parms->snr_filename, value, MAXLEN); #endif #ifdef ROTATED_PROJECTION - else if (strcmp(name, "nxr")==0) + else if (strcmp(name, "nxr") == 0) parms->nxr = atoi(value); - else if (strcmp(name, "nzr")==0) + else if (strcmp(name, "nzr") == 0) parms->nzr = atoi(value); - else if (strcmp(name, "delta")==0) + else if (strcmp(name, "delta") == 0) parms->delta = atof(value); - else if (strcmp(name, "theta")==0) + else if (strcmp(name, "theta") == 0) parms->theta = atof(value); - else if (strcmp(name, "phi")==0) + else if (strcmp(name, "phi") == 0) parms->phi = atof(value); - else if (strcmp(name, "Lx")==0) - parms->Lx = atof(value); - else if (strcmp(name, "Lz")==0) + else if (strcmp(name, "Lx") == 0) + parms->Lx = atof(value); + else if (strcmp(name, "Lz") == 0) parms->Lz = atof(value); - else if (strcmp(name, "n_delta")==0) + else if (strcmp(name, "n_delta") == 0) parms->n_delta = atoi(value); - else if (strcmp(name, "ddelta_dt")==0) + else if (strcmp(name, "ddelta_dt") == 0) parms->ddelta_dt = atof(value); - else if (strcmp(name, "flag_delta")==0) - parms->flag_delta = atoi(value); + else if (strcmp(name, "flag_delta") == 0) + parms->flag_delta = atoi(value); #endif /*ROTATED_PROJECTION*/ #ifdef COSMOLOGY - else if (strcmp(name, "scale_outputs_file")==0) - strncpy (parms->scale_outputs_file, value, MAXLEN); - else if (strcmp(name, "Init_redshift")==0) - parms->Init_redshift = atof(value); - else if (strcmp(name, "End_redshift")==0) - parms->End_redshift = atof(value); - else if (strcmp(name, "H0")==0) - parms->H0 = atof(value); - else if (strcmp(name, "Omega_M")==0) - parms->Omega_M = atof(value); - else if (strcmp(name, "Omega_L")==0) - parms->Omega_L = atof(value); - else if (strcmp(name, "Omega_b")==0) - parms->Omega_b = atof(value); -#endif //COSMOLOGY + else if (strcmp(name, "scale_outputs_file") == 0) + strncpy(parms->scale_outputs_file, value, MAXLEN); + else if (strcmp(name, "Init_redshift") == 0) + parms->Init_redshift = atof(value); + else if (strcmp(name, "End_redshift") == 0) + parms->End_redshift = atof(value); + else if (strcmp(name, "H0") == 0) + parms->H0 = atof(value); + else if (strcmp(name, "Omega_M") == 0) + parms->Omega_M = atof(value); + else if (strcmp(name, "Omega_L") == 0) + parms->Omega_L = atof(value); + else if (strcmp(name, "Omega_b") == 0) + parms->Omega_b = atof(value); +#endif // COSMOLOGY #ifdef TILED_INITIAL_CONDITIONS - else if (strcmp(name, "tile_length")==0) - parms->tile_length = atof(value); -#endif //TILED_INITIAL_CONDITIONS + else if (strcmp(name, "tile_length") == 0) + parms->tile_length = atof(value); +#endif // TILED_INITIAL_CONDITIONS #ifdef SET_MPI_GRID // Set the MPI Processes grid [n_proc_x, n_proc_y, n_proc_z] - else if (strcmp(name, "n_proc_x")==0) - parms->n_proc_x = atoi(value); - else if (strcmp(name, "n_proc_y")==0) - parms->n_proc_y = atoi(value); - else if (strcmp(name, "n_proc_z")==0) - parms->n_proc_z = atoi(value); + else if (strcmp(name, "n_proc_x") == 0) + parms->n_proc_x = atoi(value); + else if (strcmp(name, "n_proc_y") == 0) + parms->n_proc_y = atoi(value); + else if (strcmp(name, "n_proc_z") == 0) + parms->n_proc_z = atoi(value); #endif - else if (strcmp(name, "bc_potential_type")==0) - parms->bc_potential_type = atoi(value); + else if (strcmp(name, "bc_potential_type") == 0) + parms->bc_potential_type = atoi(value); #ifdef CHEMISTRY_GPU - else if (strcmp(name, "UVB_rates_file")==0) - strncpy (parms->UVB_rates_file, value, MAXLEN); + else if (strcmp(name, "UVB_rates_file") == 0) + strncpy(parms->UVB_rates_file, value, MAXLEN); #endif #ifdef COOLING_GRACKLE - else if (strcmp(name, "UVB_rates_file")==0) - strncpy (parms->UVB_rates_file, value, MAXLEN); + else if (strcmp(name, "UVB_rates_file") == 0) + strncpy(parms->UVB_rates_file, value, MAXLEN); #endif #ifdef ANALYSIS - else if (strcmp(name, "analysis_scale_outputs_file")==0) - strncpy (parms->analysis_scale_outputs_file, value, MAXLEN); - else if (strcmp(name, "analysisdir")==0) - strncpy (parms->analysisdir, value, MAXLEN); - else if (strcmp(name, "lya_skewers_stride")==0) - parms->lya_skewers_stride = atoi(value); - else if (strcmp(name, "lya_Pk_d_log_k")==0) - parms->lya_Pk_d_log_k = atof(value); + else if (strcmp(name, "analysis_scale_outputs_file") == 0) + strncpy(parms->analysis_scale_outputs_file, value, MAXLEN); + else if (strcmp(name, "analysisdir") == 0) + strncpy(parms->analysisdir, value, MAXLEN); + else if (strcmp(name, "lya_skewers_stride") == 0) + parms->lya_skewers_stride = atoi(value); + else if (strcmp(name, "lya_Pk_d_log_k") == 0) + parms->lya_Pk_d_log_k = atof(value); #ifdef OUTPUT_SKEWERS - else if (strcmp(name, "skewersdir")==0) - strncpy (parms->skewersdir, value, MAXLEN); + else if (strcmp(name, "skewersdir") == 0) + strncpy(parms->skewersdir, value, MAXLEN); #endif #endif else if (!is_param_valid(name)) - chprintf ("WARNING: %s/%s: Unknown parameter/value pair!\n", - name, value); + chprintf("WARNING: %s/%s: Unknown parameter/value pair!\n", name, value); } diff --git a/src/global/global.h b/src/global/global.h index 38e458f57..d74158c40 100644 --- a/src/global/global.h +++ b/src/global/global.h @@ -1,146 +1,149 @@ /*! /file global.h * /brief Declarations of global variables and functions. */ - #ifndef GLOBAL_H #define GLOBAL_H -#include "../grid/grid_enum.h" // defines NSCALARS +#include "../grid/grid_enum.h" // defines NSCALARS #ifdef COOLING_CPU -#include -#include + #include + #include #endif -#ifdef PARTICLES +#ifdef PARTICLES #include -#endif //PARTICLES +#endif // PARTICLES -#if PRECISION==1 -#ifndef TYPEDEF_DEFINED_REAL +#if PRECISION == 1 + #ifndef TYPEDEF_DEFINED_REAL typedef float Real; + #endif #endif -#endif -#if PRECISION==2 -#ifndef TYPEDEF_DEFINED_REAL +#if PRECISION == 2 + #ifndef TYPEDEF_DEFINED_REAL typedef double Real; -#endif + #endif #endif -#define MAXLEN 2048 +#define MAXLEN 2048 #define TINY_NUMBER 1.0e-20 -#define PI 3.141592653589793 -#define MP 1.672622e-24 // mass of proton, grams -#define KB 1.380658e-16 // boltzmann constant, cgs -//#define GN 6.67259e-8 // gravitational constant, cgs -#define GN 4.49451e-18 // gravitational constant, kpc^3 / M_sun / kyr^2 -#define C_L 0.306594593 // speed of light in kpc/kyr - -#define MYR 31.536e12 //Myears in secs -#define KPC 3.086e16 // kpc in km -#define G_COSMO 4.300927161e-06; // gravitational constant, kpc km^2 s^-2 Msun^-1 -#define MSUN_CGS 1.98847e33; //Msun in gr -#define KPC_CGS 3.086e21; //kpc in cm -#define KM_CGS 1e5; //km in cm -#define MH 1.67262171e-24 //Mass of hydrogen [g] - -#define TIME_UNIT 3.15569e10 // 1 kyr in s -#define LENGTH_UNIT 3.08567758e21 // 1 kpc in cm -#define MASS_UNIT 1.98847e33 // 1 solar mass in grams -#define DENSITY_UNIT (MASS_UNIT/(LENGTH_UNIT*LENGTH_UNIT*LENGTH_UNIT)) -#define VELOCITY_UNIT (LENGTH_UNIT/TIME_UNIT) -#define ENERGY_UNIT (DENSITY_UNIT*VELOCITY_UNIT*VELOCITY_UNIT) -#define PRESSURE_UNIT (DENSITY_UNIT*VELOCITY_UNIT*VELOCITY_UNIT) -#define SP_ENERGY_UNIT (VELOCITY_UNIT*VELOCITY_UNIT) -#define MAGNETIC_FIELD_UNIT (sqrt(MASS_UNIT/LENGTH_UNIT) / TIME_UNIT) +#define PI 3.141592653589793 +#define MP 1.672622e-24 // mass of proton, grams +#define KB 1.380658e-16 // boltzmann constant, cgs +// #define GN 6.67259e-8 // gravitational constant, cgs +#define GN 4.49451e-18 // gravitational constant, kpc^3 / M_sun / kyr^2 +#define C_L 0.306594593 // speed of light in kpc/kyr + +#define MYR 31.536e12 // Myears in secs +#define KPC 3.086e16 // kpc in km +#define G_COSMO \ + 4.300927161e-06; // gravitational constant, kpc km^2 s^-2 Msun^-1 +#define MSUN_CGS 1.98847e33; // Msun in gr +#define KPC_CGS 3.086e21; // kpc in cm +#define KM_CGS 1e5; // km in cm +#define MH 1.67262171e-24 // Mass of hydrogen [g] + +#define TIME_UNIT 3.15569e10 // 1 kyr in s +#define LENGTH_UNIT 3.08567758e21 // 1 kpc in cm +#define MASS_UNIT 1.98847e33 // 1 solar mass in grams +#define DENSITY_UNIT (MASS_UNIT / (LENGTH_UNIT * LENGTH_UNIT * LENGTH_UNIT)) +#define VELOCITY_UNIT (LENGTH_UNIT / TIME_UNIT) +#define ENERGY_UNIT (DENSITY_UNIT * VELOCITY_UNIT * VELOCITY_UNIT) +#define PRESSURE_UNIT (DENSITY_UNIT * VELOCITY_UNIT * VELOCITY_UNIT) +#define SP_ENERGY_UNIT (VELOCITY_UNIT * VELOCITY_UNIT) +#define MAGNETIC_FIELD_UNIT (sqrt(MASS_UNIT / LENGTH_UNIT) / TIME_UNIT) #define LOG_FILE_NAME "run_output.log" -//Conserved Floor Values +// Conserved Floor Values #define TEMP_FLOOR 1e-3 -#define DENS_FLOOR 1e-5 // in code units +#define DENS_FLOOR 1e-5 // in code units -//Parameter for Enzo dual Energy Condition -#define DE_ETA_1 0.001 //Ratio of U to E for which Internal Energy is used to compute the Pressure -#define DE_ETA_2 0.035 //Ratio of U to max(E_local) used to select which Internal Energy is used for the update. +// Parameter for Enzo dual Energy Condition +#define DE_ETA_1 \ + 0.001 // Ratio of U to E for which Internal Energy is used to compute the + // Pressure +#define DE_ETA_2 \ + 0.035 // Ratio of U to max(E_local) used to select which Internal Energy is + // used for the update. // Maximum time step for cosmological simulations -#define MAX_DELTA_A 0.001 +#define MAX_DELTA_A 0.001 #define MAX_EXPANSION_RATE 0.01 // Limit delta(a)/a - - -#ifdef MHD +#ifdef MHD #define N_MHD_FIELDS 3 #else #define N_MHD_FIELDS 0 -#endif //MHD +#endif // MHD // Inital Chemistry fractions -#define INITIAL_FRACTION_HI 0.75984603480 -#define INITIAL_FRACTION_HII 1.53965115054e-4 -#define INITIAL_FRACTION_HEI 0.24000000008 -#define INITIAL_FRACTION_HEII 9.59999999903e-15 -#define INITIAL_FRACTION_HEIII 9.59999999903e-18 -#define INITIAL_FRACTION_ELECTRON 1.53965115054e-4 -#define INITIAL_FRACTION_METAL 1.00000000000e-10 - - -//Default Particles Compiler Flags +#define INITIAL_FRACTION_HI 0.75984603480 +#define INITIAL_FRACTION_HII 1.53965115054e-4 +#define INITIAL_FRACTION_HEI 0.24000000008 +#define INITIAL_FRACTION_HEII 9.59999999903e-15 +#define INITIAL_FRACTION_HEIII 9.59999999903e-18 +#define INITIAL_FRACTION_ELECTRON 1.53965115054e-4 +#define INITIAL_FRACTION_METAL 1.00000000000e-10 + +// Default Particles Compiler Flags #define PARTICLES_LONG_INTS #define PARTICLES_KDK - #ifdef GRAVITY -#ifdef GRAVITY_5_POINTS_GRADIENT -#ifdef PARTICLES -#define N_GHOST_POTENTIAL 3 // 3 ghost cells are needed for 5 point gradient, ( one is for the CIC interpolation of the potential ) -#else -#define N_GHOST_POTENTIAL 2 // 2 ghost cells are needed for 5 point gradient -#endif //PARTICLES - -#else -#ifdef PARTICLES -#define N_GHOST_POTENTIAL 2 // 2 ghost cells are needed for 3 point gradient, ( one is for the CIC interpolation of the potential ) -#else -#define N_GHOST_POTENTIAL 1 // 1 ghost cells are needed for 3 point gradient -#endif //PARTICLES -#endif //GRAVITY_5_POINTS_GRADIENT - + #ifdef GRAVITY_5_POINTS_GRADIENT + #ifdef PARTICLES + #define N_GHOST_POTENTIAL \ + 3 // 3 ghost cells are needed for 5 point gradient, ( one is for the + // CIC interpolation of the potential ) + #else + #define N_GHOST_POTENTIAL \ + 2 // 2 ghost cells are needed for 5 point gradient + #endif // PARTICLES + + #else + #ifdef PARTICLES + #define N_GHOST_POTENTIAL \ + 2 // 2 ghost cells are needed for 3 point gradient, ( one is for the + // CIC interpolation of the potential ) + #else + #define N_GHOST_POTENTIAL \ + 1 // 1 ghost cells are needed for 3 point gradient + #endif // PARTICLES + #endif // GRAVITY_5_POINTS_GRADIENT typedef long int grav_int_t; #endif #ifdef PARTICLES -#ifdef PARTICLES_LONG_INTS + #ifdef PARTICLES_LONG_INTS typedef long int part_int_t; -#else + #else typedef int part_int_t; -#endif//PARTICLES_LONG_INTS + #endif // PARTICLES_LONG_INTS -#include + #include typedef std::vector real_vector_t; typedef std::vector int_vector_t; -#ifdef MPI_CHOLLA + #ifdef MPI_CHOLLA // Constants for the inital size of the buffers for particles transfer // and the number of data transferred for each particle extern int N_PARTICLES_TRANSFER; extern int N_DATA_PER_PARTICLE_TRANSFER; -#endif//MPI_CHOLLA - -#ifdef AVERAGE_SLOW_CELLS -#define SLOW_FACTOR 10 -#endif//AVERAGE_SLOW_CELLS - -#endif//PARTICLES + #endif // MPI_CHOLLA + #ifdef AVERAGE_SLOW_CELLS + #define SLOW_FACTOR 10 + #endif // AVERAGE_SLOW_CELLS -#define SIGN(a) ( ((a) < 0.) ? -1. : 1. ) +#endif // PARTICLES +#define SIGN(a) (((a) < 0.) ? -1. : 1.) /* Global variables */ -extern Real gama; // Ratio of specific heats -extern Real C_cfl; // CFL number (0 - 0.5) +extern Real gama; // Ratio of specific heats +extern Real C_cfl; // CFL number (0 - 0.5) extern Real t_comm; extern Real t_other; @@ -175,9 +178,7 @@ extern int sgn(Real x); extern Real calc_eta(Real cW[], Real gamma); #endif - -struct parameters -{ +struct parameters { int nx; int ny; int nz; @@ -192,19 +193,19 @@ struct parameters int n_projection; int n_rotated_projection; int n_slice; - int n_out_float32=0; - int out_float32_density=0; - int out_float32_momentum_x=0; - int out_float32_momentum_y=0; - int out_float32_momentum_z=0; - int out_float32_Energy=0; + int n_out_float32 = 0; + int out_float32_density = 0; + int out_float32_momentum_x = 0; + int out_float32_momentum_y = 0; + int out_float32_momentum_z = 0; + int out_float32_Energy = 0; #ifdef DE - int out_float32_GasEnergy=0; + int out_float32_GasEnergy = 0; #endif #ifdef MHD - int out_float32_magnetic_x=0; - int out_float32_magnetic_y=0; - int out_float32_magnetic_z=0; + int out_float32_magnetic_x = 0; + int out_float32_magnetic_y = 0; + int out_float32_magnetic_z = 0; #endif Real xmin; Real ymin; @@ -218,7 +219,7 @@ struct parameters int yu_bcnd; int zl_bcnd; int zu_bcnd; -#ifdef MPI_CHOLLA +#ifdef MPI_CHOLLA int xlg_bcnd; int xug_bcnd; int ylg_bcnd; @@ -228,48 +229,48 @@ struct parameters #endif /*MPI_CHOLLA*/ char custom_bcnd[MAXLEN]; char outdir[MAXLEN]; - char indir[MAXLEN]; //Folder to load Initial conditions from + char indir[MAXLEN]; // Folder to load Initial conditions from Real rho; Real vx; Real vy; Real vz; Real P; Real A; - Real Bx=0; - Real By=0; - Real Bz=0; + Real Bx = 0; + Real By = 0; + Real Bz = 0; Real rho_l; Real vx_l; - Real vy_l=0; - Real vz_l=0; + Real vy_l = 0; + Real vz_l = 0; Real P_l; Real Bx_l; Real By_l; Real Bz_l; Real rho_r; Real vx_r; - Real vy_r=0; - Real vz_r=0; + Real vy_r = 0; + Real vz_r = 0; Real P_r; Real Bx_r; Real By_r; Real Bz_r; Real diaph; - Real rEigenVec_rho = 0; - Real rEigenVec_MomentumX = 0; - Real rEigenVec_MomentumY = 0; - Real rEigenVec_MomentumZ = 0; - Real rEigenVec_E = 0; - Real rEigenVec_Bx = 0; - Real rEigenVec_By = 0; - Real rEigenVec_Bz = 0; - Real pitch = 0; - Real yaw = 0; + Real rEigenVec_rho = 0; + Real rEigenVec_MomentumX = 0; + Real rEigenVec_MomentumY = 0; + Real rEigenVec_MomentumZ = 0; + Real rEigenVec_E = 0; + Real rEigenVec_Bx = 0; + Real rEigenVec_By = 0; + Real rEigenVec_Bz = 0; + Real pitch = 0; + Real yaw = 0; #ifdef PARTICLES // The random seed for particle simulations. With the default of 0 then a // machine dependent seed will be generated. std::uint_fast64_t prng_seed = 0; -#endif // PARTICLES +#endif // PARTICLES #ifdef SUPERNOVA char snr_filename[MAXLEN]; #endif @@ -292,11 +293,12 @@ struct parameters Real Omega_b; Real Init_redshift; Real End_redshift; - char scale_outputs_file[MAXLEN]; //File for the scale_factor output values for cosmological simulations -#endif //COSMOLOGY + char scale_outputs_file[MAXLEN]; // File for the scale_factor output values + // for cosmological simulations +#endif // COSMOLOGY #ifdef TILED_INITIAL_CONDITIONS Real tile_length; -#endif //TILED_INITIAL_CONDITIONS +#endif // TILED_INITIAL_CONDITIONS #ifdef SET_MPI_GRID // Set the MPI Processes grid [n_proc_x, n_proc_y, n_proc_z] @@ -305,11 +307,14 @@ struct parameters int n_proc_z; #endif int bc_potential_type; -#if defined(COOLING_GRACKLE) || defined (CHEMISTRY_GPU) - char UVB_rates_file[MAXLEN]; //File for the UVB photoheating and photoionization rates of HI, HeI and HeII +#if defined(COOLING_GRACKLE) || defined(CHEMISTRY_GPU) + char UVB_rates_file[MAXLEN]; // File for the UVB photoheating and + // photoionization rates of HI, HeI and HeII #endif #ifdef ANALYSIS - char analysis_scale_outputs_file[MAXLEN]; //File for the scale_factor output values for cosmological simulations {{}} + char analysis_scale_outputs_file[MAXLEN]; // File for the scale_factor output + // values for cosmological + // simulations {{}} char analysisdir[MAXLEN]; int lya_skewers_stride; Real lya_Pk_d_log_k; @@ -319,13 +324,14 @@ struct parameters #endif }; - /*! \fn void parse_params(char *param_file, struct parameters * parms); * \brief Reads the parameters in the given file into a structure. */ -extern void parse_params (char *param_file, struct parameters * parms, int argc, char** argv); +extern void parse_params(char *param_file, struct parameters *parms, int argc, + char **argv); /*! \fn int is_param_valid(char *name); - * \brief Verifies that a param is valid (even if not needed). Avoids "warnings" in output. */ + * \brief Verifies that a param is valid (even if not needed). Avoids + * "warnings" in output. */ extern int is_param_valid(const char *name); -#endif //GLOBAL_H +#endif // GLOBAL_H diff --git a/src/global/global_cuda.cu b/src/global/global_cuda.cu index 2153b1615..4a34b1630 100644 --- a/src/global/global_cuda.cu +++ b/src/global/global_cuda.cu @@ -3,7 +3,7 @@ #ifdef CUDA -#include "../global/global.h" + #include "../global/global.h" // Declare global variables bool memory_allocated; @@ -12,9 +12,9 @@ Real *Q_Lx, *Q_Rx, *Q_Ly, *Q_Ry, *Q_Lz, *Q_Rz, *F_x, *F_y, *F_z; Real *ctElectricFields; Real *eta_x, *eta_y, *eta_z, *etah_x, *etah_y, *etah_z; -//Arrays for potential in GPU: Will be set to NULL if not using GRAVITY +// Arrays for potential in GPU: Will be set to NULL if not using GRAVITY Real *dev_grav_potential; Real *temp_potential; Real *buffer_potential; -#endif //CUDA +#endif // CUDA diff --git a/src/global/global_cuda.h b/src/global/global_cuda.h index 61cbc0752..983c6eba4 100644 --- a/src/global/global_cuda.h +++ b/src/global/global_cuda.h @@ -1,23 +1,24 @@ /*! /file global_cuda.h - * /brief Declarations of global variables and functions for the cuda kernels. */ + * /brief Declarations of global variables and functions for the cuda kernels. + */ #ifdef CUDA -#include -#include -#include "../utils/gpu.hpp" -#include -#include "../global/global.h" + #include + #include + #include + #include "../global/global.h" + #include "../utils/gpu.hpp" -#ifndef GLOBAL_CUDA_H -#define GLOBAL_CUDA_H + #ifndef GLOBAL_CUDA_H + #define GLOBAL_CUDA_H -#define TPB 256 // threads per block -//#define TPB 64 + #define TPB 256 // threads per block +// #define TPB 64 - -extern bool memory_allocated; // Flag becomes true after allocating the memory on the first timestep +extern bool memory_allocated; // Flag becomes true after allocating the memory + // on the first timestep // Arrays are global so that they can be allocated only once. // Not all arrays will be allocated for every integrator @@ -32,90 +33,89 @@ extern Real *Q_Lx, *Q_Rx, *Q_Ly, *Q_Ry, *Q_Lz, *Q_Rz, *F_x, *F_y, *F_z; // Constrained transport electric fields extern Real *ctElectricFields; -//Arrays for potential in GPU: Will be set to NULL if not using GRAVITY +// Arrays for potential in GPU: Will be set to NULL if not using GRAVITY extern Real *dev_grav_potential; extern Real *temp_potential; extern Real *buffer_potential; -#define CudaSafeCall( err ) __cudaSafeCall( err, __FILE__, __LINE__ ) -#define CudaCheckError() __cudaCheckError( __FILE__, __LINE__ ) + #define CudaSafeCall(err) __cudaSafeCall(err, __FILE__, __LINE__) + #define CudaCheckError() __cudaCheckError(__FILE__, __LINE__) -inline void __cudaSafeCall( cudaError err, const char *file, const int line ) +inline void __cudaSafeCall(cudaError err, const char *file, const int line) { -#ifdef CUDA_ERROR_CHECK - if ( cudaSuccess != err ) - { - fprintf( stderr, "cudaSafeCall() failed at %s:%i : %s\n", - file, line, cudaGetErrorString( err ) ); - exit( -1 ); - } -#endif - - return; + #ifdef CUDA_ERROR_CHECK + if (cudaSuccess != err) { + fprintf(stderr, "cudaSafeCall() failed at %s:%i : %s\n", file, line, + cudaGetErrorString(err)); + exit(-1); + } + #endif + + return; } -inline void __cudaCheckError( const char *file, const int line ) +inline void __cudaCheckError(const char *file, const int line) { -#ifdef CUDA_ERROR_CHECK - cudaError err = cudaGetLastError(); - if ( cudaSuccess != err ) - { - fprintf( stderr, "cudaCheckError() failed at %s:%i : %s\n", - file, line, cudaGetErrorString( err ) ); - exit( -1 ); - } - - // More careful checking. However, this will affect performance. - // Comment away if needed. - err = cudaDeviceSynchronize(); - if( cudaSuccess != err ) - { - fprintf( stderr, "cudaCheckError() with sync failed at %s:%i : %s\n", - file, line, cudaGetErrorString( err ) ); - exit( -1 ); - } -#endif - - return; + #ifdef CUDA_ERROR_CHECK + cudaError err = cudaGetLastError(); + if (cudaSuccess != err) { + fprintf(stderr, "cudaCheckError() failed at %s:%i : %s\n", file, line, + cudaGetErrorString(err)); + exit(-1); + } + + // More careful checking. However, this will affect performance. + // Comment away if needed. + err = cudaDeviceSynchronize(); + if (cudaSuccess != err) { + fprintf(stderr, "cudaCheckError() with sync failed at %s:%i : %s\n", file, + line, cudaGetErrorString(err)); + exit(-1); + } + #endif + + return; } - -#define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); } -inline void gpuAssert(cudaError_t code, char *file, int line, bool abort=true) + #define gpuErrchk(ans) \ + { \ + gpuAssert((ans), __FILE__, __LINE__); \ + } +inline void gpuAssert(cudaError_t code, char *file, int line, bool abort = true) { - if (code != cudaSuccess) - { - fprintf(stderr,"GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line); - if (abort) exit(code); - } + if (code != cudaSuccess) { + fprintf(stderr, "GPUassert: %s %s %d\n", cudaGetErrorString(code), file, + line); + if (abort) exit(code); + } } /*! \fn int sgn_CUDA * \brief Mathematical sign function. Returns sign of x. */ __device__ inline int sgn_CUDA(Real x) { - if (x < 0) return -1; - else return 1; + if (x < 0) + return -1; + else + return 1; } - -//Define atomic_add if it's not supported -#if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 600 -#else -__device__ double atomicAdd(double* address, double val) + // Define atomic_add if it's not supported + #if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 600 + #else +__device__ double atomicAdd(double *address, double val) { - unsigned long long int* address_as_ull = (unsigned long long int*)address; - unsigned long long int old = *address_as_ull, assumed; - do { - assumed = old; - old = atomicCAS(address_as_ull, assumed, - __double_as_longlong(val + __longlong_as_double(assumed))); - } while (assumed != old); - return __longlong_as_double(old); + unsigned long long int *address_as_ull = (unsigned long long int *)address; + unsigned long long int old = *address_as_ull, assumed; + do { + assumed = old; + old = atomicCAS(address_as_ull, assumed, + __double_as_longlong(val + __longlong_as_double(assumed))); + } while (assumed != old); + return __longlong_as_double(old); } -#endif - + #endif -#endif //GLOBAL_CUDA_H + #endif // GLOBAL_CUDA_H -#endif //CUDA +#endif // CUDA diff --git a/src/gravity/grav3D.cpp b/src/gravity/grav3D.cpp index 06fbc8cc2..20c64f594 100644 --- a/src/gravity/grav3D.cpp +++ b/src/gravity/grav3D.cpp @@ -1,85 +1,87 @@ #ifdef GRAVITY -#include -#include -#include -#include -#include "../global/global.h" -#include "../io/io.h" + #include "../gravity/grav3D.h" -#include "../gravity/grav3D.h" - -#ifdef PARALLEL_OMP -#include "../utils/parallel_omp.h" -#endif + #include + #include + #include + #include + #include "../global/global.h" + #include "../io/io.h" + #ifdef PARALLEL_OMP + #include "../utils/parallel_omp.h" + #endif -Grav3D::Grav3D( void ){} +Grav3D::Grav3D(void) {} -void Grav3D::Initialize( Real x_min, Real y_min, Real z_min, Real x_max, Real y_max, Real z_max, Real Lx, Real Ly, Real Lz, int nx, int ny, int nz, int nx_real, int ny_real, int nz_real, Real dx_real, Real dy_real, Real dz_real, int n_ghost_pot_offset, struct parameters *P ) +void Grav3D::Initialize(Real x_min, Real y_min, Real z_min, Real x_max, + Real y_max, Real z_max, Real Lx, Real Ly, Real Lz, + int nx, int ny, int nz, int nx_real, int ny_real, + int nz_real, Real dx_real, Real dy_real, Real dz_real, + int n_ghost_pot_offset, struct parameters *P) { - - //Set Box Size + // Set Box Size Lbox_x = Lx; Lbox_y = Ly; Lbox_z = Lz; - //Set Box Left boundary positions + // Set Box Left boundary positions xMin = x_min; yMin = y_min; zMin = z_min; - //Set Box Right boundary positions + // Set Box Right boundary positions xMax = x_max; yMax = y_max; zMax = z_max; - - - //Set uniform ( dx, dy, dz ) + // Set uniform ( dx, dy, dz ) dx = dx_real; dy = dy_real; dz = dz_real; - //Set Box Total number of cells + // Set Box Total number of cells nx_total = nx; ny_total = ny; nz_total = nz; - //Set Box local domain number of cells + // Set Box local domain number of cells nx_local = nx_real; ny_local = ny_real; nz_local = nz_real; - //Local n_cells without ghost cells - n_cells = nx_local*ny_local*nz_local; - //Local n_cells including ghost cells for the potential array - n_cells_potential = ( nx_local + 2*N_GHOST_POTENTIAL ) * ( ny_local + 2*N_GHOST_POTENTIAL ) * ( nz_local + 2*N_GHOST_POTENTIAL ); + // Local n_cells without ghost cells + n_cells = nx_local * ny_local * nz_local; + // Local n_cells including ghost cells for the potential array + n_cells_potential = (nx_local + 2 * N_GHOST_POTENTIAL) * + (ny_local + 2 * N_GHOST_POTENTIAL) * + (nz_local + 2 * N_GHOST_POTENTIAL); - //Set Initial and dt used for the extrapolation of the potential; - //The first timestep the potential in not extrapolated ( INITIAL = TRUE ) + // Set Initial and dt used for the extrapolation of the potential; + // The first timestep the potential in not extrapolated ( INITIAL = TRUE ) INITIAL = true; dt_prev = 0; - dt_now = 0; + dt_now = 0; #ifdef COSMOLOGY - //Set the scale factor for cosmological simulations to 1, - //This will be changed to the proper value when cosmology is initialized + // Set the scale factor for cosmological simulations to 1, + // This will be changed to the proper value when cosmology is initialized current_a = 1; #endif - //Set the average density=0 ( Not Used ) + // Set the average density=0 ( Not Used ) dens_avrg = 0; - //Set the Gravitational Constant ( units must be consistent ) + // Set the Gravitational Constant ( units must be consistent ) Gconst = GN; - if (strcmp(P->init, "Spherical_Overdensity_3D")==0){ + if (strcmp(P->init, "Spherical_Overdensity_3D") == 0) { Gconst = 1; chprintf(" WARNING: Using Gravitational Constant G=1.\n"); } - //Flag to transfer the Potential boundaries + // Flag to transfer the Potential boundaries TRANSFER_POTENTIAL_BOUNDARIES = false; // Flag to set the gravity boundary flags @@ -93,12 +95,15 @@ void Grav3D::Initialize( Real x_min, Real y_min, Real z_min, Real x_max, Real y_ Initialize_values_CPU(); - chprintf( "Gravity Initialized: \n Lbox: %0.2f %0.2f %0.2f \n Local: %d %d %d \n Global: %d %d %d \n", - Lbox_x, Lbox_y, Lbox_z, nx_local, ny_local, nz_local, nx_total, ny_total, nz_total ); + chprintf( + "Gravity Initialized: \n Lbox: %0.2f %0.2f %0.2f \n Local: %d %d %d \n " + "Global: %d %d %d \n", + Lbox_x, Lbox_y, Lbox_z, nx_local, ny_local, nz_local, nx_total, ny_total, + nz_total); - chprintf( " dx:%f dy:%f dz:%f\n", dx, dy, dz ); - chprintf( " N ghost potential: %d\n", N_GHOST_POTENTIAL); - chprintf( " N ghost offset: %d\n", n_ghost_pot_offset); + chprintf(" dx:%f dy:%f dz:%f\n", dx, dy, dz); + chprintf(" N ghost potential: %d\n", N_GHOST_POTENTIAL); + chprintf(" N ghost offset: %d\n", n_ghost_pot_offset); #ifdef PARALLEL_OMP chprintf(" Using OMP for gravity calculations\n"); @@ -107,51 +112,71 @@ void Grav3D::Initialize( Real x_min, Real y_min, Real z_min, Real x_max, Real y_ chprintf(" N OMP Threads per MPI process: %d\n", N_OMP_THREADS); #endif - Poisson_solver.Initialize( Lbox_x, Lbox_y, Lbox_z, xMin, yMin, zMin, nx_total, ny_total, nz_total, nx_local, ny_local, nz_local, dx, dy, dz ); + Poisson_solver.Initialize(Lbox_x, Lbox_y, Lbox_z, xMin, yMin, zMin, nx_total, + ny_total, nz_total, nx_local, ny_local, nz_local, + dx, dy, dz); #if defined(PARIS_TEST) || defined(PARIS_GALACTIC_TEST) - Poisson_solver_test.Initialize( Lbox_x, Lbox_y, Lbox_z, xMin, yMin, zMin, nx_total, ny_total, nz_total, nx_local, ny_local, nz_local, dx, dy, dz ); + Poisson_solver_test.Initialize(Lbox_x, Lbox_y, Lbox_z, xMin, yMin, zMin, + nx_total, ny_total, nz_total, nx_local, + ny_local, nz_local, dx, dy, dz); #endif } void Grav3D::AllocateMemory_CPU(void) { // allocate memory for the density and potential arrays - F.density_h = (Real *) malloc(n_cells*sizeof(Real)); //array for the density - F.potential_h = (Real *) malloc(n_cells_potential*sizeof(Real)); //array for the potential at the n-th timestep - F.potential_1_h = (Real *) malloc(n_cells_potential*sizeof(Real)); //array for the potential at the (n-1)-th timestep - boundary_flags = (int *) malloc(6*sizeof(int)); // array for the gravity boundary flags + F.density_h = (Real *)malloc(n_cells * sizeof(Real)); // array for the + // density + F.potential_h = (Real *)malloc( + n_cells_potential * + sizeof(Real)); // array for the potential at the n-th timestep + F.potential_1_h = (Real *)malloc( + n_cells_potential * + sizeof(Real)); // array for the potential at the (n-1)-th timestep + boundary_flags = + (int *)malloc(6 * sizeof(int)); // array for the gravity boundary flags #ifdef GRAV_ISOLATED_BOUNDARY_X - F.pot_boundary_x0 = (Real *) malloc(N_GHOST_POTENTIAL*ny_local*nz_local*sizeof(Real)); //array for the potential isolated boundary - F.pot_boundary_x1 = (Real *) malloc(N_GHOST_POTENTIAL*ny_local*nz_local*sizeof(Real)); + F.pot_boundary_x0 = (Real *)malloc( + N_GHOST_POTENTIAL * ny_local * nz_local * + sizeof(Real)); // array for the potential isolated boundary + F.pot_boundary_x1 = + (Real *)malloc(N_GHOST_POTENTIAL * ny_local * nz_local * sizeof(Real)); #endif #ifdef GRAV_ISOLATED_BOUNDARY_Y - F.pot_boundary_y0 = (Real *) malloc(N_GHOST_POTENTIAL*nx_local*nz_local*sizeof(Real)); //array for the potential isolated boundary - F.pot_boundary_y1 = (Real *) malloc(N_GHOST_POTENTIAL*nx_local*nz_local*sizeof(Real)); + F.pot_boundary_y0 = (Real *)malloc( + N_GHOST_POTENTIAL * nx_local * nz_local * + sizeof(Real)); // array for the potential isolated boundary + F.pot_boundary_y1 = + (Real *)malloc(N_GHOST_POTENTIAL * nx_local * nz_local * sizeof(Real)); #endif #ifdef GRAV_ISOLATED_BOUNDARY_Z - F.pot_boundary_z0 = (Real *) malloc(N_GHOST_POTENTIAL*nx_local*ny_local*sizeof(Real)); //array for the potential isolated boundary - F.pot_boundary_z1 = (Real *) malloc(N_GHOST_POTENTIAL*nx_local*ny_local*sizeof(Real)); + F.pot_boundary_z0 = (Real *)malloc( + N_GHOST_POTENTIAL * nx_local * ny_local * + sizeof(Real)); // array for the potential isolated boundary + F.pot_boundary_z1 = + (Real *)malloc(N_GHOST_POTENTIAL * nx_local * ny_local * sizeof(Real)); #endif #ifdef GRAVITY_ANALYTIC_COMP - F.analytic_potential_h = (Real *) malloc(n_cells_potential*sizeof(Real)); + F.analytic_potential_h = (Real *)malloc(n_cells_potential * sizeof(Real)); #endif } -void Grav3D::Set_Boundary_Flags( int *flags ){ - for (int i=0; i<6; i++) boundary_flags[i] = flags[i]; +void Grav3D::Set_Boundary_Flags(int *flags) +{ + for (int i = 0; i < 6; i++) boundary_flags[i] = flags[i]; } -void Grav3D::Initialize_values_CPU(void){ - - //Set initial values to 0. - for (int id=0; id + #include "../global/global.h" #ifdef SOR -#include "../gravity/potential_SOR_3D.h" + #include "../gravity/potential_SOR_3D.h" #endif #ifdef PARIS -#include "../gravity/potential_paris_3D.h" + #include "../gravity/potential_paris_3D.h" #endif #ifdef PARIS_GALACTIC -#include "../gravity/potential_paris_galactic.h" + #include "../gravity/potential_paris_galactic.h" #endif #ifdef HDF5 -#include + #include #endif #define GRAV_ISOLATED_BOUNDARY_X #define GRAV_ISOLATED_BOUNDARY_Y #define GRAV_ISOLATED_BOUNDARY_Z -#define TPB_GRAV 1024 +#define TPB_GRAV 1024 #define TPBX_GRAV 16 #define TPBY_GRAV 8 #define TPBZ_GRAV 8 @@ -33,8 +34,7 @@ * \brief Class to create a the gravity object. */ class Grav3D { - public: - + public: Real Lbox_x; Real Lbox_y; Real Lbox_z; @@ -46,46 +46,44 @@ class Grav3D Real yMax; Real zMax; /*! \var nx - * \brief Total number of cells in the x-dimension */ + * \brief Total number of cells in the x-dimension */ int nx_total; /*! \var ny - * \brief Total number of cells in the y-dimension */ + * \brief Total number of cells in the y-dimension */ int ny_total; /*! \var nz - * \brief Total number of cells in the z-dimension */ + * \brief Total number of cells in the z-dimension */ int nz_total; /*! \var nx_local - * \brief Local number of cells in the x-dimension */ + * \brief Local number of cells in the x-dimension */ int nx_local; /*! \var ny_local - * \brief Local number of cells in the y-dimension */ + * \brief Local number of cells in the y-dimension */ int ny_local; /*! \var nz_local - * \brief Local number of cells in the z-dimension */ + * \brief Local number of cells in the z-dimension */ int nz_local; /*! \var dx - * \brief x-width of cells */ + * \brief x-width of cells */ Real dx; /*! \var dy - * \brief y-width of cells */ + * \brief y-width of cells */ Real dy; /*! \var dz - * \brief z-width of cells */ + * \brief z-width of cells */ Real dz; - #ifdef COSMOLOGY +#ifdef COSMOLOGY Real current_a; - #endif - - Real dens_avrg ; +#endif + Real dens_avrg; int n_cells; int n_cells_potential; - bool INITIAL; Real dt_prev; @@ -95,125 +93,130 @@ class Grav3D bool TRANSFER_POTENTIAL_BOUNDARIES; - bool BC_FLAGS_SET; int *boundary_flags; - - #ifdef SOR +#ifdef SOR Potential_SOR_3D Poisson_solver; - #endif +#endif - #ifdef PARIS +#ifdef PARIS Potential_Paris_3D Poisson_solver; - #endif +#endif - #ifdef PARIS_GALACTIC +#ifdef PARIS_GALACTIC #ifdef SOR - #define PARIS_GALACTIC_TEST + #define PARIS_GALACTIC_TEST Potential_Paris_Galactic Poisson_solver_test; #else Potential_Paris_Galactic Poisson_solver; #endif - #endif +#endif - struct Fields - { + struct Fields { /*! \var density_h * \brief Array containing the density of each cell in the grid */ Real *density_h; /*! \var potential_h - * \brief Array containing the gravitational potential of each cell in the grid */ + * \brief Array containing the gravitational potential of each cell in the + * grid */ Real *potential_h; /*! \var potential_h - * \brief Array containing the gravitational potential of each cell in the grid at the previous time step */ + * \brief Array containing the gravitational potential of each cell in the + * grid at the previous time step */ Real *potential_1_h; - #ifdef GRAVITY_ANALYTIC_COMP +#ifdef GRAVITY_ANALYTIC_COMP Real *analytic_potential_h; - #endif +#endif - #ifdef GRAVITY_GPU +#ifdef GRAVITY_GPU /*! \var density_d * \brief Device Array containing the density of each cell in the grid */ Real *density_d; /*! \var potential_d - * \brief Device Array containing the gravitational potential of each cell in the grid */ + * \brief Device Array containing the gravitational potential of each cell + * in the grid */ Real *potential_d; /*! \var potential_d - * \brief Device Array containing the gravitational potential of each cell in the grid at the previous time step */ + * \brief Device Array containing the gravitational potential of each cell + * in the grid at the previous time step */ Real *potential_1_d; - #ifdef GRAVITY_ANALYTIC_COMP + #ifdef GRAVITY_ANALYTIC_COMP Real *analytic_potential_d; - #endif + #endif - #endif //GRAVITY_GPU +#endif // GRAVITY_GPU - // Arrays for computing the potential values in isolated boundaries - #ifdef GRAV_ISOLATED_BOUNDARY_X +// Arrays for computing the potential values in isolated boundaries +#ifdef GRAV_ISOLATED_BOUNDARY_X Real *pot_boundary_x0; Real *pot_boundary_x1; - #endif - #ifdef GRAV_ISOLATED_BOUNDARY_Y +#endif +#ifdef GRAV_ISOLATED_BOUNDARY_Y Real *pot_boundary_y0; Real *pot_boundary_y1; - #endif - #ifdef GRAV_ISOLATED_BOUNDARY_Z +#endif +#ifdef GRAV_ISOLATED_BOUNDARY_Z Real *pot_boundary_z0; Real *pot_boundary_z1; - #endif +#endif - #ifdef GRAVITY_GPU - #ifdef GRAV_ISOLATED_BOUNDARY_X +#ifdef GRAVITY_GPU + #ifdef GRAV_ISOLATED_BOUNDARY_X Real *pot_boundary_x0_d; Real *pot_boundary_x1_d; - #endif - #ifdef GRAV_ISOLATED_BOUNDARY_Y + #endif + #ifdef GRAV_ISOLATED_BOUNDARY_Y Real *pot_boundary_y0_d; Real *pot_boundary_y1_d; - #endif - #ifdef GRAV_ISOLATED_BOUNDARY_Z + #endif + #ifdef GRAV_ISOLATED_BOUNDARY_Z Real *pot_boundary_z0_d; Real *pot_boundary_z1_d; - #endif - #endif//GRAVITY_GPU + #endif +#endif // GRAVITY_GPU } F; /*! \fn Grav3D(void) - * \brief Constructor for the gravity class */ + * \brief Constructor for the gravity class */ Grav3D(void); /*! \fn void Initialize(int nx_in, int ny_in, int nz_in) - * \brief Initialize the grid. */ - void Initialize( Real x_min, Real y_min, Real z_min, Real x_max, Real y_max, Real z_max, Real Lx, Real Ly, Real Lz, int nx_total, int ny_total, int nz_total, int nx_real, int ny_real, int nz_real, Real dx_real, Real dy_real, Real dz_real, int n_ghost_pot_offset, struct parameters *P); + * \brief Initialize the grid. */ + void Initialize(Real x_min, Real y_min, Real z_min, Real x_max, Real y_max, + Real z_max, Real Lx, Real Ly, Real Lz, int nx_total, + int ny_total, int nz_total, int nx_real, int ny_real, + int nz_real, Real dx_real, Real dy_real, Real dz_real, + int n_ghost_pot_offset, struct parameters *P); void AllocateMemory_CPU(void); void Initialize_values_CPU(); void FreeMemory_CPU(void); - Real Get_Average_Density( ); - Real Get_Average_Density_function( int g_start, int g_end ); + Real Get_Average_Density(); + Real Get_Average_Density_function(int g_start, int g_end); - void Set_Boundary_Flags( int *flags ); + void Set_Boundary_Flags(int *flags); - #ifdef SOR - void Copy_Isolated_Boundary_To_GPU_buffer( Real *isolated_boundary_h, Real *isolated_boundary_d, int boundary_size ); - void Copy_Isolated_Boundaries_To_GPU( struct parameters *P ); - #endif +#ifdef SOR + void Copy_Isolated_Boundary_To_GPU_buffer(Real *isolated_boundary_h, + Real *isolated_boundary_d, + int boundary_size); + void Copy_Isolated_Boundaries_To_GPU(struct parameters *P); +#endif - #ifdef GRAVITY_GPU +#ifdef GRAVITY_GPU void AllocateMemory_GPU(void); void FreeMemory_GPU(void); - #endif - +#endif }; - -#endif //GRAV3D_H +#endif // GRAV3D_H diff --git a/src/gravity/gravity_boundaries.cpp b/src/gravity/gravity_boundaries.cpp index 1eaec0380..ad942c859 100644 --- a/src/gravity/gravity_boundaries.cpp +++ b/src/gravity/gravity_boundaries.cpp @@ -1,98 +1,107 @@ #ifdef GRAVITY + #include -#include -#include "../io/io.h" -#include "../grid/grid3D.h" -#include "../gravity/grav3D.h" -#include "../model/disk_galaxy.h" + #include "../gravity/grav3D.h" + #include "../grid/grid3D.h" + #include "../io/io.h" + #include "../model/disk_galaxy.h" -#if defined (GRAV_ISOLATED_BOUNDARY_X) || defined (GRAV_ISOLATED_BOUNDARY_Y) || defined(GRAV_ISOLATED_BOUNDARY_Z) - -void Grid3D::Compute_Potential_Boundaries_Isolated( int dir, struct parameters *P ){ + #if defined(GRAV_ISOLATED_BOUNDARY_X) || \ + defined(GRAV_ISOLATED_BOUNDARY_Y) || defined(GRAV_ISOLATED_BOUNDARY_Z) +void Grid3D::Compute_Potential_Boundaries_Isolated(int dir, + struct parameters *P) +{ // Set Isolated Boundaries for the ghost cells. int bc_potential_type = P->bc_potential_type; - //bc_potential_type = 0 -> Point mass potential GM/r - if ( dir == 0 ) Compute_Potential_Isolated_Boundary( 0, 0, bc_potential_type ); - if ( dir == 1 ) Compute_Potential_Isolated_Boundary( 0, 1, bc_potential_type ); - if ( dir == 2 ) Compute_Potential_Isolated_Boundary( 1, 0, bc_potential_type ); - if ( dir == 3 ) Compute_Potential_Isolated_Boundary( 1, 1, bc_potential_type ); - if ( dir == 4 ) Compute_Potential_Isolated_Boundary( 2, 0, bc_potential_type ); - if ( dir == 5 ) Compute_Potential_Isolated_Boundary( 2, 1, bc_potential_type ); - + // bc_potential_type = 0 -> Point mass potential GM/r + if (dir == 0) Compute_Potential_Isolated_Boundary(0, 0, bc_potential_type); + if (dir == 1) Compute_Potential_Isolated_Boundary(0, 1, bc_potential_type); + if (dir == 2) Compute_Potential_Isolated_Boundary(1, 0, bc_potential_type); + if (dir == 3) Compute_Potential_Isolated_Boundary(1, 1, bc_potential_type); + if (dir == 4) Compute_Potential_Isolated_Boundary(2, 0, bc_potential_type); + if (dir == 5) Compute_Potential_Isolated_Boundary(2, 1, bc_potential_type); } -void Grid3D::Set_Potential_Boundaries_Isolated( int direction, int side, int *flags ){ - +void Grid3D::Set_Potential_Boundaries_Isolated(int direction, int side, + int *flags) +{ Real *pot_boundary; int n_i, n_j, nGHST; int nx_g, ny_g, nz_g; int nx_local, ny_local, nz_local; - nGHST = N_GHOST_POTENTIAL; - nx_g = Grav.nx_local + 2*nGHST; - ny_g = Grav.ny_local + 2*nGHST; - nz_g = Grav.nz_local + 2*nGHST; + nGHST = N_GHOST_POTENTIAL; + nx_g = Grav.nx_local + 2 * nGHST; + ny_g = Grav.ny_local + 2 * nGHST; + nz_g = Grav.nz_local + 2 * nGHST; nx_local = Grav.nx_local; ny_local = Grav.ny_local; nz_local = Grav.nz_local; - #ifdef GRAV_ISOLATED_BOUNDARY_X - if ( direction == 0 ){ + #ifdef GRAV_ISOLATED_BOUNDARY_X + if (direction == 0) { n_i = Grav.ny_local; n_j = Grav.nz_local; - if ( side == 0 ) pot_boundary = Grav.F.pot_boundary_x0; - if ( side == 1 ) pot_boundary = Grav.F.pot_boundary_x1; + if (side == 0) pot_boundary = Grav.F.pot_boundary_x0; + if (side == 1) pot_boundary = Grav.F.pot_boundary_x1; } - #endif - #ifdef GRAV_ISOLATED_BOUNDARY_Y - if ( direction == 1 ){ + #endif + #ifdef GRAV_ISOLATED_BOUNDARY_Y + if (direction == 1) { n_i = Grav.nx_local; n_j = Grav.nz_local; - if ( side == 0 ) pot_boundary = Grav.F.pot_boundary_y0; - if ( side == 1 ) pot_boundary = Grav.F.pot_boundary_y1; + if (side == 0) pot_boundary = Grav.F.pot_boundary_y0; + if (side == 1) pot_boundary = Grav.F.pot_boundary_y1; } - #endif - #ifdef GRAV_ISOLATED_BOUNDARY_Z - if ( direction == 2 ){ + #endif + #ifdef GRAV_ISOLATED_BOUNDARY_Z + if (direction == 2) { n_i = Grav.nx_local; n_j = Grav.ny_local; - if ( side == 0 ) pot_boundary = Grav.F.pot_boundary_z0; - if ( side == 1 ) pot_boundary = Grav.F.pot_boundary_z1; + if (side == 0) pot_boundary = Grav.F.pot_boundary_z0; + if (side == 1) pot_boundary = Grav.F.pot_boundary_z1; } - #endif + #endif int i, j, k, id_buffer, id_grid; - for ( k=0; k -#include -#include "../io/io.h" -#include "../grid/grid3D.h" -#include "../gravity/grav3D.h" + #include "../gravity/grav3D.h" + #include "../grid/grid3D.h" + #include "../io/io.h" + #if defined(GRAV_ISOLATED_BOUNDARY_X) || \ + defined(GRAV_ISOLATED_BOUNDARY_Y) || defined(GRAV_ISOLATED_BOUNDARY_Z) -#if defined (GRAV_ISOLATED_BOUNDARY_X) || defined (GRAV_ISOLATED_BOUNDARY_Y) || defined(GRAV_ISOLATED_BOUNDARY_Z) - -void __global__ Set_Potential_Boundaries_Isolated_kernel(int direction, int side, int size_buffer, int n_i, int n_j, int nx, int ny, int nz, int n_ghost, Real *potential_d, Real *pot_boundary_d ){ - +void __global__ Set_Potential_Boundaries_Isolated_kernel( + int direction, int side, int size_buffer, int n_i, int n_j, int nx, int ny, + int nz, int n_ghost, Real *potential_d, Real *pot_boundary_d) +{ // get a global thread ID int tid, tid_i, tid_j, tid_k, tid_buffer, tid_pot; - tid = threadIdx.x + blockIdx.x * blockDim.x; - tid_k = tid / (n_i*n_j); - tid_j = (tid - tid_k*n_i*n_j) / n_i; - tid_i = tid - tid_k*n_i*n_j - tid_j*n_i; - - if ( tid_i < 0 || tid_i >= n_i || tid_j < 0 || tid_j >= n_j || tid_k < 0 || tid_k >= n_ghost ) return; - - tid_buffer = tid_i + tid_j*n_i + tid_k*n_i*n_j; - - if ( direction == 0 ){ - if ( side == 0 ) tid_pot = ( tid_k ) + (tid_i+n_ghost)*nx + (tid_j+n_ghost)*nx*ny; - if ( side == 1 ) tid_pot = ( nx - n_ghost + tid_k ) + (tid_i+n_ghost)*nx + (tid_j+n_ghost)*nx*ny; + tid = threadIdx.x + blockIdx.x * blockDim.x; + tid_k = tid / (n_i * n_j); + tid_j = (tid - tid_k * n_i * n_j) / n_i; + tid_i = tid - tid_k * n_i * n_j - tid_j * n_i; + + if (tid_i < 0 || tid_i >= n_i || tid_j < 0 || tid_j >= n_j || tid_k < 0 || + tid_k >= n_ghost) + return; + + tid_buffer = tid_i + tid_j * n_i + tid_k * n_i * n_j; + + if (direction == 0) { + if (side == 0) + tid_pot = (tid_k) + (tid_i + n_ghost) * nx + (tid_j + n_ghost) * nx * ny; + if (side == 1) + tid_pot = (nx - n_ghost + tid_k) + (tid_i + n_ghost) * nx + + (tid_j + n_ghost) * nx * ny; } - if ( direction == 1 ){ - if ( side == 0 ) tid_pot = (tid_i+n_ghost) + ( tid_k )*nx + (tid_j+n_ghost)*nx*ny; - if ( side == 1 ) tid_pot = (tid_i+n_ghost) + ( ny - n_ghost + tid_k )*nx + (tid_j+n_ghost)*nx*ny; + if (direction == 1) { + if (side == 0) + tid_pot = (tid_i + n_ghost) + (tid_k)*nx + (tid_j + n_ghost) * nx * ny; + if (side == 1) + tid_pot = (tid_i + n_ghost) + (ny - n_ghost + tid_k) * nx + + (tid_j + n_ghost) * nx * ny; } - if ( direction == 2 ){ - if ( side == 0 ) tid_pot = (tid_i+n_ghost) + (tid_j+n_ghost)*nx + ( tid_k )*nx*ny; - if ( side == 1 ) tid_pot = (tid_i+n_ghost) + (tid_j+n_ghost)*nx + ( nz - n_ghost + tid_k )*nx*ny; + if (direction == 2) { + if (side == 0) + tid_pot = (tid_i + n_ghost) + (tid_j + n_ghost) * nx + (tid_k)*nx * ny; + if (side == 1) + tid_pot = (tid_i + n_ghost) + (tid_j + n_ghost) * nx + + (nz - n_ghost + tid_k) * nx * ny; } potential_d[tid_pot] = pot_boundary_d[tid_buffer]; } -void Grid3D::Set_Potential_Boundaries_Isolated_GPU( int direction, int side, int *flags ){ - +void Grid3D::Set_Potential_Boundaries_Isolated_GPU(int direction, int side, + int *flags) +{ int n_i, n_j, n_ghost, size_buffer; int nx_g, ny_g, nz_g; n_ghost = N_GHOST_POTENTIAL; - nx_g = Grav.nx_local + 2*n_ghost; - ny_g = Grav.ny_local + 2*n_ghost; - nz_g = Grav.nz_local + 2*n_ghost; - + nx_g = Grav.nx_local + 2 * n_ghost; + ny_g = Grav.ny_local + 2 * n_ghost; + nz_g = Grav.nz_local + 2 * n_ghost; Real *pot_boundary_h, *pot_boundary_d; - #ifdef GRAV_ISOLATED_BOUNDARY_X - if ( direction == 0 ){ + #ifdef GRAV_ISOLATED_BOUNDARY_X + if (direction == 0) { n_i = Grav.ny_local; n_j = Grav.nz_local; - if ( side == 0 ) pot_boundary_h = Grav.F.pot_boundary_x0; - if ( side == 1 ) pot_boundary_h = Grav.F.pot_boundary_x1; - if ( side == 0 ) pot_boundary_d = Grav.F.pot_boundary_x0_d; - if ( side == 1 ) pot_boundary_d = Grav.F.pot_boundary_x1_d; + if (side == 0) pot_boundary_h = Grav.F.pot_boundary_x0; + if (side == 1) pot_boundary_h = Grav.F.pot_boundary_x1; + if (side == 0) pot_boundary_d = Grav.F.pot_boundary_x0_d; + if (side == 1) pot_boundary_d = Grav.F.pot_boundary_x1_d; } - #endif - #ifdef GRAV_ISOLATED_BOUNDARY_Y - if ( direction == 1 ){ + #endif + #ifdef GRAV_ISOLATED_BOUNDARY_Y + if (direction == 1) { n_i = Grav.nx_local; n_j = Grav.nz_local; - if ( side == 0 ) pot_boundary_h = Grav.F.pot_boundary_y0; - if ( side == 1 ) pot_boundary_h = Grav.F.pot_boundary_y1; - if ( side == 0 ) pot_boundary_d = Grav.F.pot_boundary_y0_d; - if ( side == 1 ) pot_boundary_d = Grav.F.pot_boundary_y1_d; + if (side == 0) pot_boundary_h = Grav.F.pot_boundary_y0; + if (side == 1) pot_boundary_h = Grav.F.pot_boundary_y1; + if (side == 0) pot_boundary_d = Grav.F.pot_boundary_y0_d; + if (side == 1) pot_boundary_d = Grav.F.pot_boundary_y1_d; } - #endif - #ifdef GRAV_ISOLATED_BOUNDARY_Z - if ( direction == 2 ){ + #endif + #ifdef GRAV_ISOLATED_BOUNDARY_Z + if (direction == 2) { n_i = Grav.nx_local; n_j = Grav.ny_local; - if ( side == 0 ) pot_boundary_h = Grav.F.pot_boundary_z0; - if ( side == 1 ) pot_boundary_h = Grav.F.pot_boundary_z1; - if ( side == 0 ) pot_boundary_d = Grav.F.pot_boundary_z0_d; - if ( side == 1 ) pot_boundary_d = Grav.F.pot_boundary_z1_d; + if (side == 0) pot_boundary_h = Grav.F.pot_boundary_z0; + if (side == 1) pot_boundary_h = Grav.F.pot_boundary_z1; + if (side == 0) pot_boundary_d = Grav.F.pot_boundary_z0_d; + if (side == 1) pot_boundary_d = Grav.F.pot_boundary_z1_d; } - #endif + #endif size_buffer = N_GHOST_POTENTIAL * n_i * n_j; // set values for GPU kernels - int ngrid = ( size_buffer - 1 ) / TPB_GRAV + 1; + int ngrid = (size_buffer - 1) / TPB_GRAV + 1; // number of blocks per 1D grid dim3 dim1dGrid(ngrid, 1, 1); // number of threads per 1D block dim3 dim1dBlock(TPB_GRAV, 1, 1); - //Copy the boundary array from host to device - cudaMemcpy( pot_boundary_d, pot_boundary_h, size_buffer*sizeof(Real), cudaMemcpyHostToDevice ); + // Copy the boundary array from host to device + cudaMemcpy(pot_boundary_d, pot_boundary_h, size_buffer * sizeof(Real), + cudaMemcpyHostToDevice); cudaDeviceSynchronize(); // Copy the potential boundary from buffer to potential array - hipLaunchKernelGGL( Set_Potential_Boundaries_Isolated_kernel, dim1dGrid, dim1dBlock, 0, 0, direction, side, size_buffer, n_i, n_j, nx_g, ny_g, nz_g, n_ghost, Grav.F.potential_d, pot_boundary_d ); - + hipLaunchKernelGGL(Set_Potential_Boundaries_Isolated_kernel, dim1dGrid, + dim1dBlock, 0, 0, direction, side, size_buffer, n_i, n_j, + nx_g, ny_g, nz_g, n_ghost, Grav.F.potential_d, + pot_boundary_d); } + #endif // GRAV_ISOLATED_BOUNDARY -#endif //GRAV_ISOLATED_BOUNDARY - - -void __global__ Set_Potential_Boundaries_Periodic_kernel(int direction, int side, int n_i, int n_j, int nx, int ny, int nz, int n_ghost, Real *potential_d ){ - +void __global__ Set_Potential_Boundaries_Periodic_kernel( + int direction, int side, int n_i, int n_j, int nx, int ny, int nz, + int n_ghost, Real *potential_d) +{ // get a global thread ID int tid, tid_i, tid_j, tid_k, tid_src, tid_dst; - tid = threadIdx.x + blockIdx.x * blockDim.x; - tid_k = tid / (n_i*n_j); - tid_j = (tid - tid_k*n_i*n_j) / n_i; - tid_i = tid - tid_k*n_i*n_j - tid_j*n_i; - - if ( tid_i < 0 || tid_i >= n_i || tid_j < 0 || tid_j >= n_j || tid_k < 0 || tid_k >= n_ghost ) return; - - if ( direction == 0 ){ - if ( side == 0 ) tid_src = ( nx - 2*n_ghost + tid_k ) + (tid_i)*nx + (tid_j)*nx*ny; - if ( side == 0 ) tid_dst = ( tid_k ) + (tid_i)*nx + (tid_j)*nx*ny; - if ( side == 1 ) tid_src = ( n_ghost + tid_k ) + (tid_i)*nx + (tid_j)*nx*ny; - if ( side == 1 ) tid_dst = ( nx - n_ghost + tid_k ) + (tid_i)*nx + (tid_j)*nx*ny; - + tid = threadIdx.x + blockIdx.x * blockDim.x; + tid_k = tid / (n_i * n_j); + tid_j = (tid - tid_k * n_i * n_j) / n_i; + tid_i = tid - tid_k * n_i * n_j - tid_j * n_i; + + if (tid_i < 0 || tid_i >= n_i || tid_j < 0 || tid_j >= n_j || tid_k < 0 || + tid_k >= n_ghost) + return; + + if (direction == 0) { + if (side == 0) + tid_src = (nx - 2 * n_ghost + tid_k) + (tid_i)*nx + (tid_j)*nx * ny; + if (side == 0) tid_dst = (tid_k) + (tid_i)*nx + (tid_j)*nx * ny; + if (side == 1) tid_src = (n_ghost + tid_k) + (tid_i)*nx + (tid_j)*nx * ny; + if (side == 1) + tid_dst = (nx - n_ghost + tid_k) + (tid_i)*nx + (tid_j)*nx * ny; } - if ( direction == 1 ){ - if ( side == 0 ) tid_src = (tid_i) + ( ny - 2*n_ghost + tid_k )*nx + (tid_j)*nx*ny; - if ( side == 0 ) tid_dst = (tid_i) + ( tid_k )*nx + (tid_j)*nx*ny; - if ( side == 1 ) tid_src = (tid_i) + ( n_ghost + tid_k )*nx + (tid_j)*nx*ny; - if ( side == 1 ) tid_dst = (tid_i) + ( ny - n_ghost + tid_k )*nx + (tid_j)*nx*ny; + if (direction == 1) { + if (side == 0) + tid_src = (tid_i) + (ny - 2 * n_ghost + tid_k) * nx + (tid_j)*nx * ny; + if (side == 0) tid_dst = (tid_i) + (tid_k)*nx + (tid_j)*nx * ny; + if (side == 1) tid_src = (tid_i) + (n_ghost + tid_k) * nx + (tid_j)*nx * ny; + if (side == 1) + tid_dst = (tid_i) + (ny - n_ghost + tid_k) * nx + (tid_j)*nx * ny; } - if ( direction == 2 ){ - if ( side == 0 ) tid_src = (tid_i) + (tid_j)*nx + ( nz - 2*n_ghost + tid_k )*nx*ny; - if ( side == 0 ) tid_dst = (tid_i) + (tid_j)*nx + ( tid_k )*nx*ny; - if ( side == 1 ) tid_src = (tid_i) + (tid_j)*nx + ( n_ghost + tid_k )*nx*ny; - if ( side == 1 ) tid_dst = (tid_i) + (tid_j)*nx + ( nz - n_ghost + tid_k )*nx*ny; + if (direction == 2) { + if (side == 0) + tid_src = (tid_i) + (tid_j)*nx + (nz - 2 * n_ghost + tid_k) * nx * ny; + if (side == 0) tid_dst = (tid_i) + (tid_j)*nx + (tid_k)*nx * ny; + if (side == 1) tid_src = (tid_i) + (tid_j)*nx + (n_ghost + tid_k) * nx * ny; + if (side == 1) + tid_dst = (tid_i) + (tid_j)*nx + (nz - n_ghost + tid_k) * nx * ny; } - + potential_d[tid_dst] = potential_d[tid_src]; - } - -void Grid3D::Set_Potential_Boundaries_Periodic_GPU( int direction, int side, int *flags ){ - +void Grid3D::Set_Potential_Boundaries_Periodic_GPU(int direction, int side, + int *flags) +{ int n_i, n_j, n_ghost, size; int nx_g, ny_g, nz_g; n_ghost = N_GHOST_POTENTIAL; - nx_g = Grav.nx_local + 2*n_ghost; - ny_g = Grav.ny_local + 2*n_ghost; - nz_g = Grav.nz_local + 2*n_ghost; + nx_g = Grav.nx_local + 2 * n_ghost; + ny_g = Grav.ny_local + 2 * n_ghost; + nz_g = Grav.nz_local + 2 * n_ghost; - if ( direction == 0 ){ + if (direction == 0) { n_i = ny_g; n_j = nz_g; } - if ( direction == 1 ){ + if (direction == 1) { n_i = nx_g; n_j = nz_g; } - if ( direction == 2 ){ + if (direction == 2) { n_i = nx_g; n_j = ny_g; } @@ -163,66 +185,83 @@ void Grid3D::Set_Potential_Boundaries_Periodic_GPU( int direction, int side, int size = N_GHOST_POTENTIAL * n_i * n_j; // set values for GPU kernels - int ngrid = ( size - 1 ) / TPB_GRAV + 1; + int ngrid = (size - 1) / TPB_GRAV + 1; // number of blocks per 1D grid dim3 dim1dGrid(ngrid, 1, 1); // number of threads per 1D block dim3 dim1dBlock(TPB_GRAV, 1, 1); // Copy the potential boundary from buffer to potential array - hipLaunchKernelGGL( Set_Potential_Boundaries_Periodic_kernel, dim1dGrid, dim1dBlock, 0, 0, direction, side, n_i, n_j, nx_g, ny_g, nz_g, n_ghost, Grav.F.potential_d ); - - + hipLaunchKernelGGL(Set_Potential_Boundaries_Periodic_kernel, dim1dGrid, + dim1dBlock, 0, 0, direction, side, n_i, n_j, nx_g, ny_g, + nz_g, n_ghost, Grav.F.potential_d); } -__global__ void Load_Transfer_Buffer_GPU_kernel( int direction, int side, int size_buffer, int n_i, int n_j, int nx, int ny, int nz, int n_ghost_transfer, int n_ghost_potential, Real *potential_d, Real *transfer_buffer_d ){ - +__global__ void Load_Transfer_Buffer_GPU_kernel( + int direction, int side, int size_buffer, int n_i, int n_j, int nx, int ny, + int nz, int n_ghost_transfer, int n_ghost_potential, Real *potential_d, + Real *transfer_buffer_d) +{ // get a global thread ID int tid, tid_i, tid_j, tid_k, tid_buffer, tid_pot; - tid = threadIdx.x + blockIdx.x * blockDim.x; - tid_k = tid / (n_i*n_j); - tid_j = (tid - tid_k*n_i*n_j) / n_i; - tid_i = tid - tid_k*n_i*n_j - tid_j*n_i; - - if ( tid_i < 0 || tid_i >= n_i || tid_j < 0 || tid_j >= n_j || tid_k < 0 || tid_k >= n_ghost_transfer ) return; - - tid_buffer = tid_i + tid_j*n_i + tid_k*n_i*n_j; - - if ( direction == 0 ){ - if ( side == 0 ) tid_pot = ( n_ghost_potential + tid_k ) + (tid_i)*nx + (tid_j)*nx*ny; - if ( side == 1 ) tid_pot = ( nx - n_ghost_potential - n_ghost_transfer + tid_k ) + (tid_i)*nx + (tid_j)*nx*ny; + tid = threadIdx.x + blockIdx.x * blockDim.x; + tid_k = tid / (n_i * n_j); + tid_j = (tid - tid_k * n_i * n_j) / n_i; + tid_i = tid - tid_k * n_i * n_j - tid_j * n_i; + + if (tid_i < 0 || tid_i >= n_i || tid_j < 0 || tid_j >= n_j || tid_k < 0 || + tid_k >= n_ghost_transfer) + return; + + tid_buffer = tid_i + tid_j * n_i + tid_k * n_i * n_j; + + if (direction == 0) { + if (side == 0) + tid_pot = (n_ghost_potential + tid_k) + (tid_i)*nx + (tid_j)*nx * ny; + if (side == 1) + tid_pot = (nx - n_ghost_potential - n_ghost_transfer + tid_k) + + (tid_i)*nx + (tid_j)*nx * ny; } - if ( direction == 1 ){ - if ( side == 0 ) tid_pot = (tid_i) + ( n_ghost_potential + tid_k )*nx + (tid_j)*nx*ny; - if ( side == 1 ) tid_pot = (tid_i) + ( ny - n_ghost_potential - n_ghost_transfer + tid_k )*nx + (tid_j)*nx*ny; + if (direction == 1) { + if (side == 0) + tid_pot = (tid_i) + (n_ghost_potential + tid_k) * nx + (tid_j)*nx * ny; + if (side == 1) + tid_pot = (tid_i) + + (ny - n_ghost_potential - n_ghost_transfer + tid_k) * nx + + (tid_j)*nx * ny; } - if ( direction == 2 ){ - if ( side == 0 ) tid_pot = (tid_i) + (tid_j)*nx + ( n_ghost_potential + tid_k )*nx*ny; - if ( side == 1 ) tid_pot = (tid_i) + (tid_j)*nx + ( nz - n_ghost_potential - n_ghost_transfer + tid_k )*nx*ny; + if (direction == 2) { + if (side == 0) + tid_pot = (tid_i) + (tid_j)*nx + (n_ghost_potential + tid_k) * nx * ny; + if (side == 1) + tid_pot = (tid_i) + (tid_j)*nx + + (nz - n_ghost_potential - n_ghost_transfer + tid_k) * nx * ny; } transfer_buffer_d[tid_buffer] = potential_d[tid_pot]; - } -int Grid3D::Load_Gravity_Potential_To_Buffer_GPU( int direction, int side, Real *buffer, int buffer_start ){ - +int Grid3D::Load_Gravity_Potential_To_Buffer_GPU(int direction, int side, + Real *buffer, int buffer_start) +{ // printf( "Loading Gravity Buffer: Dir %d side: %d \n", direction, side ); - int nx_pot, ny_pot, nz_pot, size_buffer, n_ghost_potential, n_ghost_transfer, n_i, n_j, ngrid;; + int nx_pot, ny_pot, nz_pot, size_buffer, n_ghost_potential, n_ghost_transfer, + n_i, n_j, ngrid; + ; n_ghost_potential = N_GHOST_POTENTIAL; n_ghost_transfer = N_GHOST_POTENTIAL; - nx_pot = Grav.nx_local + 2*n_ghost_potential; - ny_pot = Grav.ny_local + 2*n_ghost_potential; - nz_pot = Grav.nz_local + 2*n_ghost_potential; + nx_pot = Grav.nx_local + 2 * n_ghost_potential; + ny_pot = Grav.ny_local + 2 * n_ghost_potential; + nz_pot = Grav.nz_local + 2 * n_ghost_potential; - if ( direction == 0 ){ + if (direction == 0) { n_i = ny_pot; n_j = nz_pot; } - if ( direction == 1 ){ + if (direction == 1) { n_i = nx_pot; n_j = nz_pot; } - if ( direction == 2 ){ + if (direction == 2) { n_i = nx_pot; n_j = ny_pot; } @@ -230,7 +269,7 @@ int Grid3D::Load_Gravity_Potential_To_Buffer_GPU( int direction, int side, Real size_buffer = n_ghost_transfer * n_i * n_j; // set values for GPU kernels - ngrid = ( size_buffer - 1 ) / TPB_GRAV + 1; + ngrid = (size_buffer - 1) / TPB_GRAV + 1; // number of blocks per 1D grid dim3 dim1dGrid(ngrid, 1, 1); // number of threads per 1D block @@ -242,61 +281,82 @@ int Grid3D::Load_Gravity_Potential_To_Buffer_GPU( int direction, int side, Real Real *send_buffer_d; send_buffer_d = buffer; - hipLaunchKernelGGL( Load_Transfer_Buffer_GPU_kernel, dim1dGrid, dim1dBlock, 0, 0, direction, side, size_buffer, n_i, n_j, nx_pot, ny_pot, nz_pot, n_ghost_transfer, n_ghost_potential, potential_d, send_buffer_d ); + hipLaunchKernelGGL(Load_Transfer_Buffer_GPU_kernel, dim1dGrid, dim1dBlock, 0, + 0, direction, side, size_buffer, n_i, n_j, nx_pot, ny_pot, + nz_pot, n_ghost_transfer, n_ghost_potential, potential_d, + send_buffer_d); CHECK(cudaDeviceSynchronize()); return size_buffer; } -__global__ void Unload_Transfer_Buffer_GPU_kernel( int direction, int side, int size_buffer, int n_i, int n_j, int nx, int ny, int nz, int n_ghost_transfer, int n_ghost_potential, Real *potential_d, Real *transfer_buffer_d ){ - +__global__ void Unload_Transfer_Buffer_GPU_kernel( + int direction, int side, int size_buffer, int n_i, int n_j, int nx, int ny, + int nz, int n_ghost_transfer, int n_ghost_potential, Real *potential_d, + Real *transfer_buffer_d) +{ // get a global thread ID int tid, tid_i, tid_j, tid_k, tid_buffer, tid_pot; - tid = threadIdx.x + blockIdx.x * blockDim.x; - tid_k = tid / (n_i*n_j); - tid_j = (tid - tid_k*n_i*n_j) / n_i; - tid_i = tid - tid_k*n_i*n_j - tid_j*n_i; - - if ( tid_i < 0 || tid_i >= n_i || tid_j < 0 || tid_j >= n_j || tid_k < 0 || tid_k >= n_ghost_transfer ) return; - - tid_buffer = tid_i + tid_j*n_i + tid_k*n_i*n_j; - - if ( direction == 0 ){ - if ( side == 0 ) tid_pot = ( n_ghost_potential - n_ghost_transfer + tid_k ) + (tid_i)*nx + (tid_j)*nx*ny; - if ( side == 1 ) tid_pot = ( nx - n_ghost_potential + tid_k ) + (tid_i)*nx + (tid_j)*nx*ny; + tid = threadIdx.x + blockIdx.x * blockDim.x; + tid_k = tid / (n_i * n_j); + tid_j = (tid - tid_k * n_i * n_j) / n_i; + tid_i = tid - tid_k * n_i * n_j - tid_j * n_i; + + if (tid_i < 0 || tid_i >= n_i || tid_j < 0 || tid_j >= n_j || tid_k < 0 || + tid_k >= n_ghost_transfer) + return; + + tid_buffer = tid_i + tid_j * n_i + tid_k * n_i * n_j; + + if (direction == 0) { + if (side == 0) + tid_pot = (n_ghost_potential - n_ghost_transfer + tid_k) + (tid_i)*nx + + (tid_j)*nx * ny; + if (side == 1) + tid_pot = (nx - n_ghost_potential + tid_k) + (tid_i)*nx + (tid_j)*nx * ny; } - if ( direction == 1 ){ - if ( side == 0 ) tid_pot = (tid_i) + ( n_ghost_potential - n_ghost_transfer + tid_k )*nx + (tid_j)*nx*ny; - if ( side == 1 ) tid_pot = (tid_i) + ( ny - n_ghost_potential + tid_k )*nx + (tid_j)*nx*ny; + if (direction == 1) { + if (side == 0) + tid_pot = (tid_i) + (n_ghost_potential - n_ghost_transfer + tid_k) * nx + + (tid_j)*nx * ny; + if (side == 1) + tid_pot = + (tid_i) + (ny - n_ghost_potential + tid_k) * nx + (tid_j)*nx * ny; } - if ( direction == 2 ){ - if ( side == 0 ) tid_pot = (tid_i) + (tid_j)*nx + ( n_ghost_potential - n_ghost_transfer + tid_k )*nx*ny; - if ( side == 1 ) tid_pot = (tid_i) + (tid_j)*nx + ( nz - n_ghost_potential + tid_k )*nx*ny; + if (direction == 2) { + if (side == 0) + tid_pot = (tid_i) + (tid_j)*nx + + (n_ghost_potential - n_ghost_transfer + tid_k) * nx * ny; + if (side == 1) + tid_pot = + (tid_i) + (tid_j)*nx + (nz - n_ghost_potential + tid_k) * nx * ny; } potential_d[tid_pot] = transfer_buffer_d[tid_buffer]; - } - -void Grid3D::Unload_Gravity_Potential_from_Buffer_GPU( int direction, int side, Real *buffer, int buffer_start ){ - +void Grid3D::Unload_Gravity_Potential_from_Buffer_GPU(int direction, int side, + Real *buffer, + int buffer_start) +{ // printf( "Loading Gravity Buffer: Dir %d side: %d \n", direction, side ); - int nx_pot, ny_pot, nz_pot, size_buffer, n_ghost_potential, n_ghost_transfer, n_i, n_j, ngrid;; + int nx_pot, ny_pot, nz_pot, size_buffer, n_ghost_potential, n_ghost_transfer, + n_i, n_j, ngrid; + ; n_ghost_potential = N_GHOST_POTENTIAL; n_ghost_transfer = N_GHOST_POTENTIAL; - nx_pot = Grav.nx_local + 2*n_ghost_potential; - ny_pot = Grav.ny_local + 2*n_ghost_potential; - nz_pot = Grav.nz_local + 2*n_ghost_potential; + nx_pot = Grav.nx_local + 2 * n_ghost_potential; + ny_pot = Grav.ny_local + 2 * n_ghost_potential; + nz_pot = Grav.nz_local + 2 * n_ghost_potential; - if ( direction == 0 ){ + if (direction == 0) { n_i = ny_pot; n_j = nz_pot; } - if ( direction == 1 ){ + if (direction == 1) { n_i = nx_pot; n_j = nz_pot; } - if ( direction == 2 ){ + if (direction == 2) { n_i = nx_pot; n_j = ny_pot; } @@ -304,7 +364,7 @@ void Grid3D::Unload_Gravity_Potential_from_Buffer_GPU( int direction, int side, size_buffer = n_ghost_transfer * n_i * n_j; // set values for GPU kernels - ngrid = ( size_buffer - 1 ) / TPB_GRAV + 1; + ngrid = (size_buffer - 1) / TPB_GRAV + 1; // number of blocks per 1D grid dim3 dim1dGrid(ngrid, 1, 1); // number of threads per 1D block @@ -316,9 +376,10 @@ void Grid3D::Unload_Gravity_Potential_from_Buffer_GPU( int direction, int side, Real *recv_buffer_d; recv_buffer_d = buffer; - hipLaunchKernelGGL( Unload_Transfer_Buffer_GPU_kernel, dim1dGrid, dim1dBlock, 0, 0, direction, side, size_buffer, n_i, n_j, nx_pot, ny_pot, nz_pot, n_ghost_transfer, n_ghost_potential, potential_d, recv_buffer_d ); - + hipLaunchKernelGGL(Unload_Transfer_Buffer_GPU_kernel, dim1dGrid, dim1dBlock, + 0, 0, direction, side, size_buffer, n_i, n_j, nx_pot, + ny_pot, nz_pot, n_ghost_transfer, n_ghost_potential, + potential_d, recv_buffer_d); } - -#endif //GRAVITY +#endif // GRAVITY diff --git a/src/gravity/gravity_functions.cpp b/src/gravity/gravity_functions.cpp index fa136c659..95e3252d8 100644 --- a/src/gravity/gravity_functions.cpp +++ b/src/gravity/gravity_functions.cpp @@ -1,31 +1,32 @@ #ifdef GRAVITY -#include "../grid/grid3D.h" -#include "../global/global.h" -#include "../io/io.h" -#include "../utils/error_handling.h" -#include + #include -#ifdef CUDA -#include "../mpi/cuda_mpi_routines.h" -#endif + #include "../global/global.h" + #include "../grid/grid3D.h" + #include "../io/io.h" + #include "../utils/error_handling.h" -#ifdef PARALLEL_OMP -#include "../utils/parallel_omp.h" -#endif + #ifdef CUDA + #include "../mpi/cuda_mpi_routines.h" + #endif -#if defined(PARIS_TEST) || defined(PARIS_GALACTIC_TEST) -#include -#endif + #ifdef PARALLEL_OMP + #include "../utils/parallel_omp.h" + #endif -//#ifdef PARTICLES -#include "../model/disk_galaxy.h" -//#endif + #if defined(PARIS_TEST) || defined(PARIS_GALACTIC_TEST) + #include + #endif -//Set delta_t when usi#ng gravity -void Grid3D::set_dt_Gravity(){ + // #ifdef PARTICLES + #include "../model/disk_galaxy.h" +// #endif - //Delta_t for the hydro +// Set delta_t when usi#ng gravity +void Grid3D::set_dt_Gravity() +{ + // Delta_t for the hydro Real dt_hydro = H.dt; #ifdef AVERAGE_SLOW_CELLS @@ -33,202 +34,208 @@ void Grid3D::set_dt_Gravity(){ #endif #ifdef PARTICLES - //Compute delta_t for particles and choose min(dt_particles, dt_hydro) + // Compute delta_t for particles and choose min(dt_particles, dt_hydro) Real dt_particles, dt_min; - #ifdef COSMOLOGY - chprintf( "Current_z: %f \n", Cosmo.current_z ); + #ifdef COSMOLOGY + chprintf("Current_z: %f \n", Cosmo.current_z); Real da_particles, da_min, dt_physical; - //Compute the particles delta_t + // Compute the particles delta_t Particles.dt = Calc_Particles_dt_Cosmo(); dt_particles = Particles.dt; - //Convert delta_t to delta_a ( a = scale factor ) - da_particles = Cosmo.Get_da_from_dt( dt_particles ); - da_particles = fmin( da_particles, 1.0 ); //Limit delta_a + // Convert delta_t to delta_a ( a = scale factor ) + da_particles = Cosmo.Get_da_from_dt(dt_particles); + da_particles = fmin(da_particles, 1.0); // Limit delta_a - #ifdef ONLY_PARTICLES - //If only particles da_min is only da_particles + #ifdef ONLY_PARTICLES + // If only particles da_min is only da_particles da_min = da_particles; - chprintf( " Delta_a_particles: %f \n", da_particles ); + chprintf(" Delta_a_particles: %f \n", da_particles); - #else //NOT ONLY_PARTICLES - //Here da_min is the minumum between da_particles and da_hydro + #else // NOT ONLY_PARTICLES + // Here da_min is the minumum between da_particles and da_hydro Real da_hydro; - da_hydro = Cosmo.Get_da_from_dt( dt_hydro ) * Cosmo.current_a * Cosmo.current_a / Cosmo.H0; //Convet delta_t to delta_a - da_min = fmin( da_hydro, da_particles ); //Find the minumum delta_a - chprintf( " Delta_a_particles: %f Delta_a_gas: %f \n", da_particles, da_hydro ); + da_hydro = Cosmo.Get_da_from_dt(dt_hydro) * Cosmo.current_a * + Cosmo.current_a / Cosmo.H0; // Convet delta_t to delta_a + da_min = fmin(da_hydro, da_particles); // Find the minumum delta_a + chprintf(" Delta_a_particles: %f Delta_a_gas: %f \n", da_particles, + da_hydro); - #endif//ONLY_PARTICLES + #endif // ONLY_PARTICLES - //Limit delta_a by the expansion rate - Cosmo.max_delta_a = fmin( MAX_EXPANSION_RATE * Cosmo.current_a, MAX_DELTA_A ); - if( da_min > Cosmo.max_delta_a){ + // Limit delta_a by the expansion rate + Cosmo.max_delta_a = fmin(MAX_EXPANSION_RATE * Cosmo.current_a, MAX_DELTA_A); + if (da_min > Cosmo.max_delta_a) { da_min = Cosmo.max_delta_a; - chprintf( " Seting max delta_a: %f\n", da_min ); + chprintf(" Seting max delta_a: %f\n", da_min); } - //Small delta_a when reionization starts - #ifdef COOLING_GRACKLE - if ( fabs(Cosmo.current_a + da_min - Cool.scale_factor_UVB_on) < 0.005 ){ + // Small delta_a when reionization starts + #ifdef COOLING_GRACKLE + if (fabs(Cosmo.current_a + da_min - Cool.scale_factor_UVB_on) < 0.005) { da_min /= 2; - chprintf( " Starting UVB. Limiting delta_a: %f \n", da_min); + chprintf(" Starting UVB. Limiting delta_a: %f \n", da_min); } - #endif - #ifdef CHEMISTRY_GPU - if ( fabs(Cosmo.current_a + da_min - Chem.scale_factor_UVB_on) < 0.005 ){ + #endif + #ifdef CHEMISTRY_GPU + if (fabs(Cosmo.current_a + da_min - Chem.scale_factor_UVB_on) < 0.005) { da_min /= 2; - chprintf( " Starting UVB. Limiting delta_a: %f \n", da_min); + chprintf(" Starting UVB. Limiting delta_a: %f \n", da_min); } - #endif - - //Limit delta_a if it's time to output - if ( (Cosmo.current_a + da_min) > Cosmo.next_output ){ - da_min = Cosmo.next_output - Cosmo.current_a; + #endif + + // Limit delta_a if it's time to output + if ((Cosmo.current_a + da_min) > Cosmo.next_output) { + da_min = Cosmo.next_output - Cosmo.current_a; H.Output_Now = true; } - #ifdef ANALYSIS - //Limit delta_a if it's time to run analysis - if( Analysis.next_output_indx < Analysis.n_outputs ){ - if ( H.Output_Now && fabs(Cosmo.current_a + da_min - Analysis.next_output ) < 1e-6 ) Analysis.Output_Now = true; - else if ( Cosmo.current_a + da_min > Analysis.next_output ){ - da_min = Analysis.next_output - Cosmo.current_a; + #ifdef ANALYSIS + // Limit delta_a if it's time to run analysis + if (Analysis.next_output_indx < Analysis.n_outputs) { + if (H.Output_Now && + fabs(Cosmo.current_a + da_min - Analysis.next_output) < 1e-6) + Analysis.Output_Now = true; + else if (Cosmo.current_a + da_min > Analysis.next_output) { + da_min = Analysis.next_output - Cosmo.current_a; Analysis.Output_Now = true; } } - #endif - - if ( da_min < 0 ){ - chprintf( "ERROR: Negative delta_a"); + #endif + + if (da_min < 0) { + chprintf("ERROR: Negative delta_a"); exit(-1); - } - - - //Set delta_a after it has been computed + } + + // Set delta_a after it has been computed Cosmo.delta_a = da_min; - //Convert delta_a back to delta_t - dt_min = Cosmo.Get_dt_from_da( Cosmo.delta_a ) * Cosmo.H0 / ( Cosmo.current_a * Cosmo.current_a ); - //Set the new delta_t for the hydro step + // Convert delta_a back to delta_t + dt_min = Cosmo.Get_dt_from_da(Cosmo.delta_a) * Cosmo.H0 / + (Cosmo.current_a * Cosmo.current_a); + // Set the new delta_t for the hydro step H.dt = dt_min; - chprintf( " Current_a: %f delta_a: %f dt: %f\n", Cosmo.current_a, Cosmo.delta_a, H.dt ); - - #ifdef AVERAGE_SLOW_CELLS - //Set the min_delta_t for averaging a slow cell - da_particles = fmin( da_particles, Cosmo.max_delta_a ); - min_dt_slow = Cosmo.Get_dt_from_da( da_particles ) / Particles.C_cfl * Cosmo.H0 / ( Cosmo.current_a * Cosmo.current_a ) / SLOW_FACTOR; + chprintf(" Current_a: %f delta_a: %f dt: %f\n", Cosmo.current_a, + Cosmo.delta_a, H.dt); + + #ifdef AVERAGE_SLOW_CELLS + // Set the min_delta_t for averaging a slow cell + da_particles = fmin(da_particles, Cosmo.max_delta_a); + min_dt_slow = Cosmo.Get_dt_from_da(da_particles) / Particles.C_cfl * + Cosmo.H0 / (Cosmo.current_a * Cosmo.current_a) / SLOW_FACTOR; H.min_dt_slow = min_dt_slow; - #endif + #endif - //Compute the physical time - dt_physical = Cosmo.Get_dt_from_da( Cosmo.delta_a ); + // Compute the physical time + dt_physical = Cosmo.Get_dt_from_da(Cosmo.delta_a); Cosmo.dt_secs = dt_physical * Cosmo.time_conversion; Cosmo.t_secs += Cosmo.dt_secs; - chprintf( " t_physical: %f Myr dt_physical: %f Myr\n", Cosmo.t_secs/MYR, Cosmo.dt_secs/MYR ); + chprintf(" t_physical: %f Myr dt_physical: %f Myr\n", Cosmo.t_secs / MYR, + Cosmo.dt_secs / MYR); Particles.dt = dt_physical; - #else // Not Cosmology - //If NOT using COSMOLOGY + #else // Not Cosmology + // If NOT using COSMOLOGY - //Compute the particles delta_t + // Compute the particles delta_t dt_particles = Calc_Particles_dt(); - dt_particles = fmin( dt_particles, Particles.max_dt); - #ifdef ONLY_PARTICLES - dt_min = dt_particles; - chprintf( " dt_particles: %f \n", dt_particles ); - #else - chprintf( " dt_hydro: %f dt_particles: %f \n", dt_hydro, dt_particles ); - //Get the minimum delta_t between hydro and particles - dt_min = fmin( dt_hydro, dt_particles ); - #endif//ONLY_PARTICLES - - #ifdef AVERAGE_SLOW_CELLS - //Set the min_delta_t for averaging a slow cell - //min_dt_slow = dt_particles / Particles.C_cfl / SLOW_FACTOR; - min_dt_slow = 3*H.dx; + dt_particles = fmin(dt_particles, Particles.max_dt); + #ifdef ONLY_PARTICLES + dt_min = dt_particles; + chprintf(" dt_particles: %f \n", dt_particles); + #else + chprintf(" dt_hydro: %f dt_particles: %f \n", dt_hydro, dt_particles); + // Get the minimum delta_t between hydro and particles + dt_min = fmin(dt_hydro, dt_particles); + #endif // ONLY_PARTICLES + + #ifdef AVERAGE_SLOW_CELLS + // Set the min_delta_t for averaging a slow cell + // min_dt_slow = dt_particles / Particles.C_cfl / SLOW_FACTOR; + min_dt_slow = 3 * H.dx; H.min_dt_slow = min_dt_slow; - #endif + #endif - //Set the new delta_t - H.dt = dt_min; + // Set the new delta_t + H.dt = dt_min; Particles.dt = H.dt; - #endif//COSMOLOGY - #endif//PARTICLES - - #if defined( AVERAGE_SLOW_CELLS) && !defined( PARTICLES ) - //Set the min_delta_t for averaging a slow cell ( for now the min_dt_slow is set to a large value, change this with your condition ) - //min_dt_slow = H.dt / C_cfl * 100 ; - min_dt_slow = 3*H.dx; + #endif // COSMOLOGY + #endif // PARTICLES + + #if defined(AVERAGE_SLOW_CELLS) && !defined(PARTICLES) + // Set the min_delta_t for averaging a slow cell ( for now the min_dt_slow is + // set to a large value, change this with your condition ) min_dt_slow = H.dt + // / C_cfl * 100 ; + min_dt_slow = 3 * H.dx; H.min_dt_slow = min_dt_slow; #endif // Set current and previous delta_t for the potential extrapolation - if ( Grav.INITIAL ){ + if (Grav.INITIAL) { Grav.dt_prev = H.dt; - Grav.dt_now = H.dt; - }else{ + Grav.dt_now = H.dt; + } else { Grav.dt_prev = Grav.dt_now; - Grav.dt_now = H.dt; + Grav.dt_now = H.dt; } - + #if defined(PARTICLES_GPU) && defined(PRINT_MAX_MEMORY_USAGE) Particles.Print_Max_Memory_Usage(); #endif } -//NOT USED: Get Average density on the Global dommain -Real Grav3D::Get_Average_Density(){ - +// NOT USED: Get Average density on the Global dommain +Real Grav3D::Get_Average_Density() +{ Real dens_sum, dens_mean; #ifndef PARALLEL_OMP - dens_sum = Get_Average_Density_function( 0, nz_local ); + dens_sum = Get_Average_Density_function(0, nz_local); #else dens_sum = 0; Real dens_sum_all[N_OMP_THREADS]; - #pragma omp parallel num_threads( N_OMP_THREADS ) + #pragma omp parallel num_threads(N_OMP_THREADS) { int omp_id, n_omp_procs; int g_start, g_end; - omp_id = omp_get_thread_num(); + omp_id = omp_get_thread_num(); n_omp_procs = omp_get_num_threads(); - Get_OMP_Grid_Indxs( nz_local, n_omp_procs, omp_id, &g_start, &g_end ); - dens_sum_all[omp_id] = Get_Average_Density_function( g_start, g_end ); - + Get_OMP_Grid_Indxs(nz_local, n_omp_procs, omp_id, &g_start, &g_end); + dens_sum_all[omp_id] = Get_Average_Density_function(g_start, g_end); } - for ( int i=0; i -1) { - #endif - const int k = nz/2; - for (int j = 0; j < ny+ng+ng; j++) { - for (int i = 0; i < nx+ng+ng; i++) { - const long ijk = i+(nx+ng+ng)*(j+(ny+ng+ng)*(k+ng)); - printf("%d %d %g %g %g\n",j,i,q[ijk],p[ijk],q[ijk]-p[ijk]); - } - printf("\n"); - } - #if 0 + #endif + const int k = nz / 2; + for (int j = 0; j < ny + ng + ng; j++) { + for (int i = 0; i < nx + ng + ng; i++) { + const long ijk = i + (nx + ng + ng) * (j + (ny + ng + ng) * (k + ng)); + printf("%d %d %g %g %g\n", j, i, q[ijk], p[ijk], q[ijk] - p[ijk]); + } + printf("\n"); + } + #if 0 break; } } - #endif + #endif fflush(stdout); MPI_Finalize(); exit(0); } -#endif - - + #endif -//Initialize the Grav Object at the beginning of the simulation -void Grid3D::Initialize_Gravity( struct parameters *P ){ - chprintf( "\nInitializing Gravity... \n"); - Grav.Initialize( H.xblocal, H.yblocal, H.zblocal, H.xblocal_max, H.yblocal_max, H.zblocal_max, H.xdglobal, H.ydglobal, H.zdglobal, P->nx, P->ny, P->nz, H.nx_real, H.ny_real, H.nz_real, H.dx, H.dy, H.dz, H.n_ghost_potential_offset, P ); - chprintf( "Gravity Successfully Initialized. \n\n"); +// Initialize the Grav Object at the beginning of the simulation +void Grid3D::Initialize_Gravity(struct parameters *P) +{ + chprintf("\nInitializing Gravity... \n"); + Grav.Initialize(H.xblocal, H.yblocal, H.zblocal, H.xblocal_max, H.yblocal_max, + H.zblocal_max, H.xdglobal, H.ydglobal, H.zdglobal, P->nx, + P->ny, P->nz, H.nx_real, H.ny_real, H.nz_real, H.dx, H.dy, + H.dz, H.n_ghost_potential_offset, P); + chprintf("Gravity Successfully Initialized. \n\n"); if (P->bc_potential_type == 1) { + const int ng = N_GHOST_POTENTIAL; + const int twoNG = ng + ng; + const int nk = Grav.nz_local + twoNG; + const int nj = Grav.ny_local + twoNG; + const int ni = Grav.nx_local + twoNG; + const Real dr = 0.5 - ng; - const int ng = N_GHOST_POTENTIAL; - const int twoNG = ng+ng; - const int nk = Grav.nz_local+twoNG; - const int nj = Grav.ny_local+twoNG; - const int ni = Grav.nx_local+twoNG; - const Real dr = 0.5-ng; - - #ifdef PARIS_GALACTIC_TEST + #ifdef PARIS_GALACTIC_TEST chprintf("Analytic Test of Poisson Solvers:\n"); std::vector exact(Grav.n_cells_potential); std::vector potential(Grav.n_cells_potential); - const Real scale = 4.0*M_PI*Grav.Gconst; - const Real ddx = 1.0/(scale*Grav.dx*Grav.dx); - const Real ddy = 1.0/(scale*Grav.dy*Grav.dy); - const Real ddz = 1.0/(scale*Grav.dz*Grav.dz); + const Real scale = 4.0 * M_PI * Grav.Gconst; + const Real ddx = 1.0 / (scale * Grav.dx * Grav.dx); + const Real ddy = 1.0 / (scale * Grav.dy * Grav.dy); + const Real ddz = 1.0 / (scale * Grav.dz * Grav.dz); const Real *const phi = Grav.F.potential_h; - const int nij = ni*nj; - const Real a0 = Galaxies::MW.phi_disk_D3D(0,0); - const Real da0 = 2.0/(25.0*scale); + const int nij = ni * nj; + const Real a0 = Galaxies::MW.phi_disk_D3D(0, 0); + const Real da0 = 2.0 / (25.0 * scale); #pragma omp parallel for for (int k = 0; k < nk; k++) { - const Real z = Grav.zMin+Grav.dz*(k+dr); - const int njk = nj*k; + const Real z = Grav.zMin + Grav.dz * (k + dr); + const int njk = nj * k; for (int j = 0; j < nj; j++) { - const Real y = Grav.yMin+Grav.dy*(j+dr); - const Real yy = y*y; - const int nijk = ni*(j+njk); + const Real y = Grav.yMin + Grav.dy * (j + dr); + const Real yy = y * y; + const int nijk = ni * (j + njk); for (int i = 0; i < ni; i++) { - const Real x = Grav.xMin+Grav.dx*(i+dr); - const Real r = sqrt(x*x+yy); - const int ijk = i+nijk; - exact[ijk] = potential[ijk] = Grav.F.potential_h[ijk] = Galaxies::MW.phi_disk_D3D(r,z); + const Real x = Grav.xMin + Grav.dx * (i + dr); + const Real r = sqrt(x * x + yy); + const int ijk = i + nijk; + exact[ijk] = potential[ijk] = Grav.F.potential_h[ijk] = + Galaxies::MW.phi_disk_D3D(r, z); } } } #pragma omp parallel for for (int k = 0; k < Grav.nz_local; k++) { - const Real z = Grav.zMin+Grav.dz*(k+0.5); - const Real zz = z*z; - const int njk = Grav.ny_local*k; + const Real z = Grav.zMin + Grav.dz * (k + 0.5); + const Real zz = z * z; + const int njk = Grav.ny_local * k; for (int j = 0; j < Grav.ny_local; j++) { - const Real y = Grav.yMin+Grav.dy*(j+0.5); - const Real yy = y*y; - const int nijk = Grav.nx_local*(j+njk); + const Real y = Grav.yMin + Grav.dy * (j + 0.5); + const Real yy = y * y; + const int nijk = Grav.nx_local * (j + njk); for (int i = 0; i < Grav.nx_local; i++) { - const Real x = Grav.xMin+Grav.dx*(i+0.5); - const Real r = sqrt(x*x+yy); - const int ijk = i+nijk; - const Real rr = x*x+yy+zz; - const Real f = a0*exp(-0.2*rr); - const Real df = da0*(15.0-2.0*rr)*f; - Grav.F.density_h[ijk] = Galaxies::MW.rho_disk_D3D(r,z)+df; - const int ib = i+ng+ni*(j+ng+nj*(k+ng)); + const Real x = Grav.xMin + Grav.dx * (i + 0.5); + const Real r = sqrt(x * x + yy); + const int ijk = i + nijk; + const Real rr = x * x + yy + zz; + const Real f = a0 * exp(-0.2 * rr); + const Real df = da0 * (15.0 - 2.0 * rr) * f; + Grav.F.density_h[ijk] = Galaxies::MW.rho_disk_D3D(r, z) + df; + const int ib = i + ng + ni * (j + ng + nj * (k + ng)); exact[ib] -= f; } } } - Grav.Poisson_solver_test.Get_Potential(Grav.F.density_h,Grav.F.potential_h,Grav.Gconst,Galaxies::MW); + Grav.Poisson_solver_test.Get_Potential(Grav.F.density_h, Grav.F.potential_h, + Grav.Gconst, Galaxies::MW); chprintf(" Paris Galactic"); - printDiff(Grav.F.potential_h,exact.data(),Grav.nx_local,Grav.ny_local,Grav.nz_local); - Get_Potential_SOR(Grav.Gconst,0,0,P); + printDiff(Grav.F.potential_h, exact.data(), Grav.nx_local, Grav.ny_local, + Grav.nz_local); + Get_Potential_SOR(Grav.Gconst, 0, 0, P); chprintf(" SOR"); - printDiff(Grav.F.potential_h,exact.data(),Grav.nx_local,Grav.ny_local,Grav.nz_local); - #endif + printDiff(Grav.F.potential_h, exact.data(), Grav.nx_local, Grav.ny_local, + Grav.nz_local); + #endif - #ifdef SOR + #ifdef SOR chprintf(" Initializing disk analytic potential\n"); #pragma omp parallel for for (int k = 0; k < nk; k++) { - const Real z = Grav.zMin+Grav.dz*(k+dr); - const int njk = nj*k; + const Real z = Grav.zMin + Grav.dz * (k + dr); + const int njk = nj * k; for (int j = 0; j < nj; j++) { - const Real y = Grav.yMin+Grav.dy*(j+dr); - const Real yy = y*y; - const int nijk = ni*(j+njk); + const Real y = Grav.yMin + Grav.dy * (j + dr); + const Real yy = y * y; + const int nijk = ni * (j + njk); for (int i = 0; i < ni; i++) { - const Real x = Grav.xMin+Grav.dx*(i+dr); - const Real r = sqrt(x*x+yy); - const int ijk = i+nijk; - Grav.F.potential_h[ijk] = Galaxies::MW.phi_disk_D3D(r,z); + const Real x = Grav.xMin + Grav.dx * (i + dr); + const Real r = sqrt(x * x + yy); + const int ijk = i + nijk; + Grav.F.potential_h[ijk] = Galaxies::MW.phi_disk_D3D(r, z); } } } - #endif + #endif } } - -//Compute the Gravitational Potential by solving Poisson Equation -void Grid3D::Compute_Gravitational_Potential( struct parameters *P ){ - +// Compute the Gravitational Potential by solving Poisson Equation +void Grid3D::Compute_Gravitational_Potential(struct parameters *P) +{ #ifdef CPU_TIME Timer.Grav_Potential.Start(); #endif #ifdef PARTICLES - //Copy the particles density to the grav_density array - Copy_Particles_Density_to_Gravity( *P ); + // Copy the particles density to the grav_density array + Copy_Particles_Density_to_Gravity(*P); #endif #ifndef ONLY_PARTICLES - //Copy the hydro density to the grav_density array + // Copy the hydro density to the grav_density array Copy_Hydro_Density_to_Gravity(); #endif #ifdef COSMOLOGY - //If using cosmology, set the gravitational constant to the one in the correct units + // If using cosmology, set the gravitational constant to the one in the + // correct units const Real Grav_Constant = Cosmo.cosmo_G; - const Real current_a = Cosmo.current_a; - const Real dens_avrg = Cosmo.rho_0_gas; + const Real current_a = Cosmo.current_a; + const Real dens_avrg = Cosmo.rho_0_gas; #else const Real Grav_Constant = Grav.Gconst; // If slowing the Sphere Collapse problem ( bc_potential_type=0 ) - const Real dens_avrg = (P->bc_potential_type == 0) ? H.sphere_background_density : 0; + const Real dens_avrg = + (P->bc_potential_type == 0) ? H.sphere_background_density : 0; const Real r0 = H.sphere_radius; // Re-use current_a as the total mass of the sphere - const Real current_a = (H.sphere_density-dens_avrg)*4.0*M_PI*r0*r0*r0/3.0; + const Real current_a = + (H.sphere_density - dens_avrg) * 4.0 * M_PI * r0 * r0 * r0 / 3.0; #endif - if ( !Grav.BC_FLAGS_SET ){ + if (!Grav.BC_FLAGS_SET) { Grav.TRANSFER_POTENTIAL_BOUNDARIES = true; - Set_Boundary_Conditions( *P ); + Set_Boundary_Conditions(*P); Grav.TRANSFER_POTENTIAL_BOUNDARIES = false; // #ifdef MPI_CHOLLA - // printf(" Pid: %d Gravity Boundary Flags: %d %d %d %d %d %d \n", procID, Grav.boundary_flags[0], Grav.boundary_flags[1], Grav.boundary_flags[2], Grav.boundary_flags[3], Grav.boundary_flags[4], Grav.boundary_flags[5] ); + // printf(" Pid: %d Gravity Boundary Flags: %d %d %d %d %d %d \n", procID, + // Grav.boundary_flags[0], Grav.boundary_flags[1], Grav.boundary_flags[2], + // Grav.boundary_flags[3], Grav.boundary_flags[4], Grav.boundary_flags[5] ); // #endif Grav.BC_FLAGS_SET = true; } #ifdef GRAV_ISOLATED_BOUNDARY_X - if ( Grav.boundary_flags[0] == 3 ) Compute_Potential_Boundaries_Isolated(0, P); - if ( Grav.boundary_flags[1] == 3 ) Compute_Potential_Boundaries_Isolated(1, P); + if (Grav.boundary_flags[0] == 3) Compute_Potential_Boundaries_Isolated(0, P); + if (Grav.boundary_flags[1] == 3) Compute_Potential_Boundaries_Isolated(1, P); // chprintf("Isolated X\n"); #endif #ifdef GRAV_ISOLATED_BOUNDARY_Y - if ( Grav.boundary_flags[2] == 3 ) Compute_Potential_Boundaries_Isolated(2, P); - if ( Grav.boundary_flags[3] == 3 ) Compute_Potential_Boundaries_Isolated(3, P); + if (Grav.boundary_flags[2] == 3) Compute_Potential_Boundaries_Isolated(2, P); + if (Grav.boundary_flags[3] == 3) Compute_Potential_Boundaries_Isolated(3, P); // chprintf("Isolated Y\n"); #endif #ifdef GRAV_ISOLATED_BOUNDARY_Z - if ( Grav.boundary_flags[4] == 3 ) Compute_Potential_Boundaries_Isolated(4, P); - if ( Grav.boundary_flags[5] == 3 ) Compute_Potential_Boundaries_Isolated(5, P); + if (Grav.boundary_flags[4] == 3) Compute_Potential_Boundaries_Isolated(4, P); + if (Grav.boundary_flags[5] == 3) Compute_Potential_Boundaries_Isolated(5, P); // chprintf("Isolated Z\n"); #endif - //Solve Poisson Equation to compute the potential - //Poisson Equation: laplacian( phi ) = 4 * pi * G / scale_factor * ( dens - dens_average ) + // Solve Poisson Equation to compute the potential + // Poisson Equation: laplacian( phi ) = 4 * pi * G / scale_factor * ( dens - + // dens_average ) Real *input_density, *output_potential; #ifdef GRAVITY_GPU - input_density = Grav.F.density_d; + input_density = Grav.F.density_d; output_potential = Grav.F.potential_d; #else - input_density = Grav.F.density_h; + input_density = Grav.F.density_h; output_potential = Grav.F.potential_h; #endif #ifdef SOR - #ifdef PARIS_GALACTIC_TEST - #ifdef GRAVITY_GPU - #error "GRAVITY_GPU not yet supported with PARIS_GALACTIC_TEST" - #endif - Grav.Poisson_solver_test.Get_Potential(input_density,output_potential,Grav_Constant,Galaxies::MW); - std::vector p(output_potential,output_potential+Grav.n_cells_potential); - Get_Potential_SOR( Grav_Constant, dens_avrg, current_a, P ); + #ifdef PARIS_GALACTIC_TEST + #ifdef GRAVITY_GPU + #error "GRAVITY_GPU not yet supported with PARIS_GALACTIC_TEST" + #endif + Grav.Poisson_solver_test.Get_Potential(input_density, output_potential, + Grav_Constant, Galaxies::MW); + std::vector p(output_potential, + output_potential + Grav.n_cells_potential); + Get_Potential_SOR(Grav_Constant, dens_avrg, current_a, P); chprintf(" Paris vs SOR"); - printDiff(p.data(),output_potential,Grav.nx_local,Grav.ny_local,Grav.nz_local,N_GHOST_POTENTIAL,false); - #else - Get_Potential_SOR( Grav_Constant, dens_avrg, current_a, P ); - #endif + printDiff(p.data(), output_potential, Grav.nx_local, Grav.ny_local, + Grav.nz_local, N_GHOST_POTENTIAL, false); + #else + Get_Potential_SOR(Grav_Constant, dens_avrg, current_a, P); + #endif #elif defined PARIS_GALACTIC - Grav.Poisson_solver.Get_Potential(input_density,output_potential,Grav_Constant,Galaxies::MW); + Grav.Poisson_solver.Get_Potential(input_density, output_potential, + Grav_Constant, Galaxies::MW); #else - Grav.Poisson_solver.Get_Potential( input_density, output_potential, Grav_Constant, dens_avrg, current_a); - #endif//SOR + Grav.Poisson_solver.Get_Potential(input_density, output_potential, + Grav_Constant, dens_avrg, current_a); + #endif // SOR #ifdef CPU_TIME Timer.Grav_Potential.End(); #endif } - -#ifdef GRAVITY_ANALYTIC_COMP -void Grid3D::Setup_Analytic_Potential(struct parameters *P) { - #ifndef PARALLEL_OMP - Setup_Analytic_Galaxy_Potential(0, Grav.nz_local + 2*N_GHOST_POTENTIAL, Galaxies::MW); - #else - #pragma omp parallel num_threads( N_OMP_THREADS ) + #ifdef GRAVITY_ANALYTIC_COMP +void Grid3D::Setup_Analytic_Potential(struct parameters *P) +{ + #ifndef PARALLEL_OMP + Setup_Analytic_Galaxy_Potential(0, Grav.nz_local + 2 * N_GHOST_POTENTIAL, + Galaxies::MW); + #else + #pragma omp parallel num_threads(N_OMP_THREADS) { int omp_id, n_omp_procs; int g_start, g_end; - omp_id = omp_get_thread_num(); + omp_id = omp_get_thread_num(); n_omp_procs = omp_get_num_threads(); - Get_OMP_Grid_Indxs( Grav.nz_local + 2*N_GHOST_POTENTIAL, n_omp_procs, omp_id, &g_start, &g_end ); + Get_OMP_Grid_Indxs(Grav.nz_local + 2 * N_GHOST_POTENTIAL, n_omp_procs, + omp_id, &g_start, &g_end); Setup_Analytic_Galaxy_Potential(g_start, g_end, Galaxies::MW); } - #endif + #endif - #ifdef GRAVITY_GPU - CudaSafeCall( cudaMemcpy(Grav.F.analytic_potential_d, Grav.F.analytic_potential_h, Grav.n_cells_potential*sizeof(Real), cudaMemcpyHostToDevice) ); - #endif + #ifdef GRAVITY_GPU + CudaSafeCall(cudaMemcpy( + Grav.F.analytic_potential_d, Grav.F.analytic_potential_h, + Grav.n_cells_potential * sizeof(Real), cudaMemcpyHostToDevice)); + #endif } - -void Grid3D::Add_Analytic_Potential() { - #ifdef GRAVITY_GPU +void Grid3D::Add_Analytic_Potential() +{ + #ifdef GRAVITY_GPU Add_Analytic_Potential_GPU(); - #else - #ifndef PARALLEL_OMP - Add_Analytic_Potential(0, Grav.nz_local + 2*N_GHOST_POTENTIAL ); - #else - #pragma omp parallel num_threads( N_OMP_THREADS ) + #else + #ifndef PARALLEL_OMP + Add_Analytic_Potential(0, Grav.nz_local + 2 * N_GHOST_POTENTIAL); + #else + #pragma omp parallel num_threads(N_OMP_THREADS) { int omp_id, n_omp_procs; int g_start, g_end; - omp_id = omp_get_thread_num(); + omp_id = omp_get_thread_num(); n_omp_procs = omp_get_num_threads(); - Get_OMP_Grid_Indxs( Grav.nz_local+ 2*N_GHOST_POTENTIAL, n_omp_procs, omp_id, &g_start, &g_end ); + Get_OMP_Grid_Indxs(Grav.nz_local + 2 * N_GHOST_POTENTIAL, n_omp_procs, + omp_id, &g_start, &g_end); Add_Analytic_Potential(g_start, g_end); } - #endif //PARALLEL_OMP - #endif // GRAVITY_GPU else + #endif // PARALLEL_OMP + #endif // GRAVITY_GPU else } -#endif //GRAVITY_ANALYTIC_COMP - + #endif // GRAVITY_ANALYTIC_COMP -void Grid3D::Copy_Hydro_Density_to_Gravity_Function( int g_start, int g_end){ +void Grid3D::Copy_Hydro_Density_to_Gravity_Function(int g_start, int g_end) +{ // Copy the density array from hydro conserved to gravity density array Real dens; int i, j, k, id, id_grav; - for (k=g_start; k + #include + #include "../global/global.h" + #include "../grid/grid3D.h" + #include "../io/io.h" + #include "../utils/error_handling.h" -void Grav3D::AllocateMemory_GPU(){ - - CudaSafeCall( cudaMalloc((void**)&F.density_d, n_cells*sizeof(Real)) ); - CudaSafeCall( cudaMalloc((void**)&F.potential_d, n_cells_potential*sizeof(Real)) ); - CudaSafeCall( cudaMalloc((void**)&F.potential_1_d, n_cells_potential*sizeof(Real)) ); +void Grav3D::AllocateMemory_GPU() +{ + CudaSafeCall(cudaMalloc((void **)&F.density_d, n_cells * sizeof(Real))); + CudaSafeCall( + cudaMalloc((void **)&F.potential_d, n_cells_potential * sizeof(Real))); + CudaSafeCall( + cudaMalloc((void **)&F.potential_1_d, n_cells_potential * sizeof(Real))); #ifdef GRAVITY_GPU - #ifdef GRAVITY_ANALYTIC_COMP - CudaSafeCall( cudaMalloc((void**)&F.analytic_potential_d, n_cells_potential*sizeof(Real)) ); - #endif - - #ifdef GRAV_ISOLATED_BOUNDARY_X - CudaSafeCall( cudaMalloc((void**)&F.pot_boundary_x0_d, N_GHOST_POTENTIAL*ny_local*nz_local*sizeof(Real)) ); - CudaSafeCall( cudaMalloc((void**)&F.pot_boundary_x1_d, N_GHOST_POTENTIAL*ny_local*nz_local*sizeof(Real)) ); - #endif - #ifdef GRAV_ISOLATED_BOUNDARY_Y - CudaSafeCall( cudaMalloc((void**)&F.pot_boundary_y0_d, N_GHOST_POTENTIAL*nx_local*nz_local*sizeof(Real)) ); - CudaSafeCall( cudaMalloc((void**)&F.pot_boundary_y1_d, N_GHOST_POTENTIAL*nx_local*nz_local*sizeof(Real)) ); - #endif - #ifdef GRAV_ISOLATED_BOUNDARY_Z - CudaSafeCall( cudaMalloc((void**)&F.pot_boundary_z0_d, N_GHOST_POTENTIAL*nx_local*ny_local*sizeof(Real)) ); - CudaSafeCall( cudaMalloc((void**)&F.pot_boundary_z1_d, N_GHOST_POTENTIAL*nx_local*ny_local*sizeof(Real)) ); - #endif - - #endif//GRAVITY_GPU - - chprintf( "Allocated Gravity GPU memory \n" ); + #ifdef GRAVITY_ANALYTIC_COMP + CudaSafeCall(cudaMalloc((void **)&F.analytic_potential_d, + n_cells_potential * sizeof(Real))); + #endif + + #ifdef GRAV_ISOLATED_BOUNDARY_X + CudaSafeCall( + cudaMalloc((void **)&F.pot_boundary_x0_d, + N_GHOST_POTENTIAL * ny_local * nz_local * sizeof(Real))); + CudaSafeCall( + cudaMalloc((void **)&F.pot_boundary_x1_d, + N_GHOST_POTENTIAL * ny_local * nz_local * sizeof(Real))); + #endif + #ifdef GRAV_ISOLATED_BOUNDARY_Y + CudaSafeCall( + cudaMalloc((void **)&F.pot_boundary_y0_d, + N_GHOST_POTENTIAL * nx_local * nz_local * sizeof(Real))); + CudaSafeCall( + cudaMalloc((void **)&F.pot_boundary_y1_d, + N_GHOST_POTENTIAL * nx_local * nz_local * sizeof(Real))); + #endif + #ifdef GRAV_ISOLATED_BOUNDARY_Z + CudaSafeCall( + cudaMalloc((void **)&F.pot_boundary_z0_d, + N_GHOST_POTENTIAL * nx_local * ny_local * sizeof(Real))); + CudaSafeCall( + cudaMalloc((void **)&F.pot_boundary_z1_d, + N_GHOST_POTENTIAL * nx_local * ny_local * sizeof(Real))); + #endif + + #endif // GRAVITY_GPU + + chprintf("Allocated Gravity GPU memory \n"); } - -void Grav3D::FreeMemory_GPU(void){ - - cudaFree( F.density_d ); - cudaFree( F.potential_d ); - cudaFree( F.potential_1_d ); - +void Grav3D::FreeMemory_GPU(void) +{ + cudaFree(F.density_d); + cudaFree(F.potential_d); + cudaFree(F.potential_1_d); #ifdef GRAVITY_GPU - #ifdef GRAVITY_ANALYTIC_COMP - cudaFree( F.analytic_potential_d ); - #endif - - #ifdef GRAV_ISOLATED_BOUNDARY_X - cudaFree( F.pot_boundary_x0_d); - cudaFree( F.pot_boundary_x1_d); - #endif - #ifdef GRAV_ISOLATED_BOUNDARY_Y - cudaFree( F.pot_boundary_y0_d); - cudaFree( F.pot_boundary_y1_d); - #endif - #ifdef GRAV_ISOLATED_BOUNDARY_Z - cudaFree( F.pot_boundary_z0_d); - cudaFree( F.pot_boundary_z1_d); - #endif - - #endif //GRAVITY_GPU - + #ifdef GRAVITY_ANALYTIC_COMP + cudaFree(F.analytic_potential_d); + #endif + + #ifdef GRAV_ISOLATED_BOUNDARY_X + cudaFree(F.pot_boundary_x0_d); + cudaFree(F.pot_boundary_x1_d); + #endif + #ifdef GRAV_ISOLATED_BOUNDARY_Y + cudaFree(F.pot_boundary_y0_d); + cudaFree(F.pot_boundary_y1_d); + #endif + #ifdef GRAV_ISOLATED_BOUNDARY_Z + cudaFree(F.pot_boundary_z0_d); + cudaFree(F.pot_boundary_z1_d); + #endif + + #endif // GRAVITY_GPU } -void __global__ Copy_Hydro_Density_to_Gravity_Kernel( Real *src_density_d, Real *dst_density_d, int nx_local, int ny_local, int nz_local, int n_ghost, Real cosmo_rho_0_gas ){ - +void __global__ Copy_Hydro_Density_to_Gravity_Kernel(Real *src_density_d, + Real *dst_density_d, + int nx_local, int ny_local, + int nz_local, int n_ghost, + Real cosmo_rho_0_gas) +{ int tid_x, tid_y, tid_z, tid_grid, tid_dens; tid_x = blockIdx.x * blockDim.x + threadIdx.x; tid_y = blockIdx.y * blockDim.y + threadIdx.y; tid_z = blockIdx.z * blockDim.z + threadIdx.z; - if (tid_x >= nx_local || tid_y >= ny_local || tid_z >= nz_local ) return; + if (tid_x >= nx_local || tid_y >= ny_local || tid_z >= nz_local) return; - tid_dens = tid_x + tid_y*nx_local + tid_z*nx_local*ny_local; + tid_dens = tid_x + tid_y * nx_local + tid_z * nx_local * ny_local; tid_x += n_ghost; tid_y += n_ghost; tid_z += n_ghost; int nx_grid, ny_grid; - nx_grid = nx_local + 2*n_ghost; - ny_grid = ny_local + 2*n_ghost; - tid_grid = tid_x + tid_y*nx_grid + tid_z*nx_grid*ny_grid; + nx_grid = nx_local + 2 * n_ghost; + ny_grid = ny_local + 2 * n_ghost; + tid_grid = tid_x + tid_y * nx_grid + tid_z * nx_grid * ny_grid; Real dens; dens = src_density_d[tid_grid]; @@ -96,15 +112,15 @@ void __global__ Copy_Hydro_Density_to_Gravity_Kernel( Real *src_density_d, Real #endif #ifdef PARTICLES - dst_density_d[tid_dens] += dens; //Hydro density is added AFTER partices density + dst_density_d[tid_dens] += + dens; // Hydro density is added AFTER partices density #else - dst_density_d[tid_dens] = dens; + dst_density_d[tid_dens] = dens; #endif } - -void Grid3D::Copy_Hydro_Density_to_Gravity_GPU(){ - +void Grid3D::Copy_Hydro_Density_to_Gravity_GPU() +{ int nx_local, ny_local, nz_local, n_ghost; nx_local = Grav.nx_local; ny_local = Grav.ny_local; @@ -112,9 +128,9 @@ void Grid3D::Copy_Hydro_Density_to_Gravity_GPU(){ n_ghost = H.n_ghost; // set values for GPU kernels - int tpb_x = TPBX_GRAV; - int tpb_y = TPBY_GRAV; - int tpb_z = TPBZ_GRAV; + int tpb_x = TPBX_GRAV; + int tpb_y = TPBY_GRAV; + int tpb_z = TPBZ_GRAV; int ngrid_x = (nx_local - 1) / tpb_x + 1; int ngrid_y = (ny_local - 1) / tpb_y + 1; int ngrid_z = (nz_local - 1) / tpb_z + 1; @@ -128,42 +144,45 @@ void Grid3D::Copy_Hydro_Density_to_Gravity_GPU(){ #ifdef COSMOLOGY cosmo_rho_0_gas = Cosmo.rho_0_gas; #else - cosmo_rho_0_gas = 1.0; + cosmo_rho_0_gas = 1.0; #endif - //Copy the density from the device array to the Poisson input density array - hipLaunchKernelGGL(Copy_Hydro_Density_to_Gravity_Kernel, dim3dGrid, dim3dBlock, 0, 0, C.d_density, Grav.F.density_d, nx_local, ny_local, nz_local, n_ghost, cosmo_rho_0_gas); + // Copy the density from the device array to the Poisson input density array + hipLaunchKernelGGL(Copy_Hydro_Density_to_Gravity_Kernel, dim3dGrid, + dim3dBlock, 0, 0, C.d_density, Grav.F.density_d, nx_local, + ny_local, nz_local, n_ghost, cosmo_rho_0_gas); } - - -#if defined(GRAVITY_ANALYTIC_COMP) -void __global__ Add_Analytic_Potential_Kernel( Real *analytic_d, Real *potential_d, int nx_pot, int ny_pot, int nz_pot) { + #if defined(GRAVITY_ANALYTIC_COMP) +void __global__ Add_Analytic_Potential_Kernel(Real *analytic_d, + Real *potential_d, int nx_pot, + int ny_pot, int nz_pot) +{ int tid_x, tid_y, tid_z, tid; tid_x = blockIdx.x * blockDim.x + threadIdx.x; tid_y = blockIdx.y * blockDim.y + threadIdx.y; tid_z = blockIdx.z * blockDim.z + threadIdx.z; - if (tid_x >= nx_pot || tid_y >= ny_pot || tid_z >= nz_pot ) return; + if (tid_x >= nx_pot || tid_y >= ny_pot || tid_z >= nz_pot) return; + + tid = tid_x + tid_y * nx_pot + tid_z * nx_pot * ny_pot; - tid= tid_x + tid_y*nx_pot + tid_z*nx_pot*ny_pot; - potential_d[tid] += analytic_d[tid]; /* if (tid_x < 10 && tid_y == (ny_pot/2) && tid_z == (nz_pot/2)) { - //printf("potential_d[%d, %d, %d] = %.4e\n", tid_x, tid_y, tid_z, potential_d[tid]); - printf("analytic_d[%d, %d, %d] = %.4e\n", tid_x, tid_y, tid_z, analytic_d[tid]); + //printf("potential_d[%d, %d, %d] = %.4e\n", tid_x, tid_y, tid_z, + potential_d[tid]); printf("analytic_d[%d, %d, %d] = %.4e\n", tid_x, tid_y, + tid_z, analytic_d[tid]); } */ - } - -void Grid3D::Add_Analytic_Potential_GPU() { +void Grid3D::Add_Analytic_Potential_GPU() +{ int nx_pot, ny_pot, nz_pot; - nx_pot = Grav.nx_local + 2*N_GHOST_POTENTIAL; - ny_pot = Grav.ny_local + 2*N_GHOST_POTENTIAL; - nz_pot = Grav.nz_local + 2*N_GHOST_POTENTIAL; + nx_pot = Grav.nx_local + 2 * N_GHOST_POTENTIAL; + ny_pot = Grav.ny_local + 2 * N_GHOST_POTENTIAL; + nz_pot = Grav.nz_local + 2 * N_GHOST_POTENTIAL; // set values for GPU kernels int tpb_x = TPBX_GRAV; @@ -179,90 +198,94 @@ void Grid3D::Add_Analytic_Potential_GPU() { // number of threads per 1D block dim3 dim3dBlock(tpb_x, tpb_y, tpb_z); - //Copy the analytic potential from the device array to the device potential array - hipLaunchKernelGGL(Add_Analytic_Potential_Kernel, dim3dGrid, dim3dBlock, 0, 0, Grav.F.analytic_potential_d, Grav.F.potential_d, nx_pot, ny_pot, nz_pot); + // Copy the analytic potential from the device array to the device potential + // array + hipLaunchKernelGGL(Add_Analytic_Potential_Kernel, dim3dGrid, dim3dBlock, 0, 0, + Grav.F.analytic_potential_d, Grav.F.potential_d, nx_pot, + ny_pot, nz_pot); cudaDeviceSynchronize(); - /*gpuFor(10, + /*gpuFor(10, GPU_LAMBDA(const int i) { - printf("potential_after_analytic[%d, %d, %d] = %.4e\n", i, ny_pot/2, nz_pot/2, Grav.F.potential_d[i + nx_pot*ny_pot/2 + nx_pot*ny_pot*nz_pot/2]); + printf("potential_after_analytic[%d, %d, %d] = %.4e\n", i, ny_pot/2, + nz_pot/2, Grav.F.potential_d[i + nx_pot*ny_pot/2 + nx_pot*ny_pot*nz_pot/2]); } );*/ } -#endif //GRAVITY_ANALYTIC_COMP - - - -void __global__ Extrapolate_Grav_Potential_Kernel( Real *dst_potential, Real *src_potential_0, Real *src_potential_1, - int nx_pot, int ny_pot, int nz_pot, int nx_grid, int ny_grid, int nz_grid, int n_offset, - Real dt_now, Real dt_prev, bool INITIAL, Real cosmo_factor ){ + #endif // GRAVITY_ANALYTIC_COMP +void __global__ Extrapolate_Grav_Potential_Kernel( + Real *dst_potential, Real *src_potential_0, Real *src_potential_1, + int nx_pot, int ny_pot, int nz_pot, int nx_grid, int ny_grid, int nz_grid, + int n_offset, Real dt_now, Real dt_prev, bool INITIAL, Real cosmo_factor) +{ int tid_x, tid_y, tid_z, tid_grid, tid_pot; tid_x = blockIdx.x * blockDim.x + threadIdx.x; tid_y = blockIdx.y * blockDim.y + threadIdx.y; tid_z = blockIdx.z * blockDim.z + threadIdx.z; - if (tid_x >= nx_pot || tid_y >= ny_pot || tid_z >= nz_pot ) return; + if (tid_x >= nx_pot || tid_y >= ny_pot || tid_z >= nz_pot) return; - tid_pot = tid_x + tid_y*nx_pot + tid_z*nx_pot*ny_pot; + tid_pot = tid_x + tid_y * nx_pot + tid_z * nx_pot * ny_pot; tid_x += n_offset; tid_y += n_offset; tid_z += n_offset; - tid_grid = tid_x + tid_y*nx_grid + tid_z*nx_grid*ny_grid; + tid_grid = tid_x + tid_y * nx_grid + tid_z * nx_grid * ny_grid; Real pot_now, pot_prev, pot_extrp; - pot_now = src_potential_0[tid_pot]; //Potential at the n-th timestep - if ( INITIAL ){ - pot_extrp = pot_now; //The first timestep the extrapolated potential is phi_0 + pot_now = src_potential_0[tid_pot]; // Potential at the n-th timestep + if (INITIAL) { + pot_extrp = + pot_now; // The first timestep the extrapolated potential is phi_0 } else { - pot_prev = src_potential_1[tid_pot]; //Potential at the (n-1)-th timestep ( previous step ) - //Compute the extrapolated potential from phi_n-1 and phi_n - pot_extrp = pot_now + 0.5 * dt_now * ( pot_now - pot_prev ) / dt_prev; + pot_prev = src_potential_1[tid_pot]; // Potential at the (n-1)-th timestep + // ( previous step ) + // Compute the extrapolated potential from phi_n-1 and phi_n + pot_extrp = pot_now + 0.5 * dt_now * (pot_now - pot_prev) / dt_prev; } #ifdef COSMOLOGY - //For cosmological simulation the potential is transformed to 'comoving coordinates' + // For cosmological simulation the potential is transformed to 'comoving + // coordinates' pot_extrp *= cosmo_factor; #endif - //Save the extrapolated potential + // Save the extrapolated potential dst_potential[tid_grid] = pot_extrp; - //Set phi_n-1 = phi_n, to use it during the next step + // Set phi_n-1 = phi_n, to use it during the next step src_potential_1[tid_pot] = pot_now; } - -void Grid3D::Extrapolate_Grav_Potential_GPU(){ - +void Grid3D::Extrapolate_Grav_Potential_GPU() +{ int nx_pot, ny_pot, nz_pot; - nx_pot = Grav.nx_local + 2*N_GHOST_POTENTIAL; - ny_pot = Grav.ny_local + 2*N_GHOST_POTENTIAL; - nz_pot = Grav.nz_local + 2*N_GHOST_POTENTIAL; + nx_pot = Grav.nx_local + 2 * N_GHOST_POTENTIAL; + ny_pot = Grav.ny_local + 2 * N_GHOST_POTENTIAL; + nz_pot = Grav.nz_local + 2 * N_GHOST_POTENTIAL; int n_ghost_grid, nx_grid, ny_grid, nz_grid; n_ghost_grid = H.n_ghost; - nx_grid = Grav.nx_local + 2*n_ghost_grid; - ny_grid = Grav.ny_local + 2*n_ghost_grid; - nz_grid = Grav.nz_local + 2*n_ghost_grid; + nx_grid = Grav.nx_local + 2 * n_ghost_grid; + ny_grid = Grav.ny_local + 2 * n_ghost_grid; + nz_grid = Grav.nz_local + 2 * n_ghost_grid; int n_offset = n_ghost_grid - N_GHOST_POTENTIAL; - Real dt_now, dt_prev, cosmo_factor; - dt_now = Grav.dt_now; + dt_now = Grav.dt_now; dt_prev = Grav.dt_prev; #ifdef COSMOLOGY cosmo_factor = Cosmo.current_a * Cosmo.current_a / Cosmo.phi_0_gas; #else - cosmo_factor = 1.0; + cosmo_factor = 1.0; #endif // set values for GPU kernels - int tpb_x = TPBX_GRAV; - int tpb_y = TPBY_GRAV; - int tpb_z = TPBZ_GRAV; + int tpb_x = TPBX_GRAV; + int tpb_y = TPBY_GRAV; + int tpb_z = TPBZ_GRAV; int ngrid_x = (nx_pot - 1) / tpb_x + 1; int ngrid_y = (ny_pot - 1) / tpb_y + 1; int ngrid_z = (nz_pot - 1) / tpb_z + 1; @@ -271,20 +294,21 @@ void Grid3D::Extrapolate_Grav_Potential_GPU(){ // number of threads per 1D block dim3 dim3dBlock(tpb_x, tpb_y, tpb_z); - hipLaunchKernelGGL(Extrapolate_Grav_Potential_Kernel, dim3dGrid, dim3dBlock, 0, 0, C.d_Grav_potential, Grav.F.potential_d, Grav.F.potential_1_d, nx_pot, ny_pot, nz_pot, nx_grid, ny_grid, nz_grid, n_offset, dt_now, dt_prev, Grav.INITIAL, cosmo_factor ); - + hipLaunchKernelGGL(Extrapolate_Grav_Potential_Kernel, dim3dGrid, dim3dBlock, + 0, 0, C.d_Grav_potential, Grav.F.potential_d, + Grav.F.potential_1_d, nx_pot, ny_pot, nz_pot, nx_grid, + ny_grid, nz_grid, n_offset, dt_now, dt_prev, Grav.INITIAL, + cosmo_factor); } -#ifdef PARTICLES_CPU -void Grid3D::Copy_Potential_From_GPU(){ - CudaSafeCall( cudaMemcpy(Grav.F.potential_h, Grav.F.potential_d, Grav.n_cells_potential*sizeof(Real), cudaMemcpyDeviceToHost) ); + #ifdef PARTICLES_CPU +void Grid3D::Copy_Potential_From_GPU() +{ + CudaSafeCall(cudaMemcpy(Grav.F.potential_h, Grav.F.potential_d, + Grav.n_cells_potential * sizeof(Real), + cudaMemcpyDeviceToHost)); cudaDeviceSynchronize(); } -#endif //PARTICLES_CPU - - - - - + #endif // PARTICLES_CPU -#endif //GRAVITY +#endif // GRAVITY diff --git a/src/gravity/paris/HenryPeriodic.cu b/src/gravity/paris/HenryPeriodic.cu index cf82c2d38..8924e4541 100644 --- a/src/gravity/paris/HenryPeriodic.cu +++ b/src/gravity/paris/HenryPeriodic.cu @@ -1,99 +1,104 @@ #ifdef PARIS -#include "HenryPeriodic.hpp" + #include + #include + #include + #include -#include -#include -#include -#include + #include "HenryPeriodic.hpp" -HenryPeriodic::HenryPeriodic(const int n[3], const double lo[3], const double hi[3], const int m[3], const int id[3]): - idi_(id[0]), - idj_(id[1]), - idk_(id[2]), - mi_(m[0]), - mj_(m[1]), - mk_(m[2]), - nh_(n[2]/2+1), - ni_(n[0]), - nj_(n[1]), - nk_(n[2]), - bytes_(0) +HenryPeriodic::HenryPeriodic(const int n[3], const double lo[3], + const double hi[3], const int m[3], + const int id[3]) + : idi_(id[0]), + idj_(id[1]), + idk_(id[2]), + mi_(m[0]), + mj_(m[1]), + mk_(m[2]), + nh_(n[2] / 2 + 1), + ni_(n[0]), + nj_(n[1]), + nk_(n[2]), + bytes_(0) { // Pencil sub-decomposition within a 3D block mq_ = int(round(sqrt(mk_))); - while (mk_%mq_) mq_--; - mp_ = mk_/mq_; - assert(mp_*mq_ == mk_); + while (mk_ % mq_) mq_--; + mp_ = mk_ / mq_; + assert(mp_ * mq_ == mk_); - idp_ = idk_/mq_; - idq_ = idk_%mq_; + idp_ = idk_ / mq_; + idq_ = idk_ % mq_; // Communicators of tasks within pencils in each dimension { - const int color = idi_*mj_+idj_; - const int key = idk_; - MPI_Comm_split(MPI_COMM_WORLD,color,key,&commK_); + const int color = idi_ * mj_ + idj_; + const int key = idk_; + MPI_Comm_split(MPI_COMM_WORLD, color, key, &commK_); } { - const int color = idi_*mp_+idp_; - const int key = idj_*mq_+idq_; - MPI_Comm_split(MPI_COMM_WORLD,color,key,&commJ_); + const int color = idi_ * mp_ + idp_; + const int key = idj_ * mq_ + idq_; + MPI_Comm_split(MPI_COMM_WORLD, color, key, &commJ_); } { - const int color = idj_*mq_+idq_; - const int key = idi_*mp_+idp_; - MPI_Comm_split(MPI_COMM_WORLD,color,key,&commI_); + const int color = idj_ * mq_ + idq_; + const int key = idi_ * mp_ + idp_; + MPI_Comm_split(MPI_COMM_WORLD, color, key, &commI_); } // Maximum numbers of elements for various decompositions and dimensions - - dh_ = (nh_+mk_-1)/mk_; - di_ = (ni_+mi_-1)/mi_; - dj_ = (nj_+mj_-1)/mj_; - dk_ = (nk_+mk_-1)/mk_; - dip_ = (di_+mp_-1)/mp_; - djq_ = (dj_+mq_-1)/mq_; - const int mjq = mj_*mq_; - dhq_ = (nh_+mjq-1)/mjq; - const int mip = mi_*mp_; - djp_ = (nj_+mip-1)/mip; + dh_ = (nh_ + mk_ - 1) / mk_; + di_ = (ni_ + mi_ - 1) / mi_; + dj_ = (nj_ + mj_ - 1) / mj_; + dk_ = (nk_ + mk_ - 1) / mk_; + + dip_ = (di_ + mp_ - 1) / mp_; + djq_ = (dj_ + mq_ - 1) / mq_; + const int mjq = mj_ * mq_; + dhq_ = (nh_ + mjq - 1) / mjq; + const int mip = mi_ * mp_; + djp_ = (nj_ + mip - 1) / mip; // Maximum memory needed by work arrays - + const long nMax = std::max( - { long(di_)*long(dj_)*long(dk_), - long(mp_)*long(mq_)*long(dip_)*long(djq_)*long(dk_), - long(2)*long(dip_)*long(djq_)*long(mk_)*long(dh_), - long(2)*long(dip_)*long(mp_)*long(djq_)*long(mq_)*long(dh_), - long(2)*long(dip_)*long(djq_)*long(mjq)*long(dhq_), - long(2)*long(dip_)*long(dhq_)*long(mip)*long(djp_), - long(2)*djp_*long(dhq_)*long(mip)*long(dip_) - }); + {long(di_) * long(dj_) * long(dk_), + long(mp_) * long(mq_) * long(dip_) * long(djq_) * long(dk_), + long(2) * long(dip_) * long(djq_) * long(mk_) * long(dh_), + long(2) * long(dip_) * long(mp_) * long(djq_) * long(mq_) * long(dh_), + long(2) * long(dip_) * long(djq_) * long(mjq) * long(dhq_), + long(2) * long(dip_) * long(dhq_) * long(mip) * long(djp_), + long(2) * djp_ * long(dhq_) * long(mip) * long(dip_)}); assert(nMax <= INT_MAX); - bytes_ = nMax*sizeof(double); + bytes_ = nMax * sizeof(double); // FFT objects - CHECK(cufftPlanMany(&c2ci_,1,&ni_,&ni_,1,ni_,&ni_,1,ni_,CUFFT_Z2Z,djp_*dhq_)); - CHECK(cufftPlanMany(&c2cj_,1,&nj_,&nj_,1,nj_,&nj_,1,nj_,CUFFT_Z2Z,dip_*dhq_)); - CHECK(cufftPlanMany(&c2rk_,1,&nk_,&nh_,1,nh_,&nk_,1,nk_,CUFFT_Z2D,dip_*djq_)); - CHECK(cufftPlanMany(&r2ck_,1,&nk_,&nk_,1,nk_,&nh_,1,nh_,CUFFT_D2Z,dip_*djq_)); + CHECK(cufftPlanMany(&c2ci_, 1, &ni_, &ni_, 1, ni_, &ni_, 1, ni_, CUFFT_Z2Z, + djp_ * dhq_)); + CHECK(cufftPlanMany(&c2cj_, 1, &nj_, &nj_, 1, nj_, &nj_, 1, nj_, CUFFT_Z2Z, + dip_ * dhq_)); + CHECK(cufftPlanMany(&c2rk_, 1, &nk_, &nh_, 1, nh_, &nk_, 1, nk_, CUFFT_Z2D, + dip_ * djq_)); + CHECK(cufftPlanMany(&r2ck_, 1, &nk_, &nk_, 1, nk_, &nh_, 1, nh_, CUFFT_D2Z, + dip_ * djq_)); -#ifndef MPI_GPU + #ifndef MPI_GPU // Host arrays for MPI communication - CHECK(cudaHostAlloc(&ha_,bytes_+bytes_,cudaHostAllocDefault)); + CHECK(cudaHostAlloc(&ha_, bytes_ + bytes_, cudaHostAllocDefault)); assert(ha_); - hb_ = ha_+nMax; -#endif + hb_ = ha_ + nMax; + #endif } HenryPeriodic::~HenryPeriodic() { -#ifndef MPI_GPU + #ifndef MPI_GPU CHECK(cudaFreeHost(ha_)); ha_ = hb_ = nullptr; -#endif + #endif CHECK(cufftDestroy(r2ck_)); CHECK(cufftDestroy(c2rk_)); CHECK(cufftDestroy(c2cj_)); diff --git a/src/gravity/paris/HenryPeriodic.hpp b/src/gravity/paris/HenryPeriodic.hpp index ab56fde79..82b2307e6 100644 --- a/src/gravity/paris/HenryPeriodic.hpp +++ b/src/gravity/paris/HenryPeriodic.hpp @@ -1,88 +1,101 @@ #pragma once -#include #include +#include + #include "../../utils/gpu.hpp" /** * @brief Generic distributed-memory 3D FFT filter. */ -class HenryPeriodic { - public: - - /** - * @param[in] n[3] { Global number of cells in each dimension, without ghost cells. } - * @param[in] lo[3] { Physical location of the global lower bound of each dimension. } - * @param[in] hi[3] { Physical location of the global upper bound of each dimension, minus one grid cell. - * The one-cell difference is because of the periodic domain. - * See @ref Potential_Paris_3D::Initialize for an example computation of these arguments. } - * @param[in] m[3] { Number of MPI tasks in each dimension. } - * @param[in] id[3] { Coordinates of this MPI task, starting at `{0,0,0}`. } - */ - HenryPeriodic(const int n[3], const double lo[3], const double hi[3], const int m[3], const int id[3]); - - ~HenryPeriodic(); - - /** - * @return { Number of bytes needed for array arguments for @ref filter. } - */ - size_t bytes() const { return bytes_; } - - /** - * @detail { Performs a 3D FFT on the real input field, - * applies the provided filter in frequency space, - * and perform the inverse 3D FFT. - * Expects fields in 3D block distribution with no ghost cells. } - * @tparam F { Type of functor that will applied in frequency space. - * Should be resolved implicitly by the compiler. } - * @param[in] bytes { Number of bytes allocated for arguments @ref before and @ref after. - * Used to ensure that the arrays have enough extra work space. } - * @param[in,out] before { Input field for filtering. Modified as a work array. - * Must be at least @ref bytes() bytes, likely larger than the original field. } - * @param[out] after { Output field, filtered. Modified as a work array. - * Must be at least @ref bytes() bytes, likely larger than the actual output field. } - * @param[in] f { Functor or lambda function to be used as a filter. - * The operator should have the following prototype. - * \code - * complex f(int i, int j, int k, complex before) - * \endcode - * Arguments `i`, `j`, and `k` are the frequency-space coordinates. - * Argument `before` is the input value at those indices, after the FFT. - * The function should return the filtered value. } - */ - template - void filter(const size_t bytes, double *const before, double *const after, const F f) const; - - private: - int idi_,idj_,idk_; //!< MPI coordinates of 3D block - int mi_,mj_,mk_; //!< Number of MPI tasks in each dimension of 3D domain - int nh_; //!< Global number of complex values in Z dimension, after R2C transform - int ni_,nj_,nk_; //!< Global number of real points in each dimension - int mp_,mq_; //!< Number of MPI tasks in X and Y dimensions of Z pencil - int idp_,idq_; //!< X and Y task IDs within Z pencil - MPI_Comm commI_,commJ_,commK_; //!< Communicators of fellow tasks in X, Y, and Z pencils - int dh_,di_,dj_,dk_; //!< Max number of local points in each dimension - int dhq_,dip_,djp_,djq_; //!< Max number of local points in dimensions of 2D decompositions - size_t bytes_; //!< Max bytes needed for argument arrays - cufftHandle c2ci_,c2cj_,c2rk_,r2ck_; //!< Objects for forward and inverse FFTs +class HenryPeriodic +{ + public: + /** + * @param[in] n[3] { Global number of cells in each dimension, without ghost + * cells. } + * @param[in] lo[3] { Physical location of the global lower bound of each + * dimension. } + * @param[in] hi[3] { Physical location of the global upper bound of each + * dimension, minus one grid cell. The one-cell difference is because of the + * periodic domain. See @ref Potential_Paris_3D::Initialize for an example + * computation of these arguments. } + * @param[in] m[3] { Number of MPI tasks in each dimension. } + * @param[in] id[3] { Coordinates of this MPI task, starting at `{0,0,0}`. } + */ + HenryPeriodic(const int n[3], const double lo[3], const double hi[3], + const int m[3], const int id[3]); + + ~HenryPeriodic(); + + /** + * @return { Number of bytes needed for array arguments for @ref filter. } + */ + size_t bytes() const { return bytes_; } + + /** + * @detail { Performs a 3D FFT on the real input field, + * applies the provided filter in frequency space, + * and perform the inverse 3D FFT. + * Expects fields in 3D block distribution with no ghost cells. } + * @tparam F { Type of functor that will applied in frequency space. + * Should be resolved implicitly by the compiler. } + * @param[in] bytes { Number of bytes allocated for arguments @ref before and + * @ref after. Used to ensure that the arrays have enough extra work space. } + * @param[in,out] before { Input field for filtering. Modified as a work + * array. Must be at least @ref bytes() bytes, likely larger than the original + * field. } + * @param[out] after { Output field, filtered. Modified as a work array. + * Must be at least @ref bytes() bytes, likely larger than + * the actual output field. } + * @param[in] f { Functor or lambda function to be used as a filter. + * The operator should have the following prototype. + * \code + * complex f(int i, int j, int k, complex before) + * \endcode + * Arguments `i`, `j`, and `k` are the frequency-space + * coordinates. Argument `before` is the input value at those indices, after + * the FFT. The function should return the filtered value. } + */ + template + void filter(const size_t bytes, double *const before, double *const after, + const F f) const; + + private: + int idi_, idj_, idk_; //!< MPI coordinates of 3D block + int mi_, mj_, mk_; //!< Number of MPI tasks in each dimension of 3D domain + int nh_; //!< Global number of complex values in Z dimension, after R2C + //!< transform + int ni_, nj_, nk_; //!< Global number of real points in each dimension + int mp_, mq_; //!< Number of MPI tasks in X and Y dimensions of Z pencil + int idp_, idq_; //!< X and Y task IDs within Z pencil + MPI_Comm commI_, commJ_, + commK_; //!< Communicators of fellow tasks in X, Y, and Z pencils + int dh_, di_, dj_, dk_; //!< Max number of local points in each dimension + int dhq_, dip_, djp_, + djq_; //!< Max number of local points in dimensions of 2D decompositions + size_t bytes_; //!< Max bytes needed for argument arrays + cufftHandle c2ci_, c2cj_, c2rk_, + r2ck_; //!< Objects for forward and inverse FFTs #ifndef MPI_GPU - double *ha_, *hb_; //!< Host copies for MPI messages + double *ha_, *hb_; //!< Host copies for MPI messages #endif }; #if defined(__HIP__) || defined(__CUDACC__) template -void HenryPeriodic::filter(const size_t bytes, double *const before, double *const after, const F f) const +void HenryPeriodic::filter(const size_t bytes, double *const before, + double *const after, const F f) const { // Make sure arguments have enough space assert(bytes >= bytes_); - double *const a = after; - double *const b = before; - cufftDoubleComplex *const ac = reinterpret_cast(a); - cufftDoubleComplex *const bc = reinterpret_cast(b); + double *const a = after; + double *const b = before; + cufftDoubleComplex *const ac = reinterpret_cast(a); + cufftDoubleComplex *const bc = reinterpret_cast(b); // Local copies of member variables for lambda capture @@ -96,323 +109,328 @@ void HenryPeriodic::filter(const size_t bytes, double *const before, double *con // Indices and sizes for pencil redistributions - const int idip = idi*mp+idp; - const int idjq = idj*mq+idq; - const int mip = mi*mp; - const int mjq = mj*mq; + const int idip = idi * mp + idp; + const int idjq = idj * mq + idq; + const int mip = mi * mp; + const int mjq = mj * mq; // Reorder 3D block into sub-pencils gpuFor( - mp,mq,dip,djq,dk, - GPU_LAMBDA(const int p, const int q, const int i, const int j, const int k) { - const int ii = p*dip+i; - const int jj = q*djq+j; - const int ia = k+dk*(j+djq*(i+dip*(q+mq*p))); - const int ib = k+dk*(jj+dj*ii); - a[ia] = b[ib]; - }); + mp, mq, dip, djq, dk, + GPU_LAMBDA(const int p, const int q, const int i, const int j, + const int k) { + const int ii = p * dip + i; + const int jj = q * djq + j; + const int ia = k + dk * (j + djq * (i + dip * (q + mq * p))); + const int ib = k + dk * (jj + dj * ii); + a[ia] = b[ib]; + }); // Redistribute into Z pencils - const int countK = dip*djq*dk; -#ifndef MPI_GPU - CHECK(cudaMemcpy(ha_,a,bytes,cudaMemcpyDeviceToHost)); - MPI_Alltoall(ha_,countK,MPI_DOUBLE,hb_,countK,MPI_DOUBLE,commK_); - CHECK(cudaMemcpy(b,hb_,bytes,cudaMemcpyHostToDevice)); -#else + const int countK = dip * djq * dk; + #ifndef MPI_GPU + CHECK(cudaMemcpy(ha_, a, bytes, cudaMemcpyDeviceToHost)); + MPI_Alltoall(ha_, countK, MPI_DOUBLE, hb_, countK, MPI_DOUBLE, commK_); + CHECK(cudaMemcpy(b, hb_, bytes, cudaMemcpyHostToDevice)); + #else CHECK(cudaDeviceSynchronize()); - MPI_Alltoall(a,countK,MPI_DOUBLE,b,countK,MPI_DOUBLE,commK_); -#endif + MPI_Alltoall(a, countK, MPI_DOUBLE, b, countK, MPI_DOUBLE, commK_); + #endif // Make Z pencils contiguous in Z { - const int iLo = idi*di+idp*dip; - const int iHi = std::min({iLo+dip,(idi+1)*di,ni}); - const int jLo = idj*dj+idq*djq; - const int jHi = std::min({jLo+djq,(idj+1)*dj,nj}); + const int iLo = idi * di + idp * dip; + const int iHi = std::min({iLo + dip, (idi + 1) * di, ni}); + const int jLo = idj * dj + idq * djq; + const int jHi = std::min({jLo + djq, (idj + 1) * dj, nj}); gpuFor( - iHi-iLo,jHi-jLo,mk,dk, - GPU_LAMBDA(const int i, const int j, const int pq, const int k) { - const int kk = pq*dk+k; - if (kk < nk) { - const int ia = kk+nk*(j+djq*i); - const int ib = k+dk*(j+djq*(i+dip*pq)); - a[ia] = b[ib]; - } - }); + iHi - iLo, jHi - jLo, mk, dk, + GPU_LAMBDA(const int i, const int j, const int pq, const int k) { + const int kk = pq * dk + k; + if (kk < nk) { + const int ia = kk + nk * (j + djq * i); + const int ib = k + dk * (j + djq * (i + dip * pq)); + a[ia] = b[ib]; + } + }); } // Real-to-complex FFT in Z - CHECK(cufftExecD2Z(r2ck_,a,bc)); + CHECK(cufftExecD2Z(r2ck_, a, bc)); // Rearrange for Y redistribution { - const int iLo = idi*di+idp*dip; - const int iHi = std::min({iLo+dip,(idi+1)*di,ni}); - const int jLo = idj_*dj_+idq*djq; - const int jHi = std::min({jLo+djq,(idj+1)*dj,nj}); + const int iLo = idi * di + idp * dip; + const int iHi = std::min({iLo + dip, (idi + 1) * di, ni}); + const int jLo = idj_ * dj_ + idq * djq; + const int jHi = std::min({jLo + djq, (idj + 1) * dj, nj}); gpuFor( - mjq,iHi-iLo,jHi-jLo,dhq, - GPU_LAMBDA(const int q, const int i, const int j, const int k) { - const int kk = q*dhq+k; - if (kk < nh) { - const int ia = k+dhq*(j+djq*(i+dip*q)); - const int ib = kk+nh*(j+djq*i); - ac[ia] = bc[ib]; - } - }); + mjq, iHi - iLo, jHi - jLo, dhq, + GPU_LAMBDA(const int q, const int i, const int j, const int k) { + const int kk = q * dhq + k; + if (kk < nh) { + const int ia = k + dhq * (j + djq * (i + dip * q)); + const int ib = kk + nh * (j + djq * i); + ac[ia] = bc[ib]; + } + }); } // Redistribute for Y pencils - const int countJ = 2*dip*djq*dhq; -#ifndef MPI_GPU - CHECK(cudaMemcpy(ha_,a,bytes,cudaMemcpyDeviceToHost)); - MPI_Alltoall(ha_,countJ,MPI_DOUBLE,hb_,countJ,MPI_DOUBLE,commJ_); - CHECK(cudaMemcpy(b,hb_,bytes,cudaMemcpyHostToDevice)); -#else + const int countJ = 2 * dip * djq * dhq; + #ifndef MPI_GPU + CHECK(cudaMemcpy(ha_, a, bytes, cudaMemcpyDeviceToHost)); + MPI_Alltoall(ha_, countJ, MPI_DOUBLE, hb_, countJ, MPI_DOUBLE, commJ_); + CHECK(cudaMemcpy(b, hb_, bytes, cudaMemcpyHostToDevice)); + #else CHECK(cudaDeviceSynchronize()); - MPI_Alltoall(a,countJ,MPI_DOUBLE,b,countJ,MPI_DOUBLE,commJ_); -#endif + MPI_Alltoall(a, countJ, MPI_DOUBLE, b, countJ, MPI_DOUBLE, commJ_); + #endif // Make Y pencils contiguous in Y { - const int iLo = idi*di+idp*dip; - const int iHi = std::min({iLo+dip,(idi+1)*di,ni}); - const int kLo = idjq*dhq; - const int kHi = std::min(kLo+dhq,nh); + const int iLo = idi * di + idp * dip; + const int iHi = std::min({iLo + dip, (idi + 1) * di, ni}); + const int kLo = idjq * dhq; + const int kHi = std::min(kLo + dhq, nh); gpuFor( - kHi-kLo,iHi-iLo,mj,mq,djq, - GPU_LAMBDA(const int k, const int i, const int r, const int q, const int j) { - const int rdj = r*dj; - const int jj = rdj+q*djq+j; - if ((jj < nj) && (jj < rdj+dj)) { - const int ia = jj+nj*(i+dip*k); - const int ib = k+dhq*(j+djq*(i+dip*(q+mq*r))); - ac[ia] = bc[ib]; - } - }); + kHi - kLo, iHi - iLo, mj, mq, djq, + GPU_LAMBDA(const int k, const int i, const int r, const int q, + const int j) { + const int rdj = r * dj; + const int jj = rdj + q * djq + j; + if ((jj < nj) && (jj < rdj + dj)) { + const int ia = jj + nj * (i + dip * k); + const int ib = k + dhq * (j + djq * (i + dip * (q + mq * r))); + ac[ia] = bc[ib]; + } + }); } // Forward FFT in Y - CHECK(cufftExecZ2Z(c2cj_,ac,bc,CUFFT_FORWARD)); + CHECK(cufftExecZ2Z(c2cj_, ac, bc, CUFFT_FORWARD)); // Rearrange for X redistribution { - const int iLo = idi*di+idp*dip; - const int iHi = std::min({iLo+dip,(idi+1)*di,ni}); - const int kLo = idjq*dhq; - const int kHi = std::min(kLo+dhq,nh); + const int iLo = idi * di + idp * dip; + const int iHi = std::min({iLo + dip, (idi + 1) * di, ni}); + const int kLo = idjq * dhq; + const int kHi = std::min(kLo + dhq, nh); gpuFor( - mip,kHi-kLo,iHi-iLo,djp, - GPU_LAMBDA(const int p, const int k, const int i, const int j) { - const int jj = p*djp+j; - if (jj < nj) { - const int ia = j+djp*(i+dip*(k+dhq*p)); - const int ib = jj+nj*(i+dip*k); - ac[ia] = bc[ib]; - } - }); + mip, kHi - kLo, iHi - iLo, djp, + GPU_LAMBDA(const int p, const int k, const int i, const int j) { + const int jj = p * djp + j; + if (jj < nj) { + const int ia = j + djp * (i + dip * (k + dhq * p)); + const int ib = jj + nj * (i + dip * k); + ac[ia] = bc[ib]; + } + }); } // Redistribute for X pencils - const int countI = 2*dip*djp*dhq; -#ifndef MPI_GPU - CHECK(cudaMemcpy(ha_,a,bytes,cudaMemcpyDeviceToHost)); - MPI_Alltoall(ha_,countI,MPI_DOUBLE,hb_,countI,MPI_DOUBLE,commI_); - CHECK(cudaMemcpy(b,hb_,bytes,cudaMemcpyHostToDevice)); -#else + const int countI = 2 * dip * djp * dhq; + #ifndef MPI_GPU + CHECK(cudaMemcpy(ha_, a, bytes, cudaMemcpyDeviceToHost)); + MPI_Alltoall(ha_, countI, MPI_DOUBLE, hb_, countI, MPI_DOUBLE, commI_); + CHECK(cudaMemcpy(b, hb_, bytes, cudaMemcpyHostToDevice)); + #else CHECK(cudaDeviceSynchronize()); - MPI_Alltoall(a,countI,MPI_DOUBLE,b,countI,MPI_DOUBLE,commI_); -#endif + MPI_Alltoall(a, countI, MPI_DOUBLE, b, countI, MPI_DOUBLE, commI_); + #endif // Make X pencils contiguous in X { - const int jLo = idip*djp; - const int jHi = std::min(jLo+djp,nj); - const int kLo = idjq*dhq; - const int kHi = std::min(kLo+dhq,nh); + const int jLo = idip * djp; + const int jHi = std::min(jLo + djp, nj); + const int kLo = idjq * dhq; + const int kHi = std::min(kLo + dhq, nh); gpuFor( - jHi-jLo,kHi-kLo,mi,mp,dip, - GPU_LAMBDA(const int j, const int k, const int r, const int p, const int i) { - const int rdi = r*di; - const int ii = rdi+p*dip+i; - if ((ii < ni) && (ii < rdi+di)) { - const int ia = ii+ni*(k+dhq*j); - const int ib = j+djp*(i+dip*(k+dhq*(p+mp*r))); - ac[ia] = bc[ib]; - } - }); + jHi - jLo, kHi - kLo, mi, mp, dip, + GPU_LAMBDA(const int j, const int k, const int r, const int p, + const int i) { + const int rdi = r * di; + const int ii = rdi + p * dip + i; + if ((ii < ni) && (ii < rdi + di)) { + const int ia = ii + ni * (k + dhq * j); + const int ib = j + djp * (i + dip * (k + dhq * (p + mp * r))); + ac[ia] = bc[ib]; + } + }); } // Forward FFT in X - CHECK(cufftExecZ2Z(c2ci_,ac,bc,CUFFT_FORWARD)); + CHECK(cufftExecZ2Z(c2ci_, ac, bc, CUFFT_FORWARD)); // Apply filter in frequency space distributed in X pencils - const int jLo = idip*djp; - const int jHi = std::min(jLo+djp,nj); - const int kLo = idjq*dhq; - const int kHi = std::min(kLo+dhq,nh); + const int jLo = idip * djp; + const int jHi = std::min(jLo + djp, nj); + const int kLo = idjq * dhq; + const int kHi = std::min(kLo + dhq, nh); gpuFor( - jHi-jLo,kHi-kLo,ni, - GPU_LAMBDA(const int j0, const int k0, const int i) { - const int j = jLo+j0; - const int k = kLo+k0; - const int iab = i+ni*(k0+dhq*j0); - ac[iab] = f(i,j,k,bc[iab]); - }); + jHi - jLo, kHi - kLo, ni, + GPU_LAMBDA(const int j0, const int k0, const int i) { + const int j = jLo + j0; + const int k = kLo + k0; + const int iab = i + ni * (k0 + dhq * j0); + ac[iab] = f(i, j, k, bc[iab]); + }); // Backward FFT in X - CHECK(cufftExecZ2Z(c2ci_,ac,bc,CUFFT_INVERSE)); + CHECK(cufftExecZ2Z(c2ci_, ac, bc, CUFFT_INVERSE)); // Rearrange for Y redistribution { - const int jLo = idip*djp; - const int jHi = std::min(jLo+djp,nj); - const int kLo = idjq*dhq; - const int kHi = std::min(kLo+dhq,nh); + const int jLo = idip * djp; + const int jHi = std::min(jLo + djp, nj); + const int kLo = idjq * dhq; + const int kHi = std::min(kLo + dhq, nh); gpuFor( - mi,mp,jHi-jLo,kHi-kLo,dip, - GPU_LAMBDA(const int r, const int p, const int j, const int k, const int i) { - const int rdi = r*di; - const int ii = rdi+p*dip+i; - if ((ii < ni) && (ii < rdi+di)) { - const int ia = i+dip*(k+dhq*(j+djp*(p+mp*r))); - const int ib = ii+ni*(k+dhq*j); - ac[ia] = bc[ib]; - } - }); + mi, mp, jHi - jLo, kHi - kLo, dip, + GPU_LAMBDA(const int r, const int p, const int j, const int k, + const int i) { + const int rdi = r * di; + const int ii = rdi + p * dip + i; + if ((ii < ni) && (ii < rdi + di)) { + const int ia = i + dip * (k + dhq * (j + djp * (p + mp * r))); + const int ib = ii + ni * (k + dhq * j); + ac[ia] = bc[ib]; + } + }); } // Redistribute for Y pencils -#ifndef MPI_GPU - CHECK(cudaMemcpy(ha_,a,bytes,cudaMemcpyDeviceToHost)); - MPI_Alltoall(ha_,countI,MPI_DOUBLE,hb_,countI,MPI_DOUBLE,commI_); - CHECK(cudaMemcpy(b,hb_,bytes,cudaMemcpyHostToDevice)); -#else + #ifndef MPI_GPU + CHECK(cudaMemcpy(ha_, a, bytes, cudaMemcpyDeviceToHost)); + MPI_Alltoall(ha_, countI, MPI_DOUBLE, hb_, countI, MPI_DOUBLE, commI_); + CHECK(cudaMemcpy(b, hb_, bytes, cudaMemcpyHostToDevice)); + #else CHECK(cudaDeviceSynchronize()); - MPI_Alltoall(a,countI,MPI_DOUBLE,b,countI,MPI_DOUBLE,commI_); -#endif + MPI_Alltoall(a, countI, MPI_DOUBLE, b, countI, MPI_DOUBLE, commI_); + #endif // Make Y pencils contiguous in Y { - const int iLo = idi*di+idp*dip; - const int iHi = std::min({iLo+dip,(idi+1)*di,ni}); - const int kLo = idjq*dhq; - const int kHi = std::min(kLo+dhq,nh); + const int iLo = idi * di + idp * dip; + const int iHi = std::min({iLo + dip, (idi + 1) * di, ni}); + const int kLo = idjq * dhq; + const int kHi = std::min(kLo + dhq, nh); gpuFor( - kHi-kLo,iHi-iLo,mip,djp, - GPU_LAMBDA(const int k, const int i, const int p, const int j) { - const int jj = p*djp+j; - if (jj < nj) { - const int ia = jj+nj*(i+dip*k); - const int ib = i+dip*(k+dhq*(j+djp*p)); - ac[ia] = bc[ib]; - } - }); + kHi - kLo, iHi - iLo, mip, djp, + GPU_LAMBDA(const int k, const int i, const int p, const int j) { + const int jj = p * djp + j; + if (jj < nj) { + const int ia = jj + nj * (i + dip * k); + const int ib = i + dip * (k + dhq * (j + djp * p)); + ac[ia] = bc[ib]; + } + }); } // Backward FFT in Y - CHECK(cufftExecZ2Z(c2cj_,ac,bc,CUFFT_INVERSE)); + CHECK(cufftExecZ2Z(c2cj_, ac, bc, CUFFT_INVERSE)); // Rearrange for Z redistribution { - const int iLo = idi*di+idp*dip; - const int iHi = std::min({iLo+dip,(idi+1)*di,ni}); - const int kLo = idjq*dhq; - const int kHi = std::min(kLo+dhq,nh); + const int iLo = idi * di + idp * dip; + const int iHi = std::min({iLo + dip, (idi + 1) * di, ni}); + const int kLo = idjq * dhq; + const int kHi = std::min(kLo + dhq, nh); gpuFor( - mj,mq,kHi-kLo,iHi-iLo,djq, - GPU_LAMBDA(const int r, const int q, const int k, const int i, const int j) { - const int rdj = r*dj; - const int jj = rdj+q*djq+j; - if ((jj < nj) && (jj < rdj+dj)) { - const int ia = j+djq*(i+dip*(k+dhq*(q+mq*r))); - const int ib = jj+nj*(i+dip*k); - ac[ia] = bc[ib]; - } - }); + mj, mq, kHi - kLo, iHi - iLo, djq, + GPU_LAMBDA(const int r, const int q, const int k, const int i, + const int j) { + const int rdj = r * dj; + const int jj = rdj + q * djq + j; + if ((jj < nj) && (jj < rdj + dj)) { + const int ia = j + djq * (i + dip * (k + dhq * (q + mq * r))); + const int ib = jj + nj * (i + dip * k); + ac[ia] = bc[ib]; + } + }); } // Redistribute in Z pencils -#ifndef MPI_GPU - CHECK(cudaMemcpy(ha_,a,bytes,cudaMemcpyDeviceToHost)); - MPI_Alltoall(ha_,countJ,MPI_DOUBLE,hb_,countJ,MPI_DOUBLE,commJ_); - CHECK(cudaMemcpy(b,hb_,bytes,cudaMemcpyHostToDevice)); -#else + #ifndef MPI_GPU + CHECK(cudaMemcpy(ha_, a, bytes, cudaMemcpyDeviceToHost)); + MPI_Alltoall(ha_, countJ, MPI_DOUBLE, hb_, countJ, MPI_DOUBLE, commJ_); + CHECK(cudaMemcpy(b, hb_, bytes, cudaMemcpyHostToDevice)); + #else CHECK(cudaDeviceSynchronize()); - MPI_Alltoall(a,countJ,MPI_DOUBLE,b,countJ,MPI_DOUBLE,commJ_); -#endif + MPI_Alltoall(a, countJ, MPI_DOUBLE, b, countJ, MPI_DOUBLE, commJ_); + #endif // Make Z pencils contiguous in Z { - const int iLo = idi*di+idp*dip; - const int iHi = std::min({iLo+dip,(idi+1)*di,ni}); - const int jLo = idj*dj+idq*djq; - const int jHi = std::min({jLo+djq,(idj+1)*dj,nj}); + const int iLo = idi * di + idp * dip; + const int iHi = std::min({iLo + dip, (idi + 1) * di, ni}); + const int jLo = idj * dj + idq * djq; + const int jHi = std::min({jLo + djq, (idj + 1) * dj, nj}); gpuFor( - iHi-iLo,jHi-jLo,mjq,dhq, - GPU_LAMBDA(const int i, const int j, const int q, const int k) { - const int kk = q*dhq+k; - if (kk < nh) { - const int ia = kk+nh*(j+djq*i); - const int ib = j+djq*(i+dip*(k+dhq*q)); - ac[ia] = bc[ib]; - } - }); + iHi - iLo, jHi - jLo, mjq, dhq, + GPU_LAMBDA(const int i, const int j, const int q, const int k) { + const int kk = q * dhq + k; + if (kk < nh) { + const int ia = kk + nh * (j + djq * i); + const int ib = j + djq * (i + dip * (k + dhq * q)); + ac[ia] = bc[ib]; + } + }); } // Complex-to-real FFT in Z - CHECK(cufftExecZ2D(c2rk_,ac,b)); + CHECK(cufftExecZ2D(c2rk_, ac, b)); // Rearrange for 3D-block redistribution { - const int iLo = idi*di+idp*dip; - const int iHi = std::min({iLo+dip,(idi+1)*di,ni}); - const int jLo = idj*dj+idq*djq; - const int jHi = std::min({jLo+djq,(idj+1)*dj,nj}); + const int iLo = idi * di + idp * dip; + const int iHi = std::min({iLo + dip, (idi + 1) * di, ni}); + const int jLo = idj * dj + idq * djq; + const int jHi = std::min({jLo + djq, (idj + 1) * dj, nj}); gpuFor( - mk,iHi-iLo,jHi-jLo,dk, - GPU_LAMBDA(const int pq, const int i, const int j, const int k) { - const int kk = pq*dk+k; - if (kk < nk) { - const int ia = k+dk*(j+djq*(i+dip*pq)); - const int ib = kk+nk*(j+djq*i); - a[ia] = b[ib]; - } - }); + mk, iHi - iLo, jHi - jLo, dk, + GPU_LAMBDA(const int pq, const int i, const int j, const int k) { + const int kk = pq * dk + k; + if (kk < nk) { + const int ia = k + dk * (j + djq * (i + dip * pq)); + const int ib = kk + nk * (j + djq * i); + a[ia] = b[ib]; + } + }); } // Redistribute for 3D blocks -#ifndef MPI_GPU - CHECK(cudaMemcpy(ha_,a,bytes,cudaMemcpyDeviceToHost)); - MPI_Alltoall(ha_,countK,MPI_DOUBLE,hb_,countK,MPI_DOUBLE,commK_); - CHECK(cudaMemcpy(b,hb_,bytes,cudaMemcpyHostToDevice)); -#else + #ifndef MPI_GPU + CHECK(cudaMemcpy(ha_, a, bytes, cudaMemcpyDeviceToHost)); + MPI_Alltoall(ha_, countK, MPI_DOUBLE, hb_, countK, MPI_DOUBLE, commK_); + CHECK(cudaMemcpy(b, hb_, bytes, cudaMemcpyHostToDevice)); + #else CHECK(cudaDeviceSynchronize()); - MPI_Alltoall(a,countK,MPI_DOUBLE,b,countK,MPI_DOUBLE,commK_); -#endif + MPI_Alltoall(a, countK, MPI_DOUBLE, b, countK, MPI_DOUBLE, commK_); + #endif // Rearrange into 3D blocks and apply FFT normalization { - const double divN = 1.0/(double(ni)*double(nj)*double(nk)); - const int kLo = idk*dk; - const int kHi = std::min(kLo+dk,nk); + const double divN = 1.0 / (double(ni) * double(nj) * double(nk)); + const int kLo = idk * dk; + const int kHi = std::min(kLo + dk, nk); gpuFor( - mp,dip,mq,djq,kHi-kLo, - GPU_LAMBDA(const int p, const int i, const int q, const int j, const int k) { - const int ii = p*dip+i; - const int jj = q*djq+j; - if ((ii < di) && (jj < dj)) { - const int ia = k+dk*(jj+dj*ii); - const int ib = k+dk*(j+djq*(i+dip*(q+mq*p))); - a[ia] = divN*b[ib]; - } - }); + mp, dip, mq, djq, kHi - kLo, + GPU_LAMBDA(const int p, const int i, const int q, const int j, + const int k) { + const int ii = p * dip + i; + const int jj = q * djq + j; + if ((ii < di) && (jj < dj)) { + const int ia = k + dk * (jj + dj * ii); + const int ib = k + dk * (j + djq * (i + dip * (q + mq * p))); + a[ia] = divN * b[ib]; + } + }); } } #endif - diff --git a/src/gravity/paris/ParisPeriodic.cu b/src/gravity/paris/ParisPeriodic.cu index 671b42aef..b7e081f78 100644 --- a/src/gravity/paris/ParisPeriodic.cu +++ b/src/gravity/paris/ParisPeriodic.cu @@ -1,77 +1,83 @@ #ifdef PARIS -#include "ParisPeriodic.hpp" + #include -#include + #include "ParisPeriodic.hpp" -__host__ __device__ static inline double sqr(const double x) { return x*x; } +__host__ __device__ static inline double sqr(const double x) { return x * x; } -ParisPeriodic::ParisPeriodic(const int n[3], const double lo[3], const double hi[3], const int m[3], const int id[3]): - ni_(n[0]), - nj_(n[1]), -#ifdef PARIS_3PT - nk_(n[2]), - ddi_(2.0*double(n[0]-1)/(hi[0]-lo[0])), - ddj_(2.0*double(n[1]-1)/(hi[1]-lo[1])), - ddk_(2.0*double(n[2]-1)/(hi[2]-lo[2])), -#elif defined PARIS_5PT - nk_(n[2]), - ddi_(sqr(double(n[0]-1)/(hi[0]-lo[0]))/6.0), - ddj_(sqr(double(n[1]-1)/(hi[1]-lo[1]))/6.0), - ddk_(sqr(double(n[2]-1)/(hi[2]-lo[2]))/6.0), -#else - ddi_{2.0*M_PI*double(n[0]-1)/(double(n[0])*(hi[0]-lo[0]))}, - ddj_{2.0*M_PI*double(n[1]-1)/(double(n[1])*(hi[1]-lo[1]))}, - ddk_{2.0*M_PI*double(n[2]-1)/(double(n[2])*(hi[2]-lo[2]))}, -#endif - henry(n,lo,hi,m,id) -{ } +ParisPeriodic::ParisPeriodic(const int n[3], const double lo[3], + const double hi[3], const int m[3], + const int id[3]) + : ni_(n[0]), + nj_(n[1]), + #ifdef PARIS_3PT + nk_(n[2]), + ddi_(2.0 * double(n[0] - 1) / (hi[0] - lo[0])), + ddj_(2.0 * double(n[1] - 1) / (hi[1] - lo[1])), + ddk_(2.0 * double(n[2] - 1) / (hi[2] - lo[2])), + #elif defined PARIS_5PT + nk_(n[2]), + ddi_(sqr(double(n[0] - 1) / (hi[0] - lo[0])) / 6.0), + ddj_(sqr(double(n[1] - 1) / (hi[1] - lo[1])) / 6.0), + ddk_(sqr(double(n[2] - 1) / (hi[2] - lo[2])) / 6.0), + #else + ddi_{2.0 * M_PI * double(n[0] - 1) / (double(n[0]) * (hi[0] - lo[0]))}, + ddj_{2.0 * M_PI * double(n[1] - 1) / (double(n[1]) * (hi[1] - lo[1]))}, + ddk_{2.0 * M_PI * double(n[2] - 1) / (double(n[2]) * (hi[2] - lo[2]))}, + #endif + henry(n, lo, hi, m, id) +{ +} -void ParisPeriodic::solve(const size_t bytes, double *const density, double *const potential) const +void ParisPeriodic::solve(const size_t bytes, double *const density, + double *const potential) const { // Local copies of members for lambda capture const int ni = ni_, nj = nj_; const double ddi = ddi_, ddj = ddj_, ddk = ddk_; // Poisson-solve constants that depend on divergence-operator approximation -#ifdef PARIS_3PT - const int nk = nk_; - const double si = M_PI/double(ni); - const double sj = M_PI/double(nj); - const double sk = M_PI/double(nk); -#elif defined PARIS_5PT - const int nk = nk_; - const double si = 2.0*M_PI/double(ni); - const double sj = 2.0*M_PI/double(nj); - const double sk = 2.0*M_PI/double(nk); -#endif + #ifdef PARIS_3PT + const int nk = nk_; + const double si = M_PI / double(ni); + const double sj = M_PI / double(nj); + const double sk = M_PI / double(nk); + #elif defined PARIS_5PT + const int nk = nk_; + const double si = 2.0 * M_PI / double(ni); + const double sj = 2.0 * M_PI / double(nj); + const double sk = 2.0 * M_PI / double(nk); + #endif // Provide FFT filter with a lambda that does Poisson solve in frequency space - henry.filter(bytes,density,potential, - [=] __device__ (const int i, const int j, const int k, const cufftDoubleComplex b) { - if (i || j || k) { -#ifdef PARIS_3PT - const double i2 = sqr(sin(double(min(i,ni-i))*si)*ddi); - const double j2 = sqr(sin(double(min(j,nj-j))*sj)*ddj); - const double k2 = sqr(sin(double(k)*sk)*ddk); -#elif defined PARIS_5PT - const double ci = cos(double(min(i,ni-i))*si); - const double cj = cos(double(min(j,nj-j))*sj); - const double ck = cos(double(k)*sk); - const double i2 = ddi*(2.0*ci*ci-16.0*ci+14.0); - const double j2 = ddj*(2.0*cj*cj-16.0*cj+14.0); - const double k2 = ddk*(2.0*ck*ck-16.0*ck+14.0); -#else - const double i2 = sqr(double(min(i,ni-i))*ddi); - const double j2 = sqr(double(min(j,nj-j))*ddj); - const double k2 = sqr(double(k)*ddk); -#endif - const double d = -1.0/(i2+j2+k2); - return cufftDoubleComplex{d*b.x,d*b.y}; - } else { - return cufftDoubleComplex{0.0,0.0}; - } - }); + henry.filter( + bytes, density, potential, + [=] __device__(const int i, const int j, const int k, + const cufftDoubleComplex b) { + if (i || j || k) { + #ifdef PARIS_3PT + const double i2 = sqr(sin(double(min(i, ni - i)) * si) * ddi); + const double j2 = sqr(sin(double(min(j, nj - j)) * sj) * ddj); + const double k2 = sqr(sin(double(k) * sk) * ddk); + #elif defined PARIS_5PT + const double ci = cos(double(min(i, ni - i)) * si); + const double cj = cos(double(min(j, nj - j)) * sj); + const double ck = cos(double(k) * sk); + const double i2 = ddi * (2.0 * ci * ci - 16.0 * ci + 14.0); + const double j2 = ddj * (2.0 * cj * cj - 16.0 * cj + 14.0); + const double k2 = ddk * (2.0 * ck * ck - 16.0 * ck + 14.0); + #else + const double i2 = sqr(double(min(i, ni - i)) * ddi); + const double j2 = sqr(double(min(j, nj - j)) * ddj); + const double k2 = sqr(double(k) * ddk); + #endif + const double d = -1.0 / (i2 + j2 + k2); + return cufftDoubleComplex{d * b.x, d * b.y}; + } else { + return cufftDoubleComplex{0.0, 0.0}; + } + }); } #endif diff --git a/src/gravity/paris/ParisPeriodic.hpp b/src/gravity/paris/ParisPeriodic.hpp index 92b07becd..11c34fe8c 100644 --- a/src/gravity/paris/ParisPeriodic.hpp +++ b/src/gravity/paris/ParisPeriodic.hpp @@ -5,44 +5,50 @@ /** * @brief Periodic Poisson solver using @ref Henry FFT filter. */ -class ParisPeriodic { - public: +class ParisPeriodic +{ + public: + /** + * @param[in] n[3] { Global number of cells in each dimension, without ghost + * cells. } + * @param[in] lo[3] { Physical location of the global lower bound of each + * dimension. } + * @param[in] hi[3] { Physical location of the global upper bound of each + * dimension, minus one grid cell. The one-cell difference is because of the + * periodic domain. See @ref Potential_Paris_3D::Initialize for an example + * computation of these arguments. } + * @param[in] m[3] { Number of MPI tasks in each dimension. } + * @param[in] id[3] { Coordinates of this MPI task, starting at `{0,0,0}`. } + */ + ParisPeriodic(const int n[3], const double lo[3], const double hi[3], + const int m[3], const int id[3]); - /** - * @param[in] n[3] { Global number of cells in each dimension, without ghost cells. } - * @param[in] lo[3] { Physical location of the global lower bound of each dimension. } - * @param[in] hi[3] { Physical location of the global upper bound of each dimension, minus one grid cell. - * The one-cell difference is because of the periodic domain. - * See @ref Potential_Paris_3D::Initialize for an example computation of these arguments. } - * @param[in] m[3] { Number of MPI tasks in each dimension. } - * @param[in] id[3] { Coordinates of this MPI task, starting at `{0,0,0}`. } - */ - ParisPeriodic(const int n[3], const double lo[3], const double hi[3], const int m[3], const int id[3]); + /** + * @return { Number of bytes needed for array arguments for @ref solve. } + */ + size_t bytes() const { return henry.bytes(); } - /** - * @return { Number of bytes needed for array arguments for @ref solve. } - */ - size_t bytes() const { return henry.bytes(); } + /** + * @detail { Solves the Poisson equation for the potential derived from the + * provided density. Assumes periodic boundary conditions. Assumes fields have + * no ghost cells. Uses a 3D FFT provided by the @ref Henry class. } + * @param[in] bytes { Number of bytes allocated for arguments @ref density and + * @ref potential. Used to ensure that the arrays have enough extra work + * space. } + * @param[in,out] density { Input density field. Modified as a work array. + * Must be at least @ref bytes() bytes, likely larger + * than the original field. } + * @param[out] potential { Output potential. Modified as a work array. + * Must be at least @ref bytes() bytes, likely larger + * than the actual output field. } + */ + void solve(size_t bytes, double *density, double *potential) const; - /** - * @detail { Solves the Poisson equation for the potential derived from the provided density. - * Assumes periodic boundary conditions. - * Assumes fields have no ghost cells. - * Uses a 3D FFT provided by the @ref Henry class. } - * @param[in] bytes { Number of bytes allocated for arguments @ref density and @ref potential. - * Used to ensure that the arrays have enough extra work space. } - * @param[in,out] density { Input density field. Modified as a work array. - * Must be at least @ref bytes() bytes, likely larger than the original field. } - * @param[out] potential { Output potential. Modified as a work array. - * Must be at least @ref bytes() bytes, likely larger than the actual output field. } - */ - void solve(size_t bytes, double *density, double *potential) const; - - private: - int ni_,nj_; //!< Number of elements in X and Y dimensions + private: + int ni_, nj_; //!< Number of elements in X and Y dimensions #if defined(PARIS_3PT) || defined(PARIS_5PT) - int nk_; //!< Number of elements in Z dimension + int nk_; //!< Number of elements in Z dimension #endif - double ddi_,ddj_,ddk_; //!< Frequency-independent terms in Poisson solve - HenryPeriodic henry; //!< FFT filter object + double ddi_, ddj_, ddk_; //!< Frequency-independent terms in Poisson solve + HenryPeriodic henry; //!< FFT filter object }; diff --git a/src/gravity/paris/PoissonZero3DBlockedGPU.cu b/src/gravity/paris/PoissonZero3DBlockedGPU.cu index 29093e2a3..57d068e43 100644 --- a/src/gravity/paris/PoissonZero3DBlockedGPU.cu +++ b/src/gravity/paris/PoissonZero3DBlockedGPU.cu @@ -1,101 +1,113 @@ #ifdef PARIS_GALACTIC -#include "PoissonZero3DBlockedGPU.hpp" + #include + #include + #include + #include + #include -#include -#include -#include -#include -#include + #include "PoissonZero3DBlockedGPU.hpp" -static constexpr double sqrt2 = 0.4142135623730950488016887242096980785696718753769480731766797379; +static constexpr double sqrt2 = + 0.4142135623730950488016887242096980785696718753769480731766797379; -static inline __host__ __device__ double sqr(const double x) { return x*x; } +static inline __host__ __device__ double sqr(const double x) { return x * x; } -PoissonZero3DBlockedGPU::PoissonZero3DBlockedGPU(const int n[3], const double lo[3], const double hi[3], const int m[3], const int id[3]): -#ifdef PARIS_GALACTIC_3PT - ddi_(2.0*double(n[0]-1)/(hi[0]-lo[0])), - ddj_(2.0*double(n[1]-1)/(hi[1]-lo[1])), - ddk_(2.0*double(n[2]-1)/(hi[2]-lo[2])), -#elif defined PARIS_GALACTIC_5PT - ddi_(sqr(double(n[0]-1)/(hi[0]-lo[0]))/6.0), - ddj_(sqr(double(n[1]-1)/(hi[1]-lo[1]))/6.0), - ddk_(sqr(double(n[2]-1)/(hi[2]-lo[2]))/6.0), -#else - ddi_{M_PI*double(n[0]-1)/(double(n[0])*(hi[0]-lo[0]))}, - ddj_{M_PI*double(n[1]-1)/(double(n[1])*(hi[1]-lo[1]))}, - ddk_{M_PI*double(n[2]-1)/(double(n[2])*(hi[2]-lo[2]))}, -#endif - idi_(id[0]), - idj_(id[1]), - idk_(id[2]), - mi_(m[0]), - mj_(m[1]), - mk_(m[2]), - ni_(n[0]), - nj_(n[1]), - nk_(n[2]) +PoissonZero3DBlockedGPU::PoissonZero3DBlockedGPU(const int n[3], + const double lo[3], + const double hi[3], + const int m[3], + const int id[3]) + : + #ifdef PARIS_GALACTIC_3PT + ddi_(2.0 * double(n[0] - 1) / (hi[0] - lo[0])), + ddj_(2.0 * double(n[1] - 1) / (hi[1] - lo[1])), + ddk_(2.0 * double(n[2] - 1) / (hi[2] - lo[2])), + #elif defined PARIS_GALACTIC_5PT + ddi_(sqr(double(n[0] - 1) / (hi[0] - lo[0])) / 6.0), + ddj_(sqr(double(n[1] - 1) / (hi[1] - lo[1])) / 6.0), + ddk_(sqr(double(n[2] - 1) / (hi[2] - lo[2])) / 6.0), + #else + ddi_{M_PI * double(n[0] - 1) / (double(n[0]) * (hi[0] - lo[0]))}, + ddj_{M_PI * double(n[1] - 1) / (double(n[1]) * (hi[1] - lo[1]))}, + ddk_{M_PI * double(n[2] - 1) / (double(n[2]) * (hi[2] - lo[2]))}, + #endif + idi_(id[0]), + idj_(id[1]), + idk_(id[2]), + mi_(m[0]), + mj_(m[1]), + mk_(m[2]), + ni_(n[0]), + nj_(n[1]), + nk_(n[2]) { mq_ = int(round(sqrt(mk_))); - while (mk_%mq_) mq_--; - mp_ = mk_/mq_; - assert(mp_*mq_ == mk_); + while (mk_ % mq_) mq_--; + mp_ = mk_ / mq_; + assert(mp_ * mq_ == mk_); - idp_ = idk_/mq_; - idq_ = idk_%mq_; + idp_ = idk_ / mq_; + idq_ = idk_ % mq_; { - const int color = idi_*mj_+idj_; - const int key = idk_; - MPI_Comm_split(MPI_COMM_WORLD,color,key,&commK_); + const int color = idi_ * mj_ + idj_; + const int key = idk_; + MPI_Comm_split(MPI_COMM_WORLD, color, key, &commK_); } { - const int color = idi_*mp_+idp_; - const int key = idj_*mq_+idq_; - MPI_Comm_split(MPI_COMM_WORLD,color,key,&commJ_); + const int color = idi_ * mp_ + idp_; + const int key = idj_ * mq_ + idq_; + MPI_Comm_split(MPI_COMM_WORLD, color, key, &commJ_); } { - const int color = idj_*mq_+idq_; - const int key = idi_*mp_+idp_; - MPI_Comm_split(MPI_COMM_WORLD,color,key,&commI_); + const int color = idj_ * mq_ + idq_; + const int key = idi_ * mp_ + idp_; + MPI_Comm_split(MPI_COMM_WORLD, color, key, &commI_); } - di_ = (ni_+mi_-1)/mi_; - dj_ = (nj_+mj_-1)/mj_; - dk_ = (nk_+mk_-1)/mk_; + di_ = (ni_ + mi_ - 1) / mi_; + dj_ = (nj_ + mj_ - 1) / mj_; + dk_ = (nk_ + mk_ - 1) / mk_; - dip_ = (di_+mp_-1)/mp_; - djq_ = (dj_+mq_-1)/mq_; - const int mjq = mj_*mq_; - dkq_ = (nk_+mjq-1)/mjq; - const int mip = mi_*mp_; - djp_ = (nj_+mip-1)/mip; + dip_ = (di_ + mp_ - 1) / mp_; + djq_ = (dj_ + mq_ - 1) / mq_; + const int mjq = mj_ * mq_; + dkq_ = (nk_ + mjq - 1) / mjq; + const int mip = mi_ * mp_; + djp_ = (nj_ + mip - 1) / mip; - ni2_ = 2*(ni_/2+1); - nj2_ = 2*(nj_/2+1); - nk2_ = 2*(nk_/2+1); + ni2_ = 2 * (ni_ / 2 + 1); + nj2_ = 2 * (nj_ / 2 + 1); + nk2_ = 2 * (nk_ / 2 + 1); - const long nMax = std::max({di_*dj_*dk_,dip_*djq_*mk_*dk_,dip_*mp_*djq_*mq_*dk_,dip_*djq_*nk2_,dip_*djq_*mjq*dkq_,dip_*dkq_*nj2_,dip_*dkq_*mip*djp_,dkq_*djp_*mip*dip_,dkq_*djp_*ni2_}); - bytes_ = nMax*sizeof(double); + const long nMax = std::max( + {di_ * dj_ * dk_, dip_ * djq_ * mk_ * dk_, dip_ * mp_ * djq_ * mq_ * dk_, + dip_ * djq_ * nk2_, dip_ * djq_ * mjq * dkq_, dip_ * dkq_ * nj2_, + dip_ * dkq_ * mip * djp_, dkq_ * djp_ * mip * dip_, dkq_ * djp_ * ni2_}); + bytes_ = nMax * sizeof(double); - int nkh = nk_/2+1; - CHECK(cufftPlanMany(&d2zk_,1,&nk_,&nk_,1,nk_,&nkh,1,nkh,CUFFT_D2Z,dip_*djq_)); - int njh = nj_/2+1; - CHECK(cufftPlanMany(&d2zj_,1,&nj_,&nj_,1,nj_,&njh,1,njh,CUFFT_D2Z,dip_*dkq_)); - int nih = ni_/2+1; - CHECK(cufftPlanMany(&d2zi_,1,&ni_,&ni_,1,ni_,&nih,1,nih,CUFFT_D2Z,dkq_*djp_)); -#ifndef MPI_GPU - CHECK(cudaHostAlloc(&ha_,bytes_+bytes_,cudaHostAllocDefault)); + int nkh = nk_ / 2 + 1; + CHECK(cufftPlanMany(&d2zk_, 1, &nk_, &nk_, 1, nk_, &nkh, 1, nkh, CUFFT_D2Z, + dip_ * djq_)); + int njh = nj_ / 2 + 1; + CHECK(cufftPlanMany(&d2zj_, 1, &nj_, &nj_, 1, nj_, &njh, 1, njh, CUFFT_D2Z, + dip_ * dkq_)); + int nih = ni_ / 2 + 1; + CHECK(cufftPlanMany(&d2zi_, 1, &ni_, &ni_, 1, ni_, &nih, 1, nih, CUFFT_D2Z, + dkq_ * djp_)); + #ifndef MPI_GPU + CHECK(cudaHostAlloc(&ha_, bytes_ + bytes_, cudaHostAllocDefault)); assert(ha_); - hb_ = ha_+nMax; -#endif + hb_ = ha_ + nMax; + #endif } PoissonZero3DBlockedGPU::~PoissonZero3DBlockedGPU() { -#ifndef MPI_GPU + #ifndef MPI_GPU CHECK(cudaFreeHost(ha_)); ha_ = hb_ = nullptr; -#endif + #endif CHECK(cufftDestroy(d2zi_)); CHECK(cufftDestroy(d2zj_)); CHECK(cufftDestroy(d2zk_)); @@ -104,12 +116,13 @@ PoissonZero3DBlockedGPU::~PoissonZero3DBlockedGPU() MPI_Comm_free(&commK_); } -void print(const char *const title, const int ni, const int nj, const int nk, const double *const v) +void print(const char *const title, const int ni, const int nj, const int nk, + const double *const v) { - printf("%s:\n",title); + printf("%s:\n", title); for (int i = 0; i < ni; i++) { for (int j = 0; j < nj; j++) { - for (int k = 0; k < nk; k++) printf("%.6f ",v[(i*nj+j)*nk+k]); + for (int k = 0; k < nk; k++) printf("%.6f ", v[(i * nj + j) * nk + k]); printf(" "); } printf("\n"); @@ -117,408 +130,428 @@ void print(const char *const title, const int ni, const int nj, const int nk, co printf("\n"); } -void PoissonZero3DBlockedGPU::solve(const long bytes, double *const density, double *const potential) const +void PoissonZero3DBlockedGPU::solve(const long bytes, double *const density, + double *const potential) const { assert(bytes >= bytes_); assert(density); assert(potential); - double *const ua = potential; - double *const ub = density; - cufftDoubleComplex *const uc = reinterpret_cast(ub); + double *const ua = potential; + double *const ub = density; + cufftDoubleComplex *const uc = reinterpret_cast(ub); const double ddi = ddi_; const double ddj = ddj_; const double ddk = ddk_; - const int di = di_; - const int dip = dip_; - const int dj = dj_; - const int djp = djp_; - const int djq = djq_; - const int dk = dk_; - const int dkq = dkq_; - const int idi = idi_; - const int idj = idj_; - const int idp = idp_; - const int idq = idq_; - const int mp = mp_; - const int mq = mq_; - const int ni = ni_; - const int ni2 = ni2_; - const int nj = nj_; - const int nj2 = nj2_; - const int nk = nk_; - const int nk2 = nk2_; + const int di = di_; + const int dip = dip_; + const int dj = dj_; + const int djp = djp_; + const int djq = djq_; + const int dk = dk_; + const int dkq = dkq_; + const int idi = idi_; + const int idj = idj_; + const int idp = idp_; + const int idq = idq_; + const int mp = mp_; + const int mq = mq_; + const int ni = ni_; + const int ni2 = ni2_; + const int nj = nj_; + const int nj2 = nj2_; + const int nk = nk_; + const int nk2 = nk2_; gpuFor( - mp,mq,dip,djq,dk, - GPU_LAMBDA(const int p, const int q, const int i, const int j, const int k) { - const int iLo = p*dip; - const int jLo = q*djq; - if ((i+iLo < di) && (j+jLo < dj)) ua[(((p*mq+q)*dip+i)*djq+j)*dk+k] = ub[((i+iLo)*dj+j+jLo)*dk+k]; - }); -#ifndef MPI_GPU - CHECK(cudaMemcpy(ha_,ua,bytes_,cudaMemcpyDeviceToHost)); - MPI_Alltoall(ha_,dip*djq*dk,MPI_DOUBLE,hb_,dip*djq*dk,MPI_DOUBLE,commK_); - CHECK(cudaMemcpyAsync(ub,hb_,bytes_,cudaMemcpyHostToDevice,0)); -#else + mp, mq, dip, djq, dk, + GPU_LAMBDA(const int p, const int q, const int i, const int j, + const int k) { + const int iLo = p * dip; + const int jLo = q * djq; + if ((i + iLo < di) && (j + jLo < dj)) + ua[(((p * mq + q) * dip + i) * djq + j) * dk + k] = + ub[((i + iLo) * dj + j + jLo) * dk + k]; + }); + #ifndef MPI_GPU + CHECK(cudaMemcpy(ha_, ua, bytes_, cudaMemcpyDeviceToHost)); + MPI_Alltoall(ha_, dip * djq * dk, MPI_DOUBLE, hb_, dip * djq * dk, MPI_DOUBLE, + commK_); + CHECK(cudaMemcpyAsync(ub, hb_, bytes_, cudaMemcpyHostToDevice, 0)); + #else CHECK(cudaDeviceSynchronize()); - MPI_Alltoall(ua,dip*djq*dk,MPI_DOUBLE,ub,dip*djq*dk,MPI_DOUBLE,commK_); -#endif + MPI_Alltoall(ua, dip * djq * dk, MPI_DOUBLE, ub, dip * djq * dk, MPI_DOUBLE, + commK_); + #endif gpuFor( - dip,djq,nk/2+1, - GPU_LAMBDA(const int i, const int j, const int k) { - const int ij = (i*djq+j)*nk; - const int kk = k+k; - if (k == 0) { - ua[ij] = ub[(i*djq+j)*dk]; - } else if (kk == nk) { - const int pq = (nk-1)/dk; - const int kpq = (nk-1)%dk; - ua[ij+k] = -ub[((pq*dip+i)*djq+j)*dk+kpq]; - } else { - const int pqa = (kk-1)/dk; - const int kka = (kk-1)%dk; - ua[ij+(nk-k)] = -ub[((pqa*dip+i)*djq+j)*dk+kka]; - const int pqb = kk/dk; - const int kkb = kk%dk; - ua[ij+k] = ub[((pqb*dip+i)*djq+j)*dk+kkb]; - } - }); - CHECK(cufftExecD2Z(d2zk_,ua,uc)); + dip, djq, nk / 2 + 1, GPU_LAMBDA(const int i, const int j, const int k) { + const int ij = (i * djq + j) * nk; + const int kk = k + k; + if (k == 0) { + ua[ij] = ub[(i * djq + j) * dk]; + } else if (kk == nk) { + const int pq = (nk - 1) / dk; + const int kpq = (nk - 1) % dk; + ua[ij + k] = -ub[((pq * dip + i) * djq + j) * dk + kpq]; + } else { + const int pqa = (kk - 1) / dk; + const int kka = (kk - 1) % dk; + ua[ij + (nk - k)] = -ub[((pqa * dip + i) * djq + j) * dk + kka]; + const int pqb = kk / dk; + const int kkb = kk % dk; + ua[ij + k] = ub[((pqb * dip + i) * djq + j) * dk + kkb]; + } + }); + CHECK(cufftExecD2Z(d2zk_, ua, uc)); gpuFor( - dip,nk/2+1,djq, - GPU_LAMBDA(const int i, const int k, const int j) { - if (k == 0) { - const int q0 = (nk-1)/dkq; - const int k0 = (nk-1)%dkq; - ua[((q0*dip+i)*dkq+k0)*djq+j] = 2.0*ub[(i*djq+j)*nk2]; - } else if (k+k == nk) { - const int qa = (nk/2-1)/dkq; - const int ka = (nk/2-1)%dkq; - ua[((qa*dip+i)*dkq+ka)*djq+j] = sqrt2*ub[(i*djq+j)*nk2+nk]; - } else { - const int qa = (nk-k-1)/dkq; - const int ka = (nk-k-1)%dkq; - const int qb = (k-1)/dkq; - const int kb = (k-1)%dkq; - const double ak = 2.0*ub[(i*djq+j)*nk2+2*k]; - const double bk = 2.0*ub[(i*djq+j)*nk2+2*k+1]; - double wa,wb; - sincospi(double(k)/double(nk+nk),&wb,&wa); - ua[((qa*dip+i)*dkq+ka)*djq+j] = wa*ak+wb*bk; - ua[((qb*dip+i)*dkq+kb)*djq+j] = wb*ak-wa*bk; - } - }); -#ifndef MPI_GPU - CHECK(cudaMemcpy(ha_,ua,bytes_,cudaMemcpyDeviceToHost)); - MPI_Alltoall(ha_,dip*dkq*djq,MPI_DOUBLE,hb_,dip*dkq*djq,MPI_DOUBLE,commJ_); - CHECK(cudaMemcpyAsync(ub,hb_,bytes_,cudaMemcpyHostToDevice,0)); -#else + dip, nk / 2 + 1, djq, GPU_LAMBDA(const int i, const int k, const int j) { + if (k == 0) { + const int q0 = (nk - 1) / dkq; + const int k0 = (nk - 1) % dkq; + ua[((q0 * dip + i) * dkq + k0) * djq + j] = + 2.0 * ub[(i * djq + j) * nk2]; + } else if (k + k == nk) { + const int qa = (nk / 2 - 1) / dkq; + const int ka = (nk / 2 - 1) % dkq; + ua[((qa * dip + i) * dkq + ka) * djq + j] = + sqrt2 * ub[(i * djq + j) * nk2 + nk]; + } else { + const int qa = (nk - k - 1) / dkq; + const int ka = (nk - k - 1) % dkq; + const int qb = (k - 1) / dkq; + const int kb = (k - 1) % dkq; + const double ak = 2.0 * ub[(i * djq + j) * nk2 + 2 * k]; + const double bk = 2.0 * ub[(i * djq + j) * nk2 + 2 * k + 1]; + double wa, wb; + sincospi(double(k) / double(nk + nk), &wb, &wa); + ua[((qa * dip + i) * dkq + ka) * djq + j] = wa * ak + wb * bk; + ua[((qb * dip + i) * dkq + kb) * djq + j] = wb * ak - wa * bk; + } + }); + #ifndef MPI_GPU + CHECK(cudaMemcpy(ha_, ua, bytes_, cudaMemcpyDeviceToHost)); + MPI_Alltoall(ha_, dip * dkq * djq, MPI_DOUBLE, hb_, dip * dkq * djq, + MPI_DOUBLE, commJ_); + CHECK(cudaMemcpyAsync(ub, hb_, bytes_, cudaMemcpyHostToDevice, 0)); + #else CHECK(cudaDeviceSynchronize()); - MPI_Alltoall(ua,dip*dkq*djq,MPI_DOUBLE,ub,dip*dkq*djq,MPI_DOUBLE,commJ_); -#endif + MPI_Alltoall(ua, dip * dkq * djq, MPI_DOUBLE, ub, dip * dkq * djq, MPI_DOUBLE, + commJ_); + #endif gpuFor( - dip,dkq,nj/2+1, - GPU_LAMBDA(const int i, const int k, const int j) { - const int ik = (i*dkq+k)*nj; - if (j == 0) { - ua[ik] = ub[(i*dkq+k)*djq]; - } else if (j+j == nj) { - const int qa = (nj-1)/djq; - const int ja = (nj-1)%djq; - ua[ik+nj/2] = -ub[((qa*dip+i)*dkq+k)*djq+ja]; - } else { - const int qa = (j+j-1)/djq; - const int ja = (j+j-1)%djq; - ua[ik+nj-j] = -ub[((qa*dip+i)*dkq+k)*djq+ja]; - const int qb = (j+j)/djq; - const int jb = (j+j)%djq; - ua[ik+j] = ub[((qb*dip+i)*dkq+k)*djq+jb]; - } - }); - CHECK(cufftExecD2Z(d2zj_,ua,uc)); + dip, dkq, nj / 2 + 1, GPU_LAMBDA(const int i, const int k, const int j) { + const int ik = (i * dkq + k) * nj; + if (j == 0) { + ua[ik] = ub[(i * dkq + k) * djq]; + } else if (j + j == nj) { + const int qa = (nj - 1) / djq; + const int ja = (nj - 1) % djq; + ua[ik + nj / 2] = -ub[((qa * dip + i) * dkq + k) * djq + ja]; + } else { + const int qa = (j + j - 1) / djq; + const int ja = (j + j - 1) % djq; + ua[ik + nj - j] = -ub[((qa * dip + i) * dkq + k) * djq + ja]; + const int qb = (j + j) / djq; + const int jb = (j + j) % djq; + ua[ik + j] = ub[((qb * dip + i) * dkq + k) * djq + jb]; + } + }); + CHECK(cufftExecD2Z(d2zj_, ua, uc)); gpuFor( - dkq,nj/2+1,dip, - GPU_LAMBDA(const int k, const int j, const int i) { - if (j == 0) { - const int pa = (nj-1)/djp; - const int ja = (nj-1)%djp; - ua[((pa*dkq+k)*djp+ja)*dip+i] = 2.0*ub[(i*dkq+k)*nj2]; - } else if (j+j == nj) { - const int pa = (nj/2-1)/djp; - const int ja = (nj/2-1)%djp; - ua[((pa*dkq+k)*djp+ja)*dip+i] = sqrt2*ub[(i*dkq+k)*nj2+nj]; - } else { - const double aj = 2.0*ub[(i*dkq+k)*nj2+2*j]; - const double bj = 2.0*ub[(i*dkq+k)*nj2+2*j+1]; - double wa,wb; - sincospi(double(j)/double(nj+nj),&wb,&wa); - const int pa = (nj-j-1)/djp; - const int ja = (nj-j-1)%djp; - const int pb = (j-1)/djp; - const int jb = (j-1)%djp; - ua[((pa*dkq+k)*djp+ja)*dip+i] = wa*aj+wb*bj; - ua[((pb*dkq+k)*djp+jb)*dip+i] = wb*aj-wa*bj; - } - }); -#ifndef MPI_GPU - CHECK(cudaMemcpy(ha_,ua,bytes_,cudaMemcpyDeviceToHost)); - MPI_Alltoall(ha_,dkq*djp*dip,MPI_DOUBLE,hb_,dkq*djp*dip,MPI_DOUBLE,commI_); - CHECK(cudaMemcpyAsync(ub,hb_,bytes_,cudaMemcpyHostToDevice,0)); -#else + dkq, nj / 2 + 1, dip, GPU_LAMBDA(const int k, const int j, const int i) { + if (j == 0) { + const int pa = (nj - 1) / djp; + const int ja = (nj - 1) % djp; + ua[((pa * dkq + k) * djp + ja) * dip + i] = + 2.0 * ub[(i * dkq + k) * nj2]; + } else if (j + j == nj) { + const int pa = (nj / 2 - 1) / djp; + const int ja = (nj / 2 - 1) % djp; + ua[((pa * dkq + k) * djp + ja) * dip + i] = + sqrt2 * ub[(i * dkq + k) * nj2 + nj]; + } else { + const double aj = 2.0 * ub[(i * dkq + k) * nj2 + 2 * j]; + const double bj = 2.0 * ub[(i * dkq + k) * nj2 + 2 * j + 1]; + double wa, wb; + sincospi(double(j) / double(nj + nj), &wb, &wa); + const int pa = (nj - j - 1) / djp; + const int ja = (nj - j - 1) % djp; + const int pb = (j - 1) / djp; + const int jb = (j - 1) % djp; + ua[((pa * dkq + k) * djp + ja) * dip + i] = wa * aj + wb * bj; + ua[((pb * dkq + k) * djp + jb) * dip + i] = wb * aj - wa * bj; + } + }); + #ifndef MPI_GPU + CHECK(cudaMemcpy(ha_, ua, bytes_, cudaMemcpyDeviceToHost)); + MPI_Alltoall(ha_, dkq * djp * dip, MPI_DOUBLE, hb_, dkq * djp * dip, + MPI_DOUBLE, commI_); + CHECK(cudaMemcpyAsync(ub, hb_, bytes_, cudaMemcpyHostToDevice, 0)); + #else CHECK(cudaDeviceSynchronize()); - MPI_Alltoall(ua,dkq*djp*dip,MPI_DOUBLE,ub,dkq*djp*dip,MPI_DOUBLE,commI_); -#endif + MPI_Alltoall(ua, dkq * djp * dip, MPI_DOUBLE, ub, dkq * djp * dip, MPI_DOUBLE, + commI_); + #endif gpuFor( - dkq,djp,ni/2+1, - GPU_LAMBDA(const int k, const int j, const int i) { - const int kj = (k*djp+j)*ni; - if (i == 0) { - ua[kj] = ub[(k*djp+j)*dip]; - } else if (i+i == ni) { - const int ida = (ni-1)/di; - const int pa = (ni-1)%di/dip; - const int ia = ni-1-ida*di-pa*dip; - ua[kj+ni/2] = -ub[(((ida*mp+pa)*dkq+k)*djp+j)*dip+ia]; - } else { - const int ida = (i+i-1)/di; - const int pa = (i+i-1)%di/dip; - const int ia = i+i-1-ida*di-pa*dip; - ua[kj+ni-i] = -ub[(((ida*mp+pa)*dkq+k)*djp+j)*dip+ia]; - const int idb = (i+i)/di; - const int pb = (i+i)%di/dip; - const int ib = i+i-idb*di-pb*dip; - ua[kj+i] = ub[(((idb*mp+pb)*dkq+k)*djp+j)*dip+ib]; - } - }); - CHECK(cufftExecD2Z(d2zi_,ua,uc)); - { -#ifdef PARIS_GALACTIC_3PT - const double si = M_PI/double(ni+ni); - const double sj = M_PI/double(nj+nj); - const double sk = M_PI/double(nk+nk); - const double iin = sqr(sin(double(ni)*si)*ddi); -#elif defined PARIS_GALACTIC_5PT - const double si = M_PI/double(ni); - const double sj = M_PI/double(nj); - const double sk = M_PI/double(nk); - const double cin = cos(double(ni)*si); - const double iin = ddi*(2.0*cin*cin-16.0*cin+14.0); -#else - const double iin = sqr(double(ni)*ddi); -#endif - const int jLo = (idi*mp+idp)*djp; - const int kLo = (idj*mq+idq)*dkq; - gpuFor( - dkq,djp,ni/2+1, - GPU_LAMBDA(const int k, const int j, const int i) { - const int kj = (k*djp+j)*ni; - const int kj2 = (k*djp+j)*ni2; -#ifdef PARIS_GALACTIC_3PT - const double jjkk = sqr(sin(double(jLo+j+1)*sj)*ddj)+sqr(sin(double(kLo+k+1)*sk)*ddk); -#elif defined PARIS_GALACTIC_5PT - const double cj = cos(double(jLo+j+1)*sj); - const double jj = ddj*(2.0*cj*cj-16.0*cj+14.0); - const double ck = cos(double(kLo+k+1)*sk); - const double kk = ddk*(2.0*ck*ck-16.0*ck+14.0); - const double jjkk = jj+kk; -#else - const double jjkk = sqr(double(jLo+j+1)*ddj)+sqr(double(kLo+k+1)*ddk); -#endif + dkq, djp, ni / 2 + 1, GPU_LAMBDA(const int k, const int j, const int i) { + const int kj = (k * djp + j) * ni; if (i == 0) { - ua[kj] = -2.0*ub[kj2]/(iin+jjkk); + ua[kj] = ub[(k * djp + j) * dip]; + } else if (i + i == ni) { + const int ida = (ni - 1) / di; + const int pa = (ni - 1) % di / dip; + const int ia = ni - 1 - ida * di - pa * dip; + ua[kj + ni / 2] = + -ub[(((ida * mp + pa) * dkq + k) * djp + j) * dip + ia]; } else { -#ifdef PARIS_GALACTIC_3PT - const double ii = sqr(sin(double(i)*si)*ddi); -#elif defined PARIS_GALACTIC_5PT - const double ci = cos(double(i)*si); - const double ii = ddi*(2.0*ci*ci-16.0*ci+14.0); -#else - const double ii = sqr(double(i)*ddi); -#endif - if (i+i == ni) { - ua[kj+ni/2] = -2.0*ub[kj2+ni]/(ii+jjkk); - } else { - const double ai = 2.0*ub[kj2+2*i]; - const double bi = 2.0*ub[kj2+2*i+1]; - double wa,wb; - sincospi(double(i)/double(ni+ni),&wb,&wa); -#ifdef PARIS_GALACTIC_3PT - const double nii = sqr(sin(double(ni-i)*si)*ddi); -#elif defined PARIS_GALACTIC_5PT - const double cni = cos(double(ni-i)*si); - const double nii = ddi*(2.0*cni*cni-16.0*cni+14.0); -#else - const double nii = sqr(double(ni-i)*ddi); -#endif - const double aai = -(wa*ai+wb*bi)/(nii+jjkk); - const double bbi = (wa*bi-wb*ai)/(ii+jjkk); - const double apb = aai+bbi; - const double amb = aai-bbi; - ua[kj+i] = wa*amb+wb*apb; - ua[kj+ni-i] = wa*apb-wb*amb; - } + const int ida = (i + i - 1) / di; + const int pa = (i + i - 1) % di / dip; + const int ia = i + i - 1 - ida * di - pa * dip; + ua[kj + ni - i] = + -ub[(((ida * mp + pa) * dkq + k) * djp + j) * dip + ia]; + const int idb = (i + i) / di; + const int pb = (i + i) % di / dip; + const int ib = i + i - idb * di - pb * dip; + ua[kj + i] = ub[(((idb * mp + pb) * dkq + k) * djp + j) * dip + ib]; } }); + CHECK(cufftExecD2Z(d2zi_, ua, uc)); + { + #ifdef PARIS_GALACTIC_3PT + const double si = M_PI / double(ni + ni); + const double sj = M_PI / double(nj + nj); + const double sk = M_PI / double(nk + nk); + const double iin = sqr(sin(double(ni) * si) * ddi); + #elif defined PARIS_GALACTIC_5PT + const double si = M_PI / double(ni); + const double sj = M_PI / double(nj); + const double sk = M_PI / double(nk); + const double cin = cos(double(ni) * si); + const double iin = ddi * (2.0 * cin * cin - 16.0 * cin + 14.0); + #else + const double iin = sqr(double(ni) * ddi); + #endif + const int jLo = (idi * mp + idp) * djp; + const int kLo = (idj * mq + idq) * dkq; + gpuFor( + dkq, djp, ni / 2 + 1, + GPU_LAMBDA(const int k, const int j, const int i) { + const int kj = (k * djp + j) * ni; + const int kj2 = (k * djp + j) * ni2; + #ifdef PARIS_GALACTIC_3PT + const double jjkk = sqr(sin(double(jLo + j + 1) * sj) * ddj) + + sqr(sin(double(kLo + k + 1) * sk) * ddk); + #elif defined PARIS_GALACTIC_5PT + const double cj = cos(double(jLo + j + 1) * sj); + const double jj = ddj * (2.0 * cj * cj - 16.0 * cj + 14.0); + const double ck = cos(double(kLo + k + 1) * sk); + const double kk = ddk * (2.0 * ck * ck - 16.0 * ck + 14.0); + const double jjkk = jj + kk; + #else + const double jjkk = + sqr(double(jLo + j + 1) * ddj) + sqr(double(kLo + k + 1) * ddk); + #endif + if (i == 0) { + ua[kj] = -2.0 * ub[kj2] / (iin + jjkk); + } else { + #ifdef PARIS_GALACTIC_3PT + const double ii = sqr(sin(double(i) * si) * ddi); + #elif defined PARIS_GALACTIC_5PT + const double ci = cos(double(i) * si); + const double ii = ddi * (2.0 * ci * ci - 16.0 * ci + 14.0); + #else + const double ii = sqr(double(i) * ddi); + #endif + if (i + i == ni) { + ua[kj + ni / 2] = -2.0 * ub[kj2 + ni] / (ii + jjkk); + } else { + const double ai = 2.0 * ub[kj2 + 2 * i]; + const double bi = 2.0 * ub[kj2 + 2 * i + 1]; + double wa, wb; + sincospi(double(i) / double(ni + ni), &wb, &wa); + #ifdef PARIS_GALACTIC_3PT + const double nii = sqr(sin(double(ni - i) * si) * ddi); + #elif defined PARIS_GALACTIC_5PT + const double cni = cos(double(ni - i) * si); + const double nii = ddi * (2.0 * cni * cni - 16.0 * cni + 14.0); + #else + const double nii = sqr(double(ni - i) * ddi); + #endif + const double aai = -(wa * ai + wb * bi) / (nii + jjkk); + const double bbi = (wa * bi - wb * ai) / (ii + jjkk); + const double apb = aai + bbi; + const double amb = aai - bbi; + ua[kj + i] = wa * amb + wb * apb; + ua[kj + ni - i] = wa * apb - wb * amb; + } + } + }); } - CHECK(cufftExecD2Z(d2zi_,ua,uc)); + CHECK(cufftExecD2Z(d2zi_, ua, uc)); gpuFor( - dkq,ni/2+1,djp, - GPU_LAMBDA(const int k, const int i, const int j) { - if (i == 0) { - ua[k*dip*djp+j] = ub[(k*djp+j)*ni2]; - } else if (i+i == ni) { - const int ida = (ni-1)/di; - const int pa = (ni-1)%di/dip; - const int ia = ni-1-ida*di-pa*dip; - ua[(((ida*mp+pa)*dkq+k)*dip+ia)*djp+j] = -ub[(k*djp+j)*ni2+ni]; - } else { - const double ai = ub[(k*djp+j)*ni2+i+i]; - const double bi = ub[(k*djp+j)*ni2+i+i+1]; - const int ida = (i+i-1)/di; - const int pa = (i+i-1)%di/dip; - const int ia = i+i-1-ida*di-pa*dip; - ua[(((ida*mp+pa)*dkq+k)*dip+ia)*djp+j] = bi-ai; - const int idb = (i+i)/di; - const int pb = (i+i)%di/dip; - const int ib = i+i-idb*di-pb*dip; - ua[(((idb*mp+pb)*dkq+k)*dip+ib)*djp+j] = ai+bi; - } - }); -#ifndef MPI_GPU - CHECK(cudaMemcpy(ha_,ua,bytes_,cudaMemcpyDeviceToHost)); - MPI_Alltoall(ha_,dkq*djp*dip,MPI_DOUBLE,hb_,dkq*djp*dip,MPI_DOUBLE,commI_); - CHECK(cudaMemcpyAsync(ub,hb_,bytes_,cudaMemcpyHostToDevice,0)); -#else + dkq, ni / 2 + 1, djp, GPU_LAMBDA(const int k, const int i, const int j) { + if (i == 0) { + ua[k * dip * djp + j] = ub[(k * djp + j) * ni2]; + } else if (i + i == ni) { + const int ida = (ni - 1) / di; + const int pa = (ni - 1) % di / dip; + const int ia = ni - 1 - ida * di - pa * dip; + ua[(((ida * mp + pa) * dkq + k) * dip + ia) * djp + j] = + -ub[(k * djp + j) * ni2 + ni]; + } else { + const double ai = ub[(k * djp + j) * ni2 + i + i]; + const double bi = ub[(k * djp + j) * ni2 + i + i + 1]; + const int ida = (i + i - 1) / di; + const int pa = (i + i - 1) % di / dip; + const int ia = i + i - 1 - ida * di - pa * dip; + ua[(((ida * mp + pa) * dkq + k) * dip + ia) * djp + j] = bi - ai; + const int idb = (i + i) / di; + const int pb = (i + i) % di / dip; + const int ib = i + i - idb * di - pb * dip; + ua[(((idb * mp + pb) * dkq + k) * dip + ib) * djp + j] = ai + bi; + } + }); + #ifndef MPI_GPU + CHECK(cudaMemcpy(ha_, ua, bytes_, cudaMemcpyDeviceToHost)); + MPI_Alltoall(ha_, dkq * djp * dip, MPI_DOUBLE, hb_, dkq * djp * dip, + MPI_DOUBLE, commI_); + CHECK(cudaMemcpyAsync(ub, hb_, bytes_, cudaMemcpyHostToDevice, 0)); + #else CHECK(cudaDeviceSynchronize()); - MPI_Alltoall(ua,dkq*djp*dip,MPI_DOUBLE,ub,dkq*djp*dip,MPI_DOUBLE,commI_); -#endif + MPI_Alltoall(ua, dkq * djp * dip, MPI_DOUBLE, ub, dkq * djp * dip, MPI_DOUBLE, + commI_); + #endif gpuFor( - dkq,dip,nj/2+1, - GPU_LAMBDA(const int k, const int i, const int j) { - const long ki = (k*dip+i)*nj; - if (j == 0) { - const int pa = (nj-1)/djp; - const int ja = (nj-1)-pa*djp; - ua[ki] = ub[((pa*dkq+k)*dip+i)*djp+ja]; - } else if (j+j == nj) { - const int pa = (nj/2-1)/djp; - const int ja = nj/2-1-pa*djp; - ua[ki+nj/2] = sqrt2*ub[((pa*dkq+k)*dip+i)*djp+ja]; - } else { - const int pa = (nj-1-j)/djp; - const int ja = nj-1-j-pa*djp; - const double aj = ub[((pa*dkq+k)*dip+i)*djp+ja]; - const int pb = (j-1)/djp; - const int jb = j-1-pb*djp; - const double bj = ub[((pb*dkq+k)*dip+i)*djp+jb]; - const double apb = aj+bj; - const double amb = aj-bj; - double wa,wb; - sincospi(double(j)/double(nj+nj),&wb,&wa); - ua[ki+j] = wa*amb+wb*apb; - ua[ki+nj-j] = wa*apb-wb*amb; - } - }); - CHECK(cufftExecD2Z(d2zj_,ua,uc)); + dkq, dip, nj / 2 + 1, GPU_LAMBDA(const int k, const int i, const int j) { + const long ki = (k * dip + i) * nj; + if (j == 0) { + const int pa = (nj - 1) / djp; + const int ja = (nj - 1) - pa * djp; + ua[ki] = ub[((pa * dkq + k) * dip + i) * djp + ja]; + } else if (j + j == nj) { + const int pa = (nj / 2 - 1) / djp; + const int ja = nj / 2 - 1 - pa * djp; + ua[ki + nj / 2] = sqrt2 * ub[((pa * dkq + k) * dip + i) * djp + ja]; + } else { + const int pa = (nj - 1 - j) / djp; + const int ja = nj - 1 - j - pa * djp; + const double aj = ub[((pa * dkq + k) * dip + i) * djp + ja]; + const int pb = (j - 1) / djp; + const int jb = j - 1 - pb * djp; + const double bj = ub[((pb * dkq + k) * dip + i) * djp + jb]; + const double apb = aj + bj; + const double amb = aj - bj; + double wa, wb; + sincospi(double(j) / double(nj + nj), &wb, &wa); + ua[ki + j] = wa * amb + wb * apb; + ua[ki + nj - j] = wa * apb - wb * amb; + } + }); + CHECK(cufftExecD2Z(d2zj_, ua, uc)); gpuFor( - dip,nj/2+1,dkq, - GPU_LAMBDA(const int i, const int j, const int k) { - if (j == 0) { - ua[i*djq*dkq+k] = ub[(k*dip+i)*nj2]; - } else if (j+j == nj) { - const int ida = (nj-1)/dj; - const int qa = (nj-1)%dj/djq; - const int ja = nj-1-ida*dj-qa*djq; - ua[(((ida*mq+qa)*dip+i)*djq+ja)*dkq+k] = -ub[(k*dip+i)*nj2+nj]; - } else { - const int jj = j+j; - const int ida = (jj-1)/dj; - const int qa = (jj-1)%dj/djq; - const int ja = jj-1-ida*dj-qa*djq; - const int idb = jj/dj; - const int qb = jj%dj/djq; - const int jb = jj-idb*dj-qb*djq; - const double aj = ub[(k*dip+i)*nj2+jj]; - const double bj = ub[(k*dip+i)*nj2+jj+1]; - ua[(((ida*mq+qa)*dip+i)*djq+ja)*dkq+k] = bj-aj; - ua[(((idb*mq+qb)*dip+i)*djq+jb)*dkq+k] = aj+bj; - } - }); -#ifndef MPI_GPU - CHECK(cudaMemcpy(ha_,ua,bytes_,cudaMemcpyDeviceToHost)); - MPI_Alltoall(ha_,dip*djq*dkq,MPI_DOUBLE,hb_,dip*djq*dkq,MPI_DOUBLE,commJ_); - CHECK(cudaMemcpyAsync(ub,hb_,bytes_,cudaMemcpyHostToDevice,0)); -#else + dip, nj / 2 + 1, dkq, GPU_LAMBDA(const int i, const int j, const int k) { + if (j == 0) { + ua[i * djq * dkq + k] = ub[(k * dip + i) * nj2]; + } else if (j + j == nj) { + const int ida = (nj - 1) / dj; + const int qa = (nj - 1) % dj / djq; + const int ja = nj - 1 - ida * dj - qa * djq; + ua[(((ida * mq + qa) * dip + i) * djq + ja) * dkq + k] = + -ub[(k * dip + i) * nj2 + nj]; + } else { + const int jj = j + j; + const int ida = (jj - 1) / dj; + const int qa = (jj - 1) % dj / djq; + const int ja = jj - 1 - ida * dj - qa * djq; + const int idb = jj / dj; + const int qb = jj % dj / djq; + const int jb = jj - idb * dj - qb * djq; + const double aj = ub[(k * dip + i) * nj2 + jj]; + const double bj = ub[(k * dip + i) * nj2 + jj + 1]; + ua[(((ida * mq + qa) * dip + i) * djq + ja) * dkq + k] = bj - aj; + ua[(((idb * mq + qb) * dip + i) * djq + jb) * dkq + k] = aj + bj; + } + }); + #ifndef MPI_GPU + CHECK(cudaMemcpy(ha_, ua, bytes_, cudaMemcpyDeviceToHost)); + MPI_Alltoall(ha_, dip * djq * dkq, MPI_DOUBLE, hb_, dip * djq * dkq, + MPI_DOUBLE, commJ_); + CHECK(cudaMemcpyAsync(ub, hb_, bytes_, cudaMemcpyHostToDevice, 0)); + #else CHECK(cudaDeviceSynchronize()); - MPI_Alltoall(ua,dip*djq*dkq,MPI_DOUBLE,ub,dip*djq*dkq,MPI_DOUBLE,commJ_); -#endif + MPI_Alltoall(ua, dip * djq * dkq, MPI_DOUBLE, ub, dip * djq * dkq, MPI_DOUBLE, + commJ_); + #endif gpuFor( - dip,djq,nk/2+1, - GPU_LAMBDA(const int i, const int j, const int k) { - const long ij = (i*djq+j)*nk; - if (k == 0) { - const int qa = (nk-1)/dkq; - const int ka = nk-1-qa*dkq; - ua[ij] = ub[((qa*dip+i)*djq+j)*dkq+ka]; - } else if (k+k == nk) { - const int qa = (nk/2-1)/dkq; - const int ka = nk/2-1-qa*dkq; - ua[ij+nk/2] = sqrt2*ub[((qa*dip+i)*djq+j)*dkq+ka]; - } else { - const int qa = (nk-1-k)/dkq; - const int ka = nk-1-k-qa*dkq; - const double ak = ub[((qa*dip+i)*djq+j)*dkq+ka]; - const int qb = (k-1)/dkq; - const int kb = k-1-qb*dkq; - const double bk = ub[((qb*dip+i)*djq+j)*dkq+kb]; - const double apb = ak+bk; - const double amb = ak-bk; - double wa,wb; - sincospi(double(k)/double(nk+nk),&wb,&wa); - ua[ij+k] = wa*amb+wb*apb; - ua[ij+nk-k] = wa*apb-wb*amb; - } - }); - CHECK(cufftExecD2Z(d2zk_,ua,uc)); - const double divN = 1.0/(8.0*double(ni)*double(nj)*double(nk)); + dip, djq, nk / 2 + 1, GPU_LAMBDA(const int i, const int j, const int k) { + const long ij = (i * djq + j) * nk; + if (k == 0) { + const int qa = (nk - 1) / dkq; + const int ka = nk - 1 - qa * dkq; + ua[ij] = ub[((qa * dip + i) * djq + j) * dkq + ka]; + } else if (k + k == nk) { + const int qa = (nk / 2 - 1) / dkq; + const int ka = nk / 2 - 1 - qa * dkq; + ua[ij + nk / 2] = sqrt2 * ub[((qa * dip + i) * djq + j) * dkq + ka]; + } else { + const int qa = (nk - 1 - k) / dkq; + const int ka = nk - 1 - k - qa * dkq; + const double ak = ub[((qa * dip + i) * djq + j) * dkq + ka]; + const int qb = (k - 1) / dkq; + const int kb = k - 1 - qb * dkq; + const double bk = ub[((qb * dip + i) * djq + j) * dkq + kb]; + const double apb = ak + bk; + const double amb = ak - bk; + double wa, wb; + sincospi(double(k) / double(nk + nk), &wb, &wa); + ua[ij + k] = wa * amb + wb * apb; + ua[ij + nk - k] = wa * apb - wb * amb; + } + }); + CHECK(cufftExecD2Z(d2zk_, ua, uc)); + const double divN = 1.0 / (8.0 * double(ni) * double(nj) * double(nk)); gpuFor( - dip,djq,nk/2+1, - GPU_LAMBDA(const int i, const int j, const int k) { - if (k == 0) { - ua[(i*djq+j)*dk] = divN*ub[(i*djq+j)*nk2]; - } else if (k+k == nk) { - const int pqa = (nk-1)/dk; - const int ka = nk-1-pqa*dk; - ua[((pqa*dip+i)*djq+j)*dk+ka] = -divN*ub[(i*djq+j)*nk2+nk]; - } else { - const int kk = k+k; - const double ak = ub[(i*djq+j)*nk2+kk]; - const double bk = ub[(i*djq+j)*nk2+kk+1]; - const int pqa = (kk-1)/dk; - const int ka = kk-1-pqa*dk; - ua[((pqa*dip+i)*djq+j)*dk+ka] = divN*(bk-ak); - const int pqb = kk/dk; - const int kb = kk-pqb*dk; - ua[((pqb*dip+i)*djq+j)*dk+kb] = divN*(ak+bk); - } - }); -#ifndef MPI_GPU - CHECK(cudaMemcpy(ha_,ua,bytes_,cudaMemcpyDeviceToHost)); - MPI_Alltoall(ha_,dip*djq*dk,MPI_DOUBLE,hb_,dip*djq*dk,MPI_DOUBLE,commK_); - CHECK(cudaMemcpyAsync(ub,hb_,bytes_,cudaMemcpyHostToDevice,0)); -#else + dip, djq, nk / 2 + 1, GPU_LAMBDA(const int i, const int j, const int k) { + if (k == 0) { + ua[(i * djq + j) * dk] = divN * ub[(i * djq + j) * nk2]; + } else if (k + k == nk) { + const int pqa = (nk - 1) / dk; + const int ka = nk - 1 - pqa * dk; + ua[((pqa * dip + i) * djq + j) * dk + ka] = + -divN * ub[(i * djq + j) * nk2 + nk]; + } else { + const int kk = k + k; + const double ak = ub[(i * djq + j) * nk2 + kk]; + const double bk = ub[(i * djq + j) * nk2 + kk + 1]; + const int pqa = (kk - 1) / dk; + const int ka = kk - 1 - pqa * dk; + ua[((pqa * dip + i) * djq + j) * dk + ka] = divN * (bk - ak); + const int pqb = kk / dk; + const int kb = kk - pqb * dk; + ua[((pqb * dip + i) * djq + j) * dk + kb] = divN * (ak + bk); + } + }); + #ifndef MPI_GPU + CHECK(cudaMemcpy(ha_, ua, bytes_, cudaMemcpyDeviceToHost)); + MPI_Alltoall(ha_, dip * djq * dk, MPI_DOUBLE, hb_, dip * djq * dk, MPI_DOUBLE, + commK_); + CHECK(cudaMemcpyAsync(ub, hb_, bytes_, cudaMemcpyHostToDevice, 0)); + #else CHECK(cudaDeviceSynchronize()); - MPI_Alltoall(ua,dip*djq*dk,MPI_DOUBLE,ub,dip*djq*dk,MPI_DOUBLE,commK_); -#endif + MPI_Alltoall(ua, dip * djq * dk, MPI_DOUBLE, ub, dip * djq * dk, MPI_DOUBLE, + commK_); + #endif gpuFor( - mp,dip,mq,djq,dk, - GPU_LAMBDA(const int p, const int i, const int q, const int j, const int k) { - const int iLo = p*dip; - const int jLo = q*djq; - if ((iLo+i < di) && (jLo+j < dj)) ua[((i+iLo)*dj+j+jLo)*dk+k] = ub[(((p*mq+q)*dip+i)*djq+j)*dk+k]; - }); + mp, dip, mq, djq, dk, + GPU_LAMBDA(const int p, const int i, const int q, const int j, + const int k) { + const int iLo = p * dip; + const int jLo = q * djq; + if ((iLo + i < di) && (jLo + j < dj)) + ua[((i + iLo) * dj + j + jLo) * dk + k] = + ub[(((p * mq + q) * dip + i) * djq + j) * dk + k]; + }); } #endif diff --git a/src/gravity/paris/PoissonZero3DBlockedGPU.hpp b/src/gravity/paris/PoissonZero3DBlockedGPU.hpp index 8d868b54d..0ff37015f 100644 --- a/src/gravity/paris/PoissonZero3DBlockedGPU.hpp +++ b/src/gravity/paris/PoissonZero3DBlockedGPU.hpp @@ -1,28 +1,32 @@ #pragma once #include + #include "../../utils/gpu.hpp" -class PoissonZero3DBlockedGPU { - public: - PoissonZero3DBlockedGPU(const int n[3], const double lo[3], const double hi[3], const int m[3], const int id[3]); - ~PoissonZero3DBlockedGPU(); - long bytes() const { return bytes_; } - void solve(long bytes, double *density, double *potential) const; - private: - double ddi_,ddj_,ddk_; - int idi_,idj_,idk_; - int mi_,mj_,mk_; - int ni_,nj_,nk_; - int mp_,mq_; - int idp_,idq_; - MPI_Comm commI_,commJ_,commK_; - int di_,dj_,dk_; - int dip_,djp_,djq_,dkq_; - int ni2_,nj2_,nk2_; - long bytes_; - cufftHandle d2zi_,d2zj_,d2zk_; +class PoissonZero3DBlockedGPU +{ + public: + PoissonZero3DBlockedGPU(const int n[3], const double lo[3], + const double hi[3], const int m[3], const int id[3]); + ~PoissonZero3DBlockedGPU(); + long bytes() const { return bytes_; } + void solve(long bytes, double *density, double *potential) const; + + private: + double ddi_, ddj_, ddk_; + int idi_, idj_, idk_; + int mi_, mj_, mk_; + int ni_, nj_, nk_; + int mp_, mq_; + int idp_, idq_; + MPI_Comm commI_, commJ_, commK_; + int di_, dj_, dk_; + int dip_, djp_, djq_, dkq_; + int ni2_, nj2_, nk2_; + long bytes_; + cufftHandle d2zi_, d2zj_, d2zk_; #ifndef MPI_GPU - double *ha_, *hb_; + double *ha_, *hb_; #endif }; diff --git a/src/gravity/potential_SOR_3D.cpp b/src/gravity/potential_SOR_3D.cpp index a7a0b4d2f..c5cea83dd 100644 --- a/src/gravity/potential_SOR_3D.cpp +++ b/src/gravity/potential_SOR_3D.cpp @@ -1,20 +1,24 @@ #if defined(GRAVITY) && defined(SOR) -#include "../gravity/potential_SOR_3D.h" -#include "../io/io.h" -#include -#include -#include "../grid/grid3D.h" + #include "../gravity/potential_SOR_3D.h" -#ifdef MPI_CHOLLA -#include "../mpi/mpi_routines.h" -#endif + #include + #include + #include "../grid/grid3D.h" + #include "../io/io.h" -Potential_SOR_3D::Potential_SOR_3D( void ){} + #ifdef MPI_CHOLLA + #include "../mpi/mpi_routines.h" + #endif -void Potential_SOR_3D::Initialize( Real Lx, Real Ly, Real Lz, Real x_min, Real y_min, Real z_min, int nx, int ny, int nz, int nx_real, int ny_real, int nz_real, Real dx_real, Real dy_real, Real dz_real){ +Potential_SOR_3D::Potential_SOR_3D(void) {} +void Potential_SOR_3D::Initialize(Real Lx, Real Ly, Real Lz, Real x_min, + Real y_min, Real z_min, int nx, int ny, + int nz, int nx_real, int ny_real, int nz_real, + Real dx_real, Real dy_real, Real dz_real) +{ Lbox_x = Lx; Lbox_y = Ly; Lbox_z = Lz; @@ -33,13 +37,13 @@ void Potential_SOR_3D::Initialize( Real Lx, Real Ly, Real Lz, Real x_min, Real y n_ghost = N_GHOST_POTENTIAL; - nx_pot = nx_local + 2*n_ghost; - ny_pot = ny_local + 2*n_ghost; - nz_pot = nz_local + 2*n_ghost; + nx_pot = nx_local + 2 * n_ghost; + ny_pot = ny_local + 2 * n_ghost; + nz_pot = nz_local + 2 * n_ghost; - n_cells_local = nx_local*ny_local*nz_local; - n_cells_potential = nx_pot*ny_pot*nz_pot; - n_cells_total = nx_total*ny_total*nz_total; + n_cells_local = nx_local * ny_local * nz_local; + n_cells_potential = nx_pot * ny_pot * nz_pot; + n_cells_total = nx_total * ny_total * nz_total; n_ghost_transfer = 1; @@ -51,114 +55,142 @@ void Potential_SOR_3D::Initialize( Real Lx, Real Ly, Real Lz, Real x_min, Real y size_buffer_x = n_ghost_transfer * ny_local * nz_local; size_buffer_y = n_ghost_transfer * nx_local * nz_local; size_buffer_z = n_ghost_transfer * nx_local * ny_local; - if ( size_buffer_x%2 !=0 ) chprintf( " SOR Warning: Buffer X not divisible by 2, Disable HALF_SIZE_BOUNDARIES \n"); - else size_buffer_x /= 2; - if ( size_buffer_y%2 !=0 ) chprintf( " SOR Warning: Buffer Y not divisible by 2, Disable HALF_SIZE_BOUNDARIES \n"); - else size_buffer_y /= 2; - if ( size_buffer_z%2 !=0 ) chprintf( " SOR Warning: Buffer Y not divisible by 2, Disable HALF_SIZE_BOUNDARIES \n"); - else size_buffer_z /= 2; + if (size_buffer_x % 2 != 0) + chprintf( + " SOR Warning: Buffer X not divisible by 2, Disable " + "HALF_SIZE_BOUNDARIES \n"); + else + size_buffer_x /= 2; + if (size_buffer_y % 2 != 0) + chprintf( + " SOR Warning: Buffer Y not divisible by 2, Disable " + "HALF_SIZE_BOUNDARIES \n"); + else + size_buffer_y /= 2; + if (size_buffer_z % 2 != 0) + chprintf( + " SOR Warning: Buffer Y not divisible by 2, Disable " + "HALF_SIZE_BOUNDARIES \n"); + else + size_buffer_z /= 2; #endif - //Flag to transfer Poisson Boundaries when calling Set_Boundaries + // Flag to transfer Poisson Boundaries when calling Set_Boundaries TRANSFER_POISSON_BOUNDARIES = false; + chprintf(" Using Poisson Solver: SOR\n"); + chprintf(" SOR: L[ %f %f %f ] N[ %d %d %d ] dx[ %f %f %f ]\n", Lbox_x, + Lbox_y, Lbox_z, nx_local, ny_local, nz_local, dx, dy, dz); - chprintf( " Using Poisson Solver: SOR\n"); - chprintf( " SOR: L[ %f %f %f ] N[ %d %d %d ] dx[ %f %f %f ]\n", Lbox_x, Lbox_y, Lbox_z, nx_local, ny_local, nz_local, dx, dy, dz ); - - chprintf( " SOR: Allocating memory...\n"); + chprintf(" SOR: Allocating memory...\n"); AllocateMemory_CPU(); AllocateMemory_GPU(); potential_initialized = false; - } - -void Potential_SOR_3D::AllocateMemory_CPU( void ){ - F.output_h = (Real *) malloc(n_cells_local*sizeof(Real)); - F.converged_h = (bool *) malloc(sizeof(bool)); - +void Potential_SOR_3D::AllocateMemory_CPU(void) +{ + F.output_h = (Real *)malloc(n_cells_local * sizeof(Real)); + F.converged_h = (bool *)malloc(sizeof(bool)); } - -void Potential_SOR_3D::AllocateMemory_GPU( void ){ - - Allocate_Array_GPU_Real( &F.input_d, n_cells_local ); - Allocate_Array_GPU_Real( &F.density_d, n_cells_local ); - Allocate_Array_GPU_Real( &F.potential_d, n_cells_potential ); - Allocate_Array_GPU_bool( &F.converged_d, 1 ); - Allocate_Array_GPU_Real( &F.boundaries_buffer_x0_d, size_buffer_x); - Allocate_Array_GPU_Real( &F.boundaries_buffer_x1_d, size_buffer_x); - Allocate_Array_GPU_Real( &F.boundaries_buffer_y0_d, size_buffer_y); - Allocate_Array_GPU_Real( &F.boundaries_buffer_y1_d, size_buffer_y); - Allocate_Array_GPU_Real( &F.boundaries_buffer_z0_d, size_buffer_z); - Allocate_Array_GPU_Real( &F.boundaries_buffer_z1_d, size_buffer_z); +void Potential_SOR_3D::AllocateMemory_GPU(void) +{ + Allocate_Array_GPU_Real(&F.input_d, n_cells_local); + Allocate_Array_GPU_Real(&F.density_d, n_cells_local); + Allocate_Array_GPU_Real(&F.potential_d, n_cells_potential); + Allocate_Array_GPU_bool(&F.converged_d, 1); + Allocate_Array_GPU_Real(&F.boundaries_buffer_x0_d, size_buffer_x); + Allocate_Array_GPU_Real(&F.boundaries_buffer_x1_d, size_buffer_x); + Allocate_Array_GPU_Real(&F.boundaries_buffer_y0_d, size_buffer_y); + Allocate_Array_GPU_Real(&F.boundaries_buffer_y1_d, size_buffer_y); + Allocate_Array_GPU_Real(&F.boundaries_buffer_z0_d, size_buffer_z); + Allocate_Array_GPU_Real(&F.boundaries_buffer_z1_d, size_buffer_z); #ifdef MPI_CHOLLA - Allocate_Array_GPU_Real( &F.recv_boundaries_buffer_x0_d, size_buffer_x); - Allocate_Array_GPU_Real( &F.recv_boundaries_buffer_x1_d, size_buffer_x); - Allocate_Array_GPU_Real( &F.recv_boundaries_buffer_y0_d, size_buffer_y); - Allocate_Array_GPU_Real( &F.recv_boundaries_buffer_y1_d, size_buffer_y); - Allocate_Array_GPU_Real( &F.recv_boundaries_buffer_z0_d, size_buffer_z); - Allocate_Array_GPU_Real( &F.recv_boundaries_buffer_z1_d, size_buffer_z); + Allocate_Array_GPU_Real(&F.recv_boundaries_buffer_x0_d, size_buffer_x); + Allocate_Array_GPU_Real(&F.recv_boundaries_buffer_x1_d, size_buffer_x); + Allocate_Array_GPU_Real(&F.recv_boundaries_buffer_y0_d, size_buffer_y); + Allocate_Array_GPU_Real(&F.recv_boundaries_buffer_y1_d, size_buffer_y); + Allocate_Array_GPU_Real(&F.recv_boundaries_buffer_z0_d, size_buffer_z); + Allocate_Array_GPU_Real(&F.recv_boundaries_buffer_z1_d, size_buffer_z); #endif #ifdef GRAV_ISOLATED_BOUNDARY_X - Allocate_Array_GPU_Real( &F.boundary_isolated_x0_d, n_ghost*ny_local*nz_local ); - Allocate_Array_GPU_Real( &F.boundary_isolated_x1_d, n_ghost*ny_local*nz_local ); + Allocate_Array_GPU_Real(&F.boundary_isolated_x0_d, + n_ghost * ny_local * nz_local); + Allocate_Array_GPU_Real(&F.boundary_isolated_x1_d, + n_ghost * ny_local * nz_local); #endif #ifdef GRAV_ISOLATED_BOUNDARY_X - Allocate_Array_GPU_Real( &F.boundary_isolated_y0_d, n_ghost*nx_local*nz_local ); - Allocate_Array_GPU_Real( &F.boundary_isolated_y1_d, n_ghost*nx_local*nz_local ); + Allocate_Array_GPU_Real(&F.boundary_isolated_y0_d, + n_ghost * nx_local * nz_local); + Allocate_Array_GPU_Real(&F.boundary_isolated_y1_d, + n_ghost * nx_local * nz_local); #endif #ifdef GRAV_ISOLATED_BOUNDARY_Z - Allocate_Array_GPU_Real( &F.boundary_isolated_z0_d, n_ghost*nx_local*ny_local ); - Allocate_Array_GPU_Real( &F.boundary_isolated_z1_d, n_ghost*nx_local*ny_local ); + Allocate_Array_GPU_Real(&F.boundary_isolated_z0_d, + n_ghost * nx_local * ny_local); + Allocate_Array_GPU_Real(&F.boundary_isolated_z1_d, + n_ghost * nx_local * ny_local); #endif - } -void Potential_SOR_3D::Copy_Input_And_Initialize( Real *input_density, const Real *const input_potential, Real Grav_Constant, Real dens_avrg, Real current_a ){ - Copy_Input( n_cells_local, F.input_d, input_density, Grav_Constant, dens_avrg, current_a ); - - if ( !potential_initialized ){ - chprintf( "SOR: Initializing Potential \n"); - CHECK( cudaMemcpy( F.potential_d, input_potential, n_cells_potential*sizeof(Real), cudaMemcpyHostToDevice ) ); - //Initialize_Potential( nx_local, ny_local, nz_local, n_ghost, F.potential_d, F.density_d ); +void Potential_SOR_3D::Copy_Input_And_Initialize( + Real *input_density, const Real *const input_potential, Real Grav_Constant, + Real dens_avrg, Real current_a) +{ + Copy_Input(n_cells_local, F.input_d, input_density, Grav_Constant, dens_avrg, + current_a); + + if (!potential_initialized) { + chprintf("SOR: Initializing Potential \n"); + CHECK(cudaMemcpy(F.potential_d, input_potential, + n_cells_potential * sizeof(Real), cudaMemcpyHostToDevice)); + // Initialize_Potential( nx_local, ny_local, nz_local, n_ghost, + // F.potential_d, F.density_d ); potential_initialized = true; } } - -void Potential_SOR_3D::Poisson_Partial_Iteration( int n_step, Real omega, Real epsilon ){ - if (n_step == 0 ) Poisson_iteration_Patial_1( n_cells_local, nx_local, ny_local, nz_local, n_ghost, dx, dy, dz, omega, epsilon, F.density_d, F.potential_d, F.converged_h, F.converged_d ); - if (n_step == 1 ) Poisson_iteration_Patial_2( n_cells_local, nx_local, ny_local, nz_local, n_ghost, dx, dy, dz, omega, epsilon, F.density_d, F.potential_d, F.converged_h, F.converged_d ); +void Potential_SOR_3D::Poisson_Partial_Iteration(int n_step, Real omega, + Real epsilon) +{ + if (n_step == 0) + Poisson_iteration_Patial_1(n_cells_local, nx_local, ny_local, nz_local, + n_ghost, dx, dy, dz, omega, epsilon, F.density_d, + F.potential_d, F.converged_h, F.converged_d); + if (n_step == 1) + Poisson_iteration_Patial_2(n_cells_local, nx_local, ny_local, nz_local, + n_ghost, dx, dy, dz, omega, epsilon, F.density_d, + F.potential_d, F.converged_h, F.converged_d); } - -void Grid3D::Get_Potential_SOR( Real Grav_Constant, Real dens_avrg, Real current_a, struct parameters *P ){ - +void Grid3D::Get_Potential_SOR(Real Grav_Constant, Real dens_avrg, + Real current_a, struct parameters *P) +{ #ifdef TIME_SOR Real time_start, time_end, time; time_start = get_time(); #endif - Grav.Poisson_solver.Copy_Input_And_Initialize( Grav.F.density_h, Grav.F.potential_h, Grav_Constant, dens_avrg, current_a ); - - //Set Isolated Boundary Conditions - Grav.Copy_Isolated_Boundaries_To_GPU( P ); - Grav.Poisson_solver.Set_Isolated_Boundary_Conditions( Grav.boundary_flags, P ); + Grav.Poisson_solver.Copy_Input_And_Initialize( + Grav.F.density_h, Grav.F.potential_h, Grav_Constant, dens_avrg, + current_a); + // Set Isolated Boundary Conditions + Grav.Copy_Isolated_Boundaries_To_GPU(P); + Grav.Poisson_solver.Set_Isolated_Boundary_Conditions(Grav.boundary_flags, P); Real epsilon = 1e-4; int max_iter = 10000000; - int n_iter = 0; + int n_iter = 0; Grav.Poisson_solver.F.converged_h[0] = 0; // For Diriclet Boundaries - Real omega = 2. / ( 1 + M_PI / Grav.Poisson_solver.nx_total ); + Real omega = 2. / (1 + M_PI / Grav.Poisson_solver.nx_total); // For Periodic Boundaries // Real omega = 2. / ( 1 + 2*M_PI / nx_total ); @@ -166,374 +198,446 @@ void Grid3D::Get_Potential_SOR( Real Grav_Constant, Real dens_avrg, Real current bool set_boundaries; - //Number of iterations in between boundary transfers + // Number of iterations in between boundary transfers int n_iter_per_boundaries_transfer = 1; - // Iterate to solve Poisson equation - while ( Grav.Poisson_solver.F.converged_h[0] == 0 ) { - + while (Grav.Poisson_solver.F.converged_h[0] == 0) { set_boundaries = false; - if ( n_iter % n_iter_per_boundaries_transfer == 0 ) set_boundaries = true; + if (n_iter % n_iter_per_boundaries_transfer == 0) set_boundaries = true; // First Partial Iteration Grav.Poisson_solver.iteration_parity = 0; - if ( set_boundaries ){ + if (set_boundaries) { Grav.Poisson_solver.TRANSFER_POISSON_BOUNDARIES = true; - Set_Boundary_Conditions( *P ); + Set_Boundary_Conditions(*P); Grav.Poisson_solver.TRANSFER_POISSON_BOUNDARIES = false; } - Grav.Poisson_solver.Poisson_Partial_Iteration( Grav.Poisson_solver.iteration_parity, omega, epsilon ); - + Grav.Poisson_solver.Poisson_Partial_Iteration( + Grav.Poisson_solver.iteration_parity, omega, epsilon); // Second Partial Iteration Grav.Poisson_solver.iteration_parity = 1; - if ( set_boundaries ){ + if (set_boundaries) { Grav.Poisson_solver.TRANSFER_POISSON_BOUNDARIES = true; - Set_Boundary_Conditions( *P ); + Set_Boundary_Conditions(*P); Grav.Poisson_solver.TRANSFER_POISSON_BOUNDARIES = false; } - Grav.Poisson_solver.Poisson_Partial_Iteration( Grav.Poisson_solver.iteration_parity, omega, epsilon ); + Grav.Poisson_solver.Poisson_Partial_Iteration( + Grav.Poisson_solver.iteration_parity, omega, epsilon); - // Get convergence state - #ifdef MPI_CHOLLA - Grav.Poisson_solver.F.converged_h[0] = Grav.Poisson_solver.Get_Global_Converged( Grav.Poisson_solver.F.converged_h[0] ); - #endif + // Get convergence state + #ifdef MPI_CHOLLA + Grav.Poisson_solver.F.converged_h[0] = + Grav.Poisson_solver.Get_Global_Converged( + Grav.Poisson_solver.F.converged_h[0]); + #endif - //Only aloow to connverge after the boundaries have been transfere to avoid false convergence in the boundaries. - if ( set_boundaries == false ) Grav.Poisson_solver.F.converged_h[0] = 0; + // Only aloow to connverge after the boundaries have been transfere to avoid + // false convergence in the boundaries. + if (set_boundaries == false) Grav.Poisson_solver.F.converged_h[0] = 0; n_iter += 1; - if ( n_iter == max_iter ) break; + if (n_iter == max_iter) break; } - if ( n_iter == max_iter ) chprintf(" SOR: No convergence in %d iterations \n", n_iter); - else chprintf(" SOR: Converged in %d iterations \n", n_iter); + if (n_iter == max_iter) + chprintf(" SOR: No convergence in %d iterations \n", n_iter); + else + chprintf(" SOR: Converged in %d iterations \n", n_iter); - Grav.Poisson_solver.Copy_Output( Grav.F.potential_h ); + Grav.Poisson_solver.Copy_Output(Grav.F.potential_h); #ifdef TIME_SOR - #ifdef MPI_CHOLLA + #ifdef MPI_CHOLLA MPI_Barrier(world); - #endif + #endif time_end = get_time(); - time = (time_end - time_start); - chprintf( " SOR: Time = %f seg\n", time ); + time = (time_end - time_start); + chprintf(" SOR: Time = %f seg\n", time); #endif - - } -void Grav3D::Copy_Isolated_Boundaries_To_GPU( struct parameters *P ){ - - if ( P->xl_bcnd != 3 && P->xu_bcnd != 3 && P->yl_bcnd != 3 && P->yu_bcnd != 3 && P->zl_bcnd != 3 && P->zu_bcnd != 3 ) return; +void Grav3D::Copy_Isolated_Boundaries_To_GPU(struct parameters *P) +{ + if (P->xl_bcnd != 3 && P->xu_bcnd != 3 && P->yl_bcnd != 3 && + P->yu_bcnd != 3 && P->zl_bcnd != 3 && P->zu_bcnd != 3) + return; // chprintf( " Copying Isolated Boundaries \n"); - if ( boundary_flags[0] == 3 ) Copy_Isolated_Boundary_To_GPU_buffer( F.pot_boundary_x0, Poisson_solver.F.boundary_isolated_x0_d, Poisson_solver.n_ghost*ny_local*nz_local ); - if ( boundary_flags[1] == 3 ) Copy_Isolated_Boundary_To_GPU_buffer( F.pot_boundary_x1, Poisson_solver.F.boundary_isolated_x1_d, Poisson_solver.n_ghost*ny_local*nz_local ); - if ( boundary_flags[2] == 3 ) Copy_Isolated_Boundary_To_GPU_buffer( F.pot_boundary_y0, Poisson_solver.F.boundary_isolated_y0_d, Poisson_solver.n_ghost*nx_local*nz_local ); - if ( boundary_flags[3] == 3 ) Copy_Isolated_Boundary_To_GPU_buffer( F.pot_boundary_y1, Poisson_solver.F.boundary_isolated_y1_d, Poisson_solver.n_ghost*nx_local*nz_local ); - if ( boundary_flags[4] == 3 ) Copy_Isolated_Boundary_To_GPU_buffer( F.pot_boundary_z0, Poisson_solver.F.boundary_isolated_z0_d, Poisson_solver.n_ghost*nx_local*ny_local ); - if ( boundary_flags[5] == 3 ) Copy_Isolated_Boundary_To_GPU_buffer( F.pot_boundary_z1, Poisson_solver.F.boundary_isolated_z1_d, Poisson_solver.n_ghost*nx_local*ny_local ); - - + if (boundary_flags[0] == 3) + Copy_Isolated_Boundary_To_GPU_buffer( + F.pot_boundary_x0, Poisson_solver.F.boundary_isolated_x0_d, + Poisson_solver.n_ghost * ny_local * nz_local); + if (boundary_flags[1] == 3) + Copy_Isolated_Boundary_To_GPU_buffer( + F.pot_boundary_x1, Poisson_solver.F.boundary_isolated_x1_d, + Poisson_solver.n_ghost * ny_local * nz_local); + if (boundary_flags[2] == 3) + Copy_Isolated_Boundary_To_GPU_buffer( + F.pot_boundary_y0, Poisson_solver.F.boundary_isolated_y0_d, + Poisson_solver.n_ghost * nx_local * nz_local); + if (boundary_flags[3] == 3) + Copy_Isolated_Boundary_To_GPU_buffer( + F.pot_boundary_y1, Poisson_solver.F.boundary_isolated_y1_d, + Poisson_solver.n_ghost * nx_local * nz_local); + if (boundary_flags[4] == 3) + Copy_Isolated_Boundary_To_GPU_buffer( + F.pot_boundary_z0, Poisson_solver.F.boundary_isolated_z0_d, + Poisson_solver.n_ghost * nx_local * ny_local); + if (boundary_flags[5] == 3) + Copy_Isolated_Boundary_To_GPU_buffer( + F.pot_boundary_z1, Poisson_solver.F.boundary_isolated_z1_d, + Poisson_solver.n_ghost * nx_local * ny_local); } -void Potential_SOR_3D::Set_Isolated_Boundary_Conditions( int *boundary_flags, struct parameters *P ){ - - - if ( P->xl_bcnd != 3 && P->xu_bcnd != 3 && P->yl_bcnd != 3 && P->yu_bcnd != 3 && P->zl_bcnd != 3 && P->zu_bcnd != 3 ) return; - - chprintf( " Setting Isolated Boundaries \n"); - if ( boundary_flags[0] == 3 ) Set_Isolated_Boundary_GPU( 0, 0, F.boundary_isolated_x0_d ); - if ( boundary_flags[1] == 3 ) Set_Isolated_Boundary_GPU( 0, 1, F.boundary_isolated_x1_d ); - if ( boundary_flags[2] == 3 ) Set_Isolated_Boundary_GPU( 1, 0, F.boundary_isolated_y0_d ); - if ( boundary_flags[3] == 3 ) Set_Isolated_Boundary_GPU( 1, 1, F.boundary_isolated_y1_d ); - if ( boundary_flags[4] == 3 ) Set_Isolated_Boundary_GPU( 2, 0, F.boundary_isolated_z0_d ); - if ( boundary_flags[5] == 3 ) Set_Isolated_Boundary_GPU( 2, 1, F.boundary_isolated_z1_d ); - +void Potential_SOR_3D::Set_Isolated_Boundary_Conditions(int *boundary_flags, + struct parameters *P) +{ + if (P->xl_bcnd != 3 && P->xu_bcnd != 3 && P->yl_bcnd != 3 && + P->yu_bcnd != 3 && P->zl_bcnd != 3 && P->zu_bcnd != 3) + return; + + chprintf(" Setting Isolated Boundaries \n"); + if (boundary_flags[0] == 3) + Set_Isolated_Boundary_GPU(0, 0, F.boundary_isolated_x0_d); + if (boundary_flags[1] == 3) + Set_Isolated_Boundary_GPU(0, 1, F.boundary_isolated_x1_d); + if (boundary_flags[2] == 3) + Set_Isolated_Boundary_GPU(1, 0, F.boundary_isolated_y0_d); + if (boundary_flags[3] == 3) + Set_Isolated_Boundary_GPU(1, 1, F.boundary_isolated_y1_d); + if (boundary_flags[4] == 3) + Set_Isolated_Boundary_GPU(2, 0, F.boundary_isolated_z0_d); + if (boundary_flags[5] == 3) + Set_Isolated_Boundary_GPU(2, 1, F.boundary_isolated_z1_d); } - - - -void Potential_SOR_3D::Copy_Poisson_Boundary_Periodic( int direction, int side ){ - +void Potential_SOR_3D::Copy_Poisson_Boundary_Periodic(int direction, int side) +{ Real *boundaries_buffer; - if( direction == 0 ){ - if ( side == 0 ) boundaries_buffer = F.boundaries_buffer_x0_d; - if ( side == 1 ) boundaries_buffer = F.boundaries_buffer_x1_d; + if (direction == 0) { + if (side == 0) boundaries_buffer = F.boundaries_buffer_x0_d; + if (side == 1) boundaries_buffer = F.boundaries_buffer_x1_d; } - if( direction == 1 ){ - if ( side == 0 ) boundaries_buffer = F.boundaries_buffer_y0_d; - if ( side == 1 ) boundaries_buffer = F.boundaries_buffer_y1_d; + if (direction == 1) { + if (side == 0) boundaries_buffer = F.boundaries_buffer_y0_d; + if (side == 1) boundaries_buffer = F.boundaries_buffer_y1_d; } - if( direction == 2 ){ - if ( side == 0 ) boundaries_buffer = F.boundaries_buffer_z0_d; - if ( side == 1 ) boundaries_buffer = F.boundaries_buffer_z1_d; + if (direction == 2) { + if (side == 0) boundaries_buffer = F.boundaries_buffer_z0_d; + if (side == 1) boundaries_buffer = F.boundaries_buffer_z1_d; } int side_load, side_unload; - side_load = side; - side_unload = ( side_load + 1 ) % 2; - - Load_Transfer_Buffer_GPU( direction, side_load, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d, boundaries_buffer ); - Unload_Transfer_Buffer_GPU( direction, side_unload, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d, boundaries_buffer ); - + side_load = side; + side_unload = (side_load + 1) % 2; + + Load_Transfer_Buffer_GPU(direction, side_load, nx_local, ny_local, nz_local, + n_ghost_transfer, n_ghost, F.potential_d, + boundaries_buffer); + Unload_Transfer_Buffer_GPU(direction, side_unload, nx_local, ny_local, + nz_local, n_ghost_transfer, n_ghost, F.potential_d, + boundaries_buffer); } - -void Potential_SOR_3D::FreeMemory_GPU( void ){ - - Free_Array_GPU_Real( F.input_d ); - Free_Array_GPU_Real( F.density_d ); - Free_Array_GPU_Real( F.potential_d ); - Free_Array_GPU_Real( F.boundaries_buffer_x0_d ); - Free_Array_GPU_Real( F.boundaries_buffer_x1_d ); - Free_Array_GPU_Real( F.boundaries_buffer_y0_d ); - Free_Array_GPU_Real( F.boundaries_buffer_y1_d ); - Free_Array_GPU_Real( F.boundaries_buffer_z0_d ); - Free_Array_GPU_Real( F.boundaries_buffer_z1_d ); +void Potential_SOR_3D::FreeMemory_GPU(void) +{ + Free_Array_GPU_Real(F.input_d); + Free_Array_GPU_Real(F.density_d); + Free_Array_GPU_Real(F.potential_d); + Free_Array_GPU_Real(F.boundaries_buffer_x0_d); + Free_Array_GPU_Real(F.boundaries_buffer_x1_d); + Free_Array_GPU_Real(F.boundaries_buffer_y0_d); + Free_Array_GPU_Real(F.boundaries_buffer_y1_d); + Free_Array_GPU_Real(F.boundaries_buffer_z0_d); + Free_Array_GPU_Real(F.boundaries_buffer_z1_d); #ifdef MPI_CHOLLA - Free_Array_GPU_Real( F.recv_boundaries_buffer_x0_d ); - Free_Array_GPU_Real( F.recv_boundaries_buffer_x1_d ); - Free_Array_GPU_Real( F.recv_boundaries_buffer_y0_d ); - Free_Array_GPU_Real( F.recv_boundaries_buffer_y1_d ); - Free_Array_GPU_Real( F.recv_boundaries_buffer_z0_d ); - Free_Array_GPU_Real( F.recv_boundaries_buffer_z1_d ); + Free_Array_GPU_Real(F.recv_boundaries_buffer_x0_d); + Free_Array_GPU_Real(F.recv_boundaries_buffer_x1_d); + Free_Array_GPU_Real(F.recv_boundaries_buffer_y0_d); + Free_Array_GPU_Real(F.recv_boundaries_buffer_y1_d); + Free_Array_GPU_Real(F.recv_boundaries_buffer_z0_d); + Free_Array_GPU_Real(F.recv_boundaries_buffer_z1_d); #endif #ifdef GRAV_ISOLATED_BOUNDARY_Z - Free_Array_GPU_Real( F.boundary_isolated_x0_d ); - Free_Array_GPU_Real( F.boundary_isolated_x1_d ); + Free_Array_GPU_Real(F.boundary_isolated_x0_d); + Free_Array_GPU_Real(F.boundary_isolated_x1_d); #endif #ifdef GRAV_ISOLATED_BOUNDARY_Y - Free_Array_GPU_Real( F.boundary_isolated_y0_d ); - Free_Array_GPU_Real( F.boundary_isolated_y1_d ); + Free_Array_GPU_Real(F.boundary_isolated_y0_d); + Free_Array_GPU_Real(F.boundary_isolated_y1_d); #endif #ifdef GRAV_ISOLATED_BOUNDARY_Z - Free_Array_GPU_Real( F.boundary_isolated_z0_d ); - Free_Array_GPU_Real( F.boundary_isolated_z1_d ); + Free_Array_GPU_Real(F.boundary_isolated_z0_d); + Free_Array_GPU_Real(F.boundary_isolated_z1_d); #endif - } - -void Potential_SOR_3D::Reset( void ){ - free( F.output_h ); +void Potential_SOR_3D::Reset(void) +{ + free(F.output_h); FreeMemory_GPU(); } + #ifdef MPI_CHOLLA - -#ifdef MPI_CHOLLA - -int Grid3D::Load_Poisson_Boundary_To_Buffer( int direction, int side, Real *buffer_host ){ - +int Grid3D::Load_Poisson_Boundary_To_Buffer(int direction, int side, + Real *buffer_host) +{ int size_buffer; - if ( direction == 0 ) size_buffer = Grav.Poisson_solver.size_buffer_x; - if ( direction == 1 ) size_buffer = Grav.Poisson_solver.size_buffer_y; - if ( direction == 2 ) size_buffer = Grav.Poisson_solver.size_buffer_z; + if (direction == 0) size_buffer = Grav.Poisson_solver.size_buffer_x; + if (direction == 1) size_buffer = Grav.Poisson_solver.size_buffer_y; + if (direction == 2) size_buffer = Grav.Poisson_solver.size_buffer_z; - - //Load the transfer buffer in the GPU - if ( direction == 0 ){ - if ( side == 0 ) Grav.Poisson_solver.Load_Transfer_Buffer_GPU_x0(); - if ( side == 1 ) Grav.Poisson_solver.Load_Transfer_Buffer_GPU_x1(); + // Load the transfer buffer in the GPU + if (direction == 0) { + if (side == 0) Grav.Poisson_solver.Load_Transfer_Buffer_GPU_x0(); + if (side == 1) Grav.Poisson_solver.Load_Transfer_Buffer_GPU_x1(); } - if ( direction == 1 ){ - if ( side == 0 ) Grav.Poisson_solver.Load_Transfer_Buffer_GPU_y0(); - if ( side == 1 ) Grav.Poisson_solver.Load_Transfer_Buffer_GPU_y1(); + if (direction == 1) { + if (side == 0) Grav.Poisson_solver.Load_Transfer_Buffer_GPU_y0(); + if (side == 1) Grav.Poisson_solver.Load_Transfer_Buffer_GPU_y1(); } - if ( direction == 2 ){ - if ( side == 0 ) Grav.Poisson_solver.Load_Transfer_Buffer_GPU_z0(); - if ( side == 1 ) Grav.Poisson_solver.Load_Transfer_Buffer_GPU_z1(); + if (direction == 2) { + if (side == 0) Grav.Poisson_solver.Load_Transfer_Buffer_GPU_z0(); + if (side == 1) Grav.Poisson_solver.Load_Transfer_Buffer_GPU_z1(); } // Copy the device_buffer to the host_buffer Real *buffer_dev; - if ( direction == 0 ){ - if ( side == 0 ) buffer_dev = Grav.Poisson_solver.F.boundaries_buffer_x0_d; - if ( side == 1 ) buffer_dev = Grav.Poisson_solver.F.boundaries_buffer_x1_d; + if (direction == 0) { + if (side == 0) buffer_dev = Grav.Poisson_solver.F.boundaries_buffer_x0_d; + if (side == 1) buffer_dev = Grav.Poisson_solver.F.boundaries_buffer_x1_d; } - if ( direction == 1 ){ - if ( side == 0 ) buffer_dev = Grav.Poisson_solver.F.boundaries_buffer_y0_d; - if ( side == 1 ) buffer_dev = Grav.Poisson_solver.F.boundaries_buffer_y1_d; + if (direction == 1) { + if (side == 0) buffer_dev = Grav.Poisson_solver.F.boundaries_buffer_y0_d; + if (side == 1) buffer_dev = Grav.Poisson_solver.F.boundaries_buffer_y1_d; } - if ( direction == 2 ){ - if ( side == 0 ) buffer_dev = Grav.Poisson_solver.F.boundaries_buffer_z0_d; - if ( side == 1 ) buffer_dev = Grav.Poisson_solver.F.boundaries_buffer_z1_d; + if (direction == 2) { + if (side == 0) buffer_dev = Grav.Poisson_solver.F.boundaries_buffer_z0_d; + if (side == 1) buffer_dev = Grav.Poisson_solver.F.boundaries_buffer_z1_d; } - Grav.Poisson_solver.Copy_Transfer_Buffer_To_Host( size_buffer, buffer_host, buffer_dev ); - + Grav.Poisson_solver.Copy_Transfer_Buffer_To_Host(size_buffer, buffer_host, + buffer_dev); return size_buffer; } - -void Grid3D::Unload_Poisson_Boundary_From_Buffer( int direction, int side, Real *buffer_host ){ - +void Grid3D::Unload_Poisson_Boundary_From_Buffer(int direction, int side, + Real *buffer_host) +{ int size_buffer; - if ( direction == 0 ) size_buffer = Grav.Poisson_solver.size_buffer_x; - if ( direction == 1 ) size_buffer = Grav.Poisson_solver.size_buffer_y; - if ( direction == 2 ) size_buffer = Grav.Poisson_solver.size_buffer_z; - + if (direction == 0) size_buffer = Grav.Poisson_solver.size_buffer_x; + if (direction == 1) size_buffer = Grav.Poisson_solver.size_buffer_y; + if (direction == 2) size_buffer = Grav.Poisson_solver.size_buffer_z; // Copy the host_buffer to the device_buffer Real *buffer_dev; - if ( direction == 0 ){ - if ( side == 0 ) buffer_dev = Grav.Poisson_solver.F.recv_boundaries_buffer_x0_d; - if ( side == 1 ) buffer_dev = Grav.Poisson_solver.F.recv_boundaries_buffer_x1_d; + if (direction == 0) { + if (side == 0) + buffer_dev = Grav.Poisson_solver.F.recv_boundaries_buffer_x0_d; + if (side == 1) + buffer_dev = Grav.Poisson_solver.F.recv_boundaries_buffer_x1_d; } - if ( direction == 1 ){ - if ( side == 0 ) buffer_dev = Grav.Poisson_solver.F.recv_boundaries_buffer_y0_d; - if ( side == 1 ) buffer_dev = Grav.Poisson_solver.F.recv_boundaries_buffer_y1_d; + if (direction == 1) { + if (side == 0) + buffer_dev = Grav.Poisson_solver.F.recv_boundaries_buffer_y0_d; + if (side == 1) + buffer_dev = Grav.Poisson_solver.F.recv_boundaries_buffer_y1_d; } - if ( direction == 2 ){ - if ( side == 0 ) buffer_dev = Grav.Poisson_solver.F.recv_boundaries_buffer_z0_d; - if ( side == 1 ) buffer_dev = Grav.Poisson_solver.F.recv_boundaries_buffer_z1_d; + if (direction == 2) { + if (side == 0) + buffer_dev = Grav.Poisson_solver.F.recv_boundaries_buffer_z0_d; + if (side == 1) + buffer_dev = Grav.Poisson_solver.F.recv_boundaries_buffer_z1_d; } - Grav.Poisson_solver.Copy_Transfer_Buffer_To_Device( size_buffer, buffer_host, buffer_dev ); + Grav.Poisson_solver.Copy_Transfer_Buffer_To_Device(size_buffer, buffer_host, + buffer_dev); - //Unload the transfer buffer in the GPU - if ( direction == 0 ){ - if ( side == 0 ) Grav.Poisson_solver.Unload_Transfer_Buffer_GPU_x0(); - if ( side == 1 ) Grav.Poisson_solver.Unload_Transfer_Buffer_GPU_x1(); + // Unload the transfer buffer in the GPU + if (direction == 0) { + if (side == 0) Grav.Poisson_solver.Unload_Transfer_Buffer_GPU_x0(); + if (side == 1) Grav.Poisson_solver.Unload_Transfer_Buffer_GPU_x1(); } - if ( direction == 1 ){ - if ( side == 0 ) Grav.Poisson_solver.Unload_Transfer_Buffer_GPU_y0(); - if ( side == 1 ) Grav.Poisson_solver.Unload_Transfer_Buffer_GPU_y1(); + if (direction == 1) { + if (side == 0) Grav.Poisson_solver.Unload_Transfer_Buffer_GPU_y0(); + if (side == 1) Grav.Poisson_solver.Unload_Transfer_Buffer_GPU_y1(); } - if ( direction == 2 ){ - if ( side == 0 ) Grav.Poisson_solver.Unload_Transfer_Buffer_GPU_z0(); - if ( side == 1 ) Grav.Poisson_solver.Unload_Transfer_Buffer_GPU_z1(); + if (direction == 2) { + if (side == 0) Grav.Poisson_solver.Unload_Transfer_Buffer_GPU_z0(); + if (side == 1) Grav.Poisson_solver.Unload_Transfer_Buffer_GPU_z1(); } - } - - -void Potential_SOR_3D::Load_Transfer_Buffer_GPU_x0(){ - #ifdef HALF_SIZE_BOUNDARIES - Load_Transfer_Buffer_Half_GPU( 0, 0, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d, F.boundaries_buffer_x0_d ); - #else - Load_Transfer_Buffer_GPU( 0, 0, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d, F.boundaries_buffer_x0_d ); - #endif +void Potential_SOR_3D::Load_Transfer_Buffer_GPU_x0() +{ + #ifdef HALF_SIZE_BOUNDARIES + Load_Transfer_Buffer_Half_GPU(0, 0, nx_local, ny_local, nz_local, + n_ghost_transfer, n_ghost, F.potential_d, + F.boundaries_buffer_x0_d); + #else + Load_Transfer_Buffer_GPU(0, 0, nx_local, ny_local, nz_local, n_ghost_transfer, + n_ghost, F.potential_d, F.boundaries_buffer_x0_d); + #endif } -void Potential_SOR_3D::Load_Transfer_Buffer_GPU_x1(){ - #ifdef HALF_SIZE_BOUNDARIES - Load_Transfer_Buffer_Half_GPU( 0, 1, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d, F.boundaries_buffer_x1_d ); - #else - Load_Transfer_Buffer_GPU( 0, 1, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d, F.boundaries_buffer_x1_d ); - #endif +void Potential_SOR_3D::Load_Transfer_Buffer_GPU_x1() +{ + #ifdef HALF_SIZE_BOUNDARIES + Load_Transfer_Buffer_Half_GPU(0, 1, nx_local, ny_local, nz_local, + n_ghost_transfer, n_ghost, F.potential_d, + F.boundaries_buffer_x1_d); + #else + Load_Transfer_Buffer_GPU(0, 1, nx_local, ny_local, nz_local, n_ghost_transfer, + n_ghost, F.potential_d, F.boundaries_buffer_x1_d); + #endif } -void Potential_SOR_3D::Load_Transfer_Buffer_GPU_y0(){ - #ifdef HALF_SIZE_BOUNDARIES - Load_Transfer_Buffer_Half_GPU( 1, 0, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d, F.boundaries_buffer_y0_d ); - #else - Load_Transfer_Buffer_GPU( 1, 0, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d, F.boundaries_buffer_y0_d ); - #endif +void Potential_SOR_3D::Load_Transfer_Buffer_GPU_y0() +{ + #ifdef HALF_SIZE_BOUNDARIES + Load_Transfer_Buffer_Half_GPU(1, 0, nx_local, ny_local, nz_local, + n_ghost_transfer, n_ghost, F.potential_d, + F.boundaries_buffer_y0_d); + #else + Load_Transfer_Buffer_GPU(1, 0, nx_local, ny_local, nz_local, n_ghost_transfer, + n_ghost, F.potential_d, F.boundaries_buffer_y0_d); + #endif } -void Potential_SOR_3D::Load_Transfer_Buffer_GPU_y1(){ - #ifdef HALF_SIZE_BOUNDARIES - Load_Transfer_Buffer_Half_GPU( 1, 1, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d, F.boundaries_buffer_y1_d ); - #else - Load_Transfer_Buffer_GPU( 1, 1, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d, F.boundaries_buffer_y1_d ); - #endif +void Potential_SOR_3D::Load_Transfer_Buffer_GPU_y1() +{ + #ifdef HALF_SIZE_BOUNDARIES + Load_Transfer_Buffer_Half_GPU(1, 1, nx_local, ny_local, nz_local, + n_ghost_transfer, n_ghost, F.potential_d, + F.boundaries_buffer_y1_d); + #else + Load_Transfer_Buffer_GPU(1, 1, nx_local, ny_local, nz_local, n_ghost_transfer, + n_ghost, F.potential_d, F.boundaries_buffer_y1_d); + #endif } -void Potential_SOR_3D::Load_Transfer_Buffer_GPU_z0(){ - #ifdef HALF_SIZE_BOUNDARIES - Load_Transfer_Buffer_Half_GPU( 2, 0, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d, F.boundaries_buffer_z0_d ); - #else - Load_Transfer_Buffer_GPU( 2, 0, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d, F.boundaries_buffer_z0_d ); - #endif +void Potential_SOR_3D::Load_Transfer_Buffer_GPU_z0() +{ + #ifdef HALF_SIZE_BOUNDARIES + Load_Transfer_Buffer_Half_GPU(2, 0, nx_local, ny_local, nz_local, + n_ghost_transfer, n_ghost, F.potential_d, + F.boundaries_buffer_z0_d); + #else + Load_Transfer_Buffer_GPU(2, 0, nx_local, ny_local, nz_local, n_ghost_transfer, + n_ghost, F.potential_d, F.boundaries_buffer_z0_d); + #endif } -void Potential_SOR_3D::Load_Transfer_Buffer_GPU_z1(){ - #ifdef HALF_SIZE_BOUNDARIES - Load_Transfer_Buffer_Half_GPU( 2, 1, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d, F.boundaries_buffer_z1_d ); - #else - Load_Transfer_Buffer_GPU( 2, 1, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d, F.boundaries_buffer_z1_d ); - #endif +void Potential_SOR_3D::Load_Transfer_Buffer_GPU_z1() +{ + #ifdef HALF_SIZE_BOUNDARIES + Load_Transfer_Buffer_Half_GPU(2, 1, nx_local, ny_local, nz_local, + n_ghost_transfer, n_ghost, F.potential_d, + F.boundaries_buffer_z1_d); + #else + Load_Transfer_Buffer_GPU(2, 1, nx_local, ny_local, nz_local, n_ghost_transfer, + n_ghost, F.potential_d, F.boundaries_buffer_z1_d); + #endif } - -void Potential_SOR_3D::Unload_Transfer_Buffer_GPU_x0(){ - #ifdef HALF_SIZE_BOUNDARIES - Unload_Transfer_Buffer_Half_GPU( 0, 0, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d, F.recv_boundaries_buffer_x0_d ); - #else - Unload_Transfer_Buffer_GPU( 0, 0, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d, F.recv_boundaries_buffer_x0_d ); - #endif +void Potential_SOR_3D::Unload_Transfer_Buffer_GPU_x0() +{ + #ifdef HALF_SIZE_BOUNDARIES + Unload_Transfer_Buffer_Half_GPU(0, 0, nx_local, ny_local, nz_local, + n_ghost_transfer, n_ghost, F.potential_d, + F.recv_boundaries_buffer_x0_d); + #else + Unload_Transfer_Buffer_GPU(0, 0, nx_local, ny_local, nz_local, + n_ghost_transfer, n_ghost, F.potential_d, + F.recv_boundaries_buffer_x0_d); + #endif } -void Potential_SOR_3D::Unload_Transfer_Buffer_GPU_x1(){ - #ifdef HALF_SIZE_BOUNDARIES - Unload_Transfer_Buffer_Half_GPU( 0, 1, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d, F.recv_boundaries_buffer_x1_d ); - #else - Unload_Transfer_Buffer_GPU( 0, 1, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d, F.recv_boundaries_buffer_x1_d ); - #endif +void Potential_SOR_3D::Unload_Transfer_Buffer_GPU_x1() +{ + #ifdef HALF_SIZE_BOUNDARIES + Unload_Transfer_Buffer_Half_GPU(0, 1, nx_local, ny_local, nz_local, + n_ghost_transfer, n_ghost, F.potential_d, + F.recv_boundaries_buffer_x1_d); + #else + Unload_Transfer_Buffer_GPU(0, 1, nx_local, ny_local, nz_local, + n_ghost_transfer, n_ghost, F.potential_d, + F.recv_boundaries_buffer_x1_d); + #endif } -void Potential_SOR_3D::Unload_Transfer_Buffer_GPU_y0(){ - #ifdef HALF_SIZE_BOUNDARIES - Unload_Transfer_Buffer_Half_GPU( 1, 0, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d, F.recv_boundaries_buffer_y0_d ); - #else - Unload_Transfer_Buffer_GPU( 1, 0, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d, F.recv_boundaries_buffer_y0_d ); - #endif +void Potential_SOR_3D::Unload_Transfer_Buffer_GPU_y0() +{ + #ifdef HALF_SIZE_BOUNDARIES + Unload_Transfer_Buffer_Half_GPU(1, 0, nx_local, ny_local, nz_local, + n_ghost_transfer, n_ghost, F.potential_d, + F.recv_boundaries_buffer_y0_d); + #else + Unload_Transfer_Buffer_GPU(1, 0, nx_local, ny_local, nz_local, + n_ghost_transfer, n_ghost, F.potential_d, + F.recv_boundaries_buffer_y0_d); + #endif } -void Potential_SOR_3D::Unload_Transfer_Buffer_GPU_y1(){ - #ifdef HALF_SIZE_BOUNDARIES - Unload_Transfer_Buffer_Half_GPU( 1, 1, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d, F.recv_boundaries_buffer_y1_d ); - #else - Unload_Transfer_Buffer_GPU( 1, 1, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d, F.recv_boundaries_buffer_y1_d ); - #endif +void Potential_SOR_3D::Unload_Transfer_Buffer_GPU_y1() +{ + #ifdef HALF_SIZE_BOUNDARIES + Unload_Transfer_Buffer_Half_GPU(1, 1, nx_local, ny_local, nz_local, + n_ghost_transfer, n_ghost, F.potential_d, + F.recv_boundaries_buffer_y1_d); + #else + Unload_Transfer_Buffer_GPU(1, 1, nx_local, ny_local, nz_local, + n_ghost_transfer, n_ghost, F.potential_d, + F.recv_boundaries_buffer_y1_d); + #endif } -void Potential_SOR_3D::Unload_Transfer_Buffer_GPU_z0(){ - #ifdef HALF_SIZE_BOUNDARIES - Unload_Transfer_Buffer_Half_GPU( 2, 0, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d, F.recv_boundaries_buffer_z0_d ); - #else - Unload_Transfer_Buffer_GPU( 2, 0, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d, F.recv_boundaries_buffer_z0_d ); - #endif +void Potential_SOR_3D::Unload_Transfer_Buffer_GPU_z0() +{ + #ifdef HALF_SIZE_BOUNDARIES + Unload_Transfer_Buffer_Half_GPU(2, 0, nx_local, ny_local, nz_local, + n_ghost_transfer, n_ghost, F.potential_d, + F.recv_boundaries_buffer_z0_d); + #else + Unload_Transfer_Buffer_GPU(2, 0, nx_local, ny_local, nz_local, + n_ghost_transfer, n_ghost, F.potential_d, + F.recv_boundaries_buffer_z0_d); + #endif } -void Potential_SOR_3D::Unload_Transfer_Buffer_GPU_z1(){ - #ifdef HALF_SIZE_BOUNDARIES - Unload_Transfer_Buffer_Half_GPU( 2, 1, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d, F.recv_boundaries_buffer_z1_d ); - #else - Unload_Transfer_Buffer_GPU( 2, 1, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d, F.recv_boundaries_buffer_z1_d ); - #endif +void Potential_SOR_3D::Unload_Transfer_Buffer_GPU_z1() +{ + #ifdef HALF_SIZE_BOUNDARIES + Unload_Transfer_Buffer_Half_GPU(2, 1, nx_local, ny_local, nz_local, + n_ghost_transfer, n_ghost, F.potential_d, + F.recv_boundaries_buffer_z1_d); + #else + Unload_Transfer_Buffer_GPU(2, 1, nx_local, ny_local, nz_local, + n_ghost_transfer, n_ghost, F.potential_d, + F.recv_boundaries_buffer_z1_d); + #endif } - - -bool Potential_SOR_3D::Get_Global_Converged( bool converged_local ){ - - int in = (int) converged_local; +bool Potential_SOR_3D::Get_Global_Converged(bool converged_local) +{ + int in = (int)converged_local; int out; bool y; - MPI_Allreduce( &in, &out, 1, MPI_INT, MPI_MIN, world); - y = (bool) out; + MPI_Allreduce(&in, &out, 1, MPI_INT, MPI_MIN, world); + y = (bool)out; return y; - } -#endif - - - - + #endif -#endif //GRAVITY +#endif // GRAVITY diff --git a/src/gravity/potential_SOR_3D.h b/src/gravity/potential_SOR_3D.h index d5064b35c..9c4a5f28c 100644 --- a/src/gravity/potential_SOR_3D.h +++ b/src/gravity/potential_SOR_3D.h @@ -1,17 +1,18 @@ #if defined(GRAVITY) && defined(SOR) -#ifndef POTENTIAL_SOR_3D_H -#define POTENTIAL_SOR_3D_H + #ifndef POTENTIAL_SOR_3D_H + #define POTENTIAL_SOR_3D_H -#include "../global/global.h" -#include + #include + + #include "../global/global.h" // #define TIME_SOR // #define HALF_SIZE_BOUNDARIES -class Potential_SOR_3D{ - public: - +class Potential_SOR_3D +{ + public: Real Lbox_x; Real Lbox_y; Real Lbox_z; @@ -38,7 +39,6 @@ class Potential_SOR_3D{ grav_int_t n_cells_potential; grav_int_t n_cells_total; - int n_ghost_transfer; int size_buffer_x; int size_buffer_y; @@ -50,90 +50,114 @@ class Potential_SOR_3D{ bool potential_initialized; - struct Fields - { - - Real *output_h; - - Real *input_d; - // Real *output_d; - Real *density_d; - Real *potential_d; - - bool *converged_d; - - bool *converged_h; - - Real *boundaries_buffer_x0_d; - Real *boundaries_buffer_x1_d; - Real *boundaries_buffer_y0_d; - Real *boundaries_buffer_y1_d; - Real *boundaries_buffer_z0_d; - Real *boundaries_buffer_z1_d; - - - Real *boundary_isolated_x0_d; - Real *boundary_isolated_x1_d; - Real *boundary_isolated_y0_d; - Real *boundary_isolated_y1_d; - Real *boundary_isolated_z0_d; - Real *boundary_isolated_z1_d; - - #ifdef MPI_CHOLLA - Real *recv_boundaries_buffer_x0_d; - Real *recv_boundaries_buffer_x1_d; - Real *recv_boundaries_buffer_y0_d; - Real *recv_boundaries_buffer_y1_d; - Real *recv_boundaries_buffer_z0_d; - Real *recv_boundaries_buffer_z1_d; - #endif + struct Fields { + Real *output_h; + + Real *input_d; + // Real *output_d; + Real *density_d; + Real *potential_d; + + bool *converged_d; + + bool *converged_h; + + Real *boundaries_buffer_x0_d; + Real *boundaries_buffer_x1_d; + Real *boundaries_buffer_y0_d; + Real *boundaries_buffer_y1_d; + Real *boundaries_buffer_z0_d; + Real *boundaries_buffer_z1_d; + + Real *boundary_isolated_x0_d; + Real *boundary_isolated_x1_d; + Real *boundary_isolated_y0_d; + Real *boundary_isolated_y1_d; + Real *boundary_isolated_z0_d; + Real *boundary_isolated_z1_d; + + #ifdef MPI_CHOLLA + Real *recv_boundaries_buffer_x0_d; + Real *recv_boundaries_buffer_x1_d; + Real *recv_boundaries_buffer_y0_d; + Real *recv_boundaries_buffer_y1_d; + Real *recv_boundaries_buffer_z0_d; + Real *recv_boundaries_buffer_z1_d; + #endif } F; - Potential_SOR_3D( void ); - - void Initialize( Real Lx, Real Ly, Real Lz, Real x_min, Real y_min, Real z_min, int nx, int ny, int nz, int nx_real, int ny_real, int nz_real, Real dx, Real dy, Real dz ); - - void AllocateMemory_CPU( void ); - void AllocateMemory_GPU( void ); - void FreeMemory_GPU( void ); - void Reset( void ); - void Copy_Input( int n_cells, Real *input_d, Real *input_density_h, Real Grav_Constant, Real dens_avrg, Real current_a ); - - void Copy_Output( Real *output_potential ); - void Copy_Potential_From_Host( Real *output_potential ); - - - void Set_Boundaries( ); - // Real Get_Potential( Real *input_density, Real *output_potential, Real Grav_Constant, Real dens_avrg, Real current_a ); - // void Copy_Potential_From_Host( Real *potential_host ); - - void Allocate_Array_GPU_Real( Real **array_dev, grav_int_t size ); - void Allocate_Array_GPU_bool( bool **array_dev, grav_int_t size ); - void Free_Array_GPU_Real( Real * array_dev ); - void Free_Array_GPU_bool( bool * array_dev ); - - - - void Initialize_Potential( int nx, int ny, int nz, int n_ghost_potential, Real *potential_d, Real *density_d ); - void Copy_Input_And_Initialize( Real *input_density, const Real *input_potential, Real Grav_Constant, Real dens_avrg, Real current_a ); - - void Poisson_iteration( int n_cells, int nx, int ny, int nz, int n_ghost_potential, Real dx, Real dy, Real dz, Real omega, Real epsilon, Real *density_d, Real *potential_d, bool *converged_h, bool *converged_d ); - void Poisson_iteration_Patial_1( int n_cells, int nx, int ny, int nz, int n_ghost_potential, Real dx, Real dy, Real dz, Real omega, Real epsilon, Real *density_d, Real *potential_d, bool *converged_h, bool *converged_d ); - void Poisson_iteration_Patial_2( int n_cells, int nx, int ny, int nz, int n_ghost_potential, Real dx, Real dy, Real dz, Real omega, Real epsilon, Real *density_d, Real *potential_d, bool *converged_h, bool *converged_d ); - void Poisson_Partial_Iteration( int n_step, Real omega, Real epsilon ); - - - void Load_Transfer_Buffer_GPU( int direction, int side, int nx, int ny, int nz, int n_ghost_transfer, int n_ghost_potential, Real *potential_d, Real *transfer_buffer_d ); - void Load_Transfer_Buffer_Half_GPU( int direction, int side, int nx, int ny, int nz, int n_ghost_transfer, int n_ghost_potential, Real *potential_d, Real *transfer_buffer_d ); + Potential_SOR_3D(void); + + void Initialize(Real Lx, Real Ly, Real Lz, Real x_min, Real y_min, Real z_min, + int nx, int ny, int nz, int nx_real, int ny_real, int nz_real, + Real dx, Real dy, Real dz); + + void AllocateMemory_CPU(void); + void AllocateMemory_GPU(void); + void FreeMemory_GPU(void); + void Reset(void); + void Copy_Input(int n_cells, Real *input_d, Real *input_density_h, + Real Grav_Constant, Real dens_avrg, Real current_a); + + void Copy_Output(Real *output_potential); + void Copy_Potential_From_Host(Real *output_potential); + + void Set_Boundaries(); + // Real Get_Potential( Real *input_density, Real *output_potential, Real + // Grav_Constant, Real dens_avrg, Real current_a ); void + // Copy_Potential_From_Host( Real *potential_host ); + + void Allocate_Array_GPU_Real(Real **array_dev, grav_int_t size); + void Allocate_Array_GPU_bool(bool **array_dev, grav_int_t size); + void Free_Array_GPU_Real(Real *array_dev); + void Free_Array_GPU_bool(bool *array_dev); + + void Initialize_Potential(int nx, int ny, int nz, int n_ghost_potential, + Real *potential_d, Real *density_d); + void Copy_Input_And_Initialize(Real *input_density, + const Real *input_potential, + Real Grav_Constant, Real dens_avrg, + Real current_a); + + void Poisson_iteration(int n_cells, int nx, int ny, int nz, + int n_ghost_potential, Real dx, Real dy, Real dz, + Real omega, Real epsilon, Real *density_d, + Real *potential_d, bool *converged_h, + bool *converged_d); + void Poisson_iteration_Patial_1(int n_cells, int nx, int ny, int nz, + int n_ghost_potential, Real dx, Real dy, + Real dz, Real omega, Real epsilon, + Real *density_d, Real *potential_d, + bool *converged_h, bool *converged_d); + void Poisson_iteration_Patial_2(int n_cells, int nx, int ny, int nz, + int n_ghost_potential, Real dx, Real dy, + Real dz, Real omega, Real epsilon, + Real *density_d, Real *potential_d, + bool *converged_h, bool *converged_d); + void Poisson_Partial_Iteration(int n_step, Real omega, Real epsilon); + + void Load_Transfer_Buffer_GPU(int direction, int side, int nx, int ny, int nz, + int n_ghost_transfer, int n_ghost_potential, + Real *potential_d, Real *transfer_buffer_d); + void Load_Transfer_Buffer_Half_GPU(int direction, int side, int nx, int ny, + int nz, int n_ghost_transfer, + int n_ghost_potential, Real *potential_d, + Real *transfer_buffer_d); void Load_Transfer_Buffer_GPU_x0(); void Load_Transfer_Buffer_GPU_x1(); void Load_Transfer_Buffer_GPU_y0(); void Load_Transfer_Buffer_GPU_y1(); void Load_Transfer_Buffer_GPU_z0(); void Load_Transfer_Buffer_GPU_z1(); - void Unload_Transfer_Buffer_GPU( int direction, int side, int nx, int ny, int nz, int n_ghost_transfer, int n_ghost_potential, Real *potential_d, Real *transfer_buffer_d ); - void Unload_Transfer_Buffer_Half_GPU( int direction, int side, int nx, int ny, int nz, int n_ghost_transfer, int n_ghost_potential, Real *potential_d, Real *transfer_buffer_d ); + void Unload_Transfer_Buffer_GPU(int direction, int side, int nx, int ny, + int nz, int n_ghost_transfer, + int n_ghost_potential, Real *potential_d, + Real *transfer_buffer_d); + void Unload_Transfer_Buffer_Half_GPU(int direction, int side, int nx, int ny, + int nz, int n_ghost_transfer, + int n_ghost_potential, Real *potential_d, + Real *transfer_buffer_d); void Unload_Transfer_Buffer_GPU_x0(); void Unload_Transfer_Buffer_GPU_x1(); void Unload_Transfer_Buffer_GPU_y0(); @@ -141,27 +165,26 @@ class Potential_SOR_3D{ void Unload_Transfer_Buffer_GPU_z0(); void Unload_Transfer_Buffer_GPU_z1(); - void Copy_Poisson_Boundary_Periodic( int direction, int side ); + void Copy_Poisson_Boundary_Periodic(int direction, int side); - void Copy_Poisson_Boundary_Open( int direction, int side ); + void Copy_Poisson_Boundary_Open(int direction, int side); // void Load_Transfer_Buffer_GPU_All(); // void Unload_Transfer_Buffer_GPU_All(); - void Copy_Transfer_Buffer_To_Host( int size_buffer, Real *transfer_bufer_h, Real *transfer_buffer_d ); - void Copy_Transfer_Buffer_To_Device( int size_buffer, Real *transfer_bufer_h, Real *transfer_buffer_d ); - - void Set_Isolated_Boundary_Conditions( int *boundary_flags, struct parameters *P ); - void Set_Isolated_Boundary_GPU( int direction, int side, Real *boundary_d ); + void Copy_Transfer_Buffer_To_Host(int size_buffer, Real *transfer_bufer_h, + Real *transfer_buffer_d); + void Copy_Transfer_Buffer_To_Device(int size_buffer, Real *transfer_bufer_h, + Real *transfer_buffer_d); + void Set_Isolated_Boundary_Conditions(int *boundary_flags, + struct parameters *P); + void Set_Isolated_Boundary_GPU(int direction, int side, Real *boundary_d); - #ifdef MPI_CHOLLA - bool Get_Global_Converged( bool converged_local ); - #endif + #ifdef MPI_CHOLLA + bool Get_Global_Converged(bool converged_local); + #endif }; - - - -#endif //POTENTIAL_SOR_H -#endif //GRAVITY + #endif // POTENTIAL_SOR_H +#endif // GRAVITY diff --git a/src/gravity/potential_SOR_3D_gpu.cu b/src/gravity/potential_SOR_3D_gpu.cu index e9b921e98..93883af5f 100644 --- a/src/gravity/potential_SOR_3D_gpu.cu +++ b/src/gravity/potential_SOR_3D_gpu.cu @@ -1,74 +1,92 @@ #if defined(CUDA) && defined(GRAVITY) && defined(SOR) -#include "../gravity/potential_SOR_3D.h" -#include "../global/global_cuda.h" -#include "../io/io.h" + #include "../global/global_cuda.h" + #include "../gravity/potential_SOR_3D.h" + #include "../io/io.h" + #define TPB_SOR 1024 -#define TPB_SOR 1024 - - -void Potential_SOR_3D::Allocate_Array_GPU_Real( Real **array_dev, grav_int_t size ){ - cudaMalloc( (void**)array_dev, size*sizeof(Real)); +void Potential_SOR_3D::Allocate_Array_GPU_Real(Real **array_dev, + grav_int_t size) +{ + cudaMalloc((void **)array_dev, size * sizeof(Real)); CudaCheckError(); } -void Potential_SOR_3D::Allocate_Array_GPU_bool( bool **array_dev, grav_int_t size ){ - cudaMalloc( (void**)array_dev, size*sizeof(bool)); +void Potential_SOR_3D::Allocate_Array_GPU_bool(bool **array_dev, + grav_int_t size) +{ + cudaMalloc((void **)array_dev, size * sizeof(bool)); CudaCheckError(); } -void Potential_SOR_3D::Free_Array_GPU_Real( Real *array_dev ){ - cudaFree( array_dev ); +void Potential_SOR_3D::Free_Array_GPU_Real(Real *array_dev) +{ + cudaFree(array_dev); CudaCheckError(); } -void Potential_SOR_3D::Free_Array_GPU_bool( bool *array_dev ){ - cudaFree( array_dev ); +void Potential_SOR_3D::Free_Array_GPU_bool(bool *array_dev) +{ + cudaFree(array_dev); CudaCheckError(); } -__global__ void Copy_Input_Kernel( int n_cells, Real *input_d, Real *density_d, Real Grav_Constant, Real dens_avrg, Real current_a ){ - +__global__ void Copy_Input_Kernel(int n_cells, Real *input_d, Real *density_d, + Real Grav_Constant, Real dens_avrg, + Real current_a) +{ int tid = threadIdx.x + blockIdx.x * blockDim.x; - if ( tid >= n_cells ) return; + if (tid >= n_cells) return; #ifdef COSMOLOGY - density_d[tid] = 4 * M_PI * Grav_Constant * ( input_d[tid] - dens_avrg ) / current_a; + density_d[tid] = + 4 * M_PI * Grav_Constant * (input_d[tid] - dens_avrg) / current_a; #else - density_d[tid] = 4 * M_PI * Grav_Constant * ( input_d[tid] - dens_avrg ); + density_d[tid] = 4 * M_PI * Grav_Constant * (input_d[tid] - dens_avrg); #endif // if (tid == 0) printf("dens: %f\n", density_d[tid]); } - -void Potential_SOR_3D::Copy_Input( int n_cells, Real *input_d, Real *input_density_h, Real Grav_Constant, Real dens_avrg, Real current_a ){ - cudaMemcpy( input_d, input_density_h, n_cells*sizeof(Real), cudaMemcpyHostToDevice ); +void Potential_SOR_3D::Copy_Input(int n_cells, Real *input_d, + Real *input_density_h, Real Grav_Constant, + Real dens_avrg, Real current_a) +{ + cudaMemcpy(input_d, input_density_h, n_cells * sizeof(Real), + cudaMemcpyHostToDevice); // set values for GPU kernels - int ngrid = (n_cells_local + TPB_SOR - 1) / TPB_SOR; + int ngrid = (n_cells_local + TPB_SOR - 1) / TPB_SOR; // number of blocks per 1D grid dim3 dim1dGrid(ngrid, 1, 1); // number of threads per 1D block dim3 dim1dBlock(TPB_SOR, 1, 1); - // Copy_Input_Kernel<<>>( n_cells_local, F.input_d, F.density_d, Grav_Constant, dens_avrg, current_a ); - hipLaunchKernelGGL( Copy_Input_Kernel, dim1dGrid, dim1dBlock, 0, 0, n_cells_local, F.input_d, F.density_d, Grav_Constant, dens_avrg, current_a ); - + // Copy_Input_Kernel<<>>( n_cells_local, F.input_d, + // F.density_d, Grav_Constant, dens_avrg, current_a ); + hipLaunchKernelGGL(Copy_Input_Kernel, dim1dGrid, dim1dBlock, 0, 0, + n_cells_local, F.input_d, F.density_d, Grav_Constant, + dens_avrg, current_a); } -void Grav3D::Copy_Isolated_Boundary_To_GPU_buffer( Real *isolated_boundary_h, Real *isolated_boundary_d, int boundary_size ){ - cudaMemcpy( isolated_boundary_d, isolated_boundary_h, boundary_size*sizeof(Real), cudaMemcpyHostToDevice ); +void Grav3D::Copy_Isolated_Boundary_To_GPU_buffer(Real *isolated_boundary_h, + Real *isolated_boundary_d, + int boundary_size) +{ + cudaMemcpy(isolated_boundary_d, isolated_boundary_h, + boundary_size * sizeof(Real), cudaMemcpyHostToDevice); } -__global__ void Initialize_Potential_Kernel( Real init_val, Real *potential_d, Real *density_d, int nx, int ny, int nz, int n_ghost ){ - +__global__ void Initialize_Potential_Kernel(Real init_val, Real *potential_d, + Real *density_d, int nx, int ny, + int nz, int n_ghost) +{ int tid_x, tid_y, tid_z, tid_pot; tid_x = blockIdx.x * blockDim.x + threadIdx.x; tid_y = blockIdx.y * blockDim.y + threadIdx.y; tid_z = blockIdx.z * blockDim.z + threadIdx.z; - if (tid_x >= nx || tid_y >= ny || tid_z >= nz ) return; + if (tid_x >= nx || tid_y >= ny || tid_z >= nz) return; // tid = tid_x + tid_y*nx + tid_z*nx*ny; @@ -77,88 +95,93 @@ __global__ void Initialize_Potential_Kernel( Real init_val, Real *potential_d, R tid_z += n_ghost; int nx_pot, ny_pot; - nx_pot = nx + 2*n_ghost; - ny_pot = ny + 2*n_ghost; + nx_pot = nx + 2 * n_ghost; + ny_pot = ny + 2 * n_ghost; - - tid_pot = tid_x + tid_y*nx_pot + tid_z*nx_pot*ny_pot; + tid_pot = tid_x + tid_y * nx_pot + tid_z * nx_pot * ny_pot; potential_d[tid_pot] = init_val; - //if ( potential_d[tid_pot] !=1 ) printf("Error phi value: %f\n", potential_d[tid_pot] ); - + // if ( potential_d[tid_pot] !=1 ) printf("Error phi value: %f\n", + // potential_d[tid_pot] ); // Real dens = density_d[tid]; // potential_d[tid_pot] = -dens; - } - - -void Potential_SOR_3D::Initialize_Potential( int nx, int ny, int nz, int n_ghost_potential, Real *potential_d, Real *density_d ){ +void Potential_SOR_3D::Initialize_Potential(int nx, int ny, int nz, + int n_ghost_potential, + Real *potential_d, Real *density_d) +{ // set values for GPU kernels - int tpb_x = 16; - int tpb_y = 8; - int tpb_z = 8; - int ngrid_x = (nx_local + tpb_x - 1) / tpb_x; - int ngrid_y = (ny_local + tpb_y - 1) / tpb_y; - int ngrid_z = (nz_local + tpb_z - 1) / tpb_z; + int tpb_x = 16; + int tpb_y = 8; + int tpb_z = 8; + int ngrid_x = (nx_local + tpb_x - 1) / tpb_x; + int ngrid_y = (ny_local + tpb_y - 1) / tpb_y; + int ngrid_z = (nz_local + tpb_z - 1) / tpb_z; // number of blocks per 1D grid dim3 dim3dGrid(ngrid_x, ngrid_y, ngrid_z); // number of threads per 1D block dim3 dim3dBlock(tpb_x, tpb_y, tpb_z); - // Initialize_Potential_Kernel<<>>( 1, potential_d, density_d, nx, ny, nz, n_ghost_potential ); - hipLaunchKernelGGL( Initialize_Potential_Kernel, dim3dGrid, dim3dBlock, 0, 0, 1, potential_d, density_d, nx, ny, nz, n_ghost_potential ); - + // Initialize_Potential_Kernel<<>>( 1, potential_d, + // density_d, nx, ny, nz, n_ghost_potential ); + hipLaunchKernelGGL(Initialize_Potential_Kernel, dim3dGrid, dim3dBlock, 0, 0, + 1, potential_d, density_d, nx, ny, nz, n_ghost_potential); } - -__global__ void Iteration_Step_SOR( int n_cells, Real *density_d, Real *potential_d, int nx, int ny, int nz, int n_ghost, Real dx, Real dy, Real dz, Real omega, int parity, Real epsilon, bool *converged_d ){ - +__global__ void Iteration_Step_SOR(int n_cells, Real *density_d, + Real *potential_d, int nx, int ny, int nz, + int n_ghost, Real dx, Real dy, Real dz, + Real omega, int parity, Real epsilon, + bool *converged_d) +{ int tid_x, tid_y, tid_z, tid, tid_pot; - tid_x = 2*( blockIdx.x * blockDim.x + threadIdx.x ); + tid_x = 2 * (blockIdx.x * blockDim.x + threadIdx.x); tid_y = blockIdx.y * blockDim.y + threadIdx.y; tid_z = blockIdx.z * blockDim.z + threadIdx.z; // Make a checkboard 3D grid - if ( tid_y%2 == 0 ){ - if ( tid_z%2 == parity ) tid_x +=1; - } - else if ( (tid_z+1)%2 == parity ) tid_x +=1; + if (tid_y % 2 == 0) { + if (tid_z % 2 == parity) tid_x += 1; + } else if ((tid_z + 1) % 2 == parity) + tid_x += 1; - if (tid_x >= nx || tid_y >= ny || tid_z >= nz ) return; + if (tid_x >= nx || tid_y >= ny || tid_z >= nz) return; int nx_pot, ny_pot; - nx_pot = nx + 2*n_ghost; - ny_pot = ny + 2*n_ghost; + nx_pot = nx + 2 * n_ghost; + ny_pot = ny + 2 * n_ghost; // nz_pot = nz + 2*n_ghost; - tid = tid_x + tid_y*nx + tid_z*nx*ny; + tid = tid_x + tid_y * nx + tid_z * nx * ny; tid_x += n_ghost; tid_y += n_ghost; tid_z += n_ghost; - tid_pot = tid_x + tid_y*nx_pot + tid_z*nx_pot*ny_pot; + tid_pot = tid_x + tid_y * nx_pot + tid_z * nx_pot * ny_pot; // //Set neighbors ids int indx_l, indx_r, indx_d, indx_u, indx_b, indx_t; - indx_l = tid_x-1; //Left - indx_r = tid_x+1; //Right - indx_d = tid_y-1; //Down - indx_u = tid_y+1; //Up - indx_b = tid_z-1; //Bottom - indx_t = tid_z+1; //Top + indx_l = tid_x - 1; // Left + indx_r = tid_x + 1; // Right + indx_d = tid_y - 1; // Down + indx_u = tid_y + 1; // Up + indx_b = tid_z - 1; // Bottom + indx_t = tid_z + 1; // Top - //Boundary Conditions are loaded to the potential array, the natural indices work! + // Boundary Conditions are loaded to the potential array, the natural indices + // work! // //Periodic Boundary conditions // indx_l = tid_x == n_ghost ? nx_pot-n_ghost-1 : tid_x-1; //Left - // indx_r = tid_x == nx_pot-n_ghost-1 ? n_ghost : tid_x+1; //Right + // indx_r = tid_x == nx_pot-n_ghost-1 ? n_ghost : tid_x+1; //Right // indx_d = tid_y == n_ghost ? ny_pot-n_ghost-1 : tid_y-1; //Down // indx_u = tid_y == ny_pot-n_ghost-1 ? n_ghost : tid_y+1; //Up - // indx_b = tid_z == n_ghost ? nz_pot-n_ghost-1 : tid_z-1; //Bottom - // indx_t = tid_z == nz_pot-n_ghost-1 ? n_ghost : tid_z+1; //Top + // indx_b = tid_z == n_ghost ? nz_pot-n_ghost-1 : tid_z-1; + // //Bottom indx_t = tid_z == nz_pot-n_ghost-1 ? n_ghost : + // tid_z+1; //Top // // //Zero Gradient Boundary conditions // indx_l = tid_x == n_ghost ? tid_x+1 : tid_x-1; //Left @@ -168,163 +191,201 @@ __global__ void Iteration_Step_SOR( int n_cells, Real *density_d, Real *potentia // indx_b = tid_z == n_ghost ? tid_z+1 : tid_z-1; //Bottom // indx_t = tid_z == nz_pot-n_ghost-1 ? tid_z-1 : tid_z+1; //Top - - Real rho, phi_c, phi_l, phi_r, phi_d, phi_u, phi_b, phi_t, phi_new; - rho = density_d[tid]; + rho = density_d[tid]; phi_c = potential_d[tid_pot]; - phi_l = potential_d[ indx_l + tid_y*nx_pot + tid_z*nx_pot*ny_pot ]; - phi_r = potential_d[ indx_r + tid_y*nx_pot + tid_z*nx_pot*ny_pot ]; - phi_d = potential_d[ tid_x + indx_d*nx_pot + tid_z*nx_pot*ny_pot ]; - phi_u = potential_d[ tid_x + indx_u*nx_pot + tid_z*nx_pot*ny_pot ]; - phi_b = potential_d[ tid_x + tid_y*nx_pot + indx_b*nx_pot*ny_pot ]; - phi_t = potential_d[ tid_x + tid_y*nx_pot + indx_t*nx_pot*ny_pot ]; - - phi_new = (1-omega)*phi_c + omega/6*( phi_l + phi_r + phi_d + phi_u + phi_b + phi_t - dx*dx*rho ); + phi_l = potential_d[indx_l + tid_y * nx_pot + tid_z * nx_pot * ny_pot]; + phi_r = potential_d[indx_r + tid_y * nx_pot + tid_z * nx_pot * ny_pot]; + phi_d = potential_d[tid_x + indx_d * nx_pot + tid_z * nx_pot * ny_pot]; + phi_u = potential_d[tid_x + indx_u * nx_pot + tid_z * nx_pot * ny_pot]; + phi_b = potential_d[tid_x + tid_y * nx_pot + indx_b * nx_pot * ny_pot]; + phi_t = potential_d[tid_x + tid_y * nx_pot + indx_t * nx_pot * ny_pot]; + + phi_new = (1 - omega) * phi_c + + omega / 6 * + (phi_l + phi_r + phi_d + phi_u + phi_b + phi_t - dx * dx * rho); potential_d[tid_pot] = phi_new; // potential_d[tid_pot] = parity + 1; - //Check the residual for the convergence criteria - if ( ( fabs( ( phi_new - phi_c ) / phi_c ) > epsilon ) ) converged_d[0] = 0; - // if ( ( fabs( ( phi_new - phi_c ) / phi_c ) > epsilon ) ) printf("%f\n", fabs( ( phi_new - phi_c ) / phi_c) ); - // if ( ( fabs( ( phi_new - phi_c ) ) > epsilon ) ) converged_d[0] = 0; - - - - + // Check the residual for the convergence criteria + if ((fabs((phi_new - phi_c) / phi_c) > epsilon)) converged_d[0] = 0; + // if ( ( fabs( ( phi_new - phi_c ) / phi_c ) > epsilon ) ) printf("%f\n", + // fabs( ( phi_new - phi_c ) / phi_c) ); if ( ( fabs( ( phi_new - phi_c ) ) > + // epsilon ) ) converged_d[0] = 0; } -void Potential_SOR_3D::Poisson_iteration( int n_cells, int nx, int ny, int nz, int n_ghost_potential, Real dx, Real dy, Real dz, Real omega, Real epsilon, Real *density_d, Real *potential_d, bool *converged_h, bool *converged_d ){ - +void Potential_SOR_3D::Poisson_iteration(int n_cells, int nx, int ny, int nz, + int n_ghost_potential, Real dx, + Real dy, Real dz, Real omega, + Real epsilon, Real *density_d, + Real *potential_d, bool *converged_h, + bool *converged_d) +{ // set values for GPU kernels - int tpb_x = 16; - int tpb_y = 8; - int tpb_z = 8; - int ngrid_x = (nx_local + tpb_x - 1) / tpb_x; - int ngrid_y = (ny_local + tpb_y - 1) / tpb_y; - int ngrid_z = (nz_local + tpb_z - 1) / tpb_z; - int ngrid_x_half = ( nx_local/2 + tpb_x - 1) / tpb_x; + int tpb_x = 16; + int tpb_y = 8; + int tpb_z = 8; + int ngrid_x = (nx_local + tpb_x - 1) / tpb_x; + int ngrid_y = (ny_local + tpb_y - 1) / tpb_y; + int ngrid_z = (nz_local + tpb_z - 1) / tpb_z; + int ngrid_x_half = (nx_local / 2 + tpb_x - 1) / tpb_x; // number of blocks per 1D grid dim3 dim3dGrid_half(ngrid_x_half, ngrid_y, ngrid_z); dim3 dim3dGrid(ngrid_x, ngrid_y, ngrid_z); // number of threads per 1D block dim3 dim3dBlock(tpb_x, tpb_y, tpb_z); - cudaMemset( converged_d, 1, sizeof(bool) ); - - // Iteration_Step_SOR<<>>( n_cells, density_d, potential_d, nx, ny, nz, n_ghost_potential, dx, dy, dz, omega, 0, epsilon, converged_d ); - hipLaunchKernelGGL( Iteration_Step_SOR, dim3dGrid_half, dim3dBlock, 0, 0, n_cells, density_d, potential_d, nx, ny, nz, n_ghost_potential, dx, dy, dz, omega, 0, epsilon, converged_d ); - - // Iteration_Step_SOR<<>>( n_cells, density_d, potential_d, nx, ny, nz, n_ghost_potential, dx, dy, dz, omega, 1, epsilon, converged_d ); - hipLaunchKernelGGL( Iteration_Step_SOR, dim3dGrid_half, dim3dBlock, 0, 0, n_cells, density_d, potential_d, nx, ny, nz, n_ghost_potential, dx, dy, dz, omega, 1, epsilon, converged_d ); - - cudaMemcpy( converged_h, converged_d, sizeof(bool), cudaMemcpyDeviceToHost ); - + cudaMemset(converged_d, 1, sizeof(bool)); + + // Iteration_Step_SOR<<>>( n_cells, density_d, + // potential_d, nx, ny, nz, n_ghost_potential, dx, dy, dz, omega, 0, epsilon, + // converged_d ); + hipLaunchKernelGGL(Iteration_Step_SOR, dim3dGrid_half, dim3dBlock, 0, 0, + n_cells, density_d, potential_d, nx, ny, nz, + n_ghost_potential, dx, dy, dz, omega, 0, epsilon, + converged_d); + + // Iteration_Step_SOR<<>>( n_cells, density_d, + // potential_d, nx, ny, nz, n_ghost_potential, dx, dy, dz, omega, 1, epsilon, + // converged_d ); + hipLaunchKernelGGL(Iteration_Step_SOR, dim3dGrid_half, dim3dBlock, 0, 0, + n_cells, density_d, potential_d, nx, ny, nz, + n_ghost_potential, dx, dy, dz, omega, 1, epsilon, + converged_d); + + cudaMemcpy(converged_h, converged_d, sizeof(bool), cudaMemcpyDeviceToHost); } - -void Potential_SOR_3D::Poisson_iteration_Patial_1( int n_cells, int nx, int ny, int nz, int n_ghost_potential, Real dx, Real dy, Real dz, Real omega, Real epsilon, Real *density_d, Real *potential_d, bool *converged_h, bool *converged_d ){ - +void Potential_SOR_3D::Poisson_iteration_Patial_1( + int n_cells, int nx, int ny, int nz, int n_ghost_potential, Real dx, + Real dy, Real dz, Real omega, Real epsilon, Real *density_d, + Real *potential_d, bool *converged_h, bool *converged_d) +{ // set values for GPU kernels - int tpb_x = 16; - int tpb_y = 8; - int tpb_z = 8; - int ngrid_x = (nx_local + tpb_x - 1) / tpb_x; - int ngrid_y = (ny_local + tpb_y - 1) / tpb_y; - int ngrid_z = (nz_local + tpb_z - 1) / tpb_z; - int ngrid_x_half = ( nx_local/2 + tpb_x - 1) / tpb_x; + int tpb_x = 16; + int tpb_y = 8; + int tpb_z = 8; + int ngrid_x = (nx_local + tpb_x - 1) / tpb_x; + int ngrid_y = (ny_local + tpb_y - 1) / tpb_y; + int ngrid_z = (nz_local + tpb_z - 1) / tpb_z; + int ngrid_x_half = (nx_local / 2 + tpb_x - 1) / tpb_x; // number of blocks per 1D grid dim3 dim3dGrid_half(ngrid_x_half, ngrid_y, ngrid_z); dim3 dim3dGrid(ngrid_x, ngrid_y, ngrid_z); // number of threads per 1D block dim3 dim3dBlock(tpb_x, tpb_y, tpb_z); - cudaMemset( converged_d, 1, sizeof(bool) ); - - // Iteration_Step_SOR<<>>( n_cells, density_d, potential_d, nx, ny, nz, n_ghost_potential, dx, dy, dz, omega, 0, epsilon, converged_d ); - hipLaunchKernelGGL( Iteration_Step_SOR, dim3dGrid_half, dim3dBlock, 0, 0, n_cells, density_d, potential_d, nx, ny, nz, n_ghost_potential, dx, dy, dz, omega, 0, epsilon, converged_d ); + cudaMemset(converged_d, 1, sizeof(bool)); + // Iteration_Step_SOR<<>>( n_cells, density_d, + // potential_d, nx, ny, nz, n_ghost_potential, dx, dy, dz, omega, 0, epsilon, + // converged_d ); + hipLaunchKernelGGL(Iteration_Step_SOR, dim3dGrid_half, dim3dBlock, 0, 0, + n_cells, density_d, potential_d, nx, ny, nz, + n_ghost_potential, dx, dy, dz, omega, 0, epsilon, + converged_d); } - -void Potential_SOR_3D::Poisson_iteration_Patial_2( int n_cells, int nx, int ny, int nz, int n_ghost_potential, Real dx, Real dy, Real dz, Real omega, Real epsilon, Real *density_d, Real *potential_d, bool *converged_h, bool *converged_d ){ - +void Potential_SOR_3D::Poisson_iteration_Patial_2( + int n_cells, int nx, int ny, int nz, int n_ghost_potential, Real dx, + Real dy, Real dz, Real omega, Real epsilon, Real *density_d, + Real *potential_d, bool *converged_h, bool *converged_d) +{ // set values for GPU kernels - int tpb_x = 16; - int tpb_y = 8; - int tpb_z = 8; - int ngrid_x = (nx_local + tpb_x - 1) / tpb_x; - int ngrid_y = (ny_local + tpb_y - 1) / tpb_y; - int ngrid_z = (nz_local + tpb_z - 1) / tpb_z; - int ngrid_x_half = ( nx_local/2 + tpb_x - 1) / tpb_x; + int tpb_x = 16; + int tpb_y = 8; + int tpb_z = 8; + int ngrid_x = (nx_local + tpb_x - 1) / tpb_x; + int ngrid_y = (ny_local + tpb_y - 1) / tpb_y; + int ngrid_z = (nz_local + tpb_z - 1) / tpb_z; + int ngrid_x_half = (nx_local / 2 + tpb_x - 1) / tpb_x; // number of blocks per 1D grid dim3 dim3dGrid_half(ngrid_x_half, ngrid_y, ngrid_z); dim3 dim3dGrid(ngrid_x, ngrid_y, ngrid_z); // number of threads per 1D block dim3 dim3dBlock(tpb_x, tpb_y, tpb_z); - // Iteration_Step_SOR<<>>( n_cells, density_d, potential_d, nx, ny, nz, n_ghost_potential, dx, dy, dz, omega, 1, epsilon, converged_d ); - hipLaunchKernelGGL( Iteration_Step_SOR, dim3dGrid_half, dim3dBlock, 0, 0, n_cells, density_d, potential_d, nx, ny, nz, n_ghost_potential, dx, dy, dz, omega, 1, epsilon, converged_d ); - - cudaMemcpy( converged_h, converged_d, sizeof(bool), cudaMemcpyDeviceToHost ); + // Iteration_Step_SOR<<>>( n_cells, density_d, + // potential_d, nx, ny, nz, n_ghost_potential, dx, dy, dz, omega, 1, epsilon, + // converged_d ); + hipLaunchKernelGGL(Iteration_Step_SOR, dim3dGrid_half, dim3dBlock, 0, 0, + n_cells, density_d, potential_d, nx, ny, nz, + n_ghost_potential, dx, dy, dz, omega, 1, epsilon, + converged_d); + cudaMemcpy(converged_h, converged_d, sizeof(bool), cudaMemcpyDeviceToHost); } - -__global__ void Set_Isolated_Boundary_GPU_kernel( int direction, int side, int size_buffer, int n_i, int n_j, int n_ghost, int nx_pot, int ny_pot, int nz_pot, Real *potential_d, Real *boundary_d ){ - +__global__ void Set_Isolated_Boundary_GPU_kernel( + int direction, int side, int size_buffer, int n_i, int n_j, int n_ghost, + int nx_pot, int ny_pot, int nz_pot, Real *potential_d, Real *boundary_d) +{ // get a global thread ID int nx_local, ny_local, nz_local; - nx_local = nx_pot - 2*n_ghost; - ny_local = ny_pot - 2*n_ghost; - nz_local = nz_pot - 2*n_ghost; + nx_local = nx_pot - 2 * n_ghost; + ny_local = ny_pot - 2 * n_ghost; + nz_local = nz_pot - 2 * n_ghost; int tid, tid_i, tid_j, tid_k, tid_buffer, tid_pot; - tid = threadIdx.x + blockIdx.x * blockDim.x; - tid_k = tid / (n_i*n_j); - tid_j = (tid - tid_k*n_i*n_j) / n_i; - tid_i = tid - tid_k*n_i*n_j - tid_j*n_i; - - if ( tid_i < 0 || tid_i >= n_i || tid_j < 0 || tid_j >= n_j || tid_k < 0 || tid_k >= n_ghost ) return; - - tid_buffer = tid_i + tid_j*n_i + tid_k*n_i*n_j; - - if ( direction == 0 ){ - if ( side == 0 ) tid_pot = (tid_k) + (tid_i+n_ghost)*nx_pot + (tid_j+n_ghost)*nx_pot*ny_pot; - if ( side == 1 ) tid_pot = (tid_k+nx_local+n_ghost) + (tid_i+n_ghost)*nx_pot + (tid_j+n_ghost)*nx_pot*ny_pot; + tid = threadIdx.x + blockIdx.x * blockDim.x; + tid_k = tid / (n_i * n_j); + tid_j = (tid - tid_k * n_i * n_j) / n_i; + tid_i = tid - tid_k * n_i * n_j - tid_j * n_i; + + if (tid_i < 0 || tid_i >= n_i || tid_j < 0 || tid_j >= n_j || tid_k < 0 || + tid_k >= n_ghost) + return; + + tid_buffer = tid_i + tid_j * n_i + tid_k * n_i * n_j; + + if (direction == 0) { + if (side == 0) + tid_pot = (tid_k) + (tid_i + n_ghost) * nx_pot + + (tid_j + n_ghost) * nx_pot * ny_pot; + if (side == 1) + tid_pot = (tid_k + nx_local + n_ghost) + (tid_i + n_ghost) * nx_pot + + (tid_j + n_ghost) * nx_pot * ny_pot; } - if ( direction == 1 ){ - if ( side == 0 ) tid_pot = (tid_i+n_ghost) + (tid_k)*nx_pot + (tid_j+n_ghost)*nx_pot*ny_pot; - if ( side == 1 ) tid_pot = (tid_i+n_ghost) + (tid_k+ny_local+n_ghost)*nx_pot + (tid_j+n_ghost)*nx_pot*ny_pot; + if (direction == 1) { + if (side == 0) + tid_pot = (tid_i + n_ghost) + (tid_k)*nx_pot + + (tid_j + n_ghost) * nx_pot * ny_pot; + if (side == 1) + tid_pot = (tid_i + n_ghost) + (tid_k + ny_local + n_ghost) * nx_pot + + (tid_j + n_ghost) * nx_pot * ny_pot; } - if ( direction == 2 ){ - if ( side == 0 ) tid_pot = (tid_i+n_ghost) + (tid_j+n_ghost)*nx_pot + (tid_k)*nx_pot*ny_pot; - if ( side == 1 ) tid_pot = (tid_i+n_ghost) + (tid_j+n_ghost)*nx_pot + (tid_k+nz_local+n_ghost)*nx_pot*ny_pot; + if (direction == 2) { + if (side == 0) + tid_pot = (tid_i + n_ghost) + (tid_j + n_ghost) * nx_pot + + (tid_k)*nx_pot * ny_pot; + if (side == 1) + tid_pot = (tid_i + n_ghost) + (tid_j + n_ghost) * nx_pot + + (tid_k + nz_local + n_ghost) * nx_pot * ny_pot; } potential_d[tid_pot] = boundary_d[tid_buffer]; - } -void Potential_SOR_3D::Set_Isolated_Boundary_GPU( int direction, int side, Real *boundary_d ){ - +void Potential_SOR_3D::Set_Isolated_Boundary_GPU(int direction, int side, + Real *boundary_d) +{ // #ifdef MPI_CHOLLA - // printf("Pid: %d Setting Isolated Boundary: %d %d \n",procID, direction, side ); - // #endif + // printf("Pid: %d Setting Isolated Boundary: %d %d \n",procID, direction, + // side ); #endif // int nx_pot, ny_pot, nz_pot, size_buffer, n_i, n_j, ngrid; - nx_pot = nx_local + 2*n_ghost; - ny_pot = ny_local + 2*n_ghost; - nz_pot = nz_local + 2*n_ghost; + nx_pot = nx_local + 2 * n_ghost; + ny_pot = ny_local + 2 * n_ghost; + nz_pot = nz_local + 2 * n_ghost; - if ( direction == 0 ){ + if (direction == 0) { n_i = ny_local; n_j = nz_local; } - if ( direction == 1 ){ + if (direction == 1) { n_i = nx_local; n_j = nz_local; } - if ( direction == 2 ){ + if (direction == 2) { n_i = nx_local; n_j = ny_local; } @@ -332,74 +393,93 @@ void Potential_SOR_3D::Set_Isolated_Boundary_GPU( int direction, int side, Rea size_buffer = n_ghost * n_i * n_j; // set values for GPU kernels - ngrid = ( size_buffer - 1 ) / TPB_SOR + 1; + ngrid = (size_buffer - 1) / TPB_SOR + 1; // number of blocks per 1D grid dim3 dim1dGrid(ngrid, 1, 1); // number of threads per 1D block dim3 dim1dBlock(TPB_SOR, 1, 1); - // Set_Isolated_Boundary_GPU_kernel<<>>( direction, side, size_buffer, n_i, n_j, n_ghost, nx_pot, ny_pot, nz_pot, F.potential_d, boundary_d ); - hipLaunchKernelGGL( Set_Isolated_Boundary_GPU_kernel, dim1dGrid, dim1dBlock, 0, 0, direction, side, size_buffer, n_i, n_j, n_ghost, nx_pot, ny_pot, nz_pot, F.potential_d, boundary_d ); - + // Set_Isolated_Boundary_GPU_kernel<<>>( direction, + // side, size_buffer, n_i, n_j, n_ghost, nx_pot, ny_pot, nz_pot, + // F.potential_d, boundary_d ); + hipLaunchKernelGGL(Set_Isolated_Boundary_GPU_kernel, dim1dGrid, dim1dBlock, 0, + 0, direction, side, size_buffer, n_i, n_j, n_ghost, nx_pot, + ny_pot, nz_pot, F.potential_d, boundary_d); } - - -void Potential_SOR_3D::Copy_Output( Real *output_potential ){ - cudaMemcpy( output_potential, F.potential_d, n_cells_potential*sizeof(Real), cudaMemcpyDeviceToHost ); +void Potential_SOR_3D::Copy_Output(Real *output_potential) +{ + cudaMemcpy(output_potential, F.potential_d, n_cells_potential * sizeof(Real), + cudaMemcpyDeviceToHost); } -void Potential_SOR_3D::Copy_Potential_From_Host( Real *output_potential ){ - cudaMemcpy( F.potential_d, output_potential, n_cells_potential*sizeof(Real), cudaMemcpyHostToDevice ); +void Potential_SOR_3D::Copy_Potential_From_Host(Real *output_potential) +{ + cudaMemcpy(F.potential_d, output_potential, n_cells_potential * sizeof(Real), + cudaMemcpyHostToDevice); } - - -__global__ void Load_Transfer_Buffer_GPU_kernel_SOR( int direction, int side, int size_buffer, int n_i, int n_j, int nx, int ny, int nz, int n_ghost_transfer, int n_ghost_potential, Real *potential_d, Real *transfer_buffer_d ){ - +__global__ void Load_Transfer_Buffer_GPU_kernel_SOR( + int direction, int side, int size_buffer, int n_i, int n_j, int nx, int ny, + int nz, int n_ghost_transfer, int n_ghost_potential, Real *potential_d, + Real *transfer_buffer_d) +{ // get a global thread ID int tid, tid_i, tid_j, tid_k, tid_buffer, tid_pot; - tid = threadIdx.x + blockIdx.x * blockDim.x; - tid_k = tid / (n_i*n_j); - tid_j = (tid - tid_k*n_i*n_j) / n_i; - tid_i = tid - tid_k*n_i*n_j - tid_j*n_i; - - if ( tid_i < 0 || tid_i >= n_i || tid_j < 0 || tid_j >= n_j || tid_k < 0 || tid_k >= n_ghost_transfer ) return; - - tid_buffer = tid_i + tid_j*n_i + tid_k*n_i*n_j; - - if ( direction == 0 ){ - if ( side == 0 ) tid_pot = ( n_ghost_potential + tid_k ) + (tid_i)*nx + (tid_j)*nx*ny; - if ( side == 1 ) tid_pot = ( nx - n_ghost_potential - n_ghost_transfer + tid_k ) + (tid_i)*nx + (tid_j)*nx*ny; + tid = threadIdx.x + blockIdx.x * blockDim.x; + tid_k = tid / (n_i * n_j); + tid_j = (tid - tid_k * n_i * n_j) / n_i; + tid_i = tid - tid_k * n_i * n_j - tid_j * n_i; + + if (tid_i < 0 || tid_i >= n_i || tid_j < 0 || tid_j >= n_j || tid_k < 0 || + tid_k >= n_ghost_transfer) + return; + + tid_buffer = tid_i + tid_j * n_i + tid_k * n_i * n_j; + + if (direction == 0) { + if (side == 0) + tid_pot = (n_ghost_potential + tid_k) + (tid_i)*nx + (tid_j)*nx * ny; + if (side == 1) + tid_pot = (nx - n_ghost_potential - n_ghost_transfer + tid_k) + + (tid_i)*nx + (tid_j)*nx * ny; } - if ( direction == 1 ){ - if ( side == 0 ) tid_pot = (tid_i) + ( n_ghost_potential + tid_k )*nx + (tid_j)*nx*ny; - if ( side == 1 ) tid_pot = (tid_i) + ( ny - n_ghost_potential - n_ghost_transfer + tid_k )*nx + (tid_j)*nx*ny; + if (direction == 1) { + if (side == 0) + tid_pot = (tid_i) + (n_ghost_potential + tid_k) * nx + (tid_j)*nx * ny; + if (side == 1) + tid_pot = (tid_i) + + (ny - n_ghost_potential - n_ghost_transfer + tid_k) * nx + + (tid_j)*nx * ny; } - if ( direction == 2 ){ - if ( side == 0 ) tid_pot = (tid_i) + (tid_j)*nx + ( n_ghost_potential + tid_k )*nx*ny; - if ( side == 1 ) tid_pot = (tid_i) + (tid_j)*nx + ( nz - n_ghost_potential - n_ghost_transfer + tid_k )*nx*ny; + if (direction == 2) { + if (side == 0) + tid_pot = (tid_i) + (tid_j)*nx + (n_ghost_potential + tid_k) * nx * ny; + if (side == 1) + tid_pot = (tid_i) + (tid_j)*nx + + (nz - n_ghost_potential - n_ghost_transfer + tid_k) * nx * ny; } transfer_buffer_d[tid_buffer] = potential_d[tid_pot]; - } -__global__ void Load_Transfer_Buffer_GPU_Half_kernel( int direction, int side, int size_buffer, int n_i, int n_j, int nx, int ny, int nz, int n_ghost_transfer, int n_ghost_potential, Real *potential_d, Real *transfer_buffer_d, int parity ){ - +__global__ void Load_Transfer_Buffer_GPU_Half_kernel( + int direction, int side, int size_buffer, int n_i, int n_j, int nx, int ny, + int nz, int n_ghost_transfer, int n_ghost_potential, Real *potential_d, + Real *transfer_buffer_d, int parity) +{ // get a global thread ID int tid, tid_i, tid_j, tid_k, tid_buffer, tid_pot; - tid = threadIdx.x + blockIdx.x * blockDim.x; - tid_k = tid / (n_i*n_j); - tid_j = (tid - tid_k*n_i*n_j) / n_i; - tid_i = tid - tid_k*n_i*n_j - tid_j*n_i; + tid = threadIdx.x + blockIdx.x * blockDim.x; + tid_k = tid / (n_i * n_j); + tid_j = (tid - tid_k * n_i * n_j) / n_i; + tid_i = tid - tid_k * n_i * n_j - tid_j * n_i; - tid_buffer = tid_i + tid_j*n_i + tid_k*n_i*n_j; + tid_buffer = tid_i + tid_j * n_i + tid_k * n_i * n_j; int nx_pot, ny_pot, nz_pot; - nx_pot = nx + 2*n_ghost_potential; - ny_pot = ny + 2*n_ghost_potential; - nz_pot = nz + 2*n_ghost_potential; - + nx_pot = nx + 2 * n_ghost_potential; + ny_pot = ny + 2 * n_ghost_potential; + nz_pot = nz + 2 * n_ghost_potential; // // Make a checkboard 3D grid // tid_i = 2 * tid_i; @@ -408,78 +488,108 @@ __global__ void Load_Transfer_Buffer_GPU_Half_kernel( int direction, int side, i // } // else if ( (tid_k+1)%2 == parity ) tid_i +=1; - - if ( tid_i < 0 || tid_i >= n_i || tid_j < 0 || tid_j >= n_j || tid_k < 0 || tid_k >= n_ghost_transfer ) return; + if (tid_i < 0 || tid_i >= n_i || tid_j < 0 || tid_j >= n_j || tid_k < 0 || + tid_k >= n_ghost_transfer) + return; tid_i += n_ghost_potential; tid_j += n_ghost_potential; - - if ( direction == 0 ){ - if ( side == 0 ) tid_pot = ( n_ghost_potential + tid_k ) + (tid_i)*nx_pot + (tid_j)*nx_pot*ny_pot; - if ( side == 1 ) tid_pot = ( nx_pot - n_ghost_potential - n_ghost_transfer + tid_k ) + (tid_i)*nx_pot + (tid_j)*nx_pot*ny_pot; + if (direction == 0) { + if (side == 0) + tid_pot = (n_ghost_potential + tid_k) + (tid_i)*nx_pot + + (tid_j)*nx_pot * ny_pot; + if (side == 1) + tid_pot = (nx_pot - n_ghost_potential - n_ghost_transfer + tid_k) + + (tid_i)*nx_pot + (tid_j)*nx_pot * ny_pot; } - if ( direction == 1 ){ - if ( side == 0 ) tid_pot = (tid_i) + ( n_ghost_potential + tid_k )*nx_pot + (tid_j)*nx_pot*ny_pot; - if ( side == 1 ) tid_pot = (tid_i) + ( ny_pot - n_ghost_potential - n_ghost_transfer + tid_k )*nx_pot + (tid_j)*nx_pot*ny_pot; + if (direction == 1) { + if (side == 0) + tid_pot = (tid_i) + (n_ghost_potential + tid_k) * nx_pot + + (tid_j)*nx_pot * ny_pot; + if (side == 1) + tid_pot = + (tid_i) + + (ny_pot - n_ghost_potential - n_ghost_transfer + tid_k) * nx_pot + + (tid_j)*nx_pot * ny_pot; } - if ( direction == 2 ){ - if ( side == 0 ) tid_pot = (tid_i) + (tid_j)*nx_pot + ( n_ghost_potential + tid_k )*nx_pot*ny_pot; - if ( side == 1 ) tid_pot = (tid_i) + (tid_j)*nx_pot + ( nz_pot - n_ghost_potential - n_ghost_transfer + tid_k )*nx_pot*ny_pot; + if (direction == 2) { + if (side == 0) + tid_pot = (tid_i) + (tid_j)*nx_pot + + (n_ghost_potential + tid_k) * nx_pot * ny_pot; + if (side == 1) + tid_pot = (tid_i) + (tid_j)*nx_pot + + (nz_pot - n_ghost_potential - n_ghost_transfer + tid_k) * + nx_pot * ny_pot; } - // printf( "Loading Buffer Half: val= %d pot= %f \n", parity+1, potential_d[tid_pot] ); + // printf( "Loading Buffer Half: val= %d pot= %f \n", parity+1, + // potential_d[tid_pot] ); transfer_buffer_d[tid_buffer] = potential_d[tid_pot]; - } - - -__global__ void Unload_Transfer_Buffer_GPU_kernel_SOR( int direction, int side, int size_buffer, int n_i, int n_j, int nx, int ny, int nz, int n_ghost_transfer, int n_ghost_potential, Real *potential_d, Real *transfer_buffer_d ){ - +__global__ void Unload_Transfer_Buffer_GPU_kernel_SOR( + int direction, int side, int size_buffer, int n_i, int n_j, int nx, int ny, + int nz, int n_ghost_transfer, int n_ghost_potential, Real *potential_d, + Real *transfer_buffer_d) +{ // get a global thread ID int tid, tid_i, tid_j, tid_k, tid_buffer, tid_pot; - tid = threadIdx.x + blockIdx.x * blockDim.x; - tid_k = tid / (n_i*n_j); - tid_j = (tid - tid_k*n_i*n_j) / n_i; - tid_i = tid - tid_k*n_i*n_j - tid_j*n_i; - - if ( tid_i < 0 || tid_i >= n_i || tid_j < 0 || tid_j >= n_j || tid_k < 0 || tid_k >= n_ghost_transfer ) return; - - tid_buffer = tid_i + tid_j*n_i + tid_k*n_i*n_j; - - if ( direction == 0 ){ - if ( side == 0 ) tid_pot = ( n_ghost_potential - n_ghost_transfer + tid_k ) + (tid_i)*nx + (tid_j)*nx*ny; - if ( side == 1 ) tid_pot = ( nx - n_ghost_potential + tid_k ) + (tid_i)*nx + (tid_j)*nx*ny; + tid = threadIdx.x + blockIdx.x * blockDim.x; + tid_k = tid / (n_i * n_j); + tid_j = (tid - tid_k * n_i * n_j) / n_i; + tid_i = tid - tid_k * n_i * n_j - tid_j * n_i; + + if (tid_i < 0 || tid_i >= n_i || tid_j < 0 || tid_j >= n_j || tid_k < 0 || + tid_k >= n_ghost_transfer) + return; + + tid_buffer = tid_i + tid_j * n_i + tid_k * n_i * n_j; + + if (direction == 0) { + if (side == 0) + tid_pot = (n_ghost_potential - n_ghost_transfer + tid_k) + (tid_i)*nx + + (tid_j)*nx * ny; + if (side == 1) + tid_pot = (nx - n_ghost_potential + tid_k) + (tid_i)*nx + (tid_j)*nx * ny; } - if ( direction == 1 ){ - if ( side == 0 ) tid_pot = (tid_i) + ( n_ghost_potential - n_ghost_transfer + tid_k )*nx + (tid_j)*nx*ny; - if ( side == 1 ) tid_pot = (tid_i) + ( ny - n_ghost_potential + tid_k )*nx + (tid_j)*nx*ny; + if (direction == 1) { + if (side == 0) + tid_pot = (tid_i) + (n_ghost_potential - n_ghost_transfer + tid_k) * nx + + (tid_j)*nx * ny; + if (side == 1) + tid_pot = + (tid_i) + (ny - n_ghost_potential + tid_k) * nx + (tid_j)*nx * ny; } - if ( direction == 2 ){ - if ( side == 0 ) tid_pot = (tid_i) + (tid_j)*nx + ( n_ghost_potential - n_ghost_transfer + tid_k )*nx*ny; - if ( side == 1 ) tid_pot = (tid_i) + (tid_j)*nx + ( nz - n_ghost_potential + tid_k )*nx*ny; + if (direction == 2) { + if (side == 0) + tid_pot = (tid_i) + (tid_j)*nx + + (n_ghost_potential - n_ghost_transfer + tid_k) * nx * ny; + if (side == 1) + tid_pot = + (tid_i) + (tid_j)*nx + (nz - n_ghost_potential + tid_k) * nx * ny; } potential_d[tid_pot] = transfer_buffer_d[tid_buffer]; - } - -__global__ void Unload_Transfer_Buffer_GPU_Half_kernel( int direction, int side, int size_buffer, int n_i, int n_j, int nx, int ny, int nz, int n_ghost_transfer, int n_ghost_potential, Real *potential_d, Real *transfer_buffer_d, int parity ){ - +__global__ void Unload_Transfer_Buffer_GPU_Half_kernel( + int direction, int side, int size_buffer, int n_i, int n_j, int nx, int ny, + int nz, int n_ghost_transfer, int n_ghost_potential, Real *potential_d, + Real *transfer_buffer_d, int parity) +{ // get a global thread ID int tid, tid_i, tid_j, tid_k, tid_buffer, tid_pot; - tid = threadIdx.x + blockIdx.x * blockDim.x; - tid_k = tid / (n_i*n_j); - tid_j = (tid - tid_k*n_i*n_j) / n_i; - tid_i = tid - tid_k*n_i*n_j - tid_j*n_i; + tid = threadIdx.x + blockIdx.x * blockDim.x; + tid_k = tid / (n_i * n_j); + tid_j = (tid - tid_k * n_i * n_j) / n_i; + tid_i = tid - tid_k * n_i * n_j - tid_j * n_i; - tid_buffer = tid_i + tid_j*n_i + tid_k*n_i*n_j; + tid_buffer = tid_i + tid_j * n_i + tid_k * n_i * n_j; int nx_pot, ny_pot, nz_pot; - nx_pot = nx + 2*n_ghost_potential; - ny_pot = ny + 2*n_ghost_potential; - nz_pot = nz + 2*n_ghost_potential; + nx_pot = nx + 2 * n_ghost_potential; + ny_pot = ny + 2 * n_ghost_potential; + nz_pot = nz + 2 * n_ghost_potential; // // Make a checkboard 3D grid // tid_i = 2 * tid_i; @@ -488,47 +598,60 @@ __global__ void Unload_Transfer_Buffer_GPU_Half_kernel( int direction, int side, // } // else if ( (tid_k+1)%2 == parity ) tid_i +=1; - - if ( tid_i < 0 || tid_i >= n_i || tid_j < 0 || tid_j >= n_j || tid_k < 0 || tid_k >= n_ghost_transfer ) return; + if (tid_i < 0 || tid_i >= n_i || tid_j < 0 || tid_j >= n_j || tid_k < 0 || + tid_k >= n_ghost_transfer) + return; tid_i += n_ghost_potential; tid_j += n_ghost_potential; - - if ( direction == 0 ){ - if ( side == 0 ) tid_pot = ( n_ghost_potential - n_ghost_transfer + tid_k ) + (tid_i)*nx_pot + (tid_j)*nx_pot*ny_pot; - if ( side == 1 ) tid_pot = ( nx_pot - n_ghost_potential + tid_k ) + (tid_i)*nx_pot + (tid_j)*nx_pot*ny_pot; + if (direction == 0) { + if (side == 0) + tid_pot = (n_ghost_potential - n_ghost_transfer + tid_k) + + (tid_i)*nx_pot + (tid_j)*nx_pot * ny_pot; + if (side == 1) + tid_pot = (nx_pot - n_ghost_potential + tid_k) + (tid_i)*nx_pot + + (tid_j)*nx_pot * ny_pot; } - if ( direction == 1 ){ - if ( side == 0 ) tid_pot = (tid_i) + ( n_ghost_potential - n_ghost_transfer + tid_k )*nx_pot + (tid_j)*nx_pot*ny_pot; - if ( side == 1 ) tid_pot = (tid_i) + ( ny_pot - n_ghost_potential + tid_k )*nx_pot + (tid_j)*nx_pot*ny_pot; + if (direction == 1) { + if (side == 0) + tid_pot = (tid_i) + + (n_ghost_potential - n_ghost_transfer + tid_k) * nx_pot + + (tid_j)*nx_pot * ny_pot; + if (side == 1) + tid_pot = (tid_i) + (ny_pot - n_ghost_potential + tid_k) * nx_pot + + (tid_j)*nx_pot * ny_pot; } - if ( direction == 2 ){ - if ( side == 0 ) tid_pot = (tid_i) + (tid_j)*nx_pot + ( n_ghost_potential - n_ghost_transfer + tid_k )*nx_pot*ny_pot; - if ( side == 1 ) tid_pot = (tid_i) + (tid_j)*nx_pot + ( nz_pot - n_ghost_potential + tid_k )*nx_pot*ny_pot; + if (direction == 2) { + if (side == 0) + tid_pot = + (tid_i) + (tid_j)*nx_pot + + (n_ghost_potential - n_ghost_transfer + tid_k) * nx_pot * ny_pot; + if (side == 1) + tid_pot = (tid_i) + (tid_j)*nx_pot + + (nz_pot - n_ghost_potential + tid_k) * nx_pot * ny_pot; } potential_d[tid_pot] = transfer_buffer_d[tid_buffer]; - } - - -void Potential_SOR_3D::Load_Transfer_Buffer_GPU( int direction, int side, int nx, int ny, int nz, int n_ghost_transfer, int n_ghost_potential, Real *potential_d, Real *transfer_buffer_d ){ - +void Potential_SOR_3D::Load_Transfer_Buffer_GPU( + int direction, int side, int nx, int ny, int nz, int n_ghost_transfer, + int n_ghost_potential, Real *potential_d, Real *transfer_buffer_d) +{ int nx_pot, ny_pot, nz_pot, size_buffer, n_i, n_j, ngrid; - nx_pot = nx + 2*n_ghost_potential; - ny_pot = ny + 2*n_ghost_potential; - nz_pot = nz + 2*n_ghost_potential; + nx_pot = nx + 2 * n_ghost_potential; + ny_pot = ny + 2 * n_ghost_potential; + nz_pot = nz + 2 * n_ghost_potential; - if ( direction == 0 ){ + if (direction == 0) { n_i = ny_pot; n_j = nz_pot; } - if ( direction == 1 ){ + if (direction == 1) { n_i = nx_pot; n_j = nz_pot; } - if ( direction == 2 ){ + if (direction == 2) { n_i = nx_pot; n_j = ny_pot; } @@ -536,33 +659,37 @@ void Potential_SOR_3D::Load_Transfer_Buffer_GPU( int direction, int side, int nx size_buffer = n_ghost_transfer * n_i * n_j; // set values for GPU kernels - ngrid = ( size_buffer - 1 ) / TPB_SOR + 1; + ngrid = (size_buffer - 1) / TPB_SOR + 1; // number of blocks per 1D grid dim3 dim1dGrid(ngrid, 1, 1); // number of threads per 1D block dim3 dim1dBlock(TPB_SOR, 1, 1); - - // Load_Transfer_Buffer_GPU_kernel<<>>( direction, side, size_buffer, n_i, n_j, nx_pot, ny_pot, nz_pot, n_ghost_transfer, n_ghost_potential, potential_d, transfer_buffer_d ); - hipLaunchKernelGGL( Load_Transfer_Buffer_GPU_kernel_SOR, dim1dGrid, dim1dBlock, 0, 0, direction, side, size_buffer, n_i, n_j, nx_pot, ny_pot, nz_pot, n_ghost_transfer, n_ghost_potential, potential_d, transfer_buffer_d ); - + // Load_Transfer_Buffer_GPU_kernel<<>>( direction, side, + // size_buffer, n_i, n_j, nx_pot, ny_pot, nz_pot, n_ghost_transfer, + // n_ghost_potential, potential_d, transfer_buffer_d ); + hipLaunchKernelGGL(Load_Transfer_Buffer_GPU_kernel_SOR, dim1dGrid, dim1dBlock, + 0, 0, direction, side, size_buffer, n_i, n_j, nx_pot, + ny_pot, nz_pot, n_ghost_transfer, n_ghost_potential, + potential_d, transfer_buffer_d); } - -void Potential_SOR_3D::Load_Transfer_Buffer_Half_GPU( int direction, int side, int nx, int ny, int nz, int n_ghost_transfer, int n_ghost_potential, Real *potential_d, Real *transfer_buffer_d ){ - +void Potential_SOR_3D::Load_Transfer_Buffer_Half_GPU( + int direction, int side, int nx, int ny, int nz, int n_ghost_transfer, + int n_ghost_potential, Real *potential_d, Real *transfer_buffer_d) +{ int size_buffer, n_i, n_j, ngrid; - nz_pot = nz + 2*n_ghost_potential; + nz_pot = nz + 2 * n_ghost_potential; - if ( direction == 0 ){ + if (direction == 0) { n_i = ny; n_j = nz; } - if ( direction == 1 ){ + if (direction == 1) { n_i = nx; n_j = nz; } - if ( direction == 2 ){ + if (direction == 2) { n_i = nx; n_j = ny; } @@ -571,34 +698,39 @@ void Potential_SOR_3D::Load_Transfer_Buffer_Half_GPU( int direction, int side, i size_buffer = n_ghost_transfer * n_i * n_j; // set values for GPU kernels - ngrid = ( size_buffer - 1 ) / TPB_SOR + 1; + ngrid = (size_buffer - 1) / TPB_SOR + 1; // number of blocks per 1D grid dim3 dim1dGrid(ngrid, 1, 1); // number of threads per 1D block dim3 dim1dBlock(TPB_SOR, 1, 1); - - // Load_Transfer_Buffer_GPU_Half_kernel<<>>( direction, side, size_buffer, n_i, n_j, nx, ny, nz, n_ghost_transfer, n_ghost_potential, potential_d, transfer_buffer_d, iteration_parity ); - hipLaunchKernelGGL(Load_Transfer_Buffer_GPU_Half_kernel, dim1dGrid, dim1dBlock, 0, 0, direction, side, size_buffer, n_i, n_j, nx, ny, nz, n_ghost_transfer, n_ghost_potential, potential_d, transfer_buffer_d, iteration_parity ); - + // Load_Transfer_Buffer_GPU_Half_kernel<<>>( direction, + // side, size_buffer, n_i, n_j, nx, ny, nz, n_ghost_transfer, + // n_ghost_potential, potential_d, transfer_buffer_d, iteration_parity ); + hipLaunchKernelGGL(Load_Transfer_Buffer_GPU_Half_kernel, dim1dGrid, + dim1dBlock, 0, 0, direction, side, size_buffer, n_i, n_j, + nx, ny, nz, n_ghost_transfer, n_ghost_potential, + potential_d, transfer_buffer_d, iteration_parity); } -void Potential_SOR_3D::Unload_Transfer_Buffer_GPU( int direction, int side, int nx, int ny, int nz, int n_ghost_transfer, int n_ghost_potential, Real *potential_d, Real *transfer_buffer_d ){ - +void Potential_SOR_3D::Unload_Transfer_Buffer_GPU( + int direction, int side, int nx, int ny, int nz, int n_ghost_transfer, + int n_ghost_potential, Real *potential_d, Real *transfer_buffer_d) +{ int nx_pot, ny_pot, nz_pot, size_buffer, n_i, n_j, ngrid; - nx_pot = nx + 2*n_ghost_potential; - ny_pot = ny + 2*n_ghost_potential; - nz_pot = nz + 2*n_ghost_potential; + nx_pot = nx + 2 * n_ghost_potential; + ny_pot = ny + 2 * n_ghost_potential; + nz_pot = nz + 2 * n_ghost_potential; - if ( direction == 0 ){ + if (direction == 0) { n_i = ny_pot; n_j = nz_pot; } - if ( direction == 1 ){ + if (direction == 1) { n_i = nx_pot; n_j = nz_pot; } - if ( direction == 2 ){ + if (direction == 2) { n_i = nx_pot; n_j = ny_pot; } @@ -606,32 +738,36 @@ void Potential_SOR_3D::Unload_Transfer_Buffer_GPU( int direction, int side, int size_buffer = n_ghost_transfer * n_i * n_j; // set values for GPU kernels - ngrid = ( size_buffer - 1 ) / TPB_SOR + 1; + ngrid = (size_buffer - 1) / TPB_SOR + 1; // number of blocks per 1D grid dim3 dim1dGrid(ngrid, 1, 1); // number of threads per 1D block dim3 dim1dBlock(TPB_SOR, 1, 1); - - // Unload_Transfer_Buffer_GPU_kernel<<>>( direction, side, size_buffer, n_i, n_j, nx_pot, ny_pot, nz_pot, n_ghost_transfer, n_ghost_potential, potential_d, transfer_buffer_d ); - hipLaunchKernelGGL(Unload_Transfer_Buffer_GPU_kernel_SOR,dim1dGrid, dim1dBlock, 0, 0, direction, side, size_buffer, n_i, n_j, nx_pot, ny_pot, nz_pot, n_ghost_transfer, n_ghost_potential, potential_d, transfer_buffer_d ); - + // Unload_Transfer_Buffer_GPU_kernel<<>>( direction, + // side, size_buffer, n_i, n_j, nx_pot, ny_pot, nz_pot, n_ghost_transfer, + // n_ghost_potential, potential_d, transfer_buffer_d ); + hipLaunchKernelGGL(Unload_Transfer_Buffer_GPU_kernel_SOR, dim1dGrid, + dim1dBlock, 0, 0, direction, side, size_buffer, n_i, n_j, + nx_pot, ny_pot, nz_pot, n_ghost_transfer, + n_ghost_potential, potential_d, transfer_buffer_d); } - -void Potential_SOR_3D::Unload_Transfer_Buffer_Half_GPU( int direction, int side, int nx, int ny, int nz, int n_ghost_transfer, int n_ghost_potential, Real *potential_d, Real *transfer_buffer_d ){ - +void Potential_SOR_3D::Unload_Transfer_Buffer_Half_GPU( + int direction, int side, int nx, int ny, int nz, int n_ghost_transfer, + int n_ghost_potential, Real *potential_d, Real *transfer_buffer_d) +{ int size_buffer, n_i, n_j, ngrid; - if ( direction == 0 ){ + if (direction == 0) { n_i = ny; n_j = nz; } - if ( direction == 1 ){ + if (direction == 1) { n_i = nx; n_j = nz; } - if ( direction == 2 ){ + if (direction == 2) { n_i = nx; n_j = ny; } @@ -640,31 +776,34 @@ void Potential_SOR_3D::Unload_Transfer_Buffer_Half_GPU( int direction, int side, size_buffer = n_ghost_transfer * n_i * n_j; // set values for GPU kernels - ngrid = ( size_buffer - 1 ) / TPB_SOR + 1; + ngrid = (size_buffer - 1) / TPB_SOR + 1; // number of blocks per 1D grid dim3 dim1dGrid(ngrid, 1, 1); // number of threads per 1D block dim3 dim1dBlock(TPB_SOR, 1, 1); - - // Unload_Transfer_Buffer_GPU_Half_kernel<<>>( direction, side, size_buffer, n_i, n_j, nx, ny, nz, n_ghost_transfer, n_ghost_potential, potential_d, transfer_buffer_d, iteration_parity ); - hipLaunchKernelGGL(Unload_Transfer_Buffer_GPU_Half_kernel, dim1dGrid, dim1dBlock, 0, 0, direction, side, size_buffer, n_i, n_j, nx, ny, nz, n_ghost_transfer, n_ghost_potential, potential_d, transfer_buffer_d, iteration_parity); - + // Unload_Transfer_Buffer_GPU_Half_kernel<<>>( + // direction, side, size_buffer, n_i, n_j, nx, ny, nz, n_ghost_transfer, + // n_ghost_potential, potential_d, transfer_buffer_d, iteration_parity ); + hipLaunchKernelGGL(Unload_Transfer_Buffer_GPU_Half_kernel, dim1dGrid, + dim1dBlock, 0, 0, direction, side, size_buffer, n_i, n_j, + nx, ny, nz, n_ghost_transfer, n_ghost_potential, + potential_d, transfer_buffer_d, iteration_parity); } -void Potential_SOR_3D::Copy_Transfer_Buffer_To_Host( int size_buffer, Real *transfer_buffer_h, Real *transfer_buffer_d ){ - CudaSafeCall( cudaMemcpy(transfer_buffer_h, transfer_buffer_d, size_buffer*sizeof(Real), cudaMemcpyDeviceToHost ) ); +void Potential_SOR_3D::Copy_Transfer_Buffer_To_Host(int size_buffer, + Real *transfer_buffer_h, + Real *transfer_buffer_d) +{ + CudaSafeCall(cudaMemcpy(transfer_buffer_h, transfer_buffer_d, + size_buffer * sizeof(Real), cudaMemcpyDeviceToHost)); } - -void Potential_SOR_3D::Copy_Transfer_Buffer_To_Device( int size_buffer, Real *transfer_buffer_h, Real *transfer_buffer_d ){ - CudaSafeCall( cudaMemcpy(transfer_buffer_d, transfer_buffer_h, size_buffer*sizeof(Real), cudaMemcpyHostToDevice ) ); +void Potential_SOR_3D::Copy_Transfer_Buffer_To_Device(int size_buffer, + Real *transfer_buffer_h, + Real *transfer_buffer_d) +{ + CudaSafeCall(cudaMemcpy(transfer_buffer_d, transfer_buffer_h, + size_buffer * sizeof(Real), cudaMemcpyHostToDevice)); } - -#endif //GRAVITY - - - - - - +#endif // GRAVITY diff --git a/src/gravity/potential_paris_3D.cu b/src/gravity/potential_paris_3D.cu index 6c9ec503c..1306e1ea3 100644 --- a/src/gravity/potential_paris_3D.cu +++ b/src/gravity/potential_paris_3D.cu @@ -1,79 +1,88 @@ #if defined(GRAVITY) && defined(PARIS) -#include "../gravity/potential_paris_3D.h" -#include "../utils/gpu.hpp" -#include "../io/io.h" -#include -#include -#include - -static void __attribute__((unused)) printDiff(const Real *p, const Real *q, const int ng, const int nx, const int ny, const int nz, const bool plot = false) + #include + #include + #include + + #include "../gravity/potential_paris_3D.h" + #include "../io/io.h" + #include "../utils/gpu.hpp" + +static void __attribute__((unused)) +printDiff(const Real *p, const Real *q, const int ng, const int nx, + const int ny, const int nz, const bool plot = false) { Real dMax = 0, dSum = 0, dSum2 = 0; Real qMax = 0, qSum = 0, qSum2 = 0; -#pragma omp parallel for reduction(max:dMax,qMax) reduction(+:dSum,dSum2,qSum,qSum2) + #pragma omp parallel for reduction(max:dMax,qMax) reduction(+:dSum,dSum2,qSum,qSum2) for (int k = 0; k < nz; k++) { for (int j = 0; j < ny; j++) { for (int i = 0; i < nx; i++) { - const int ijk = i+ng+(nx+ng+ng)*(j+ng+(ny+ng+ng)*(k+ng)); + const int ijk = + i + ng + (nx + ng + ng) * (j + ng + (ny + ng + ng) * (k + ng)); const Real qAbs = fabs(q[ijk]); - qMax = std::max(qMax,qAbs); + qMax = std::max(qMax, qAbs); qSum += qAbs; - qSum2 += qAbs*qAbs; - const Real d = fabs(q[ijk]-p[ijk]); - dMax = std::max(dMax,d); + qSum2 += qAbs * qAbs; + const Real d = fabs(q[ijk] - p[ijk]); + dMax = std::max(dMax, d); dSum += d; - dSum2 += d*d; + dSum2 += d * d; } } } - Real maxs[2] = {qMax,dMax}; - Real sums[4] = {qSum,qSum2,dSum,dSum2}; - MPI_Allreduce(MPI_IN_PLACE,&maxs,2,MPI_DOUBLE,MPI_MAX,MPI_COMM_WORLD); - MPI_Allreduce(MPI_IN_PLACE,&sums,4,MPI_DOUBLE,MPI_SUM,MPI_COMM_WORLD); - chprintf(" Poisson-Solver Diff: L1 %g L2 %g Linf %g\n",sums[2]/sums[0],sqrt(sums[3]/sums[1]),maxs[1]/maxs[0]); + Real maxs[2] = {qMax, dMax}; + Real sums[4] = {qSum, qSum2, dSum, dSum2}; + MPI_Allreduce(MPI_IN_PLACE, &maxs, 2, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD); + MPI_Allreduce(MPI_IN_PLACE, &sums, 4, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + chprintf(" Poisson-Solver Diff: L1 %g L2 %g Linf %g\n", sums[2] / sums[0], + sqrt(sums[3] / sums[1]), maxs[1] / maxs[0]); fflush(stdout); if (!plot) return; printf("###\n"); - const int k = nz/2; - //for (int j = 0; j < ny; j++) { - const int j = ny/2; - for (int i = 0; i < nx; i++) { - const int ijk = i+ng+(nx+ng+ng)*(j+ng+(ny+ng+ng)*(k+ng)); - //printf("%d %d %g %g %g\n",j,i,q[ijk],p[ijk],q[ijk]-p[ijk]); - printf("%d %g %g %g\n",i,q[ijk],p[ijk],q[ijk]-p[ijk]); - } - printf("\n"); + const int k = nz / 2; + // for (int j = 0; j < ny; j++) { + const int j = ny / 2; + for (int i = 0; i < nx; i++) { + const int ijk = + i + ng + (nx + ng + ng) * (j + ng + (ny + ng + ng) * (k + ng)); + // printf("%d %d %g %g %g\n",j,i,q[ijk],p[ijk],q[ijk]-p[ijk]); + printf("%d %g %g %g\n", i, q[ijk], p[ijk], q[ijk] - p[ijk]); + } + printf("\n"); //} MPI_Finalize(); exit(0); } -Potential_Paris_3D::Potential_Paris_3D(): - dn_{0,0,0}, - dr_{0,0,0}, - lo_{0,0,0}, - lr_{0,0,0}, - myLo_{0,0,0}, - pp_(nullptr), - minBytes_(0), - densityBytes_(0), - potentialBytes_(0), - da_(nullptr), - db_(nullptr) -{} +Potential_Paris_3D::Potential_Paris_3D() + : dn_{0, 0, 0}, + dr_{0, 0, 0}, + lo_{0, 0, 0}, + lr_{0, 0, 0}, + myLo_{0, 0, 0}, + pp_(nullptr), + minBytes_(0), + densityBytes_(0), + potentialBytes_(0), + da_(nullptr), + db_(nullptr) +{ +} Potential_Paris_3D::~Potential_Paris_3D() { Reset(); } -void Potential_Paris_3D::Get_Potential(const Real *const density, Real *const potential, const Real g, const Real offset, const Real a) +void Potential_Paris_3D::Get_Potential(const Real *const density, + Real *const potential, const Real g, + const Real offset, const Real a) { -#ifdef COSMOLOGY - const Real scale = Real(4)*M_PI*g/a; -#else - const Real scale = Real(4)*M_PI*g; -#endif + #ifdef COSMOLOGY + const Real scale = Real(4) * M_PI * g / a; + #else + const Real scale = Real(4) * M_PI * g; + #endif assert(da_); Real *const da = da_; Real *const db = db_; @@ -83,45 +92,52 @@ void Potential_Paris_3D::Get_Potential(const Real *const density, Real *const po const int nj = dn_[1]; const int nk = dn_[0]; - const int n = ni*nj*nk; + const int n = ni * nj * nk; #ifdef GRAVITY_GPU - CHECK(cudaMemcpy(db,density,densityBytes_,cudaMemcpyDeviceToDevice)); + CHECK(cudaMemcpy(db, density, densityBytes_, cudaMemcpyDeviceToDevice)); #else - CHECK(cudaMemcpy(db,density,densityBytes_,cudaMemcpyHostToDevice)); + CHECK(cudaMemcpy(db, density, densityBytes_, cudaMemcpyHostToDevice)); #endif - const int ngi = ni+N_GHOST_POTENTIAL+N_GHOST_POTENTIAL; - const int ngj = nj+N_GHOST_POTENTIAL+N_GHOST_POTENTIAL; + const int ngi = ni + N_GHOST_POTENTIAL + N_GHOST_POTENTIAL; + const int ngj = nj + N_GHOST_POTENTIAL + N_GHOST_POTENTIAL; - gpuFor(n,GPU_LAMBDA(const int i) { db[i] = scale*(db[i]-offset); }); - pp_->solve(minBytes_,db,da); gpuFor( - nk,nj,ni, - GPU_LAMBDA(const int k, const int j, const int i) { - const int ia = i+ni*(j+nj*k); - const int ib = i+N_GHOST_POTENTIAL+ngi*(j+N_GHOST_POTENTIAL+ngj*(k+N_GHOST_POTENTIAL)); - db[ib] = da[ia]; - }); + n, GPU_LAMBDA(const int i) { db[i] = scale * (db[i] - offset); }); + pp_->solve(minBytes_, db, da); + gpuFor( + nk, nj, ni, GPU_LAMBDA(const int k, const int j, const int i) { + const int ia = i + ni * (j + nj * k); + const int ib = + i + N_GHOST_POTENTIAL + + ngi * (j + N_GHOST_POTENTIAL + ngj * (k + N_GHOST_POTENTIAL)); + db[ib] = da[ia]; + }); assert(potential); #ifdef GRAVITY_GPU - CHECK(cudaMemcpy(potential,db,potentialBytes_,cudaMemcpyDeviceToDevice)); + CHECK(cudaMemcpy(potential, db, potentialBytes_, cudaMemcpyDeviceToDevice)); #else - CHECK(cudaMemcpy(potential,db,potentialBytes_,cudaMemcpyDeviceToHost)); + CHECK(cudaMemcpy(potential, db, potentialBytes_, cudaMemcpyDeviceToHost)); #endif } -void Potential_Paris_3D::Initialize(const Real lx, const Real ly, const Real lz, const Real xMin, const Real yMin, const Real zMin, const int nx, const int ny, const int nz, const int nxReal, const int nyReal, const int nzReal, const Real dx, const Real dy, const Real dz) +void Potential_Paris_3D::Initialize(const Real lx, const Real ly, const Real lz, + const Real xMin, const Real yMin, + const Real zMin, const int nx, const int ny, + const int nz, const int nxReal, + const int nyReal, const int nzReal, + const Real dx, const Real dy, const Real dz) { chprintf(" Using Poisson Solver: Paris Periodic"); -#ifdef PARIS_5PT + #ifdef PARIS_5PT chprintf(" 5-Point\n"); -#elif defined PARIS_3PT + #elif defined PARIS_3PT chprintf(" 3-Point\n"); -#else + #else chprintf(" Spectral\n"); -#endif + #endif - const long nl012 = long(nxReal)*long(nyReal)*long(nzReal); + const long nl012 = long(nxReal) * long(nyReal) * long(nzReal); assert(nl012 <= INT_MAX); dn_[0] = nzReal; @@ -139,29 +155,39 @@ void Potential_Paris_3D::Initialize(const Real lx, const Real ly, const Real lz, myLo_[0] = zMin; myLo_[1] = yMin; myLo_[2] = xMin; - MPI_Allreduce(myLo_,lo_,3,MPI_DOUBLE,MPI_MIN,MPI_COMM_WORLD); - - const Real hi[3] = {lo_[0]+lz-dr_[0],lo_[1]+ly-dr_[1],lo_[2]+lx-dr_[2]}; - const int n[3] = {nz,ny,nx}; - const int m[3] = {n[0]/nzReal,n[1]/nyReal,n[2]/nxReal}; - const int id[3] = {int(round((zMin-lo_[0])/(dn_[0]*dr_[0]))),int(round((yMin-lo_[1])/(dn_[1]*dr_[1]))),int(round((xMin-lo_[2])/(dn_[2]*dr_[2])))}; - chprintf(" Paris: [ %g %g %g ]-[ %g %g %g ] N_local[ %d %d %d ] Tasks[ %d %d %d ]\n",lo_[2],lo_[1],lo_[0],lo_[2]+lx,lo_[1]+ly,lo_[0]+lz,dn_[2],dn_[1],dn_[0],m[2],m[1],m[0]); - - assert(dn_[0] == n[0]/m[0]); - assert(dn_[1] == n[1]/m[1]); - assert(dn_[2] == n[2]/m[2]); - - pp_ = new ParisPeriodic(n,lo_,hi,m,id); + MPI_Allreduce(myLo_, lo_, 3, MPI_DOUBLE, MPI_MIN, MPI_COMM_WORLD); + + const Real hi[3] = {lo_[0] + lz - dr_[0], lo_[1] + ly - dr_[1], + lo_[2] + lx - dr_[2]}; + const int n[3] = {nz, ny, nx}; + const int m[3] = {n[0] / nzReal, n[1] / nyReal, n[2] / nxReal}; + const int id[3] = {int(round((zMin - lo_[0]) / (dn_[0] * dr_[0]))), + int(round((yMin - lo_[1]) / (dn_[1] * dr_[1]))), + int(round((xMin - lo_[2]) / (dn_[2] * dr_[2])))}; + chprintf( + " Paris: [ %g %g %g ]-[ %g %g %g ] N_local[ %d %d %d ] Tasks[ %d %d %d " + "]\n", + lo_[2], lo_[1], lo_[0], lo_[2] + lx, lo_[1] + ly, lo_[0] + lz, dn_[2], + dn_[1], dn_[0], m[2], m[1], m[0]); + + assert(dn_[0] == n[0] / m[0]); + assert(dn_[1] == n[1] / m[1]); + assert(dn_[2] == n[2] / m[2]); + + pp_ = new ParisPeriodic(n, lo_, hi, m, id); assert(pp_); - minBytes_ = pp_->bytes(); - densityBytes_ = long(sizeof(Real))*dn_[0]*dn_[1]*dn_[2]; - const long gg = N_GHOST_POTENTIAL+N_GHOST_POTENTIAL; - potentialBytes_ = long(sizeof(Real))*(dn_[0]+gg)*(dn_[1]+gg)*(dn_[2]+gg); - - CHECK(cudaMalloc(reinterpret_cast(&da_),std::max(minBytes_,densityBytes_))); + minBytes_ = pp_->bytes(); + densityBytes_ = long(sizeof(Real)) * dn_[0] * dn_[1] * dn_[2]; + const long gg = N_GHOST_POTENTIAL + N_GHOST_POTENTIAL; + potentialBytes_ = + long(sizeof(Real)) * (dn_[0] + gg) * (dn_[1] + gg) * (dn_[2] + gg); + + CHECK(cudaMalloc(reinterpret_cast(&da_), + std::max(minBytes_, densityBytes_))); assert(da_); - CHECK(cudaMalloc(reinterpret_cast(&db_),std::max(minBytes_,potentialBytes_))); + CHECK(cudaMalloc(reinterpret_cast(&db_), + std::max(minBytes_, potentialBytes_))); assert(db_); } diff --git a/src/gravity/potential_paris_3D.h b/src/gravity/potential_paris_3D.h index b6d85d5d2..06c1d7db6 100644 --- a/src/gravity/potential_paris_3D.h +++ b/src/gravity/potential_paris_3D.h @@ -2,25 +2,30 @@ #if defined(GRAVITY) && defined(PARIS) -#include "paris/ParisPeriodic.hpp" -#include "../global/global.h" + #include "../global/global.h" + #include "paris/ParisPeriodic.hpp" -class Potential_Paris_3D { - public: - Potential_Paris_3D(); - ~Potential_Paris_3D(); - void Get_Potential(const Real *density, Real *potential, Real g, Real massInfo, Real a); - void Initialize(Real lx, Real ly, Real lz, Real xMin, Real yMin, Real zMin, int nx, int ny, int nz, int nxReal, int nyReal, int nzReal, Real dx, Real dy, Real dz); - void Reset(); - protected: - int dn_[3]; - Real dr_[3],lo_[3],lr_[3],myLo_[3]; - ParisPeriodic *pp_; - long minBytes_; - long densityBytes_; - long potentialBytes_; - Real *da_; - Real *db_; +class Potential_Paris_3D +{ + public: + Potential_Paris_3D(); + ~Potential_Paris_3D(); + void Get_Potential(const Real *density, Real *potential, Real g, + Real massInfo, Real a); + void Initialize(Real lx, Real ly, Real lz, Real xMin, Real yMin, Real zMin, + int nx, int ny, int nz, int nxReal, int nyReal, int nzReal, + Real dx, Real dy, Real dz); + void Reset(); + + protected: + int dn_[3]; + Real dr_[3], lo_[3], lr_[3], myLo_[3]; + ParisPeriodic *pp_; + long minBytes_; + long densityBytes_; + long potentialBytes_; + Real *da_; + Real *db_; }; #endif diff --git a/src/gravity/potential_paris_galactic.cu b/src/gravity/potential_paris_galactic.cu index ba57941d4..c233ad93c 100644 --- a/src/gravity/potential_paris_galactic.cu +++ b/src/gravity/potential_paris_galactic.cu @@ -1,32 +1,38 @@ #ifdef PARIS_GALACTIC -#include "../gravity/potential_paris_galactic.h" -#include "../io/io.h" -#include "../utils/gpu.hpp" -#include - -Potential_Paris_Galactic::Potential_Paris_Galactic(): - dn_{0,0,0}, - dr_{0,0,0}, - lo_{0,0,0}, - lr_{0,0,0}, - myLo_{0,0,0}, - pp_(nullptr), - densityBytes_(0), - minBytes_(0), - da_(nullptr), - db_(nullptr) -#ifndef GRAVITY_GPU - , potentialBytes_(0), - dc_(nullptr) -#endif -{} + #include + + #include "../gravity/potential_paris_galactic.h" + #include "../io/io.h" + #include "../utils/gpu.hpp" + +Potential_Paris_Galactic::Potential_Paris_Galactic() + : dn_{0, 0, 0}, + dr_{0, 0, 0}, + lo_{0, 0, 0}, + lr_{0, 0, 0}, + myLo_{0, 0, 0}, + pp_(nullptr), + densityBytes_(0), + minBytes_(0), + da_(nullptr), + db_(nullptr) + #ifndef GRAVITY_GPU + , + potentialBytes_(0), + dc_(nullptr) + #endif +{ +} Potential_Paris_Galactic::~Potential_Paris_Galactic() { Reset(); } -void Potential_Paris_Galactic::Get_Potential(const Real *const density, Real *const potential, const Real g, const DiskGalaxy &galaxy) +void Potential_Paris_Galactic::Get_Potential(const Real *const density, + Real *const potential, + const Real g, + const DiskGalaxy &galaxy) { - const Real scale = Real(4)*M_PI*g; + const Real scale = Real(4) * M_PI * g; assert(da_); Real *const da = da_; @@ -37,18 +43,19 @@ void Potential_Paris_Galactic::Get_Potential(const Real *const density, Real *co const int nj = dn_[1]; const int nk = dn_[0]; - const int ngi = ni+N_GHOST_POTENTIAL+N_GHOST_POTENTIAL; - const int ngj = nj+N_GHOST_POTENTIAL+N_GHOST_POTENTIAL; + const int ngi = ni + N_GHOST_POTENTIAL + N_GHOST_POTENTIAL; + const int ngj = nj + N_GHOST_POTENTIAL + N_GHOST_POTENTIAL; -#ifdef GRAVITY_GPU + #ifdef GRAVITY_GPU const Real *const rho = density; - Real *const phi = potential; -#else - CHECK(cudaMemcpyAsync(da,density,densityBytes_,cudaMemcpyHostToDevice,0)); - CHECK(cudaMemcpyAsync(dc_,potential,potentialBytes_,cudaMemcpyHostToDevice,0)); + Real *const phi = potential; + #else + CHECK(cudaMemcpyAsync(da, density, densityBytes_, cudaMemcpyHostToDevice, 0)); + CHECK(cudaMemcpyAsync(dc_, potential, potentialBytes_, cudaMemcpyHostToDevice, + 0)); const Real *const rho = da; - Real *const phi = dc_; -#endif + Real *const phi = dc_; + #endif const Real xMin = myLo_[2]; const Real yMin = myLo_[1]; @@ -58,59 +65,64 @@ void Potential_Paris_Galactic::Get_Potential(const Real *const density, Real *co const Real dy = dr_[1]; const Real dz = dr_[0]; - const Real md = SIMULATED_FRACTION*galaxy.getM_d(); + const Real md = SIMULATED_FRACTION * galaxy.getM_d(); const Real rd = galaxy.getR_d(); const Real zd = galaxy.getZ_d(); - const Real rho0 = md*zd*zd/(4.0*M_PI); + const Real rho0 = md * zd * zd / (4.0 * M_PI); gpuFor( - nk,nj,ni, - GPU_LAMBDA(const int k, const int j, const int i) { - const int ia = i+ni*(j+nj*k); + nk, nj, ni, GPU_LAMBDA(const int k, const int j, const int i) { + const int ia = i + ni * (j + nj * k); - const Real x = xMin+i*dx; - const Real y = yMin+j*dy; - const Real z = zMin+k*dz; + const Real x = xMin + i * dx; + const Real y = yMin + j * dy; + const Real z = zMin + k * dz; - const Real r = sqrt(x*x+y*y); - const Real a = sqrt(z*z+zd*zd); - const Real b = rd+a; - const Real c = r*r+b*b; - const Real dRho = rho0*(rd*c+3.0*a*b*b)/(a*a*a*pow(c,2.5)); + const Real r = sqrt(x * x + y * y); + const Real a = sqrt(z * z + zd * zd); + const Real b = rd + a; + const Real c = r * r + b * b; + const Real dRho = + rho0 * (rd * c + 3.0 * a * b * b) / (a * a * a * pow(c, 2.5)); - da[ia] = scale*(rho[ia]-dRho); - }); + da[ia] = scale * (rho[ia] - dRho); + }); - pp_->solve(minBytes_,da,db); + pp_->solve(minBytes_, da, db); - const Real phi0 = -g*md; + const Real phi0 = -g * md; gpuFor( - nk,nj,ni, - GPU_LAMBDA(const int k, const int j, const int i) { - const int ia = i+ni*(j+nj*k); - const int ib = i+N_GHOST_POTENTIAL+ngi*(j+N_GHOST_POTENTIAL+ngj*(k+N_GHOST_POTENTIAL)); - - const Real x = xMin+i*dx; - const Real y = yMin+j*dy; - const Real z = zMin+k*dz; - - const Real r = sqrt(x*x+y*y); - const Real a = sqrt(z*z+zd*zd); - const Real b = a+rd; - const Real c = sqrt(r*r+b*b); - const Real dPhi = phi0/c; - - phi[ib] = db[ia]+dPhi; - }); - -#ifndef GRAVITY_GPU - CHECK(cudaMemcpy(potential,dc_,potentialBytes_,cudaMemcpyDeviceToHost)); -#endif + nk, nj, ni, GPU_LAMBDA(const int k, const int j, const int i) { + const int ia = i + ni * (j + nj * k); + const int ib = + i + N_GHOST_POTENTIAL + + ngi * (j + N_GHOST_POTENTIAL + ngj * (k + N_GHOST_POTENTIAL)); + + const Real x = xMin + i * dx; + const Real y = yMin + j * dy; + const Real z = zMin + k * dz; + + const Real r = sqrt(x * x + y * y); + const Real a = sqrt(z * z + zd * zd); + const Real b = a + rd; + const Real c = sqrt(r * r + b * b); + const Real dPhi = phi0 / c; + + phi[ib] = db[ia] + dPhi; + }); + + #ifndef GRAVITY_GPU + CHECK(cudaMemcpy(potential, dc_, potentialBytes_, cudaMemcpyDeviceToHost)); + #endif } -void Potential_Paris_Galactic::Initialize(const Real lx, const Real ly, const Real lz, const Real xMin, const Real yMin, const Real zMin, const int nx, const int ny, const int nz, const int nxReal, const int nyReal, const int nzReal, const Real dx, const Real dy, const Real dz) +void Potential_Paris_Galactic::Initialize( + const Real lx, const Real ly, const Real lz, const Real xMin, + const Real yMin, const Real zMin, const int nx, const int ny, const int nz, + const int nxReal, const int nyReal, const int nzReal, const Real dx, + const Real dy, const Real dz) { - const long nl012 = long(nxReal)*long(nyReal)*long(nzReal); + const long nl012 = long(nxReal) * long(nyReal) * long(nzReal); assert(nl012 <= INT_MAX); dn_[0] = nzReal; @@ -125,43 +137,53 @@ void Potential_Paris_Galactic::Initialize(const Real lx, const Real ly, const Re lr_[1] = ly; lr_[2] = lx; - myLo_[0] = zMin+0.5*dr_[0]; - myLo_[1] = yMin+0.5*dr_[1]; - myLo_[2] = xMin+0.5*dr_[2]; - MPI_Allreduce(myLo_,lo_,3,MPI_DOUBLE,MPI_MIN,MPI_COMM_WORLD); - - const Real hi[3] = {lo_[0]+lr_[0]-dr_[0],lo_[1]+lr_[1]-dr_[1],lo_[2]+lr_[1]-dr_[2]}; - const int n[3] = {nz,ny,nx}; - const int m[3] = {n[0]/nzReal,n[1]/nyReal,n[2]/nxReal}; - const int id[3] = {int(round((myLo_[0]-lo_[0])/(dn_[0]*dr_[0]))),int(round((myLo_[1]-lo_[1])/(dn_[1]*dr_[1]))),int(round((myLo_[2]-lo_[2])/(dn_[2]*dr_[2])))}; - chprintf(" Paris Galactic: [ %g %g %g ]-[ %g %g %g ] n_local[ %d %d %d ] tasks[ %d %d %d ]\n",lo_[2],lo_[1],lo_[0],hi[2],hi[1],hi[0],dn_[2],dn_[1],dn_[0],m[2],m[1],m[0]); - - assert(dn_[0] == n[0]/m[0]); - assert(dn_[1] == n[1]/m[1]); - assert(dn_[2] == n[2]/m[2]); - - pp_ = new PoissonZero3DBlockedGPU(n,lo_,hi,m,id); + myLo_[0] = zMin + 0.5 * dr_[0]; + myLo_[1] = yMin + 0.5 * dr_[1]; + myLo_[2] = xMin + 0.5 * dr_[2]; + MPI_Allreduce(myLo_, lo_, 3, MPI_DOUBLE, MPI_MIN, MPI_COMM_WORLD); + + const Real hi[3] = {lo_[0] + lr_[0] - dr_[0], lo_[1] + lr_[1] - dr_[1], + lo_[2] + lr_[1] - dr_[2]}; + const int n[3] = {nz, ny, nx}; + const int m[3] = {n[0] / nzReal, n[1] / nyReal, n[2] / nxReal}; + const int id[3] = {int(round((myLo_[0] - lo_[0]) / (dn_[0] * dr_[0]))), + int(round((myLo_[1] - lo_[1]) / (dn_[1] * dr_[1]))), + int(round((myLo_[2] - lo_[2]) / (dn_[2] * dr_[2])))}; + chprintf( + " Paris Galactic: [ %g %g %g ]-[ %g %g %g ] n_local[ %d %d %d ] tasks[ " + "%d %d %d ]\n", + lo_[2], lo_[1], lo_[0], hi[2], hi[1], hi[0], dn_[2], dn_[1], dn_[0], m[2], + m[1], m[0]); + + assert(dn_[0] == n[0] / m[0]); + assert(dn_[1] == n[1] / m[1]); + assert(dn_[2] == n[2] / m[2]); + + pp_ = new PoissonZero3DBlockedGPU(n, lo_, hi, m, id); assert(pp_); - minBytes_ = pp_->bytes(); - densityBytes_ = long(sizeof(Real))*dn_[0]*dn_[1]*dn_[2]; - - CHECK(cudaMalloc(reinterpret_cast(&da_),std::max(minBytes_,densityBytes_))); - CHECK(cudaMalloc(reinterpret_cast(&db_),std::max(minBytes_,densityBytes_))); - -#ifndef GRAVITY_GPU - const long gg = N_GHOST_POTENTIAL+N_GHOST_POTENTIAL; - potentialBytes_ = long(sizeof(Real))*(dn_[0]+gg)*(dn_[1]+gg)*(dn_[2]+gg); - CHECK(cudaMalloc(reinterpret_cast(&dc_),potentialBytes_)); -#endif + minBytes_ = pp_->bytes(); + densityBytes_ = long(sizeof(Real)) * dn_[0] * dn_[1] * dn_[2]; + + CHECK(cudaMalloc(reinterpret_cast(&da_), + std::max(minBytes_, densityBytes_))); + CHECK(cudaMalloc(reinterpret_cast(&db_), + std::max(minBytes_, densityBytes_))); + + #ifndef GRAVITY_GPU + const long gg = N_GHOST_POTENTIAL + N_GHOST_POTENTIAL; + potentialBytes_ = + long(sizeof(Real)) * (dn_[0] + gg) * (dn_[1] + gg) * (dn_[2] + gg); + CHECK(cudaMalloc(reinterpret_cast(&dc_), potentialBytes_)); + #endif } void Potential_Paris_Galactic::Reset() { -#ifndef GRAVITY_GPU + #ifndef GRAVITY_GPU if (dc_) CHECK(cudaFree(dc_)); - dc_ = nullptr; + dc_ = nullptr; potentialBytes_ = 0; -#endif + #endif if (db_) CHECK(cudaFree(db_)); db_ = nullptr; diff --git a/src/gravity/potential_paris_galactic.h b/src/gravity/potential_paris_galactic.h index bb05fa310..9e5df6adf 100644 --- a/src/gravity/potential_paris_galactic.h +++ b/src/gravity/potential_paris_galactic.h @@ -2,29 +2,34 @@ #ifdef PARIS_GALACTIC -#include "paris/PoissonZero3DBlockedGPU.hpp" -#include "../global/global.h" -#include "../model/disk_galaxy.h" + #include "../global/global.h" + #include "../model/disk_galaxy.h" + #include "paris/PoissonZero3DBlockedGPU.hpp" -class Potential_Paris_Galactic { - public: - Potential_Paris_Galactic(); - ~Potential_Paris_Galactic(); - void Get_Potential(const Real *density, Real *potential, Real g, const DiskGalaxy &galaxy); - void Initialize(Real lx, Real ly, Real lz, Real xMin, Real yMin, Real zMin, int nx, int ny, int nz, int nxReal, int nyReal, int nzReal, Real dx, Real dy, Real dz); - void Reset(); - protected: - int dn_[3]; - Real dr_[3],lo_[3],lr_[3],myLo_[3]; - PoissonZero3DBlockedGPU *pp_; - long densityBytes_; - long minBytes_; - Real *da_; - Real *db_; -#ifndef GRAVITY_GPU - long potentialBytes_; - Real *dc_; -#endif +class Potential_Paris_Galactic +{ + public: + Potential_Paris_Galactic(); + ~Potential_Paris_Galactic(); + void Get_Potential(const Real *density, Real *potential, Real g, + const DiskGalaxy &galaxy); + void Initialize(Real lx, Real ly, Real lz, Real xMin, Real yMin, Real zMin, + int nx, int ny, int nz, int nxReal, int nyReal, int nzReal, + Real dx, Real dy, Real dz); + void Reset(); + + protected: + int dn_[3]; + Real dr_[3], lo_[3], lr_[3], myLo_[3]; + PoissonZero3DBlockedGPU *pp_; + long densityBytes_; + long minBytes_; + Real *da_; + Real *db_; + #ifndef GRAVITY_GPU + long potentialBytes_; + Real *dc_; + #endif }; #endif diff --git a/src/gravity/static_grav.h b/src/gravity/static_grav.h index 3ddbb86be..d36045148 100644 --- a/src/gravity/static_grav.h +++ b/src/gravity/static_grav.h @@ -4,159 +4,174 @@ functions in hydro_cuda.cu. */ #ifdef CUDA -#pragma once + #pragma once -#include -#include // provides sqrt log cos sin atan etc. -#include "../global/global.h" // provides GN etc. + #include // provides sqrt log cos sin atan etc. + #include + + #include "../global/global.h" // provides GN etc. // Work around lack of pow(Real,int) in Hip Clang for Rocm 3.5 -static inline __device__ Real pow2(const Real x) { return x*x; } +static inline __device__ Real pow2(const Real x) { return x * x; } -inline __device__ void calc_g_1D(int xid, int x_off, int n_ghost, Real dx, Real xbound, Real *gx) +inline __device__ void calc_g_1D(int xid, int x_off, int n_ghost, Real dx, + Real xbound, Real *gx) { Real x_pos, r_disk, r_halo; - x_pos = (x_off + xid - n_ghost + 0.5)*dx + xbound; + x_pos = (x_off + xid - n_ghost + 0.5) * dx + xbound; // for disk components, calculate polar r - //r_disk = 0.220970869121; - //r_disk = 6.85009694274; + // r_disk = 0.220970869121; + // r_disk = 6.85009694274; r_disk = 13.9211647546; - //r_disk = 20.9922325665; - // for halo, calculate spherical r - r_halo = sqrt(x_pos*x_pos + r_disk*r_disk); + // r_disk = 20.9922325665; + // for halo, calculate spherical r + r_halo = sqrt(x_pos * x_pos + r_disk * r_disk); // set properties of halo and disk (these must match initial conditions) - Real a_disk_z, a_halo, M_vir, M_d, R_vir, R_d, z_d, R_h, M_h, c_vir, phi_0_h, x; - M_vir = 1.0e12; // viral mass of MW in M_sun - M_d = 6.5e10; // mass of disk in M_sun - M_h = M_vir - M_d; // halo mass in M_sun - R_vir = 261; // viral radius in kpc - c_vir = 20.0; // halo concentration - R_h = R_vir / c_vir; // halo scale length in kpc - R_d = 3.5; // disk scale length in kpc - z_d = 3.5/5.0; // disk scale height in kpc - phi_0_h = GN * M_h / (log(1.0+c_vir) - c_vir / (1.0+c_vir)); - x = r_halo / R_h; + Real a_disk_z, a_halo, M_vir, M_d, R_vir, R_d, z_d, R_h, M_h, c_vir, phi_0_h, + x; + M_vir = 1.0e12; // viral mass of MW in M_sun + M_d = 6.5e10; // mass of disk in M_sun + M_h = M_vir - M_d; // halo mass in M_sun + R_vir = 261; // viral radius in kpc + c_vir = 20.0; // halo concentration + R_h = R_vir / c_vir; // halo scale length in kpc + R_d = 3.5; // disk scale length in kpc + z_d = 3.5 / 5.0; // disk scale height in kpc + phi_0_h = GN * M_h / (log(1.0 + c_vir) - c_vir / (1.0 + c_vir)); + x = r_halo / R_h; // calculate acceleration due to NFW halo & Miyamoto-Nagai disk - a_halo = - phi_0_h * (log(1+x) - x/(1+x)) / (r_halo*r_halo); - a_disk_z = - GN * M_d * x_pos * (R_d + sqrt(x_pos*x_pos + z_d*z_d)) / ( pow(r_disk*r_disk + pow2(R_d + sqrt(x_pos*x_pos + z_d*z_d)), 1.5) * sqrt(x_pos*x_pos + z_d*z_d) ); + a_halo = -phi_0_h * (log(1 + x) - x / (1 + x)) / (r_halo * r_halo); + a_disk_z = + -GN * M_d * x_pos * (R_d + sqrt(x_pos * x_pos + z_d * z_d)) / + (pow(r_disk * r_disk + pow2(R_d + sqrt(x_pos * x_pos + z_d * z_d)), 1.5) * + sqrt(x_pos * x_pos + z_d * z_d)); // total acceleration is the sum of the halo + disk components - *gx = (x_pos/r_halo)*a_halo + a_disk_z; + *gx = (x_pos / r_halo) * a_halo + a_disk_z; return; - } - -inline __device__ void calc_g_2D(int xid, int yid, int x_off, int y_off, int n_ghost, Real dx, Real dy, Real xbound, Real ybound, Real *gx, Real *gy) +inline __device__ void calc_g_2D(int xid, int yid, int x_off, int y_off, + int n_ghost, Real dx, Real dy, Real xbound, + Real ybound, Real *gx, Real *gy) { Real x_pos, y_pos, r, phi; - // use the subgrid offset and global boundaries to calculate absolute positions on the grid - x_pos = (x_off + xid - n_ghost + 0.5)*dx + xbound; - y_pos = (y_off + yid - n_ghost + 0.5)*dy + ybound; + // use the subgrid offset and global boundaries to calculate absolute + // positions on the grid + x_pos = (x_off + xid - n_ghost + 0.5) * dx + xbound; + y_pos = (y_off + yid - n_ghost + 0.5) * dy + ybound; // for Gresho, also need r & phi - r = sqrt(x_pos*x_pos + y_pos*y_pos); + r = sqrt(x_pos * x_pos + y_pos * y_pos); phi = atan2(y_pos, x_pos); -/* - // set acceleration to balance v_phi in Gresho problem - if (r < 0.2) { - *gx = -cos(phi)*25.0*r; - *gy = -sin(phi)*25.0*r; - } - else if (r >= 0.2 && r < 0.4) { - *gx = -cos(phi)*(4.0 - 20.0*r + 25.0*r*r)/r; - *gy = -sin(phi)*(4.0 - 20.0*r + 25.0*r*r)/r; - } - else { - *gx = 0.0; - *gy = 0.0; - } -*/ -/* - // set gravitational acceleration for Keplarian potential - Real M; - M = 1*Msun; - *gx = -cos(phi)*GN*M/(r*r); - *gy = -sin(phi)*GN*M/(r*r); -*/ + /* + // set acceleration to balance v_phi in Gresho problem + if (r < 0.2) { + *gx = -cos(phi)*25.0*r; + *gy = -sin(phi)*25.0*r; + } + else if (r >= 0.2 && r < 0.4) { + *gx = -cos(phi)*(4.0 - 20.0*r + 25.0*r*r)/r; + *gy = -sin(phi)*(4.0 - 20.0*r + 25.0*r*r)/r; + } + else { + *gx = 0.0; + *gy = 0.0; + } + */ + /* + // set gravitational acceleration for Keplarian potential + Real M; + M = 1*Msun; + *gx = -cos(phi)*GN*M/(r*r); + *gy = -sin(phi)*GN*M/(r*r); + */ // set gravitational acceleration for Kuzmin disk + NFW halo Real a_d, a_h, a, M_vir, M_d, R_vir, R_d, R_s, M_h, c_vir, x; - M_vir = 1.0e12; // viral mass of MW in M_sun - M_d = 6.5e10; // mass of disk in M_sun (assume all gas) - M_h = M_vir - M_d; // halo mass in M_sun - R_vir = 261; // viral radius in kpc - c_vir = 20; // halo concentration - R_s = R_vir / c_vir; // halo scale length in kpc - R_d = 3.5; // disk scale length in kpc + M_vir = 1.0e12; // viral mass of MW in M_sun + M_d = 6.5e10; // mass of disk in M_sun (assume all gas) + M_h = M_vir - M_d; // halo mass in M_sun + R_vir = 261; // viral radius in kpc + c_vir = 20; // halo concentration + R_s = R_vir / c_vir; // halo scale length in kpc + R_d = 3.5; // disk scale length in kpc // calculate acceleration - x = r / R_s; - a_d = GN * M_d * r * pow(r*r + R_d*R_d, -1.5); - a_h = GN * M_h * (log(1+x)- x / (1+x)) / ((log(1+c_vir) - c_vir / (1+c_vir)) * r*r); + x = r / R_s; + a_d = GN * M_d * r * pow(r * r + R_d * R_d, -1.5); + a_h = GN * M_h * (log(1 + x) - x / (1 + x)) / + ((log(1 + c_vir) - c_vir / (1 + c_vir)) * r * r); a = a_d + a_h; - *gx = -cos(phi)*a; - *gy = -sin(phi)*a; + *gx = -cos(phi) * a; + *gy = -sin(phi) * a; return; } - -inline __device__ void calc_g_3D(int xid, int yid, int zid, int x_off, int y_off, int z_off, int n_ghost, Real dx, Real dy, Real dz, Real xbound, Real ybound, Real zbound, Real *gx, Real *gy, Real *gz) +inline __device__ void calc_g_3D(int xid, int yid, int zid, int x_off, + int y_off, int z_off, int n_ghost, Real dx, + Real dy, Real dz, Real xbound, Real ybound, + Real zbound, Real *gx, Real *gy, Real *gz) { Real x_pos, y_pos, z_pos, r_disk, r_halo; - // use the subgrid offset and global boundaries to calculate absolute positions on the grid - x_pos = (x_off + xid - n_ghost + 0.5)*dx + xbound; - y_pos = (y_off + yid - n_ghost + 0.5)*dy + ybound; - z_pos = (z_off + zid - n_ghost + 0.5)*dz + zbound; + // use the subgrid offset and global boundaries to calculate absolute + // positions on the grid + x_pos = (x_off + xid - n_ghost + 0.5) * dx + xbound; + y_pos = (y_off + yid - n_ghost + 0.5) * dy + ybound; + z_pos = (z_off + zid - n_ghost + 0.5) * dz + zbound; // for disk components, calculate polar r - r_disk = sqrt(x_pos*x_pos + y_pos*y_pos); + r_disk = sqrt(x_pos * x_pos + y_pos * y_pos); // for halo, calculate spherical r - r_halo = sqrt(x_pos*x_pos + y_pos*y_pos + z_pos*z_pos); + r_halo = sqrt(x_pos * x_pos + y_pos * y_pos + z_pos * z_pos); // set properties of halo and disk (these must match initial conditions) Real a_disk_r, a_disk_z, a_halo, a_halo_r, a_halo_z; Real M_vir, M_d, R_vir, R_d, z_d, R_h, M_h, c_vir, phi_0_h, x; // MW model - M_vir = 1.0e12; // viral mass of in M_sun - M_d = 6.5e10; // viral mass of in M_sun - R_d = 3.5; // disk scale length in kpc - z_d = 3.5/5.0; // disk scale height in kpc - R_vir = 261.; // virial radius in kpc - c_vir = 20.0; // halo concentration + M_vir = 1.0e12; // viral mass of in M_sun + M_d = 6.5e10; // viral mass of in M_sun + R_d = 3.5; // disk scale length in kpc + z_d = 3.5 / 5.0; // disk scale height in kpc + R_vir = 261.; // virial radius in kpc + c_vir = 20.0; // halo concentration // M82 model - //M_vir = 5.0e10; // viral mass of in M_sun - //M_d = 1.0e10; // mass of disk in M_sun - //R_d = 0.8; // disk scale length in kpc - //z_d = 0.15; // disk scale height in kpc - //R_vir = R_d/0.015; // viral radius in kpc - //c_vir = 10.0; // halo concentration - - M_h = M_vir - M_d; // halo mass in M_sun - R_h = R_vir / c_vir; // halo scale length in kpc - phi_0_h = GN * M_h / (log(1.0+c_vir) - c_vir / (1.0+c_vir)); - x = r_halo / R_h; + // M_vir = 5.0e10; // viral mass of in M_sun + // M_d = 1.0e10; // mass of disk in M_sun + // R_d = 0.8; // disk scale length in kpc + // z_d = 0.15; // disk scale height in kpc + // R_vir = R_d/0.015; // viral radius in kpc + // c_vir = 10.0; // halo concentration + + M_h = M_vir - M_d; // halo mass in M_sun + R_h = R_vir / c_vir; // halo scale length in kpc + phi_0_h = GN * M_h / (log(1.0 + c_vir) - c_vir / (1.0 + c_vir)); + x = r_halo / R_h; // calculate acceleration due to NFW halo & Miyamoto-Nagai disk - a_halo = - phi_0_h * (log(1+x) - x/(1+x)) / (r_halo*r_halo); - a_halo_r = a_halo*(r_disk/r_halo); - a_halo_z = a_halo*(z_pos/r_halo); - a_disk_r = - GN * M_d * r_disk * pow(r_disk*r_disk+ pow2(R_d + sqrt(z_pos*z_pos + z_d*z_d)), -1.5); - a_disk_z = - GN * M_d * z_pos * (R_d + sqrt(z_pos*z_pos + z_d*z_d)) / ( pow(r_disk*r_disk + pow2(R_d + sqrt(z_pos*z_pos + z_d*z_d)), 1.5) * sqrt(z_pos*z_pos + z_d*z_d) ); + a_halo = -phi_0_h * (log(1 + x) - x / (1 + x)) / (r_halo * r_halo); + a_halo_r = a_halo * (r_disk / r_halo); + a_halo_z = a_halo * (z_pos / r_halo); + a_disk_r = + -GN * M_d * r_disk * + pow(r_disk * r_disk + pow2(R_d + sqrt(z_pos * z_pos + z_d * z_d)), -1.5); + a_disk_z = + -GN * M_d * z_pos * (R_d + sqrt(z_pos * z_pos + z_d * z_d)) / + (pow(r_disk * r_disk + pow2(R_d + sqrt(z_pos * z_pos + z_d * z_d)), 1.5) * + sqrt(z_pos * z_pos + z_d * z_d)); // total acceleration is the sum of the halo + disk components - *gx = (x_pos/r_disk)*(a_disk_r+a_halo_r); - *gy = (y_pos/r_disk)*(a_disk_r+a_halo_r); - *gz = a_disk_z+a_halo_z; + *gx = (x_pos / r_disk) * (a_disk_r + a_halo_r); + *gy = (y_pos / r_disk) * (a_disk_r + a_halo_r); + *gz = a_disk_z + a_halo_z; return; } -#endif //CUDA - +#endif // CUDA diff --git a/src/grid/boundary_conditions.cpp b/src/grid/boundary_conditions.cpp index afe0e0a42..91d08dbe7 100644 --- a/src/grid/boundary_conditions.cpp +++ b/src/grid/boundary_conditions.cpp @@ -2,122 +2,128 @@ * \brief Definitions of the boundary conditions for various tests. Functions are members of the Grid3D class. */ -#include -#include #include +#include +#include #include + +#include "../grid/cuda_boundaries.h" // provides SetGhostCells #include "../grid/grid3D.h" #include "../io/io.h" -#include "../utils/error_handling.h" #include "../mpi/mpi_routines.h" - -#include "../grid/cuda_boundaries.h" // provides SetGhostCells - +#include "../utils/error_handling.h" /*! \fn void Set_Boundary_Conditions_Grid(parameters P) - * \brief Set the boundary conditions for all components based on info in the parameters structure. */ -void Grid3D::Set_Boundary_Conditions_Grid( parameters P){ - - #ifndef ONLY_PARTICLES + * \brief Set the boundary conditions for all components based on info in the + * parameters structure. */ +void Grid3D::Set_Boundary_Conditions_Grid(parameters P) +{ +#ifndef ONLY_PARTICLES // Dont transfer Hydro boundaries when only doing particles // Transfer Hydro Conserved boundaries #ifdef CPU_TIME Timer.Boundaries.Start(); - #endif //CPU_TIME + #endif // CPU_TIME H.TRANSFER_HYDRO_BOUNDARIES = true; Set_Boundary_Conditions(P); H.TRANSFER_HYDRO_BOUNDARIES = false; #ifdef CPU_TIME Timer.Boundaries.End(); - #endif //CPU_TIME - #endif //ONLY_PARTICLES + #endif // CPU_TIME +#endif // ONLY_PARTICLES - // If the Gravity coupling is on the CPU, the potential is not in the Conserved arrays, - // and its boundaries need to be transferred separately - #ifdef GRAVITY +// If the Gravity coupling is on the CPU, the potential is not in the Conserved +// arrays, and its boundaries need to be transferred separately +#ifdef GRAVITY #ifdef CPU_TIME Timer.Pot_Boundaries.Start(); - #endif //CPU_TIME + #endif // CPU_TIME Grav.TRANSFER_POTENTIAL_BOUNDARIES = true; Set_Boundary_Conditions(P); Grav.TRANSFER_POTENTIAL_BOUNDARIES = false; #ifdef CPU_TIME Timer.Pot_Boundaries.End(); - #endif //CPU_TIME - #endif //GRAVITY + #endif // CPU_TIME +#endif // GRAVITY } /*! \fn void Set_Boundary_Conditions(parameters P) - * \brief Set the boundary conditions based on info in the parameters structure. */ -void Grid3D::Set_Boundary_Conditions(parameters P) { - - //Check Only one boundary type id being transferred + * \brief Set the boundary conditions based on info in the parameters + * structure. */ +void Grid3D::Set_Boundary_Conditions(parameters P) +{ + // Check Only one boundary type id being transferred int n_bounds = 0; - n_bounds += (int) H.TRANSFER_HYDRO_BOUNDARIES; - #ifdef GRAVITY - n_bounds += (int) Grav.TRANSFER_POTENTIAL_BOUNDARIES; + n_bounds += (int)H.TRANSFER_HYDRO_BOUNDARIES; +#ifdef GRAVITY + n_bounds += (int)Grav.TRANSFER_POTENTIAL_BOUNDARIES; #ifdef SOR - n_bounds += (int) Grav.Poisson_solver.TRANSFER_POISSON_BOUNDARIES; - #endif //SOR - #endif //GRAVITY - #ifdef PARTICLES - n_bounds += (int) Particles.TRANSFER_PARTICLES_BOUNDARIES; - n_bounds += (int) Particles.TRANSFER_DENSITY_BOUNDARIES; - #endif //PARTICLES - - if ( n_bounds > 1 ){ - printf("ERROR: More than one boundary type for transfer. N boundary types: %d\n", n_bounds ); - printf(" Boundary Hydro: %d\n", (int) H.TRANSFER_HYDRO_BOUNDARIES ); - #ifdef GRAVITY - printf(" Boundary Potential: %d\n", (int) Grav.TRANSFER_POTENTIAL_BOUNDARIES ); - #ifdef SOR - printf(" Boundary Poisson: %d\n", (int) Grav.Poisson_solver.TRANSFER_POISSON_BOUNDARIES ); - #endif //SOR - #endif //GRAVITY - #ifdef PARTICLES - printf(" Boundary Particles: %d\n", (int) Particles.TRANSFER_PARTICLES_BOUNDARIES ); - printf(" Boundary Particles Density: %d\n", (int) Particles.TRANSFER_DENSITY_BOUNDARIES ); - #endif //PARTICLES + n_bounds += (int)Grav.Poisson_solver.TRANSFER_POISSON_BOUNDARIES; + #endif // SOR +#endif // GRAVITY +#ifdef PARTICLES + n_bounds += (int)Particles.TRANSFER_PARTICLES_BOUNDARIES; + n_bounds += (int)Particles.TRANSFER_DENSITY_BOUNDARIES; +#endif // PARTICLES + + if (n_bounds > 1) { + printf( + "ERROR: More than one boundary type for transfer. N boundary types: " + "%d\n", + n_bounds); + printf(" Boundary Hydro: %d\n", (int)H.TRANSFER_HYDRO_BOUNDARIES); +#ifdef GRAVITY + printf(" Boundary Potential: %d\n", + (int)Grav.TRANSFER_POTENTIAL_BOUNDARIES); + #ifdef SOR + printf(" Boundary Poisson: %d\n", + (int)Grav.Poisson_solver.TRANSFER_POISSON_BOUNDARIES); + #endif // SOR +#endif // GRAVITY +#ifdef PARTICLES + printf(" Boundary Particles: %d\n", + (int)Particles.TRANSFER_PARTICLES_BOUNDARIES); + printf(" Boundary Particles Density: %d\n", + (int)Particles.TRANSFER_DENSITY_BOUNDARIES); +#endif // PARTICLES exit(-1); } // If no boundaries are set to be transferred then exit; - if ( n_bounds == 0 ){ - printf( " Warning: No boundary type for transfer \n"); - return; + if (n_bounds == 0) { + printf(" Warning: No boundary type for transfer \n"); + return; } - #ifndef MPI_CHOLLA - int flags[6] = {0,0,0,0,0,0}; + int flags[6] = {0, 0, 0, 0, 0, 0}; // Check for custom boundary conditions and set boundary flags - if(Check_Custom_Boundary(&flags[0], P)) - { + if (Check_Custom_Boundary(&flags[0], P)) { Custom_Boundary(P.custom_bcnd); } // set regular boundaries - if(H.nx>1) { + if (H.nx > 1) { Set_Boundaries(0, flags); Set_Boundaries(1, flags); } - if(H.ny>1) { + if (H.ny > 1) { Set_Boundaries(2, flags); Set_Boundaries(3, flags); } - if(H.nz>1) { + if (H.nz > 1) { Set_Boundaries(4, flags); Set_Boundaries(5, flags); } #ifdef GRAVITY - Grav.Set_Boundary_Flags( flags ); - #endif //Gravity + Grav.Set_Boundary_Flags(flags); + #endif // Gravity -#else /*MPI_CHOLLA*/ +#else /*MPI_CHOLLA*/ /*Set boundaries, including MPI exchanges*/ @@ -126,38 +132,33 @@ void Grid3D::Set_Boundary_Conditions(parameters P) { #endif /*MPI_CHOLLA*/ } - /*! \fn int Check_Custom_Boundary(int *flags, struct parameters P) * \brief Check for custom boundary conditions and set boundary flags. */ int Grid3D::Check_Custom_Boundary(int *flags, struct parameters P) { - /*check if any boundary is a custom boundary*/ /*if yes, then return 1*/ /*if no, then return 0*/ /*additionally, set a flag for each boundary*/ - if(H.nx>1) - { - *(flags+0) = P.xl_bcnd; - *(flags+1) = P.xu_bcnd; + if (H.nx > 1) { + *(flags + 0) = P.xl_bcnd; + *(flags + 1) = P.xu_bcnd; } - if(H.ny>1) - { - *(flags+2) = P.yl_bcnd; - *(flags+3) = P.yu_bcnd; + if (H.ny > 1) { + *(flags + 2) = P.yl_bcnd; + *(flags + 3) = P.yu_bcnd; } - if(H.nz>1) - { - *(flags+4) = P.zl_bcnd; - *(flags+5) = P.zu_bcnd; + if (H.nz > 1) { + *(flags + 4) = P.zl_bcnd; + *(flags + 5) = P.zu_bcnd; } - for (int i=0; i<6; i++) - { - if (!( (flags[i]>=0)&&(flags[i]<=5) ) ) - { - chprintf("Invalid boundary conditions. Must select between 1 (periodic), 2 (reflective), 3 (transmissive), 4 (custom), 5 (mpi).\n"); + for (int i = 0; i < 6; i++) { + if (!((flags[i] >= 0) && (flags[i] <= 5))) { + chprintf( + "Invalid boundary conditions. Must select between 1 (periodic), 2 " + "(reflective), 3 (transmissive), 4 (custom), 5 (mpi).\n"); chexit(-1); } if (flags[i] == 4) { @@ -169,154 +170,147 @@ int Grid3D::Check_Custom_Boundary(int *flags, struct parameters P) return 0; } - - /*! \fn void Set_Boundaries(int dir, int flags[]) * \brief Apply boundary conditions to the grid. */ void Grid3D::Set_Boundaries(int dir, int flags[]) { int i, j, k; - int imin[3] = {0,0,0}; - int imax[3] = {H.nx,H.ny,H.nz}; - Real a[3] = {1,1,1}; //sign of momenta - int idx; //index of a real cell - int gidx; //index of a ghost cell + int imin[3] = {0, 0, 0}; + int imax[3] = {H.nx, H.ny, H.nz}; + Real a[3] = {1, 1, 1}; // sign of momenta + int idx; // index of a real cell + int gidx; // index of a ghost cell int nPB, nBoundaries; int *iaBoundary, *iaCell; /*if the cell face is an custom boundary, exit */ - if(flags[dir]==4) - return; + if (flags[dir] == 4) return; - #ifdef MPI_CHOLLA +#ifdef MPI_CHOLLA /*if the cell face is an mpi boundary, exit */ - if(flags[dir]==5) - return; - #endif /*MPI_CHOLLA*/ + if (flags[dir] == 5) return; +#endif /*MPI_CHOLLA*/ - #ifdef GRAVITY - if ( Grav.TRANSFER_POTENTIAL_BOUNDARIES ){ - if ( flags[dir] == 1 ){ - // Set Periodic Boundaries for the ghost cells. - #ifdef GRAVITY_GPU - if ( dir == 0 ) Set_Potential_Boundaries_Periodic_GPU( 0, 0, flags ); - if ( dir == 1 ) Set_Potential_Boundaries_Periodic_GPU( 0, 1, flags ); - if ( dir == 2 ) Set_Potential_Boundaries_Periodic_GPU( 1, 0, flags ); - if ( dir == 3 ) Set_Potential_Boundaries_Periodic_GPU( 1, 1, flags ); - if ( dir == 4 ) Set_Potential_Boundaries_Periodic_GPU( 2, 0, flags ); - if ( dir == 5 ) Set_Potential_Boundaries_Periodic_GPU( 2, 1, flags ); - #else - if ( dir == 0 ) Set_Potential_Boundaries_Periodic( 0, 0, flags ); - if ( dir == 1 ) Set_Potential_Boundaries_Periodic( 0, 1, flags ); - if ( dir == 2 ) Set_Potential_Boundaries_Periodic( 1, 0, flags ); - if ( dir == 3 ) Set_Potential_Boundaries_Periodic( 1, 1, flags ); - if ( dir == 4 ) Set_Potential_Boundaries_Periodic( 2, 0, flags ); - if ( dir == 5 ) Set_Potential_Boundaries_Periodic( 2, 1, flags ); - #endif +#ifdef GRAVITY + if (Grav.TRANSFER_POTENTIAL_BOUNDARIES) { + if (flags[dir] == 1) { + // Set Periodic Boundaries for the ghost cells. + #ifdef GRAVITY_GPU + if (dir == 0) Set_Potential_Boundaries_Periodic_GPU(0, 0, flags); + if (dir == 1) Set_Potential_Boundaries_Periodic_GPU(0, 1, flags); + if (dir == 2) Set_Potential_Boundaries_Periodic_GPU(1, 0, flags); + if (dir == 3) Set_Potential_Boundaries_Periodic_GPU(1, 1, flags); + if (dir == 4) Set_Potential_Boundaries_Periodic_GPU(2, 0, flags); + if (dir == 5) Set_Potential_Boundaries_Periodic_GPU(2, 1, flags); + #else + if (dir == 0) Set_Potential_Boundaries_Periodic(0, 0, flags); + if (dir == 1) Set_Potential_Boundaries_Periodic(0, 1, flags); + if (dir == 2) Set_Potential_Boundaries_Periodic(1, 0, flags); + if (dir == 3) Set_Potential_Boundaries_Periodic(1, 1, flags); + if (dir == 4) Set_Potential_Boundaries_Periodic(2, 0, flags); + if (dir == 5) Set_Potential_Boundaries_Periodic(2, 1, flags); + #endif } - if ( flags[dir] == 3 ){ - - #ifdef GRAVITY_GPU - if ( dir == 0 ) Set_Potential_Boundaries_Isolated_GPU( 0, 0, flags ); - if ( dir == 1 ) Set_Potential_Boundaries_Isolated_GPU( 0, 1, flags ); - if ( dir == 2 ) Set_Potential_Boundaries_Isolated_GPU( 1, 0, flags ); - if ( dir == 3 ) Set_Potential_Boundaries_Isolated_GPU( 1, 1, flags ); - if ( dir == 4 ) Set_Potential_Boundaries_Isolated_GPU( 2, 0, flags ); - if ( dir == 5 ) Set_Potential_Boundaries_Isolated_GPU( 2, 1, flags ); - #else - if ( dir == 0 ) Set_Potential_Boundaries_Isolated( 0, 0, flags ); - if ( dir == 1 ) Set_Potential_Boundaries_Isolated( 0, 1, flags ); - if ( dir == 2 ) Set_Potential_Boundaries_Isolated( 1, 0, flags ); - if ( dir == 3 ) Set_Potential_Boundaries_Isolated( 1, 1, flags ); - if ( dir == 4 ) Set_Potential_Boundaries_Isolated( 2, 0, flags ); - if ( dir == 5 ) Set_Potential_Boundaries_Isolated( 2, 1, flags ); - #endif//GRAVITY_GPU + if (flags[dir] == 3) { + #ifdef GRAVITY_GPU + if (dir == 0) Set_Potential_Boundaries_Isolated_GPU(0, 0, flags); + if (dir == 1) Set_Potential_Boundaries_Isolated_GPU(0, 1, flags); + if (dir == 2) Set_Potential_Boundaries_Isolated_GPU(1, 0, flags); + if (dir == 3) Set_Potential_Boundaries_Isolated_GPU(1, 1, flags); + if (dir == 4) Set_Potential_Boundaries_Isolated_GPU(2, 0, flags); + if (dir == 5) Set_Potential_Boundaries_Isolated_GPU(2, 1, flags); + #else + if (dir == 0) Set_Potential_Boundaries_Isolated(0, 0, flags); + if (dir == 1) Set_Potential_Boundaries_Isolated(0, 1, flags); + if (dir == 2) Set_Potential_Boundaries_Isolated(1, 0, flags); + if (dir == 3) Set_Potential_Boundaries_Isolated(1, 1, flags); + if (dir == 4) Set_Potential_Boundaries_Isolated(2, 0, flags); + if (dir == 5) Set_Potential_Boundaries_Isolated(2, 1, flags); + #endif // GRAVITY_GPU } return; } #ifdef SOR - if ( Grav.Poisson_solver.TRANSFER_POISSON_BOUNDARIES ){ - if ( flags[dir] ==1 ){ - if ( dir == 0 ) Grav.Poisson_solver.Copy_Poisson_Boundary_Periodic( 0, 0 ); - if ( dir == 1 ) Grav.Poisson_solver.Copy_Poisson_Boundary_Periodic( 0, 1 ); - if ( dir == 2 ) Grav.Poisson_solver.Copy_Poisson_Boundary_Periodic( 1, 0 ); - if ( dir == 3 ) Grav.Poisson_solver.Copy_Poisson_Boundary_Periodic( 1, 1 ); - if ( dir == 4 ) Grav.Poisson_solver.Copy_Poisson_Boundary_Periodic( 2, 0 ); - if ( dir == 5 ) Grav.Poisson_solver.Copy_Poisson_Boundary_Periodic( 2, 1 ); + if (Grav.Poisson_solver.TRANSFER_POISSON_BOUNDARIES) { + if (flags[dir] == 1) { + if (dir == 0) Grav.Poisson_solver.Copy_Poisson_Boundary_Periodic(0, 0); + if (dir == 1) Grav.Poisson_solver.Copy_Poisson_Boundary_Periodic(0, 1); + if (dir == 2) Grav.Poisson_solver.Copy_Poisson_Boundary_Periodic(1, 0); + if (dir == 3) Grav.Poisson_solver.Copy_Poisson_Boundary_Periodic(1, 1); + if (dir == 4) Grav.Poisson_solver.Copy_Poisson_Boundary_Periodic(2, 0); + if (dir == 5) Grav.Poisson_solver.Copy_Poisson_Boundary_Periodic(2, 1); } return; } - #endif //SOR - #endif //GRAVITY - - #ifdef PARTICLES - if ( Particles.TRANSFER_DENSITY_BOUNDARIES ){ - if ( flags[dir] ==1 ){ - // Set Periodic Boundaries for the particles density. - #ifdef PARTICLES_GPU - if ( dir == 0 ) Set_Particles_Density_Boundaries_Periodic_GPU( 0, 0 ); - if ( dir == 1 ) Set_Particles_Density_Boundaries_Periodic_GPU( 0, 1 ); - if ( dir == 2 ) Set_Particles_Density_Boundaries_Periodic_GPU( 1, 0 ); - if ( dir == 3 ) Set_Particles_Density_Boundaries_Periodic_GPU( 1, 1 ); - if ( dir == 4 ) Set_Particles_Density_Boundaries_Periodic_GPU( 2, 0 ); - if ( dir == 5 ) Set_Particles_Density_Boundaries_Periodic_GPU( 2, 1 ); - #endif - #ifdef PARTICLES_CPU - if ( dir == 0 ) Set_Particles_Density_Boundaries_Periodic( 0, 0 ); - if ( dir == 1 ) Set_Particles_Density_Boundaries_Periodic( 0, 1 ); - if ( dir == 2 ) Set_Particles_Density_Boundaries_Periodic( 1, 0 ); - if ( dir == 3 ) Set_Particles_Density_Boundaries_Periodic( 1, 1 ); - if ( dir == 4 ) Set_Particles_Density_Boundaries_Periodic( 2, 0 ); - if ( dir == 5 ) Set_Particles_Density_Boundaries_Periodic( 2, 1 ); - #endif + #endif // SOR +#endif // GRAVITY + +#ifdef PARTICLES + if (Particles.TRANSFER_DENSITY_BOUNDARIES) { + if (flags[dir] == 1) { + // Set Periodic Boundaries for the particles density. + #ifdef PARTICLES_GPU + if (dir == 0) Set_Particles_Density_Boundaries_Periodic_GPU(0, 0); + if (dir == 1) Set_Particles_Density_Boundaries_Periodic_GPU(0, 1); + if (dir == 2) Set_Particles_Density_Boundaries_Periodic_GPU(1, 0); + if (dir == 3) Set_Particles_Density_Boundaries_Periodic_GPU(1, 1); + if (dir == 4) Set_Particles_Density_Boundaries_Periodic_GPU(2, 0); + if (dir == 5) Set_Particles_Density_Boundaries_Periodic_GPU(2, 1); + #endif + #ifdef PARTICLES_CPU + if (dir == 0) Set_Particles_Density_Boundaries_Periodic(0, 0); + if (dir == 1) Set_Particles_Density_Boundaries_Periodic(0, 1); + if (dir == 2) Set_Particles_Density_Boundaries_Periodic(1, 0); + if (dir == 3) Set_Particles_Density_Boundaries_Periodic(1, 1); + if (dir == 4) Set_Particles_Density_Boundaries_Periodic(2, 0); + if (dir == 5) Set_Particles_Density_Boundaries_Periodic(2, 1); + #endif } return; } - #endif //PARTICLES - - #ifdef PARTICLES - if ( Particles.TRANSFER_PARTICLES_BOUNDARIES ){ - if ( flags[dir] ==1 ){ - #ifdef PARTICLES_CPU - if ( dir == 0 ) Set_Particles_Boundary( 0, 0 ); - if ( dir == 1 ) Set_Particles_Boundary( 0, 1 ); - if ( dir == 2 ) Set_Particles_Boundary( 1, 0 ); - if ( dir == 3 ) Set_Particles_Boundary( 1, 1 ); - if ( dir == 4 ) Set_Particles_Boundary( 2, 0 ); - if ( dir == 5 ) Set_Particles_Boundary( 2, 1 ); - #endif//PARTICLES_CPU - - #ifdef PARTICLES_GPU - if ( dir == 0 ) Set_Particles_Boundary_GPU( 0, 0 ); - if ( dir == 1 ) Set_Particles_Boundary_GPU( 0, 1 ); - if ( dir == 2 ) Set_Particles_Boundary_GPU( 1, 0 ); - if ( dir == 3 ) Set_Particles_Boundary_GPU( 1, 1 ); - if ( dir == 4 ) Set_Particles_Boundary_GPU( 2, 0 ); - if ( dir == 5 ) Set_Particles_Boundary_GPU( 2, 1 ); - #endif//PARTICLES_GPU - +#endif // PARTICLES + +#ifdef PARTICLES + if (Particles.TRANSFER_PARTICLES_BOUNDARIES) { + if (flags[dir] == 1) { + #ifdef PARTICLES_CPU + if (dir == 0) Set_Particles_Boundary(0, 0); + if (dir == 1) Set_Particles_Boundary(0, 1); + if (dir == 2) Set_Particles_Boundary(1, 0); + if (dir == 3) Set_Particles_Boundary(1, 1); + if (dir == 4) Set_Particles_Boundary(2, 0); + if (dir == 5) Set_Particles_Boundary(2, 1); + #endif // PARTICLES_CPU + + #ifdef PARTICLES_GPU + if (dir == 0) Set_Particles_Boundary_GPU(0, 0); + if (dir == 1) Set_Particles_Boundary_GPU(0, 1); + if (dir == 2) Set_Particles_Boundary_GPU(1, 0); + if (dir == 3) Set_Particles_Boundary_GPU(1, 1); + if (dir == 4) Set_Particles_Boundary_GPU(2, 0); + if (dir == 5) Set_Particles_Boundary_GPU(2, 1); + #endif // PARTICLES_GPU } else if (flags[dir] == 3) { - #ifdef PARTICLES_CPU - Set_Particles_Open_Boundary_CPU(dir/2, dir%2); - #endif - #ifdef PARTICLES_GPU - Particles.Set_Particles_Open_Boundary_GPU(dir/2, dir%2); - #endif + #ifdef PARTICLES_CPU + Set_Particles_Open_Boundary_CPU(dir / 2, dir % 2); + #endif + #ifdef PARTICLES_GPU + Particles.Set_Particles_Open_Boundary_GPU(dir / 2, dir % 2); + #endif } return; } - #endif//PARTICLES +#endif // PARTICLES - //get the extents of the ghost region we are initializing + // get the extents of the ghost region we are initializing Set_Boundary_Extents(dir, &imin[0], &imax[0]); // from grid/cuda_boundaries.cu - SetGhostCells(C.device, - H.nx, H.ny, H.nz, H.n_fields, H.n_cells, H.n_ghost, flags, - imax[0]-imin[0], imax[1]-imin[1], imax[2]-imin[2], - imin[0], imin[1], imin[2], dir); + SetGhostCells(C.device, H.nx, H.ny, H.nz, H.n_fields, H.n_cells, H.n_ghost, + flags, imax[0] - imin[0], imax[1] - imin[1], imax[2] - imin[2], + imin[0], imin[1], imin[2], dir); } /*! \fn Set_Boundary_Extents(int dir, int *imin, int *imax) @@ -330,131 +324,119 @@ void Grid3D::Set_Boundary_Extents(int dir, int *imin, int *imax) ku = H.nz; if (H.ny > 1) { jl = H.n_ghost; - ju = H.ny-H.n_ghost; + ju = H.ny - H.n_ghost; } if (H.nz > 1) { kl = H.n_ghost; - ku = H.nz-H.n_ghost; + ku = H.nz - H.n_ghost; } il = 0; iu = H.n_ghost; /*lower x face*/ - if(dir==0) - { - *(imin) = il; - *(imax) = iu; - *(imin+1) = jl; - *(imax+1) = ju; - *(imin+2) = kl; - *(imax+2) = ku; + if (dir == 0) { + *(imin) = il; + *(imax) = iu; + *(imin + 1) = jl; + *(imax + 1) = ju; + *(imin + 2) = kl; + *(imax + 2) = ku; } - il = H.nx-H.n_ghost; + il = H.nx - H.n_ghost; iu = H.nx; /*upper x face*/ - if(dir==1) - { - *(imin) = il; - *(imax) = iu; - *(imin+1) = jl; - *(imax+1) = ju; - *(imin+2) = kl; - *(imax+2) = ku; + if (dir == 1) { + *(imin) = il; + *(imax) = iu; + *(imin + 1) = jl; + *(imax + 1) = ju; + *(imin + 2) = kl; + *(imax + 2) = ku; } il = 0; iu = H.nx; jl = 0; ju = H.n_ghost; /*lower y face*/ - if(dir==2) - { - *(imin) = il; - *(imax) = iu; - *(imin+1) = jl; - *(imax+1) = ju; - *(imin+2) = kl; - *(imax+2) = ku; + if (dir == 2) { + *(imin) = il; + *(imax) = iu; + *(imin + 1) = jl; + *(imax + 1) = ju; + *(imin + 2) = kl; + *(imax + 2) = ku; } - jl = H.ny-H.n_ghost; + jl = H.ny - H.n_ghost; ju = H.ny; /*upper y face*/ - if(dir==3) - { - *(imin) = il; - *(imax) = iu; - *(imin+1) = jl; - *(imax+1) = ju; - *(imin+2) = kl; - *(imax+2) = ku; + if (dir == 3) { + *(imin) = il; + *(imax) = iu; + *(imin + 1) = jl; + *(imax + 1) = ju; + *(imin + 2) = kl; + *(imax + 2) = ku; } jl = 0; ju = H.ny; kl = 0; ku = H.n_ghost; /*lower z face*/ - if(dir==4) - { - *(imin) = il; - *(imax) = iu; - *(imin+1) = jl; - *(imax+1) = ju; - *(imin+2) = kl; - *(imax+2) = ku; + if (dir == 4) { + *(imin) = il; + *(imax) = iu; + *(imin + 1) = jl; + *(imax + 1) = ju; + *(imin + 2) = kl; + *(imax + 2) = ku; } - kl = H.nz-H.n_ghost; + kl = H.nz - H.n_ghost; ku = H.nz; /*upper z face*/ - if(dir==5) - { - *(imin) = il; - *(imax) = iu; - *(imin+1) = jl; - *(imax+1) = ju; - *(imin+2) = kl; - *(imax+2) = ku; + if (dir == 5) { + *(imin) = il; + *(imax) = iu; + *(imin + 1) = jl; + *(imax + 1) = ju; + *(imin + 2) = kl; + *(imax + 2) = ku; } } - - /*! \fn void Custom_Boundary(char bcnd[MAXLEN]) * \brief Select appropriate custom boundary function. */ void Grid3D::Custom_Boundary(char bcnd[MAXLEN]) { - if (strcmp(bcnd, "noh")==0) { + if (strcmp(bcnd, "noh") == 0) { // from grid/cuda_boundaries.cu Noh_Boundary(); } - if (strcmp(bcnd, "wind")==0) { + if (strcmp(bcnd, "wind") == 0) { // from grid/cuda_boundaries.cu Wind_Boundary(); - } - else { + } else { printf("ABORT: %s -> Unknown custom boundary condition.\n", bcnd); exit(0); } } - - /*! \fn void Wind_Boundary() * \brief Apply wind boundary */ void Grid3D::Wind_Boundary() { - int x_off, y_off, z_off; // set x, y, & z offsets of local CPU volume to pass to GPU // so global position on the grid is known x_off = y_off = z_off = 0; - #ifdef MPI_CHOLLA +#ifdef MPI_CHOLLA x_off = nx_local_start; y_off = ny_local_start; z_off = nz_local_start; - #endif +#endif - Wind_Boundary_CUDA(C.device, H.nx, H.ny, H.nz, H.n_cells, H.n_ghost, - x_off, y_off, z_off, H.dx, H.dy, H.dz, - H.xbound, H.ybound, H.zbound, gama, H.t); + Wind_Boundary_CUDA(C.device, H.nx, H.ny, H.nz, H.n_cells, H.n_ghost, x_off, + y_off, z_off, H.dx, H.dy, H.dz, H.xbound, H.ybound, + H.zbound, gama, H.t); } /*! \fn void Noh_Boundary() @@ -469,85 +451,63 @@ void Grid3D::Noh_Boundary() // set x, y, & z offsets of local CPU volume to pass to GPU // so global position on the grid is known x_off = y_off = z_off = 0; - #ifdef MPI_CHOLLA +#ifdef MPI_CHOLLA x_off = nx_local_start; y_off = ny_local_start; z_off = nz_local_start; - #endif - - Noh_Boundary_CUDA(C.device, H.nx, H.ny, H.nz, H.n_cells, H.n_ghost, - x_off, y_off, z_off, H.dx, H.dy, H.dz, - H.xbound, H.ybound, H.zbound, gama, H.t); - -/* - int i, j, k, id; - Real x_pos, y_pos, z_pos, r; - Real vx, vy, vz, d_0, P_0, P; - d_0 = 1.0; - P_0 = 1.0e-6; - // set exact boundaries on the +x face - for (k=0; k 1) r = sqrt(x_pos*x_pos + y_pos*y_pos+ z_pos*z_pos); - else r = sqrt(x_pos*x_pos + y_pos*y_pos); - // set the velocities - vx = -x_pos / r; - vy = -y_pos / r; - if (H.nz > 1) vz = -z_pos / r; - else vz = 0; - // set the conserved quantities - if (H.nz > 1) C.density[id] = d_0*(1.0 + H.t/r)*(1.0 + H.t/r); - else C.density[id] = d_0*(1.0 + H.t/r); - C.momentum_x[id] = vx*C.density[id]; - C.momentum_y[id] = vy*C.density[id]; - C.momentum_z[id] = vz*C.density[id]; - C.Energy[id] = P_0/(gama-1.0) + 0.5*C.density[id]; - } - } - } - // set exact boundaries on the +y face - for (k=0; k 1) r = sqrt(x_pos*x_pos + y_pos*y_pos+ z_pos*z_pos); - else r = sqrt(x_pos*x_pos + y_pos*y_pos); - // set the velocities - vx = -x_pos / r; - vy = -y_pos / r; - if (H.nz > 1) vz = -z_pos / r; - else vz = 0; - // set the conserved quantities - if (H.nz > 1) C.density[id] = d_0*(1.0 + H.t/r)*(1.0 + H.t/r); - else C.density[id] = d_0*(1.0 + H.t/r); - C.momentum_x[id] = vx*C.density[id]; - C.momentum_y[id] = vy*C.density[id]; - C.momentum_z[id] = vz*C.density[id]; - C.Energy[id] = P_0/(gama-1.0) + 0.5*C.density[id]; +#endif + + Noh_Boundary_CUDA(C.device, H.nx, H.ny, H.nz, H.n_cells, H.n_ghost, x_off, + y_off, z_off, H.dx, H.dy, H.dz, H.xbound, H.ybound, + H.zbound, gama, H.t); + + /* + int i, j, k, id; + Real x_pos, y_pos, z_pos, r; + Real vx, vy, vz, d_0, P_0, P; + d_0 = 1.0; + P_0 = 1.0e-6; + // set exact boundaries on the +x face + for (k=0; k 1) r = sqrt(x_pos*x_pos + y_pos*y_pos+ z_pos*z_pos); + else r = sqrt(x_pos*x_pos + y_pos*y_pos); + // set the velocities + vx = -x_pos / r; + vy = -y_pos / r; + if (H.nz > 1) vz = -z_pos / r; + else vz = 0; + // set the conserved quantities + if (H.nz > 1) C.density[id] = d_0*(1.0 + H.t/r)*(1.0 + H.t/r); + else C.density[id] = d_0*(1.0 + H.t/r); + C.momentum_x[id] = vx*C.density[id]; + C.momentum_y[id] = vy*C.density[id]; + C.momentum_z[id] = vz*C.density[id]; + C.Energy[id] = P_0/(gama-1.0) + 0.5*C.density[id]; + } } } - } - // set exact boundaries on the +z face - if (H.nz > 1) { - for (k=H.nz-H.n_ghost; k 1) r = sqrt(x_pos*x_pos + y_pos*y_pos+ z_pos*z_pos); + else r = sqrt(x_pos*x_pos + y_pos*y_pos); // set the velocities vx = -x_pos / r; vy = -y_pos / r; - vz = -z_pos / r; + if (H.nz > 1) vz = -z_pos / r; + else vz = 0; // set the conserved quantities - C.density[id] = d_0*(1.0 + H.t/r)*(1.0 + H.t/r); + if (H.nz > 1) C.density[id] = d_0*(1.0 + H.t/r)*(1.0 + H.t/r); + else C.density[id] = d_0*(1.0 + H.t/r); C.momentum_x[id] = vx*C.density[id]; C.momentum_y[id] = vy*C.density[id]; C.momentum_z[id] = vz*C.density[id]; @@ -555,6 +515,28 @@ void Grid3D::Noh_Boundary() } } } - } -*/ + // set exact boundaries on the +z face + if (H.nz > 1) { + for (k=H.nz-H.n_ghost; k= buffer_ncells){ + if (id >= buffer_ncells) { return; } - k = id/(isize*jsize); - j = (id - k*isize*jsize)/isize; - i = id - k*isize*jsize - j*isize; - idx = i + (j+k*ny)*nx + idxoffset; + k = id / (isize * jsize); + j = (id - k * isize * jsize) / isize; + i = id - k * isize * jsize - j * isize; + idx = i + (j + k * ny) * nx + idxoffset; // idxoffset contains offset terms from // idx = (i+ioffset) + (j+joffset)*H.nx + (k+koffset)*H.nx*H.ny; - for (ii=0; ii= buffer_ncells){ + if (id >= buffer_ncells) { return; } - k = id/(isize*jsize); - j = (id - k*isize*jsize)/isize; - i = id - k*isize*jsize - j*isize; - idx = i + (j+k*ny)*nx + idxoffset; - for (ii=0; ii=isize*jsize*ksize){ + k = id / (isize * jsize); + j = (id - k * isize * jsize) / isize; + i = id - k * isize * jsize - j * isize; + if (id >= isize * jsize * ksize) { return; } // true i,j,k conversion i += imin; j += jmin; k += kmin; - gidx = i + j*nx + k*nx*ny; + gidx = i + j * nx + k * nx * ny; // calculate idx (index of real cell) and a[:] for reflection - idx = SetBoundaryMapping(i,j,k,&a[0],flags,nx,ny,nz,n_ghost); + idx = SetBoundaryMapping(i, j, k, &a[0], flags, nx, ny, nz, n_ghost); - if (idx>=0){ - for (ii=0; ii= 0) { + for (ii = 0; ii < n_fields; ii++) { + c_head[gidx + ii * n_cells] = c_head[idx + ii * n_cells]; } // momentum correction for reflection - // these are set to -1 whenever ghost cells in a direction are in a reflective boundary condition - if (flags[0]==2 || flags[1]==2){ + // these are set to -1 whenever ghost cells in a direction are in a + // reflective boundary condition + if (flags[0] == 2 || flags[1] == 2) { c_head[gidx + n_cells] *= a[0]; } - if (flags[2]==2 || flags[3]==2){ - c_head[gidx + 2*n_cells] *= a[1]; + if (flags[2] == 2 || flags[3] == 2) { + c_head[gidx + 2 * n_cells] *= a[1]; } - if (flags[4]==2 || flags[5]==2){ - c_head[gidx + 3*n_cells] *= a[2]; + if (flags[4] == 2 || flags[5] == 2) { + c_head[gidx + 3 * n_cells] *= a[2]; } // energy and momentum correction for transmission // Diode: only allow outflow - if (flags[dir] == 3){ + if (flags[dir] == 3) { // - int momdex = gidx + (dir/2+1)*n_cells; + int momdex = gidx + (dir / 2 + 1) * n_cells; // (X) Dir 0,1 -> Mom 1 -> c_head[gidx+1*n_cells] // (Y) Dir 2,3 -> Mom 2 -> c_head[gidx+2*n_cells] // (Z) Dir 4,5 -> Mom 3 -> c_head[gidx+3*n_cells] // If a momentum is set to 0, subtract its kinetic energy [gidx+4*n_cells] - if (dir%2 == 0){ - // Direction 0,2,4 are left-side, don't allow inflow with positive momentum - if (c_head[momdex] > 0.0) { - c_head[gidx+4*n_cells] -= 0.5*(c_head[momdex]*c_head[momdex])/c_head[gidx]; - c_head[momdex] = 0.0; - } + if (dir % 2 == 0) { + // Direction 0,2,4 are left-side, don't allow inflow with positive + // momentum + if (c_head[momdex] > 0.0) { + c_head[gidx + 4 * n_cells] -= + 0.5 * (c_head[momdex] * c_head[momdex]) / c_head[gidx]; + c_head[momdex] = 0.0; + } } else { - // Direction 1,3,5 are right-side, don't allow inflow with negative momentum - if (c_head[momdex] < 0.0) { - c_head[gidx+4*n_cells] -= 0.5*(c_head[momdex]*c_head[momdex])/c_head[gidx]; - c_head[momdex] = 0.0; - } + // Direction 1,3,5 are right-side, don't allow inflow with negative + // momentum + if (c_head[momdex] < 0.0) { + c_head[gidx + 4 * n_cells] -= + 0.5 * (c_head[momdex] * c_head[momdex]) / c_head[gidx]; + c_head[momdex] = 0.0; + } } - }//end energy correction for transmissive boundaries - }//end idx>=0 -}//end function - -void SetGhostCells(Real * c_head, - int nx, int ny, int nz, int n_fields, int n_cells, int n_ghost, int flags[], - int isize, int jsize, int ksize, - int imin, int jmin, int kmin, int dir) + } // end energy correction for transmissive boundaries + } // end idx>=0 +} // end function + +void SetGhostCells(Real *c_head, int nx, int ny, int nz, int n_fields, + int n_cells, int n_ghost, int flags[], int isize, int jsize, + int ksize, int imin, int jmin, int kmin, int dir) { - dim3 dim1dGrid((isize*jsize*ksize+TPB-1)/TPB, 1, 1); + dim3 dim1dGrid((isize * jsize * ksize + TPB - 1) / TPB, 1, 1); dim3 dim1dBlock(TPB, 1, 1); - hipLaunchKernelGGL(SetGhostCellsKernel,dim1dGrid,dim1dBlock,0,0,c_head, - nx,ny,nz,n_fields,n_cells,n_ghost, - flags[0],flags[1],flags[2],flags[3],flags[4],flags[5], - isize,jsize,ksize,imin,jmin,kmin,dir); - + hipLaunchKernelGGL(SetGhostCellsKernel, dim1dGrid, dim1dBlock, 0, 0, c_head, + nx, ny, nz, n_fields, n_cells, n_ghost, flags[0], flags[1], + flags[2], flags[3], flags[4], flags[5], isize, jsize, + ksize, imin, jmin, kmin, dir); } -__device__ int SetBoundaryMapping(int ig, int jg, int kg, Real *a, int flags[], int nx, int ny, int nz, int n_ghost){ +__device__ int SetBoundaryMapping(int ig, int jg, int kg, Real *a, int flags[], + int nx, int ny, int nz, int n_ghost) +{ // nx, ny, nz, n_ghost /* 1D */ int ir, jr, kr, idx; - ir=jr=kr=idx=0; - if (nx>1) { - + ir = jr = kr = idx = 0; + if (nx > 1) { // set index on -x face if (ig < n_ghost) { ir = FindIndex(ig, nx, flags[0], 0, n_ghost, &a[0]); } // set index on +x face - else if (ig >= nx-n_ghost) { + else if (ig >= nx - n_ghost) { ir = FindIndex(ig, nx, flags[1], 1, n_ghost, &a[0]); } // set i index for multi-D problems @@ -177,18 +197,16 @@ __device__ int SetBoundaryMapping(int ig, int jg, int kg, Real *a, int flags[], // otherwise add i index to ghost cell mapping idx += ir; - } /* 2D */ if (ny > 1) { - // set index on -y face if (jg < n_ghost) { jr = FindIndex(jg, ny, flags[2], 0, n_ghost, &a[1]); } // set index on +y face - else if (jg >= ny-n_ghost) { + else if (jg >= ny - n_ghost) { jr = FindIndex(jg, ny, flags[3], 1, n_ghost, &a[1]); } // set j index for multi-D problems @@ -202,19 +220,17 @@ __device__ int SetBoundaryMapping(int ig, int jg, int kg, Real *a, int flags[], } // otherwise add j index to ghost cell mapping - idx += nx*jr; - + idx += nx * jr; } /* 3D */ if (nz > 1) { - // set index on -z face if (kg < n_ghost) { kr = FindIndex(kg, nz, flags[4], 0, n_ghost, &a[2]); } // set index on +z face - else if (kg >= nz-n_ghost) { + else if (kg >= nz - n_ghost) { kr = FindIndex(kg, nz, flags[5], 1, n_ghost, &a[2]); } // set k index for multi-D problems @@ -228,26 +244,26 @@ __device__ int SetBoundaryMapping(int ig, int jg, int kg, Real *a, int flags[], } // otherwise add k index to ghost cell mapping - idx += nx*ny*kr; + idx += nx * ny * kr; } return idx; } -__device__ int FindIndex(int ig, int nx, int flag, int face, int n_ghost, Real *a){ +__device__ int FindIndex(int ig, int nx, int flag, int face, int n_ghost, + Real *a) +{ int id; // lower face - if (face==0) - { - switch(flag) - { + if (face == 0) { + switch (flag) { // periodic case 1: - id = ig+nx-2*n_ghost; + id = ig + nx - 2 * n_ghost; break; // reflective case 2: - id = 2*n_ghost-ig-1; + id = 2 * n_ghost - ig - 1; *(a) = -1.0; break; // transmissive @@ -264,26 +280,24 @@ __device__ int FindIndex(int ig, int nx, int flag, int face, int n_ghost, Real * break; // default is periodic default: - id = ig+nx-2*n_ghost; + id = ig + nx - 2 * n_ghost; } } // upper face - else - { - switch(flag) - { + else { + switch (flag) { // periodic case 1: - id = ig-nx+2*n_ghost; + id = ig - nx + 2 * n_ghost; break; // reflective case 2: - id = 2*(nx-n_ghost)-ig-1; + id = 2 * (nx - n_ghost) - ig - 1; *(a) = -1.0; - break; + break; // transmissive case 3: - id = nx-n_ghost-1; + id = nx - n_ghost - 1; break; // custom case 4: @@ -295,31 +309,31 @@ __device__ int FindIndex(int ig, int nx, int flag, int face, int n_ghost, Real * break; // default is periodic default: - id = ig-nx+2*n_ghost; + id = ig - nx + 2 * n_ghost; } } return id; } - -__global__ void Wind_Boundary_kernel(Real * c_device, - int nx, int ny, int nz, int n_cells, int n_ghost, - int x_off, int y_off, int z_off, - Real dx, Real dy, Real dz, Real xbound, Real ybound, Real zbound, Real gamma, Real t) +__global__ void Wind_Boundary_kernel(Real *c_device, int nx, int ny, int nz, + int n_cells, int n_ghost, int x_off, + int y_off, int z_off, Real dx, Real dy, + Real dz, Real xbound, Real ybound, + Real zbound, Real gamma, Real t) { int id, xid, yid, zid, gid; Real n_0, T_0; Real mu = 0.6; Real vx, vy, vz, d_0, P_0; - n_0 = 1e-2; // same value as n_bg in cloud initial condition function (cm^-3) - T_0 = 3e6; // same value as T_bg in cloud initial condition function (K) + n_0 = 1e-2; // same value as n_bg in cloud initial condition function (cm^-3) + T_0 = 3e6; // same value as T_bg in cloud initial condition function (K) // same values as rho_bg and p_bg in cloud initial condition function - d_0 = n_0*mu*MP/DENSITY_UNIT; - P_0 = n_0*KB*T_0/PRESSURE_UNIT; + d_0 = n_0 * mu * MP / DENSITY_UNIT; + P_0 = n_0 * KB * T_0 / PRESSURE_UNIT; - vx = 100*TIME_UNIT/KPC; // km/s * (cholla unit conversion) + vx = 100 * TIME_UNIT / KPC; // km/s * (cholla unit conversion) vy = 0.0; vz = 0.0; @@ -335,36 +349,37 @@ __global__ void Wind_Boundary_kernel(Real * c_device, // not true i,j,k but relative i,j,k in the GPU grid zid = id / (isize * jsize); - yid = (id - zid*isize*jsize) / isize; - xid = id - zid*isize*jsize - yid*isize; + yid = (id - zid * isize * jsize) / isize; + xid = id - zid * isize * jsize - yid * isize; // map thread id to ghost cell id - xid += 0; // -x boundary - gid = xid + yid*nx + zid*nx*ny; + xid += 0; // -x boundary + gid = xid + yid * nx + zid * nx * ny; if (xid <= n_ghost && xid < nx && yid < ny && zid < nz) { // set conserved variables - c_device[gid] = d_0; - c_device[gid+1*n_cells] = vx*d_0; - c_device[gid+2*n_cells] = vy*d_0; - c_device[gid+3*n_cells] = vz*d_0; - c_device[gid+4*n_cells] = P_0/(gamma-1.0) + 0.5*d_0*(vx*vx + vy*vy + vz*vz); + c_device[gid] = d_0; + c_device[gid + 1 * n_cells] = vx * d_0; + c_device[gid + 2 * n_cells] = vy * d_0; + c_device[gid + 3 * n_cells] = vz * d_0; + c_device[gid + 4 * n_cells] = + P_0 / (gamma - 1.0) + 0.5 * d_0 * (vx * vx + vy * vy + vz * vz); } __syncthreads(); } - -__global__ void Noh_Boundary_kernel(Real * c_device, - int nx, int ny, int nz, int n_cells, int n_ghost, - int x_off, int y_off, int z_off, - Real dx, Real dy, Real dz, Real xbound, Real ybound, Real zbound, Real gamma, Real t) +__global__ void Noh_Boundary_kernel(Real *c_device, int nx, int ny, int nz, + int n_cells, int n_ghost, int x_off, + int y_off, int z_off, Real dx, Real dy, + Real dz, Real xbound, Real ybound, + Real zbound, Real gamma, Real t) { - int id,xid,yid,zid,gid; + int id, xid, yid, zid, gid; Real x_pos, y_pos, z_pos, r; Real vx, vy, vz, d_0, P_0; d_0 = 1.0; - P_0 = 1.0e-6; + P_0 = 1.0e-6; // calculate ghost cell ID and i,j,k in GPU grid id = threadIdx.x + blockIdx.x * blockDim.x; @@ -378,38 +393,43 @@ __global__ void Noh_Boundary_kernel(Real * c_device, // ksize = nz; // not true i,j,k but relative i,j,k in the GPU grid - zid = id/(isize*jsize); - yid = (id - zid*isize*jsize)/isize; - xid = id - zid*isize*jsize - yid*isize; + zid = id / (isize * jsize); + yid = (id - zid * isize * jsize) / isize; + xid = id - zid * isize * jsize - yid * isize; // map thread id to ghost cell id - xid += nx-n_ghost; // +x boundary - gid = xid + yid*nx + zid*nx*ny; - - if (xid >= nx-n_ghost && xid < nx && yid < ny && zid < nz) { + xid += nx - n_ghost; // +x boundary + gid = xid + yid * nx + zid * nx * ny; - // use the subgrid offset and global boundaries to calculate absolute positions on the grid - x_pos = (x_off + xid - n_ghost + 0.5)*dx + xbound; - y_pos = (y_off + yid - n_ghost + 0.5)*dy + ybound; - z_pos = (z_off + zid - n_ghost + 0.5)*dz + zbound; + if (xid >= nx - n_ghost && xid < nx && yid < ny && zid < nz) { + // use the subgrid offset and global boundaries to calculate absolute + // positions on the grid + x_pos = (x_off + xid - n_ghost + 0.5) * dx + xbound; + y_pos = (y_off + yid - n_ghost + 0.5) * dy + ybound; + z_pos = (z_off + zid - n_ghost + 0.5) * dz + zbound; // for 2D calculate polar r - if (nz == 1) r = sqrt(x_pos*x_pos + y_pos*y_pos); + if (nz == 1) r = sqrt(x_pos * x_pos + y_pos * y_pos); // for 3D calculate spherical r - else r = sqrt(x_pos*x_pos + y_pos*y_pos + z_pos*z_pos); + else + r = sqrt(x_pos * x_pos + y_pos * y_pos + z_pos * z_pos); // calculate the velocities vx = -x_pos / r; vy = -y_pos / r; - if (nz > 1) vz = -z_pos / r; - else vz = 0; + if (nz > 1) + vz = -z_pos / r; + else + vz = 0; // set the conserved quantities - if (nz > 1) c_device[gid] = d_0*(1.0 + t/r)*(1.0 + t/r); - else c_device[gid] = d_0*(1.0 + t/r); - c_device[gid+1*n_cells] = vx*c_device[gid]; - c_device[gid+2*n_cells] = vy*c_device[gid]; - c_device[gid+3*n_cells] = vz*c_device[gid]; - c_device[gid+4*n_cells] = P_0/(gamma-1.0) + 0.5*c_device[gid]; + if (nz > 1) + c_device[gid] = d_0 * (1.0 + t / r) * (1.0 + t / r); + else + c_device[gid] = d_0 * (1.0 + t / r); + c_device[gid + 1 * n_cells] = vx * c_device[gid]; + c_device[gid + 2 * n_cells] = vy * c_device[gid]; + c_device[gid + 3 * n_cells] = vz * c_device[gid]; + c_device[gid + 4 * n_cells] = P_0 / (gamma - 1.0) + 0.5 * c_device[gid]; } // +y boundary next @@ -418,40 +438,45 @@ __global__ void Noh_Boundary_kernel(Real * c_device, // ksize = nz; // not true i,j,k but relative i,j,k - zid = id/(isize*jsize); - yid = (id - zid*isize*jsize)/isize; - xid = id - zid*isize*jsize - yid*isize; + zid = id / (isize * jsize); + yid = (id - zid * isize * jsize) / isize; + xid = id - zid * isize * jsize - yid * isize; // map thread id to ghost cell id - yid += ny-n_ghost; // +y boundary - gid = xid + yid*nx + zid*nx*ny; - - if (xid < nx && yid >= ny-n_ghost && yid < ny && zid < nz) { + yid += ny - n_ghost; // +y boundary + gid = xid + yid * nx + zid * nx * ny; - // use the subgrid offset and global boundaries to calculate absolute positions on the grid - x_pos = (x_off + xid - n_ghost + 0.5)*dx + xbound; - y_pos = (y_off + yid - n_ghost + 0.5)*dy + ybound; - z_pos = (z_off + zid - n_ghost + 0.5)*dz + zbound; + if (xid < nx && yid >= ny - n_ghost && yid < ny && zid < nz) { + // use the subgrid offset and global boundaries to calculate absolute + // positions on the grid + x_pos = (x_off + xid - n_ghost + 0.5) * dx + xbound; + y_pos = (y_off + yid - n_ghost + 0.5) * dy + ybound; + z_pos = (z_off + zid - n_ghost + 0.5) * dz + zbound; // for 2D calculate polar r - if (nz == 1) r = sqrt(x_pos*x_pos + y_pos*y_pos); + if (nz == 1) r = sqrt(x_pos * x_pos + y_pos * y_pos); // for 3D, calculate spherical r - else r = sqrt(x_pos*x_pos + y_pos*y_pos + z_pos*z_pos); + else + r = sqrt(x_pos * x_pos + y_pos * y_pos + z_pos * z_pos); // calculate the velocities vx = -x_pos / r; vy = -y_pos / r; - if (nz > 1) vz = -z_pos / r; - else vz = 0; + if (nz > 1) + vz = -z_pos / r; + else + vz = 0; // set the conserved quantities - if (nz > 1) c_device[gid] = d_0*(1.0 + t/r)*(1.0 + t/r); - else c_device[gid] = d_0*(1.0 + t/r); - c_device[gid+1*n_cells] = vx*c_device[gid]; - c_device[gid+2*n_cells] = vy*c_device[gid]; - c_device[gid+3*n_cells] = vz*c_device[gid]; - c_device[gid+4*n_cells] = P_0/(gamma-1.0) + 0.5*c_device[gid]; - } - __syncthreads(); + if (nz > 1) + c_device[gid] = d_0 * (1.0 + t / r) * (1.0 + t / r); + else + c_device[gid] = d_0 * (1.0 + t / r); + c_device[gid + 1 * n_cells] = vx * c_device[gid]; + c_device[gid + 2 * n_cells] = vy * c_device[gid]; + c_device[gid + 3 * n_cells] = vz * c_device[gid]; + c_device[gid + 4 * n_cells] = P_0 / (gamma - 1.0) + 0.5 * c_device[gid]; + } + __syncthreads(); // +z boundary last (only if 3D) if (nz == 1) return; @@ -461,45 +486,50 @@ __global__ void Noh_Boundary_kernel(Real * c_device, // ksize = n_ghost; // not true i,j,k but relative i,j,k - zid = id/(isize*jsize); - yid = (id - zid*isize*jsize)/isize; - xid = id - zid*isize*jsize - yid*isize; + zid = id / (isize * jsize); + yid = (id - zid * isize * jsize) / isize; + xid = id - zid * isize * jsize - yid * isize; // map thread id to ghost cell id - zid += nz-n_ghost; // +z boundary - gid = xid + yid*nx + zid*nx*ny; + zid += nz - n_ghost; // +z boundary + gid = xid + yid * nx + zid * nx * ny; - if (xid < nx && yid < ny && zid >= nz-n_ghost && zid < nz) { - - // use the subgrid offset and global boundaries to calculate absolute positions on the grid - x_pos = (x_off + xid - n_ghost + 0.5)*dx + xbound; - y_pos = (y_off + yid - n_ghost + 0.5)*dy + ybound; - z_pos = (z_off + zid - n_ghost + 0.5)*dz + zbound; + if (xid < nx && yid < ny && zid >= nz - n_ghost && zid < nz) { + // use the subgrid offset and global boundaries to calculate absolute + // positions on the grid + x_pos = (x_off + xid - n_ghost + 0.5) * dx + xbound; + y_pos = (y_off + yid - n_ghost + 0.5) * dy + ybound; + z_pos = (z_off + zid - n_ghost + 0.5) * dz + zbound; // for 2D calculate polar r - if (nz == 1) r = sqrt(x_pos*x_pos + y_pos*y_pos); + if (nz == 1) r = sqrt(x_pos * x_pos + y_pos * y_pos); // for 3D, calculate spherical r - else r = sqrt(x_pos*x_pos + y_pos*y_pos + z_pos*z_pos); + else + r = sqrt(x_pos * x_pos + y_pos * y_pos + z_pos * z_pos); // calculate the velocities vx = -x_pos / r; vy = -y_pos / r; - if (nz > 1) vz = -z_pos / r; - else vz = 0; + if (nz > 1) + vz = -z_pos / r; + else + vz = 0; // set the conserved quantities - if (nz > 1) c_device[gid] = d_0*(1.0 + t/r)*(1.0 + t/r); - else c_device[gid] = d_0*(1.0 + t/r); - c_device[gid+1*n_cells] = vx*c_device[gid]; - c_device[gid+2*n_cells] = vy*c_device[gid]; - c_device[gid+3*n_cells] = vz*c_device[gid]; - c_device[gid+4*n_cells] = P_0/(gamma-1.0) + 0.5*c_device[gid]; - } + if (nz > 1) + c_device[gid] = d_0 * (1.0 + t / r) * (1.0 + t / r); + else + c_device[gid] = d_0 * (1.0 + t / r); + c_device[gid + 1 * n_cells] = vx * c_device[gid]; + c_device[gid + 2 * n_cells] = vy * c_device[gid]; + c_device[gid + 3 * n_cells] = vz * c_device[gid]; + c_device[gid + 4 * n_cells] = P_0 / (gamma - 1.0) + 0.5 * c_device[gid]; + } } - -void Wind_Boundary_CUDA(Real * c_device, int nx, int ny, int nz, int n_cells, int n_ghost, - int x_off, int y_off, int z_off, Real dx, Real dy, Real dz, - Real xbound, Real ybound, Real zbound, Real gamma, Real t) +void Wind_Boundary_CUDA(Real *c_device, int nx, int ny, int nz, int n_cells, + int n_ghost, int x_off, int y_off, int z_off, Real dx, + Real dy, Real dz, Real xbound, Real ybound, Real zbound, + Real gamma, Real t) { // determine the size of the grid to launch // need at least as many threads as the largest boundary face @@ -509,24 +539,20 @@ void Wind_Boundary_CUDA(Real * c_device, int nx, int ny, int nz, int n_cells, in jsize = ny; ksize = nz; - dim3 dim1dGrid((isize*jsize*ksize + TPB-1) / TPB, 1, 1); + dim3 dim1dGrid((isize * jsize * ksize + TPB - 1) / TPB, 1, 1); dim3 dim1dBlock(TPB, 1, 1); // launch the boundary kernel - hipLaunchKernelGGL(Wind_Boundary_kernel, dim1dGrid, dim1dBlock, 0, 0, - c_device, nx, ny, nz, n_cells, n_ghost, x_off, y_off, z_off, dx, dy, dz, - xbound, ybound, zbound, gamma, t); - + hipLaunchKernelGGL(Wind_Boundary_kernel, dim1dGrid, dim1dBlock, 0, 0, + c_device, nx, ny, nz, n_cells, n_ghost, x_off, y_off, + z_off, dx, dy, dz, xbound, ybound, zbound, gamma, t); } - - - -void Noh_Boundary_CUDA(Real * c_device, int nx, int ny, int nz, int n_cells, int n_ghost, - int x_off, int y_off, int z_off, Real dx, Real dy, Real dz, - Real xbound, Real ybound, Real zbound, Real gamma, Real t) +void Noh_Boundary_CUDA(Real *c_device, int nx, int ny, int nz, int n_cells, + int n_ghost, int x_off, int y_off, int z_off, Real dx, + Real dy, Real dz, Real xbound, Real ybound, Real zbound, + Real gamma, Real t) { - // determine the size of the grid to launch // need at least as many threads as the largest boundary face // current implementation assumes the test is run on a cube... @@ -535,12 +561,11 @@ void Noh_Boundary_CUDA(Real * c_device, int nx, int ny, int nz, int n_cells, int jsize = ny; ksize = nz; - dim3 dim1dGrid((isize*jsize*ksize+TPB-1)/TPB, 1, 1); + dim3 dim1dGrid((isize * jsize * ksize + TPB - 1) / TPB, 1, 1); dim3 dim1dBlock(TPB, 1, 1); // launch the boundary kernel - hipLaunchKernelGGL(Noh_Boundary_kernel,dim1dGrid,dim1dBlock,0,0,c_device, - nx,ny,nz,n_cells,n_ghost, - x_off,y_off,z_off,dx,dy,dz,xbound,ybound,zbound,gamma,t); - + hipLaunchKernelGGL(Noh_Boundary_kernel, dim1dGrid, dim1dBlock, 0, 0, c_device, + nx, ny, nz, n_cells, n_ghost, x_off, y_off, z_off, dx, dy, + dz, xbound, ybound, zbound, gamma, t); } \ No newline at end of file diff --git a/src/grid/cuda_boundaries.h b/src/grid/cuda_boundaries.h index 2c2b20753..8418264bd 100644 --- a/src/grid/cuda_boundaries.h +++ b/src/grid/cuda_boundaries.h @@ -1,25 +1,31 @@ #ifdef CUDA -#include "../utils/gpu.hpp" -#include "../global/global.h" -#include "../global/global_cuda.h" + #include "../global/global.h" + #include "../global/global_cuda.h" + #include "../utils/gpu.hpp" -//void PackBuffers3D(Real * buffer, Real * c_head, int isize, int jsize, int ksize, int nx, int ny, int idxoffset, int offset, int n_fields, int n_cells); -void PackBuffers3D(Real * buffer, Real * c_head, int nx, int ny, int n_fields, int n_cells, int idxoffset, int isize, int jsize, int ksize); +// void PackBuffers3D(Real * buffer, Real * c_head, int isize, int jsize, int +// ksize, int nx, int ny, int idxoffset, int offset, int n_fields, int n_cells); +void PackBuffers3D(Real* buffer, Real* c_head, int nx, int ny, int n_fields, + int n_cells, int idxoffset, int isize, int jsize, int ksize); -void UnpackBuffers3D(Real * buffer, Real * c_head, int nx, int ny, int n_fields, int n_cells, int idxoffset, int isize, int jsize, int ksize); -//void UnpackBuffers3D(Real * buffer, Real * c_head, int isize, int jsize, int ksize, int nx, int ny, int idxoffset, int offset, int n_fields, int n_cells); +void UnpackBuffers3D(Real* buffer, Real* c_head, int nx, int ny, int n_fields, + int n_cells, int idxoffset, int isize, int jsize, + int ksize); +// void UnpackBuffers3D(Real * buffer, Real * c_head, int isize, int jsize, int +// ksize, int nx, int ny, int idxoffset, int offset, int n_fields, int n_cells); -void SetGhostCells(Real * c_head, - int nx, int ny, int nz, int n_fields, int n_cells, int n_ghost, int flags[], - int isize, int jsize, int ksize, - int imin, int jmin, int kmin, int dir); +void SetGhostCells(Real* c_head, int nx, int ny, int nz, int n_fields, + int n_cells, int n_ghost, int flags[], int isize, int jsize, + int ksize, int imin, int jmin, int kmin, int dir); -void Wind_Boundary_CUDA(Real * c_device, int nx, int ny, int nz, int n_cells, int n_ghost, - int x_off, int y_off, int z_off, Real dx, Real dy, Real dz, - Real xbound, Real ybound, Real zbound, Real gamma, Real t); +void Wind_Boundary_CUDA(Real* c_device, int nx, int ny, int nz, int n_cells, + int n_ghost, int x_off, int y_off, int z_off, Real dx, + Real dy, Real dz, Real xbound, Real ybound, Real zbound, + Real gamma, Real t); -void Noh_Boundary_CUDA(Real * c_device, int nx, int ny, int nz, int n_cells, int n_ghost, - int x_off, int y_off, int z_off, Real dx, Real dy, Real dz, - Real xbound, Real ybound, Real zbound, Real gamma, Real t); +void Noh_Boundary_CUDA(Real* c_device, int nx, int ny, int nz, int n_cells, + int n_ghost, int x_off, int y_off, int z_off, Real dx, + Real dy, Real dz, Real xbound, Real ybound, Real zbound, + Real gamma, Real t); #endif diff --git a/src/grid/grid3D.cpp b/src/grid/grid3D.cpp index 8025f3744..7ab8be7e0 100644 --- a/src/grid/grid3D.cpp +++ b/src/grid/grid3D.cpp @@ -1,46 +1,46 @@ /*! \file grid3D.cpp * \brief Definitions of the Grid3D class */ -#include #include +#include #include #ifdef HDF5 -#include + #include #endif #include "../global/global.h" #include "../grid/grid3D.h" -#include "../grid/grid_enum.h" // provides grid_enum -#include "../hydro/hydro_cuda.h" // provides Calc_dt_GPU +#include "../grid/grid_enum.h" // provides grid_enum +#include "../hydro/hydro_cuda.h" // provides Calc_dt_GPU #include "../integrators/VL_1D_cuda.h" #include "../integrators/VL_2D_cuda.h" #include "../integrators/VL_3D_cuda.h" -#include "../io/io.h" -#include "../utils/error_handling.h" -#include "../utils/ran.h" #include "../integrators/simple_1D_cuda.h" #include "../integrators/simple_2D_cuda.h" #include "../integrators/simple_3D_cuda.h" +#include "../io/io.h" +#include "../utils/error_handling.h" +#include "../utils/ran.h" #ifdef MPI_CHOLLA -#include -#ifdef HDF5 -#include -#endif -#include "../mpi/mpi_routines.h" + #include + #ifdef HDF5 + #include + #endif + #include "../mpi/mpi_routines.h" #endif #include #ifdef CLOUDY_COOL -#include "../cooling/load_cloudy_texture.h" // provides Load_Cuda_Textures and Free_Cuda_Textures + #include "../cooling/load_cloudy_texture.h" // provides Load_Cuda_Textures and Free_Cuda_Textures #endif #ifdef PARALLEL_OMP -#include "../utils/parallel_omp.h" + #include "../utils/parallel_omp.h" #endif #ifdef COOLING_GPU -#include "../cooling/cooling_cuda.h" // provides Cooling_Update + #include "../cooling/cooling_cuda.h" // provides Cooling_Update #endif #ifdef DUST -#include "../dust/dust_cuda.h" // provides Dust_Update + #include "../dust/dust_cuda.h" // provides Dust_Update #endif /*! \fn Grid3D(void) @@ -50,75 +50,76 @@ Grid3D::Grid3D(void) // set initialization flag to 0 flag_init = 0; - // set number of ghost cells - #ifdef PCM +// set number of ghost cells +#ifdef PCM H.n_ghost = 2; - #endif //PCM - #ifdef PLMP +#endif // PCM +#ifdef PLMP H.n_ghost = 3; - #endif //PLMP - #ifdef PLMC +#endif // PLMP +#ifdef PLMC H.n_ghost = 3; - #endif //PLMC - #ifdef PPMP +#endif // PLMC +#ifdef PPMP H.n_ghost = 4; - #endif //PPMP - #ifdef PPMC - H.n_ghost=4; - #endif //PPMC +#endif // PPMP +#ifdef PPMC + H.n_ghost = 4; +#endif // PPMC - #ifdef GRAVITY +#ifdef GRAVITY H.n_ghost_potential_offset = H.n_ghost - N_GHOST_POTENTIAL; - #endif - - #ifdef MHD - // Set the number of ghost cells high enough for MHD - if (H.n_ghost < 3) - { - chprintf("Insufficient number of ghost cells for MHD. H.n_ghost was %i, setting to 3.\n", H.n_ghost); - H.n_ghost = 3; - } - #endif //MHD +#endif +#ifdef MHD + // Set the number of ghost cells high enough for MHD + if (H.n_ghost < 3) { + chprintf( + "Insufficient number of ghost cells for MHD. H.n_ghost was %i, setting " + "to 3.\n", + H.n_ghost); + H.n_ghost = 3; + } +#endif // MHD } -/*! \fn void Get_Position(long i, long j, long k, Real *xpos, Real *ypos, Real *zpos) - * \brief Get the cell-centered position based on cell index */ -void Grid3D::Get_Position(long i, long j, long k, Real *x_pos, Real *y_pos, Real *z_pos) +/*! \fn void Get_Position(long i, long j, long k, Real *xpos, Real *ypos, Real + * *zpos) \brief Get the cell-centered position based on cell index */ +void Grid3D::Get_Position(long i, long j, long k, Real *x_pos, Real *y_pos, + Real *z_pos) { +#ifndef MPI_CHOLLA -#ifndef MPI_CHOLLA - - *x_pos = H.xbound + H.dx*(i-H.n_ghost) + 0.5*H.dx; - *y_pos = H.ybound + H.dy*(j-H.n_ghost) + 0.5*H.dy; - *z_pos = H.zbound + H.dz*(k-H.n_ghost) + 0.5*H.dz; + *x_pos = H.xbound + H.dx * (i - H.n_ghost) + 0.5 * H.dx; + *y_pos = H.ybound + H.dy * (j - H.n_ghost) + 0.5 * H.dy; + *z_pos = H.zbound + H.dz * (k - H.n_ghost) + 0.5 * H.dz; -#else /*MPI_CHOLLA*/ +#else /*MPI_CHOLLA*/ /* position relative to local xyz bounds */ - /* This approach was replaced because it is less consistent for multiple cores. - Since distributive property does not perfectly hold for floating point operations + /* This approach was replaced because it is less consistent for multiple + cores. Since distributive property does not perfectly hold for floating point + operations > Global_bound + global_i * dx is more consistent than - >local_bound + local_i*dx = (global_bound + (global_i-local_i)*dx) + local_i*dx. + >local_bound + local_i*dx = (global_bound + (global_i-local_i)*dx) + + local_i*dx. *x_pos = H.xblocal + H.dx*(i-H.n_ghost) + 0.5*H.dx; *y_pos = H.yblocal + H.dy*(j-H.n_ghost) + 0.5*H.dy; *z_pos = H.zblocal + H.dz*(k-H.n_ghost) + 0.5*H.dz; */ - *x_pos = H.xbound + (nx_local_start+i-H.n_ghost)*H.dx + 0.5*H.dx; - *y_pos = H.ybound + (ny_local_start+j-H.n_ghost)*H.dy + 0.5*H.dy; - *z_pos = H.zbound + (nz_local_start+k-H.n_ghost)*H.dz + 0.5*H.dz; - -#endif /*MPI_CHOLLA*/ + *x_pos = H.xbound + (nx_local_start + i - H.n_ghost) * H.dx + 0.5 * H.dx; + *y_pos = H.ybound + (ny_local_start + j - H.n_ghost) * H.dy + 0.5 * H.dy; + *z_pos = H.zbound + (nz_local_start + k - H.n_ghost) * H.dz + 0.5 * H.dz; +#endif /*MPI_CHOLLA*/ } - /*! \fn void Initialize(int nx_in, int ny_in, int nz_in) * \brief Initialize the grid. */ void Grid3D::Initialize(struct parameters *P) @@ -126,20 +127,21 @@ void Grid3D::Initialize(struct parameters *P) // number of fields to track (default 5 is # of conserved variables) H.n_fields = 5; - // if including passive scalars increase the number of fields - #ifdef SCALAR +// if including passive scalars increase the number of fields +#ifdef SCALAR H.n_fields += NSCALARS; - #endif +#endif - // if including magnetic fields increase the number of fields - #ifdef MHD +// if including magnetic fields increase the number of fields +#ifdef MHD H.n_fields += 3; - #endif //MHD +#endif // MHD - // if using dual energy formalism must track internal energy - always the last field! - #ifdef DE +// if using dual energy formalism must track internal energy - always the last +// field! +#ifdef DE H.n_fields++; - #endif +#endif int nx_in = P->nx; int ny_in = P->ny; @@ -148,26 +150,30 @@ void Grid3D::Initialize(struct parameters *P) // Set the CFL coefficient (a global variable) C_cfl = 0.3; - #ifdef AVERAGE_SLOW_CELLS - H.min_dt_slow = 1e-100; //Initialize the minumum dt to a tiny number - #endif // AVERAGE_SLOW_CELLS +#ifdef AVERAGE_SLOW_CELLS + H.min_dt_slow = 1e-100; // Initialize the minumum dt to a tiny number +#endif // AVERAGE_SLOW_CELLS #ifndef MPI_CHOLLA // set grid dimensions - H.nx = nx_in+2*H.n_ghost; + H.nx = nx_in + 2 * H.n_ghost; H.nx_real = nx_in; - if (ny_in == 1) H.ny = 1; - else H.ny = ny_in+2*H.n_ghost; + if (ny_in == 1) + H.ny = 1; + else + H.ny = ny_in + 2 * H.n_ghost; H.ny_real = ny_in; - if (nz_in == 1) H.nz = 1; - else H.nz = nz_in+2*H.n_ghost; + if (nz_in == 1) + H.nz = 1; + else + H.nz = nz_in + 2 * H.n_ghost; H.nz_real = nz_in; // set total number of cells H.n_cells = H.nx * H.ny * H.nz; -#else /*MPI_CHOLLA*/ +#else /*MPI_CHOLLA*/ /* perform domain decomposition * and set grid dimensions @@ -177,20 +183,16 @@ void Grid3D::Initialize(struct parameters *P) #endif /*MPI_CHOLLA*/ // failsafe - if(H.n_cells<=0) - { + if (H.n_cells <= 0) { chprintf("Error initializing grid: H.n_cells = %d\n", H.n_cells); chexit(-1); } // check for initialization - if(flag_init) - { + if (flag_init) { chprintf("Already initialized. Please reset.\n"); return; - } - else - { + } else { // mark that we are initializing flag_init = 1; } @@ -204,404 +206,400 @@ void Grid3D::Initialize(struct parameters *P) // and initialize the timestep H.dt = 0.0; - // Set Transfer flag to false, only set to true before Conserved boundaries are transferred + // Set Transfer flag to false, only set to true before Conserved boundaries + // are transferred H.TRANSFER_HYDRO_BOUNDARIES = false; // Set output to true when data has to be written to file; H.Output_Now = false; - // allocate memory AllocateMemory(); - #ifdef ROTATED_PROJECTION - //x-dir pixels in projection + // x-dir pixels in projection R.nx = P->nxr; - //z-dir pixels in projection + // z-dir pixels in projection R.nz = P->nzr; - //minimum x location to project + // minimum x location to project R.nx_min = 0; - //minimum z location to project + // minimum z location to project R.nz_min = 0; - //maximum x location to project + // maximum x location to project R.nx_max = R.nx; - //maximum z location to project + // maximum z location to project R.nz_max = R.nz; - //rotation angle about z direction - R.delta = M_PI*(P->delta/180.); //convert to radians - //rotation angle about x direction - R.theta = M_PI*(P->theta/180.); //convert to radians - //rotation angle about y direction - R.phi = M_PI*(P->phi/180.); //convert to radians - //x-dir physical size of projection + // rotation angle about z direction + R.delta = M_PI * (P->delta / 180.); // convert to radians + // rotation angle about x direction + R.theta = M_PI * (P->theta / 180.); // convert to radians + // rotation angle about y direction + R.phi = M_PI * (P->phi / 180.); // convert to radians + // x-dir physical size of projection R.Lx = P->Lx; - //z-dir physical size of projection + // z-dir physical size of projection R.Lz = P->Lz; - //initialize a counter for rotated outputs + // initialize a counter for rotated outputs R.i_delta = 0; - //number of rotated outputs in a complete revolution + // number of rotated outputs in a complete revolution R.n_delta = P->n_delta; - //rate of rotation between outputs, for an actual simulation + // rate of rotation between outputs, for an actual simulation R.ddelta_dt = P->ddelta_dt; - //are we not rotating about z(0)? - //are we outputting multiple rotations(1)? or rotating during a simulation(2)? + // are we not rotating about z(0)? + // are we outputting multiple rotations(1)? or rotating during a + // simulation(2)? R.flag_delta = P->flag_delta; #endif /*ROTATED_PROJECTION*/ - // Values for lower limit for density and temperature - #ifdef DENSITY_FLOOR +// Values for lower limit for density and temperature +#ifdef DENSITY_FLOOR H.density_floor = DENS_FLOOR; - #else - H.density_floor = 0.0; - #endif +#else + H.density_floor = 0.0; +#endif - #ifdef TEMPERATURE_FLOOR +#ifdef TEMPERATURE_FLOOR H.temperature_floor = TEMP_FLOOR; - #else +#else H.temperature_floor = 0.0; - #endif +#endif - #ifdef COSMOLOGY - if ( P->scale_outputs_file[0] == '\0' ) H.OUTPUT_SCALE_FACOR = false; - else H.OUTPUT_SCALE_FACOR = true; - #endif +#ifdef COSMOLOGY + if (P->scale_outputs_file[0] == '\0') + H.OUTPUT_SCALE_FACOR = false; + else + H.OUTPUT_SCALE_FACOR = true; +#endif H.Output_Initial = true; - - } - /*! \fn void AllocateMemory(void) * \brief Allocate memory for the arrays. */ void Grid3D::AllocateMemory(void) { // allocate memory for the conserved variable arrays // allocate all the memory to density, to insure contiguous memory - CudaSafeCall( cudaHostAlloc((void**)&C.host, H.n_fields*H.n_cells*sizeof(Real), cudaHostAllocDefault) ); + CudaSafeCall(cudaHostAlloc((void **)&C.host, + H.n_fields * H.n_cells * sizeof(Real), + cudaHostAllocDefault)); // point conserved variables to the appropriate locations - C.density = C.host; + C.density = C.host; C.momentum_x = &(C.host[H.n_cells]); - C.momentum_y = &(C.host[2*H.n_cells]); - C.momentum_z = &(C.host[3*H.n_cells]); - C.Energy = &(C.host[4*H.n_cells]); - #ifdef SCALAR - C.scalar = &(C.host[H.n_cells*grid_enum::scalar]); + C.momentum_y = &(C.host[2 * H.n_cells]); + C.momentum_z = &(C.host[3 * H.n_cells]); + C.Energy = &(C.host[4 * H.n_cells]); +#ifdef SCALAR + C.scalar = &(C.host[H.n_cells * grid_enum::scalar]); #ifdef BASIC_SCALAR - C.basic_scalar = &(C.host[H.n_cells*grid_enum::basic_scalar]); + C.basic_scalar = &(C.host[H.n_cells * grid_enum::basic_scalar]); #endif - #endif //SCALAR - #ifdef MHD +#endif // SCALAR +#ifdef MHD C.magnetic_x = &(C.host[(grid_enum::magnetic_x)*H.n_cells]); C.magnetic_y = &(C.host[(grid_enum::magnetic_y)*H.n_cells]); C.magnetic_z = &(C.host[(grid_enum::magnetic_z)*H.n_cells]); - #endif //MHD - #ifdef DE - C.GasEnergy = &(C.host[(H.n_fields-1)*H.n_cells]); - #endif //DE +#endif // MHD +#ifdef DE + C.GasEnergy = &(C.host[(H.n_fields - 1) * H.n_cells]); +#endif // DE // allocate memory for the conserved variable arrays on the device - CudaSafeCall( cudaMalloc((void**)&C.device, H.n_fields*H.n_cells*sizeof(Real)) ); - cuda_utilities::initGpuMemory(C.device, H.n_fields*H.n_cells*sizeof(Real)); + CudaSafeCall( + cudaMalloc((void **)&C.device, H.n_fields * H.n_cells * sizeof(Real))); + cuda_utilities::initGpuMemory(C.device, + H.n_fields * H.n_cells * sizeof(Real)); C.d_density = C.device; C.d_momentum_x = &(C.device[H.n_cells]); - C.d_momentum_y = &(C.device[2*H.n_cells]); - C.d_momentum_z = &(C.device[3*H.n_cells]); - C.d_Energy = &(C.device[4*H.n_cells]); - #ifdef SCALAR - C.d_scalar = &(C.device[H.n_cells*grid_enum::scalar]); + C.d_momentum_y = &(C.device[2 * H.n_cells]); + C.d_momentum_z = &(C.device[3 * H.n_cells]); + C.d_Energy = &(C.device[4 * H.n_cells]); +#ifdef SCALAR + C.d_scalar = &(C.device[H.n_cells * grid_enum::scalar]); #ifdef BASIC_SCALAR - C.d_basic_scalar = &(C.device[H.n_cells*grid_enum::basic_scalar]); - #endif - #endif // SCALAR - #ifdef MHD - C.d_magnetic_x = &(C.device[(grid_enum::magnetic_x)*H.n_cells]); - C.d_magnetic_y = &(C.device[(grid_enum::magnetic_y)*H.n_cells]); - C.d_magnetic_z = &(C.device[(grid_enum::magnetic_z)*H.n_cells]); - #endif //MHD - #ifdef DE - C.d_GasEnergy = &(C.device[(H.n_fields-1)*H.n_cells]); - #endif // DE - - #if defined( GRAVITY ) - CudaSafeCall( cudaHostAlloc(&C.Grav_potential, H.n_cells*sizeof(Real), cudaHostAllocDefault) ); - CudaSafeCall( cudaMalloc((void**)&C.d_Grav_potential, H.n_cells*sizeof(Real)) ); - #else - C.Grav_potential = NULL; - C.d_Grav_potential = NULL; + C.d_basic_scalar = &(C.device[H.n_cells * grid_enum::basic_scalar]); #endif +#endif // SCALAR +#ifdef MHD + C.d_magnetic_x = &(C.device[(grid_enum::magnetic_x)*H.n_cells]); + C.d_magnetic_y = &(C.device[(grid_enum::magnetic_y)*H.n_cells]); + C.d_magnetic_z = &(C.device[(grid_enum::magnetic_z)*H.n_cells]); +#endif // MHD +#ifdef DE + C.d_GasEnergy = &(C.device[(H.n_fields - 1) * H.n_cells]); +#endif // DE + +#if defined(GRAVITY) + CudaSafeCall(cudaHostAlloc(&C.Grav_potential, H.n_cells * sizeof(Real), + cudaHostAllocDefault)); + CudaSafeCall( + cudaMalloc((void **)&C.d_Grav_potential, H.n_cells * sizeof(Real))); +#else + C.Grav_potential = NULL; + C.d_Grav_potential = NULL; +#endif - - #ifdef CHEMISTRY_GPU - C.HI_density = &C.host[ H.n_cells*grid_enum::HI_density ]; - C.HII_density = &C.host[ H.n_cells*grid_enum::HII_density ]; - C.HeI_density = &C.host[ H.n_cells*grid_enum::HeI_density ]; - C.HeII_density = &C.host[ H.n_cells*grid_enum::HeII_density ]; - C.HeIII_density = &C.host[ H.n_cells*grid_enum::HeIII_density ]; - C.e_density = &C.host[ H.n_cells*grid_enum::e_density ]; - #endif +#ifdef CHEMISTRY_GPU + C.HI_density = &C.host[H.n_cells * grid_enum::HI_density]; + C.HII_density = &C.host[H.n_cells * grid_enum::HII_density]; + C.HeI_density = &C.host[H.n_cells * grid_enum::HeI_density]; + C.HeII_density = &C.host[H.n_cells * grid_enum::HeII_density]; + C.HeIII_density = &C.host[H.n_cells * grid_enum::HeIII_density]; + C.e_density = &C.host[H.n_cells * grid_enum::e_density]; +#endif // initialize host array - for (int i=0; i 1 && H.ny == 1 && H.nz == 1) //1D + if (H.nx > 1 && H.ny == 1 && H.nz == 1) // 1D { - #ifdef CUDA - #ifdef VL - VL_Algorithm_1D_CUDA(C.device, H.nx, x_off, H.n_ghost, H.dx, H.xbound, H.dt, H.n_fields); - #endif //VL - #ifdef SIMPLE - Simple_Algorithm_1D_CUDA(C.device, H.nx, x_off, H.n_ghost, H.dx, H.xbound, H.dt, H.n_fields); - #endif //SIMPLE - #endif //CUDA - } - else if (H.nx > 1 && H.ny > 1 && H.nz == 1) //2D - { - #ifdef CUDA - #ifdef VL - VL_Algorithm_2D_CUDA(C.device, H.nx, H.ny, x_off, y_off, H.n_ghost, H.dx, H.dy, H.xbound, H.ybound, H.dt, H.n_fields); - #endif //VL - #ifdef SIMPLE - Simple_Algorithm_2D_CUDA(C.device, H.nx, H.ny, x_off, y_off, H.n_ghost, H.dx, H.dy, H.xbound, H.ybound, H.dt, H.n_fields); - #endif //SIMPLE - #endif //CUDA - } - else if (H.nx > 1 && H.ny > 1 && H.nz > 1) //3D +#ifdef CUDA + #ifdef VL + VL_Algorithm_1D_CUDA(C.device, H.nx, x_off, H.n_ghost, H.dx, H.xbound, H.dt, + H.n_fields); + #endif // VL + #ifdef SIMPLE + Simple_Algorithm_1D_CUDA(C.device, H.nx, x_off, H.n_ghost, H.dx, H.xbound, + H.dt, H.n_fields); + #endif // SIMPLE +#endif // CUDA + } else if (H.nx > 1 && H.ny > 1 && H.nz == 1) // 2D { - #ifdef CUDA - #ifdef VL - VL_Algorithm_3D_CUDA(C.device, C.d_Grav_potential, H.nx, H.ny, H.nz, x_off, y_off, z_off, H.n_ghost, H.dx, H.dy, H.dz, H.xbound, H.ybound, H.zbound, H.dt, H.n_fields, density_floor, U_floor, C.Grav_potential ); - #endif //VL - #ifdef SIMPLE - Simple_Algorithm_3D_CUDA(C.device, C.d_Grav_potential, H.nx, H.ny, H.nz, x_off, y_off, z_off, H.n_ghost, H.dx, H.dy, H.dz, H.xbound, H.ybound, H.zbound, H.dt, H.n_fields, density_floor, U_floor, C.Grav_potential ); - #endif//SIMPLE - #endif - } - else +#ifdef CUDA + #ifdef VL + VL_Algorithm_2D_CUDA(C.device, H.nx, H.ny, x_off, y_off, H.n_ghost, H.dx, + H.dy, H.xbound, H.ybound, H.dt, H.n_fields); + #endif // VL + #ifdef SIMPLE + Simple_Algorithm_2D_CUDA(C.device, H.nx, H.ny, x_off, y_off, H.n_ghost, + H.dx, H.dy, H.xbound, H.ybound, H.dt, H.n_fields); + #endif // SIMPLE +#endif // CUDA + } else if (H.nx > 1 && H.ny > 1 && H.nz > 1) // 3D { - chprintf("Error: Grid dimensions nx: %d ny: %d nz: %d not supported.\n", H.nx, H.ny, H.nz); +#ifdef CUDA + #ifdef VL + VL_Algorithm_3D_CUDA(C.device, C.d_Grav_potential, H.nx, H.ny, H.nz, x_off, + y_off, z_off, H.n_ghost, H.dx, H.dy, H.dz, H.xbound, + H.ybound, H.zbound, H.dt, H.n_fields, density_floor, + U_floor, C.Grav_potential); + #endif // VL + #ifdef SIMPLE + Simple_Algorithm_3D_CUDA(C.device, C.d_Grav_potential, H.nx, H.ny, H.nz, + x_off, y_off, z_off, H.n_ghost, H.dx, H.dy, H.dz, + H.xbound, H.ybound, H.zbound, H.dt, H.n_fields, + density_floor, U_floor, C.Grav_potential); + #endif // SIMPLE +#endif + } else { + chprintf("Error: Grid dimensions nx: %d ny: %d nz: %d not supported.\n", + H.nx, H.ny, H.nz); chexit(-1); } - - #ifdef CUDA +#ifdef CUDA #ifdef COOLING_GPU // ==Apply Cooling from cooling/cooling_cuda.h== Cooling_Update(C.device, H.nx, H.ny, H.nz, H.n_ghost, H.n_fields, H.dt, gama); - #endif //COOLING_GPU + #endif // COOLING_GPU #ifdef DUST // ==Apply dust from dust/dust_cuda.h== Dust_Update(C.device, H.nx, H.ny, H.nz, H.n_ghost, H.n_fields, H.dt, gama); - #endif // DUST + #endif // DUST // Update the H and He ionization fractions and apply cooling and photoheating #ifdef CHEMISTRY_GPU Update_Chemistry(); - #ifdef CPU_TIME - Timer.Chemistry.RecordTime( Chem.H.runtime_chemistry_step ); - #endif + #ifdef CPU_TIME + Timer.Chemistry.RecordTime(Chem.H.runtime_chemistry_step); + #endif #endif #ifdef AVERAGE_SLOW_CELLS - //Set the min_delta_t for averaging a slow cell + // Set the min_delta_t for averaging a slow cell Real max_dti_slow; max_dti_slow = 1 / H.min_dt_slow; - Average_Slow_Cells( C.device, H.nx, H.ny, H.nz, H.n_ghost, H.n_fields, H.dx, H.dy, H.dz, gama, max_dti_slow ); - #endif //AVERAGE_SLOW_CELLS + Average_Slow_Cells(C.device, H.nx, H.ny, H.nz, H.n_ghost, H.n_fields, H.dx, + H.dy, H.dz, gama, max_dti_slow); + #endif // AVERAGE_SLOW_CELLS // ==Calculate the next time step with Calc_dt_GPU from hydro/hydro_cuda.h== - max_dti = Calc_dt_GPU(C.device, H.nx, H.ny, H.nz, H.n_ghost, H.n_cells, H.dx, H.dy, H.dz, gama ); - #endif // CUDA - - #ifdef COOLING_GRACKLE - Cool.fields.density = C.density; - Cool.fields.HI_density = &C.host[ H.n_cells*grid_enum::HI_density ]; - Cool.fields.HII_density = &C.host[ H.n_cells*grid_enum::HII_density ]; - Cool.fields.HeI_density = &C.host[ H.n_cells*grid_enum::HeI_density ]; - Cool.fields.HeII_density = &C.host[ H.n_cells*grid_enum::HeII_density ]; - Cool.fields.HeIII_density = &C.host[ H.n_cells*grid_enum::HeIII_density ]; - Cool.fields.e_density = &C.host[ H.n_cells*grid_enum::e_density ]; + max_dti = Calc_dt_GPU(C.device, H.nx, H.ny, H.nz, H.n_ghost, H.n_cells, H.dx, + H.dy, H.dz, gama); +#endif // CUDA + +#ifdef COOLING_GRACKLE + Cool.fields.density = C.density; + Cool.fields.HI_density = &C.host[H.n_cells * grid_enum::HI_density]; + Cool.fields.HII_density = &C.host[H.n_cells * grid_enum::HII_density]; + Cool.fields.HeI_density = &C.host[H.n_cells * grid_enum::HeI_density]; + Cool.fields.HeII_density = &C.host[H.n_cells * grid_enum::HeII_density]; + Cool.fields.HeIII_density = &C.host[H.n_cells * grid_enum::HeIII_density]; + Cool.fields.e_density = &C.host[H.n_cells * grid_enum::e_density]; #ifdef GRACKLE_METALS - Cool.fields.metal_density = &C.host[ H.n_cells*grid_enum::metal_density ]; - #endif - #endif - - #ifdef CHEMISTRY_GPU - C.HI_density = &C.host[ H.n_cells*grid_enum::HI_density ]; - C.HII_density = &C.host[ H.n_cells*grid_enum::HII_density ]; - C.HeI_density = &C.host[ H.n_cells*grid_enum::HeI_density ]; - C.HeII_density = &C.host[ H.n_cells*grid_enum::HeII_density ]; - C.HeIII_density = &C.host[ H.n_cells*grid_enum::HeIII_density ]; - C.e_density = &C.host[ H.n_cells*grid_enum::e_density ]; + Cool.fields.metal_density = &C.host[H.n_cells * grid_enum::metal_density]; #endif +#endif +#ifdef CHEMISTRY_GPU + C.HI_density = &C.host[H.n_cells * grid_enum::HI_density]; + C.HII_density = &C.host[H.n_cells * grid_enum::HII_density]; + C.HeI_density = &C.host[H.n_cells * grid_enum::HeI_density]; + C.HeII_density = &C.host[H.n_cells * grid_enum::HeII_density]; + C.HeIII_density = &C.host[H.n_cells * grid_enum::HeIII_density]; + C.e_density = &C.host[H.n_cells * grid_enum::e_density]; +#endif return max_dti; - } /*! \fn void Update_Hydro_Grid(void) * \brief Do all steps to update the hydro. */ -Real Grid3D::Update_Hydro_Grid( ){ - - #ifdef ONLY_PARTICLES +Real Grid3D::Update_Hydro_Grid() +{ +#ifdef ONLY_PARTICLES // Don't integrate the Hydro when only solving for particles return 1e-10; - #endif //ONLY_PARTICLES +#endif // ONLY_PARTICLES Real dti; - #ifdef CPU_TIME +#ifdef CPU_TIME Timer.Hydro.Start(); - #endif //CPU_TIME +#endif // CPU_TIME - #ifdef GRAVITY +#ifdef GRAVITY // Extrapolate gravitational potential for hydro step Extrapolate_Grav_Potential(); - #endif //GRAVITY +#endif // GRAVITY dti = Update_Grid(); - #ifdef CPU_TIME +#ifdef CPU_TIME #ifdef CHEMISTRY_GPU Timer.Hydro.Subtract(Chem.H.runtime_chemistry_step); - //Subtract the time spent on the Chemical Update - #endif //CHEMISTRY_GPU + // Subtract the time spent on the Chemical Update + #endif // CHEMISTRY_GPU Timer.Hydro.End(); - #endif //CPU_TIME +#endif // CPU_TIME - #ifdef COOLING_GRACKLE +#ifdef COOLING_GRACKLE #ifdef CPU_TIME Timer.Cooling.Start(); - #endif //CPU_TIME - Do_Cooling_Step_Grackle( ); + #endif // CPU_TIME + Do_Cooling_Step_Grackle(); #ifdef CPU_TIME Timer.Cooling.End(); - #endif //CPU_TIME - #endif//COOLING_GRACKLE - + #endif // CPU_TIME +#endif // COOLING_GRACKLE return dti; } -void Grid3D::Update_Time(){ - +void Grid3D::Update_Time() +{ // update the time H.t += H.dt; - #ifdef PARTICLES +#ifdef PARTICLES Particles.t = H.t; #ifdef COSMOLOGY Cosmo.current_a += Cosmo.delta_a; - Cosmo.current_z = 1./Cosmo.current_a - 1; + Cosmo.current_z = 1. / Cosmo.current_a - 1; Particles.current_a = Cosmo.current_a; Particles.current_z = Cosmo.current_z; - Grav.current_a = Cosmo.current_a; - #endif //COSMOLOGY - #endif //PARTICLES + Grav.current_a = Cosmo.current_a; + #endif // COSMOLOGY +#endif // PARTICLES - #if defined(ANALYSIS) && defined(COSMOLOGY) +#if defined(ANALYSIS) && defined(COSMOLOGY) Analysis.current_z = Cosmo.current_z; - #endif - - - - +#endif } /*! \fn void Reset(void) @@ -613,60 +611,58 @@ void Grid3D::Reset(void) // reset the initialization flag flag_init = 0; - } - /*! \fn void FreeMemory(void) * \brief Free the memory allocated by the Grid3D class. */ void Grid3D::FreeMemory(void) { // free the conserved variable arrays - CudaSafeCall( cudaFreeHost(C.host) ); + CudaSafeCall(cudaFreeHost(C.host)); - #ifdef GRAVITY - CudaSafeCall( cudaFreeHost(C.Grav_potential) ); - CudaSafeCall( cudaFree(C.d_Grav_potential) ); - #endif +#ifdef GRAVITY + CudaSafeCall(cudaFreeHost(C.Grav_potential)); + CudaSafeCall(cudaFree(C.d_Grav_potential)); +#endif - // If memory is single allocated, free the memory at the end of the simulation. - #ifdef VL +// If memory is single allocated, free the memory at the end of the simulation. +#ifdef VL if (H.nx > 1 && H.ny == 1 && H.nz == 1) Free_Memory_VL_1D(); if (H.nx > 1 && H.ny > 1 && H.nz == 1) Free_Memory_VL_2D(); if (H.nx > 1 && H.ny > 1 && H.nz > 1) Free_Memory_VL_3D(); - #endif // VL - #ifdef SIMPLE +#endif // VL +#ifdef SIMPLE if (H.nx > 1 && H.ny == 1 && H.nz == 1) Free_Memory_Simple_1D(); if (H.nx > 1 && H.ny > 1 && H.nz == 1) Free_Memory_Simple_2D(); if (H.nx > 1 && H.ny > 1 && H.nz > 1) Free_Memory_Simple_3D(); - #endif // SIMPLE +#endif // SIMPLE - #ifdef GRAVITY +#ifdef GRAVITY Grav.FreeMemory_CPU(); #ifdef GRAVITY_GPU Grav.FreeMemory_GPU(); #endif - #endif +#endif - #ifdef PARTICLES +#ifdef PARTICLES Particles.Reset(); - #endif +#endif - #ifdef COOLING_GRACKLE +#ifdef COOLING_GRACKLE Cool.Free_Memory(); - #endif +#endif - #ifdef COOLING_GPU +#ifdef COOLING_GPU #ifdef CLOUDY_COOL Free_Cuda_Textures(); #endif - #endif +#endif - #ifdef CHEMISTRY_GPU +#ifdef CHEMISTRY_GPU Chem.Reset(); - #endif +#endif - #ifdef ANALYSIS +#ifdef ANALYSIS Analysis.Reset(); - #endif +#endif } diff --git a/src/grid/grid3D.h b/src/grid/grid3D.h index d05b90214..7e207b311 100644 --- a/src/grid/grid3D.h +++ b/src/grid/grid3D.h @@ -4,105 +4,104 @@ #ifndef GRID3D_H #define GRID3D_H -#ifdef MPI_CHOLLA -#include "../mpi/mpi_routines.h" +#ifdef MPI_CHOLLA + #include "../mpi/mpi_routines.h" #endif /*MPI_CHOLLA*/ #include + #include "../global/global.h" #include "../global/global_cuda.h" #ifdef HDF5 -#include + #include #endif #ifdef GRAVITY -#include "../gravity/grav3D.h" + #include "../gravity/grav3D.h" #endif #ifdef PARTICLES -#include "../particles/particles_3D.h" + #include "../particles/particles_3D.h" #endif #include "../model/disk_galaxy.h" #ifdef COSMOLOGY -#include "../cosmology/cosmology.h" + #include "../cosmology/cosmology.h" #endif #ifdef COOLING_GRACKLE -#include "../cooling_grackle/cool_grackle.h" + #include "../cooling_grackle/cool_grackle.h" #endif #ifdef CPU_TIME -#include "../utils/timing_functions.h" + #include "../utils/timing_functions.h" #endif #ifdef CHEMISTRY_GPU -#include "chemistry_gpu/chemistry_gpu.h" + #include "chemistry_gpu/chemistry_gpu.h" #endif #ifdef ANALYSIS -#include "../analysis/analysis.h" + #include "../analysis/analysis.h" #endif - -struct Rotation -{ +struct Rotation { /*! \var nx - * \brief Number of pixels in x-dir of rotated, projected image*/ + * \brief Number of pixels in x-dir of rotated, projected image*/ int nx; /*! \var nz - * \brief Number of pixels in z-dir of rotated, projected image*/ + * \brief Number of pixels in z-dir of rotated, projected image*/ int nz; /*! \var nx_min - * \brief Left most point in the projected image for this subvolume*/ + * \brief Left most point in the projected image for this subvolume*/ int nx_min; /*! \var nx_max - * \brief Right most point in the projected image for this subvolume*/ + * \brief Right most point in the projected image for this subvolume*/ int nx_max; /*! \var nz_min - * \brief Bottom most point in the projected image for this subvolume*/ + * \brief Bottom most point in the projected image for this subvolume*/ int nz_min; /*! \var nz_max - * \brief Top most point in the projected image for this subvolume*/ + * \brief Top most point in the projected image for this subvolume*/ int nz_max; /*! \var delta - * \brief Rotation angle about z axis in simulation frame*/ + * \brief Rotation angle about z axis in simulation frame*/ Real delta; /*! \var theta - * \brief Rotation angle about x axis in simulation frame*/ + * \brief Rotation angle about x axis in simulation frame*/ Real theta; /*! \var phi - * \brief Rotation angle about y axis in simulation frame*/ + * \brief Rotation angle about y axis in simulation frame*/ Real phi; /*! \var Lx - * \brief Physical x-dir size of projected image*/ + * \brief Physical x-dir size of projected image*/ Real Lx; /*! \var Lz - * \brief Physical z-dir size of projected image*/ + * \brief Physical z-dir size of projected image*/ Real Lz; /*! \var i_delta - * \brief number of output projection for delta rotation*/ + * \brief number of output projection for delta rotation*/ int i_delta; /*! \var n_delta - * \brief total number of output projection for delta rotation*/ + * \brief total number of output projection for delta rotation*/ Real n_delta; /*! \var ddelta_dt - * \brief rate of delta rotation*/ + * \brief rate of delta rotation*/ Real ddelta_dt; /*! \var flag_delta @@ -110,38 +109,37 @@ struct Rotation int flag_delta; }; -struct Header -{ +struct Header { /*! \var n_cells - * \brief Total number of cells in the grid (including ghost cells) */ + * \brief Total number of cells in the grid (including ghost cells) */ int n_cells; /*! \var n_ghost - * \brief Number of ghost cells on each side of the grid */ + * \brief Number of ghost cells on each side of the grid */ int n_ghost; /*! \var nx - * \brief Total number of cells in the x-dimension */ + * \brief Total number of cells in the x-dimension */ int nx; /*! \var ny - * \brief Total number of cells in the y-dimension */ + * \brief Total number of cells in the y-dimension */ int ny; /*! \var nz - * \brief Total number of cells in the z-dimension */ + * \brief Total number of cells in the z-dimension */ int nz; /*! \var nx_real - * \brief Number of real cells in the x-dimension */ + * \brief Number of real cells in the x-dimension */ int nx_real; /*! \var ny - * \brief Number of real cells in the y-dimension */ + * \brief Number of real cells in the y-dimension */ int ny_real; /*! \var nz - * \brief Number of real cells in the z-dimension */ + * \brief Number of real cells in the z-dimension */ int nz_real; /*! \var xbound */ @@ -156,7 +154,7 @@ struct Header /* \brief Global domain z-direction minimum */ Real zbound; - /*! \var xblocal */ + /*! \var xblocal */ /* \brief Local domain x-direction minimum */ Real xblocal; @@ -193,39 +191,39 @@ struct Header Real zdglobal; /*! \var dx - * \brief x-width of cells */ + * \brief x-width of cells */ Real dx; /*! \var dy - * \brief y-width of cells */ + * \brief y-width of cells */ Real dy; /*! \var dz - * \brief z-width of cells */ + * \brief z-width of cells */ Real dz; /*! \var t - * \brief Simulation time */ + * \brief Simulation time */ Real t; /*! \var dt - * \brief Length of the current timestep */ + * \brief Length of the current timestep */ Real dt; - #ifdef AVERAGE_SLOW_CELLS +#ifdef AVERAGE_SLOW_CELLS Real min_dt_slow; - #endif +#endif /*! \var t_wall - * \brief Wall time */ + * \brief Wall time */ Real t_wall; /*! \var n_step - * \brief Number of timesteps taken */ + * \brief Number of timesteps taken */ int n_step; /*! \var n_fields - * \brief Number of fields (conserved variables, scalars, etc.) */ + * \brief Number of fields (conserved variables, scalars, etc.) */ int n_fields; // Values for lower limit for density and temperature @@ -234,10 +232,10 @@ struct Header Real Ekin_avrg; - //Flag to indicate when to transfer the Conserved boundaries + // Flag to indicate when to transfer the Conserved boundaries bool TRANSFER_HYDRO_BOUNDARIES; - //Parameters For Spherical Colapse Problem + // Parameters For Spherical Colapse Problem Real sphere_density; Real sphere_radius; Real sphere_background_density; @@ -245,636 +243,674 @@ struct Header Real sphere_center_y; Real sphere_center_z; - #ifdef GRAVITY +#ifdef GRAVITY /*! \var n_ghost_potential_offset - * \brief Number of offset betewen hydro_ghost_cells and potential_ghost_cells */ + * \brief Number of offset betewen hydro_ghost_cells and + * potential_ghost_cells */ int n_ghost_potential_offset; - #endif +#endif - #ifdef COSMOLOGY +#ifdef COSMOLOGY bool OUTPUT_SCALE_FACOR; - #endif +#endif /*! \var Output_Now - * \brief Flag set to true when data has to be written to file */ + * \brief Flag set to true when data has to be written to file */ bool Output_Now; bool Output_Initial; /*! \var Output_Complete_Data - * \brief Flag set to true when all the data will be written to file (Restart File ) */ + * \brief Flag set to true when all the data will be written to file + * (Restart File ) */ bool Output_Complete_Data; - - }; /*! \class Grid3D * \brief Class to create a 3D grid of cells. */ class Grid3D { - public: + public: + /*! \var flag_init + * \brief Initialization flag */ + int flag_init; - /*! \var flag_init - * \brief Initialization flag */ - int flag_init; + /*! \var struct Header H + * \brief Header for the grid */ + struct Header H; - /*! \var struct Header H - * \brief Header for the grid */ - struct Header H; + /*! \var struct Rotation R + * \brief Rotation struct for data projections */ + struct Rotation R; - /*! \var struct Rotation R - * \brief Rotation struct for data projections */ - struct Rotation R; +#ifdef GRAVITY + // Object that contains data for gravity + Grav3D Grav; +#endif - #ifdef GRAVITY - // Object that contains data for gravity - Grav3D Grav; - #endif +#ifdef PARTICLES + // Object that contains data for particles + Particles_3D Particles; +#endif - #ifdef PARTICLES - // Object that contains data for particles - Particles_3D Particles; - #endif +#ifdef COSMOLOGY + // Object that contains data for cosmology + Cosmology Cosmo; +#endif - #ifdef COSMOLOGY - // Object that contains data for cosmology - Cosmology Cosmo; - #endif +#ifdef COOLING_GRACKLE + // Object that contains data for Grackle cooling + Cool_GK Cool; +#endif - #ifdef COOLING_GRACKLE - // Object that contains data for Grackle cooling - Cool_GK Cool; - #endif +#ifdef CPU_TIME + Time Timer; +#endif - #ifdef CPU_TIME - Time Timer; - #endif +#ifdef CHEMISTRY_GPU + // Object that contains data for the GPU chemistry solver + Chem_GPU Chem; +#endif - #ifdef CHEMISTRY_GPU - // Object that contains data for the GPU chemistry solver - Chem_GPU Chem; - #endif +#ifdef ANALYSIS + Analysis_Module Analysis; +#endif - #ifdef ANALYSIS - Analysis_Module Analysis; - #endif +#ifdef SUPERNOVA // TODO refactor this into Analysis module + Real countSN; + Real countResolved; + Real countUnresolved; + Real totalEnergy; + Real totalMomentum; + Real totalUnresEnergy; +#endif + struct Conserved { + /*! pointer to conserved variable array on the host */ + Real *host; + + /*! \var density + * \brief Array containing the density of each cell in the grid */ + Real *density; + + /*! \var momentum_x + * \brief Array containing the momentum in the x direction of each cell in + * the grid */ + Real *momentum_x; + + /*! \var momentum_y + * \brief Array containing the momentum in the y direction of each cell in + * the grid */ + Real *momentum_y; + + /*! \var momentum_z + * \brief Array containing the momentum in the z direction of each cell in + * the grid */ + Real *momentum_z; + + /*! \var Energy + * \brief Array containing the total Energy of each cell in the grid */ + Real *Energy; + +#ifdef SCALAR + /*! \var scalar + * \brief Array containing the values of passive scalar variable(s). */ + Real *scalar; + #ifdef BASIC_SCALAR + /*! \var basic_scalar + * \brief Array containing the values of a basic passive scalar variable. + */ + Real *basic_scalar; + #endif +#endif // SCALAR + +#ifdef MHD + /*! \var magnetic_x \brief Array containing the magnetic field in the x + * direction of each cell in the grid. Note that this is the magnetic + * field at the x+1/2 face of the cell since constrained transport + * requires face centered, not cell centered, magnetic fields */ + Real *magnetic_x; + + /*! \var magnetic_y \brief Array containing the magnetic field in the y + * direction of each cell in the grid. Note that this is the magnetic + * field at the y+1/2 face of the cell since constrained transport + * requires face centered, not cell centered, magnetic fields */ + Real *magnetic_y; + + /*! \var magnetic_z \brief Array containing the magnetic field in the z + * direction of each cell in the grid. Note that this is the magnetic + * field at the z+1/2 face of the cell since constrained transport + * requires face centered, not cell centered, magnetic fields */ + Real *magnetic_z; +#endif // MHD + +#ifdef DE + /*! \var GasEnergy + * \brief Array containing the internal energy of each cell, only tracked + separately when using the dual-energy formalism. */ + Real *GasEnergy; +#endif // DE + + /*! \var grav_potential + * \brief Array containing the gravitational potential of each cell, only + * tracked separately when using GRAVITY. */ + Real *Grav_potential; - #ifdef SUPERNOVA //TODO refactor this into Analysis module - Real countSN; - Real countResolved; - Real countUnresolved; - Real totalEnergy; - Real totalMomentum; - Real totalUnresEnergy; - #endif - struct Conserved - { - /*! pointer to conserved variable array on the host */ - Real *host; - - /*! \var density - * \brief Array containing the density of each cell in the grid */ - Real *density; - - /*! \var momentum_x - * \brief Array containing the momentum in the x direction of each cell in the grid */ - Real *momentum_x; - - /*! \var momentum_y - * \brief Array containing the momentum in the y direction of each cell in the grid */ - Real *momentum_y; - - /*! \var momentum_z - * \brief Array containing the momentum in the z direction of each cell in the grid */ - Real *momentum_z; - - /*! \var Energy - * \brief Array containing the total Energy of each cell in the grid */ - Real *Energy; - - #ifdef SCALAR - /*! \var scalar - * \brief Array containing the values of passive scalar variable(s). */ - Real *scalar; - #ifdef BASIC_SCALAR - /*! \var basic_scalar - * \brief Array containing the values of a basic passive scalar variable. */ - Real *basic_scalar; - #endif - #endif // SCALAR - - #ifdef MHD - /*! \var magnetic_x \brief Array containing the magnetic field in the x - * direction of each cell in the grid. Note that this is the magnetic - * field at the x+1/2 face of the cell since constrained transport - * requires face centered, not cell centered, magnetic fields */ - Real *magnetic_x; - - /*! \var magnetic_y \brief Array containing the magnetic field in the y - * direction of each cell in the grid. Note that this is the magnetic - * field at the y+1/2 face of the cell since constrained transport - * requires face centered, not cell centered, magnetic fields */ - Real *magnetic_y; - - /*! \var magnetic_z \brief Array containing the magnetic field in the z - * direction of each cell in the grid. Note that this is the magnetic - * field at the z+1/2 face of the cell since constrained transport - * requires face centered, not cell centered, magnetic fields */ - Real *magnetic_z; - #endif // MHD - - #ifdef DE - /*! \var GasEnergy - * \brief Array containing the internal energy of each cell, only tracked separately when using - the dual-energy formalism. */ - Real *GasEnergy; - #endif // DE - - /*! \var grav_potential - * \brief Array containing the gravitational potential of each cell, only tracked separately when using GRAVITY. */ - Real *Grav_potential; - - #ifdef CHEMISTRY_GPU - Real *HI_density; - Real *HII_density; - Real *HeI_density; - Real *HeII_density; - Real *HeIII_density; - Real *e_density; - #endif - - - /*! pointer to conserved variable on device */ - Real *device; - Real *d_density, *d_momentum_x, *d_momentum_y, *d_momentum_z, - *d_Energy, *d_scalar, *d_basic_scalar, - *d_magnetic_x, *d_magnetic_y, *d_magnetic_z, - *d_GasEnergy; - - /*! pointer to gravitational potential on device */ - Real *d_Grav_potential; - } C; - - - /*! \fn Grid3D(void) - * \brief Constructor for the grid */ - Grid3D(void); - - /*! \fn void Initialize(int nx_in, int ny_in, int nz_in) - * \brief Initialize the grid. */ - void Initialize(struct parameters *P); - - /*! \fn void AllocateMemory(void) - * \brief Allocate memory for the d, m, E arrays. */ - void AllocateMemory(void); - - /*! \fn void Set_Initial_Conditions(parameters P) - * \brief Set the initial conditions based on info in the parameters structure. */ - void Set_Initial_Conditions(parameters P); - - /*! \fn void Get_Position(long i, long j, long k, Real *xpos, Real *ypos, Real *zpos) - * \brief Get the cell-centered position based on cell index */ - void Get_Position(long i, long j, long k, Real *xpos, Real *ypos, Real *zpos); - - /*! \fn void Set_Domain_Properties(struct parameters P) - * \brief Set local domain properties */ - void Set_Domain_Properties(struct parameters P); - - /*! \fn void set_dt(Real dti) - * \brief Calculate the timestep. */ - void set_dt(Real dti); - - #ifdef GRAVITY - /*! \fn void set_dt(Real dti) - * \brief Calculate the timestep for Gravity. */ - void set_dt_Gravity(); - #endif +#ifdef CHEMISTRY_GPU + Real *HI_density; + Real *HII_density; + Real *HeI_density; + Real *HeII_density; + Real *HeIII_density; + Real *e_density; +#endif - /*! \fn void Update_Grid(void) - * \brief Update the conserved quantities in each cell. */ - Real Update_Grid(void); + /*! pointer to conserved variable on device */ + Real *device; + Real *d_density, *d_momentum_x, *d_momentum_y, *d_momentum_z, *d_Energy, + *d_scalar, *d_basic_scalar, *d_magnetic_x, *d_magnetic_y, *d_magnetic_z, + *d_GasEnergy; - /*! \fn void Update_Hydro_Grid(void) - * \brief Do all steps to update the hydro. */ - Real Update_Hydro_Grid(void); + /*! pointer to gravitational potential on device */ + Real *d_Grav_potential; + } C; - void Update_Time(); - /*! \fn void Write_Header_Text(FILE *fp) - * \brief Write the relevant header info to a text output file. */ - void Write_Header_Text(FILE *fp); + /*! \fn Grid3D(void) + * \brief Constructor for the grid */ + Grid3D(void); - /*! \fn void Write_Grid_Text(FILE *fp) - * \brief Write the grid to a file, at the current simulation time. */ - void Write_Grid_Text(FILE *fp); + /*! \fn void Initialize(int nx_in, int ny_in, int nz_in) + * \brief Initialize the grid. */ + void Initialize(struct parameters *P); - /*! \fn void Write_Header_Binary(FILE *fp) - * \brief Write the relevant header info to a binary output file. */ - void Write_Header_Binary(FILE *fp); + /*! \fn void AllocateMemory(void) + * \brief Allocate memory for the d, m, E arrays. */ + void AllocateMemory(void); - /*! \fn void Write_Grid_Binary(FILE *fp) - * \brief Write the grid to a file, at the current simulation time. */ - void Write_Grid_Binary(FILE *fp); + /*! \fn void Set_Initial_Conditions(parameters P) + * \brief Set the initial conditions based on info in the parameters + * structure. */ + void Set_Initial_Conditions(parameters P); -#ifdef HDF5 - /*! \fn void Write_Header_HDF5(hid_t file_id) - * \brief Write the relevant header info to the HDF5 file. */ - void Write_Header_HDF5(hid_t file_id); + /*! \fn void Get_Position(long i, long j, long k, Real *xpos, Real *ypos, Real + * *zpos) \brief Get the cell-centered position based on cell index */ + void Get_Position(long i, long j, long k, Real *xpos, Real *ypos, Real *zpos); - /*! \fn void Write_Grid_HDF5(hid_t file_id) - * \brief Write the grid to a file, at the current simulation time. */ - void Write_Grid_HDF5(hid_t file_id); + /*! \fn void Set_Domain_Properties(struct parameters P) + * \brief Set local domain properties */ + void Set_Domain_Properties(struct parameters P); - /*! \fn void Write_Projection_HDF5(hid_t file_id) - * \brief Write projected density and temperature data to a file. */ - void Write_Projection_HDF5(hid_t file_id); + /*! \fn void set_dt(Real dti) + * \brief Calculate the timestep. */ + void set_dt(Real dti); - /*! \fn void Write_Header_Rotated_HDF5(hid_t file_id) - * \brief Write the relevant header info to the HDF5 file for rotated projection. */ - void Write_Header_Rotated_HDF5(hid_t file_id); +#ifdef GRAVITY + /*! \fn void set_dt(Real dti) + * \brief Calculate the timestep for Gravity. */ + void set_dt_Gravity(); +#endif - /*! \fn void Write_Rotated_Projection_HDF5(hid_t file_id) - * \brief Write rotated projected data to a file, at the current simulation time. */ - void Write_Rotated_Projection_HDF5(hid_t file_id); + /*! \fn void Update_Grid(void) + * \brief Update the conserved quantities in each cell. */ + Real Update_Grid(void); - /*! \fn void Write_Slices_HDF5(hid_t file_id) - * \brief Write xy, xz, and yz slices of all data to a file. */ - void Write_Slices_HDF5(hid_t file_id); + /*! \fn void Update_Hydro_Grid(void) + * \brief Do all steps to update the hydro. */ + Real Update_Hydro_Grid(void); -#endif + void Update_Time(); + /*! \fn void Write_Header_Text(FILE *fp) + * \brief Write the relevant header info to a text output file. */ + void Write_Header_Text(FILE *fp); + + /*! \fn void Write_Grid_Text(FILE *fp) + * \brief Write the grid to a file, at the current simulation time. */ + void Write_Grid_Text(FILE *fp); - /*! \fn void Read_Grid(struct parameters P) - * \brief Read in grid data from an output file. */ - void Read_Grid(struct parameters P); + /*! \fn void Write_Header_Binary(FILE *fp) + * \brief Write the relevant header info to a binary output file. */ + void Write_Header_Binary(FILE *fp); - /*! \fn Read_Grid_Binary(FILE *fp) - * \brief Read in grid data from a binary file. */ - void Read_Grid_Binary(FILE *fp); + /*! \fn void Write_Grid_Binary(FILE *fp) + * \brief Write the grid to a file, at the current simulation time. */ + void Write_Grid_Binary(FILE *fp); #ifdef HDF5 - /*! \fn void Read_Grid_HDF5(hid_t file_id) - * \brief Read in grid data from an hdf5 file. */ - void Read_Grid_HDF5(hid_t file_id, struct parameters P); -#endif + /*! \fn void Write_Header_HDF5(hid_t file_id) + * \brief Write the relevant header info to the HDF5 file. */ + void Write_Header_HDF5(hid_t file_id); - /*! \fn void Reset(void) - * \brief Reset the Grid3D class. */ - void Reset(void); - - /*! \fn void FreeMemory(void) - * \brief Free the memory for the density array. */ - void FreeMemory(void); - - /*! \fn void Constant(Real rho, Real vx, Real vy, Real vz, Real P) - * \brief Constant gas properties. */ - void Constant(Real rho, Real vx, Real vy, Real vz, Real P, Real Bx, Real By, Real Bz); - - /*! \fn void Sound_Wave(Real rho, Real vx, Real vy, Real vz, Real P, Real A) - * \brief Sine wave perturbation. */ - void Sound_Wave(Real rho, Real vx, Real vy, Real vz, Real P, Real A); - - /*! - * \brief Initialize the grid with a simple linear wave. - * - * \param[in] rho The background density - * \param[in] vx The background velocity in the X-direction - * \param[in] vy The background velocity in the Y-direction - * \param[in] vz The background velocity in the Z-direction - * \param[in] P The background pressure - * \param[in] A The amplitude of the wave - * \param[in] Bx The background magnetic field in the X-direction - * \param[in] By The background magnetic field in the Y-direction - * \param[in] Bz The background magnetic field in the Z-direction - * \param[in] rEigenVec_rho The right eigenvector component for the density - * \param[in] rEigenVec_MomentumX The right eigenvector component for the velocity - * in the X-direction - * \param[in] rEigenVec_MomentumY The right eigenvector component for the velocity - * in the Y-direction - * \param[in] rEigenVec_MomentumZ The right eigenvector component for the velocity - * in the Z-direction - * \param[in] rEigenVec_E The right eigenvector component for the energy - * \param[in] rEigenVec_Bx The right eigenvector component for the magnetic - * field in the X-direction - * \param[in] rEigenVec_By The right eigenvector component for the magnetic - * field in the Y-direction - * \param[in] rEigenVec_Bz The right eigenvector component for the magnetic - * field in the Z-direction - * \param[in] pitch The pitch angle of the linear wave - * \param[in] yaw The yaw angle of the linear wave - */ - void Linear_Wave(Real rho, Real vx, Real vy, Real vz, Real P, Real A, - Real Bx, Real By, Real Bz, Real rEigenVec_rho, - Real rEigenVec_MomentumX, Real rEigenVec_MomentumY, Real rEigenVec_MomentumZ, - Real rEigenVec_E, Real rEigenVec_Bx, Real rEigenVec_By, - Real rEigenVec_Bz, Real pitch, Real yaw); - - /*! \fn void Square_Wave(Real rho, Real vx, Real vy, Real vz, Real P, Real A) - * \brief Square wave density perturbation with amplitude A*rho in pressure equilibrium. */ - void Square_Wave(Real rho, Real vx, Real vy, Real vz, Real P, Real A); - - /*! \fn void Riemann(Real rho_l, Real vx_l, Real vy_l, Real vz_l, Real P_l, Real Bx_l, Real By_l, Real Bz_l, - Real rho_r, Real vx_r, Real vy_r, Real vz_r, Real P_r, Real Bx_r, Real By_r, Real Bz_r, - Real diaph) - * \brief Initialize the grid with a Riemann problem. */ - void Riemann(Real rho_l, Real vx_l, Real vy_l, Real vz_l, Real P_l, Real Bx_l, Real By_l, Real Bz_l, - Real rho_r, Real vx_r, Real vy_r, Real vz_r, Real P_r, Real Bx_r, Real By_r, Real Bz_r, - Real diaph); + /*! \fn void Write_Grid_HDF5(hid_t file_id) + * \brief Write the grid to a file, at the current simulation time. */ + void Write_Grid_HDF5(hid_t file_id); - /*! \fn void Shu_Osher() - * \brief Initialize the grid with the Shu-Osher shock tube problem. See Stone 2008, Section 8.1 */ - void Shu_Osher(); + /*! \fn void Write_Projection_HDF5(hid_t file_id) + * \brief Write projected density and temperature data to a file. */ + void Write_Projection_HDF5(hid_t file_id); - /*! \fn void Blast_1D() - * \brief Initialize the grid with two interacting blast waves. See Stone 2008, Section 8.1.*/ - void Blast_1D(); + /*! \fn void Write_Header_Rotated_HDF5(hid_t file_id) + * \brief Write the relevant header info to the HDF5 file for rotated + * projection. */ + void Write_Header_Rotated_HDF5(hid_t file_id); - /*! \fn void KH() - * \brief Initialize the grid with a Kelvin-Helmholtz instability with a discontinuous interface. */ - void KH(); + /*! \fn void Write_Rotated_Projection_HDF5(hid_t file_id) + * \brief Write rotated projected data to a file, at the current simulation + * time. */ + void Write_Rotated_Projection_HDF5(hid_t file_id); - /*! \fn void KH_res_ind() - * \brief Initialize the grid with a Kelvin-Helmholtz instability whose modes are resolution independent. */ - void KH_res_ind(); - - /*! \fn void Rayleigh_Taylor() - * \brief Initialize the grid with a 2D Rayleigh-Taylor instability. */ - void Rayleigh_Taylor(); - - /*! \fn void Gresho() - * \brief Initialize the grid with the 2D Gresho problem described in LW03. */ - void Gresho(); - - /*! \fn void Implosion_2D() - * \brief Implosion test described in Liska, 2003. */ - void Implosion_2D(); - - /*! \fn void Explosion_2D() - * \brief Explosion test described in Liska, 2003. */ - void Explosion_2D(); - - /*! \fn void Noh_2D() - * \brief Noh test described in Liska, 2003. */ - void Noh_2D(); - - /*! \fn void Noh_3D() - * \brief Noh test described in Stone, 2008. */ - void Noh_3D(); - - /*! \fn void Disk_2D() - * \brief Initialize the grid with a 2D disk following a Kuzmin profile. */ - void Disk_2D(); - - /*! \fn void Disk_3D(parameters P) - * \brief Initialize the grid with a 3D disk following a Miyamoto-Nagai profile. */ - void Disk_3D(parameters P); - - /*! \fn void Set_Boundary_Conditions(parameters P) - * \brief Set the boundary conditions based on info in the parameters structure. */ - void Set_Boundary_Conditions(parameters P); - - /*! \fn void Set_Boundary_Conditions_Grid(parameters P) - * \brief Set the boundary conditions for all components based on info in the parameters structure. */ - void Set_Boundary_Conditions_Grid( parameters P); - - /*! \fn int Check_Custom_Boundary(int *flags, struct parameters P) - * \brief Check for custom boundary conditions */ - int Check_Custom_Boundary(int *flags, struct parameters P); - - /*! \fn void Set_Boundaries(int dir, int flags[]) - * \brief Apply boundary conditions to the grid. */ - void Set_Boundaries(int dir, int flags[]); - - /*! \fn Set_Boundary_Extents(int dir, int *imin, int *imax) - * \brief Set the extents of the ghost region we are initializing. */ - void Set_Boundary_Extents(int dir, int *imin, int *imax); - - /*! \fn void Custom_Boundary(char bcnd[MAXLEN]) - * \brief Select appropriate custom boundary function. */ - void Custom_Boundary(char bcnd[MAXLEN]); - - /*! \fn void Wind_Boundary() - * \brief Apply a constant wind to the -x boundary. */ - void Wind_Boundary(); - - /*! \fn void Noh_Boundary() - * \brief Apply analytic boundary conditions to +x, +y (and +z) faces, - as per the Noh problem in Liska, 2003, or in Stone, 2008. */ - void Noh_Boundary(); - - /*! \fn void Spherical_Overpressure_3D() - * \brief Initialize the grid with a 3D spherical overdensity and overpressue. */ - void Spherical_Overpressure_3D(); - - /*! \fn void Spherical_Overpressure_3D() - * \brief Initialize the grid with a 3D spherical overdensity for gravitational collapse */ - void Spherical_Overdensity_3D(); - - void Clouds(); - - void Uniform_Grid(); - - void Zeldovich_Pancake( struct parameters P ); - - void Chemistry_Test( struct parameters P ); + /*! \fn void Write_Slices_HDF5(hid_t file_id) + * \brief Write xy, xz, and yz slices of all data to a file. */ + void Write_Slices_HDF5(hid_t file_id); +#endif -#ifdef MPI_CHOLLA - void Set_Boundaries_MPI(struct parameters P); - void Set_Boundaries_MPI_BLOCK(int *flags, struct parameters P); - void Load_and_Send_MPI_Comm_Buffers(int dir, int *flags); - void Wait_and_Unload_MPI_Comm_Buffers(int dir, int *flags); - void Unload_MPI_Comm_Buffers(int index); + /*! \fn void Read_Grid(struct parameters P) + * \brief Read in grid data from an output file. */ + void Read_Grid(struct parameters P); - int Load_Hydro_DeviceBuffer_X0(Real *buffer); - int Load_Hydro_DeviceBuffer_X1(Real *buffer); - int Load_Hydro_DeviceBuffer_Y0(Real *buffer); - int Load_Hydro_DeviceBuffer_Y1(Real *buffer); - int Load_Hydro_DeviceBuffer_Z0(Real *buffer); - int Load_Hydro_DeviceBuffer_Z1(Real *buffer); + /*! \fn Read_Grid_Binary(FILE *fp) + * \brief Read in grid data from a binary file. */ + void Read_Grid_Binary(FILE *fp); - void Unload_Hydro_DeviceBuffer_X0(Real *buffer); - void Unload_Hydro_DeviceBuffer_X1(Real *buffer); - void Unload_Hydro_DeviceBuffer_Y0(Real *buffer); - void Unload_Hydro_DeviceBuffer_Y1(Real *buffer); - void Unload_Hydro_DeviceBuffer_Z0(Real *buffer); - void Unload_Hydro_DeviceBuffer_Z1(Real *buffer); +#ifdef HDF5 + /*! \fn void Read_Grid_HDF5(hid_t file_id) + * \brief Read in grid data from an hdf5 file. */ + void Read_Grid_HDF5(hid_t file_id, struct parameters P); +#endif + + /*! \fn void Reset(void) + * \brief Reset the Grid3D class. */ + void Reset(void); + + /*! \fn void FreeMemory(void) + * \brief Free the memory for the density array. */ + void FreeMemory(void); + + /*! \fn void Constant(Real rho, Real vx, Real vy, Real vz, Real P) + * \brief Constant gas properties. */ + void Constant(Real rho, Real vx, Real vy, Real vz, Real P, Real Bx, Real By, + Real Bz); + + /*! \fn void Sound_Wave(Real rho, Real vx, Real vy, Real vz, Real P, Real A) + * \brief Sine wave perturbation. */ + void Sound_Wave(Real rho, Real vx, Real vy, Real vz, Real P, Real A); + + /*! + * \brief Initialize the grid with a simple linear wave. + * + * \param[in] rho The background density + * \param[in] vx The background velocity in the X-direction + * \param[in] vy The background velocity in the Y-direction + * \param[in] vz The background velocity in the Z-direction + * \param[in] P The background pressure + * \param[in] A The amplitude of the wave + * \param[in] Bx The background magnetic field in the X-direction + * \param[in] By The background magnetic field in the Y-direction + * \param[in] Bz The background magnetic field in the Z-direction + * \param[in] rEigenVec_rho The right eigenvector component for the density + * \param[in] rEigenVec_MomentumX The right eigenvector component for the + * velocity in the X-direction \param[in] rEigenVec_MomentumY The right + * eigenvector component for the velocity in the Y-direction \param[in] + * rEigenVec_MomentumZ The right eigenvector component for the velocity in the + * Z-direction \param[in] rEigenVec_E The right eigenvector component for the + * energy \param[in] rEigenVec_Bx The right eigenvector component for the + * magnetic field in the X-direction \param[in] rEigenVec_By The right + * eigenvector component for the magnetic field in the Y-direction \param[in] + * rEigenVec_Bz The right eigenvector component for the magnetic field in the + * Z-direction \param[in] pitch The pitch angle of the linear wave \param[in] + * yaw The yaw angle of the linear wave + */ + void Linear_Wave(Real rho, Real vx, Real vy, Real vz, Real P, Real A, Real Bx, + Real By, Real Bz, Real rEigenVec_rho, + Real rEigenVec_MomentumX, Real rEigenVec_MomentumY, + Real rEigenVec_MomentumZ, Real rEigenVec_E, + Real rEigenVec_Bx, Real rEigenVec_By, Real rEigenVec_Bz, + Real pitch, Real yaw); + + /*! \fn void Square_Wave(Real rho, Real vx, Real vy, Real vz, Real P, Real A) + * \brief Square wave density perturbation with amplitude A*rho in pressure + * equilibrium. */ + void Square_Wave(Real rho, Real vx, Real vy, Real vz, Real P, Real A); + + /*! \fn void Riemann(Real rho_l, Real vx_l, Real vy_l, Real vz_l, Real P_l, + Real Bx_l, Real By_l, Real Bz_l, Real rho_r, Real vx_r, Real vy_r, Real vz_r, + Real P_r, Real Bx_r, Real By_r, Real Bz_r, Real diaph) + * \brief Initialize the grid with a Riemann problem. */ + void Riemann(Real rho_l, Real vx_l, Real vy_l, Real vz_l, Real P_l, Real Bx_l, + Real By_l, Real Bz_l, Real rho_r, Real vx_r, Real vy_r, + Real vz_r, Real P_r, Real Bx_r, Real By_r, Real Bz_r, + Real diaph); + + /*! \fn void Shu_Osher() + * \brief Initialize the grid with the Shu-Osher shock tube problem. See + * Stone 2008, Section 8.1 */ + void Shu_Osher(); + + /*! \fn void Blast_1D() + * \brief Initialize the grid with two interacting blast waves. See Stone + * 2008, Section 8.1.*/ + void Blast_1D(); + + /*! \fn void KH() + * \brief Initialize the grid with a Kelvin-Helmholtz instability with a + * discontinuous interface. */ + void KH(); + + /*! \fn void KH_res_ind() + * \brief Initialize the grid with a Kelvin-Helmholtz instability whose modes + * are resolution independent. */ + void KH_res_ind(); + + /*! \fn void Rayleigh_Taylor() + * \brief Initialize the grid with a 2D Rayleigh-Taylor instability. */ + void Rayleigh_Taylor(); + + /*! \fn void Gresho() + * \brief Initialize the grid with the 2D Gresho problem described in LW03. + */ + void Gresho(); + + /*! \fn void Implosion_2D() + * \brief Implosion test described in Liska, 2003. */ + void Implosion_2D(); + + /*! \fn void Explosion_2D() + * \brief Explosion test described in Liska, 2003. */ + void Explosion_2D(); + + /*! \fn void Noh_2D() + * \brief Noh test described in Liska, 2003. */ + void Noh_2D(); + + /*! \fn void Noh_3D() + * \brief Noh test described in Stone, 2008. */ + void Noh_3D(); + + /*! \fn void Disk_2D() + * \brief Initialize the grid with a 2D disk following a Kuzmin profile. */ + void Disk_2D(); + + /*! \fn void Disk_3D(parameters P) + * \brief Initialize the grid with a 3D disk following a Miyamoto-Nagai + * profile. */ + void Disk_3D(parameters P); + + /*! \fn void Set_Boundary_Conditions(parameters P) + * \brief Set the boundary conditions based on info in the parameters + * structure. */ + void Set_Boundary_Conditions(parameters P); + + /*! \fn void Set_Boundary_Conditions_Grid(parameters P) + * \brief Set the boundary conditions for all components based on info in the + * parameters structure. */ + void Set_Boundary_Conditions_Grid(parameters P); + + /*! \fn int Check_Custom_Boundary(int *flags, struct parameters P) + * \brief Check for custom boundary conditions */ + int Check_Custom_Boundary(int *flags, struct parameters P); + + /*! \fn void Set_Boundaries(int dir, int flags[]) + * \brief Apply boundary conditions to the grid. */ + void Set_Boundaries(int dir, int flags[]); + + /*! \fn Set_Boundary_Extents(int dir, int *imin, int *imax) + * \brief Set the extents of the ghost region we are initializing. */ + void Set_Boundary_Extents(int dir, int *imin, int *imax); + + /*! \fn void Custom_Boundary(char bcnd[MAXLEN]) + * \brief Select appropriate custom boundary function. */ + void Custom_Boundary(char bcnd[MAXLEN]); + + /*! \fn void Wind_Boundary() + * \brief Apply a constant wind to the -x boundary. */ + void Wind_Boundary(); + + /*! \fn void Noh_Boundary() + * \brief Apply analytic boundary conditions to +x, +y (and +z) faces, + as per the Noh problem in Liska, 2003, or in Stone, 2008. */ + void Noh_Boundary(); + + /*! \fn void Spherical_Overpressure_3D() + * \brief Initialize the grid with a 3D spherical overdensity and + * overpressue. */ + void Spherical_Overpressure_3D(); + + /*! \fn void Spherical_Overpressure_3D() + * \brief Initialize the grid with a 3D spherical overdensity for + * gravitational collapse */ + void Spherical_Overdensity_3D(); + + void Clouds(); + + void Uniform_Grid(); + + void Zeldovich_Pancake(struct parameters P); + + void Chemistry_Test(struct parameters P); + +#ifdef MPI_CHOLLA + void Set_Boundaries_MPI(struct parameters P); + void Set_Boundaries_MPI_BLOCK(int *flags, struct parameters P); + void Load_and_Send_MPI_Comm_Buffers(int dir, int *flags); + void Wait_and_Unload_MPI_Comm_Buffers(int dir, int *flags); + void Unload_MPI_Comm_Buffers(int index); + + int Load_Hydro_DeviceBuffer_X0(Real *buffer); + int Load_Hydro_DeviceBuffer_X1(Real *buffer); + int Load_Hydro_DeviceBuffer_Y0(Real *buffer); + int Load_Hydro_DeviceBuffer_Y1(Real *buffer); + int Load_Hydro_DeviceBuffer_Z0(Real *buffer); + int Load_Hydro_DeviceBuffer_Z1(Real *buffer); + + void Unload_Hydro_DeviceBuffer_X0(Real *buffer); + void Unload_Hydro_DeviceBuffer_X1(Real *buffer); + void Unload_Hydro_DeviceBuffer_Y0(Real *buffer); + void Unload_Hydro_DeviceBuffer_Y1(Real *buffer); + void Unload_Hydro_DeviceBuffer_Z0(Real *buffer); + void Unload_Hydro_DeviceBuffer_Z1(Real *buffer); #endif /*MPI_CHOLLA*/ - #ifdef GRAVITY - void Initialize_Gravity( struct parameters *P ); - void Compute_Gravitational_Potential( struct parameters *P ); - void Copy_Hydro_Density_to_Gravity_Function( int g_start, int g_end); +#ifdef GRAVITY + void Initialize_Gravity(struct parameters *P); + void Compute_Gravitational_Potential(struct parameters *P); + void Copy_Hydro_Density_to_Gravity_Function(int g_start, int g_end); void Copy_Hydro_Density_to_Gravity(); - void Extrapolate_Grav_Potential_Function( int g_start, int g_end ); + void Extrapolate_Grav_Potential_Function(int g_start, int g_end); void Extrapolate_Grav_Potential(); - void Set_Potential_Boundaries_Periodic( int direction, int side, int *flags ); - int Load_Gravity_Potential_To_Buffer( int direction, int side, Real *buffer, int buffer_start ); - void Unload_Gravity_Potential_from_Buffer( int direction, int side, Real *buffer, int buffer_start ); - void Set_Potential_Boundaries_Isolated( int direction, int side, int *flags ); - void Compute_Potential_Boundaries_Isolated( int dir, struct parameters *P ); - void Compute_Potential_Isolated_Boundary( int direction, int side, int bc_potential_type ); + void Set_Potential_Boundaries_Periodic(int direction, int side, int *flags); + int Load_Gravity_Potential_To_Buffer(int direction, int side, Real *buffer, + int buffer_start); + void Unload_Gravity_Potential_from_Buffer(int direction, int side, + Real *buffer, int buffer_start); + void Set_Potential_Boundaries_Isolated(int direction, int side, int *flags); + void Compute_Potential_Boundaries_Isolated(int dir, struct parameters *P); + void Compute_Potential_Isolated_Boundary(int direction, int side, + int bc_potential_type); #ifdef SOR - void Get_Potential_SOR( Real Grav_Constant, Real dens_avrg, Real current_a, struct parameters *P ); - int Load_Poisson_Boundary_To_Buffer( int direction, int side, Real *buffer ); - void Unload_Poisson_Boundary_From_Buffer( int direction, int side, Real *buffer_host ); + void Get_Potential_SOR(Real Grav_Constant, Real dens_avrg, Real current_a, + struct parameters *P); + int Load_Poisson_Boundary_To_Buffer(int direction, int side, Real *buffer); + void Unload_Poisson_Boundary_From_Buffer(int direction, int side, + Real *buffer_host); #endif #ifdef GRAVITY_GPU void Copy_Hydro_Density_to_Gravity_GPU(); void Extrapolate_Grav_Potential_GPU(); - int Load_Gravity_Potential_To_Buffer_GPU( int direction, int side, Real *buffer, int buffer_start ); - void Unload_Gravity_Potential_from_Buffer_GPU( int direction, int side, Real *buffer, int buffer_start ); - void Set_Potential_Boundaries_Isolated_GPU( int direction, int side, int *flags ); - void Set_Potential_Boundaries_Periodic_GPU( int direction, int side, int *flags ); + int Load_Gravity_Potential_To_Buffer_GPU(int direction, int side, + Real *buffer, int buffer_start); + void Unload_Gravity_Potential_from_Buffer_GPU(int direction, int side, + Real *buffer, int buffer_start); + void Set_Potential_Boundaries_Isolated_GPU(int direction, int side, + int *flags); + void Set_Potential_Boundaries_Periodic_GPU(int direction, int side, + int *flags); #endif - #endif//GRAVITY +#endif // GRAVITY - #ifdef GRAVITY_ANALYTIC_COMP +#ifdef GRAVITY_ANALYTIC_COMP void Add_Analytic_Potential(); void Add_Analytic_Potential(int g_start, int g_end); void Setup_Analytic_Potential(struct parameters *P); - void Setup_Analytic_Galaxy_Potential(int g_start, int g_end, DiskGalaxy& gal); + void Setup_Analytic_Galaxy_Potential(int g_start, int g_end, DiskGalaxy &gal); #ifdef GRAVITY_GPU void Add_Analytic_Potential_GPU(); #endif - #endif //GRAVITY_ANALYTIC_COMP +#endif // GRAVITY_ANALYTIC_COMP - #ifdef PARTICLES - void Initialize_Particles( struct parameters *P ); +#ifdef PARTICLES + void Initialize_Particles(struct parameters *P); void Initialize_Uniform_Particles(); - void Copy_Particles_Density_function( int g_start, int g_end ); + void Copy_Particles_Density_function(int g_start, int g_end); void Copy_Particles_Density(); void Copy_Particles_Density_to_Gravity(struct parameters P); - void Set_Particles_Density_Boundaries_Periodic( int direction, int side ); - void Transfer_Particles_Boundaries( struct parameters P ); - Real Update_Grid_and_Particles_KDK( struct parameters P ); - void Set_Particles_Boundary( int dir, int side); + void Set_Particles_Density_Boundaries_Periodic(int direction, int side); + void Transfer_Particles_Boundaries(struct parameters P); + Real Update_Grid_and_Particles_KDK(struct parameters P); + void Set_Particles_Boundary(int dir, int side); #ifdef PARTICLES_CPU void Set_Particles_Open_Boundary_CPU(int dir, int side); #endif #ifdef MPI_CHOLLA - int Load_Particles_Density_Boundary_to_Buffer( int direction, int side, Real *buffer ); - void Unload_Particles_Density_Boundary_From_Buffer( int direction, int side, Real *buffer ); - void Load_and_Send_Particles_X0( int ireq_n_particles, int ireq_particles_transfer ); - void Load_and_Send_Particles_X1( int ireq_n_particles, int ireq_particles_transfer ); - void Load_and_Send_Particles_Y0( int ireq_n_particles, int ireq_particles_transfer ); - void Load_and_Send_Particles_Y1( int ireq_n_particles, int ireq_particles_transfer ); - void Load_and_Send_Particles_Z0( int ireq_n_particles, int ireq_particles_transfer ); - void Load_and_Send_Particles_Z1( int ireq_n_particles, int ireq_particles_transfer ); - void Unload_Particles_from_Buffer_X0( int *flags ); - void Unload_Particles_from_Buffer_X1( int *flags ); - void Unload_Particles_from_Buffer_Y0( int *flags ); - void Unload_Particles_from_Buffer_Y1( int *flags ); - void Unload_Particles_from_Buffer_Z0( int *flags ); - void Unload_Particles_from_Buffer_Z1( int *flags ); - void Wait_NTransfer_and_Request_Recv_Particles_Transfer_BLOCK(int dir, int *flags); - void Load_NTtransfer_and_Request_Receive_Particles_Transfer(int index, int *ireq_particles_transfer); + int Load_Particles_Density_Boundary_to_Buffer(int direction, int side, + Real *buffer); + void Unload_Particles_Density_Boundary_From_Buffer(int direction, int side, + Real *buffer); + void Load_and_Send_Particles_X0(int ireq_n_particles, + int ireq_particles_transfer); + void Load_and_Send_Particles_X1(int ireq_n_particles, + int ireq_particles_transfer); + void Load_and_Send_Particles_Y0(int ireq_n_particles, + int ireq_particles_transfer); + void Load_and_Send_Particles_Y1(int ireq_n_particles, + int ireq_particles_transfer); + void Load_and_Send_Particles_Z0(int ireq_n_particles, + int ireq_particles_transfer); + void Load_and_Send_Particles_Z1(int ireq_n_particles, + int ireq_particles_transfer); + void Unload_Particles_from_Buffer_X0(int *flags); + void Unload_Particles_from_Buffer_X1(int *flags); + void Unload_Particles_from_Buffer_Y0(int *flags); + void Unload_Particles_from_Buffer_Y1(int *flags); + void Unload_Particles_from_Buffer_Z0(int *flags); + void Unload_Particles_from_Buffer_Z1(int *flags); + void Wait_NTransfer_and_Request_Recv_Particles_Transfer_BLOCK(int dir, + int *flags); + void Load_NTtransfer_and_Request_Receive_Particles_Transfer( + int index, int *ireq_particles_transfer); void Wait_and_Unload_MPI_Comm_Particles_Buffers_BLOCK(int dir, int *flags); - void Unload_Particles_From_Buffers_BLOCK(int index, int *flags ); + void Unload_Particles_From_Buffers_BLOCK(int index, int *flags); void Finish_Particles_Transfer(); - #endif//MPI_CHOLLA - void Transfer_Particles_Density_Boundaries( struct parameters P ); - void Copy_Particles_Density_Buffer_Device_to_Host( int direction, int side, Real *buffer_d, Real *buffer_h ); + #endif // MPI_CHOLLA + void Transfer_Particles_Density_Boundaries(struct parameters P); + void Copy_Particles_Density_Buffer_Device_to_Host(int direction, int side, + Real *buffer_d, + Real *buffer_h); // void Transfer_Particles_Boundaries( struct parameters P ); - void WriteData_Particles( struct parameters P, int nfile); - void OutputData_Particles( struct parameters P, int nfile); - void Load_Particles_Data( struct parameters P); + void WriteData_Particles(struct parameters P, int nfile); + void OutputData_Particles(struct parameters P, int nfile); + void Load_Particles_Data(struct parameters P); #ifdef HDF5 - void Write_Particles_Header_HDF5( hid_t file_id); - void Write_Particles_Data_HDF5( hid_t file_id); + void Write_Particles_Header_HDF5(hid_t file_id); + void Write_Particles_Data_HDF5(hid_t file_id); void Load_Particles_Data_HDF5(hid_t file_id, int nfile); - #endif//HDF5 - void Get_Gravity_Field_Particles_function( int g_start, int g_end ); + #endif // HDF5 + void Get_Gravity_Field_Particles_function(int g_start, int g_end); void Get_Gravity_Field_Particles(); - void Get_Gravity_CIC_function( part_int_t p_start, part_int_t p_end ); + void Get_Gravity_CIC_function(part_int_t p_start, part_int_t p_end); void Get_Gravity_CIC(); - void Advance_Particles_KDK_Step1( ); - void Advance_Particles_KDK_Step2( ); - void Advance_Particles_KDK_Step1_function( part_int_t p_start, part_int_t p_end ); - void Advance_Particles_KDK_Step2_function( part_int_t p_start, part_int_t p_end ); + void Advance_Particles_KDK_Step1(); + void Advance_Particles_KDK_Step2(); + void Advance_Particles_KDK_Step1_function(part_int_t p_start, + part_int_t p_end); + void Advance_Particles_KDK_Step2_function(part_int_t p_start, + part_int_t p_end); void Get_Particles_Acceleration(); - void Advance_Particles( int N_KDK_step ); - Real Calc_Particles_dt_function( part_int_t p_start, part_int_t p_end ); + void Advance_Particles(int N_KDK_step); + Real Calc_Particles_dt_function(part_int_t p_start, part_int_t p_end); Real Calc_Particles_dt(); #ifdef PARTICLES_GPU Real Calc_Particles_dt_GPU(); void Advance_Particles_KDK_Step1_GPU(); void Advance_Particles_KDK_Step2_GPU(); - void Set_Particles_Boundary_GPU( int dir, int side); - void Set_Particles_Density_Boundaries_Periodic_GPU( int direction, int side ); - #endif//PARTICLES_GPU + void Set_Particles_Boundary_GPU(int dir, int side); + void Set_Particles_Density_Boundaries_Periodic_GPU(int direction, int side); + #endif // PARTICLES_GPU #ifdef GRAVITY_GPU void Copy_Potential_From_GPU(); void Copy_Particles_Density_to_GPU(); void Copy_Particles_Density_GPU(); - int Load_Particles_Density_Boundary_to_Buffer_GPU( int direction, int side, Real *buffer ); - void Unload_Particles_Density_Boundary_From_Buffer_GPU( int direction, int side, Real *buffer ); - #endif//GRAVITY_GPU - #endif//PARTICLES - - #ifdef COSMOLOGY - void Initialize_Cosmology( struct parameters *P ); - void Change_DM_Frame_System( bool forward ); - void Change_GAS_Frame_System( bool forward ); - void Change_GAS_Frame_System_GPU( bool forward ); - void Change_Cosmological_Frame_Sytem( bool forward ); - void Advance_Particles_KDK_Cosmo_Step1_function( part_int_t p_start, part_int_t p_end ); - void Advance_Particles_KDK_Cosmo_Step2_function( part_int_t p_start, part_int_t p_end ); - Real Calc_Particles_dt_Cosmo_function( part_int_t p_start, part_int_t p_end ); + int Load_Particles_Density_Boundary_to_Buffer_GPU(int direction, int side, + Real *buffer); + void Unload_Particles_Density_Boundary_From_Buffer_GPU(int direction, + int side, + Real *buffer); + #endif // GRAVITY_GPU +#endif // PARTICLES + +#ifdef COSMOLOGY + void Initialize_Cosmology(struct parameters *P); + void Change_DM_Frame_System(bool forward); + void Change_GAS_Frame_System(bool forward); + void Change_GAS_Frame_System_GPU(bool forward); + void Change_Cosmological_Frame_Sytem(bool forward); + void Advance_Particles_KDK_Cosmo_Step1_function(part_int_t p_start, + part_int_t p_end); + void Advance_Particles_KDK_Cosmo_Step2_function(part_int_t p_start, + part_int_t p_end); + Real Calc_Particles_dt_Cosmo_function(part_int_t p_start, part_int_t p_end); Real Calc_Particles_dt_Cosmo(); #ifdef PARTICLES_GPU void Advance_Particles_KDK_Cosmo_Step1_GPU(); void Advance_Particles_KDK_Cosmo_Step2_GPU(); - #endif//PARTICLES_GPU - #endif//COSMOLOGY + #endif // PARTICLES_GPU +#endif // COSMOLOGY - #ifdef COOLING_GRACKLE - void Initialize_Grackle( struct parameters *P ); +#ifdef COOLING_GRACKLE + void Initialize_Grackle(struct parameters *P); void Allocate_Memory_Grackle(); void Initialize_Fields_Grackle(); - void Copy_Fields_To_Grackle_function( int g_start, int g_end ); + void Copy_Fields_To_Grackle_function(int g_start, int g_end); void Copy_Fields_To_Grackle(); - void Update_Internal_Energy_function( int g_start, int g_end ); + void Update_Internal_Energy_function(int g_start, int g_end); void Update_Internal_Energy(); void Do_Cooling_Step_Grackle(); - #endif +#endif - #ifdef CHEMISTRY_GPU - void Initialize_Chemistry( struct parameters *P ); - void Compute_Gas_Temperature( Real *temperature, bool convert_cosmo_units ); +#ifdef CHEMISTRY_GPU + void Initialize_Chemistry(struct parameters *P); + void Compute_Gas_Temperature(Real *temperature, bool convert_cosmo_units); void Update_Chemistry(); - #endif +#endif - #ifdef ANALYSIS - void Initialize_Analysis_Module( struct parameters *P ); - void Compute_and_Output_Analysis( struct parameters *P ); - void Output_Analysis( struct parameters *P ); - void Write_Analysis_Header_HDF5( hid_t file_id ); - void Write_Analysis_Data_HDF5( hid_t file_id ); +#ifdef ANALYSIS + void Initialize_Analysis_Module(struct parameters *P); + void Compute_and_Output_Analysis(struct parameters *P); + void Output_Analysis(struct parameters *P); + void Write_Analysis_Header_HDF5(hid_t file_id); + void Write_Analysis_Data_HDF5(hid_t file_id); #ifdef PHASE_DIAGRAM void Compute_Phase_Diagram(); #endif #ifdef LYA_STATISTICS - void Populate_Lya_Skewers_Local( int axis ); - void Compute_Transmitted_Flux_Skewer( int skewer_id, int axis ); - void Compute_Lya_Statistics( ); - void Compute_Flux_Power_Spectrum_Skewer( int skewer_id, int axis ); - void Initialize_Power_Spectrum_Measurements( int axis ); - #ifdef OUTPUT_SKEWERS - void Output_Skewers_File( struct parameters *P ); - void Write_Skewers_Header_HDF5( hid_t file_id ); - void Write_Skewers_Data_HDF5( hid_t file_id ); - #endif - #endif//LYA_STATISTICS - #endif//ANALYSIS - + void Populate_Lya_Skewers_Local(int axis); + void Compute_Transmitted_Flux_Skewer(int skewer_id, int axis); + void Compute_Lya_Statistics(); + void Compute_Flux_Power_Spectrum_Skewer(int skewer_id, int axis); + void Initialize_Power_Spectrum_Measurements(int axis); + #ifdef OUTPUT_SKEWERS + void Output_Skewers_File(struct parameters *P); + void Write_Skewers_Header_HDF5(hid_t file_id); + void Write_Skewers_Data_HDF5(hid_t file_id); + #endif + #endif // LYA_STATISTICS +#endif // ANALYSIS }; // typedef for Grid3D_PointerMemberFunction typedef void (Grid3D::*Grid3D_PMF_UnloadHydroBuffer)(Real *); -typedef void (Grid3D::*Grid3D_PMF_UnloadGravityPotential) - (int, int, Real *, int); -typedef void (Grid3D::*Grid3D_PMF_UnloadParticleDensity) - (int, int, Real *); +typedef void (Grid3D::*Grid3D_PMF_UnloadGravityPotential)(int, int, Real *, + int); +typedef void (Grid3D::*Grid3D_PMF_UnloadParticleDensity)(int, int, Real *); -#endif //GRID3D_H +#endif // GRID3D_H diff --git a/src/grid/grid_enum.h b/src/grid/grid_enum.h index b96f0f4ca..268e66226 100644 --- a/src/grid/grid_enum.h +++ b/src/grid/grid_enum.h @@ -1,24 +1,26 @@ #pragma once // An enum which holds offsets for grid quantities -// In the final form of this approach, this file will also set nfields (not yet) and NSCALARS (done) -// so that adding a field only requires registering it here: +// In the final form of this approach, this file will also set nfields (not yet) +// and NSCALARS (done) so that adding a field only requires registering it here: // grid knows to allocate memory based on nfields and NSCALARS // and values can be accessed with density[id + ncells*grid_enum::enum_name] // example: C.device[id + H.n_cells*grid_enum::basic_scalar] - // enum notes: -// For advanced devs: must be "unscoped" to be implicitly treated as int: this means cannot use "enum class" or "enum struct" -// Wrapped in namespace to give it an effective scope to prevent collisions -// enum values (i.e. density) belong to their enclosing scope, which necessitates the namespace wrapping +// For advanced devs: must be "unscoped" to be implicitly treated as int: this +// means cannot use "enum class" or "enum struct" Wrapped in namespace to give +// it an effective scope to prevent collisions enum values (i.e. density) belong +// to their enclosing scope, which necessitates the namespace wrapping // --otherwise "density" would be available in global scope // ": int" forces underlying type to be int -namespace grid_enum { +namespace grid_enum +{ enum : int { - // Don't change order of hydro quantities until all of hydro is made consistent with grid_enum (if ever) because enum values depend on order + // Don't change order of hydro quantities until all of hydro is made + // consistent with grid_enum (if ever) because enum values depend on order density, momentum_x, momentum_y, @@ -26,11 +28,13 @@ enum : int { Energy, // Code assumes scalars are a contiguous block - // Always define scalar, scalar_minus_1, finalscalar_plus_1, finalscalar to compute NSCALARS + // Always define scalar, scalar_minus_1, finalscalar_plus_1, finalscalar to + // compute NSCALARS scalar, - scalar_minus_1 = scalar - 1,// so that next enum item starts at same index as scalar + scalar_minus_1 = + scalar - 1, // so that next enum item starts at same index as scalar - #ifdef SCALAR +#ifdef SCALAR // Add scalars here, wrapped appropriately with ifdefs: #ifdef BASIC_SCALAR basic_scalar, @@ -43,63 +47,66 @@ enum : int { HeII_density, HeIII_density, e_density, - #ifdef GRACKLE_METALS + #ifdef GRACKLE_METALS metal_density, - #endif + #endif #endif #ifdef DUST dust_density, - #endif // DUST + #endif // DUST - #endif // SCALAR +#endif // SCALAR - finalscalar_plus_1, // needed to calculate NSCALARS - finalscalar = finalscalar_plus_1 - 1, // resets enum to finalscalar so fields afterwards are correct - // so that anything after starts with scalar + NSCALARS + finalscalar_plus_1, // needed to calculate NSCALARS + finalscalar = + finalscalar_plus_1 - + 1, // resets enum to finalscalar so fields afterwards are correct +// so that anything after starts with scalar + NSCALARS - #ifdef MHD +#ifdef MHD magnetic_x, magnetic_y, magnetic_z, - #endif - #ifdef DE +#endif +#ifdef DE GasEnergy, - #endif +#endif num_fields, - //Aliases and manually computed enums + // Aliases and manually computed enums nscalars = finalscalar_plus_1 - scalar, - - #ifdef MHD - num_flux_fields = num_fields-1, - num_interface_fields = num_fields-1, - #else - num_flux_fields = num_fields, + +#ifdef MHD + num_flux_fields = num_fields - 1, + num_interface_fields = num_fields - 1, +#else + num_flux_fields = num_fields, num_interface_fields = num_fields, - #endif //MHD +#endif // MHD - #ifdef MHD +#ifdef MHD magnetic_start = magnetic_x, magnetic_end = magnetic_z, - // Note that the direction of the flux, the suffix _? indicates the direction of the electric field, not the magnetic flux + // Note that the direction of the flux, the suffix _? indicates the direction + // of the electric field, not the magnetic flux fluxX_magnetic_z = magnetic_start, - fluxX_magnetic_y = magnetic_start+1, + fluxX_magnetic_y = magnetic_start + 1, fluxY_magnetic_x = magnetic_start, - fluxY_magnetic_z = magnetic_start+1, + fluxY_magnetic_z = magnetic_start + 1, fluxZ_magnetic_y = magnetic_start, - fluxZ_magnetic_x = magnetic_start+1, + fluxZ_magnetic_x = magnetic_start + 1, Q_x_magnetic_y = magnetic_start, - Q_x_magnetic_z = magnetic_start+1, + Q_x_magnetic_z = magnetic_start + 1, Q_y_magnetic_z = magnetic_start, - Q_y_magnetic_x = magnetic_start+1, + Q_y_magnetic_x = magnetic_start + 1, Q_z_magnetic_x = magnetic_start, - Q_z_magnetic_y = magnetic_start+1 - #endif // MHD + Q_z_magnetic_y = magnetic_start + 1 +#endif // MHD }; -} +} // namespace grid_enum #define NSCALARS grid_enum::nscalars diff --git a/src/grid/initial_conditions.cpp b/src/grid/initial_conditions.cpp index f4edbb693..df487a7c4 100644 --- a/src/grid/initial_conditions.cpp +++ b/src/grid/initial_conditions.cpp @@ -1,101 +1,104 @@ /*! \file initial_conditions.cpp * \brief Definitions of initial conditions for different tests. - Note that the grid is mapped to 1D as i + (x_dim)*j + (x_dim*y_dim)*k. - Functions are members of the Grid3D class. */ + Note that the grid is mapped to 1D as i + (x_dim)*j + + (x_dim*y_dim)*k. Functions are members of the Grid3D class. */ - -#include #include +#include +#include #include #include + +#include +#include +#include +#include + #include "../global/global.h" #include "../grid/grid3D.h" -#include "../mpi/mpi_routines.h" #include "../io/io.h" +#include "../mpi/mpi_routines.h" #include "../utils/error_handling.h" -#include "../utils/mhd_utilities.h" #include "../utils/math_utilities.h" -#include -#include -#include -#include -#include +#include "../utils/mhd_utilities.h" using namespace std; /*! \fn void Set_Initial_Conditions(parameters P) - * \brief Set the initial conditions based on info in the parameters structure. */ -void Grid3D::Set_Initial_Conditions(parameters P) { - + * \brief Set the initial conditions based on info in the parameters structure. + */ +void Grid3D::Set_Initial_Conditions(parameters P) +{ Set_Domain_Properties(P); Set_Gammas(P.gamma); - if (strcmp(P.init, "Constant")==0) { + if (strcmp(P.init, "Constant") == 0) { Constant(P.rho, P.vx, P.vy, P.vz, P.P, P.Bx, P.By, P.Bz); - } else if (strcmp(P.init, "Sound_Wave")==0) { + } else if (strcmp(P.init, "Sound_Wave") == 0) { Sound_Wave(P.rho, P.vx, P.vy, P.vz, P.P, P.A); - } else if (strcmp(P.init, "Linear_Wave")==0) { + } else if (strcmp(P.init, "Linear_Wave") == 0) { Linear_Wave(P.rho, P.vx, P.vy, P.vz, P.P, P.A, P.Bx, P.By, P.Bz, - P.rEigenVec_rho, P.rEigenVec_MomentumX, P.rEigenVec_MomentumY, P.rEigenVec_MomentumZ, - P.rEigenVec_E, P.rEigenVec_Bx, P.rEigenVec_By, P.rEigenVec_Bz, P.pitch, P.yaw); - } else if (strcmp(P.init, "Square_Wave")==0) { + P.rEigenVec_rho, P.rEigenVec_MomentumX, P.rEigenVec_MomentumY, + P.rEigenVec_MomentumZ, P.rEigenVec_E, P.rEigenVec_Bx, + P.rEigenVec_By, P.rEigenVec_Bz, P.pitch, P.yaw); + } else if (strcmp(P.init, "Square_Wave") == 0) { Square_Wave(P.rho, P.vx, P.vy, P.vz, P.P, P.A); - } else if (strcmp(P.init, "Riemann")==0) { + } else if (strcmp(P.init, "Riemann") == 0) { Riemann(P.rho_l, P.vx_l, P.vy_l, P.vz_l, P.P_l, P.Bx_l, P.By_l, P.Bz_l, P.rho_r, P.vx_r, P.vy_r, P.vz_r, P.P_r, P.Bx_r, P.By_r, P.Bz_r, P.diaph); - } else if (strcmp(P.init, "Shu_Osher")==0) { + } else if (strcmp(P.init, "Shu_Osher") == 0) { Shu_Osher(); - } else if (strcmp(P.init, "Blast_1D")==0) { + } else if (strcmp(P.init, "Blast_1D") == 0) { Blast_1D(); - } else if (strcmp(P.init, "KH")==0) { + } else if (strcmp(P.init, "KH") == 0) { KH(); - } else if (strcmp(P.init, "KH_res_ind")==0) { + } else if (strcmp(P.init, "KH_res_ind") == 0) { KH_res_ind(); - } else if (strcmp(P.init, "Rayleigh_Taylor")==0) { + } else if (strcmp(P.init, "Rayleigh_Taylor") == 0) { Rayleigh_Taylor(); - } else if (strcmp(P.init, "Implosion_2D")==0) { + } else if (strcmp(P.init, "Implosion_2D") == 0) { Implosion_2D(); - } else if (strcmp(P.init, "Gresho")==0) { + } else if (strcmp(P.init, "Gresho") == 0) { Gresho(); - } else if (strcmp(P.init, "Noh_2D")==0) { + } else if (strcmp(P.init, "Noh_2D") == 0) { Noh_2D(); - } else if (strcmp(P.init, "Noh_3D")==0) { + } else if (strcmp(P.init, "Noh_3D") == 0) { Noh_3D(); - } else if (strcmp(P.init, "Disk_2D")==0) { + } else if (strcmp(P.init, "Disk_2D") == 0) { Disk_2D(); - } else if (strcmp(P.init, "Disk_3D")==0 || strcmp(P.init, "Disk_3D_particles")==0) { + } else if (strcmp(P.init, "Disk_3D") == 0 || + strcmp(P.init, "Disk_3D_particles") == 0) { Disk_3D(P); - } else if (strcmp(P.init, "Spherical_Overpressure_3D")==0) { + } else if (strcmp(P.init, "Spherical_Overpressure_3D") == 0) { Spherical_Overpressure_3D(); - } else if (strcmp(P.init, "Spherical_Overdensity_3D")==0) { + } else if (strcmp(P.init, "Spherical_Overdensity_3D") == 0) { Spherical_Overdensity_3D(); - } else if (strcmp(P.init, "Clouds")==0) { + } else if (strcmp(P.init, "Clouds") == 0) { Clouds(); - } else if (strcmp(P.init, "Read_Grid")==0) { - #ifndef ONLY_PARTICLES + } else if (strcmp(P.init, "Read_Grid") == 0) { +#ifndef ONLY_PARTICLES Read_Grid(P); - #else // ONLY_PARTICLES +#else // ONLY_PARTICLES // Initialize a uniform hydro grid when only integrating particles Uniform_Grid(); - #endif // ONLY_PARTICLES - } else if (strcmp(P.init, "Uniform")==0) { +#endif // ONLY_PARTICLES + } else if (strcmp(P.init, "Uniform") == 0) { Uniform_Grid(); - } else if (strcmp(P.init, "Zeldovich_Pancake")==0) { + } else if (strcmp(P.init, "Zeldovich_Pancake") == 0) { Zeldovich_Pancake(P); - } else if (strcmp(P.init, "Chemistry_Test")==0) { + } else if (strcmp(P.init, "Chemistry_Test") == 0) { Chemistry_Test(P); } else { - chprintf ("ABORT: %s: Unknown initial conditions!\n", P.init); + chprintf("ABORT: %s: Unknown initial conditions!\n", P.init); chexit(-1); } - if ( C.device != NULL ) - { - CudaSafeCall( - cudaMemcpy(C.device, C.density, H.n_fields*H.n_cells*sizeof(Real), - cudaMemcpyHostToDevice) ); - } + if (C.device != NULL) { + CudaSafeCall(cudaMemcpy(C.device, C.density, + H.n_fields * H.n_cells * sizeof(Real), + cudaMemcpyHostToDevice)); + } } /*! \fn void Set_Domain_Properties(struct parameters P) @@ -113,9 +116,9 @@ void Grid3D::Set_Domain_Properties(struct parameters P) H.zdglobal = P.zlen; #ifndef MPI_CHOLLA - Real nx_param = (Real) (H.nx - 2*H.n_ghost); - Real ny_param = (Real) (H.ny - 2*H.n_ghost); - Real nz_param = (Real) (H.nz - 2*H.n_ghost); + Real nx_param = (Real)(H.nx - 2 * H.n_ghost); + Real ny_param = (Real)(H.ny - 2 * H.n_ghost); + Real nz_param = (Real)(H.nz - 2 * H.n_ghost); // Local Boundary Coordinates H.xblocal = H.xbound; @@ -127,9 +130,9 @@ void Grid3D::Set_Domain_Properties(struct parameters P) H.zblocal_max = H.zblocal + P.zlen; #else - Real nx_param = (Real) nx_global; - Real ny_param = (Real) ny_global; - Real nz_param = (Real) nz_global; + Real nx_param = (Real)nx_global; + Real ny_param = (Real)ny_global; + Real nz_param = (Real)nz_global; // Local Boundary Coordinates /* @@ -137,47 +140,45 @@ void Grid3D::Set_Domain_Properties(struct parameters P) H.yblocal = H.ybound + P.ylen * ((Real) ny_local_start) / ny_param; H.zblocal = H.zbound + P.zlen * ((Real) nz_local_start) / nz_param; */ - H.xblocal = H.xbound + ((Real) nx_local_start) * (P.xlen / nx_param); - H.yblocal = H.ybound + ((Real) ny_local_start) * (P.ylen / ny_param); - H.zblocal = H.zbound + ((Real) nz_local_start) * (P.zlen / nz_param); + H.xblocal = H.xbound + ((Real)nx_local_start) * (P.xlen / nx_param); + H.yblocal = H.ybound + ((Real)ny_local_start) * (P.ylen / ny_param); + H.zblocal = H.zbound + ((Real)nz_local_start) * (P.zlen / nz_param); - H.xblocal_max = H.xbound + ((Real) (nx_local_start + H.nx - 2*H.n_ghost)) * (P.xlen / nx_param); - H.yblocal_max = H.ybound + ((Real) (ny_local_start + H.ny - 2*H.n_ghost)) * (P.ylen / ny_param); - H.zblocal_max = H.zbound + ((Real) (nz_local_start + H.nz - 2*H.n_ghost)) * (P.zlen / nz_param); + H.xblocal_max = H.xbound + ((Real)(nx_local_start + H.nx - 2 * H.n_ghost)) * + (P.xlen / nx_param); + H.yblocal_max = H.ybound + ((Real)(ny_local_start + H.ny - 2 * H.n_ghost)) * + (P.ylen / ny_param); + H.zblocal_max = H.zbound + ((Real)(nz_local_start + H.nz - 2 * H.n_ghost)) * + (P.zlen / nz_param); #endif /*perform 1-D first*/ - if(H.nx > 1 && H.ny==1 && H.nz==1) - { + if (H.nx > 1 && H.ny == 1 && H.nz == 1) { H.dx = P.xlen / nx_param; H.dy = P.ylen; H.dz = P.zlen; } /*perform 2-D next*/ - if(H.nx > 1 && H.ny>1 && H.nz==1) - { + if (H.nx > 1 && H.ny > 1 && H.nz == 1) { H.dx = P.xlen / nx_param; H.dy = P.ylen / ny_param; H.dz = P.zlen; } /*perform 3-D last*/ - if(H.nx>1 && H.ny>1 && H.nz>1) - { + if (H.nx > 1 && H.ny > 1 && H.nz > 1) { H.dx = P.xlen / nx_param; H.dy = P.ylen / ny_param; H.dz = P.zlen / nz_param; - } } - - -/*! \fn void Constant(Real rho, Real vx, Real vy, Real vz, Real P, Real Bx, Real By, Real Bz) - * \brief Constant gas properties. */ -void Grid3D::Constant(Real rho, Real vx, Real vy, Real vz, Real P, Real Bx, Real By, Real Bz) +/*! \fn void Constant(Real rho, Real vx, Real vy, Real vz, Real P, Real Bx, Real + * By, Real Bz) \brief Constant gas properties. */ +void Grid3D::Constant(Real rho, Real vx, Real vy, Real vz, Real P, Real Bx, + Real By, Real Bz) { int i, j, k, id; int istart, jstart, kstart, iend, jend, kend; @@ -186,62 +187,58 @@ void Grid3D::Constant(Real rho, Real vx, Real vy, Real vz, Real P, Real Bx, Real Real n, T; istart = H.n_ghost; - iend = H.nx-H.n_ghost; + iend = H.nx - H.n_ghost; if (H.ny > 1) { jstart = H.n_ghost; - jend = H.ny-H.n_ghost; - } - else { + jend = H.ny - H.n_ghost; + } else { jstart = 0; jend = H.ny; } if (H.nz > 1) { kstart = H.n_ghost; - kend = H.nz-H.n_ghost; - } - else { + kend = H.nz - H.n_ghost; + } else { kstart = 0; kend = H.nz; } // set initial values of conserved variables - for(k=kstart-1; k= kstart) and (j >= jstart) and (i >= istart)) - { + if ((k >= kstart) and (j >= jstart) and (i >= istart)) { // set constant initial states C.density[id] = rho; - C.momentum_x[id] = rho*vx; - C.momentum_y[id] = rho*vy; - C.momentum_z[id] = rho*vz; - C.Energy[id] = P/(gama-1.0) + 0.5*rho*(vx*vx + vy*vy + vz*vz); - #ifdef DE - C.GasEnergy[id] = P/(gama-1.0); - #endif // DE + C.momentum_x[id] = rho * vx; + C.momentum_y[id] = rho * vy; + C.momentum_z[id] = rho * vz; + C.Energy[id] = + P / (gama - 1.0) + 0.5 * rho * (vx * vx + vy * vy + vz * vz); +#ifdef DE + C.GasEnergy[id] = P / (gama - 1.0); +#endif // DE } - if (i==istart && j==jstart && k==kstart) { - n = rho*DENSITY_UNIT / (mu*MP); - T = P*PRESSURE_UNIT / (n*KB); + if (i == istart && j == jstart && k == kstart) { + n = rho * DENSITY_UNIT / (mu * MP); + T = P * PRESSURE_UNIT / (n * KB); printf("Initial n = %e, T = %e\n", n, T); } } } } - } /*! \fn void Sound_Wave(Real rho, Real vx, Real vy, Real vz, Real P, Real A) @@ -253,78 +250,78 @@ void Grid3D::Sound_Wave(Real rho, Real vx, Real vy, Real vz, Real P, Real A) Real x_pos, y_pos, z_pos; istart = H.n_ghost; - iend = H.nx-H.n_ghost; + iend = H.nx - H.n_ghost; if (H.ny > 1) { jstart = H.n_ghost; - jend = H.ny-H.n_ghost; - } - else { + jend = H.ny - H.n_ghost; + } else { jstart = 0; jend = H.ny; } if (H.nz > 1) { kstart = H.n_ghost; - kend = H.nz-H.n_ghost; - } - else { + kend = H.nz - H.n_ghost; + } else { kstart = 0; kend = H.nz; } // set initial values of conserved variables - for(k=kstart; k(H.dx/2, H.dy/2, H.dz/2, pitch, yaw); + auto [stagger, junk1, junk2] = + math_utils::rotateCoords(H.dx / 2, H.dy / 2, H.dz / 2, pitch, yaw); // set initial values of conserved variables - for(int k=H.n_ghost; k(i, j, k, pitch, yaw); + auto [i_rot, j_rot, k_rot] = + math_utils::rotateCoords(i, j, k, pitch, yaw); - //get cell index - int id = i + j*H.nx + k*H.nx*H.ny; + // get cell index + int id = i + j * H.nx + k * H.nx * H.ny; // get cell-centered position Real x_pos, y_pos, z_pos; @@ -335,32 +332,32 @@ void Grid3D::Linear_Wave(Real rho, Real vx, Real vy, Real vz, Real P, Real A, Real sine_wave = std::sin(2.0 * PI * x_pos); C.density[id] = rho; - C.momentum_x[id] = rho*vx; - C.momentum_y[id] = rho*vy; - C.momentum_z[id] = rho*vz; - C.Energy[id] = mhd::utils::computeEnergy(P, rho, vx, vy, vz, Bx, By, Bz, gama); + C.momentum_x[id] = rho * vx; + C.momentum_y[id] = rho * vy; + C.momentum_z[id] = rho * vz; + C.Energy[id] = + mhd::utils::computeEnergy(P, rho, vx, vy, vz, Bx, By, Bz, gama); // add small-amplitude perturbations - C.density[id] += A * rEigenVec_rho * sine_wave; + C.density[id] += A * rEigenVec_rho * sine_wave; C.momentum_x[id] += A * rEigenVec_MomentumX * sine_wave; C.momentum_y[id] += A * rEigenVec_MomentumY * sine_wave; C.momentum_z[id] += A * rEigenVec_MomentumZ * sine_wave; - C.Energy[id] += A * rEigenVec_E * sine_wave; - - #ifdef MHD - sine_wave = std::sin(2.0 * PI * (x_pos+stagger)); - C.magnetic_x[id] = Bx + A * rEigenVec_Bx * sine_wave; - C.magnetic_y[id] = By + A * rEigenVec_By * sine_wave; - C.magnetic_z[id] = Bz + A * rEigenVec_Bz * sine_wave; - #endif //MHD + C.Energy[id] += A * rEigenVec_E * sine_wave; + +#ifdef MHD + sine_wave = std::sin(2.0 * PI * (x_pos + stagger)); + C.magnetic_x[id] = Bx + A * rEigenVec_Bx * sine_wave; + C.magnetic_y[id] = By + A * rEigenVec_By * sine_wave; + C.magnetic_z[id] = Bz + A * rEigenVec_Bz * sine_wave; +#endif // MHD } } } - } - /*! \fn void Square_Wave(Real rho, Real vx, Real vy, Real vz, Real P, Real A) - * \brief Square wave density perturbation with amplitude A*rho in pressure equilibrium. */ + * \brief Square wave density perturbation with amplitude A*rho in pressure + * equilibrium. */ void Grid3D::Square_Wave(Real rho, Real vx, Real vy, Real vz, Real P, Real A) { int i, j, k, id; @@ -368,79 +365,77 @@ void Grid3D::Square_Wave(Real rho, Real vx, Real vy, Real vz, Real P, Real A) Real x_pos, y_pos, z_pos; istart = H.n_ghost; - iend = H.nx-H.n_ghost; + iend = H.nx - H.n_ghost; if (H.ny > 1) { jstart = H.n_ghost; - jend = H.ny-H.n_ghost; - } - else { + jend = H.ny - H.n_ghost; + } else { jstart = 0; jend = H.ny; } if (H.nz > 1) { kstart = H.n_ghost; - kend = H.nz-H.n_ghost; - } - else { + kend = H.nz - H.n_ghost; + } else { kstart = 0; kend = H.nz; } // set initial values of conserved variables - for(k=kstart; k 0.25*H.xdglobal && x_pos < 0.75*H.xdglobal) - { - C.density[id] = rho*A; - C.momentum_x[id] = rho*A * vx; - C.momentum_y[id] = rho*A * vy; - C.momentum_z[id] = rho*A * vz; - C.Energy[id] = P/(gama-1.0) + 0.5*rho*A*(vx*vx + vy*vy + vz*vz); - #ifdef DE - C.GasEnergy[id] = P/(gama-1.0); - #endif - #ifdef SCALAR - #ifdef BASIC_SCALAR - C.basic_scalar[id] = C.density[id]*1.0; - #endif - #endif + // C.momentum_z[id] = rho_l * v_l; + C.Energy[id] = + P / (gama - 1.0) + 0.5 * rho * (vx * vx + vy * vy + vz * vz); +#ifdef DE + C.GasEnergy[id] = P / (gama - 1.0); +#endif +#ifdef SCALAR + #ifdef BASIC_SCALAR + C.basic_scalar[id] = C.density[id] * 0.0; + #endif +#endif + if (x_pos > 0.25 * H.xdglobal && x_pos < 0.75 * H.xdglobal) { + C.density[id] = rho * A; + C.momentum_x[id] = rho * A * vx; + C.momentum_y[id] = rho * A * vy; + C.momentum_z[id] = rho * A * vz; + C.Energy[id] = + P / (gama - 1.0) + 0.5 * rho * A * (vx * vx + vy * vy + vz * vz); +#ifdef DE + C.GasEnergy[id] = P / (gama - 1.0); +#endif +#ifdef SCALAR + #ifdef BASIC_SCALAR + C.basic_scalar[id] = C.density[id] * 1.0; + #endif +#endif } } } } } - -/*! \fn void Riemann(Real rho_l, Real vx_l, Real vy_l, Real vz_l, Real P_l, Real Bx_l, Real By_l, Real Bz_l, - Real rho_r, Real vx_r, Real vy_r, Real vz_r, Real P_r, Real Bx_r, Real By_r, Real Bz_r, - Real diaph) +/*! \fn void Riemann(Real rho_l, Real vx_l, Real vy_l, Real vz_l, Real P_l, Real + Bx_l, Real By_l, Real Bz_l, Real rho_r, Real vx_r, Real vy_r, Real vz_r, Real + P_r, Real Bx_r, Real By_r, Real Bz_r, Real diaph) * \brief Initialize the grid with a Riemann problem. */ -void Grid3D::Riemann(Real rho_l, Real vx_l, Real vy_l, Real vz_l, Real P_l, Real Bx_l, Real By_l, Real Bz_l, - Real rho_r, Real vx_r, Real vy_r, Real vz_r, Real P_r, Real Bx_r, Real By_r, Real Bz_r, - Real diaph) +void Grid3D::Riemann(Real rho_l, Real vx_l, Real vy_l, Real vz_l, Real P_l, + Real Bx_l, Real By_l, Real Bz_l, Real rho_r, Real vx_r, + Real vy_r, Real vz_r, Real P_r, Real Bx_r, Real By_r, + Real Bz_r, Real diaph) { int i, j, k, id; int istart, jstart, kstart, iend, jend, kend; @@ -448,86 +443,78 @@ void Grid3D::Riemann(Real rho_l, Real vx_l, Real vy_l, Real vz_l, Real P_l, Real Real v, P, cs; istart = H.n_ghost; - iend = H.nx-H.n_ghost; + iend = H.nx - H.n_ghost; if (H.ny > 1) { jstart = H.n_ghost; - jend = H.ny-H.n_ghost; - } - else { + jend = H.ny - H.n_ghost; + } else { jstart = 0; jend = H.ny; } if (H.nz > 1) { kstart = H.n_ghost; - kend = H.nz-H.n_ghost; - } - else { + kend = H.nz - H.n_ghost; + } else { kstart = 0; kend = H.nz; } // set initial values of conserved variables - for(k=kstart-1; k= kstart) and (j >= jstart) and (i >= istart)) - { - if (x_pos < diaph) - { + if ((k >= kstart) and (j >= jstart) and (i >= istart)) { + if (x_pos < diaph) { C.density[id] = rho_l; C.momentum_x[id] = rho_l * vx_l; C.momentum_y[id] = rho_l * vy_l; C.momentum_z[id] = rho_l * vz_l; - C.Energy[id] = mhd::utils::computeEnergy(P_l, rho_l, vx_l, vy_l, vz_l, Bx_l, By_l, Bz_l, gama); - #ifdef SCALAR - #ifdef BASIC_SCALAR - C.basic_scalar[id] = 1.0*rho_l; - #endif - #endif //SCALAR - #ifdef DE - C.GasEnergy[id] = P_l/(gama-1.0); - #endif //DE - } - else - { + C.Energy[id] = mhd::utils::computeEnergy( + P_l, rho_l, vx_l, vy_l, vz_l, Bx_l, By_l, Bz_l, gama); +#ifdef SCALAR + #ifdef BASIC_SCALAR + C.basic_scalar[id] = 1.0 * rho_l; + #endif +#endif // SCALAR +#ifdef DE + C.GasEnergy[id] = P_l / (gama - 1.0); +#endif // DE + } else { C.density[id] = rho_r; C.momentum_x[id] = rho_r * vx_r; C.momentum_y[id] = rho_r * vy_r; C.momentum_z[id] = rho_r * vz_r; - C.Energy[id] = mhd::utils::computeEnergy(P_r, rho_r, vx_r, vy_r, vz_r, Bx_r, By_r, Bz_r, gama); - #ifdef SCALAR - #ifdef BASIC_SCALAR - C.basic_scalar[id] = 0.0*rho_r; - #endif - #endif //SCALAR - #ifdef DE - C.GasEnergy[id] = P_r/(gama-1.0); - #endif //DE + C.Energy[id] = mhd::utils::computeEnergy( + P_r, rho_r, vx_r, vy_r, vz_r, Bx_r, By_r, Bz_r, gama); +#ifdef SCALAR + #ifdef BASIC_SCALAR + C.basic_scalar[id] = 0.0 * rho_r; + #endif +#endif // SCALAR +#ifdef DE + C.GasEnergy[id] = P_r / (gama - 1.0); +#endif // DE } } } @@ -535,9 +522,9 @@ void Grid3D::Riemann(Real rho_l, Real vx_l, Real vy_l, Real vz_l, Real P_l, Real } } - /*! \fn void Shu_Osher() - * \brief Initialize the grid with the Shu-Osher shock tube problem. See Stone 2008, Section 8.1 */ + * \brief Initialize the grid with the Shu-Osher shock tube problem. See Stone + * 2008, Section 8.1 */ void Grid3D::Shu_Osher() { int i, id; @@ -545,42 +532,37 @@ void Grid3D::Shu_Osher() Real vx, P; // set initial values of conserved variables - for (i=H.n_ghost; i 0.9) - { - C.density[id] = 1.0; + P = 1000.0; + } else if (x_pos > 0.9) { + C.density[id] = 1.0; C.momentum_x[id] = 0.0; C.momentum_y[id] = 0.0; C.momentum_z[id] = 0.0; - P = 100; - } - else - { - C.density[id] = 1.0; + P = 100; + } else { + C.density[id] = 1.0; C.momentum_x[id] = 0.0; C.momentum_y[id] = 0.0; C.momentum_z[id] = 0.0; - P = 0.01; + P = 0.01; } - C.Energy[id] = P/(gama-1.0); - #ifdef DE - C.GasEnergy[id] = P/(gama-1.0); - #endif //DE - + C.Energy[id] = P / (gama - 1.0); +#ifdef DE + C.GasEnergy[id] = P / (gama - 1.0); +#endif // DE } } - /*! \fn void KH() * \brief Initialize the grid with a Kelvin-Helmholtz instability. This version of KH test has a discontinuous boundary. @@ -642,85 +617,81 @@ void Grid3D::KH() d2 = 1.0; v1 = 0.5; v2 = -0.5; - P = 2.5; - A = 0.1; + P = 2.5; + A = 0.1; istart = H.n_ghost; - iend = H.nx-H.n_ghost; + iend = H.nx - H.n_ghost; jstart = H.n_ghost; - jend = H.ny-H.n_ghost; + jend = H.ny - H.n_ghost; if (H.nz > 1) { kstart = H.n_ghost; - kend = H.nz-H.n_ghost; - } - else { + kend = H.nz - H.n_ghost; + } else { kstart = 0; kend = H.nz; } // set the initial values of the conserved variables - for (k=kstart; k= 3.0*H.ydglobal/4.0) - { - C.density[id] = d2; - C.momentum_x[id] = v2*C.density[id]; - C.momentum_y[id] = C.density[id]*A*sin(4*PI*x_pos); + #endif +#endif + } else if (y_pos >= 3.0 * H.ydglobal / 4.0) { + C.density[id] = d2; + C.momentum_x[id] = v2 * C.density[id]; + C.momentum_y[id] = C.density[id] * A * sin(4 * PI * x_pos); C.momentum_z[id] = 0.0; - #ifdef SCALAR - #ifdef BASIC_SCALAR +#ifdef SCALAR + #ifdef BASIC_SCALAR C.basic_scalar[id] = 0.0; - #endif - #endif + #endif +#endif } // inner half of slab - else - { - C.density[id] = d1; - C.momentum_x[id] = v1*C.density[id]; - C.momentum_y[id] = C.density[id]*A*sin(4*PI*x_pos); + else { + C.density[id] = d1; + C.momentum_x[id] = v1 * C.density[id]; + C.momentum_y[id] = C.density[id] * A * sin(4 * PI * x_pos); C.momentum_z[id] = 0.0; - #ifdef SCALAR - #ifdef BASIC_SCALAR - C.basic_scalar[id] = 1.0*d1; - #endif - #endif +#ifdef SCALAR + #ifdef BASIC_SCALAR + C.basic_scalar[id] = 1.0 * d1; + #endif +#endif } - C.Energy[id] = P/(gama-1.0) + 0.5*(C.momentum_x[id]*C.momentum_x[id] + C.momentum_y[id]*C.momentum_y[id])/C.density[id]; - #ifdef DE - C.GasEnergy[id] = P/(gama-1.0); - #endif //DE - - + C.Energy[id] = + P / (gama - 1.0) + 0.5 * + (C.momentum_x[id] * C.momentum_x[id] + + C.momentum_y[id] * C.momentum_y[id]) / + C.density[id]; +#ifdef DE + C.GasEnergy[id] = P / (gama - 1.0); +#endif // DE } } } - } - /*! \fn void KH_res_ind() - * \brief Initialize the grid with a Kelvin-Helmholtz instability whose modes are resolution independent. */ + * \brief Initialize the grid with a Kelvin-Helmholtz instability whose modes + * are resolution independent. */ void Grid3D::KH_res_ind() { int i, j, k, id; @@ -730,14 +701,13 @@ void Grid3D::KH_res_ind() Real r, yc, zc, phi; Real d1, d2, v1, v2, P, dy, A; istart = H.n_ghost; - iend = H.nx-H.n_ghost; + iend = H.nx - H.n_ghost; jstart = H.n_ghost; - jend = H.ny-H.n_ghost; + jend = H.ny - H.n_ghost; if (H.nz > 1) { kstart = H.n_ghost; - kend = H.nz-H.n_ghost; - } - else { + kend = H.nz - H.n_ghost; + } else { kstart = 0; kend = H.nz; } @@ -746,99 +716,176 @@ void Grid3D::KH_res_ind() yc = 0.0; zc = 0.0; - d1 = 100.0; // inner density - d2 = 1.0; // outer density - v1 = 10.5; // inner velocity - v2 = 9.5; // outer velocity - P = 2.5; // pressure - dy = 0.05; // width of ramp function (see Robertson 2009) - A = 0.1; // amplitude of the perturbation + d1 = 100.0; // inner density + d2 = 1.0; // outer density + v1 = 10.5; // inner velocity + v2 = 9.5; // outer velocity + P = 2.5; // pressure + dy = 0.05; // width of ramp function (see Robertson 2009) + A = 0.1; // amplitude of the perturbation - // Note: ramp function from Robertson 2009 is 1/Ramp(y) = (1 + exp(2*(y-0.25)/dy))*(1 + exp(2*(0.75 - y)/dy)); + // Note: ramp function from Robertson 2009 is 1/Ramp(y) = (1 + + // exp(2*(y-0.25)/dy))*(1 + exp(2*(0.75 - y)/dy)); // set the initial values of the conserved variables - for (k=kstart; k 0.5) - { - C.density[id] = d1 - (d1-d2)*exp( -0.5*pow(y_pos-0.75 - sqrt(-2.0*dy*dy*log(0.5)),2)/(dy*dy) ); - C.momentum_x[id] = v1*C.density[id] - C.density[id] * (v1-v2) * exp( -0.5*pow(y_pos-0.75 - sqrt(-2.0*dy*dy*log(0.5)),2) /(dy*dy) ); - C.momentum_y[id] = C.density[id] * A*sin(4*PI*x_pos) * exp( -0.5*pow(y_pos-0.75 - sqrt(-2.0*dy*dy*log(0.5)),2)/(dy*dy) ) ; - } - else - { - C.density[id] = d1 - (d1-d2)*exp( -0.5*pow(y_pos-0.25 + sqrt(-2.0*dy*dy*log(0.5)),2)/(dy*dy) ); - C.momentum_x[id] = v1*C.density[id] - C.density[id] * (v1 - v2) * exp( -0.5*pow(y_pos-0.25 + sqrt(-2.0*dy*dy*log(0.5)),2) /(dy*dy) ); - C.momentum_y[id] = C.density[id] * A*sin(4*PI*x_pos) * exp( -0.5*pow(y_pos-0.25 + sqrt(-2.0*dy*dy*log(0.5)),2)/(dy*dy) ); + if (fabs(y_pos - 0.5) < 0.25) { + if (y_pos > 0.5) { + C.density[id] = + d1 - + (d1 - d2) * + exp(-0.5 * + pow(y_pos - 0.75 - sqrt(-2.0 * dy * dy * log(0.5)), 2) / + (dy * dy)); + C.momentum_x[id] = + v1 * C.density[id] - + C.density[id] * (v1 - v2) * + exp(-0.5 * + pow(y_pos - 0.75 - sqrt(-2.0 * dy * dy * log(0.5)), 2) / + (dy * dy)); + C.momentum_y[id] = + C.density[id] * A * sin(4 * PI * x_pos) * + exp(-0.5 * + pow(y_pos - 0.75 - sqrt(-2.0 * dy * dy * log(0.5)), 2) / + (dy * dy)); + } else { + C.density[id] = + d1 - + (d1 - d2) * + exp(-0.5 * + pow(y_pos - 0.25 + sqrt(-2.0 * dy * dy * log(0.5)), 2) / + (dy * dy)); + C.momentum_x[id] = + v1 * C.density[id] - + C.density[id] * (v1 - v2) * + exp(-0.5 * + pow(y_pos - 0.25 + sqrt(-2.0 * dy * dy * log(0.5)), 2) / + (dy * dy)); + C.momentum_y[id] = + C.density[id] * A * sin(4 * PI * x_pos) * + exp(-0.5 * + pow(y_pos - 0.25 + sqrt(-2.0 * dy * dy * log(0.5)), 2) / + (dy * dy)); } } // outer fluid - else - { - if (y_pos > 0.5) - { - C.density[id] = d2 + (d1-d2)*exp( -0.5*pow(y_pos-0.75 + sqrt(-2.0*dy*dy*log(0.5)),2)/(dy*dy) ); - C.momentum_x[id] = v2*C.density[id] + C.density[id] * (v1 - v2) * exp( -0.5*pow(y_pos-0.75 + sqrt(-2.0*dy*dy*log(0.5)),2)/(dy*dy) ); - C.momentum_y[id] = C.density[id] * A*sin(4*PI*x_pos) * exp( -0.5*pow(y_pos-0.75 + sqrt(-2.0*dy*dy*log(0.5)),2)/(dy*dy) ); - } - else - { - C.density[id] = d2 + (d1-d2)*exp( -0.5*pow(y_pos-0.25 - sqrt(-2.0*dy*dy*log(0.5)),2)/(dy*dy) ); - C.momentum_x[id] = v2*C.density[id] + C.density[id] * (v1 - v2) * exp( -0.5*pow(y_pos-0.25 - sqrt(-2.0*dy*dy*log(0.5)),2)/(dy*dy) ); - C.momentum_y[id] = C.density[id] * A*sin(4*PI*x_pos) * exp( -0.5*pow(y_pos-0.25 - sqrt(-2.0*dy*dy*log(0.5)),2)/(dy*dy) ); + else { + if (y_pos > 0.5) { + C.density[id] = + d2 + + (d1 - d2) * + exp(-0.5 * + pow(y_pos - 0.75 + sqrt(-2.0 * dy * dy * log(0.5)), 2) / + (dy * dy)); + C.momentum_x[id] = + v2 * C.density[id] + + C.density[id] * (v1 - v2) * + exp(-0.5 * + pow(y_pos - 0.75 + sqrt(-2.0 * dy * dy * log(0.5)), 2) / + (dy * dy)); + C.momentum_y[id] = + C.density[id] * A * sin(4 * PI * x_pos) * + exp(-0.5 * + pow(y_pos - 0.75 + sqrt(-2.0 * dy * dy * log(0.5)), 2) / + (dy * dy)); + } else { + C.density[id] = + d2 + + (d1 - d2) * + exp(-0.5 * + pow(y_pos - 0.25 - sqrt(-2.0 * dy * dy * log(0.5)), 2) / + (dy * dy)); + C.momentum_x[id] = + v2 * C.density[id] + + C.density[id] * (v1 - v2) * + exp(-0.5 * + pow(y_pos - 0.25 - sqrt(-2.0 * dy * dy * log(0.5)), 2) / + (dy * dy)); + C.momentum_y[id] = + C.density[id] * A * sin(4 * PI * x_pos) * + exp(-0.5 * + pow(y_pos - 0.25 - sqrt(-2.0 * dy * dy * log(0.5)), 2) / + (dy * dy)); } - } - //C.momentum_y[id] = C.density[id] * A*sin(4*PI*x_pos); + // C.momentum_y[id] = C.density[id] * A*sin(4*PI*x_pos); C.momentum_z[id] = 0.0; // cylindrical version (3D only) - r = sqrt((z_pos-zc)*(z_pos-zc) + (y_pos-yc)*(y_pos-yc)); // center the cylinder at yc, zc - phi = atan2((z_pos-zc), (y_pos-yc)); + r = sqrt((z_pos - zc) * (z_pos - zc) + + (y_pos - yc) * (y_pos - yc)); // center the cylinder at yc, zc + phi = atan2((z_pos - zc), (y_pos - yc)); - if (r < 0.25) // inside the cylinder + if (r < 0.25) // inside the cylinder { - C.density[id] = d1 - (d1-d2)*exp( -0.5*pow(r-0.25 - sqrt(-2.0*dy*dy*log(0.5)),2)/(dy*dy) ); - C.momentum_x[id] = v1*C.density[id] - C.density[id] * exp( -0.5*pow(r-0.25 - sqrt(-2.0*dy*dy*log(0.5)),2)/(dy*dy) ); - C.momentum_y[id] = cos(phi) * C.density[id] * A*sin(4*PI*x_pos) * exp( -0.5*pow(r-0.25 + sqrt(-2.0*dy*dy*log(0.5)),2)/(dy*dy) ); - C.momentum_z[id] = sin(phi) * C.density[id] * A*sin(4*PI*x_pos) * exp( -0.5*pow(r-0.25 + sqrt(-2.0*dy*dy*log(0.5)),2)/(dy*dy) ); - } - else // outside the cylinder + C.density[id] = + d1 - (d1 - d2) * + exp(-0.5 * + pow(r - 0.25 - sqrt(-2.0 * dy * dy * log(0.5)), 2) / + (dy * dy)); + C.momentum_x[id] = + v1 * C.density[id] - + C.density[id] * + exp(-0.5 * + pow(r - 0.25 - sqrt(-2.0 * dy * dy * log(0.5)), 2) / + (dy * dy)); + C.momentum_y[id] = + cos(phi) * C.density[id] * A * sin(4 * PI * x_pos) * + exp(-0.5 * pow(r - 0.25 + sqrt(-2.0 * dy * dy * log(0.5)), 2) / + (dy * dy)); + C.momentum_z[id] = + sin(phi) * C.density[id] * A * sin(4 * PI * x_pos) * + exp(-0.5 * pow(r - 0.25 + sqrt(-2.0 * dy * dy * log(0.5)), 2) / + (dy * dy)); + } else // outside the cylinder { - C.density[id] = d2 + (d1-d2)*exp( -0.5*pow(r-0.25 + sqrt(-2.0*dy*dy*log(0.5)),2)/(dy*dy) ); - C.momentum_x[id] = v2*C.density[id] + C.density[id] * exp( -0.5*pow(r-0.25 + sqrt(-2.0*dy*dy*log(0.5)),2)/(dy*dy) ); - C.momentum_y[id] = cos(phi) * C.density[id] * A*sin(4*PI*x_pos) * (1.0 - exp( -0.5*pow(r-0.25 + sqrt(-2.0*dy*dy*log(0.5)),2)/(dy*dy) )); - C.momentum_z[id] = sin(phi) * C.density[id] * A*sin(4*PI*x_pos) * (1.0 - exp( -0.5*pow(r-0.25 + sqrt(-2.0*dy*dy*log(0.5)),2)/(dy*dy) )); + C.density[id] = + d2 + (d1 - d2) * + exp(-0.5 * + pow(r - 0.25 + sqrt(-2.0 * dy * dy * log(0.5)), 2) / + (dy * dy)); + C.momentum_x[id] = + v2 * C.density[id] + + C.density[id] * + exp(-0.5 * + pow(r - 0.25 + sqrt(-2.0 * dy * dy * log(0.5)), 2) / + (dy * dy)); + C.momentum_y[id] = + cos(phi) * C.density[id] * A * sin(4 * PI * x_pos) * + (1.0 - + exp(-0.5 * pow(r - 0.25 + sqrt(-2.0 * dy * dy * log(0.5)), 2) / + (dy * dy))); + C.momentum_z[id] = + sin(phi) * C.density[id] * A * sin(4 * PI * x_pos) * + (1.0 - + exp(-0.5 * pow(r - 0.25 + sqrt(-2.0 * dy * dy * log(0.5)), 2) / + (dy * dy))); } - // No matter what we do with the density and momentum, set the Energy and GasEnergy appropriately - mx = C.momentum_x[id]; - my = C.momentum_y[id]; - mz = C.momentum_z[id]; - C.Energy[id] = P/(gama-1.0) + 0.5*(mx*mx + my*my + mz*mz)/C.density[id]; - - #ifdef DE - C.GasEnergy[id] = P/(gama-1.0); - #endif // DE - - }// i loop - }// j loop - }//k loop - - + // No matter what we do with the density and momentum, set the Energy + // and GasEnergy appropriately + mx = C.momentum_x[id]; + my = C.momentum_y[id]; + mz = C.momentum_z[id]; + C.Energy[id] = P / (gama - 1.0) + + 0.5 * (mx * mx + my * my + mz * mz) / C.density[id]; + +#ifdef DE + C.GasEnergy[id] = P / (gama - 1.0); +#endif // DE + + } // i loop + } // j loop + } // k loop } - - /*! \fn void Rayleigh_Taylor() * \brief Initialize the grid with a 2D Rayleigh-Taylor instability. */ void Grid3D::Rayleigh_Taylor() @@ -848,51 +895,49 @@ void Grid3D::Rayleigh_Taylor() Real dl, du, vy, g, P, P_0; dl = 1.0; du = 2.0; - g = -0.1; + g = -0.1; // set the initial values of the conserved variables - for (j=H.n_ghost; j= 0.2 && r < 0.4) { - vx += -sin(phi)*(2.0-5.0*r) + v_boost; - vy += cos(phi)*(2.0-5.0*r); - P += 9.0 - 4.0*log(0.2) + 0.5*25.0*r*r - 20.0*r + 4.0*log(r); - } - else { - vx += 0.0; - vy += 0.0; - P += 3.0 + 4.0*log(2.0); - } - } - vx = vx/N; - vy = vy/N; - P = P/N; -*/ + r = sqrt((x_pos - xc) * (x_pos - xc) + (y_pos - yc) * (y_pos - yc)); + phi = atan2((y_pos - yc), (x_pos - xc)); + + /* + // set vx, vy, P to zero before integrating + vx = 0.0; + vy = 0.0; + P = 0.0; + + // monte carlo sample to get an integrated value for vx, vy, P + for (int ii = 0; ii= 0.2 && r < 0.4) { + vx += -sin(phi)*(2.0-5.0*r) + v_boost; + vy += cos(phi)*(2.0-5.0*r); + P += 9.0 - 4.0*log(0.2) + 0.5*25.0*r*r - 20.0*r + 4.0*log(r); + } + else { + vx += 0.0; + vy += 0.0; + P += 3.0 + 4.0*log(2.0); + } + } + vx = vx/N; + vy = vy/N; + P = P/N; + */ if (r < 0.2) { - vx = -sin(phi)*5.0*r + v_boost; - vy = cos(phi)*5.0*r; - P = 5.0 + 0.5*25.0*r*r; - } - else if (r >= 0.2 && r < 0.4) { - vx = -sin(phi)*(2.0-5.0*r) + v_boost; - vy = cos(phi)*(2.0-5.0*r); - P = 9.0 - 4.0*log(0.2) + 0.5*25.0*r*r - 20.0*r + 4.0*log(r); - } - else { + vx = -sin(phi) * 5.0 * r + v_boost; + vy = cos(phi) * 5.0 * r; + P = 5.0 + 0.5 * 25.0 * r * r; + } else if (r >= 0.2 && r < 0.4) { + vx = -sin(phi) * (2.0 - 5.0 * r) + v_boost; + vy = cos(phi) * (2.0 - 5.0 * r); + P = 9.0 - 4.0 * log(0.2) + 0.5 * 25.0 * r * r - 20.0 * r + 4.0 * log(r); + } else { vx = 0.0; vy = 0.0; - P = 3.0 + 4.0*log(2.0); + P = 3.0 + 4.0 * log(2.0); } // set P constant for modified Gresho problem - //P = 5.5; + // P = 5.5; // set values of conserved variables - C.density[id] = d; - C.momentum_x[id] = d*vx; - C.momentum_y[id] = d*vy; + C.density[id] = d; + C.momentum_x[id] = d * vx; + C.momentum_y[id] = d * vy; C.momentum_z[id] = 0.0; - C.Energy[id] = P/(gama-1.0) + 0.5*d*(vx*vx + vy*vy); - #ifdef DE - C.GasEnergy[id] = P/(gama-1.0); - #endif // DE + C.Energy[id] = P / (gama - 1.0) + 0.5 * d * (vx * vx + vy * vy); +#ifdef DE + C.GasEnergy[id] = P / (gama - 1.0); +#endif // DE - //r = sqrt((x_pos-xc)*(x_pos-xc) + (y_pos-yc)*(y_pos-yc)); - //printf("%f %f %f %f %f\n", x_pos, y_pos, r, vx, vy); + // r = sqrt((x_pos-xc)*(x_pos-xc) + (y_pos-yc)*(y_pos-yc)); + // printf("%f %f %f %f %f\n", x_pos, y_pos, r, vx, vy); } } - - } - - /*! \fn void Implosion_2D() * \brief Implosion test described in Liska, 2003. */ void Grid3D::Implosion_2D() @@ -1008,44 +1047,41 @@ void Grid3D::Implosion_2D() Real x_pos, y_pos, z_pos; Real P; - // set the initial values of the conserved variables - for (j=H.n_ghost; j 1) { jstart = H.n_ghost; - jend = H.ny-H.n_ghost; - } - else { + jend = H.ny - H.n_ghost; + } else { jstart = 0; jend = H.ny; } if (H.nz > 1) { kstart = H.n_ghost; - kend = H.nz-H.n_ghost; - } - else { + kend = H.nz - H.n_ghost; + } else { kstart = 0; kend = H.nz; } // set initial values of conserved variables - for(k=kstart; k= kstart) and (j >= jstart) and (i >= istart)) - { - C.density[id] = 0; + if ((k >= kstart) and (j >= jstart) and (i >= istart)) { + C.density[id] = 0; C.momentum_x[id] = 0; C.momentum_y[id] = 0; C.momentum_z[id] = 0; - C.Energy[id] = 0; + C.Energy[id] = 0; - #ifdef DE +#ifdef DE C.GasEnergy[id] = 0; - #endif +#endif } } } } } -void Grid3D::Zeldovich_Pancake( struct parameters P ){ - - #ifndef COSMOLOGY - chprintf( "To run a Zeldovich Pancake COSMOLOGY has to be turned ON \n" ); +void Grid3D::Zeldovich_Pancake(struct parameters P) +{ +#ifndef COSMOLOGY + chprintf("To run a Zeldovich Pancake COSMOLOGY has to be turned ON \n"); exit(-1); - #else - +#else int i, j, k, id; Real x_pos, y_pos, z_pos; @@ -1473,81 +1505,76 @@ void Grid3D::Zeldovich_Pancake( struct parameters P ){ h = H0 / 100; Omega_M = P.Omega_M; - chprintf( " h = %f \n", h ); - chprintf( " Omega_M = %f \n", Omega_M ); + chprintf(" h = %f \n", h); + chprintf(" Omega_M = %f \n", Omega_M); - H0 /= 1000; //[km/s / kpc] + H0 /= 1000; //[km/s / kpc] G = G_COSMO; - rho_0 = 3*H0*H0 / ( 8*M_PI*G ) * Omega_M /h / h; + rho_0 = 3 * H0 * H0 / (8 * M_PI * G) * Omega_M / h / h; z_zeldovich = 1; z_init = P.Init_redshift; - chprintf( " rho_0 = %f \n", rho_0 ); - chprintf( " z_init = %f \n", z_init ); - chprintf( " z_zeldovich = %f \n", z_zeldovich ); + chprintf(" rho_0 = %f \n", rho_0); + chprintf(" z_init = %f \n", z_init); + chprintf(" z_zeldovich = %f \n", z_zeldovich); x_center = H.xdglobal / 2; - chprintf( " Peak Center = %f \n", x_center ); + chprintf(" Peak Center = %f \n", x_center); T_init = 100; - chprintf( " T initial = %f \n", T_init ); - - k_x = 2 * M_PI / H.xdglobal; + chprintf(" T initial = %f \n", T_init); + k_x = 2 * M_PI / H.xdglobal; char filename[100]; // create the filename to read from strcpy(filename, P.indir); strcat(filename, "ics_zeldovich.dat"); - chprintf( " Loading ICs File: %s\n", filename); + chprintf(" Loading ICs File: %s\n", filename); real_vector_t ics_values; - ifstream file_in( filename ); + ifstream file_in(filename); string line; Real ic_val; - if (file_in.is_open()){ - while ( getline (file_in, line) ){ - ic_val = atof( line.c_str() ); - ics_values.push_back( ic_val ); + if (file_in.is_open()) { + while (getline(file_in, line)) { + ic_val = atof(line.c_str()); + ics_values.push_back(ic_val); // chprintf("%f\n", ic_val); } file_in.close(); - } - else{ + } else { chprintf(" Error: Unable to open ics zeldovich file\n"); exit(1); } int nPoints = 256; - - Real dens, vel, temp, U, E, gamma; gamma = P.gamma; int index; // set the initial values of the conserved variables - for (k=H.n_ghost; k +#include "../global/global_cuda.h" //provides TPB +#include "../grid/cuda_boundaries.h" // provides PackBuffers3D and UnpackBuffers3D +#include "../io/io.h" +#include "../mpi/mpi_routines.h" +#include "../utils/error_handling.h" #include "../utils/gpu.hpp" -#include "../global/global_cuda.h"//provides TPB -#include "../grid/cuda_boundaries.h"// provides PackBuffers3D and UnpackBuffers3D +#include "grid3D.h" #ifdef MPI_CHOLLA void Grid3D::Set_Boundaries_MPI(struct parameters P) { - int flags[6] = {0,0,0,0,0,0}; + int flags[6] = {0, 0, 0, 0, 0, 0}; - if(Check_Custom_Boundary(&flags[0],P)) - { - //perform custom boundaries + if (Check_Custom_Boundary(&flags[0], P)) { + // perform custom boundaries Custom_Boundary(P.custom_bcnd); } - Set_Boundaries_MPI_BLOCK(flags,P); + Set_Boundaries_MPI_BLOCK(flags, P); #ifdef GRAVITY - Grav.Set_Boundary_Flags( flags ); + Grav.Set_Boundary_Flags(flags); #endif - } void Grid3D::Set_Boundaries_MPI_BLOCK(int *flags, struct parameters P) { #ifdef PARTICLES // Clear the vectors that contain the particles IDs to be transfred - if ( Particles.TRANSFER_PARTICLES_BOUNDARIES ){ + if (Particles.TRANSFER_PARTICLES_BOUNDARIES) { Particles.Clear_Particles_For_Transfer(); - Particles.Select_Particles_to_Transfer_All( flags ); + Particles.Select_Particles_to_Transfer_All(flags); } #endif if (H.nx > 1) { - /* Step 1 - Send MPI x-boundaries */ - if (flags[0]==5 || flags[1]==5) { + if (flags[0] == 5 || flags[1] == 5) { Load_and_Send_MPI_Comm_Buffers(0, flags); } @@ -51,20 +48,19 @@ void Grid3D::Set_Boundaries_MPI_BLOCK(int *flags, struct parameters P) /* Step 3 - Receive MPI x-boundaries */ - if (flags[0]==5 || flags[1]==5) { + if (flags[0] == 5 || flags[1] == 5) { Wait_and_Unload_MPI_Comm_Buffers(0, flags); - #ifdef PARTICLES + #ifdef PARTICLES // Unload Particles buffers when transfering Particles - if (Particles.TRANSFER_PARTICLES_BOUNDARIES) Wait_and_Unload_MPI_Comm_Particles_Buffers_BLOCK(0, flags); - #endif + if (Particles.TRANSFER_PARTICLES_BOUNDARIES) + Wait_and_Unload_MPI_Comm_Particles_Buffers_BLOCK(0, flags); + #endif } - } MPI_Barrier(world); if (H.ny > 1) { - /* Step 4 - Send MPI y-boundaries */ - if (flags[2]==5 || flags[3]==5) { + if (flags[2] == 5 || flags[3] == 5) { Load_and_Send_MPI_Comm_Buffers(1, flags); } @@ -73,19 +69,19 @@ void Grid3D::Set_Boundaries_MPI_BLOCK(int *flags, struct parameters P) Set_Boundaries(3, flags); /* Step 6 - Receive MPI y-boundaries */ - if (flags[2]==5 || flags[3]==5) { + if (flags[2] == 5 || flags[3] == 5) { Wait_and_Unload_MPI_Comm_Buffers(1, flags); - #ifdef PARTICLES + #ifdef PARTICLES // Unload Particles buffers when transfering Particles - if (Particles.TRANSFER_PARTICLES_BOUNDARIES) Wait_and_Unload_MPI_Comm_Particles_Buffers_BLOCK(1, flags); - #endif + if (Particles.TRANSFER_PARTICLES_BOUNDARIES) + Wait_and_Unload_MPI_Comm_Particles_Buffers_BLOCK(1, flags); + #endif } } MPI_Barrier(world); if (H.nz > 1) { - /* Step 7 - Send MPI z-boundaries */ - if (flags[4]==5 || flags[5]==5) { + if (flags[4] == 5 || flags[5] == 5) { Load_and_Send_MPI_Comm_Buffers(2, flags); } @@ -94,789 +90,843 @@ void Grid3D::Set_Boundaries_MPI_BLOCK(int *flags, struct parameters P) Set_Boundaries(5, flags); /* Step 9 - Receive MPI z-boundaries */ - if (flags[4]==5 || flags[5]==5) { + if (flags[4] == 5 || flags[5] == 5) { Wait_and_Unload_MPI_Comm_Buffers(2, flags); - #ifdef PARTICLES + #ifdef PARTICLES // Unload Particles buffers when transfering Particles - if (Particles.TRANSFER_PARTICLES_BOUNDARIES) Wait_and_Unload_MPI_Comm_Particles_Buffers_BLOCK(2, flags); - #endif + if (Particles.TRANSFER_PARTICLES_BOUNDARIES) + Wait_and_Unload_MPI_Comm_Particles_Buffers_BLOCK(2, flags); + #endif } } #ifdef PARTICLES - if ( Particles.TRANSFER_PARTICLES_BOUNDARIES) Finish_Particles_Transfer(); + if (Particles.TRANSFER_PARTICLES_BOUNDARIES) Finish_Particles_Transfer(); #endif - } - -int Grid3D::Load_Hydro_DeviceBuffer_X0 ( Real *send_buffer_x0 ){ - +int Grid3D::Load_Hydro_DeviceBuffer_X0(Real *send_buffer_x0) +{ // 1D if (H.ny == 1 && H.nz == 1) { int idxoffset = H.n_ghost; - PackBuffers3D(send_buffer_x0,C.device,H.nx,H.ny,H.n_fields,H.n_cells,idxoffset,H.n_ghost,1,1); + PackBuffers3D(send_buffer_x0, C.device, H.nx, H.ny, H.n_fields, H.n_cells, + idxoffset, H.n_ghost, 1, 1); } // 2D if (H.ny > 1 && H.nz == 1) { - int idxoffset = H.n_ghost + H.n_ghost*H.nx; - PackBuffers3D(send_buffer_x0,C.device,H.nx,H.ny,H.n_fields,H.n_cells,idxoffset,H.n_ghost,H.ny-2*H.n_ghost,1); + int idxoffset = H.n_ghost + H.n_ghost * H.nx; + PackBuffers3D(send_buffer_x0, C.device, H.nx, H.ny, H.n_fields, H.n_cells, + idxoffset, H.n_ghost, H.ny - 2 * H.n_ghost, 1); } // 3D if (H.ny > 1 && H.nz > 1) { - int idxoffset = H.n_ghost + H.n_ghost*H.nx + H.n_ghost*H.nx*H.ny; - PackBuffers3D(send_buffer_x0,C.device,H.nx,H.ny,H.n_fields,H.n_cells,idxoffset,H.n_ghost,H.ny-2*H.n_ghost,H.nz-2*H.n_ghost); + int idxoffset = H.n_ghost + H.n_ghost * H.nx + H.n_ghost * H.nx * H.ny; + PackBuffers3D(send_buffer_x0, C.device, H.nx, H.ny, H.n_fields, H.n_cells, + idxoffset, H.n_ghost, H.ny - 2 * H.n_ghost, + H.nz - 2 * H.n_ghost); } return x_buffer_length; } - // load right x communication buffer -int Grid3D::Load_Hydro_DeviceBuffer_X1 ( Real *send_buffer_x1 ){ - +int Grid3D::Load_Hydro_DeviceBuffer_X1(Real *send_buffer_x1) +{ // 1D if (H.ny == 1 && H.nz == 1) { - int idxoffset = H.nx-2*H.n_ghost; - PackBuffers3D(send_buffer_x1,C.device,H.nx,H.ny,H.n_fields,H.n_cells,idxoffset,H.n_ghost,1,1); + int idxoffset = H.nx - 2 * H.n_ghost; + PackBuffers3D(send_buffer_x1, C.device, H.nx, H.ny, H.n_fields, H.n_cells, + idxoffset, H.n_ghost, 1, 1); } // 2D if (H.ny > 1 && H.nz == 1) { - int idxoffset = H.nx-2*H.n_ghost + H.n_ghost*H.nx; - PackBuffers3D(send_buffer_x1,C.device,H.nx,H.ny,H.n_fields,H.n_cells,idxoffset,H.n_ghost,H.ny-2*H.n_ghost,1); + int idxoffset = H.nx - 2 * H.n_ghost + H.n_ghost * H.nx; + PackBuffers3D(send_buffer_x1, C.device, H.nx, H.ny, H.n_fields, H.n_cells, + idxoffset, H.n_ghost, H.ny - 2 * H.n_ghost, 1); } // 3D if (H.ny > 1 && H.nz > 1) { - int idxoffset = H.nx-2*H.n_ghost + H.n_ghost*H.nx + H.n_ghost*H.nx*H.ny; - PackBuffers3D(send_buffer_x1,C.device,H.nx,H.ny,H.n_fields,H.n_cells,idxoffset,H.n_ghost,H.ny-2*H.n_ghost,H.nz-2*H.n_ghost); + int idxoffset = + H.nx - 2 * H.n_ghost + H.n_ghost * H.nx + H.n_ghost * H.nx * H.ny; + PackBuffers3D(send_buffer_x1, C.device, H.nx, H.ny, H.n_fields, H.n_cells, + idxoffset, H.n_ghost, H.ny - 2 * H.n_ghost, + H.nz - 2 * H.n_ghost); } return x_buffer_length; } // load left y communication buffer -int Grid3D::Load_Hydro_DeviceBuffer_Y0 ( Real *send_buffer_y0 ){ - +int Grid3D::Load_Hydro_DeviceBuffer_Y0(Real *send_buffer_y0) +{ // 2D if (H.nz == 1) { - int idxoffset = H.n_ghost*H.nx; - PackBuffers3D(send_buffer_y0,C.device,H.nx,H.ny,H.n_fields,H.n_cells,idxoffset,H.nx,H.n_ghost,1); + int idxoffset = H.n_ghost * H.nx; + PackBuffers3D(send_buffer_y0, C.device, H.nx, H.ny, H.n_fields, H.n_cells, + idxoffset, H.nx, H.n_ghost, 1); } // 3D if (H.nz > 1) { - int idxoffset = H.n_ghost*H.nx + H.n_ghost*H.nx*H.ny; - PackBuffers3D(send_buffer_y0,C.device,H.nx,H.ny,H.n_fields,H.n_cells,idxoffset,H.nx,H.n_ghost,H.nz-2*H.n_ghost); + int idxoffset = H.n_ghost * H.nx + H.n_ghost * H.nx * H.ny; + PackBuffers3D(send_buffer_y0, C.device, H.nx, H.ny, H.n_fields, H.n_cells, + idxoffset, H.nx, H.n_ghost, H.nz - 2 * H.n_ghost); } return y_buffer_length; } -int Grid3D::Load_Hydro_DeviceBuffer_Y1 ( Real *send_buffer_y1 ){ - +int Grid3D::Load_Hydro_DeviceBuffer_Y1(Real *send_buffer_y1) +{ // 2D if (H.nz == 1) { - int idxoffset = (H.ny-2*H.n_ghost)*H.nx; - PackBuffers3D(send_buffer_y1,C.device,H.nx,H.ny,H.n_fields,H.n_cells,idxoffset,H.nx,H.n_ghost,1); + int idxoffset = (H.ny - 2 * H.n_ghost) * H.nx; + PackBuffers3D(send_buffer_y1, C.device, H.nx, H.ny, H.n_fields, H.n_cells, + idxoffset, H.nx, H.n_ghost, 1); } // 3D if (H.nz > 1) { - int idxoffset = (H.ny-2*H.n_ghost)*H.nx + H.n_ghost*H.nx*H.ny; - PackBuffers3D(send_buffer_y1,C.device,H.nx,H.ny,H.n_fields,H.n_cells,idxoffset,H.nx,H.n_ghost,H.nz-2*H.n_ghost); + int idxoffset = (H.ny - 2 * H.n_ghost) * H.nx + H.n_ghost * H.nx * H.ny; + PackBuffers3D(send_buffer_y1, C.device, H.nx, H.ny, H.n_fields, H.n_cells, + idxoffset, H.nx, H.n_ghost, H.nz - 2 * H.n_ghost); } return y_buffer_length; - } // load left z communication buffer -int Grid3D::Load_Hydro_DeviceBuffer_Z0 ( Real *send_buffer_z0 ){ - +int Grid3D::Load_Hydro_DeviceBuffer_Z0(Real *send_buffer_z0) +{ // 3D - int idxoffset = H.n_ghost*H.nx*H.ny; - PackBuffers3D(send_buffer_z0,C.device,H.nx,H.ny,H.n_fields,H.n_cells,idxoffset,H.nx,H.ny,H.n_ghost); + int idxoffset = H.n_ghost * H.nx * H.ny; + PackBuffers3D(send_buffer_z0, C.device, H.nx, H.ny, H.n_fields, H.n_cells, + idxoffset, H.nx, H.ny, H.n_ghost); return z_buffer_length; } -int Grid3D::Load_Hydro_DeviceBuffer_Z1 ( Real *send_buffer_z1 ){ - +int Grid3D::Load_Hydro_DeviceBuffer_Z1(Real *send_buffer_z1) +{ // 3D - int idxoffset = (H.nz-2*H.n_ghost)*H.nx*H.ny; - PackBuffers3D(send_buffer_z1,C.device,H.nx,H.ny,H.n_fields,H.n_cells,idxoffset,H.nx,H.ny,H.n_ghost); + int idxoffset = (H.nz - 2 * H.n_ghost) * H.nx * H.ny; + PackBuffers3D(send_buffer_z1, C.device, H.nx, H.ny, H.n_fields, H.n_cells, + idxoffset, H.nx, H.ny, H.n_ghost); return z_buffer_length; } -void Grid3D::Unload_Hydro_DeviceBuffer_X0 ( Real *recv_buffer_x0 ) { - +void Grid3D::Unload_Hydro_DeviceBuffer_X0(Real *recv_buffer_x0) +{ // 1D if (H.ny == 1 && H.nz == 1) { int idxoffset = 0; - UnpackBuffers3D(recv_buffer_x0,C.device,H.nx,H.ny,H.n_fields,H.n_cells,idxoffset,H.n_ghost,1,1); + UnpackBuffers3D(recv_buffer_x0, C.device, H.nx, H.ny, H.n_fields, H.n_cells, + idxoffset, H.n_ghost, 1, 1); } // 2D if (H.ny > 1 && H.nz == 1) { - int idxoffset = H.n_ghost*H.nx; - UnpackBuffers3D(recv_buffer_x0,C.device,H.nx,H.ny,H.n_fields,H.n_cells,idxoffset,H.n_ghost,H.ny-2*H.n_ghost,1); + int idxoffset = H.n_ghost * H.nx; + UnpackBuffers3D(recv_buffer_x0, C.device, H.nx, H.ny, H.n_fields, H.n_cells, + idxoffset, H.n_ghost, H.ny - 2 * H.n_ghost, 1); } // 3D if (H.nz > 1) { - int idxoffset = H.n_ghost*(H.nx+H.nx*H.ny); - UnpackBuffers3D(recv_buffer_x0,C.device,H.nx,H.ny,H.n_fields,H.n_cells,idxoffset,H.n_ghost,H.ny-2*H.n_ghost,H.nz-2*H.n_ghost); + int idxoffset = H.n_ghost * (H.nx + H.nx * H.ny); + UnpackBuffers3D(recv_buffer_x0, C.device, H.nx, H.ny, H.n_fields, H.n_cells, + idxoffset, H.n_ghost, H.ny - 2 * H.n_ghost, + H.nz - 2 * H.n_ghost); } - } -void Grid3D::Unload_Hydro_DeviceBuffer_X1 ( Real *recv_buffer_x1 ) { - +void Grid3D::Unload_Hydro_DeviceBuffer_X1(Real *recv_buffer_x1) +{ // 1D if (H.ny == 1 && H.nz == 1) { int idxoffset = H.nx - H.n_ghost; - UnpackBuffers3D(recv_buffer_x1,C.device,H.nx,H.ny,H.n_fields,H.n_cells,idxoffset,H.n_ghost,1,1); + UnpackBuffers3D(recv_buffer_x1, C.device, H.nx, H.ny, H.n_fields, H.n_cells, + idxoffset, H.n_ghost, 1, 1); } // 2D if (H.ny > 1 && H.nz == 1) { - int idxoffset = H.nx - H.n_ghost + H.n_ghost*H.nx; - UnpackBuffers3D(recv_buffer_x1,C.device,H.nx,H.ny,H.n_fields,H.n_cells,idxoffset,H.n_ghost,H.ny-2*H.n_ghost,1); + int idxoffset = H.nx - H.n_ghost + H.n_ghost * H.nx; + UnpackBuffers3D(recv_buffer_x1, C.device, H.nx, H.ny, H.n_fields, H.n_cells, + idxoffset, H.n_ghost, H.ny - 2 * H.n_ghost, 1); } // 3D if (H.nz > 1) { - int idxoffset = H.nx - H.n_ghost + H.n_ghost*(H.nx+H.nx*H.ny); - UnpackBuffers3D(recv_buffer_x1,C.device,H.nx,H.ny,H.n_fields,H.n_cells,idxoffset,H.n_ghost,H.ny-2*H.n_ghost,H.nz-2*H.n_ghost); + int idxoffset = H.nx - H.n_ghost + H.n_ghost * (H.nx + H.nx * H.ny); + UnpackBuffers3D(recv_buffer_x1, C.device, H.nx, H.ny, H.n_fields, H.n_cells, + idxoffset, H.n_ghost, H.ny - 2 * H.n_ghost, + H.nz - 2 * H.n_ghost); } - } - -void Grid3D::Unload_Hydro_DeviceBuffer_Y0 ( Real *recv_buffer_y0 ) { - +void Grid3D::Unload_Hydro_DeviceBuffer_Y0(Real *recv_buffer_y0) +{ // 2D if (H.nz == 1) { int idxoffset = 0; - UnpackBuffers3D(recv_buffer_y0,C.device,H.nx,H.ny,H.n_fields,H.n_cells,idxoffset,H.nx,H.n_ghost,1); + UnpackBuffers3D(recv_buffer_y0, C.device, H.nx, H.ny, H.n_fields, H.n_cells, + idxoffset, H.nx, H.n_ghost, 1); } // 3D if (H.nz > 1) { - int idxoffset = H.n_ghost*H.nx*H.ny; - UnpackBuffers3D(recv_buffer_y0,C.device,H.nx,H.ny,H.n_fields,H.n_cells,idxoffset,H.nx,H.n_ghost,H.nz-2*H.n_ghost); + int idxoffset = H.n_ghost * H.nx * H.ny; + UnpackBuffers3D(recv_buffer_y0, C.device, H.nx, H.ny, H.n_fields, H.n_cells, + idxoffset, H.nx, H.n_ghost, H.nz - 2 * H.n_ghost); } - } - -void Grid3D::Unload_Hydro_DeviceBuffer_Y1 ( Real *recv_buffer_y1 ) { - +void Grid3D::Unload_Hydro_DeviceBuffer_Y1(Real *recv_buffer_y1) +{ // 2D if (H.nz == 1) { - int idxoffset = (H.ny-H.n_ghost)*H.nx; - UnpackBuffers3D(recv_buffer_y1,C.device,H.nx,H.ny,H.n_fields,H.n_cells,idxoffset,H.nx,H.n_ghost,1); + int idxoffset = (H.ny - H.n_ghost) * H.nx; + UnpackBuffers3D(recv_buffer_y1, C.device, H.nx, H.ny, H.n_fields, H.n_cells, + idxoffset, H.nx, H.n_ghost, 1); } // 3D if (H.nz > 1) { - int idxoffset = (H.ny-H.n_ghost)*H.nx + H.n_ghost*H.nx*H.ny; - UnpackBuffers3D(recv_buffer_y1,C.device,H.nx,H.ny,H.n_fields,H.n_cells,idxoffset,H.nx,H.n_ghost,H.nz-2*H.n_ghost); + int idxoffset = (H.ny - H.n_ghost) * H.nx + H.n_ghost * H.nx * H.ny; + UnpackBuffers3D(recv_buffer_y1, C.device, H.nx, H.ny, H.n_fields, H.n_cells, + idxoffset, H.nx, H.n_ghost, H.nz - 2 * H.n_ghost); } - } - - -void Grid3D::Unload_Hydro_DeviceBuffer_Z0 ( Real *recv_buffer_z0 ) { - +void Grid3D::Unload_Hydro_DeviceBuffer_Z0(Real *recv_buffer_z0) +{ // 3D int idxoffset = 0; - UnpackBuffers3D(recv_buffer_z0,C.device,H.nx,H.ny,H.n_fields,H.n_cells,idxoffset,H.nx,H.ny,H.n_ghost); + UnpackBuffers3D(recv_buffer_z0, C.device, H.nx, H.ny, H.n_fields, H.n_cells, + idxoffset, H.nx, H.ny, H.n_ghost); } - -void Grid3D::Unload_Hydro_DeviceBuffer_Z1 ( Real *recv_buffer_z1 ) { - +void Grid3D::Unload_Hydro_DeviceBuffer_Z1(Real *recv_buffer_z1) +{ // 3D - int idxoffset = (H.nz-H.n_ghost)*H.nx*H.ny; - UnpackBuffers3D(recv_buffer_z1,C.device,H.nx,H.ny,H.n_fields,H.n_cells,idxoffset,H.nx,H.ny,H.n_ghost); + int idxoffset = (H.nz - H.n_ghost) * H.nx * H.ny; + UnpackBuffers3D(recv_buffer_z1, C.device, H.nx, H.ny, H.n_fields, H.n_cells, + idxoffset, H.nx, H.ny, H.n_ghost); } void Grid3D::Load_and_Send_MPI_Comm_Buffers(int dir, int *flags) { - #ifdef PARTICLES // Select which particles need to be transfred for this direction - // if ( Particles.TRANSFER_PARTICLES_BOUNDARIES) Particles.Select_Particles_to_Transfer( dir ); + // if ( Particles.TRANSFER_PARTICLES_BOUNDARIES) + // Particles.Select_Particles_to_Transfer( dir ); // Initialize MPI requests for particles transfers int ireq_n_particles, ireq_particles_transfer; - ireq_n_particles = 0; + ireq_n_particles = 0; ireq_particles_transfer = 0; #endif int ireq; ireq = 0; - int xbsize = x_buffer_length, - ybsize = y_buffer_length, + int xbsize = x_buffer_length, ybsize = y_buffer_length, zbsize = z_buffer_length; int buffer_length; - // Flag to omit the transfer of the main buffer when tranferring the particles buffer + // Flag to omit the transfer of the main buffer when tranferring the particles + // buffer bool transfer_main_buffer = true; /* x boundaries */ - if(dir == 0) - { - if (flags[0]==5) { - + if (dir == 0) { + if (flags[0] == 5) { // load left x communication buffer - if ( H.TRANSFER_HYDRO_BOUNDARIES ) - { + if (H.TRANSFER_HYDRO_BOUNDARIES) { buffer_length = Load_Hydro_DeviceBuffer_X0(d_send_buffer_x0); - #ifndef MPI_GPU - cudaMemcpy(h_send_buffer_x0, d_send_buffer_x0, xbsize*sizeof(Real), - cudaMemcpyDeviceToHost); - #endif + #ifndef MPI_GPU + cudaMemcpy(h_send_buffer_x0, d_send_buffer_x0, xbsize * sizeof(Real), + cudaMemcpyDeviceToHost); + #endif } - #ifdef GRAVITY - if ( Grav.TRANSFER_POTENTIAL_BOUNDARIES ){ - #ifdef GRAVITY_GPU - buffer_length = Load_Gravity_Potential_To_Buffer_GPU( 0, 0, d_send_buffer_x0, 0 ); - #ifndef MPI_GPU - cudaMemcpy(h_send_buffer_x0, d_send_buffer_x0, xbsize*sizeof(Real), - cudaMemcpyDeviceToHost); - #endif - #else - buffer_length = Load_Gravity_Potential_To_Buffer( 0, 0, h_send_buffer_x0, 0 ); - #endif - - } - #ifdef SOR - if ( Grav.Poisson_solver.TRANSFER_POISSON_BOUNDARIES ) buffer_length = Load_Poisson_Boundary_To_Buffer( 0, 0, h_send_buffer_x0 ); - #endif //SOR - #endif //GRAVITY - - #ifdef PARTICLES - if ( Particles.TRANSFER_DENSITY_BOUNDARIES) { - #ifdef PARTICLES_GPU - buffer_length = Load_Particles_Density_Boundary_to_Buffer_GPU( 0, 0, d_send_buffer_x0 ); - #ifndef MPI_GPU - cudaMemcpy(h_send_buffer_x0, d_send_buffer_x0, xbsize*sizeof(Real), - cudaMemcpyDeviceToHost); - #endif - #else - #ifndef MPI_GPU - buffer_length = Load_Particles_Density_Boundary_to_Buffer( 0, 0, h_send_buffer_x0 ); - #else - buffer_length = Load_Particles_Density_Boundary_to_Buffer( 0, 0, h_send_buffer_x0_particles ); - cudaMemcpy(d_send_buffer_x0, h_send_buffer_x0_particles, buffer_length*sizeof(Real), cudaMemcpyHostToDevice); - #endif - #endif + #ifdef GRAVITY + if (Grav.TRANSFER_POTENTIAL_BOUNDARIES) { + #ifdef GRAVITY_GPU + buffer_length = + Load_Gravity_Potential_To_Buffer_GPU(0, 0, d_send_buffer_x0, 0); + #ifndef MPI_GPU + cudaMemcpy(h_send_buffer_x0, d_send_buffer_x0, xbsize * sizeof(Real), + cudaMemcpyDeviceToHost); + #endif + #else + buffer_length = + Load_Gravity_Potential_To_Buffer(0, 0, h_send_buffer_x0, 0); + #endif } - else if ( Particles.TRANSFER_PARTICLES_BOUNDARIES ){ - Load_and_Send_Particles_X0( ireq_n_particles, ireq_particles_transfer ); + #ifdef SOR + if (Grav.Poisson_solver.TRANSFER_POISSON_BOUNDARIES) + buffer_length = Load_Poisson_Boundary_To_Buffer(0, 0, h_send_buffer_x0); + #endif // SOR + #endif // GRAVITY + + #ifdef PARTICLES + if (Particles.TRANSFER_DENSITY_BOUNDARIES) { + #ifdef PARTICLES_GPU + buffer_length = Load_Particles_Density_Boundary_to_Buffer_GPU( + 0, 0, d_send_buffer_x0); + #ifndef MPI_GPU + cudaMemcpy(h_send_buffer_x0, d_send_buffer_x0, xbsize * sizeof(Real), + cudaMemcpyDeviceToHost); + #endif + #else + #ifndef MPI_GPU + buffer_length = + Load_Particles_Density_Boundary_to_Buffer(0, 0, h_send_buffer_x0); + #else + buffer_length = Load_Particles_Density_Boundary_to_Buffer( + 0, 0, h_send_buffer_x0_particles); + cudaMemcpy(d_send_buffer_x0, h_send_buffer_x0_particles, + buffer_length * sizeof(Real), cudaMemcpyHostToDevice); + #endif + #endif + } else if (Particles.TRANSFER_PARTICLES_BOUNDARIES) { + Load_and_Send_Particles_X0(ireq_n_particles, ireq_particles_transfer); transfer_main_buffer = false; - ireq_n_particles ++; - ireq_particles_transfer ++; + ireq_n_particles++; + ireq_particles_transfer++; } - #endif + #endif - if ( transfer_main_buffer ){ - #if defined(MPI_GPU) - //post non-blocking receive left x communication buffer + if (transfer_main_buffer) { + #if defined(MPI_GPU) + // post non-blocking receive left x communication buffer MPI_Irecv(d_recv_buffer_x0, buffer_length, MPI_CHREAL, source[0], 0, world, &recv_request[ireq]); - //non-blocking send left x communication buffer + // non-blocking send left x communication buffer MPI_Isend(d_send_buffer_x0, buffer_length, MPI_CHREAL, dest[0], 1, world, &send_request[0]); - #else - //post non-blocking receive left x communication buffer + #else + // post non-blocking receive left x communication buffer MPI_Irecv(h_recv_buffer_x0, buffer_length, MPI_CHREAL, source[0], 0, world, &recv_request[ireq]); - //non-blocking send left x communication buffer + // non-blocking send left x communication buffer MPI_Isend(h_send_buffer_x0, buffer_length, MPI_CHREAL, dest[0], 1, world, &send_request[0]); - #endif + #endif MPI_Request_free(send_request); - //keep track of how many sends and receives are expected + // keep track of how many sends and receives are expected ireq++; } } - if(flags[1]==5) - { + if (flags[1] == 5) { // load right x communication buffer - if ( H.TRANSFER_HYDRO_BOUNDARIES ) - { + if (H.TRANSFER_HYDRO_BOUNDARIES) { buffer_length = Load_Hydro_DeviceBuffer_X1(d_send_buffer_x1); - #ifndef MPI_GPU - cudaMemcpy(h_send_buffer_x1, d_send_buffer_x1, xbsize*sizeof(Real), - cudaMemcpyDeviceToHost); - #endif - //printf("X1 len: %d\n", buffer_length); - } - - #ifdef GRAVITY - if ( Grav.TRANSFER_POTENTIAL_BOUNDARIES ){ - #ifdef GRAVITY_GPU - buffer_length = Load_Gravity_Potential_To_Buffer_GPU( 0, 1, d_send_buffer_x1, 0 ); - #ifndef MPI_GPU - cudaMemcpy(h_send_buffer_x1, d_send_buffer_x1, xbsize*sizeof(Real), - cudaMemcpyDeviceToHost); - #endif - #else - buffer_length = Load_Gravity_Potential_To_Buffer( 0, 1, h_send_buffer_x1, 0 ); - #endif + #ifndef MPI_GPU + cudaMemcpy(h_send_buffer_x1, d_send_buffer_x1, xbsize * sizeof(Real), + cudaMemcpyDeviceToHost); + #endif + // printf("X1 len: %d\n", buffer_length); } - #ifdef SOR - if ( Grav.Poisson_solver.TRANSFER_POISSON_BOUNDARIES ) buffer_length = Load_Poisson_Boundary_To_Buffer( 0, 1, h_send_buffer_x1 ); - #endif //SOR - #endif //GRAVITY - - #ifdef PARTICLES - if ( Particles.TRANSFER_DENSITY_BOUNDARIES) { - #ifdef PARTICLES_GPU - buffer_length = Load_Particles_Density_Boundary_to_Buffer_GPU( 0, 1, d_send_buffer_x1 ); - #ifndef MPI_GPU - cudaMemcpy(h_send_buffer_x1, d_send_buffer_x1, xbsize*sizeof(Real), - cudaMemcpyDeviceToHost); - #endif - #else - #ifndef MPI_GPU - buffer_length = Load_Particles_Density_Boundary_to_Buffer( 0, 1, h_send_buffer_x1 ); - #else - buffer_length = Load_Particles_Density_Boundary_to_Buffer( 0, 1, h_send_buffer_x1_particles ); - cudaMemcpy(d_send_buffer_x1, h_send_buffer_x1_particles, buffer_length*sizeof(Real), cudaMemcpyHostToDevice); - #endif - #endif + + #ifdef GRAVITY + if (Grav.TRANSFER_POTENTIAL_BOUNDARIES) { + #ifdef GRAVITY_GPU + buffer_length = + Load_Gravity_Potential_To_Buffer_GPU(0, 1, d_send_buffer_x1, 0); + #ifndef MPI_GPU + cudaMemcpy(h_send_buffer_x1, d_send_buffer_x1, xbsize * sizeof(Real), + cudaMemcpyDeviceToHost); + #endif + #else + buffer_length = + Load_Gravity_Potential_To_Buffer(0, 1, h_send_buffer_x1, 0); + #endif } - else if ( Particles.TRANSFER_PARTICLES_BOUNDARIES ){ - Load_and_Send_Particles_X1( ireq_n_particles, ireq_particles_transfer ); + #ifdef SOR + if (Grav.Poisson_solver.TRANSFER_POISSON_BOUNDARIES) + buffer_length = Load_Poisson_Boundary_To_Buffer(0, 1, h_send_buffer_x1); + #endif // SOR + #endif // GRAVITY + + #ifdef PARTICLES + if (Particles.TRANSFER_DENSITY_BOUNDARIES) { + #ifdef PARTICLES_GPU + buffer_length = Load_Particles_Density_Boundary_to_Buffer_GPU( + 0, 1, d_send_buffer_x1); + #ifndef MPI_GPU + cudaMemcpy(h_send_buffer_x1, d_send_buffer_x1, xbsize * sizeof(Real), + cudaMemcpyDeviceToHost); + #endif + #else + #ifndef MPI_GPU + buffer_length = + Load_Particles_Density_Boundary_to_Buffer(0, 1, h_send_buffer_x1); + #else + buffer_length = Load_Particles_Density_Boundary_to_Buffer( + 0, 1, h_send_buffer_x1_particles); + cudaMemcpy(d_send_buffer_x1, h_send_buffer_x1_particles, + buffer_length * sizeof(Real), cudaMemcpyHostToDevice); + #endif + #endif + } else if (Particles.TRANSFER_PARTICLES_BOUNDARIES) { + Load_and_Send_Particles_X1(ireq_n_particles, ireq_particles_transfer); transfer_main_buffer = false; - ireq_n_particles ++; - ireq_particles_transfer ++; + ireq_n_particles++; + ireq_particles_transfer++; } - #endif + #endif - if ( transfer_main_buffer ){ - #if defined(MPI_GPU) - //post non-blocking receive right x communication buffer - MPI_Irecv(d_recv_buffer_x1, buffer_length, MPI_CHREAL, source[1], 1, world, &recv_request[ireq]); + if (transfer_main_buffer) { + #if defined(MPI_GPU) + // post non-blocking receive right x communication buffer + MPI_Irecv(d_recv_buffer_x1, buffer_length, MPI_CHREAL, source[1], 1, + world, &recv_request[ireq]); - //non-blocking send right x communication buffer - MPI_Isend(d_send_buffer_x1, buffer_length, MPI_CHREAL, dest[1], 0, world, &send_request[1]); - #else - //post non-blocking receive right x communication buffer - MPI_Irecv(h_recv_buffer_x1, buffer_length, MPI_CHREAL, source[1], 1, world, &recv_request[ireq]); + // non-blocking send right x communication buffer + MPI_Isend(d_send_buffer_x1, buffer_length, MPI_CHREAL, dest[1], 0, + world, &send_request[1]); + #else + // post non-blocking receive right x communication buffer + MPI_Irecv(h_recv_buffer_x1, buffer_length, MPI_CHREAL, source[1], 1, + world, &recv_request[ireq]); - //non-blocking send right x communication buffer - MPI_Isend(h_send_buffer_x1, buffer_length, MPI_CHREAL, dest[1], 0, world, &send_request[1]); - #endif + // non-blocking send right x communication buffer + MPI_Isend(h_send_buffer_x1, buffer_length, MPI_CHREAL, dest[1], 0, + world, &send_request[1]); + #endif - MPI_Request_free(send_request+1); + MPI_Request_free(send_request + 1); - //keep track of how many sends and receives are expected + // keep track of how many sends and receives are expected ireq++; } } - // Receive the number of particles transfer for X - #ifdef PARTICLES - if ( Particles.TRANSFER_PARTICLES_BOUNDARIES ) Wait_NTransfer_and_Request_Recv_Particles_Transfer_BLOCK( dir, flags ); - #endif - + // Receive the number of particles transfer for X + #ifdef PARTICLES + if (Particles.TRANSFER_PARTICLES_BOUNDARIES) + Wait_NTransfer_and_Request_Recv_Particles_Transfer_BLOCK(dir, flags); + #endif } /* y boundaries */ - if (dir==1) { - if(flags[2] == 5) - { + if (dir == 1) { + if (flags[2] == 5) { // load left y communication buffer - if ( H.TRANSFER_HYDRO_BOUNDARIES ) - { + if (H.TRANSFER_HYDRO_BOUNDARIES) { buffer_length = Load_Hydro_DeviceBuffer_Y0(d_send_buffer_y0); - #ifndef MPI_GPU - cudaMemcpy(h_send_buffer_y0, d_send_buffer_y0, ybsize*sizeof(Real), - cudaMemcpyDeviceToHost); - #endif - //printf("Y0 len: %d\n", buffer_length); - } - - #ifdef GRAVITY - if ( Grav.TRANSFER_POTENTIAL_BOUNDARIES ){ - #ifdef GRAVITY_GPU - buffer_length = Load_Gravity_Potential_To_Buffer_GPU( 1, 0, d_send_buffer_y0, 0 ); - #ifndef MPI_GPU - cudaMemcpy(h_send_buffer_y0, d_send_buffer_y0, ybsize*sizeof(Real), - cudaMemcpyDeviceToHost); - #endif - #else - buffer_length = Load_Gravity_Potential_To_Buffer( 1, 0, h_send_buffer_y0, 0 ); - #endif + #ifndef MPI_GPU + cudaMemcpy(h_send_buffer_y0, d_send_buffer_y0, ybsize * sizeof(Real), + cudaMemcpyDeviceToHost); + #endif + // printf("Y0 len: %d\n", buffer_length); } - #ifdef SOR - if ( Grav.Poisson_solver.TRANSFER_POISSON_BOUNDARIES ) buffer_length = Load_Poisson_Boundary_To_Buffer( 1, 0, h_send_buffer_y0 ); - #endif //SOR - #endif //GRAVITY - - #ifdef PARTICLES - if ( Particles.TRANSFER_DENSITY_BOUNDARIES) { - #ifdef PARTICLES_GPU - buffer_length = Load_Particles_Density_Boundary_to_Buffer_GPU( 1, 0, d_send_buffer_y0 ); - #ifndef MPI_GPU - cudaMemcpy(h_send_buffer_y0, d_send_buffer_y0, ybsize*sizeof(Real), - cudaMemcpyDeviceToHost); - #endif - #else - #ifndef MPI_GPU - buffer_length = Load_Particles_Density_Boundary_to_Buffer( 1, 0, h_send_buffer_y0 ); - #else - buffer_length = Load_Particles_Density_Boundary_to_Buffer( 1, 0, h_send_buffer_y0_particles ); - cudaMemcpy(d_send_buffer_y0, h_send_buffer_y0_particles, buffer_length*sizeof(Real), cudaMemcpyHostToDevice); - #endif - #endif + + #ifdef GRAVITY + if (Grav.TRANSFER_POTENTIAL_BOUNDARIES) { + #ifdef GRAVITY_GPU + buffer_length = + Load_Gravity_Potential_To_Buffer_GPU(1, 0, d_send_buffer_y0, 0); + #ifndef MPI_GPU + cudaMemcpy(h_send_buffer_y0, d_send_buffer_y0, ybsize * sizeof(Real), + cudaMemcpyDeviceToHost); + #endif + #else + buffer_length = + Load_Gravity_Potential_To_Buffer(1, 0, h_send_buffer_y0, 0); + #endif } - else if ( Particles.TRANSFER_PARTICLES_BOUNDARIES ){ - Load_and_Send_Particles_Y0( ireq_n_particles, ireq_particles_transfer ); + #ifdef SOR + if (Grav.Poisson_solver.TRANSFER_POISSON_BOUNDARIES) + buffer_length = Load_Poisson_Boundary_To_Buffer(1, 0, h_send_buffer_y0); + #endif // SOR + #endif // GRAVITY + + #ifdef PARTICLES + if (Particles.TRANSFER_DENSITY_BOUNDARIES) { + #ifdef PARTICLES_GPU + buffer_length = Load_Particles_Density_Boundary_to_Buffer_GPU( + 1, 0, d_send_buffer_y0); + #ifndef MPI_GPU + cudaMemcpy(h_send_buffer_y0, d_send_buffer_y0, ybsize * sizeof(Real), + cudaMemcpyDeviceToHost); + #endif + #else + #ifndef MPI_GPU + buffer_length = + Load_Particles_Density_Boundary_to_Buffer(1, 0, h_send_buffer_y0); + #else + buffer_length = Load_Particles_Density_Boundary_to_Buffer( + 1, 0, h_send_buffer_y0_particles); + cudaMemcpy(d_send_buffer_y0, h_send_buffer_y0_particles, + buffer_length * sizeof(Real), cudaMemcpyHostToDevice); + #endif + #endif + } else if (Particles.TRANSFER_PARTICLES_BOUNDARIES) { + Load_and_Send_Particles_Y0(ireq_n_particles, ireq_particles_transfer); transfer_main_buffer = false; - ireq_n_particles ++; - ireq_particles_transfer ++; + ireq_n_particles++; + ireq_particles_transfer++; } - #endif + #endif - if ( transfer_main_buffer ){ - #if defined(MPI_GPU) - //post non-blocking receive left y communication buffer - MPI_Irecv(d_recv_buffer_y0, buffer_length, MPI_CHREAL, source[2], 2, world, &recv_request[ireq]); + if (transfer_main_buffer) { + #if defined(MPI_GPU) + // post non-blocking receive left y communication buffer + MPI_Irecv(d_recv_buffer_y0, buffer_length, MPI_CHREAL, source[2], 2, + world, &recv_request[ireq]); - //non-blocking send left y communication buffer - MPI_Isend(d_send_buffer_y0, buffer_length, MPI_CHREAL, dest[2], 3, world, &send_request[0]); - #else - //post non-blocking receive left y communication buffer - MPI_Irecv(h_recv_buffer_y0, buffer_length, MPI_CHREAL, source[2], 2, world, &recv_request[ireq]); + // non-blocking send left y communication buffer + MPI_Isend(d_send_buffer_y0, buffer_length, MPI_CHREAL, dest[2], 3, + world, &send_request[0]); + #else + // post non-blocking receive left y communication buffer + MPI_Irecv(h_recv_buffer_y0, buffer_length, MPI_CHREAL, source[2], 2, + world, &recv_request[ireq]); - //non-blocking send left y communication buffer - MPI_Isend(h_send_buffer_y0, buffer_length, MPI_CHREAL, dest[2], 3, world, &send_request[0]); - #endif + // non-blocking send left y communication buffer + MPI_Isend(h_send_buffer_y0, buffer_length, MPI_CHREAL, dest[2], 3, + world, &send_request[0]); + #endif MPI_Request_free(send_request); - //keep track of how many sends and receives are expected + // keep track of how many sends and receives are expected ireq++; } } - if(flags[3]==5) - { + if (flags[3] == 5) { // load right y communication buffer - if ( H.TRANSFER_HYDRO_BOUNDARIES ) - { + if (H.TRANSFER_HYDRO_BOUNDARIES) { buffer_length = Load_Hydro_DeviceBuffer_Y1(d_send_buffer_y1); - #ifndef MPI_GPU - cudaMemcpy(h_send_buffer_y1, d_send_buffer_y1, ybsize*sizeof(Real), - cudaMemcpyDeviceToHost); - #endif - //printf("Y1 len: %d\n", buffer_length); - } - - - #ifdef GRAVITY - if ( Grav.TRANSFER_POTENTIAL_BOUNDARIES ){ - #ifdef GRAVITY_GPU - buffer_length = Load_Gravity_Potential_To_Buffer_GPU( 1, 1, d_send_buffer_y1, 0 ); - #ifndef MPI_GPU - cudaMemcpy(h_send_buffer_y1, d_send_buffer_y1, ybsize*sizeof(Real), - cudaMemcpyDeviceToHost); - #endif - #else - buffer_length = Load_Gravity_Potential_To_Buffer( 1, 1, h_send_buffer_y1, 0 ); - #endif + #ifndef MPI_GPU + cudaMemcpy(h_send_buffer_y1, d_send_buffer_y1, ybsize * sizeof(Real), + cudaMemcpyDeviceToHost); + #endif + // printf("Y1 len: %d\n", buffer_length); } - #ifdef SOR - if ( Grav.Poisson_solver.TRANSFER_POISSON_BOUNDARIES ) buffer_length = Load_Poisson_Boundary_To_Buffer( 1, 1, h_send_buffer_y1 ); - #endif //SOR - #endif //GRAVITY - - #ifdef PARTICLES - if ( Particles.TRANSFER_DENSITY_BOUNDARIES) { - #ifdef PARTICLES_GPU - buffer_length = Load_Particles_Density_Boundary_to_Buffer_GPU( 1, 1, d_send_buffer_y1 ); - #ifndef MPI_GPU - cudaMemcpy(h_send_buffer_y1, d_send_buffer_y1, ybsize*sizeof(Real), - cudaMemcpyDeviceToHost); - #endif - #else - #ifndef MPI_GPU - buffer_length = Load_Particles_Density_Boundary_to_Buffer( 1, 1, h_send_buffer_y1 ); - #else - buffer_length = Load_Particles_Density_Boundary_to_Buffer( 1, 1, h_send_buffer_y1_particles ); - cudaMemcpy(d_send_buffer_y1, h_send_buffer_y1_particles, buffer_length*sizeof(Real), cudaMemcpyHostToDevice); - #endif - #endif + + #ifdef GRAVITY + if (Grav.TRANSFER_POTENTIAL_BOUNDARIES) { + #ifdef GRAVITY_GPU + buffer_length = + Load_Gravity_Potential_To_Buffer_GPU(1, 1, d_send_buffer_y1, 0); + #ifndef MPI_GPU + cudaMemcpy(h_send_buffer_y1, d_send_buffer_y1, ybsize * sizeof(Real), + cudaMemcpyDeviceToHost); + #endif + #else + buffer_length = + Load_Gravity_Potential_To_Buffer(1, 1, h_send_buffer_y1, 0); + #endif } - else if ( Particles.TRANSFER_PARTICLES_BOUNDARIES ){ - Load_and_Send_Particles_Y1( ireq_n_particles, ireq_particles_transfer ); + #ifdef SOR + if (Grav.Poisson_solver.TRANSFER_POISSON_BOUNDARIES) + buffer_length = Load_Poisson_Boundary_To_Buffer(1, 1, h_send_buffer_y1); + #endif // SOR + #endif // GRAVITY + + #ifdef PARTICLES + if (Particles.TRANSFER_DENSITY_BOUNDARIES) { + #ifdef PARTICLES_GPU + buffer_length = Load_Particles_Density_Boundary_to_Buffer_GPU( + 1, 1, d_send_buffer_y1); + #ifndef MPI_GPU + cudaMemcpy(h_send_buffer_y1, d_send_buffer_y1, ybsize * sizeof(Real), + cudaMemcpyDeviceToHost); + #endif + #else + #ifndef MPI_GPU + buffer_length = + Load_Particles_Density_Boundary_to_Buffer(1, 1, h_send_buffer_y1); + #else + buffer_length = Load_Particles_Density_Boundary_to_Buffer( + 1, 1, h_send_buffer_y1_particles); + cudaMemcpy(d_send_buffer_y1, h_send_buffer_y1_particles, + buffer_length * sizeof(Real), cudaMemcpyHostToDevice); + #endif + #endif + } else if (Particles.TRANSFER_PARTICLES_BOUNDARIES) { + Load_and_Send_Particles_Y1(ireq_n_particles, ireq_particles_transfer); transfer_main_buffer = false; - ireq_n_particles ++; - ireq_particles_transfer ++; + ireq_n_particles++; + ireq_particles_transfer++; } - #endif + #endif - if ( transfer_main_buffer ){ - #if defined(MPI_GPU) - //post non-blocking receive right y communication buffer - MPI_Irecv(d_recv_buffer_y1, buffer_length, MPI_CHREAL, source[3], 3, world, &recv_request[ireq]); + if (transfer_main_buffer) { + #if defined(MPI_GPU) + // post non-blocking receive right y communication buffer + MPI_Irecv(d_recv_buffer_y1, buffer_length, MPI_CHREAL, source[3], 3, + world, &recv_request[ireq]); - //non-blocking send right y communication buffer - MPI_Isend(d_send_buffer_y1, buffer_length, MPI_CHREAL, dest[3], 2, world, &send_request[1]); - #else - //post non-blocking receive right y communication buffer - MPI_Irecv(h_recv_buffer_y1, buffer_length, MPI_CHREAL, source[3], 3, world, &recv_request[ireq]); + // non-blocking send right y communication buffer + MPI_Isend(d_send_buffer_y1, buffer_length, MPI_CHREAL, dest[3], 2, + world, &send_request[1]); + #else + // post non-blocking receive right y communication buffer + MPI_Irecv(h_recv_buffer_y1, buffer_length, MPI_CHREAL, source[3], 3, + world, &recv_request[ireq]); - //non-blocking send right y communication buffer - MPI_Isend(h_send_buffer_y1, buffer_length, MPI_CHREAL, dest[3], 2, world, &send_request[1]); - #endif - MPI_Request_free(send_request+1); + // non-blocking send right y communication buffer + MPI_Isend(h_send_buffer_y1, buffer_length, MPI_CHREAL, dest[3], 2, + world, &send_request[1]); + #endif + MPI_Request_free(send_request + 1); - //keep track of how many sends and receives are expected + // keep track of how many sends and receives are expected ireq++; } } - // Receive the number of particles transfer for Y - #ifdef PARTICLES - if ( Particles.TRANSFER_PARTICLES_BOUNDARIES ) Wait_NTransfer_and_Request_Recv_Particles_Transfer_BLOCK( dir, flags ); - #endif - + // Receive the number of particles transfer for Y + #ifdef PARTICLES + if (Particles.TRANSFER_PARTICLES_BOUNDARIES) + Wait_NTransfer_and_Request_Recv_Particles_Transfer_BLOCK(dir, flags); + #endif } /* z boundaries */ - if (dir==2) { - - if(flags[4]==5) - { + if (dir == 2) { + if (flags[4] == 5) { // left z communication buffer - if ( H.TRANSFER_HYDRO_BOUNDARIES ) - { + if (H.TRANSFER_HYDRO_BOUNDARIES) { buffer_length = Load_Hydro_DeviceBuffer_Z0(d_send_buffer_z0); - #ifndef MPI_GPU - cudaMemcpy(h_send_buffer_z0, d_send_buffer_z0, zbsize*sizeof(Real), - cudaMemcpyDeviceToHost); - #endif - //printf("Z0 len: %d\n", buffer_length); - } - - #ifdef GRAVITY - if ( Grav.TRANSFER_POTENTIAL_BOUNDARIES ){ - #ifdef GRAVITY_GPU - buffer_length = Load_Gravity_Potential_To_Buffer_GPU( 2, 0, d_send_buffer_z0, 0 ); - #ifndef MPI_GPU - cudaMemcpy(h_send_buffer_z0, d_send_buffer_z0, zbsize*sizeof(Real), - cudaMemcpyDeviceToHost); - #endif - #else - buffer_length = Load_Gravity_Potential_To_Buffer( 2, 0, h_send_buffer_z0, 0 ); - #endif + #ifndef MPI_GPU + cudaMemcpy(h_send_buffer_z0, d_send_buffer_z0, zbsize * sizeof(Real), + cudaMemcpyDeviceToHost); + #endif + // printf("Z0 len: %d\n", buffer_length); } - #ifdef SOR - if ( Grav.Poisson_solver.TRANSFER_POISSON_BOUNDARIES ) buffer_length = Load_Poisson_Boundary_To_Buffer( 2, 0, h_send_buffer_z0 ); - #endif //SOR - #endif //GRAVITY - - #ifdef PARTICLES - if ( Particles.TRANSFER_DENSITY_BOUNDARIES) { - #ifdef PARTICLES_GPU - buffer_length = Load_Particles_Density_Boundary_to_Buffer_GPU( 2, 0, d_send_buffer_z0 ); - #ifndef MPI_GPU - cudaMemcpy(h_send_buffer_z0, d_send_buffer_z0, zbsize*sizeof(Real), - cudaMemcpyDeviceToHost); - #endif - #else - #ifndef MPI_GPU - buffer_length = Load_Particles_Density_Boundary_to_Buffer( 2, 0, h_send_buffer_z0 ); - #else - buffer_length = Load_Particles_Density_Boundary_to_Buffer( 2, 0, h_send_buffer_z0_particles ); - cudaMemcpy(d_send_buffer_z0, h_send_buffer_z0_particles, buffer_length*sizeof(Real), cudaMemcpyHostToDevice); - #endif - #endif + + #ifdef GRAVITY + if (Grav.TRANSFER_POTENTIAL_BOUNDARIES) { + #ifdef GRAVITY_GPU + buffer_length = + Load_Gravity_Potential_To_Buffer_GPU(2, 0, d_send_buffer_z0, 0); + #ifndef MPI_GPU + cudaMemcpy(h_send_buffer_z0, d_send_buffer_z0, zbsize * sizeof(Real), + cudaMemcpyDeviceToHost); + #endif + #else + buffer_length = + Load_Gravity_Potential_To_Buffer(2, 0, h_send_buffer_z0, 0); + #endif } - else if ( Particles.TRANSFER_PARTICLES_BOUNDARIES ){ - Load_and_Send_Particles_Z0( ireq_n_particles, ireq_particles_transfer ); + #ifdef SOR + if (Grav.Poisson_solver.TRANSFER_POISSON_BOUNDARIES) + buffer_length = Load_Poisson_Boundary_To_Buffer(2, 0, h_send_buffer_z0); + #endif // SOR + #endif // GRAVITY + + #ifdef PARTICLES + if (Particles.TRANSFER_DENSITY_BOUNDARIES) { + #ifdef PARTICLES_GPU + buffer_length = Load_Particles_Density_Boundary_to_Buffer_GPU( + 2, 0, d_send_buffer_z0); + #ifndef MPI_GPU + cudaMemcpy(h_send_buffer_z0, d_send_buffer_z0, zbsize * sizeof(Real), + cudaMemcpyDeviceToHost); + #endif + #else + #ifndef MPI_GPU + buffer_length = + Load_Particles_Density_Boundary_to_Buffer(2, 0, h_send_buffer_z0); + #else + buffer_length = Load_Particles_Density_Boundary_to_Buffer( + 2, 0, h_send_buffer_z0_particles); + cudaMemcpy(d_send_buffer_z0, h_send_buffer_z0_particles, + buffer_length * sizeof(Real), cudaMemcpyHostToDevice); + #endif + #endif + } else if (Particles.TRANSFER_PARTICLES_BOUNDARIES) { + Load_and_Send_Particles_Z0(ireq_n_particles, ireq_particles_transfer); transfer_main_buffer = false; - ireq_n_particles ++; - ireq_particles_transfer ++; + ireq_n_particles++; + ireq_particles_transfer++; } - #endif + #endif - if ( transfer_main_buffer ){ - #if defined(MPI_GPU) - //post non-blocking receive left z communication buffer - MPI_Irecv(d_recv_buffer_z0, buffer_length, MPI_CHREAL, source[4], 4, world, &recv_request[ireq]); - //non-blocking send left z communication buffer - MPI_Isend(d_send_buffer_z0, buffer_length, MPI_CHREAL, dest[4], 5, world, &send_request[0]); - #else - //post non-blocking receive left z communication buffer - MPI_Irecv(h_recv_buffer_z0, buffer_length, MPI_CHREAL, source[4], 4, world, &recv_request[ireq]); + if (transfer_main_buffer) { + #if defined(MPI_GPU) + // post non-blocking receive left z communication buffer + MPI_Irecv(d_recv_buffer_z0, buffer_length, MPI_CHREAL, source[4], 4, + world, &recv_request[ireq]); + // non-blocking send left z communication buffer + MPI_Isend(d_send_buffer_z0, buffer_length, MPI_CHREAL, dest[4], 5, + world, &send_request[0]); + #else + // post non-blocking receive left z communication buffer + MPI_Irecv(h_recv_buffer_z0, buffer_length, MPI_CHREAL, source[4], 4, + world, &recv_request[ireq]); - //non-blocking send left z communication buffer - MPI_Isend(h_send_buffer_z0, buffer_length, MPI_CHREAL, dest[4], 5, world, &send_request[0]); - #endif + // non-blocking send left z communication buffer + MPI_Isend(h_send_buffer_z0, buffer_length, MPI_CHREAL, dest[4], 5, + world, &send_request[0]); + #endif MPI_Request_free(send_request); - //keep track of how many sends and receives are expected + // keep track of how many sends and receives are expected ireq++; } } - if(flags[5]==5) - { + if (flags[5] == 5) { // load right z communication buffer - if ( H.TRANSFER_HYDRO_BOUNDARIES ) - { + if (H.TRANSFER_HYDRO_BOUNDARIES) { buffer_length = Load_Hydro_DeviceBuffer_Z1(d_send_buffer_z1); - #ifndef MPI_GPU - cudaMemcpy(h_send_buffer_z1, d_send_buffer_z1, zbsize*sizeof(Real), - cudaMemcpyDeviceToHost); - #endif - //printf("Z1 len: %d\n", buffer_length); - } - - #ifdef GRAVITY - if ( Grav.TRANSFER_POTENTIAL_BOUNDARIES ){ - #ifdef GRAVITY_GPU - buffer_length = Load_Gravity_Potential_To_Buffer_GPU( 2, 1, d_send_buffer_z1, 0 ); - #ifndef MPI_GPU - cudaMemcpy(h_send_buffer_z1, d_send_buffer_z1, zbsize*sizeof(Real), - cudaMemcpyDeviceToHost); - #endif - #else - buffer_length = Load_Gravity_Potential_To_Buffer( 2, 1, h_send_buffer_z1, 0 ); - #endif + #ifndef MPI_GPU + cudaMemcpy(h_send_buffer_z1, d_send_buffer_z1, zbsize * sizeof(Real), + cudaMemcpyDeviceToHost); + #endif + // printf("Z1 len: %d\n", buffer_length); } - #ifdef SOR - if ( Grav.Poisson_solver.TRANSFER_POISSON_BOUNDARIES ) buffer_length = Load_Poisson_Boundary_To_Buffer( 2, 1, h_send_buffer_z1 ); - #endif //SOR - #endif //GRAVITY - - #ifdef PARTICLES - if ( Particles.TRANSFER_DENSITY_BOUNDARIES) { - #ifdef PARTICLES_GPU - buffer_length = Load_Particles_Density_Boundary_to_Buffer_GPU( 2, 1, d_send_buffer_z1 ); - #ifndef MPI_GPU - cudaMemcpy(h_send_buffer_z1, d_send_buffer_z1, zbsize*sizeof(Real), - cudaMemcpyDeviceToHost); - #endif - #else - #ifndef MPI_GPU - buffer_length = Load_Particles_Density_Boundary_to_Buffer( 2, 1, h_send_buffer_z1 ); - #else - buffer_length = Load_Particles_Density_Boundary_to_Buffer( 2, 1, h_send_buffer_z1_particles ); - cudaMemcpy(d_send_buffer_z1, h_send_buffer_z1_particles, buffer_length*sizeof(Real), cudaMemcpyHostToDevice); - #endif - #endif + + #ifdef GRAVITY + if (Grav.TRANSFER_POTENTIAL_BOUNDARIES) { + #ifdef GRAVITY_GPU + buffer_length = + Load_Gravity_Potential_To_Buffer_GPU(2, 1, d_send_buffer_z1, 0); + #ifndef MPI_GPU + cudaMemcpy(h_send_buffer_z1, d_send_buffer_z1, zbsize * sizeof(Real), + cudaMemcpyDeviceToHost); + #endif + #else + buffer_length = + Load_Gravity_Potential_To_Buffer(2, 1, h_send_buffer_z1, 0); + #endif } - else if ( Particles.TRANSFER_PARTICLES_BOUNDARIES ){ - Load_and_Send_Particles_Z1( ireq_n_particles, ireq_particles_transfer ); + #ifdef SOR + if (Grav.Poisson_solver.TRANSFER_POISSON_BOUNDARIES) + buffer_length = Load_Poisson_Boundary_To_Buffer(2, 1, h_send_buffer_z1); + #endif // SOR + #endif // GRAVITY + + #ifdef PARTICLES + if (Particles.TRANSFER_DENSITY_BOUNDARIES) { + #ifdef PARTICLES_GPU + buffer_length = Load_Particles_Density_Boundary_to_Buffer_GPU( + 2, 1, d_send_buffer_z1); + #ifndef MPI_GPU + cudaMemcpy(h_send_buffer_z1, d_send_buffer_z1, zbsize * sizeof(Real), + cudaMemcpyDeviceToHost); + #endif + #else + #ifndef MPI_GPU + buffer_length = + Load_Particles_Density_Boundary_to_Buffer(2, 1, h_send_buffer_z1); + #else + buffer_length = Load_Particles_Density_Boundary_to_Buffer( + 2, 1, h_send_buffer_z1_particles); + cudaMemcpy(d_send_buffer_z1, h_send_buffer_z1_particles, + buffer_length * sizeof(Real), cudaMemcpyHostToDevice); + #endif + #endif + } else if (Particles.TRANSFER_PARTICLES_BOUNDARIES) { + Load_and_Send_Particles_Z1(ireq_n_particles, ireq_particles_transfer); transfer_main_buffer = false; - ireq_n_particles ++; - ireq_particles_transfer ++; + ireq_n_particles++; + ireq_particles_transfer++; } - #endif + #endif - if ( transfer_main_buffer ){ - #if defined(MPI_GPU) - //post non-blocking receive right x communication buffer - MPI_Irecv(d_recv_buffer_z1, buffer_length, MPI_CHREAL, source[5], 5, world, &recv_request[ireq]); + if (transfer_main_buffer) { + #if defined(MPI_GPU) + // post non-blocking receive right x communication buffer + MPI_Irecv(d_recv_buffer_z1, buffer_length, MPI_CHREAL, source[5], 5, + world, &recv_request[ireq]); - //non-blocking send right x communication buffer - MPI_Isend(d_send_buffer_z1, buffer_length, MPI_CHREAL, dest[5], 4, world, &send_request[1]); - #else - //post non-blocking receive right x communication buffer - MPI_Irecv(h_recv_buffer_z1, buffer_length, MPI_CHREAL, source[5], 5, world, &recv_request[ireq]); + // non-blocking send right x communication buffer + MPI_Isend(d_send_buffer_z1, buffer_length, MPI_CHREAL, dest[5], 4, + world, &send_request[1]); + #else + // post non-blocking receive right x communication buffer + MPI_Irecv(h_recv_buffer_z1, buffer_length, MPI_CHREAL, source[5], 5, + world, &recv_request[ireq]); - //non-blocking send right x communication buffer - MPI_Isend(h_send_buffer_z1, buffer_length, MPI_CHREAL, dest[5], 4, world, &send_request[1]); - #endif - MPI_Request_free(send_request+1); + // non-blocking send right x communication buffer + MPI_Isend(h_send_buffer_z1, buffer_length, MPI_CHREAL, dest[5], 4, + world, &send_request[1]); + #endif + MPI_Request_free(send_request + 1); - //keep track of how many sends and receives are expected + // keep track of how many sends and receives are expected ireq++; } } // Receive the number of particles transfer for Z - #ifdef PARTICLES - if ( Particles.TRANSFER_PARTICLES_BOUNDARIES ) Wait_NTransfer_and_Request_Recv_Particles_Transfer_BLOCK( dir, flags ); - #endif + #ifdef PARTICLES + if (Particles.TRANSFER_PARTICLES_BOUNDARIES) + Wait_NTransfer_and_Request_Recv_Particles_Transfer_BLOCK(dir, flags); + #endif } - } void Grid3D::Wait_and_Unload_MPI_Comm_Buffers(int dir, int *flags) { - #ifdef PARTICLES - // If we are transfering the particles buffers we dont need to unload the main buffers - if ( Particles.TRANSFER_PARTICLES_BOUNDARIES ) return; + // If we are transfering the particles buffers we dont need to unload the main + // buffers + if (Particles.TRANSFER_PARTICLES_BOUNDARIES) return; #endif int iwait; - int index = 0; - int wait_max=0; + int index = 0; + int wait_max = 0; MPI_Status status; - //find out how many recvs we need to wait for - if (dir==0) { - if(flags[0] == 5) //there is communication on this face - wait_max++; //so we'll need to wait for its comm - if(flags[1] == 5) //there is communication on this face - wait_max++; //so we'll need to wait for its comm - } - if (dir==1) { - if(flags[2] == 5) //there is communication on this face - wait_max++; //so we'll need to wait for its comm - if(flags[3] == 5) //there is communication on this face - wait_max++; //so we'll need to wait for its comm - } - if (dir==2) { - if(flags[4] == 5) //there is communication on this face - wait_max++; //so we'll need to wait for its comm - if(flags[5] == 5) //there is communication on this face - wait_max++; //so we'll need to wait for its comm - } - - //wait for any receives to complete - for(iwait=0;iwait*Fptr_Unload_Hydro_Buffer_X0) ( l_recv_buffer_x0 ); break; - case ( 1 ): (this->*Fptr_Unload_Hydro_Buffer_X1) ( l_recv_buffer_x1 ); break; - case ( 2 ): (this->*Fptr_Unload_Hydro_Buffer_Y0) ( l_recv_buffer_y0 ); break; - case ( 3 ): (this->*Fptr_Unload_Hydro_Buffer_Y1) ( l_recv_buffer_y1 ); break; - case ( 4 ): (this->*Fptr_Unload_Hydro_Buffer_Z0) ( l_recv_buffer_z0 ); break; - case ( 5 ): (this->*Fptr_Unload_Hydro_Buffer_Z1) ( l_recv_buffer_z1 ); break; + switch (index) { + case (0): + (this->*Fptr_Unload_Hydro_Buffer_X0)(l_recv_buffer_x0); + break; + case (1): + (this->*Fptr_Unload_Hydro_Buffer_X1)(l_recv_buffer_x1); + break; + case (2): + (this->*Fptr_Unload_Hydro_Buffer_Y0)(l_recv_buffer_y0); + break; + case (3): + (this->*Fptr_Unload_Hydro_Buffer_Y1)(l_recv_buffer_y1); + break; + case (4): + (this->*Fptr_Unload_Hydro_Buffer_Z0)(l_recv_buffer_z0); + break; + case (5): + (this->*Fptr_Unload_Hydro_Buffer_Z1)(l_recv_buffer_z1); + break; } } #ifdef GRAVITY - if ( Grav.TRANSFER_POTENTIAL_BOUNDARIES ){ + if (Grav.TRANSFER_POTENTIAL_BOUNDARIES) { #ifdef GRAVITY_GPU - #ifndef MPI_GPU - copyHostToDeviceReceiveBuffer ( index ); - #endif // MPI_GPU + #ifndef MPI_GPU + copyHostToDeviceReceiveBuffer(index); + #endif // MPI_GPU l_recv_buffer_x0 = d_recv_buffer_x0; l_recv_buffer_x1 = d_recv_buffer_x1; @@ -916,8 +977,8 @@ void Grid3D::Unload_MPI_Comm_Buffers(int index) l_recv_buffer_z0 = d_recv_buffer_z0; l_recv_buffer_z1 = d_recv_buffer_z1; - Fptr_Unload_Gravity_Potential - = &Grid3D::Unload_Gravity_Potential_from_Buffer_GPU; + Fptr_Unload_Gravity_Potential = + &Grid3D::Unload_Gravity_Potential_from_Buffer_GPU; #else @@ -928,21 +989,27 @@ void Grid3D::Unload_MPI_Comm_Buffers(int index) l_recv_buffer_z0 = h_recv_buffer_z0; l_recv_buffer_z1 = h_recv_buffer_z1; - Fptr_Unload_Gravity_Potential - = &Grid3D::Unload_Gravity_Potential_from_Buffer; - - #endif // GRAVITY_GPU - - if ( index == 0 ) (this->*Fptr_Unload_Gravity_Potential)( 0, 0, l_recv_buffer_x0, 0 ); - if ( index == 1 ) (this->*Fptr_Unload_Gravity_Potential)( 0, 1, l_recv_buffer_x1, 0 ); - if ( index == 2 ) (this->*Fptr_Unload_Gravity_Potential)( 1, 0, l_recv_buffer_y0, 0 ); - if ( index == 3 ) (this->*Fptr_Unload_Gravity_Potential)( 1, 1, l_recv_buffer_y1, 0 ); - if ( index == 4 ) (this->*Fptr_Unload_Gravity_Potential)( 2, 0, l_recv_buffer_z0, 0 ); - if ( index == 5 ) (this->*Fptr_Unload_Gravity_Potential)( 2, 1, l_recv_buffer_z1, 0 ); + Fptr_Unload_Gravity_Potential = + &Grid3D::Unload_Gravity_Potential_from_Buffer; + + #endif // GRAVITY_GPU + + if (index == 0) + (this->*Fptr_Unload_Gravity_Potential)(0, 0, l_recv_buffer_x0, 0); + if (index == 1) + (this->*Fptr_Unload_Gravity_Potential)(0, 1, l_recv_buffer_x1, 0); + if (index == 2) + (this->*Fptr_Unload_Gravity_Potential)(1, 0, l_recv_buffer_y0, 0); + if (index == 3) + (this->*Fptr_Unload_Gravity_Potential)(1, 1, l_recv_buffer_y1, 0); + if (index == 4) + (this->*Fptr_Unload_Gravity_Potential)(2, 0, l_recv_buffer_z0, 0); + if (index == 5) + (this->*Fptr_Unload_Gravity_Potential)(2, 1, l_recv_buffer_z1, 0); } - #ifdef SOR - if ( Grav.Poisson_solver.TRANSFER_POISSON_BOUNDARIES ){ + #ifdef SOR + if (Grav.Poisson_solver.TRANSFER_POISSON_BOUNDARIES) { l_recv_buffer_x0 = h_recv_buffer_x0; l_recv_buffer_x1 = h_recv_buffer_x1; l_recv_buffer_y0 = h_recv_buffer_y0; @@ -950,23 +1017,22 @@ void Grid3D::Unload_MPI_Comm_Buffers(int index) l_recv_buffer_z0 = h_recv_buffer_z0; l_recv_buffer_z1 = h_recv_buffer_z1; - if ( index == 0 ) Unload_Poisson_Boundary_From_Buffer( 0, 0, l_recv_buffer_x0 ); - if ( index == 1 ) Unload_Poisson_Boundary_From_Buffer( 0, 1, l_recv_buffer_x1 ); - if ( index == 2 ) Unload_Poisson_Boundary_From_Buffer( 1, 0, l_recv_buffer_y0 ); - if ( index == 3 ) Unload_Poisson_Boundary_From_Buffer( 1, 1, l_recv_buffer_y1 ); - if ( index == 4 ) Unload_Poisson_Boundary_From_Buffer( 2, 0, l_recv_buffer_z0 ); - if ( index == 5 ) Unload_Poisson_Boundary_From_Buffer( 2, 1, l_recv_buffer_z1 ); + if (index == 0) Unload_Poisson_Boundary_From_Buffer(0, 0, l_recv_buffer_x0); + if (index == 1) Unload_Poisson_Boundary_From_Buffer(0, 1, l_recv_buffer_x1); + if (index == 2) Unload_Poisson_Boundary_From_Buffer(1, 0, l_recv_buffer_y0); + if (index == 3) Unload_Poisson_Boundary_From_Buffer(1, 1, l_recv_buffer_y1); + if (index == 4) Unload_Poisson_Boundary_From_Buffer(2, 0, l_recv_buffer_z0); + if (index == 5) Unload_Poisson_Boundary_From_Buffer(2, 1, l_recv_buffer_z1); } - #endif //SOR - - #endif //GRAVITY + #endif // SOR + #endif // GRAVITY #ifdef PARTICLES - if ( Particles.TRANSFER_DENSITY_BOUNDARIES ){ + if (Particles.TRANSFER_DENSITY_BOUNDARIES) { #ifdef PARTICLES_GPU #ifndef MPI_GPU - copyHostToDeviceReceiveBuffer ( index ); + copyHostToDeviceReceiveBuffer(index); #endif l_recv_buffer_x0 = d_recv_buffer_x0; @@ -976,48 +1042,65 @@ void Grid3D::Unload_MPI_Comm_Buffers(int index) l_recv_buffer_z0 = d_recv_buffer_z0; l_recv_buffer_z1 = d_recv_buffer_z1; - Fptr_Unload_Particle_Density - = &Grid3D::Unload_Particles_Density_Boundary_From_Buffer_GPU; + Fptr_Unload_Particle_Density = + &Grid3D::Unload_Particles_Density_Boundary_From_Buffer_GPU; #else - - #ifdef MPI_GPU - if ( index == 0 ) Copy_Particles_Density_Buffer_Device_to_Host( 0, 0, d_recv_buffer_x0, h_recv_buffer_x0_particles ); - if ( index == 1 ) Copy_Particles_Density_Buffer_Device_to_Host( 0, 1, d_recv_buffer_x1, h_recv_buffer_x1_particles ); - if ( index == 2 ) Copy_Particles_Density_Buffer_Device_to_Host( 1, 0, d_recv_buffer_y0, h_recv_buffer_y0_particles ); - if ( index == 3 ) Copy_Particles_Density_Buffer_Device_to_Host( 1, 1, d_recv_buffer_y1, h_recv_buffer_y1_particles ); - if ( index == 4 ) Copy_Particles_Density_Buffer_Device_to_Host( 2, 0, d_recv_buffer_z0, h_recv_buffer_z0_particles ); - if ( index == 5 ) Copy_Particles_Density_Buffer_Device_to_Host( 2, 1, d_recv_buffer_z1, h_recv_buffer_z1_particles ); + + #ifdef MPI_GPU + if (index == 0) + Copy_Particles_Density_Buffer_Device_to_Host(0, 0, d_recv_buffer_x0, + h_recv_buffer_x0_particles); + if (index == 1) + Copy_Particles_Density_Buffer_Device_to_Host(0, 1, d_recv_buffer_x1, + h_recv_buffer_x1_particles); + if (index == 2) + Copy_Particles_Density_Buffer_Device_to_Host(1, 0, d_recv_buffer_y0, + h_recv_buffer_y0_particles); + if (index == 3) + Copy_Particles_Density_Buffer_Device_to_Host(1, 1, d_recv_buffer_y1, + h_recv_buffer_y1_particles); + if (index == 4) + Copy_Particles_Density_Buffer_Device_to_Host(2, 0, d_recv_buffer_z0, + h_recv_buffer_z0_particles); + if (index == 5) + Copy_Particles_Density_Buffer_Device_to_Host(2, 1, d_recv_buffer_z1, + h_recv_buffer_z1_particles); l_recv_buffer_x0 = h_recv_buffer_x0_particles; l_recv_buffer_x1 = h_recv_buffer_x1_particles; l_recv_buffer_y0 = h_recv_buffer_y0_particles; l_recv_buffer_y1 = h_recv_buffer_y1_particles; l_recv_buffer_z0 = h_recv_buffer_z0_particles; l_recv_buffer_z1 = h_recv_buffer_z1_particles; - #else + #else l_recv_buffer_x0 = h_recv_buffer_x0; l_recv_buffer_x1 = h_recv_buffer_x1; l_recv_buffer_y0 = h_recv_buffer_y0; l_recv_buffer_y1 = h_recv_buffer_y1; l_recv_buffer_z0 = h_recv_buffer_z0; l_recv_buffer_z1 = h_recv_buffer_z1; - #endif //MPI_GPU - - Fptr_Unload_Particle_Density - = &Grid3D::Unload_Particles_Density_Boundary_From_Buffer; - - #endif // PARTICLES_GPU - - if ( index == 0 ) (this->*Fptr_Unload_Particle_Density)( 0, 0, l_recv_buffer_x0 ); - if ( index == 1 ) (this->*Fptr_Unload_Particle_Density)( 0, 1, l_recv_buffer_x1 ); - if ( index == 2 ) (this->*Fptr_Unload_Particle_Density)( 1, 0, l_recv_buffer_y0 ); - if ( index == 3 ) (this->*Fptr_Unload_Particle_Density)( 1, 1, l_recv_buffer_y1 ); - if ( index == 4 ) (this->*Fptr_Unload_Particle_Density)( 2, 0, l_recv_buffer_z0 ); - if ( index == 5 ) (this->*Fptr_Unload_Particle_Density)( 2, 1, l_recv_buffer_z1 ); + #endif // MPI_GPU + + Fptr_Unload_Particle_Density = + &Grid3D::Unload_Particles_Density_Boundary_From_Buffer; + + #endif // PARTICLES_GPU + + if (index == 0) + (this->*Fptr_Unload_Particle_Density)(0, 0, l_recv_buffer_x0); + if (index == 1) + (this->*Fptr_Unload_Particle_Density)(0, 1, l_recv_buffer_x1); + if (index == 2) + (this->*Fptr_Unload_Particle_Density)(1, 0, l_recv_buffer_y0); + if (index == 3) + (this->*Fptr_Unload_Particle_Density)(1, 1, l_recv_buffer_y1); + if (index == 4) + (this->*Fptr_Unload_Particle_Density)(2, 0, l_recv_buffer_z0); + if (index == 5) + (this->*Fptr_Unload_Particle_Density)(2, 1, l_recv_buffer_z1); } - #endif //PARTICLES - + #endif // PARTICLES } #endif /*MPI_CHOLLA*/ diff --git a/src/h_correction/flux_correction.h b/src/h_correction/flux_correction.h index 15aac55e3..f4294c73a 100644 --- a/src/h_correction/flux_correction.h +++ b/src/h_correction/flux_correction.h @@ -1,22 +1,39 @@ /*! \file flux_correction.h - * \brief Declarations of functions used in the first-order flux correction method. */ + * \brief Declarations of functions used in the first-order flux correction + * method. */ #ifndef FLUX_CORRECTION_H #define FLUX_CORRECTION_H -void Flux_Correction_3D(Real *C1, Real *C2, int nx, int ny, int nz, int x_off, int y_off, int z_off, int n_ghost, Real dx, Real dy, Real dz, Real xbound, Real ybound, Real zbound, Real dt); +void Flux_Correction_3D(Real *C1, Real *C2, int nx, int ny, int nz, int x_off, + int y_off, int z_off, int n_ghost, Real dx, Real dy, + Real dz, Real xbound, Real ybound, Real zbound, + Real dt); -void fill_flux_array_pcm(Real *C1, int idl, int idr, Real cW[], int n_cells, int dir); +void fill_flux_array_pcm(Real *C1, int idl, int idr, Real cW[], int n_cells, + int dir); -void second_order_fluxes(Real *C1, Real *C2, Real C_i[], Real C_imo[], Real C_imt[], Real C_ipo[], Real C_ipt[], Real C_jmo[], Real C_jmt[], Real C_jpo[], Real C_jpt[], Real C_kmo[], Real C_kmt[], Real C_kpo[], Real C_kpt[], int i, int j, int k, Real dx, Real dy, Real dz, Real dt, int n_fields, int nx, int ny, int nz, int n_cells); +void second_order_fluxes(Real *C1, Real *C2, Real C_i[], Real C_imo[], + Real C_imt[], Real C_ipo[], Real C_ipt[], Real C_jmo[], + Real C_jmt[], Real C_jpo[], Real C_jpt[], Real C_kmo[], + Real C_kmt[], Real C_kpo[], Real C_kpt[], int i, int j, + int k, Real dx, Real dy, Real dz, Real dt, + int n_fields, int nx, int ny, int nz, int n_cells); -void average_cell(Real *C1, int i, int j, int k, int nx, int ny, int nz, int n_cells, int n_fields); +void average_cell(Real *C1, int i, int j, int k, int nx, int ny, int nz, + int n_cells, int n_fields); -void first_order_fluxes(Real *C1, Real *C2, int i, int j, int k, Real dtodx, Real dtody, Real dtodz, int nfields, int nx, int ny, int nz, int n_cells); +void first_order_fluxes(Real *C1, Real *C2, int i, int j, int k, Real dtodx, + Real dtody, Real dtodz, int nfields, int nx, int ny, + int nz, int n_cells); -void first_order_update(Real *C1, Real *C_half, int i, int j, int k, Real dtodx, Real dtody, Real dtodz, int nfields, int nx, int ny, int nz, int n_cells); +void first_order_update(Real *C1, Real *C_half, int i, int j, int k, Real dtodx, + Real dtody, Real dtodz, int nfields, int nx, int ny, + int nz, int n_cells); -void calc_g_3D(int xid, int yid, int zid, int x_off, int y_off, int z_off, int n_ghost, Real dx, Real dy, Real dz, Real xbound, Real ybound, Real zbound, Real *gx, Real *gy, Real *gz); +void calc_g_3D(int xid, int yid, int zid, int x_off, int y_off, int z_off, + int n_ghost, Real dx, Real dy, Real dz, Real xbound, Real ybound, + Real zbound, Real *gx, Real *gy, Real *gz); void cooling_CPU(Real *C2, int id, int n_cells, Real dt); @@ -24,4 +41,4 @@ Real Schure_cool_CPU(Real n, Real T); Real Wiersma_cool_CPU(Real n, Real T); -#endif //FLUX_CORRECTION_H +#endif // FLUX_CORRECTION_H diff --git a/src/h_correction/h_correction_2D_cuda.cu b/src/h_correction/h_correction_2D_cuda.cu index f04f7816e..3ee7c74e5 100644 --- a/src/h_correction/h_correction_2D_cuda.cu +++ b/src/h_correction/h_correction_2D_cuda.cu @@ -2,171 +2,184 @@ * \brief Functions definitions for the H correction kernels. Written following Sanders et al. 1998. */ #ifdef CUDA -#ifdef H_CORRECTION - -#include "../utils/gpu.hpp" -#include -#include "../global/global.h" -#include "../global/global_cuda.h" -#include "../h_correction/h_correction_2D_cuda.h" - - - -/*! \fn void calc_eta_x_2D(Real *dev_bounds_L, Real *dev_bounds_R, Real *eta_x, int nx, int ny, int n_ghost, Real gamma) - * \brief When passed the left and right boundary values at an interface, calculates - the eta value for the interface according to the forumulation in Sanders et al, 1998. */ -__global__ void calc_eta_x_2D(Real *dev_bounds_L, Real *dev_bounds_R, Real *eta_x, int nx, int ny, int n_ghost, Real gamma) + #ifdef H_CORRECTION + + #include + + #include "../global/global.h" + #include "../global/global_cuda.h" + #include "../h_correction/h_correction_2D_cuda.h" + #include "../utils/gpu.hpp" + +/*! \fn void calc_eta_x_2D(Real *dev_bounds_L, Real *dev_bounds_R, Real *eta_x, + int nx, int ny, int n_ghost, Real gamma) + * \brief When passed the left and right boundary values at an interface, + calculates the eta value for the interface according to the forumulation in + Sanders et al, 1998. */ +__global__ void calc_eta_x_2D(Real *dev_bounds_L, Real *dev_bounds_R, + Real *eta_x, int nx, int ny, int n_ghost, + Real gamma) { - int n_cells = nx*ny; + int n_cells = nx * ny; // declare primitive variables for each stencil // these will be placed into registers for each thread Real pl, pr, al, ar; // get a thread ID - int blockId = blockIdx.x + blockIdx.y*gridDim.x; - int tid = threadIdx.x + blockId * blockDim.x; + int blockId = blockIdx.x + blockIdx.y * gridDim.x; + int tid = threadIdx.x + blockId * blockDim.x; int id; - int zid = tid / (nx*ny); - int yid = (tid - zid*nx*ny) / nx; - int xid = tid - zid*nx*ny - yid*nx; - + int zid = tid / (nx * ny); + int yid = (tid - zid * nx * ny) / nx; + int xid = tid - zid * nx * ny - yid * nx; - if (xid > n_ghost-2 && xid < nx-n_ghost && yid > n_ghost-2 && yid < ny-n_ghost+1) - { + if (xid > n_ghost - 2 && xid < nx - n_ghost && yid > n_ghost - 2 && + yid < ny - n_ghost + 1) { // load the interface values into registers - id = xid + yid*nx; - pl = (dev_bounds_L[4*n_cells + id] - - 0.5*(dev_bounds_L[ n_cells+id]*dev_bounds_L[ n_cells+id] + - dev_bounds_L[2*n_cells+id]*dev_bounds_L[2*n_cells+id] + - dev_bounds_L[3*n_cells+id]*dev_bounds_L[3*n_cells+id])/dev_bounds_L[id]) * (gamma - 1.0); - pl = fmax(pl, (Real) 1.0e-20); - pr = (dev_bounds_R[4*n_cells + id] - - 0.5*(dev_bounds_R[ n_cells+id]*dev_bounds_R[ n_cells+id] + - dev_bounds_R[2*n_cells+id]*dev_bounds_R[2*n_cells+id] + - dev_bounds_R[3*n_cells+id]*dev_bounds_R[3*n_cells+id])/dev_bounds_R[id]) * (gamma - 1.0); - pr = fmax(pr, (Real) 1.0e-20); - - al = sqrt(gamma*pl/dev_bounds_L[id]); - ar = sqrt(gamma*pl/dev_bounds_R[id]); - - eta_x[id] = 0.5*fabs((dev_bounds_R[n_cells+id]/dev_bounds_R[id] + ar) - (dev_bounds_L[n_cells+id]/dev_bounds_L[id] - al)); - + id = xid + yid * nx; + pl = + (dev_bounds_L[4 * n_cells + id] - + 0.5 * + (dev_bounds_L[n_cells + id] * dev_bounds_L[n_cells + id] + + dev_bounds_L[2 * n_cells + id] * dev_bounds_L[2 * n_cells + id] + + dev_bounds_L[3 * n_cells + id] * dev_bounds_L[3 * n_cells + id]) / + dev_bounds_L[id]) * + (gamma - 1.0); + pl = fmax(pl, (Real)1.0e-20); + pr = + (dev_bounds_R[4 * n_cells + id] - + 0.5 * + (dev_bounds_R[n_cells + id] * dev_bounds_R[n_cells + id] + + dev_bounds_R[2 * n_cells + id] * dev_bounds_R[2 * n_cells + id] + + dev_bounds_R[3 * n_cells + id] * dev_bounds_R[3 * n_cells + id]) / + dev_bounds_R[id]) * + (gamma - 1.0); + pr = fmax(pr, (Real)1.0e-20); + + al = sqrt(gamma * pl / dev_bounds_L[id]); + ar = sqrt(gamma * pl / dev_bounds_R[id]); + + eta_x[id] = + 0.5 * fabs((dev_bounds_R[n_cells + id] / dev_bounds_R[id] + ar) - + (dev_bounds_L[n_cells + id] / dev_bounds_L[id] - al)); } - } - - -/*! \fn void calc_eta_y(Real *dev_bounds_L, Real *dev_bounds_R, Real *eta_y, int nx, int ny, int n_ghost, Real gamma) - * \brief When passed the left and right boundary values at an interface, calculates - the eta value for the interface according to the forumulation in Sanders et al, 1998. */ -__global__ void calc_eta_y_2D(Real *dev_bounds_L, Real *dev_bounds_R, Real *eta_y, int nx, int ny, int n_ghost, Real gamma) +/*! \fn void calc_eta_y(Real *dev_bounds_L, Real *dev_bounds_R, Real *eta_y, int + nx, int ny, int n_ghost, Real gamma) + * \brief When passed the left and right boundary values at an interface, + calculates the eta value for the interface according to the forumulation in + Sanders et al, 1998. */ +__global__ void calc_eta_y_2D(Real *dev_bounds_L, Real *dev_bounds_R, + Real *eta_y, int nx, int ny, int n_ghost, + Real gamma) { - int n_cells = nx*ny; + int n_cells = nx * ny; // declare primitive variables for each stencil // these will be placed into registers for each thread Real pl, pr, al, ar; // get a thread ID - int blockId = blockIdx.x + blockIdx.y*gridDim.x; - int tid = threadIdx.x + blockId*blockDim.x; + int blockId = blockIdx.x + blockIdx.y * gridDim.x; + int tid = threadIdx.x + blockId * blockDim.x; int id; - int zid = tid / (nx*ny); - int yid = (tid - zid*nx*ny) / nx; - int xid = tid - zid*nx*ny - yid*nx; + int zid = tid / (nx * ny); + int yid = (tid - zid * nx * ny) / nx; + int xid = tid - zid * nx * ny - yid * nx; - - if (yid > n_ghost-2 && yid < ny-n_ghost && xid > n_ghost-2 && xid < nx-n_ghost+1) - { + if (yid > n_ghost - 2 && yid < ny - n_ghost && xid > n_ghost - 2 && + xid < nx - n_ghost + 1) { // load the interface values into registers - id = xid + yid*nx; - pl = (dev_bounds_L[4*n_cells + id] - - 0.5*(dev_bounds_L[2*n_cells+id]*dev_bounds_L[2*n_cells+id] + - dev_bounds_L[3*n_cells+id]*dev_bounds_L[3*n_cells+id] + - dev_bounds_L[ n_cells+id]*dev_bounds_L[ n_cells+id])/dev_bounds_L[id]) * (gamma - 1.0); - pl = fmax(pl, (Real) 1.0e-20); - pr = (dev_bounds_R[4*n_cells + id] - - 0.5*(dev_bounds_R[2*n_cells+id]*dev_bounds_R[2*n_cells+id] + - dev_bounds_R[3*n_cells+id]*dev_bounds_R[3*n_cells+id] + - dev_bounds_R[ n_cells+id]*dev_bounds_R[ n_cells+id])/dev_bounds_R[id]) * (gamma - 1.0); - pr = fmax(pr, (Real) 1.0e-20); - - al = sqrt(gamma*pl/dev_bounds_L[id]); - ar = sqrt(gamma*pl/dev_bounds_R[id]); - - eta_y[id] = 0.5*fabs((dev_bounds_R[2*n_cells+id]/dev_bounds_R[id] + ar) - (dev_bounds_L[2*n_cells+id]/dev_bounds_L[id] - al)); - + id = xid + yid * nx; + pl = (dev_bounds_L[4 * n_cells + id] - + 0.5 * + (dev_bounds_L[2 * n_cells + id] * dev_bounds_L[2 * n_cells + id] + + dev_bounds_L[3 * n_cells + id] * dev_bounds_L[3 * n_cells + id] + + dev_bounds_L[n_cells + id] * dev_bounds_L[n_cells + id]) / + dev_bounds_L[id]) * + (gamma - 1.0); + pl = fmax(pl, (Real)1.0e-20); + pr = (dev_bounds_R[4 * n_cells + id] - + 0.5 * + (dev_bounds_R[2 * n_cells + id] * dev_bounds_R[2 * n_cells + id] + + dev_bounds_R[3 * n_cells + id] * dev_bounds_R[3 * n_cells + id] + + dev_bounds_R[n_cells + id] * dev_bounds_R[n_cells + id]) / + dev_bounds_R[id]) * + (gamma - 1.0); + pr = fmax(pr, (Real)1.0e-20); + + al = sqrt(gamma * pl / dev_bounds_L[id]); + ar = sqrt(gamma * pl / dev_bounds_R[id]); + + eta_y[id] = + 0.5 * fabs((dev_bounds_R[2 * n_cells + id] / dev_bounds_R[id] + ar) - + (dev_bounds_L[2 * n_cells + id] / dev_bounds_L[id] - al)); } - } - - -/*! \fn void calc_etah_x_2D(Real *eta_x, Real *eta_y, Real *etah_x, int nx, int ny, int n_ghost) +/*! \fn void calc_etah_x_2D(Real *eta_x, Real *eta_y, Real *etah_x, int nx, int + ny, int n_ghost) * \brief When passed the eta values at every interface, calculates - the eta_h value for the interface according to the forumulation in Sanders et al, 1998. */ -__global__ void calc_etah_x_2D(Real *eta_x, Real *eta_y, Real *etah_x, int nx, int ny, int n_ghost) + the eta_h value for the interface according to the forumulation in + Sanders et al, 1998. */ +__global__ void calc_etah_x_2D(Real *eta_x, Real *eta_y, Real *etah_x, int nx, + int ny, int n_ghost) { - // get a thread ID - int blockId = blockIdx.x + blockIdx.y*gridDim.x; - int tid = threadIdx.x + blockId*blockDim.x; + int blockId = blockIdx.x + blockIdx.y * gridDim.x; + int tid = threadIdx.x + blockId * blockDim.x; int id; - int zid = tid / (nx*ny); - int yid = (tid - zid*nx*ny) / nx; - int xid = tid - zid*nx*ny - yid*nx; + int zid = tid / (nx * ny); + int yid = (tid - zid * nx * ny) / nx; + int xid = tid - zid * nx * ny - yid * nx; Real etah; - if (xid > n_ghost-2 && xid < nx-n_ghost && yid > n_ghost-1 && yid < ny-n_ghost) - { - id = xid + yid*nx; + if (xid > n_ghost - 2 && xid < nx - n_ghost && yid > n_ghost - 1 && + yid < ny - n_ghost) { + id = xid + yid * nx; - etah = fmax(eta_y[xid + (yid-1)*nx], eta_y[id]); - etah = fmax(etah, eta_x[id]); - etah = fmax(etah, eta_y[xid+1 + (yid-1)*nx]); - etah = fmax(etah, eta_y[xid+1 + yid*nx]); + etah = fmax(eta_y[xid + (yid - 1) * nx], eta_y[id]); + etah = fmax(etah, eta_x[id]); + etah = fmax(etah, eta_y[xid + 1 + (yid - 1) * nx]); + etah = fmax(etah, eta_y[xid + 1 + yid * nx]); - etah_x[id] = etah; + etah_x[id] = etah; } - } - -/*! \fn void calc_etah_y_2D(Real *eta_x, Real *eta_y, Real *etah_y, int nx, int ny, int n_ghost) +/*! \fn void calc_etah_y_2D(Real *eta_x, Real *eta_y, Real *etah_y, int nx, int + ny, int n_ghost) * \brief When passed the eta values at every interface, calculates - the eta_h value for the interface according to the forumulation in Sanders et al, 1998. */ -__global__ void calc_etah_y_2D(Real *eta_x, Real *eta_y, Real *etah_y, int nx, int ny, int n_ghost) + the eta_h value for the interface according to the forumulation in + Sanders et al, 1998. */ +__global__ void calc_etah_y_2D(Real *eta_x, Real *eta_y, Real *etah_y, int nx, + int ny, int n_ghost) { - // get a thread ID - int blockId = blockIdx.x + blockIdx.y*gridDim.x; - int tid = threadIdx.x + blockId*blockDim.x; + int blockId = blockIdx.x + blockIdx.y * gridDim.x; + int tid = threadIdx.x + blockId * blockDim.x; int id; - int zid = tid / (nx*ny); - int yid = (tid - zid*nx*ny) / nx; - int xid = tid - zid*nx*ny - yid*nx; + int zid = tid / (nx * ny); + int yid = (tid - zid * nx * ny) / nx; + int xid = tid - zid * nx * ny - yid * nx; Real etah; - if (yid > n_ghost-2 && yid < ny-n_ghost && xid > n_ghost-1 && xid < nx-n_ghost) - { - id = xid + yid*nx; + if (yid > n_ghost - 2 && yid < ny - n_ghost && xid > n_ghost - 1 && + xid < nx - n_ghost) { + id = xid + yid * nx; - etah = fmax(eta_x[xid-1 + yid*nx], eta_x[id]); - etah = fmax(etah, eta_y[id]); - etah = fmax(etah, eta_x[xid-1 + (yid+1)*nx]); - etah = fmax(etah, eta_x[xid + (yid+1)*nx]); + etah = fmax(eta_x[xid - 1 + yid * nx], eta_x[id]); + etah = fmax(etah, eta_y[id]); + etah = fmax(etah, eta_x[xid - 1 + (yid + 1) * nx]); + etah = fmax(etah, eta_x[xid + (yid + 1) * nx]); - etah_y[id] = etah; + etah_y[id] = etah; } - } - - - -#endif //H_CORRECTION -#endif //CUDA + #endif // H_CORRECTION +#endif // CUDA diff --git a/src/h_correction/h_correction_2D_cuda.h b/src/h_correction/h_correction_2D_cuda.h index daa11e39a..8fea8200f 100644 --- a/src/h_correction/h_correction_2D_cuda.h +++ b/src/h_correction/h_correction_2D_cuda.h @@ -2,41 +2,50 @@ * \brief Functions declarations for the H correction kernels. Written following Sanders et al. 1998. */ #ifdef CUDA -#ifdef H_CORRECTION -#ifndef H_CORRECTION_2D_H -#define H_CORRECTION_2D_H - -#include "../utils/gpu.hpp" -#include -#include "../global/global.h" -#include "../global/global_cuda.h" - - - -/*! \fn void calc_eta_x(Real *dev_bounds_L, Real *dev_bounds_R, Real *eta_x, int nx, int ny, int nz, int n_ghost, Real gamma) - * \brief When passed the left and right boundary values at an interface, calculates - the eta value for the interface according to the forumulation in Sanders et al, 1998. */ -__global__ void calc_eta_x_2D(Real *dev_bounds_L, Real *dev_bounds_R, Real *eta_x, int nx, int ny, int n_ghost, Real gamma); - - -/*! \fn void calc_eta_y(Real *dev_bounds_L, Real *dev_bounds_R, Real *eta_y, int nx, int ny, int nz, int n_ghost, Real gamma) - * \brief When passed the left and right boundary values at an interface, calculates - the eta value for the interface according to the forumulation in Sanders et al, 1998. */ -__global__ void calc_eta_y_2D(Real *dev_bounds_L, Real *dev_bounds_R, Real *eta_y, int nx, int ny, int n_ghost, Real gamma); - - -/*! \fn void calc_etah_x_2D(Real *eta_x, Real *eta_y, Real *etah_x, int nx, int ny, int n_ghost) + #ifdef H_CORRECTION + #ifndef H_CORRECTION_2D_H + #define H_CORRECTION_2D_H + + #include + + #include "../global/global.h" + #include "../global/global_cuda.h" + #include "../utils/gpu.hpp" + +/*! \fn void calc_eta_x(Real *dev_bounds_L, Real *dev_bounds_R, Real *eta_x, int + nx, int ny, int nz, int n_ghost, Real gamma) + * \brief When passed the left and right boundary values at an interface, + calculates the eta value for the interface according to the forumulation in + Sanders et al, 1998. */ +__global__ void calc_eta_x_2D(Real *dev_bounds_L, Real *dev_bounds_R, + Real *eta_x, int nx, int ny, int n_ghost, + Real gamma); + +/*! \fn void calc_eta_y(Real *dev_bounds_L, Real *dev_bounds_R, Real *eta_y, int + nx, int ny, int nz, int n_ghost, Real gamma) + * \brief When passed the left and right boundary values at an interface, + calculates the eta value for the interface according to the forumulation in + Sanders et al, 1998. */ +__global__ void calc_eta_y_2D(Real *dev_bounds_L, Real *dev_bounds_R, + Real *eta_y, int nx, int ny, int n_ghost, + Real gamma); + +/*! \fn void calc_etah_x_2D(Real *eta_x, Real *eta_y, Real *etah_x, int nx, int + ny, int n_ghost) * \brief When passed the eta values at every interface, calculates - the eta_h value for the interface according to the forumulation in Sanders et al, 1998. */ -__global__ void calc_etah_x_2D(Real *eta_x, Real *eta_y, Real *etah_x, int nx, int ny, int n_ghost); - + the eta_h value for the interface according to the forumulation in + Sanders et al, 1998. */ +__global__ void calc_etah_x_2D(Real *eta_x, Real *eta_y, Real *etah_x, int nx, + int ny, int n_ghost); -/*! \fn void calc_etah_y_2D(Real *eta_x, Real *eta_y, Real *etah_y, int nx, int ny, int n_ghost) +/*! \fn void calc_etah_y_2D(Real *eta_x, Real *eta_y, Real *etah_y, int nx, int + ny, int n_ghost) * \brief When passed the eta values at every interface, calculates - the eta_h value for the interface according to the forumulation in Sanders et al, 1998. */ -__global__ void calc_etah_y_2D(Real *eta_x, Real *eta_y, Real *etah_y, int nx, int ny, int n_ghost); - - -#endif //H_CORRECTION_2D_H -#endif //H_CORRECTION -#endif //CUDA + the eta_h value for the interface according to the forumulation in + Sanders et al, 1998. */ +__global__ void calc_etah_y_2D(Real *eta_x, Real *eta_y, Real *etah_y, int nx, + int ny, int n_ghost); + + #endif // H_CORRECTION_2D_H + #endif // H_CORRECTION +#endif // CUDA diff --git a/src/h_correction/h_correction_3D_cuda.cu b/src/h_correction/h_correction_3D_cuda.cu index e58632eaf..089665bdc 100644 --- a/src/h_correction/h_correction_3D_cuda.cu +++ b/src/h_correction/h_correction_3D_cuda.cu @@ -3,261 +3,291 @@ Written following Sanders et al. 1998. */ #ifdef CUDA -#include "../utils/gpu.hpp" -#include -#include "../global/global.h" -#include "../global/global_cuda.h" -#include "../h_correction/h_correction_3D_cuda.h" - - - -/*! \fn void calc_eta_x_3D(Real *dev_bounds_L, Real *dev_bounds_R, Real *eta_x, int nx, int ny, int nz, int n_ghost, Real gamma) - * \brief When passed the left and right boundary values at an interface, calculates - the eta value for the interface according to the forumulation in Sanders et al, 1998. */ -__global__ void calc_eta_x_3D(Real *dev_bounds_L, Real *dev_bounds_R, Real *eta_x, int nx, int ny, int nz, int n_ghost, Real gamma) + #include + + #include "../global/global.h" + #include "../global/global_cuda.h" + #include "../h_correction/h_correction_3D_cuda.h" + #include "../utils/gpu.hpp" + +/*! \fn void calc_eta_x_3D(Real *dev_bounds_L, Real *dev_bounds_R, Real *eta_x, + int nx, int ny, int nz, int n_ghost, Real gamma) + * \brief When passed the left and right boundary values at an interface, + calculates the eta value for the interface according to the forumulation in + Sanders et al, 1998. */ +__global__ void calc_eta_x_3D(Real *dev_bounds_L, Real *dev_bounds_R, + Real *eta_x, int nx, int ny, int nz, int n_ghost, + Real gamma) { - int n_cells = nx*ny*nz; + int n_cells = nx * ny * nz; // declare primitive variables for each stencil // these will be placed into registers for each thread Real pl, pr, al, ar; // get a thread ID - int tid = threadIdx.x + blockIdx.x*blockDim.x; + int tid = threadIdx.x + blockIdx.x * blockDim.x; int id; - int zid = tid / (nx*ny); - int yid = (tid - zid*nx*ny) / nx; - int xid = tid - zid*nx*ny - yid*nx; + int zid = tid / (nx * ny); + int yid = (tid - zid * nx * ny) / nx; + int xid = tid - zid * nx * ny - yid * nx; // x-direction - if (xid > n_ghost-2 && xid < nx-n_ghost && yid > n_ghost-2 && yid < ny-n_ghost+1 && zid > n_ghost-2 && zid < nz-n_ghost+1) - { + if (xid > n_ghost - 2 && xid < nx - n_ghost && yid > n_ghost - 2 && + yid < ny - n_ghost + 1 && zid > n_ghost - 2 && zid < nz - n_ghost + 1) { // load the interface values into registers - id = xid + yid*nx + zid*nx*ny; - pl = (dev_bounds_L[4*n_cells + id] - - 0.5*(dev_bounds_L[ n_cells+id]*dev_bounds_L[ n_cells+id] + - dev_bounds_L[2*n_cells+id]*dev_bounds_L[2*n_cells+id] + - dev_bounds_L[3*n_cells+id]*dev_bounds_L[3*n_cells+id])/dev_bounds_L[id]) * (gamma - 1.0); - pl = fmax(pl, (Real) 1.0e-20); - pr = (dev_bounds_R[4*n_cells + id] - - 0.5*(dev_bounds_R[ n_cells+id]*dev_bounds_R[ n_cells+id] + - dev_bounds_R[2*n_cells+id]*dev_bounds_R[2*n_cells+id] + - dev_bounds_R[3*n_cells+id]*dev_bounds_R[3*n_cells+id])/dev_bounds_R[id]) * (gamma - 1.0); - pr = fmax(pr, (Real) 1.0e-20); - - al = sqrt(gamma*pl/dev_bounds_L[id]); - ar = sqrt(gamma*pl/dev_bounds_R[id]); - - eta_x[id] = 0.5*fabs((dev_bounds_R[n_cells+id]/dev_bounds_R[id] + ar) - (dev_bounds_L[n_cells+id]/dev_bounds_L[id] - al)); - + id = xid + yid * nx + zid * nx * ny; + pl = + (dev_bounds_L[4 * n_cells + id] - + 0.5 * + (dev_bounds_L[n_cells + id] * dev_bounds_L[n_cells + id] + + dev_bounds_L[2 * n_cells + id] * dev_bounds_L[2 * n_cells + id] + + dev_bounds_L[3 * n_cells + id] * dev_bounds_L[3 * n_cells + id]) / + dev_bounds_L[id]) * + (gamma - 1.0); + pl = fmax(pl, (Real)1.0e-20); + pr = + (dev_bounds_R[4 * n_cells + id] - + 0.5 * + (dev_bounds_R[n_cells + id] * dev_bounds_R[n_cells + id] + + dev_bounds_R[2 * n_cells + id] * dev_bounds_R[2 * n_cells + id] + + dev_bounds_R[3 * n_cells + id] * dev_bounds_R[3 * n_cells + id]) / + dev_bounds_R[id]) * + (gamma - 1.0); + pr = fmax(pr, (Real)1.0e-20); + + al = sqrt(gamma * pl / dev_bounds_L[id]); + ar = sqrt(gamma * pl / dev_bounds_R[id]); + + eta_x[id] = + 0.5 * fabs((dev_bounds_R[n_cells + id] / dev_bounds_R[id] + ar) - + (dev_bounds_L[n_cells + id] / dev_bounds_L[id] - al)); } - } - - -/*! \fn void calc_eta_y(Real *dev_bounds_L, Real *dev_bounds_R, Real *eta_y, int nx, int ny, int nz, int n_ghost, Real gamma) - * \brief When passed the left and right boundary values at an interface, calculates - the eta value for the interface according to the forumulation in Sanders et al, 1998. */ -__global__ void calc_eta_y_3D(Real *dev_bounds_L, Real *dev_bounds_R, Real *eta_y, int nx, int ny, int nz, int n_ghost, Real gamma) +/*! \fn void calc_eta_y(Real *dev_bounds_L, Real *dev_bounds_R, Real *eta_y, int + nx, int ny, int nz, int n_ghost, Real gamma) + * \brief When passed the left and right boundary values at an interface, + calculates the eta value for the interface according to the forumulation in + Sanders et al, 1998. */ +__global__ void calc_eta_y_3D(Real *dev_bounds_L, Real *dev_bounds_R, + Real *eta_y, int nx, int ny, int nz, int n_ghost, + Real gamma) { - int n_cells = nx*ny*nz; + int n_cells = nx * ny * nz; // declare primitive variables for each stencil // these will be placed into registers for each thread Real pl, pr, al, ar; // get a thread ID - int tid = threadIdx.x + blockIdx.x*blockDim.x; + int tid = threadIdx.x + blockIdx.x * blockDim.x; int id; - int zid = tid / (nx*ny); - int yid = (tid - zid*nx*ny) / nx; - int xid = tid - zid*nx*ny - yid*nx; + int zid = tid / (nx * ny); + int yid = (tid - zid * nx * ny) / nx; + int xid = tid - zid * nx * ny - yid * nx; // y-direction - if (yid > n_ghost-2 && yid < ny-n_ghost && xid > n_ghost-2 && xid < nx-n_ghost+1 && zid > n_ghost-2 && zid < nz-n_ghost+1) - { + if (yid > n_ghost - 2 && yid < ny - n_ghost && xid > n_ghost - 2 && + xid < nx - n_ghost + 1 && zid > n_ghost - 2 && zid < nz - n_ghost + 1) { // load the interface values into registers - id = xid + yid*nx + zid*nx*ny; - pl = (dev_bounds_L[4*n_cells + id] - - 0.5*(dev_bounds_L[2*n_cells+id]*dev_bounds_L[2*n_cells+id] + - dev_bounds_L[3*n_cells+id]*dev_bounds_L[3*n_cells+id] + - dev_bounds_L[ n_cells+id]*dev_bounds_L[ n_cells+id])/dev_bounds_L[id]) * (gamma - 1.0); - pl = fmax(pl, (Real) 1.0e-20); - pr = (dev_bounds_R[4*n_cells + id] - - 0.5*(dev_bounds_R[2*n_cells+id]*dev_bounds_R[2*n_cells+id] + - dev_bounds_R[3*n_cells+id]*dev_bounds_R[3*n_cells+id] + - dev_bounds_R[ n_cells+id]*dev_bounds_R[ n_cells+id])/dev_bounds_R[id]) * (gamma - 1.0); - pr = fmax(pr, (Real) 1.0e-20); - - al = sqrt(gamma*pl/dev_bounds_L[id]); - ar = sqrt(gamma*pl/dev_bounds_R[id]); - - eta_y[id] = 0.5*fabs((dev_bounds_R[2*n_cells+id]/dev_bounds_R[id] + ar) - (dev_bounds_L[2*n_cells+id]/dev_bounds_L[id] - al)); - + id = xid + yid * nx + zid * nx * ny; + pl = (dev_bounds_L[4 * n_cells + id] - + 0.5 * + (dev_bounds_L[2 * n_cells + id] * dev_bounds_L[2 * n_cells + id] + + dev_bounds_L[3 * n_cells + id] * dev_bounds_L[3 * n_cells + id] + + dev_bounds_L[n_cells + id] * dev_bounds_L[n_cells + id]) / + dev_bounds_L[id]) * + (gamma - 1.0); + pl = fmax(pl, (Real)1.0e-20); + pr = (dev_bounds_R[4 * n_cells + id] - + 0.5 * + (dev_bounds_R[2 * n_cells + id] * dev_bounds_R[2 * n_cells + id] + + dev_bounds_R[3 * n_cells + id] * dev_bounds_R[3 * n_cells + id] + + dev_bounds_R[n_cells + id] * dev_bounds_R[n_cells + id]) / + dev_bounds_R[id]) * + (gamma - 1.0); + pr = fmax(pr, (Real)1.0e-20); + + al = sqrt(gamma * pl / dev_bounds_L[id]); + ar = sqrt(gamma * pl / dev_bounds_R[id]); + + eta_y[id] = + 0.5 * fabs((dev_bounds_R[2 * n_cells + id] / dev_bounds_R[id] + ar) - + (dev_bounds_L[2 * n_cells + id] / dev_bounds_L[id] - al)); } - } - -/*! \fn void calc_eta_z(Real *dev_bounds_L, Real *dev_bounds_R, Real *eta_z, int nx, int ny, int nz, int n_ghost, Real gamma) - * \brief When passed the left and right boundary values at an interface, calculates - the eta value for the interface according to the forumulation in Sanders et al, 1998. */ -__global__ void calc_eta_z_3D(Real *dev_bounds_L, Real *dev_bounds_R, Real *eta_z, int nx, int ny, int nz, int n_ghost, Real gamma) +/*! \fn void calc_eta_z(Real *dev_bounds_L, Real *dev_bounds_R, Real *eta_z, int + nx, int ny, int nz, int n_ghost, Real gamma) + * \brief When passed the left and right boundary values at an interface, + calculates the eta value for the interface according to the forumulation in + Sanders et al, 1998. */ +__global__ void calc_eta_z_3D(Real *dev_bounds_L, Real *dev_bounds_R, + Real *eta_z, int nx, int ny, int nz, int n_ghost, + Real gamma) { - int n_cells = nx*ny*nz; + int n_cells = nx * ny * nz; // declare primitive variables for each stencil // these will be placed into registers for each thread Real pl, pr, al, ar; // get a thread ID - int tid = threadIdx.x + blockIdx.x*blockDim.x; + int tid = threadIdx.x + blockIdx.x * blockDim.x; int id; - int zid = tid / (nx*ny); - int yid = (tid - zid*nx*ny) / nx; - int xid = tid - zid*nx*ny - yid*nx; + int zid = tid / (nx * ny); + int yid = (tid - zid * nx * ny) / nx; + int xid = tid - zid * nx * ny - yid * nx; // z-direction - if (zid > n_ghost-2 && zid < nz-n_ghost && xid > n_ghost-2 && xid < nx-n_ghost+1 && yid > n_ghost-2 && yid < ny-n_ghost+1) - { + if (zid > n_ghost - 2 && zid < nz - n_ghost && xid > n_ghost - 2 && + xid < nx - n_ghost + 1 && yid > n_ghost - 2 && yid < ny - n_ghost + 1) { // load the interface values into registers - id = xid + yid*nx + zid*nx*ny; - pl = (dev_bounds_L[4*n_cells + id] - - 0.5*(dev_bounds_L[3*n_cells+id]*dev_bounds_L[3*n_cells+id] + - dev_bounds_L[ n_cells+id]*dev_bounds_L[ n_cells+id] + - dev_bounds_L[2*n_cells+id]*dev_bounds_L[2*n_cells+id])/dev_bounds_L[id]) * (gamma - 1.0); - pl = fmax(pl, (Real) 1.0e-20); - pr = (dev_bounds_R[4*n_cells + id] - - 0.5*(dev_bounds_R[3*n_cells+id]*dev_bounds_R[3*n_cells+id] + - dev_bounds_R[ n_cells+id]*dev_bounds_R[ n_cells+id] + - dev_bounds_R[2*n_cells+id]*dev_bounds_R[2*n_cells+id])/dev_bounds_R[id]) * (gamma - 1.0); - pr = fmax(pr, (Real) 1.0e-20); - - al = sqrt(gamma*pl/dev_bounds_L[id]); - ar = sqrt(gamma*pl/dev_bounds_R[id]); - - eta_z[id] = 0.5*fabs((dev_bounds_R[3*n_cells+id]/dev_bounds_R[id] + ar) - (dev_bounds_L[3*n_cells+id]/dev_bounds_L[id] - al)); - + id = xid + yid * nx + zid * nx * ny; + pl = + (dev_bounds_L[4 * n_cells + id] - + 0.5 * + (dev_bounds_L[3 * n_cells + id] * dev_bounds_L[3 * n_cells + id] + + dev_bounds_L[n_cells + id] * dev_bounds_L[n_cells + id] + + dev_bounds_L[2 * n_cells + id] * dev_bounds_L[2 * n_cells + id]) / + dev_bounds_L[id]) * + (gamma - 1.0); + pl = fmax(pl, (Real)1.0e-20); + pr = + (dev_bounds_R[4 * n_cells + id] - + 0.5 * + (dev_bounds_R[3 * n_cells + id] * dev_bounds_R[3 * n_cells + id] + + dev_bounds_R[n_cells + id] * dev_bounds_R[n_cells + id] + + dev_bounds_R[2 * n_cells + id] * dev_bounds_R[2 * n_cells + id]) / + dev_bounds_R[id]) * + (gamma - 1.0); + pr = fmax(pr, (Real)1.0e-20); + + al = sqrt(gamma * pl / dev_bounds_L[id]); + ar = sqrt(gamma * pl / dev_bounds_R[id]); + + eta_z[id] = + 0.5 * fabs((dev_bounds_R[3 * n_cells + id] / dev_bounds_R[id] + ar) - + (dev_bounds_L[3 * n_cells + id] / dev_bounds_L[id] - al)); } - } - - -/*! \fn void calc_etah_x_3D(Real *eta_x, Real *eta_y, Real *eta_z, Real *etah_x, int nx, int ny, int nz, int n_ghost) +/*! \fn void calc_etah_x_3D(Real *eta_x, Real *eta_y, Real *eta_z, Real *etah_x, + int nx, int ny, int nz, int n_ghost) * \brief When passed the eta values at every interface, calculates - the eta_h value for the interface according to the forumulation in Sanders et al, 1998. */ -__global__ void calc_etah_x_3D(Real *eta_x, Real *eta_y, Real *eta_z, Real *etah_x, int nx, int ny, int nz, int n_ghost) + the eta_h value for the interface according to the forumulation in + Sanders et al, 1998. */ +__global__ void calc_etah_x_3D(Real *eta_x, Real *eta_y, Real *eta_z, + Real *etah_x, int nx, int ny, int nz, + int n_ghost) { - // get a thread ID - int tid = threadIdx.x + blockIdx.x*blockDim.x; + int tid = threadIdx.x + blockIdx.x * blockDim.x; int id; - int zid = tid / (nx*ny); - int yid = (tid - zid*nx*ny) / nx; - int xid = tid - zid*nx*ny - yid*nx; + int zid = tid / (nx * ny); + int yid = (tid - zid * nx * ny) / nx; + int xid = tid - zid * nx * ny - yid * nx; Real etah; // x-direction - if (xid > n_ghost-2 && xid < nx-n_ghost && yid > n_ghost-1 && yid < ny-n_ghost && zid > n_ghost-1 && zid < nz-n_ghost) - { - id = xid + yid*nx + zid*nx*ny; + if (xid > n_ghost - 2 && xid < nx - n_ghost && yid > n_ghost - 1 && + yid < ny - n_ghost && zid > n_ghost - 1 && zid < nz - n_ghost) { + id = xid + yid * nx + zid * nx * ny; - etah = fmax(eta_y[xid + (yid-1)*nx + zid*nx*ny], eta_y[xid+1 + (yid-1)*nx + zid*nx*ny]); + etah = fmax(eta_y[xid + (yid - 1) * nx + zid * nx * ny], + eta_y[xid + 1 + (yid - 1) * nx + zid * nx * ny]); etah = fmax(etah, eta_y[id]); - etah = fmax(etah, eta_y[xid+1 + yid*nx + zid*nx*ny]); + etah = fmax(etah, eta_y[xid + 1 + yid * nx + zid * nx * ny]); - etah = fmax(etah, eta_z[xid + yid*nx + (zid-1)*nx*ny]); - etah = fmax(etah, eta_z[xid+1 + yid*nx + (zid-1)*nx*ny]); + etah = fmax(etah, eta_z[xid + yid * nx + (zid - 1) * nx * ny]); + etah = fmax(etah, eta_z[xid + 1 + yid * nx + (zid - 1) * nx * ny]); etah = fmax(etah, eta_z[id]); - etah = fmax(etah, eta_z[xid+1 + yid*nx + zid*nx*ny]); + etah = fmax(etah, eta_z[xid + 1 + yid * nx + zid * nx * ny]); etah = fmax(etah, eta_x[id]); etah_x[id] = etah; - } - } - -/*! \fn void calc_etah_y_3D(Real *eta_x, Real *eta_y, Real *eta_z, Real *etah_y, int nx, int ny, int nz, int n_ghost) +/*! \fn void calc_etah_y_3D(Real *eta_x, Real *eta_y, Real *eta_z, Real *etah_y, + int nx, int ny, int nz, int n_ghost) * \brief When passed the eta values at every interface, calculates - the eta_h value for the interface according to the forumulation in Sanders et al, 1998. */ -__global__ void calc_etah_y_3D(Real *eta_x, Real *eta_y, Real *eta_z, Real *etah_y, int nx, int ny, int nz, int n_ghost) + the eta_h value for the interface according to the forumulation in + Sanders et al, 1998. */ +__global__ void calc_etah_y_3D(Real *eta_x, Real *eta_y, Real *eta_z, + Real *etah_y, int nx, int ny, int nz, + int n_ghost) { - // get a thread ID - int tid = threadIdx.x + blockIdx.x*blockDim.x; + int tid = threadIdx.x + blockIdx.x * blockDim.x; int id; - int zid = tid / (nx*ny); - int yid = (tid - zid*nx*ny) / nx; - int xid = tid - zid*nx*ny - yid*nx; + int zid = tid / (nx * ny); + int yid = (tid - zid * nx * ny) / nx; + int xid = tid - zid * nx * ny - yid * nx; Real etah; // y-direction - if (yid > n_ghost-2 && yid < ny-n_ghost && xid > n_ghost-1 && xid < nx-n_ghost && zid > n_ghost-1 && zid < nz-n_ghost) - { - id = xid + yid*nx + zid*nx*ny; + if (yid > n_ghost - 2 && yid < ny - n_ghost && xid > n_ghost - 1 && + xid < nx - n_ghost && zid > n_ghost - 1 && zid < nz - n_ghost) { + id = xid + yid * nx + zid * nx * ny; - etah = fmax(eta_z[xid + yid*nx + (zid-1)*nx*ny], eta_z[xid + (yid+1)*nx + (zid-1)*nx*ny]); + etah = fmax(eta_z[xid + yid * nx + (zid - 1) * nx * ny], + eta_z[xid + (yid + 1) * nx + (zid - 1) * nx * ny]); etah = fmax(etah, eta_z[id]); - etah = fmax(etah, eta_z[xid + (yid+1)*nx + zid*nx*ny]); + etah = fmax(etah, eta_z[xid + (yid + 1) * nx + zid * nx * ny]); - etah = fmax(etah, eta_x[xid-1 + yid*nx + zid*nx*ny]); - etah = fmax(etah, eta_x[xid-1 + (yid+1)*nx + zid*nx*ny]); + etah = fmax(etah, eta_x[xid - 1 + yid * nx + zid * nx * ny]); + etah = fmax(etah, eta_x[xid - 1 + (yid + 1) * nx + zid * nx * ny]); etah = fmax(etah, eta_x[id]); - etah = fmax(etah, eta_x[xid + (yid+1)*nx + zid*nx*ny]); + etah = fmax(etah, eta_x[xid + (yid + 1) * nx + zid * nx * ny]); etah = fmax(etah, eta_y[id]); etah_y[id] = etah; - } - } - - -/*! \fn void calc_etah_z_3D(Real *eta_x, Real *eta_y, Real *eta_z, Real *etah_z, int nx, int ny, int nz, int n_ghost) +/*! \fn void calc_etah_z_3D(Real *eta_x, Real *eta_y, Real *eta_z, Real *etah_z, + int nx, int ny, int nz, int n_ghost) * \brief When passed the eta values at every interface, calculates - the eta_h value for the interface according to the forumulation in Sanders et al, 1998. */ -__global__ void calc_etah_z_3D(Real *eta_x, Real *eta_y, Real *eta_z, Real *etah_z, int nx, int ny, int nz, int n_ghost) + the eta_h value for the interface according to the forumulation in + Sanders et al, 1998. */ +__global__ void calc_etah_z_3D(Real *eta_x, Real *eta_y, Real *eta_z, + Real *etah_z, int nx, int ny, int nz, + int n_ghost) { - // get a thread ID - int tid = threadIdx.x + blockIdx.x*blockDim.x; + int tid = threadIdx.x + blockIdx.x * blockDim.x; int id; - int zid = tid / (nx*ny); - int yid = (tid - zid*nx*ny) / nx; - int xid = tid - zid*nx*ny - yid*nx; + int zid = tid / (nx * ny); + int yid = (tid - zid * nx * ny) / nx; + int xid = tid - zid * nx * ny - yid * nx; Real etah; // z-direction - if (zid > n_ghost-2 && zid < nz-n_ghost && xid > n_ghost-1 && xid < nx-n_ghost && yid > n_ghost-1 && yid < ny-n_ghost) - { - id = xid + yid*nx + zid*nx*ny; + if (zid > n_ghost - 2 && zid < nz - n_ghost && xid > n_ghost - 1 && + xid < nx - n_ghost && yid > n_ghost - 1 && yid < ny - n_ghost) { + id = xid + yid * nx + zid * nx * ny; - etah = fmax(eta_x[xid-1 + yid*nx + zid*nx*ny], eta_x[xid-1 + yid*nx + (zid+1)*nx*ny]); + etah = fmax(eta_x[xid - 1 + yid * nx + zid * nx * ny], + eta_x[xid - 1 + yid * nx + (zid + 1) * nx * ny]); etah = fmax(etah, eta_x[id]); - etah = fmax(etah, eta_x[xid + yid*nx + (zid+1)*nx*ny]); + etah = fmax(etah, eta_x[xid + yid * nx + (zid + 1) * nx * ny]); - etah = fmax(etah, eta_y[xid + (yid-1)*nx + zid*nx*ny]); - etah = fmax(etah, eta_y[xid + (yid-1)*nx + (zid+1)*nx*ny]); + etah = fmax(etah, eta_y[xid + (yid - 1) * nx + zid * nx * ny]); + etah = fmax(etah, eta_y[xid + (yid - 1) * nx + (zid + 1) * nx * ny]); etah = fmax(etah, eta_y[id]); - etah = fmax(etah, eta_y[xid + yid*nx + (zid+1)*nx*ny]); + etah = fmax(etah, eta_y[xid + yid * nx + (zid + 1) * nx * ny]); etah = fmax(etah, eta_z[id]); etah_z[id] = etah; - } - } - -#endif //CUDA +#endif // CUDA diff --git a/src/h_correction/h_correction_3D_cuda.h b/src/h_correction/h_correction_3D_cuda.h index b22041423..6277ca264 100644 --- a/src/h_correction/h_correction_3D_cuda.h +++ b/src/h_correction/h_correction_3D_cuda.h @@ -2,49 +2,65 @@ * \brief Functions declarations for the H correction kernels. Written following Sanders et al. 1998. */ #ifdef CUDA -#ifndef H_CORRECTION_3D_H -#define H_CORRECTION_3D_H - -#include "../utils/gpu.hpp" -#include "../global/global.h" - - - -/*! \fn void calc_eta_x(Real *dev_bounds_L, Real *dev_bounds_R, Real *eta_x, int nx, int ny, int nz, int n_ghost, Real gamma) - * \brief When passed the left and right boundary values at an interface, calculates - the eta value for the interface according to the forumulation in Sanders et al, 1998. */ -__global__ void calc_eta_x_3D(Real *dev_bounds_L, Real *dev_bounds_R, Real *eta_x, int nx, int ny, int nz, int n_ghost, Real gamma); - - -/*! \fn void calc_eta_y(Real *dev_bounds_L, Real *dev_bounds_R, Real *eta_y, int nx, int ny, int nz, int n_ghost, Real gamma) - * \brief When passed the left and right boundary values at an interface, calculates - the eta value for the interface according to the forumulation in Sanders et al, 1998. */ -__global__ void calc_eta_y_3D(Real *dev_bounds_L, Real *dev_bounds_R, Real *eta_y, int nx, int ny, int nz, int n_ghost, Real gamma); - - -/*! \fn void calc_eta_z(Real *dev_bounds_L, Real *dev_bounds_R, Real *eta_z, int nx, int ny, int nz, int n_ghost, Real gamma) - * \brief When passed the left and right boundary values at an interface, calculates - the eta value for the interface according to the forumulation in Sanders et al, 1998. */ -__global__ void calc_eta_z_3D(Real *dev_bounds_L, Real *dev_bounds_R, Real *eta_z, int nx, int ny, int nz, int n_ghost, Real gamma); - - -/*! \fn void calc_etah_x_3D(Real *eta_x, Real *eta_y, Real *eta_z, Real *etah_x, int nx, int ny, int nz, int n_ghost) + #ifndef H_CORRECTION_3D_H + #define H_CORRECTION_3D_H + + #include "../global/global.h" + #include "../utils/gpu.hpp" + +/*! \fn void calc_eta_x(Real *dev_bounds_L, Real *dev_bounds_R, Real *eta_x, int + nx, int ny, int nz, int n_ghost, Real gamma) + * \brief When passed the left and right boundary values at an interface, + calculates the eta value for the interface according to the forumulation in + Sanders et al, 1998. */ +__global__ void calc_eta_x_3D(Real *dev_bounds_L, Real *dev_bounds_R, + Real *eta_x, int nx, int ny, int nz, int n_ghost, + Real gamma); + +/*! \fn void calc_eta_y(Real *dev_bounds_L, Real *dev_bounds_R, Real *eta_y, int + nx, int ny, int nz, int n_ghost, Real gamma) + * \brief When passed the left and right boundary values at an interface, + calculates the eta value for the interface according to the forumulation in + Sanders et al, 1998. */ +__global__ void calc_eta_y_3D(Real *dev_bounds_L, Real *dev_bounds_R, + Real *eta_y, int nx, int ny, int nz, int n_ghost, + Real gamma); + +/*! \fn void calc_eta_z(Real *dev_bounds_L, Real *dev_bounds_R, Real *eta_z, int + nx, int ny, int nz, int n_ghost, Real gamma) + * \brief When passed the left and right boundary values at an interface, + calculates the eta value for the interface according to the forumulation in + Sanders et al, 1998. */ +__global__ void calc_eta_z_3D(Real *dev_bounds_L, Real *dev_bounds_R, + Real *eta_z, int nx, int ny, int nz, int n_ghost, + Real gamma); + +/*! \fn void calc_etah_x_3D(Real *eta_x, Real *eta_y, Real *eta_z, Real *etah_x, + int nx, int ny, int nz, int n_ghost) * \brief When passed the eta values at every interface, calculates - the eta_h value for the interface according to the forumulation in Sanders et al, 1998. */ -__global__ void calc_etah_x_3D(Real *eta_x, Real *eta_y, Real *eta_z, Real *etah_x, int nx, int ny, int nz, int n_ghost); - - -/*! \fn void calc_etah_y_3D(Real *eta_x, Real *eta_y, Real *eta_z, Real *etah_y, int nx, int ny, int nz, int n_ghost) + the eta_h value for the interface according to the forumulation in + Sanders et al, 1998. */ +__global__ void calc_etah_x_3D(Real *eta_x, Real *eta_y, Real *eta_z, + Real *etah_x, int nx, int ny, int nz, + int n_ghost); + +/*! \fn void calc_etah_y_3D(Real *eta_x, Real *eta_y, Real *eta_z, Real *etah_y, + int nx, int ny, int nz, int n_ghost) * \brief When passed the eta values at every interface, calculates - the eta_h value for the interface according to the forumulation in Sanders et al, 1998. */ -__global__ void calc_etah_y_3D(Real *eta_x, Real *eta_y, Real *eta_z, Real *etah_y, int nx, int ny, int nz, int n_ghost); - - -/*! \fn void calc_etah_z_3D(Real *eta_x, Real *eta_y, Real *eta_z, Real *etah_z, int nx, int ny, int nz, int n_ghost) + the eta_h value for the interface according to the forumulation in + Sanders et al, 1998. */ +__global__ void calc_etah_y_3D(Real *eta_x, Real *eta_y, Real *eta_z, + Real *etah_y, int nx, int ny, int nz, + int n_ghost); + +/*! \fn void calc_etah_z_3D(Real *eta_x, Real *eta_y, Real *eta_z, Real *etah_z, + int nx, int ny, int nz, int n_ghost) * \brief When passed the eta values at every interface, calculates - the eta_h value for the interface according to the forumulation in Sanders et al, 1998. */ -__global__ void calc_etah_z_3D(Real *eta_x, Real *eta_y, Real *eta_z, Real *etah_z, int nx, int ny, int nz, int n_ghost); - - -#endif //H_CORRECTION_3D_H -#endif //CUDA + the eta_h value for the interface according to the forumulation in + Sanders et al, 1998. */ +__global__ void calc_etah_z_3D(Real *eta_x, Real *eta_y, Real *eta_z, + Real *etah_z, int nx, int ny, int nz, + int n_ghost); + + #endif // H_CORRECTION_3D_H +#endif // CUDA diff --git a/src/hydro/hydro_cuda.cu b/src/hydro/hydro_cuda.cu index 7ad6e994a..d9dc46e68 100644 --- a/src/hydro/hydro_cuda.cu +++ b/src/hydro/hydro_cuda.cu @@ -2,23 +2,26 @@ * \brief Definitions of functions used in all cuda integration algorithms. */ #ifdef CUDA -#include -#include -#include -#include - -#include "../utils/gpu.hpp" -#include "../global/global.h" -#include "../global/global_cuda.h" -#include "../hydro/hydro_cuda.h" -#include "../gravity/static_grav.h" -#include "../utils/hydro_utilities.h" -#include "../utils/cuda_utilities.h" -#include "../utils/reduction_utilities.h" -#include "../utils/DeviceVector.h" - - -__global__ void Update_Conserved_Variables_1D(Real *dev_conserved, Real *dev_F, int n_cells, int x_off, int n_ghost, Real dx, Real xbound, Real dt, Real gamma, int n_fields) + #include + #include + #include + + #include + + #include "../global/global.h" + #include "../global/global_cuda.h" + #include "../gravity/static_grav.h" + #include "../hydro/hydro_cuda.h" + #include "../utils/DeviceVector.h" + #include "../utils/cuda_utilities.h" + #include "../utils/gpu.hpp" + #include "../utils/hydro_utilities.h" + #include "../utils/reduction_utilities.h" + +__global__ void Update_Conserved_Variables_1D(Real *dev_conserved, Real *dev_F, + int n_cells, int x_off, + int n_ghost, Real dx, Real xbound, + Real dt, Real gamma, int n_fields) { int id; #ifdef STATIC_GRAV @@ -27,60 +30,71 @@ __global__ void Update_Conserved_Variables_1D(Real *dev_conserved, Real *dev_F, gx = 0.0; #endif - Real dtodx = dt/dx; + Real dtodx = dt / dx; // get a global thread ID id = threadIdx.x + blockIdx.x * blockDim.x; - // threads corresponding to real cells do the calculation - if (id > n_ghost - 1 && id < n_cells-n_ghost) - { - #ifdef STATIC_GRAV - d = dev_conserved[ id]; + if (id > n_ghost - 1 && id < n_cells - n_ghost) { + #ifdef STATIC_GRAV + d = dev_conserved[id]; d_inv = 1.0 / d; - vx = dev_conserved[1*n_cells + id] * d_inv; - #endif + vx = dev_conserved[1 * n_cells + id] * d_inv; + #endif // update the conserved variable array - dev_conserved[ id] += dtodx * (dev_F[ id-1] - dev_F[ id]); - dev_conserved[ n_cells + id] += dtodx * (dev_F[ n_cells + id-1] - dev_F[ n_cells + id]); - dev_conserved[2*n_cells + id] += dtodx * (dev_F[2*n_cells + id-1] - dev_F[2*n_cells + id]); - dev_conserved[3*n_cells + id] += dtodx * (dev_F[3*n_cells + id-1] - dev_F[3*n_cells + id]); - dev_conserved[4*n_cells + id] += dtodx * (dev_F[4*n_cells + id-1] - dev_F[4*n_cells + id]); - #ifdef SCALAR - for (int i=0; i n_ghost-1 && xid < nx-n_ghost && yid > n_ghost-1 && yid < ny-n_ghost) - { - #ifdef STATIC_GRAV - d = dev_conserved[ id]; + if (xid > n_ghost - 1 && xid < nx - n_ghost && yid > n_ghost - 1 && + yid < ny - n_ghost) { + #ifdef STATIC_GRAV + d = dev_conserved[id]; d_inv = 1.0 / d; - vx = dev_conserved[1*n_cells + id] * d_inv; - vy = dev_conserved[2*n_cells + id] * d_inv; - #endif + vx = dev_conserved[1 * n_cells + id] * d_inv; + vy = dev_conserved[2 * n_cells + id] * d_inv; + #endif // update the conserved variable array - dev_conserved[ id] += dtodx * (dev_F_x[ imo] - dev_F_x[ id]) - + dtody * (dev_F_y[ jmo] - dev_F_y[ id]); - dev_conserved[ n_cells + id] += dtodx * (dev_F_x[ n_cells + imo] - dev_F_x[ n_cells + id]) - + dtody * (dev_F_y[ n_cells + jmo] - dev_F_y[ n_cells + id]); - dev_conserved[2*n_cells + id] += dtodx * (dev_F_x[2*n_cells + imo] - dev_F_x[2*n_cells + id]) - + dtody * (dev_F_y[2*n_cells + jmo] - dev_F_y[2*n_cells + id]); - dev_conserved[3*n_cells + id] += dtodx * (dev_F_x[3*n_cells + imo] - dev_F_x[3*n_cells + id]) - + dtody * (dev_F_y[3*n_cells + jmo] - dev_F_y[3*n_cells + id]); - dev_conserved[4*n_cells + id] += dtodx * (dev_F_x[4*n_cells + imo] - dev_F_x[4*n_cells + id]) - + dtody * (dev_F_y[4*n_cells + jmo] - dev_F_y[4*n_cells + id]); - #ifdef SCALAR - for (int i=0; i n_ghost-1 && xid < nx-n_ghost && yid > n_ghost-1 && yid < ny-n_ghost && zid > n_ghost-1 && zid < nz-n_ghost) - { - #if defined(STATIC_GRAV) || defined(GRAVITY) - d = dev_conserved[ id]; + if (xid > n_ghost - 1 && xid < nx - n_ghost && yid > n_ghost - 1 && + yid < ny - n_ghost && zid > n_ghost - 1 && zid < nz - n_ghost) { + #if defined(STATIC_GRAV) || defined(GRAVITY) + d = dev_conserved[id]; d_inv = 1.0 / d; - vx = dev_conserved[1*n_cells + id] * d_inv; - vy = dev_conserved[2*n_cells + id] * d_inv; - vz = dev_conserved[3*n_cells + id] * d_inv; - #endif + vx = dev_conserved[1 * n_cells + id] * d_inv; + vy = dev_conserved[2 * n_cells + id] * d_inv; + vz = dev_conserved[3 * n_cells + id] * d_inv; + #endif // update the conserved variable array - dev_conserved[ id] += dtodx * (dev_F_x[ imo] - dev_F_x[ id]) - + dtody * (dev_F_y[ jmo] - dev_F_y[ id]) - + dtodz * (dev_F_z[ kmo] - dev_F_z[ id]); - dev_conserved[ n_cells + id] += dtodx * (dev_F_x[ n_cells + imo] - dev_F_x[ n_cells + id]) - + dtody * (dev_F_y[ n_cells + jmo] - dev_F_y[ n_cells + id]) - + dtodz * (dev_F_z[ n_cells + kmo] - dev_F_z[ n_cells + id]); - dev_conserved[2*n_cells + id] += dtodx * (dev_F_x[2*n_cells + imo] - dev_F_x[2*n_cells + id]) - + dtody * (dev_F_y[2*n_cells + jmo] - dev_F_y[2*n_cells + id]) - + dtodz * (dev_F_z[2*n_cells + kmo] - dev_F_z[2*n_cells + id]); - dev_conserved[3*n_cells + id] += dtodx * (dev_F_x[3*n_cells + imo] - dev_F_x[3*n_cells + id]) - + dtody * (dev_F_y[3*n_cells + jmo] - dev_F_y[3*n_cells + id]) - + dtodz * (dev_F_z[3*n_cells + kmo] - dev_F_z[3*n_cells + id]); - dev_conserved[4*n_cells + id] += dtodx * (dev_F_x[4*n_cells + imo] - dev_F_x[4*n_cells + id]) - + dtody * (dev_F_y[4*n_cells + jmo] - dev_F_y[4*n_cells + id]) - + dtodz * (dev_F_z[4*n_cells + kmo] - dev_F_z[4*n_cells + id]); - #ifdef SCALAR - for (int i=0; i 0){ - dens_0 = dev_conserved[ id]; + #ifdef DENSITY_FLOOR + if (dev_conserved[id] < density_floor) { + if (dev_conserved[id] > 0) { + dens_0 = dev_conserved[id]; // Set the density to the density floor - dev_conserved[ id] = density_floor; + dev_conserved[id] = density_floor; // Scale the conserved values to the new density - dev_conserved[1*n_cells + id] *= (density_floor / dens_0); - dev_conserved[2*n_cells + id] *= (density_floor / dens_0); - dev_conserved[3*n_cells + id] *= (density_floor / dens_0); - dev_conserved[4*n_cells + id] *= (density_floor / dens_0); - #ifdef DE - dev_conserved[(n_fields-1)*n_cells + id] *= (density_floor / dens_0); - #endif - } - else{ + dev_conserved[1 * n_cells + id] *= (density_floor / dens_0); + dev_conserved[2 * n_cells + id] *= (density_floor / dens_0); + dev_conserved[3 * n_cells + id] *= (density_floor / dens_0); + dev_conserved[4 * n_cells + id] *= (density_floor / dens_0); + #ifdef DE + dev_conserved[(n_fields - 1) * n_cells + id] *= + (density_floor / dens_0); + #endif + } else { // If the density is negative: average the density on that cell - dens_0 = dev_conserved[ id]; - Average_Cell_Single_Field( 0, xid, yid, zid, nx, ny, nz, n_cells, dev_conserved ); + dens_0 = dev_conserved[id]; + Average_Cell_Single_Field(0, xid, yid, zid, nx, ny, nz, n_cells, + dev_conserved); } } - #endif//DENSITY_FLOOR + #endif // DENSITY_FLOOR - #ifdef STATIC_GRAV - calc_g_3D(xid, yid, zid, x_off, y_off, z_off, n_ghost, dx, dy, dz, xbound, ybound, zbound, &gx, &gy, &gz); - d_n = dev_conserved[ id]; + #ifdef STATIC_GRAV + calc_g_3D(xid, yid, zid, x_off, y_off, z_off, n_ghost, dx, dy, dz, xbound, + ybound, zbound, &gx, &gy, &gz); + d_n = dev_conserved[id]; d_inv_n = 1.0 / d_n; - vx_n = dev_conserved[1*n_cells + id] * d_inv_n; - vy_n = dev_conserved[2*n_cells + id] * d_inv_n; - vz_n = dev_conserved[3*n_cells + id] * d_inv_n; - dev_conserved[ n_cells + id] += 0.5*dt*gx*(d + d_n); - dev_conserved[2*n_cells + id] += 0.5*dt*gy*(d + d_n); - dev_conserved[3*n_cells + id] += 0.5*dt*gz*(d + d_n); - dev_conserved[4*n_cells + id] += 0.25*dt*gx*(d + d_n)*(vx + vx_n) - + 0.25*dt*gy*(d + d_n)*(vy + vy_n) - + 0.25*dt*gz*(d + d_n)*(vz + vz_n); - #endif + vx_n = dev_conserved[1 * n_cells + id] * d_inv_n; + vy_n = dev_conserved[2 * n_cells + id] * d_inv_n; + vz_n = dev_conserved[3 * n_cells + id] * d_inv_n; + dev_conserved[n_cells + id] += 0.5 * dt * gx * (d + d_n); + dev_conserved[2 * n_cells + id] += 0.5 * dt * gy * (d + d_n); + dev_conserved[3 * n_cells + id] += 0.5 * dt * gz * (d + d_n); + dev_conserved[4 * n_cells + id] += + 0.25 * dt * gx * (d + d_n) * (vx + vx_n) + + 0.25 * dt * gy * (d + d_n) * (vy + vy_n) + + 0.25 * dt * gz * (d + d_n) * (vz + vz_n); + #endif - #ifdef GRAVITY - d_n = dev_conserved[ id]; + #ifdef GRAVITY + d_n = dev_conserved[id]; d_inv_n = 1.0 / d_n; - vx_n = dev_conserved[1*n_cells + id] * d_inv_n; - vy_n = dev_conserved[2*n_cells + id] * d_inv_n; - vz_n = dev_conserved[3*n_cells + id] * d_inv_n; + vx_n = dev_conserved[1 * n_cells + id] * d_inv_n; + vy_n = dev_conserved[2 * n_cells + id] * d_inv_n; + vz_n = dev_conserved[3 * n_cells + id] * d_inv_n; // Calculate the -gradient of potential // Get X componet of gravity field - id_l = (xid-1) + (yid)*nx + (zid)*nx*ny; - id_r = (xid+1) + (yid)*nx + (zid)*nx*ny; + id_l = (xid - 1) + (yid)*nx + (zid)*nx * ny; + id_r = (xid + 1) + (yid)*nx + (zid)*nx * ny; pot_l = dev_potential[id_l]; pot_r = dev_potential[id_r]; #ifdef GRAVITY_5_POINTS_GRADIENT - id_ll = (xid-2) + (yid)*nx + (zid)*nx*ny; - id_rr = (xid+2) + (yid)*nx + (zid)*nx*ny; + id_ll = (xid - 2) + (yid)*nx + (zid)*nx * ny; + id_rr = (xid + 2) + (yid)*nx + (zid)*nx * ny; pot_ll = dev_potential[id_ll]; pot_rr = dev_potential[id_rr]; - gx = -1 * ( -pot_rr + 8*pot_r - 8*pot_l + pot_ll) / (12*dx); + gx = -1 * (-pot_rr + 8 * pot_r - 8 * pot_l + pot_ll) / (12 * dx); #else - gx = -0.5*( pot_r - pot_l ) / dx; + gx = -0.5 * (pot_r - pot_l) / dx; #endif - //Get Y componet of gravity field - id_l = (xid) + (yid-1)*nx + (zid)*nx*ny; - id_r = (xid) + (yid+1)*nx + (zid)*nx*ny; + // Get Y componet of gravity field + id_l = (xid) + (yid - 1) * nx + (zid)*nx * ny; + id_r = (xid) + (yid + 1) * nx + (zid)*nx * ny; pot_l = dev_potential[id_l]; pot_r = dev_potential[id_r]; #ifdef GRAVITY_5_POINTS_GRADIENT - id_ll = (xid) + (yid-2)*nx + (zid)*nx*ny; - id_rr = (xid) + (yid+2)*nx + (zid)*nx*ny; + id_ll = (xid) + (yid - 2) * nx + (zid)*nx * ny; + id_rr = (xid) + (yid + 2) * nx + (zid)*nx * ny; pot_ll = dev_potential[id_ll]; pot_rr = dev_potential[id_rr]; - gy = -1 * ( -pot_rr + 8*pot_r - 8*pot_l + pot_ll) / (12*dx); + gy = -1 * (-pot_rr + 8 * pot_r - 8 * pot_l + pot_ll) / (12 * dx); #else - gy = -0.5*( pot_r - pot_l ) / dy; + gy = -0.5 * (pot_r - pot_l) / dy; #endif - //Get Z componet of gravity field - id_l = (xid) + (yid)*nx + (zid-1)*nx*ny; - id_r = (xid) + (yid)*nx + (zid+1)*nx*ny; + // Get Z componet of gravity field + id_l = (xid) + (yid)*nx + (zid - 1) * nx * ny; + id_r = (xid) + (yid)*nx + (zid + 1) * nx * ny; pot_l = dev_potential[id_l]; pot_r = dev_potential[id_r]; #ifdef GRAVITY_5_POINTS_GRADIENT - id_ll = (xid) + (yid)*nx + (zid-2)*nx*ny; - id_rr = (xid) + (yid)*nx + (zid+2)*nx*ny; + id_ll = (xid) + (yid)*nx + (zid - 2) * nx * ny; + id_rr = (xid) + (yid)*nx + (zid + 2) * nx * ny; pot_ll = dev_potential[id_ll]; pot_rr = dev_potential[id_rr]; - gz = -1 * ( -pot_rr + 8*pot_r - 8*pot_l + pot_ll) / (12*dx); + gz = -1 * (-pot_rr + 8 * pot_r - 8 * pot_l + pot_ll) / (12 * dx); #else - gz = -0.5*( pot_r - pot_l ) / dz; + gz = -0.5 * (pot_r - pot_l) / dz; #endif - //Add gravity term to Momentum - dev_conserved[ n_cells + id] += 0.5*dt*gx*(d + d_n); - dev_conserved[2*n_cells + id] += 0.5*dt*gy*(d + d_n); - dev_conserved[3*n_cells + id] += 0.5*dt*gz*(d + d_n); - - //Add gravity term to Total Energy - //Add the work done by the gravitational force - dev_conserved[4*n_cells + id] += 0.5* dt * ( gx*(d*vx + d_n*vx_n) + gy*(d*vy + d_n*vy_n) + gz*(d*vz + d_n*vz_n) ); - - #endif //GRAVITY - - - #if !( defined(DENSITY_FLOOR) && defined(TEMPERATURE_FLOOR) ) - if (dev_conserved[id] < 0.0 || dev_conserved[id] != dev_conserved[id] || dev_conserved[4*n_cells + id] < 0.0 || dev_conserved[4*n_cells+id] != dev_conserved[4*n_cells+id]) { - printf("%3d %3d %3d Thread crashed in final update. %e %e %e %e %e\n", xid+x_off, yid+y_off, zid+z_off, dev_conserved[id], dtodx*(dev_F_x[imo]-dev_F_x[id]), dtody*(dev_F_y[jmo]-dev_F_y[id]), dtodz*(dev_F_z[kmo]-dev_F_z[id]), dev_conserved[4*n_cells+id]); + // Add gravity term to Momentum + dev_conserved[n_cells + id] += 0.5 * dt * gx * (d + d_n); + dev_conserved[2 * n_cells + id] += 0.5 * dt * gy * (d + d_n); + dev_conserved[3 * n_cells + id] += 0.5 * dt * gz * (d + d_n); + + // Add gravity term to Total Energy + // Add the work done by the gravitational force + dev_conserved[4 * n_cells + id] += + 0.5 * dt * + (gx * (d * vx + d_n * vx_n) + gy * (d * vy + d_n * vy_n) + + gz * (d * vz + d_n * vz_n)); + + #endif // GRAVITY + + #if !(defined(DENSITY_FLOOR) && defined(TEMPERATURE_FLOOR)) + if (dev_conserved[id] < 0.0 || dev_conserved[id] != dev_conserved[id] || + dev_conserved[4 * n_cells + id] < 0.0 || + dev_conserved[4 * n_cells + id] != dev_conserved[4 * n_cells + id]) { + printf("%3d %3d %3d Thread crashed in final update. %e %e %e %e %e\n", + xid + x_off, yid + y_off, zid + z_off, dev_conserved[id], + dtodx * (dev_F_x[imo] - dev_F_x[id]), + dtody * (dev_F_y[jmo] - dev_F_y[id]), + dtodz * (dev_F_z[kmo] - dev_F_z[id]), + dev_conserved[4 * n_cells + id]); } - #endif//DENSITY_FLOOR + #endif // DENSITY_FLOOR /* d = dev_conserved[ id]; d_inv = 1.0 / d; vx = dev_conserved[1*n_cells + id] * d_inv; vy = dev_conserved[2*n_cells + id] * d_inv; vz = dev_conserved[3*n_cells + id] * d_inv; - P = (dev_conserved[4*n_cells + id] - 0.5*d*(vx*vx + vy*vy + vz*vz)) * (gamma - 1.0); - if (P < 0.0) printf("%3d %3d %3d Negative pressure after final update. %f %f %f %f %f\n", xid, yid, zid, dev_conserved[4*n_cells + id], 0.5*d*vx*vx, 0.5*d*vy*vy, 0.5*d*vz*vz, P); + P = (dev_conserved[4*n_cells + id] - 0.5*d*(vx*vx + vy*vy + vz*vz)) * + (gamma - 1.0); if (P < 0.0) printf("%3d %3d %3d Negative pressure after + final update. %f %f %f %f %f\n", xid, yid, zid, dev_conserved[4*n_cells + + id], 0.5*d*vx*vx, 0.5*d*vy*vy, 0.5*d*vz*vz, P); */ } - } - __device__ __host__ Real hydroInverseCrossingTime(Real const &E, - Real const &d, - Real const &d_inv, - Real const &vx, - Real const &vy, - Real const &vz, - Real const &dx, - Real const &dy, - Real const &dz, - Real const &gamma) +__device__ __host__ Real hydroInverseCrossingTime( + Real const &E, Real const &d, Real const &d_inv, Real const &vx, + Real const &vy, Real const &vz, Real const &dx, Real const &dy, + Real const &dz, Real const &gamma) { // Compute pressure and sound speed - Real P = (E - 0.5*d*(vx*vx + vy*vy + vz*vz)) * (gamma - 1.0); + Real P = (E - 0.5 * d * (vx * vx + vy * vy + vz * vz)) * (gamma - 1.0); Real cs = sqrt(d_inv * gamma * P); // Find maximum inverse crossing time in the cell (i.e. minimum crossing time) - Real cellMaxInverseDt = fmax((fabs(vx)+cs)/dx, (fabs(vy)+cs)/dy); - cellMaxInverseDt = fmax(cellMaxInverseDt, (fabs(vz)+cs)/dz); + Real cellMaxInverseDt = fmax((fabs(vx) + cs) / dx, (fabs(vy) + cs) / dy); + cellMaxInverseDt = fmax(cellMaxInverseDt, (fabs(vz) + cs) / dz); cellMaxInverseDt = fmax(cellMaxInverseDt, 0.0); return cellMaxInverseDt; } -__device__ __host__ Real mhdInverseCrossingTime(Real const &E, - Real const &d, - Real const &d_inv, - Real const &vx, - Real const &vy, - Real const &vz, - Real const &avgBx, - Real const &avgBy, - Real const &avgBz, - Real const &dx, - Real const &dy, - Real const &dz, - Real const &gamma) +__device__ __host__ Real mhdInverseCrossingTime( + Real const &E, Real const &d, Real const &d_inv, Real const &vx, + Real const &vy, Real const &vz, Real const &avgBx, Real const &avgBy, + Real const &avgBz, Real const &dx, Real const &dy, Real const &dz, + Real const &gamma) { // Compute the gas pressure and fast magnetosonic speed - Real gasP = mhd::utils::computeGasPressure(E, d, vx*d, vy*d, vz*d, avgBx, avgBy, avgBz, gamma); - Real cf = mhd::utils::fastMagnetosonicSpeed(d, gasP, avgBx, avgBy, avgBz, gamma); + Real gasP = mhd::utils::computeGasPressure(E, d, vx * d, vy * d, vz * d, + avgBx, avgBy, avgBz, gamma); + Real cf = + mhd::utils::fastMagnetosonicSpeed(d, gasP, avgBx, avgBy, avgBz, gamma); // Find maximum inverse crossing time in the cell (i.e. minimum crossing time) - Real cellMaxInverseDt = fmax((fabs(vx)+cf)/dx, (fabs(vy)+cf)/dy); - cellMaxInverseDt = fmax(cellMaxInverseDt, (fabs(vz)+cf)/dz); + Real cellMaxInverseDt = fmax((fabs(vx) + cf) / dx, (fabs(vy) + cf) / dy); + cellMaxInverseDt = fmax(cellMaxInverseDt, (fabs(vz) + cf) / dz); cellMaxInverseDt = fmax(cellMaxInverseDt, 0.0); return cellMaxInverseDt; } - - -__global__ void Calc_dt_1D(Real *dev_conserved, Real *dev_dti, Real gamma, int n_ghost, int nx, Real dx) +__global__ void Calc_dt_1D(Real *dev_conserved, Real *dev_dti, Real gamma, + int n_ghost, int nx, Real dx) { Real max_dti = -DBL_MAX; @@ -452,23 +497,26 @@ __global__ void Calc_dt_1D(Real *dev_conserved, Real *dev_dti, Real gamma, int n // but setting it to int results in some kind of silent over/underflow issue // even though we're not hitting those kinds of numbers. Setting it to type // uint or size_t fixes them - for(size_t id = threadIdx.x + blockIdx.x * blockDim.x; id < n_cells; id += blockDim.x * gridDim.x) - { + for (size_t id = threadIdx.x + blockIdx.x * blockDim.x; id < n_cells; + id += blockDim.x * gridDim.x) { // threads corresponding to real cells do the calculation - if (id > n_ghost - 1 && id < n_cells-n_ghost) - { + if (id > n_ghost - 1 && id < n_cells - n_ghost) { // start timestep calculation here - // every thread collects the conserved variables it needs from global memory - d = dev_conserved[ id]; + // every thread collects the conserved variables it needs from global + // memory + d = dev_conserved[id]; d_inv = 1.0 / d; - vx = dev_conserved[1*n_cells + id] * d_inv; - vy = dev_conserved[2*n_cells + id] * d_inv; - vz = dev_conserved[3*n_cells + id] * d_inv; - P = (dev_conserved[4*n_cells + id] - 0.5*d*(vx*vx + vy*vy + vz*vz)) * (gamma - 1.0); - P = fmax(P, (Real) TINY_NUMBER); - // find the max wavespeed in that cell, use it to calculate the inverse timestep - cs = sqrt(d_inv * gamma * P); - max_dti = fmax(max_dti,(fabs(vx)+cs)/dx); + vx = dev_conserved[1 * n_cells + id] * d_inv; + vy = dev_conserved[2 * n_cells + id] * d_inv; + vz = dev_conserved[3 * n_cells + id] * d_inv; + P = (dev_conserved[4 * n_cells + id] - + 0.5 * d * (vx * vx + vy * vy + vz * vz)) * + (gamma - 1.0); + P = fmax(P, (Real)TINY_NUMBER); + // find the max wavespeed in that cell, use it to calculate the inverse + // timestep + cs = sqrt(d_inv * gamma * P); + max_dti = fmax(max_dti, (fabs(vx) + cs) / dx); } } @@ -476,41 +524,44 @@ __global__ void Calc_dt_1D(Real *dev_conserved, Real *dev_dti, Real gamma, int n reduction_utilities::gridReduceMax(max_dti, dev_dti); } - - -__global__ void Calc_dt_2D(Real *dev_conserved, Real *dev_dti, Real gamma, int n_ghost, int nx, int ny, Real dx, Real dy) +__global__ void Calc_dt_2D(Real *dev_conserved, Real *dev_dti, Real gamma, + int n_ghost, int nx, int ny, Real dx, Real dy) { Real max_dti = -DBL_MAX; Real d, d_inv, vx, vy, vz, P, cs; int xid, yid, n_cells; - n_cells = nx*ny; + n_cells = nx * ny; // Grid stride loop to perform as much of the reduction as possible. The // fact that `id` has type `size_t` is important. I'm not totally sure why // but setting it to int results in some kind of silent over/underflow issue // even though we're not hitting those kinds of numbers. Setting it to type // uint or size_t fixes them - for(size_t id = threadIdx.x + blockIdx.x * blockDim.x; id < n_cells; id += blockDim.x * gridDim.x) - { + for (size_t id = threadIdx.x + blockIdx.x * blockDim.x; id < n_cells; + id += blockDim.x * gridDim.x) { // get a global thread ID yid = id / nx; - xid = id - yid*nx; + xid = id - yid * nx; // threads corresponding to real cells do the calculation - if (xid > n_ghost-1 && xid < nx-n_ghost && yid > n_ghost-1 && yid < ny-n_ghost) - { - // every thread collects the conserved variables it needs from global memory - d = dev_conserved[ id]; + if (xid > n_ghost - 1 && xid < nx - n_ghost && yid > n_ghost - 1 && + yid < ny - n_ghost) { + // every thread collects the conserved variables it needs from global + // memory + d = dev_conserved[id]; d_inv = 1.0 / d; - vx = dev_conserved[1*n_cells + id] * d_inv; - vy = dev_conserved[2*n_cells + id] * d_inv; - vz = dev_conserved[3*n_cells + id] * d_inv; - P = (dev_conserved[4*n_cells + id] - 0.5*d*(vx*vx + vy*vy + vz*vz)) * (gamma - 1.0); - P = fmax(P, (Real) 1.0e-20); - // find the max wavespeed in that cell, use it to calculate the inverse timestep - cs = sqrt(d_inv * gamma * P); - max_dti = fmax(max_dti,fmax((fabs(vx)+cs)/dx, (fabs(vy)+cs)/dy)); + vx = dev_conserved[1 * n_cells + id] * d_inv; + vy = dev_conserved[2 * n_cells + id] * d_inv; + vz = dev_conserved[3 * n_cells + id] * d_inv; + P = (dev_conserved[4 * n_cells + id] - + 0.5 * d * (vx * vx + vy * vy + vz * vz)) * + (gamma - 1.0); + P = fmax(P, (Real)1.0e-20); + // find the max wavespeed in that cell, use it to calculate the inverse + // timestep + cs = sqrt(d_inv * gamma * P); + max_dti = fmax(max_dti, fmax((fabs(vx) + cs) / dx, (fabs(vy) + cs) / dy)); } } @@ -518,47 +569,52 @@ __global__ void Calc_dt_2D(Real *dev_conserved, Real *dev_dti, Real gamma, int n reduction_utilities::gridReduceMax(max_dti, dev_dti); } - -__global__ void Calc_dt_3D(Real *dev_conserved, Real *dev_dti, Real gamma, int n_ghost, int n_fields, int nx, int ny, int nz, Real dx, Real dy, Real dz) +__global__ void Calc_dt_3D(Real *dev_conserved, Real *dev_dti, Real gamma, + int n_ghost, int n_fields, int nx, int ny, int nz, + Real dx, Real dy, Real dz) { Real max_dti = -DBL_MAX; Real d, d_inv, vx, vy, vz, E; int xid, yid, zid, n_cells; - n_cells = nx*ny*nz; + n_cells = nx * ny * nz; // Grid stride loop to perform as much of the reduction as possible. The // fact that `id` has type `size_t` is important. I'm not totally sure why // but setting it to int results in some kind of silent over/underflow issue // even though we're not hitting those kinds of numbers. Setting it to type // uint or size_t fixes them - for(size_t id = threadIdx.x + blockIdx.x * blockDim.x; id < n_cells; id += blockDim.x * gridDim.x) - { + for (size_t id = threadIdx.x + blockIdx.x * blockDim.x; id < n_cells; + id += blockDim.x * gridDim.x) { // get a global thread ID cuda_utilities::compute3DIndices(id, nx, ny, xid, yid, zid); // threads corresponding to real cells do the calculation - if (xid > n_ghost-1 && xid < nx-n_ghost && yid > n_ghost-1 && yid < ny-n_ghost && zid > n_ghost-1 && zid < nz-n_ghost) - { - // every thread collects the conserved variables it needs from global memory - d = dev_conserved[ id]; + if (xid > n_ghost - 1 && xid < nx - n_ghost && yid > n_ghost - 1 && + yid < ny - n_ghost && zid > n_ghost - 1 && zid < nz - n_ghost) { + // every thread collects the conserved variables it needs from global + // memory + d = dev_conserved[id]; d_inv = 1.0 / d; - vx = dev_conserved[1*n_cells + id] * d_inv; - vy = dev_conserved[2*n_cells + id] * d_inv; - vz = dev_conserved[3*n_cells + id] * d_inv; - E = dev_conserved[4*n_cells + id]; - - // Compute the maximum inverse crossing time in the cell - #ifdef MHD - // Compute the cell centered magnetic field using a straight average of - // the faces - auto const [avgBx, avgBy, avgBz] = mhd::utils::cellCenteredMagneticFields(dev_conserved, id, xid, yid, zid, n_cells, nx, ny); - max_dti = fmax(max_dti,mhdInverseCrossingTime(E, d, d_inv, vx, vy, vz, avgBx, avgBy, avgBz, dx, dy, dz, gamma)); - #else // not MHD - max_dti = fmax(max_dti,hydroInverseCrossingTime(E, d, d_inv, vx, vy, vz, dx, dy, dz, gamma)); - #endif //MHD - + vx = dev_conserved[1 * n_cells + id] * d_inv; + vy = dev_conserved[2 * n_cells + id] * d_inv; + vz = dev_conserved[3 * n_cells + id] * d_inv; + E = dev_conserved[4 * n_cells + id]; + + // Compute the maximum inverse crossing time in the cell + #ifdef MHD + // Compute the cell centered magnetic field using a straight average of + // the faces + auto const [avgBx, avgBy, avgBz] = mhd::utils::cellCenteredMagneticFields( + dev_conserved, id, xid, yid, zid, n_cells, nx, ny); + max_dti = fmax( + max_dti, mhdInverseCrossingTime(E, d, d_inv, vx, vy, vz, avgBx, avgBy, + avgBz, dx, dy, dz, gamma)); + #else // not MHD + max_dti = fmax(max_dti, hydroInverseCrossingTime(E, d, d_inv, vx, vy, vz, + dx, dy, dz, gamma)); + #endif // MHD } } @@ -566,7 +622,8 @@ __global__ void Calc_dt_3D(Real *dev_conserved, Real *dev_dti, Real gamma, int n reduction_utilities::gridReduceMax(max_dti, dev_dti); } -Real Calc_dt_GPU(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dx, Real dy, Real dz, Real gamma ) +Real Calc_dt_GPU(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, + int n_fields, Real dx, Real dy, Real dz, Real gamma) { // Allocate the device memory cuda_utilities::DeviceVector static dev_dti(1); @@ -576,252 +633,274 @@ Real Calc_dt_GPU(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n dev_dti.assign(std::numeric_limits::lowest()); // compute dt and store in dev_dti - if (nx > 1 && ny == 1 && nz == 1) //1D + if (nx > 1 && ny == 1 && nz == 1) // 1D { // set launch parameters for GPU kernels. cuda_utilities::AutomaticLaunchParams static const launchParams(Calc_dt_1D); - hipLaunchKernelGGL(Calc_dt_1D, launchParams.numBlocks, launchParams.threadsPerBlock, 0, 0, - dev_conserved, dev_dti.data(), gamma, n_ghost, nx, dx); - } - else if (nx > 1 && ny > 1 && nz == 1) //2D + hipLaunchKernelGGL(Calc_dt_1D, launchParams.numBlocks, + launchParams.threadsPerBlock, 0, 0, dev_conserved, + dev_dti.data(), gamma, n_ghost, nx, dx); + } else if (nx > 1 && ny > 1 && nz == 1) // 2D { // set launch parameters for GPU kernels. cuda_utilities::AutomaticLaunchParams static const launchParams(Calc_dt_2D); - hipLaunchKernelGGL(Calc_dt_2D, launchParams.numBlocks, launchParams.threadsPerBlock, 0, 0, - dev_conserved, dev_dti.data(), gamma, n_ghost, nx, ny, dx, dy); - } - else if (nx > 1 && ny > 1 && nz > 1) //3D + hipLaunchKernelGGL(Calc_dt_2D, launchParams.numBlocks, + launchParams.threadsPerBlock, 0, 0, dev_conserved, + dev_dti.data(), gamma, n_ghost, nx, ny, dx, dy); + } else if (nx > 1 && ny > 1 && nz > 1) // 3D { // set launch parameters for GPU kernels. cuda_utilities::AutomaticLaunchParams static const launchParams(Calc_dt_3D); - hipLaunchKernelGGL(Calc_dt_3D, launchParams.numBlocks, launchParams.threadsPerBlock, 0, 0, - dev_conserved, dev_dti.data(), gamma, n_ghost, n_fields, nx, ny, nz, dx, dy, dz); + hipLaunchKernelGGL(Calc_dt_3D, launchParams.numBlocks, + launchParams.threadsPerBlock, 0, 0, dev_conserved, + dev_dti.data(), gamma, n_ghost, n_fields, nx, ny, nz, dx, + dy, dz); } CudaCheckError(); - // Note: dev_dti[0] is DeviceVector syntactic sugar for returning a value via cudaMemcpy + // Note: dev_dti[0] is DeviceVector syntactic sugar for returning a value via + // cudaMemcpy return dev_dti[0]; } + #ifdef AVERAGE_SLOW_CELLS -#ifdef AVERAGE_SLOW_CELLS - -void Average_Slow_Cells( Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dx, Real dy, Real dz, Real gamma, Real max_dti_slow ){ - +void Average_Slow_Cells(Real *dev_conserved, int nx, int ny, int nz, + int n_ghost, int n_fields, Real dx, Real dy, Real dz, + Real gamma, Real max_dti_slow) +{ // set values for GPU kernels - int n_cells = nx*ny*nz; - int ngrid = (n_cells + TPB - 1) / TPB; + int n_cells = nx * ny * nz; + int ngrid = (n_cells + TPB - 1) / TPB; // number of blocks per 1D grid dim3 dim1dGrid(ngrid, 1, 1); // number of threads per 1D block dim3 dim1dBlock(TPB, 1, 1); - if (nx > 1 && ny > 1 && nz > 1){ //3D - hipLaunchKernelGGL(Average_Slow_Cells_3D, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, nz, n_ghost, n_fields, dx, dy, dz, gamma, max_dti_slow ); + if (nx > 1 && ny > 1 && nz > 1) { // 3D + hipLaunchKernelGGL(Average_Slow_Cells_3D, dim1dGrid, dim1dBlock, 0, 0, + dev_conserved, nx, ny, nz, n_ghost, n_fields, dx, dy, dz, + gamma, max_dti_slow); } } -__global__ void Average_Slow_Cells_3D(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dx, Real dy, Real dz, Real gamma, Real max_dti_slow ){ - +__global__ void Average_Slow_Cells_3D(Real *dev_conserved, int nx, int ny, + int nz, int n_ghost, int n_fields, + Real dx, Real dy, Real dz, Real gamma, + Real max_dti_slow) +{ int id, xid, yid, zid, n_cells; Real d, d_inv, vx, vy, vz, E, max_dti; Real speed, temp, P, cs; // get a global thread ID - id = threadIdx.x + blockIdx.x * blockDim.x; - n_cells = nx*ny*nz; + id = threadIdx.x + blockIdx.x * blockDim.x; + n_cells = nx * ny * nz; cuda_utilities::compute3DIndices(id, nx, ny, xid, yid, zid); - // threads corresponding to real cells do the calculation - if (xid > n_ghost-1 && xid < nx-n_ghost && yid > n_ghost-1 && yid < ny-n_ghost && zid > n_ghost-1 && zid < nz-n_ghost) - { - d = dev_conserved[ id]; + if (xid > n_ghost - 1 && xid < nx - n_ghost && yid > n_ghost - 1 && + yid < ny - n_ghost && zid > n_ghost - 1 && zid < nz - n_ghost) { + d = dev_conserved[id]; d_inv = 1.0 / d; - vx = dev_conserved[1*n_cells + id] * d_inv; - vy = dev_conserved[2*n_cells + id] * d_inv; - vz = dev_conserved[3*n_cells + id] * d_inv; - E = dev_conserved[4*n_cells + id]; + vx = dev_conserved[1 * n_cells + id] * d_inv; + vy = dev_conserved[2 * n_cells + id] * d_inv; + vz = dev_conserved[3 * n_cells + id] * d_inv; + E = dev_conserved[4 * n_cells + id]; // Compute the maximum inverse crossing time in the cell - max_dti = hydroInverseCrossingTime(E, d, d_inv, vx, vy, vz, dx, dy, dz, gamma); - - if (max_dti > max_dti_slow){ - speed = sqrt(vx*vx + vy*vy + vz*vz); - temp = (gamma - 1)*(E - 0.5*(speed*speed)*d)*ENERGY_UNIT/(d*DENSITY_UNIT/0.6/MP)/KB; - P = (E - 0.5*d*(vx*vx + vy*vy + vz*vz)) * (gamma - 1.0); - cs = sqrt(d_inv * gamma * P)*VELOCITY_UNIT*1e-5; + max_dti = + hydroInverseCrossingTime(E, d, d_inv, vx, vy, vz, dx, dy, dz, gamma); + + if (max_dti > max_dti_slow) { + speed = sqrt(vx * vx + vy * vy + vz * vz); + temp = (gamma - 1) * (E - 0.5 * (speed * speed) * d) * ENERGY_UNIT / + (d * DENSITY_UNIT / 0.6 / MP) / KB; + P = (E - 0.5 * d * (vx * vx + vy * vy + vz * vz)) * (gamma - 1.0); + cs = sqrt(d_inv * gamma * P) * VELOCITY_UNIT * 1e-5; // Average this cell - printf(" Average Slow Cell [ %d %d %d ] -> dt_cell=%f dt_min=%f, n=%.3e, T=%.3e, v=%.3e (%.3e, %.3e, %.3e), cs=%.3e\n", xid, yid, zid, 1./max_dti, 1./max_dti_slow, - dev_conserved[id]*DENSITY_UNIT/0.6/MP, temp, speed*VELOCITY_UNIT*1e-5, vx*VELOCITY_UNIT*1e-5, vy*VELOCITY_UNIT*1e-5, vz*VELOCITY_UNIT*1e-5, cs); - Average_Cell_All_Fields( xid, yid, zid, nx, ny, nz, n_cells, n_fields, dev_conserved ); + printf( + " Average Slow Cell [ %d %d %d ] -> dt_cell=%f dt_min=%f, n=%.3e, " + "T=%.3e, v=%.3e (%.3e, %.3e, %.3e), cs=%.3e\n", + xid, yid, zid, 1. / max_dti, 1. / max_dti_slow, + dev_conserved[id] * DENSITY_UNIT / 0.6 / MP, temp, + speed * VELOCITY_UNIT * 1e-5, vx * VELOCITY_UNIT * 1e-5, + vy * VELOCITY_UNIT * 1e-5, vz * VELOCITY_UNIT * 1e-5, cs); + Average_Cell_All_Fields(xid, yid, zid, nx, ny, nz, n_cells, n_fields, + dev_conserved); } } } -#endif //AVERAGE_SLOW_CELLS - - -#ifdef DE -__global__ void Partial_Update_Advected_Internal_Energy_1D( Real *dev_conserved, Real *Q_Lx, Real *Q_Rx, int nx, int n_ghost, Real dx, Real dt, Real gamma, int n_fields ){ + #endif // AVERAGE_SLOW_CELLS + #ifdef DE +__global__ void Partial_Update_Advected_Internal_Energy_1D( + Real *dev_conserved, Real *Q_Lx, Real *Q_Rx, int nx, int n_ghost, Real dx, + Real dt, Real gamma, int n_fields) +{ int id, xid, n_cells; int imo, ipo; Real d, d_inv, vx, vy, vz; Real vx_imo, vx_ipo; - Real P, E, E_kin, GE; + Real P, E, E_kin, GE; - - Real dtodx = dt/dx; - n_cells = nx; + Real dtodx = dt / dx; + n_cells = nx; // get a global thread ID - id = threadIdx.x + blockIdx.x * blockDim.x; + id = threadIdx.x + blockIdx.x * blockDim.x; xid = id; - // threads corresponding to real cells do the calculation - if (xid > n_ghost-1 && xid < nx-n_ghost) - { - d = dev_conserved[ id]; + if (xid > n_ghost - 1 && xid < nx - n_ghost) { + d = dev_conserved[id]; d_inv = 1.0 / d; - vx = dev_conserved[1*n_cells + id] * d_inv; - vy = dev_conserved[2*n_cells + id] * d_inv; - vz = dev_conserved[3*n_cells + id] * d_inv; - //PRESSURE_DE - E = dev_conserved[4*n_cells + id]; - GE = dev_conserved[(n_fields-1)*n_cells + id]; - E_kin = 0.5 * d * ( vx*vx + vy*vy + vz*vz ); - P = hydro_utilities::Get_Pressure_From_DE( E, E - E_kin, GE, gamma ); - P = fmax(P, (Real) TINY_NUMBER); - - imo = xid-1; - ipo = xid+1; - - vx_imo = dev_conserved[1*n_cells + imo] / dev_conserved[imo]; - vx_ipo = dev_conserved[1*n_cells + ipo] / dev_conserved[ipo]; + vx = dev_conserved[1 * n_cells + id] * d_inv; + vy = dev_conserved[2 * n_cells + id] * d_inv; + vz = dev_conserved[3 * n_cells + id] * d_inv; + // PRESSURE_DE + E = dev_conserved[4 * n_cells + id]; + GE = dev_conserved[(n_fields - 1) * n_cells + id]; + E_kin = 0.5 * d * (vx * vx + vy * vy + vz * vz); + P = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, GE, gamma); + P = fmax(P, (Real)TINY_NUMBER); + + imo = xid - 1; + ipo = xid + 1; + + vx_imo = dev_conserved[1 * n_cells + imo] / dev_conserved[imo]; + vx_ipo = dev_conserved[1 * n_cells + ipo] / dev_conserved[ipo]; // Use center values of neighbor cells for the divergence of velocity - dev_conserved[(n_fields-1)*n_cells + id] += 0.5*P*(dtodx*(vx_imo-vx_ipo)); - + dev_conserved[(n_fields - 1) * n_cells + id] += + 0.5 * P * (dtodx * (vx_imo - vx_ipo)); } } - -__global__ void Partial_Update_Advected_Internal_Energy_2D( Real *dev_conserved, Real *Q_Lx, Real *Q_Rx, Real *Q_Ly, Real *Q_Ry, int nx, int ny, int n_ghost, Real dx, Real dy, Real dt, Real gamma, int n_fields ){ - +__global__ void Partial_Update_Advected_Internal_Energy_2D( + Real *dev_conserved, Real *Q_Lx, Real *Q_Rx, Real *Q_Ly, Real *Q_Ry, int nx, + int ny, int n_ghost, Real dx, Real dy, Real dt, Real gamma, int n_fields) +{ int id, xid, yid, n_cells; int imo, jmo; int ipo, jpo; Real d, d_inv, vx, vy, vz; Real vx_imo, vx_ipo, vy_jmo, vy_jpo; - Real P, E, E_kin, GE; - + Real P, E, E_kin, GE; - Real dtodx = dt/dx; - Real dtody = dt/dy; - n_cells = nx*ny; + Real dtodx = dt / dx; + Real dtody = dt / dy; + n_cells = nx * ny; // get a global thread ID - int blockId = blockIdx.x + blockIdx.y*gridDim.x; - id = threadIdx.x + blockId * blockDim.x; - yid = id / nx; - xid = id - yid*nx; - + int blockId = blockIdx.x + blockIdx.y * gridDim.x; + id = threadIdx.x + blockId * blockDim.x; + yid = id / nx; + xid = id - yid * nx; // threads corresponding to real cells do the calculation - if (xid > n_ghost-1 && xid < nx-n_ghost && yid > n_ghost-1 && yid < ny-n_ghost) - { - d = dev_conserved[ id]; + if (xid > n_ghost - 1 && xid < nx - n_ghost && yid > n_ghost - 1 && + yid < ny - n_ghost) { + d = dev_conserved[id]; d_inv = 1.0 / d; - vx = dev_conserved[1*n_cells + id] * d_inv; - vy = dev_conserved[2*n_cells + id] * d_inv; - vz = dev_conserved[3*n_cells + id] * d_inv; - //PRESSURE_DE - E = dev_conserved[4*n_cells + id]; - GE = dev_conserved[(n_fields-1)*n_cells + id]; - E_kin = 0.5 * d * ( vx*vx + vy*vy + vz*vz ); - P = hydro_utilities::Get_Pressure_From_DE( E, E - E_kin, GE, gamma ); - P = fmax(P, (Real) TINY_NUMBER); - - imo = xid-1 + yid*nx; - ipo = xid+1 + yid*nx; - jmo = xid + (yid-1)*nx; - jpo = xid + (yid+1)*nx; - - vx_imo = dev_conserved[1*n_cells + imo] / dev_conserved[imo]; - vx_ipo = dev_conserved[1*n_cells + ipo] / dev_conserved[ipo]; - vy_jmo = dev_conserved[2*n_cells + jmo] / dev_conserved[jmo]; - vy_jpo = dev_conserved[2*n_cells + jpo] / dev_conserved[jpo]; + vx = dev_conserved[1 * n_cells + id] * d_inv; + vy = dev_conserved[2 * n_cells + id] * d_inv; + vz = dev_conserved[3 * n_cells + id] * d_inv; + // PRESSURE_DE + E = dev_conserved[4 * n_cells + id]; + GE = dev_conserved[(n_fields - 1) * n_cells + id]; + E_kin = 0.5 * d * (vx * vx + vy * vy + vz * vz); + P = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, GE, gamma); + P = fmax(P, (Real)TINY_NUMBER); + + imo = xid - 1 + yid * nx; + ipo = xid + 1 + yid * nx; + jmo = xid + (yid - 1) * nx; + jpo = xid + (yid + 1) * nx; + + vx_imo = dev_conserved[1 * n_cells + imo] / dev_conserved[imo]; + vx_ipo = dev_conserved[1 * n_cells + ipo] / dev_conserved[ipo]; + vy_jmo = dev_conserved[2 * n_cells + jmo] / dev_conserved[jmo]; + vy_jpo = dev_conserved[2 * n_cells + jpo] / dev_conserved[jpo]; // Use center values of neighbor cells for the divergence of velocity - dev_conserved[(n_fields-1)*n_cells + id] += 0.5*P*(dtodx*(vx_imo-vx_ipo) + dtody*(vy_jmo-vy_jpo)); - + dev_conserved[(n_fields - 1) * n_cells + id] += + 0.5 * P * (dtodx * (vx_imo - vx_ipo) + dtody * (vy_jmo - vy_jpo)); } } -__global__ void Partial_Update_Advected_Internal_Energy_3D( Real *dev_conserved, Real *Q_Lx, Real *Q_Rx, Real *Q_Ly, Real *Q_Ry, Real *Q_Lz, Real *Q_Rz, int nx, int ny, int nz, int n_ghost, Real dx, Real dy, Real dz, Real dt, Real gamma, int n_fields ){ - +__global__ void Partial_Update_Advected_Internal_Energy_3D( + Real *dev_conserved, Real *Q_Lx, Real *Q_Rx, Real *Q_Ly, Real *Q_Ry, + Real *Q_Lz, Real *Q_Rz, int nx, int ny, int nz, int n_ghost, Real dx, + Real dy, Real dz, Real dt, Real gamma, int n_fields) +{ int id, xid, yid, zid, n_cells; int imo, jmo, kmo; int ipo, jpo, kpo; Real d, d_inv, vx, vy, vz; Real vx_imo, vx_ipo, vy_jmo, vy_jpo, vz_kmo, vz_kpo; - Real P, E, E_kin, GE; + Real P, E, E_kin, GE; // Real vx_L, vx_R, vy_L, vy_R, vz_L, vz_R; - - Real dtodx = dt/dx; - Real dtody = dt/dy; - Real dtodz = dt/dz; - n_cells = nx*ny*nz; + Real dtodx = dt / dx; + Real dtody = dt / dy; + Real dtodz = dt / dz; + n_cells = nx * ny * nz; // get a global thread ID - id = threadIdx.x + blockIdx.x * blockDim.x; - zid = id / (nx*ny); - yid = (id - zid*nx*ny) / nx; - xid = id - zid*nx*ny - yid*nx; + id = threadIdx.x + blockIdx.x * blockDim.x; + zid = id / (nx * ny); + yid = (id - zid * nx * ny) / nx; + xid = id - zid * nx * ny - yid * nx; // threads corresponding to real cells do the calculation - if (xid > n_ghost-1 && xid < nx-n_ghost && yid > n_ghost-1 && yid < ny-n_ghost && zid > n_ghost-1 && zid < nz-n_ghost) - { - d = dev_conserved[ id]; + if (xid > n_ghost - 1 && xid < nx - n_ghost && yid > n_ghost - 1 && + yid < ny - n_ghost && zid > n_ghost - 1 && zid < nz - n_ghost) { + d = dev_conserved[id]; d_inv = 1.0 / d; - vx = dev_conserved[1*n_cells + id] * d_inv; - vy = dev_conserved[2*n_cells + id] * d_inv; - vz = dev_conserved[3*n_cells + id] * d_inv; - //PRESSURE_DE - E = dev_conserved[4*n_cells + id]; - GE = dev_conserved[(n_fields-1)*n_cells + id]; + vx = dev_conserved[1 * n_cells + id] * d_inv; + vy = dev_conserved[2 * n_cells + id] * d_inv; + vz = dev_conserved[3 * n_cells + id] * d_inv; + // PRESSURE_DE + E = dev_conserved[4 * n_cells + id]; + GE = dev_conserved[(n_fields - 1) * n_cells + id]; E_kin = hydro_utilities::Calc_Kinetic_Energy_From_Velocity(d, vx, vy, vz); - #ifdef MHD - // Add the magnetic energy - auto [centeredBx, centeredBy, centeredBz] = mhd::utils::cellCenteredMagneticFields(dev_conserved, id, xid, yid, zid, n_cells, nx, ny) - E_kin += mhd::utils::computeMagneticEnergy(magX, magY, magZ); - #endif //MHD - P = hydro_utilities::Get_Pressure_From_DE( E, E - E_kin, GE, gamma ); - P = fmax(P, (Real) TINY_NUMBER); - - imo = xid-1 + yid*nx + zid*nx*ny; - jmo = xid + (yid-1)*nx + zid*nx*ny; - kmo = xid + yid*nx + (zid-1)*nx*ny; - - ipo = xid+1 + yid*nx + zid*nx*ny; - jpo = xid + (yid+1)*nx + zid*nx*ny; - kpo = xid + yid*nx + (zid+1)*nx*ny; - - vx_imo = dev_conserved[1*n_cells + imo] / dev_conserved[imo]; - vx_ipo = dev_conserved[1*n_cells + ipo] / dev_conserved[ipo]; - vy_jmo = dev_conserved[2*n_cells + jmo] / dev_conserved[jmo]; - vy_jpo = dev_conserved[2*n_cells + jpo] / dev_conserved[jpo]; - vz_kmo = dev_conserved[3*n_cells + kmo] / dev_conserved[kmo]; - vz_kpo = dev_conserved[3*n_cells + kpo] / dev_conserved[kpo]; + #ifdef MHD + // Add the magnetic energy + auto [centeredBx, centeredBy, centeredBz] = + mhd::utils::cellCenteredMagneticFields(dev_conserved, id, xid, yid, zid, + n_cells, nx, ny) E_kin += + mhd::utils::computeMagneticEnergy(magX, magY, magZ); + #endif // MHD + P = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, GE, gamma); + P = fmax(P, (Real)TINY_NUMBER); + + imo = xid - 1 + yid * nx + zid * nx * ny; + jmo = xid + (yid - 1) * nx + zid * nx * ny; + kmo = xid + yid * nx + (zid - 1) * nx * ny; + + ipo = xid + 1 + yid * nx + zid * nx * ny; + jpo = xid + (yid + 1) * nx + zid * nx * ny; + kpo = xid + yid * nx + (zid + 1) * nx * ny; + + vx_imo = dev_conserved[1 * n_cells + imo] / dev_conserved[imo]; + vx_ipo = dev_conserved[1 * n_cells + ipo] / dev_conserved[ipo]; + vy_jmo = dev_conserved[2 * n_cells + jmo] / dev_conserved[jmo]; + vy_jpo = dev_conserved[2 * n_cells + jpo] / dev_conserved[jpo]; + vz_kmo = dev_conserved[3 * n_cells + kmo] / dev_conserved[kmo]; + vz_kpo = dev_conserved[3 * n_cells + kpo] / dev_conserved[kpo]; // Use center values of neighbor cells for the divergence of velocity - dev_conserved[(n_fields-1)*n_cells + id] += 0.5*P*(dtodx*(vx_imo-vx_ipo) + dtody*(vy_jmo-vy_jpo) + dtodz*(vz_kmo-vz_kpo)); - - // OPTION 2: Use the reconstructed velocities to compute the velocity gradient - //Use the reconstructed Velocities instead of neighbor cells centered values + dev_conserved[(n_fields - 1) * n_cells + id] += + 0.5 * P * + (dtodx * (vx_imo - vx_ipo) + dtody * (vy_jmo - vy_jpo) + + dtodz * (vz_kmo - vz_kpo)); + + // OPTION 2: Use the reconstructed velocities to compute the velocity + // gradient + // Use the reconstructed Velocities instead of neighbor cells centered + // values // vx_R = Q_Lx[1*n_cells + id] / Q_Lx[id]; // vx_L = Q_Rx[1*n_cells + imo] / Q_Rx[imo]; // vy_R = Q_Ly[2*n_cells + id] / Q_Ly[id]; @@ -829,16 +908,16 @@ __global__ void Partial_Update_Advected_Internal_Energy_3D( Real *dev_conserved, // vz_R = Q_Lz[3*n_cells + id] / Q_Lz[id]; // vz_L = Q_Rz[3*n_cells + kmo] / Q_Rz[kmo]; - //Use the reconstructed Velocities instead of neighbor cells centered values - // dev_conserved[(n_fields-1)*n_cells + id] += P * ( dtodx * ( vx_L - vx_R ) + dtody * ( vy_L - vy_R ) + dtodz * ( vz_L - vz_R ) ); - - + // Use the reconstructed Velocities instead of neighbor cells centered + // values + // dev_conserved[(n_fields-1)*n_cells + id] += P * ( dtodx * ( vx_L - vx_R + // ) + dtody * ( vy_L - vy_R ) + dtodz * ( vz_L - vz_R ) ); } } - -__global__ void Select_Internal_Energy_1D( Real *dev_conserved, int nx, int n_ghost, int n_fields ){ - +__global__ void Select_Internal_Energy_1D(Real *dev_conserved, int nx, + int n_ghost, int n_fields) +{ int id, xid, n_cells; Real d, d_inv, vx, vy, vz, E, U_total, U_advected, U, Emax; int imo, ipo; @@ -847,339 +926,341 @@ __global__ void Select_Internal_Energy_1D( Real *dev_conserved, int nx, int n_gh Real eta_2 = DE_ETA_2; // get a global thread ID - id = threadIdx.x + blockIdx.x * blockDim.x; + id = threadIdx.x + blockIdx.x * blockDim.x; xid = id; - imo = max(xid-1, n_ghost); - ipo = min(xid+1, nx-n_ghost-1); - + imo = max(xid - 1, n_ghost); + ipo = min(xid + 1, nx - n_ghost - 1); // threads corresponding to real cells do the calculation - if (xid > n_ghost-1 && xid < nx-n_ghost) - { + if (xid > n_ghost - 1 && xid < nx - n_ghost) { // every thread collects the conserved variables it needs from global memory - d = dev_conserved[ id]; - d_inv = 1.0 / d; - vx = dev_conserved[1*n_cells + id] * d_inv; - vy = dev_conserved[2*n_cells + id] * d_inv; - vz = dev_conserved[3*n_cells + id] * d_inv; - E = dev_conserved[4*n_cells + id]; - U_advected = dev_conserved[(n_fields-1)*n_cells + id]; - U_total = E - 0.5*d*( vx*vx + vy*vy + vz*vz ); - - //find the max nearby total energy - Emax = fmax(dev_conserved[4*n_cells + imo], E); - Emax = fmax(Emax, dev_conserved[4*n_cells + ipo]); - - if (U_total/Emax > eta_2 ) U = U_total; - else U = U_advected; - - //Optional: Avoid Negative Internal Energies - U = fmax(U, (Real) TINY_NUMBER); - - //Write Selected internal energy to the GasEnergy array ONLY - //to avoid mixing updated and non-updated values of E - //since the Dual Energy condition depends on the neighbor cells - dev_conserved[(n_fields-1)*n_cells + id] = U; - + d = dev_conserved[id]; + d_inv = 1.0 / d; + vx = dev_conserved[1 * n_cells + id] * d_inv; + vy = dev_conserved[2 * n_cells + id] * d_inv; + vz = dev_conserved[3 * n_cells + id] * d_inv; + E = dev_conserved[4 * n_cells + id]; + U_advected = dev_conserved[(n_fields - 1) * n_cells + id]; + U_total = E - 0.5 * d * (vx * vx + vy * vy + vz * vz); + + // find the max nearby total energy + Emax = fmax(dev_conserved[4 * n_cells + imo], E); + Emax = fmax(Emax, dev_conserved[4 * n_cells + ipo]); + + if (U_total / Emax > eta_2) + U = U_total; + else + U = U_advected; + + // Optional: Avoid Negative Internal Energies + U = fmax(U, (Real)TINY_NUMBER); + + // Write Selected internal energy to the GasEnergy array ONLY + // to avoid mixing updated and non-updated values of E + // since the Dual Energy condition depends on the neighbor cells + dev_conserved[(n_fields - 1) * n_cells + id] = U; } } - -__global__ void Select_Internal_Energy_2D( Real *dev_conserved, int nx, int ny, int n_ghost, int n_fields ){ - +__global__ void Select_Internal_Energy_2D(Real *dev_conserved, int nx, int ny, + int n_ghost, int n_fields) +{ int id, xid, yid, n_cells; Real d, d_inv, vx, vy, vz, E, U_total, U_advected, U, Emax; int imo, ipo, jmo, jpo; - n_cells = nx*ny; + n_cells = nx * ny; Real eta_2 = DE_ETA_2; // get a global thread ID - int blockId = blockIdx.x + blockIdx.y*gridDim.x; - id = threadIdx.x + blockId * blockDim.x; - yid = id / nx; - xid = id - yid*nx; - - imo = max(xid-1, n_ghost); - imo = imo + yid*nx; - ipo = min(xid+1, nx-n_ghost-1); - ipo = ipo + yid*nx; - jmo = max(yid-1, n_ghost); - jmo = xid + jmo*nx; - jpo = min(yid+1, ny-n_ghost-1); - jpo = xid + jpo*nx; - + int blockId = blockIdx.x + blockIdx.y * gridDim.x; + id = threadIdx.x + blockId * blockDim.x; + yid = id / nx; + xid = id - yid * nx; + + imo = max(xid - 1, n_ghost); + imo = imo + yid * nx; + ipo = min(xid + 1, nx - n_ghost - 1); + ipo = ipo + yid * nx; + jmo = max(yid - 1, n_ghost); + jmo = xid + jmo * nx; + jpo = min(yid + 1, ny - n_ghost - 1); + jpo = xid + jpo * nx; // threads corresponding to real cells do the calculation - if (xid > n_ghost-1 && xid < nx-n_ghost && yid > n_ghost-1 && yid < ny-n_ghost) - { + if (xid > n_ghost - 1 && xid < nx - n_ghost && yid > n_ghost - 1 && + yid < ny - n_ghost) { // every thread collects the conserved variables it needs from global memory - d = dev_conserved[ id]; - d_inv = 1.0 / d; - vx = dev_conserved[1*n_cells + id] * d_inv; - vy = dev_conserved[2*n_cells + id] * d_inv; - vz = dev_conserved[3*n_cells + id] * d_inv; - E = dev_conserved[4*n_cells + id]; - U_advected = dev_conserved[(n_fields-1)*n_cells + id]; - U_total = E - 0.5*d*( vx*vx + vy*vy + vz*vz ); - - //find the max nearby total energy - Emax = fmax(dev_conserved[4*n_cells + imo], E); - Emax = fmax(Emax, dev_conserved[4*n_cells + ipo]); - Emax = fmax(Emax, dev_conserved[4*n_cells + jmo]); - Emax = fmax(Emax, dev_conserved[4*n_cells + jpo]); - - if (U_total/Emax > eta_2 ) U = U_total; - else U = U_advected; - - //Optional: Avoid Negative Internal Energies - U = fmax(U, (Real) TINY_NUMBER); - - //Write Selected internal energy to the GasEnergy array ONLY - //to avoid mixing updated and non-updated values of E - //since the Dual Energy condition depends on the neighbour cells - dev_conserved[(n_fields-1)*n_cells + id] = U; - + d = dev_conserved[id]; + d_inv = 1.0 / d; + vx = dev_conserved[1 * n_cells + id] * d_inv; + vy = dev_conserved[2 * n_cells + id] * d_inv; + vz = dev_conserved[3 * n_cells + id] * d_inv; + E = dev_conserved[4 * n_cells + id]; + U_advected = dev_conserved[(n_fields - 1) * n_cells + id]; + U_total = E - 0.5 * d * (vx * vx + vy * vy + vz * vz); + + // find the max nearby total energy + Emax = fmax(dev_conserved[4 * n_cells + imo], E); + Emax = fmax(Emax, dev_conserved[4 * n_cells + ipo]); + Emax = fmax(Emax, dev_conserved[4 * n_cells + jmo]); + Emax = fmax(Emax, dev_conserved[4 * n_cells + jpo]); + + if (U_total / Emax > eta_2) + U = U_total; + else + U = U_advected; + + // Optional: Avoid Negative Internal Energies + U = fmax(U, (Real)TINY_NUMBER); + + // Write Selected internal energy to the GasEnergy array ONLY + // to avoid mixing updated and non-updated values of E + // since the Dual Energy condition depends on the neighbour cells + dev_conserved[(n_fields - 1) * n_cells + id] = U; } } - -__global__ void Select_Internal_Energy_3D( Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields ){ - +__global__ void Select_Internal_Energy_3D(Real *dev_conserved, int nx, int ny, + int nz, int n_ghost, int n_fields) +{ int id, xid, yid, zid, n_cells; Real d, d_inv, vx, vy, vz, E, U_total, U_advected, U, Emax; int imo, ipo, jmo, jpo, kmo, kpo; - n_cells = nx*ny*nz; + n_cells = nx * ny * nz; Real eta_2 = DE_ETA_2; // get a global thread ID - id = threadIdx.x + blockIdx.x * blockDim.x; - zid = id / (nx*ny); - yid = (id - zid*nx*ny) / nx; - xid = id - zid*nx*ny - yid*nx; - - imo = max(xid-1, n_ghost); - imo = imo + yid*nx + zid*nx*ny; - ipo = min(xid+1, nx-n_ghost-1); - ipo = ipo + yid*nx + zid*nx*ny; - jmo = max(yid-1, n_ghost); - jmo = xid + jmo*nx + zid*nx*ny; - jpo = min(yid+1, ny-n_ghost-1); - jpo = xid + jpo*nx + zid*nx*ny; - kmo = max(zid-1, n_ghost); - kmo = xid + yid*nx + kmo*nx*ny; - kpo = min(zid+1, nz-n_ghost-1); - kpo = xid + yid*nx + kpo*nx*ny; - + id = threadIdx.x + blockIdx.x * blockDim.x; + zid = id / (nx * ny); + yid = (id - zid * nx * ny) / nx; + xid = id - zid * nx * ny - yid * nx; + + imo = max(xid - 1, n_ghost); + imo = imo + yid * nx + zid * nx * ny; + ipo = min(xid + 1, nx - n_ghost - 1); + ipo = ipo + yid * nx + zid * nx * ny; + jmo = max(yid - 1, n_ghost); + jmo = xid + jmo * nx + zid * nx * ny; + jpo = min(yid + 1, ny - n_ghost - 1); + jpo = xid + jpo * nx + zid * nx * ny; + kmo = max(zid - 1, n_ghost); + kmo = xid + yid * nx + kmo * nx * ny; + kpo = min(zid + 1, nz - n_ghost - 1); + kpo = xid + yid * nx + kpo * nx * ny; // threads corresponding to real cells do the calculation - if (xid > n_ghost-1 && xid < nx-n_ghost && yid > n_ghost-1 && yid < ny-n_ghost && zid > n_ghost-1 && zid < nz-n_ghost) - { + if (xid > n_ghost - 1 && xid < nx - n_ghost && yid > n_ghost - 1 && + yid < ny - n_ghost && zid > n_ghost - 1 && zid < nz - n_ghost) { // every thread collects the conserved variables it needs from global memory - d = dev_conserved[ id]; - d_inv = 1.0 / d; - vx = dev_conserved[1*n_cells + id] * d_inv; - vy = dev_conserved[2*n_cells + id] * d_inv; - vz = dev_conserved[3*n_cells + id] * d_inv; - E = dev_conserved[4*n_cells + id]; - U_advected = dev_conserved[(n_fields-1)*n_cells + id]; - U_total = E - 0.5*d*( vx*vx + vy*vy + vz*vz ); - - //find the max nearby total energy - Emax = fmax(dev_conserved[4*n_cells + imo], E); - Emax = fmax(Emax, dev_conserved[4*n_cells + ipo]); - Emax = fmax(Emax, dev_conserved[4*n_cells + jmo]); - Emax = fmax(Emax, dev_conserved[4*n_cells + jpo]); - Emax = fmax(Emax, dev_conserved[4*n_cells + kmo]); - Emax = fmax(Emax, dev_conserved[4*n_cells + kpo]); - - if (U_total/Emax > eta_2 ) U = U_total; - else U = U_advected; - - //Optional: Avoid Negative Internal Energies - U = fmax(U, (Real) TINY_NUMBER); - - //Write Selected internal energy to the GasEnergy array ONLY - //to avoid mixing updated and non-updated values of E - //since the Dual Energy condition depends on the neighbour cells - dev_conserved[(n_fields-1)*n_cells + id] = U; - + d = dev_conserved[id]; + d_inv = 1.0 / d; + vx = dev_conserved[1 * n_cells + id] * d_inv; + vy = dev_conserved[2 * n_cells + id] * d_inv; + vz = dev_conserved[3 * n_cells + id] * d_inv; + E = dev_conserved[4 * n_cells + id]; + U_advected = dev_conserved[(n_fields - 1) * n_cells + id]; + U_total = E - 0.5 * d * (vx * vx + vy * vy + vz * vz); + + // find the max nearby total energy + Emax = fmax(dev_conserved[4 * n_cells + imo], E); + Emax = fmax(Emax, dev_conserved[4 * n_cells + ipo]); + Emax = fmax(Emax, dev_conserved[4 * n_cells + jmo]); + Emax = fmax(Emax, dev_conserved[4 * n_cells + jpo]); + Emax = fmax(Emax, dev_conserved[4 * n_cells + kmo]); + Emax = fmax(Emax, dev_conserved[4 * n_cells + kpo]); + + if (U_total / Emax > eta_2) + U = U_total; + else + U = U_advected; + + // Optional: Avoid Negative Internal Energies + U = fmax(U, (Real)TINY_NUMBER); + + // Write Selected internal energy to the GasEnergy array ONLY + // to avoid mixing updated and non-updated values of E + // since the Dual Energy condition depends on the neighbour cells + dev_conserved[(n_fields - 1) * n_cells + id] = U; } } -__global__ void Sync_Energies_1D(Real *dev_conserved, int nx, int n_ghost, Real gamma, int n_fields) +__global__ void Sync_Energies_1D(Real *dev_conserved, int nx, int n_ghost, + Real gamma, int n_fields) { int id, xid, n_cells; Real d, d_inv, vx, vy, vz, U; n_cells = nx; // get a global thread ID - id = threadIdx.x + blockIdx.x * blockDim.x; + id = threadIdx.x + blockIdx.x * blockDim.x; xid = id; - // threads corresponding to real cells do the calculation - if (xid > n_ghost-1 && xid < nx-n_ghost) - { + if (xid > n_ghost - 1 && xid < nx - n_ghost) { // every thread collects the conserved variables it needs from global memory - d = dev_conserved[ id]; + d = dev_conserved[id]; d_inv = 1.0 / d; - vx = dev_conserved[1*n_cells + id] * d_inv; - vy = dev_conserved[2*n_cells + id] * d_inv; - vz = dev_conserved[3*n_cells + id] * d_inv; - U = dev_conserved[(n_fields-1)*n_cells + id]; - - //Use the previously selected Internal Energy to update the total energy - dev_conserved[4*n_cells + id] = 0.5*d*( vx*vx + vy*vy + vz*vz ) + U; + vx = dev_conserved[1 * n_cells + id] * d_inv; + vy = dev_conserved[2 * n_cells + id] * d_inv; + vz = dev_conserved[3 * n_cells + id] * d_inv; + U = dev_conserved[(n_fields - 1) * n_cells + id]; + + // Use the previously selected Internal Energy to update the total energy + dev_conserved[4 * n_cells + id] = + 0.5 * d * (vx * vx + vy * vy + vz * vz) + U; } - } - -__global__ void Sync_Energies_2D(Real *dev_conserved, int nx, int ny, int n_ghost, Real gamma, int n_fields) +__global__ void Sync_Energies_2D(Real *dev_conserved, int nx, int ny, + int n_ghost, Real gamma, int n_fields) { int id, xid, yid, n_cells; Real d, d_inv, vx, vy, vz, U; - n_cells = nx*ny; + n_cells = nx * ny; // get a global thread ID - int blockId = blockIdx.x + blockIdx.y*gridDim.x; - id = threadIdx.x + blockId * blockDim.x; - yid = id / nx; - xid = id - yid*nx; - + int blockId = blockIdx.x + blockIdx.y * gridDim.x; + id = threadIdx.x + blockId * blockDim.x; + yid = id / nx; + xid = id - yid * nx; // threads corresponding to real cells do the calculation - if (xid > n_ghost-1 && xid < nx-n_ghost && yid > n_ghost-1 && yid < ny-n_ghost) - { + if (xid > n_ghost - 1 && xid < nx - n_ghost && yid > n_ghost - 1 && + yid < ny - n_ghost) { // every thread collects the conserved variables it needs from global memory - d = dev_conserved[ id]; + d = dev_conserved[id]; d_inv = 1.0 / d; - vx = dev_conserved[1*n_cells + id] * d_inv; - vy = dev_conserved[2*n_cells + id] * d_inv; - vz = dev_conserved[3*n_cells + id] * d_inv; - U = dev_conserved[(n_fields-1)*n_cells + id]; - - //Use the previously selected Internal Energy to update the total energy - dev_conserved[4*n_cells + id] = 0.5*d*( vx*vx + vy*vy + vz*vz ) + U; + vx = dev_conserved[1 * n_cells + id] * d_inv; + vy = dev_conserved[2 * n_cells + id] * d_inv; + vz = dev_conserved[3 * n_cells + id] * d_inv; + U = dev_conserved[(n_fields - 1) * n_cells + id]; + + // Use the previously selected Internal Energy to update the total energy + dev_conserved[4 * n_cells + id] = + 0.5 * d * (vx * vx + vy * vy + vz * vz) + U; } - } - -__global__ void Sync_Energies_3D(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, Real gamma, int n_fields) +__global__ void Sync_Energies_3D(Real *dev_conserved, int nx, int ny, int nz, + int n_ghost, Real gamma, int n_fields) { - //Called in a separate kernel to avoid interfering with energy selection in Select_Internal_Energy + // Called in a separate kernel to avoid interfering with energy selection in + // Select_Internal_Energy int id, xid, yid, zid, n_cells; Real d, d_inv, vx, vy, vz, U; - n_cells = nx*ny*nz; + n_cells = nx * ny * nz; // get a global thread ID - id = threadIdx.x + blockIdx.x * blockDim.x; - zid = id / (nx*ny); - yid = (id - zid*nx*ny) / nx; - xid = id - zid*nx*ny - yid*nx; + id = threadIdx.x + blockIdx.x * blockDim.x; + zid = id / (nx * ny); + yid = (id - zid * nx * ny) / nx; + xid = id - zid * nx * ny - yid * nx; // threads corresponding to real cells do the calculation - if (xid > n_ghost-1 && xid < nx-n_ghost && yid > n_ghost-1 && yid < ny-n_ghost && zid > n_ghost-1 && zid < nz-n_ghost) - { + if (xid > n_ghost - 1 && xid < nx - n_ghost && yid > n_ghost - 1 && + yid < ny - n_ghost && zid > n_ghost - 1 && zid < nz - n_ghost) { // every thread collects the conserved variables it needs from global memory - d = dev_conserved[ id]; + d = dev_conserved[id]; d_inv = 1.0 / d; - vx = dev_conserved[1*n_cells + id] * d_inv; - vy = dev_conserved[2*n_cells + id] * d_inv; - vz = dev_conserved[3*n_cells + id] * d_inv; - U = dev_conserved[(n_fields-1)*n_cells + id]; - - //Use the previously selected Internal Energy to update the total energy - dev_conserved[4*n_cells + id] = 0.5*d*( vx*vx + vy*vy + vz*vz ) + U; + vx = dev_conserved[1 * n_cells + id] * d_inv; + vy = dev_conserved[2 * n_cells + id] * d_inv; + vz = dev_conserved[3 * n_cells + id] * d_inv; + U = dev_conserved[(n_fields - 1) * n_cells + id]; + + // Use the previously selected Internal Energy to update the total energy + dev_conserved[4 * n_cells + id] = + 0.5 * d * (vx * vx + vy * vy + vz * vz) + U; } } + #endif // DE -#endif //DE - -#ifdef TEMPERATURE_FLOOR -__global__ void Apply_Temperature_Floor(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real U_floor ) + #ifdef TEMPERATURE_FLOOR +__global__ void Apply_Temperature_Floor(Real *dev_conserved, int nx, int ny, + int nz, int n_ghost, int n_fields, + Real U_floor) { int id, xid, yid, zid, n_cells; Real d, d_inv, vx, vy, vz, E, Ekin, U; - n_cells = nx*ny*nz; + n_cells = nx * ny * nz; // get a global thread ID - id = threadIdx.x + blockIdx.x * blockDim.x; - zid = id / (nx*ny); - yid = (id - zid*nx*ny) / nx; - xid = id - zid*nx*ny - yid*nx; - + id = threadIdx.x + blockIdx.x * blockDim.x; + zid = id / (nx * ny); + yid = (id - zid * nx * ny) / nx; + xid = id - zid * nx * ny - yid * nx; // threads corresponding to real cells do the calculation - if (xid > n_ghost-1 && xid < nx-n_ghost && yid > n_ghost-1 && yid < ny-n_ghost && zid > n_ghost-1 && zid < nz-n_ghost) - { - d = dev_conserved[ id]; + if (xid > n_ghost - 1 && xid < nx - n_ghost && yid > n_ghost - 1 && + yid < ny - n_ghost && zid > n_ghost - 1 && zid < nz - n_ghost) { + d = dev_conserved[id]; d_inv = 1.0 / d; - vx = dev_conserved[1*n_cells + id] * d_inv; - vy = dev_conserved[2*n_cells + id] * d_inv; - vz = dev_conserved[3*n_cells + id] * d_inv; - E = dev_conserved[4*n_cells + id]; - Ekin = 0.5 * d * ( vx*vx + vy*vy + vz*vz ); + vx = dev_conserved[1 * n_cells + id] * d_inv; + vy = dev_conserved[2 * n_cells + id] * d_inv; + vz = dev_conserved[3 * n_cells + id] * d_inv; + E = dev_conserved[4 * n_cells + id]; + Ekin = 0.5 * d * (vx * vx + vy * vy + vz * vz); - U = ( E - Ekin ) / d; - if ( U < U_floor ) dev_conserved[4*n_cells + id] = Ekin + d*U_floor; + U = (E - Ekin) / d; + if (U < U_floor) dev_conserved[4 * n_cells + id] = Ekin + d * U_floor; #ifdef DE - U = dev_conserved[(n_fields-1)*n_cells + id] / d ; - if ( U < U_floor ) dev_conserved[(n_fields-1)*n_cells + id] = d*U_floor ; + U = dev_conserved[(n_fields - 1) * n_cells + id] / d; + if (U < U_floor) dev_conserved[(n_fields - 1) * n_cells + id] = d * U_floor; #endif } } -#endif //TEMPERATURE_FLOOR + #endif // TEMPERATURE_FLOOR - -__device__ Real Average_Cell_Single_Field( int field_indx, int i, int j, int k, int nx, int ny, int nz, int ncells, Real *conserved ){ +__device__ Real Average_Cell_Single_Field(int field_indx, int i, int j, int k, + int nx, int ny, int nz, int ncells, + Real *conserved) +{ Real v_l, v_r, v_d, v_u, v_b, v_t, v_avrg; int id; - id = (i-1) + (j)*nx + (k)*nx*ny; - v_l = conserved[ field_indx*ncells + id ]; - id = (i+1) + (j)*nx + (k)*nx*ny; - v_r = conserved[ field_indx*ncells + id ]; - id = (i) + (j-1)*nx + (k)*nx*ny; - v_d = conserved[ field_indx*ncells + id ]; - id = (i) + (j+1)*nx + (k)*nx*ny; - v_u = conserved[ field_indx*ncells + id ]; - id = (i) + (j)*nx + (k-1)*nx*ny; - v_b = conserved[ field_indx*ncells + id ]; - id = (i) + (j)*nx + (k+1)*nx*ny; - v_t = conserved[ field_indx*ncells + id ]; - v_avrg = ( v_l + v_r + v_d + v_u + v_b + v_t ) / 6; - id = (i) + (j)*nx + (k)*nx*ny; - conserved[ field_indx*ncells + id ] = v_avrg; + id = (i - 1) + (j)*nx + (k)*nx * ny; + v_l = conserved[field_indx * ncells + id]; + id = (i + 1) + (j)*nx + (k)*nx * ny; + v_r = conserved[field_indx * ncells + id]; + id = (i) + (j - 1) * nx + (k)*nx * ny; + v_d = conserved[field_indx * ncells + id]; + id = (i) + (j + 1) * nx + (k)*nx * ny; + v_u = conserved[field_indx * ncells + id]; + id = (i) + (j)*nx + (k - 1) * nx * ny; + v_b = conserved[field_indx * ncells + id]; + id = (i) + (j)*nx + (k + 1) * nx * ny; + v_t = conserved[field_indx * ncells + id]; + v_avrg = (v_l + v_r + v_d + v_u + v_b + v_t) / 6; + id = (i) + (j)*nx + (k)*nx * ny; + conserved[field_indx * ncells + id] = v_avrg; return v_avrg; - } -__device__ void Average_Cell_All_Fields( int i, int j, int k, int nx, int ny, int nz, int ncells, int n_fields, Real *conserved ){ - +__device__ void Average_Cell_All_Fields(int i, int j, int k, int nx, int ny, + int nz, int ncells, int n_fields, + Real *conserved) +{ // Average Density - Average_Cell_Single_Field( 0, i, j, k, nx, ny, nz, ncells, conserved ); + Average_Cell_Single_Field(0, i, j, k, nx, ny, nz, ncells, conserved); // Average Momentum_x - Average_Cell_Single_Field( 1, i, j, k, nx, ny, nz, ncells, conserved ); + Average_Cell_Single_Field(1, i, j, k, nx, ny, nz, ncells, conserved); // Average Momentum_y - Average_Cell_Single_Field( 2, i, j, k, nx, ny, nz, ncells, conserved ); + Average_Cell_Single_Field(2, i, j, k, nx, ny, nz, ncells, conserved); // Average Momentum_z - Average_Cell_Single_Field( 3, i, j, k, nx, ny, nz, ncells, conserved ); + Average_Cell_Single_Field(3, i, j, k, nx, ny, nz, ncells, conserved); // Average Energy - Average_Cell_Single_Field( 4, i, j, k, nx, ny, nz, ncells, conserved ); + Average_Cell_Single_Field(4, i, j, k, nx, ny, nz, ncells, conserved); #ifdef DE // Average GasEnergy - Average_Cell_Single_Field( n_fields-1, i, j, k, nx, ny, nz, ncells, conserved ); - #endif //DE + Average_Cell_Single_Field(n_fields - 1, i, j, k, nx, ny, nz, ncells, + conserved); + #endif // DE } - -#endif //CUDA +#endif // CUDA diff --git a/src/hydro/hydro_cuda.h b/src/hydro/hydro_cuda.h index c801882d3..99bb98820 100644 --- a/src/hydro/hydro_cuda.h +++ b/src/hydro/hydro_cuda.h @@ -2,20 +2,29 @@ * \brief Declarations of functions used in all cuda integration algorithms. */ #ifdef CUDA -#ifndef HYDRO_CUDA_H -#define HYDRO_CUDA_H - -#include "../global/global.h" -#include "../utils/mhd_utilities.h" - -__global__ void Update_Conserved_Variables_1D(Real *dev_conserved, Real *dev_F, int n_cells, int x_off, int n_ghost, Real dx, Real xbound, Real dt, Real gamma, int n_fields); - - -__global__ void Update_Conserved_Variables_2D(Real *dev_conserved, Real *dev_F_x, Real *dev_F_y, int nx, int ny, int x_off, int y_off, int n_ghost, Real dx, Real dy, Real xbound, Real ybound, Real dt, Real gamma, int n_fields); - - -__global__ void Update_Conserved_Variables_3D(Real *dev_conserved, Real *Q_Lx, Real *Q_Rx, Real *Q_Ly, Real *Q_Ry, Real *Q_Lz, Real *Q_Rz, Real *dev_F_x, Real *dev_F_y, Real *dev_F_z, int nx, int ny, int nz, int x_off, int y_off, int z_off, int n_ghost, Real dx, Real dy, Real dz, Real xbound, Real ybound, Real zbound, Real dt, Real gamma, int n_fields, Real density_floor, Real *dev_potential ); - + #ifndef HYDRO_CUDA_H + #define HYDRO_CUDA_H + + #include "../global/global.h" + #include "../utils/mhd_utilities.h" + +__global__ void Update_Conserved_Variables_1D(Real *dev_conserved, Real *dev_F, + int n_cells, int x_off, + int n_ghost, Real dx, Real xbound, + Real dt, Real gamma, + int n_fields); + +__global__ void Update_Conserved_Variables_2D( + Real *dev_conserved, Real *dev_F_x, Real *dev_F_y, int nx, int ny, + int x_off, int y_off, int n_ghost, Real dx, Real dy, Real xbound, + Real ybound, Real dt, Real gamma, int n_fields); + +__global__ void Update_Conserved_Variables_3D( + Real *dev_conserved, Real *Q_Lx, Real *Q_Rx, Real *Q_Ly, Real *Q_Ry, + Real *Q_Lz, Real *Q_Rz, Real *dev_F_x, Real *dev_F_y, Real *dev_F_z, int nx, + int ny, int nz, int x_off, int y_off, int z_off, int n_ghost, Real dx, + Real dy, Real dz, Real xbound, Real ybound, Real zbound, Real dt, + Real gamma, int n_fields, Real density_floor, Real *dev_potential); /*! * \brief Determine the maximum inverse crossing time in a specific cell @@ -32,16 +41,10 @@ __global__ void Update_Conserved_Variables_3D(Real *dev_conserved, Real *Q_Lx, R * \param[in] gamma The adiabatic index * \return Real The maximum inverse crossing time in the cell */ -__device__ __host__ Real hydroInverseCrossingTime(Real const &E, - Real const &d, - Real const &d_inv, - Real const &vx, - Real const &vy, - Real const &vz, - Real const &dx, - Real const &dy, - Real const &dz, - Real const &gamma); +__device__ __host__ Real hydroInverseCrossingTime( + Real const &E, Real const &d, Real const &d_inv, Real const &vx, + Real const &vy, Real const &vz, Real const &dx, Real const &dy, + Real const &dz, Real const &gamma); /*! * \brief Determine the maximum inverse crossing time in a specific cell @@ -61,59 +64,75 @@ __device__ __host__ Real hydroInverseCrossingTime(Real const &E, * \param[in] gamma The adiabatic index * \return Real The maximum inverse crossing time in the cell */ -__device__ __host__ Real mhdInverseCrossingTime(Real const &E, - Real const &d, - Real const &d_inv, - Real const &vx, - Real const &vy, - Real const &vz, - Real const &avgBx, - Real const &avgBy, - Real const &avgBz, - Real const &dx, - Real const &dy, - Real const &dz, - Real const &gamma); - -__global__ void Calc_dt_3D(Real *dev_conserved, Real *dev_dti, Real gamma, int n_ghost, int n_fields, int nx, int ny, int nz, Real dx, Real dy, Real dz); - -Real Calc_dt_GPU(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dx, Real dy, Real dz, Real gamma ); - -__global__ void Sync_Energies_1D(Real *dev_conserved, int nx, int n_ghost, Real gamma, int n_fields); +__device__ __host__ Real mhdInverseCrossingTime( + Real const &E, Real const &d, Real const &d_inv, Real const &vx, + Real const &vy, Real const &vz, Real const &avgBx, Real const &avgBy, + Real const &avgBz, Real const &dx, Real const &dy, Real const &dz, + Real const &gamma); +__global__ void Calc_dt_3D(Real *dev_conserved, Real *dev_dti, Real gamma, + int n_ghost, int n_fields, int nx, int ny, int nz, + Real dx, Real dy, Real dz); -__global__ void Sync_Energies_2D(Real *dev_conserved, int nx, int ny, int n_ghost, Real gamma, int n_fields); +Real Calc_dt_GPU(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, + int n_fields, Real dx, Real dy, Real dz, Real gamma); +__global__ void Sync_Energies_1D(Real *dev_conserved, int nx, int n_ghost, + Real gamma, int n_fields); -__global__ void Sync_Energies_3D(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, Real gamma, int n_fields); +__global__ void Sync_Energies_2D(Real *dev_conserved, int nx, int ny, + int n_ghost, Real gamma, int n_fields); -#ifdef AVERAGE_SLOW_CELLS +__global__ void Sync_Energies_3D(Real *dev_conserved, int nx, int ny, int nz, + int n_ghost, Real gamma, int n_fields); -void Average_Slow_Cells(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dx, Real dy, Real dz, Real gamma, Real max_dti_slow ); + #ifdef AVERAGE_SLOW_CELLS -__global__ void Average_Slow_Cells_3D(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dx, Real dy, Real dz, Real gamma, Real max_dti_slow ); -#endif +void Average_Slow_Cells(Real *dev_conserved, int nx, int ny, int nz, + int n_ghost, int n_fields, Real dx, Real dy, Real dz, + Real gamma, Real max_dti_slow); -#ifdef TEMPERATURE_FLOOR -__global__ void Apply_Temperature_Floor(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real U_floor ); -#endif +__global__ void Average_Slow_Cells_3D(Real *dev_conserved, int nx, int ny, + int nz, int n_ghost, int n_fields, + Real dx, Real dy, Real dz, Real gamma, + Real max_dti_slow); + #endif -__global__ void Partial_Update_Advected_Internal_Energy_1D( Real *dev_conserved, Real *Q_Lx, Real *Q_Rx, int nx, int n_ghost, Real dx, Real dt, Real gamma, int n_fields ); + #ifdef TEMPERATURE_FLOOR +__global__ void Apply_Temperature_Floor(Real *dev_conserved, int nx, int ny, + int nz, int n_ghost, int n_fields, + Real U_floor); + #endif -__global__ void Partial_Update_Advected_Internal_Energy_2D( Real *dev_conserved, Real *Q_Lx, Real *Q_Rx, Real *Q_Ly, Real *Q_Ry, int nx, int ny, int n_ghost, Real dx, Real dy, Real dt, Real gamma, int n_fields ); +__global__ void Partial_Update_Advected_Internal_Energy_1D( + Real *dev_conserved, Real *Q_Lx, Real *Q_Rx, int nx, int n_ghost, Real dx, + Real dt, Real gamma, int n_fields); -__global__ void Partial_Update_Advected_Internal_Energy_3D( Real *dev_conserved, Real *Q_Lx, Real *Q_Rx, Real *Q_Ly, Real *Q_Ry, Real *Q_Lz, Real *Q_Rz, int nx, int ny, int nz, int n_ghost, Real dx, Real dy, Real dz, Real dt, Real gamma, int n_fields ); +__global__ void Partial_Update_Advected_Internal_Energy_2D( + Real *dev_conserved, Real *Q_Lx, Real *Q_Rx, Real *Q_Ly, Real *Q_Ry, int nx, + int ny, int n_ghost, Real dx, Real dy, Real dt, Real gamma, int n_fields); -__global__ void Select_Internal_Energy_1D( Real *dev_conserved, int nx, int n_ghost, int n_fields ); +__global__ void Partial_Update_Advected_Internal_Energy_3D( + Real *dev_conserved, Real *Q_Lx, Real *Q_Rx, Real *Q_Ly, Real *Q_Ry, + Real *Q_Lz, Real *Q_Rz, int nx, int ny, int nz, int n_ghost, Real dx, + Real dy, Real dz, Real dt, Real gamma, int n_fields); -__global__ void Select_Internal_Energy_2D( Real *dev_conserved, int nx, int ny, int n_ghost, int n_fields ); +__global__ void Select_Internal_Energy_1D(Real *dev_conserved, int nx, + int n_ghost, int n_fields); -__global__ void Select_Internal_Energy_3D( Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields ); +__global__ void Select_Internal_Energy_2D(Real *dev_conserved, int nx, int ny, + int n_ghost, int n_fields); -__device__ void Average_Cell_All_Fields( int i, int j, int k, int nx, int ny, int nz, int ncells, int n_fields, Real *conserved ); +__global__ void Select_Internal_Energy_3D(Real *dev_conserved, int nx, int ny, + int nz, int n_ghost, int n_fields); -__device__ Real Average_Cell_Single_Field( int field_indx, int i, int j, int k, int nx, int ny, int nz, int ncells, Real *conserved ); +__device__ void Average_Cell_All_Fields(int i, int j, int k, int nx, int ny, + int nz, int ncells, int n_fields, + Real *conserved); +__device__ Real Average_Cell_Single_Field(int field_indx, int i, int j, int k, + int nx, int ny, int nz, int ncells, + Real *conserved); -#endif //HYDRO_CUDA_H -#endif //CUDA + #endif // HYDRO_CUDA_H +#endif // CUDA diff --git a/src/hydro/hydro_cuda_tests.cu b/src/hydro/hydro_cuda_tests.cu index 490e8eadb..524e61469 100644 --- a/src/hydro/hydro_cuda_tests.cu +++ b/src/hydro/hydro_cuda_tests.cu @@ -1,26 +1,27 @@ /*! -* \file hydro_cuda_tests.cu -* \author Evan Schneider (evs34@pitt.edu) -* \brief Test the code units within hydro_cuda.cu -* -*/ + * \file hydro_cuda_tests.cu + * \author Evan Schneider (evs34@pitt.edu) + * \brief Test the code units within hydro_cuda.cu + * + */ // STL Includes +#include + #include -#include #include -#include +#include // External Includes -#include // Include GoogleTest and related libraries/headers +#include // Include GoogleTest and related libraries/headers // Local Includes #include "../global/global.h" #include "../global/global_cuda.h" +#include "../hydro/hydro_cuda.h" // Include code to test +#include "../utils/DeviceVector.h" #include "../utils/gpu.hpp" #include "../utils/testing_utilities.h" -#include "../utils/DeviceVector.h" -#include "../hydro/hydro_cuda.h" // Include code to test #if defined(CUDA) @@ -33,29 +34,32 @@ TEST(tHYDROCalcDt3D, CorrectInputExpectCorrectOutput) int num_blocks = 1; dim3 dim1dGrid(num_blocks, 1, 1); dim3 dim1dBlock(TPB, 1, 1); - int const nx = 1; - int const ny = 1; - int const nz = 1; - int const n_fields = 5; // Total number of conserved fields - int const n_ghost = 0; - Real dx = 1.0; - Real dy = 1.0; - Real dz = 1.0; + int const nx = 1; + int const ny = 1; + int const nz = 1; + int const n_fields = 5; // Total number of conserved fields + int const n_ghost = 0; + Real dx = 1.0; + Real dy = 1.0; + Real dz = 1.0; std::vector host_conserved(n_fields); cuda_utilities::DeviceVector dev_conserved(n_fields); cuda_utilities::DeviceVector dev_dti(1); - Real gamma = 5.0/3.0; + Real gamma = 5.0 / 3.0; // Set values of conserved variables for input (host) - host_conserved.at(0) = 1.0; // density - host_conserved.at(1) = 0.0; // x momentum - host_conserved.at(2) = 0.0; // y momentum - host_conserved.at(3) = 0.0; // z momentum - host_conserved.at(4) = 1.0; // Energy + host_conserved.at(0) = 1.0; // density + host_conserved.at(1) = 0.0; // x momentum + host_conserved.at(2) = 0.0; // y momentum + host_conserved.at(3) = 0.0; // z momentum + host_conserved.at(4) = 1.0; // Energy // Copy host data to device arrray - CudaSafeCall(cudaMemcpy(dev_conserved, host_conserved, n_fields*sizeof(Real), cudaMemcpyHostToDevice)); - //__global__ void Calc_dt_3D(Real *dev_conserved, Real *dev_dti, Real gamma, int n_ghost, int n_fields, int nx, int ny, int nz, Real dx, Real dy, Real dz) + CudaSafeCall(cudaMemcpy(dev_conserved, host_conserved, + n_fields * sizeof(Real), cudaMemcpyHostToDevice)); + //__global__ void Calc_dt_3D(Real *dev_conserved, Real *dev_dti, Real gamma, + // int n_ghost, int n_fields, int nx, int ny, int nz, Real dx, Real dy, Real + // dz) // Run the kernel hipLaunchKernelGGL(Calc_dt_3D, dim1dGrid, dim1dBlock, 0, 0, @@ -66,16 +70,20 @@ TEST(tHYDROCalcDt3D, CorrectInputExpectCorrectOutput) // Compare results // Check for equality and if not equal return difference double const fiducialDt = 1.0540925533894598; - double const testData = dev_dti.at(0); + double const testData = dev_dti.at(0); double absoluteDiff; int64_t ulpsDiff; bool areEqual; - areEqual = testingUtilities::nearlyEqualDbl(fiducialDt, testData, absoluteDiff, ulpsDiff); - EXPECT_TRUE(areEqual) - << "The fiducial value is: " << fiducialDt << std::endl - << "The test value is: " << testData << std::endl - << "The absolute difference is: " << absoluteDiff << std::endl - << "The ULP difference is: " << ulpsDiff << std::endl; + areEqual = testingUtilities::nearlyEqualDbl(fiducialDt, testData, + absoluteDiff, ulpsDiff); + EXPECT_TRUE(areEqual) << "The fiducial value is: " << fiducialDt + << std::endl + << "The test value is: " << testData + << std::endl + << "The absolute difference is: " << absoluteDiff + << std::endl + << "The ULP difference is: " << ulpsDiff + << std::endl; } // ============================================================================= // End of tests for the Calc_dt_GPU function @@ -84,37 +92,31 @@ TEST(tHYDROCalcDt3D, CorrectInputExpectCorrectOutput) // ============================================================================= // Tests for the hydroInverseCrossingTime function // ============================================================================= -TEST(tHYDROHydroInverseCrossingTime, - CorrectInputExpectCorrectOutput) +TEST(tHYDROHydroInverseCrossingTime, CorrectInputExpectCorrectOutput) { -// Set test values -double const energy = 7.6976906577e2; -double const density = 1.6756968986; -double const velocityX = 7.0829278656; -double const velocityY = 5.9283073464; -double const velocityZ = 8.8417748226; -double const cellSizeX = 8.1019429453e2; -double const cellSizeY = 7.1254780684e2; -double const cellSizeZ = 7.5676716066e2; -double const gamma = 5./3.; - -// Fiducial Values -double const fiducialInverseCrossingTime = 0.038751126881804446; - -// Function to test -double testInverseCrossingTime = hydroInverseCrossingTime(energy, - density, - 1./density, - velocityX, - velocityY, - velocityZ, - cellSizeX, - cellSizeY, - cellSizeZ, - gamma); - -// Check results -testingUtilities::checkResults(fiducialInverseCrossingTime, testInverseCrossingTime, "inverse crossing time"); + // Set test values + double const energy = 7.6976906577e2; + double const density = 1.6756968986; + double const velocityX = 7.0829278656; + double const velocityY = 5.9283073464; + double const velocityZ = 8.8417748226; + double const cellSizeX = 8.1019429453e2; + double const cellSizeY = 7.1254780684e2; + double const cellSizeZ = 7.5676716066e2; + double const gamma = 5. / 3.; + + // Fiducial Values + double const fiducialInverseCrossingTime = 0.038751126881804446; + + // Function to test + double testInverseCrossingTime = hydroInverseCrossingTime( + energy, density, 1. / density, velocityX, velocityY, velocityZ, cellSizeX, + cellSizeY, cellSizeZ, gamma); + + // Check results + testingUtilities::checkResults(fiducialInverseCrossingTime, + testInverseCrossingTime, + "inverse crossing time"); } // ============================================================================= // End of tests for the hydroInverseCrossingTime function @@ -123,8 +125,7 @@ testingUtilities::checkResults(fiducialInverseCrossingTime, testInverseCrossingT // ============================================================================= // Tests for the mhdInverseCrossingTime function // ============================================================================= -TEST(tMHDMhdInverseCrossingTime, - CorrectInputExpectCorrectOutput) +TEST(tMHDMhdInverseCrossingTime, CorrectInputExpectCorrectOutput) { // Set test values double const energy = 7.6976906577e2; @@ -138,29 +139,20 @@ TEST(tMHDMhdInverseCrossingTime, double const cellSizeX = 8.1019429453e2; double const cellSizeY = 7.1254780684e2; double const cellSizeZ = 7.5676716066e2; - double const gamma = 5./3.; + double const gamma = 5. / 3.; // Fiducial Values double const fiducialInverseCrossingTime = 0.038688028391959103; // Function to test - double testInverseCrossingTime = mhdInverseCrossingTime(energy, - density, - 1./density, - velocityX, - velocityY, - velocityZ, - magneticX, - magneticY, - magneticZ, - cellSizeX, - cellSizeY, - cellSizeZ, - gamma); - + double testInverseCrossingTime = mhdInverseCrossingTime( + energy, density, 1. / density, velocityX, velocityY, velocityZ, magneticX, + magneticY, magneticZ, cellSizeX, cellSizeY, cellSizeZ, gamma); // Check results - testingUtilities::checkResults(fiducialInverseCrossingTime, testInverseCrossingTime, "inverse crossing time"); + testingUtilities::checkResults(fiducialInverseCrossingTime, + testInverseCrossingTime, + "inverse crossing time"); } // ============================================================================= // End of tests for the mhdInverseCrossingTime function diff --git a/src/integrators/VL_1D_cuda.cu b/src/integrators/VL_1D_cuda.cu index 0eaecc899..bc40da90d 100644 --- a/src/integrators/VL_1D_cuda.cu +++ b/src/integrators/VL_1D_cuda.cu @@ -2,203 +2,240 @@ * \brief Definitions of the cuda VL algorithm functions. */ #ifdef CUDA -#ifdef VL - -#include -#include -#include -#include "../utils/gpu.hpp" -#include "../global/global.h" -#include "../global/global_cuda.h" -#include "../hydro/hydro_cuda.h" -#include "../integrators/VL_1D_cuda.h" -#include "../reconstruction/pcm_cuda.h" -#include "../reconstruction/plmp_cuda.h" -#include "../reconstruction/plmc_cuda.h" -#include "../reconstruction/ppmp_cuda.h" -#include "../reconstruction/ppmc_cuda.h" -#include "../riemann_solvers/exact_cuda.h" -#include "../riemann_solvers/roe_cuda.h" -#include "../riemann_solvers/hllc_cuda.h" -#include "../utils/error_handling.h" -#include "../io/io.h" - - -__global__ void Update_Conserved_Variables_1D_half(Real *dev_conserved, Real *dev_conserved_half, Real *dev_F, - int n_cells, int n_ghost, Real dx, Real dt, Real gamma, int n_fields); - - - -void VL_Algorithm_1D_CUDA(Real *d_conserved, int nx, int x_off, int n_ghost, Real dx, Real xbound, Real dt, int n_fields) + #ifdef VL + + #include + #include + #include + + #include "../global/global.h" + #include "../global/global_cuda.h" + #include "../hydro/hydro_cuda.h" + #include "../integrators/VL_1D_cuda.h" + #include "../io/io.h" + #include "../reconstruction/pcm_cuda.h" + #include "../reconstruction/plmc_cuda.h" + #include "../reconstruction/plmp_cuda.h" + #include "../reconstruction/ppmc_cuda.h" + #include "../reconstruction/ppmp_cuda.h" + #include "../riemann_solvers/exact_cuda.h" + #include "../riemann_solvers/hllc_cuda.h" + #include "../riemann_solvers/roe_cuda.h" + #include "../utils/error_handling.h" + #include "../utils/gpu.hpp" + +__global__ void Update_Conserved_Variables_1D_half( + Real *dev_conserved, Real *dev_conserved_half, Real *dev_F, int n_cells, + int n_ghost, Real dx, Real dt, Real gamma, int n_fields); + +void VL_Algorithm_1D_CUDA(Real *d_conserved, int nx, int x_off, int n_ghost, + Real dx, Real xbound, Real dt, int n_fields) { - //Here, *dev_conserved contains the entire - //set of conserved variables on the grid + // Here, *dev_conserved contains the entire + // set of conserved variables on the grid int n_cells = nx; - int ny = 1; - int nz = 1; - int ngrid = (n_cells + TPB - 1) / TPB; + int ny = 1; + int nz = 1; + int ngrid = (n_cells + TPB - 1) / TPB; // set the dimensions of the cuda grid dim3 dimGrid(ngrid, 1, 1); dim3 dimBlock(TPB, 1, 1); - if ( !memory_allocated ) { - + if (!memory_allocated) { // allocate memory on the GPU dev_conserved = d_conserved; - //CudaSafeCall( cudaMalloc((void**)&dev_conserved, n_fields*n_cells*sizeof(Real)) ); - CudaSafeCall( cudaMalloc((void**)&dev_conserved_half, n_fields*n_cells*sizeof(Real)) ); - CudaSafeCall( cudaMalloc((void**)&Q_Lx, n_fields*n_cells*sizeof(Real)) ); - CudaSafeCall( cudaMalloc((void**)&Q_Rx, n_fields*n_cells*sizeof(Real)) ); - CudaSafeCall( cudaMalloc((void**)&F_x, n_fields*n_cells*sizeof(Real)) ); - - // If memory is single allocated: memory_allocated becomes true and successive timesteps won't allocate memory. - // If the memory is not single allocated: memory_allocated remains Null and memory is allocated every timestep. + // CudaSafeCall( cudaMalloc((void**)&dev_conserved, + // n_fields*n_cells*sizeof(Real)) ); + CudaSafeCall(cudaMalloc((void **)&dev_conserved_half, + n_fields * n_cells * sizeof(Real))); + CudaSafeCall(cudaMalloc((void **)&Q_Lx, n_fields * n_cells * sizeof(Real))); + CudaSafeCall(cudaMalloc((void **)&Q_Rx, n_fields * n_cells * sizeof(Real))); + CudaSafeCall(cudaMalloc((void **)&F_x, n_fields * n_cells * sizeof(Real))); + + // If memory is single allocated: memory_allocated becomes true and + // successive timesteps won't allocate memory. If the memory is not single + // allocated: memory_allocated remains Null and memory is allocated every + // timestep. memory_allocated = true; } - // Step 1: Use PCM reconstruction to put conserved variables into interface arrays - hipLaunchKernelGGL(PCM_Reconstruction_1D, dimGrid, dimBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, n_ghost, gama, n_fields); + // Step 1: Use PCM reconstruction to put conserved variables into interface + // arrays + hipLaunchKernelGGL(PCM_Reconstruction_1D, dimGrid, dimBlock, 0, 0, + dev_conserved, Q_Lx, Q_Rx, nx, n_ghost, gama, n_fields); CudaCheckError(); - // Step 2: Calculate first-order upwind fluxes - #ifdef EXACT - hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dimGrid, dimBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); - #endif - #ifdef ROE - hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dimGrid, dimBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); - #endif - #ifdef HLLC - hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dimGrid, dimBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); - #endif + // Step 2: Calculate first-order upwind fluxes + #ifdef EXACT + hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dimGrid, dimBlock, 0, 0, Q_Lx, + Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); + #endif + #ifdef ROE + hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dimGrid, dimBlock, 0, 0, Q_Lx, + Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); + #endif + #ifdef HLLC + hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dimGrid, dimBlock, 0, 0, Q_Lx, + Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); + #endif CudaCheckError(); - // Step 3: Update the conserved variables half a timestep - hipLaunchKernelGGL(Update_Conserved_Variables_1D_half, dimGrid, dimBlock, 0, 0, dev_conserved, dev_conserved_half, F_x, n_cells, n_ghost, dx, 0.5*dt, gama, n_fields); + hipLaunchKernelGGL(Update_Conserved_Variables_1D_half, dimGrid, dimBlock, 0, + 0, dev_conserved, dev_conserved_half, F_x, n_cells, + n_ghost, dx, 0.5 * dt, gama, n_fields); CudaCheckError(); - - // Step 4: Construct left and right interface values using updated conserved variables - #ifdef PCM - hipLaunchKernelGGL(PCM_Reconstruction_1D, dimGrid, dimBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, n_ghost, gama, n_fields); - #endif - #ifdef PLMC - hipLaunchKernelGGL(PLMC_cuda, dimGrid, dimBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, n_fields); - #endif - #ifdef PLMP - hipLaunchKernelGGL(PLMP_cuda, dimGrid, dimBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, n_fields); - #endif - #ifdef PPMP - hipLaunchKernelGGL(PPMP_cuda, dimGrid, dimBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, n_fields); - #endif - #ifdef PPMC - hipLaunchKernelGGL(PPMC_cuda, dimGrid, dimBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, n_fields); - #endif + // Step 4: Construct left and right interface values using updated conserved + // variables + #ifdef PCM + hipLaunchKernelGGL(PCM_Reconstruction_1D, dimGrid, dimBlock, 0, 0, + dev_conserved_half, Q_Lx, Q_Rx, nx, n_ghost, gama, + n_fields); + #endif + #ifdef PLMC + hipLaunchKernelGGL(PLMC_cuda, dimGrid, dimBlock, 0, 0, dev_conserved_half, + Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, + n_fields); + #endif + #ifdef PLMP + hipLaunchKernelGGL(PLMP_cuda, dimGrid, dimBlock, 0, 0, dev_conserved_half, + Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, + n_fields); + #endif + #ifdef PPMP + hipLaunchKernelGGL(PPMP_cuda, dimGrid, dimBlock, 0, 0, dev_conserved_half, + Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, + n_fields); + #endif + #ifdef PPMC + hipLaunchKernelGGL(PPMC_cuda, dimGrid, dimBlock, 0, 0, dev_conserved_half, + Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, + n_fields); + #endif CudaCheckError(); - - // Step 5: Calculate the fluxes again - #ifdef EXACT - hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dimGrid, dimBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); - #endif - #ifdef ROE - hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dimGrid, dimBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); - #endif - #ifdef HLLC - hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dimGrid, dimBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); - #endif + // Step 5: Calculate the fluxes again + #ifdef EXACT + hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dimGrid, dimBlock, 0, 0, Q_Lx, + Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); + #endif + #ifdef ROE + hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dimGrid, dimBlock, 0, 0, Q_Lx, + Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); + #endif + #ifdef HLLC + hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dimGrid, dimBlock, 0, 0, Q_Lx, + Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); + #endif CudaCheckError(); - #ifdef DE - // Compute the divergence of velocity before updating the conserved array, this solves synchronization issues when adding this term on Update_Conserved_Variables - hipLaunchKernelGGL(Partial_Update_Advected_Internal_Energy_1D, dimGrid, dimBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, n_ghost, dx, dt, gama, n_fields ); - #endif - + #ifdef DE + // Compute the divergence of velocity before updating the conserved array, + // this solves synchronization issues when adding this term on + // Update_Conserved_Variables + hipLaunchKernelGGL(Partial_Update_Advected_Internal_Energy_1D, dimGrid, + dimBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, n_ghost, dx, + dt, gama, n_fields); + #endif // Step 6: Update the conserved variable array - hipLaunchKernelGGL(Update_Conserved_Variables_1D, dimGrid, dimBlock, 0, 0, dev_conserved, F_x, n_cells, x_off, n_ghost, dx, xbound, dt, gama, n_fields); + hipLaunchKernelGGL(Update_Conserved_Variables_1D, dimGrid, dimBlock, 0, 0, + dev_conserved, F_x, n_cells, x_off, n_ghost, dx, xbound, + dt, gama, n_fields); CudaCheckError(); - - #ifdef DE - hipLaunchKernelGGL(Select_Internal_Energy_1D, dimGrid, dimBlock, 0, 0, dev_conserved, nx, n_ghost, n_fields); - hipLaunchKernelGGL(Sync_Energies_1D, dimGrid, dimBlock, 0, 0, dev_conserved, nx, n_ghost, gama, n_fields); + #ifdef DE + hipLaunchKernelGGL(Select_Internal_Energy_1D, dimGrid, dimBlock, 0, 0, + dev_conserved, nx, n_ghost, n_fields); + hipLaunchKernelGGL(Sync_Energies_1D, dimGrid, dimBlock, 0, 0, dev_conserved, + nx, n_ghost, gama, n_fields); CudaCheckError(); - #endif + #endif return; - - } -void Free_Memory_VL_1D() { - +void Free_Memory_VL_1D() +{ // free the GPU memory cudaFree(dev_conserved); cudaFree(dev_conserved_half); cudaFree(Q_Lx); cudaFree(Q_Rx); cudaFree(F_x); - } -__global__ void Update_Conserved_Variables_1D_half(Real *dev_conserved, Real *dev_conserved_half, Real *dev_F, int n_cells, int n_ghost, Real dx, Real dt, Real gamma, int n_fields) +__global__ void Update_Conserved_Variables_1D_half( + Real *dev_conserved, Real *dev_conserved_half, Real *dev_F, int n_cells, + int n_ghost, Real dx, Real dt, Real gamma, int n_fields) { int id, imo; - Real dtodx = dt/dx; + Real dtodx = dt / dx; // get a global thread ID id = threadIdx.x + blockIdx.x * blockDim.x; - #ifdef DE + #ifdef DE Real d, d_inv, vx, vy, vz; Real vx_imo, vx_ipo, P; int ipo; - #endif + #endif - // threads corresponding all cells except outer ring of ghost cells do the calculation - if (id > 0 && id < n_cells-1) - { - imo = id-1; + // threads corresponding all cells except outer ring of ghost cells do the + // calculation + if (id > 0 && id < n_cells - 1) { + imo = id - 1; #ifdef DE - d = dev_conserved[ id]; + d = dev_conserved[id]; d_inv = 1.0 / d; - vx = dev_conserved[1*n_cells + id] * d_inv; - vy = dev_conserved[2*n_cells + id] * d_inv; - vz = dev_conserved[3*n_cells + id] * d_inv; - P = (dev_conserved[4*n_cells + id] - 0.5*d*(vx*vx + vy*vy + vz*vz)) * (gamma - 1.0); - //if (d < 0.0 || d != d) printf("Negative density before half step update.\n"); - //if (P < 0.0) printf("%d Negative pressure before half step update.\n", id); - ipo = id+1; - vx_imo = dev_conserved[1*n_cells + imo] / dev_conserved[imo]; - vx_ipo = dev_conserved[1*n_cells + ipo] / dev_conserved[ipo]; + vx = dev_conserved[1 * n_cells + id] * d_inv; + vy = dev_conserved[2 * n_cells + id] * d_inv; + vz = dev_conserved[3 * n_cells + id] * d_inv; + P = (dev_conserved[4 * n_cells + id] - + 0.5 * d * (vx * vx + vy * vy + vz * vz)) * + (gamma - 1.0); + // if (d < 0.0 || d != d) printf("Negative density before half step + // update.\n"); if (P < 0.0) printf("%d Negative pressure before half step + // update.\n", id); + ipo = id + 1; + vx_imo = dev_conserved[1 * n_cells + imo] / dev_conserved[imo]; + vx_ipo = dev_conserved[1 * n_cells + ipo] / dev_conserved[ipo]; #endif // update the conserved variable array - dev_conserved_half[ id] = dev_conserved[ id] + dtodx * (dev_F[ imo] - dev_F[ id]); - dev_conserved_half[ n_cells + id] = dev_conserved[ n_cells + id] + dtodx * (dev_F[ n_cells + imo] - dev_F[ n_cells + id]); - dev_conserved_half[2*n_cells + id] = dev_conserved[2*n_cells + id] + dtodx * (dev_F[2*n_cells + imo] - dev_F[2*n_cells + id]); - dev_conserved_half[3*n_cells + id] = dev_conserved[3*n_cells + id] + dtodx * (dev_F[3*n_cells + imo] - dev_F[3*n_cells + id]); - dev_conserved_half[4*n_cells + id] = dev_conserved[4*n_cells + id] + dtodx * (dev_F[4*n_cells + imo] - dev_F[4*n_cells + id]); + dev_conserved_half[id] = + dev_conserved[id] + dtodx * (dev_F[imo] - dev_F[id]); + dev_conserved_half[n_cells + id] = + dev_conserved[n_cells + id] + + dtodx * (dev_F[n_cells + imo] - dev_F[n_cells + id]); + dev_conserved_half[2 * n_cells + id] = + dev_conserved[2 * n_cells + id] + + dtodx * (dev_F[2 * n_cells + imo] - dev_F[2 * n_cells + id]); + dev_conserved_half[3 * n_cells + id] = + dev_conserved[3 * n_cells + id] + + dtodx * (dev_F[3 * n_cells + imo] - dev_F[3 * n_cells + id]); + dev_conserved_half[4 * n_cells + id] = + dev_conserved[4 * n_cells + id] + + dtodx * (dev_F[4 * n_cells + imo] - dev_F[4 * n_cells + id]); #ifdef SCALAR - for (int i=0; i -#include -#include "../utils/gpu.hpp" -#include "../global/global.h" -#include "../global/global_cuda.h" -#include "../hydro/hydro_cuda.h" -#include "../integrators/VL_2D_cuda.h" -#include "../reconstruction/pcm_cuda.h" -#include "../reconstruction/plmp_cuda.h" -#include "../reconstruction/plmc_cuda.h" -#include "../reconstruction/ppmp_cuda.h" -#include "../reconstruction/ppmc_cuda.h" -#include "../riemann_solvers/exact_cuda.h" -#include "../riemann_solvers/roe_cuda.h" -#include "../riemann_solvers/hllc_cuda.h" - - -__global__ void Update_Conserved_Variables_2D_half(Real *dev_conserved, Real *dev_conserved_half, - Real *dev_F_x, Real *dev_F_y, int nx, int ny, - int n_ghost, Real dx, Real dy, Real dt, Real gamma, int n_fields); - - -void VL_Algorithm_2D_CUDA ( Real *d_conserved, int nx, int ny, int x_off, int y_off, int n_ghost, - Real dx, Real dy, Real xbound, Real ybound, Real dt, int n_fields) + #ifdef VL + + #include + #include + + #include "../global/global.h" + #include "../global/global_cuda.h" + #include "../hydro/hydro_cuda.h" + #include "../integrators/VL_2D_cuda.h" + #include "../reconstruction/pcm_cuda.h" + #include "../reconstruction/plmc_cuda.h" + #include "../reconstruction/plmp_cuda.h" + #include "../reconstruction/ppmc_cuda.h" + #include "../reconstruction/ppmp_cuda.h" + #include "../riemann_solvers/exact_cuda.h" + #include "../riemann_solvers/hllc_cuda.h" + #include "../riemann_solvers/roe_cuda.h" + #include "../utils/gpu.hpp" + +__global__ void Update_Conserved_Variables_2D_half(Real *dev_conserved, + Real *dev_conserved_half, + Real *dev_F_x, Real *dev_F_y, + int nx, int ny, int n_ghost, + Real dx, Real dy, Real dt, + Real gamma, int n_fields); + +void VL_Algorithm_2D_CUDA(Real *d_conserved, int nx, int ny, int x_off, + int y_off, int n_ghost, Real dx, Real dy, Real xbound, + Real ybound, Real dt, int n_fields) { + // Here, *dev_conserved contains the entire + // set of conserved variables on the grid + // concatenated into a 1-d array - //Here, *dev_conserved contains the entire - //set of conserved variables on the grid - //concatenated into a 1-d array - - int n_cells = nx*ny; - int nz = 1; - int ngrid = (n_cells + TPB - 1) / TPB; + int n_cells = nx * ny; + int nz = 1; + int ngrid = (n_cells + TPB - 1) / TPB; // set values for GPU kernels // number of blocks per 1D grid dim3 dim2dGrid(ngrid, 1, 1); - //number of threads per 1D block + // number of threads per 1D block dim3 dim1dBlock(TPB, 1, 1); - - if ( !memory_allocated ) { - + if (!memory_allocated) { // allocate GPU arrays - //CudaSafeCall( cudaMalloc((void**)&dev_conserved, n_fields*n_cells*sizeof(Real)) ); + // CudaSafeCall( cudaMalloc((void**)&dev_conserved, + // n_fields*n_cells*sizeof(Real)) ); dev_conserved = d_conserved; - CudaSafeCall( cudaMalloc((void**)&dev_conserved_half, n_fields*n_cells*sizeof(Real)) ); - CudaSafeCall( cudaMalloc((void**)&Q_Lx, n_fields*n_cells*sizeof(Real)) ); - CudaSafeCall( cudaMalloc((void**)&Q_Rx, n_fields*n_cells*sizeof(Real)) ); - CudaSafeCall( cudaMalloc((void**)&Q_Ly, n_fields*n_cells*sizeof(Real)) ); - CudaSafeCall( cudaMalloc((void**)&Q_Ry, n_fields*n_cells*sizeof(Real)) ); - CudaSafeCall( cudaMalloc((void**)&F_x, n_fields*n_cells*sizeof(Real)) ); - CudaSafeCall( cudaMalloc((void**)&F_y, n_fields*n_cells*sizeof(Real)) ); - - // If memory is single allocated: memory_allocated becomes true and successive timesteps won't allocate memory. - // If the memory is not single allocated: memory_allocated remains Null and memory is allocated every timestep. + CudaSafeCall(cudaMalloc((void **)&dev_conserved_half, + n_fields * n_cells * sizeof(Real))); + CudaSafeCall(cudaMalloc((void **)&Q_Lx, n_fields * n_cells * sizeof(Real))); + CudaSafeCall(cudaMalloc((void **)&Q_Rx, n_fields * n_cells * sizeof(Real))); + CudaSafeCall(cudaMalloc((void **)&Q_Ly, n_fields * n_cells * sizeof(Real))); + CudaSafeCall(cudaMalloc((void **)&Q_Ry, n_fields * n_cells * sizeof(Real))); + CudaSafeCall(cudaMalloc((void **)&F_x, n_fields * n_cells * sizeof(Real))); + CudaSafeCall(cudaMalloc((void **)&F_y, n_fields * n_cells * sizeof(Real))); + + // If memory is single allocated: memory_allocated becomes true and + // successive timesteps won't allocate memory. If the memory is not single + // allocated: memory_allocated remains Null and memory is allocated every + // timestep. memory_allocated = true; } - // Step 1: Use PCM reconstruction to put conserved variables into interface arrays - hipLaunchKernelGGL(PCM_Reconstruction_2D, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, Q_Ly, Q_Ry, nx, ny, n_ghost, gama, n_fields); + // Step 1: Use PCM reconstruction to put conserved variables into interface + // arrays + hipLaunchKernelGGL(PCM_Reconstruction_2D, dim2dGrid, dim1dBlock, 0, 0, + dev_conserved, Q_Lx, Q_Rx, Q_Ly, Q_Ry, nx, ny, n_ghost, + gama, n_fields); CudaCheckError(); - - // Step 2: Calculate first-order upwind fluxes - #ifdef EXACT - hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); - hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields); - #endif - #ifdef ROE - hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); - hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields); - #endif - #ifdef HLLC - hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); - hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields); - #endif + // Step 2: Calculate first-order upwind fluxes + #ifdef EXACT + hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, + Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); + hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, + Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields); + #endif + #ifdef ROE + hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, + Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); + hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, + Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields); + #endif + #ifdef HLLC + hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, + Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); + hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, + Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields); + #endif CudaCheckError(); - // Step 3: Update the conserved variables half a timestep - hipLaunchKernelGGL(Update_Conserved_Variables_2D_half, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, dev_conserved_half, F_x, F_y, nx, ny, n_ghost, dx, dy, 0.5*dt, gama, n_fields); + hipLaunchKernelGGL(Update_Conserved_Variables_2D_half, dim2dGrid, dim1dBlock, + 0, 0, dev_conserved, dev_conserved_half, F_x, F_y, nx, ny, + n_ghost, dx, dy, 0.5 * dt, gama, n_fields); CudaCheckError(); - - // Step 4: Construct left and right interface values using updated conserved variables - #ifdef PLMP - hipLaunchKernelGGL(PLMP_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, n_fields); - hipLaunchKernelGGL(PLMP_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, dt, gama, 1, n_fields); - #endif - #ifdef PLMC - hipLaunchKernelGGL(PLMC_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, n_fields); - hipLaunchKernelGGL(PLMC_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, dt, gama, 1, n_fields); - #endif - #ifdef PPMP - hipLaunchKernelGGL(PPMP_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, n_fields); - hipLaunchKernelGGL(PPMP_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, dt, gama, 1, n_fields); - #endif //PPMP - #ifdef PPMC - hipLaunchKernelGGL(PPMC_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, n_fields); - hipLaunchKernelGGL(PPMC_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, dt, gama, 1, n_fields); - #endif //PPMC + // Step 4: Construct left and right interface values using updated conserved + // variables + #ifdef PLMP + hipLaunchKernelGGL(PLMP_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved_half, + Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, + n_fields); + hipLaunchKernelGGL(PLMP_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved_half, + Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, dt, gama, 1, + n_fields); + #endif + #ifdef PLMC + hipLaunchKernelGGL(PLMC_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved_half, + Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, + n_fields); + hipLaunchKernelGGL(PLMC_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved_half, + Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, dt, gama, 1, + n_fields); + #endif + #ifdef PPMP + hipLaunchKernelGGL(PPMP_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved_half, + Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, + n_fields); + hipLaunchKernelGGL(PPMP_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved_half, + Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, dt, gama, 1, + n_fields); + #endif // PPMP + #ifdef PPMC + hipLaunchKernelGGL(PPMC_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved_half, + Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, + n_fields); + hipLaunchKernelGGL(PPMC_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved_half, + Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, dt, gama, 1, + n_fields); + #endif // PPMC CudaCheckError(); - - // Step 5: Calculate the fluxes again - #ifdef EXACT - hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); - hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields); - #endif - #ifdef ROE - hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); - hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields); - #endif - #ifdef HLLC - hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); - hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields); - #endif + // Step 5: Calculate the fluxes again + #ifdef EXACT + hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, + Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); + hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, + Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields); + #endif + #ifdef ROE + hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, + Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); + hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, + Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields); + #endif + #ifdef HLLC + hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, + Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); + hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, + Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields); + #endif CudaCheckError(); - #ifdef DE - // Compute the divergence of velocity before updating the conserved array, this solves synchronization issues when adding this term on Update_Conserved_Variables - hipLaunchKernelGGL(Partial_Update_Advected_Internal_Energy_2D, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, Q_Ly, Q_Ry, nx, ny, n_ghost, dx, dy, dt, gama, n_fields ); - #endif - + #ifdef DE + // Compute the divergence of velocity before updating the conserved array, + // this solves synchronization issues when adding this term on + // Update_Conserved_Variables + hipLaunchKernelGGL(Partial_Update_Advected_Internal_Energy_2D, dim2dGrid, + dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, Q_Ly, Q_Ry, + nx, ny, n_ghost, dx, dy, dt, gama, n_fields); + #endif // Step 6: Update the conserved variable array - hipLaunchKernelGGL(Update_Conserved_Variables_2D, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, F_x, F_y, nx, ny, x_off, y_off, n_ghost, dx, dy, xbound, ybound, dt, gama, n_fields); + hipLaunchKernelGGL(Update_Conserved_Variables_2D, dim2dGrid, dim1dBlock, 0, 0, + dev_conserved, F_x, F_y, nx, ny, x_off, y_off, n_ghost, dx, + dy, xbound, ybound, dt, gama, n_fields); CudaCheckError(); - - #ifdef DE - hipLaunchKernelGGL(Select_Internal_Energy_2D, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, n_ghost, n_fields); - hipLaunchKernelGGL(Sync_Energies_2D, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, n_ghost, gama, n_fields); + #ifdef DE + hipLaunchKernelGGL(Select_Internal_Energy_2D, dim2dGrid, dim1dBlock, 0, 0, + dev_conserved, nx, ny, n_ghost, n_fields); + hipLaunchKernelGGL(Sync_Energies_2D, dim2dGrid, dim1dBlock, 0, 0, + dev_conserved, nx, ny, n_ghost, gama, n_fields); CudaCheckError(); - #endif + #endif return; - } -void Free_Memory_VL_2D() { - +void Free_Memory_VL_2D() +{ // free the GPU memory cudaFree(dev_conserved); cudaFree(dev_conserved_half); @@ -156,89 +195,99 @@ void Free_Memory_VL_2D() { cudaFree(Q_Ry); cudaFree(F_x); cudaFree(F_y); - } - -__global__ void Update_Conserved_Variables_2D_half(Real *dev_conserved, Real *dev_conserved_half, Real *dev_F_x, Real *dev_F_y, int nx, int ny, int n_ghost, Real dx, Real dy, Real dt, Real gamma, int n_fields) +__global__ void Update_Conserved_Variables_2D_half(Real *dev_conserved, + Real *dev_conserved_half, + Real *dev_F_x, Real *dev_F_y, + int nx, int ny, int n_ghost, + Real dx, Real dy, Real dt, + Real gamma, int n_fields) { int id, xid, yid, n_cells; int imo, jmo; - Real dtodx = dt/dx; - Real dtody = dt/dy; + Real dtodx = dt / dx; + Real dtody = dt / dy; - n_cells = nx*ny; + n_cells = nx * ny; // get a global thread ID - int blockId = blockIdx.x + blockIdx.y*gridDim.x; - id = threadIdx.x + blockId * blockDim.x; - yid = id / nx; - xid = id - yid*nx; + int blockId = blockIdx.x + blockIdx.y * gridDim.x; + id = threadIdx.x + blockId * blockDim.x; + yid = id / nx; + xid = id - yid * nx; - #ifdef DE + #ifdef DE Real d, d_inv, vx, vy, vz; Real vx_imo, vx_ipo, vy_jmo, vy_jpo, P; int ipo, jpo; - #endif - + #endif // all threads but one outer ring of ghost cells - if (xid > 0 && xid < nx-1 && yid > 0 && yid < ny-1) - { - imo = xid-1 + yid*nx; - jmo = xid + (yid-1)*nx; + if (xid > 0 && xid < nx - 1 && yid > 0 && yid < ny - 1) { + imo = xid - 1 + yid * nx; + jmo = xid + (yid - 1) * nx; #ifdef DE - d = dev_conserved[ id]; + d = dev_conserved[id]; d_inv = 1.0 / d; - vx = dev_conserved[1*n_cells + id] * d_inv; - vy = dev_conserved[2*n_cells + id] * d_inv; - vz = dev_conserved[3*n_cells + id] * d_inv; - P = (dev_conserved[4*n_cells + id] - 0.5*d*(vx*vx + vy*vy + vz*vz)) * (gamma - 1.0); - //if (d < 0.0 || d != d) printf("Negative density before half step update.\n"); - //if (P < 0.0) printf("%d Negative pressure before half step update.\n", id); - ipo = xid+1 + yid*nx; - jpo = xid + (yid+1)*nx; - vx_imo = dev_conserved[1*n_cells + imo] / dev_conserved[imo]; - vx_ipo = dev_conserved[1*n_cells + ipo] / dev_conserved[ipo]; - vy_jmo = dev_conserved[2*n_cells + jmo] / dev_conserved[jmo]; - vy_jpo = dev_conserved[2*n_cells + jpo] / dev_conserved[jpo]; + vx = dev_conserved[1 * n_cells + id] * d_inv; + vy = dev_conserved[2 * n_cells + id] * d_inv; + vz = dev_conserved[3 * n_cells + id] * d_inv; + P = (dev_conserved[4 * n_cells + id] - + 0.5 * d * (vx * vx + vy * vy + vz * vz)) * + (gamma - 1.0); + // if (d < 0.0 || d != d) printf("Negative density before half step + // update.\n"); if (P < 0.0) printf("%d Negative pressure before half step + // update.\n", id); + ipo = xid + 1 + yid * nx; + jpo = xid + (yid + 1) * nx; + vx_imo = dev_conserved[1 * n_cells + imo] / dev_conserved[imo]; + vx_ipo = dev_conserved[1 * n_cells + ipo] / dev_conserved[ipo]; + vy_jmo = dev_conserved[2 * n_cells + jmo] / dev_conserved[jmo]; + vy_jpo = dev_conserved[2 * n_cells + jpo] / dev_conserved[jpo]; #endif // update the conserved variable array - dev_conserved_half[ id] = dev_conserved[ id] - + dtodx * (dev_F_x[ imo] - dev_F_x[ id]) - + dtody * (dev_F_y[ jmo] - dev_F_y[ id]); - dev_conserved_half[ n_cells + id] = dev_conserved[ n_cells + id] - + dtodx * (dev_F_x[ n_cells + imo] - dev_F_x[ n_cells + id]) - + dtody * (dev_F_y[ n_cells + jmo] - dev_F_y[ n_cells + id]); - dev_conserved_half[2*n_cells + id] = dev_conserved[2*n_cells + id] - + dtodx * (dev_F_x[2*n_cells + imo] - dev_F_x[2*n_cells + id]) - + dtody * (dev_F_y[2*n_cells + jmo] - dev_F_y[2*n_cells + id]); - dev_conserved_half[3*n_cells + id] = dev_conserved[3*n_cells + id] - + dtodx * (dev_F_x[3*n_cells + imo] - dev_F_x[3*n_cells + id]) - + dtody * (dev_F_y[3*n_cells + jmo] - dev_F_y[3*n_cells + id]); - dev_conserved_half[4*n_cells + id] = dev_conserved[4*n_cells + id] - + dtodx * (dev_F_x[4*n_cells + imo] - dev_F_x[4*n_cells + id]) - + dtody * (dev_F_y[4*n_cells + jmo] - dev_F_y[4*n_cells + id]); + dev_conserved_half[id] = dev_conserved[id] + + dtodx * (dev_F_x[imo] - dev_F_x[id]) + + dtody * (dev_F_y[jmo] - dev_F_y[id]); + dev_conserved_half[n_cells + id] = + dev_conserved[n_cells + id] + + dtodx * (dev_F_x[n_cells + imo] - dev_F_x[n_cells + id]) + + dtody * (dev_F_y[n_cells + jmo] - dev_F_y[n_cells + id]); + dev_conserved_half[2 * n_cells + id] = + dev_conserved[2 * n_cells + id] + + dtodx * (dev_F_x[2 * n_cells + imo] - dev_F_x[2 * n_cells + id]) + + dtody * (dev_F_y[2 * n_cells + jmo] - dev_F_y[2 * n_cells + id]); + dev_conserved_half[3 * n_cells + id] = + dev_conserved[3 * n_cells + id] + + dtodx * (dev_F_x[3 * n_cells + imo] - dev_F_x[3 * n_cells + id]) + + dtody * (dev_F_y[3 * n_cells + jmo] - dev_F_y[3 * n_cells + id]); + dev_conserved_half[4 * n_cells + id] = + dev_conserved[4 * n_cells + id] + + dtodx * (dev_F_x[4 * n_cells + imo] - dev_F_x[4 * n_cells + id]) + + dtody * (dev_F_y[4 * n_cells + jmo] - dev_F_y[4 * n_cells + id]); #ifdef SCALAR - for (int i=0; i -#include -#include -#include "../utils/gpu.hpp" -#include "../utils/hydro_utilities.h" -#include "../global/global.h" -#include "../global/global_cuda.h" -#include "../integrators/VL_3D_cuda.h" -#include "../hydro/hydro_cuda.h" -#include "../reconstruction/pcm_cuda.h" -#include "../reconstruction/plmp_cuda.h" -#include "../reconstruction/plmc_cuda.h" -#include "../reconstruction/ppmp_cuda.h" -#include "../reconstruction/ppmc_cuda.h" -#include "../riemann_solvers/exact_cuda.h" -#include "../riemann_solvers/roe_cuda.h" -#include "../riemann_solvers/hllc_cuda.h" -#include "../riemann_solvers/hll_cuda.h" -#include "../riemann_solvers/hlld_cuda.h" -#include "../mhd/ct_electric_fields.h" -#include "../mhd/magnetic_update.h" -#include "../io/io.h" - -__global__ void Update_Conserved_Variables_3D_half(Real *dev_conserved, Real *dev_conserved_half, Real *dev_F_x, Real *dev_F_y, Real *dev_F_z, int nx, int ny, int nz, int n_ghost, Real dx, Real dy, Real dz, Real dt, Real gamma, int n_fields, Real density_floor); - - - -void VL_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, int ny, int nz, int x_off, int y_off, - int z_off, int n_ghost, Real dx, Real dy, Real dz, Real xbound, - Real ybound, Real zbound, Real dt, int n_fields, Real density_floor, - Real U_floor, Real *host_grav_potential ) +#if defined(CUDA) && defined(VL) + + #include + #include + #include + + #include "../global/global.h" + #include "../global/global_cuda.h" + #include "../hydro/hydro_cuda.h" + #include "../integrators/VL_3D_cuda.h" + #include "../io/io.h" + #include "../mhd/ct_electric_fields.h" + #include "../mhd/magnetic_update.h" + #include "../reconstruction/pcm_cuda.h" + #include "../reconstruction/plmc_cuda.h" + #include "../reconstruction/plmp_cuda.h" + #include "../reconstruction/ppmc_cuda.h" + #include "../reconstruction/ppmp_cuda.h" + #include "../riemann_solvers/exact_cuda.h" + #include "../riemann_solvers/hll_cuda.h" + #include "../riemann_solvers/hllc_cuda.h" + #include "../riemann_solvers/hlld_cuda.h" + #include "../riemann_solvers/roe_cuda.h" + #include "../utils/gpu.hpp" + #include "../utils/hydro_utilities.h" + +__global__ void Update_Conserved_Variables_3D_half( + Real *dev_conserved, Real *dev_conserved_half, Real *dev_F_x, Real *dev_F_y, + Real *dev_F_z, int nx, int ny, int nz, int n_ghost, Real dx, Real dy, + Real dz, Real dt, Real gamma, int n_fields, Real density_floor); + +void VL_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, + int ny, int nz, int x_off, int y_off, int z_off, + int n_ghost, Real dx, Real dy, Real dz, Real xbound, + Real ybound, Real zbound, Real dt, int n_fields, + Real density_floor, Real U_floor, + Real *host_grav_potential) { - //Here, *dev_conserved contains the entire - //set of conserved variables on the grid - //concatenated into a 1-d array + // Here, *dev_conserved contains the entire + // set of conserved variables on the grid + // concatenated into a 1-d array - int n_cells = nx*ny*nz; - int ngrid = (n_cells + TPB - 1) / TPB; + int n_cells = nx * ny * nz; + int ngrid = (n_cells + TPB - 1) / TPB; // set values for GPU kernels // number of blocks per 1D grid @@ -51,57 +55,61 @@ void VL_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, int // number of threads per 1D block dim3 dim1dBlock(TPB, 1, 1); - //host_grav_potential is NULL if not using GRAVITY + // host_grav_potential is NULL if not using GRAVITY temp_potential = host_grav_potential; - if ( !memory_allocated ){ - + if (!memory_allocated) { // allocate memory on the GPU dev_conserved = d_conserved; - // Set the size of the interface and flux arrays - #ifdef MHD - // In MHD/Constrained Transport the interface arrays have one fewer fields - // since the magnetic field that is stored on the face does not require - // reconstructions. Similarly the fluxes have one fewer fields since the - // magnetic field on that face doesn't have an associated flux. Each - // interface array store the magnetic fields on that interface that are - // not perpendicular to the interface and arranged cyclically. I.e. the - // `Q_Lx` interface store the reconstructed Y and Z magnetic fields in - // that order, the `Q_Ly` interface stores the Z and X mangetic fields in - // that order, and the `Q_Lz` interface stores the X and Y magnetic fields - // in that order. These fields can be indexed with the Q_?_dir grid_enums. The interface state arrays - // store in the interface on the "right" side of the cell, so the flux - // arrays store the fluxes through the right interface - // - // According to Stone et al. 2008 section 5.3 and the source code of - // Athena, the following equation relate the magnetic flux to the face - // centered electric fields/EMF. -cross(V,B)x is the negative of the - // x-component of V cross B. Note that "X" is the direction the solver is - // running in this case, not necessarily the true "X". - // F_x[(grid_enum::fluxX_magnetic_z)*n_cells] = VxBy - BxVy = -(-cross(V,B))z = -EMF_Z - // F_x[(grid_enum::fluxX_magnetic_y)*n_cells] = VxBz - BxVz = (-cross(V,B))y = EMF_Y - // F_y[(grid_enum::fluxY_magnetic_x)*n_cells] = VxBy - BxVy = -(-cross(V,B))z = -EMF_X - // F_y[(grid_enum::fluxY_magnetic_z)*n_cells] = VxBz - BxVz = (-cross(V,B))y = EMF_Z - // F_z[(grid_enum::fluxZ_magnetic_y)*n_cells] = VxBy - BxVy = -(-cross(V,B))z = -EMF_Y - // F_z[(grid_enum::fluxZ_magnetic_x)*n_cells] = VxBz - BxVz = (-cross(V,B))y = EMF_X - size_t const arraySize = (n_fields-1) * n_cells * sizeof(Real); - size_t const ctArraySize = 3 * n_cells * sizeof(Real); - #else // not MHD - size_t const arraySize = n_fields*n_cells*sizeof(Real); - #endif //MHD - CudaSafeCall( cudaMalloc((void**)&dev_conserved_half, n_fields*n_cells*sizeof(Real)) ); - CudaSafeCall( cudaMalloc((void**)&Q_Lx, arraySize) ); - CudaSafeCall( cudaMalloc((void**)&Q_Rx, arraySize) ); - CudaSafeCall( cudaMalloc((void**)&Q_Ly, arraySize) ); - CudaSafeCall( cudaMalloc((void**)&Q_Ry, arraySize) ); - CudaSafeCall( cudaMalloc((void**)&Q_Lz, arraySize) ); - CudaSafeCall( cudaMalloc((void**)&Q_Rz, arraySize) ); - CudaSafeCall( cudaMalloc((void**)&F_x, arraySize) ); - CudaSafeCall( cudaMalloc((void**)&F_y, arraySize) ); - CudaSafeCall( cudaMalloc((void**)&F_z, arraySize) ); - - cuda_utilities::initGpuMemory(dev_conserved_half, n_fields*n_cells*sizeof(Real)); + // Set the size of the interface and flux arrays + #ifdef MHD + // In MHD/Constrained Transport the interface arrays have one fewer fields + // since the magnetic field that is stored on the face does not require + // reconstructions. Similarly the fluxes have one fewer fields since the + // magnetic field on that face doesn't have an associated flux. Each + // interface array store the magnetic fields on that interface that are + // not perpendicular to the interface and arranged cyclically. I.e. the + // `Q_Lx` interface store the reconstructed Y and Z magnetic fields in + // that order, the `Q_Ly` interface stores the Z and X mangetic fields in + // that order, and the `Q_Lz` interface stores the X and Y magnetic fields + // in that order. These fields can be indexed with the Q_?_dir grid_enums. + // The interface state arrays store in the interface on the "right" side of + // the cell, so the flux arrays store the fluxes through the right interface + // + // According to Stone et al. 2008 section 5.3 and the source code of + // Athena, the following equation relate the magnetic flux to the face + // centered electric fields/EMF. -cross(V,B)x is the negative of the + // x-component of V cross B. Note that "X" is the direction the solver is + // running in this case, not necessarily the true "X". + // F_x[(grid_enum::fluxX_magnetic_z)*n_cells] = VxBy - BxVy = + // -(-cross(V,B))z = -EMF_Z F_x[(grid_enum::fluxX_magnetic_y)*n_cells] = + // VxBz - BxVz = (-cross(V,B))y = EMF_Y + // F_y[(grid_enum::fluxY_magnetic_x)*n_cells] = VxBy - BxVy = + // -(-cross(V,B))z = -EMF_X F_y[(grid_enum::fluxY_magnetic_z)*n_cells] = + // VxBz - BxVz = (-cross(V,B))y = EMF_Z + // F_z[(grid_enum::fluxZ_magnetic_y)*n_cells] = VxBy - BxVy = + // -(-cross(V,B))z = -EMF_Y F_z[(grid_enum::fluxZ_magnetic_x)*n_cells] = + // VxBz - BxVz = (-cross(V,B))y = EMF_X + size_t const arraySize = (n_fields - 1) * n_cells * sizeof(Real); + size_t const ctArraySize = 3 * n_cells * sizeof(Real); + #else // not MHD + size_t const arraySize = n_fields * n_cells * sizeof(Real); + #endif // MHD + CudaSafeCall(cudaMalloc((void **)&dev_conserved_half, + n_fields * n_cells * sizeof(Real))); + CudaSafeCall(cudaMalloc((void **)&Q_Lx, arraySize)); + CudaSafeCall(cudaMalloc((void **)&Q_Rx, arraySize)); + CudaSafeCall(cudaMalloc((void **)&Q_Ly, arraySize)); + CudaSafeCall(cudaMalloc((void **)&Q_Ry, arraySize)); + CudaSafeCall(cudaMalloc((void **)&Q_Lz, arraySize)); + CudaSafeCall(cudaMalloc((void **)&Q_Rz, arraySize)); + CudaSafeCall(cudaMalloc((void **)&F_x, arraySize)); + CudaSafeCall(cudaMalloc((void **)&F_y, arraySize)); + CudaSafeCall(cudaMalloc((void **)&F_z, arraySize)); + + cuda_utilities::initGpuMemory(dev_conserved_half, + n_fields * n_cells * sizeof(Real)); cuda_utilities::initGpuMemory(Q_Lx, arraySize); cuda_utilities::initGpuMemory(Q_Rx, arraySize); cuda_utilities::initGpuMemory(Q_Ly, arraySize); @@ -112,169 +120,263 @@ void VL_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, int cuda_utilities::initGpuMemory(F_y, arraySize); cuda_utilities::initGpuMemory(F_z, arraySize); - #ifdef MHD - CudaSafeCall( cudaMalloc((void**)&ctElectricFields, ctArraySize) ); - cuda_utilities::initGpuMemory(ctElectricFields, ctArraySize); - #endif //MHD + #ifdef MHD + CudaSafeCall(cudaMalloc((void **)&ctElectricFields, ctArraySize)); + cuda_utilities::initGpuMemory(ctElectricFields, ctArraySize); + #endif // MHD - #if defined( GRAVITY ) + #if defined(GRAVITY) dev_grav_potential = d_grav_potential; - #else // not GRAVITY - dev_grav_potential = NULL; - #endif //GRAVITY - - // If memory is single allocated: memory_allocated becomes true and successive timesteps won't allocate memory. - // If the memory is not single allocated: memory_allocated remains Null and memory is allocated every timestep. + #else // not GRAVITY + dev_grav_potential = NULL; + #endif // GRAVITY + + // If memory is single allocated: memory_allocated becomes true and + // successive timesteps won't allocate memory. If the memory is not single + // allocated: memory_allocated remains Null and memory is allocated every + // timestep. memory_allocated = true; } - #if defined( GRAVITY ) && !defined( GRAVITY_GPU ) - CudaSafeCall( cudaMemcpy(dev_grav_potential, temp_potential, n_cells*sizeof(Real), cudaMemcpyHostToDevice) ); - #endif //GRAVITY and GRAVITY_GPU - - - // Step 1: Use PCM reconstruction to put primitive variables into interface arrays - hipLaunchKernelGGL(PCM_Reconstruction_3D, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, Q_Ly, Q_Ry, Q_Lz, Q_Rz, nx, ny, nz, n_ghost, gama, n_fields); - CudaCheckError(); - - // Step 2: Calculate first-order upwind fluxes - #ifdef EXACT - hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); - hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields); - hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lz, Q_Rz, F_z, nx, ny, nz, n_ghost, gama, 2, n_fields); - #endif //EXACT - #ifdef ROE - hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); - hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields); - hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lz, Q_Rz, F_z, nx, ny, nz, n_ghost, gama, 2, n_fields); - #endif //ROE - #ifdef HLLC - hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); - hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields); - hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lz, Q_Rz, F_z, nx, ny, nz, n_ghost, gama, 2, n_fields); - #endif //HLLC - #ifdef HLL - hipLaunchKernelGGL(Calculate_HLL_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); - hipLaunchKernelGGL(Calculate_HLL_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields); - hipLaunchKernelGGL(Calculate_HLL_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lz, Q_Rz, F_z, nx, ny, nz, n_ghost, gama, 2, n_fields); - #endif //HLL - #ifdef HLLD - hipLaunchKernelGGL(mhd::Calculate_HLLD_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, &(dev_conserved[(grid_enum::magnetic_x) * n_cells]), F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); - hipLaunchKernelGGL(mhd::Calculate_HLLD_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, &(dev_conserved[(grid_enum::magnetic_y) * n_cells]), F_y, nx, ny, nz, n_ghost, gama, 1, n_fields); - hipLaunchKernelGGL(mhd::Calculate_HLLD_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lz, Q_Rz, &(dev_conserved[(grid_enum::magnetic_z) * n_cells]), F_z, nx, ny, nz, n_ghost, gama, 2, n_fields); - #endif //HLLD - CudaCheckError(); - - #ifdef MHD - // Step 2.5: Compute the Constrained transport electric fields - hipLaunchKernelGGL(mhd::Calculate_CT_Electric_Fields, dim1dGrid, dim1dBlock, 0, 0, F_x, F_y, F_z, dev_conserved, ctElectricFields, nx, ny, nz, n_cells); - CudaCheckError(); - #endif //MHD - - // Step 3: Update the conserved variables half a timestep - hipLaunchKernelGGL(Update_Conserved_Variables_3D_half, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, dev_conserved_half, F_x, F_y, F_z, nx, ny, nz, n_ghost, dx, dy, dz, 0.5*dt, gama, n_fields, density_floor ); - CudaCheckError(); - #ifdef MHD - // Update the magnetic fields - hipLaunchKernelGGL(mhd::Update_Magnetic_Field_3D, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, dev_conserved_half, ctElectricFields, nx, ny, nz, n_cells, 0.5*dt, dx, dy, dz); - CudaCheckError(); - #endif //MHD - - // Step 4: Construct left and right interface values using updated conserved variables - #ifdef PCM - hipLaunchKernelGGL(PCM_Reconstruction_3D, dim1dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, Q_Ly, Q_Ry, Q_Lz, Q_Rz, nx, ny, nz, n_ghost, gama, n_fields); - #endif //PCM - #ifdef PLMP - hipLaunchKernelGGL(PLMP_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, n_fields); - hipLaunchKernelGGL(PLMP_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, dt, gama, 1, n_fields); - hipLaunchKernelGGL(PLMP_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Lz, Q_Rz, nx, ny, nz, n_ghost, dz, dt, gama, 2, n_fields); - #endif //PLMP - #ifdef PLMC - hipLaunchKernelGGL(PLMC_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, n_fields); - hipLaunchKernelGGL(PLMC_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, dt, gama, 1, n_fields); - hipLaunchKernelGGL(PLMC_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Lz, Q_Rz, nx, ny, nz, n_ghost, dz, dt, gama, 2, n_fields); - #endif //PLMC - #ifdef PPMP - hipLaunchKernelGGL(PPMP_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, n_fields); - hipLaunchKernelGGL(PPMP_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, dt, gama, 1, n_fields); - hipLaunchKernelGGL(PPMP_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Lz, Q_Rz, nx, ny, nz, n_ghost, dz, dt, gama, 2, n_fields); - #endif //PPMP - #ifdef PPMC - hipLaunchKernelGGL(PPMC_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, n_fields); - hipLaunchKernelGGL(PPMC_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, dt, gama, 1, n_fields); - hipLaunchKernelGGL(PPMC_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Lz, Q_Rz, nx, ny, nz, n_ghost, dz, dt, gama, 2, n_fields); - #endif //PPMC - CudaCheckError(); - - - // Step 5: Calculate the fluxes again - #ifdef EXACT - hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); - hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields); - hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lz, Q_Rz, F_z, nx, ny, nz, n_ghost, gama, 2, n_fields); - #endif //EXACT - #ifdef ROE - hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); - hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields); - hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lz, Q_Rz, F_z, nx, ny, nz, n_ghost, gama, 2, n_fields); - #endif //ROE - #ifdef HLLC - hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); - hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields); - hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lz, Q_Rz, F_z, nx, ny, nz, n_ghost, gama, 2, n_fields); - #endif //HLLC - #ifdef HLL - hipLaunchKernelGGL(Calculate_HLL_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); - hipLaunchKernelGGL(Calculate_HLL_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields); - hipLaunchKernelGGL(Calculate_HLL_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lz, Q_Rz, F_z, nx, ny, nz, n_ghost, gama, 2, n_fields); - #endif //HLLC - #ifdef HLLD - hipLaunchKernelGGL(mhd::Calculate_HLLD_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, &(dev_conserved_half[(grid_enum::magnetic_x) * n_cells]), F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); - hipLaunchKernelGGL(mhd::Calculate_HLLD_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, &(dev_conserved_half[(grid_enum::magnetic_y) * n_cells]), F_y, nx, ny, nz, n_ghost, gama, 1, n_fields); - hipLaunchKernelGGL(mhd::Calculate_HLLD_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lz, Q_Rz, &(dev_conserved_half[(grid_enum::magnetic_z) * n_cells]), F_z, nx, ny, nz, n_ghost, gama, 2, n_fields); - #endif //HLLD - CudaCheckError(); + #if defined(GRAVITY) && !defined(GRAVITY_GPU) + CudaSafeCall(cudaMemcpy(dev_grav_potential, temp_potential, + n_cells * sizeof(Real), cudaMemcpyHostToDevice)); + #endif // GRAVITY and GRAVITY_GPU + + // Step 1: Use PCM reconstruction to put primitive variables into interface + // arrays + hipLaunchKernelGGL(PCM_Reconstruction_3D, dim1dGrid, dim1dBlock, 0, 0, + dev_conserved, Q_Lx, Q_Rx, Q_Ly, Q_Ry, Q_Lz, Q_Rz, nx, ny, + nz, n_ghost, gama, n_fields); + CudaCheckError(); + + // Step 2: Calculate first-order upwind fluxes + #ifdef EXACT + hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, + Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); + hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, + Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields); + hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, + Q_Lz, Q_Rz, F_z, nx, ny, nz, n_ghost, gama, 2, n_fields); + #endif // EXACT + #ifdef ROE + hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, + Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); + hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, + Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields); + hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, + Q_Lz, Q_Rz, F_z, nx, ny, nz, n_ghost, gama, 2, n_fields); + #endif // ROE + #ifdef HLLC + hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, + Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); + hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, + Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields); + hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, + Q_Lz, Q_Rz, F_z, nx, ny, nz, n_ghost, gama, 2, n_fields); + #endif // HLLC + #ifdef HLL + hipLaunchKernelGGL(Calculate_HLL_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, + Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); + hipLaunchKernelGGL(Calculate_HLL_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, + Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields); + hipLaunchKernelGGL(Calculate_HLL_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, + Q_Lz, Q_Rz, F_z, nx, ny, nz, n_ghost, gama, 2, n_fields); + #endif // HLL + #ifdef HLLD + hipLaunchKernelGGL(mhd::Calculate_HLLD_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, + 0, Q_Lx, Q_Rx, + &(dev_conserved[(grid_enum::magnetic_x)*n_cells]), F_x, nx, + ny, nz, n_ghost, gama, 0, n_fields); + hipLaunchKernelGGL(mhd::Calculate_HLLD_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, + 0, Q_Ly, Q_Ry, + &(dev_conserved[(grid_enum::magnetic_y)*n_cells]), F_y, nx, + ny, nz, n_ghost, gama, 1, n_fields); + hipLaunchKernelGGL(mhd::Calculate_HLLD_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, + 0, Q_Lz, Q_Rz, + &(dev_conserved[(grid_enum::magnetic_z)*n_cells]), F_z, nx, + ny, nz, n_ghost, gama, 2, n_fields); + #endif // HLLD + CudaCheckError(); + + #ifdef MHD + // Step 2.5: Compute the Constrained transport electric fields + hipLaunchKernelGGL(mhd::Calculate_CT_Electric_Fields, dim1dGrid, dim1dBlock, + 0, 0, F_x, F_y, F_z, dev_conserved, ctElectricFields, nx, + ny, nz, n_cells); + CudaCheckError(); + #endif // MHD + + // Step 3: Update the conserved variables half a timestep + hipLaunchKernelGGL(Update_Conserved_Variables_3D_half, dim1dGrid, dim1dBlock, + 0, 0, dev_conserved, dev_conserved_half, F_x, F_y, F_z, nx, + ny, nz, n_ghost, dx, dy, dz, 0.5 * dt, gama, n_fields, + density_floor); + CudaCheckError(); + #ifdef MHD + // Update the magnetic fields + hipLaunchKernelGGL(mhd::Update_Magnetic_Field_3D, dim1dGrid, dim1dBlock, 0, 0, + dev_conserved, dev_conserved_half, ctElectricFields, nx, + ny, nz, n_cells, 0.5 * dt, dx, dy, dz); + CudaCheckError(); + #endif // MHD + + // Step 4: Construct left and right interface values using updated conserved + // variables + #ifdef PCM + hipLaunchKernelGGL(PCM_Reconstruction_3D, dim1dGrid, dim1dBlock, 0, 0, + dev_conserved_half, Q_Lx, Q_Rx, Q_Ly, Q_Ry, Q_Lz, Q_Rz, nx, + ny, nz, n_ghost, gama, n_fields); + #endif // PCM + #ifdef PLMP + hipLaunchKernelGGL(PLMP_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved_half, + Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, + n_fields); + hipLaunchKernelGGL(PLMP_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved_half, + Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, dt, gama, 1, + n_fields); + hipLaunchKernelGGL(PLMP_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved_half, + Q_Lz, Q_Rz, nx, ny, nz, n_ghost, dz, dt, gama, 2, + n_fields); + #endif // PLMP + #ifdef PLMC + hipLaunchKernelGGL(PLMC_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved_half, + Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, + n_fields); + hipLaunchKernelGGL(PLMC_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved_half, + Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, dt, gama, 1, + n_fields); + hipLaunchKernelGGL(PLMC_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved_half, + Q_Lz, Q_Rz, nx, ny, nz, n_ghost, dz, dt, gama, 2, + n_fields); + #endif // PLMC + #ifdef PPMP + hipLaunchKernelGGL(PPMP_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved_half, + Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, + n_fields); + hipLaunchKernelGGL(PPMP_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved_half, + Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, dt, gama, 1, + n_fields); + hipLaunchKernelGGL(PPMP_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved_half, + Q_Lz, Q_Rz, nx, ny, nz, n_ghost, dz, dt, gama, 2, + n_fields); + #endif // PPMP + #ifdef PPMC + hipLaunchKernelGGL(PPMC_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved_half, + Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, + n_fields); + hipLaunchKernelGGL(PPMC_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved_half, + Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, dt, gama, 1, + n_fields); + hipLaunchKernelGGL(PPMC_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved_half, + Q_Lz, Q_Rz, nx, ny, nz, n_ghost, dz, dt, gama, 2, + n_fields); + #endif // PPMC + CudaCheckError(); + + // Step 5: Calculate the fluxes again + #ifdef EXACT + hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, + Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); + hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, + Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields); + hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, + Q_Lz, Q_Rz, F_z, nx, ny, nz, n_ghost, gama, 2, n_fields); + #endif // EXACT + #ifdef ROE + hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, + Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); + hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, + Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields); + hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, + Q_Lz, Q_Rz, F_z, nx, ny, nz, n_ghost, gama, 2, n_fields); + #endif // ROE + #ifdef HLLC + hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, + Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); + hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, + Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields); + hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, + Q_Lz, Q_Rz, F_z, nx, ny, nz, n_ghost, gama, 2, n_fields); + #endif // HLLC + #ifdef HLL + hipLaunchKernelGGL(Calculate_HLL_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, + Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); + hipLaunchKernelGGL(Calculate_HLL_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, + Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields); + hipLaunchKernelGGL(Calculate_HLL_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, + Q_Lz, Q_Rz, F_z, nx, ny, nz, n_ghost, gama, 2, n_fields); + #endif // HLLC + #ifdef HLLD + hipLaunchKernelGGL(mhd::Calculate_HLLD_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, + 0, Q_Lx, Q_Rx, + &(dev_conserved_half[(grid_enum::magnetic_x)*n_cells]), + F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); + hipLaunchKernelGGL(mhd::Calculate_HLLD_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, + 0, Q_Ly, Q_Ry, + &(dev_conserved_half[(grid_enum::magnetic_y)*n_cells]), + F_y, nx, ny, nz, n_ghost, gama, 1, n_fields); + hipLaunchKernelGGL(mhd::Calculate_HLLD_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, + 0, Q_Lz, Q_Rz, + &(dev_conserved_half[(grid_enum::magnetic_z)*n_cells]), + F_z, nx, ny, nz, n_ghost, gama, 2, n_fields); + #endif // HLLD + CudaCheckError(); - #ifdef DE - // Compute the divergence of Vel before updating the conserved array, this solves synchronization issues when adding this term on Update_Conserved_Variables_3D - hipLaunchKernelGGL(Partial_Update_Advected_Internal_Energy_3D, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, Q_Ly, Q_Ry, Q_Lz, Q_Rz, nx, ny, nz, n_ghost, dx, dy, dz, dt, gama, n_fields ); - CudaCheckError(); - #endif //DE - - #ifdef MHD - // Step 5.5: Compute the Constrained transport electric fields - hipLaunchKernelGGL(mhd::Calculate_CT_Electric_Fields, dim1dGrid, dim1dBlock, 0, 0, F_x, F_y, F_z, dev_conserved_half, ctElectricFields, nx, ny, nz, n_cells); - CudaCheckError(); - #endif //MHD - - // Step 6: Update the conserved variable array - hipLaunchKernelGGL(Update_Conserved_Variables_3D, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, Q_Ly, Q_Ry, Q_Lz, Q_Rz, F_x, F_y, F_z, nx, ny, nz, x_off, y_off, z_off, n_ghost, dx, dy, dz, xbound, ybound, zbound, dt, gama, n_fields, density_floor, dev_grav_potential); - CudaCheckError(); - - #ifdef MHD - // Update the magnetic fields - hipLaunchKernelGGL(mhd::Update_Magnetic_Field_3D, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, dev_conserved, ctElectricFields, nx, ny, nz, n_cells, dt, dx, dy, dz); - CudaCheckError(); - #endif //MHD - - #ifdef DE - hipLaunchKernelGGL(Select_Internal_Energy_3D, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, nz, n_ghost, n_fields); - hipLaunchKernelGGL(Sync_Energies_3D, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, nz, n_ghost, gama, n_fields); - CudaCheckError(); - #endif //DE + #ifdef DE + // Compute the divergence of Vel before updating the conserved array, this + // solves synchronization issues when adding this term on + // Update_Conserved_Variables_3D + hipLaunchKernelGGL(Partial_Update_Advected_Internal_Energy_3D, dim1dGrid, + dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, Q_Ly, Q_Ry, + Q_Lz, Q_Rz, nx, ny, nz, n_ghost, dx, dy, dz, dt, gama, + n_fields); + CudaCheckError(); + #endif // DE + + #ifdef MHD + // Step 5.5: Compute the Constrained transport electric fields + hipLaunchKernelGGL(mhd::Calculate_CT_Electric_Fields, dim1dGrid, dim1dBlock, + 0, 0, F_x, F_y, F_z, dev_conserved_half, ctElectricFields, + nx, ny, nz, n_cells); + CudaCheckError(); + #endif // MHD + + // Step 6: Update the conserved variable array + hipLaunchKernelGGL(Update_Conserved_Variables_3D, dim1dGrid, dim1dBlock, 0, 0, + dev_conserved, Q_Lx, Q_Rx, Q_Ly, Q_Ry, Q_Lz, Q_Rz, F_x, + F_y, F_z, nx, ny, nz, x_off, y_off, z_off, n_ghost, dx, dy, + dz, xbound, ybound, zbound, dt, gama, n_fields, + density_floor, dev_grav_potential); + CudaCheckError(); + + #ifdef MHD + // Update the magnetic fields + hipLaunchKernelGGL(mhd::Update_Magnetic_Field_3D, dim1dGrid, dim1dBlock, 0, 0, + dev_conserved, dev_conserved, ctElectricFields, nx, ny, nz, + n_cells, dt, dx, dy, dz); + CudaCheckError(); + #endif // MHD - #ifdef TEMPERATURE_FLOOR - hipLaunchKernelGGL(Apply_Temperature_Floor, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, nz, n_ghost, n_fields, U_floor ); - CudaCheckError(); - #endif //TEMPERATURE_FLOOR + #ifdef DE + hipLaunchKernelGGL(Select_Internal_Energy_3D, dim1dGrid, dim1dBlock, 0, 0, + dev_conserved, nx, ny, nz, n_ghost, n_fields); + hipLaunchKernelGGL(Sync_Energies_3D, dim1dGrid, dim1dBlock, 0, 0, + dev_conserved, nx, ny, nz, n_ghost, gama, n_fields); + CudaCheckError(); + #endif // DE + + #ifdef TEMPERATURE_FLOOR + hipLaunchKernelGGL(Apply_Temperature_Floor, dim1dGrid, dim1dBlock, 0, 0, + dev_conserved, nx, ny, nz, n_ghost, n_fields, U_floor); + CudaCheckError(); + #endif // TEMPERATURE_FLOOR return; - } - -void Free_Memory_VL_3D(){ - +void Free_Memory_VL_3D() +{ // free the GPU memory cudaFree(dev_conserved); cudaFree(dev_conserved_half); @@ -288,125 +390,146 @@ void Free_Memory_VL_3D(){ cudaFree(F_y); cudaFree(F_z); cudaFree(ctElectricFields); - } -__global__ void Update_Conserved_Variables_3D_half(Real *dev_conserved, Real *dev_conserved_half, Real *dev_F_x, Real *dev_F_y, Real *dev_F_z, int nx, int ny, int nz, int n_ghost, Real dx, Real dy, Real dz, Real dt, Real gamma, int n_fields, Real density_floor ) +__global__ void Update_Conserved_Variables_3D_half( + Real *dev_conserved, Real *dev_conserved_half, Real *dev_F_x, Real *dev_F_y, + Real *dev_F_z, int nx, int ny, int nz, int n_ghost, Real dx, Real dy, + Real dz, Real dt, Real gamma, int n_fields, Real density_floor) { - Real dtodx = dt/dx; - Real dtody = dt/dy; - Real dtodz = dt/dz; - int n_cells = nx*ny*nz; + Real dtodx = dt / dx; + Real dtody = dt / dy; + Real dtodz = dt / dz; + int n_cells = nx * ny * nz; // get a global thread ID int tid = threadIdx.x + blockIdx.x * blockDim.x; - int zid = tid / (nx*ny); - int yid = (tid - zid*nx*ny) / nx; - int xid = tid - zid*nx*ny - yid*nx; - int id = xid + yid*nx + zid*nx*ny; + int zid = tid / (nx * ny); + int yid = (tid - zid * nx * ny) / nx; + int xid = tid - zid * nx * ny - yid * nx; + int id = xid + yid * nx + zid * nx * ny; - int imo = xid-1 + yid*nx + zid*nx*ny; - int jmo = xid + (yid-1)*nx + zid*nx*ny; - int kmo = xid + yid*nx + (zid-1)*nx*ny; + int imo = xid - 1 + yid * nx + zid * nx * ny; + int jmo = xid + (yid - 1) * nx + zid * nx * ny; + int kmo = xid + yid * nx + (zid - 1) * nx * ny; #ifdef DE Real d, d_inv, vx, vy, vz; Real vx_imo, vx_ipo, vy_jmo, vy_jpo, vz_kmo, vz_kpo, P, E, E_kin, GE; int ipo, jpo, kpo; - #endif //DE + #endif // DE #ifdef DENSITY_FLOOR Real dens_0; - #endif //DENSITY_FLOOR + #endif // DENSITY_FLOOR - // threads corresponding to all cells except outer ring of ghost cells do the calculation - if (xid > 0 && xid < nx-1 && yid > 0 && yid < ny-1 && zid > 0 && zid < nz-1) - { - #ifdef DE - d = dev_conserved[ id]; + // threads corresponding to all cells except outer ring of ghost cells do the + // calculation + if (xid > 0 && xid < nx - 1 && yid > 0 && yid < ny - 1 && zid > 0 && + zid < nz - 1) { + #ifdef DE + d = dev_conserved[id]; d_inv = 1.0 / d; - vx = dev_conserved[1*n_cells + id] * d_inv; - vy = dev_conserved[2*n_cells + id] * d_inv; - vz = dev_conserved[3*n_cells + id] * d_inv; - //PRESSURE_DE - E = dev_conserved[4*n_cells + id]; - GE = dev_conserved[(n_fields-1)*n_cells + id]; + vx = dev_conserved[1 * n_cells + id] * d_inv; + vy = dev_conserved[2 * n_cells + id] * d_inv; + vz = dev_conserved[3 * n_cells + id] * d_inv; + // PRESSURE_DE + E = dev_conserved[4 * n_cells + id]; + GE = dev_conserved[(n_fields - 1) * n_cells + id]; E_kin = hydro_utilities::Calc_Kinetic_Energy_From_Velocity(d, vx, vy, vz); - #ifdef MHD - // Add the magnetic energy - auto const [centeredBx, centeredBy, centeredBz] = mhd::utils::cellCenteredMagneticFields(dev_conserved, id, xid, yid, zid, n_cells, nx, ny) - E_kin += mhd::utils::computeMagneticEnergy(centeredBx, centeredBy, centeredBz); - #endif //MHD - P = hydro_utilities::Get_Pressure_From_DE( E, E - E_kin, GE, gamma ); - P = fmax(P, (Real) TINY_NUMBER); - // P = (dev_conserved[4*n_cells + id] - 0.5*d*(vx*vx + vy*vy + vz*vz)) * (gamma - 1.0); - //if (d < 0.0 || d != d) printf("Negative density before half step update.\n"); - //if (P < 0.0) printf("%d Negative pressure before half step update.\n", id); - ipo = xid+1 + yid*nx + zid*nx*ny; - jpo = xid + (yid+1)*nx + zid*nx*ny; - kpo = xid + yid*nx + (zid+1)*nx*ny; - vx_imo = dev_conserved[1*n_cells + imo] / dev_conserved[imo]; - vx_ipo = dev_conserved[1*n_cells + ipo] / dev_conserved[ipo]; - vy_jmo = dev_conserved[2*n_cells + jmo] / dev_conserved[jmo]; - vy_jpo = dev_conserved[2*n_cells + jpo] / dev_conserved[jpo]; - vz_kmo = dev_conserved[3*n_cells + kmo] / dev_conserved[kmo]; - vz_kpo = dev_conserved[3*n_cells + kpo] / dev_conserved[kpo]; - #endif //DE + #ifdef MHD + // Add the magnetic energy + auto const [centeredBx, centeredBy, centeredBz] = + mhd::utils::cellCenteredMagneticFields(dev_conserved, id, xid, yid, zid, + n_cells, nx, ny) E_kin += + mhd::utils::computeMagneticEnergy(centeredBx, centeredBy, centeredBz); + #endif // MHD + P = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, GE, gamma); + P = fmax(P, (Real)TINY_NUMBER); + // P = (dev_conserved[4*n_cells + id] - 0.5*d*(vx*vx + vy*vy + vz*vz)) * + // (gamma - 1.0); + // if (d < 0.0 || d != d) printf("Negative density before half step + // update.\n"); if (P < 0.0) printf("%d Negative pressure before half step + // update.\n", id); + ipo = xid + 1 + yid * nx + zid * nx * ny; + jpo = xid + (yid + 1) * nx + zid * nx * ny; + kpo = xid + yid * nx + (zid + 1) * nx * ny; + vx_imo = dev_conserved[1 * n_cells + imo] / dev_conserved[imo]; + vx_ipo = dev_conserved[1 * n_cells + ipo] / dev_conserved[ipo]; + vy_jmo = dev_conserved[2 * n_cells + jmo] / dev_conserved[jmo]; + vy_jpo = dev_conserved[2 * n_cells + jpo] / dev_conserved[jpo]; + vz_kmo = dev_conserved[3 * n_cells + kmo] / dev_conserved[kmo]; + vz_kpo = dev_conserved[3 * n_cells + kpo] / dev_conserved[kpo]; + #endif // DE // update the conserved variable array - dev_conserved_half[ id] = dev_conserved[ id] - + dtodx * (dev_F_x[ imo] - dev_F_x[ id]) - + dtody * (dev_F_y[ jmo] - dev_F_y[ id]) - + dtodz * (dev_F_z[ kmo] - dev_F_z[ id]); - dev_conserved_half[ n_cells + id] = dev_conserved[ n_cells + id] - + dtodx * (dev_F_x[ n_cells + imo] - dev_F_x[ n_cells + id]) - + dtody * (dev_F_y[ n_cells + jmo] - dev_F_y[ n_cells + id]) - + dtodz * (dev_F_z[ n_cells + kmo] - dev_F_z[ n_cells + id]); - dev_conserved_half[2*n_cells + id] = dev_conserved[2*n_cells + id] - + dtodx * (dev_F_x[2*n_cells + imo] - dev_F_x[2*n_cells + id]) - + dtody * (dev_F_y[2*n_cells + jmo] - dev_F_y[2*n_cells + id]) - + dtodz * (dev_F_z[2*n_cells + kmo] - dev_F_z[2*n_cells + id]); - dev_conserved_half[3*n_cells + id] = dev_conserved[3*n_cells + id] - + dtodx * (dev_F_x[3*n_cells + imo] - dev_F_x[3*n_cells + id]) - + dtody * (dev_F_y[3*n_cells + jmo] - dev_F_y[3*n_cells + id]) - + dtodz * (dev_F_z[3*n_cells + kmo] - dev_F_z[3*n_cells + id]); - dev_conserved_half[4*n_cells + id] = dev_conserved[4*n_cells + id] - + dtodx * (dev_F_x[4*n_cells + imo] - dev_F_x[4*n_cells + id]) - + dtody * (dev_F_y[4*n_cells + jmo] - dev_F_y[4*n_cells + id]) - + dtodz * (dev_F_z[4*n_cells + kmo] - dev_F_z[4*n_cells + id]); - #ifdef SCALAR - for (int i=0; i %f \n", dens_0, density_floor ); - dev_conserved_half[ id] = density_floor; + #endif // SCALAR + #ifdef DE + dev_conserved_half[(n_fields - 1) * n_cells + id] = + dev_conserved[(n_fields - 1) * n_cells + id] + + dtodx * (dev_F_x[(n_fields - 1) * n_cells + imo] - + dev_F_x[(n_fields - 1) * n_cells + id]) + + dtody * (dev_F_y[(n_fields - 1) * n_cells + jmo] - + dev_F_y[(n_fields - 1) * n_cells + id]) + + dtodz * (dev_F_z[(n_fields - 1) * n_cells + kmo] - + dev_F_z[(n_fields - 1) * n_cells + id]) + + 0.5 * P * + (dtodx * (vx_imo - vx_ipo) + dtody * (vy_jmo - vy_jpo) + + dtodz * (vz_kmo - vz_kpo)); + #endif // DE + + #ifdef DENSITY_FLOOR + if (dev_conserved_half[id] < density_floor) { + dens_0 = dev_conserved_half[id]; + printf("###Thread density change %f -> %f \n", dens_0, density_floor); + dev_conserved_half[id] = density_floor; // Scale the conserved values to the new density - dev_conserved_half[1*n_cells + id] *= (density_floor / dens_0); - dev_conserved_half[2*n_cells + id] *= (density_floor / dens_0); - dev_conserved_half[3*n_cells + id] *= (density_floor / dens_0); - dev_conserved_half[4*n_cells + id] *= (density_floor / dens_0); - #ifdef DE - dev_conserved_half[(n_fields-1)*n_cells + id] *= (density_floor / dens_0); - #endif //DE + dev_conserved_half[1 * n_cells + id] *= (density_floor / dens_0); + dev_conserved_half[2 * n_cells + id] *= (density_floor / dens_0); + dev_conserved_half[3 * n_cells + id] *= (density_floor / dens_0); + dev_conserved_half[4 * n_cells + id] *= (density_floor / dens_0); + #ifdef DE + dev_conserved_half[(n_fields - 1) * n_cells + id] *= + (density_floor / dens_0); + #endif // DE } - #endif //DENSITY_FLOOR + #endif // DENSITY_FLOOR } - } -#endif //CUDA and VL +#endif // CUDA and VL diff --git a/src/integrators/VL_3D_cuda.h b/src/integrators/VL_3D_cuda.h index 0d28710ab..6446bb2cb 100644 --- a/src/integrators/VL_3D_cuda.h +++ b/src/integrators/VL_3D_cuda.h @@ -3,18 +3,19 @@ #ifdef CUDA -#ifndef VL_3D_CUDA_H -#define VL_3D_CUDA_H + #ifndef VL_3D_CUDA_H + #define VL_3D_CUDA_H -#include "../global/global.h" + #include "../global/global.h" -void VL_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, - int nx, int ny, int nz, int x_off, int y_off, - int z_off, int n_ghost, Real dx, Real dy, Real dz, Real xbound, - Real ybound, Real zbound, Real dt, int n_fields, Real density_floor, - Real U_floor, Real *host_grav_potential ); +void VL_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, + int ny, int nz, int x_off, int y_off, int z_off, + int n_ghost, Real dx, Real dy, Real dz, Real xbound, + Real ybound, Real zbound, Real dt, int n_fields, + Real density_floor, Real U_floor, + Real *host_grav_potential); void Free_Memory_VL_3D(); -#endif //VL_3D_CUDA_H -#endif //CUDA + #endif // VL_3D_CUDA_H +#endif // CUDA diff --git a/src/integrators/simple_1D_cuda.cu b/src/integrators/simple_1D_cuda.cu index c1f209f01..1560be9e0 100644 --- a/src/integrators/simple_1D_cuda.cu +++ b/src/integrators/simple_1D_cuda.cu @@ -3,122 +3,133 @@ #ifdef CUDA -#include -#include -#include -#include "../utils/gpu.hpp" -#include "../global/global.h" -#include "../global/global_cuda.h" -#include "../hydro/hydro_cuda.h" -#include "../integrators/simple_1D_cuda.h" -#include "../reconstruction/pcm_cuda.h" -#include "../reconstruction/plmp_cuda.h" -#include "../reconstruction/plmc_cuda.h" -#include "../reconstruction/ppmp_cuda.h" -#include "../reconstruction/ppmc_cuda.h" -#include "../riemann_solvers/exact_cuda.h" -#include "../riemann_solvers/roe_cuda.h" -#include "../riemann_solvers/hllc_cuda.h" -#include "../utils/error_handling.h" -#include "../io/io.h" - - - -void Simple_Algorithm_1D_CUDA(Real *d_conserved, int nx, int x_off, int n_ghost, Real dx, Real xbound, Real dt, int n_fields) + #include + #include + #include + + #include "../global/global.h" + #include "../global/global_cuda.h" + #include "../hydro/hydro_cuda.h" + #include "../integrators/simple_1D_cuda.h" + #include "../io/io.h" + #include "../reconstruction/pcm_cuda.h" + #include "../reconstruction/plmc_cuda.h" + #include "../reconstruction/plmp_cuda.h" + #include "../reconstruction/ppmc_cuda.h" + #include "../reconstruction/ppmp_cuda.h" + #include "../riemann_solvers/exact_cuda.h" + #include "../riemann_solvers/hllc_cuda.h" + #include "../riemann_solvers/roe_cuda.h" + #include "../utils/error_handling.h" + #include "../utils/gpu.hpp" + +void Simple_Algorithm_1D_CUDA(Real *d_conserved, int nx, int x_off, int n_ghost, + Real dx, Real xbound, Real dt, int n_fields) { - //Here, *dev_conserved contains the entire - //set of conserved variables on the grid + // Here, *dev_conserved contains the entire + // set of conserved variables on the grid int n_cells = nx; - int ny = 1; - int nz = 1; - int ngrid = (n_cells + TPB - 1) / TPB; + int ny = 1; + int nz = 1; + int ngrid = (n_cells + TPB - 1) / TPB; // set the dimensions of the cuda grid dim3 dimGrid(ngrid, 1, 1); dim3 dimBlock(TPB, 1, 1); - if ( !memory_allocated ) { - + if (!memory_allocated) { // allocate memory on the GPU dev_conserved = d_conserved; - //CudaSafeCall( cudaMalloc((void**)&dev_conserved, n_fields*n_cells*sizeof(Real)) ); - CudaSafeCall( cudaMalloc((void**)&Q_Lx, n_fields*n_cells*sizeof(Real)) ); - CudaSafeCall( cudaMalloc((void**)&Q_Rx, n_fields*n_cells*sizeof(Real)) ); - CudaSafeCall( cudaMalloc((void**)&F_x, (n_fields)*n_cells*sizeof(Real)) ); - - // If memory is single allocated: memory_allocated becomes true and successive timesteps won't allocate memory. - // If the memory is not single allocated: memory_allocated remains Null and memory is allocated every timestep. + // CudaSafeCall( cudaMalloc((void**)&dev_conserved, + // n_fields*n_cells*sizeof(Real)) ); + CudaSafeCall(cudaMalloc((void **)&Q_Lx, n_fields * n_cells * sizeof(Real))); + CudaSafeCall(cudaMalloc((void **)&Q_Rx, n_fields * n_cells * sizeof(Real))); + CudaSafeCall(cudaMalloc((void **)&F_x, (n_fields)*n_cells * sizeof(Real))); + + // If memory is single allocated: memory_allocated becomes true and + // successive timesteps won't allocate memory. If the memory is not single + // allocated: memory_allocated remains Null and memory is allocated every + // timestep. memory_allocated = true; } // Step 1: Do the reconstruction #ifdef PCM - hipLaunchKernelGGL(PCM_Reconstruction_1D, dimGrid, dimBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, n_ghost, gama, n_fields); + hipLaunchKernelGGL(PCM_Reconstruction_1D, dimGrid, dimBlock, 0, 0, + dev_conserved, Q_Lx, Q_Rx, nx, n_ghost, gama, n_fields); CudaCheckError(); #endif #ifdef PLMP - hipLaunchKernelGGL(PLMP_cuda, dimGrid, dimBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, n_fields); + hipLaunchKernelGGL(PLMP_cuda, dimGrid, dimBlock, 0, 0, dev_conserved, Q_Lx, + Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, n_fields); CudaCheckError(); #endif #ifdef PLMC - hipLaunchKernelGGL(PLMC_cuda, dimGrid, dimBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, n_fields); + hipLaunchKernelGGL(PLMC_cuda, dimGrid, dimBlock, 0, 0, dev_conserved, Q_Lx, + Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, n_fields); CudaCheckError(); #endif #ifdef PPMP - hipLaunchKernelGGL(PPMP_cuda, dimGrid, dimBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, n_fields); + hipLaunchKernelGGL(PPMP_cuda, dimGrid, dimBlock, 0, 0, dev_conserved, Q_Lx, + Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, n_fields); CudaCheckError(); #endif #ifdef PPMC - hipLaunchKernelGGL(PPMC_cuda, dimGrid, dimBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, n_fields); + hipLaunchKernelGGL(PPMC_cuda, dimGrid, dimBlock, 0, 0, dev_conserved, Q_Lx, + Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, n_fields); CudaCheckError(); #endif - // Step 2: Calculate the fluxes #ifdef EXACT - hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dimGrid, dimBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); + hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dimGrid, dimBlock, 0, 0, Q_Lx, + Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); #endif #ifdef ROE - hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dimGrid, dimBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); + hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dimGrid, dimBlock, 0, 0, Q_Lx, + Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); #endif #ifdef HLLC - hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dimGrid, dimBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); + hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dimGrid, dimBlock, 0, 0, Q_Lx, + Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); #endif CudaCheckError(); #ifdef DE - // Compute the divergence of Vel before updating the conserved array, this solves synchronization issues when adding this term on Update_Conserved_Variables - hipLaunchKernelGGL(Partial_Update_Advected_Internal_Energy_1D, dimGrid, dimBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, n_ghost, dx, dt, gama, n_fields ); + // Compute the divergence of Vel before updating the conserved array, this + // solves synchronization issues when adding this term on + // Update_Conserved_Variables + hipLaunchKernelGGL(Partial_Update_Advected_Internal_Energy_1D, dimGrid, + dimBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, n_ghost, dx, + dt, gama, n_fields); #endif - // Step 3: Update the conserved variable array - hipLaunchKernelGGL(Update_Conserved_Variables_1D, dimGrid, dimBlock, 0, 0, dev_conserved, F_x, n_cells, x_off, n_ghost, dx, xbound, dt, gama, n_fields); + hipLaunchKernelGGL(Update_Conserved_Variables_1D, dimGrid, dimBlock, 0, 0, + dev_conserved, F_x, n_cells, x_off, n_ghost, dx, xbound, + dt, gama, n_fields); CudaCheckError(); - // Synchronize the total and internal energy, if using dual-energy formalism #ifdef DE - hipLaunchKernelGGL(Select_Internal_Energy_1D, dimGrid, dimBlock, 0, 0, dev_conserved, nx, n_ghost, n_fields); - hipLaunchKernelGGL(Sync_Energies_1D, dimGrid, dimBlock, 0, 0, dev_conserved, n_cells, n_ghost, gama, n_fields); + hipLaunchKernelGGL(Select_Internal_Energy_1D, dimGrid, dimBlock, 0, 0, + dev_conserved, nx, n_ghost, n_fields); + hipLaunchKernelGGL(Sync_Energies_1D, dimGrid, dimBlock, 0, 0, dev_conserved, + n_cells, n_ghost, gama, n_fields); CudaCheckError(); #endif - return; - } -void Free_Memory_Simple_1D() { - +void Free_Memory_Simple_1D() +{ // free the GPU memory cudaFree(dev_conserved); cudaFree(Q_Lx); cudaFree(Q_Rx); cudaFree(F_x); - } - -#endif //CUDA +#endif // CUDA diff --git a/src/integrators/simple_1D_cuda.h b/src/integrators/simple_1D_cuda.h index 6aba36059..03a7a8f7c 100644 --- a/src/integrators/simple_1D_cuda.h +++ b/src/integrators/simple_1D_cuda.h @@ -3,14 +3,15 @@ #ifdef CUDA -#ifndef SIMPLE_1D_CUDA_H -#define SIMPLE_1D_CUDA_H + #ifndef SIMPLE_1D_CUDA_H + #define SIMPLE_1D_CUDA_H -#include "../global/global.h" + #include "../global/global.h" -void Simple_Algorithm_1D_CUDA(Real *d_conserved, int nx, int x_off, int n_ghost, Real dx, Real xbound, Real dt, int n_fields); +void Simple_Algorithm_1D_CUDA(Real *d_conserved, int nx, int x_off, int n_ghost, + Real dx, Real xbound, Real dt, int n_fields); void Free_Memory_Simple_1D(); -#endif //Simple_1D_CUDA_H -#endif //CUDA + #endif // Simple_1D_CUDA_H +#endif // CUDA diff --git a/src/integrators/simple_2D_cuda.cu b/src/integrators/simple_2D_cuda.cu index 87cd87e58..eb78e86d7 100644 --- a/src/integrators/simple_2D_cuda.cu +++ b/src/integrators/simple_2D_cuda.cu @@ -3,118 +3,149 @@ #ifdef CUDA -#include -#include -#include "../utils/gpu.hpp" -#include "../global/global.h" -#include "../global/global_cuda.h" -#include "../hydro/hydro_cuda.h" -#include "../integrators/simple_2D_cuda.h" -#include "../reconstruction/pcm_cuda.h" -#include "../reconstruction/plmp_cuda.h" -#include "../reconstruction/plmc_cuda.h" -#include "../reconstruction/ppmp_cuda.h" -#include "../reconstruction/ppmc_cuda.h" -#include "../riemann_solvers/exact_cuda.h" -#include "../riemann_solvers/roe_cuda.h" -#include "../riemann_solvers/hllc_cuda.h" - - - -void Simple_Algorithm_2D_CUDA(Real *d_conserved, int nx, int ny, int x_off, int y_off, int n_ghost, Real dx, Real dy, Real xbound, Real ybound, Real dt, int n_fields) + #include + #include + + #include "../global/global.h" + #include "../global/global_cuda.h" + #include "../hydro/hydro_cuda.h" + #include "../integrators/simple_2D_cuda.h" + #include "../reconstruction/pcm_cuda.h" + #include "../reconstruction/plmc_cuda.h" + #include "../reconstruction/plmp_cuda.h" + #include "../reconstruction/ppmc_cuda.h" + #include "../reconstruction/ppmp_cuda.h" + #include "../riemann_solvers/exact_cuda.h" + #include "../riemann_solvers/hllc_cuda.h" + #include "../riemann_solvers/roe_cuda.h" + #include "../utils/gpu.hpp" + +void Simple_Algorithm_2D_CUDA(Real *d_conserved, int nx, int ny, int x_off, + int y_off, int n_ghost, Real dx, Real dy, + Real xbound, Real ybound, Real dt, int n_fields) { - - //Here, *dev_conserved contains the entire - //set of conserved variables on the grid - //concatenated into a 1-d array - int n_cells = nx*ny; - int nz = 1; - int ngrid = (n_cells + TPB - 1) / TPB; - + // Here, *dev_conserved contains the entire + // set of conserved variables on the grid + // concatenated into a 1-d array + int n_cells = nx * ny; + int nz = 1; + int ngrid = (n_cells + TPB - 1) / TPB; // set values for GPU kernels // number of blocks per 1D grid dim3 dim2dGrid(ngrid, 1, 1); - //number of threads per 1D block + // number of threads per 1D block dim3 dim1dBlock(TPB, 1, 1); - if ( !memory_allocated ) { - + if (!memory_allocated) { // allocate memory on the GPU dev_conserved = d_conserved; - //CudaSafeCall( cudaMalloc((void**)&dev_conserved, n_fields*n_cells*sizeof(Real)) ); - CudaSafeCall( cudaMalloc((void**)&Q_Lx, n_fields*n_cells*sizeof(Real)) ); - CudaSafeCall( cudaMalloc((void**)&Q_Rx, n_fields*n_cells*sizeof(Real)) ); - CudaSafeCall( cudaMalloc((void**)&Q_Ly, n_fields*n_cells*sizeof(Real)) ); - CudaSafeCall( cudaMalloc((void**)&Q_Ry, n_fields*n_cells*sizeof(Real)) ); - CudaSafeCall( cudaMalloc((void**)&F_x, n_fields*n_cells*sizeof(Real)) ); - CudaSafeCall( cudaMalloc((void**)&F_y, n_fields*n_cells*sizeof(Real)) ); - - // If memory is single allocated: memory_allocated becomes true and successive timesteps won't allocate memory. - // If the memory is not single allocated: memory_allocated remains Null and memory is allocated every timestep. + // CudaSafeCall( cudaMalloc((void**)&dev_conserved, + // n_fields*n_cells*sizeof(Real)) ); + CudaSafeCall(cudaMalloc((void **)&Q_Lx, n_fields * n_cells * sizeof(Real))); + CudaSafeCall(cudaMalloc((void **)&Q_Rx, n_fields * n_cells * sizeof(Real))); + CudaSafeCall(cudaMalloc((void **)&Q_Ly, n_fields * n_cells * sizeof(Real))); + CudaSafeCall(cudaMalloc((void **)&Q_Ry, n_fields * n_cells * sizeof(Real))); + CudaSafeCall(cudaMalloc((void **)&F_x, n_fields * n_cells * sizeof(Real))); + CudaSafeCall(cudaMalloc((void **)&F_y, n_fields * n_cells * sizeof(Real))); + + // If memory is single allocated: memory_allocated becomes true and + // successive timesteps won't allocate memory. If the memory is not single + // allocated: memory_allocated remains Null and memory is allocated every + // timestep. memory_allocated = true; } // Step 1: Do the reconstruction #ifdef PCM - hipLaunchKernelGGL(PCM_Reconstruction_2D, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, Q_Ly, Q_Ry, nx, ny, n_ghost, gama, n_fields); + hipLaunchKernelGGL(PCM_Reconstruction_2D, dim2dGrid, dim1dBlock, 0, 0, + dev_conserved, Q_Lx, Q_Rx, Q_Ly, Q_Ry, nx, ny, n_ghost, + gama, n_fields); #endif #ifdef PLMP - hipLaunchKernelGGL(PLMP_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, n_fields); - hipLaunchKernelGGL(PLMP_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, dt, gama, 1, n_fields); + hipLaunchKernelGGL(PLMP_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, + Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, + n_fields); + hipLaunchKernelGGL(PLMP_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, + Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, dt, gama, 1, + n_fields); #endif #ifdef PLMC - hipLaunchKernelGGL(PLMC_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, n_fields); - hipLaunchKernelGGL(PLMC_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, dt, gama, 1, n_fields); + hipLaunchKernelGGL(PLMC_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, + Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, + n_fields); + hipLaunchKernelGGL(PLMC_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, + Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, dt, gama, 1, + n_fields); #endif #ifdef PPMP - hipLaunchKernelGGL(PPMP_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, n_fields); - hipLaunchKernelGGL(PPMP_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, dt, gama, 1, n_fields); + hipLaunchKernelGGL(PPMP_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, + Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, + n_fields); + hipLaunchKernelGGL(PPMP_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, + Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, dt, gama, 1, + n_fields); #endif #ifdef PPMC - hipLaunchKernelGGL(PPMC_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, n_fields); - hipLaunchKernelGGL(PPMC_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, dt, gama, 1, n_fields); + hipLaunchKernelGGL(PPMC_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, + Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, + n_fields); + hipLaunchKernelGGL(PPMC_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, + Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, dt, gama, 1, + n_fields); #endif CudaCheckError(); - // Step 2: Calculate the fluxes #ifdef EXACT - hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); - hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields); + hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, + Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); + hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, + Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields); #endif #ifdef ROE - hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); - hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields); + hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, + Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); + hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, + Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields); #endif #ifdef HLLC - hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); - hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields); + hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, + Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); + hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, + Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields); #endif CudaCheckError(); #ifdef DE - // Compute the divergence of Vel before updating the conserved array, this solves synchronization issues when adding this term on Update_Conserved_Variables - hipLaunchKernelGGL(Partial_Update_Advected_Internal_Energy_2D, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, Q_Ly, Q_Ry, nx, ny, n_ghost, dx, dy, dt, gama, n_fields ); + // Compute the divergence of Vel before updating the conserved array, this + // solves synchronization issues when adding this term on + // Update_Conserved_Variables + hipLaunchKernelGGL(Partial_Update_Advected_Internal_Energy_2D, dim2dGrid, + dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, Q_Ly, Q_Ry, + nx, ny, n_ghost, dx, dy, dt, gama, n_fields); #endif // Step 3: Update the conserved variable array - hipLaunchKernelGGL(Update_Conserved_Variables_2D, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, F_x, F_y, nx, ny, x_off, y_off, n_ghost, dx, dy, xbound, ybound, dt, gama, n_fields); + hipLaunchKernelGGL(Update_Conserved_Variables_2D, dim2dGrid, dim1dBlock, 0, 0, + dev_conserved, F_x, F_y, nx, ny, x_off, y_off, n_ghost, dx, + dy, xbound, ybound, dt, gama, n_fields); CudaCheckError(); // Synchronize the total and internal energy #ifdef DE - hipLaunchKernelGGL(Select_Internal_Energy_2D, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, n_ghost, n_fields); - hipLaunchKernelGGL(Sync_Energies_2D, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, n_ghost, gama, n_fields); + hipLaunchKernelGGL(Select_Internal_Energy_2D, dim2dGrid, dim1dBlock, 0, 0, + dev_conserved, nx, ny, n_ghost, n_fields); + hipLaunchKernelGGL(Sync_Energies_2D, dim2dGrid, dim1dBlock, 0, 0, + dev_conserved, nx, ny, n_ghost, gama, n_fields); CudaCheckError(); #endif return; - } -void Free_Memory_Simple_2D() { - +void Free_Memory_Simple_2D() +{ // free the GPU memory cudaFree(dev_conserved); cudaFree(Q_Lx); @@ -123,8 +154,6 @@ void Free_Memory_Simple_2D() { cudaFree(Q_Ry); cudaFree(F_x); cudaFree(F_y); - } -#endif //CUDA - +#endif // CUDA diff --git a/src/integrators/simple_2D_cuda.h b/src/integrators/simple_2D_cuda.h index 7a531f952..58fc7077a 100644 --- a/src/integrators/simple_2D_cuda.h +++ b/src/integrators/simple_2D_cuda.h @@ -3,14 +3,16 @@ #ifdef CUDA -#ifndef SIMPLE_2D_CUDA_H -#define SIMPLE_2D_CUDA_H + #ifndef SIMPLE_2D_CUDA_H + #define SIMPLE_2D_CUDA_H -#include "../global/global.h" + #include "../global/global.h" -void Simple_Algorithm_2D_CUDA(Real *d_conserved, int nx, int ny, int x_off, int y_off, int n_ghost, Real dx, Real dy, Real xbound, Real ybound, Real dt, int n_fields); +void Simple_Algorithm_2D_CUDA(Real *d_conserved, int nx, int ny, int x_off, + int y_off, int n_ghost, Real dx, Real dy, + Real xbound, Real ybound, Real dt, int n_fields); void Free_Memory_Simple_2D(); -#endif //SIMPLE_2D_CUDA_H -#endif //CUDA + #endif // SIMPLE_2D_CUDA_H +#endif // CUDA diff --git a/src/integrators/simple_3D_cuda.cu b/src/integrators/simple_3D_cuda.cu index 1b854dea9..24a40562f 100644 --- a/src/integrators/simple_3D_cuda.cu +++ b/src/integrators/simple_3D_cuda.cu @@ -2,40 +2,40 @@ * \brief Definitions of the cuda 3D simple algorithm functions. */ #ifdef CUDA -#ifdef SIMPLE - -#include -#include -#include -#include "../utils/gpu.hpp" -#include "../global/global.h" -#include "../global/global_cuda.h" -#include "../hydro/hydro_cuda.h" -#include "../integrators/simple_3D_cuda.h" -#include "../reconstruction/pcm_cuda.h" -#include "../reconstruction/plmp_cuda.h" -#include "../reconstruction/plmc_cuda.h" -#include "../reconstruction/ppmp_cuda.h" -#include "../reconstruction/ppmc_cuda.h" -#include "../riemann_solvers/exact_cuda.h" -#include "../riemann_solvers/roe_cuda.h" -#include "../riemann_solvers/hllc_cuda.h" -#include "../io/io.h" -#include "../riemann_solvers/hll_cuda.h" - - - -void Simple_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, - int nx, int ny, int nz, int x_off, int y_off, - int z_off, int n_ghost, Real dx, Real dy, Real dz, Real xbound, - Real ybound, Real zbound, Real dt, int n_fields, Real density_floor, - Real U_floor, Real *host_grav_potential ) + #ifdef SIMPLE + + #include + #include + #include + + #include "../global/global.h" + #include "../global/global_cuda.h" + #include "../hydro/hydro_cuda.h" + #include "../integrators/simple_3D_cuda.h" + #include "../io/io.h" + #include "../reconstruction/pcm_cuda.h" + #include "../reconstruction/plmc_cuda.h" + #include "../reconstruction/plmp_cuda.h" + #include "../reconstruction/ppmc_cuda.h" + #include "../reconstruction/ppmp_cuda.h" + #include "../riemann_solvers/exact_cuda.h" + #include "../riemann_solvers/hll_cuda.h" + #include "../riemann_solvers/hllc_cuda.h" + #include "../riemann_solvers/roe_cuda.h" + #include "../utils/gpu.hpp" + +void Simple_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, + int ny, int nz, int x_off, int y_off, int z_off, + int n_ghost, Real dx, Real dy, Real dz, + Real xbound, Real ybound, Real zbound, Real dt, + int n_fields, Real density_floor, Real U_floor, + Real *host_grav_potential) { - //Here, *dev_conserved contains the entire - //set of conserved variables on the grid - //concatenated into a 1-d array - int n_cells = nx*ny*nz; - int ngrid = (n_cells + TPB - 1) / TPB; + // Here, *dev_conserved contains the entire + // set of conserved variables on the grid + // concatenated into a 1-d array + int n_cells = nx * ny * nz; + int ngrid = (n_cells + TPB - 1) / TPB; // set values for GPU kernels // number of blocks per 1D grid @@ -43,125 +43,179 @@ void Simple_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, // number of threads per 1D block dim3 dim1dBlock(TPB, 1, 1); - //host_grav_potential is NULL if not using GRAVITY + // host_grav_potential is NULL if not using GRAVITY temp_potential = host_grav_potential; - if ( !memory_allocated ){ + if (!memory_allocated) { size_t global_free, global_total; - CudaSafeCall( cudaMemGetInfo( &global_free, &global_total ) ); - + CudaSafeCall(cudaMemGetInfo(&global_free, &global_total)); + // allocate memory on the GPU - chprintf( " Allocating Hydro Memory: nfields: %d n_cells: %d nx: %d ny: %d nz: %d \n", n_fields, n_cells, nx, ny, nz ); - chprintf( " Memory needed: %f GB Free: %f GB Total: %f GB \n", n_fields*n_cells*sizeof(Real)/1e9, global_free/1e9, global_total/1e9 ); + chprintf( + " Allocating Hydro Memory: nfields: %d n_cells: %d nx: %d ny: %d " + "nz: %d \n", + n_fields, n_cells, nx, ny, nz); + chprintf(" Memory needed: %f GB Free: %f GB Total: %f GB \n", + n_fields * n_cells * sizeof(Real) / 1e9, global_free / 1e9, + global_total / 1e9); dev_conserved = d_conserved; - CudaSafeCall( cudaMalloc((void**)&Q_Lx, n_fields*n_cells*sizeof(Real)) ); - CudaSafeCall( cudaMalloc((void**)&Q_Rx, n_fields*n_cells*sizeof(Real)) ); - CudaSafeCall( cudaMalloc((void**)&Q_Ly, n_fields*n_cells*sizeof(Real)) ); - CudaSafeCall( cudaMalloc((void**)&Q_Ry, n_fields*n_cells*sizeof(Real)) ); - CudaSafeCall( cudaMalloc((void**)&Q_Lz, n_fields*n_cells*sizeof(Real)) ); - CudaSafeCall( cudaMalloc((void**)&Q_Rz, n_fields*n_cells*sizeof(Real)) ); - CudaSafeCall( cudaMalloc((void**)&F_x, n_fields*n_cells*sizeof(Real)) ); - CudaSafeCall( cudaMalloc((void**)&F_y, n_fields*n_cells*sizeof(Real)) ); - CudaSafeCall( cudaMalloc((void**)&F_z, n_fields*n_cells*sizeof(Real)) ); - - #if defined( GRAVITY ) - // CudaSafeCall( cudaMalloc((void**)&dev_grav_potential, n_cells*sizeof(Real)) ); + CudaSafeCall(cudaMalloc((void **)&Q_Lx, n_fields * n_cells * sizeof(Real))); + CudaSafeCall(cudaMalloc((void **)&Q_Rx, n_fields * n_cells * sizeof(Real))); + CudaSafeCall(cudaMalloc((void **)&Q_Ly, n_fields * n_cells * sizeof(Real))); + CudaSafeCall(cudaMalloc((void **)&Q_Ry, n_fields * n_cells * sizeof(Real))); + CudaSafeCall(cudaMalloc((void **)&Q_Lz, n_fields * n_cells * sizeof(Real))); + CudaSafeCall(cudaMalloc((void **)&Q_Rz, n_fields * n_cells * sizeof(Real))); + CudaSafeCall(cudaMalloc((void **)&F_x, n_fields * n_cells * sizeof(Real))); + CudaSafeCall(cudaMalloc((void **)&F_y, n_fields * n_cells * sizeof(Real))); + CudaSafeCall(cudaMalloc((void **)&F_z, n_fields * n_cells * sizeof(Real))); + + #if defined(GRAVITY) + // CudaSafeCall( cudaMalloc((void**)&dev_grav_potential, + // n_cells*sizeof(Real)) ); dev_grav_potential = d_grav_potential; #else dev_grav_potential = NULL; #endif - // If memory is single allocated: memory_allocated becomes true and successive timesteps won't allocate memory. - // If the memory is not single allocated: memory_allocated remains Null and memory is allocated every timestep. + // If memory is single allocated: memory_allocated becomes true and + // successive timesteps won't allocate memory. If the memory is not single + // allocated: memory_allocated remains Null and memory is allocated every + // timestep. memory_allocated = true; - chprintf( " Memory allocated \n" ); - + chprintf(" Memory allocated \n"); } - #if defined( GRAVITY ) && !defined( GRAVITY_GPU ) - CudaSafeCall( cudaMemcpy(dev_grav_potential, temp_potential, n_cells*sizeof(Real), cudaMemcpyHostToDevice) ); - #endif - - - // Step 1: Construct left and right interface values using updated conserved variables - #ifdef PCM - hipLaunchKernelGGL(PCM_Reconstruction_3D, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, Q_Ly, Q_Ry, Q_Lz, Q_Rz, nx, ny, nz, n_ghost, gama, n_fields); - #endif - #ifdef PLMP - hipLaunchKernelGGL(PLMP_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, n_fields); - hipLaunchKernelGGL(PLMP_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, dt, gama, 1, n_fields); - hipLaunchKernelGGL(PLMP_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lz, Q_Rz, nx, ny, nz, n_ghost, dz, dt, gama, 2, n_fields); - #endif //PLMP - #ifdef PLMC - hipLaunchKernelGGL(PLMC_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, n_fields); - hipLaunchKernelGGL(PLMC_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, dt, gama, 1, n_fields); - hipLaunchKernelGGL(PLMC_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lz, Q_Rz, nx, ny, nz, n_ghost, dz, dt, gama, 2, n_fields); - #endif - #ifdef PPMP - hipLaunchKernelGGL(PPMP_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, n_fields); - hipLaunchKernelGGL(PPMP_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, dt, gama, 1, n_fields); - hipLaunchKernelGGL(PPMP_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lz, Q_Rz, nx, ny, nz, n_ghost, dz, dt, gama, 2, n_fields); - #endif //PPMP - #ifdef PPMC - hipLaunchKernelGGL(PPMC_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, n_fields); - hipLaunchKernelGGL(PPMC_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, dt, gama, 1, n_fields); - hipLaunchKernelGGL(PPMC_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lz, Q_Rz, nx, ny, nz, n_ghost, dz, dt, gama, 2, n_fields); + #if defined(GRAVITY) && !defined(GRAVITY_GPU) + CudaSafeCall(cudaMemcpy(dev_grav_potential, temp_potential, + n_cells * sizeof(Real), cudaMemcpyHostToDevice)); + #endif + + // Step 1: Construct left and right interface values using updated conserved + // variables + #ifdef PCM + hipLaunchKernelGGL(PCM_Reconstruction_3D, dim1dGrid, dim1dBlock, 0, 0, + dev_conserved, Q_Lx, Q_Rx, Q_Ly, Q_Ry, Q_Lz, Q_Rz, nx, ny, + nz, n_ghost, gama, n_fields); + #endif + #ifdef PLMP + hipLaunchKernelGGL(PLMP_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, + Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, + n_fields); + hipLaunchKernelGGL(PLMP_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, + Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, dt, gama, 1, + n_fields); + hipLaunchKernelGGL(PLMP_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, + Q_Lz, Q_Rz, nx, ny, nz, n_ghost, dz, dt, gama, 2, + n_fields); + #endif // PLMP + #ifdef PLMC + hipLaunchKernelGGL(PLMC_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, + Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, + n_fields); + hipLaunchKernelGGL(PLMC_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, + Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, dt, gama, 1, + n_fields); + hipLaunchKernelGGL(PLMC_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, + Q_Lz, Q_Rz, nx, ny, nz, n_ghost, dz, dt, gama, 2, + n_fields); + #endif + #ifdef PPMP + hipLaunchKernelGGL(PPMP_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, + Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, + n_fields); + hipLaunchKernelGGL(PPMP_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, + Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, dt, gama, 1, + n_fields); + hipLaunchKernelGGL(PPMP_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, + Q_Lz, Q_Rz, nx, ny, nz, n_ghost, dz, dt, gama, 2, + n_fields); + #endif // PPMP + #ifdef PPMC + hipLaunchKernelGGL(PPMC_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, + Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, + n_fields); + hipLaunchKernelGGL(PPMC_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, + Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, dt, gama, 1, + n_fields); + hipLaunchKernelGGL(PPMC_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, + Q_Lz, Q_Rz, nx, ny, nz, n_ghost, dz, dt, gama, 2, + n_fields); CudaCheckError(); - #endif //PPMC - - - // Step 2: Calculate the fluxes - #ifdef EXACT - hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); - hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields); - hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lz, Q_Rz, F_z, nx, ny, nz, n_ghost, gama, 2, n_fields); - #endif //EXACT - #ifdef ROE - hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); - hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields); - hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lz, Q_Rz, F_z, nx, ny, nz, n_ghost, gama, 2, n_fields); - #endif //ROE - #ifdef HLLC - hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); - hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields); - hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lz, Q_Rz, F_z, nx, ny, nz, n_ghost, gama, 2, n_fields); - #endif //HLLC - #ifdef HLL - hipLaunchKernelGGL(Calculate_HLL_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); - hipLaunchKernelGGL(Calculate_HLL_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields); - hipLaunchKernelGGL(Calculate_HLL_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lz, Q_Rz, F_z, nx, ny, nz, n_ghost, gama, 2, n_fields); - #endif //HLL + #endif // PPMC + + // Step 2: Calculate the fluxes + #ifdef EXACT + hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, + Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); + hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, + Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields); + hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, + Q_Lz, Q_Rz, F_z, nx, ny, nz, n_ghost, gama, 2, n_fields); + #endif // EXACT + #ifdef ROE + hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, + Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); + hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, + Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields); + hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, + Q_Lz, Q_Rz, F_z, nx, ny, nz, n_ghost, gama, 2, n_fields); + #endif // ROE + #ifdef HLLC + hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, + Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); + hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, + Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields); + hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, + Q_Lz, Q_Rz, F_z, nx, ny, nz, n_ghost, gama, 2, n_fields); + #endif // HLLC + #ifdef HLL + hipLaunchKernelGGL(Calculate_HLL_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, + Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); + hipLaunchKernelGGL(Calculate_HLL_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, + Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields); + hipLaunchKernelGGL(Calculate_HLL_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, + Q_Lz, Q_Rz, F_z, nx, ny, nz, n_ghost, gama, 2, n_fields); + #endif // HLL CudaCheckError(); - - #ifdef DE - // Compute the divergence of Vel before updating the conserved array, this solves synchronization issues when adding this term on Update_Conserved_Variables_3D - hipLaunchKernelGGL(Partial_Update_Advected_Internal_Energy_3D, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, Q_Ly, Q_Ry, Q_Lz, Q_Rz, nx, ny, nz, n_ghost, dx, dy, dz, dt, gama, n_fields ); + + #ifdef DE + // Compute the divergence of Vel before updating the conserved array, this + // solves synchronization issues when adding this term on + // Update_Conserved_Variables_3D + hipLaunchKernelGGL(Partial_Update_Advected_Internal_Energy_3D, dim1dGrid, + dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, Q_Ly, Q_Ry, + Q_Lz, Q_Rz, nx, ny, nz, n_ghost, dx, dy, dz, dt, gama, + n_fields); CudaCheckError(); - #endif - + #endif + // Step 3: Update the conserved variable array - hipLaunchKernelGGL(Update_Conserved_Variables_3D, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, Q_Ly, Q_Ry, Q_Lz, Q_Rz, F_x, F_y, F_z, nx, ny, nz, x_off, y_off, z_off, n_ghost, dx, dy, dz, xbound, ybound, zbound, dt, gama, n_fields, density_floor, dev_grav_potential); + hipLaunchKernelGGL(Update_Conserved_Variables_3D, dim1dGrid, dim1dBlock, 0, 0, + dev_conserved, Q_Lx, Q_Rx, Q_Ly, Q_Ry, Q_Lz, Q_Rz, F_x, + F_y, F_z, nx, ny, nz, x_off, y_off, z_off, n_ghost, dx, dy, + dz, xbound, ybound, zbound, dt, gama, n_fields, + density_floor, dev_grav_potential); CudaCheckError(); - - #ifdef DE - hipLaunchKernelGGL(Select_Internal_Energy_3D, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, nz, n_ghost, n_fields); - hipLaunchKernelGGL(Sync_Energies_3D, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, nz, n_ghost, gama, n_fields); - CudaCheckError(); - #endif - - #ifdef TEMPERATURE_FLOOR - hipLaunchKernelGGL(Apply_Temperature_Floor, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, nz, n_ghost, n_fields, U_floor ); + + #ifdef DE + hipLaunchKernelGGL(Select_Internal_Energy_3D, dim1dGrid, dim1dBlock, 0, 0, + dev_conserved, nx, ny, nz, n_ghost, n_fields); + hipLaunchKernelGGL(Sync_Energies_3D, dim1dGrid, dim1dBlock, 0, 0, + dev_conserved, nx, ny, nz, n_ghost, gama, n_fields); CudaCheckError(); - #endif //TEMPERATURE_FLOOR + #endif + #ifdef TEMPERATURE_FLOOR + hipLaunchKernelGGL(Apply_Temperature_Floor, dim1dGrid, dim1dBlock, 0, 0, + dev_conserved, nx, ny, nz, n_ghost, n_fields, U_floor); + CudaCheckError(); + #endif // TEMPERATURE_FLOOR return; - } - -void Free_Memory_Simple_3D(){ - +void Free_Memory_Simple_3D() +{ // free the GPU memory cudaFree(dev_conserved); cudaFree(Q_Lx); @@ -173,11 +227,7 @@ void Free_Memory_Simple_3D(){ cudaFree(F_x); cudaFree(F_y); cudaFree(F_z); - } - - - -#endif //SIMPLE -#endif //CUDA + #endif // SIMPLE +#endif // CUDA diff --git a/src/integrators/simple_3D_cuda.h b/src/integrators/simple_3D_cuda.h index 9c904d2e7..c1a5f5126 100644 --- a/src/integrators/simple_3D_cuda.h +++ b/src/integrators/simple_3D_cuda.h @@ -3,19 +3,20 @@ #ifdef CUDA -#ifndef SIMPLE_3D_CUDA_H -#define SIMPLE_3D_CUDA_H + #ifndef SIMPLE_3D_CUDA_H + #define SIMPLE_3D_CUDA_H -#include"../global/global.h" -#include"../chemistry_gpu/chemistry_gpu.h" + #include "../chemistry_gpu/chemistry_gpu.h" + #include "../global/global.h" -void Simple_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, - int nx, int ny, int nz, int x_off, int y_off, int z_off, int n_ghost, - Real dx, Real dy, Real dz, Real xbound, - Real ybound, Real zbound, Real dt, int n_fields, Real density_floor, - Real U_floor, Real *host_grav_potential ); +void Simple_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, + int ny, int nz, int x_off, int y_off, int z_off, + int n_ghost, Real dx, Real dy, Real dz, + Real xbound, Real ybound, Real zbound, Real dt, + int n_fields, Real density_floor, Real U_floor, + Real *host_grav_potential); void Free_Memory_Simple_3D(); -#endif //SIMPLE_3D_CUDA_H -#endif //CUDA + #endif // SIMPLE_3D_CUDA_H +#endif // CUDA diff --git a/src/io/io.cpp b/src/io/io.cpp index e0c6fc089..6f7440b98 100644 --- a/src/io/io.cpp +++ b/src/io/io.cpp @@ -1,175 +1,177 @@ +#include +#include #include #include -#include #include -#include -#include -#include + #include #include +#include +#include #ifdef HDF5 -#include -#endif //HDF5 -#include "../io/io.h" + #include +#endif // HDF5 #include "../grid/grid3D.h" +#include "../io/io.h" #ifdef MPI_CHOLLA -#include "../mpi/mpi_routines.h" -#endif //MPI_CHOLLA -#include "../utils/error_handling.h" + #include "../mpi/mpi_routines.h" +#endif // MPI_CHOLLA #include "../utils/DeviceVector.h" +#include "../utils/error_handling.h" #ifdef COSMOLOGY -#include "../cosmology/cosmology.h" -#endif //COSMOLOGY + #include "../cosmology/cosmology.h" +#endif // COSMOLOGY using namespace std; -//#define OUTPUT_ENERGY -//#define OUTPUT_MOMENTUM - -/* function used to rotate points about an axis in 3D for the rotated projection output routine */ -void rotate_point(Real x, Real y, Real z, Real delta, Real phi, Real theta, Real *xp, Real *yp, Real *zp); +// #define OUTPUT_ENERGY +// #define OUTPUT_MOMENTUM -void Create_Log_File( struct parameters P ){ +/* function used to rotate points about an axis in 3D for the rotated projection + * output routine */ +void rotate_point(Real x, Real y, Real z, Real delta, Real phi, Real theta, + Real *xp, Real *yp, Real *zp); - #ifdef MPI_CHOLLA - if ( procID != 0 ) return; - #endif +void Create_Log_File(struct parameters P) +{ +#ifdef MPI_CHOLLA + if (procID != 0) return; +#endif - string file_name ( LOG_FILE_NAME ); - chprintf( "\nCreating Log File: %s \n\n", file_name.c_str() ); + string file_name(LOG_FILE_NAME); + chprintf("\nCreating Log File: %s \n\n", file_name.c_str()); bool file_exists = false; - if (FILE *file = fopen(file_name.c_str(), "r")){ + if (FILE *file = fopen(file_name.c_str(), "r")) { file_exists = true; - chprintf( " File exists, appending values: %s \n\n", file_name.c_str() ); - fclose( file ); + chprintf(" File exists, appending values: %s \n\n", file_name.c_str()); + fclose(file); } // current date/time based on current system time_t now = time(0); // convert now to string form - char* dt = ctime(&now); + char *dt = ctime(&now); ofstream out_file; out_file.open(file_name.c_str(), ios::app); out_file << "\n"; out_file << "Run date: " << dt; out_file.close(); - } -void Write_Message_To_Log_File( const char* message ){ - - #ifdef MPI_CHOLLA - if ( procID != 0 ) return; - #endif - +void Write_Message_To_Log_File(const char *message) +{ +#ifdef MPI_CHOLLA + if (procID != 0) return; +#endif - string file_name ( LOG_FILE_NAME ); - ofstream out_file; - out_file.open(file_name.c_str(), ios::app); - out_file << message << endl; - out_file.close(); + string file_name(LOG_FILE_NAME); + ofstream out_file; + out_file.open(file_name.c_str(), ios::app); + out_file << message << endl; + out_file.close(); } /* Write Cholla Output Data */ void WriteData(Grid3D &G, struct parameters P, int nfile) { + cudaMemcpy(G.C.density, G.C.device, G.H.n_fields * G.H.n_cells * sizeof(Real), + cudaMemcpyDeviceToHost); - cudaMemcpy(G.C.density, G.C.device, G.H.n_fields*G.H.n_cells*sizeof(Real), cudaMemcpyDeviceToHost); - - chprintf( "\nSaving Snapshot: %d \n", nfile ); + chprintf("\nSaving Snapshot: %d \n", nfile); - #ifdef HDF5 +#ifdef HDF5 // Initialize HDF5 interface H5open(); - #endif +#endif - #ifdef HDF5 +#ifdef HDF5 // Initialize HDF5 interface H5open(); - #endif +#endif - #ifdef N_OUTPUT_COMPLETE - //If nfile is multiple of N_OUTPUT_COMPLETE then output all data - if ( nfile%N_OUTPUT_COMPLETE == 0 ){ +#ifdef N_OUTPUT_COMPLETE + // If nfile is multiple of N_OUTPUT_COMPLETE then output all data + if (nfile % N_OUTPUT_COMPLETE == 0) { G.H.Output_Complete_Data = true; - chprintf( " Writing all data ( Restart File ).\n"); - } - else{ + chprintf(" Writing all data ( Restart File ).\n"); + } else { G.H.Output_Complete_Data = false; } - #else - //If NOT N_OUTPUT_COMPLETE: always output complete data +#else + // If NOT N_OUTPUT_COMPLETE: always output complete data G.H.Output_Complete_Data = true; - #endif +#endif - #ifdef COSMOLOGY - G.Change_Cosmological_Frame_Sytem( false ); - #endif +#ifdef COSMOLOGY + G.Change_Cosmological_Frame_Sytem(false); +#endif - #ifndef ONLY_PARTICLES +#ifndef ONLY_PARTICLES /*call the data output routine for Hydro data*/ - if (nfile % P.n_hydro == 0) OutputData(G,P,nfile); - #endif + if (nfile % P.n_hydro == 0) OutputData(G, P, nfile); +#endif - // This function does other checks to make sure it is valid (3D only) - #ifdef HDF5 - if (P.n_out_float32 && nfile % P.n_out_float32 == 0) OutputFloat32(G,P,nfile); - #endif +// This function does other checks to make sure it is valid (3D only) +#ifdef HDF5 + if (P.n_out_float32 && nfile % P.n_out_float32 == 0) + OutputFloat32(G, P, nfile); +#endif - #ifdef PROJECTION - if (nfile % P.n_projection == 0) OutputProjectedData(G,P,nfile); - #endif /*PROJECTION*/ +#ifdef PROJECTION + if (nfile % P.n_projection == 0) OutputProjectedData(G, P, nfile); +#endif /*PROJECTION*/ - #ifdef ROTATED_PROJECTION - if (nfile % P.n_rotated_projection == 0) OutputRotatedProjectedData(G,P,nfile); - #endif /*ROTATED_PROJECTION*/ +#ifdef ROTATED_PROJECTION + if (nfile % P.n_rotated_projection == 0) + OutputRotatedProjectedData(G, P, nfile); +#endif /*ROTATED_PROJECTION*/ - #ifdef SLICES - if (nfile % P.n_slice == 0) OutputSlices(G,P,nfile); - #endif /*SLICES*/ +#ifdef SLICES + if (nfile % P.n_slice == 0) OutputSlices(G, P, nfile); +#endif /*SLICES*/ - #ifdef PARTICLES +#ifdef PARTICLES if (nfile % P.n_particle == 0) G.WriteData_Particles(P, nfile); - #endif +#endif - #ifdef COSMOLOGY - if ( G.H.OUTPUT_SCALE_FACOR || G.H.Output_Initial){ +#ifdef COSMOLOGY + if (G.H.OUTPUT_SCALE_FACOR || G.H.Output_Initial) { G.Cosmo.Set_Next_Scale_Output(); - if ( !G.Cosmo.exit_now ){ - chprintf( " Saved Snapshot: %d z:%f next_output: %f\n", nfile, G.Cosmo.current_z, 1/G.Cosmo.next_output - 1 ); + if (!G.Cosmo.exit_now) { + chprintf(" Saved Snapshot: %d z:%f next_output: %f\n", nfile, + G.Cosmo.current_z, 1 / G.Cosmo.next_output - 1); G.H.Output_Initial = false; - } - else{ - chprintf( " Saved Snapshot: %d z:%f Exiting now\n", nfile, G.Cosmo.current_z ); + } else { + chprintf(" Saved Snapshot: %d z:%f Exiting now\n", nfile, + G.Cosmo.current_z); } - } - else chprintf( " Saved Snapshot: %d z:%f\n", nfile, G.Cosmo.current_z ); - G.Change_Cosmological_Frame_Sytem( true ); - chprintf( "\n" ); + } else + chprintf(" Saved Snapshot: %d z:%f\n", nfile, G.Cosmo.current_z); + G.Change_Cosmological_Frame_Sytem(true); + chprintf("\n"); G.H.Output_Now = false; - #endif +#endif - #ifdef HDF5 +#ifdef HDF5 // Cleanup HDF5 H5close(); - #endif +#endif - #ifdef HDF5 +#ifdef HDF5 // Cleanup HDF5 H5close(); - #endif +#endif - #ifdef MPI_CHOLLA +#ifdef MPI_CHOLLA MPI_Barrier(world); - #endif +#endif } - /* Output the grid data to file. */ void OutputData(Grid3D &G, struct parameters P, int nfile) { @@ -180,23 +182,27 @@ void OutputData(Grid3D &G, struct parameters P, int nfile) strcpy(filename, P.outdir); sprintf(timestep, "%d", nfile); strcat(filename, timestep); - #if defined BINARY +#if defined BINARY strcat(filename, ".bin"); - #elif defined HDF5 +#elif defined HDF5 strcat(filename, ".h5"); - #else +#else strcat(filename, ".txt"); - if (G.H.nx*G.H.ny*G.H.nz > 1000) printf("Ascii outputs only recommended for small problems!\n"); - #endif - #ifdef MPI_CHOLLA - sprintf(filename,"%s.%d",filename,procID); - #endif + if (G.H.nx * G.H.ny * G.H.nz > 1000) + printf("Ascii outputs only recommended for small problems!\n"); +#endif +#ifdef MPI_CHOLLA + sprintf(filename, "%s.%d", filename, procID); +#endif - // open the file for binary writes - #if defined BINARY +// open the file for binary writes +#if defined BINARY FILE *out; out = fopen(filename, "w"); - if(out == NULL) {printf("Error opening output file.\n"); exit(-1); } + if (out == NULL) { + printf("Error opening output file.\n"); + exit(-1); + } // write the header to the output file G.Write_Header_Binary(out); @@ -207,10 +213,10 @@ void OutputData(Grid3D &G, struct parameters P, int nfile) // close the output file fclose(out); - // create the file for hdf5 writes - #elif defined HDF5 - hid_t file_id; /* file identifier */ - herr_t status; +// create the file for hdf5 writes +#elif defined HDF5 + hid_t file_id; /* file identifier */ + herr_t status; // Create a new file using default properties. file_id = H5Fcreate(filename, H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT); @@ -224,13 +230,19 @@ void OutputData(Grid3D &G, struct parameters P, int nfile) // close the file status = H5Fclose(file_id); - if (status < 0) {printf("File write failed.\n"); exit(-1); } + if (status < 0) { + printf("File write failed.\n"); + exit(-1); + } - #else +#else // open the file for txt writes FILE *out; out = fopen(filename, "w"); - if(out == NULL) {printf("Error opening output file.\n"); exit(-1); } + if (out == NULL) { + printf("Error opening output file.\n"); + exit(-1); + } // write the header to the output file G.Write_Header_Text(out); @@ -240,12 +252,11 @@ void OutputData(Grid3D &G, struct parameters P, int nfile) // close the output file fclose(out); - #endif +#endif } void OutputFloat32(Grid3D &G, struct parameters P, int nfile) { - Header H = G.H; // Do nothing in 1-D and 2-D case if (H.ny_real == 1) { @@ -267,13 +278,13 @@ void OutputFloat32(Grid3D &G, struct parameters P, int nfile) strcpy(filename, P.outdir); strcat(filename, timestep); strcat(filename, ".float32.h5"); - #ifdef MPI_CHOLLA - sprintf(filename,"%s.%d",filename,procID); - #endif +#ifdef MPI_CHOLLA + sprintf(filename, "%s.%d", filename, procID); +#endif // create hdf5 file - hid_t file_id; /* file identifier */ - herr_t status; + hid_t file_id; /* file identifier */ + herr_t status; // Create a new file using default properties. file_id = H5Fcreate(filename, H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT); @@ -284,69 +295,98 @@ void OutputFloat32(Grid3D &G, struct parameters P, int nfile) // write the conserved variables to the output file // 3-D Case - if (H.nx>1 && H.ny>1 && H.nz>1) { + if (H.nx > 1 && H.ny > 1 && H.nz > 1) { int nx_dset = H.nx_real; int ny_dset = H.ny_real; int nz_dset = H.nz_real; size_t buffer_size; - // Need a larger device buffer for MHD. In the future, if other fields need a larger device buffer, choose the maximum of the sizes. - // If the buffer is too large, it does not cause bugs (Oct 6 2022) + // Need a larger device buffer for MHD. In the future, if other fields need + // a larger device buffer, choose the maximum of the sizes. If the buffer is + // too large, it does not cause bugs (Oct 6 2022) #ifdef MHD - buffer_size = (nx_dset+1)*(ny_dset+1)*(nz_dset+1); + buffer_size = (nx_dset + 1) * (ny_dset + 1) * (nz_dset + 1); #else - buffer_size = nx_dset*ny_dset*nz_dset; + buffer_size = nx_dset * ny_dset * nz_dset; #endif - // Using static DeviceVector here automatically allocates the buffer the first time it is needed - // It persists until program exit, and then calls Free upon destruction - cuda_utilities::DeviceVector static device_dataset_vector{buffer_size}; - float* device_dataset_buffer = device_dataset_vector.data(); - float* dataset_buffer = (float *) malloc(buffer_size*sizeof(float)); - - if (P.out_float32_density > 0) WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, device_dataset_buffer, G.C.d_density, "/density"); - if (P.out_float32_momentum_x > 0) WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, device_dataset_buffer, G.C.d_momentum_x, "/momentum_x"); - if (P.out_float32_momentum_y > 0) WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, device_dataset_buffer, G.C.d_momentum_y, "/momentum_y"); - if (P.out_float32_momentum_z > 0) WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, device_dataset_buffer, G.C.d_momentum_z, "/momentum_z"); - if (P.out_float32_Energy > 0) WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, device_dataset_buffer, G.C.d_Energy, "/Energy"); + // Using static DeviceVector here automatically allocates the buffer the + // first time it is needed It persists until program exit, and then calls + // Free upon destruction + cuda_utilities::DeviceVector static device_dataset_vector{ + buffer_size}; + float *device_dataset_buffer = device_dataset_vector.data(); + float *dataset_buffer = (float *)malloc(buffer_size * sizeof(float)); + + if (P.out_float32_density > 0) + WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, + file_id, dataset_buffer, device_dataset_buffer, + G.C.d_density, "/density"); + if (P.out_float32_momentum_x > 0) + WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, + file_id, dataset_buffer, device_dataset_buffer, + G.C.d_momentum_x, "/momentum_x"); + if (P.out_float32_momentum_y > 0) + WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, + file_id, dataset_buffer, device_dataset_buffer, + G.C.d_momentum_y, "/momentum_y"); + if (P.out_float32_momentum_z > 0) + WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, + file_id, dataset_buffer, device_dataset_buffer, + G.C.d_momentum_z, "/momentum_z"); + if (P.out_float32_Energy > 0) + WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, + file_id, dataset_buffer, device_dataset_buffer, + G.C.d_Energy, "/Energy"); #ifdef DE - if (P.out_float32_GasEnergy > 0) WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, device_dataset_buffer, G.C.d_GasEnergy, "/GasEnergy"); -#endif //DE + if (P.out_float32_GasEnergy > 0) + WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, + file_id, dataset_buffer, device_dataset_buffer, + G.C.d_GasEnergy, "/GasEnergy"); +#endif // DE #ifdef MHD - if (P.out_float32_magnetic_x > 0) WriteHDF5Field3D(H.nx, H.ny, nx_dset+1, ny_dset+1, nz_dset+1, H.n_ghost-1, file_id, dataset_buffer, device_dataset_buffer, G.C.d_magnetic_x, "/magnetic_x"); - if (P.out_float32_magnetic_y > 0) WriteHDF5Field3D(H.nx, H.ny, nx_dset+1, ny_dset+1, nz_dset+1, H.n_ghost-1, file_id, dataset_buffer, device_dataset_buffer, G.C.d_magnetic_y, "/magnetic_y"); - if (P.out_float32_magnetic_z > 0) WriteHDF5Field3D(H.nx, H.ny, nx_dset+1, ny_dset+1, nz_dset+1, H.n_ghost-1, file_id, dataset_buffer, device_dataset_buffer, G.C.d_magnetic_z, "/magnetic_z"); + if (P.out_float32_magnetic_x > 0) + WriteHDF5Field3D(H.nx, H.ny, nx_dset + 1, ny_dset + 1, nz_dset + 1, + H.n_ghost - 1, file_id, dataset_buffer, + device_dataset_buffer, G.C.d_magnetic_x, "/magnetic_x"); + if (P.out_float32_magnetic_y > 0) + WriteHDF5Field3D(H.nx, H.ny, nx_dset + 1, ny_dset + 1, nz_dset + 1, + H.n_ghost - 1, file_id, dataset_buffer, + device_dataset_buffer, G.C.d_magnetic_y, "/magnetic_y"); + if (P.out_float32_magnetic_z > 0) + WriteHDF5Field3D(H.nx, H.ny, nx_dset + 1, ny_dset + 1, nz_dset + 1, + H.n_ghost - 1, file_id, dataset_buffer, + device_dataset_buffer, G.C.d_magnetic_z, "/magnetic_z"); #endif - free(dataset_buffer); - if (status < 0) {printf("File write failed.\n"); exit(-1); } - } // 3-D case + if (status < 0) { + printf("File write failed.\n"); + exit(-1); + } + } // 3-D case - // close the file + // close the file status = H5Fclose(file_id); - - } - /* Output a projection of the grid data to file. */ void OutputProjectedData(Grid3D &G, struct parameters P, int nfile) { char filename[100]; char timestep[20]; - #ifdef HDF5 - hid_t file_id; - herr_t status; +#ifdef HDF5 + hid_t file_id; + herr_t status; // create the filename strcpy(filename, P.outdir); sprintf(timestep, "%d_proj", nfile); - strcat(filename,timestep); - strcat(filename,".h5"); + strcat(filename, timestep); + strcat(filename, ".h5"); #ifdef MPI_CHOLLA - sprintf(filename,"%s.%d",filename,procID); + sprintf(filename, "%s.%d", filename, procID); #endif /*MPI_CHOLLA*/ // Create a new file @@ -362,50 +402,53 @@ void OutputProjectedData(Grid3D &G, struct parameters P, int nfile) status = H5Fclose(file_id); #ifdef MPI_CHOLLA - if (status < 0) {printf("OutputProjectedData: File write failed. ProcID: %d\n", procID); chexit(-1); } + if (status < 0) { + printf("OutputProjectedData: File write failed. ProcID: %d\n", procID); + chexit(-1); + } #else - if (status < 0) {printf("OutputProjectedData: File write failed.\n"); exit(-1); } + if (status < 0) { + printf("OutputProjectedData: File write failed.\n"); + exit(-1); + } #endif - #else +#else printf("OutputProjected Data only defined for hdf5 writes.\n"); - #endif //HDF5 +#endif // HDF5 } - /* Output a rotated projection of the grid data to file. */ void OutputRotatedProjectedData(Grid3D &G, struct parameters P, int nfile) { char filename[100]; char timestep[20]; - #ifdef HDF5 - hid_t file_id; - herr_t status; +#ifdef HDF5 + hid_t file_id; + herr_t status; // create the filename strcpy(filename, P.outdir); sprintf(timestep, "%d_rot_proj", nfile); - strcat(filename,timestep); - strcat(filename,".h5"); + strcat(filename, timestep); + strcat(filename, ".h5"); #ifdef MPI_CHOLLA - sprintf(filename,"%s.%d",filename,procID); + sprintf(filename, "%s.%d", filename, procID); #endif /*MPI_CHOLLA*/ - if(G.R.flag_delta==1) - { - //if flag_delta==1, then we are just outputting a - //bunch of rotations of the same snapshot + if (G.R.flag_delta == 1) { + // if flag_delta==1, then we are just outputting a + // bunch of rotations of the same snapshot int i_delta; char fname[200]; - for(i_delta=0;i_delta1 && H.ny==1 && H.nz==1) { + if (H.nx > 1 && H.ny == 1 && H.nz == 1) { fprintf(fp, "id\trho\tmx\tmy\tmz\tE"); - #ifdef MHD - fprintf(fp, "\tmagX\tmagY\tmagZ"); - #endif //MHD - #ifdef DE +#ifdef MHD + fprintf(fp, "\tmagX\tmagY\tmagZ"); +#endif // MHD +#ifdef DE fprintf(fp, "\tge"); - #endif +#endif fprintf(fp, "\n"); - for (i=H.n_ghost; i < H.nx-H.n_ghost; i++) { + for (i = H.n_ghost; i < H.nx - H.n_ghost; i++) { id = i; - fprintf(fp, "%d\t%f\t%f\t%f\t%f\t%f", i-H.n_ghost, C.density[id], C.momentum_x[id], C.momentum_y[id], C.momentum_z[id], C.Energy[id]); - #ifdef MHD - fprintf(fp, "\t%f\t%f\t%f", C.magnetic_x[id], C.magnetic_y[id], C.magnetic_z[id]); - #endif //MHD - #ifdef DE + fprintf(fp, "%d\t%f\t%f\t%f\t%f\t%f", i - H.n_ghost, C.density[id], + C.momentum_x[id], C.momentum_y[id], C.momentum_z[id], + C.Energy[id]); +#ifdef MHD + fprintf(fp, "\t%f\t%f\t%f", C.magnetic_x[id], C.magnetic_y[id], + C.magnetic_z[id]); +#endif // MHD +#ifdef DE fprintf(fp, "\t%f", C.GasEnergy[id]); - #endif //DE +#endif // DE fprintf(fp, "\n"); } - #ifdef MHD - // Save the last line of magnetic fields - id = H.nx-H.n_ghost; - fprintf(fp, "%d\tNan\tNan\tNan\tNan\tNan\t%f\t%f\t%f", id, C.magnetic_x[id], C.magnetic_y[id], C.magnetic_z[id]); - #ifdef DE - fprintf(fp, "\tNan"); - #endif //DE - fprintf(fp, "\n"); - #endif //MHD +#ifdef MHD + // Save the last line of magnetic fields + id = H.nx - H.n_ghost; + fprintf(fp, "%d\tNan\tNan\tNan\tNan\tNan\t%f\t%f\t%f", id, C.magnetic_x[id], + C.magnetic_y[id], C.magnetic_z[id]); + #ifdef DE + fprintf(fp, "\tNan"); + #endif // DE + fprintf(fp, "\n"); +#endif // MHD } // 2D case - else if (H.nx>1 && H.ny>1 && H.nz==1) { - + else if (H.nx > 1 && H.ny > 1 && H.nz == 1) { fprintf(fp, "idx\tidy\trho\tmx\tmy\tmz\tE"); - #ifdef MHD - fprintf(fp, "\tmagX\tmagY\tmagZ"); - #endif //MHD - #ifdef DE +#ifdef MHD + fprintf(fp, "\tmagX\tmagY\tmagZ"); +#endif // MHD +#ifdef DE fprintf(fp, "\tge"); - #endif +#endif fprintf(fp, "\n"); - for (i=H.n_ghost; i < H.nx-H.n_ghost; i++) { - for (j=H.n_ghost; j < H.ny-H.n_ghost; j++) { - id = i + j*H.nx; - fprintf(fp, "%d\t%d\t%f\t%f\t%f\t%f\t%f", i-H.n_ghost, j-H.n_ghost, C.density[id], C.momentum_x[id], C.momentum_y[id], C.momentum_z[id], C.Energy[id]); - #ifdef MHD - fprintf(fp, "\t%f\t%f\t%f", C.magnetic_x[id], C.magnetic_y[id], C.magnetic_z[id]); - #endif //MHD - #ifdef DE + for (i = H.n_ghost; i < H.nx - H.n_ghost; i++) { + for (j = H.n_ghost; j < H.ny - H.n_ghost; j++) { + id = i + j * H.nx; + fprintf(fp, "%d\t%d\t%f\t%f\t%f\t%f\t%f", i - H.n_ghost, j - H.n_ghost, + C.density[id], C.momentum_x[id], C.momentum_y[id], + C.momentum_z[id], C.Energy[id]); +#ifdef MHD + fprintf(fp, "\t%f\t%f\t%f", C.magnetic_x[id], C.magnetic_y[id], + C.magnetic_z[id]); +#endif // MHD +#ifdef DE fprintf(fp, "\t%f", C.GasEnergy[id]); - #endif //DE +#endif // DE fprintf(fp, "\n"); } - #ifdef MHD - // Save the last line of magnetic fields - id = i + (H.ny-H.n_ghost)*H.nx; - fprintf(fp, "%d\t%d\tNan\tNan\tNan\tNan\tNan\t%f\t%f\t%f", i-H.n_ghost, H.ny-2*H.n_ghost, C.magnetic_x[id], C.magnetic_y[id], C.magnetic_z[id]); - #ifdef DE - fprintf(fp, "\tNan"); - #endif //DE - fprintf(fp, "\n"); - #endif //MHD - } - #ifdef MHD +#ifdef MHD // Save the last line of magnetic fields - id = H.nx-H.n_ghost + (H.ny-H.n_ghost)*H.nx; - fprintf(fp, "%d\t%d\tNan\tNan\tNan\tNan\tNan\t%f\t%f\t%f", H.nx-2*H.n_ghost, H.ny-2*H.n_ghost, C.magnetic_x[id], C.magnetic_y[id], C.magnetic_z[id]); - #ifdef DE - fprintf(fp, "\tNan"); - #endif //DE + id = i + (H.ny - H.n_ghost) * H.nx; + fprintf(fp, "%d\t%d\tNan\tNan\tNan\tNan\tNan\t%f\t%f\t%f", i - H.n_ghost, + H.ny - 2 * H.n_ghost, C.magnetic_x[id], C.magnetic_y[id], + C.magnetic_z[id]); + #ifdef DE + fprintf(fp, "\tNan"); + #endif // DE fprintf(fp, "\n"); - #endif //MHD +#endif // MHD + } +#ifdef MHD + // Save the last line of magnetic fields + id = H.nx - H.n_ghost + (H.ny - H.n_ghost) * H.nx; + fprintf(fp, "%d\t%d\tNan\tNan\tNan\tNan\tNan\t%f\t%f\t%f", + H.nx - 2 * H.n_ghost, H.ny - 2 * H.n_ghost, C.magnetic_x[id], + C.magnetic_y[id], C.magnetic_z[id]); + #ifdef DE + fprintf(fp, "\tNan"); + #endif // DE + fprintf(fp, "\n"); +#endif // MHD } // 3D case else { fprintf(fp, "idx\tidy\tidz\trho\tmx\tmy\tmz\tE"); - #ifdef DE +#ifdef DE fprintf(fp, "\tge"); - #endif - #ifdef MHD - fprintf(fp, "\tmagX\tmagY\tmagZ"); - #endif //MHD +#endif +#ifdef MHD + fprintf(fp, "\tmagX\tmagY\tmagZ"); +#endif // MHD fprintf(fp, "\n"); - for (i=H.n_ghost-1; i < H.nx-H.n_ghost; i++) { - for (j=H.n_ghost-1; j < H.ny-H.n_ghost; j++) { - for (k=H.n_ghost-1; k < H.nz-H.n_ghost; k++) { - id = i + j*H.nx + k*H.nx*H.ny; + for (i = H.n_ghost - 1; i < H.nx - H.n_ghost; i++) { + for (j = H.n_ghost - 1; j < H.ny - H.n_ghost; j++) { + for (k = H.n_ghost - 1; k < H.nz - H.n_ghost; k++) { + id = i + j * H.nx + k * H.nx * H.ny; // Exclude the rightmost ghost cell on the "left" side for the hydro // variables - if ((i >= H.n_ghost) and (j >= H.n_ghost) and (k >= H.n_ghost)) - { - fprintf(fp, "%d\t%d\t%d\t%f\t%f\t%f\t%f\t%f", i-H.n_ghost, j-H.n_ghost, k-H.n_ghost, C.density[id], C.momentum_x[id], C.momentum_y[id], C.momentum_z[id], C.Energy[id]); - #ifdef DE + if ((i >= H.n_ghost) and (j >= H.n_ghost) and (k >= H.n_ghost)) { + fprintf(fp, "%d\t%d\t%d\t%f\t%f\t%f\t%f\t%f", i - H.n_ghost, + j - H.n_ghost, k - H.n_ghost, C.density[id], + C.momentum_x[id], C.momentum_y[id], C.momentum_z[id], + C.Energy[id]); +#ifdef DE fprintf(fp, "\t%f", C.GasEnergy[id]); - #endif //DE - } - else - { - fprintf(fp, "%d\t%d\t%d\tn/a\tn/a\tn/a\tn/a\tn/a", i-H.n_ghost, j-H.n_ghost, k-H.n_ghost); - #ifdef DE - fprintf(fp, "\tn/a"); - #endif //DE +#endif // DE + } else { + fprintf(fp, "%d\t%d\t%d\tn/a\tn/a\tn/a\tn/a\tn/a", i - H.n_ghost, + j - H.n_ghost, k - H.n_ghost); +#ifdef DE + fprintf(fp, "\tn/a"); +#endif // DE } - #ifdef MHD - fprintf(fp, "\t%f\t%f\t%f", C.magnetic_x[id], C.magnetic_y[id], C.magnetic_z[id]); - #endif //MHD +#ifdef MHD + fprintf(fp, "\t%f\t%f\t%f", C.magnetic_x[id], C.magnetic_y[id], + C.magnetic_z[id]); +#endif // MHD fprintf(fp, "\n"); } } @@ -1089,9 +1210,6 @@ void Grid3D::Write_Grid_Text(FILE *fp) } } - - - /*! \fn void Write_Grid_Binary(FILE *fp) * \brief Write the conserved quantities to a binary output file. */ void Grid3D::Write_Grid_Binary(FILE *fp) @@ -1101,153 +1219,159 @@ void Grid3D::Write_Grid_Binary(FILE *fp) // Write the conserved quantities to the output file // 1D case - if (H.nx>1 && H.ny==1 && H.nz==1) { - + if (H.nx > 1 && H.ny == 1 && H.nz == 1) { id = H.n_ghost; - fwrite(&(C.density[id]), sizeof(Real), H.nx_real, fp); + fwrite(&(C.density[id]), sizeof(Real), H.nx_real, fp); fwrite(&(C.momentum_x[id]), sizeof(Real), H.nx_real, fp); fwrite(&(C.momentum_y[id]), sizeof(Real), H.nx_real, fp); fwrite(&(C.momentum_z[id]), sizeof(Real), H.nx_real, fp); - fwrite(&(C.Energy[id]), sizeof(Real), H.nx_real, fp); - #ifdef DE - fwrite(&(C.GasEnergy[id]), sizeof(Real), H.nx_real, fp); - #endif //DE + fwrite(&(C.Energy[id]), sizeof(Real), H.nx_real, fp); +#ifdef DE + fwrite(&(C.GasEnergy[id]), sizeof(Real), H.nx_real, fp); +#endif // DE } // 2D case - else if (H.nx>1 && H.ny>1 && H.nz==1) { - - for (j=0; j 1 && H.ny > 1 && H.nz == 1) { + for (j = 0; j < H.ny_real; j++) { + id = H.n_ghost + (j + H.n_ghost) * H.nx; fwrite(&(C.density[id]), sizeof(Real), H.nx_real, fp); } - for (j=0; j1 && H.ny==1 && H.nz==1) { - - int nx_dset = H.nx_real; - hsize_t dims[1]; - dataset_buffer = (Real *) malloc(H.nx_real*sizeof(Real)); + if (H.nx > 1 && H.ny == 1 && H.nz == 1) { + int nx_dset = H.nx_real; + hsize_t dims[1]; + dataset_buffer = (Real *)malloc(H.nx_real * sizeof(Real)); // Create the data space for the datasets - dims[0] = nx_dset; + dims[0] = nx_dset; dataspace_id = H5Screate_simple(1, dims, NULL); - Write_HDF5_Field_1D_CPU(H, file_id, dataspace_id, dataset_buffer, C.density, "/density"); - Write_HDF5_Field_1D_CPU(H, file_id, dataspace_id, dataset_buffer, C.momentum_x, "/momentum_x"); - Write_HDF5_Field_1D_CPU(H, file_id, dataspace_id, dataset_buffer, C.momentum_y, "/momentum_y"); - Write_HDF5_Field_1D_CPU(H, file_id, dataspace_id, dataset_buffer, C.momentum_z, "/momentum_z"); - Write_HDF5_Field_1D_CPU(H, file_id, dataspace_id, dataset_buffer, C.Energy, "/Energy"); + Write_HDF5_Field_1D_CPU(H, file_id, dataspace_id, dataset_buffer, C.density, + "/density"); + Write_HDF5_Field_1D_CPU(H, file_id, dataspace_id, dataset_buffer, + C.momentum_x, "/momentum_x"); + Write_HDF5_Field_1D_CPU(H, file_id, dataspace_id, dataset_buffer, + C.momentum_y, "/momentum_y"); + Write_HDF5_Field_1D_CPU(H, file_id, dataspace_id, dataset_buffer, + C.momentum_z, "/momentum_z"); + Write_HDF5_Field_1D_CPU(H, file_id, dataspace_id, dataset_buffer, C.Energy, + "/Energy"); - #ifdef SCALAR - for (int s=0; s 1 this substitution can be attempted. - // Write_HDF5_Field_1D_CPU(H, file_id, dataspace_id, dataset_buffer, &(C.scalar[s*H.n_cells]), dataset); + // TODO: If there is a test case for regression testing NSCALARS > 1 this + // substitution can be attempted. Write_HDF5_Field_1D_CPU(H, file_id, + // dataspace_id, dataset_buffer, &(C.scalar[s*H.n_cells]), dataset); id = H.n_ghost; - memcpy(&dataset_buffer[0], &(C.scalar[id+s*H.n_cells]), H.nx_real*sizeof(Real)); + memcpy(&dataset_buffer[0], &(C.scalar[id + s * H.n_cells]), + H.nx_real * sizeof(Real)); // dataset here is just a name status = HDF5_Dataset(file_id, dataspace_id, dataset_buffer, dataset); } - #endif //SCALAR + #endif // SCALAR - #ifdef DE - Write_HDF5_Field_1D_CPU(H, file_id, dataspace_id, dataset_buffer, C.GasEnergy, "/GasEnergy"); - #endif //DE + #ifdef DE + Write_HDF5_Field_1D_CPU(H, file_id, dataspace_id, dataset_buffer, + C.GasEnergy, "/GasEnergy"); + #endif // DE // Free the dataspace id status = H5Sclose(dataspace_id); } - // 2D case - if (H.nx>1 && H.ny>1 && H.nz==1) { - - int nx_dset = H.nx_real; - int ny_dset = H.ny_real; - hsize_t dims[2]; - dataset_buffer = (Real *) malloc(H.ny_real*H.nx_real*sizeof(Real)); + if (H.nx > 1 && H.ny > 1 && H.nz == 1) { + int nx_dset = H.nx_real; + int ny_dset = H.ny_real; + hsize_t dims[2]; + dataset_buffer = (Real *)malloc(H.ny_real * H.nx_real * sizeof(Real)); // Create the data space for the datasets - dims[0] = nx_dset; - dims[1] = ny_dset; + dims[0] = nx_dset; + dims[1] = ny_dset; dataspace_id = H5Screate_simple(2, dims, NULL); - Write_HDF5_Field_2D_CPU(H, file_id, dataspace_id, dataset_buffer, C.density, "/density"); - Write_HDF5_Field_2D_CPU(H, file_id, dataspace_id, dataset_buffer, C.momentum_x, "/momentum_x"); - Write_HDF5_Field_2D_CPU(H, file_id, dataspace_id, dataset_buffer, C.momentum_y, "/momentum_y"); - Write_HDF5_Field_2D_CPU(H, file_id, dataspace_id, dataset_buffer, C.momentum_z, "/momentum_z"); - Write_HDF5_Field_2D_CPU(H, file_id, dataspace_id, dataset_buffer, C.Energy, "/Energy"); + Write_HDF5_Field_2D_CPU(H, file_id, dataspace_id, dataset_buffer, C.density, + "/density"); + Write_HDF5_Field_2D_CPU(H, file_id, dataspace_id, dataset_buffer, + C.momentum_x, "/momentum_x"); + Write_HDF5_Field_2D_CPU(H, file_id, dataspace_id, dataset_buffer, + C.momentum_y, "/momentum_y"); + Write_HDF5_Field_2D_CPU(H, file_id, dataspace_id, dataset_buffer, + C.momentum_z, "/momentum_z"); + Write_HDF5_Field_2D_CPU(H, file_id, dataspace_id, dataset_buffer, C.Energy, + "/Energy"); - #ifdef SCALAR - for (int s=0; s 1 this substitution can be attempted. - // Write_HDF5_Field_1D_CPU(H, file_id, dataspace_id, dataset_buffer, &(C.scalar[s*H.n_cells]), dataset); + // TODO: If there is a test case for regression testing NSCALARS > 1 this + // substitution can be attempted. Write_HDF5_Field_1D_CPU(H, file_id, + // dataspace_id, dataset_buffer, &(C.scalar[s*H.n_cells]), dataset); // Copy the scalar array to the memory buffer - for (j=0; j1 && H.ny>1 && H.nz>1) { - - int nx_dset = H.nx_real; - int ny_dset = H.ny_real; - int nz_dset = H.nz_real; - hsize_t dims[3]; - hsize_t dims_full[3]; + if (H.nx > 1 && H.ny > 1 && H.nz > 1) { + int nx_dset = H.nx_real; + int ny_dset = H.ny_real; + int nz_dset = H.nz_real; + hsize_t dims[3]; + hsize_t dims_full[3]; size_t buffer_size; - // Need a larger device buffer for MHD. In the future, if other fields need a larger device buffer, choose the maximum of the sizes. - // If the buffer is too large, it does not cause bugs (Oct 6 2022) - #ifdef MHD - buffer_size = (nx_dset+1)*(ny_dset+1)*(nz_dset+1); - #else - buffer_size = nx_dset*ny_dset*nz_dset; - #endif - // Using static DeviceVector here automatically allocates the buffer the first time it is needed - // It persists until program exit, and then calls Free upon destruction - cuda_utilities::DeviceVector static device_dataset_vector{buffer_size}; - double* device_dataset_buffer = device_dataset_vector.data(); - dataset_buffer = (Real*) malloc(buffer_size*sizeof(Real)); - //CudaSafeCall(cudaMalloc(&device_dataset_buffer,nx_dset*ny_dset*nz_dset*sizeof(double))); - - - // Create the data space for the datasets (note: WriteHDF5Field3D creates its own dataspace, does not use the shared one) - dims[0] = nx_dset; - dims[1] = ny_dset; - dims[2] = nz_dset; + // Need a larger device buffer for MHD. In the future, if other fields need a + // larger device buffer, choose the maximum of the sizes. If the buffer is too + // large, it does not cause bugs (Oct 6 2022) + #ifdef MHD + buffer_size = (nx_dset + 1) * (ny_dset + 1) * (nz_dset + 1); + #else + buffer_size = nx_dset * ny_dset * nz_dset; + #endif + // Using static DeviceVector here automatically allocates the buffer the + // first time it is needed It persists until program exit, and then calls + // Free upon destruction + cuda_utilities::DeviceVector static device_dataset_vector{ + buffer_size}; + double *device_dataset_buffer = device_dataset_vector.data(); + dataset_buffer = (Real *)malloc(buffer_size * sizeof(Real)); + // CudaSafeCall(cudaMalloc(&device_dataset_buffer,nx_dset*ny_dset*nz_dset*sizeof(double))); + + // Create the data space for the datasets (note: WriteHDF5Field3D creates + // its own dataspace, does not use the shared one) + dims[0] = nx_dset; + dims[1] = ny_dset; + dims[2] = nz_dset; dataspace_id = H5Screate_simple(3, dims, NULL); - WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, device_dataset_buffer, C.d_density, "/density"); - if ( output_momentum || H.Output_Complete_Data ) { - WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, device_dataset_buffer, C.d_momentum_x, "/momentum_x"); - WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, device_dataset_buffer, C.d_momentum_y, "/momentum_y"); - WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, device_dataset_buffer, C.d_momentum_z, "/momentum_z"); + WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, + dataset_buffer, device_dataset_buffer, C.d_density, + "/density"); + if (output_momentum || H.Output_Complete_Data) { + WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, + file_id, dataset_buffer, device_dataset_buffer, + C.d_momentum_x, "/momentum_x"); + WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, + file_id, dataset_buffer, device_dataset_buffer, + C.d_momentum_y, "/momentum_y"); + WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, + file_id, dataset_buffer, device_dataset_buffer, + C.d_momentum_z, "/momentum_z"); + } + + if (output_energy || H.Output_Complete_Data) { + WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, + file_id, dataset_buffer, device_dataset_buffer, + C.d_Energy, "/Energy"); } - if ( output_energy || H.Output_Complete_Data ){ - WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, device_dataset_buffer, C.d_Energy, "/Energy"); - } - - #ifdef SCALAR - #if !defined(COOLING_GRACKLE) && !defined(CHEMISTRY_GPU) // Dont write scalars when using grackle - for (int s=0; s1 && H.ny>1 && H.nz>1) { - - int nx_dset = H.nx_real; - int ny_dset = H.ny_real; - int nz_dset = H.nz_real; - hsize_t dims[2]; - dataset_buffer_dxy = (Real *) malloc(H.nx_real*H.ny_real*sizeof(Real)); - dataset_buffer_dxz = (Real *) malloc(H.nx_real*H.nz_real*sizeof(Real)); - dataset_buffer_Txy = (Real *) malloc(H.nx_real*H.ny_real*sizeof(Real)); - dataset_buffer_Txz = (Real *) malloc(H.nx_real*H.nz_real*sizeof(Real)); + if (H.nx > 1 && H.ny > 1 && H.nz > 1) { + int nx_dset = H.nx_real; + int ny_dset = H.ny_real; + int nz_dset = H.nz_real; + hsize_t dims[2]; + dataset_buffer_dxy = (Real *)malloc(H.nx_real * H.ny_real * sizeof(Real)); + dataset_buffer_dxz = (Real *)malloc(H.nx_real * H.nz_real * sizeof(Real)); + dataset_buffer_Txy = (Real *)malloc(H.nx_real * H.ny_real * sizeof(Real)); + dataset_buffer_Txz = (Real *)malloc(H.nx_real * H.nz_real * sizeof(Real)); // Create the data space for the datasets - dims[0] = nx_dset; - dims[1] = ny_dset; + dims[0] = nx_dset; + dims[1] = ny_dset; dataspace_xy_id = H5Screate_simple(2, dims, NULL); - dims[1] = nz_dset; + dims[1] = nz_dset; dataspace_xz_id = H5Screate_simple(2, dims, NULL); // Copy the xy density and temperature projections to the memory buffer - for (j=0; j1 && H.ny>1 && H.nz>1) { - - Real Lx = R.Lx; //projected box size in x dir - Real Lz = R.Lz; //projected box size in z dir + if (H.nx > 1 && H.ny > 1 && H.nz > 1) { + Real Lx = R.Lx; // projected box size in x dir + Real Lz = R.Lz; // projected box size in z dir int nx_dset = R.nx; int nz_dset = R.nz; if (R.nx * R.nz == 0) { - chprintf("WARNING: compiled with -DROTATED_PROJECTION but input parameters nxr or nzr = 0\n"); + chprintf( + "WARNING: compiled with -DROTATED_PROJECTION but input parameters " + "nxr or nzr = 0\n"); return; } @@ -1839,117 +2024,118 @@ void Grid3D::Write_Rotated_Projection_HDF5(hid_t file_id) // this piece of the simulation volume // min and max values were set in the header write int nx_min, nx_max, nz_min, nz_max; - nx_min = R.nx_min; - nx_max = R.nx_max; - nz_min = R.nz_min; - nz_max = R.nz_max; - nx_dset = nx_max-nx_min; - nz_dset = nz_max-nz_min; + nx_min = R.nx_min; + nx_max = R.nx_max; + nz_min = R.nz_min; + nz_max = R.nz_max; + nx_dset = nx_max - nx_min; + nz_dset = nz_max - nz_min; - hsize_t dims[2]; + hsize_t dims[2]; // allocate the buffers for the projected dataset // and initialize to zero - dataset_buffer_dxzr = (Real *) calloc(nx_dset*nz_dset,sizeof(Real)); - dataset_buffer_Txzr = (Real *) calloc(nx_dset*nz_dset,sizeof(Real)); - dataset_buffer_vxxzr = (Real *) calloc(nx_dset*nz_dset,sizeof(Real)); - dataset_buffer_vyxzr = (Real *) calloc(nx_dset*nz_dset,sizeof(Real)); - dataset_buffer_vzxzr = (Real *) calloc(nx_dset*nz_dset,sizeof(Real)); + dataset_buffer_dxzr = (Real *)calloc(nx_dset * nz_dset, sizeof(Real)); + dataset_buffer_Txzr = (Real *)calloc(nx_dset * nz_dset, sizeof(Real)); + dataset_buffer_vxxzr = (Real *)calloc(nx_dset * nz_dset, sizeof(Real)); + dataset_buffer_vyxzr = (Real *)calloc(nx_dset * nz_dset, sizeof(Real)); + dataset_buffer_vzxzr = (Real *)calloc(nx_dset * nz_dset, sizeof(Real)); // Create the data space for the datasets - dims[0] = nx_dset; - dims[1] = nz_dset; + dims[0] = nx_dset; + dims[1] = nz_dset; dataspace_xzr_id = H5Screate_simple(2, dims, NULL); // Copy the xz rotated projection to the memory buffer - for (k=0; k=0)&&(ix=0)&&(iz= 0) && (ix < nx_dset) && (iz >= 0) && (iz < nz_dset)) { + buf_id = iz + ix * nz_dset; + d = C.density[id]; // project density - dataset_buffer_dxzr[buf_id] += d*H.dy; + dataset_buffer_dxzr[buf_id] += d * H.dy; // calculate number density - n = d*DENSITY_UNIT/(mu*MP); - // calculate temperature - #ifndef DE + n = d * DENSITY_UNIT / (mu * MP); + // calculate temperature + #ifndef DE Real mx = C.momentum_x[id]; Real my = C.momentum_y[id]; Real mz = C.momentum_z[id]; - Real E = C.Energy[id]; - T = (E - 0.5*(mx*mx + my*my + mz*mz)/C.density[id])*(gama-1.0)*PRESSURE_UNIT / (n*KB); - #endif - #ifdef DE - T = C.GasEnergy[id]*PRESSURE_UNIT*(gama-1.0) / (n*KB); - #endif - Txz = T*d*H.dy; + Real E = C.Energy[id]; + T = (E - 0.5 * (mx * mx + my * my + mz * mz) / C.density[id]) * + (gama - 1.0) * PRESSURE_UNIT / (n * KB); + #endif + #ifdef DE + T = C.GasEnergy[id] * PRESSURE_UNIT * (gama - 1.0) / (n * KB); + #endif + Txz = T * d * H.dy; dataset_buffer_Txzr[buf_id] += Txz; - //compute velocities + // compute velocities vx = C.momentum_x[id]; - dataset_buffer_vxxzr[buf_id] += vx*H.dy; + dataset_buffer_vxxzr[buf_id] += vx * H.dy; vy = C.momentum_y[id]; - dataset_buffer_vyxzr[buf_id] += vy*H.dy; + dataset_buffer_vyxzr[buf_id] += vy * H.dy; vz = C.momentum_z[id]; - dataset_buffer_vzxzr[buf_id] += vz*H.dy; + dataset_buffer_vzxzr[buf_id] += vz * H.dy; } } } } // Write projected d,T,vx,vy,vz - status = HDF5_Dataset(file_id, dataspace_xzr_id, dataset_buffer_dxzr, "/d_xzr"); - status = HDF5_Dataset(file_id, dataspace_xzr_id, dataset_buffer_Txzr, "/T_xzr"); - status = HDF5_Dataset(file_id, dataspace_xzr_id, dataset_buffer_vxxzr, "/vx_xzr"); - status = HDF5_Dataset(file_id, dataspace_xzr_id, dataset_buffer_vyxzr, "/vy_xzr"); - status = HDF5_Dataset(file_id, dataspace_xzr_id, dataset_buffer_vzxzr, "/vz_xzr"); + status = + HDF5_Dataset(file_id, dataspace_xzr_id, dataset_buffer_dxzr, "/d_xzr"); + status = + HDF5_Dataset(file_id, dataspace_xzr_id, dataset_buffer_Txzr, "/T_xzr"); + status = HDF5_Dataset(file_id, dataspace_xzr_id, dataset_buffer_vxxzr, + "/vx_xzr"); + status = HDF5_Dataset(file_id, dataspace_xzr_id, dataset_buffer_vyxzr, + "/vy_xzr"); + status = HDF5_Dataset(file_id, dataspace_xzr_id, dataset_buffer_vzxzr, + "/vz_xzr"); // Free the dataspace id status = H5Sclose(dataspace_xzr_id); - //free the data + // free the data free(dataset_buffer_dxzr); free(dataset_buffer_Txzr); free(dataset_buffer_vxxzr); free(dataset_buffer_vyxzr); free(dataset_buffer_vzxzr); - } - else chprintf("Rotated projection write only implemented for 3D data.\n"); - - - + } else + chprintf("Rotated projection write only implemented for 3D data.\n"); } -#endif //HDF5 - +#endif // HDF5 #ifdef HDF5 /*! \fn void Write_Slices_HDF5(hid_t file_id) @@ -1958,81 +2144,82 @@ void Grid3D::Write_Rotated_Projection_HDF5(hid_t file_id) void Grid3D::Write_Slices_HDF5(hid_t file_id) { int i, j, k, id, buf_id; - hid_t dataset_id, dataspace_id; - Real *dataset_buffer_d; - Real *dataset_buffer_mx; - Real *dataset_buffer_my; - Real *dataset_buffer_mz; - Real *dataset_buffer_E; + hid_t dataset_id, dataspace_id; + Real *dataset_buffer_d; + Real *dataset_buffer_mx; + Real *dataset_buffer_my; + Real *dataset_buffer_mz; + Real *dataset_buffer_E; #ifdef DE - Real *dataset_buffer_GE; + Real *dataset_buffer_GE; #endif #ifdef SCALAR - Real *dataset_buffer_scalar; + Real *dataset_buffer_scalar; #endif - herr_t status; + herr_t status; int xslice, yslice, zslice; - xslice = H.nx/2; - yslice = H.ny/2; - zslice = H.nz/2; + xslice = H.nx / 2; + yslice = H.ny / 2; + zslice = H.nz / 2; #ifdef MPI_CHOLLA - xslice = nx_global/2; - yslice = ny_global/2; - zslice = nz_global/2; + xslice = nx_global / 2; + yslice = ny_global / 2; + zslice = nz_global / 2; #endif - // 3D - if (H.nx>1 && H.ny>1 && H.nz>1) { - - int nx_dset = H.nx_real; - int ny_dset = H.ny_real; - int nz_dset = H.nz_real; - hsize_t dims[2]; - + if (H.nx > 1 && H.ny > 1 && H.nz > 1) { + int nx_dset = H.nx_real; + int ny_dset = H.ny_real; + int nz_dset = H.nz_real; + hsize_t dims[2]; // Create the xy data space for the datasets - dims[0] = nx_dset; - dims[1] = ny_dset; + dims[0] = nx_dset; + dims[1] = ny_dset; dataspace_id = H5Screate_simple(2, dims, NULL); // Allocate memory for the xy slices - dataset_buffer_d = (Real *) malloc(H.nx_real*H.ny_real*sizeof(Real)); - dataset_buffer_mx = (Real *) malloc(H.nx_real*H.ny_real*sizeof(Real)); - dataset_buffer_my = (Real *) malloc(H.nx_real*H.ny_real*sizeof(Real)); - dataset_buffer_mz = (Real *) malloc(H.nx_real*H.ny_real*sizeof(Real)); - dataset_buffer_E = (Real *) malloc(H.nx_real*H.ny_real*sizeof(Real)); - #ifdef DE - dataset_buffer_GE = (Real *) malloc(H.nx_real*H.ny_real*sizeof(Real)); - #endif - #ifdef SCALAR - dataset_buffer_scalar = (Real *) malloc(NSCALARS*H.nx_real*H.ny_real*sizeof(Real)); - #endif + dataset_buffer_d = (Real *)malloc(H.nx_real * H.ny_real * sizeof(Real)); + dataset_buffer_mx = (Real *)malloc(H.nx_real * H.ny_real * sizeof(Real)); + dataset_buffer_my = (Real *)malloc(H.nx_real * H.ny_real * sizeof(Real)); + dataset_buffer_mz = (Real *)malloc(H.nx_real * H.ny_real * sizeof(Real)); + dataset_buffer_E = (Real *)malloc(H.nx_real * H.ny_real * sizeof(Real)); + #ifdef DE + dataset_buffer_GE = (Real *)malloc(H.nx_real * H.ny_real * sizeof(Real)); + #endif + #ifdef SCALAR + dataset_buffer_scalar = + (Real *)malloc(NSCALARS * H.nx_real * H.ny_real * sizeof(Real)); + #endif // Copy the xy slices to the memory buffers - for (j=0; j= nz_local_start && zslice < nz_local_start+nz_local) { - id = (i+H.n_ghost) + (j+H.n_ghost)*H.nx + (zslice-nz_local_start+H.n_ghost)*H.nx*H.ny; - #endif //MPI_CHOLLA + for (j = 0; j < H.ny_real; j++) { + for (i = 0; i < H.nx_real; i++) { + id = (i + H.n_ghost) + (j + H.n_ghost) * H.nx + zslice * H.nx * H.ny; + buf_id = j + i * H.ny_real; + #ifdef MPI_CHOLLA + // When there are multiple processes, check whether this slice is in + // your domain + if (zslice >= nz_local_start && zslice < nz_local_start + nz_local) { + id = (i + H.n_ghost) + (j + H.n_ghost) * H.nx + + (zslice - nz_local_start + H.n_ghost) * H.nx * H.ny; + #endif // MPI_CHOLLA dataset_buffer_d[buf_id] = C.density[id]; dataset_buffer_mx[buf_id] = C.momentum_x[id]; dataset_buffer_my[buf_id] = C.momentum_y[id]; dataset_buffer_mz[buf_id] = C.momentum_z[id]; dataset_buffer_E[buf_id] = C.Energy[id]; - #ifdef DE + #ifdef DE dataset_buffer_GE[buf_id] = C.GasEnergy[id]; - #endif - #ifdef SCALAR - for (int ii=0; ii= ny_local_start && yslice < ny_local_start+ny_local) { - id = (i+H.n_ghost) + (yslice-ny_local_start+H.n_ghost)*H.nx + (k+H.n_ghost)*H.nx*H.ny; - #endif //MPI_CHOLLA - dataset_buffer_d[buf_id] = C.density[id]; - dataset_buffer_mx[buf_id] = C.momentum_x[id]; - dataset_buffer_my[buf_id] = C.momentum_y[id]; - dataset_buffer_mz[buf_id] = C.momentum_z[id]; - dataset_buffer_E[buf_id] = C.Energy[id]; - #ifdef DE - dataset_buffer_GE[buf_id] = C.GasEnergy[id]; - #endif - #ifdef SCALAR - for (int ii=0; ii= ny_local_start && yslice < ny_local_start + ny_local) { + id = (i + H.n_ghost) + (yslice - ny_local_start + H.n_ghost) * H.nx + + (k + H.n_ghost) * H.nx * H.ny; + #endif // MPI_CHOLLA + dataset_buffer_d[buf_id] = C.density[id]; + dataset_buffer_mx[buf_id] = C.momentum_x[id]; + dataset_buffer_my[buf_id] = C.momentum_y[id]; + dataset_buffer_mz[buf_id] = C.momentum_z[id]; + dataset_buffer_E[buf_id] = C.Energy[id]; + #ifdef DE + dataset_buffer_GE[buf_id] = C.GasEnergy[id]; + #endif + #ifdef SCALAR + for (int ii = 0; ii < NSCALARS; ii++) { + dataset_buffer_scalar[buf_id + ii * H.nx * H.nz] = + C.scalar[id + ii * H.n_cells]; + } + #endif + #ifdef MPI_CHOLLA } // if the slice isn't in your domain, just write out zeros else { @@ -2134,16 +2324,16 @@ void Grid3D::Write_Slices_HDF5(hid_t file_id) dataset_buffer_my[buf_id] = 0; dataset_buffer_mz[buf_id] = 0; dataset_buffer_E[buf_id] = 0; - #ifdef DE + #ifdef DE dataset_buffer_GE[buf_id] = 0; - #endif - #ifdef SCALAR - for (int ii=0; ii= nx_local_start && xslice < nx_local_start+nx_local) { - id = (xslice-nx_local_start) + (j+H.n_ghost)*H.nx + (k+H.n_ghost)*H.nx*H.ny; - #endif //MPI_CHOLLA - dataset_buffer_d[buf_id] = C.density[id]; - dataset_buffer_mx[buf_id] = C.momentum_x[id]; - dataset_buffer_my[buf_id] = C.momentum_y[id]; - dataset_buffer_mz[buf_id] = C.momentum_z[id]; - dataset_buffer_E[buf_id] = C.Energy[id]; - #ifdef DE - dataset_buffer_GE[buf_id] = C.GasEnergy[id]; - #endif - #ifdef SCALAR - for (int ii=0; ii= nx_local_start && xslice < nx_local_start + nx_local) { + id = (xslice - nx_local_start) + (j + H.n_ghost) * H.nx + + (k + H.n_ghost) * H.nx * H.ny; + #endif // MPI_CHOLLA + dataset_buffer_d[buf_id] = C.density[id]; + dataset_buffer_mx[buf_id] = C.momentum_x[id]; + dataset_buffer_my[buf_id] = C.momentum_y[id]; + dataset_buffer_mz[buf_id] = C.momentum_z[id]; + dataset_buffer_E[buf_id] = C.Energy[id]; + #ifdef DE + dataset_buffer_GE[buf_id] = C.GasEnergy[id]; + #endif + #ifdef SCALAR + for (int ii = 0; ii < NSCALARS; ii++) { + dataset_buffer_scalar[buf_id + ii * H.ny * H.nz] = + C.scalar[id + ii * H.n_cells]; + } + #endif + #ifdef MPI_CHOLLA } // if the slice isn't in your domain, just write out zeros else { @@ -2228,32 +2421,32 @@ void Grid3D::Write_Slices_HDF5(hid_t file_id) dataset_buffer_my[buf_id] = 0; dataset_buffer_mz[buf_id] = 0; dataset_buffer_E[buf_id] = 0; - #ifdef DE + #ifdef DE dataset_buffer_GE[buf_id] = 0; - #endif - #ifdef SCALAR - for (int ii=0; ii1 && H.ny==1 && H.nz==1) { - + if (H.nx > 1 && H.ny == 1 && H.nz == 1) { id = H.n_ghost; - fread(&(C.density[id]), sizeof(Real), H.nx_real, fp); + fread(&(C.density[id]), sizeof(Real), H.nx_real, fp); fread(&(C.momentum_x[id]), sizeof(Real), H.nx_real, fp); fread(&(C.momentum_y[id]), sizeof(Real), H.nx_real, fp); fread(&(C.momentum_z[id]), sizeof(Real), H.nx_real, fp); - fread(&(C.Energy[id]), sizeof(Real), H.nx_real, fp); - #ifdef DE - fread(&(C.GasEnergy[id]), sizeof(Real), H.nx_real, fp); - #endif + fread(&(C.Energy[id]), sizeof(Real), H.nx_real, fp); + #ifdef DE + fread(&(C.GasEnergy[id]), sizeof(Real), H.nx_real, fp); + #endif } // 2D case - else if (H.nx>1 && H.ny>1 && H.nz==1) { - for (j=0; j 1 && H.ny > 1 && H.nz == 1) { + for (j = 0; j < H.ny_real; j++) { + id = H.n_ghost + (j + H.n_ghost) * H.nx; fread(&(C.density[id]), sizeof(Real), H.nx_real, fp); } - for (j=0; j1 && H.ny==1 && H.nz==1) { - + if (H.nx > 1 && H.ny == 1 && H.nz == 1) { // need a dataset buffer to remap fastest index - dataset_buffer = (Real *) malloc(H.nx_real*sizeof(Real)); + dataset_buffer = (Real *)malloc(H.nx_real * sizeof(Real)); // Open the density dataset dataset_id = H5Dopen(file_id, "/density", H5P_DEFAULT); - // Read the density array into the dataset buffer // NOTE: NEED TO FIX FOR FLOAT REAL!!! - status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer); + // Read the density array into the dataset buffer // NOTE: NEED TO FIX FOR + // FLOAT REAL!!! + status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, + H5P_DEFAULT, dataset_buffer); // Free the dataset id status = H5Dclose(dataset_id); // Copy the density array to the grid id = H.n_ghost; - memcpy(&(C.density[id]), &dataset_buffer[0], H.nx_real*sizeof(Real)); - + memcpy(&(C.density[id]), &dataset_buffer[0], H.nx_real * sizeof(Real)); // Open the x momentum dataset dataset_id = H5Dopen(file_id, "/momentum_x", H5P_DEFAULT); - // Read the x momentum array into the dataset buffer // NOTE: NEED TO FIX FOR FLOAT REAL!!! - status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer); + // Read the x momentum array into the dataset buffer // NOTE: NEED TO FIX + // FOR FLOAT REAL!!! + status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, + H5P_DEFAULT, dataset_buffer); // Free the dataset id status = H5Dclose(dataset_id); // Copy the x momentum array to the grid id = H.n_ghost; - memcpy(&(C.momentum_x[id]), &dataset_buffer[0], H.nx_real*sizeof(Real)); - + memcpy(&(C.momentum_x[id]), &dataset_buffer[0], H.nx_real * sizeof(Real)); // Open the y momentum dataset dataset_id = H5Dopen(file_id, "/momentum_y", H5P_DEFAULT); - // Read the x momentum array into the dataset buffer // NOTE: NEED TO FIX FOR FLOAT REAL!!! - status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer); + // Read the x momentum array into the dataset buffer // NOTE: NEED TO FIX + // FOR FLOAT REAL!!! + status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, + H5P_DEFAULT, dataset_buffer); // Free the dataset id status = H5Dclose(dataset_id); // Copy the y momentum array to the grid id = H.n_ghost; - memcpy(&(C.momentum_y[id]), &dataset_buffer[0], H.nx_real*sizeof(Real)); - + memcpy(&(C.momentum_y[id]), &dataset_buffer[0], H.nx_real * sizeof(Real)); // Open the z momentum dataset dataset_id = H5Dopen(file_id, "/momentum_z", H5P_DEFAULT); - // Read the x momentum array into the dataset buffer // NOTE: NEED TO FIX FOR FLOAT REAL!!! - status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer); + // Read the x momentum array into the dataset buffer // NOTE: NEED TO FIX + // FOR FLOAT REAL!!! + status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, + H5P_DEFAULT, dataset_buffer); // Free the dataset id status = H5Dclose(dataset_id); // Copy the z momentum array to the grid id = H.n_ghost; - memcpy(&(C.momentum_z[id]), &dataset_buffer[0], H.nx_real*sizeof(Real)); - + memcpy(&(C.momentum_z[id]), &dataset_buffer[0], H.nx_real * sizeof(Real)); // Open the Energy dataset dataset_id = H5Dopen(file_id, "/Energy", H5P_DEFAULT); - // Read the Energy array into the dataset buffer // NOTE: NEED TO FIX FOR FLOAT REAL!!! - status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer); + // Read the Energy array into the dataset buffer // NOTE: NEED TO FIX FOR + // FLOAT REAL!!! + status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, + H5P_DEFAULT, dataset_buffer); // Free the dataset id status = H5Dclose(dataset_id); // Copy the Energy array to the grid id = H.n_ghost; - memcpy(&(C.Energy[id]), &dataset_buffer[0], H.nx_real*sizeof(Real)); + memcpy(&(C.Energy[id]), &dataset_buffer[0], H.nx_real * sizeof(Real)); - - #ifdef DE + #ifdef DE // Open the internal energy dataset dataset_id = H5Dopen(file_id, "/GasEnergy", H5P_DEFAULT); - // Read the Energy array into the dataset buffer // NOTE: NEED TO FIX FOR FLOAT REAL!!! - status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer); + // Read the Energy array into the dataset buffer // NOTE: NEED TO FIX FOR + // FLOAT REAL!!! + status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, + H5P_DEFAULT, dataset_buffer); // Free the dataset id status = H5Dclose(dataset_id); // Copy the internal energy array to the grid id = H.n_ghost; - memcpy(&(C.GasEnergy[id]), &dataset_buffer[0], H.nx_real*sizeof(Real)); - #endif //DE + memcpy(&(C.GasEnergy[id]), &dataset_buffer[0], H.nx_real * sizeof(Real)); + #endif // DE - #ifdef SCALAR - for (int s=0; s1 && H.ny>1 && H.nz==1) { - + if (H.nx > 1 && H.ny > 1 && H.nz == 1) { // need a dataset buffer to remap fastest index - dataset_buffer = (Real *) malloc(H.ny_real*H.nx_real*sizeof(Real)); - + dataset_buffer = (Real *)malloc(H.ny_real * H.nx_real * sizeof(Real)); // Open the density dataset dataset_id = H5Dopen(file_id, "/density", H5P_DEFAULT); - // Read the density array into the dataset buffer // NOTE: NEED TO FIX FOR FLOAT REAL!!! - status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer); + // Read the density array into the dataset buffer // NOTE: NEED TO FIX FOR + // FLOAT REAL!!! + status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, + H5P_DEFAULT, dataset_buffer); // Free the dataset id status = H5Dclose(dataset_id); // Copy the density array to the grid - for (j=0; j1 && H.ny>1 && H.nz>1) { - + if (H.nx > 1 && H.ny > 1 && H.nz > 1) { // Compute Statistic of Initial data Real mean_l, min_l, max_l; Real mean_g, min_g, max_g; // need a dataset buffer to remap fastest index - dataset_buffer = (Real *) malloc(H.nz_real*H.ny_real*H.nx_real*sizeof(Real)); - + dataset_buffer = + (Real *)malloc(H.nz_real * H.ny_real * H.nx_real * sizeof(Real)); // Open the density dataset dataset_id = H5Dopen(file_id, "/density", H5P_DEFAULT); - // Read the density array into the dataset buffer // NOTE: NEED TO FIX FOR FLOAT REAL!!! - status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer); + // Read the density array into the dataset buffer // NOTE: NEED TO FIX FOR + // FLOAT REAL!!! + status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, + H5P_DEFAULT, dataset_buffer); // Free the dataset id status = H5Dclose(dataset_id); - mean_l = 0; - min_l = 1e65; - max_l = -1; + min_l = 1e65; + max_l = -1; // Copy the density array to the grid - for (k=0; k max_l ) max_l = C.density[id]; - if ( C.density[id] < min_l ) min_l = C.density[id]; + if (C.density[id] > max_l) max_l = C.density[id]; + if (C.density[id] < min_l) min_l = C.density[id]; } } } - mean_l /= ( H.nz_real * H.ny_real * H.nx_real ); + mean_l /= (H.nz_real * H.ny_real * H.nx_real); - #if MPI_CHOLLA - mean_g = ReduceRealAvg( mean_l ); - max_g = ReduceRealMax( max_l ); - min_g = ReduceRealMin( min_l ); + #if MPI_CHOLLA + mean_g = ReduceRealAvg(mean_l); + max_g = ReduceRealMax(max_l); + min_g = ReduceRealMin(min_l); mean_l = mean_g; - max_l = max_g; - min_l = min_g; - #endif //MPI_CHOLLA - - #if defined(PRINT_INITIAL_STATS) && defined(COSMOLOGY) - chprintf( " Density Mean: %f Min: %f Max: %f [ h^2 Msun kpc^-3] \n", mean_l, min_l, max_l ); - #endif //PRINT_INITIAL_STATS and COSMOLOGY + max_l = max_g; + min_l = min_g; + #endif // MPI_CHOLLA + #if defined(PRINT_INITIAL_STATS) && defined(COSMOLOGY) + chprintf( + " Density Mean: %f Min: %f Max: %f [ h^2 Msun kpc^-3] \n", + mean_l, min_l, max_l); + #endif // PRINT_INITIAL_STATS and COSMOLOGY // Open the x momentum dataset dataset_id = H5Dopen(file_id, "/momentum_x", H5P_DEFAULT); - // Read the x momentum array into the dataset buffer // NOTE: NEED TO FIX FOR FLOAT REAL!!! - status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer); + // Read the x momentum array into the dataset buffer // NOTE: NEED TO FIX + // FOR FLOAT REAL!!! + status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, + H5P_DEFAULT, dataset_buffer); // Free the dataset id status = H5Dclose(dataset_id); mean_l = 0; - min_l = 1e65; - max_l = -1; + min_l = 1e65; + max_l = -1; // Copy the x momentum array to the grid - for (k=0; k max_l ) max_l = fabs(C.momentum_x[id]); - if ( fabs(C.momentum_x[id]) < min_l ) min_l = fabs(C.momentum_x[id]); + if (fabs(C.momentum_x[id]) > max_l) max_l = fabs(C.momentum_x[id]); + if (fabs(C.momentum_x[id]) < min_l) min_l = fabs(C.momentum_x[id]); } } } - mean_l /= ( H.nz_real * H.ny_real * H.nx_real ); + mean_l /= (H.nz_real * H.ny_real * H.nx_real); - #if MPI_CHOLLA - mean_g = ReduceRealAvg( mean_l ); - max_g = ReduceRealMax( max_l ); - min_g = ReduceRealMin( min_l ); + #if MPI_CHOLLA + mean_g = ReduceRealAvg(mean_l); + max_g = ReduceRealMax(max_l); + min_g = ReduceRealMin(min_l); mean_l = mean_g; - max_l = max_g; - min_l = min_g; - #endif //MPI_CHOLLA + max_l = max_g; + min_l = min_g; + #endif // MPI_CHOLLA - #if defined(PRINT_INITIAL_STATS) && defined(COSMOLOGY) - chprintf( " abs(Momentum X) Mean: %f Min: %f Max: %f [ h^2 Msun kpc^-3 km s^-1] \n", mean_l, min_l, max_l ); - #endif //PRINT_INITIAL_STATS and COSMOLOGY + #if defined(PRINT_INITIAL_STATS) && defined(COSMOLOGY) + chprintf( + " abs(Momentum X) Mean: %f Min: %f Max: %f [ h^2 Msun kpc^-3 " + "km s^-1] \n", + mean_l, min_l, max_l); + #endif // PRINT_INITIAL_STATS and COSMOLOGY // Open the y momentum dataset dataset_id = H5Dopen(file_id, "/momentum_y", H5P_DEFAULT); - // Read the y momentum array into the dataset buffer // NOTE: NEED TO FIX FOR FLOAT REAL!!! - status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer); + // Read the y momentum array into the dataset buffer // NOTE: NEED TO FIX + // FOR FLOAT REAL!!! + status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, + H5P_DEFAULT, dataset_buffer); // Free the dataset id status = H5Dclose(dataset_id); mean_l = 0; - min_l = 1e65; - max_l = -1; + min_l = 1e65; + max_l = -1; // Copy the y momentum array to the grid - for (k=0; k max_l ) max_l = fabs(C.momentum_y[id]); - if ( fabs(C.momentum_y[id]) < min_l ) min_l = fabs(C.momentum_y[id]); + if (fabs(C.momentum_y[id]) > max_l) max_l = fabs(C.momentum_y[id]); + if (fabs(C.momentum_y[id]) < min_l) min_l = fabs(C.momentum_y[id]); } } } - mean_l /= ( H.nz_real * H.ny_real * H.nx_real ); + mean_l /= (H.nz_real * H.ny_real * H.nx_real); - #if MPI_CHOLLA - mean_g = ReduceRealAvg( mean_l ); - max_g = ReduceRealMax( max_l ); - min_g = ReduceRealMin( min_l ); + #if MPI_CHOLLA + mean_g = ReduceRealAvg(mean_l); + max_g = ReduceRealMax(max_l); + min_g = ReduceRealMin(min_l); mean_l = mean_g; - max_l = max_g; - min_l = min_g; - #endif //MPI_CHOLLA - - #if defined(PRINT_INITIAL_STATS) && defined(COSMOLOGY) - chprintf( " abs(Momentum Y) Mean: %f Min: %f Max: %f [ h^2 Msun kpc^-3 km s^-1] \n", mean_l, min_l, max_l ); - #endif //PRINT_INITIAL_STATS and COSMOLOGY + max_l = max_g; + min_l = min_g; + #endif // MPI_CHOLLA + #if defined(PRINT_INITIAL_STATS) && defined(COSMOLOGY) + chprintf( + " abs(Momentum Y) Mean: %f Min: %f Max: %f [ h^2 Msun kpc^-3 " + "km s^-1] \n", + mean_l, min_l, max_l); + #endif // PRINT_INITIAL_STATS and COSMOLOGY // Open the z momentum dataset dataset_id = H5Dopen(file_id, "/momentum_z", H5P_DEFAULT); - // Read the z momentum array into the dataset buffer // NOTE: NEED TO FIX FOR FLOAT REAL!!! - status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer); + // Read the z momentum array into the dataset buffer // NOTE: NEED TO FIX + // FOR FLOAT REAL!!! + status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, + H5P_DEFAULT, dataset_buffer); // Free the dataset id status = H5Dclose(dataset_id); mean_l = 0; - min_l = 1e65; - max_l = -1; + min_l = 1e65; + max_l = -1; // Copy the z momentum array to the grid - for (k=0; k max_l ) max_l = fabs(C.momentum_z[id]); - if ( fabs(C.momentum_z[id]) < min_l ) min_l = fabs(C.momentum_z[id]); + if (fabs(C.momentum_z[id]) > max_l) max_l = fabs(C.momentum_z[id]); + if (fabs(C.momentum_z[id]) < min_l) min_l = fabs(C.momentum_z[id]); } } } - mean_l /= ( H.nz_real * H.ny_real * H.nx_real ); + mean_l /= (H.nz_real * H.ny_real * H.nx_real); - #if MPI_CHOLLA - mean_g = ReduceRealAvg( mean_l ); - max_g = ReduceRealMax( max_l ); - min_g = ReduceRealMin( min_l ); + #if MPI_CHOLLA + mean_g = ReduceRealAvg(mean_l); + max_g = ReduceRealMax(max_l); + min_g = ReduceRealMin(min_l); mean_l = mean_g; - max_l = max_g; - min_l = min_g; - #endif //MPI_CHOLLA - - #if defined(PRINT_INITIAL_STATS) && defined(COSMOLOGY) - chprintf( " abs(Momentum Z) Mean: %f Min: %f Max: %f [ h^2 Msun kpc^-3 km s^-1] \n", mean_l, min_l, max_l ); - #endif //PRINT_INITIAL_STATS and COSMOLOGY + max_l = max_g; + min_l = min_g; + #endif // MPI_CHOLLA + #if defined(PRINT_INITIAL_STATS) && defined(COSMOLOGY) + chprintf( + " abs(Momentum Z) Mean: %f Min: %f Max: %f [ h^2 Msun kpc^-3 " + "km s^-1] \n", + mean_l, min_l, max_l); + #endif // PRINT_INITIAL_STATS and COSMOLOGY // Open the Energy dataset dataset_id = H5Dopen(file_id, "/Energy", H5P_DEFAULT); - // Read the Energy array into the dataset buffer // NOTE: NEED TO FIX FOR FLOAT REAL!!! - status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer); + // Read the Energy array into the dataset buffer // NOTE: NEED TO FIX FOR + // FLOAT REAL!!! + status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, + H5P_DEFAULT, dataset_buffer); // Free the dataset id status = H5Dclose(dataset_id); mean_l = 0; - min_l = 1e65; - max_l = -1; + min_l = 1e65; + max_l = -1; // Copy the Energy array to the grid - for (k=0; k max_l ) max_l = C.Energy[id]; - if ( C.Energy[id] < min_l ) min_l = C.Energy[id]; + if (C.Energy[id] > max_l) max_l = C.Energy[id]; + if (C.Energy[id] < min_l) min_l = C.Energy[id]; } } } - mean_l /= ( H.nz_real * H.ny_real * H.nx_real ); + mean_l /= (H.nz_real * H.ny_real * H.nx_real); - #if MPI_CHOLLA - mean_g = ReduceRealAvg( mean_l ); - max_g = ReduceRealMax( max_l ); - min_g = ReduceRealMin( min_l ); + #if MPI_CHOLLA + mean_g = ReduceRealAvg(mean_l); + max_g = ReduceRealMax(max_l); + min_g = ReduceRealMin(min_l); mean_l = mean_g; - max_l = max_g; - min_l = min_g; - #endif //MPI_CHOLLA - - #if defined(PRINT_INITIAL_STATS) && defined(COSMOLOGY) - chprintf( " Energy Mean: %f Min: %f Max: %f [ h^2 Msun kpc^-3 km^2 s^-2 ] \n", mean_l, min_l, max_l ); - #endif //PRINT_INITIAL_STATS and COSMOLOGY + max_l = max_g; + min_l = min_g; + #endif // MPI_CHOLLA + #if defined(PRINT_INITIAL_STATS) && defined(COSMOLOGY) + chprintf( + " Energy Mean: %f Min: %f Max: %f [ h^2 Msun kpc^-3 km^2 " + "s^-2 ] \n", + mean_l, min_l, max_l); + #endif // PRINT_INITIAL_STATS and COSMOLOGY - #ifdef DE + #ifdef DE // Open the internal Energy dataset dataset_id = H5Dopen(file_id, "/GasEnergy", H5P_DEFAULT); - // Read the internal Energy array into the dataset buffer // NOTE: NEED TO FIX FOR FLOAT REAL!!! - status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer); + // Read the internal Energy array into the dataset buffer // NOTE: NEED TO + // FIX FOR FLOAT REAL!!! + status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, + H5P_DEFAULT, dataset_buffer); // Free the dataset id status = H5Dclose(dataset_id); Real temp, temp_max_l, temp_min_l, temp_mean_l; Real temp_min_g, temp_max_g, temp_mean_g; temp_mean_l = 0; - temp_min_l = 1e65; - temp_max_l = -1; - mean_l = 0; - min_l = 1e65; - max_l = -1; + temp_min_l = 1e65; + temp_max_l = -1; + mean_l = 0; + min_l = 1e65; + max_l = -1; // Copy the internal Energy array to the grid - for (k=0; k max_l ) max_l = C.GasEnergy[id]; - if ( C.GasEnergy[id] < min_l ) min_l = C.GasEnergy[id]; - temp = C.GasEnergy[id] / C.density[id] * ( gama - 1 ) * MP / KB * 1e10 ; + if (C.GasEnergy[id] > max_l) max_l = C.GasEnergy[id]; + if (C.GasEnergy[id] < min_l) min_l = C.GasEnergy[id]; + temp = C.GasEnergy[id] / C.density[id] * (gama - 1) * MP / KB * 1e10; temp_mean_l += temp; // chprintf( "%f\n", temp); - if ( temp > temp_max_l ) temp_max_l = temp; - if ( temp < temp_min_l ) temp_min_l = temp; + if (temp > temp_max_l) temp_max_l = temp; + if (temp < temp_min_l) temp_min_l = temp; } } } - mean_l /= ( H.nz_real * H.ny_real * H.nx_real ); - temp_mean_l /= ( H.nz_real * H.ny_real * H.nx_real ); + mean_l /= (H.nz_real * H.ny_real * H.nx_real); + temp_mean_l /= (H.nz_real * H.ny_real * H.nx_real); #if MPI_CHOLLA - mean_g = ReduceRealAvg( mean_l ); - max_g = ReduceRealMax( max_l ); - min_g = ReduceRealMin( min_l ); - mean_l = mean_g; - max_l = max_g; - min_l = min_g; - temp_mean_g = ReduceRealAvg( temp_mean_l ); - temp_max_g = ReduceRealMax( temp_max_l ); - temp_min_g = ReduceRealMin( temp_min_l ); + mean_g = ReduceRealAvg(mean_l); + max_g = ReduceRealMax(max_l); + min_g = ReduceRealMin(min_l); + mean_l = mean_g; + max_l = max_g; + min_l = min_g; + temp_mean_g = ReduceRealAvg(temp_mean_l); + temp_max_g = ReduceRealMax(temp_max_l); + temp_min_g = ReduceRealMin(temp_min_l); temp_mean_l = temp_mean_g; - temp_max_l = temp_max_g; - temp_min_l = temp_min_g; - #endif //MPI_CHOLLA + temp_max_l = temp_max_g; + temp_min_l = temp_min_g; + #endif // MPI_CHOLLA #if defined(PRINT_INITIAL_STATS) && defined(COSMOLOGY) - chprintf( " GasEnergy Mean: %f Min: %f Max: %f [ h^2 Msun kpc^-3 km^2 s^-2 ] \n", mean_l, min_l, max_l ); - chprintf( " Temperature Mean: %f Min: %f Max: %f [ K ] \n", temp_mean_l, temp_min_l, temp_max_l ); - #endif //PRINT_INITIAL_STATS and COSMOLOGY + chprintf( + " GasEnergy Mean: %f Min: %f Max: %f [ h^2 Msun kpc^-3 km^2 " + "s^-2 ] \n", + mean_l, min_l, max_l); + chprintf(" Temperature Mean: %f Min: %f Max: %f [ K ] \n", + temp_mean_l, temp_min_l, temp_max_l); + #endif // PRINT_INITIAL_STATS and COSMOLOGY - #endif//DE + #endif // DE - #ifdef SCALAR - #if !defined(COOLING_GRACKLE) && !defined(CHEMISTRY_GPU) // Dont Load scalars when using grackle or CHEMISTRY_GPU - for (int s=0; s max_l ) max_l = fabs(C.magnetic_x[id]); - if ( fabs(C.magnetic_x[id]) < min_l ) min_l = fabs(C.magnetic_x[id]); - } + mean_l = 0; + min_l = 1e65; + max_l = -1; + // Copy the x magnetic field array to the grid + for (k = 0; k < H.nz_real + 1; k++) { + for (j = 0; j < H.ny_real + 1; j++) { + for (i = 0; i < H.nx_real + 1; i++) { + id = (i + H.n_ghost - 1) + (j + H.n_ghost - 1) * H.nx + + (k + H.n_ghost - 1) * H.nx * H.ny; + buf_id = + k + j * (H.nz_real + 1) + i * (H.nz_real + 1) * (H.ny_real + 1); + C.magnetic_x[id] = dataset_buffer[buf_id]; + mean_l += fabs(C.magnetic_x[id]); + if (fabs(C.magnetic_x[id]) > max_l) max_l = fabs(C.magnetic_x[id]); + if (fabs(C.magnetic_x[id]) < min_l) min_l = fabs(C.magnetic_x[id]); } } - mean_l /= ( (H.nz_real+1) * (H.ny_real+1) * (H.nx_real+1) ); - - #if MPI_CHOLLA - mean_g = ReduceRealAvg( mean_l ); - max_g = ReduceRealMax( max_l ); - min_g = ReduceRealMin( min_l ); - mean_l = mean_g; - max_l = max_g; - min_l = min_g; - #endif //MPI_CHOLLA - - #if defined(PRINT_INITIAL_STATS) && defined(COSMOLOGY) - chprintf( " abs(Magnetic X) Mean: %f Min: %f Max: %f [ Msun^1/2 kpc^-1/2 s^-1] \n", mean_l, min_l, max_l ); - #endif //PRINT_INITIAL_STATS and COSMOLOGY - - // Open the y magnetic field dataset - dataset_id = H5Dopen(file_id, "/magnetic_y", H5P_DEFAULT); - // Read the y magnetic field array into the dataset buffer // NOTE: NEED TO FIX FOR FLOAT REAL!!! - status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer); - // Free the dataset id - status = H5Dclose(dataset_id); + } + mean_l /= ((H.nz_real + 1) * (H.ny_real + 1) * (H.nx_real + 1)); - mean_l = 0; - min_l = 1e65; - max_l = -1; - // Copy the y magnetic field array to the grid - for (k=0; k max_l ) max_l = fabs(C.magnetic_y[id]); - if ( fabs(C.magnetic_y[id]) < min_l ) min_l = fabs(C.magnetic_y[id]); - } + #if MPI_CHOLLA + mean_g = ReduceRealAvg(mean_l); + max_g = ReduceRealMax(max_l); + min_g = ReduceRealMin(min_l); + mean_l = mean_g; + max_l = max_g; + min_l = min_g; + #endif // MPI_CHOLLA + + #if defined(PRINT_INITIAL_STATS) && defined(COSMOLOGY) + chprintf( + " abs(Magnetic X) Mean: %f Min: %f Max: %f [ Msun^1/2 " + "kpc^-1/2 s^-1] \n", + mean_l, min_l, max_l); + #endif // PRINT_INITIAL_STATS and COSMOLOGY + + // Open the y magnetic field dataset + dataset_id = H5Dopen(file_id, "/magnetic_y", H5P_DEFAULT); + // Read the y magnetic field array into the dataset buffer // NOTE: NEED TO + // FIX FOR FLOAT REAL!!! + status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, + H5P_DEFAULT, dataset_buffer); + // Free the dataset id + status = H5Dclose(dataset_id); + + mean_l = 0; + min_l = 1e65; + max_l = -1; + // Copy the y magnetic field array to the grid + for (k = 0; k < H.nz_real + 1; k++) { + for (j = 0; j < H.ny_real + 1; j++) { + for (i = 0; i < H.nx_real + 1; i++) { + id = (i + H.n_ghost - 1) + (j + H.n_ghost - 1) * H.nx + + (k + H.n_ghost - 1) * H.nx * H.ny; + buf_id = + k + j * (H.nz_real + 1) + i * (H.nz_real + 1) * (H.ny_real + 1); + C.magnetic_y[id] = dataset_buffer[buf_id]; + mean_l += fabs(C.magnetic_y[id]); + if (fabs(C.magnetic_y[id]) > max_l) max_l = fabs(C.magnetic_y[id]); + if (fabs(C.magnetic_y[id]) < min_l) min_l = fabs(C.magnetic_y[id]); } } - mean_l /= ( (H.nz_real+1) * (H.ny_real+1) * (H.nx_real+1) ); - - #if MPI_CHOLLA - mean_g = ReduceRealAvg( mean_l ); - max_g = ReduceRealMax( max_l ); - min_g = ReduceRealMin( min_l ); - mean_l = mean_g; - max_l = max_g; - min_l = min_g; - #endif //MPI_CHOLLA - - #if defined(PRINT_INITIAL_STATS) && defined(COSMOLOGY) - chprintf( " abs(Magnetic Y) Mean: %f Min: %f Max: %f [ Msun^1/2 kpc^-1/2 s^-1] \n", mean_l, min_l, max_l ); - #endif //PRINT_INITIAL_STATS and COSMOLOGY - - // Open the z magnetic field dataset - dataset_id = H5Dopen(file_id, "/magnetic_z", H5P_DEFAULT); - // Read the z magnetic field array into the dataset buffer // NOTE: NEED TO FIX FOR FLOAT REAL!!! - status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer); - // Free the dataset id - status = H5Dclose(dataset_id); + } + mean_l /= ((H.nz_real + 1) * (H.ny_real + 1) * (H.nx_real + 1)); - mean_l = 0; - min_l = 1e65; - max_l = -1; - // Copy the z magnetic field array to the grid - for (k=0; k max_l ) max_l = fabs(C.magnetic_z[id]); - if ( fabs(C.magnetic_z[id]) < min_l ) min_l = fabs(C.magnetic_z[id]); - } + #if MPI_CHOLLA + mean_g = ReduceRealAvg(mean_l); + max_g = ReduceRealMax(max_l); + min_g = ReduceRealMin(min_l); + mean_l = mean_g; + max_l = max_g; + min_l = min_g; + #endif // MPI_CHOLLA + + #if defined(PRINT_INITIAL_STATS) && defined(COSMOLOGY) + chprintf( + " abs(Magnetic Y) Mean: %f Min: %f Max: %f [ Msun^1/2 " + "kpc^-1/2 s^-1] \n", + mean_l, min_l, max_l); + #endif // PRINT_INITIAL_STATS and COSMOLOGY + + // Open the z magnetic field dataset + dataset_id = H5Dopen(file_id, "/magnetic_z", H5P_DEFAULT); + // Read the z magnetic field array into the dataset buffer // NOTE: NEED TO + // FIX FOR FLOAT REAL!!! + status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, + H5P_DEFAULT, dataset_buffer); + // Free the dataset id + status = H5Dclose(dataset_id); + + mean_l = 0; + min_l = 1e65; + max_l = -1; + // Copy the z magnetic field array to the grid + for (k = 0; k < H.nz_real + 1; k++) { + for (j = 0; j < H.ny_real + 1; j++) { + for (i = 0; i < H.nx_real + 1; i++) { + id = (i + H.n_ghost - 1) + (j + H.n_ghost - 1) * H.nx + + (k + H.n_ghost - 1) * H.nx * H.ny; + buf_id = + k + j * (H.nz_real + 1) + i * (H.nz_real + 1) * (H.ny_real + 1); + C.magnetic_z[id] = dataset_buffer[buf_id]; + mean_l += fabs(C.magnetic_z[id]); + if (fabs(C.magnetic_z[id]) > max_l) max_l = fabs(C.magnetic_z[id]); + if (fabs(C.magnetic_z[id]) < min_l) min_l = fabs(C.magnetic_z[id]); } } - mean_l /= ( (H.nz_real+1) * (H.ny_real+1) * (H.nx_real+1) ); - - #if MPI_CHOLLA - mean_g = ReduceRealAvg( mean_l ); - max_g = ReduceRealMax( max_l ); - min_g = ReduceRealMin( min_l ); - mean_l = mean_g; - max_l = max_g; - min_l = min_g; - #endif //MPI_CHOLLA - - #if defined(PRINT_INITIAL_STATS) && defined(COSMOLOGY) - chprintf( " abs(Magnetic Z) Mean: %f Min: %f Max: %f [ Msun^1/2 kpc^-1/2 s^-1] \n", mean_l, min_l, max_l ); - #endif //PRINT_INITIAL_STATS and COSMOLOGY - #endif //MHD + } + mean_l /= ((H.nz_real + 1) * (H.ny_real + 1) * (H.nx_real + 1)); + + #if MPI_CHOLLA + mean_g = ReduceRealAvg(mean_l); + max_g = ReduceRealMax(max_l); + min_g = ReduceRealMin(min_l); + mean_l = mean_g; + max_l = max_g; + min_l = min_g; + #endif // MPI_CHOLLA + + #if defined(PRINT_INITIAL_STATS) && defined(COSMOLOGY) + chprintf( + " abs(Magnetic Z) Mean: %f Min: %f Max: %f [ Msun^1/2 " + "kpc^-1/2 s^-1] \n", + mean_l, min_l, max_l); + #endif // PRINT_INITIAL_STATS and COSMOLOGY + #endif // MHD } free(dataset_buffer); } #endif - - /* MPI-safe printf routine */ -int chprintf(const char * __restrict sdata, ...) +int chprintf(const char *__restrict sdata, ...) { int code = 0; #ifdef MPI_CHOLLA /*limit printf to root process only*/ - if(procID==root) - { + if (procID == root) { #endif /*MPI_CHOLLA*/ - va_list ap; - va_start(ap, sdata); - code = vfprintf(stdout, sdata, ap); - va_end(ap); - fflush(stdout); + va_list ap; + va_start(ap, sdata); + code = vfprintf(stdout, sdata, ap); + va_end(ap); + fflush(stdout); #ifdef MPI_CHOLLA } @@ -3321,15 +3585,15 @@ int chprintf(const char * __restrict sdata, ...) return code; } - -void rotate_point(Real x, Real y, Real z, Real delta, Real phi, Real theta, Real *xp, Real *yp, Real *zp) { - - Real cd,sd,cp,sp,ct,st; //sines and cosines - Real a00, a01, a02; //rotation matrix elements +void rotate_point(Real x, Real y, Real z, Real delta, Real phi, Real theta, + Real *xp, Real *yp, Real *zp) +{ + Real cd, sd, cp, sp, ct, st; // sines and cosines + Real a00, a01, a02; // rotation matrix elements Real a10, a11, a12; Real a20, a21, a22; - //compute trig functions of rotation angles + // compute trig functions of rotation angles cd = cos(delta); sd = sin(delta); cp = cos(phi); @@ -3337,7 +3601,7 @@ void rotate_point(Real x, Real y, Real z, Real delta, Real phi, Real theta, Real ct = cos(theta); st = sin(theta); - //compute the rotation matrix elements + // compute the rotation matrix elements /*a00 = cosp*cosd - sinp*cost*sind; a01 = -1.0*(cosp*sind + sinp*cost*cosd); a02 = sinp*sint; @@ -3349,32 +3613,30 @@ void rotate_point(Real x, Real y, Real z, Real delta, Real phi, Real theta, Real a20 = sint*sind; a21 = sint*cosd; a22 = cost;*/ - a00 = (cp*cd - sp*ct*sd); - a01 = -1.0*(cp*sd+sp*ct*cd); - a02 = sp*st; - a10 = (sp*cd + cp*ct*sd); - a11 = (cp*ct*cd -st*sd); - a12 = cp*st; - a20 = st*sd; - a21 = st*cd; + a00 = (cp * cd - sp * ct * sd); + a01 = -1.0 * (cp * sd + sp * ct * cd); + a02 = sp * st; + a10 = (sp * cd + cp * ct * sd); + a11 = (cp * ct * cd - st * sd); + a12 = cp * st; + a20 = st * sd; + a21 = st * cd; a22 = ct; - *xp = a00*x + a01*y + a02*z; - *yp = a10*x + a11*y + a12*z; - *zp = a20*x + a21*y + a22*z; - + *xp = a00 * x + a01 * y + a02 * z; + *yp = a10 * x + a11 * y + a12 * z; + *zp = a20 * x + a21 * y + a22 * z; } -void write_debug ( Real *Value, const char *fname, int nValues, int iProc ) - { +void write_debug(Real *Value, const char *fname, int nValues, int iProc) +{ char fn[1024]; int ret; sprintf(fn, "%s_%07d.txt", fname, iProc); FILE *fp = fopen(fn, "w"); - for ( int iV = 0; iV < nValues; iV++ ) - fprintf(fp, "%e\n", Value[iV]); + for (int iV = 0; iV < nValues; iV++) fprintf(fp, "%e\n", Value[iV]); - fclose (fp); - } + fclose(fp); +} diff --git a/src/io/io.h b/src/io/io.h index 08489f7da..d267abb38 100644 --- a/src/io/io.h +++ b/src/io/io.h @@ -1,32 +1,32 @@ #pragma once -#include "../global/global.h" -#include "../grid/grid3D.h" +#include #include #include -#include +#include "../global/global.h" +#include "../grid/grid3D.h" /* Write the data */ -void WriteData(Grid3D &G, struct parameters P, int nfile); +void WriteData(Grid3D& G, struct parameters P, int nfile); /* Output the grid data to file. */ -void OutputData(Grid3D &G, struct parameters P, int nfile); +void OutputData(Grid3D& G, struct parameters P, int nfile); /* Output the grid data to file as 32-bit floats. */ -void OutputFloat32(Grid3D &G, struct parameters P, int nfile); +void OutputFloat32(Grid3D& G, struct parameters P, int nfile); /* Output a projection of the grid data to file. */ -void OutputProjectedData(Grid3D &G, struct parameters P, int nfile); +void OutputProjectedData(Grid3D& G, struct parameters P, int nfile); /* Output a rotated projection of the grid data to file. */ -void OutputRotatedProjectedData(Grid3D &G, struct parameters P, int nfile); +void OutputRotatedProjectedData(Grid3D& G, struct parameters P, int nfile); /* Output xy, xz, and yz slices of the grid data to file. */ -void OutputSlices(Grid3D &G, struct parameters P, int nfile); +void OutputSlices(Grid3D& G, struct parameters P, int nfile); /* MPI-safe printf routine */ -int chprintf(const char * __restrict sdata, ...); +int chprintf(const char* __restrict sdata, ...); /*! * \brief Convert a floating point number to a string such that it can be @@ -36,28 +36,35 @@ int chprintf(const char * __restrict sdata, ...); * \param[in] input The floating point number to convert * \return std::string The string representation of the input floating point */ -template -std::string to_string_exact(T const &input) +template +std::string to_string_exact(T const& input) { - std::stringstream output; - output << std::setprecision(std::numeric_limits::max_digits10); - output << input; - return output.str(); + std::stringstream output; + output << std::setprecision(std::numeric_limits::max_digits10); + output << input; + return output.str(); } -void Create_Log_File( struct parameters P ); +void Create_Log_File(struct parameters P); -void Write_Message_To_Log_File( const char* message ); +void Write_Message_To_Log_File(const char* message); -void write_debug ( Real *Value, const char *fname, int nValues, int iProc ); +void write_debug(Real* Value, const char* fname, int nValues, int iProc); #ifdef HDF5 // From io/io.cpp -herr_t HDF5_Dataset(hid_t file_id, hid_t dataspace_id, double* dataset_buffer, const char* name); -herr_t HDF5_Dataset(hid_t file_id, hid_t dataspace_id, float* dataset_buffer, const char* name); +herr_t HDF5_Dataset(hid_t file_id, hid_t dataspace_id, double* dataset_buffer, + const char* name); +herr_t HDF5_Dataset(hid_t file_id, hid_t dataspace_id, float* dataset_buffer, + const char* name); // From io/io_gpu.cu -// Use GPU to pack source -> device_buffer, then copy device_buffer -> buffer, then write HDF5 field -void WriteHDF5Field3D(int nx, int ny, int nx_real, int ny_real, int nz_real, int n_ghost, hid_t file_id, float* buffer, float* device_buffer, Real* source, const char* name); -void WriteHDF5Field3D(int nx, int ny, int nx_real, int ny_real, int nz_real, int n_ghost, hid_t file_id, double* buffer, double* device_buffer, Real* source, const char* name); +// Use GPU to pack source -> device_buffer, then copy device_buffer -> buffer, +// then write HDF5 field +void WriteHDF5Field3D(int nx, int ny, int nx_real, int ny_real, int nz_real, + int n_ghost, hid_t file_id, float* buffer, + float* device_buffer, Real* source, const char* name); +void WriteHDF5Field3D(int nx, int ny, int nx_real, int ny_real, int nz_real, + int n_ghost, hid_t file_id, double* buffer, + double* device_buffer, Real* source, const char* name); #endif diff --git a/src/io/io_gpu.cu b/src/io/io_gpu.cu index c6cab6e8a..253d9ae79 100644 --- a/src/io/io_gpu.cu +++ b/src/io/io_gpu.cu @@ -1,110 +1,129 @@ // Require HDF5 #ifdef HDF5 -#include - -#include "../grid/grid3D.h" - -#include "../io/io.h" // To provide io.h with OutputViz3D - -// Note that the HDF5 file and buffer will have size nx_real * ny_real * nz_real whereas the conserved variables have size nx,ny,nz -// Note that magnetic fields add +1 to nx_real ny_real nz_real since an extra face needs to be output, but also has the same size nx ny nz -// For the magnetic field case, a different nx_real+1 ny_real+1 nz_real+1 n_ghost-1 are provided as inputs. - -// Copy Real (non-ghost) cells from source to a double destination (for writing HDF5 in double precision) -__global__ void CopyReal3D_GPU_Kernel(int nx, int ny, int nx_real, int ny_real, int nz_real, int n_ghost, double* destination, Real* source) + #include + + #include "../grid/grid3D.h" + #include "../io/io.h" // To provide io.h with OutputViz3D + +// Note that the HDF5 file and buffer will have size nx_real * ny_real * nz_real +// whereas the conserved variables have size nx,ny,nz Note that magnetic fields +// add +1 to nx_real ny_real nz_real since an extra face needs to be output, but +// also has the same size nx ny nz For the magnetic field case, a different +// nx_real+1 ny_real+1 nz_real+1 n_ghost-1 are provided as inputs. + +// Copy Real (non-ghost) cells from source to a double destination (for writing +// HDF5 in double precision) +__global__ void CopyReal3D_GPU_Kernel(int nx, int ny, int nx_real, int ny_real, + int nz_real, int n_ghost, + double* destination, Real* source) { - - int dest_id,source_id,id,i,j,k; + int dest_id, source_id, id, i, j, k; id = threadIdx.x + blockIdx.x * blockDim.x; - k = id/(nx_real*ny_real); - j = (id - k*nx_real*ny_real)/nx_real; - i = id - j*nx_real - k*nx_real*ny_real; + k = id / (nx_real * ny_real); + j = (id - k * nx_real * ny_real) / nx_real; + i = id - j * nx_real - k * nx_real * ny_real; if (k >= nz_real) { return; } // This converts into HDF5 indexing that plays well with Python - dest_id = k + j*nz_real + i*ny_real*nz_real; - source_id = (i+n_ghost) + (j+n_ghost)*nx + (k+n_ghost)*nx*ny; + dest_id = k + j * nz_real + i * ny_real * nz_real; + source_id = (i + n_ghost) + (j + n_ghost) * nx + (k + n_ghost) * nx * ny; - destination[dest_id] = (double) source[source_id]; + destination[dest_id] = (double)source[source_id]; } -// Copy Real (non-ghost) cells from source to a float destination (for writing HDF5 in float precision) -__global__ void CopyReal3D_GPU_Kernel(int nx, int ny, int nx_real, int ny_real, int nz_real, int n_ghost, float* destination, Real* source) +// Copy Real (non-ghost) cells from source to a float destination (for writing +// HDF5 in float precision) +__global__ void CopyReal3D_GPU_Kernel(int nx, int ny, int nx_real, int ny_real, + int nz_real, int n_ghost, + float* destination, Real* source) { - - int dest_id,source_id,id,i,j,k; + int dest_id, source_id, id, i, j, k; id = threadIdx.x + blockIdx.x * blockDim.x; - k = id/(nx_real*ny_real); - j = (id - k*nx_real*ny_real)/nx_real; - i = id - j*nx_real - k*nx_real*ny_real; + k = id / (nx_real * ny_real); + j = (id - k * nx_real * ny_real) / nx_real; + i = id - j * nx_real - k * nx_real * ny_real; if (k >= nz_real) { return; } // This converts into HDF5 indexing that plays well with Python - dest_id = k + j*nz_real + i*ny_real*nz_real; - source_id = (i+n_ghost) + (j+n_ghost)*nx + (k+n_ghost)*nx*ny; + dest_id = k + j * nz_real + i * ny_real * nz_real; + source_id = (i + n_ghost) + (j + n_ghost) * nx + (k + n_ghost) * nx * ny; - destination[dest_id] = (float) source[source_id]; + destination[dest_id] = (float)source[source_id]; } -// When buffer is double, automatically use the double version of everything using function overloading -void WriteHDF5Field3D(int nx, int ny, int nx_real, int ny_real, int nz_real, int n_ghost, hid_t file_id, double* buffer, double* device_buffer, Real* device_source, const char* name) +// When buffer is double, automatically use the double version of everything +// using function overloading +void WriteHDF5Field3D(int nx, int ny, int nx_real, int ny_real, int nz_real, + int n_ghost, hid_t file_id, double* buffer, + double* device_buffer, Real* device_source, + const char* name) { herr_t status; hsize_t dims[3]; - dims[0] = nx_real; - dims[1] = ny_real; - dims[2] = nz_real; + dims[0] = nx_real; + dims[1] = ny_real; + dims[2] = nz_real; hid_t dataspace_id = H5Screate_simple(3, dims, NULL); - //Copy non-ghost parts of source to buffer - dim3 dim1dGrid((nx_real*ny_real*nz_real+TPB-1)/TPB, 1, 1); + // Copy non-ghost parts of source to buffer + dim3 dim1dGrid((nx_real * ny_real * nz_real + TPB - 1) / TPB, 1, 1); dim3 dim1dBlock(TPB, 1, 1); - hipLaunchKernelGGL(CopyReal3D_GPU_Kernel,dim1dGrid,dim1dBlock,0,0,nx,ny,nx_real,ny_real,nz_real,n_ghost,device_buffer,device_source); - CudaSafeCall(cudaMemcpy( buffer, device_buffer, nx_real*ny_real*nz_real*sizeof(double), cudaMemcpyDeviceToHost)); + hipLaunchKernelGGL(CopyReal3D_GPU_Kernel, dim1dGrid, dim1dBlock, 0, 0, nx, ny, + nx_real, ny_real, nz_real, n_ghost, device_buffer, + device_source); + CudaSafeCall(cudaMemcpy(buffer, device_buffer, + nx_real * ny_real * nz_real * sizeof(double), + cudaMemcpyDeviceToHost)); // Write Buffer to HDF5 status = HDF5_Dataset(file_id, dataspace_id, buffer, name); status = H5Sclose(dataspace_id); - if (status < 0) {printf("File write failed.\n");} - - + if (status < 0) { + printf("File write failed.\n"); + } } - -// When buffer is float, automatically use the float version of everything using function overloading -void WriteHDF5Field3D(int nx, int ny, int nx_real, int ny_real, int nz_real, int n_ghost, hid_t file_id, float* buffer, float* device_buffer, Real* device_source, const char* name) +// When buffer is float, automatically use the float version of everything using +// function overloading +void WriteHDF5Field3D(int nx, int ny, int nx_real, int ny_real, int nz_real, + int n_ghost, hid_t file_id, float* buffer, + float* device_buffer, Real* device_source, + const char* name) { - herr_t status; hsize_t dims[3]; - dims[0] = nx_real; - dims[1] = ny_real; - dims[2] = nz_real; + dims[0] = nx_real; + dims[1] = ny_real; + dims[2] = nz_real; hid_t dataspace_id = H5Screate_simple(3, dims, NULL); - //Copy non-ghost parts of source to buffer - dim3 dim1dGrid((nx_real*ny_real*nz_real+TPB-1)/TPB, 1, 1); + // Copy non-ghost parts of source to buffer + dim3 dim1dGrid((nx_real * ny_real * nz_real + TPB - 1) / TPB, 1, 1); dim3 dim1dBlock(TPB, 1, 1); - hipLaunchKernelGGL(CopyReal3D_GPU_Kernel,dim1dGrid,dim1dBlock,0,0,nx,ny,nx_real,ny_real,nz_real,n_ghost,device_buffer,device_source); - CudaSafeCall(cudaMemcpy( buffer, device_buffer, nx_real*ny_real*nz_real*sizeof(float), cudaMemcpyDeviceToHost)); + hipLaunchKernelGGL(CopyReal3D_GPU_Kernel, dim1dGrid, dim1dBlock, 0, 0, nx, ny, + nx_real, ny_real, nz_real, n_ghost, device_buffer, + device_source); + CudaSafeCall(cudaMemcpy(buffer, device_buffer, + nx_real * ny_real * nz_real * sizeof(float), + cudaMemcpyDeviceToHost)); // Write Buffer to HDF5 status = HDF5_Dataset(file_id, dataspace_id, buffer, name); status = H5Sclose(dataspace_id); - if (status < 0) {printf("File write failed.\n");} - + if (status < 0) { + printf("File write failed.\n"); + } } - -#endif //HDF5 +#endif // HDF5 diff --git a/src/main.cpp b/src/main.cpp index 9e59bd651..61707ae97 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -2,29 +2,31 @@ * \brief Program to run the grid code. */ #ifdef MPI_CHOLLA -#include -#include "mpi/mpi_routines.h" + #include + + #include "mpi/mpi_routines.h" #endif +#include #include #include -#include #include + #include "global/global.h" #include "grid/grid3D.h" #include "io/io.h" #include "utils/error_handling.h" #ifdef SUPERNOVA -#include "particles/supernova.h" -#ifdef ANALYSIS -#include "analysis/feedback_analysis.h" -#endif -#endif //SUPERNOVA + #include "particles/supernova.h" + #ifdef ANALYSIS + #include "analysis/feedback_analysis.h" + #endif +#endif // SUPERNOVA #ifdef STAR_FORMATION -#include "particles/star_formation.h" + #include "particles/star_formation.h" #endif -#ifdef MHD -#include "mhd/magnetic_divergence.h" -#endif //MHD +#ifdef MHD + #include "mhd/magnetic_divergence.h" +#endif // MHD #include "grid/grid_enum.h" @@ -32,33 +34,32 @@ int main(int argc, char *argv[]) { // timing variables double start_total, stop_total, start_step, stop_step; - #ifdef CPU_TIME +#ifdef CPU_TIME double stop_init, init_min, init_max, init_avg; double start_bound, stop_bound, bound_min, bound_max, bound_avg; double start_hydro, stop_hydro, hydro_min, hydro_max, hydro_avg; double init, bound, hydro; init = bound = hydro = 0; - #endif //CPU_TIME +#endif // CPU_TIME // start the total time start_total = get_time(); - /* Initialize MPI communication */ - #ifdef MPI_CHOLLA +/* Initialize MPI communication */ +#ifdef MPI_CHOLLA InitializeChollaMPI(&argc, &argv); - #endif /*MPI_CHOLLA*/ +#endif /*MPI_CHOLLA*/ - Real dti = 0; // inverse time step, 1.0 / dt + Real dti = 0; // inverse time step, 1.0 / dt // input parameter variables char *param_file; struct parameters P; - int nfile = 0; // number of output files - Real outtime = 0; // current output time + int nfile = 0; // number of output files + Real outtime = 0; // current output time // read in command line arguments - if (argc < 2) - { + if (argc < 2) { chprintf("usage: %s \n", argv[0]); chprintf("Git Commit Hash = %s\n", GIT_HASH); chprintf("Macro Flags = %s\n", MACRO_FLAGS); @@ -71,31 +72,34 @@ int main(int argc, char *argv[]) Grid3D G; // read in the parameters - parse_params (param_file, &P, argc, argv); + parse_params(param_file, &P, argc, argv); // and output to screen chprintf("Git Commit Hash = %s\n", GIT_HASH); chprintf("Macro Flags = %s\n", MACRO_FLAGS); - chprintf ("Parameter values: nx = %d, ny = %d, nz = %d, tout = %f, init = %s, boundaries = %d %d %d %d %d %d\n", - P.nx, P.ny, P.nz, P.tout, P.init, P.xl_bcnd, P.xu_bcnd, P.yl_bcnd, P.yu_bcnd, P.zl_bcnd, P.zu_bcnd); - if (strcmp(P.init, "Read_Grid") == 0 ) chprintf ("Input directory: %s\n", P.indir); - chprintf ("Output directory: %s\n", P.outdir); - - //Create a Log file to output run-time messages and output the git hash and - //macro flags used + chprintf( + "Parameter values: nx = %d, ny = %d, nz = %d, tout = %f, init = %s, " + "boundaries = %d %d %d %d %d %d\n", + P.nx, P.ny, P.nz, P.tout, P.init, P.xl_bcnd, P.xu_bcnd, P.yl_bcnd, + P.yu_bcnd, P.zl_bcnd, P.zu_bcnd); + if (strcmp(P.init, "Read_Grid") == 0) + chprintf("Input directory: %s\n", P.indir); + chprintf("Output directory: %s\n", P.outdir); + + // Create a Log file to output run-time messages and output the git hash and + // macro flags used Create_Log_File(P); std::string message = "Git Commit Hash = " + std::string(GIT_HASH); - Write_Message_To_Log_File( message.c_str() ); + Write_Message_To_Log_File(message.c_str()); message = "Macro Flags = " + std::string(MACRO_FLAGS); - Write_Message_To_Log_File( message.c_str() ); - - + Write_Message_To_Log_File(message.c_str()); // initialize the grid G.Initialize(&P); - chprintf("Local number of grid cells: %d %d %d %d\n", G.H.nx_real, G.H.ny_real, G.H.nz_real, G.H.n_cells); + chprintf("Local number of grid cells: %d %d %d %d\n", G.H.nx_real, + G.H.ny_real, G.H.nz_real, G.H.n_cells); message = "Initializing Simulation"; - Write_Message_To_Log_File( message.c_str() ); + Write_Message_To_Log_File(message.c_str()); // Set initial conditions and calculate first dt chprintf("Setting initial conditions...\n"); @@ -108,123 +112,127 @@ int main(int argc, char *argv[]) nfile = P.nfile; } - #ifdef DE - chprintf("\nUsing Dual Energy Formalism:\n eta_1: %0.3f eta_2: %0.4f\n", DE_ETA_1, DE_ETA_2 ); - message = " eta_1: " + std::to_string(DE_ETA_1) + " eta_2: " + std::to_string(DE_ETA_2); - Write_Message_To_Log_File( message.c_str() ); - #endif +#ifdef DE + chprintf("\nUsing Dual Energy Formalism:\n eta_1: %0.3f eta_2: %0.4f\n", + DE_ETA_1, DE_ETA_2); + message = " eta_1: " + std::to_string(DE_ETA_1) + + " eta_2: " + std::to_string(DE_ETA_2); + Write_Message_To_Log_File(message.c_str()); +#endif - #ifdef CPU_TIME +#ifdef CPU_TIME G.Timer.Initialize(); - #endif +#endif - #ifdef GRAVITY +#ifdef GRAVITY G.Initialize_Gravity(&P); - #endif +#endif - #ifdef PARTICLES +#ifdef PARTICLES G.Initialize_Particles(&P); - #endif +#endif - #ifdef COSMOLOGY +#ifdef COSMOLOGY G.Initialize_Cosmology(&P); - #endif +#endif - #ifdef COOLING_GRACKLE +#ifdef COOLING_GRACKLE G.Initialize_Grackle(&P); - #endif +#endif - #ifdef CHEMISTRY_GPU +#ifdef CHEMISTRY_GPU G.Initialize_Chemistry(&P); - #endif +#endif - #ifdef ANALYSIS +#ifdef ANALYSIS G.Initialize_Analysis_Module(&P); - if ( G.Analysis.Output_Now ) G.Compute_and_Output_Analysis(&P); - #endif + if (G.Analysis.Output_Now) G.Compute_and_Output_Analysis(&P); +#endif - #if defined(SUPERNOVA) && defined(PARTICLE_AGE) +#if defined(SUPERNOVA) && defined(PARTICLE_AGE) FeedbackAnalysis sn_analysis(G); #ifdef MPI_CHOLLA supernova::initState(&P, G.Particles.n_total_initial); #else supernova::initState(&P, G.Particles.n_local); - #endif // MPI_CHOLLA - #endif // SUPERNOVA && PARTICLE_AGE + #endif // MPI_CHOLLA +#endif // SUPERNOVA && PARTICLE_AGE - #ifdef STAR_FORMATION +#ifdef STAR_FORMATION star_formation::Initialize(G); - #endif +#endif - #ifdef GRAVITY_ANALYTIC_COMP +#ifdef GRAVITY_ANALYTIC_COMP G.Setup_Analytic_Potential(&P); - #endif +#endif - #ifdef GRAVITY +#ifdef GRAVITY // Get the gravitational potential for the first timestep - G.Compute_Gravitational_Potential( &P); - #endif + G.Compute_Gravitational_Potential(&P); +#endif - // Set boundary conditions (assign appropriate values to ghost cells) for hydro and potential + // Set boundary conditions (assign appropriate values to ghost cells) for + // hydro and potential chprintf("Setting boundary conditions...\n"); G.Set_Boundary_Conditions_Grid(P); chprintf("Boundary conditions set.\n"); - #ifdef GRAVITY_ANALYTIC_COMP +#ifdef GRAVITY_ANALYTIC_COMP G.Add_Analytic_Potential(); - #endif +#endif - #ifdef PARTICLES +#ifdef PARTICLES // Get the particles acceleration for the first timestep G.Get_Particles_Acceleration(); - #endif - - chprintf("Dimensions of each cell: dx = %f dy = %f dz = %f\n", G.H.dx, G.H.dy, G.H.dz); - chprintf("Ratio of specific heats gamma = %f\n",gama); - chprintf("Nstep = %d Timestep = %f Simulation time = %f\n", G.H.n_step, G.H.dt, G.H.t); +#endif + chprintf("Dimensions of each cell: dx = %f dy = %f dz = %f\n", G.H.dx, G.H.dy, + G.H.dz); + chprintf("Ratio of specific heats gamma = %f\n", gama); + chprintf("Nstep = %d Timestep = %f Simulation time = %f\n", G.H.n_step, + G.H.dt, G.H.t); - #ifdef OUTPUT - if (strcmp(P.init, "Read_Grid") != 0 || G.H.Output_Now ) { +#ifdef OUTPUT + if (strcmp(P.init, "Read_Grid") != 0 || G.H.Output_Now) { // write the initial conditions to file chprintf("Writing initial conditions to file...\n"); WriteData(G, P, nfile); } // add one to the output file count nfile++; - #endif //OUTPUT +#endif // OUTPUT - #ifdef MHD - // Check that the initial magnetic field has zero divergence - mhd::checkMagneticDivergence(G); - #endif //MHD +#ifdef MHD + // Check that the initial magnetic field has zero divergence + mhd::checkMagneticDivergence(G); +#endif // MHD // increment the next output time outtime += P.outstep; - #ifdef CPU_TIME +#ifdef CPU_TIME stop_init = get_time(); - init = stop_init - start_total; + init = stop_init - start_total; #ifdef MPI_CHOLLA init_min = ReduceRealMin(init); init_max = ReduceRealMax(init); init_avg = ReduceRealAvg(init); - chprintf("Init min: %9.4f max: %9.4f avg: %9.4f\n", init_min, init_max, init_avg); + chprintf("Init min: %9.4f max: %9.4f avg: %9.4f\n", init_min, init_max, + init_avg); #else printf("Init %9.4f\n", init); - #endif //MPI_CHOLLA - #endif //CPU_TIME + #endif // MPI_CHOLLA +#endif // CPU_TIME // Evolve the grid, one timestep at a time chprintf("Starting calculations.\n"); message = "Starting calculations."; - Write_Message_To_Log_File( message.c_str() ); - while (G.H.t < P.tout) - { - // get the start time - #ifdef CPU_TIME + Write_Message_To_Log_File(message.c_str()); + while (G.H.t < P.tout) { +// get the start time +#ifdef CPU_TIME G.Timer.Total.Start(); - #endif //CPU_TIME +#endif // CPU_TIME start_step = get_time(); // calculate the timestep. Note: this computes the timestep ONLY on the @@ -234,16 +242,17 @@ int main(int argc, char *argv[]) if (G.H.t + G.H.dt > outtime) G.H.dt = outtime - G.H.t; - #if defined(SUPERNOVA) && defined(PARTICLE_AGE) +#if defined(SUPERNOVA) && defined(PARTICLE_AGE) supernova::Cluster_Feedback(G, sn_analysis); - #endif //SUPERNOVA && PARTICLE_AGE +#endif // SUPERNOVA && PARTICLE_AGE - #ifdef PARTICLES - //Advance the particles KDK( first step ): Velocities are updated by 0.5*dt and positions are updated by dt - G.Advance_Particles( 1 ); - //Transfer the particles that moved outside the local domain +#ifdef PARTICLES + // Advance the particles KDK( first step ): Velocities are updated by 0.5*dt + // and positions are updated by dt + G.Advance_Particles(1); + // Transfer the particles that moved outside the local domain G.Transfer_Particles_Boundaries(P); - #endif +#endif // Advance the grid by one timestep dti = G.Update_Hydro_Grid(); @@ -251,117 +260,116 @@ int main(int argc, char *argv[]) // update the simulation time ( t += dt ) G.Update_Time(); - - #ifdef GRAVITY - //Compute Gravitational potential for next step - G.Compute_Gravitational_Potential( &P); - #endif +#ifdef GRAVITY + // Compute Gravitational potential for next step + G.Compute_Gravitational_Potential(&P); +#endif // add one to the timestep count G.H.n_step++; - //Set the Grid boundary conditions for next time step + // Set the Grid boundary conditions for next time step G.Set_Boundary_Conditions_Grid(P); - #ifdef GRAVITY_ANALYTIC_COMP +#ifdef GRAVITY_ANALYTIC_COMP G.Add_Analytic_Potential(); - #endif +#endif - #ifdef PARTICLES - ///Advance the particles KDK( second step ): Velocities are updated by 0.5*dt using the Accelerations at the new positions - G.Advance_Particles( 2 ); - #endif +#ifdef PARTICLES + /// Advance the particles KDK( second step ): Velocities are updated by + /// 0.5*dt using the Accelerations at the new positions + G.Advance_Particles(2); +#endif - #ifdef STAR_FORMATION +#ifdef STAR_FORMATION star_formation::Star_Formation(G); - #endif +#endif - #ifdef CPU_TIME +#ifdef CPU_TIME G.Timer.Total.End(); - #endif //CPU_TIME +#endif // CPU_TIME - #ifdef CPU_TIME +#ifdef CPU_TIME G.Timer.Print_Times(); - #endif +#endif // get the time to compute the total timestep - stop_step = get_time(); + stop_step = get_time(); stop_total = get_time(); - G.H.t_wall = stop_total-start_total; - #ifdef MPI_CHOLLA + G.H.t_wall = stop_total - start_total; +#ifdef MPI_CHOLLA G.H.t_wall = ReduceRealMax(G.H.t_wall); - #endif - chprintf("n_step: %d sim time: %10.7f sim timestep: %7.4e timestep time = %9.3f ms total time = %9.4f s\n\n", - G.H.n_step, G.H.t, G.H.dt, (stop_step-start_step)*1000, G.H.t_wall); +#endif + chprintf( + "n_step: %d sim time: %10.7f sim timestep: %7.4e timestep time = " + "%9.3f ms total time = %9.4f s\n\n", + G.H.n_step, G.H.t, G.H.dt, (stop_step - start_step) * 1000, G.H.t_wall); - #ifdef OUTPUT_ALWAYS +#ifdef OUTPUT_ALWAYS G.H.Output_Now = true; - #endif +#endif - #ifdef ANALYSIS - if ( G.Analysis.Output_Now ) G.Compute_and_Output_Analysis(&P); - #if defined(SUPERNOVA) && defined(PARTICLE_AGE) - sn_analysis.Compute_Gas_Velocity_Dispersion(G); - #endif - #endif +#ifdef ANALYSIS + if (G.Analysis.Output_Now) G.Compute_and_Output_Analysis(&P); + #if defined(SUPERNOVA) && defined(PARTICLE_AGE) + sn_analysis.Compute_Gas_Velocity_Dispersion(G); + #endif +#endif - // if ( P.n_steps_output > 0 && G.H.n_step % P.n_steps_output == 0) G.H.Output_Now = true; + // if ( P.n_steps_output > 0 && G.H.n_step % P.n_steps_output == 0) + // G.H.Output_Now = true; - if (G.H.t == outtime || G.H.Output_Now ) - { - #ifdef OUTPUT + if (G.H.t == outtime || G.H.Output_Now) { +#ifdef OUTPUT /*output the grid data*/ WriteData(G, P, nfile); // add one to the output file count nfile++; - #endif //OUTPUT +#endif // OUTPUT // update to the next output time outtime += P.outstep; } - #ifdef CPU_TIME +#ifdef CPU_TIME G.Timer.n_steps += 1; - #endif +#endif - #ifdef N_STEPS_LIMIT +#ifdef N_STEPS_LIMIT // Exit the loop when reached the limit number of steps (optional) - if ( G.H.n_step == N_STEPS_LIMIT) { + if (G.H.n_step == N_STEPS_LIMIT) { WriteData(G, P, nfile); break; } - #endif - +#endif - #ifdef COSMOLOGY +#ifdef COSMOLOGY // Exit the loop when reached the last scale_factor output - if ( G.Cosmo.exit_now ) { - chprintf( "\nReached Last Cosmological Output: Ending Simulation\n"); + if (G.Cosmo.exit_now) { + chprintf("\nReached Last Cosmological Output: Ending Simulation\n"); break; } - #endif - - #ifdef MHD - // Check that the magnetic field has zero divergence - mhd::checkMagneticDivergence(G); - #endif //MHD - } /*end loop over timesteps*/ +#endif +#ifdef MHD + // Check that the magnetic field has zero divergence + mhd::checkMagneticDivergence(G); +#endif // MHD + } /*end loop over timesteps*/ - #ifdef CPU_TIME +#ifdef CPU_TIME // Print timing statistics - G.Timer.Print_Average_Times( P ); - #endif + G.Timer.Print_Average_Times(P); +#endif message = "Simulation completed successfully."; - Write_Message_To_Log_File( message.c_str() ); + Write_Message_To_Log_File(message.c_str()); // free the grid G.Reset(); - #ifdef MPI_CHOLLA +#ifdef MPI_CHOLLA MPI_Finalize(); - #endif /*MPI_CHOLLA*/ +#endif /*MPI_CHOLLA*/ return 0; - } diff --git a/src/main_tests.cpp b/src/main_tests.cpp index 29e56b496..d88f5a50f 100644 --- a/src/main_tests.cpp +++ b/src/main_tests.cpp @@ -6,10 +6,10 @@ */ // STL includes -#include #include -#include #include +#include +#include // External Libraries and Headers #include @@ -25,78 +25,74 @@ testingUtilities::GlobalString globalMpiLauncher; bool globalRunCholla; bool globalCompareSystemTestResults; - /*! * \brief Class for parsing input flags. Modified from * https://stackoverflow.com/questions/865668/parsing-command-line-arguments-in-c * */ -class InputParser{ - public: - // ===================================================================== - /*! - * \brief Get the option that follows the given flag. Also checks that - * the flag exists and is not empty - * - * \param option The string option to look for - * \return const std::string& The option the follows a given flag - */ - const std::string& getCmdOption(const std::string &option) const - { - // First check that the option exists - if(not cmdOptionExists(option)) - { - std::string errMessage = "Error: argument '" + option + "' not found. "; - throw std::invalid_argument(errMessage); - } +class InputParser +{ + public: + // ===================================================================== + /*! + * \brief Get the option that follows the given flag. Also checks that + * the flag exists and is not empty + * + * \param option The string option to look for + * \return const std::string& The option the follows a given flag + */ + const std::string &getCmdOption(const std::string &option) const + { + // First check that the option exists + if (not cmdOptionExists(option)) { + std::string errMessage = "Error: argument '" + option + "' not found. "; + throw std::invalid_argument(errMessage); + } - std::vector::const_iterator itr; - itr = std::find(this->_tokens.begin(), this->_tokens.end(), option); - if (itr != this->_tokens.end() && ++itr != this->_tokens.end()) - { - return *itr; - } - else - { - std::string errMessage = "Error: empty argument '" + option + "'"; - throw std::invalid_argument(errMessage); - } - } - // ===================================================================== + std::vector::const_iterator itr; + itr = std::find(this->_tokens.begin(), this->_tokens.end(), option); + if (itr != this->_tokens.end() && ++itr != this->_tokens.end()) { + return *itr; + } else { + std::string errMessage = "Error: empty argument '" + option + "'"; + throw std::invalid_argument(errMessage); + } + } + // ===================================================================== - // ===================================================================== - /*! - * \brief Checks that an option exists. Returns True if it exists and - * False otherwise - * - * \param option The option flag to search for - * \return true The option flag exists in argv - * \return false The option flage does not exist in argv - */ - bool cmdOptionExists(const std::string &option) const - { - return std::find(this->_tokens.begin(), this->_tokens.end(), option) - != this->_tokens.end(); - } - // ===================================================================== + // ===================================================================== + /*! + * \brief Checks that an option exists. Returns True if it exists and + * False otherwise + * + * \param option The option flag to search for + * \return true The option flag exists in argv + * \return false The option flage does not exist in argv + */ + bool cmdOptionExists(const std::string &option) const + { + return std::find(this->_tokens.begin(), this->_tokens.end(), option) != + this->_tokens.end(); + } + // ===================================================================== - // ===================================================================== - // constructor and destructor - /*! - * \brief Construct a new Input Parser object - * - * \param argc argc from main - * \param argv argv from main - */ - InputParser (int &argc, char **argv) - { - for (int i=1; i < argc; ++i) - this->_tokens.push_back(std::string(argv[i])); - } - ~InputParser() = default; - // ===================================================================== - private: - std::vector _tokens; + // ===================================================================== + // constructor and destructor + /*! + * \brief Construct a new Input Parser object + * + * \param argc argc from main + * \param argv argv from main + */ + InputParser(int &argc, char **argv) + { + for (int i = 1; i < argc; ++i) + this->_tokens.push_back(std::string(argv[i])); + } + ~InputParser() = default; + // ===================================================================== + private: + std::vector _tokens; }; /*! @@ -111,48 +107,39 @@ class InputParser{ */ int main(int argc, char **argv) { - // First we initialize Googletest. Note, this removes all gtest related - // arguments from argv and argc - ::testing::InitGoogleTest(&argc, argv); + // First we initialize Googletest. Note, this removes all gtest related + // arguments from argv and argc + ::testing::InitGoogleTest(&argc, argv); - // Make sure death tests are threadsafe. This is potentially much slower than - // using "fast" instead of "threadsafe" but it makes sure tests are threadsafe - // in a multithreaded environment. If the performance becomes an issue we can - // try "fast", it can also be set on a test by test basis - ::testing::GTEST_FLAG(death_test_style) = "threadsafe"; + // Make sure death tests are threadsafe. This is potentially much slower than + // using "fast" instead of "threadsafe" but it makes sure tests are threadsafe + // in a multithreaded environment. If the performance becomes an issue we can + // try "fast", it can also be set on a test by test basis + ::testing::GTEST_FLAG(death_test_style) = "threadsafe"; - // Initialize global variables - InputParser input(argc, argv); - globalChollaRoot.init(input.getCmdOption("--cholla-root")); - globalChollaBuild.init(input.getCmdOption("--build-type")); - globalChollaMachine.init(input.getCmdOption("--machine")); - if (input.cmdOptionExists("--mpi-launcher")) - { - globalMpiLauncher.init(input.getCmdOption("--mpi-launcher")); - } - else - { - globalMpiLauncher.init("mpirun -np"); - } + // Initialize global variables + InputParser input(argc, argv); + globalChollaRoot.init(input.getCmdOption("--cholla-root")); + globalChollaBuild.init(input.getCmdOption("--build-type")); + globalChollaMachine.init(input.getCmdOption("--machine")); + if (input.cmdOptionExists("--mpi-launcher")) { + globalMpiLauncher.init(input.getCmdOption("--mpi-launcher")); + } else { + globalMpiLauncher.init("mpirun -np"); + } - if (input.cmdOptionExists("--runCholla=false")) - { - globalRunCholla = false; - } - else - { - globalRunCholla = true; - } + if (input.cmdOptionExists("--runCholla=false")) { + globalRunCholla = false; + } else { + globalRunCholla = true; + } - if (input.cmdOptionExists("--compareSystemTestResults=false")) - { - globalCompareSystemTestResults = false; - } - else - { - globalCompareSystemTestResults = true; - } + if (input.cmdOptionExists("--compareSystemTestResults=false")) { + globalCompareSystemTestResults = false; + } else { + globalCompareSystemTestResults = true; + } - // Run test and return result - return RUN_ALL_TESTS(); + // Run test and return result + return RUN_ALL_TESTS(); } diff --git a/src/mhd/ct_electric_fields.cu b/src/mhd/ct_electric_fields.cu index 542dda3a7..e816c0a65 100644 --- a/src/mhd/ct_electric_fields.cu +++ b/src/mhd/ct_electric_fields.cu @@ -16,331 +16,368 @@ #ifdef MHD namespace mhd { - // ========================================================================= - __global__ void Calculate_CT_Electric_Fields(Real const *fluxX, - Real const *fluxY, - Real const *fluxZ, - Real const *dev_conserved, - Real *ctElectricFields, - int const nx, - int const ny, - int const nz, - int const n_cells) - { - // get a thread index - int const threadId = threadIdx.x + blockIdx.x * blockDim.x; - int xid, yid, zid; - cuda_utilities::compute3DIndices(threadId, nx, ny, xid, yid, zid); +// ========================================================================= +__global__ void Calculate_CT_Electric_Fields( + Real const *fluxX, Real const *fluxY, Real const *fluxZ, + Real const *dev_conserved, Real *ctElectricFields, int const nx, + int const ny, int const nz, int const n_cells) +{ + // get a thread index + int const threadId = threadIdx.x + blockIdx.x * blockDim.x; + int xid, yid, zid; + cuda_utilities::compute3DIndices(threadId, nx, ny, xid, yid, zid); - // Thread guard to avoid overrun and to skip the first two cells since - // those ghost cells can't be reconstructed - if ( xid > 1 - and yid > 1 - and zid > 1 - and xid < nx - and yid < ny - and zid < nz) - { - // According to Stone et al. 2008 section 5.3 and the source code of - // Athena, the following equation relate the magnetic flux to the - // face centered electric fields/EMF. -cross(V,B)x is the negative - // of the x-component of V cross B. Note that "X" is the direction - // the solver is running in this case, not necessarily the true "X". - // F_x[(grid_enum::fluxX_magnetic_z)*n_cells] = VxBy - BxVy = -(-cross(V,B))z = -EMF_Z - // F_x[(grid_enum::fluxX_magnetic_y)*n_cells] = VxBz - BxVz = (-cross(V,B))y = EMF_Y - // F_y[(grid_enum::fluxY_magnetic_x)*n_cells] = VxBy - BxVy = -(-cross(V,B))z = -EMF_X - // F_y[(grid_enum::fluxY_magnetic_z)*n_cells] = VxBz - BxVz = (-cross(V,B))y = EMF_Z - // F_z[(grid_enum::fluxZ_magnetic_y)*n_cells] = VxBy - BxVy = -(-cross(V,B))z = -EMF_Y - // F_z[(grid_enum::fluxZ_magnetic_x)*n_cells] = VxBz - BxVz = (-cross(V,B))y = EMF_X + // Thread guard to avoid overrun and to skip the first two cells since + // those ghost cells can't be reconstructed + if (xid > 1 and yid > 1 and zid > 1 and xid < nx and yid < ny and zid < nz) { + // According to Stone et al. 2008 section 5.3 and the source code of + // Athena, the following equation relate the magnetic flux to the + // face centered electric fields/EMF. -cross(V,B)x is the negative + // of the x-component of V cross B. Note that "X" is the direction + // the solver is running in this case, not necessarily the true "X". + // F_x[(grid_enum::fluxX_magnetic_z)*n_cells] = VxBy - BxVy = + // -(-cross(V,B))z = -EMF_Z F_x[(grid_enum::fluxX_magnetic_y)*n_cells] = + // VxBz - BxVz = (-cross(V,B))y = EMF_Y + // F_y[(grid_enum::fluxY_magnetic_x)*n_cells] = VxBy - BxVy = + // -(-cross(V,B))z = -EMF_X F_y[(grid_enum::fluxY_magnetic_z)*n_cells] = + // VxBz - BxVz = (-cross(V,B))y = EMF_Z + // F_z[(grid_enum::fluxZ_magnetic_y)*n_cells] = VxBy - BxVy = + // -(-cross(V,B))z = -EMF_Y F_z[(grid_enum::fluxZ_magnetic_x)*n_cells] = + // VxBz - BxVz = (-cross(V,B))y = EMF_X - // Notes on Implementation Details - // - The density flux has the same sign as the velocity on the face - // and we only care about the sign so we're using the density flux - // to perform upwinding checks - // - All slopes are computed without the factor of two shown in - // Stone & Gardiner 2008 eqn. 24. That factor of two is taken care - // of in the final assembly of the electric field + // Notes on Implementation Details + // - The density flux has the same sign as the velocity on the face + // and we only care about the sign so we're using the density flux + // to perform upwinding checks + // - All slopes are computed without the factor of two shown in + // Stone & Gardiner 2008 eqn. 24. That factor of two is taken care + // of in the final assembly of the electric field - // Variable to get the sign of the velocity at the interface. - Real signUpwind; + // Variable to get the sign of the velocity at the interface. + Real signUpwind; - // Slope and face variables. Format is - // "__". Slope/Face indicates if the - // value is a slope or a face centered EMF, direction indicates the - // direction of the derivative/face and pos/neg indicates if it's - // the slope on the positive or negative side of the edge field - // being computed. Note that the direction for the face is parallel - // to the face and the other direction that is parallel to that face - // is the direction of the electric field being calculated - Real slope_x_pos, slope_x_neg, - slope_y_pos, slope_y_neg, - slope_z_pos, slope_z_neg, - face_x_pos, face_x_neg, - face_y_pos, face_y_neg, - face_z_pos, face_z_neg; - // ================ - // X electric field - // ================ + // Slope and face variables. Format is + // "__". Slope/Face indicates if the + // value is a slope or a face centered EMF, direction indicates the + // direction of the derivative/face and pos/neg indicates if it's + // the slope on the positive or negative side of the edge field + // being computed. Note that the direction for the face is parallel + // to the face and the other direction that is parallel to that face + // is the direction of the electric field being calculated + Real slope_x_pos, slope_x_neg, slope_y_pos, slope_y_neg, slope_z_pos, + slope_z_neg, face_x_pos, face_x_neg, face_y_pos, face_y_neg, face_z_pos, + face_z_neg; + // ================ + // X electric field + // ================ - // Y-direction slope on the positive Y side. S&G 2009 equation 23 - signUpwind = fluxZ[cuda_utilities::compute1DIndex(xid, yid, zid-1, nx, ny)]; - if (signUpwind > 0.0) - { - slope_y_pos = mhd::_internal::_ctSlope(fluxY, dev_conserved, -1, 0, 2, -1, 1, 2, xid, yid, zid, nx, ny, n_cells); - } - else if (signUpwind < 0.0) - { - slope_y_pos = mhd::_internal::_ctSlope(fluxY, dev_conserved, -1, 0, -1, -1, 1, -1, xid, yid, zid, nx, ny, n_cells); - } - else - { - slope_y_pos = 0.5 * (mhd::_internal::_ctSlope(fluxY, dev_conserved, -1, 0, 2, -1, 1, 2, xid, yid, zid, nx, ny, n_cells) - + mhd::_internal::_ctSlope(fluxY, dev_conserved, -1, 0, -1, -1, 1, -1, xid, yid, zid, nx, ny, n_cells)); - } + // Y-direction slope on the positive Y side. S&G 2009 equation 23 + signUpwind = + fluxZ[cuda_utilities::compute1DIndex(xid, yid, zid - 1, nx, ny)]; + if (signUpwind > 0.0) { + slope_y_pos = + mhd::_internal::_ctSlope(fluxY, dev_conserved, -1, 0, 2, -1, 1, 2, + xid, yid, zid, nx, ny, n_cells); + } else if (signUpwind < 0.0) { + slope_y_pos = + mhd::_internal::_ctSlope(fluxY, dev_conserved, -1, 0, -1, -1, 1, -1, + xid, yid, zid, nx, ny, n_cells); + } else { + slope_y_pos = + 0.5 * + (mhd::_internal::_ctSlope(fluxY, dev_conserved, -1, 0, 2, -1, 1, 2, + xid, yid, zid, nx, ny, n_cells) + + mhd::_internal::_ctSlope(fluxY, dev_conserved, -1, 0, -1, -1, 1, -1, + xid, yid, zid, nx, ny, n_cells)); + } - // Y-direction slope on the negative Y side. S&G 2009 equation 23 - signUpwind = fluxZ[cuda_utilities::compute1DIndex(xid, yid-1, zid-1, nx, ny)]; - if (signUpwind > 0.0) - { - slope_y_neg = mhd::_internal::_ctSlope(fluxY, dev_conserved, -1, 0, 1, 2, 1, 2, xid, yid, zid, nx, ny, n_cells); - } - else if (signUpwind < 0.0) - { - slope_y_neg = mhd::_internal::_ctSlope(fluxY, dev_conserved, -1, 0, 1, -1, 1, -1, xid, yid, zid, nx, ny, n_cells); - } - else - { - slope_y_neg = 0.5 * (mhd::_internal::_ctSlope(fluxY, dev_conserved, -1, 0, 1, 2, 1, 2, xid, yid, zid, nx, ny, n_cells) - + mhd::_internal::_ctSlope(fluxY, dev_conserved, -1, 0, 1, -1, 1, -1, xid, yid, zid, nx, ny, n_cells)); - } + // Y-direction slope on the negative Y side. S&G 2009 equation 23 + signUpwind = + fluxZ[cuda_utilities::compute1DIndex(xid, yid - 1, zid - 1, nx, ny)]; + if (signUpwind > 0.0) { + slope_y_neg = + mhd::_internal::_ctSlope(fluxY, dev_conserved, -1, 0, 1, 2, 1, 2, xid, + yid, zid, nx, ny, n_cells); + } else if (signUpwind < 0.0) { + slope_y_neg = + mhd::_internal::_ctSlope(fluxY, dev_conserved, -1, 0, 1, -1, 1, -1, + xid, yid, zid, nx, ny, n_cells); + } else { + slope_y_neg = + 0.5 * (mhd::_internal::_ctSlope(fluxY, dev_conserved, -1, 0, 1, 2, 1, + 2, xid, yid, zid, nx, ny, n_cells) + + mhd::_internal::_ctSlope(fluxY, dev_conserved, -1, 0, 1, -1, 1, + -1, xid, yid, zid, nx, ny, n_cells)); + } - // Z-direction slope on the positive Z side. S&G 2009 equation 23 - signUpwind = fluxY[cuda_utilities::compute1DIndex(xid, yid-1, zid, nx, ny)]; - if (signUpwind > 0.0) - { - slope_z_pos = mhd::_internal::_ctSlope(fluxZ, dev_conserved, 1, 0, 1, -1, 1, 2, xid, yid, zid, nx, ny, n_cells); - } - else if (signUpwind < 0.0) - { - slope_z_pos = mhd::_internal::_ctSlope(fluxZ, dev_conserved, 1, 0, -1, -1, 2, -1, xid, yid, zid, nx, ny, n_cells); - } - else - { - slope_z_pos = 0.5 * (mhd::_internal::_ctSlope(fluxZ, dev_conserved, 1, 0, 1, -1, 1, 2, xid, yid, zid, nx, ny, n_cells) - + mhd::_internal::_ctSlope(fluxZ, dev_conserved, 1, 0, -1, -1, 2, -1, xid, yid, zid, nx, ny, n_cells)); - } + // Z-direction slope on the positive Z side. S&G 2009 equation 23 + signUpwind = + fluxY[cuda_utilities::compute1DIndex(xid, yid - 1, zid, nx, ny)]; + if (signUpwind > 0.0) { + slope_z_pos = + mhd::_internal::_ctSlope(fluxZ, dev_conserved, 1, 0, 1, -1, 1, 2, xid, + yid, zid, nx, ny, n_cells); + } else if (signUpwind < 0.0) { + slope_z_pos = + mhd::_internal::_ctSlope(fluxZ, dev_conserved, 1, 0, -1, -1, 2, -1, + xid, yid, zid, nx, ny, n_cells); + } else { + slope_z_pos = + 0.5 * (mhd::_internal::_ctSlope(fluxZ, dev_conserved, 1, 0, 1, -1, 1, + 2, xid, yid, zid, nx, ny, n_cells) + + mhd::_internal::_ctSlope(fluxZ, dev_conserved, 1, 0, -1, -1, 2, + -1, xid, yid, zid, nx, ny, n_cells)); + } - // Z-direction slope on the negative Z side. S&G 2009 equation 23 - signUpwind = fluxY[cuda_utilities::compute1DIndex(xid, yid-1, zid-1, nx, ny)]; - if (signUpwind > 0.0) - { - slope_z_neg = mhd::_internal::_ctSlope(fluxZ, dev_conserved, 1, 0, 1, 2, 1, 2, xid, yid, zid, nx, ny, n_cells); - } - else if (signUpwind < 0.0) - { - slope_z_neg = mhd::_internal::_ctSlope(fluxZ, dev_conserved, 1, 0, 2, -1, -1, 2, xid, yid, zid, nx, ny, n_cells); - } - else - { - slope_z_neg = 0.5 * (mhd::_internal::_ctSlope(fluxZ, dev_conserved, 1, 0, 1, 2, 1, 2, xid, yid, zid, nx, ny, n_cells) - + mhd::_internal::_ctSlope(fluxZ, dev_conserved, 1, 0, 2, -1, -1, 2, xid, yid, zid, nx, ny, n_cells)); - } + // Z-direction slope on the negative Z side. S&G 2009 equation 23 + signUpwind = + fluxY[cuda_utilities::compute1DIndex(xid, yid - 1, zid - 1, nx, ny)]; + if (signUpwind > 0.0) { + slope_z_neg = + mhd::_internal::_ctSlope(fluxZ, dev_conserved, 1, 0, 1, 2, 1, 2, xid, + yid, zid, nx, ny, n_cells); + } else if (signUpwind < 0.0) { + slope_z_neg = + mhd::_internal::_ctSlope(fluxZ, dev_conserved, 1, 0, 2, -1, -1, 2, + xid, yid, zid, nx, ny, n_cells); + } else { + slope_z_neg = + 0.5 * (mhd::_internal::_ctSlope(fluxZ, dev_conserved, 1, 0, 1, 2, 1, + 2, xid, yid, zid, nx, ny, n_cells) + + mhd::_internal::_ctSlope(fluxZ, dev_conserved, 1, 0, 2, -1, -1, + 2, xid, yid, zid, nx, ny, n_cells)); + } - // Load the face centered electric fields Note the negative signs to - // convert from magnetic flux to electric field + // Load the face centered electric fields Note the negative signs to + // convert from magnetic flux to electric field - face_y_pos = + fluxZ[cuda_utilities::compute1DIndex(xid , yid , zid-1, nx, ny) + (grid_enum::fluxZ_magnetic_x)*n_cells]; - face_y_neg = + fluxZ[cuda_utilities::compute1DIndex(xid , yid-1, zid-1, nx, ny) + (grid_enum::fluxZ_magnetic_x)*n_cells]; - face_z_pos = - fluxY[cuda_utilities::compute1DIndex(xid , yid-1, zid , nx, ny) + (grid_enum::fluxY_magnetic_x)*n_cells]; - face_z_neg = - fluxY[cuda_utilities::compute1DIndex(xid , yid-1, zid-1, nx, ny) + (grid_enum::fluxY_magnetic_x)*n_cells]; + face_y_pos = + +fluxZ[cuda_utilities::compute1DIndex(xid, yid, zid - 1, nx, ny) + + (grid_enum::fluxZ_magnetic_x)*n_cells]; + face_y_neg = + +fluxZ[cuda_utilities::compute1DIndex(xid, yid - 1, zid - 1, nx, ny) + + (grid_enum::fluxZ_magnetic_x)*n_cells]; + face_z_pos = + -fluxY[cuda_utilities::compute1DIndex(xid, yid - 1, zid, nx, ny) + + (grid_enum::fluxY_magnetic_x)*n_cells]; + face_z_neg = + -fluxY[cuda_utilities::compute1DIndex(xid, yid - 1, zid - 1, nx, ny) + + (grid_enum::fluxY_magnetic_x)*n_cells]; - // sum and average face centered electric fields and slopes to get the - // edge averaged electric field. - // S&G 2009 equation 22 - ctElectricFields[threadId + 0*n_cells] = 0.25 * (+ face_y_pos - + face_y_neg - + face_z_pos - + face_z_neg - + slope_y_pos - + slope_y_neg - + slope_z_pos - + slope_z_neg); + // sum and average face centered electric fields and slopes to get the + // edge averaged electric field. + // S&G 2009 equation 22 + ctElectricFields[threadId + 0 * n_cells] = + 0.25 * (+face_y_pos + face_y_neg + face_z_pos + face_z_neg + + slope_y_pos + slope_y_neg + slope_z_pos + slope_z_neg); - // ================ - // Y electric field - // ================ + // ================ + // Y electric field + // ================ - // X-direction slope on the positive X side. S&G 2009 equation 23 - signUpwind = fluxZ[cuda_utilities::compute1DIndex(xid, yid, zid-1, nx, ny)]; - if (signUpwind > 0.0) - { - slope_x_pos = mhd::_internal::_ctSlope(fluxX, dev_conserved, 1, 1, 2, -1, 0, 2, xid, yid, zid, nx, ny, n_cells); - } - else if (signUpwind < 0.0) - { - slope_x_pos = mhd::_internal::_ctSlope(fluxX, dev_conserved, 1, 1, -1, -1, 0, -1, xid, yid, zid, nx, ny, n_cells); - } - else - { - slope_x_pos = 0.5 * (mhd::_internal::_ctSlope(fluxX, dev_conserved, 1, 1, 2, -1, 0, 2, xid, yid, zid, nx, ny, n_cells) - + mhd::_internal::_ctSlope(fluxX, dev_conserved, 1, 1, -1, -1, 0, -1, xid, yid, zid, nx, ny, n_cells)); - } + // X-direction slope on the positive X side. S&G 2009 equation 23 + signUpwind = + fluxZ[cuda_utilities::compute1DIndex(xid, yid, zid - 1, nx, ny)]; + if (signUpwind > 0.0) { + slope_x_pos = + mhd::_internal::_ctSlope(fluxX, dev_conserved, 1, 1, 2, -1, 0, 2, xid, + yid, zid, nx, ny, n_cells); + } else if (signUpwind < 0.0) { + slope_x_pos = + mhd::_internal::_ctSlope(fluxX, dev_conserved, 1, 1, -1, -1, 0, -1, + xid, yid, zid, nx, ny, n_cells); + } else { + slope_x_pos = + 0.5 * (mhd::_internal::_ctSlope(fluxX, dev_conserved, 1, 1, 2, -1, 0, + 2, xid, yid, zid, nx, ny, n_cells) + + mhd::_internal::_ctSlope(fluxX, dev_conserved, 1, 1, -1, -1, 0, + -1, xid, yid, zid, nx, ny, n_cells)); + } - // X-direction slope on the negative X side. S&G 2009 equation 23 - signUpwind = fluxZ[cuda_utilities::compute1DIndex(xid-1, yid, zid-1, nx, ny)]; - if (signUpwind > 0.0) - { - slope_x_neg = mhd::_internal::_ctSlope(fluxX, dev_conserved, 1, 1, 0, 2, 0, 2, xid, yid, zid, nx, ny, n_cells); - } - else if (signUpwind < 0.0) - { - slope_x_neg = mhd::_internal::_ctSlope(fluxX, dev_conserved, 1, 1, 0, -1, 0, -1, xid, yid, zid, nx, ny, n_cells); - } - else - { - slope_x_neg = 0.5 * (mhd::_internal::_ctSlope(fluxX, dev_conserved, 1, 1, 0, 2, 0, 2, xid, yid, zid, nx, ny, n_cells) - + mhd::_internal::_ctSlope(fluxX, dev_conserved, 1, 1, 0, -1, 0, -1, xid, yid, zid, nx, ny, n_cells)); - } + // X-direction slope on the negative X side. S&G 2009 equation 23 + signUpwind = + fluxZ[cuda_utilities::compute1DIndex(xid - 1, yid, zid - 1, nx, ny)]; + if (signUpwind > 0.0) { + slope_x_neg = + mhd::_internal::_ctSlope(fluxX, dev_conserved, 1, 1, 0, 2, 0, 2, xid, + yid, zid, nx, ny, n_cells); + } else if (signUpwind < 0.0) { + slope_x_neg = + mhd::_internal::_ctSlope(fluxX, dev_conserved, 1, 1, 0, -1, 0, -1, + xid, yid, zid, nx, ny, n_cells); + } else { + slope_x_neg = + 0.5 * (mhd::_internal::_ctSlope(fluxX, dev_conserved, 1, 1, 0, 2, 0, + 2, xid, yid, zid, nx, ny, n_cells) + + mhd::_internal::_ctSlope(fluxX, dev_conserved, 1, 1, 0, -1, 0, + -1, xid, yid, zid, nx, ny, n_cells)); + } - // Z-direction slope on the positive Z side. S&G 2009 equation 23 - signUpwind = fluxX[cuda_utilities::compute1DIndex(xid-1, yid, zid, nx, ny)]; - if (signUpwind > 0.0) - { - slope_z_pos = mhd::_internal::_ctSlope(fluxZ, dev_conserved, -1, 1, 0, -1, 0, 2, xid, yid, zid, nx, ny, n_cells); - } - else if (signUpwind < 0.0) - { - slope_z_pos = mhd::_internal::_ctSlope(fluxZ, dev_conserved, -1, 1, -1, -1, 2, -1, xid, yid, zid, nx, ny, n_cells); - } - else - { - slope_z_pos = 0.5 * (mhd::_internal::_ctSlope(fluxZ, dev_conserved, -1, 1, 0, -1, 0, 2, xid, yid, zid, nx, ny, n_cells) - + mhd::_internal::_ctSlope(fluxZ, dev_conserved, -1, 1, -1, -1, 2, -1, xid, yid, zid, nx, ny, n_cells)); - } + // Z-direction slope on the positive Z side. S&G 2009 equation 23 + signUpwind = + fluxX[cuda_utilities::compute1DIndex(xid - 1, yid, zid, nx, ny)]; + if (signUpwind > 0.0) { + slope_z_pos = + mhd::_internal::_ctSlope(fluxZ, dev_conserved, -1, 1, 0, -1, 0, 2, + xid, yid, zid, nx, ny, n_cells); + } else if (signUpwind < 0.0) { + slope_z_pos = + mhd::_internal::_ctSlope(fluxZ, dev_conserved, -1, 1, -1, -1, 2, -1, + xid, yid, zid, nx, ny, n_cells); + } else { + slope_z_pos = + 0.5 * + (mhd::_internal::_ctSlope(fluxZ, dev_conserved, -1, 1, 0, -1, 0, 2, + xid, yid, zid, nx, ny, n_cells) + + mhd::_internal::_ctSlope(fluxZ, dev_conserved, -1, 1, -1, -1, 2, -1, + xid, yid, zid, nx, ny, n_cells)); + } - // Z-direction slope on the negative Z side. S&G 2009 equation 23 - signUpwind = fluxX[cuda_utilities::compute1DIndex(xid-1, yid, zid-1, nx, ny)]; - if (signUpwind > 0.0) - { - slope_z_neg = mhd::_internal::_ctSlope(fluxZ, dev_conserved, -1, 1, 0, 2, 0, 2, xid, yid, zid, nx, ny, n_cells); - } - else if (signUpwind < 0.0) - { - slope_z_neg = mhd::_internal::_ctSlope(fluxZ, dev_conserved, -1, 1, 2, -1, 2, -1, xid, yid, zid, nx, ny, n_cells); - } - else - { - slope_z_neg = 0.5 * (mhd::_internal::_ctSlope(fluxZ, dev_conserved, -1, 1, 0, 2, 0, 2, xid, yid, zid, nx, ny, n_cells) - + mhd::_internal::_ctSlope(fluxZ, dev_conserved, -1, 1, 2, -1, 2, -1, xid, yid, zid, nx, ny, n_cells)); - } + // Z-direction slope on the negative Z side. S&G 2009 equation 23 + signUpwind = + fluxX[cuda_utilities::compute1DIndex(xid - 1, yid, zid - 1, nx, ny)]; + if (signUpwind > 0.0) { + slope_z_neg = + mhd::_internal::_ctSlope(fluxZ, dev_conserved, -1, 1, 0, 2, 0, 2, xid, + yid, zid, nx, ny, n_cells); + } else if (signUpwind < 0.0) { + slope_z_neg = + mhd::_internal::_ctSlope(fluxZ, dev_conserved, -1, 1, 2, -1, 2, -1, + xid, yid, zid, nx, ny, n_cells); + } else { + slope_z_neg = + 0.5 * (mhd::_internal::_ctSlope(fluxZ, dev_conserved, -1, 1, 0, 2, 0, + 2, xid, yid, zid, nx, ny, n_cells) + + mhd::_internal::_ctSlope(fluxZ, dev_conserved, -1, 1, 2, -1, 2, + -1, xid, yid, zid, nx, ny, n_cells)); + } - // Load the face centered electric fields Note the negative signs to - // convert from magnetic flux to electric field - face_x_pos = - fluxZ[cuda_utilities::compute1DIndex(xid , yid, zid-1, nx, ny) + (grid_enum::fluxZ_magnetic_y)*n_cells]; - face_x_neg = - fluxZ[cuda_utilities::compute1DIndex(xid-1, yid, zid-1, nx, ny) + (grid_enum::fluxZ_magnetic_y)*n_cells]; - face_z_pos = + fluxX[cuda_utilities::compute1DIndex(xid-1, yid, zid , nx, ny) + (grid_enum::fluxX_magnetic_y)*n_cells]; - face_z_neg = + fluxX[cuda_utilities::compute1DIndex(xid-1, yid, zid-1, nx, ny) + (grid_enum::fluxX_magnetic_y)*n_cells]; + // Load the face centered electric fields Note the negative signs to + // convert from magnetic flux to electric field + face_x_pos = + -fluxZ[cuda_utilities::compute1DIndex(xid, yid, zid - 1, nx, ny) + + (grid_enum::fluxZ_magnetic_y)*n_cells]; + face_x_neg = + -fluxZ[cuda_utilities::compute1DIndex(xid - 1, yid, zid - 1, nx, ny) + + (grid_enum::fluxZ_magnetic_y)*n_cells]; + face_z_pos = + +fluxX[cuda_utilities::compute1DIndex(xid - 1, yid, zid, nx, ny) + + (grid_enum::fluxX_magnetic_y)*n_cells]; + face_z_neg = + +fluxX[cuda_utilities::compute1DIndex(xid - 1, yid, zid - 1, nx, ny) + + (grid_enum::fluxX_magnetic_y)*n_cells]; - // sum and average face centered electric fields and slopes to get the - // edge averaged electric field. - // S&G 2009 equation 22 - ctElectricFields[threadId + 1*n_cells] = 0.25 * (+ face_x_pos - + face_x_neg - + face_z_pos - + face_z_neg - + slope_x_pos - + slope_x_neg - + slope_z_pos - + slope_z_neg); + // sum and average face centered electric fields and slopes to get the + // edge averaged electric field. + // S&G 2009 equation 22 + ctElectricFields[threadId + 1 * n_cells] = + 0.25 * (+face_x_pos + face_x_neg + face_z_pos + face_z_neg + + slope_x_pos + slope_x_neg + slope_z_pos + slope_z_neg); - // ================ - // Z electric field - // ================ + // ================ + // Z electric field + // ================ - // Y-direction slope on the positive Y side. S&G 2009 equation 23 - signUpwind = fluxX[cuda_utilities::compute1DIndex(xid-1, yid, zid, nx, ny)]; - if (signUpwind > 0.0) - { - slope_y_pos = mhd::_internal::_ctSlope(fluxY, dev_conserved, 1, 2, 0, -1, 0, 1, xid, yid, zid, nx, ny, n_cells); - } - else if (signUpwind < 0.0) - { - slope_y_pos = mhd::_internal::_ctSlope(fluxY, dev_conserved, 1, 2, -1, -1, 1, -1, xid, yid, zid, nx, ny, n_cells); - } - else - { - slope_y_pos = 0.5 * (mhd::_internal::_ctSlope(fluxY, dev_conserved, 1, 2, 0, -1, 0, 1, xid, yid, zid, nx, ny, n_cells) - + mhd::_internal::_ctSlope(fluxY, dev_conserved, 1, 2, -1, -1, 1, -1, xid, yid, zid, nx, ny, n_cells)); - } + // Y-direction slope on the positive Y side. S&G 2009 equation 23 + signUpwind = + fluxX[cuda_utilities::compute1DIndex(xid - 1, yid, zid, nx, ny)]; + if (signUpwind > 0.0) { + slope_y_pos = + mhd::_internal::_ctSlope(fluxY, dev_conserved, 1, 2, 0, -1, 0, 1, xid, + yid, zid, nx, ny, n_cells); + } else if (signUpwind < 0.0) { + slope_y_pos = + mhd::_internal::_ctSlope(fluxY, dev_conserved, 1, 2, -1, -1, 1, -1, + xid, yid, zid, nx, ny, n_cells); + } else { + slope_y_pos = + 0.5 * (mhd::_internal::_ctSlope(fluxY, dev_conserved, 1, 2, 0, -1, 0, + 1, xid, yid, zid, nx, ny, n_cells) + + mhd::_internal::_ctSlope(fluxY, dev_conserved, 1, 2, -1, -1, 1, + -1, xid, yid, zid, nx, ny, n_cells)); + } - // Y-direction slope on the negative Y side. S&G 2009 equation 23 - signUpwind = fluxX[cuda_utilities::compute1DIndex(xid-1, yid-1, zid, nx, ny)]; - if (signUpwind > 0.0) - { - slope_y_neg = mhd::_internal::_ctSlope(fluxY, dev_conserved, 1, 2, 0, 1, 0, 1, xid, yid, zid, nx, ny, n_cells); - } - else if (signUpwind < 0.0) - { - slope_y_neg = mhd::_internal::_ctSlope(fluxY, dev_conserved, 1, 2, 1, -1, 1, -1, xid, yid, zid, nx, ny, n_cells); - } - else - { - slope_y_neg = 0.5 * (mhd::_internal::_ctSlope(fluxY, dev_conserved, 1, 2, 0, 1, 0, 1, xid, yid, zid, nx, ny, n_cells) - + mhd::_internal::_ctSlope(fluxY, dev_conserved, 1, 2, 1, -1, 1, -1, xid, yid, zid, nx, ny, n_cells)); - } + // Y-direction slope on the negative Y side. S&G 2009 equation 23 + signUpwind = + fluxX[cuda_utilities::compute1DIndex(xid - 1, yid - 1, zid, nx, ny)]; + if (signUpwind > 0.0) { + slope_y_neg = + mhd::_internal::_ctSlope(fluxY, dev_conserved, 1, 2, 0, 1, 0, 1, xid, + yid, zid, nx, ny, n_cells); + } else if (signUpwind < 0.0) { + slope_y_neg = + mhd::_internal::_ctSlope(fluxY, dev_conserved, 1, 2, 1, -1, 1, -1, + xid, yid, zid, nx, ny, n_cells); + } else { + slope_y_neg = + 0.5 * (mhd::_internal::_ctSlope(fluxY, dev_conserved, 1, 2, 0, 1, 0, + 1, xid, yid, zid, nx, ny, n_cells) + + mhd::_internal::_ctSlope(fluxY, dev_conserved, 1, 2, 1, -1, 1, + -1, xid, yid, zid, nx, ny, n_cells)); + } - // X-direction slope on the positive X side. S&G 2009 equation 23 - signUpwind = fluxY[cuda_utilities::compute1DIndex(xid, yid-1, zid, nx, ny)]; - if (signUpwind > 0.0) - { - slope_x_pos = mhd::_internal::_ctSlope(fluxX, dev_conserved, -1, 2, 1, -1, 0, 1, xid, yid, zid, nx, ny, n_cells); - } - else if (signUpwind < 0.0) - { - slope_x_pos = mhd::_internal::_ctSlope(fluxX, dev_conserved, -1, 2, -1, -1, 0, -1, xid, yid, zid, nx, ny, n_cells); - } - else - { - slope_x_pos = 0.5 * (mhd::_internal::_ctSlope(fluxX, dev_conserved, -1, 2, 1, -1, 0, 1, xid, yid, zid, nx, ny, n_cells) - + mhd::_internal::_ctSlope(fluxX, dev_conserved, -1, 2, -1, -1, 0, -1, xid, yid, zid, nx, ny, n_cells)); - } + // X-direction slope on the positive X side. S&G 2009 equation 23 + signUpwind = + fluxY[cuda_utilities::compute1DIndex(xid, yid - 1, zid, nx, ny)]; + if (signUpwind > 0.0) { + slope_x_pos = + mhd::_internal::_ctSlope(fluxX, dev_conserved, -1, 2, 1, -1, 0, 1, + xid, yid, zid, nx, ny, n_cells); + } else if (signUpwind < 0.0) { + slope_x_pos = + mhd::_internal::_ctSlope(fluxX, dev_conserved, -1, 2, -1, -1, 0, -1, + xid, yid, zid, nx, ny, n_cells); + } else { + slope_x_pos = + 0.5 * + (mhd::_internal::_ctSlope(fluxX, dev_conserved, -1, 2, 1, -1, 0, 1, + xid, yid, zid, nx, ny, n_cells) + + mhd::_internal::_ctSlope(fluxX, dev_conserved, -1, 2, -1, -1, 0, -1, + xid, yid, zid, nx, ny, n_cells)); + } - // X-direction slope on the negative X side. S&G 2009 equation 23 - signUpwind = fluxY[cuda_utilities::compute1DIndex(xid-1, yid-1, zid, nx, ny)]; - if (signUpwind > 0.0) - { - slope_x_neg = mhd::_internal::_ctSlope(fluxX, dev_conserved, -1, 2, 0, 1, 0, 1, xid, yid, zid, nx, ny, n_cells); - } - else if (signUpwind < 0.0) - { - slope_x_neg = mhd::_internal::_ctSlope(fluxX, dev_conserved, -1, 2, 0, -1, 0, -1, xid, yid, zid, nx, ny, n_cells); - } - else - { - slope_x_neg = 0.5 * (mhd::_internal::_ctSlope(fluxX, dev_conserved, -1, 2, 0, 1, 0, 1, xid, yid, zid, nx, ny, n_cells) - + mhd::_internal::_ctSlope(fluxX, dev_conserved, -1, 2, 0, -1, 0, -1, xid, yid, zid, nx, ny, n_cells)); - } + // X-direction slope on the negative X side. S&G 2009 equation 23 + signUpwind = + fluxY[cuda_utilities::compute1DIndex(xid - 1, yid - 1, zid, nx, ny)]; + if (signUpwind > 0.0) { + slope_x_neg = + mhd::_internal::_ctSlope(fluxX, dev_conserved, -1, 2, 0, 1, 0, 1, xid, + yid, zid, nx, ny, n_cells); + } else if (signUpwind < 0.0) { + slope_x_neg = + mhd::_internal::_ctSlope(fluxX, dev_conserved, -1, 2, 0, -1, 0, -1, + xid, yid, zid, nx, ny, n_cells); + } else { + slope_x_neg = + 0.5 * (mhd::_internal::_ctSlope(fluxX, dev_conserved, -1, 2, 0, 1, 0, + 1, xid, yid, zid, nx, ny, n_cells) + + mhd::_internal::_ctSlope(fluxX, dev_conserved, -1, 2, 0, -1, 0, + -1, xid, yid, zid, nx, ny, n_cells)); + } - // Load the face centered electric fields Note the negative signs to - // convert from magnetic flux to electric field - face_x_pos = + fluxY[cuda_utilities::compute1DIndex(xid , yid-1, zid, nx, ny) + (grid_enum::fluxY_magnetic_z)*n_cells]; - face_x_neg = + fluxY[cuda_utilities::compute1DIndex(xid-1, yid-1, zid, nx, ny) + (grid_enum::fluxY_magnetic_z)*n_cells]; - face_y_pos = - fluxX[cuda_utilities::compute1DIndex(xid-1, yid , zid, nx, ny) + (grid_enum::fluxX_magnetic_z)*n_cells]; - face_y_neg = - fluxX[cuda_utilities::compute1DIndex(xid-1, yid-1, zid, nx, ny) + (grid_enum::fluxX_magnetic_z)*n_cells]; + // Load the face centered electric fields Note the negative signs to + // convert from magnetic flux to electric field + face_x_pos = + +fluxY[cuda_utilities::compute1DIndex(xid, yid - 1, zid, nx, ny) + + (grid_enum::fluxY_magnetic_z)*n_cells]; + face_x_neg = + +fluxY[cuda_utilities::compute1DIndex(xid - 1, yid - 1, zid, nx, ny) + + (grid_enum::fluxY_magnetic_z)*n_cells]; + face_y_pos = + -fluxX[cuda_utilities::compute1DIndex(xid - 1, yid, zid, nx, ny) + + (grid_enum::fluxX_magnetic_z)*n_cells]; + face_y_neg = + -fluxX[cuda_utilities::compute1DIndex(xid - 1, yid - 1, zid, nx, ny) + + (grid_enum::fluxX_magnetic_z)*n_cells]; - // sum and average face centered electric fields and slopes to get the - // edge averaged electric field. - // S&G 2009 equation 22 - ctElectricFields[threadId + 2*n_cells] = 0.25 * (+ face_x_pos - + face_x_neg - + face_y_pos - + face_y_neg - + slope_x_pos - + slope_x_neg - + slope_y_pos - + slope_y_neg); - } - } - // ========================================================================= -} // end namespace mhd -#endif // MHD + // sum and average face centered electric fields and slopes to get the + // edge averaged electric field. + // S&G 2009 equation 22 + ctElectricFields[threadId + 2 * n_cells] = + 0.25 * (+face_x_pos + face_x_neg + face_y_pos + face_y_neg + + slope_x_pos + slope_x_neg + slope_y_pos + slope_y_neg); + } +} +// ========================================================================= +} // end namespace mhd +#endif // MHD diff --git a/src/mhd/ct_electric_fields.h b/src/mhd/ct_electric_fields.h index 9ba8780ee..eb79fa17e 100644 --- a/src/mhd/ct_electric_fields.h +++ b/src/mhd/ct_electric_fields.h @@ -16,142 +16,148 @@ // Local Includes #include "../global/global.h" #include "../global/global_cuda.h" -#include "../utils/gpu.hpp" #include "../utils/cuda_utilities.h" +#include "../utils/gpu.hpp" #ifdef MHD namespace mhd { - /*! - * \brief Namespace for functions required by functions within the mhd - * namespace. Everything in this name space should be regarded as private - * but is made accesible for testing - * - */ - namespace _internal - { - // ===================================================================== - /*! - * \brief Compute and return the slope of the electric field used to - * compute the CT electric fields. This function implements S&G 2009 - * equation 24 - * - * \param[in] flux The flux array - * \param[in] dev_conserved The conserved variable array - * \param[in] fluxSign The sign of the flux to convert it to magnetic - * field. Also serves to choose which magnetic flux is used, i.e. the Y - * or Z field - * \param[in] ctDirection The direction of the CT field that this slope - will be used to calculate - * \param[in] conservedQuadrent1 Which index should be reduced by one to get the correct conserved variables. Options are -1 for no reduction, 0 for reducing xid, 1 for reducing yid, and 2 for reducing zid - * \param[in] conservedQuadrent2 Which index should be reduced by one to get the correct conserved variables. Options are -1 for no reduction, 0 for reducing xid, 1 for reducing yid, and 2 for reducing zid - * \param[in] fluxQuadrent1 Which index should be reduced by one to get the correct flux variable. Options are -1 for no reduction, 0 for reducing xid, 1 for reducing yid, and 2 for reducing zid - * \param[in] fluxQuadrent2 Which index should be reduced by one to get the correct flux variable. Options are -1 for no reduction, 0 for reducing xid, 1 for reducing yid, and 2 for reducing zid - * \param[in] xid The x index - * \param[in] yid The y index - * \param[in] zid The z index - * \param[in] nx The number of cells in the x-direction - * \param[in] ny The number of cells in the y-direction - * \param[in] n_cells The total number of cells - * \return Real The slope of the electric field - */ - inline __host__ __device__ Real _ctSlope(Real const *flux, - Real const *dev_conserved, - Real const &fluxSign, - int const &ctDirection, - int const &conservedQuadrent1, - int const &conservedQuadrent2, - int const &fluxQuadrent1, - int const &fluxQuadrent2, - int const &xid, - int const &yid, - int const &zid, - int const &nx, - int const &ny, - int const &n_cells - ) - { - // Compute the various required indices +/*! + * \brief Namespace for functions required by functions within the mhd + * namespace. Everything in this name space should be regarded as private + * but is made accesible for testing + * + */ +namespace _internal +{ +// ===================================================================== +/*! + * \brief Compute and return the slope of the electric field used to + * compute the CT electric fields. This function implements S&G 2009 + * equation 24 + * + * \param[in] flux The flux array + * \param[in] dev_conserved The conserved variable array + * \param[in] fluxSign The sign of the flux to convert it to magnetic + * field. Also serves to choose which magnetic flux is used, i.e. the Y + * or Z field + * \param[in] ctDirection The direction of the CT field that this slope + will be used to calculate + * \param[in] conservedQuadrent1 Which index should be reduced by one to get the + correct conserved variables. Options are -1 for no reduction, 0 for reducing + xid, 1 for reducing yid, and 2 for reducing zid + * \param[in] conservedQuadrent2 Which index should be reduced by one to get the + correct conserved variables. Options are -1 for no reduction, 0 for reducing + xid, 1 for reducing yid, and 2 for reducing zid + * \param[in] fluxQuadrent1 Which index should be reduced by one to get the + correct flux variable. Options are -1 for no reduction, 0 for reducing xid, 1 + for reducing yid, and 2 for reducing zid + * \param[in] fluxQuadrent2 Which index should be reduced by one to get the + correct flux variable. Options are -1 for no reduction, 0 for reducing xid, 1 + for reducing yid, and 2 for reducing zid + * \param[in] xid The x index + * \param[in] yid The y index + * \param[in] zid The z index + * \param[in] nx The number of cells in the x-direction + * \param[in] ny The number of cells in the y-direction + * \param[in] n_cells The total number of cells + * \return Real The slope of the electric field + */ +inline __host__ __device__ Real +_ctSlope(Real const *flux, Real const *dev_conserved, Real const &fluxSign, + int const &ctDirection, int const &conservedQuadrent1, + int const &conservedQuadrent2, int const &fluxQuadrent1, + int const &fluxQuadrent2, int const &xid, int const &yid, + int const &zid, int const &nx, int const &ny, int const &n_cells) +{ + // Compute the various required indices - // Get the shifted modulos of the ctDirection. - int const modPlus1 = (ctDirection + 1) % 3; - int const modPlus2 = (ctDirection + 2) % 3; + // Get the shifted modulos of the ctDirection. + int const modPlus1 = (ctDirection + 1) % 3; + int const modPlus2 = (ctDirection + 2) % 3; - // Indices for the cell centered values - int const xidCentered = xid - int(conservedQuadrent1 == 0) - int(conservedQuadrent2 == 0); - int const yidCentered = yid - int(conservedQuadrent1 == 1) - int(conservedQuadrent2 == 1); - int const zidCentered = zid - int(conservedQuadrent1 == 2) - int(conservedQuadrent2 == 2); - int const idxCentered = cuda_utilities::compute1DIndex(xidCentered, yidCentered, zidCentered, nx, ny); + // Indices for the cell centered values + int const xidCentered = + xid - int(conservedQuadrent1 == 0) - int(conservedQuadrent2 == 0); + int const yidCentered = + yid - int(conservedQuadrent1 == 1) - int(conservedQuadrent2 == 1); + int const zidCentered = + zid - int(conservedQuadrent1 == 2) - int(conservedQuadrent2 == 2); + int const idxCentered = cuda_utilities::compute1DIndex( + xidCentered, yidCentered, zidCentered, nx, ny); - // Index for the flux - int const idxFlux = cuda_utilities::compute1DIndex(xid - int(fluxQuadrent1 == 0) - int(fluxQuadrent2 == 0), - yid - int(fluxQuadrent1 == 1) - int(fluxQuadrent2 == 1), - zid - int(fluxQuadrent1 == 2) - int(fluxQuadrent2 == 2), - nx, ny); + // Index for the flux + int const idxFlux = cuda_utilities::compute1DIndex( + xid - int(fluxQuadrent1 == 0) - int(fluxQuadrent2 == 0), + yid - int(fluxQuadrent1 == 1) - int(fluxQuadrent2 == 1), + zid - int(fluxQuadrent1 == 2) - int(fluxQuadrent2 == 2), nx, ny); - // Indices for the face centered magnetic fields that need to be averaged - int const idxB2Shift = cuda_utilities::compute1DIndex(xidCentered - int(modPlus1 == 0), - yidCentered - int(modPlus1 == 1), - zidCentered - int(modPlus1 == 2), - nx, ny); - int const idxB3Shift = cuda_utilities::compute1DIndex(xidCentered - int(modPlus2 == 0), - yidCentered - int(modPlus2 == 1), - zidCentered - int(modPlus2 == 2), - nx, ny); + // Indices for the face centered magnetic fields that need to be averaged + int const idxB2Shift = cuda_utilities::compute1DIndex( + xidCentered - int(modPlus1 == 0), yidCentered - int(modPlus1 == 1), + zidCentered - int(modPlus1 == 2), nx, ny); + int const idxB3Shift = cuda_utilities::compute1DIndex( + xidCentered - int(modPlus2 == 0), yidCentered - int(modPlus2 == 1), + zidCentered - int(modPlus2 == 2), nx, ny); - // Load values for cell centered electric field. B1 (not present) is - // the magnetic field in the same direction as the `ctDirection` - // variable, B2 and B3 are the next two fields cyclically. i.e. if - // B1=Bx then B2=By and B3=Bz, if B1=By then B2=Bz and B3=Bx. The - // same rules apply for the momentum - Real const density = dev_conserved[idxCentered ]; - Real const Momentum2 = dev_conserved[idxCentered + (modPlus1+1) *n_cells]; - Real const Momentum3 = dev_conserved[idxCentered + (modPlus2+1) *n_cells]; - Real const B2Centered = 0.5 * ( dev_conserved[idxCentered + (modPlus1+grid_enum::magnetic_start)*n_cells] - + dev_conserved[idxB2Shift + (modPlus1+grid_enum::magnetic_start)*n_cells]); - Real const B3Centered = 0.5 * ( dev_conserved[idxCentered + (modPlus2+grid_enum::magnetic_start)*n_cells] - + dev_conserved[idxB3Shift + (modPlus2+grid_enum::magnetic_start)*n_cells]); + // Load values for cell centered electric field. B1 (not present) is + // the magnetic field in the same direction as the `ctDirection` + // variable, B2 and B3 are the next two fields cyclically. i.e. if + // B1=Bx then B2=By and B3=Bz, if B1=By then B2=Bz and B3=Bx. The + // same rules apply for the momentum + Real const density = dev_conserved[idxCentered]; + Real const Momentum2 = dev_conserved[idxCentered + (modPlus1 + 1) * n_cells]; + Real const Momentum3 = dev_conserved[idxCentered + (modPlus2 + 1) * n_cells]; + Real const B2Centered = + 0.5 * (dev_conserved[idxCentered + + (modPlus1 + grid_enum::magnetic_start) * n_cells] + + dev_conserved[idxB2Shift + + (modPlus1 + grid_enum::magnetic_start) * n_cells]); + Real const B3Centered = + 0.5 * (dev_conserved[idxCentered + + (modPlus2 + grid_enum::magnetic_start) * n_cells] + + dev_conserved[idxB3Shift + + (modPlus2 + grid_enum::magnetic_start) * n_cells]); - // Compute the electric field in the center with a cross product - Real const electric_centered = (Momentum3*B2Centered - Momentum2*B3Centered) / density; + // Compute the electric field in the center with a cross product + Real const electric_centered = + (Momentum3 * B2Centered - Momentum2 * B3Centered) / density; - // Load face centered electric field, note fluxSign to correctly do - // the shift from magnetic flux to EMF/electric field and to choose - // which field to use - Real const electric_face = fluxSign * flux[idxFlux + (int(fluxSign == 1)+grid_enum::magnetic_start)*n_cells]; + // Load face centered electric field, note fluxSign to correctly do + // the shift from magnetic flux to EMF/electric field and to choose + // which field to use + Real const electric_face = + fluxSign * + flux[idxFlux + + (int(fluxSign == 1) + grid_enum::magnetic_start) * n_cells]; - // Compute the slope and return it - // S&G 2009 equation 24 - return electric_face - electric_centered; - } - // ===================================================================== - }// mhd::_internal namespace + // Compute the slope and return it + // S&G 2009 equation 24 + return electric_face - electric_centered; +} +// ===================================================================== +} // namespace _internal - // ========================================================================= - /*! - * \brief Compute the Constrained Transport electric fields used to evolve - * the magnetic field. Note that this function requires that the density be - * non-zero or it will return Nans. - * - * \param[in] fluxX The flux on the x+1/2 face of each cell - * \param[in] fluxY The flux on the y+1/2 face of each cell - * \param[in] fluxZ The flux on the z+1/2 face of each cell - * \param[in] dev_conserved The device resident grid - * \param[out] ctElectricFields The CT electric fields - * \param[in] nx The number of cells in the x-direction - * \param[in] ny The number of cells in the y-direction - * \param[in] nz The number of cells in the z-direction - * \param[in] n_cells The total number of cells - */ - __global__ void Calculate_CT_Electric_Fields(Real const *fluxX, - Real const *fluxY, - Real const *fluxZ, - Real const *dev_conserved, - Real *ctElectricFields, - int const nx, - int const ny, - int const nz, - int const n_cells); - // ========================================================================= -} // end namespace mhd -#endif // MHD \ No newline at end of file +// ========================================================================= +/*! + * \brief Compute the Constrained Transport electric fields used to evolve + * the magnetic field. Note that this function requires that the density be + * non-zero or it will return Nans. + * + * \param[in] fluxX The flux on the x+1/2 face of each cell + * \param[in] fluxY The flux on the y+1/2 face of each cell + * \param[in] fluxZ The flux on the z+1/2 face of each cell + * \param[in] dev_conserved The device resident grid + * \param[out] ctElectricFields The CT electric fields + * \param[in] nx The number of cells in the x-direction + * \param[in] ny The number of cells in the y-direction + * \param[in] nz The number of cells in the z-direction + * \param[in] n_cells The total number of cells + */ +__global__ void Calculate_CT_Electric_Fields( + Real const *fluxX, Real const *fluxY, Real const *fluxZ, + Real const *dev_conserved, Real *ctElectricFields, int const nx, + int const ny, int const nz, int const n_cells); +// ========================================================================= +} // end namespace mhd +#endif // MHD \ No newline at end of file diff --git a/src/mhd/ct_electric_fields_tests.cu b/src/mhd/ct_electric_fields_tests.cu index b526ab7e0..3dbcc008a 100644 --- a/src/mhd/ct_electric_fields_tests.cu +++ b/src/mhd/ct_electric_fields_tests.cu @@ -6,19 +6,19 @@ */ // STL Includes -#include -#include +#include #include #include -#include +#include +#include // External Includes -#include // Include GoogleTest and related libraries/headers +#include // Include GoogleTest and related libraries/headers // Local Includes -#include "../utils/testing_utilities.h" -#include "../mhd/ct_electric_fields.h" #include "../global/global.h" +#include "../mhd/ct_electric_fields.h" +#include "../utils/testing_utilities.h" #ifdef MHD // ============================================================================= @@ -32,178 +32,166 @@ */ class tMHDCalculateCTElectricFields : public ::testing::Test { -public: - - /*! - * \brief Initialize and allocate all the various required variables and - * arrays - * - */ - tMHDCalculateCTElectricFields() - : - nx(3), + public: + /*! + * \brief Initialize and allocate all the various required variables and + * arrays + * + */ + tMHDCalculateCTElectricFields() + : nx(3), ny(nx), nz(nx), - n_cells(nx*ny*nz), + n_cells(nx * ny * nz), fluxX(n_cells * (grid_enum::num_flux_fields)), fluxY(n_cells * (grid_enum::num_flux_fields)), fluxZ(n_cells * (grid_enum::num_flux_fields)), - grid (n_cells * (grid_enum::num_fields)), + grid(n_cells * (grid_enum::num_fields)), testCTElectricFields(n_cells * 3, -999.), fiducialData(n_cells * 3, -999.), - dimGrid((n_cells + TPB - 1),1,1), - dimBlock(TPB,1,1) - { - // Allocate device arrays - CudaSafeCall ( cudaMalloc(&dev_fluxX, fluxX.size()*sizeof(double)) ); - CudaSafeCall ( cudaMalloc(&dev_fluxY, fluxY.size()*sizeof(double)) ); - CudaSafeCall ( cudaMalloc(&dev_fluxZ, fluxZ.size()*sizeof(double)) ); - CudaSafeCall ( cudaMalloc(&dev_grid, grid.size()*sizeof(double)) ); - CudaSafeCall ( cudaMalloc(&dev_testCTElectricFields, testCTElectricFields.size()*sizeof(double)) ); - - // Populate the grids with values where vector.at(i) = double(i). The - // values chosen aren't that important, just that every cell has a unique - // value - std::iota(std::begin(fluxX), std::end(fluxX), 0.); - std::iota(std::begin(fluxY), std::end(fluxY), fluxX.back() + 1); - std::iota(std::begin(fluxZ), std::end(fluxZ), fluxY.back() + 1); - std::iota(std::begin(grid), std::end(grid), fluxZ.back() + 1); - } - ~tMHDCalculateCTElectricFields() = default; -protected: - // Initialize the test grid and other state variables - size_t const nx, ny, nz; - size_t const n_cells; - - // Launch Parameters - dim3 const dimGrid; // How many blocks in the grid - dim3 const dimBlock; // How many threads per block - - // Make sure the vector is large enough that the locations where the - // magnetic field would be in the real grid are filled - std::vector fluxX; - std::vector fluxY; - std::vector fluxZ; - std::vector grid; - std::vector testCTElectricFields; - std::vector fiducialData; - - // device pointers - double *dev_fluxX, *dev_fluxY, *dev_fluxZ, *dev_grid, *dev_testCTElectricFields; - - /*! - * \brief Launch the kernel and check results - * - */ - void runTest() - { - // Copy values to GPU - CudaSafeCall( cudaMemcpy(dev_fluxX, fluxX.data(), fluxX.size()*sizeof(Real), cudaMemcpyHostToDevice) ); - CudaSafeCall( cudaMemcpy(dev_fluxY, fluxY.data(), fluxY.size()*sizeof(Real), cudaMemcpyHostToDevice) ); - CudaSafeCall( cudaMemcpy(dev_fluxZ, fluxZ.data(), fluxZ.size()*sizeof(Real), cudaMemcpyHostToDevice) ); - CudaSafeCall( cudaMemcpy(dev_grid, grid.data(), grid.size()*sizeof(Real), cudaMemcpyHostToDevice) ); - CudaSafeCall( cudaMemcpy(dev_testCTElectricFields, - testCTElectricFields.data(), - testCTElectricFields.size()*sizeof(Real), - cudaMemcpyHostToDevice) ); - - // Call the kernel to test - hipLaunchKernelGGL(mhd::Calculate_CT_Electric_Fields, - dimGrid, - dimBlock, - 0, - 0, - dev_fluxX, - dev_fluxY, - dev_fluxZ, - dev_grid, - dev_testCTElectricFields, - nx, - ny, - nz, - n_cells); - CudaCheckError(); - - // Copy test data back - CudaSafeCall( cudaMemcpy(testCTElectricFields.data(), - dev_testCTElectricFields, - testCTElectricFields.size()*sizeof(Real), - cudaMemcpyDeviceToHost) ); - cudaDeviceSynchronize(); - - // Check the results - for (size_t i = 0; i < fiducialData.size(); i++) - { - int xid, yid, zid; - cuda_utilities::compute3DIndices(i, nx, ny, xid, yid, zid); - testingUtilities::checkResults(fiducialData.at(i), - testCTElectricFields.at(i), - "value at i = " + std::to_string(i) - + ", xid = " + std::to_string(xid) - + ", yid = " + std::to_string(yid) - + ", zid = " + std::to_string(zid)); - } + dimGrid((n_cells + TPB - 1), 1, 1), + dimBlock(TPB, 1, 1) + { + // Allocate device arrays + CudaSafeCall(cudaMalloc(&dev_fluxX, fluxX.size() * sizeof(double))); + CudaSafeCall(cudaMalloc(&dev_fluxY, fluxY.size() * sizeof(double))); + CudaSafeCall(cudaMalloc(&dev_fluxZ, fluxZ.size() * sizeof(double))); + CudaSafeCall(cudaMalloc(&dev_grid, grid.size() * sizeof(double))); + CudaSafeCall(cudaMalloc(&dev_testCTElectricFields, + testCTElectricFields.size() * sizeof(double))); + + // Populate the grids with values where vector.at(i) = double(i). The + // values chosen aren't that important, just that every cell has a unique + // value + std::iota(std::begin(fluxX), std::end(fluxX), 0.); + std::iota(std::begin(fluxY), std::end(fluxY), fluxX.back() + 1); + std::iota(std::begin(fluxZ), std::end(fluxZ), fluxY.back() + 1); + std::iota(std::begin(grid), std::end(grid), fluxZ.back() + 1); + } + ~tMHDCalculateCTElectricFields() = default; + + protected: + // Initialize the test grid and other state variables + size_t const nx, ny, nz; + size_t const n_cells; + + // Launch Parameters + dim3 const dimGrid; // How many blocks in the grid + dim3 const dimBlock; // How many threads per block + + // Make sure the vector is large enough that the locations where the + // magnetic field would be in the real grid are filled + std::vector fluxX; + std::vector fluxY; + std::vector fluxZ; + std::vector grid; + std::vector testCTElectricFields; + std::vector fiducialData; + + // device pointers + double *dev_fluxX, *dev_fluxY, *dev_fluxZ, *dev_grid, + *dev_testCTElectricFields; + + /*! + * \brief Launch the kernel and check results + * + */ + void runTest() + { + // Copy values to GPU + CudaSafeCall(cudaMemcpy(dev_fluxX, fluxX.data(), + fluxX.size() * sizeof(Real), + cudaMemcpyHostToDevice)); + CudaSafeCall(cudaMemcpy(dev_fluxY, fluxY.data(), + fluxY.size() * sizeof(Real), + cudaMemcpyHostToDevice)); + CudaSafeCall(cudaMemcpy(dev_fluxZ, fluxZ.data(), + fluxZ.size() * sizeof(Real), + cudaMemcpyHostToDevice)); + CudaSafeCall(cudaMemcpy(dev_grid, grid.data(), grid.size() * sizeof(Real), + cudaMemcpyHostToDevice)); + CudaSafeCall(cudaMemcpy( + dev_testCTElectricFields, testCTElectricFields.data(), + testCTElectricFields.size() * sizeof(Real), cudaMemcpyHostToDevice)); + + // Call the kernel to test + hipLaunchKernelGGL(mhd::Calculate_CT_Electric_Fields, dimGrid, dimBlock, 0, + 0, dev_fluxX, dev_fluxY, dev_fluxZ, dev_grid, + dev_testCTElectricFields, nx, ny, nz, n_cells); + CudaCheckError(); + + // Copy test data back + CudaSafeCall(cudaMemcpy( + testCTElectricFields.data(), dev_testCTElectricFields, + testCTElectricFields.size() * sizeof(Real), cudaMemcpyDeviceToHost)); + cudaDeviceSynchronize(); + + // Check the results + for (size_t i = 0; i < fiducialData.size(); i++) { + int xid, yid, zid; + cuda_utilities::compute3DIndices(i, nx, ny, xid, yid, zid); + testingUtilities::checkResults( + fiducialData.at(i), testCTElectricFields.at(i), + "value at i = " + std::to_string(i) + ", xid = " + + std::to_string(xid) + ", yid = " + std::to_string(yid) + + ", zid = " + std::to_string(zid)); } + } }; // ============================================================================= // ============================================================================= -TEST_F(tMHDCalculateCTElectricFields, - PositiveVelocityExpectCorrectOutput) +TEST_F(tMHDCalculateCTElectricFields, PositiveVelocityExpectCorrectOutput) { - // Fiducial values - fiducialData.at(26) = 206.29859653255295; - fiducialData.at(53) = -334.90052254763339; - fiducialData.at(80) = 209.53472440298236; + // Fiducial values + fiducialData.at(26) = 206.29859653255295; + fiducialData.at(53) = -334.90052254763339; + fiducialData.at(80) = 209.53472440298236; - // Launch kernel and check results - runTest(); + // Launch kernel and check results + runTest(); } // ============================================================================= // ============================================================================= -TEST_F(tMHDCalculateCTElectricFields, - NegativeVelocityExpectCorrectOutput) +TEST_F(tMHDCalculateCTElectricFields, NegativeVelocityExpectCorrectOutput) { - // Fiducial values - fiducialData.at(26) = 203.35149422304994; - fiducialData.at(53) = -330.9860399765279; - fiducialData.at(80) = 208.55149905461991; - - // Set the density fluxes to be negative to indicate a negative velocity - // across the face - for (size_t i = 0; i < n_cells; i++) - { - fluxX.at(i) = -fluxX.at(i); - fluxY.at(i) = -fluxY.at(i); - fluxZ.at(i) = -fluxZ.at(i); - } - - // Launch kernel and check results - runTest(); + // Fiducial values + fiducialData.at(26) = 203.35149422304994; + fiducialData.at(53) = -330.9860399765279; + fiducialData.at(80) = 208.55149905461991; + + // Set the density fluxes to be negative to indicate a negative velocity + // across the face + for (size_t i = 0; i < n_cells; i++) { + fluxX.at(i) = -fluxX.at(i); + fluxY.at(i) = -fluxY.at(i); + fluxZ.at(i) = -fluxZ.at(i); + } + + // Launch kernel and check results + runTest(); } // ============================================================================= // ============================================================================= -TEST_F(tMHDCalculateCTElectricFields, - ZeroVelocityExpectCorrectOutput) +TEST_F(tMHDCalculateCTElectricFields, ZeroVelocityExpectCorrectOutput) { - // Fiducial values - fiducialData.at(26) = 204.82504537780144; - fiducialData.at(53) = -332.94328126208063; - fiducialData.at(80) = 209.04311172880114; - - // Set the density fluxes to be negative to indicate a negative velocity - // across the face - for (size_t i = 0; i < n_cells; i++) - { - fluxX.at(i) = 0.0; - fluxY.at(i) = 0.0; - fluxZ.at(i) = 0.0; - } - - // Launch kernel and check results - runTest(); + // Fiducial values + fiducialData.at(26) = 204.82504537780144; + fiducialData.at(53) = -332.94328126208063; + fiducialData.at(80) = 209.04311172880114; + + // Set the density fluxes to be negative to indicate a negative velocity + // across the face + for (size_t i = 0; i < n_cells; i++) { + fluxX.at(i) = 0.0; + fluxY.at(i) = 0.0; + fluxZ.at(i) = 0.0; + } + + // Launch kernel and check results + runTest(); } // ============================================================================= -#endif // MHD +#endif // MHD diff --git a/src/mhd/magnetic_divergence.cu b/src/mhd/magnetic_divergence.cu index fc84cbb3f..b51d7bfee 100644 --- a/src/mhd/magnetic_divergence.cu +++ b/src/mhd/magnetic_divergence.cu @@ -21,118 +21,115 @@ #include "../grid/grid3D.h" #include "../io/io.h" #include "../mhd/magnetic_divergence.h" -#include "../utils/cuda_utilities.h" -#include "../utils/reduction_utilities.h" #include "../utils/DeviceVector.h" +#include "../utils/cuda_utilities.h" #include "../utils/error_handling.h" +#include "../utils/reduction_utilities.h" #ifdef MHD namespace mhd { - // ========================================================================= - __global__ void calculateMagneticDivergence(Real const *dev_conserved, - Real *dev_maxDivergence, - Real const dx, - Real const dy, - Real const dz, - int const nx, - int const ny, - int const nz, - int const n_cells) - { - // Variables to store the divergence - Real cellDivergence; - Real maxDivergence = 0.0; - - // Index variables - int xid, yid, zid, id_xMin1, id_yMin1, id_zMin1; - - // Grid stride loop to perform as much of the reduction as possible - for(size_t id = threadIdx.x + blockIdx.x * blockDim.x; id < n_cells; id += blockDim.x * gridDim.x) - { - // compute the real indices - cuda_utilities::compute3DIndices(id, nx, ny, xid, yid, zid); - - // Thread guard to avoid overrun and to skip ghost cells that cannot - // have their divergences computed due to a missing face; - if ( xid > 1 and yid > 1 and zid > 1 - and xid < nx and yid < ny and zid < nz) - { - // Compute the various offset indices - id_xMin1 = cuda_utilities::compute1DIndex(xid-1, yid , zid , nx, ny); - id_yMin1 = cuda_utilities::compute1DIndex(xid , yid-1, zid , nx, ny); - id_zMin1 = cuda_utilities::compute1DIndex(xid , yid , zid-1, nx, ny); - - // Compute divergence - // Stone et al. 2008 equation 25 - cellDivergence = - (( dev_conserved[id + (grid_enum::magnetic_x)*n_cells] - - dev_conserved[id_xMin1 + (grid_enum::magnetic_x)*n_cells]) - / dx) - + (( dev_conserved[id + (grid_enum::magnetic_y)*n_cells] - - dev_conserved[id_yMin1 + (grid_enum::magnetic_y)*n_cells]) - / dy) - + (( dev_conserved[id + (grid_enum::magnetic_z)*n_cells] - - dev_conserved[id_zMin1 + (grid_enum::magnetic_z)*n_cells]) - / dz); - - maxDivergence = max(maxDivergence, fabs(cellDivergence)); - } - } - - // Perform reduction across the entire grid - reduction_utilities::gridReduceMax(maxDivergence, dev_maxDivergence); +// ========================================================================= +__global__ void calculateMagneticDivergence(Real const *dev_conserved, + Real *dev_maxDivergence, + Real const dx, Real const dy, + Real const dz, int const nx, + int const ny, int const nz, + int const n_cells) +{ + // Variables to store the divergence + Real cellDivergence; + Real maxDivergence = 0.0; + + // Index variables + int xid, yid, zid, id_xMin1, id_yMin1, id_zMin1; + + // Grid stride loop to perform as much of the reduction as possible + for (size_t id = threadIdx.x + blockIdx.x * blockDim.x; id < n_cells; + id += blockDim.x * gridDim.x) { + // compute the real indices + cuda_utilities::compute3DIndices(id, nx, ny, xid, yid, zid); + + // Thread guard to avoid overrun and to skip ghost cells that cannot + // have their divergences computed due to a missing face; + if (xid > 1 and yid > 1 and zid > 1 and xid < nx and yid < ny and + zid < nz) { + // Compute the various offset indices + id_xMin1 = cuda_utilities::compute1DIndex(xid - 1, yid, zid, nx, ny); + id_yMin1 = cuda_utilities::compute1DIndex(xid, yid - 1, zid, nx, ny); + id_zMin1 = cuda_utilities::compute1DIndex(xid, yid, zid - 1, nx, ny); + + // Compute divergence + // Stone et al. 2008 equation 25 + cellDivergence = + ((dev_conserved[id + (grid_enum::magnetic_x)*n_cells] - + dev_conserved[id_xMin1 + (grid_enum::magnetic_x)*n_cells]) / + dx) + + ((dev_conserved[id + (grid_enum::magnetic_y)*n_cells] - + dev_conserved[id_yMin1 + (grid_enum::magnetic_y)*n_cells]) / + dy) + + ((dev_conserved[id + (grid_enum::magnetic_z)*n_cells] - + dev_conserved[id_zMin1 + (grid_enum::magnetic_z)*n_cells]) / + dz); + + maxDivergence = max(maxDivergence, fabs(cellDivergence)); } - // ========================================================================= + } + + // Perform reduction across the entire grid + reduction_utilities::gridReduceMax(maxDivergence, dev_maxDivergence); +} +// ========================================================================= // ============================================================================= Real checkMagneticDivergence(Grid3D const &G) { - // Compute the local value of the divergence - // First let's create some variables we'll need. - cuda_utilities::AutomaticLaunchParams static const launchParams(mhd::calculateMagneticDivergence); - cuda_utilities::DeviceVector static dev_maxDivergence(1); - - // Set the device side divergence to the smallest possible double so that - // the reduction isn't using the maximum value of the previous iteration - dev_maxDivergence.assign(std::numeric_limits::lowest()); - - // Now lets get the local maximum divergence - hipLaunchKernelGGL(mhd::calculateMagneticDivergence, - launchParams.numBlocks, launchParams.threadsPerBlock, 0, 0, - G.C.device, dev_maxDivergence.data(), - G.H.dx, G.H.dy, G.H.dz, - G.H.nx, G.H.ny, G.H.nz, - G.H.n_cells); - CudaCheckError(); - Real max_magnetic_divergence = dev_maxDivergence[0]; - - #ifdef MPI_CHOLLA - // Now that we have the local maximum let's get the global maximum - max_magnetic_divergence = ReduceRealMax(max_magnetic_divergence); - #endif //MPI_CHOLLA - - // If the magnetic divergence is greater than the limit then raise a warning and exit - Real static const magnetic_divergence_limit = 1.0E-14; - if (max_magnetic_divergence > magnetic_divergence_limit) - { - // Report the error and exit - chprintf("The magnetic divergence has exceeded the maximum allowed value. Divergence = %7.4e, the maximum allowed divergence = %7.4e\n", max_magnetic_divergence, magnetic_divergence_limit); - chexit(-1); - } - else if (max_magnetic_divergence < 0.0) - { - // Report the error and exit - chprintf("The magnetic divergence is negative. Divergence = %7.4e\n", max_magnetic_divergence); - chexit(-1); - } - else // The magnetic divergence is within acceptable bounds - { - chprintf("Global maximum magnetic divergence = %7.4e\n", max_magnetic_divergence); - } - - return max_magnetic_divergence; + // Compute the local value of the divergence + // First let's create some variables we'll need. + cuda_utilities::AutomaticLaunchParams static const launchParams( + mhd::calculateMagneticDivergence); + cuda_utilities::DeviceVector static dev_maxDivergence(1); + + // Set the device side divergence to the smallest possible double so that + // the reduction isn't using the maximum value of the previous iteration + dev_maxDivergence.assign(std::numeric_limits::lowest()); + + // Now lets get the local maximum divergence + hipLaunchKernelGGL(mhd::calculateMagneticDivergence, launchParams.numBlocks, + launchParams.threadsPerBlock, 0, 0, G.C.device, + dev_maxDivergence.data(), G.H.dx, G.H.dy, G.H.dz, G.H.nx, + G.H.ny, G.H.nz, G.H.n_cells); + CudaCheckError(); + Real max_magnetic_divergence = dev_maxDivergence[0]; + + #ifdef MPI_CHOLLA + // Now that we have the local maximum let's get the global maximum + max_magnetic_divergence = ReduceRealMax(max_magnetic_divergence); + #endif // MPI_CHOLLA + + // If the magnetic divergence is greater than the limit then raise a warning + // and exit + Real static const magnetic_divergence_limit = 1.0E-14; + if (max_magnetic_divergence > magnetic_divergence_limit) { + // Report the error and exit + chprintf( + "The magnetic divergence has exceeded the maximum allowed value. " + "Divergence = %7.4e, the maximum allowed divergence = %7.4e\n", + max_magnetic_divergence, magnetic_divergence_limit); + chexit(-1); + } else if (max_magnetic_divergence < 0.0) { + // Report the error and exit + chprintf("The magnetic divergence is negative. Divergence = %7.4e\n", + max_magnetic_divergence); + chexit(-1); + } else // The magnetic divergence is within acceptable bounds + { + chprintf("Global maximum magnetic divergence = %7.4e\n", + max_magnetic_divergence); + } + + return max_magnetic_divergence; } // ============================================================================= -} // end namespace mhd -#endif // MHD +} // end namespace mhd +#endif // MHD diff --git a/src/mhd/magnetic_divergence.h b/src/mhd/magnetic_divergence.h index a32c7ec74..af324bb47 100644 --- a/src/mhd/magnetic_divergence.h +++ b/src/mhd/magnetic_divergence.h @@ -14,57 +14,50 @@ // Local Includes #include "../global/global.h" -#include "../grid/grid3D.h" #include "../global/global_cuda.h" +#include "../grid/grid3D.h" #include "../utils/gpu.hpp" - /*! * \brief Namespace for MHD code * */ namespace mhd { - // ========================================================================= - /*! - * \brief Kernel to compute the maximum divergence of the magnetic field in - * the grid. Uses `reduction_utilities::gridReduceMax` and as such should be - * called with the minimum number of blocks. Recommend using the occupancy - * API - * - * \param[in] dev_conserved The device array of conserved variables - * \param[out] maxDivergence The device scalar to store the reduced divergence at - * \param[in] dx Cell size in the X-direction - * \param[in] dy Cell size in the Y-direction - * \param[in] dz Cell size in the Z-direction - * \param[in] nx Number of cells in the X-direction - * \param[in] ny Number of cells in the Y-direction - * \param[in] nz Number of cells in the Z-direction - * \param[in] n_cells Total number of cells - */ - __global__ void calculateMagneticDivergence(Real const *dev_conserved, - Real *maxDivergence, - Real const dx, - Real const dy, - Real const dz, - int const nx, - int const ny, - int const nz, - int const n_cells); - // ========================================================================= +// ========================================================================= +/*! + * \brief Kernel to compute the maximum divergence of the magnetic field in + * the grid. Uses `reduction_utilities::gridReduceMax` and as such should be + * called with the minimum number of blocks. Recommend using the occupancy + * API + * + * \param[in] dev_conserved The device array of conserved variables + * \param[out] maxDivergence The device scalar to store the reduced divergence + * at \param[in] dx Cell size in the X-direction \param[in] dy Cell size in the + * Y-direction \param[in] dz Cell size in the Z-direction \param[in] nx Number + * of cells in the X-direction \param[in] ny Number of cells in the Y-direction + * \param[in] nz Number of cells in the Z-direction + * \param[in] n_cells Total number of cells + */ +__global__ void calculateMagneticDivergence(Real const *dev_conserved, + Real *maxDivergence, Real const dx, + Real const dy, Real const dz, + int const nx, int const ny, + int const nz, int const n_cells); +// ========================================================================= - // ========================================================================= - /*! - * \brief Compute the maximum magnetic divergence in the grid and report - * an error if it exceeds the magnetic divergence limit or is negative. The - * magnetic divergence limit is 1E-14 as determined by Athena as a - * reasonable upper bound for correctness. - * - * \param G The grid object - * \return Real The maximum magnetic divergence found in the grid. Can - * usually be ignored since all checking is done in the fucntion, mostly - * this return is for testing. - */ - Real checkMagneticDivergence(Grid3D const &G); - // ========================================================================= -} // end namespace mhd \ No newline at end of file +// ========================================================================= +/*! + * \brief Compute the maximum magnetic divergence in the grid and report + * an error if it exceeds the magnetic divergence limit or is negative. The + * magnetic divergence limit is 1E-14 as determined by Athena as a + * reasonable upper bound for correctness. + * + * \param G The grid object + * \return Real The maximum magnetic divergence found in the grid. Can + * usually be ignored since all checking is done in the fucntion, mostly + * this return is for testing. + */ +Real checkMagneticDivergence(Grid3D const &G); +// ========================================================================= +} // end namespace mhd \ No newline at end of file diff --git a/src/mhd/magnetic_divergence_tests.cu b/src/mhd/magnetic_divergence_tests.cu index 509d9af30..4fcdb3447 100644 --- a/src/mhd/magnetic_divergence_tests.cu +++ b/src/mhd/magnetic_divergence_tests.cu @@ -5,21 +5,20 @@ * */ - // STL Includes -#include -#include #include #include +#include +#include // External Includes -#include // Include GoogleTest and related libraries/headers +#include // Include GoogleTest and related libraries/headers // Local Includes -#include "../utils/testing_utilities.h" +#include "../global/global.h" #include "../mhd/magnetic_divergence.h" #include "../utils/DeviceVector.h" -#include "../global/global.h" +#include "../utils/testing_utilities.h" #ifdef MHD // ============================================================================= @@ -27,45 +26,46 @@ // ============================================================================= TEST(tMHDGrid3DcheckMagneticDivergence, CorrectInputExpectCorrectOutput) { - // Grid Parameters & testing parameters - size_t const gridSize = 96; // Needs to be at least 64 so that each thread has a value - size_t const n_ghost = 4; + // Grid Parameters & testing parameters + size_t const gridSize = + 96; // Needs to be at least 64 so that each thread has a value + size_t const n_ghost = 4; - // Instantiate Grid3D object - Grid3D G; - G.H.dx = 3; - G.H.dy = G.H.dx; - G.H.dz = G.H.dx; - G.H.nx = gridSize+2*n_ghost; - G.H.ny = G.H.nx; - G.H.nz = G.H.nx; - G.H.n_cells = G.H.nx * G.H.ny * G.H.nz; - G.H.n_fields = 8; + // Instantiate Grid3D object + Grid3D G; + G.H.dx = 3; + G.H.dy = G.H.dx; + G.H.dz = G.H.dx; + G.H.nx = gridSize + 2 * n_ghost; + G.H.ny = G.H.nx; + G.H.nz = G.H.nx; + G.H.n_cells = G.H.nx * G.H.ny * G.H.nz; + G.H.n_fields = 8; - // Setup host grid. Fill host grid with random values and randomly assign - // maximum value - std::vector host_grid(G.H.n_cells * G.H.n_fields); - std::mt19937 prng(1); - std::uniform_real_distribution doubleRand(1, 5); - for (size_t i = 0; i < host_grid.size(); i++) - { - host_grid.at(i) = doubleRand(prng) / 1E15; - } + // Setup host grid. Fill host grid with random values and randomly assign + // maximum value + std::vector host_grid(G.H.n_cells * G.H.n_fields); + std::mt19937 prng(1); + std::uniform_real_distribution doubleRand(1, 5); + for (size_t i = 0; i < host_grid.size(); i++) { + host_grid.at(i) = doubleRand(prng) / 1E15; + } - // Allocating and copying to device - cuda_utilities::DeviceVector dev_grid(host_grid.size()); - G.C.device = dev_grid.data(); - dev_grid.cpyHostToDevice(host_grid); + // Allocating and copying to device + cuda_utilities::DeviceVector dev_grid(host_grid.size()); + G.C.device = dev_grid.data(); + dev_grid.cpyHostToDevice(host_grid); - // Perform test - InitializeChollaMPI(NULL, NULL); - double max_magnetic_divergence = mhd::checkMagneticDivergence(G); - MPI_Finalize(); - // Perform Comparison - Real const fiducialDivergence = 3.6318132783263106 / 1E15; - testingUtilities::checkResults(fiducialDivergence, max_magnetic_divergence, "maximum divergence"); + // Perform test + InitializeChollaMPI(NULL, NULL); + double max_magnetic_divergence = mhd::checkMagneticDivergence(G); + MPI_Finalize(); + // Perform Comparison + Real const fiducialDivergence = 3.6318132783263106 / 1E15; + testingUtilities::checkResults(fiducialDivergence, max_magnetic_divergence, + "maximum divergence"); } // ============================================================================= // End of tests for the magnetic field divergence functions // ============================================================================= -#endif // MHD +#endif // MHD diff --git a/src/mhd/magnetic_update.cu b/src/mhd/magnetic_update.cu index 541fb83ba..2077d3656 100644 --- a/src/mhd/magnetic_update.cu +++ b/src/mhd/magnetic_update.cu @@ -17,70 +17,78 @@ #ifdef MHD namespace mhd { - // ========================================================================= - __global__ void Update_Magnetic_Field_3D(Real *sourceGrid, - Real *destinationGrid, - Real *ctElectricFields, - int const nx, - int const ny, - int const nz, - int const n_cells, - Real const dt, - Real const dx, - Real const dy, - Real const dz) - { - // get a thread index - int const blockId = blockIdx.x + blockIdx.y*gridDim.x; - int const threadId = threadIdx.x + blockId * blockDim.x; - int xid, yid, zid; - cuda_utilities::compute3DIndices(threadId, nx, ny, xid, yid, zid); +// ========================================================================= +__global__ void Update_Magnetic_Field_3D( + Real *sourceGrid, Real *destinationGrid, Real *ctElectricFields, + int const nx, int const ny, int const nz, int const n_cells, Real const dt, + Real const dx, Real const dy, Real const dz) +{ + // get a thread index + int const blockId = blockIdx.x + blockIdx.y * gridDim.x; + int const threadId = threadIdx.x + blockId * blockDim.x; + int xid, yid, zid; + cuda_utilities::compute3DIndices(threadId, nx, ny, xid, yid, zid); - // Thread guard to avoid overrun and to skip ghost cells that cannot be - // evolved due to missing electric fields that can't be reconstructed - if ( xid < nx-2 - and yid < ny-2 - and zid < nz-2) - { - // Compute the three dt/dx quantities - Real const dtodx = dt/dx; - Real const dtody = dt/dy; - Real const dtodz = dt/dz; + // Thread guard to avoid overrun and to skip ghost cells that cannot be + // evolved due to missing electric fields that can't be reconstructed + if (xid < nx - 2 and yid < ny - 2 and zid < nz - 2) { + // Compute the three dt/dx quantities + Real const dtodx = dt / dx; + Real const dtody = dt / dy; + Real const dtodz = dt / dz; - // Load the various edge electric fields required. The '1' and '2' - // fields are not shared and the '3' fields are shared by two of the - // updates - Real electric_x_1 = ctElectricFields[(cuda_utilities::compute1DIndex(xid , yid+1, zid , nx, ny))]; - Real electric_x_2 = ctElectricFields[(cuda_utilities::compute1DIndex(xid , yid , zid+1, nx, ny))]; - Real electric_x_3 = ctElectricFields[(cuda_utilities::compute1DIndex(xid , yid+1, zid+1, nx, ny))]; - Real electric_y_1 = ctElectricFields[(cuda_utilities::compute1DIndex(xid+1, yid , zid , nx, ny)) + n_cells]; - Real electric_y_2 = ctElectricFields[(cuda_utilities::compute1DIndex(xid , yid , zid+1, nx, ny)) + n_cells]; - Real electric_y_3 = ctElectricFields[(cuda_utilities::compute1DIndex(xid+1, yid , zid+1, nx, ny)) + n_cells]; - Real electric_z_1 = ctElectricFields[(cuda_utilities::compute1DIndex(xid+1, yid , zid , nx, ny)) + 2 * n_cells]; - Real electric_z_2 = ctElectricFields[(cuda_utilities::compute1DIndex(xid , yid+1, zid , nx, ny)) + 2 * n_cells]; - Real electric_z_3 = ctElectricFields[(cuda_utilities::compute1DIndex(xid+1, yid+1, zid , nx, ny)) + 2 * n_cells]; + // Load the various edge electric fields required. The '1' and '2' + // fields are not shared and the '3' fields are shared by two of the + // updates + Real electric_x_1 = ctElectricFields[( + cuda_utilities::compute1DIndex(xid, yid + 1, zid, nx, ny))]; + Real electric_x_2 = ctElectricFields[( + cuda_utilities::compute1DIndex(xid, yid, zid + 1, nx, ny))]; + Real electric_x_3 = ctElectricFields[( + cuda_utilities::compute1DIndex(xid, yid + 1, zid + 1, nx, ny))]; + Real electric_y_1 = ctElectricFields[(cuda_utilities::compute1DIndex( + xid + 1, yid, zid, nx, ny)) + + n_cells]; + Real electric_y_2 = ctElectricFields[(cuda_utilities::compute1DIndex( + xid, yid, zid + 1, nx, ny)) + + n_cells]; + Real electric_y_3 = ctElectricFields[(cuda_utilities::compute1DIndex( + xid + 1, yid, zid + 1, nx, ny)) + + n_cells]; + Real electric_z_1 = ctElectricFields[(cuda_utilities::compute1DIndex( + xid + 1, yid, zid, nx, ny)) + + 2 * n_cells]; + Real electric_z_2 = ctElectricFields[(cuda_utilities::compute1DIndex( + xid, yid + 1, zid, nx, ny)) + + 2 * n_cells]; + Real electric_z_3 = ctElectricFields[(cuda_utilities::compute1DIndex( + xid + 1, yid + 1, zid, nx, ny)) + + 2 * n_cells]; - // Perform Updates + // Perform Updates - // X field update - // S&G 2009 equation 10 - destinationGrid[threadId + (grid_enum::magnetic_x)*n_cells] = sourceGrid[threadId + (grid_enum::magnetic_x)*n_cells] - + dtodz * (electric_y_3 - electric_y_1) - + dtody * (electric_z_1 - electric_z_3); + // X field update + // S&G 2009 equation 10 + destinationGrid[threadId + (grid_enum::magnetic_x)*n_cells] = + sourceGrid[threadId + (grid_enum::magnetic_x)*n_cells] + + dtodz * (electric_y_3 - electric_y_1) + + dtody * (electric_z_1 - electric_z_3); - // Y field update - // S&G 2009 equation 11 - destinationGrid[threadId + (grid_enum::magnetic_y)*n_cells] = sourceGrid[threadId + (grid_enum::magnetic_y)*n_cells] - + dtodx * (electric_z_3 - electric_z_2) - + dtodz * (electric_x_1 - electric_x_3); + // Y field update + // S&G 2009 equation 11 + destinationGrid[threadId + (grid_enum::magnetic_y)*n_cells] = + sourceGrid[threadId + (grid_enum::magnetic_y)*n_cells] + + dtodx * (electric_z_3 - electric_z_2) + + dtodz * (electric_x_1 - electric_x_3); - // Z field update - // S&G 2009 equation 12 - destinationGrid[threadId + (grid_enum::magnetic_z)*n_cells] = sourceGrid[threadId + (grid_enum::magnetic_z)*n_cells] - + dtody * (electric_x_3 - electric_x_2) - + dtodx * (electric_y_2 - electric_y_3); - } - } - // ========================================================================= -} // end namespace mhd -#endif // MHD + // Z field update + // S&G 2009 equation 12 + destinationGrid[threadId + (grid_enum::magnetic_z)*n_cells] = + sourceGrid[threadId + (grid_enum::magnetic_z)*n_cells] + + dtody * (electric_x_3 - electric_x_2) + + dtodx * (electric_y_2 - electric_y_3); + } +} +// ========================================================================= +} // end namespace mhd +#endif // MHD diff --git a/src/mhd/magnetic_update.h b/src/mhd/magnetic_update.h index 4b71689b6..b0398fec3 100644 --- a/src/mhd/magnetic_update.h +++ b/src/mhd/magnetic_update.h @@ -24,36 +24,29 @@ */ namespace mhd { - // ========================================================================= - /*! - * \brief Update the magnetic field using the CT electric fields - * - * \param[in] sourceGrid The array which holds the old values of the - * magnetic field - * \param[out] destinationGrid The array to hold the updated values of the - * magnetic field - * \param[in] ctElectricFields The array of constrained transport electric - * fields - * \param[in] nx The number of cells in the x-direction - * \param[in] ny The number of cells in the y-direction - * \param[in] nz The number of cells in the z-direction - * \param[in] n_cells The total number of cells - * \param[in] dt The time step. If doing the half time step update make sure - * to divide it by two when passing the time step to this kernel - * \param[in] dx The size of each cell in the x-direction - * \param[in] dy The size of each cell in the y-direction - * \param[in] dz The size of each cell in the z-direction - */ - __global__ void Update_Magnetic_Field_3D(Real *sourceGrid, - Real *destinationGrid, - Real *ctElectricFields, - int const nx, - int const ny, - int const nz, - int const n_cells, - Real const dt, - Real const dx, - Real const dy, - Real const dz); - // ========================================================================= -} // end namespace mhd \ No newline at end of file +// ========================================================================= +/*! + * \brief Update the magnetic field using the CT electric fields + * + * \param[in] sourceGrid The array which holds the old values of the + * magnetic field + * \param[out] destinationGrid The array to hold the updated values of the + * magnetic field + * \param[in] ctElectricFields The array of constrained transport electric + * fields + * \param[in] nx The number of cells in the x-direction + * \param[in] ny The number of cells in the y-direction + * \param[in] nz The number of cells in the z-direction + * \param[in] n_cells The total number of cells + * \param[in] dt The time step. If doing the half time step update make sure + * to divide it by two when passing the time step to this kernel + * \param[in] dx The size of each cell in the x-direction + * \param[in] dy The size of each cell in the y-direction + * \param[in] dz The size of each cell in the z-direction + */ +__global__ void Update_Magnetic_Field_3D( + Real *sourceGrid, Real *destinationGrid, Real *ctElectricFields, + int const nx, int const ny, int const nz, int const n_cells, Real const dt, + Real const dx, Real const dy, Real const dz); +// ========================================================================= +} // end namespace mhd \ No newline at end of file diff --git a/src/mhd/magnetic_update_tests.cu b/src/mhd/magnetic_update_tests.cu index f4d0d44a0..ecf76ade6 100644 --- a/src/mhd/magnetic_update_tests.cu +++ b/src/mhd/magnetic_update_tests.cu @@ -6,18 +6,18 @@ */ // STL Includes -#include -#include #include #include +#include +#include // External Includes -#include // Include GoogleTest and related libraries/headers +#include // Include GoogleTest and related libraries/headers // Local Includes -#include "../utils/testing_utilities.h" -#include "../utils/cuda_utilities.h" #include "../mhd/magnetic_update.h" +#include "../utils/cuda_utilities.h" +#include "../utils/testing_utilities.h" #ifdef MHD // ============================================================================= @@ -27,126 +27,120 @@ */ class tMHDUpdateMagneticField3D : public ::testing::Test { -public: - - /*! - * \brief Initialize and allocate all the various required variables and - * arrays - * - */ - tMHDUpdateMagneticField3D() - : - nx(3), + public: + /*! + * \brief Initialize and allocate all the various required variables and + * arrays + * + */ + tMHDUpdateMagneticField3D() + : nx(3), ny(nx), nz(nx), - n_cells(nx*ny*nz), + n_cells(nx * ny * nz), dt(3.2), dx(2.5), dy(2.5), dz(2.5), - sourceGrid (n_cells * (grid_enum::num_fields)), - destinationGrid (n_cells * (grid_enum::num_fields), -999.), + sourceGrid(n_cells * (grid_enum::num_fields)), + destinationGrid(n_cells * (grid_enum::num_fields), -999.), ctElectricFields(n_cells * 3), - fiducialData (n_cells * (grid_enum::num_fields), -999.), - dimGrid((n_cells + TPB - 1),1,1), - dimBlock(TPB,1,1) - { - // Allocate device arrays - CudaSafeCall ( cudaMalloc(&dev_sourceGrid, sourceGrid.size()*sizeof(double)) ); - CudaSafeCall ( cudaMalloc(&dev_destinationGrid, destinationGrid.size()*sizeof(double)) ); - CudaSafeCall ( cudaMalloc(&dev_ctElectricFields, ctElectricFields.size()*sizeof(double)) ); + fiducialData(n_cells * (grid_enum::num_fields), -999.), + dimGrid((n_cells + TPB - 1), 1, 1), + dimBlock(TPB, 1, 1) + { + // Allocate device arrays + CudaSafeCall( + cudaMalloc(&dev_sourceGrid, sourceGrid.size() * sizeof(double))); + CudaSafeCall(cudaMalloc(&dev_destinationGrid, + destinationGrid.size() * sizeof(double))); + CudaSafeCall(cudaMalloc(&dev_ctElectricFields, + ctElectricFields.size() * sizeof(double))); - // Populate the grids with values where vector.at(i) = double(i). The - // values chosen aren't that important, just that every cell has a unique - // value - std::iota(std::begin(sourceGrid), std::end(sourceGrid), 0.); - std::iota(std::begin(ctElectricFields), std::end(ctElectricFields), sourceGrid.back() + 1); - } - ~tMHDUpdateMagneticField3D() = default; -protected: - // Initialize the test grid and other state variables - size_t const nx, ny, nz; - size_t const n_cells; - Real const dt, dx, dy, dz; + // Populate the grids with values where vector.at(i) = double(i). The + // values chosen aren't that important, just that every cell has a unique + // value + std::iota(std::begin(sourceGrid), std::end(sourceGrid), 0.); + std::iota(std::begin(ctElectricFields), std::end(ctElectricFields), + sourceGrid.back() + 1); + } + ~tMHDUpdateMagneticField3D() = default; + + protected: + // Initialize the test grid and other state variables + size_t const nx, ny, nz; + size_t const n_cells; + Real const dt, dx, dy, dz; - // Launch Parameters - dim3 const dimGrid; // How many blocks in the grid - dim3 const dimBlock; // How many threads per block + // Launch Parameters + dim3 const dimGrid; // How many blocks in the grid + dim3 const dimBlock; // How many threads per block - // Make sure the vector is large enough that the locations where the - // magnetic field would be in the real grid are filled - std::vector sourceGrid; - std::vector destinationGrid; - std::vector ctElectricFields; - std::vector fiducialData; + // Make sure the vector is large enough that the locations where the + // magnetic field would be in the real grid are filled + std::vector sourceGrid; + std::vector destinationGrid; + std::vector ctElectricFields; + std::vector fiducialData; - // device pointers - double *dev_sourceGrid, *dev_destinationGrid, *dev_ctElectricFields, *dev_fiducialData; + // device pointers + double *dev_sourceGrid, *dev_destinationGrid, *dev_ctElectricFields, + *dev_fiducialData; - /*! - * \brief Launch the kernel and check results - * - */ - void runTest() - { - // Copy values to GPU - CudaSafeCall( cudaMemcpy(dev_sourceGrid, sourceGrid.data(), sourceGrid.size()*sizeof(Real), cudaMemcpyHostToDevice) ); - CudaSafeCall( cudaMemcpy(dev_destinationGrid, destinationGrid.data(), destinationGrid.size()*sizeof(Real), cudaMemcpyHostToDevice) ); - CudaSafeCall( cudaMemcpy(dev_ctElectricFields, ctElectricFields.data(), ctElectricFields.size()*sizeof(Real), cudaMemcpyHostToDevice) ); + /*! + * \brief Launch the kernel and check results + * + */ + void runTest() + { + // Copy values to GPU + CudaSafeCall(cudaMemcpy(dev_sourceGrid, sourceGrid.data(), + sourceGrid.size() * sizeof(Real), + cudaMemcpyHostToDevice)); + CudaSafeCall(cudaMemcpy(dev_destinationGrid, destinationGrid.data(), + destinationGrid.size() * sizeof(Real), + cudaMemcpyHostToDevice)); + CudaSafeCall(cudaMemcpy(dev_ctElectricFields, ctElectricFields.data(), + ctElectricFields.size() * sizeof(Real), + cudaMemcpyHostToDevice)); - // Call the kernel to test - hipLaunchKernelGGL(mhd::Update_Magnetic_Field_3D, - dimGrid, - dimBlock, - 0, - 0, - dev_sourceGrid, - dev_destinationGrid, - dev_ctElectricFields, - nx, - ny, - nz, - n_cells, - dt, - dx, - dy, - dz); - CudaCheckError(); + // Call the kernel to test + hipLaunchKernelGGL(mhd::Update_Magnetic_Field_3D, dimGrid, dimBlock, 0, 0, + dev_sourceGrid, dev_destinationGrid, + dev_ctElectricFields, nx, ny, nz, n_cells, dt, dx, dy, + dz); + CudaCheckError(); - // Copy test data back - CudaSafeCall( cudaMemcpy(destinationGrid.data(), - dev_destinationGrid, - destinationGrid.size()*sizeof(Real), - cudaMemcpyDeviceToHost) ); - cudaDeviceSynchronize(); + // Copy test data back + CudaSafeCall(cudaMemcpy(destinationGrid.data(), dev_destinationGrid, + destinationGrid.size() * sizeof(Real), + cudaMemcpyDeviceToHost)); + cudaDeviceSynchronize(); - // Check the results - for (size_t i = 0; i < fiducialData.size(); i++) - { - int xid, yid, zid; - cuda_utilities::compute3DIndices(i, nx, ny, xid, yid, zid); - testingUtilities::checkResults(fiducialData.at(i), - destinationGrid.at(i), - "value at i = " + std::to_string(i) - + ", xid = " + std::to_string(xid) - + ", yid = " + std::to_string(yid) - + ", zid = " + std::to_string(zid)); - } + // Check the results + for (size_t i = 0; i < fiducialData.size(); i++) { + int xid, yid, zid; + cuda_utilities::compute3DIndices(i, nx, ny, xid, yid, zid); + testingUtilities::checkResults(fiducialData.at(i), destinationGrid.at(i), + "value at i = " + std::to_string(i) + + ", xid = " + std::to_string(xid) + + ", yid = " + std::to_string(yid) + + ", zid = " + std::to_string(zid)); } + } }; // ============================================================================= // ============================================================================= -TEST_F(tMHDUpdateMagneticField3D, - CorrectInputExpectCorrectOutput) +TEST_F(tMHDUpdateMagneticField3D, CorrectInputExpectCorrectOutput) { - // Fiducial values - fiducialData.at(135) = 142.68000000000001; - fiducialData.at(162) = 151.75999999999999; - fiducialData.at(189) = 191.56; + // Fiducial values + fiducialData.at(135) = 142.68000000000001; + fiducialData.at(162) = 151.75999999999999; + fiducialData.at(189) = 191.56; - // Launch kernel and check results - runTest(); + // Launch kernel and check results + runTest(); } // ============================================================================= -#endif // MHD +#endif // MHD diff --git a/src/model/disk_ICs.cpp b/src/model/disk_ICs.cpp index 858eb874a..f834cd015 100644 --- a/src/model/disk_ICs.cpp +++ b/src/model/disk_ICs.cpp @@ -1,301 +1,298 @@ /*! \file disk_ICs.cpp * \brief Definitions of initial conditions for hydrostatic disks. - Note that the grid is mapped to 1D as i + (x_dim)*j + (x_dim*y_dim)*k. */ + Note that the grid is mapped to 1D as i + (x_dim)*j + + (x_dim*y_dim)*k. */ -#include -#include #include -#include +#include +#include #include #include + +#include + #include "../global/global.h" #include "../grid/grid3D.h" -#include "../mpi/mpi_routines.h" #include "../io/io.h" +#include "../mpi/mpi_routines.h" #include "../utils/error_handling.h" #include "disk_galaxy.h" - // #define DISK_ICS // function with logarithms used in NFW definitions -Real log_func(Real y) -{ - return log(1+y) - y/(1+y); -} +Real log_func(Real y) { return log(1 + y) - y / (1 + y); } -//vertical acceleration in NFW halo +// vertical acceleration in NFW halo Real gz_halo_D3D(Real R, Real z, Real *hdp) { - Real M_h = hdp[2]; //halo mass - Real R_h = hdp[5]; //halo scale length - Real c_vir = hdp[4]; //halo concentration parameter - Real r = sqrt(R*R + z*z); //spherical radius - Real x = r / R_h; - Real z_comp = z/r; + Real M_h = hdp[2]; // halo mass + Real R_h = hdp[5]; // halo scale length + Real c_vir = hdp[4]; // halo concentration parameter + Real r = sqrt(R * R + z * z); // spherical radius + Real x = r / R_h; + Real z_comp = z / r; Real A = log_func(x); - Real B = 1.0 / (r*r); - Real C = GN*M_h/log_func(c_vir); + Real B = 1.0 / (r * r); + Real C = GN * M_h / log_func(c_vir); - //checked with wolfram alpha - return -C*A*B*z_comp; + // checked with wolfram alpha + return -C * A * B * z_comp; } - -//radial acceleration in NFW halo +// radial acceleration in NFW halo Real gr_halo_D3D(Real R, Real z, Real *hdp) { - Real M_h = hdp[2]; //halo mass - Real R_h = hdp[5]; //halo scale length - Real c_vir = hdp[4]; //halo concentration parameter - Real r = sqrt(R*R + z*z); //spherical radius - Real x = r / R_h; - Real r_comp = R/r; + Real M_h = hdp[2]; // halo mass + Real R_h = hdp[5]; // halo scale length + Real c_vir = hdp[4]; // halo concentration parameter + Real r = sqrt(R * R + z * z); // spherical radius + Real x = r / R_h; + Real r_comp = R / r; Real A = log_func(x); - Real B = 1.0 / (r*r); - Real C = GN*M_h/log_func(c_vir); + Real B = 1.0 / (r * r); + Real C = GN * M_h / log_func(c_vir); - //checked with wolfram alpha - return -C*A*B*r_comp; + // checked with wolfram alpha + return -C * A * B * r_comp; } -//disk radial surface density profile +// disk radial surface density profile Real Sigma_disk_D3D(Real r, Real *hdp) { - //return the exponential surface density + // return the exponential surface density Real Sigma_0 = hdp[9]; Real R_g = hdp[10]; - Real R_c = 4.5; + Real R_c = 4.5; Real Sigma; Real delta = 0.1; - Real norm = log(1.0/3.0); - Sigma = Sigma_0 * exp(-r/R_g); + Real norm = log(1.0 / 3.0); + Sigma = Sigma_0 * exp(-r / R_g); // taper the edge of the disk to 0 if (r < R_c) { - Sigma *= 2.0 - 1.0 / (1.0 - exp((r - (4.5 - delta*norm))/delta)); - } - else { - Sigma *= 1.0 / (1.0 - exp(((4.5 + delta*norm) - r)/delta)) - 1.0; + Sigma *= 2.0 - 1.0 / (1.0 - exp((r - (4.5 - delta * norm)) / delta)); + } else { + Sigma *= 1.0 / (1.0 - exp(((4.5 + delta * norm) - r) / delta)) - 1.0; } return Sigma; } -//vertical acceleration in miyamoto nagai +// vertical acceleration in miyamoto nagai Real gz_disk_D3D(Real R, Real z, Real *hdp) { - Real M_d = hdp[1]; //disk mass - Real R_d = hdp[6]; //MN disk length - Real Z_d = hdp[7]; //MN disk height - Real a = R_d; - Real b = Z_d; - Real A = sqrt(b*b + z*z); - Real B = a + A; - Real C = pow(B*B + R*R, 1.5); - - //checked with wolfram alpha - return -GN*M_d*z*B/(A*C); + Real M_d = hdp[1]; // disk mass + Real R_d = hdp[6]; // MN disk length + Real Z_d = hdp[7]; // MN disk height + Real a = R_d; + Real b = Z_d; + Real A = sqrt(b * b + z * z); + Real B = a + A; + Real C = pow(B * B + R * R, 1.5); + + // checked with wolfram alpha + return -GN * M_d * z * B / (A * C); } -//radial acceleration in miyamoto nagai +// radial acceleration in miyamoto nagai Real gr_disk_D3D(Real R, Real z, Real *hdp) { - Real M_d = hdp[1]; //disk mass - Real R_d = hdp[6]; //MN disk length - Real Z_d = hdp[7]; //MN disk height - Real A = sqrt(Z_d*Z_d + z*z); - Real B = R_d + A; - Real C = pow(B*B + R*R, 1.5); - - //checked with wolfram alpha - return -GN*M_d*R/C; + Real M_d = hdp[1]; // disk mass + Real R_d = hdp[6]; // MN disk length + Real Z_d = hdp[7]; // MN disk height + Real A = sqrt(Z_d * Z_d + z * z); + Real B = R_d + A; + Real C = pow(B * B + R * R, 1.5); + + // checked with wolfram alpha + return -GN * M_d * R / C; } - -//NFW halo potential +// NFW halo potential Real phi_halo_D3D(Real R, Real z, Real *hdp) { - Real M_h = hdp[2]; //halo mass - Real R_h = hdp[5]; //halo scale length - Real c_vir = hdp[4]; //halo concentration parameter - Real r = sqrt(R*R + z*z); //spherical radius - Real x = r / R_h; + Real M_h = hdp[2]; // halo mass + Real R_h = hdp[5]; // halo scale length + Real c_vir = hdp[4]; // halo concentration parameter + Real r = sqrt(R * R + z * z); // spherical radius + Real x = r / R_h; - Real C = GN*M_h/(R_h*log_func(c_vir)); + Real C = GN * M_h / (R_h * log_func(c_vir)); - //limit x to non-zero value - if(x<1.0e-9) - x = 1.0e-9; + // limit x to non-zero value + if (x < 1.0e-9) x = 1.0e-9; - //checked with wolfram alpha - return -C*log(1+x)/x; + // checked with wolfram alpha + return -C * log(1 + x) / x; } -//Miyamoto-Nagai potential +// Miyamoto-Nagai potential Real phi_disk_D3D(Real R, Real z, Real *hdp) { - Real M_d = hdp[1]; //disk mass - Real R_d = hdp[6]; //MN disk length - Real Z_d = hdp[7]; //MN disk height - Real A = sqrt(z*z + Z_d*Z_d); - Real B = R_d + A; - Real C = sqrt(R*R + B*B); - - //patel et al. 2017, eqn 2 - return -GN*M_d/C; + Real M_d = hdp[1]; // disk mass + Real R_d = hdp[6]; // MN disk length + Real Z_d = hdp[7]; // MN disk height + Real A = sqrt(z * z + Z_d * Z_d); + Real B = R_d + A; + Real C = sqrt(R * R + B * B); + + // patel et al. 2017, eqn 2 + return -GN * M_d / C; } -//total potential +// total potential Real phi_total_D3D(Real R, Real z, Real *hdp) { - Real Phi_A = phi_halo_D3D(R,z,hdp); - Real Phi_B = phi_disk_D3D(R,z,hdp); + Real Phi_A = phi_halo_D3D(R, z, hdp); + Real Phi_B = phi_disk_D3D(R, z, hdp); return Phi_A + Phi_B; } Real phi_hot_halo_D3D(Real r, Real *hdp) { - Real Phi_A = phi_halo_D3D(0,r,hdp); - Real Phi_B = phi_disk_D3D(0,r,hdp); - //return Phi_A; + Real Phi_A = phi_halo_D3D(0, r, hdp); + Real Phi_B = phi_disk_D3D(0, r, hdp); + // return Phi_A; return Phi_A + Phi_B; } - -//returns the cell-centered vertical -//location of the cell with index k -//k is indexed at 0 at the lowest ghost cell +// returns the cell-centered vertical +// location of the cell with index k +// k is indexed at 0 at the lowest ghost cell Real z_hc_D3D(int k, Real dz, int nz, int ng) { - //checked that this works, such that the - //if dz = L_z/nz for the real domain, then the z positions - //are set correctly for cell centers with nz spanning - //the real domain, and nz + 2*ng spanning the real + ghost domains - if(!(nz%2)) - { - //even # of cells - return 0.5*dz + ((Real) (k-ng-nz/2))*dz; - }else{ - //odd # of cells - return ((Real) (k-ng-(nz-1)/2))*dz; + // checked that this works, such that the + // if dz = L_z/nz for the real domain, then the z positions + // are set correctly for cell centers with nz spanning + // the real domain, and nz + 2*ng spanning the real + ghost domains + if (!(nz % 2)) { + // even # of cells + return 0.5 * dz + ((Real)(k - ng - nz / 2)) * dz; + } else { + // odd # of cells + return ((Real)(k - ng - (nz - 1) / 2)) * dz; } } -//returns the cell-centered radial -//location of the cell with index i +// returns the cell-centered radial +// location of the cell with index i Real r_hc_D3D(int i, Real dr) { - //the zeroth cell is centered at 0.5*dr - return 0.5*dr + ((Real) i)*dr; + // the zeroth cell is centered at 0.5*dr + return 0.5 * dr + ((Real)i) * dr; } - - -/*! \fn void hydrostatic_ray_analytical_D3D(Real *rho, Real *r, Real *hdp, Real dr, int nr) - * \brief Calculate the density at spherical radius r due to a hydrostatic halo. Uses an analytic - expression normalized by the value of the potential at the cooling radius. */ -void hydrostatic_ray_analytical_D3D(Real *rho, Real *r, Real *hdp, Real dr, int nr) +/*! \fn void hydrostatic_ray_analytical_D3D(Real *rho, Real *r, Real *hdp, Real + dr, int nr) + * \brief Calculate the density at spherical radius r due to a hydrostatic + halo. Uses an analytic expression normalized by the value of the potential at + the cooling radius. */ +void hydrostatic_ray_analytical_D3D(Real *rho, Real *r, Real *hdp, Real dr, + int nr) { - //Routine to determine the hydrostatic density profile - //along a ray from the galaxy center - int i; //index along r direction + // Routine to determine the hydrostatic density profile + // along a ray from the galaxy center + int i; // index along r direction - Real gamma = hdp[13]; //adiabatic index - Real rho_eos = hdp[18]; //density where K_EOS is set - Real cs = hdp[19]; //sound speed at rho_eos - Real r_cool = hdp[20]; //cooling radius + Real gamma = hdp[13]; // adiabatic index + Real rho_eos = hdp[18]; // density where K_EOS is set + Real cs = hdp[19]; // sound speed at rho_eos + Real r_cool = hdp[20]; // cooling radius - Real Phi_0; //potential at cooling radius + Real Phi_0; // potential at cooling radius - Real D_rho; //ratio of density at mid plane and rho_eos + Real D_rho; // ratio of density at mid plane and rho_eos - Real gmo = gamma - 1.0; //gamma-1 + Real gmo = gamma - 1.0; // gamma-1 - //compute the potential at the cooling radius - Phi_0 = phi_hot_halo_D3D(r_cool,hdp); + // compute the potential at the cooling radius + Phi_0 = phi_hot_halo_D3D(r_cool, hdp); - //We are normalizing to the central density - //so D_rho == 1 + // We are normalizing to the central density + // so D_rho == 1 D_rho = 1.0; - //store densities - for(i=0;i0.9 density in single cell R %e D_rho %e z_1 %e Phi(z) %e Phi_0 %E cs %e\n",R,D_rho,z_1,phi_total_D3D(R,z_1,hdp),Phi_0,cs); - - - //let's find the cell above the disk where the - //density falls by exp(-7) < 1.0e-3. - for(k=ks;k=7.0) - break; + // perform a simple check about the fraction of density within + // a single cell + z_1 = z_hc_D3D(ks, dz, nz, ng) + 0.5 * dz; // cell ceiling + D_rho = (phi_total_D3D(R, z_1, hdp) - Phi_0) / (cs * cs); + + if (exp(-1 * D_rho) < 0.1) + printf( + "WARNING: >0.9 density in single cell R %e D_rho %e z_1 %e Phi(z) %e " + "Phi_0 %E cs %e\n", + R, D_rho, z_1, phi_total_D3D(R, z_1, hdp), Phi_0, cs); + + // let's find the cell above the disk where the + // density falls by exp(-7) < 1.0e-3. + for (k = ks; k < nzt; k++) { + z_1 = z_hc_D3D(k, dz, nz, ng) + 0.5 * dz; // cell ceiling + D_rho = (phi_total_D3D(R, z_1, hdp) - Phi_0) / (cs * cs); + if (D_rho >= 7.0) break; } - //if(R<1.0) - // printf("Cells above disk (k-ks) = %d, z_1 = %e, exp(-D) = %e, R = %e\n",k-ks,z_1,exp(-1*D_rho),R); + // if(R<1.0) + // printf("Cells above disk (k-ks) = %d, z_1 = %e, exp(-D) = %e, R = + // %e\n",k-ks,z_1,exp(-1*D_rho),R); - //now we can compute the unnormalized integral of the density + // now we can compute the unnormalized integral of the density z_disk_max = z_1; - //Compute surface density - z_int_min = 0.0; //kpc - z_int_max = z_1; //kpc - dz_int = (z_int_max-z_int_min)/((Real) (n_int)); - phi_int = 0.0; - for(k=0;kz_disk_max) - z_int_max = z_disk_max; - if(!flag) - { - dz_int = (z_int_max-z_int_min)/((Real) (n_int)); + n_int = 10; // integrate over a 1/10 cell + for (k = ks; k < nzt; k++) { + // find cell center, bottom, and top + z_int_min = z_hc_D3D(k, dz, nz, ng) - 0.5 * dz; + z_int_max = z_hc_D3D(k, dz, nz, ng) + 0.5 * dz; + if (z_int_max > z_disk_max) z_int_max = z_disk_max; + if (!flag) { + dz_int = (z_int_max - z_int_min) / ((Real)(n_int)); phi_int = 0.0; - for(i=0;i 10.) - z_2 = 10.*z_1; - //advance limit + z_0 = 1.0e-3; + z_1 = 1.0e-2; + while (!flag_phi) { + A_0 = D_rho - (phi_total_D3D(R, z_0, hdp) - Phi_0) / (cs * cs); + A_1 = D_rho - (phi_total_D3D(R, z_1, hdp) - Phi_0) / (cs * cs); + z_2 = z_1 - A_1 * (z_1 - z_0) / (A_1 - A_0); + if (fabs(z_2 - z_1) / fabs(z_1) > 10.) z_2 = 10. * z_1; + // advance limit z_0 = z_1; z_1 = z_2; - if(fabs(z_1-z_0)1000) - { + if (iter_phi > 1000) { printf("Something wrong in determining central density...\n"); - printf("iter_phi = %d\n",iter_phi); - printf("z_0 %e z_1 %e z_2 %e A_0 %e A_1 %e phi_0 %e phi_1 %e\n",z_0,z_1,z_2,A_0,A_1,phi_total_D3D(R,z_0,hdp),phi_total_D3D(R,z_1,hdp)); - #ifdef MPI_CHOLLA + printf("iter_phi = %d\n", iter_phi); + printf("z_0 %e z_1 %e z_2 %e A_0 %e A_1 %e phi_0 %e phi_1 %e\n", z_0, + z_1, z_2, A_0, A_1, phi_total_D3D(R, z_0, hdp), + phi_total_D3D(R, z_1, hdp)); +#ifdef MPI_CHOLLA MPI_Finalize(); - #endif +#endif exit(0); } } - A_1 = D_rho - (phi_total_D3D(R,z_1,hdp)-Phi_0)/(cs*cs); + A_1 = D_rho - (phi_total_D3D(R, z_1, hdp) - Phi_0) / (cs * cs); z_disk_max = z_1; - //Compute surface density - z_int_min = 0.0; //kpc - z_int_max = z_1; //kpc - dz_int = (z_int_max-z_int_min)/((Real) (n_int)); - phi_int = 0.0; - for(k=0;k100) - { + if (iter > 100) { printf("About to exit...\n"); - #ifdef MPI_CHOLLA +#ifdef MPI_CHOLLA MPI_Finalize(); - #endif +#endif exit(0); } } - //OK, at this stage we know how to set the densities - //so let's take cell averages + // OK, at this stage we know how to set the densities + // so let's take cell averages flag = 0; - n_int = 10; // integrate over a 1/10 cell - for(k=ks;kz_disk_max) - z_int_max = z_disk_max; - if(!flag) - { - dz_int = (z_int_max-z_int_min)/((Real) (n_int)); + n_int = 10; // integrate over a 1/10 cell + for (k = ks; k < nzt; k++) { + // find cell center, bottom, and top + z_int_min = z_hc_D3D(k, dz, nz, ng) - 0.5 * dz; + z_int_max = z_hc_D3D(k, dz, nz, ng) + 0.5 * dz; + if (z_int_max > z_disk_max) z_int_max = z_disk_max; + if (!flag) { + dz_int = (z_int_max - z_int_min) / ((Real)(n_int)); phi_int = 0.0; - for(i=0;i 10.) - z_2 = 10.*z_1; + if (fabs(z_2 - z_1) / fabs(z_1) > 10.) z_2 = 10. * z_1; - //advance limit + // advance limit z_0 = z_1; z_1 = z_2; - //printf("z_0 %e z_1 %e\n",z_0,z_1); - if(fabs(z_1-z_0)1000) - { + if (iter_phi > 1000) { printf("Something wrong in determining central density...\n"); - printf("iter_phi = %d\n",iter_phi); - printf("z_0 %e z_1 %e z_2 %e A_0 %e A_1 %e phi_0 %e phi_1 %e\n",z_0,z_1,z_2,A_0,A_1,phi_total_D3D(0,z_0,hdp),phi_total_D3D(0,z_1,hdp)); - #ifdef MPI_CHOLLA + printf("iter_phi = %d\n", iter_phi); + printf("z_0 %e z_1 %e z_2 %e A_0 %e A_1 %e phi_0 %e phi_1 %e\n", z_0, z_1, + z_2, A_0, A_1, phi_total_D3D(0, z_0, hdp), + phi_total_D3D(0, z_1, hdp)); +#ifdef MPI_CHOLLA MPI_Finalize(); - #endif +#endif exit(0); } } - //generate a high resolution density and z profile - int n_int = 1000; - Real z_int_min = 0.0; //kpc - Real z_int_max = z_1; //kpc - Real dz_int = (z_int_max-z_int_min)/((Real) (n_int)); - Real phi_int = 0.0; - - //now integrate the density profile - for(k=0;k=nr-1) - { - if(i<0) - { + // find the index of the current + // position in r_halo (based on r_hc_D3D) + i = (int)((r - 0.5 * dr) / dr); + if (i < 0 || i >= nr - 1) { + if (i < 0) { i = 0; - }else{ - i = nr-2; + } else { + i = nr - 2; } } // return the interpolated density profile - return (rho_halo[i+1] - rho_halo[i])*(r - r_halo[i])/(r_halo[i+1]-r_halo[i]) + rho_halo[i]; + return (rho_halo[i + 1] - rho_halo[i]) * (r - r_halo[i]) / + (r_halo[i + 1] - r_halo[i]) + + rho_halo[i]; } - - - - /*! \fn void Disk_3D(parameters P) * \brief Initialize the grid with a 3D disk. */ void Grid3D::Disk_3D(parameters p) { - - #ifdef DISK_ICS +#ifdef DISK_ICS int i, j, k, id; Real x_pos, y_pos, z_pos, r, phi; @@ -775,62 +740,67 @@ void Grid3D::Disk_3D(parameters p) DiskGalaxy galaxy = Galaxies::MW; // M82 model Galaxies::M82; - M_vir = galaxy.getM_vir(); // viral mass in M_sun - M_d = galaxy.getM_d(); // mass of disk in M_sun (assume all stars) - R_d = galaxy.getR_d(); // stellar disk scale length in kpc - z_d = galaxy.getZ_d(); // stellar disk scale height in kpc - R_vir = galaxy.getR_vir(); // viral radius in kpc - c_vir = galaxy.getC_vir(); // halo concentration (to account for adiabatic contraction) - r_cool = galaxy.getR_cool(); // cooling radius in kpc (MW) - - M_h = M_vir - M_d; // halo mass in M_sun - R_s = R_vir / c_vir; // halo scale length in kpc - //T_d = 5.9406e5; // SET TO MATCH K_EOS SET BY HAND for K_eos = 1.859984e-14 - //T_d = 2.0e5; - T_d = 1.0e4; // CHANGED FOR ISOTHERMAL - T_h = 1.0e6; // halo temperature, at density floor - rho_eos = 1.0e7; //gas eos normalized at 1e7 Msun/kpc^3 - rho_eos_h = 3.0e3; //gas eos normalized at 3e3 Msun/kpc^3 (about n_h = 10^-3.5) + M_vir = galaxy.getM_vir(); // viral mass in M_sun + M_d = galaxy.getM_d(); // mass of disk in M_sun (assume all stars) + R_d = galaxy.getR_d(); // stellar disk scale length in kpc + z_d = galaxy.getZ_d(); // stellar disk scale height in kpc + R_vir = galaxy.getR_vir(); // viral radius in kpc + c_vir = galaxy.getC_vir(); // halo concentration (to account for adiabatic + // contraction) + r_cool = galaxy.getR_cool(); // cooling radius in kpc (MW) + + M_h = M_vir - M_d; // halo mass in M_sun + R_s = R_vir / c_vir; // halo scale length in kpc + // T_d = 5.9406e5; // SET TO MATCH K_EOS SET BY HAND for K_eos = 1.859984e-14 + // T_d = 2.0e5; + T_d = 1.0e4; // CHANGED FOR ISOTHERMAL + T_h = 1.0e6; // halo temperature, at density floor + rho_eos = 1.0e7; // gas eos normalized at 1e7 Msun/kpc^3 + rho_eos_h = + 3.0e3; // gas eos normalized at 3e3 Msun/kpc^3 (about n_h = 10^-3.5) mu = 0.6; - R_g = 2.0*R_d; //gas scale length in kpc - Sigma_0 = 0.25*M_d/(2*M_PI*R_g*R_g); //central surface density in Msun/kpc^2 - H_g = z_d; //initial guess for gas scale height - //rho_floor = 1.0e3; //ICs minimum density in Msun/kpc^3 - - //EOS info - cs = sqrt(KB*T_d/(mu*MP))*TIME_UNIT/LENGTH_UNIT; //sound speed in kpc/kyr - cs_h = sqrt(KB*T_h/(mu*MP))*TIME_UNIT/LENGTH_UNIT; //sound speed in kpc/kyr - - //set some initial parameters - int nhdp = 21; //number of parameters to pass hydrostatic column - Real *hdp = (Real *) calloc(nhdp,sizeof(Real)); //parameters - hdp[0] = M_vir; - hdp[1] = M_d; - hdp[2] = M_h; - hdp[3] = R_vir; - hdp[4] = c_vir; - hdp[5] = R_s; - hdp[6] = R_d; - hdp[7] = z_d; - hdp[8] = T_d; - hdp[9] = Sigma_0; - hdp[10] = R_g; - hdp[11] = H_g; - hdp[13] = p.gamma; - - //determine rho_eos by setting central density of disk - //based on central temperature + R_g = 2.0 * R_d; // gas scale length in kpc + Sigma_0 = 0.25 * M_d / + (2 * M_PI * R_g * R_g); // central surface density in Msun/kpc^2 + H_g = z_d; // initial guess for gas scale height + // rho_floor = 1.0e3; //ICs minimum density in Msun/kpc^3 + + // EOS info + cs = sqrt(KB * T_d / (mu * MP)) * TIME_UNIT / + LENGTH_UNIT; // sound speed in kpc/kyr + cs_h = sqrt(KB * T_h / (mu * MP)) * TIME_UNIT / + LENGTH_UNIT; // sound speed in kpc/kyr + + // set some initial parameters + int nhdp = 21; // number of parameters to pass hydrostatic column + Real *hdp = (Real *)calloc(nhdp, sizeof(Real)); // parameters + hdp[0] = M_vir; + hdp[1] = M_d; + hdp[2] = M_h; + hdp[3] = R_vir; + hdp[4] = c_vir; + hdp[5] = R_s; + hdp[6] = R_d; + hdp[7] = z_d; + hdp[8] = T_d; + hdp[9] = Sigma_0; + hdp[10] = R_g; + hdp[11] = H_g; + hdp[13] = p.gamma; + + // determine rho_eos by setting central density of disk + // based on central temperature rho_eos = determine_rho_eos_D3D(cs, Sigma_0, hdp); - //set EOS parameters - //K_eos = cs*cs*pow(rho_eos,1.0-p.gamma)/p.gamma; //P = K\rho^gamma - K_eos = cs*cs*rho_eos; // CHANGED FOR ISOTHERMAL - K_eos_h = cs_h*cs_h*pow(rho_eos_h,1.0-p.gamma)/p.gamma; + // set EOS parameters + // K_eos = cs*cs*pow(rho_eos,1.0-p.gamma)/p.gamma; //P = K\rho^gamma + K_eos = cs * cs * rho_eos; // CHANGED FOR ISOTHERMAL + K_eos_h = cs_h * cs_h * pow(rho_eos_h, 1.0 - p.gamma) / p.gamma; - //Store remaining parameters + // Store remaining parameters hdp[12] = K_eos; - hdp[14] = 0.0; //rho_floor, set to 0 + hdp[14] = 0.0; // rho_floor, set to 0 hdp[15] = rho_eos; hdp[16] = cs; hdp[17] = K_eos_h; @@ -838,22 +808,19 @@ void Grid3D::Disk_3D(parameters p) hdp[19] = cs_h; hdp[20] = r_cool; - - //Now we can start the density calculation - //we will loop over each column and compute - //the density distribution - int nz = p.nz; - int nzt = 2*H.n_ghost + nz; - Real dz = p.zlen / ((Real) nz); - Real *rho = (Real *) calloc(nzt,sizeof(Real)); - + // Now we can start the density calculation + // we will loop over each column and compute + // the density distribution + int nz = p.nz; + int nzt = 2 * H.n_ghost + nz; + Real dz = p.zlen / ((Real)nz); + Real *rho = (Real *)calloc(nzt, sizeof(Real)); // create a look up table for the halo gas profile - int nr = 1000; - Real dr = sqrt(3)*0.5*fmax(p.xlen, p.zlen) / ((Real) nr); - Real *rho_halo = (Real *) calloc(nr,sizeof(Real)); - Real *r_halo = (Real *) calloc(nr,sizeof(Real)); - + int nr = 1000; + Real dr = sqrt(3) * 0.5 * fmax(p.xlen, p.zlen) / ((Real)nr); + Real *rho_halo = (Real *)calloc(nr, sizeof(Real)); + Real *r_halo = (Real *)calloc(nr, sizeof(Real)); ////////////////////////////////////////////// ////////////////////////////////////////////// @@ -863,7 +830,6 @@ void Grid3D::Disk_3D(parameters p) hydrostatic_ray_analytical_D3D(rho_halo, r_halo, hdp, dr, nr); chprintf("Hot halo lookup table generated...\n"); - ////////////////////////////////////////////// ////////////////////////////////////////////// // Add a disk component @@ -873,45 +839,44 @@ void Grid3D::Disk_3D(parameters p) // hydrostatic column for the disk // and add the disk density and thermal energy // to the density and energy arrays - for (j=H.n_ghost; j0.0) - { + // restrict to regions where the density + // has been set + if (d > 0.0) { // get the centered x, y, and z positions Get_Position(i, j, k, &x_pos, &y_pos, &z_pos); - // calculate radial position and phi (assumes disk is centered at 0, 0) - r = sqrt(x_pos*x_pos + y_pos*y_pos); - phi = atan2(y_pos, x_pos); // azimuthal angle (in x-y plane) + // calculate radial position and phi (assumes disk is centered at 0, + // 0) + r = sqrt(x_pos * x_pos + y_pos * y_pos); + phi = atan2(y_pos, x_pos); // azimuthal angle (in x-y plane) // radial acceleration from disk a_d = fabs(gr_disk_D3D(r, z_pos, hdp)); @@ -954,68 +918,82 @@ void Grid3D::Disk_3D(parameters p) // pressure gradient along x direction // gradient calc is first order at boundaries - if (i == H.n_ghost) idm = i + j*H.nx + k*H.nx*H.ny; - else idm = (i-1) + j*H.nx + k*H.nx*H.ny; - if (i == H.nx-H.n_ghost-1) idp = i + j*H.nx + k*H.nx*H.ny; - else idp = (i+1) + j*H.nx + k*H.nx*H.ny; - Get_Position(i-1, j, k, &xpm, &ypm, &zpm); - Get_Position(i+1, j, k, &xpp, &ypp, &zpp); - Pm = C.Energy[idm]*(gama-1.0); // only internal energy stored in energy currently - Pp = C.Energy[idp]*(gama-1.0); // only internal energy stored in energy currently - dPdx = (Pp-Pm)/(xpp-xpm); - - //pressure gradient along y direction - if (j == H.n_ghost) idm = i + j*H.nx + k*H.nx*H.ny; - else idm = i + (j-1)*H.nx + k*H.nx*H.ny; - if (j == H.ny-H.n_ghost-1) idp = i + j*H.nx + k*H.nx*H.ny; - else idp = i + (j+1)*H.nx + k*H.nx*H.ny; - Get_Position(i, j-1, k, &xpm, &ypm, &zpm); - Get_Position(i, j+1, k, &xpp, &ypp, &zpm); - Pm = C.Energy[idm]*(gama-1.0); // only internal energy stored in energy currently - Pp = C.Energy[idp]*(gama-1.0); // only internal energy stored in energy currently - dPdy = (Pp-Pm)/(ypp-ypm); - - //radial pressure gradient - dPdr = x_pos*dPdx/r + y_pos*dPdy/r; - - //radial acceleration - a = a_d + a_h + dPdr/d; - - if(isnan(a)||(a!=a)||(r*a<0)) - { - //printf("i %d j %d k %d a %e a_d %e dPdr %e d %e\n",i,j,k,a,a_d,dPdr,d); - //printf("i %d j %d k %d x_pos %e y_pos %e z_pos %e dPdx %e dPdy %e\n",i,j,k,x_pos,y_pos,z_pos,dPdx,dPdy); - //printf("i %d j %d k %d Pm %e Pp %e\n",i,j,k,Pm,Pp); - //printf("ypp %e ypm %e xpp %e zpm %e r %e\n",ypp,ypm, xpp, xpm ,r); - //printf("Energy pm %e pp %e density pm %e pp %e\n",C.Energy[idm],C.Energy[idp],C.density[idm],C.density[idp]); - } - else { - + if (i == H.n_ghost) + idm = i + j * H.nx + k * H.nx * H.ny; + else + idm = (i - 1) + j * H.nx + k * H.nx * H.ny; + if (i == H.nx - H.n_ghost - 1) + idp = i + j * H.nx + k * H.nx * H.ny; + else + idp = (i + 1) + j * H.nx + k * H.nx * H.ny; + Get_Position(i - 1, j, k, &xpm, &ypm, &zpm); + Get_Position(i + 1, j, k, &xpp, &ypp, &zpp); + Pm = C.Energy[idm] * + (gama - 1.0); // only internal energy stored in energy currently + Pp = C.Energy[idp] * + (gama - 1.0); // only internal energy stored in energy currently + dPdx = (Pp - Pm) / (xpp - xpm); + + // pressure gradient along y direction + if (j == H.n_ghost) + idm = i + j * H.nx + k * H.nx * H.ny; + else + idm = i + (j - 1) * H.nx + k * H.nx * H.ny; + if (j == H.ny - H.n_ghost - 1) + idp = i + j * H.nx + k * H.nx * H.ny; + else + idp = i + (j + 1) * H.nx + k * H.nx * H.ny; + Get_Position(i, j - 1, k, &xpm, &ypm, &zpm); + Get_Position(i, j + 1, k, &xpp, &ypp, &zpm); + Pm = C.Energy[idm] * + (gama - 1.0); // only internal energy stored in energy currently + Pp = C.Energy[idp] * + (gama - 1.0); // only internal energy stored in energy currently + dPdy = (Pp - Pm) / (ypp - ypm); + + // radial pressure gradient + dPdr = x_pos * dPdx / r + y_pos * dPdy / r; + + // radial acceleration + a = a_d + a_h + dPdr / d; + + if (isnan(a) || (a != a) || (r * a < 0)) { + // printf("i %d j %d k %d a %e a_d %e dPdr %e d + // %e\n",i,j,k,a,a_d,dPdr,d); printf("i %d j %d k %d x_pos %e y_pos + // %e z_pos %e dPdx %e dPdy + // %e\n",i,j,k,x_pos,y_pos,z_pos,dPdx,dPdy); printf("i %d j %d k %d + // Pm %e Pp %e\n",i,j,k,Pm,Pp); printf("ypp %e ypm %e xpp %e zpm %e + // r %e\n",ypp,ypm, xpp, xpm ,r); printf("Energy pm %e pp %e density + // pm %e pp + // %e\n",C.Energy[idm],C.Energy[idp],C.density[idm],C.density[idp]); + } else { // radial velocity - v = sqrt(r*a); - vx = -sin(phi)*v; - vy = cos(phi)*v; + v = sqrt(r * a); + vx = -sin(phi) * v; + vy = cos(phi) * v; vz = 0; // set the momenta - C.momentum_x[id] = d*vx; - C.momentum_y[id] = d*vy; - C.momentum_z[id] = d*vz; - - //sheepishly check for NaN's! - - if((d<0)||(P<0)||(isnan(d))||(isnan(P))||(d!=d)||(P!=P)) - printf("d %e P %e i %d j %d k %d id %d\n",d,P,i,j,k,id); - - if((isnan(vx))||(isnan(vy))||(isnan(vz))||(vx!=vx)||(vy!=vy)||(vz!=vz)) { - printf("vx %e vy %e vz %e i %d j %d k %d id %d\n",vx,vy,vz,i,j,k,id); - } - else { - //if the density is negative, there - //is a bigger problem! - if(d<0) - { - printf("pid %d error negative density i %d j %d k %d d %e\n",-1,i,j,k,d); + C.momentum_x[id] = d * vx; + C.momentum_y[id] = d * vy; + C.momentum_z[id] = d * vz; + + // sheepishly check for NaN's! + + if ((d < 0) || (P < 0) || (isnan(d)) || (isnan(P)) || (d != d) || + (P != P)) + printf("d %e P %e i %d j %d k %d id %d\n", d, P, i, j, k, id); + + if ((isnan(vx)) || (isnan(vy)) || (isnan(vz)) || (vx != vx) || + (vy != vy) || (vz != vz)) { + printf("vx %e vy %e vz %e i %d j %d k %d id %d\n", vx, vy, vz, i, + j, k, id); + } else { + // if the density is negative, there + // is a bigger problem! + if (d < 0) { + printf("pid %d error negative density i %d j %d k %d d %e\n", + -1, i, j, k, d); } } } @@ -1029,31 +1007,30 @@ void Grid3D::Disk_3D(parameters p) // Add a hot, hydrostatic halo ////////////////////////////////////////////// ////////////////////////////////////////////// - for (k=H.n_ghost; k #include #include -#include "../global/global.h" - -class DiskGalaxy { - -private: - Real M_vir, M_d, R_d, Z_d, R_vir, c_vir, r_cool, M_h, R_h; - Real log_func(Real y) { - return log(1+y) - y/(1+y); - }; - - -public: - DiskGalaxy(Real md, Real rd, Real zd, Real mvir, Real rvir, Real cvir, Real rcool) { - M_d = md; - R_d = rd; - Z_d = zd; - M_vir = mvir; - R_vir = rvir; - c_vir = cvir; - r_cool = rcool; - M_h = M_vir - M_d; - R_h = R_vir / c_vir; - }; - - - /** - * Radial acceleration in miyamoto nagai - */ - Real gr_disk_D3D(Real R, Real z) { - Real A = R_d + sqrt(Z_d*Z_d + z*z); - Real B = pow(A*A + R*R, 1.5); - - return -GN*M_d*R/B; - }; - - - /** - * Radial acceleration in NFW halo - */ - Real gr_halo_D3D(Real R, Real z){ - Real r = sqrt(R*R + z*z); //spherical radius - Real x = r / R_h; - Real r_comp = R/r; - - Real A = log_func(x); - Real B = 1.0 / (r*r); - Real C = GN*M_h/log_func(c_vir); - - return -C*A*B*r_comp; - }; - - - /** - * Convenience method that returns the combined radial acceleration - * of a disk galaxy at a specified point. - * @param R the cylindrical radius at the desired point - * @param z the distance perpendicular to the plane of the disk of the desired point - * @return - */ - Real gr_total_D3D(Real R, Real z) { - return gr_disk_D3D(R, z) + gr_halo_D3D(R, z); - }; - - - /** - * Potential of NFW halo - */ - Real phi_halo_D3D(Real R, Real z) { - Real r = sqrt(R * R + z * z); //spherical radius - Real x = r / R_h; - Real C = GN * M_h / (R_h * log_func(c_vir)); - - //limit x to non-zero value - if (x < 1.0e-9) x = 1.0e-9; - - return -C * log(1 + x) / x; - }; - - - /** - * Miyamoto-Nagai potential - */ - Real phi_disk_D3D(Real R, Real z) { - Real A = sqrt(z*z + Z_d*Z_d); - Real B = R_d + A; - Real C = sqrt(R*R + B*B); - - //patel et al. 2017, eqn 2 - return -GN * M_d / C; - }; - - Real rho_disk_D3D(const Real r, const Real z) { - const Real a = R_d; - const Real c = Z_d; - const Real b = sqrt(z*z+c*c); - const Real d = a+b; - const Real s = r*r+d*d; - return M_d*c*c*(a*(d*d+r*r)+3.0*b*d*d)/(4.0*M_PI*b*b*b*pow(s,2.5)); - } - - /** - * Convenience method that returns the combined gravitational potential - * of the disk and halo. - */ - Real phi_total_D3D(Real R, Real z) { - return phi_halo_D3D(R, z) + phi_disk_D3D(R, z); - }; - - - /** - * epicyclic frequency - */ - Real kappa2(Real R, Real z) { - Real r = sqrt(R*R + z*z); - Real x = r/R_h; - Real C = GN * M_h / (R_h * log_func(c_vir)); - Real A = R_d + sqrt(z*z + Z_d*Z_d); - Real B = sqrt(R*R + A*A); - - Real phiH_prime = -C*R/(r*r)/(1 + x) + C*log(1+x)*R_h*R/(r*r*r) + GN*M_d*R/(B*B*B); - Real phiH_prime_prime = -C/(r*r)/(1+x) + 2*C*R*R/(r*r*r*r)/(1+x) + C/((1+x)*(1+x))*R*R/R_h/(r*r*r) + - C*R*R/(1+x)/(r*r*r*r) + C*log(1+x)*R_h/(r*r*r)*(1 - 3*R*R/(r*r)) + - GN*M_d/(B*B*B)*(1 - 3*R*R/(B*B)); - - return 3/R*phiH_prime + phiH_prime_prime; - }; +#include "../global/global.h" - Real surface_density(Real R) { - return M_d/(2*M_PI)/(R_d*R_d)*exp(-R/R_d); - }; - - Real sigma_crit(Real R) { - return 3.36*GN*surface_density(R)/sqrt(kappa2(R,0.0)); - }; - - - Real getM_d() const { return M_d; }; - Real getR_d() const { return R_d; }; - Real getZ_d() const { return Z_d; }; - Real getM_vir() const { return M_vir; }; - Real getR_vir() const { return R_vir; }; - Real getC_vir() const { return c_vir; }; - Real getR_cool() const { return r_cool; }; - +class DiskGalaxy +{ + private: + Real M_vir, M_d, R_d, Z_d, R_vir, c_vir, r_cool, M_h, R_h; + Real log_func(Real y) { return log(1 + y) - y / (1 + y); }; + + public: + DiskGalaxy(Real md, Real rd, Real zd, Real mvir, Real rvir, Real cvir, + Real rcool) + { + M_d = md; + R_d = rd; + Z_d = zd; + M_vir = mvir; + R_vir = rvir; + c_vir = cvir; + r_cool = rcool; + M_h = M_vir - M_d; + R_h = R_vir / c_vir; + }; + + /** + * Radial acceleration in miyamoto nagai + */ + Real gr_disk_D3D(Real R, Real z) + { + Real A = R_d + sqrt(Z_d * Z_d + z * z); + Real B = pow(A * A + R * R, 1.5); + + return -GN * M_d * R / B; + }; + + /** + * Radial acceleration in NFW halo + */ + Real gr_halo_D3D(Real R, Real z) + { + Real r = sqrt(R * R + z * z); // spherical radius + Real x = r / R_h; + Real r_comp = R / r; + + Real A = log_func(x); + Real B = 1.0 / (r * r); + Real C = GN * M_h / log_func(c_vir); + + return -C * A * B * r_comp; + }; + + /** + * Convenience method that returns the combined radial acceleration + * of a disk galaxy at a specified point. + * @param R the cylindrical radius at the desired point + * @param z the distance perpendicular to the plane of the disk of the desired + * point + * @return + */ + Real gr_total_D3D(Real R, Real z) + { + return gr_disk_D3D(R, z) + gr_halo_D3D(R, z); + }; + + /** + * Potential of NFW halo + */ + Real phi_halo_D3D(Real R, Real z) + { + Real r = sqrt(R * R + z * z); // spherical radius + Real x = r / R_h; + Real C = GN * M_h / (R_h * log_func(c_vir)); + + // limit x to non-zero value + if (x < 1.0e-9) x = 1.0e-9; + + return -C * log(1 + x) / x; + }; + + /** + * Miyamoto-Nagai potential + */ + Real phi_disk_D3D(Real R, Real z) + { + Real A = sqrt(z * z + Z_d * Z_d); + Real B = R_d + A; + Real C = sqrt(R * R + B * B); + + // patel et al. 2017, eqn 2 + return -GN * M_d / C; + }; + + Real rho_disk_D3D(const Real r, const Real z) + { + const Real a = R_d; + const Real c = Z_d; + const Real b = sqrt(z * z + c * c); + const Real d = a + b; + const Real s = r * r + d * d; + return M_d * c * c * (a * (d * d + r * r) + 3.0 * b * d * d) / + (4.0 * M_PI * b * b * b * pow(s, 2.5)); + } + + /** + * Convenience method that returns the combined gravitational potential + * of the disk and halo. + */ + Real phi_total_D3D(Real R, Real z) + { + return phi_halo_D3D(R, z) + phi_disk_D3D(R, z); + }; + + /** + * epicyclic frequency + */ + Real kappa2(Real R, Real z) + { + Real r = sqrt(R * R + z * z); + Real x = r / R_h; + Real C = GN * M_h / (R_h * log_func(c_vir)); + Real A = R_d + sqrt(z * z + Z_d * Z_d); + Real B = sqrt(R * R + A * A); + + Real phiH_prime = -C * R / (r * r) / (1 + x) + + C * log(1 + x) * R_h * R / (r * r * r) + + GN * M_d * R / (B * B * B); + Real phiH_prime_prime = + -C / (r * r) / (1 + x) + 2 * C * R * R / (r * r * r * r) / (1 + x) + + C / ((1 + x) * (1 + x)) * R * R / R_h / (r * r * r) + + C * R * R / (1 + x) / (r * r * r * r) + + C * log(1 + x) * R_h / (r * r * r) * (1 - 3 * R * R / (r * r)) + + GN * M_d / (B * B * B) * (1 - 3 * R * R / (B * B)); + + return 3 / R * phiH_prime + phiH_prime_prime; + }; + + Real surface_density(Real R) + { + return M_d / (2 * M_PI) / (R_d * R_d) * exp(-R / R_d); + }; + + Real sigma_crit(Real R) + { + return 3.36 * GN * surface_density(R) / sqrt(kappa2(R, 0.0)); + }; + + Real getM_d() const { return M_d; }; + Real getR_d() const { return R_d; }; + Real getZ_d() const { return Z_d; }; + Real getM_vir() const { return M_vir; }; + Real getR_vir() const { return R_vir; }; + Real getC_vir() const { return c_vir; }; + Real getR_cool() const { return r_cool; }; }; -class ClusteredDiskGalaxy: public DiskGalaxy { - private: - Real lower_cluster_mass, higher_cluster_mass; - Real normalization; - - public: - ClusteredDiskGalaxy(Real lm, Real hm, Real md, Real rd, Real zd, Real mvir, Real rvir, Real cvir, Real rcool) - : DiskGalaxy {md, rd, zd, mvir, rvir, cvir, rcool}, lower_cluster_mass {lm}, higher_cluster_mass {hm} { - //if (lower_cluster_mass >= higher_cluster_mass) - normalization = 1/log(higher_cluster_mass/lower_cluster_mass); - }; - - Real getLowerClusterMass() const {return lower_cluster_mass;} - Real getHigherClusterMass() const {return higher_cluster_mass;} - Real getNormalization() const {return normalization;} - - - std::vector generateClusterPopulationMasses(int N, std::mt19937_64 generator) { - std::vector population; - for (int i = 0; i < N; i++) { - population.push_back(singleClusterMass(generator)); - } - return population; - } - - Real singleClusterMass(std::mt19937_64 generator) { - std::uniform_real_distribution uniform_distro(0, 1); - return lower_cluster_mass * exp(uniform_distro(generator)/normalization); - } - +class ClusteredDiskGalaxy : public DiskGalaxy +{ + private: + Real lower_cluster_mass, higher_cluster_mass; + Real normalization; + + public: + ClusteredDiskGalaxy(Real lm, Real hm, Real md, Real rd, Real zd, Real mvir, + Real rvir, Real cvir, Real rcool) + : DiskGalaxy{md, rd, zd, mvir, rvir, cvir, rcool}, + lower_cluster_mass{lm}, + higher_cluster_mass{hm} + { + // if (lower_cluster_mass >= higher_cluster_mass) + normalization = 1 / log(higher_cluster_mass / lower_cluster_mass); + }; + + Real getLowerClusterMass() const { return lower_cluster_mass; } + Real getHigherClusterMass() const { return higher_cluster_mass; } + Real getNormalization() const { return normalization; } + + std::vector generateClusterPopulationMasses(int N, + std::mt19937_64 generator) + { + std::vector population; + for (int i = 0; i < N; i++) { + population.push_back(singleClusterMass(generator)); + } + return population; + } + + Real singleClusterMass(std::mt19937_64 generator) + { + std::uniform_real_distribution uniform_distro(0, 1); + return lower_cluster_mass * exp(uniform_distro(generator) / normalization); + } }; -namespace Galaxies { - // all masses in M_sun and all distances in kpc - //static DiskGalaxy MW(6.5e10, 3.5, (3.5/5.0), 1.0e12, 261, 20, 157.0); - static ClusteredDiskGalaxy MW(1e4, 5e5, 6.5e10, 2.7, 0.7, 1.077e12, 261, 18, 157.0); - static DiskGalaxy M82(1.0e10, 0.8, 0.15, 5.0e10, 0.8/0.015, 10, 100.0); -}; +namespace Galaxies +{ +// all masses in M_sun and all distances in kpc +// static DiskGalaxy MW(6.5e10, 3.5, (3.5/5.0), 1.0e12, 261, 20, 157.0); +static ClusteredDiskGalaxy MW(1e4, 5e5, 6.5e10, 2.7, 0.7, 1.077e12, 261, 18, + 157.0); +static DiskGalaxy M82(1.0e10, 0.8, 0.15, 5.0e10, 0.8 / 0.015, 10, 100.0); +}; // namespace Galaxies -#endif //DISK_GALAXY +#endif // DISK_GALAXY diff --git a/src/mpi/MPI_Comm_node.c b/src/mpi/MPI_Comm_node.c index 057233d8c..ae519dec5 100644 --- a/src/mpi/MPI_Comm_node.c +++ b/src/mpi/MPI_Comm_node.c @@ -1,9 +1,9 @@ #ifdef MPI_CHOLLA -#include -#include -#include -#include "../mpi/MPI_Comm_node.h" + #include "../mpi/MPI_Comm_node.h" + #include + #include + #include /*! \fn int djb2_hash(char *str) * \brief Simple hash function by Dan Bernstein */ @@ -13,44 +13,42 @@ int djb2_hash(char *str); * \brief Returns an MPI_Comm for processes on each node.*/ MPI_Comm MPI_Comm_node(int *myid_node, int *nproc_node) { - int myid; //global rank - int nproc; //global rank - char pname[MPI_MAX_PROCESSOR_NAME]; //node hostname - int pname_length; //length of node hostname - int hash; //hash of node hostname + int myid; // global rank + int nproc; // global rank + char pname[MPI_MAX_PROCESSOR_NAME]; // node hostname + int pname_length; // length of node hostname + int hash; // hash of node hostname - MPI_Comm node_comm; //communicator for the procs on each node + MPI_Comm node_comm; // communicator for the procs on each node - //get the global process rank - MPI_Comm_rank(MPI_COMM_WORLD,&myid); - MPI_Comm_size(MPI_COMM_WORLD,&nproc); + // get the global process rank + MPI_Comm_rank(MPI_COMM_WORLD, &myid); + MPI_Comm_size(MPI_COMM_WORLD, &nproc); - - //if we're the only process, then just return - //the global rank, size, and comm - if(nproc==1) - { + // if we're the only process, then just return + // the global rank, size, and comm + if (nproc == 1) { *myid_node = myid; *nproc_node = nproc; return MPI_COMM_WORLD; } - //get the hostname of the node + // get the hostname of the node MPI_Get_processor_name(pname, &pname_length); - //hash the name of the node + // hash the name of the node hash = abs(djb2_hash(pname)); - //printf("hash %d\n",hash); + // printf("hash %d\n",hash); - //split the communicator + // split the communicator MPI_Comm_split(MPI_COMM_WORLD, hash, myid, &node_comm); - //get size and rank - MPI_Comm_rank(node_comm,myid_node); - MPI_Comm_size(node_comm,nproc_node); + // get size and rank + MPI_Comm_rank(node_comm, myid_node); + MPI_Comm_size(node_comm, nproc_node); - //return the communicator for processors on the node + // return the communicator for processors on the node return node_comm; } @@ -60,8 +58,7 @@ int djb2_hash(char *str) { int hash = 5381; int c; - while((c = *str++)) - hash = ((hash<<5) + hash) + c; /*hash*33 + c*/ + while ((c = *str++)) hash = ((hash << 5) + hash) + c; /*hash*33 + c*/ return hash; } #endif /*MPI_CHOLLA*/ diff --git a/src/mpi/MPI_Comm_node.h b/src/mpi/MPI_Comm_node.h index 0d8820d02..eaa975aef 100644 --- a/src/mpi/MPI_Comm_node.h +++ b/src/mpi/MPI_Comm_node.h @@ -5,10 +5,10 @@ #ifdef __cplusplus extern "C" { -#endif //__cplusplus +#endif //__cplusplus MPI_Comm MPI_Comm_node(int *pid, int *np); #ifdef __cplusplus } -#endif //__cplusplus +#endif //__cplusplus -#endif //MPI_COMM_NODE +#endif // MPI_COMM_NODE diff --git a/src/mpi/cuda_mpi_routines.cu b/src/mpi/cuda_mpi_routines.cu index 3b2e65e51..8983797fa 100644 --- a/src/mpi/cuda_mpi_routines.cu +++ b/src/mpi/cuda_mpi_routines.cu @@ -1,8 +1,9 @@ #ifdef MPI_CHOLLA -#include -#include "../utils/gpu.hpp" -#include "../io/io.h" -#include "../mpi/cuda_mpi_routines.h" + #include + + #include "../io/io.h" + #include "../mpi/cuda_mpi_routines.h" + #include "../utils/gpu.hpp" // #define PRINT_DEVICE_IDS @@ -10,43 +11,50 @@ * \brief CUDA initialization within MPI. */ int initialize_cuda_mpi(int myid, int nprocs) { - int i_device = 0; //GPU device for this process - int n_device; //number of GPU devices available + int i_device = 0; // GPU device for this process + int n_device; // number of GPU devices available cudaError_t flag_error; - //get the number of cuda devices + // get the number of cuda devices flag_error = cudaGetDeviceCount(&n_device); - //check for errors - if(flag_error!=cudaSuccess) - { - if(flag_error==cudaErrorNoDevice) - fprintf(stderr,"cudaGetDeviceCount: Error! for myid = %d and n_device = %d; cudaErrorNoDevice\n",myid,n_device); - if(flag_error==cudaErrorInsufficientDriver) - fprintf(stderr,"cudaGetDeviceCount: Error! for myid = %d and n_device = %d; cudaErrorInsufficientDriver\n",myid,n_device); + // check for errors + if (flag_error != cudaSuccess) { + if (flag_error == cudaErrorNoDevice) + fprintf(stderr, + "cudaGetDeviceCount: Error! for myid = %d and n_device = %d; " + "cudaErrorNoDevice\n", + myid, n_device); + if (flag_error == cudaErrorInsufficientDriver) + fprintf(stderr, + "cudaGetDeviceCount: Error! for myid = %d and n_device = %d; " + "cudaErrorInsufficientDriver\n", + myid, n_device); fflush(stderr); return 1; } - //get host name - char pname[MPI_MAX_PROCESSOR_NAME]; //node hostname - int pname_length; //length of node hostname + // get host name + char pname[MPI_MAX_PROCESSOR_NAME]; // node hostname + int pname_length; // length of node hostname MPI_Get_processor_name(pname, &pname_length); - //set a cuda device for each process - cudaSetDevice(myid%n_device); + // set a cuda device for each process + cudaSetDevice(myid % n_device); - //double check + // double check cudaGetDevice(&i_device); #ifdef PRINT_DEVICE_IDS - printf("In initialize_cuda_mpi: name:%s myid = %d, i_device = %d, n_device = %d\n",pname, myid,i_device,n_device); + printf( + "In initialize_cuda_mpi: name:%s myid = %d, i_device = %d, n_device = " + "%d\n", + pname, myid, i_device, n_device); fflush(stdout); MPI_Barrier(world); #endif return 0; - } -#endif //MPI_CHOLLA +#endif // MPI_CHOLLA diff --git a/src/mpi/cuda_mpi_routines.h b/src/mpi/cuda_mpi_routines.h index d408afae6..31b1f89a0 100644 --- a/src/mpi/cuda_mpi_routines.h +++ b/src/mpi/cuda_mpi_routines.h @@ -3,7 +3,7 @@ #ifdef __cplusplus extern "C" { -#endif //__cplusplus +#endif //__cplusplus /*! \fn int initialize_cuda_mpi(int myid, int nprocs); * \brief CUDA initialization within MPI. */ @@ -11,7 +11,6 @@ int initialize_cuda_mpi(int myid, int nprocs); #ifdef __cplusplus } -#endif //__cplusplus +#endif //__cplusplus - -#endif //CUDA_MPI_ROUTINES +#endif // CUDA_MPI_ROUTINES diff --git a/src/mpi/mpi_routines.cpp b/src/mpi/mpi_routines.cpp index 6d51ee643..4b6a21474 100644 --- a/src/mpi/mpi_routines.cpp +++ b/src/mpi/mpi_routines.cpp @@ -1,13 +1,16 @@ #ifdef MPI_CHOLLA -#include -#include -#include "../mpi/mpi_routines.h" -#include "../global/global.h" -#include "../utils/error_handling.h" -#include "../io/io.h" -#include "../mpi/cuda_mpi_routines.h" -#include "../mpi/MPI_Comm_node.h" -#include + #include "../mpi/mpi_routines.h" + + #include + #include + + #include + + #include "../global/global.h" + #include "../io/io.h" + #include "../mpi/MPI_Comm_node.h" + #include "../mpi/cuda_mpi_routines.h" + #include "../utils/error_handling.h" /*Global MPI Variables*/ int procID; /*process rank*/ @@ -22,19 +25,19 @@ MPI_Comm node; /*global communicator*/ MPI_Datatype MPI_CHREAL; /*set equal to MPI_FLOAT or MPI_DOUBLE*/ -#ifdef PARTICLES + #ifdef PARTICLES MPI_Datatype MPI_PART_INT; /*set equal to MPI_INT or MPI_LONG*/ -#endif + #endif -//MPI_Requests for nonblocking comm +// MPI_Requests for nonblocking comm MPI_Request *send_request; MPI_Request *recv_request; -//MPI destinations and sources +// MPI destinations and sources int dest[6]; int source[6]; -//Communication buffers +// Communication buffers // For BLOCK Real *d_send_buffer_x0; @@ -69,8 +72,8 @@ int x_buffer_length; int y_buffer_length; int z_buffer_length; -#ifdef PARTICLES -//Buffers for particles transfers + #ifdef PARTICLES +// Buffers for particles transfers Real *d_send_buffer_x0_particles; Real *d_send_buffer_x1_particles; Real *d_send_buffer_y0_particles; @@ -84,7 +87,7 @@ Real *d_recv_buffer_y1_particles; Real *d_recv_buffer_z0_particles; Real *d_recv_buffer_z1_particles; -//Buffers for particles transfers +// Buffers for particles transfers Real *h_send_buffer_x0_particles; Real *h_send_buffer_x1_particles; Real *h_send_buffer_y0_particles; @@ -118,7 +121,7 @@ MPI_Request *recv_request_n_particles; // Request for Particles Transfer MPI_Request *send_request_particles_transfer; MPI_Request *recv_request_particles_transfer; -#endif//PARTICLES + #endif // PARTICLES /*local domain sizes*/ /*none of these include ghost cells!*/ @@ -137,16 +140,14 @@ int nproc_x; int nproc_y; int nproc_z; -#ifdef FFTW + #ifdef FFTW ptrdiff_t n_local_complex; -#endif /*FFTW*/ - + #endif /*FFTW*/ /*\fn void InitializeChollaMPI(void) */ /* Routine to initialize MPI */ void InitializeChollaMPI(int *pargc, char **pargv[]) { - /*initialize MPI*/ MPI_Init(pargc, pargv); @@ -157,7 +158,7 @@ void InitializeChollaMPI(int *pargc, char **pargv[]) MPI_Comm_size(MPI_COMM_WORLD, &nproc); /*print a cute message*/ - //printf("Processor %d of %d: Hello!\n", procID, nproc); + // printf("Processor %d of %d: Hello!\n", procID, nproc); /* set the root process rank */ root = 0; @@ -174,44 +175,46 @@ void InitializeChollaMPI(int *pargc, char **pargv[]) #endif /*PRECISION*/ #ifdef PARTICLES - #ifdef PARTICLES_LONG_INTS + #ifdef PARTICLES_LONG_INTS MPI_PART_INT = MPI_LONG; - #else + #else MPI_PART_INT = MPI_INT; - #endif + #endif #endif /*create the MPI_Request arrays for non-blocking sends*/ - if(!(send_request = (MPI_Request *) malloc(2*sizeof(MPI_Request)))) - { + if (!(send_request = (MPI_Request *)malloc(2 * sizeof(MPI_Request)))) { chprintf("Error allocating send_request.\n"); chexit(-2); } - if(!(recv_request = (MPI_Request *) malloc(2*sizeof(MPI_Request)))) - { + if (!(recv_request = (MPI_Request *)malloc(2 * sizeof(MPI_Request)))) { chprintf("Error allocating recv_request.\n"); chexit(-2); } #ifdef PARTICLES - if(!(send_request_n_particles = (MPI_Request *) malloc(2*sizeof(MPI_Request)))) - { - chprintf("Error allocating send_request for number of particles for transfer.\n"); + if (!(send_request_n_particles = + (MPI_Request *)malloc(2 * sizeof(MPI_Request)))) { + chprintf( + "Error allocating send_request for number of particles for " + "transfer.\n"); chexit(-2); } - if(!(recv_request_n_particles = (MPI_Request *) malloc(2*sizeof(MPI_Request)))) - { - chprintf("Error allocating recv_request for number of particles for transfer.\n"); + if (!(recv_request_n_particles = + (MPI_Request *)malloc(2 * sizeof(MPI_Request)))) { + chprintf( + "Error allocating recv_request for number of particles for " + "transfer.\n"); chexit(-2); } - if(!(send_request_particles_transfer = (MPI_Request *) malloc(2*sizeof(MPI_Request)))) - { + if (!(send_request_particles_transfer = + (MPI_Request *)malloc(2 * sizeof(MPI_Request)))) { chprintf("Error allocating send_request for particles transfer.\n"); chexit(-2); } - if(!(recv_request_particles_transfer = (MPI_Request *) malloc(2*sizeof(MPI_Request)))) - { + if (!(recv_request_particles_transfer = + (MPI_Request *)malloc(2 * sizeof(MPI_Request)))) { chprintf("Error allocating recv_request for particles transfer.\n"); chexit(-2); } @@ -226,57 +229,58 @@ void InitializeChollaMPI(int *pargc, char **pargv[]) // // Needed to initialize cuda after gravity in order to work on Summit // //initialize cuda for use with mpi #ifdef CUDA - if(initialize_cuda_mpi(procID_node,nproc_node)) - { + if (initialize_cuda_mpi(procID_node, nproc_node)) { chprintf("Error initializing cuda with mpi.\n"); chexit(-10); } #endif /*CUDA*/ // #endif//ONLY_PARTICLES - } - - /* Perform domain decomposition */ -void DomainDecomposition(struct parameters *P, struct Header *H, int nx_gin, int ny_gin, int nz_gin) +void DomainDecomposition(struct parameters *P, struct Header *H, int nx_gin, + int ny_gin, int nz_gin) { - DomainDecompositionBLOCK(P, H, nx_gin, ny_gin, nz_gin); // set grid dimensions - H->nx = nx_local+2*H->n_ghost; + H->nx = nx_local + 2 * H->n_ghost; H->nx_real = nx_local; - if (ny_local == 1) H->ny = 1; - else H->ny = ny_local+2*H->n_ghost; + if (ny_local == 1) + H->ny = 1; + else + H->ny = ny_local + 2 * H->n_ghost; H->ny_real = ny_local; - if (nz_local == 1) H->nz = 1; - else H->nz = nz_local+2*H->n_ghost; + if (nz_local == 1) + H->nz = 1; + else + H->nz = nz_local + 2 * H->n_ghost; H->nz_real = nz_local; // set total number of cells H->n_cells = H->nx * H->ny * H->nz; - //printf("In DomainDecomposition: nx %d ny %d nz %d nc %d\n",H->nx,H->ny,H->nz,H->n_cells); + // printf("In DomainDecomposition: nx %d ny %d nz %d nc + // %d\n",H->nx,H->ny,H->nz,H->n_cells); - //Allocate communication buffers + // Allocate communication buffers Allocate_MPI_DeviceBuffers(H); - } /* Perform domain decomposition */ -void DomainDecompositionBLOCK(struct parameters *P, struct Header *H, int nx_gin, int ny_gin, int nz_gin) +void DomainDecompositionBLOCK(struct parameters *P, struct Header *H, + int nx_gin, int ny_gin, int nz_gin) { int n; - int i,j,k; + int i, j, k; int *ix; int *iy; int *iz; - //enforce an even number of processes - if(nproc%2 && nproc>1) - { - chprintf("WARNING: Odd number of processors > 1 is not officially supported\n"); + // enforce an even number of processes + if (nproc % 2 && nproc > 1) { + chprintf( + "WARNING: Odd number of processors > 1 is not officially supported\n"); } /* record global size */ @@ -285,19 +289,19 @@ void DomainDecompositionBLOCK(struct parameters *P, struct Header *H, int nx_gin nz_global = nz_gin; /*allocate subdomain indices*/ - ix = (int *)malloc(nproc*sizeof(int)); - iy = (int *)malloc(nproc*sizeof(int)); - iz = (int *)malloc(nproc*sizeof(int)); + ix = (int *)malloc(nproc * sizeof(int)); + iy = (int *)malloc(nproc * sizeof(int)); + iz = (int *)malloc(nproc * sizeof(int)); /*tile the MPI processes in blocks*/ /*this sets nproc_x, nproc_y, nproc_z */ - //chprintf("About to enter tiling block decomp\n"); + // chprintf("About to enter tiling block decomp\n"); MPI_Barrier(world); TileBlockDecomposition(); if (nz_global > nx_global) { int tmp; - tmp = nproc_x; + tmp = nproc_x; nproc_x = nproc_z; nproc_z = tmp; } @@ -307,119 +311,104 @@ void DomainDecompositionBLOCK(struct parameters *P, struct Header *H, int nx_gin nproc_x = P->n_proc_x; nproc_y = P->n_proc_y; nproc_z = P->n_proc_z; - chprintf("Setting MPI grid: nx=%d ny=%d nz=%d\n", nproc_x, nproc_y, nproc_z); - // chprintf("Setting MPI grid: nx=%d ny=%d nz=%d\n", P->n_proc_x, P->n_proc_y, P->n_proc_z); + chprintf("Setting MPI grid: nx=%d ny=%d nz=%d\n", nproc_x, nproc_y, + nproc_z); + // chprintf("Setting MPI grid: nx=%d ny=%d nz=%d\n", P->n_proc_x, + // P->n_proc_y, P->n_proc_z); #endif - //chprintf("Allocating tiling.\n"); + // chprintf("Allocating tiling.\n"); MPI_Barrier(world); - int ***tiling = three_dimensional_int_array(nproc_x,nproc_y,nproc_z); + int ***tiling = three_dimensional_int_array(nproc_x, nproc_y, nproc_z); - - //find indices - //chprintf("Setting indices.\n"); + // find indices + // chprintf("Setting indices.\n"); MPI_Barrier(world); n = 0; - //Gravity: Change the order of MPI processes assignment to match the assignment done by PFFT - //Original: - // for(i=0;i=nproc_x) - dest[1] -= nproc_x; - - dest[2] = j-1; - if(dest[2]<0) - dest[2] += nproc_y; - dest[3] = j+1; - if(dest[3]>=nproc_y) - dest[3] -= nproc_y; - - dest[4] = k-1; - if(dest[4]<0) - dest[4] += nproc_z; - dest[5] = k+1; - if(dest[5]>=nproc_z) - dest[5] -= nproc_z; + if (n == procID) { + dest[0] = i - 1; + if (dest[0] < 0) dest[0] += nproc_x; + dest[1] = i + 1; + if (dest[1] >= nproc_x) dest[1] -= nproc_x; + + dest[2] = j - 1; + if (dest[2] < 0) dest[2] += nproc_y; + dest[3] = j + 1; + if (dest[3] >= nproc_y) dest[3] -= nproc_y; + + dest[4] = k - 1; + if (dest[4] < 0) dest[4] += nproc_z; + dest[5] = k + 1; + if (dest[5] >= nproc_z) dest[5] -= nproc_z; } n++; } /* set local x, y, z subdomain sizes */ - n = nx_global%nproc_x; - if(!n) - { - //nx_global splits evenly along x procs*/ - nx_local = nx_global/nproc_x; - nx_local_start = ix[procID]*nx_local; - }else{ - nx_local = nx_global/nproc_x; - if(ix[procID]xu_bcnd = 5; - //if the global bcnd is periodic, use MPI bcnds at ends - if(P->xl_bcnd==1) P->xl_bcnd = 5; - }else{ + // if the global bcnd is periodic, use MPI bcnds at ends + if (P->xl_bcnd == 1) P->xl_bcnd = 5; + } else { P->xl_bcnd = 5; - //if the global bcnd is periodic, use MPI bcnds at ends - if(P->xu_bcnd==1) P->xu_bcnd = 5; + // if the global bcnd is periodic, use MPI bcnds at ends + if (P->xu_bcnd == 1) P->xu_bcnd = 5; } - }else{ - //this is completely an interior cell - //along the x direction, so - //set both x bcnds to MPI bcnds + } else { + // this is completely an interior cell + // along the x direction, so + // set both x bcnds to MPI bcnds P->xl_bcnd = 5; P->xu_bcnd = 5; } @@ -478,23 +464,21 @@ void DomainDecompositionBLOCK(struct parameters *P, struct Header *H, int nx_gin /*do y bcnds next*/ /*exterior faces have to be treated separately*/ /*as long as there is more than one cell in the x direction*/ - if (nproc_y!=1) { - if((iy[procID]==0)||(iy[procID]==nproc_y-1)) - { - if(iy[procID]==0) - { + if (nproc_y != 1) { + if ((iy[procID] == 0) || (iy[procID] == nproc_y - 1)) { + if (iy[procID] == 0) { P->yu_bcnd = 5; - //if the global bcnd is periodic, use MPI bcnds at ends - if(P->yl_bcnd==1) P->yl_bcnd = 5; - }else{ + // if the global bcnd is periodic, use MPI bcnds at ends + if (P->yl_bcnd == 1) P->yl_bcnd = 5; + } else { P->yl_bcnd = 5; - //if the global bcnd is periodic, use MPI bcnds at ends - if(P->yu_bcnd==1) P->yu_bcnd = 5; + // if the global bcnd is periodic, use MPI bcnds at ends + if (P->yu_bcnd == 1) P->yu_bcnd = 5; } - }else{ - //this is completely an interior cell - //along the y direction, so - //set both y bcnds to MPI bcnds + } else { + // this is completely an interior cell + // along the y direction, so + // set both y bcnds to MPI bcnds P->yl_bcnd = 5; P->yu_bcnd = 5; } @@ -503,55 +487,52 @@ void DomainDecompositionBLOCK(struct parameters *P, struct Header *H, int nx_gin /*do z bcnds next*/ /*exterior faces have to be treated separately*/ /*as long as there is more than one cell in the x direction*/ - if(nproc_z!=1) { - if((iz[procID]==0)||(iz[procID]==nproc_z-1)) - { - if(iz[procID]==0) - { + if (nproc_z != 1) { + if ((iz[procID] == 0) || (iz[procID] == nproc_z - 1)) { + if (iz[procID] == 0) { P->zu_bcnd = 5; - //if the global bcnd is periodic, use MPI bcnds at ends - if(P->zl_bcnd==1) P->zl_bcnd = 5; - }else{ + // if the global bcnd is periodic, use MPI bcnds at ends + if (P->zl_bcnd == 1) P->zl_bcnd = 5; + } else { P->zl_bcnd = 5; - //if the global bcnd is periodic, use MPI bcnds at ends - if(P->zu_bcnd==1) P->zu_bcnd = 5; + // if the global bcnd is periodic, use MPI bcnds at ends + if (P->zu_bcnd == 1) P->zu_bcnd = 5; } - }else{ - //this is completely an interior cell - //along the z direction, so - //set both z bcnds to MPI bcnds + } else { + // this is completely an interior cell + // along the z direction, so + // set both z bcnds to MPI bcnds P->zl_bcnd = 5; P->zu_bcnd = 5; } } - - //free indices + // free indices free(ix); free(iy); free(iz); - } void Allocate_MPI_DeviceBuffers(struct Header *H) { int xbsize, ybsize, zbsize; - if (H->ny==1 && H->nz==1) { - xbsize = H->n_fields*H->n_ghost; + if (H->ny == 1 && H->nz == 1) { + xbsize = H->n_fields * H->n_ghost; ybsize = 1; - zbsize = 1; + zbsize = 1; } // 2D - if (H->ny>1 && H->nz==1) { - xbsize = H->n_fields*H->n_ghost*(H->ny-2*H->n_ghost); - ybsize = H->n_fields*H->n_ghost*(H->nx); + if (H->ny > 1 && H->nz == 1) { + xbsize = H->n_fields * H->n_ghost * (H->ny - 2 * H->n_ghost); + ybsize = H->n_fields * H->n_ghost * (H->nx); zbsize = 1; } // 3D - if (H->ny>1 && H->nz>1) { - xbsize = H->n_fields*H->n_ghost*(H->ny-2*H->n_ghost)*(H->nz-2*H->n_ghost); - ybsize = H->n_fields*H->n_ghost*(H->nx)*(H->nz-2*H->n_ghost); - zbsize = H->n_fields*H->n_ghost*(H->nx)*(H->ny); + if (H->ny > 1 && H->nz > 1) { + xbsize = H->n_fields * H->n_ghost * (H->ny - 2 * H->n_ghost) * + (H->nz - 2 * H->n_ghost); + ybsize = H->n_fields * H->n_ghost * (H->nx) * (H->nz - 2 * H->n_ghost); + zbsize = H->n_fields * H->n_ghost * (H->nx) * (H->ny); } x_buffer_length = xbsize; @@ -560,113 +541,155 @@ void Allocate_MPI_DeviceBuffers(struct Header *H) #ifdef PARTICLES // Set Initial sizes for particles buffers - int n_max = std::max( H->nx, H->ny ); - n_max = std::max( H->nz, n_max ); - int factor = 2; - N_PARTICLES_TRANSFER = n_max * n_max * factor ; + int n_max = std::max(H->nx, H->ny); + n_max = std::max(H->nz, n_max); + int factor = 2; + N_PARTICLES_TRANSFER = n_max * n_max * factor; // Set the number of values that will be transferred for each particle - N_DATA_PER_PARTICLE_TRANSFER = 6; // 3 positions and 3 velocities - #ifndef SINGLE_PARTICLE_MASS - N_DATA_PER_PARTICLE_TRANSFER += 1; //one more for the particle mass - #endif - #ifdef PARTICLE_IDS - N_DATA_PER_PARTICLE_TRANSFER += 1; //one more for the particle ID - #endif - #ifdef PARTICLE_AGE - N_DATA_PER_PARTICLE_TRANSFER += 1; //one more for the particle age - #endif - - buffer_length_particles_x0_send = N_PARTICLES_TRANSFER * N_DATA_PER_PARTICLE_TRANSFER; - buffer_length_particles_x0_recv = N_PARTICLES_TRANSFER * N_DATA_PER_PARTICLE_TRANSFER; - buffer_length_particles_x1_send = N_PARTICLES_TRANSFER * N_DATA_PER_PARTICLE_TRANSFER; - buffer_length_particles_x1_recv = N_PARTICLES_TRANSFER * N_DATA_PER_PARTICLE_TRANSFER; - buffer_length_particles_y0_send = N_PARTICLES_TRANSFER * N_DATA_PER_PARTICLE_TRANSFER; - buffer_length_particles_y0_recv = N_PARTICLES_TRANSFER * N_DATA_PER_PARTICLE_TRANSFER; - buffer_length_particles_y1_send = N_PARTICLES_TRANSFER * N_DATA_PER_PARTICLE_TRANSFER; - buffer_length_particles_y1_recv = N_PARTICLES_TRANSFER * N_DATA_PER_PARTICLE_TRANSFER; - buffer_length_particles_z0_send = N_PARTICLES_TRANSFER * N_DATA_PER_PARTICLE_TRANSFER; - buffer_length_particles_z0_recv = N_PARTICLES_TRANSFER * N_DATA_PER_PARTICLE_TRANSFER; - buffer_length_particles_z1_send = N_PARTICLES_TRANSFER * N_DATA_PER_PARTICLE_TRANSFER; - buffer_length_particles_z1_recv = N_PARTICLES_TRANSFER * N_DATA_PER_PARTICLE_TRANSFER; - #endif //PARTICLES + N_DATA_PER_PARTICLE_TRANSFER = 6; // 3 positions and 3 velocities + #ifndef SINGLE_PARTICLE_MASS + N_DATA_PER_PARTICLE_TRANSFER += 1; // one more for the particle mass + #endif + #ifdef PARTICLE_IDS + N_DATA_PER_PARTICLE_TRANSFER += 1; // one more for the particle ID + #endif + #ifdef PARTICLE_AGE + N_DATA_PER_PARTICLE_TRANSFER += 1; // one more for the particle age + #endif + + buffer_length_particles_x0_send = + N_PARTICLES_TRANSFER * N_DATA_PER_PARTICLE_TRANSFER; + buffer_length_particles_x0_recv = + N_PARTICLES_TRANSFER * N_DATA_PER_PARTICLE_TRANSFER; + buffer_length_particles_x1_send = + N_PARTICLES_TRANSFER * N_DATA_PER_PARTICLE_TRANSFER; + buffer_length_particles_x1_recv = + N_PARTICLES_TRANSFER * N_DATA_PER_PARTICLE_TRANSFER; + buffer_length_particles_y0_send = + N_PARTICLES_TRANSFER * N_DATA_PER_PARTICLE_TRANSFER; + buffer_length_particles_y0_recv = + N_PARTICLES_TRANSFER * N_DATA_PER_PARTICLE_TRANSFER; + buffer_length_particles_y1_send = + N_PARTICLES_TRANSFER * N_DATA_PER_PARTICLE_TRANSFER; + buffer_length_particles_y1_recv = + N_PARTICLES_TRANSFER * N_DATA_PER_PARTICLE_TRANSFER; + buffer_length_particles_z0_send = + N_PARTICLES_TRANSFER * N_DATA_PER_PARTICLE_TRANSFER; + buffer_length_particles_z0_recv = + N_PARTICLES_TRANSFER * N_DATA_PER_PARTICLE_TRANSFER; + buffer_length_particles_z1_send = + N_PARTICLES_TRANSFER * N_DATA_PER_PARTICLE_TRANSFER; + buffer_length_particles_z1_recv = + N_PARTICLES_TRANSFER * N_DATA_PER_PARTICLE_TRANSFER; + #endif // PARTICLES chprintf("Allocating MPI communication buffers on GPU "); chprintf("(nx = %ld, ny = %ld, nz = %ld).\n", xbsize, ybsize, zbsize); - CudaSafeCall ( cudaMalloc (&d_send_buffer_x0, xbsize*sizeof(Real)) ); - CudaSafeCall ( cudaMalloc (&d_send_buffer_x1, xbsize*sizeof(Real)) ); - CudaSafeCall ( cudaMalloc (&d_recv_buffer_x0, xbsize*sizeof(Real)) ); - CudaSafeCall ( cudaMalloc (&d_recv_buffer_x1, xbsize*sizeof(Real)) ); - CudaSafeCall ( cudaMalloc (&d_send_buffer_y0, ybsize*sizeof(Real)) ); - CudaSafeCall ( cudaMalloc (&d_send_buffer_y1, ybsize*sizeof(Real)) ); - CudaSafeCall ( cudaMalloc (&d_recv_buffer_y0, ybsize*sizeof(Real)) ); - CudaSafeCall ( cudaMalloc (&d_recv_buffer_y1, ybsize*sizeof(Real)) ); - CudaSafeCall ( cudaMalloc (&d_send_buffer_z0, zbsize*sizeof(Real)) ); - CudaSafeCall ( cudaMalloc (&d_send_buffer_z1, zbsize*sizeof(Real)) ); - CudaSafeCall ( cudaMalloc (&d_recv_buffer_z0, zbsize*sizeof(Real)) ); - CudaSafeCall ( cudaMalloc (&d_recv_buffer_z1, zbsize*sizeof(Real)) ); + CudaSafeCall(cudaMalloc(&d_send_buffer_x0, xbsize * sizeof(Real))); + CudaSafeCall(cudaMalloc(&d_send_buffer_x1, xbsize * sizeof(Real))); + CudaSafeCall(cudaMalloc(&d_recv_buffer_x0, xbsize * sizeof(Real))); + CudaSafeCall(cudaMalloc(&d_recv_buffer_x1, xbsize * sizeof(Real))); + CudaSafeCall(cudaMalloc(&d_send_buffer_y0, ybsize * sizeof(Real))); + CudaSafeCall(cudaMalloc(&d_send_buffer_y1, ybsize * sizeof(Real))); + CudaSafeCall(cudaMalloc(&d_recv_buffer_y0, ybsize * sizeof(Real))); + CudaSafeCall(cudaMalloc(&d_recv_buffer_y1, ybsize * sizeof(Real))); + CudaSafeCall(cudaMalloc(&d_send_buffer_z0, zbsize * sizeof(Real))); + CudaSafeCall(cudaMalloc(&d_send_buffer_z1, zbsize * sizeof(Real))); + CudaSafeCall(cudaMalloc(&d_recv_buffer_z0, zbsize * sizeof(Real))); + CudaSafeCall(cudaMalloc(&d_recv_buffer_z1, zbsize * sizeof(Real))); #if !defined(MPI_GPU) - h_send_buffer_x0 = (Real *) malloc ( xbsize*sizeof(Real) ); - h_send_buffer_x1 = (Real *) malloc ( xbsize*sizeof(Real) ); - h_recv_buffer_x0 = (Real *) malloc ( xbsize*sizeof(Real) ); - h_recv_buffer_x1 = (Real *) malloc ( xbsize*sizeof(Real) ); - h_send_buffer_y0 = (Real *) malloc ( ybsize*sizeof(Real) ); - h_send_buffer_y1 = (Real *) malloc ( ybsize*sizeof(Real) ); - h_recv_buffer_y0 = (Real *) malloc ( ybsize*sizeof(Real) ); - h_recv_buffer_y1 = (Real *) malloc ( ybsize*sizeof(Real) ); - h_send_buffer_z0 = (Real *) malloc ( zbsize*sizeof(Real) ); - h_send_buffer_z1 = (Real *) malloc ( zbsize*sizeof(Real) ); - h_recv_buffer_z0 = (Real *) malloc ( zbsize*sizeof(Real) ); - h_recv_buffer_z1 = (Real *) malloc ( zbsize*sizeof(Real) ); + h_send_buffer_x0 = (Real *)malloc(xbsize * sizeof(Real)); + h_send_buffer_x1 = (Real *)malloc(xbsize * sizeof(Real)); + h_recv_buffer_x0 = (Real *)malloc(xbsize * sizeof(Real)); + h_recv_buffer_x1 = (Real *)malloc(xbsize * sizeof(Real)); + h_send_buffer_y0 = (Real *)malloc(ybsize * sizeof(Real)); + h_send_buffer_y1 = (Real *)malloc(ybsize * sizeof(Real)); + h_recv_buffer_y0 = (Real *)malloc(ybsize * sizeof(Real)); + h_recv_buffer_y1 = (Real *)malloc(ybsize * sizeof(Real)); + h_send_buffer_z0 = (Real *)malloc(zbsize * sizeof(Real)); + h_send_buffer_z1 = (Real *)malloc(zbsize * sizeof(Real)); + h_recv_buffer_z0 = (Real *)malloc(zbsize * sizeof(Real)); + h_recv_buffer_z1 = (Real *)malloc(zbsize * sizeof(Real)); #endif - // NOTE: When changing this ifdef check for compatibility with + // NOTE: When changing this ifdef check for compatibility with // Grid3D::Load_NTtransfer_and_Request_Receive_Particles_Transfer // in particles/particles_boundaries.cpp - // Whether or not MPI_GPU is on, the device has transfer buffers for PARTICLES_GPU + // Whether or not MPI_GPU is on, the device has transfer buffers for + // PARTICLES_GPU #if defined(PARTICLES) && defined(PARTICLES_GPU) - chprintf("Allocating MPI communication buffers on GPU for particle transfers ( N_Particles: %d ).\n", N_PARTICLES_TRANSFER ); - CudaSafeCall ( cudaMalloc (&d_send_buffer_x0_particles, buffer_length_particles_x0_send*sizeof(Real)) ); - CudaSafeCall ( cudaMalloc (&d_send_buffer_x1_particles, buffer_length_particles_x1_send*sizeof(Real)) ); - CudaSafeCall ( cudaMalloc (&d_send_buffer_y0_particles, buffer_length_particles_y0_send*sizeof(Real)) ); - CudaSafeCall ( cudaMalloc (&d_send_buffer_y1_particles, buffer_length_particles_y1_send*sizeof(Real)) ); - CudaSafeCall ( cudaMalloc (&d_send_buffer_z0_particles, buffer_length_particles_z0_send*sizeof(Real)) ); - CudaSafeCall ( cudaMalloc (&d_send_buffer_z1_particles, buffer_length_particles_z1_send*sizeof(Real)) ); - CudaSafeCall ( cudaMalloc (&d_recv_buffer_x0_particles, buffer_length_particles_x0_recv*sizeof(Real)) ); - CudaSafeCall ( cudaMalloc (&d_recv_buffer_x1_particles, buffer_length_particles_x1_recv*sizeof(Real)) ); - CudaSafeCall ( cudaMalloc (&d_recv_buffer_y0_particles, buffer_length_particles_y0_recv*sizeof(Real)) ); - CudaSafeCall ( cudaMalloc (&d_recv_buffer_y1_particles, buffer_length_particles_y1_recv*sizeof(Real)) ); - CudaSafeCall ( cudaMalloc (&d_recv_buffer_z0_particles, buffer_length_particles_z0_recv*sizeof(Real)) ); - CudaSafeCall ( cudaMalloc (&d_recv_buffer_z1_particles, buffer_length_particles_z1_recv*sizeof(Real)) ); - #endif // PARTICLES && PARTICLES_GPU + chprintf( + "Allocating MPI communication buffers on GPU for particle transfers ( " + "N_Particles: %d ).\n", + N_PARTICLES_TRANSFER); + CudaSafeCall(cudaMalloc(&d_send_buffer_x0_particles, + buffer_length_particles_x0_send * sizeof(Real))); + CudaSafeCall(cudaMalloc(&d_send_buffer_x1_particles, + buffer_length_particles_x1_send * sizeof(Real))); + CudaSafeCall(cudaMalloc(&d_send_buffer_y0_particles, + buffer_length_particles_y0_send * sizeof(Real))); + CudaSafeCall(cudaMalloc(&d_send_buffer_y1_particles, + buffer_length_particles_y1_send * sizeof(Real))); + CudaSafeCall(cudaMalloc(&d_send_buffer_z0_particles, + buffer_length_particles_z0_send * sizeof(Real))); + CudaSafeCall(cudaMalloc(&d_send_buffer_z1_particles, + buffer_length_particles_z1_send * sizeof(Real))); + CudaSafeCall(cudaMalloc(&d_recv_buffer_x0_particles, + buffer_length_particles_x0_recv * sizeof(Real))); + CudaSafeCall(cudaMalloc(&d_recv_buffer_x1_particles, + buffer_length_particles_x1_recv * sizeof(Real))); + CudaSafeCall(cudaMalloc(&d_recv_buffer_y0_particles, + buffer_length_particles_y0_recv * sizeof(Real))); + CudaSafeCall(cudaMalloc(&d_recv_buffer_y1_particles, + buffer_length_particles_y1_recv * sizeof(Real))); + CudaSafeCall(cudaMalloc(&d_recv_buffer_z0_particles, + buffer_length_particles_z0_recv * sizeof(Real))); + CudaSafeCall(cudaMalloc(&d_recv_buffer_z1_particles, + buffer_length_particles_z1_recv * sizeof(Real))); + #endif // PARTICLES && PARTICLES_GPU // CPU relies on host buffers, GPU without MPI_GPU relies on host buffers #ifdef PARTICLES - #if (defined(PARTICLES_GPU) && !defined(MPI_GPU)) || defined(PARTICLES_CPU) - chprintf("Allocating MPI communication buffers on Host for particle transfers ( N_Particles: %d ).\n", N_PARTICLES_TRANSFER ); - h_send_buffer_x0_particles = (Real *) malloc ( buffer_length_particles_x0_send*sizeof(Real) ); - h_send_buffer_x1_particles = (Real *) malloc ( buffer_length_particles_x1_send*sizeof(Real) ); - h_send_buffer_y0_particles = (Real *) malloc ( buffer_length_particles_y0_send*sizeof(Real) ); - h_send_buffer_y1_particles = (Real *) malloc ( buffer_length_particles_y1_send*sizeof(Real) ); - h_send_buffer_z0_particles = (Real *) malloc ( buffer_length_particles_z0_send*sizeof(Real) ); - h_send_buffer_z1_particles = (Real *) malloc ( buffer_length_particles_z1_send*sizeof(Real) ); - h_recv_buffer_x0_particles = (Real *) malloc ( buffer_length_particles_x0_recv*sizeof(Real) ); - h_recv_buffer_x1_particles = (Real *) malloc ( buffer_length_particles_x1_recv*sizeof(Real) ); - h_recv_buffer_y0_particles = (Real *) malloc ( buffer_length_particles_y0_recv*sizeof(Real) ); - h_recv_buffer_y1_particles = (Real *) malloc ( buffer_length_particles_y1_recv*sizeof(Real) ); - h_recv_buffer_z0_particles = (Real *) malloc ( buffer_length_particles_z0_recv*sizeof(Real) ); - h_recv_buffer_z1_particles = (Real *) malloc ( buffer_length_particles_z1_recv*sizeof(Real) ); - #endif // (defined(PARTICLES_GPU) && !defined(MPI_GPU)) || defined(PARTICLES_CPU) - #endif //PARTICLES - + #if (defined(PARTICLES_GPU) && !defined(MPI_GPU)) || defined(PARTICLES_CPU) + chprintf( + "Allocating MPI communication buffers on Host for particle transfers ( " + "N_Particles: %d ).\n", + N_PARTICLES_TRANSFER); + h_send_buffer_x0_particles = + (Real *)malloc(buffer_length_particles_x0_send * sizeof(Real)); + h_send_buffer_x1_particles = + (Real *)malloc(buffer_length_particles_x1_send * sizeof(Real)); + h_send_buffer_y0_particles = + (Real *)malloc(buffer_length_particles_y0_send * sizeof(Real)); + h_send_buffer_y1_particles = + (Real *)malloc(buffer_length_particles_y1_send * sizeof(Real)); + h_send_buffer_z0_particles = + (Real *)malloc(buffer_length_particles_z0_send * sizeof(Real)); + h_send_buffer_z1_particles = + (Real *)malloc(buffer_length_particles_z1_send * sizeof(Real)); + h_recv_buffer_x0_particles = + (Real *)malloc(buffer_length_particles_x0_recv * sizeof(Real)); + h_recv_buffer_x1_particles = + (Real *)malloc(buffer_length_particles_x1_recv * sizeof(Real)); + h_recv_buffer_y0_particles = + (Real *)malloc(buffer_length_particles_y0_recv * sizeof(Real)); + h_recv_buffer_y1_particles = + (Real *)malloc(buffer_length_particles_y1_recv * sizeof(Real)); + h_recv_buffer_z0_particles = + (Real *)malloc(buffer_length_particles_z0_recv * sizeof(Real)); + h_recv_buffer_z1_particles = + (Real *)malloc(buffer_length_particles_z1_recv * sizeof(Real)); + #endif // (defined(PARTICLES_GPU) && !defined(MPI_GPU)) || + // defined(PARTICLES_CPU) + #endif // PARTICLES } - /* MPI reduction wrapper for max(Real)*/ Real ReduceRealMax(Real x) { @@ -675,11 +698,10 @@ Real ReduceRealMax(Real x) Real y; MPI_Allreduce(&in, &out, 1, MPI_CHREAL, MPI_MAX, world); - y = (Real) out; + y = (Real)out; return y; } - /* MPI reduction wrapper for min(Real)*/ Real ReduceRealMin(Real x) { @@ -688,11 +710,10 @@ Real ReduceRealMin(Real x) Real y; MPI_Allreduce(&in, &out, 1, MPI_CHREAL, MPI_MIN, world); - y = (Real) out; + y = (Real)out; return y; } - /* MPI reduction wrapper for avg(Real)*/ Real ReduceRealAvg(Real x) { @@ -701,11 +722,11 @@ Real ReduceRealAvg(Real x) Real y; MPI_Allreduce(&in, &out, 1, MPI_CHREAL, MPI_SUM, world); - y = (Real) out / nproc; + y = (Real)out / nproc; return y; } -#ifdef PARTICLES + #ifdef PARTICLES /* MPI reduction wrapper for sum(part_int)*/ Real ReducePartIntSum(part_int_t x) { @@ -713,29 +734,30 @@ Real ReducePartIntSum(part_int_t x) part_int_t out; part_int_t y; - #ifdef PARTICLES_LONG_INTS + #ifdef PARTICLES_LONG_INTS MPI_Allreduce(&in, &out, 1, MPI_LONG, MPI_SUM, world); - #else + #else MPI_Allreduce(&in, &out, 1, MPI_INT, MPI_SUM, world); - #endif - y = (part_int_t) out ; + #endif + y = (part_int_t)out; return y; } - // Count the particles in the MPI ranks lower than this rank (procID) to get a // global offset for the local IDs. -part_int_t Get_Particles_IDs_Global_MPI_Offset( part_int_t n_local ){ +part_int_t Get_Particles_IDs_Global_MPI_Offset(part_int_t n_local) +{ part_int_t global_offset; part_int_t *n_local_all, *n_local_send; - n_local_send = (part_int_t *) malloc( 1*sizeof(part_int_t) ); - n_local_all = (part_int_t *) malloc( nproc*sizeof(part_int_t) ); + n_local_send = (part_int_t *)malloc(1 * sizeof(part_int_t)); + n_local_all = (part_int_t *)malloc(nproc * sizeof(part_int_t)); n_local_send[0] = n_local; - MPI_Allgather( n_local_send, 1, MPI_PART_INT, n_local_all, 1, MPI_PART_INT, world ); + MPI_Allgather(n_local_send, 1, MPI_PART_INT, n_local_all, 1, MPI_PART_INT, + world); global_offset = 0; - for (int other_rank=0; other_rank nproc_y if (np_x < np_y) { nproc_x = np_y; @@ -865,72 +874,61 @@ void TileBlockDecomposition(void) } /*base decomposition on whether n_gpf==2*/ - if(n_gpf!=2) { + if (n_gpf != 2) { /*we are in 3-d, so split remainder evenly*/ np_x = n_gpf; - n_gpf = greatest_prime_factor(nproc/n_gpf); - if(n_gpf!=2) { + n_gpf = greatest_prime_factor(nproc / n_gpf); + if (n_gpf != 2) { /*the next greatest prime is odd, so just split*/ np_y = n_gpf; - np_z = nproc/(np_x*np_y); + np_z = nproc / (np_x * np_y); } else { /*increase ny, nz round-robin*/ - while(np_x*np_y*np_z < nproc) - { - np_y*=2; - if(np_x*np_y*np_z==nproc) - break; - np_z*=2; - } - + while (np_x * np_y * np_z < nproc) { + np_y *= 2; + if (np_x * np_y * np_z == nproc) break; + np_z *= 2; + } } } else { /*nproc is a power of 2*/ /*we are in 3-d, so split remainder evenly*/ /*increase nx, ny, nz round-robin*/ - while(np_x*np_y*np_z < nproc) - { - np_x*=2; - if(np_x*np_y*np_z==nproc) - break; - np_y*=2; - if(np_x*np_y*np_z==nproc) - break; - np_z*=2; - } + while (np_x * np_y * np_z < nproc) { + np_x *= 2; + if (np_x * np_y * np_z == nproc) break; + np_y *= 2; + if (np_x * np_y * np_z == nproc) break; + np_z *= 2; + } } - - //reorder x, y, z + // reorder x, y, z int n_tmp; - if(np_z>np_y) - { - n_tmp = np_y; - np_y = np_z; - np_z = n_tmp; + if (np_z > np_y) { + n_tmp = np_y; + np_y = np_z; + np_z = n_tmp; } - if(np_y>np_x) - { - n_tmp = np_x; - np_x = np_y; - np_y = n_tmp; + if (np_y > np_x) { + n_tmp = np_x; + np_x = np_y; + np_y = n_tmp; } - if(np_z>np_y) - { - n_tmp = np_y; - np_y = np_z; - np_z = n_tmp; + if (np_z > np_y) { + n_tmp = np_y; + np_y = np_z; + np_z = n_tmp; } - //save result + // save result nproc_x = np_x; nproc_y = np_y; nproc_z = np_z; } - /*! \fn int ***three_dimensional_int_array(int n, int l, int m) * * \brief Allocate a three dimensional (n x l x m) int array * */ @@ -939,60 +937,59 @@ int ***three_dimensional_int_array(int n, int l, int m) int ***x; x = new int **[n]; - for(int i=0;i -#include -#include "../grid/grid3D.h" -#include "../global/global.h" - -#ifdef FFTW -#include "fftw3.h" -#include "fftw3-mpi.h" -#endif /*FFTW*/ + #ifndef MPI_ROUTINES_H + #define MPI_ROUTINES_H + #include + #include + + #include "../global/global.h" + #include "../grid/grid3D.h" + + #ifdef FFTW + #include "fftw3-mpi.h" + #include "fftw3.h" + #endif /*FFTW*/ /*Global MPI Variables*/ -extern int procID; /*process rank*/ -extern int nproc; /*number of processes in global comm*/ -extern int root; /*rank of root process*/ +extern int procID; /*process rank*/ +extern int nproc; /*number of processes in global comm*/ +extern int root; /*rank of root process*/ extern int procID_node; /*process rank on node*/ extern int nproc_node; /*number of MPI processes on node*/ -extern MPI_Comm world; /*global communicator*/ -extern MPI_Comm node; /*communicator for each node*/ +extern MPI_Comm world; /*global communicator*/ +extern MPI_Comm node; /*communicator for each node*/ extern MPI_Datatype MPI_CHREAL; /*data type describing float precision*/ -#ifdef PARTICLES -extern MPI_Datatype MPI_PART_INT; /*data type describing interger for particles precision*/ -#endif + #ifdef PARTICLES +extern MPI_Datatype + MPI_PART_INT; /*data type describing interger for particles precision*/ + #endif -//extern MPI_Request send_request[6]; -//extern MPI_Request recv_request[6]; +// extern MPI_Request send_request[6]; +// extern MPI_Request recv_request[6]; extern MPI_Request *send_request; extern MPI_Request *recv_request; -//MPI destinations and sources +// MPI destinations and sources extern int dest[6]; extern int source[6]; -//Communication buffers +// Communication buffers // For BLOCK extern Real *d_send_buffer_x0; @@ -65,8 +67,8 @@ extern Real *h_recv_buffer_y1; extern Real *h_recv_buffer_z0; extern Real *h_recv_buffer_z1; -#ifdef PARTICLES -//Buffers for particles transfers + #ifdef PARTICLES +// Buffers for particles transfers extern Real *d_send_buffer_x0_particles; extern Real *d_send_buffer_x1_particles; extern Real *d_send_buffer_y0_particles; @@ -113,8 +115,7 @@ extern MPI_Request *recv_request_n_particles; // Request for Particles Transfer extern MPI_Request *send_request_particles_transfer; extern MPI_Request *recv_request_particles_transfer; -#endif//PARTICLES - + #endif // PARTICLES extern int send_buffer_length; extern int recv_buffer_length; @@ -134,9 +135,9 @@ extern ptrdiff_t nx_local_start; extern ptrdiff_t ny_local_start; extern ptrdiff_t nz_local_start; -#ifdef FFTW + #ifdef FFTW extern ptrdiff_t n_local_complex; -#endif /*FFTW*/ + #endif /*FFTW*/ /*number of MPI procs in each dimension*/ extern int nproc_x; @@ -148,9 +149,11 @@ extern int nproc_z; void InitializeChollaMPI(int *pargc, char **pargv[]); /* Perform domain decomposition */ -void DomainDecomposition(struct parameters *P, struct Header *H, int nx_global, int ny_global, int nz_global); +void DomainDecomposition(struct parameters *P, struct Header *H, int nx_global, + int ny_global, int nz_global); -void DomainDecompositionBLOCK(struct parameters *P, struct Header *H, int nx_global, int ny_global, int nz_global); +void DomainDecompositionBLOCK(struct parameters *P, struct Header *H, + int nx_global, int ny_global, int nz_global); /*tile MPI processes in a block decomposition*/ void TileBlockDecomposition(void); @@ -164,17 +167,19 @@ Real ReduceRealMin(Real x); /* MPI reduction wrapper for avg(Real)*/ Real ReduceRealAvg(Real x); -#ifdef PARTICLES + #ifdef PARTICLES /* MPI reduction wrapper for sum(part_int)*/ Real ReducePartIntSum(part_int_t x); -// Count the particles in the MPI ranks lower that this rank to get a global offset for the local IDs. -part_int_t Get_Particles_IDs_Global_MPI_Offset( part_int_t n_local ); +// Count the particles in the MPI ranks lower that this rank to get a global +// offset for the local IDs. +part_int_t Get_Particles_IDs_Global_MPI_Offset(part_int_t n_local); -// Function that checks if the buffer size For the particles transfer is large enough, -// and grows the buffer if needed. -void Check_and_Grow_Particles_Buffer( Real **part_buffer, int *current_size_ptr, int new_size ); -#endif +// Function that checks if the buffer size For the particles transfer is large +// enough, and grows the buffer if needed. +void Check_and_Grow_Particles_Buffer(Real **part_buffer, int *current_size_ptr, + int new_size); + #endif /* Print information about the domain properties */ void Print_Domain_Properties(struct Header H); @@ -185,19 +190,18 @@ void Allocate_MPI_DeviceBuffers(struct Header *H); /* find the greatest prime factor of an integer */ int greatest_prime_factor(int n); - /*! \fn int ***three_dimensional_int_array(int n, int l, int m) * * \brief Allocate a three dimensional (n x l x m) int array * */ int ***three_dimensional_int_array(int n, int l, int m); -/*! \fn void deallocate_three_int_dimensional_array(int ***x, int n, int l, int m) - * \brief De-allocate a three dimensional (n x l x m) int array. +/*! \fn void deallocate_three_int_dimensional_array(int ***x, int n, int l, int + * m) \brief De-allocate a three dimensional (n x l x m) int array. * */ void deallocate_three_dimensional_int_array(int ***x, int n, int l, int m); /* Copy MPI receive buffers on Host to their device locations */ -void copyHostToDeviceReceiveBuffer ( int direction ); +void copyHostToDeviceReceiveBuffer(int direction); -#endif /*MPI_ROUTINES_H*/ -#endif /*MPI_CHOLLA*/ + #endif /*MPI_ROUTINES_H*/ +#endif /*MPI_CHOLLA*/ diff --git a/src/particles/density_CIC.cpp b/src/particles/density_CIC.cpp index c907e64eb..aac2a2056 100644 --- a/src/particles/density_CIC.cpp +++ b/src/particles/density_CIC.cpp @@ -1,41 +1,40 @@ #ifdef PARTICLES -#include -#include -#include "math.h" -#include -#include "../global/global.h" -#include "../particles/particles_3D.h" -#include "../grid/grid3D.h" -#include "../io/io.h" - -#ifdef PARALLEL_OMP -#include "../utils/parallel_omp.h" -#endif + #include + #include + #include + #include "../global/global.h" + #include "../grid/grid3D.h" + #include "../io/io.h" + #include "../particles/particles_3D.h" + #include "math.h" -//Get the particles Cloud-In-Cell interpolated density -void Particles_3D::Get_Density_CIC(){ + #ifdef PARALLEL_OMP + #include "../utils/parallel_omp.h" + #endif +// Get the particles Cloud-In-Cell interpolated density +void Particles_3D::Get_Density_CIC() +{ #ifdef PARTICLES_CPU - #ifdef PARALLEL_OMP + #ifdef PARALLEL_OMP Get_Density_CIC_OMP(); - #else + #else Get_Density_CIC_Serial(); - #endif //PARALLEL_OMP + #endif // PARALLEL_OMP #endif #ifdef PARTICLES_GPU Get_Density_CIC_GPU(); #endif - } - -//Compute the particles density and copy it to the array in Grav to compute the potential -void Grid3D::Copy_Particles_Density_to_Gravity(struct parameters P){ - +// Compute the particles density and copy it to the array in Grav to compute the +// potential +void Grid3D::Copy_Particles_Density_to_Gravity(struct parameters P) +{ #ifdef CPU_TIME Timer.Part_Density.Start(); #endif @@ -54,53 +53,53 @@ void Grid3D::Copy_Particles_Density_to_Gravity(struct parameters P){ // Step 2: Transfer Particles CIC density Boundaries Transfer_Particles_Density_Boundaries(P); - //Step 3: Copy Particles density to Gravity array + // Step 3: Copy Particles density to Gravity array Copy_Particles_Density(); #ifdef CPU_TIME Timer.Part_Dens_Transf.End(); #endif - - } -//Copy the particles density to the density array in Grav to compute the potential -void Grid3D::Copy_Particles_Density(){ - +// Copy the particles density to the density array in Grav to compute the +// potential +void Grid3D::Copy_Particles_Density() +{ #ifdef GRAVITY_GPU - #ifdef PARTICLES_CPU + #ifdef PARTICLES_CPU Copy_Particles_Density_to_GPU(); - #endif + #endif Copy_Particles_Density_GPU(); #else - #ifndef PARALLEL_OMP - Copy_Particles_Density_function( 0, Grav.nz_local ); - #else + #ifndef PARALLEL_OMP + Copy_Particles_Density_function(0, Grav.nz_local); + #else - #pragma omp parallel num_threads( N_OMP_THREADS ) + #pragma omp parallel num_threads(N_OMP_THREADS) { int omp_id, n_omp_procs; int g_start, g_end; - omp_id = omp_get_thread_num(); + omp_id = omp_get_thread_num(); n_omp_procs = omp_get_num_threads(); - Get_OMP_Grid_Indxs( Grav.nz_local, n_omp_procs, omp_id, &g_start, &g_end ); + Get_OMP_Grid_Indxs(Grav.nz_local, n_omp_procs, omp_id, &g_start, &g_end); - Copy_Particles_Density_function( g_start, g_end ); + Copy_Particles_Density_function(g_start, g_end); } - #endif//PARALLEL_OMP + #endif // PARALLEL_OMP - #endif//GRAVITY_GPU + #endif // GRAVITY_GPU } -void Grid3D::Copy_Particles_Density_function( int g_start, int g_end ){ +void Grid3D::Copy_Particles_Density_function(int g_start, int g_end) +{ int nx_part, ny_part, nz_part, nGHST; - nGHST = Particles.G.n_ghost_particles_grid; - nx_part = Particles.G.nx_local + 2*nGHST; - ny_part = Particles.G.ny_local + 2*nGHST; - nz_part = Particles.G.nz_local + 2*nGHST; + nGHST = Particles.G.n_ghost_particles_grid; + nx_part = Particles.G.nx_local + 2 * nGHST; + ny_part = Particles.G.ny_local + 2 * nGHST; + nz_part = Particles.G.nz_local + 2 * nGHST; int nx_dens, ny_dens, nz_dens; nx_dens = Grav.nx_local; @@ -108,23 +107,23 @@ void Grid3D::Copy_Particles_Density_function( int g_start, int g_end ){ nz_dens = Grav.nz_local; int i, j, k, id_CIC, id_grid; - for ( k=g_start; k nx_g-3 ) ignore = true; - if ( indx_y > ny_g-3 ) ignore = true; - if ( indx_y > nz_g-3 ) ignore = true; - if ( x_pos < G.xMin || x_pos >= G.xMax ) in_local = false; - if ( y_pos < G.yMin || y_pos >= G.yMax ) in_local = false; - if ( z_pos < G.zMin || z_pos >= G.zMax ) in_local = false; - if ( ! in_local ) { + Get_Indexes_CIC(xMin, yMin, zMin, dx, dy, dz, x_pos, y_pos, z_pos, indx_x, + indx_y, indx_z); + if (indx_x < -1) ignore = true; + if (indx_y < -1) ignore = true; + if (indx_z < -1) ignore = true; + if (indx_x > nx_g - 3) ignore = true; + if (indx_y > ny_g - 3) ignore = true; + if (indx_y > nz_g - 3) ignore = true; + if (x_pos < G.xMin || x_pos >= G.xMax) in_local = false; + if (y_pos < G.yMin || y_pos >= G.yMax) in_local = false; + if (z_pos < G.zMin || z_pos >= G.zMax) in_local = false; + if (!in_local) { std::cout << " Density CIC Error:" << std::endl; - #ifdef PARTICLE_IDS - std::cout << " Particle outside Local domain pID: " << partIDs[pIndx] << std::endl; - #else + #ifdef PARTICLE_IDS + std::cout << " Particle outside Local domain pID: " << partIDs[pIndx] + << std::endl; + #else std::cout << " Particle outside Local domain " << std::endl; - #endif - std::cout << " Domain X: " << G.xMin << " " << G.xMax << std::endl; - std::cout << " Domain Y: " << G.yMin << " " << G.yMax << std::endl; - std::cout << " Domain Z: " << G.zMin << " " << G.zMax << std::endl; + #endif + std::cout << " Domain X: " << G.xMin << " " << G.xMax << std::endl; + std::cout << " Domain Y: " << G.yMin << " " << G.yMax << std::endl; + std::cout << " Domain Z: " << G.zMin << " " << G.zMax << std::endl; std::cout << " Particle X: " << x_pos << std::endl; std::cout << " Particle Y: " << y_pos << std::endl; std::cout << " Particle Z: " << z_pos << std::endl; - continue; + continue; } - if ( ignore ){ - #ifdef PARTICLE_IDS - std::cout << "ERROR Density CIC Index pID: " << partIDs[pIndx] << std::endl; - #else + if (ignore) { + #ifdef PARTICLE_IDS + std::cout << "ERROR Density CIC Index pID: " << partIDs[pIndx] + << std::endl; + #else std::cout << "ERROR Density CIC Index " << std::endl; - #endif + #endif std::cout << "Negative xIndx: " << x_pos << " " << indx_x << std::endl; std::cout << "Negative zIndx: " << z_pos << " " << indx_z << std::endl; std::cout << "Negative yIndx: " << y_pos << " " << indx_y << std::endl; @@ -235,75 +241,71 @@ void Particles_3D::Get_Density_CIC_Serial( ){ // exit(-1); continue; } - cell_center_x = xMin + indx_x*dx + 0.5*dx; - cell_center_y = yMin + indx_y*dy + 0.5*dy; - cell_center_z = zMin + indx_z*dz + 0.5*dz; - delta_x = 1 - ( x_pos - cell_center_x ) / dx; - delta_y = 1 - ( y_pos - cell_center_y ) / dy; - delta_z = 1 - ( z_pos - cell_center_z ) / dz; + cell_center_x = xMin + indx_x * dx + 0.5 * dx; + cell_center_y = yMin + indx_y * dy + 0.5 * dy; + cell_center_z = zMin + indx_z * dz + 0.5 * dz; + delta_x = 1 - (x_pos - cell_center_x) / dx; + delta_y = 1 - (y_pos - cell_center_y) / dy; + delta_z = 1 - (z_pos - cell_center_z) / dz; indx_x += nGHST; indx_y += nGHST; indx_z += nGHST; - indx = indx_x + indx_y*nx_g + indx_z*nx_g*ny_g; - G.density[indx] += pMass * delta_x * delta_y * delta_z; + indx = indx_x + indx_y * nx_g + indx_z * nx_g * ny_g; + G.density[indx] += pMass * delta_x * delta_y * delta_z; - indx = (indx_x+1) + indx_y*nx_g + indx_z*nx_g*ny_g; - G.density[indx] += pMass * (1-delta_x) * delta_y * delta_z; + indx = (indx_x + 1) + indx_y * nx_g + indx_z * nx_g * ny_g; + G.density[indx] += pMass * (1 - delta_x) * delta_y * delta_z; - indx = indx_x + (indx_y+1)*nx_g + indx_z*nx_g*ny_g; - G.density[indx] += pMass * delta_x * (1-delta_y) * delta_z; + indx = indx_x + (indx_y + 1) * nx_g + indx_z * nx_g * ny_g; + G.density[indx] += pMass * delta_x * (1 - delta_y) * delta_z; - indx = indx_x + indx_y*nx_g + (indx_z+1)*nx_g*ny_g; - G.density[indx] += pMass * delta_x * delta_y * (1-delta_z); + indx = indx_x + indx_y * nx_g + (indx_z + 1) * nx_g * ny_g; + G.density[indx] += pMass * delta_x * delta_y * (1 - delta_z); - indx = (indx_x+1) + (indx_y+1)*nx_g + indx_z*nx_g*ny_g; - G.density[indx] += pMass * (1-delta_x) * (1-delta_y) * delta_z; + indx = (indx_x + 1) + (indx_y + 1) * nx_g + indx_z * nx_g * ny_g; + G.density[indx] += pMass * (1 - delta_x) * (1 - delta_y) * delta_z; - indx = (indx_x+1) + indx_y*nx_g + (indx_z+1)*nx_g*ny_g; - G.density[indx] += pMass * (1-delta_x) * delta_y * (1-delta_z); + indx = (indx_x + 1) + indx_y * nx_g + (indx_z + 1) * nx_g * ny_g; + G.density[indx] += pMass * (1 - delta_x) * delta_y * (1 - delta_z); - indx = indx_x + (indx_y+1)*nx_g + (indx_z+1)*nx_g*ny_g; - G.density[indx] += pMass * delta_x * (1-delta_y) * (1-delta_z); + indx = indx_x + (indx_y + 1) * nx_g + (indx_z + 1) * nx_g * ny_g; + G.density[indx] += pMass * delta_x * (1 - delta_y) * (1 - delta_z); - indx = (indx_x+1) + (indx_y+1)*nx_g + (indx_z+1)*nx_g*ny_g; - G.density[indx] += pMass * (1-delta_x) * (1-delta_y) * (1-delta_z); + indx = (indx_x + 1) + (indx_y + 1) * nx_g + (indx_z + 1) * nx_g * ny_g; + G.density[indx] += pMass * (1 - delta_x) * (1 - delta_y) * (1 - delta_z); } } - - -#ifdef PARALLEL_OMP -//Compute the CIC density when PARALLEL_OMP -void Particles_3D::Get_Density_CIC_OMP( ){ - - - //Span OpenMP threads - #pragma omp parallel num_threads( N_OMP_THREADS ) + #ifdef PARALLEL_OMP +// Compute the CIC density when PARALLEL_OMP +void Particles_3D::Get_Density_CIC_OMP() +{ + // Span OpenMP threads + #pragma omp parallel num_threads(N_OMP_THREADS) { int omp_id; int g_start, g_end; int n_omp_procs; - omp_id = omp_get_thread_num(); + omp_id = omp_get_thread_num(); n_omp_procs = omp_get_num_threads(); int nGHST = G.n_ghost_particles_grid; - int nx_g = G.nx_local + 2*nGHST; - int ny_g = G.ny_local + 2*nGHST; - int nz_g = G.nz_local + 2*nGHST; + int nx_g = G.nx_local + 2 * nGHST; + int ny_g = G.ny_local + 2 * nGHST; + int nz_g = G.nz_local + 2 * nGHST; Real xMin, yMin, zMin, dx, dy, dz; - xMin = G.xMin; - yMin = G.yMin; - zMin = G.zMin; - dx = G.dx; - dy = G.dy; - dz = G.dz; - Real dV_inv = 1./(G.dx*G.dy*G.dz); - + xMin = G.xMin; + yMin = G.yMin; + zMin = G.zMin; + dx = G.dx; + dy = G.dy; + dz = G.dz; + Real dV_inv = 1. / (G.dx * G.dy * G.dz); - Get_OMP_Grid_Indxs( nz_g, n_omp_procs, omp_id, &g_start, &g_end ); + Get_OMP_Grid_Indxs(nz_g, n_omp_procs, omp_id, &g_start, &g_end); part_int_t pIndx; int indx_x, indx_y, indx_z, indx; @@ -314,37 +316,37 @@ void Particles_3D::Get_Density_CIC_OMP( ){ bool ignore, in_local; bool add_1, add_2; - for ( pIndx=0; pIndx < n_local; pIndx++ ){ + for (pIndx = 0; pIndx < n_local; pIndx++) { add_1 = false; add_2 = false; - z_pos = pos_z[pIndx]; - indx_z = (int) floor( ( z_pos - zMin - 0.5*dz ) / dz ); + z_pos = pos_z[pIndx]; + indx_z = (int)floor((z_pos - zMin - 0.5 * dz) / dz); indx_z += nGHST; - if ( (indx_z >= g_start) && (indx_z < g_end) ) add_1 = true; - if ( ((indx_z+1) >= g_start) && ((indx_z+1) < g_end) ) add_2 = true; - if (!( add_1 || add_2) ) continue; + if ((indx_z >= g_start) && (indx_z < g_end)) add_1 = true; + if (((indx_z + 1) >= g_start) && ((indx_z + 1) < g_end)) add_2 = true; + if (!(add_1 || add_2)) continue; ignore = false; - x_pos = pos_x[pIndx]; - y_pos = pos_y[pIndx]; + x_pos = pos_x[pIndx]; + y_pos = pos_y[pIndx]; - indx_x = (int) floor( ( x_pos - xMin - 0.5*dx ) / dx ); - indx_y = (int) floor( ( y_pos - yMin - 0.5*dy ) / dy ); + indx_x = (int)floor((x_pos - xMin - 0.5 * dx) / dx); + indx_y = (int)floor((y_pos - yMin - 0.5 * dy) / dy); indx_z -= nGHST; - if ( indx_x < -1 ) ignore = true; - if ( indx_y < -1 ) ignore = true; - if ( indx_z < -1 ) ignore = true; - if ( indx_x > nx_g-3 ) ignore = true; - if ( indx_y > ny_g-3 ) ignore = true; - if ( indx_y > nz_g-3 ) ignore = true; - if ( ignore ){ - #ifdef PARTICLE_IDS + if (indx_x < -1) ignore = true; + if (indx_y < -1) ignore = true; + if (indx_z < -1) ignore = true; + if (indx_x > nx_g - 3) ignore = true; + if (indx_y > ny_g - 3) ignore = true; + if (indx_y > nz_g - 3) ignore = true; + if (ignore) { + #ifdef PARTICLE_IDS std::cout << "ERROR CIC Index pID: " << partIDs[pIndx] << std::endl; - #else + #else std::cout << "ERROR CIC Index " << std::endl; - #endif + #endif std::cout << "Negative xIndx: " << x_pos << " " << indx_x << std::endl; std::cout << "Negative zIndx: " << z_pos << " " << indx_z << std::endl; std::cout << "Negative yIndx: " << y_pos << " " << indx_y << std::endl; @@ -356,23 +358,24 @@ void Particles_3D::Get_Density_CIC_OMP( ){ continue; } in_local = true; - if ( x_pos < G.xMin || x_pos >= G.xMax ) in_local = false; - if ( y_pos < G.yMin || y_pos >= G.yMax ) in_local = false; - if ( z_pos < G.zMin || z_pos >= G.zMax ) in_local = false; - if ( ! in_local ) { + if (x_pos < G.xMin || x_pos >= G.xMax) in_local = false; + if (y_pos < G.yMin || y_pos >= G.yMax) in_local = false; + if (z_pos < G.zMin || z_pos >= G.zMax) in_local = false; + if (!in_local) { std::cout << " Density CIC Error:" << std::endl; - #ifdef PARTICLE_IDS - std::cout << " Particle outside Local domain pID: " << partIDs[pIndx] << std::endl; - #else + #ifdef PARTICLE_IDS + std::cout << " Particle outside Local domain pID: " + << partIDs[pIndx] << std::endl; + #else std::cout << " Particle outside Local domain " << std::endl; - #endif - std::cout << " Domain X: " << G.xMin << " " << G.xMax << std::endl; - std::cout << " Domain Y: " << G.yMin << " " << G.yMax << std::endl; - std::cout << " Domain Z: " << G.zMin << " " << G.zMax << std::endl; + #endif + std::cout << " Domain X: " << G.xMin << " " << G.xMax << std::endl; + std::cout << " Domain Y: " << G.yMin << " " << G.yMax << std::endl; + std::cout << " Domain Z: " << G.zMin << " " << G.zMax << std::endl; std::cout << " Particle X: " << x_pos << std::endl; std::cout << " Particle Y: " << y_pos << std::endl; std::cout << " Particle Z: " << z_pos << std::endl; - continue; + continue; } #ifdef SINGLE_PARTICLE_MASS @@ -381,49 +384,49 @@ void Particles_3D::Get_Density_CIC_OMP( ){ pMass = mass[pIndx] * dV_inv; #endif - cell_center_x = xMin + indx_x*dx + 0.5*dx; - cell_center_y = yMin + indx_y*dy + 0.5*dy; - cell_center_z = zMin + indx_z*dz + 0.5*dz; - delta_x = 1 - ( x_pos - cell_center_x ) / dx; - delta_y = 1 - ( y_pos - cell_center_y ) / dy; - delta_z = 1 - ( z_pos - cell_center_z ) / dz; + cell_center_x = xMin + indx_x * dx + 0.5 * dx; + cell_center_y = yMin + indx_y * dy + 0.5 * dy; + cell_center_z = zMin + indx_z * dz + 0.5 * dz; + delta_x = 1 - (x_pos - cell_center_x) / dx; + delta_y = 1 - (y_pos - cell_center_y) / dy; + delta_z = 1 - (z_pos - cell_center_z) / dz; indx_x += nGHST; indx_y += nGHST; indx_z += nGHST; - if ( add_1 ){ - indx = indx_x + indx_y*nx_g + indx_z*nx_g*ny_g; - G.density[indx] += pMass * delta_x * delta_y * delta_z; + if (add_1) { + indx = indx_x + indx_y * nx_g + indx_z * nx_g * ny_g; + G.density[indx] += pMass * delta_x * delta_y * delta_z; - indx = (indx_x+1) + indx_y*nx_g + indx_z*nx_g*ny_g; - G.density[indx] += pMass * (1-delta_x) * delta_y * delta_z; + indx = (indx_x + 1) + indx_y * nx_g + indx_z * nx_g * ny_g; + G.density[indx] += pMass * (1 - delta_x) * delta_y * delta_z; - indx = indx_x + (indx_y+1)*nx_g + indx_z*nx_g*ny_g; - G.density[indx] += pMass * delta_x * (1-delta_y) * delta_z; + indx = indx_x + (indx_y + 1) * nx_g + indx_z * nx_g * ny_g; + G.density[indx] += pMass * delta_x * (1 - delta_y) * delta_z; - indx = (indx_x+1) + (indx_y+1)*nx_g + indx_z*nx_g*ny_g; - G.density[indx] += pMass * (1-delta_x) * (1-delta_y) * delta_z; + indx = (indx_x + 1) + (indx_y + 1) * nx_g + indx_z * nx_g * ny_g; + G.density[indx] += pMass * (1 - delta_x) * (1 - delta_y) * delta_z; } - if ( add_2 ){ - indx = indx_x + indx_y*nx_g + (indx_z+1)*nx_g*ny_g; - G.density[indx] += pMass * delta_x * delta_y * (1-delta_z); + if (add_2) { + indx = indx_x + indx_y * nx_g + (indx_z + 1) * nx_g * ny_g; + G.density[indx] += pMass * delta_x * delta_y * (1 - delta_z); - indx = (indx_x+1) + indx_y*nx_g + (indx_z+1)*nx_g*ny_g; - G.density[indx] += pMass * (1-delta_x) * delta_y * (1-delta_z); + indx = (indx_x + 1) + indx_y * nx_g + (indx_z + 1) * nx_g * ny_g; + G.density[indx] += pMass * (1 - delta_x) * delta_y * (1 - delta_z); - indx = indx_x + (indx_y+1)*nx_g + (indx_z+1)*nx_g*ny_g; - G.density[indx] += pMass * delta_x * (1-delta_y) * (1-delta_z); + indx = indx_x + (indx_y + 1) * nx_g + (indx_z + 1) * nx_g * ny_g; + G.density[indx] += pMass * delta_x * (1 - delta_y) * (1 - delta_z); - indx = (indx_x+1) + (indx_y+1)*nx_g + (indx_z+1)*nx_g*ny_g; - G.density[indx] += pMass * (1-delta_x) * (1-delta_y) * (1-delta_z); + indx = (indx_x + 1) + (indx_y + 1) * nx_g + (indx_z + 1) * nx_g * ny_g; + G.density[indx] += + pMass * (1 - delta_x) * (1 - delta_y) * (1 - delta_z); } } } } -#endif //PARALLEL_OMP - -#endif //PARTICLES_CPU + #endif // PARALLEL_OMP + #endif // PARTICLES_CPU #endif diff --git a/src/particles/density_CIC.h b/src/particles/density_CIC.h index 393c99a6a..d8ea2bef6 100644 --- a/src/particles/density_CIC.h +++ b/src/particles/density_CIC.h @@ -1,9 +1,11 @@ #ifdef PARTICLES -#ifndef DENSITY_CIC_H -#define DENSITY_CIC_H + #ifndef DENSITY_CIC_H + #define DENSITY_CIC_H -void Get_Indexes_CIC( Real xMin, Real yMin, Real zMin, Real dx, Real dy, Real dz, Real pos_x, Real pos_y, Real pos_z, int &indx_x, int &indx_y, int &indx_z ); +void Get_Indexes_CIC(Real xMin, Real yMin, Real zMin, Real dx, Real dy, Real dz, + Real pos_x, Real pos_y, Real pos_z, int &indx_x, + int &indx_y, int &indx_z); -#endif + #endif #endif \ No newline at end of file diff --git a/src/particles/density_CIC_gpu.cu b/src/particles/density_CIC_gpu.cu index 977f84421..835110237 100644 --- a/src/particles/density_CIC_gpu.cu +++ b/src/particles/density_CIC_gpu.cu @@ -1,143 +1,166 @@ #ifdef PARTICLES -#include -#include -#include -#include -#include "../utils/gpu.hpp" -#include "../global/global.h" -#include "../global/global_cuda.h" -#include "../particles/particles_3D.h" -#include "../grid/grid3D.h" - -#ifdef GRAVITY_GPU -void Grid3D::Copy_Particles_Density_to_GPU(){ - CudaSafeCall( cudaMemcpy(Particles.G.density_dev, Particles.G.density, Particles.G.n_cells*sizeof(Real), cudaMemcpyHostToDevice) ); + #include + #include + #include + #include + + #include "../global/global.h" + #include "../global/global_cuda.h" + #include "../grid/grid3D.h" + #include "../particles/particles_3D.h" + #include "../utils/gpu.hpp" + + #ifdef GRAVITY_GPU +void Grid3D::Copy_Particles_Density_to_GPU() +{ + CudaSafeCall(cudaMemcpy(Particles.G.density_dev, Particles.G.density, + Particles.G.n_cells * sizeof(Real), + cudaMemcpyHostToDevice)); } -#endif + #endif -#ifdef PARTICLES_GPU + #ifdef PARTICLES_GPU -//Define atomic_add if it's not supported -#if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 600 -#else -__device__ double atomicAdd(double* address, double val) + // Define atomic_add if it's not supported + #if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 600 + #else +__device__ double atomicAdd(double *address, double val) { - unsigned long long int* address_as_ull = (unsigned long long int*)address; - unsigned long long int old = *address_as_ull, assumed; - do { - assumed = old; - old = atomicCAS(address_as_ull, assumed, - __double_as_longlong(val + __longlong_as_double(assumed))); - } while (assumed != old); - return __longlong_as_double(old); + unsigned long long int *address_as_ull = (unsigned long long int *)address; + unsigned long long int old = *address_as_ull, assumed; + do { + assumed = old; + old = atomicCAS(address_as_ull, assumed, + __double_as_longlong(val + __longlong_as_double(assumed))); + } while (assumed != old); + return __longlong_as_double(old); } -#endif + #endif -//Get the CIC index from the particle position ( device function ) -__device__ void Get_Indexes_CIC( Real xMin, Real yMin, Real zMin, Real dx, Real dy, Real dz, Real pos_x, Real pos_y, Real pos_z, int &indx_x, int &indx_y, int &indx_z ){ - indx_x = (int) floor( ( pos_x - xMin - 0.5*dx ) / dx ); - indx_y = (int) floor( ( pos_y - yMin - 0.5*dy ) / dy ); - indx_z = (int) floor( ( pos_z - zMin - 0.5*dz ) / dz ); +// Get the CIC index from the particle position ( device function ) +__device__ void Get_Indexes_CIC(Real xMin, Real yMin, Real zMin, Real dx, + Real dy, Real dz, Real pos_x, Real pos_y, + Real pos_z, int &indx_x, int &indx_y, + int &indx_z) +{ + indx_x = (int)floor((pos_x - xMin - 0.5 * dx) / dx); + indx_y = (int)floor((pos_y - yMin - 0.5 * dy) / dy); + indx_z = (int)floor((pos_z - zMin - 0.5 * dz) / dz); } -//CUDA Kernel to compute the CIC density from the particles positions -__global__ void Get_Density_CIC_Kernel( part_int_t n_local, Real particle_mass, Real *density_dev, Real *pos_x_dev, Real *pos_y_dev, Real *pos_z_dev, Real *mass_dev, Real xMin, Real yMin, Real zMin, Real xMax, Real yMax, Real zMax, Real dx, Real dy, Real dz, int nx, int ny, int nz, int n_ghost ){ - - int tid = blockIdx.x * blockDim.x + threadIdx.x ; - if ( tid >= n_local) return; +// CUDA Kernel to compute the CIC density from the particles positions +__global__ void Get_Density_CIC_Kernel(part_int_t n_local, Real particle_mass, + Real *density_dev, Real *pos_x_dev, + Real *pos_y_dev, Real *pos_z_dev, + Real *mass_dev, Real xMin, Real yMin, + Real zMin, Real xMax, Real yMax, + Real zMax, Real dx, Real dy, Real dz, + int nx, int ny, int nz, int n_ghost) +{ + int tid = blockIdx.x * blockDim.x + threadIdx.x; + if (tid >= n_local) return; int nx_g, ny_g; - nx_g = nx + 2*n_ghost; - ny_g = ny + 2*n_ghost; + nx_g = nx + 2 * n_ghost; + ny_g = ny + 2 * n_ghost; Real pos_x, pos_y, pos_z, pMass; Real cell_center_x, cell_center_y, cell_center_z; Real delta_x, delta_y, delta_z; - Real dV_inv = 1./(dx*dy*dz); + Real dV_inv = 1. / (dx * dy * dz); pos_x = pos_x_dev[tid]; pos_y = pos_y_dev[tid]; pos_z = pos_z_dev[tid]; - #ifdef SINGLE_PARTICLE_MASS + #ifdef SINGLE_PARTICLE_MASS pMass = particle_mass * dV_inv; - #else + #else pMass = mass_dev[tid] * dV_inv; - #endif + #endif int indx_x, indx_y, indx_z, indx; - Get_Indexes_CIC( xMin, yMin, zMin, dx, dy, dz, pos_x, pos_y, pos_z, indx_x, indx_y, indx_z ); + Get_Indexes_CIC(xMin, yMin, zMin, dx, dy, dz, pos_x, pos_y, pos_z, indx_x, + indx_y, indx_z); bool in_local = true; - if ( pos_x < xMin || pos_x >= xMax ) in_local = false; - if ( pos_y < yMin || pos_y >= yMax ) in_local = false; - if ( pos_z < zMin || pos_z >= zMax ) in_local = false; - if ( ! in_local ) { - printf(" Density CIC Error: Particle outside local domain [%f %f %f] [%f %f] [%f %f] [%f %f]\n ", pos_x, pos_y, pos_z, xMin, xMax, yMin, yMax, zMin, zMax); + if (pos_x < xMin || pos_x >= xMax) in_local = false; + if (pos_y < yMin || pos_y >= yMax) in_local = false; + if (pos_z < zMin || pos_z >= zMax) in_local = false; + if (!in_local) { + printf( + " Density CIC Error: Particle outside local domain [%f %f %f] [%f " + "%f] [%f %f] [%f %f]\n ", + pos_x, pos_y, pos_z, xMin, xMax, yMin, yMax, zMin, zMax); return; } - cell_center_x = xMin + indx_x*dx + 0.5*dx; - cell_center_y = yMin + indx_y*dy + 0.5*dy; - cell_center_z = zMin + indx_z*dz + 0.5*dz; - delta_x = 1 - ( pos_x - cell_center_x ) / dx; - delta_y = 1 - ( pos_y - cell_center_y ) / dy; - delta_z = 1 - ( pos_z - cell_center_z ) / dz; + cell_center_x = xMin + indx_x * dx + 0.5 * dx; + cell_center_y = yMin + indx_y * dy + 0.5 * dy; + cell_center_z = zMin + indx_z * dz + 0.5 * dz; + delta_x = 1 - (pos_x - cell_center_x) / dx; + delta_y = 1 - (pos_y - cell_center_y) / dy; + delta_z = 1 - (pos_z - cell_center_z) / dz; indx_x += n_ghost; indx_y += n_ghost; indx_z += n_ghost; - - indx = indx_x + indx_y*nx_g + indx_z*nx_g*ny_g; + indx = indx_x + indx_y * nx_g + indx_z * nx_g * ny_g; // density_dev[indx] += pMass * delta_x * delta_y * delta_z; - atomicAdd( &density_dev[indx], pMass * delta_x * delta_y * delta_z); + atomicAdd(&density_dev[indx], pMass * delta_x * delta_y * delta_z); - indx = (indx_x+1) + indx_y*nx_g + indx_z*nx_g*ny_g; + indx = (indx_x + 1) + indx_y * nx_g + indx_z * nx_g * ny_g; // density_dev[indx] += pMass * (1-delta_x) * delta_y * delta_z; - atomicAdd( &density_dev[indx], pMass * (1-delta_x) * delta_y * delta_z); + atomicAdd(&density_dev[indx], pMass * (1 - delta_x) * delta_y * delta_z); - indx = indx_x + (indx_y+1)*nx_g + indx_z*nx_g*ny_g; + indx = indx_x + (indx_y + 1) * nx_g + indx_z * nx_g * ny_g; // density_dev[indx] += pMass * delta_x * (1-delta_y) * delta_z; - atomicAdd( &density_dev[indx], pMass * delta_x * (1-delta_y) * delta_z); + atomicAdd(&density_dev[indx], pMass * delta_x * (1 - delta_y) * delta_z); // - indx = indx_x + indx_y*nx_g + (indx_z+1)*nx_g*ny_g; + indx = indx_x + indx_y * nx_g + (indx_z + 1) * nx_g * ny_g; // density_dev[indx] += pMass * delta_x * delta_y * (1-delta_z); - atomicAdd( &density_dev[indx], pMass * delta_x * delta_y * (1-delta_z) ); + atomicAdd(&density_dev[indx], pMass * delta_x * delta_y * (1 - delta_z)); - indx = (indx_x+1) + (indx_y+1)*nx_g + indx_z*nx_g*ny_g; + indx = (indx_x + 1) + (indx_y + 1) * nx_g + indx_z * nx_g * ny_g; // density_dev[indx] += pMass * (1-delta_x) * (1-delta_y) * delta_z; - atomicAdd( &density_dev[indx], pMass * (1-delta_x) * (1-delta_y) * delta_z); + atomicAdd(&density_dev[indx], + pMass * (1 - delta_x) * (1 - delta_y) * delta_z); - indx = (indx_x+1) + indx_y*nx_g + (indx_z+1)*nx_g*ny_g; + indx = (indx_x + 1) + indx_y * nx_g + (indx_z + 1) * nx_g * ny_g; // density_dev[indx] += pMass * (1-delta_x) * delta_y * (1-delta_z); - atomicAdd( &density_dev[indx], pMass * (1-delta_x) * delta_y * (1-delta_z)); + atomicAdd(&density_dev[indx], + pMass * (1 - delta_x) * delta_y * (1 - delta_z)); - indx = indx_x + (indx_y+1)*nx_g + (indx_z+1)*nx_g*ny_g; + indx = indx_x + (indx_y + 1) * nx_g + (indx_z + 1) * nx_g * ny_g; // density_dev[indx] += pMass * delta_x * (1-delta_y) * (1-delta_z); - atomicAdd( &density_dev[indx], pMass * delta_x * (1-delta_y) * (1-delta_z)); + atomicAdd(&density_dev[indx], + pMass * delta_x * (1 - delta_y) * (1 - delta_z)); - indx = (indx_x+1) + (indx_y+1)*nx_g + (indx_z+1)*nx_g*ny_g; + indx = (indx_x + 1) + (indx_y + 1) * nx_g + (indx_z + 1) * nx_g * ny_g; // density_dev[indx] += pMass * (1-delta_x) * (1-delta_y) * (1-delta_z); - atomicAdd( &density_dev[indx], pMass * (1-delta_x) * (1-delta_y) * (1-delta_z)); - + atomicAdd(&density_dev[indx], + pMass * (1 - delta_x) * (1 - delta_y) * (1 - delta_z)); } - - -//Clear the density array: density=0 -void Particles_3D::Clear_Density_GPU_function( Real *density_dev, int n_cells){ - Set_Particles_Array_Real( 0.0, density_dev, n_cells); +// Clear the density array: density=0 +void Particles_3D::Clear_Density_GPU_function(Real *density_dev, int n_cells) +{ + Set_Particles_Array_Real(0.0, density_dev, n_cells); } - -//Call the CIC density kernel to get the particles density -void Particles_3D::Get_Density_CIC_GPU_function(part_int_t n_local, Real particle_mass, Real xMin, Real xMax, Real yMin, Real yMax, Real zMin, Real zMax, Real dx, Real dy, Real dz, int nx_local, int ny_local, int nz_local, int n_ghost_particles_grid, int n_cells, Real *density_h, Real *density_dev, Real *pos_x_dev, Real *pos_y_dev , Real *pos_z_dev, Real *mass_dev){ - +// Call the CIC density kernel to get the particles density +void Particles_3D::Get_Density_CIC_GPU_function( + part_int_t n_local, Real particle_mass, Real xMin, Real xMax, Real yMin, + Real yMax, Real zMin, Real zMax, Real dx, Real dy, Real dz, int nx_local, + int ny_local, int nz_local, int n_ghost_particles_grid, int n_cells, + Real *density_h, Real *density_dev, Real *pos_x_dev, Real *pos_y_dev, + Real *pos_z_dev, Real *mass_dev) +{ // set values for GPU kernels - int ngrid = (n_local + TPB_PARTICLES - 1) / TPB_PARTICLES; + int ngrid = (n_local + TPB_PARTICLES - 1) / TPB_PARTICLES; // number of blocks per 1D grid dim3 dim1dGrid(ngrid, 1, 1); // number of threads per 1D block @@ -145,16 +168,21 @@ void Particles_3D::Get_Density_CIC_GPU_function(part_int_t n_local, Real particl // Only runs if there are local particles if (n_local > 0) { - hipLaunchKernelGGL(Get_Density_CIC_Kernel, dim1dGrid, dim1dBlock, 0, 0, n_local, particle_mass, density_dev, pos_x_dev, pos_y_dev, pos_z_dev, mass_dev, xMin, yMin, zMin, xMax, yMax, zMax, dx, dy, dz, nx_local, ny_local, nz_local, n_ghost_particles_grid ); + hipLaunchKernelGGL(Get_Density_CIC_Kernel, dim1dGrid, dim1dBlock, 0, 0, + n_local, particle_mass, density_dev, pos_x_dev, + pos_y_dev, pos_z_dev, mass_dev, xMin, yMin, zMin, xMax, + yMax, zMax, dx, dy, dz, nx_local, ny_local, nz_local, + n_ghost_particles_grid); CudaCheckError(); cudaDeviceSynchronize(); } - #if !defined(GRAVITY_GPU) - //Copy the density from device to host - CudaSafeCall( cudaMemcpy(density_h, density_dev, n_cells*sizeof(Real), cudaMemcpyDeviceToHost) ); - #endif + #if !defined(GRAVITY_GPU) + // Copy the density from device to host + CudaSafeCall(cudaMemcpy(density_h, density_dev, n_cells * sizeof(Real), + cudaMemcpyDeviceToHost)); + #endif } -#endif//PARTICLES_GPU -#endif//PARTICLES + #endif // PARTICLES_GPU +#endif // PARTICLES diff --git a/src/particles/density_boundaries.cpp b/src/particles/density_boundaries.cpp index 5e5cab8b8..60bc8131a 100644 --- a/src/particles/density_boundaries.cpp +++ b/src/particles/density_boundaries.cpp @@ -1,128 +1,130 @@ #ifdef PARTICLES -#include "../io/io.h" -#include "../grid/grid3D.h" -#include "particles_3D.h" -#include + #include -//Copy the particles density boundaries for non-MPI PERIODIC transfers -void Grid3D::Set_Particles_Density_Boundaries_Periodic( int direction, int side ){ + #include "../grid/grid3D.h" + #include "../io/io.h" + #include "particles_3D.h" +// Copy the particles density boundaries for non-MPI PERIODIC transfers +void Grid3D::Set_Particles_Density_Boundaries_Periodic(int direction, int side) +{ int i, j, k, indx_src, indx_dst; int nGHST, nx_g, ny_g, nz_g; nGHST = Particles.G.n_ghost_particles_grid; - nx_g = Particles.G.nx_local + 2*nGHST; - ny_g = Particles.G.ny_local + 2*nGHST; - nz_g = Particles.G.nz_local + 2*nGHST; - - //Copy X boundaries - if (direction == 0){ - for ( k=0; k + #include + #include "../grid/grid3D.h" + #include "../io/io.h" + #include "particles_3D.h" - -__global__ void Set_Particles_Density_Boundaries_Periodic_kernel( int direction, int side, int n_i, int n_j, int nx, int ny, int nz, int n_ghost, Real *density_d ){ - +__global__ void Set_Particles_Density_Boundaries_Periodic_kernel( + int direction, int side, int n_i, int n_j, int nx, int ny, int nz, + int n_ghost, Real *density_d) +{ // get a global thread ID int tid, tid_i, tid_j, tid_k, tid_src, tid_dst; - tid = threadIdx.x + blockIdx.x * blockDim.x; - tid_k = tid / (n_i*n_j); - tid_j = (tid - tid_k*n_i*n_j) / n_i; - tid_i = tid - tid_k*n_i*n_j - tid_j*n_i; - - if ( tid_i < 0 || tid_i >= n_i || tid_j < 0 || tid_j >= n_j || tid_k < 0 || tid_k >= n_ghost ) return; - - if ( direction == 0 ){ - if ( side == 0 ) tid_src = ( nx - n_ghost + tid_k ) + (tid_i)*nx + (tid_j)*nx*ny; - if ( side == 0 ) tid_dst = ( n_ghost + tid_k ) + (tid_i)*nx + (tid_j)*nx*ny; - if ( side == 1 ) tid_src = ( tid_k ) + (tid_i)*nx + (tid_j)*nx*ny; - if ( side == 1 ) tid_dst = ( nx - 2*n_ghost + tid_k ) + (tid_i)*nx + (tid_j)*nx*ny; - } - if ( direction == 1 ){ - if ( side == 0 ) tid_src = (tid_i) + ( ny - n_ghost + tid_k )*nx + (tid_j)*nx*ny; - if ( side == 0 ) tid_dst = (tid_i) + ( n_ghost + tid_k )*nx + (tid_j)*nx*ny; - if ( side == 1 ) tid_src = (tid_i) + ( tid_k )*nx + (tid_j)*nx*ny; - if ( side == 1 ) tid_dst = (tid_i) + ( ny - 2*n_ghost + tid_k )*nx + (tid_j)*nx*ny; - } - if ( direction == 2 ){ - if ( side == 0 ) tid_src = (tid_i) + (tid_j)*nx + ( nz - n_ghost + tid_k )*nx*ny; - if ( side == 0 ) tid_dst = (tid_i) + (tid_j)*nx + ( n_ghost + tid_k )*nx*ny; - if ( side == 1 ) tid_src = (tid_i) + (tid_j)*nx + ( tid_k )*nx*ny; - if ( side == 1 ) tid_dst = (tid_i) + (tid_j)*nx + ( nz - 2* n_ghost + tid_k )*nx*ny; - } - + tid = threadIdx.x + blockIdx.x * blockDim.x; + tid_k = tid / (n_i * n_j); + tid_j = (tid - tid_k * n_i * n_j) / n_i; + tid_i = tid - tid_k * n_i * n_j - tid_j * n_i; + + if (tid_i < 0 || tid_i >= n_i || tid_j < 0 || tid_j >= n_j || tid_k < 0 || + tid_k >= n_ghost) + return; + + if (direction == 0) { + if (side == 0) + tid_src = (nx - n_ghost + tid_k) + (tid_i)*nx + (tid_j)*nx * ny; + if (side == 0) tid_dst = (n_ghost + tid_k) + (tid_i)*nx + (tid_j)*nx * ny; + if (side == 1) tid_src = (tid_k) + (tid_i)*nx + (tid_j)*nx * ny; + if (side == 1) + tid_dst = (nx - 2 * n_ghost + tid_k) + (tid_i)*nx + (tid_j)*nx * ny; + } + if (direction == 1) { + if (side == 0) + tid_src = (tid_i) + (ny - n_ghost + tid_k) * nx + (tid_j)*nx * ny; + if (side == 0) tid_dst = (tid_i) + (n_ghost + tid_k) * nx + (tid_j)*nx * ny; + if (side == 1) tid_src = (tid_i) + (tid_k)*nx + (tid_j)*nx * ny; + if (side == 1) + tid_dst = (tid_i) + (ny - 2 * n_ghost + tid_k) * nx + (tid_j)*nx * ny; + } + if (direction == 2) { + if (side == 0) + tid_src = (tid_i) + (tid_j)*nx + (nz - n_ghost + tid_k) * nx * ny; + if (side == 0) tid_dst = (tid_i) + (tid_j)*nx + (n_ghost + tid_k) * nx * ny; + if (side == 1) tid_src = (tid_i) + (tid_j)*nx + (tid_k)*nx * ny; + if (side == 1) + tid_dst = (tid_i) + (tid_j)*nx + (nz - 2 * n_ghost + tid_k) * nx * ny; + } + density_d[tid_dst] += density_d[tid_src]; - } - -void Grid3D::Set_Particles_Density_Boundaries_Periodic_GPU( int direction, int side ){ - +void Grid3D::Set_Particles_Density_Boundaries_Periodic_GPU(int direction, + int side) +{ int n_ghost, nx_g, ny_g, nz_g, size, ngrid, n_i, n_j; n_ghost = Particles.G.n_ghost_particles_grid; - nx_g = Particles.G.nx_local + 2*n_ghost; - ny_g = Particles.G.ny_local + 2*n_ghost; - nz_g = Particles.G.nz_local + 2*n_ghost; + nx_g = Particles.G.nx_local + 2 * n_ghost; + ny_g = Particles.G.ny_local + 2 * n_ghost; + nz_g = Particles.G.nz_local + 2 * n_ghost; - if ( direction == 0 ){ + if (direction == 0) { n_i = ny_g; n_j = nz_g; } - if ( direction == 1 ){ + if (direction == 1) { n_i = nx_g; n_j = nz_g; } - if ( direction == 2 ){ + if (direction == 2) { n_i = nx_g; n_j = ny_g; } @@ -66,74 +74,73 @@ void Grid3D::Set_Particles_Density_Boundaries_Periodic_GPU( int direction, int s size = n_ghost * n_i * n_j; // set values for GPU kernels - ngrid = ( size - 1 ) / TPB_PARTICLES + 1; + ngrid = (size - 1) / TPB_PARTICLES + 1; // number of blocks per 1D grid dim3 dim1dGrid(ngrid, 1, 1); // number of threads per 1D block dim3 dim1dBlock(TPB_PARTICLES, 1, 1); - hipLaunchKernelGGL( Set_Particles_Density_Boundaries_Periodic_kernel, dim1dGrid, dim1dBlock, 0, 0, direction, side, n_i, n_j, nx_g, ny_g, nz_g, n_ghost, Particles.G.density_dev ); - + hipLaunchKernelGGL(Set_Particles_Density_Boundaries_Periodic_kernel, + dim1dGrid, dim1dBlock, 0, 0, direction, side, n_i, n_j, + nx_g, ny_g, nz_g, n_ghost, Particles.G.density_dev); } + #ifdef MPI_CHOLLA - - - -#ifdef MPI_CHOLLA - - - -__global__ void Load_Particles_Density_Boundary_to_Buffer_kernel( int direction, int side, int n_i, int n_j, int nx, int ny, int nz, int n_ghost, Real *density_d, Real *transfer_buffer_d ){ - +__global__ void Load_Particles_Density_Boundary_to_Buffer_kernel( + int direction, int side, int n_i, int n_j, int nx, int ny, int nz, + int n_ghost, Real *density_d, Real *transfer_buffer_d) +{ // get a global thread ID int tid, tid_i, tid_j, tid_k, tid_buffer, tid_dens; - tid = threadIdx.x + blockIdx.x * blockDim.x; - tid_k = tid / (n_i*n_j); - tid_j = (tid - tid_k*n_i*n_j) / n_i; - tid_i = tid - tid_k*n_i*n_j - tid_j*n_i; + tid = threadIdx.x + blockIdx.x * blockDim.x; + tid_k = tid / (n_i * n_j); + tid_j = (tid - tid_k * n_i * n_j) / n_i; + tid_i = tid - tid_k * n_i * n_j - tid_j * n_i; - if ( tid_i < 0 || tid_i >= n_i || tid_j < 0 || tid_j >= n_j || tid_k < 0 || tid_k >= n_ghost ) return; + if (tid_i < 0 || tid_i >= n_i || tid_j < 0 || tid_j >= n_j || tid_k < 0 || + tid_k >= n_ghost) + return; - tid_buffer = tid_i + tid_j*n_i + tid_k*n_i*n_j; + tid_buffer = tid_i + tid_j * n_i + tid_k * n_i * n_j; - if ( direction == 0 ){ - if ( side == 0 ) tid_dens = ( tid_k ) + (tid_i)*nx + (tid_j)*nx*ny; - if ( side == 1 ) tid_dens = ( nx - n_ghost + tid_k ) + (tid_i)*nx + (tid_j)*nx*ny; + if (direction == 0) { + if (side == 0) tid_dens = (tid_k) + (tid_i)*nx + (tid_j)*nx * ny; + if (side == 1) + tid_dens = (nx - n_ghost + tid_k) + (tid_i)*nx + (tid_j)*nx * ny; } - if ( direction == 1 ){ - if ( side == 0 ) tid_dens = (tid_i) + ( tid_k )*nx + (tid_j)*nx*ny; - if ( side == 1 ) tid_dens = (tid_i) + ( ny - n_ghost + tid_k )*nx + (tid_j)*nx*ny; + if (direction == 1) { + if (side == 0) tid_dens = (tid_i) + (tid_k)*nx + (tid_j)*nx * ny; + if (side == 1) + tid_dens = (tid_i) + (ny - n_ghost + tid_k) * nx + (tid_j)*nx * ny; } - if ( direction == 2 ){ - if ( side == 0 ) tid_dens = (tid_i) + (tid_j)*nx + ( tid_k )*nx*ny; - if ( side == 1 ) tid_dens = (tid_i) + (tid_j)*nx + ( nz - n_ghost + tid_k )*nx*ny; + if (direction == 2) { + if (side == 0) tid_dens = (tid_i) + (tid_j)*nx + (tid_k)*nx * ny; + if (side == 1) + tid_dens = (tid_i) + (tid_j)*nx + (nz - n_ghost + tid_k) * nx * ny; } transfer_buffer_d[tid_buffer] = density_d[tid_dens]; - } - - - - -int Grid3D::Load_Particles_Density_Boundary_to_Buffer_GPU( int direction, int side, Real *buffer ){ - +int Grid3D::Load_Particles_Density_Boundary_to_Buffer_GPU(int direction, + int side, + Real *buffer) +{ int n_ghost, nx_g, ny_g, nz_g, size_buffer, ngrid, n_i, n_j; n_ghost = Particles.G.n_ghost_particles_grid; - nx_g = Particles.G.nx_local + 2*n_ghost; - ny_g = Particles.G.ny_local + 2*n_ghost; - nz_g = Particles.G.nz_local + 2*n_ghost; + nx_g = Particles.G.nx_local + 2 * n_ghost; + ny_g = Particles.G.ny_local + 2 * n_ghost; + nz_g = Particles.G.nz_local + 2 * n_ghost; - if ( direction == 0 ){ + if (direction == 0) { n_i = ny_g; n_j = nz_g; } - if ( direction == 1 ){ + if (direction == 1) { n_i = nx_g; n_j = nz_g; } - if ( direction == 2 ){ + if (direction == 2) { n_i = nx_g; n_j = ny_g; } @@ -141,7 +148,7 @@ int Grid3D::Load_Particles_Density_Boundary_to_Buffer_GPU( int direction, int si size_buffer = n_ghost * n_i * n_j; // set values for GPU kernels - ngrid = ( size_buffer - 1 ) / TPB_PARTICLES + 1; + ngrid = (size_buffer - 1) / TPB_PARTICLES + 1; // number of blocks per 1D grid dim3 dim1dGrid(ngrid, 1, 1); // number of threads per 1D block @@ -153,65 +160,71 @@ int Grid3D::Load_Particles_Density_Boundary_to_Buffer_GPU( int direction, int si Real *send_buffer_d; send_buffer_d = buffer; - hipLaunchKernelGGL( Load_Particles_Density_Boundary_to_Buffer_kernel, dim1dGrid, dim1dBlock, 0, 0, direction, side, n_i, n_j, nx_g, ny_g, nz_g, n_ghost, density_d, send_buffer_d ); + hipLaunchKernelGGL(Load_Particles_Density_Boundary_to_Buffer_kernel, + dim1dGrid, dim1dBlock, 0, 0, direction, side, n_i, n_j, + nx_g, ny_g, nz_g, n_ghost, density_d, send_buffer_d); cudaDeviceSynchronize(); return size_buffer; } - - - -__global__ void Unload_Particles_Density_Boundary_to_Buffer_kernel( int direction, int side, int n_i, int n_j, int nx, int ny, int nz, int n_ghost, Real *density_d, Real *transfer_buffer_d ){ - +__global__ void Unload_Particles_Density_Boundary_to_Buffer_kernel( + int direction, int side, int n_i, int n_j, int nx, int ny, int nz, + int n_ghost, Real *density_d, Real *transfer_buffer_d) +{ // get a global thread ID int tid, tid_i, tid_j, tid_k, tid_buffer, tid_dens; - tid = threadIdx.x + blockIdx.x * blockDim.x; - tid_k = tid / (n_i*n_j); - tid_j = (tid - tid_k*n_i*n_j) / n_i; - tid_i = tid - tid_k*n_i*n_j - tid_j*n_i; - - if ( tid_i < 0 || tid_i >= n_i || tid_j < 0 || tid_j >= n_j || tid_k < 0 || tid_k >= n_ghost ) return; - - tid_buffer = tid_i + tid_j*n_i + tid_k*n_i*n_j; - - if ( direction == 0 ){ - if ( side == 0 ) tid_dens = ( n_ghost + tid_k ) + (tid_i)*nx + (tid_j)*nx*ny; - if ( side == 1 ) tid_dens = ( nx - 2*n_ghost + tid_k ) + (tid_i)*nx + (tid_j)*nx*ny; - } - if ( direction == 1 ){ - if ( side == 0 ) tid_dens = (tid_i) + ( n_ghost + tid_k )*nx + (tid_j)*nx*ny; - if ( side == 1 ) tid_dens = (tid_i) + ( ny - 2*n_ghost + tid_k )*nx + (tid_j)*nx*ny; - } - if ( direction == 2 ){ - if ( side == 0 ) tid_dens = (tid_i) + (tid_j)*nx + ( n_ghost + tid_k )*nx*ny; - if ( side == 1 ) tid_dens = (tid_i) + (tid_j)*nx + ( nz - 2* n_ghost + tid_k )*nx*ny; + tid = threadIdx.x + blockIdx.x * blockDim.x; + tid_k = tid / (n_i * n_j); + tid_j = (tid - tid_k * n_i * n_j) / n_i; + tid_i = tid - tid_k * n_i * n_j - tid_j * n_i; + + if (tid_i < 0 || tid_i >= n_i || tid_j < 0 || tid_j >= n_j || tid_k < 0 || + tid_k >= n_ghost) + return; + + tid_buffer = tid_i + tid_j * n_i + tid_k * n_i * n_j; + + if (direction == 0) { + if (side == 0) tid_dens = (n_ghost + tid_k) + (tid_i)*nx + (tid_j)*nx * ny; + if (side == 1) + tid_dens = (nx - 2 * n_ghost + tid_k) + (tid_i)*nx + (tid_j)*nx * ny; + } + if (direction == 1) { + if (side == 0) + tid_dens = (tid_i) + (n_ghost + tid_k) * nx + (tid_j)*nx * ny; + if (side == 1) + tid_dens = (tid_i) + (ny - 2 * n_ghost + tid_k) * nx + (tid_j)*nx * ny; + } + if (direction == 2) { + if (side == 0) + tid_dens = (tid_i) + (tid_j)*nx + (n_ghost + tid_k) * nx * ny; + if (side == 1) + tid_dens = (tid_i) + (tid_j)*nx + (nz - 2 * n_ghost + tid_k) * nx * ny; } density_d[tid_dens] += transfer_buffer_d[tid_buffer]; - } - - - -void Grid3D::Unload_Particles_Density_Boundary_From_Buffer_GPU( int direction, int side, Real *buffer ){ - +void Grid3D::Unload_Particles_Density_Boundary_From_Buffer_GPU(int direction, + int side, + Real *buffer) +{ int n_ghost, nx_g, ny_g, nz_g, size_buffer, ngrid, n_i, n_j; n_ghost = Particles.G.n_ghost_particles_grid; - nx_g = Particles.G.nx_local + 2*n_ghost; - ny_g = Particles.G.ny_local + 2*n_ghost; - nz_g = Particles.G.nz_local + 2*n_ghost; + nx_g = Particles.G.nx_local + 2 * n_ghost; + ny_g = Particles.G.ny_local + 2 * n_ghost; + nz_g = Particles.G.nz_local + 2 * n_ghost; - if ( direction == 0 ){ + if (direction == 0) { n_i = ny_g; n_j = nz_g; } - if ( direction == 1 ){ + if (direction == 1) { n_i = nx_g; n_j = nz_g; } - if ( direction == 2 ){ + if (direction == 2) { n_i = nx_g; n_j = ny_g; } @@ -219,7 +232,7 @@ void Grid3D::Unload_Particles_Density_Boundary_From_Buffer_GPU( int direction, i size_buffer = n_ghost * n_i * n_j; // set values for GPU kernels - ngrid = ( size_buffer - 1 ) / TPB_PARTICLES + 1; + ngrid = (size_buffer - 1) / TPB_PARTICLES + 1; // number of blocks per 1D grid dim3 dim1dGrid(ngrid, 1, 1); // number of threads per 1D block @@ -231,12 +244,11 @@ void Grid3D::Unload_Particles_Density_Boundary_From_Buffer_GPU( int direction, i Real *recv_buffer_d; recv_buffer_d = buffer; - hipLaunchKernelGGL( Unload_Particles_Density_Boundary_to_Buffer_kernel, dim1dGrid, dim1dBlock, 0, 0, direction, side, n_i, n_j, nx_g, ny_g, nz_g, n_ghost, density_d, recv_buffer_d ); - + hipLaunchKernelGGL(Unload_Particles_Density_Boundary_to_Buffer_kernel, + dim1dGrid, dim1dBlock, 0, 0, direction, side, n_i, n_j, + nx_g, ny_g, nz_g, n_ghost, density_d, recv_buffer_d); } + #endif // MPI_CHOLLA - -#endif//MPI_CHOLLA - -#endif//PARTICLES_GPU & GRAVITY_GPU +#endif // PARTICLES_GPU & GRAVITY_GPU diff --git a/src/particles/feedback_CIC_gpu.cu b/src/particles/feedback_CIC_gpu.cu index f5de01463..59292802b 100644 --- a/src/particles/feedback_CIC_gpu.cu +++ b/src/particles/feedback_CIC_gpu.cu @@ -1,68 +1,77 @@ -#if defined(SUPERNOVA) && defined(PARTICLES_GPU) && defined(PARTICLE_AGE) && defined(PARTICLE_IDS) - -#include -#include -#include -#include -#include -#include -#include -#include -#include "../grid/grid3D.h" -#include "../global/global_cuda.h" -#include "../global/global.h" -#include "../io/io.h" -#include "supernova.h" - -#define TPB_FEEDBACK 256 -#define FEED_INFO_N 6 -#define i_RES 1 -#define i_UNRES 2 -#define i_ENERGY 3 -#define i_MOMENTUM 4 -#define i_UNRES_ENERGY 5 - -namespace supernova { - curandStateMRG32k3a_t* randStates; - part_int_t n_states; - Real *dev_snr, snr_dt, time_sn_start, time_sn_end; - int snr_n; -} - +#if defined(SUPERNOVA) && defined(PARTICLES_GPU) && defined(PARTICLE_AGE) && \ + defined(PARTICLE_IDS) + + #include + #include + #include + #include + + #include + #include + #include + #include + + #include "../global/global.h" + #include "../global/global_cuda.h" + #include "../grid/grid3D.h" + #include "../io/io.h" + #include "supernova.h" + + #define TPB_FEEDBACK 256 + #define FEED_INFO_N 6 + #define i_RES 1 + #define i_UNRES 2 + #define i_ENERGY 3 + #define i_MOMENTUM 4 + #define i_UNRES_ENERGY 5 + +namespace supernova +{ +curandStateMRG32k3a_t* randStates; +part_int_t n_states; +Real *dev_snr, snr_dt, time_sn_start, time_sn_end; +int snr_n; +} // namespace supernova -#ifndef O_HIP + #ifndef O_HIP __device__ double atomicMax(double* address, double val) { unsigned long long int* address_as_ull = (unsigned long long int*)address; - unsigned long long int old = *address_as_ull, assumed; + unsigned long long int old = *address_as_ull, assumed; do { assumed = old; - old = atomicCAS(address_as_ull, assumed, - __double_as_longlong(fmax(val, __longlong_as_double(assumed))) - ); + old = atomicCAS( + address_as_ull, assumed, + __double_as_longlong(fmax(val, __longlong_as_double(assumed)))); } while (assumed != old); return __longlong_as_double(old); } -#endif //O_HIP + #endif // O_HIP -__global__ void initState_kernel(unsigned int seed, curandStateMRG32k3a_t* states) { - int id = blockIdx.x*blockDim.x + threadIdx.x; - curand_init(seed, id, 0, &states[id]); +__global__ void initState_kernel(unsigned int seed, + curandStateMRG32k3a_t* states) +{ + int id = blockIdx.x * blockDim.x + threadIdx.x; + curand_init(seed, id, 0, &states[id]); } - /** * @brief Does 2 things: - * -# Read in SN rate data from Starburst 99. If no file exists, assume a constant rate. - * -# Initialize the cuRAND state, which is analogous to the concept of generators in CPU code. - * The state object maintains configuration and status the cuRAND context for each thread on the GPU. - * Initialize more than the number of local particles since the latter will change through MPI transfers. + * -# Read in SN rate data from Starburst 99. If no file exists, assume a + * constant rate. + * -# Initialize the cuRAND state, which is analogous to the concept of + * generators in CPU code. The state object maintains configuration and status + * the cuRAND context for each thread on the GPU. Initialize more than the + * number of local particles since the latter will change through MPI transfers. * - * @param P pointer to parameters struct. Passes in starburst 99 filename and random number gen seed. + * @param P pointer to parameters struct. Passes in starburst 99 filename and + * random number gen seed. * @param n_local number of local particles on the GPU * @param allocation_factor */ -void supernova::initState(struct parameters *P, part_int_t n_local, Real allocation_factor) { +void supernova::initState(struct parameters* P, part_int_t n_local, + Real allocation_factor) +{ printf("supernova::initState start\n"); std::string snr_filename(P->snr_filename); if (snr_filename.size()) { @@ -78,39 +87,41 @@ void supernova::initState(struct parameters *P, part_int_t n_local, Real allocat std::vector snr_time; std::vector snr; - const int N_HEADER = 7; // S'99 has 7 rows of header information + const int N_HEADER = 7; // S'99 has 7 rows of header information const char* s99_delim = " "; // S'99 data separator std::string line; int line_counter = 0; while (snr_in.good()) { - std::getline(snr_in, line); - if (line_counter++ < N_HEADER) continue; // skip header processing - - int i = 0; - char *data = strtok(const_cast(line.c_str()), s99_delim); - while (data != nullptr) { - if (i == 0) { - // in the following divide by # years per kyr (1000) - snr_time.push_back(std::stof(std::string(data)) / 1000); - } - else if (i == 1) { - snr.push_back(pow(10, std::stof(std::string(data))) / 1000); - } - if (i > 0) break; // only care about the first 2 items. Once i = 1 can break here. - data = strtok(nullptr, s99_delim); - i++; + std::getline(snr_in, line); + if (line_counter++ < N_HEADER) continue; // skip header processing + + int i = 0; + char* data = strtok(const_cast(line.c_str()), s99_delim); + while (data != nullptr) { + if (i == 0) { + // in the following divide by # years per kyr (1000) + snr_time.push_back(std::stof(std::string(data)) / 1000); + } else if (i == 1) { + snr.push_back(pow(10, std::stof(std::string(data))) / 1000); } + if (i > 0) + break; // only care about the first 2 items. Once i = 1 can break + // here. + data = strtok(nullptr, s99_delim); + i++; + } } - time_sn_end = snr_time[snr_time.size() - 1]; + time_sn_end = snr_time[snr_time.size() - 1]; time_sn_start = snr_time[0]; // the following is the time interval between data points // (i.e. assumes regular temporal spacing) snr_dt = (time_sn_end - time_sn_start) / (snr.size() - 1); - CHECK(cudaMalloc((void**) &dev_snr, snr.size() * sizeof(Real))); - CHECK(cudaMemcpy(dev_snr, snr.data(), snr.size() * sizeof(Real), cudaMemcpyHostToDevice)); + CHECK(cudaMalloc((void**)&dev_snr, snr.size() * sizeof(Real))); + CHECK(cudaMemcpy(dev_snr, snr.data(), snr.size() * sizeof(Real), + cudaMemcpyHostToDevice)); } else { chprintf("No SN rate file specified. Using constant rate\n"); @@ -119,367 +130,511 @@ void supernova::initState(struct parameters *P, part_int_t n_local, Real allocat } // Now ititialize the poisson random number generator state. - n_states = n_local*allocation_factor; - cudaMalloc((void**) &randStates, n_states*sizeof(curandStateMRG32k3a_t)); + n_states = n_local * allocation_factor; + cudaMalloc((void**)&randStates, n_states * sizeof(curandStateMRG32k3a_t)); int ngrid = (n_states + TPB_FEEDBACK - 1) / TPB_FEEDBACK; dim3 grid(ngrid); dim3 block(TPB_FEEDBACK); - hipLaunchKernelGGL(initState_kernel, grid, block, 0, 0, P->prng_seed, randStates); + hipLaunchKernelGGL(initState_kernel, grid, block, 0, 0, P->prng_seed, + randStates); CHECK(cudaDeviceSynchronize()); - printf("supernova::initState end: n_states=%d, ngrid=%d, threads=%d\n", n_states, ngrid, TPB_FEEDBACK); + printf("supernova::initState end: n_states=%d, ngrid=%d, threads=%d\n", + n_states, ngrid, TPB_FEEDBACK); } - -__device__ Real GetSNRate(Real t, Real* dev_snr, Real snr_dt, Real t_start, Real t_end) { - if (t < t_start|| t >= t_end) return 0; +__device__ Real GetSNRate(Real t, Real* dev_snr, Real snr_dt, Real t_start, + Real t_end) +{ + if (t < t_start || t >= t_end) return 0; if (dev_snr == nullptr) return supernova::DEFAULT_SNR; - int index = (int)( (t - t_start) / snr_dt); - return dev_snr[index] + (t - index * snr_dt) * (dev_snr[index + 1] - dev_snr[index]) / snr_dt; + int index = (int)((t - t_start) / snr_dt); + return dev_snr[index] + + (t - index * snr_dt) * (dev_snr[index + 1] - dev_snr[index]) / snr_dt; } - -__device__ Real Calc_Timestep(Real gamma, Real *density, Real *momentum_x, Real *momentum_y, Real *momentum_z, Real *energy, int index, Real dx, Real dy, Real dz){ - Real dens = fmax(density[index], DENS_FLOOR); +__device__ Real Calc_Timestep(Real gamma, Real* density, Real* momentum_x, + Real* momentum_y, Real* momentum_z, Real* energy, + int index, Real dx, Real dy, Real dz) +{ + Real dens = fmax(density[index], DENS_FLOOR); Real d_inv = 1.0 / dens; - Real vx = momentum_x[index] * d_inv; - Real vy = momentum_y[index] * d_inv; - Real vz = momentum_z[index] * d_inv; - Real P = fmax((energy[index]- 0.5*dens*(vx*vx + vy*vy + vz*vz))*(gamma-1.0), TINY_NUMBER); + Real vx = momentum_x[index] * d_inv; + Real vy = momentum_y[index] * d_inv; + Real vz = momentum_z[index] * d_inv; + Real P = fmax((energy[index] - 0.5 * dens * (vx * vx + vy * vy + vz * vz)) * + (gamma - 1.0), + TINY_NUMBER); Real cs = sqrt(gamma * P * d_inv); - return fmax( fmax((fabs(vx) + cs)/dx, (fabs(vy) + cs)/dy), (fabs(vz) + cs)/dz ); + return fmax(fmax((fabs(vx) + cs) / dx, (fabs(vy) + cs) / dy), + (fabs(vz) + cs) / dz); } - -/** the prescription for dividing a scalar quantity between 3x3x3 cells is done by imagining a - 2x2x2 cell volume around the SN. These fractions, then, represent the linear extent of this - volume into the cell in question. - For i=0 this should be 1*1/2. - For i=-1 this should be (1-dx)*1/2. - For i=+1 this should be dx*1/2. - In the above the 1/2 factor is normalize over 2 cells/direction. +/** the prescription for dividing a scalar quantity between 3x3x3 cells is done + by imagining a 2x2x2 cell volume around the SN. These fractions, then, + represent the linear extent of this volume into the cell in question. For i=0 + this should be 1*1/2. For i=-1 this should be (1-dx)*1/2. For i=+1 this + should be dx*1/2. In the above the 1/2 factor is normalize over 2 + cells/direction. */ -__device__ Real frac(int i, Real dx) { - return (-0.5*i*i - 0.5*i + 1 + i*dx)*0.5; +__device__ Real frac(int i, Real dx) +{ + return (-0.5 * i * i - 0.5 * i + 1 + i * dx) * 0.5; } - -__device__ Real d_fr(int i, Real dx) { - return (dx > 0.5)*i*(1-2*dx) + ((i+1)*dx + 0.5*(i-1)) - 3*(i-1)*(i+1)*(0.5 - dx); +__device__ Real d_fr(int i, Real dx) +{ + return (dx > 0.5) * i * (1 - 2 * dx) + ((i + 1) * dx + 0.5 * (i - 1)) - + 3 * (i - 1) * (i + 1) * (0.5 - dx); } - -__device__ Real GetAverageDensity(Real *density, int xi, int yi, int zi, int nxg, int nyg, int ng) { +__device__ Real GetAverageDensity(Real* density, int xi, int yi, int zi, + int nxg, int nyg, int ng) +{ Real d_average = 0.0; for (int i = -1; i < 2; i++) { for (int j = -1; j < 2; j++) { for (int k = -1; k < 2; k++) { - d_average += density[(xi + ng + i) + (yi + ng + j)*nxg + (zi + ng + k)*nxg*nyg]; + d_average += density[(xi + ng + i) + (yi + ng + j) * nxg + + (zi + ng + k) * nxg * nyg]; } } } return d_average / 27; } - -__device__ Real GetAverageNumberDensity_CGS(Real *density, int xi, int yi, int zi, int nxg, int nyg, int ng) { - return GetAverageDensity(density, xi, yi, zi, nxg, nyg, ng) * DENSITY_UNIT / (supernova::MU*MP); +__device__ Real GetAverageNumberDensity_CGS(Real* density, int xi, int yi, + int zi, int nxg, int nyg, int ng) +{ + return GetAverageDensity(density, xi, yi, zi, nxg, nyg, ng) * DENSITY_UNIT / + (supernova::MU * MP); } +__global__ void Cluster_Feedback_Kernel( + part_int_t n_local, part_int_t* id, Real* pos_x_dev, Real* pos_y_dev, + Real* pos_z_dev, Real* mass_dev, Real* age_dev, Real xMin, Real yMin, + Real zMin, Real xMax, Real yMax, Real zMax, Real dx, Real dy, Real dz, + int nx_g, int ny_g, int nz_g, int n_ghost, Real t, Real dt, Real* dti, + Real* info, Real* density, Real* gasEnergy, Real* energy, Real* momentum_x, + Real* momentum_y, Real* momentum_z, Real gamma, + curandStateMRG32k3a_t* states, Real* prev_dens, int* prev_N, + short direction, Real* dev_snr, Real snr_dt, Real time_sn_start, + Real time_sn_end) +{ + __shared__ Real + s_info[FEED_INFO_N * + TPB_FEEDBACK]; // for collecting SN feedback information, like # + // of SNe or # resolved. + int tid = threadIdx.x; + int gtid = blockIdx.x * blockDim.x + tid; + + s_info[FEED_INFO_N * tid] = 0; // number of supernovae + s_info[FEED_INFO_N * tid + 1] = 0; // number of resolved events + s_info[FEED_INFO_N * tid + 2] = 0; // number of unresolved events + s_info[FEED_INFO_N * tid + 3] = 0; // resolved energy + s_info[FEED_INFO_N * tid + 4] = 0; // unresolved momentum + s_info[FEED_INFO_N * tid + 5] = + 0; // unresolved KE added via momentum injection + + if (gtid < n_local) { + Real pos_x, pos_y, pos_z; + Real cell_center_x, cell_center_y, cell_center_z; + Real delta_x, delta_y, delta_z; + Real x_frac, y_frac, z_frac; + Real px, py, pz, d; + // Real t_b, t_a, v_1, v_2, d_b, d_a, p_b, p_a, e; + Real feedback_energy = 0, feedback_density = 0, feedback_momentum = 0, n_0, + shell_radius; + bool is_resolved = false; + Real dV = dx * dy * dz; + Real local_dti = 0.0; + + pos_x = pos_x_dev[gtid]; + pos_y = pos_y_dev[gtid]; + pos_z = pos_z_dev[gtid]; + // printf("(%d): pos:(%.4e, %.4e, %.4e)\n", gtid, pos_x, pos_y, pos_z); + // printf("(%d): MIN:(%.4e, %.4e, %.4e)\n", gtid, xMin, yMin, xMin); + + bool in_local = (pos_x >= xMin && pos_x < xMax) && + (pos_y >= yMin && pos_y < yMax) && + (pos_z >= zMin && pos_z < zMax); + if (!in_local) { + printf( + " Feedback GPU: Particle outside local domain [%f %f %f] [%f %f] " + "[%f %f] [%f %f]\n ", + pos_x, pos_y, pos_z, xMin, xMax, yMin, yMax, zMin, zMax); + } -__global__ void Cluster_Feedback_Kernel(part_int_t n_local, part_int_t *id, Real* pos_x_dev, Real* pos_y_dev, Real* pos_z_dev, - Real* mass_dev, Real* age_dev, Real xMin, Real yMin, Real zMin, Real xMax, Real yMax, Real zMax, - Real dx, Real dy, Real dz, int nx_g, int ny_g, int nz_g, int n_ghost, Real t, Real dt, Real* dti, Real* info, - Real* density, Real* gasEnergy, Real* energy, Real* momentum_x, Real* momentum_y, Real* momentum_z, Real gamma, curandStateMRG32k3a_t* states, - Real* prev_dens, int* prev_N, short direction, Real* dev_snr, Real snr_dt, Real time_sn_start, Real time_sn_end) { - - __shared__ Real s_info[FEED_INFO_N*TPB_FEEDBACK]; // for collecting SN feedback information, like # of SNe or # resolved. - int tid = threadIdx.x; - int gtid = blockIdx.x * blockDim.x + tid ; - - s_info[FEED_INFO_N*tid] = 0; // number of supernovae - s_info[FEED_INFO_N*tid + 1] = 0; // number of resolved events - s_info[FEED_INFO_N*tid + 2] = 0; // number of unresolved events - s_info[FEED_INFO_N*tid + 3] = 0; // resolved energy - s_info[FEED_INFO_N*tid + 4] = 0; // unresolved momentum - s_info[FEED_INFO_N*tid + 5] = 0; // unresolved KE added via momentum injection - - if (gtid < n_local) { - Real pos_x, pos_y, pos_z; - Real cell_center_x, cell_center_y, cell_center_z; - Real delta_x, delta_y, delta_z; - Real x_frac, y_frac, z_frac; - Real px, py, pz, d; - //Real t_b, t_a, v_1, v_2, d_b, d_a, p_b, p_a, e; - Real feedback_energy=0, feedback_density=0, feedback_momentum=0, n_0, shell_radius; - bool is_resolved = false; - Real dV = dx*dy*dz; - Real local_dti = 0.0; - - pos_x = pos_x_dev[gtid]; - pos_y = pos_y_dev[gtid]; - pos_z = pos_z_dev[gtid]; - //printf("(%d): pos:(%.4e, %.4e, %.4e)\n", gtid, pos_x, pos_y, pos_z); - //printf("(%d): MIN:(%.4e, %.4e, %.4e)\n", gtid, xMin, yMin, xMin); - - - bool in_local = (pos_x >= xMin && pos_x < xMax) && - (pos_y >= yMin && pos_y < yMax) && - (pos_z >= zMin && pos_z < zMax); - if (!in_local) { - printf(" Feedback GPU: Particle outside local domain [%f %f %f] [%f %f] [%f %f] [%f %f]\n ", - pos_x, pos_y, pos_z, xMin, xMax, yMin, yMax, zMin, zMax); - } - - int indx_x = (int) floor( ( pos_x - xMin ) / dx ); - int indx_y = (int) floor( ( pos_y - yMin ) / dy ); - int indx_z = (int) floor( ( pos_z - zMin ) / dz ); - //printf("(%d): indx:(%d, %d, %d)\n", gtid, indx_x, indx_y, indx_z); - - - bool ignore = indx_x < 0 || indx_y < 0 || indx_z < 0 || indx_x >= nx_g-2*n_ghost || indx_y >= ny_g-2*n_ghost || indx_z >= nz_g-2*n_ghost; - if (ignore) { - printf(" Feedback GPU: Particle CIC index err [%f %f %f] [%d %d %d] [%d %d %d] \n ", - pos_x, pos_y, pos_z, indx_x, indx_y, indx_z, nx_g, ny_g, nz_g); - } - - if (!ignore && in_local) { + int indx_x = (int)floor((pos_x - xMin) / dx); + int indx_y = (int)floor((pos_y - yMin) / dy); + int indx_z = (int)floor((pos_z - zMin) / dz); + // printf("(%d): indx:(%d, %d, %d)\n", gtid, indx_x, indx_y, indx_z); + + bool ignore = indx_x < 0 || indx_y < 0 || indx_z < 0 || + indx_x >= nx_g - 2 * n_ghost || + indx_y >= ny_g - 2 * n_ghost || indx_z >= nz_g - 2 * n_ghost; + if (ignore) { + printf( + " Feedback GPU: Particle CIC index err [%f %f %f] [%d %d %d] [%d " + "%d %d] \n ", + pos_x, pos_y, pos_z, indx_x, indx_y, indx_z, nx_g, ny_g, nz_g); + } - int N = 0; - if ((t - age_dev[gtid]) <= time_sn_end) { // only calculate this if there will be SN feedback - if (direction == -1) N = -prev_N[gtid]; + if (!ignore && in_local) { + int N = 0; + if ((t - age_dev[gtid]) <= + time_sn_end) { // only calculate this if there will be SN feedback + if (direction == -1) + N = -prev_N[gtid]; + else { + curandStateMRG32k3a_t state = states[gtid]; + N = curand_poisson(&state, + GetSNRate(t - age_dev[gtid], dev_snr, snr_dt, + time_sn_start, time_sn_end) * + mass_dev[gtid] * dt); + states[gtid] = state; + prev_N[gtid] = N; + } + if (N != 0) { + mass_dev[gtid] -= N * supernova::MASS_PER_SN; + feedback_energy = N * supernova::ENERGY_PER_SN / dV; + feedback_density = N * supernova::MASS_PER_SN / dV; + if (direction == -1) + n_0 = prev_dens[gtid]; else { - curandStateMRG32k3a_t state = states[gtid]; - N = curand_poisson (&state, GetSNRate(t - age_dev[gtid], dev_snr, snr_dt, time_sn_start, time_sn_end) * mass_dev[gtid] * dt); - states[gtid] = state; - prev_N[gtid] = N; + n_0 = GetAverageNumberDensity_CGS(density, indx_x, indx_y, indx_z, + nx_g, ny_g, n_ghost); + prev_dens[gtid] = n_0; } - if (N != 0) { - mass_dev[gtid] -= N * supernova::MASS_PER_SN; - feedback_energy = N * supernova::ENERGY_PER_SN / dV; - feedback_density = N * supernova::MASS_PER_SN / dV; - if (direction == -1) n_0 = prev_dens[gtid]; - else { - n_0 = GetAverageNumberDensity_CGS(density, indx_x, indx_y, indx_z, nx_g, ny_g, n_ghost); - prev_dens[gtid] = n_0; - } - //int devcount; - //cudaGetDeviceCount(&devcount); - //int devId; - //cudaGetDevice(&devId); - //printf("[%d: %d] N: %d, time: %.4e, dt: %.4e, e: %.4e, n_0: %.4e\n", devId, gtid, N, t, dt, feedback_energy, n_0); - - feedback_momentum = direction*supernova::FINAL_MOMENTUM * pow(n_0, -0.17) * pow(fabsf(N), 0.93) / dV; - shell_radius = supernova::R_SH * pow(n_0, -0.46) * pow(fabsf(N), 0.29); - is_resolved = 3 * max(dx, max(dy, dz)) <= shell_radius; - if (!is_resolved) printf("UR[%f] at (%d, %d, %d) id=%d, N=%d, shell_rad=%0.4e, n_0=%0.4e\n", - t, indx_x + n_ghost, indx_y + n_ghost, indx_z + n_ghost, (int)id[gtid], N, shell_radius, n_0); - - s_info[FEED_INFO_N*tid] = 1.*N; - if (is_resolved) s_info[FEED_INFO_N*tid + 1] = direction * 1.0; - else s_info[FEED_INFO_N*tid + 2] = direction * 1.0; - - int indx; - - if (is_resolved) { //if resolved inject energy and density - s_info[FEED_INFO_N*tid + 3] = feedback_energy * dV; - - indx_x = (int) floor( ( pos_x - xMin - 0.5*dx ) / dx ); - indx_y = (int) floor( ( pos_y - yMin - 0.5*dy ) / dy ); - indx_z = (int) floor( ( pos_z - zMin - 0.5*dz ) / dz ); - - cell_center_x = xMin + indx_x*dx + 0.5*dx; - cell_center_y = yMin + indx_y*dy + 0.5*dy; - cell_center_z = zMin + indx_z*dz + 0.5*dz; - - delta_x = 1 - ( pos_x - cell_center_x ) / dx; - delta_y = 1 - ( pos_y - cell_center_y ) / dy; - delta_z = 1 - ( pos_z - cell_center_z ) / dz; - indx_x += n_ghost; - indx_y += n_ghost; - indx_z += n_ghost; - - for (int i = 0; i < 2; i++) { - for (int j = 0; j < 2; j++) { - for (int k = 0; k < 2; k++) { - indx = (indx_x+i) + (indx_y+j)*nx_g + (indx_z+k)*nx_g*ny_g; - - if (abs(momentum_x[indx]/density[indx]) >= C_L) { - printf("%d, Rb: (%d, %d, %d) vx = %.3e, d = %.3e, n_0 = %.3e\n", direction, indx_x + i, indx_y + j, indx_z + k, - momentum_x[indx]/density[indx]*VELOCITY_UNIT*1e-5, density[indx]*DENSITY_UNIT/0.6/MP, n_0); - } - if (abs(momentum_y[indx]/density[indx]) >= C_L) { - printf("%d, Rb: (%d, %d, %d) vy = %.3e, d = %.3e, n_0 = %.3e\n", direction, indx_x + i, indx_y + j, indx_z + k, - momentum_y[indx]/density[indx]*VELOCITY_UNIT*1e-5, density[indx]*DENSITY_UNIT/0.6/MP, n_0); - } - if (abs(momentum_z[indx]/density[indx]) >= C_L) { - printf("%d, Rb: (%d, %d, %d) vz = %.3e, d = %.3e, n_0 = %.3e\n", direction, indx_x + i, indx_y + j, indx_z + k, - momentum_z[indx]/density[indx]*VELOCITY_UNIT*1e-5, density[indx]*DENSITY_UNIT/0.6/MP, n_0); - } - - // i_frac are the fractions of energy/density to be allocated - // to each of the 8 cells. - x_frac = i*(1-delta_x) + (1-i)*delta_x; - y_frac = j*(1-delta_y) + (1-j)*delta_y; - z_frac = k*(1-delta_z) + (1-k)*delta_z; - - atomicAdd(&density[indx], x_frac * y_frac * z_frac * feedback_density); - atomicAdd(&gasEnergy[indx], x_frac * y_frac * z_frac * feedback_energy ); - atomicAdd(&energy[indx], x_frac * y_frac * z_frac * feedback_energy ); - - if (abs(momentum_x[indx]/density[indx]) >= C_L) { - printf("%d, Ra: (%d, %d, %d) vx = %.3e, d = %.3e, n_0 = %.3e\n", direction, indx_x + i, indx_y + j, indx_z + k, - momentum_x[indx]/density[indx]*VELOCITY_UNIT*1e-5, density[indx]*DENSITY_UNIT/0.6/MP, n_0); - } - if (abs(momentum_y[indx]/density[indx]) >= C_L) { - printf("%d, Ra: (%d, %d, %d) vy = %.3e, d = %.3e, n_0 = %.3e\n", direction, indx_x + i, indx_y + j, indx_z + k, - momentum_y[indx]/density[indx]*VELOCITY_UNIT*1e-5, density[indx]*DENSITY_UNIT/0.6/MP, n_0); - } - if (abs(momentum_z[indx]/density[indx]) >= C_L) { - printf("%d, Ra: (%d, %d, %d) vz = %.3e, d = %.3e, n_0 = %.3e\n", direction, indx_x + i, indx_y + j, indx_z + k, - momentum_z[indx]/density[indx]*VELOCITY_UNIT*1e-5, density[indx]*DENSITY_UNIT/0.6/MP, n_0); - } - - if (direction > 0) local_dti = fmax(local_dti, Calc_Timestep(gamma, density, momentum_x, momentum_y, momentum_z, energy, indx, dx, dy, dz)); + // int devcount; + // cudaGetDeviceCount(&devcount); + // int devId; + // cudaGetDevice(&devId); + // printf("[%d: %d] N: %d, time: %.4e, dt: %.4e, e: %.4e, n_0: + // %.4e\n", devId, gtid, N, t, dt, feedback_energy, n_0); + + feedback_momentum = direction * supernova::FINAL_MOMENTUM * + pow(n_0, -0.17) * pow(fabsf(N), 0.93) / dV; + shell_radius = + supernova::R_SH * pow(n_0, -0.46) * pow(fabsf(N), 0.29); + is_resolved = 3 * max(dx, max(dy, dz)) <= shell_radius; + if (!is_resolved) + printf( + "UR[%f] at (%d, %d, %d) id=%d, N=%d, shell_rad=%0.4e, " + "n_0=%0.4e\n", + t, indx_x + n_ghost, indx_y + n_ghost, indx_z + n_ghost, + (int)id[gtid], N, shell_radius, n_0); + + s_info[FEED_INFO_N * tid] = 1. * N; + if (is_resolved) + s_info[FEED_INFO_N * tid + 1] = direction * 1.0; + else + s_info[FEED_INFO_N * tid + 2] = direction * 1.0; + + int indx; + + if (is_resolved) { // if resolved inject energy and density + s_info[FEED_INFO_N * tid + 3] = feedback_energy * dV; + + indx_x = (int)floor((pos_x - xMin - 0.5 * dx) / dx); + indx_y = (int)floor((pos_y - yMin - 0.5 * dy) / dy); + indx_z = (int)floor((pos_z - zMin - 0.5 * dz) / dz); + + cell_center_x = xMin + indx_x * dx + 0.5 * dx; + cell_center_y = yMin + indx_y * dy + 0.5 * dy; + cell_center_z = zMin + indx_z * dz + 0.5 * dz; + + delta_x = 1 - (pos_x - cell_center_x) / dx; + delta_y = 1 - (pos_y - cell_center_y) / dy; + delta_z = 1 - (pos_z - cell_center_z) / dz; + indx_x += n_ghost; + indx_y += n_ghost; + indx_z += n_ghost; + + for (int i = 0; i < 2; i++) { + for (int j = 0; j < 2; j++) { + for (int k = 0; k < 2; k++) { + indx = (indx_x + i) + (indx_y + j) * nx_g + + (indx_z + k) * nx_g * ny_g; + + if (abs(momentum_x[indx] / density[indx]) >= C_L) { + printf( + "%d, Rb: (%d, %d, %d) vx = %.3e, d = %.3e, n_0 = " + "%.3e\n", + direction, indx_x + i, indx_y + j, indx_z + k, + momentum_x[indx] / density[indx] * VELOCITY_UNIT * 1e-5, + density[indx] * DENSITY_UNIT / 0.6 / MP, n_0); + } + if (abs(momentum_y[indx] / density[indx]) >= C_L) { + printf( + "%d, Rb: (%d, %d, %d) vy = %.3e, d = %.3e, n_0 = " + "%.3e\n", + direction, indx_x + i, indx_y + j, indx_z + k, + momentum_y[indx] / density[indx] * VELOCITY_UNIT * 1e-5, + density[indx] * DENSITY_UNIT / 0.6 / MP, n_0); + } + if (abs(momentum_z[indx] / density[indx]) >= C_L) { + printf( + "%d, Rb: (%d, %d, %d) vz = %.3e, d = %.3e, n_0 = " + "%.3e\n", + direction, indx_x + i, indx_y + j, indx_z + k, + momentum_z[indx] / density[indx] * VELOCITY_UNIT * 1e-5, + density[indx] * DENSITY_UNIT / 0.6 / MP, n_0); } - } - } - } else { //if not resolved, inject momentum and density - s_info[FEED_INFO_N*tid + 4] = feedback_momentum * dV; - - delta_x = ( pos_x - xMin - indx_x*dx ) / dx; - delta_y = ( pos_y - yMin - indx_y*dy ) / dy; - delta_z = ( pos_z - zMin - indx_z*dz ) / dz; - //printf("(%d):indx:(%d, %d, %d)\n", gtid, indx_x, indx_y, indx_z); - //printf("(%d): pos:(%.4e, %.4e, %.4e), delta_x (%.2e, %.2e, %.2e)\n", gtid, pos_x, pos_y, pos_z, delta_x, delta_y, delta_z); - indx_x += n_ghost; - indx_y += n_ghost; - indx_z += n_ghost; + // i_frac are the fractions of energy/density to be allocated + // to each of the 8 cells. + x_frac = i * (1 - delta_x) + (1 - i) * delta_x; + y_frac = j * (1 - delta_y) + (1 - j) * delta_y; + z_frac = k * (1 - delta_z) + (1 - k) * delta_z; + + atomicAdd(&density[indx], + x_frac * y_frac * z_frac * feedback_density); + atomicAdd(&gasEnergy[indx], + x_frac * y_frac * z_frac * feedback_energy); + atomicAdd(&energy[indx], + x_frac * y_frac * z_frac * feedback_energy); + + if (abs(momentum_x[indx] / density[indx]) >= C_L) { + printf( + "%d, Ra: (%d, %d, %d) vx = %.3e, d = %.3e, n_0 = " + "%.3e\n", + direction, indx_x + i, indx_y + j, indx_z + k, + momentum_x[indx] / density[indx] * VELOCITY_UNIT * 1e-5, + density[indx] * DENSITY_UNIT / 0.6 / MP, n_0); + } + if (abs(momentum_y[indx] / density[indx]) >= C_L) { + printf( + "%d, Ra: (%d, %d, %d) vy = %.3e, d = %.3e, n_0 = " + "%.3e\n", + direction, indx_x + i, indx_y + j, indx_z + k, + momentum_y[indx] / density[indx] * VELOCITY_UNIT * 1e-5, + density[indx] * DENSITY_UNIT / 0.6 / MP, n_0); + } + if (abs(momentum_z[indx] / density[indx]) >= C_L) { + printf( + "%d, Ra: (%d, %d, %d) vz = %.3e, d = %.3e, n_0 = " + "%.3e\n", + direction, indx_x + i, indx_y + j, indx_z + k, + momentum_z[indx] / density[indx] * VELOCITY_UNIT * 1e-5, + density[indx] * DENSITY_UNIT / 0.6 / MP, n_0); + } - if (abs(feedback_momentum/feedback_density*VELOCITY_UNIT*1e-5) > 40000) { // injected speeds are greater than 4e4 km/s - printf("**** (%d, %d, %d) injected speeds are %.3e km/s\n", indx_x, indx_y, indx_z, feedback_momentum/feedback_density*VELOCITY_UNIT*1e-5); + if (direction > 0) + local_dti = fmax( + local_dti, + Calc_Timestep(gamma, density, momentum_x, momentum_y, + momentum_z, energy, indx, dx, dy, dz)); + } } - feedback_momentum /= sqrt(3.0); - - for (int i = -1; i < 2; i++) { - for (int j = -1; j < 2; j++) { - for (int k = -1; k < 2; k++) { - // index in array of conserved quantities - indx = (indx_x+i) + (indx_y+j)*nx_g + (indx_z+k)*nx_g*ny_g; - - x_frac = d_fr(i, delta_x) * frac(j, delta_y) * frac(k, delta_z); - y_frac = frac(i, delta_x) * d_fr(j, delta_y) * frac(k, delta_z); - z_frac = frac(i, delta_x) * frac(j, delta_y) * d_fr(k, delta_z); - - px = x_frac * feedback_momentum; - py = y_frac * feedback_momentum; - pz = z_frac * feedback_momentum; - d = (abs(x_frac) + abs(y_frac) + abs(z_frac)) / 6 * feedback_density + n_0*supernova::MU*MP/DENSITY_UNIT; - - //d = frac(i, delta_x) * frac(j, delta_y) * frac(k, delta_z) * feedback_density; - //e = frac(i, delta_x) * frac(j, delta_y) * frac(k, delta_z) * feedback_energy; - //printf("(%d, %d, %d): delta:(%.4e, %.4e, %.4e), frac: %.4e\n", indx_x, indx_y, indx_z, delta_x, delta_y, delta_z, frac(i, delta_x)*frac(j, delta_y)*frac(k, delta_z)); - //printf("(%d, %d, %d):(%d SN) (i:%d, j:%d, k:%d) before: %.4e\n", indx_x, indx_y, indx_z, N, i, j, k, density[indx]*DENSITY_UNIT/0.6/MP); - - - //v_1 = sqrt((momentum_x[indx]*momentum_x[indx] + momentum_y[indx]*momentum_y[indx] + momentum_z[indx]*momentum_z[indx])/density[indx]/density[indx])*VELOCITY_UNIT/1e5; - //t_b = gasEnergy[indx]*ENERGY_UNIT*(gamma - 1)/(density[indx]*DENSITY_UNIT/0.6/MP*KB); - //p_b = sqrt(momentum_x[indx]*momentum_x[indx] + momentum_y[indx]*momentum_y[indx] + momentum_z[indx]*momentum_z[indx])*VELOCITY_UNIT/1e5; - //d_b = density[indx]*DENSITY_UNIT/0.6/MP; - - if (abs(momentum_x[indx]/density[indx]) >= C_L) { - printf("%d, Ub: (%d, %d, %d) vx = %.3e, d = %.3e, n_0 = %.3e\n", direction, indx_x + i, indx_y + j, indx_z + k, - momentum_x[indx]/density[indx]*VELOCITY_UNIT*1e-5, density[indx]*DENSITY_UNIT/0.6/MP, n_0); - } - if (abs(momentum_y[indx]/density[indx]) >= C_L) { - printf("%d, Ub: (%d, %d, %d) vy = %.3e, d = %.3e, n_0 = %.3e\n", direction, indx_x + i, indx_y + j, indx_z + k, - momentum_y[indx]/density[indx]*VELOCITY_UNIT*1e-5, density[indx]*DENSITY_UNIT/0.6/MP, n_0); - } - if (abs(momentum_z[indx]/density[indx]) >= C_L) { - printf("%d, Ub: (%d, %d, %d) vz = %.3e, d = %.3e, n_0 = %.3e\n", direction, indx_x + i, indx_y + j, indx_z + k, - momentum_z[indx]/density[indx]*VELOCITY_UNIT*1e-5, density[indx]*DENSITY_UNIT/0.6/MP, n_0); - } - - atomicAdd(&momentum_x[indx], px); - atomicAdd(&momentum_y[indx], py); - atomicAdd(&momentum_z[indx], pz); - density[indx] = d; - energy[indx] = (momentum_x[indx]*momentum_x[indx] + momentum_y[indx]*momentum_y[indx] + momentum_z[indx]*momentum_z[indx])/2/density[indx] + gasEnergy[indx]; - - - // atomicAdd( &energy[indx], e ); - //atomicAdd( &density[indx], d ); - - s_info[FEED_INFO_N*tid + i_UNRES_ENERGY] += direction*(px*px + py*py + pz*pz)/2/density[indx]*dV; - - if (abs(momentum_x[indx]/density[indx]) >= C_L) { - printf("%d, Ua: (%d, %d, %d) vx = %.3e, d = %.3e, n_0 = %.3e\n", direction, indx_x + i, indx_y + j, indx_z + k, - momentum_x[indx]/density[indx]*VELOCITY_UNIT*1e-5, density[indx]*DENSITY_UNIT/0.6/MP, n_0); - } - if (abs(momentum_y[indx]/density[indx]) >= C_L) { - printf("%d, Ua: (%d, %d, %d) vy = %.3e, d = %.3e, n_0 = %.3e\n", direction, indx_x + i, indx_y + j, indx_z + k, - momentum_y[indx]/density[indx]*VELOCITY_UNIT*1e-5, density[indx]*DENSITY_UNIT/0.6/MP, n_0); - } - if (abs(momentum_z[indx]/density[indx]) >= C_L) { - printf("%d, Ua: (%d, %d, %d) vz = %.3e, d = %.3e, n_0 = %.3e\n", direction, indx_x + i, indx_y + j, indx_z + k, - momentum_z[indx]/density[indx]*VELOCITY_UNIT*1e-5, density[indx]*DENSITY_UNIT/0.6/MP, n_0); - } - //gasEnergy[indx] = energy[indx] - (momentum_x[indx]*momentum_x[indx] + momentum_y[indx]*momentum_y[indx] + momentum_z[indx]*momentum_z[indx])/2/density[indx]; - //v_2 = sqrt((momentum_x[indx]*momentum_x[indx] + momentum_y[indx]*momentum_y[indx] + momentum_z[indx]*momentum_z[indx])/density[indx]/density[indx]) * VELOCITY_UNIT/1e5; - //t_a = gasEnergy[indx]*ENERGY_UNIT*(gamma - 1)/(density[indx]*DENSITY_UNIT/0.6/MP*KB); - //d_a = density[indx]*DENSITY_UNIT/0.6/MP; - //p_a = sqrt(momentum_x[indx]*momentum_x[indx] + momentum_y[indx]*momentum_y[indx] + momentum_z[indx]*momentum_z[indx])*VELOCITY_UNIT/1e5; - - - //printf("(%d, %d, %d):(CM: %.2e, SN: %d) (i:%d, j:%d, k:%d) v_1: %.5e v_2: %.5e V_DIFF-> %.4f %%\n", indx_x, indx_y, indx_z, mass_dev[gtid], N, i, j, k, v_1, v_2, (v_2-v_1)/v_1*100); - //printf(" (%d, %d, %d):(%d SN) (i:%d, j:%d, k:%d) T_b: %.5e T_a: %.5e T_DIFF-> %.4f %%\n", indx_x, indx_y, indx_z, N, i, j, k, t_b, t_a, (t_a-t_b)/t_b*100); - //printf(" (%d, %d, %d):(%d SN) (i:%d, j:%d, k:%d) d_b: %.5e d_a: %.5e D_DIFF-> %.1f %%\n", indx_x, indx_y, indx_z, N, i, j, k, d_b, d_a, (d_a-d_b)/d_b*100); - //printf(" (%d, %d, %d):(%d SN) (i:%d, j:%d, k:%d) p_b: %.5e p_a: %.5e P_DIFF-> %.4f %%\n", indx_x, indx_y, indx_z, N, i, j, k, p_b, p_a, (p_a-p_b)/p_b*100); - - if (direction > 0) { - //printf("urs time:%.3e id:%d N:%d d:%.5e\n", t, id[gtid], N, n_0); - local_dti = fmax(local_dti, Calc_Timestep(gamma, density, momentum_x, momentum_y, momentum_z, energy, indx, dx, dy, dz)); - } + } + } else { // if not resolved, inject momentum and density + s_info[FEED_INFO_N * tid + 4] = feedback_momentum * dV; + + delta_x = (pos_x - xMin - indx_x * dx) / dx; + delta_y = (pos_y - yMin - indx_y * dy) / dy; + delta_z = (pos_z - zMin - indx_z * dz) / dz; + // printf("(%d):indx:(%d, %d, %d)\n", gtid, indx_x, indx_y, indx_z); + // printf("(%d): pos:(%.4e, %.4e, %.4e), delta_x (%.2e, %.2e, + // %.2e)\n", gtid, pos_x, pos_y, pos_z, delta_x, delta_y, delta_z); + + indx_x += n_ghost; + indx_y += n_ghost; + indx_z += n_ghost; + + if (abs(feedback_momentum / feedback_density * VELOCITY_UNIT * + 1e-5) > + 40000) { // injected speeds are greater than 4e4 km/s + printf( + "**** (%d, %d, %d) injected speeds are %.3e km/s\n", indx_x, + indx_y, indx_z, + feedback_momentum / feedback_density * VELOCITY_UNIT * 1e-5); + } + feedback_momentum /= sqrt(3.0); + + for (int i = -1; i < 2; i++) { + for (int j = -1; j < 2; j++) { + for (int k = -1; k < 2; k++) { + // index in array of conserved quantities + indx = (indx_x + i) + (indx_y + j) * nx_g + + (indx_z + k) * nx_g * ny_g; + + x_frac = + d_fr(i, delta_x) * frac(j, delta_y) * frac(k, delta_z); + y_frac = + frac(i, delta_x) * d_fr(j, delta_y) * frac(k, delta_z); + z_frac = + frac(i, delta_x) * frac(j, delta_y) * d_fr(k, delta_z); + + px = x_frac * feedback_momentum; + py = y_frac * feedback_momentum; + pz = z_frac * feedback_momentum; + d = (abs(x_frac) + abs(y_frac) + abs(z_frac)) / 6 * + feedback_density + + n_0 * supernova::MU * MP / DENSITY_UNIT; + + // d = frac(i, delta_x) * frac(j, delta_y) * frac(k, delta_z) + // * feedback_density; e = frac(i, delta_x) * frac(j, + // delta_y) * frac(k, delta_z) * feedback_energy; printf("(%d, + // %d, %d): delta:(%.4e, %.4e, %.4e), frac: %.4e\n", indx_x, + // indx_y, indx_z, delta_x, delta_y, delta_z, frac(i, + // delta_x)*frac(j, delta_y)*frac(k, delta_z)); printf("(%d, + // %d, %d):(%d SN) (i:%d, j:%d, k:%d) before: %.4e\n", indx_x, + // indx_y, indx_z, N, i, j, k, + // density[indx]*DENSITY_UNIT/0.6/MP); + + // v_1 = sqrt((momentum_x[indx]*momentum_x[indx] + + // momentum_y[indx]*momentum_y[indx] + + // momentum_z[indx]*momentum_z[indx])/density[indx]/density[indx])*VELOCITY_UNIT/1e5; + // t_b = gasEnergy[indx]*ENERGY_UNIT*(gamma - + // 1)/(density[indx]*DENSITY_UNIT/0.6/MP*KB); p_b = + // sqrt(momentum_x[indx]*momentum_x[indx] + + // momentum_y[indx]*momentum_y[indx] + + // momentum_z[indx]*momentum_z[indx])*VELOCITY_UNIT/1e5; d_b = + // density[indx]*DENSITY_UNIT/0.6/MP; + + if (abs(momentum_x[indx] / density[indx]) >= C_L) { + printf( + "%d, Ub: (%d, %d, %d) vx = %.3e, d = %.3e, n_0 = " + "%.3e\n", + direction, indx_x + i, indx_y + j, indx_z + k, + momentum_x[indx] / density[indx] * VELOCITY_UNIT * 1e-5, + density[indx] * DENSITY_UNIT / 0.6 / MP, n_0); + } + if (abs(momentum_y[indx] / density[indx]) >= C_L) { + printf( + "%d, Ub: (%d, %d, %d) vy = %.3e, d = %.3e, n_0 = " + "%.3e\n", + direction, indx_x + i, indx_y + j, indx_z + k, + momentum_y[indx] / density[indx] * VELOCITY_UNIT * 1e-5, + density[indx] * DENSITY_UNIT / 0.6 / MP, n_0); + } + if (abs(momentum_z[indx] / density[indx]) >= C_L) { + printf( + "%d, Ub: (%d, %d, %d) vz = %.3e, d = %.3e, n_0 = " + "%.3e\n", + direction, indx_x + i, indx_y + j, indx_z + k, + momentum_z[indx] / density[indx] * VELOCITY_UNIT * 1e-5, + density[indx] * DENSITY_UNIT / 0.6 / MP, n_0); + } + + atomicAdd(&momentum_x[indx], px); + atomicAdd(&momentum_y[indx], py); + atomicAdd(&momentum_z[indx], pz); + density[indx] = d; + energy[indx] = (momentum_x[indx] * momentum_x[indx] + + momentum_y[indx] * momentum_y[indx] + + momentum_z[indx] * momentum_z[indx]) / + 2 / density[indx] + + gasEnergy[indx]; + + // atomicAdd( &energy[indx], e ); + // atomicAdd( &density[indx], d ); + + s_info[FEED_INFO_N * tid + i_UNRES_ENERGY] += + direction * (px * px + py * py + pz * pz) / 2 / + density[indx] * dV; + + if (abs(momentum_x[indx] / density[indx]) >= C_L) { + printf( + "%d, Ua: (%d, %d, %d) vx = %.3e, d = %.3e, n_0 = " + "%.3e\n", + direction, indx_x + i, indx_y + j, indx_z + k, + momentum_x[indx] / density[indx] * VELOCITY_UNIT * 1e-5, + density[indx] * DENSITY_UNIT / 0.6 / MP, n_0); + } + if (abs(momentum_y[indx] / density[indx]) >= C_L) { + printf( + "%d, Ua: (%d, %d, %d) vy = %.3e, d = %.3e, n_0 = " + "%.3e\n", + direction, indx_x + i, indx_y + j, indx_z + k, + momentum_y[indx] / density[indx] * VELOCITY_UNIT * 1e-5, + density[indx] * DENSITY_UNIT / 0.6 / MP, n_0); + } + if (abs(momentum_z[indx] / density[indx]) >= C_L) { + printf( + "%d, Ua: (%d, %d, %d) vz = %.3e, d = %.3e, n_0 = " + "%.3e\n", + direction, indx_x + i, indx_y + j, indx_z + k, + momentum_z[indx] / density[indx] * VELOCITY_UNIT * 1e-5, + density[indx] * DENSITY_UNIT / 0.6 / MP, n_0); + } + // gasEnergy[indx] = energy[indx] - + // (momentum_x[indx]*momentum_x[indx] + + // momentum_y[indx]*momentum_y[indx] + + // momentum_z[indx]*momentum_z[indx])/2/density[indx]; v_2 = + // sqrt((momentum_x[indx]*momentum_x[indx] + + // momentum_y[indx]*momentum_y[indx] + + // momentum_z[indx]*momentum_z[indx])/density[indx]/density[indx]) + // * VELOCITY_UNIT/1e5; t_a = + // gasEnergy[indx]*ENERGY_UNIT*(gamma - + // 1)/(density[indx]*DENSITY_UNIT/0.6/MP*KB); d_a = + // density[indx]*DENSITY_UNIT/0.6/MP; p_a = + // sqrt(momentum_x[indx]*momentum_x[indx] + + // momentum_y[indx]*momentum_y[indx] + + // momentum_z[indx]*momentum_z[indx])*VELOCITY_UNIT/1e5; + + // printf("(%d, %d, %d):(CM: %.2e, SN: %d) (i:%d, j:%d, k:%d) + // v_1: %.5e v_2: %.5e V_DIFF-> %.4f %%\n", indx_x, indx_y, + // indx_z, mass_dev[gtid], N, i, j, k, v_1, v_2, + // (v_2-v_1)/v_1*100); printf(" (%d, %d, %d):(%d SN) (i:%d, + // j:%d, k:%d) T_b: %.5e T_a: %.5e T_DIFF-> %.4f %%\n", + // indx_x, indx_y, indx_z, N, i, j, k, t_b, t_a, + // (t_a-t_b)/t_b*100); printf(" (%d, %d, %d):(%d SN) + // (i:%d, j:%d, k:%d) d_b: %.5e d_a: %.5e D_DIFF-> %.1f + // %%\n", indx_x, indx_y, indx_z, N, i, j, k, d_b, d_a, + // (d_a-d_b)/d_b*100); printf(" (%d, %d, %d):(%d SN) + // (i:%d, j:%d, k:%d) p_b: %.5e p_a: %.5e P_DIFF-> %.4f + // %%\n", indx_x, indx_y, indx_z, N, i, j, k, p_b, p_a, + // (p_a-p_b)/p_b*100); + + if (direction > 0) { + // printf("urs time:%.3e id:%d N:%d d:%.5e\n", t, id[gtid], + // N, n_0); + local_dti = fmax( + local_dti, + Calc_Timestep(gamma, density, momentum_x, momentum_y, + momentum_z, energy, indx, dx, dy, dz)); } } } } - if (direction > 0) atomicMax(dti, local_dti); } + if (direction > 0) atomicMax(dti, local_dti); } } } + } - __syncthreads(); - - //reduce the info from all the threads in the block - for (unsigned int s = blockDim.x/2; s > 0; s>>=1) { - if(tid < s) { - s_info[FEED_INFO_N*tid] += s_info[FEED_INFO_N*(tid + s)]; - s_info[FEED_INFO_N*tid + 1] += s_info[FEED_INFO_N*(tid + s) + 1]; - s_info[FEED_INFO_N*tid + 2] += s_info[FEED_INFO_N*(tid + s) + 2]; - s_info[FEED_INFO_N*tid + 3] += s_info[FEED_INFO_N*(tid + s) + 3]; - s_info[FEED_INFO_N*tid + 4] += s_info[FEED_INFO_N*(tid + s) + 4]; - s_info[FEED_INFO_N*tid + 5] += s_info[FEED_INFO_N*(tid + s) + 5]; - } - __syncthreads(); + __syncthreads(); + + // reduce the info from all the threads in the block + for (unsigned int s = blockDim.x / 2; s > 0; s >>= 1) { + if (tid < s) { + s_info[FEED_INFO_N * tid] += s_info[FEED_INFO_N * (tid + s)]; + s_info[FEED_INFO_N * tid + 1] += s_info[FEED_INFO_N * (tid + s) + 1]; + s_info[FEED_INFO_N * tid + 2] += s_info[FEED_INFO_N * (tid + s) + 2]; + s_info[FEED_INFO_N * tid + 3] += s_info[FEED_INFO_N * (tid + s) + 3]; + s_info[FEED_INFO_N * tid + 4] += s_info[FEED_INFO_N * (tid + s) + 4]; + s_info[FEED_INFO_N * tid + 5] += s_info[FEED_INFO_N * (tid + s) + 5]; } + __syncthreads(); + } - if (tid == 0) { - info[FEED_INFO_N*blockIdx.x] = s_info[0]; - info[FEED_INFO_N*blockIdx.x + 1] = s_info[1]; - info[FEED_INFO_N*blockIdx.x + 2] = s_info[2]; - info[FEED_INFO_N*blockIdx.x + 3] = s_info[3]; - info[FEED_INFO_N*blockIdx.x + 4] = s_info[4]; - info[FEED_INFO_N*blockIdx.x + 5] = s_info[5]; - } + if (tid == 0) { + info[FEED_INFO_N * blockIdx.x] = s_info[0]; + info[FEED_INFO_N * blockIdx.x + 1] = s_info[1]; + info[FEED_INFO_N * blockIdx.x + 2] = s_info[2]; + info[FEED_INFO_N * blockIdx.x + 3] = s_info[3]; + info[FEED_INFO_N * blockIdx.x + 4] = s_info[4]; + info[FEED_INFO_N * blockIdx.x + 5] = s_info[5]; + } } - -Real supernova::Cluster_Feedback(Grid3D& G, FeedbackAnalysis& analysis) { +Real supernova::Cluster_Feedback(Grid3D& G, FeedbackAnalysis& analysis) +{ #ifdef CPU_TIME G.Timer.Feedback.Start(); #endif @@ -487,7 +642,8 @@ Real supernova::Cluster_Feedback(Grid3D& G, FeedbackAnalysis& analysis) { if (G.H.dt == 0) return 0.0; if (G.Particles.n_local > supernova::n_states) { - printf("ERROR: not enough cuRAND states (%d) for %d local particles\n", supernova::n_states, G.Particles.n_local ); + printf("ERROR: not enough cuRAND states (%d) for %d local particles\n", + supernova::n_states, G.Particles.n_local); exit(-1); } @@ -496,21 +652,21 @@ Real supernova::Cluster_Feedback(Grid3D& G, FeedbackAnalysis& analysis) { Real h_info[6] = {0, 0, 0, 0, 0, 0}; Real info[6]; Real *d_dti, *d_info; - // require d_prev_dens & d_prev_N in case we have to undo feedback if the time step is too large. + // require d_prev_dens & d_prev_N in case we have to undo feedback if the time + // step is too large. Real* d_prev_dens; - int* d_prev_N; - + int* d_prev_N; if (G.Particles.n_local > 0) { CHECK(cudaMalloc(&d_dti, sizeof(Real))); CHECK(cudaMemcpy(d_dti, &h_dti, sizeof(Real), cudaMemcpyHostToDevice)); - CHECK(cudaMalloc(&d_prev_dens, G.Particles.n_local*sizeof(Real))); - CHECK(cudaMalloc(&d_prev_N, G.Particles.n_local*sizeof(int))); - CHECK(cudaMemset(d_prev_dens, 0, G.Particles.n_local*sizeof(Real))); - CHECK(cudaMemset(d_prev_N, 0, G.Particles.n_local*sizeof(int))); + CHECK(cudaMalloc(&d_prev_dens, G.Particles.n_local * sizeof(Real))); + CHECK(cudaMalloc(&d_prev_N, G.Particles.n_local * sizeof(int))); + CHECK(cudaMemset(d_prev_dens, 0, G.Particles.n_local * sizeof(Real))); + CHECK(cudaMemset(d_prev_N, 0, G.Particles.n_local * sizeof(int))); - ngrid = std::ceil((1.*G.Particles.n_local)/TPB_FEEDBACK); - CHECK(cudaMalloc((void**)&d_info, FEED_INFO_N*ngrid*sizeof(Real))); + ngrid = std::ceil((1. * G.Particles.n_local) / TPB_FEEDBACK); + CHECK(cudaMalloc((void**)&d_info, FEED_INFO_N * ngrid * sizeof(Real))); } // TODO: info collection and max dti calculation // assumes ngrid is 1. The reason being that reduction of @@ -520,38 +676,53 @@ Real supernova::Cluster_Feedback(Grid3D& G, FeedbackAnalysis& analysis) { do { direction = 1; if (G.Particles.n_local > 0) { - hipLaunchKernelGGL(Cluster_Feedback_Kernel, ngrid, TPB_FEEDBACK, 0, 0, G.Particles.n_local, G.Particles.partIDs_dev, G.Particles.pos_x_dev, G.Particles.pos_y_dev, G.Particles.pos_z_dev, - G.Particles.mass_dev, G.Particles.age_dev, G.H.xblocal, G.H.yblocal, G.H.zblocal, G.H.xblocal_max, G.H.yblocal_max, G.H.zblocal_max, - G.H.dx, G.H.dy, G.H.dz, G.H.nx, G.H.ny, G.H.nz, G.H.n_ghost, G.H.t, G.H.dt, d_dti, d_info, - G.C.d_density, G.C.d_GasEnergy, G.C.d_Energy, G.C.d_momentum_x, G.C.d_momentum_y, G.C.d_momentum_z, gama, supernova::randStates, d_prev_dens, d_prev_N, direction, - dev_snr, snr_dt, time_sn_start, time_sn_end); + hipLaunchKernelGGL( + Cluster_Feedback_Kernel, ngrid, TPB_FEEDBACK, 0, 0, + G.Particles.n_local, G.Particles.partIDs_dev, G.Particles.pos_x_dev, + G.Particles.pos_y_dev, G.Particles.pos_z_dev, G.Particles.mass_dev, + G.Particles.age_dev, G.H.xblocal, G.H.yblocal, G.H.zblocal, + G.H.xblocal_max, G.H.yblocal_max, G.H.zblocal_max, G.H.dx, G.H.dy, + G.H.dz, G.H.nx, G.H.ny, G.H.nz, G.H.n_ghost, G.H.t, G.H.dt, d_dti, + d_info, G.C.d_density, G.C.d_GasEnergy, G.C.d_Energy, + G.C.d_momentum_x, G.C.d_momentum_y, G.C.d_momentum_z, gama, + supernova::randStates, d_prev_dens, d_prev_N, direction, dev_snr, + snr_dt, time_sn_start, time_sn_end); CHECK(cudaMemcpy(&h_dti, d_dti, sizeof(Real), cudaMemcpyDeviceToHost)); } - #ifdef MPI_CHOLLA + #ifdef MPI_CHOLLA h_dti = ReduceRealMax(h_dti); MPI_Barrier(world); - #endif // MPI_CHOLLA + #endif // MPI_CHOLLA - if (h_dti != 0 && (C_cfl/h_dti < G.H.dt)) { // timestep too big: need to undo the last operation + if (h_dti != 0 && + (C_cfl / h_dti < + G.H.dt)) { // timestep too big: need to undo the last operation direction = -1; if (G.Particles.n_local > 0) { - hipLaunchKernelGGL(Cluster_Feedback_Kernel, ngrid, TPB_FEEDBACK, 0, 0, G.Particles.n_local, G.Particles.partIDs_dev, G.Particles.pos_x_dev, G.Particles.pos_y_dev, G.Particles.pos_z_dev, - G.Particles.mass_dev, G.Particles.age_dev, G.H.xblocal, G.H.yblocal, G.H.zblocal, G.H.xblocal_max, G.H.yblocal_max, G.H.zblocal_max, - G.H.dx, G.H.dy, G.H.dz, G.H.nx, G.H.ny, G.H.nz, G.H.n_ghost, G.H.t, G.H.dt, d_dti, d_info, - G.C.d_density, G.C.d_GasEnergy, G.C.d_Energy, G.C.d_momentum_x, G.C.d_momentum_y, G.C.d_momentum_z, gama, supernova::randStates, d_prev_dens, d_prev_N, direction, - dev_snr, snr_dt, time_sn_start, time_sn_end); + hipLaunchKernelGGL( + Cluster_Feedback_Kernel, ngrid, TPB_FEEDBACK, 0, 0, + G.Particles.n_local, G.Particles.partIDs_dev, G.Particles.pos_x_dev, + G.Particles.pos_y_dev, G.Particles.pos_z_dev, G.Particles.mass_dev, + G.Particles.age_dev, G.H.xblocal, G.H.yblocal, G.H.zblocal, + G.H.xblocal_max, G.H.yblocal_max, G.H.zblocal_max, G.H.dx, G.H.dy, + G.H.dz, G.H.nx, G.H.ny, G.H.nz, G.H.n_ghost, G.H.t, G.H.dt, d_dti, + d_info, G.C.d_density, G.C.d_GasEnergy, G.C.d_Energy, + G.C.d_momentum_x, G.C.d_momentum_y, G.C.d_momentum_z, gama, + supernova::randStates, d_prev_dens, d_prev_N, direction, dev_snr, + snr_dt, time_sn_start, time_sn_end); CHECK(cudaDeviceSynchronize()); } - G.H.dt = C_cfl/h_dti; + G.H.dt = C_cfl / h_dti; } } while (direction == -1); if (G.Particles.n_local > 0) { - CHECK(cudaMemcpy(&h_info, d_info, FEED_INFO_N*sizeof(Real), cudaMemcpyDeviceToHost)); + CHECK(cudaMemcpy(&h_info, d_info, FEED_INFO_N * sizeof(Real), + cudaMemcpyDeviceToHost)); CHECK(cudaFree(d_dti)); CHECK(cudaFree(d_info)); CHECK(cudaFree(d_prev_dens)); @@ -573,22 +744,38 @@ Real supernova::Cluster_Feedback(Grid3D& G, FeedbackAnalysis& analysis) { Real resolved_ratio = 0.0; if (info[supernova::RESOLVED] > 0 || info[supernova::NOT_RESOLVED] > 0) { - resolved_ratio = info[supernova::RESOLVED]/(info[supernova::RESOLVED] + info[supernova::NOT_RESOLVED]); + resolved_ratio = + info[supernova::RESOLVED] / + (info[supernova::RESOLVED] + info[supernova::NOT_RESOLVED]); } Real global_resolved_ratio = 0.0; if (analysis.countResolved > 0 || analysis.countUnresolved > 0) { - global_resolved_ratio = analysis.countResolved / (analysis.countResolved + analysis.countUnresolved); + global_resolved_ratio = analysis.countResolved / + (analysis.countResolved + analysis.countUnresolved); } - chprintf("iteration %d: number of SN: %d, ratio of resolved %.3e\n", G.H.n_step, (long)info[supernova::SN], resolved_ratio); - chprintf(" this iteration: energy: %.5e erg. momentum: %.5e S.M. km/s unres_energy: %.5e erg\n", - info[supernova::ENERGY]*MASS_UNIT*LENGTH_UNIT*LENGTH_UNIT/TIME_UNIT/TIME_UNIT, info[supernova::MOMENTUM]*VELOCITY_UNIT/1e5, - info[supernova::UNRES_ENERGY]*MASS_UNIT*LENGTH_UNIT*LENGTH_UNIT/TIME_UNIT/TIME_UNIT); - chprintf(" cummulative: #SN: %d, ratio of resolved (R: %d, UR: %d) = %.3e\n", (long)analysis.countSN, (long)analysis.countResolved, - (long)analysis.countUnresolved, global_resolved_ratio); - chprintf(" energy: %.5e erg. Total momentum: %.5e S.M. km/s, Total unres energy: %.5e\n", - analysis.totalEnergy*MASS_UNIT*LENGTH_UNIT*LENGTH_UNIT/TIME_UNIT/TIME_UNIT, analysis.totalMomentum*VELOCITY_UNIT/1e5, - analysis.totalUnresEnergy*MASS_UNIT*LENGTH_UNIT*LENGTH_UNIT/TIME_UNIT/TIME_UNIT); + chprintf("iteration %d: number of SN: %d, ratio of resolved %.3e\n", + G.H.n_step, (long)info[supernova::SN], resolved_ratio); + chprintf( + " this iteration: energy: %.5e erg. momentum: %.5e S.M. km/s " + "unres_energy: %.5e erg\n", + info[supernova::ENERGY] * MASS_UNIT * LENGTH_UNIT * LENGTH_UNIT / + TIME_UNIT / TIME_UNIT, + info[supernova::MOMENTUM] * VELOCITY_UNIT / 1e5, + info[supernova::UNRES_ENERGY] * MASS_UNIT * LENGTH_UNIT * LENGTH_UNIT / + TIME_UNIT / TIME_UNIT); + chprintf( + " cummulative: #SN: %d, ratio of resolved (R: %d, UR: %d) = %.3e\n", + (long)analysis.countSN, (long)analysis.countResolved, + (long)analysis.countUnresolved, global_resolved_ratio); + chprintf( + " energy: %.5e erg. Total momentum: %.5e S.M. km/s, Total unres " + "energy: %.5e\n", + analysis.totalEnergy * MASS_UNIT * LENGTH_UNIT * LENGTH_UNIT / TIME_UNIT / + TIME_UNIT, + analysis.totalMomentum * VELOCITY_UNIT / 1e5, + analysis.totalUnresEnergy * MASS_UNIT * LENGTH_UNIT * LENGTH_UNIT / + TIME_UNIT / TIME_UNIT); #ifdef CPU_TIME G.Timer.Feedback.End(); @@ -597,5 +784,4 @@ Real supernova::Cluster_Feedback(Grid3D& G, FeedbackAnalysis& analysis) { return h_dti; } - -#endif //SUPERNOVA & PARTICLES_GPU & PARTICLE_IDS & PARTICLE_AGE +#endif // SUPERNOVA & PARTICLES_GPU & PARTICLE_IDS & PARTICLE_AGE diff --git a/src/particles/gravity_CIC.cpp b/src/particles/gravity_CIC.cpp index 95c5b1213..ceb786c6b 100644 --- a/src/particles/gravity_CIC.cpp +++ b/src/particles/gravity_CIC.cpp @@ -1,121 +1,127 @@ #ifdef PARTICLES -#include -#include -#include "math.h" -#include -#include "../global/global.h" -#include "../grid/grid3D.h" -#include "../io/io.h" -#include "particles_3D.h" -#include "density_CIC.h" -#include "../model/disk_galaxy.h" + #include + #include + #include -#ifdef PARALLEL_OMP -#include "../utils/parallel_omp.h" -#endif + #include "../global/global.h" + #include "../grid/grid3D.h" + #include "../io/io.h" + #include "../model/disk_galaxy.h" + #include "density_CIC.h" + #include "math.h" + #include "particles_3D.h" -//Get the Gravitational Field from the potential: g=-gradient(potential) -void Grid3D::Get_Gravity_Field_Particles(){ + #ifdef PARALLEL_OMP + #include "../utils/parallel_omp.h" + #endif +// Get the Gravitational Field from the potential: g=-gradient(potential) +void Grid3D::Get_Gravity_Field_Particles() +{ #ifdef PARTICLES_CPU - - #ifdef GRAVITY_GPU + + #ifdef GRAVITY_GPU Copy_Potential_From_GPU(); - #endif + #endif - #ifndef PARALLEL_OMP - Get_Gravity_Field_Particles_function( 0, Particles.G.nz_local + 2*Particles.G.n_ghost_particles_grid); - #else + #ifndef PARALLEL_OMP + Get_Gravity_Field_Particles_function( + 0, Particles.G.nz_local + 2 * Particles.G.n_ghost_particles_grid); + #else - #pragma omp parallel num_threads( N_OMP_THREADS ) + #pragma omp parallel num_threads(N_OMP_THREADS) { int omp_id, n_omp_procs; int g_start, g_end; - omp_id = omp_get_thread_num(); + omp_id = omp_get_thread_num(); n_omp_procs = omp_get_num_threads(); - Get_OMP_Grid_Indxs( Particles.G.nz_local + 2*Particles.G.n_ghost_particles_grid, N_OMP_THREADS, omp_id, &g_start, &g_end ); + Get_OMP_Grid_Indxs( + Particles.G.nz_local + 2 * Particles.G.n_ghost_particles_grid, + N_OMP_THREADS, omp_id, &g_start, &g_end); - Get_Gravity_Field_Particles_function( g_start, g_end); + Get_Gravity_Field_Particles_function(g_start, g_end); } - #endif//PARALLEL_OMP - #endif//PARTICLES_CPU - + #endif // PARALLEL_OMP + #endif // PARTICLES_CPU #ifdef PARTICLES_GPU - Particles.Get_Gravity_Field_Particles_GPU( Grav.F.potential_h ); + Particles.Get_Gravity_Field_Particles_GPU(Grav.F.potential_h); #endif - } - -void Grid3D::Get_Gravity_CIC(){ - +void Grid3D::Get_Gravity_CIC() +{ #ifdef PARTICLES_CPU - #ifndef PARALLEL_OMP - Get_Gravity_CIC_function( 0, Particles.n_local ); - #else + #ifndef PARALLEL_OMP + Get_Gravity_CIC_function(0, Particles.n_local); + #else - #pragma omp parallel num_threads( N_OMP_THREADS ) + #pragma omp parallel num_threads(N_OMP_THREADS) { int omp_id, n_omp_procs; part_int_t p_start, p_end; - omp_id = omp_get_thread_num(); + omp_id = omp_get_thread_num(); n_omp_procs = omp_get_num_threads(); - Get_OMP_Particles_Indxs( Particles.n_local, N_OMP_THREADS, omp_id, &p_start, &p_end ); + Get_OMP_Particles_Indxs(Particles.n_local, N_OMP_THREADS, omp_id, &p_start, + &p_end); - Get_Gravity_CIC_function( p_start, p_end ); + Get_Gravity_CIC_function(p_start, p_end); } - #endif//PARALLEL_OMP - #endif//PARTICLES_CPU + #endif // PARALLEL_OMP + #endif // PARTICLES_CPU #ifdef PARTICLES_GPU Particles.Get_Gravity_CIC_GPU(); #endif } - -#ifdef PARTICLES_GPU -void Particles_3D::Get_Gravity_Field_Particles_GPU( Real *potential_host ){ - - Get_Gravity_Field_Particles_GPU_function( G.nx_local, G.ny_local, G.nz_local, G.n_ghost_particles_grid, G.n_cells_potential, G.dx, G.dy, G.dz, potential_host, G.potential_dev, G.gravity_x_dev, G.gravity_y_dev, G.gravity_z_dev ); - + #ifdef PARTICLES_GPU +void Particles_3D::Get_Gravity_Field_Particles_GPU(Real *potential_host) +{ + Get_Gravity_Field_Particles_GPU_function( + G.nx_local, G.ny_local, G.nz_local, G.n_ghost_particles_grid, + G.n_cells_potential, G.dx, G.dy, G.dz, potential_host, G.potential_dev, + G.gravity_x_dev, G.gravity_y_dev, G.gravity_z_dev); } -void Particles_3D::Get_Gravity_CIC_GPU(){ - - Get_Gravity_CIC_GPU_function( n_local, G.nx_local, G.ny_local, G.nz_local, G.n_ghost_particles_grid, G.xMin, G.xMax, G.yMin, G.yMax, G.zMin, G.zMax, G.dx, G.dy, G.dz, pos_x_dev, pos_y_dev, pos_z_dev, grav_x_dev, grav_y_dev, grav_z_dev, G.gravity_x_dev, G.gravity_y_dev, G.gravity_z_dev); +void Particles_3D::Get_Gravity_CIC_GPU() +{ + Get_Gravity_CIC_GPU_function( + n_local, G.nx_local, G.ny_local, G.nz_local, G.n_ghost_particles_grid, + G.xMin, G.xMax, G.yMin, G.yMax, G.zMin, G.zMax, G.dx, G.dy, G.dz, + pos_x_dev, pos_y_dev, pos_z_dev, grav_x_dev, grav_y_dev, grav_z_dev, + G.gravity_x_dev, G.gravity_y_dev, G.gravity_z_dev); } -#endif //PARTICLES_GPU + #endif // PARTICLES_GPU + #ifdef PARTICLES_CPU -#ifdef PARTICLES_CPU - -//Compute the gradient of the potential -void Grid3D::Get_Gravity_Field_Particles_function( int g_start, int g_end ){ - +// Compute the gradient of the potential +void Grid3D::Get_Gravity_Field_Particles_function(int g_start, int g_end) +{ int nx_grav, ny_grav, nz_grav, nGHST_grav; nGHST_grav = Particles.G.n_ghost_particles_grid; - nx_grav = Particles.G.nx_local + 2*nGHST_grav; - ny_grav = Particles.G.ny_local + 2*nGHST_grav; - nz_grav = Particles.G.nz_local + 2*nGHST_grav; + nx_grav = Particles.G.nx_local + 2 * nGHST_grav; + ny_grav = Particles.G.ny_local + 2 * nGHST_grav; + nz_grav = Particles.G.nz_local + 2 * nGHST_grav; int nx_grid, ny_grid, nz_grid, nGHST_grid; Real *potential; - potential = Grav.F.potential_h; + potential = Grav.F.potential_h; nGHST_grid = N_GHOST_POTENTIAL; - nx_grid = Grav.nx_local + 2*nGHST_grid; - ny_grid = Grav.ny_local + 2*nGHST_grid; - nz_grid = Grav.nz_local + 2*nGHST_grid; + nx_grid = Grav.nx_local + 2 * nGHST_grid; + ny_grid = Grav.ny_local + 2 * nGHST_grid; + nz_grid = Grav.nz_local + 2 * nGHST_grid; int nGHST = nGHST_grid - nGHST_grav; @@ -124,96 +130,112 @@ void Grid3D::Get_Gravity_Field_Particles_function( int g_start, int g_end ){ dy = Particles.G.dy; dz = Particles.G.dz; - #ifdef GRAVITY_5_POINTS_GRADIENT + #ifdef GRAVITY_5_POINTS_GRADIENT Real phi_ll, phi_rr; int id_ll, id_rr; - #endif + #endif Real phi_l, phi_r; int k, j, i, id_l, id_r, id; - for ( k=g_start; k -#include -#include -#include -#include "../utils/gpu.hpp" -#include "../global/global.h" -#include "../global/global_cuda.h" -#include "particles_3D.h" - -#ifdef GRAVITY_GPU -#include "../grid/grid3D.h" -#endif - -#ifdef PARTICLES_GPU - -//Copy the potential from host to device -void Particles_3D::Copy_Potential_To_GPU( Real *potential_host, Real *potential_dev, int n_cells_potential ){ - CudaSafeCall( cudaMemcpy( potential_dev, potential_host, n_cells_potential*sizeof(Real), cudaMemcpyHostToDevice) ); -} + #include + #include + #include + #include + + #include "../global/global.h" + #include "../global/global_cuda.h" + #include "../utils/gpu.hpp" + #include "particles_3D.h" + + #ifdef GRAVITY_GPU + #include "../grid/grid3D.h" + #endif + #ifdef PARTICLES_GPU -//Kernel to compute the gradient of the potential -__global__ void Get_Gravity_Field_Particles_Kernel( Real *potential_dev, Real *gravity_x_dev, Real *gravity_y_dev, Real *gravity_z_dev, int nx, int ny, int nz, int n_ghost_particles_grid, int n_ghost_potential, Real dx, Real dy, Real dz ){ +// Copy the potential from host to device +void Particles_3D::Copy_Potential_To_GPU(Real *potential_host, + Real *potential_dev, + int n_cells_potential) +{ + CudaSafeCall(cudaMemcpy(potential_dev, potential_host, + n_cells_potential * sizeof(Real), + cudaMemcpyHostToDevice)); +} +// Kernel to compute the gradient of the potential +__global__ void Get_Gravity_Field_Particles_Kernel( + Real *potential_dev, Real *gravity_x_dev, Real *gravity_y_dev, + Real *gravity_z_dev, int nx, int ny, int nz, int n_ghost_particles_grid, + int n_ghost_potential, Real dx, Real dy, Real dz) +{ int tid_x = blockIdx.x * blockDim.x + threadIdx.x; int tid_y = blockIdx.y * blockDim.y + threadIdx.y; int tid_z = blockIdx.z * blockDim.z + threadIdx.z; int nx_grav, ny_grav, nz_grav; - nx_grav = nx + 2*n_ghost_particles_grid; - ny_grav = ny + 2*n_ghost_particles_grid; - nz_grav = nz + 2*n_ghost_particles_grid; + nx_grav = nx + 2 * n_ghost_particles_grid; + ny_grav = ny + 2 * n_ghost_particles_grid; + nz_grav = nz + 2 * n_ghost_particles_grid; - if (tid_x >= nx_grav || tid_y >= ny_grav || tid_z >= nz_grav ) return; - int tid = tid_x + tid_y*nx_grav + tid_z*nx_grav*ny_grav; + if (tid_x >= nx_grav || tid_y >= ny_grav || tid_z >= nz_grav) return; + int tid = tid_x + tid_y * nx_grav + tid_z * nx_grav * ny_grav; int nx_pot, ny_pot; - nx_pot = nx + 2*n_ghost_potential; - ny_pot = ny + 2*n_ghost_potential; + nx_pot = nx + 2 * n_ghost_potential; + ny_pot = ny + 2 * n_ghost_potential; int nGHST = n_ghost_potential - n_ghost_particles_grid; Real phi_l, phi_r; int id_l, id_r; - #ifdef GRAVITY_5_POINTS_GRADIENT + #ifdef GRAVITY_5_POINTS_GRADIENT Real phi_ll, phi_rr; int id_ll, id_rr; - #endif + #endif // Get Potential Gradient X - id_l = (tid_x-1 + nGHST) + (tid_y + nGHST)*nx_pot + (tid_z + nGHST)*ny_pot*nx_pot; - id_r = (tid_x+1 + nGHST) + (tid_y + nGHST)*nx_pot + (tid_z + nGHST)*ny_pot*nx_pot; + id_l = (tid_x - 1 + nGHST) + (tid_y + nGHST) * nx_pot + + (tid_z + nGHST) * ny_pot * nx_pot; + id_r = (tid_x + 1 + nGHST) + (tid_y + nGHST) * nx_pot + + (tid_z + nGHST) * ny_pot * nx_pot; phi_l = potential_dev[id_l]; phi_r = potential_dev[id_r]; - #ifdef GRAVITY_5_POINTS_GRADIENT - id_ll = (tid_x-2 + nGHST) + (tid_y + nGHST)*nx_pot + (tid_z + nGHST)*ny_pot*nx_pot; - id_rr = (tid_x+2 + nGHST) + (tid_y + nGHST)*nx_pot + (tid_z + nGHST)*ny_pot*nx_pot; + #ifdef GRAVITY_5_POINTS_GRADIENT + id_ll = (tid_x - 2 + nGHST) + (tid_y + nGHST) * nx_pot + + (tid_z + nGHST) * ny_pot * nx_pot; + id_rr = (tid_x + 2 + nGHST) + (tid_y + nGHST) * nx_pot + + (tid_z + nGHST) * ny_pot * nx_pot; phi_ll = potential_dev[id_ll]; phi_rr = potential_dev[id_rr]; - gravity_x_dev[tid] = -1 * ( -phi_rr + 8*phi_r - 8*phi_l + phi_ll) / (12*dx); - #else - gravity_x_dev[tid] = -0.5 * ( phi_r - phi_l ) / dx; - #endif + gravity_x_dev[tid] = + -1 * (-phi_rr + 8 * phi_r - 8 * phi_l + phi_ll) / (12 * dx); + #else + gravity_x_dev[tid] = -0.5 * (phi_r - phi_l) / dx; + #endif // Get Potential Gradient Y - id_l = (tid_x + nGHST) + (tid_y-1 + nGHST)*nx_pot + (tid_z + nGHST)*ny_pot*nx_pot; - id_r = (tid_x + nGHST) + (tid_y+1 + nGHST)*nx_pot + (tid_z + nGHST)*ny_pot*nx_pot; + id_l = (tid_x + nGHST) + (tid_y - 1 + nGHST) * nx_pot + + (tid_z + nGHST) * ny_pot * nx_pot; + id_r = (tid_x + nGHST) + (tid_y + 1 + nGHST) * nx_pot + + (tid_z + nGHST) * ny_pot * nx_pot; phi_l = potential_dev[id_l]; phi_r = potential_dev[id_r]; - #ifdef GRAVITY_5_POINTS_GRADIENT - id_ll = (tid_x + nGHST) + (tid_y-2 + nGHST)*nx_pot + (tid_z + nGHST)*ny_pot*nx_pot; - id_rr = (tid_x + nGHST) + (tid_y+2 + nGHST)*nx_pot + (tid_z + nGHST)*ny_pot*nx_pot; + #ifdef GRAVITY_5_POINTS_GRADIENT + id_ll = (tid_x + nGHST) + (tid_y - 2 + nGHST) * nx_pot + + (tid_z + nGHST) * ny_pot * nx_pot; + id_rr = (tid_x + nGHST) + (tid_y + 2 + nGHST) * nx_pot + + (tid_z + nGHST) * ny_pot * nx_pot; phi_ll = potential_dev[id_ll]; phi_rr = potential_dev[id_rr]; - gravity_y_dev[tid] = -1 * ( -phi_rr + 8*phi_r - 8*phi_l + phi_ll) / (12*dy); - #else - gravity_y_dev[tid] = -0.5 * ( phi_r - phi_l ) / dy; - #endif + gravity_y_dev[tid] = + -1 * (-phi_rr + 8 * phi_r - 8 * phi_l + phi_ll) / (12 * dy); + #else + gravity_y_dev[tid] = -0.5 * (phi_r - phi_l) / dy; + #endif // Get Potential Gradient Z - id_l = (tid_x + nGHST) + (tid_y + nGHST)*nx_pot + (tid_z-1 + nGHST)*ny_pot*nx_pot; - id_r = (tid_x + nGHST) + (tid_y + nGHST)*nx_pot + (tid_z+1 + nGHST)*ny_pot*nx_pot; + id_l = (tid_x + nGHST) + (tid_y + nGHST) * nx_pot + + (tid_z - 1 + nGHST) * ny_pot * nx_pot; + id_r = (tid_x + nGHST) + (tid_y + nGHST) * nx_pot + + (tid_z + 1 + nGHST) * ny_pot * nx_pot; phi_l = potential_dev[id_l]; phi_r = potential_dev[id_r]; - #ifdef GRAVITY_5_POINTS_GRADIENT - id_ll = (tid_x + nGHST) + (tid_y + nGHST)*nx_pot + (tid_z-2 + nGHST)*ny_pot*nx_pot; - id_rr = (tid_x + nGHST) + (tid_y + nGHST)*nx_pot + (tid_z+2 + nGHST)*ny_pot*nx_pot; + #ifdef GRAVITY_5_POINTS_GRADIENT + id_ll = (tid_x + nGHST) + (tid_y + nGHST) * nx_pot + + (tid_z - 2 + nGHST) * ny_pot * nx_pot; + id_rr = (tid_x + nGHST) + (tid_y + nGHST) * nx_pot + + (tid_z + 2 + nGHST) * ny_pot * nx_pot; phi_ll = potential_dev[id_ll]; phi_rr = potential_dev[id_rr]; - gravity_z_dev[tid] = -1 * ( -phi_rr + 8*phi_r - 8*phi_l + phi_ll) / (12*dz); - #else - gravity_z_dev[tid] = -0.5 * ( phi_r - phi_l ) / dz; - #endif + gravity_z_dev[tid] = + -1 * (-phi_rr + 8 * phi_r - 8 * phi_l + phi_ll) / (12 * dz); + #else + gravity_z_dev[tid] = -0.5 * (phi_r - phi_l) / dz; + #endif } - -//Call the kernel to compute the gradient of the potential -void Particles_3D::Get_Gravity_Field_Particles_GPU_function( int nx_local, int ny_local, int nz_local, int n_ghost_particles_grid, int n_cells_potential, Real dx, Real dy, Real dz, Real *potential_host, Real *potential_dev, Real *gravity_x_dev, Real *gravity_y_dev, Real *gravity_z_dev ){ - - #ifndef GRAVITY_GPU - Copy_Potential_To_GPU( potential_host, potential_dev, n_cells_potential ); - #endif +// Call the kernel to compute the gradient of the potential +void Particles_3D::Get_Gravity_Field_Particles_GPU_function( + int nx_local, int ny_local, int nz_local, int n_ghost_particles_grid, + int n_cells_potential, Real dx, Real dy, Real dz, Real *potential_host, + Real *potential_dev, Real *gravity_x_dev, Real *gravity_y_dev, + Real *gravity_z_dev) +{ + #ifndef GRAVITY_GPU + Copy_Potential_To_GPU(potential_host, potential_dev, n_cells_potential); + #endif int nx_g, ny_g, nz_g; - nx_g = nx_local + 2*N_GHOST_POTENTIAL; - ny_g = ny_local + 2*N_GHOST_POTENTIAL; - nz_g = nz_local + 2*N_GHOST_POTENTIAL; + nx_g = nx_local + 2 * N_GHOST_POTENTIAL; + ny_g = ny_local + 2 * N_GHOST_POTENTIAL; + nz_g = nz_local + 2 * N_GHOST_POTENTIAL; // set values for GPU kernels - int tpb_x = 8; - int tpb_y = 8; - int tpb_z = 8; - int ngrid_x = (nx_g + tpb_x - 1) / tpb_x; - int ngrid_y = (ny_g + tpb_y - 1) / tpb_y; - int ngrid_z = (nz_g + tpb_z - 1) / tpb_z; + int tpb_x = 8; + int tpb_y = 8; + int tpb_z = 8; + int ngrid_x = (nx_g + tpb_x - 1) / tpb_x; + int ngrid_y = (ny_g + tpb_y - 1) / tpb_y; + int ngrid_z = (nz_g + tpb_z - 1) / tpb_z; // number of blocks per 1D grid dim3 dim3dGrid(ngrid_x, ngrid_y, ngrid_z); // number of threads per 1D block dim3 dim3dBlock(tpb_x, tpb_y, tpb_z); - - hipLaunchKernelGGL(Get_Gravity_Field_Particles_Kernel, dim3dGrid, dim3dBlock, 0, 0, potential_dev, gravity_x_dev, gravity_y_dev, gravity_z_dev, nx_local, ny_local, nz_local, n_ghost_particles_grid, N_GHOST_POTENTIAL, dx, dy, dz ); + hipLaunchKernelGGL(Get_Gravity_Field_Particles_Kernel, dim3dGrid, dim3dBlock, + 0, 0, potential_dev, gravity_x_dev, gravity_y_dev, + gravity_z_dev, nx_local, ny_local, nz_local, + n_ghost_particles_grid, N_GHOST_POTENTIAL, dx, dy, dz); CudaCheckError(); } - -//Get CIC indexes from the particles positions -__device__ void Get_Indexes_CIC_Gravity( Real xMin, Real yMin, Real zMin, Real dx, Real dy, Real dz, Real pos_x, Real pos_y, Real pos_z, int &indx_x, int &indx_y, int &indx_z ){ - indx_x = (int) floor( ( pos_x - xMin - 0.5*dx ) / dx ); - indx_y = (int) floor( ( pos_y - yMin - 0.5*dy ) / dy ); - indx_z = (int) floor( ( pos_z - zMin - 0.5*dz ) / dz ); +// Get CIC indexes from the particles positions +__device__ void Get_Indexes_CIC_Gravity(Real xMin, Real yMin, Real zMin, + Real dx, Real dy, Real dz, Real pos_x, + Real pos_y, Real pos_z, int &indx_x, + int &indx_y, int &indx_z) +{ + indx_x = (int)floor((pos_x - xMin - 0.5 * dx) / dx); + indx_y = (int)floor((pos_y - yMin - 0.5 * dy) / dy); + indx_z = (int)floor((pos_z - zMin - 0.5 * dz) / dz); } -//Kernel to compute the gravitational field at the particles positions via Cloud-In-Cell -__global__ void Get_Gravity_CIC_Kernel( part_int_t n_local, Real *gravity_x_dev, Real *gravity_y_dev, Real *gravity_z_dev, Real *pos_x_dev, Real *pos_y_dev, Real *pos_z_dev, Real *grav_x_dev, Real *grav_y_dev, Real *grav_z_dev, Real xMin, Real yMin, Real zMin, Real xMax, Real yMax, Real zMax, Real dx, Real dy, Real dz, int nx, int ny, int nz, int n_ghost){ - - part_int_t tid = blockIdx.x * blockDim.x + threadIdx.x ; +// Kernel to compute the gravitational field at the particles positions via +// Cloud-In-Cell +__global__ void Get_Gravity_CIC_Kernel( + part_int_t n_local, Real *gravity_x_dev, Real *gravity_y_dev, + Real *gravity_z_dev, Real *pos_x_dev, Real *pos_y_dev, Real *pos_z_dev, + Real *grav_x_dev, Real *grav_y_dev, Real *grav_z_dev, Real xMin, Real yMin, + Real zMin, Real xMax, Real yMax, Real zMax, Real dx, Real dy, Real dz, + int nx, int ny, int nz, int n_ghost) +{ + part_int_t tid = blockIdx.x * blockDim.x + threadIdx.x; - if ( tid >= n_local) return; + if (tid >= n_local) return; int nx_g, ny_g; - nx_g = nx + 2*n_ghost; - ny_g = ny + 2*n_ghost; + nx_g = nx + 2 * n_ghost; + ny_g = ny + 2 * n_ghost; Real pos_x, pos_y, pos_z; Real cell_center_x, cell_center_y, cell_center_z; @@ -156,97 +193,114 @@ __global__ void Get_Gravity_CIC_Kernel( part_int_t n_local, Real *gravity_x_dev, pos_y = pos_y_dev[tid]; pos_z = pos_z_dev[tid]; - int indx_x, indx_y, indx_z, indx; - Get_Indexes_CIC_Gravity( xMin, yMin, zMin, dx, dy, dz, pos_x, pos_y, pos_z, indx_x, indx_y, indx_z ); + Get_Indexes_CIC_Gravity(xMin, yMin, zMin, dx, dy, dz, pos_x, pos_y, pos_z, + indx_x, indx_y, indx_z); bool in_local = true; - if ( pos_x < xMin || pos_x >= xMax ) in_local = false; - if ( pos_y < yMin || pos_y >= yMax ) in_local = false; - if ( pos_z < zMin || pos_z >= zMax ) in_local = false; - if ( ! in_local ) { + if (pos_x < xMin || pos_x >= xMax) in_local = false; + if (pos_y < yMin || pos_y >= yMax) in_local = false; + if (pos_z < zMin || pos_z >= zMax) in_local = false; + if (!in_local) { printf(" Gravity CIC Error: Particle outside local domain"); return; } - cell_center_x = xMin + indx_x*dx + 0.5*dx; - cell_center_y = yMin + indx_y*dy + 0.5*dy; - cell_center_z = zMin + indx_z*dz + 0.5*dz; - delta_x = 1 - ( pos_x - cell_center_x ) / dx; - delta_y = 1 - ( pos_y - cell_center_y ) / dy; - delta_z = 1 - ( pos_z - cell_center_z ) / dz; + cell_center_x = xMin + indx_x * dx + 0.5 * dx; + cell_center_y = yMin + indx_y * dy + 0.5 * dy; + cell_center_z = zMin + indx_z * dz + 0.5 * dz; + delta_x = 1 - (pos_x - cell_center_x) / dx; + delta_y = 1 - (pos_y - cell_center_y) / dy; + delta_z = 1 - (pos_z - cell_center_z) / dz; indx_x += n_ghost; indx_y += n_ghost; indx_z += n_ghost; - indx = indx_x + indx_y*nx_g + indx_z*nx_g*ny_g; + indx = indx_x + indx_y * nx_g + indx_z * nx_g * ny_g; g_x_bl = gravity_x_dev[indx]; g_y_bl = gravity_y_dev[indx]; g_z_bl = gravity_z_dev[indx]; - indx = (indx_x+1) + (indx_y)*nx_g + (indx_z)*nx_g*ny_g; + indx = (indx_x + 1) + (indx_y)*nx_g + (indx_z)*nx_g * ny_g; g_x_br = gravity_x_dev[indx]; g_y_br = gravity_y_dev[indx]; g_z_br = gravity_z_dev[indx]; - indx = (indx_x) + (indx_y+1)*nx_g + (indx_z)*nx_g*ny_g; + indx = (indx_x) + (indx_y + 1) * nx_g + (indx_z)*nx_g * ny_g; g_x_bu = gravity_x_dev[indx]; g_y_bu = gravity_y_dev[indx]; g_z_bu = gravity_z_dev[indx]; - indx = (indx_x+1) + (indx_y+1)*nx_g + (indx_z)*nx_g*ny_g; + indx = (indx_x + 1) + (indx_y + 1) * nx_g + (indx_z)*nx_g * ny_g; g_x_bru = gravity_x_dev[indx]; g_y_bru = gravity_y_dev[indx]; g_z_bru = gravity_z_dev[indx]; - indx = (indx_x) + (indx_y)*nx_g + (indx_z+1)*nx_g*ny_g; + indx = (indx_x) + (indx_y)*nx_g + (indx_z + 1) * nx_g * ny_g; g_x_tl = gravity_x_dev[indx]; g_y_tl = gravity_y_dev[indx]; g_z_tl = gravity_z_dev[indx]; - indx = (indx_x+1) + (indx_y)*nx_g + (indx_z+1)*nx_g*ny_g; + indx = (indx_x + 1) + (indx_y)*nx_g + (indx_z + 1) * nx_g * ny_g; g_x_tr = gravity_x_dev[indx]; g_y_tr = gravity_y_dev[indx]; g_z_tr = gravity_z_dev[indx]; - indx = (indx_x) + (indx_y+1)*nx_g + (indx_z+1)*nx_g*ny_g; + indx = (indx_x) + (indx_y + 1) * nx_g + (indx_z + 1) * nx_g * ny_g; g_x_tu = gravity_x_dev[indx]; g_y_tu = gravity_y_dev[indx]; g_z_tu = gravity_z_dev[indx]; - indx = (indx_x+1) + (indx_y+1)*nx_g + (indx_z+1)*nx_g*ny_g; + indx = (indx_x + 1) + (indx_y + 1) * nx_g + (indx_z + 1) * nx_g * ny_g; g_x_tru = gravity_x_dev[indx]; g_y_tru = gravity_y_dev[indx]; g_z_tru = gravity_z_dev[indx]; - g_x = g_x_bl*(delta_x)*(delta_y)*(delta_z) + g_x_br*(1-delta_x)*(delta_y)*(delta_z) + - g_x_bu*(delta_x)*(1-delta_y)*(delta_z ) + g_x_bru*(1-delta_x)*(1-delta_y)*(delta_z) + - g_x_tl*(delta_x)*(delta_y)*(1-delta_z) + g_x_tr*(1-delta_x)*(delta_y)*(1-delta_z) + - g_x_tu*(delta_x)*(1-delta_y)*(1-delta_z) + g_x_tru*(1-delta_x)*(1-delta_y)*(1-delta_z); - - g_y = g_y_bl*(delta_x)*(delta_y)*(delta_z) + g_y_br*(1-delta_x)*(delta_y)*(delta_z) + - g_y_bu*(delta_x)*(1-delta_y)*(delta_z) + g_y_bru*(1-delta_x)*(1-delta_y)*(delta_z) + - g_y_tl*(delta_x)*(delta_y)*(1-delta_z) + g_y_tr*(1-delta_x)*(delta_y)*(1-delta_z) + - g_y_tu*(delta_x)*(1-delta_y)*(1-delta_z) + g_y_tru*(1-delta_x)*(1-delta_y)*(1-delta_z); - - g_z = g_z_bl*(delta_x)*(delta_y)*(delta_z) + g_z_br*(1-delta_x)*(delta_y)*(delta_z) + - g_z_bu*(delta_x)*(1-delta_y)*(delta_z) + g_z_bru*(1-delta_x)*(1-delta_y)*(delta_z) + - g_z_tl*(delta_x)*(delta_y)*(1-delta_z) + g_z_tr*(1-delta_x)*(delta_y)*(1-delta_z) + - g_z_tu*(delta_x)*(1-delta_y)*(1-delta_z) + g_z_tru*(1-delta_x)*(1-delta_y)*(1-delta_z); + g_x = g_x_bl * (delta_x) * (delta_y) * (delta_z) + + g_x_br * (1 - delta_x) * (delta_y) * (delta_z) + + g_x_bu * (delta_x) * (1 - delta_y) * (delta_z) + + g_x_bru * (1 - delta_x) * (1 - delta_y) * (delta_z) + + g_x_tl * (delta_x) * (delta_y) * (1 - delta_z) + + g_x_tr * (1 - delta_x) * (delta_y) * (1 - delta_z) + + g_x_tu * (delta_x) * (1 - delta_y) * (1 - delta_z) + + g_x_tru * (1 - delta_x) * (1 - delta_y) * (1 - delta_z); + + g_y = g_y_bl * (delta_x) * (delta_y) * (delta_z) + + g_y_br * (1 - delta_x) * (delta_y) * (delta_z) + + g_y_bu * (delta_x) * (1 - delta_y) * (delta_z) + + g_y_bru * (1 - delta_x) * (1 - delta_y) * (delta_z) + + g_y_tl * (delta_x) * (delta_y) * (1 - delta_z) + + g_y_tr * (1 - delta_x) * (delta_y) * (1 - delta_z) + + g_y_tu * (delta_x) * (1 - delta_y) * (1 - delta_z) + + g_y_tru * (1 - delta_x) * (1 - delta_y) * (1 - delta_z); + + g_z = g_z_bl * (delta_x) * (delta_y) * (delta_z) + + g_z_br * (1 - delta_x) * (delta_y) * (delta_z) + + g_z_bu * (delta_x) * (1 - delta_y) * (delta_z) + + g_z_bru * (1 - delta_x) * (1 - delta_y) * (delta_z) + + g_z_tl * (delta_x) * (delta_y) * (1 - delta_z) + + g_z_tr * (1 - delta_x) * (delta_y) * (1 - delta_z) + + g_z_tu * (delta_x) * (1 - delta_y) * (1 - delta_z) + + g_z_tru * (1 - delta_x) * (1 - delta_y) * (1 - delta_z); grav_x_dev[tid] = g_x; grav_y_dev[tid] = g_y; grav_z_dev[tid] = g_z; - } - -//Call the kernel to compote the gravitational field at the particles positions ( CIC ) -void Particles_3D::Get_Gravity_CIC_GPU_function( part_int_t n_local, int nx_local, int ny_local, int nz_local, int n_ghost_particles_grid, Real xMin, Real xMax, Real yMin, Real yMax, Real zMin, Real zMax, Real dx, Real dy, Real dz, Real *pos_x_dev, Real *pos_y_dev, Real *pos_z_dev, Real *grav_x_dev, Real *grav_y_dev, Real *grav_z_dev, Real *gravity_x_dev, Real *gravity_y_dev, Real *gravity_z_dev){ - +// Call the kernel to compote the gravitational field at the particles positions +// ( CIC ) +void Particles_3D::Get_Gravity_CIC_GPU_function( + part_int_t n_local, int nx_local, int ny_local, int nz_local, + int n_ghost_particles_grid, Real xMin, Real xMax, Real yMin, Real yMax, + Real zMin, Real zMax, Real dx, Real dy, Real dz, Real *pos_x_dev, + Real *pos_y_dev, Real *pos_z_dev, Real *grav_x_dev, Real *grav_y_dev, + Real *grav_z_dev, Real *gravity_x_dev, Real *gravity_y_dev, + Real *gravity_z_dev) +{ // set values for GPU kernels - int ngrid = (n_local + TPB_PARTICLES - 1) / TPB_PARTICLES; + int ngrid = (n_local + TPB_PARTICLES - 1) / TPB_PARTICLES; // number of blocks per 1D grid dim3 dim1dGrid(ngrid, 1, 1); // number of threads per 1D block @@ -254,48 +308,50 @@ void Particles_3D::Get_Gravity_CIC_GPU_function( part_int_t n_local, int nx_loca // Only runs if there are local particles if (n_local > 0) { - hipLaunchKernelGGL(Get_Gravity_CIC_Kernel, dim1dGrid, dim1dBlock, 0, 0, n_local, gravity_x_dev, - gravity_y_dev, gravity_z_dev, pos_x_dev, pos_y_dev, pos_z_dev, grav_x_dev, grav_y_dev, grav_z_dev, - xMin, yMin, zMin, xMax, yMax, zMax, dx, dy, dz, nx_local, ny_local, nz_local, - n_ghost_particles_grid); + hipLaunchKernelGGL(Get_Gravity_CIC_Kernel, dim1dGrid, dim1dBlock, 0, 0, + n_local, gravity_x_dev, gravity_y_dev, gravity_z_dev, + pos_x_dev, pos_y_dev, pos_z_dev, grav_x_dev, grav_y_dev, + grav_z_dev, xMin, yMin, zMin, xMax, yMax, zMax, dx, dy, + dz, nx_local, ny_local, nz_local, + n_ghost_particles_grid); CudaCheckError(); } - } -#endif //PARTICLES_GPU + #endif // PARTICLES_GPU -#ifdef GRAVITY_GPU - -void __global__ Copy_Particles_Density_Kernel( Real *dst_density, Real *src_density, int nx_local, int ny_local, int nz_local, int n_ghost ){ + #ifdef GRAVITY_GPU +void __global__ Copy_Particles_Density_Kernel(Real *dst_density, + Real *src_density, int nx_local, + int ny_local, int nz_local, + int n_ghost) +{ int tid_x, tid_y, tid_z, tid_CIC, tid_dens; tid_x = blockIdx.x * blockDim.x + threadIdx.x; tid_y = blockIdx.y * blockDim.y + threadIdx.y; tid_z = blockIdx.z * blockDim.z + threadIdx.z; - if (tid_x >= nx_local || tid_y >= ny_local || tid_z >= nz_local ) return; + if (tid_x >= nx_local || tid_y >= ny_local || tid_z >= nz_local) return; - tid_dens = tid_x + tid_y*nx_local + tid_z*nx_local*ny_local; + tid_dens = tid_x + tid_y * nx_local + tid_z * nx_local * ny_local; tid_x += n_ghost; tid_y += n_ghost; tid_z += n_ghost; int nx_CIC, ny_CIC; - nx_CIC = nx_local + 2*n_ghost; - ny_CIC = ny_local + 2*n_ghost; - tid_CIC = tid_x + tid_y*nx_CIC + tid_z*nx_CIC*ny_CIC; + nx_CIC = nx_local + 2 * n_ghost; + ny_CIC = ny_local + 2 * n_ghost; + tid_CIC = tid_x + tid_y * nx_CIC + tid_z * nx_CIC * ny_CIC; dst_density[tid_dens] = src_density[tid_CIC]; - } - - -//Copy the particles density to the density array in Grav to compute the potential -void Grid3D::Copy_Particles_Density_GPU( ){ - +// Copy the particles density to the density array in Grav to compute the +// potential +void Grid3D::Copy_Particles_Density_GPU() +{ int nx_local, ny_local, nz_local, n_ghost; n_ghost = Particles.G.n_ghost_particles_grid; nx_local = Grav.nx_local; @@ -303,9 +359,9 @@ void Grid3D::Copy_Particles_Density_GPU( ){ nz_local = Grav.nz_local; // set values for GPU kernels - int tpb_x = 16; - int tpb_y = 8; - int tpb_z = 8; + int tpb_x = 16; + int tpb_y = 8; + int tpb_z = 8; int ngrid_x = (nx_local - 1) / tpb_x + 1; int ngrid_y = (ny_local - 1) / tpb_y + 1; int ngrid_z = (nz_local - 1) / tpb_z + 1; @@ -314,10 +370,11 @@ void Grid3D::Copy_Particles_Density_GPU( ){ // number of threads per 1D block dim3 dim3dBlock(tpb_x, tpb_y, tpb_z); - hipLaunchKernelGGL( Copy_Particles_Density_Kernel, dim3dGrid, dim3dBlock, 0, 0, Grav.F.density_d, Particles.G.density_dev, nx_local, ny_local, nz_local, n_ghost ); + hipLaunchKernelGGL(Copy_Particles_Density_Kernel, dim3dGrid, dim3dBlock, 0, 0, + Grav.F.density_d, Particles.G.density_dev, nx_local, + ny_local, nz_local, n_ghost); } + #endif // GRAVITY_GPU -#endif//GRAVITY_GPU - -#endif//PARTICLES +#endif // PARTICLES diff --git a/src/particles/io_particles.cpp b/src/particles/io_particles.cpp index 0fb0fa37b..d82a28b2f 100644 --- a/src/particles/io_particles.cpp +++ b/src/particles/io_particles.cpp @@ -1,56 +1,60 @@ #ifdef PARTICLES -#include -#include -#include -#include -#include -#include -#include "../global/global.h" -#include "../grid/grid3D.h" -#include "../io/io.h" -#include "particles_3D.h" - -#ifdef HDF5 -#include -#endif -#ifdef MPI_CHOLLA -#include "../mpi/mpi_routines.h" -#endif + #include + #include + #include + #include + #include -// #define OUTPUT_PARTICLES_DATA + #include + + #include "../global/global.h" + #include "../grid/grid3D.h" + #include "../io/io.h" + #include "particles_3D.h" + + #ifdef HDF5 + #include + #endif + #ifdef MPI_CHOLLA + #include "../mpi/mpi_routines.h" + #endif +// #define OUTPUT_PARTICLES_DATA -void Particles_3D::Load_Particles_Data( struct parameters *P){ +void Particles_3D::Load_Particles_Data(struct parameters *P) +{ char filename[100]; char timestep[20]; - int nfile = P->nfile; //output step you want to read from + int nfile = P->nfile; // output step you want to read from char filename_counter[100]; // create the filename to read from strcpy(filename, P->indir); sprintf(timestep, "%d_particles", nfile); - strcat(filename,timestep); + strcat(filename, timestep); #if defined BINARY chprintf("\nERROR: Particles only support HDF5 outputs\n"); exit(-1); #elif defined HDF5 - strcat(filename,".h5"); + strcat(filename, ".h5"); #endif #ifdef MPI_CHOLLA - #ifdef TILED_INITIAL_CONDITIONS - sprintf(filename,"%sics_%dMpc_%d_particles.h5", P->indir, (int) P->tile_length/1000, G.nx_local); //Everyone reads the same file - #else - sprintf(filename,"%s.%d",filename,procID); - #endif //TILED_INITIAL_CONDITIONS + #ifdef TILED_INITIAL_CONDITIONS + sprintf(filename, "%sics_%dMpc_%d_particles.h5", P->indir, + (int)P->tile_length / 1000, + G.nx_local); // Everyone reads the same file + #else + sprintf(filename, "%s.%d", filename, procID); + #endif // TILED_INITIAL_CONDITIONS #endif - chprintf(" Loading particles file: %s \n", filename ); + chprintf(" Loading particles file: %s \n", filename); #ifdef HDF5 - hid_t file_id; - herr_t status; + hid_t file_id; + herr_t status; // open the file file_id = H5Fopen(filename, H5F_ACC_RDONLY, H5P_DEFAULT); @@ -59,128 +63,136 @@ void Particles_3D::Load_Particles_Data( struct parameters *P){ exit(0); } - Load_Particles_Data_HDF5(file_id, nfile, P ); + Load_Particles_Data_HDF5(file_id, nfile, P); #endif } - -void Grid3D::WriteData_Particles( struct parameters P, int nfile) +void Grid3D::WriteData_Particles(struct parameters P, int nfile) { // Write the particles data to file - OutputData_Particles( P, nfile); + OutputData_Particles(P, nfile); } + #ifdef HDF5 -#ifdef HDF5 - -void Particles_3D::Load_Particles_Data_HDF5(hid_t file_id, int nfile, struct parameters *P ) +void Particles_3D::Load_Particles_Data_HDF5(hid_t file_id, int nfile, + struct parameters *P) { int i, j, k, id, buf_id; - hid_t attribute_id, dataset_id; - Real *dataset_buffer_px; - Real *dataset_buffer_py; - Real *dataset_buffer_pz; - Real *dataset_buffer_vx; - Real *dataset_buffer_vy; - Real *dataset_buffer_vz; - Real *dataset_buffer_m; - #ifdef PARTICLE_AGE - Real *dataset_buffer_age; - #endif - herr_t status; + hid_t attribute_id, dataset_id; + Real *dataset_buffer_px; + Real *dataset_buffer_py; + Real *dataset_buffer_pz; + Real *dataset_buffer_vx; + Real *dataset_buffer_vy; + Real *dataset_buffer_vz; + Real *dataset_buffer_m; + #ifdef PARTICLE_AGE + Real *dataset_buffer_age; + #endif + herr_t status; part_int_t n_to_load, pIndx; attribute_id = H5Aopen(file_id, "n_particles_local", H5P_DEFAULT); - status = H5Aread(attribute_id, H5T_NATIVE_LONG, &n_to_load); - status = H5Aclose(attribute_id); + status = H5Aread(attribute_id, H5T_NATIVE_LONG, &n_to_load); + status = H5Aclose(attribute_id); - #ifdef COSMOLOGY + #ifdef COSMOLOGY attribute_id = H5Aopen(file_id, "current_z", H5P_DEFAULT); - status = H5Aread(attribute_id, H5T_NATIVE_DOUBLE, ¤t_z); - status = H5Aclose(attribute_id); + status = H5Aread(attribute_id, H5T_NATIVE_DOUBLE, ¤t_z); + status = H5Aclose(attribute_id); attribute_id = H5Aopen(file_id, "current_a", H5P_DEFAULT); - status = H5Aread(attribute_id, H5T_NATIVE_DOUBLE, ¤t_a); - status = H5Aclose(attribute_id); - #endif + status = H5Aread(attribute_id, H5T_NATIVE_DOUBLE, ¤t_a); + status = H5Aclose(attribute_id); + #endif - #ifdef SINGLE_PARTICLE_MASS + #ifdef SINGLE_PARTICLE_MASS attribute_id = H5Aopen(file_id, "particle_mass", H5P_DEFAULT); - status = H5Aread(attribute_id, H5T_NATIVE_DOUBLE, &particle_mass); - status = H5Aclose(attribute_id); - chprintf( " Using Single mass for DM particles: %f Msun/h\n", particle_mass); - #endif + status = H5Aread(attribute_id, H5T_NATIVE_DOUBLE, &particle_mass); + status = H5Aclose(attribute_id); + chprintf(" Using Single mass for DM particles: %f Msun/h\n", particle_mass); + #endif - #ifndef MPI_CHOLLA + #ifndef MPI_CHOLLA chprintf(" Loading %ld particles\n", n_to_load); - #else + #else part_int_t n_total_load; - n_total_load = ReducePartIntSum( n_to_load ); - chprintf( " Total Particles To Load: %ld\n", n_total_load ); + n_total_load = ReducePartIntSum(n_to_load); + chprintf(" Total Particles To Load: %ld\n", n_total_load); // Print individual n_to_load // for ( int i=0; itile_length; // Rescale the particles position to the global domain chprintf(" Rescaling the Tiled Particles Positions... \n"); - chprintf(" Tile length: %f kpc/h \n", tile_length ); - chprintf(" N_Procs Z: %d Y: %d X: %d \n", nproc_z, nproc_y, nproc_x ); + chprintf(" Tile length: %f kpc/h \n", tile_length); + chprintf(" N_Procs Z: %d Y: %d X: %d \n", nproc_z, nproc_y, nproc_x); bool tile_length_difference = false; - if ( fabs( Lx_local - tile_length ) / Lx_local > 1e-2 ) tile_length_difference = true; - if ( fabs( Ly_local - tile_length ) / Ly_local > 1e-2 ) tile_length_difference = true; - if ( fabs( Lz_local - tile_length ) / Lz_local > 1e-2 ) tile_length_difference = true; - - if ( tile_length_difference ){ - std::cout << " WARNING: Local Domain Length Different to Tile Length " << std::endl; - printf(" Domain Length: [ %f %f %f ]\n", Lz_local, Ly_local, Lx_local ); - printf(" Tile Length: %f \n", tile_length ); + if (fabs(Lx_local - tile_length) / Lx_local > 1e-2) + tile_length_difference = true; + if (fabs(Ly_local - tile_length) / Ly_local > 1e-2) + tile_length_difference = true; + if (fabs(Lz_local - tile_length) / Lz_local > 1e-2) + tile_length_difference = true; + + if (tile_length_difference) { + std::cout << " WARNING: Local Domain Length Different to Tile Length " + << std::endl; + printf(" Domain Length: [ %f %f %f ]\n", Lz_local, Ly_local, + Lx_local); + printf(" Tile Length: %f \n", tile_length); } - #endif + #endif - //Loop over to input buffers and load each particle - for( pIndx=0; pIndx G.domainMax_x ){ + if (pPos_x < G.domainMin_x || pPos_x > G.domainMax_x) { std::cout << " Particle outside global domain " << std::endl; } - if ( pPos_y < G.domainMin_y || pPos_y > G.domainMax_y ){ + if (pPos_y < G.domainMin_y || pPos_y > G.domainMax_y) { std::cout << " Particle outside global domain " << std::endl; } - if ( pPos_z < G.domainMin_z || pPos_z > G.domainMax_z ){ + if (pPos_z < G.domainMin_z || pPos_z > G.domainMax_z) { std::cout << " Particle outside global domain " << std::endl; } - if ( pPos_x < G.xMin || pPos_x >= G.xMax ) in_local = false; - if ( pPos_y < G.yMin || pPos_y >= G.yMax ) in_local = false; - if ( pPos_z < G.zMin || pPos_z >= G.zMax ) in_local = false; - if ( ! in_local ) { - #ifdef PARTICLE_IDS - std::cout << " Particle outside Local domain pID: " << pID << std::endl; - #else + if (pPos_x < G.xMin || pPos_x >= G.xMax) in_local = false; + if (pPos_y < G.yMin || pPos_y >= G.yMax) in_local = false; + if (pPos_z < G.zMin || pPos_z >= G.zMax) in_local = false; + if (!in_local) { + #ifdef PARTICLE_IDS + std::cout << " Particle outside Local domain pID: " << pID + << std::endl; + #else std::cout << " Particle outside Local domain " << std::endl; - #endif - std::cout << " Domain X: " << G.xMin << " " << G.xMax << std::endl; - std::cout << " Domain Y: " << G.yMin << " " << G.yMax << std::endl; - std::cout << " Domain Z: " << G.zMin << " " << G.zMax << std::endl; + #endif + std::cout << " Domain X: " << G.xMin << " " << G.xMax << std::endl; + std::cout << " Domain Y: " << G.yMin << " " << G.yMax << std::endl; + std::cout << " Domain Z: " << G.zMin << " " << G.zMax << std::endl; std::cout << " Particle X: " << pPos_x << std::endl; std::cout << " Particle Y: " << pPos_y << std::endl; std::cout << " Particle Z: " << pPos_z << std::endl; continue; } - //Keep track of the max and min position and velocity to print Initial Statistics - if ( pPos_x > px_max ) px_max = pPos_x; - if ( pPos_y > py_max ) py_max = pPos_y; - if ( pPos_z > pz_max ) pz_max = pPos_z; + // Keep track of the max and min position and velocity to print Initial + // Statistics + if (pPos_x > px_max) px_max = pPos_x; + if (pPos_y > py_max) py_max = pPos_y; + if (pPos_z > pz_max) pz_max = pPos_z; - if ( pPos_x < px_min ) px_min = pPos_x; - if ( pPos_y < py_min ) py_min = pPos_y; - if ( pPos_z < pz_min ) pz_min = pPos_z; + if (pPos_x < px_min) px_min = pPos_x; + if (pPos_y < py_min) py_min = pPos_y; + if (pPos_z < pz_min) pz_min = pPos_z; - if ( pVel_x > vx_max ) vx_max = pVel_x; - if ( pVel_y > vy_max ) vy_max = pVel_y; - if ( pVel_z > vz_max ) vz_max = pVel_z; + if (pVel_x > vx_max) vx_max = pVel_x; + if (pVel_y > vy_max) vy_max = pVel_y; + if (pVel_z > vz_max) vz_max = pVel_z; - if ( pVel_x < vx_min ) vx_min = pVel_x; - if ( pVel_y < vy_min ) vy_min = pVel_y; - if ( pVel_z < vz_min ) vz_min = pVel_z; + if (pVel_x < vx_min) vx_min = pVel_x; + if (pVel_y < vy_min) vy_min = pVel_y; + if (pVel_z < vz_min) vz_min = pVel_z; #ifdef PARTICLES_CPU - //Add the particle data to the particles vectors - pos_x.push_back( pPos_x ); - pos_y.push_back( pPos_y ); - pos_z.push_back( pPos_z ); - vel_x.push_back( pVel_x ); - vel_y.push_back( pVel_y ); - vel_z.push_back( pVel_z ); - grav_x.push_back( 0.0 ); - grav_y.push_back( 0.0 ); - grav_z.push_back( 0.0 ); - #ifndef SINGLE_PARTICLE_MASS - mass.push_back( pMass ); - #endif - #ifdef PARTICLE_IDS + // Add the particle data to the particles vectors + pos_x.push_back(pPos_x); + pos_y.push_back(pPos_y); + pos_z.push_back(pPos_z); + vel_x.push_back(pVel_x); + vel_y.push_back(pVel_y); + vel_z.push_back(pVel_z); + grav_x.push_back(0.0); + grav_y.push_back(0.0); + grav_z.push_back(0.0); + #ifndef SINGLE_PARTICLE_MASS + mass.push_back(pMass); + #endif + #ifdef PARTICLE_IDS partIDs.push_back(pID); - #endif - #ifdef PARTICLE_AGE - age.push_back( pAge ); - #endif - n_local += 1; //Add 1 to the local number of particles - #endif//PARTICLES_CPU + #endif + #ifdef PARTICLE_AGE + age.push_back(pAge); + #endif + n_local += 1; // Add 1 to the local number of particles + #endif // PARTICLES_CPU } - #ifdef PARTICLES_GPU + #ifdef PARTICLES_GPU // Alocate memory in GPU for particle data // particles_array_size = (part_int_t) n_to_load; - particles_array_size = Compute_Particles_GPU_Array_Size( n_to_load ); - chprintf( " Allocating GPU buffer size: %ld * %f = %ld \n", n_to_load, G.gpu_allocation_factor, particles_array_size); - Allocate_Particles_GPU_Array_Real( &pos_x_dev, particles_array_size); - Allocate_Particles_GPU_Array_Real( &pos_y_dev, particles_array_size); - Allocate_Particles_GPU_Array_Real( &pos_z_dev, particles_array_size); - Allocate_Particles_GPU_Array_Real( &vel_x_dev, particles_array_size); - Allocate_Particles_GPU_Array_Real( &vel_y_dev, particles_array_size); - Allocate_Particles_GPU_Array_Real( &vel_z_dev, particles_array_size); - Allocate_Particles_GPU_Array_Real( &grav_x_dev, particles_array_size); - Allocate_Particles_GPU_Array_Real( &grav_y_dev, particles_array_size); - Allocate_Particles_GPU_Array_Real( &grav_z_dev, particles_array_size); - #ifndef SINGLE_PARTICLE_MASS - Allocate_Particles_GPU_Array_Real( &mass_dev, particles_array_size); - #endif - #ifdef PARTICLE_IDS - Allocate_Particles_GPU_Array_Part_Int( &partIDs_dev, particles_array_size); - #endif - #ifdef PARTICLE_AGE - Allocate_Particles_GPU_Array_Real( &age_dev, particles_array_size); - #endif + particles_array_size = Compute_Particles_GPU_Array_Size(n_to_load); + chprintf(" Allocating GPU buffer size: %ld * %f = %ld \n", n_to_load, + G.gpu_allocation_factor, particles_array_size); + Allocate_Particles_GPU_Array_Real(&pos_x_dev, particles_array_size); + Allocate_Particles_GPU_Array_Real(&pos_y_dev, particles_array_size); + Allocate_Particles_GPU_Array_Real(&pos_z_dev, particles_array_size); + Allocate_Particles_GPU_Array_Real(&vel_x_dev, particles_array_size); + Allocate_Particles_GPU_Array_Real(&vel_y_dev, particles_array_size); + Allocate_Particles_GPU_Array_Real(&vel_z_dev, particles_array_size); + Allocate_Particles_GPU_Array_Real(&grav_x_dev, particles_array_size); + Allocate_Particles_GPU_Array_Real(&grav_y_dev, particles_array_size); + Allocate_Particles_GPU_Array_Real(&grav_z_dev, particles_array_size); + #ifndef SINGLE_PARTICLE_MASS + Allocate_Particles_GPU_Array_Real(&mass_dev, particles_array_size); + #endif + #ifdef PARTICLE_IDS + Allocate_Particles_GPU_Array_Part_Int(&partIDs_dev, particles_array_size); + #endif + #ifdef PARTICLE_AGE + Allocate_Particles_GPU_Array_Real(&age_dev, particles_array_size); + #endif n_local = n_to_load; - chprintf( " Allocated GPU memory for particle data\n"); + chprintf(" Allocated GPU memory for particle data\n"); // printf( " Loaded %ld particles ", n_to_load); - //Copy the particle data to GPU memory - Copy_Particles_Array_Real_Host_to_Device( dataset_buffer_px, pos_x_dev, n_local); - Copy_Particles_Array_Real_Host_to_Device( dataset_buffer_py, pos_y_dev, n_local); - Copy_Particles_Array_Real_Host_to_Device( dataset_buffer_pz, pos_z_dev, n_local); - Copy_Particles_Array_Real_Host_to_Device( dataset_buffer_vx, vel_x_dev, n_local); - Copy_Particles_Array_Real_Host_to_Device( dataset_buffer_vy, vel_y_dev, n_local); - Copy_Particles_Array_Real_Host_to_Device( dataset_buffer_vz, vel_z_dev, n_local); - #ifndef SINGLE_PARTICLE_MASS - Copy_Particles_Array_Real_Host_to_Device( dataset_buffer_m, mass_dev, n_local); - #endif - #ifdef PARTICLE_IDS - Copy_Particles_Array_Int_Host_to_Device( dataset_buffer_IDs, partIDs_dev, n_local); - #endif - #ifdef PARTICLE_AGE - Copy_Particles_Array_Real_Host_to_Device( dataset_buffer_age, age_dev, n_local); - #endif - #endif //PARTICLES_GPU + // Copy the particle data to GPU memory + Copy_Particles_Array_Real_Host_to_Device(dataset_buffer_px, pos_x_dev, + n_local); + Copy_Particles_Array_Real_Host_to_Device(dataset_buffer_py, pos_y_dev, + n_local); + Copy_Particles_Array_Real_Host_to_Device(dataset_buffer_pz, pos_z_dev, + n_local); + Copy_Particles_Array_Real_Host_to_Device(dataset_buffer_vx, vel_x_dev, + n_local); + Copy_Particles_Array_Real_Host_to_Device(dataset_buffer_vy, vel_y_dev, + n_local); + Copy_Particles_Array_Real_Host_to_Device(dataset_buffer_vz, vel_z_dev, + n_local); + #ifndef SINGLE_PARTICLE_MASS + Copy_Particles_Array_Real_Host_to_Device(dataset_buffer_m, mass_dev, n_local); + #endif + #ifdef PARTICLE_IDS + Copy_Particles_Array_Int_Host_to_Device(dataset_buffer_IDs, partIDs_dev, + n_local); + #endif + #ifdef PARTICLE_AGE + Copy_Particles_Array_Real_Host_to_Device(dataset_buffer_age, age_dev, + n_local); + #endif + #endif // PARTICLES_GPU - #ifndef MPI_CHOLLA - chprintf( " Loaded %ld particles\n", n_local ); - #else + #ifndef MPI_CHOLLA + chprintf(" Loaded %ld particles\n", n_local); + #else MPI_Barrier(world); part_int_t n_total_loaded; - n_total_loaded = ReducePartIntSum( n_local ); + n_total_loaded = ReducePartIntSum(n_local); n_total_initial = n_total_loaded; - chprintf( " Total Particles Loaded: %ld\n", n_total_loaded ); - #endif + chprintf(" Total Particles Loaded: %ld\n", n_total_loaded); + #endif - #ifdef MPI_CHOLLA - Real px_max_g = ReduceRealMax( px_max ); - Real py_max_g = ReduceRealMax( py_max ); - Real pz_max_g = ReduceRealMax( pz_max ); - Real vx_max_g = ReduceRealMax( vx_max ); - Real vy_max_g = ReduceRealMax( vy_max ); - Real vz_max_g = ReduceRealMax( vz_max ); - - Real px_min_g = ReduceRealMin( px_min ); - Real py_min_g = ReduceRealMin( py_min ); - Real pz_min_g = ReduceRealMin( pz_min ); - Real vx_min_g = ReduceRealMin( vx_min ); - Real vy_min_g = ReduceRealMin( vy_min ); - Real vz_min_g = ReduceRealMin( vz_min ); - #else + #ifdef MPI_CHOLLA + Real px_max_g = ReduceRealMax(px_max); + Real py_max_g = ReduceRealMax(py_max); + Real pz_max_g = ReduceRealMax(pz_max); + Real vx_max_g = ReduceRealMax(vx_max); + Real vy_max_g = ReduceRealMax(vy_max); + Real vz_max_g = ReduceRealMax(vz_max); + + Real px_min_g = ReduceRealMin(px_min); + Real py_min_g = ReduceRealMin(py_min); + Real pz_min_g = ReduceRealMin(pz_min); + Real vx_min_g = ReduceRealMin(vx_min); + Real vy_min_g = ReduceRealMin(vy_min); + Real vz_min_g = ReduceRealMin(vz_min); + #else Real px_max_g = px_max; Real py_max_g = py_max; Real pz_max_g = pz_max; @@ -417,316 +447,359 @@ void Particles_3D::Load_Particles_Data_HDF5(hid_t file_id, int nfile, struct par Real vy_max_g = vy_max; Real vz_max_g = vz_max; - Real px_min_g = px_min; - Real py_min_g = py_min; - Real pz_min_g = pz_min; - Real vx_min_g = vx_min; - Real vy_min_g = vy_min; - Real vz_min_g = vz_min; - #endif//MPI_CHOLLA - - //Print initial Statistics - #if defined(PRINT_INITIAL_STATS) && defined(COSMOLOGY) - chprintf( " Pos X Min: %f Max: %f [ kpc/h ]\n", px_min_g, px_max_g); - chprintf( " Pos Y Min: %f Max: %f [ kpc/h ]\n", py_min_g, py_max_g); - chprintf( " Pos Z Min: %f Max: %f [ kpc/h ]\n", pz_min_g, pz_max_g); - chprintf( " Vel X Min: %f Max: %f [ km/s ]\n", vx_min_g, vx_max_g); - chprintf( " Vel Y Min: %f Max: %f [ km/s ]\n", vy_min_g, vy_max_g); - chprintf( " Vel Z Min: %f Max: %f [ km/s ]\n", vz_min_g, vz_max_g); - #endif//PRINT_INITIAL_STATS - - //Free the buffers to used to load the hdf5 files + Real px_min_g = px_min; + Real py_min_g = py_min; + Real pz_min_g = pz_min; + Real vx_min_g = vx_min; + Real vy_min_g = vy_min; + Real vz_min_g = vz_min; + #endif // MPI_CHOLLA + + // Print initial Statistics + #if defined(PRINT_INITIAL_STATS) && defined(COSMOLOGY) + chprintf(" Pos X Min: %f Max: %f [ kpc/h ]\n", px_min_g, px_max_g); + chprintf(" Pos Y Min: %f Max: %f [ kpc/h ]\n", py_min_g, py_max_g); + chprintf(" Pos Z Min: %f Max: %f [ kpc/h ]\n", pz_min_g, pz_max_g); + chprintf(" Vel X Min: %f Max: %f [ km/s ]\n", vx_min_g, vx_max_g); + chprintf(" Vel Y Min: %f Max: %f [ km/s ]\n", vy_min_g, vy_max_g); + chprintf(" Vel Z Min: %f Max: %f [ km/s ]\n", vz_min_g, vz_max_g); + #endif // PRINT_INITIAL_STATS + + // Free the buffers to used to load the hdf5 files free(dataset_buffer_px); free(dataset_buffer_py); free(dataset_buffer_pz); free(dataset_buffer_vx); free(dataset_buffer_vy); free(dataset_buffer_vz); - #ifndef SINGLE_PARTICLE_MASS + #ifndef SINGLE_PARTICLE_MASS free(dataset_buffer_m); - #endif - #ifdef PARTICLE_IDS + #endif + #ifdef PARTICLE_IDS free(dataset_buffer_IDs); - #endif - #ifdef PARTICLE_AGE + #endif + #ifdef PARTICLE_AGE free(dataset_buffer_age); - #endif + #endif } - /*! \fn void Write_Header_HDF5(hid_t file_id) * \brief Write the relevant header info to the HDF5 file. */ -void Grid3D::Write_Particles_Header_HDF5( hid_t file_id){ - hid_t attribute_id, dataspace_id; - herr_t status; - hsize_t attr_dims; - int int_data[3]; - Real Real_data[3]; +void Grid3D::Write_Particles_Header_HDF5(hid_t file_id) +{ + hid_t attribute_id, dataspace_id; + herr_t status; + hsize_t attr_dims; + int int_data[3]; + Real Real_data[3]; // Single attributes first attr_dims = 1; // Create the data space for the attribute dataspace_id = H5Screate_simple(1, &attr_dims, NULL); // Create a group attribute - attribute_id = H5Acreate(file_id, "t_particles", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT); + attribute_id = H5Acreate(file_id, "t_particles", H5T_IEEE_F64BE, dataspace_id, + H5P_DEFAULT, H5P_DEFAULT); // Write the attribute data status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &Particles.t); // Close the attribute + status = H5Aclose(attribute_id); + attribute_id = H5Acreate(file_id, "dt_particles", H5T_IEEE_F64BE, + dataspace_id, H5P_DEFAULT, H5P_DEFAULT); + status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &Particles.dt); + status = H5Aclose(attribute_id); + attribute_id = H5Acreate(file_id, "n_particles_local", H5T_STD_I64BE, + dataspace_id, H5P_DEFAULT, H5P_DEFAULT); + status = H5Awrite(attribute_id, H5T_NATIVE_ULONG, &Particles.n_local); + status = H5Aclose(attribute_id); + + #ifdef SINGLE_PARTICLE_MASS + attribute_id = H5Acreate(file_id, "particle_mass", H5T_IEEE_F64BE, + dataspace_id, H5P_DEFAULT, H5P_DEFAULT); + status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &Particles.particle_mass); status = H5Aclose(attribute_id); - attribute_id = H5Acreate(file_id, "dt_particles", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT); - status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &Particles.dt); - status = H5Aclose(attribute_id); - attribute_id = H5Acreate(file_id, "n_particles_local", H5T_STD_I64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT); - status = H5Awrite(attribute_id, H5T_NATIVE_ULONG, &Particles.n_local); - status = H5Aclose(attribute_id); + #endif + #ifdef COSMOLOGY + attribute_id = H5Acreate(file_id, "current_z", H5T_IEEE_F64BE, dataspace_id, + H5P_DEFAULT, H5P_DEFAULT); + status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &Cosmo.current_z); + status = H5Aclose(attribute_id); - #ifdef SINGLE_PARTICLE_MASS - attribute_id = H5Acreate(file_id, "particle_mass", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT); - status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &Particles.particle_mass); - status = H5Aclose(attribute_id); - #endif - - #ifdef COSMOLOGY - attribute_id = H5Acreate(file_id, "current_z", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT); - status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &Cosmo.current_z); - status = H5Aclose(attribute_id); - - attribute_id = H5Acreate(file_id, "current_a", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT); - status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &Cosmo.current_a); - status = H5Aclose(attribute_id); - #endif + attribute_id = H5Acreate(file_id, "current_a", H5T_IEEE_F64BE, dataspace_id, + H5P_DEFAULT, H5P_DEFAULT); + status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &Cosmo.current_a); + status = H5Aclose(attribute_id); + #endif status = H5Sclose(dataspace_id); - } - -void Grid3D::Write_Particles_Data_HDF5( hid_t file_id){ +void Grid3D::Write_Particles_Data_HDF5(hid_t file_id) +{ part_int_t i, j, k, id, buf_id; - hid_t dataset_id, dataspace_id; - Real *dataset_buffer; - #ifdef PARTICLE_IDS - part_int_t *dataset_buffer_IDs; - #endif - herr_t status; + hid_t dataset_id, dataspace_id; + Real *dataset_buffer; + #ifdef PARTICLE_IDS + part_int_t *dataset_buffer_IDs; + #endif + herr_t status; part_int_t n_local = Particles.n_local; - hsize_t dims[1]; - dataset_buffer = (Real *) malloc(n_local*sizeof(Real)); + hsize_t dims[1]; + dataset_buffer = (Real *)malloc(n_local * sizeof(Real)); bool output_particle_data; - #ifdef OUTPUT_PARTICLES_DATA + #ifdef OUTPUT_PARTICLES_DATA output_particle_data = true; - #else + #else output_particle_data = false; - #endif - - #ifdef PARTICLES_GPU - //Copy the device arrays from the device to the host - CudaSafeCall( cudaMemcpy(Particles.G.density, Particles.G.density_dev, Particles.G.n_cells*sizeof(Real), cudaMemcpyDeviceToHost) ); - #endif//PARTICLES_GPU - #if defined(OUTPUT_POTENTIAL) && defined(ONLY_PARTICLES) && defined(GRAVITY_GPU) - CudaSafeCall( cudaMemcpy(Grav.F.potential_h, Grav.F.potential_d, Grav.n_cells_potential*sizeof(Real), cudaMemcpyDeviceToHost) ); - #endif//OUTPUT_POTENTIAL - + #endif + #ifdef PARTICLES_GPU + // Copy the device arrays from the device to the host + CudaSafeCall(cudaMemcpy(Particles.G.density, Particles.G.density_dev, + Particles.G.n_cells * sizeof(Real), + cudaMemcpyDeviceToHost)); + #endif // PARTICLES_GPU + #if defined(OUTPUT_POTENTIAL) && defined(ONLY_PARTICLES) && \ + defined(GRAVITY_GPU) + CudaSafeCall(cudaMemcpy(Grav.F.potential_h, Grav.F.potential_d, + Grav.n_cells_potential * sizeof(Real), + cudaMemcpyDeviceToHost)); + #endif // OUTPUT_POTENTIAL // Count Current Total Particles part_int_t N_particles_total; - #ifdef MPI_CHOLLA - N_particles_total = ReducePartIntSum( Particles.n_local ); - #else - N_particles_total = Particles.n_local; - #endif - - //Print the total particles when saving the particles data - chprintf( " Total Particles: %ld\n", N_particles_total ); + #ifdef MPI_CHOLLA + N_particles_total = ReducePartIntSum(Particles.n_local); + #else + N_particles_total = Particles.n_local; + #endif - //Print a warning if the number of particles has changed from the initial number of particles. - //This will indicate an error on the Particles transfers. - if ( N_particles_total != Particles.n_total_initial ) chprintf( " WARNING: Lost Particles: %d \n", Particles.n_total_initial - N_particles_total ); + // Print the total particles when saving the particles data + chprintf(" Total Particles: %ld\n", N_particles_total); + // Print a warning if the number of particles has changed from the initial + // number of particles. This will indicate an error on the Particles + // transfers. + if (N_particles_total != Particles.n_total_initial) + chprintf(" WARNING: Lost Particles: %d \n", + Particles.n_total_initial - N_particles_total); // Create the data space for the datasets - dims[0] = n_local; + dims[0] = n_local; dataspace_id = H5Screate_simple(1, dims, NULL); - //Copy the particles data to the hdf5_buffers and create the data_sets - - // Copy the pos_x vector to the memory buffer - #ifdef PARTICLES_CPU - for ( i=0; i -#include -#include -#include -#include -#include "../io/io.h" -#include "../grid/grid3D.h" -#include "../utils/prng_utilities.h" -#include "../model/disk_galaxy.h" -#include "particles_3D.h" -#include "../utils/error_handling.h" - -#ifdef MPI_CHOLLA -#include "../mpi/mpi_routines.h" -#endif - -#ifdef PARALLEL_OMP -#include "../utils/parallel_omp.h" -#endif - -Particles_3D::Particles_3D( void ): - TRANSFER_DENSITY_BOUNDARIES(false), - TRANSFER_PARTICLES_BOUNDARIES(false) -{} - -void Grid3D::Initialize_Particles( struct parameters *P ){ - - chprintf( "\nInitializing Particles...\n"); - - Particles.Initialize( P, Grav, H.xbound, H.ybound, H.zbound, H.xdglobal, H.ydglobal, H.zdglobal ); - - #if defined (PARTICLES_GPU) && defined (GRAVITY_GPU) - // Set the GPU array for the particles potential equal to the Gravity GPU array for the potential - Particles.G.potential_dev = Grav.F.potential_d; - #endif + #include "particles_3D.h" + + #include - if (strcmp(P->init, "Uniform")==0) Initialize_Uniform_Particles(); + #include + #include + #include + #include + + #include "../grid/grid3D.h" + #include "../io/io.h" + #include "../model/disk_galaxy.h" + #include "../utils/error_handling.h" + #include "../utils/prng_utilities.h" #ifdef MPI_CHOLLA - MPI_Barrier( world ); + #include "../mpi/mpi_routines.h" #endif - chprintf( "Particles Initialized Successfully. \n\n"); + #ifdef PARALLEL_OMP + #include "../utils/parallel_omp.h" + #endif +Particles_3D::Particles_3D(void) + : TRANSFER_DENSITY_BOUNDARIES(false), TRANSFER_PARTICLES_BOUNDARIES(false) +{ } -void Particles_3D::Initialize( struct parameters *P, Grav3D &Grav, Real xbound, Real ybound, Real zbound, Real xdglobal, Real ydglobal, Real zdglobal){ +void Grid3D::Initialize_Particles(struct parameters *P) +{ + chprintf("\nInitializing Particles...\n"); - //Initialize local and total number of particles to 0 - n_local = 0; - n_total = 0; + Particles.Initialize(P, Grav, H.xbound, H.ybound, H.zbound, H.xdglobal, + H.ydglobal, H.zdglobal); + + #if defined(PARTICLES_GPU) && defined(GRAVITY_GPU) + // Set the GPU array for the particles potential equal to the Gravity GPU + // array for the potential + Particles.G.potential_dev = Grav.F.potential_d; + #endif + + if (strcmp(P->init, "Uniform") == 0) Initialize_Uniform_Particles(); + + #ifdef MPI_CHOLLA + MPI_Barrier(world); + #endif + chprintf("Particles Initialized Successfully. \n\n"); +} + +void Particles_3D::Initialize(struct parameters *P, Grav3D &Grav, Real xbound, + Real ybound, Real zbound, Real xdglobal, + Real ydglobal, Real zdglobal) +{ + // Initialize local and total number of particles to 0 + n_local = 0; + n_total = 0; n_total_initial = 0; - //Initialize the simulation time and delta_t to 0 + // Initialize the simulation time and delta_t to 0 dt = 0.0; - t = 0.0; - //Set the maximum delta_t for particles, this can be changed depending on the problem. + t = 0.0; + // Set the maximum delta_t for particles, this can be changed depending on the + // problem. max_dt = 10000; - //Courant CFL condition factor for particles + // Courant CFL condition factor for particles C_cfl = 0.3; #ifndef SINGLE_PARTICLE_MASS - particle_mass = 0; //The particle masses are stored in a separate array + particle_mass = 0; // The particle masses are stored in a separate array #endif #ifdef PARTICLES_CPU - //Vectors for positions, velocities and accelerations + // Vectors for positions, velocities and accelerations real_vector_t pos_x; real_vector_t pos_y; real_vector_t pos_z; @@ -78,32 +84,33 @@ void Particles_3D::Initialize( struct parameters *P, Grav3D &Grav, Real xbound, real_vector_t grav_y; real_vector_t grav_z; - #ifndef SINGLE_PARTICLE_MASS - //Vector for masses + #ifndef SINGLE_PARTICLE_MASS + // Vector for masses real_vector_t mass; - #endif - #ifdef PARTICLE_IDS - //Vector for particle IDs + #endif + #ifdef PARTICLE_IDS + // Vector for particle IDs int_vector_t partIDs; - #endif - #ifdef PARTICLE_AGE + #endif + #ifdef PARTICLE_AGE real_vector_t age; - #endif + #endif - #ifdef MPI_CHOLLA - //Vectors for the indices of the particles that need to be transferred via MPI + #ifdef MPI_CHOLLA + // Vectors for the indices of the particles that need to be transferred via + // MPI int_vector_t out_indxs_vec_x0; int_vector_t out_indxs_vec_x1; int_vector_t out_indxs_vec_y0; int_vector_t out_indxs_vec_y1; int_vector_t out_indxs_vec_z0; int_vector_t out_indxs_vec_z1; - #endif + #endif - #endif //PARTICLES_CPU + #endif // PARTICLES_CPU - //Initialize Grid Values - //Local and total number of cells + // Initialize Grid Values + // Local and total number of cells G.nx_local = Grav.nx_local; G.ny_local = Grav.ny_local; G.nz_local = Grav.nz_local; @@ -111,38 +118,41 @@ void Particles_3D::Initialize( struct parameters *P, Grav3D &Grav, Real xbound, G.ny_total = Grav.ny_total; G.nz_total = Grav.nz_total; - //Uniform (dx, dy, dz) + // Uniform (dx, dy, dz) G.dx = Grav.dx; G.dy = Grav.dy; G.dz = Grav.dz; - //Left boundaries of the local domain + // Left boundaries of the local domain G.xMin = Grav.xMin; G.yMin = Grav.yMin; G.zMin = Grav.zMin; - //Right boundaries of the local domain + // Right boundaries of the local domain G.xMax = Grav.xMax; G.yMax = Grav.yMax; G.zMax = Grav.zMax; - //Left boundaries of the global domain + // Left boundaries of the global domain G.domainMin_x = xbound; G.domainMin_y = ybound; G.domainMin_z = zbound; - //Right boundaries of the global domain + // Right boundaries of the global domain G.domainMax_x = xbound + xdglobal; G.domainMax_y = ybound + ydglobal; G.domainMax_z = zbound + zdglobal; - //Number of ghost cells for the particles grid. For CIC one ghost cell is needed + // Number of ghost cells for the particles grid. For CIC one ghost cell is + // needed G.n_ghost_particles_grid = 1; - //Number of cells for the particles grid including ghost cells - G.n_cells = (G.nx_local+2*G.n_ghost_particles_grid) * (G.ny_local+2*G.n_ghost_particles_grid) * (G.nz_local+2*G.n_ghost_particles_grid); + // Number of cells for the particles grid including ghost cells + G.n_cells = (G.nx_local + 2 * G.n_ghost_particles_grid) * + (G.ny_local + 2 * G.n_ghost_particles_grid) * + (G.nz_local + 2 * G.n_ghost_particles_grid); - //Set the boundary types + // Set the boundary types #ifdef MPI_CHOLLA G.boundary_type_x0 = P->xlg_bcnd; G.boundary_type_x1 = P->xug_bcnd; @@ -151,71 +161,84 @@ void Particles_3D::Initialize( struct parameters *P, Grav3D &Grav, Real xbound, G.boundary_type_z0 = P->zlg_bcnd; G.boundary_type_z1 = P->zug_bcnd; #else - G.boundary_type_x0 = P->xl_bcnd; - G.boundary_type_x1 = P->xu_bcnd; - G.boundary_type_y0 = P->yl_bcnd; - G.boundary_type_y1 = P->yu_bcnd; - G.boundary_type_z0 = P->zl_bcnd; - G.boundary_type_z1 = P->zu_bcnd; + G.boundary_type_x0 = P->xl_bcnd; + G.boundary_type_x1 = P->xu_bcnd; + G.boundary_type_y0 = P->yl_bcnd; + G.boundary_type_y1 = P->yu_bcnd; + G.boundary_type_z0 = P->zl_bcnd; + G.boundary_type_z1 = P->zu_bcnd; #endif - + #ifdef PARTICLES_GPU - //Factor to allocate the particles data arrays on the GPU. - //When using MPI particles will be transferred to other GPU, for that reason we need extra memory allocated - #ifdef MPI_CHOLLA + // Factor to allocate the particles data arrays on the GPU. + // When using MPI particles will be transferred to other GPU, for that + // reason we need extra memory allocated + #ifdef MPI_CHOLLA G.gpu_allocation_factor = 1.25; - #else + #else G.gpu_allocation_factor = 1.0; - #endif - - G.size_blocks_array = 1024*128; - G.n_cells_potential = ( G.nx_local + 2*N_GHOST_POTENTIAL ) * ( G.ny_local + 2*N_GHOST_POTENTIAL ) * ( G.nz_local + 2*N_GHOST_POTENTIAL ); + #endif - #ifdef SINGLE_PARTICLE_MASS - mass_dev = NULL; //This array won't be used - #endif + G.size_blocks_array = 1024 * 128; + G.n_cells_potential = (G.nx_local + 2 * N_GHOST_POTENTIAL) * + (G.ny_local + 2 * N_GHOST_POTENTIAL) * + (G.nz_local + 2 * N_GHOST_POTENTIAL); + #ifdef SINGLE_PARTICLE_MASS + mass_dev = NULL; // This array won't be used + #endif - #endif //PARTICLES_GPU + #endif // PARTICLES_GPU // Flags for Initial and tranfer the particles and density - INITIAL = true; - TRANSFER_DENSITY_BOUNDARIES = false; + INITIAL = true; + TRANSFER_DENSITY_BOUNDARIES = false; TRANSFER_PARTICLES_BOUNDARIES = false; Allocate_Memory(); - //Initialize the particles density and gravitational field to 0. + // Initialize the particles density and gravitational field to 0. Initialize_Grid_Values(); // Initialize Particles - if (strcmp(P->init, "Spherical_Overdensity_3D")==0) Initialize_Sphere(P); - else if (strcmp(P->init, "Zeldovich_Pancake")==0) Initialize_Zeldovich_Pancake( P ); - else if (strcmp(P->init, "Read_Grid")==0) Load_Particles_Data( P ); - #if defined(PARTICLE_AGE) && !defined(SINGLE_PARTICLE_MASS) && defined(PARTICLE_IDS) - else if (strcmp(P->init, "Disk_3D_particles") == 0) Initialize_Disk_Stellar_Clusters(P); + if (strcmp(P->init, "Spherical_Overdensity_3D") == 0) + Initialize_Sphere(P); + else if (strcmp(P->init, "Zeldovich_Pancake") == 0) + Initialize_Zeldovich_Pancake(P); + else if (strcmp(P->init, "Read_Grid") == 0) + Load_Particles_Data(P); + #if defined(PARTICLE_AGE) && !defined(SINGLE_PARTICLE_MASS) && \ + defined(PARTICLE_IDS) + else if (strcmp(P->init, "Disk_3D_particles") == 0) + Initialize_Disk_Stellar_Clusters(P); #endif #ifdef MPI_CHOLLA n_total_initial = ReducePartIntSum(n_local); #else - n_total_initial = n_local; + n_total_initial = n_local; #endif - chprintf("Particles Initialized: \n n_local: %lu \n", n_local ); - chprintf(" n_total: %lu \n", n_total_initial ); - chprintf(" xDomain_local: [%.4f %.4f ] [%.4f %.4f ] [%.4f %.4f ]\n", G.xMin, G.xMax, G.yMin, G.yMax, G.zMin, G.zMax ); - chprintf(" xDomain_global: [%.4f %.4f ] [%.4f %.4f ] [%.4f %.4f ]\n", G.domainMin_x, G.domainMax_x, G.domainMin_y, G.domainMax_y, G.domainMin_z, G.domainMax_z); - chprintf(" dx: %f %f %f\n", G.dx, G.dy, G.dz ); + chprintf("Particles Initialized: \n n_local: %lu \n", n_local); + chprintf(" n_total: %lu \n", n_total_initial); + chprintf(" xDomain_local: [%.4f %.4f ] [%.4f %.4f ] [%.4f %.4f ]\n", G.xMin, + G.xMax, G.yMin, G.yMax, G.zMin, G.zMax); + chprintf(" xDomain_global: [%.4f %.4f ] [%.4f %.4f ] [%.4f %.4f ]\n", + G.domainMin_x, G.domainMax_x, G.domainMin_y, G.domainMax_y, + G.domainMin_z, G.domainMax_z); + chprintf(" dx: %f %f %f\n", G.dx, G.dy, G.dz); #ifdef PARTICLE_IDS chprintf(" Tracking particle IDs\n"); #endif #if defined(MPI_CHOLLA) && defined(PRINT_DOMAIN) - for (int n=0; nprng_seed); - std::uniform_real_distribution xPositionPrng(G.xMin, G.xMax ); - std::uniform_real_distribution yPositionPrng(G.yMin, G.yMax ); - std::uniform_real_distribution zPositionPrng(G.zMin, G.zMax ); - while ( pID < n_particles_local ){ + std::uniform_real_distribution xPositionPrng(G.xMin, G.xMax); + std::uniform_real_distribution yPositionPrng(G.yMin, G.yMax); + std::uniform_real_distribution zPositionPrng(G.zMin, G.zMax); + while (pID < n_particles_local) { pPos_x = xPositionPrng(generator); pPos_y = yPositionPrng(generator); pPos_z = zPositionPrng(generator); - r = sqrt( (pPos_x-center_x)*(pPos_x-center_x) + (pPos_y-center_y)*(pPos_y-center_y) + (pPos_z-center_z)*(pPos_z-center_z) ); - if ( r > sphereR ) continue; + r = sqrt((pPos_x - center_x) * (pPos_x - center_x) + + (pPos_y - center_y) * (pPos_y - center_y) + + (pPos_z - center_z) * (pPos_z - center_z)); + if (r > sphereR) continue; - #ifdef PARTICLES_CPU - //Copy the particle data to the particles vectors - pos_x.push_back( pPos_x ); - pos_y.push_back( pPos_y ); - pos_z.push_back( pPos_z); - vel_x.push_back( 0.0 ); - vel_y.push_back( 0.0 ); - vel_z.push_back( 0.0 ); - grav_x.push_back( 0.0 ); - grav_y.push_back( 0.0 ); - grav_z.push_back( 0.0 ); + #ifdef PARTICLES_CPU + // Copy the particle data to the particles vectors + pos_x.push_back(pPos_x); + pos_y.push_back(pPos_y); + pos_z.push_back(pPos_z); + vel_x.push_back(0.0); + vel_y.push_back(0.0); + vel_z.push_back(0.0); + grav_x.push_back(0.0); + grav_y.push_back(0.0); + grav_z.push_back(0.0); #ifdef PARTICLE_IDS - partIDs.push_back( pID ); + partIDs.push_back(pID); #endif #ifndef SINGLE_PARTICLE_MASS - mass.push_back( Mparticle ); + mass.push_back(Mparticle); #endif - #endif //PARTICLES_CPU + #endif // PARTICLES_CPU - #ifdef PARTICLES_GPU + #ifdef PARTICLES_GPU // Copy the particle data to the temporal Host Buffers - temp_pos_x[pID] = pPos_x; - temp_pos_y[pID] = pPos_y; - temp_pos_z[pID] = pPos_z; - temp_vel_x[pID] = 0.0; - temp_vel_y[pID] = 0.0; - temp_vel_z[pID] = 0.0; + temp_pos_x[pID] = pPos_x; + temp_pos_y[pID] = pPos_y; + temp_pos_z[pID] = pPos_z; + temp_vel_x[pID] = 0.0; + temp_vel_y[pID] = 0.0; + temp_vel_z[pID] = 0.0; #ifndef SINGLE_PARTICLE_MASS - temp_mass[pID] = Mparticle; + temp_mass[pID] = Mparticle; #endif #ifdef PARTICLE_IDS temp_id[pID] = pID; #endif - #endif //PARTICLES_GPU + #endif // PARTICLES_GPU pID += 1; } #ifdef PARTICLES_CPU n_local = pos_x.size(); - #endif //PARTICLES_CPU + #endif // PARTICLES_CPU #if defined(PARTICLE_IDS) && defined(MPI_CHOLLA) - // Get global IDs: Offset the local IDs to get unique global IDs across the MPI ranks - chprintf( " Computing Global Particles IDs offset \n" ); + // Get global IDs: Offset the local IDs to get unique global IDs across the + // MPI ranks + chprintf(" Computing Global Particles IDs offset \n"); part_int_t global_id_offset; - global_id_offset = Get_Particles_IDs_Global_MPI_Offset( n_local ); - #ifdef PARTICLES_CPU - for ( int p_indx=0; p_indxprng_seed); - std::gamma_distribution radialDist(2,1); //for generating cyclindrical radii + std::gamma_distribution radialDist( + 2, 1); // for generating cyclindrical radii std::uniform_real_distribution zDist(-0.005, 0.005); std::uniform_real_distribution vzDist(-1e-8, 1e-8); - std::uniform_real_distribution phiDist(0, 2*M_PI); //for generating phi - std::normal_distribution speedDist(0, 1); //for generating random speeds. - - Real M_d = Galaxies::MW.getM_d(); // MW disk mass in M_sun (assumed to be all in stars) - Real R_d = Galaxies::MW.getR_d(); // MW stellar disk scale length in kpc - Real Z_d = Galaxies::MW.getZ_d(); // MW stellar height scale length in kpc - Real R_max = sqrt(P->xlen*P->xlen + P->ylen*P->ylen)/2; - R_max = P->xlen / 2.0; + std::uniform_real_distribution phiDist(0, + 2 * M_PI); // for generating phi + std::normal_distribution speedDist(0, + 1); // for generating random speeds. + + Real M_d = + Galaxies::MW + .getM_d(); // MW disk mass in M_sun (assumed to be all in stars) + Real R_d = Galaxies::MW.getR_d(); // MW stellar disk scale length in kpc + Real Z_d = Galaxies::MW.getZ_d(); // MW stellar height scale length in kpc + Real R_max = sqrt(P->xlen * P->xlen + P->ylen * P->ylen) / 2; + R_max = P->xlen / 2.0; real_vector_t temp_pos_x; real_vector_t temp_pos_y; @@ -658,44 +689,45 @@ void Particles_3D::Initialize_Disk_Stellar_Clusters(struct parameters *P) { real_vector_t temp_grav_y; real_vector_t temp_grav_z; real_vector_t temp_mass; - int_vector_t temp_ids; + int_vector_t temp_ids; real_vector_t temp_age; Real x, y, z, R, phi; Real vx, vy, vz, vel, ac; Real expFactor, vR_rms, vR, vPhi_str, vPhi, v_c2, vPhi_rand_rms, kappa2; - //unsigned long int N = (long int)(6.5e6 * 0.11258580827352116); //2kpc radius - //unsigned long int N = 13; //(long int)(6.5e6 * 0.9272485558395908); // 15kpc radius - Real total_mass = 0; + // unsigned long int N = (long int)(6.5e6 * 0.11258580827352116); //2kpc + // radius unsigned long int N = 13; //(long int)(6.5e6 * 0.9272485558395908); + // // 15kpc radius + Real total_mass = 0; Real upper_limit_cluster_mass = 1e7; - long lost_particles = 0; - part_int_t id = -1; + long lost_particles = 0; + part_int_t id = -1; while (total_mass < upper_limit_cluster_mass) { Real cluster_mass = Galaxies::MW.singleClusterMass(generator); total_mass += cluster_mass; - id += 1; // do this here before we check whether the particle is in the MPI domain, otherwise - // could end up with duplicated IDs + id += 1; // do this here before we check whether the particle is in the MPI + // domain, otherwise could end up with duplicated IDs do { - R = R_d*radialDist(generator); + R = R_d * radialDist(generator); } while (R > R_max); phi = phiDist(generator); - x = R * cos(phi); - y = R * sin(phi); - z = zDist(generator); + x = R * cos(phi); + y = R * sin(phi); + z = zDist(generator); if (x < G.xMin || x >= G.xMax) continue; if (y < G.yMin || y >= G.yMax) continue; if (z < G.zMin || z >= G.zMax) continue; - ac = fabs(Galaxies::MW.gr_disk_D3D(R, 0) + Galaxies::MW.gr_halo_D3D(R, 0)); - vPhi = sqrt(R*ac); + ac = fabs(Galaxies::MW.gr_disk_D3D(R, 0) + Galaxies::MW.gr_halo_D3D(R, 0)); + vPhi = sqrt(R * ac); - vx = -vPhi*sin(phi); - vy = vPhi*cos(phi); - vz = 0.0; //vzDist(generator); + vx = -vPhi * sin(phi); + vy = vPhi * cos(phi); + vz = 0.0; // vzDist(generator); - //add particle data to the particles vectors + // add particle data to the particles vectors temp_pos_x.push_back(x); temp_pos_y.push_back(y); temp_pos_z.push_back(z); @@ -712,19 +744,19 @@ void Particles_3D::Initialize_Disk_Stellar_Clusters(struct parameters *P) { n_local = temp_pos_x.size(); -/* - part_int_t global_id_offset = 0; - #ifdef MPI_CHOLLA - // Get global IDs: Offset the local IDs to get unique global IDs across the MPI ranks - chprintf( " Computing Global Particles IDs offset \n" ); - global_id_offset = Get_Particles_IDs_Global_MPI_Offset( n_local ); - #endif //MPI_CHOLLA - for ( int i=0; i 0) chprintf(" lost %lu particles\n", lost_particles); - chprintf( "Stellar Disk Particles Initialized, n_total: %lu, n_local: %lu, total_mass: %.3e s.m.\n", id+1, n_local, total_mass); + chprintf( + "Stellar Disk Particles Initialized, n_total: %lu, n_local: %lu, " + "total_mass: %.3e s.m.\n", + id + 1, n_local, total_mass); } -#endif - - -void Particles_3D::Initialize_Zeldovich_Pancake( struct parameters *P ){ + #endif - //No particles for the Zeldovich Pancake problem. n_local=0 +void Particles_3D::Initialize_Zeldovich_Pancake(struct parameters *P) +{ + // No particles for the Zeldovich Pancake problem. n_local=0 chprintf("Setting Zeldovich Pancake initial conditions...\n"); // n_local = pos_x.size(); n_local = 0; - chprintf( " Particles Zeldovich Pancake Initialized, n_local: %lu\n", n_local); - + chprintf(" Particles Zeldovich Pancake Initialized, n_local: %lu\n", n_local); } - -void Grid3D::Initialize_Uniform_Particles(){ - //Initialize positions assigning one particle at each cell in a uniform grid +void Grid3D::Initialize_Uniform_Particles() +{ + // Initialize positions assigning one particle at each cell in a uniform grid int i, j, k, id; Real x_pos, y_pos, z_pos; Real dVol, Mparticle; - dVol = H.dx * H.dy * H.dz; + dVol = H.dx * H.dy * H.dz; Mparticle = dVol; #ifdef SINGLE_PARTICLE_MASS @@ -802,31 +845,31 @@ void Grid3D::Initialize_Uniform_Particles(){ #endif part_int_t pID = 0; - for (k=H.n_ghost; k -#include -#include -#include -#include -#include "../global/global.h" -#include "../gravity/grav3D.h" + #include + #include + #include + #include -#ifdef PARTICLES_GPU -#define TPB_PARTICLES 1024 -// #define PRINT_GPU_MEMORY -#define PRINT_MAX_MEMORY_USAGE -#endif + #include + #include "../global/global.h" + #include "../gravity/grav3D.h" + #ifdef PARTICLES_GPU + #define TPB_PARTICLES 1024 + // #define PRINT_GPU_MEMORY + #define PRINT_MAX_MEMORY_USAGE + #endif /*! \class Part3D * \brief Class to create a set of particles in 3D space. */ class Particles_3D { - public: - + public: part_int_t n_local; part_int_t n_total; @@ -40,22 +39,21 @@ class Particles_3D Real particle_mass; - #ifdef COSMOLOGY + #ifdef COSMOLOGY Real current_z; Real current_a; - #endif - + #endif - #ifdef PARTICLES_CPU - #ifdef PARTICLE_IDS + #ifdef PARTICLES_CPU + #ifdef PARTICLE_IDS int_vector_t partIDs; - #endif - #ifndef SINGLE_PARTICLE_MASS + #endif + #ifndef SINGLE_PARTICLE_MASS real_vector_t mass; - #endif - #ifdef PARTICLE_AGE + #endif + #ifdef PARTICLE_AGE real_vector_t age; - #endif + #endif real_vector_t pos_x; real_vector_t pos_y; real_vector_t pos_z; @@ -65,16 +63,16 @@ class Particles_3D real_vector_t grav_x; real_vector_t grav_y; real_vector_t grav_z; - #endif //PARTICLES_CPU + #endif // PARTICLES_CPU - #ifdef PARTICLES_GPU + #ifdef PARTICLES_GPU part_int_t particles_array_size; - #ifdef PARTICLE_IDS + #ifdef PARTICLE_IDS part_int_t *partIDs_dev; - #endif - #ifdef PARTICLE_AGE + #endif + #ifdef PARTICLE_AGE Real *age_dev; - #endif + #endif Real *mass_dev; Real *pos_x_dev; Real *pos_y_dev; @@ -86,11 +84,9 @@ class Particles_3D Real *grav_y_dev; Real *grav_z_dev; + #endif // PARTICLES_GPU - #endif //PARTICLES_GPU - - - #ifdef MPI_CHOLLA + #ifdef MPI_CHOLLA part_int_t n_transfer_x0; part_int_t n_transfer_x1; @@ -120,26 +116,21 @@ class Particles_3D part_int_t n_in_buffer_z0; part_int_t n_in_buffer_z1; - - #ifdef PARTICLES_CPU + #ifdef PARTICLES_CPU int_vector_t out_indxs_vec_x0; int_vector_t out_indxs_vec_x1; int_vector_t out_indxs_vec_y0; int_vector_t out_indxs_vec_y1; int_vector_t out_indxs_vec_z0; int_vector_t out_indxs_vec_z1; - #endif //PARTICLES_CPU - + #endif // PARTICLES_CPU - #endif //MPI_CHOLLA + #endif // MPI_CHOLLA bool TRANSFER_DENSITY_BOUNDARIES; bool TRANSFER_PARTICLES_BOUNDARIES; - - struct Grid - { - + struct Grid { int nx_local, ny_local, nz_local; int nx_total, ny_total, nz_total; @@ -168,11 +159,10 @@ class Particles_3D Real *gravity_x; Real *gravity_y; Real *gravity_z; - #ifdef GRAVITY_GPU + #ifdef GRAVITY_GPU Real *density_dev; + #endif #endif - #endif - #ifdef PARTICLES_GPU Real *density_dev; @@ -183,7 +173,7 @@ class Particles_3D Real *dti_array_dev; Real *dti_array_host; - #ifdef MPI_CHOLLA + #ifdef MPI_CHOLLA bool *transfer_particles_flags_d; int *transfer_particles_indices_d; int *replace_particles_indices_d; @@ -218,65 +208,112 @@ class Particles_3D Real *recv_buffer_z0_d; Real *recv_buffer_z1_d; - #endif // MPI_CHOLLA - - #endif //PARTICLES_GPU + #endif // MPI_CHOLLA + #endif // PARTICLES_GPU } G; Particles_3D(void); - void Initialize( struct parameters *P, Grav3D &Grav, Real xbound, Real ybound, Real zbound, Real xdglobal, Real ydglobal, Real zdglobal ); + void Initialize(struct parameters *P, Grav3D &Grav, Real xbound, Real ybound, + Real zbound, Real xdglobal, Real ydglobal, Real zdglobal); - void Allocate_Particles_Grid_Field_Real( Real **array_dev, int size ); - void Free_GPU_Array_Real( Real *array ); - - #ifdef PARTICLES_GPU + void Allocate_Particles_Grid_Field_Real(Real **array_dev, int size); + void Free_GPU_Array_Real(Real *array); + + #ifdef PARTICLES_GPU - void Free_GPU_Array_int( int *array ); - void Free_GPU_Array_bool( bool *array ); - template< typename T > void Free_GPU_Array( T *array ){ cudaFree(array); } //TODO remove the Free_GPU_Array_ functions + void Free_GPU_Array_int(int *array); + void Free_GPU_Array_bool(bool *array); + template + void Free_GPU_Array(T *array) + { + cudaFree(array); + } // TODO remove the Free_GPU_Array_ functions void Allocate_Memory_GPU(); - void Allocate_Particles_GPU_Array_Real( Real **array_dev, part_int_t size ); - void Allocate_Particles_GPU_Array_bool( bool **array_dev, part_int_t size ); - void Allocate_Particles_GPU_Array_int( int **array_dev, part_int_t size ); - void Allocate_Particles_GPU_Array_Part_Int( part_int_t **array_dev, part_int_t size ); - void Copy_Particles_Array_Real_Host_to_Device( Real *array_host, Real *array_dev, part_int_t size); - void Copy_Particles_Array_Real_Device_to_Host( Real *array_dev, Real *array_host, part_int_t size); - void Copy_Particles_Array_Int_Host_to_Device( part_int_t *array_host, part_int_t *array_dev, part_int_t size); - void Copy_Particles_Array_Int_Device_to_Host( part_int_t *array_dev, part_int_t *array_host, part_int_t size); - void Set_Particles_Array_Real( Real value, Real *array_dev, part_int_t size); + void Allocate_Particles_GPU_Array_Real(Real **array_dev, part_int_t size); + void Allocate_Particles_GPU_Array_bool(bool **array_dev, part_int_t size); + void Allocate_Particles_GPU_Array_int(int **array_dev, part_int_t size); + void Allocate_Particles_GPU_Array_Part_Int(part_int_t **array_dev, + part_int_t size); + void Copy_Particles_Array_Real_Host_to_Device(Real *array_host, + Real *array_dev, + part_int_t size); + void Copy_Particles_Array_Real_Device_to_Host(Real *array_dev, + Real *array_host, + part_int_t size); + void Copy_Particles_Array_Int_Host_to_Device(part_int_t *array_host, + part_int_t *array_dev, + part_int_t size); + void Copy_Particles_Array_Int_Device_to_Host(part_int_t *array_dev, + part_int_t *array_host, + part_int_t size); + void Set_Particles_Array_Real(Real value, Real *array_dev, part_int_t size); void Free_Memory_GPU(); void Initialize_Grid_Values_GPU(); void Get_Density_CIC_GPU(); - void Get_Density_CIC_GPU_function(part_int_t n_local, Real particle_mass, Real xMin, Real xMax, Real yMin, Real yMax, Real zMin, Real zMax, Real dx, Real dy, Real dz, int nx_local, int ny_local, int nz_local, int n_ghost_particles_grid, int n_cells, Real *density_h, Real *density_dev, Real *pos_x_dev, Real *pos_y_dev , Real *pos_z_dev, Real *mass_dev); + void Get_Density_CIC_GPU_function( + part_int_t n_local, Real particle_mass, Real xMin, Real xMax, Real yMin, + Real yMax, Real zMin, Real zMax, Real dx, Real dy, Real dz, int nx_local, + int ny_local, int nz_local, int n_ghost_particles_grid, int n_cells, + Real *density_h, Real *density_dev, Real *pos_x_dev, Real *pos_y_dev, + Real *pos_z_dev, Real *mass_dev); void Clear_Density_GPU(); - void Clear_Density_GPU_function( Real *density_dev, int n_cells); - void Copy_Potential_To_GPU( Real *potential_host, Real *potential_dev, int n_cells_potential ); - void Get_Gravity_Field_Particles_GPU( Real *potential_host ); - void Get_Gravity_Field_Particles_GPU_function( int nx_local, int ny_local, int nz_local, int n_ghost_particles_grid, int n_cells_potential, Real dx, Real dy, Real dz, Real *potential_host, Real *potential_dev, Real *gravity_x_dev, Real *gravity_y_dev, Real *gravity_z_dev ); + void Clear_Density_GPU_function(Real *density_dev, int n_cells); + void Copy_Potential_To_GPU(Real *potential_host, Real *potential_dev, + int n_cells_potential); + void Get_Gravity_Field_Particles_GPU(Real *potential_host); + void Get_Gravity_Field_Particles_GPU_function( + int nx_local, int ny_local, int nz_local, int n_ghost_particles_grid, + int n_cells_potential, Real dx, Real dy, Real dz, Real *potential_host, + Real *potential_dev, Real *gravity_x_dev, Real *gravity_y_dev, + Real *gravity_z_dev); void Get_Gravity_CIC_GPU(); - void Get_Gravity_CIC_GPU_function( part_int_t n_local, int nx_local, int ny_local, int nz_local, int n_ghost_particles_grid, Real xMin, Real xMax, Real yMin, Real yMax, Real zMin, Real zMax, Real dx, Real dy, Real dz, Real *pos_x_dev, Real *pos_y_dev, Real *pos_z_dev, Real *grav_x_dev, Real *grav_y_dev, Real *grav_z_dev, Real *gravity_x_dev, Real *gravity_y_dev, Real *gravity_z_dev); - Real Calc_Particles_dt_GPU_function( int ngrid, part_int_t n_local, Real dx, Real dy, Real dz, Real *vel_x_dev, Real *vel_y_dev, Real *vel_z_dev, Real *dti_array_host, Real *dti_array_dev ); - void Advance_Particles_KDK_Step1_GPU_function( part_int_t n_local, Real dt, Real *pos_x_dev, Real *pos_y_dev, Real *pos_z_dev, Real *vel_x_dev, Real *vel_y_dev, Real *vel_z_dev, Real *grav_x_dev, Real *grav_y_dev, Real *grav_z_dev ); - void Advance_Particles_KDK_Step1_Cosmo_GPU_function( part_int_t n_local, Real delta_a, Real *pos_x_dev, Real *pos_y_dev, Real *pos_z_dev, Real *vel_x_dev, Real *vel_y_dev, Real *vel_z_dev, Real *grav_x_dev, Real *grav_y_dev, Real *grav_z_dev, Real current_a, Real H0, Real cosmo_h, Real Omega_M, Real Omega_L, Real Omega_K ); - void Advance_Particles_KDK_Step2_GPU_function( part_int_t n_local, Real dt, Real *vel_x_dev, Real *vel_y_dev, Real *vel_z_dev, Real *grav_x_dev, Real *grav_y_dev, Real *grav_z_dev ); - void Advance_Particles_KDK_Step2_Cosmo_GPU_function( part_int_t n_local, Real delta_a, Real *vel_x_dev, Real *vel_y_dev, Real *vel_z_dev, Real *grav_x_dev, Real *grav_y_dev, Real *grav_z_dev, Real current_a, Real H0, Real cosmo_h, Real Omega_M, Real Omega_L, Real Omega_K ); - part_int_t Compute_Particles_GPU_Array_Size( part_int_t n ); - int Select_Particles_to_Transfer_GPU( int direction, int side ); - void Copy_Transfer_Particles_to_Buffer_GPU(int n_transfer, int direction, int side, Real *send_buffer, int buffer_length ); - void Replace_Tranfered_Particles_GPU( int n_transfer ); - void Unload_Particles_from_Buffer_GPU( int direction, int side , Real *recv_buffer_h, int n_recv ); - void Copy_Transfer_Particles_from_Buffer_GPU(int n_recv, Real *recv_buffer_d ); - void Set_Particles_Open_Boundary_GPU( int dir, int side ); - #ifdef PRINT_MAX_MEMORY_USAGE + void Get_Gravity_CIC_GPU_function( + part_int_t n_local, int nx_local, int ny_local, int nz_local, + int n_ghost_particles_grid, Real xMin, Real xMax, Real yMin, Real yMax, + Real zMin, Real zMax, Real dx, Real dy, Real dz, Real *pos_x_dev, + Real *pos_y_dev, Real *pos_z_dev, Real *grav_x_dev, Real *grav_y_dev, + Real *grav_z_dev, Real *gravity_x_dev, Real *gravity_y_dev, + Real *gravity_z_dev); + Real Calc_Particles_dt_GPU_function(int ngrid, part_int_t n_local, Real dx, + Real dy, Real dz, Real *vel_x_dev, + Real *vel_y_dev, Real *vel_z_dev, + Real *dti_array_host, + Real *dti_array_dev); + void Advance_Particles_KDK_Step1_GPU_function( + part_int_t n_local, Real dt, Real *pos_x_dev, Real *pos_y_dev, + Real *pos_z_dev, Real *vel_x_dev, Real *vel_y_dev, Real *vel_z_dev, + Real *grav_x_dev, Real *grav_y_dev, Real *grav_z_dev); + void Advance_Particles_KDK_Step1_Cosmo_GPU_function( + part_int_t n_local, Real delta_a, Real *pos_x_dev, Real *pos_y_dev, + Real *pos_z_dev, Real *vel_x_dev, Real *vel_y_dev, Real *vel_z_dev, + Real *grav_x_dev, Real *grav_y_dev, Real *grav_z_dev, Real current_a, + Real H0, Real cosmo_h, Real Omega_M, Real Omega_L, Real Omega_K); + void Advance_Particles_KDK_Step2_GPU_function( + part_int_t n_local, Real dt, Real *vel_x_dev, Real *vel_y_dev, + Real *vel_z_dev, Real *grav_x_dev, Real *grav_y_dev, Real *grav_z_dev); + void Advance_Particles_KDK_Step2_Cosmo_GPU_function( + part_int_t n_local, Real delta_a, Real *vel_x_dev, Real *vel_y_dev, + Real *vel_z_dev, Real *grav_x_dev, Real *grav_y_dev, Real *grav_z_dev, + Real current_a, Real H0, Real cosmo_h, Real Omega_M, Real Omega_L, + Real Omega_K); + part_int_t Compute_Particles_GPU_Array_Size(part_int_t n); + int Select_Particles_to_Transfer_GPU(int direction, int side); + void Copy_Transfer_Particles_to_Buffer_GPU(int n_transfer, int direction, + int side, Real *send_buffer, + int buffer_length); + void Replace_Tranfered_Particles_GPU(int n_transfer); + void Unload_Particles_from_Buffer_GPU(int direction, int side, + Real *recv_buffer_h, int n_recv); + void Copy_Transfer_Particles_from_Buffer_GPU(int n_recv, Real *recv_buffer_d); + void Set_Particles_Open_Boundary_GPU(int dir, int side); + #ifdef PRINT_MAX_MEMORY_USAGE void Print_Max_Memory_Usage(); - #endif - - #endif //PARTICLES_GPU - + #endif + #endif // PARTICLES_GPU void Allocate_Memory(); @@ -284,13 +321,14 @@ class Particles_3D void Initialize_Sphere(struct parameters *P); -#if defined(PARTICLE_AGE) && !defined(SINGLE_PARTICLE_MASS) && defined(PARTICLE_IDS) + #if defined(PARTICLE_AGE) && !defined(SINGLE_PARTICLE_MASS) && \ + defined(PARTICLE_IDS) void Initialize_Disk_Stellar_Clusters(struct parameters *P); -#endif + #endif - void Initialize_Zeldovich_Pancake( struct parameters *P ); + void Initialize_Zeldovich_Pancake(struct parameters *P); - void Load_Particles_Data( struct parameters *P ); + void Load_Particles_Data(struct parameters *P); void Free_Memory(); @@ -298,44 +336,50 @@ class Particles_3D void Clear_Density(); - void Get_Density_CIC_Serial( ); + void Get_Density_CIC_Serial(); - #ifdef HDF5 - void Load_Particles_Data_HDF5( hid_t file_id, int nfile, struct parameters *P ); - #endif + #ifdef HDF5 + void Load_Particles_Data_HDF5(hid_t file_id, int nfile, struct parameters *P); + #endif - #ifdef PARALLEL_OMP - void Get_Density_CIC_OMP( ); - #endif + #ifdef PARALLEL_OMP + void Get_Density_CIC_OMP(); + #endif void Get_Density_CIC(); - #ifdef MPI_CHOLLA - void Clear_Particles_For_Transfer( void ); - void Select_Particles_to_Transfer_All( int *flags ); - void Add_Particle_To_Buffer( Real *buffer, part_int_t n_in_buffer, int buffer_length, Real pId, Real pMass, Real pAge, - Real pPos_x, Real pPos_y, Real pPos_z, Real pVel_x, Real pVel_y, Real pVel_z); + #ifdef MPI_CHOLLA + void Clear_Particles_For_Transfer(void); + void Select_Particles_to_Transfer_All(int *flags); + void Add_Particle_To_Buffer(Real *buffer, part_int_t n_in_buffer, + int buffer_length, Real pId, Real pMass, + Real pAge, Real pPos_x, Real pPos_y, Real pPos_z, + Real pVel_x, Real pVel_y, Real pVel_z); void Remove_Transfered_Particles(); - #ifdef PARTICLES_CPU - void Clear_Vectors_For_Transfers( void ); - void Add_Particle_To_Vectors( Real pId, Real pMass, Real pAge, Real pPos_x, Real pPos_y, Real pPos_z, Real pVel_x, Real pVel_y, Real pVel_z, int *flags ); - void Select_Particles_to_Transfer_All_CPU( int *flags ); - void Load_Particles_to_Buffer_CPU( int direction, int side, Real *send_buffer, int buffer_length ); - void Unload_Particles_from_Buffer_CPU( int direction, int side, Real *recv_buffer, part_int_t n_recv, - Real *send_buffer_y0, Real *send_buffer_y1, Real *send_buffer_z0, Real *send_buffer_z1, int buffer_length_y0, int buffer_length_y1, int buffer_length_z0, int buffer_length_z1, int *flags); - #endif//PARTICLES_CPU - - - #ifdef PARTICLES_GPU + #ifdef PARTICLES_CPU + void Clear_Vectors_For_Transfers(void); + void Add_Particle_To_Vectors(Real pId, Real pMass, Real pAge, Real pPos_x, + Real pPos_y, Real pPos_z, Real pVel_x, + Real pVel_y, Real pVel_z, int *flags); + void Select_Particles_to_Transfer_All_CPU(int *flags); + void Load_Particles_to_Buffer_CPU(int direction, int side, Real *send_buffer, + int buffer_length); + void Unload_Particles_from_Buffer_CPU( + int direction, int side, Real *recv_buffer, part_int_t n_recv, + Real *send_buffer_y0, Real *send_buffer_y1, Real *send_buffer_z0, + Real *send_buffer_z1, int buffer_length_y0, int buffer_length_y1, + int buffer_length_z0, int buffer_length_z1, int *flags); + #endif // PARTICLES_CPU + + #ifdef PARTICLES_GPU void Allocate_Memory_GPU_MPI(); void ReAllocate_Memory_GPU_MPI(); - void Load_Particles_to_Buffer_GPU( int direction, int side, Real *send_buffer, int buffer_length ); - #endif //PARTICLES_GPU - #endif - + void Load_Particles_to_Buffer_GPU(int direction, int side, Real *send_buffer, + int buffer_length); + #endif // PARTICLES_GPU + #endif }; - -#endif //PARTICLES_H -#endif //PARTICLES + #endif // PARTICLES_H +#endif // PARTICLES diff --git a/src/particles/particles_3D_gpu.cu b/src/particles/particles_3D_gpu.cu index 45c9d53c8..490e2d391 100644 --- a/src/particles/particles_3D_gpu.cu +++ b/src/particles/particles_3D_gpu.cu @@ -1,201 +1,215 @@ -#if defined(PARTICLES) +#if defined(PARTICLES) -#include -#include -#include -#include -#include "../utils/gpu.hpp" -#include "../io/io.h" -#include "../global/global.h" -#include "../global/global_cuda.h" -#include "particles_3D.h" + #include + #include + #include + #include + #include "../global/global.h" + #include "../global/global_cuda.h" + #include "../io/io.h" + #include "../utils/gpu.hpp" + #include "particles_3D.h" +void Particles_3D::Free_GPU_Array_Real(Real *array) { cudaFree(array); } - - - - -void Particles_3D::Free_GPU_Array_Real( Real *array ){ cudaFree(array); } - - -void Particles_3D::Allocate_Particles_Grid_Field_Real( Real **array_dev, int size ){ +void Particles_3D::Allocate_Particles_Grid_Field_Real(Real **array_dev, + int size) +{ size_t global_free, global_total; - CudaSafeCall( cudaMemGetInfo( &global_free, &global_total ) ); + CudaSafeCall(cudaMemGetInfo(&global_free, &global_total)); #ifdef PRINT_GPU_MEMORY - chprintf( "Allocating GPU Memory: %ld MB free \n", global_free/1000000); + chprintf("Allocating GPU Memory: %ld MB free \n", global_free / 1000000); #endif - if ( global_free < size*sizeof(Real) ){ - printf( "ERROR: Not enough global device memory \n" ); - printf( " Available Memory: %ld MB \n", global_free/1000000 ); - printf( " Requested Memory: %ld MB \n", size*sizeof(Real)/1000000 ); + if (global_free < size * sizeof(Real)) { + printf("ERROR: Not enough global device memory \n"); + printf(" Available Memory: %ld MB \n", global_free / 1000000); + printf(" Requested Memory: %ld MB \n", size * sizeof(Real) / 1000000); exit(-1); } - CudaSafeCall( cudaMalloc((void**)array_dev, size*sizeof(Real)) ); + CudaSafeCall(cudaMalloc((void **)array_dev, size * sizeof(Real))); cudaDeviceSynchronize(); } + #ifdef PARTICLES_GPU + #ifdef PRINT_MAX_MEMORY_USAGE + #include "../mpi/mpi_routines.h" -#ifdef PARTICLES_GPU - -#ifdef PRINT_MAX_MEMORY_USAGE -#include "../mpi/mpi_routines.h" - -void Particles_3D::Print_Max_Memory_Usage(){ - +void Particles_3D::Print_Max_Memory_Usage() +{ size_t global_free, global_total; - CudaSafeCall( cudaMemGetInfo( &global_free, &global_total ) ); + CudaSafeCall(cudaMemGetInfo(&global_free, &global_total)); cudaDeviceSynchronize(); - + part_int_t n_local_max, n_total, mem_usage; Real fraction_max, global_free_min; - - n_local_max = (part_int_t) ReduceRealMax( (Real) n_local ); - n_total = ReducePartIntSum( n_local ); - fraction_max = (Real) n_local_max / (Real) n_total; - mem_usage = n_local_max * 9 * sizeof(Real); //Usage for pos, vel ans accel. - - global_free_min = ReduceRealMin( (Real) global_free ); - - chprintf( " Particles GPU Memory: N_local_max: %ld (%.1f %) mem_usage: %ld MB global_free_min: %.1f MB \n", n_local_max, fraction_max*100, mem_usage/1000000, global_free_min/1000000 ); - - -} -#endif + n_local_max = (part_int_t)ReduceRealMax((Real)n_local); + n_total = ReducePartIntSum(n_local); + fraction_max = (Real)n_local_max / (Real)n_total; + mem_usage = n_local_max * 9 * sizeof(Real); // Usage for pos, vel ans accel. + global_free_min = ReduceRealMin((Real)global_free); + chprintf( + " Particles GPU Memory: N_local_max: %ld (%.1f %) mem_usage: %ld MB " + " global_free_min: %.1f MB \n", + n_local_max, fraction_max * 100, mem_usage / 1000000, + global_free_min / 1000000); +} -void Particles_3D::Free_GPU_Array_int( int *array ) { cudaFree(array); } -void Particles_3D::Free_GPU_Array_bool( bool *array ){ cudaFree(array); } + #endif +void Particles_3D::Free_GPU_Array_int(int *array) { cudaFree(array); } +void Particles_3D::Free_GPU_Array_bool(bool *array) { cudaFree(array); } -template< typename T > -void __global__ Copy_Device_to_Device_Kernel( T *src_array_dev, T *dst_array_dev, part_int_t size ){ - int tid = blockIdx.x * blockDim.x + threadIdx.x ; - if ( tid < size ) dst_array_dev[tid] = src_array_dev[tid]; +template +void __global__ Copy_Device_to_Device_Kernel(T *src_array_dev, T *dst_array_dev, + part_int_t size) +{ + int tid = blockIdx.x * blockDim.x + threadIdx.x; + if (tid < size) dst_array_dev[tid] = src_array_dev[tid]; } -template< typename T > -void Copy_Device_to_Device( T *src_array_dev, T *dst_array_dev, part_int_t size ){ - int ngrid = (size + TPB_PARTICLES - 1) / TPB_PARTICLES; +template +void Copy_Device_to_Device(T *src_array_dev, T *dst_array_dev, part_int_t size) +{ + int ngrid = (size + TPB_PARTICLES - 1) / TPB_PARTICLES; dim3 dim1dGrid(ngrid, 1, 1); dim3 dim1dBlock(TPB_PARTICLES, 1, 1); - hipLaunchKernelGGL(Copy_Device_to_Device_Kernel, dim1dGrid, dim1dBlock, 0, 0, src_array_dev, dst_array_dev, size); + hipLaunchKernelGGL(Copy_Device_to_Device_Kernel, dim1dGrid, dim1dBlock, 0, 0, + src_array_dev, dst_array_dev, size); CudaCheckError(); - } - -void Particles_3D::Allocate_Particles_GPU_Array_Real( Real **array_dev, part_int_t size ){ +void Particles_3D::Allocate_Particles_GPU_Array_Real(Real **array_dev, + part_int_t size) +{ size_t global_free, global_total; - CudaSafeCall( cudaMemGetInfo( &global_free, &global_total ) ); - #ifdef PRINT_GPU_MEMORY - chprintf( "Allocating GPU Memory: %ld MB free \n", global_free/1000000); - #endif - if ( global_free < size*sizeof(Real) ){ - printf( "ERROR: Not enough global device memory \n" ); - printf( " Available Memory: %ld MB \n", global_free/1000000 ); - printf( " Requested Memory: %ld MB \n", size*sizeof(Real)/1000000 ); + CudaSafeCall(cudaMemGetInfo(&global_free, &global_total)); + #ifdef PRINT_GPU_MEMORY + chprintf("Allocating GPU Memory: %ld MB free \n", global_free / 1000000); + #endif + if (global_free < size * sizeof(Real)) { + printf("ERROR: Not enough global device memory \n"); + printf(" Available Memory: %ld MB \n", global_free / 1000000); + printf(" Requested Memory: %ld MB \n", size * sizeof(Real) / 1000000); exit(-1); } - CudaSafeCall( cudaMalloc((void**)array_dev, size*sizeof(Real)) ); + CudaSafeCall(cudaMalloc((void **)array_dev, size * sizeof(Real))); cudaDeviceSynchronize(); } -void Particles_3D::Allocate_Particles_GPU_Array_int( int **array_dev, part_int_t size ){ +void Particles_3D::Allocate_Particles_GPU_Array_int(int **array_dev, + part_int_t size) +{ size_t global_free, global_total; - CudaSafeCall( cudaMemGetInfo( &global_free, &global_total ) ); - #ifdef PRINT_GPU_MEMORY - chprintf( "Allocating GPU Memory: %ld MB free \n", global_free/1000000); - #endif - if ( global_free < size*sizeof(int) ){ - printf( "ERROR: Not enough global device memory \n" ); - printf( " Available Memory: %ld MB \n", global_free/1000000 ); - printf( " Requested Memory: %ld MB \n", size*sizeof(int)/1000000 ); + CudaSafeCall(cudaMemGetInfo(&global_free, &global_total)); + #ifdef PRINT_GPU_MEMORY + chprintf("Allocating GPU Memory: %ld MB free \n", global_free / 1000000); + #endif + if (global_free < size * sizeof(int)) { + printf("ERROR: Not enough global device memory \n"); + printf(" Available Memory: %ld MB \n", global_free / 1000000); + printf(" Requested Memory: %ld MB \n", size * sizeof(int) / 1000000); exit(-1); } - CudaSafeCall( cudaMalloc((void**)array_dev, size*sizeof(int)) ); + CudaSafeCall(cudaMalloc((void **)array_dev, size * sizeof(int))); cudaDeviceSynchronize(); } -void Particles_3D::Allocate_Particles_GPU_Array_Part_Int( part_int_t **array_dev, part_int_t size ){ +void Particles_3D::Allocate_Particles_GPU_Array_Part_Int(part_int_t **array_dev, + part_int_t size) +{ size_t global_free, global_total; - CudaSafeCall( cudaMemGetInfo( &global_free, &global_total ) ); - #ifdef PRINT_GPU_MEMORY - chprintf( "Allocating GPU Memory: %ld MB free \n", global_free/1000000); - #endif - if ( global_free < size*sizeof(part_int_t) ){ - printf( "ERROR: Not enough global device memory \n" ); - printf( " Available Memory: %ld MB \n", global_free/1000000 ); - printf( " Requested Memory: %ld MB \n", size*sizeof(part_int_t)/1000000 ); + CudaSafeCall(cudaMemGetInfo(&global_free, &global_total)); + #ifdef PRINT_GPU_MEMORY + chprintf("Allocating GPU Memory: %ld MB free \n", global_free / 1000000); + #endif + if (global_free < size * sizeof(part_int_t)) { + printf("ERROR: Not enough global device memory \n"); + printf(" Available Memory: %ld MB \n", global_free / 1000000); + printf(" Requested Memory: %ld MB \n", + size * sizeof(part_int_t) / 1000000); exit(-1); } - CudaSafeCall( cudaMalloc((void**)array_dev, size*sizeof(part_int_t)) ); + CudaSafeCall(cudaMalloc((void **)array_dev, size * sizeof(part_int_t))); cudaDeviceSynchronize(); } -void Particles_3D::Allocate_Particles_GPU_Array_bool( bool **array_dev, part_int_t size ){ +void Particles_3D::Allocate_Particles_GPU_Array_bool(bool **array_dev, + part_int_t size) +{ size_t global_free, global_total; - CudaSafeCall( cudaMemGetInfo( &global_free, &global_total ) ); - #ifdef PRINT_GPU_MEMORY - chprintf( "Allocating GPU Memory: %ld MB free \n", global_free/1000000); - #endif - if ( global_free < size*sizeof(bool) ){ - printf( "ERROR: Not enough global device memory \n" ); - printf( " Available Memory: %ld MB \n", global_free/1000000 ); - printf( " Requested Memory: %ld MB \n", size*sizeof(bool)/1000000 ); + CudaSafeCall(cudaMemGetInfo(&global_free, &global_total)); + #ifdef PRINT_GPU_MEMORY + chprintf("Allocating GPU Memory: %ld MB free \n", global_free / 1000000); + #endif + if (global_free < size * sizeof(bool)) { + printf("ERROR: Not enough global device memory \n"); + printf(" Available Memory: %ld MB \n", global_free / 1000000); + printf(" Requested Memory: %ld MB \n", size * sizeof(bool) / 1000000); exit(-1); } - CudaSafeCall( cudaMalloc((void**)array_dev, size*sizeof(bool)) ); + CudaSafeCall(cudaMalloc((void **)array_dev, size * sizeof(bool))); cudaDeviceSynchronize(); } -void Particles_3D::Copy_Particles_Array_Real_Host_to_Device( Real *array_host, Real *array_dev, part_int_t size){ - CudaSafeCall( cudaMemcpy(array_dev, array_host, size*sizeof(Real), cudaMemcpyHostToDevice) ); +void Particles_3D::Copy_Particles_Array_Real_Host_to_Device(Real *array_host, + Real *array_dev, + part_int_t size) +{ + CudaSafeCall(cudaMemcpy(array_dev, array_host, size * sizeof(Real), + cudaMemcpyHostToDevice)); cudaDeviceSynchronize(); } -void Particles_3D::Copy_Particles_Array_Real_Device_to_Host( Real *array_dev, Real *array_host, part_int_t size){ - CudaSafeCall( cudaMemcpy(array_host, array_dev, size*sizeof(Real), cudaMemcpyDeviceToHost) ); +void Particles_3D::Copy_Particles_Array_Real_Device_to_Host(Real *array_dev, + Real *array_host, + part_int_t size) +{ + CudaSafeCall(cudaMemcpy(array_host, array_dev, size * sizeof(Real), + cudaMemcpyDeviceToHost)); cudaDeviceSynchronize(); } -void Particles_3D::Copy_Particles_Array_Int_Host_to_Device( part_int_t *array_host, part_int_t *array_dev, part_int_t size) { - CudaSafeCall( cudaMemcpy(array_dev, array_host, size*sizeof(part_int_t), cudaMemcpyHostToDevice) ); +void Particles_3D::Copy_Particles_Array_Int_Host_to_Device( + part_int_t *array_host, part_int_t *array_dev, part_int_t size) +{ + CudaSafeCall(cudaMemcpy(array_dev, array_host, size * sizeof(part_int_t), + cudaMemcpyHostToDevice)); cudaDeviceSynchronize(); } -void Particles_3D::Copy_Particles_Array_Int_Device_to_Host( part_int_t *array_dev, part_int_t *array_host, part_int_t size) { - CudaSafeCall( cudaMemcpy(array_host, array_dev, size*sizeof(part_int_t), cudaMemcpyDeviceToHost) ); +void Particles_3D::Copy_Particles_Array_Int_Device_to_Host( + part_int_t *array_dev, part_int_t *array_host, part_int_t size) +{ + CudaSafeCall(cudaMemcpy(array_host, array_dev, size * sizeof(part_int_t), + cudaMemcpyDeviceToHost)); cudaDeviceSynchronize(); } -__global__ void Set_Particles_Array_Real_Kernel( Real value, Real *array_dev, part_int_t size ){ - int tid = blockIdx.x * blockDim.x + threadIdx.x ; - if ( tid < size ) array_dev[tid] = value; +__global__ void Set_Particles_Array_Real_Kernel(Real value, Real *array_dev, + part_int_t size) +{ + int tid = blockIdx.x * blockDim.x + threadIdx.x; + if (tid < size) array_dev[tid] = value; } - - -void Particles_3D::Set_Particles_Array_Real( Real value, Real *array_dev, part_int_t size){ - +void Particles_3D::Set_Particles_Array_Real(Real value, Real *array_dev, + part_int_t size) +{ // set values for GPU kernels - int ngrid = (size + TPB_PARTICLES - 1) / TPB_PARTICLES; + int ngrid = (size + TPB_PARTICLES - 1) / TPB_PARTICLES; // number of blocks per 1D grid dim3 dim1dGrid(ngrid, 1, 1); // number of threads per 1D block dim3 dim1dBlock(TPB_PARTICLES, 1, 1); - hipLaunchKernelGGL(Set_Particles_Array_Real_Kernel, dim1dGrid, dim1dBlock, 0, 0, value, array_dev, size); + hipLaunchKernelGGL(Set_Particles_Array_Real_Kernel, dim1dGrid, dim1dBlock, 0, + 0, value, array_dev, size); CudaCheckError(); } - - - - - - -#endif //PARTICLES_GPU -#endif//PARTICLES + #endif // PARTICLES_GPU +#endif // PARTICLES diff --git a/src/particles/particles_boundaries.cpp b/src/particles/particles_boundaries.cpp index d4df5eccf..b8c12c7cf 100644 --- a/src/particles/particles_boundaries.cpp +++ b/src/particles/particles_boundaries.cpp @@ -1,26 +1,27 @@ #ifdef PARTICLES -#include -#include -#include -#include "../grid/grid3D.h" -#include "../io/io.h" -#include "particles_3D.h" + #include -#ifdef MPI_CHOLLA -#include "../mpi/mpi_routines.h" -#ifdef PARTICLES_GPU -#include "particles_boundaries_gpu.h" -#include "../utils/gpu_arrays_functions.h" -#endif//PARTICLES_GPU -#endif//MPI_CHOLLA + #include + #include + #include "../grid/grid3D.h" + #include "../io/io.h" + #include "particles_3D.h" -//Transfer the particles that moved outside the local domain -void Grid3D::Transfer_Particles_Boundaries( struct parameters P ){ + #ifdef MPI_CHOLLA + #include "../mpi/mpi_routines.h" + #ifdef PARTICLES_GPU + #include "../utils/gpu_arrays_functions.h" + #include "particles_boundaries_gpu.h" + #endif // PARTICLES_GPU + #endif // MPI_CHOLLA +// Transfer the particles that moved outside the local domain +void Grid3D::Transfer_Particles_Boundaries(struct parameters P) +{ CudaCheckError(); - //Transfer Particles Boundaries + // Transfer Particles Boundaries Particles.TRANSFER_PARTICLES_BOUNDARIES = true; #ifdef CPU_TIME Timer.Part_Boundaries.Start(); @@ -33,569 +34,713 @@ void Grid3D::Transfer_Particles_Boundaries( struct parameters P ){ CudaCheckError(); } -#ifdef MPI_CHOLLA -//Remove the particles that were transferred outside the local domain -void Grid3D::Finish_Particles_Transfer( void ){ - - #ifdef PARTICLES_CPU + #ifdef MPI_CHOLLA +// Remove the particles that were transferred outside the local domain +void Grid3D::Finish_Particles_Transfer(void) +{ + #ifdef PARTICLES_CPU Particles.Remove_Transfered_Particles(); - #endif - + #endif } - -//Wait for the MPI request and unload the transferred particles -void Grid3D::Wait_and_Unload_MPI_Comm_Particles_Buffers_BLOCK(int dir, int *flags) +// Wait for the MPI request and unload the transferred particles +void Grid3D::Wait_and_Unload_MPI_Comm_Particles_Buffers_BLOCK(int dir, + int *flags) { - int iwait; - int index = 0; - int wait_max=0; + int index = 0; + int wait_max = 0; MPI_Status status; - - //find out how many recvs we need to wait for - if (dir==0) { - if(flags[0] == 5) //there is communication on this face - wait_max++; //so we'll need to wait for its comm - if(flags[1] == 5) //there is communication on this face - wait_max++; //so we'll need to wait for its comm + // find out how many recvs we need to wait for + if (dir == 0) { + if (flags[0] == 5) // there is communication on this face + wait_max++; // so we'll need to wait for its comm + if (flags[1] == 5) // there is communication on this face + wait_max++; // so we'll need to wait for its comm } - if (dir==1) { - if(flags[2] == 5) //there is communication on this face - wait_max++; //so we'll need to wait for its comm - if(flags[3] == 5) //there is communication on this face - wait_max++; //so we'll need to wait for its comm + if (dir == 1) { + if (flags[2] == 5) // there is communication on this face + wait_max++; // so we'll need to wait for its comm + if (flags[3] == 5) // there is communication on this face + wait_max++; // so we'll need to wait for its comm } - if (dir==2) { - if(flags[4] == 5) //there is communication on this face - wait_max++; //so we'll need to wait for its comm - if(flags[5] == 5) //there is communication on this face - wait_max++; //so we'll need to wait for its comm + if (dir == 2) { + if (flags[4] == 5) // there is communication on this face + wait_max++; // so we'll need to wait for its comm + if (flags[5] == 5) // there is communication on this face + wait_max++; // so we'll need to wait for its comm } - //wait for any receives to complete - for(iwait=0;iwait Particles.G.recv_buffer_size_x0 ){ - printf( "Extending Particles Transfer Buffer "); - Extend_GPU_Array( &recv_buffer_x0_particles, Particles.G.recv_buffer_size_x0, Particles.G.gpu_allocation_factor*buffer_length, true ); - Particles.G.recv_buffer_size_x0 = (part_int_t) Particles.G.gpu_allocation_factor*buffer_length; + #ifdef MPI_GPU + if (buffer_length > Particles.G.recv_buffer_size_x0) { + printf("Extending Particles Transfer Buffer "); + Extend_GPU_Array(&recv_buffer_x0_particles, + Particles.G.recv_buffer_size_x0, + Particles.G.gpu_allocation_factor * buffer_length, true); + Particles.G.recv_buffer_size_x0 = + (part_int_t)Particles.G.gpu_allocation_factor * buffer_length; } - #else - Check_and_Grow_Particles_Buffer( &recv_buffer_x0_particles , &buffer_length_particles_x0_recv, buffer_length ); - #endif + #else + Check_and_Grow_Particles_Buffer(&recv_buffer_x0_particles, + &buffer_length_particles_x0_recv, + buffer_length); + #endif #endif #ifdef PARTICLES_CPU - Check_and_Grow_Particles_Buffer( &recv_buffer_x0_particles , &buffer_length_particles_x0_recv, buffer_length ); + Check_and_Grow_Particles_Buffer(&recv_buffer_x0_particles, + &buffer_length_particles_x0_recv, + buffer_length); #endif - // if ( Particles.n_recv_x0 > 0 ) std::cout << " Recv X0: " << Particles.n_recv_x0 << std::endl; - MPI_Irecv(recv_buffer_x0_particles, buffer_length, MPI_CHREAL, source[0], 0, world, &recv_request_particles_transfer[*ireq_particles_transfer]); + // if ( Particles.n_recv_x0 > 0 ) std::cout << " Recv X0: " << + // Particles.n_recv_x0 << std::endl; + MPI_Irecv(recv_buffer_x0_particles, buffer_length, MPI_CHREAL, source[0], 0, + world, + &recv_request_particles_transfer[*ireq_particles_transfer]); } - if ( index == 1){ + if (index == 1) { buffer_length = Particles.n_recv_x1 * N_DATA_PER_PARTICLE_TRANSFER; #ifdef PARTICLES_GPU - #ifdef MPI_GPU - if ( buffer_length > Particles.G.recv_buffer_size_x1 ){ - printf( "Extending Particles Transfer Buffer "); - Extend_GPU_Array( &recv_buffer_x1_particles, Particles.G.recv_buffer_size_x1, Particles.G.gpu_allocation_factor*buffer_length, true ); - Particles.G.recv_buffer_size_x1 = (part_int_t) Particles.G.gpu_allocation_factor*buffer_length; + #ifdef MPI_GPU + if (buffer_length > Particles.G.recv_buffer_size_x1) { + printf("Extending Particles Transfer Buffer "); + Extend_GPU_Array(&recv_buffer_x1_particles, + Particles.G.recv_buffer_size_x1, + Particles.G.gpu_allocation_factor * buffer_length, true); + Particles.G.recv_buffer_size_x1 = + (part_int_t)Particles.G.gpu_allocation_factor * buffer_length; } - #else - Check_and_Grow_Particles_Buffer( &recv_buffer_x1_particles , &buffer_length_particles_x1_recv, buffer_length ); - #endif + #else + Check_and_Grow_Particles_Buffer(&recv_buffer_x1_particles, + &buffer_length_particles_x1_recv, + buffer_length); + #endif #endif #ifdef PARTICLES_CPU - Check_and_Grow_Particles_Buffer( &recv_buffer_x1_particles , &buffer_length_particles_x1_recv, buffer_length ); + Check_and_Grow_Particles_Buffer(&recv_buffer_x1_particles, + &buffer_length_particles_x1_recv, + buffer_length); #endif - // if ( Particles.n_recv_x1 > 0 ) if ( Particles.n_recv_x1 > 0 ) std::cout << " Recv X1: " << Particles.n_recv_x1 << " " << procID << " from " << source[1] << std::endl; - MPI_Irecv(recv_buffer_x1_particles, buffer_length, MPI_CHREAL, source[1], 1, world, &recv_request_particles_transfer[*ireq_particles_transfer]); + // if ( Particles.n_recv_x1 > 0 ) if ( Particles.n_recv_x1 > 0 ) std::cout + // << " Recv X1: " << Particles.n_recv_x1 << " " << procID << " from " + // << source[1] << std::endl; + MPI_Irecv(recv_buffer_x1_particles, buffer_length, MPI_CHREAL, source[1], 1, + world, + &recv_request_particles_transfer[*ireq_particles_transfer]); } - if ( index == 2){ + if (index == 2) { buffer_length = Particles.n_recv_y0 * N_DATA_PER_PARTICLE_TRANSFER; #ifdef PARTICLES_GPU - #ifdef MPI_GPU - if ( buffer_length > Particles.G.recv_buffer_size_y0 ){ - printf( "Extending Particles Transfer Buffer "); - Extend_GPU_Array( &recv_buffer_y0_particles, Particles.G.recv_buffer_size_y0, Particles.G.gpu_allocation_factor*buffer_length, true ); - Particles.G.recv_buffer_size_y0 = (part_int_t) Particles.G.gpu_allocation_factor*buffer_length; + #ifdef MPI_GPU + if (buffer_length > Particles.G.recv_buffer_size_y0) { + printf("Extending Particles Transfer Buffer "); + Extend_GPU_Array(&recv_buffer_y0_particles, + Particles.G.recv_buffer_size_y0, + Particles.G.gpu_allocation_factor * buffer_length, true); + Particles.G.recv_buffer_size_y0 = + (part_int_t)Particles.G.gpu_allocation_factor * buffer_length; } - #else - Check_and_Grow_Particles_Buffer( &recv_buffer_y0_particles , &buffer_length_particles_y0_recv, buffer_length ); - #endif + #else + Check_and_Grow_Particles_Buffer(&recv_buffer_y0_particles, + &buffer_length_particles_y0_recv, + buffer_length); + #endif #endif #ifdef PARTICLES_CPU - Check_and_Grow_Particles_Buffer( &recv_buffer_y0_particles , &buffer_length_particles_y0_recv, buffer_length ); + Check_and_Grow_Particles_Buffer(&recv_buffer_y0_particles, + &buffer_length_particles_y0_recv, + buffer_length); #endif - // if ( Particles.n_recv_y0 > 0 ) std::cout << " Recv Y0: " << Particles.n_recv_y0 << std::endl; - MPI_Irecv(recv_buffer_y0_particles, buffer_length, MPI_CHREAL, source[2], 2, world, &recv_request_particles_transfer[*ireq_particles_transfer]); + // if ( Particles.n_recv_y0 > 0 ) std::cout << " Recv Y0: " << + // Particles.n_recv_y0 << std::endl; + MPI_Irecv(recv_buffer_y0_particles, buffer_length, MPI_CHREAL, source[2], 2, + world, + &recv_request_particles_transfer[*ireq_particles_transfer]); } - if ( index == 3){ + if (index == 3) { buffer_length = Particles.n_recv_y1 * N_DATA_PER_PARTICLE_TRANSFER; #ifdef PARTICLES_GPU - #ifdef MPI_GPU - if ( buffer_length > Particles.G.recv_buffer_size_y1 ){ - printf( "Extending Particles Transfer Buffer "); - Extend_GPU_Array( &recv_buffer_y1_particles, Particles.G.recv_buffer_size_y1, Particles.G.gpu_allocation_factor*buffer_length, true ); - Particles.G.recv_buffer_size_y1 = (part_int_t) Particles.G.gpu_allocation_factor*buffer_length; + #ifdef MPI_GPU + if (buffer_length > Particles.G.recv_buffer_size_y1) { + printf("Extending Particles Transfer Buffer "); + Extend_GPU_Array(&recv_buffer_y1_particles, + Particles.G.recv_buffer_size_y1, + Particles.G.gpu_allocation_factor * buffer_length, true); + Particles.G.recv_buffer_size_y1 = + (part_int_t)Particles.G.gpu_allocation_factor * buffer_length; } - #else - Check_and_Grow_Particles_Buffer( &recv_buffer_y1_particles , &buffer_length_particles_y1_recv, buffer_length ); - #endif + #else + Check_and_Grow_Particles_Buffer(&recv_buffer_y1_particles, + &buffer_length_particles_y1_recv, + buffer_length); + #endif #endif #ifdef PARTICLES_CPU - Check_and_Grow_Particles_Buffer( &recv_buffer_y1_particles , &buffer_length_particles_y1_recv, buffer_length ); + Check_and_Grow_Particles_Buffer(&recv_buffer_y1_particles, + &buffer_length_particles_y1_recv, + buffer_length); #endif - // if ( Particles.n_recv_y1 > 0 ) std::cout << " Recv Y1: " << Particles.n_recv_y1 << std::endl; - MPI_Irecv(recv_buffer_y1_particles, buffer_length, MPI_CHREAL, source[3], 3, world, &recv_request_particles_transfer[*ireq_particles_transfer]); + // if ( Particles.n_recv_y1 > 0 ) std::cout << " Recv Y1: " << + // Particles.n_recv_y1 << std::endl; + MPI_Irecv(recv_buffer_y1_particles, buffer_length, MPI_CHREAL, source[3], 3, + world, + &recv_request_particles_transfer[*ireq_particles_transfer]); } - if ( index == 4){ + if (index == 4) { buffer_length = Particles.n_recv_z0 * N_DATA_PER_PARTICLE_TRANSFER; #ifdef PARTICLES_GPU - #ifdef MPI_GPU - if ( buffer_length > Particles.G.recv_buffer_size_z0 ){ - printf( "Extending Particles Transfer Buffer "); - Extend_GPU_Array( &recv_buffer_z0_particles, Particles.G.recv_buffer_size_z0, Particles.G.gpu_allocation_factor*buffer_length, true ); - Particles.G.recv_buffer_size_z0 = (part_int_t) Particles.G.gpu_allocation_factor*buffer_length; + #ifdef MPI_GPU + if (buffer_length > Particles.G.recv_buffer_size_z0) { + printf("Extending Particles Transfer Buffer "); + Extend_GPU_Array(&recv_buffer_z0_particles, + Particles.G.recv_buffer_size_z0, + Particles.G.gpu_allocation_factor * buffer_length, true); + Particles.G.recv_buffer_size_z0 = + (part_int_t)Particles.G.gpu_allocation_factor * buffer_length; } - #else - Check_and_Grow_Particles_Buffer( &recv_buffer_z0_particles , &buffer_length_particles_z0_recv, buffer_length ); - #endif + #else + Check_and_Grow_Particles_Buffer(&recv_buffer_z0_particles, + &buffer_length_particles_z0_recv, + buffer_length); + #endif #endif #ifdef PARTICLES_CPU - Check_and_Grow_Particles_Buffer( &recv_buffer_z0_particles , &buffer_length_particles_z0_recv, buffer_length ); + Check_and_Grow_Particles_Buffer(&recv_buffer_z0_particles, + &buffer_length_particles_z0_recv, + buffer_length); #endif - // if ( Particles.n_recv_z0 > 0 ) std::cout << " Recv Z0: " << Particles.n_recv_z0 << std::endl; - MPI_Irecv(recv_buffer_z0_particles, buffer_length, MPI_CHREAL, source[4], 4, world, &recv_request_particles_transfer[*ireq_particles_transfer]); + // if ( Particles.n_recv_z0 > 0 ) std::cout << " Recv Z0: " << + // Particles.n_recv_z0 << std::endl; + MPI_Irecv(recv_buffer_z0_particles, buffer_length, MPI_CHREAL, source[4], 4, + world, + &recv_request_particles_transfer[*ireq_particles_transfer]); } - if ( index == 5){ + if (index == 5) { buffer_length = Particles.n_recv_z1 * N_DATA_PER_PARTICLE_TRANSFER; #ifdef PARTICLES_GPU - #ifdef MPI_GPU - if ( buffer_length > Particles.G.recv_buffer_size_z1 ){ - printf( "Extending Particles Transfer Buffer "); - Extend_GPU_Array( &recv_buffer_z1_particles, Particles.G.recv_buffer_size_z1, Particles.G.gpu_allocation_factor*buffer_length, true ); - Particles.G.recv_buffer_size_z1 = (part_int_t) Particles.G.gpu_allocation_factor*buffer_length; + #ifdef MPI_GPU + if (buffer_length > Particles.G.recv_buffer_size_z1) { + printf("Extending Particles Transfer Buffer "); + Extend_GPU_Array(&recv_buffer_z1_particles, + Particles.G.recv_buffer_size_z1, + Particles.G.gpu_allocation_factor * buffer_length, true); + Particles.G.recv_buffer_size_z1 = + (part_int_t)Particles.G.gpu_allocation_factor * buffer_length; } - #else - Check_and_Grow_Particles_Buffer( &recv_buffer_z1_particles , &buffer_length_particles_z1_recv, buffer_length ); - #endif + #else + Check_and_Grow_Particles_Buffer(&recv_buffer_z1_particles, + &buffer_length_particles_z1_recv, + buffer_length); + #endif #endif #ifdef PARTICLES_CPU - Check_and_Grow_Particles_Buffer( &recv_buffer_z1_particles , &buffer_length_particles_z1_recv, buffer_length ); + Check_and_Grow_Particles_Buffer(&recv_buffer_z1_particles, + &buffer_length_particles_z1_recv, + buffer_length); #endif - // if ( Particles.n_recv_z1 >0 ) std::cout << " Recv Z1: " << Particles.n_recv_z1 << std::endl; - MPI_Irecv(recv_buffer_z1_particles, buffer_length, MPI_CHREAL, source[5], 5, world, &recv_request_particles_transfer[*ireq_particles_transfer]); + // if ( Particles.n_recv_z1 >0 ) std::cout << " Recv Z1: " << + // Particles.n_recv_z1 << std::endl; + MPI_Irecv(recv_buffer_z1_particles, buffer_length, MPI_CHREAL, source[5], 5, + world, + &recv_request_particles_transfer[*ireq_particles_transfer]); } *ireq_particles_transfer += 1; } - -//Make Send and Receive request for the number of particles that will be transferred, and then load and send the transfer particles -void Grid3D::Load_and_Send_Particles_X0( int ireq_n_particles, int ireq_particles_transfer ){ +// Make Send and Receive request for the number of particles that will be +// transferred, and then load and send the transfer particles +void Grid3D::Load_and_Send_Particles_X0(int ireq_n_particles, + int ireq_particles_transfer) +{ int buffer_length; Real *send_buffer_x0_particles; - #ifdef PARTICLES_GPU + #ifdef PARTICLES_GPU send_buffer_x0_particles = d_send_buffer_x0_particles; - Particles.Load_Particles_to_Buffer_GPU(0, 0, send_buffer_x0_particles, buffer_length_particles_x0_send ); - #endif //PARTICLES_GPU - - MPI_Irecv(&Particles.n_recv_x0, 1, MPI_PART_INT, source[0], 0, world, &recv_request_n_particles[ireq_n_particles]); - MPI_Isend(&Particles.n_send_x0, 1, MPI_PART_INT, dest[0], 1, world, &send_request_n_particles[0]); + Particles.Load_Particles_to_Buffer_GPU(0, 0, send_buffer_x0_particles, + buffer_length_particles_x0_send); + #endif // PARTICLES_GPU + + MPI_Irecv(&Particles.n_recv_x0, 1, MPI_PART_INT, source[0], 0, world, + &recv_request_n_particles[ireq_n_particles]); + MPI_Isend(&Particles.n_send_x0, 1, MPI_PART_INT, dest[0], 1, world, + &send_request_n_particles[0]); MPI_Request_free(send_request_n_particles); - // if ( Particles.n_send_x0 > 0 ) if ( Particles.n_send_x0 > 0 ) std::cout << " Sent X0: " << Particles.n_send_x0 << " " << procID << " to " << dest[0] << std::endl; + // if ( Particles.n_send_x0 > 0 ) if ( Particles.n_send_x0 > 0 ) std::cout + // << " Sent X0: " << Particles.n_send_x0 << " " << procID << " to " << + // dest[0] << std::endl; buffer_length = Particles.n_send_x0 * N_DATA_PER_PARTICLE_TRANSFER; - #ifdef PARTICLES_CPU + #ifdef PARTICLES_CPU send_buffer_x0_particles = h_send_buffer_x0_particles; - Check_and_Grow_Particles_Buffer( &send_buffer_x0_particles , &buffer_length_particles_x0_send, buffer_length ); - Particles.Load_Particles_to_Buffer_CPU( 0, 0, send_buffer_x0_particles, buffer_length_particles_x0_send ); - #endif //PARTICLES_CPU - - #if defined(PARTICLES_GPU) && !defined(MPI_GPU) + Check_and_Grow_Particles_Buffer(&send_buffer_x0_particles, + &buffer_length_particles_x0_send, + buffer_length); + Particles.Load_Particles_to_Buffer_CPU(0, 0, send_buffer_x0_particles, + buffer_length_particles_x0_send); + #endif // PARTICLES_CPU + + #if defined(PARTICLES_GPU) && !defined(MPI_GPU) cudaMemcpy(h_send_buffer_x0_particles, d_send_buffer_x0_particles, - buffer_length*sizeof(Real), cudaMemcpyDeviceToHost); + buffer_length * sizeof(Real), cudaMemcpyDeviceToHost); send_buffer_x0_particles = h_send_buffer_x0_particles; - #endif + #endif - MPI_Isend(send_buffer_x0_particles, buffer_length, MPI_CHREAL, dest[0], 1, world, &send_request_particles_transfer[ireq_particles_transfer]); - MPI_Request_free(send_request_particles_transfer+ireq_particles_transfer); + MPI_Isend(send_buffer_x0_particles, buffer_length, MPI_CHREAL, dest[0], 1, + world, &send_request_particles_transfer[ireq_particles_transfer]); + MPI_Request_free(send_request_particles_transfer + ireq_particles_transfer); } -void Grid3D::Load_and_Send_Particles_X1( int ireq_n_particles, int ireq_particles_transfer ){ +void Grid3D::Load_and_Send_Particles_X1(int ireq_n_particles, + int ireq_particles_transfer) +{ int buffer_length; Real *send_buffer_x1_particles; - #ifdef PARTICLES_GPU + #ifdef PARTICLES_GPU send_buffer_x1_particles = d_send_buffer_x1_particles; - Particles.Load_Particles_to_Buffer_GPU(0, 1, send_buffer_x1_particles, buffer_length_particles_x1_send ); - #endif //PARTICLES_GPU - - MPI_Irecv(&Particles.n_recv_x1, 1, MPI_PART_INT, source[1], 1, world, &recv_request_n_particles[ireq_n_particles]); - MPI_Isend(&Particles.n_send_x1, 1, MPI_PART_INT, dest[1], 0, world, &send_request_n_particles[1]); - MPI_Request_free(send_request_n_particles+1); - // if ( Particles.n_send_x1 > 0 ) std::cout << " Sent X1: " << Particles.n_send_x1 << std::endl; + Particles.Load_Particles_to_Buffer_GPU(0, 1, send_buffer_x1_particles, + buffer_length_particles_x1_send); + #endif // PARTICLES_GPU + + MPI_Irecv(&Particles.n_recv_x1, 1, MPI_PART_INT, source[1], 1, world, + &recv_request_n_particles[ireq_n_particles]); + MPI_Isend(&Particles.n_send_x1, 1, MPI_PART_INT, dest[1], 0, world, + &send_request_n_particles[1]); + MPI_Request_free(send_request_n_particles + 1); + // if ( Particles.n_send_x1 > 0 ) std::cout << " Sent X1: " << + // Particles.n_send_x1 << std::endl; buffer_length = Particles.n_send_x1 * N_DATA_PER_PARTICLE_TRANSFER; - #ifdef PARTICLES_CPU + #ifdef PARTICLES_CPU send_buffer_x1_particles = h_send_buffer_x1_particles; - Check_and_Grow_Particles_Buffer( &send_buffer_x1_particles , &buffer_length_particles_x1_send, buffer_length ); - Particles.Load_Particles_to_Buffer_CPU( 0, 1, send_buffer_x1_particles, buffer_length_particles_x1_send ); - #endif //PARTICLES_CPU - - #if defined(PARTICLES_GPU) && !defined(MPI_GPU) + Check_and_Grow_Particles_Buffer(&send_buffer_x1_particles, + &buffer_length_particles_x1_send, + buffer_length); + Particles.Load_Particles_to_Buffer_CPU(0, 1, send_buffer_x1_particles, + buffer_length_particles_x1_send); + #endif // PARTICLES_CPU + + #if defined(PARTICLES_GPU) && !defined(MPI_GPU) cudaMemcpy(h_send_buffer_x1_particles, d_send_buffer_x1_particles, - buffer_length*sizeof(Real), cudaMemcpyDeviceToHost); + buffer_length * sizeof(Real), cudaMemcpyDeviceToHost); send_buffer_x1_particles = h_send_buffer_x1_particles; - #endif + #endif - MPI_Isend(send_buffer_x1_particles, buffer_length, MPI_CHREAL, dest[1], 0, world, &send_request_particles_transfer[ireq_particles_transfer]);\ - MPI_Request_free(send_request_particles_transfer+ireq_particles_transfer); + MPI_Isend(send_buffer_x1_particles, buffer_length, MPI_CHREAL, dest[1], 0, + world, &send_request_particles_transfer[ireq_particles_transfer]); + MPI_Request_free(send_request_particles_transfer + ireq_particles_transfer); } -void Grid3D::Load_and_Send_Particles_Y0( int ireq_n_particles, int ireq_particles_transfer ){ +void Grid3D::Load_and_Send_Particles_Y0(int ireq_n_particles, + int ireq_particles_transfer) +{ int buffer_length; Real *send_buffer_y0_particles; - #ifdef PARTICLES_GPU + #ifdef PARTICLES_GPU send_buffer_y0_particles = d_send_buffer_y0_particles; - Particles.Load_Particles_to_Buffer_GPU(1, 0, send_buffer_y0_particles, buffer_length_particles_y0_send ); - #endif //PARTICLES_GPU + Particles.Load_Particles_to_Buffer_GPU(1, 0, send_buffer_y0_particles, + buffer_length_particles_y0_send); + #endif // PARTICLES_GPU - MPI_Isend(&Particles.n_send_y0, 1, MPI_PART_INT, dest[2], 3, world, &send_request_n_particles[0]); + MPI_Isend(&Particles.n_send_y0, 1, MPI_PART_INT, dest[2], 3, world, + &send_request_n_particles[0]); MPI_Request_free(send_request_n_particles); - MPI_Irecv(&Particles.n_recv_y0, 1, MPI_PART_INT, source[2], 2, world, &recv_request_n_particles[ireq_n_particles]); - // if ( Particles.n_send_y0 > 0 ) std::cout << " Sent Y0: " << Particles.n_send_y0 << std::endl; + MPI_Irecv(&Particles.n_recv_y0, 1, MPI_PART_INT, source[2], 2, world, + &recv_request_n_particles[ireq_n_particles]); + // if ( Particles.n_send_y0 > 0 ) std::cout << " Sent Y0: " << + // Particles.n_send_y0 << std::endl; buffer_length = Particles.n_send_y0 * N_DATA_PER_PARTICLE_TRANSFER; - #ifdef PARTICLES_CPU + #ifdef PARTICLES_CPU send_buffer_y0_particles = h_send_buffer_y0_particles; - Check_and_Grow_Particles_Buffer( &send_buffer_y0_particles , &buffer_length_particles_y0_send, buffer_length ); - Particles.Load_Particles_to_Buffer_CPU( 1, 0, send_buffer_y0_particles, buffer_length_particles_y0_send ); - #endif //PARTICLES_CPU - - #if defined(PARTICLES_GPU) && !defined(MPI_GPU) + Check_and_Grow_Particles_Buffer(&send_buffer_y0_particles, + &buffer_length_particles_y0_send, + buffer_length); + Particles.Load_Particles_to_Buffer_CPU(1, 0, send_buffer_y0_particles, + buffer_length_particles_y0_send); + #endif // PARTICLES_CPU + + #if defined(PARTICLES_GPU) && !defined(MPI_GPU) cudaMemcpy(h_send_buffer_y0_particles, d_send_buffer_y0_particles, - buffer_length*sizeof(Real), cudaMemcpyDeviceToHost); + buffer_length * sizeof(Real), cudaMemcpyDeviceToHost); send_buffer_y0_particles = h_send_buffer_y0_particles; - #endif + #endif - MPI_Isend(send_buffer_y0_particles, buffer_length, MPI_CHREAL, dest[2], 3, world, &send_request_particles_transfer[ireq_particles_transfer]); - MPI_Request_free(send_request_particles_transfer+ireq_particles_transfer); + MPI_Isend(send_buffer_y0_particles, buffer_length, MPI_CHREAL, dest[2], 3, + world, &send_request_particles_transfer[ireq_particles_transfer]); + MPI_Request_free(send_request_particles_transfer + ireq_particles_transfer); } -void Grid3D::Load_and_Send_Particles_Y1( int ireq_n_particles, int ireq_particles_transfer ){ +void Grid3D::Load_and_Send_Particles_Y1(int ireq_n_particles, + int ireq_particles_transfer) +{ int buffer_length; Real *send_buffer_y1_particles; - #ifdef PARTICLES_GPU + #ifdef PARTICLES_GPU send_buffer_y1_particles = d_send_buffer_y1_particles; - Particles.Load_Particles_to_Buffer_GPU(1, 1, send_buffer_y1_particles, buffer_length_particles_y1_send ); - #endif //PARTICLES_GPU - - MPI_Isend(&Particles.n_send_y1, 1, MPI_PART_INT, dest[3], 2, world, &send_request_n_particles[1]); - MPI_Request_free(send_request_n_particles+1); - MPI_Irecv(&Particles.n_recv_y1, 1, MPI_PART_INT, source[3], 3, world, &recv_request_n_particles[ireq_n_particles]); - // if ( Particles.n_send_y1 > 0 ) std::cout << " Sent Y1: " << Particles.n_send_y1 << std::endl; + Particles.Load_Particles_to_Buffer_GPU(1, 1, send_buffer_y1_particles, + buffer_length_particles_y1_send); + #endif // PARTICLES_GPU + + MPI_Isend(&Particles.n_send_y1, 1, MPI_PART_INT, dest[3], 2, world, + &send_request_n_particles[1]); + MPI_Request_free(send_request_n_particles + 1); + MPI_Irecv(&Particles.n_recv_y1, 1, MPI_PART_INT, source[3], 3, world, + &recv_request_n_particles[ireq_n_particles]); + // if ( Particles.n_send_y1 > 0 ) std::cout << " Sent Y1: " << + // Particles.n_send_y1 << std::endl; buffer_length = Particles.n_send_y1 * N_DATA_PER_PARTICLE_TRANSFER; - #ifdef PARTICLES_CPU + #ifdef PARTICLES_CPU send_buffer_y1_particles = h_send_buffer_y1_particles; - Check_and_Grow_Particles_Buffer( &send_buffer_y1_particles , &buffer_length_particles_y1_send, buffer_length ); - Particles.Load_Particles_to_Buffer_CPU( 1, 1, send_buffer_y1_particles, buffer_length_particles_y1_send ); - #endif //PARTICLES_CPU - - #if defined(PARTICLES_GPU) && !defined(MPI_GPU) + Check_and_Grow_Particles_Buffer(&send_buffer_y1_particles, + &buffer_length_particles_y1_send, + buffer_length); + Particles.Load_Particles_to_Buffer_CPU(1, 1, send_buffer_y1_particles, + buffer_length_particles_y1_send); + #endif // PARTICLES_CPU + + #if defined(PARTICLES_GPU) && !defined(MPI_GPU) cudaMemcpy(h_send_buffer_y1_particles, d_send_buffer_y1_particles, - buffer_length*sizeof(Real), cudaMemcpyDeviceToHost); + buffer_length * sizeof(Real), cudaMemcpyDeviceToHost); send_buffer_y1_particles = h_send_buffer_y1_particles; - #endif + #endif - MPI_Isend(send_buffer_y1_particles, buffer_length, MPI_CHREAL, dest[3], 2, world, &send_request_particles_transfer[ireq_particles_transfer]); - MPI_Request_free(send_request_particles_transfer+ireq_particles_transfer); + MPI_Isend(send_buffer_y1_particles, buffer_length, MPI_CHREAL, dest[3], 2, + world, &send_request_particles_transfer[ireq_particles_transfer]); + MPI_Request_free(send_request_particles_transfer + ireq_particles_transfer); } -void Grid3D::Load_and_Send_Particles_Z0( int ireq_n_particles, int ireq_particles_transfer ){ +void Grid3D::Load_and_Send_Particles_Z0(int ireq_n_particles, + int ireq_particles_transfer) +{ int buffer_length; Real *send_buffer_z0_particles; - #ifdef PARTICLES_GPU + #ifdef PARTICLES_GPU send_buffer_z0_particles = d_send_buffer_z0_particles; - Particles.Load_Particles_to_Buffer_GPU(2, 0, send_buffer_z0_particles, buffer_length_particles_z0_send ); - #endif //PARTICLES_GPU + Particles.Load_Particles_to_Buffer_GPU(2, 0, send_buffer_z0_particles, + buffer_length_particles_z0_send); + #endif // PARTICLES_GPU - MPI_Isend(&Particles.n_send_z0, 1, MPI_PART_INT, dest[4], 5, world, &send_request_n_particles[0]); + MPI_Isend(&Particles.n_send_z0, 1, MPI_PART_INT, dest[4], 5, world, + &send_request_n_particles[0]); MPI_Request_free(send_request_n_particles); - MPI_Irecv(&Particles.n_recv_z0, 1, MPI_PART_INT, source[4], 4, world, &recv_request_n_particles[ireq_n_particles]); - // if ( Particles.n_send_z0 > 0 ) std::cout << " Sent Z0: " << Particles.n_send_z0 << std::endl; + MPI_Irecv(&Particles.n_recv_z0, 1, MPI_PART_INT, source[4], 4, world, + &recv_request_n_particles[ireq_n_particles]); + // if ( Particles.n_send_z0 > 0 ) std::cout << " Sent Z0: " << + // Particles.n_send_z0 << std::endl; buffer_length = Particles.n_send_z0 * N_DATA_PER_PARTICLE_TRANSFER; - #ifdef PARTICLES_CPU + #ifdef PARTICLES_CPU send_buffer_z0_particles = h_send_buffer_z0_particles; - Check_and_Grow_Particles_Buffer( &send_buffer_z0_particles , &buffer_length_particles_z0_send, buffer_length ); - Particles.Load_Particles_to_Buffer_CPU( 2, 0, send_buffer_z0_particles, buffer_length_particles_z0_send ); - #endif //PARTICLES_CPU - - #if defined(PARTICLES_GPU) && !defined(MPI_GPU) + Check_and_Grow_Particles_Buffer(&send_buffer_z0_particles, + &buffer_length_particles_z0_send, + buffer_length); + Particles.Load_Particles_to_Buffer_CPU(2, 0, send_buffer_z0_particles, + buffer_length_particles_z0_send); + #endif // PARTICLES_CPU + + #if defined(PARTICLES_GPU) && !defined(MPI_GPU) cudaMemcpy(h_send_buffer_z0_particles, d_send_buffer_z0_particles, - buffer_length*sizeof(Real), cudaMemcpyDeviceToHost); + buffer_length * sizeof(Real), cudaMemcpyDeviceToHost); send_buffer_z0_particles = h_send_buffer_z0_particles; - #endif + #endif - MPI_Isend(send_buffer_z0_particles, buffer_length, MPI_CHREAL, dest[4], 5, world, &send_request_particles_transfer[ireq_particles_transfer]); - MPI_Request_free(send_request_particles_transfer+ireq_particles_transfer); + MPI_Isend(send_buffer_z0_particles, buffer_length, MPI_CHREAL, dest[4], 5, + world, &send_request_particles_transfer[ireq_particles_transfer]); + MPI_Request_free(send_request_particles_transfer + ireq_particles_transfer); } -void Grid3D::Load_and_Send_Particles_Z1( int ireq_n_particles, int ireq_particles_transfer ){ +void Grid3D::Load_and_Send_Particles_Z1(int ireq_n_particles, + int ireq_particles_transfer) +{ int buffer_length; Real *send_buffer_z1_particles; - #ifdef PARTICLES_GPU + #ifdef PARTICLES_GPU send_buffer_z1_particles = d_send_buffer_z1_particles; - Particles.Load_Particles_to_Buffer_GPU(2, 1, send_buffer_z1_particles, buffer_length_particles_z1_send ); - #endif //PARTICLES_GPU - - MPI_Isend(&Particles.n_send_z1, 1, MPI_PART_INT, dest[5], 4, world, &send_request_n_particles[1]); - MPI_Request_free(send_request_n_particles+1); - MPI_Irecv(&Particles.n_recv_z1, 1, MPI_PART_INT, source[5], 5, world, &recv_request_n_particles[ireq_n_particles]); - // if ( Particles.n_send_z1 > 0 ) std::cout << " Sent Z1: " << Particles.n_send_z1 << std::endl; + Particles.Load_Particles_to_Buffer_GPU(2, 1, send_buffer_z1_particles, + buffer_length_particles_z1_send); + #endif // PARTICLES_GPU + + MPI_Isend(&Particles.n_send_z1, 1, MPI_PART_INT, dest[5], 4, world, + &send_request_n_particles[1]); + MPI_Request_free(send_request_n_particles + 1); + MPI_Irecv(&Particles.n_recv_z1, 1, MPI_PART_INT, source[5], 5, world, + &recv_request_n_particles[ireq_n_particles]); + // if ( Particles.n_send_z1 > 0 ) std::cout << " Sent Z1: " << + // Particles.n_send_z1 << std::endl; buffer_length = Particles.n_send_z1 * N_DATA_PER_PARTICLE_TRANSFER; - #ifdef PARTICLES_CPU + #ifdef PARTICLES_CPU send_buffer_z1_particles = h_send_buffer_z1_particles; - Check_and_Grow_Particles_Buffer( &send_buffer_z1_particles , &buffer_length_particles_z1_send, buffer_length ); - Particles.Load_Particles_to_Buffer_CPU( 2, 1, send_buffer_z1_particles, buffer_length_particles_z1_send ); - #endif //PARTICLES_CPU - - #if defined(PARTICLES_GPU) && !defined(MPI_GPU) + Check_and_Grow_Particles_Buffer(&send_buffer_z1_particles, + &buffer_length_particles_z1_send, + buffer_length); + Particles.Load_Particles_to_Buffer_CPU(2, 1, send_buffer_z1_particles, + buffer_length_particles_z1_send); + #endif // PARTICLES_CPU + + #if defined(PARTICLES_GPU) && !defined(MPI_GPU) cudaMemcpy(h_send_buffer_z1_particles, d_send_buffer_z1_particles, - buffer_length*sizeof(Real), cudaMemcpyDeviceToHost); + buffer_length * sizeof(Real), cudaMemcpyDeviceToHost); send_buffer_z1_particles = h_send_buffer_z1_particles; - #endif + #endif - MPI_Isend(send_buffer_z1_particles, buffer_length, MPI_CHREAL, dest[5], 4, world, &send_request_particles_transfer[ireq_particles_transfer]); - MPI_Request_free(send_request_particles_transfer+ireq_particles_transfer); + MPI_Isend(send_buffer_z1_particles, buffer_length, MPI_CHREAL, dest[5], 4, + world, &send_request_particles_transfer[ireq_particles_transfer]); + MPI_Request_free(send_request_particles_transfer + ireq_particles_transfer); } -//Unload the Transferred particles from the MPI_buffer, after buffer was received -void Grid3D::Unload_Particles_from_Buffer_X0( int *flags ){ - #ifdef PARTICLES_CPU - Particles.Unload_Particles_from_Buffer_CPU( 0, 0, h_recv_buffer_x0_particles, Particles.n_recv_x0, - h_send_buffer_y0_particles, h_send_buffer_y1_particles, h_send_buffer_z0_particles, - h_send_buffer_z1_particles, buffer_length_particles_y0_send, buffer_length_particles_y1_send, +// Unload the Transferred particles from the MPI_buffer, after buffer was +// received +void Grid3D::Unload_Particles_from_Buffer_X0(int *flags) +{ + #ifdef PARTICLES_CPU + Particles.Unload_Particles_from_Buffer_CPU( + 0, 0, h_recv_buffer_x0_particles, Particles.n_recv_x0, + h_send_buffer_y0_particles, h_send_buffer_y1_particles, + h_send_buffer_z0_particles, h_send_buffer_z1_particles, + buffer_length_particles_y0_send, buffer_length_particles_y1_send, buffer_length_particles_z0_send, buffer_length_particles_z1_send, flags); - #endif//PARTICLES_CPU - #ifdef PARTICLES_GPU - #ifndef MPI_GPU + #endif // PARTICLES_CPU + #ifdef PARTICLES_GPU + #ifndef MPI_GPU cudaMemcpy(d_recv_buffer_x0_particles, h_recv_buffer_x0_particles, - buffer_length_particles_x0_recv*sizeof(Real), + buffer_length_particles_x0_recv * sizeof(Real), cudaMemcpyHostToDevice); - #endif - Particles.Unload_Particles_from_Buffer_GPU( 0, 0, d_recv_buffer_x0_particles, Particles.n_recv_x0 ); - #endif//PARTICLES_GPU + #endif + Particles.Unload_Particles_from_Buffer_GPU(0, 0, d_recv_buffer_x0_particles, + Particles.n_recv_x0); + #endif // PARTICLES_GPU } -void Grid3D::Unload_Particles_from_Buffer_X1( int *flags ){ - #ifdef PARTICLES_CPU - Particles.Unload_Particles_from_Buffer_CPU( 0, 1, h_recv_buffer_x1_particles, Particles.n_recv_x1, - h_send_buffer_y0_particles, h_send_buffer_y1_particles, h_send_buffer_z0_particles, - h_send_buffer_z1_particles, buffer_length_particles_y0_send, buffer_length_particles_y1_send, +void Grid3D::Unload_Particles_from_Buffer_X1(int *flags) +{ + #ifdef PARTICLES_CPU + Particles.Unload_Particles_from_Buffer_CPU( + 0, 1, h_recv_buffer_x1_particles, Particles.n_recv_x1, + h_send_buffer_y0_particles, h_send_buffer_y1_particles, + h_send_buffer_z0_particles, h_send_buffer_z1_particles, + buffer_length_particles_y0_send, buffer_length_particles_y1_send, buffer_length_particles_z0_send, buffer_length_particles_z1_send, flags); - #endif//PARTICLES_CPU - #ifdef PARTICLES_GPU - #ifndef MPI_GPU + #endif // PARTICLES_CPU + #ifdef PARTICLES_GPU + #ifndef MPI_GPU cudaMemcpy(d_recv_buffer_x1_particles, h_recv_buffer_x1_particles, - buffer_length_particles_x1_recv*sizeof(Real), + buffer_length_particles_x1_recv * sizeof(Real), cudaMemcpyHostToDevice); - #endif - Particles.Unload_Particles_from_Buffer_GPU( 0, 1, d_recv_buffer_x1_particles, Particles.n_recv_x1 ); - #endif//PARTICLES_GPU + #endif + Particles.Unload_Particles_from_Buffer_GPU(0, 1, d_recv_buffer_x1_particles, + Particles.n_recv_x1); + #endif // PARTICLES_GPU } -void Grid3D::Unload_Particles_from_Buffer_Y0( int *flags ){ - #ifdef PARTICLES_CPU - Particles.Unload_Particles_from_Buffer_CPU( 1, 0, h_recv_buffer_y0_particles, Particles.n_recv_y0, - h_send_buffer_y0_particles, h_send_buffer_y1_particles, h_send_buffer_z0_particles, - h_send_buffer_z1_particles, buffer_length_particles_y0_send , buffer_length_particles_y1_send, +void Grid3D::Unload_Particles_from_Buffer_Y0(int *flags) +{ + #ifdef PARTICLES_CPU + Particles.Unload_Particles_from_Buffer_CPU( + 1, 0, h_recv_buffer_y0_particles, Particles.n_recv_y0, + h_send_buffer_y0_particles, h_send_buffer_y1_particles, + h_send_buffer_z0_particles, h_send_buffer_z1_particles, + buffer_length_particles_y0_send, buffer_length_particles_y1_send, buffer_length_particles_z0_send, buffer_length_particles_z1_send, flags); - #endif//PARTICLES_CPU - #ifdef PARTICLES_GPU - #ifndef MPI_GPU + #endif // PARTICLES_CPU + #ifdef PARTICLES_GPU + #ifndef MPI_GPU cudaMemcpy(d_recv_buffer_y0_particles, h_recv_buffer_y0_particles, - buffer_length_particles_y0_recv*sizeof(Real), + buffer_length_particles_y0_recv * sizeof(Real), cudaMemcpyHostToDevice); - #endif - Particles.Unload_Particles_from_Buffer_GPU( 1, 0, d_recv_buffer_y0_particles, Particles.n_recv_y0 ); - #endif//PARTICLES_GPU + #endif + Particles.Unload_Particles_from_Buffer_GPU(1, 0, d_recv_buffer_y0_particles, + Particles.n_recv_y0); + #endif // PARTICLES_GPU } -void Grid3D::Unload_Particles_from_Buffer_Y1( int *flags ){ - #ifdef PARTICLES_CPU - Particles.Unload_Particles_from_Buffer_CPU( 1, 1, h_recv_buffer_y1_particles, Particles.n_recv_y1, - h_send_buffer_y0_particles, h_send_buffer_y1_particles, h_send_buffer_z0_particles, - h_send_buffer_z1_particles, buffer_length_particles_y0_send , buffer_length_particles_y1_send, +void Grid3D::Unload_Particles_from_Buffer_Y1(int *flags) +{ + #ifdef PARTICLES_CPU + Particles.Unload_Particles_from_Buffer_CPU( + 1, 1, h_recv_buffer_y1_particles, Particles.n_recv_y1, + h_send_buffer_y0_particles, h_send_buffer_y1_particles, + h_send_buffer_z0_particles, h_send_buffer_z1_particles, + buffer_length_particles_y0_send, buffer_length_particles_y1_send, buffer_length_particles_z0_send, buffer_length_particles_z1_send, flags); - #endif//PARTICLES_CPU - #ifdef PARTICLES_GPU - #ifndef MPI_GPU + #endif // PARTICLES_CPU + #ifdef PARTICLES_GPU + #ifndef MPI_GPU cudaMemcpy(d_recv_buffer_y1_particles, h_recv_buffer_y1_particles, - buffer_length_particles_y1_recv*sizeof(Real), + buffer_length_particles_y1_recv * sizeof(Real), cudaMemcpyHostToDevice); - #endif - Particles.Unload_Particles_from_Buffer_GPU( 1, 1, d_recv_buffer_y1_particles, Particles.n_recv_y1 ); - #endif//PARTICLES_GPU + #endif + Particles.Unload_Particles_from_Buffer_GPU(1, 1, d_recv_buffer_y1_particles, + Particles.n_recv_y1); + #endif // PARTICLES_GPU } -void Grid3D::Unload_Particles_from_Buffer_Z0( int *flags ){ - #ifdef PARTICLES_CPU - Particles.Unload_Particles_from_Buffer_CPU( 2, 0, h_recv_buffer_z0_particles, Particles.n_recv_z0, - h_send_buffer_y0_particles, h_send_buffer_y1_particles, h_send_buffer_z0_particles, - h_send_buffer_z1_particles, buffer_length_particles_y0_send , buffer_length_particles_y1_send, +void Grid3D::Unload_Particles_from_Buffer_Z0(int *flags) +{ + #ifdef PARTICLES_CPU + Particles.Unload_Particles_from_Buffer_CPU( + 2, 0, h_recv_buffer_z0_particles, Particles.n_recv_z0, + h_send_buffer_y0_particles, h_send_buffer_y1_particles, + h_send_buffer_z0_particles, h_send_buffer_z1_particles, + buffer_length_particles_y0_send, buffer_length_particles_y1_send, buffer_length_particles_z0_send, buffer_length_particles_z1_send, flags); - #endif//PARTICLES_CPU - #ifdef PARTICLES_GPU - #ifndef MPI_GPU + #endif // PARTICLES_CPU + #ifdef PARTICLES_GPU + #ifndef MPI_GPU cudaMemcpy(d_recv_buffer_z0_particles, h_recv_buffer_z0_particles, - buffer_length_particles_z0_recv*sizeof(Real), + buffer_length_particles_z0_recv * sizeof(Real), cudaMemcpyHostToDevice); - #endif - Particles.Unload_Particles_from_Buffer_GPU( 2, 0, d_recv_buffer_z0_particles, Particles.n_recv_z0 ); - #endif//PARTICLES_GPU + #endif + Particles.Unload_Particles_from_Buffer_GPU(2, 0, d_recv_buffer_z0_particles, + Particles.n_recv_z0); + #endif // PARTICLES_GPU } -void Grid3D::Unload_Particles_from_Buffer_Z1( int *flags ){ - #ifdef PARTICLES_CPU - Particles.Unload_Particles_from_Buffer_CPU( 2, 1, h_recv_buffer_z1_particles, Particles.n_recv_z1, - h_send_buffer_y0_particles, h_send_buffer_y1_particles, h_send_buffer_z0_particles, - h_send_buffer_z1_particles, buffer_length_particles_y0_send , buffer_length_particles_y1_send, +void Grid3D::Unload_Particles_from_Buffer_Z1(int *flags) +{ + #ifdef PARTICLES_CPU + Particles.Unload_Particles_from_Buffer_CPU( + 2, 1, h_recv_buffer_z1_particles, Particles.n_recv_z1, + h_send_buffer_y0_particles, h_send_buffer_y1_particles, + h_send_buffer_z0_particles, h_send_buffer_z1_particles, + buffer_length_particles_y0_send, buffer_length_particles_y1_send, buffer_length_particles_z0_send, buffer_length_particles_z1_send, flags); - #endif//PARTICLES_CPU - #ifdef PARTICLES_GPU - #ifndef MPI_GPU + #endif // PARTICLES_CPU + #ifdef PARTICLES_GPU + #ifndef MPI_GPU cudaMemcpy(d_recv_buffer_z1_particles, h_recv_buffer_z1_particles, - buffer_length_particles_z1_recv*sizeof(Real), + buffer_length_particles_z1_recv * sizeof(Real), cudaMemcpyHostToDevice); - #endif - Particles.Unload_Particles_from_Buffer_GPU( 2, 1, d_recv_buffer_z1_particles, Particles.n_recv_z1 ); - #endif//PARTICLES_GPU + #endif + Particles.Unload_Particles_from_Buffer_GPU(2, 1, d_recv_buffer_z1_particles, + Particles.n_recv_z1); + #endif // PARTICLES_GPU } - -//Find the particles that moved outside the local domain in order to transfer them. -void Particles_3D::Select_Particles_to_Transfer_All( int *flags ){ - - #ifdef PARTICLES_CPU - Select_Particles_to_Transfer_All_CPU( flags ); - #endif//PARTICLES_CPU +// Find the particles that moved outside the local domain in order to transfer +// them. +void Particles_3D::Select_Particles_to_Transfer_All(int *flags) +{ + #ifdef PARTICLES_CPU + Select_Particles_to_Transfer_All_CPU(flags); + #endif // PARTICLES_CPU // When using PARTICLES_GPU the particles that need to be Transferred // are selected on the Load_Buffer_GPU functions - } - -void Particles_3D::Clear_Particles_For_Transfer( void ){ - - //Set the number of transferred particles to 0. +void Particles_3D::Clear_Particles_For_Transfer(void) +{ + // Set the number of transferred particles to 0. n_transfer_x0 = 0; n_transfer_x1 = 0; n_transfer_y0 = 0; @@ -603,7 +748,7 @@ void Particles_3D::Clear_Particles_For_Transfer( void ){ n_transfer_z0 = 0; n_transfer_z1 = 0; - //Set the number of send particles to 0. + // Set the number of send particles to 0. n_send_x0 = 0; n_send_x1 = 0; n_send_y0 = 0; @@ -611,7 +756,7 @@ void Particles_3D::Clear_Particles_For_Transfer( void ){ n_send_z0 = 0; n_send_z1 = 0; - //Set the number of received particles to 0. + // Set the number of received particles to 0. n_recv_x0 = 0; n_recv_x1 = 0; n_recv_y0 = 0; @@ -619,7 +764,7 @@ void Particles_3D::Clear_Particles_For_Transfer( void ){ n_recv_z0 = 0; n_recv_z1 = 0; - //Set the number of particles in transfer buffers to 0. + // Set the number of particles in transfer buffers to 0. n_in_buffer_x0 = 0; n_in_buffer_x1 = 0; n_in_buffer_y0 = 0; @@ -627,47 +772,56 @@ void Particles_3D::Clear_Particles_For_Transfer( void ){ n_in_buffer_z0 = 0; n_in_buffer_z1 = 0; - - #ifdef PARTICLES_CPU - //Clear the particles indices that were transferred during the previous timestep + #ifdef PARTICLES_CPU + // Clear the particles indices that were transferred during the previous + // timestep Clear_Vectors_For_Transfers(); - #endif //PARTICLES_CPU - + #endif // PARTICLES_CPU } -#ifdef PARTICLES_GPU - -int Particles_3D::Select_Particles_to_Transfer_GPU( int direction, int side ){ + #ifdef PARTICLES_GPU +int Particles_3D::Select_Particles_to_Transfer_GPU(int direction, int side) +{ int n_transfer; Real *pos; Real domainMin, domainMax; - if ( direction == 0 ){ - pos = pos_x_dev; + if (direction == 0) { + pos = pos_x_dev; domainMax = G.xMax; domainMin = G.xMin; } - if ( direction == 1 ){ - pos = pos_y_dev; + if (direction == 1) { + pos = pos_y_dev; domainMax = G.yMax; domainMin = G.yMin; } - if ( direction == 2 ){ - pos = pos_z_dev; + if (direction == 2) { + pos = pos_z_dev; domainMax = G.zMax; domainMin = G.zMin; } - //chprintf("n_local=%d SELECT PARTICLES: %d dir, %d side. Max/Min %.4e/%.4e \n", n_local, direction, side, domainMax, domainMin); - //Set the number of particles that will be sent and load the particles data into the transfer buffers - n_transfer = Select_Particles_to_Transfer_GPU_function( n_local, side, domainMin, domainMax, pos, G.n_transfer_d, G.n_transfer_h, G.transfer_particles_flags_d, G.transfer_particles_indices_d, G.replace_particles_indices_d, G.transfer_particles_prefix_sum_d, G.transfer_particles_prefix_sum_blocks_d ); + // chprintf("n_local=%d SELECT PARTICLES: %d dir, %d side. Max/Min %.4e/%.4e + // \n", n_local, direction, side, domainMax, domainMin); Set the number of + // particles that will be sent and load the particles data into the transfer + // buffers + n_transfer = Select_Particles_to_Transfer_GPU_function( + n_local, side, domainMin, domainMax, pos, G.n_transfer_d, G.n_transfer_h, + G.transfer_particles_flags_d, G.transfer_particles_indices_d, + G.replace_particles_indices_d, G.transfer_particles_prefix_sum_d, + G.transfer_particles_prefix_sum_blocks_d); CHECK(cudaDeviceSynchronize()); return n_transfer; } -void Particles_3D::Copy_Transfer_Particles_to_Buffer_GPU(int n_transfer, int direction, int side, Real *send_buffer_h, int buffer_length ){ - +void Particles_3D::Copy_Transfer_Particles_to_Buffer_GPU(int n_transfer, + int direction, + int side, + Real *send_buffer_h, + int buffer_length) +{ part_int_t *n_send; int *buffer_size; int n_fields_to_transfer; @@ -676,136 +830,189 @@ void Particles_3D::Copy_Transfer_Particles_to_Buffer_GPU(int n_transfer, int dir int bt_pos_x, bt_pos_y, bt_pos_z, bt_non_pos; int field_id = -1; - bt_pos_x = -1; - bt_pos_y = -1; - bt_pos_z = -1; + bt_pos_x = -1; + bt_pos_y = -1; + bt_pos_z = -1; bt_non_pos = -1; - if ( direction == 0 ){ - pos = pos_x_dev; + if (direction == 0) { + pos = pos_x_dev; domainMin = G.domainMin_x; domainMax = G.domainMax_x; - if ( side == 0 ){ - n_send = &n_send_x0; - buffer_size = &G.send_buffer_size_x0; + if (side == 0) { + n_send = &n_send_x0; + buffer_size = &G.send_buffer_size_x0; send_buffer_d = G.send_buffer_x0_d; - bt_pos_x = G.boundary_type_x0; + bt_pos_x = G.boundary_type_x0; } - if ( side == 1 ){ - n_send = &n_send_x1; - buffer_size = &G.send_buffer_size_x1; + if (side == 1) { + n_send = &n_send_x1; + buffer_size = &G.send_buffer_size_x1; send_buffer_d = G.send_buffer_x1_d; - bt_pos_x = G.boundary_type_x1; + bt_pos_x = G.boundary_type_x1; } } - if ( direction == 1 ){ - pos = pos_y_dev; + if (direction == 1) { + pos = pos_y_dev; domainMin = G.domainMin_y; domainMax = G.domainMax_y; - if ( side == 0 ){ - n_send = &n_send_y0; - buffer_size = &G.send_buffer_size_y0; + if (side == 0) { + n_send = &n_send_y0; + buffer_size = &G.send_buffer_size_y0; send_buffer_d = G.send_buffer_y0_d; - bt_pos_y = G.boundary_type_y0; + bt_pos_y = G.boundary_type_y0; } - if ( side == 1 ){ - n_send = &n_send_y1; - buffer_size = &G.send_buffer_size_y1; + if (side == 1) { + n_send = &n_send_y1; + buffer_size = &G.send_buffer_size_y1; send_buffer_d = G.send_buffer_y1_d; - bt_pos_y = G.boundary_type_y1; + bt_pos_y = G.boundary_type_y1; } } - if ( direction == 2 ){ - pos = pos_z_dev; + if (direction == 2) { + pos = pos_z_dev; domainMin = G.domainMin_z; domainMax = G.domainMax_z; - if ( side == 0 ){ - n_send = &n_send_z0; - buffer_size = &G.send_buffer_size_z0; + if (side == 0) { + n_send = &n_send_z0; + buffer_size = &G.send_buffer_size_z0; send_buffer_d = G.send_buffer_z0_d; - bt_pos_z = G.boundary_type_z0; + bt_pos_z = G.boundary_type_z0; } - if ( side == 1 ){ - n_send = &n_send_z1; - buffer_size = &G.send_buffer_size_z1; + if (side == 1) { + n_send = &n_send_z1; + buffer_size = &G.send_buffer_size_z1; send_buffer_d = G.send_buffer_z1_d; - bt_pos_z = G.boundary_type_z1; + bt_pos_z = G.boundary_type_z1; } } - // If the number of particles in the array exceeds the size of the array, extend the array - if ( (*n_send + n_transfer)*N_DATA_PER_PARTICLE_TRANSFER > *buffer_size ){ - printf( "Extending Particles Transfer Buffer "); - Extend_GPU_Array( &send_buffer_d, *buffer_size, G.gpu_allocation_factor*(*n_send + n_transfer)*N_DATA_PER_PARTICLE_TRANSFER, true ); - *buffer_size = (part_int_t) G.gpu_allocation_factor*(*n_send + n_transfer)*N_DATA_PER_PARTICLE_TRANSFER; + // If the number of particles in the array exceeds the size of the array, + // extend the array + if ((*n_send + n_transfer) * N_DATA_PER_PARTICLE_TRANSFER > *buffer_size) { + printf("Extending Particles Transfer Buffer "); + Extend_GPU_Array(&send_buffer_d, *buffer_size, + G.gpu_allocation_factor * (*n_send + n_transfer) * + N_DATA_PER_PARTICLE_TRANSFER, + true); + *buffer_size = (part_int_t)G.gpu_allocation_factor * + (*n_send + n_transfer) * N_DATA_PER_PARTICLE_TRANSFER; } // Load the particles that will be transferred into the buffers n_fields_to_transfer = N_DATA_PER_PARTICLE_TRANSFER; - Load_Particles_to_Transfer_GPU_function( n_transfer, ++field_id, n_fields_to_transfer, pos_x_dev, G.transfer_particles_indices_d, send_buffer_d, domainMin, domainMax, bt_pos_x ); - Load_Particles_to_Transfer_GPU_function( n_transfer, ++field_id, n_fields_to_transfer, pos_y_dev, G.transfer_particles_indices_d, send_buffer_d, domainMin, domainMax, bt_pos_y ); - Load_Particles_to_Transfer_GPU_function( n_transfer, ++field_id, n_fields_to_transfer, pos_z_dev, G.transfer_particles_indices_d, send_buffer_d, domainMin, domainMax, bt_pos_z ); - Load_Particles_to_Transfer_GPU_function( n_transfer, ++field_id, n_fields_to_transfer, vel_x_dev, G.transfer_particles_indices_d, send_buffer_d, domainMin, domainMax, bt_non_pos ); - Load_Particles_to_Transfer_GPU_function( n_transfer, ++field_id, n_fields_to_transfer, vel_y_dev, G.transfer_particles_indices_d, send_buffer_d, domainMin, domainMax, bt_non_pos ); - Load_Particles_to_Transfer_GPU_function( n_transfer, ++field_id, n_fields_to_transfer, vel_z_dev, G.transfer_particles_indices_d, send_buffer_d, domainMin, domainMax, bt_non_pos ); - #ifndef SINGLE_PARTICLE_MASS - Load_Particles_to_Transfer_GPU_function( n_transfer, ++field_id, n_fields_to_transfer, mass_dev, G.transfer_particles_indices_d, send_buffer_d, domainMin, domainMax, bt_non_pos ); - #endif - #ifdef PARTICLE_IDS - Load_Particles_to_Transfer_Int_GPU_function( n_transfer, ++field_id, n_fields_to_transfer, partIDs_dev, G.transfer_particles_indices_d, send_buffer_d, domainMin, domainMax, bt_non_pos ); - #endif - #ifdef PARTICLE_AGE - Load_Particles_to_Transfer_GPU_function( n_transfer, ++field_id, n_fields_to_transfer, age_dev, G.transfer_particles_indices_d, send_buffer_d, domainMin, domainMax, bt_non_pos ); - #endif + Load_Particles_to_Transfer_GPU_function( + n_transfer, ++field_id, n_fields_to_transfer, pos_x_dev, + G.transfer_particles_indices_d, send_buffer_d, domainMin, domainMax, + bt_pos_x); + Load_Particles_to_Transfer_GPU_function( + n_transfer, ++field_id, n_fields_to_transfer, pos_y_dev, + G.transfer_particles_indices_d, send_buffer_d, domainMin, domainMax, + bt_pos_y); + Load_Particles_to_Transfer_GPU_function( + n_transfer, ++field_id, n_fields_to_transfer, pos_z_dev, + G.transfer_particles_indices_d, send_buffer_d, domainMin, domainMax, + bt_pos_z); + Load_Particles_to_Transfer_GPU_function( + n_transfer, ++field_id, n_fields_to_transfer, vel_x_dev, + G.transfer_particles_indices_d, send_buffer_d, domainMin, domainMax, + bt_non_pos); + Load_Particles_to_Transfer_GPU_function( + n_transfer, ++field_id, n_fields_to_transfer, vel_y_dev, + G.transfer_particles_indices_d, send_buffer_d, domainMin, domainMax, + bt_non_pos); + Load_Particles_to_Transfer_GPU_function( + n_transfer, ++field_id, n_fields_to_transfer, vel_z_dev, + G.transfer_particles_indices_d, send_buffer_d, domainMin, domainMax, + bt_non_pos); + #ifndef SINGLE_PARTICLE_MASS + Load_Particles_to_Transfer_GPU_function( + n_transfer, ++field_id, n_fields_to_transfer, mass_dev, + G.transfer_particles_indices_d, send_buffer_d, domainMin, domainMax, + bt_non_pos); + #endif + #ifdef PARTICLE_IDS + Load_Particles_to_Transfer_Int_GPU_function( + n_transfer, ++field_id, n_fields_to_transfer, partIDs_dev, + G.transfer_particles_indices_d, send_buffer_d, domainMin, domainMax, + bt_non_pos); + #endif + #ifdef PARTICLE_AGE + Load_Particles_to_Transfer_GPU_function( + n_transfer, ++field_id, n_fields_to_transfer, age_dev, + G.transfer_particles_indices_d, send_buffer_d, domainMin, domainMax, + bt_non_pos); + #endif CHECK(cudaDeviceSynchronize()); *n_send += n_transfer; // if ( *n_send > 0 ) printf( "###Transfered %ld particles\n", *n_send); } - - -void Particles_3D::Replace_Tranfered_Particles_GPU( int n_transfer ){ - +void Particles_3D::Replace_Tranfered_Particles_GPU(int n_transfer) +{ // Replace the particles that were transferred - Replace_Transfered_Particles_GPU_function( n_transfer, pos_x_dev, G.transfer_particles_indices_d, G.replace_particles_indices_d, false ); - Replace_Transfered_Particles_GPU_function( n_transfer, pos_y_dev, G.transfer_particles_indices_d, G.replace_particles_indices_d, false ); - Replace_Transfered_Particles_GPU_function( n_transfer, pos_z_dev, G.transfer_particles_indices_d, G.replace_particles_indices_d, false ); - Replace_Transfered_Particles_GPU_function( n_transfer, vel_x_dev, G.transfer_particles_indices_d, G.replace_particles_indices_d, false ); - Replace_Transfered_Particles_GPU_function( n_transfer, vel_y_dev, G.transfer_particles_indices_d, G.replace_particles_indices_d, false ); - Replace_Transfered_Particles_GPU_function( n_transfer, vel_z_dev, G.transfer_particles_indices_d, G.replace_particles_indices_d, false ); - #ifndef SINGLE_PARTICLE_MASS - Replace_Transfered_Particles_GPU_function( n_transfer, mass_dev, G.transfer_particles_indices_d, G.replace_particles_indices_d, false ); - #endif - #ifdef PARTICLE_IDS - Replace_Transfered_Particles_Int_GPU_function( n_transfer, partIDs_dev, G.transfer_particles_indices_d, G.replace_particles_indices_d, false ); - #endif - #ifdef PARTICLE_AGE - Replace_Transfered_Particles_GPU_function( n_transfer, age_dev, G.transfer_particles_indices_d, G.replace_particles_indices_d, false ); - #endif + Replace_Transfered_Particles_GPU_function( + n_transfer, pos_x_dev, G.transfer_particles_indices_d, + G.replace_particles_indices_d, false); + Replace_Transfered_Particles_GPU_function( + n_transfer, pos_y_dev, G.transfer_particles_indices_d, + G.replace_particles_indices_d, false); + Replace_Transfered_Particles_GPU_function( + n_transfer, pos_z_dev, G.transfer_particles_indices_d, + G.replace_particles_indices_d, false); + Replace_Transfered_Particles_GPU_function( + n_transfer, vel_x_dev, G.transfer_particles_indices_d, + G.replace_particles_indices_d, false); + Replace_Transfered_Particles_GPU_function( + n_transfer, vel_y_dev, G.transfer_particles_indices_d, + G.replace_particles_indices_d, false); + Replace_Transfered_Particles_GPU_function( + n_transfer, vel_z_dev, G.transfer_particles_indices_d, + G.replace_particles_indices_d, false); + #ifndef SINGLE_PARTICLE_MASS + Replace_Transfered_Particles_GPU_function( + n_transfer, mass_dev, G.transfer_particles_indices_d, + G.replace_particles_indices_d, false); + #endif + #ifdef PARTICLE_IDS + Replace_Transfered_Particles_Int_GPU_function( + n_transfer, partIDs_dev, G.transfer_particles_indices_d, + G.replace_particles_indices_d, false); + #endif + #ifdef PARTICLE_AGE + Replace_Transfered_Particles_GPU_function( + n_transfer, age_dev, G.transfer_particles_indices_d, + G.replace_particles_indices_d, false); + #endif CHECK(cudaDeviceSynchronize()); // Update the local number of particles n_local -= n_transfer; - } - -void Particles_3D::Load_Particles_to_Buffer_GPU( int direction, int side, Real *send_buffer_h, int buffer_length ){ +void Particles_3D::Load_Particles_to_Buffer_GPU(int direction, int side, + Real *send_buffer_h, + int buffer_length) +{ int n_transfer; - n_transfer = Select_Particles_to_Transfer_GPU( direction, side ); + n_transfer = Select_Particles_to_Transfer_GPU(direction, side); - Copy_Transfer_Particles_to_Buffer_GPU( n_transfer, direction, side, send_buffer_h, buffer_length ); + Copy_Transfer_Particles_to_Buffer_GPU(n_transfer, direction, side, + send_buffer_h, buffer_length); - Replace_Tranfered_Particles_GPU( n_transfer ); + Replace_Tranfered_Particles_GPU(n_transfer); } - -/** - * Open boundary conditions follows the same logic as Load_Particles_to_Buffer_GPU, except that the particles that are selected for transfer are - * not moved into any buffer (Copy_Transfer_Particles_to_Buffer_GPU step is skipped). Also the domainMix/domainMax are the global min/max values. +/** + * Open boundary conditions follows the same logic as + * Load_Particles_to_Buffer_GPU, except that the particles that are selected for + * transfer are not moved into any buffer (Copy_Transfer_Particles_to_Buffer_GPU + * step is skipped). Also the domainMix/domainMax are the global min/max + * values. */ -void Particles_3D::Set_Particles_Open_Boundary_GPU( int dir, int side ){ +void Particles_3D::Set_Particles_Open_Boundary_GPU(int dir, int side) +{ int n_transfer; /*Real *pos; Real domainMin, domainMax; @@ -823,105 +1030,129 @@ void Particles_3D::Set_Particles_Open_Boundary_GPU( int dir, int side ){ domainMax = G.domainMax_z; }*/ n_transfer = Select_Particles_to_Transfer_GPU(dir, side); - //n_transfer = Select_Particles_to_Transfer_GPU_function( n_local, side, domainMin, domainMax, pos, G.n_transfer_d, G.n_transfer_h, G.transfer_particles_flags_d, G.transfer_particles_indices_d, G.replace_particles_indices_d, G.transfer_particles_prefix_sum_d, G.transfer_particles_prefix_sum_blocks_d ); - //CHECK(cudaDeviceSynchronize()); - //chprintf("OPEN condition: removing %d\n", n_transfer); - Replace_Tranfered_Particles_GPU( n_transfer ); + // n_transfer = Select_Particles_to_Transfer_GPU_function( n_local, side, + // domainMin, domainMax, pos, G.n_transfer_d, G.n_transfer_h, + // G.transfer_particles_flags_d, G.transfer_particles_indices_d, + // G.replace_particles_indices_d, G.transfer_particles_prefix_sum_d, + // G.transfer_particles_prefix_sum_blocks_d ); + // CHECK(cudaDeviceSynchronize()); + // chprintf("OPEN condition: removing %d\n", n_transfer); + Replace_Tranfered_Particles_GPU(n_transfer); } - -void Particles_3D::Copy_Transfer_Particles_from_Buffer_GPU(int n_recv, Real *recv_buffer_d ){ +void Particles_3D::Copy_Transfer_Particles_from_Buffer_GPU(int n_recv, + Real *recv_buffer_d) +{ int n_fields_to_transfer; part_int_t n_local_after = n_local + n_recv; - if ( n_local_after > particles_array_size ){ - printf(" Reallocating GPU particles arrays. N local particles: %ld \n", n_local_after ); + if (n_local_after > particles_array_size) { + printf(" Reallocating GPU particles arrays. N local particles: %ld \n", + n_local_after); int new_size = G.gpu_allocation_factor * n_local_after; - Extend_GPU_Array( &pos_x_dev, (int) particles_array_size, new_size, true ); - Extend_GPU_Array( &pos_y_dev, (int) particles_array_size, new_size, false ); - Extend_GPU_Array( &pos_z_dev, (int) particles_array_size, new_size, false ); - Extend_GPU_Array( &vel_x_dev, (int) particles_array_size, new_size, false ); - Extend_GPU_Array( &vel_y_dev, (int) particles_array_size, new_size, false ); - Extend_GPU_Array( &vel_z_dev, (int) particles_array_size, new_size, false ); - Extend_GPU_Array( &grav_x_dev, (int) particles_array_size, new_size, false ); - Extend_GPU_Array( &grav_y_dev, (int) particles_array_size, new_size, false ); - Extend_GPU_Array( &grav_z_dev, (int) particles_array_size, new_size, false ); - #ifndef SINGLE_PARTICLE_MASS - Extend_GPU_Array( &mass_dev, (int)particles_array_size, new_size, false ); - #endif - #ifdef PARTICLE_IDS - Extend_GPU_Array( &partIDs_dev, (int)particles_array_size, new_size, false ); - #endif - #ifdef PARTICLE_AGE - Extend_GPU_Array( &age_dev, (int)particles_array_size, new_size, false ); - #endif - particles_array_size = (part_int_t) new_size; + Extend_GPU_Array(&pos_x_dev, (int)particles_array_size, new_size, true); + Extend_GPU_Array(&pos_y_dev, (int)particles_array_size, new_size, false); + Extend_GPU_Array(&pos_z_dev, (int)particles_array_size, new_size, false); + Extend_GPU_Array(&vel_x_dev, (int)particles_array_size, new_size, false); + Extend_GPU_Array(&vel_y_dev, (int)particles_array_size, new_size, false); + Extend_GPU_Array(&vel_z_dev, (int)particles_array_size, new_size, false); + Extend_GPU_Array(&grav_x_dev, (int)particles_array_size, new_size, false); + Extend_GPU_Array(&grav_y_dev, (int)particles_array_size, new_size, false); + Extend_GPU_Array(&grav_z_dev, (int)particles_array_size, new_size, false); + #ifndef SINGLE_PARTICLE_MASS + Extend_GPU_Array(&mass_dev, (int)particles_array_size, new_size, false); + #endif + #ifdef PARTICLE_IDS + Extend_GPU_Array(&partIDs_dev, (int)particles_array_size, new_size, false); + #endif + #ifdef PARTICLE_AGE + Extend_GPU_Array(&age_dev, (int)particles_array_size, new_size, false); + #endif + particles_array_size = (part_int_t)new_size; ReAllocate_Memory_GPU_MPI(); } // Unload the particles that were transferred from the buffers - int field_id = -1; + int field_id = -1; n_fields_to_transfer = N_DATA_PER_PARTICLE_TRANSFER; - Unload_Particles_to_Transfer_GPU_function( n_local, n_recv, ++field_id, n_fields_to_transfer, pos_x_dev, recv_buffer_d ); - Unload_Particles_to_Transfer_GPU_function( n_local, n_recv, ++field_id, n_fields_to_transfer, pos_y_dev, recv_buffer_d ); - Unload_Particles_to_Transfer_GPU_function( n_local, n_recv, ++field_id, n_fields_to_transfer, pos_z_dev, recv_buffer_d ); - Unload_Particles_to_Transfer_GPU_function( n_local, n_recv, ++field_id, n_fields_to_transfer, vel_x_dev, recv_buffer_d ); - Unload_Particles_to_Transfer_GPU_function( n_local, n_recv, ++field_id, n_fields_to_transfer, vel_y_dev, recv_buffer_d ); - Unload_Particles_to_Transfer_GPU_function( n_local, n_recv, ++field_id, n_fields_to_transfer, vel_z_dev, recv_buffer_d ); - #ifndef SINGLE_PARTICLE_MASS - Unload_Particles_to_Transfer_GPU_function( n_local, n_recv, ++field_id, n_fields_to_transfer, mass_dev, recv_buffer_d ); - #endif - #ifdef PARTICLE_IDS - Unload_Particles_Int_to_Transfer_GPU_function( n_local, n_recv, ++field_id, n_fields_to_transfer, partIDs_dev, recv_buffer_d ); - #endif - #ifdef PARTICLE_AGE - Unload_Particles_to_Transfer_GPU_function( n_local, n_recv, ++field_id, n_fields_to_transfer, age_dev, recv_buffer_d ); - #endif + Unload_Particles_to_Transfer_GPU_function(n_local, n_recv, ++field_id, + n_fields_to_transfer, pos_x_dev, + recv_buffer_d); + Unload_Particles_to_Transfer_GPU_function(n_local, n_recv, ++field_id, + n_fields_to_transfer, pos_y_dev, + recv_buffer_d); + Unload_Particles_to_Transfer_GPU_function(n_local, n_recv, ++field_id, + n_fields_to_transfer, pos_z_dev, + recv_buffer_d); + Unload_Particles_to_Transfer_GPU_function(n_local, n_recv, ++field_id, + n_fields_to_transfer, vel_x_dev, + recv_buffer_d); + Unload_Particles_to_Transfer_GPU_function(n_local, n_recv, ++field_id, + n_fields_to_transfer, vel_y_dev, + recv_buffer_d); + Unload_Particles_to_Transfer_GPU_function(n_local, n_recv, ++field_id, + n_fields_to_transfer, vel_z_dev, + recv_buffer_d); + #ifndef SINGLE_PARTICLE_MASS + Unload_Particles_to_Transfer_GPU_function(n_local, n_recv, ++field_id, + n_fields_to_transfer, mass_dev, + recv_buffer_d); + #endif + #ifdef PARTICLE_IDS + Unload_Particles_Int_to_Transfer_GPU_function(n_local, n_recv, ++field_id, + n_fields_to_transfer, + partIDs_dev, recv_buffer_d); + #endif + #ifdef PARTICLE_AGE + Unload_Particles_to_Transfer_GPU_function(n_local, n_recv, ++field_id, + n_fields_to_transfer, age_dev, + recv_buffer_d); + #endif n_local += n_recv; // if ( n_recv > 0 ) printf( "###Unloaded %d particles\n", n_recv ); } - - -void Particles_3D::Unload_Particles_from_Buffer_GPU( int direction, int side , Real *recv_buffer_h, int n_recv ){ - +void Particles_3D::Unload_Particles_from_Buffer_GPU(int direction, int side, + Real *recv_buffer_h, + int n_recv) +{ int buffer_size; Real domainMin, domainMax; Real *recv_buffer_d; - if ( direction == 0 ){ + if (direction == 0) { domainMin = G.domainMin_x; domainMin = G.domainMax_x; - if ( side == 0 ){ + if (side == 0) { buffer_size = G.recv_buffer_size_x0; recv_buffer_d = G.recv_buffer_x0_d; } - if ( side == 1 ){ + if (side == 1) { buffer_size = G.recv_buffer_size_x1; recv_buffer_d = G.recv_buffer_x1_d; } } - if ( direction == 1 ){ + if (direction == 1) { domainMin = G.domainMin_y; domainMin = G.domainMax_y; - if ( side == 0 ){ + if (side == 0) { buffer_size = G.recv_buffer_size_y0; recv_buffer_d = G.recv_buffer_y0_d; } - if ( side == 1 ){ + if (side == 1) { buffer_size = G.recv_buffer_size_y1; recv_buffer_d = G.recv_buffer_y1_d; } } - if ( direction == 2 ){ + if (direction == 2) { domainMin = G.domainMin_z; domainMin = G.domainMax_z; - if ( side == 0 ){ + if (side == 0) { buffer_size = G.recv_buffer_size_z0; recv_buffer_d = G.recv_buffer_z0_d; } - if ( side == 1 ){ + if (side == 1) { buffer_size = G.recv_buffer_size_z1; recv_buffer_d = G.recv_buffer_z1_d; } @@ -929,12 +1160,10 @@ void Particles_3D::Unload_Particles_from_Buffer_GPU( int direction, int side , R CudaCheckError(); - Copy_Transfer_Particles_from_Buffer_GPU( n_recv, recv_buffer_d ); - + Copy_Transfer_Particles_from_Buffer_GPU(n_recv, recv_buffer_d); } -#endif //PARTICLES_GPU - + #endif // PARTICLES_GPU -#endif //MPI_CHOLLA -#endif //PARTICLES + #endif // MPI_CHOLLA +#endif // PARTICLES diff --git a/src/particles/particles_boundaries_cpu.cpp b/src/particles/particles_boundaries_cpu.cpp index 7e6eb3372..93ebc6099 100644 --- a/src/particles/particles_boundaries_cpu.cpp +++ b/src/particles/particles_boundaries_cpu.cpp @@ -1,67 +1,75 @@ #if defined(PARTICLES) && defined(PARTICLES_CPU) -#include -#include -#include -#include "../grid/grid3D.h" -#include "../io/io.h" -#include "particles_3D.h" + #include -#ifdef MPI_CHOLLA -#include "../mpi/mpi_routines.h" -#endif + #include + #include + #include "../grid/grid3D.h" + #include "../io/io.h" + #include "particles_3D.h" -//Get and remove Real value at index on vector -Real Get_and_Remove_Real( part_int_t indx, real_vector_t &vec ){ + #ifdef MPI_CHOLLA + #include "../mpi/mpi_routines.h" + #endif + +// Get and remove Real value at index on vector +Real Get_and_Remove_Real(part_int_t indx, real_vector_t &vec) +{ Real value = vec[indx]; - vec[indx] = vec.back(); //The item at the specified index is replaced by the last item in the vector - vec.pop_back(); //The last item in the vector is discarded + vec[indx] = vec.back(); // The item at the specified index is replaced by the + // last item in the vector + vec.pop_back(); // The last item in the vector is discarded return value; } -//Remove Real value at index on vector -void Remove_Real( part_int_t indx, real_vector_t &vec ){ - vec[indx] = vec.back(); //The item at the specified index is replaced by the last item in the vector - vec.pop_back(); //The last item in the vector is discarded +// Remove Real value at index on vector +void Remove_Real(part_int_t indx, real_vector_t &vec) +{ + vec[indx] = vec.back(); // The item at the specified index is replaced by the + // last item in the vector + vec.pop_back(); // The last item in the vector is discarded } -//Get and remove integer value at index on vector -Real Get_and_Remove_partID( part_int_t indx, int_vector_t &vec ){ - Real value = (Real) vec[indx]; - vec[indx] = vec.back(); +// Get and remove integer value at index on vector +Real Get_and_Remove_partID(part_int_t indx, int_vector_t &vec) +{ + Real value = (Real)vec[indx]; + vec[indx] = vec.back(); vec.pop_back(); return value; } -//Remove integer value at index on vector -void Remove_ID( part_int_t indx, int_vector_t &vec ){ +// Remove integer value at index on vector +void Remove_ID(part_int_t indx, int_vector_t &vec) +{ vec[indx] = vec.back(); vec.pop_back(); } -//Convert Real to Integer for transfering particles IDs on Real buffer arrays -part_int_t Real_to_part_int( Real inVal ){ - part_int_t outVal = (part_int_t) inVal; - if ( (inVal - outVal) > 0.1 ) outVal += 1; - if ( fabs(outVal - inVal) > 0.5 ) outVal -= 1; +// Convert Real to Integer for transfering particles IDs on Real buffer arrays +part_int_t Real_to_part_int(Real inVal) +{ + part_int_t outVal = (part_int_t)inVal; + if ((inVal - outVal) > 0.1) outVal += 1; + if (fabs(outVal - inVal) > 0.5) outVal -= 1; return outVal; } -//Set periodic boundaries for particles. Only when not using MPI -void Grid3D::Set_Particles_Boundary( int dir, int side ){ - +// Set periodic boundaries for particles. Only when not using MPI +void Grid3D::Set_Particles_Boundary(int dir, int side) +{ Real d_min, d_max, L; - if ( dir == 0 ){ + if (dir == 0) { d_min = Particles.G.xMin; d_max = Particles.G.xMax; } - if ( dir == 1 ){ + if (dir == 1) { d_min = Particles.G.yMin; d_max = Particles.G.yMax; } - if ( dir == 2 ){ + if (dir == 2) { d_min = Particles.G.zMin; d_max = Particles.G.zMax; } @@ -71,47 +79,50 @@ void Grid3D::Set_Particles_Boundary( int dir, int side ){ bool changed_pos; Real pos; #ifdef PARALLEL_OMP - #pragma omp parallel for private( pos, changed_pos) num_threads( N_OMP_THREADS ) + #pragma omp parallel for private(pos, changed_pos) \ + num_threads(N_OMP_THREADS) #endif - for( int i=0; i= d_max ) pos -= L;//When the position is on the right of the domain boundary, substract the domain Length to the position + if (side == 1) { + if (pos >= d_max) + pos -= L; // When the position is on the right of the domain boundary, + // substract the domain Length to the position changed_pos = true; } - //If the position was changed write the new position to the vectors - if ( !changed_pos ) continue; - if ( dir == 0 ) Particles.pos_x[i] = pos; - if ( dir == 1 ) Particles.pos_y[i] = pos; - if ( dir == 2 ) Particles.pos_z[i] = pos; - + // If the position was changed write the new position to the vectors + if (!changed_pos) continue; + if (dir == 0) Particles.pos_x[i] = pos; + if (dir == 1) Particles.pos_y[i] = pos; + if (dir == 2) Particles.pos_z[i] = pos; } } - -//Set open boundaries for particles when not using MPI -void Grid3D::Set_Particles_Open_Boundary_CPU( int dir, int side ){ +// Set open boundaries for particles when not using MPI +void Grid3D::Set_Particles_Open_Boundary_CPU(int dir, int side) +{ Real d_min, d_max; - if ( dir == 0 ){ + if (dir == 0) { d_min = Particles.G.domainMin_x; d_max = Particles.G.domainMax_x; } - if ( dir == 1 ){ + if (dir == 1) { d_min = Particles.G.domainMin_y; d_max = Particles.G.domainMax_y; } - if ( dir == 2 ){ + if (dir == 2) { d_min = Particles.G.domainMin_z; d_max = Particles.G.domainMax_z; } @@ -120,199 +131,212 @@ void Grid3D::Set_Particles_Open_Boundary_CPU( int dir, int side ){ int_vector_t removed_indices; #ifdef PARALLEL_OMP - #pragma omp parallel for private(pos) num_threads( N_OMP_THREADS ) + #pragma omp parallel for private(pos) num_threads(N_OMP_THREADS) #endif - for( int i=0; i d_max)) removed_indices.push_back(i); + for (int i = 0; i < Particles.n_local; i++) { + if (dir == 0) pos = Particles.pos_x[i]; + if (dir == 1) pos = Particles.pos_y[i]; + if (dir == 2) pos = Particles.pos_z[i]; + + // If the position is out of the region, remove. + if ((side == 0 && pos < d_min) || (side == 1 && pos > d_max)) + removed_indices.push_back(i); } std::sort(removed_indices.begin(), removed_indices.end()); part_int_t indx, pIndx; part_int_t n_delete = removed_indices.size(); - for ( indx=0; indx= G.xMax && flags[1]==5 ){ - out_indxs_vec_x1.push_back( pIndx ); + if (pos_x[pIndx] >= G.xMax && flags[1] == 5) { + out_indxs_vec_x1.push_back(pIndx); continue; } - if ( pos_y[pIndx] < G.yMin && flags[2]==5 ){ - out_indxs_vec_y0.push_back( pIndx ); + if (pos_y[pIndx] < G.yMin && flags[2] == 5) { + out_indxs_vec_y0.push_back(pIndx); continue; } - if ( pos_y[pIndx] >= G.yMax && flags[3]==5 ){ - out_indxs_vec_y1.push_back( pIndx ); + if (pos_y[pIndx] >= G.yMax && flags[3] == 5) { + out_indxs_vec_y1.push_back(pIndx); continue; } - if ( pos_z[pIndx] < G.zMin && flags[4]==5 ){ - out_indxs_vec_z0.push_back( pIndx ); + if (pos_z[pIndx] < G.zMin && flags[4] == 5) { + out_indxs_vec_z0.push_back(pIndx); continue; } - if ( pos_z[pIndx] >= G.zMax && flags[5]==5 ){ - out_indxs_vec_z1.push_back( pIndx ); + if (pos_z[pIndx] >= G.zMax && flags[5] == 5) { + out_indxs_vec_z1.push_back(pIndx); continue; } } - //Sort the transfer Indices (NOT NEEDED: All indices are sorted at the end of the transfer before removing transferred particles ) - // std::sort(out_indxs_vec_x0.begin(), out_indxs_vec_x0.end()); - // std::sort(out_indxs_vec_x1.begin(), out_indxs_vec_x1.end()); - // std::sort(out_indxs_vec_y0.begin(), out_indxs_vec_y0.end()); - // std::sort(out_indxs_vec_y1.begin(), out_indxs_vec_y1.end()); - // std::sort(out_indxs_vec_z0.begin(), out_indxs_vec_z0.end()); - // std::sort(out_indxs_vec_z1.begin(), out_indxs_vec_z1.end()); - - //Add the size of the out_vectors to the number of particles that will be send in each direction + // Sort the transfer Indices (NOT NEEDED: All indices are sorted at the end of + // the transfer before removing transferred particles ) + // std::sort(out_indxs_vec_x0.begin(), out_indxs_vec_x0.end()); + // std::sort(out_indxs_vec_x1.begin(), out_indxs_vec_x1.end()); + // std::sort(out_indxs_vec_y0.begin(), out_indxs_vec_y0.end()); + // std::sort(out_indxs_vec_y1.begin(), out_indxs_vec_y1.end()); + // std::sort(out_indxs_vec_z0.begin(), out_indxs_vec_z0.end()); + // std::sort(out_indxs_vec_z1.begin(), out_indxs_vec_z1.end()); + + // Add the size of the out_vectors to the number of particles that will be + // send in each direction n_send_x0 += out_indxs_vec_x0.size(); n_send_x1 += out_indxs_vec_x1.size(); n_send_y0 += out_indxs_vec_y0.size(); n_send_y1 += out_indxs_vec_y1.size(); n_send_z0 += out_indxs_vec_z0.size(); n_send_z1 += out_indxs_vec_z1.size(); - } - -//Load the particles that need to be transferred to the MPI buffer -void Particles_3D::Load_Particles_to_Buffer_CPU( int direction, int side, Real *send_buffer, int buffer_length ){ - +// Load the particles that need to be transferred to the MPI buffer +void Particles_3D::Load_Particles_to_Buffer_CPU(int direction, int side, + Real *send_buffer, + int buffer_length) +{ part_int_t n_out; part_int_t n_send; int_vector_t *out_indxs_vec; part_int_t *n_in_buffer; - //Depending on the direction and side select the vector with the particle indices for the transfer - if ( direction == 0 ){ - if ( side == 0 ){ + // Depending on the direction and side select the vector with the particle + // indices for the transfer + if (direction == 0) { + if (side == 0) { out_indxs_vec = &out_indxs_vec_x0; - n_send = n_send_x0; - n_in_buffer = &n_in_buffer_x0; + n_send = n_send_x0; + n_in_buffer = &n_in_buffer_x0; } - if ( side == 1 ){ + if (side == 1) { out_indxs_vec = &out_indxs_vec_x1; - n_send = n_send_x1; - n_in_buffer = &n_in_buffer_x1; + n_send = n_send_x1; + n_in_buffer = &n_in_buffer_x1; } } - if ( direction == 1 ){ - if ( side == 0 ){ + if (direction == 1) { + if (side == 0) { out_indxs_vec = &out_indxs_vec_y0; - n_send = n_send_y0; - n_in_buffer = &n_in_buffer_y0; + n_send = n_send_y0; + n_in_buffer = &n_in_buffer_y0; } - if ( side == 1 ){ + if (side == 1) { out_indxs_vec = &out_indxs_vec_y1; - n_send = n_send_y1; - n_in_buffer = &n_in_buffer_y1; + n_send = n_send_y1; + n_in_buffer = &n_in_buffer_y1; } } - if ( direction == 2 ){ - if ( side == 0 ){ + if (direction == 2) { + if (side == 0) { out_indxs_vec = &out_indxs_vec_z0; - n_send = n_send_z0; - n_in_buffer = &n_in_buffer_z0; + n_send = n_send_z0; + n_in_buffer = &n_in_buffer_z0; } - if ( side == 1 ){ + if (side == 1) { out_indxs_vec = &out_indxs_vec_z1; - n_send = n_send_z1; - n_in_buffer = &n_in_buffer_z1; + n_send = n_send_z1; + n_in_buffer = &n_in_buffer_z1; } } part_int_t offset, offset_extra; - n_out = out_indxs_vec->size(); //Number of particles to be transferred - offset = *n_in_buffer*N_DATA_PER_PARTICLE_TRANSFER; //Offset in the array to take in to account the particles that already reside in the buffer array + n_out = out_indxs_vec->size(); // Number of particles to be transferred + offset = *n_in_buffer * + N_DATA_PER_PARTICLE_TRANSFER; // Offset in the array to take in to + // account the particles that already + // reside in the buffer array part_int_t indx, pIndx; - for ( indx=0; indx buffer_length ) std::cout << "ERROR: Buffer length exceeded on particles transfer" << std::endl; + // Check that the offset doesn't exceed the buffer size + if (offset > buffer_length) + std::cout << "ERROR: Buffer length exceeded on particles transfer" + << std::endl; } } - -//Add the data of a single particle to a transfer buffer -void Particles_3D::Add_Particle_To_Buffer( Real *buffer, part_int_t n_in_buffer, int buffer_length, Real pId, Real pMass, Real pAge, - Real pPos_x, Real pPos_y, Real pPos_z, Real pVel_x, Real pVel_y, Real pVel_z){ - +// Add the data of a single particle to a transfer buffer +void Particles_3D::Add_Particle_To_Buffer(Real *buffer, part_int_t n_in_buffer, + int buffer_length, Real pId, + Real pMass, Real pAge, Real pPos_x, + Real pPos_y, Real pPos_z, Real pVel_x, + Real pVel_y, Real pVel_z) +{ int offset, offset_extra; offset = n_in_buffer * N_DATA_PER_PARTICLE_TRANSFER; - if (offset > buffer_length ) std::cout << "ERROR: Buffer length exceeded on particles transfer" << std::endl; + if (offset > buffer_length) + std::cout << "ERROR: Buffer length exceeded on particles transfer" + << std::endl; buffer[offset + 0] = pPos_x; buffer[offset + 1] = pPos_y; buffer[offset + 2] = pPos_z; @@ -321,78 +345,85 @@ void Particles_3D::Add_Particle_To_Buffer( Real *buffer, part_int_t n_in_buffer, buffer[offset + 5] = pVel_z; offset_extra = offset + 5; - #ifndef SINGLE_PARTICLE_MASS + #ifndef SINGLE_PARTICLE_MASS offset_extra += 1; - buffer[ offset_extra ] = pMass; - #endif - #ifdef PARTICLE_IDS + buffer[offset_extra] = pMass; + #endif + #ifdef PARTICLE_IDS offset_extra += 1; buffer[offset_extra] = pId; - #endif - #ifdef PARTICLE_AGE + #endif + #ifdef PARTICLE_AGE offset_extra += 1; buffer[offset_extra] = pAge; - #endif + #endif } - -//After a particle was transferred, add the transferred particle data to the vectors that contain the data of the local particles -void Particles_3D::Add_Particle_To_Vectors( Real pId, Real pMass, Real pAge, - Real pPos_x, Real pPos_y, Real pPos_z, - Real pVel_x, Real pVel_y, Real pVel_z, int *flags ){ - +// After a particle was transferred, add the transferred particle data to the +// vectors that contain the data of the local particles +void Particles_3D::Add_Particle_To_Vectors(Real pId, Real pMass, Real pAge, + Real pPos_x, Real pPos_y, + Real pPos_z, Real pVel_x, + Real pVel_y, Real pVel_z, int *flags) +{ // Make sure that the particle position is inside the local domain bool in_local = true; - if ( pPos_x < G.xMin || pPos_x >= G.xMax ) in_local = false; - if ( ( pPos_y < G.yMin && flags[2]==5 ) || ( pPos_y >= G.yMax && flags[3]==5 ) ) in_local = false; - if ( ( pPos_z < G.zMin && flags[4]==5 ) || ( pPos_z >= G.zMax && flags[4]==5 ) ) in_local = false; - if ( ! in_local ) { - std::cout << " Adding particle out of local domain to vectors Error:" << std::endl; + if (pPos_x < G.xMin || pPos_x >= G.xMax) in_local = false; + if ((pPos_y < G.yMin && flags[2] == 5) || (pPos_y >= G.yMax && flags[3] == 5)) + in_local = false; + if ((pPos_z < G.zMin && flags[4] == 5) || (pPos_z >= G.zMax && flags[4] == 5)) + in_local = false; + if (!in_local) { + std::cout << " Adding particle out of local domain to vectors Error:" + << std::endl; #ifdef PARTICLE_IDS std::cout << " Particle outside Local domain pID: " << pId << std::endl; #else std::cout << " Particle outside Local domain " << std::endl; #endif - std::cout << " Domain X: " << G.xMin << " " << G.xMax << std::endl; - std::cout << " Domain Y: " << G.yMin << " " << G.yMax << std::endl; - std::cout << " Domain Z: " << G.zMin << " " << G.zMax << std::endl; + std::cout << " Domain X: " << G.xMin << " " << G.xMax << std::endl; + std::cout << " Domain Y: " << G.yMin << " " << G.yMax << std::endl; + std::cout << " Domain Z: " << G.zMin << " " << G.zMax << std::endl; std::cout << " Particle X: " << pPos_x << std::endl; std::cout << " Particle Y: " << pPos_y << std::endl; std::cout << " Particle Z: " << pPos_z << std::endl; } - //TODO: is it good enough to log the error (but then go ahead and add it to the vector)? - - //Append the particle data to the local data vectors - pos_x.push_back( pPos_x ); - pos_y.push_back( pPos_y ); - pos_z.push_back( pPos_z ); - vel_x.push_back( pVel_x ); - vel_y.push_back( pVel_y ); - vel_z.push_back( pVel_z ); - #ifndef SINGLE_PARTICLE_MASS - mass.push_back( pMass ); - #endif - #ifdef PARTICLE_IDS - partIDs.push_back( Real_to_part_int(pId) ); - #endif - #ifdef PARTICLE_AGE + // TODO: is it good enough to log the error (but then go ahead and add it to + // the vector)? + + // Append the particle data to the local data vectors + pos_x.push_back(pPos_x); + pos_y.push_back(pPos_y); + pos_z.push_back(pPos_z); + vel_x.push_back(pVel_x); + vel_y.push_back(pVel_y); + vel_z.push_back(pVel_z); + #ifndef SINGLE_PARTICLE_MASS + mass.push_back(pMass); + #endif + #ifdef PARTICLE_IDS + partIDs.push_back(Real_to_part_int(pId)); + #endif + #ifdef PARTICLE_AGE age.push_back(pAge); - #endif + #endif grav_x.push_back(0); grav_y.push_back(0); grav_z.push_back(0); - //Add one to the local number of particles + // Add one to the local number of particles n_local += 1; } - - -//After the MPI transfer, unload the particles data from the buffers -void Particles_3D::Unload_Particles_from_Buffer_CPU( int direction, int side, Real *recv_buffer, part_int_t n_recv, - Real *send_buffer_y0, Real *send_buffer_y1, Real *send_buffer_z0, Real *send_buffer_z1, int buffer_length_y0, int buffer_length_y1, int buffer_length_z0, int buffer_length_z1, int *flags){ - - //Loop over the data in the recv_buffer, get the data for each particle and append the particle data to the local vecors +// After the MPI transfer, unload the particles data from the buffers +void Particles_3D::Unload_Particles_from_Buffer_CPU( + int direction, int side, Real *recv_buffer, part_int_t n_recv, + Real *send_buffer_y0, Real *send_buffer_y1, Real *send_buffer_z0, + Real *send_buffer_z1, int buffer_length_y0, int buffer_length_y1, + int buffer_length_z0, int buffer_length_z1, int *flags) +{ + // Loop over the data in the recv_buffer, get the data for each particle and + // append the particle data to the local vecors int offset_buff, offset_extra; part_int_t pId; @@ -400,48 +431,49 @@ void Particles_3D::Unload_Particles_from_Buffer_CPU( int direction, int side, Re offset_buff = 0; part_int_t indx; - for ( indx=0; indx= G.domainMax_x ) pPos_x -= ( G.domainMax_x - G.domainMin_x ); + // GLOBAL PERIODIC BOUNDARIES: for the X direction + if (pPos_x < G.domainMin_x) pPos_x += (G.domainMax_x - G.domainMin_x); + if (pPos_x >= G.domainMax_x) pPos_x -= (G.domainMax_x - G.domainMin_x); - //If the particle x_position is outside the local domain there was an error - if ( ( pPos_x < G.xMin ) || ( pPos_x >= G.xMax ) ){ - #ifdef PARTICLE_IDS - std::cout << "ERROR Particle Transfer out of X domain pID: " << pId << std::endl; - #else + // If the particle x_position is outside the local domain there was an error + if ((pPos_x < G.xMin) || (pPos_x >= G.xMax)) { + #ifdef PARTICLE_IDS + std::cout << "ERROR Particle Transfer out of X domain pID: " << pId + << std::endl; + #else std::cout << "ERROR Particle Transfer out of X domain" << std::endl; - #endif + #endif std::cout << " posX: " << pPos_x << " velX: " << pVel_x << std::endl; std::cout << " posY: " << pPos_y << " velY: " << pVel_y << std::endl; std::cout << " posZ: " << pPos_z << " velZ: " << pVel_z << std::endl; @@ -451,35 +483,43 @@ void Particles_3D::Unload_Particles_from_Buffer_CPU( int direction, int side, Re continue; } - // If the y_position at the X_Tansfer (direction=0) is outside the local domain, then the particles is added to the buffer for the Y_Transfer - if (direction == 0 ){ - if ( pPos_y < G.yMin && flags[2]==5 ){ - Add_Particle_To_Buffer( send_buffer_y0, n_in_buffer_y0, buffer_length_y0, pId, pMass, pAge, pPos_x, pPos_y, pPos_z, pVel_x, pVel_y, pVel_z ); + // If the y_position at the X_Tansfer (direction=0) is outside the local + // domain, then the particles is added to the buffer for the Y_Transfer + if (direction == 0) { + if (pPos_y < G.yMin && flags[2] == 5) { + Add_Particle_To_Buffer(send_buffer_y0, n_in_buffer_y0, buffer_length_y0, + pId, pMass, pAge, pPos_x, pPos_y, pPos_z, pVel_x, + pVel_y, pVel_z); n_send_y0 += 1; n_in_buffer_y0 += 1; continue; } - if ( pPos_y >= G.yMax && flags[3]==5 ){ - Add_Particle_To_Buffer( send_buffer_y1, n_in_buffer_y1, buffer_length_y1, pId, pMass, pAge, pPos_x, pPos_y, pPos_z, pVel_x, pVel_y, pVel_z ); + if (pPos_y >= G.yMax && flags[3] == 5) { + Add_Particle_To_Buffer(send_buffer_y1, n_in_buffer_y1, buffer_length_y1, + pId, pMass, pAge, pPos_x, pPos_y, pPos_z, pVel_x, + pVel_y, pVel_z); n_send_y1 += 1; n_in_buffer_y1 += 1; continue; } } - //PERIODIC BOUNDARIES: for the Y direction - if ( direction == 1 ){ - if ( pPos_y < G.domainMin_y ) pPos_y += ( G.domainMax_y - G.domainMin_y ); - if ( pPos_y >= G.domainMax_y ) pPos_y -= ( G.domainMax_y - G.domainMin_y ); + // PERIODIC BOUNDARIES: for the Y direction + if (direction == 1) { + if (pPos_y < G.domainMin_y) pPos_y += (G.domainMax_y - G.domainMin_y); + if (pPos_y >= G.domainMax_y) pPos_y -= (G.domainMax_y - G.domainMin_y); } - //If the particle y_position is outside the local domain after the X-Transfer, there was an error - if ( (direction==1 || direction==2) && (( pPos_y < G.yMin ) || ( pPos_y >= G.yMax )) ){ - #ifdef PARTICLE_IDS - std::cout << "ERROR Particle Transfer out of Y domain pID: " << pId << std::endl; - #else + // If the particle y_position is outside the local domain after the + // X-Transfer, there was an error + if ((direction == 1 || direction == 2) && + ((pPos_y < G.yMin) || (pPos_y >= G.yMax))) { + #ifdef PARTICLE_IDS + std::cout << "ERROR Particle Transfer out of Y domain pID: " << pId + << std::endl; + #else std::cout << "ERROR Particle Transfer out of Y domain" << std::endl; - #endif + #endif std::cout << " posX: " << pPos_x << " velX: " << pVel_x << std::endl; std::cout << " posY: " << pPos_y << " velY: " << pVel_y << std::endl; std::cout << " posZ: " << pPos_z << " velZ: " << pVel_z << std::endl; @@ -489,35 +529,42 @@ void Particles_3D::Unload_Particles_from_Buffer_CPU( int direction, int side, Re continue; } - // If the z_position at the X_Tansfer or Y_Transfer is outside the local domain, then the particles is added to the buffer for the Z_Transfer - if (direction !=2 ){ - if ( pPos_z < G.zMin && flags[4]==5 ){ - Add_Particle_To_Buffer( send_buffer_z0, n_in_buffer_z0, buffer_length_z0, pId, pMass, pAge, pPos_x, pPos_y, pPos_z, pVel_x, pVel_y, pVel_z ); + // If the z_position at the X_Tansfer or Y_Transfer is outside the local + // domain, then the particles is added to the buffer for the Z_Transfer + if (direction != 2) { + if (pPos_z < G.zMin && flags[4] == 5) { + Add_Particle_To_Buffer(send_buffer_z0, n_in_buffer_z0, buffer_length_z0, + pId, pMass, pAge, pPos_x, pPos_y, pPos_z, pVel_x, + pVel_y, pVel_z); n_send_z0 += 1; n_in_buffer_z0 += 1; continue; } - if ( pPos_z >= G.zMax && flags[5]==5 ){ - Add_Particle_To_Buffer( send_buffer_z1, n_in_buffer_z1, buffer_length_z1, pId, pMass, pAge, pPos_x, pPos_y, pPos_z, pVel_x, pVel_y, pVel_z ); + if (pPos_z >= G.zMax && flags[5] == 5) { + Add_Particle_To_Buffer(send_buffer_z1, n_in_buffer_z1, buffer_length_z1, + pId, pMass, pAge, pPos_x, pPos_y, pPos_z, pVel_x, + pVel_y, pVel_z); n_send_z1 += 1; n_in_buffer_z1 += 1; continue; } } - //GLOBAL PERIODIC BOUNDARIES: for the Z direction - if ( direction == 2 ){ - if ( pPos_z < G.domainMin_z ) pPos_z += ( G.domainMax_z - G.domainMin_z ); - if ( pPos_z >= G.domainMax_z ) pPos_z -= ( G.domainMax_z - G.domainMin_z ); + // GLOBAL PERIODIC BOUNDARIES: for the Z direction + if (direction == 2) { + if (pPos_z < G.domainMin_z) pPos_z += (G.domainMax_z - G.domainMin_z); + if (pPos_z >= G.domainMax_z) pPos_z -= (G.domainMax_z - G.domainMin_z); } - //If the particle z_position is outside the local domain after the X-Transfer and Y-Transfer, there was an error - if ( (direction==2) && (( pPos_z < G.zMin ) || ( pPos_z >= G.zMax )) ){ - #ifdef PARTICLE_IDS - std::cout << "ERROR Particle Transfer out of Z domain pID: " << pId << std::endl; - #else + // If the particle z_position is outside the local domain after the + // X-Transfer and Y-Transfer, there was an error + if ((direction == 2) && ((pPos_z < G.zMin) || (pPos_z >= G.zMax))) { + #ifdef PARTICLE_IDS + std::cout << "ERROR Particle Transfer out of Z domain pID: " << pId + << std::endl; + #else std::cout << "ERROR Particle Transfer out of Z domain" << std::endl; - #endif + #endif std::cout << " posX: " << pPos_x << " velX: " << pVel_x << std::endl; std::cout << " posY: " << pPos_y << " velY: " << pVel_y << std::endl; std::cout << " posZ: " << pPos_z << " velZ: " << pVel_z << std::endl; @@ -527,16 +574,17 @@ void Particles_3D::Unload_Particles_from_Buffer_CPU( int direction, int side, Re continue; } - //If the particle doesn't have to be transferred to the y_direction or z_direction, then add the particle date to the local vectors - Add_Particle_To_Vectors( pId, pMass, pAge, pPos_x, pPos_y, pPos_z, pVel_x, pVel_y, pVel_z, flags ); + // If the particle doesn't have to be transferred to the y_direction or + // z_direction, then add the particle date to the local vectors + Add_Particle_To_Vectors(pId, pMass, pAge, pPos_x, pPos_y, pPos_z, pVel_x, + pVel_y, pVel_z, flags); } } - -//Remove the particles that were transferred outside the local domain -void Particles_3D::Remove_Transfered_Particles( void ){ - - //Get the number of particles to delete +// Remove the particles that were transferred outside the local domain +void Particles_3D::Remove_Transfered_Particles(void) +{ + // Get the number of particles to delete part_int_t n_delete = 0; n_delete += out_indxs_vec_x0.size(); n_delete += out_indxs_vec_x1.size(); @@ -546,16 +594,24 @@ void Particles_3D::Remove_Transfered_Particles( void ){ n_delete += out_indxs_vec_z1.size(); // std::cout << "N to delete: " << n_delete << std::endl; - //Concatenate the indices of all the particles that moved into a new vector (delete_indxs_vec) + // Concatenate the indices of all the particles that moved into a new vector + // (delete_indxs_vec) int_vector_t delete_indxs_vec; - delete_indxs_vec.insert( delete_indxs_vec.end(), out_indxs_vec_x0.begin(), out_indxs_vec_x0.end() ); - delete_indxs_vec.insert( delete_indxs_vec.end(), out_indxs_vec_x1.begin(), out_indxs_vec_x1.end() ); - delete_indxs_vec.insert( delete_indxs_vec.end(), out_indxs_vec_y0.begin(), out_indxs_vec_y0.end() ); - delete_indxs_vec.insert( delete_indxs_vec.end(), out_indxs_vec_y1.begin(), out_indxs_vec_y1.end() ); - delete_indxs_vec.insert( delete_indxs_vec.end(), out_indxs_vec_z0.begin(), out_indxs_vec_z0.end() ); - delete_indxs_vec.insert( delete_indxs_vec.end(), out_indxs_vec_z1.begin(), out_indxs_vec_z1.end() ); - - //Clear the vectors that stored the transferred indices for each direction. All these indices are now stored in delete_indxs_vec + delete_indxs_vec.insert(delete_indxs_vec.end(), out_indxs_vec_x0.begin(), + out_indxs_vec_x0.end()); + delete_indxs_vec.insert(delete_indxs_vec.end(), out_indxs_vec_x1.begin(), + out_indxs_vec_x1.end()); + delete_indxs_vec.insert(delete_indxs_vec.end(), out_indxs_vec_y0.begin(), + out_indxs_vec_y0.end()); + delete_indxs_vec.insert(delete_indxs_vec.end(), out_indxs_vec_y1.begin(), + out_indxs_vec_y1.end()); + delete_indxs_vec.insert(delete_indxs_vec.end(), out_indxs_vec_z0.begin(), + out_indxs_vec_z0.end()); + delete_indxs_vec.insert(delete_indxs_vec.end(), out_indxs_vec_z1.begin(), + out_indxs_vec_z1.end()); + + // Clear the vectors that stored the transferred indices for each direction. + // All these indices are now stored in delete_indxs_vec out_indxs_vec_x0.clear(); out_indxs_vec_x1.clear(); out_indxs_vec_y0.clear(); @@ -563,63 +619,68 @@ void Particles_3D::Remove_Transfered_Particles( void ){ out_indxs_vec_z0.clear(); out_indxs_vec_z1.clear(); - //Sort the indices that need to be deleted so that the particles are deleted from right to left + // Sort the indices that need to be deleted so that the particles are deleted + // from right to left std::sort(delete_indxs_vec.begin(), delete_indxs_vec.end()); part_int_t indx, pIndx; - for ( indx=0; indx -#include -#include -#include -#include "../utils/gpu.hpp" -#include -#include "../io/io.h" -#include "../global/global.h" -#include "../global/global_cuda.h" -#include "../grid/grid3D.h" -#include "particles_boundaries_gpu.h" -#include "particles_3D.h" - -#define SCAN_SHARED_SIZE 2*TPB_PARTICLES - - -__global__ void Set_Particles_Boundary_Kernel( int side, part_int_t n_local, Real *pos_dev, Real d_min, Real d_max, Real d_length ){ - - part_int_t tid = blockIdx.x * blockDim.x + threadIdx.x ; - if ( tid >= n_local) return; + #include + #include + #include + #include + + #include + + #include "../global/global.h" + #include "../global/global_cuda.h" + #include "../grid/grid3D.h" + #include "../io/io.h" + #include "../utils/gpu.hpp" + #include "particles_3D.h" + #include "particles_boundaries_gpu.h" + + #define SCAN_SHARED_SIZE 2 * TPB_PARTICLES + +__global__ void Set_Particles_Boundary_Kernel(int side, part_int_t n_local, + Real *pos_dev, Real d_min, + Real d_max, Real d_length) +{ + part_int_t tid = blockIdx.x * blockDim.x + threadIdx.x; + if (tid >= n_local) return; Real pos; pos = pos_dev[tid]; - if ( side == 0 ){ - if ( pos < d_min ) pos += d_length; + if (side == 0) { + if (pos < d_min) pos += d_length; } - if ( side == 1 ){ - if ( pos >= d_max ) pos -= d_length; + if (side == 1) { + if (pos >= d_max) pos -= d_length; } pos_dev[tid] = pos; - } - -void Grid3D::Set_Particles_Boundary_GPU( int dir, int side ){ - +void Grid3D::Set_Particles_Boundary_GPU(int dir, int side) +{ Real d_min, d_max, L; Real *pos_dev; - if ( dir == 0 ){ - d_min = Particles.G.zMin; - d_max = Particles.G.zMax; + if (dir == 0) { + d_min = Particles.G.zMin; + d_max = Particles.G.zMax; pos_dev = Particles.pos_x_dev; } - if ( dir == 1 ){ - d_min = Particles.G.yMin; - d_max = Particles.G.yMax; + if (dir == 1) { + d_min = Particles.G.yMin; + d_max = Particles.G.yMax; pos_dev = Particles.pos_y_dev; } - if ( dir == 2 ){ - d_min = Particles.G.zMin; - d_max = Particles.G.zMax; + if (dir == 2) { + d_min = Particles.G.zMin; + d_max = Particles.G.zMax; pos_dev = Particles.pos_z_dev; } L = d_max - d_min; // set values for GPU kernels - int grid_size = (Particles.n_local - 1) / TPB_PARTICLES + 1; + int grid_size = (Particles.n_local - 1) / TPB_PARTICLES + 1; // number of blocks per 1D grid dim3 dim1dGrid(grid_size, 1, 1); // number of threads per 1D block dim3 dim1dBlock(TPB_PARTICLES, 1, 1); - hipLaunchKernelGGL(Set_Particles_Boundary_Kernel, dim1dGrid, dim1dBlock, 0, 0, side, Particles.n_local, pos_dev, d_min, d_max, L ); + hipLaunchKernelGGL(Set_Particles_Boundary_Kernel, dim1dGrid, dim1dBlock, 0, 0, + side, Particles.n_local, pos_dev, d_min, d_max, L); CudaCheckError(); } - // #ifdef MPI_CHOLLA -__global__ void Get_Transfer_Flags_Kernel( part_int_t n_total, int side, Real d_min, Real d_max, Real *pos_d, bool *transfer_flags_d ){ - +__global__ void Get_Transfer_Flags_Kernel(part_int_t n_total, int side, + Real d_min, Real d_max, Real *pos_d, + bool *transfer_flags_d) +{ int tid = threadIdx.x + blockIdx.x * blockDim.x; - if ( tid >= n_total ) return; + if (tid >= n_total) return; bool transfer = 0; Real pos = pos_d[tid]; - if ( side == 0 && pos < d_min) transfer = 1; - if ( side == 1 && pos >= d_max) transfer = 1; + if (side == 0 && pos < d_min) transfer = 1; + if (side == 1 && pos >= d_max) transfer = 1; transfer_flags_d[tid] = transfer; } - -__global__ void Scan_Kernel( part_int_t n_total, bool *transfer_flags_d, int *prefix_sum_d, int *prefix_sum_block_d ){ - +__global__ void Scan_Kernel(part_int_t n_total, bool *transfer_flags_d, + int *prefix_sum_d, int *prefix_sum_block_d) +{ __shared__ int data_sh[SCAN_SHARED_SIZE]; int tid_block, block_start; // tid = threadIdx.x + blockIdx.x * blockDim.x; tid_block = threadIdx.x; - block_start = 2*blockIdx.x*blockDim.x; + block_start = 2 * blockIdx.x * blockDim.x; - data_sh[2*tid_block] = block_start + 2*tid_block < n_total ? (int) transfer_flags_d[block_start + 2*tid_block] : 0; - data_sh[2*tid_block+1] = block_start + 2*tid_block+1 < n_total ? (int) transfer_flags_d[block_start + 2*tid_block+1] : 0; + data_sh[2 * tid_block] = + block_start + 2 * tid_block < n_total + ? (int)transfer_flags_d[block_start + 2 * tid_block] + : 0; + data_sh[2 * tid_block + 1] = + block_start + 2 * tid_block + 1 < n_total + ? (int)transfer_flags_d[block_start + 2 * tid_block + 1] + : 0; __syncthreads(); int offset = 1; - int n = blockDim.x*2; + int n = blockDim.x * 2; int ai, bi; int t; - for (int d = n/2; d>0; d/=2){ - + for (int d = n / 2; d > 0; d /= 2) { __syncthreads(); - if ( tid_block < d ){ - ai = offset*(2*tid_block+1)-1; - bi = offset*(2*tid_block+2)-1; + if (tid_block < d) { + ai = offset * (2 * tid_block + 1) - 1; + bi = offset * (2 * tid_block + 2) - 1; data_sh[bi] += data_sh[ai]; } offset *= 2; @@ -124,16 +132,14 @@ __global__ void Scan_Kernel( part_int_t n_total, bool *transfer_flags_d, int *pr if (tid_block == 0) data_sh[n - 1] = 0; // Traverse down tree & build scan - for (int d = 1; d < n; d *= 2){ - + for (int d = 1; d < n; d *= 2) { __syncthreads(); - offset /=2; - if (tid_block < d){ + offset /= 2; + if (tid_block < d) { + ai = offset * (2 * tid_block + 1) - 1; + bi = offset * (2 * tid_block + 2) - 1; - ai = offset*(2*tid_block+1)-1; - bi = offset*(2*tid_block+2)-1; - - t = data_sh[ai]; + t = data_sh[ai]; data_sh[ai] = data_sh[bi]; data_sh[bi] += t; } @@ -141,60 +147,64 @@ __global__ void Scan_Kernel( part_int_t n_total, bool *transfer_flags_d, int *pr __syncthreads(); // Write results to device memory - if ( block_start + 2*tid_block < n_total ) prefix_sum_d[block_start + 2*tid_block] = data_sh[2*tid_block]; - if ( block_start + 2*tid_block+1 < n_total) prefix_sum_d[block_start + 2*tid_block+1] = data_sh[2*tid_block+1]; + if (block_start + 2 * tid_block < n_total) + prefix_sum_d[block_start + 2 * tid_block] = data_sh[2 * tid_block]; + if (block_start + 2 * tid_block + 1 < n_total) + prefix_sum_d[block_start + 2 * tid_block + 1] = data_sh[2 * tid_block + 1]; // Write the block sum - int last_flag_block = (int) transfer_flags_d[block_start + 2*(blockDim.x-1)+1]; - if (tid_block == 0) prefix_sum_block_d[blockIdx.x] = data_sh[2*(blockDim.x-1)+1] + last_flag_block; + int last_flag_block = + (int)transfer_flags_d[block_start + 2 * (blockDim.x - 1) + 1]; + if (tid_block == 0) + prefix_sum_block_d[blockIdx.x] = + data_sh[2 * (blockDim.x - 1) + 1] + last_flag_block; } - -__global__ void Prefix_Sum_Blocks_Kernel( int n_partial, int *prefix_sum_block_d ){ - - int tid_block, val, start_index, n_threads; +__global__ void Prefix_Sum_Blocks_Kernel(int n_partial, int *prefix_sum_block_d) +{ + int tid_block, val, start_index, n_threads; tid_block = threadIdx.x; n_threads = blockDim.x; __shared__ int data_sh[TPB_PARTICLES]; - - int sum = 0; - int n = 0; + int sum = 0; + int n = 0; start_index = n * n_threads; - while( start_index < n_partial ){ - data_sh[tid_block] = start_index+tid_block < n_partial ? prefix_sum_block_d[start_index+tid_block] : 0; + while (start_index < n_partial) { + data_sh[tid_block] = start_index + tid_block < n_partial + ? prefix_sum_block_d[start_index + tid_block] + : 0; __syncthreads(); - - if (tid_block == 0){ - for ( int i=0; i 0 ) printf( "##Thread transfer: %d\n", n_transfer_d[0]); +__global__ void Get_N_Transfer_Particles_Kernel(part_int_t n_total, + int *n_transfer_d, + bool *transfer_flags_d, + int *prefix_sum_d) +{ + n_transfer_d[0] = + prefix_sum_d[n_total - 1] + (int)transfer_flags_d[n_total - 1]; + // if ( n_transfer_d[0] > 0 ) printf( "##Thread transfer: %d\n", + // n_transfer_d[0]); } -__global__ void Get_Transfer_Indices_Kernel( part_int_t n_total, bool *transfer_flags_d, int *prefix_sum_d, int *transfer_indices_d ){ - +__global__ void Get_Transfer_Indices_Kernel(part_int_t n_total, + bool *transfer_flags_d, + int *prefix_sum_d, + int *transfer_indices_d) +{ int tid, transfer_index; - tid = threadIdx.x + blockIdx.x * blockDim.x; - if ( tid >= n_total ) return; + tid = threadIdx.x + blockIdx.x * blockDim.x; + if (tid >= n_total) return; transfer_index = prefix_sum_d[tid]; - - if ( transfer_index < 0 || transfer_index >= n_total ){ - printf( "#### PARTICLE TRANSFER ERROR: transfer index outside domain: %d \n", transfer_index ); + + if (transfer_index < 0 || transfer_index >= n_total) { + printf( + "#### PARTICLE TRANSFER ERROR: transfer index outside domain: %d \n", + transfer_index); return; } - - if ( transfer_flags_d[tid] ) transfer_indices_d[transfer_index] = tid; + if (transfer_flags_d[tid]) transfer_indices_d[transfer_index] = tid; } - -__global__ void Select_Indices_to_Replace_Transfered_Kernel( part_int_t n_total, int n_transfer, bool *transfer_flags_d, int *prefix_sum_d, int *replace_indices_d ){ - +__global__ void Select_Indices_to_Replace_Transfered_Kernel( + part_int_t n_total, int n_transfer, bool *transfer_flags_d, + int *prefix_sum_d, int *replace_indices_d) +{ int tid, tid_inv; tid = threadIdx.x + blockIdx.x * blockDim.x; - if ( tid >= n_total ) return; + if (tid >= n_total) return; tid_inv = n_total - tid - 1; bool transfer_flag = transfer_flags_d[tid]; - if ( transfer_flag ) return; + if (transfer_flag) return; int prefix_sum_inv, replace_id; prefix_sum_inv = n_transfer - prefix_sum_d[tid]; - replace_id = tid_inv - prefix_sum_inv; - - - if ( replace_id < 0 || replace_id >= n_total ){ - printf( "#### PARTICLE TRANSFER ERROR: replace index outside domain: %d \n", replace_id ); + replace_id = tid_inv - prefix_sum_inv; + + if (replace_id < 0 || replace_id >= n_total) { + printf("#### PARTICLE TRANSFER ERROR: replace index outside domain: %d \n", + replace_id); return; - } + } replace_indices_d[replace_id] = tid; - } - -template< typename T> -__global__ void Replace_Transfered_Particles_Kernel( int n_transfer, T *field_d, int *transfer_indices_d, int *replace_indices_d, bool print_replace ){ +template +__global__ void Replace_Transfered_Particles_Kernel(int n_transfer, T *field_d, + int *transfer_indices_d, + int *replace_indices_d, + bool print_replace) +{ int tid; tid = threadIdx.x + blockIdx.x * blockDim.x; - if ( tid >= n_transfer ) return; + if (tid >= n_transfer) return; int dst_id, src_id; dst_id = transfer_indices_d[tid]; src_id = replace_indices_d[tid]; - if ( dst_id < src_id ){ - if (print_replace) printf("Replacing: %f \n", field_d[dst_id]*1.0 ); + if (dst_id < src_id) { + if (print_replace) printf("Replacing: %f \n", field_d[dst_id] * 1.0); field_d[dst_id] = field_d[src_id]; } - } - -void Replace_Transfered_Particles_GPU_function( int n_transfer, Real *field_d, int *transfer_indices_d, int *replace_indices_d, bool print_replace ){ +void Replace_Transfered_Particles_GPU_function(int n_transfer, Real *field_d, + int *transfer_indices_d, + int *replace_indices_d, + bool print_replace) +{ int grid_size; - grid_size = (n_transfer - 1) / TPB_PARTICLES + 1; + grid_size = (n_transfer - 1) / TPB_PARTICLES + 1; // number of blocks per 1D grid dim3 dim1dGrid(grid_size, 1, 1); // number of threads per 1D block dim3 dim1dBlock(TPB_PARTICLES, 1, 1); - hipLaunchKernelGGL( Replace_Transfered_Particles_Kernel, dim1dGrid, dim1dBlock, 0, 0, n_transfer, field_d, transfer_indices_d, replace_indices_d, print_replace ); + hipLaunchKernelGGL(Replace_Transfered_Particles_Kernel, dim1dGrid, dim1dBlock, + 0, 0, n_transfer, field_d, transfer_indices_d, + replace_indices_d, print_replace); CudaCheckError(); - } - -void Replace_Transfered_Particles_Int_GPU_function( int n_transfer, part_int_t *field_d, int *transfer_indices_d, int *replace_indices_d, bool print_replace ){ +void Replace_Transfered_Particles_Int_GPU_function(int n_transfer, + part_int_t *field_d, + int *transfer_indices_d, + int *replace_indices_d, + bool print_replace) +{ int grid_size; - grid_size = (n_transfer - 1) / TPB_PARTICLES + 1; + grid_size = (n_transfer - 1) / TPB_PARTICLES + 1; // number of blocks per 1D grid dim3 dim1dGrid(grid_size, 1, 1); // number of threads per 1D block dim3 dim1dBlock(TPB_PARTICLES, 1, 1); - hipLaunchKernelGGL( Replace_Transfered_Particles_Kernel, dim1dGrid, dim1dBlock, 0, 0, n_transfer, field_d, transfer_indices_d, replace_indices_d, print_replace ); + hipLaunchKernelGGL(Replace_Transfered_Particles_Kernel, dim1dGrid, dim1dBlock, + 0, 0, n_transfer, field_d, transfer_indices_d, + replace_indices_d, print_replace); CudaCheckError(); } - -part_int_t Select_Particles_to_Transfer_GPU_function( part_int_t n_local, int side, Real domainMin, Real domainMax, Real *pos_d, int *n_transfer_d, int *n_transfer_h, bool *transfer_flags_d, int *transfer_indices_d, int *replace_indices_d, int *transfer_prefix_sum_d, int *transfer_prefix_sum_blocks_d ){ +part_int_t Select_Particles_to_Transfer_GPU_function( + part_int_t n_local, int side, Real domainMin, Real domainMax, Real *pos_d, + int *n_transfer_d, int *n_transfer_h, bool *transfer_flags_d, + int *transfer_indices_d, int *replace_indices_d, int *transfer_prefix_sum_d, + int *transfer_prefix_sum_blocks_d) +{ // set values for GPU kernels int grid_size, grid_size_half; - grid_size = (n_local - 1) / TPB_PARTICLES + 1; - grid_size_half = ( (n_local-1)/2 ) / TPB_PARTICLES + 1; + grid_size = (n_local - 1) / TPB_PARTICLES + 1; + grid_size_half = ((n_local - 1) / 2) / TPB_PARTICLES + 1; // number of blocks per 1D grid dim3 dim1dGrid(grid_size, 1, 1); dim3 dim1dGrid_half(grid_size_half, 1, 1); @@ -315,184 +350,223 @@ part_int_t Select_Particles_to_Transfer_GPU_function( part_int_t n_local, int si return 0; } - hipLaunchKernelGGL( Get_Transfer_Flags_Kernel, dim1dGrid, dim1dBlock, 0, 0, n_local, side, domainMin, domainMax, pos_d, transfer_flags_d); + hipLaunchKernelGGL(Get_Transfer_Flags_Kernel, dim1dGrid, dim1dBlock, 0, 0, + n_local, side, domainMin, domainMax, pos_d, + transfer_flags_d); CudaCheckError(); - hipLaunchKernelGGL( Scan_Kernel, dim1dGrid_half, dim1dBlock, 0, 0, n_local, transfer_flags_d, transfer_prefix_sum_d, transfer_prefix_sum_blocks_d ); + hipLaunchKernelGGL(Scan_Kernel, dim1dGrid_half, dim1dBlock, 0, 0, n_local, + transfer_flags_d, transfer_prefix_sum_d, + transfer_prefix_sum_blocks_d); CudaCheckError(); - hipLaunchKernelGGL( Prefix_Sum_Blocks_Kernel, 1, dim1dBlock , 0, 0, grid_size_half, transfer_prefix_sum_blocks_d ); + hipLaunchKernelGGL(Prefix_Sum_Blocks_Kernel, 1, dim1dBlock, 0, 0, + grid_size_half, transfer_prefix_sum_blocks_d); CudaCheckError(); - - hipLaunchKernelGGL( Sum_Blocks_Kernel, dim1dGrid, dim1dBlock, 0, 0, n_local, transfer_prefix_sum_d, transfer_prefix_sum_blocks_d ); + + hipLaunchKernelGGL(Sum_Blocks_Kernel, dim1dGrid, dim1dBlock, 0, 0, n_local, + transfer_prefix_sum_d, transfer_prefix_sum_blocks_d); CudaCheckError(); - - hipLaunchKernelGGL( Get_N_Transfer_Particles_Kernel, 1, 1, 0, 0, n_local, n_transfer_d, transfer_flags_d, transfer_prefix_sum_d ); + + hipLaunchKernelGGL(Get_N_Transfer_Particles_Kernel, 1, 1, 0, 0, n_local, + n_transfer_d, transfer_flags_d, transfer_prefix_sum_d); CudaCheckError(); - - CudaSafeCall( cudaMemcpy( n_transfer_h, n_transfer_d, sizeof(int), cudaMemcpyDeviceToHost) ); + + CudaSafeCall(cudaMemcpy(n_transfer_h, n_transfer_d, sizeof(int), + cudaMemcpyDeviceToHost)); CudaCheckError(); - - hipLaunchKernelGGL( Get_Transfer_Indices_Kernel, dim1dGrid, dim1dBlock, 0, 0, n_local , transfer_flags_d, transfer_prefix_sum_d, transfer_indices_d ); + + hipLaunchKernelGGL(Get_Transfer_Indices_Kernel, dim1dGrid, dim1dBlock, 0, 0, + n_local, transfer_flags_d, transfer_prefix_sum_d, + transfer_indices_d); CudaCheckError(); - hipLaunchKernelGGL( Select_Indices_to_Replace_Transfered_Kernel, dim1dGrid, dim1dBlock , 0, 0, n_local, n_transfer_h[0], transfer_flags_d, transfer_prefix_sum_d, replace_indices_d ); + hipLaunchKernelGGL(Select_Indices_to_Replace_Transfered_Kernel, dim1dGrid, + dim1dBlock, 0, 0, n_local, n_transfer_h[0], + transfer_flags_d, transfer_prefix_sum_d, + replace_indices_d); CudaCheckError(); // if ( n_transfer_h[0] > 0 )printf( "N transfer: %d\n", n_transfer_h[0]); return n_transfer_h[0]; - } - - -__global__ void Load_Transfered_Particles_to_Buffer_Kernel( int n_transfer, int field_id, int n_fields_to_transfer, Real *field_d, int *transfer_indices_d, Real *send_buffer_d, Real domainMin, Real domainMax, int boundary_type ){ - +__global__ void Load_Transfered_Particles_to_Buffer_Kernel( + int n_transfer, int field_id, int n_fields_to_transfer, Real *field_d, + int *transfer_indices_d, Real *send_buffer_d, Real domainMin, + Real domainMax, int boundary_type) +{ int tid; tid = threadIdx.x + blockIdx.x * blockDim.x; - if ( tid >= n_transfer ) return; + if (tid >= n_transfer) return; int src_id, dst_id; Real field_val; - src_id = transfer_indices_d[tid]; - dst_id = tid * n_fields_to_transfer + field_id; + src_id = transfer_indices_d[tid]; + dst_id = tid * n_fields_to_transfer + field_id; field_val = field_d[src_id]; // Set global periodic boundary conditions - if ( boundary_type == 1 && field_val < domainMin ) field_val += ( domainMax - domainMin ); - if ( boundary_type == 1 && field_val >= domainMax ) field_val -= ( domainMax - domainMin ); + if (boundary_type == 1 && field_val < domainMin) + field_val += (domainMax - domainMin); + if (boundary_type == 1 && field_val >= domainMax) + field_val -= (domainMax - domainMin); send_buffer_d[dst_id] = field_val; - } -void Load_Particles_to_Transfer_GPU_function( int n_transfer, int field_id, int n_fields_to_transfer, Real *field_d, int *transfer_indices_d, Real *send_buffer_d, Real domainMin, Real domainMax, int boundary_type ){ - +void Load_Particles_to_Transfer_GPU_function( + int n_transfer, int field_id, int n_fields_to_transfer, Real *field_d, + int *transfer_indices_d, Real *send_buffer_d, Real domainMin, + Real domainMax, int boundary_type) +{ // set values for GPU kernels int grid_size; - grid_size = (n_transfer - 1) / TPB_PARTICLES + 1; + grid_size = (n_transfer - 1) / TPB_PARTICLES + 1; // number of blocks per 1D grid dim3 dim1dGrid(grid_size, 1, 1); // number of threads per 1D block dim3 dim1dBlock(TPB_PARTICLES, 1, 1); - hipLaunchKernelGGL( Load_Transfered_Particles_to_Buffer_Kernel, dim1dGrid, dim1dBlock , 0, 0, n_transfer, field_id, n_fields_to_transfer, field_d, transfer_indices_d, send_buffer_d, domainMin, domainMax, boundary_type ); + hipLaunchKernelGGL(Load_Transfered_Particles_to_Buffer_Kernel, dim1dGrid, + dim1dBlock, 0, 0, n_transfer, field_id, + n_fields_to_transfer, field_d, transfer_indices_d, + send_buffer_d, domainMin, domainMax, boundary_type); CudaCheckError(); - } -__global__ void Load_Transfered_Particles_Ints_to_Buffer_Kernel( int n_transfer, int field_id, int n_fields_to_transfer, part_int_t *field_d, int *transfer_indices_d, Real *send_buffer_d, Real domainMin, Real domainMax, int boundary_type ){ - +__global__ void Load_Transfered_Particles_Ints_to_Buffer_Kernel( + int n_transfer, int field_id, int n_fields_to_transfer, part_int_t *field_d, + int *transfer_indices_d, Real *send_buffer_d, Real domainMin, + Real domainMax, int boundary_type) +{ int tid; tid = threadIdx.x + blockIdx.x * blockDim.x; - if ( tid >= n_transfer ) return; + if (tid >= n_transfer) return; int src_id, dst_id; part_int_t field_val; - src_id = transfer_indices_d[tid]; - dst_id = tid * n_fields_to_transfer + field_id; + src_id = transfer_indices_d[tid]; + dst_id = tid * n_fields_to_transfer + field_id; field_val = field_d[src_id]; // Set global periodic boundary conditions - if ( boundary_type == 1 && field_val < domainMin ) field_val += ( domainMax - domainMin ); - if ( boundary_type == 1 && field_val >= domainMax ) field_val -= ( domainMax - domainMin ); + if (boundary_type == 1 && field_val < domainMin) + field_val += (domainMax - domainMin); + if (boundary_type == 1 && field_val >= domainMax) + field_val -= (domainMax - domainMin); send_buffer_d[dst_id] = __longlong_as_double(field_val); - } - -void Load_Particles_to_Transfer_Int_GPU_function( int n_transfer, int field_id, int n_fields_to_transfer, part_int_t *field_d, int *transfer_indices_d, Real *send_buffer_d, Real domainMin, Real domainMax, int boundary_type ){ +void Load_Particles_to_Transfer_Int_GPU_function( + int n_transfer, int field_id, int n_fields_to_transfer, part_int_t *field_d, + int *transfer_indices_d, Real *send_buffer_d, Real domainMin, + Real domainMax, int boundary_type) +{ // set values for GPU kernels int grid_size; - grid_size = (n_transfer - 1) / TPB_PARTICLES + 1; + grid_size = (n_transfer - 1) / TPB_PARTICLES + 1; // number of blocks per 1D grid dim3 dim1dGrid(grid_size, 1, 1); // number of threads per 1D block dim3 dim1dBlock(TPB_PARTICLES, 1, 1); - hipLaunchKernelGGL( Load_Transfered_Particles_Ints_to_Buffer_Kernel, dim1dGrid, dim1dBlock , 0, 0, n_transfer, field_id, n_fields_to_transfer, field_d, transfer_indices_d, send_buffer_d, domainMin, domainMax, boundary_type ); + hipLaunchKernelGGL(Load_Transfered_Particles_Ints_to_Buffer_Kernel, dim1dGrid, + dim1dBlock, 0, 0, n_transfer, field_id, + n_fields_to_transfer, field_d, transfer_indices_d, + send_buffer_d, domainMin, domainMax, boundary_type); CudaCheckError(); - } -#ifdef MPI_CHOLLA -void Copy_Particles_GPU_Buffer_to_Host_Buffer( int n_transfer, Real *buffer_h, Real *buffer_d ){ - + #ifdef MPI_CHOLLA +void Copy_Particles_GPU_Buffer_to_Host_Buffer(int n_transfer, Real *buffer_h, + Real *buffer_d) +{ int transfer_size; transfer_size = n_transfer * N_DATA_PER_PARTICLE_TRANSFER; - CudaSafeCall( cudaMemcpy( buffer_h, buffer_d, transfer_size*sizeof(Real), cudaMemcpyDeviceToHost) ); + CudaSafeCall(cudaMemcpy(buffer_h, buffer_d, transfer_size * sizeof(Real), + cudaMemcpyDeviceToHost)); CudaCheckError(); - } - - -void Copy_Particles_Host_Buffer_to_GPU_Buffer( int n_transfer, Real *buffer_h, Real *buffer_d ){ - +void Copy_Particles_Host_Buffer_to_GPU_Buffer(int n_transfer, Real *buffer_h, + Real *buffer_d) +{ int transfer_size; transfer_size = n_transfer * N_DATA_PER_PARTICLE_TRANSFER; - CudaSafeCall( cudaMemcpy( buffer_d, buffer_h, transfer_size*sizeof(Real), cudaMemcpyHostToDevice) ); + CudaSafeCall(cudaMemcpy(buffer_d, buffer_h, transfer_size * sizeof(Real), + cudaMemcpyHostToDevice)); CudaCheckError(); - } -#endif //MPI_CHOLLA - -__global__ void Unload_Transfered_Particles_from_Buffer_Kernel( int n_local, int n_transfer, int field_id, int n_fields_to_transfer, Real *field_d, Real *recv_buffer_d ){ + #endif // MPI_CHOLLA +__global__ void Unload_Transfered_Particles_from_Buffer_Kernel( + int n_local, int n_transfer, int field_id, int n_fields_to_transfer, + Real *field_d, Real *recv_buffer_d) +{ int tid; tid = threadIdx.x + blockIdx.x * blockDim.x; - if ( tid >= n_transfer ) return; + if (tid >= n_transfer) return; int src_id, dst_id; - src_id = tid * n_fields_to_transfer + field_id; - dst_id = n_local + tid; + src_id = tid * n_fields_to_transfer + field_id; + dst_id = n_local + tid; field_d[dst_id] = recv_buffer_d[src_id]; - } -void Unload_Particles_to_Transfer_GPU_function( int n_local, int n_transfer, int field_id, int n_fields_to_transfer, Real *field_d, Real *recv_buffer_d ){ - +void Unload_Particles_to_Transfer_GPU_function(int n_local, int n_transfer, + int field_id, + int n_fields_to_transfer, + Real *field_d, + Real *recv_buffer_d) +{ // set values for GPU kernels int grid_size; - grid_size = (n_transfer - 1) / TPB_PARTICLES + 1; + grid_size = (n_transfer - 1) / TPB_PARTICLES + 1; // number of blocks per 1D grid dim3 dim1dGrid(grid_size, 1, 1); // number of threads per 1D block dim3 dim1dBlock(TPB_PARTICLES, 1, 1); - hipLaunchKernelGGL( Unload_Transfered_Particles_from_Buffer_Kernel, dim1dGrid, dim1dBlock , 0, 0, n_local, n_transfer, field_id, n_fields_to_transfer, field_d, recv_buffer_d ); + hipLaunchKernelGGL(Unload_Transfered_Particles_from_Buffer_Kernel, dim1dGrid, + dim1dBlock, 0, 0, n_local, n_transfer, field_id, + n_fields_to_transfer, field_d, recv_buffer_d); CudaCheckError(); - } -__global__ void Unload_Transfered_Particles_Int_from_Buffer_Kernel( int n_local, int n_transfer, int field_id, int n_fields_to_transfer, part_int_t *field_d, Real *recv_buffer_d ){ - +__global__ void Unload_Transfered_Particles_Int_from_Buffer_Kernel( + int n_local, int n_transfer, int field_id, int n_fields_to_transfer, + part_int_t *field_d, Real *recv_buffer_d) +{ int tid; tid = threadIdx.x + blockIdx.x * blockDim.x; - if ( tid >= n_transfer ) return; + if (tid >= n_transfer) return; int src_id, dst_id; - src_id = tid * n_fields_to_transfer + field_id; - dst_id = n_local + tid; + src_id = tid * n_fields_to_transfer + field_id; + dst_id = n_local + tid; field_d[dst_id] = __double_as_longlong(recv_buffer_d[src_id]); - } -void Unload_Particles_Int_to_Transfer_GPU_function( int n_local, int n_transfer, int field_id, int n_fields_to_transfer, part_int_t *field_d, Real *recv_buffer_d ){ - +void Unload_Particles_Int_to_Transfer_GPU_function(int n_local, int n_transfer, + int field_id, + int n_fields_to_transfer, + part_int_t *field_d, + Real *recv_buffer_d) +{ // set values for GPU kernels int grid_size; - grid_size = (n_transfer - 1) / TPB_PARTICLES + 1; + grid_size = (n_transfer - 1) / TPB_PARTICLES + 1; // number of blocks per 1D grid dim3 dim1dGrid(grid_size, 1, 1); // number of threads per 1D block dim3 dim1dBlock(TPB_PARTICLES, 1, 1); - hipLaunchKernelGGL( Unload_Transfered_Particles_Int_from_Buffer_Kernel, dim1dGrid, dim1dBlock , 0, 0, n_local, n_transfer, field_id, n_fields_to_transfer, field_d, recv_buffer_d ); + hipLaunchKernelGGL(Unload_Transfered_Particles_Int_from_Buffer_Kernel, + dim1dGrid, dim1dBlock, 0, 0, n_local, n_transfer, field_id, + n_fields_to_transfer, field_d, recv_buffer_d); CudaCheckError(); - } // #endif//MPI_CHOLLA - -#endif //PARTICLES +#endif // PARTICLES diff --git a/src/particles/particles_boundaries_gpu.h b/src/particles/particles_boundaries_gpu.h index e99a5ddc1..940121787 100644 --- a/src/particles/particles_boundaries_gpu.h +++ b/src/particles/particles_boundaries_gpu.h @@ -1,23 +1,49 @@ #if defined(PARTICLES) && defined(PARTICLES_GPU) -#ifndef PARTICLES_BOUNDARIES_H -#define PARTICLES_BOUNDARIES_H - -part_int_t Select_Particles_to_Transfer_GPU_function( part_int_t n_local, int side, Real domainMin, Real domainMax, Real *pos_d, int *n_transfer_d, int *n_transfer_h, bool *transfer_flags_d, int *transfer_indices_d, int *replace_indices_d, int *transfer_prefix_sum_d, int *transfer_prefix_sum_blocks_d ); - -void Load_Particles_to_Transfer_GPU_function( int n_transfer, int field_id, int n_fields_to_transfer, Real *field_d, int *transfer_indices_d, Real *send_buffer_d, Real domainMin, Real domainMax, int boundary_type ); -void Load_Particles_to_Transfer_Int_GPU_function( int n_transfer, int field_id, int n_fields_to_transfer, part_int_t *field_d, int *transfer_indices_d, Real *send_buffer_d, Real domainMin, Real domainMax, int boundary_type ); - -void Replace_Transfered_Particles_GPU_function( int n_transfer, Real *field_d, int *transfer_indices_d, int *replace_indices_d, bool print_replace ); -void Replace_Transfered_Particles_Int_GPU_function( int n_transfer, part_int_t *field_d, int *transfer_indices_d, int *replace_indices_d, bool print_replace ); - -void Copy_Particles_GPU_Buffer_to_Host_Buffer( int n_transfer, Real *buffer_h, Real *buffer_d ); - -void Copy_Particles_Host_Buffer_to_GPU_Buffer( int n_transfer, Real *buffer_h, Real *buffer_d ); - -void Unload_Particles_to_Transfer_GPU_function( int n_local, int n_transfer, int field_id, int n_fields_to_transfer, Real *field_d, Real *recv_buffer_d ); -void Unload_Particles_Int_to_Transfer_GPU_function( int n_local, int n_transfer, int field_id, int n_fields_to_transfer, part_int_t *field_d, Real *recv_buffer_d ); - - -#endif //PARTICLES_H -#endif //PARTICLES \ No newline at end of file + #ifndef PARTICLES_BOUNDARIES_H + #define PARTICLES_BOUNDARIES_H + +part_int_t Select_Particles_to_Transfer_GPU_function( + part_int_t n_local, int side, Real domainMin, Real domainMax, Real *pos_d, + int *n_transfer_d, int *n_transfer_h, bool *transfer_flags_d, + int *transfer_indices_d, int *replace_indices_d, int *transfer_prefix_sum_d, + int *transfer_prefix_sum_blocks_d); + +void Load_Particles_to_Transfer_GPU_function( + int n_transfer, int field_id, int n_fields_to_transfer, Real *field_d, + int *transfer_indices_d, Real *send_buffer_d, Real domainMin, + Real domainMax, int boundary_type); +void Load_Particles_to_Transfer_Int_GPU_function( + int n_transfer, int field_id, int n_fields_to_transfer, part_int_t *field_d, + int *transfer_indices_d, Real *send_buffer_d, Real domainMin, + Real domainMax, int boundary_type); + +void Replace_Transfered_Particles_GPU_function(int n_transfer, Real *field_d, + int *transfer_indices_d, + int *replace_indices_d, + bool print_replace); +void Replace_Transfered_Particles_Int_GPU_function(int n_transfer, + part_int_t *field_d, + int *transfer_indices_d, + int *replace_indices_d, + bool print_replace); + +void Copy_Particles_GPU_Buffer_to_Host_Buffer(int n_transfer, Real *buffer_h, + Real *buffer_d); + +void Copy_Particles_Host_Buffer_to_GPU_Buffer(int n_transfer, Real *buffer_h, + Real *buffer_d); + +void Unload_Particles_to_Transfer_GPU_function(int n_local, int n_transfer, + int field_id, + int n_fields_to_transfer, + Real *field_d, + Real *recv_buffer_d); +void Unload_Particles_Int_to_Transfer_GPU_function(int n_local, int n_transfer, + int field_id, + int n_fields_to_transfer, + part_int_t *field_d, + Real *recv_buffer_d); + + #endif // PARTICLES_H +#endif // PARTICLES \ No newline at end of file diff --git a/src/particles/particles_dynamics.cpp b/src/particles/particles_dynamics.cpp index a979565a2..72485acd9 100644 --- a/src/particles/particles_dynamics.cpp +++ b/src/particles/particles_dynamics.cpp @@ -1,58 +1,56 @@ #ifdef PARTICLES + #include + #include -#include -#include -#include "math.h" -#include -#include "../global/global.h" -#include "../grid/grid3D.h" -#include "particles_3D.h" -#include "../io/io.h" + #include -#ifdef PARALLEL_OMP -#include "../utils/parallel_omp.h" -#endif + #include "../global/global.h" + #include "../grid/grid3D.h" + #include "../io/io.h" + #include "math.h" + #include "particles_3D.h" + #ifdef PARALLEL_OMP + #include "../utils/parallel_omp.h" + #endif -//Compute the delta_t for the particles -Real Grid3D::Calc_Particles_dt( ){ - +// Compute the delta_t for the particles +Real Grid3D::Calc_Particles_dt() +{ Real dt_particles; #ifdef PARTICLES_CPU - #ifndef PARALLEL_OMP - dt_particles = Calc_Particles_dt_function( 0, Particles.n_local ); - #else + #ifndef PARALLEL_OMP + dt_particles = Calc_Particles_dt_function(0, Particles.n_local); + #else dt_particles = 1e100; Real dt_particles_all[N_OMP_THREADS]; - #pragma omp parallel num_threads( N_OMP_THREADS ) + #pragma omp parallel num_threads(N_OMP_THREADS) { int omp_id, n_omp_procs; part_int_t p_start, p_end; - omp_id = omp_get_thread_num(); + omp_id = omp_get_thread_num(); n_omp_procs = omp_get_num_threads(); - Get_OMP_Particles_Indxs( Particles.n_local, N_OMP_THREADS, omp_id, &p_start, &p_end ); - dt_particles_all[omp_id] = Calc_Particles_dt_function( p_start, p_end ); + Get_OMP_Particles_Indxs(Particles.n_local, N_OMP_THREADS, omp_id, &p_start, + &p_end); + dt_particles_all[omp_id] = Calc_Particles_dt_function(p_start, p_end); } - for ( int i=0; i Particles.G.size_blocks_array ) chprintf(" Error: particles dt_array too small\n"); + int ngrid = (Particles.n_local + TPB_PARTICLES - 1) / TPB_PARTICLES; + if (ngrid > Particles.G.size_blocks_array) + chprintf(" Error: particles dt_array too small\n"); Real max_dti; - max_dti = Particles.Calc_Particles_dt_GPU_function( ngrid, Particles.n_local, Particles.G.dx, Particles.G.dy, Particles.G.dz, Particles.vel_x_dev, Particles.vel_y_dev, Particles.vel_z_dev, Particles.G.dti_array_host, Particles.G.dti_array_dev ); + max_dti = Particles.Calc_Particles_dt_GPU_function( + ngrid, Particles.n_local, Particles.G.dx, Particles.G.dy, Particles.G.dz, + Particles.vel_x_dev, Particles.vel_y_dev, Particles.vel_z_dev, + Particles.G.dti_array_host, Particles.G.dti_array_dev); Real dt_min; - #ifdef COSMOLOGY + #ifdef COSMOLOGY Real scale_factor, vel_factor, da_min; - scale_factor = 1 / ( Cosmo.current_a * Cosmo.Get_Hubble_Parameter( Cosmo.current_a) ) * Cosmo.cosmo_h; + scale_factor = + 1 / (Cosmo.current_a * Cosmo.Get_Hubble_Parameter(Cosmo.current_a)) * + Cosmo.cosmo_h; vel_factor = Cosmo.current_a / scale_factor; - da_min = vel_factor / max_dti; - dt_min = Cosmo.Get_dt_from_da( da_min ); - #else + da_min = vel_factor / max_dti; + dt_min = Cosmo.Get_dt_from_da(da_min); + #else dt_min = 1 / max_dti; - #endif - - return Particles.C_cfl*dt_min; + #endif + return Particles.C_cfl * dt_min; } -//Update positions and velocities (step 1 of KDK scheme ) in the GPU -void Grid3D::Advance_Particles_KDK_Step1_GPU(){ - - #ifdef COSMOLOGY - Particles.Advance_Particles_KDK_Step1_Cosmo_GPU_function( Particles.n_local, Cosmo.delta_a, Particles.pos_x_dev, Particles.pos_y_dev, Particles.pos_z_dev, Particles.vel_x_dev, Particles.vel_y_dev, Particles.vel_z_dev, Particles.grav_x_dev, Particles.grav_y_dev, Particles.grav_z_dev, Cosmo.current_a, Cosmo.H0, Cosmo.cosmo_h, Cosmo.Omega_M, Cosmo.Omega_L, Cosmo.Omega_K ); - #else - Particles.Advance_Particles_KDK_Step1_GPU_function( Particles.n_local, Particles.dt, Particles.pos_x_dev, Particles.pos_y_dev, Particles.pos_z_dev, Particles.vel_x_dev, Particles.vel_y_dev, Particles.vel_z_dev, Particles.grav_x_dev, Particles.grav_y_dev, Particles.grav_z_dev ); - #endif - - +// Update positions and velocities (step 1 of KDK scheme ) in the GPU +void Grid3D::Advance_Particles_KDK_Step1_GPU() +{ + #ifdef COSMOLOGY + Particles.Advance_Particles_KDK_Step1_Cosmo_GPU_function( + Particles.n_local, Cosmo.delta_a, Particles.pos_x_dev, + Particles.pos_y_dev, Particles.pos_z_dev, Particles.vel_x_dev, + Particles.vel_y_dev, Particles.vel_z_dev, Particles.grav_x_dev, + Particles.grav_y_dev, Particles.grav_z_dev, Cosmo.current_a, Cosmo.H0, + Cosmo.cosmo_h, Cosmo.Omega_M, Cosmo.Omega_L, Cosmo.Omega_K); + #else + Particles.Advance_Particles_KDK_Step1_GPU_function( + Particles.n_local, Particles.dt, Particles.pos_x_dev, Particles.pos_y_dev, + Particles.pos_z_dev, Particles.vel_x_dev, Particles.vel_y_dev, + Particles.vel_z_dev, Particles.grav_x_dev, Particles.grav_y_dev, + Particles.grav_z_dev); + #endif } -//Update velocities (step 2 of KDK scheme ) in the GPU -void Grid3D::Advance_Particles_KDK_Step2_GPU(){ - - #ifdef COSMOLOGY - Particles.Advance_Particles_KDK_Step2_Cosmo_GPU_function( Particles.n_local, Cosmo.delta_a, Particles.vel_x_dev, Particles.vel_y_dev, Particles.vel_z_dev, Particles.grav_x_dev, Particles.grav_y_dev, Particles.grav_z_dev, Cosmo.current_a, Cosmo.H0, Cosmo.cosmo_h, Cosmo.Omega_M, Cosmo.Omega_L, Cosmo.Omega_K ); - #else - Particles.Advance_Particles_KDK_Step2_GPU_function( Particles.n_local, Particles.dt, Particles.vel_x_dev, Particles.vel_y_dev, Particles.vel_z_dev, Particles.grav_x_dev, Particles.grav_y_dev, Particles.grav_z_dev ); - #endif - - +// Update velocities (step 2 of KDK scheme ) in the GPU +void Grid3D::Advance_Particles_KDK_Step2_GPU() +{ + #ifdef COSMOLOGY + Particles.Advance_Particles_KDK_Step2_Cosmo_GPU_function( + Particles.n_local, Cosmo.delta_a, Particles.vel_x_dev, + Particles.vel_y_dev, Particles.vel_z_dev, Particles.grav_x_dev, + Particles.grav_y_dev, Particles.grav_z_dev, Cosmo.current_a, Cosmo.H0, + Cosmo.cosmo_h, Cosmo.Omega_M, Cosmo.Omega_L, Cosmo.Omega_K); + #else + Particles.Advance_Particles_KDK_Step2_GPU_function( + Particles.n_local, Particles.dt, Particles.vel_x_dev, Particles.vel_y_dev, + Particles.vel_z_dev, Particles.grav_x_dev, Particles.grav_y_dev, + Particles.grav_z_dev); + #endif } + #endif // PARTICLES_GPU -#endif //PARTICLES_GPU - - - - -#ifdef PARTICLES_CPU + #ifdef PARTICLES_CPU -//Loop over the particles anf compute dt_min -Real Grid3D::Calc_Particles_dt_function( part_int_t p_start, part_int_t p_end ){ +// Loop over the particles anf compute dt_min +Real Grid3D::Calc_Particles_dt_function(part_int_t p_start, part_int_t p_end) +{ part_int_t pID; Real dt, dt_min, vel; dt_min = 1e100; - for ( pID=p_start; pID 0){ - dt = Particles.G.dx / vel; - dt_min = std::min( dt_min, dt); + if (vel > 0) { + dt = Particles.G.dx / vel; + dt_min = std::min(dt_min, dt); } vel = fabs(Particles.vel_y[pID]); - if ( vel > 0){ - dt = Particles.G.dy / vel; - dt_min = std::min( dt_min, dt); + if (vel > 0) { + dt = Particles.G.dy / vel; + dt_min = std::min(dt_min, dt); } vel = fabs(Particles.vel_z[pID]); - if ( vel > 0){ - dt = Particles.G.dz / vel; - dt_min = std::min( dt_min, dt); + if (vel > 0) { + dt = Particles.G.dz / vel; + dt_min = std::min(dt_min, dt); } } return Particles.C_cfl * dt_min; } -#endif //PARTICLES_CPU - -//Update the particles positions and velocities -void Grid3D::Advance_Particles( int N_step ){ + #endif // PARTICLES_CPU +// Update the particles positions and velocities +void Grid3D::Advance_Particles(int N_step) +{ CudaCheckError(); #ifdef CPU_TIME - if ( N_step == 1) Timer.Advance_Part_1.Start(); - if ( N_step == 2) Timer.Advance_Part_2.Start(); + if (N_step == 1) Timer.Advance_Part_1.Start(); + if (N_step == 2) Timer.Advance_Part_2.Start(); #endif #ifdef PARTICLES_KDK - //Update the velocities by 0.5*delta_t and update the positions by delta_t - if ( N_step == 1 ) Advance_Particles_KDK_Step1(); + // Update the velocities by 0.5*delta_t and update the positions by delta_t + if (N_step == 1) Advance_Particles_KDK_Step1(); #endif - if ( N_step == 2 ){ - //Compute the particles accelerations at the new positions + if (N_step == 2) { + // Compute the particles accelerations at the new positions Get_Particles_Acceleration(); - #ifdef PARTICLES_KDK - //Advance the particles velocities by the remaining 0.5*delta_t + #ifdef PARTICLES_KDK + // Advance the particles velocities by the remaining 0.5*delta_t Advance_Particles_KDK_Step2(); - #endif - + #endif } #ifdef CPU_TIME - if ( N_step == 1) Timer.Advance_Part_1.End(); - if ( N_step == 2) Timer.Advance_Part_2.End(); + if (N_step == 1) Timer.Advance_Part_1.End(); + if (N_step == 2) Timer.Advance_Part_2.End(); #endif CudaCheckError(); - } // Get the accteleration for all the particles -void Grid3D::Get_Particles_Acceleration(){ - - //First compute the gravitational field at the center of the grid cells +void Grid3D::Get_Particles_Acceleration() +{ + // First compute the gravitational field at the center of the grid cells Get_Gravity_Field_Particles(); - //Then Interpolate the gravitational field from the centers of the cells to the positions of the particles + // Then Interpolate the gravitational field from the centers of the cells to + // the positions of the particles Get_Gravity_CIC(); } -//Update positions and velocities (step 1 of KDK scheme ) -void Grid3D::Advance_Particles_KDK_Step1( ){ - +// Update positions and velocities (step 1 of KDK scheme ) +void Grid3D::Advance_Particles_KDK_Step1() +{ #ifdef PARTICLES_CPU - #ifndef PARALLEL_OMP - #ifdef COSMOLOGY - Advance_Particles_KDK_Cosmo_Step1_function( 0, Particles.n_local ); - #else - Advance_Particles_KDK_Step1_function( 0, Particles.n_local ); - #endif//COSMOLOGY - #else - #pragma omp parallel num_threads( N_OMP_THREADS ) + #ifndef PARALLEL_OMP + #ifdef COSMOLOGY + Advance_Particles_KDK_Cosmo_Step1_function(0, Particles.n_local); + #else + Advance_Particles_KDK_Step1_function(0, Particles.n_local); + #endif // COSMOLOGY + #else + #pragma omp parallel num_threads(N_OMP_THREADS) { int omp_id, n_omp_procs; part_int_t p_start, p_end; - omp_id = omp_get_thread_num(); + omp_id = omp_get_thread_num(); n_omp_procs = omp_get_num_threads(); - Get_OMP_Particles_Indxs( Particles.n_local, N_OMP_THREADS, omp_id, &p_start, &p_end ); - #ifdef COSMOLOGY - Advance_Particles_KDK_Cosmo_Step1_function( p_start, p_end ); - #else - Advance_Particles_KDK_Step1_function( p_start, p_end ); - #endif//COSMOLOGY + Get_OMP_Particles_Indxs(Particles.n_local, N_OMP_THREADS, omp_id, &p_start, + &p_end); + #ifdef COSMOLOGY + Advance_Particles_KDK_Cosmo_Step1_function(p_start, p_end); + #else + Advance_Particles_KDK_Step1_function(p_start, p_end); + #endif // COSMOLOGY } - #endif //PARALLEL_OMP - #endif //PARTICLES_CPU + #endif // PARALLEL_OMP + #endif // PARTICLES_CPU #ifdef PARTICLES_GPU Advance_Particles_KDK_Step1_GPU(); - #endif //PARTICLES_GPU - + #endif // PARTICLES_GPU } -//Update velocities (step 2 of KDK scheme ) -void Grid3D::Advance_Particles_KDK_Step2( ){ - +// Update velocities (step 2 of KDK scheme ) +void Grid3D::Advance_Particles_KDK_Step2() +{ #ifdef PARTICLES_CPU - #ifndef PARALLEL_OMP - #ifdef COSMOLOGY - Advance_Particles_KDK_Cosmo_Step2_function( 0, Particles.n_local ); - #else - Advance_Particles_KDK_Step2_function( 0, Particles.n_local ); - #endif//COSMOLOGY - #else - #pragma omp parallel num_threads( N_OMP_THREADS ) + #ifndef PARALLEL_OMP + #ifdef COSMOLOGY + Advance_Particles_KDK_Cosmo_Step2_function(0, Particles.n_local); + #else + Advance_Particles_KDK_Step2_function(0, Particles.n_local); + #endif // COSMOLOGY + #else + #pragma omp parallel num_threads(N_OMP_THREADS) { int omp_id, n_omp_procs; part_int_t p_start, p_end; - omp_id = omp_get_thread_num(); + omp_id = omp_get_thread_num(); n_omp_procs = omp_get_num_threads(); - Get_OMP_Particles_Indxs( Particles.n_local, N_OMP_THREADS, omp_id, &p_start, &p_end ); - #ifdef COSMOLOGY - Advance_Particles_KDK_Cosmo_Step2_function( p_start, p_end ); - #else - Advance_Particles_KDK_Step2_function( p_start, p_end ); - #endif//COSMOLOGY + Get_OMP_Particles_Indxs(Particles.n_local, N_OMP_THREADS, omp_id, &p_start, + &p_end); + #ifdef COSMOLOGY + Advance_Particles_KDK_Cosmo_Step2_function(p_start, p_end); + #else + Advance_Particles_KDK_Step2_function(p_start, p_end); + #endif // COSMOLOGY } - #endif //PARALLEL_OMP - #endif //PARTICLES_CPU + #endif // PARALLEL_OMP + #endif // PARTICLES_CPU #ifdef PARTICLES_GPU Advance_Particles_KDK_Step2_GPU(); - #endif //PARTICLES_GPU - + #endif // PARTICLES_GPU } -#ifdef PARTICLES_CPU -//Update positions and velocities (step 1 of KDK scheme ) -void Grid3D::Advance_Particles_KDK_Step1_function( part_int_t p_start, part_int_t p_end ){ - + #ifdef PARTICLES_CPU +// Update positions and velocities (step 1 of KDK scheme ) +void Grid3D::Advance_Particles_KDK_Step1_function(part_int_t p_start, + part_int_t p_end) +{ part_int_t pID; Real dt = Particles.dt; // Advance velocities by half a step - for ( pID=p_start; pID -#include -#include -#include -#include "../utils/gpu.hpp" -#include "../global/global.h" -#include "../global/global_cuda.h" -#include "../grid/grid3D.h" -#include "../io/io.h" -#include "particles_3D.h" - -#ifdef COSMOLOGY -#include "../cosmology/cosmology.h" + #include + #include + #include + #include + + #include "../global/global.h" + #include "../global/global_cuda.h" + #include "../grid/grid3D.h" + #include "../io/io.h" + #include "../utils/gpu.hpp" + #include "particles_3D.h" + + #ifdef COSMOLOGY + #include "../cosmology/cosmology.h" // #include "../cosmology/cosmology_functions_gpu.h" -// FUTURE FIX: The Hubble function was defined here because I couldn't get it form other file, tried -dc flag when compiling buu paris broke. -__device__ Real Get_Hubble_Parameter_dev( Real a, Real H0, Real Omega_M, Real Omega_L, Real Omega_K ){ - Real a2 = a * a; - Real a3 = a2 * a; - Real factor = ( Omega_M/a3 + Omega_K/a2 + Omega_L ); +// FUTURE FIX: The Hubble function was defined here because I couldn't get it +// form other file, tried -dc flag when compiling buu paris broke. +__device__ Real Get_Hubble_Parameter_dev(Real a, Real H0, Real Omega_M, + Real Omega_L, Real Omega_K) +{ + Real a2 = a * a; + Real a3 = a2 * a; + Real factor = (Omega_M / a3 + Omega_K / a2 + Omega_L); return H0 * sqrt(factor); - } -#endif - - - - + #endif -__global__ void Calc_Particles_dti_Kernel( part_int_t n_local, Real dx, Real dy, Real dz, Real *vel_x_dev, Real *vel_y_dev, Real *vel_z_dev, Real *dti_array ) +__global__ void Calc_Particles_dti_Kernel(part_int_t n_local, Real dx, Real dy, + Real dz, Real *vel_x_dev, + Real *vel_y_dev, Real *vel_z_dev, + Real *dti_array) { __shared__ Real max_dti[TPB_PARTICLES]; @@ -37,7 +39,7 @@ __global__ void Calc_Particles_dti_Kernel( part_int_t n_local, Real dx, Real dy, int tid; // get a global thread ID - id = blockIdx.x * blockDim.x + threadIdx.x ; + id = blockIdx.x * blockDim.x + threadIdx.x; // and a thread id within the block tid = threadIdx.x; @@ -50,20 +52,21 @@ __global__ void Calc_Particles_dti_Kernel( part_int_t n_local, Real dx, Real dy, // if( tid == 0 ) printf("%f %f %f \n", dx, dy, dz ); // threads corresponding to real cells do the calculation - if (id < n_local ){ + if (id < n_local) { // every thread collects the variables it needs from global memory - vx = vel_x_dev[id]; - vy = vel_y_dev[id]; - vz = vel_z_dev[id]; - max_dti[tid] = fmax( fabs(vx)/dx, fabs(vy)/dy); - max_dti[tid] = fmax( max_dti[tid], fabs(vz)/dz); - max_dti[tid] = fmax( max_dti[tid], 0.0); + vx = vel_x_dev[id]; + vy = vel_y_dev[id]; + vz = vel_z_dev[id]; + max_dti[tid] = fmax(fabs(vx) / dx, fabs(vy) / dy); + max_dti[tid] = fmax(max_dti[tid], fabs(vz) / dz); + max_dti[tid] = fmax(max_dti[tid], 0.0); } __syncthreads(); - // do the reduction in shared memory (find the max inverse timestep in the block) - for (unsigned int s=1; s= n_local) return; +__global__ void Advance_Particles_KDK_Step1_Kernel( + part_int_t n_local, Real dt, Real *pos_x_dev, Real *pos_y_dev, + Real *pos_z_dev, Real *vel_x_dev, Real *vel_y_dev, Real *vel_z_dev, + Real *grav_x_dev, Real *grav_y_dev, Real *grav_z_dev) +{ + part_int_t tid = blockIdx.x * blockDim.x + threadIdx.x; + if (tid >= n_local) return; // Advance velocities by half a step vel_x_dev[tid] += 0.5 * dt * grav_x_dev[tid]; vel_y_dev[tid] += 0.5 * dt * grav_y_dev[tid]; vel_z_dev[tid] += 0.5 * dt * grav_z_dev[tid]; - //Advance Positions using advanced velocities + // Advance Positions using advanced velocities pos_x_dev[tid] += dt * vel_x_dev[tid]; pos_y_dev[tid] += dt * vel_y_dev[tid]; pos_z_dev[tid] += dt * vel_z_dev[tid]; } - -__global__ void Advance_Particles_KDK_Step2_Kernel( part_int_t n_local, Real dt, Real *vel_x_dev, Real *vel_y_dev, Real *vel_z_dev, Real *grav_x_dev, Real *grav_y_dev, Real *grav_z_dev ){ - - part_int_t tid = blockIdx.x * blockDim.x + threadIdx.x ; - if ( tid >= n_local) return; +__global__ void Advance_Particles_KDK_Step2_Kernel( + part_int_t n_local, Real dt, Real *vel_x_dev, Real *vel_y_dev, + Real *vel_z_dev, Real *grav_x_dev, Real *grav_y_dev, Real *grav_z_dev) +{ + part_int_t tid = blockIdx.x * blockDim.x + threadIdx.x; + if (tid >= n_local) return; // Advance velocities by the second half a step vel_x_dev[tid] += 0.5 * dt * grav_x_dev[tid]; vel_y_dev[tid] += 0.5 * dt * grav_y_dev[tid]; vel_z_dev[tid] += 0.5 * dt * grav_z_dev[tid]; - } - -void Particles_3D::Advance_Particles_KDK_Step1_GPU_function( part_int_t n_local, Real dt, Real *pos_x_dev, Real *pos_y_dev, Real *pos_z_dev, Real *vel_x_dev, Real *vel_y_dev, Real *vel_z_dev, Real *grav_x_dev, Real *grav_y_dev, Real *grav_z_dev ){ - +void Particles_3D::Advance_Particles_KDK_Step1_GPU_function( + part_int_t n_local, Real dt, Real *pos_x_dev, Real *pos_y_dev, + Real *pos_z_dev, Real *vel_x_dev, Real *vel_y_dev, Real *vel_z_dev, + Real *grav_x_dev, Real *grav_y_dev, Real *grav_z_dev) +{ // set values for GPU kernels - int ngrid = (n_local + TPB_PARTICLES - 1) / TPB_PARTICLES; + int ngrid = (n_local + TPB_PARTICLES - 1) / TPB_PARTICLES; // number of blocks per 1D grid dim3 dim1dGrid(ngrid, 1, 1); // number of threads per 1D block @@ -155,16 +159,20 @@ void Particles_3D::Advance_Particles_KDK_Step1_GPU_function( part_int_t n_local, // Only runs if there are local particles if (n_local > 0) { - hipLaunchKernelGGL(Advance_Particles_KDK_Step1_Kernel, dim1dGrid, dim1dBlock, 0, 0, n_local, dt, pos_x_dev, pos_y_dev, pos_z_dev, vel_x_dev, vel_y_dev, vel_z_dev, grav_x_dev, grav_y_dev, grav_z_dev ); + hipLaunchKernelGGL(Advance_Particles_KDK_Step1_Kernel, dim1dGrid, + dim1dBlock, 0, 0, n_local, dt, pos_x_dev, pos_y_dev, + pos_z_dev, vel_x_dev, vel_y_dev, vel_z_dev, grav_x_dev, + grav_y_dev, grav_z_dev); CudaCheckError(); } } - -void Particles_3D::Advance_Particles_KDK_Step2_GPU_function( part_int_t n_local, Real dt, Real *vel_x_dev, Real *vel_y_dev, Real *vel_z_dev, Real *grav_x_dev, Real *grav_y_dev, Real *grav_z_dev ){ - +void Particles_3D::Advance_Particles_KDK_Step2_GPU_function( + part_int_t n_local, Real dt, Real *vel_x_dev, Real *vel_y_dev, + Real *vel_z_dev, Real *grav_x_dev, Real *grav_y_dev, Real *grav_z_dev) +{ // set values for GPU kernels - int ngrid = (n_local + TPB_PARTICLES - 1) / TPB_PARTICLES; + int ngrid = (n_local + TPB_PARTICLES - 1) / TPB_PARTICLES; // number of blocks per 1D grid dim3 dim1dGrid(ngrid, 1, 1); // number of threads per 1D block @@ -172,35 +180,38 @@ void Particles_3D::Advance_Particles_KDK_Step2_GPU_function( part_int_t n_local, // Only runs if there are local particles if (n_local > 0) { - hipLaunchKernelGGL(Advance_Particles_KDK_Step2_Kernel, dim1dGrid, dim1dBlock, 0, 0, n_local, dt, vel_x_dev, vel_y_dev, vel_z_dev, grav_x_dev, grav_y_dev, grav_z_dev ); + hipLaunchKernelGGL(Advance_Particles_KDK_Step2_Kernel, dim1dGrid, + dim1dBlock, 0, 0, n_local, dt, vel_x_dev, vel_y_dev, + vel_z_dev, grav_x_dev, grav_y_dev, grav_z_dev); CudaCheckError(); } } + #ifdef COSMOLOGY -#ifdef COSMOLOGY - - -__global__ void Advance_Particles_KDK_Step1_Cosmo_Kernel( part_int_t n_local, Real da, Real *pos_x_dev, Real *pos_y_dev, Real *pos_z_dev, Real *vel_x_dev, Real *vel_y_dev, Real *vel_z_dev, Real *grav_x_dev, Real *grav_y_dev, Real *grav_z_dev, Real current_a, Real H0, Real cosmo_h, Real Omega_M, Real Omega_L, Real Omega_K ){ - - part_int_t tid = blockIdx.x * blockDim.x + threadIdx.x ; - if ( tid >= n_local) return; +__global__ void Advance_Particles_KDK_Step1_Cosmo_Kernel( + part_int_t n_local, Real da, Real *pos_x_dev, Real *pos_y_dev, + Real *pos_z_dev, Real *vel_x_dev, Real *vel_y_dev, Real *vel_z_dev, + Real *grav_x_dev, Real *grav_y_dev, Real *grav_z_dev, Real current_a, + Real H0, Real cosmo_h, Real Omega_M, Real Omega_L, Real Omega_K) +{ + part_int_t tid = blockIdx.x * blockDim.x + threadIdx.x; + if (tid >= n_local) return; Real vel_x, vel_y, vel_z; vel_x = vel_x_dev[tid]; vel_y = vel_y_dev[tid]; vel_z = vel_z_dev[tid]; - Real da_half, a_half, H, H_half, dt, dt_half; - da_half = da/2; - a_half = current_a + da_half; + da_half = da / 2; + a_half = current_a + da_half; - H = Get_Hubble_Parameter_dev( current_a, H0, Omega_M, Omega_L, Omega_K ); - H_half = Get_Hubble_Parameter_dev( a_half, H0, Omega_M, Omega_L, Omega_K ); + H = Get_Hubble_Parameter_dev(current_a, H0, Omega_M, Omega_L, Omega_K); + H_half = Get_Hubble_Parameter_dev(a_half, H0, Omega_M, Omega_L, Omega_K); - dt = da / ( current_a * H ) * cosmo_h; - dt_half = da / ( a_half * H_half ) * cosmo_h / ( a_half ); + dt = da / (current_a * H) * cosmo_h; + dt_half = da / (a_half * H_half) * cosmo_h / (a_half); // if ( tid == 0 ) printf( "dt: %f\n", dt); // if ( tid == 0 ) printf( "pos_x: %f\n", pos_x_dev[tid]); @@ -208,24 +219,27 @@ __global__ void Advance_Particles_KDK_Step1_Cosmo_Kernel( part_int_t n_local, Re // if ( tid == 0 ) printf( "grav_x: %f\n", grav_x_dev[tid]); // Advance velocities by half a step - vel_x = ( current_a*vel_x + 0.5*dt*grav_x_dev[tid] ) / a_half; - vel_y = ( current_a*vel_y + 0.5*dt*grav_y_dev[tid] ) / a_half; - vel_z = ( current_a*vel_z + 0.5*dt*grav_z_dev[tid] ) / a_half; + vel_x = (current_a * vel_x + 0.5 * dt * grav_x_dev[tid]) / a_half; + vel_y = (current_a * vel_y + 0.5 * dt * grav_y_dev[tid]) / a_half; + vel_z = (current_a * vel_z + 0.5 * dt * grav_z_dev[tid]) / a_half; vel_x_dev[tid] = vel_x; vel_y_dev[tid] = vel_y; vel_z_dev[tid] = vel_z; - //Advance Positions using advanced velocities + // Advance Positions using advanced velocities pos_x_dev[tid] += dt_half * vel_x; pos_y_dev[tid] += dt_half * vel_y; pos_z_dev[tid] += dt_half * vel_z; } - -__global__ void Advance_Particles_KDK_Step2_Cosmo_Kernel( part_int_t n_local, Real da, Real *vel_x_dev, Real *vel_y_dev, Real *vel_z_dev, Real *grav_x_dev, Real *grav_y_dev, Real *grav_z_dev, Real current_a, Real H0, Real cosmo_h, Real Omega_M, Real Omega_L, Real Omega_K ){ - - part_int_t tid = blockIdx.x * blockDim.x + threadIdx.x ; - if ( tid >= n_local) return; +__global__ void Advance_Particles_KDK_Step2_Cosmo_Kernel( + part_int_t n_local, Real da, Real *vel_x_dev, Real *vel_y_dev, + Real *vel_z_dev, Real *grav_x_dev, Real *grav_y_dev, Real *grav_z_dev, + Real current_a, Real H0, Real cosmo_h, Real Omega_M, Real Omega_L, + Real Omega_K) +{ + part_int_t tid = blockIdx.x * blockDim.x + threadIdx.x; + if (tid >= n_local) return; Real vel_x, vel_y, vel_z; vel_x = vel_x_dev[tid]; @@ -233,23 +247,28 @@ __global__ void Advance_Particles_KDK_Step2_Cosmo_Kernel( part_int_t n_local, Re vel_z = vel_z_dev[tid]; Real da_half, a_half, dt; - da_half = da/2; - a_half = current_a - da_half; + da_half = da / 2; + a_half = current_a - da_half; - dt = da / ( current_a * Get_Hubble_Parameter_dev( current_a, H0, Omega_M, Omega_L, Omega_K ) ) * cosmo_h; + dt = da / + (current_a * + Get_Hubble_Parameter_dev(current_a, H0, Omega_M, Omega_L, Omega_K)) * + cosmo_h; // Advance velocities by the second half a step - vel_x_dev[tid] = ( a_half*vel_x + 0.5*dt*grav_x_dev[tid] ) / current_a; - vel_y_dev[tid] = ( a_half*vel_y + 0.5*dt*grav_y_dev[tid] ) / current_a; - vel_z_dev[tid] = ( a_half*vel_z + 0.5*dt*grav_z_dev[tid] ) / current_a; - + vel_x_dev[tid] = (a_half * vel_x + 0.5 * dt * grav_x_dev[tid]) / current_a; + vel_y_dev[tid] = (a_half * vel_y + 0.5 * dt * grav_y_dev[tid]) / current_a; + vel_z_dev[tid] = (a_half * vel_z + 0.5 * dt * grav_z_dev[tid]) / current_a; } - -void Particles_3D::Advance_Particles_KDK_Step1_Cosmo_GPU_function( part_int_t n_local, Real delta_a, Real *pos_x_dev, Real *pos_y_dev, Real *pos_z_dev, Real *vel_x_dev, Real *vel_y_dev, Real *vel_z_dev, Real *grav_x_dev, Real *grav_y_dev, Real *grav_z_dev, Real current_a, Real H0, Real cosmo_h, Real Omega_M, Real Omega_L, Real Omega_K ){ - +void Particles_3D::Advance_Particles_KDK_Step1_Cosmo_GPU_function( + part_int_t n_local, Real delta_a, Real *pos_x_dev, Real *pos_y_dev, + Real *pos_z_dev, Real *vel_x_dev, Real *vel_y_dev, Real *vel_z_dev, + Real *grav_x_dev, Real *grav_y_dev, Real *grav_z_dev, Real current_a, + Real H0, Real cosmo_h, Real Omega_M, Real Omega_L, Real Omega_K) +{ // set values for GPU kernels - int ngrid = (n_local + TPB_PARTICLES - 1) / TPB_PARTICLES; + int ngrid = (n_local + TPB_PARTICLES - 1) / TPB_PARTICLES; // number of blocks per 1D grid dim3 dim1dGrid(ngrid, 1, 1); // number of threads per 1D block @@ -257,19 +276,24 @@ void Particles_3D::Advance_Particles_KDK_Step1_Cosmo_GPU_function( part_int_t n_ // Only runs if there are local particles if (n_local > 0) { - hipLaunchKernelGGL(Advance_Particles_KDK_Step1_Cosmo_Kernel, dim1dGrid, dim1dBlock, 0, 0, n_local, delta_a, pos_x_dev, pos_y_dev, pos_z_dev, vel_x_dev, vel_y_dev, vel_z_dev, grav_x_dev, grav_y_dev, grav_z_dev, current_a, H0, cosmo_h, Omega_M, Omega_L, Omega_K ); + hipLaunchKernelGGL(Advance_Particles_KDK_Step1_Cosmo_Kernel, dim1dGrid, + dim1dBlock, 0, 0, n_local, delta_a, pos_x_dev, pos_y_dev, + pos_z_dev, vel_x_dev, vel_y_dev, vel_z_dev, grav_x_dev, + grav_y_dev, grav_z_dev, current_a, H0, cosmo_h, Omega_M, + Omega_L, Omega_K); CHECK(cudaDeviceSynchronize()); - // CudaCheckError(); + // CudaCheckError(); } - } - - -void Particles_3D::Advance_Particles_KDK_Step2_Cosmo_GPU_function( part_int_t n_local, Real delta_a, Real *vel_x_dev, Real *vel_y_dev, Real *vel_z_dev, Real *grav_x_dev, Real *grav_y_dev, Real *grav_z_dev, Real current_a, Real H0, Real cosmo_h, Real Omega_M, Real Omega_L, Real Omega_K ){ - +void Particles_3D::Advance_Particles_KDK_Step2_Cosmo_GPU_function( + part_int_t n_local, Real delta_a, Real *vel_x_dev, Real *vel_y_dev, + Real *vel_z_dev, Real *grav_x_dev, Real *grav_y_dev, Real *grav_z_dev, + Real current_a, Real H0, Real cosmo_h, Real Omega_M, Real Omega_L, + Real Omega_K) +{ // set values for GPU kernels - int ngrid = (n_local + TPB_PARTICLES - 1) / TPB_PARTICLES; + int ngrid = (n_local + TPB_PARTICLES - 1) / TPB_PARTICLES; // number of blocks per 1D grid dim3 dim1dGrid(ngrid, 1, 1); // number of threads per 1D block @@ -277,15 +301,15 @@ void Particles_3D::Advance_Particles_KDK_Step2_Cosmo_GPU_function( part_int_t n_ // Only runs if there are local particles if (n_local > 0) { - hipLaunchKernelGGL(Advance_Particles_KDK_Step2_Cosmo_Kernel, dim1dGrid, dim1dBlock, 0, 0, n_local, delta_a, vel_x_dev, vel_y_dev, vel_z_dev, grav_x_dev, grav_y_dev, grav_z_dev, current_a, H0, cosmo_h, Omega_M, Omega_L, Omega_K ); + hipLaunchKernelGGL(Advance_Particles_KDK_Step2_Cosmo_Kernel, dim1dGrid, + dim1dBlock, 0, 0, n_local, delta_a, vel_x_dev, vel_y_dev, + vel_z_dev, grav_x_dev, grav_y_dev, grav_z_dev, current_a, + H0, cosmo_h, Omega_M, Omega_L, Omega_K); CHECK(cudaDeviceSynchronize()); - // CudaCheckError(); + // CudaCheckError(); } } -#endif //COSMOLOGY - - - + #endif // COSMOLOGY #endif diff --git a/src/particles/supernova.h b/src/particles/supernova.h index 5490c44d0..56b1ad70a 100644 --- a/src/particles/supernova.h +++ b/src/particles/supernova.h @@ -1,36 +1,43 @@ #pragma once #if defined(PARTICLES_GPU) && defined(SUPERNOVA) -#include "../global/global.h" -#include "../analysis/feedback_analysis.h" -#ifdef O_HIP -#include -#include -#else -#include -#include -#endif //O_HIP + #include "../analysis/feedback_analysis.h" + #include "../global/global.h" + #ifdef O_HIP + #include + #include + #else + #include + #include + #endif // O_HIP +namespace supernova +{ +const int SN = 0, RESOLVED = 1, NOT_RESOLVED = 2, ENERGY = 3, MOMENTUM = 4, + UNRES_ENERGY = 5; -namespace supernova { - const int SN = 0, RESOLVED = 1, NOT_RESOLVED = 2, ENERGY = 3, MOMENTUM = 4, UNRES_ENERGY = 5; +// supernova rate: 1SN / 100 solar masses per 36 Myr +static const Real DEFAULT_SNR = 2.8e-7; +static const Real ENERGY_PER_SN = + 1e51 / MASS_UNIT * TIME_UNIT * TIME_UNIT / LENGTH_UNIT / LENGTH_UNIT; +static const Real MASS_PER_SN = 10.0; // 10 solarMasses per SN +static const Real FINAL_MOMENTUM = + 2.8e5 / LENGTH_UNIT * 1e5 * + TIME_UNIT; // 2.8e5 M_s km/s * n_0^{-0.17} -> eq.(34) Kim & Ostriker (2015) +static const Real MU = 0.6; +static const Real R_SH = + 0.0302; // 30.2 pc * n_0^{-0.46} -> eq.(31) Kim & Ostriker (2015) +static const Real DEFAULT_SN_END = + 40000; // default value for when SNe stop (40 Myr) +static const Real DEFAULT_SN_START = + 4000; // default value for when SNe start (4 Myr) - // supernova rate: 1SN / 100 solar masses per 36 Myr - static const Real DEFAULT_SNR = 2.8e-7; - static const Real ENERGY_PER_SN = 1e51 / MASS_UNIT*TIME_UNIT*TIME_UNIT/LENGTH_UNIT/LENGTH_UNIT; - static const Real MASS_PER_SN = 10.0; // 10 solarMasses per SN - static const Real FINAL_MOMENTUM = 2.8e5 / LENGTH_UNIT * 1e5 * TIME_UNIT; // 2.8e5 M_s km/s * n_0^{-0.17} -> eq.(34) Kim & Ostriker (2015) - static const Real MU = 0.6; - static const Real R_SH = 0.0302; // 30.2 pc * n_0^{-0.46} -> eq.(31) Kim & Ostriker (2015) - static const Real DEFAULT_SN_END = 40000; // default value for when SNe stop (40 Myr) - static const Real DEFAULT_SN_START = 4000; // default value for when SNe start (4 Myr) +extern curandStateMRG32k3a_t* randStates; +extern part_int_t n_states; +extern Real *dev_snr, snr_dt, time_sn_end, time_sn_start; - - extern curandStateMRG32k3a_t* randStates; - extern part_int_t n_states; - extern Real *dev_snr, snr_dt, time_sn_end, time_sn_start; - - void initState(struct parameters *P, part_int_t n_local, Real allocation_factor = 1); - Real Cluster_Feedback(Grid3D& G, FeedbackAnalysis& sn_analysis); -} -#endif //PARTICLES_GPU && SUPERNOVA +void initState(struct parameters* P, part_int_t n_local, + Real allocation_factor = 1); +Real Cluster_Feedback(Grid3D& G, FeedbackAnalysis& sn_analysis); +} // namespace supernova +#endif // PARTICLES_GPU && SUPERNOVA diff --git a/src/reconstruction/pcm_cuda.cu b/src/reconstruction/pcm_cuda.cu index e6d48999a..56370e014 100644 --- a/src/reconstruction/pcm_cuda.cu +++ b/src/reconstruction/pcm_cuda.cu @@ -2,279 +2,274 @@ * \brief Definitions of the piecewise constant reconstruction functions */ #ifdef CUDA -#include "../utils/gpu.hpp" -#include -#include "../global/global.h" -#include "../global/global_cuda.h" -#include "../reconstruction/pcm_cuda.h" -#include "../utils/mhd_utilities.h" -#include "../utils/cuda_utilities.h" - -__global__ void PCM_Reconstruction_1D(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int n_cells, int n_ghost, Real gamma, int n_fields) + #include + + #include "../global/global.h" + #include "../global/global_cuda.h" + #include "../reconstruction/pcm_cuda.h" + #include "../utils/cuda_utilities.h" + #include "../utils/gpu.hpp" + #include "../utils/mhd_utilities.h" + +__global__ void PCM_Reconstruction_1D(Real *dev_conserved, Real *dev_bounds_L, + Real *dev_bounds_R, int n_cells, + int n_ghost, Real gamma, int n_fields) { - // declare conserved variables for each stencil // these will be placed into registers for each thread Real d, mx, my, mz, E; #ifdef DE Real ge; - #endif //DE + #endif // DE #ifdef SCALAR Real scalar[NSCALARS]; - #endif //SCALAR + #endif // SCALAR // get a global thread ID - int xid = threadIdx.x + blockIdx.x*blockDim.x; + int xid = threadIdx.x + blockIdx.x * blockDim.x; int id; - // threads corresponding to real cells plus one ghost cell do the calculation - if (xid < n_cells-1) - { + if (xid < n_cells - 1) { // retrieve appropriate conserved variables id = xid; - d = dev_conserved[ id]; - mx = dev_conserved[ n_cells + id]; - my = dev_conserved[2*n_cells + id]; - mz = dev_conserved[3*n_cells + id]; - E = dev_conserved[4*n_cells + id]; - #ifdef SCALAR - for (int i=0; i 0) - { - id = cuda_utilities::compute1DIndex(xid-1, yid, zid, nx, ny); - dev_bounds_Rx[ id] = d; - dev_bounds_Rx[ n_cells + id] = mx; - dev_bounds_Rx[2*n_cells + id] = my; - dev_bounds_Rx[3*n_cells + id] = mz; - dev_bounds_Rx[4*n_cells + id] = E; - #ifdef SCALAR - for (int i=0; i 0) { + id = cuda_utilities::compute1DIndex(xid - 1, yid, zid, nx, ny); + dev_bounds_Rx[id] = d; + dev_bounds_Rx[n_cells + id] = mx; + dev_bounds_Rx[2 * n_cells + id] = my; + dev_bounds_Rx[3 * n_cells + id] = mz; + dev_bounds_Rx[4 * n_cells + id] = E; + #ifdef SCALAR + for (int i = 0; i < NSCALARS; i++) { + dev_bounds_Rx[(5 + i) * n_cells + id] = scalar[i]; + } + #endif // SCALAR + #ifdef MHD + dev_bounds_Rx[(grid_enum::Q_x_magnetic_y)*n_cells + id] = cellCenteredBy; + dev_bounds_Rx[(grid_enum::Q_x_magnetic_z)*n_cells + id] = cellCenteredBz; + #endif // MHD + #ifdef DE + dev_bounds_Rx[(n_fields - 1) * n_cells + id] = ge; + #endif // DE } - if (yid > 0) - { + if (yid > 0) { // Send the y-1/2 Right interface - id = cuda_utilities::compute1DIndex(xid, yid-1, zid, nx, ny); - dev_bounds_Ry[ id] = d; - dev_bounds_Ry[ n_cells + id] = mx; - dev_bounds_Ry[2*n_cells + id] = my; - dev_bounds_Ry[3*n_cells + id] = mz; - dev_bounds_Ry[4*n_cells + id] = E; - #ifdef SCALAR - for (int i=0; i 0) - { + if (zid > 0) { // Send the z-1/2 Right interface - id = cuda_utilities::compute1DIndex(xid, yid, zid-1, nx, ny); - dev_bounds_Rz[ id] = d; - dev_bounds_Rz[ n_cells + id] = mx; - dev_bounds_Rz[2*n_cells + id] = my; - dev_bounds_Rz[3*n_cells + id] = mz; - dev_bounds_Rz[4*n_cells + id] = E; - #ifdef SCALAR - for (int i=0; i -#include "../global/global.h" -#include "../global/global_cuda.h" -#include "../reconstruction/plmc_cuda.h" - -#ifdef DE //PRESSURE_DE -#include "../utils/hydro_utilities.h" -#endif //DE - - -/*! \fn __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, int ny, int nz, int n_ghost, Real dx, Real dt, Real gamma, int dir) - * \brief When passed a stencil of conserved variables, returns the left and right - boundary values for the interface calculated using plm. */ -__global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, int ny, int nz, int n_ghost, Real dx, Real dt, Real gamma, int dir, int n_fields) + #ifdef PLMC + + #include + + #include "../global/global.h" + #include "../global/global_cuda.h" + #include "../reconstruction/plmc_cuda.h" + #include "../utils/gpu.hpp" + + #ifdef DE // PRESSURE_DE + #include "../utils/hydro_utilities.h" + #endif // DE + +/*! \fn __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real + *dev_bounds_R, int nx, int ny, int nz, int n_ghost, Real dx, Real dt, Real + gamma, int dir) + * \brief When passed a stencil of conserved variables, returns the left and + right boundary values for the interface calculated using plm. */ +__global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, + Real *dev_bounds_R, int nx, int ny, int nz, + int n_ghost, Real dx, Real dt, Real gamma, int dir, + int n_fields) { - int n_cells = nx*ny*nz; + int n_cells = nx * ny * nz; int o1, o2, o3; if (dir == 0) { - o1 = 1; o2 = 2; o3 = 3; + o1 = 1; + o2 = 2; + o3 = 3; } if (dir == 1) { - o1 = 2; o2 = 3; o3 = 1; + o1 = 2; + o2 = 3; + o3 = 1; } if (dir == 2) { - o1 = 3; o2 = 1; o3 = 2; + o1 = 3; + o2 = 1; + o3 = 2; } // declare primitive variables for each stencil @@ -55,585 +66,616 @@ __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou Real d_L_iph, vx_L_iph, vy_L_iph, vz_L_iph, p_L_iph; Real d_R_imh, vx_R_imh, vy_R_imh, vz_R_imh, p_R_imh; Real C; - #ifndef VL - Real dtodx = dt/dx; + #ifndef VL + Real dtodx = dt / dx; Real lambda_m, lambda_0, lambda_p; Real qx; Real lamdiff; Real sum_0, sum_1, sum_2, sum_3, sum_4; - #endif // not VL - #ifdef DE + #endif // not VL + #ifdef DE Real ge_i, ge_imo, ge_ipo; Real del_ge_L, del_ge_R, del_ge_C, del_ge_G; Real del_ge_m_i; Real ge_L_iph, ge_R_imh; Real E, E_kin, dge; - #ifndef VL + #ifndef VL Real sum_ge; - #endif //CTU - #endif //DE - #ifdef SCALAR + #endif // CTU + #endif // DE + #ifdef SCALAR Real scalar_i[NSCALARS], scalar_imo[NSCALARS], scalar_ipo[NSCALARS]; - Real del_scalar_L[NSCALARS], del_scalar_R[NSCALARS], del_scalar_C[NSCALARS], del_scalar_G[NSCALARS]; + Real del_scalar_L[NSCALARS], del_scalar_R[NSCALARS], del_scalar_C[NSCALARS], + del_scalar_G[NSCALARS]; Real del_scalar_m_i[NSCALARS]; Real scalar_L_iph[NSCALARS], scalar_R_imh[NSCALARS]; - #ifndef VL + #ifndef VL Real sum_scalar[NSCALARS]; - #endif //CTU - #endif //SCALAR + #endif // CTU + #endif // SCALAR // get a thread ID - int blockId = blockIdx.x + blockIdx.y*gridDim.x; - int tid = threadIdx.x + blockId*blockDim.x; + int blockId = blockIdx.x + blockIdx.y * gridDim.x; + int tid = threadIdx.x + blockId * blockDim.x; int id; - int zid = tid / (nx*ny); - int yid = (tid - zid*nx*ny) / nx; - int xid = tid - zid*nx*ny - yid*nx; + int zid = tid / (nx * ny); + int yid = (tid - zid * nx * ny) / nx; + int xid = tid - zid * nx * ny - yid * nx; int xs, xe, ys, ye, zs, ze; if (dir == 0) { - xs = 1; xe = nx-2; - ys = 0; ye = ny; - zs = 0; ze = nz; + xs = 1; + xe = nx - 2; + ys = 0; + ye = ny; + zs = 0; + ze = nz; } if (dir == 1) { - xs = 0; xe = nx; - ys = 1; ye = ny-2; - zs = 0; ze = nz; + xs = 0; + xe = nx; + ys = 1; + ye = ny - 2; + zs = 0; + ze = nz; } if (dir == 2) { - xs = 0; xe = nx; - ys = 0; ye = ny; - zs = 1; ze = nz-2; + xs = 0; + xe = nx; + ys = 0; + ye = ny; + zs = 1; + ze = nz - 2; } - - if (xid >= xs && xid < xe && yid >= ys && yid < ye && zid >= zs && zid < ze) - { + if (xid >= xs && xid < xe && yid >= ys && yid < ye && zid >= zs && zid < ze) { // load the 3-cell stencil into registers // cell i - id = xid + yid*nx + zid*nx*ny; - d_i = dev_conserved[ id]; - vx_i = dev_conserved[o1*n_cells + id] / d_i; - vy_i = dev_conserved[o2*n_cells + id] / d_i; - vz_i = dev_conserved[o3*n_cells + id] / d_i; - #ifdef DE //PRESSURE_DE - E = dev_conserved[4*n_cells + id]; - E_kin = 0.5 * d_i * ( vx_i*vx_i + vy_i*vy_i + vz_i*vz_i ); - dge = dev_conserved[(n_fields-1)*n_cells + id]; - p_i = hydro_utilities::Get_Pressure_From_DE( E, E - E_kin, dge, gamma ); - #else //not DE - p_i = (dev_conserved[4*n_cells + id] - 0.5*d_i*(vx_i*vx_i + vy_i*vy_i + vz_i*vz_i)) * (gamma - 1.0); - #endif //PRESSURE_DE - p_i = fmax(p_i, (Real) TINY_NUMBER); + id = xid + yid * nx + zid * nx * ny; + d_i = dev_conserved[id]; + vx_i = dev_conserved[o1 * n_cells + id] / d_i; + vy_i = dev_conserved[o2 * n_cells + id] / d_i; + vz_i = dev_conserved[o3 * n_cells + id] / d_i; + #ifdef DE // PRESSURE_DE + E = dev_conserved[4 * n_cells + id]; + E_kin = 0.5 * d_i * (vx_i * vx_i + vy_i * vy_i + vz_i * vz_i); + dge = dev_conserved[(n_fields - 1) * n_cells + id]; + p_i = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, dge, gamma); + #else // not DE + p_i = (dev_conserved[4 * n_cells + id] - + 0.5 * d_i * (vx_i * vx_i + vy_i * vy_i + vz_i * vz_i)) * + (gamma - 1.0); + #endif // PRESSURE_DE + p_i = fmax(p_i, (Real)TINY_NUMBER); #ifdef SCALAR - for (int i=0; i 0.0) { del_d_G = 2.0*del_d_L*del_d_R / (del_d_L+del_d_R); } - else { del_d_G = 0.0; } - if (del_vx_L*del_vx_R > 0.0) { del_vx_G = 2.0*del_vx_L*del_vx_R / (del_vx_L+del_vx_R); } - else { del_vx_G = 0.0; } - if (del_vy_L*del_vy_R > 0.0) { del_vy_G = 2.0*del_vy_L*del_vy_R / (del_vy_L+del_vy_R); } - else { del_vy_G = 0.0; } - if (del_vz_L*del_vz_R > 0.0) { del_vz_G = 2.0*del_vz_L*del_vz_R / (del_vz_L+del_vz_R); } - else { del_vz_G = 0.0; } - if (del_p_L*del_p_R > 0.0) { del_p_G = 2.0*del_p_L*del_p_R / (del_p_L+del_p_R); } - else { del_p_G = 0.0; } + if (del_d_L * del_d_R > 0.0) { + del_d_G = 2.0 * del_d_L * del_d_R / (del_d_L + del_d_R); + } else { + del_d_G = 0.0; + } + if (del_vx_L * del_vx_R > 0.0) { + del_vx_G = 2.0 * del_vx_L * del_vx_R / (del_vx_L + del_vx_R); + } else { + del_vx_G = 0.0; + } + if (del_vy_L * del_vy_R > 0.0) { + del_vy_G = 2.0 * del_vy_L * del_vy_R / (del_vy_L + del_vy_R); + } else { + del_vy_G = 0.0; + } + if (del_vz_L * del_vz_R > 0.0) { + del_vz_G = 2.0 * del_vz_L * del_vz_R / (del_vz_L + del_vz_R); + } else { + del_vz_G = 0.0; + } + if (del_p_L * del_p_R > 0.0) { + del_p_G = 2.0 * del_p_L * del_p_R / (del_p_L + del_p_R); + } else { + del_p_G = 0.0; + } #ifdef DE del_ge_L = ge_i - ge_imo; del_ge_R = ge_ipo - ge_i; - del_ge_C = 0.5*(ge_ipo - ge_imo); - if (del_ge_L*del_ge_R > 0.0) { del_ge_G = 2.0*del_ge_L*del_ge_R / (del_ge_L+del_ge_R); } - else { del_ge_G = 0.0; } - #endif //DE + del_ge_C = 0.5 * (ge_ipo - ge_imo); + if (del_ge_L * del_ge_R > 0.0) { + del_ge_G = 2.0 * del_ge_L * del_ge_R / (del_ge_L + del_ge_R); + } else { + del_ge_G = 0.0; + } + #endif // DE #ifdef SCALAR - for (int i=0; i 0.0) { del_scalar_G[i] = 2.0*del_scalar_L[i]*del_scalar_R[i] / (del_scalar_L[i]+del_scalar_R[i]); } - else { del_scalar_G[i] = 0.0; } + del_scalar_C[i] = 0.5 * (scalar_ipo[i] - scalar_imo[i]); + if (del_scalar_L[i] * del_scalar_R[i] > 0.0) { + del_scalar_G[i] = 2.0 * del_scalar_L[i] * del_scalar_R[i] / + (del_scalar_L[i] + del_scalar_R[i]); + } else { + del_scalar_G[i] = 0.0; + } } - #endif //SCALAR - - - // Project the left, right, centered and van Leer differences onto the characteristic variables - // Stone Eqn 37 (del_a are differences in characteristic variables, see Stone for notation) - // Use the eigenvectors given in Stone 2008, Appendix A - del_a_0_L = -d_i * del_vx_L / (2*a_i) + del_p_L / (2*a_i*a_i); - del_a_1_L = del_d_L - del_p_L / (a_i*a_i); + #endif // SCALAR + + // Project the left, right, centered and van Leer differences onto the + // characteristic variables Stone Eqn 37 (del_a are differences in + // characteristic variables, see Stone for notation) Use the eigenvectors + // given in Stone 2008, Appendix A + del_a_0_L = -d_i * del_vx_L / (2 * a_i) + del_p_L / (2 * a_i * a_i); + del_a_1_L = del_d_L - del_p_L / (a_i * a_i); del_a_2_L = del_vy_L; del_a_3_L = del_vz_L; - del_a_4_L = d_i * del_vx_L / (2*a_i) + del_p_L / (2*a_i*a_i); + del_a_4_L = d_i * del_vx_L / (2 * a_i) + del_p_L / (2 * a_i * a_i); - del_a_0_R = -d_i * del_vx_R / (2*a_i) + del_p_R / (2*a_i*a_i); - del_a_1_R = del_d_R - del_p_R / (a_i*a_i); + del_a_0_R = -d_i * del_vx_R / (2 * a_i) + del_p_R / (2 * a_i * a_i); + del_a_1_R = del_d_R - del_p_R / (a_i * a_i); del_a_2_R = del_vy_R; del_a_3_R = del_vz_R; - del_a_4_R = d_i * del_vx_R / (2*a_i) + del_p_R / (2*a_i*a_i); + del_a_4_R = d_i * del_vx_R / (2 * a_i) + del_p_R / (2 * a_i * a_i); - del_a_0_C = -d_i * del_vx_C / (2*a_i) + del_p_C / (2*a_i*a_i); - del_a_1_C = del_d_C - del_p_C / (a_i*a_i); + del_a_0_C = -d_i * del_vx_C / (2 * a_i) + del_p_C / (2 * a_i * a_i); + del_a_1_C = del_d_C - del_p_C / (a_i * a_i); del_a_2_C = del_vy_C; del_a_3_C = del_vz_C; - del_a_4_C = d_i * del_vx_C / (2*a_i) + del_p_C / (2*a_i*a_i); + del_a_4_C = d_i * del_vx_C / (2 * a_i) + del_p_C / (2 * a_i * a_i); - del_a_0_G = -d_i * del_vx_G / (2*a_i) + del_p_G / (2*a_i*a_i); - del_a_1_G = del_d_G - del_p_G / (a_i*a_i); + del_a_0_G = -d_i * del_vx_G / (2 * a_i) + del_p_G / (2 * a_i * a_i); + del_a_1_G = del_d_G - del_p_G / (a_i * a_i); del_a_2_G = del_vy_G; del_a_3_G = del_vz_G; - del_a_4_G = d_i * del_vx_G / (2*a_i) + del_p_G / (2*a_i*a_i); + del_a_4_G = d_i * del_vx_G / (2 * a_i) + del_p_G / (2 * a_i * a_i); - - // Apply monotonicity constraints to the differences in the characteristic variables + // Apply monotonicity constraints to the differences in the characteristic + // variables del_a_0_m = del_a_1_m = del_a_2_m = del_a_3_m = del_a_4_m = 0.0; - if (del_a_0_L*del_a_0_R > 0.0) { + if (del_a_0_L * del_a_0_R > 0.0) { lim_slope_a = fmin(fabs(del_a_0_L), fabs(del_a_0_R)); lim_slope_b = fmin(fabs(del_a_0_C), fabs(del_a_0_G)); - del_a_0_m = sgn_CUDA(del_a_0_C) * fmin(2.0*lim_slope_a, lim_slope_b); + del_a_0_m = sgn_CUDA(del_a_0_C) * fmin(2.0 * lim_slope_a, lim_slope_b); } - if (del_a_1_L*del_a_1_R > 0.0) { + if (del_a_1_L * del_a_1_R > 0.0) { lim_slope_a = fmin(fabs(del_a_1_L), fabs(del_a_1_R)); lim_slope_b = fmin(fabs(del_a_1_C), fabs(del_a_1_G)); - del_a_1_m = sgn_CUDA(del_a_1_C) * fmin(2.0*lim_slope_a, lim_slope_b); + del_a_1_m = sgn_CUDA(del_a_1_C) * fmin(2.0 * lim_slope_a, lim_slope_b); } - if (del_a_2_L*del_a_2_R > 0.0) { + if (del_a_2_L * del_a_2_R > 0.0) { lim_slope_a = fmin(fabs(del_a_2_L), fabs(del_a_2_R)); lim_slope_b = fmin(fabs(del_a_2_C), fabs(del_a_2_G)); - del_a_2_m = sgn_CUDA(del_a_2_C) * fmin(2.0*lim_slope_a, lim_slope_b); + del_a_2_m = sgn_CUDA(del_a_2_C) * fmin(2.0 * lim_slope_a, lim_slope_b); } - if (del_a_3_L*del_a_3_R > 0.0) { + if (del_a_3_L * del_a_3_R > 0.0) { lim_slope_a = fmin(fabs(del_a_3_L), fabs(del_a_3_R)); lim_slope_b = fmin(fabs(del_a_3_C), fabs(del_a_3_G)); - del_a_3_m = sgn_CUDA(del_a_3_C) * fmin(2.0*lim_slope_a, lim_slope_b); + del_a_3_m = sgn_CUDA(del_a_3_C) * fmin(2.0 * lim_slope_a, lim_slope_b); } - if (del_a_4_L*del_a_4_R > 0.0) { + if (del_a_4_L * del_a_4_R > 0.0) { lim_slope_a = fmin(fabs(del_a_4_L), fabs(del_a_4_R)); lim_slope_b = fmin(fabs(del_a_4_C), fabs(del_a_4_G)); - del_a_4_m = sgn_CUDA(del_a_4_C) * fmin(2.0*lim_slope_a, lim_slope_b); + del_a_4_m = sgn_CUDA(del_a_4_C) * fmin(2.0 * lim_slope_a, lim_slope_b); } #ifdef DE del_ge_m_i = 0.0; - if (del_ge_L*del_ge_R > 0.0) { + if (del_ge_L * del_ge_R > 0.0) { lim_slope_a = fmin(fabs(del_ge_L), fabs(del_ge_R)); lim_slope_b = fmin(fabs(del_ge_C), fabs(del_ge_G)); - del_ge_m_i = sgn_CUDA(del_ge_C) * fmin(2.0*lim_slope_a, lim_slope_b); + del_ge_m_i = sgn_CUDA(del_ge_C) * fmin(2.0 * lim_slope_a, lim_slope_b); } - #endif //DE + #endif // DE #ifdef SCALAR - for (int i=0; i 0.0) { + if (del_scalar_L[i] * del_scalar_R[i] > 0.0) { lim_slope_a = fmin(fabs(del_scalar_L[i]), fabs(del_scalar_R[i])); lim_slope_b = fmin(fabs(del_scalar_C[i]), fabs(del_scalar_G[i])); - del_scalar_m_i[i] = sgn_CUDA(del_scalar_C[i]) * fmin(2.0*lim_slope_a, lim_slope_b); + del_scalar_m_i[i] = + sgn_CUDA(del_scalar_C[i]) * fmin(2.0 * lim_slope_a, lim_slope_b); } } - #endif //SCALAR - - + #endif // SCALAR - // Project the monotonized difference in the characteristic variables back onto the - // primitive variables - // Stone Eqn 39 + // Project the monotonized difference in the characteristic variables back + // onto the primitive variables Stone Eqn 39 del_d_m_i = del_a_0_m + del_a_1_m + del_a_4_m; - del_vx_m_i = -a_i*del_a_0_m / d_i + a_i* del_a_4_m / d_i; + del_vx_m_i = -a_i * del_a_0_m / d_i + a_i * del_a_4_m / d_i; del_vy_m_i = del_a_2_m; del_vz_m_i = del_a_3_m; - del_p_m_i = a_i*a_i*del_a_0_m + a_i*a_i*del_a_4_m; + del_p_m_i = a_i * a_i * del_a_0_m + a_i * a_i * del_a_4_m; + // Compute the left and right interface values using the monotonized + // difference in the primitive variables - // Compute the left and right interface values using the monotonized difference in the - // primitive variables + d_R_imh = d_i - 0.5 * del_d_m_i; + vx_R_imh = vx_i - 0.5 * del_vx_m_i; + vy_R_imh = vy_i - 0.5 * del_vy_m_i; + vz_R_imh = vz_i - 0.5 * del_vz_m_i; + p_R_imh = p_i - 0.5 * del_p_m_i; - d_R_imh = d_i - 0.5*del_d_m_i; - vx_R_imh = vx_i - 0.5*del_vx_m_i; - vy_R_imh = vy_i - 0.5*del_vy_m_i; - vz_R_imh = vz_i - 0.5*del_vz_m_i; - p_R_imh = p_i - 0.5*del_p_m_i; - - d_L_iph = d_i + 0.5*del_d_m_i; - vx_L_iph = vx_i + 0.5*del_vx_m_i; - vy_L_iph = vy_i + 0.5*del_vy_m_i; - vz_L_iph = vz_i + 0.5*del_vz_m_i; - p_L_iph = p_i + 0.5*del_p_m_i; + d_L_iph = d_i + 0.5 * del_d_m_i; + vx_L_iph = vx_i + 0.5 * del_vx_m_i; + vy_L_iph = vy_i + 0.5 * del_vy_m_i; + vz_L_iph = vz_i + 0.5 * del_vz_m_i; + p_L_iph = p_i + 0.5 * del_p_m_i; #ifdef DE - ge_R_imh = ge_i - 0.5*del_ge_m_i; - ge_L_iph = ge_i + 0.5*del_ge_m_i; - #endif //DE + ge_R_imh = ge_i - 0.5 * del_ge_m_i; + ge_L_iph = ge_i + 0.5 * del_ge_m_i; + #endif // DE #ifdef SCALAR - for (int i=0; i= 0) - { + #endif // SCALAR + if (lambda_m >= 0) { lamdiff = lambda_p - lambda_m; - sum_0 += lamdiff * (-d_i*del_vx_m_i/(2*a_i) + del_p_m_i/(2*a_i*a_i)); - sum_1 += lamdiff * (del_vx_m_i/2.0 - del_p_m_i/(2*a_i*d_i)); - sum_4 += lamdiff * (-d_i*del_vx_m_i*a_i/2.0 + del_p_m_i/2.0); + sum_0 += lamdiff * + (-d_i * del_vx_m_i / (2 * a_i) + del_p_m_i / (2 * a_i * a_i)); + sum_1 += lamdiff * (del_vx_m_i / 2.0 - del_p_m_i / (2 * a_i * d_i)); + sum_4 += lamdiff * (-d_i * del_vx_m_i * a_i / 2.0 + del_p_m_i / 2.0); } - if (lambda_0 >= 0) - { + if (lambda_0 >= 0) { lamdiff = lambda_p - lambda_0; - sum_0 += lamdiff * (del_d_m_i - del_p_m_i/(a_i*a_i)); + sum_0 += lamdiff * (del_d_m_i - del_p_m_i / (a_i * a_i)); sum_2 += lamdiff * del_vy_m_i; sum_3 += lamdiff * del_vz_m_i; #ifdef DE sum_ge += lamdiff * del_ge_m_i; - #endif //DE + #endif // DE #ifdef SCALAR - for (int i=0; i= 0) - { + if (lambda_p >= 0) { lamdiff = lambda_p - lambda_p; - sum_0 += lamdiff * (d_i*del_vx_m_i/(2*a_i) + del_p_m_i/(2*a_i*a_i)); - sum_1 += lamdiff * (del_vx_m_i/2.0 + del_p_m_i/(2*a_i*d_i)); - sum_4 += lamdiff * (d_i*del_vx_m_i*a_i/2.0 + del_p_m_i/2.0); + sum_0 += lamdiff * + (d_i * del_vx_m_i / (2 * a_i) + del_p_m_i / (2 * a_i * a_i)); + sum_1 += lamdiff * (del_vx_m_i / 2.0 + del_p_m_i / (2 * a_i * d_i)); + sum_4 += lamdiff * (d_i * del_vx_m_i * a_i / 2.0 + del_p_m_i / 2.0); } // add the corrections to the initial guesses for the interface values - d_L_iph += 0.5*dtodx*sum_0; - vx_L_iph += 0.5*dtodx*sum_1; - vy_L_iph += 0.5*dtodx*sum_2; - vz_L_iph += 0.5*dtodx*sum_3; - p_L_iph += 0.5*dtodx*sum_4; - #ifdef DE - ge_L_iph += 0.5*dtodx*sum_ge; - #endif //DE - #ifdef SCALAR - for (int i=0; i -#include "../global/global.h" -#include "../global/global_cuda.h" -#include "../reconstruction/plmp_cuda.h" - -#ifdef DE //PRESSURE_DE -#include "../utils/hydro_utilities.h" -#endif - - -/*! \fn __global__ void PLMP_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, int ny, int nz, int n_ghost, Real dx, Real dt, Real gamma, int dir, int n_fields) - * \brief When passed a stencil of conserved variables, returns the left and right - boundary values for the interface calculated using plm. */ -__global__ void PLMP_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, int ny, int nz, int n_ghost, Real dx, Real dt, Real gamma, int dir, int n_fields) + #include + + #include "../global/global.h" + #include "../global/global_cuda.h" + #include "../reconstruction/plmp_cuda.h" + #include "../utils/gpu.hpp" + + #ifdef DE // PRESSURE_DE + #include "../utils/hydro_utilities.h" + #endif + +/*! \fn __global__ void PLMP_cuda(Real *dev_conserved, Real *dev_bounds_L, Real + *dev_bounds_R, int nx, int ny, int nz, int n_ghost, Real dx, Real dt, Real + gamma, int dir, int n_fields) + * \brief When passed a stencil of conserved variables, returns the left and + right boundary values for the interface calculated using plm. */ +__global__ void PLMP_cuda(Real *dev_conserved, Real *dev_bounds_L, + Real *dev_bounds_R, int nx, int ny, int nz, + int n_ghost, Real dx, Real dt, Real gamma, int dir, + int n_fields) { - int n_cells = nx*ny*nz; + int n_cells = nx * ny * nz; int o1, o2, o3; if (dir == 0) { - o1 = 1; o2 = 2; o3 = 3; + o1 = 1; + o2 = 2; + o3 = 3; } if (dir == 1) { - o1 = 2; o2 = 3; o3 = 1; + o1 = 2; + o2 = 3; + o3 = 1; } if (dir == 2) { - o1 = 3; o2 = 1; o3 = 2; + o1 = 3; + o2 = 1; + o3 = 2; } // declare primitive variables in the stencil @@ -46,191 +57,209 @@ __global__ void PLMP_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou #ifdef DE Real ge_i, ge_imo, ge_ipo, ge_L, ge_R, dge_L, dge_R, E_kin, E, dge; - #endif //DE + #endif // DE #ifdef SCALAR Real scalar_i[NSCALARS], scalar_imo[NSCALARS], scalar_ipo[NSCALARS]; - Real scalar_L[NSCALARS], scalar_R[NSCALARS], dscalar_L[NSCALARS], dscalar_R[NSCALARS]; - #endif //SCALAR + Real scalar_L[NSCALARS], scalar_R[NSCALARS], dscalar_L[NSCALARS], + dscalar_R[NSCALARS]; + #endif // SCALAR - #ifndef VL //Don't use velocities to reconstruct when using VL - Real dtodx = dt/dx; + #ifndef VL // Don't use velocities to reconstruct when using VL + Real dtodx = dt / dx; Real dfl, dfr, mxfl, mxfr, myfl, myfr, mzfl, mzfr, Efl, Efr; - #ifdef DE + #ifdef DE Real gefl, gefr; - #endif //DE - #ifdef SCALAR + #endif // DE + #ifdef SCALAR Real scalarfl[NSCALARS], scalarfr[NSCALARS]; - #endif //SCALAR - #endif //VL + #endif // SCALAR + #endif // VL // get a thread ID - int blockId = blockIdx.x + blockIdx.y*gridDim.x; - int tid = threadIdx.x + blockId*blockDim.x; + int blockId = blockIdx.x + blockIdx.y * gridDim.x; + int tid = threadIdx.x + blockId * blockDim.x; int id; - int zid = tid / (nx*ny); - int yid = (tid - zid*nx*ny) / nx; - int xid = tid - zid*nx*ny - yid*nx; + int zid = tid / (nx * ny); + int yid = (tid - zid * nx * ny) / nx; + int xid = tid - zid * nx * ny - yid * nx; int xs, xe, ys, ye, zs, ze; if (dir == 0) { - xs = 1; xe = nx-2; - ys = 0; ye = ny; - zs = 0; ze = nz; + xs = 1; + xe = nx - 2; + ys = 0; + ye = ny; + zs = 0; + ze = nz; } if (dir == 1) { - xs = 0; xe = nx; - ys = 1; ye = ny-2; - zs = 0; ze = nz; + xs = 0; + xe = nx; + ys = 1; + ye = ny - 2; + zs = 0; + ze = nz; } if (dir == 2) { - xs = 0; xe = nx; - ys = 0; ye = ny; - zs = 1; ze = nz-2; + xs = 0; + xe = nx; + ys = 0; + ye = ny; + zs = 1; + ze = nz - 2; } - - if (xid >= xs && xid < xe && yid >= ys && yid < ye && zid >= zs && zid < ze) - { + if (xid >= xs && xid < xe && yid >= ys && yid < ye && zid >= zs && zid < ze) { // load the 3-cell stencil into registers // cell i - id = xid + yid*nx + zid*nx*ny; - d_i = dev_conserved[ id]; - vx_i = dev_conserved[o1*n_cells + id] / d_i; - vy_i = dev_conserved[o2*n_cells + id] / d_i; - vz_i = dev_conserved[o3*n_cells + id] / d_i; - #ifdef DE //PRESSURE_DE - E = dev_conserved[4*n_cells + id]; - E_kin = 0.5 * d_i * ( vx_i*vx_i + vy_i*vy_i + vz_i*vz_i ); - dge = dev_conserved[(n_fields-1)*n_cells + id]; - p_i = hydro_utilities::Get_Pressure_From_DE( E, E - E_kin, dge, gamma ); - #else - p_i = (dev_conserved[4*n_cells + id] - 0.5*d_i*(vx_i*vx_i + vy_i*vy_i + vz_i*vz_i)) * (gamma - 1.0); - #endif //PRESSURE_DE - p_i = fmax(p_i, (Real) TINY_NUMBER); - #ifdef SCALAR - for (int i=0; i 0.0) { del_q_G = 2.0*del_q_L*del_q_R / (del_q_L+del_q_R); } - else { del_q_G = 0.0; } + if (del_q_L * del_q_R > 0.0) { + del_q_G = 2.0 * del_q_L * del_q_R / (del_q_L + del_q_R); + } else { + del_q_G = 0.0; + } // Monotonize the differences lim_slope_a = fmin(fabs(del_q_L), fabs(del_q_R)); lim_slope_b = fmin(fabs(del_q_C), fabs(del_q_G)); // Minmod limiter - //del_q_m = sgn_CUDA(del_q_C)*fmin(2.0*lim_slope_a, fabs(del_q_C)); + // del_q_m = sgn_CUDA(del_q_C)*fmin(2.0*lim_slope_a, fabs(del_q_C)); // Van Leer limiter - del_q_m = sgn_CUDA(del_q_C) * fmin((Real) 2.0*lim_slope_a, lim_slope_b); - + del_q_m = sgn_CUDA(del_q_C) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); // Calculate the left and right interface values using the limited slopes - *q_L = q_i - 0.5*del_q_m; - *q_R = q_i + 0.5*del_q_m; - + *q_L = q_i - 0.5 * del_q_m; + *q_R = q_i + 0.5 * del_q_m; } - -#endif //CUDA +#endif // CUDA diff --git a/src/reconstruction/plmp_cuda.h b/src/reconstruction/plmp_cuda.h index 9cf5f01a3..627fb52a9 100644 --- a/src/reconstruction/plmp_cuda.h +++ b/src/reconstruction/plmp_cuda.h @@ -3,23 +3,28 @@ #ifdef CUDA -#ifndef PLMP_CUDA_H -#define PLMP_CUDA_H - - -#include "../global/global.h" - -/*! \fn __global__ void PLMP_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, int ny, int nz, int n_ghost, Real dx, Real dt, Real gamma, int dir, int n_fields) - * \brief When passed a stencil of conserved variables, returns the left and right - boundary values for the interface calculated using plmp. */ -__global__ void PLMP_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, int ny, int nz, int n_ghost, Real dx, Real dt, Real gamma, int dir, int n_fields); - - -/*! \fn __device__ void Interface_Values_PLM(Real q_imo, Real q_i, Real q_ipo, Real *q_L, Real *q_R) - * \brief Calculates the left and right interface values for a cell using linear reconstruction - in the primitive variables with Van Leer or Minmod slope limiting. */ -__device__ void Interface_Values_PLM(Real q_imo, Real q_i, Real q_ipo, Real *q_L, Real *q_R); - - -#endif // PLMP_CUDA_H -#endif // CUDA + #ifndef PLMP_CUDA_H + #define PLMP_CUDA_H + + #include "../global/global.h" + +/*! \fn __global__ void PLMP_cuda(Real *dev_conserved, Real *dev_bounds_L, Real + *dev_bounds_R, int nx, int ny, int nz, int n_ghost, Real dx, Real dt, Real + gamma, int dir, int n_fields) + * \brief When passed a stencil of conserved variables, returns the left and + right boundary values for the interface calculated using plmp. */ +__global__ void PLMP_cuda(Real *dev_conserved, Real *dev_bounds_L, + Real *dev_bounds_R, int nx, int ny, int nz, + int n_ghost, Real dx, Real dt, Real gamma, int dir, + int n_fields); + +/*! \fn __device__ void Interface_Values_PLM(Real q_imo, Real q_i, Real q_ipo, + Real *q_L, Real *q_R) + * \brief Calculates the left and right interface values for a cell using + linear reconstruction in the primitive variables with Van Leer or Minmod slope + limiting. */ +__device__ void Interface_Values_PLM(Real q_imo, Real q_i, Real q_ipo, + Real *q_L, Real *q_R); + + #endif // PLMP_CUDA_H +#endif // CUDA diff --git a/src/reconstruction/ppmc_cuda.cu b/src/reconstruction/ppmc_cuda.cu index 8f00b8a73..d13dd0c60 100644 --- a/src/reconstruction/ppmc_cuda.cu +++ b/src/reconstruction/ppmc_cuda.cu @@ -1,36 +1,47 @@ /*! \file ppmc_cuda.cu - * \brief Functions definitions for the ppm kernels, using characteristic tracing. - Written following Stone et al. 2008. */ + * \brief Functions definitions for the ppm kernels, using characteristic + tracing. Written following Stone et al. 2008. */ #ifdef CUDA -#ifdef PPMC - -#include "../utils/gpu.hpp" -#include -#include "../global/global.h" -#include "../global/global_cuda.h" -#include "../reconstruction/ppmc_cuda.h" -#include "../utils/hydro_utilities.h" - -#ifdef DE //PRESSURE_DE -#include "../utils/hydro_utilities.h" -#endif - - -/*! \fn void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, int ny, int nz, int n_ghost, Real dx, Real dt, Real gamma, int dir, int n_fields) - * \brief When passed a stencil of conserved variables, returns the left and right - boundary values for the interface calculated using ppm. */ -__global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, int ny, int nz, int n_ghost, Real dx, Real dt, Real gamma, int dir, int n_fields) + #ifdef PPMC + + #include + + #include "../global/global.h" + #include "../global/global_cuda.h" + #include "../reconstruction/ppmc_cuda.h" + #include "../utils/gpu.hpp" + #include "../utils/hydro_utilities.h" + + #ifdef DE // PRESSURE_DE + #include "../utils/hydro_utilities.h" + #endif + +/*! \fn void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real + *dev_bounds_R, int nx, int ny, int nz, int n_ghost, Real dx, Real dt, Real + gamma, int dir, int n_fields) + * \brief When passed a stencil of conserved variables, returns the left and + right boundary values for the interface calculated using ppm. */ +__global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, + Real *dev_bounds_R, int nx, int ny, int nz, + int n_ghost, Real dx, Real dt, Real gamma, int dir, + int n_fields) { - int n_cells = nx*ny*nz; + int n_cells = nx * ny * nz; int o1, o2, o3; - if (dir == 0 ) { - o1 = 1; o2 = 2; o3 = 3; + if (dir == 0) { + o1 = 1; + o2 = 2; + o3 = 3; } - if (dir == 1 ) { - o1 = 2; o2 = 3; o3 = 1; + if (dir == 1) { + o1 = 2; + o2 = 3; + o3 = 1; } - if (dir == 2 ) { - o1 = 3; o2 = 1; o3 = 2; + if (dir == 2) { + o1 = 3; + o2 = 1; + o3 = 2; } // declare primitive variables for each stencil @@ -59,810 +70,990 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou Real d_L, vx_L, vy_L, vz_L, p_L; Real d_R, vx_R, vy_R, vz_R, p_R; - // #ifdef CTU - #ifndef VL - Real dtodx = dt/dx; + // #ifdef CTU + #ifndef VL + Real dtodx = dt / dx; Real d_6, vx_6, vy_6, vz_6, p_6; Real lambda_m, lambda_0, lambda_p; Real lambda_max, lambda_min; Real A, B, C, D; Real chi_1, chi_2, chi_3, chi_4, chi_5; Real sum_1, sum_2, sum_3, sum_4, sum_5; - #endif //VL + #endif // VL - #ifdef DE + #ifdef DE Real ge_i, ge_imo, ge_ipo, ge_imt, ge_ipt; Real del_ge_L, del_ge_R, del_ge_C, del_ge_G; Real del_ge_m_imo, del_ge_m_i, del_ge_m_ipo; Real ge_L, ge_R; - Real E_kin, E, dge; - // #ifdef CTU - #ifndef VL + Real E_kin, E, dge; + // #ifdef CTU + #ifndef VL Real chi_ge, sum_ge, ge_6; - #endif //VL - #endif //DE - #ifdef SCALAR - Real scalar_i[NSCALARS], scalar_imo[NSCALARS], scalar_ipo[NSCALARS], scalar_imt[NSCALARS], scalar_ipt[NSCALARS]; - Real del_scalar_L[NSCALARS], del_scalar_R[NSCALARS], del_scalar_C[NSCALARS], del_scalar_G[NSCALARS]; - Real del_scalar_m_imo[NSCALARS], del_scalar_m_i[NSCALARS], del_scalar_m_ipo[NSCALARS]; + #endif // VL + #endif // DE + #ifdef SCALAR + Real scalar_i[NSCALARS], scalar_imo[NSCALARS], scalar_ipo[NSCALARS], + scalar_imt[NSCALARS], scalar_ipt[NSCALARS]; + Real del_scalar_L[NSCALARS], del_scalar_R[NSCALARS], del_scalar_C[NSCALARS], + del_scalar_G[NSCALARS]; + Real del_scalar_m_imo[NSCALARS], del_scalar_m_i[NSCALARS], + del_scalar_m_ipo[NSCALARS]; Real scalar_L[NSCALARS], scalar_R[NSCALARS]; - // #ifdef CTU - #ifndef VL + // #ifdef CTU + #ifndef VL Real chi_scalar[NSCALARS], sum_scalar[NSCALARS], scalar_6[NSCALARS]; - #endif //VL - #endif //SCALAR - + #endif // VL + #endif // SCALAR // get a thread ID - int blockId = blockIdx.x + blockIdx.y*gridDim.x; - int tid = threadIdx.x + blockId * blockDim.x; + int blockId = blockIdx.x + blockIdx.y * gridDim.x; + int tid = threadIdx.x + blockId * blockDim.x; int id; - int zid = tid / (nx*ny); - int yid = (tid - zid*nx*ny) / nx; - int xid = tid - zid*nx*ny - yid*nx; + int zid = tid / (nx * ny); + int yid = (tid - zid * nx * ny) / nx; + int xid = tid - zid * nx * ny - yid * nx; int xs, xe, ys, ye, zs, ze; if (dir == 0) { - xs = 2; xe = nx-3; - ys = 0; ye = ny; - zs = 0; ze = nz; + xs = 2; + xe = nx - 3; + ys = 0; + ye = ny; + zs = 0; + ze = nz; } if (dir == 1) { - xs = 0; xe = nx; - ys = 2; ye = ny-3; - zs = 0; ze = nz; + xs = 0; + xe = nx; + ys = 2; + ye = ny - 3; + zs = 0; + ze = nz; } if (dir == 2) { - xs = 0; xe = nx; - ys = 0; ye = ny; - zs = 2; ze = nz-3; + xs = 0; + xe = nx; + ys = 0; + ye = ny; + zs = 2; + ze = nz - 3; } - if (xid >= xs && xid < xe && yid >= ys && yid < ye && zid >= zs && zid < ze) - { + if (xid >= xs && xid < xe && yid >= ys && yid < ye && zid >= zs && zid < ze) { // load the 5-cell stencil into registers // cell i - id = xid + yid*nx + zid*nx*ny; - d_i = dev_conserved[ id]; - vx_i = dev_conserved[o1*n_cells + id] / d_i; - vy_i = dev_conserved[o2*n_cells + id] / d_i; - vz_i = dev_conserved[o3*n_cells + id] / d_i; - #ifdef DE //PRESSURE_DE - E = dev_conserved[4*n_cells + id]; - E_kin = 0.5 * d_i * ( vx_i*vx_i + vy_i*vy_i + vz_i*vz_i ); - dge = dev_conserved[(n_fields-1)*n_cells + id]; - p_i = hydro_utilities::Get_Pressure_From_DE( E, E - E_kin, dge, gamma ); - #else //not DE - p_i = (dev_conserved[4*n_cells + id] - 0.5*d_i*(vx_i*vx_i + vy_i*vy_i + vz_i*vz_i)) * (gamma - 1.0); - #endif //PRESSURE_DE - p_i = fmax(p_i, (Real) TINY_NUMBER); + id = xid + yid * nx + zid * nx * ny; + d_i = dev_conserved[id]; + vx_i = dev_conserved[o1 * n_cells + id] / d_i; + vy_i = dev_conserved[o2 * n_cells + id] / d_i; + vz_i = dev_conserved[o3 * n_cells + id] / d_i; + #ifdef DE // PRESSURE_DE + E = dev_conserved[4 * n_cells + id]; + E_kin = 0.5 * d_i * (vx_i * vx_i + vy_i * vy_i + vz_i * vz_i); + dge = dev_conserved[(n_fields - 1) * n_cells + id]; + p_i = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, dge, gamma); + #else // not DE + p_i = (dev_conserved[4 * n_cells + id] - + 0.5 * d_i * (vx_i * vx_i + vy_i * vy_i + vz_i * vz_i)) * + (gamma - 1.0); + #endif // PRESSURE_DE + p_i = fmax(p_i, (Real)TINY_NUMBER); #ifdef DE - ge_i = dge / d_i; - #endif //DE + ge_i = dge / d_i; + #endif // DE #ifdef SCALAR - for (int i=0; i 0.0) { del_d_G = 2.0*del_d_L*del_d_R / (del_d_L+del_d_R); } - else { del_d_G = 0.0; } - if (del_vx_L*del_vx_R > 0.0) { del_vx_G = 2.0*del_vx_L*del_vx_R / (del_vx_L+del_vx_R); } - else { del_vx_G = 0.0; } - if (del_vy_L*del_vy_R > 0.0) { del_vy_G = 2.0*del_vy_L*del_vy_R / (del_vy_L+del_vy_R); } - else { del_vy_G = 0.0; } - if (del_vz_L*del_vz_R > 0.0) { del_vz_G = 2.0*del_vz_L*del_vz_R / (del_vz_L+del_vz_R); } - else { del_vz_G = 0.0; } - if (del_p_L*del_p_R > 0.0) { del_p_G = 2.0*del_p_L*del_p_R / (del_p_L+del_p_R); } - else { del_p_G = 0.0; } + if (del_d_L * del_d_R > 0.0) { + del_d_G = 2.0 * del_d_L * del_d_R / (del_d_L + del_d_R); + } else { + del_d_G = 0.0; + } + if (del_vx_L * del_vx_R > 0.0) { + del_vx_G = 2.0 * del_vx_L * del_vx_R / (del_vx_L + del_vx_R); + } else { + del_vx_G = 0.0; + } + if (del_vy_L * del_vy_R > 0.0) { + del_vy_G = 2.0 * del_vy_L * del_vy_R / (del_vy_L + del_vy_R); + } else { + del_vy_G = 0.0; + } + if (del_vz_L * del_vz_R > 0.0) { + del_vz_G = 2.0 * del_vz_L * del_vz_R / (del_vz_L + del_vz_R); + } else { + del_vz_G = 0.0; + } + if (del_p_L * del_p_R > 0.0) { + del_p_G = 2.0 * del_p_L * del_p_R / (del_p_L + del_p_R); + } else { + del_p_G = 0.0; + } #ifdef DE - del_ge_L = ge_imo - ge_imt; - del_ge_R = ge_i - ge_imo; - del_ge_C = 0.5*(ge_i - ge_imt); - if (del_ge_L*del_ge_R > 0.0) { del_ge_G = 2.0*del_ge_L*del_ge_R / (del_ge_L+del_ge_R); } - else { del_ge_G = 0.0; } - #endif //DE + del_ge_L = ge_imo - ge_imt; + del_ge_R = ge_i - ge_imo; + del_ge_C = 0.5 * (ge_i - ge_imt); + if (del_ge_L * del_ge_R > 0.0) { + del_ge_G = 2.0 * del_ge_L * del_ge_R / (del_ge_L + del_ge_R); + } else { + del_ge_G = 0.0; + } + #endif // DE #ifdef SCALAR - for (int i=0; i 0.0) { del_scalar_G[i] = 2.0*del_scalar_L[i]*del_scalar_R[i] / (del_scalar_L[i]+del_scalar_R[i]); } - else { del_scalar_G[i] = 0.0; } + for (int i = 0; i < NSCALARS; i++) { + del_scalar_L[i] = scalar_imo[i] - scalar_imt[i]; + del_scalar_R[i] = scalar_i[i] - scalar_imo[i]; + del_scalar_C[i] = 0.5 * (scalar_i[i] - scalar_imt[i]); + if (del_scalar_L[i] * del_scalar_R[i] > 0.0) { + del_scalar_G[i] = 2.0 * del_scalar_L[i] * del_scalar_R[i] / + (del_scalar_L[i] + del_scalar_R[i]); + } else { + del_scalar_G[i] = 0.0; + } } - #endif //SCALAR + #endif // SCALAR + // Step 3 - Project the left, right, centered and van Leer differences onto + // the characteristic variables + // Stone Eqn 37 (del_a are differences in characteristic variables, + // see Stone for notation) Use the eigenvectors given in Stone + // 2008, Appendix A - // Step 3 - Project the left, right, centered and van Leer differences onto the characteristic variables - // Stone Eqn 37 (del_a are differences in characteristic variables, see Stone for notation) - // Use the eigenvectors given in Stone 2008, Appendix A - - del_a_0_L = -0.5*d_imo*del_vx_L/a + 0.5*del_p_L/(a*a); - del_a_1_L = del_d_L - del_p_L/(a*a); + del_a_0_L = -0.5 * d_imo * del_vx_L / a + 0.5 * del_p_L / (a * a); + del_a_1_L = del_d_L - del_p_L / (a * a); del_a_2_L = del_vy_L; del_a_3_L = del_vz_L; - del_a_4_L = 0.5*d_imo*del_vx_L/a + 0.5*del_p_L/(a*a); + del_a_4_L = 0.5 * d_imo * del_vx_L / a + 0.5 * del_p_L / (a * a); - del_a_0_R = -0.5*d_imo*del_vx_R/a + 0.5*del_p_R/(a*a); - del_a_1_R = del_d_R - del_p_R/(a*a); + del_a_0_R = -0.5 * d_imo * del_vx_R / a + 0.5 * del_p_R / (a * a); + del_a_1_R = del_d_R - del_p_R / (a * a); del_a_2_R = del_vy_R; del_a_3_R = del_vz_R; - del_a_4_R = 0.5*d_imo*del_vx_R/a + 0.5*del_p_R/(a*a); + del_a_4_R = 0.5 * d_imo * del_vx_R / a + 0.5 * del_p_R / (a * a); - del_a_0_C = -0.5*d_imo*del_vx_C/a + 0.5*del_p_C/(a*a); - del_a_1_C = del_d_C - del_p_C/(a*a); + del_a_0_C = -0.5 * d_imo * del_vx_C / a + 0.5 * del_p_C / (a * a); + del_a_1_C = del_d_C - del_p_C / (a * a); del_a_2_C = del_vy_C; del_a_3_C = del_vz_C; - del_a_4_C = 0.5*d_imo*del_vx_C/a + 0.5*del_p_C/(a*a); + del_a_4_C = 0.5 * d_imo * del_vx_C / a + 0.5 * del_p_C / (a * a); - del_a_0_G = -0.5*d_imo*del_vx_G/a + 0.5*del_p_G/(a*a); - del_a_1_G = del_d_G - del_p_G/(a*a); + del_a_0_G = -0.5 * d_imo * del_vx_G / a + 0.5 * del_p_G / (a * a); + del_a_1_G = del_d_G - del_p_G / (a * a); del_a_2_G = del_vy_G; del_a_3_G = del_vz_G; - del_a_4_G = 0.5*d_imo*del_vx_G/a + 0.5*del_p_G/(a*a); - + del_a_4_G = 0.5 * d_imo * del_vx_G / a + 0.5 * del_p_G / (a * a); - // Step 4 - Apply monotonicity constraints to the differences in the characteristic variables + // Step 4 - Apply monotonicity constraints to the differences in the + // characteristic variables // Stone Eqn 38 del_a_0_m = del_a_1_m = del_a_2_m = del_a_3_m = del_a_4_m = 0.0; - if (del_a_0_L*del_a_0_R > 0.0) { + if (del_a_0_L * del_a_0_R > 0.0) { lim_slope_a = fmin(fabs(del_a_0_L), fabs(del_a_0_R)); lim_slope_b = fmin(fabs(del_a_0_C), fabs(del_a_0_G)); - del_a_0_m = sgn_CUDA(del_a_0_C) * fmin((Real) 2.0*lim_slope_a, lim_slope_b); + del_a_0_m = + sgn_CUDA(del_a_0_C) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); } - if (del_a_1_L*del_a_1_R > 0.0) { + if (del_a_1_L * del_a_1_R > 0.0) { lim_slope_a = fmin(fabs(del_a_1_L), fabs(del_a_1_R)); lim_slope_b = fmin(fabs(del_a_1_C), fabs(del_a_1_G)); - del_a_1_m = sgn_CUDA(del_a_1_C) * fmin((Real) 2.0*lim_slope_a, lim_slope_b); + del_a_1_m = + sgn_CUDA(del_a_1_C) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); } - if (del_a_2_L*del_a_2_R > 0.0) { + if (del_a_2_L * del_a_2_R > 0.0) { lim_slope_a = fmin(fabs(del_a_2_L), fabs(del_a_2_R)); lim_slope_b = fmin(fabs(del_a_2_C), fabs(del_a_2_G)); - del_a_2_m = sgn_CUDA(del_a_2_C) * fmin((Real) 2.0*lim_slope_a, lim_slope_b); + del_a_2_m = + sgn_CUDA(del_a_2_C) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); } - if (del_a_3_L*del_a_3_R > 0.0) { + if (del_a_3_L * del_a_3_R > 0.0) { lim_slope_a = fmin(fabs(del_a_3_L), fabs(del_a_3_R)); lim_slope_b = fmin(fabs(del_a_3_C), fabs(del_a_3_G)); - del_a_3_m = sgn_CUDA(del_a_3_C) * fmin((Real) 2.0*lim_slope_a, lim_slope_b); + del_a_3_m = + sgn_CUDA(del_a_3_C) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); } - if (del_a_4_L*del_a_4_R > 0.0) { + if (del_a_4_L * del_a_4_R > 0.0) { lim_slope_a = fmin(fabs(del_a_4_L), fabs(del_a_4_R)); lim_slope_b = fmin(fabs(del_a_4_C), fabs(del_a_4_G)); - del_a_4_m = sgn_CUDA(del_a_4_C) * fmin((Real) 2.0*lim_slope_a, lim_slope_b); + del_a_4_m = + sgn_CUDA(del_a_4_C) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); } #ifdef DE - if (del_ge_L*del_ge_R > 0.0) { + if (del_ge_L * del_ge_R > 0.0) { lim_slope_a = fmin(fabs(del_ge_L), fabs(del_ge_R)); lim_slope_b = fmin(fabs(del_ge_C), fabs(del_ge_G)); - del_ge_m_imo = sgn_CUDA(del_ge_C) * fmin((Real) 2.0*lim_slope_a, lim_slope_b); - } - else del_ge_m_imo = 0.0; - #endif //DE + del_ge_m_imo = + sgn_CUDA(del_ge_C) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); + } else + del_ge_m_imo = 0.0; + #endif // DE #ifdef SCALAR - for (int i=0; i 0.0) { + for (int i = 0; i < NSCALARS; i++) { + if (del_scalar_L[i] * del_scalar_R[i] > 0.0) { lim_slope_a = fmin(fabs(del_scalar_L[i]), fabs(del_scalar_R[i])); lim_slope_b = fmin(fabs(del_scalar_C[i]), fabs(del_scalar_G[i])); - del_scalar_m_imo[i] = sgn_CUDA(del_scalar_C[i]) * fmin((Real) 2.0*lim_slope_a, lim_slope_b); - } - else del_scalar_m_imo[i] = 0.0; + del_scalar_m_imo[i] = sgn_CUDA(del_scalar_C[i]) * + fmin((Real)2.0 * lim_slope_a, lim_slope_b); + } else + del_scalar_m_imo[i] = 0.0; } - #endif //SCALAR - + #endif // SCALAR - // Step 5 - Project the monotonized difference in the characteristic variables back onto the + // Step 5 - Project the monotonized difference in the characteristic + // variables back onto the // primitive variables // Stone Eqn 39 del_d_m_imo = del_a_0_m + del_a_1_m + del_a_4_m; - del_vx_m_imo = -a*del_a_0_m/d_imo + a*del_a_4_m/d_imo; + del_vx_m_imo = -a * del_a_0_m / d_imo + a * del_a_4_m / d_imo; del_vy_m_imo = del_a_2_m; del_vz_m_imo = del_a_3_m; - del_p_m_imo = a*a*del_a_0_m + a*a*del_a_4_m; + del_p_m_imo = a * a * del_a_0_m + a * a * del_a_4_m; - - // Step 2 - Compute the left, right, centered, and van Leer differences of the primitive variables - // Note that here L and R refer to locations relative to the cell center - // Stone Eqn 36 + // Step 2 - Compute the left, right, centered, and van Leer differences of + // the primitive variables + // Note that here L and R refer to locations relative to the cell + // center Stone Eqn 36 // calculate the adiabatic sound speed in cell i - a = sqrt(gamma*p_i/d_i); + a = sqrt(gamma * p_i / d_i); // left - del_d_L = d_i - d_imo; + del_d_L = d_i - d_imo; del_vx_L = vx_i - vx_imo; del_vy_L = vy_i - vy_imo; del_vz_L = vz_i - vz_imo; - del_p_L = p_i - p_imo; + del_p_L = p_i - p_imo; // right - del_d_R = d_ipo - d_i; + del_d_R = d_ipo - d_i; del_vx_R = vx_ipo - vx_i; del_vy_R = vy_ipo - vy_i; del_vz_R = vz_ipo - vz_i; - del_p_R = p_ipo - p_i; + del_p_R = p_ipo - p_i; // centered - del_d_C = 0.5*(d_ipo - d_imo); - del_vx_C = 0.5*(vx_ipo - vx_imo); - del_vy_C = 0.5*(vy_ipo - vy_imo); - del_vz_C = 0.5*(vz_ipo - vz_imo); - del_p_C = 0.5*(p_ipo - p_imo); + del_d_C = 0.5 * (d_ipo - d_imo); + del_vx_C = 0.5 * (vx_ipo - vx_imo); + del_vy_C = 0.5 * (vy_ipo - vy_imo); + del_vz_C = 0.5 * (vz_ipo - vz_imo); + del_p_C = 0.5 * (p_ipo - p_imo); // van Leer - if (del_d_L*del_d_R > 0.0) { del_d_G = 2.0*del_d_L*del_d_R / (del_d_L+del_d_R); } - else { del_d_G = 0.0; } - if (del_vx_L*del_vx_R > 0.0) { del_vx_G = 2.0*del_vx_L*del_vx_R / (del_vx_L+del_vx_R); } - else { del_vx_G = 0.0; } - if (del_vy_L*del_vy_R > 0.0) { del_vy_G = 2.0*del_vy_L*del_vy_R / (del_vy_L+del_vy_R); } - else { del_vy_G = 0.0; } - if (del_vz_L*del_vz_R > 0.0) { del_vz_G = 2.0*del_vz_L*del_vz_R / (del_vz_L+del_vz_R); } - else { del_vz_G = 0.0; } - if (del_p_L*del_p_R > 0.0) { del_p_G = 2.0*del_p_L*del_p_R / (del_p_L+del_p_R); } - else { del_p_G = 0.0; } + if (del_d_L * del_d_R > 0.0) { + del_d_G = 2.0 * del_d_L * del_d_R / (del_d_L + del_d_R); + } else { + del_d_G = 0.0; + } + if (del_vx_L * del_vx_R > 0.0) { + del_vx_G = 2.0 * del_vx_L * del_vx_R / (del_vx_L + del_vx_R); + } else { + del_vx_G = 0.0; + } + if (del_vy_L * del_vy_R > 0.0) { + del_vy_G = 2.0 * del_vy_L * del_vy_R / (del_vy_L + del_vy_R); + } else { + del_vy_G = 0.0; + } + if (del_vz_L * del_vz_R > 0.0) { + del_vz_G = 2.0 * del_vz_L * del_vz_R / (del_vz_L + del_vz_R); + } else { + del_vz_G = 0.0; + } + if (del_p_L * del_p_R > 0.0) { + del_p_G = 2.0 * del_p_L * del_p_R / (del_p_L + del_p_R); + } else { + del_p_G = 0.0; + } #ifdef DE del_ge_L = ge_i - ge_imo; del_ge_R = ge_ipo - ge_i; - del_ge_C = 0.5*(ge_ipo - ge_imo); - if (del_ge_L*del_ge_R > 0.0) { del_ge_G = 2.0*del_ge_L*del_ge_R / (del_ge_L+del_ge_R); } - else { del_ge_G = 0.0; } - #endif //DE + del_ge_C = 0.5 * (ge_ipo - ge_imo); + if (del_ge_L * del_ge_R > 0.0) { + del_ge_G = 2.0 * del_ge_L * del_ge_R / (del_ge_L + del_ge_R); + } else { + del_ge_G = 0.0; + } + #endif // DE #ifdef SCALAR - for (int i=0; i 0.0) { del_scalar_G[i] = 2.0*del_scalar_L[i]*del_scalar_R[i] / (del_scalar_L[i]+del_scalar_R[i]); } - else { del_scalar_G[i] = 0.0; } + del_scalar_C[i] = 0.5 * (scalar_ipo[i] - scalar_imo[i]); + if (del_scalar_L[i] * del_scalar_R[i] > 0.0) { + del_scalar_G[i] = 2.0 * del_scalar_L[i] * del_scalar_R[i] / + (del_scalar_L[i] + del_scalar_R[i]); + } else { + del_scalar_G[i] = 0.0; + } } - #endif //SCALAR + #endif // SCALAR - // Step 3 - Project the left, right, centered, and van Leer differences onto the characteristic variables - // Stone Eqn 37 (del_a are differences in characteristic variables, see Stone for notation) - // Use the eigenvectors given in Stone 2008, Appendix A + // Step 3 - Project the left, right, centered, and van Leer differences onto + // the characteristic variables + // Stone Eqn 37 (del_a are differences in characteristic variables, + // see Stone for notation) Use the eigenvectors given in Stone + // 2008, Appendix A - del_a_0_L = -0.5*d_i*del_vx_L/a + 0.5*del_p_L/(a*a); - del_a_1_L = del_d_L - del_p_L/(a*a); + del_a_0_L = -0.5 * d_i * del_vx_L / a + 0.5 * del_p_L / (a * a); + del_a_1_L = del_d_L - del_p_L / (a * a); del_a_2_L = del_vy_L; del_a_3_L = del_vz_L; - del_a_4_L = 0.5*d_i*del_vx_L/a + 0.5*del_p_L/(a*a); + del_a_4_L = 0.5 * d_i * del_vx_L / a + 0.5 * del_p_L / (a * a); - del_a_0_R = -0.5*d_i*del_vx_R/a + 0.5*del_p_R/(a*a); - del_a_1_R = del_d_R - del_p_R/(a*a); + del_a_0_R = -0.5 * d_i * del_vx_R / a + 0.5 * del_p_R / (a * a); + del_a_1_R = del_d_R - del_p_R / (a * a); del_a_2_R = del_vy_R; del_a_3_R = del_vz_R; - del_a_4_R = 0.5*d_i*del_vx_R/a + 0.5*del_p_R/(a*a); + del_a_4_R = 0.5 * d_i * del_vx_R / a + 0.5 * del_p_R / (a * a); - del_a_0_C = -0.5*d_i*del_vx_C/a + 0.5*del_p_C/(a*a); - del_a_1_C = del_d_C - del_p_C/(a*a); + del_a_0_C = -0.5 * d_i * del_vx_C / a + 0.5 * del_p_C / (a * a); + del_a_1_C = del_d_C - del_p_C / (a * a); del_a_2_C = del_vy_C; del_a_3_C = del_vz_C; - del_a_4_C = 0.5*d_i*del_vx_C/a + 0.5*del_p_C/(a*a); + del_a_4_C = 0.5 * d_i * del_vx_C / a + 0.5 * del_p_C / (a * a); - del_a_0_G = -0.5*d_i*del_vx_G/a + 0.5*del_p_G/(a*a); - del_a_1_G = del_d_G - del_p_G/(a*a); + del_a_0_G = -0.5 * d_i * del_vx_G / a + 0.5 * del_p_G / (a * a); + del_a_1_G = del_d_G - del_p_G / (a * a); del_a_2_G = del_vy_G; del_a_3_G = del_vz_G; - del_a_4_G = 0.5*d_i*del_vx_G/a + 0.5*del_p_G/(a*a); + del_a_4_G = 0.5 * d_i * del_vx_G / a + 0.5 * del_p_G / (a * a); - - // Step 4 - Apply monotonicity constraints to the differences in the characteristic variables + // Step 4 - Apply monotonicity constraints to the differences in the + // characteristic variables // Stone Eqn 38 del_a_0_m = del_a_1_m = del_a_2_m = del_a_3_m = del_a_4_m = 0.0; - if (del_a_0_L*del_a_0_R > 0.0) { + if (del_a_0_L * del_a_0_R > 0.0) { lim_slope_a = fmin(fabs(del_a_0_L), fabs(del_a_0_R)); lim_slope_b = fmin(fabs(del_a_0_C), fabs(del_a_0_G)); - del_a_0_m = sgn_CUDA(del_a_0_C) * fmin((Real) 2.0*lim_slope_a, lim_slope_b); + del_a_0_m = + sgn_CUDA(del_a_0_C) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); } - if (del_a_1_L*del_a_1_R > 0.0) { + if (del_a_1_L * del_a_1_R > 0.0) { lim_slope_a = fmin(fabs(del_a_1_L), fabs(del_a_1_R)); lim_slope_b = fmin(fabs(del_a_1_C), fabs(del_a_1_G)); - del_a_1_m = sgn_CUDA(del_a_1_C) * fmin((Real) 2.0*lim_slope_a, lim_slope_b); + del_a_1_m = + sgn_CUDA(del_a_1_C) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); } - if (del_a_2_L*del_a_2_R > 0.0) { + if (del_a_2_L * del_a_2_R > 0.0) { lim_slope_a = fmin(fabs(del_a_2_L), fabs(del_a_2_R)); lim_slope_b = fmin(fabs(del_a_2_C), fabs(del_a_2_G)); - del_a_2_m = sgn_CUDA(del_a_2_C) * fmin((Real) 2.0*lim_slope_a, lim_slope_b); + del_a_2_m = + sgn_CUDA(del_a_2_C) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); } - if (del_a_3_L*del_a_3_R > 0.0) { + if (del_a_3_L * del_a_3_R > 0.0) { lim_slope_a = fmin(fabs(del_a_3_L), fabs(del_a_3_R)); lim_slope_b = fmin(fabs(del_a_3_C), fabs(del_a_3_G)); - del_a_3_m = sgn_CUDA(del_a_3_C) * fmin((Real) 2.0*lim_slope_a, lim_slope_b); + del_a_3_m = + sgn_CUDA(del_a_3_C) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); } - if (del_a_4_L*del_a_4_R > 0.0) { + if (del_a_4_L * del_a_4_R > 0.0) { lim_slope_a = fmin(fabs(del_a_4_L), fabs(del_a_4_R)); lim_slope_b = fmin(fabs(del_a_4_C), fabs(del_a_4_G)); - del_a_4_m = sgn_CUDA(del_a_4_C) * fmin((Real) 2.0*lim_slope_a, lim_slope_b); + del_a_4_m = + sgn_CUDA(del_a_4_C) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); } #ifdef DE - if (del_ge_L*del_ge_R > 0.0) { + if (del_ge_L * del_ge_R > 0.0) { lim_slope_a = fmin(fabs(del_ge_L), fabs(del_ge_R)); lim_slope_b = fmin(fabs(del_ge_C), fabs(del_ge_G)); - del_ge_m_i = sgn_CUDA(del_ge_C) * fmin((Real) 2.0*lim_slope_a, lim_slope_b); - } - else del_ge_m_i = 0.0; - #endif //DE + del_ge_m_i = + sgn_CUDA(del_ge_C) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); + } else + del_ge_m_i = 0.0; + #endif // DE #ifdef SCALAR - for (int i=0; i 0.0) { - lim_slope_a = fmin(fabs(del_scalar_L[i]), fabs(del_scalar_R[i])); - lim_slope_b = fmin(fabs(del_scalar_C[i]), fabs(del_scalar_G[i])); - del_scalar_m_i[i] = sgn_CUDA(del_scalar_C[i]) * fmin((Real) 2.0*lim_slope_a, lim_slope_b); - } - else del_scalar_m_i[i] = 0.0; + for (int i = 0; i < NSCALARS; i++) { + if (del_scalar_L[i] * del_scalar_R[i] > 0.0) { + lim_slope_a = fmin(fabs(del_scalar_L[i]), fabs(del_scalar_R[i])); + lim_slope_b = fmin(fabs(del_scalar_C[i]), fabs(del_scalar_G[i])); + del_scalar_m_i[i] = sgn_CUDA(del_scalar_C[i]) * + fmin((Real)2.0 * lim_slope_a, lim_slope_b); + } else + del_scalar_m_i[i] = 0.0; } - #endif //SCALAR + #endif // SCALAR - - // Step 5 - Project the monotonized difference in the characteristic variables back onto the + // Step 5 - Project the monotonized difference in the characteristic + // variables back onto the // primitive variables // Stone Eqn 39 del_d_m_i = del_a_0_m + del_a_1_m + del_a_4_m; - del_vx_m_i = -a*del_a_0_m/d_i + a*del_a_4_m/d_i; + del_vx_m_i = -a * del_a_0_m / d_i + a * del_a_4_m / d_i; del_vy_m_i = del_a_2_m; del_vz_m_i = del_a_3_m; - del_p_m_i = a*a*del_a_0_m + a*a*del_a_4_m; - - - // Step 2 - Compute the left, right, centered, and van Leer differences of the primitive variables - // Note that here L and R refer to locations relative to the cell center - // Stone Eqn 36 + del_p_m_i = a * a * del_a_0_m + a * a * del_a_4_m; + // Step 2 - Compute the left, right, centered, and van Leer differences of + // the primitive variables + // Note that here L and R refer to locations relative to the cell + // center Stone Eqn 36 // calculate the adiabatic sound speed in cell ipo - a = sqrt(gamma*p_ipo/d_ipo); + a = sqrt(gamma * p_ipo / d_ipo); // left del_d_L = d_ipo - d_i; del_vx_L = vx_ipo - vx_i; del_vy_L = vy_ipo - vy_i; del_vz_L = vz_ipo - vz_i; - del_p_L = p_ipo - p_i; + del_p_L = p_ipo - p_i; // right - del_d_R = d_ipt - d_ipo; + del_d_R = d_ipt - d_ipo; del_vx_R = vx_ipt - vx_ipo; del_vy_R = vy_ipt - vy_ipo; del_vz_R = vz_ipt - vz_ipo; - del_p_R = p_ipt - p_ipo; + del_p_R = p_ipt - p_ipo; // centered - del_d_C = 0.5*(d_ipt - d_i); - del_vx_C = 0.5*(vx_ipt- vx_i); - del_vy_C = 0.5*(vy_ipt - vy_i); - del_vz_C = 0.5*(vz_ipt - vz_i); - del_p_C = 0.5*(p_ipt - p_i); + del_d_C = 0.5 * (d_ipt - d_i); + del_vx_C = 0.5 * (vx_ipt - vx_i); + del_vy_C = 0.5 * (vy_ipt - vy_i); + del_vz_C = 0.5 * (vz_ipt - vz_i); + del_p_C = 0.5 * (p_ipt - p_i); // van Leer - if (del_d_L*del_d_R > 0.0) { del_d_G = 2.0*del_d_L*del_d_R / (del_d_L+del_d_R); } - else { del_d_G = 0.0; } - if (del_vx_L*del_vx_R > 0.0) { del_vx_G = 2.0*del_vx_L*del_vx_R / (del_vx_L+del_vx_R); } - else { del_vx_G = 0.0; } - if (del_vy_L*del_vy_R > 0.0) { del_vy_G = 2.0*del_vy_L*del_vy_R / (del_vy_L+del_vy_R); } - else { del_vy_G = 0.0; } - if (del_vz_L*del_vz_R > 0.0) { del_vz_G = 2.0*del_vz_L*del_vz_R / (del_vz_L+del_vz_R); } - else { del_vz_G = 0.0; } - if (del_p_L*del_p_R > 0.0) { del_p_G = 2.0*del_p_L*del_p_R / (del_p_L+del_p_R); } - else { del_p_G = 0.0; } + if (del_d_L * del_d_R > 0.0) { + del_d_G = 2.0 * del_d_L * del_d_R / (del_d_L + del_d_R); + } else { + del_d_G = 0.0; + } + if (del_vx_L * del_vx_R > 0.0) { + del_vx_G = 2.0 * del_vx_L * del_vx_R / (del_vx_L + del_vx_R); + } else { + del_vx_G = 0.0; + } + if (del_vy_L * del_vy_R > 0.0) { + del_vy_G = 2.0 * del_vy_L * del_vy_R / (del_vy_L + del_vy_R); + } else { + del_vy_G = 0.0; + } + if (del_vz_L * del_vz_R > 0.0) { + del_vz_G = 2.0 * del_vz_L * del_vz_R / (del_vz_L + del_vz_R); + } else { + del_vz_G = 0.0; + } + if (del_p_L * del_p_R > 0.0) { + del_p_G = 2.0 * del_p_L * del_p_R / (del_p_L + del_p_R); + } else { + del_p_G = 0.0; + } #ifdef DE del_ge_L = ge_ipo - ge_i; del_ge_R = ge_ipt - ge_ipo; - del_ge_C = 0.5*(ge_ipt- ge_i); - if (del_ge_L*del_ge_R > 0.0) { del_ge_G = 2.0*del_ge_L*del_ge_R / (del_ge_L+del_ge_R); } - else { del_ge_G = 0.0; } - #endif //DE + del_ge_C = 0.5 * (ge_ipt - ge_i); + if (del_ge_L * del_ge_R > 0.0) { + del_ge_G = 2.0 * del_ge_L * del_ge_R / (del_ge_L + del_ge_R); + } else { + del_ge_G = 0.0; + } + #endif // DE #ifdef SCALAR - for (int i=0; i 0.0) { del_scalar_G[i] = 2.0*del_scalar_L[i]*del_scalar_R[i] / (del_scalar_L[i]+del_scalar_R[i]); } - else { del_scalar_G[i] = 0.0; } + del_scalar_C[i] = 0.5 * (scalar_ipt[i] - scalar_i[i]); + if (del_scalar_L[i] * del_scalar_R[i] > 0.0) { + del_scalar_G[i] = 2.0 * del_scalar_L[i] * del_scalar_R[i] / + (del_scalar_L[i] + del_scalar_R[i]); + } else { + del_scalar_G[i] = 0.0; + } } - #endif //SCALAR - + #endif // SCALAR - // Step 3 - Project the left, right, centered, and van Leer differences onto the characteristic variables - // Stone Eqn 37 (del_a are differences in characteristic variables, see Stone for notation) - // Use the eigenvectors given in Stone 2008, Appendix A + // Step 3 - Project the left, right, centered, and van Leer differences onto + // the characteristic variables + // Stone Eqn 37 (del_a are differences in characteristic variables, + // see Stone for notation) Use the eigenvectors given in Stone + // 2008, Appendix A - del_a_0_L = -0.5*d_ipo*del_vx_L/a + 0.5*del_p_L/(a*a); - del_a_1_L = del_d_L - del_p_L/(a*a); + del_a_0_L = -0.5 * d_ipo * del_vx_L / a + 0.5 * del_p_L / (a * a); + del_a_1_L = del_d_L - del_p_L / (a * a); del_a_2_L = del_vy_L; del_a_3_L = del_vz_L; - del_a_4_L = 0.5*d_ipo*del_vx_L/a + 0.5*del_p_L/(a*a); + del_a_4_L = 0.5 * d_ipo * del_vx_L / a + 0.5 * del_p_L / (a * a); - del_a_0_R = -0.5*d_ipo*del_vx_R/a + 0.5*del_p_R/(a*a); - del_a_1_R = del_d_R - del_p_R/(a*a); + del_a_0_R = -0.5 * d_ipo * del_vx_R / a + 0.5 * del_p_R / (a * a); + del_a_1_R = del_d_R - del_p_R / (a * a); del_a_2_R = del_vy_R; del_a_3_R = del_vz_R; - del_a_4_R = 0.5*d_ipo*del_vx_R/a + 0.5*del_p_R/(a*a); + del_a_4_R = 0.5 * d_ipo * del_vx_R / a + 0.5 * del_p_R / (a * a); - del_a_0_C = -0.5*d_ipo*del_vx_C/a + 0.5*del_p_C/(a*a); - del_a_1_C = del_d_C - del_p_C/(a*a); + del_a_0_C = -0.5 * d_ipo * del_vx_C / a + 0.5 * del_p_C / (a * a); + del_a_1_C = del_d_C - del_p_C / (a * a); del_a_2_C = del_vy_C; del_a_3_C = del_vz_C; - del_a_4_C = 0.5*d_ipo*del_vx_C/a + 0.5*del_p_C/(a*a); + del_a_4_C = 0.5 * d_ipo * del_vx_C / a + 0.5 * del_p_C / (a * a); - del_a_0_G = -0.5*d_ipo*del_vx_G/a + 0.5*del_p_G/(a*a); - del_a_1_G = del_d_G - del_p_G/(a*a); + del_a_0_G = -0.5 * d_ipo * del_vx_G / a + 0.5 * del_p_G / (a * a); + del_a_1_G = del_d_G - del_p_G / (a * a); del_a_2_G = del_vy_G; del_a_3_G = del_vz_G; - del_a_4_G = 0.5*d_ipo*del_vx_G/a + 0.5*del_p_G/(a*a); + del_a_4_G = 0.5 * d_ipo * del_vx_G / a + 0.5 * del_p_G / (a * a); - - // Step 4 - Apply monotonicity constraints to the differences in the characteristic variables + // Step 4 - Apply monotonicity constraints to the differences in the + // characteristic variables // Stone Eqn 38 del_a_0_m = del_a_1_m = del_a_2_m = del_a_3_m = del_a_4_m = 0.0; - if (del_a_0_L*del_a_0_R > 0.0) { + if (del_a_0_L * del_a_0_R > 0.0) { lim_slope_a = fmin(fabs(del_a_0_L), fabs(del_a_0_R)); lim_slope_b = fmin(fabs(del_a_0_C), fabs(del_a_0_G)); - del_a_0_m = sgn_CUDA(del_a_0_C) * fmin((Real) 2.0*lim_slope_a, lim_slope_b); + del_a_0_m = + sgn_CUDA(del_a_0_C) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); } - if (del_a_1_L*del_a_1_R > 0.0) { + if (del_a_1_L * del_a_1_R > 0.0) { lim_slope_a = fmin(fabs(del_a_1_L), fabs(del_a_1_R)); lim_slope_b = fmin(fabs(del_a_1_C), fabs(del_a_1_G)); - del_a_1_m = sgn_CUDA(del_a_1_C) * fmin((Real) 2.0*lim_slope_a, lim_slope_b); + del_a_1_m = + sgn_CUDA(del_a_1_C) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); } - if (del_a_2_L*del_a_2_R > 0.0) { + if (del_a_2_L * del_a_2_R > 0.0) { lim_slope_a = fmin(fabs(del_a_2_L), fabs(del_a_2_R)); lim_slope_b = fmin(fabs(del_a_2_C), fabs(del_a_2_G)); - del_a_2_m = sgn_CUDA(del_a_2_C) * fmin((Real) 2.0*lim_slope_a, lim_slope_b); + del_a_2_m = + sgn_CUDA(del_a_2_C) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); } - if (del_a_3_L*del_a_3_R > 0.0) { + if (del_a_3_L * del_a_3_R > 0.0) { lim_slope_a = fmin(fabs(del_a_3_L), fabs(del_a_3_R)); lim_slope_b = fmin(fabs(del_a_3_C), fabs(del_a_3_G)); - del_a_3_m = sgn_CUDA(del_a_3_C) * fmin((Real) 2.0*lim_slope_a, lim_slope_b); + del_a_3_m = + sgn_CUDA(del_a_3_C) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); } - if (del_a_4_L*del_a_4_R > 0.0) { + if (del_a_4_L * del_a_4_R > 0.0) { lim_slope_a = fmin(fabs(del_a_4_L), fabs(del_a_4_R)); lim_slope_b = fmin(fabs(del_a_4_C), fabs(del_a_4_G)); - del_a_4_m = sgn_CUDA(del_a_4_C) * fmin((Real) 2.0*lim_slope_a, lim_slope_b); + del_a_4_m = + sgn_CUDA(del_a_4_C) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); } #ifdef DE - if (del_ge_L*del_ge_R > 0.0) { + if (del_ge_L * del_ge_R > 0.0) { lim_slope_a = fmin(fabs(del_ge_L), fabs(del_ge_R)); lim_slope_b = fmin(fabs(del_ge_C), fabs(del_ge_G)); - del_ge_m_ipo = sgn_CUDA(del_ge_C) * fmin((Real) 2.0*lim_slope_a, lim_slope_b); - } - else del_ge_m_ipo = 0.0; - #endif //DE + del_ge_m_ipo = + sgn_CUDA(del_ge_C) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); + } else + del_ge_m_ipo = 0.0; + #endif // DE #ifdef SCALAR - for (int i=0; i 0.0) { + for (int i = 0; i < NSCALARS; i++) { + if (del_scalar_L[i] * del_scalar_R[i] > 0.0) { lim_slope_a = fmin(fabs(del_scalar_L[i]), fabs(del_scalar_R[i])); lim_slope_b = fmin(fabs(del_scalar_C[i]), fabs(del_scalar_G[i])); - del_scalar_m_ipo[i] = sgn_CUDA(del_scalar_C[i]) * fmin((Real) 2.0*lim_slope_a, lim_slope_b); - } - else del_scalar_m_ipo[i] = 0.0; + del_scalar_m_ipo[i] = sgn_CUDA(del_scalar_C[i]) * + fmin((Real)2.0 * lim_slope_a, lim_slope_b); + } else + del_scalar_m_ipo[i] = 0.0; } - #endif //SCALAR + #endif // SCALAR - - // Step 5 - Project the monotonized difference in the characteristic variables back onto the + // Step 5 - Project the monotonized difference in the characteristic + // variables back onto the // primitive variables // Stone Eqn 39 del_d_m_ipo = del_a_0_m + del_a_1_m + del_a_4_m; - del_vx_m_ipo = -a*del_a_0_m / d_ipo + a* del_a_4_m / d_ipo; + del_vx_m_ipo = -a * del_a_0_m / d_ipo + a * del_a_4_m / d_ipo; del_vy_m_ipo = del_a_2_m; del_vz_m_ipo = del_a_3_m; - del_p_m_ipo = a*a*del_a_0_m + a*a*del_a_4_m; - + del_p_m_ipo = a * a * del_a_0_m + a * a * del_a_4_m; - // Step 6 - Use parabolic interpolation to compute values at the left and right of each cell center - // Here, the subscripts L and R refer to the left and right side of the ith cell center - // Stone Eqn 46 + // Step 6 - Use parabolic interpolation to compute values at the left and + // right of each cell center + // Here, the subscripts L and R refer to the left and right side of + // the ith cell center Stone Eqn 46 - d_L = 0.5*(d_i + d_imo) - (del_d_m_i - del_d_m_imo) / 6.0; - vx_L = 0.5*(vx_i + vx_imo) - (del_vx_m_i - del_vx_m_imo) / 6.0; - vy_L = 0.5*(vy_i + vy_imo) - (del_vy_m_i - del_vy_m_imo) / 6.0; - vz_L = 0.5*(vz_i + vz_imo) - (del_vz_m_i - del_vz_m_imo) / 6.0; - p_L = 0.5*(p_i + p_imo) - (del_p_m_i - del_p_m_imo) / 6.0; + d_L = 0.5 * (d_i + d_imo) - (del_d_m_i - del_d_m_imo) / 6.0; + vx_L = 0.5 * (vx_i + vx_imo) - (del_vx_m_i - del_vx_m_imo) / 6.0; + vy_L = 0.5 * (vy_i + vy_imo) - (del_vy_m_i - del_vy_m_imo) / 6.0; + vz_L = 0.5 * (vz_i + vz_imo) - (del_vz_m_i - del_vz_m_imo) / 6.0; + p_L = 0.5 * (p_i + p_imo) - (del_p_m_i - del_p_m_imo) / 6.0; - d_R = 0.5*(d_ipo + d_i) - (del_d_m_ipo - del_d_m_i) / 6.0; - vx_R = 0.5*(vx_ipo + vx_i) - (del_vx_m_ipo - del_vx_m_i) / 6.0; - vy_R = 0.5*(vy_ipo + vy_i) - (del_vy_m_ipo - del_vy_m_i) / 6.0; - vz_R = 0.5*(vz_ipo + vz_i) - (del_vz_m_ipo - del_vz_m_i) / 6.0; - p_R = 0.5*(p_ipo + p_i) - (del_p_m_ipo - del_p_m_i) / 6.0; + d_R = 0.5 * (d_ipo + d_i) - (del_d_m_ipo - del_d_m_i) / 6.0; + vx_R = 0.5 * (vx_ipo + vx_i) - (del_vx_m_ipo - del_vx_m_i) / 6.0; + vy_R = 0.5 * (vy_ipo + vy_i) - (del_vy_m_ipo - del_vy_m_i) / 6.0; + vz_R = 0.5 * (vz_ipo + vz_i) - (del_vz_m_ipo - del_vz_m_i) / 6.0; + p_R = 0.5 * (p_ipo + p_i) - (del_p_m_ipo - del_p_m_i) / 6.0; #ifdef DE - ge_L = 0.5*(ge_i + ge_imo) - (del_ge_m_i - del_ge_m_imo) / 6.0; - ge_R = 0.5*(ge_ipo + ge_i) - (del_ge_m_ipo - del_ge_m_i) / 6.0; - #endif //DE + ge_L = 0.5 * (ge_i + ge_imo) - (del_ge_m_i - del_ge_m_imo) / 6.0; + ge_R = 0.5 * (ge_ipo + ge_i) - (del_ge_m_ipo - del_ge_m_i) / 6.0; + #endif // DE #ifdef SCALAR - for (int i=0; i (d_R - d_L) *(d_R - d_L)) d_L = 3.0*d_i - 2.0*d_R; - if ( 6.0*(vx_R - vx_L)*(vx_i - 0.5*(vx_L + vx_R)) > (vx_R - vx_L)*(vx_R - vx_L)) vx_L = 3.0*vx_i - 2.0*vx_R; - if ( 6.0*(vy_R - vy_L)*(vy_i - 0.5*(vy_L + vy_R)) > (vy_R - vy_L)*(vy_R - vy_L)) vy_L = 3.0*vy_i - 2.0*vy_R; - if ( 6.0*(vz_R - vz_L)*(vz_i - 0.5*(vz_L + vz_R)) > (vz_R - vz_L)*(vz_R - vz_L)) vz_L = 3.0*vz_i - 2.0*vz_R; - if ( 6.0*(p_R - p_L) *(p_i - 0.5*(p_L + p_R)) > (p_R - p_L) *(p_R - p_L)) p_L = 3.0*p_i - 2.0*p_R; - - if ( 6.0*(d_R - d_L) *(d_i - 0.5*(d_L + d_R)) < -(d_R - d_L) *(d_R - d_L)) d_R = 3.0*d_i - 2.0*d_L; - if ( 6.0*(vx_R - vx_L)*(vx_i - 0.5*(vx_L + vx_R)) < -(vx_R - vx_L)*(vx_R - vx_L)) vx_R = 3.0*vx_i - 2.0*vx_L; - if ( 6.0*(vy_R - vy_L)*(vy_i - 0.5*(vy_L + vy_R)) < -(vy_R - vy_L)*(vy_R - vy_L)) vy_R = 3.0*vy_i - 2.0*vy_L; - if ( 6.0*(vz_R - vz_L)*(vz_i - 0.5*(vz_L + vz_R)) < -(vz_R - vz_L)*(vz_R - vz_L)) vz_R = 3.0*vz_i - 2.0*vz_L; - if ( 6.0*(p_R - p_L) *(p_i - 0.5*(p_L + p_R)) < -(p_R - p_L) *(p_R - p_L)) p_R = 3.0*p_i - 2.0*p_L; - - d_L = fmax( fmin(d_i, d_imo), d_L ); - d_L = fmin( fmax(d_i, d_imo), d_L ); - d_R = fmax( fmin(d_i, d_ipo), d_R ); - d_R = fmin( fmax(d_i, d_ipo), d_R ); - vx_L = fmax( fmin(vx_i, vx_imo), vx_L ); - vx_L = fmin( fmax(vx_i, vx_imo), vx_L ); - vx_R = fmax( fmin(vx_i, vx_ipo), vx_R ); - vx_R = fmin( fmax(vx_i, vx_ipo), vx_R ); - vy_L = fmax( fmin(vy_i, vy_imo), vy_L ); - vy_L = fmin( fmax(vy_i, vy_imo), vy_L ); - vy_R = fmax( fmin(vy_i, vy_ipo), vy_R ); - vy_R = fmin( fmax(vy_i, vy_ipo), vy_R ); - vz_L = fmax( fmin(vz_i, vz_imo), vz_L ); - vz_L = fmin( fmax(vz_i, vz_imo), vz_L ); - vz_R = fmax( fmin(vz_i, vz_ipo), vz_R ); - vz_R = fmin( fmax(vz_i, vz_ipo), vz_R ); - p_L = fmax( fmin(p_i, p_imo), p_L ); - p_L = fmin( fmax(p_i, p_imo), p_L ); - p_R = fmax( fmin(p_i, p_ipo), p_R ); - p_R = fmin( fmax(p_i, p_ipo), p_R ); + if ((d_R - d_i) * (d_i - d_L) <= 0) d_L = d_R = d_i; + if ((vx_R - vx_i) * (vx_i - vx_L) <= 0) vx_L = vx_R = vx_i; + if ((vy_R - vy_i) * (vy_i - vy_L) <= 0) vy_L = vy_R = vy_i; + if ((vz_R - vz_i) * (vz_i - vz_L) <= 0) vz_L = vz_R = vz_i; + if ((p_R - p_i) * (p_i - p_L) <= 0) p_L = p_R = p_i; + + if (6.0 * (d_R - d_L) * (d_i - 0.5 * (d_L + d_R)) > + (d_R - d_L) * (d_R - d_L)) + d_L = 3.0 * d_i - 2.0 * d_R; + if (6.0 * (vx_R - vx_L) * (vx_i - 0.5 * (vx_L + vx_R)) > + (vx_R - vx_L) * (vx_R - vx_L)) + vx_L = 3.0 * vx_i - 2.0 * vx_R; + if (6.0 * (vy_R - vy_L) * (vy_i - 0.5 * (vy_L + vy_R)) > + (vy_R - vy_L) * (vy_R - vy_L)) + vy_L = 3.0 * vy_i - 2.0 * vy_R; + if (6.0 * (vz_R - vz_L) * (vz_i - 0.5 * (vz_L + vz_R)) > + (vz_R - vz_L) * (vz_R - vz_L)) + vz_L = 3.0 * vz_i - 2.0 * vz_R; + if (6.0 * (p_R - p_L) * (p_i - 0.5 * (p_L + p_R)) > + (p_R - p_L) * (p_R - p_L)) + p_L = 3.0 * p_i - 2.0 * p_R; + + if (6.0 * (d_R - d_L) * (d_i - 0.5 * (d_L + d_R)) < + -(d_R - d_L) * (d_R - d_L)) + d_R = 3.0 * d_i - 2.0 * d_L; + if (6.0 * (vx_R - vx_L) * (vx_i - 0.5 * (vx_L + vx_R)) < + -(vx_R - vx_L) * (vx_R - vx_L)) + vx_R = 3.0 * vx_i - 2.0 * vx_L; + if (6.0 * (vy_R - vy_L) * (vy_i - 0.5 * (vy_L + vy_R)) < + -(vy_R - vy_L) * (vy_R - vy_L)) + vy_R = 3.0 * vy_i - 2.0 * vy_L; + if (6.0 * (vz_R - vz_L) * (vz_i - 0.5 * (vz_L + vz_R)) < + -(vz_R - vz_L) * (vz_R - vz_L)) + vz_R = 3.0 * vz_i - 2.0 * vz_L; + if (6.0 * (p_R - p_L) * (p_i - 0.5 * (p_L + p_R)) < + -(p_R - p_L) * (p_R - p_L)) + p_R = 3.0 * p_i - 2.0 * p_L; + + d_L = fmax(fmin(d_i, d_imo), d_L); + d_L = fmin(fmax(d_i, d_imo), d_L); + d_R = fmax(fmin(d_i, d_ipo), d_R); + d_R = fmin(fmax(d_i, d_ipo), d_R); + vx_L = fmax(fmin(vx_i, vx_imo), vx_L); + vx_L = fmin(fmax(vx_i, vx_imo), vx_L); + vx_R = fmax(fmin(vx_i, vx_ipo), vx_R); + vx_R = fmin(fmax(vx_i, vx_ipo), vx_R); + vy_L = fmax(fmin(vy_i, vy_imo), vy_L); + vy_L = fmin(fmax(vy_i, vy_imo), vy_L); + vy_R = fmax(fmin(vy_i, vy_ipo), vy_R); + vy_R = fmin(fmax(vy_i, vy_ipo), vy_R); + vz_L = fmax(fmin(vz_i, vz_imo), vz_L); + vz_L = fmin(fmax(vz_i, vz_imo), vz_L); + vz_R = fmax(fmin(vz_i, vz_ipo), vz_R); + vz_R = fmin(fmax(vz_i, vz_ipo), vz_R); + p_L = fmax(fmin(p_i, p_imo), p_L); + p_L = fmin(fmax(p_i, p_imo), p_L); + p_R = fmax(fmin(p_i, p_ipo), p_R); + p_R = fmin(fmax(p_i, p_ipo), p_R); #ifdef DE - if ((ge_R - ge_i) *(ge_i - ge_L) <= 0) ge_L = ge_R = ge_i; - if ( 6.0*(ge_R - ge_L) *(ge_i - 0.5*(ge_L + ge_R)) > (ge_R - ge_L) *(ge_R - ge_L)) ge_L = 3.0*ge_i - 2.0*ge_R; - if ( 6.0*(ge_R - ge_L) *(ge_i - 0.5*(ge_L + ge_R)) < -(ge_R - ge_L) *(ge_R - ge_L)) ge_R = 3.0*ge_i - 2.0*ge_L; - ge_L = fmax( fmin(ge_i, ge_imo), ge_L ); - ge_L = fmin( fmax(ge_i, ge_imo), ge_L ); - ge_R = fmax( fmin(ge_i, ge_ipo), ge_R ); - ge_R = fmin( fmax(ge_i, ge_ipo), ge_R ); - #endif //DE + if ((ge_R - ge_i) * (ge_i - ge_L) <= 0) ge_L = ge_R = ge_i; + if (6.0 * (ge_R - ge_L) * (ge_i - 0.5 * (ge_L + ge_R)) > + (ge_R - ge_L) * (ge_R - ge_L)) + ge_L = 3.0 * ge_i - 2.0 * ge_R; + if (6.0 * (ge_R - ge_L) * (ge_i - 0.5 * (ge_L + ge_R)) < + -(ge_R - ge_L) * (ge_R - ge_L)) + ge_R = 3.0 * ge_i - 2.0 * ge_L; + ge_L = fmax(fmin(ge_i, ge_imo), ge_L); + ge_L = fmin(fmax(ge_i, ge_imo), ge_L); + ge_R = fmax(fmin(ge_i, ge_ipo), ge_R); + ge_R = fmin(fmax(ge_i, ge_ipo), ge_R); + #endif // DE #ifdef SCALAR - for (int i=0; i (scalar_R[i] - scalar_L[i]) *(scalar_R[i] - scalar_L[i])) scalar_L[i] = 3.0*scalar_i[i] - 2.0*scalar_R[i]; - if ( 6.0*(scalar_R[i] - scalar_L[i]) *(scalar_i[i] - 0.5*(scalar_L[i] + scalar_R[i])) < -(scalar_R[i] - scalar_L[i]) *(scalar_R[i] - scalar_L[i])) scalar_R[i] = 3.0*scalar_i[i] - 2.0*scalar_L[i]; - scalar_L[i] = fmax( fmin(scalar_i[i], scalar_imo[i]), scalar_L[i] ); - scalar_L[i] = fmin( fmax(scalar_i[i], scalar_imo[i]), scalar_L[i] ); - scalar_R[i] = fmax( fmin(scalar_i[i], scalar_ipo[i]), scalar_R[i] ); - scalar_R[i] = fmin( fmax(scalar_i[i], scalar_ipo[i]), scalar_R[i] ); + for (int i = 0; i < NSCALARS; i++) { + if ((scalar_R[i] - scalar_i[i]) * (scalar_i[i] - scalar_L[i]) <= 0) + scalar_L[i] = scalar_R[i] = scalar_i[i]; + if (6.0 * (scalar_R[i] - scalar_L[i]) * + (scalar_i[i] - 0.5 * (scalar_L[i] + scalar_R[i])) > + (scalar_R[i] - scalar_L[i]) * (scalar_R[i] - scalar_L[i])) + scalar_L[i] = 3.0 * scalar_i[i] - 2.0 * scalar_R[i]; + if (6.0 * (scalar_R[i] - scalar_L[i]) * + (scalar_i[i] - 0.5 * (scalar_L[i] + scalar_R[i])) < + -(scalar_R[i] - scalar_L[i]) * (scalar_R[i] - scalar_L[i])) + scalar_R[i] = 3.0 * scalar_i[i] - 2.0 * scalar_L[i]; + scalar_L[i] = fmax(fmin(scalar_i[i], scalar_imo[i]), scalar_L[i]); + scalar_L[i] = fmin(fmax(scalar_i[i], scalar_imo[i]), scalar_L[i]); + scalar_R[i] = fmax(fmin(scalar_i[i], scalar_ipo[i]), scalar_R[i]); + scalar_R[i] = fmin(fmax(scalar_i[i], scalar_ipo[i]), scalar_R[i]); } - #endif //SCALAR + #endif // SCALAR // #ifdef CTU #ifndef VL - // Step 8 - Compute the coefficients for the monotonized parabolic interpolation function + // Step 8 - Compute the coefficients for the monotonized parabolic + // interpolation function // Stone Eqn 54 - del_d_m_i = d_R - d_L; + del_d_m_i = d_R - d_L; del_vx_m_i = vx_R - vx_L; del_vy_m_i = vy_R - vy_L; del_vz_m_i = vz_R - vz_L; - del_p_m_i = p_R - p_L; + del_p_m_i = p_R - p_L; - d_6 = 6.0*(d_i - 0.5*(d_L + d_R)); - vx_6 = 6.0*(vx_i - 0.5*(vx_L + vx_R)); - vy_6 = 6.0*(vy_i - 0.5*(vy_L + vy_R)); - vz_6 = 6.0*(vz_i - 0.5*(vz_L + vz_R)); - p_6 = 6.0*(p_i - 0.5*(p_L + p_R)); + d_6 = 6.0 * (d_i - 0.5 * (d_L + d_R)); + vx_6 = 6.0 * (vx_i - 0.5 * (vx_L + vx_R)); + vy_6 = 6.0 * (vy_i - 0.5 * (vy_L + vy_R)); + vz_6 = 6.0 * (vz_i - 0.5 * (vz_L + vz_R)); + p_6 = 6.0 * (p_i - 0.5 * (p_L + p_R)); - #ifdef DE + #ifdef DE del_ge_m_i = ge_R - ge_L; - ge_6 = 6.0*(ge_i - 0.5*(ge_L + ge_R)); - #endif //DE + ge_6 = 6.0 * (ge_i - 0.5 * (ge_L + ge_R)); + #endif // DE - #ifdef SCALAR - for (int i=0; i= 0) - { - A = (0.5*dtodx) * (lambda_p - lambda_m); - B = (1.0/3.0)*(dtodx)*(dtodx)*(lambda_p*lambda_p - lambda_m*lambda_m); - - chi_1 = A*(del_d_m_i - d_6) + B*d_6; - chi_2 = A*(del_vx_m_i - vx_6) + B*vx_6; - chi_3 = A*(del_vy_m_i - vy_6) + B*vy_6; - chi_4 = A*(del_vz_m_i - vz_6) + B*vz_6; - chi_5 = A*(del_p_m_i - p_6) + B*p_6; - - sum_1 += -0.5*(d_i*chi_2/a - chi_5/(a*a)); - sum_2 += 0.5*(chi_2 - chi_5/(a*d_i)); - sum_5 += -0.5*(d_i*chi_2*a - chi_5); + #endif // SCALAR + + if (lambda_m >= 0) { + A = (0.5 * dtodx) * (lambda_p - lambda_m); + B = (1.0 / 3.0) * (dtodx) * (dtodx) * + (lambda_p * lambda_p - lambda_m * lambda_m); + + chi_1 = A * (del_d_m_i - d_6) + B * d_6; + chi_2 = A * (del_vx_m_i - vx_6) + B * vx_6; + chi_3 = A * (del_vy_m_i - vy_6) + B * vy_6; + chi_4 = A * (del_vz_m_i - vz_6) + B * vz_6; + chi_5 = A * (del_p_m_i - p_6) + B * p_6; + + sum_1 += -0.5 * (d_i * chi_2 / a - chi_5 / (a * a)); + sum_2 += 0.5 * (chi_2 - chi_5 / (a * d_i)); + sum_5 += -0.5 * (d_i * chi_2 * a - chi_5); } - if (lambda_0 >= 0) - { - A = (0.5*dtodx) * (lambda_p - lambda_0); - B = (1.0/3.0)*(dtodx)*(dtodx)*(lambda_p*lambda_p - lambda_0*lambda_0); - - chi_1 = A*(del_d_m_i - d_6) + B*d_6; - chi_2 = A*(del_vx_m_i - vx_6) + B*vx_6; - chi_3 = A*(del_vy_m_i - vy_6) + B*vy_6; - chi_4 = A*(del_vz_m_i - vz_6) + B*vz_6; - chi_5 = A*(del_p_m_i - p_6) + B*p_6; + if (lambda_0 >= 0) { + A = (0.5 * dtodx) * (lambda_p - lambda_0); + B = (1.0 / 3.0) * (dtodx) * (dtodx) * + (lambda_p * lambda_p - lambda_0 * lambda_0); + + chi_1 = A * (del_d_m_i - d_6) + B * d_6; + chi_2 = A * (del_vx_m_i - vx_6) + B * vx_6; + chi_3 = A * (del_vy_m_i - vy_6) + B * vy_6; + chi_4 = A * (del_vz_m_i - vz_6) + B * vz_6; + chi_5 = A * (del_p_m_i - p_6) + B * p_6; #ifdef DE - chi_ge = A*(del_ge_m_i - ge_6) + B*ge_6; - #endif //DE + chi_ge = A * (del_ge_m_i - ge_6) + B * ge_6; + #endif // DE #ifdef SCALAR - for (int i=0; i= 0) - { - A = (0.5*dtodx) * (lambda_p - lambda_p); - B = (1.0/3.0)*(dtodx)*(dtodx)*(lambda_p*lambda_p - lambda_p*lambda_p); - - chi_1 = A*(del_d_m_i - d_6) + B*d_6; - chi_2 = A*(del_vx_m_i - vx_6) + B*vx_6; - chi_3 = A*(del_vy_m_i - vy_6) + B*vy_6; - chi_4 = A*(del_vz_m_i - vz_6) + B*vz_6; - chi_5 = A*(del_p_m_i - p_6) + B*p_6; - - sum_1 += 0.5*(d_i*chi_2/a + chi_5/(a*a)); - sum_2 += 0.5*(chi_2 + chi_5/(a*d_i)); - sum_5 += 0.5*(d_i*chi_2*a + chi_5); + if (lambda_p >= 0) { + A = (0.5 * dtodx) * (lambda_p - lambda_p); + B = (1.0 / 3.0) * (dtodx) * (dtodx) * + (lambda_p * lambda_p - lambda_p * lambda_p); + + chi_1 = A * (del_d_m_i - d_6) + B * d_6; + chi_2 = A * (del_vx_m_i - vx_6) + B * vx_6; + chi_3 = A * (del_vy_m_i - vy_6) + B * vy_6; + chi_4 = A * (del_vz_m_i - vz_6) + B * vz_6; + chi_5 = A * (del_p_m_i - p_6) + B * p_6; + + sum_1 += 0.5 * (d_i * chi_2 / a + chi_5 / (a * a)); + sum_2 += 0.5 * (chi_2 + chi_5 / (a * d_i)); + sum_5 += 0.5 * (d_i * chi_2 * a + chi_5); } // add the corrections to the initial guesses for the interface values @@ -950,14 +1141,14 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou vy_R += sum_3; vz_R += sum_4; p_R += sum_5; - #ifdef DE + #ifdef DE ge_R += sum_ge; - #endif //DE - #ifdef SCALAR - for (int i=0; i -#include "../global/global.h" -#include "../global/global_cuda.h" -#include "../reconstruction/ppmp_cuda.h" + #include -#ifdef DE //PRESSURE_DE -#include "../utils/hydro_utilities.h" -#endif + #include "../global/global.h" + #include "../global/global_cuda.h" + #include "../reconstruction/ppmp_cuda.h" + #include "../utils/gpu.hpp" + + #ifdef DE // PRESSURE_DE + #include "../utils/hydro_utilities.h" + #endif // #define STEEPENING // #define FLATTENING -//Note: Errors when using FLATTENING, need to check the ghost cells - -/*! \fn __global__ void PPMP_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, int ny, int nz, int n_ghost, Real gamma, int dir, int n_fields) - * \brief When passed a stencil of conserved variables, returns the left and right - boundary values for the interface calculated using ppm with limiting in the primitive variables. */ -__global__ void PPMP_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, int ny, int nz, int n_ghost, Real dx, Real dt, Real gamma, int dir, int n_fields) +// Note: Errors when using FLATTENING, need to check the ghost cells + +/*! \fn __global__ void PPMP_cuda(Real *dev_conserved, Real *dev_bounds_L, Real + *dev_bounds_R, int nx, int ny, int nz, int n_ghost, Real gamma, int dir, int + n_fields) + * \brief When passed a stencil of conserved variables, returns the left and + right boundary values for the interface calculated using ppm with limiting in + the primitive variables. */ +__global__ void PPMP_cuda(Real *dev_conserved, Real *dev_bounds_L, + Real *dev_bounds_R, int nx, int ny, int nz, + int n_ghost, Real dx, Real dt, Real gamma, int dir, + int n_fields) { - int n_cells = nx*ny*nz; + int n_cells = nx * ny * nz; int o1, o2, o3; if (dir == 0) { - o1 = 1; o2 = 2; o3 = 3; + o1 = 1; + o2 = 2; + o3 = 3; } if (dir == 1) { - o1 = 2; o2 = 3; o3 = 1; + o1 = 2; + o2 = 3; + o3 = 1; } if (dir == 2) { - o1 = 3; o2 = 1; o3 = 2; + o1 = 3; + o2 = 1; + o3 = 2; } // declare primitive variables in the stencil @@ -41,9 +54,9 @@ __global__ void PPMP_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou Real d_ipo, vx_ipo, vy_ipo, vz_ipo, p_ipo; Real d_imt, vx_imt, vy_imt, vz_imt, p_imt; Real d_ipt, vx_ipt, vy_ipt, vz_ipt, p_ipt; - #ifdef FLATTENING + #ifdef FLATTENING Real p_imth, p_ipth; - #endif //FLATTENING + #endif // FLATTENING // declare left and right interface values Real d_L, vx_L, vy_L, vz_L, p_L; @@ -52,14 +65,14 @@ __global__ void PPMP_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou // declare other variables Real del_q_imo, del_q_i, del_q_ipo; - #ifndef VL -// #ifdef CTU - Real cs, cl, cr; // sound speed in cell i, and at left and right boundaries - Real del_d, del_vx, del_vy, del_vz, del_p; // "slope" accross cell i + #ifndef VL + // #ifdef CTU + Real cs, cl, cr; // sound speed in cell i, and at left and right boundaries + Real del_d, del_vx, del_vy, del_vz, del_p; // "slope" accross cell i Real d_6, vx_6, vy_6, vz_6, p_6; Real beta_m, beta_0, beta_p; Real alpha_m, alpha_0, alpha_p; - Real lambda_m, lambda_0, lambda_p; // speed of characteristics + Real lambda_m, lambda_0, lambda_p; // speed of characteristics Real dL_m, vxL_m, pL_m; Real dL_0, vyL_0, vzL_0, pL_0; Real vxL_p, pL_p; @@ -68,34 +81,34 @@ __global__ void PPMP_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou Real dR_p, vxR_p, pR_p; Real chi_L_m, chi_L_0, chi_L_p; Real chi_R_m, chi_R_0, chi_R_p; - #endif //CTU + #endif // CTU - #ifdef DE + #ifdef DE Real ge_i, ge_imo, ge_ipo, ge_imt, ge_ipt, ge_L, ge_R, E_kin, E, dge; - #ifndef VL -// #ifdef CTU + #ifndef VL + // #ifdef CTU Real del_ge, ge_6, geL_0, geR_0; - #endif //CTU - #endif //DE + #endif // CTU + #endif // DE - #ifdef SCALAR - Real scalar_i[NSCALARS], scalar_imo[NSCALARS], scalar_ipo[NSCALARS], scalar_imt[NSCALARS], scalar_ipt[NSCALARS]; + #ifdef SCALAR + Real scalar_i[NSCALARS], scalar_imo[NSCALARS], scalar_ipo[NSCALARS], + scalar_imt[NSCALARS], scalar_ipt[NSCALARS]; Real scalar_L[NSCALARS], scalar_R[NSCALARS]; - #ifndef VL -// #ifdef CTU - Real del_scalar[NSCALARS], scalar_6[NSCALARS], scalarL_0[NSCALARS], scalarR_0[NSCALARS]; - #endif //CTU - #endif //SCALAR - - + #ifndef VL + // #ifdef CTU + Real del_scalar[NSCALARS], scalar_6[NSCALARS], scalarL_0[NSCALARS], + scalarR_0[NSCALARS]; + #endif // CTU + #endif // SCALAR // get a thread ID - int blockId = blockIdx.x + blockIdx.y*gridDim.x; - int tid = threadIdx.x + blockId*blockDim.x; + int blockId = blockIdx.x + blockIdx.y * gridDim.x; + int tid = threadIdx.x + blockId * blockDim.x; int id; - int zid = tid / (nx*ny); - int yid = (tid - zid*nx*ny) / nx; - int xid = tid - zid*nx*ny - yid*nx; + int zid = tid / (nx * ny); + int yid = (tid - zid * nx * ny) / nx; + int xid = tid - zid * nx * ny - yid * nx; int xs, xe, ys, ye, zs, ze; @@ -116,403 +129,472 @@ __global__ void PPMP_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou // zs = 3; ze = nz-4; // } - //Ignore only the 2 ghost cells on each side ( instead of ignoring 3 ghost cells on each side ) + // Ignore only the 2 ghost cells on each side ( instead of ignoring 3 ghost + // cells on each side ) if (dir == 0) { - xs = 2; xe = nx-3; - ys = 0; ye = ny; - zs = 0; ze = nz; + xs = 2; + xe = nx - 3; + ys = 0; + ye = ny; + zs = 0; + ze = nz; } if (dir == 1) { - xs = 0; xe = nx; - ys = 2; ye = ny-3; - zs = 0; ze = nz; + xs = 0; + xe = nx; + ys = 2; + ye = ny - 3; + zs = 0; + ze = nz; } if (dir == 2) { - xs = 0; xe = nx; - ys = 0; ye = ny; - zs = 2; ze = nz-3; + xs = 0; + xe = nx; + ys = 0; + ye = ny; + zs = 2; + ze = nz - 3; } - if (xid >= xs && xid < xe && yid >= ys && yid < ye && zid >= zs && zid < ze) - { + if (xid >= xs && xid < xe && yid >= ys && yid < ye && zid >= zs && zid < ze) { // load the 5-cell stencil into registers // cell i - id = xid + yid*nx + zid*nx*ny; - d_i = dev_conserved[ id]; - vx_i = dev_conserved[o1*n_cells + id] / d_i; - vy_i = dev_conserved[o2*n_cells + id] / d_i; - vz_i = dev_conserved[o3*n_cells + id] / d_i; - #ifdef DE //PRESSURE_DE - E = dev_conserved[4*n_cells + id]; - E_kin = 0.5 * d_i * ( vx_i*vx_i + vy_i*vy_i + vz_i*vz_i ); - dge = dev_conserved[(n_fields-1)*n_cells + id]; - p_i = hydro_utilities::Get_Pressure_From_DE( E, E - E_kin, dge, gamma ); + id = xid + yid * nx + zid * nx * ny; + d_i = dev_conserved[id]; + vx_i = dev_conserved[o1 * n_cells + id] / d_i; + vy_i = dev_conserved[o2 * n_cells + id] / d_i; + vz_i = dev_conserved[o3 * n_cells + id] / d_i; + #ifdef DE // PRESSURE_DE + E = dev_conserved[4 * n_cells + id]; + E_kin = 0.5 * d_i * (vx_i * vx_i + vy_i * vy_i + vz_i * vz_i); + dge = dev_conserved[(n_fields - 1) * n_cells + id]; + p_i = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, dge, gamma); #else - p_i = (dev_conserved[4*n_cells + id] - 0.5*d_i*(vx_i*vx_i + vy_i*vy_i + vz_i*vz_i)) * (gamma - 1.0); - #endif //PRESSURE_DE - p_i = fmax(p_i, (Real) TINY_NUMBER); + p_i = (dev_conserved[4 * n_cells + id] - + 0.5 * d_i * (vx_i * vx_i + vy_i * vy_i + vz_i * vz_i)) * + (gamma - 1.0); + #endif // PRESSURE_DE + p_i = fmax(p_i, (Real)TINY_NUMBER); #ifdef DE ge_i = dge / d_i; - #endif //DE + #endif // DE #ifdef SCALAR - for (int i=0; i 0.01) - { - //calculate the second derivative of the density in the imo and ipo cells + // check for contact discontinuities & steepen if necessary (see Fryxell + // Sec 3.1.2) if condition 4 (Fryxell Eqn 37) (Colella Eqn 1.16.5) is true, + // check further conditions, otherwise do nothing + if ((fabs(d_ipo - d_imo) / fmin(d_ipo, d_imo)) > 0.01) { + // calculate the second derivative of the density in the imo and ipo cells d2_rho_imo = calc_d2_rho(d_imt, d_imo, d_i, dx); d2_rho_ipo = calc_d2_rho(d_i, d_ipo, d_ipt, dx); - //if condition 1 (Fryxell Eqn 38) (Colella Eqn 1.16.5) is true, check further conditions, otherwise do nothing - if ((d2_rho_imo * d2_rho_ipo) < 0) - { - //calculate condition 5, pressure vs density jumps (Fryxell Eqn 39) (Colella Eqn 3.2) - //if c5 is true, set value of eta for discontinuity steepening - if ((fabs(p_ipo - p_imo) / fmin(p_ipo, p_imo)) < 0.1 * gamma * (fabs(d_ipo - d_imo) / fmin(d_ipo, d_imo))) - { - //calculate first eta value (Fryxell Eqn 36) (Colella Eqn 1.16.5) + // if condition 1 (Fryxell Eqn 38) (Colella Eqn 1.16.5) is true, check + // further conditions, otherwise do nothing + if ((d2_rho_imo * d2_rho_ipo) < 0) { + // calculate condition 5, pressure vs density jumps (Fryxell Eqn 39) + // (Colella Eqn 3.2) if c5 is true, set value of eta for discontinuity + // steepening + if ((fabs(p_ipo - p_imo) / fmin(p_ipo, p_imo)) < + 0.1 * gamma * (fabs(d_ipo - d_imo) / fmin(d_ipo, d_imo))) { + // calculate first eta value (Fryxell Eqn 36) (Colella Eqn 1.16.5) eta_i = calc_eta(d2_rho_imo, d2_rho_ipo, dx, d_imo, d_ipo); - //calculate steepening coefficient (Fryxell Eqn 40) (Colella Eqn 1.16) - eta_i = fmax(0, fmin(20*(eta_i-0.05), 1) ); + // calculate steepening coefficient (Fryxell Eqn 40) (Colella + // Eqn 1.16) + eta_i = fmax(0, fmin(20 * (eta_i - 0.05), 1)); - //calculate new left and right interface variables using monotonized slopes + // calculate new left and right interface variables using monotonized + // slopes del_q_imo = Calculate_Slope(d_imt, d_imo, d_i); del_q_ipo = Calculate_Slope(d_i, d_ipo, d_ipt); - //replace left and right interface values of density (Colella Eqn 1.14, 1.15) - d_L = d_L*(1-eta_i) + (d_imo + 0.5 * del_q_imo) * eta_i; - d_R = d_R*(1-eta_i) + (d_ipo - 0.5 * del_q_ipo) * eta_i; + // replace left and right interface values of density (Colella + // Eqn 1.14, 1.15) + d_L = d_L * (1 - eta_i) + (d_imo + 0.5 * del_q_imo) * eta_i; + d_R = d_R * (1 - eta_i) + (d_ipo - 0.5 * del_q_ipo) * eta_i; } } } -#endif //STEEPENING + #endif // STEEPENING -#ifdef FLATTENING + #ifdef FLATTENING Real F_imo, F_i, F_ipo; - //flatten shock fronts that are too narrow (see Fryxell Sec 3.1.3) - //calculate the shock steepness parameters (Fryxell Eqn 43) - //calculate the dimensionless flattening coefficients (Fryxell Eqn 45) - F_imo = fmax( 0, fmin(1, 10*(( (p_i - p_imt) / (p_ipo - p_imth)) - 0.75)) ); - F_i = fmax( 0, fmin(1, 10*(( (p_ipo - p_imo) / (p_ipt - p_imt)) - 0.75)) ); - F_ipo = fmax( 0, fmin(1, 10*(( (p_ipt - p_i) / (p_ipth - p_imo)) - 0.75)) ); - //ensure that we are encountering a shock (Fryxell Eqns 46 & 47) - if (fabs(p_i - p_imt) / fmin(p_i, p_imt) < 1./3.) {F_imo = 0;} - if (fabs(p_ipo - p_imo) / fmin(p_ipo, p_imo) < 1./3.) {F_i = 0;} - if (fabs(p_ipt - p_i) / fmin(p_ipt, p_i) < 1./3.) {F_ipo = 0;} - if (vx_i - vx_imt > 0) {F_imo = 0;} - if (vx_ipo - vx_imo > 0) {F_i = 0;} - if (vx_ipt - vx_i > 0) {F_ipo = 0;} - //set the flattening coefficient (Fryxell Eqn 48) - if (p_ipo - p_imo < 0) {F_i = fmax(F_i, F_ipo);} - else {F_i = fmax(F_i, F_imo);} - //modify the interface values - d_L = F_i * d_i + (1 - F_i) * d_L; + // flatten shock fronts that are too narrow (see Fryxell Sec 3.1.3) + // calculate the shock steepness parameters (Fryxell Eqn 43) + // calculate the dimensionless flattening coefficients (Fryxell Eqn 45) + F_imo = fmax(0, fmin(1, 10 * (((p_i - p_imt) / (p_ipo - p_imth)) - 0.75))); + F_i = fmax(0, fmin(1, 10 * (((p_ipo - p_imo) / (p_ipt - p_imt)) - 0.75))); + F_ipo = fmax(0, fmin(1, 10 * (((p_ipt - p_i) / (p_ipth - p_imo)) - 0.75))); + // ensure that we are encountering a shock (Fryxell Eqns 46 & 47) + if (fabs(p_i - p_imt) / fmin(p_i, p_imt) < 1. / 3.) { + F_imo = 0; + } + if (fabs(p_ipo - p_imo) / fmin(p_ipo, p_imo) < 1. / 3.) { + F_i = 0; + } + if (fabs(p_ipt - p_i) / fmin(p_ipt, p_i) < 1. / 3.) { + F_ipo = 0; + } + if (vx_i - vx_imt > 0) { + F_imo = 0; + } + if (vx_ipo - vx_imo > 0) { + F_i = 0; + } + if (vx_ipt - vx_i > 0) { + F_ipo = 0; + } + // set the flattening coefficient (Fryxell Eqn 48) + if (p_ipo - p_imo < 0) { + F_i = fmax(F_i, F_ipo); + } else { + F_i = fmax(F_i, F_imo); + } + // modify the interface values + d_L = F_i * d_i + (1 - F_i) * d_L; vx_L = F_i * vx_i + (1 - F_i) * vx_L; vy_L = F_i * vy_i + (1 - F_i) * vy_L; vz_L = F_i * vz_i + (1 - F_i) * vz_L; - p_L = F_i * p_i + (1 - F_i) * p_L; - #ifdef DE + p_L = F_i * p_i + (1 - F_i) * p_L; + #ifdef DE ge_L = F_i * ge_i + (1 - F_i) * ge_L; - #endif //DE - #ifdef SCALAR - for (int i=0; i= 0) { chi_L_m = 0; } - if (lambda_0 >= 0) { chi_L_0 = 0; } - if (lambda_p >= 0) { chi_L_p = 0; } - if (lambda_m <= 0) { chi_R_m = 0; } - if (lambda_0 <= 0) { chi_R_0 = 0; } - if (lambda_p <= 0) { chi_R_p = 0; } - - // use the chi values to correct the initial guesses and calculate final input states - p_L = p_L + (d_L*d_L*cl*cl) * (chi_L_p + chi_L_m); - vx_L = vx_L + d_L*cl * (chi_L_p - chi_L_m); - d_L = pow( ((1.0/d_L) - (chi_L_m + chi_L_0 + chi_L_p)) , -1); - p_R = p_L + (d_R*d_R*cr*cr) * (chi_R_p + chi_R_m); - vx_R = vx_R + d_R*cr * (chi_R_p - chi_R_m); - d_R = pow( ((1.0/d_R) - (chi_R_m + chi_R_0 + chi_R_p)) , -1); -#endif //CTU + chi_L_m = 1. / (2 * d_L * cl) * (vx_L - vxL_m - (p_L - pL_m) / (d_L * cl)); + chi_L_p = -1. / (2 * d_L * cl) * (vx_L - vxL_p + (p_L - pL_p) / (d_L * cl)); + chi_L_0 = (p_L - pL_0) / (d_L * d_L * cl * cl) + 1. / d_L - 1. / dL_0; + chi_R_m = 1. / (2 * d_R * cr) * (vx_R - vxR_m - (p_R - pR_m) / (d_R * cr)); + chi_R_p = -1. / (2 * d_R * cr) * (vx_R - vxR_p + (p_R - pR_p) / (d_R * cr)); + chi_R_0 = (p_R - pR_0) / (d_R * d_R * cr * cr) + 1. / d_R - 1. / dR_0; + + // set chi to 0 if characteristic velocity has the wrong sign (Fryxell Eqn + // 64) + if (lambda_m >= 0) { + chi_L_m = 0; + } + if (lambda_0 >= 0) { + chi_L_0 = 0; + } + if (lambda_p >= 0) { + chi_L_p = 0; + } + if (lambda_m <= 0) { + chi_R_m = 0; + } + if (lambda_0 <= 0) { + chi_R_0 = 0; + } + if (lambda_p <= 0) { + chi_R_p = 0; + } + // use the chi values to correct the initial guesses and calculate final + // input states + p_L = p_L + (d_L * d_L * cl * cl) * (chi_L_p + chi_L_m); + vx_L = vx_L + d_L * cl * (chi_L_p - chi_L_m); + d_L = pow(((1.0 / d_L) - (chi_L_m + chi_L_0 + chi_L_p)), -1); + p_R = p_L + (d_R * d_R * cr * cr) * (chi_R_p + chi_R_m); + vx_R = vx_R + d_R * cr * (chi_R_p - chi_R_m); + d_R = pow(((1.0 / d_R) - (chi_R_m + chi_R_0 + chi_R_p)), -1); + #endif // CTU // Apply mimimum constraints - d_L = fmax(d_L, (Real) TINY_NUMBER); - d_R = fmax(d_R, (Real) TINY_NUMBER); - p_L = fmax(p_L, (Real) TINY_NUMBER); - p_R = fmax(p_R, (Real) TINY_NUMBER); - - // Convert the left and right states in the primitive to the conserved variables - // send final values back from kernel - // bounds_R refers to the right side of the i-1/2 interface - if (dir == 0) id = xid-1 + yid*nx + zid*nx*ny; - if (dir == 1) id = xid + (yid-1)*nx + zid*nx*ny; - if (dir == 2) id = xid + yid*nx + (zid-1)*nx*ny; - dev_bounds_R[ id] = d_L; - dev_bounds_R[o1*n_cells + id] = d_L*vx_L; - dev_bounds_R[o2*n_cells + id] = d_L*vy_L; - dev_bounds_R[o3*n_cells + id] = d_L*vz_L; - dev_bounds_R[4*n_cells + id] = p_L/(gamma-1.0) + 0.5*d_L*(vx_L*vx_L + vy_L*vy_L + vz_L*vz_L); + d_L = fmax(d_L, (Real)TINY_NUMBER); + d_R = fmax(d_R, (Real)TINY_NUMBER); + p_L = fmax(p_L, (Real)TINY_NUMBER); + p_R = fmax(p_R, (Real)TINY_NUMBER); + + // Convert the left and right states in the primitive to the conserved + // variables send final values back from kernel bounds_R refers to the right + // side of the i-1/2 interface + if (dir == 0) id = xid - 1 + yid * nx + zid * nx * ny; + if (dir == 1) id = xid + (yid - 1) * nx + zid * nx * ny; + if (dir == 2) id = xid + yid * nx + (zid - 1) * nx * ny; + dev_bounds_R[id] = d_L; + dev_bounds_R[o1 * n_cells + id] = d_L * vx_L; + dev_bounds_R[o2 * n_cells + id] = d_L * vy_L; + dev_bounds_R[o3 * n_cells + id] = d_L * vz_L; + dev_bounds_R[4 * n_cells + id] = + p_L / (gamma - 1.0) + + 0.5 * d_L * (vx_L * vx_L + vy_L * vy_L + vz_L * vz_L); #ifdef SCALAR - for (int i=0; i 0.0) { del_q_G = 2.0*del_q_L*del_q_R / (del_q_L+del_q_R); } - else { del_q_G = 0.0; } - + if (del_q_L * del_q_R > 0.0) { + del_q_G = 2.0 * del_q_L * del_q_R / (del_q_L + del_q_R); + } else { + del_q_G = 0.0; + } // Monotonize the differences lim_slope_a = fmin(fabs(del_q_L), fabs(del_q_R)); lim_slope_b = fmin(fabs(del_q_C), fabs(del_q_G)); // Minmod limiter - //del_q_m = sgn_CUDA(del_q_C)*fmin(2.0*lim_slope_a, fabs(del_q_C)); + // del_q_m = sgn_CUDA(del_q_C)*fmin(2.0*lim_slope_a, fabs(del_q_C)); // Van Leer limiter - del_q_m = sgn_CUDA(del_q_C) * fmin((Real) 2.0*lim_slope_a, lim_slope_b); + del_q_m = sgn_CUDA(del_q_C) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); return del_q_m; - } - -/*! \fn __device__ void Interface_Values_PPM(Real q_imo, Real q_i, Real q_ipo, Real del_q_imo, Real del_q_i, Real del_q_ipo, Real *q_L, Real *q_R) - * \brief Calculates the left and right interface values for a cell using parabolic reconstruction - in the primitive variables with limited slopes provided. Applies further monotonicity constraints.*/ -__device__ void Interface_Values_PPM(Real q_imo, Real q_i, Real q_ipo, Real del_q_imo, Real del_q_i, Real del_q_ipo, Real *q_L, Real *q_R) +/*! \fn __device__ void Interface_Values_PPM(Real q_imo, Real q_i, Real q_ipo, + Real del_q_imo, Real del_q_i, Real del_q_ipo, Real *q_L, Real *q_R) + * \brief Calculates the left and right interface values for a cell using + parabolic reconstruction in the primitive variables with limited slopes + provided. Applies further monotonicity constraints.*/ +__device__ void Interface_Values_PPM(Real q_imo, Real q_i, Real q_ipo, + Real del_q_imo, Real del_q_i, + Real del_q_ipo, Real *q_L, Real *q_R) { // Calculate the left and right interface values using the limited slopes - *q_L = 0.5*(q_i + q_imo) - (1.0/6.0)*(del_q_i - del_q_imo); - *q_R = 0.5*(q_ipo + q_i) - (1.0/6.0)*(del_q_ipo - del_q_i); + *q_L = 0.5 * (q_i + q_imo) - (1.0 / 6.0) * (del_q_i - del_q_imo); + *q_R = 0.5 * (q_ipo + q_i) - (1.0 / 6.0) * (del_q_ipo - del_q_i); - // Apply further monotonicity constraints to ensure interface values lie between - // neighboring cell-centered values + // Apply further monotonicity constraints to ensure interface values lie + // between neighboring cell-centered values // local maximum or minimum criterion (Fryxell Eqn 52, Fig 11) - if ((*q_R - q_i)*(q_i - *q_L) <= 0) *q_L = *q_R = q_i; + if ((*q_R - q_i) * (q_i - *q_L) <= 0) *q_L = *q_R = q_i; // steep gradient criterion (Fryxell Eqn 53, Fig 12) - if (6.0*(*q_R - *q_L)*(q_i - 0.5*(*q_L + *q_R)) > (*q_R - *q_L)*(*q_R - *q_L)) *q_L = 3.0*q_i - 2.0*(*q_R); - if (6.0*(*q_R - *q_L)*(q_i - 0.5*(*q_L + *q_R)) < -(*q_R - *q_L)*(*q_R - *q_L)) *q_R = 3.0*q_i - 2.0*(*q_L); - - *q_L = fmax( fmin(q_i, q_imo), *q_L ); - *q_L = fmin( fmax(q_i, q_imo), *q_L ); - *q_R = fmax( fmin(q_i, q_ipo), *q_R ); - *q_R = fmin( fmax(q_i, q_ipo), *q_R ); - + if (6.0 * (*q_R - *q_L) * (q_i - 0.5 * (*q_L + *q_R)) > + (*q_R - *q_L) * (*q_R - *q_L)) + *q_L = 3.0 * q_i - 2.0 * (*q_R); + if (6.0 * (*q_R - *q_L) * (q_i - 0.5 * (*q_L + *q_R)) < + -(*q_R - *q_L) * (*q_R - *q_L)) + *q_R = 3.0 * q_i - 2.0 * (*q_L); + + *q_L = fmax(fmin(q_i, q_imo), *q_L); + *q_L = fmin(fmax(q_i, q_imo), *q_L); + *q_R = fmax(fmin(q_i, q_ipo), *q_R); + *q_R = fmin(fmax(q_i, q_ipo), *q_R); } - /*! \fn calc_d2_rho - * \brief Returns the second derivative of rho across zone i. (Fryxell Eqn 35) */ + * \brief Returns the second derivative of rho across zone i. (Fryxell Eqn 35) + */ __device__ Real calc_d2_rho(Real rho_imo, Real rho_i, Real rho_ipo, Real dx) { - return (1. / (6*dx*dx)) * (rho_ipo - 2*rho_i + rho_imo); + return (1. / (6 * dx * dx)) * (rho_ipo - 2 * rho_i + rho_imo); } - /*! \fn calc_eta * \brief Returns a dimensionless quantity relating the 1st and 3rd derivatives See Fryxell Eqn 36. */ -__device__ Real calc_eta(Real d2rho_imo, Real d2rho_ipo, Real dx, Real rho_imo, Real rho_ipo) +__device__ Real calc_eta(Real d2rho_imo, Real d2rho_ipo, Real dx, Real rho_imo, + Real rho_ipo) { Real A, B; - A = (d2rho_ipo - d2rho_imo)*dx*dx; + A = (d2rho_ipo - d2rho_imo) * dx * dx; B = 1.0 / (rho_ipo - rho_imo); return -A * B; } - - -#endif //PPMP -#endif //CUDA + #endif // PPMP +#endif // CUDA diff --git a/src/reconstruction/ppmp_cuda.h b/src/reconstruction/ppmp_cuda.h index c8a85711e..b6cf0d212 100644 --- a/src/reconstruction/ppmp_cuda.h +++ b/src/reconstruction/ppmp_cuda.h @@ -3,34 +3,45 @@ #ifdef CUDA -#ifndef PPMP_CUDA_H -#define PPMP_CUDA_H - - -#include "../global/global.h" - -/*! \fn __global__ void PPMP_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, int ny, int nz, int n_ghost, Real dx, Real dt, Real gamma, int dir, int n_fields) - * \brief When passed a stencil of conserved variables, returns the left and right - boundary values for the interface calculated using ppm with limiting in the primitive variables. */ -__global__ void PPMP_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, int ny, int nz, int n_ghost, Real dx, Real dt, Real gamma, int dir, int n_fields); + #ifndef PPMP_CUDA_H + #define PPMP_CUDA_H + + #include "../global/global.h" + +/*! \fn __global__ void PPMP_cuda(Real *dev_conserved, Real *dev_bounds_L, Real + *dev_bounds_R, int nx, int ny, int nz, int n_ghost, Real dx, Real dt, Real + gamma, int dir, int n_fields) + * \brief When passed a stencil of conserved variables, returns the left and + right boundary values for the interface calculated using ppm with limiting in + the primitive variables. */ +__global__ void PPMP_cuda(Real *dev_conserved, Real *dev_bounds_L, + Real *dev_bounds_R, int nx, int ny, int nz, + int n_ghost, Real dx, Real dt, Real gamma, int dir, + int n_fields); /*! \fn __device__ Real Calculate_Slope(Real q_imo, Real q_i, Real q_ipo) * \brief Calculates the limited slope across a cell.*/ __device__ Real Calculate_Slope(Real q_imo, Real q_i, Real q_ipo); -/*! \fn __device__ void Interface_Values_PPM(Real q_imo, Real q_i, Real q_ipo, Real *q_L, Real *q_R) - * \brief Calculates the left and right interface values for a cell using parabolic reconstruction - in the primitive variables with limited slopes provided. Applies further monotonicity constraints.*/ -__device__ void Interface_Values_PPM(Real q_imo, Real q_i, Real q_ipo, Real del_q_imo, Real del_q_i, Real del_q_ipo, Real *q_L, Real *q_R); +/*! \fn __device__ void Interface_Values_PPM(Real q_imo, Real q_i, Real q_ipo, + Real *q_L, Real *q_R) + * \brief Calculates the left and right interface values for a cell using + parabolic reconstruction in the primitive variables with limited slopes + provided. Applies further monotonicity constraints.*/ +__device__ void Interface_Values_PPM(Real q_imo, Real q_i, Real q_ipo, + Real del_q_imo, Real del_q_i, + Real del_q_ipo, Real *q_L, Real *q_R); /*! \fn calc_d2_rho - * \brief Returns the second derivative of rho across zone i. (Fryxell Eqn 35) */ + * \brief Returns the second derivative of rho across zone i. (Fryxell Eqn 35) + */ __device__ Real calc_d2_rho(Real rho_imo, Real rho_i, Real rho_ipo, Real dx); /*! \fn calc_eta * \brief Returns a dimensionless quantity relating the 1st and 3rd derivatives See Fryxell Eqn 36. */ -__device__ Real calc_eta(Real d2rho_imo, Real d2rho_ipo, Real dx, Real rho_imo, Real rho_ipo); +__device__ Real calc_eta(Real d2rho_imo, Real d2rho_ipo, Real dx, Real rho_imo, + Real rho_ipo); -#endif // PPMP_CUDA_H -#endif // CUDA + #endif // PPMP_CUDA_H +#endif // CUDA diff --git a/src/riemann_solvers/exact_cuda.cu b/src/riemann_solvers/exact_cuda.cu index d84464828..088166742 100644 --- a/src/riemann_solvers/exact_cuda.cu +++ b/src/riemann_solvers/exact_cuda.cu @@ -3,112 +3,125 @@ #ifdef CUDA -#include "../utils/gpu.hpp" -#include -#include -#include "../global/global.h" -#include "../global/global_cuda.h" -#include "../riemann_solvers/exact_cuda.h" - -#ifdef DE //PRESSURE_DE -#include "../utils/hydro_utilities.h" -#endif + #include + #include + #include "../global/global.h" + #include "../global/global_cuda.h" + #include "../riemann_solvers/exact_cuda.h" + #include "../utils/gpu.hpp" + #ifdef DE // PRESSURE_DE + #include "../utils/hydro_utilities.h" + #endif -/*! \fn Calculate_Exact_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_R, Real *dev_flux, int nx, int ny, int nz, int n_ghost, Real gamma, int dir, int n_fields) - * \brief Exact Riemann solver based on the Fortran code given in Sec. 4.9 of Toro (1999). */ -__global__ void Calculate_Exact_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_R, Real *dev_flux, int nx, int ny, int nz, int n_ghost, Real gamma, int dir, int n_fields) +/*! \fn Calculate_Exact_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_R, Real + * *dev_flux, int nx, int ny, int nz, int n_ghost, Real gamma, int dir, int + * n_fields) \brief Exact Riemann solver based on the Fortran code given in + * Sec. 4.9 of Toro (1999). */ +__global__ void Calculate_Exact_Fluxes_CUDA(Real *dev_bounds_L, + Real *dev_bounds_R, Real *dev_flux, + int nx, int ny, int nz, int n_ghost, + Real gamma, int dir, int n_fields) { // get a thread index - int blockId = blockIdx.x + blockIdx.y*gridDim.x; - int tid = threadIdx.x + blockId * blockDim.x; - int zid = tid / (nx*ny); - int yid = (tid - zid*nx*ny) / nx; - int xid = tid - zid*nx*ny - yid*nx; + int blockId = blockIdx.x + blockIdx.y * gridDim.x; + int tid = threadIdx.x + blockId * blockDim.x; + int zid = tid / (nx * ny); + int yid = (tid - zid * nx * ny) / nx; + int xid = tid - zid * nx * ny - yid * nx; - int n_cells = nx*ny*nz; + int n_cells = nx * ny * nz; int o1, o2, o3; if (dir == 0) { - o1 = 1; o2 = 2; o3 = 3; + o1 = 1; + o2 = 2; + o3 = 3; } if (dir == 1) { - o1 = 2; o2 = 3; o3 = 1; + o1 = 2; + o2 = 3; + o3 = 1; } if (dir == 2) { - o1 = 3; o2 = 1; o3 = 2; + o1 = 3; + o2 = 1; + o3 = 2; } - Real dl, vxl, vyl, vzl, pl, cl; //density, velocity, pressure, sound speed (left) - Real dr, vxr, vyr, vzr, pr, cr; //density, velocity, pressure, sound speed (right) - Real ds, vs, ps, Es; //sample_CUDAd density, velocity, pressure, total energy - Real vm, pm; //velocity and pressure in the star region + Real dl, vxl, vyl, vzl, pl, + cl; // density, velocity, pressure, sound speed (left) + Real dr, vxr, vyr, vzr, pr, + cr; // density, velocity, pressure, sound speed (right) + Real ds, vs, ps, Es; // sample_CUDAd density, velocity, pressure, total + // energy + Real vm, pm; // velocity and pressure in the star region #ifdef DE - Real gel, ger, E_kin, E, dge ; + Real gel, ger, E_kin, E, dge; #endif #ifdef SCALAR Real scalarl[NSCALARS], scalarr[NSCALARS]; #endif - // Each thread executes the solver independently - //if (xid > n_ghost-3 && xid < nx-n_ghost+1 && yid < ny && zid < nz) - if (xid < nx && yid < ny && zid < nz) - { + // if (xid > n_ghost-3 && xid < nx-n_ghost+1 && yid < ny && zid < nz) + if (xid < nx && yid < ny && zid < nz) { // retrieve primitive variables - dl = dev_bounds_L[ tid]; - vxl = dev_bounds_L[o1*n_cells + tid]/dl; - vyl = dev_bounds_L[o2*n_cells + tid]/dl; - vzl = dev_bounds_L[o3*n_cells + tid]/dl; - #ifdef DE //PRESSURE_DE - E = dev_bounds_L[4*n_cells + tid]; - E_kin = 0.5 * dl * ( vxl*vxl + vyl*vyl + vzl*vzl ); - dge = dev_bounds_L[(n_fields-1)*n_cells + tid]; - pl = hydro_utilities::Get_Pressure_From_DE( E, E - E_kin, dge, gamma ); - #else - pl = (dev_bounds_L[4*n_cells + tid] - 0.5*dl*(vxl*vxl + vyl*vyl + vzl*vzl)) * (gamma - 1.0); - #endif //PRESSURE_DE - pl = fmax(pl, (Real) TINY_NUMBER); - #ifdef SCALAR - for (int i=0; i= 0) - { - dev_flux[o2*n_cells + tid] = ds*vs*vyl; - dev_flux[o3*n_cells + tid] = ds*vs*vzl; - #ifdef SCALAR - for (int i=0; i= 0) { + dev_flux[o2 * n_cells + tid] = ds * vs * vyl; + dev_flux[o3 * n_cells + tid] = ds * vs * vzl; + #ifdef SCALAR + for (int i = 0; i < NSCALARS; i++) { + dev_flux[(5 + i) * n_cells + tid] = ds * vs * scalarl[i]; } - #endif - #ifdef DE - dev_flux[(n_fields-1)*n_cells + tid] = ds*vs*gel; - #endif - Es = (ps/(gamma - 1.0)) + 0.5*ds*(vs*vs + vyl*vyl + vzl*vzl); - } - else - { - dev_flux[o2*n_cells + tid] = ds*vs*vyr; - dev_flux[o3*n_cells + tid] = ds*vs*vzr; - #ifdef SCALAR - for (int i=0; i nriter) { - //printf("Divergence in Newton-Raphson iteration. p = %e\n", *p); + // printf("Divergence in Newton-Raphson iteration. p = %e\n", *p); } // compute velocity in star region - *v = 0.5*(vxl + vxr + fr - fl); - + *v = 0.5 * (vxl + vxr + fr - fl); } - -__device__ void sample_CUDA(const Real pm, const Real vm, Real *d, Real *v, Real *p, - Real dl, Real vxl, Real pl, Real cl, Real dr, Real vxr, Real pr, Real cr, Real gamma) +__device__ void sample_CUDA(const Real pm, const Real vm, Real *d, Real *v, + Real *p, Real dl, Real vxl, Real pl, Real cl, + Real dr, Real vxr, Real pr, Real cr, Real gamma) { // purpose: to sample the solution throughout the wave // pattern. Pressure pm and velocity vm in the @@ -251,96 +264,89 @@ __device__ void sample_CUDA(const Real pm, const Real vm, Real *d, Real *v, Real Real c, sl, sr; - if (vm >= 0) // sampling point lies to the left of the contact discontinuity + if (vm >= 0) // sampling point lies to the left of the contact discontinuity { - if (pm <= pl) // left rarefaction + if (pm <= pl) // left rarefaction { - if (vxl - cl >= 0) // sampled point is in left data state + if (vxl - cl >= 0) // sampled point is in left data state { *d = dl; *v = vxl; *p = pl; - } - else - { - if (vm - cl*powf(pm/pl, (gamma - 1.0)/(2.0 * gamma)) < 0) // sampled point is in star left state + } else { + if (vm - cl * powf(pm / pl, (gamma - 1.0) / (2.0 * gamma)) < + 0) // sampled point is in star left state { - *d = dl*powf(pm/pl, 1.0/gamma); + *d = dl * powf(pm / pl, 1.0 / gamma); *v = vm; *p = pm; - } - else // sampled point is inside left fan + } else // sampled point is inside left fan { - c = (2.0 / (gamma + 1.0))*(cl + ((gamma - 1.0) / 2.0)*vxl); + c = (2.0 / (gamma + 1.0)) * (cl + ((gamma - 1.0) / 2.0) * vxl); *v = c; - *d = dl*powf(c/cl, 2.0 / (gamma - 1.0)); - *p = pl*powf(c/cl, 2.0 * gamma / (gamma - 1.0)); + *d = dl * powf(c / cl, 2.0 / (gamma - 1.0)); + *p = pl * powf(c / cl, 2.0 * gamma / (gamma - 1.0)); } } - } - else // left shock + } else // left shock { - sl = vxl - cl*sqrt(((gamma + 1.0)/(2.0 * gamma))*(pm/pl) + ((gamma - 1.0)/(2.0 * gamma))); - if (sl >= 0) // sampled point is in left data state + sl = vxl - cl * sqrt(((gamma + 1.0) / (2.0 * gamma)) * (pm / pl) + + ((gamma - 1.0) / (2.0 * gamma))); + if (sl >= 0) // sampled point is in left data state { *d = dl; *v = vxl; *p = pl; - } - else // sampled point is in star left state + } else // sampled point is in star left state { - *d = dl*(pm/pl + ((gamma - 1.0) / (gamma + 1.0)))/((pm/pl)*((gamma - 1.0) / (gamma + 1.0)) + 1.0); + *d = dl * (pm / pl + ((gamma - 1.0) / (gamma + 1.0))) / + ((pm / pl) * ((gamma - 1.0) / (gamma + 1.0)) + 1.0); *v = vm; *p = pm; } } - } - else // sampling point lies to the right of the contact discontinuity + } else // sampling point lies to the right of the contact discontinuity { - if (pm > pr) // right shock + if (pm > pr) // right shock { - sr = vxr + cr*sqrt(((gamma + 1.0)/(2.0 * gamma))*(pm/pr) + ((gamma - 1.0)/(2.0 * gamma))); - if (sr <= 0) // sampled point is in right data state + sr = vxr + cr * sqrt(((gamma + 1.0) / (2.0 * gamma)) * (pm / pr) + + ((gamma - 1.0) / (2.0 * gamma))); + if (sr <= 0) // sampled point is in right data state { *d = dr; *v = vxr; *p = pr; - } - else // sampled point is in star right state + } else // sampled point is in star right state { - *d = dr*(pm/pr + ((gamma - 1.0) / (gamma + 1.0)))/((pm/pr)*((gamma - 1.0) / (gamma + 1.0)) + 1.0); + *d = dr * (pm / pr + ((gamma - 1.0) / (gamma + 1.0))) / + ((pm / pr) * ((gamma - 1.0) / (gamma + 1.0)) + 1.0); *v = vm; *p = pm; } - } - else // right rarefaction + } else // right rarefaction { - if (vxr + cr <= 0) // sampled point is in right data state + if (vxr + cr <= 0) // sampled point is in right data state { *d = dr; *v = vxr; *p = pr; - } - else - { - if (vm + cr*powf(pm/pr, (gamma - 1.0)/(2.0 * gamma)) >= 0) // sampled point is in star right state + } else { + if (vm + cr * powf(pm / pr, (gamma - 1.0) / (2.0 * gamma)) >= + 0) // sampled point is in star right state { - *d = dr*powf(pm/pr, (1.0/gamma)); + *d = dr * powf(pm / pr, (1.0 / gamma)); *v = vm; *p = pm; - } - else // sampled point is inside right fan + } else // sampled point is inside right fan { - c = (2.0 / (gamma + 1.0))*(cr - ((gamma - 1.0) / 2.0)*vxr); + c = (2.0 / (gamma + 1.0)) * (cr - ((gamma - 1.0) / 2.0) * vxr); *v = -c; - *d = dr*powf(c/cr, 2.0 / (gamma - 1.0)); - *p = pr*powf(c/cr, 2.0 * gamma / (gamma - 1.0)); + *d = dr * powf(c / cr, 2.0 / (gamma - 1.0)); + *p = pr * powf(c / cr, 2.0 * gamma / (gamma - 1.0)); } } } } } - - -#endif //CUDA +#endif // CUDA diff --git a/src/riemann_solvers/exact_cuda.h b/src/riemann_solvers/exact_cuda.h index 4d6d1f3d6..ed62928c6 100644 --- a/src/riemann_solvers/exact_cuda.h +++ b/src/riemann_solvers/exact_cuda.h @@ -1,27 +1,36 @@ /*! \file exact_cuda.h - * \brief Declarations of functions for the cuda exact riemann solver kernel. */ + * \brief Declarations of functions for the cuda exact riemann solver kernel. + */ #ifdef CUDA -#ifndef EXACT_CUDA_H -#define EXACT_CUDA_H + #ifndef EXACT_CUDA_H + #define EXACT_CUDA_H -#include "../global/global.h" + #include "../global/global.h" +/*! \fn Calculate_Exact_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_R, Real + * *dev_flux, int nx, int ny, int nz, int n_ghost, Real gamma, int dir, int + * n_fields) \brief Exact Riemann solver based on the Fortran code given in + * Sec. 4.9 of Toro (1999). */ +__global__ void Calculate_Exact_Fluxes_CUDA(Real *dev_bounds_L, + Real *dev_bounds_R, Real *dev_flux, + int nx, int ny, int nz, int n_ghost, + Real gamma, int dir, int n_fields); -/*! \fn Calculate_Exact_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_R, Real *dev_flux, int nx, int ny, int nz, int n_ghost, Real gamma, int dir, int n_fields) - * \brief Exact Riemann solver based on the Fortran code given in Sec. 4.9 of Toro (1999). */ -__global__ void Calculate_Exact_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_R, Real *dev_flux, int nx, int ny, int nz, int n_ghost, Real gamma, int dir, int n_fields); +__device__ Real guessp_CUDA(Real dl, Real vxl, Real pl, Real cl, Real dr, + Real vxr, Real pr, Real cr, Real gamma); -__device__ Real guessp_CUDA(Real dl, Real vxl, Real pl, Real cl, Real dr, Real vxr, Real pr, Real cr, Real gamma); +__device__ void prefun_CUDA(Real *f, Real *fd, Real p, Real dk, Real pk, + Real ck, Real gamma); -__device__ void prefun_CUDA(Real *f, Real *fd, Real p, Real dk, Real pk, Real ck, Real gamma); +__device__ void starpv_CUDA(Real *p, Real *v, Real dl, Real vxl, Real pl, + Real cl, Real dr, Real vxr, Real pr, Real cr, + Real gamma); -__device__ void starpv_CUDA(Real *p, Real *v, Real dl, Real vxl, Real pl, Real cl, Real dr, Real vxr, Real pr, Real cr, Real gamma); +__device__ void sample_CUDA(const Real pm, const Real vm, Real *d, Real *v, + Real *p, Real dl, Real vxl, Real pl, Real cl, + Real dr, Real vxr, Real pr, Real cr, Real gamma); -__device__ void sample_CUDA(const Real pm, const Real vm, Real *d, Real *v, Real *p, - Real dl, Real vxl, Real pl, Real cl, Real dr, Real vxr, Real pr, Real cr, Real gamma); - - -#endif //EXACT_CUDA_H -#endif //CUDA + #endif // EXACT_CUDA_H +#endif // CUDA diff --git a/src/riemann_solvers/hll_cuda.cu b/src/riemann_solvers/hll_cuda.cu index a69cf9d0f..7540a218d 100644 --- a/src/riemann_solvers/hll_cuda.cu +++ b/src/riemann_solvers/hll_cuda.cu @@ -3,29 +3,34 @@ #ifdef CUDA -#include "../utils/gpu.hpp" -#include -#include "../global/global.h" -#include "../global/global_cuda.h" -#include "../riemann_solvers/hll_cuda.h" + #include -#ifdef DE //PRESSURE_DE -#include "../utils/hydro_utilities.h" -#endif + #include "../global/global.h" + #include "../global/global_cuda.h" + #include "../riemann_solvers/hll_cuda.h" + #include "../utils/gpu.hpp" + #ifdef DE // PRESSURE_DE + #include "../utils/hydro_utilities.h" + #endif -/*! \fn Calculate_HLLC_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_R, Real *dev_flux, int nx, int ny, int nz, int n_ghost, Real gamma, int dir, int n_fields) - * \brief HLLC Riemann solver based on the version described in Toro (2006), Sec. 10.4. */ -__global__ void Calculate_HLL_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_R, Real *dev_flux, int nx, int ny, int nz, int n_ghost, Real gamma, int dir, int n_fields) +/*! \fn Calculate_HLLC_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_R, Real + * *dev_flux, int nx, int ny, int nz, int n_ghost, Real gamma, int dir, int + * n_fields) \brief HLLC Riemann solver based on the version described in Toro + * (2006), Sec. 10.4. */ +__global__ void Calculate_HLL_Fluxes_CUDA(Real *dev_bounds_L, + Real *dev_bounds_R, Real *dev_flux, + int nx, int ny, int nz, int n_ghost, + Real gamma, int dir, int n_fields) { // get a thread index - int blockId = blockIdx.x + blockIdx.y*gridDim.x; - int tid = threadIdx.x + blockId * blockDim.x; - int zid = tid / (nx*ny); - int yid = (tid - zid*nx*ny) / nx; - int xid = tid - zid*nx*ny - yid*nx; + int blockId = blockIdx.x + blockIdx.y * gridDim.x; + int tid = threadIdx.x + blockId * blockDim.x; + int zid = tid / (nx * ny); + int yid = (tid - zid * nx * ny) / nx; + int xid = tid - zid * nx * ny - yid * nx; - int n_cells = nx*ny*nz; + int n_cells = nx * ny * nz; Real dl, vxl, mxl, vyl, myl, vzl, mzl, pl, El; Real dr, vxr, mxr, vyr, myr, vzr, mzr, pr, Er; @@ -44,66 +49,72 @@ __global__ void Calculate_HLL_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_R Real dgel, dger, f_ge_l, f_ge_r, f_ge, E_kin; #endif #ifdef SCALAR - Real dscl[NSCALARS], dscr[NSCALARS], f_sc_l[NSCALARS], f_sc_r[NSCALARS], f_sc[NSCALARS]; + Real dscl[NSCALARS], dscr[NSCALARS], f_sc_l[NSCALARS], f_sc_r[NSCALARS], + f_sc[NSCALARS]; #endif // Real etah = 0; int o1, o2, o3; - if (dir==0) { - o1 = 1; o2 = 2; o3 = 3; + if (dir == 0) { + o1 = 1; + o2 = 2; + o3 = 3; } - if (dir==1) { - o1 = 2; o2 = 3; o3 = 1; + if (dir == 1) { + o1 = 2; + o2 = 3; + o3 = 1; } - if (dir==2) { - o1 = 3; o2 = 1; o3 = 2; + if (dir == 2) { + o1 = 3; + o2 = 1; + o3 = 2; } // Each thread executes the solver independently - //if (xid > n_ghost-3 && xid < nx-n_ghost+1 && yid < ny && zid < nz) - if (xid < nx && yid < ny && zid < nz) - { + // if (xid > n_ghost-3 && xid < nx-n_ghost+1 && yid < ny && zid < nz) + if (xid < nx && yid < ny && zid < nz) { // retrieve conserved variables - dl = dev_bounds_L[ tid]; - mxl = dev_bounds_L[o1*n_cells + tid]; - myl = dev_bounds_L[o2*n_cells + tid]; - mzl = dev_bounds_L[o3*n_cells + tid]; - El = dev_bounds_L[4*n_cells + tid]; - #ifdef SCALAR - for (int i=0; i 0.0) { - dev_flux[ tid] = f_d_l; - dev_flux[o1*n_cells+tid] = f_mx_l; - dev_flux[o2*n_cells+tid] = f_my_l; - dev_flux[o3*n_cells+tid] = f_mz_l; - dev_flux[4*n_cells+tid] = f_E_l; - #ifdef SCALAR - for (int i=0; i -#include "../global/global.h" -#include "../global/global_cuda.h" -#include "../riemann_solvers/hllc_cuda.h" + #include -#ifdef DE //PRESSURE_DE -#include "../utils/hydro_utilities.h" -#endif + #include "../global/global.h" + #include "../global/global_cuda.h" + #include "../riemann_solvers/hllc_cuda.h" + #include "../utils/gpu.hpp" + #ifdef DE // PRESSURE_DE + #include "../utils/hydro_utilities.h" + #endif -/*! \fn Calculate_HLLC_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_R, Real *dev_flux, int nx, int ny, int nz, int n_ghost, Real gamma, int dir, int n_fields) - * \brief HLLC Riemann solver based on the version described in Toro (2006), Sec. 10.4. */ -__global__ void Calculate_HLLC_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_R, Real *dev_flux, int nx, int ny, int nz, int n_ghost, Real gamma, int dir, int n_fields) +/*! \fn Calculate_HLLC_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_R, Real + * *dev_flux, int nx, int ny, int nz, int n_ghost, Real gamma, int dir, int + * n_fields) \brief HLLC Riemann solver based on the version described in Toro + * (2006), Sec. 10.4. */ +__global__ void Calculate_HLLC_Fluxes_CUDA(Real *dev_bounds_L, + Real *dev_bounds_R, Real *dev_flux, + int nx, int ny, int nz, int n_ghost, + Real gamma, int dir, int n_fields) { // get a thread index - int blockId = blockIdx.x + blockIdx.y*gridDim.x; - int tid = threadIdx.x + blockId * blockDim.x; - int zid = tid / (nx*ny); - int yid = (tid - zid*nx*ny) / nx; - int xid = tid - zid*nx*ny - yid*nx; + int blockId = blockIdx.x + blockIdx.y * gridDim.x; + int tid = threadIdx.x + blockId * blockDim.x; + int zid = tid / (nx * ny); + int yid = (tid - zid * nx * ny) / nx; + int xid = tid - zid * nx * ny - yid * nx; - int n_cells = nx*ny*nz; + int n_cells = nx * ny * nz; Real dl, vxl, mxl, vyl, myl, vzl, mzl, pl, El; Real dr, vxr, mxr, vyr, myr, vzr, mzr, pr, Er; @@ -44,92 +49,99 @@ __global__ void Calculate_HLLC_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_ Real dgel, dger, gel, ger, gels, gers, f_ge_l, f_ge_r, f_ge, E_kin; #endif #ifdef SCALAR - Real dscl[NSCALARS], dscr[NSCALARS], scl[NSCALARS], scr[NSCALARS], scls[NSCALARS], scrs[NSCALARS], f_sc_l[NSCALARS], f_sc_r[NSCALARS], f_sc[NSCALARS]; + Real dscl[NSCALARS], dscr[NSCALARS], scl[NSCALARS], scr[NSCALARS], + scls[NSCALARS], scrs[NSCALARS], f_sc_l[NSCALARS], f_sc_r[NSCALARS], + f_sc[NSCALARS]; #endif Real etah = 0; int o1, o2, o3; - if (dir==0) { - o1 = 1; o2 = 2; o3 = 3; + if (dir == 0) { + o1 = 1; + o2 = 2; + o3 = 3; } - if (dir==1) { - o1 = 2; o2 = 3; o3 = 1; + if (dir == 1) { + o1 = 2; + o2 = 3; + o3 = 1; } - if (dir==2) { - o1 = 3; o2 = 1; o3 = 2; + if (dir == 2) { + o1 = 3; + o2 = 1; + o3 = 2; } // Each thread executes the solver independently - //if (xid > n_ghost-3 && xid < nx-n_ghost+1 && yid < ny && zid < nz) - if (xid < nx && yid < ny && zid < nz) - { + // if (xid > n_ghost-3 && xid < nx-n_ghost+1 && yid < ny && zid < nz) + if (xid < nx && yid < ny && zid < nz) { // retrieve conserved variables - dl = dev_bounds_L[ tid]; - mxl = dev_bounds_L[o1*n_cells + tid]; - myl = dev_bounds_L[o2*n_cells + tid]; - mzl = dev_bounds_L[o3*n_cells + tid]; - El = dev_bounds_L[4*n_cells + tid]; - #ifdef SCALAR - for (int i=0; i 0.0) { - dev_flux[ tid] = f_d_l; - dev_flux[o1*n_cells+tid] = f_mx_l; - dev_flux[o2*n_cells+tid] = f_my_l; - dev_flux[o3*n_cells+tid] = f_mz_l; - dev_flux[4*n_cells+tid] = f_E_l; - #ifdef SCALAR - for (int i=0; i -#include #include +#include // External Includes -#include // Include GoogleTest and related libraries/headers +#include // Include GoogleTest and related libraries/headers // Local Includes #include "../global/global_cuda.h" +#include "../riemann_solvers/hllc_cuda.h" // Include code to test #include "../utils/gpu.hpp" #include "../utils/testing_utilities.h" -#include "../riemann_solvers/hllc_cuda.h" // Include code to test #if defined(CUDA) && defined(HLLC) - // ========================================================================= - /*! - * \brief Test fixture for simple testing of the HLLC Riemann Solver. - Effectively takes the left state, right state, fiducial fluxes, and - custom user output then performs all the required running and testing - * - */ - class tHYDROCalculateHLLCFluxesCUDA : public ::testing::Test - { - protected: - // ===================================================================== - /*! - * \brief Compute and return the HLLC fluxes - * - * \param[in] leftState The state on the left side in conserved - * variables. In order the elements are: density, x-momentum, - * y-momentum, z-momentum, and energy. - * \param[in] rightState The state on the right side in conserved - * variables. In order the elements are: density, x-momentum, - * y-momentum, z-momentum, and energy. - * \param[in] gamma The adiabatic index - * \return std::vector - */ - std::vector computeFluxes(std::vector const &stateLeft, - std::vector const &stateRight, - Real const &gamma) - { - // Simulation Paramters - int const nx = 1; // Number of cells in the x-direction? - int const ny = 1; // Number of cells in the y-direction? - int const nz = 1; // Number of cells in the z-direction? - int const nGhost = 0; // Isn't actually used it appears - int const direction = 0; // Which direction, 0=x, 1=y, 2=z - int const nFields = 5; // Total number of conserved fields - - // Launch Parameters - dim3 const dimGrid (1,1,1); // How many blocks in the grid - dim3 const dimBlock(1,1,1); // How many threads per block - - // Create the std::vector to store the fluxes and declare the device - // pointers - std::vector testFlux(5); - Real *devConservedLeft; - Real *devConservedRight; - Real *devTestFlux; - - // Allocate device arrays and copy data - CudaSafeCall(cudaMalloc(&devConservedLeft, nFields*sizeof(Real))); - CudaSafeCall(cudaMalloc(&devConservedRight, nFields*sizeof(Real))); - CudaSafeCall(cudaMalloc(&devTestFlux, nFields*sizeof(Real))); - - CudaSafeCall(cudaMemcpy(devConservedLeft, - stateLeft.data(), - nFields*sizeof(Real), - cudaMemcpyHostToDevice)); - CudaSafeCall(cudaMemcpy(devConservedRight, - stateRight.data(), - nFields*sizeof(Real), - cudaMemcpyHostToDevice)); - - // Run kernel - hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, - dimGrid, - dimBlock, - 0, - 0, - devConservedLeft, // the "left" interface - devConservedRight, // the "right" interface - devTestFlux, - nx, - ny, - nz, - nGhost, - gamma, - direction, - nFields); - - CudaCheckError(); - CudaSafeCall(cudaMemcpy(testFlux.data(), - devTestFlux, - nFields*sizeof(Real), - cudaMemcpyDeviceToHost)); - - // Make sure to sync with the device so we have the results - cudaDeviceSynchronize(); - CudaCheckError(); - - return testFlux; - } - // ===================================================================== - - // ===================================================================== - /*! - * \brief Check if the fluxes are correct - * - * \param[in] fiducialFlux The fiducial flux in conserved variables. In - * order the elements are: density, x-momentum, y-momentum, z-momentum, - * and energy. - * \param[in] testFlux The test flux in conserved variables. In order - * the elements are: density, x-momentum, y-momentum, z-momentum, and - * energy. - * \param[in] customOutput Any custom output the user would like to - * print. It will print after the default GTest output but before the - * values that failed are printed - */ - void checkResults(std::vector const &fiducialFlux, - std::vector const &testFlux, - std::string const &customOutput = "") - { - // Field names - std::vector const fieldNames {"Densities", - "X Momentum", - "Y Momentum", - "Z Momentum", - "Energies"}; - - ASSERT_TRUE( (fiducialFlux.size() == testFlux.size()) - and (fiducialFlux.size() == fieldNames.size())) - << "The fiducial flux, test flux, and field name vectors are not all the same length" << std::endl - << "fiducialFlux.size() = " << fiducialFlux.size() << std::endl - << "testFlux.size() = " << testFlux.size() << std::endl - << "fieldNames.size() = " << fieldNames.size() << std::endl; - - // Check for equality - for (size_t i = 0; i < fieldNames.size(); i++) - { - // Check for equality and if not equal return difference - double absoluteDiff; - int64_t ulpsDiff; - - bool areEqual = testingUtilities::nearlyEqualDbl(fiducialFlux[i], - testFlux[i], - absoluteDiff, - ulpsDiff); - EXPECT_TRUE(areEqual) - << std::endl << customOutput << std::endl - << "There's a difference in " << fieldNames[i] << " Flux" << std::endl - << "The fiducial value is: " << fiducialFlux[i] << std::endl - << "The test value is: " << testFlux[i] << std::endl - << "The absolute difference is: " << absoluteDiff << std::endl - << "The ULP difference is: " << ulpsDiff << std::endl; - } - } - // ===================================================================== - - }; - // ========================================================================= - - // ========================================================================= - // Testing Calculate_HLLC_Fluxes_CUDA - /*! - * \brief Test the HLLC solver with the input from the high pressure side of a - sod shock tube. Correct results are hard coded into this test. Similar tests - do not need to be this verbose, simply passing values to the kernel call - should be sufficient in most cases - * - */ - TEST_F(tHYDROCalculateHLLCFluxesCUDA, // Test suite name - HighPressureSideExpectCorrectOutput) // Test name - { - // Physical Values - Real const density = 1.0; - Real const pressure = 1.0; - Real const velocityX = 0.0; - Real const velocityY = 0.0; - Real const velocityZ = 0.0; - Real const momentumX = density * velocityX; - Real const momentumY = density * velocityY; - Real const momentumZ = density * velocityZ; - Real const gamma = 1.4; - Real const energy = (pressure/(gamma - 1)) + 0.5 * density - * (velocityX*velocityX - + velocityY*velocityY - + velocityZ*velocityZ); - - std::vector const state{density, - momentumX, - momentumY, - momentumZ, - energy}; - std::vector const fiducialFluxes{0, 1, 0, 0, 0}; - - // Compute the fluxes - std::vector const testFluxes = computeFluxes(state, // Left state - state, // Right state - gamma); // Adiabatic Index - - // Check for correctness - checkResults(fiducialFluxes, testFluxes); +// ========================================================================= +/*! + * \brief Test fixture for simple testing of the HLLC Riemann Solver. + Effectively takes the left state, right state, fiducial fluxes, and + custom user output then performs all the required running and testing + * + */ +class tHYDROCalculateHLLCFluxesCUDA : public ::testing::Test +{ + protected: + // ===================================================================== + /*! + * \brief Compute and return the HLLC fluxes + * + * \param[in] leftState The state on the left side in conserved + * variables. In order the elements are: density, x-momentum, + * y-momentum, z-momentum, and energy. + * \param[in] rightState The state on the right side in conserved + * variables. In order the elements are: density, x-momentum, + * y-momentum, z-momentum, and energy. + * \param[in] gamma The adiabatic index + * \return std::vector + */ + std::vector computeFluxes(std::vector const &stateLeft, + std::vector const &stateRight, + Real const &gamma) + { + // Simulation Paramters + int const nx = 1; // Number of cells in the x-direction? + int const ny = 1; // Number of cells in the y-direction? + int const nz = 1; // Number of cells in the z-direction? + int const nGhost = 0; // Isn't actually used it appears + int const direction = 0; // Which direction, 0=x, 1=y, 2=z + int const nFields = 5; // Total number of conserved fields + + // Launch Parameters + dim3 const dimGrid(1, 1, 1); // How many blocks in the grid + dim3 const dimBlock(1, 1, 1); // How many threads per block + + // Create the std::vector to store the fluxes and declare the device + // pointers + std::vector testFlux(5); + Real *devConservedLeft; + Real *devConservedRight; + Real *devTestFlux; + + // Allocate device arrays and copy data + CudaSafeCall(cudaMalloc(&devConservedLeft, nFields * sizeof(Real))); + CudaSafeCall(cudaMalloc(&devConservedRight, nFields * sizeof(Real))); + CudaSafeCall(cudaMalloc(&devTestFlux, nFields * sizeof(Real))); + + CudaSafeCall(cudaMemcpy(devConservedLeft, stateLeft.data(), + nFields * sizeof(Real), cudaMemcpyHostToDevice)); + CudaSafeCall(cudaMemcpy(devConservedRight, stateRight.data(), + nFields * sizeof(Real), cudaMemcpyHostToDevice)); + + // Run kernel + hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dimGrid, dimBlock, 0, 0, + devConservedLeft, // the "left" interface + devConservedRight, // the "right" interface + devTestFlux, nx, ny, nz, nGhost, gamma, direction, + nFields); + + CudaCheckError(); + CudaSafeCall(cudaMemcpy(testFlux.data(), devTestFlux, + nFields * sizeof(Real), cudaMemcpyDeviceToHost)); + + // Make sure to sync with the device so we have the results + cudaDeviceSynchronize(); + CudaCheckError(); + + return testFlux; + } + // ===================================================================== + + // ===================================================================== + /*! + * \brief Check if the fluxes are correct + * + * \param[in] fiducialFlux The fiducial flux in conserved variables. In + * order the elements are: density, x-momentum, y-momentum, z-momentum, + * and energy. + * \param[in] testFlux The test flux in conserved variables. In order + * the elements are: density, x-momentum, y-momentum, z-momentum, and + * energy. + * \param[in] customOutput Any custom output the user would like to + * print. It will print after the default GTest output but before the + * values that failed are printed + */ + void checkResults(std::vector const &fiducialFlux, + std::vector const &testFlux, + std::string const &customOutput = "") + { + // Field names + std::vector const fieldNames{ + "Densities", "X Momentum", "Y Momentum", "Z Momentum", "Energies"}; + + ASSERT_TRUE((fiducialFlux.size() == testFlux.size()) and + (fiducialFlux.size() == fieldNames.size())) + << "The fiducial flux, test flux, and field name vectors are not all " + "the same length" + << std::endl + << "fiducialFlux.size() = " << fiducialFlux.size() << std::endl + << "testFlux.size() = " << testFlux.size() << std::endl + << "fieldNames.size() = " << fieldNames.size() << std::endl; + + // Check for equality + for (size_t i = 0; i < fieldNames.size(); i++) { + // Check for equality and if not equal return difference + double absoluteDiff; + int64_t ulpsDiff; + + bool areEqual = testingUtilities::nearlyEqualDbl( + fiducialFlux[i], testFlux[i], absoluteDiff, ulpsDiff); + EXPECT_TRUE(areEqual) + << std::endl + << customOutput << std::endl + << "There's a difference in " << fieldNames[i] << " Flux" << std::endl + << "The fiducial value is: " << fiducialFlux[i] << std::endl + << "The test value is: " << testFlux[i] << std::endl + << "The absolute difference is: " << absoluteDiff << std::endl + << "The ULP difference is: " << ulpsDiff << std::endl; } - // ========================================================================= + } + // ===================================================================== +}; +// ========================================================================= + +// ========================================================================= +// Testing Calculate_HLLC_Fluxes_CUDA +/*! +* \brief Test the HLLC solver with the input from the high pressure side of a +sod shock tube. Correct results are hard coded into this test. Similar tests +do not need to be this verbose, simply passing values to the kernel call +should be sufficient in most cases +* +*/ +TEST_F(tHYDROCalculateHLLCFluxesCUDA, // Test suite name + HighPressureSideExpectCorrectOutput) // Test name +{ + // Physical Values + Real const density = 1.0; + Real const pressure = 1.0; + Real const velocityX = 0.0; + Real const velocityY = 0.0; + Real const velocityZ = 0.0; + Real const momentumX = density * velocityX; + Real const momentumY = density * velocityY; + Real const momentumZ = density * velocityZ; + Real const gamma = 1.4; + Real const energy = (pressure / (gamma - 1)) + + 0.5 * density * + (velocityX * velocityX + velocityY * velocityY + + velocityZ * velocityZ); + + std::vector const state{density, momentumX, momentumY, momentumZ, + energy}; + std::vector const fiducialFluxes{0, 1, 0, 0, 0}; + + // Compute the fluxes + std::vector const testFluxes = computeFluxes(state, // Left state + state, // Right state + gamma); // Adiabatic Index + + // Check for correctness + checkResults(fiducialFluxes, testFluxes); +} +// ========================================================================= #endif diff --git a/src/riemann_solvers/hlld_cuda.cu b/src/riemann_solvers/hlld_cuda.cu index 8b5ac667e..2cb930d62 100644 --- a/src/riemann_solvers/hlld_cuda.cu +++ b/src/riemann_solvers/hlld_cuda.cu @@ -1,941 +1,704 @@ /*! * \file hlld_cuda.cu * \author Robert 'Bob' Caddy (rvc@pitt.edu) - * \brief Contains the implementation of the HLLD solver from Miyoshi & Kusano 2005 - * "A multi-state HLL approximate Riemann solver for ideal magnetohydrodynamics", - * hereafter referred to as M&K 2005 + * \brief Contains the implementation of the HLLD solver from Miyoshi & Kusano + * 2005 "A multi-state HLL approximate Riemann solver for ideal + * magnetohydrodynamics", hereafter referred to as M&K 2005 * -*/ + */ // External Includes // Local Includes -#include "../utils/gpu.hpp" #include "../global/global.h" #include "../global/global_cuda.h" -#include "../utils/hydro_utilities.h" -#include "../utils/mhd_utilities.h" +#include "../grid/grid_enum.h" #include "../riemann_solvers/hlld_cuda.h" #include "../utils/cuda_utilities.h" +#include "../utils/gpu.hpp" +#include "../utils/hydro_utilities.h" #include "../utils/math_utilities.h" -#include "../grid/grid_enum.h" +#include "../utils/mhd_utilities.h" -#ifdef DE //PRESSURE_DE - #include "../utils/hydro_utilities.h" -#endif // DE +#ifdef DE // PRESSURE_DE + #include "../utils/hydro_utilities.h" +#endif // DE #ifdef CUDA -#ifdef MHD + #ifdef MHD namespace mhd { - // ========================================================================= - __global__ void Calculate_HLLD_Fluxes_CUDA(Real *dev_bounds_L, - Real *dev_bounds_R, - Real *dev_magnetic_face, - Real *dev_flux, - int nx, - int ny, - int nz, - int n_ghost, - Real gamma, - int direction, - int n_fields) - { - // get a thread index - int blockId = blockIdx.x + blockIdx.y*gridDim.x; - int threadId = threadIdx.x + blockId * blockDim.x; - int xid, yid, zid; - cuda_utilities::compute3DIndices(threadId, nx, ny, xid, yid, zid); - - // Number of cells - int n_cells = nx*ny*nz; - - // Offsets & indices - int o1, o2, o3; - if (direction==0) {o1 = 1; o2 = 2; o3 = 3;} - if (direction==1) {o1 = 2; o2 = 3; o3 = 1;} - if (direction==2) {o1 = 3; o2 = 1; o3 = 2;} - - // Thread guard to avoid overrun - if (xid < nx and - yid < ny and - zid < nz) - { - // ============================ - // Retrieve conserved variables - // ============================ - // The magnetic field in the X-direction - Real magneticX = dev_magnetic_face[threadId]; - - // Left interface - Real densityL = dev_bounds_L[threadId]; - Real momentumXL = dev_bounds_L[threadId + n_cells * o1]; - Real momentumYL = dev_bounds_L[threadId + n_cells * o2]; - Real momentumZL = dev_bounds_L[threadId + n_cells * o3]; - Real energyL = dev_bounds_L[threadId + n_cells * 4]; - Real magneticYL = dev_bounds_L[threadId + n_cells * (grid_enum::Q_x_magnetic_y)]; - Real magneticZL = dev_bounds_L[threadId + n_cells * (grid_enum::Q_x_magnetic_z)]; - - #ifdef SCALAR - Real scalarConservedL[NSCALARS]; - for (int i=0; i= 0.0) - { - mhd::_internal::_returnFluxes(threadId, o1, o2, o3, n_cells, - dev_flux, - densityFluxL, - momentumFluxXL, momentumFluxYL, momentumFluxZL, - energyFluxL, - magneticFluxYL, magneticFluxZL); - #ifdef SCALAR - for (int i=0; i= 0.0) - { - mhd::_internal::_returnFluxes(threadId, o1, o2, o3, n_cells, - dev_flux, - densityStarFluxL, - momentumStarFluxXL, momentumStarFluxYL, momentumStarFluxZL, - energyStarFluxL, - magneticStarFluxYL, magneticStarFluxZL); - #ifdef SCALAR - for (int i=0; i= 0.0) - { - Real momentumDoubleStarFluxX, momentumDoubleStarFluxY, momentumDoubleStarFluxZ, - energyDoubleStarFlux, - magneticDoubleStarFluxY, magneticDoubleStarFluxZ; - mhd::_internal::_doubleStarFluxes(speedStarL, - momentumStarFluxXL, - momentumStarFluxYL, - momentumStarFluxZL, - energyStarFluxL, - magneticStarFluxYL, - magneticStarFluxZL, - densityStarL, - speedM, - velocityStarYL, - velocityStarZL, - energyStarL, - magneticStarYL, - magneticStarZL, - speedM, - velocityDoubleStarY, - velocityDoubleStarZ, - energyDoubleStarL, - magneticDoubleStarY, - magneticDoubleStarZ, - momentumDoubleStarFluxX, - momentumDoubleStarFluxY, - momentumDoubleStarFluxZ, - energyDoubleStarFlux, - magneticDoubleStarFluxY, - magneticDoubleStarFluxZ); - - mhd::_internal::_returnFluxes(threadId, o1, o2, o3, n_cells, - dev_flux, - densityStarFluxL, - momentumDoubleStarFluxX, momentumDoubleStarFluxY, momentumDoubleStarFluxZ, - energyDoubleStarFlux, - magneticDoubleStarFluxY, magneticDoubleStarFluxZ); - - #ifdef SCALAR - // Return the passive scalar fluxes - for (int i=0; i= 0.0) - { - Real momentumDoubleStarFluxX, momentumDoubleStarFluxY, momentumDoubleStarFluxZ, - energyDoubleStarFlux, - magneticDoubleStarFluxY, magneticDoubleStarFluxZ; - mhd::_internal::_doubleStarFluxes(speedStarR, - momentumStarFluxXR, - momentumStarFluxYR, - momentumStarFluxZR, - energyStarFluxR, - magneticStarFluxYR, - magneticStarFluxZR, - densityStarR, - speedM, - velocityStarYR, - velocityStarZR, - energyStarR, - magneticStarYR, - magneticStarZR, - speedM, - velocityDoubleStarY, - velocityDoubleStarZ, - energyDoubleStarR, - magneticDoubleStarY, - magneticDoubleStarZ, - momentumDoubleStarFluxX, - momentumDoubleStarFluxY, - momentumDoubleStarFluxZ, - energyDoubleStarFlux, - magneticDoubleStarFluxY, - magneticDoubleStarFluxZ); - - mhd::_internal::_returnFluxes(threadId, o1, o2, o3, n_cells, - dev_flux, - densityStarFluxR, - momentumDoubleStarFluxX, momentumDoubleStarFluxY, momentumDoubleStarFluxZ, - energyDoubleStarFlux, - magneticDoubleStarFluxY, magneticDoubleStarFluxZ); - - #ifdef SCALAR - // Return the passive scalar fluxes - for (int i=0; i= 0.0) { + mhd::_internal::_returnFluxes(threadId, o1, o2, o3, n_cells, dev_flux, + densityFluxL, momentumFluxXL, + momentumFluxYL, momentumFluxZL, energyFluxL, + magneticFluxYL, magneticFluxZL); + #ifdef SCALAR + for (int i = 0; i < NSCALARS; i++) { + dev_flux[(5 + i) * n_cells + threadId] = + (scalarConservedL[i] / densityL) * densityFluxL; + } + #endif // SCALAR + #ifdef DE + dev_flux[(n_fields - 1) * n_cells + threadId] = + (thermalEnergyConservedL / densityL) * densityFluxL; + #endif // DE + return; + } + // Right state + Real densityFluxR, momentumFluxXR, momentumFluxYR, momentumFluxZR, + magneticFluxYR, magneticFluxZR, energyFluxR; + mhd::_internal::_nonStarFluxes( + momentumXR, velocityXR, velocityYR, velocityZR, totalPressureR, energyR, + magneticX, magneticYR, magneticZR, densityFluxR, momentumFluxXR, + momentumFluxYR, momentumFluxZR, magneticFluxYR, magneticFluxZR, + energyFluxR); + + // If we're in the R state then assign fluxes and return. + // In this state the flow is supersonic + // M&K 2005 equation 66 + if (speedR <= 0.0) { + mhd::_internal::_returnFluxes(threadId, o1, o2, o3, n_cells, dev_flux, + densityFluxR, momentumFluxXR, + momentumFluxYR, momentumFluxZR, energyFluxR, + magneticFluxYR, magneticFluxZR); + #ifdef SCALAR + for (int i = 0; i < NSCALARS; i++) { + dev_flux[(5 + i) * n_cells + threadId] = + (scalarConservedR[i] / densityR) * densityFluxR; + } + #endif // SCALAR + #ifdef DE + dev_flux[(n_fields - 1) * n_cells + threadId] = + (thermalEnergyConservedR / densityR) * densityFluxR; + #endif // DE + return; + } + + // ================================================================= + // Compute the fluxes in the star states + // ================================================================= + // Shared quantity + // note that velocityStarX = speedM + // M&K 2005 equation 23, might need to switch to eqn. 41 in the + // future though they should produce identical results + Real totalPressureStar = totalPressureL + densityL * (speedL - velocityXL) * + (speedM - velocityXL); + + // Left star state + Real velocityStarYL, velocityStarZL, energyStarL, magneticStarYL, + magneticStarZL, densityStarFluxL, momentumStarFluxXL, + momentumStarFluxYL, momentumStarFluxZL, magneticStarFluxYL, + magneticStarFluxZL, energyStarFluxL; + mhd::_internal::_starFluxes( + speedM, speedL, densityL, velocityXL, velocityYL, velocityZL, + momentumXL, momentumYL, momentumZL, energyL, totalPressureL, magneticX, + magneticYL, magneticZL, densityStarL, totalPressureStar, densityFluxL, + momentumFluxXL, momentumFluxYL, momentumFluxZL, energyFluxL, + magneticFluxYL, magneticFluxZL, velocityStarYL, velocityStarZL, + energyStarL, magneticStarYL, magneticStarZL, densityStarFluxL, + momentumStarFluxXL, momentumStarFluxYL, momentumStarFluxZL, + energyStarFluxL, magneticStarFluxYL, magneticStarFluxZL); + + // If we're in the L* state then assign fluxes and return. + // In this state the flow is subsonic + // M&K 2005 equation 66 + if (speedStarL >= 0.0) { + mhd::_internal::_returnFluxes( + threadId, o1, o2, o3, n_cells, dev_flux, densityStarFluxL, + momentumStarFluxXL, momentumStarFluxYL, momentumStarFluxZL, + energyStarFluxL, magneticStarFluxYL, magneticStarFluxZL); + #ifdef SCALAR + for (int i = 0; i < NSCALARS; i++) { + dev_flux[(5 + i) * n_cells + threadId] = + (scalarConservedL[i] / densityL) * densityStarFluxL; + } + #endif // SCALAR + #ifdef DE + dev_flux[(n_fields - 1) * n_cells + threadId] = + (thermalEnergyConservedL / densityL) * densityStarFluxL; + #endif // DE + return; + } + + // Right star state + Real velocityStarYR, velocityStarZR, energyStarR, magneticStarYR, + magneticStarZR, densityStarFluxR, momentumStarFluxXR, + momentumStarFluxYR, momentumStarFluxZR, magneticStarFluxYR, + magneticStarFluxZR, energyStarFluxR; + mhd::_internal::_starFluxes( + speedM, speedR, densityR, velocityXR, velocityYR, velocityZR, + momentumXR, momentumYR, momentumZR, energyR, totalPressureR, magneticX, + magneticYR, magneticZR, densityStarR, totalPressureStar, densityFluxR, + momentumFluxXR, momentumFluxYR, momentumFluxZR, energyFluxR, + magneticFluxYR, magneticFluxZR, velocityStarYR, velocityStarZR, + energyStarR, magneticStarYR, magneticStarZR, densityStarFluxR, + momentumStarFluxXR, momentumStarFluxYR, momentumStarFluxZR, + energyStarFluxR, magneticStarFluxYR, magneticStarFluxZR); + + // If we're in the R* state then assign fluxes and return. + // In this state the flow is subsonic + // M&K 2005 equation 66 + if (speedStarR <= 0.0) { + mhd::_internal::_returnFluxes( + threadId, o1, o2, o3, n_cells, dev_flux, densityStarFluxR, + momentumStarFluxXR, momentumStarFluxYR, momentumStarFluxZR, + energyStarFluxR, magneticStarFluxYR, magneticStarFluxZR); + #ifdef SCALAR + for (int i = 0; i < NSCALARS; i++) { + dev_flux[(5 + i) * n_cells + threadId] = + (scalarConservedR[i] / densityR) * densityStarFluxR; + } + #endif // SCALAR + #ifdef DE + dev_flux[(n_fields - 1) * n_cells + threadId] = + (thermalEnergyConservedR / densityR) * densityStarFluxR; + #endif // DE + return; + } + + // ================================================================= + // Compute the fluxes in the double star states + // ================================================================= + Real velocityDoubleStarY, velocityDoubleStarZ, magneticDoubleStarY, + magneticDoubleStarZ, energyDoubleStarL, energyDoubleStarR; + mhd::_internal::_doubleStarState( + speedM, magneticX, totalPressureStar, densityStarL, velocityStarYL, + velocityStarZL, energyStarL, magneticStarYL, magneticStarZL, + densityStarR, velocityStarYR, velocityStarZR, energyStarR, + magneticStarYR, magneticStarZR, velocityDoubleStarY, + velocityDoubleStarZ, magneticDoubleStarY, magneticDoubleStarZ, + energyDoubleStarL, energyDoubleStarR); + + // Compute and return L** fluxes + // M&K 2005 equation 66 + if (speedM >= 0.0) { + Real momentumDoubleStarFluxX, momentumDoubleStarFluxY, + momentumDoubleStarFluxZ, energyDoubleStarFlux, + magneticDoubleStarFluxY, magneticDoubleStarFluxZ; + mhd::_internal::_doubleStarFluxes( + speedStarL, momentumStarFluxXL, momentumStarFluxYL, + momentumStarFluxZL, energyStarFluxL, magneticStarFluxYL, + magneticStarFluxZL, densityStarL, speedM, velocityStarYL, + velocityStarZL, energyStarL, magneticStarYL, magneticStarZL, speedM, + velocityDoubleStarY, velocityDoubleStarZ, energyDoubleStarL, + magneticDoubleStarY, magneticDoubleStarZ, momentumDoubleStarFluxX, + momentumDoubleStarFluxY, momentumDoubleStarFluxZ, + energyDoubleStarFlux, magneticDoubleStarFluxY, + magneticDoubleStarFluxZ); + + mhd::_internal::_returnFluxes( + threadId, o1, o2, o3, n_cells, dev_flux, densityStarFluxL, + momentumDoubleStarFluxX, momentumDoubleStarFluxY, + momentumDoubleStarFluxZ, energyDoubleStarFlux, + magneticDoubleStarFluxY, magneticDoubleStarFluxZ); + + #ifdef SCALAR + // Return the passive scalar fluxes + for (int i = 0; i < NSCALARS; i++) { + dev_flux[(5 + i) * n_cells + threadId] = + (scalarConservedL[i] / densityL) * densityStarFluxL; + } + #endif // SCALAR + #ifdef DE + dev_flux[(n_fields - 1) * n_cells + threadId] = + (thermalEnergyConservedL / densityL) * densityStarFluxL; + #endif // DE + return; + } + // Compute and return R** fluxes + // M&K 2005 equation 66 + else if (speedStarR >= 0.0) { + Real momentumDoubleStarFluxX, momentumDoubleStarFluxY, + momentumDoubleStarFluxZ, energyDoubleStarFlux, + magneticDoubleStarFluxY, magneticDoubleStarFluxZ; + mhd::_internal::_doubleStarFluxes( + speedStarR, momentumStarFluxXR, momentumStarFluxYR, + momentumStarFluxZR, energyStarFluxR, magneticStarFluxYR, + magneticStarFluxZR, densityStarR, speedM, velocityStarYR, + velocityStarZR, energyStarR, magneticStarYR, magneticStarZR, speedM, + velocityDoubleStarY, velocityDoubleStarZ, energyDoubleStarR, + magneticDoubleStarY, magneticDoubleStarZ, momentumDoubleStarFluxX, + momentumDoubleStarFluxY, momentumDoubleStarFluxZ, + energyDoubleStarFlux, magneticDoubleStarFluxY, + magneticDoubleStarFluxZ); + + mhd::_internal::_returnFluxes( + threadId, o1, o2, o3, n_cells, dev_flux, densityStarFluxR, + momentumDoubleStarFluxX, momentumDoubleStarFluxY, + momentumDoubleStarFluxZ, energyDoubleStarFlux, + magneticDoubleStarFluxY, magneticDoubleStarFluxZ); + + #ifdef SCALAR + // Return the passive scalar fluxes + for (int i = 0; i < NSCALARS; i++) { + dev_flux[(5 + i) * n_cells + threadId] = + (scalarConservedR[i] / densityR) * densityStarFluxR; + } + #endif // SCALAR + #ifdef DE + dev_flux[(n_fields - 1) * n_cells + threadId] = + (thermalEnergyConservedR / densityR) * densityStarFluxR; + #endif // DE + return; + } + } // End thread guard +}; +// ========================================================================= + +namespace _internal +{ +// ===================================================================== +__device__ __host__ void _approximateWaveSpeeds( + Real const &densityL, Real const &momentumXL, Real const &momentumYL, + Real const &momentumZL, Real const &velocityXL, Real const &velocityYL, + Real const &velocityZL, Real const &gasPressureL, + Real const &totalPressureL, Real const &magneticX, Real const &magneticYL, + Real const &magneticZL, Real const &densityR, Real const &momentumXR, + Real const &momentumYR, Real const &momentumZR, Real const &velocityXR, + Real const &velocityYR, Real const &velocityZR, Real const &gasPressureR, + Real const &totalPressureR, Real const &magneticYR, Real const &magneticZR, + Real const &gamma, Real &speedL, Real &speedR, Real &speedM, + Real &speedStarL, Real &speedStarR, Real &densityStarL, Real &densityStarR) +{ + // Get the fast magnetosonic wave speeds + Real magSonicL = mhd::utils::fastMagnetosonicSpeed( + densityL, gasPressureL, magneticX, magneticYL, magneticZL, gamma); + Real magSonicR = mhd::utils::fastMagnetosonicSpeed( + densityR, gasPressureR, magneticX, magneticYR, magneticZR, gamma); + + // Compute the S_L and S_R wave speeds. + // Version suggested by Miyoshi & Kusano 2005 and used in Athena + // M&K 2005 equation 67 + Real magSonicMax = fmax(magSonicL, magSonicR); + speedL = fmin(velocityXL, velocityXR) - magSonicMax; + speedR = fmax(velocityXL, velocityXR) + magSonicMax; + + // Compute the S_M wave speed + // M&K 2005 equation 38 + speedM = // Numerator + (momentumXR * (speedR - velocityXR) - momentumXL * (speedL - velocityXL) + + (totalPressureL - totalPressureR)) / + // Denominator + (densityR * (speedR - velocityXR) - densityL * (speedL - velocityXL)); + + // Compute the densities in the star state + // M&K 2005 equation 43 + densityStarL = densityL * (speedL - velocityXL) / (speedL - speedM); + densityStarR = densityR * (speedR - velocityXR) / (speedR - speedM); + + // Compute the S_L^* and S_R^* wave speeds + // M&K 2005 equation 51 + speedStarL = speedM - mhd::utils::alfvenSpeed(magneticX, densityStarL); + speedStarR = speedM + mhd::utils::alfvenSpeed(magneticX, densityStarR); +} +// ===================================================================== + +// ===================================================================== +__device__ __host__ void _nonStarFluxes( + Real const &momentumX, Real const &velocityX, Real const &velocityY, + Real const &velocityZ, Real const &totalPressure, Real const &energy, + Real const &magneticX, Real const &magneticY, Real const &magneticZ, + Real &densityFlux, Real &momentumFluxX, Real &momentumFluxY, + Real &momentumFluxZ, Real &magneticFluxY, Real &magneticFluxZ, + Real &energyFlux) +{ + // M&K 2005 equation 2 + densityFlux = momentumX; + + momentumFluxX = momentumX * velocityX + totalPressure - magneticX * magneticX; + momentumFluxY = momentumX * velocityY - magneticX * magneticY; + momentumFluxZ = momentumX * velocityZ - magneticX * magneticZ; + + magneticFluxY = magneticY * velocityX - magneticX * velocityY; + magneticFluxZ = magneticZ * velocityX - magneticX * velocityZ; + + // Group transverse terms for FP associative symmetry + energyFlux = velocityX * (energy + totalPressure) - + magneticX * (velocityX * magneticX + ((velocityY * magneticY) + + (velocityZ * magneticZ))); +} +// ===================================================================== + +// ===================================================================== +__device__ __host__ void _returnFluxes( + int const &threadId, int const &o1, int const &o2, int const &o3, + int const &n_cells, Real *dev_flux, Real const &densityFlux, + Real const &momentumFluxX, Real const &momentumFluxY, + Real const &momentumFluxZ, Real const &energyFlux, + Real const &magneticFluxY, Real const &magneticFluxZ) +{ + dev_flux[threadId] = densityFlux; + dev_flux[threadId + n_cells * o1] = momentumFluxX; + dev_flux[threadId + n_cells * o2] = momentumFluxY; + dev_flux[threadId + n_cells * o3] = momentumFluxZ; + dev_flux[threadId + n_cells * 4] = energyFlux; + dev_flux[threadId + n_cells * (grid_enum::fluxX_magnetic_z)] = magneticFluxY; + dev_flux[threadId + n_cells * (grid_enum::fluxX_magnetic_y)] = magneticFluxZ; +} +// ===================================================================== + +// ===================================================================== +__device__ __host__ void _starFluxes( + Real const &speedM, Real const &speedSide, Real const &density, + Real const &velocityX, Real const &velocityY, Real const &velocityZ, + Real const &momentumX, Real const &momentumY, Real const &momentumZ, + Real const &energy, Real const &totalPressure, Real const &magneticX, + Real const &magneticY, Real const &magneticZ, Real const &densityStar, + Real const &totalPressureStar, Real const &densityFlux, + Real const &momentumFluxX, Real const &momentumFluxY, + Real const &momentumFluxZ, Real const &energyFlux, + Real const &magneticFluxY, Real const &magneticFluxZ, Real &velocityStarY, + Real &velocityStarZ, Real &energyStar, Real &magneticStarY, + Real &magneticStarZ, Real &densityStarFlux, Real &momentumStarFluxX, + Real &momentumStarFluxY, Real &momentumStarFluxZ, Real &energyStarFlux, + Real &magneticStarFluxY, Real &magneticStarFluxZ) +{ + // Check for and handle the degenerate case + // Explained at the top of page 326 in M&K 2005 + if (fabs(density * (speedSide - velocityX) * (speedSide - speedM) - + (magneticX * magneticX)) < + totalPressureStar * mhd::_internal::_hlldSmallNumber) { + velocityStarY = velocityY; + velocityStarZ = velocityZ; + magneticStarY = magneticY; + magneticStarZ = magneticZ; + } else { + // Denominator for M&K 2005 equations 44-47 + Real const denom = + density * (speedSide - velocityX) * (speedSide - speedM) - + (magneticX * magneticX); + + // Compute the velocity and magnetic field in the star state + // M&K 2005 equations 44 & 46 + Real coef = magneticX * (speedM - velocityX) / denom; + velocityStarY = velocityY - magneticY * coef; + velocityStarZ = velocityZ - magneticZ * coef; + + // M&K 2005 equations 45 & 47 + Real tmpPower = (speedSide - velocityX); + tmpPower = tmpPower * tmpPower; + coef = (density * tmpPower - (magneticX * magneticX)) / denom; + magneticStarY = magneticY * coef; + magneticStarZ = magneticZ * coef; + } + + // M&K 2005 equation 48 + energyStar = + (energy * (speedSide - velocityX) - totalPressure * velocityX + + totalPressureStar * speedM + + magneticX * + (math_utils::dotProduct(velocityX, velocityY, velocityZ, magneticX, + magneticY, magneticZ) - + math_utils::dotProduct(speedM, velocityStarY, velocityStarZ, + magneticX, magneticStarY, magneticStarZ))) / + (speedSide - speedM); + + // Now compute the star state fluxes + // M&K 2005 equations 64 + densityStarFlux = densityFlux + speedSide * (densityStar - density); + ; + momentumStarFluxX = + momentumFluxX + speedSide * (densityStar * speedM - momentumX); + ; + momentumStarFluxY = + momentumFluxY + speedSide * (densityStar * velocityStarY - momentumY); + ; + momentumStarFluxZ = + momentumFluxZ + speedSide * (densityStar * velocityStarZ - momentumZ); + ; + energyStarFlux = energyFlux + speedSide * (energyStar - energy); + magneticStarFluxY = magneticFluxY + speedSide * (magneticStarY - magneticY); + magneticStarFluxZ = magneticFluxZ + speedSide * (magneticStarZ - magneticZ); +} +// ===================================================================== + +// ===================================================================== +__device__ __host__ void _doubleStarState( + Real const &speedM, Real const &magneticX, Real const &totalPressureStar, + Real const &densityStarL, Real const &velocityStarYL, + Real const &velocityStarZL, Real const &energyStarL, + Real const &magneticStarYL, Real const &magneticStarZL, + Real const &densityStarR, Real const &velocityStarYR, + Real const &velocityStarZR, Real const &energyStarR, + Real const &magneticStarYR, Real const &magneticStarZR, + Real &velocityDoubleStarY, Real &velocityDoubleStarZ, + Real &magneticDoubleStarY, Real &magneticDoubleStarZ, + Real &energyDoubleStarL, Real &energyDoubleStarR) +{ + // if Bx is zero then just return the star state + // Explained at the top of page 328 in M&K 2005. Essentially when + // magneticX is 0 this reduces to the HLLC solver + if (magneticX < mhd::_internal::_hlldSmallNumber * totalPressureStar) { + velocityDoubleStarY = velocityStarYL; + velocityDoubleStarZ = velocityStarZL; + magneticDoubleStarY = magneticStarYL; + magneticDoubleStarZ = magneticStarZL; + energyDoubleStarL = energyStarL; + energyDoubleStarR = energyStarR; + } else { + // Setup some variables we'll need later + Real sqrtDL = sqrt(densityStarL); + Real sqrtDR = sqrt(densityStarR); + Real inverseDensities = 1.0 / (sqrtDL + sqrtDR); + Real magXSign = copysign(1.0, magneticX); + + // All we need to do now is compute the transverse velocities + // and magnetic fields along with the energy + + // Double Star velocities + // M&K 2005 equations 59 & 60 + velocityDoubleStarY = + inverseDensities * (sqrtDL * velocityStarYL + sqrtDR * velocityStarYR + + magXSign * (magneticStarYR - magneticStarYL)); + velocityDoubleStarZ = + inverseDensities * (sqrtDL * velocityStarZL + sqrtDR * velocityStarZR + + magXSign * (magneticStarZR - magneticStarZL)); + + // Double star magnetic fields + // M&K 2005 equations 61 & 62 + magneticDoubleStarY = + inverseDensities * + (sqrtDL * magneticStarYR + sqrtDR * magneticStarYL + + magXSign * (sqrtDL * sqrtDR) * (velocityStarYR - velocityStarYL)); + magneticDoubleStarZ = + inverseDensities * + (sqrtDL * magneticStarZR + sqrtDR * magneticStarZL + + magXSign * (sqrtDL * sqrtDR) * (velocityStarZR - velocityStarZL)); + + // Double star energy + Real velDblStarDotMagDblStar = math_utils::dotProduct( + speedM, velocityDoubleStarY, velocityDoubleStarZ, magneticX, + magneticDoubleStarY, magneticDoubleStarZ); + // M&K 2005 equation 63 + energyDoubleStarL = + energyStarL - + sqrtDL * magXSign * + (math_utils::dotProduct(speedM, velocityStarYL, velocityStarZL, + magneticX, magneticStarYL, magneticStarZL) - + velDblStarDotMagDblStar); + energyDoubleStarR = + energyStarR + + sqrtDR * magXSign * + (math_utils::dotProduct(speedM, velocityStarYR, velocityStarZR, + magneticX, magneticStarYR, magneticStarZR) - + velDblStarDotMagDblStar); + } +} +// ===================================================================== + +// ===================================================================== +__device__ __host__ void _doubleStarFluxes( + Real const &speedStarSide, Real const &momentumStarFluxX, + Real const &momentumStarFluxY, Real const &momentumStarFluxZ, + Real const &energyStarFlux, Real const &magneticStarFluxY, + Real const &magneticStarFluxZ, Real const &densityStar, + Real const &velocityStarX, Real const &velocityStarY, + Real const &velocityStarZ, Real const &energyStar, + Real const &magneticStarY, Real const &magneticStarZ, + Real const &velocityDoubleStarX, Real const &velocityDoubleStarY, + Real const &velocityDoubleStarZ, Real const &energyDoubleStar, + Real const &magneticDoubleStarY, Real const &magneticDoubleStarZ, + Real &momentumDoubleStarFluxX, Real &momentumDoubleStarFluxY, + Real &momentumDoubleStarFluxZ, Real &energyDoubleStarFlux, + Real &magneticDoubleStarFluxY, Real &magneticDoubleStarFluxZ) +{ + // M&K 2005 equation 65 + momentumDoubleStarFluxX = + momentumStarFluxX + + speedStarSide * (velocityDoubleStarX - velocityStarX) * densityStar; + momentumDoubleStarFluxY = + momentumStarFluxY + + speedStarSide * (velocityDoubleStarY - velocityStarY) * densityStar; + momentumDoubleStarFluxZ = + momentumStarFluxZ + + speedStarSide * (velocityDoubleStarZ - velocityStarZ) * densityStar; + energyDoubleStarFlux = + energyStarFlux + speedStarSide * (energyDoubleStar - energyStar); + magneticDoubleStarFluxY = + magneticStarFluxY + speedStarSide * (magneticDoubleStarY - magneticStarY); + magneticDoubleStarFluxZ = + magneticStarFluxZ + speedStarSide * (magneticDoubleStarZ - magneticStarZ); +} +// ===================================================================== + +} // namespace _internal +} // end namespace mhd + #endif // MHD +#endif // CUDA diff --git a/src/riemann_solvers/hlld_cuda.h b/src/riemann_solvers/hlld_cuda.h index 332768f8a..539e5a96c 100644 --- a/src/riemann_solvers/hlld_cuda.h +++ b/src/riemann_solvers/hlld_cuda.h @@ -2,8 +2,8 @@ * \file hlld_cuda.cu * \author Robert 'Bob' Caddy (rvc@pitt.edu) * \brief Contains the declaration of the HLLD solver from Miyoshi & Kusano 2005 - * "A multi-state HLL approximate Riemann solver for ideal magnetohydrodynamics", - * hereafter referred to as M&K 2005 + * "A multi-state HLL approximate Riemann solver for ideal + * magnetohydrodynamics", hereafter referred to as M&K 2005 * */ @@ -21,366 +21,283 @@ */ namespace mhd { - /*! - * \brief Compute the HLLD fluxes from Miyoshi & Kusano 2005 - * - * \param[in] dev_bounds_L The interface states on the left side of the interface - * \param[in] dev_bounds_R The interface states on the right side of the interface - * \param[in] dev_magnetic_face A pointer to the begining of the conserved - * magnetic field array that is stored at the interface. I.e. for the - * X-direction solve this would be the begining of the X-direction fields - * \param[out] dev_flux The output flux - * \param[in] nx Number of cells in the X-direction - * \param[in] ny Number of cells in the Y-direction - * \param[in] nz Number of cells in the Z-direction - * \param[in] n_ghost Number of ghost cells on each side - * \param[in] gamma The adiabatic index - * \param[in] dir The direction that the solve is taking place in. 0=X, 1=Y, 2=Z - * \param[in] n_fields The total number of fields - */ - __global__ void Calculate_HLLD_Fluxes_CUDA(Real *dev_bounds_L, - Real *dev_bounds_R, - Real *dev_magnetic_face, - Real *dev_flux, - int nx, - int ny, - int nz, - int n_ghost, - Real gamma, - int direction, - int n_fields); +/*! + * \brief Compute the HLLD fluxes from Miyoshi & Kusano 2005 + * + * \param[in] dev_bounds_L The interface states on the left side of the + * interface \param[in] dev_bounds_R The interface states on the right side of + * the interface \param[in] dev_magnetic_face A pointer to the begining of the + * conserved magnetic field array that is stored at the interface. I.e. for the + * X-direction solve this would be the begining of the X-direction fields + * \param[out] dev_flux The output flux + * \param[in] nx Number of cells in the X-direction + * \param[in] ny Number of cells in the Y-direction + * \param[in] nz Number of cells in the Z-direction + * \param[in] n_ghost Number of ghost cells on each side + * \param[in] gamma The adiabatic index + * \param[in] dir The direction that the solve is taking place in. 0=X, 1=Y, + * 2=Z \param[in] n_fields The total number of fields + */ +__global__ void Calculate_HLLD_Fluxes_CUDA(Real *dev_bounds_L, + Real *dev_bounds_R, + Real *dev_magnetic_face, + Real *dev_flux, int nx, int ny, + int nz, int n_ghost, Real gamma, + int direction, int n_fields); - /*! - * \brief Namespace to hold private functions used within the HLLD - * solver - * - */ - namespace _internal - { - /*! - * \brief Used for some comparisons. Value was chosen to match what is - * used in Athena - */ - Real static const _hlldSmallNumber = 1.0e-8; +/*! + * \brief Namespace to hold private functions used within the HLLD + * solver + * + */ +namespace _internal +{ +/*! + * \brief Used for some comparisons. Value was chosen to match what is + * used in Athena + */ +Real static const _hlldSmallNumber = 1.0e-8; - /*! - * \brief Compute the left, right, star, and middle wave speeds. Also - * returns the densities in the star states. M&K 2005 equations 38, 43, - * 51, and 67 - * - * \param[in] densityL Density, left side - * \param[in] momentumXL Momentum in the X-direction, left side - * \param[in] momentumYL Momentum in the Y-direction, left side - * \param[in] momentumZL Momentum in the Z-direction, left side - * \param[in] velocityXL Velocity in the X-direction, left side - * \param[in] velocityYL Velocity in the Y-direction, left side - * \param[in] velocityZL Velocity in the Z-direction, left side - * \param[in] gasPressureL Gas pressure, left side - * \param[in] totalPressureL Total MHD pressure, left side - * \param[in] magneticX Magnetic field in the X-direction, left side - * \param[in] magneticYL Magnetic field in the Y-direction, left side - * \param[in] magneticZL Magnetic field in the Z-direction, left side - * \param[in] densityR Density, right side - * \param[in] momentumXR Momentum in the X-direction, right side - * \param[in] momentumYR Momentum in the Y-direction, right side - * \param[in] momentumZR Momentum in the Z-direction, right side - * \param[in] velocityXR Velocity in the X-direction, right side - * \param[in] velocityYR Velocity in the Y-direction, right side - * \param[in] velocityZR Velocity in the Z-direction, right side - * \param[in] gasPressureR Gas pressure, right side - * \param[in] totalPressureR Total MHD pressure, right side - * \param[in] magneticYR Magnetic field in the Y-direction, right side - * \param[in] magneticZR Magnetic field in the Z-direction, right side - * \param[in] gamma Adiabatic index - * \param[out] speedL Approximate speed of the left most wave - * \param[out] speedR Approximate speed of the right most wave - * \param[out] speedM Speed of the middle wave - * \param[out] speedStarL Speed of the left star state wave - * \param[out] speedStarR Speed of the right star state wave - * \param[out] densityStarL Density in left star region - * \param[out] densityStarR Density in right star region - */ - __device__ __host__ void _approximateWaveSpeeds(Real const &densityL, - Real const &momentumXL, - Real const &momentumYL, - Real const &momentumZL, - Real const &velocityXL, - Real const &velocityYL, - Real const &velocityZL, - Real const &gasPressureL, - Real const &totalPressureL, - Real const &magneticX, - Real const &magneticYL, - Real const &magneticZL, - Real const &densityR, - Real const &momentumXR, - Real const &momentumYR, - Real const &momentumZR, - Real const &velocityXR, - Real const &velocityYR, - Real const &velocityZR, - Real const &gasPressureR, - Real const &totalPressureR, - Real const &magneticYR, - Real const &magneticZR, - Real const &gamma, - Real &speedL, - Real &speedR, - Real &speedM, - Real &speedStarL, - Real &speedStarR, - Real &densityStarL, - Real &densityStarR); +/*! + * \brief Compute the left, right, star, and middle wave speeds. Also + * returns the densities in the star states. M&K 2005 equations 38, 43, + * 51, and 67 + * + * \param[in] densityL Density, left side + * \param[in] momentumXL Momentum in the X-direction, left side + * \param[in] momentumYL Momentum in the Y-direction, left side + * \param[in] momentumZL Momentum in the Z-direction, left side + * \param[in] velocityXL Velocity in the X-direction, left side + * \param[in] velocityYL Velocity in the Y-direction, left side + * \param[in] velocityZL Velocity in the Z-direction, left side + * \param[in] gasPressureL Gas pressure, left side + * \param[in] totalPressureL Total MHD pressure, left side + * \param[in] magneticX Magnetic field in the X-direction, left side + * \param[in] magneticYL Magnetic field in the Y-direction, left side + * \param[in] magneticZL Magnetic field in the Z-direction, left side + * \param[in] densityR Density, right side + * \param[in] momentumXR Momentum in the X-direction, right side + * \param[in] momentumYR Momentum in the Y-direction, right side + * \param[in] momentumZR Momentum in the Z-direction, right side + * \param[in] velocityXR Velocity in the X-direction, right side + * \param[in] velocityYR Velocity in the Y-direction, right side + * \param[in] velocityZR Velocity in the Z-direction, right side + * \param[in] gasPressureR Gas pressure, right side + * \param[in] totalPressureR Total MHD pressure, right side + * \param[in] magneticYR Magnetic field in the Y-direction, right side + * \param[in] magneticZR Magnetic field in the Z-direction, right side + * \param[in] gamma Adiabatic index + * \param[out] speedL Approximate speed of the left most wave + * \param[out] speedR Approximate speed of the right most wave + * \param[out] speedM Speed of the middle wave + * \param[out] speedStarL Speed of the left star state wave + * \param[out] speedStarR Speed of the right star state wave + * \param[out] densityStarL Density in left star region + * \param[out] densityStarR Density in right star region + */ +__device__ __host__ void _approximateWaveSpeeds( + Real const &densityL, Real const &momentumXL, Real const &momentumYL, + Real const &momentumZL, Real const &velocityXL, Real const &velocityYL, + Real const &velocityZL, Real const &gasPressureL, + Real const &totalPressureL, Real const &magneticX, Real const &magneticYL, + Real const &magneticZL, Real const &densityR, Real const &momentumXR, + Real const &momentumYR, Real const &momentumZR, Real const &velocityXR, + Real const &velocityYR, Real const &velocityZR, Real const &gasPressureR, + Real const &totalPressureR, Real const &magneticYR, Real const &magneticZR, + Real const &gamma, Real &speedL, Real &speedR, Real &speedM, + Real &speedStarL, Real &speedStarR, Real &densityStarL, Real &densityStarR); - /*! - * \brief Compute the fluxes in the left or right non-star state - * - * \param[in] momentumX Momentum in the X-direction - * \param[in] velocityX Velocity in the X-direction - * \param[in] velocityY Velocity in the Y-direction - * \param[in] velocityZ Velocity in the Z-direction - * \param[in] totalPressure Total MHD pressure - * \param[in] energy Energy - * \param[in] magneticX Magnetic field in -direction - * \param[in] magneticY Magnetic field in -direction - * \param[in] magneticZ Magnetic field in -direction - * \param[out] densityFlux The density flux - * \param[out] momentumFluxX The momentum flux in the X-direction - * \param[out] momentumFluxY The momentum flux in the Y-direction - * \param[out] momentumFluxZ The momentum flux in the Z-direction - * \param[out] magneticFluxY The magnetic field flux in the Y-direction - * \param[out] magneticFluxZ The magnetic field flux in the Z-direction - * \param[out] energyFlux The energy flux - */ - __device__ __host__ void _nonStarFluxes(Real const &momentumX, - Real const &velocityX, - Real const &velocityY, - Real const &velocityZ, - Real const &totalPressure, - Real const &energy, - Real const &magneticX, - Real const &magneticY, - Real const &magneticZ, - Real &densityFlux, - Real &momentumFluxX, - Real &momentumFluxY, - Real &momentumFluxZ, - Real &magneticFluxY, - Real &magneticFluxZ, - Real &energyFlux); +/*! + * \brief Compute the fluxes in the left or right non-star state + * + * \param[in] momentumX Momentum in the X-direction + * \param[in] velocityX Velocity in the X-direction + * \param[in] velocityY Velocity in the Y-direction + * \param[in] velocityZ Velocity in the Z-direction + * \param[in] totalPressure Total MHD pressure + * \param[in] energy Energy + * \param[in] magneticX Magnetic field in -direction + * \param[in] magneticY Magnetic field in -direction + * \param[in] magneticZ Magnetic field in -direction + * \param[out] densityFlux The density flux + * \param[out] momentumFluxX The momentum flux in the X-direction + * \param[out] momentumFluxY The momentum flux in the Y-direction + * \param[out] momentumFluxZ The momentum flux in the Z-direction + * \param[out] magneticFluxY The magnetic field flux in the Y-direction + * \param[out] magneticFluxZ The magnetic field flux in the Z-direction + * \param[out] energyFlux The energy flux + */ +__device__ __host__ void _nonStarFluxes( + Real const &momentumX, Real const &velocityX, Real const &velocityY, + Real const &velocityZ, Real const &totalPressure, Real const &energy, + Real const &magneticX, Real const &magneticY, Real const &magneticZ, + Real &densityFlux, Real &momentumFluxX, Real &momentumFluxY, + Real &momentumFluxZ, Real &magneticFluxY, Real &magneticFluxZ, + Real &energyFlux); - /*! - * \brief Assign the given flux values to the dev_flux array - * - * \param[in] threadId The thread ID - * \param[in] o1 Offset to get indexing right - * \param[in] o2 Offset to get indexing right - * \param[in] o3 Offset to get indexing right - * \param[in] n_cells Number of cells - * \param[out] dev_flux The flux array - * \param[in] densityFlux The density flux - * \param[in] momentumFluxX The momentum flux in the X-direction - * \param[in] momentumFluxY The momentum flux in the Y-direction - * \param[in] momentumFluxZ The momentum flux in the Z-direction - * \param[in] magneticFluxY The magnetic field flux in the X-direction - * \param[in] magneticFluxZ The magnetic field flux in the Y-direction - * \param[in] energyFlux The energy flux - */ - __device__ __host__ void _returnFluxes(int const &threadId, - int const &o1, - int const &o2, - int const &o3, - int const &n_cells, - Real *dev_flux, - Real const &densityFlux, - Real const &momentumFluxX, - Real const &momentumFluxY, - Real const &momentumFluxZ, - Real const &magneticFluxY, - Real const &magneticFluxZ, - Real const &energyFlux); +/*! + * \brief Assign the given flux values to the dev_flux array + * + * \param[in] threadId The thread ID + * \param[in] o1 Offset to get indexing right + * \param[in] o2 Offset to get indexing right + * \param[in] o3 Offset to get indexing right + * \param[in] n_cells Number of cells + * \param[out] dev_flux The flux array + * \param[in] densityFlux The density flux + * \param[in] momentumFluxX The momentum flux in the X-direction + * \param[in] momentumFluxY The momentum flux in the Y-direction + * \param[in] momentumFluxZ The momentum flux in the Z-direction + * \param[in] magneticFluxY The magnetic field flux in the X-direction + * \param[in] magneticFluxZ The magnetic field flux in the Y-direction + * \param[in] energyFlux The energy flux + */ +__device__ __host__ void _returnFluxes( + int const &threadId, int const &o1, int const &o2, int const &o3, + int const &n_cells, Real *dev_flux, Real const &densityFlux, + Real const &momentumFluxX, Real const &momentumFluxY, + Real const &momentumFluxZ, Real const &magneticFluxY, + Real const &magneticFluxZ, Real const &energyFlux); - /*! - * \brief Compute the fluxes in the left or right star state. M&K 2005 - * equations 44-48, 64 - * - * \param[in] speedM Speed of the central wave - * \param[in] speedSide Speed of the non-star wave on the side being computed - * \param[in] density Density - * \param[in] velocityX Velocity in the X-direction - * \param[in] velocityY Velocity in the Y-direction - * \param[in] velocityZ Velocity in the Z-direction - * \param[in] momentumX Momentum in the X-direction - * \param[in] momentumY Momentum in the Y-direction - * \param[in] momentumZ Momentum in the Z-direction - * \param[in] energy Energy - * \param[in] totalPressure Total MHD pressure - * \param[in] magneticX Magnetic field in the X-direction - * \param[in] magneticY Magnetic field in the Y-direction - * \param[in] magneticZ Magnetic field in the Z-direction - * \param[in] densityStar Density in the star state - * \param[in] totalPressureStar Total MHD pressure in the star state - * \param[in] densityFlux Density Flux from the non-star state - * \param[in] momentumFluxX Momentum flux from the non-star state in the X-direction - * \param[in] momentumFluxY Momentum flux from the non-star state in the Y-direction - * \param[in] momentumFluxZ Momentum flux from the non-star state in the Z-direction - * \param[in] energyFlux Energy flux from the non-star state - * \param[in] magneticFluxY Magnetic flux from the non-star state in the X-direction - * \param[in] magneticFluxZ Magnetic flux from the non-star state in the Y-direction - * \param[out] velocityStarY Velocity in the star state in the Y-direction - * \param[out] velocityStarZ Velocity in the star state in the Z-direction - * \param[out] energyStar Energy in the star state - * \param[out] magneticStarY Magnetic field in the star state in the X-direction - * \param[out] magneticStarZ Magnetic field in the star state in the Y-direction - * \param[out] densityStarFlux Density flux in the star state - * \param[out] momentumStarFluxX Momentum flux in the star state in the X-direction - * \param[out] momentumStarFluxY Momentum flux in the star state in the Y-direction - * \param[out] momentumStarFluxZ Momentum flux in the star state in the Z-direction - * \param[out] energyStarFlux Energy flux in the star state - * \param[out] magneticStarFluxY Magnetic field flux in the star state in the X-direction - * \param[out] magneticStarFluxZ Magnetic field flux in the star state in the Y-direction - * - */ - __device__ __host__ void _starFluxes(Real const &speedM, - Real const &speedSide, - Real const &density, - Real const &velocityX, - Real const &velocityY, - Real const &velocityZ, - Real const &momentumX, - Real const &momentumY, - Real const &momentumZ, - Real const &energy, - Real const &totalPressure, - Real const &magneticX, - Real const &magneticY, - Real const &magneticZ, - Real const &densityStar, - Real const &totalPressureStar, - Real const &densityFlux, - Real const &momentumFluxX, - Real const &momentumFluxY, - Real const &momentumFluxZ, - Real const &energyFlux, - Real const &magneticFluxY, - Real const &magneticFluxZ, - Real &velocityStarY, - Real &velocityStarZ, - Real &energyStar, - Real &magneticStarY, - Real &magneticStarZ, - Real &densityStarFlux, - Real &momentumStarFluxX, - Real &momentumStarFluxY, - Real &momentumStarFluxZ, - Real &energyStarFlux, - Real &magneticStarFluxY, - Real &magneticStarFluxZ); +/*! + * \brief Compute the fluxes in the left or right star state. M&K 2005 + * equations 44-48, 64 + * + * \param[in] speedM Speed of the central wave + * \param[in] speedSide Speed of the non-star wave on the side being computed + * \param[in] density Density + * \param[in] velocityX Velocity in the X-direction + * \param[in] velocityY Velocity in the Y-direction + * \param[in] velocityZ Velocity in the Z-direction + * \param[in] momentumX Momentum in the X-direction + * \param[in] momentumY Momentum in the Y-direction + * \param[in] momentumZ Momentum in the Z-direction + * \param[in] energy Energy + * \param[in] totalPressure Total MHD pressure + * \param[in] magneticX Magnetic field in the X-direction + * \param[in] magneticY Magnetic field in the Y-direction + * \param[in] magneticZ Magnetic field in the Z-direction + * \param[in] densityStar Density in the star state + * \param[in] totalPressureStar Total MHD pressure in the star state + * \param[in] densityFlux Density Flux from the non-star state + * \param[in] momentumFluxX Momentum flux from the non-star state in the + * X-direction \param[in] momentumFluxY Momentum flux from the non-star state in + * the Y-direction \param[in] momentumFluxZ Momentum flux from the non-star + * state in the Z-direction \param[in] energyFlux Energy flux from the non-star + * state \param[in] magneticFluxY Magnetic flux from the non-star state in the + * X-direction \param[in] magneticFluxZ Magnetic flux from the non-star state in + * the Y-direction \param[out] velocityStarY Velocity in the star state in the + * Y-direction \param[out] velocityStarZ Velocity in the star state in the + * Z-direction \param[out] energyStar Energy in the star state \param[out] + * magneticStarY Magnetic field in the star state in the X-direction \param[out] + * magneticStarZ Magnetic field in the star state in the Y-direction \param[out] + * densityStarFlux Density flux in the star state \param[out] momentumStarFluxX + * Momentum flux in the star state in the X-direction \param[out] + * momentumStarFluxY Momentum flux in the star state in the Y-direction + * \param[out] momentumStarFluxZ Momentum flux in the star state in the + * Z-direction \param[out] energyStarFlux Energy flux in the star state + * \param[out] magneticStarFluxY Magnetic field flux in the star state in the + * X-direction \param[out] magneticStarFluxZ Magnetic field flux in the star + * state in the Y-direction + * + */ +__device__ __host__ void _starFluxes( + Real const &speedM, Real const &speedSide, Real const &density, + Real const &velocityX, Real const &velocityY, Real const &velocityZ, + Real const &momentumX, Real const &momentumY, Real const &momentumZ, + Real const &energy, Real const &totalPressure, Real const &magneticX, + Real const &magneticY, Real const &magneticZ, Real const &densityStar, + Real const &totalPressureStar, Real const &densityFlux, + Real const &momentumFluxX, Real const &momentumFluxY, + Real const &momentumFluxZ, Real const &energyFlux, + Real const &magneticFluxY, Real const &magneticFluxZ, Real &velocityStarY, + Real &velocityStarZ, Real &energyStar, Real &magneticStarY, + Real &magneticStarZ, Real &densityStarFlux, Real &momentumStarFluxX, + Real &momentumStarFluxY, Real &momentumStarFluxZ, Real &energyStarFlux, + Real &magneticStarFluxY, Real &magneticStarFluxZ); - /*! - * \brief Compute the double star state. M&K 2005 equations 59-63 - * - * \param[in] speedM - * \param[in] magneticX - * \param[in] totalPressureStar - * \param[in] densityStarL - * \param[in] velocityStarYL - * \param[in] velocityStarZL - * \param[in] energyStarL - * \param[in] magneticStarYL - * \param[in] magneticStarZL - * \param[in] densityStarR - * \param[in] velocityStarYR - * \param[in] velocityStarZR - * \param[in] energyStarR - * \param[in] magneticStarYR - * \param[in] magneticStarZR - * \param[out] velocityDoubleStarY - * \param[out] velocityDoubleStarZ - * \param[out] magneticDoubleStarY - * \param[out] magneticDoubleStarZ - * \param[out] energyDoubleStarL - * \param[out] energyDoubleStarR - */ - __device__ __host__ void _doubleStarState(Real const &speedM, - Real const &magneticX, - Real const &totalPressureStar, - Real const &densityStarL, - Real const &velocityStarYL, - Real const &velocityStarZL, - Real const &energyStarL, - Real const &magneticStarYL, - Real const &magneticStarZL, - Real const &densityStarR, - Real const &velocityStarYR, - Real const &velocityStarZR, - Real const &energyStarR, - Real const &magneticStarYR, - Real const &magneticStarZR, - Real &velocityDoubleStarY, - Real &velocityDoubleStarZ, - Real &magneticDoubleStarY, - Real &magneticDoubleStarZ, - Real &energyDoubleStarL, - Real &energyDoubleStarR); +/*! + * \brief Compute the double star state. M&K 2005 equations 59-63 + * + * \param[in] speedM + * \param[in] magneticX + * \param[in] totalPressureStar + * \param[in] densityStarL + * \param[in] velocityStarYL + * \param[in] velocityStarZL + * \param[in] energyStarL + * \param[in] magneticStarYL + * \param[in] magneticStarZL + * \param[in] densityStarR + * \param[in] velocityStarYR + * \param[in] velocityStarZR + * \param[in] energyStarR + * \param[in] magneticStarYR + * \param[in] magneticStarZR + * \param[out] velocityDoubleStarY + * \param[out] velocityDoubleStarZ + * \param[out] magneticDoubleStarY + * \param[out] magneticDoubleStarZ + * \param[out] energyDoubleStarL + * \param[out] energyDoubleStarR + */ +__device__ __host__ void _doubleStarState( + Real const &speedM, Real const &magneticX, Real const &totalPressureStar, + Real const &densityStarL, Real const &velocityStarYL, + Real const &velocityStarZL, Real const &energyStarL, + Real const &magneticStarYL, Real const &magneticStarZL, + Real const &densityStarR, Real const &velocityStarYR, + Real const &velocityStarZR, Real const &energyStarR, + Real const &magneticStarYR, Real const &magneticStarZR, + Real &velocityDoubleStarY, Real &velocityDoubleStarZ, + Real &magneticDoubleStarY, Real &magneticDoubleStarZ, + Real &energyDoubleStarL, Real &energyDoubleStarR); - /*! - * \brief Compute the double star state fluxes. M&K 2005 equation 65 - * - * \param[in] speedStarSide The star speed on the side being computed - * \param[in] momentumStarFluxX - * \param[in] momentumStarFluxY - * \param[in] momentumStarFluxZ - * \param[in] energyStarFlux - * \param[in] magneticStarFluxY - * \param[in] magneticStarFluxZ - * \param[in] densityStar - * \param[in] velocityStarX - * \param[in] velocityStarY - * \param[in] velocityStarZ - * \param[in] energyStar - * \param[in] magneticStarY - * \param[in] magneticStarZ - * \param[in] velocityDoubleStarX - * \param[in] velocityDoubleStarY - * \param[in] velocityDoubleStarZ - * \param[in] energyDoubleStar - * \param[in] magneticDoubleStarY - * \param[in] magneticDoubleStarZ - * \param[out] momentumDoubleStarFluxX - * \param[out] momentumDoubleStarFluxY - * \param[out] momentumDoubleStarFluxZ - * \param[out] energyDoubleStarFlux - * \param[out] magneticDoubleStarFluxY - * \param[out] magneticDoubleStarFluxZ - */ - __device__ __host__ void _doubleStarFluxes(Real const &speedStarSide, - Real const &momentumStarFluxX, - Real const &momentumStarFluxY, - Real const &momentumStarFluxZ, - Real const &energyStarFlux, - Real const &magneticStarFluxY, - Real const &magneticStarFluxZ, - Real const &densityStar, - Real const &velocityStarX, - Real const &velocityStarY, - Real const &velocityStarZ, - Real const &energyStar, - Real const &magneticStarY, - Real const &magneticStarZ, - Real const &velocityDoubleStarX, - Real const &velocityDoubleStarY, - Real const &velocityDoubleStarZ, - Real const &energyDoubleStar, - Real const &magneticDoubleStarY, - Real const &magneticDoubleStarZ, - Real &momentumDoubleStarFluxX, - Real &momentumDoubleStarFluxY, - Real &momentumDoubleStarFluxZ, - Real &energyDoubleStarFlux, - Real &magneticDoubleStarFluxY, - Real &magneticDoubleStarFluxZ); +/*! + * \brief Compute the double star state fluxes. M&K 2005 equation 65 + * + * \param[in] speedStarSide The star speed on the side being computed + * \param[in] momentumStarFluxX + * \param[in] momentumStarFluxY + * \param[in] momentumStarFluxZ + * \param[in] energyStarFlux + * \param[in] magneticStarFluxY + * \param[in] magneticStarFluxZ + * \param[in] densityStar + * \param[in] velocityStarX + * \param[in] velocityStarY + * \param[in] velocityStarZ + * \param[in] energyStar + * \param[in] magneticStarY + * \param[in] magneticStarZ + * \param[in] velocityDoubleStarX + * \param[in] velocityDoubleStarY + * \param[in] velocityDoubleStarZ + * \param[in] energyDoubleStar + * \param[in] magneticDoubleStarY + * \param[in] magneticDoubleStarZ + * \param[out] momentumDoubleStarFluxX + * \param[out] momentumDoubleStarFluxY + * \param[out] momentumDoubleStarFluxZ + * \param[out] energyDoubleStarFlux + * \param[out] magneticDoubleStarFluxY + * \param[out] magneticDoubleStarFluxZ + */ +__device__ __host__ void _doubleStarFluxes( + Real const &speedStarSide, Real const &momentumStarFluxX, + Real const &momentumStarFluxY, Real const &momentumStarFluxZ, + Real const &energyStarFlux, Real const &magneticStarFluxY, + Real const &magneticStarFluxZ, Real const &densityStar, + Real const &velocityStarX, Real const &velocityStarY, + Real const &velocityStarZ, Real const &energyStar, + Real const &magneticStarY, Real const &magneticStarZ, + Real const &velocityDoubleStarX, Real const &velocityDoubleStarY, + Real const &velocityDoubleStarZ, Real const &energyDoubleStar, + Real const &magneticDoubleStarY, Real const &magneticDoubleStarZ, + Real &momentumDoubleStarFluxX, Real &momentumDoubleStarFluxY, + Real &momentumDoubleStarFluxZ, Real &energyDoubleStarFlux, + Real &magneticDoubleStarFluxY, Real &magneticDoubleStarFluxZ); - } // end namespace mhd::_internal -} // end namespace mhd -#endif //CUDA +} // namespace _internal +} // end namespace mhd +#endif // CUDA diff --git a/src/riemann_solvers/hlld_cuda_tests.cu b/src/riemann_solvers/hlld_cuda_tests.cu index 0de90e6f9..59f52d72a 100644 --- a/src/riemann_solvers/hlld_cuda_tests.cu +++ b/src/riemann_solvers/hlld_cuda_tests.cu @@ -1,2583 +1,3032 @@ /*! -* \file hlld_cuda_tests.cpp -* \author Robert 'Bob' Caddy (rvc@pitt.edu) -* \brief Test the code units within hlld_cuda.cu -* -*/ + * \file hlld_cuda_tests.cpp + * \author Robert 'Bob' Caddy (rvc@pitt.edu) + * \brief Test the code units within hlld_cuda.cu + * + */ // STL Includes +#include #include #include -#include #include // External Includes -#include // Include GoogleTest and related libraries/headers +#include // Include GoogleTest and related libraries/headers // Local Includes #include "../global/global_cuda.h" #include "../grid/grid_enum.h" +#include "../riemann_solvers/hlld_cuda.h" // Include code to test #include "../utils/gpu.hpp" -#include "../utils/testing_utilities.h" #include "../utils/mhd_utilities.h" -#include "../riemann_solvers/hlld_cuda.h" // Include code to test +#include "../utils/testing_utilities.h" #ifdef CUDA -#ifdef MHD - // ========================================================================= - // Integration tests for the entire HLLD solver. Unit tests are below - // ========================================================================= - - // ========================================================================= - /*! - * \brief Test fixture for simple testing of the HLLD Riemann Solver. - Effectively takes the left state, right state, fiducial fluxes, and - custom user output then performs all the required running and testing - * - */ - class tMHDCalculateHLLDFluxesCUDA : public ::testing::Test - { - protected: - // ===================================================================== - /*! - * \brief Compute and return the HLLD fluxes - * - * \param[in] leftState The state on the left side in conserved - * variables. In order the elements are: density, x-momentum, - * y-momentum, z-momentum, energy, passive scalars, x-magnetic field, - * y-magnetic field, z-magnetic field. - * \param[in] rightState The state on the right side in conserved - * variables. In order the elements are: density, x-momentum, - * y-momentum, z-momentum, energy, passive scalars, x-magnetic field, - * y-magnetic field, z-magnetic field. - * \param[in] gamma The adiabatic index - * \param[in] direction Which plane the interface is. 0 = plane normal to - * X, 1 = plane normal to Y, 2 = plane normal to Z. Defaults to 0. - * \return std::vector - */ - std::vector computeFluxes(std::vector stateLeft, - std::vector stateRight, - Real const &gamma, - int const &direction=0) - { - - // Rearrange X, Y, and Z values for the chosen direction - std::rotate(stateLeft.begin() + 1, stateLeft.begin() + 4 - direction, stateLeft.begin() + 4); - std::rotate(stateRight.begin()+ 1, stateRight.begin()+ 4 - direction, stateRight.begin()+ 4); - - // Create new vectors that store the values in the way that the HLLD - // solver expects - EXPECT_DOUBLE_EQ(stateLeft.at(grid_enum::magnetic_x), stateRight.at(grid_enum::magnetic_x)) - << "The left and right magnetic fields are not equal"; - std::vector const magneticX{stateLeft.at(grid_enum::magnetic_x)}; - stateLeft.erase(stateLeft.begin() + grid_enum::magnetic_x); - stateRight.erase(stateRight.begin() + grid_enum::magnetic_x); - - // Simulation Paramters - int const nx = 1; // Number of cells in the x-direction - int const ny = 1; // Number of cells in the y-direction - int const nz = 1; // Number of cells in the z-direction - int const nGhost = 0; // Isn't actually used it appears - int nFields = 8; // Total number of conserved fields - #ifdef SCALAR - nFields += NSCALARS; - #endif // SCALAR - #ifdef DE - nFields++; - #endif //DE - - // Launch Parameters - dim3 const dimGrid (1,1,1); // How many blocks in the grid - dim3 const dimBlock(1,1,1); // How many threads per block - - // Create the std::vector to store the fluxes and declare the device - // pointers - std::vector testFlux(nFields-1, 0); - Real *devConservedLeft; - Real *devConservedRight; - Real *devConservedMagXFace; - Real *devTestFlux; - - // Allocate device arrays and copy data - CudaSafeCall(cudaMalloc(&devConservedLeft, stateLeft.size()*sizeof(Real))); - CudaSafeCall(cudaMalloc(&devConservedRight, stateRight.size()*sizeof(Real))); - CudaSafeCall(cudaMalloc(&devConservedMagXFace, magneticX.size()*sizeof(Real))); - CudaSafeCall(cudaMalloc(&devTestFlux, testFlux.size()*sizeof(Real))); - - CudaSafeCall(cudaMemcpy(devConservedLeft, - stateLeft.data(), - stateLeft.size()*sizeof(Real), - cudaMemcpyHostToDevice)); - CudaSafeCall(cudaMemcpy(devConservedRight, - stateRight.data(), - stateRight.size()*sizeof(Real), - cudaMemcpyHostToDevice)); - CudaSafeCall(cudaMemcpy(devConservedMagXFace, - magneticX.data(), - magneticX.size()*sizeof(Real), - cudaMemcpyHostToDevice)); - - // Run kernel - hipLaunchKernelGGL(mhd::Calculate_HLLD_Fluxes_CUDA, - dimGrid, - dimBlock, - 0, - 0, - devConservedLeft, // the "left" interface - devConservedRight, // the "right" interface - devConservedMagXFace, // the magnetic field at the interface - devTestFlux, - nx, - ny, - nz, - nGhost, - gamma, - direction, - nFields); - - CudaCheckError(); - CudaSafeCall(cudaMemcpy(testFlux.data(), - devTestFlux, - testFlux.size()*sizeof(Real), - cudaMemcpyDeviceToHost)); - - // Make sure to sync with the device so we have the results - cudaDeviceSynchronize(); - CudaCheckError(); - - // Free device arrays - cudaFree(devConservedLeft); - cudaFree(devConservedRight); - cudaFree(devConservedMagXFace); - cudaFree(devTestFlux); - - // The HLLD solver only writes the the first two "slots" for - // magnetic flux so let's rearrange to make sure we have all the - // magnetic fluxes in the right spots - testFlux.insert(testFlux.begin() + grid_enum::magnetic_x, 0.0); - std::rotate(testFlux.begin() + 1, testFlux.begin() + 1 + direction, testFlux.begin() + 4); // Rotate momentum - - return testFlux; - } - // ===================================================================== - - // ===================================================================== - /*! - * \brief Check if the fluxes are correct - * - * \param[in] fiducialFlux The fiducial flux in conserved variables. In - * order the elements are: density, x-momentum, - * y-momentum, z-momentum, energy, passive scalars, x-magnetic field, - * y-magnetic field, z-magnetic field. - * \param[in] scalarFlux The fiducial flux in the passive scalars - * \param[in] thermalEnergyFlux The fiducial flux in the dual energy - * thermal energy - * \param[in] testFlux The test flux in conserved variables. In order the - * elements are: density, x-momentum, - * y-momentum, z-momentum, energy, passive scalars, x-magnetic field, - * y-magnetic field, z-magnetic field. - * \param[in] customOutput Any custom output the user would like to - * print. It will print after the default GTest output but before the - * values that failed are printed - * \param[in] direction Which plane the interface is. 0 = plane normal to - * X, 1 = plane normal to Y, 2 = plane normal to Z. Defaults to 0. - */ - void checkResults(std::vector fiducialFlux, - std::vector scalarFlux, - Real thermalEnergyFlux, - std::vector const &testFlux, - std::string const &customOutput = "", - int const &direction=0) - { - // Field names - std::vector fieldNames{"Densities", - "X Momentum", - "Y Momentum", - "Z Momentum", - "Energies", - "X Magnetic Field", - "Y Magnetic Field", - "Z Magnetic Field"}; - #ifdef DE - fieldNames.push_back("Thermal energy (dual energy)"); - fiducialFlux.push_back(thermalEnergyFlux); - #endif //DE - #ifdef SCALAR - std::vector scalarNames{"Scalar 1", "Scalar 2", "Scalar 3"}; - fieldNames.insert(fieldNames.begin() + grid_enum::magnetic_start, - scalarNames.begin(), - scalarNames.begin() + grid_enum::nscalars); - - fiducialFlux.insert(fiducialFlux.begin() + grid_enum::magnetic_start, - scalarFlux.begin(), - scalarFlux.begin() + grid_enum::nscalars); - #endif //SCALAR - - ASSERT_TRUE( (fiducialFlux.size() == testFlux.size()) - and (fiducialFlux.size() == fieldNames.size())) - << "The fiducial flux, test flux, and field name vectors are not all the same length" << std::endl - << "fiducialFlux.size() = " << fiducialFlux.size() << std::endl - << "testFlux.size() = " << testFlux.size() << std::endl - << "fieldNames.size() = " << fieldNames.size() << std::endl; - - // Check for equality - for (size_t i = 0; i < fieldNames.size(); i++) - { - // Check for equality and if not equal return difference - double absoluteDiff; - int64_t ulpsDiff; - - bool areEqual = testingUtilities::nearlyEqualDbl(fiducialFlux[i], - testFlux[i], - absoluteDiff, - ulpsDiff); - EXPECT_TRUE(areEqual) - << std::endl << customOutput << std::endl - << "There's a difference in " << fieldNames[i] << " Flux" << std::endl - << "The direction is: " << direction << " (0=X, 1=Y, 2=Z)" << std::endl - << "The fiducial value is: " << fiducialFlux[i] << std::endl - << "The test value is: " << testFlux[i] << std::endl - << "The absolute difference is: " << absoluteDiff << std::endl - << "The ULP difference is: " << ulpsDiff << std::endl; - } - } - // ===================================================================== - - // ===================================================================== - /*! - * \brief Convert a vector of quantities in primitive variables to - * conserved variables - * - * \param[in] input The state in primitive variables. In order the - * elements are: density, x-momentum, - * y-momentum, z-momentum, energy, passive scalars, x-magnetic field, - * y-magnetic field, z-magnetic field. - * \return std::vector The state in conserved variables. In order - * the elements are: density, x-momentum, - * y-momentum, z-momentum, energy, passive scalars, x-magnetic field, - * y-magnetic field, z-magnetic field. - */ - std::vector primitive2Conserved(std::vector const &input, - double const &gamma, - std::vector const &primitiveScalars) - { - std::vector output(input.size()); - output.at(0) = input.at(0); // Density - output.at(1) = input.at(1) * input.at(0); // X Velocity to momentum - output.at(2) = input.at(2) * input.at(0); // Y Velocity to momentum - output.at(3) = input.at(3) * input.at(0); // Z Velocity to momentum - output.at(4) = mhd::utils::computeEnergy(input.at(4), - input.at(0), - input.at(1), - input.at(2), - input.at(3), - input.at(5), - input.at(6), - input.at(7), - gamma); // Pressure to Energy - output.at(5) = input.at(5); // X Magnetic Field - output.at(6) = input.at(6); // Y Magnetic Field - output.at(7) = input.at(7); // Z Magnetic Field - - #ifdef SCALAR - std::vector conservedScalar(primitiveScalars.size()); - std::transform(primitiveScalars.begin(), - primitiveScalars.end(), - conservedScalar.begin(), - [&](Real const &c){ return c*output.at(0); }); - output.insert(output.begin() + grid_enum::magnetic_start, - conservedScalar.begin(), - conservedScalar.begin() + grid_enum::nscalars); - #endif //SCALAR - #ifdef DE - output.push_back(mhd::utils::computeThermalEnergy(output.at(4), - output.at(0), - output.at(1), - output.at(2), - output.at(3), - output.at(grid_enum::magnetic_x), - output.at(grid_enum::magnetic_y), - output.at(grid_enum::magnetic_z), - gamma)); - #endif //DE - return output; - } - // ===================================================================== - - // ===================================================================== - /*! - * \brief On test start make sure that the number of NSCALARS is allowed - * - */ - void SetUp() - { - #ifdef SCALAR - ASSERT_LE(NSCALARS, 3) << "Only up to 3 passive scalars are currently supported in HLLD tests. NSCALARS = " << NSCALARS; - ASSERT_GE(NSCALARS, 1) << "There must be at least 1 passive scalar to test with passive scalars. NSCALARS = " << NSCALARS; - #endif //SCALAR - } - // ===================================================================== - private: - }; - // ========================================================================= - - // ========================================================================= - /*! - * \brief Test the HLLD Riemann Solver using various states and waves from - * the Brio & Wu Shock tube - * - */ - TEST_F(tMHDCalculateHLLDFluxesCUDA, - BrioAndWuShockTubeCorrectInputExpectCorrectOutput) - { - // Constant Values - Real const gamma = 2.; - Real const Vz = 0.0; - Real const Bx = 0.75; - Real const Bz = 0.0; - std::vector const primitiveScalar{1.1069975296, 2.2286185018, 3.3155141875}; - - // States - std::vector const // | Density | X-Velocity | Y-Velocity | Z-Velocity | Pressure | X-Magnetic Field | Y-Magnetic Field | Z-Magnetic Field | Adiabatic Index | Passive Scalars | - leftICs = primitive2Conserved({1.0, 0.0, 0.0, Vz, 1.0, Bx, 1.0 , Bz}, gamma, primitiveScalar), - leftFastRareLeftSide = primitive2Conserved({0.978576, 0.038603, -0.011074, Vz, 0.957621, Bx, 0.970288, Bz}, gamma, primitiveScalar), - leftFastRareRightSide = primitive2Conserved({0.671655, 0.647082, -0.238291, Vz, 0.451115, Bx, 0.578240, Bz}, gamma, primitiveScalar), - compoundLeftSide = primitive2Conserved({0.814306, 0.506792, -0.911794, Vz, 0.706578, Bx, -0.108819, Bz}, gamma, primitiveScalar), - compoundPeak = primitive2Conserved({0.765841, 0.523701, -1.383720, Vz, 0.624742, Bx, -0.400787, Bz}, gamma, primitiveScalar), - compoundRightSide = primitive2Conserved({0.695211, 0.601089, -1.583720, Vz, 0.515237, Bx, -0.537027, Bz}, gamma, primitiveScalar), - contactLeftSide = primitive2Conserved({0.680453, 0.598922, -1.584490, Vz, 0.515856, Bx, -0.533616, Bz}, gamma, primitiveScalar), - contactRightSide = primitive2Conserved({0.231160, 0.599261, -1.584820, Vz, 0.516212, Bx, -0.533327, Bz}, gamma, primitiveScalar), - slowShockLeftSide = primitive2Conserved({0.153125, 0.086170, -0.683303, Vz, 0.191168, Bx, -0.850815, Bz}, gamma, primitiveScalar), - slowShockRightSide = primitive2Conserved({0.117046, -0.238196, -0.165561, Vz, 0.087684, Bx, -0.903407, Bz}, gamma, primitiveScalar), - rightFastRareLeftSide = primitive2Conserved({0.117358, -0.228756, -0.158845, Vz, 0.088148, Bx, -0.908335, Bz}, gamma, primitiveScalar), - rightFastRareRightSide = primitive2Conserved({0.124894, -0.003132, -0.002074, Vz, 0.099830, Bx, -0.999018, Bz}, gamma, primitiveScalar), - rightICs = primitive2Conserved({0.128, 0.0, 0.0, Vz, 0.1, Bx, -1.0, Bz}, gamma, primitiveScalar); - - for (size_t direction = 0; direction < 3; direction++) - { - // Initial Condition Checks - { - std::string const outputString {"Left State: Left Brio & Wu state\n" - "Right State: Left Brio & Wu state\n" - "HLLD State: Left Double Star State"}; - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{0, 1.21875, -0.75, 0, 0, 0.0, 0, 0}; - std::vector const scalarFlux{0, 0, 0}; - Real thermalEnergyFlux = 0.0; - std::vector const testFluxes = computeFluxes(leftICs, - leftICs, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - { - std::string const outputString {"Left State: Right Brio & Wu state\n" - "Right State: Right Brio & Wu state\n" - "HLLD State: Left Double Star State"}; - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{0, 0.31874999999999998, 0.75, 0, 0, 0.0, 0, 0}; - std::vector const scalarFlux{0, 0, 0}; - Real thermalEnergyFlux = 0.0; - std::vector const testFluxes = computeFluxes(rightICs, - rightICs, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - { - std::string const outputString {"Left State: Left Brio & Wu state\n" - "Right State: Right Brio & Wu state\n" - "HLLD State: Left Double Star State"}; - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{0.20673357746080057, 0.4661897584603672, 0.061170028480309613, 0, 0.064707291981509041, 0.0, 1.0074980455427278, 0}; - std::vector const scalarFlux{0.22885355953447648, 0.46073027567244362, 0.6854281091039145}; - Real thermalEnergyFlux = 0.20673357746080046; - std::vector const testFluxes = computeFluxes(leftICs, - rightICs, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - { - std::string const outputString {"Left State: Right Brio & Wu state\n" - "Right State: Left Brio & Wu state\n" - "HLLD State: Right Double Star State"}; - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{-0.20673357746080057, 0.4661897584603672, 0.061170028480309613, 0, -0.064707291981509041, 0.0, -1.0074980455427278, 0}; - std::vector const scalarFlux{-0.22885355953447648, -0.46073027567244362, -0.6854281091039145}; - Real thermalEnergyFlux = -0.20673357746080046; - std::vector const testFluxes = computeFluxes(rightICs, - leftICs, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - - // Cross wave checks - { - std::string const outputString {"Left State: Left of left fast rarefaction\n" - "Right State: Right of left fast rarefaction\n" - "HLLD State: Left Double Star State"}; - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{0.4253304970883941, 0.47729308161522394, -0.55321646324583107, 0, 0.92496835095531071, 0.0, 0.53128887284876058, 0}; - std::vector const scalarFlux{0.47083980954039228, 0.94789941519098619, 1.4101892974729979}; - Real thermalEnergyFlux = 0.41622256825457099; - std::vector const testFluxes = computeFluxes(leftFastRareLeftSide, - leftFastRareRightSide, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - { - std::string const outputString {"Left State: Right of left fast rarefaction\n" - "Right State: Left of left fast rarefaction\n" - "HLLD State: Left Double Star State"}; - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{0.070492123816403796, 1.2489600267034342, -0.71031457071286608, 0, 0.21008080091470105, 0.0, 0.058615131833681167, 0}; - std::vector const scalarFlux{0.078034606921016325, 0.15710005136841393, 0.23371763662029341}; - Real thermalEnergyFlux = 0.047345816580591255; - std::vector const testFluxes = computeFluxes(leftFastRareRightSide, - leftFastRareLeftSide, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - { - std::string const outputString {"Left State: Left of compound wave\n" - "Right State: Right of compound wave\n" - "HLLD State: Left Double Star State"}; - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{0.4470171023231666, 0.60747660800918468, -0.20506357956052623, 0, 0.72655525704800772, 0.0, 0.76278089951123285, 0}; - std::vector const scalarFlux{0.4948468279606959, 0.99623058485843297, 1.482091544807598}; - Real thermalEnergyFlux = 0.38787931087981475; - std::vector const testFluxes = computeFluxes(compoundLeftSide, - compoundRightSide, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - { - std::string const outputString {"Left State: Right of compound wave\n" - "Right State: Left of compound wave\n" - "HLLD State: Left Double Star State"}; - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{0.38496850292724116, 0.66092864409611585, -0.3473204105316457, 0, 0.89888639514227009, 0.0, 0.71658566275120927, 0}; - std::vector const scalarFlux{0.42615918171426637, 0.85794792823389721, 1.2763685331959034}; - Real thermalEnergyFlux = 0.28530908823756074; - std::vector const testFluxes = computeFluxes(compoundRightSide, - compoundLeftSide, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - { - std::string const outputString {"Left State: Left of Compound Wave\n" - "Right State: Peak of Compound Wave\n" - "HLLD State: Left Double Star State"}; - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{0.41864266180405574, 0.63505764056357727, -0.1991008813536404, 0, 0.73707474818824525, 0.0, 0.74058225030218761, 0}; - std::vector const scalarFlux{0.46343639240225803, 0.93299478173931882, 1.388015684704111}; - Real thermalEnergyFlux = 0.36325864563467081; - std::vector const testFluxes = computeFluxes(compoundLeftSide, - compoundPeak, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - { - std::string const outputString {"Left State: Peak of Compound Wave\n" - "Right State: Left of Compound Wave\n" - "HLLD State: Left Double Star State"}; - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{0.39520761138156862, 0.6390998385557225, -0.35132701297727598, 0, 0.89945171879176522, 0.0, 0.71026545717401468, 0}; - std::vector const scalarFlux{0.43749384947851333, 0.88076699477714815, 1.3103164425435772}; - Real thermalEnergyFlux = 0.32239432669410983; - std::vector const testFluxes = computeFluxes(compoundPeak, - compoundLeftSide, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - { - std::string const outputString {"Left State: Peak of Compound Wave\n" - "Right State: Right of Compound Wave\n" - "HLLD State: Left Double Star State"}; - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{0.4285899590904928, 0.6079309920345296, -0.26055320217638239, 0, 0.75090757444649436, 0.0, 0.85591904930227747, 0}; - std::vector const scalarFlux{0.47444802592454061, 0.95516351251477749, 1.4209960899845735}; - Real thermalEnergyFlux = 0.34962629086469987; - std::vector const testFluxes = computeFluxes(compoundPeak, - compoundRightSide, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - { - std::string const outputString {"Left State: Right of Compound Wave\n" - "Right State: Peak of Compound Wave\n" - "HLLD State: Left Double Star State"}; - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{0.39102247793946454, 0.65467021266207581, -0.25227691377588229, 0, 0.76271525822813691, 0.0, 0.83594460438033491, 0}; - std::vector const scalarFlux{0.43286091709705776, 0.8714399289555731, 1.2964405732397004}; - Real thermalEnergyFlux = 0.28979582956267347; - std::vector const testFluxes = computeFluxes(compoundRightSide, - compoundPeak, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - { - std::string const outputString {"Left State: Left of contact discontinuity\n" - "Right State: Right of contact discontinuity\n" - "HLLD State: Left Double Star State"}; - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{0.40753761783585118, 0.62106392255463172, -0.2455554035355339, 0, 0.73906344777217226, 0.0, 0.8687394222350926, 0}; - std::vector const scalarFlux{0.45114313616335622, 0.90824587528847567, 1.3511967538747176}; - Real thermalEnergyFlux = 0.30895701155896288; - std::vector const testFluxes = computeFluxes(contactLeftSide, - contactRightSide, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - { - std::string const outputString {"Left State: Right of contact discontinuity\n" - "Right State: Left of contact discontinuity\n" - "HLLD State: Left Double Star State"}; - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{0.13849588572126192, 0.46025037934770729, 0.18052412687974539, 0, 0.35385590617992224, 0.0, 0.86909622543144227, 0}; - std::vector const scalarFlux{0.15331460335320088, 0.30865449334158279, 0.45918507401922254}; - Real thermalEnergyFlux = 0.30928031735570188; - std::vector const testFluxes = computeFluxes(contactRightSide, - contactLeftSide, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - { - std::string const outputString {"Left State: Slow shock left side\n" - "Right State: Slow shock right side\n" - "HLLD State: Left Double Star State"}; - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{3.5274134848883865e-05, 0.32304849716274459, 0.60579784881286636, 0, -0.32813070621836449, 0.0, 0.40636483121437972, 0}; - std::vector const scalarFlux{3.9048380136491711e-05, 7.8612589559210735e-05, 0.00011695189454326261}; - Real thermalEnergyFlux = 4.4037784886918126e-05; - std::vector const testFluxes = computeFluxes(slowShockLeftSide, - slowShockRightSide, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - { - std::string const outputString {"Left State: Slow shock right side\n" - "Right State: Slow shock left side\n" - "HLLD State: Right Double Star State"}; - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{-0.016514307834939734, 0.16452009375678914, 0.71622171077118635, 0, -0.37262428139914472, 0.0, 0.37204015363322052, 0}; - std::vector const scalarFlux{-0.018281297976332211, -0.036804091985367396, -0.054753421923485097}; - Real thermalEnergyFlux = -0.020617189878790236; - std::vector const testFluxes = computeFluxes(slowShockRightSide, - slowShockLeftSide, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - { - std::string const outputString {"Left State: Right fast rarefaction left side\n" - "Right State: Right fast rarefaction right side\n" - "HLLD State: Right Double Star State"}; - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{-0.026222824218991747, 0.22254903570732654, 0.68544334213642255, 0, -0.33339172106895454, 0.0, 0.32319665359522443, 0}; - std::vector const scalarFlux{-0.029028601629558917, -0.058440671223894146, -0.086942145734385745}; - Real thermalEnergyFlux = -0.020960370728633469; - std::vector const testFluxes = computeFluxes(rightFastRareLeftSide, - rightFastRareRightSide, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - { - std::string const outputString {"Left State: Right fast rarefaction right side\n" - "Right State: Right fast rarefaction left side\n" - "HLLD State: Right Double Star State"}; - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{-0.001088867226159973, 0.32035322820305906, 0.74922357263343131, 0, -0.0099746892805345766, 0.0, 0.0082135595470345102, 0}; - std::vector const scalarFlux{-0.0012053733294214947, -0.0024266696462237609, -0.0036101547366371614}; - Real thermalEnergyFlux = -0.00081785194236053073; - std::vector const testFluxes = computeFluxes(rightFastRareRightSide, - rightFastRareLeftSide, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - } - } - // ========================================================================= - - // ========================================================================= - /*! - * \brief Test the HLLD Riemann Solver using various states and waves from - * the Dai & Woodward Shock tube - * - */ - TEST_F(tMHDCalculateHLLDFluxesCUDA, - DaiAndWoodwardShockTubeCorrectInputExpectCorrectOutput) - { - // Constant Values - Real const gamma = 5./3.; - Real const coef = 1. / (std::sqrt(4. * M_PI)); - Real const Bx = 4. * coef; - std::vector const primitiveScalar{1.1069975296, 2.2286185018, 3.3155141875}; - - // States - std::vector const // | Density | X-Velocity | Y-Velocity | Z-Velocity | Pressure | X-Magnetic Field | Y-Magnetic Field | Z-Magnetic Field | Adiabatic Index | Passive Scalars | - leftICs = primitive2Conserved({1.08, 0.0, 0.0, 0.0, 1.0, Bx, 3.6*coef, 2*coef}, gamma, primitiveScalar), - leftFastShockLeftSide = primitive2Conserved({1.09406, 1.176560, 0.021003, 0.506113, 0.970815, 1.12838, 1.105355, 0.614087}, gamma, primitiveScalar), - leftFastShockRightSide = primitive2Conserved({1.40577, 0.693255, 0.210562, 0.611423, 1.494290, 1.12838, 1.457700, 0.809831}, gamma, primitiveScalar), - leftRotationLeftSide = primitive2Conserved({1.40086, 0.687774, 0.215124, 0.609161, 1.485660, 1.12838, 1.458735, 0.789960}, gamma, primitiveScalar), - leftRotationRightSide = primitive2Conserved({1.40119, 0.687504, 0.330268, 0.334140, 1.486570, 1.12838, 1.588975, 0.475782}, gamma, primitiveScalar), - leftSlowShockLeftSide = primitive2Conserved({1.40519, 0.685492, 0.326265, 0.333664, 1.493710, 1.12838, 1.575785, 0.472390}, gamma, primitiveScalar), - leftSlowShockRightSide = primitive2Conserved({1.66488, 0.578545, 0.050746, 0.250260, 1.984720, 1.12838, 1.344490, 0.402407}, gamma, primitiveScalar), - contactLeftSide = primitive2Conserved({1.65220, 0.578296, 0.049683, 0.249962, 1.981250, 1.12838, 1.346155, 0.402868}, gamma, primitiveScalar), - contactRightSide = primitive2Conserved({1.49279, 0.578276, 0.049650, 0.249924, 1.981160, 1.12838, 1.346180, 0.402897}, gamma, primitiveScalar), - rightSlowShockLeftSide = primitive2Conserved({1.48581, 0.573195, 0.035338, 0.245592, 1.956320, 1.12838, 1.370395, 0.410220}, gamma, primitiveScalar), - rightSlowShockRightSide = primitive2Conserved({1.23813, 0.450361, -0.275532, 0.151746, 1.439000, 1.12838, 1.609775, 0.482762}, gamma, primitiveScalar), - rightRotationLeftSide = primitive2Conserved({1.23762, 0.450102, -0.274410, 0.145585, 1.437950, 1.12838, 1.606945, 0.493879}, gamma, primitiveScalar), - rightRotationRightSide = primitive2Conserved({1.23747, 0.449993, -0.180766, -0.090238, 1.437350, 1.12838, 1.503855, 0.752090}, gamma, primitiveScalar), - rightFastShockLeftSide = primitive2Conserved({1.22305, 0.424403, -0.171402, -0.085701, 1.409660, 1.12838, 1.447730, 0.723864}, gamma, primitiveScalar), - rightFastShockRightSide = primitive2Conserved({1.00006, 0.000121, -0.000057, -0.000028, 1.000100, 1.12838, 1.128435, 0.564217}, gamma, primitiveScalar), - rightICs = primitive2Conserved({1.0, 0.0, 0.0, 1.0, 0.2, Bx, 4*coef, 2*coef}, gamma, primitiveScalar); - - for (size_t direction = 0; direction < 3; direction++) - { - // Initial Condition Checks - { - std::string const outputString {"Left State: Left Dai & Woodward state\n" - "Right State: Left Dai & Woodward state\n" - "HLLD State: Left Double Star State"}; - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{0, 1.0381971863420549, -1.1459155902616465, -0.63661977236758127, 0, 0.0, 0, -1.1102230246251565e-16}; - std::vector const scalarFlux{0,0,0}; - Real thermalEnergyFlux = 0.0; - std::vector const testFluxes = computeFluxes(leftICs, - leftICs, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - { - std::string const outputString {"Left State: Right Dai & Woodward state\n" - "Right State: Right Dai & Woodward state\n" - "HLLD State: Left Double Star State"}; - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{0, 0.35915494309189522, -1.2732395447351625, -0.63661977236758127, -0.63661977236758172, 0.0, 2.2204460492503131e-16, -1.1283791670955123}; - std::vector const scalarFlux{0,0,0}; - Real thermalEnergyFlux = 0.0; - std::vector const testFluxes = computeFluxes(rightICs, - rightICs, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - { - std::string const outputString {"Left State: Left Dai & Woodward state\n" - "Right State: Right Dai & Woodward state\n" - "HLLD State: Left Double Star State"}; - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{0.17354924587196074, 0.71614983677687327, -1.1940929411768009, -1.1194725181819352, -0.11432087006939984, 0.0, 0.056156000248263505, -0.42800560867873094}; - std::vector const scalarFlux{0.19211858644420357, 0.38677506032368902, 0.57540498691841158}; - Real thermalEnergyFlux = 0.24104061926661174; - std::vector const testFluxes = computeFluxes(leftICs, - rightICs, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - { - std::string const outputString {"Left State: Right Dai & Woodward state\n" - "Right State: Left Dai & Woodward state\n" - "HLLD State: Right Double Star State"}; - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{-0.17354924587196074, 0.71614983677687327, -1.1940929411768009, -0.14549552299758384, -0.47242308031148195, 0.0, -0.056156000248263505, -0.55262526758377528}; - std::vector const scalarFlux{-0.19211858644420357, -0.38677506032368902, -0.57540498691841158}; - Real thermalEnergyFlux = -0.24104061926661174; - std::vector const testFluxes = computeFluxes(rightICs, - leftICs, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - - // Cross wave checks - { - std::string const outputString {"Left State: Left of left fast shock\n" - "Right State: Right of left fast shock\n" - "HLLD State: Left Double Star State"}; - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{0.96813688187727132, 3.0871217875403394, -1.4687093290523414, -0.33726008721080036, 4.2986213406773457, 0.0, 0.84684181393860269, -0.087452560407274671}; - std::vector const scalarFlux{1.0717251365527865, 2.157607767226648, 3.2098715673061045}; - Real thermalEnergyFlux = 1.2886155333980993; - std::vector const testFluxes = computeFluxes(leftFastShockLeftSide, - leftFastShockRightSide, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - { - std::string const outputString {"Left State: Right of left fast shock\n" - "Right State: Left of left fast shock\n" - "HLLD State: Left Star State"}; - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{1.3053938862274184, 2.4685129176021858, -1.181892850065283, -0.011160487372167127, 5.1797404608257249, 0.0, 1.1889903073770265, 0.10262704114294516}; - std::vector const scalarFlux{1.4450678072086958, 2.9092249669830292, 4.3280519500627666}; - Real thermalEnergyFlux = 2.081389946702628; - std::vector const testFluxes = computeFluxes(leftFastShockRightSide, - leftFastShockLeftSide, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - { - std::string const outputString {"Left State: Left of left rotation/Alfven wave\n" - "Right State: Right of left rotation/Alfven wave\n" - "HLLD State: Left Double Star State"}; - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{0.96326128304298586, 2.8879592118317445, -1.4808188010794987, -0.20403672861184916, 4.014027751838869, 0.0, 0.7248753989305099, -0.059178137562467162}; - std::vector const scalarFlux{1.0663278606879119, 2.1467419174572049, 3.1937064501984724}; - Real thermalEnergyFlux = 1.5323573637968553; - std::vector const testFluxes = computeFluxes(leftRotationLeftSide, - leftRotationRightSide, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - { - std::string const outputString {"Left State: Right of left rotation/Alfven wave\n" - "Right State: Left of left rotation/Alfven wave\n" - "HLLD State: Left Double Star State"}; - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{0.96353754504060063, 2.8875487093397085, -1.4327309336053695, -0.31541343522923493, 3.9739842521208342, 0.0, 0.75541746728406312, -0.13479771672887678}; - std::vector const scalarFlux{1.0666336820367937, 2.1473576000564334, 3.1946224007710313}; - Real thermalEnergyFlux = 1.5333744977458499; - std::vector const testFluxes = computeFluxes(leftRotationRightSide, - leftRotationLeftSide, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - { - std::string const outputString {"Left State: Left of left slow shock\n" - "Right State: Right of left slow shock\n" - "HLLD State: Left Double Star State"}; - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{0.88716095730727451, 2.9828594399125663, -1.417062582518549, -0.21524331343191233, 3.863474778369334, 0.0, 0.71242370728996041, -0.05229712416644372}; - std::vector const scalarFlux{0.98208498809672407, 1.9771433235295921, 2.9413947405483505}; - Real thermalEnergyFlux = 1.4145715457049737; - std::vector const testFluxes = computeFluxes(leftSlowShockLeftSide, - leftSlowShockRightSide, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - { - std::string const outputString {"Left State: Right of left slow shock\n" - "Right State: Left of left slow shock\n" - "HLLD State: Left Double Star State"}; - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{1.042385440439527, 2.7732383399777376, -1.5199872074603551, -0.21019362664841068, 4.1322001036232585, 0.0, 0.72170937317481543, -0.049474715634396704}; - std::vector const scalarFlux{1.1539181074575644, 2.323079478570472, 3.4560437166206879}; - Real thermalEnergyFlux = 1.8639570701934713; - std::vector const testFluxes = computeFluxes(leftSlowShockRightSide, - leftSlowShockLeftSide, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - { - std::string const outputString {"Left State: Left of contact discontinuity\n" - "Right State: Right of contact discontinuity\n" - "HLLD State: Left Double Star State"}; - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{0.95545795601418737, 2.8843900822429749, -1.4715039715239722, -0.21575736014726318, 4.0078718055059257, 0.0, 0.72241353110189066, -0.049073560388753337}; - std::vector const scalarFlux{1.0576895969443709, 2.1293512784652289, 3.1678344087247892}; - Real thermalEnergyFlux = 1.7186185770667382; - std::vector const testFluxes = computeFluxes(contactLeftSide, - contactRightSide, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - { - std::string const outputString {"Left State: Right of contact discontinuity\n" - "Right State: Left of contact discontinuity\n" - "HLLD State: Left Double Star State"}; - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{0.86324813554422819, 2.8309913324581251, -1.4761428591480787, -0.23887765947428419, 3.9892942559102793, 0.0, 0.72244123046603836, -0.049025527032060034}; - std::vector const scalarFlux{0.95561355347926669, 1.9238507665182214, 2.8621114407298114}; - Real thermalEnergyFlux = 1.7184928987481187; - std::vector const testFluxes = computeFluxes(contactRightSide, - contactLeftSide, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - { - std::string const outputString {"Left State: Left of right slow shock\n" - "Right State: Right of right slow shock\n" - "HLLD State: Left Double Star State"}; - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{0.81125524370350677, 2.901639500435365, -1.5141545346789429, -0.262600896007809, 3.8479660419540087, 0.0, 0.7218977970017596, -0.049091614519593846}; - std::vector const scalarFlux{0.89805755065482806, 1.8079784457999033, 2.6897282701827465}; - Real thermalEnergyFlux = 1.6022319728249694; - std::vector const testFluxes = computeFluxes(rightSlowShockLeftSide, - rightSlowShockRightSide, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - { - std::string const outputString {"Left State: Right of right slow shock\n" - "Right State: Left of right slow shock\n" - "HLLD State: Left Double Star State"}; - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{0.60157947557836688, 2.3888357198399746, -1.9910500022202977, -0.45610948442354332, 3.5359430988850069, 0.0, 1.0670963294022622, 0.05554893654378229}; - std::vector const scalarFlux{0.66594699332331575, 1.3406911495770899, 1.994545286188885}; - Real thermalEnergyFlux = 1.0487665253534804; - std::vector const testFluxes = computeFluxes(rightSlowShockRightSide, - rightSlowShockLeftSide, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - { - std::string const outputString {"Left State: Left of right rotation/Alfven wave\n" - "Right State: Right of right rotation/Alfven wave\n" - "HLLD State: Left Double Star State"}; - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{0.55701691287884714, 2.4652223621237814, -1.9664615862227277, -0.47490477894092042, 3.3900659850690529, 0.0, 1.0325648885587542, 0.059165409025635551}; - std::vector const scalarFlux{0.61661634650230224, 1.2413781978573175, 1.8467974773272691}; - Real thermalEnergyFlux = 0.9707694646266285; - std::vector const testFluxes = computeFluxes(rightRotationLeftSide, - rightRotationRightSide, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - { - std::string const outputString {"Left State: Right of right rotation/Alfven wave\n" - "Right State: Left of right rotation/Alfven wave\n" - "HLLD State: Left Double Star State"}; - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{0.55689116371132596, 2.4648517303940851, -1.7972202655166787, -0.90018282739798461, 3.3401033852664566, 0.0, 0.88105841856465605, 0.43911718823267476}; - std::vector const scalarFlux{0.61647714248450702, 1.2410979509359938, 1.8463805541782863}; - Real thermalEnergyFlux = 0.9702629326292449; - std::vector const testFluxes = computeFluxes(rightRotationRightSide, - rightRotationLeftSide, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - { - std::string const outputString {"Left State: Left of right fast shock\n" - "Right State: Right of right fast shock\n" - "HLLD State: Left Double Star State"}; - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{0.48777637414577313, 2.3709438477809708, -1.7282900552525988, -0.86414423547773778, 2.8885015704245069, 0.0, 0.77133731061645838, 0.38566794697432505}; - std::vector const scalarFlux{0.53996724117661621, 1.0870674521621893, 1.6172294888076189}; - Real thermalEnergyFlux = 0.84330016382608752; - std::vector const testFluxes = computeFluxes(rightFastShockLeftSide, - rightFastShockRightSide, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - { - std::string const outputString {"Left State: Right of right fast shock\n" - "Right State: Left of right fast shock\n" - "HLLD State: Left Double Star State"}; - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{0.040639426423817904, 1.0717156491947966, -1.2612066401572222, -0.63060225433149875, 0.15803727234007203, 0.0, 0.042555541396817498, 0.021277678888288909}; - std::vector const scalarFlux{0.044987744655527385, 0.090569777630660403, 0.13474059488003065}; - Real thermalEnergyFlux = 0.060961577855018087; - std::vector const testFluxes = computeFluxes(rightFastShockRightSide, - rightFastShockLeftSide, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - } - } - // ========================================================================= - - // ========================================================================= - /*! - * \brief Test the HLLD Riemann Solver using various states and waves from - * the Ryu & Jones 4d Shock tube - * - */ - TEST_F(tMHDCalculateHLLDFluxesCUDA, - RyuAndJones4dShockTubeCorrectInputExpectCorrectOutput) - { - // Constant Values - Real const gamma = 5./3.; - Real const Bx = 0.7; - std::vector const primitiveScalar{1.1069975296, 2.2286185018, 3.3155141875}; - - // States - std::vector const // | Density | X-Velocity | Y-Velocity | Z-Velocity | Pressure | X-Magnetic Field | Y-Magnetic Field | Z-Magnetic Field | Adiabatic Index | Passive Scalars | - leftICs = primitive2Conserved({1.0, 0.0, 0.0, 0.0, 1.0, Bx, 0.0, 0.0}, gamma, primitiveScalar), - hydroRareLeftSide = primitive2Conserved({0.990414, 0.012415, 1.458910e-58, 6.294360e-59, 0.984076, Bx, 1.252355e-57, 5.366795e-58}, gamma, primitiveScalar), - hydroRareRightSide = primitive2Conserved({0.939477, 0.079800, 1.557120e-41, 7.505190e-42, 0.901182, Bx, 1.823624e-40, 8.712177e-41}, gamma, primitiveScalar), - switchOnSlowShockLeftSide = primitive2Conserved({0.939863, 0.079142, 1.415730e-02, 7.134030e-03, 0.901820, Bx, 2.519650e-02, 1.290082e-02}, gamma, primitiveScalar), - switchOnSlowShockRightSide = primitive2Conserved({0.651753, 0.322362, 8.070540e-01, 4.425110e-01, 0.490103, Bx, 6.598380e-01, 3.618000e-01}, gamma, primitiveScalar), - contactLeftSide = primitive2Conserved({0.648553, 0.322525, 8.072970e-01, 4.426950e-01, 0.489951, Bx, 6.599295e-01, 3.618910e-01}, gamma, primitiveScalar), - contactRightSide = primitive2Conserved({0.489933, 0.322518, 8.073090e-01, 4.426960e-01, 0.489980, Bx, 6.599195e-01, 3.618850e-01}, gamma, primitiveScalar), - slowShockLeftSide = primitive2Conserved({0.496478, 0.308418, 8.060830e-01, 4.420150e-01, 0.489823, Bx, 6.686695e-01, 3.666915e-01}, gamma, primitiveScalar), - slowShockRightSide = primitive2Conserved({0.298260, -0.016740, 2.372870e-01, 1.287780e-01, 0.198864, Bx, 8.662095e-01, 4.757390e-01}, gamma, primitiveScalar), - rotationLeftSide = primitive2Conserved({0.298001, -0.017358, 2.364790e-01, 1.278540e-01, 0.198448, Bx, 8.669425e-01, 4.750845e-01}, gamma, primitiveScalar), - rotationRightSide = primitive2Conserved({0.297673, -0.018657, 1.059540e-02, 9.996860e-01, 0.197421, Bx, 9.891580e-01, 1.024949e-04}, gamma, primitiveScalar), - fastRareLeftSide = primitive2Conserved({0.297504, -0.020018, 1.137420e-02, 1.000000e+00, 0.197234, Bx, 9.883860e-01, - 4.981931e-17}, gamma, primitiveScalar), - fastRareRightSide = primitive2Conserved({0.299996, -0.000033, 1.855120e-05, 1.000000e+00, 0.199995, Bx, 9.999865e-01, 1.737190e-16}, gamma, primitiveScalar), - rightICs = primitive2Conserved({0.3, 0.0, 0.0, 1.0, 0.2, Bx, 1.0, 0.0}, gamma, primitiveScalar); - - for (size_t direction = 0; direction < 3; direction++) - { - // Initial Condition Checks - { - std::string const outputString {"Left State: Left Ryu & Jones 4d state\n" - "Right State: Left Ryu & Jones 4d state\n" - "HLLD State: Left Double Star State"}; - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{0, 0.75499999999999989, 0, 0, 2.2204460492503131e-16, 0.0, 0, 0}; - std::vector const scalarFlux{0,0,0}; - Real thermalEnergyFlux = 0.0; - std::vector const testFluxes = computeFluxes(leftICs, - leftICs, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - { - std::string const outputString {"Left State: Right Ryu & Jones 4d state\n" - "Right State: Right Ryu & Jones 4d state\n" - "HLLD State: Left Double Star State"}; - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{-5.5511151231257827e-17, 0.45500000000000013, -0.69999999999999996, -5.5511151231257827e-17, 0, 0.0, 0, -0.69999999999999996}; - std::vector const scalarFlux{-6.1450707278254418e-17, -1.2371317869019906e-16, -1.8404800947169341e-16}; - Real thermalEnergyFlux = 0.0; - std::vector const testFluxes = computeFluxes(rightICs, - rightICs, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - { - std::string const outputString {"Left State: Left Ryu & Jones 4d state\n" - "Right State: Right Ryu & Jones 4d state\n" - "HLLD State: Left Double Star State"}; - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{0.092428729855986602, 0.53311593977445149, -0.39622049648437296, -0.21566989083797167, -0.13287876964320211, 0.0, -0.40407579574102892, -0.21994567048141428}; - std::vector const scalarFlux{0.10231837561464294, 0.20598837745492582, 0.30644876517012837}; - Real thermalEnergyFlux = 0.13864309478397996; - std::vector const testFluxes = computeFluxes(leftICs, - rightICs, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - { - std::string const outputString {"Left State: Right Ryu & Jones 4d state\n" - "Right State: Left Ryu & Jones 4d state\n" - "HLLD State: Right Double Star State"}; - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{-0.092428729855986602, 0.53311593977445149, -0.39622049648437296, 0.21566989083797167, 0.13287876964320211, 0.0, 0.40407579574102892, -0.21994567048141428}; - std::vector const scalarFlux{-0.10231837561464294, -0.20598837745492582, -0.30644876517012837}; - Real thermalEnergyFlux = -0.13864309478397996; - std::vector const testFluxes = computeFluxes(rightICs, - leftICs, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - - // Cross wave checks - { - std::string const outputString {"Left State: Left side of pure hydrodynamic rarefaction\n" - "Right State: Right side of pure hydrodynamic rarefaction\n" - "HLLD State: Left Double Star State"}; - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{0.074035256375659553, 0.66054553664209648, -6.1597070943493028e-41, -2.9447391900433873e-41, 0.1776649658235645, 0.0, -6.3466063324344113e-41, -3.0340891384335242e-41}; - std::vector const scalarFlux{0.081956845911157775, 0.16499634214430131, 0.24546494288869905}; - Real thermalEnergyFlux = 0.11034221894046368; - std::vector const testFluxes = computeFluxes(hydroRareLeftSide, - hydroRareRightSide, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - { - std::string const outputString {"Left State: Right side of pure hydrodynamic rarefaction\n" - "Right State: Left side of pure hydrodynamic rarefaction\n" - "HLLD State: Left Double Star State"}; - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{0.013336890338886076, 0.74071279157971992, -6.1745213352160876e-41, -2.9474651270630147e-41, 0.033152482405470307, 0.0, 6.2022392844946449e-41, 2.9606965476795895e-41}; - std::vector const scalarFlux{0.014763904657692993, 0.029722840565719184, 0.044218649135708464}; - Real thermalEnergyFlux = 0.019189877201961154; - std::vector const testFluxes = computeFluxes(hydroRareRightSide, - hydroRareLeftSide, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - { - std::string const outputString {"Left State: Left of switch on slow shock\n" - "Right State: Right of switch on slow shock\n" - "HLLD State: Left Double Star State"}; - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{0.19734622040826083, 0.47855039640569758, -0.3392293209655618, -0.18588204716255491, 0.10695446263054809, 0.0, -0.3558357543098733, -0.19525093130352045}; - std::vector const scalarFlux{0.21846177846784187, 0.43980943806215089, 0.65430419361309078}; - Real thermalEnergyFlux = 0.2840373040888583; - std::vector const testFluxes = computeFluxes(switchOnSlowShockLeftSide, - switchOnSlowShockRightSide, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - { - std::string const outputString {"Left State: Right of switch on slow shock\n" - "Right State: Left of switch on slow shock\n" - "HLLD State: Left Double Star State"}; - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{0.097593254768855386, 0.76483698872352757, -0.02036438492698419, -0.010747481940703562, 0.25327551496496836, 0.0, -0.002520109973016129, -0.00088262199017708799}; - std::vector const scalarFlux{0.10803549193474633, 0.21749813322875222, 0.32357182079044206}; - Real thermalEnergyFlux = 0.1100817647375162; - std::vector const testFluxes = computeFluxes(switchOnSlowShockRightSide, - switchOnSlowShockLeftSide, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - { - std::string const outputString {"Left State: Left of contact discontinuity\n" - "Right State: Right of contact discontinuity\n" - "HLLD State: Left Double Star State"}; - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{0.2091677440314007, 0.5956612619664029, -0.29309091669513981, -0.16072556008504282, 0.19220050968424285, 0.0, -0.35226977371803297, -0.19316940226499904}; - std::vector const scalarFlux{0.23154817591476573, 0.46615510432814616, 0.69349862290347741}; - Real thermalEnergyFlux = 0.23702444986592192; - std::vector const testFluxes = computeFluxes(contactLeftSide, - contactRightSide, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - { - std::string const outputString {"Left State: Right of contact discontinuity\n" - "Right State: Left of contact discontinuity\n" - "HLLD State: Left Double Star State"}; - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{0.15801775068597168, 0.57916072367837657, -0.33437339604094024, -0.18336617461176744, 0.16789791355547545, 0.0, -0.3522739911439669, -0.19317084712861482}; - std::vector const scalarFlux{0.17492525964231936, 0.35216128279157616, 0.52391009427617696}; - Real thermalEnergyFlux = 0.23704936434506069; - std::vector const testFluxes = computeFluxes(contactRightSide, - contactLeftSide, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - { - std::string const outputString {"Left State: Left of slow shock\n" - "Right State: Right of slow shock\n" - "HLLD State: Left Double Star State"}; - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{0.11744487326715558, 0.66868230621718128, -0.35832022960458892, -0.19650694834641164, 0.057880816021092185, 0.0, -0.37198011453582402, -0.20397277844271294}; - std::vector const scalarFlux{0.13001118457092631, 0.26173981750473918, 0.38939014356639379}; - Real thermalEnergyFlux = 0.1738058891582446; - std::vector const testFluxes = computeFluxes(slowShockLeftSide, - slowShockRightSide, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - { - std::string const outputString {"Left State: Right of slow shock\n" - "Right State: Left of slow shock\n" - "HLLD State: Left Double Star State"}; - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{0.038440990187426027, 0.33776683678923869, -0.62583241538732792, -0.3437911783906169, -0.13471828103488348, 0.0, -0.15165427985881363, -0.082233932588833825}; - std::vector const scalarFlux{0.042554081172858457, 0.085670301959209896, 0.12745164834795927}; - Real thermalEnergyFlux = 0.038445630017261548; - std::vector const testFluxes = computeFluxes(slowShockRightSide, - slowShockLeftSide, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - { - std::string const outputString {"Left State: Left of rotation/Alfven wave\n" - "Right State: Right of rotation/Alfven wave\n" - "HLLD State: Right Double Star State"}; - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{-0.0052668366104996478, 0.44242247672452317, -0.60785196341731951, -0.33352435102145184, -0.21197843894720192, 0.0, -0.18030635192654354, -0.098381113757603278}; - std::vector const scalarFlux{-0.0058303751166299484, -0.011737769516117116, -0.017462271505355991}; - Real thermalEnergyFlux = -0.0052395622905745485; - std::vector const testFluxes = computeFluxes(rotationLeftSide, - rotationRightSide, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - { - std::string const outputString {"Left State: Right of rotation/Alfven wave\n" - "Right State: Left of rotation/Alfven wave\n" - "HLLD State: Right Double Star State"}; - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{-0.005459628948343731, 0.4415038084184626, -0.69273580053867279, -0.0051834737482743809, -0.037389286119015486, 0.0, -0.026148289294373184, -0.69914753968916865}; - std::vector const scalarFlux{-0.0060437957583491572, -0.012167430087241717, -0.018101477236719343}; - Real thermalEnergyFlux = -0.0054536013916442853; - std::vector const testFluxes = computeFluxes(rotationRightSide, - rotationLeftSide, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - { - std::string const outputString {"Left State: Left of fast rarefaction\n" - "Right State: Right of fast rarefaction\n" - "HLLD State: Right Double Star State"}; - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{-0.0059354802028144249, 0.44075681881443612, -0.69194176811725872, -0.0059354802028144804, -0.040194357552219451, 0.0, -0.027710302430178135, -0.70000000000000007}; - std::vector const scalarFlux{-0.0065705619215052757, -0.013227920997059845, -0.019679168822056604}; - Real thermalEnergyFlux = -0.0059354109546219782; - std::vector const testFluxes = computeFluxes(fastRareLeftSide, - fastRareRightSide, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - { - std::string const outputString {"Left State: Right of fast rarefaction\n" - "Right State: Left of fast rarefaction\n" - "HLLD State: Right Double Star State"}; - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{-3.0171858819483255e-05, 0.45503057873272706, -0.69998654276213712, -3.0171858819427744e-05, -0.00014827469339251387, 0.0, -8.2898844654399895e-05, -0.69999999999999984}; - std::vector const scalarFlux{-3.340017317660794e-05, -6.7241562798797897e-05, -0.00010003522597924373}; - Real thermalEnergyFlux = -3.000421709818028e-05; - std::vector const testFluxes = computeFluxes(fastRareRightSide, - fastRareLeftSide, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - } - } - // ========================================================================= - - // ========================================================================= - /*! - * \brief Test the HLLD Riemann Solver using various states and waves from - * the Einfeldt Strong Rarefaction (EFR) - * - */ - TEST_F(tMHDCalculateHLLDFluxesCUDA, - EinfeldtStrongRarefactionCorrectInputExpectCorrectOutput) - { - // Constant Values - Real const gamma = 5./3.; - Real const V0 = 2.; - Real const Vy = 0.0; - Real const Vz = 0.0; - Real const Bx = 0.0; - Real const Bz = 0.0; - - std::vector const primitiveScalar{1.1069975296, 2.2286185018, 3.3155141875}; - - // States - std::vector const // | Density | X-Velocity | Y-Velocity | Z-Velocity | Pressure | X-Magnetic Field | Y-Magnetic Field | Z-Magnetic Field | Adiabatic Index | Passive Scalars | - leftICs = primitive2Conserved({1.0, -V0, Vy, Vz, 0.45, Bx, 0.5, Bz}, gamma, primitiveScalar), - leftRarefactionCenter = primitive2Conserved({0.368580, -1.180830, Vy, Vz, 0.111253, Bx, 0.183044, Bz}, gamma, primitiveScalar), - leftVxTurnOver = primitive2Conserved({0.058814, -0.125475, Vy, Vz, 0.008819, Bx, 0.029215, Bz}, gamma, primitiveScalar), - midPoint = primitive2Conserved({0.034658, 0.000778, Vy, Vz, 0.006776, Bx, 0.017333, Bz}, gamma, primitiveScalar), - rightVxTurnOver = primitive2Conserved({0.062587, 0.152160, Vy, Vz, 0.009521, Bx, 0.031576, Bz}, gamma, primitiveScalar), - rightRarefactionCenter = primitive2Conserved({0.316485, 1.073560, Vy, Vz, 0.089875, Bx, 0.159366, Bz}, gamma, primitiveScalar), - rightICs = primitive2Conserved({1.0, V0, Vy, Vz, 0.45, Bx, 0.5, Bz}, gamma, primitiveScalar); - - for (size_t direction = 0; direction < 3; direction++) - { - // Initial Condition Checks - { - std::string const outputString {"Left State: Left Einfeldt Strong Rarefaction state\n" - "Right State: Left Einfeldt Strong Rarefaction state\n" - "HLLD State: Right"}; - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{-2, 4.5750000000000002, -0, -0, -6.75, 0.0, -1, -0}; - std::vector const scalarFlux{-2.2139950592000002, -4.4572370036000004, -6.6310283749999996}; - Real thermalEnergyFlux = -1.3499999999999996; - std::vector const testFluxes = computeFluxes(leftICs, - leftICs, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - { - std::string const outputString {"Left State: Right Einfeldt Strong Rarefaction state\n" - "Right State: Right Einfeldt Strong Rarefaction state\n" - "HLLD State: Left"}; - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{2, 4.5750000000000002, 0, 0, 6.75, 0.0, 1, 0}; - std::vector const scalarFlux{2.2139950592000002, 4.4572370036000004, 6.6310283749999996}; - Real thermalEnergyFlux = 1.3499999999999996; - std::vector const testFluxes = computeFluxes(rightICs, - rightICs, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - { - std::string const outputString {"Left State: Left Einfeldt Strong Rarefaction state\n" - "Right State: Right Einfeldt Strong Rarefaction state\n" - "HLLD State: Left Star"}; - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{0, -1.4249999999999998, -0, -0, 0, 0.0, 0, -0}; - std::vector const scalarFlux{0,0,0}; - Real thermalEnergyFlux = 0.0; - std::vector const testFluxes = computeFluxes(leftICs, - rightICs, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - { - std::string const outputString {"Left State: Right Einfeldt Strong Rarefaction state\n" - "Right State: Left Einfeldt Strong Rarefaction state\n" - "HLLD State: Left Star"}; - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{0, 10.574999999999999, 0, 0, 0, 0.0, 0, 0}; - std::vector const scalarFlux{0,0,0}; - Real thermalEnergyFlux = 0.0; - std::vector const testFluxes = computeFluxes(rightICs, - leftICs, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - - // Intermediate state checks - { - std::string const outputString {"Left State: Left Einfeldt Strong Rarefaction state\n" - "Right State: Left rarefaction center\n" - "HLLD State: Right"}; - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{-0.43523032140000006, 0.64193857338676208, -0, -0, -0.67142479846795033, 0.0, -0.21614384652000002, -0}; - std::vector const scalarFlux{-0.48179889059681413, -0.9699623468164007, -1.4430123054318851}; - Real thermalEnergyFlux = -0.19705631998499995; - std::vector const testFluxes = computeFluxes(leftICs, - leftRarefactionCenter, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - { - std::string const outputString {"Left State: Left rarefaction center\n" - "Right State: Left Einfeldt Strong Rarefaction state\n" - "HLLD State: Right"}; - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{-2, 4.5750000000000002, -0, -0, -6.75, 0.0, -1, -0}; - std::vector const scalarFlux{-2.2139950592000002, -4.4572370036000004, -6.6310283749999996}; - Real thermalEnergyFlux = -1.3499999999999996; - std::vector const testFluxes = computeFluxes(leftRarefactionCenter, - leftICs, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - { - std::string const outputString {"Left State: Left rarefaction center\n" - "Right State: Left Vx turnover point\n" - "HLLD State: Right Star"}; - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{-0.023176056428381629, -2.0437812714100764e-05, 0, 0, -0.00098843768795337005, 0.0, -0.011512369309265979, 0}; - std::vector const scalarFlux{-0.025655837212088663, -0.051650588155052128, -0.076840543898599858}; - Real thermalEnergyFlux = -0.0052127803322822184; - std::vector const testFluxes = computeFluxes(leftRarefactionCenter, - leftVxTurnOver, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - { - std::string const outputString {"Left State: Left Vx turnover point\n" - "Right State: Left rarefaction center\n" - "HLLD State: Right Star"}; - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{-0.43613091609689758, 0.64135749005731213, 0, 0, -0.67086080671260462, 0.0, -0.21659109937066717, 0}; - std::vector const scalarFlux{-0.48279584670145054, -0.9719694288205295, -1.445998239926636}; - Real thermalEnergyFlux = -0.19746407621898149; - std::vector const testFluxes = computeFluxes(leftVxTurnOver, - leftRarefactionCenter, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - { - std::string const outputString {"Left State: Left Vx turnover point\n" - "Right State: Midpoint\n" - "HLLD State: Right Star"}; - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{-0.0011656375857387598, 0.0062355370788444902, 0, 0, -0.00055517615333601446, 0.0, -0.0005829533231464588, 0}; - std::vector const scalarFlux{-0.0012903579278217153, -0.0025977614899708843, -0.0038646879530001054}; - Real thermalEnergyFlux = -0.00034184143405415065; - std::vector const testFluxes = computeFluxes(leftVxTurnOver, - midPoint, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - { - std::string const outputString {"Left State: Midpoint\n" - "Right State: Left Vx turnover point\n" - "HLLD State: Right Star"}; - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{-0.0068097924351817191, 0.010501781004354172, 0, 0, -0.0027509360975397175, 0.0, -0.0033826654536986789, 0}; - std::vector const scalarFlux{-0.0075384234028349319, -0.015176429414463658, -0.022577963432775162}; - Real thermalEnergyFlux = -0.001531664896602873; - std::vector const testFluxes = computeFluxes(midPoint, - leftVxTurnOver, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - { - std::string const outputString {"Left State: Midpoint\n" - "Right State: Right Vx turnover point\n" - "HLLD State: Left Star"}; - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{0.0013952100758668729, 0.0061359407125797273, 0, 0, 0.00065984543596031629, 0.0, 0.00069776606396793105, 0}; - std::vector const scalarFlux{ 0.001544494107257657, 0.0031093909889746947, 0.0046258388010795683}; - Real thermalEnergyFlux = 0.00040916715364737997; - std::vector const testFluxes = computeFluxes(midPoint, - rightVxTurnOver, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - { - std::string const outputString {"Left State: Right Vx turnover point\n" - "Right State: Midpoint\n" - "HLLD State: Left Star"}; - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{0.0090024688079190333, 0.011769373146023688, 0, 0, 0.003725251767222792, 0.0, 0.0045418689996141555, 0}; - std::vector const scalarFlux{0.0099657107306674268, 0.020063068547205749, 0.029847813055181766}; - Real thermalEnergyFlux = 0.0020542406295284269; - std::vector const testFluxes = computeFluxes(rightVxTurnOver, - midPoint, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - { - std::string const outputString {"Left State: Right Vx turnover point\n" - "Right State: Right rarefaction center\n" - "HLLD State: Left Star"}; - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{0.023310393229073981, 0.0033086897645311728, 0, 0, 0.0034208520409618887, 0.0, 0.011760413130542123, 0}; - std::vector const scalarFlux{0.025804547718589466, 0.051949973634547723, 0.077285939467198722}; - Real thermalEnergyFlux = 0.0053191138878843835; - std::vector const testFluxes = computeFluxes(rightVxTurnOver, - rightRarefactionCenter, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - { - std::string const outputString {"Left State: Right rarefaction center\n" - "Right State: Right Vx turnover point\n" - "HLLD State: Left Star"}; - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{0.33914253809565298, 0.46770133685446141, 0, 0, 0.46453338019960133, 0.0, 0.17077520175095764, 0}; - std::vector const scalarFlux{0.37542995185416178, 0.75581933514738364, 1.1244318966408966}; - Real thermalEnergyFlux = 0.1444638874418068; - std::vector const testFluxes = computeFluxes(rightRarefactionCenter, - rightVxTurnOver, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - { - std::string const outputString {"Left State: Right rarefaction center\n" - "Right State: Right Einfeldt Strong Rarefaction state\n" - "HLLD State: Left"}; - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{0.33976563660000003, 0.46733255780629601, 0, 0, 0.46427650313257612, 0.0, 0.17108896296000001, 0}; - std::vector const scalarFlux{0.37611972035917141, 0.75720798400261535, 1.1264977885722693}; - Real thermalEnergyFlux = 0.14472930749999999; - std::vector const testFluxes = computeFluxes(rightRarefactionCenter, - rightICs, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - { - std::string const outputString {"Left State: Right Einfeldt Strong Rarefaction state\n" - "Right State: Right rarefaction center\n" - "HLLD State: Left"}; - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{2, 4.5750000000000002, 0, 0, 6.75, 0.0, 1, 0}; - std::vector const scalarFlux{2.2139950592000002, 4.4572370036000004, 6.6310283749999996}; - Real thermalEnergyFlux = 1.3499999999999996; - std::vector const testFluxes = computeFluxes(rightICs, - rightRarefactionCenter, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - } - } - // ========================================================================= - - // ========================================================================= - /*! - * \brief Test the HLLD Riemann Solver using the constant states from the - * examples in cholla/examples/3D - * - */ - TEST_F(tMHDCalculateHLLDFluxesCUDA, - ConstantStatesExpectCorrectFlux) - { - // Constant Values - Real const gamma = 5./3.; - - std::vector const primitiveScalar{1.1069975296, 2.2286185018, 3.3155141875}; - - // States - std::vector const // | Density | X-Velocity | Y-Velocity | Z-Velocity | Pressure | X-Magnetic Field | Y-Magnetic Field | Z-Magnetic Field | Adiabatic Index | Passive Scalars | - zeroMagneticField = primitive2Conserved({1e4, 0.0, 0.0, 0.0, 1.380658E-5, 0.0, 0.0, 0.0}, gamma, primitiveScalar), - onesMagneticField = primitive2Conserved({1e4, 0.0, 0.0, 0.0, 1.380658E-5, 1.0, 1.0, 1.0}, gamma, primitiveScalar); - - for (size_t direction = 2; direction < 3; direction++) - { - { - std::string const outputString {"Left State: Constant state, zero magnetic field\n" - "Right State: Constant state, zero magnetic field\n" - "HLLD State: Left Star"}; - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{0,1.380658e-05,0,0,0,0,0,0}; - std::vector const scalarFlux{0,0,0}; - Real thermalEnergyFlux = 0.; - std::vector const testFluxes = computeFluxes(zeroMagneticField, - zeroMagneticField, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - { - std::string const outputString {"Left State: Constant state, ones magnetic field\n" - "Right State: Constant state, ones magnetic field\n" - "HLLD State: Left Double Star"}; - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{0, 0.50001380657999994, -1, -1, -1.7347234759768071e-18, 0.0, 3.4694469519536142e-18, 3.4694469519536142e-18}; - std::vector const scalarFlux{1.5731381063233131e-14, 3.1670573744690958e-14, 4.7116290424753513e-14}; - Real thermalEnergyFlux = 0.; - std::vector const testFluxes = computeFluxes(onesMagneticField, - onesMagneticField, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - } - } - // ========================================================================= - - // ========================================================================= - /*! - * \brief Test the HLLD Riemann Solver with the degenerate state - * - */ - TEST_F(tMHDCalculateHLLDFluxesCUDA, - DegenerateStateCorrectInputExpectCorrectOutput) - { - // Constant Values - Real const gamma = 5./3.; - std::vector const primitiveScalar{1.1069975296, 2.2286185018, 3.3155141875}; - - // State - std::vector const // | Density | X-Velocity | Y-Velocity | Z-Velocity | Pressure | X-Magnetic Field | Y-Magnetic Field | Z-Magnetic Field | Adiabatic Index | Passive Scalars | - state = primitive2Conserved({1.0, 1.0, 1.0, 1.0, 1.0, 3.0E4, 1.0, 1.0}, gamma, primitiveScalar); - - std::vector const fiducialFlux{1, -449999997, -29999, -29999, -59994, 0.0, -29999, -29999}; - std::vector const scalarFlux{1.1069975296000001, 2.2286185018000002, 3.3155141874999998}; - Real thermalEnergyFlux = 1.5; - std::string const outputString {"Left State: Degenerate state\n" - "Right State: Degenerate state\n" - "HLLD State: Left Double Star State"}; - - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - // If you run into issues with the energy try 0.001953125 instead. - // That's what I got when running the Athena solver on its own. Running - // the Athena solver with theses tests gave me -0.00080700946455175148 - // though - for (size_t direction = 0; direction < 3; direction++) - { - std::vector const testFluxes = computeFluxes(state, - state, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - } - // ========================================================================= - - // ========================================================================= - /*! - * \brief Test the HLLD Riemann Solver with all zeroes - * - */ - TEST_F(tMHDCalculateHLLDFluxesCUDA, - AllZeroesExpectAllZeroes) - { - // Constant Values - Real const gamma = 5./3.; - - // State - size_t numElements = 8; - #ifdef SCALAR - numElements += 3; - #endif // SCALAR - - std::vector const state(numElements, 0.0); - std::vector const fiducialFlux(8,0.0); - std::vector const scalarFlux(3,0.0); - Real thermalEnergyFlux = 0.0; - - std::string const outputString {"Left State: All zeroes\n" - "Right State: All zeroes\n" - "HLLD State: Right Star State"}; - - for (size_t direction = 0; direction < 3; direction++) - { - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const testFluxes = computeFluxes(state, - state, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - } - // ========================================================================= - - // ========================================================================= - /*! - * \brief Test the HLLD Riemann Solver with negative pressure, energy, and - density. - * - */ - TEST_F(tMHDCalculateHLLDFluxesCUDA, - UnphysicalValuesExpectAutomaticFix) - { - // Constant Values - Real const gamma = 5./3.; - - // States - std::vector // | Density | X-Momentum | Y-Momentum | Z-Momentum | Energy | X-Magnetic Field | Y-Magnetic Field | Z-Magnetic Field | Adiabatic Index | Passive Scalars | - negativePressure = { 1.0, 1.0, 1.0, 1.0, 1.5, 1.0, 1.0, 1.0}, - negativeEnergy = { 1.0, 1.0, 1.0, 1.0, -(5-gamma), 1.0, 1.0, 1.0}, - negativeDensity = {-1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0}, - negativeDensityEnergyPressure = {-1.0, -1.0, -1.0, -1.0, -gamma, 1.0, 1.0, 1.0}, - negativeDensityPressure = {-1.0, 1.0, 1.0, 1.0, -1.0, 1.0, 1.0, 1.0}; - - #ifdef SCALAR - std::vector const conservedScalar{1.1069975296, 2.2286185018, 3.3155141875}; - negativePressure.insert(negativePressure.begin()+5, conservedScalar.begin(), conservedScalar.begin() + grid_enum::nscalars); - negativeEnergy.insert(negativeEnergy.begin()+5, conservedScalar.begin(), conservedScalar.begin() + grid_enum::nscalars); - negativeDensity.insert(negativeDensity.begin()+5, conservedScalar.begin(), conservedScalar.begin() + grid_enum::nscalars); - negativeDensityEnergyPressure.insert(negativeDensityEnergyPressure.begin()+5, conservedScalar.begin(), conservedScalar.begin() + grid_enum::nscalars); - negativeDensityPressure.insert(negativeDensityPressure.begin()+5, conservedScalar.begin(), conservedScalar.begin() + grid_enum::nscalars); - #endif // SCALAR - #ifdef DE - negativePressure.push_back(mhd::utils::computeThermalEnergy(negativePressure.at(4),negativePressure.at(0),negativePressure.at(1),negativePressure.at(2),negativePressure.at(3),negativePressure.at(grid_enum::magnetic_x),negativePressure.at(grid_enum::magnetic_y),negativePressure.at(grid_enum::magnetic_z),gamma)); - negativeEnergy.push_back(mhd::utils::computeThermalEnergy(negativeEnergy.at(4),negativeEnergy.at(0),negativeEnergy.at(1),negativeEnergy.at(2),negativeEnergy.at(3),negativeEnergy.at(grid_enum::magnetic_x),negativeEnergy.at(grid_enum::magnetic_y),negativeEnergy.at(grid_enum::magnetic_z),gamma)); - negativeDensity.push_back(mhd::utils::computeThermalEnergy(negativeDensity.at(4),negativeDensity.at(0),negativeDensity.at(1),negativeDensity.at(2),negativeDensity.at(3),negativeDensity.at(grid_enum::magnetic_x),negativeDensity.at(grid_enum::magnetic_y),negativeDensity.at(grid_enum::magnetic_z),gamma)); - negativeDensityEnergyPressure.push_back(mhd::utils::computeThermalEnergy(negativeDensityEnergyPressure.at(4),negativeDensityEnergyPressure.at(0),negativeDensityEnergyPressure.at(1),negativeDensityEnergyPressure.at(2),negativeDensityEnergyPressure.at(3),negativeDensityEnergyPressure.at(grid_enum::magnetic_x),negativeDensityEnergyPressure.at(grid_enum::magnetic_y),negativeDensityEnergyPressure.at(grid_enum::magnetic_z),gamma)); - negativeDensityPressure.push_back(mhd::utils::computeThermalEnergy(negativeDensityPressure.at(4),negativeDensityPressure.at(0),negativeDensityPressure.at(1),negativeDensityPressure.at(2),negativeDensityPressure.at(3),negativeDensityPressure.at(grid_enum::magnetic_x),negativeDensityPressure.at(grid_enum::magnetic_y),negativeDensityPressure.at(grid_enum::magnetic_z),gamma)); - #endif //DE - - for (size_t direction = 0; direction < 3; direction++) - { - { - std::string const outputString {"Left State: Negative Pressure\n" - "Right State: Negative Pressure\n" - "HLLD State: Left Star State"}; - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{1, 1.5, 0, 0, -1.6254793235168146e-16, 0, 0, 0}; - std::vector const scalarFlux{1.1069975296000001, 2.2286185018000002, 3.3155141874999998}; - Real thermalEnergyFlux = -1.5; - std::vector const testFluxes = computeFluxes(negativePressure, - negativePressure, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - { - std::string const outputString {"Left State: Negative Energy\n" - "Right State: Negative Energy\n" - "HLLD State: Left Star State"}; - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{1, 1.5, 0, 0, -1.5, 0, 0, 0}; - std::vector const scalarFlux{1.1069975296000001, 2.2286185018000002, 3.3155141874999998}; - Real thermalEnergyFlux = -6.333333333333333; - std::vector const testFluxes = computeFluxes(negativeEnergy, - negativeEnergy, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - { - std::string const outputString {"Left State: Negative Density\n" - "Right State: Negative Density\n" - "HLLD State: Left State"}; - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{1, 1E+20, 1e+20, 1e+20, -5e+19, 0, 0, 0}; - std::vector const scalarFlux{1.1069975296000002e+20, 2.2286185018000002e+20, 3.3155141874999997e+20}; - Real thermalEnergyFlux = -1.5000000000000001e+40; - std::vector const testFluxes = computeFluxes(negativeDensity, - negativeDensity, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - { - std::string const outputString {"Left State: Negative Density, Energy, and Pressure\n" - "Right State: Negative Density, Energy, and Pressure\n" - "HLLD State: Right State"}; - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{-1, 1E+20, 1E+20, 1E+20, 1.5E+20, 0, 0, 0}; - std::vector const scalarFlux{-1.1069975296000002e+20, -2.2286185018000002e+20, -3.3155141874999997e+20}; - Real thermalEnergyFlux = 1.5000000000000001e+40; - std::vector const testFluxes = computeFluxes(negativeDensityEnergyPressure, - negativeDensityEnergyPressure, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - { - std::string const outputString {"Left State: Negative Density and Pressure\n" - "Right State: Negative Density and Pressure\n" - "HLLD State: Left State"}; - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{1, 1e+20, 1e+20, 1e+20, -1.5e+20, 0, 0, 0}; - std::vector const scalarFlux{1.1069975296000002e+20, 2.2286185018000002e+20, 3.3155141874999997e+20}; - Real thermalEnergyFlux = -1.5000000000000001e+40; - std::vector const testFluxes = computeFluxes(negativeDensityPressure, - negativeDensityPressure, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - } - } - // ========================================================================= - - // ========================================================================= - // End of integration tests for the entire HLLD solver. Unit tests are below - // ========================================================================= - - // ========================================================================= - // Unit tests for the contents of the mhd::_internal namespace - // ========================================================================= - /*! - * \brief A struct to hold some basic test values - * - */ - namespace - { - struct testParams - { - // List of cases - std::vector names{"Case 1", "Case 2"}; - - // Conserved Variables - double gamma = 5./3.; - std::valarray densityL {21.50306776645775 , 48.316634031589935}; - std::valarray densityR {81.1217731762265 , 91.02955738853635}; - std::valarray momentumXL{38.504606872151484 , 18.984145880030045}; - std::valarray momentumXR{ 8.201811315045326 , 85.24863367778745}; - std::valarray momentumYL{ 7.1046427940455015, 33.76182584816693}; - std::valarray momentumYR{13.874767484202021 , 33.023492551299974}; - std::valarray momentumZL{32.25700338919422 , 89.52561861038686}; - std::valarray momentumZR{33.85305318830181 , 8.664313303796256}; - std::valarray energyL {65.75120838109942 , 38.461354599479826}; - std::valarray energyR {18.88982523270516 , 83.65639784178894}; - std::valarray magneticXL{92.75101068883114 , 31.588767769990532}; - std::valarray magneticXR{93.66196246448985 , 84.3529879134052}; - std::valarray magneticYL{12.297499156516622 , 63.74471969570406}; - std::valarray magneticYR{84.9919141787549 , 35.910258841630984}; - std::valarray magneticZL{46.224045698787776 , 37.70326455170754}; - std::valarray magneticZR{34.852095153095384 , 24.052685003977757}; - // Star States - std::valarray densityStarL {28.520995251761526 , 54.721668215064945}; - std::valarray densityStarR {49.09069570738605 , 72.68000504460609}; - std::valarray momentumStarXL{48.96082367518151 , 97.15439466280228}; - std::valarray momentumStarXR{65.74705433463932 , 94.5689655974538}; - std::valarray momentumStarYL{44.910034185328996 , 78.60179936059853}; - std::valarray momentumStarYR{51.642522487399276 , 44.63864007208728}; - std::valarray momentumStarZL{39.78163555990428 , 63.01612978428839}; - std::valarray momentumStarZR{33.47900698769427 , 52.19410653341197}; - std::valarray energyStarL { 6.579867455284738 , 30.45043664908369}; - std::valarray energyStarR {90.44484278669114 , 61.33664731346812}; - std::valarray magneticStarXL{49.81491527582234 , 62.379765828560906}; - std::valarray magneticStarXR{67.77402751903804 , 64.62226739788758}; - std::valarray magneticStarYL{62.09348829143065 , 54.27916744403672}; - std::valarray magneticStarYR{26.835645069149873 , 98.97444628327318}; - std::valarray magneticStarZL{62.765890944643196 , 93.26765455509641}; - std::valarray magneticStarZR{ 7.430231695917344 , 10.696380763901459}; - // Double Star State - std::valarray momentumDoubleStarXL{75.42525315887075 , 83.87480678359029}; - std::valarray momentumDoubleStarYL{22.56132540660678 , 76.11074421934487}; - std::valarray momentumDoubleStarZL{27.83908778933224 , 28.577101567661465}; - std::valarray energyDoubleStar {45.83202455707669 , 55.4553014145573}; - std::valarray magneticDoubleStarY {20.943239839455895 , 83.8514810487021}; - std::valarray magneticDoubleStarZ {83.3802438268807 , 80.36671251730783}; - // Fluxes - std::valarray densityFluxL {12.939239309626116 , 81.71524586517073}; - std::valarray momentumFluxXL {65.05481464917627 , 56.09885069707803}; - std::valarray momentumFluxYL {73.67692845586782 , 2.717246983403787}; - std::valarray momentumFluxZL {16.873647595664387 , 39.70132983192873}; - std::valarray energyFluxL {52.71888731972469 , 81.63926176158796}; - std::valarray magneticFluxXL {67.7412464028116 , 42.85301340921149}; - std::valarray magneticFluxYL {58.98928445415967 , 57.04344459221359}; - std::valarray magneticFluxZL {29.976925743532302 , 97.73329827141359}; - std::valarray momentumStarFluxX{74.90125547448865 , 26.812722601652684}; - std::valarray momentumStarFluxY{16.989138610622945 , 48.349566649914976}; - std::valarray momentumStarFluxZ{38.541822734846185 , 61.22843961052538}; - std::valarray energyStarFlux {19.095105176247017 , 45.43224973313112}; - std::valarray magneticStarFluxY{96.23964526624277 , 33.05337536594796}; - std::valarray magneticStarFluxZ{86.22516928268347 , 15.62102082410738}; - - // Derived/Primitive variables - std::valarray velocityXL = momentumXL / densityL; - std::valarray velocityXR = momentumXR / densityR; - std::valarray velocityYL = momentumYL / densityL; - std::valarray velocityYR = momentumYR / densityR; - std::valarray velocityZL = momentumZL / densityL; - std::valarray velocityZR = momentumZR / densityR; - std::valarray totalPressureStarL{66.80958736783934 , 72.29644038317676}; - std::vector gasPressureL; - std::vector gasPressureR; - std::vector totalPressureL; - std::vector totalPressureR; - // Star State - std::valarray velocityStarXL = momentumStarXL / densityStarL; - std::valarray velocityStarXR = momentumStarXR / densityStarR; - std::valarray velocityStarYL = momentumStarYL / densityStarL; - std::valarray velocityStarYR = momentumStarYR / densityStarR; - std::valarray velocityStarZL = momentumStarZL / densityStarL; - std::valarray velocityStarZR = momentumStarZR / densityStarR; - // Double Star State - std::valarray velocityDoubleStarXL = momentumDoubleStarXL / densityStarL; - std::valarray velocityDoubleStarYL = momentumDoubleStarYL / densityStarL; - std::valarray velocityDoubleStarZL = momentumDoubleStarZL / densityStarL; - // Other - std::valarray speedM {68.68021569453585 , 70.08236749169825}; - std::valarray speedSide {70.37512772923496 , 3.6579130085113265}; - testParams() - { - for (size_t i = 0; i < names.size(); i++) - { - gasPressureL.push_back(mhd::utils::computeGasPressure(energyL[i], densityL[i], momentumXL[i], momentumYL[i], momentumZL[i], magneticXL[i], magneticYL[i], magneticZL[i], gamma)); - gasPressureR.push_back(mhd::utils::computeGasPressure(energyR[i], densityR[i], momentumXR[i], momentumYR[i], momentumZR[i], magneticXR[i], magneticYR[i], magneticZR[i], gamma)); - totalPressureL.push_back(mhd::utils::computeTotalPressure(gasPressureL.back(), magneticXL[i], magneticYL[i], magneticZL[i])); - totalPressureR.push_back(mhd::utils::computeTotalPressure(gasPressureL.back(), magneticXR[i], magneticYR[i], magneticZR[i])); - } - } - }; - } - // ========================================================================= - - // ========================================================================= - /*! - * \brief Test the mhd::_internal::_approximateWaveSpeeds function - * - */ - TEST(tMHDHlldInternalApproximateWaveSpeeds, - CorrectInputExpectCorrectOutput) - { - testParams const parameters; - std::vector const fiducialSpeedL {-22.40376497145191, -11.190385012513822}; - std::vector const fiducialSpeedR {24.295526347371595, 12.519790189404299}; - std::vector const fiducialSpeedM {-0.81760587897407833, -0.026643804611559244}; - std::vector const fiducialSpeedStarL {-19.710500632936679, -4.4880642018724357}; - std::vector const fiducialSpeedStarR {9.6740190040662242, 3.4191202933087519}; - std::vector const fiducialDensityStarL{24.101290139122913, 50.132466596958501}; - std::vector const fiducialDensityStarR{78.154104734671265, 84.041595114910123}; - - double testSpeedL = 0; - double testSpeedR = 0; - double testSpeedM = 0; - double testSpeedStarL = 0; - double testSpeedStarR = 0; - double testDensityStarL = 0; - double testDensityStarR = 0; - - for (size_t i = 0; i < parameters.names.size(); i++) - { - mhd::_internal::_approximateWaveSpeeds(parameters.densityL[i], - parameters.momentumXL[i], - parameters.momentumYL[i], - parameters.momentumZL[i], - parameters.velocityXL[i], - parameters.velocityYL[i], - parameters.velocityZL[i], - parameters.gasPressureL[i], - parameters.totalPressureL[i], - parameters.magneticXL[i], - parameters.magneticYL[i], - parameters.magneticZL[i], - parameters.densityR[i], - parameters.momentumXR[i], - parameters.momentumYR[i], - parameters.momentumZR[i], - parameters.velocityXR[i], - parameters.velocityYR[i], - parameters.velocityZR[i], - parameters.gasPressureR[i], - parameters.totalPressureR[i], - parameters.magneticYR[i], - parameters.magneticZR[i], - parameters.gamma, - testSpeedL, - testSpeedR, - testSpeedM, - testSpeedStarL, - testSpeedStarR, - testDensityStarL, - testDensityStarR); - // Now check results - testingUtilities::checkResults(fiducialSpeedL[i], - testSpeedL, - parameters.names.at(i) + ", SpeedL"); - testingUtilities::checkResults(fiducialSpeedR.at(i), - testSpeedR, - parameters.names.at(i) + ", SpeedR"); - testingUtilities::checkResults(fiducialSpeedM.at(i), - testSpeedM, - parameters.names.at(i) + ", SpeedM"); - testingUtilities::checkResults(fiducialSpeedStarL.at(i), - testSpeedStarL, - parameters.names.at(i) + ", SpeedStarL"); - testingUtilities::checkResults(fiducialSpeedStarR.at(i), - testSpeedStarR, - parameters.names.at(i) + ", SpeedStarR"); - testingUtilities::checkResults(fiducialDensityStarL.at(i), - testDensityStarL, - parameters.names.at(i) + ", DensityStarL"); - testingUtilities::checkResults(fiducialDensityStarR.at(i), - testDensityStarR, - parameters.names.at(i) + ", DensityStarR"); - } - } - // ========================================================================= - - // ========================================================================= - /*! - * \brief Test the mhd::_internal::_starFluxes function in the non-degenerate - * case - * - */ - TEST(tMHDHlldInternalStarFluxes, - CorrectInputNonDegenerateExpectCorrectOutput) - { - testParams const parameters; - - std::vector const fiducialVelocityStarY {12.831290892281075, 12.92610185957192}; - std::vector const fiducialVelocityStarZ {48.488664548015286, 9.0850326944201107}; - std::vector const fiducialEnergyStar {1654897.6912410262, 956.83439334487116}; - std::vector const fiducialMagneticStarY {-186.47142421374559, 2.6815421494204679}; - std::vector const fiducialMagneticStarZ {-700.91191100481922, 1.5860591049546646}; - std::vector const fiducialDensityStarFlux {506.82678248238807, 105.14430372486369}; - std::vector const fiducialMomentumStarFluxX{135208.06632708258, 14014.840899433098}; - std::vector const fiducialMomentumStarFluxY{25328.25203616685, 2466.5997745560339}; - std::vector const fiducialMomentumStarFluxZ{95071.711914347878, 1530.7490710422007}; - std::vector const fiducialEnergyStarFlux {116459061.8691024, 3440.9679468544314}; - std::vector const fiducialMagneticStarFluxY{-13929.399086330559, -166.32034689537392}; - std::vector const fiducialMagneticStarFluxZ{-52549.811458376971, -34.380297363339892}; - - double testVelocityStarY; - double testVelocityStarZ; - double testEnergyStar; - double testMagneticStarY; - double testMagneticStarZ; - double testDensityStarFlux; - double testMomentumStarFluxX; - double testMomentumStarFluxY; - double testMomentumStarFluxZ; - double testEnergyStarFlux; - double testMagneticStarFluxY; - double testMagneticStarFluxZ; - - for (size_t i = 0; i < parameters.names.size(); i++) - { - mhd::_internal::_starFluxes(parameters.speedM[i], - parameters.speedSide[i], - parameters.densityL[i], - parameters.velocityXL[i], - parameters.velocityYL[i], - parameters.velocityZL[i], - parameters.momentumXL[i], - parameters.momentumYL[i], - parameters.momentumZL[i], - parameters.energyL[i], - parameters.totalPressureL[i], - parameters.magneticXL[i], - parameters.magneticYL[i], - parameters.magneticZL[i], - parameters.densityStarL[i], - parameters.totalPressureStarL[i], - parameters.densityFluxL[i], - parameters.momentumFluxXL[i], - parameters.momentumFluxYL[i], - parameters.momentumFluxZL[i], - parameters.energyFluxL[i], - parameters.magneticFluxYL[i], - parameters.magneticFluxZL[i], - testVelocityStarY, - testVelocityStarZ, - testEnergyStar, - testMagneticStarY, - testMagneticStarZ, - testDensityStarFlux, - testMomentumStarFluxX, - testMomentumStarFluxY, - testMomentumStarFluxZ, - testEnergyStarFlux, - testMagneticStarFluxY, - testMagneticStarFluxZ); - - // Now check results - testingUtilities::checkResults(fiducialVelocityStarY[i], - testVelocityStarY, - parameters.names.at(i) + ", VelocityStarY"); - testingUtilities::checkResults(fiducialVelocityStarZ[i], - testVelocityStarZ, - parameters.names.at(i) + ", VelocityStarZ"); - testingUtilities::checkResults(fiducialEnergyStar[i], - testEnergyStar, - parameters.names.at(i) + ", EnergyStar"); - testingUtilities::checkResults(fiducialMagneticStarY[i], - testMagneticStarY, - parameters.names.at(i) + ", MagneticStarY"); - testingUtilities::checkResults(fiducialMagneticStarZ[i], - testMagneticStarZ, - parameters.names.at(i) + ", MagneticStarZ"); - testingUtilities::checkResults(fiducialDensityStarFlux[i], - testDensityStarFlux, - parameters.names.at(i) + ", DensityStarFlux"); - testingUtilities::checkResults(fiducialMomentumStarFluxX[i], - testMomentumStarFluxX, - parameters.names.at(i) + ", MomentumStarFluxX"); - testingUtilities::checkResults(fiducialMomentumStarFluxY[i], - testMomentumStarFluxY, - parameters.names.at(i) + ", MomentumStarFluxY"); - testingUtilities::checkResults(fiducialMomentumStarFluxZ[i], - testMomentumStarFluxZ, - parameters.names.at(i) + ", MomentumStarFluxZ"); - testingUtilities::checkResults(fiducialEnergyStarFlux[i], - testEnergyStarFlux, - parameters.names.at(i) + ", EnergyStarFlux"); - testingUtilities::checkResults(fiducialMagneticStarFluxY[i], - testMagneticStarFluxY, - parameters.names.at(i) + ", MagneticStarFluxY"); - testingUtilities::checkResults(fiducialMagneticStarFluxZ[i], - testMagneticStarFluxZ, - parameters.names.at(i) + ", MagneticStarFluxZ"); - } - } - - /*! - * \brief Test the mhd::_internal::_starFluxes function in the degenerate - * case - * - */ - TEST(tMHDHlldInternalStarFluxes, - CorrectInputDegenerateExpectCorrectOutput) - { - testParams const parameters; - - // Used to get us into the degenerate case - double const totalPressureStarMultiplier = 1E15; - - std::vector const fiducialVelocityStarY {0.33040135813215948, 0.69876195899931859}; - std::vector const fiducialVelocityStarZ {1.500111692877206, 1.8528943583250035}; - std::vector const fiducialEnergyStar {2.7072182962581443e+18, -76277716432851392}; - std::vector const fiducialMagneticStarY {12.297499156516622, 63.744719695704063}; - std::vector const fiducialMagneticStarZ {46.224045698787776, 37.703264551707541}; - std::vector const fiducialDensityStarFlux {506.82678248238807, 105.14430372486369}; - std::vector const fiducialMomentumStarFluxX{135208.06632708258, 14014.840899433098}; - std::vector const fiducialMomentumStarFluxY{236.85804348470396, 19.08858135095122}; - std::vector const fiducialMomentumStarFluxZ{757.76012607552047, 83.112898961023902}; - std::vector const fiducialEnergyStarFlux {1.9052083339008875e+20, -2.7901725119926531e+17}; - std::vector const fiducialMagneticStarFluxY{58.989284454159673, 57.043444592213589}; - std::vector const fiducialMagneticStarFluxZ{29.976925743532302, 97.733298271413588}; - - double testVelocityStarY; - double testVelocityStarZ; - double testEnergyStar; - double testMagneticStarY; - double testMagneticStarZ; - double testDensityStarFlux; - double testMomentumStarFluxX; - double testMomentumStarFluxY; - double testMomentumStarFluxZ; - double testEnergyStarFlux; - double testMagneticStarFluxY; - double testMagneticStarFluxZ; - - for (size_t i = 0; i < parameters.names.size(); i++) - { - mhd::_internal::_starFluxes(parameters.speedM[i], - parameters.speedSide[i], - parameters.densityL[i], - parameters.velocityXL[i], - parameters.velocityYL[i], - parameters.velocityZL[i], - parameters.momentumXL[i], - parameters.momentumYL[i], - parameters.momentumZL[i], - parameters.energyL[i], - parameters.totalPressureL[i], - parameters.magneticXL[i], - parameters.magneticYL[i], - parameters.magneticZL[i], - parameters.densityStarL[i], - parameters.totalPressureStarL[i] * totalPressureStarMultiplier, - parameters.densityFluxL[i], - parameters.momentumFluxXL[i], - parameters.momentumFluxYL[i], - parameters.momentumFluxZL[i], - parameters.energyFluxL[i], - parameters.magneticFluxYL[i], - parameters.magneticFluxZL[i], - testVelocityStarY, - testVelocityStarZ, - testEnergyStar, - testMagneticStarY, - testMagneticStarZ, - testDensityStarFlux, - testMomentumStarFluxX, - testMomentumStarFluxY, - testMomentumStarFluxZ, - testEnergyStarFlux, - testMagneticStarFluxY, - testMagneticStarFluxZ); - - // Now check results - testingUtilities::checkResults(fiducialVelocityStarY[i], - testVelocityStarY, - parameters.names.at(i) + ", VelocityStarY"); - testingUtilities::checkResults(fiducialVelocityStarZ[i], - testVelocityStarZ, - parameters.names.at(i) + ", VelocityStarZ"); - testingUtilities::checkResults(fiducialEnergyStar[i], - testEnergyStar, - parameters.names.at(i) + ", EnergyStar"); - testingUtilities::checkResults(fiducialMagneticStarY[i], - testMagneticStarY, - parameters.names.at(i) + ", MagneticStarY"); - testingUtilities::checkResults(fiducialMagneticStarZ[i], - testMagneticStarZ, - parameters.names.at(i) + ", MagneticStarZ"); - testingUtilities::checkResults(fiducialDensityStarFlux[i], - testDensityStarFlux, - parameters.names.at(i) + ", DensityStarFlux"); - testingUtilities::checkResults(fiducialMomentumStarFluxX[i], - testMomentumStarFluxX, - parameters.names.at(i) + ", MomentumStarFluxX"); - testingUtilities::checkResults(fiducialMomentumStarFluxY[i], - testMomentumStarFluxY, - parameters.names.at(i) + ", MomentumStarFluxY"); - testingUtilities::checkResults(fiducialMomentumStarFluxZ[i], - testMomentumStarFluxZ, - parameters.names.at(i) + ", MomentumStarFluxZ"); - testingUtilities::checkResults(fiducialEnergyStarFlux[i], - testEnergyStarFlux, - parameters.names.at(i) + ", EnergyStarFlux"); - testingUtilities::checkResults(fiducialMagneticStarFluxY[i], - testMagneticStarFluxY, - parameters.names.at(i) + ", MagneticStarFluxY"); - testingUtilities::checkResults(fiducialMagneticStarFluxZ[i], - testMagneticStarFluxZ, - parameters.names.at(i) + ", MagneticStarFluxZ"); - } - } - // ========================================================================= - - // ========================================================================= - /*! - * \brief Test the mhd::_internal::_nonStarFluxes function - * - */ - TEST(tMHDHlldInternalNonStarFluxes, - CorrectInputExpectCorrectOutput) - { - testParams const parameters; - - std::vector const fiducialDensityFlux {38.504606872151484, 18.984145880030045}; - std::vector const fiducialMomentumFluxX{-3088.4810263278778, 2250.9966820900618}; - std::vector const fiducialMomentumFluxY{-1127.8835013070616, -2000.3517480656785}; - std::vector const fiducialMomentumFluxZ{-4229.5657456907293, -1155.8240512956793}; - std::vector const fiducialMagneticFluxY{-8.6244637840856555, 2.9729840344910059}; - std::vector const fiducialMagneticFluxZ{-56.365490339906408, -43.716615275067923}; - std::vector const fiducialEnergyFlux {-12344.460641662206, -2717.2127176227905}; - - double testDensityFlux; - double testMomentumFluxX; - double testMomentumFluxY; - double testMomentumFluxZ; - double testMagneticFluxY; - double testMagneticFluxZ; - double testEnergyFlux; - - for (size_t i = 0; i < parameters.names.size(); i++) - { - mhd::_internal::_nonStarFluxes(parameters.momentumXL[i], - parameters.velocityXL[i], - parameters.velocityYL[i], - parameters.velocityZL[i], - parameters.totalPressureL[i], - parameters.energyL[i], - parameters.magneticXL[i], - parameters.magneticYL[i], - parameters.magneticZL[i], - testDensityFlux, - testMomentumFluxX, - testMomentumFluxY, - testMomentumFluxZ, - testMagneticFluxY, - testMagneticFluxZ, - testEnergyFlux); - - // Now check results - testingUtilities::checkResults(fiducialDensityFlux[i], - testDensityFlux, - parameters.names.at(i) + ", DensityFlux"); - testingUtilities::checkResults(fiducialMomentumFluxX[i], - testMomentumFluxX, - parameters.names.at(i) + ", MomentumFluxX"); - testingUtilities::checkResults(fiducialMomentumFluxY[i], - testMomentumFluxY, - parameters.names.at(i) + ", MomentumFluxY"); - testingUtilities::checkResults(fiducialMomentumFluxZ[i], - testMomentumFluxZ, - parameters.names.at(i) + ", MomentumFluxZ"); - testingUtilities::checkResults(fiducialMagneticFluxY[i], - testMagneticFluxY, - parameters.names.at(i) + ", MagneticFluxY"); - testingUtilities::checkResults(fiducialMagneticFluxZ[i], - testMagneticFluxZ, - parameters.names.at(i) + ", MagneticFluxZ"); - testingUtilities::checkResults(fiducialEnergyFlux[i], - testEnergyFlux, - parameters.names.at(i) + ", EnergyFlux"); - } - } - // ========================================================================= - - // ========================================================================= - /*! - * \brief Test the mhd::_internal::_doubleStarState function. Non-degenerate - * state - * - */ - TEST(tMHDHlldInternalDoubleStarState, - CorrectInputNonDegenerateExpectCorrectOutput) - { - testParams const parameters; - - double const fixedEpsilon = 7E-12; - - std::vector const fiducialVelocityDoubleStarY{-1.5775383335759607, 3.803188977150934}; - std::vector const fiducialVelocityDoubleStarZ{-3.4914062207842482, -4.2662645349592765}; - std::vector const fiducialMagneticDoubleStarY{45.259313435283325, 71.787329583230417}; - std::vector const fiducialMagneticDoubleStarZ{36.670978215630669, 53.189673238238178}; - std::vector const fiducialEnergyDoubleStarL {-2048.1953674500514, -999.79694164635089}; - std::vector const fiducialEnergyDoubleStarR {1721.0582276783764, 252.04716752257781}; - - double testVelocityDoubleStarY; - double testVelocityDoubleStarZ; - double testMagneticDoubleStarY; - double testMagneticDoubleStarZ; - double testEnergyDoubleStarL; - double testEnergyDoubleStarR; - - for (size_t i = 0; i < parameters.names.size(); i++) - { - mhd::_internal::_doubleStarState(parameters.speedM[i], - parameters.magneticXL[i], - parameters.totalPressureStarL[i], - parameters.densityStarL[i], - parameters.velocityStarYL[i], - parameters.velocityStarZL[i], - parameters.energyStarL[i], - parameters.magneticStarYL[i], - parameters.magneticStarZL[i], - parameters.densityStarR[i], - parameters.velocityStarYR[i], - parameters.velocityStarZR[i], - parameters.energyStarR[i], - parameters.magneticStarYR[i], - parameters.magneticStarZR[i], - testVelocityDoubleStarY, - testVelocityDoubleStarZ, - testMagneticDoubleStarY, - testMagneticDoubleStarZ, - testEnergyDoubleStarL, - testEnergyDoubleStarR); - - // Now check results - testingUtilities::checkResults(fiducialVelocityDoubleStarY[i], - testVelocityDoubleStarY, - parameters.names.at(i) + ", VelocityDoubleStarY"); - testingUtilities::checkResults(fiducialVelocityDoubleStarZ[i], - testVelocityDoubleStarZ, - parameters.names.at(i) + ", VelocityDoubleStarZ"); - testingUtilities::checkResults(fiducialMagneticDoubleStarY[i], - testMagneticDoubleStarY, - parameters.names.at(i) + ", MagneticDoubleStarY"); - testingUtilities::checkResults(fiducialMagneticDoubleStarZ[i], - testMagneticDoubleStarZ, - parameters.names.at(i) + ", MagneticDoubleStarZ"); - testingUtilities::checkResults(fiducialEnergyDoubleStarL[i], - testEnergyDoubleStarL, - parameters.names.at(i) + ", EnergyDoubleStarL"); - testingUtilities::checkResults(fiducialEnergyDoubleStarR[i], - testEnergyDoubleStarR, - parameters.names.at(i) + ", EnergyDoubleStarR", - fixedEpsilon); - } - } - - /*! - * \brief Test the mhd::_internal::_doubleStarState function in the - * degenerate state. - * - */ - TEST(tMHDHlldInternalDoubleStarState, - CorrectInputDegenerateExpectCorrectOutput) - { - testParams const parameters; - - std::vector const fiducialVelocityDoubleStarY{1.5746306813243216, 1.4363926014039052}; - std::vector const fiducialVelocityDoubleStarZ{1.3948193325212686, 1.1515754515491903}; - std::vector const fiducialMagneticDoubleStarY{62.093488291430653, 54.279167444036723}; - std::vector const fiducialMagneticDoubleStarZ{62.765890944643196, 93.267654555096414}; - std::vector const fiducialEnergyDoubleStarL {6.579867455284738, 30.450436649083692}; - std::vector const fiducialEnergyDoubleStarR {90.44484278669114, 61.33664731346812}; - - double testVelocityDoubleStarY; - double testVelocityDoubleStarZ; - double testMagneticDoubleStarY; - double testMagneticDoubleStarZ; - double testEnergyDoubleStarL; - double testEnergyDoubleStarR; - - for (size_t i = 0; i < parameters.names.size(); i++) - { - mhd::_internal::_doubleStarState(parameters.speedM[i], - 0.0, - parameters.totalPressureStarL[i], - parameters.densityStarL[i], - parameters.velocityStarYL[i], - parameters.velocityStarZL[i], - parameters.energyStarL[i], - parameters.magneticStarYL[i], - parameters.magneticStarZL[i], - parameters.densityStarR[i], - parameters.velocityStarYR[i], - parameters.velocityStarZR[i], - parameters.energyStarR[i], - parameters.magneticStarYR[i], - parameters.magneticStarZR[i], - testVelocityDoubleStarY, - testVelocityDoubleStarZ, - testMagneticDoubleStarY, - testMagneticDoubleStarZ, - testEnergyDoubleStarL, - testEnergyDoubleStarR); - // Now check results - testingUtilities::checkResults(fiducialVelocityDoubleStarY[i], - testVelocityDoubleStarY, - parameters.names.at(i) + ", VelocityDoubleStarY"); - testingUtilities::checkResults(fiducialVelocityDoubleStarZ[i], - testVelocityDoubleStarZ, - parameters.names.at(i) + ", VelocityDoubleStarZ"); - testingUtilities::checkResults(fiducialMagneticDoubleStarY[i], - testMagneticDoubleStarY, - parameters.names.at(i) + ", MagneticDoubleStarY"); - testingUtilities::checkResults(fiducialMagneticDoubleStarZ[i], - testMagneticDoubleStarZ, - parameters.names.at(i) + ", MagneticDoubleStarZ"); - testingUtilities::checkResults(fiducialEnergyDoubleStarL[i], - testEnergyDoubleStarL, - parameters.names.at(i) + ", EnergyDoubleStarL"); - testingUtilities::checkResults(fiducialEnergyDoubleStarR[i], - testEnergyDoubleStarR, - parameters.names.at(i) + ", EnergyDoubleStarR"); - } - } - // ========================================================================= - - // ========================================================================= - /*! - * \brief Test the mhd::_internal::_doubleStarFluxes function - * - */ - TEST(tMHDHlldInternalDoubleStarFluxes, - CorrectInputExpectCorrectOutput) - { - testParams const parameters; - - std::vector const fiducialMomentumDoubleStarFluxX{1937.3388606704509, -21.762854649386174}; - std::vector const fiducialMomentumDoubleStarFluxY{-1555.8040962754276, 39.237503643804175}; - std::vector const fiducialMomentumDoubleStarFluxZ{-801.91650203165148, -64.746529703562871}; - std::vector const fiducialEnergyDoubleStarFlux {2781.4706748628528, 136.89786983482355}; - std::vector const fiducialMagneticDoubleStarFluxY{-2799.7143456312342, 141.2263259922299}; - std::vector const fiducialMagneticDoubleStarFluxZ{1536.9628864256708, -31.569502877970095}; - - - double testMomentumDoubleStarFluxX; - double testMomentumDoubleStarFluxY; - double testMomentumDoubleStarFluxZ; - double testEnergyDoubleStarFlux; - double testMagneticDoubleStarFluxY; - double testMagneticDoubleStarFluxZ; - - for (size_t i = 0; i < parameters.names.size(); i++) - { - mhd::_internal::_doubleStarFluxes(parameters.speedSide[i], - parameters.momentumStarFluxX[i], - parameters.momentumStarFluxY[i], - parameters.momentumStarFluxZ[i], - parameters.energyStarFlux[i], - parameters.magneticStarFluxY[i], - parameters.magneticStarFluxZ[i], - parameters.densityStarL[i], - parameters.velocityStarXL[i], - parameters.velocityStarYL[i], - parameters.velocityStarZL[i], - parameters.energyStarL[i], - parameters.magneticStarYL[i], - parameters.magneticStarZL[i], - parameters.velocityDoubleStarXL[i], - parameters.velocityDoubleStarYL[i], - parameters.velocityDoubleStarZL[i], - parameters.energyDoubleStar[i], - parameters.magneticDoubleStarY[i], - parameters.magneticDoubleStarZ[i], - testMomentumDoubleStarFluxX, - testMomentumDoubleStarFluxY, - testMomentumDoubleStarFluxZ, - testEnergyDoubleStarFlux, - testMagneticDoubleStarFluxY, - testMagneticDoubleStarFluxZ); - - // Now check results - testingUtilities::checkResults(fiducialMomentumDoubleStarFluxX[i], - testMomentumDoubleStarFluxX, - parameters.names.at(i) + ", MomentumDoubleStarFluxX"); - testingUtilities::checkResults(fiducialMomentumDoubleStarFluxY[i], - testMomentumDoubleStarFluxY, - parameters.names.at(i) + ", MomentumDoubleStarFluxY"); - testingUtilities::checkResults(fiducialMomentumDoubleStarFluxZ[i], - testMomentumDoubleStarFluxZ, - parameters.names.at(i) + ", MomentumDoubleStarFluxZ"); - testingUtilities::checkResults(fiducialEnergyDoubleStarFlux[i], - testEnergyDoubleStarFlux, - parameters.names.at(i) + ", EnergyDoubleStarFlux"); - testingUtilities::checkResults(fiducialMagneticDoubleStarFluxY[i], - testMagneticDoubleStarFluxY, - parameters.names.at(i) + ", MagneticDoubleStarFluxY"); - testingUtilities::checkResults(fiducialMagneticDoubleStarFluxZ[i], - testMagneticDoubleStarFluxZ, - parameters.names.at(i) + ", MagneticDoubleStarFluxZ"); - } - } - // ========================================================================= - - // ========================================================================= - /*! - * \brief Test the mhd::_internal::_returnFluxes function - * - */ - TEST(tMHDHlldInternalReturnFluxes, - CorrectInputExpectCorrectOutput) - { - double const dummyValue = 999; - double const densityFlux = 1; - double const momentumFluxX = 2; - double const momentumFluxY = 3; - double const momentumFluxZ = 4; - double const energyFlux = 5; - double const magneticFluxY = 6; - double const magneticFluxZ = 7; - - int threadId = 0; - int n_cells = 10; - int nFields = 8; // Total number of conserved fields - #ifdef SCALAR - nFields += NSCALARS; - #endif // SCALAR - #ifdef DE - nFields++; - #endif //DE - - // Lambda for finding indices and check if they're correct - auto findIndex = [](std::vector const &vec, - double const &num, - int const &fidIndex, - std::string const &name) - { - int index = std::distance(vec.begin(), std::find(vec.begin(), vec.end(), num)); - // EXPECT_EQ(fidIndex, index) << "Error in " << name << " index" << std::endl; - - return index; - }; - - for (size_t direction = 0; direction < 3; direction++) - { - int o1, o2, o3; - if (direction==0) {o1 = 1; o2 = 2; o3 = 3;} - if (direction==1) {o1 = 2; o2 = 3; o3 = 1;} - if (direction==2) {o1 = 3; o2 = 1; o3 = 2;} - - std::vector testFluxArray(nFields*n_cells, dummyValue); - - // Fiducial Indices - int const fiducialDensityIndex = threadId; - int const fiducialMomentumIndexX = threadId + n_cells * o1; - int const fiducialMomentumIndexY = threadId + n_cells * o2; - int const fiducialMomentumIndexZ = threadId + n_cells * o3; - int const fiducialEnergyIndex = threadId + n_cells * 4; - int const fiducialMagneticYIndex = threadId + n_cells * (grid_enum::magnetic_x); - int const fiducialMagneticZIndex = threadId + n_cells * (grid_enum::magnetic_y); - - mhd::_internal::_returnFluxes(threadId, - o1, - o2, - o3, - n_cells, - testFluxArray.data(), - densityFlux, - momentumFluxX, - momentumFluxY, - momentumFluxZ, - energyFlux, - magneticFluxY, - magneticFluxZ); - - // Find the indices for the various fields - int densityLoc = findIndex(testFluxArray, densityFlux, fiducialDensityIndex, "density"); - int momentumXLocX = findIndex(testFluxArray, momentumFluxX, fiducialMomentumIndexX, "momentum X"); - int momentumYLocY = findIndex(testFluxArray, momentumFluxY, fiducialMomentumIndexY, "momentum Y"); - int momentumZLocZ = findIndex(testFluxArray, momentumFluxZ, fiducialMomentumIndexZ, "momentum Z"); - int energyLoc = findIndex(testFluxArray, energyFlux, fiducialEnergyIndex, "energy"); - int magneticYLoc = findIndex(testFluxArray, magneticFluxY, fiducialMagneticYIndex, "magnetic Y"); - int magneticZLoc = findIndex(testFluxArray, magneticFluxZ, fiducialMagneticZIndex, "magnetic Z"); - - for (size_t i = 0; i < testFluxArray.size(); i++) - { - // Skip the already checked indices - if ((i != densityLoc) and - (i != momentumXLocX) and - (i != momentumYLocY) and - (i != momentumZLocZ) and - (i != energyLoc) and - (i != magneticYLoc) and - (i != magneticZLoc)) - { - EXPECT_EQ(dummyValue, testFluxArray.at(i)) - << "Unexpected value at index that _returnFluxes shouldn't be touching" << std::endl - << "Index = " << i << std::endl - << "Direction = " << direction << std::endl; - } - } - } - } - // ========================================================================= -#endif // MHD -#endif // CUDA + #ifdef MHD +// ========================================================================= +// Integration tests for the entire HLLD solver. Unit tests are below +// ========================================================================= + +// ========================================================================= +/*! +* \brief Test fixture for simple testing of the HLLD Riemann Solver. +Effectively takes the left state, right state, fiducial fluxes, and +custom user output then performs all the required running and testing +* +*/ +class tMHDCalculateHLLDFluxesCUDA : public ::testing::Test +{ + protected: + // ===================================================================== + /*! + * \brief Compute and return the HLLD fluxes + * + * \param[in] leftState The state on the left side in conserved + * variables. In order the elements are: density, x-momentum, + * y-momentum, z-momentum, energy, passive scalars, x-magnetic field, + * y-magnetic field, z-magnetic field. + * \param[in] rightState The state on the right side in conserved + * variables. In order the elements are: density, x-momentum, + * y-momentum, z-momentum, energy, passive scalars, x-magnetic field, + * y-magnetic field, z-magnetic field. + * \param[in] gamma The adiabatic index + * \param[in] direction Which plane the interface is. 0 = plane normal to + * X, 1 = plane normal to Y, 2 = plane normal to Z. Defaults to 0. + * \return std::vector + */ + std::vector computeFluxes(std::vector stateLeft, + std::vector stateRight, + Real const &gamma, int const &direction = 0) + { + // Rearrange X, Y, and Z values for the chosen direction + std::rotate(stateLeft.begin() + 1, stateLeft.begin() + 4 - direction, + stateLeft.begin() + 4); + std::rotate(stateRight.begin() + 1, stateRight.begin() + 4 - direction, + stateRight.begin() + 4); + + // Create new vectors that store the values in the way that the HLLD + // solver expects + EXPECT_DOUBLE_EQ(stateLeft.at(grid_enum::magnetic_x), + stateRight.at(grid_enum::magnetic_x)) + << "The left and right magnetic fields are not equal"; + std::vector const magneticX{stateLeft.at(grid_enum::magnetic_x)}; + stateLeft.erase(stateLeft.begin() + grid_enum::magnetic_x); + stateRight.erase(stateRight.begin() + grid_enum::magnetic_x); + + // Simulation Paramters + int const nx = 1; // Number of cells in the x-direction + int const ny = 1; // Number of cells in the y-direction + int const nz = 1; // Number of cells in the z-direction + int const nGhost = 0; // Isn't actually used it appears + int nFields = 8; // Total number of conserved fields + #ifdef SCALAR + nFields += NSCALARS; + #endif // SCALAR + #ifdef DE + nFields++; + #endif // DE + + // Launch Parameters + dim3 const dimGrid(1, 1, 1); // How many blocks in the grid + dim3 const dimBlock(1, 1, 1); // How many threads per block + + // Create the std::vector to store the fluxes and declare the device + // pointers + std::vector testFlux(nFields - 1, 0); + Real *devConservedLeft; + Real *devConservedRight; + Real *devConservedMagXFace; + Real *devTestFlux; + + // Allocate device arrays and copy data + CudaSafeCall( + cudaMalloc(&devConservedLeft, stateLeft.size() * sizeof(Real))); + CudaSafeCall( + cudaMalloc(&devConservedRight, stateRight.size() * sizeof(Real))); + CudaSafeCall( + cudaMalloc(&devConservedMagXFace, magneticX.size() * sizeof(Real))); + CudaSafeCall(cudaMalloc(&devTestFlux, testFlux.size() * sizeof(Real))); + + CudaSafeCall(cudaMemcpy(devConservedLeft, stateLeft.data(), + stateLeft.size() * sizeof(Real), + cudaMemcpyHostToDevice)); + CudaSafeCall(cudaMemcpy(devConservedRight, stateRight.data(), + stateRight.size() * sizeof(Real), + cudaMemcpyHostToDevice)); + CudaSafeCall(cudaMemcpy(devConservedMagXFace, magneticX.data(), + magneticX.size() * sizeof(Real), + cudaMemcpyHostToDevice)); + + // Run kernel + hipLaunchKernelGGL( + mhd::Calculate_HLLD_Fluxes_CUDA, dimGrid, dimBlock, 0, 0, + devConservedLeft, // the "left" interface + devConservedRight, // the "right" interface + devConservedMagXFace, // the magnetic field at the interface + devTestFlux, nx, ny, nz, nGhost, gamma, direction, nFields); + + CudaCheckError(); + CudaSafeCall(cudaMemcpy(testFlux.data(), devTestFlux, + testFlux.size() * sizeof(Real), + cudaMemcpyDeviceToHost)); + + // Make sure to sync with the device so we have the results + cudaDeviceSynchronize(); + CudaCheckError(); + + // Free device arrays + cudaFree(devConservedLeft); + cudaFree(devConservedRight); + cudaFree(devConservedMagXFace); + cudaFree(devTestFlux); + + // The HLLD solver only writes the the first two "slots" for + // magnetic flux so let's rearrange to make sure we have all the + // magnetic fluxes in the right spots + testFlux.insert(testFlux.begin() + grid_enum::magnetic_x, 0.0); + std::rotate(testFlux.begin() + 1, testFlux.begin() + 1 + direction, + testFlux.begin() + 4); // Rotate momentum + + return testFlux; + } + // ===================================================================== + + // ===================================================================== + /*! + * \brief Check if the fluxes are correct + * + * \param[in] fiducialFlux The fiducial flux in conserved variables. In + * order the elements are: density, x-momentum, + * y-momentum, z-momentum, energy, passive scalars, x-magnetic field, + * y-magnetic field, z-magnetic field. + * \param[in] scalarFlux The fiducial flux in the passive scalars + * \param[in] thermalEnergyFlux The fiducial flux in the dual energy + * thermal energy + * \param[in] testFlux The test flux in conserved variables. In order the + * elements are: density, x-momentum, + * y-momentum, z-momentum, energy, passive scalars, x-magnetic field, + * y-magnetic field, z-magnetic field. + * \param[in] customOutput Any custom output the user would like to + * print. It will print after the default GTest output but before the + * values that failed are printed + * \param[in] direction Which plane the interface is. 0 = plane normal to + * X, 1 = plane normal to Y, 2 = plane normal to Z. Defaults to 0. + */ + void checkResults(std::vector fiducialFlux, + std::vector scalarFlux, Real thermalEnergyFlux, + std::vector const &testFlux, + std::string const &customOutput = "", + int const &direction = 0) + { + // Field names + std::vector fieldNames{"Densities", "X Momentum", + "Y Momentum", "Z Momentum", + "Energies", "X Magnetic Field", + "Y Magnetic Field", "Z Magnetic Field"}; + #ifdef DE + fieldNames.push_back("Thermal energy (dual energy)"); + fiducialFlux.push_back(thermalEnergyFlux); + #endif // DE + #ifdef SCALAR + std::vector scalarNames{"Scalar 1", "Scalar 2", "Scalar 3"}; + fieldNames.insert(fieldNames.begin() + grid_enum::magnetic_start, + scalarNames.begin(), + scalarNames.begin() + grid_enum::nscalars); + + fiducialFlux.insert(fiducialFlux.begin() + grid_enum::magnetic_start, + scalarFlux.begin(), + scalarFlux.begin() + grid_enum::nscalars); + #endif // SCALAR + + ASSERT_TRUE((fiducialFlux.size() == testFlux.size()) and + (fiducialFlux.size() == fieldNames.size())) + << "The fiducial flux, test flux, and field name vectors are not all " + "the same length" + << std::endl + << "fiducialFlux.size() = " << fiducialFlux.size() << std::endl + << "testFlux.size() = " << testFlux.size() << std::endl + << "fieldNames.size() = " << fieldNames.size() << std::endl; + + // Check for equality + for (size_t i = 0; i < fieldNames.size(); i++) { + // Check for equality and if not equal return difference + double absoluteDiff; + int64_t ulpsDiff; + + bool areEqual = testingUtilities::nearlyEqualDbl( + fiducialFlux[i], testFlux[i], absoluteDiff, ulpsDiff); + EXPECT_TRUE(areEqual) + << std::endl + << customOutput << std::endl + << "There's a difference in " << fieldNames[i] << " Flux" << std::endl + << "The direction is: " << direction << " (0=X, 1=Y, 2=Z)" + << std::endl + << "The fiducial value is: " << fiducialFlux[i] << std::endl + << "The test value is: " << testFlux[i] << std::endl + << "The absolute difference is: " << absoluteDiff << std::endl + << "The ULP difference is: " << ulpsDiff << std::endl; + } + } + // ===================================================================== + + // ===================================================================== + /*! + * \brief Convert a vector of quantities in primitive variables to + * conserved variables + * + * \param[in] input The state in primitive variables. In order the + * elements are: density, x-momentum, + * y-momentum, z-momentum, energy, passive scalars, x-magnetic field, + * y-magnetic field, z-magnetic field. + * \return std::vector The state in conserved variables. In order + * the elements are: density, x-momentum, + * y-momentum, z-momentum, energy, passive scalars, x-magnetic field, + * y-magnetic field, z-magnetic field. + */ + std::vector primitive2Conserved( + std::vector const &input, double const &gamma, + std::vector const &primitiveScalars) + { + std::vector output(input.size()); + output.at(0) = input.at(0); // Density + output.at(1) = input.at(1) * input.at(0); // X Velocity to momentum + output.at(2) = input.at(2) * input.at(0); // Y Velocity to momentum + output.at(3) = input.at(3) * input.at(0); // Z Velocity to momentum + output.at(4) = mhd::utils::computeEnergy( + input.at(4), input.at(0), input.at(1), input.at(2), input.at(3), + input.at(5), input.at(6), input.at(7), + gamma); // Pressure to Energy + output.at(5) = input.at(5); // X Magnetic Field + output.at(6) = input.at(6); // Y Magnetic Field + output.at(7) = input.at(7); // Z Magnetic Field + + #ifdef SCALAR + std::vector conservedScalar(primitiveScalars.size()); + std::transform(primitiveScalars.begin(), primitiveScalars.end(), + conservedScalar.begin(), + [&](Real const &c) { return c * output.at(0); }); + output.insert(output.begin() + grid_enum::magnetic_start, + conservedScalar.begin(), + conservedScalar.begin() + grid_enum::nscalars); + #endif // SCALAR + #ifdef DE + output.push_back(mhd::utils::computeThermalEnergy( + output.at(4), output.at(0), output.at(1), output.at(2), output.at(3), + output.at(grid_enum::magnetic_x), output.at(grid_enum::magnetic_y), + output.at(grid_enum::magnetic_z), gamma)); + #endif // DE + return output; + } + // ===================================================================== + + // ===================================================================== + /*! + * \brief On test start make sure that the number of NSCALARS is allowed + * + */ + void SetUp() + { + #ifdef SCALAR + ASSERT_LE(NSCALARS, 3) << "Only up to 3 passive scalars are currently " + "supported in HLLD tests. NSCALARS = " + << NSCALARS; + ASSERT_GE(NSCALARS, 1) << "There must be at least 1 passive scalar to test " + "with passive scalars. NSCALARS = " + << NSCALARS; + #endif // SCALAR + } + // ===================================================================== + private: +}; +// ========================================================================= + +// ========================================================================= +/*! + * \brief Test the HLLD Riemann Solver using various states and waves from + * the Brio & Wu Shock tube + * + */ +TEST_F(tMHDCalculateHLLDFluxesCUDA, + BrioAndWuShockTubeCorrectInputExpectCorrectOutput) +{ + // Constant Values + Real const gamma = 2.; + Real const Vz = 0.0; + Real const Bx = 0.75; + Real const Bz = 0.0; + std::vector const primitiveScalar{1.1069975296, 2.2286185018, + 3.3155141875}; + + // States + std::vector const // | Density | X-Velocity | Y-Velocity | Z-Velocity | + // Pressure | X-Magnetic Field | Y-Magnetic Field | + // Z-Magnetic Field | Adiabatic Index | Passive + // Scalars | + leftICs = primitive2Conserved({1.0, 0.0, 0.0, Vz, 1.0, Bx, 1.0, Bz}, + gamma, primitiveScalar), + leftFastRareLeftSide = primitive2Conserved( + {0.978576, 0.038603, -0.011074, Vz, 0.957621, Bx, 0.970288, Bz}, + gamma, primitiveScalar), + leftFastRareRightSide = primitive2Conserved( + {0.671655, 0.647082, -0.238291, Vz, 0.451115, Bx, 0.578240, Bz}, + gamma, primitiveScalar), + compoundLeftSide = primitive2Conserved( + {0.814306, 0.506792, -0.911794, Vz, 0.706578, Bx, -0.108819, Bz}, + gamma, primitiveScalar), + compoundPeak = primitive2Conserved( + {0.765841, 0.523701, -1.383720, Vz, 0.624742, Bx, -0.400787, Bz}, + gamma, primitiveScalar), + compoundRightSide = primitive2Conserved( + {0.695211, 0.601089, -1.583720, Vz, 0.515237, Bx, -0.537027, Bz}, + gamma, primitiveScalar), + contactLeftSide = primitive2Conserved( + {0.680453, 0.598922, -1.584490, Vz, 0.515856, Bx, -0.533616, Bz}, + gamma, primitiveScalar), + contactRightSide = primitive2Conserved( + {0.231160, 0.599261, -1.584820, Vz, 0.516212, Bx, -0.533327, Bz}, + gamma, primitiveScalar), + slowShockLeftSide = primitive2Conserved( + {0.153125, 0.086170, -0.683303, Vz, 0.191168, Bx, -0.850815, Bz}, + gamma, primitiveScalar), + slowShockRightSide = primitive2Conserved( + {0.117046, -0.238196, -0.165561, Vz, 0.087684, Bx, -0.903407, Bz}, + gamma, primitiveScalar), + rightFastRareLeftSide = primitive2Conserved( + {0.117358, -0.228756, -0.158845, Vz, 0.088148, Bx, -0.908335, Bz}, + gamma, primitiveScalar), + rightFastRareRightSide = primitive2Conserved( + {0.124894, -0.003132, -0.002074, Vz, 0.099830, Bx, -0.999018, Bz}, + gamma, primitiveScalar), + rightICs = primitive2Conserved({0.128, 0.0, 0.0, Vz, 0.1, Bx, -1.0, Bz}, + gamma, primitiveScalar); + + for (size_t direction = 0; direction < 3; direction++) { + // Initial Condition Checks + { + std::string const outputString{ + "Left State: Left Brio & Wu state\n" + "Right State: Left Brio & Wu state\n" + "HLLD State: Left Double Star State"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{0, 1.21875, -0.75, 0, 0, 0.0, 0, 0}; + std::vector const scalarFlux{0, 0, 0}; + Real thermalEnergyFlux = 0.0; + std::vector const testFluxes = + computeFluxes(leftICs, leftICs, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, + outputString, direction); + } + { + std::string const outputString{ + "Left State: Right Brio & Wu state\n" + "Right State: Right Brio & Wu state\n" + "HLLD State: Left Double Star State"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{ + 0, 0.31874999999999998, 0.75, 0, 0, 0.0, 0, 0}; + std::vector const scalarFlux{0, 0, 0}; + Real thermalEnergyFlux = 0.0; + std::vector const testFluxes = + computeFluxes(rightICs, rightICs, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, + outputString, direction); + } + { + std::string const outputString{ + "Left State: Left Brio & Wu state\n" + "Right State: Right Brio & Wu state\n" + "HLLD State: Left Double Star State"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{ + 0.20673357746080057, 0.4661897584603672, + 0.061170028480309613, 0, + 0.064707291981509041, 0.0, + 1.0074980455427278, 0}; + std::vector const scalarFlux{ + 0.22885355953447648, 0.46073027567244362, 0.6854281091039145}; + Real thermalEnergyFlux = 0.20673357746080046; + std::vector const testFluxes = + computeFluxes(leftICs, rightICs, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, + outputString, direction); + } + { + std::string const outputString{ + "Left State: Right Brio & Wu state\n" + "Right State: Left Brio & Wu state\n" + "HLLD State: Right Double Star State"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{ + -0.20673357746080057, 0.4661897584603672, + 0.061170028480309613, 0, + -0.064707291981509041, 0.0, + -1.0074980455427278, 0}; + std::vector const scalarFlux{ + -0.22885355953447648, -0.46073027567244362, -0.6854281091039145}; + Real thermalEnergyFlux = -0.20673357746080046; + std::vector const testFluxes = + computeFluxes(rightICs, leftICs, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, + outputString, direction); + } + + // Cross wave checks + { + std::string const outputString{ + "Left State: Left of left fast rarefaction\n" + "Right State: Right of left fast rarefaction\n" + "HLLD State: Left Double Star State"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{ + 0.4253304970883941, 0.47729308161522394, + -0.55321646324583107, 0, + 0.92496835095531071, 0.0, + 0.53128887284876058, 0}; + std::vector const scalarFlux{ + 0.47083980954039228, 0.94789941519098619, 1.4101892974729979}; + Real thermalEnergyFlux = 0.41622256825457099; + std::vector const testFluxes = computeFluxes( + leftFastRareLeftSide, leftFastRareRightSide, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, + outputString, direction); + } + { + std::string const outputString{ + "Left State: Right of left fast rarefaction\n" + "Right State: Left of left fast rarefaction\n" + "HLLD State: Left Double Star State"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{ + 0.070492123816403796, 1.2489600267034342, + -0.71031457071286608, 0, + 0.21008080091470105, 0.0, + 0.058615131833681167, 0}; + std::vector const scalarFlux{ + 0.078034606921016325, 0.15710005136841393, 0.23371763662029341}; + Real thermalEnergyFlux = 0.047345816580591255; + std::vector const testFluxes = computeFluxes( + leftFastRareRightSide, leftFastRareLeftSide, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, + outputString, direction); + } + { + std::string const outputString{ + "Left State: Left of compound wave\n" + "Right State: Right of compound wave\n" + "HLLD State: Left Double Star State"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{ + 0.4470171023231666, 0.60747660800918468, + -0.20506357956052623, 0, + 0.72655525704800772, 0.0, + 0.76278089951123285, 0}; + std::vector const scalarFlux{ + 0.4948468279606959, 0.99623058485843297, 1.482091544807598}; + Real thermalEnergyFlux = 0.38787931087981475; + std::vector const testFluxes = + computeFluxes(compoundLeftSide, compoundRightSide, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, + outputString, direction); + } + { + std::string const outputString{ + "Left State: Right of compound wave\n" + "Right State: Left of compound wave\n" + "HLLD State: Left Double Star State"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{ + 0.38496850292724116, 0.66092864409611585, + -0.3473204105316457, 0, + 0.89888639514227009, 0.0, + 0.71658566275120927, 0}; + std::vector const scalarFlux{ + 0.42615918171426637, 0.85794792823389721, 1.2763685331959034}; + Real thermalEnergyFlux = 0.28530908823756074; + std::vector const testFluxes = + computeFluxes(compoundRightSide, compoundLeftSide, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, + outputString, direction); + } + { + std::string const outputString{ + "Left State: Left of Compound Wave\n" + "Right State: Peak of Compound Wave\n" + "HLLD State: Left Double Star State"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{ + 0.41864266180405574, 0.63505764056357727, + -0.1991008813536404, 0, + 0.73707474818824525, 0.0, + 0.74058225030218761, 0}; + std::vector const scalarFlux{ + 0.46343639240225803, 0.93299478173931882, 1.388015684704111}; + Real thermalEnergyFlux = 0.36325864563467081; + std::vector const testFluxes = + computeFluxes(compoundLeftSide, compoundPeak, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, + outputString, direction); + } + { + std::string const outputString{ + "Left State: Peak of Compound Wave\n" + "Right State: Left of Compound Wave\n" + "HLLD State: Left Double Star State"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{ + 0.39520761138156862, 0.6390998385557225, + -0.35132701297727598, 0, + 0.89945171879176522, 0.0, + 0.71026545717401468, 0}; + std::vector const scalarFlux{ + 0.43749384947851333, 0.88076699477714815, 1.3103164425435772}; + Real thermalEnergyFlux = 0.32239432669410983; + std::vector const testFluxes = + computeFluxes(compoundPeak, compoundLeftSide, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, + outputString, direction); + } + { + std::string const outputString{ + "Left State: Peak of Compound Wave\n" + "Right State: Right of Compound Wave\n" + "HLLD State: Left Double Star State"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{ + 0.4285899590904928, 0.6079309920345296, + -0.26055320217638239, 0, + 0.75090757444649436, 0.0, + 0.85591904930227747, 0}; + std::vector const scalarFlux{ + 0.47444802592454061, 0.95516351251477749, 1.4209960899845735}; + Real thermalEnergyFlux = 0.34962629086469987; + std::vector const testFluxes = + computeFluxes(compoundPeak, compoundRightSide, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, + outputString, direction); + } + { + std::string const outputString{ + "Left State: Right of Compound Wave\n" + "Right State: Peak of Compound Wave\n" + "HLLD State: Left Double Star State"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{ + 0.39102247793946454, 0.65467021266207581, + -0.25227691377588229, 0, + 0.76271525822813691, 0.0, + 0.83594460438033491, 0}; + std::vector const scalarFlux{ + 0.43286091709705776, 0.8714399289555731, 1.2964405732397004}; + Real thermalEnergyFlux = 0.28979582956267347; + std::vector const testFluxes = + computeFluxes(compoundRightSide, compoundPeak, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, + outputString, direction); + } + { + std::string const outputString{ + "Left State: Left of contact discontinuity\n" + "Right State: Right of contact discontinuity\n" + "HLLD State: Left Double Star State"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{ + 0.40753761783585118, 0.62106392255463172, + -0.2455554035355339, 0, + 0.73906344777217226, 0.0, + 0.8687394222350926, 0}; + std::vector const scalarFlux{ + 0.45114313616335622, 0.90824587528847567, 1.3511967538747176}; + Real thermalEnergyFlux = 0.30895701155896288; + std::vector const testFluxes = + computeFluxes(contactLeftSide, contactRightSide, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, + outputString, direction); + } + { + std::string const outputString{ + "Left State: Right of contact discontinuity\n" + "Right State: Left of contact discontinuity\n" + "HLLD State: Left Double Star State"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{ + 0.13849588572126192, 0.46025037934770729, + 0.18052412687974539, 0, + 0.35385590617992224, 0.0, + 0.86909622543144227, 0}; + std::vector const scalarFlux{ + 0.15331460335320088, 0.30865449334158279, 0.45918507401922254}; + Real thermalEnergyFlux = 0.30928031735570188; + std::vector const testFluxes = + computeFluxes(contactRightSide, contactLeftSide, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, + outputString, direction); + } + { + std::string const outputString{ + "Left State: Slow shock left side\n" + "Right State: Slow shock right side\n" + "HLLD State: Left Double Star State"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{ + 3.5274134848883865e-05, 0.32304849716274459, + 0.60579784881286636, 0, + -0.32813070621836449, 0.0, + 0.40636483121437972, 0}; + std::vector const scalarFlux{3.9048380136491711e-05, + 7.8612589559210735e-05, + 0.00011695189454326261}; + Real thermalEnergyFlux = 4.4037784886918126e-05; + std::vector const testFluxes = computeFluxes( + slowShockLeftSide, slowShockRightSide, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, + outputString, direction); + } + { + std::string const outputString{ + "Left State: Slow shock right side\n" + "Right State: Slow shock left side\n" + "HLLD State: Right Double Star State"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{ + -0.016514307834939734, 0.16452009375678914, + 0.71622171077118635, 0, + -0.37262428139914472, 0.0, + 0.37204015363322052, 0}; + std::vector const scalarFlux{ + -0.018281297976332211, -0.036804091985367396, -0.054753421923485097}; + Real thermalEnergyFlux = -0.020617189878790236; + std::vector const testFluxes = computeFluxes( + slowShockRightSide, slowShockLeftSide, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, + outputString, direction); + } + { + std::string const outputString{ + "Left State: Right fast rarefaction left side\n" + "Right State: Right fast rarefaction right side\n" + "HLLD State: Right Double Star State"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{ + -0.026222824218991747, 0.22254903570732654, + 0.68544334213642255, 0, + -0.33339172106895454, 0.0, + 0.32319665359522443, 0}; + std::vector const scalarFlux{ + -0.029028601629558917, -0.058440671223894146, -0.086942145734385745}; + Real thermalEnergyFlux = -0.020960370728633469; + std::vector const testFluxes = computeFluxes( + rightFastRareLeftSide, rightFastRareRightSide, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, + outputString, direction); + } + { + std::string const outputString{ + "Left State: Right fast rarefaction right side\n" + "Right State: Right fast rarefaction left side\n" + "HLLD State: Right Double Star State"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{ + -0.001088867226159973, 0.32035322820305906, + 0.74922357263343131, 0, + -0.0099746892805345766, 0.0, + 0.0082135595470345102, 0}; + std::vector const scalarFlux{-0.0012053733294214947, + -0.0024266696462237609, + -0.0036101547366371614}; + Real thermalEnergyFlux = -0.00081785194236053073; + std::vector const testFluxes = computeFluxes( + rightFastRareRightSide, rightFastRareLeftSide, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, + outputString, direction); + } + } +} +// ========================================================================= + +// ========================================================================= +/*! + * \brief Test the HLLD Riemann Solver using various states and waves from + * the Dai & Woodward Shock tube + * + */ +TEST_F(tMHDCalculateHLLDFluxesCUDA, + DaiAndWoodwardShockTubeCorrectInputExpectCorrectOutput) +{ + // Constant Values + Real const gamma = 5. / 3.; + Real const coef = 1. / (std::sqrt(4. * M_PI)); + Real const Bx = 4. * coef; + std::vector const primitiveScalar{1.1069975296, 2.2286185018, + 3.3155141875}; + + // States + std::vector< + Real> const // | Density | X-Velocity | Y-Velocity | Z-Velocity | + // Pressure | X-Magnetic Field | Y-Magnetic Field | + // Z-Magnetic Field | Adiabatic Index | Passive Scalars | + leftICs = primitive2Conserved( + {1.08, 0.0, 0.0, 0.0, 1.0, Bx, 3.6 * coef, 2 * coef}, gamma, + primitiveScalar), + leftFastShockLeftSide = + primitive2Conserved({1.09406, 1.176560, 0.021003, 0.506113, 0.970815, + 1.12838, 1.105355, 0.614087}, + gamma, primitiveScalar), + leftFastShockRightSide = + primitive2Conserved({1.40577, 0.693255, 0.210562, 0.611423, 1.494290, + 1.12838, 1.457700, 0.809831}, + gamma, primitiveScalar), + leftRotationLeftSide = + primitive2Conserved({1.40086, 0.687774, 0.215124, 0.609161, 1.485660, + 1.12838, 1.458735, 0.789960}, + gamma, primitiveScalar), + leftRotationRightSide = + primitive2Conserved({1.40119, 0.687504, 0.330268, 0.334140, 1.486570, + 1.12838, 1.588975, 0.475782}, + gamma, primitiveScalar), + leftSlowShockLeftSide = + primitive2Conserved({1.40519, 0.685492, 0.326265, 0.333664, 1.493710, + 1.12838, 1.575785, 0.472390}, + gamma, primitiveScalar), + leftSlowShockRightSide = + primitive2Conserved({1.66488, 0.578545, 0.050746, 0.250260, 1.984720, + 1.12838, 1.344490, 0.402407}, + gamma, primitiveScalar), + contactLeftSide = + primitive2Conserved({1.65220, 0.578296, 0.049683, 0.249962, 1.981250, + 1.12838, 1.346155, 0.402868}, + gamma, primitiveScalar), + contactRightSide = + primitive2Conserved({1.49279, 0.578276, 0.049650, 0.249924, 1.981160, + 1.12838, 1.346180, 0.402897}, + gamma, primitiveScalar), + rightSlowShockLeftSide = + primitive2Conserved({1.48581, 0.573195, 0.035338, 0.245592, 1.956320, + 1.12838, 1.370395, 0.410220}, + gamma, primitiveScalar), + rightSlowShockRightSide = + primitive2Conserved({1.23813, 0.450361, -0.275532, 0.151746, 1.439000, + 1.12838, 1.609775, 0.482762}, + gamma, primitiveScalar), + rightRotationLeftSide = + primitive2Conserved({1.23762, 0.450102, -0.274410, 0.145585, 1.437950, + 1.12838, 1.606945, 0.493879}, + gamma, primitiveScalar), + rightRotationRightSide = + primitive2Conserved({1.23747, 0.449993, -0.180766, -0.090238, + 1.437350, 1.12838, 1.503855, 0.752090}, + gamma, primitiveScalar), + rightFastShockLeftSide = + primitive2Conserved({1.22305, 0.424403, -0.171402, -0.085701, + 1.409660, 1.12838, 1.447730, 0.723864}, + gamma, primitiveScalar), + rightFastShockRightSide = + primitive2Conserved({1.00006, 0.000121, -0.000057, -0.000028, + 1.000100, 1.12838, 1.128435, 0.564217}, + gamma, primitiveScalar), + rightICs = + primitive2Conserved({1.0, 0.0, 0.0, 1.0, 0.2, Bx, 4 * coef, 2 * coef}, + gamma, primitiveScalar); + + for (size_t direction = 0; direction < 3; direction++) { + // Initial Condition Checks + { + std::string const outputString{ + "Left State: Left Dai & Woodward state\n" + "Right State: Left Dai & Woodward state\n" + "HLLD State: Left Double Star State"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{0, + 1.0381971863420549, + -1.1459155902616465, + -0.63661977236758127, + 0, + 0.0, + 0, + -1.1102230246251565e-16}; + std::vector const scalarFlux{0, 0, 0}; + Real thermalEnergyFlux = 0.0; + std::vector const testFluxes = + computeFluxes(leftICs, leftICs, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, + outputString, direction); + } + { + std::string const outputString{ + "Left State: Right Dai & Woodward state\n" + "Right State: Right Dai & Woodward state\n" + "HLLD State: Left Double Star State"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{0, + 0.35915494309189522, + -1.2732395447351625, + -0.63661977236758127, + -0.63661977236758172, + 0.0, + 2.2204460492503131e-16, + -1.1283791670955123}; + std::vector const scalarFlux{0, 0, 0}; + Real thermalEnergyFlux = 0.0; + std::vector const testFluxes = + computeFluxes(rightICs, rightICs, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, + outputString, direction); + } + { + std::string const outputString{ + "Left State: Left Dai & Woodward state\n" + "Right State: Right Dai & Woodward state\n" + "HLLD State: Left Double Star State"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{ + 0.17354924587196074, 0.71614983677687327, -1.1940929411768009, + -1.1194725181819352, -0.11432087006939984, 0.0, + 0.056156000248263505, -0.42800560867873094}; + std::vector const scalarFlux{ + 0.19211858644420357, 0.38677506032368902, 0.57540498691841158}; + Real thermalEnergyFlux = 0.24104061926661174; + std::vector const testFluxes = + computeFluxes(leftICs, rightICs, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, + outputString, direction); + } + { + std::string const outputString{ + "Left State: Right Dai & Woodward state\n" + "Right State: Left Dai & Woodward state\n" + "HLLD State: Right Double Star State"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{ + -0.17354924587196074, 0.71614983677687327, -1.1940929411768009, + -0.14549552299758384, -0.47242308031148195, 0.0, + -0.056156000248263505, -0.55262526758377528}; + std::vector const scalarFlux{ + -0.19211858644420357, -0.38677506032368902, -0.57540498691841158}; + Real thermalEnergyFlux = -0.24104061926661174; + std::vector const testFluxes = + computeFluxes(rightICs, leftICs, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, + outputString, direction); + } + + // Cross wave checks + { + std::string const outputString{ + "Left State: Left of left fast shock\n" + "Right State: Right of left fast shock\n" + "HLLD State: Left Double Star State"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{ + 0.96813688187727132, 3.0871217875403394, -1.4687093290523414, + -0.33726008721080036, 4.2986213406773457, 0.0, + 0.84684181393860269, -0.087452560407274671}; + std::vector const scalarFlux{1.0717251365527865, 2.157607767226648, + 3.2098715673061045}; + Real thermalEnergyFlux = 1.2886155333980993; + std::vector const testFluxes = computeFluxes( + leftFastShockLeftSide, leftFastShockRightSide, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, + outputString, direction); + } + { + std::string const outputString{ + "Left State: Right of left fast shock\n" + "Right State: Left of left fast shock\n" + "HLLD State: Left Star State"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{ + 1.3053938862274184, 2.4685129176021858, -1.181892850065283, + -0.011160487372167127, 5.1797404608257249, 0.0, + 1.1889903073770265, 0.10262704114294516}; + std::vector const scalarFlux{1.4450678072086958, 2.9092249669830292, + 4.3280519500627666}; + Real thermalEnergyFlux = 2.081389946702628; + std::vector const testFluxes = computeFluxes( + leftFastShockRightSide, leftFastShockLeftSide, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, + outputString, direction); + } + { + std::string const outputString{ + "Left State: Left of left rotation/Alfven wave\n" + "Right State: Right of left rotation/Alfven wave\n" + "HLLD State: Left Double Star State"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{ + 0.96326128304298586, 2.8879592118317445, -1.4808188010794987, + -0.20403672861184916, 4.014027751838869, 0.0, + 0.7248753989305099, -0.059178137562467162}; + std::vector const scalarFlux{1.0663278606879119, 2.1467419174572049, + 3.1937064501984724}; + Real thermalEnergyFlux = 1.5323573637968553; + std::vector const testFluxes = computeFluxes( + leftRotationLeftSide, leftRotationRightSide, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, + outputString, direction); + } + { + std::string const outputString{ + "Left State: Right of left rotation/Alfven wave\n" + "Right State: Left of left rotation/Alfven wave\n" + "HLLD State: Left Double Star State"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{ + 0.96353754504060063, 2.8875487093397085, -1.4327309336053695, + -0.31541343522923493, 3.9739842521208342, 0.0, + 0.75541746728406312, -0.13479771672887678}; + std::vector const scalarFlux{1.0666336820367937, 2.1473576000564334, + 3.1946224007710313}; + Real thermalEnergyFlux = 1.5333744977458499; + std::vector const testFluxes = computeFluxes( + leftRotationRightSide, leftRotationLeftSide, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, + outputString, direction); + } + { + std::string const outputString{ + "Left State: Left of left slow shock\n" + "Right State: Right of left slow shock\n" + "HLLD State: Left Double Star State"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{ + 0.88716095730727451, 2.9828594399125663, -1.417062582518549, + -0.21524331343191233, 3.863474778369334, 0.0, + 0.71242370728996041, -0.05229712416644372}; + std::vector const scalarFlux{ + 0.98208498809672407, 1.9771433235295921, 2.9413947405483505}; + Real thermalEnergyFlux = 1.4145715457049737; + std::vector const testFluxes = computeFluxes( + leftSlowShockLeftSide, leftSlowShockRightSide, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, + outputString, direction); + } + { + std::string const outputString{ + "Left State: Right of left slow shock\n" + "Right State: Left of left slow shock\n" + "HLLD State: Left Double Star State"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{ + 1.042385440439527, 2.7732383399777376, -1.5199872074603551, + -0.21019362664841068, 4.1322001036232585, 0.0, + 0.72170937317481543, -0.049474715634396704}; + std::vector const scalarFlux{1.1539181074575644, 2.323079478570472, + 3.4560437166206879}; + Real thermalEnergyFlux = 1.8639570701934713; + std::vector const testFluxes = computeFluxes( + leftSlowShockRightSide, leftSlowShockLeftSide, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, + outputString, direction); + } + { + std::string const outputString{ + "Left State: Left of contact discontinuity\n" + "Right State: Right of contact discontinuity\n" + "HLLD State: Left Double Star State"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{ + 0.95545795601418737, 2.8843900822429749, -1.4715039715239722, + -0.21575736014726318, 4.0078718055059257, 0.0, + 0.72241353110189066, -0.049073560388753337}; + std::vector const scalarFlux{1.0576895969443709, 2.1293512784652289, + 3.1678344087247892}; + Real thermalEnergyFlux = 1.7186185770667382; + std::vector const testFluxes = + computeFluxes(contactLeftSide, contactRightSide, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, + outputString, direction); + } + { + std::string const outputString{ + "Left State: Right of contact discontinuity\n" + "Right State: Left of contact discontinuity\n" + "HLLD State: Left Double Star State"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{ + 0.86324813554422819, 2.8309913324581251, -1.4761428591480787, + -0.23887765947428419, 3.9892942559102793, 0.0, + 0.72244123046603836, -0.049025527032060034}; + std::vector const scalarFlux{ + 0.95561355347926669, 1.9238507665182214, 2.8621114407298114}; + Real thermalEnergyFlux = 1.7184928987481187; + std::vector const testFluxes = + computeFluxes(contactRightSide, contactLeftSide, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, + outputString, direction); + } + { + std::string const outputString{ + "Left State: Left of right slow shock\n" + "Right State: Right of right slow shock\n" + "HLLD State: Left Double Star State"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{ + 0.81125524370350677, 2.901639500435365, -1.5141545346789429, + -0.262600896007809, 3.8479660419540087, 0.0, + 0.7218977970017596, -0.049091614519593846}; + std::vector const scalarFlux{ + 0.89805755065482806, 1.8079784457999033, 2.6897282701827465}; + Real thermalEnergyFlux = 1.6022319728249694; + std::vector const testFluxes = computeFluxes( + rightSlowShockLeftSide, rightSlowShockRightSide, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, + outputString, direction); + } + { + std::string const outputString{ + "Left State: Right of right slow shock\n" + "Right State: Left of right slow shock\n" + "HLLD State: Left Double Star State"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{ + 0.60157947557836688, 2.3888357198399746, -1.9910500022202977, + -0.45610948442354332, 3.5359430988850069, 0.0, + 1.0670963294022622, 0.05554893654378229}; + std::vector const scalarFlux{0.66594699332331575, + 1.3406911495770899, 1.994545286188885}; + Real thermalEnergyFlux = 1.0487665253534804; + std::vector const testFluxes = computeFluxes( + rightSlowShockRightSide, rightSlowShockLeftSide, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, + outputString, direction); + } + { + std::string const outputString{ + "Left State: Left of right rotation/Alfven wave\n" + "Right State: Right of right rotation/Alfven wave\n" + "HLLD State: Left Double Star State"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{ + 0.55701691287884714, 2.4652223621237814, -1.9664615862227277, + -0.47490477894092042, 3.3900659850690529, 0.0, + 1.0325648885587542, 0.059165409025635551}; + std::vector const scalarFlux{ + 0.61661634650230224, 1.2413781978573175, 1.8467974773272691}; + Real thermalEnergyFlux = 0.9707694646266285; + std::vector const testFluxes = computeFluxes( + rightRotationLeftSide, rightRotationRightSide, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, + outputString, direction); + } + { + std::string const outputString{ + "Left State: Right of right rotation/Alfven wave\n" + "Right State: Left of right rotation/Alfven wave\n" + "HLLD State: Left Double Star State"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{ + 0.55689116371132596, 2.4648517303940851, -1.7972202655166787, + -0.90018282739798461, 3.3401033852664566, 0.0, + 0.88105841856465605, 0.43911718823267476}; + std::vector const scalarFlux{ + 0.61647714248450702, 1.2410979509359938, 1.8463805541782863}; + Real thermalEnergyFlux = 0.9702629326292449; + std::vector const testFluxes = computeFluxes( + rightRotationRightSide, rightRotationLeftSide, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, + outputString, direction); + } + { + std::string const outputString{ + "Left State: Left of right fast shock\n" + "Right State: Right of right fast shock\n" + "HLLD State: Left Double Star State"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{ + 0.48777637414577313, 2.3709438477809708, -1.7282900552525988, + -0.86414423547773778, 2.8885015704245069, 0.0, + 0.77133731061645838, 0.38566794697432505}; + std::vector const scalarFlux{ + 0.53996724117661621, 1.0870674521621893, 1.6172294888076189}; + Real thermalEnergyFlux = 0.84330016382608752; + std::vector const testFluxes = computeFluxes( + rightFastShockLeftSide, rightFastShockRightSide, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, + outputString, direction); + } + { + std::string const outputString{ + "Left State: Right of right fast shock\n" + "Right State: Left of right fast shock\n" + "HLLD State: Left Double Star State"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{ + 0.040639426423817904, 1.0717156491947966, -1.2612066401572222, + -0.63060225433149875, 0.15803727234007203, 0.0, + 0.042555541396817498, 0.021277678888288909}; + std::vector const scalarFlux{ + 0.044987744655527385, 0.090569777630660403, 0.13474059488003065}; + Real thermalEnergyFlux = 0.060961577855018087; + std::vector const testFluxes = computeFluxes( + rightFastShockRightSide, rightFastShockLeftSide, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, + outputString, direction); + } + } +} +// ========================================================================= + +// ========================================================================= +/*! + * \brief Test the HLLD Riemann Solver using various states and waves from + * the Ryu & Jones 4d Shock tube + * + */ +TEST_F(tMHDCalculateHLLDFluxesCUDA, + RyuAndJones4dShockTubeCorrectInputExpectCorrectOutput) +{ + // Constant Values + Real const gamma = 5. / 3.; + Real const Bx = 0.7; + std::vector const primitiveScalar{1.1069975296, 2.2286185018, + 3.3155141875}; + + // States + std::vector< + Real> const // | Density | X-Velocity | Y-Velocity | Z-Velocity | + // Pressure | X-Magnetic Field | Y-Magnetic Field | + // Z-Magnetic Field | Adiabatic Index | Passive Scalars | + leftICs = primitive2Conserved({1.0, 0.0, 0.0, 0.0, 1.0, Bx, 0.0, 0.0}, + gamma, primitiveScalar), + hydroRareLeftSide = + primitive2Conserved({0.990414, 0.012415, 1.458910e-58, 6.294360e-59, + 0.984076, Bx, 1.252355e-57, 5.366795e-58}, + gamma, primitiveScalar), + hydroRareRightSide = + primitive2Conserved({0.939477, 0.079800, 1.557120e-41, 7.505190e-42, + 0.901182, Bx, 1.823624e-40, 8.712177e-41}, + gamma, primitiveScalar), + switchOnSlowShockLeftSide = + primitive2Conserved({0.939863, 0.079142, 1.415730e-02, 7.134030e-03, + 0.901820, Bx, 2.519650e-02, 1.290082e-02}, + gamma, primitiveScalar), + switchOnSlowShockRightSide = + primitive2Conserved({0.651753, 0.322362, 8.070540e-01, 4.425110e-01, + 0.490103, Bx, 6.598380e-01, 3.618000e-01}, + gamma, primitiveScalar), + contactLeftSide = + primitive2Conserved({0.648553, 0.322525, 8.072970e-01, 4.426950e-01, + 0.489951, Bx, 6.599295e-01, 3.618910e-01}, + gamma, primitiveScalar), + contactRightSide = + primitive2Conserved({0.489933, 0.322518, 8.073090e-01, 4.426960e-01, + 0.489980, Bx, 6.599195e-01, 3.618850e-01}, + gamma, primitiveScalar), + slowShockLeftSide = + primitive2Conserved({0.496478, 0.308418, 8.060830e-01, 4.420150e-01, + 0.489823, Bx, 6.686695e-01, 3.666915e-01}, + gamma, primitiveScalar), + slowShockRightSide = + primitive2Conserved({0.298260, -0.016740, 2.372870e-01, 1.287780e-01, + 0.198864, Bx, 8.662095e-01, 4.757390e-01}, + gamma, primitiveScalar), + rotationLeftSide = + primitive2Conserved({0.298001, -0.017358, 2.364790e-01, 1.278540e-01, + 0.198448, Bx, 8.669425e-01, 4.750845e-01}, + gamma, primitiveScalar), + rotationRightSide = + primitive2Conserved({0.297673, -0.018657, 1.059540e-02, 9.996860e-01, + 0.197421, Bx, 9.891580e-01, 1.024949e-04}, + gamma, primitiveScalar), + fastRareLeftSide = + primitive2Conserved({0.297504, -0.020018, 1.137420e-02, 1.000000e+00, + 0.197234, Bx, 9.883860e-01, -4.981931e-17}, + gamma, primitiveScalar), + fastRareRightSide = + primitive2Conserved({0.299996, -0.000033, 1.855120e-05, 1.000000e+00, + 0.199995, Bx, 9.999865e-01, 1.737190e-16}, + gamma, primitiveScalar), + rightICs = primitive2Conserved({0.3, 0.0, 0.0, 1.0, 0.2, Bx, 1.0, 0.0}, + gamma, primitiveScalar); + + for (size_t direction = 0; direction < 3; direction++) { + // Initial Condition Checks + { + std::string const outputString{ + "Left State: Left Ryu & Jones 4d state\n" + "Right State: Left Ryu & Jones 4d state\n" + "HLLD State: Left Double Star State"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{ + 0, 0.75499999999999989, 0, 0, 2.2204460492503131e-16, 0.0, 0, 0}; + std::vector const scalarFlux{0, 0, 0}; + Real thermalEnergyFlux = 0.0; + std::vector const testFluxes = + computeFluxes(leftICs, leftICs, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, + outputString, direction); + } + { + std::string const outputString{ + "Left State: Right Ryu & Jones 4d state\n" + "Right State: Right Ryu & Jones 4d state\n" + "HLLD State: Left Double Star State"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{-5.5511151231257827e-17, + 0.45500000000000013, + -0.69999999999999996, + -5.5511151231257827e-17, + 0, + 0.0, + 0, + -0.69999999999999996}; + std::vector const scalarFlux{-6.1450707278254418e-17, + -1.2371317869019906e-16, + -1.8404800947169341e-16}; + Real thermalEnergyFlux = 0.0; + std::vector const testFluxes = + computeFluxes(rightICs, rightICs, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, + outputString, direction); + } + { + std::string const outputString{ + "Left State: Left Ryu & Jones 4d state\n" + "Right State: Right Ryu & Jones 4d state\n" + "HLLD State: Left Double Star State"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{ + 0.092428729855986602, 0.53311593977445149, -0.39622049648437296, + -0.21566989083797167, -0.13287876964320211, 0.0, + -0.40407579574102892, -0.21994567048141428}; + std::vector const scalarFlux{ + 0.10231837561464294, 0.20598837745492582, 0.30644876517012837}; + Real thermalEnergyFlux = 0.13864309478397996; + std::vector const testFluxes = + computeFluxes(leftICs, rightICs, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, + outputString, direction); + } + { + std::string const outputString{ + "Left State: Right Ryu & Jones 4d state\n" + "Right State: Left Ryu & Jones 4d state\n" + "HLLD State: Right Double Star State"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{ + -0.092428729855986602, 0.53311593977445149, -0.39622049648437296, + 0.21566989083797167, 0.13287876964320211, 0.0, + 0.40407579574102892, -0.21994567048141428}; + std::vector const scalarFlux{ + -0.10231837561464294, -0.20598837745492582, -0.30644876517012837}; + Real thermalEnergyFlux = -0.13864309478397996; + std::vector const testFluxes = + computeFluxes(rightICs, leftICs, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, + outputString, direction); + } + + // Cross wave checks + { + std::string const outputString{ + "Left State: Left side of pure hydrodynamic rarefaction\n" + "Right State: Right side of pure hydrodynamic rarefaction\n" + "HLLD State: Left Double Star State"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{ + 0.074035256375659553, 0.66054553664209648, + -6.1597070943493028e-41, -2.9447391900433873e-41, + 0.1776649658235645, 0.0, + -6.3466063324344113e-41, -3.0340891384335242e-41}; + std::vector const scalarFlux{ + 0.081956845911157775, 0.16499634214430131, 0.24546494288869905}; + Real thermalEnergyFlux = 0.11034221894046368; + std::vector const testFluxes = computeFluxes( + hydroRareLeftSide, hydroRareRightSide, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, + outputString, direction); + } + { + std::string const outputString{ + "Left State: Right side of pure hydrodynamic rarefaction\n" + "Right State: Left side of pure hydrodynamic rarefaction\n" + "HLLD State: Left Double Star State"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{ + 0.013336890338886076, 0.74071279157971992, + -6.1745213352160876e-41, -2.9474651270630147e-41, + 0.033152482405470307, 0.0, + 6.2022392844946449e-41, 2.9606965476795895e-41}; + std::vector const scalarFlux{ + 0.014763904657692993, 0.029722840565719184, 0.044218649135708464}; + Real thermalEnergyFlux = 0.019189877201961154; + std::vector const testFluxes = computeFluxes( + hydroRareRightSide, hydroRareLeftSide, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, + outputString, direction); + } + { + std::string const outputString{ + "Left State: Left of switch on slow shock\n" + "Right State: Right of switch on slow shock\n" + "HLLD State: Left Double Star State"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{ + 0.19734622040826083, 0.47855039640569758, -0.3392293209655618, + -0.18588204716255491, 0.10695446263054809, 0.0, + -0.3558357543098733, -0.19525093130352045}; + std::vector const scalarFlux{ + 0.21846177846784187, 0.43980943806215089, 0.65430419361309078}; + Real thermalEnergyFlux = 0.2840373040888583; + std::vector const testFluxes = + computeFluxes(switchOnSlowShockLeftSide, switchOnSlowShockRightSide, + gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, + outputString, direction); + } + { + std::string const outputString{ + "Left State: Right of switch on slow shock\n" + "Right State: Left of switch on slow shock\n" + "HLLD State: Left Double Star State"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{ + 0.097593254768855386, 0.76483698872352757, -0.02036438492698419, + -0.010747481940703562, 0.25327551496496836, 0.0, + -0.002520109973016129, -0.00088262199017708799}; + std::vector const scalarFlux{ + 0.10803549193474633, 0.21749813322875222, 0.32357182079044206}; + Real thermalEnergyFlux = 0.1100817647375162; + std::vector const testFluxes = + computeFluxes(switchOnSlowShockRightSide, switchOnSlowShockLeftSide, + gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, + outputString, direction); + } + { + std::string const outputString{ + "Left State: Left of contact discontinuity\n" + "Right State: Right of contact discontinuity\n" + "HLLD State: Left Double Star State"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{ + 0.2091677440314007, 0.5956612619664029, -0.29309091669513981, + -0.16072556008504282, 0.19220050968424285, 0.0, + -0.35226977371803297, -0.19316940226499904}; + std::vector const scalarFlux{ + 0.23154817591476573, 0.46615510432814616, 0.69349862290347741}; + Real thermalEnergyFlux = 0.23702444986592192; + std::vector const testFluxes = + computeFluxes(contactLeftSide, contactRightSide, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, + outputString, direction); + } + { + std::string const outputString{ + "Left State: Right of contact discontinuity\n" + "Right State: Left of contact discontinuity\n" + "HLLD State: Left Double Star State"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{ + 0.15801775068597168, 0.57916072367837657, -0.33437339604094024, + -0.18336617461176744, 0.16789791355547545, 0.0, + -0.3522739911439669, -0.19317084712861482}; + std::vector const scalarFlux{ + 0.17492525964231936, 0.35216128279157616, 0.52391009427617696}; + Real thermalEnergyFlux = 0.23704936434506069; + std::vector const testFluxes = + computeFluxes(contactRightSide, contactLeftSide, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, + outputString, direction); + } + { + std::string const outputString{ + "Left State: Left of slow shock\n" + "Right State: Right of slow shock\n" + "HLLD State: Left Double Star State"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{ + 0.11744487326715558, 0.66868230621718128, -0.35832022960458892, + -0.19650694834641164, 0.057880816021092185, 0.0, + -0.37198011453582402, -0.20397277844271294}; + std::vector const scalarFlux{ + 0.13001118457092631, 0.26173981750473918, 0.38939014356639379}; + Real thermalEnergyFlux = 0.1738058891582446; + std::vector const testFluxes = computeFluxes( + slowShockLeftSide, slowShockRightSide, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, + outputString, direction); + } + { + std::string const outputString{ + "Left State: Right of slow shock\n" + "Right State: Left of slow shock\n" + "HLLD State: Left Double Star State"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{ + 0.038440990187426027, 0.33776683678923869, -0.62583241538732792, + -0.3437911783906169, -0.13471828103488348, 0.0, + -0.15165427985881363, -0.082233932588833825}; + std::vector const scalarFlux{ + 0.042554081172858457, 0.085670301959209896, 0.12745164834795927}; + Real thermalEnergyFlux = 0.038445630017261548; + std::vector const testFluxes = computeFluxes( + slowShockRightSide, slowShockLeftSide, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, + outputString, direction); + } + { + std::string const outputString{ + "Left State: Left of rotation/Alfven wave\n" + "Right State: Right of rotation/Alfven wave\n" + "HLLD State: Right Double Star State"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{ + -0.0052668366104996478, 0.44242247672452317, -0.60785196341731951, + -0.33352435102145184, -0.21197843894720192, 0.0, + -0.18030635192654354, -0.098381113757603278}; + std::vector const scalarFlux{ + -0.0058303751166299484, -0.011737769516117116, -0.017462271505355991}; + Real thermalEnergyFlux = -0.0052395622905745485; + std::vector const testFluxes = + computeFluxes(rotationLeftSide, rotationRightSide, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, + outputString, direction); + } + { + std::string const outputString{ + "Left State: Right of rotation/Alfven wave\n" + "Right State: Left of rotation/Alfven wave\n" + "HLLD State: Right Double Star State"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{ + -0.005459628948343731, 0.4415038084184626, -0.69273580053867279, + -0.0051834737482743809, -0.037389286119015486, 0.0, + -0.026148289294373184, -0.69914753968916865}; + std::vector const scalarFlux{ + -0.0060437957583491572, -0.012167430087241717, -0.018101477236719343}; + Real thermalEnergyFlux = -0.0054536013916442853; + std::vector const testFluxes = + computeFluxes(rotationRightSide, rotationLeftSide, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, + outputString, direction); + } + { + std::string const outputString{ + "Left State: Left of fast rarefaction\n" + "Right State: Right of fast rarefaction\n" + "HLLD State: Right Double Star State"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{ + -0.0059354802028144249, 0.44075681881443612, -0.69194176811725872, + -0.0059354802028144804, -0.040194357552219451, 0.0, + -0.027710302430178135, -0.70000000000000007}; + std::vector const scalarFlux{ + -0.0065705619215052757, -0.013227920997059845, -0.019679168822056604}; + Real thermalEnergyFlux = -0.0059354109546219782; + std::vector const testFluxes = + computeFluxes(fastRareLeftSide, fastRareRightSide, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, + outputString, direction); + } + { + std::string const outputString{ + "Left State: Right of fast rarefaction\n" + "Right State: Left of fast rarefaction\n" + "HLLD State: Right Double Star State"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{ + -3.0171858819483255e-05, 0.45503057873272706, + -0.69998654276213712, -3.0171858819427744e-05, + -0.00014827469339251387, 0.0, + -8.2898844654399895e-05, -0.69999999999999984}; + std::vector const scalarFlux{-3.340017317660794e-05, + -6.7241562798797897e-05, + -0.00010003522597924373}; + Real thermalEnergyFlux = -3.000421709818028e-05; + std::vector const testFluxes = + computeFluxes(fastRareRightSide, fastRareLeftSide, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, + outputString, direction); + } + } +} +// ========================================================================= + +// ========================================================================= +/*! + * \brief Test the HLLD Riemann Solver using various states and waves from + * the Einfeldt Strong Rarefaction (EFR) + * + */ +TEST_F(tMHDCalculateHLLDFluxesCUDA, + EinfeldtStrongRarefactionCorrectInputExpectCorrectOutput) +{ + // Constant Values + Real const gamma = 5. / 3.; + Real const V0 = 2.; + Real const Vy = 0.0; + Real const Vz = 0.0; + Real const Bx = 0.0; + Real const Bz = 0.0; + + std::vector const primitiveScalar{1.1069975296, 2.2286185018, + 3.3155141875}; + + // States + std::vector< + Real> const // | Density | X-Velocity | Y-Velocity | Z-Velocity | + // Pressure | X-Magnetic Field | Y-Magnetic Field | + // Z-Magnetic Field | Adiabatic Index | Passive Scalars | + leftICs = primitive2Conserved({1.0, -V0, Vy, Vz, 0.45, Bx, 0.5, Bz}, + gamma, primitiveScalar), + leftRarefactionCenter = primitive2Conserved( + {0.368580, -1.180830, Vy, Vz, 0.111253, Bx, 0.183044, Bz}, gamma, + primitiveScalar), + leftVxTurnOver = primitive2Conserved( + {0.058814, -0.125475, Vy, Vz, 0.008819, Bx, 0.029215, Bz}, gamma, + primitiveScalar), + midPoint = primitive2Conserved( + {0.034658, 0.000778, Vy, Vz, 0.006776, Bx, 0.017333, Bz}, gamma, + primitiveScalar), + rightVxTurnOver = primitive2Conserved( + {0.062587, 0.152160, Vy, Vz, 0.009521, Bx, 0.031576, Bz}, gamma, + primitiveScalar), + rightRarefactionCenter = primitive2Conserved( + {0.316485, 1.073560, Vy, Vz, 0.089875, Bx, 0.159366, Bz}, gamma, + primitiveScalar), + rightICs = primitive2Conserved({1.0, V0, Vy, Vz, 0.45, Bx, 0.5, Bz}, + gamma, primitiveScalar); + + for (size_t direction = 0; direction < 3; direction++) { + // Initial Condition Checks + { + std::string const outputString{ + "Left State: Left Einfeldt Strong Rarefaction state\n" + "Right State: Left Einfeldt Strong Rarefaction state\n" + "HLLD State: Right"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{ + -2, 4.5750000000000002, -0, -0, -6.75, 0.0, -1, -0}; + std::vector const scalarFlux{ + -2.2139950592000002, -4.4572370036000004, -6.6310283749999996}; + Real thermalEnergyFlux = -1.3499999999999996; + std::vector const testFluxes = + computeFluxes(leftICs, leftICs, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, + outputString, direction); + } + { + std::string const outputString{ + "Left State: Right Einfeldt Strong Rarefaction state\n" + "Right State: Right Einfeldt Strong Rarefaction state\n" + "HLLD State: Left"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{ + 2, 4.5750000000000002, 0, 0, 6.75, 0.0, 1, 0}; + std::vector const scalarFlux{2.2139950592000002, 4.4572370036000004, + 6.6310283749999996}; + Real thermalEnergyFlux = 1.3499999999999996; + std::vector const testFluxes = + computeFluxes(rightICs, rightICs, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, + outputString, direction); + } + { + std::string const outputString{ + "Left State: Left Einfeldt Strong Rarefaction state\n" + "Right State: Right Einfeldt Strong Rarefaction state\n" + "HLLD State: Left Star"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{ + 0, -1.4249999999999998, -0, -0, 0, 0.0, 0, -0}; + std::vector const scalarFlux{0, 0, 0}; + Real thermalEnergyFlux = 0.0; + std::vector const testFluxes = + computeFluxes(leftICs, rightICs, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, + outputString, direction); + } + { + std::string const outputString{ + "Left State: Right Einfeldt Strong Rarefaction state\n" + "Right State: Left Einfeldt Strong Rarefaction state\n" + "HLLD State: Left Star"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{ + 0, 10.574999999999999, 0, 0, 0, 0.0, 0, 0}; + std::vector const scalarFlux{0, 0, 0}; + Real thermalEnergyFlux = 0.0; + std::vector const testFluxes = + computeFluxes(rightICs, leftICs, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, + outputString, direction); + } + + // Intermediate state checks + { + std::string const outputString{ + "Left State: Left Einfeldt Strong Rarefaction state\n" + "Right State: Left rarefaction center\n" + "HLLD State: Right"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{-0.43523032140000006, + 0.64193857338676208, + -0, + -0, + -0.67142479846795033, + 0.0, + -0.21614384652000002, + -0}; + std::vector const scalarFlux{ + -0.48179889059681413, -0.9699623468164007, -1.4430123054318851}; + Real thermalEnergyFlux = -0.19705631998499995; + std::vector const testFluxes = + computeFluxes(leftICs, leftRarefactionCenter, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, + outputString, direction); + } + { + std::string const outputString{ + "Left State: Left rarefaction center\n" + "Right State: Left Einfeldt Strong Rarefaction state\n" + "HLLD State: Right"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{ + -2, 4.5750000000000002, -0, -0, -6.75, 0.0, -1, -0}; + std::vector const scalarFlux{ + -2.2139950592000002, -4.4572370036000004, -6.6310283749999996}; + Real thermalEnergyFlux = -1.3499999999999996; + std::vector const testFluxes = + computeFluxes(leftRarefactionCenter, leftICs, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, + outputString, direction); + } + { + std::string const outputString{ + "Left State: Left rarefaction center\n" + "Right State: Left Vx turnover point\n" + "HLLD State: Right Star"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{-0.023176056428381629, + -2.0437812714100764e-05, + 0, + 0, + -0.00098843768795337005, + 0.0, + -0.011512369309265979, + 0}; + std::vector const scalarFlux{ + -0.025655837212088663, -0.051650588155052128, -0.076840543898599858}; + Real thermalEnergyFlux = -0.0052127803322822184; + std::vector const testFluxes = computeFluxes( + leftRarefactionCenter, leftVxTurnOver, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, + outputString, direction); + } + { + std::string const outputString{ + "Left State: Left Vx turnover point\n" + "Right State: Left rarefaction center\n" + "HLLD State: Right Star"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{-0.43613091609689758, + 0.64135749005731213, + 0, + 0, + -0.67086080671260462, + 0.0, + -0.21659109937066717, + 0}; + std::vector const scalarFlux{ + -0.48279584670145054, -0.9719694288205295, -1.445998239926636}; + Real thermalEnergyFlux = -0.19746407621898149; + std::vector const testFluxes = computeFluxes( + leftVxTurnOver, leftRarefactionCenter, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, + outputString, direction); + } + { + std::string const outputString{ + "Left State: Left Vx turnover point\n" + "Right State: Midpoint\n" + "HLLD State: Right Star"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{-0.0011656375857387598, + 0.0062355370788444902, + 0, + 0, + -0.00055517615333601446, + 0.0, + -0.0005829533231464588, + 0}; + std::vector const scalarFlux{-0.0012903579278217153, + -0.0025977614899708843, + -0.0038646879530001054}; + Real thermalEnergyFlux = -0.00034184143405415065; + std::vector const testFluxes = + computeFluxes(leftVxTurnOver, midPoint, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, + outputString, direction); + } + { + std::string const outputString{ + "Left State: Midpoint\n" + "Right State: Left Vx turnover point\n" + "HLLD State: Right Star"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{-0.0068097924351817191, + 0.010501781004354172, + 0, + 0, + -0.0027509360975397175, + 0.0, + -0.0033826654536986789, + 0}; + std::vector const scalarFlux{ + -0.0075384234028349319, -0.015176429414463658, -0.022577963432775162}; + Real thermalEnergyFlux = -0.001531664896602873; + std::vector const testFluxes = + computeFluxes(midPoint, leftVxTurnOver, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, + outputString, direction); + } + { + std::string const outputString{ + "Left State: Midpoint\n" + "Right State: Right Vx turnover point\n" + "HLLD State: Left Star"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{0.0013952100758668729, + 0.0061359407125797273, + 0, + 0, + 0.00065984543596031629, + 0.0, + 0.00069776606396793105, + 0}; + std::vector const scalarFlux{ + 0.001544494107257657, 0.0031093909889746947, 0.0046258388010795683}; + Real thermalEnergyFlux = 0.00040916715364737997; + std::vector const testFluxes = + computeFluxes(midPoint, rightVxTurnOver, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, + outputString, direction); + } + { + std::string const outputString{ + "Left State: Right Vx turnover point\n" + "Right State: Midpoint\n" + "HLLD State: Left Star"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{0.0090024688079190333, + 0.011769373146023688, + 0, + 0, + 0.003725251767222792, + 0.0, + 0.0045418689996141555, + 0}; + std::vector const scalarFlux{ + 0.0099657107306674268, 0.020063068547205749, 0.029847813055181766}; + Real thermalEnergyFlux = 0.0020542406295284269; + std::vector const testFluxes = + computeFluxes(rightVxTurnOver, midPoint, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, + outputString, direction); + } + { + std::string const outputString{ + "Left State: Right Vx turnover point\n" + "Right State: Right rarefaction center\n" + "HLLD State: Left Star"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{0.023310393229073981, + 0.0033086897645311728, + 0, + 0, + 0.0034208520409618887, + 0.0, + 0.011760413130542123, + 0}; + std::vector const scalarFlux{ + 0.025804547718589466, 0.051949973634547723, 0.077285939467198722}; + Real thermalEnergyFlux = 0.0053191138878843835; + std::vector const testFluxes = computeFluxes( + rightVxTurnOver, rightRarefactionCenter, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, + outputString, direction); + } + { + std::string const outputString{ + "Left State: Right rarefaction center\n" + "Right State: Right Vx turnover point\n" + "HLLD State: Left Star"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{0.33914253809565298, + 0.46770133685446141, + 0, + 0, + 0.46453338019960133, + 0.0, + 0.17077520175095764, + 0}; + std::vector const scalarFlux{ + 0.37542995185416178, 0.75581933514738364, 1.1244318966408966}; + Real thermalEnergyFlux = 0.1444638874418068; + std::vector const testFluxes = computeFluxes( + rightRarefactionCenter, rightVxTurnOver, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, + outputString, direction); + } + { + std::string const outputString{ + "Left State: Right rarefaction center\n" + "Right State: Right Einfeldt Strong Rarefaction state\n" + "HLLD State: Left"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{0.33976563660000003, + 0.46733255780629601, + 0, + 0, + 0.46427650313257612, + 0.0, + 0.17108896296000001, + 0}; + std::vector const scalarFlux{ + 0.37611972035917141, 0.75720798400261535, 1.1264977885722693}; + Real thermalEnergyFlux = 0.14472930749999999; + std::vector const testFluxes = + computeFluxes(rightRarefactionCenter, rightICs, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, + outputString, direction); + } + { + std::string const outputString{ + "Left State: Right Einfeldt Strong Rarefaction state\n" + "Right State: Right rarefaction center\n" + "HLLD State: Left"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{ + 2, 4.5750000000000002, 0, 0, 6.75, 0.0, 1, 0}; + std::vector const scalarFlux{2.2139950592000002, 4.4572370036000004, + 6.6310283749999996}; + Real thermalEnergyFlux = 1.3499999999999996; + std::vector const testFluxes = + computeFluxes(rightICs, rightRarefactionCenter, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, + outputString, direction); + } + } +} +// ========================================================================= + +// ========================================================================= +/*! + * \brief Test the HLLD Riemann Solver using the constant states from the + * examples in cholla/examples/3D + * + */ +TEST_F(tMHDCalculateHLLDFluxesCUDA, ConstantStatesExpectCorrectFlux) +{ + // Constant Values + Real const gamma = 5. / 3.; + + std::vector const primitiveScalar{1.1069975296, 2.2286185018, + 3.3155141875}; + + // States + std::vector< + Real> const // | Density | X-Velocity | Y-Velocity | Z-Velocity | + // Pressure | X-Magnetic Field | Y-Magnetic Field | + // Z-Magnetic Field | Adiabatic Index | Passive Scalars | + zeroMagneticField = + primitive2Conserved({1e4, 0.0, 0.0, 0.0, 1.380658E-5, 0.0, 0.0, 0.0}, + gamma, primitiveScalar), + onesMagneticField = + primitive2Conserved({1e4, 0.0, 0.0, 0.0, 1.380658E-5, 1.0, 1.0, 1.0}, + gamma, primitiveScalar); + + for (size_t direction = 2; direction < 3; direction++) { + { + std::string const outputString{ + "Left State: Constant state, zero magnetic field\n" + "Right State: Constant state, zero magnetic field\n" + "HLLD State: Left Star"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{0, 1.380658e-05, 0, 0, 0, 0, 0, 0}; + std::vector const scalarFlux{0, 0, 0}; + Real thermalEnergyFlux = 0.; + std::vector const testFluxes = + computeFluxes(zeroMagneticField, zeroMagneticField, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, + outputString, direction); + } + { + std::string const outputString{ + "Left State: Constant state, ones magnetic field\n" + "Right State: Constant state, ones magnetic field\n" + "HLLD State: Left Double Star"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{0, + 0.50001380657999994, + -1, + -1, + -1.7347234759768071e-18, + 0.0, + 3.4694469519536142e-18, + 3.4694469519536142e-18}; + std::vector const scalarFlux{1.5731381063233131e-14, + 3.1670573744690958e-14, + 4.7116290424753513e-14}; + Real thermalEnergyFlux = 0.; + std::vector const testFluxes = + computeFluxes(onesMagneticField, onesMagneticField, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, + outputString, direction); + } + } +} +// ========================================================================= + +// ========================================================================= +/*! + * \brief Test the HLLD Riemann Solver with the degenerate state + * + */ +TEST_F(tMHDCalculateHLLDFluxesCUDA, + DegenerateStateCorrectInputExpectCorrectOutput) +{ + // Constant Values + Real const gamma = 5. / 3.; + std::vector const primitiveScalar{1.1069975296, 2.2286185018, + 3.3155141875}; + + // State + std::vector const // | Density | X-Velocity | Y-Velocity | Z-Velocity | + // Pressure | X-Magnetic Field | Y-Magnetic Field | + // Z-Magnetic Field | Adiabatic Index | Passive + // Scalars | + state = primitive2Conserved({1.0, 1.0, 1.0, 1.0, 1.0, 3.0E4, 1.0, 1.0}, + gamma, primitiveScalar); + + std::vector const fiducialFlux{1, -449999997, -29999, -29999, + -59994, 0.0, -29999, -29999}; + std::vector const scalarFlux{1.1069975296000001, 2.2286185018000002, + 3.3155141874999998}; + Real thermalEnergyFlux = 1.5; + std::string const outputString{ + "Left State: Degenerate state\n" + "Right State: Degenerate state\n" + "HLLD State: Left Double Star State"}; + + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + // If you run into issues with the energy try 0.001953125 instead. + // That's what I got when running the Athena solver on its own. Running + // the Athena solver with theses tests gave me -0.00080700946455175148 + // though + for (size_t direction = 0; direction < 3; direction++) { + std::vector const testFluxes = + computeFluxes(state, state, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, + outputString, direction); + } +} +// ========================================================================= + +// ========================================================================= +/*! + * \brief Test the HLLD Riemann Solver with all zeroes + * + */ +TEST_F(tMHDCalculateHLLDFluxesCUDA, AllZeroesExpectAllZeroes) +{ + // Constant Values + Real const gamma = 5. / 3.; + + // State + size_t numElements = 8; + #ifdef SCALAR + numElements += 3; + #endif // SCALAR + + std::vector const state(numElements, 0.0); + std::vector const fiducialFlux(8, 0.0); + std::vector const scalarFlux(3, 0.0); + Real thermalEnergyFlux = 0.0; + + std::string const outputString{ + "Left State: All zeroes\n" + "Right State: All zeroes\n" + "HLLD State: Right Star State"}; + + for (size_t direction = 0; direction < 3; direction++) { + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const testFluxes = + computeFluxes(state, state, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, + outputString, direction); + } +} +// ========================================================================= + +// ========================================================================= +/*! +* \brief Test the HLLD Riemann Solver with negative pressure, energy, and + density. +* +*/ +TEST_F(tMHDCalculateHLLDFluxesCUDA, UnphysicalValuesExpectAutomaticFix) +{ + // Constant Values + Real const gamma = 5. / 3.; + + // States + std::vector // | Density | X-Momentum | Y-Momentum | Z-Momentum | + // Energy | X-Magnetic Field | Y-Magnetic Field | + // Z-Magnetic Field | Adiabatic Index | Passive Scalars | + negativePressure = {1.0, 1.0, 1.0, 1.0, 1.5, 1.0, 1.0, 1.0}, + negativeEnergy = {1.0, 1.0, 1.0, 1.0, -(5 - gamma), 1.0, 1.0, 1.0}, + negativeDensity = {-1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0}, + negativeDensityEnergyPressure = {-1.0, -1.0, -1.0, -1.0, + -gamma, 1.0, 1.0, 1.0}, + negativeDensityPressure = {-1.0, 1.0, 1.0, 1.0, -1.0, 1.0, 1.0, 1.0}; + + #ifdef SCALAR + std::vector const conservedScalar{1.1069975296, 2.2286185018, + 3.3155141875}; + negativePressure.insert(negativePressure.begin() + 5, conservedScalar.begin(), + conservedScalar.begin() + grid_enum::nscalars); + negativeEnergy.insert(negativeEnergy.begin() + 5, conservedScalar.begin(), + conservedScalar.begin() + grid_enum::nscalars); + negativeDensity.insert(negativeDensity.begin() + 5, conservedScalar.begin(), + conservedScalar.begin() + grid_enum::nscalars); + negativeDensityEnergyPressure.insert( + negativeDensityEnergyPressure.begin() + 5, conservedScalar.begin(), + conservedScalar.begin() + grid_enum::nscalars); + negativeDensityPressure.insert(negativeDensityPressure.begin() + 5, + conservedScalar.begin(), + conservedScalar.begin() + grid_enum::nscalars); + #endif // SCALAR + #ifdef DE + negativePressure.push_back(mhd::utils::computeThermalEnergy( + negativePressure.at(4), negativePressure.at(0), negativePressure.at(1), + negativePressure.at(2), negativePressure.at(3), + negativePressure.at(grid_enum::magnetic_x), + negativePressure.at(grid_enum::magnetic_y), + negativePressure.at(grid_enum::magnetic_z), gamma)); + negativeEnergy.push_back(mhd::utils::computeThermalEnergy( + negativeEnergy.at(4), negativeEnergy.at(0), negativeEnergy.at(1), + negativeEnergy.at(2), negativeEnergy.at(3), + negativeEnergy.at(grid_enum::magnetic_x), + negativeEnergy.at(grid_enum::magnetic_y), + negativeEnergy.at(grid_enum::magnetic_z), gamma)); + negativeDensity.push_back(mhd::utils::computeThermalEnergy( + negativeDensity.at(4), negativeDensity.at(0), negativeDensity.at(1), + negativeDensity.at(2), negativeDensity.at(3), + negativeDensity.at(grid_enum::magnetic_x), + negativeDensity.at(grid_enum::magnetic_y), + negativeDensity.at(grid_enum::magnetic_z), gamma)); + negativeDensityEnergyPressure.push_back(mhd::utils::computeThermalEnergy( + negativeDensityEnergyPressure.at(4), negativeDensityEnergyPressure.at(0), + negativeDensityEnergyPressure.at(1), negativeDensityEnergyPressure.at(2), + negativeDensityEnergyPressure.at(3), + negativeDensityEnergyPressure.at(grid_enum::magnetic_x), + negativeDensityEnergyPressure.at(grid_enum::magnetic_y), + negativeDensityEnergyPressure.at(grid_enum::magnetic_z), gamma)); + negativeDensityPressure.push_back(mhd::utils::computeThermalEnergy( + negativeDensityPressure.at(4), negativeDensityPressure.at(0), + negativeDensityPressure.at(1), negativeDensityPressure.at(2), + negativeDensityPressure.at(3), + negativeDensityPressure.at(grid_enum::magnetic_x), + negativeDensityPressure.at(grid_enum::magnetic_y), + negativeDensityPressure.at(grid_enum::magnetic_z), gamma)); + #endif // DE + + for (size_t direction = 0; direction < 3; direction++) { + { + std::string const outputString{ + "Left State: Negative Pressure\n" + "Right State: Negative Pressure\n" + "HLLD State: Left Star State"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{ + 1, 1.5, 0, 0, -1.6254793235168146e-16, 0, 0, 0}; + std::vector const scalarFlux{1.1069975296000001, 2.2286185018000002, + 3.3155141874999998}; + Real thermalEnergyFlux = -1.5; + std::vector const testFluxes = + computeFluxes(negativePressure, negativePressure, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, + outputString, direction); + } + { + std::string const outputString{ + "Left State: Negative Energy\n" + "Right State: Negative Energy\n" + "HLLD State: Left Star State"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{1, 1.5, 0, 0, -1.5, 0, 0, 0}; + std::vector const scalarFlux{1.1069975296000001, 2.2286185018000002, + 3.3155141874999998}; + Real thermalEnergyFlux = -6.333333333333333; + std::vector const testFluxes = + computeFluxes(negativeEnergy, negativeEnergy, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, + outputString, direction); + } + { + std::string const outputString{ + "Left State: Negative Density\n" + "Right State: Negative Density\n" + "HLLD State: Left State"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{1, 1E+20, 1e+20, 1e+20, + -5e+19, 0, 0, 0}; + std::vector const scalarFlux{1.1069975296000002e+20, + 2.2286185018000002e+20, + 3.3155141874999997e+20}; + Real thermalEnergyFlux = -1.5000000000000001e+40; + std::vector const testFluxes = + computeFluxes(negativeDensity, negativeDensity, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, + outputString, direction); + } + { + std::string const outputString{ + "Left State: Negative Density, Energy, and Pressure\n" + "Right State: Negative Density, Energy, and Pressure\n" + "HLLD State: Right State"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{-1, 1E+20, 1E+20, 1E+20, + 1.5E+20, 0, 0, 0}; + std::vector const scalarFlux{-1.1069975296000002e+20, + -2.2286185018000002e+20, + -3.3155141874999997e+20}; + Real thermalEnergyFlux = 1.5000000000000001e+40; + std::vector const testFluxes = + computeFluxes(negativeDensityEnergyPressure, + negativeDensityEnergyPressure, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, + outputString, direction); + } + { + std::string const outputString{ + "Left State: Negative Density and Pressure\n" + "Right State: Negative Density and Pressure\n" + "HLLD State: Left State"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{1, 1e+20, 1e+20, 1e+20, + -1.5e+20, 0, 0, 0}; + std::vector const scalarFlux{1.1069975296000002e+20, + 2.2286185018000002e+20, + 3.3155141874999997e+20}; + Real thermalEnergyFlux = -1.5000000000000001e+40; + std::vector const testFluxes = computeFluxes( + negativeDensityPressure, negativeDensityPressure, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, + outputString, direction); + } + } +} +// ========================================================================= + +// ========================================================================= +// End of integration tests for the entire HLLD solver. Unit tests are below +// ========================================================================= + +// ========================================================================= +// Unit tests for the contents of the mhd::_internal namespace +// ========================================================================= +/*! + * \brief A struct to hold some basic test values + * + */ +namespace +{ +struct testParams { + // List of cases + std::vector names{"Case 1", "Case 2"}; + + // Conserved Variables + double gamma = 5. / 3.; + std::valarray densityL{21.50306776645775, 48.316634031589935}; + std::valarray densityR{81.1217731762265, 91.02955738853635}; + std::valarray momentumXL{38.504606872151484, 18.984145880030045}; + std::valarray momentumXR{8.201811315045326, 85.24863367778745}; + std::valarray momentumYL{7.1046427940455015, 33.76182584816693}; + std::valarray momentumYR{13.874767484202021, 33.023492551299974}; + std::valarray momentumZL{32.25700338919422, 89.52561861038686}; + std::valarray momentumZR{33.85305318830181, 8.664313303796256}; + std::valarray energyL{65.75120838109942, 38.461354599479826}; + std::valarray energyR{18.88982523270516, 83.65639784178894}; + std::valarray magneticXL{92.75101068883114, 31.588767769990532}; + std::valarray magneticXR{93.66196246448985, 84.3529879134052}; + std::valarray magneticYL{12.297499156516622, 63.74471969570406}; + std::valarray magneticYR{84.9919141787549, 35.910258841630984}; + std::valarray magneticZL{46.224045698787776, 37.70326455170754}; + std::valarray magneticZR{34.852095153095384, 24.052685003977757}; + // Star States + std::valarray densityStarL{28.520995251761526, 54.721668215064945}; + std::valarray densityStarR{49.09069570738605, 72.68000504460609}; + std::valarray momentumStarXL{48.96082367518151, 97.15439466280228}; + std::valarray momentumStarXR{65.74705433463932, 94.5689655974538}; + std::valarray momentumStarYL{44.910034185328996, 78.60179936059853}; + std::valarray momentumStarYR{51.642522487399276, 44.63864007208728}; + std::valarray momentumStarZL{39.78163555990428, 63.01612978428839}; + std::valarray momentumStarZR{33.47900698769427, 52.19410653341197}; + std::valarray energyStarL{6.579867455284738, 30.45043664908369}; + std::valarray energyStarR{90.44484278669114, 61.33664731346812}; + std::valarray magneticStarXL{49.81491527582234, 62.379765828560906}; + std::valarray magneticStarXR{67.77402751903804, 64.62226739788758}; + std::valarray magneticStarYL{62.09348829143065, 54.27916744403672}; + std::valarray magneticStarYR{26.835645069149873, 98.97444628327318}; + std::valarray magneticStarZL{62.765890944643196, 93.26765455509641}; + std::valarray magneticStarZR{7.430231695917344, 10.696380763901459}; + // Double Star State + std::valarray momentumDoubleStarXL{75.42525315887075, + 83.87480678359029}; + std::valarray momentumDoubleStarYL{22.56132540660678, + 76.11074421934487}; + std::valarray momentumDoubleStarZL{27.83908778933224, + 28.577101567661465}; + std::valarray energyDoubleStar{45.83202455707669, 55.4553014145573}; + std::valarray magneticDoubleStarY{20.943239839455895, + 83.8514810487021}; + std::valarray magneticDoubleStarZ{83.3802438268807, + 80.36671251730783}; + // Fluxes + std::valarray densityFluxL{12.939239309626116, 81.71524586517073}; + std::valarray momentumFluxXL{65.05481464917627, 56.09885069707803}; + std::valarray momentumFluxYL{73.67692845586782, 2.717246983403787}; + std::valarray momentumFluxZL{16.873647595664387, 39.70132983192873}; + std::valarray energyFluxL{52.71888731972469, 81.63926176158796}; + std::valarray magneticFluxXL{67.7412464028116, 42.85301340921149}; + std::valarray magneticFluxYL{58.98928445415967, 57.04344459221359}; + std::valarray magneticFluxZL{29.976925743532302, 97.73329827141359}; + std::valarray momentumStarFluxX{74.90125547448865, + 26.812722601652684}; + std::valarray momentumStarFluxY{16.989138610622945, + 48.349566649914976}; + std::valarray momentumStarFluxZ{38.541822734846185, + 61.22843961052538}; + std::valarray energyStarFlux{19.095105176247017, 45.43224973313112}; + std::valarray magneticStarFluxY{96.23964526624277, 33.05337536594796}; + std::valarray magneticStarFluxZ{86.22516928268347, 15.62102082410738}; + + // Derived/Primitive variables + std::valarray velocityXL = momentumXL / densityL; + std::valarray velocityXR = momentumXR / densityR; + std::valarray velocityYL = momentumYL / densityL; + std::valarray velocityYR = momentumYR / densityR; + std::valarray velocityZL = momentumZL / densityL; + std::valarray velocityZR = momentumZR / densityR; + std::valarray totalPressureStarL{66.80958736783934, + 72.29644038317676}; + std::vector gasPressureL; + std::vector gasPressureR; + std::vector totalPressureL; + std::vector totalPressureR; + // Star State + std::valarray velocityStarXL = momentumStarXL / densityStarL; + std::valarray velocityStarXR = momentumStarXR / densityStarR; + std::valarray velocityStarYL = momentumStarYL / densityStarL; + std::valarray velocityStarYR = momentumStarYR / densityStarR; + std::valarray velocityStarZL = momentumStarZL / densityStarL; + std::valarray velocityStarZR = momentumStarZR / densityStarR; + // Double Star State + std::valarray velocityDoubleStarXL = + momentumDoubleStarXL / densityStarL; + std::valarray velocityDoubleStarYL = + momentumDoubleStarYL / densityStarL; + std::valarray velocityDoubleStarZL = + momentumDoubleStarZL / densityStarL; + // Other + std::valarray speedM{68.68021569453585, 70.08236749169825}; + std::valarray speedSide{70.37512772923496, 3.6579130085113265}; + testParams() + { + for (size_t i = 0; i < names.size(); i++) { + gasPressureL.push_back(mhd::utils::computeGasPressure( + energyL[i], densityL[i], momentumXL[i], momentumYL[i], momentumZL[i], + magneticXL[i], magneticYL[i], magneticZL[i], gamma)); + gasPressureR.push_back(mhd::utils::computeGasPressure( + energyR[i], densityR[i], momentumXR[i], momentumYR[i], momentumZR[i], + magneticXR[i], magneticYR[i], magneticZR[i], gamma)); + totalPressureL.push_back(mhd::utils::computeTotalPressure( + gasPressureL.back(), magneticXL[i], magneticYL[i], magneticZL[i])); + totalPressureR.push_back(mhd::utils::computeTotalPressure( + gasPressureL.back(), magneticXR[i], magneticYR[i], magneticZR[i])); + } + } +}; +} // namespace +// ========================================================================= + +// ========================================================================= +/*! + * \brief Test the mhd::_internal::_approximateWaveSpeeds function + * + */ +TEST(tMHDHlldInternalApproximateWaveSpeeds, CorrectInputExpectCorrectOutput) +{ + testParams const parameters; + std::vector const fiducialSpeedL{-22.40376497145191, + -11.190385012513822}; + std::vector const fiducialSpeedR{24.295526347371595, + 12.519790189404299}; + std::vector const fiducialSpeedM{-0.81760587897407833, + -0.026643804611559244}; + std::vector const fiducialSpeedStarL{-19.710500632936679, + -4.4880642018724357}; + std::vector const fiducialSpeedStarR{9.6740190040662242, + 3.4191202933087519}; + std::vector const fiducialDensityStarL{24.101290139122913, + 50.132466596958501}; + std::vector const fiducialDensityStarR{78.154104734671265, + 84.041595114910123}; + + double testSpeedL = 0; + double testSpeedR = 0; + double testSpeedM = 0; + double testSpeedStarL = 0; + double testSpeedStarR = 0; + double testDensityStarL = 0; + double testDensityStarR = 0; + + for (size_t i = 0; i < parameters.names.size(); i++) { + mhd::_internal::_approximateWaveSpeeds( + parameters.densityL[i], parameters.momentumXL[i], + parameters.momentumYL[i], parameters.momentumZL[i], + parameters.velocityXL[i], parameters.velocityYL[i], + parameters.velocityZL[i], parameters.gasPressureL[i], + parameters.totalPressureL[i], parameters.magneticXL[i], + parameters.magneticYL[i], parameters.magneticZL[i], + parameters.densityR[i], parameters.momentumXR[i], + parameters.momentumYR[i], parameters.momentumZR[i], + parameters.velocityXR[i], parameters.velocityYR[i], + parameters.velocityZR[i], parameters.gasPressureR[i], + parameters.totalPressureR[i], parameters.magneticYR[i], + parameters.magneticZR[i], parameters.gamma, testSpeedL, testSpeedR, + testSpeedM, testSpeedStarL, testSpeedStarR, testDensityStarL, + testDensityStarR); + // Now check results + testingUtilities::checkResults(fiducialSpeedL[i], testSpeedL, + parameters.names.at(i) + ", SpeedL"); + testingUtilities::checkResults(fiducialSpeedR.at(i), testSpeedR, + parameters.names.at(i) + ", SpeedR"); + testingUtilities::checkResults(fiducialSpeedM.at(i), testSpeedM, + parameters.names.at(i) + ", SpeedM"); + testingUtilities::checkResults(fiducialSpeedStarL.at(i), testSpeedStarL, + parameters.names.at(i) + ", SpeedStarL"); + testingUtilities::checkResults(fiducialSpeedStarR.at(i), testSpeedStarR, + parameters.names.at(i) + ", SpeedStarR"); + testingUtilities::checkResults(fiducialDensityStarL.at(i), testDensityStarL, + parameters.names.at(i) + ", DensityStarL"); + testingUtilities::checkResults(fiducialDensityStarR.at(i), testDensityStarR, + parameters.names.at(i) + ", DensityStarR"); + } +} +// ========================================================================= + +// ========================================================================= +/*! + * \brief Test the mhd::_internal::_starFluxes function in the non-degenerate + * case + * + */ +TEST(tMHDHlldInternalStarFluxes, CorrectInputNonDegenerateExpectCorrectOutput) +{ + testParams const parameters; + + std::vector const fiducialVelocityStarY{12.831290892281075, + 12.92610185957192}; + std::vector const fiducialVelocityStarZ{48.488664548015286, + 9.0850326944201107}; + std::vector const fiducialEnergyStar{1654897.6912410262, + 956.83439334487116}; + std::vector const fiducialMagneticStarY{-186.47142421374559, + 2.6815421494204679}; + std::vector const fiducialMagneticStarZ{-700.91191100481922, + 1.5860591049546646}; + std::vector const fiducialDensityStarFlux{506.82678248238807, + 105.14430372486369}; + std::vector const fiducialMomentumStarFluxX{135208.06632708258, + 14014.840899433098}; + std::vector const fiducialMomentumStarFluxY{25328.25203616685, + 2466.5997745560339}; + std::vector const fiducialMomentumStarFluxZ{95071.711914347878, + 1530.7490710422007}; + std::vector const fiducialEnergyStarFlux{116459061.8691024, + 3440.9679468544314}; + std::vector const fiducialMagneticStarFluxY{-13929.399086330559, + -166.32034689537392}; + std::vector const fiducialMagneticStarFluxZ{-52549.811458376971, + -34.380297363339892}; + + double testVelocityStarY; + double testVelocityStarZ; + double testEnergyStar; + double testMagneticStarY; + double testMagneticStarZ; + double testDensityStarFlux; + double testMomentumStarFluxX; + double testMomentumStarFluxY; + double testMomentumStarFluxZ; + double testEnergyStarFlux; + double testMagneticStarFluxY; + double testMagneticStarFluxZ; + + for (size_t i = 0; i < parameters.names.size(); i++) { + mhd::_internal::_starFluxes( + parameters.speedM[i], parameters.speedSide[i], parameters.densityL[i], + parameters.velocityXL[i], parameters.velocityYL[i], + parameters.velocityZL[i], parameters.momentumXL[i], + parameters.momentumYL[i], parameters.momentumZL[i], + parameters.energyL[i], parameters.totalPressureL[i], + parameters.magneticXL[i], parameters.magneticYL[i], + parameters.magneticZL[i], parameters.densityStarL[i], + parameters.totalPressureStarL[i], parameters.densityFluxL[i], + parameters.momentumFluxXL[i], parameters.momentumFluxYL[i], + parameters.momentumFluxZL[i], parameters.energyFluxL[i], + parameters.magneticFluxYL[i], parameters.magneticFluxZL[i], + testVelocityStarY, testVelocityStarZ, testEnergyStar, testMagneticStarY, + testMagneticStarZ, testDensityStarFlux, testMomentumStarFluxX, + testMomentumStarFluxY, testMomentumStarFluxZ, testEnergyStarFlux, + testMagneticStarFluxY, testMagneticStarFluxZ); + + // Now check results + testingUtilities::checkResults(fiducialVelocityStarY[i], testVelocityStarY, + parameters.names.at(i) + ", VelocityStarY"); + testingUtilities::checkResults(fiducialVelocityStarZ[i], testVelocityStarZ, + parameters.names.at(i) + ", VelocityStarZ"); + testingUtilities::checkResults(fiducialEnergyStar[i], testEnergyStar, + parameters.names.at(i) + ", EnergyStar"); + testingUtilities::checkResults(fiducialMagneticStarY[i], testMagneticStarY, + parameters.names.at(i) + ", MagneticStarY"); + testingUtilities::checkResults(fiducialMagneticStarZ[i], testMagneticStarZ, + parameters.names.at(i) + ", MagneticStarZ"); + testingUtilities::checkResults( + fiducialDensityStarFlux[i], testDensityStarFlux, + parameters.names.at(i) + ", DensityStarFlux"); + testingUtilities::checkResults( + fiducialMomentumStarFluxX[i], testMomentumStarFluxX, + parameters.names.at(i) + ", MomentumStarFluxX"); + testingUtilities::checkResults( + fiducialMomentumStarFluxY[i], testMomentumStarFluxY, + parameters.names.at(i) + ", MomentumStarFluxY"); + testingUtilities::checkResults( + fiducialMomentumStarFluxZ[i], testMomentumStarFluxZ, + parameters.names.at(i) + ", MomentumStarFluxZ"); + testingUtilities::checkResults(fiducialEnergyStarFlux[i], + testEnergyStarFlux, + parameters.names.at(i) + ", EnergyStarFlux"); + testingUtilities::checkResults( + fiducialMagneticStarFluxY[i], testMagneticStarFluxY, + parameters.names.at(i) + ", MagneticStarFluxY"); + testingUtilities::checkResults( + fiducialMagneticStarFluxZ[i], testMagneticStarFluxZ, + parameters.names.at(i) + ", MagneticStarFluxZ"); + } +} + +/*! + * \brief Test the mhd::_internal::_starFluxes function in the degenerate + * case + * + */ +TEST(tMHDHlldInternalStarFluxes, CorrectInputDegenerateExpectCorrectOutput) +{ + testParams const parameters; + + // Used to get us into the degenerate case + double const totalPressureStarMultiplier = 1E15; + + std::vector const fiducialVelocityStarY{0.33040135813215948, + 0.69876195899931859}; + std::vector const fiducialVelocityStarZ{1.500111692877206, + 1.8528943583250035}; + std::vector const fiducialEnergyStar{2.7072182962581443e+18, + -76277716432851392}; + std::vector const fiducialMagneticStarY{12.297499156516622, + 63.744719695704063}; + std::vector const fiducialMagneticStarZ{46.224045698787776, + 37.703264551707541}; + std::vector const fiducialDensityStarFlux{506.82678248238807, + 105.14430372486369}; + std::vector const fiducialMomentumStarFluxX{135208.06632708258, + 14014.840899433098}; + std::vector const fiducialMomentumStarFluxY{236.85804348470396, + 19.08858135095122}; + std::vector const fiducialMomentumStarFluxZ{757.76012607552047, + 83.112898961023902}; + std::vector const fiducialEnergyStarFlux{1.9052083339008875e+20, + -2.7901725119926531e+17}; + std::vector const fiducialMagneticStarFluxY{58.989284454159673, + 57.043444592213589}; + std::vector const fiducialMagneticStarFluxZ{29.976925743532302, + 97.733298271413588}; + + double testVelocityStarY; + double testVelocityStarZ; + double testEnergyStar; + double testMagneticStarY; + double testMagneticStarZ; + double testDensityStarFlux; + double testMomentumStarFluxX; + double testMomentumStarFluxY; + double testMomentumStarFluxZ; + double testEnergyStarFlux; + double testMagneticStarFluxY; + double testMagneticStarFluxZ; + + for (size_t i = 0; i < parameters.names.size(); i++) { + mhd::_internal::_starFluxes( + parameters.speedM[i], parameters.speedSide[i], parameters.densityL[i], + parameters.velocityXL[i], parameters.velocityYL[i], + parameters.velocityZL[i], parameters.momentumXL[i], + parameters.momentumYL[i], parameters.momentumZL[i], + parameters.energyL[i], parameters.totalPressureL[i], + parameters.magneticXL[i], parameters.magneticYL[i], + parameters.magneticZL[i], parameters.densityStarL[i], + parameters.totalPressureStarL[i] * totalPressureStarMultiplier, + parameters.densityFluxL[i], parameters.momentumFluxXL[i], + parameters.momentumFluxYL[i], parameters.momentumFluxZL[i], + parameters.energyFluxL[i], parameters.magneticFluxYL[i], + parameters.magneticFluxZL[i], testVelocityStarY, testVelocityStarZ, + testEnergyStar, testMagneticStarY, testMagneticStarZ, + testDensityStarFlux, testMomentumStarFluxX, testMomentumStarFluxY, + testMomentumStarFluxZ, testEnergyStarFlux, testMagneticStarFluxY, + testMagneticStarFluxZ); + + // Now check results + testingUtilities::checkResults(fiducialVelocityStarY[i], testVelocityStarY, + parameters.names.at(i) + ", VelocityStarY"); + testingUtilities::checkResults(fiducialVelocityStarZ[i], testVelocityStarZ, + parameters.names.at(i) + ", VelocityStarZ"); + testingUtilities::checkResults(fiducialEnergyStar[i], testEnergyStar, + parameters.names.at(i) + ", EnergyStar"); + testingUtilities::checkResults(fiducialMagneticStarY[i], testMagneticStarY, + parameters.names.at(i) + ", MagneticStarY"); + testingUtilities::checkResults(fiducialMagneticStarZ[i], testMagneticStarZ, + parameters.names.at(i) + ", MagneticStarZ"); + testingUtilities::checkResults( + fiducialDensityStarFlux[i], testDensityStarFlux, + parameters.names.at(i) + ", DensityStarFlux"); + testingUtilities::checkResults( + fiducialMomentumStarFluxX[i], testMomentumStarFluxX, + parameters.names.at(i) + ", MomentumStarFluxX"); + testingUtilities::checkResults( + fiducialMomentumStarFluxY[i], testMomentumStarFluxY, + parameters.names.at(i) + ", MomentumStarFluxY"); + testingUtilities::checkResults( + fiducialMomentumStarFluxZ[i], testMomentumStarFluxZ, + parameters.names.at(i) + ", MomentumStarFluxZ"); + testingUtilities::checkResults(fiducialEnergyStarFlux[i], + testEnergyStarFlux, + parameters.names.at(i) + ", EnergyStarFlux"); + testingUtilities::checkResults( + fiducialMagneticStarFluxY[i], testMagneticStarFluxY, + parameters.names.at(i) + ", MagneticStarFluxY"); + testingUtilities::checkResults( + fiducialMagneticStarFluxZ[i], testMagneticStarFluxZ, + parameters.names.at(i) + ", MagneticStarFluxZ"); + } +} +// ========================================================================= + +// ========================================================================= +/*! + * \brief Test the mhd::_internal::_nonStarFluxes function + * + */ +TEST(tMHDHlldInternalNonStarFluxes, CorrectInputExpectCorrectOutput) +{ + testParams const parameters; + + std::vector const fiducialDensityFlux{38.504606872151484, + 18.984145880030045}; + std::vector const fiducialMomentumFluxX{-3088.4810263278778, + 2250.9966820900618}; + std::vector const fiducialMomentumFluxY{-1127.8835013070616, + -2000.3517480656785}; + std::vector const fiducialMomentumFluxZ{-4229.5657456907293, + -1155.8240512956793}; + std::vector const fiducialMagneticFluxY{-8.6244637840856555, + 2.9729840344910059}; + std::vector const fiducialMagneticFluxZ{-56.365490339906408, + -43.716615275067923}; + std::vector const fiducialEnergyFlux{-12344.460641662206, + -2717.2127176227905}; + + double testDensityFlux; + double testMomentumFluxX; + double testMomentumFluxY; + double testMomentumFluxZ; + double testMagneticFluxY; + double testMagneticFluxZ; + double testEnergyFlux; + + for (size_t i = 0; i < parameters.names.size(); i++) { + mhd::_internal::_nonStarFluxes( + parameters.momentumXL[i], parameters.velocityXL[i], + parameters.velocityYL[i], parameters.velocityZL[i], + parameters.totalPressureL[i], parameters.energyL[i], + parameters.magneticXL[i], parameters.magneticYL[i], + parameters.magneticZL[i], testDensityFlux, testMomentumFluxX, + testMomentumFluxY, testMomentumFluxZ, testMagneticFluxY, + testMagneticFluxZ, testEnergyFlux); + + // Now check results + testingUtilities::checkResults(fiducialDensityFlux[i], testDensityFlux, + parameters.names.at(i) + ", DensityFlux"); + testingUtilities::checkResults(fiducialMomentumFluxX[i], testMomentumFluxX, + parameters.names.at(i) + ", MomentumFluxX"); + testingUtilities::checkResults(fiducialMomentumFluxY[i], testMomentumFluxY, + parameters.names.at(i) + ", MomentumFluxY"); + testingUtilities::checkResults(fiducialMomentumFluxZ[i], testMomentumFluxZ, + parameters.names.at(i) + ", MomentumFluxZ"); + testingUtilities::checkResults(fiducialMagneticFluxY[i], testMagneticFluxY, + parameters.names.at(i) + ", MagneticFluxY"); + testingUtilities::checkResults(fiducialMagneticFluxZ[i], testMagneticFluxZ, + parameters.names.at(i) + ", MagneticFluxZ"); + testingUtilities::checkResults(fiducialEnergyFlux[i], testEnergyFlux, + parameters.names.at(i) + ", EnergyFlux"); + } +} +// ========================================================================= + +// ========================================================================= +/*! + * \brief Test the mhd::_internal::_doubleStarState function. Non-degenerate + * state + * + */ +TEST(tMHDHlldInternalDoubleStarState, + CorrectInputNonDegenerateExpectCorrectOutput) +{ + testParams const parameters; + + double const fixedEpsilon = 7E-12; + + std::vector const fiducialVelocityDoubleStarY{-1.5775383335759607, + 3.803188977150934}; + std::vector const fiducialVelocityDoubleStarZ{-3.4914062207842482, + -4.2662645349592765}; + std::vector const fiducialMagneticDoubleStarY{45.259313435283325, + 71.787329583230417}; + std::vector const fiducialMagneticDoubleStarZ{36.670978215630669, + 53.189673238238178}; + std::vector const fiducialEnergyDoubleStarL{-2048.1953674500514, + -999.79694164635089}; + std::vector const fiducialEnergyDoubleStarR{1721.0582276783764, + 252.04716752257781}; + + double testVelocityDoubleStarY; + double testVelocityDoubleStarZ; + double testMagneticDoubleStarY; + double testMagneticDoubleStarZ; + double testEnergyDoubleStarL; + double testEnergyDoubleStarR; + + for (size_t i = 0; i < parameters.names.size(); i++) { + mhd::_internal::_doubleStarState( + parameters.speedM[i], parameters.magneticXL[i], + parameters.totalPressureStarL[i], parameters.densityStarL[i], + parameters.velocityStarYL[i], parameters.velocityStarZL[i], + parameters.energyStarL[i], parameters.magneticStarYL[i], + parameters.magneticStarZL[i], parameters.densityStarR[i], + parameters.velocityStarYR[i], parameters.velocityStarZR[i], + parameters.energyStarR[i], parameters.magneticStarYR[i], + parameters.magneticStarZR[i], testVelocityDoubleStarY, + testVelocityDoubleStarZ, testMagneticDoubleStarY, + testMagneticDoubleStarZ, testEnergyDoubleStarL, testEnergyDoubleStarR); + + // Now check results + testingUtilities::checkResults( + fiducialVelocityDoubleStarY[i], testVelocityDoubleStarY, + parameters.names.at(i) + ", VelocityDoubleStarY"); + testingUtilities::checkResults( + fiducialVelocityDoubleStarZ[i], testVelocityDoubleStarZ, + parameters.names.at(i) + ", VelocityDoubleStarZ"); + testingUtilities::checkResults( + fiducialMagneticDoubleStarY[i], testMagneticDoubleStarY, + parameters.names.at(i) + ", MagneticDoubleStarY"); + testingUtilities::checkResults( + fiducialMagneticDoubleStarZ[i], testMagneticDoubleStarZ, + parameters.names.at(i) + ", MagneticDoubleStarZ"); + testingUtilities::checkResults( + fiducialEnergyDoubleStarL[i], testEnergyDoubleStarL, + parameters.names.at(i) + ", EnergyDoubleStarL"); + testingUtilities::checkResults( + fiducialEnergyDoubleStarR[i], testEnergyDoubleStarR, + parameters.names.at(i) + ", EnergyDoubleStarR", fixedEpsilon); + } +} + +/*! + * \brief Test the mhd::_internal::_doubleStarState function in the + * degenerate state. + * + */ +TEST(tMHDHlldInternalDoubleStarState, CorrectInputDegenerateExpectCorrectOutput) +{ + testParams const parameters; + + std::vector const fiducialVelocityDoubleStarY{1.5746306813243216, + 1.4363926014039052}; + std::vector const fiducialVelocityDoubleStarZ{1.3948193325212686, + 1.1515754515491903}; + std::vector const fiducialMagneticDoubleStarY{62.093488291430653, + 54.279167444036723}; + std::vector const fiducialMagneticDoubleStarZ{62.765890944643196, + 93.267654555096414}; + std::vector const fiducialEnergyDoubleStarL{6.579867455284738, + 30.450436649083692}; + std::vector const fiducialEnergyDoubleStarR{90.44484278669114, + 61.33664731346812}; + + double testVelocityDoubleStarY; + double testVelocityDoubleStarZ; + double testMagneticDoubleStarY; + double testMagneticDoubleStarZ; + double testEnergyDoubleStarL; + double testEnergyDoubleStarR; + + for (size_t i = 0; i < parameters.names.size(); i++) { + mhd::_internal::_doubleStarState( + parameters.speedM[i], 0.0, parameters.totalPressureStarL[i], + parameters.densityStarL[i], parameters.velocityStarYL[i], + parameters.velocityStarZL[i], parameters.energyStarL[i], + parameters.magneticStarYL[i], parameters.magneticStarZL[i], + parameters.densityStarR[i], parameters.velocityStarYR[i], + parameters.velocityStarZR[i], parameters.energyStarR[i], + parameters.magneticStarYR[i], parameters.magneticStarZR[i], + testVelocityDoubleStarY, testVelocityDoubleStarZ, + testMagneticDoubleStarY, testMagneticDoubleStarZ, testEnergyDoubleStarL, + testEnergyDoubleStarR); + // Now check results + testingUtilities::checkResults( + fiducialVelocityDoubleStarY[i], testVelocityDoubleStarY, + parameters.names.at(i) + ", VelocityDoubleStarY"); + testingUtilities::checkResults( + fiducialVelocityDoubleStarZ[i], testVelocityDoubleStarZ, + parameters.names.at(i) + ", VelocityDoubleStarZ"); + testingUtilities::checkResults( + fiducialMagneticDoubleStarY[i], testMagneticDoubleStarY, + parameters.names.at(i) + ", MagneticDoubleStarY"); + testingUtilities::checkResults( + fiducialMagneticDoubleStarZ[i], testMagneticDoubleStarZ, + parameters.names.at(i) + ", MagneticDoubleStarZ"); + testingUtilities::checkResults( + fiducialEnergyDoubleStarL[i], testEnergyDoubleStarL, + parameters.names.at(i) + ", EnergyDoubleStarL"); + testingUtilities::checkResults( + fiducialEnergyDoubleStarR[i], testEnergyDoubleStarR, + parameters.names.at(i) + ", EnergyDoubleStarR"); + } +} +// ========================================================================= + +// ========================================================================= +/*! + * \brief Test the mhd::_internal::_doubleStarFluxes function + * + */ +TEST(tMHDHlldInternalDoubleStarFluxes, CorrectInputExpectCorrectOutput) +{ + testParams const parameters; + + std::vector const fiducialMomentumDoubleStarFluxX{ + 1937.3388606704509, -21.762854649386174}; + std::vector const fiducialMomentumDoubleStarFluxY{-1555.8040962754276, + 39.237503643804175}; + std::vector const fiducialMomentumDoubleStarFluxZ{ + -801.91650203165148, -64.746529703562871}; + std::vector const fiducialEnergyDoubleStarFlux{2781.4706748628528, + 136.89786983482355}; + std::vector const fiducialMagneticDoubleStarFluxY{-2799.7143456312342, + 141.2263259922299}; + std::vector const fiducialMagneticDoubleStarFluxZ{ + 1536.9628864256708, -31.569502877970095}; + + double testMomentumDoubleStarFluxX; + double testMomentumDoubleStarFluxY; + double testMomentumDoubleStarFluxZ; + double testEnergyDoubleStarFlux; + double testMagneticDoubleStarFluxY; + double testMagneticDoubleStarFluxZ; + + for (size_t i = 0; i < parameters.names.size(); i++) { + mhd::_internal::_doubleStarFluxes( + parameters.speedSide[i], parameters.momentumStarFluxX[i], + parameters.momentumStarFluxY[i], parameters.momentumStarFluxZ[i], + parameters.energyStarFlux[i], parameters.magneticStarFluxY[i], + parameters.magneticStarFluxZ[i], parameters.densityStarL[i], + parameters.velocityStarXL[i], parameters.velocityStarYL[i], + parameters.velocityStarZL[i], parameters.energyStarL[i], + parameters.magneticStarYL[i], parameters.magneticStarZL[i], + parameters.velocityDoubleStarXL[i], parameters.velocityDoubleStarYL[i], + parameters.velocityDoubleStarZL[i], parameters.energyDoubleStar[i], + parameters.magneticDoubleStarY[i], parameters.magneticDoubleStarZ[i], + testMomentumDoubleStarFluxX, testMomentumDoubleStarFluxY, + testMomentumDoubleStarFluxZ, testEnergyDoubleStarFlux, + testMagneticDoubleStarFluxY, testMagneticDoubleStarFluxZ); + + // Now check results + testingUtilities::checkResults( + fiducialMomentumDoubleStarFluxX[i], testMomentumDoubleStarFluxX, + parameters.names.at(i) + ", MomentumDoubleStarFluxX"); + testingUtilities::checkResults( + fiducialMomentumDoubleStarFluxY[i], testMomentumDoubleStarFluxY, + parameters.names.at(i) + ", MomentumDoubleStarFluxY"); + testingUtilities::checkResults( + fiducialMomentumDoubleStarFluxZ[i], testMomentumDoubleStarFluxZ, + parameters.names.at(i) + ", MomentumDoubleStarFluxZ"); + testingUtilities::checkResults( + fiducialEnergyDoubleStarFlux[i], testEnergyDoubleStarFlux, + parameters.names.at(i) + ", EnergyDoubleStarFlux"); + testingUtilities::checkResults( + fiducialMagneticDoubleStarFluxY[i], testMagneticDoubleStarFluxY, + parameters.names.at(i) + ", MagneticDoubleStarFluxY"); + testingUtilities::checkResults( + fiducialMagneticDoubleStarFluxZ[i], testMagneticDoubleStarFluxZ, + parameters.names.at(i) + ", MagneticDoubleStarFluxZ"); + } +} +// ========================================================================= + +// ========================================================================= +/*! + * \brief Test the mhd::_internal::_returnFluxes function + * + */ +TEST(tMHDHlldInternalReturnFluxes, CorrectInputExpectCorrectOutput) +{ + double const dummyValue = 999; + double const densityFlux = 1; + double const momentumFluxX = 2; + double const momentumFluxY = 3; + double const momentumFluxZ = 4; + double const energyFlux = 5; + double const magneticFluxY = 6; + double const magneticFluxZ = 7; + + int threadId = 0; + int n_cells = 10; + int nFields = 8; // Total number of conserved fields + #ifdef SCALAR + nFields += NSCALARS; + #endif // SCALAR + #ifdef DE + nFields++; + #endif // DE + + // Lambda for finding indices and check if they're correct + auto findIndex = [](std::vector const &vec, double const &num, + int const &fidIndex, std::string const &name) { + int index = + std::distance(vec.begin(), std::find(vec.begin(), vec.end(), num)); + // EXPECT_EQ(fidIndex, index) << "Error in " << name << " index" << + // std::endl; + + return index; + }; + + for (size_t direction = 0; direction < 3; direction++) { + int o1, o2, o3; + if (direction == 0) { + o1 = 1; + o2 = 2; + o3 = 3; + } + if (direction == 1) { + o1 = 2; + o2 = 3; + o3 = 1; + } + if (direction == 2) { + o1 = 3; + o2 = 1; + o3 = 2; + } + + std::vector testFluxArray(nFields * n_cells, dummyValue); + + // Fiducial Indices + int const fiducialDensityIndex = threadId; + int const fiducialMomentumIndexX = threadId + n_cells * o1; + int const fiducialMomentumIndexY = threadId + n_cells * o2; + int const fiducialMomentumIndexZ = threadId + n_cells * o3; + int const fiducialEnergyIndex = threadId + n_cells * 4; + int const fiducialMagneticYIndex = + threadId + n_cells * (grid_enum::magnetic_x); + int const fiducialMagneticZIndex = + threadId + n_cells * (grid_enum::magnetic_y); + + mhd::_internal::_returnFluxes(threadId, o1, o2, o3, n_cells, + testFluxArray.data(), densityFlux, + momentumFluxX, momentumFluxY, momentumFluxZ, + energyFlux, magneticFluxY, magneticFluxZ); + + // Find the indices for the various fields + int densityLoc = + findIndex(testFluxArray, densityFlux, fiducialDensityIndex, "density"); + int momentumXLocX = findIndex(testFluxArray, momentumFluxX, + fiducialMomentumIndexX, "momentum X"); + int momentumYLocY = findIndex(testFluxArray, momentumFluxY, + fiducialMomentumIndexY, "momentum Y"); + int momentumZLocZ = findIndex(testFluxArray, momentumFluxZ, + fiducialMomentumIndexZ, "momentum Z"); + int energyLoc = + findIndex(testFluxArray, energyFlux, fiducialEnergyIndex, "energy"); + int magneticYLoc = findIndex(testFluxArray, magneticFluxY, + fiducialMagneticYIndex, "magnetic Y"); + int magneticZLoc = findIndex(testFluxArray, magneticFluxZ, + fiducialMagneticZIndex, "magnetic Z"); + + for (size_t i = 0; i < testFluxArray.size(); i++) { + // Skip the already checked indices + if ((i != densityLoc) and (i != momentumXLocX) and + (i != momentumYLocY) and (i != momentumZLocZ) and (i != energyLoc) and + (i != magneticYLoc) and (i != magneticZLoc)) { + EXPECT_EQ(dummyValue, testFluxArray.at(i)) + << "Unexpected value at index that _returnFluxes shouldn't be " + "touching" + << std::endl + << "Index = " << i << std::endl + << "Direction = " << direction << std::endl; + } + } + } +} + // ========================================================================= + #endif // MHD +#endif // CUDA diff --git a/src/riemann_solvers/roe_cuda.cu b/src/riemann_solvers/roe_cuda.cu index 88b094468..8c92da290 100644 --- a/src/riemann_solvers/roe_cuda.cu +++ b/src/riemann_solvers/roe_cuda.cu @@ -3,34 +3,40 @@ #ifdef CUDA -#include "../utils/gpu.hpp" -#include -#include "../global/global.h" -#include "../global/global_cuda.h" -#include "../riemann_solvers/roe_cuda.h" - -#ifdef DE //PRESSURE_DE -#include "../utils/hydro_utilities.h" -#endif - -/*! \fn Calculate_Roe_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_R, Real *dev_flux, int nx, int ny, int nz, int n_ghost, Real gamma, Real *dev_etah, int dir, int n_fields) - * \brief Roe Riemann solver based on the version described in Stone et al, 2008. */ -__global__ void Calculate_Roe_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_R, Real *dev_flux, int nx, int ny, int nz, int n_ghost, Real gamma, int dir, int n_fields) + #include + + #include "../global/global.h" + #include "../global/global_cuda.h" + #include "../riemann_solvers/roe_cuda.h" + #include "../utils/gpu.hpp" + + #ifdef DE // PRESSURE_DE + #include "../utils/hydro_utilities.h" + #endif + +/*! \fn Calculate_Roe_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_R, Real + * *dev_flux, int nx, int ny, int nz, int n_ghost, Real gamma, Real *dev_etah, + * int dir, int n_fields) \brief Roe Riemann solver based on the version + * described in Stone et al, 2008. */ +__global__ void Calculate_Roe_Fluxes_CUDA(Real *dev_bounds_L, + Real *dev_bounds_R, Real *dev_flux, + int nx, int ny, int nz, int n_ghost, + Real gamma, int dir, int n_fields) { // get a thread index - int blockId = blockIdx.x + blockIdx.y*gridDim.x; - int tid = threadIdx.x + blockId * blockDim.x; - int zid = tid / (nx*ny); - int yid = (tid - zid*nx*ny) / nx; - int xid = tid - zid*nx*ny - yid*nx; + int blockId = blockIdx.x + blockIdx.y * gridDim.x; + int tid = threadIdx.x + blockId * blockDim.x; + int zid = tid / (nx * ny); + int yid = (tid - zid * nx * ny) / nx; + int xid = tid - zid * nx * ny - yid * nx; - int n_cells = nx*ny*nz; + int n_cells = nx * ny * nz; Real dl, vxl, mxl, vyl, myl, vzl, mzl, pl, El; Real dr, vxr, mxr, vyr, myr, vzr, mzr, pr, Er; Real etah = 0.0; - Real g1 = gamma - 1.0; + Real g1 = gamma - 1.0; Real Hl, Hr; Real sqrtdl, sqrtdr, vx, vy, vz, H; Real vsq, asq, a; @@ -48,89 +54,95 @@ __global__ void Calculate_Roe_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_R Real dgel, gel, dger, ger, f_ge_l, f_ge_r, E_kin; #endif #ifdef SCALAR - Real dscalarl[NSCALARS], scalarl[NSCALARS], dscalarr[NSCALARS], scalarr[NSCALARS], f_scalar_l[NSCALARS], f_scalar_r[NSCALARS]; + Real dscalarl[NSCALARS], scalarl[NSCALARS], dscalarr[NSCALARS], + scalarr[NSCALARS], f_scalar_l[NSCALARS], f_scalar_r[NSCALARS]; #endif int o1, o2, o3; - if (dir==0) { - o1 = 1; o2 = 2; o3 = 3; + if (dir == 0) { + o1 = 1; + o2 = 2; + o3 = 3; } - if (dir==1) { - o1 = 2; o2 = 3; o3 = 1; + if (dir == 1) { + o1 = 2; + o2 = 3; + o3 = 1; } - if (dir==2) { - o1 = 3; o2 = 1; o3 = 2; + if (dir == 2) { + o1 = 3; + o2 = 1; + o3 = 2; } // Each thread executes the solver independently - if (xid < nx && yid < ny && zid < nz) - { + if (xid < nx && yid < ny && zid < nz) { // retrieve conserved variables - dl = dev_bounds_L[ tid]; - mxl = dev_bounds_L[o1*n_cells + tid]; - myl = dev_bounds_L[o2*n_cells + tid]; - mzl = dev_bounds_L[o3*n_cells + tid]; - El = dev_bounds_L[4*n_cells + tid]; - #ifdef SCALAR - for (int i=0; i= 0.0) { - dev_flux[ tid] = f_d_l; - dev_flux[o1*n_cells+tid] = f_mx_l; - dev_flux[o2*n_cells+tid] = f_my_l; - dev_flux[o3*n_cells+tid] = f_mz_l; - dev_flux[4*n_cells+tid] = f_E_l; - #ifdef SCALAR - for (int i=0; i lambda_m) { + if (lambda_0 > lambda_m) { if (test0 <= 0.0) { - hlle_flag=1; + hlle_flag = 1; } - if (test4 - 0.5*(test1*test1 + test2*test2 + test3*test3)/test0 < 0.0) { - hlle_flag=2; + if (test4 - + 0.5 * (test1 * test1 + test2 * test2 + test3 * test3) / test0 < + 0.0) { + hlle_flag = 2; } } test0 += a3 + a4; - test1 += a3*vx; - test2 += a1 + a3*vy; - test3 += a2 + a3*vz; - test4 += a1*vy + a2*vz + a3*0.5*vsq; + test1 += a3 * vx; + test2 += a1 + a3 * vy; + test3 += a2 + a3 * vz; + test4 += a1 * vy + a2 * vz + a3 * 0.5 * vsq; - if(lambda_p > lambda_0) { + if (lambda_p > lambda_0) { if (test0 <= 0.0) { - hlle_flag=1; + hlle_flag = 1; } - if (test4 - 0.5*(test1*test1 + test2*test2 + test3*test3)/test0 < 0.0) { - hlle_flag=2; + if (test4 - + 0.5 * (test1 * test1 + test2 * test2 + test3 * test3) / test0 < + 0.0) { + hlle_flag = 2; } } - // if pressure or density is negative, and we have not already returned the supersonic fluxes, - // return the HLLE fluxes + // if pressure or density is negative, and we have not already returned + // the supersonic fluxes, return the HLLE fluxes if (hlle_flag != 0) { - Real cfl, cfr, al, ar, bm, bp, tmp; // compute max and fmin wave speeds - cfl = sqrt(gamma*pl/dl); // sound speed in left state - cfr = sqrt(gamma*pr/dr); // sound speed in right state + cfl = sqrt(gamma * pl / dl); // sound speed in left state + cfr = sqrt(gamma * pr / dr); // sound speed in right state // take max/fmin of Roe eigenvalues and left and right sound speeds al = fmin(lambda_m, vxl - cfl); ar = fmax(lambda_p, vxr + cfr); - bm = fmin(al, (Real) 0.0); - bp = fmax(ar, (Real) 0.0); + bm = fmin(al, (Real)0.0); + bp = fmax(ar, (Real)0.0); // compute left and right fluxes - f_d_l = mxl - bm*dl; - f_d_r = mxr - bp*dr; + f_d_l = mxl - bm * dl; + f_d_r = mxr - bp * dr; - f_mx_l = mxl*(vxl - bm) + pl; - f_mx_r = mxr*(vxr - bp) + pr; + f_mx_l = mxl * (vxl - bm) + pl; + f_mx_r = mxr * (vxr - bp) + pr; - f_my_l = myl*(vxl - bm); - f_my_r = myr*(vxr - bp); + f_my_l = myl * (vxl - bm); + f_my_r = myr * (vxr - bp); - f_mz_l = mzl*(vxl - bm); - f_mz_r = mzr*(vxr - bp); + f_mz_l = mzl * (vxl - bm); + f_mz_r = mzr * (vxr - bp); - f_E_l = El*(vxl - bm) + pl*vxl; - f_E_r = Er*(vxr - bp) + pr*vxr; + f_E_l = El * (vxl - bm) + pl * vxl; + f_E_r = Er * (vxr - bp) + pr * vxr; - #ifdef DE - f_ge_l = dgel*(vxl - bm); - f_ge_r = dger*(vxr - bp); - #endif + #ifdef DE + f_ge_l = dgel * (vxl - bm); + f_ge_r = dger * (vxr - bp); + #endif - #ifdef SCALAR - for (int i=0; i= 0.0) - dev_flux[(5+i)*n_cells+tid] = dev_flux[tid] * scalarl[i]; + dev_flux[(5 + i) * n_cells + tid] = dev_flux[tid] * scalarl[i]; else - dev_flux[(5+i)*n_cells+tid] = dev_flux[tid] * scalarr[i]; + dev_flux[(5 + i) * n_cells + tid] = dev_flux[tid] * scalarr[i]; } - #endif - #ifdef DE + #endif + #ifdef DE if (dev_flux[tid] >= 0.0) - dev_flux[(n_fields-1)*n_cells+tid] = dev_flux[tid] * gel; + dev_flux[(n_fields - 1) * n_cells + tid] = dev_flux[tid] * gel; else - dev_flux[(n_fields-1)*n_cells+tid] = dev_flux[tid] * ger; - #endif + dev_flux[(n_fields - 1) * n_cells + tid] = dev_flux[tid] * ger; + #endif } - } - } - } - -#endif //CUDA +#endif // CUDA diff --git a/src/riemann_solvers/roe_cuda.h b/src/riemann_solvers/roe_cuda.h index 3e7fcc772..00df99d71 100644 --- a/src/riemann_solvers/roe_cuda.h +++ b/src/riemann_solvers/roe_cuda.h @@ -3,17 +3,19 @@ #ifdef CUDA -#ifndef ROE_CUDA_H -#define Roe_CUDA_H - -#include "../global/global.h" - - -/*! \fn Calculate_Roe_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_R, Real *dev_flux, int nx, int ny, int nz, int n_ghost, Real gamma, Real *dev_etah, int dir, int n_fields) - * \brief Roe Riemann solver based on the version described in Stone et al, 2008. */ -__global__ void Calculate_Roe_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_R, Real *dev_flux, int nx, int ny, int nz, int n_ghost, Real gamma, int dir, int n_fields); - - - -#endif //ROE_CUDA_H -#endif //CUDA + #ifndef ROE_CUDA_H + #define Roe_CUDA_H + + #include "../global/global.h" + +/*! \fn Calculate_Roe_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_R, Real + * *dev_flux, int nx, int ny, int nz, int n_ghost, Real gamma, Real *dev_etah, + * int dir, int n_fields) \brief Roe Riemann solver based on the version + * described in Stone et al, 2008. */ +__global__ void Calculate_Roe_Fluxes_CUDA(Real *dev_bounds_L, + Real *dev_bounds_R, Real *dev_flux, + int nx, int ny, int nz, int n_ghost, + Real gamma, int dir, int n_fields); + + #endif // ROE_CUDA_H +#endif // CUDA diff --git a/src/system_tests/cooling_system_tests.cpp b/src/system_tests/cooling_system_tests.cpp index 8b62ef092..7db321cc9 100644 --- a/src/system_tests/cooling_system_tests.cpp +++ b/src/system_tests/cooling_system_tests.cpp @@ -4,25 +4,22 @@ * */ - // External Libraries and Headers #include -#include // provides std:sin + +#include // provides std:sin // Local includes #include "../system_tests/system_tester.h" #include "../utils/testing_utilities.h" - - #ifndef PI -#define PI 3.141592653589793 + #define PI 3.141592653589793 #endif #define COOL_RHO 6.9498489284711 -TEST(tCOOLINGSYSTEMConstant5, - CorrectInputExpectCorrectOutput) +TEST(tCOOLINGSYSTEMConstant5, CorrectInputExpectCorrectOutput) { // dt = 0.3 // rho = COOL_RHO*1e5 @@ -31,61 +28,53 @@ TEST(tCOOLINGSYSTEMConstant5, /* double energy = 0.0014850544057189395;// Python */ - double energy = 0.00148501098087863;// Cholla + double energy = 0.00148501098087863; // Cholla systemTest::SystemTestRunner testObject(false, false, false); testObject.launchCholla(); testObject.openHydroTestData(); - testingUtilities::analyticConstant(testObject,"density",COOL_RHO*1e5); - testingUtilities::analyticConstant(testObject,"momentum_x",0.0); - testingUtilities::analyticConstant(testObject,"momentum_y",0.0); - testingUtilities::analyticConstant(testObject,"momentum_z",0.0); - testingUtilities::analyticConstant(testObject,"Energy",energy); - + testingUtilities::analyticConstant(testObject, "density", COOL_RHO * 1e5); + testingUtilities::analyticConstant(testObject, "momentum_x", 0.0); + testingUtilities::analyticConstant(testObject, "momentum_y", 0.0); + testingUtilities::analyticConstant(testObject, "momentum_z", 0.0); + testingUtilities::analyticConstant(testObject, "Energy", energy); } - -TEST(tCOOLINGSYSTEMConstant7, - CorrectInputExpectCorrectOutput) +TEST(tCOOLINGSYSTEMConstant7, CorrectInputExpectCorrectOutput) { // dt = 100 // rho = COOL_RHO*1e5 // pressure = 1e-1 // T = 1e7 // double energy = 0.14982743570299709; // Python - double energy = 0.14982745510047499; // Cholla + double energy = 0.14982745510047499; // Cholla systemTest::SystemTestRunner testObject(false, false, false); testObject.launchCholla(); testObject.openHydroTestData(); - testingUtilities::analyticConstant(testObject,"density",COOL_RHO*1e5); - testingUtilities::analyticConstant(testObject,"momentum_x",0.0); - testingUtilities::analyticConstant(testObject,"momentum_y",0.0); - testingUtilities::analyticConstant(testObject,"momentum_z",0.0); - testingUtilities::analyticConstant(testObject,"Energy",energy); - + testingUtilities::analyticConstant(testObject, "density", COOL_RHO * 1e5); + testingUtilities::analyticConstant(testObject, "momentum_x", 0.0); + testingUtilities::analyticConstant(testObject, "momentum_y", 0.0); + testingUtilities::analyticConstant(testObject, "momentum_z", 0.0); + testingUtilities::analyticConstant(testObject, "Energy", energy); } -TEST(tCOOLINGSYSTEMConstant8, - CorrectInputExpectCorrectOutput) +TEST(tCOOLINGSYSTEMConstant8, CorrectInputExpectCorrectOutput) { // dt = 90 // rho = COOL_RHO*1e5 // pressure = 1 // T = 1e8 - + // double energy = 1.499669522009355; // Python - double energy = 1.4996695198095711; // Cholla + double energy = 1.4996695198095711; // Cholla systemTest::SystemTestRunner testObject(false, false, false); testObject.launchCholla(); testObject.openHydroTestData(); - testingUtilities::analyticConstant(testObject,"density",COOL_RHO*1e5); - testingUtilities::analyticConstant(testObject,"momentum_x",0.0); - testingUtilities::analyticConstant(testObject,"momentum_y",0.0); - testingUtilities::analyticConstant(testObject,"momentum_z",0.0); - testingUtilities::analyticConstant(testObject,"Energy",energy); - - + testingUtilities::analyticConstant(testObject, "density", COOL_RHO * 1e5); + testingUtilities::analyticConstant(testObject, "momentum_x", 0.0); + testingUtilities::analyticConstant(testObject, "momentum_y", 0.0); + testingUtilities::analyticConstant(testObject, "momentum_z", 0.0); + testingUtilities::analyticConstant(testObject, "Energy", energy); } - diff --git a/src/system_tests/gravity_system_tests.cpp b/src/system_tests/gravity_system_tests.cpp index 76cae4d7d..eba293cbb 100644 --- a/src/system_tests/gravity_system_tests.cpp +++ b/src/system_tests/gravity_system_tests.cpp @@ -20,11 +20,10 @@ * */ /// @{ -TEST(tGRAVITYSYSTEMSphericalCollapse, - CorrectInputExpectCorrectOutput) +TEST(tGRAVITYSYSTEMSphericalCollapse, CorrectInputExpectCorrectOutput) { - systemTest::SystemTestRunner collapseTest; - collapseTest.runTest(); + systemTest::SystemTestRunner collapseTest; + collapseTest.runTest(); } /// @} // ============================================================================= diff --git a/src/system_tests/hydro_system_tests.cpp b/src/system_tests/hydro_system_tests.cpp index 5ed2b050c..76e1fce7b 100644 --- a/src/system_tests/hydro_system_tests.cpp +++ b/src/system_tests/hydro_system_tests.cpp @@ -5,65 +5,66 @@ * */ - // External Libraries and Headers #include -#include // provides std:sin + +#include // provides std:sin // Local includes +#include "../io/io.h" #include "../system_tests/system_tester.h" #include "../utils/testing_utilities.h" -#include "../io/io.h" #ifndef PI -#define PI 3.141592653589793 + #define PI 3.141592653589793 #endif // ============================================================================= // Test Suite: tHYDROtMHDSYSTEMSodShockTube // ============================================================================= /*! - * \defgroup tHYDROtMHDSYSTEMSodShockTubeParameterizedMpi_CorrectInputExpectCorrectOutput + * \defgroup + * tHYDROtMHDSYSTEMSodShockTubeParameterizedMpi_CorrectInputExpectCorrectOutput * \brief Test the Sod Shock tube initial conditions as a parameterized test * with varying numbers of MPI ranks * */ /// @{ class tHYDROSYSTEMSodShockTubeParameterizedMpi - :public - ::testing::TestWithParam + : public ::testing::TestWithParam { -protected: - systemTest::SystemTestRunner sodTest; + protected: + systemTest::SystemTestRunner sodTest; }; TEST_P(tHYDROSYSTEMSodShockTubeParameterizedMpi, CorrectInputExpectCorrectOutput) { - #ifdef MHD - // Loosen correctness check to account for MHD only having PCM. This is - // about the error between PCM and PPMP in hydro - sodTest.setFixedEpsilon(1E-3); - - // Don't test the gas energy fields - auto datasetNames = sodTest.getDataSetsToTest(); - datasetNames.erase(std::remove(datasetNames.begin(), datasetNames.end(), "GasEnergy"), datasetNames.end()); - - // Set the magnetic fiducial datasets to zero - size_t const size = std::pow(65, 3); - std::vector const magVec(0, size); - - for (auto field: {"magnetic_x","magnetic_y","magnetic_z"}) - { - sodTest.setFiducialData(field, magVec); - datasetNames.push_back(field); - } - - sodTest.setDataSetsToTest(datasetNames); - #endif //MHD - - sodTest.numMpiRanks = GetParam(); - sodTest.runTest(); +#ifdef MHD + // Loosen correctness check to account for MHD only having PCM. This is + // about the error between PCM and PPMP in hydro + sodTest.setFixedEpsilon(1E-3); + + // Don't test the gas energy fields + auto datasetNames = sodTest.getDataSetsToTest(); + datasetNames.erase( + std::remove(datasetNames.begin(), datasetNames.end(), "GasEnergy"), + datasetNames.end()); + + // Set the magnetic fiducial datasets to zero + size_t const size = std::pow(65, 3); + std::vector const magVec(0, size); + + for (auto field : {"magnetic_x", "magnetic_y", "magnetic_z"}) { + sodTest.setFiducialData(field, magVec); + datasetNames.push_back(field); + } + + sodTest.setDataSetsToTest(datasetNames); +#endif // MHD + + sodTest.numMpiRanks = GetParam(); + sodTest.runTest(); } INSTANTIATE_TEST_SUITE_P(CorrectInputExpectCorrectOutput, @@ -72,8 +73,7 @@ INSTANTIATE_TEST_SUITE_P(CorrectInputExpectCorrectOutput, /// @} // ============================================================================= -TEST(tHYDROtMHDSYSTEMConstant, - CorrectInputExpectCorrectOutput) +TEST(tHYDROtMHDSYSTEMConstant, CorrectInputExpectCorrectOutput) { systemTest::SystemTestRunner testObject(false, false, false); @@ -81,45 +81,46 @@ TEST(tHYDROtMHDSYSTEMConstant, testObject.openHydroTestData(); - testingUtilities::analyticConstant(testObject,"density",1.0); - testingUtilities::analyticConstant(testObject,"momentum_x",0.0); - testingUtilities::analyticConstant(testObject,"momentum_y",0.0); - testingUtilities::analyticConstant(testObject,"momentum_z",0.0); - testingUtilities::analyticConstant(testObject,"Energy",1.5e-5); - + testingUtilities::analyticConstant(testObject, "density", 1.0); + testingUtilities::analyticConstant(testObject, "momentum_x", 0.0); + testingUtilities::analyticConstant(testObject, "momentum_y", 0.0); + testingUtilities::analyticConstant(testObject, "momentum_z", 0.0); + testingUtilities::analyticConstant(testObject, "Energy", 1.5e-5); } - -TEST(tHYDROtMHDSYSTEMSoundWave3D, - CorrectInputExpectCorrectOutput) +TEST(tHYDROtMHDSYSTEMSoundWave3D, CorrectInputExpectCorrectOutput) { - double time = 0.05; + double time = 0.05; double amplitude = 1e-5; - double dx = 1./64.; + double dx = 1. / 64.; - double real_kx = 2*PI;//kx of the physical problem + double real_kx = 2 * PI; // kx of the physical problem - double kx = real_kx * dx; - double speed = 1;//speed of wave is 1 since P = 0.6 and gamma = 1.666667 - double phase = kx*0.5 - speed * time * real_kx; //kx*0.5 for half-cell offset + double kx = real_kx * dx; + double speed = 1; // speed of wave is 1 since P = 0.6 and gamma = 1.666667 + double phase = + kx * 0.5 - speed * time * real_kx; // kx*0.5 for half-cell offset double tolerance = 1e-7; systemTest::SystemTestRunner testObject(false, false, false); - #ifdef MHD - // Loosen correctness check to account for MHD only having PCM. This is - // about the error between PCM and PPMP in hydro - tolerance = 1E-6; - #endif //MHD +#ifdef MHD + // Loosen correctness check to account for MHD only having PCM. This is + // about the error between PCM and PPMP in hydro + tolerance = 1E-6; +#endif // MHD testObject.launchCholla(); testObject.openHydroTestData(); - ASSERT_NO_FATAL_FAILURE(testingUtilities::analyticSine(testObject,"density",1.0,amplitude,kx,0.0,0.0,phase,tolerance)); - ASSERT_NO_FATAL_FAILURE(testingUtilities::analyticSine(testObject,"momentum_x",0.0,amplitude,kx,0.0,0.0,phase,tolerance)); - //testingUtilities::analyticSine(testObject,"momentum_y",0.0,amplitude,kx,0.0,0.0,0.0,tolerance); - //testingUtilities::analyticSine(testObject,"momentum_z",0.0,amplitude,kx,0.0,0.0,0.0,tolerance); + ASSERT_NO_FATAL_FAILURE(testingUtilities::analyticSine( + testObject, "density", 1.0, amplitude, kx, 0.0, 0.0, phase, tolerance)); + ASSERT_NO_FATAL_FAILURE( + testingUtilities::analyticSine(testObject, "momentum_x", 0.0, amplitude, + kx, 0.0, 0.0, phase, tolerance)); + // testingUtilities::analyticSine(testObject,"momentum_y",0.0,amplitude,kx,0.0,0.0,0.0,tolerance); + // testingUtilities::analyticSine(testObject,"momentum_z",0.0,amplitude,kx,0.0,0.0,0.0,tolerance); } // ============================================================================= @@ -133,73 +134,87 @@ TEST(tHYDROtMHDSYSTEMSoundWave3D, */ /// @{ class tHYDROtMHDSYSTEMLinearWavesParameterizedMpi - :public - ::testing::TestWithParam + : public ::testing::TestWithParam { -public: - tHYDROtMHDSYSTEMLinearWavesParameterizedMpi() - : waveTest(false, true, false, false) - {}; -protected: - systemTest::SystemTestRunner waveTest; - - #ifdef PCM - double const allowedL1Error = 4E-7; // Based on results in Gardiner & Stone 2008 - double const allowedError = 4E-7; - #else //PCM - double const allowedL1Error = 1E-7; // Based on results in Gardiner & Stone 2008 - double const allowedError = 1E-7; - #endif //PCM - - void setLaunchParams(double const &waveSpeed, double const &rEigenVec_rho, - double const &rEigenVec_MomentumX, double const &rEigenVec_MomentumY, - double const &rEigenVec_MomentumZ, double const &rEigenVec_E, - double const &vx=0.0) - { - // Constant for all tests - size_t const N = 32; - double const domain = 0.5; - double const gamma = 5./3.; - double const tOut = 2*domain / waveSpeed; - - // Settings - waveTest.chollaLaunchParams.append(" nx=" + to_string_exact(2*N)); - waveTest.chollaLaunchParams.append(" ny=" + to_string_exact(N)); - waveTest.chollaLaunchParams.append(" nz=" + to_string_exact(N)); - waveTest.chollaLaunchParams.append(" tout=" + to_string_exact(tOut)); - waveTest.chollaLaunchParams.append(" outstep=" + to_string_exact(tOut)); - waveTest.chollaLaunchParams.append(" init=Linear_Wave"); - waveTest.chollaLaunchParams.append(" xmin=0.0"); - waveTest.chollaLaunchParams.append(" ymin=0.0"); - waveTest.chollaLaunchParams.append(" zmin=0.0"); - waveTest.chollaLaunchParams.append(" xlen=" + to_string_exact(2*domain)); - waveTest.chollaLaunchParams.append(" ylen=" + to_string_exact(domain)); - waveTest.chollaLaunchParams.append(" zlen=" + to_string_exact(domain)); - waveTest.chollaLaunchParams.append(" xl_bcnd=1"); - waveTest.chollaLaunchParams.append(" xu_bcnd=1"); - waveTest.chollaLaunchParams.append(" yl_bcnd=1"); - waveTest.chollaLaunchParams.append(" yu_bcnd=1"); - waveTest.chollaLaunchParams.append(" zl_bcnd=1"); - waveTest.chollaLaunchParams.append(" zu_bcnd=1"); - waveTest.chollaLaunchParams.append(" rho=1.0"); - waveTest.chollaLaunchParams.append(" vx=" + to_string_exact(vx)); - waveTest.chollaLaunchParams.append(" vy=0"); - waveTest.chollaLaunchParams.append(" vz=0"); - waveTest.chollaLaunchParams.append(" P=" + to_string_exact(1/gamma)); - waveTest.chollaLaunchParams.append(" Bx=0"); - waveTest.chollaLaunchParams.append(" By=0"); - waveTest.chollaLaunchParams.append(" Bz=0"); - waveTest.chollaLaunchParams.append(" A='1e-6'"); - waveTest.chollaLaunchParams.append(" gamma=" + to_string_exact(gamma)); - waveTest.chollaLaunchParams.append(" rEigenVec_rho=" + to_string_exact(rEigenVec_rho)); - waveTest.chollaLaunchParams.append(" rEigenVec_MomentumX=" + to_string_exact(rEigenVec_MomentumX)); - waveTest.chollaLaunchParams.append(" rEigenVec_MomentumY=" + to_string_exact(rEigenVec_MomentumY)); - waveTest.chollaLaunchParams.append(" rEigenVec_MomentumZ=" + to_string_exact(rEigenVec_MomentumZ)); - waveTest.chollaLaunchParams.append(" rEigenVec_E=" + to_string_exact(rEigenVec_E)); - waveTest.chollaLaunchParams.append(" rEigenVec_Bx=0"); - waveTest.chollaLaunchParams.append(" rEigenVec_By=0"); - waveTest.chollaLaunchParams.append(" rEigenVec_Bz=0"); - } + public: + tHYDROtMHDSYSTEMLinearWavesParameterizedMpi() + : waveTest(false, true, false, false){}; + + protected: + systemTest::SystemTestRunner waveTest; + +#ifdef PCM + double const allowedL1Error = + 4E-7; // Based on results in Gardiner & Stone 2008 + double const allowedError = 4E-7; +#else // PCM + double const allowedL1Error = + 1E-7; // Based on results in Gardiner & Stone 2008 + double const allowedError = 1E-7; +#endif // PCM + + void setLaunchParams(double const &waveSpeed, double const &rEigenVec_rho, + double const &rEigenVec_MomentumX, + double const &rEigenVec_MomentumY, + double const &rEigenVec_MomentumZ, + double const &rEigenVec_E, double const &vx = 0.0) + { + // Constant for all tests + size_t const N = 32; + double const domain = 0.5; + double const gamma = 5. / 3.; + double const tOut = 2 * domain / waveSpeed; + + // Settings + waveTest.chollaLaunchParams.append(" nx=" + to_string_exact(2 * N)); + waveTest.chollaLaunchParams.append(" ny=" + to_string_exact(N)); + waveTest.chollaLaunchParams.append(" nz=" + to_string_exact(N)); + waveTest.chollaLaunchParams.append(" tout=" + + to_string_exact(tOut)); + waveTest.chollaLaunchParams.append(" outstep=" + + to_string_exact(tOut)); + waveTest.chollaLaunchParams.append(" init=Linear_Wave"); + waveTest.chollaLaunchParams.append(" xmin=0.0"); + waveTest.chollaLaunchParams.append(" ymin=0.0"); + waveTest.chollaLaunchParams.append(" zmin=0.0"); + waveTest.chollaLaunchParams.append(" xlen=" + + to_string_exact(2 * domain)); + waveTest.chollaLaunchParams.append(" ylen=" + + to_string_exact(domain)); + waveTest.chollaLaunchParams.append(" zlen=" + + to_string_exact(domain)); + waveTest.chollaLaunchParams.append(" xl_bcnd=1"); + waveTest.chollaLaunchParams.append(" xu_bcnd=1"); + waveTest.chollaLaunchParams.append(" yl_bcnd=1"); + waveTest.chollaLaunchParams.append(" yu_bcnd=1"); + waveTest.chollaLaunchParams.append(" zl_bcnd=1"); + waveTest.chollaLaunchParams.append(" zu_bcnd=1"); + waveTest.chollaLaunchParams.append(" rho=1.0"); + waveTest.chollaLaunchParams.append(" vx=" + to_string_exact(vx)); + waveTest.chollaLaunchParams.append(" vy=0"); + waveTest.chollaLaunchParams.append(" vz=0"); + waveTest.chollaLaunchParams.append(" P=" + + to_string_exact(1 / gamma)); + waveTest.chollaLaunchParams.append(" Bx=0"); + waveTest.chollaLaunchParams.append(" By=0"); + waveTest.chollaLaunchParams.append(" Bz=0"); + waveTest.chollaLaunchParams.append(" A='1e-6'"); + waveTest.chollaLaunchParams.append(" gamma=" + + to_string_exact(gamma)); + waveTest.chollaLaunchParams.append(" rEigenVec_rho=" + + to_string_exact(rEigenVec_rho)); + waveTest.chollaLaunchParams.append( + " rEigenVec_MomentumX=" + to_string_exact(rEigenVec_MomentumX)); + waveTest.chollaLaunchParams.append( + " rEigenVec_MomentumY=" + to_string_exact(rEigenVec_MomentumY)); + waveTest.chollaLaunchParams.append( + " rEigenVec_MomentumZ=" + to_string_exact(rEigenVec_MomentumZ)); + waveTest.chollaLaunchParams.append(" rEigenVec_E=" + + to_string_exact(rEigenVec_E)); + waveTest.chollaLaunchParams.append(" rEigenVec_Bx=0"); + waveTest.chollaLaunchParams.append(" rEigenVec_By=0"); + waveTest.chollaLaunchParams.append(" rEigenVec_Bz=0"); + } }; // Sound Waves Moving Left and Right @@ -207,55 +222,55 @@ class tHYDROtMHDSYSTEMLinearWavesParameterizedMpi TEST_P(tHYDROtMHDSYSTEMLinearWavesParameterizedMpi, SoundWaveRightMovingCorrectInputExpectCorrectOutput) { - // Specific to this test - double const waveSpeed = 1.; - int const numTimeSteps = 214; + // Specific to this test + double const waveSpeed = 1.; + int const numTimeSteps = 214; - double const rEigenVec_rho = 1; - double const rEigenVec_MomentumX = 1; - double const rEigenVec_MomentumY = 1; - double const rEigenVec_MomentumZ = 1; - double const rEigenVec_E = 1.5; + double const rEigenVec_rho = 1; + double const rEigenVec_MomentumX = 1; + double const rEigenVec_MomentumY = 1; + double const rEigenVec_MomentumZ = 1; + double const rEigenVec_E = 1.5; - // Set the launch parameters - setLaunchParams(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, - rEigenVec_MomentumY, rEigenVec_MomentumZ, rEigenVec_E); + // Set the launch parameters + setLaunchParams(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, + rEigenVec_MomentumY, rEigenVec_MomentumZ, rEigenVec_E); - // Set the number of MPI ranks - waveTest.numMpiRanks = GetParam(); + // Set the number of MPI ranks + waveTest.numMpiRanks = GetParam(); - // Set the number of timesteps - waveTest.setFiducialNumTimeSteps(numTimeSteps); + // Set the number of timesteps + waveTest.setFiducialNumTimeSteps(numTimeSteps); - // Check Results - waveTest.runL1ErrorTest(2*allowedL1Error, allowedError); + // Check Results + waveTest.runL1ErrorTest(2 * allowedL1Error, allowedError); } TEST_P(tHYDROtMHDSYSTEMLinearWavesParameterizedMpi, SoundWaveLeftMovingCorrectInputExpectCorrectOutput) { - // Specific to this test - double const waveSpeed = 1.; - int const numTimeSteps = 214; + // Specific to this test + double const waveSpeed = 1.; + int const numTimeSteps = 214; - double const rEigenVec_rho = 1; - double const rEigenVec_MomentumX = -1; - double const rEigenVec_MomentumY = 1; - double const rEigenVec_MomentumZ = 1; - double const rEigenVec_E = 1.5; + double const rEigenVec_rho = 1; + double const rEigenVec_MomentumX = -1; + double const rEigenVec_MomentumY = 1; + double const rEigenVec_MomentumZ = 1; + double const rEigenVec_E = 1.5; - // Set the launch parameters - setLaunchParams(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, - rEigenVec_MomentumY, rEigenVec_MomentumZ, rEigenVec_E); + // Set the launch parameters + setLaunchParams(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, + rEigenVec_MomentumY, rEigenVec_MomentumZ, rEigenVec_E); - // Set the number of MPI ranks - waveTest.numMpiRanks = GetParam(); + // Set the number of MPI ranks + waveTest.numMpiRanks = GetParam(); - // Set the number of timesteps - waveTest.setFiducialNumTimeSteps(numTimeSteps); + // Set the number of timesteps + waveTest.setFiducialNumTimeSteps(numTimeSteps); - // Check Results - waveTest.runL1ErrorTest(2*allowedL1Error, allowedError); + // Check Results + waveTest.runL1ErrorTest(2 * allowedL1Error, allowedError); } // Contact Waves Moving Left and Right @@ -263,34 +278,33 @@ TEST_P(tHYDROtMHDSYSTEMLinearWavesParameterizedMpi, TEST_P(tHYDROtMHDSYSTEMLinearWavesParameterizedMpi, HydroContactWaveCorrectInputExpectCorrectOutput) { - // Specific to this test - double const waveSpeed = 1.0; - int const numTimeSteps = 427; - - double const rEigenVec_rho = 1; - double const rEigenVec_MomentumX = 1; - double const rEigenVec_MomentumY = 0; - double const rEigenVec_MomentumZ = 0; - double const rEigenVec_E = 0.5; - double const velocityX = waveSpeed; - - // Set the launch parameters - setLaunchParams(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, - rEigenVec_MomentumY, rEigenVec_MomentumZ, rEigenVec_E, - velocityX); - - // Set the number of MPI ranks - waveTest.numMpiRanks = GetParam(); - - // Set the number of timesteps - waveTest.setFiducialNumTimeSteps(numTimeSteps); - - // Check Results - waveTest.runL1ErrorTest(allowedL1Error, allowedError); + // Specific to this test + double const waveSpeed = 1.0; + int const numTimeSteps = 427; + + double const rEigenVec_rho = 1; + double const rEigenVec_MomentumX = 1; + double const rEigenVec_MomentumY = 0; + double const rEigenVec_MomentumZ = 0; + double const rEigenVec_E = 0.5; + double const velocityX = waveSpeed; + + // Set the launch parameters + setLaunchParams(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, + rEigenVec_MomentumY, rEigenVec_MomentumZ, rEigenVec_E, + velocityX); + + // Set the number of MPI ranks + waveTest.numMpiRanks = GetParam(); + + // Set the number of timesteps + waveTest.setFiducialNumTimeSteps(numTimeSteps); + + // Check Results + waveTest.runL1ErrorTest(allowedL1Error, allowedError); } -INSTANTIATE_TEST_SUITE_P(, - tHYDROtMHDSYSTEMLinearWavesParameterizedMpi, +INSTANTIATE_TEST_SUITE_P(, tHYDROtMHDSYSTEMLinearWavesParameterizedMpi, ::testing::Values(1)); /// @} // ============================================================================= \ No newline at end of file diff --git a/src/system_tests/mhd_system_tests.cpp b/src/system_tests/mhd_system_tests.cpp index 39cec0b89..ac8be92f7 100644 --- a/src/system_tests/mhd_system_tests.cpp +++ b/src/system_tests/mhd_system_tests.cpp @@ -12,8 +12,8 @@ #include // Local includes -#include "../system_tests/system_tester.h" #include "../io/io.h" +#include "../system_tests/system_tester.h" // ============================================================================= // Test Suite: tMHDSYSTEMConstantParameterizedMpi @@ -26,31 +26,29 @@ */ /// @{ class tMHDSYSTEMConstantParameterizedMpi - :public - ::testing::TestWithParam + : public ::testing::TestWithParam { -protected: - systemTest::SystemTestRunner constantTest; + protected: + systemTest::SystemTestRunner constantTest; }; // Test with all mangetic fields set to zero TEST_P(tMHDSYSTEMConstantParameterizedMpi, ZeroMagneticFieldCorrectInputExpectCorrectOutput) { - constantTest.numMpiRanks = GetParam(); - constantTest.runTest(); + constantTest.numMpiRanks = GetParam(); + constantTest.runTest(); } // Test with all mangetic fields set to one TEST_P(tMHDSYSTEMConstantParameterizedMpi, MagneticFieldCorrectInputExpectCorrectOutput) { - constantTest.numMpiRanks = GetParam(); - constantTest.runTest(); + constantTest.numMpiRanks = GetParam(); + constantTest.runTest(); } -INSTANTIATE_TEST_SUITE_P(, - tMHDSYSTEMConstantParameterizedMpi, +INSTANTIATE_TEST_SUITE_P(, tMHDSYSTEMConstantParameterizedMpi, ::testing::Values(1, 2, 4)); /// @} // ============================================================================= @@ -66,117 +64,146 @@ INSTANTIATE_TEST_SUITE_P(, */ /// @{ class tMHDSYSTEMLinearWavesParameterizedAngle - :public - ::testing::TestWithParam> + : public ::testing::TestWithParam> { -public: - tMHDSYSTEMLinearWavesParameterizedAngle() - : waveTest(false, true, false, false){}; -protected: - systemTest::SystemTestRunner waveTest; - - #ifdef PCM - double const allowedL1Error = 4E-7; // Based on results in Gardiner & Stone 2008 - double const allowedError = 4E-7; - #else //PCM - double const allowedL1Error = 1E-7; // Based on results in Gardiner & Stone 2008 - double const allowedError = 1E-7; - #endif //PCM - - void setLaunchParams(double const &waveSpeed, double const &rEigenVec_rho, - double const &rEigenVec_MomentumX, double const &rEigenVec_MomentumY, - double const &rEigenVec_MomentumZ, double const &rEigenVec_E, - double const &rEigenVec_Bx, double const &rEigenVec_By, - double const &rEigenVec_Bz, double const &pitch, - double const &yaw, double const &domain, - int const &domain_direction, double const &vx=0.0) - { - // Constant for all tests - size_t const N = 32; - double const gamma = 5./3.; - double const tOut = 2*domain / waveSpeed; - - // Define vector values - double x_len=domain, y_len=domain, z_len=domain; - int nx=N, ny=N, nz=N; - double vx_rot=vx, vy_rot=0, vz_rot=0; - double Bx_rot=1, By_rot=1.5, Bz_rot=0; - - double rEigenVec_Bx_rot = rEigenVec_Bx; - double rEigenVec_By_rot = rEigenVec_By; - double rEigenVec_Bz_rot = rEigenVec_Bz; - - double rEigenVec_MomentumX_rot = rEigenVec_MomentumX; - double rEigenVec_MomentumY_rot = rEigenVec_MomentumY; - double rEigenVec_MomentumZ_rot = rEigenVec_MomentumZ; - - switch (domain_direction) - { - case 1: - x_len *= 2; - nx *= 2; - break; - case 2: // swap X and Y - y_len *= 2; - ny *= 2; - std::swap(vx_rot, vy_rot); - std::swap(Bx_rot, By_rot); - std::swap(rEigenVec_Bx_rot, rEigenVec_By_rot); - std::swap(rEigenVec_MomentumX_rot, rEigenVec_MomentumY_rot); - break; - case 3: // swap X and Z - z_len *= 2; - nz *= 2; - std::swap(vx_rot, vz_rot); - std::swap(Bx_rot, Bz_rot); - std::swap(rEigenVec_Bx_rot, rEigenVec_Bz_rot); - std::swap(rEigenVec_MomentumX_rot, rEigenVec_MomentumZ_rot); - break; - default: - throw std::invalid_argument("Invalid value of domain_direction given to setLaunchParams"); - break; - } - - // Settings - waveTest.chollaLaunchParams.append(" nx=" + to_string_exact(nx)); - waveTest.chollaLaunchParams.append(" ny=" + to_string_exact(ny)); - waveTest.chollaLaunchParams.append(" nz=" + to_string_exact(nz)); - waveTest.chollaLaunchParams.append(" tout=" + to_string_exact(tOut)); - waveTest.chollaLaunchParams.append(" outstep=" + to_string_exact(tOut)); - waveTest.chollaLaunchParams.append(" init=Linear_Wave"); - waveTest.chollaLaunchParams.append(" xmin=0.0"); - waveTest.chollaLaunchParams.append(" ymin=0.0"); - waveTest.chollaLaunchParams.append(" zmin=0.0"); - waveTest.chollaLaunchParams.append(" xlen=" + to_string_exact(x_len)); - waveTest.chollaLaunchParams.append(" ylen=" + to_string_exact(y_len)); - waveTest.chollaLaunchParams.append(" zlen=" + to_string_exact(z_len)); - waveTest.chollaLaunchParams.append(" xl_bcnd=1"); - waveTest.chollaLaunchParams.append(" xu_bcnd=1"); - waveTest.chollaLaunchParams.append(" yl_bcnd=1"); - waveTest.chollaLaunchParams.append(" yu_bcnd=1"); - waveTest.chollaLaunchParams.append(" zl_bcnd=1"); - waveTest.chollaLaunchParams.append(" zu_bcnd=1"); - waveTest.chollaLaunchParams.append(" rho=1.0"); - waveTest.chollaLaunchParams.append(" vx=" + to_string_exact(vx_rot)); - waveTest.chollaLaunchParams.append(" vy=" + to_string_exact(vy_rot)); - waveTest.chollaLaunchParams.append(" vz=" + to_string_exact(vz_rot)); - waveTest.chollaLaunchParams.append(" P=" + to_string_exact(1/gamma)); - waveTest.chollaLaunchParams.append(" Bx=" + to_string_exact(Bx_rot)); - waveTest.chollaLaunchParams.append(" By=" + to_string_exact(By_rot)); - waveTest.chollaLaunchParams.append(" Bz=" + to_string_exact(Bz_rot)); - waveTest.chollaLaunchParams.append(" A='1e-6'"); - waveTest.chollaLaunchParams.append(" gamma=" + to_string_exact(gamma)); - waveTest.chollaLaunchParams.append(" rEigenVec_rho=" + to_string_exact(rEigenVec_rho)); - waveTest.chollaLaunchParams.append(" rEigenVec_MomentumX=" + to_string_exact(rEigenVec_MomentumX_rot)); - waveTest.chollaLaunchParams.append(" rEigenVec_MomentumY=" + to_string_exact(rEigenVec_MomentumY_rot)); - waveTest.chollaLaunchParams.append(" rEigenVec_MomentumZ=" + to_string_exact(rEigenVec_MomentumZ_rot)); - waveTest.chollaLaunchParams.append(" rEigenVec_E=" + to_string_exact(rEigenVec_E)); - waveTest.chollaLaunchParams.append(" rEigenVec_Bx=" + to_string_exact(rEigenVec_Bx_rot)); - waveTest.chollaLaunchParams.append(" rEigenVec_By=" + to_string_exact(rEigenVec_By_rot)); - waveTest.chollaLaunchParams.append(" rEigenVec_Bz=" + to_string_exact(rEigenVec_Bz_rot)); - waveTest.chollaLaunchParams.append(" pitch=" + to_string_exact(pitch)); - waveTest.chollaLaunchParams.append(" yaw=" + to_string_exact(yaw)); + public: + tMHDSYSTEMLinearWavesParameterizedAngle() + : waveTest(false, true, false, false){}; + + protected: + systemTest::SystemTestRunner waveTest; + +#ifdef PCM + double const allowedL1Error = + 4E-7; // Based on results in Gardiner & Stone 2008 + double const allowedError = 4E-7; +#else // PCM + double const allowedL1Error = + 1E-7; // Based on results in Gardiner & Stone 2008 + double const allowedError = 1E-7; +#endif // PCM + + void setLaunchParams(double const &waveSpeed, double const &rEigenVec_rho, + double const &rEigenVec_MomentumX, + double const &rEigenVec_MomentumY, + double const &rEigenVec_MomentumZ, + double const &rEigenVec_E, double const &rEigenVec_Bx, + double const &rEigenVec_By, double const &rEigenVec_Bz, + double const &pitch, double const &yaw, + double const &domain, int const &domain_direction, + double const &vx = 0.0) + { + // Constant for all tests + size_t const N = 32; + double const gamma = 5. / 3.; + double const tOut = 2 * domain / waveSpeed; + + // Define vector values + double x_len = domain, y_len = domain, z_len = domain; + int nx = N, ny = N, nz = N; + double vx_rot = vx, vy_rot = 0, vz_rot = 0; + double Bx_rot = 1, By_rot = 1.5, Bz_rot = 0; + + double rEigenVec_Bx_rot = rEigenVec_Bx; + double rEigenVec_By_rot = rEigenVec_By; + double rEigenVec_Bz_rot = rEigenVec_Bz; + + double rEigenVec_MomentumX_rot = rEigenVec_MomentumX; + double rEigenVec_MomentumY_rot = rEigenVec_MomentumY; + double rEigenVec_MomentumZ_rot = rEigenVec_MomentumZ; + + switch (domain_direction) { + case 1: + x_len *= 2; + nx *= 2; + break; + case 2: // swap X and Y + y_len *= 2; + ny *= 2; + std::swap(vx_rot, vy_rot); + std::swap(Bx_rot, By_rot); + std::swap(rEigenVec_Bx_rot, rEigenVec_By_rot); + std::swap(rEigenVec_MomentumX_rot, rEigenVec_MomentumY_rot); + break; + case 3: // swap X and Z + z_len *= 2; + nz *= 2; + std::swap(vx_rot, vz_rot); + std::swap(Bx_rot, Bz_rot); + std::swap(rEigenVec_Bx_rot, rEigenVec_Bz_rot); + std::swap(rEigenVec_MomentumX_rot, rEigenVec_MomentumZ_rot); + break; + default: + throw std::invalid_argument( + "Invalid value of domain_direction given to setLaunchParams"); + break; } + + // Settings + waveTest.chollaLaunchParams.append(" nx=" + to_string_exact(nx)); + waveTest.chollaLaunchParams.append(" ny=" + to_string_exact(ny)); + waveTest.chollaLaunchParams.append(" nz=" + to_string_exact(nz)); + waveTest.chollaLaunchParams.append(" tout=" + + to_string_exact(tOut)); + waveTest.chollaLaunchParams.append(" outstep=" + + to_string_exact(tOut)); + waveTest.chollaLaunchParams.append(" init=Linear_Wave"); + waveTest.chollaLaunchParams.append(" xmin=0.0"); + waveTest.chollaLaunchParams.append(" ymin=0.0"); + waveTest.chollaLaunchParams.append(" zmin=0.0"); + waveTest.chollaLaunchParams.append(" xlen=" + + to_string_exact(x_len)); + waveTest.chollaLaunchParams.append(" ylen=" + + to_string_exact(y_len)); + waveTest.chollaLaunchParams.append(" zlen=" + + to_string_exact(z_len)); + waveTest.chollaLaunchParams.append(" xl_bcnd=1"); + waveTest.chollaLaunchParams.append(" xu_bcnd=1"); + waveTest.chollaLaunchParams.append(" yl_bcnd=1"); + waveTest.chollaLaunchParams.append(" yu_bcnd=1"); + waveTest.chollaLaunchParams.append(" zl_bcnd=1"); + waveTest.chollaLaunchParams.append(" zu_bcnd=1"); + waveTest.chollaLaunchParams.append(" rho=1.0"); + waveTest.chollaLaunchParams.append(" vx=" + + to_string_exact(vx_rot)); + waveTest.chollaLaunchParams.append(" vy=" + + to_string_exact(vy_rot)); + waveTest.chollaLaunchParams.append(" vz=" + + to_string_exact(vz_rot)); + waveTest.chollaLaunchParams.append(" P=" + + to_string_exact(1 / gamma)); + waveTest.chollaLaunchParams.append(" Bx=" + + to_string_exact(Bx_rot)); + waveTest.chollaLaunchParams.append(" By=" + + to_string_exact(By_rot)); + waveTest.chollaLaunchParams.append(" Bz=" + + to_string_exact(Bz_rot)); + waveTest.chollaLaunchParams.append(" A='1e-6'"); + waveTest.chollaLaunchParams.append(" gamma=" + + to_string_exact(gamma)); + waveTest.chollaLaunchParams.append(" rEigenVec_rho=" + + to_string_exact(rEigenVec_rho)); + waveTest.chollaLaunchParams.append( + " rEigenVec_MomentumX=" + + to_string_exact(rEigenVec_MomentumX_rot)); + waveTest.chollaLaunchParams.append( + " rEigenVec_MomentumY=" + + to_string_exact(rEigenVec_MomentumY_rot)); + waveTest.chollaLaunchParams.append( + " rEigenVec_MomentumZ=" + + to_string_exact(rEigenVec_MomentumZ_rot)); + waveTest.chollaLaunchParams.append(" rEigenVec_E=" + + to_string_exact(rEigenVec_E)); + waveTest.chollaLaunchParams.append( + " rEigenVec_Bx=" + to_string_exact(rEigenVec_Bx_rot)); + waveTest.chollaLaunchParams.append( + " rEigenVec_By=" + to_string_exact(rEigenVec_By_rot)); + waveTest.chollaLaunchParams.append( + " rEigenVec_Bz=" + to_string_exact(rEigenVec_Bz_rot)); + waveTest.chollaLaunchParams.append(" pitch=" + + to_string_exact(pitch)); + waveTest.chollaLaunchParams.append(" yaw=" + to_string_exact(yaw)); + } }; // Fast Magnetosonic Waves Moving Left and Right @@ -184,75 +211,75 @@ class tMHDSYSTEMLinearWavesParameterizedAngle TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, FastMagnetosonicWaveRightMovingCorrectInputExpectCorrectOutput) { - // Specific to this test - double const waveSpeed = 2.; - std::vector const numTimeSteps = {214, 204, 220}; - - double const prefix = 1./(2*std::sqrt(5)); - double const rEigenVec_rho = prefix * 2; - double const rEigenVec_MomentumX = prefix * 4; - double const rEigenVec_MomentumY = prefix * -2; // + for left wave - double const rEigenVec_MomentumZ = prefix * 0; - double const rEigenVec_Bx = prefix * 0; - double const rEigenVec_By = prefix * 4; - double const rEigenVec_Bz = prefix * 0; - double const rEigenVec_E = prefix * 9; - - // Get the test parameters - auto [pitch, yaw, domain, domain_direction] = GetParam(); - - // Set the launch parameters - setLaunchParams(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, - rEigenVec_MomentumY, rEigenVec_MomentumZ, rEigenVec_E, - rEigenVec_Bx, rEigenVec_By, rEigenVec_Bz, pitch, yaw, domain, - domain_direction); - - // Set the number of timesteps - waveTest.setFiducialNumTimeSteps(numTimeSteps[domain_direction-1]); - - // Check Results - #ifdef PCM - waveTest.runL1ErrorTest(4.2E-7, 5.4E-7); - #else //PCM - waveTest.runL1ErrorTest(allowedL1Error, allowedError); - #endif //PCM + // Specific to this test + double const waveSpeed = 2.; + std::vector const numTimeSteps = {214, 204, 220}; + + double const prefix = 1. / (2 * std::sqrt(5)); + double const rEigenVec_rho = prefix * 2; + double const rEigenVec_MomentumX = prefix * 4; + double const rEigenVec_MomentumY = prefix * -2; // + for left wave + double const rEigenVec_MomentumZ = prefix * 0; + double const rEigenVec_Bx = prefix * 0; + double const rEigenVec_By = prefix * 4; + double const rEigenVec_Bz = prefix * 0; + double const rEigenVec_E = prefix * 9; + + // Get the test parameters + auto [pitch, yaw, domain, domain_direction] = GetParam(); + + // Set the launch parameters + setLaunchParams(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, + rEigenVec_MomentumY, rEigenVec_MomentumZ, rEigenVec_E, + rEigenVec_Bx, rEigenVec_By, rEigenVec_Bz, pitch, yaw, domain, + domain_direction); + + // Set the number of timesteps + waveTest.setFiducialNumTimeSteps(numTimeSteps[domain_direction - 1]); + +// Check Results +#ifdef PCM + waveTest.runL1ErrorTest(4.2E-7, 5.4E-7); +#else // PCM + waveTest.runL1ErrorTest(allowedL1Error, allowedError); +#endif // PCM } TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, FastMagnetosonicWaveLeftMovingCorrectInputExpectCorrectOutput) { - // Specific to this test - double const waveSpeed = 2.; - std::vector const numTimeSteps = {214, 204, 220}; - - double const prefix = 1./(2*std::sqrt(5)); - double const rEigenVec_rho = prefix * 2; - double const rEigenVec_MomentumX = prefix * -4; - double const rEigenVec_MomentumY = prefix * 2; - double const rEigenVec_MomentumZ = prefix * 0; - double const rEigenVec_Bx = prefix * 0; - double const rEigenVec_By = prefix * 4; - double const rEigenVec_Bz = prefix * 0; - double const rEigenVec_E = prefix * 9; - - // Get the test parameters - auto [pitch, yaw, domain, domain_direction] = GetParam(); - - // Set the launch parameters - setLaunchParams(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, - rEigenVec_MomentumY, rEigenVec_MomentumZ, rEigenVec_E, - rEigenVec_Bx, rEigenVec_By, rEigenVec_Bz, pitch, yaw, domain, - domain_direction); - - // Set the number of timesteps - waveTest.setFiducialNumTimeSteps(numTimeSteps[domain_direction-1]); - - // Check Results - #ifdef PCM - waveTest.runL1ErrorTest(4.2E-7, 5.4E-7); - #else //PCM - waveTest.runL1ErrorTest(allowedL1Error, allowedError); - #endif //PCM + // Specific to this test + double const waveSpeed = 2.; + std::vector const numTimeSteps = {214, 204, 220}; + + double const prefix = 1. / (2 * std::sqrt(5)); + double const rEigenVec_rho = prefix * 2; + double const rEigenVec_MomentumX = prefix * -4; + double const rEigenVec_MomentumY = prefix * 2; + double const rEigenVec_MomentumZ = prefix * 0; + double const rEigenVec_Bx = prefix * 0; + double const rEigenVec_By = prefix * 4; + double const rEigenVec_Bz = prefix * 0; + double const rEigenVec_E = prefix * 9; + + // Get the test parameters + auto [pitch, yaw, domain, domain_direction] = GetParam(); + + // Set the launch parameters + setLaunchParams(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, + rEigenVec_MomentumY, rEigenVec_MomentumZ, rEigenVec_E, + rEigenVec_Bx, rEigenVec_By, rEigenVec_Bz, pitch, yaw, domain, + domain_direction); + + // Set the number of timesteps + waveTest.setFiducialNumTimeSteps(numTimeSteps[domain_direction - 1]); + +// Check Results +#ifdef PCM + waveTest.runL1ErrorTest(4.2E-7, 5.4E-7); +#else // PCM + waveTest.runL1ErrorTest(allowedL1Error, allowedError); +#endif // PCM } // Slow Magnetosonic Waves Moving Left and Right @@ -260,67 +287,67 @@ TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, SlowMagnetosonicWaveRightMovingCorrectInputExpectCorrectOutput) { - // Specific to this test - double const waveSpeed = 0.5; - std::vector const numTimeSteps = {854, 813, 880}; - - double const prefix = 1./(2*std::sqrt(5)); - double const rEigenVec_rho = prefix * 4; - double const rEigenVec_MomentumX = prefix * 2; - double const rEigenVec_MomentumY = prefix * 4; - double const rEigenVec_MomentumZ = prefix * 0; - double const rEigenVec_Bx = prefix * 0; - double const rEigenVec_By = prefix * -2; - double const rEigenVec_Bz = prefix * 0; - double const rEigenVec_E = prefix * 3; - - // Get the test parameters - auto [pitch, yaw, domain, domain_direction] = GetParam(); - - // Set the launch parameters - setLaunchParams(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, - rEigenVec_MomentumY, rEigenVec_MomentumZ, rEigenVec_E, - rEigenVec_Bx, rEigenVec_By, rEigenVec_Bz, pitch, yaw, domain, - domain_direction); - - // Set the number of timesteps - waveTest.setFiducialNumTimeSteps(numTimeSteps[domain_direction-1]); - - // Check Results - waveTest.runL1ErrorTest(allowedL1Error, allowedError); + // Specific to this test + double const waveSpeed = 0.5; + std::vector const numTimeSteps = {854, 813, 880}; + + double const prefix = 1. / (2 * std::sqrt(5)); + double const rEigenVec_rho = prefix * 4; + double const rEigenVec_MomentumX = prefix * 2; + double const rEigenVec_MomentumY = prefix * 4; + double const rEigenVec_MomentumZ = prefix * 0; + double const rEigenVec_Bx = prefix * 0; + double const rEigenVec_By = prefix * -2; + double const rEigenVec_Bz = prefix * 0; + double const rEigenVec_E = prefix * 3; + + // Get the test parameters + auto [pitch, yaw, domain, domain_direction] = GetParam(); + + // Set the launch parameters + setLaunchParams(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, + rEigenVec_MomentumY, rEigenVec_MomentumZ, rEigenVec_E, + rEigenVec_Bx, rEigenVec_By, rEigenVec_Bz, pitch, yaw, domain, + domain_direction); + + // Set the number of timesteps + waveTest.setFiducialNumTimeSteps(numTimeSteps[domain_direction - 1]); + + // Check Results + waveTest.runL1ErrorTest(allowedL1Error, allowedError); } TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, SlowMagnetosonicWaveLeftMovingCorrectInputExpectCorrectOutput) { - // Specific to this test - double const waveSpeed = 0.5; - std::vector const numTimeSteps = {854, 813, 880}; - - double const prefix = 1./(2*std::sqrt(5)); - double const rEigenVec_rho = prefix * 4; - double const rEigenVec_MomentumX = prefix * -2; - double const rEigenVec_MomentumY = prefix * -4; - double const rEigenVec_MomentumZ = prefix * 0; - double const rEigenVec_Bx = prefix * 0; - double const rEigenVec_By = prefix * -2; - double const rEigenVec_Bz = prefix * 0; - double const rEigenVec_E = prefix * 3; - - // Get the test parameters - auto [pitch, yaw, domain, domain_direction] = GetParam(); - - // Set the launch parameters - setLaunchParams(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, - rEigenVec_MomentumY, rEigenVec_MomentumZ, rEigenVec_E, - rEigenVec_Bx, rEigenVec_By, rEigenVec_Bz, pitch, yaw, domain, - domain_direction); - - // Set the number of timesteps - waveTest.setFiducialNumTimeSteps(numTimeSteps[domain_direction-1]); - - // Check Results - waveTest.runL1ErrorTest(allowedL1Error, allowedError); + // Specific to this test + double const waveSpeed = 0.5; + std::vector const numTimeSteps = {854, 813, 880}; + + double const prefix = 1. / (2 * std::sqrt(5)); + double const rEigenVec_rho = prefix * 4; + double const rEigenVec_MomentumX = prefix * -2; + double const rEigenVec_MomentumY = prefix * -4; + double const rEigenVec_MomentumZ = prefix * 0; + double const rEigenVec_Bx = prefix * 0; + double const rEigenVec_By = prefix * -2; + double const rEigenVec_Bz = prefix * 0; + double const rEigenVec_E = prefix * 3; + + // Get the test parameters + auto [pitch, yaw, domain, domain_direction] = GetParam(); + + // Set the launch parameters + setLaunchParams(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, + rEigenVec_MomentumY, rEigenVec_MomentumZ, rEigenVec_E, + rEigenVec_Bx, rEigenVec_By, rEigenVec_Bz, pitch, yaw, domain, + domain_direction); + + // Set the number of timesteps + waveTest.setFiducialNumTimeSteps(numTimeSteps[domain_direction - 1]); + + // Check Results + waveTest.runL1ErrorTest(allowedL1Error, allowedError); } // Alfven Waves Moving Left and Right @@ -328,65 +355,65 @@ TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, AlfvenWaveRightMovingCorrectInputExpectCorrectOutput) { - // Specific to this test - double const waveSpeed = 1.0; - std::vector const numTimeSteps = {427, 407, 440}; - - double const rEigenVec_rho = 0; - double const rEigenVec_MomentumX = 0; - double const rEigenVec_MomentumY = 0; - double const rEigenVec_MomentumZ = -1; - double const rEigenVec_Bx = 0; - double const rEigenVec_By = 0; - double const rEigenVec_Bz = 1; - double const rEigenVec_E = 0; - - // Get the test parameters - auto [pitch, yaw, domain, domain_direction] = GetParam(); - - // Set the launch parameters - setLaunchParams(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, - rEigenVec_MomentumY, rEigenVec_MomentumZ, rEigenVec_E, - rEigenVec_Bx, rEigenVec_By, rEigenVec_Bz, pitch, yaw, domain, - domain_direction); - - // Set the number of timesteps - waveTest.setFiducialNumTimeSteps(numTimeSteps[domain_direction-1]); - - // Check Results - waveTest.runL1ErrorTest(allowedL1Error, allowedError); + // Specific to this test + double const waveSpeed = 1.0; + std::vector const numTimeSteps = {427, 407, 440}; + + double const rEigenVec_rho = 0; + double const rEigenVec_MomentumX = 0; + double const rEigenVec_MomentumY = 0; + double const rEigenVec_MomentumZ = -1; + double const rEigenVec_Bx = 0; + double const rEigenVec_By = 0; + double const rEigenVec_Bz = 1; + double const rEigenVec_E = 0; + + // Get the test parameters + auto [pitch, yaw, domain, domain_direction] = GetParam(); + + // Set the launch parameters + setLaunchParams(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, + rEigenVec_MomentumY, rEigenVec_MomentumZ, rEigenVec_E, + rEigenVec_Bx, rEigenVec_By, rEigenVec_Bz, pitch, yaw, domain, + domain_direction); + + // Set the number of timesteps + waveTest.setFiducialNumTimeSteps(numTimeSteps[domain_direction - 1]); + + // Check Results + waveTest.runL1ErrorTest(allowedL1Error, allowedError); } TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, AlfvenWaveLeftMovingCorrectInputExpectCorrectOutput) { - // Specific to this test - double const waveSpeed = 1.0; - std::vector const numTimeSteps = {427, 407, 440}; - - double const rEigenVec_rho = 0; - double const rEigenVec_MomentumX = 0; - double const rEigenVec_MomentumY = 0; - double const rEigenVec_MomentumZ = 1; - double const rEigenVec_Bx = 0; - double const rEigenVec_By = 0; - double const rEigenVec_Bz = 1; - double const rEigenVec_E = 0; - - // Get the test parameters - auto [pitch, yaw, domain, domain_direction] = GetParam(); - - // Set the launch parameters - setLaunchParams(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, - rEigenVec_MomentumY, rEigenVec_MomentumZ, rEigenVec_E, - rEigenVec_Bx, rEigenVec_By, rEigenVec_Bz, pitch, yaw, domain, - domain_direction); - - // Set the number of timesteps - waveTest.setFiducialNumTimeSteps(numTimeSteps[domain_direction-1]); - - // Check Results - waveTest.runL1ErrorTest(allowedL1Error, allowedError); + // Specific to this test + double const waveSpeed = 1.0; + std::vector const numTimeSteps = {427, 407, 440}; + + double const rEigenVec_rho = 0; + double const rEigenVec_MomentumX = 0; + double const rEigenVec_MomentumY = 0; + double const rEigenVec_MomentumZ = 1; + double const rEigenVec_Bx = 0; + double const rEigenVec_By = 0; + double const rEigenVec_Bz = 1; + double const rEigenVec_E = 0; + + // Get the test parameters + auto [pitch, yaw, domain, domain_direction] = GetParam(); + + // Set the launch parameters + setLaunchParams(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, + rEigenVec_MomentumY, rEigenVec_MomentumZ, rEigenVec_E, + rEigenVec_Bx, rEigenVec_By, rEigenVec_Bz, pitch, yaw, domain, + domain_direction); + + // Set the number of timesteps + waveTest.setFiducialNumTimeSteps(numTimeSteps[domain_direction - 1]); + + // Check Results + waveTest.runL1ErrorTest(allowedL1Error, allowedError); } // Contact Wave Moving Right @@ -394,48 +421,48 @@ TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, MHDContactWaveCorrectInputExpectCorrectOutput) { - // Specific to this test - double const waveSpeed = 1.0; - std::vector const numTimeSteps = {641, 620, 654}; - - double const rEigenVec_rho = 1; - double const rEigenVec_MomentumX = 1; - double const rEigenVec_MomentumY = 0; - double const rEigenVec_MomentumZ = 0; - double const rEigenVec_Bx = 0; - double const rEigenVec_By = 0; - double const rEigenVec_Bz = 0; - double const rEigenVec_E = 0.5; - double const velocityX = waveSpeed; - - // Get the test parameters - auto [pitch, yaw, domain, domain_direction] = GetParam(); - - // Set the launch parameters - setLaunchParams(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, - rEigenVec_MomentumY, rEigenVec_MomentumZ, rEigenVec_E, - rEigenVec_Bx, rEigenVec_By, rEigenVec_Bz, pitch, yaw, domain, - domain_direction, velocityX); - - // Set the number of timesteps - waveTest.setFiducialNumTimeSteps(numTimeSteps[domain_direction-1]); - - // Check Results - #ifdef PCM - waveTest.runL1ErrorTest(1.35*allowedL1Error, 1.35*allowedError); - #else //PCM - waveTest.runL1ErrorTest(allowedL1Error, allowedError); - #endif //PCM + // Specific to this test + double const waveSpeed = 1.0; + std::vector const numTimeSteps = {641, 620, 654}; + + double const rEigenVec_rho = 1; + double const rEigenVec_MomentumX = 1; + double const rEigenVec_MomentumY = 0; + double const rEigenVec_MomentumZ = 0; + double const rEigenVec_Bx = 0; + double const rEigenVec_By = 0; + double const rEigenVec_Bz = 0; + double const rEigenVec_E = 0.5; + double const velocityX = waveSpeed; + + // Get the test parameters + auto [pitch, yaw, domain, domain_direction] = GetParam(); + + // Set the launch parameters + setLaunchParams(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, + rEigenVec_MomentumY, rEigenVec_MomentumZ, rEigenVec_E, + rEigenVec_Bx, rEigenVec_By, rEigenVec_Bz, pitch, yaw, domain, + domain_direction, velocityX); + + // Set the number of timesteps + waveTest.setFiducialNumTimeSteps(numTimeSteps[domain_direction - 1]); + +// Check Results +#ifdef PCM + waveTest.runL1ErrorTest(1.35 * allowedL1Error, 1.35 * allowedError); +#else // PCM + waveTest.runL1ErrorTest(allowedL1Error, allowedError); +#endif // PCM } -INSTANTIATE_TEST_SUITE_P(, - tMHDSYSTEMLinearWavesParameterizedAngle, - ::testing::Values( - std::make_tuple(0.0*M_PI, 0.0*M_PI, 0.5, 1), - std::make_tuple(0.0*M_PI, 0.5*M_PI, 0.5, 2), - std::make_tuple(0.5*M_PI, 0.0*M_PI, 0.5, 3) - //std::make_tuple(std::asin(2./3.), std::asin(2./std::sqrt(5.)), 1.5, 1) - )); +INSTANTIATE_TEST_SUITE_P( + , tMHDSYSTEMLinearWavesParameterizedAngle, + ::testing::Values(std::make_tuple(0.0 * M_PI, 0.0 * M_PI, 0.5, 1), + std::make_tuple(0.0 * M_PI, 0.5 * M_PI, 0.5, 2), + std::make_tuple(0.5 * M_PI, 0.0 * M_PI, 0.5, 3) + // std::make_tuple(std::asin(2./3.), + // std::asin(2./std::sqrt(5.)), 1.5, 1) + )); /// @} // ============================================================================= @@ -445,25 +472,24 @@ INSTANTIATE_TEST_SUITE_P(, // with the hydro sod test // ============================================================================= /*! - * \defgroup tMHDSYSTEMSodShockTubeParameterizedMpi_CorrectInputExpectCorrectOutput - * \brief Test the Sod Shock tube initial conditions as a parameterized test - * with varying numbers of MPI ranks + * \defgroup + * tMHDSYSTEMSodShockTubeParameterizedMpi_CorrectInputExpectCorrectOutput \brief + * Test the Sod Shock tube initial conditions as a parameterized test with + * varying numbers of MPI ranks * */ /// @{ class tMHDSYSTEMSodShockTubeParameterizedMpi - :public - ::testing::TestWithParam + : public ::testing::TestWithParam { -protected: - systemTest::SystemTestRunner sodTest; + protected: + systemTest::SystemTestRunner sodTest; }; -TEST_P(tMHDSYSTEMSodShockTubeParameterizedMpi, - CorrectInputExpectCorrectOutput) +TEST_P(tMHDSYSTEMSodShockTubeParameterizedMpi, CorrectInputExpectCorrectOutput) { - sodTest.numMpiRanks = GetParam(); - sodTest.runTest(); + sodTest.numMpiRanks = GetParam(); + sodTest.runTest(); } INSTANTIATE_TEST_SUITE_P(CorrectInputExpectCorrectOutput, @@ -475,11 +501,10 @@ INSTANTIATE_TEST_SUITE_P(CorrectInputExpectCorrectOutput, // ============================================================================= // Test Suite: tMHDSYSTEMEinfeldtStrongRarefaction // ============================================================================= -TEST(tMHDSYSTEMEinfeldtStrongRarefaction, - CorrectInputExpectCorrectOutput) +TEST(tMHDSYSTEMEinfeldtStrongRarefaction, CorrectInputExpectCorrectOutput) { - systemTest::SystemTestRunner rarefactionTest; - rarefactionTest.runTest(); + systemTest::SystemTestRunner rarefactionTest; + rarefactionTest.runTest(); } // ============================================================================= @@ -494,73 +519,91 @@ TEST(tMHDSYSTEMEinfeldtStrongRarefaction, */ /// @{ class tMHDSYSTEMLinearWavesParameterizedMpi - :public - ::testing::TestWithParam + : public ::testing::TestWithParam { -public: - tMHDSYSTEMLinearWavesParameterizedMpi() - : waveTest(false, true, false, false){}; -protected: - systemTest::SystemTestRunner waveTest; - - #ifdef PCM - double const allowedL1Error = 4E-7; // Based on results in Gardiner & Stone 2008 - double const allowedError = 4E-7; - #else //PCM - double const allowedL1Error = 1E-7; // Based on results in Gardiner & Stone 2008 - double const allowedError = 1E-7; - #endif //PCM - - void setLaunchParams(double const &waveSpeed, double const &rEigenVec_rho, - double const &rEigenVec_MomentumX, double const &rEigenVec_MomentumY, - double const &rEigenVec_MomentumZ, double const &rEigenVec_E, - double const &rEigenVec_Bx, double const &rEigenVec_By, - double const &rEigenVec_Bz) - { - // Constant for all tests - size_t const N = 32; - double const gamma = 5./3.; - double const domain = 0.5; - double const tOut = 2*domain / waveSpeed; - - // Settings - waveTest.chollaLaunchParams.append(" nx=" + to_string_exact(2*N)); - waveTest.chollaLaunchParams.append(" ny=" + to_string_exact(N)); - waveTest.chollaLaunchParams.append(" nz=" + to_string_exact(N)); - waveTest.chollaLaunchParams.append(" tout=" + to_string_exact(tOut)); - waveTest.chollaLaunchParams.append(" outstep=" + to_string_exact(tOut)); - waveTest.chollaLaunchParams.append(" init=Linear_Wave"); - waveTest.chollaLaunchParams.append(" xmin=0.0"); - waveTest.chollaLaunchParams.append(" ymin=0.0"); - waveTest.chollaLaunchParams.append(" zmin=0.0"); - waveTest.chollaLaunchParams.append(" xlen=" + to_string_exact(2*domain)); - waveTest.chollaLaunchParams.append(" ylen=" + to_string_exact(domain)); - waveTest.chollaLaunchParams.append(" zlen=" + to_string_exact(domain)); - waveTest.chollaLaunchParams.append(" xl_bcnd=1"); - waveTest.chollaLaunchParams.append(" xu_bcnd=1"); - waveTest.chollaLaunchParams.append(" yl_bcnd=1"); - waveTest.chollaLaunchParams.append(" yu_bcnd=1"); - waveTest.chollaLaunchParams.append(" zl_bcnd=1"); - waveTest.chollaLaunchParams.append(" zu_bcnd=1"); - waveTest.chollaLaunchParams.append(" rho=1.0"); - waveTest.chollaLaunchParams.append(" vx=0"); - waveTest.chollaLaunchParams.append(" vy=0"); - waveTest.chollaLaunchParams.append(" vz=0"); - waveTest.chollaLaunchParams.append(" P=" + to_string_exact(1/gamma)); - waveTest.chollaLaunchParams.append(" Bx=1"); - waveTest.chollaLaunchParams.append(" By=1.5"); - waveTest.chollaLaunchParams.append(" Bz=0"); - waveTest.chollaLaunchParams.append(" A='1e-6'"); - waveTest.chollaLaunchParams.append(" gamma=" + to_string_exact(gamma)); - waveTest.chollaLaunchParams.append(" rEigenVec_rho=" + to_string_exact(rEigenVec_rho)); - waveTest.chollaLaunchParams.append(" rEigenVec_MomentumX=" + to_string_exact(rEigenVec_MomentumX)); - waveTest.chollaLaunchParams.append(" rEigenVec_MomentumY=" + to_string_exact(rEigenVec_MomentumY)); - waveTest.chollaLaunchParams.append(" rEigenVec_MomentumZ=" + to_string_exact(rEigenVec_MomentumZ)); - waveTest.chollaLaunchParams.append(" rEigenVec_E=" + to_string_exact(rEigenVec_E)); - waveTest.chollaLaunchParams.append(" rEigenVec_Bx=" + to_string_exact(rEigenVec_Bx)); - waveTest.chollaLaunchParams.append(" rEigenVec_By=" + to_string_exact(rEigenVec_By)); - waveTest.chollaLaunchParams.append(" rEigenVec_Bz=" + to_string_exact(rEigenVec_Bz)); - } + public: + tMHDSYSTEMLinearWavesParameterizedMpi() + : waveTest(false, true, false, false){}; + + protected: + systemTest::SystemTestRunner waveTest; + +#ifdef PCM + double const allowedL1Error = + 4E-7; // Based on results in Gardiner & Stone 2008 + double const allowedError = 4E-7; +#else // PCM + double const allowedL1Error = + 1E-7; // Based on results in Gardiner & Stone 2008 + double const allowedError = 1E-7; +#endif // PCM + + void setLaunchParams(double const &waveSpeed, double const &rEigenVec_rho, + double const &rEigenVec_MomentumX, + double const &rEigenVec_MomentumY, + double const &rEigenVec_MomentumZ, + double const &rEigenVec_E, double const &rEigenVec_Bx, + double const &rEigenVec_By, double const &rEigenVec_Bz) + { + // Constant for all tests + size_t const N = 32; + double const gamma = 5. / 3.; + double const domain = 0.5; + double const tOut = 2 * domain / waveSpeed; + + // Settings + waveTest.chollaLaunchParams.append(" nx=" + to_string_exact(2 * N)); + waveTest.chollaLaunchParams.append(" ny=" + to_string_exact(N)); + waveTest.chollaLaunchParams.append(" nz=" + to_string_exact(N)); + waveTest.chollaLaunchParams.append(" tout=" + + to_string_exact(tOut)); + waveTest.chollaLaunchParams.append(" outstep=" + + to_string_exact(tOut)); + waveTest.chollaLaunchParams.append(" init=Linear_Wave"); + waveTest.chollaLaunchParams.append(" xmin=0.0"); + waveTest.chollaLaunchParams.append(" ymin=0.0"); + waveTest.chollaLaunchParams.append(" zmin=0.0"); + waveTest.chollaLaunchParams.append(" xlen=" + + to_string_exact(2 * domain)); + waveTest.chollaLaunchParams.append(" ylen=" + + to_string_exact(domain)); + waveTest.chollaLaunchParams.append(" zlen=" + + to_string_exact(domain)); + waveTest.chollaLaunchParams.append(" xl_bcnd=1"); + waveTest.chollaLaunchParams.append(" xu_bcnd=1"); + waveTest.chollaLaunchParams.append(" yl_bcnd=1"); + waveTest.chollaLaunchParams.append(" yu_bcnd=1"); + waveTest.chollaLaunchParams.append(" zl_bcnd=1"); + waveTest.chollaLaunchParams.append(" zu_bcnd=1"); + waveTest.chollaLaunchParams.append(" rho=1.0"); + waveTest.chollaLaunchParams.append(" vx=0"); + waveTest.chollaLaunchParams.append(" vy=0"); + waveTest.chollaLaunchParams.append(" vz=0"); + waveTest.chollaLaunchParams.append(" P=" + + to_string_exact(1 / gamma)); + waveTest.chollaLaunchParams.append(" Bx=1"); + waveTest.chollaLaunchParams.append(" By=1.5"); + waveTest.chollaLaunchParams.append(" Bz=0"); + waveTest.chollaLaunchParams.append(" A='1e-6'"); + waveTest.chollaLaunchParams.append(" gamma=" + + to_string_exact(gamma)); + waveTest.chollaLaunchParams.append(" rEigenVec_rho=" + + to_string_exact(rEigenVec_rho)); + waveTest.chollaLaunchParams.append( + " rEigenVec_MomentumX=" + to_string_exact(rEigenVec_MomentumX)); + waveTest.chollaLaunchParams.append( + " rEigenVec_MomentumY=" + to_string_exact(rEigenVec_MomentumY)); + waveTest.chollaLaunchParams.append( + " rEigenVec_MomentumZ=" + to_string_exact(rEigenVec_MomentumZ)); + waveTest.chollaLaunchParams.append(" rEigenVec_E=" + + to_string_exact(rEigenVec_E)); + waveTest.chollaLaunchParams.append(" rEigenVec_Bx=" + + to_string_exact(rEigenVec_Bx)); + waveTest.chollaLaunchParams.append(" rEigenVec_By=" + + to_string_exact(rEigenVec_By)); + waveTest.chollaLaunchParams.append(" rEigenVec_Bz=" + + to_string_exact(rEigenVec_Bz)); + } }; // Slow Magnetosonic Waves Moving Left and Right @@ -568,69 +611,68 @@ class tMHDSYSTEMLinearWavesParameterizedMpi TEST_P(tMHDSYSTEMLinearWavesParameterizedMpi, SlowMagnetosonicWaveRightMovingCorrectInputExpectCorrectOutput) { - // Specific to this test - double const waveSpeed = 0.5; - int const numTimeSteps = 854; - - double const prefix = 1./(2*std::sqrt(5)); - double const rEigenVec_rho = prefix * 4; - double const rEigenVec_MomentumX = prefix * 2; - double const rEigenVec_MomentumY = prefix * 4; - double const rEigenVec_MomentumZ = prefix * 0; - double const rEigenVec_Bx = prefix * 0; - double const rEigenVec_By = prefix * -2; - double const rEigenVec_Bz = prefix * 0; - double const rEigenVec_E = prefix * 3; - - // Get the test parameters - waveTest.numMpiRanks = GetParam(); - - // Set the launch parameters - setLaunchParams(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, - rEigenVec_MomentumY, rEigenVec_MomentumZ, rEigenVec_E, - rEigenVec_Bx, rEigenVec_By, rEigenVec_Bz); - - // Set the number of timesteps - waveTest.setFiducialNumTimeSteps(numTimeSteps); - - // Check Results - waveTest.runL1ErrorTest(allowedL1Error, allowedError); + // Specific to this test + double const waveSpeed = 0.5; + int const numTimeSteps = 854; + + double const prefix = 1. / (2 * std::sqrt(5)); + double const rEigenVec_rho = prefix * 4; + double const rEigenVec_MomentumX = prefix * 2; + double const rEigenVec_MomentumY = prefix * 4; + double const rEigenVec_MomentumZ = prefix * 0; + double const rEigenVec_Bx = prefix * 0; + double const rEigenVec_By = prefix * -2; + double const rEigenVec_Bz = prefix * 0; + double const rEigenVec_E = prefix * 3; + + // Get the test parameters + waveTest.numMpiRanks = GetParam(); + + // Set the launch parameters + setLaunchParams(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, + rEigenVec_MomentumY, rEigenVec_MomentumZ, rEigenVec_E, + rEigenVec_Bx, rEigenVec_By, rEigenVec_Bz); + + // Set the number of timesteps + waveTest.setFiducialNumTimeSteps(numTimeSteps); + + // Check Results + waveTest.runL1ErrorTest(allowedL1Error, allowedError); } TEST_P(tMHDSYSTEMLinearWavesParameterizedMpi, SlowMagnetosonicWaveLeftMovingCorrectInputExpectCorrectOutput) { - // Specific to this test - double const waveSpeed = 0.5; - int const numTimeSteps = 854; - - double const prefix = 1./(2*std::sqrt(5)); - double const rEigenVec_rho = prefix * 4; - double const rEigenVec_MomentumX = prefix * -2; - double const rEigenVec_MomentumY = prefix * -4; - double const rEigenVec_MomentumZ = prefix * 0; - double const rEigenVec_Bx = prefix * 0; - double const rEigenVec_By = prefix * -2; - double const rEigenVec_Bz = prefix * 0; - double const rEigenVec_E = prefix * 3; - - // Get the test parameters - waveTest.numMpiRanks = GetParam(); - - // Set the launch parameters - setLaunchParams(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, - rEigenVec_MomentumY, rEigenVec_MomentumZ, rEigenVec_E, - rEigenVec_Bx, rEigenVec_By, rEigenVec_Bz); - - // Set the number of timesteps - waveTest.setFiducialNumTimeSteps(numTimeSteps); - - // Check Results - waveTest.runL1ErrorTest(allowedL1Error, allowedError); + // Specific to this test + double const waveSpeed = 0.5; + int const numTimeSteps = 854; + + double const prefix = 1. / (2 * std::sqrt(5)); + double const rEigenVec_rho = prefix * 4; + double const rEigenVec_MomentumX = prefix * -2; + double const rEigenVec_MomentumY = prefix * -4; + double const rEigenVec_MomentumZ = prefix * 0; + double const rEigenVec_Bx = prefix * 0; + double const rEigenVec_By = prefix * -2; + double const rEigenVec_Bz = prefix * 0; + double const rEigenVec_E = prefix * 3; + + // Get the test parameters + waveTest.numMpiRanks = GetParam(); + + // Set the launch parameters + setLaunchParams(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, + rEigenVec_MomentumY, rEigenVec_MomentumZ, rEigenVec_E, + rEigenVec_Bx, rEigenVec_By, rEigenVec_Bz); + + // Set the number of timesteps + waveTest.setFiducialNumTimeSteps(numTimeSteps); + + // Check Results + waveTest.runL1ErrorTest(allowedL1Error, allowedError); } -INSTANTIATE_TEST_SUITE_P(, - tMHDSYSTEMLinearWavesParameterizedMpi, +INSTANTIATE_TEST_SUITE_P(, tMHDSYSTEMLinearWavesParameterizedMpi, ::testing::Values(1, 2, 4)); /// @} // ============================================================================= \ No newline at end of file diff --git a/src/system_tests/particles_system_tests.cpp b/src/system_tests/particles_system_tests.cpp index 7f6d4552e..9c2fbb892 100644 --- a/src/system_tests/particles_system_tests.cpp +++ b/src/system_tests/particles_system_tests.cpp @@ -20,11 +20,10 @@ * */ /// @{ -TEST(tPARTICLESSYSTEMSphericalCollapse, - CorrectInputExpectCorrectOutput) +TEST(tPARTICLESSYSTEMSphericalCollapse, CorrectInputExpectCorrectOutput) { - systemTest::SystemTestRunner collapseTest(true); - collapseTest.runTest(); + systemTest::SystemTestRunner collapseTest(true); + collapseTest.runTest(); } /// @} // ============================================================================= \ No newline at end of file diff --git a/src/system_tests/system_tester.cpp b/src/system_tests/system_tester.cpp index a2835ce7c..db038fa9a 100644 --- a/src/system_tests/system_tester.cpp +++ b/src/system_tests/system_tester.cpp @@ -7,21 +7,22 @@ // STL includes #include -#include -#include -#include -#include + #include #include +#include +#include #include +#include +#include // External Libraries and Headers #include // Local includes -#include "../system_tests/system_tester.h" // Include the header file -#include "../utils/testing_utilities.h" #include "../io/io.h" +#include "../system_tests/system_tester.h" // Include the header file +#include "../utils/testing_utilities.h" // ============================================================================= // Public Members @@ -30,412 +31,374 @@ // ============================================================================= void systemTest::SystemTestRunner::runTest() { - /// Only run if this variable is set to `true`. Generally this and - /// globalCompareSystemTestResults should only be used for large MPI / tests - /// where the user wishes to separate the execution of cholla and the / - /// comparison of results onto different machines/jobs - if (globalRunCholla) - { - // Launch Cholla. Note that this dumps all console output to the console - // log file as requested by the user. - launchCholla(); - } - - /// If set to false then no comparison will be performed. Generally this and - /// globalRunCholla should only be used for large MPI tests where the user - /// wishes to separate the execution of cholla and the comparison of results - /// onto different machines/jobs - if (not globalCompareSystemTestResults) return; - - // Make sure we have all the required data files and open the test data file - _testHydroFieldsFileVec.resize(numMpiRanks); - _testParticlesFileVec.resize(numMpiRanks); - for (size_t fileIndex = 0; fileIndex < numMpiRanks; fileIndex++) - { - // Load the hydro data - if (_hydroDataExists) - { - std::string fileName = "/1.h5." + std::to_string(fileIndex); - _checkFileExists(_outputDirectory + fileName); - _testHydroFieldsFileVec[fileIndex].openFile(_outputDirectory + fileName, - H5F_ACC_RDONLY); - } - - // Load the particles data - if (_particleDataExists) - { - std::string fileName = "/1_particles.h5." + std::to_string(fileIndex); - _checkFileExists(_outputDirectory + fileName); - _testParticlesFileVec[fileIndex].openFile(_outputDirectory + fileName, - H5F_ACC_RDONLY); - } + /// Only run if this variable is set to `true`. Generally this and + /// globalCompareSystemTestResults should only be used for large MPI / tests + /// where the user wishes to separate the execution of cholla and the / + /// comparison of results onto different machines/jobs + if (globalRunCholla) { + // Launch Cholla. Note that this dumps all console output to the console + // log file as requested by the user. + launchCholla(); + } + + /// If set to false then no comparison will be performed. Generally this and + /// globalRunCholla should only be used for large MPI tests where the user + /// wishes to separate the execution of cholla and the comparison of results + /// onto different machines/jobs + if (not globalCompareSystemTestResults) return; + + // Make sure we have all the required data files and open the test data file + _testHydroFieldsFileVec.resize(numMpiRanks); + _testParticlesFileVec.resize(numMpiRanks); + for (size_t fileIndex = 0; fileIndex < numMpiRanks; fileIndex++) { + // Load the hydro data + if (_hydroDataExists) { + std::string fileName = "/1.h5." + std::to_string(fileIndex); + _checkFileExists(_outputDirectory + fileName); + _testHydroFieldsFileVec[fileIndex].openFile(_outputDirectory + fileName, + H5F_ACC_RDONLY); } - // If this is a particle build then read in the IDs and generate the sorting - // vector - if (_particleDataExists) - { - _testParticleIDs = _loadTestParticleData("particle_IDs"); - - if (_fiducialFileExists) _fiducialParticleIDs = _loadFiducialParticleData("particle_IDs"); + // Load the particles data + if (_particleDataExists) { + std::string fileName = "/1_particles.h5." + std::to_string(fileIndex); + _checkFileExists(_outputDirectory + fileName); + _testParticlesFileVec[fileIndex].openFile(_outputDirectory + fileName, + H5F_ACC_RDONLY); } + } - // Get the list of test dataset names - if (_hydroDataExists) - _testDataSetNames = _findDataSetNames(_testHydroFieldsFileVec[0]); - if (_particleDataExists) - { - // Load the data, replace the density value with the new name, then append - std::vector particleNames = _findDataSetNames(_testParticlesFileVec[0]); - auto iter = std::find(particleNames.begin(), particleNames.end(), "density"); - *iter = "particle_density"; - - _testDataSetNames.insert(_testDataSetNames.end(), - particleNames.begin(), - particleNames.end()); - } + // If this is a particle build then read in the IDs and generate the sorting + // vector + if (_particleDataExists) { + _testParticleIDs = _loadTestParticleData("particle_IDs"); - // Start Performing Checks - // ======================= - // Check the number of time steps - if (_compareNumTimeSteps) _checkNumTimeSteps(); - - // Check that the test file has as many, or more, datasets than the fiducial - // file. Provide a warning if the datasets are not the same size - EXPECT_GE(_testDataSetNames.size(), _fiducialDataSetNames.size()) - << std::endl - << "Warning: The test data has " - << _testDataSetNames.size() - << " datasets and the fiducial data has " - << _fiducialDataSetNames.size() - << " datasets" << std::endl << std::endl; - - // Loop over the datasets to be tested - for (auto dataSetName: _fiducialDataSetNames) - { - // check that the test data has the dataset in it - ASSERT_EQ(std::count(_testDataSetNames.begin(), _testDataSetNames.end(), dataSetName), 1) - << "The test data does not contain the dataset '" + dataSetName - + "' or contains it more than once."; - - // Get data vectors - std::vector testDims(3,1); - std::vector testData; - std::vector fiducialData; - // This is just a vector of all the different dataset names for - // particles to help choose whether to call _loadTestParticleData - // or loadTestFieldData - std::vector particleIDs = {"particle_IDs", - "pos_x", - "pos_y", - "pos_z", - "vel_x", - "vel_y", - "vel_z"}; - if (std::find(particleIDs.begin(), particleIDs.end(), dataSetName) - != particleIDs.end()) - { - // This is a particle data set - - // Set some basic parameters - testDims[0] = _testTotalNumParticles; - - // Load in the data. Note the special handling for particle_IDs - if (dataSetName == "particle_IDs") - { - testData = _testParticleIDs; - fiducialData = _fiducialParticleIDs; - } - else - { - testData = _loadTestParticleData(dataSetName); - fiducialData = _loadFiducialParticleData(dataSetName); - } - } - else - { - // This is a field data set - testData = loadTestFieldData(dataSetName, testDims); - // Get fiducial data - fiducialData = _loadFiducialFieldData(dataSetName); - } + if (_fiducialFileExists) + _fiducialParticleIDs = _loadFiducialParticleData("particle_IDs"); + } - // Check that they're the same length - ASSERT_EQ(fiducialData.size(), testData.size()) - << "The fiducial and test '" - << dataSetName - << "' datasets are not the same length"; - - // Compare values - for (size_t i = 0; i < testDims[0]; i++) - { - for (size_t j = 0; j < testDims[1]; j++) - { - for (size_t k = 0; k < testDims[2]; k++) - { - size_t index = (i * testDims[1] * testDims[2]) + (j * testDims[2]) + k; - - // Check for equality and iff not equal return difference - double absoluteDiff; - int64_t ulpsDiff; - bool areEqual = testingUtilities::nearlyEqualDbl(fiducialData.at(index), - testData.at(index), - absoluteDiff, - ulpsDiff, - _fixedEpsilon); - ASSERT_TRUE(areEqual) - << std::endl - << "Difference in " - << dataSetName - << " dataset at [" - << i << "," << j << "," << k <<"]" << std::endl - << "The fiducial value is: " << fiducialData[index] << std::endl - << "The test value is: " << testData[index] << std::endl - << "The absolute difference is: " << absoluteDiff << std::endl - << "The ULP difference is: " << ulpsDiff << std::endl; - } - } + // Get the list of test dataset names + if (_hydroDataExists) + _testDataSetNames = _findDataSetNames(_testHydroFieldsFileVec[0]); + if (_particleDataExists) { + // Load the data, replace the density value with the new name, then append + std::vector particleNames = + _findDataSetNames(_testParticlesFileVec[0]); + auto iter = + std::find(particleNames.begin(), particleNames.end(), "density"); + *iter = "particle_density"; + + _testDataSetNames.insert(_testDataSetNames.end(), particleNames.begin(), + particleNames.end()); + } + + // Start Performing Checks + // ======================= + // Check the number of time steps + if (_compareNumTimeSteps) _checkNumTimeSteps(); + + // Check that the test file has as many, or more, datasets than the fiducial + // file. Provide a warning if the datasets are not the same size + EXPECT_GE(_testDataSetNames.size(), _fiducialDataSetNames.size()) + << std::endl + << "Warning: The test data has " << _testDataSetNames.size() + << " datasets and the fiducial data has " << _fiducialDataSetNames.size() + << " datasets" << std::endl + << std::endl; + + // Loop over the datasets to be tested + for (auto dataSetName : _fiducialDataSetNames) { + // check that the test data has the dataset in it + ASSERT_EQ(std::count(_testDataSetNames.begin(), _testDataSetNames.end(), + dataSetName), + 1) + << "The test data does not contain the dataset '" + dataSetName + + "' or contains it more than once."; + + // Get data vectors + std::vector testDims(3, 1); + std::vector testData; + std::vector fiducialData; + // This is just a vector of all the different dataset names for + // particles to help choose whether to call _loadTestParticleData + // or loadTestFieldData + std::vector particleIDs = { + "particle_IDs", "pos_x", "pos_y", "pos_z", "vel_x", "vel_y", "vel_z"}; + if (std::find(particleIDs.begin(), particleIDs.end(), dataSetName) != + particleIDs.end()) { + // This is a particle data set + + // Set some basic parameters + testDims[0] = _testTotalNumParticles; + + // Load in the data. Note the special handling for particle_IDs + if (dataSetName == "particle_IDs") { + testData = _testParticleIDs; + fiducialData = _fiducialParticleIDs; + } else { + testData = _loadTestParticleData(dataSetName); + fiducialData = _loadFiducialParticleData(dataSetName); + } + } else { + // This is a field data set + testData = loadTestFieldData(dataSetName, testDims); + // Get fiducial data + fiducialData = _loadFiducialFieldData(dataSetName); + } + + // Check that they're the same length + ASSERT_EQ(fiducialData.size(), testData.size()) + << "The fiducial and test '" << dataSetName + << "' datasets are not the same length"; + + // Compare values + for (size_t i = 0; i < testDims[0]; i++) { + for (size_t j = 0; j < testDims[1]; j++) { + for (size_t k = 0; k < testDims[2]; k++) { + size_t index = + (i * testDims[1] * testDims[2]) + (j * testDims[2]) + k; + + // Check for equality and iff not equal return difference + double absoluteDiff; + int64_t ulpsDiff; + bool areEqual = testingUtilities::nearlyEqualDbl( + fiducialData.at(index), testData.at(index), absoluteDiff, + ulpsDiff, _fixedEpsilon); + ASSERT_TRUE(areEqual) + << std::endl + << "Difference in " << dataSetName << " dataset at [" << i << "," + << j << "," << k << "]" << std::endl + << "The fiducial value is: " << fiducialData[index] + << std::endl + << "The test value is: " << testData[index] << std::endl + << "The absolute difference is: " << absoluteDiff << std::endl + << "The ULP difference is: " << ulpsDiff << std::endl; } + } } + } } // ============================================================================= // ============================================================================= -void systemTest::SystemTestRunner::runL1ErrorTest(double const &maxAllowedL1Error, - double const &maxAllowedError) +void systemTest::SystemTestRunner::runL1ErrorTest( + double const &maxAllowedL1Error, double const &maxAllowedError) { - /// Only run if this variable is set to `true`. Generally this and - /// globalCompareSystemTestResults should only be used for large MPI / tests - /// where the user wishes to separate the execution of cholla and the / - /// comparison of results onto different machines/jobs - if (globalRunCholla) - { - // Launch Cholla. Note that this dumps all console output to the console - // log file as requested by the user. - launchCholla(); - } - - // Check that there is hydro data and no particle data - if (_particleDataExists) - { - std::string errMessage = "Error: SystemTestRunner::runL1ErrorTest does not support particles"; - throw std::runtime_error(errMessage); - } - if (not _hydroDataExists) - { - std::string errMessage = "Error: SystemTestRunner::runL1ErrorTest requires hydro data"; - throw std::runtime_error(errMessage); - } - - /// If set to false then no comparison will be performed. Generally this and - /// globalRunCholla should only be used for large MPI tests where the user - /// wishes to separate the execution of cholla and the comparison of results - /// onto different machines/jobs - if (not globalCompareSystemTestResults) return; - - // Make sure we have all the required data files and open the data files - _testHydroFieldsFileVec.resize(numMpiRanks); - std::vector initialHydroFieldsFileVec(numMpiRanks); - for (size_t fileIndex = 0; fileIndex < numMpiRanks; fileIndex++) - { - // Initial time data - std::string fileName = "/0.h5." + std::to_string(fileIndex); - _checkFileExists(_outputDirectory + fileName); - initialHydroFieldsFileVec[fileIndex].openFile(_outputDirectory + fileName, - H5F_ACC_RDONLY); - - // Final time data - fileName = "/1.h5." + std::to_string(fileIndex); - _checkFileExists(_outputDirectory + fileName); - _testHydroFieldsFileVec[fileIndex].openFile(_outputDirectory + fileName, - H5F_ACC_RDONLY); - } - - // Get the list of test dataset names - _fiducialDataSetNames = _findDataSetNames(initialHydroFieldsFileVec[0]); - _testDataSetNames = _findDataSetNames(_testHydroFieldsFileVec[0]); - - // Start Performing Checks - // ======================= - // Check the number of time steps - if (_compareNumTimeSteps) _checkNumTimeSteps(); - - // Check that the test file has as many, or more, datasets than the fiducial - // file. Provide a warning if the datasets are not the same size - EXPECT_GE(_testDataSetNames.size(), _fiducialDataSetNames.size()) - << std::endl - << "Warning: The test data has " - << _testDataSetNames.size() - << " datasets and the fiducial data has " - << _fiducialDataSetNames.size() - << " datasets" << std::endl << std::endl; - - // Loop over the datasets to be tested - double L2Norm = 0; - double maxError = 0; - for (auto dataSetName: _fiducialDataSetNames) - { - if (dataSetName == "GasEnergy") - { - continue; - } - - // check that the test data has the dataset in it - ASSERT_EQ(std::count(_testDataSetNames.begin(), _testDataSetNames.end(), dataSetName), 1) - << "The test data does not contain the dataset '" + dataSetName - + "' or contains it more than once."; - - // Get data vectors - std::vector initialDims(3,1); - std::vector initialData; - std::vector finalDims(3,1); - std::vector finalData; - - // This is a field data set - initialData = loadTestFieldData(dataSetName, initialDims, initialHydroFieldsFileVec); - // Get fiducial data - finalData = loadTestFieldData(dataSetName, finalDims, _testHydroFieldsFileVec); - - // Check that they're the same length - ASSERT_EQ(initialData.size(), finalData.size()) - << "The initial and final '" - << dataSetName - << "' datasets are not the same length"; - - // Compute the L1 Error. - double L1Error = 0; - for (size_t i = 0; i < initialData.size(); i++) - { - double const diff = std::abs(initialData.at(i) - finalData.at(i)); - L1Error += diff; - maxError = (diff > maxError)? diff: maxError; - } - - L1Error *= (1./static_cast(initialDims[0]*initialDims[1]*initialDims[2])); - L2Norm += L1Error * L1Error; - - // Perform the correctness check - EXPECT_LT(L1Error, maxAllowedL1Error) << "the L1 error for the " - << dataSetName - << " data has exceeded the allowed value"; - } - - // Check the L1 Norm - L2Norm = std::sqrt(L2Norm); - EXPECT_LT(L2Norm, maxAllowedL1Error) - << "the norm of the L1 error vector has exceeded the allowed value"; - - // Check the Max Error - EXPECT_LT(maxError, maxAllowedError) << "The maximum error has exceeded the allowed value"; + /// Only run if this variable is set to `true`. Generally this and + /// globalCompareSystemTestResults should only be used for large MPI / tests + /// where the user wishes to separate the execution of cholla and the / + /// comparison of results onto different machines/jobs + if (globalRunCholla) { + // Launch Cholla. Note that this dumps all console output to the console + // log file as requested by the user. + launchCholla(); + } + + // Check that there is hydro data and no particle data + if (_particleDataExists) { + std::string errMessage = + "Error: SystemTestRunner::runL1ErrorTest does not support particles"; + throw std::runtime_error(errMessage); + } + if (not _hydroDataExists) { + std::string errMessage = + "Error: SystemTestRunner::runL1ErrorTest requires hydro data"; + throw std::runtime_error(errMessage); + } + + /// If set to false then no comparison will be performed. Generally this and + /// globalRunCholla should only be used for large MPI tests where the user + /// wishes to separate the execution of cholla and the comparison of results + /// onto different machines/jobs + if (not globalCompareSystemTestResults) return; + + // Make sure we have all the required data files and open the data files + _testHydroFieldsFileVec.resize(numMpiRanks); + std::vector initialHydroFieldsFileVec(numMpiRanks); + for (size_t fileIndex = 0; fileIndex < numMpiRanks; fileIndex++) { + // Initial time data + std::string fileName = "/0.h5." + std::to_string(fileIndex); + _checkFileExists(_outputDirectory + fileName); + initialHydroFieldsFileVec[fileIndex].openFile(_outputDirectory + fileName, + H5F_ACC_RDONLY); + + // Final time data + fileName = "/1.h5." + std::to_string(fileIndex); + _checkFileExists(_outputDirectory + fileName); + _testHydroFieldsFileVec[fileIndex].openFile(_outputDirectory + fileName, + H5F_ACC_RDONLY); + } + + // Get the list of test dataset names + _fiducialDataSetNames = _findDataSetNames(initialHydroFieldsFileVec[0]); + _testDataSetNames = _findDataSetNames(_testHydroFieldsFileVec[0]); + + // Start Performing Checks + // ======================= + // Check the number of time steps + if (_compareNumTimeSteps) _checkNumTimeSteps(); + + // Check that the test file has as many, or more, datasets than the fiducial + // file. Provide a warning if the datasets are not the same size + EXPECT_GE(_testDataSetNames.size(), _fiducialDataSetNames.size()) + << std::endl + << "Warning: The test data has " << _testDataSetNames.size() + << " datasets and the fiducial data has " << _fiducialDataSetNames.size() + << " datasets" << std::endl + << std::endl; + + // Loop over the datasets to be tested + double L2Norm = 0; + double maxError = 0; + for (auto dataSetName : _fiducialDataSetNames) { + if (dataSetName == "GasEnergy") { + continue; + } + + // check that the test data has the dataset in it + ASSERT_EQ(std::count(_testDataSetNames.begin(), _testDataSetNames.end(), + dataSetName), + 1) + << "The test data does not contain the dataset '" + dataSetName + + "' or contains it more than once."; + + // Get data vectors + std::vector initialDims(3, 1); + std::vector initialData; + std::vector finalDims(3, 1); + std::vector finalData; + + // This is a field data set + initialData = + loadTestFieldData(dataSetName, initialDims, initialHydroFieldsFileVec); + // Get fiducial data + finalData = + loadTestFieldData(dataSetName, finalDims, _testHydroFieldsFileVec); + + // Check that they're the same length + ASSERT_EQ(initialData.size(), finalData.size()) + << "The initial and final '" << dataSetName + << "' datasets are not the same length"; + + // Compute the L1 Error. + double L1Error = 0; + for (size_t i = 0; i < initialData.size(); i++) { + double const diff = std::abs(initialData.at(i) - finalData.at(i)); + L1Error += diff; + maxError = (diff > maxError) ? diff : maxError; + } + + L1Error *= (1. / static_cast(initialDims[0] * initialDims[1] * + initialDims[2])); + L2Norm += L1Error * L1Error; + + // Perform the correctness check + EXPECT_LT(L1Error, maxAllowedL1Error) + << "the L1 error for the " << dataSetName + << " data has exceeded the allowed value"; + } + + // Check the L1 Norm + L2Norm = std::sqrt(L2Norm); + EXPECT_LT(L2Norm, maxAllowedL1Error) + << "the norm of the L1 error vector has exceeded the allowed value"; + + // Check the Max Error + EXPECT_LT(maxError, maxAllowedError) + << "The maximum error has exceeded the allowed value"; } // ============================================================================= // ============================================================================= void systemTest::SystemTestRunner::launchCholla() { - // Launch Cholla. Note that this dumps all console output to the console - // log file as requested by the user. - std::string const chollaRunCommand = globalMpiLauncher.getString() + " " - + std::to_string(numMpiRanks) + " " - + _chollaPath + " " - + _chollaSettingsPath + " " - + chollaLaunchParams + " " - + "outdir=" + _outputDirectory + "/" - + " >> " + _consoleOutputPath + " 2>&1 "; - auto returnEcho = system(("echo Launch Command: " + chollaRunCommand + " >> " + _consoleOutputPath).c_str()); - auto returnLaunch = system((chollaRunCommand).c_str()); - EXPECT_EQ(returnEcho, 0) - << "Warning: Echoing the launch command to the console output file " - << "returned a non-zero exit status code. Launch command is `" - << chollaRunCommand << "`" << std::endl; - EXPECT_EQ(returnLaunch, 0) - << "Warning: Launching Cholla returned a non-zero exit status. Likely " - << "failed to launch. Please see the log files" << std::endl; - - _safeMove("run_output.log", _outputDirectory); - // TODO: instead of commenting out, change to check if exist - //_safeMove("run_timing.log", _outputDirectory); + // Launch Cholla. Note that this dumps all console output to the console + // log file as requested by the user. + std::string const chollaRunCommand = + globalMpiLauncher.getString() + " " + std::to_string(numMpiRanks) + " " + + _chollaPath + " " + _chollaSettingsPath + " " + chollaLaunchParams + " " + + "outdir=" + _outputDirectory + "/" + " >> " + _consoleOutputPath + + " 2>&1 "; + auto returnEcho = system( + ("echo Launch Command: " + chollaRunCommand + " >> " + _consoleOutputPath) + .c_str()); + auto returnLaunch = system((chollaRunCommand).c_str()); + EXPECT_EQ(returnEcho, 0) + << "Warning: Echoing the launch command to the console output file " + << "returned a non-zero exit status code. Launch command is `" + << chollaRunCommand << "`" << std::endl; + EXPECT_EQ(returnLaunch, 0) + << "Warning: Launching Cholla returned a non-zero exit status. Likely " + << "failed to launch. Please see the log files" << std::endl; + + _safeMove("run_output.log", _outputDirectory); + // TODO: instead of commenting out, change to check if exist + //_safeMove("run_timing.log", _outputDirectory); } // ============================================================================= // ============================================================================= void systemTest::SystemTestRunner::openHydroTestData() { - _testHydroFieldsFileVec.resize(numMpiRanks); - for (size_t fileIndex = 0; fileIndex < numMpiRanks; fileIndex++) - { - std::string fileName = "/1.h5." + std::to_string(fileIndex); - _checkFileExists(_outputDirectory + fileName); - _testHydroFieldsFileVec[fileIndex].openFile(_outputDirectory + fileName, - H5F_ACC_RDONLY); - } + _testHydroFieldsFileVec.resize(numMpiRanks); + for (size_t fileIndex = 0; fileIndex < numMpiRanks; fileIndex++) { + std::string fileName = "/1.h5." + std::to_string(fileIndex); + _checkFileExists(_outputDirectory + fileName); + _testHydroFieldsFileVec[fileIndex].openFile(_outputDirectory + fileName, + H5F_ACC_RDONLY); + } } // ============================================================================= // ============================================================================= -void systemTest::SystemTestRunner::setFiducialData(std::string const &fieldName, - std::vector const &dataVec) +void systemTest::SystemTestRunner::setFiducialData( + std::string const &fieldName, std::vector const &dataVec) { - // First check if there's a fiducial data file - if (_fiducialDataSets.count(fieldName) > 0) - { - std::string errMessage = "Error: Fiducial dataset for field '" - + fieldName - + "' already exists and cannot be overwritten"; - throw std::runtime_error(errMessage); - } - - // Put new vector into map - _fiducialDataSets[fieldName] = dataVec; + // First check if there's a fiducial data file + if (_fiducialDataSets.count(fieldName) > 0) { + std::string errMessage = "Error: Fiducial dataset for field '" + fieldName + + "' already exists and cannot be overwritten"; + throw std::runtime_error(errMessage); + } + + // Put new vector into map + _fiducialDataSets[fieldName] = dataVec; } // ============================================================================= // ============================================================================= std::vector systemTest::SystemTestRunner::generateConstantData( - double const &value, - size_t const &nx, - size_t const &ny, - size_t const &nz) + double const &value, size_t const &nx, size_t const &ny, size_t const &nz) { - size_t const length = nx*ny*nz; - std::vector outVec(length); - for (size_t i = 0; i < length; i++) - { - outVec[i] = value; - } - return outVec; + size_t const length = nx * ny * nz; + std::vector outVec(length); + for (size_t i = 0; i < length; i++) { + outVec[i] = value; + } + return outVec; } // ============================================================================= // ============================================================================= std::vector systemTest::SystemTestRunner::generateSineData( - double const &offset, - double const &litude, - double const &kx, - double const &ky, - double const &kz, - double const &phase, - size_t const &nx, - size_t const &ny, - size_t const &nz) + double const &offset, double const &litude, double const &kx, + double const &ky, double const &kz, double const &phase, size_t const &nx, + size_t const &ny, size_t const &nz) { - size_t const length = nx*ny*nz; - std::vector outVec(length); - for (size_t i = 0; i < nx; i++) - { - for (size_t j = 0; j < ny; j++) - { - for (size_t k = 0; k < nz; k++) - { - double value = offset + amplitude - * std::sin(kx*i + ky*j + kz*k + phase); - - size_t index = (i * ny * nz) + (j * nz) + k; - outVec[index] = value; - } - } - } - return outVec; + size_t const length = nx * ny * nz; + std::vector outVec(length); + for (size_t i = 0; i < nx; i++) { + for (size_t j = 0; j < ny; j++) { + for (size_t k = 0; k < nz; k++) { + double value = + offset + amplitude * std::sin(kx * i + ky * j + kz * k + phase); + + size_t index = (i * ny * nz) + (j * nz) + k; + outVec[index] = value; + } + } + } + return outVec; } // ============================================================================= @@ -445,72 +408,63 @@ systemTest::SystemTestRunner::SystemTestRunner(bool const &particleData, bool const &hydroData, bool const &useFiducialFile, bool const &useSettingsFile) - : - _particleDataExists(particleData), - _hydroDataExists(hydroData) + : _particleDataExists(particleData), _hydroDataExists(hydroData) { - // Get the test name, with and underscore instead of a "." since - // we're actually generating file names - const ::testing::TestInfo* const test_info = ::testing::UnitTest::GetInstance()->current_test_info(); - std::stringstream nameStream; - std::string suiteName = test_info->test_suite_name(); - suiteName = suiteName.substr(suiteName.find("/")+1, suiteName.length()); - nameStream << suiteName << "_" << test_info->name(); - std::string fullTestName = nameStream.str(); - _fullTestFileName = fullTestName.substr(0, fullTestName.find("/")); - - // Generate the input paths. Strip out everything after a "/" since that - // probably indicates a parameterized test. Also, check that the files exist - // and load fiducial HDF5 file if required - _chollaPath = ::globalChollaRoot.getString() - + "/bin/cholla." - + ::globalChollaBuild.getString() - + "." + ::globalChollaMachine.getString(); - _checkFileExists(_chollaPath); - if (useSettingsFile) - { - _chollaSettingsPath = ::globalChollaRoot.getString() - + "/src/system_tests/input_files/" - + _fullTestFileName + ".txt"; - _checkFileExists(_chollaSettingsPath); - } - else - { - _chollaSettingsPath = ::globalChollaRoot.getString() - + "/src/system_tests/input_files/" - + "blank_settings_file.txt"; - _checkFileExists(_chollaSettingsPath); - } - if (useFiducialFile) - { - _fiducialFilePath = ::globalChollaRoot.getString() - + "/cholla-tests-data/system_tests/" - + _fullTestFileName + ".h5"; - _checkFileExists(_fiducialFilePath); - _fiducialFile.openFile(_fiducialFilePath, H5F_ACC_RDONLY); - _fiducialDataSetNames = _findDataSetNames(_fiducialFile); - _fiducialFileExists = true; - } - else - { - _fiducialFilePath = ""; - } - - // Generate output paths, these files don't exist yet - _outputDirectory = ::globalChollaRoot.getString() + "/bin/" + fullTestName; - _consoleOutputPath = _outputDirectory + "/" + _fullTestFileName + "_console.log"; - - // Create the new directory and check that it exists - // TODO: C++17: When we update to C++17 or newer this section should - // TODO: use std::filesystem to create the directory and check that - // TODO: it exists - if (system(("mkdir --parents " + _outputDirectory).c_str()) != 0) - { - std::cerr << "Warning: Directory '" - + _outputDirectory - + "' either already exists or could not be created." - << std::endl; - } + // Get the test name, with and underscore instead of a "." since + // we're actually generating file names + const ::testing::TestInfo *const test_info = + ::testing::UnitTest::GetInstance()->current_test_info(); + std::stringstream nameStream; + std::string suiteName = test_info->test_suite_name(); + suiteName = suiteName.substr(suiteName.find("/") + 1, suiteName.length()); + nameStream << suiteName << "_" << test_info->name(); + std::string fullTestName = nameStream.str(); + _fullTestFileName = fullTestName.substr(0, fullTestName.find("/")); + + // Generate the input paths. Strip out everything after a "/" since that + // probably indicates a parameterized test. Also, check that the files exist + // and load fiducial HDF5 file if required + _chollaPath = ::globalChollaRoot.getString() + "/bin/cholla." + + ::globalChollaBuild.getString() + "." + + ::globalChollaMachine.getString(); + _checkFileExists(_chollaPath); + if (useSettingsFile) { + _chollaSettingsPath = ::globalChollaRoot.getString() + + "/src/system_tests/input_files/" + _fullTestFileName + + ".txt"; + _checkFileExists(_chollaSettingsPath); + } else { + _chollaSettingsPath = ::globalChollaRoot.getString() + + "/src/system_tests/input_files/" + + "blank_settings_file.txt"; + _checkFileExists(_chollaSettingsPath); + } + if (useFiducialFile) { + _fiducialFilePath = ::globalChollaRoot.getString() + + "/cholla-tests-data/system_tests/" + _fullTestFileName + + ".h5"; + _checkFileExists(_fiducialFilePath); + _fiducialFile.openFile(_fiducialFilePath, H5F_ACC_RDONLY); + _fiducialDataSetNames = _findDataSetNames(_fiducialFile); + _fiducialFileExists = true; + } else { + _fiducialFilePath = ""; + } + + // Generate output paths, these files don't exist yet + _outputDirectory = ::globalChollaRoot.getString() + "/bin/" + fullTestName; + _consoleOutputPath = + _outputDirectory + "/" + _fullTestFileName + "_console.log"; + + // Create the new directory and check that it exists + // TODO: C++17: When we update to C++17 or newer this section should + // TODO: use std::filesystem to create the directory and check that + // TODO: it exists + if (system(("mkdir --parents " + _outputDirectory).c_str()) != 0) { + std::cerr << "Warning: Directory '" + _outputDirectory + + "' either already exists or could not be created." + << std::endl; + } } // ============================================================================= @@ -518,12 +472,11 @@ systemTest::SystemTestRunner::SystemTestRunner(bool const &particleData, // Destructor systemTest::SystemTestRunner::~SystemTestRunner() { - _fiducialFile.close(); - for (size_t i = 0; i < _testHydroFieldsFileVec.size(); i++) - { - if (_hydroDataExists) _testHydroFieldsFileVec[i].close(); - if (_particleDataExists) _testParticlesFileVec[i].close(); - } + _fiducialFile.close(); + for (size_t i = 0; i < _testHydroFieldsFileVec.size(); i++) { + if (_hydroDataExists) _testHydroFieldsFileVec[i].close(); + if (_particleDataExists) _testParticlesFileVec[i].close(); + } } // ============================================================================= @@ -534,334 +487,304 @@ systemTest::SystemTestRunner::~SystemTestRunner() // ============================================================================= void systemTest::SystemTestRunner::_checkFileExists(std::string const &filePath) { - // TODO C++17 std::filesystem does this better - std::fstream file; - file.open(filePath); - if (not file) - { - std::string errMessage = "Error: File '" + filePath + "' not found."; - throw std::invalid_argument(errMessage); - } + // TODO C++17 std::filesystem does this better + std::fstream file; + file.open(filePath); + if (not file) { + std::string errMessage = "Error: File '" + filePath + "' not found."; + throw std::invalid_argument(errMessage); + } } // ============================================================================= // ============================================================================= -void systemTest::SystemTestRunner::_safeMove(std::string const &sourcePath, - std::string const &destinationDirectory) +void systemTest::SystemTestRunner::_safeMove( + std::string const &sourcePath, std::string const &destinationDirectory) { - // TODO C++17 std::filesystem does this better - _checkFileExists(sourcePath); - if(std::rename(sourcePath.c_str(), (destinationDirectory + "/" + sourcePath).c_str()) < 0) - { - std::string errMessage = "Error: File '" - + sourcePath - + "' could not be moved to '" - + destinationDirectory - + "`"; - throw std::invalid_argument(errMessage); - } + // TODO C++17 std::filesystem does this better + _checkFileExists(sourcePath); + if (std::rename(sourcePath.c_str(), + (destinationDirectory + "/" + sourcePath).c_str()) < 0) { + std::string errMessage = "Error: File '" + sourcePath + + "' could not be moved to '" + + destinationDirectory + "`"; + throw std::invalid_argument(errMessage); + } } // ============================================================================= // ============================================================================= void systemTest::SystemTestRunner::_checkNumTimeSteps() { - int fiducialNSteps, testNSteps; - - H5::Attribute tStepAttr; - if (_hydroDataExists) - { - tStepAttr = _testHydroFieldsFileVec[0].openAttribute("n_step"); - } - else if (_particleDataExists) - { - tStepAttr = _testParticlesFileVec[0].openAttribute("n_step"); - } - else - { - std::string errMessage = "Error: Both hydro and particle data are turned off."; - throw std::invalid_argument(errMessage); - } - - tStepAttr.read(H5::PredType::NATIVE_INT, &testNSteps); - - if (_fiducialFileExists) - { - tStepAttr = _fiducialFile.openAttribute("n_step"); - tStepAttr.read(H5::PredType::NATIVE_INT, &fiducialNSteps); - } - else - { - fiducialNSteps = _numFiducialTimeSteps; - } - - EXPECT_EQ(fiducialNSteps, testNSteps) - << "The number of time steps is not equal"; + int fiducialNSteps, testNSteps; + + H5::Attribute tStepAttr; + if (_hydroDataExists) { + tStepAttr = _testHydroFieldsFileVec[0].openAttribute("n_step"); + } else if (_particleDataExists) { + tStepAttr = _testParticlesFileVec[0].openAttribute("n_step"); + } else { + std::string errMessage = + "Error: Both hydro and particle data are turned off."; + throw std::invalid_argument(errMessage); + } + + tStepAttr.read(H5::PredType::NATIVE_INT, &testNSteps); + + if (_fiducialFileExists) { + tStepAttr = _fiducialFile.openAttribute("n_step"); + tStepAttr.read(H5::PredType::NATIVE_INT, &fiducialNSteps); + } else { + fiducialNSteps = _numFiducialTimeSteps; + } + + EXPECT_EQ(fiducialNSteps, testNSteps) + << "The number of time steps is not equal"; }; // ============================================================================= // ============================================================================= std::vector systemTest::SystemTestRunner::loadTestFieldData( - std::string dataSetName, - std::vector &testDims, - std::vector file) + std::string dataSetName, std::vector &testDims, + std::vector file) { - // Switch which fileset we're using if it's a particle dataset - if (dataSetName == "particle_density") - { - file = _testParticlesFileVec; - dataSetName = "density"; - } - else if (file.size() == 0) - { - file = _testHydroFieldsFileVec; - } - - // Get the size of each dimension. First check if the field is a magnetic - // field or not to make sure we're retreiving the right dimensions - std::string dimsName = (dataSetName.find("magnetic") != std::string::npos)? - "magnetic_field_dims": "dims"; - H5::Attribute dimensions = file[0].openAttribute(dimsName.c_str()); - dimensions.read(H5::PredType::NATIVE_ULONG, testDims.data()); - - // Allocate the vector - std::vector testData(testDims[0] * testDims[1] * testDims[2]); - - for (size_t rank = 0; rank < numMpiRanks; rank++) - { - // Open the dataset - H5::DataSet const testDataSet = file[rank].openDataSet(dataSetName); - - // Determine dataset size/shape and check that it's correct - H5::DataSpace const testDataSpace = testDataSet.getSpace(); - - std::vector tempDims{1,1,1}; - int numTestDims = testDataSpace.getSimpleExtentDims(tempDims.data()); - - // Allocate vectors, Note that I'm casting everything to double. Some - // of the vectors are ints in the HDF5 file and if the casting - // becomes an issue we can fix it later - std::vector tempArr(tempDims[0] * tempDims[1] * tempDims[2]); - - // Read in data - testDataSet.read(tempArr.data(), H5::PredType::NATIVE_DOUBLE); - - // Get offset - std::vector offset(3,1); - H5::Attribute offsetAttr = file[rank].openAttribute("offset"); - offsetAttr.read(H5::PredType::NATIVE_INT, offset.data()); - - // Get dims_local - std::vector dimsLocal(3,1); - std::string dimsNameLocal = (dataSetName.find("magnetic") != std::string::npos)? - "magnetic_field_dims_local": "dims_local"; - H5::Attribute dimsLocalAttr = file[rank].openAttribute(dimsNameLocal.c_str()); - dimsLocalAttr.read(H5::PredType::NATIVE_INT, dimsLocal.data()); - - // Now we add the data to the larger vector - size_t localIndex = 0; - for (size_t i = offset[0]; i < offset[0] + dimsLocal[0]; i++) - { - for (size_t j = offset[1]; j < offset[1] + dimsLocal[1]; j++) - { - for (size_t k = offset[2]; k < offset[2] + dimsLocal[2]; k++) - { - // Compute the location to put the next element - size_t overallIndex = (i * testDims[1] * testDims[2]) + (j * testDims[2]) + k; - - // Perform copy - testData[overallIndex] = tempArr[localIndex]; - - // Increment local index - localIndex++; - } - } + // Switch which fileset we're using if it's a particle dataset + if (dataSetName == "particle_density") { + file = _testParticlesFileVec; + dataSetName = "density"; + } else if (file.size() == 0) { + file = _testHydroFieldsFileVec; + } + + // Get the size of each dimension. First check if the field is a magnetic + // field or not to make sure we're retreiving the right dimensions + std::string dimsName = (dataSetName.find("magnetic") != std::string::npos) + ? "magnetic_field_dims" + : "dims"; + H5::Attribute dimensions = file[0].openAttribute(dimsName.c_str()); + dimensions.read(H5::PredType::NATIVE_ULONG, testDims.data()); + + // Allocate the vector + std::vector testData(testDims[0] * testDims[1] * testDims[2]); + + for (size_t rank = 0; rank < numMpiRanks; rank++) { + // Open the dataset + H5::DataSet const testDataSet = file[rank].openDataSet(dataSetName); + + // Determine dataset size/shape and check that it's correct + H5::DataSpace const testDataSpace = testDataSet.getSpace(); + + std::vector tempDims{1, 1, 1}; + int numTestDims = testDataSpace.getSimpleExtentDims(tempDims.data()); + + // Allocate vectors, Note that I'm casting everything to double. Some + // of the vectors are ints in the HDF5 file and if the casting + // becomes an issue we can fix it later + std::vector tempArr(tempDims[0] * tempDims[1] * tempDims[2]); + + // Read in data + testDataSet.read(tempArr.data(), H5::PredType::NATIVE_DOUBLE); + + // Get offset + std::vector offset(3, 1); + H5::Attribute offsetAttr = file[rank].openAttribute("offset"); + offsetAttr.read(H5::PredType::NATIVE_INT, offset.data()); + + // Get dims_local + std::vector dimsLocal(3, 1); + std::string dimsNameLocal = + (dataSetName.find("magnetic") != std::string::npos) + ? "magnetic_field_dims_local" + : "dims_local"; + H5::Attribute dimsLocalAttr = + file[rank].openAttribute(dimsNameLocal.c_str()); + dimsLocalAttr.read(H5::PredType::NATIVE_INT, dimsLocal.data()); + + // Now we add the data to the larger vector + size_t localIndex = 0; + for (size_t i = offset[0]; i < offset[0] + dimsLocal[0]; i++) { + for (size_t j = offset[1]; j < offset[1] + dimsLocal[1]; j++) { + for (size_t k = offset[2]; k < offset[2] + dimsLocal[2]; k++) { + // Compute the location to put the next element + size_t overallIndex = + (i * testDims[1] * testDims[2]) + (j * testDims[2]) + k; + + // Perform copy + testData[overallIndex] = tempArr[localIndex]; + + // Increment local index + localIndex++; } + } } + } - // Return the entire, concatenated, dataset - return testData; + // Return the entire, concatenated, dataset + return testData; } // ============================================================================= // ============================================================================= std::vector systemTest::SystemTestRunner::_loadTestParticleData( - std::string const &dataSetName) + std::string const &dataSetName) { + // Determine the total number of particles + if (_testTotalNumParticles == 0) { + for (auto file : _testParticlesFileVec) { + // Open the dataset + H5::DataSet const dataSet = file.openDataSet(dataSetName); + + // Determine dataset size/shape and check that it's correct + H5::DataSpace dataSpace = dataSet.getSpace(); + + // Get the number of elements and increase the total count + size_t localNumParticles = dataSpace.getSimpleExtentNpoints(); + _testTotalNumParticles += localNumParticles; + } + } + + // Allocate the vectors + std::vector unsortedTestData; + std::vector testData(_testTotalNumParticles); + + // Load in the data + for (size_t rank = 0; rank < numMpiRanks; rank++) { + // Open the dataset + H5::DataSet const testDataSet = + _testParticlesFileVec[rank].openDataSet(dataSetName); + + // Determine dataset size/shape and check that it's correct + H5::DataSpace const testDataSpace = testDataSet.getSpace(); + + size_t localNumParticles = testDataSpace.getSimpleExtentNpoints(); + std::vector tempVector(localNumParticles); + + // Read in data + testDataSet.read(tempVector.data(), H5::PredType::NATIVE_DOUBLE); + unsortedTestData.insert(unsortedTestData.end(), tempVector.begin(), + tempVector.end()); + } + + // Generate the sorting vector if it's not already generated + std::vector tempSortedIndices; + if (dataSetName == "particle_IDs") { + tempSortedIndices.resize(_testTotalNumParticles); + std::iota(tempSortedIndices.begin(), tempSortedIndices.end(), 0); + std::sort(tempSortedIndices.begin(), tempSortedIndices.end(), + [&](size_t A, size_t B) -> bool { + return unsortedTestData[A] < unsortedTestData[B]; + }); + } + std::vector static const sortedIndices = tempSortedIndices; + + // Sort the vector + for (size_t i = 0; i < _testTotalNumParticles; i++) { + testData.at(i) = unsortedTestData.at(sortedIndices.at(i)); + } + + // Return the entire dataset fully concatenated and sorted + return testData; +} +// ============================================================================= + +// ============================================================================= +std::vector systemTest::SystemTestRunner::_loadFiducialFieldData( + std::string const &dataSetName) +{ + if (_fiducialFileExists) { + // Open the dataset + H5::DataSet const fiducialDataSet = _fiducialFile.openDataSet(dataSetName); + + // Determine dataset size/shape and check that it's correct + H5::DataSpace fiducialDataSpace = fiducialDataSet.getSpace(); + + std::vector fidDims{1, 1, 1}; + fiducialDataSpace.getSimpleExtentDims(fidDims.data()); + + // Allocate vectors, Note that I'm casting everything to double. Some + // of the vectors are ints in the HDF5 file and if the casting + // becomes an issue we can fix it later + std::vector fiducialData(fidDims[0] * fidDims[1] * fidDims[2]); + + // Read in data + fiducialDataSet.read(fiducialData.data(), H5::PredType::NATIVE_DOUBLE); + return fiducialData; + } else { + return _fiducialDataSets[dataSetName]; + } +} +// ============================================================================= + +// ============================================================================= +std::vector systemTest::SystemTestRunner::_loadFiducialParticleData( + std::string const &dataSetName) +{ + if (_fiducialFileExists) { // Determine the total number of particles - if (_testTotalNumParticles == 0) - { - for (auto file: _testParticlesFileVec) - { - // Open the dataset - H5::DataSet const dataSet = file.openDataSet(dataSetName); - - // Determine dataset size/shape and check that it's correct - H5::DataSpace dataSpace = dataSet.getSpace(); - - // Get the number of elements and increase the total count - size_t localNumParticles = dataSpace.getSimpleExtentNpoints(); - _testTotalNumParticles += localNumParticles; - } + if (_fiducialTotalNumParticles == 0) { + // Open the dataset + H5::DataSet const dataSet = _fiducialFile.openDataSet(dataSetName); + + // Determine dataset size/shape and check that it's correct + H5::DataSpace dataSpace = dataSet.getSpace(); + + // Get the number of elements and increase the total count + size_t localNumParticles = dataSpace.getSimpleExtentNpoints(); + _fiducialTotalNumParticles += localNumParticles; } // Allocate the vectors - std::vector unsortedTestData; - std::vector testData(_testTotalNumParticles); + std::vector unsortedFiducialData(_fiducialTotalNumParticles); + std::vector fiducialData(_fiducialTotalNumParticles); // Load in the data - for (size_t rank = 0; rank < numMpiRanks; rank++) - { - // Open the dataset - H5::DataSet const testDataSet = _testParticlesFileVec[rank].openDataSet(dataSetName); + // Open the dataset + H5::DataSet const fiducialDataSet = _fiducialFile.openDataSet(dataSetName); - // Determine dataset size/shape and check that it's correct - H5::DataSpace const testDataSpace = testDataSet.getSpace(); + // Determine dataset size/shape and check that it's correct + H5::DataSpace const testDataSpace = fiducialDataSet.getSpace(); - size_t localNumParticles = testDataSpace.getSimpleExtentNpoints(); - std::vector tempVector(localNumParticles); + size_t localNumParticles = testDataSpace.getSimpleExtentNpoints(); - // Read in data - testDataSet.read(tempVector.data(), + // Read in data + fiducialDataSet.read(unsortedFiducialData.data(), H5::PredType::NATIVE_DOUBLE); - unsortedTestData.insert(unsortedTestData.end(), - tempVector.begin(), - tempVector.end() ); - } // Generate the sorting vector if it's not already generated std::vector tempSortedIndices; - if (dataSetName == "particle_IDs") - { - tempSortedIndices.resize(_testTotalNumParticles); - std::iota(tempSortedIndices.begin(), tempSortedIndices.end(), 0); - std::sort(tempSortedIndices.begin(), tempSortedIndices.end(), + if (dataSetName == "particle_IDs") { + tempSortedIndices.resize(_fiducialTotalNumParticles); + std::iota(tempSortedIndices.begin(), tempSortedIndices.end(), 0); + std::sort(tempSortedIndices.begin(), tempSortedIndices.end(), [&](size_t A, size_t B) -> bool { - return unsortedTestData[A] < unsortedTestData[B]; - }); + return unsortedFiducialData.at(A) < + unsortedFiducialData.at(B); + }); } - std::vector static const sortedIndices = tempSortedIndices; + std::vector const static sortedIndices = tempSortedIndices; // Sort the vector - for (size_t i = 0; i < _testTotalNumParticles; i++) - { - testData.at(i) = unsortedTestData.at(sortedIndices.at(i)); + for (size_t i = 0; i < _fiducialTotalNumParticles; i++) { + fiducialData.at(i) = unsortedFiducialData.at(sortedIndices.at(i)); } // Return the entire dataset fully concatenated and sorted - return testData; -} -// ============================================================================= - -// ============================================================================= -std::vector systemTest::SystemTestRunner::_loadFiducialFieldData( - std::string const &dataSetName) -{ - if (_fiducialFileExists) - { - // Open the dataset - H5::DataSet const fiducialDataSet = _fiducialFile.openDataSet(dataSetName); - - // Determine dataset size/shape and check that it's correct - H5::DataSpace fiducialDataSpace = fiducialDataSet.getSpace(); - - std::vector fidDims{1,1,1}; - fiducialDataSpace.getSimpleExtentDims(fidDims.data()); - - // Allocate vectors, Note that I'm casting everything to double. Some - // of the vectors are ints in the HDF5 file and if the casting - // becomes an issue we can fix it later - std::vector fiducialData(fidDims[0] * fidDims[1] * fidDims[2]); - - // Read in data - fiducialDataSet.read(fiducialData.data(), H5::PredType::NATIVE_DOUBLE); - return fiducialData; - } - else - { - return _fiducialDataSets[dataSetName]; - } -} -// ============================================================================= - -// ============================================================================= -std::vector systemTest::SystemTestRunner::_loadFiducialParticleData( - std::string const &dataSetName) -{ - if (_fiducialFileExists) - { - // Determine the total number of particles - if (_fiducialTotalNumParticles == 0) - { - // Open the dataset - H5::DataSet const dataSet = _fiducialFile.openDataSet(dataSetName); - - // Determine dataset size/shape and check that it's correct - H5::DataSpace dataSpace = dataSet.getSpace(); - - // Get the number of elements and increase the total count - size_t localNumParticles = dataSpace.getSimpleExtentNpoints(); - _fiducialTotalNumParticles += localNumParticles; - } - - // Allocate the vectors - std::vector unsortedFiducialData(_fiducialTotalNumParticles); - std::vector fiducialData(_fiducialTotalNumParticles); - - // Load in the data - // Open the dataset - H5::DataSet const fiducialDataSet = _fiducialFile.openDataSet(dataSetName); - - // Determine dataset size/shape and check that it's correct - H5::DataSpace const testDataSpace = fiducialDataSet.getSpace(); - - size_t localNumParticles = testDataSpace.getSimpleExtentNpoints(); - - // Read in data - fiducialDataSet.read(unsortedFiducialData.data(), - H5::PredType::NATIVE_DOUBLE); - - // Generate the sorting vector if it's not already generated - std::vector tempSortedIndices; - if (dataSetName == "particle_IDs") - { - tempSortedIndices.resize(_fiducialTotalNumParticles); - std::iota(tempSortedIndices.begin(), tempSortedIndices.end(), 0); - std::sort(tempSortedIndices.begin(), tempSortedIndices.end(), - [&](size_t A, size_t B) -> bool { - return unsortedFiducialData.at(A) < unsortedFiducialData.at(B); - }); - } - std::vector const static sortedIndices = tempSortedIndices; - - // Sort the vector - for (size_t i = 0; i < _fiducialTotalNumParticles; i++) - { - fiducialData.at(i) = unsortedFiducialData.at(sortedIndices.at(i)); - } - - // Return the entire dataset fully concatenated and sorted - return fiducialData; - } - else - { - return _fiducialDataSets[dataSetName]; - } + return fiducialData; + } else { + return _fiducialDataSets[dataSetName]; + } } // ============================================================================= // ============================================================================= std::vector systemTest::SystemTestRunner::_findDataSetNames( - H5::H5File const &inputFile) + H5::H5File const &inputFile) { - std::vector outputVector; + std::vector outputVector; - for (size_t dataSetID = 0; - dataSetID < inputFile.getNumObjs(); - dataSetID++) - { - outputVector.push_back(inputFile.getObjnameByIdx(dataSetID)); - } - return outputVector; + for (size_t dataSetID = 0; dataSetID < inputFile.getNumObjs(); dataSetID++) { + outputVector.push_back(inputFile.getObjnameByIdx(dataSetID)); + } + return outputVector; }; // ============================================================================= diff --git a/src/system_tests/system_tester.h b/src/system_tests/system_tester.h index 29b8b74d0..e9eb2a0cb 100644 --- a/src/system_tests/system_tester.h +++ b/src/system_tests/system_tester.h @@ -9,10 +9,10 @@ #pragma once // STL includes -#include -#include #include +#include #include +#include // External Libraries and Headers #include @@ -23,381 +23,387 @@ */ namespace systemTest { - /*! - * \brief Runs a system test using the full test name to determine all - * paths. - * - * \details By default this class uses the full name of your test, i.e. the test - * suite name plus the test name, along with some global variables to - * determine the paths to all the input files. The global variables are all - * set in main_tests.cpp and are the path to the Cholla directory, the make - * type being used, and the machine being run on. If the main function does - * get those it will throw an error so that error checking is not done here. - * - * To run a system test simply name the test according to convetion and put - * the input file in the `cholla/src/system_tests/input_files` directory and - * the data file in the `cholla/src/system_tests/fiducial_data` directory. - * Then name the files `testSuiteName_testCaseName` with the `.txt` or `.h5` - * extension respectively. If this class can't find the files it will - * throw an error with the path it searched. All the output files from the - * test are deposited in `cholla/bin/testSuiteName_testCaseName` - * - * More advanced functionality is provided with a series of member functions - * that allow you to programmatically generate the fiducial HDF5 file, - * choose which datasets to compare, whether or not to compare the number of - * time steps, etc. - * - */ - class SystemTestRunner; -} // namespace systemTest +/*! + * \brief Runs a system test using the full test name to determine all + * paths. + * + * \details By default this class uses the full name of your test, i.e. the test + * suite name plus the test name, along with some global variables to + * determine the paths to all the input files. The global variables are all + * set in main_tests.cpp and are the path to the Cholla directory, the make + * type being used, and the machine being run on. If the main function does + * get those it will throw an error so that error checking is not done here. + * + * To run a system test simply name the test according to convetion and put + * the input file in the `cholla/src/system_tests/input_files` directory and + * the data file in the `cholla/src/system_tests/fiducial_data` directory. + * Then name the files `testSuiteName_testCaseName` with the `.txt` or `.h5` + * extension respectively. If this class can't find the files it will + * throw an error with the path it searched. All the output files from the + * test are deposited in `cholla/bin/testSuiteName_testCaseName` + * + * More advanced functionality is provided with a series of member functions + * that allow you to programmatically generate the fiducial HDF5 file, + * choose which datasets to compare, whether or not to compare the number of + * time steps, etc. + * + */ +class SystemTestRunner; +} // namespace systemTest class systemTest::SystemTestRunner { -public: - /// The number of MPI ranks, defaults to 1 - size_t numMpiRanks = 1; - - /*! - * \brief Set the parameters that Cholla launches with, potentially entirely - * replacing the need for a settings file. A string of the launch parameters - * that will override the values in the settings file (if given). Any of - * Cholla's standard launch paramters work except `outdir` as that is - * reserved for usage in the systemTest::SystemTestRunner.runTest() method - */ - std::string chollaLaunchParams; - - /*! - * \brief Run the system test that has been set up - * - */ - void runTest(); - - /*! - * \brief Compute the L1 error for each field compared to the initial - * conditions. Doesn't work with particle data - * - * \param[in] maxAllowedL1Error The maximum allowed L1 error for this test - * \param[in] maxAllowedError The maximum allowed for any value in the test - * - */ - void runL1ErrorTest(double const &maxAllowedL1Error, double const &maxAllowedError=1E-7); - - /*! - * \brief Launch Cholla as it is set up - * - */ - void launchCholla(); - - void openHydroTestData(); - - /*! - * \brief Get the Cholla Path object - * - * \return std::string The path to the Cholla executable - */ - std::string getChollaPath(){return _chollaPath;}; - - /*! - * \brief Get the Cholla Settings File Path object - * - * \return std::string The full filename/path to the settings file used to - * initialize Cholla - */ - std::string getChollaSettingsFilePath(){return _chollaSettingsPath;}; - - /*! - * \brief Get the Output Directory object - * - * \return std::string The path to the directory where all the output is - * stored - */ - std::string getOutputDirectory(){return _outputDirectory;}; - - /*! - * \brief Get the Console Output Path object - * - * \return std::string The full filename/path to the file where all the - * console output is stored - */ - std::string getConsoleOutputPath(){return _consoleOutputPath;}; - - /*! - * \brief Get the Fiducial File object - * - * \return H5::H5File - */ - H5::H5File getFiducialFile(){return _fiducialFile;}; - - /*! - * \brief Get the Test File object - * - * \param index The MPI rank of the file you want to return. Defaults to 0 - * \return H5::H5File - */ - H5::H5File getTestFile(size_t const &i = 0){return _testHydroFieldsFileVec[i];}; - - /*! - * \brief Get the vector of datasets that will be tested - * - * \return std::vector - */ - std::vector getDataSetsToTest(){return _fiducialDataSetNames;}; - - /*! - * \brief Set the Fixed Epsilon value - * - * \param[in] newVal The new value of fixed epsilon - */ - void setFixedEpsilon(double const &newVal){_fixedEpsilon = newVal;}; - - /*! - * \brief Choose which datasets to test. By default it tests all the - * datasets in the fiducial data. A warning will be thrown if not all the - * datasets are being tested. Note that any call to this function will - * overwrite the default values - * - * \param[in] dataSetNames A std::vector of std::strings where each entry is - * a dataset name. Note that it is case sensitive - */ - void setDataSetsToTest(std::vector const &dataSetNames) - {_fiducialDataSetNames = dataSetNames;}; - - /*! - * \brief Set the Compare Num Time Steps object - * - * \param[in] compare Defaults to `true`. If false then the number of timesteps - * is not compared. - */ - void setCompareNumTimeSteps(bool const &compare) - {_compareNumTimeSteps = compare;}; - - /*! - * \brief Set or add a fiducial dataset - * - * \param[in] fieldName The name of the field to be added - * \param[in] dataArr The std::vector for the data vector to be added as - * a data set - */ - void setFiducialData(std::string const &fieldName, - std::vector const &dataVec); - - /*! - * \brief Set the Fiducial Num Time Steps object - * - * \param numTimeSteps The number of time steps in the fiducial data - */ - void setFiducialNumTimeSteps(int const &numTimeSteps) - {_numFiducialTimeSteps = numTimeSteps;}; - - /*! - * \brief Generate an vector of the specified size populated by the specified - * value. - * - * \param[in] value The value to populate the vector with - * \param[in] nx (optional) The size of the field in the x-direction. - * Defaults to 1 - * \param[in] ny (optional) The size of the field in the y-direction. - * Defaults to 1 - * \param[in] nz (optional) The size of the field in the z-direction. - * Defaults to 1 - * \return std::vector A 1-dimensional std::vector of the required - * size containing the data. - */ - std::vector generateConstantData(double const &value, - size_t const &nx=1, - size_t const &ny=1, - size_t const &nz=1); - - /*! - * \brief Load the test data for physical fields from the HDF5 file(s). If - * there is more than one HDF5 file then it concatenates the contents into a - * single vector. Particle data is handeled with _loadTestParticleData - * - * \param[in] dataSetName The name of the dataset to get - * \param[out] testDims An vector with the length of each dimension in it - * \param[in] file (optional) The vector of HDF5 files to load - * \return std::vector A vector containing the data - */ - std::vector loadTestFieldData(std::string dataSetName, - std::vector &testDims, - std::vector file={}); - - /*! - * \brief Generate a std::vector of the specified size populated by a sine - * wave. The equation used to generate the wave is: - * - * wave = offset + amplitude * sin(kx*xIndex + ky*yIndex + kz*zIndex + phase) - * - * \param[in] offset Flat offset from zero - * \param[in] amplitude Amplitude of the wave - * \param[in] kx The x component of the wave vector in pixel units - * \param[in] ky The y component of the wave vector in pixel units - * \param[in] kz The z component of the wave vector in pixel units - * \param[in] phase Phase of the sine wave - * \param[in] nx (optional) The size of the field in the x-direction. - * Defaults to 1 - * \param[in] ny (optional) The size of the field in the y-direction. - * Defaults to 1 - * \param[in] nz (optional) The size of the field in the z-direction. - * Defaults to 1 - * \return std::vector A 1-dimensional std::vector of the required - * size containing the data. - */ - std::vector generateSineData(double const &offset, - double const &litude, - double const &kx, - double const &ky, - double const &kz, - double const &phase, - size_t const &nx=1, - size_t const &ny=1, - size_t const &nz=1); - - // Constructor and Destructor - /*! - * \brief Construct a new System Test Runner object - * - * \param[in] particleData Is there particle data? - * \param[in] hydroData Is there hydro data? - * \param[in] useFiducialFile Indicate if you're using a HDF5 file or will - * generate your own. Defaults to `true`, i.e. using an HDF5 file. Set to - * `false` to generate your own - * \param[in] useSettingsFile Indicate if you're using a settings file. If - * `true` then the settings file is automatically found based on the naming - * convention. If false then the user MUST provide all the required settings - * with the SystemTestRunner::chollaLaunchParams member variable - */ - SystemTestRunner(bool const &particleData=false, - bool const &hydroData=true, - bool const &useFiducialFile=true, - bool const &useSettingsFile=true); - ~SystemTestRunner(); - -private: - /// The fiducial dat file - H5::H5File _fiducialFile; - /// The test hydro field data files - std::vector _testHydroFieldsFileVec; - /// The test particle data files - std::vector _testParticlesFileVec; - - /// The path to the Cholla executable - std::string _chollaPath; - /// The full name of the test with an underscore instead of a period. This - /// is the name of many of the input files, the output directory, etc - std::string _fullTestFileName; - /// The path to the Cholla settings file - std::string _chollaSettingsPath; - /// The path to the fiducial data file - std::string _fiducialFilePath; - /// The path to the output directory - std::string _outputDirectory; - /// The path and name of the console output file - std::string _consoleOutputPath; - - /// A list of all the data set names in the fiducial data file - std::vector _fiducialDataSetNames; - /// A list of all the data set names in the test data file - std::vector _testDataSetNames; - - /// The number of fiducial time steps - int _numFiducialTimeSteps; - /// Map of fiducial data sets if we're not using a fiducial file - std::unordered_map> _fiducialDataSets; - - /// The test particle IDs - std::vector _testParticleIDs; - /// The total number of particles in the test dataset - size_t _testTotalNumParticles=0; - /// The fiducial particle IDs - std::vector _fiducialParticleIDs; - /// The total number of particles in the fiducial dataset - size_t _fiducialTotalNumParticles=0; - - /// Fixed epsilon is changed from the default since AMD/Clang - /// appear to differ from NVIDIA/GCC/XL by roughly 1E-12 - double _fixedEpsilon = 5.0E-12; - - /// Flag to indicate if a fiducial HDF5 data file is being used or a - /// programmatically generated H5File object. `true` = use a file, `false` = - /// use generated H5File object - bool _fiducialFileExists = false; - /// Flag to choose whether or not to compare the number of time steps - bool _compareNumTimeSteps = true; - - /// Flag to indicate whether or not there is hydro field data - /// If true then hydro data files are searched for and will be compared to - /// fiducial values. If false then it is assumed that the test produces no - /// hydro field data - bool _hydroDataExists = true; - /// Flag to indicate whether or not there is particle data - /// If true then particle data files are searched for and will be compared - /// to fiducial values. If false then it is assumed that the test produces - /// no particle data - bool _particleDataExists = false; - - - /*! - * \brief Move a file. Throws an exception if the file does not exist. - * or if the move was unsuccessful - * - * \param[in] sourcePath The path the the file to be moved - * \param[in] destinationDirectory The path to the director the file should - * be moved to - */ - void _safeMove(std::string const &sourcePath, - std::string const &destinationDirectory); - - /*! - * \brief Checks if the given file exists. Throws an exception if the - * file does not exist. - * - * \param[in] filePath The path to the file to check for - */ - void _checkFileExists(std::string const &filePath); - - /*! - * \brief Using GTest assertions to check if the fiducial and test data have - * the same number of time steps - * - */ - void _checkNumTimeSteps(); - - /*! - * \brief Load the test data for particles from the HDF5 file(s). If - * there is more than one HDF5 file then it concatenates the contents into a - * single vector. Field data is handeled with _loadTestFieldData - * - * \param[in] dataSetName The name of the dataset to get - * \return std::vector A vector containing the data - */ - std::vector _loadTestParticleData(std::string const &dataSetName); - - /*! - * \brief Load the test data for physical fields from the HDF5 file or - * returns the user set vector. - * Particle data is handeled with _loadFiducialParticleData. - * - * \param[in] dataSetName The name of the dataset to get - * \return std::vector A vector with the contents of the data set - */ - std::vector _loadFiducialFieldData(std::string const &dataSetName); - - /*! - * \brief Load the fiducial data for particles from the HDF5 file or return - * the user set vector. Field data is handeled with _loadFiducialFieldData - * - * \param[in] dataSetName The name of the dataset to get - * \return std::vector A vector containing the data - */ - std::vector _loadFiducialParticleData(std::string const &dataSetName); - - - /*! - * \brief Return a vector of all the dataset names in the given HDF5 file - * - * \param[in] inputFile The HDF5 file to find names in - * \return std::vector - */ - std::vector _findDataSetNames(H5::H5File const &inputFile); -}; // End of class systemTest::SystemTestRunner + public: + /// The number of MPI ranks, defaults to 1 + size_t numMpiRanks = 1; + + /*! + * \brief Set the parameters that Cholla launches with, potentially entirely + * replacing the need for a settings file. A string of the launch parameters + * that will override the values in the settings file (if given). Any of + * Cholla's standard launch paramters work except `outdir` as that is + * reserved for usage in the systemTest::SystemTestRunner.runTest() method + */ + std::string chollaLaunchParams; + + /*! + * \brief Run the system test that has been set up + * + */ + void runTest(); + + /*! + * \brief Compute the L1 error for each field compared to the initial + * conditions. Doesn't work with particle data + * + * \param[in] maxAllowedL1Error The maximum allowed L1 error for this test + * \param[in] maxAllowedError The maximum allowed for any value in the test + * + */ + void runL1ErrorTest(double const &maxAllowedL1Error, + double const &maxAllowedError = 1E-7); + + /*! + * \brief Launch Cholla as it is set up + * + */ + void launchCholla(); + + void openHydroTestData(); + + /*! + * \brief Get the Cholla Path object + * + * \return std::string The path to the Cholla executable + */ + std::string getChollaPath() { return _chollaPath; }; + + /*! + * \brief Get the Cholla Settings File Path object + * + * \return std::string The full filename/path to the settings file used to + * initialize Cholla + */ + std::string getChollaSettingsFilePath() { return _chollaSettingsPath; }; + + /*! + * \brief Get the Output Directory object + * + * \return std::string The path to the directory where all the output is + * stored + */ + std::string getOutputDirectory() { return _outputDirectory; }; + + /*! + * \brief Get the Console Output Path object + * + * \return std::string The full filename/path to the file where all the + * console output is stored + */ + std::string getConsoleOutputPath() { return _consoleOutputPath; }; + + /*! + * \brief Get the Fiducial File object + * + * \return H5::H5File + */ + H5::H5File getFiducialFile() { return _fiducialFile; }; + + /*! + * \brief Get the Test File object + * + * \param index The MPI rank of the file you want to return. Defaults to 0 + * \return H5::H5File + */ + H5::H5File getTestFile(size_t const &i = 0) + { + return _testHydroFieldsFileVec[i]; + }; + + /*! + * \brief Get the vector of datasets that will be tested + * + * \return std::vector + */ + std::vector getDataSetsToTest() + { + return _fiducialDataSetNames; + }; + + /*! + * \brief Set the Fixed Epsilon value + * + * \param[in] newVal The new value of fixed epsilon + */ + void setFixedEpsilon(double const &newVal) { _fixedEpsilon = newVal; }; + + /*! + * \brief Choose which datasets to test. By default it tests all the + * datasets in the fiducial data. A warning will be thrown if not all the + * datasets are being tested. Note that any call to this function will + * overwrite the default values + * + * \param[in] dataSetNames A std::vector of std::strings where each entry is + * a dataset name. Note that it is case sensitive + */ + void setDataSetsToTest(std::vector const &dataSetNames) + { + _fiducialDataSetNames = dataSetNames; + }; + + /*! + * \brief Set the Compare Num Time Steps object + * + * \param[in] compare Defaults to `true`. If false then the number of + * timesteps is not compared. + */ + void setCompareNumTimeSteps(bool const &compare) + { + _compareNumTimeSteps = compare; + }; + + /*! + * \brief Set or add a fiducial dataset + * + * \param[in] fieldName The name of the field to be added + * \param[in] dataArr The std::vector for the data vector to be added as + * a data set + */ + void setFiducialData(std::string const &fieldName, + std::vector const &dataVec); + + /*! + * \brief Set the Fiducial Num Time Steps object + * + * \param numTimeSteps The number of time steps in the fiducial data + */ + void setFiducialNumTimeSteps(int const &numTimeSteps) + { + _numFiducialTimeSteps = numTimeSteps; + }; + + /*! + * \brief Generate an vector of the specified size populated by the specified + * value. + * + * \param[in] value The value to populate the vector with + * \param[in] nx (optional) The size of the field in the x-direction. + * Defaults to 1 + * \param[in] ny (optional) The size of the field in the y-direction. + * Defaults to 1 + * \param[in] nz (optional) The size of the field in the z-direction. + * Defaults to 1 + * \return std::vector A 1-dimensional std::vector of the required + * size containing the data. + */ + std::vector generateConstantData(double const &value, + size_t const &nx = 1, + size_t const &ny = 1, + size_t const &nz = 1); + + /*! + * \brief Load the test data for physical fields from the HDF5 file(s). If + * there is more than one HDF5 file then it concatenates the contents into a + * single vector. Particle data is handeled with _loadTestParticleData + * + * \param[in] dataSetName The name of the dataset to get + * \param[out] testDims An vector with the length of each dimension in it + * \param[in] file (optional) The vector of HDF5 files to load + * \return std::vector A vector containing the data + */ + std::vector loadTestFieldData(std::string dataSetName, + std::vector &testDims, + std::vector file = {}); + + /*! + * \brief Generate a std::vector of the specified size populated by a sine + * wave. The equation used to generate the wave is: + * + * wave = offset + amplitude * sin(kx*xIndex + ky*yIndex + kz*zIndex + phase) + * + * \param[in] offset Flat offset from zero + * \param[in] amplitude Amplitude of the wave + * \param[in] kx The x component of the wave vector in pixel units + * \param[in] ky The y component of the wave vector in pixel units + * \param[in] kz The z component of the wave vector in pixel units + * \param[in] phase Phase of the sine wave + * \param[in] nx (optional) The size of the field in the x-direction. + * Defaults to 1 + * \param[in] ny (optional) The size of the field in the y-direction. + * Defaults to 1 + * \param[in] nz (optional) The size of the field in the z-direction. + * Defaults to 1 + * \return std::vector A 1-dimensional std::vector of the required + * size containing the data. + */ + std::vector generateSineData( + double const &offset, double const &litude, double const &kx, + double const &ky, double const &kz, double const &phase, + size_t const &nx = 1, size_t const &ny = 1, size_t const &nz = 1); + + // Constructor and Destructor + /*! + * \brief Construct a new System Test Runner object + * + * \param[in] particleData Is there particle data? + * \param[in] hydroData Is there hydro data? + * \param[in] useFiducialFile Indicate if you're using a HDF5 file or will + * generate your own. Defaults to `true`, i.e. using an HDF5 file. Set to + * `false` to generate your own + * \param[in] useSettingsFile Indicate if you're using a settings file. If + * `true` then the settings file is automatically found based on the naming + * convention. If false then the user MUST provide all the required settings + * with the SystemTestRunner::chollaLaunchParams member variable + */ + SystemTestRunner(bool const &particleData = false, + bool const &hydroData = true, + bool const &useFiducialFile = true, + bool const &useSettingsFile = true); + ~SystemTestRunner(); + + private: + /// The fiducial dat file + H5::H5File _fiducialFile; + /// The test hydro field data files + std::vector _testHydroFieldsFileVec; + /// The test particle data files + std::vector _testParticlesFileVec; + + /// The path to the Cholla executable + std::string _chollaPath; + /// The full name of the test with an underscore instead of a period. This + /// is the name of many of the input files, the output directory, etc + std::string _fullTestFileName; + /// The path to the Cholla settings file + std::string _chollaSettingsPath; + /// The path to the fiducial data file + std::string _fiducialFilePath; + /// The path to the output directory + std::string _outputDirectory; + /// The path and name of the console output file + std::string _consoleOutputPath; + + /// A list of all the data set names in the fiducial data file + std::vector _fiducialDataSetNames; + /// A list of all the data set names in the test data file + std::vector _testDataSetNames; + + /// The number of fiducial time steps + int _numFiducialTimeSteps; + /// Map of fiducial data sets if we're not using a fiducial file + std::unordered_map> _fiducialDataSets; + + /// The test particle IDs + std::vector _testParticleIDs; + /// The total number of particles in the test dataset + size_t _testTotalNumParticles = 0; + /// The fiducial particle IDs + std::vector _fiducialParticleIDs; + /// The total number of particles in the fiducial dataset + size_t _fiducialTotalNumParticles = 0; + + /// Fixed epsilon is changed from the default since AMD/Clang + /// appear to differ from NVIDIA/GCC/XL by roughly 1E-12 + double _fixedEpsilon = 5.0E-12; + + /// Flag to indicate if a fiducial HDF5 data file is being used or a + /// programmatically generated H5File object. `true` = use a file, `false` = + /// use generated H5File object + bool _fiducialFileExists = false; + /// Flag to choose whether or not to compare the number of time steps + bool _compareNumTimeSteps = true; + + /// Flag to indicate whether or not there is hydro field data + /// If true then hydro data files are searched for and will be compared to + /// fiducial values. If false then it is assumed that the test produces no + /// hydro field data + bool _hydroDataExists = true; + /// Flag to indicate whether or not there is particle data + /// If true then particle data files are searched for and will be compared + /// to fiducial values. If false then it is assumed that the test produces + /// no particle data + bool _particleDataExists = false; + + /*! + * \brief Move a file. Throws an exception if the file does not exist. + * or if the move was unsuccessful + * + * \param[in] sourcePath The path the the file to be moved + * \param[in] destinationDirectory The path to the director the file should + * be moved to + */ + void _safeMove(std::string const &sourcePath, + std::string const &destinationDirectory); + + /*! + * \brief Checks if the given file exists. Throws an exception if the + * file does not exist. + * + * \param[in] filePath The path to the file to check for + */ + void _checkFileExists(std::string const &filePath); + + /*! + * \brief Using GTest assertions to check if the fiducial and test data have + * the same number of time steps + * + */ + void _checkNumTimeSteps(); + + /*! + * \brief Load the test data for particles from the HDF5 file(s). If + * there is more than one HDF5 file then it concatenates the contents into a + * single vector. Field data is handeled with _loadTestFieldData + * + * \param[in] dataSetName The name of the dataset to get + * \return std::vector A vector containing the data + */ + std::vector _loadTestParticleData(std::string const &dataSetName); + + /*! + * \brief Load the test data for physical fields from the HDF5 file or + * returns the user set vector. + * Particle data is handeled with _loadFiducialParticleData. + * + * \param[in] dataSetName The name of the dataset to get + * \return std::vector A vector with the contents of the data set + */ + std::vector _loadFiducialFieldData(std::string const &dataSetName); + + /*! + * \brief Load the fiducial data for particles from the HDF5 file or return + * the user set vector. Field data is handeled with _loadFiducialFieldData + * + * \param[in] dataSetName The name of the dataset to get + * \return std::vector A vector containing the data + */ + std::vector _loadFiducialParticleData(std::string const &dataSetName); + + /*! + * \brief Return a vector of all the dataset names in the given HDF5 file + * + * \param[in] inputFile The HDF5 file to find names in + * \return std::vector + */ + std::vector _findDataSetNames(H5::H5File const &inputFile); +}; // End of class systemTest::SystemTestRunner diff --git a/src/utils/DeviceVector.h b/src/utils/DeviceVector.h index ca0cacba8..337052a50 100644 --- a/src/utils/DeviceVector.h +++ b/src/utils/DeviceVector.h @@ -10,10 +10,10 @@ #pragma once // STL Includes -#include -#include -#include #include +#include +#include +#include // External Includes @@ -27,319 +27,303 @@ // ============================================================================= namespace cuda_utilities { - /*! - * \brief A templatized class to encapsulate a device global memory pointer - * in a std::vector like interface complete with most of the usual methods. - * This class is intended to be used only in host code and does not work - * device side; Passing the pointer to a kernel can be done with the - * `data()` method. This class works for any device side pointer, scalar or - * array valued. - * - * \tparam T Any serialized type where `sizeof(T)` returns correct results - * should work but non-primitive types have not been tested. - */ - template - class DeviceVector - { - public: - /*! - * \brief Construct a new Device Vector object by calling the - * `_allocate` private method - * - * \param[in] size The number of elements desired in the array. Can be - * any positive integer. - * \param[in] initialize (optional) If true then initialize the GPU - * memory to int(0) - */ - DeviceVector(size_t const size, bool const initialize=false); - - /*! - * \brief Destroy the Device Vector object by calling the `_deAllocate` - * private method - * - */ - ~DeviceVector() {_deAllocate();} - - /*! - * \brief Get the raw device pointer - * - * \return T* The pointer for the array in global memory - */ - T* data() {return _ptr;} - - /*! - * \brief Get the number of elements in the array. - * - * \return size_t The number of elements in the array - */ - size_t size() {return _size;} - - /*! - * \brief Overload the [] operator to return a value from device memory. - * This method performs a cudaMemcpy to copy the desired element to the - * host then returns it. Unlike the `at()` method this method does not - * perform bounds checking - * - * \param[in] index The index of the desired value - * \return T The value at dev_ptr[index] - */ - T operator [] (size_t const &index); - - /*! - * \brief Return a value from device memory. This method performs a - * cudaMemcpy to copy the desired element to the host then returns it. - * Unlike the `[]` overload this method perform bounds checking - * - * \param[in] index The index of the desired value - * \return T The value at dev_ptr[index] - */ - T const at(size_t const index); - - /*! - * \brief Assign a single value in the array. Should generally only be - * used when the pointer points to a scalar value. By default this - * writes `hostValue` to the 0th element of the array. - * - * \param[in] hostValue The value to write to the device array - * \param[in] index The location to write the value to, defaults to zero. - */ - void assign(T const &hostValue, size_t const &index=0); - - /*! - * \brief Resize the device container to contain `newSize` elements. If - * `newSize` is greater than the current size then all the values are - * kept and the rest of the array is default initialized. If `newSize` - * is smaller than the current size then the array is truncated and - * values at locations greater than `newSize` are lost. Keeping the - * values in the array requires that the new array be allocated, the - * values be copied, then the old array be freed; as such this method is - * quite slow and can use a large amount of memory. If you don't care - * about the values in the array then use the `reset` method - * - * \param[in] newSize The desired size of the array - */ - void resize(size_t const newSize); - - /*! - * \brief Reset the size of the array. This frees the old array and - * allocates a new one; all values in the array may be lost. The values - * in memory are not initialized and therefore the behaviour of the - * default values is undefined - * - * \param newSize - */ - void reset(size_t const newSize); - - /*! - * \brief Copy the first `arrSize` elements of `arrIn` to the device. - * - * \param[in] arrIn The pointer to the array to be copied to the device - * \param[in] arrSize The number of elements/size of the array to copy - * to the device - */ - void cpyHostToDevice(const T * arrIn, size_t const &arrSize); - - /*! - * \brief Copy the contents of a std::vector to the device - * - * \param[in] vecIn The array whose contents are to be copied - */ - void cpyHostToDevice(std::vector const &vecIn) - {cpyHostToDevice(vecIn.data(), vecIn.size());} - - /*! - * \brief Copy the array from the device to a host array. Checks if the - * host array is large enough based on the `arrSize` parameter. - * - * \param[out] arrOut The pointer to the host array - * \param[in] arrSize The number of elements allocated in the host array - */ - void cpyDeviceToHost(T * arrOut, size_t const &arrSize); - - /*! - * \brief Copy the array from the device to a host std::vector. Checks - * if the host array is large enough. - * - * \param[out] vecOut The std::vector to copy the device array into - */ - void cpyDeviceToHost(std::vector &vecOut) - {cpyDeviceToHost(vecOut.data(), vecOut.size());} - - private: - /// The size of the device array - size_t _size; - - /// The pointer to the device array - T *_ptr=nullptr; - - /*! - * \brief Allocate the device side array - * - * \param[in] size The size of the array to allocate - */ - void _allocate(size_t const size) - { - _size=size; - CudaSafeCall(cudaMalloc(&_ptr, _size*sizeof(T))); - } - - /*! - * \brief Free the device side array - * - */ - void _deAllocate(){CudaSafeCall(cudaFree(_ptr));} - }; -} // End of cuda_utilities namespace +/*! + * \brief A templatized class to encapsulate a device global memory pointer + * in a std::vector like interface complete with most of the usual methods. + * This class is intended to be used only in host code and does not work + * device side; Passing the pointer to a kernel can be done with the + * `data()` method. This class works for any device side pointer, scalar or + * array valued. + * + * \tparam T Any serialized type where `sizeof(T)` returns correct results + * should work but non-primitive types have not been tested. + */ +template +class DeviceVector +{ + public: + /*! + * \brief Construct a new Device Vector object by calling the + * `_allocate` private method + * + * \param[in] size The number of elements desired in the array. Can be + * any positive integer. + * \param[in] initialize (optional) If true then initialize the GPU + * memory to int(0) + */ + DeviceVector(size_t const size, bool const initialize = false); + + /*! + * \brief Destroy the Device Vector object by calling the `_deAllocate` + * private method + * + */ + ~DeviceVector() { _deAllocate(); } + + /*! + * \brief Get the raw device pointer + * + * \return T* The pointer for the array in global memory + */ + T *data() { return _ptr; } + + /*! + * \brief Get the number of elements in the array. + * + * \return size_t The number of elements in the array + */ + size_t size() { return _size; } + + /*! + * \brief Overload the [] operator to return a value from device memory. + * This method performs a cudaMemcpy to copy the desired element to the + * host then returns it. Unlike the `at()` method this method does not + * perform bounds checking + * + * \param[in] index The index of the desired value + * \return T The value at dev_ptr[index] + */ + T operator[](size_t const &index); + + /*! + * \brief Return a value from device memory. This method performs a + * cudaMemcpy to copy the desired element to the host then returns it. + * Unlike the `[]` overload this method perform bounds checking + * + * \param[in] index The index of the desired value + * \return T The value at dev_ptr[index] + */ + T const at(size_t const index); + + /*! + * \brief Assign a single value in the array. Should generally only be + * used when the pointer points to a scalar value. By default this + * writes `hostValue` to the 0th element of the array. + * + * \param[in] hostValue The value to write to the device array + * \param[in] index The location to write the value to, defaults to zero. + */ + void assign(T const &hostValue, size_t const &index = 0); + + /*! + * \brief Resize the device container to contain `newSize` elements. If + * `newSize` is greater than the current size then all the values are + * kept and the rest of the array is default initialized. If `newSize` + * is smaller than the current size then the array is truncated and + * values at locations greater than `newSize` are lost. Keeping the + * values in the array requires that the new array be allocated, the + * values be copied, then the old array be freed; as such this method is + * quite slow and can use a large amount of memory. If you don't care + * about the values in the array then use the `reset` method + * + * \param[in] newSize The desired size of the array + */ + void resize(size_t const newSize); + + /*! + * \brief Reset the size of the array. This frees the old array and + * allocates a new one; all values in the array may be lost. The values + * in memory are not initialized and therefore the behaviour of the + * default values is undefined + * + * \param newSize + */ + void reset(size_t const newSize); + + /*! + * \brief Copy the first `arrSize` elements of `arrIn` to the device. + * + * \param[in] arrIn The pointer to the array to be copied to the device + * \param[in] arrSize The number of elements/size of the array to copy + * to the device + */ + void cpyHostToDevice(const T *arrIn, size_t const &arrSize); + + /*! + * \brief Copy the contents of a std::vector to the device + * + * \param[in] vecIn The array whose contents are to be copied + */ + void cpyHostToDevice(std::vector const &vecIn) + { + cpyHostToDevice(vecIn.data(), vecIn.size()); + } + + /*! + * \brief Copy the array from the device to a host array. Checks if the + * host array is large enough based on the `arrSize` parameter. + * + * \param[out] arrOut The pointer to the host array + * \param[in] arrSize The number of elements allocated in the host array + */ + void cpyDeviceToHost(T *arrOut, size_t const &arrSize); + + /*! + * \brief Copy the array from the device to a host std::vector. Checks + * if the host array is large enough. + * + * \param[out] vecOut The std::vector to copy the device array into + */ + void cpyDeviceToHost(std::vector &vecOut) + { + cpyDeviceToHost(vecOut.data(), vecOut.size()); + } + + private: + /// The size of the device array + size_t _size; + + /// The pointer to the device array + T *_ptr = nullptr; + + /*! + * \brief Allocate the device side array + * + * \param[in] size The size of the array to allocate + */ + void _allocate(size_t const size) + { + _size = size; + CudaSafeCall(cudaMalloc(&_ptr, _size * sizeof(T))); + } + + /*! + * \brief Free the device side array + * + */ + void _deAllocate() { CudaSafeCall(cudaFree(_ptr)); } +}; +} // namespace cuda_utilities // ============================================================================= // End declaration of DeviceVector class // ============================================================================= - // ============================================================================= // Definition of DeviceVector class // ============================================================================= namespace cuda_utilities { - // ========================================================================= - // Public Methods - // ========================================================================= - - // ========================================================================= - template - DeviceVector::DeviceVector(size_t const size, bool const initialize) - { - _allocate(size); - - if (initialize) - { - CudaSafeCall(cudaMemset(_ptr, 0, _size*sizeof(T))); - } - } - // ========================================================================= - - // ========================================================================= - template - void DeviceVector::resize(size_t const newSize) - { - // Assign old array to a new pointer - T * oldDevPtr = _ptr; - - // Determine how many elements to copy - size_t const count = std::min(_size, newSize) * sizeof(T); - - // Allocate new array - _allocate(newSize); - - // Copy the values from the old array to the new array - CudaSafeCall(cudaMemcpyPeer(_ptr, 0, oldDevPtr, 0, count)); - - // Free the old array - CudaSafeCall(cudaFree(oldDevPtr)); - } - // ========================================================================= - - // ========================================================================= - template - void DeviceVector::reset(size_t const newSize) - { - _deAllocate(); - _allocate(newSize); - } - // ========================================================================= - - // ========================================================================= - template - T DeviceVector::operator [] (size_t const &index) - { - T hostValue; - CudaSafeCall(cudaMemcpy(&hostValue, - &(_ptr[index]), - sizeof(T), - cudaMemcpyDeviceToHost)); - return hostValue; - } - // ========================================================================= - - // ========================================================================= - template - T const DeviceVector::at(size_t const index) - { - if (index < _size) - { - // Use the overloaded [] operator to grab the value from GPU memory - // into host memory - return (*this)[index]; - } - else - { - throw std::out_of_range("Warning: DeviceVector.at() detected an" - " out of bounds memory access. Tried to" - " access element " - + std::to_string(index) - + " of " - + std::to_string(_size)); - } - } - // ========================================================================= - - // ========================================================================= - template - void DeviceVector::assign(T const &hostValue, size_t const &index) - { - CudaSafeCall(cudaMemcpy(&(_ptr[index]), // destination - &hostValue, // source - sizeof(T), - cudaMemcpyHostToDevice)); - } - // ========================================================================= - - // ========================================================================= - template - void DeviceVector::cpyHostToDevice(const T * arrIn, size_t const &arrSize) - { - if (arrSize <= _size) - { - CudaSafeCall(cudaMemcpy(_ptr, - arrIn, - arrSize*sizeof(T), - cudaMemcpyHostToDevice)); - } - else - { - throw std::out_of_range("Warning: Couldn't copy array to device," - " device array is too small. Host array" - " size=" - + std::to_string(arrSize) - + ", device array size=" - + std::to_string(arrSize)); - } - - } - // ========================================================================= - - // ========================================================================= - template - void DeviceVector::cpyDeviceToHost(T * arrOut, size_t const &arrSize) - { - if (_size <= arrSize) - { - CudaSafeCall(cudaMemcpy(arrOut, - _ptr, - _size*sizeof(T), - cudaMemcpyDeviceToHost)); - } - else - { - throw std::out_of_range("Warning: Couldn't copy array to host, " - "host array is too small. Host array " - "size=" - + std::to_string(arrSize) - + ", device array size=" - + std::to_string(arrSize)); - } - } - // ========================================================================= -} // end namespace cuda_utilities +// ========================================================================= +// Public Methods +// ========================================================================= + +// ========================================================================= +template +DeviceVector::DeviceVector(size_t const size, bool const initialize) +{ + _allocate(size); + + if (initialize) { + CudaSafeCall(cudaMemset(_ptr, 0, _size * sizeof(T))); + } +} +// ========================================================================= + +// ========================================================================= +template +void DeviceVector::resize(size_t const newSize) +{ + // Assign old array to a new pointer + T *oldDevPtr = _ptr; + + // Determine how many elements to copy + size_t const count = std::min(_size, newSize) * sizeof(T); + + // Allocate new array + _allocate(newSize); + + // Copy the values from the old array to the new array + CudaSafeCall(cudaMemcpyPeer(_ptr, 0, oldDevPtr, 0, count)); + + // Free the old array + CudaSafeCall(cudaFree(oldDevPtr)); +} +// ========================================================================= + +// ========================================================================= +template +void DeviceVector::reset(size_t const newSize) +{ + _deAllocate(); + _allocate(newSize); +} +// ========================================================================= + +// ========================================================================= +template +T DeviceVector::operator[](size_t const &index) +{ + T hostValue; + CudaSafeCall(cudaMemcpy(&hostValue, &(_ptr[index]), sizeof(T), + cudaMemcpyDeviceToHost)); + return hostValue; +} +// ========================================================================= + +// ========================================================================= +template +T const DeviceVector::at(size_t const index) +{ + if (index < _size) { + // Use the overloaded [] operator to grab the value from GPU memory + // into host memory + return (*this)[index]; + } else { + throw std::out_of_range( + "Warning: DeviceVector.at() detected an" + " out of bounds memory access. Tried to" + " access element " + + std::to_string(index) + " of " + std::to_string(_size)); + } +} +// ========================================================================= + +// ========================================================================= +template +void DeviceVector::assign(T const &hostValue, size_t const &index) +{ + CudaSafeCall(cudaMemcpy(&(_ptr[index]), // destination + &hostValue, // source + sizeof(T), cudaMemcpyHostToDevice)); +} +// ========================================================================= + +// ========================================================================= +template +void DeviceVector::cpyHostToDevice(const T *arrIn, size_t const &arrSize) +{ + if (arrSize <= _size) { + CudaSafeCall( + cudaMemcpy(_ptr, arrIn, arrSize * sizeof(T), cudaMemcpyHostToDevice)); + } else { + throw std::out_of_range( + "Warning: Couldn't copy array to device," + " device array is too small. Host array" + " size=" + + std::to_string(arrSize) + + ", device array size=" + std::to_string(arrSize)); + } +} +// ========================================================================= + +// ========================================================================= +template +void DeviceVector::cpyDeviceToHost(T *arrOut, size_t const &arrSize) +{ + if (_size <= arrSize) { + CudaSafeCall( + cudaMemcpy(arrOut, _ptr, _size * sizeof(T), cudaMemcpyDeviceToHost)); + } else { + throw std::out_of_range( + "Warning: Couldn't copy array to host, " + "host array is too small. Host array " + "size=" + + std::to_string(arrSize) + + ", device array size=" + std::to_string(arrSize)); + } +} +// ========================================================================= +} // end namespace cuda_utilities // ============================================================================= // End definition of DeviceVector class // ============================================================================= \ No newline at end of file diff --git a/src/utils/DeviceVector_tests.cu b/src/utils/DeviceVector_tests.cu index 3db21baee..c873f2106 100644 --- a/src/utils/DeviceVector_tests.cu +++ b/src/utils/DeviceVector_tests.cu @@ -6,51 +6,53 @@ */ // STL Includes -#include -#include #include #include +#include +#include // External Includes -#include // Include GoogleTest and related libraries/headers +#include // Include GoogleTest and related libraries/headers // Local Includes #include "../global/global.h" -#include "../utils/testing_utilities.h" #include "../utils/DeviceVector.h" +#include "../utils/testing_utilities.h" - -namespace // Anonymous namespace +namespace // Anonymous namespace { - template - void checkPointerAttributes(cuda_utilities::DeviceVector &devVector) - { - // Get the pointer information - cudaPointerAttributes ptrAttributes; - CudaSafeCall(cudaPointerGetAttributes(&ptrAttributes, devVector.data())); - - // Warning strings - std::string typeMessage = "ptrAttributes.type should be 2 since " - "that indicates type cudaMemoryTypeDevice. " - "0 is cudaMemoryTypeUnregistered, " - "1 is cudaMemoryTypeHost, and " - "3 is cudaMemoryTypeManaged"; - std::string const deviceMessage = "The pointer should be on device 0"; - std::string const devPtrMessage = "The device pointer is nullptr"; - std::string const hostPtrMessage = "The host pointer is not nullptr"; - - // Check that the pointer information is correct - #ifdef O_HIP - typeMessage = "ptrAttributes.memoryType should be 1 since that indicates a HIP device pointer."; - EXPECT_EQ(1, ptrAttributes.memoryType) << typeMessage; - #else // O_HIP is not defined i.e. we're using CUDA - EXPECT_EQ(2, ptrAttributes.type) << typeMessage; - #endif // O_HIP - EXPECT_EQ(0, ptrAttributes.device) << deviceMessage; - EXPECT_NE(nullptr, ptrAttributes.devicePointer) << devPtrMessage; - EXPECT_EQ(nullptr, ptrAttributes.hostPointer) << hostPtrMessage; - } -} // Anonymous namespace +template +void checkPointerAttributes(cuda_utilities::DeviceVector &devVector) +{ + // Get the pointer information + cudaPointerAttributes ptrAttributes; + CudaSafeCall(cudaPointerGetAttributes(&ptrAttributes, devVector.data())); + + // Warning strings + std::string typeMessage = + "ptrAttributes.type should be 2 since " + "that indicates type cudaMemoryTypeDevice. " + "0 is cudaMemoryTypeUnregistered, " + "1 is cudaMemoryTypeHost, and " + "3 is cudaMemoryTypeManaged"; + std::string const deviceMessage = "The pointer should be on device 0"; + std::string const devPtrMessage = "The device pointer is nullptr"; + std::string const hostPtrMessage = "The host pointer is not nullptr"; + +// Check that the pointer information is correct +#ifdef O_HIP + typeMessage = + "ptrAttributes.memoryType should be 1 since that indicates a HIP device " + "pointer."; + EXPECT_EQ(1, ptrAttributes.memoryType) << typeMessage; +#else // O_HIP is not defined i.e. we're using CUDA + EXPECT_EQ(2, ptrAttributes.type) << typeMessage; +#endif // O_HIP + EXPECT_EQ(0, ptrAttributes.device) << deviceMessage; + EXPECT_NE(nullptr, ptrAttributes.devicePointer) << devPtrMessage; + EXPECT_EQ(nullptr, ptrAttributes.hostPointer) << hostPtrMessage; +} +} // Anonymous namespace // ============================================================================= // Tests for expected behavior @@ -58,137 +60,136 @@ namespace // Anonymous namespace TEST(tALLDeviceVectorConstructor, CheckConstructorDataAndSizeExpectProperAllocationAndValues) { - // Initialize the DeviceVector - size_t const vectorSize = 10; - cuda_utilities::DeviceVector devVector{vectorSize}; + // Initialize the DeviceVector + size_t const vectorSize = 10; + cuda_utilities::DeviceVector devVector{vectorSize}; - // Check that the size is correct - EXPECT_EQ(vectorSize, devVector.size()); + // Check that the size is correct + EXPECT_EQ(vectorSize, devVector.size()); - // Check the pointer information - checkPointerAttributes(devVector); + // Check the pointer information + checkPointerAttributes(devVector); } -TEST(tALLDeviceVectorDestructor, - CheckDestructorExpectProperDeallocation) +TEST(tALLDeviceVectorDestructor, CheckDestructorExpectProperDeallocation) { - // Initialize the DeviceVector - size_t const vectorSize = 10; - cuda_utilities::DeviceVector devVector{vectorSize}; - - // Destruct the object - devVector.~DeviceVector(); - - // Get the pointer information - cudaPointerAttributes ptrAttributes; - cudaPointerGetAttributes(&ptrAttributes, devVector.data()); - - // Warning strings - std::string typeMessage = "ptrAttributes.type should be 0 since " - "that indicates type cudaMemoryTypeUnregistered" - "0 is cudaMemoryTypeUnregistered, " - "1 is cudaMemoryTypeHost, " - "2 is cudaMemoryTypeDevice, and" - "3 is cudaMemoryTypeManaged"; - std::string deviceMessage = "The pointer should be null which is device -2"; - std::string const devPtrMessage = "The device pointer is nullptr"; - std::string const hostPtrMessage = "The host pointer is not nullptr"; - - // Check that the pointer information is correct - #ifdef O_HIP - typeMessage = "ptrAttributes.memoryType should be 1 since that indicates a HIP device pointer."; - deviceMessage = "The pointer should be 0"; - EXPECT_EQ(0, ptrAttributes.memoryType) << typeMessage; - EXPECT_EQ(0, ptrAttributes.device) << deviceMessage; - #else // O_HIP is not defined i.e. we're using CUDA - EXPECT_EQ(0, ptrAttributes.type) << typeMessage; - EXPECT_EQ(-2, ptrAttributes.device) << deviceMessage; - #endif // O_HIP - EXPECT_EQ(nullptr, ptrAttributes.devicePointer) << devPtrMessage; - EXPECT_EQ(nullptr, ptrAttributes.hostPointer) << hostPtrMessage; - - // Reconstruct DeviceVector object to avoid error - new (&devVector) cuda_utilities::DeviceVector{vectorSize}; + // Initialize the DeviceVector + size_t const vectorSize = 10; + cuda_utilities::DeviceVector devVector{vectorSize}; + + // Destruct the object + devVector.~DeviceVector(); + + // Get the pointer information + cudaPointerAttributes ptrAttributes; + cudaPointerGetAttributes(&ptrAttributes, devVector.data()); + + // Warning strings + std::string typeMessage = + "ptrAttributes.type should be 0 since " + "that indicates type cudaMemoryTypeUnregistered" + "0 is cudaMemoryTypeUnregistered, " + "1 is cudaMemoryTypeHost, " + "2 is cudaMemoryTypeDevice, and" + "3 is cudaMemoryTypeManaged"; + std::string deviceMessage = "The pointer should be null which is device -2"; + std::string const devPtrMessage = "The device pointer is nullptr"; + std::string const hostPtrMessage = "The host pointer is not nullptr"; + +// Check that the pointer information is correct +#ifdef O_HIP + typeMessage = + "ptrAttributes.memoryType should be 1 since that indicates a HIP device " + "pointer."; + deviceMessage = "The pointer should be 0"; + EXPECT_EQ(0, ptrAttributes.memoryType) << typeMessage; + EXPECT_EQ(0, ptrAttributes.device) << deviceMessage; +#else // O_HIP is not defined i.e. we're using CUDA + EXPECT_EQ(0, ptrAttributes.type) << typeMessage; + EXPECT_EQ(-2, ptrAttributes.device) << deviceMessage; +#endif // O_HIP + EXPECT_EQ(nullptr, ptrAttributes.devicePointer) << devPtrMessage; + EXPECT_EQ(nullptr, ptrAttributes.hostPointer) << hostPtrMessage; + + // Reconstruct DeviceVector object to avoid error + new (&devVector) cuda_utilities::DeviceVector{vectorSize}; } TEST(tALLDeviceVectorStdVectorHostToDeviceCopyAndIndexing, CheckDeviceMemoryValuesAndIndexingOperationsExpectCorrectMemoryValues) { - // Initialize the vectors - size_t const vectorSize = 10; - cuda_utilities::DeviceVector devVector{vectorSize}; - std::vector stdVec(vectorSize); - std::iota(stdVec.begin(), stdVec.end(), 0); - - // Copy the value to the device memory - devVector.cpyHostToDevice(stdVec); - - // Check the values in device memory with both the .at() method and - // overloaded [] operator - for (size_t i = 0; i < vectorSize; i++) - { - EXPECT_EQ(stdVec.at(i), devVector.at(i)); - EXPECT_EQ(stdVec.at(i), devVector[i]); - } + // Initialize the vectors + size_t const vectorSize = 10; + cuda_utilities::DeviceVector devVector{vectorSize}; + std::vector stdVec(vectorSize); + std::iota(stdVec.begin(), stdVec.end(), 0); + + // Copy the value to the device memory + devVector.cpyHostToDevice(stdVec); + + // Check the values in device memory with both the .at() method and + // overloaded [] operator + for (size_t i = 0; i < vectorSize; i++) { + EXPECT_EQ(stdVec.at(i), devVector.at(i)); + EXPECT_EQ(stdVec.at(i), devVector[i]); + } } TEST(tALLDeviceVectorArrayHostToDeviceCopyAndIndexing, CheckDeviceMemoryValuesAndIndexingOperationsExpectCorrectMemoryValues) { - // Initialize the vectors - size_t const vectorSize = 10; - cuda_utilities::DeviceVector devVector{vectorSize}; - std::vector stdVec(vectorSize); - std::iota(stdVec.begin(), stdVec.end(), 0); - - // Copy the value to the device memory - devVector.cpyHostToDevice(stdVec.data(), stdVec.size()); - - // Check the values in device memory with both the .at() method and - // overloaded [] operator - for (size_t i = 0; i < vectorSize; i++) - { - EXPECT_EQ(stdVec.at(i), devVector.at(i)); - EXPECT_EQ(stdVec.at(i), devVector[i]); - } + // Initialize the vectors + size_t const vectorSize = 10; + cuda_utilities::DeviceVector devVector{vectorSize}; + std::vector stdVec(vectorSize); + std::iota(stdVec.begin(), stdVec.end(), 0); + + // Copy the value to the device memory + devVector.cpyHostToDevice(stdVec.data(), stdVec.size()); + + // Check the values in device memory with both the .at() method and + // overloaded [] operator + for (size_t i = 0; i < vectorSize; i++) { + EXPECT_EQ(stdVec.at(i), devVector.at(i)); + EXPECT_EQ(stdVec.at(i), devVector[i]); + } } TEST(tALLDeviceVectorArrayAssignmentMethod, AssignSingleValuesExpectCorrectMemoryValues) { - // Initialize the vectors - size_t const vectorSize = 10; - cuda_utilities::DeviceVector devVector{vectorSize}; + // Initialize the vectors + size_t const vectorSize = 10; + cuda_utilities::DeviceVector devVector{vectorSize}; - // Perform assignment - devVector.assign(13); - devVector.assign(17,4); + // Perform assignment + devVector.assign(13); + devVector.assign(17, 4); - // Check the values in device memory - EXPECT_EQ(13, devVector.at(0)); - EXPECT_EQ(17, devVector.at(4)); + // Check the values in device memory + EXPECT_EQ(13, devVector.at(0)); + EXPECT_EQ(17, devVector.at(4)); } TEST(tALLDeviceVectorStdVectorDeviceToHostCopy, CheckHostMemoryValuesExpectCorrectMemoryValues) { - // Initialize the vectors - size_t const vectorSize = 10; - cuda_utilities::DeviceVector devVector{vectorSize}; - std::vector stdVec(vectorSize), hostVec(vectorSize); - std::iota(stdVec.begin(), stdVec.end(), 0); - - // Copy the value to the device memory - devVector.cpyHostToDevice(stdVec); - - // Copy the values to the host memory - devVector.cpyDeviceToHost(hostVec); - - // Check the values - for (size_t i = 0; i < vectorSize; i++) - { - EXPECT_EQ(stdVec.at(i), hostVec.at(i)); - } + // Initialize the vectors + size_t const vectorSize = 10; + cuda_utilities::DeviceVector devVector{vectorSize}; + std::vector stdVec(vectorSize), hostVec(vectorSize); + std::iota(stdVec.begin(), stdVec.end(), 0); + + // Copy the value to the device memory + devVector.cpyHostToDevice(stdVec); + + // Copy the values to the host memory + devVector.cpyDeviceToHost(hostVec); + + // Check the values + for (size_t i = 0; i < vectorSize; i++) { + EXPECT_EQ(stdVec.at(i), hostVec.at(i)); + } } TEST(tALLDeviceVectorArrayDeviceToHostCopy, @@ -207,145 +208,137 @@ TEST(tALLDeviceVectorArrayDeviceToHostCopy, devVector.cpyDeviceToHost(hostVec.data(), hostVec.size()); // Check the values - for (size_t i = 0; i < vectorSize; i++) - { - EXPECT_EQ(stdVec.at(i), hostVec.at(i)); + for (size_t i = 0; i < vectorSize; i++) { + EXPECT_EQ(stdVec.at(i), hostVec.at(i)); } } -TEST(tALLDeviceVectorReset, - SetNewSizeExpectCorrectSize) +TEST(tALLDeviceVectorReset, SetNewSizeExpectCorrectSize) { - // Initialize the vectors - size_t const vectorSize = 10; - size_t const newSize = 20; - cuda_utilities::DeviceVector devVector{vectorSize}; - std::vector stdVec(vectorSize), newVec(newSize); - std::iota(stdVec.begin(), stdVec.end(), 0); - std::iota(newVec.begin(), newVec.end(), 20); - - // Copy the value to the device memory - devVector.cpyHostToDevice(stdVec); - - // Reset the vector - devVector.reset(newSize); - - // Check the size - EXPECT_EQ(newSize, devVector.size()); - - // Check the pointer - checkPointerAttributes(devVector); - - // Copy the new values into device memory - devVector.cpyHostToDevice(newVec); - - // Check the values - for (size_t i = 0; i < newSize; i++) - { - EXPECT_EQ(newVec.at(i), devVector.at(i)); - } + // Initialize the vectors + size_t const vectorSize = 10; + size_t const newSize = 20; + cuda_utilities::DeviceVector devVector{vectorSize}; + std::vector stdVec(vectorSize), newVec(newSize); + std::iota(stdVec.begin(), stdVec.end(), 0); + std::iota(newVec.begin(), newVec.end(), 20); + + // Copy the value to the device memory + devVector.cpyHostToDevice(stdVec); + + // Reset the vector + devVector.reset(newSize); + + // Check the size + EXPECT_EQ(newSize, devVector.size()); + + // Check the pointer + checkPointerAttributes(devVector); + + // Copy the new values into device memory + devVector.cpyHostToDevice(newVec); + + // Check the values + for (size_t i = 0; i < newSize; i++) { + EXPECT_EQ(newVec.at(i), devVector.at(i)); + } } -TEST(tALLDeviceVectorResize, - SetLargerSizeExpectCorrectSize) +TEST(tALLDeviceVectorResize, SetLargerSizeExpectCorrectSize) { - // Initialize the vectors - size_t const originalSize = 10; - size_t const newSize = 20; - cuda_utilities::DeviceVector devVector{originalSize}; - std::vector stdVec(originalSize); - std::iota(stdVec.begin(), stdVec.end(), 0); - - // Copy the value to the device memory - devVector.cpyHostToDevice(stdVec); - - // Reset the vector - devVector.resize(newSize); - - // Check the size - EXPECT_EQ(newSize, devVector.size()); - - // Check the pointer - checkPointerAttributes(devVector); - - // Check the values - for (size_t i = 0; i < originalSize; i++) - { - double const fiducialValue = (i < stdVec.size())? stdVec.at(i): 0; - EXPECT_EQ(fiducialValue, devVector.at(i)); - } + // Initialize the vectors + size_t const originalSize = 10; + size_t const newSize = 20; + cuda_utilities::DeviceVector devVector{originalSize}; + std::vector stdVec(originalSize); + std::iota(stdVec.begin(), stdVec.end(), 0); + + // Copy the value to the device memory + devVector.cpyHostToDevice(stdVec); + + // Reset the vector + devVector.resize(newSize); + + // Check the size + EXPECT_EQ(newSize, devVector.size()); + + // Check the pointer + checkPointerAttributes(devVector); + + // Check the values + for (size_t i = 0; i < originalSize; i++) { + double const fiducialValue = (i < stdVec.size()) ? stdVec.at(i) : 0; + EXPECT_EQ(fiducialValue, devVector.at(i)); + } } -TEST(tALLDeviceVectorResize, - SetSmallerSizeExpectCorrectSize) +TEST(tALLDeviceVectorResize, SetSmallerSizeExpectCorrectSize) { - // Initialize the vectors - size_t const vectorSize = 10; - size_t const newSize = 5; - cuda_utilities::DeviceVector devVector{vectorSize}; - std::vector stdVec(vectorSize); - std::iota(stdVec.begin(), stdVec.end(), 0); - - // Copy the value to the device memory - devVector.cpyHostToDevice(stdVec); - - // Reset the vector - devVector.resize(newSize); - - // Check the size - EXPECT_EQ(newSize, devVector.size()); - - // Check the pointer - checkPointerAttributes(devVector); - - // Check the values - for (size_t i = 0; i < newSize; i++) - { - EXPECT_EQ(stdVec.at(i), devVector.at(i)); - } + // Initialize the vectors + size_t const vectorSize = 10; + size_t const newSize = 5; + cuda_utilities::DeviceVector devVector{vectorSize}; + std::vector stdVec(vectorSize); + std::iota(stdVec.begin(), stdVec.end(), 0); + + // Copy the value to the device memory + devVector.cpyHostToDevice(stdVec); + + // Reset the vector + devVector.resize(newSize); + + // Check the size + EXPECT_EQ(newSize, devVector.size()); + + // Check the pointer + checkPointerAttributes(devVector); + + // Check the values + for (size_t i = 0; i < newSize; i++) { + EXPECT_EQ(stdVec.at(i), devVector.at(i)); + } } // ============================================================================= // Tests for exceptions // ============================================================================= -TEST(tALLDeviceVectorAt, - OutOfBoundsAccessExpectThrowOutOfRange) +TEST(tALLDeviceVectorAt, OutOfBoundsAccessExpectThrowOutOfRange) { - // Initialize the vectors - size_t const vectorSize = 10; - cuda_utilities::DeviceVector devVector{vectorSize}; - std::vector stdVec(vectorSize); - std::iota(stdVec.begin(), stdVec.end(), 0); + // Initialize the vectors + size_t const vectorSize = 10; + cuda_utilities::DeviceVector devVector{vectorSize}; + std::vector stdVec(vectorSize); + std::iota(stdVec.begin(), stdVec.end(), 0); - // Copy the value to the device memory - devVector.cpyHostToDevice(stdVec); + // Copy the value to the device memory + devVector.cpyHostToDevice(stdVec); - // Check that the .at() method throws the correct exception - EXPECT_THROW(devVector.at(100), std::out_of_range); + // Check that the .at() method throws the correct exception + EXPECT_THROW(devVector.at(100), std::out_of_range); } TEST(tALLDeviceVectorStdVectorHostToDeviceCopy, - OutOfBoundsCopyExpectThrowOutOfRange) + OutOfBoundsCopyExpectThrowOutOfRange) { - // Initialize the vectors - size_t const vectorSize = 10; - cuda_utilities::DeviceVector devVector{vectorSize}; - std::vector stdVec(2*vectorSize); - std::iota(stdVec.begin(), stdVec.end(), 0); - - // Copy the value to the device memory - EXPECT_THROW(devVector.cpyHostToDevice(stdVec), std::out_of_range); + // Initialize the vectors + size_t const vectorSize = 10; + cuda_utilities::DeviceVector devVector{vectorSize}; + std::vector stdVec(2 * vectorSize); + std::iota(stdVec.begin(), stdVec.end(), 0); + + // Copy the value to the device memory + EXPECT_THROW(devVector.cpyHostToDevice(stdVec), std::out_of_range); } TEST(tALLDeviceVectorStdVectorDeviceToHostCopy, - OutOfBoundsCopyExpectThrowOutOfRange) + OutOfBoundsCopyExpectThrowOutOfRange) { - // Initialize the vectors - size_t const vectorSize = 10; - cuda_utilities::DeviceVector devVector{vectorSize}; - std::vector stdVec(vectorSize/2); - std::iota(stdVec.begin(), stdVec.end(), 0); - - // Copy the value to the device memory - EXPECT_THROW(devVector.cpyDeviceToHost(stdVec), std::out_of_range); + // Initialize the vectors + size_t const vectorSize = 10; + cuda_utilities::DeviceVector devVector{vectorSize}; + std::vector stdVec(vectorSize / 2); + std::iota(stdVec.begin(), stdVec.end(), 0); + + // Copy the value to the device memory + EXPECT_THROW(devVector.cpyDeviceToHost(stdVec), std::out_of_range); } diff --git a/src/utils/cuda_utilities.cpp b/src/utils/cuda_utilities.cpp index a924b3f76..f1d04ac94 100644 --- a/src/utils/cuda_utilities.cpp +++ b/src/utils/cuda_utilities.cpp @@ -1,5 +1,6 @@ #include "../utils/cuda_utilities.h" -namespace cuda_utilities { +namespace cuda_utilities +{ -} // end namespace cuda_utilities +} // end namespace cuda_utilities diff --git a/src/utils/cuda_utilities.h b/src/utils/cuda_utilities.h index 3f0ae5fba..0df707a66 100644 --- a/src/utils/cuda_utilities.h +++ b/src/utils/cuda_utilities.h @@ -14,121 +14,120 @@ namespace cuda_utilities { - /*! - * \brief Compute the x, y, and z indices based off of the 1D index - * - * \param[in] id The 1D index - * \param[in] nx The total number of cells in the x direction - * \param[in] ny The total number of cells in the y direction - * \param[out] xid The x index - * \param[out] yid The y index - * \param[out] zid The z index - */ - inline __host__ __device__ void compute3DIndices(int const &id, - int const &nx, - int const &ny, - int &xid, - int &yid, - int &zid) - { - zid = id / (nx * ny); - yid = (id - zid * nx * ny) / nx; - xid = id - zid * nx * ny - yid * nx; - } +/*! + * \brief Compute the x, y, and z indices based off of the 1D index + * + * \param[in] id The 1D index + * \param[in] nx The total number of cells in the x direction + * \param[in] ny The total number of cells in the y direction + * \param[out] xid The x index + * \param[out] yid The y index + * \param[out] zid The z index + */ +inline __host__ __device__ void compute3DIndices(int const &id, int const &nx, + int const &ny, int &xid, + int &yid, int &zid) +{ + zid = id / (nx * ny); + yid = (id - zid * nx * ny) / nx; + xid = id - zid * nx * ny - yid * nx; +} - /*! - * \brief Compute the 1D index based off of the 3D indices - * - * \param xid The x index - * \param yid The y index - * \param zid The z index - * \param nx The total number of cells in the x direction - * \param ny The total number of cells in the y direction - * \return int The 1D index - */ - inline __host__ __device__ int compute1DIndex(int const &xid, - int const &yid, - int const &zid, - int const &nx, - int const &ny) - { - return xid + yid*nx + zid*nx*ny; - } +/*! + * \brief Compute the 1D index based off of the 3D indices + * + * \param xid The x index + * \param yid The y index + * \param zid The z index + * \param nx The total number of cells in the x direction + * \param ny The total number of cells in the y direction + * \return int The 1D index + */ +inline __host__ __device__ int compute1DIndex(int const &xid, int const &yid, + int const &zid, int const &nx, + int const &ny) +{ + return xid + yid * nx + zid * nx * ny; +} - inline __host__ __device__ void Get_Real_Indices(int const &n_ghost, int const &nx, int const &ny, int const &nz, int &is, int &ie, int &js, int &je, int &ks, int &ke) { - is = n_ghost; - ie = nx - n_ghost; - if (ny == 1) { - js = 0; - je = 1; - } else { - js = n_ghost; - je = ny - n_ghost; - } - if (nz == 1) { - ks = 0; - ke = 1; - } else { - ks = n_ghost; - ke = nz - n_ghost; - } - } +inline __host__ __device__ void Get_Real_Indices(int const &n_ghost, + int const &nx, int const &ny, + int const &nz, int &is, + int &ie, int &js, int &je, + int &ks, int &ke) +{ + is = n_ghost; + ie = nx - n_ghost; + if (ny == 1) { + js = 0; + je = 1; + } else { + js = n_ghost; + je = ny - n_ghost; + } + if (nz == 1) { + ks = 0; + ke = 1; + } else { + ks = n_ghost; + ke = nz - n_ghost; + } +} - /*! - * \brief Initialize GPU memory - * - * \param[in] ptr The pointer to GPU memory - * \param[in] N The size of the array in bytes - */ - inline void initGpuMemory(Real *ptr, size_t N) - { - CudaSafeCall(cudaMemset(ptr, 0, N)); - } +/*! + * \brief Initialize GPU memory + * + * \param[in] ptr The pointer to GPU memory + * \param[in] N The size of the array in bytes + */ +inline void initGpuMemory(Real *ptr, size_t N) +{ + CudaSafeCall(cudaMemset(ptr, 0, N)); +} - // ===================================================================== - /*! - * \brief Struct to determine the optimal number of blocks and threads - * per block to use when launching a kernel. The member - * variables are `threadsPerBlock` and `numBlocks` which are chosen with - the occupancy API. Can target any device on the system through the - * optional constructor argument. - * NOTE: On AMD there's currently an issue that stops kernels from being - * passed. As a workaround for now this struct just returns the maximum - * number of blocks and threads per block that a MI250X can run at once. - * - */ - template - struct AutomaticLaunchParams - { - public: - /*! - * \brief Construct a new Reduction Launch Params object. By default it - * generates values of numBlocks and threadsPerBlock suitable for a - * kernel with a grid-stride loop. For a kernel with one thread per - * element set the optional `numElements` argument to the number of - * elements - * - * \param[in] kernel The kernel to determine the launch parameters for - * \param[in] numElements The number of elements in the array that - the kernel operates on - */ - AutomaticLaunchParams(T &kernel, size_t numElements=0) - { - cudaOccupancyMaxPotentialBlockSize(&numBlocks, &threadsPerBlock, kernel, 0, 0); +// ===================================================================== +/*! + * \brief Struct to determine the optimal number of blocks and threads + * per block to use when launching a kernel. The member + * variables are `threadsPerBlock` and `numBlocks` which are chosen with + the occupancy API. Can target any device on the system through the + * optional constructor argument. + * NOTE: On AMD there's currently an issue that stops kernels from being + * passed. As a workaround for now this struct just returns the maximum + * number of blocks and threads per block that a MI250X can run at once. + * + */ +template +struct AutomaticLaunchParams { + public: + /*! + * \brief Construct a new Reduction Launch Params object. By default it + * generates values of numBlocks and threadsPerBlock suitable for a + * kernel with a grid-stride loop. For a kernel with one thread per + * element set the optional `numElements` argument to the number of + * elements + * + * \param[in] kernel The kernel to determine the launch parameters for + * \param[in] numElements The number of elements in the array that + the kernel operates on + */ + AutomaticLaunchParams(T &kernel, size_t numElements = 0) + { + cudaOccupancyMaxPotentialBlockSize(&numBlocks, &threadsPerBlock, kernel, 0, + 0); - if (numElements > 0) - { - numBlocks = (numElements + threadsPerBlock - 1) / threadsPerBlock; - } - } + if (numElements > 0) { + numBlocks = (numElements + threadsPerBlock - 1) / threadsPerBlock; + } + } - /// Defaulted Destructor - ~AutomaticLaunchParams()=default; + /// Defaulted Destructor + ~AutomaticLaunchParams() = default; - /// The maximum number of threads per block that the device supports - int threadsPerBlock; - /// The maximum number of scheduleable blocks on the device - int numBlocks; - }; - // ===================================================================== -} // end namespace cuda_utilities + /// The maximum number of threads per block that the device supports + int threadsPerBlock; + /// The maximum number of scheduleable blocks on the device + int numBlocks; +}; +// ===================================================================== +} // end namespace cuda_utilities diff --git a/src/utils/cuda_utilities_tests.cpp b/src/utils/cuda_utilities_tests.cpp index dc2f20066..a215fc976 100644 --- a/src/utils/cuda_utilities_tests.cpp +++ b/src/utils/cuda_utilities_tests.cpp @@ -1,23 +1,24 @@ /*! * \file cuda_utilities_tests.cpp - * \author Robert 'Bob' Caddy (rvc@pitt.edu), Helena Richie (helenarichie@pitt.edu) - * \brief Tests for the contents of cuda_utilities.h and cuda_utilities.cpp + * \author Robert 'Bob' Caddy (rvc@pitt.edu), Helena Richie + * (helenarichie@pitt.edu) \brief Tests for the contents of cuda_utilities.h and + * cuda_utilities.cpp * */ // STL Includes -#include -#include #include +#include +#include // External Includes -#include // Include GoogleTest and related libraries/headers +#include // Include GoogleTest and related libraries/headers // Local Includes -#include "../utils/testing_utilities.h" -#include "../utils/cuda_utilities.h" #include "../global/global.h" +#include "../utils/cuda_utilities.h" +#include "../utils/testing_utilities.h" /* PCM : n_ghost = 2 @@ -31,92 +32,93 @@ // Local helper functions namespace { - struct TestParams - { - std::vector n_ghost {2, 2, 3, 4}; - std::vector nx {100, 2048, 2048, 2048}; - std::vector ny {1, 2048, 2048, 2048}; - std::vector nz {1, 4096, 4096, 4096}; - std::vector names {"Single-cell 3D PCM/PLMP case", "Large 3D PCM/PLMP case", "Large PLMC case", "Large PPMP/PPMC case"}; - - }; -} - -TEST(tHYDROCudaUtilsGetRealIndices, CorrectInputExpectCorrectOutput) { - TestParams parameters; - std::vector> fiducial_indices {{2, 98, 0, 1, 0, 1}, - {2, 2046, 2, 2046, 2, 4094}, - {3, 2045, 3, 2045, 3, 4093}, - {4, 2044, 4, 2044, 4, 4092}}; - - for (size_t i = 0; i < parameters.names.size(); i++) - { - int is; - int ie; - int js; - int je; - int ks; - int ke; - cuda_utilities::Get_Real_Indices(parameters.n_ghost.at(i), parameters.nx.at(i), parameters.ny.at(i), parameters.nz.at(i), is, ie, js, je, ks, ke); - - std::vector index_names {"is", "ie", "js", "je", "ks", "ke"}; - std::vector test_indices {is, ie, js, je, ks, ke}; - - for (size_t j = 0; j < test_indices.size(); j++) - { - testingUtilities::checkResults(fiducial_indices[i][j], test_indices[j], index_names[j] + " " + parameters.names[i]); - } +struct TestParams { + std::vector n_ghost{2, 2, 3, 4}; + std::vector nx{100, 2048, 2048, 2048}; + std::vector ny{1, 2048, 2048, 2048}; + std::vector nz{1, 4096, 4096, 4096}; + std::vector names{"Single-cell 3D PCM/PLMP case", + "Large 3D PCM/PLMP case", "Large PLMC case", + "Large PPMP/PPMC case"}; +}; +} // namespace + +TEST(tHYDROCudaUtilsGetRealIndices, CorrectInputExpectCorrectOutput) +{ + TestParams parameters; + std::vector> fiducial_indices{{2, 98, 0, 1, 0, 1}, + {2, 2046, 2, 2046, 2, 4094}, + {3, 2045, 3, 2045, 3, 4093}, + {4, 2044, 4, 2044, 4, 4092}}; + + for (size_t i = 0; i < parameters.names.size(); i++) { + int is; + int ie; + int js; + int je; + int ks; + int ke; + cuda_utilities::Get_Real_Indices( + parameters.n_ghost.at(i), parameters.nx.at(i), parameters.ny.at(i), + parameters.nz.at(i), is, ie, js, je, ks, ke); + + std::vector index_names{"is", "ie", "js", "je", "ks", "ke"}; + std::vector test_indices{is, ie, js, je, ks, ke}; + + for (size_t j = 0; j < test_indices.size(); j++) { + testingUtilities::checkResults( + fiducial_indices[i][j], test_indices[j], + index_names[j] + " " + parameters.names[i]); } + } } // ============================================================================= -TEST(tALLCompute3DIndices, - CorrectInputExpectCorrectOutput) +TEST(tALLCompute3DIndices, CorrectInputExpectCorrectOutput) { - // Parameters - int const id = 723; - int const nx = 34; - int const ny = 14; - - // Fiducial Data - int const fiducialXid = 9; - int const fiducialYid = 7; - int const fiducialZid = 1; - - // Test Variables - int testXid; - int testYid; - int testZid; - - // Get test data - cuda_utilities::compute3DIndices(id, nx, ny, testXid, testYid, testZid); - - EXPECT_EQ(fiducialXid, testXid); - EXPECT_EQ(fiducialYid, testYid); - EXPECT_EQ(fiducialZid, testZid); + // Parameters + int const id = 723; + int const nx = 34; + int const ny = 14; + + // Fiducial Data + int const fiducialXid = 9; + int const fiducialYid = 7; + int const fiducialZid = 1; + + // Test Variables + int testXid; + int testYid; + int testZid; + + // Get test data + cuda_utilities::compute3DIndices(id, nx, ny, testXid, testYid, testZid); + + EXPECT_EQ(fiducialXid, testXid); + EXPECT_EQ(fiducialYid, testYid); + EXPECT_EQ(fiducialZid, testZid); } // ============================================================================= // ============================================================================= -TEST(tALLCompute1DIndex, - CorrectInputExpectCorrectOutput) +TEST(tALLCompute1DIndex, CorrectInputExpectCorrectOutput) { - // Parameters - int const xid = 72; - int const yid = 53; - int const zid = 14; - int const nx = 128; - int const ny = 64; + // Parameters + int const xid = 72; + int const yid = 53; + int const zid = 14; + int const nx = 128; + int const ny = 64; - // Fiducial Data - int const fiducialId = 121544; + // Fiducial Data + int const fiducialId = 121544; - // Test Variable - int testId; + // Test Variable + int testId; - // Get test data - testId = cuda_utilities::compute1DIndex(xid, yid, zid, nx, ny); + // Get test data + testId = cuda_utilities::compute1DIndex(xid, yid, zid, nx, ny); - EXPECT_EQ(fiducialId, testId); + EXPECT_EQ(fiducialId, testId); } // ============================================================================= diff --git a/src/utils/error_check_cuda.cu b/src/utils/error_check_cuda.cu index 32aa2274e..b8187a502 100644 --- a/src/utils/error_check_cuda.cu +++ b/src/utils/error_check_cuda.cu @@ -3,28 +3,30 @@ #ifdef CUDA -#include -#include -#include -#include "../utils/gpu.hpp" -#include "../global/global.h" -#include "../global/global_cuda.h" -#include "../io/io.h" -#include "../utils/error_check_cuda.h" - - -__global__ void Check_Value_Along_Axis( Real *dev_array, int n_field, int nx, int ny, int nz, int n_ghost, int *return_value){ - + #include + #include + #include + + #include "../global/global.h" + #include "../global/global_cuda.h" + #include "../io/io.h" + #include "../utils/error_check_cuda.h" + #include "../utils/gpu.hpp" + +__global__ void Check_Value_Along_Axis(Real *dev_array, int n_field, int nx, + int ny, int nz, int n_ghost, + int *return_value) +{ int tid_j = blockIdx.x * blockDim.x + threadIdx.x; int tid_k = blockIdx.y * blockDim.y + threadIdx.y; - - if ( blockDim.x != N_Y || blockDim.y != N_Z ){ - if ( tid_j == 0 && tid_k == 0 ) printf("ERROR CHECK: Block Dimension Error \n" ); + if (blockDim.x != N_Y || blockDim.y != N_Z) { + if (tid_j == 0 && tid_k == 0) + printf("ERROR CHECK: Block Dimension Error \n"); return; } - __shared__ Real sh_data[N_Z*N_Y]; + __shared__ Real sh_data[N_Z * N_Y]; // int n_cells, indx_x, indx_3d, indx_2d; Real field_value; @@ -35,45 +37,37 @@ __global__ void Check_Value_Along_Axis( Real *dev_array, int n_field, int nx, in int error = 0; indx_x = 0; - for ( indx_x=0; indx_x +#ifdef MPI_CHOLLA + #include void chexit(int code) { - - if(code==0) - { + if (code == 0) { /*exit normally*/ MPI_Finalize(); exit(code); - }else{ - + } else { /*exit with non-zero error code*/ - MPI_Abort(MPI_COMM_WORLD,code); + MPI_Abort(MPI_COMM_WORLD, code); exit(code); - } } #else /*MPI_CHOLLA*/ diff --git a/src/utils/gpu.hpp b/src/utils/gpu.hpp index 461f9821b..c7968abab 100644 --- a/src/utils/gpu.hpp +++ b/src/utils/gpu.hpp @@ -6,160 +6,164 @@ #ifdef O_HIP -#include + #include -#if defined(PARIS) || defined(PARIS_GALACTIC) + #if defined(PARIS) || defined(PARIS_GALACTIC) -#include + #include -static void __attribute__((unused)) check(const hipfftResult err, const char *const file, const int line) +static void __attribute__((unused)) +check(const hipfftResult err, const char *const file, const int line) { if (err == HIPFFT_SUCCESS) return; - fprintf(stderr,"HIPFFT ERROR AT LINE %d OF FILE '%s': %d\n",line,file,err); + fprintf(stderr, "HIPFFT ERROR AT LINE %d OF FILE '%s': %d\n", line, file, + err); fflush(stderr); exit(err); } -#endif //CUFFT PARIS PARIS_GALACTIC - -#define WARPSIZE 64 -static constexpr int maxWarpsPerBlock = 1024/WARPSIZE; - -#define CUFFT_D2Z HIPFFT_D2Z -#define CUFFT_FORWARD HIPFFT_FORWARD -#define CUFFT_INVERSE HIPFFT_BACKWARD -#define CUFFT_Z2D HIPFFT_Z2D -#define CUFFT_Z2Z HIPFFT_Z2Z - -#define cudaDeviceSynchronize hipDeviceSynchronize -#define cudaError hipError_t -#define cudaError_t hipError_t -#define cudaErrorInsufficientDriver hipErrorInsufficientDriver -#define cudaErrorNoDevice hipErrorNoDevice -#define cudaEvent_t hipEvent_t -#define cudaEventCreate hipEventCreate -#define cudaEventElapsedTime hipEventElapsedTime -#define cudaEventRecord hipEventRecord -#define cudaEventSynchronize hipEventSynchronize -#define cudaFree hipFree -#define cudaFreeHost hipHostFree -#define cudaGetDevice hipGetDevice -#define cudaGetDeviceCount hipGetDeviceCount -#define cudaGetErrorString hipGetErrorString -#define cudaGetLastError hipGetLastError -#define cudaHostAlloc hipHostMalloc -#define cudaHostAllocDefault hipHostMallocDefault -#define cudaMalloc hipMalloc -#define cudaMemcpy hipMemcpy -#define cudaMemcpyAsync hipMemcpyAsync -#define cudaMemcpyPeer hipMemcpyPeer -#define cudaMemcpyDeviceToHost hipMemcpyDeviceToHost -#define cudaMemcpyDeviceToDevice hipMemcpyDeviceToDevice -#define cudaMemcpyHostToDevice hipMemcpyHostToDevice -#define cudaMemGetInfo hipMemGetInfo -#define cudaMemset hipMemset -#define cudaReadModeElementType hipReadModeElementType -#define cudaSetDevice hipSetDevice -#define cudaSuccess hipSuccess -#define cudaDeviceProp hipDeviceProp_t -#define cudaGetDeviceProperties hipGetDeviceProperties -#define cudaPointerAttributes hipPointerAttribute_t -#define cudaPointerGetAttributes hipPointerGetAttributes -#define cudaOccupancyMaxPotentialBlockSize hipOccupancyMaxPotentialBlockSize - -// Texture definitions -#define cudaArray hipArray -#define cudaMallocArray hipMallocArray -#define cudaFreeArray hipFreeArray -#define cudaMemcpyToArray hipMemcpyToArray -#define cudaMemcpy2DToArray hipMemcpy2DToArray - - -#define cudaTextureObject_t hipTextureObject_t -#define cudaCreateTextureObject hipCreateTextureObject -#define cudaDestroyTextureObject hipDestroyTextureObject - -#define cudaChannelFormatDesc hipChannelFormatDesc -#define cudaCreateChannelDesc hipCreateChannelDesc -#define cudaChannelFormatKindFloat hipChannelFormatKindFloat - -#define cudaResourceDesc hipResourceDesc -#define cudaResourceTypeArray hipResourceTypeArray -#define cudaTextureDesc hipTextureDesc -#define cudaAddressModeClamp hipAddressModeClamp -#define cudaFilterModeLinear hipFilterModeLinear -#define cudaFilterModePoint hipFilterModePoint -// Texture Definitions -#define cudaPointerAttributes hipPointerAttribute_t -#define cudaPointerGetAttributes hipPointerGetAttributes - -// FFT definitions -#define cufftDestroy hipfftDestroy -#define cufftDoubleComplex hipfftDoubleComplex -#define cufftDoubleReal hipfftDoubleReal -#define cufftExecD2Z hipfftExecD2Z -#define cufftExecZ2D hipfftExecZ2D -#define cufftExecZ2Z hipfftExecZ2Z -#define cufftHandle hipfftHandle -#define cufftPlan3d hipfftPlan3d -#define cufftPlanMany hipfftPlanMany - -#define curandStateMRG32k3a_t hiprandStateMRG32k3a_t -#define curand_init hiprand_init -#define curand_poisson hiprand_poisson - -static void __attribute__((unused)) check(const hipError_t err, const char *const file, const int line) + #endif // CUFFT PARIS PARIS_GALACTIC + + #define WARPSIZE 64 +static constexpr int maxWarpsPerBlock = 1024 / WARPSIZE; + + #define CUFFT_D2Z HIPFFT_D2Z + #define CUFFT_FORWARD HIPFFT_FORWARD + #define CUFFT_INVERSE HIPFFT_BACKWARD + #define CUFFT_Z2D HIPFFT_Z2D + #define CUFFT_Z2Z HIPFFT_Z2Z + + #define cudaDeviceSynchronize hipDeviceSynchronize + #define cudaError hipError_t + #define cudaError_t hipError_t + #define cudaErrorInsufficientDriver hipErrorInsufficientDriver + #define cudaErrorNoDevice hipErrorNoDevice + #define cudaEvent_t hipEvent_t + #define cudaEventCreate hipEventCreate + #define cudaEventElapsedTime hipEventElapsedTime + #define cudaEventRecord hipEventRecord + #define cudaEventSynchronize hipEventSynchronize + #define cudaFree hipFree + #define cudaFreeHost hipHostFree + #define cudaGetDevice hipGetDevice + #define cudaGetDeviceCount hipGetDeviceCount + #define cudaGetErrorString hipGetErrorString + #define cudaGetLastError hipGetLastError + #define cudaHostAlloc hipHostMalloc + #define cudaHostAllocDefault hipHostMallocDefault + #define cudaMalloc hipMalloc + #define cudaMemcpy hipMemcpy + #define cudaMemcpyAsync hipMemcpyAsync + #define cudaMemcpyPeer hipMemcpyPeer + #define cudaMemcpyDeviceToHost hipMemcpyDeviceToHost + #define cudaMemcpyDeviceToDevice hipMemcpyDeviceToDevice + #define cudaMemcpyHostToDevice hipMemcpyHostToDevice + #define cudaMemGetInfo hipMemGetInfo + #define cudaMemset hipMemset + #define cudaReadModeElementType hipReadModeElementType + #define cudaSetDevice hipSetDevice + #define cudaSuccess hipSuccess + #define cudaDeviceProp hipDeviceProp_t + #define cudaGetDeviceProperties hipGetDeviceProperties + #define cudaPointerAttributes hipPointerAttribute_t + #define cudaPointerGetAttributes hipPointerGetAttributes + #define cudaOccupancyMaxPotentialBlockSize hipOccupancyMaxPotentialBlockSize + + // Texture definitions + #define cudaArray hipArray + #define cudaMallocArray hipMallocArray + #define cudaFreeArray hipFreeArray + #define cudaMemcpyToArray hipMemcpyToArray + #define cudaMemcpy2DToArray hipMemcpy2DToArray + + #define cudaTextureObject_t hipTextureObject_t + #define cudaCreateTextureObject hipCreateTextureObject + #define cudaDestroyTextureObject hipDestroyTextureObject + + #define cudaChannelFormatDesc hipChannelFormatDesc + #define cudaCreateChannelDesc hipCreateChannelDesc + #define cudaChannelFormatKindFloat hipChannelFormatKindFloat + + #define cudaResourceDesc hipResourceDesc + #define cudaResourceTypeArray hipResourceTypeArray + #define cudaTextureDesc hipTextureDesc + #define cudaAddressModeClamp hipAddressModeClamp + #define cudaFilterModeLinear hipFilterModeLinear + #define cudaFilterModePoint hipFilterModePoint + // Texture Definitions + #define cudaPointerAttributes hipPointerAttribute_t + #define cudaPointerGetAttributes hipPointerGetAttributes + + // FFT definitions + #define cufftDestroy hipfftDestroy + #define cufftDoubleComplex hipfftDoubleComplex + #define cufftDoubleReal hipfftDoubleReal + #define cufftExecD2Z hipfftExecD2Z + #define cufftExecZ2D hipfftExecZ2D + #define cufftExecZ2Z hipfftExecZ2Z + #define cufftHandle hipfftHandle + #define cufftPlan3d hipfftPlan3d + #define cufftPlanMany hipfftPlanMany + + #define curandStateMRG32k3a_t hiprandStateMRG32k3a_t + #define curand_init hiprand_init + #define curand_poisson hiprand_poisson + +static void __attribute__((unused)) +check(const hipError_t err, const char *const file, const int line) { if (err == hipSuccess) return; - fprintf(stderr,"HIP ERROR AT LINE %d OF FILE '%s': %s %s\n",line,file,hipGetErrorName(err),hipGetErrorString(err)); + fprintf(stderr, "HIP ERROR AT LINE %d OF FILE '%s': %s %s\n", line, file, + hipGetErrorName(err), hipGetErrorString(err)); fflush(stderr); exit(err); } #else // not O_HIP -#include + #include -#if defined(PARIS) || defined(PARIS_GALACTIC) + #if defined(PARIS) || defined(PARIS_GALACTIC) -#include + #include static void check(const cufftResult err, const char *const file, const int line) { if (err == CUFFT_SUCCESS) return; - fprintf(stderr,"CUFFT ERROR AT LINE %d OF FILE '%s': %d\n",line,file,err); + fprintf(stderr, "CUFFT ERROR AT LINE %d OF FILE '%s': %d\n", line, file, err); fflush(stderr); exit(err); } -#endif // defined(PARIS) || defined(PARIS_GALACTIC) + #endif // defined(PARIS) || defined(PARIS_GALACTIC) static void check(const cudaError_t err, const char *const file, const int line) { if (err == cudaSuccess) return; - fprintf(stderr,"CUDA ERROR AT LINE %d OF FILE '%s': %s %s\n",line,file,cudaGetErrorName(err),cudaGetErrorString(err)); + fprintf(stderr, "CUDA ERROR AT LINE %d OF FILE '%s': %s %s\n", line, file, + cudaGetErrorName(err), cudaGetErrorString(err)); fflush(stderr); exit(err); } -#define WARPSIZE 32 -static constexpr int maxWarpsPerBlock = 1024/WARPSIZE; -#define hipLaunchKernelGGL(F,G,B,M,S,...) F<<>>(__VA_ARGS__) -#define __shfl_down(...) __shfl_down_sync(0xFFFFFFFF, __VA_ARGS__) + #define WARPSIZE 32 +static constexpr int maxWarpsPerBlock = 1024 / WARPSIZE; + #define hipLaunchKernelGGL(F, G, B, M, S, ...) F<<>>(__VA_ARGS__) + #define __shfl_down(...) __shfl_down_sync(0xFFFFFFFF, __VA_ARGS__) -#endif //O_HIP +#endif // O_HIP -#define CHECK(X) check(X,__FILE__,__LINE__) +#define CHECK(X) check(X, __FILE__, __LINE__) #define GPU_MAX_THREADS 256 #if defined(__CUDACC__) || defined(__HIPCC__) template -__global__ __launch_bounds__(GPU_MAX_THREADS) -void gpuRun0(const int n0, const F f) +__global__ __launch_bounds__(GPU_MAX_THREADS) void gpuRun0(const int n0, + const F f) { - const int i0 = blockIdx.x*blockDim.x+threadIdx.x; + const int i0 = blockIdx.x * blockDim.x + threadIdx.x; if (i0 < n0) f(i0); } @@ -167,169 +171,162 @@ template void gpuFor(const int n0, const F f) { if (n0 <= 0) return; - const int b0 = (n0+GPU_MAX_THREADS-1)/GPU_MAX_THREADS; - const int t0 = (n0+b0-1)/b0; - gpuRun0<<>>(n0,f); + const int b0 = (n0 + GPU_MAX_THREADS - 1) / GPU_MAX_THREADS; + const int t0 = (n0 + b0 - 1) / b0; + gpuRun0<<>>(n0, f); CHECK(cudaGetLastError()); } template -__global__ __launch_bounds__(GPU_MAX_THREADS) -void gpuRun0x2(const F f) +__global__ __launch_bounds__(GPU_MAX_THREADS) void gpuRun0x2(const F f) { const int i0 = threadIdx.y; const int i1 = threadIdx.x; - f(i0,i1); + f(i0, i1); } template -__global__ __launch_bounds__(GPU_MAX_THREADS) -void gpuRun1x1(const F f) +__global__ __launch_bounds__(GPU_MAX_THREADS) void gpuRun1x1(const F f) { const int i0 = blockIdx.x; const int i1 = threadIdx.x; - f(i0,i1); + f(i0, i1); } template -__global__ __launch_bounds__(GPU_MAX_THREADS) -void gpuRun2x0(const int n1, const F f) +__global__ __launch_bounds__(GPU_MAX_THREADS) void gpuRun2x0(const int n1, + const F f) { const int i0 = blockIdx.y; - const int i1 = blockIdx.x*blockDim.x+threadIdx.x; - if (i1 < n1) f(i0,i1); + const int i1 = blockIdx.x * blockDim.x + threadIdx.x; + if (i1 < n1) f(i0, i1); } template void gpuFor(const int n0, const int n1, const F f) { if ((n0 <= 0) || (n1 <= 0)) return; - const long nl01 = long(n0)*long(n1); + const long nl01 = long(n0) * long(n1); assert(nl01 < long(INT_MAX)); if (n1 > GPU_MAX_THREADS) { - const int b1 = (n1+GPU_MAX_THREADS-1)/GPU_MAX_THREADS; - const int t1 = (n1+b1-1)/b1; - gpuRun2x0<<>>(n1,f); + const int b1 = (n1 + GPU_MAX_THREADS - 1) / GPU_MAX_THREADS; + const int t1 = (n1 + b1 - 1) / b1; + gpuRun2x0<<>>(n1, f); CHECK(cudaGetLastError()); } else if (nl01 > GPU_MAX_THREADS) { - gpuRun1x1<<>>(f); + gpuRun1x1<<>>(f); CHECK(cudaGetLastError()); } else { - gpuRun0x2<<<1,dim3(n1,n0)>>>(f); + gpuRun0x2<<<1, dim3(n1, n0)>>>(f); CHECK(cudaGetLastError()); } } template -__global__ __launch_bounds__(GPU_MAX_THREADS) -void gpuRun0x3(const F f) +__global__ __launch_bounds__(GPU_MAX_THREADS) void gpuRun0x3(const F f) { const int i0 = threadIdx.z; const int i1 = threadIdx.y; const int i2 = threadIdx.x; - f(i0,i1,i2); + f(i0, i1, i2); } template -__global__ __launch_bounds__(GPU_MAX_THREADS) -void gpuRun1x2(const F f) +__global__ __launch_bounds__(GPU_MAX_THREADS) void gpuRun1x2(const F f) { const int i0 = blockIdx.x; const int i1 = threadIdx.y; const int i2 = threadIdx.x; - f(i0,i1,i2); + f(i0, i1, i2); } template -__global__ __launch_bounds__(GPU_MAX_THREADS) -void gpuRun2x1(const F f) +__global__ __launch_bounds__(GPU_MAX_THREADS) void gpuRun2x1(const F f) { const int i0 = blockIdx.y; const int i1 = blockIdx.x; const int i2 = threadIdx.x; - f(i0,i1,i2); + f(i0, i1, i2); } template -__global__ __launch_bounds__(GPU_MAX_THREADS) -void gpuRun3x0(const int n2, const F f) +__global__ __launch_bounds__(GPU_MAX_THREADS) void gpuRun3x0(const int n2, + const F f) { const int i0 = blockIdx.z; const int i1 = blockIdx.y; - const int i2 = blockIdx.x*blockDim.x+threadIdx.x; - if (i2 < n2) f(i0,i1,i2); + const int i2 = blockIdx.x * blockDim.x + threadIdx.x; + if (i2 < n2) f(i0, i1, i2); } template void gpuFor(const int n0, const int n1, const int n2, const F f) { if ((n0 <= 0) || (n1 <= 0) || (n2 <= 0)) return; - const long nl12 = long(n1)*long(n2); - const long nl012 = long(n0)*nl12; + const long nl12 = long(n1) * long(n2); + const long nl012 = long(n0) * nl12; assert(nl012 < long(INT_MAX)); if (n2 > GPU_MAX_THREADS) { - const int b2 = (n2+GPU_MAX_THREADS-1)/GPU_MAX_THREADS; - const int t2 = (n2+b2-1)/b2; - gpuRun3x0<<>>(n2,f); + const int b2 = (n2 + GPU_MAX_THREADS - 1) / GPU_MAX_THREADS; + const int t2 = (n2 + b2 - 1) / b2; + gpuRun3x0<<>>(n2, f); CHECK(cudaGetLastError()); } else if (nl12 > GPU_MAX_THREADS) { - gpuRun2x1<<>>(f); + gpuRun2x1<<>>(f); CHECK(cudaGetLastError()); } else if (nl012 > GPU_MAX_THREADS) { - gpuRun1x2<<>>(f); + gpuRun1x2<<>>(f); CHECK(cudaGetLastError()); } else { - gpuRun0x3<<<1,dim3(n2,n1,n0)>>>(f); + gpuRun0x3<<<1, dim3(n2, n1, n0)>>>(f); CHECK(cudaGetLastError()); } } template -__global__ __launch_bounds__(GPU_MAX_THREADS) -void gpuRun1x3(const F f) +__global__ __launch_bounds__(GPU_MAX_THREADS) void gpuRun1x3(const F f) { const int i0 = blockIdx.x; const int i1 = threadIdx.z; const int i2 = threadIdx.y; const int i3 = threadIdx.x; - f(i0,i1,i2,i3); + f(i0, i1, i2, i3); } template -__global__ __launch_bounds__(GPU_MAX_THREADS) -void gpuRun2x2(const F f) +__global__ __launch_bounds__(GPU_MAX_THREADS) void gpuRun2x2(const F f) { const int i0 = blockIdx.y; const int i1 = blockIdx.x; const int i2 = threadIdx.y; const int i3 = threadIdx.x; - f(i0,i1,i2,i3); + f(i0, i1, i2, i3); } template -__global__ __launch_bounds__(GPU_MAX_THREADS) -void gpuRun3x1(const F f) +__global__ __launch_bounds__(GPU_MAX_THREADS) void gpuRun3x1(const F f) { const int i0 = blockIdx.z; const int i1 = blockIdx.y; const int i2 = blockIdx.x; const int i3 = threadIdx.x; - f(i0,i1,i2,i3); + f(i0, i1, i2, i3); } template -__global__ __launch_bounds__(GPU_MAX_THREADS) -void gpuRun4x0(const int n23, const int n3, const F f) +__global__ __launch_bounds__(GPU_MAX_THREADS) void gpuRun4x0(const int n23, + const int n3, + const F f) { - const int i23 = blockIdx.x*blockDim.x+threadIdx.x; + const int i23 = blockIdx.x * blockDim.x + threadIdx.x; if (i23 < n23) { const int i0 = blockIdx.z; const int i1 = blockIdx.y; - const int i2 = i23/n3; - const int i3 = i23%n3; - f(i0,i1,i2,i3); + const int i2 = i23 / n3; + const int i3 = i23 % n3; + f(i0, i1, i2, i3); } } @@ -337,110 +334,111 @@ template void gpuFor(const int n0, const int n1, const int n2, const int n3, const F f) { if ((n0 <= 0) || (n1 <= 0) || (n2 <= 0) || (n3 <= 0)) return; - const long nl23 = long(n2)*long(n3); - const long nl123 = long(n1)*nl23; - assert(long(n0)*nl123 < long(INT_MAX)); + const long nl23 = long(n2) * long(n3); + const long nl123 = long(n1) * nl23; + assert(long(n0) * nl123 < long(INT_MAX)); - const int n23 = int(nl23); + const int n23 = int(nl23); const int n123 = int(nl123); if (n3 > GPU_MAX_THREADS) { - const int b23 = (n23+GPU_MAX_THREADS-1)/GPU_MAX_THREADS; - const int t23 = (n23+b23-1)/b23; - gpuRun4x0<<>>(n23,n3,f); + const int b23 = (n23 + GPU_MAX_THREADS - 1) / GPU_MAX_THREADS; + const int t23 = (n23 + b23 - 1) / b23; + gpuRun4x0<<>>(n23, n3, f); CHECK(cudaGetLastError()); } else if (n23 > GPU_MAX_THREADS) { - gpuRun3x1<<>>(f); + gpuRun3x1<<>>(f); CHECK(cudaGetLastError()); } else if (n123 > GPU_MAX_THREADS) { - gpuRun2x2<<>>(f); + gpuRun2x2<<>>(f); CHECK(cudaGetLastError()); } else { - gpuRun1x3<<>>(f); + gpuRun1x3<<>>(f); CHECK(cudaGetLastError()); } } template -__global__ __launch_bounds__(GPU_MAX_THREADS) -void gpuRun2x3(const F f) +__global__ __launch_bounds__(GPU_MAX_THREADS) void gpuRun2x3(const F f) { const int i0 = blockIdx.y; const int i1 = blockIdx.x; const int i2 = threadIdx.z; const int i3 = threadIdx.y; const int i4 = threadIdx.x; - f(i0,i1,i2,i3,i4); + f(i0, i1, i2, i3, i4); } template -__global__ __launch_bounds__(GPU_MAX_THREADS) -void gpuRun3x2(const F f) +__global__ __launch_bounds__(GPU_MAX_THREADS) void gpuRun3x2(const F f) { const int i0 = blockIdx.z; const int i1 = blockIdx.y; const int i2 = blockIdx.x; const int i3 = threadIdx.y; const int i4 = threadIdx.x; - f(i0,i1,i2,i3,i4); + f(i0, i1, i2, i3, i4); } template -__global__ __launch_bounds__(GPU_MAX_THREADS) -void gpuRun4x1(const int n1, const F f) +__global__ __launch_bounds__(GPU_MAX_THREADS) void gpuRun4x1(const int n1, + const F f) { const int i01 = blockIdx.z; - const int i0 = i01/n1; - const int i1 = i01%n1; - const int i2 = blockIdx.y; - const int i3 = blockIdx.x; - const int i4 = threadIdx.x; - f(i0,i1,i2,i3,i4); + const int i0 = i01 / n1; + const int i1 = i01 % n1; + const int i2 = blockIdx.y; + const int i3 = blockIdx.x; + const int i4 = threadIdx.x; + f(i0, i1, i2, i3, i4); } template -__global__ __launch_bounds__(GPU_MAX_THREADS) -void gpuRun5x0(const int n1, const int n34, const int n4, const F f) +__global__ __launch_bounds__(GPU_MAX_THREADS) void gpuRun5x0(const int n1, + const int n34, + const int n4, + const F f) { - const int i34 = blockIdx.x*blockDim.x+threadIdx.x; + const int i34 = blockIdx.x * blockDim.x + threadIdx.x; if (i34 < n34) { const int i01 = blockIdx.z; - const int i0 = i01/n1; - const int i1 = i01%n1; - const int i2 = blockIdx.y; - const int i3 = i34/n4; - const int i4 = i34%n4; - f(i0,i1,i2,i3,i4); + const int i0 = i01 / n1; + const int i1 = i01 % n1; + const int i2 = blockIdx.y; + const int i3 = i34 / n4; + const int i4 = i34 % n4; + f(i0, i1, i2, i3, i4); } } template -void gpuFor(const int n0, const int n1, const int n2, const int n3, const int n4, const F f) +void gpuFor(const int n0, const int n1, const int n2, const int n3, + const int n4, const F f) { if ((n0 <= 0) || (n1 <= 0) || (n2 <= 0) || (n3 <= 0) || (n4 <= 0)) return; - const long nl01 = long(n0)*long(n1); - const long nl34 = long(n3)*long(n4); - assert(nl01*long(n2)*nl34 < long(INT_MAX)); + const long nl01 = long(n0) * long(n1); + const long nl34 = long(n3) * long(n4); + assert(nl01 * long(n2) * nl34 < long(INT_MAX)); const int n34 = int(nl34); if (n4 > GPU_MAX_THREADS) { const int n01 = int(nl01); - const int b34 = (n34+GPU_MAX_THREADS-1)/GPU_MAX_THREADS; - const int t34 = (n34+b34-1)/b34; - gpuRun5x0<<>>(n1,n34,n4,f); + const int b34 = (n34 + GPU_MAX_THREADS - 1) / GPU_MAX_THREADS; + const int t34 = (n34 + b34 - 1) / b34; + gpuRun5x0<<>>(n1, n34, n4, f); CHECK(cudaGetLastError()); } else if (n34 > GPU_MAX_THREADS) { - const int n01 = n0*n1; - gpuRun4x1<<>>(n1,f); + const int n01 = n0 * n1; + gpuRun4x1<<>>(n1, f); CHECK(cudaGetLastError()); - } else if (n2*n34 > GPU_MAX_THREADS) { - gpuRun3x2<<>>(f); + } else if (n2 * n34 > GPU_MAX_THREADS) { + gpuRun3x2<<>>(f); CHECK(cudaGetLastError()); } else { - gpuRun2x3<<>>(f); + gpuRun2x3<<>>(f); CHECK(cudaGetLastError()); } } -#define GPU_LAMBDA [=] __device__ + #define GPU_LAMBDA [=] __device__ #endif diff --git a/src/utils/gpu_arrays_functions.cu b/src/utils/gpu_arrays_functions.cu index 2111f0907..e36a26545 100644 --- a/src/utils/gpu_arrays_functions.cu +++ b/src/utils/gpu_arrays_functions.cu @@ -1,75 +1,65 @@ +#include + +#include "../global/global_cuda.h" #include "../utils/error_handling.h" #include "../utils/gpu.hpp" -#include "../global/global_cuda.h" #include "../utils/gpu_arrays_functions.h" -#include +void Extend_GPU_Array_Real(Real **current_array_d, int current_size, + int new_size, bool print_out) +{ + if (new_size <= current_size) return; + if (print_out) + std::cout << " Extending GPU Array, size: " << current_size + << " new_size: " << new_size << std::endl; -void Extend_GPU_Array_Real( Real **current_array_d, int current_size, int new_size, bool print_out ){ - - if ( new_size <= current_size ) return; - if ( print_out ) std::cout << " Extending GPU Array, size: " << current_size << " new_size: " << new_size << std::endl; - size_t global_free, global_total; - CudaSafeCall( cudaMemGetInfo( &global_free, &global_total ) ); + CudaSafeCall(cudaMemGetInfo(&global_free, &global_total)); cudaDeviceSynchronize(); - #ifdef PRINT_GPU_MEMORY - printf( "ReAllocating GPU Memory: %d MB free \n", (int) global_free/1000000); - #endif - - if ( global_free < new_size*sizeof(Real) ){ - printf( "ERROR: Not enough global device memory \n" ); - printf( " Available Memory: %d MB \n", (int) (global_free/1000000) ); - printf( " Requested Memory: %d MB \n", (int) (new_size*sizeof(Real)/1000000) ); +#ifdef PRINT_GPU_MEMORY + printf("ReAllocating GPU Memory: %d MB free \n", + (int)global_free / 1000000); +#endif + + if (global_free < new_size * sizeof(Real)) { + printf("ERROR: Not enough global device memory \n"); + printf(" Available Memory: %d MB \n", (int)(global_free / 1000000)); + printf(" Requested Memory: %d MB \n", + (int)(new_size * sizeof(Real) / 1000000)); // exit(-1); } - + Real *new_array_d; - CudaSafeCall( cudaMalloc((void**)&new_array_d, new_size*sizeof(Real)) ); + CudaSafeCall(cudaMalloc((void **)&new_array_d, new_size * sizeof(Real))); cudaDeviceSynchronize(); CudaCheckError(); - if ( new_array_d == NULL ){ + if (new_array_d == NULL) { std::cout << " Error When Allocating New GPU Array" << std::endl; chexit(-1); } - + // Copy the content of the original array to the new array - CudaSafeCall( cudaMemcpy( new_array_d, *current_array_d, current_size*sizeof(Real), cudaMemcpyDeviceToDevice ) ); + CudaSafeCall(cudaMemcpy(new_array_d, *current_array_d, + current_size * sizeof(Real), + cudaMemcpyDeviceToDevice)); cudaDeviceSynchronize(); CudaCheckError(); - + // size_t global_free_before, global_free_after; // CudaSafeCall( cudaMemGetInfo( &global_free_before, &global_total ) ); // cudaDeviceSynchronize(); - + // Free the original array cudaFree(*current_array_d); cudaDeviceSynchronize(); CudaCheckError(); - + // CudaSafeCall( cudaMemGetInfo( &global_free_after, &global_total ) ); // cudaDeviceSynchronize(); - // - // printf("Freed Memory: %d MB\n", (int) (global_free_after - global_free_before)/1000000 ); - + // + // printf("Freed Memory: %d MB\n", (int) (global_free_after - + // global_free_before)/1000000 ); + // Replace the pointer of the original array with the new one *current_array_d = new_array_d; - } - - - - - - - - - - - - - - - - - diff --git a/src/utils/gpu_arrays_functions.h b/src/utils/gpu_arrays_functions.h index 0eaf087c4..848b9af4d 100644 --- a/src/utils/gpu_arrays_functions.h +++ b/src/utils/gpu_arrays_functions.h @@ -1,56 +1,58 @@ #ifndef GPU_ARRAY_FUNCTIONS_H #define GPU_ARRAY_FUNCTIONS_H +#include + +#include "../global/global_cuda.h" #include "../utils/error_handling.h" #include "../utils/gpu.hpp" -#include "../global/global_cuda.h" #include "../utils/gpu_arrays_functions.h" -#include +template +void Extend_GPU_Array(T **current_array_d, int current_size, int new_size, + bool print_out) +{ + if (new_size <= current_size) return; + if (print_out) + std::cout << " Extending GPU Array, size: " << current_size + << " new_size: " << new_size << std::endl; -template< typename T > void Extend_GPU_Array( T **current_array_d, int current_size, int new_size, bool print_out ){ - - if ( new_size <= current_size ) return; - if ( print_out ) std::cout << " Extending GPU Array, size: " << current_size << " new_size: " << new_size << std::endl; - size_t global_free, global_total; - CudaSafeCall( cudaMemGetInfo( &global_free, &global_total ) ); + CudaSafeCall(cudaMemGetInfo(&global_free, &global_total)); cudaDeviceSynchronize(); - #ifdef PRINT_GPU_MEMORY - printf( "ReAllocating GPU Memory: %ld MB free \n", global_free/1000000); - #endif - - if ( global_free < new_size*sizeof(T) ){ - printf( "ERROR: Not enough global device memory \n" ); - printf( " Available Memory: %ld MB \n", global_free/1000000 ); - printf( " Requested Memory: %ld MB \n", new_size*sizeof(T)/1000000 ); +#ifdef PRINT_GPU_MEMORY + printf("ReAllocating GPU Memory: %ld MB free \n", global_free / 1000000); +#endif + + if (global_free < new_size * sizeof(T)) { + printf("ERROR: Not enough global device memory \n"); + printf(" Available Memory: %ld MB \n", global_free / 1000000); + printf(" Requested Memory: %ld MB \n", new_size * sizeof(T) / 1000000); exit(-1); } - + T *new_array_d; - CudaSafeCall( cudaMalloc((void**)&new_array_d, new_size*sizeof(T)) ); + CudaSafeCall(cudaMalloc((void **)&new_array_d, new_size * sizeof(T))); cudaDeviceSynchronize(); CudaCheckError(); - if ( new_array_d == NULL ){ + if (new_array_d == NULL) { std::cout << " Error When Allocating New GPU Array" << std::endl; chexit(-1); } - + // Copy the content of the original array to the new array - CudaSafeCall( cudaMemcpy( new_array_d, *current_array_d, current_size*sizeof(T), cudaMemcpyDeviceToDevice ) ); + CudaSafeCall(cudaMemcpy(new_array_d, *current_array_d, + current_size * sizeof(T), cudaMemcpyDeviceToDevice)); cudaDeviceSynchronize(); CudaCheckError(); - + // Free the original array cudaFree(*current_array_d); cudaDeviceSynchronize(); CudaCheckError(); - + // Replace the pointer of the original array with the new one *current_array_d = new_array_d; } - - - #endif diff --git a/src/utils/hydro_utilities.cpp b/src/utils/hydro_utilities.cpp index 7fa7c1894..bc649c75c 100644 --- a/src/utils/hydro_utilities.cpp +++ b/src/utils/hydro_utilities.cpp @@ -1,5 +1,6 @@ #include "../utils/hydro_utilities.h" -namespace hydro_utilities { +namespace hydro_utilities +{ -} // end namespace hydro_utilities \ No newline at end of file +} // end namespace hydro_utilities \ No newline at end of file diff --git a/src/utils/hydro_utilities.h b/src/utils/hydro_utilities.h index b89175835..7a2549b38 100644 --- a/src/utils/hydro_utilities.h +++ b/src/utils/hydro_utilities.h @@ -15,100 +15,117 @@ #include "../global/global_cuda.h" #include "../utils/gpu.hpp" - /*! -* INDEX OF VARIABLES -* P : pressure -* vx, vy, vz : x, y, and z velocity -* d : density -* E : energy -* T : temperature -* mx, my, mz : x, y, and z momentum -* n : number density -*/ - -namespace hydro_utilities { - - inline __host__ __device__ Real Calc_Pressure_Primitive(Real const &E, Real const &d, Real const &vx, Real const &vy, Real const &vz, Real const &gamma) { - Real P; - P = (E - 0.5 * d * (vx*vx + vy*vy + vz*vz)) * (gamma - 1.0); - P = fmax(P, TINY_NUMBER); - return P; - } - - inline __host__ __device__ Real Calc_Pressure_Conserved(Real const &E, Real const &d, Real const &mx, Real const &my, Real const &mz, Real const &gamma) { - Real P= (E - 0.5 * (mx*mx + my*my + mz*mz) / d) * (gamma - 1.); - return fmax(P, TINY_NUMBER); - } - - inline __host__ __device__ Real Calc_Temp(Real const &P, Real const &n) { - Real T = P * PRESSURE_UNIT / (n * KB); - return T; - } + * INDEX OF VARIABLES + * P : pressure + * vx, vy, vz : x, y, and z velocity + * d : density + * E : energy + * T : temperature + * mx, my, mz : x, y, and z momentum + * n : number density + */ - #ifdef DE - inline __host__ __device__ Real Calc_Temp_DE(Real const &d, Real const &ge, Real const &gamma, Real const&n) { - Real T = d * ge * (gamma - 1.0) * PRESSURE_UNIT / (n * KB); - return T; - } - #endif // DE +namespace hydro_utilities +{ + +inline __host__ __device__ Real +Calc_Pressure_Primitive(Real const &E, Real const &d, Real const &vx, + Real const &vy, Real const &vz, Real const &gamma) +{ + Real P; + P = (E - 0.5 * d * (vx * vx + vy * vy + vz * vz)) * (gamma - 1.0); + P = fmax(P, TINY_NUMBER); + return P; +} - inline __host__ __device__ Real Calc_Energy_Primitive(Real const &P, Real const &d, Real const &vx, Real const &vy, Real const &vz, Real const &gamma) { - // Compute and return energy - return (fmax(P, TINY_NUMBER)/(gamma - 1.)) + 0.5 * d * (vx*vx + vy*vy + vz*vz); - } +inline __host__ __device__ Real +Calc_Pressure_Conserved(Real const &E, Real const &d, Real const &mx, + Real const &my, Real const &mz, Real const &gamma) +{ + Real P = (E - 0.5 * (mx * mx + my * my + mz * mz) / d) * (gamma - 1.); + return fmax(P, TINY_NUMBER); +} - inline __host__ __device__ Real Get_Pressure_From_DE(Real const &E, Real const &U_total, Real const &U_advected, Real const &gamma) { - Real U, P; - Real eta = DE_ETA_1; - // Apply same condition as Byan+2013 to select the internal energy from which compute pressure. - if (U_total/E > eta) { - U = U_total; - } else { - U = U_advected; - } - P = U * (gamma - 1.0); - return P; - } +inline __host__ __device__ Real Calc_Temp(Real const &P, Real const &n) +{ + Real T = P * PRESSURE_UNIT / (n * KB); + return T; +} - /*! - * \brief Compute the kinetic energy from the density and velocities - * - * \param[in] d The density - * \param[in] vx The x velocity - * \param[in] vy The y velocity - * \param[in] vz The z velocity - * \return Real The kinetic energy - */ - inline __host__ __device__ Real Calc_Kinetic_Energy_From_Velocity(Real const &d, - Real const &vx, - Real const &vy, - Real const &vz) - { - return 0.5 * d * (vx*vx + vy*vy * vz*vz); - } +#ifdef DE +inline __host__ __device__ Real Calc_Temp_DE(Real const &d, Real const &ge, + Real const &gamma, Real const &n) +{ + Real T = d * ge * (gamma - 1.0) * PRESSURE_UNIT / (n * KB); + return T; +} +#endif // DE + +inline __host__ __device__ Real +Calc_Energy_Primitive(Real const &P, Real const &d, Real const &vx, + Real const &vy, Real const &vz, Real const &gamma) +{ + // Compute and return energy + return (fmax(P, TINY_NUMBER) / (gamma - 1.)) + + 0.5 * d * (vx * vx + vy * vy + vz * vz); +} - /*! - * \brief Compute the kinetic energy from the density and momenta - * - * \param[in] d The density - * \param[in] mx The x momentum - * \param[in] my The y momentum - * \param[in] mz The z momentum - * \return Real The kinetic energy - */ - inline __host__ __device__ Real Calc_Kinetic_Energy_From_Momentum(Real const &d, - Real const &mx, - Real const &my, - Real const &mz) - { - return (0.5 / d) * (mx*mx + my*my * mz*mz); - } +inline __host__ __device__ Real Get_Pressure_From_DE(Real const &E, + Real const &U_total, + Real const &U_advected, + Real const &gamma) +{ + Real U, P; + Real eta = DE_ETA_1; + // Apply same condition as Byan+2013 to select the internal energy from which + // compute pressure. + if (U_total / E > eta) { + U = U_total; + } else { + U = U_advected; + } + P = U * (gamma - 1.0); + return P; +} - inline __host__ __device__ Real Calc_Sound_Speed(Real const &E, Real const &d, Real const &mx, Real const &my, Real const &mz, Real const &gamma) { - Real P = Calc_Pressure_Conserved(E, d, mx, my, mz, gamma); - return sqrt(gamma * P / d); - } +/*! + * \brief Compute the kinetic energy from the density and velocities + * + * \param[in] d The density + * \param[in] vx The x velocity + * \param[in] vy The y velocity + * \param[in] vz The z velocity + * \return Real The kinetic energy + */ +inline __host__ __device__ Real Calc_Kinetic_Energy_From_Velocity( + Real const &d, Real const &vx, Real const &vy, Real const &vz) +{ + return 0.5 * d * (vx * vx + vy * vy * vz * vz); +} +/*! + * \brief Compute the kinetic energy from the density and momenta + * + * \param[in] d The density + * \param[in] mx The x momentum + * \param[in] my The y momentum + * \param[in] mz The z momentum + * \return Real The kinetic energy + */ +inline __host__ __device__ Real Calc_Kinetic_Energy_From_Momentum( + Real const &d, Real const &mx, Real const &my, Real const &mz) +{ + return (0.5 / d) * (mx * mx + my * my * mz * mz); +} +inline __host__ __device__ Real Calc_Sound_Speed(Real const &E, Real const &d, + Real const &mx, Real const &my, + Real const &mz, + Real const &gamma) +{ + Real P = Calc_Pressure_Conserved(E, d, mx, my, mz, gamma); + return sqrt(gamma * P / d); } + +} // namespace hydro_utilities diff --git a/src/utils/hydro_utilities_tests.cpp b/src/utils/hydro_utilities_tests.cpp index e0e3cf455..0348a362b 100644 --- a/src/utils/hydro_utilities_tests.cpp +++ b/src/utils/hydro_utilities_tests.cpp @@ -1,171 +1,187 @@ /*! * \file hyo_utilities_tests.cpp - * \author Robert 'Bob' Caddy (rvc@pitt.edu), Helena Richie (helenarichie@pitt.edu) - * \brief Tests for the contents of hydro_utilities.h and hydro_utilities.cpp + * \author Robert 'Bob' Caddy (rvc@pitt.edu), Helena Richie + * (helenarichie@pitt.edu) \brief Tests for the contents of hydro_utilities.h + * and hydro_utilities.cpp * */ // STL Includes -#include -#include #include +#include +#include // External Includes -#include // Include GoogleTest and related libraries/headers +#include // Include GoogleTest and related libraries/headers // Local Includes -#include "../utils/testing_utilities.h" -#include "../utils/hydro_utilities.h" #include "../global/global.h" +#include "../utils/hydro_utilities.h" +#include "../utils/testing_utilities.h" /*! -* INDEX OF VARIABLES -* P : pressure -* vx, vy, vz : x, y, and z velocity -* d : density -* E : energy -* T : temperature -* mx, my, mz : x, y, and z momentum -* n : number density -*/ + * INDEX OF VARIABLES + * P : pressure + * vx, vy, vz : x, y, and z velocity + * d : density + * E : energy + * T : temperature + * mx, my, mz : x, y, and z momentum + * n : number density + */ // ============================================================================= // Local helper functions namespace { - struct TestParams - { - double gamma = 5./3.; - std::vector d {1.0087201154e-15, 1.0756968986e2, 1.0882403847e100}; - std::vector vx {1.0378624601e-100, 1.0829278656e2, 1.0800514112e100}; - std::vector vy {1.0583469014e-100, 1.0283073464e2, 1.0725717864e100}; - std::vector vz {1.0182972216e-100, 1.0417748226e2, 1.0855352639e100}; - std::vector mx {0.2340416681e-100, 0.1019429453e2, 0.5062596954e100}; - std::vector my {0.9924582299e-100, 0.1254780684e2, 0.5939640992e100}; - std::vector mz {0.6703192739e-100, 0.5676716066e2, 0.2115881803e100}; - std::vector E {20.9342082433e-90, 20.9976906577e10, 20.9487120853e300}; - std::vector P {2.2244082909e-10, 8.6772951021e2, 6.7261085663e100}; - std::vector n {3.0087201154e-10, 1.3847303413e2, 1.0882403847e100}; - std::vector ge {4.890374019e-10, 1.0756968986e2, 3.8740982372e100}; - std::vector U_total {2.389074039e-10, 4.890374019e2, 6.8731436293e100}; - std::vector U_advected {1.3847303413e-10, 1.0756968986e2, 1.0882403847e100}; - std::vector names{"Small number case", "Medium number case", "Large number case"}; - }; -} - -TEST(tHYDROHydroUtilsCalcPressurePrimitive, CorrectInputExpectCorrectOutput) { - TestParams parameters; - std::vector fiducial_Ps {1e-20, 139983415580.5549, 1.2697896247496674e+301}; - - for (size_t i = 0; i < parameters.names.size(); i++) - { - Real test_Ps = hydro_utilities::Calc_Pressure_Primitive(parameters.E.at(i), parameters.d.at(i), parameters.vx.at(i), parameters.vy.at(i), parameters.vz.at(i), parameters.gamma); - - testingUtilities::checkResults(fiducial_Ps.at(i), test_Ps, parameters.names.at(i)); - } +struct TestParams { + double gamma = 5. / 3.; + std::vector d{1.0087201154e-15, 1.0756968986e2, 1.0882403847e100}; + std::vector vx{1.0378624601e-100, 1.0829278656e2, 1.0800514112e100}; + std::vector vy{1.0583469014e-100, 1.0283073464e2, 1.0725717864e100}; + std::vector vz{1.0182972216e-100, 1.0417748226e2, 1.0855352639e100}; + std::vector mx{0.2340416681e-100, 0.1019429453e2, 0.5062596954e100}; + std::vector my{0.9924582299e-100, 0.1254780684e2, 0.5939640992e100}; + std::vector mz{0.6703192739e-100, 0.5676716066e2, 0.2115881803e100}; + std::vector E{20.9342082433e-90, 20.9976906577e10, 20.9487120853e300}; + std::vector P{2.2244082909e-10, 8.6772951021e2, 6.7261085663e100}; + std::vector n{3.0087201154e-10, 1.3847303413e2, 1.0882403847e100}; + std::vector ge{4.890374019e-10, 1.0756968986e2, 3.8740982372e100}; + std::vector U_total{2.389074039e-10, 4.890374019e2, 6.8731436293e100}; + std::vector U_advected{1.3847303413e-10, 1.0756968986e2, + 1.0882403847e100}; + std::vector names{"Small number case", "Medium number case", + "Large number case"}; +}; +} // namespace + +TEST(tHYDROHydroUtilsCalcPressurePrimitive, CorrectInputExpectCorrectOutput) +{ + TestParams parameters; + std::vector fiducial_Ps{1e-20, 139983415580.5549, + 1.2697896247496674e+301}; + + for (size_t i = 0; i < parameters.names.size(); i++) { + Real test_Ps = hydro_utilities::Calc_Pressure_Primitive( + parameters.E.at(i), parameters.d.at(i), parameters.vx.at(i), + parameters.vy.at(i), parameters.vz.at(i), parameters.gamma); + + testingUtilities::checkResults(fiducial_Ps.at(i), test_Ps, + parameters.names.at(i)); + } } -TEST(tHYDROHydroUtilsCalcPressureConserved, CorrectInputExpectCorrectOutput) { - TestParams parameters; - std::vector fiducial_Ps {1e-20, 139984604373.87094, 1.3965808056866668e+301}; - - for (size_t i = 0; i < parameters.names.size(); i++) - { - Real test_Ps = hydro_utilities::Calc_Pressure_Conserved(parameters.E.at(i), parameters.d.at(i), parameters.mx.at(i), parameters.my.at(i), parameters.mz.at(i), parameters.gamma); - - testingUtilities::checkResults(fiducial_Ps.at(i), test_Ps, parameters.names.at(i)); - } +TEST(tHYDROHydroUtilsCalcPressureConserved, CorrectInputExpectCorrectOutput) +{ + TestParams parameters; + std::vector fiducial_Ps{1e-20, 139984604373.87094, + 1.3965808056866668e+301}; + + for (size_t i = 0; i < parameters.names.size(); i++) { + Real test_Ps = hydro_utilities::Calc_Pressure_Conserved( + parameters.E.at(i), parameters.d.at(i), parameters.mx.at(i), + parameters.my.at(i), parameters.mz.at(i), parameters.gamma); + + testingUtilities::checkResults(fiducial_Ps.at(i), test_Ps, + parameters.names.at(i)); + } } -TEST(tHYDROHydroUtilsCalcTemp, CorrectInputExpectCorrectOutput) { - TestParams parameters; - std::vector fiducial_Ts {3465185.0560059389, 29370603.906644326, 28968949.83344138}; +TEST(tHYDROHydroUtilsCalcTemp, CorrectInputExpectCorrectOutput) +{ + TestParams parameters; + std::vector fiducial_Ts{3465185.0560059389, 29370603.906644326, + 28968949.83344138}; - for (size_t i = 0; i < parameters.names.size(); i++) - { - Real test_Ts = hydro_utilities::Calc_Temp(parameters.P.at(i), parameters.n.at(i)); + for (size_t i = 0; i < parameters.names.size(); i++) { + Real test_Ts = + hydro_utilities::Calc_Temp(parameters.P.at(i), parameters.n.at(i)); - testingUtilities::checkResults(fiducial_Ts.at(i), test_Ts, parameters.names.at(i)); - } + testingUtilities::checkResults(fiducial_Ts.at(i), test_Ts, + parameters.names.at(i)); + } } #ifdef DE -TEST(tHYDROHydroUtilsCalcTempDE, CorrectInputExpectCorrectOutput) { - TestParams parameters; - std::vector fiducial_Ts {5.123106988008801e-09, 261106139.02514684, 1.2105231166585662e+107}; - - for (size_t i = 0; i < parameters.names.size(); i++) - { - Real test_Ts = hydro_utilities::Calc_Temp_DE(parameters.d.at(i), parameters.ge.at(i), parameters.gamma, parameters.n.at(i)); - - testingUtilities::checkResults(fiducial_Ts.at(i), test_Ts, parameters.names.at(i)); - } +TEST(tHYDROHydroUtilsCalcTempDE, CorrectInputExpectCorrectOutput) +{ + TestParams parameters; + std::vector fiducial_Ts{5.123106988008801e-09, 261106139.02514684, + 1.2105231166585662e+107}; + + for (size_t i = 0; i < parameters.names.size(); i++) { + Real test_Ts = + hydro_utilities::Calc_Temp_DE(parameters.d.at(i), parameters.ge.at(i), + parameters.gamma, parameters.n.at(i)); + + testingUtilities::checkResults(fiducial_Ts.at(i), test_Ts, + parameters.names.at(i)); + } } -#endif // DE - -TEST(tHYDROHydroUtilsCalcEnergyPrimitive, CorrectInputExpectCorrectOutput) { - TestParams parameters; - std::vector fiducial_Es {3.3366124363499997e-10, 1784507.7619407175, 1.9018677140549926e+300}; +#endif // DE - for (size_t i = 0; i < parameters.names.size(); i++) - { - Real test_Es = hydro_utilities::Calc_Energy_Primitive(parameters.P.at(i), parameters.d.at(i), parameters.vx.at(i), parameters.vy.at(i), parameters.vz.at(i), parameters.gamma); - - testingUtilities::checkResults(fiducial_Es.at(i), test_Es, parameters.names.at(i)); - } +TEST(tHYDROHydroUtilsCalcEnergyPrimitive, CorrectInputExpectCorrectOutput) +{ + TestParams parameters; + std::vector fiducial_Es{3.3366124363499997e-10, 1784507.7619407175, + 1.9018677140549926e+300}; + + for (size_t i = 0; i < parameters.names.size(); i++) { + Real test_Es = hydro_utilities::Calc_Energy_Primitive( + parameters.P.at(i), parameters.d.at(i), parameters.vx.at(i), + parameters.vy.at(i), parameters.vz.at(i), parameters.gamma); + + testingUtilities::checkResults(fiducial_Es.at(i), test_Es, + parameters.names.at(i)); + } } -TEST(tHYDROHydroUtilsGetPressureFromDE, CorrectInputExpectCorrectOutput) { - TestParams parameters; - std::vector fiducial_Ps {1.5927160260000002e-10, 71.713126573333341, 7.2549358980000001e+99}; - - for (size_t i = 0; i < parameters.names.size(); i++) - { - Real test_Ps = hydro_utilities::Get_Pressure_From_DE(parameters.E.at(i), parameters.U_total.at(i), parameters.U_advected.at(i), parameters.gamma); - - testingUtilities::checkResults(fiducial_Ps.at(i), test_Ps, parameters.names.at(i)); - } +TEST(tHYDROHydroUtilsGetPressureFromDE, CorrectInputExpectCorrectOutput) +{ + TestParams parameters; + std::vector fiducial_Ps{1.5927160260000002e-10, 71.713126573333341, + 7.2549358980000001e+99}; + + for (size_t i = 0; i < parameters.names.size(); i++) { + Real test_Ps = hydro_utilities::Get_Pressure_From_DE( + parameters.E.at(i), parameters.U_total.at(i), + parameters.U_advected.at(i), parameters.gamma); + + testingUtilities::checkResults(fiducial_Ps.at(i), test_Ps, + parameters.names.at(i)); + } } -TEST(tHYDROtMHDCalcKineticEnergyFromVelocity, CorrectInputExpectCorrectOutput) { - TestParams parameters; - std::vector fiducialEnergies{0.0, - 6.307524975350106e-145, - 7.3762470327090601e+249}; - double const coef = 1E-50; - - for (size_t i = 0; i < parameters.names.size(); i++) - { - Real testEnergy = hydro_utilities::Calc_Kinetic_Energy_From_Velocity( - coef*parameters.d.at(i), - coef*parameters.vx.at(i), - coef*parameters.vy.at(i), - coef*parameters.vz.at(i)); - - testingUtilities::checkResults(fiducialEnergies.at(i), - testEnergy, - parameters.names.at(i)); - } +TEST(tHYDROtMHDCalcKineticEnergyFromVelocity, CorrectInputExpectCorrectOutput) +{ + TestParams parameters; + std::vector fiducialEnergies{0.0, 6.307524975350106e-145, + 7.3762470327090601e+249}; + double const coef = 1E-50; + + for (size_t i = 0; i < parameters.names.size(); i++) { + Real testEnergy = hydro_utilities::Calc_Kinetic_Energy_From_Velocity( + coef * parameters.d.at(i), coef * parameters.vx.at(i), + coef * parameters.vy.at(i), coef * parameters.vz.at(i)); + + testingUtilities::checkResults(fiducialEnergies.at(i), testEnergy, + parameters.names.at(i)); + } } -TEST(tHYDROtMHDCalcKineticEnergyFromMomentum, CorrectInputExpectCorrectOutput) { - TestParams parameters; - std::vector fiducialEnergies{0.0, - 0.0, - 7.2568536478335773e+147}; - double const coef = 1E-50; - - for (size_t i = 0; i < parameters.names.size(); i++) - { - Real testEnergy = hydro_utilities::Calc_Kinetic_Energy_From_Momentum( - coef*parameters.d.at(i), - coef*parameters.mx.at(i), - coef*parameters.my.at(i), - coef*parameters.mz.at(i)); - - testingUtilities::checkResults(fiducialEnergies.at(i), - testEnergy, - parameters.names.at(i)); - } +TEST(tHYDROtMHDCalcKineticEnergyFromMomentum, CorrectInputExpectCorrectOutput) +{ + TestParams parameters; + std::vector fiducialEnergies{0.0, 0.0, 7.2568536478335773e+147}; + double const coef = 1E-50; + + for (size_t i = 0; i < parameters.names.size(); i++) { + Real testEnergy = hydro_utilities::Calc_Kinetic_Energy_From_Momentum( + coef * parameters.d.at(i), coef * parameters.mx.at(i), + coef * parameters.my.at(i), coef * parameters.mz.at(i)); + + testingUtilities::checkResults(fiducialEnergies.at(i), testEnergy, + parameters.names.at(i)); + } } \ No newline at end of file diff --git a/src/utils/math_utilities.h b/src/utils/math_utilities.h index 735cec996..9c329b1d7 100644 --- a/src/utils/math_utilities.h +++ b/src/utils/math_utilities.h @@ -20,73 +20,70 @@ namespace math_utils { - // ========================================================================= - /*! - * \brief Rotate cartesian coordinates. All arguments are cast to double - * then rotated. If the type is 'int' then the value is rounded to the - * nearest int - * - * \details Rotation such that when pitch=90 and yaw=0 x1_rot = -x3 and when - * pitch=0 and yaw=90 x1_rot = -x2 - * - * \tparam T The return type - * \param[in] x_1 x1 coordinate - * \param[in] x_2 x2 coordinate - * \param[in] x_3 x3 coordinate - * \param[in] pitch Pitch angle in radians - * \param[in] yaw Yaw angle in radians - * \return std::tuple The new, rotated, coordinates in the - * order . Intended to be captured with structured binding - */ - template - inline std::tuple rotateCoords(Real const &x_1, Real const &x_2, - Real const &x_3, Real const &pitch, Real const &yaw) - { - // Compute the sines and cosines. Correct for floating point errors if - // the angle is 0.5*M_PI - Real const sin_yaw = std::sin(yaw); - Real const cos_yaw = (yaw==0.5*M_PI)? 0: std::cos(yaw); - Real const sin_pitch = std::sin(pitch); - Real const cos_pitch = (pitch==0.5*M_PI)? 0: std::cos(pitch); +// ========================================================================= +/*! + * \brief Rotate cartesian coordinates. All arguments are cast to double + * then rotated. If the type is 'int' then the value is rounded to the + * nearest int + * + * \details Rotation such that when pitch=90 and yaw=0 x1_rot = -x3 and when + * pitch=0 and yaw=90 x1_rot = -x2 + * + * \tparam T The return type + * \param[in] x_1 x1 coordinate + * \param[in] x_2 x2 coordinate + * \param[in] x_3 x3 coordinate + * \param[in] pitch Pitch angle in radians + * \param[in] yaw Yaw angle in radians + * \return std::tuple The new, rotated, coordinates in the + * order . Intended to be captured with structured binding + */ +template +inline std::tuple rotateCoords(Real const &x_1, Real const &x_2, + Real const &x_3, Real const &pitch, + Real const &yaw) +{ + // Compute the sines and cosines. Correct for floating point errors if + // the angle is 0.5*M_PI + Real const sin_yaw = std::sin(yaw); + Real const cos_yaw = (yaw == 0.5 * M_PI) ? 0 : std::cos(yaw); + Real const sin_pitch = std::sin(pitch); + Real const cos_pitch = (pitch == 0.5 * M_PI) ? 0 : std::cos(pitch); - // Perform the rotation - Real const x_1_rot = (x_1 * cos_pitch * cos_yaw) + (x_2 * sin_yaw) + (x_3 * sin_pitch * cos_yaw); - Real const x_2_rot = (x_1 * cos_pitch * sin_yaw) + (x_2 * cos_yaw) + (x_3 * sin_pitch * sin_yaw); - Real const x_3_rot = (x_1 * sin_pitch) + (x_3 * cos_pitch); + // Perform the rotation + Real const x_1_rot = (x_1 * cos_pitch * cos_yaw) + (x_2 * sin_yaw) + + (x_3 * sin_pitch * cos_yaw); + Real const x_2_rot = (x_1 * cos_pitch * sin_yaw) + (x_2 * cos_yaw) + + (x_3 * sin_pitch * sin_yaw); + Real const x_3_rot = (x_1 * sin_pitch) + (x_3 * cos_pitch); - if (std::is_same::value) - { - return {round(x_1_rot), - round(x_2_rot), - round(x_3_rot)}; - } - else if (std::is_same::value) - { - return {x_1_rot, x_2_rot, x_3_rot}; - } - } - // ========================================================================= + if (std::is_same::value) { + return {round(x_1_rot), round(x_2_rot), round(x_3_rot)}; + } else if (std::is_same::value) { + return {x_1_rot, x_2_rot, x_3_rot}; + } +} +// ========================================================================= - // ========================================================================= - /*! - * \brief Compute the dot product of a and b. - * - * \param[in] a1 The first element of a - * \param[in] a2 The second element of a - * \param[in] a3 The third element of a - * \param[in] b1 The first element of b - * \param[in] b2 The second element of b - * \param[in] b3 The third element of b - * - * \return Real The dot product of a and b - */ - inline __device__ __host__ Real dotProduct(Real const &a1, - Real const &a2, - Real const &a3, - Real const &b1, - Real const &b2, - Real const &b3) - {return a1*b1 + ((a2*b2) + (a3*b3));}; - // ========================================================================= +// ========================================================================= +/*! + * \brief Compute the dot product of a and b. + * + * \param[in] a1 The first element of a + * \param[in] a2 The second element of a + * \param[in] a3 The third element of a + * \param[in] b1 The first element of b + * \param[in] b2 The second element of b + * \param[in] b3 The third element of b + * + * \return Real The dot product of a and b + */ +inline __device__ __host__ Real dotProduct(Real const &a1, Real const &a2, + Real const &a3, Real const &b1, + Real const &b2, Real const &b3) +{ + return a1 * b1 + ((a2 * b2) + (a3 * b3)); +}; +// ========================================================================= -}//math_utils +} // namespace math_utils diff --git a/src/utils/math_utilities_tests.cpp b/src/utils/math_utilities_tests.cpp index 38a2902d6..37c4596bc 100644 --- a/src/utils/math_utilities_tests.cpp +++ b/src/utils/math_utilities_tests.cpp @@ -9,56 +9,55 @@ #include // External Includes -#include // Include GoogleTest and related libraries/headers +#include // Include GoogleTest and related libraries/headers // Local Includes -#include "../utils/testing_utilities.h" -#include "../utils/math_utilities.h" #include "../global/global.h" +#include "../utils/math_utilities.h" +#include "../utils/testing_utilities.h" // ============================================================================= -TEST(tALLRotateCoords, - CorrectInputExpectCorrectOutput) +TEST(tALLRotateCoords, CorrectInputExpectCorrectOutput) { - // Fiducial values - double const x_1 = 19.2497333410; - double const x_2 = 60.5197699003; - double const x_3 = 86.0613942621; - double const pitch = 1.239 * M_PI; - double const yaw = 0.171 * M_PI; - double const x_1_rot_fid = -31.565679455456568; - double const x_2_rot_fid = 14.745363873361605; - double const x_3_rot_fid = -76.05402749550727; - - auto [x_1_rot, x_2_rot, x_3_rot] = math_utils::rotateCoords(x_1, x_2, x_3, pitch, yaw); - - testingUtilities::checkResults<0>(x_1_rot_fid, x_1_rot, "x_1 rotated values"); - testingUtilities::checkResults<0>(x_2_rot_fid, x_2_rot, "x_2 rotated values"); - testingUtilities::checkResults<0>(x_3_rot_fid, x_3_rot, "x_3 rotated values"); + // Fiducial values + double const x_1 = 19.2497333410; + double const x_2 = 60.5197699003; + double const x_3 = 86.0613942621; + double const pitch = 1.239 * M_PI; + double const yaw = 0.171 * M_PI; + double const x_1_rot_fid = -31.565679455456568; + double const x_2_rot_fid = 14.745363873361605; + double const x_3_rot_fid = -76.05402749550727; + + auto [x_1_rot, x_2_rot, x_3_rot] = + math_utils::rotateCoords(x_1, x_2, x_3, pitch, yaw); + + testingUtilities::checkResults<0>(x_1_rot_fid, x_1_rot, "x_1 rotated values"); + testingUtilities::checkResults<0>(x_2_rot_fid, x_2_rot, "x_2 rotated values"); + testingUtilities::checkResults<0>(x_3_rot_fid, x_3_rot, "x_3 rotated values"); } // ============================================================================= // ========================================================================= /*! - * \brief Test the math_utils::dotProduct function - * - */ -TEST(tALLDotProduct, - CorrectInputExpectCorrectOutput) + * \brief Test the math_utils::dotProduct function + * + */ +TEST(tALLDotProduct, CorrectInputExpectCorrectOutput) { - std::vector a{21.503067766457753, 48.316634031589935, 81.12177317622657}, - b{38.504606872151484, 18.984145880030045, 89.52561861038686}; + std::vector a{21.503067766457753, 48.316634031589935, + 81.12177317622657}, + b{38.504606872151484, 18.984145880030045, 89.52561861038686}; - double const fiducialDotProduct = 9007.6941261535867; + double const fiducialDotProduct = 9007.6941261535867; - double testDotProduct; + double testDotProduct; - testDotProduct = math_utils::dotProduct(a.at(0), a.at(1), a.at(2), - b.at(0), b.at(1), b.at(2)); + testDotProduct = math_utils::dotProduct(a.at(0), a.at(1), a.at(2), b.at(0), + b.at(1), b.at(2)); - // Now check results - testingUtilities::checkResults(fiducialDotProduct, - testDotProduct, - "dot product"); + // Now check results + testingUtilities::checkResults(fiducialDotProduct, testDotProduct, + "dot product"); } // ========================================================================= \ No newline at end of file diff --git a/src/utils/mhd_utilities.cu b/src/utils/mhd_utilities.cu index b522b61b4..5205b6c17 100644 --- a/src/utils/mhd_utilities.cu +++ b/src/utils/mhd_utilities.cu @@ -19,4 +19,4 @@ namespace mhd::utils { -} // end namespace mhd::utils \ No newline at end of file +} // end namespace mhd::utils \ No newline at end of file diff --git a/src/utils/mhd_utilities.h b/src/utils/mhd_utilities.h index 48fb59e0c..288de6314 100644 --- a/src/utils/mhd_utilities.h +++ b/src/utils/mhd_utilities.h @@ -14,323 +14,323 @@ // Local Includes #include "../global/global.h" #include "../global/global_cuda.h" -#include "../utils/gpu.hpp" #include "../utils/cuda_utilities.h" +#include "../utils/gpu.hpp" -namespace mhd::utils{ - /*! - * \brief Namespace for functions required by functions within the mhd::utils - * namespace. Everything in this name space should be regarded as private - * but is made accesible for testing - * - */ - namespace _internal - { - // ===================================================================== - /*! - * \brief Compute the fast or slow magnetosonic wave speeds - * - * \param density The density - * \param gasPressure The gas pressure - * \param magneticX Magnetic field in the x-direction - * \param magneticY Magnetic field in the y-direction - * \param magneticZ Magnetic field in the z-direction - * \param gamma The adiabatic index - * \param waveChoice Which speed to compute. If +1 then compute the - * speed of the fast magnetosonic wave, if -1 then the speed of the slow - * magnetosonic wave - * \return Real The speed of the fast or slow magnetosonic wave - */ - inline __host__ __device__ Real _magnetosonicSpeed(Real const &density, - Real const &gasPressure, - Real const &magneticX, - Real const &magneticY, - Real const &magneticZ, - Real const &gamma, - Real const &waveChoice) - { - // Compute the sound speed - Real bXSquared = magneticX * magneticX; - Real bSquared = bXSquared + ((magneticY*magneticY) + (magneticZ*magneticZ)); +namespace mhd::utils +{ +/*! + * \brief Namespace for functions required by functions within the mhd::utils + * namespace. Everything in this name space should be regarded as private + * but is made accesible for testing + * + */ +namespace _internal +{ +// ===================================================================== +/*! + * \brief Compute the fast or slow magnetosonic wave speeds + * + * \param density The density + * \param gasPressure The gas pressure + * \param magneticX Magnetic field in the x-direction + * \param magneticY Magnetic field in the y-direction + * \param magneticZ Magnetic field in the z-direction + * \param gamma The adiabatic index + * \param waveChoice Which speed to compute. If +1 then compute the + * speed of the fast magnetosonic wave, if -1 then the speed of the slow + * magnetosonic wave + * \return Real The speed of the fast or slow magnetosonic wave + */ +inline __host__ __device__ Real _magnetosonicSpeed( + Real const &density, Real const &gasPressure, Real const &magneticX, + Real const &magneticY, Real const &magneticZ, Real const &gamma, + Real const &waveChoice) +{ + // Compute the sound speed + Real bXSquared = magneticX * magneticX; + Real bSquared = + bXSquared + ((magneticY * magneticY) + (magneticZ * magneticZ)); - Real term1 = gamma * gasPressure + bSquared; + Real term1 = gamma * gasPressure + bSquared; - Real term2 = (term1*term1) - 4. * gamma * gasPressure * bXSquared; - term2 = sqrt(term2); + Real term2 = (term1 * term1) - 4. * gamma * gasPressure * bXSquared; + term2 = sqrt(term2); - return sqrt( (term1 + waveChoice * term2) / (2.0 * fmax(density, TINY_NUMBER)) ); - } - // ===================================================================== - }// mhd::utils::_internal namespace + return sqrt((term1 + waveChoice * term2) / + (2.0 * fmax(density, TINY_NUMBER))); +} +// ===================================================================== +} // namespace _internal - // ========================================================================= - /*! - * \brief Compute the energy in a cell. If MHD is not defined then simply - * return the hydro only energy - * - * \param[in] pressure The gas pressure - * \param[in] density The density - * \param[in] velocityX Velocity in the x-direction - * \param[in] velocityY Velocity in the y-direction - * \param[in] velocityZ Velocity in the z-direction - * \param[in] magneticX Magnetic field in the x-direction - * \param[in] magneticY Magnetic field in the y-direction - * \param[in] magneticZ Magnetic field in the z-direction - * \param[in] gamma The adiabatic index - * \return Real The energy within a cell - */ - inline __host__ __device__ Real computeEnergy(Real const &pressure, - Real const &density, - Real const &velocityX, - Real const &velocityY, - Real const &velocityZ, - Real const &magneticX, - Real const &magneticY, - Real const &magneticZ, - Real const &gamma) - { - // Compute and return energy - Real energy = (fmax(pressure,TINY_NUMBER)/(gamma - 1.)) - + 0.5 * density * (velocityX*velocityX + ((velocityY*velocityY) + (velocityZ*velocityZ))); - #ifdef MHD - energy += 0.5 * (magneticX*magneticX + ((magneticY*magneticY) + (magneticZ*magneticZ))); - #endif //MHD +// ========================================================================= +/*! + * \brief Compute the energy in a cell. If MHD is not defined then simply + * return the hydro only energy + * + * \param[in] pressure The gas pressure + * \param[in] density The density + * \param[in] velocityX Velocity in the x-direction + * \param[in] velocityY Velocity in the y-direction + * \param[in] velocityZ Velocity in the z-direction + * \param[in] magneticX Magnetic field in the x-direction + * \param[in] magneticY Magnetic field in the y-direction + * \param[in] magneticZ Magnetic field in the z-direction + * \param[in] gamma The adiabatic index + * \return Real The energy within a cell + */ +inline __host__ __device__ Real computeEnergy( + Real const &pressure, Real const &density, Real const &velocityX, + Real const &velocityY, Real const &velocityZ, Real const &magneticX, + Real const &magneticY, Real const &magneticZ, Real const &gamma) +{ + // Compute and return energy + Real energy = (fmax(pressure, TINY_NUMBER) / (gamma - 1.)) + + 0.5 * density * + (velocityX * velocityX + + ((velocityY * velocityY) + (velocityZ * velocityZ))); +#ifdef MHD + energy += 0.5 * (magneticX * magneticX + + ((magneticY * magneticY) + (magneticZ * magneticZ))); +#endif // MHD - return energy; - } - // ========================================================================= + return energy; +} +// ========================================================================= - // ========================================================================= - /*! - * \brief Compute the MHD gas pressure in a cell - * - * \param[in] energy The energy - * \param[in] density The density - * \param[in] momentumX Momentum in the x-direction - * \param[in] momentumY Momentum in the y-direction - * \param[in] momentumZ Momentum in the z-direction - * \param[in] magneticX Magnetic field in the x-direction - * \param[in] magneticY Magnetic field in the y-direction - * \param[in] magneticZ Magnetic field in the z-direction - * \param[in] gamma The adiabatic index - * \return Real The gas pressure in a cell - */ - inline __host__ __device__ Real computeGasPressure(Real const &energy, - Real const &density, - Real const &momentumX, - Real const &momentumY, - Real const &momentumZ, - Real const &magneticX, - Real const &magneticY, - Real const &magneticZ, - Real const &gamma) - { - Real pressure = (gamma - 1.) - * (energy - - 0.5 * (momentumX*momentumX + ((momentumY*momentumY) + (momentumZ*momentumZ))) / density - - 0.5 * (magneticX*magneticX + ((magneticY*magneticY) + (magneticZ*magneticZ)))); +// ========================================================================= +/*! + * \brief Compute the MHD gas pressure in a cell + * + * \param[in] energy The energy + * \param[in] density The density + * \param[in] momentumX Momentum in the x-direction + * \param[in] momentumY Momentum in the y-direction + * \param[in] momentumZ Momentum in the z-direction + * \param[in] magneticX Magnetic field in the x-direction + * \param[in] magneticY Magnetic field in the y-direction + * \param[in] magneticZ Magnetic field in the z-direction + * \param[in] gamma The adiabatic index + * \return Real The gas pressure in a cell + */ +inline __host__ __device__ Real computeGasPressure( + Real const &energy, Real const &density, Real const &momentumX, + Real const &momentumY, Real const &momentumZ, Real const &magneticX, + Real const &magneticY, Real const &magneticZ, Real const &gamma) +{ + Real pressure = (gamma - 1.) * + (energy - + 0.5 * + (momentumX * momentumX + + ((momentumY * momentumY) + (momentumZ * momentumZ))) / + density - + 0.5 * (magneticX * magneticX + + ((magneticY * magneticY) + (magneticZ * magneticZ)))); - return fmax(pressure, TINY_NUMBER); - } - // ========================================================================= + return fmax(pressure, TINY_NUMBER); +} +// ========================================================================= - // ========================================================================= - /*! - * \brief Compute the MHD thermal energy in a cell - * - * \param[in] energyTot The total energy - * \param[in] density The density - * \param[in] momentumX Momentum in the x-direction - * \param[in] momentumY Momentum in the y-direction - * \param[in] momentumZ Momentum in the z-direction - * \param[in] magneticX Magnetic field in the x-direction - * \param[in] magneticY Magnetic field in the y-direction - * \param[in] magneticZ Magnetic field in the z-direction - * \param[in] gamma The adiabatic index - * \return Real The thermal energy in a cell - */ - inline __host__ __device__ Real computeThermalEnergy(Real const &energyTot, - Real const &density, - Real const &momentumX, - Real const &momentumY, - Real const &momentumZ, - Real const &magneticX, - Real const &magneticY, - Real const &magneticZ, - Real const &gamma) - { - return energyTot - 0.5 * (momentumX*momentumX + ((momentumY*momentumY) + (momentumZ*momentumZ))) / fmax(density,TINY_NUMBER) - - 0.5 * (magneticX*magneticX + ((magneticY*magneticY) + (magneticZ*magneticZ))); - } - // ========================================================================= +// ========================================================================= +/*! + * \brief Compute the MHD thermal energy in a cell + * + * \param[in] energyTot The total energy + * \param[in] density The density + * \param[in] momentumX Momentum in the x-direction + * \param[in] momentumY Momentum in the y-direction + * \param[in] momentumZ Momentum in the z-direction + * \param[in] magneticX Magnetic field in the x-direction + * \param[in] magneticY Magnetic field in the y-direction + * \param[in] magneticZ Magnetic field in the z-direction + * \param[in] gamma The adiabatic index + * \return Real The thermal energy in a cell + */ +inline __host__ __device__ Real computeThermalEnergy( + Real const &energyTot, Real const &density, Real const &momentumX, + Real const &momentumY, Real const &momentumZ, Real const &magneticX, + Real const &magneticY, Real const &magneticZ, Real const &gamma) +{ + return energyTot - + 0.5 * + (momentumX * momentumX + + ((momentumY * momentumY) + (momentumZ * momentumZ))) / + fmax(density, TINY_NUMBER) - + 0.5 * (magneticX * magneticX + + ((magneticY * magneticY) + (magneticZ * magneticZ))); +} +// ========================================================================= - // ========================================================================= - /*! - * \brief Compute the magnetic energy - * - * \param[in] magneticX The magnetic field in the X-direction - * \param[in] magneticY The magnetic field in the Y-direction - * \param[in] magneticZ The magnetic field in the Z-direction - * \return Real The magnetic energy - */ - inline __host__ __device__ Real computeMagneticEnergy(Real const &magneticX, - Real const &magneticY, - Real const &magneticZ) - { - return 0.5 * (magneticX*magneticX + ((magneticY*magneticY) + (magneticZ*magneticZ))); - } - // ========================================================================= +// ========================================================================= +/*! + * \brief Compute the magnetic energy + * + * \param[in] magneticX The magnetic field in the X-direction + * \param[in] magneticY The magnetic field in the Y-direction + * \param[in] magneticZ The magnetic field in the Z-direction + * \return Real The magnetic energy + */ +inline __host__ __device__ Real computeMagneticEnergy(Real const &magneticX, + Real const &magneticY, + Real const &magneticZ) +{ + return 0.5 * (magneticX * magneticX + + ((magneticY * magneticY) + (magneticZ * magneticZ))); +} +// ========================================================================= - // ========================================================================= - /*! - * \brief Compute the total MHD pressure. I.e. magnetic pressure + gas - * pressure - * - * \param[in] gasPressure The gas pressure - * \param[in] magneticX Magnetic field in the x-direction - * \param[in] magneticY Magnetic field in the y-direction - * \param[in] magneticZ Magnetic field in the z-direction - * \return Real The total MHD pressure - */ - inline __host__ __device__ Real computeTotalPressure(Real const &gasPressure, - Real const &magneticX, - Real const &magneticY, - Real const &magneticZ) - { - Real pTot = gasPressure + 0.5 * (magneticX*magneticX + ((magneticY*magneticY) + (magneticZ*magneticZ))); +// ========================================================================= +/*! + * \brief Compute the total MHD pressure. I.e. magnetic pressure + gas + * pressure + * + * \param[in] gasPressure The gas pressure + * \param[in] magneticX Magnetic field in the x-direction + * \param[in] magneticY Magnetic field in the y-direction + * \param[in] magneticZ Magnetic field in the z-direction + * \return Real The total MHD pressure + */ +inline __host__ __device__ Real computeTotalPressure(Real const &gasPressure, + Real const &magneticX, + Real const &magneticY, + Real const &magneticZ) +{ + Real pTot = + gasPressure + 0.5 * (magneticX * magneticX + + ((magneticY * magneticY) + (magneticZ * magneticZ))); - return fmax(pTot, TINY_NUMBER); - } - // ========================================================================= + return fmax(pTot, TINY_NUMBER); +} +// ========================================================================= - // ========================================================================= - /*! - * \brief Compute the speed of the fast magnetosonic wave - * - * \param density The gas pressure - * \param pressure The density - * \param magneticX Magnetic field in the x-direction - * \param magneticY Magnetic field in the y-direction - * \param magneticZ Magnetic field in the z-direction - * \param gamma The adiabatic index - * \return Real The speed of the fast magnetosonic wave - */ - inline __host__ __device__ Real fastMagnetosonicSpeed(Real const &density, - Real const &pressure, - Real const &magneticX, - Real const &magneticY, - Real const &magneticZ, - Real const &gamma) - { - // Compute the sound speed - return mhd::utils::_internal::_magnetosonicSpeed(density, - pressure, - magneticX, - magneticY, - magneticZ, - gamma, - 1.0); - } - // ========================================================================= +// ========================================================================= +/*! + * \brief Compute the speed of the fast magnetosonic wave + * + * \param density The gas pressure + * \param pressure The density + * \param magneticX Magnetic field in the x-direction + * \param magneticY Magnetic field in the y-direction + * \param magneticZ Magnetic field in the z-direction + * \param gamma The adiabatic index + * \return Real The speed of the fast magnetosonic wave + */ +inline __host__ __device__ Real fastMagnetosonicSpeed( + Real const &density, Real const &pressure, Real const &magneticX, + Real const &magneticY, Real const &magneticZ, Real const &gamma) +{ + // Compute the sound speed + return mhd::utils::_internal::_magnetosonicSpeed( + density, pressure, magneticX, magneticY, magneticZ, gamma, 1.0); +} +// ========================================================================= - // ========================================================================= - /*! - * \brief Compute the speed of the slow magnetosonic wave - * - * \param density The gas pressure - * \param pressure The density - * \param magneticX Magnetic field in the x-direction - * \param magneticY Magnetic field in the y-direction - * \param magneticZ Magnetic field in the z-direction - * \param gamma The adiabatic index - * \return Real The speed of the slow magnetosonic wave - */ - inline __host__ __device__ Real slowMagnetosonicSpeed(Real const &density, - Real const &pressure, - Real const &magneticX, - Real const &magneticY, - Real const &magneticZ, - Real const &gamma) - { - // Compute the sound speed - return mhd::utils::_internal::_magnetosonicSpeed(density, - pressure, - magneticX, - magneticY, - magneticZ, - gamma, - -1.0); - } - // ========================================================================= +// ========================================================================= +/*! + * \brief Compute the speed of the slow magnetosonic wave + * + * \param density The gas pressure + * \param pressure The density + * \param magneticX Magnetic field in the x-direction + * \param magneticY Magnetic field in the y-direction + * \param magneticZ Magnetic field in the z-direction + * \param gamma The adiabatic index + * \return Real The speed of the slow magnetosonic wave + */ +inline __host__ __device__ Real slowMagnetosonicSpeed( + Real const &density, Real const &pressure, Real const &magneticX, + Real const &magneticY, Real const &magneticZ, Real const &gamma) +{ + // Compute the sound speed + return mhd::utils::_internal::_magnetosonicSpeed( + density, pressure, magneticX, magneticY, magneticZ, gamma, -1.0); +} +// ========================================================================= - // ========================================================================= - /*! - * \brief Compute the speed of the Alfven wave in a cell - * - * \param[in] magneticX The magnetic field in the x direction, ie the direction - * along with the Riemann solver is acting - * \param[in] density The density in the cell - * \return Real The Alfven wave speed - */ - inline __host__ __device__ Real alfvenSpeed(Real const &magneticX, - Real const &density) - { - // Compute the Alfven wave speed - return fabs(magneticX) / sqrt(fmax(density,TINY_NUMBER)); - } - // ========================================================================= +// ========================================================================= +/*! + * \brief Compute the speed of the Alfven wave in a cell + * + * \param[in] magneticX The magnetic field in the x direction, ie the direction + * along with the Riemann solver is acting + * \param[in] density The density in the cell + * \return Real The Alfven wave speed + */ +inline __host__ __device__ Real alfvenSpeed(Real const &magneticX, + Real const &density) +{ + // Compute the Alfven wave speed + return fabs(magneticX) / sqrt(fmax(density, TINY_NUMBER)); +} +// ========================================================================= - // ========================================================================= - #ifdef MHD - /*! - * \brief Compute the cell centered average of the magnetic fields in a - * given cell - * - * \param[in] dev_conserved A pointer to the device array of conserved variables - * \param[in] id The 1D index into each grid subarray. - * \param[in] xid The x index - * \param[in] yid The y index - * \param[in] zid The z index - * \param[in] n_cells The total number of cells - * \param[in] nx The number of cells in the x-direction - * \param[in] ny The number of cells in the y-direction - * \param[out] avgBx The cell centered average magnetic field in the x-direction - * \param[out] avgBy The cell centered average magnetic field in the y-direction - * \param[out] avgBz The cell centered average magnetic field in the z-direction - * - * \return Real local struct with the X, Y, and Z cell centered magnetic - * fields. Intended to be called with structured binding like `auto [x, y, - * z] = mhd::utils::cellCenteredMagneticFields(*args*) - */ - inline __host__ __device__ auto cellCenteredMagneticFields(Real const *dev_conserved, - size_t const &id, - size_t const &xid, - size_t const &yid, - size_t const &zid, - size_t const &n_cells, - size_t const &nx, - size_t const &ny) - { - // Ternary operator to check that no values outside of the magnetic field - // arrays are loaded. If the cell is on the edge that doesn't have magnetic - // fields on both sides then instead set the centered magnetic field to be - // equal to the magnetic field of the closest edge. T - Real avgBx = (xid > 0) ? - /*if true*/ 0.5 * (dev_conserved[(grid_enum::magnetic_x)*n_cells + id] + dev_conserved[(grid_enum::magnetic_x)*n_cells + cuda_utilities::compute1DIndex(xid-1, yid, zid, nx, ny)]): - /*if false*/ dev_conserved[(grid_enum::magnetic_x)*n_cells + id]; - Real avgBy = (yid > 0) ? - /*if true*/ 0.5 * (dev_conserved[(grid_enum::magnetic_y)*n_cells + id] + dev_conserved[(grid_enum::magnetic_y)*n_cells + cuda_utilities::compute1DIndex(xid, yid-1, zid, nx, ny)]): - /*if false*/ dev_conserved[(grid_enum::magnetic_y)*n_cells + id]; - Real avgBz = (zid > 0) ? - /*if true*/ 0.5 * (dev_conserved[(grid_enum::magnetic_z)*n_cells + id] + dev_conserved[(grid_enum::magnetic_z)*n_cells + cuda_utilities::compute1DIndex(xid, yid, zid-1, nx, ny)]): - /*if false*/ dev_conserved[(grid_enum::magnetic_z)*n_cells + id]; +// ========================================================================= +#ifdef MHD +/*! + * \brief Compute the cell centered average of the magnetic fields in a + * given cell + * + * \param[in] dev_conserved A pointer to the device array of conserved variables + * \param[in] id The 1D index into each grid subarray. + * \param[in] xid The x index + * \param[in] yid The y index + * \param[in] zid The z index + * \param[in] n_cells The total number of cells + * \param[in] nx The number of cells in the x-direction + * \param[in] ny The number of cells in the y-direction + * \param[out] avgBx The cell centered average magnetic field in the x-direction + * \param[out] avgBy The cell centered average magnetic field in the y-direction + * \param[out] avgBz The cell centered average magnetic field in the z-direction + * + * \return Real local struct with the X, Y, and Z cell centered magnetic + * fields. Intended to be called with structured binding like `auto [x, y, + * z] = mhd::utils::cellCenteredMagneticFields(*args*) + */ +inline __host__ __device__ auto cellCenteredMagneticFields( + Real const *dev_conserved, size_t const &id, size_t const &xid, + size_t const &yid, size_t const &zid, size_t const &n_cells, + size_t const &nx, size_t const &ny) +{ + // Ternary operator to check that no values outside of the magnetic field + // arrays are loaded. If the cell is on the edge that doesn't have magnetic + // fields on both sides then instead set the centered magnetic field to be + // equal to the magnetic field of the closest edge. T + Real avgBx = + (xid > 0) + ? + /*if true*/ 0.5 * + (dev_conserved[(grid_enum::magnetic_x)*n_cells + id] + + dev_conserved[(grid_enum::magnetic_x)*n_cells + + cuda_utilities::compute1DIndex(xid - 1, yid, zid, + nx, ny)]) + : + /*if false*/ dev_conserved[(grid_enum::magnetic_x)*n_cells + id]; + Real avgBy = + (yid > 0) + ? + /*if true*/ 0.5 * + (dev_conserved[(grid_enum::magnetic_y)*n_cells + id] + + dev_conserved[(grid_enum::magnetic_y)*n_cells + + cuda_utilities::compute1DIndex(xid, yid - 1, zid, + nx, ny)]) + : + /*if false*/ dev_conserved[(grid_enum::magnetic_y)*n_cells + id]; + Real avgBz = + (zid > 0) + ? + /*if true*/ 0.5 * + (dev_conserved[(grid_enum::magnetic_z)*n_cells + id] + + dev_conserved[(grid_enum::magnetic_z)*n_cells + + cuda_utilities::compute1DIndex(xid, yid, zid - 1, + nx, ny)]) + : + /*if false*/ dev_conserved[(grid_enum::magnetic_z)*n_cells + id]; - struct returnStruct - { - Real x, y, z; - }; - return returnStruct{avgBx, avgBy, avgBz}; - } - #endif // MHD - // ========================================================================= -} // end namespace mhd::utils + struct returnStruct { + Real x, y, z; + }; + return returnStruct{avgBx, avgBy, avgBz}; +} +#endif // MHD +// ========================================================================= +} // end namespace mhd::utils diff --git a/src/utils/mhd_utilities_tests.cu b/src/utils/mhd_utilities_tests.cu index 83500c68f..8212f49a3 100644 --- a/src/utils/mhd_utilities_tests.cu +++ b/src/utils/mhd_utilities_tests.cu @@ -6,44 +6,57 @@ */ // STL Includes -#include -#include +#include #include #include -#include #include +#include +#include // External Includes -#include // Include GoogleTest and related libraries/headers +#include // Include GoogleTest and related libraries/headers // Local Includes -#include "../utils/testing_utilities.h" -#include "../utils/mhd_utilities.h" #include "../global/global.h" +#include "../utils/mhd_utilities.h" +#include "../utils/testing_utilities.h" // ============================================================================= // Local helper functions namespace { - struct testParams - { - double gamma = 5./3.; - std::vector density {8.4087201154e-100, 1.6756968986e2, 5.4882403847e100}; - std::vector velocityX {7.0378624601e-100, 7.0829278656e2, 1.8800514112e100}; - std::vector velocityY {7.3583469014e-100, 5.9283073464e2, 5.2725717864e100}; - std::vector velocityZ {1.7182972216e-100, 8.8417748226e2, 1.5855352639e100}; - std::vector momentumX {8.2340416681e-100, 8.1019429453e2, 5.5062596954e100}; - std::vector momentumY {4.9924582299e-100, 7.1254780684e2, 6.5939640992e100}; - std::vector momentumZ {3.6703192739e-100, 7.5676716066e2, 7.2115881803e100}; - std::vector energy {3.0342082433e-100, 7.6976906577e2, 1.9487120853e100}; - std::vector pressureGas {2.2244082909e-100, 8.6772951021e2, 6.7261085663e100}; - std::vector pressureTotal{8.1704748693e-100, 2.6084125198e2, 1.8242151369e100}; - std::vector magneticX {2.8568843801e-100, 9.2400807786e2, 2.1621115264e100}; - std::vector magneticY {9.2900880344e-100, 8.0382409757e2, 6.6499532343e100}; - std::vector magneticZ {9.5795678229e-100, 3.3284839263e2, 9.2337456649e100}; - std::vector names{"Small number case", "Medium number case", "Large number case"}; - }; -} +struct testParams { + double gamma = 5. / 3.; + std::vector density{8.4087201154e-100, 1.6756968986e2, + 5.4882403847e100}; + std::vector velocityX{7.0378624601e-100, 7.0829278656e2, + 1.8800514112e100}; + std::vector velocityY{7.3583469014e-100, 5.9283073464e2, + 5.2725717864e100}; + std::vector velocityZ{1.7182972216e-100, 8.8417748226e2, + 1.5855352639e100}; + std::vector momentumX{8.2340416681e-100, 8.1019429453e2, + 5.5062596954e100}; + std::vector momentumY{4.9924582299e-100, 7.1254780684e2, + 6.5939640992e100}; + std::vector momentumZ{3.6703192739e-100, 7.5676716066e2, + 7.2115881803e100}; + std::vector energy{3.0342082433e-100, 7.6976906577e2, + 1.9487120853e100}; + std::vector pressureGas{2.2244082909e-100, 8.6772951021e2, + 6.7261085663e100}; + std::vector pressureTotal{8.1704748693e-100, 2.6084125198e2, + 1.8242151369e100}; + std::vector magneticX{2.8568843801e-100, 9.2400807786e2, + 2.1621115264e100}; + std::vector magneticY{9.2900880344e-100, 8.0382409757e2, + 6.6499532343e100}; + std::vector magneticZ{9.5795678229e-100, 3.3284839263e2, + 9.2337456649e100}; + std::vector names{"Small number case", "Medium number case", + "Large number case"}; +}; +} // namespace // ============================================================================= // ============================================================================= @@ -54,30 +67,23 @@ namespace * parameters * */ -TEST(tMHDComputeEnergy, - CorrectInputExpectCorrectOutput) +TEST(tMHDComputeEnergy, CorrectInputExpectCorrectOutput) { - testParams parameters; - std::vector fiducialEnergies{3.3366124363499995e-100, - 137786230.15630624, - 9.2884430880010847e+301}; - - for (size_t i = 0; i < parameters.names.size(); i++) - { - Real testEnergy = mhd::utils::computeEnergy(parameters.pressureGas.at(i), - parameters.density.at(i), - parameters.velocityX.at(i), - parameters.velocityY.at(i), - parameters.velocityZ.at(i), - parameters.magneticX.at(i), - parameters.magneticY.at(i), - parameters.magneticZ.at(i), - parameters.gamma); - - testingUtilities::checkResults(fiducialEnergies.at(i), - testEnergy, - parameters.names.at(i)); - } + testParams parameters; + std::vector fiducialEnergies{ + 3.3366124363499995e-100, 137786230.15630624, 9.2884430880010847e+301}; + + for (size_t i = 0; i < parameters.names.size(); i++) { + Real testEnergy = mhd::utils::computeEnergy( + parameters.pressureGas.at(i), parameters.density.at(i), + parameters.velocityX.at(i), parameters.velocityY.at(i), + parameters.velocityZ.at(i), parameters.magneticX.at(i), + parameters.magneticY.at(i), parameters.magneticZ.at(i), + parameters.gamma); + + testingUtilities::checkResults(fiducialEnergies.at(i), testEnergy, + parameters.names.at(i)); + } } /*! @@ -85,30 +91,23 @@ TEST(tMHDComputeEnergy, * parameters except pressure is now negative * */ -TEST(tMHDComputeEnergy, - NegativePressureExpectAutomaticFix) +TEST(tMHDComputeEnergy, NegativePressureExpectAutomaticFix) { - testParams parameters; - std::vector fiducialEnergies{3.3366124363499995e-100, - 137784928.56204093, - 9.2884430880010847e+301}; - - for (size_t i = 0; i < parameters.names.size(); i++) - { - Real testEnergy = mhd::utils::computeEnergy(-parameters.pressureGas.at(i), - parameters.density.at(i), - parameters.velocityX.at(i), - parameters.velocityY.at(i), - parameters.velocityZ.at(i), - parameters.magneticX.at(i), - parameters.magneticY.at(i), - parameters.magneticZ.at(i), - parameters.gamma); - - testingUtilities::checkResults(fiducialEnergies.at(i), - testEnergy, - parameters.names.at(i)); - } + testParams parameters; + std::vector fiducialEnergies{ + 3.3366124363499995e-100, 137784928.56204093, 9.2884430880010847e+301}; + + for (size_t i = 0; i < parameters.names.size(); i++) { + Real testEnergy = mhd::utils::computeEnergy( + -parameters.pressureGas.at(i), parameters.density.at(i), + parameters.velocityX.at(i), parameters.velocityY.at(i), + parameters.velocityZ.at(i), parameters.magneticX.at(i), + parameters.magneticY.at(i), parameters.magneticZ.at(i), + parameters.gamma); + + testingUtilities::checkResults(fiducialEnergies.at(i), testEnergy, + parameters.names.at(i)); + } } // ============================================================================= // End of tests for the mhd::utils::computeEnergy function @@ -118,103 +117,83 @@ TEST(tMHDComputeEnergy, // Tests for the mhd::utils::computeGasPressure function // ============================================================================= /*! - * \brief Test the mhd::utils::computeGasPressure function with the standard set of - * parameters. Energy has been increased to avoid negative pressures + * \brief Test the mhd::utils::computeGasPressure function with the standard set + * of parameters. Energy has been increased to avoid negative pressures * */ -TEST(tMHDComputeGasPressure, - CorrectInputExpectCorrectOutput) +TEST(tMHDComputeGasPressure, CorrectInputExpectCorrectOutput) { - testParams parameters; - std::vector energyMultiplier{3, 1.0E4, 1.0E105}; - std::vector fiducialGasPressures{1.8586864490415075e-100, - 4591434.7663756227, - 1.29869419465575e+205}; - - for (size_t i = 0; i < parameters.names.size(); i++) - { - Real testGasPressure = mhd::utils::computeGasPressure(energyMultiplier.at(i) * parameters.energy.at(i), - parameters.density.at(i), - parameters.momentumX.at(i), - parameters.momentumY.at(i), - parameters.momentumZ.at(i), - parameters.magneticX.at(i), - parameters.magneticY.at(i), - parameters.magneticZ.at(i), - parameters.gamma); - - testingUtilities::checkResults(fiducialGasPressures.at(i), - testGasPressure, - parameters.names.at(i)); - } + testParams parameters; + std::vector energyMultiplier{3, 1.0E4, 1.0E105}; + std::vector fiducialGasPressures{ + 1.8586864490415075e-100, 4591434.7663756227, 1.29869419465575e+205}; + + for (size_t i = 0; i < parameters.names.size(); i++) { + Real testGasPressure = mhd::utils::computeGasPressure( + energyMultiplier.at(i) * parameters.energy.at(i), + parameters.density.at(i), parameters.momentumX.at(i), + parameters.momentumY.at(i), parameters.momentumZ.at(i), + parameters.magneticX.at(i), parameters.magneticY.at(i), + parameters.magneticZ.at(i), parameters.gamma); + + testingUtilities::checkResults(fiducialGasPressures.at(i), testGasPressure, + parameters.names.at(i)); + } } /*! - * \brief Test the mhd::utils::computeGasPressure function with a the standard set - * of parameters which produce negative pressures + * \brief Test the mhd::utils::computeGasPressure function with a the standard + * set of parameters which produce negative pressures * */ -TEST(tMHDComputeGasPressure, - NegativePressureExpectAutomaticFix) +TEST(tMHDComputeGasPressure, NegativePressureExpectAutomaticFix) { - testParams parameters; - - for (size_t i = 0; i < parameters.names.size(); i++) - { - Real testGasPressure = mhd::utils::computeGasPressure(parameters.energy.at(i), - parameters.density.at(i), - parameters.momentumX.at(i), - parameters.momentumY.at(i), - parameters.momentumZ.at(i), - parameters.magneticX.at(i), - parameters.magneticY.at(i), - parameters.magneticZ.at(i), - parameters.gamma); - - // I'm using the binary equality assertion here since in the case of - // negative pressure the function should return exactly TINY_NUMBER - EXPECT_EQ(TINY_NUMBER, testGasPressure) - << "Difference in " << parameters.names.at(i) << std::endl; - } + testParams parameters; + + for (size_t i = 0; i < parameters.names.size(); i++) { + Real testGasPressure = mhd::utils::computeGasPressure( + parameters.energy.at(i), parameters.density.at(i), + parameters.momentumX.at(i), parameters.momentumY.at(i), + parameters.momentumZ.at(i), parameters.magneticX.at(i), + parameters.magneticY.at(i), parameters.magneticZ.at(i), + parameters.gamma); + + // I'm using the binary equality assertion here since in the case of + // negative pressure the function should return exactly TINY_NUMBER + EXPECT_EQ(TINY_NUMBER, testGasPressure) + << "Difference in " << parameters.names.at(i) << std::endl; + } } // ============================================================================= // End of tests for the mhd::utils::computeGasPressure function // ============================================================================= - // ============================================================================= // Tests for the mhd::utils::computeThermalEnergy function // ============================================================================= /*! - * \brief Test the mhd::utils::computeThermalEnergy function with the standard set - * of parameters. + * \brief Test the mhd::utils::computeThermalEnergy function with the standard + * set of parameters. * */ -TEST(tMHDComputeThermalEnergy, - CorrectInputExpectCorrectOutput) +TEST(tMHDComputeThermalEnergy, CorrectInputExpectCorrectOutput) { - testParams parameters; - std::vector energyMultiplier{1.0E85, 1.0E4, 1.0E105}; - std::vector fiducialGasPressures{3.0342082433e-15, - 6887152.1495634327, - 1.9480412919836246e+205}; - - for (size_t i = 0; i < parameters.names.size(); i++) - { - Real testGasPressure = mhd::utils::computeThermalEnergy(energyMultiplier.at(i) * parameters.energy.at(i), - parameters.density.at(i), - parameters.momentumX.at(i), - parameters.momentumY.at(i), - parameters.momentumZ.at(i), - parameters.magneticX.at(i), - parameters.magneticY.at(i), - parameters.magneticZ.at(i), - parameters.gamma); - - testingUtilities::checkResults(fiducialGasPressures.at(i), - testGasPressure, - parameters.names.at(i)); - } + testParams parameters; + std::vector energyMultiplier{1.0E85, 1.0E4, 1.0E105}; + std::vector fiducialGasPressures{3.0342082433e-15, 6887152.1495634327, + 1.9480412919836246e+205}; + + for (size_t i = 0; i < parameters.names.size(); i++) { + Real testGasPressure = mhd::utils::computeThermalEnergy( + energyMultiplier.at(i) * parameters.energy.at(i), + parameters.density.at(i), parameters.momentumX.at(i), + parameters.momentumY.at(i), parameters.momentumZ.at(i), + parameters.magneticX.at(i), parameters.magneticY.at(i), + parameters.magneticZ.at(i), parameters.gamma); + + testingUtilities::checkResults(fiducialGasPressures.at(i), testGasPressure, + parameters.names.at(i)); + } } // ============================================================================= // End of tests for the mhd::utils::computeThermalEnergy function @@ -228,25 +207,21 @@ TEST(tMHDComputeThermalEnergy, * set of parameters. * */ -TEST(tMHDcomputeMagneticEnergy, - CorrectInputExpectCorrectOutput) +TEST(tMHDcomputeMagneticEnergy, CorrectInputExpectCorrectOutput) { - testParams parameters; - std::vector energyMultiplier{1.0E85, 1.0E4, 1.0E105}; - std::vector fiducialEnergy{0.0, - 805356.08013056568, - 6.7079331637514162e+201}; - - for (size_t i = 0; i < parameters.names.size(); i++) - { - Real testMagneticEnergy = mhd::utils::computeMagneticEnergy(parameters.magneticX.at(i), - parameters.magneticY.at(i), - parameters.magneticZ.at(i)); - - testingUtilities::checkResults(fiducialEnergy.at(i), - testMagneticEnergy, - parameters.names.at(i)); - } + testParams parameters; + std::vector energyMultiplier{1.0E85, 1.0E4, 1.0E105}; + std::vector fiducialEnergy{0.0, 805356.08013056568, + 6.7079331637514162e+201}; + + for (size_t i = 0; i < parameters.names.size(); i++) { + Real testMagneticEnergy = mhd::utils::computeMagneticEnergy( + parameters.magneticX.at(i), parameters.magneticY.at(i), + parameters.magneticZ.at(i)); + + testingUtilities::checkResults(fiducialEnergy.at(i), testMagneticEnergy, + parameters.names.at(i)); + } } // ============================================================================= // End of tests for the mhd::utils::computeMagneticEnergy function @@ -256,29 +231,24 @@ TEST(tMHDcomputeMagneticEnergy, // Tests for the mhd::utils::computeTotalPressure function // ============================================================================= /*! - * \brief Test the mhd::utils::computeTotalPressure function with the standard set - * of parameters. + * \brief Test the mhd::utils::computeTotalPressure function with the standard + * set of parameters. * */ -TEST(tMHDComputeTotalPressure, - CorrectInputExpectCorrectOutput) +TEST(tMHDComputeTotalPressure, CorrectInputExpectCorrectOutput) { - testParams parameters; - std::vector fiducialTotalPressures{9.9999999999999995e-21, - 806223.80964077567, - 6.7079331637514151e+201}; - - for (size_t i = 0; i < parameters.names.size(); i++) - { - Real testTotalPressure = mhd::utils::computeTotalPressure(parameters.pressureGas.at(i), - parameters.magneticX.at(i), - parameters.magneticY.at(i), - parameters.magneticZ.at(i)); - - testingUtilities::checkResults(fiducialTotalPressures.at(i), - testTotalPressure, - parameters.names.at(i)); - } + testParams parameters; + std::vector fiducialTotalPressures{ + 9.9999999999999995e-21, 806223.80964077567, 6.7079331637514151e+201}; + + for (size_t i = 0; i < parameters.names.size(); i++) { + Real testTotalPressure = mhd::utils::computeTotalPressure( + parameters.pressureGas.at(i), parameters.magneticX.at(i), + parameters.magneticY.at(i), parameters.magneticZ.at(i)); + + testingUtilities::checkResults(fiducialTotalPressures.at(i), + testTotalPressure, parameters.names.at(i)); + } } /*! @@ -287,24 +257,22 @@ TEST(tMHDComputeTotalPressure, * generate negative total pressures * */ -TEST(tMHDComputeTotalPressure, - NegativePressureExpectAutomaticFix) +TEST(tMHDComputeTotalPressure, NegativePressureExpectAutomaticFix) { - testParams parameters; - std::vector pressureMultiplier{1.0, -1.0e4, -1.0e105}; - - for (size_t i = 0; i < parameters.names.size(); i++) - { - Real testTotalPressure = mhd::utils::computeTotalPressure(pressureMultiplier.at(i) * parameters.pressureGas.at(i), - parameters.magneticX.at(i), - parameters.magneticY.at(i), - parameters.magneticZ.at(i)); - - // I'm using the binary equality assertion here since in the case of - // negative pressure the function should return exactly TINY_NUMBER - EXPECT_EQ(TINY_NUMBER, testTotalPressure) - << "Difference in " << parameters.names.at(i) << std::endl; - } + testParams parameters; + std::vector pressureMultiplier{1.0, -1.0e4, -1.0e105}; + + for (size_t i = 0; i < parameters.names.size(); i++) { + Real testTotalPressure = mhd::utils::computeTotalPressure( + pressureMultiplier.at(i) * parameters.pressureGas.at(i), + parameters.magneticX.at(i), parameters.magneticY.at(i), + parameters.magneticZ.at(i)); + + // I'm using the binary equality assertion here since in the case of + // negative pressure the function should return exactly TINY_NUMBER + EXPECT_EQ(TINY_NUMBER, testTotalPressure) + << "Difference in " << parameters.names.at(i) << std::endl; + } } // ============================================================================= // End of tests for the mhd::utils::computeTotalPressure function @@ -319,29 +287,25 @@ TEST(tMHDComputeTotalPressure, * to avoid overflow * */ -TEST(tMHDFastMagnetosonicSpeed, - CorrectInputExpectCorrectOutput) +TEST(tMHDFastMagnetosonicSpeed, CorrectInputExpectCorrectOutput) { - testParams parameters; - std::vector fiducialFastMagnetosonicSpeed{1.9254472601190615e-40, - 98.062482309387562, - 1.5634816865472293e+38}; - std::vector coef{1.0, 1.0, 1.0e-25}; - - for (size_t i = 0; i < parameters.names.size(); i++) - { - Real testFastMagnetosonicSpeed = mhd::utils::fastMagnetosonicSpeed( - coef.at(i)*parameters.density.at(i), - coef.at(i)*parameters.pressureGas.at(i), - coef.at(i)*parameters.magneticX.at(i), - coef.at(i)*parameters.magneticY.at(i), - coef.at(i)*parameters.magneticZ.at(i), - parameters.gamma); - - testingUtilities::checkResults(fiducialFastMagnetosonicSpeed.at(i), - testFastMagnetosonicSpeed, - parameters.names.at(i)); - } + testParams parameters; + std::vector fiducialFastMagnetosonicSpeed{ + 1.9254472601190615e-40, 98.062482309387562, 1.5634816865472293e+38}; + std::vector coef{1.0, 1.0, 1.0e-25}; + + for (size_t i = 0; i < parameters.names.size(); i++) { + Real testFastMagnetosonicSpeed = mhd::utils::fastMagnetosonicSpeed( + coef.at(i) * parameters.density.at(i), + coef.at(i) * parameters.pressureGas.at(i), + coef.at(i) * parameters.magneticX.at(i), + coef.at(i) * parameters.magneticY.at(i), + coef.at(i) * parameters.magneticZ.at(i), parameters.gamma); + + testingUtilities::checkResults(fiducialFastMagnetosonicSpeed.at(i), + testFastMagnetosonicSpeed, + parameters.names.at(i)); + } } /*! @@ -350,29 +314,25 @@ TEST(tMHDFastMagnetosonicSpeed, * the large number case to avoid overflow. * */ -TEST(tMHDFastMagnetosonicSpeed, - NegativeDensityExpectAutomaticFix) +TEST(tMHDFastMagnetosonicSpeed, NegativeDensityExpectAutomaticFix) { - testParams parameters; - std::vector fiducialFastMagnetosonicSpeed{1.9254472601190615e-40, - 12694062010603.15, - 1.1582688085027081e+86}; - std::vector coef{1.0, 1.0, 1.0e-25}; - - for (size_t i = 0; i < parameters.names.size(); i++) - { - Real testFastMagnetosonicSpeed = mhd::utils::fastMagnetosonicSpeed( - -coef.at(i)*parameters.density.at(i), - coef.at(i)*parameters.pressureGas.at(i), - coef.at(i)*parameters.magneticX.at(i), - coef.at(i)*parameters.magneticY.at(i), - coef.at(i)*parameters.magneticZ.at(i), - parameters.gamma); - - testingUtilities::checkResults(fiducialFastMagnetosonicSpeed.at(i), - testFastMagnetosonicSpeed, - parameters.names.at(i)); - } + testParams parameters; + std::vector fiducialFastMagnetosonicSpeed{ + 1.9254472601190615e-40, 12694062010603.15, 1.1582688085027081e+86}; + std::vector coef{1.0, 1.0, 1.0e-25}; + + for (size_t i = 0; i < parameters.names.size(); i++) { + Real testFastMagnetosonicSpeed = mhd::utils::fastMagnetosonicSpeed( + -coef.at(i) * parameters.density.at(i), + coef.at(i) * parameters.pressureGas.at(i), + coef.at(i) * parameters.magneticX.at(i), + coef.at(i) * parameters.magneticY.at(i), + coef.at(i) * parameters.magneticZ.at(i), parameters.gamma); + + testingUtilities::checkResults(fiducialFastMagnetosonicSpeed.at(i), + testFastMagnetosonicSpeed, + parameters.names.at(i)); + } } // ============================================================================= // End of tests for the mhd::utils::fastMagnetosonicSpeed function @@ -387,30 +347,24 @@ TEST(tMHDFastMagnetosonicSpeed, * to avoid overflow * */ -TEST(tMHDSlowMagnetosonicSpeed, - CorrectInputExpectCorrectOutput) +TEST(tMHDSlowMagnetosonicSpeed, CorrectInputExpectCorrectOutput) { - testParams parameters; - std::vector fiducialSlowMagnetosonicSpeed{0.0, - 2.138424778167535, - 0.26678309355540852}; - // Coefficient to make sure the output is well defined and not nan or inf - double const coef = 1E-95; - - for (size_t i = 2; i < parameters.names.size(); i++) - { - Real testSlowMagnetosonicSpeed = mhd::utils::slowMagnetosonicSpeed( - parameters.density.at(i) * coef, - parameters.pressureGas.at(i) * coef, - parameters.magneticX.at(i) * coef, - parameters.magneticY.at(i) * coef, - parameters.magneticZ.at(i) * coef, - parameters.gamma); - - testingUtilities::checkResults(fiducialSlowMagnetosonicSpeed.at(i), - testSlowMagnetosonicSpeed, - parameters.names.at(i)); - } + testParams parameters; + std::vector fiducialSlowMagnetosonicSpeed{0.0, 2.138424778167535, + 0.26678309355540852}; + // Coefficient to make sure the output is well defined and not nan or inf + double const coef = 1E-95; + + for (size_t i = 2; i < parameters.names.size(); i++) { + Real testSlowMagnetosonicSpeed = mhd::utils::slowMagnetosonicSpeed( + parameters.density.at(i) * coef, parameters.pressureGas.at(i) * coef, + parameters.magneticX.at(i) * coef, parameters.magneticY.at(i) * coef, + parameters.magneticZ.at(i) * coef, parameters.gamma); + + testingUtilities::checkResults(fiducialSlowMagnetosonicSpeed.at(i), + testSlowMagnetosonicSpeed, + parameters.names.at(i)); + } } /*! @@ -419,30 +373,24 @@ TEST(tMHDSlowMagnetosonicSpeed, * the large number case to avoid overflow. * */ -TEST(tMHDSlowMagnetosonicSpeed, - NegativeDensityExpectAutomaticFix) +TEST(tMHDSlowMagnetosonicSpeed, NegativeDensityExpectAutomaticFix) { - testParams parameters; - std::vector fiducialSlowMagnetosonicSpeed{0.0, - 276816332809.37604, - 1976400098318.3574}; - // Coefficient to make sure the output is well defined and not nan or inf - double const coef = 1E-95; - - for (size_t i = 2; i < parameters.names.size(); i++) - { - Real testSlowMagnetosonicSpeed = mhd::utils::slowMagnetosonicSpeed( - -parameters.density.at(i) * coef, - parameters.pressureGas.at(i) * coef, - parameters.magneticX.at(i) * coef, - parameters.magneticY.at(i) * coef, - parameters.magneticZ.at(i) * coef, - parameters.gamma); - - testingUtilities::checkResults(fiducialSlowMagnetosonicSpeed.at(i), - testSlowMagnetosonicSpeed, - parameters.names.at(i)); - } + testParams parameters; + std::vector fiducialSlowMagnetosonicSpeed{0.0, 276816332809.37604, + 1976400098318.3574}; + // Coefficient to make sure the output is well defined and not nan or inf + double const coef = 1E-95; + + for (size_t i = 2; i < parameters.names.size(); i++) { + Real testSlowMagnetosonicSpeed = mhd::utils::slowMagnetosonicSpeed( + -parameters.density.at(i) * coef, parameters.pressureGas.at(i) * coef, + parameters.magneticX.at(i) * coef, parameters.magneticY.at(i) * coef, + parameters.magneticZ.at(i) * coef, parameters.gamma); + + testingUtilities::checkResults(fiducialSlowMagnetosonicSpeed.at(i), + testSlowMagnetosonicSpeed, + parameters.names.at(i)); + } } // ============================================================================= // End of tests for the mhd::utils::slowMagnetosonicSpeed function @@ -456,23 +404,19 @@ TEST(tMHDSlowMagnetosonicSpeed, * parameters. * */ -TEST(tMHDAlfvenSpeed, - CorrectInputExpectCorrectOutput) +TEST(tMHDAlfvenSpeed, CorrectInputExpectCorrectOutput) { - testParams parameters; - std::vector fiducialAlfvenSpeed{2.8568843800999998e-90, - 71.380245120271113, - 9.2291462785524423e+49}; - - for (size_t i = 0; i < parameters.names.size(); i++) - { - Real testAlfvenSpeed = mhd::utils::alfvenSpeed(parameters.magneticX.at(i), - parameters.density.at(i)); - - testingUtilities::checkResults(fiducialAlfvenSpeed.at(i), - testAlfvenSpeed, - parameters.names.at(i)); - } + testParams parameters; + std::vector fiducialAlfvenSpeed{ + 2.8568843800999998e-90, 71.380245120271113, 9.2291462785524423e+49}; + + for (size_t i = 0; i < parameters.names.size(); i++) { + Real testAlfvenSpeed = mhd::utils::alfvenSpeed(parameters.magneticX.at(i), + parameters.density.at(i)); + + testingUtilities::checkResults(fiducialAlfvenSpeed.at(i), testAlfvenSpeed, + parameters.names.at(i)); + } } /*! @@ -480,23 +424,19 @@ TEST(tMHDAlfvenSpeed, * parameters except density is negative * */ -TEST(tMHDAlfvenSpeed, - NegativeDensityExpectAutomaticFix) +TEST(tMHDAlfvenSpeed, NegativeDensityExpectAutomaticFix) { - testParams parameters; - std::vector fiducialAlfvenSpeed{2.8568843800999998e-90, - 9240080778600, - 2.1621115263999998e+110}; - - for (size_t i = 0; i < parameters.names.size(); i++) - { - Real testAlfvenSpeed = mhd::utils::alfvenSpeed(parameters.magneticX.at(i), - -parameters.density.at(i)); - - testingUtilities::checkResults(fiducialAlfvenSpeed.at(i), - testAlfvenSpeed, - parameters.names.at(i)); - } + testParams parameters; + std::vector fiducialAlfvenSpeed{2.8568843800999998e-90, 9240080778600, + 2.1621115263999998e+110}; + + for (size_t i = 0; i < parameters.names.size(); i++) { + Real testAlfvenSpeed = mhd::utils::alfvenSpeed(parameters.magneticX.at(i), + -parameters.density.at(i)); + + testingUtilities::checkResults(fiducialAlfvenSpeed.at(i), testAlfvenSpeed, + parameters.names.at(i)); + } } // ============================================================================= // End of tests for the mhd::utils::alfvenSpeed function @@ -506,37 +446,40 @@ TEST(tMHDAlfvenSpeed, // Tests for the mhd::utils::cellCenteredMagneticFields function // ============================================================================= #ifdef MHD -TEST(tMHDCellCenteredMagneticFields, - CorrectInputExpectCorrectOutput) +TEST(tMHDCellCenteredMagneticFields, CorrectInputExpectCorrectOutput) { - // Initialize the test grid and other state variables - size_t const nx = 3, ny = nx; - size_t const xid = std::floor(nx/2), yid = xid, zid = xid; - size_t const id = xid + yid*nx + zid*nx*ny; - - size_t const n_cells = std::pow(5,3); - // Make sure the vector is large enough that the locations where the - // magnetic field would be in the real grid are filled - std::vector testGrid(n_cells * (grid_enum::num_fields)); - // Populate the grid with values where testGrid.at(i) = double(i). The - // values chosen aren't that important, just that every cell has a unique - // value - std::iota(std::begin(testGrid), std::end(testGrid), 0.); - - // Fiducial and test variables - double const fiducialAvgBx = 637.5, - fiducialAvgBy = 761.5, - fiducialAvgBz = 883.5; - - // Call the function to test - auto [testAvgBx, testAvgBy, testAvgBz] = mhd::utils::cellCenteredMagneticFields(testGrid.data(), id, xid, yid, zid, n_cells, nx, ny); - - // Check the results - testingUtilities::checkResults(fiducialAvgBx, testAvgBx, "cell centered Bx value"); - testingUtilities::checkResults(fiducialAvgBy, testAvgBy, "cell centered By value"); - testingUtilities::checkResults(fiducialAvgBz, testAvgBz, "cell centered Bz value"); + // Initialize the test grid and other state variables + size_t const nx = 3, ny = nx; + size_t const xid = std::floor(nx / 2), yid = xid, zid = xid; + size_t const id = xid + yid * nx + zid * nx * ny; + + size_t const n_cells = std::pow(5, 3); + // Make sure the vector is large enough that the locations where the + // magnetic field would be in the real grid are filled + std::vector testGrid(n_cells * (grid_enum::num_fields)); + // Populate the grid with values where testGrid.at(i) = double(i). The + // values chosen aren't that important, just that every cell has a unique + // value + std::iota(std::begin(testGrid), std::end(testGrid), 0.); + + // Fiducial and test variables + double const fiducialAvgBx = 637.5, fiducialAvgBy = 761.5, + fiducialAvgBz = 883.5; + + // Call the function to test + auto [testAvgBx, testAvgBy, testAvgBz] = + mhd::utils::cellCenteredMagneticFields(testGrid.data(), id, xid, yid, zid, + n_cells, nx, ny); + + // Check the results + testingUtilities::checkResults(fiducialAvgBx, testAvgBx, + "cell centered Bx value"); + testingUtilities::checkResults(fiducialAvgBy, testAvgBy, + "cell centered By value"); + testingUtilities::checkResults(fiducialAvgBz, testAvgBz, + "cell centered Bz value"); } -#endif // MHD +#endif // MHD // ============================================================================= // End of tests for the mhd::utils::cellCenteredMagneticFields function // ============================================================================= diff --git a/src/utils/parallel_omp.cpp b/src/utils/parallel_omp.cpp index 90a70c914..3e85efcd0 100644 --- a/src/utils/parallel_omp.cpp +++ b/src/utils/parallel_omp.cpp @@ -1,56 +1,51 @@ #ifdef PARALLEL_OMP -#include "../utils/parallel_omp.h" - -void Get_OMP_Grid_Indxs( int n_grid_cells, int n_omp_procs, int omp_proc_id, int *omp_gridIndx_start, int *omp_gridIndx_end ){ + #include "../utils/parallel_omp.h" +void Get_OMP_Grid_Indxs(int n_grid_cells, int n_omp_procs, int omp_proc_id, + int *omp_gridIndx_start, int *omp_gridIndx_end) +{ int grid_reminder, n_grid_omp, g_start, g_end; grid_reminder = n_grid_cells % n_omp_procs; - n_grid_omp = n_grid_cells / n_omp_procs; + n_grid_omp = n_grid_cells / n_omp_procs; - g_start = 0; + g_start = 0; int counter = 0; - while ( counter < omp_proc_id ){ + while (counter < omp_proc_id) { g_start += n_grid_omp; - if ( counter < grid_reminder ) g_start += 1; + if (counter < grid_reminder) g_start += 1; counter += 1; } g_end = g_start + n_grid_omp; - if ( omp_proc_id < grid_reminder ) g_end += 1; + if (omp_proc_id < grid_reminder) g_end += 1; *omp_gridIndx_start = g_start; - *omp_gridIndx_end = g_end; - + *omp_gridIndx_end = g_end; } -#ifdef PARTICLES -void Get_OMP_Particles_Indxs( part_int_t n_parts_local, int n_omp_procs, int omp_proc_id, part_int_t *omp_pIndx_start, part_int_t *omp_pIndx_end ){ - + #ifdef PARTICLES +void Get_OMP_Particles_Indxs(part_int_t n_parts_local, int n_omp_procs, + int omp_proc_id, part_int_t *omp_pIndx_start, + part_int_t *omp_pIndx_end) +{ part_int_t n_parts_omp, parts_reminder, p_start, p_end; parts_reminder = n_parts_local % n_omp_procs; - n_parts_omp = n_parts_local / n_omp_procs; + n_parts_omp = n_parts_local / n_omp_procs; - p_start = 0; + p_start = 0; int counter = 0; - while ( counter < omp_proc_id ){ + while (counter < omp_proc_id) { p_start += n_parts_omp; - if ( counter < parts_reminder ) p_start += 1; + if (counter < parts_reminder) p_start += 1; counter += 1; } p_end = p_start + n_parts_omp; - if ( omp_proc_id < parts_reminder ) p_end += 1; + if (omp_proc_id < parts_reminder) p_end += 1; *omp_pIndx_start = p_start; - *omp_pIndx_end = p_end; - + *omp_pIndx_end = p_end; } -#endif - - - - - - + #endif #endif diff --git a/src/utils/parallel_omp.h b/src/utils/parallel_omp.h index b115dcb76..836cd91a4 100644 --- a/src/utils/parallel_omp.h +++ b/src/utils/parallel_omp.h @@ -1,20 +1,25 @@ #ifdef PARALLEL_OMP -#ifndef PARALLEL_OMP_H -#define PARALLEL_OMP_H + #ifndef PARALLEL_OMP_H + #define PARALLEL_OMP_H -#include -#include -#include "math.h" -#include "../global/global.h" -#include -#include + #include + #include + #include -void Get_OMP_Grid_Indxs( int n_grid_cells, int n_omp_procs, int omp_proc_id, int *omp_gridIndx_start, int *omp_gridIndx_end ); + #include -#ifdef PARTICLES -void Get_OMP_Particles_Indxs( part_int_t n_parts_local, int n_omp_procs, int omp_proc_id, part_int_t *omp_pIndx_start, part_int_t *omp_pIndx_end ); -#endif + #include "../global/global.h" + #include "math.h" -#endif +void Get_OMP_Grid_Indxs(int n_grid_cells, int n_omp_procs, int omp_proc_id, + int *omp_gridIndx_start, int *omp_gridIndx_end); + + #ifdef PARTICLES +void Get_OMP_Particles_Indxs(part_int_t n_parts_local, int n_omp_procs, + int omp_proc_id, part_int_t *omp_pIndx_start, + part_int_t *omp_pIndx_end); + #endif + + #endif #endif diff --git a/src/utils/prng_utilities.h b/src/utils/prng_utilities.h index 6f89a9c1b..9e038ce8f 100644 --- a/src/utils/prng_utilities.h +++ b/src/utils/prng_utilities.h @@ -1,41 +1,42 @@ // STL Includes -#include #include +#include #include // Local includes #include "../global/global.h" - #pragma once class ChollaPrngGenerator { -public: - std::mt19937_64 inline static generator; + public: + std::mt19937_64 inline static generator; - ChollaPrngGenerator(struct parameters *P) - { - // If the seed isn't defined in the settings file or argv then generate - // a random seed - if (P->prng_seed == 0) - { - // Since std::random_device isn't guaranteed to be random or - // different for each rank we're going to convert both the base seed - // and MPI rank to strings, concatenated them, then hash the result. - // This should give a fairly random seed even if std::random_device - // isn't random - std::string hashString = std::to_string(std::random_device{}()) - #ifdef MPI_CHOLLA - + std::to_string(static_cast(procID)) - #endif - + std::to_string(std::chrono::high_resolution_clock::now().time_since_epoch().count()); - std::size_t hashedSeed = std::hash{}(hashString); - P->prng_seed = static_cast(hashedSeed); - } + ChollaPrngGenerator(struct parameters *P) + { + // If the seed isn't defined in the settings file or argv then generate + // a random seed + if (P->prng_seed == 0) { + // Since std::random_device isn't guaranteed to be random or + // different for each rank we're going to convert both the base seed + // and MPI rank to strings, concatenated them, then hash the result. + // This should give a fairly random seed even if std::random_device + // isn't random + std::string hashString = + std::to_string(std::random_device{}()) +#ifdef MPI_CHOLLA + + std::to_string(static_cast(procID)) +#endif + + std::to_string(std::chrono::high_resolution_clock::now() + .time_since_epoch() + .count()); + std::size_t hashedSeed = std::hash{}(hashString); + P->prng_seed = static_cast(hashedSeed); + } - // Initialize the PRNG - generator.seed(P->prng_seed); - }; - ~ChollaPrngGenerator() = default; + // Initialize the PRNG + generator.seed(P->prng_seed); + }; + ~ChollaPrngGenerator() = default; }; diff --git a/src/utils/ran.h b/src/utils/ran.h index 09a0b8868..95906713a 100644 --- a/src/utils/ran.h +++ b/src/utils/ran.h @@ -1,26 +1,33 @@ -#include #include +#include typedef unsigned long long int Ullong; typedef double Doub; typedef unsigned int Uint; struct Ran { - - Ullong u,v,w; - Ran(Ullong j) : v(4101842887655102017LL), w(1) { - u = j^v; int64(); - v = u; int64(); - w = v; int64(); + Ullong u, v, w; + Ran(Ullong j) : v(4101842887655102017LL), w(1) + { + u = j ^ v; + int64(); + v = u; + int64(); + w = v; + int64(); } - inline Ullong int64() { + inline Ullong int64() + { u = u * 2862933555777941757LL + 7046029254386353087LL; - v ^= v >> 17; v ^= v << 31; v ^= v >> 8; - w = 4294957665U*(w & 0xffffffff) + (w >> 32); - Ullong x = u ^ (u << 21); x ^= x >> 35; x ^= x << 4; + v ^= v >> 17; + v ^= v << 31; + v ^= v >> 8; + w = 4294957665U * (w & 0xffffffff) + (w >> 32); + Ullong x = u ^ (u << 21); + x ^= x >> 35; + x ^= x << 4; return (x + v) ^ w; } inline Doub doub() { return 5.42101086242752217E-20 * int64(); } inline Uint int32() { return (Uint)int64(); } - }; diff --git a/src/utils/reduction_utilities.cu b/src/utils/reduction_utilities.cu index 65933e42f..0c54f6296 100644 --- a/src/utils/reduction_utilities.cu +++ b/src/utils/reduction_utilities.cu @@ -14,33 +14,31 @@ #include "../utils/reduction_utilities.h" #ifdef CUDA - namespace reduction_utilities - { - // ===================================================================== - __global__ void kernelReduceMax(Real *in, Real* out, size_t N) - { - // Initialize maxVal to the smallest possible number - Real maxVal = -DBL_MAX; +namespace reduction_utilities +{ +// ===================================================================== +__global__ void kernelReduceMax(Real* in, Real* out, size_t N) +{ + // Initialize maxVal to the smallest possible number + Real maxVal = -DBL_MAX; - // Grid stride loop to perform as much of the reduction as possible - for(size_t i = blockIdx.x * blockDim.x + threadIdx.x; - i < N; - i += blockDim.x * gridDim.x) - { - // A transformation could go here + // Grid stride loop to perform as much of the reduction as possible + for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < N; + i += blockDim.x * gridDim.x) { + // A transformation could go here - // Grid stride reduction - maxVal = max(maxVal,in[i]); - } + // Grid stride reduction + maxVal = max(maxVal, in[i]); + } - // Find the maximum val in the grid and write it to `out`. Note that - // there is no execution/memory barrier after this and so the - // reduced scalar is not available for use in this kernel. The grid - // wide barrier can be accomplished by ending this kernel here and - // then launching a new one or by using cooperative groups. If this - // becomes a need it can be added later - gridReduceMax(maxVal, out); - } - // ===================================================================== - }//reduction_utilities -#endif //CUDA \ No newline at end of file + // Find the maximum val in the grid and write it to `out`. Note that + // there is no execution/memory barrier after this and so the + // reduced scalar is not available for use in this kernel. The grid + // wide barrier can be accomplished by ending this kernel here and + // then launching a new one or by using cooperative groups. If this + // becomes a need it can be added later + gridReduceMax(maxVal, out); +} +// ===================================================================== +} // namespace reduction_utilities +#endif // CUDA \ No newline at end of file diff --git a/src/utils/reduction_utilities.h b/src/utils/reduction_utilities.h index 39089ac2e..38cb54724 100644 --- a/src/utils/reduction_utilities.h +++ b/src/utils/reduction_utilities.h @@ -18,283 +18,281 @@ #include "../utils/gpu.hpp" #ifdef CUDA - /*! - * \brief Namespace to contain device resident reduction functions. Includes - * functions and kernels for array reduction, warp level, block level, and - * grid level reductions. - * - */ - namespace reduction_utilities - { - // ===================================================================== - /*! - * \brief Perform a reduction within the warp/wavefront to find the - * maximum value of `val` - * - * \param[in] val The thread local variable to find the maximum of across - * the warp - * \return Real The maximum value of `val` within the warp - */ - __inline__ __device__ Real warpReduceMax(Real val) - { - for (int offset = warpSize/2; offset > 0; offset /= 2) - { - val = max(val, __shfl_down(val, offset)); - } - return val; - } - // ===================================================================== - - // ===================================================================== - /*! - * \brief Perform a reduction within the block to find the maximum value - * of `val` - * - * \param[in] val The thread local variable to find the maximum of across - * the block - * \return Real The maximum value of `val` within the block - */ - __inline__ __device__ Real blockReduceMax(Real val) - { - // Shared memory for storing the results of each warp-wise partial - // reduction - __shared__ Real shared[::maxWarpsPerBlock]; +/*! + * \brief Namespace to contain device resident reduction functions. Includes + * functions and kernels for array reduction, warp level, block level, and + * grid level reductions. + * + */ +namespace reduction_utilities +{ +// ===================================================================== +/*! + * \brief Perform a reduction within the warp/wavefront to find the + * maximum value of `val` + * + * \param[in] val The thread local variable to find the maximum of across + * the warp + * \return Real The maximum value of `val` within the warp + */ +__inline__ __device__ Real warpReduceMax(Real val) +{ + for (int offset = warpSize / 2; offset > 0; offset /= 2) { + val = max(val, __shfl_down(val, offset)); + } + return val; +} +// ===================================================================== - int lane = threadIdx.x % warpSize; // thread ID within the warp, - int warpId = threadIdx.x / warpSize; // ID of the warp itself +// ===================================================================== +/*! + * \brief Perform a reduction within the block to find the maximum value + * of `val` + * + * \param[in] val The thread local variable to find the maximum of across + * the block + * \return Real The maximum value of `val` within the block + */ +__inline__ __device__ Real blockReduceMax(Real val) +{ + // Shared memory for storing the results of each warp-wise partial + // reduction + __shared__ Real shared[::maxWarpsPerBlock]; - val = warpReduceMax(val); // Each warp performs partial reduction + int lane = threadIdx.x % warpSize; // thread ID within the warp, + int warpId = threadIdx.x / warpSize; // ID of the warp itself - if (lane==0) shared[warpId]=val; // Write reduced value to shared memory + val = warpReduceMax(val); // Each warp performs partial reduction - __syncthreads(); // Wait for all partial reductions + if (lane == 0) shared[warpId] = val; // Write reduced value to shared memory - //read from shared memory only if that warp existed - val = (threadIdx.x < blockDim.x / warpSize) ? shared[lane] : 0; + __syncthreads(); // Wait for all partial reductions - if (warpId==0) val = warpReduceMax(val); //Final reduce within first warp + // read from shared memory only if that warp existed + val = (threadIdx.x < blockDim.x / warpSize) ? shared[lane] : 0; - return val; - } - // ===================================================================== + if (warpId == 0) val = warpReduceMax(val); // Final reduce within first warp - #ifndef O_HIP - // ===================================================================== - // This section handles the atomics. It is complicated because CUDA - // doesn't currently support atomics with non-integral types. - // This code is taken from - // https://github.com/rapidsai/cuml/blob/dc14361ba11c41f7a4e1e6a3625bbadd0f52daf7/cpp/src_prims/stats/minmax.cuh - // with slight tweaks for our use case. - // ===================================================================== - /*! - * \brief Do a device side bit cast - * - * \tparam To The output type - * \tparam From The input type - * \param from The input value - * \return To The bit cast version of From as type To - */ - template - __device__ constexpr To bit_cast(const From& from) noexcept - { - // TODO: replace with `std::bitcast` once we adopt C++20 or libcu++ adds it - To to{}; - static_assert(sizeof(To) == sizeof(From)); - memcpy(&to, &from, sizeof(To)); - return to; - } + return val; +} +// ===================================================================== - /*! - * \brief Encode a float as an int - * - * \param val The float to encode - * \return int The encoded int - */ - inline __device__ int encode(float val) - { - int i = bit_cast(val); - return i >= 0 ? i : (1 << 31) | ~i; - } + #ifndef O_HIP +// ===================================================================== +// This section handles the atomics. It is complicated because CUDA +// doesn't currently support atomics with non-integral types. +// This code is taken from +// https://github.com/rapidsai/cuml/blob/dc14361ba11c41f7a4e1e6a3625bbadd0f52daf7/cpp/src_prims/stats/minmax.cuh +// with slight tweaks for our use case. +// ===================================================================== +/*! + * \brief Do a device side bit cast + * + * \tparam To The output type + * \tparam From The input type + * \param from The input value + * \return To The bit cast version of From as type To + */ +template +__device__ constexpr To bit_cast(const From& from) noexcept +{ + // TODO: replace with `std::bitcast` once we adopt C++20 or libcu++ adds it + To to{}; + static_assert(sizeof(To) == sizeof(From)); + memcpy(&to, &from, sizeof(To)); + return to; +} - /*! - * \brief Encode a double as a long long int - * - * \param val The double to encode - * \return long long The encoded long long int - */ - inline __device__ long long encode(double val) - { - std::int64_t i = bit_cast(val); - return i >= 0 ? i : (1ULL << 63) | ~i; - } +/*! + * \brief Encode a float as an int + * + * \param val The float to encode + * \return int The encoded int + */ +inline __device__ int encode(float val) +{ + int i = bit_cast(val); + return i >= 0 ? i : (1 << 31) | ~i; +} - /*! - * \brief Decodes an int as a float - * - * \param val The int to decode - * \return float The decoded float - */ - inline __device__ float decode(int val) - { - if (val < 0) val = (1 << 31) | ~val; - return bit_cast(val); - } +/*! + * \brief Encode a double as a long long int + * + * \param val The double to encode + * \return long long The encoded long long int + */ +inline __device__ long long encode(double val) +{ + std::int64_t i = bit_cast(val); + return i >= 0 ? i : (1ULL << 63) | ~i; +} - /*! - * \brief Decodes a long long int as a double - * - * \param val The long long to decode - * \return double The decoded double - */ - inline __device__ double decode(long long val) - { - if (val < 0) val = (1ULL << 63) | ~val; - return bit_cast(val); - } - #endif //O_HIP - /*! - * \brief Perform an atomic reduction to find the maximum value of `val` - * - * \param[out] address The pointer to where to store the reduced scalar - * value in device memory - * \param[in] val The thread local variable to find the maximum of across - * the grid. Typically this should be a partial reduction that has - * already been reduced to the block level - */ - inline __device__ float atomicMaxBits(float* address, float val) - { - #ifdef O_HIP - return atomicMax(address, val); - #else //O_HIP - int old = atomicMax((int*)address, encode(val)); - return decode(old); - #endif //O_HIP - } +/*! + * \brief Decodes an int as a float + * + * \param val The int to decode + * \return float The decoded float + */ +inline __device__ float decode(int val) +{ + if (val < 0) val = (1 << 31) | ~val; + return bit_cast(val); +} - /*! - * \brief Perform an atomic reduction to find the maximum value of `val` - * - * \param[out] address The pointer to where to store the reduced scalar - * value in device memory - * \param[in] val The thread local variable to find the maximum of across - * the grid. Typically this should be a partial reduction that has - * already been reduced to the block level - */ - inline __device__ double atomicMaxBits(double* address, double val) - { - #ifdef O_HIP - return atomicMax(address, val); - #else //O_HIP - long long old = atomicMax((long long*)address, encode(val)); - return decode(old); - #endif //O_HIP - } +/*! + * \brief Decodes a long long int as a double + * + * \param val The long long to decode + * \return double The decoded double + */ +inline __device__ double decode(long long val) +{ + if (val < 0) val = (1ULL << 63) | ~val; + return bit_cast(val); +} + #endif // O_HIP +/*! + * \brief Perform an atomic reduction to find the maximum value of `val` + * + * \param[out] address The pointer to where to store the reduced scalar + * value in device memory + * \param[in] val The thread local variable to find the maximum of across + * the grid. Typically this should be a partial reduction that has + * already been reduced to the block level + */ +inline __device__ float atomicMaxBits(float* address, float val) +{ + #ifdef O_HIP + return atomicMax(address, val); + #else // O_HIP + int old = atomicMax((int*)address, encode(val)); + return decode(old); + #endif // O_HIP +} - /*! - * \brief Perform an atomic reduction to find the minimum value of `val` - * - * \param[out] address The pointer to where to store the reduced scalar - * value in device memory - * \param[in] val The thread local variable to find the minimum of across - * the grid. Typically this should be a partial reduction that has - * already been reduced to the block level - */ - inline __device__ float atomicMinBits(float* address, float val) - { - #ifdef O_HIP - return atomicMin(address, val); - #else //O_HIP - int old = atomicMin((int*)address, encode(val)); - return decode(old); - #endif //O_HIP - } +/*! + * \brief Perform an atomic reduction to find the maximum value of `val` + * + * \param[out] address The pointer to where to store the reduced scalar + * value in device memory + * \param[in] val The thread local variable to find the maximum of across + * the grid. Typically this should be a partial reduction that has + * already been reduced to the block level + */ +inline __device__ double atomicMaxBits(double* address, double val) +{ + #ifdef O_HIP + return atomicMax(address, val); + #else // O_HIP + long long old = atomicMax((long long*)address, encode(val)); + return decode(old); + #endif // O_HIP +} - /*! - * \brief Perform an atomic reduction to find the minimum value of `val` - * - * \param[out] address The pointer to where to store the reduced scalar - * value in device memory - * \param[in] val The thread local variable to find the minimum of across - * the grid. Typically this should be a partial reduction that has - * already been reduced to the block level - */ - inline __device__ double atomicMinBits(double* address, double val) - { - #ifdef O_HIP - return atomicMin(address, val); - #else //O_HIP - long long old = atomicMin((long long*)address, encode(val)); - return decode(old); - #endif //O_HIP - } - // ===================================================================== +/*! + * \brief Perform an atomic reduction to find the minimum value of `val` + * + * \param[out] address The pointer to where to store the reduced scalar + * value in device memory + * \param[in] val The thread local variable to find the minimum of across + * the grid. Typically this should be a partial reduction that has + * already been reduced to the block level + */ +inline __device__ float atomicMinBits(float* address, float val) +{ + #ifdef O_HIP + return atomicMin(address, val); + #else // O_HIP + int old = atomicMin((int*)address, encode(val)); + return decode(old); + #endif // O_HIP +} - // ===================================================================== - /*! - * \brief Perform a reduction within the grid to find the maximum value - * of `val`. Note that the value of `out` should be set appropriately - * before the kernel launch that uses this function to avoid any - * potential race condition; the `cuda_utilities::setScalarDeviceMemory` - * function exists for this purpose. - * of `val`. Note that the value of `out` should be set appropriately - * before the kernel launch that uses this function to avoid any - * potential race condition; the `cuda_utilities::setScalarDeviceMemory` - * function exists for this purpose. - * - * \details This function can perform a reduction to find the maximum of - * the thread local variable `val` across the entire grid. It relies on a - * warp-wise reduction using registers followed by a block-wise reduction - * using shared memory, and finally a grid-wise reduction using atomics. - * As a result the performance of this function is substantally improved - * by using as many threads per block as possible and as few blocks as - * possible since each block has to perform an atomic operation. To - * accomplish this it is reccommened that you use the - * `AutomaticLaunchParams` functions to get the optimal number of blocks - * and threads per block to launch rather than relying on Cholla defaults - * and then within the kernel using a grid-stride loop to make sure the - * kernel works with any combination of threads and blocks. Note that - * after this function call you cannot use the reduced value in global - * memory since there is no grid wide sync. You can get around this by - * either launching a second kernel to do the next steps or by using - * cooperative groups to perform a grid wide sync. During it's execution - * it also calls multiple __synchThreads and so cannot be called from - * within any kind of thread guard. - * - * \param[in] val The thread local variable to find the maximum of across - * the grid - * \param[out] out The pointer to where to store the reduced scalar value - * in device memory - */ - __inline__ __device__ void gridReduceMax(Real val, Real* out) - { +/*! + * \brief Perform an atomic reduction to find the minimum value of `val` + * + * \param[out] address The pointer to where to store the reduced scalar + * value in device memory + * \param[in] val The thread local variable to find the minimum of across + * the grid. Typically this should be a partial reduction that has + * already been reduced to the block level + */ +inline __device__ double atomicMinBits(double* address, double val) +{ + #ifdef O_HIP + return atomicMin(address, val); + #else // O_HIP + long long old = atomicMin((long long*)address, encode(val)); + return decode(old); + #endif // O_HIP +} +// ===================================================================== - // Reduce the entire block in parallel - val = blockReduceMax(val); +// ===================================================================== +/*! + * \brief Perform a reduction within the grid to find the maximum value + * of `val`. Note that the value of `out` should be set appropriately + * before the kernel launch that uses this function to avoid any + * potential race condition; the `cuda_utilities::setScalarDeviceMemory` + * function exists for this purpose. + * of `val`. Note that the value of `out` should be set appropriately + * before the kernel launch that uses this function to avoid any + * potential race condition; the `cuda_utilities::setScalarDeviceMemory` + * function exists for this purpose. + * + * \details This function can perform a reduction to find the maximum of + * the thread local variable `val` across the entire grid. It relies on a + * warp-wise reduction using registers followed by a block-wise reduction + * using shared memory, and finally a grid-wise reduction using atomics. + * As a result the performance of this function is substantally improved + * by using as many threads per block as possible and as few blocks as + * possible since each block has to perform an atomic operation. To + * accomplish this it is reccommened that you use the + * `AutomaticLaunchParams` functions to get the optimal number of blocks + * and threads per block to launch rather than relying on Cholla defaults + * and then within the kernel using a grid-stride loop to make sure the + * kernel works with any combination of threads and blocks. Note that + * after this function call you cannot use the reduced value in global + * memory since there is no grid wide sync. You can get around this by + * either launching a second kernel to do the next steps or by using + * cooperative groups to perform a grid wide sync. During it's execution + * it also calls multiple __synchThreads and so cannot be called from + * within any kind of thread guard. + * + * \param[in] val The thread local variable to find the maximum of across + * the grid + * \param[out] out The pointer to where to store the reduced scalar value + * in device memory + */ +__inline__ __device__ void gridReduceMax(Real val, Real* out) +{ + // Reduce the entire block in parallel + val = blockReduceMax(val); - // Write block level reduced value to the output scalar atomically - if (threadIdx.x == 0) atomicMaxBits(out, val); - } - // ===================================================================== + // Write block level reduced value to the output scalar atomically + if (threadIdx.x == 0) atomicMaxBits(out, val); +} +// ===================================================================== - // ===================================================================== - /*! - * \brief Find the maximum value in the array. Make sure to initialize - * `out` correctly before using this kernel; the - * `cuda_utilities::setScalarDeviceMemory` function exists for this - * purpose. If `in` and `out` are the same array that's ok, all the - * loads are completed before the overwrite occurs. - * \brief Find the maximum value in the array. Make sure to initialize - * `out` correctly before using this kernel; the - * `cuda_utilities::setScalarDeviceMemory` function exists for this - * purpose. If `in` and `out` are the same array that's ok, all the - * loads are completed before the overwrite occurs. - * - * \param[in] in The pointer to the array to reduce in device memory - * \param[out] out The pointer to where to store the reduced scalar - * value in device memory - * \param[in] N The size of the `in` array - */ - __global__ void kernelReduceMax(Real *in, Real* out, size_t N); - // ===================================================================== - } // namespace reduction_utilities -#endif //CUDA +// ===================================================================== +/*! + * \brief Find the maximum value in the array. Make sure to initialize + * `out` correctly before using this kernel; the + * `cuda_utilities::setScalarDeviceMemory` function exists for this + * purpose. If `in` and `out` are the same array that's ok, all the + * loads are completed before the overwrite occurs. + * \brief Find the maximum value in the array. Make sure to initialize + * `out` correctly before using this kernel; the + * `cuda_utilities::setScalarDeviceMemory` function exists for this + * purpose. If `in` and `out` are the same array that's ok, all the + * loads are completed before the overwrite occurs. + * + * \param[in] in The pointer to the array to reduce in device memory + * \param[out] out The pointer to where to store the reduced scalar + * value in device memory + * \param[in] N The size of the `in` array + */ +__global__ void kernelReduceMax(Real* in, Real* out, size_t N); +// ===================================================================== +} // namespace reduction_utilities +#endif // CUDA diff --git a/src/utils/reduction_utilities_tests.cu b/src/utils/reduction_utilities_tests.cu index 64613cc5b..e54ccd764 100644 --- a/src/utils/reduction_utilities_tests.cu +++ b/src/utils/reduction_utilities_tests.cu @@ -1,73 +1,73 @@ /*! * \file reduction_utilities_tests.cpp * \author Robert 'Bob' Caddy (rvc@pitt.edu) - * \brief Tests for the contents of reduction_utilities.h and reduction_utilities.cpp + * \brief Tests for the contents of reduction_utilities.h and + * reduction_utilities.cpp * */ // STL Includes -#include -#include #include #include +#include +#include // External Includes -#include // Include GoogleTest and related libraries/headers +#include // Include GoogleTest and related libraries/headers // Local Includes -#include "../utils/testing_utilities.h" -#include "../utils/reduction_utilities.h" -#include "../utils/cuda_utilities.h" -#include "../utils/DeviceVector.h" #include "../global/global.h" - - +#include "../utils/DeviceVector.h" +#include "../utils/cuda_utilities.h" +#include "../utils/reduction_utilities.h" +#include "../utils/testing_utilities.h" // ============================================================================= // Tests for divergence max reduction // ============================================================================= TEST(tALLKernelReduceMax, CorrectInputExpectCorrectOutput) { - // Launch parameters - // ================= - cuda_utilities::AutomaticLaunchParams static const launchParams(reduction_utilities::kernelReduceMax); - - // Grid Parameters & testing parameters - // ==================================== - size_t const gridSize = 64; - size_t const size = std::pow(gridSize, 3);; - Real const maxValue = 4; - std::vector host_grid(size); - - // Fill grid with random values and assign maximum value - std::mt19937 prng(1); - std::uniform_real_distribution doubleRand(-std::abs(maxValue)-1, std::abs(maxValue) - 1); - std::uniform_int_distribution intRand(0, host_grid.size()-1); - for (size_t i = 0; i < host_grid.size(); i++) - { - host_grid.at(i) = doubleRand(prng); - } - host_grid.at(intRand(prng)) = maxValue; + // Launch parameters + // ================= + cuda_utilities::AutomaticLaunchParams static const launchParams( + reduction_utilities::kernelReduceMax); + // Grid Parameters & testing parameters + // ==================================== + size_t const gridSize = 64; + size_t const size = std::pow(gridSize, 3); + ; + Real const maxValue = 4; + std::vector host_grid(size); - // Allocating and copying to device - // ================================ - cuda_utilities::DeviceVector dev_grid(host_grid.size()); - dev_grid.cpyHostToDevice(host_grid); + // Fill grid with random values and assign maximum value + std::mt19937 prng(1); + std::uniform_real_distribution doubleRand(-std::abs(maxValue) - 1, + std::abs(maxValue) - 1); + std::uniform_int_distribution intRand(0, host_grid.size() - 1); + for (size_t i = 0; i < host_grid.size(); i++) { + host_grid.at(i) = doubleRand(prng); + } + host_grid.at(intRand(prng)) = maxValue; + // Allocating and copying to device + // ================================ + cuda_utilities::DeviceVector dev_grid(host_grid.size()); + dev_grid.cpyHostToDevice(host_grid); - cuda_utilities::DeviceVector static dev_max(1); - dev_max.assign(std::numeric_limits::lowest()); + cuda_utilities::DeviceVector static dev_max(1); + dev_max.assign(std::numeric_limits::lowest()); - // Do the reduction - // ================ - hipLaunchKernelGGL(reduction_utilities::kernelReduceMax, - launchParams.numBlocks, launchParams.threadsPerBlock, 0, 0, - dev_grid.data(), dev_max.data(), host_grid.size()); - CudaCheckError(); + // Do the reduction + // ================ + hipLaunchKernelGGL(reduction_utilities::kernelReduceMax, + launchParams.numBlocks, launchParams.threadsPerBlock, 0, 0, + dev_grid.data(), dev_max.data(), host_grid.size()); + CudaCheckError(); - // Perform comparison - testingUtilities::checkResults(maxValue, dev_max.at(0), "maximum value found"); + // Perform comparison + testingUtilities::checkResults(maxValue, dev_max.at(0), + "maximum value found"); } // ============================================================================= // Tests for divergence max reduction diff --git a/src/utils/testing_utilities.cpp b/src/utils/testing_utilities.cpp index 6035b68b5..bc14dcd90 100644 --- a/src/utils/testing_utilities.cpp +++ b/src/utils/testing_utilities.cpp @@ -6,134 +6,130 @@ */ // STL includes -#include #include #include #include +#include // External Includes -#include // Include GoogleTest and related libraries/headers +#include // Include GoogleTest and related libraries/headers // Local includes -#include "../utils/testing_utilities.h" // Include the header file -#include "../system_tests/system_tester.h" // provide systemTest class +#include "../system_tests/system_tester.h" // provide systemTest class +#include "../utils/testing_utilities.h" // Include the header file namespace testingUtilities { - // ========================================================================= - int64_t ulpsDistanceDbl(double const &a, double const &b) - { - // Save work if the floats are equal. - // Also handles +0 == -0 - if (a == b) return 0; +// ========================================================================= +int64_t ulpsDistanceDbl(double const &a, double const &b) +{ + // Save work if the floats are equal. + // Also handles +0 == -0 + if (a == b) return 0; - const auto maxInt = std::numeric_limits::max(); + const auto maxInt = std::numeric_limits::max(); - // If either one is NaN then they are not equal, max distance. - if (std::isnan(a) || std::isnan(b)) return maxInt; + // If either one is NaN then they are not equal, max distance. + if (std::isnan(a) || std::isnan(b)) return maxInt; - // If one's infinite and they're not equal, max distance. - if (std::isinf(a) || std::isinf(b)) return maxInt; + // If one's infinite and they're not equal, max distance. + if (std::isinf(a) || std::isinf(b)) return maxInt; - int64_t ia, ib; - std::memcpy(&ia, &a, sizeof(double)); - std::memcpy(&ib, &b, sizeof(double)); + int64_t ia, ib; + std::memcpy(&ia, &a, sizeof(double)); + std::memcpy(&ib, &b, sizeof(double)); - // Don't compare differently-signed floats. - if ((ia < 0) != (ib < 0)) return maxInt; + // Don't compare differently-signed floats. + if ((ia < 0) != (ib < 0)) return maxInt; - // Return the absolute value of the distance in ULPs. - int64_t distance = ia - ib; - if (distance < 0) distance = -distance; + // Return the absolute value of the distance in ULPs. + int64_t distance = ia - ib; + if (distance < 0) distance = -distance; - return distance; - } - // ========================================================================= - - // ========================================================================= - bool nearlyEqualDbl(double const &a, - double const &b, - double &absoluteDiff, - int64_t &ulpsDiff, - double const &fixedEpsilon, // = 1E-14 by default - int const &ulpsEpsilon) // = 4 by default - { - // Compute differences - ulpsDiff = ulpsDistanceDbl(a, b); - absoluteDiff = std::abs(a - b); - - // Perform the ULP check which is for numbers far from zero - if (ulpsDiff <= ulpsEpsilon) - { - return true; - } - // Perform the absolute check which is for numbers near zero - else if (absoluteDiff <= fixedEpsilon) - { - return true; - } - // if none of the checks have passed indicate test failure - else - { - return false; - } - } - // ========================================================================= - - void wrapperEqual(int i, int j, int k, std::string dataSetName, - double test_value, double fid_value, double fixedEpsilon=5.0E-12) { - - std::string outString; - outString += dataSetName; - outString += " dataset at ["; - outString += i; - outString += ","; - outString += j; - outString += ","; - outString += k; - outString += "]"; - - ASSERT_NO_FATAL_FAILURE(checkResults<1>(fid_value,test_value,outString,fixedEpsilon)); + return distance; +} +// ========================================================================= + +// ========================================================================= +bool nearlyEqualDbl(double const &a, double const &b, double &absoluteDiff, + int64_t &ulpsDiff, + double const &fixedEpsilon, // = 1E-14 by default + int const &ulpsEpsilon) // = 4 by default +{ + // Compute differences + ulpsDiff = ulpsDistanceDbl(a, b); + absoluteDiff = std::abs(a - b); + + // Perform the ULP check which is for numbers far from zero + if (ulpsDiff <= ulpsEpsilon) { + return true; } + // Perform the absolute check which is for numbers near zero + else if (absoluteDiff <= fixedEpsilon) { + return true; + } + // if none of the checks have passed indicate test failure + else { + return false; + } +} +// ========================================================================= - void analyticConstant(systemTest::SystemTestRunner testObject, std::string dataSetName, double value) { - std::vector testDims(3,1); - std::vector testData = testObject.loadTestFieldData(dataSetName,testDims); - for (size_t i = 0; i < testDims[0]; i++) - { - for (size_t j = 0; j < testDims[1]; j++) - { - for (size_t k = 0; k < testDims[2]; k++) - { - size_t index = (i * testDims[1] * testDims[2]) + (j * testDims[2]) + k; - - ASSERT_NO_FATAL_FAILURE(wrapperEqual(i,j,k,dataSetName,testData.at(index),value)); - } +void wrapperEqual(int i, int j, int k, std::string dataSetName, + double test_value, double fid_value, + double fixedEpsilon = 5.0E-12) +{ + std::string outString; + outString += dataSetName; + outString += " dataset at ["; + outString += i; + outString += ","; + outString += j; + outString += ","; + outString += k; + outString += "]"; + + ASSERT_NO_FATAL_FAILURE( + checkResults<1>(fid_value, test_value, outString, fixedEpsilon)); +} + +void analyticConstant(systemTest::SystemTestRunner testObject, + std::string dataSetName, double value) +{ + std::vector testDims(3, 1); + std::vector testData = + testObject.loadTestFieldData(dataSetName, testDims); + for (size_t i = 0; i < testDims[0]; i++) { + for (size_t j = 0; j < testDims[1]; j++) { + for (size_t k = 0; k < testDims[2]; k++) { + size_t index = (i * testDims[1] * testDims[2]) + (j * testDims[2]) + k; + + ASSERT_NO_FATAL_FAILURE( + wrapperEqual(i, j, k, dataSetName, testData.at(index), value)); } } } +} - void analyticSine(systemTest::SystemTestRunner testObject, std::string dataSetName, - double constant, double amplitude, - double kx, double ky, double kz, double phase, double tolerance) - { - std::vector testDims(3,1); - std::vector testData = testObject.loadTestFieldData(dataSetName,testDims); - for (size_t i = 0; i < testDims[0]; i++) - { - for (size_t j = 0; j < testDims[1]; j++) - { - for (size_t k = 0; k < testDims[2]; k++) - { - double value = constant + amplitude*std::sin(kx*i+ky*j+kz*k+phase); - size_t index = (i * testDims[1] * testDims[2]) + (j * testDims[2]) + k; - ASSERT_NO_FATAL_FAILURE(wrapperEqual(i,j,k,dataSetName,testData.at(index),value,tolerance)); - } - } +void analyticSine(systemTest::SystemTestRunner testObject, + std::string dataSetName, double constant, double amplitude, + double kx, double ky, double kz, double phase, + double tolerance) +{ + std::vector testDims(3, 1); + std::vector testData = + testObject.loadTestFieldData(dataSetName, testDims); + for (size_t i = 0; i < testDims[0]; i++) { + for (size_t j = 0; j < testDims[1]; j++) { + for (size_t k = 0; k < testDims[2]; k++) { + double value = + constant + amplitude * std::sin(kx * i + ky * j + kz * k + phase); + size_t index = (i * testDims[1] * testDims[2]) + (j * testDims[2]) + k; + ASSERT_NO_FATAL_FAILURE(wrapperEqual( + i, j, k, dataSetName, testData.at(index), value, tolerance)); + } } } - - - - } + +} // namespace testingUtilities diff --git a/src/utils/testing_utilities.h b/src/utils/testing_utilities.h index b98780247..15d5b4867 100644 --- a/src/utils/testing_utilities.h +++ b/src/utils/testing_utilities.h @@ -9,12 +9,12 @@ #pragma once // STL includes -#include -#include -#include #include +#include +#include +#include -#include "../system_tests/system_tester.h" // provide systemTest class +#include "../system_tests/system_tester.h" // provide systemTest class // ============================================================================= // NOTE: Global variables are declared as extern at the end of this file @@ -28,192 +28,177 @@ */ namespace testingUtilities { - // ========================================================================= - /*! - * \brief Compute the Units in the Last Place (ULP) difference between two doubles - * - * \details This function is modified from - * [Comparing Floating-Point Numbers Is Tricky by Matt Kline](https://bitbashing.io/comparing-floats.html) - * which is in turn based on - * [Comparing Floating Point Numbers, 2012 Edition by Bruce Dawson](https://randomascii.wordpress.com/2012/02/25/comparing-floating-point-numbers-2012-edition/). - * The latter seems to be the bible of floating point comparison and is the - * basis of Googletests ASSERT_DOUBLE_EQ assertion. - * - * This particular function checks that the two numbers if the numbers are - * perfectly equal, +0, -0, Nan, inf, or differently signed then it computes - * the ULP difference between them are returns it - * - * \param[in] a The first double you wish to compare. Order doesn't matter. - * \param[in] b The second double you wish to compare. Order doesn't matter. - * \return int64_t The ULP distance between a and b. - */ - int64_t ulpsDistanceDbl(double const &a, double const &b); - // ========================================================================= - - // ========================================================================= - /*! - * \brief Check if two doubles are nearly equal. - * - * \details This function checks if two doubles are "nearly equal" which is - * defined as either: A) the absolute difference between them is less than - * the fixedEpsilon argument or B) the units in the last place (ULP) - * difference is less than the ulpsEpsilon argument. Both of the epsilon - * arguments have default values which generally should not need to be - * changed. - * - * Why does fixedEpsilon default to 1E-14? Running the Sod shock tube when - * Cholla was compiled with GCC 9.3.0 vs. XL 16.1.1-10 on Summit lead to - * absolute differences in the results up to 1.77636E-15. A priori we chose - * that a difference between two numbers that was less than one order of - * magnitude greater than the difference between compilers would be - * considered "equal". I.e. since the maximum absolute error between the GCC - * and XL compilers was ~1.7E-15 our allowed margin of error should be - * ~1E-14. - * - * Why does ulpsEpsilon default to 4? Repeating the test above I computed - * the largest ULP difference that wasn't caught by the absolute difference - * requirement of 1E-14. It turns out that there were no uncaught - * differences at all so I kept ulpsEpsilon at 4 since that's the Googletest - * default for their floating point assertions - * - * \param[in] a The first double you wish to compare. Order doesn't matter. - * \param[in] b The first double you wish to compare. Order doesn't matter. - * \param[out] absoluteDiff The absolute difference between the numbers. - * Only returned if the numbers are not equal. If the numbers are equal then - * behaviour is undefined - * \param[out] ulpsDiff The ULP difference between the numbers. - * Only returned if the numbers are not equal. If the numbers are equal then - * behaviour is undefined - * \param[in] fixedEpsilon The allowed difference in real numbers. Defaults - * to 1E-14 - * \param[in] ulpsEpsilon The allowed difference of ULPs. Defaults to 4 - * \return bool Whether or not the numbers are equal - */ - bool nearlyEqualDbl(double const &a, - double const &b, - double &absoluteDiff, - int64_t &ulpsDiff, - double const &fixedEpsilon = 1E-14, - int const &ulpsEpsilon = 4); - // ========================================================================= - - void wrapperEqual(int i, int j, int k, std::string dataSetName, double test_value, double fid_value, double fixedEpsilon); - - void analyticConstant(systemTest::SystemTestRunner testObject, std::string dataSetName, double value); - - void analyticSine(systemTest::SystemTestRunner testObject, std::string dataSetName, - double constant, double amplitude, double kx, double ky, double kz, - double phase, double tolerance); - - // ========================================================================= - /*! - * \brief A simple function to compare two doubles with the nearlyEqualDbl - * function, perform a GTest assert on the result, and print out the values - * - * \tparam checkType The type of GTest assertion to use. "0" for and - * "EXPECT" and "1" for an "ASSERT" - * \param[in] fiducialNumber The fiducial number to test against - * \param[in] testNumber The unverified number to test - * \param[in] outString A string to be printed in the first line of the output - * message. Format will be "Difference in outString" - * \param[in] fixedEpsilon The fixed epsilon to use in the comparison. - * Negative values are ignored and default behaviour is used - * \param[in] ulpsEpsilon The ULP epsilon to use in the comparison. Negative - * values are ignored and default behaviour is used - */ - template - void checkResults(double fiducialNumber, - double testNumber, - std::string outString, - double fixedEpsilon = -999, - int ulpsEpsilon = -999) - { - // Check for equality and if not equal return difference - double absoluteDiff; - int64_t ulpsDiff; - bool areEqual; - - if ((fixedEpsilon < 0) and (ulpsEpsilon < 0)) - { - areEqual = testingUtilities::nearlyEqualDbl(fiducialNumber, - testNumber, - absoluteDiff, - ulpsDiff); - } - else if ((fixedEpsilon > 0) and (ulpsEpsilon < 0)) - { - areEqual = testingUtilities::nearlyEqualDbl(fiducialNumber, - testNumber, - absoluteDiff, - ulpsDiff, - fixedEpsilon); - } - else - { - areEqual = testingUtilities::nearlyEqualDbl(fiducialNumber, - testNumber, - absoluteDiff, - ulpsDiff, - fixedEpsilon, - ulpsEpsilon); - } - - std::stringstream outputMessage; - outputMessage << std::setprecision(std::numeric_limits::max_digits10) - << "Difference in " << outString << std::endl - << "The fiducial value is: " << fiducialNumber << std::endl - << "The test value is: " << testNumber << std::endl - << "The absolute difference is: " << absoluteDiff << std::endl - << "The ULP difference is: " << ulpsDiff << std::endl; - - if (checkType == 0) - { - EXPECT_TRUE(areEqual) << outputMessage.str(); - } - else if (checkType == 1) - { - ASSERT_TRUE(areEqual) << outputMessage.str(); - } - else - { - throw std::runtime_error("Incorrect template argument passed to " - "checkResults. Options are 0 and 1 but " - + std::to_string(checkType) + " was passed"); - } - } - // ========================================================================= - - // ========================================================================= - /*! - * \brief Holds a single std::string that's intended to be read only and - * global. Use for storing the path of the root directory of Cholla - * - */ - class GlobalString - { - private: - /// The path variable - std::string _string; - public: - /*! - * \brief Initializes the _path member variable. Should only be called - * once in main - * - * \param inputPath The path to be store in _path - */ - void init(std::string const &inputPath) {_string = inputPath;}; - - /*! - * \brief Get the String object - * - * \return std::string The string variable - */ - std::string getString() {return _string;}; - GlobalString() = default; - ~GlobalString() = default; - }; - // ========================================================================= +// ========================================================================= +/*! + * \brief Compute the Units in the Last Place (ULP) difference between two + * doubles + * + * \details This function is modified from + * [Comparing Floating-Point Numbers Is Tricky by Matt + * Kline](https://bitbashing.io/comparing-floats.html) which is in turn based on + * [Comparing Floating Point Numbers, 2012 Edition by Bruce + * Dawson](https://randomascii.wordpress.com/2012/02/25/comparing-floating-point-numbers-2012-edition/). + * The latter seems to be the bible of floating point comparison and is the + * basis of Googletests ASSERT_DOUBLE_EQ assertion. + * + * This particular function checks that the two numbers if the numbers are + * perfectly equal, +0, -0, Nan, inf, or differently signed then it computes + * the ULP difference between them are returns it + * + * \param[in] a The first double you wish to compare. Order doesn't matter. + * \param[in] b The second double you wish to compare. Order doesn't matter. + * \return int64_t The ULP distance between a and b. + */ +int64_t ulpsDistanceDbl(double const &a, double const &b); +// ========================================================================= + +// ========================================================================= +/*! + * \brief Check if two doubles are nearly equal. + * + * \details This function checks if two doubles are "nearly equal" which is + * defined as either: A) the absolute difference between them is less than + * the fixedEpsilon argument or B) the units in the last place (ULP) + * difference is less than the ulpsEpsilon argument. Both of the epsilon + * arguments have default values which generally should not need to be + * changed. + * + * Why does fixedEpsilon default to 1E-14? Running the Sod shock tube when + * Cholla was compiled with GCC 9.3.0 vs. XL 16.1.1-10 on Summit lead to + * absolute differences in the results up to 1.77636E-15. A priori we chose + * that a difference between two numbers that was less than one order of + * magnitude greater than the difference between compilers would be + * considered "equal". I.e. since the maximum absolute error between the GCC + * and XL compilers was ~1.7E-15 our allowed margin of error should be + * ~1E-14. + * + * Why does ulpsEpsilon default to 4? Repeating the test above I computed + * the largest ULP difference that wasn't caught by the absolute difference + * requirement of 1E-14. It turns out that there were no uncaught + * differences at all so I kept ulpsEpsilon at 4 since that's the Googletest + * default for their floating point assertions + * + * \param[in] a The first double you wish to compare. Order doesn't matter. + * \param[in] b The first double you wish to compare. Order doesn't matter. + * \param[out] absoluteDiff The absolute difference between the numbers. + * Only returned if the numbers are not equal. If the numbers are equal then + * behaviour is undefined + * \param[out] ulpsDiff The ULP difference between the numbers. + * Only returned if the numbers are not equal. If the numbers are equal then + * behaviour is undefined + * \param[in] fixedEpsilon The allowed difference in real numbers. Defaults + * to 1E-14 + * \param[in] ulpsEpsilon The allowed difference of ULPs. Defaults to 4 + * \return bool Whether or not the numbers are equal + */ +bool nearlyEqualDbl(double const &a, double const &b, double &absoluteDiff, + int64_t &ulpsDiff, double const &fixedEpsilon = 1E-14, + int const &ulpsEpsilon = 4); +// ========================================================================= + +void wrapperEqual(int i, int j, int k, std::string dataSetName, + double test_value, double fid_value, double fixedEpsilon); + +void analyticConstant(systemTest::SystemTestRunner testObject, + std::string dataSetName, double value); + +void analyticSine(systemTest::SystemTestRunner testObject, + std::string dataSetName, double constant, double amplitude, + double kx, double ky, double kz, double phase, + double tolerance); + +// ========================================================================= +/*! + * \brief A simple function to compare two doubles with the nearlyEqualDbl + * function, perform a GTest assert on the result, and print out the values + * + * \tparam checkType The type of GTest assertion to use. "0" for and + * "EXPECT" and "1" for an "ASSERT" + * \param[in] fiducialNumber The fiducial number to test against + * \param[in] testNumber The unverified number to test + * \param[in] outString A string to be printed in the first line of the output + * message. Format will be "Difference in outString" + * \param[in] fixedEpsilon The fixed epsilon to use in the comparison. + * Negative values are ignored and default behaviour is used + * \param[in] ulpsEpsilon The ULP epsilon to use in the comparison. Negative + * values are ignored and default behaviour is used + */ +template +void checkResults(double fiducialNumber, double testNumber, + std::string outString, double fixedEpsilon = -999, + int ulpsEpsilon = -999) +{ + // Check for equality and if not equal return difference + double absoluteDiff; + int64_t ulpsDiff; + bool areEqual; + + if ((fixedEpsilon < 0) and (ulpsEpsilon < 0)) { + areEqual = testingUtilities::nearlyEqualDbl(fiducialNumber, testNumber, + absoluteDiff, ulpsDiff); + } else if ((fixedEpsilon > 0) and (ulpsEpsilon < 0)) { + areEqual = testingUtilities::nearlyEqualDbl( + fiducialNumber, testNumber, absoluteDiff, ulpsDiff, fixedEpsilon); + } else { + areEqual = testingUtilities::nearlyEqualDbl(fiducialNumber, testNumber, + absoluteDiff, ulpsDiff, + fixedEpsilon, ulpsEpsilon); + } + + std::stringstream outputMessage; + outputMessage << std::setprecision(std::numeric_limits::max_digits10) + << "Difference in " << outString << std::endl + << "The fiducial value is: " << fiducialNumber + << std::endl + << "The test value is: " << testNumber << std::endl + << "The absolute difference is: " << absoluteDiff << std::endl + << "The ULP difference is: " << ulpsDiff << std::endl; + + if (checkType == 0) { + EXPECT_TRUE(areEqual) << outputMessage.str(); + } else if (checkType == 1) { + ASSERT_TRUE(areEqual) << outputMessage.str(); + } else { + throw std::runtime_error( + "Incorrect template argument passed to " + "checkResults. Options are 0 and 1 but " + + std::to_string(checkType) + " was passed"); + } } +// ========================================================================= + +// ========================================================================= +/*! + * \brief Holds a single std::string that's intended to be read only and + * global. Use for storing the path of the root directory of Cholla + * + */ +class GlobalString +{ + private: + /// The path variable + std::string _string; + + public: + /*! + * \brief Initializes the _path member variable. Should only be called + * once in main + * + * \param inputPath The path to be store in _path + */ + void init(std::string const &inputPath) { _string = inputPath; }; + + /*! + * \brief Get the String object + * + * \return std::string The string variable + */ + std::string getString() { return _string; }; + GlobalString() = default; + ~GlobalString() = default; +}; +// ========================================================================= +} // namespace testingUtilities // Declare the global string variables so everything that imports this file // has access to them diff --git a/src/utils/timing_functions.cpp b/src/utils/timing_functions.cpp index 9ac6bb4ba..050977e1b 100644 --- a/src/utils/timing_functions.cpp +++ b/src/utils/timing_functions.cpp @@ -1,47 +1,53 @@ #ifdef CPU_TIME -#include "../utils/timing_functions.h" -#include "../io/io.h" -#include -#include -#include - -#ifdef MPI_CHOLLA -#include "../mpi/mpi_routines.h" -#endif + #include "../utils/timing_functions.h" + + #include + #include + #include -void OneTime::Start(){ + #include "../io/io.h" + + #ifdef MPI_CHOLLA + #include "../mpi/mpi_routines.h" + #endif + +void OneTime::Start() +{ if (inactive) return; time_start = get_time(); } -void OneTime::Subtract(Real time_to_subtract){ - // Add the time_to_substract to the start time, that way the time_end - time_start is reduced by time_to_substract +void OneTime::Subtract(Real time_to_subtract) +{ + // Add the time_to_substract to the start time, that way the time_end - + // time_start is reduced by time_to_substract time_start += time_to_subtract; } -void OneTime::End(){ +void OneTime::End() +{ if (inactive) return; Real time_end = get_time(); - Real time = (time_end - time_start)*1000; + Real time = (time_end - time_start) * 1000; -#ifdef MPI_CHOLLA + #ifdef MPI_CHOLLA t_min = ReduceRealMin(time); t_max = ReduceRealMax(time); t_avg = ReduceRealAvg(time); -#else + #else t_min = time; t_max = time; t_avg = time; -#endif + #endif if (n_steps > 0) t_all += t_max; n_steps++; } - -void OneTime::RecordTime( Real time ){ - time *= 1000; //Convert from secs to ms +void OneTime::RecordTime(Real time) +{ + time *= 1000; // Convert from secs to ms #ifdef MPI_CHOLLA t_min = ReduceRealMin(time); t_max = ReduceRealMax(time); @@ -55,23 +61,27 @@ void OneTime::RecordTime( Real time ){ n_steps++; } - -void OneTime::PrintStep(){ - chprintf(" Time %-19s min: %9.4f max: %9.4f avg: %9.4f ms\n", name, t_min, t_max, t_avg); +void OneTime::PrintStep() +{ + chprintf(" Time %-19s min: %9.4f max: %9.4f avg: %9.4f ms\n", name, t_min, + t_max, t_avg); } -void OneTime::PrintAverage(){ - if (n_steps > 1) chprintf(" Time %-19s avg: %9.4f ms\n", name, t_all/(n_steps-1)); +void OneTime::PrintAverage() +{ + if (n_steps > 1) + chprintf(" Time %-19s avg: %9.4f ms\n", name, t_all / (n_steps - 1)); } -void OneTime::PrintAll(){ +void OneTime::PrintAll() +{ chprintf(" Time %-19s all: %9.4f ms\n", name, t_all); } -Time::Time( void ){} - -void Time::Initialize(){ +Time::Time(void) {} +void Time::Initialize() +{ n_steps = 0; // Add or remove timers by editing this list, keep TOTAL at the end @@ -79,68 +89,69 @@ void Time::Initialize(){ // add Timer.NAME.Start() and Timer.NAME.End() where appropriate. onetimes = { - #ifdef PARTICLES - &(Calc_dt = OneTime("Calc_dt")), - #endif - &(Hydro = OneTime("Hydro")), - &(Boundaries = OneTime("Boundaries")), - #ifdef GRAVITY - &(Grav_Potential = OneTime("Grav_Potential")), - &(Pot_Boundaries = OneTime("Pot_Boundaries")), - #endif - #ifdef PARTICLES - &(Part_Density = OneTime("Part_Density")), - &(Part_Boundaries = OneTime("Part_Boundaries")), - &(Part_Dens_Transf = OneTime("Part_Dens_Transf")), - &(Advance_Part_1 = OneTime("Advance_Part_1")), - &(Advance_Part_2 = OneTime("Advance_Part_2")), - #endif - #ifdef COOLING_GRACKLE - &(Cooling = OneTime("Cooling")), - #endif - #ifdef CHEMISTRY_GPU - &(Chemistry = OneTime("Chemistry")), - #endif - #ifdef SUPERNOVA - &(Feedback = OneTime("Feedback")), + #ifdef PARTICLES + &(Calc_dt = OneTime("Calc_dt")), + #endif + &(Hydro = OneTime("Hydro")), + &(Boundaries = OneTime("Boundaries")), + #ifdef GRAVITY + &(Grav_Potential = OneTime("Grav_Potential")), + &(Pot_Boundaries = OneTime("Pot_Boundaries")), + #endif + #ifdef PARTICLES + &(Part_Density = OneTime("Part_Density")), + &(Part_Boundaries = OneTime("Part_Boundaries")), + &(Part_Dens_Transf = OneTime("Part_Dens_Transf")), + &(Advance_Part_1 = OneTime("Advance_Part_1")), + &(Advance_Part_2 = OneTime("Advance_Part_2")), + #endif + #ifdef COOLING_GRACKLE + &(Cooling = OneTime("Cooling")), + #endif + #ifdef CHEMISTRY_GPU + &(Chemistry = OneTime("Chemistry")), + #endif + #ifdef SUPERNOVA + &(Feedback = OneTime("Feedback")), #ifdef ANALYSIS - &(FeedbackAnalysis = OneTime("FeedbackAnalysis")), + &(FeedbackAnalysis = OneTime("FeedbackAnalysis")), #endif - #endif // SUPERNOVA - &(Total = OneTime("Total")), + #endif // SUPERNOVA + &(Total = OneTime("Total")), }; - - chprintf( "\nTiming Functions is ON \n"); - + chprintf("\nTiming Functions is ON \n"); } -void Time::Print_Times(){ - for (OneTime* x : onetimes){ +void Time::Print_Times() +{ + for (OneTime* x : onetimes) { x->PrintStep(); } } // once at end of run in main.cpp -void Time::Print_Average_Times( struct parameters P ){ - +void Time::Print_Average_Times(struct parameters P) +{ chprintf("\nAverage Times n_steps:%d\n", n_steps); - for (OneTime* x : onetimes){ + for (OneTime* x : onetimes) { x->PrintAverage(); } - std::string file_name ( "run_timing.log" ); + std::string file_name("run_timing.log"); std::string header; - chprintf( "Writing timing values to file: %s \n", file_name.c_str()); + chprintf("Writing timing values to file: %s \n", file_name.c_str()); - std::string gitHash = "Git Commit Hash = " + std::string(GIT_HASH) + std::string("\n"); - std::string macroFlags = "Macro Flags = " + std::string(MACRO_FLAGS) + std::string("\n\n"); + std::string gitHash = + "Git Commit Hash = " + std::string(GIT_HASH) + std::string("\n"); + std::string macroFlags = + "Macro Flags = " + std::string(MACRO_FLAGS) + std::string("\n\n"); header = "#n_proc nx ny nz n_omp n_steps "; - for (OneTime* x : onetimes){ + for (OneTime* x : onetimes) { header += x->name; header += " "; } @@ -148,24 +159,23 @@ void Time::Print_Average_Times( struct parameters P ){ header += " \n"; bool file_exists = false; - if (FILE *file = fopen(file_name.c_str(), "r")){ + if (FILE* file = fopen(file_name.c_str(), "r")) { file_exists = true; - chprintf( " File exists, appending values: %s \n", file_name.c_str() ); - fclose( file ); - } else{ - chprintf( " Creating File: %s \n", file_name.c_str() ); + chprintf(" File exists, appending values: %s \n", file_name.c_str()); + fclose(file); + } else { + chprintf(" Creating File: %s \n", file_name.c_str()); } #ifdef MPI_CHOLLA - if ( procID != 0 ) return; + if (procID != 0) return; #endif std::ofstream out_file; -// Output timing values + // Output timing values out_file.open(file_name.c_str(), std::ios::app); - if ( !file_exists ) - { + if (!file_exists) { out_file << gitHash; out_file << macroFlags; out_file << header; @@ -183,15 +193,14 @@ void Time::Print_Average_Times( struct parameters P ){ #endif out_file << n_steps << " "; - for (OneTime* x : onetimes){ + for (OneTime* x : onetimes) { out_file << x->t_all << " "; } out_file << "\n"; out_file.close(); - chprintf( "Saved Timing: %s \n\n", file_name.c_str() ); - + chprintf("Saved Timing: %s \n\n", file_name.c_str()); } #endif diff --git a/src/utils/timing_functions.h b/src/utils/timing_functions.h index 84a1520d7..54caa56d0 100644 --- a/src/utils/timing_functions.h +++ b/src/utils/timing_functions.h @@ -1,12 +1,14 @@ #ifdef CPU_TIME -#ifndef TIMING_FUNCTIONS_H -#define TIMING_FUNCTIONS_H + #ifndef TIMING_FUNCTIONS_H + #define TIMING_FUNCTIONS_H -#include -#include "../global/global.h" + #include -// Each instance of this class represents a single timer, timing a single section of code. -// All instances have their own n_steps, time_start, etc. so that all timers can run independently + #include "../global/global.h" + +// Each instance of this class represents a single timer, timing a single +// section of code. All instances have their own n_steps, time_start, etc. so +// that all timers can run independently class OneTime { public: @@ -16,13 +18,13 @@ class OneTime Real t_min; Real t_max; Real t_avg; - Real t_all=0; - bool inactive=true; - OneTime(void){ - } - OneTime(const char* input_name){ - name = input_name; - inactive=false; + Real t_all = 0; + bool inactive = true; + OneTime(void) {} + OneTime(const char* input_name) + { + name = input_name; + inactive = false; } void Start(); void Subtract(Real time_to_subtract); @@ -30,15 +32,15 @@ class OneTime void PrintStep(); void PrintAverage(); void PrintAll(); - void RecordTime( Real time ); + void RecordTime(Real time); }; -// Time loops through instances of OneTime. onetimes is initialized with pointers to each timer. +// Time loops through instances of OneTime. onetimes is initialized with +// pointers to each timer. // class Time { -public: - + public: int n_steps; OneTime Total; @@ -56,16 +58,14 @@ class Time OneTime Chemistry; OneTime Feedback; OneTime FeedbackAnalysis; - + std::vector onetimes; - + Time(); void Initialize(); void Print_Times(); - void Print_Average_Times( struct parameters P ); - + void Print_Average_Times(struct parameters P); }; - -#endif -#endif //CPU_TIME + #endif +#endif // CPU_TIME From ea2d1b9838b516d579906a62530a260b67dd3d77 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Mon, 16 Jan 2023 11:15:51 -0500 Subject: [PATCH 179/694] Add formatting commit to .git-blame-ignore-revs --- .git-blame-ignore-revs | 1 + 1 file changed, 1 insertion(+) diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs index 8e42d4fda..331d6f122 100644 --- a/.git-blame-ignore-revs +++ b/.git-blame-ignore-revs @@ -11,3 +11,4 @@ b78d8c96680c9c2d5a5d41656895cb3795e1e204 # Reformat Code with clang-format +729ef8ed307eaa2cf42baa1f5af6c389ad614ac4 From fbcf3bdc3d6e4b0aba781addfdc3fe8416949baa Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Mon, 16 Jan 2023 11:29:51 -0500 Subject: [PATCH 180/694] Fix bad syntax in hydro test --- src/hydro/hydro_cuda_tests.cu | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/hydro/hydro_cuda_tests.cu b/src/hydro/hydro_cuda_tests.cu index 524e61469..4d4752ad9 100644 --- a/src/hydro/hydro_cuda_tests.cu +++ b/src/hydro/hydro_cuda_tests.cu @@ -55,8 +55,7 @@ TEST(tHYDROCalcDt3D, CorrectInputExpectCorrectOutput) host_conserved.at(4) = 1.0; // Energy // Copy host data to device arrray - CudaSafeCall(cudaMemcpy(dev_conserved, host_conserved, - n_fields * sizeof(Real), cudaMemcpyHostToDevice)); + dev_conserved.cpyHostToDevice(host_conserved); //__global__ void Calc_dt_3D(Real *dev_conserved, Real *dev_dti, Real gamma, // int n_ghost, int n_fields, int nx, int ny, int nz, Real dx, Real dy, Real // dz) From 35023e1f257220366d07bfc18312e7f36ae937f0 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Mon, 16 Jan 2023 12:03:40 -0500 Subject: [PATCH 181/694] Minor tweaks to feedback to let it pass clang-tidy --- src/particles/feedback_CIC_gpu.cu | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/particles/feedback_CIC_gpu.cu b/src/particles/feedback_CIC_gpu.cu index 59292802b..58388762a 100644 --- a/src/particles/feedback_CIC_gpu.cu +++ b/src/particles/feedback_CIC_gpu.cu @@ -74,8 +74,8 @@ void supernova::initState(struct parameters* P, part_int_t n_local, { printf("supernova::initState start\n"); std::string snr_filename(P->snr_filename); - if (snr_filename.size()) { - chprintf("Specified a SNR filename %s.\n", &snr_filename[0]); + if (not snr_filename.empty()) { + chprintf("Specified a SNR filename %s.\n", snr_filename.data()); // read in array of supernova rate values. std::ifstream snr_in(snr_filename); @@ -97,7 +97,7 @@ void supernova::initState(struct parameters* P, part_int_t n_local, if (line_counter++ < N_HEADER) continue; // skip header processing int i = 0; - char* data = strtok(const_cast(line.c_str()), s99_delim); + char* data = strtok(line.data(), s99_delim); while (data != nullptr) { if (i == 0) { // in the following divide by # years per kyr (1000) From 061e5cf0dc82ab0ed8c05da749c47083ebc6ae03 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Mon, 16 Jan 2023 15:28:47 -0500 Subject: [PATCH 182/694] Add a few more clang-tidy checks to the ignore list clang-tidy v15.0.7 has some more checks that v15.0.5 didn't have and they aren't passing so I added them to the ignore list --- .clang-tidy | 4 ++++ src/system_tests/system_tester.cpp | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/.clang-tidy b/.clang-tidy index cd7085f4d..f7c2afe3f 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -56,9 +56,11 @@ Checks: "*, -cert-msc51-cpp, -cert-str34-c, -clang-analyzer-core.CallAndMessage, + -clang-analyzer-core.NullDereference, -clang-analyzer-core.UndefinedBinaryOperatorResult, -clang-analyzer-core.uninitialized.ArraySubscript, -clang-analyzer-core.uninitialized.Assign, + -clang-analyzer-core.uninitialized.UndefReturn, -clang-analyzer-deadcode.DeadStores, -clang-analyzer-optin.performance.Padding, -clang-analyzer-security.insecureAPI.strcpy, @@ -138,6 +140,7 @@ Checks: "*, -modernize-use-nullptr, -modernize-use-override, -modernize-use-using, + -openmp-use-default-none, -performance-faster-string-find, -performance-for-range-copy, -performance-inefficient-vector-operation, @@ -158,6 +161,7 @@ Checks: "*, -readability-make-member-function-const, -readability-non-const-parameter, -readability-redundant-control-flow, + -readability-redundant-preprocessor, -readability-simplify-boolean-expr, -readability-suspicious-call-argument" WarningsAsErrors: '' diff --git a/src/system_tests/system_tester.cpp b/src/system_tests/system_tester.cpp index db038fa9a..d6ecd73e7 100644 --- a/src/system_tests/system_tester.cpp +++ b/src/system_tests/system_tester.cpp @@ -552,7 +552,7 @@ std::vector systemTest::SystemTestRunner::loadTestFieldData( if (dataSetName == "particle_density") { file = _testParticlesFileVec; dataSetName = "density"; - } else if (file.size() == 0) { + } else if (file.empty()) { file = _testHydroFieldsFileVec; } From 501dc32f12dcd1ae9bfb089c105dfbe6486513fa Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Tue, 17 Jan 2023 14:04:57 -0500 Subject: [PATCH 183/694] Add python script to count clang-tidy warnings The new script (`tools/analyze_tidy_checks.py`) prints a markdown formatted list of all the failing checks --- tools/analyze_tidy_checks.py | 108 +++++++++++++++++++++++++++++++++++ 1 file changed, 108 insertions(+) create mode 100755 tools/analyze_tidy_checks.py diff --git a/tools/analyze_tidy_checks.py b/tools/analyze_tidy_checks.py new file mode 100755 index 000000000..0ea286920 --- /dev/null +++ b/tools/analyze_tidy_checks.py @@ -0,0 +1,108 @@ +#!/usr/bin/env python3 +""" +================================================================================ + This script analyzes the clang-tidy output and produces an ordered list of all + the checks run, how many failures a check generated and the percentage of + failures a check represents. + + When running, make sure that you have already run clang-tidy with all the + checks you want enabled since this script looks for the 3 tidy_results_*.log + files in the root directory of Cholla +================================================================================ +""" + +import numpy as np +import pandas as pd +import pathlib +import subprocess + + +def main(): + # Determine path to Cholla directory + chollaPath = pathlib.Path(__file__).resolve().parent.parent + + # Load required data + tidyResults = loadTidyResults(chollaPath) + enabledChecks = getEnabledChecks(chollaPath) + + # Count and sort the errors + sortedChecks, totalWarnings, numPassing, numFailing = countAndSort( + tidyResults, enabledChecks + ) + + # Print Results in markdown format + printResults(sortedChecks, totalWarnings, numPassing, numFailing) + + +def loadTidyResults(chollaPath): + with open(chollaPath / "tidy_results_c.log", "r") as file: + cData = file.read() + with open(chollaPath / "tidy_results_cpp.log", "r") as file: + cppData = file.read() + with open(chollaPath / "tidy_results_gpu.log", "r") as file: + gpuData = file.read() + + return cData + cppData + gpuData + + +def getEnabledChecks(chollaPath): + stdout = subprocess.run( + ["clang-tidy", "--list-checks"], cwd=chollaPath, stdout=subprocess.PIPE + ).stdout.decode("utf-8") + + # find index where checks start + stdout = stdout.split() + for i in range(len(stdout)): + if "bugprone" in stdout[i]: + index = i + break + + return stdout[index:] + + +def countAndSort(tidyResults, enabledChecks): + passingChecks = 0 + failingChecks = 0 + numWarnings = np.zeros(len(enabledChecks)) + + for i, check in enumerate(enabledChecks): + numWarnings[i] = tidyResults.count(check) + if check in tidyResults: + failingChecks += 1 + else: + passingChecks += 1 + + # Convert to dataframe and sort + sortedChecks = sorted(list(zip(numWarnings, enabledChecks))) + sortedChecks.reverse() + totalWarnings = numWarnings.sum() + + return sortedChecks, totalWarnings, passingChecks, failingChecks + + +def printResults(sortedChecks, totalWarnings, numPassing, numFailing): + # Determine percentages + totalChecks = numPassing + numFailing + + print(f"Total number of warnings: {int(totalWarnings)}") + print(f"{round(numPassing/totalChecks*100, 2)}% of checks passing") + print(f"{round(numFailing/totalChecks*100, 2)}% of checks failing") + + col1Title = "Number of Warnings" + col2Title = "Percentage of Warnings" + col3Title = "Check" + col3Length = np.max([len(entry[1]) for entry in sortedChecks]) + + print() + print("Failing Checks:") + print(f"| {col1Title} | {col2Title} | {col3Title:{col3Length}} |") + print(f'| {"-"*len(col1Title)} | {"-"*len(col2Title)} | {"-"*col3Length} |') + for entry in sortedChecks: + if int(entry[0]) != 0: + print( + f"| {int(entry[0]):18} | {(entry[0] / totalWarnings)*100:22.2f} | {entry[1]:{col3Length}} |" + ) + + +if __name__ == "__main__": + main() From 87d5ff1c077f1c1310351a4f51f9f58f8f25df15 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Wed, 18 Jan 2023 15:19:13 -0500 Subject: [PATCH 184/694] Fix initialization issue --- src/global/global.h | 52 ++++++++++++++++++++++----------------------- 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/src/global/global.h b/src/global/global.h index d74158c40..cbcb4cf43 100644 --- a/src/global/global.h +++ b/src/global/global.h @@ -230,32 +230,32 @@ struct parameters { char custom_bcnd[MAXLEN]; char outdir[MAXLEN]; char indir[MAXLEN]; // Folder to load Initial conditions from - Real rho; - Real vx; - Real vy; - Real vz; - Real P; - Real A; - Real Bx = 0; - Real By = 0; - Real Bz = 0; - Real rho_l; - Real vx_l; - Real vy_l = 0; - Real vz_l = 0; - Real P_l; - Real Bx_l; - Real By_l; - Real Bz_l; - Real rho_r; - Real vx_r; - Real vy_r = 0; - Real vz_r = 0; - Real P_r; - Real Bx_r; - Real By_r; - Real Bz_r; - Real diaph; + Real rho = 0; + Real vx = 0; + Real vy = 0; + Real vz = 0; + Real P = 0; + Real A = 0; + Real Bx = 0; + Real By = 0; + Real Bz = 0; + Real rho_l = 0; + Real vx_l = 0; + Real vy_l = 0; + Real vz_l = 0; + Real P_l = 0; + Real Bx_l = 0; + Real By_l = 0; + Real Bz_l = 0; + Real rho_r = 0; + Real vx_r = 0; + Real vy_r = 0; + Real vz_r = 0; + Real P_r = 0; + Real Bx_r = 0; + Real By_r = 0; + Real Bz_r = 0; + Real diaph = 0; Real rEigenVec_rho = 0; Real rEigenVec_MomentumX = 0; Real rEigenVec_MomentumY = 0; From 4f19378cad32775ecc7dae6b68c922941fb8fe44 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Wed, 25 Jan 2023 10:39:59 -0500 Subject: [PATCH 185/694] Fix integer size bug in testing utils Fixes #234 --- src/utils/testing_utilities.cpp | 2 +- src/utils/testing_utilities.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/utils/testing_utilities.cpp b/src/utils/testing_utilities.cpp index bc14dcd90..32263001f 100644 --- a/src/utils/testing_utilities.cpp +++ b/src/utils/testing_utilities.cpp @@ -54,7 +54,7 @@ int64_t ulpsDistanceDbl(double const &a, double const &b) bool nearlyEqualDbl(double const &a, double const &b, double &absoluteDiff, int64_t &ulpsDiff, double const &fixedEpsilon, // = 1E-14 by default - int const &ulpsEpsilon) // = 4 by default + int64_t const &ulpsEpsilon) // = 4 by default { // Compute differences ulpsDiff = ulpsDistanceDbl(a, b); diff --git a/src/utils/testing_utilities.h b/src/utils/testing_utilities.h index 15d5b4867..0f19d3265 100644 --- a/src/utils/testing_utilities.h +++ b/src/utils/testing_utilities.h @@ -93,7 +93,7 @@ int64_t ulpsDistanceDbl(double const &a, double const &b); */ bool nearlyEqualDbl(double const &a, double const &b, double &absoluteDiff, int64_t &ulpsDiff, double const &fixedEpsilon = 1E-14, - int const &ulpsEpsilon = 4); + int64_t const &ulpsEpsilon = 4); // ========================================================================= void wrapperEqual(int i, int j, int k, std::string dataSetName, @@ -126,7 +126,7 @@ void analyticSine(systemTest::SystemTestRunner testObject, template void checkResults(double fiducialNumber, double testNumber, std::string outString, double fixedEpsilon = -999, - int ulpsEpsilon = -999) + int64_t ulpsEpsilon = -999) { // Check for equality and if not equal return difference double absoluteDiff; From 95d3a5796e38d28f0d0874813f44a7ef279ad607 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Fri, 27 Jan 2023 15:12:47 -0500 Subject: [PATCH 186/694] Refactor HLLD solver with structs This makes passing all the variables around much simplier, makes the solver much more readable, and enables us to put pretty much everything into testable functions. --- builds/run_tests.sh | 1 + src/riemann_solvers/hlld_cuda.cu | 957 +++++++++++------------- src/riemann_solvers/hlld_cuda.h | 381 +++++----- src/riemann_solvers/hlld_cuda_tests.cu | 981 ++++++++++++------------- src/utils/mhd_utilities.h | 20 + 5 files changed, 1091 insertions(+), 1249 deletions(-) diff --git a/builds/run_tests.sh b/builds/run_tests.sh index 80fcab2a1..a5aac62d8 100755 --- a/builds/run_tests.sh +++ b/builds/run_tests.sh @@ -214,6 +214,7 @@ runTests () # argument is the value of COMPILER which does not occur for all setup scripts # \param[in] -g (optional) If set then download and build a local version of # GoogleTest to use instead of the machine default +# \param[in] -d (optional) Build Cholla in debug mode buildAndRunTests () { # Unset BUILD_GTEST so that subsequent runs aren't tied to what previous runs diff --git a/src/riemann_solvers/hlld_cuda.cu b/src/riemann_solvers/hlld_cuda.cu index 2cb930d62..82085b1b8 100644 --- a/src/riemann_solvers/hlld_cuda.cu +++ b/src/riemann_solvers/hlld_cuda.cu @@ -43,579 +43,457 @@ __global__ void Calculate_HLLD_Fluxes_CUDA(Real *dev_bounds_L, int xid, yid, zid; cuda_utilities::compute3DIndices(threadId, nx, ny, xid, yid, zid); + // Thread guard to avoid overrun + if (xid >= nx and yid >= ny and zid >= nz) return; + // Number of cells int n_cells = nx * ny * nz; // Offsets & indices int o1, o2, o3; if (direction == 0) { - o1 = 1; - o2 = 2; - o3 = 3; + o1 = grid_enum::momentum_x; + o2 = grid_enum::momentum_y; + o3 = grid_enum::momentum_z; } if (direction == 1) { - o1 = 2; - o2 = 3; - o3 = 1; + o1 = grid_enum::momentum_y; + o2 = grid_enum::momentum_z; + o3 = grid_enum::momentum_x; } if (direction == 2) { - o1 = 3; - o2 = 1; - o3 = 2; + o1 = grid_enum::momentum_z; + o2 = grid_enum::momentum_x; + o3 = grid_enum::momentum_y; } - // Thread guard to avoid overrun - if (xid < nx and yid < ny and zid < nz) { - // ============================ - // Retrieve conserved variables - // ============================ - // The magnetic field in the X-direction - Real magneticX = dev_magnetic_face[threadId]; - - // Left interface - Real densityL = dev_bounds_L[threadId]; - Real momentumXL = dev_bounds_L[threadId + n_cells * o1]; - Real momentumYL = dev_bounds_L[threadId + n_cells * o2]; - Real momentumZL = dev_bounds_L[threadId + n_cells * o3]; - Real energyL = dev_bounds_L[threadId + n_cells * 4]; - Real magneticYL = - dev_bounds_L[threadId + n_cells * (grid_enum::Q_x_magnetic_y)]; - Real magneticZL = - dev_bounds_L[threadId + n_cells * (grid_enum::Q_x_magnetic_z)]; + // ============================ + // Retrieve state variables + // ============================ + // The magnetic field in the X-direction + Real const magneticX = dev_magnetic_face[threadId]; + + mhd::_internal::State const stateL = mhd::_internal::loadState( + dev_bounds_L, magneticX, gamma, threadId, n_cells, o1, o2, o3); + mhd::_internal::State const stateR = mhd::_internal::loadState( + dev_bounds_R, magneticX, gamma, threadId, n_cells, o1, o2, o3); + + // Compute the approximate Left and Right wave speeds + mhd::_internal::Speeds speed = + mhd::_internal::approximateLRWaveSpeeds(stateL, stateR, magneticX, gamma); + + // ================================================================= + // Compute the fluxes in the non-star states + // ================================================================= + // Left state + mhd::_internal::Flux fluxL = mhd::_internal::nonStarFluxes(stateL, magneticX); + + // If we're in the L state then assign fluxes and return. + // In this state the flow is supersonic + // M&K 2005 equation 66 + if (speed.L >= 0.0) { + mhd::_internal::returnFluxes(threadId, o1, o2, o3, n_cells, dev_flux, fluxL, + stateL); + return; + } + // Right state + mhd::_internal::Flux fluxR = mhd::_internal::nonStarFluxes(stateR, magneticX); + + // If we're in the R state then assign fluxes and return. + // In this state the flow is supersonic + // M&K 2005 equation 66 + if (speed.R <= 0.0) { + mhd::_internal::returnFluxes(threadId, o1, o2, o3, n_cells, dev_flux, fluxR, + stateR); + return; + } - #ifdef SCALAR - Real scalarConservedL[NSCALARS]; - for (int i = 0; i < NSCALARS; i++) { - scalarConservedL[i] = dev_bounds_L[threadId + n_cells * (5 + i)]; - } - #endif // SCALAR - #ifdef DE - Real thermalEnergyConservedL = - dev_bounds_L[threadId + n_cells * (n_fields - 1)]; - #endif // DE + // ================================================================= + // Compute the fluxes in the star states + // ================================================================= + // Shared quantities: + // - velocityStarX = speedM + // - totalPrssureStar is the same on both sides + speed.M = approximateMiddleWaveSpeed(stateL, stateR, speed); + Real const totalPressureStar = + mhd::_internal::starTotalPressure(stateL, stateR, speed); + + // Left star state + mhd::_internal::StarState const starStateL = mhd::_internal::computeStarState( + stateL, speed, speed.L, magneticX, totalPressureStar); + + // Left star speed + speed.LStar = mhd::_internal::approximateStarWaveSpeed(starStateL, speed, + magneticX, -1); + + // If we're in the L* state then assign fluxes and return. + // In this state the flow is subsonic + // M&K 2005 equation 66 + if (speed.LStar >= 0.0) { + fluxL = + mhd::_internal::starFluxes(starStateL, stateL, fluxL, speed, speed.L); + mhd::_internal::returnFluxes(threadId, o1, o2, o3, n_cells, dev_flux, fluxL, + stateL); + return; + } - // Right interface - Real densityR = dev_bounds_R[threadId]; - Real momentumXR = dev_bounds_R[threadId + n_cells * o1]; - Real momentumYR = dev_bounds_R[threadId + n_cells * o2]; - Real momentumZR = dev_bounds_R[threadId + n_cells * o3]; - Real energyR = dev_bounds_R[threadId + n_cells * 4]; - Real magneticYR = - dev_bounds_R[threadId + n_cells * (grid_enum::Q_x_magnetic_y)]; - Real magneticZR = - dev_bounds_R[threadId + n_cells * (grid_enum::Q_x_magnetic_z)]; + // Right star state + mhd::_internal::StarState const starStateR = mhd::_internal::computeStarState( + stateR, speed, speed.R, magneticX, totalPressureStar); + + // Right star speed + speed.RStar = + mhd::_internal::approximateStarWaveSpeed(starStateR, speed, magneticX, 1); + + // If we're in the R* state then assign fluxes and return. + // In this state the flow is subsonic + // M&K 2005 equation 66 + if (speed.RStar <= 0.0) { + fluxR = + mhd::_internal::starFluxes(starStateR, stateR, fluxR, speed, speed.R); + mhd::_internal::returnFluxes(threadId, o1, o2, o3, n_cells, dev_flux, fluxR, + stateR); + return; + } + + // ================================================================= + // Compute the fluxes in the double star states + // ================================================================= + mhd::_internal::DoubleStarState const doubleStarState = + mhd::_internal::computeDoubleStarState(starStateL, starStateR, magneticX, + totalPressureStar, speed); + + // Compute and return L** fluxes + // M&K 2005 equation 66 + if (speed.M >= 0.0) { + fluxL = mhd::_internal::computeDoubleStarFluxes( + doubleStarState, doubleStarState.energyL, starStateL, stateL, fluxL, + speed, speed.L, speed.LStar); + mhd::_internal::returnFluxes(threadId, o1, o2, o3, n_cells, dev_flux, fluxL, + stateL); + return; + } + // Compute and return R** fluxes + // M&K 2005 equation 66 + else { // if (speedStarR >= 0.0) { + fluxR = mhd::_internal::computeDoubleStarFluxes( + doubleStarState, doubleStarState.energyR, starStateR, stateR, fluxR, + speed, speed.R, speed.RStar); + mhd::_internal::returnFluxes(threadId, o1, o2, o3, n_cells, dev_flux, fluxR, + stateR); + return; + } +} +// ========================================================================= + +namespace _internal +{ +// ===================================================================== +__device__ __host__ mhd::_internal::State loadState( + Real const *interfaceArr, Real const &magneticX, Real const &gamma, + int const &threadId, int const &n_cells, int const &o1, int const &o2, + int const &o3) +{ + mhd::_internal::State state; + state.density = interfaceArr[threadId + n_cells * grid_enum::density]; + state.density = fmax(state.density, (Real)TINY_NUMBER); + state.velocityX = interfaceArr[threadId + n_cells * o1] / state.density; + state.velocityY = interfaceArr[threadId + n_cells * o2] / state.density; + state.velocityZ = interfaceArr[threadId + n_cells * o3] / state.density; + state.energy = interfaceArr[threadId + n_cells * grid_enum::Energy]; + state.energy = fmax(state.energy, (Real)TINY_NUMBER); + state.magneticY = + interfaceArr[threadId + n_cells * grid_enum::Q_x_magnetic_y]; + state.magneticZ = + interfaceArr[threadId + n_cells * grid_enum::Q_x_magnetic_z]; #ifdef SCALAR - Real scalarConservedR[NSCALARS]; - for (int i = 0; i < NSCALARS; i++) { - scalarConservedR[i] = dev_bounds_R[threadId + n_cells * (5 + i)]; - } + for (int i = 0; i < NSCALARS; i++) { + state.scalarSpecific[i] = + interfaceArr[threadId + n_cells * (grid_enum::scalar + i)] / + state.density; + } #endif // SCALAR #ifdef DE - Real thermalEnergyConservedR = - dev_bounds_R[threadId + n_cells * (n_fields - 1)]; - #endif // DE - - // Check for unphysical values - densityL = fmax(densityL, (Real)TINY_NUMBER); - densityR = fmax(densityR, (Real)TINY_NUMBER); - energyL = fmax(energyL, (Real)TINY_NUMBER); - energyR = fmax(energyR, (Real)TINY_NUMBER); - - // ============================ - // Compute primitive variables - // ============================ - // Left interface - Real const velocityXL = momentumXL / densityL; - Real const velocityYL = momentumYL / densityL; - Real const velocityZL = momentumZL / densityL; + state.thermalEnergySpecific = + interfaceArr[threadId + n_cells * grid_enum::GasEnergy] / state.density; + #endif // DE} #ifdef DE // PRESSURE_DE - Real energyNonThermal = - hydro_utilities::Calc_Kinetic_Energy_From_Velocity( - densityL, velocityXL, velocityYL, velocityZL) + - mhd::utils::computeMagneticEnergy(magneticX, magneticYL, magneticZL); - - Real const gasPressureL = fmax(hydro_utilities::Get_Pressure_From_DE( - energyL, energyL - energyNonThermal, - thermalEnergyConservedL, gamma), - (Real)TINY_NUMBER); + Real energyNonThermal = + hydro_utilities::Calc_Kinetic_Energy_From_Velocity( + state.density, state.velocityX, state.velocityY, state.velocityZ) + + mhd::utils::computeMagneticEnergy(magneticX, state.magneticY, + state.magneticZ); + + state.gasPressure = + fmax(hydro_utilities::Get_Pressure_From_DE( + state.energy, state.energy - energyNonThermal, + state.thermalEnergySpecific * state.density, gamma), + (Real)TINY_NUMBER); #else - // Note that this function does the positive pressure check - // internally - Real const gasPressureL = mhd::utils::computeGasPressure( - energyL, densityL, momentumXL, momentumYL, momentumZL, magneticX, - magneticYL, magneticZL, gamma); + // Note that this function does the positive pressure check + // internally + state.gasPressure = mhd::utils::computeGasPressure(state, magneticX, gamma); #endif // PRESSURE_DE - Real const totalPressureL = mhd::utils::computeTotalPressure( - gasPressureL, magneticX, magneticYL, magneticZL); - - // Right interface - Real const velocityXR = momentumXR / densityR; - Real const velocityYR = momentumYR / densityR; - Real const velocityZR = momentumZR / densityR; - - #ifdef DE // PRESSURE_DE - energyNonThermal = - hydro_utilities::Calc_Kinetic_Energy_From_Velocity( - densityR, velocityXR, velocityYR, velocityZR) + - mhd::utils::computeMagneticEnergy(magneticX, magneticYR, magneticZR); - - Real const gasPressureR = fmax(hydro_utilities::Get_Pressure_From_DE( - energyR, energyR - energyNonThermal, - thermalEnergyConservedR, gamma), - (Real)TINY_NUMBER); - #else - // Note that this function does the positive pressure check - // internally - Real const gasPressureR = mhd::utils::computeGasPressure( - energyR, densityR, momentumXR, momentumYR, momentumZR, magneticX, - magneticYR, magneticZR, gamma); - #endif // PRESSURE_DE - - Real const totalPressureR = mhd::utils::computeTotalPressure( - gasPressureR, magneticX, magneticYR, magneticZR); - - // Compute the approximate wave speeds and density in the star - // regions - Real speedL, speedR, speedM, speedStarL, speedStarR, densityStarL, - densityStarR; - mhd::_internal::_approximateWaveSpeeds( - densityL, momentumXL, momentumYL, momentumZL, velocityXL, velocityYL, - velocityZL, gasPressureL, totalPressureL, magneticX, magneticYL, - magneticZL, densityR, momentumXR, momentumYR, momentumZR, velocityXR, - velocityYR, velocityZR, gasPressureR, totalPressureR, magneticYR, - magneticZR, gamma, speedL, speedR, speedM, speedStarL, speedStarR, - densityStarL, densityStarR); - - // ================================================================= - // Compute the fluxes in the non-star states - // ================================================================= - // Left state - Real densityFluxL, momentumFluxXL, momentumFluxYL, momentumFluxZL, - magneticFluxYL, magneticFluxZL, energyFluxL; - mhd::_internal::_nonStarFluxes( - momentumXL, velocityXL, velocityYL, velocityZL, totalPressureL, energyL, - magneticX, magneticYL, magneticZL, densityFluxL, momentumFluxXL, - momentumFluxYL, momentumFluxZL, magneticFluxYL, magneticFluxZL, - energyFluxL); - - // If we're in the L state then assign fluxes and return. - // In this state the flow is supersonic - // M&K 2005 equation 66 - if (speedL >= 0.0) { - mhd::_internal::_returnFluxes(threadId, o1, o2, o3, n_cells, dev_flux, - densityFluxL, momentumFluxXL, - momentumFluxYL, momentumFluxZL, energyFluxL, - magneticFluxYL, magneticFluxZL); - #ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - dev_flux[(5 + i) * n_cells + threadId] = - (scalarConservedL[i] / densityL) * densityFluxL; - } - #endif // SCALAR - #ifdef DE - dev_flux[(n_fields - 1) * n_cells + threadId] = - (thermalEnergyConservedL / densityL) * densityFluxL; - #endif // DE - return; - } - // Right state - Real densityFluxR, momentumFluxXR, momentumFluxYR, momentumFluxZR, - magneticFluxYR, magneticFluxZR, energyFluxR; - mhd::_internal::_nonStarFluxes( - momentumXR, velocityXR, velocityYR, velocityZR, totalPressureR, energyR, - magneticX, magneticYR, magneticZR, densityFluxR, momentumFluxXR, - momentumFluxYR, momentumFluxZR, magneticFluxYR, magneticFluxZR, - energyFluxR); - - // If we're in the R state then assign fluxes and return. - // In this state the flow is supersonic - // M&K 2005 equation 66 - if (speedR <= 0.0) { - mhd::_internal::_returnFluxes(threadId, o1, o2, o3, n_cells, dev_flux, - densityFluxR, momentumFluxXR, - momentumFluxYR, momentumFluxZR, energyFluxR, - magneticFluxYR, magneticFluxZR); - #ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - dev_flux[(5 + i) * n_cells + threadId] = - (scalarConservedR[i] / densityR) * densityFluxR; - } - #endif // SCALAR - #ifdef DE - dev_flux[(n_fields - 1) * n_cells + threadId] = - (thermalEnergyConservedR / densityR) * densityFluxR; - #endif // DE - return; - } - - // ================================================================= - // Compute the fluxes in the star states - // ================================================================= - // Shared quantity - // note that velocityStarX = speedM - // M&K 2005 equation 23, might need to switch to eqn. 41 in the - // future though they should produce identical results - Real totalPressureStar = totalPressureL + densityL * (speedL - velocityXL) * - (speedM - velocityXL); - - // Left star state - Real velocityStarYL, velocityStarZL, energyStarL, magneticStarYL, - magneticStarZL, densityStarFluxL, momentumStarFluxXL, - momentumStarFluxYL, momentumStarFluxZL, magneticStarFluxYL, - magneticStarFluxZL, energyStarFluxL; - mhd::_internal::_starFluxes( - speedM, speedL, densityL, velocityXL, velocityYL, velocityZL, - momentumXL, momentumYL, momentumZL, energyL, totalPressureL, magneticX, - magneticYL, magneticZL, densityStarL, totalPressureStar, densityFluxL, - momentumFluxXL, momentumFluxYL, momentumFluxZL, energyFluxL, - magneticFluxYL, magneticFluxZL, velocityStarYL, velocityStarZL, - energyStarL, magneticStarYL, magneticStarZL, densityStarFluxL, - momentumStarFluxXL, momentumStarFluxYL, momentumStarFluxZL, - energyStarFluxL, magneticStarFluxYL, magneticStarFluxZL); - - // If we're in the L* state then assign fluxes and return. - // In this state the flow is subsonic - // M&K 2005 equation 66 - if (speedStarL >= 0.0) { - mhd::_internal::_returnFluxes( - threadId, o1, o2, o3, n_cells, dev_flux, densityStarFluxL, - momentumStarFluxXL, momentumStarFluxYL, momentumStarFluxZL, - energyStarFluxL, magneticStarFluxYL, magneticStarFluxZL); - #ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - dev_flux[(5 + i) * n_cells + threadId] = - (scalarConservedL[i] / densityL) * densityStarFluxL; - } - #endif // SCALAR - #ifdef DE - dev_flux[(n_fields - 1) * n_cells + threadId] = - (thermalEnergyConservedL / densityL) * densityStarFluxL; - #endif // DE - return; - } - - // Right star state - Real velocityStarYR, velocityStarZR, energyStarR, magneticStarYR, - magneticStarZR, densityStarFluxR, momentumStarFluxXR, - momentumStarFluxYR, momentumStarFluxZR, magneticStarFluxYR, - magneticStarFluxZR, energyStarFluxR; - mhd::_internal::_starFluxes( - speedM, speedR, densityR, velocityXR, velocityYR, velocityZR, - momentumXR, momentumYR, momentumZR, energyR, totalPressureR, magneticX, - magneticYR, magneticZR, densityStarR, totalPressureStar, densityFluxR, - momentumFluxXR, momentumFluxYR, momentumFluxZR, energyFluxR, - magneticFluxYR, magneticFluxZR, velocityStarYR, velocityStarZR, - energyStarR, magneticStarYR, magneticStarZR, densityStarFluxR, - momentumStarFluxXR, momentumStarFluxYR, momentumStarFluxZR, - energyStarFluxR, magneticStarFluxYR, magneticStarFluxZR); - - // If we're in the R* state then assign fluxes and return. - // In this state the flow is subsonic - // M&K 2005 equation 66 - if (speedStarR <= 0.0) { - mhd::_internal::_returnFluxes( - threadId, o1, o2, o3, n_cells, dev_flux, densityStarFluxR, - momentumStarFluxXR, momentumStarFluxYR, momentumStarFluxZR, - energyStarFluxR, magneticStarFluxYR, magneticStarFluxZR); - #ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - dev_flux[(5 + i) * n_cells + threadId] = - (scalarConservedR[i] / densityR) * densityStarFluxR; - } - #endif // SCALAR - #ifdef DE - dev_flux[(n_fields - 1) * n_cells + threadId] = - (thermalEnergyConservedR / densityR) * densityStarFluxR; - #endif // DE - return; - } - - // ================================================================= - // Compute the fluxes in the double star states - // ================================================================= - Real velocityDoubleStarY, velocityDoubleStarZ, magneticDoubleStarY, - magneticDoubleStarZ, energyDoubleStarL, energyDoubleStarR; - mhd::_internal::_doubleStarState( - speedM, magneticX, totalPressureStar, densityStarL, velocityStarYL, - velocityStarZL, energyStarL, magneticStarYL, magneticStarZL, - densityStarR, velocityStarYR, velocityStarZR, energyStarR, - magneticStarYR, magneticStarZR, velocityDoubleStarY, - velocityDoubleStarZ, magneticDoubleStarY, magneticDoubleStarZ, - energyDoubleStarL, energyDoubleStarR); - - // Compute and return L** fluxes - // M&K 2005 equation 66 - if (speedM >= 0.0) { - Real momentumDoubleStarFluxX, momentumDoubleStarFluxY, - momentumDoubleStarFluxZ, energyDoubleStarFlux, - magneticDoubleStarFluxY, magneticDoubleStarFluxZ; - mhd::_internal::_doubleStarFluxes( - speedStarL, momentumStarFluxXL, momentumStarFluxYL, - momentumStarFluxZL, energyStarFluxL, magneticStarFluxYL, - magneticStarFluxZL, densityStarL, speedM, velocityStarYL, - velocityStarZL, energyStarL, magneticStarYL, magneticStarZL, speedM, - velocityDoubleStarY, velocityDoubleStarZ, energyDoubleStarL, - magneticDoubleStarY, magneticDoubleStarZ, momentumDoubleStarFluxX, - momentumDoubleStarFluxY, momentumDoubleStarFluxZ, - energyDoubleStarFlux, magneticDoubleStarFluxY, - magneticDoubleStarFluxZ); - - mhd::_internal::_returnFluxes( - threadId, o1, o2, o3, n_cells, dev_flux, densityStarFluxL, - momentumDoubleStarFluxX, momentumDoubleStarFluxY, - momentumDoubleStarFluxZ, energyDoubleStarFlux, - magneticDoubleStarFluxY, magneticDoubleStarFluxZ); - - #ifdef SCALAR - // Return the passive scalar fluxes - for (int i = 0; i < NSCALARS; i++) { - dev_flux[(5 + i) * n_cells + threadId] = - (scalarConservedL[i] / densityL) * densityStarFluxL; - } - #endif // SCALAR - #ifdef DE - dev_flux[(n_fields - 1) * n_cells + threadId] = - (thermalEnergyConservedL / densityL) * densityStarFluxL; - #endif // DE - return; - } - // Compute and return R** fluxes - // M&K 2005 equation 66 - else if (speedStarR >= 0.0) { - Real momentumDoubleStarFluxX, momentumDoubleStarFluxY, - momentumDoubleStarFluxZ, energyDoubleStarFlux, - magneticDoubleStarFluxY, magneticDoubleStarFluxZ; - mhd::_internal::_doubleStarFluxes( - speedStarR, momentumStarFluxXR, momentumStarFluxYR, - momentumStarFluxZR, energyStarFluxR, magneticStarFluxYR, - magneticStarFluxZR, densityStarR, speedM, velocityStarYR, - velocityStarZR, energyStarR, magneticStarYR, magneticStarZR, speedM, - velocityDoubleStarY, velocityDoubleStarZ, energyDoubleStarR, - magneticDoubleStarY, magneticDoubleStarZ, momentumDoubleStarFluxX, - momentumDoubleStarFluxY, momentumDoubleStarFluxZ, - energyDoubleStarFlux, magneticDoubleStarFluxY, - magneticDoubleStarFluxZ); - - mhd::_internal::_returnFluxes( - threadId, o1, o2, o3, n_cells, dev_flux, densityStarFluxR, - momentumDoubleStarFluxX, momentumDoubleStarFluxY, - momentumDoubleStarFluxZ, energyDoubleStarFlux, - magneticDoubleStarFluxY, magneticDoubleStarFluxZ); + state.totalPressure = mhd::utils::computeTotalPressure( + state.gasPressure, magneticX, state.magneticY, state.magneticZ); - #ifdef SCALAR - // Return the passive scalar fluxes - for (int i = 0; i < NSCALARS; i++) { - dev_flux[(5 + i) * n_cells + threadId] = - (scalarConservedR[i] / densityR) * densityStarFluxR; - } - #endif // SCALAR - #ifdef DE - dev_flux[(n_fields - 1) * n_cells + threadId] = - (thermalEnergyConservedR / densityR) * densityStarFluxR; - #endif // DE - return; - } - } // End thread guard -}; -// ========================================================================= + return state; +} +// ===================================================================== -namespace _internal -{ // ===================================================================== -__device__ __host__ void _approximateWaveSpeeds( - Real const &densityL, Real const &momentumXL, Real const &momentumYL, - Real const &momentumZL, Real const &velocityXL, Real const &velocityYL, - Real const &velocityZL, Real const &gasPressureL, - Real const &totalPressureL, Real const &magneticX, Real const &magneticYL, - Real const &magneticZL, Real const &densityR, Real const &momentumXR, - Real const &momentumYR, Real const &momentumZR, Real const &velocityXR, - Real const &velocityYR, Real const &velocityZR, Real const &gasPressureR, - Real const &totalPressureR, Real const &magneticYR, Real const &magneticZR, - Real const &gamma, Real &speedL, Real &speedR, Real &speedM, - Real &speedStarL, Real &speedStarR, Real &densityStarL, Real &densityStarR) +__device__ __host__ mhd::_internal::Speeds approximateLRWaveSpeeds( + mhd::_internal::State const &stateL, mhd::_internal::State const &stateR, + Real const &magneticX, Real const &gamma) { // Get the fast magnetosonic wave speeds Real magSonicL = mhd::utils::fastMagnetosonicSpeed( - densityL, gasPressureL, magneticX, magneticYL, magneticZL, gamma); + stateL.density, stateL.gasPressure, magneticX, stateL.magneticY, + stateL.magneticZ, gamma); Real magSonicR = mhd::utils::fastMagnetosonicSpeed( - densityR, gasPressureR, magneticX, magneticYR, magneticZR, gamma); + stateR.density, stateR.gasPressure, magneticX, stateR.magneticY, + stateR.magneticZ, gamma); // Compute the S_L and S_R wave speeds. // Version suggested by Miyoshi & Kusano 2005 and used in Athena // M&K 2005 equation 67 Real magSonicMax = fmax(magSonicL, magSonicR); - speedL = fmin(velocityXL, velocityXR) - magSonicMax; - speedR = fmax(velocityXL, velocityXR) + magSonicMax; + mhd::_internal::Speeds speed; + speed.L = fmin(stateL.velocityX, stateR.velocityX) - magSonicMax; + speed.R = fmax(stateL.velocityX, stateR.velocityX) + magSonicMax; + return speed; +} +// ===================================================================== + +// ===================================================================== +__device__ __host__ Real approximateMiddleWaveSpeed( + mhd::_internal::State const &stateL, mhd::_internal::State const &stateR, + mhd::_internal::Speeds const &speed) +{ // Compute the S_M wave speed // M&K 2005 equation 38 - speedM = // Numerator - (momentumXR * (speedR - velocityXR) - momentumXL * (speedL - velocityXL) + - (totalPressureL - totalPressureR)) / - // Denominator - (densityR * (speedR - velocityXR) - densityL * (speedL - velocityXL)); + Real const speed_r_diff = speed.R - stateR.velocityX; + Real const speed_l_diff = speed.L - stateL.velocityX; - // Compute the densities in the star state - // M&K 2005 equation 43 - densityStarL = densityL * (speedL - velocityXL) / (speedL - speedM); - densityStarR = densityR * (speedR - velocityXR) / (speedR - speedM); + return // Numerator + (speed_r_diff * stateR.density * stateR.velocityX - + speed_l_diff * stateL.density * stateL.velocityX - stateR.totalPressure + + stateL.totalPressure) / + // Denominator + (speed_r_diff * stateR.density - speed_l_diff * stateL.density); +} +// ===================================================================== +// ===================================================================== +__device__ __host__ Real +approximateStarWaveSpeed(mhd::_internal::StarState const &starState, + mhd::_internal::Speeds const &speed, + Real const &magneticX, Real const &side) +{ // Compute the S_L^* and S_R^* wave speeds // M&K 2005 equation 51 - speedStarL = speedM - mhd::utils::alfvenSpeed(magneticX, densityStarL); - speedStarR = speedM + mhd::utils::alfvenSpeed(magneticX, densityStarR); + return speed.M + side * mhd::utils::alfvenSpeed(magneticX, starState.density); } // ===================================================================== // ===================================================================== -__device__ __host__ void _nonStarFluxes( - Real const &momentumX, Real const &velocityX, Real const &velocityY, - Real const &velocityZ, Real const &totalPressure, Real const &energy, - Real const &magneticX, Real const &magneticY, Real const &magneticZ, - Real &densityFlux, Real &momentumFluxX, Real &momentumFluxY, - Real &momentumFluxZ, Real &magneticFluxY, Real &magneticFluxZ, - Real &energyFlux) +__device__ __host__ mhd::_internal::Flux nonStarFluxes( + mhd::_internal::State const &state, Real const &magneticX) { + mhd::_internal::Flux flux; // M&K 2005 equation 2 - densityFlux = momentumX; + flux.density = state.density * state.velocityX; - momentumFluxX = momentumX * velocityX + totalPressure - magneticX * magneticX; - momentumFluxY = momentumX * velocityY - magneticX * magneticY; - momentumFluxZ = momentumX * velocityZ - magneticX * magneticZ; + flux.momentumX = flux.density * state.velocityX + state.totalPressure - + magneticX * magneticX; + flux.momentumY = flux.density * state.velocityY - magneticX * state.magneticY; + flux.momentumZ = flux.density * state.velocityZ - magneticX * state.magneticZ; - magneticFluxY = magneticY * velocityX - magneticX * velocityY; - magneticFluxZ = magneticZ * velocityX - magneticX * velocityZ; + flux.magneticY = + state.magneticY * state.velocityX - magneticX * state.velocityY; + flux.magneticZ = + state.magneticZ * state.velocityX - magneticX * state.velocityZ; // Group transverse terms for FP associative symmetry - energyFlux = velocityX * (energy + totalPressure) - - magneticX * (velocityX * magneticX + ((velocityY * magneticY) + - (velocityZ * magneticZ))); + flux.energy = state.velocityX * (state.energy + state.totalPressure) - + magneticX * (state.velocityX * magneticX + + ((state.velocityY * state.magneticY) + + (state.velocityZ * state.magneticZ))); + + return flux; } // ===================================================================== // ===================================================================== -__device__ __host__ void _returnFluxes( - int const &threadId, int const &o1, int const &o2, int const &o3, - int const &n_cells, Real *dev_flux, Real const &densityFlux, - Real const &momentumFluxX, Real const &momentumFluxY, - Real const &momentumFluxZ, Real const &energyFlux, - Real const &magneticFluxY, Real const &magneticFluxZ) +__device__ __host__ void returnFluxes(int const &threadId, int const &o1, + int const &o2, int const &o3, + int const &n_cells, Real *dev_flux, + mhd::_internal::Flux const &flux, + mhd::_internal::State const &state) { - dev_flux[threadId] = densityFlux; - dev_flux[threadId + n_cells * o1] = momentumFluxX; - dev_flux[threadId + n_cells * o2] = momentumFluxY; - dev_flux[threadId + n_cells * o3] = momentumFluxZ; - dev_flux[threadId + n_cells * 4] = energyFlux; - dev_flux[threadId + n_cells * (grid_enum::fluxX_magnetic_z)] = magneticFluxY; - dev_flux[threadId + n_cells * (grid_enum::fluxX_magnetic_y)] = magneticFluxZ; + dev_flux[threadId + n_cells * grid_enum::density] = flux.density; + dev_flux[threadId + n_cells * o1] = flux.momentumX; + dev_flux[threadId + n_cells * o2] = flux.momentumY; + dev_flux[threadId + n_cells * o3] = flux.momentumZ; + dev_flux[threadId + n_cells * grid_enum::Energy] = flux.energy; + dev_flux[threadId + n_cells * grid_enum::fluxX_magnetic_z] = flux.magneticY; + dev_flux[threadId + n_cells * grid_enum::fluxX_magnetic_y] = flux.magneticZ; + + #ifdef SCALAR + for (int i = 0; i < NSCALARS; i++) { + dev_flux[threadId + n_cells * (grid_enum::scalar + i)] = + state.scalarSpecific[i] * flux.density; + } + #endif // SCALAR + #ifdef DE + dev_flux[threadId + n_cells * grid_enum::GasEnergy] = + state.thermalEnergySpecific * flux.density; + #endif // DE +} +// ===================================================================== + +// ===================================================================== +__device__ __host__ Real starTotalPressure(mhd::_internal::State const &stateL, + mhd::_internal::State const &stateR, + mhd::_internal::Speeds const &speed) +{ + // M&K 2005 equation 41 + return // Numerator + (stateR.density * stateL.totalPressure * (speed.R - stateR.velocityX) - + stateL.density * stateR.totalPressure * (speed.L - stateL.velocityX) + + stateL.density * stateR.density * (speed.R - stateR.velocityX) * + (speed.L - stateL.velocityX) * + (stateR.velocityX - stateL.velocityX)) / + // Denominator + (stateR.density * (speed.R - stateR.velocityX) - + stateL.density * (speed.L - stateL.velocityX)); } // ===================================================================== // ===================================================================== -__device__ __host__ void _starFluxes( - Real const &speedM, Real const &speedSide, Real const &density, - Real const &velocityX, Real const &velocityY, Real const &velocityZ, - Real const &momentumX, Real const &momentumY, Real const &momentumZ, - Real const &energy, Real const &totalPressure, Real const &magneticX, - Real const &magneticY, Real const &magneticZ, Real const &densityStar, - Real const &totalPressureStar, Real const &densityFlux, - Real const &momentumFluxX, Real const &momentumFluxY, - Real const &momentumFluxZ, Real const &energyFlux, - Real const &magneticFluxY, Real const &magneticFluxZ, Real &velocityStarY, - Real &velocityStarZ, Real &energyStar, Real &magneticStarY, - Real &magneticStarZ, Real &densityStarFlux, Real &momentumStarFluxX, - Real &momentumStarFluxY, Real &momentumStarFluxZ, Real &energyStarFlux, - Real &magneticStarFluxY, Real &magneticStarFluxZ) +__device__ __host__ mhd::_internal::StarState computeStarState( + mhd::_internal::State const &state, mhd::_internal::Speeds const &speed, + Real const &speedSide, Real const &magneticX, Real const &totalPressureStar) { + mhd::_internal::StarState starState; + + // Compute the densities in the star state + // M&K 2005 equation 43 + starState.density = + state.density * (speedSide - state.velocityX) / (speedSide - speed.M); + // Check for and handle the degenerate case // Explained at the top of page 326 in M&K 2005 - if (fabs(density * (speedSide - velocityX) * (speedSide - speedM) - + if (fabs(state.density * (speedSide - state.velocityX) * + (speedSide - speed.M) - (magneticX * magneticX)) < totalPressureStar * mhd::_internal::_hlldSmallNumber) { - velocityStarY = velocityY; - velocityStarZ = velocityZ; - magneticStarY = magneticY; - magneticStarZ = magneticZ; + starState.velocityY = state.velocityY; + starState.velocityZ = state.velocityZ; + starState.magneticY = state.magneticY; + starState.magneticZ = state.magneticZ; } else { // Denominator for M&K 2005 equations 44-47 Real const denom = - density * (speedSide - velocityX) * (speedSide - speedM) - + state.density * (speedSide - state.velocityX) * (speedSide - speed.M) - (magneticX * magneticX); // Compute the velocity and magnetic field in the star state // M&K 2005 equations 44 & 46 - Real coef = magneticX * (speedM - velocityX) / denom; - velocityStarY = velocityY - magneticY * coef; - velocityStarZ = velocityZ - magneticZ * coef; + Real coef = magneticX * (speed.M - state.velocityX) / denom; + starState.velocityY = state.velocityY - state.magneticY * coef; + starState.velocityZ = state.velocityZ - state.magneticZ * coef; // M&K 2005 equations 45 & 47 - Real tmpPower = (speedSide - velocityX); + Real tmpPower = (speedSide - state.velocityX); tmpPower = tmpPower * tmpPower; - coef = (density * tmpPower - (magneticX * magneticX)) / denom; - magneticStarY = magneticY * coef; - magneticStarZ = magneticZ * coef; + coef = (state.density * tmpPower - (magneticX * magneticX)) / denom; + starState.magneticY = state.magneticY * coef; + starState.magneticZ = state.magneticZ * coef; } // M&K 2005 equation 48 - energyStar = - (energy * (speedSide - velocityX) - totalPressure * velocityX + - totalPressureStar * speedM + - magneticX * - (math_utils::dotProduct(velocityX, velocityY, velocityZ, magneticX, - magneticY, magneticZ) - - math_utils::dotProduct(speedM, velocityStarY, velocityStarZ, - magneticX, magneticStarY, magneticStarZ))) / - (speedSide - speedM); + starState.energy = + (state.energy * (speedSide - state.velocityX) - + state.totalPressure * state.velocityX + totalPressureStar * speed.M + + magneticX * (math_utils::dotProduct(state.velocityX, state.velocityY, + state.velocityZ, magneticX, + state.magneticY, state.magneticZ) - + math_utils::dotProduct( + speed.M, starState.velocityY, starState.velocityZ, + magneticX, starState.magneticY, starState.magneticZ))) / + (speedSide - speed.M); + + return starState; +} +// ===================================================================== + +// ===================================================================== +__device__ __host__ mhd::_internal::Flux starFluxes( + mhd::_internal::StarState const &starState, + mhd::_internal::State const &state, mhd::_internal::Flux const &flux, + mhd::_internal::Speeds const &speed, Real const &speedSide) +{ + mhd::_internal::Flux starFlux; // Now compute the star state fluxes // M&K 2005 equations 64 - densityStarFlux = densityFlux + speedSide * (densityStar - density); - ; - momentumStarFluxX = - momentumFluxX + speedSide * (densityStar * speedM - momentumX); - ; - momentumStarFluxY = - momentumFluxY + speedSide * (densityStar * velocityStarY - momentumY); - ; - momentumStarFluxZ = - momentumFluxZ + speedSide * (densityStar * velocityStarZ - momentumZ); - ; - energyStarFlux = energyFlux + speedSide * (energyStar - energy); - magneticStarFluxY = magneticFluxY + speedSide * (magneticStarY - magneticY); - magneticStarFluxZ = magneticFluxZ + speedSide * (magneticStarZ - magneticZ); + starFlux.density = + flux.density + speedSide * (starState.density - state.density); + starFlux.momentumX = + flux.momentumX + speedSide * (starState.density * speed.M - + state.density * state.velocityX); + starFlux.momentumY = + flux.momentumY + speedSide * (starState.density * starState.velocityY - + state.density * state.velocityY); + starFlux.momentumZ = + flux.momentumZ + speedSide * (starState.density * starState.velocityZ - + state.density * state.velocityZ); + starFlux.energy = flux.energy + speedSide * (starState.energy - state.energy); + starFlux.magneticY = + flux.magneticY + speedSide * (starState.magneticY - state.magneticY); + starFlux.magneticZ = + flux.magneticZ + speedSide * (starState.magneticZ - state.magneticZ); + + return starFlux; } // ===================================================================== // ===================================================================== -__device__ __host__ void _doubleStarState( - Real const &speedM, Real const &magneticX, Real const &totalPressureStar, - Real const &densityStarL, Real const &velocityStarYL, - Real const &velocityStarZL, Real const &energyStarL, - Real const &magneticStarYL, Real const &magneticStarZL, - Real const &densityStarR, Real const &velocityStarYR, - Real const &velocityStarZR, Real const &energyStarR, - Real const &magneticStarYR, Real const &magneticStarZR, - Real &velocityDoubleStarY, Real &velocityDoubleStarZ, - Real &magneticDoubleStarY, Real &magneticDoubleStarZ, - Real &energyDoubleStarL, Real &energyDoubleStarR) +__device__ __host__ mhd::_internal::DoubleStarState computeDoubleStarState( + mhd::_internal::StarState const &starStateL, + mhd::_internal::StarState const &starStateR, Real const &magneticX, + Real const &totalPressureStar, mhd::_internal::Speeds const &speed) { + mhd::_internal::DoubleStarState doubleStarState; + // if Bx is zero then just return the star state // Explained at the top of page 328 in M&K 2005. Essentially when // magneticX is 0 this reduces to the HLLC solver if (magneticX < mhd::_internal::_hlldSmallNumber * totalPressureStar) { - velocityDoubleStarY = velocityStarYL; - velocityDoubleStarZ = velocityStarZL; - magneticDoubleStarY = magneticStarYL; - magneticDoubleStarZ = magneticStarZL; - energyDoubleStarL = energyStarL; - energyDoubleStarR = energyStarR; + if (speed.M >= 0.0) { + // We're in the L** state but Bx=0 so return L* state + doubleStarState.velocityY = starStateL.velocityY; + doubleStarState.velocityZ = starStateL.velocityZ; + doubleStarState.magneticY = starStateL.magneticY; + doubleStarState.magneticZ = starStateL.magneticZ; + doubleStarState.energyL = starStateL.energy; + } else { + // We're in the L** state but Bx=0 so return L* state + doubleStarState.velocityY = starStateR.velocityY; + doubleStarState.velocityZ = starStateR.velocityZ; + doubleStarState.magneticY = starStateR.magneticY; + doubleStarState.magneticZ = starStateR.magneticZ; + doubleStarState.energyR = starStateR.energy; + } } else { // Setup some variables we'll need later - Real sqrtDL = sqrt(densityStarL); - Real sqrtDR = sqrt(densityStarR); + Real sqrtDL = sqrt(starStateL.density); + Real sqrtDR = sqrt(starStateR.density); Real inverseDensities = 1.0 / (sqrtDL + sqrtDR); Real magXSign = copysign(1.0, magneticX); @@ -624,77 +502,94 @@ __device__ __host__ void _doubleStarState( // Double Star velocities // M&K 2005 equations 59 & 60 - velocityDoubleStarY = - inverseDensities * (sqrtDL * velocityStarYL + sqrtDR * velocityStarYR + - magXSign * (magneticStarYR - magneticStarYL)); - velocityDoubleStarZ = - inverseDensities * (sqrtDL * velocityStarZL + sqrtDR * velocityStarZR + - magXSign * (magneticStarZR - magneticStarZL)); + doubleStarState.velocityY = + inverseDensities * + (sqrtDL * starStateL.velocityY + sqrtDR * starStateR.velocityY + + magXSign * (starStateR.magneticY - starStateL.magneticY)); + doubleStarState.velocityZ = + inverseDensities * + (sqrtDL * starStateL.velocityZ + sqrtDR * starStateR.velocityZ + + magXSign * (starStateR.magneticZ - starStateL.magneticZ)); // Double star magnetic fields // M&K 2005 equations 61 & 62 - magneticDoubleStarY = + doubleStarState.magneticY = inverseDensities * - (sqrtDL * magneticStarYR + sqrtDR * magneticStarYL + - magXSign * (sqrtDL * sqrtDR) * (velocityStarYR - velocityStarYL)); - magneticDoubleStarZ = + (sqrtDL * starStateR.magneticY + sqrtDR * starStateL.magneticY + + magXSign * (sqrtDL * sqrtDR) * + (starStateR.velocityY - starStateL.velocityY)); + doubleStarState.magneticZ = inverseDensities * - (sqrtDL * magneticStarZR + sqrtDR * magneticStarZL + - magXSign * (sqrtDL * sqrtDR) * (velocityStarZR - velocityStarZL)); + (sqrtDL * starStateR.magneticZ + sqrtDR * starStateL.magneticZ + + magXSign * (sqrtDL * sqrtDR) * + (starStateR.velocityZ - starStateL.velocityZ)); // Double star energy Real velDblStarDotMagDblStar = math_utils::dotProduct( - speedM, velocityDoubleStarY, velocityDoubleStarZ, magneticX, - magneticDoubleStarY, magneticDoubleStarZ); + speed.M, doubleStarState.velocityY, doubleStarState.velocityZ, + magneticX, doubleStarState.magneticY, doubleStarState.magneticZ); // M&K 2005 equation 63 - energyDoubleStarL = - energyStarL - + doubleStarState.energyL = + starStateL.energy - sqrtDL * magXSign * - (math_utils::dotProduct(speedM, velocityStarYL, velocityStarZL, - magneticX, magneticStarYL, magneticStarZL) - + (math_utils::dotProduct( + speed.M, starStateL.velocityY, starStateL.velocityZ, magneticX, + starStateL.magneticY, starStateL.magneticZ) - velDblStarDotMagDblStar); - energyDoubleStarR = - energyStarR + + doubleStarState.energyR = + starStateR.energy + sqrtDR * magXSign * - (math_utils::dotProduct(speedM, velocityStarYR, velocityStarZR, - magneticX, magneticStarYR, magneticStarZR) - + (math_utils::dotProduct( + speed.M, starStateR.velocityY, starStateR.velocityZ, magneticX, + starStateR.magneticY, starStateR.magneticZ) - velDblStarDotMagDblStar); } + + return doubleStarState; } // ===================================================================== // ===================================================================== -__device__ __host__ void _doubleStarFluxes( - Real const &speedStarSide, Real const &momentumStarFluxX, - Real const &momentumStarFluxY, Real const &momentumStarFluxZ, - Real const &energyStarFlux, Real const &magneticStarFluxY, - Real const &magneticStarFluxZ, Real const &densityStar, - Real const &velocityStarX, Real const &velocityStarY, - Real const &velocityStarZ, Real const &energyStar, - Real const &magneticStarY, Real const &magneticStarZ, - Real const &velocityDoubleStarX, Real const &velocityDoubleStarY, - Real const &velocityDoubleStarZ, Real const &energyDoubleStar, - Real const &magneticDoubleStarY, Real const &magneticDoubleStarZ, - Real &momentumDoubleStarFluxX, Real &momentumDoubleStarFluxY, - Real &momentumDoubleStarFluxZ, Real &energyDoubleStarFlux, - Real &magneticDoubleStarFluxY, Real &magneticDoubleStarFluxZ) +__device__ __host__ mhd::_internal::Flux computeDoubleStarFluxes( + mhd::_internal::DoubleStarState const &doubleStarState, + Real const &doubleStarStateEnergy, + mhd::_internal::StarState const &starState, + mhd::_internal::State const &state, mhd::_internal::Flux const &flux, + mhd::_internal::Speeds const &speed, Real const &speedSide, + Real const &speedSideStar) { + mhd::_internal::Flux doubleStarFlux; + + Real const speed_diff = speedSideStar - speedSide; + // M&K 2005 equation 65 - momentumDoubleStarFluxX = - momentumStarFluxX + - speedStarSide * (velocityDoubleStarX - velocityStarX) * densityStar; - momentumDoubleStarFluxY = - momentumStarFluxY + - speedStarSide * (velocityDoubleStarY - velocityStarY) * densityStar; - momentumDoubleStarFluxZ = - momentumStarFluxZ + - speedStarSide * (velocityDoubleStarZ - velocityStarZ) * densityStar; - energyDoubleStarFlux = - energyStarFlux + speedStarSide * (energyDoubleStar - energyStar); - magneticDoubleStarFluxY = - magneticStarFluxY + speedStarSide * (magneticDoubleStarY - magneticStarY); - magneticDoubleStarFluxZ = - magneticStarFluxZ + speedStarSide * (magneticDoubleStarZ - magneticStarZ); + doubleStarFlux.density = flux.density - speedSide * state.density - + speed_diff * starState.density + + speedSideStar * starState.density; + + doubleStarFlux.momentumX = flux.momentumX - + speedSide * (state.density * state.velocityX) - + speed_diff * (starState.density * speed.M) + + speedSideStar * (starState.density * speed.M); + doubleStarFlux.momentumY = + flux.momentumY - speedSide * (state.density * state.velocityY) - + speed_diff * (starState.density * starState.velocityY) + + speedSideStar * (starState.density * doubleStarState.velocityY); + doubleStarFlux.momentumZ = + flux.momentumZ - speedSide * (state.density * state.velocityZ) - + speed_diff * (starState.density * starState.velocityZ) + + speedSideStar * (starState.density * doubleStarState.velocityZ); + doubleStarFlux.energy = flux.energy - speedSide * state.energy - + speed_diff * starState.energy + + speedSideStar * doubleStarStateEnergy; + doubleStarFlux.magneticY = flux.magneticY - speedSide * state.magneticY - + speed_diff * starState.magneticY + + speedSideStar * doubleStarState.magneticY; + doubleStarFlux.magneticZ = flux.magneticZ - speedSide * state.magneticZ - + speed_diff * starState.magneticZ + + speedSideStar * doubleStarState.magneticZ; + + return doubleStarFlux; } // ===================================================================== diff --git a/src/riemann_solvers/hlld_cuda.h b/src/riemann_solvers/hlld_cuda.h index 539e5a96c..38504abbc 100644 --- a/src/riemann_solvers/hlld_cuda.h +++ b/src/riemann_solvers/hlld_cuda.h @@ -59,84 +59,119 @@ namespace _internal Real static const _hlldSmallNumber = 1.0e-8; /*! - * \brief Compute the left, right, star, and middle wave speeds. Also - * returns the densities in the star states. M&K 2005 equations 38, 43, - * 51, and 67 + * \brief Holds all the data needed for the non-star states of the HLLD solver * - * \param[in] densityL Density, left side - * \param[in] momentumXL Momentum in the X-direction, left side - * \param[in] momentumYL Momentum in the Y-direction, left side - * \param[in] momentumZL Momentum in the Z-direction, left side - * \param[in] velocityXL Velocity in the X-direction, left side - * \param[in] velocityYL Velocity in the Y-direction, left side - * \param[in] velocityZL Velocity in the Z-direction, left side - * \param[in] gasPressureL Gas pressure, left side - * \param[in] totalPressureL Total MHD pressure, left side - * \param[in] magneticX Magnetic field in the X-direction, left side - * \param[in] magneticYL Magnetic field in the Y-direction, left side - * \param[in] magneticZL Magnetic field in the Z-direction, left side - * \param[in] densityR Density, right side - * \param[in] momentumXR Momentum in the X-direction, right side - * \param[in] momentumYR Momentum in the Y-direction, right side - * \param[in] momentumZR Momentum in the Z-direction, right side - * \param[in] velocityXR Velocity in the X-direction, right side - * \param[in] velocityYR Velocity in the Y-direction, right side - * \param[in] velocityZR Velocity in the Z-direction, right side - * \param[in] gasPressureR Gas pressure, right side - * \param[in] totalPressureR Total MHD pressure, right side - * \param[in] magneticYR Magnetic field in the Y-direction, right side - * \param[in] magneticZR Magnetic field in the Z-direction, right side - * \param[in] gamma Adiabatic index - * \param[out] speedL Approximate speed of the left most wave - * \param[out] speedR Approximate speed of the right most wave - * \param[out] speedM Speed of the middle wave - * \param[out] speedStarL Speed of the left star state wave - * \param[out] speedStarR Speed of the right star state wave - * \param[out] densityStarL Density in left star region - * \param[out] densityStarR Density in right star region */ -__device__ __host__ void _approximateWaveSpeeds( - Real const &densityL, Real const &momentumXL, Real const &momentumYL, - Real const &momentumZL, Real const &velocityXL, Real const &velocityYL, - Real const &velocityZL, Real const &gasPressureL, - Real const &totalPressureL, Real const &magneticX, Real const &magneticYL, - Real const &magneticZL, Real const &densityR, Real const &momentumXR, - Real const &momentumYR, Real const &momentumZR, Real const &velocityXR, - Real const &velocityYR, Real const &velocityZR, Real const &gasPressureR, - Real const &totalPressureR, Real const &magneticYR, Real const &magneticZR, - Real const &gamma, Real &speedL, Real &speedR, Real &speedM, - Real &speedStarL, Real &speedStarR, Real &densityStarL, Real &densityStarR); +struct State { + Real density, velocityX, velocityY, velocityZ, energy, magneticY, magneticZ, + gasPressure, totalPressure; + #ifdef SCALAR + Real scalarSpecific[grid_enum::nscalars]; + #endif // SCALAR + #ifdef DE + Real thermalEnergySpecific; + #endif // DE +}; /*! - * \brief Compute the fluxes in the left or right non-star state + * \brief Holds all the data needed for the star states of the HLLD solver + * except total pressure and x velocity as those are shared between the left and + * right states * - * \param[in] momentumX Momentum in the X-direction - * \param[in] velocityX Velocity in the X-direction - * \param[in] velocityY Velocity in the Y-direction - * \param[in] velocityZ Velocity in the Z-direction - * \param[in] totalPressure Total MHD pressure - * \param[in] energy Energy - * \param[in] magneticX Magnetic field in -direction - * \param[in] magneticY Magnetic field in -direction - * \param[in] magneticZ Magnetic field in -direction - * \param[out] densityFlux The density flux - * \param[out] momentumFluxX The momentum flux in the X-direction - * \param[out] momentumFluxY The momentum flux in the Y-direction - * \param[out] momentumFluxZ The momentum flux in the Z-direction - * \param[out] magneticFluxY The magnetic field flux in the Y-direction - * \param[out] magneticFluxZ The magnetic field flux in the Z-direction - * \param[out] energyFlux The energy flux */ -__device__ __host__ void _nonStarFluxes( - Real const &momentumX, Real const &velocityX, Real const &velocityY, - Real const &velocityZ, Real const &totalPressure, Real const &energy, - Real const &magneticX, Real const &magneticY, Real const &magneticZ, - Real &densityFlux, Real &momentumFluxX, Real &momentumFluxY, - Real &momentumFluxZ, Real &magneticFluxY, Real &magneticFluxZ, - Real &energyFlux); +struct StarState { + // velocityStarX = Speeds.M + // Total pressure is computed on its own since it's shared + Real density, velocityY, velocityZ, energy, magneticY, magneticZ; +}; /*! - * \brief Assign the given flux values to the dev_flux array + * \brief Holds all the data needed for the double star states of the HLLD + * solver except the x velocity, density, and total pressure since those are all + * inherited from the star state. + * + */ +struct DoubleStarState { + // velocityDoubleStarX = Speeds.M + // densityDoubleStar = densityStar + // pressureDoubleStar = pressureStar + // Shared values + Real velocityY, velocityZ, magneticY, magneticZ; + // Different values + Real energyL, energyR; +}; + +/*! + * \brief Holds all the data needed for the fluxes in the HLLD solver + * + */ +struct Flux { + Real density, momentumX, momentumY, momentumZ, energy, magneticY, magneticZ; +}; + +/*! + * \brief Holds all the data needed for the speeds in the HLLD solver + * + */ +struct Speeds { + Real L, LStar, M, RStar, R; +}; + +/*! + * \brief Load and compute the left or right state + * + * \param interfaceArr The interface array to load from + * \param magneticX The X magnetic field + * \param gamma The adiabatic index + * \param threadId The thread ID + * \param n_cells Total number of cells + * \param o1 Direction parameter + * \param o2 Direction parameter + * \param o3 Direction parameter + * \return mhd::_internal::State The loaded state + */ +__device__ __host__ mhd::_internal::State loadState( + Real const *interfaceArr, Real const &magneticX, Real const &gamma, + int const &threadId, int const &n_cells, int const &o1, int const &o2, + int const &o3); + +/*! + * \brief Compute the approximate left and right wave speeds. M&K 2005 equation + * 67 + */ +__device__ __host__ mhd::_internal::Speeds approximateLRWaveSpeeds( + mhd::_internal::State const &stateL, mhd::_internal::State const &stateR, + Real const &magneticX, Real const &gamma); + +/*! + * \brief Compute the approximate middle wave speed. M&K 2005 equation 38 + */ +__device__ __host__ Real approximateMiddleWaveSpeed( + mhd::_internal::State const &stateL, mhd::_internal::State const &stateR, + mhd::_internal::Speeds const &speed); + +/*! + * \brief Compute the approximate left and right wave speeds. M&K 2005 equation + * 51 + */ +__device__ __host__ Real +approximateStarWaveSpeed(mhd::_internal::StarState const &starState, + mhd::_internal::Speeds const &speed, + Real const &magneticX, Real const &side); + +/*! + * \brief Compute the fluxes in the left or right non-star state. M&K 2005 + * equation 2 + * + * \param state The state to compute the flux of + * \param magneticX The X magnetic field + * \return mhd::_internal::Flux The flux in the state + */ +__device__ __host__ mhd::_internal::Flux nonStarFluxes( + mhd::_internal::State const &state, Real const &magneticX); + +/*! + * \brief Write the given flux values to the dev_flux array * * \param[in] threadId The thread ID * \param[in] o1 Offset to get indexing right @@ -144,159 +179,91 @@ __device__ __host__ void _nonStarFluxes( * \param[in] o3 Offset to get indexing right * \param[in] n_cells Number of cells * \param[out] dev_flux The flux array - * \param[in] densityFlux The density flux - * \param[in] momentumFluxX The momentum flux in the X-direction - * \param[in] momentumFluxY The momentum flux in the Y-direction - * \param[in] momentumFluxZ The momentum flux in the Z-direction - * \param[in] magneticFluxY The magnetic field flux in the X-direction - * \param[in] magneticFluxZ The magnetic field flux in the Y-direction - * \param[in] energyFlux The energy flux + * \param[in] flux The fluxes to write out + * \param[in] state The left or right state depending on if this is a return for + * one of the left states or one of the right states */ -__device__ __host__ void _returnFluxes( - int const &threadId, int const &o1, int const &o2, int const &o3, - int const &n_cells, Real *dev_flux, Real const &densityFlux, - Real const &momentumFluxX, Real const &momentumFluxY, - Real const &momentumFluxZ, Real const &magneticFluxY, - Real const &magneticFluxZ, Real const &energyFlux); +__device__ __host__ void returnFluxes(int const &threadId, int const &o1, + int const &o2, int const &o3, + int const &n_cells, Real *dev_flux, + mhd::_internal::Flux const &flux, + mhd::_internal::State const &state); /*! - * \brief Compute the fluxes in the left or right star state. M&K 2005 - * equations 44-48, 64 + * \brief Compute the total pressure in the star states. M&K 2005 equation 41 * - * \param[in] speedM Speed of the central wave - * \param[in] speedSide Speed of the non-star wave on the side being computed - * \param[in] density Density - * \param[in] velocityX Velocity in the X-direction - * \param[in] velocityY Velocity in the Y-direction - * \param[in] velocityZ Velocity in the Z-direction - * \param[in] momentumX Momentum in the X-direction - * \param[in] momentumY Momentum in the Y-direction - * \param[in] momentumZ Momentum in the Z-direction - * \param[in] energy Energy - * \param[in] totalPressure Total MHD pressure - * \param[in] magneticX Magnetic field in the X-direction - * \param[in] magneticY Magnetic field in the Y-direction - * \param[in] magneticZ Magnetic field in the Z-direction - * \param[in] densityStar Density in the star state - * \param[in] totalPressureStar Total MHD pressure in the star state - * \param[in] densityFlux Density Flux from the non-star state - * \param[in] momentumFluxX Momentum flux from the non-star state in the - * X-direction \param[in] momentumFluxY Momentum flux from the non-star state in - * the Y-direction \param[in] momentumFluxZ Momentum flux from the non-star - * state in the Z-direction \param[in] energyFlux Energy flux from the non-star - * state \param[in] magneticFluxY Magnetic flux from the non-star state in the - * X-direction \param[in] magneticFluxZ Magnetic flux from the non-star state in - * the Y-direction \param[out] velocityStarY Velocity in the star state in the - * Y-direction \param[out] velocityStarZ Velocity in the star state in the - * Z-direction \param[out] energyStar Energy in the star state \param[out] - * magneticStarY Magnetic field in the star state in the X-direction \param[out] - * magneticStarZ Magnetic field in the star state in the Y-direction \param[out] - * densityStarFlux Density flux in the star state \param[out] momentumStarFluxX - * Momentum flux in the star state in the X-direction \param[out] - * momentumStarFluxY Momentum flux in the star state in the Y-direction - * \param[out] momentumStarFluxZ Momentum flux in the star state in the - * Z-direction \param[out] energyStarFlux Energy flux in the star state - * \param[out] magneticStarFluxY Magnetic field flux in the star state in the - * X-direction \param[out] magneticStarFluxZ Magnetic field flux in the star - * state in the Y-direction + * \param stateL The left state + * \param stateR The right state + * \param speed The wave speeds + * \return Real The total pressure in the star state + */ +__device__ __host__ Real starTotalPressure(mhd::_internal::State const &stateL, + mhd::_internal::State const &stateR, + mhd::_internal::Speeds const &speed); + +/*! + * \brief Compute the L* or R* state. M&K 2005 equations 43-48 + * + * \param state The non-star state on the same side as the desired star + * state \param speed The wavespeeds \param speedSide The wave speed on the + * same side as the desired star state \param magneticX The magnetic field + * in the x direction \param totalPressureStar The total pressure in the + * star state \return mhd::_internal::StarState The computed star state + */ +__device__ __host__ mhd::_internal::StarState computeStarState( + mhd::_internal::State const &state, mhd::_internal::Speeds const &speed, + Real const &speedSide, Real const &magneticX, + Real const &totalPressureStar); + +/*! + * \brief Compute the flux in the star state. M&K 2005 equation 64 * + * \param starState The star state to compute the flux of + * \param state The non-star state on the same side as the star state + * \param flux The non-star flux on the same side as the star state + * \param speed The wave speeds + * \param speedSide The non-star wave speed on the same side as the star state + * \return mhd::_internal::Flux The flux in the star state */ -__device__ __host__ void _starFluxes( - Real const &speedM, Real const &speedSide, Real const &density, - Real const &velocityX, Real const &velocityY, Real const &velocityZ, - Real const &momentumX, Real const &momentumY, Real const &momentumZ, - Real const &energy, Real const &totalPressure, Real const &magneticX, - Real const &magneticY, Real const &magneticZ, Real const &densityStar, - Real const &totalPressureStar, Real const &densityFlux, - Real const &momentumFluxX, Real const &momentumFluxY, - Real const &momentumFluxZ, Real const &energyFlux, - Real const &magneticFluxY, Real const &magneticFluxZ, Real &velocityStarY, - Real &velocityStarZ, Real &energyStar, Real &magneticStarY, - Real &magneticStarZ, Real &densityStarFlux, Real &momentumStarFluxX, - Real &momentumStarFluxY, Real &momentumStarFluxZ, Real &energyStarFlux, - Real &magneticStarFluxY, Real &magneticStarFluxZ); +__device__ __host__ mhd::_internal::Flux starFluxes( + mhd::_internal::StarState const &starState, + mhd::_internal::State const &state, mhd::_internal::Flux const &flux, + mhd::_internal::Speeds const &speed, Real const &speedSide); /*! * \brief Compute the double star state. M&K 2005 equations 59-63 * - * \param[in] speedM - * \param[in] magneticX - * \param[in] totalPressureStar - * \param[in] densityStarL - * \param[in] velocityStarYL - * \param[in] velocityStarZL - * \param[in] energyStarL - * \param[in] magneticStarYL - * \param[in] magneticStarZL - * \param[in] densityStarR - * \param[in] velocityStarYR - * \param[in] velocityStarZR - * \param[in] energyStarR - * \param[in] magneticStarYR - * \param[in] magneticStarZR - * \param[out] velocityDoubleStarY - * \param[out] velocityDoubleStarZ - * \param[out] magneticDoubleStarY - * \param[out] magneticDoubleStarZ - * \param[out] energyDoubleStarL - * \param[out] energyDoubleStarR + * \param starStateL The Left star state + * \param starStateR The Right star state + * \param magneticX The x magnetic field + * \param totalPressureStar The total pressure in the star state + * \param speed The approximate wave speeds + * \return mhd::_internal::DoubleStarState The double star state */ -__device__ __host__ void _doubleStarState( - Real const &speedM, Real const &magneticX, Real const &totalPressureStar, - Real const &densityStarL, Real const &velocityStarYL, - Real const &velocityStarZL, Real const &energyStarL, - Real const &magneticStarYL, Real const &magneticStarZL, - Real const &densityStarR, Real const &velocityStarYR, - Real const &velocityStarZR, Real const &energyStarR, - Real const &magneticStarYR, Real const &magneticStarZR, - Real &velocityDoubleStarY, Real &velocityDoubleStarZ, - Real &magneticDoubleStarY, Real &magneticDoubleStarZ, - Real &energyDoubleStarL, Real &energyDoubleStarR); +__device__ __host__ mhd::_internal::DoubleStarState computeDoubleStarState( + mhd::_internal::StarState const &starStateL, + mhd::_internal::StarState const &starStateR, Real const &magneticX, + Real const &totalPressureStar, mhd::_internal::Speeds const &speed); /*! * \brief Compute the double star state fluxes. M&K 2005 equation 65 * - * \param[in] speedStarSide The star speed on the side being computed - * \param[in] momentumStarFluxX - * \param[in] momentumStarFluxY - * \param[in] momentumStarFluxZ - * \param[in] energyStarFlux - * \param[in] magneticStarFluxY - * \param[in] magneticStarFluxZ - * \param[in] densityStar - * \param[in] velocityStarX - * \param[in] velocityStarY - * \param[in] velocityStarZ - * \param[in] energyStar - * \param[in] magneticStarY - * \param[in] magneticStarZ - * \param[in] velocityDoubleStarX - * \param[in] velocityDoubleStarY - * \param[in] velocityDoubleStarZ - * \param[in] energyDoubleStar - * \param[in] magneticDoubleStarY - * \param[in] magneticDoubleStarZ - * \param[out] momentumDoubleStarFluxX - * \param[out] momentumDoubleStarFluxY - * \param[out] momentumDoubleStarFluxZ - * \param[out] energyDoubleStarFlux - * \param[out] magneticDoubleStarFluxY - * \param[out] magneticDoubleStarFluxZ + * \param doubleStarState The double star states + * \param starState The star state on the same side + * \param state The non-star state on the same side + * \param flux The non-star flux on the same side + * \param speed The approximate wave speeds + * \param speedSide The wave speed on the same side + * \param speedSideStar The star wave speed on the same side + * \return __device__ */ -__device__ __host__ void _doubleStarFluxes( - Real const &speedStarSide, Real const &momentumStarFluxX, - Real const &momentumStarFluxY, Real const &momentumStarFluxZ, - Real const &energyStarFlux, Real const &magneticStarFluxY, - Real const &magneticStarFluxZ, Real const &densityStar, - Real const &velocityStarX, Real const &velocityStarY, - Real const &velocityStarZ, Real const &energyStar, - Real const &magneticStarY, Real const &magneticStarZ, - Real const &velocityDoubleStarX, Real const &velocityDoubleStarY, - Real const &velocityDoubleStarZ, Real const &energyDoubleStar, - Real const &magneticDoubleStarY, Real const &magneticDoubleStarZ, - Real &momentumDoubleStarFluxX, Real &momentumDoubleStarFluxY, - Real &momentumDoubleStarFluxZ, Real &energyDoubleStarFlux, - Real &magneticDoubleStarFluxY, Real &magneticDoubleStarFluxZ); +__device__ __host__ mhd::_internal::Flux computeDoubleStarFluxes( + mhd::_internal::DoubleStarState const &doubleStarState, + Real const &doubleStarStateEnergy, + mhd::_internal::StarState const &starState, + mhd::_internal::State const &state, mhd::_internal::Flux const &flux, + mhd::_internal::Speeds const &speed, Real const &speedSide, + Real const &speedSideStar); } // namespace _internal } // end namespace mhd diff --git a/src/riemann_solvers/hlld_cuda_tests.cu b/src/riemann_solvers/hlld_cuda_tests.cu index 59f52d72a..10ceed96e 100644 --- a/src/riemann_solvers/hlld_cuda_tests.cu +++ b/src/riemann_solvers/hlld_cuda_tests.cu @@ -8,8 +8,8 @@ // STL Includes #include #include +#include #include -#include // External Includes #include // Include GoogleTest and related libraries/headers @@ -214,8 +214,14 @@ class tMHDCalculateHLLDFluxesCUDA : public ::testing::Test double absoluteDiff; int64_t ulpsDiff; + // This error is consistent with the FP error in rearanging the flux + // computations in the Athena solver + double const fixedEpsilon = 2.7E-15; + int64_t const ulpsEpsilon = 7; + bool areEqual = testingUtilities::nearlyEqualDbl( - fiducialFlux[i], testFlux[i], absoluteDiff, ulpsDiff); + fiducialFlux[i], testFlux[i], absoluteDiff, ulpsDiff, fixedEpsilon, + ulpsEpsilon); EXPECT_TRUE(areEqual) << std::endl << customOutput << std::endl @@ -1965,7 +1971,7 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, ConstantStatesExpectCorrectFlux) primitive2Conserved({1e4, 0.0, 0.0, 0.0, 1.380658E-5, 1.0, 1.0, 1.0}, gamma, primitiveScalar); - for (size_t direction = 2; direction < 3; direction++) { + for (size_t direction = 0; direction < 3; direction++) { { std::string const outputString{ "Left State: Constant state, zero magnetic field\n" @@ -1988,7 +1994,7 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, ConstantStatesExpectCorrectFlux) "HLLD State: Left Double Star"}; // Compute the fluxes and check for correctness // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{0, + std::vector const fiducialFlux{1.42108547152020037174e-14, 0.50001380657999994, -1, -1, @@ -2275,388 +2281,380 @@ struct testParams { // List of cases std::vector names{"Case 1", "Case 2"}; - // Conserved Variables - double gamma = 5. / 3.; - std::valarray densityL{21.50306776645775, 48.316634031589935}; - std::valarray densityR{81.1217731762265, 91.02955738853635}; - std::valarray momentumXL{38.504606872151484, 18.984145880030045}; - std::valarray momentumXR{8.201811315045326, 85.24863367778745}; - std::valarray momentumYL{7.1046427940455015, 33.76182584816693}; - std::valarray momentumYR{13.874767484202021, 33.023492551299974}; - std::valarray momentumZL{32.25700338919422, 89.52561861038686}; - std::valarray momentumZR{33.85305318830181, 8.664313303796256}; - std::valarray energyL{65.75120838109942, 38.461354599479826}; - std::valarray energyR{18.88982523270516, 83.65639784178894}; - std::valarray magneticXL{92.75101068883114, 31.588767769990532}; - std::valarray magneticXR{93.66196246448985, 84.3529879134052}; - std::valarray magneticYL{12.297499156516622, 63.74471969570406}; - std::valarray magneticYR{84.9919141787549, 35.910258841630984}; - std::valarray magneticZL{46.224045698787776, 37.70326455170754}; - std::valarray magneticZR{34.852095153095384, 24.052685003977757}; - // Star States - std::valarray densityStarL{28.520995251761526, 54.721668215064945}; - std::valarray densityStarR{49.09069570738605, 72.68000504460609}; - std::valarray momentumStarXL{48.96082367518151, 97.15439466280228}; - std::valarray momentumStarXR{65.74705433463932, 94.5689655974538}; - std::valarray momentumStarYL{44.910034185328996, 78.60179936059853}; - std::valarray momentumStarYR{51.642522487399276, 44.63864007208728}; - std::valarray momentumStarZL{39.78163555990428, 63.01612978428839}; - std::valarray momentumStarZR{33.47900698769427, 52.19410653341197}; - std::valarray energyStarL{6.579867455284738, 30.45043664908369}; - std::valarray energyStarR{90.44484278669114, 61.33664731346812}; - std::valarray magneticStarXL{49.81491527582234, 62.379765828560906}; - std::valarray magneticStarXR{67.77402751903804, 64.62226739788758}; - std::valarray magneticStarYL{62.09348829143065, 54.27916744403672}; - std::valarray magneticStarYR{26.835645069149873, 98.97444628327318}; - std::valarray magneticStarZL{62.765890944643196, 93.26765455509641}; - std::valarray magneticStarZR{7.430231695917344, 10.696380763901459}; - // Double Star State - std::valarray momentumDoubleStarXL{75.42525315887075, - 83.87480678359029}; - std::valarray momentumDoubleStarYL{22.56132540660678, - 76.11074421934487}; - std::valarray momentumDoubleStarZL{27.83908778933224, - 28.577101567661465}; - std::valarray energyDoubleStar{45.83202455707669, 55.4553014145573}; - std::valarray magneticDoubleStarY{20.943239839455895, - 83.8514810487021}; - std::valarray magneticDoubleStarZ{83.3802438268807, - 80.36671251730783}; - // Fluxes - std::valarray densityFluxL{12.939239309626116, 81.71524586517073}; - std::valarray momentumFluxXL{65.05481464917627, 56.09885069707803}; - std::valarray momentumFluxYL{73.67692845586782, 2.717246983403787}; - std::valarray momentumFluxZL{16.873647595664387, 39.70132983192873}; - std::valarray energyFluxL{52.71888731972469, 81.63926176158796}; - std::valarray magneticFluxXL{67.7412464028116, 42.85301340921149}; - std::valarray magneticFluxYL{58.98928445415967, 57.04344459221359}; - std::valarray magneticFluxZL{29.976925743532302, 97.73329827141359}; - std::valarray momentumStarFluxX{74.90125547448865, - 26.812722601652684}; - std::valarray momentumStarFluxY{16.989138610622945, - 48.349566649914976}; - std::valarray momentumStarFluxZ{38.541822734846185, - 61.22843961052538}; - std::valarray energyStarFlux{19.095105176247017, 45.43224973313112}; - std::valarray magneticStarFluxY{96.23964526624277, 33.05337536594796}; - std::valarray magneticStarFluxZ{86.22516928268347, 15.62102082410738}; - - // Derived/Primitive variables - std::valarray velocityXL = momentumXL / densityL; - std::valarray velocityXR = momentumXR / densityR; - std::valarray velocityYL = momentumYL / densityL; - std::valarray velocityYR = momentumYR / densityR; - std::valarray velocityZL = momentumZL / densityL; - std::valarray velocityZR = momentumZR / densityR; - std::valarray totalPressureStarL{66.80958736783934, - 72.29644038317676}; - std::vector gasPressureL; - std::vector gasPressureR; - std::vector totalPressureL; - std::vector totalPressureR; - // Star State - std::valarray velocityStarXL = momentumStarXL / densityStarL; - std::valarray velocityStarXR = momentumStarXR / densityStarR; - std::valarray velocityStarYL = momentumStarYL / densityStarL; - std::valarray velocityStarYR = momentumStarYR / densityStarR; - std::valarray velocityStarZL = momentumStarZL / densityStarL; - std::valarray velocityStarZR = momentumStarZR / densityStarR; - // Double Star State - std::valarray velocityDoubleStarXL = - momentumDoubleStarXL / densityStarL; - std::valarray velocityDoubleStarYL = - momentumDoubleStarYL / densityStarL; - std::valarray velocityDoubleStarZL = - momentumDoubleStarZL / densityStarL; - // Other - std::valarray speedM{68.68021569453585, 70.08236749169825}; - std::valarray speedSide{70.37512772923496, 3.6579130085113265}; - testParams() - { - for (size_t i = 0; i < names.size(); i++) { - gasPressureL.push_back(mhd::utils::computeGasPressure( - energyL[i], densityL[i], momentumXL[i], momentumYL[i], momentumZL[i], - magneticXL[i], magneticYL[i], magneticZL[i], gamma)); - gasPressureR.push_back(mhd::utils::computeGasPressure( - energyR[i], densityR[i], momentumXR[i], momentumYR[i], momentumZR[i], - magneticXR[i], magneticYR[i], magneticZR[i], gamma)); - totalPressureL.push_back(mhd::utils::computeTotalPressure( - gasPressureL.back(), magneticXL[i], magneticYL[i], magneticZL[i])); - totalPressureR.push_back(mhd::utils::computeTotalPressure( - gasPressureL.back(), magneticXR[i], magneticYR[i], magneticZR[i])); - } - } + double const gamma = 5. / 3.; + + std::vector const magneticX{92.75101068883114, 31.588767769990532}; + + std::vector stateLVec{ + {21.50306776645775, 1.7906564444824999, 0.33040135813215948, + 1.500111692877206, 65.751208381099417, 12.297499156516622, + 46.224045698787776, 9.9999999999999995e-21, 5445.3204350339083}, + {48.316634031589935, 0.39291118391272883, 0.69876195899931859, + 1.8528943583250035, 38.461354599479826, 63.744719695704063, + 37.703264551707541, 9.9999999999999995e-21, 3241.38784808316}}, + stateRVec{ + {81.121773176226498, 0.10110493143718589, 0.17103629446142521, + 0.41731155351794952, 18.88982523270516, 84.991914178754897, + 34.852095153095384, 9.9999999999999995e-21, 8605.4286125143772}, + {91.029557388536347, 0.93649399297774782, 0.36277769000180521, + 0.095181318599791204, 83.656397841788944, 35.910258841630984, + 24.052685003977757, 9.9999999999999995e-21, 4491.7524579462979}}; + + std::vector const starStateLVec{ + {28.520995251761526, 1.5746306813243216, 1.3948193325212686, + 6.579867455284738, 62.093488291430653, 62.765890944643196}, + {54.721668215064945, 1.4363926014039052, 1.1515754515491903, + 30.450436649083692, 54.279167444036723, 93.267654555096414}}, + starStateRVec{ + {49.090695707386047, 1.0519818825796206, 0.68198273634686157, + 90.44484278669114, 26.835645069149873, 7.4302316959173442}, + {72.680005044606091, 0.61418047569879897, 0.71813570322922715, + 61.33664731346812, 98.974446283273181, 10.696380763901459}}; + + std::vector totalPressureStar{66.80958736783934, 72.29644038317676}; + + std::vector const DoubleStarStateVec{ + {0.79104271107837087, 0.97609103551927523, 20.943239839455895, + 83.380243826880701, 45.832024557076693, std::nan("0")}, + {1.390870320696683, 0.52222643241336986, 83.851481048702098, + 80.366712517307832, 55.455301414557297, std::nan("0")}}; + + std::vector const flux{ + {12.939239309626116, 65.054814649176265, 73.676928455867824, + 16.873647595664387, 52.718887319724693, 58.989284454159673, + 29.976925743532302}, + {81.715245865170729, 56.098850697078028, 2.7172469834037871, + 39.701329831928732, 81.63926176158796, 57.043444592213589, + 97.733298271413588}}, + starFlux{{0, 74.90125547448865, 16.989138610622945, 38.541822734846185, + 19.095105176247017, 96.239645266242775, 86.225169282683467}, + {0, 26.812722601652684, 48.349566649914976, 61.228439610525378, + 45.432249733131123, 33.053375365947957, 15.621020824107379}}; + + std::vector const speed{ + {-22.40376497145191, -19.710500632936679, -0.81760587897407833, + 9.6740190040662242, 24.295526347371595}, + {-11.190385012513822, -4.4880642018724357, -0.026643804611559244, + 3.4191202933087519, 12.519790189404299}}; + + testParams() = default; }; } // namespace // ========================================================================= // ========================================================================= /*! - * \brief Test the mhd::_internal::_approximateWaveSpeeds function + * \brief Test the mhd::_internal::approximateLRWaveSpeeds function * */ -TEST(tMHDHlldInternalApproximateWaveSpeeds, CorrectInputExpectCorrectOutput) +TEST(tMHDHlldInternalApproximateLRWaveSpeeds, CorrectInputExpectCorrectOutput) { testParams const parameters; std::vector const fiducialSpeedL{-22.40376497145191, -11.190385012513822}; std::vector const fiducialSpeedR{24.295526347371595, 12.519790189404299}; - std::vector const fiducialSpeedM{-0.81760587897407833, - -0.026643804611559244}; - std::vector const fiducialSpeedStarL{-19.710500632936679, - -4.4880642018724357}; - std::vector const fiducialSpeedStarR{9.6740190040662242, - 3.4191202933087519}; - std::vector const fiducialDensityStarL{24.101290139122913, - 50.132466596958501}; - std::vector const fiducialDensityStarR{78.154104734671265, - 84.041595114910123}; - - double testSpeedL = 0; - double testSpeedR = 0; - double testSpeedM = 0; - double testSpeedStarL = 0; - double testSpeedStarR = 0; - double testDensityStarL = 0; - double testDensityStarR = 0; for (size_t i = 0; i < parameters.names.size(); i++) { - mhd::_internal::_approximateWaveSpeeds( - parameters.densityL[i], parameters.momentumXL[i], - parameters.momentumYL[i], parameters.momentumZL[i], - parameters.velocityXL[i], parameters.velocityYL[i], - parameters.velocityZL[i], parameters.gasPressureL[i], - parameters.totalPressureL[i], parameters.magneticXL[i], - parameters.magneticYL[i], parameters.magneticZL[i], - parameters.densityR[i], parameters.momentumXR[i], - parameters.momentumYR[i], parameters.momentumZR[i], - parameters.velocityXR[i], parameters.velocityYR[i], - parameters.velocityZR[i], parameters.gasPressureR[i], - parameters.totalPressureR[i], parameters.magneticYR[i], - parameters.magneticZR[i], parameters.gamma, testSpeedL, testSpeedR, - testSpeedM, testSpeedStarL, testSpeedStarR, testDensityStarL, - testDensityStarR); + mhd::_internal::Speeds testSpeed = mhd::_internal::approximateLRWaveSpeeds( + parameters.stateLVec.at(i), parameters.stateRVec.at(i), + parameters.magneticX.at(i), parameters.gamma); + // Now check results - testingUtilities::checkResults(fiducialSpeedL[i], testSpeedL, + testingUtilities::checkResults(fiducialSpeedL[i], testSpeed.L, parameters.names.at(i) + ", SpeedL"); - testingUtilities::checkResults(fiducialSpeedR.at(i), testSpeedR, + testingUtilities::checkResults(fiducialSpeedR.at(i), testSpeed.R, parameters.names.at(i) + ", SpeedR"); - testingUtilities::checkResults(fiducialSpeedM.at(i), testSpeedM, + } +} +// ========================================================================= + +// ========================================================================= +/*! + * \brief Test the mhd::_internal::approximateMiddleWaveSpeed function + * + */ +TEST(tMHDHlldInternalApproximateMiddleWaveSpeed, + CorrectInputExpectCorrectOutput) +{ + testParams const parameters; + + std::vector const fiducialSpeedM{-0.81760587897407833, + -0.026643804611559244}; + + mhd::_internal::Speeds testSpeed; + + for (size_t i = 0; i < parameters.names.size(); i++) { + testSpeed.M = mhd::_internal::approximateMiddleWaveSpeed( + parameters.stateLVec.at(i), parameters.stateRVec.at(i), + parameters.speed.at(i)); + + // Now check results + testingUtilities::checkResults(fiducialSpeedM.at(i), testSpeed.M, parameters.names.at(i) + ", SpeedM"); - testingUtilities::checkResults(fiducialSpeedStarL.at(i), testSpeedStarL, + } +} +// ========================================================================= + +// ========================================================================= +/*! + * \brief Test the mhd::_internal::approximateStarWaveSpeed function + * + */ +TEST(tMHDHlldInternalApproximateStarWaveSpeed, CorrectInputExpectCorrectOutput) +{ + testParams const parameters; + std::vector const fiducialSpeedStarL{-18.18506608966894, + -4.2968910457518161}; + std::vector const fiducialSpeedStarR{12.420292938368167, + 3.6786718447209252}; + + mhd::_internal::Speeds testSpeed; + + for (size_t i = 0; i < parameters.names.size(); i++) { + testSpeed.LStar = mhd::_internal::approximateStarWaveSpeed( + parameters.starStateLVec.at(i), parameters.speed.at(i), + parameters.magneticX.at(i), -1); + testSpeed.RStar = mhd::_internal::approximateStarWaveSpeed( + parameters.starStateRVec.at(i), parameters.speed.at(i), + parameters.magneticX.at(i), 1); + + // Now check results + testingUtilities::checkResults(fiducialSpeedStarL.at(i), testSpeed.LStar, parameters.names.at(i) + ", SpeedStarL"); - testingUtilities::checkResults(fiducialSpeedStarR.at(i), testSpeedStarR, + testingUtilities::checkResults(fiducialSpeedStarR.at(i), testSpeed.RStar, parameters.names.at(i) + ", SpeedStarR"); - testingUtilities::checkResults(fiducialDensityStarL.at(i), testDensityStarL, - parameters.names.at(i) + ", DensityStarL"); - testingUtilities::checkResults(fiducialDensityStarR.at(i), testDensityStarR, - parameters.names.at(i) + ", DensityStarR"); } } // ========================================================================= // ========================================================================= /*! - * \brief Test the mhd::_internal::_starFluxes function in the non-degenerate - * case + * \brief Test the mhd::_internal::_nonStarFluxes function * */ -TEST(tMHDHlldInternalStarFluxes, CorrectInputNonDegenerateExpectCorrectOutput) +TEST(tMHDHlldInternalNonStarFluxes, CorrectInputExpectCorrectOutput) { testParams const parameters; - std::vector const fiducialVelocityStarY{12.831290892281075, - 12.92610185957192}; - std::vector const fiducialVelocityStarZ{48.488664548015286, - 9.0850326944201107}; - std::vector const fiducialEnergyStar{1654897.6912410262, - 956.83439334487116}; - std::vector const fiducialMagneticStarY{-186.47142421374559, - 2.6815421494204679}; - std::vector const fiducialMagneticStarZ{-700.91191100481922, - 1.5860591049546646}; - std::vector const fiducialDensityStarFlux{506.82678248238807, - 105.14430372486369}; - std::vector const fiducialMomentumStarFluxX{135208.06632708258, - 14014.840899433098}; - std::vector const fiducialMomentumStarFluxY{25328.25203616685, - 2466.5997745560339}; - std::vector const fiducialMomentumStarFluxZ{95071.711914347878, - 1530.7490710422007}; - std::vector const fiducialEnergyStarFlux{116459061.8691024, - 3440.9679468544314}; - std::vector const fiducialMagneticStarFluxY{-13929.399086330559, - -166.32034689537392}; - std::vector const fiducialMagneticStarFluxZ{-52549.811458376971, - -34.380297363339892}; - - double testVelocityStarY; - double testVelocityStarZ; - double testEnergyStar; - double testMagneticStarY; - double testMagneticStarZ; - double testDensityStarFlux; - double testMomentumStarFluxX; - double testMomentumStarFluxY; - double testMomentumStarFluxZ; - double testEnergyStarFlux; - double testMagneticStarFluxY; - double testMagneticStarFluxZ; + std::vector fiducialFlux{ + {38.504606872151484, -3088.4810263278778, -1127.8835013070616, + -4229.5657456907293, -12344.460641662206, -8.6244637840856555, + -56.365490339906408}, + {18.984145880030045, 2250.9966820900618, -2000.3517480656785, + -1155.8240512956793, -2717.2127176227905, 2.9729840344910059, + -43.716615275067923}}; for (size_t i = 0; i < parameters.names.size(); i++) { - mhd::_internal::_starFluxes( - parameters.speedM[i], parameters.speedSide[i], parameters.densityL[i], - parameters.velocityXL[i], parameters.velocityYL[i], - parameters.velocityZL[i], parameters.momentumXL[i], - parameters.momentumYL[i], parameters.momentumZL[i], - parameters.energyL[i], parameters.totalPressureL[i], - parameters.magneticXL[i], parameters.magneticYL[i], - parameters.magneticZL[i], parameters.densityStarL[i], - parameters.totalPressureStarL[i], parameters.densityFluxL[i], - parameters.momentumFluxXL[i], parameters.momentumFluxYL[i], - parameters.momentumFluxZL[i], parameters.energyFluxL[i], - parameters.magneticFluxYL[i], parameters.magneticFluxZL[i], - testVelocityStarY, testVelocityStarZ, testEnergyStar, testMagneticStarY, - testMagneticStarZ, testDensityStarFlux, testMomentumStarFluxX, - testMomentumStarFluxY, testMomentumStarFluxZ, testEnergyStarFlux, - testMagneticStarFluxY, testMagneticStarFluxZ); + mhd::_internal::Flux testFlux = mhd::_internal::nonStarFluxes( + parameters.stateLVec.at(i), parameters.magneticX.at(i)); // Now check results - testingUtilities::checkResults(fiducialVelocityStarY[i], testVelocityStarY, + testingUtilities::checkResults(fiducialFlux[i].density, testFlux.density, + parameters.names.at(i) + ", DensityFlux"); + testingUtilities::checkResults(fiducialFlux[i].momentumX, + testFlux.momentumX, + parameters.names.at(i) + ", MomentumFluxX"); + testingUtilities::checkResults(fiducialFlux[i].momentumY, + testFlux.momentumY, + parameters.names.at(i) + ", MomentumFluxY"); + testingUtilities::checkResults(fiducialFlux[i].momentumZ, + testFlux.momentumZ, + parameters.names.at(i) + ", MomentumFluxZ"); + testingUtilities::checkResults(fiducialFlux[i].magneticY, + testFlux.magneticY, + parameters.names.at(i) + ", MagneticFluxY"); + testingUtilities::checkResults(fiducialFlux[i].magneticZ, + testFlux.magneticZ, + parameters.names.at(i) + ", MagneticFluxZ"); + testingUtilities::checkResults(fiducialFlux[i].energy, testFlux.energy, + parameters.names.at(i) + ", EnergyFlux"); + } +} +// ========================================================================= + +// ========================================================================= +/*! + * \brief Test the mhd::_internal::computeStarState function in the + * non-degenerate case + * + */ +TEST(tMHDHlldInternalComputeStarState, + CorrectInputNonDegenerateExpectCorrectOutput) +{ + testParams const parameters; + + std::vector fiducialStarState{ + {24.101290139122913, 1.4626377138501221, 5.7559806612277464, + 1023.8840191068896, 18.648382121236992, 70.095850905078336}, + {50.132466596958501, 0.85967712862308099, 1.9480712959548112, + 172.06840532772659, 66.595692901872582, 39.389537509454122}}; + + for (size_t i = 0; i < parameters.names.size(); i++) { + mhd::_internal::StarState testStarState = mhd::_internal::computeStarState( + parameters.stateLVec.at(i), parameters.speed.at(i), + parameters.speed.at(i).L, parameters.magneticX.at(i), + parameters.totalPressureStar.at(i)); + + // Now check results + testingUtilities::checkResults(fiducialStarState.at(i).velocityY, + testStarState.velocityY, parameters.names.at(i) + ", VelocityStarY"); - testingUtilities::checkResults(fiducialVelocityStarZ[i], testVelocityStarZ, + testingUtilities::checkResults(fiducialStarState.at(i).velocityZ, + testStarState.velocityZ, parameters.names.at(i) + ", VelocityStarZ"); - testingUtilities::checkResults(fiducialEnergyStar[i], testEnergyStar, + testingUtilities::checkResults(fiducialStarState.at(i).energy, + testStarState.energy, parameters.names.at(i) + ", EnergyStar"); - testingUtilities::checkResults(fiducialMagneticStarY[i], testMagneticStarY, + testingUtilities::checkResults(fiducialStarState.at(i).magneticY, + testStarState.magneticY, parameters.names.at(i) + ", MagneticStarY"); - testingUtilities::checkResults(fiducialMagneticStarZ[i], testMagneticStarZ, + testingUtilities::checkResults(fiducialStarState.at(i).magneticZ, + testStarState.magneticZ, parameters.names.at(i) + ", MagneticStarZ"); + } +} + +/*! + * \brief Test the mhd::_internal::starFluxes function in the non-degenerate + * case + * + */ +TEST(tMHDHlldInternalStarFluxes, CorrectInputNonDegenerateExpectCorrectOutput) +{ + testParams const parameters; + + std::vector fiducialFlux{ + {-45.270724071132321, 1369.1771532285088, -556.91765728768155, + -2368.4452742393819, -21413.063415617493, -83.294404848633206, + -504.8413875424834}, + {61.395380340435793, 283.48596932136809, -101.75517013858293, + -51.343648925162142, -1413.4750762739586, 25.139956754826922, + 78.863254638038882}}; + + for (size_t i = 0; i < parameters.names.size(); i++) { + mhd::_internal::StarState testStarState = mhd::_internal::computeStarState( + parameters.stateLVec.at(i), parameters.speed.at(i), + parameters.speed.at(i).L, parameters.magneticX.at(i), + parameters.totalPressureStar.at(i)); + + mhd::_internal::Flux testFlux = mhd::_internal::starFluxes( + testStarState, parameters.stateLVec.at(i), parameters.flux.at(i), + parameters.speed.at(i), parameters.speed.at(i).L); + + // Now check results testingUtilities::checkResults( - fiducialDensityStarFlux[i], testDensityStarFlux, + fiducialFlux[i].density, testFlux.density, parameters.names.at(i) + ", DensityStarFlux"); testingUtilities::checkResults( - fiducialMomentumStarFluxX[i], testMomentumStarFluxX, + fiducialFlux[i].momentumX, testFlux.momentumX, parameters.names.at(i) + ", MomentumStarFluxX"); testingUtilities::checkResults( - fiducialMomentumStarFluxY[i], testMomentumStarFluxY, + fiducialFlux[i].momentumY, testFlux.momentumY, parameters.names.at(i) + ", MomentumStarFluxY"); testingUtilities::checkResults( - fiducialMomentumStarFluxZ[i], testMomentumStarFluxZ, + fiducialFlux[i].momentumZ, testFlux.momentumZ, parameters.names.at(i) + ", MomentumStarFluxZ"); - testingUtilities::checkResults(fiducialEnergyStarFlux[i], - testEnergyStarFlux, + testingUtilities::checkResults(fiducialFlux[i].energy, testFlux.energy, parameters.names.at(i) + ", EnergyStarFlux"); testingUtilities::checkResults( - fiducialMagneticStarFluxY[i], testMagneticStarFluxY, + fiducialFlux[i].magneticY, testFlux.magneticY, parameters.names.at(i) + ", MagneticStarFluxY"); testingUtilities::checkResults( - fiducialMagneticStarFluxZ[i], testMagneticStarFluxZ, + fiducialFlux[i].magneticZ, testFlux.magneticZ, parameters.names.at(i) + ", MagneticStarFluxZ"); } } /*! - * \brief Test the mhd::_internal::_starFluxes function in the degenerate + * \brief Test the mhd::_internal::starFluxes function in the degenerate * case * */ -TEST(tMHDHlldInternalStarFluxes, CorrectInputDegenerateExpectCorrectOutput) +TEST(tMHDHlldInternalComputeStarState, + CorrectInputDegenerateExpectCorrectOutput) { - testParams const parameters; + testParams parameters; + + std::vector fiducialStarState{ + {24.101290139122913, 1.4626377138501221, 5.7559806612277464, + 4.5171065808847731e+17, 18.648382121236992, 70.095850905078336}, + {50.132466596958501, 0.85967712862308099, 1.9480712959548112, + 172.06840532772659, 66.595692901872582, 39.389537509454122}}; // Used to get us into the degenerate case double const totalPressureStarMultiplier = 1E15; - - std::vector const fiducialVelocityStarY{0.33040135813215948, - 0.69876195899931859}; - std::vector const fiducialVelocityStarZ{1.500111692877206, - 1.8528943583250035}; - std::vector const fiducialEnergyStar{2.7072182962581443e+18, - -76277716432851392}; - std::vector const fiducialMagneticStarY{12.297499156516622, - 63.744719695704063}; - std::vector const fiducialMagneticStarZ{46.224045698787776, - 37.703264551707541}; - std::vector const fiducialDensityStarFlux{506.82678248238807, - 105.14430372486369}; - std::vector const fiducialMomentumStarFluxX{135208.06632708258, - 14014.840899433098}; - std::vector const fiducialMomentumStarFluxY{236.85804348470396, - 19.08858135095122}; - std::vector const fiducialMomentumStarFluxZ{757.76012607552047, - 83.112898961023902}; - std::vector const fiducialEnergyStarFlux{1.9052083339008875e+20, - -2.7901725119926531e+17}; - std::vector const fiducialMagneticStarFluxY{58.989284454159673, - 57.043444592213589}; - std::vector const fiducialMagneticStarFluxZ{29.976925743532302, - 97.733298271413588}; - - double testVelocityStarY; - double testVelocityStarZ; - double testEnergyStar; - double testMagneticStarY; - double testMagneticStarZ; - double testDensityStarFlux; - double testMomentumStarFluxX; - double testMomentumStarFluxY; - double testMomentumStarFluxZ; - double testEnergyStarFlux; - double testMagneticStarFluxY; - double testMagneticStarFluxZ; + parameters.stateLVec.at(0).totalPressure *= totalPressureStarMultiplier; for (size_t i = 0; i < parameters.names.size(); i++) { - mhd::_internal::_starFluxes( - parameters.speedM[i], parameters.speedSide[i], parameters.densityL[i], - parameters.velocityXL[i], parameters.velocityYL[i], - parameters.velocityZL[i], parameters.momentumXL[i], - parameters.momentumYL[i], parameters.momentumZL[i], - parameters.energyL[i], parameters.totalPressureL[i], - parameters.magneticXL[i], parameters.magneticYL[i], - parameters.magneticZL[i], parameters.densityStarL[i], - parameters.totalPressureStarL[i] * totalPressureStarMultiplier, - parameters.densityFluxL[i], parameters.momentumFluxXL[i], - parameters.momentumFluxYL[i], parameters.momentumFluxZL[i], - parameters.energyFluxL[i], parameters.magneticFluxYL[i], - parameters.magneticFluxZL[i], testVelocityStarY, testVelocityStarZ, - testEnergyStar, testMagneticStarY, testMagneticStarZ, - testDensityStarFlux, testMomentumStarFluxX, testMomentumStarFluxY, - testMomentumStarFluxZ, testEnergyStarFlux, testMagneticStarFluxY, - testMagneticStarFluxZ); + mhd::_internal::StarState testStarState = mhd::_internal::computeStarState( + parameters.stateLVec.at(i), parameters.speed.at(i), + parameters.speed.at(i).L, parameters.magneticX.at(i), + parameters.totalPressureStar.at(i)); // Now check results - testingUtilities::checkResults(fiducialVelocityStarY[i], testVelocityStarY, + testingUtilities::checkResults(fiducialStarState.at(i).velocityY, + testStarState.velocityY, parameters.names.at(i) + ", VelocityStarY"); - testingUtilities::checkResults(fiducialVelocityStarZ[i], testVelocityStarZ, + testingUtilities::checkResults(fiducialStarState.at(i).velocityZ, + testStarState.velocityZ, parameters.names.at(i) + ", VelocityStarZ"); - testingUtilities::checkResults(fiducialEnergyStar[i], testEnergyStar, + testingUtilities::checkResults(fiducialStarState.at(i).energy, + testStarState.energy, parameters.names.at(i) + ", EnergyStar"); - testingUtilities::checkResults(fiducialMagneticStarY[i], testMagneticStarY, + testingUtilities::checkResults(fiducialStarState.at(i).magneticY, + testStarState.magneticY, parameters.names.at(i) + ", MagneticStarY"); - testingUtilities::checkResults(fiducialMagneticStarZ[i], testMagneticStarZ, + testingUtilities::checkResults(fiducialStarState.at(i).magneticZ, + testStarState.magneticZ, parameters.names.at(i) + ", MagneticStarZ"); + } +} + +TEST(tMHDHlldInternalStarFluxes, CorrectInputDegenerateExpectCorrectOutput) +{ + testParams parameters; + + // Used to get us into the degenerate case + double const totalPressureStarMultiplier = 1E15; + + std::vector fiducialFlux{ + {-144.2887586578122, 1450.1348804310369, -773.30617492819886, + -151.70644305354989, 1378.3797024673304, -1056.6283526454272, + -340.62268733874163}, + {10.040447333773272, 284.85426012223729, -499.05932057162761, + 336.35271628090368, 171.28451793017882, 162.96661864443826, + -524.05361885198215}}; + + parameters.totalPressureStar.at(0) *= totalPressureStarMultiplier; + parameters.totalPressureStar.at(1) *= totalPressureStarMultiplier; + + for (size_t i = 0; i < parameters.names.size(); i++) { + mhd::_internal::Flux testFlux = mhd::_internal::starFluxes( + parameters.starStateLVec.at(i), parameters.stateLVec.at(i), + parameters.flux.at(i), parameters.speed.at(i), + parameters.speed.at(i).L); + + // Now check results testingUtilities::checkResults( - fiducialDensityStarFlux[i], testDensityStarFlux, + fiducialFlux[i].density, testFlux.density, parameters.names.at(i) + ", DensityStarFlux"); testingUtilities::checkResults( - fiducialMomentumStarFluxX[i], testMomentumStarFluxX, + fiducialFlux[i].momentumX, testFlux.momentumX, parameters.names.at(i) + ", MomentumStarFluxX"); testingUtilities::checkResults( - fiducialMomentumStarFluxY[i], testMomentumStarFluxY, + fiducialFlux[i].momentumY, testFlux.momentumY, parameters.names.at(i) + ", MomentumStarFluxY"); testingUtilities::checkResults( - fiducialMomentumStarFluxZ[i], testMomentumStarFluxZ, + fiducialFlux[i].momentumZ, testFlux.momentumZ, parameters.names.at(i) + ", MomentumStarFluxZ"); - testingUtilities::checkResults(fiducialEnergyStarFlux[i], - testEnergyStarFlux, + testingUtilities::checkResults(fiducialFlux[i].energy, testFlux.energy, parameters.names.at(i) + ", EnergyStarFlux"); testingUtilities::checkResults( - fiducialMagneticStarFluxY[i], testMagneticStarFluxY, + fiducialFlux[i].magneticY, testFlux.magneticY, parameters.names.at(i) + ", MagneticStarFluxY"); testingUtilities::checkResults( - fiducialMagneticStarFluxZ[i], testMagneticStarFluxZ, + fiducialFlux[i].magneticZ, testFlux.magneticZ, parameters.names.at(i) + ", MagneticStarFluxZ"); } } @@ -2664,69 +2662,8 @@ TEST(tMHDHlldInternalStarFluxes, CorrectInputDegenerateExpectCorrectOutput) // ========================================================================= /*! - * \brief Test the mhd::_internal::_nonStarFluxes function - * - */ -TEST(tMHDHlldInternalNonStarFluxes, CorrectInputExpectCorrectOutput) -{ - testParams const parameters; - - std::vector const fiducialDensityFlux{38.504606872151484, - 18.984145880030045}; - std::vector const fiducialMomentumFluxX{-3088.4810263278778, - 2250.9966820900618}; - std::vector const fiducialMomentumFluxY{-1127.8835013070616, - -2000.3517480656785}; - std::vector const fiducialMomentumFluxZ{-4229.5657456907293, - -1155.8240512956793}; - std::vector const fiducialMagneticFluxY{-8.6244637840856555, - 2.9729840344910059}; - std::vector const fiducialMagneticFluxZ{-56.365490339906408, - -43.716615275067923}; - std::vector const fiducialEnergyFlux{-12344.460641662206, - -2717.2127176227905}; - - double testDensityFlux; - double testMomentumFluxX; - double testMomentumFluxY; - double testMomentumFluxZ; - double testMagneticFluxY; - double testMagneticFluxZ; - double testEnergyFlux; - - for (size_t i = 0; i < parameters.names.size(); i++) { - mhd::_internal::_nonStarFluxes( - parameters.momentumXL[i], parameters.velocityXL[i], - parameters.velocityYL[i], parameters.velocityZL[i], - parameters.totalPressureL[i], parameters.energyL[i], - parameters.magneticXL[i], parameters.magneticYL[i], - parameters.magneticZL[i], testDensityFlux, testMomentumFluxX, - testMomentumFluxY, testMomentumFluxZ, testMagneticFluxY, - testMagneticFluxZ, testEnergyFlux); - - // Now check results - testingUtilities::checkResults(fiducialDensityFlux[i], testDensityFlux, - parameters.names.at(i) + ", DensityFlux"); - testingUtilities::checkResults(fiducialMomentumFluxX[i], testMomentumFluxX, - parameters.names.at(i) + ", MomentumFluxX"); - testingUtilities::checkResults(fiducialMomentumFluxY[i], testMomentumFluxY, - parameters.names.at(i) + ", MomentumFluxY"); - testingUtilities::checkResults(fiducialMomentumFluxZ[i], testMomentumFluxZ, - parameters.names.at(i) + ", MomentumFluxZ"); - testingUtilities::checkResults(fiducialMagneticFluxY[i], testMagneticFluxY, - parameters.names.at(i) + ", MagneticFluxY"); - testingUtilities::checkResults(fiducialMagneticFluxZ[i], testMagneticFluxZ, - parameters.names.at(i) + ", MagneticFluxZ"); - testingUtilities::checkResults(fiducialEnergyFlux[i], testEnergyFlux, - parameters.names.at(i) + ", EnergyFlux"); - } -} -// ========================================================================= - -// ========================================================================= -/*! - * \brief Test the mhd::_internal::_doubleStarState function. Non-degenerate - * state + * \brief Test the mhd::_internal::computeDoubleStarState function. + * Non-degenerate state * */ TEST(tMHDHlldInternalDoubleStarState, @@ -2734,65 +2671,43 @@ TEST(tMHDHlldInternalDoubleStarState, { testParams const parameters; - double const fixedEpsilon = 7E-12; - - std::vector const fiducialVelocityDoubleStarY{-1.5775383335759607, - 3.803188977150934}; - std::vector const fiducialVelocityDoubleStarZ{-3.4914062207842482, - -4.2662645349592765}; - std::vector const fiducialMagneticDoubleStarY{45.259313435283325, - 71.787329583230417}; - std::vector const fiducialMagneticDoubleStarZ{36.670978215630669, - 53.189673238238178}; - std::vector const fiducialEnergyDoubleStarL{-2048.1953674500514, - -999.79694164635089}; - std::vector const fiducialEnergyDoubleStarR{1721.0582276783764, - 252.04716752257781}; - - double testVelocityDoubleStarY; - double testVelocityDoubleStarZ; - double testMagneticDoubleStarY; - double testMagneticDoubleStarZ; - double testEnergyDoubleStarL; - double testEnergyDoubleStarR; + std::vector fiducialState{ + {-1.5775383335759607, -3.4914062207842482, 45.259313435283325, + 36.670978215630669, -2048.1953674500523, 1721.0582276783819}, + {3.803188977150934, -4.2662645349592765, 71.787329583230417, + 53.189673238238178, -999.79694164635089, 252.047167522579}}; for (size_t i = 0; i < parameters.names.size(); i++) { - mhd::_internal::_doubleStarState( - parameters.speedM[i], parameters.magneticXL[i], - parameters.totalPressureStarL[i], parameters.densityStarL[i], - parameters.velocityStarYL[i], parameters.velocityStarZL[i], - parameters.energyStarL[i], parameters.magneticStarYL[i], - parameters.magneticStarZL[i], parameters.densityStarR[i], - parameters.velocityStarYR[i], parameters.velocityStarZR[i], - parameters.energyStarR[i], parameters.magneticStarYR[i], - parameters.magneticStarZR[i], testVelocityDoubleStarY, - testVelocityDoubleStarZ, testMagneticDoubleStarY, - testMagneticDoubleStarZ, testEnergyDoubleStarL, testEnergyDoubleStarR); + mhd::_internal::DoubleStarState const testState = + mhd::_internal::computeDoubleStarState( + parameters.starStateLVec.at(i), parameters.starStateRVec.at(i), + parameters.magneticX.at(i), parameters.totalPressureStar.at(i), + parameters.speed.at(i)); // Now check results testingUtilities::checkResults( - fiducialVelocityDoubleStarY[i], testVelocityDoubleStarY, + fiducialState.at(i).velocityY, testState.velocityY, parameters.names.at(i) + ", VelocityDoubleStarY"); testingUtilities::checkResults( - fiducialVelocityDoubleStarZ[i], testVelocityDoubleStarZ, + fiducialState.at(i).velocityZ, testState.velocityZ, parameters.names.at(i) + ", VelocityDoubleStarZ"); testingUtilities::checkResults( - fiducialMagneticDoubleStarY[i], testMagneticDoubleStarY, + fiducialState.at(i).magneticY, testState.magneticY, parameters.names.at(i) + ", MagneticDoubleStarY"); testingUtilities::checkResults( - fiducialMagneticDoubleStarZ[i], testMagneticDoubleStarZ, + fiducialState.at(i).magneticZ, testState.magneticZ, parameters.names.at(i) + ", MagneticDoubleStarZ"); testingUtilities::checkResults( - fiducialEnergyDoubleStarL[i], testEnergyDoubleStarL, + fiducialState.at(i).energyL, testState.energyL, parameters.names.at(i) + ", EnergyDoubleStarL"); testingUtilities::checkResults( - fiducialEnergyDoubleStarR[i], testEnergyDoubleStarR, - parameters.names.at(i) + ", EnergyDoubleStarR", fixedEpsilon); + fiducialState.at(i).energyR, testState.energyR, + parameters.names.at(i) + ", EnergyDoubleStarR"); } } /*! - * \brief Test the mhd::_internal::_doubleStarState function in the + * \brief Test the mhd::_internal::computeDoubleStarState function in the * degenerate state. * */ @@ -2800,56 +2715,36 @@ TEST(tMHDHlldInternalDoubleStarState, CorrectInputDegenerateExpectCorrectOutput) { testParams const parameters; - std::vector const fiducialVelocityDoubleStarY{1.5746306813243216, - 1.4363926014039052}; - std::vector const fiducialVelocityDoubleStarZ{1.3948193325212686, - 1.1515754515491903}; - std::vector const fiducialMagneticDoubleStarY{62.093488291430653, - 54.279167444036723}; - std::vector const fiducialMagneticDoubleStarZ{62.765890944643196, - 93.267654555096414}; - std::vector const fiducialEnergyDoubleStarL{6.579867455284738, - 30.450436649083692}; - std::vector const fiducialEnergyDoubleStarR{90.44484278669114, - 61.33664731346812}; - - double testVelocityDoubleStarY; - double testVelocityDoubleStarZ; - double testMagneticDoubleStarY; - double testMagneticDoubleStarZ; - double testEnergyDoubleStarL; - double testEnergyDoubleStarR; + std::vector fiducialState{ + {1.0519818825796206, 0.68198273634686157, 26.835645069149873, + 7.4302316959173442, -999.79694164635089, 90.44484278669114}, + {0.61418047569879897, 0.71813570322922715, 98.974446283273181, + 10.696380763901459, -999.79694164635089, 61.33664731346812}}; for (size_t i = 0; i < parameters.names.size(); i++) { - mhd::_internal::_doubleStarState( - parameters.speedM[i], 0.0, parameters.totalPressureStarL[i], - parameters.densityStarL[i], parameters.velocityStarYL[i], - parameters.velocityStarZL[i], parameters.energyStarL[i], - parameters.magneticStarYL[i], parameters.magneticStarZL[i], - parameters.densityStarR[i], parameters.velocityStarYR[i], - parameters.velocityStarZR[i], parameters.energyStarR[i], - parameters.magneticStarYR[i], parameters.magneticStarZR[i], - testVelocityDoubleStarY, testVelocityDoubleStarZ, - testMagneticDoubleStarY, testMagneticDoubleStarZ, testEnergyDoubleStarL, - testEnergyDoubleStarR); + mhd::_internal::DoubleStarState const testState = + mhd::_internal::computeDoubleStarState( + parameters.starStateLVec.at(i), parameters.starStateRVec.at(i), 0.0, + parameters.totalPressureStar.at(i), parameters.speed.at(i)); + // Now check results testingUtilities::checkResults( - fiducialVelocityDoubleStarY[i], testVelocityDoubleStarY, + fiducialState.at(i).velocityY, testState.velocityY, parameters.names.at(i) + ", VelocityDoubleStarY"); testingUtilities::checkResults( - fiducialVelocityDoubleStarZ[i], testVelocityDoubleStarZ, + fiducialState.at(i).velocityZ, testState.velocityZ, parameters.names.at(i) + ", VelocityDoubleStarZ"); testingUtilities::checkResults( - fiducialMagneticDoubleStarY[i], testMagneticDoubleStarY, + fiducialState.at(i).magneticY, testState.magneticY, parameters.names.at(i) + ", MagneticDoubleStarY"); testingUtilities::checkResults( - fiducialMagneticDoubleStarZ[i], testMagneticDoubleStarZ, + fiducialState.at(i).magneticZ, testState.magneticZ, parameters.names.at(i) + ", MagneticDoubleStarZ"); testingUtilities::checkResults( - fiducialEnergyDoubleStarL[i], testEnergyDoubleStarL, + fiducialState.at(i).energyL, testState.energyL, parameters.names.at(i) + ", EnergyDoubleStarL"); testingUtilities::checkResults( - fiducialEnergyDoubleStarR[i], testEnergyDoubleStarR, + fiducialState.at(i).energyR, testState.energyR, parameters.names.at(i) + ", EnergyDoubleStarR"); } } @@ -2864,61 +2759,44 @@ TEST(tMHDHlldInternalDoubleStarFluxes, CorrectInputExpectCorrectOutput) { testParams const parameters; - std::vector const fiducialMomentumDoubleStarFluxX{ - 1937.3388606704509, -21.762854649386174}; - std::vector const fiducialMomentumDoubleStarFluxY{-1555.8040962754276, - 39.237503643804175}; - std::vector const fiducialMomentumDoubleStarFluxZ{ - -801.91650203165148, -64.746529703562871}; - std::vector const fiducialEnergyDoubleStarFlux{2781.4706748628528, - 136.89786983482355}; - std::vector const fiducialMagneticDoubleStarFluxY{-2799.7143456312342, - 141.2263259922299}; - std::vector const fiducialMagneticDoubleStarFluxZ{ - 1536.9628864256708, -31.569502877970095}; - - double testMomentumDoubleStarFluxX; - double testMomentumDoubleStarFluxY; - double testMomentumDoubleStarFluxZ; - double testEnergyDoubleStarFlux; - double testMagneticDoubleStarFluxY; - double testMagneticDoubleStarFluxZ; + std::vector const fiducialFlux{ + {-144.2887586578122, 1450.1348804310369, -332.80193639987715, + 83.687152337186944, 604.70003506833029, -245.53635448727721, + -746.94190287166407}, + {10.040447333773216, 284.85426012223729, -487.87930516727664, + 490.91728596722157, 59.061079503595323, 30.244176588794346, + -466.15336272175193}}; for (size_t i = 0; i < parameters.names.size(); i++) { - mhd::_internal::_doubleStarFluxes( - parameters.speedSide[i], parameters.momentumStarFluxX[i], - parameters.momentumStarFluxY[i], parameters.momentumStarFluxZ[i], - parameters.energyStarFlux[i], parameters.magneticStarFluxY[i], - parameters.magneticStarFluxZ[i], parameters.densityStarL[i], - parameters.velocityStarXL[i], parameters.velocityStarYL[i], - parameters.velocityStarZL[i], parameters.energyStarL[i], - parameters.magneticStarYL[i], parameters.magneticStarZL[i], - parameters.velocityDoubleStarXL[i], parameters.velocityDoubleStarYL[i], - parameters.velocityDoubleStarZL[i], parameters.energyDoubleStar[i], - parameters.magneticDoubleStarY[i], parameters.magneticDoubleStarZ[i], - testMomentumDoubleStarFluxX, testMomentumDoubleStarFluxY, - testMomentumDoubleStarFluxZ, testEnergyDoubleStarFlux, - testMagneticDoubleStarFluxY, testMagneticDoubleStarFluxZ); + mhd::_internal::Flux const testFlux = + mhd::_internal::computeDoubleStarFluxes( + parameters.DoubleStarStateVec.at(i), + parameters.DoubleStarStateVec.at(i).energyL, + parameters.starStateLVec.at(i), parameters.stateLVec.at(i), + parameters.flux.at(i), parameters.speed.at(i), + parameters.speed.at(i).L, parameters.speed.at(i).LStar); // Now check results testingUtilities::checkResults( - fiducialMomentumDoubleStarFluxX[i], testMomentumDoubleStarFluxX, - parameters.names.at(i) + ", MomentumDoubleStarFluxX"); + fiducialFlux[i].density, testFlux.density, + parameters.names.at(i) + ", DensityStarFlux"); testingUtilities::checkResults( - fiducialMomentumDoubleStarFluxY[i], testMomentumDoubleStarFluxY, - parameters.names.at(i) + ", MomentumDoubleStarFluxY"); + fiducialFlux[i].momentumX, testFlux.momentumX, + parameters.names.at(i) + ", MomentumStarFluxX"); testingUtilities::checkResults( - fiducialMomentumDoubleStarFluxZ[i], testMomentumDoubleStarFluxZ, - parameters.names.at(i) + ", MomentumDoubleStarFluxZ"); + fiducialFlux[i].momentumY, testFlux.momentumY, + parameters.names.at(i) + ", MomentumStarFluxY"); testingUtilities::checkResults( - fiducialEnergyDoubleStarFlux[i], testEnergyDoubleStarFlux, - parameters.names.at(i) + ", EnergyDoubleStarFlux"); + fiducialFlux[i].momentumZ, testFlux.momentumZ, + parameters.names.at(i) + ", MomentumStarFluxZ"); + testingUtilities::checkResults(fiducialFlux[i].energy, testFlux.energy, + parameters.names.at(i) + ", EnergyStarFlux"); testingUtilities::checkResults( - fiducialMagneticDoubleStarFluxY[i], testMagneticDoubleStarFluxY, - parameters.names.at(i) + ", MagneticDoubleStarFluxY"); + fiducialFlux[i].magneticY, testFlux.magneticY, + parameters.names.at(i) + ", MagneticStarFluxY"); testingUtilities::checkResults( - fiducialMagneticDoubleStarFluxZ[i], testMagneticDoubleStarFluxZ, - parameters.names.at(i) + ", MagneticDoubleStarFluxZ"); + fiducialFlux[i].magneticZ, testFlux.magneticZ, + parameters.names.at(i) + ", MagneticStarFluxZ"); } } // ========================================================================= @@ -2930,14 +2808,9 @@ TEST(tMHDHlldInternalDoubleStarFluxes, CorrectInputExpectCorrectOutput) */ TEST(tMHDHlldInternalReturnFluxes, CorrectInputExpectCorrectOutput) { - double const dummyValue = 999; - double const densityFlux = 1; - double const momentumFluxX = 2; - double const momentumFluxY = 3; - double const momentumFluxZ = 4; - double const energyFlux = 5; - double const magneticFluxY = 6; - double const magneticFluxZ = 7; + double const dummyValue = 999; + mhd::_internal::Flux inputFlux{1, 2, 3, 4, 5, 6, 7}; + mhd::_internal::State inputState{8, 9, 10, 11, 12, 13, 14, 15, 16}; int threadId = 0; int n_cells = 10; @@ -2954,13 +2827,12 @@ TEST(tMHDHlldInternalReturnFluxes, CorrectInputExpectCorrectOutput) int const &fidIndex, std::string const &name) { int index = std::distance(vec.begin(), std::find(vec.begin(), vec.end(), num)); - // EXPECT_EQ(fidIndex, index) << "Error in " << name << " index" << - // std::endl; + EXPECT_EQ(fidIndex, index) << "Error in " << name << " index" << std::endl; return index; }; - for (size_t direction = 0; direction < 3; direction++) { + for (size_t direction = 0; direction < 1; direction++) { int o1, o2, o3; if (direction == 0) { o1 = 1; @@ -2981,36 +2853,34 @@ TEST(tMHDHlldInternalReturnFluxes, CorrectInputExpectCorrectOutput) std::vector testFluxArray(nFields * n_cells, dummyValue); // Fiducial Indices - int const fiducialDensityIndex = threadId; + int const fiducialDensityIndex = threadId + n_cells * grid_enum::density; int const fiducialMomentumIndexX = threadId + n_cells * o1; int const fiducialMomentumIndexY = threadId + n_cells * o2; int const fiducialMomentumIndexZ = threadId + n_cells * o3; - int const fiducialEnergyIndex = threadId + n_cells * 4; + int const fiducialEnergyIndex = threadId + n_cells * grid_enum::Energy; int const fiducialMagneticYIndex = threadId + n_cells * (grid_enum::magnetic_x); int const fiducialMagneticZIndex = threadId + n_cells * (grid_enum::magnetic_y); - mhd::_internal::_returnFluxes(threadId, o1, o2, o3, n_cells, - testFluxArray.data(), densityFlux, - momentumFluxX, momentumFluxY, momentumFluxZ, - energyFlux, magneticFluxY, magneticFluxZ); + mhd::_internal::returnFluxes(threadId, o1, o2, o3, n_cells, + testFluxArray.data(), inputFlux, inputState); // Find the indices for the various fields - int densityLoc = - findIndex(testFluxArray, densityFlux, fiducialDensityIndex, "density"); - int momentumXLocX = findIndex(testFluxArray, momentumFluxX, + int densityLoc = findIndex(testFluxArray, inputFlux.density, + fiducialDensityIndex, "density"); + int momentumXLocX = findIndex(testFluxArray, inputFlux.momentumX, fiducialMomentumIndexX, "momentum X"); - int momentumYLocY = findIndex(testFluxArray, momentumFluxY, + int momentumYLocY = findIndex(testFluxArray, inputFlux.momentumY, fiducialMomentumIndexY, "momentum Y"); - int momentumZLocZ = findIndex(testFluxArray, momentumFluxZ, + int momentumZLocZ = findIndex(testFluxArray, inputFlux.momentumZ, fiducialMomentumIndexZ, "momentum Z"); - int energyLoc = - findIndex(testFluxArray, energyFlux, fiducialEnergyIndex, "energy"); - int magneticYLoc = findIndex(testFluxArray, magneticFluxY, - fiducialMagneticYIndex, "magnetic Y"); - int magneticZLoc = findIndex(testFluxArray, magneticFluxZ, - fiducialMagneticZIndex, "magnetic Z"); + int energyLoc = findIndex(testFluxArray, inputFlux.energy, + fiducialEnergyIndex, "energy"); + int magneticYLoc = findIndex(testFluxArray, inputFlux.magneticY, + fiducialMagneticYIndex, "magnetic Y"); + int magneticZLoc = findIndex(testFluxArray, inputFlux.magneticZ, + fiducialMagneticZIndex, "magnetic Z"); for (size_t i = 0; i < testFluxArray.size(); i++) { // Skip the already checked indices @@ -3026,6 +2896,95 @@ TEST(tMHDHlldInternalReturnFluxes, CorrectInputExpectCorrectOutput) } } } +} +// ========================================================================= + +// ========================================================================= +/*! + * \brief Test the mhd::_internal::starTotalPressure function + * + */ +TEST(tMHDHlldInternalStarTotalPressure, CorrectInputExpectCorrectOutput) +{ + testParams const parameters; + + std::vector const fiducialPressure{6802.2800807224075, + 3476.1984612875144}; + + for (size_t i = 0; i < parameters.names.size(); i++) { + Real const testPressure = mhd::_internal::starTotalPressure( + parameters.stateLVec.at(i), parameters.stateRVec.at(i), + parameters.speed.at(i)); + + // Now check results + testingUtilities::checkResults( + fiducialPressure.at(i), testPressure, + parameters.names.at(i) + ", total pressure in the star states"); + } +} +// ========================================================================= + +// ========================================================================= +/*! + * \brief Test the mhd::_internal::loadState function + * + */ +TEST(tMHDHlldInternalLoadState, CorrectInputExpectCorrectOutput) +{ + testParams const parameters; + int const threadId = 0; + int const n_cells = 10; + std::vector interfaceArray(n_cells * grid_enum::num_fields); + std::iota(std::begin(interfaceArray), std::end(interfaceArray), 1.); + + std::vector const fiducialState{ + {1, 11, 21, 31, 41, 51, 61, 9.9999999999999995e-21, 7462.3749918998346}, + {1, 21, 31, 11, 41, 51, 61, 9.9999999999999995e-21, 7462.3749918998346}, + {1, 31, 11, 21, 41, 51, 61, 9.9999999999999995e-21, 7462.3749918998346}, + }; + + for (size_t direction = 0; direction < 3; direction++) { + int o1, o2, o3; + if (direction == 0) { + o1 = 1; + o2 = 2; + o3 = 3; + } + if (direction == 1) { + o1 = 2; + o2 = 3; + o3 = 1; + } + if (direction == 2) { + o1 = 3; + o2 = 1; + o3 = 2; + } + + mhd::_internal::State const testState = mhd::_internal::loadState( + interfaceArray.data(), parameters.magneticX.at(0), parameters.gamma, + threadId, n_cells, o1, o2, o3); + + // Now check results + testingUtilities::checkResults(fiducialState.at(direction).density, + testState.density, ", Density"); + testingUtilities::checkResults(fiducialState.at(direction).velocityX, + testState.velocityX, ", velocityX"); + testingUtilities::checkResults(fiducialState.at(direction).velocityY, + testState.velocityY, ", velocityY"); + testingUtilities::checkResults(fiducialState.at(direction).velocityZ, + testState.velocityZ, ", velocityZ"); + testingUtilities::checkResults(fiducialState.at(direction).energy, + testState.energy, ", energy"); + testingUtilities::checkResults(fiducialState.at(direction).magneticY, + testState.magneticY, ", magneticY"); + testingUtilities::checkResults(fiducialState.at(direction).magneticZ, + testState.magneticZ, ", magneticZ"); + testingUtilities::checkResults(fiducialState.at(direction).gasPressure, + testState.gasPressure, ", gasPressure"); + testingUtilities::checkResults(fiducialState.at(direction).totalPressure, + testState.totalPressure, ", totalPressure"); + } } // ========================================================================= #endif // MHD diff --git a/src/utils/mhd_utilities.h b/src/utils/mhd_utilities.h index 288de6314..9f5903299 100644 --- a/src/utils/mhd_utilities.h +++ b/src/utils/mhd_utilities.h @@ -14,6 +14,7 @@ // Local Includes #include "../global/global.h" #include "../global/global_cuda.h" +#include "../riemann_solvers/hlld_cuda.h" #include "../utils/cuda_utilities.h" #include "../utils/gpu.hpp" @@ -129,6 +130,25 @@ inline __host__ __device__ Real computeGasPressure( return fmax(pressure, TINY_NUMBER); } + +/*! + * \brief Specialzation of mhd::utils::computeGasPressure for use in the HLLD + * solver + * + * \param state The State to compute the gas pressure of + * \param magneticX The X magnetic field + * \param gamma The adiabatic index + * \return Real The gas pressure + */ +inline __host__ __device__ Real +computeGasPressure(mhd::_internal::State const &state, Real const &magneticX, + Real const &gamma) +{ + return mhd::utils::computeGasPressure( + state.energy, state.density, state.velocityX * state.density, + state.velocityY * state.density, state.velocityZ * state.density, + magneticX, state.magneticY, state.magneticZ, gamma); +} // ========================================================================= // ========================================================================= From 74e5adb4b1144aca83b8a859ad42cc5ea71b7e3f Mon Sep 17 00:00:00 2001 From: Alwin Date: Fri, 3 Feb 2023 05:27:11 -0500 Subject: [PATCH 187/694] improve restart quality with hydro and gravity --- builds/make.host.frontier | 2 +- builds/make.type.disk | 5 +- src/global/global_cuda.h | 18 +++++++ src/gravity/grav3D.cpp | 7 +++ src/gravity/grav3D.h | 5 ++ src/gravity/gravity_restart.cpp | 94 +++++++++++++++++++++++++++++++++ src/grid/grid3D.cpp | 25 +++++---- src/grid/grid3D.h | 2 + src/io/io.cpp | 17 +++--- src/io/io.h | 2 + src/main.cpp | 10 ++-- src/particles/io_particles.cpp | 2 +- src/particles/supernova.h | 4 -- 13 files changed, 161 insertions(+), 32 deletions(-) create mode 100644 src/gravity/gravity_restart.cpp diff --git a/builds/make.host.frontier b/builds/make.host.frontier index 69f715871..d9efba818 100644 --- a/builds/make.host.frontier +++ b/builds/make.host.frontier @@ -16,7 +16,7 @@ GPUFLAGS_DEBUG = -g -O0 -std=c++17 --offload-arch=gfx90a -Wall -Wno-unused-re HIPCONFIG = -I$(ROCM_PATH)/include $(shell hipconfig -C) # workaround for Rocm 5.2 warnings #HIPCONFIG = $(shell hipconfig -C) -OMP_NUM_THREADS = 8 +OMP_NUM_THREADS = 7 #-- How to launch job JOB_LAUNCH = srun -u -A STF016 -n 1 -c 8 diff --git a/builds/make.type.disk b/builds/make.type.disk index 9696c0d03..a95560cf1 100644 --- a/builds/make.type.disk +++ b/builds/make.type.disk @@ -36,12 +36,11 @@ DFLAGS += -DTEMPERATURE_FLOOR DFLAGS += -DCOOLING_GPU #DFLAGS += -DCLOUDY_COOL DFLAGS += -DDE -#DFLAGS += -DCPU_TIME +DFLAGS += -DCPU_TIME DFLAGS += -DAVERAGE_SLOW_CELLS DFLAGS += -DHYDRO_GPU -OUTPUT ?= -DOUTPUT -DHDF5 -# -DSLICES +OUTPUT ?= -DOUTPUT -DHDF5 -DSLICES -DPROJECTION DFLAGS += $(OUTPUT) DFLAGS += $(MPI_GPU) diff --git a/src/global/global_cuda.h b/src/global/global_cuda.h index 983c6eba4..eeb8b6cc7 100644 --- a/src/global/global_cuda.h +++ b/src/global/global_cuda.h @@ -116,6 +116,24 @@ __device__ double atomicAdd(double *address, double val) } #endif +// This helper function exists to easily enable/disable printfs inside kernels +// And makes it easier to find printfs inside kernels +// For any printf in kernel which is FATAL, use printf instead of kernel_printf, since crashing the GPU in fatal situation is ok +// If printf is not supported by the GPU for any reason, disable non-fatal kernel printfs to avoid crashing the GPU +// If debug printfs should be turned off during production, disable non-fatal kernel printfs + +#ifndef DISABLE_KERNEL_PRINTF +#define kernel_printf printf +#else +inline __device__ int kernel_printf(const char * format, ...) +{ + // printf returns number of characters printed if success, negative value otherwise + return 0; +} +#endif //DISABLE_KERNEL_PRINTF + + + #endif // GLOBAL_CUDA_H #endif // CUDA diff --git a/src/gravity/grav3D.cpp b/src/gravity/grav3D.cpp index 20c64f594..d388806ea 100644 --- a/src/gravity/grav3D.cpp +++ b/src/gravity/grav3D.cpp @@ -120,6 +120,13 @@ void Grav3D::Initialize(Real x_min, Real y_min, Real z_min, Real x_max, nx_total, ny_total, nz_total, nx_local, ny_local, nz_local, dx, dy, dz); #endif + + // At the end of initializing, set restart state if needed + + if ((strcmp(P->init, "Read_Grid") == 0) && (P->nfile > 0)) + { + Read_Restart_HDF5(P, P->nfile); + } } void Grav3D::AllocateMemory_CPU(void) diff --git a/src/gravity/grav3D.h b/src/gravity/grav3D.h index 9203ac138..d5e0d4750 100644 --- a/src/gravity/grav3D.h +++ b/src/gravity/grav3D.h @@ -201,11 +201,16 @@ class Grav3D void Initialize_values_CPU(); void FreeMemory_CPU(void); + void Read_Restart_HDF5(struct parameters* P, int nfile); + void Write_Restart_HDF5(struct parameters* P, int nfile); + Real Get_Average_Density(); Real Get_Average_Density_function(int g_start, int g_end); void Set_Boundary_Flags(int *flags); + + #ifdef SOR void Copy_Isolated_Boundary_To_GPU_buffer(Real *isolated_boundary_h, Real *isolated_boundary_d, diff --git a/src/gravity/gravity_restart.cpp b/src/gravity/gravity_restart.cpp new file mode 100644 index 000000000..e3626735a --- /dev/null +++ b/src/gravity/gravity_restart.cpp @@ -0,0 +1,94 @@ +// Special functions needed to make restart (init=Read_Grid) consistent with running continuously + +#include + +#ifdef GRAVITY +#include "../io/io.h" +#include "../gravity/grav3D.h" +#endif + +#ifdef HDF5 +#include +#endif + +void Gravity_Restart_Filename(char* filename, char* dirname, int nfile) +{ + #ifdef MPI_CHOLLA + sprintf(filename,"%s%d_gravity.h5.%d",dirname,nfile,procID); + #else + sprintf(filename,"%s%d_gravity.h5",dirname,nfile); + #endif +} + +#if defined(GRAVITY) && defined(GRAVITY_RESTART) && defined(HDF5) +void Grav3D::Read_Restart_HDF5(struct parameters* P, int nfile) +{ + H5open(); + char filename[MAXLEN]; + Gravity_Restart_Filename(filename, P->indir, nfile); + hid_t file_id = H5Fopen(filename, H5F_ACC_RDONLY, H5P_DEFAULT); + + // Read dt_now + hid_t attribute_id = H5Aopen(file_id, "dt_now", H5P_DEFAULT); + herr_t status = H5Aread(attribute_id, H5T_NATIVE_DOUBLE, &dt_now); + status = H5Aclose(attribute_id); + + // Read potential and copy to device to be used as potential n-1 + Read_HDF5_Dataset(file_id, F.potential_1_h, "/potential"); + #ifdef GRAVITY_GPU + CudaSafeCall( cudaMemcpy(F.potential_1_d, F.potential_1_h, n_cells_potential*sizeof(Real), cudaMemcpyHostToDevice) ); + #endif + + + H5Fclose(file_id); + H5close(); + + // Set INITIAL to false + INITIAL = false; +} + +void Grav3D::Write_Restart_HDF5(struct parameters* P, int nfile) +{ + H5open(); + char filename[MAXLEN]; + Gravity_Restart_Filename(filename, P->outdir, nfile); + hid_t file_id = H5Fcreate(filename, H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT); + + // Write dt_now + hsize_t attr_dims = 1; + hid_t dataspace_id = H5Screate_simple(1, &attr_dims, NULL); + + hid_t attribute_id = H5Acreate(file_id, "dt_now", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT); + herr_t status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &dt_now); + status = H5Aclose(attribute_id); + + status = H5Sclose(dataspace_id); + + // Copy device to host if needed + #ifdef GRAVITY_GPU + CudaSafeCall( cudaMemcpy(F.potential_1_h, F.potential_1_d, n_cells_potential*sizeof(Real), cudaMemcpyDeviceToHost) ); + #endif + + // Write potential + hsize_t dims[1]; + dims[0] = n_cells_potential; + + dataspace_id = H5Screate_simple(1, dims, NULL); + HDF5_Dataset(file_id, dataspace_id, F.potential_1_h, "/potential"); + H5Sclose(dataspace_id); + + H5Fclose(file_id); + + H5close(); + } + +#elif defined(GRAVITY) +// Do nothing +void Grav3D::Read_Restart_HDF5(struct parameters* P, int nfile) +{ +} + +void Grav3D::Write_Restart_HDF5(struct parameters P, int nfile) +{ +} +#endif diff --git a/src/grid/grid3D.cpp b/src/grid/grid3D.cpp index 7ab8be7e0..9c45492c0 100644 --- a/src/grid/grid3D.cpp +++ b/src/grid/grid3D.cpp @@ -120,6 +120,15 @@ void Grid3D::Get_Position(long i, long j, long k, Real *x_pos, Real *y_pos, #endif /*MPI_CHOLLA*/ } + + +Real Grid3D::Calc_DTI() +{ + // ==Calculate the next inverse time step using Calc_dt_GPU from hydro/hydro_cuda.h== + return Calc_dt_GPU(C.device, H.nx, H.ny, H.nz, H.n_ghost, H.n_cells, H.dx, H.dy, H.dz, gama ); +} + + /*! \fn void Initialize(int nx_in, int ny_in, int nz_in) * \brief Initialize the grid. */ void Grid3D::Initialize(struct parameters *P) @@ -376,14 +385,8 @@ void Grid3D::set_dt(Real dti) #else // NOT ONLY_PARTICLES - // Compute the hydro delta_t ( H.dt ) - if (H.n_step == 0) { - // Compute the time step - max_dti = Calc_dt_GPU(C.device, H.nx, H.ny, H.nz, H.n_ghost, H.n_cells, - H.dx, H.dy, H.dz, gama); - } else { - max_dti = dti; - } + // dti is calculated before first loop and at the end of Update_Grid + max_dti = dti; #ifdef MPI_CHOLLA // Note that this is the MPI_Allreduce for every iteration of the loop, not @@ -506,9 +509,9 @@ Real Grid3D::Update_Grid(void) H.dy, H.dz, gama, max_dti_slow); #endif // AVERAGE_SLOW_CELLS - // ==Calculate the next time step with Calc_dt_GPU from hydro/hydro_cuda.h== - max_dti = Calc_dt_GPU(C.device, H.nx, H.ny, H.nz, H.n_ghost, H.n_cells, H.dx, - H.dy, H.dz, gama); + // ==Calculate the next time step using Calc_dt_GPU from hydro/hydro_cuda.h== + max_dti = Calc_DTI(); + #endif // CUDA #ifdef COOLING_GRACKLE diff --git a/src/grid/grid3D.h b/src/grid/grid3D.h index 7e207b311..0b774bee7 100644 --- a/src/grid/grid3D.h +++ b/src/grid/grid3D.h @@ -434,6 +434,8 @@ class Grid3D * *zpos) \brief Get the cell-centered position based on cell index */ void Get_Position(long i, long j, long k, Real *xpos, Real *ypos, Real *zpos); + Real Calc_DTI(); + /*! \fn void Set_Domain_Properties(struct parameters P) * \brief Set local domain properties */ void Set_Domain_Properties(struct parameters P); diff --git a/src/io/io.cpp b/src/io/io.cpp index 6f7440b98..009f877d0 100644 --- a/src/io/io.cpp +++ b/src/io/io.cpp @@ -87,11 +87,6 @@ void WriteData(Grid3D &G, struct parameters P, int nfile) H5open(); #endif -#ifdef HDF5 - // Initialize HDF5 interface - H5open(); -#endif - #ifdef N_OUTPUT_COMPLETE // If nfile is multiple of N_OUTPUT_COMPLETE then output all data if (nfile % N_OUTPUT_COMPLETE == 0) { @@ -162,9 +157,8 @@ void WriteData(Grid3D &G, struct parameters P, int nfile) H5close(); #endif -#ifdef HDF5 - // Cleanup HDF5 - H5close(); +#if defined(GRAVITY) && defined(GRAVITY_RESTART) && defined(HDF5) + G.Grav.Write_Restart_HDF5(&P, nfile); #endif #ifdef MPI_CHOLLA @@ -1308,6 +1302,13 @@ void Grid3D::Write_Grid_Binary(FILE *fp) #ifdef HDF5 +herr_t Read_HDF5_Dataset(hid_t file_id, double* dataset_buffer, const char* name) +{ + hid_t dataset_id = H5Dopen(file_id, name, H5P_DEFAULT); + herr_t status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer); + status = H5Dclose(dataset_id); + return status; +} // Helper function which uses the correct HDF5 arguments based on the type of // dataset_buffer to avoid writing garbage herr_t HDF5_Dataset(hid_t file_id, hid_t dataspace_id, double *dataset_buffer, diff --git a/src/io/io.h b/src/io/io.h index d267abb38..13919351e 100644 --- a/src/io/io.h +++ b/src/io/io.h @@ -53,6 +53,8 @@ void write_debug(Real* Value, const char* fname, int nValues, int iProc); #ifdef HDF5 // From io/io.cpp +herr_t Read_HDF5_Dataset(hid_t file_id, double* dataset_buffer, const char* name); + herr_t HDF5_Dataset(hid_t file_id, hid_t dataspace_id, double* dataset_buffer, const char* name); herr_t HDF5_Dataset(hid_t file_id, hid_t dataspace_id, float* dataset_buffer, diff --git a/src/main.cpp b/src/main.cpp index 61707ae97..b5913f7ae 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -101,13 +101,12 @@ int main(int argc, char *argv[]) message = "Initializing Simulation"; Write_Message_To_Log_File(message.c_str()); - // Set initial conditions and calculate first dt + // Set initial conditions chprintf("Setting initial conditions...\n"); G.Set_Initial_Conditions(P); chprintf("Initial conditions set.\n"); // set main variables for Read_Grid initial conditions if (strcmp(P.init, "Read_Grid") == 0) { - dti = C_cfl / G.H.dt; outtime += G.H.t; nfile = P.nfile; } @@ -189,8 +188,7 @@ int main(int argc, char *argv[]) chprintf("Dimensions of each cell: dx = %f dy = %f dz = %f\n", G.H.dx, G.H.dy, G.H.dz); chprintf("Ratio of specific heats gamma = %f\n", gama); - chprintf("Nstep = %d Timestep = %f Simulation time = %f\n", G.H.n_step, - G.H.dt, G.H.t); + chprintf("Nstep = %d Simulation time = %f\n", G.H.n_step, G.H.t); #ifdef OUTPUT if (strcmp(P.init, "Read_Grid") != 0 || G.H.Output_Now) { @@ -228,6 +226,10 @@ int main(int argc, char *argv[]) chprintf("Starting calculations.\n"); message = "Starting calculations."; Write_Message_To_Log_File(message.c_str()); + + // Compute inverse timestep for the first time + dti = G.Calc_DTI(); + while (G.H.t < P.tout) { // get the start time #ifdef CPU_TIME diff --git a/src/particles/io_particles.cpp b/src/particles/io_particles.cpp index d82a28b2f..0032d4779 100644 --- a/src/particles/io_particles.cpp +++ b/src/particles/io_particles.cpp @@ -510,7 +510,7 @@ void Grid3D::Write_Particles_Header_HDF5(hid_t file_id) status = H5Aclose(attribute_id); attribute_id = H5Acreate(file_id, "n_particles_local", H5T_STD_I64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT); - status = H5Awrite(attribute_id, H5T_NATIVE_ULONG, &Particles.n_local); + status = H5Awrite(attribute_id, H5T_NATIVE_LONG, &Particles.n_local); status = H5Aclose(attribute_id); #ifdef SINGLE_PARTICLE_MASS diff --git a/src/particles/supernova.h b/src/particles/supernova.h index 56b1ad70a..6d6fc7d11 100644 --- a/src/particles/supernova.h +++ b/src/particles/supernova.h @@ -32,10 +32,6 @@ static const Real DEFAULT_SN_END = static const Real DEFAULT_SN_START = 4000; // default value for when SNe start (4 Myr) -extern curandStateMRG32k3a_t* randStates; -extern part_int_t n_states; -extern Real *dev_snr, snr_dt, time_sn_end, time_sn_start; - void initState(struct parameters* P, part_int_t n_local, Real allocation_factor = 1); Real Cluster_Feedback(Grid3D& G, FeedbackAnalysis& sn_analysis); From 334aa648ed646ea21f99e4957fc2836826276400 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Fri, 3 Feb 2023 11:02:24 -0500 Subject: [PATCH 188/694] Fix thread overrun error in HLLD solver --- src/riemann_solvers/hlld_cuda.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/riemann_solvers/hlld_cuda.cu b/src/riemann_solvers/hlld_cuda.cu index 82085b1b8..72fc9cde8 100644 --- a/src/riemann_solvers/hlld_cuda.cu +++ b/src/riemann_solvers/hlld_cuda.cu @@ -44,7 +44,7 @@ __global__ void Calculate_HLLD_Fluxes_CUDA(Real *dev_bounds_L, cuda_utilities::compute3DIndices(threadId, nx, ny, xid, yid, zid); // Thread guard to avoid overrun - if (xid >= nx and yid >= ny and zid >= nz) return; + if (xid >= nx or yid >= ny or zid >= nz) return; // Number of cells int n_cells = nx * ny * nz; From a130fc6c8625ecec0fcad92f810468babf2227e2 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Fri, 3 Feb 2023 11:38:52 -0500 Subject: [PATCH 189/694] Fix sod tests boundaries The Y and Z boundaries in the sod test were using the default boundaries. This will make them use transmissive boundaries, same as X does. --- ...beParameterizedMpi_CorrectInputExpectCorrectOutput.txt | 8 ++++---- ...beParameterizedMpi_CorrectInputExpectCorrectOutput.txt | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/system_tests/input_files/tHYDROSYSTEMSodShockTubeParameterizedMpi_CorrectInputExpectCorrectOutput.txt b/src/system_tests/input_files/tHYDROSYSTEMSodShockTubeParameterizedMpi_CorrectInputExpectCorrectOutput.txt index 6fb66732b..5e7687d07 100644 --- a/src/system_tests/input_files/tHYDROSYSTEMSodShockTubeParameterizedMpi_CorrectInputExpectCorrectOutput.txt +++ b/src/system_tests/input_files/tHYDROSYSTEMSodShockTubeParameterizedMpi_CorrectInputExpectCorrectOutput.txt @@ -25,10 +25,10 @@ zlen=1.0 # type of boundary conditions xl_bcnd=3 xu_bcnd=3 -yl_bcnd=0 -yu_bcnd=0 -zl_bcnd=0 -zu_bcnd=0 +yl_bcnd=3 +yu_bcnd=3 +zl_bcnd=3 +zu_bcnd=3 # path to output directory outdir=./ diff --git a/src/system_tests/input_files/tMHDSYSTEMSodShockTubeParameterizedMpi_CorrectInputExpectCorrectOutput.txt b/src/system_tests/input_files/tMHDSYSTEMSodShockTubeParameterizedMpi_CorrectInputExpectCorrectOutput.txt index 71dd9bd91..7246e311f 100644 --- a/src/system_tests/input_files/tMHDSYSTEMSodShockTubeParameterizedMpi_CorrectInputExpectCorrectOutput.txt +++ b/src/system_tests/input_files/tMHDSYSTEMSodShockTubeParameterizedMpi_CorrectInputExpectCorrectOutput.txt @@ -25,10 +25,10 @@ zlen=1.0 # type of boundary conditions xl_bcnd=3 xu_bcnd=3 -yl_bcnd=0 -yu_bcnd=0 -zl_bcnd=0 -zu_bcnd=0 +yl_bcnd=3 +yu_bcnd=3 +zl_bcnd=3 +zu_bcnd=3 # path to output directory outdir=./ From 16dfe119c443b90a62646ce8198b847c6ca59716 Mon Sep 17 00:00:00 2001 From: Alwin Date: Mon, 6 Feb 2023 05:02:45 -0500 Subject: [PATCH 190/694] experimental commit --- src/particles/density_CIC.cpp | 2 + src/particles/feedback_CIC_gpu.cu | 173 +++++++++++++++++++----------- 2 files changed, 115 insertions(+), 60 deletions(-) diff --git a/src/particles/density_CIC.cpp b/src/particles/density_CIC.cpp index aac2a2056..133c41704 100644 --- a/src/particles/density_CIC.cpp +++ b/src/particles/density_CIC.cpp @@ -41,7 +41,9 @@ void Grid3D::Copy_Particles_Density_to_Gravity(struct parameters P) // Step 1: Get Particles CIC Density Particles.Clear_Density(); + #ifndef DISABLE_DENSITY_CIC Particles.Get_Density_CIC(); + #endif #ifdef CPU_TIME Timer.Part_Density.End(); diff --git a/src/particles/feedback_CIC_gpu.cu b/src/particles/feedback_CIC_gpu.cu index 58388762a..a9dad701d 100644 --- a/src/particles/feedback_CIC_gpu.cu +++ b/src/particles/feedback_CIC_gpu.cu @@ -17,7 +17,7 @@ #include "../io/io.h" #include "supernova.h" - #define TPB_FEEDBACK 256 + #define TPB_FEEDBACK 128 #define FEED_INFO_N 6 #define i_RES 1 #define i_UNRES 2 @@ -25,9 +25,12 @@ #define i_MOMENTUM 4 #define i_UNRES_ENERGY 5 +typedef curandStateMRG32k3a_t feedback_prng_t; +//typedef curandStatePhilox4_32_10_t feedback_prng_t; + namespace supernova { -curandStateMRG32k3a_t* randStates; +feedback_prng_t* randStates; part_int_t n_states; Real *dev_snr, snr_dt, time_sn_start, time_sn_end; int snr_n; @@ -49,7 +52,7 @@ __device__ double atomicMax(double* address, double val) #endif // O_HIP __global__ void initState_kernel(unsigned int seed, - curandStateMRG32k3a_t* states) + feedback_prng_t* states) { int id = blockIdx.x * blockDim.x + threadIdx.x; curand_init(seed, id, 0, &states[id]); @@ -72,7 +75,7 @@ __global__ void initState_kernel(unsigned int seed, void supernova::initState(struct parameters* P, part_int_t n_local, Real allocation_factor) { - printf("supernova::initState start\n"); + chprintf("supernova::initState start\n"); std::string snr_filename(P->snr_filename); if (not snr_filename.empty()) { chprintf("Specified a SNR filename %s.\n", snr_filename.data()); @@ -129,9 +132,9 @@ void supernova::initState(struct parameters* P, part_int_t n_local, time_sn_end = DEFAULT_SN_END; } - // Now ititialize the poisson random number generator state. + // Now initialize the poisson random number generator state. n_states = n_local * allocation_factor; - cudaMalloc((void**)&randStates, n_states * sizeof(curandStateMRG32k3a_t)); + cudaMalloc((void**)&randStates, n_states * sizeof(feedback_prng_t)); int ngrid = (n_states + TPB_FEEDBACK - 1) / TPB_FEEDBACK; dim3 grid(ngrid); @@ -140,8 +143,8 @@ void supernova::initState(struct parameters* P, part_int_t n_local, hipLaunchKernelGGL(initState_kernel, grid, block, 0, 0, P->prng_seed, randStates); CHECK(cudaDeviceSynchronize()); - printf("supernova::initState end: n_states=%d, ngrid=%d, threads=%d\n", - n_states, ngrid, TPB_FEEDBACK); + chprintf("supernova::initState end: n_states=%ld, ngrid=%d, threads=%d\n", + n_states, ngrid, TPB_FEEDBACK); } __device__ Real GetSNRate(Real t, Real* dev_snr, Real snr_dt, Real t_start, @@ -191,14 +194,14 @@ __device__ Real d_fr(int i, Real dx) } __device__ Real GetAverageDensity(Real* density, int xi, int yi, int zi, - int nxg, int nyg, int ng) + int nx_grid, int ny_grid, int n_ghost) { Real d_average = 0.0; for (int i = -1; i < 2; i++) { for (int j = -1; j < 2; j++) { for (int k = -1; k < 2; k++) { - d_average += density[(xi + ng + i) + (yi + ng + j) * nxg + - (zi + ng + k) * nxg * nyg]; + d_average += density[(xi + n_ghost + i) + (yi + n_ghost + j) * nx_grid + + (zi + n_ghost + k) * nx_grid * ny_grid]; } } } @@ -206,12 +209,29 @@ __device__ Real GetAverageDensity(Real* density, int xi, int yi, int zi, } __device__ Real GetAverageNumberDensity_CGS(Real* density, int xi, int yi, - int zi, int nxg, int nyg, int ng) + int zi, int nx_grid, int ny_grid, int n_ghost) { - return GetAverageDensity(density, xi, yi, zi, nxg, nyg, ng) * DENSITY_UNIT / + return GetAverageDensity(density, xi, yi, zi, nx_grid, ny_grid, n_ghost) * DENSITY_UNIT / (supernova::MU * MP); } +__device__ bool Particle_Is_Alone(Real* pos_x_dev, Real* pos_y_dev, Real* pos_z_dev, part_int_t n_local, int gtid, Real dx) +{ + Real x0 = pos_x_dev[gtid]; + Real y0 = pos_y_dev[gtid]; + Real z0 = pos_z_dev[gtid]; + // Brute force loop to see if particle is alone + for (int i=0;i dx) continue; + if (abs(y0 - pos_y_dev[i]) > dx) continue; + if (abs(z0 - pos_z_dev[i]) > dx) continue; + // If we made it here, something is too close. + return false; + } + return true; +} + __global__ void Cluster_Feedback_Kernel( part_int_t n_local, part_int_t* id, Real* pos_x_dev, Real* pos_y_dev, Real* pos_z_dev, Real* mass_dev, Real* age_dev, Real xMin, Real yMin, @@ -219,9 +239,9 @@ __global__ void Cluster_Feedback_Kernel( int nx_g, int ny_g, int nz_g, int n_ghost, Real t, Real dt, Real* dti, Real* info, Real* density, Real* gasEnergy, Real* energy, Real* momentum_x, Real* momentum_y, Real* momentum_z, Real gamma, - curandStateMRG32k3a_t* states, Real* prev_dens, int* prev_N, + feedback_prng_t* states, Real* prev_dens, int* prev_N, short direction, Real* dev_snr, Real snr_dt, Real time_sn_start, - Real time_sn_end) + Real time_sn_end, int n_step) { __shared__ Real s_info[FEED_INFO_N * @@ -254,14 +274,14 @@ __global__ void Cluster_Feedback_Kernel( pos_x = pos_x_dev[gtid]; pos_y = pos_y_dev[gtid]; pos_z = pos_z_dev[gtid]; - // printf("(%d): pos:(%.4e, %.4e, %.4e)\n", gtid, pos_x, pos_y, pos_z); - // printf("(%d): MIN:(%.4e, %.4e, %.4e)\n", gtid, xMin, yMin, xMin); + // kernel_printf("(%d): pos:(%.4e, %.4e, %.4e)\n", gtid, pos_x, pos_y, pos_z); + // kernel_printf("(%d): MIN:(%.4e, %.4e, %.4e)\n", gtid, xMin, yMin, xMin); bool in_local = (pos_x >= xMin && pos_x < xMax) && (pos_y >= yMin && pos_y < yMax) && (pos_z >= zMin && pos_z < zMax); if (!in_local) { - printf( + kernel_printf( " Feedback GPU: Particle outside local domain [%f %f %f] [%f %f] " "[%f %f] [%f %f]\n ", pos_x, pos_y, pos_z, xMin, xMax, yMin, yMax, zMin, zMax); @@ -270,32 +290,52 @@ __global__ void Cluster_Feedback_Kernel( int indx_x = (int)floor((pos_x - xMin) / dx); int indx_y = (int)floor((pos_y - yMin) / dy); int indx_z = (int)floor((pos_z - zMin) / dz); - // printf("(%d): indx:(%d, %d, %d)\n", gtid, indx_x, indx_y, indx_z); + // kernel_printf("(%d): indx:(%d, %d, %d)\n", gtid, indx_x, indx_y, indx_z); bool ignore = indx_x < 0 || indx_y < 0 || indx_z < 0 || indx_x >= nx_g - 2 * n_ghost || indx_y >= ny_g - 2 * n_ghost || indx_z >= nz_g - 2 * n_ghost; if (ignore) { - printf( + kernel_printf( " Feedback GPU: Particle CIC index err [%f %f %f] [%d %d %d] [%d " "%d %d] \n ", pos_x, pos_y, pos_z, indx_x, indx_y, indx_z, nx_g, ny_g, nz_g); } - if (!ignore && in_local) { + // Avoid overlap issues for now + bool is_alone = Particle_Is_Alone(pos_x_dev, pos_y_dev, pos_z_dev, n_local, gtid, 6*dx) ; + + if (!ignore && in_local && is_alone) { int N = 0; - if ((t - age_dev[gtid]) <= - time_sn_end) { // only calculate this if there will be SN feedback + // only calculate this if there will be SN feedback + if ((t - age_dev[gtid]) <= time_sn_end) { if (direction == -1) N = -prev_N[gtid]; else { - curandStateMRG32k3a_t state = states[gtid]; - N = curand_poisson(&state, - GetSNRate(t - age_dev[gtid], dev_snr, snr_dt, - time_sn_start, time_sn_end) * - mass_dev[gtid] * dt); - states[gtid] = state; - prev_N[gtid] = N; + Real average_num_sn = GetSNRate(t - age_dev[gtid], dev_snr, snr_dt, + time_sn_start, time_sn_end) * mass_dev[gtid] * dt; + + //N = (int) (average_num_sn + 0.5); + + + feedback_prng_t state;// = states[0]; // load initial state + + curand_init(42,0,0,&state); + unsigned long long skip = n_step * 10000 + id[gtid]; + skipahead(skip, &state); // provided by curand + unsigned int debug_state = curand(&state); + + + //state = states[gtid]; + + + + + N = (int) curand_poisson(&state, average_num_sn); + printf("PRNG DEBUG: n_step: %d id: %d skip: %llu debug_state: %u N: %d \n", + n_step, (int) id[gtid], skip, debug_state, N); + //states[gtid] = state; // don't write back to state, keep it pristine + prev_N[gtid] = N; } if (N != 0) { mass_dev[gtid] -= N * supernova::MASS_PER_SN; @@ -312,7 +352,7 @@ __global__ void Cluster_Feedback_Kernel( // cudaGetDeviceCount(&devcount); // int devId; // cudaGetDevice(&devId); - // printf("[%d: %d] N: %d, time: %.4e, dt: %.4e, e: %.4e, n_0: + // kernel_printf("[%d: %d] N: %d, time: %.4e, dt: %.4e, e: %.4e, n_0: // %.4e\n", devId, gtid, N, t, dt, feedback_energy, n_0); feedback_momentum = direction * supernova::FINAL_MOMENTUM * @@ -321,7 +361,7 @@ __global__ void Cluster_Feedback_Kernel( supernova::R_SH * pow(n_0, -0.46) * pow(fabsf(N), 0.29); is_resolved = 3 * max(dx, max(dy, dz)) <= shell_radius; if (!is_resolved) - printf( + kernel_printf( "UR[%f] at (%d, %d, %d) id=%d, N=%d, shell_rad=%0.4e, " "n_0=%0.4e\n", t, indx_x + n_ghost, indx_y + n_ghost, indx_z + n_ghost, @@ -360,7 +400,7 @@ __global__ void Cluster_Feedback_Kernel( (indx_z + k) * nx_g * ny_g; if (abs(momentum_x[indx] / density[indx]) >= C_L) { - printf( + kernel_printf( "%d, Rb: (%d, %d, %d) vx = %.3e, d = %.3e, n_0 = " "%.3e\n", direction, indx_x + i, indx_y + j, indx_z + k, @@ -368,7 +408,7 @@ __global__ void Cluster_Feedback_Kernel( density[indx] * DENSITY_UNIT / 0.6 / MP, n_0); } if (abs(momentum_y[indx] / density[indx]) >= C_L) { - printf( + kernel_printf( "%d, Rb: (%d, %d, %d) vy = %.3e, d = %.3e, n_0 = " "%.3e\n", direction, indx_x + i, indx_y + j, indx_z + k, @@ -376,7 +416,7 @@ __global__ void Cluster_Feedback_Kernel( density[indx] * DENSITY_UNIT / 0.6 / MP, n_0); } if (abs(momentum_z[indx] / density[indx]) >= C_L) { - printf( + kernel_printf( "%d, Rb: (%d, %d, %d) vz = %.3e, d = %.3e, n_0 = " "%.3e\n", direction, indx_x + i, indx_y + j, indx_z + k, @@ -398,7 +438,7 @@ __global__ void Cluster_Feedback_Kernel( x_frac * y_frac * z_frac * feedback_energy); if (abs(momentum_x[indx] / density[indx]) >= C_L) { - printf( + kernel_printf( "%d, Ra: (%d, %d, %d) vx = %.3e, d = %.3e, n_0 = " "%.3e\n", direction, indx_x + i, indx_y + j, indx_z + k, @@ -406,7 +446,7 @@ __global__ void Cluster_Feedback_Kernel( density[indx] * DENSITY_UNIT / 0.6 / MP, n_0); } if (abs(momentum_y[indx] / density[indx]) >= C_L) { - printf( + kernel_printf( "%d, Ra: (%d, %d, %d) vy = %.3e, d = %.3e, n_0 = " "%.3e\n", direction, indx_x + i, indx_y + j, indx_z + k, @@ -414,7 +454,7 @@ __global__ void Cluster_Feedback_Kernel( density[indx] * DENSITY_UNIT / 0.6 / MP, n_0); } if (abs(momentum_z[indx] / density[indx]) >= C_L) { - printf( + kernel_printf( "%d, Ra: (%d, %d, %d) vz = %.3e, d = %.3e, n_0 = " "%.3e\n", direction, indx_x + i, indx_y + j, indx_z + k, @@ -436,8 +476,8 @@ __global__ void Cluster_Feedback_Kernel( delta_x = (pos_x - xMin - indx_x * dx) / dx; delta_y = (pos_y - yMin - indx_y * dy) / dy; delta_z = (pos_z - zMin - indx_z * dz) / dz; - // printf("(%d):indx:(%d, %d, %d)\n", gtid, indx_x, indx_y, indx_z); - // printf("(%d): pos:(%.4e, %.4e, %.4e), delta_x (%.2e, %.2e, + // kernel_printf("(%d):indx:(%d, %d, %d)\n", gtid, indx_x, indx_y, indx_z); + // kernel_printf("(%d): pos:(%.4e, %.4e, %.4e), delta_x (%.2e, %.2e, // %.2e)\n", gtid, pos_x, pos_y, pos_z, delta_x, delta_y, delta_z); indx_x += n_ghost; @@ -447,7 +487,7 @@ __global__ void Cluster_Feedback_Kernel( if (abs(feedback_momentum / feedback_density * VELOCITY_UNIT * 1e-5) > 40000) { // injected speeds are greater than 4e4 km/s - printf( + kernel_printf( "**** (%d, %d, %d) injected speeds are %.3e km/s\n", indx_x, indx_y, indx_z, feedback_momentum / feedback_density * VELOCITY_UNIT * 1e-5); @@ -477,10 +517,10 @@ __global__ void Cluster_Feedback_Kernel( // d = frac(i, delta_x) * frac(j, delta_y) * frac(k, delta_z) // * feedback_density; e = frac(i, delta_x) * frac(j, - // delta_y) * frac(k, delta_z) * feedback_energy; printf("(%d, + // delta_y) * frac(k, delta_z) * feedback_energy; kernel_printf("(%d, // %d, %d): delta:(%.4e, %.4e, %.4e), frac: %.4e\n", indx_x, // indx_y, indx_z, delta_x, delta_y, delta_z, frac(i, - // delta_x)*frac(j, delta_y)*frac(k, delta_z)); printf("(%d, + // delta_x)*frac(j, delta_y)*frac(k, delta_z)); kernel_printf("(%d, // %d, %d):(%d SN) (i:%d, j:%d, k:%d) before: %.4e\n", indx_x, // indx_y, indx_z, N, i, j, k, // density[indx]*DENSITY_UNIT/0.6/MP); @@ -496,7 +536,7 @@ __global__ void Cluster_Feedback_Kernel( // density[indx]*DENSITY_UNIT/0.6/MP; if (abs(momentum_x[indx] / density[indx]) >= C_L) { - printf( + kernel_printf( "%d, Ub: (%d, %d, %d) vx = %.3e, d = %.3e, n_0 = " "%.3e\n", direction, indx_x + i, indx_y + j, indx_z + k, @@ -504,7 +544,7 @@ __global__ void Cluster_Feedback_Kernel( density[indx] * DENSITY_UNIT / 0.6 / MP, n_0); } if (abs(momentum_y[indx] / density[indx]) >= C_L) { - printf( + kernel_printf( "%d, Ub: (%d, %d, %d) vy = %.3e, d = %.3e, n_0 = " "%.3e\n", direction, indx_x + i, indx_y + j, indx_z + k, @@ -512,7 +552,7 @@ __global__ void Cluster_Feedback_Kernel( density[indx] * DENSITY_UNIT / 0.6 / MP, n_0); } if (abs(momentum_z[indx] / density[indx]) >= C_L) { - printf( + kernel_printf( "%d, Ub: (%d, %d, %d) vz = %.3e, d = %.3e, n_0 = " "%.3e\n", direction, indx_x + i, indx_y + j, indx_z + k, @@ -520,9 +560,18 @@ __global__ void Cluster_Feedback_Kernel( density[indx] * DENSITY_UNIT / 0.6 / MP, n_0); } + + //printf("INDX DEBUG: n_step: %d id: %d indx: %d \n", n_step, (int) id[gtid], indx); + + if (indx >= nx_g * ny_g * nz_g) { + printf("INDX DEBUG\n"); + } + atomicAdd(&momentum_x[indx], px); atomicAdd(&momentum_y[indx], py); atomicAdd(&momentum_z[indx], pz); + + /* density[indx] = d; energy[indx] = (momentum_x[indx] * momentum_x[indx] + momentum_y[indx] * momentum_y[indx] + @@ -530,6 +579,8 @@ __global__ void Cluster_Feedback_Kernel( 2 / density[indx] + gasEnergy[indx]; + */ + // atomicAdd( &energy[indx], e ); // atomicAdd( &density[indx], d ); @@ -538,7 +589,7 @@ __global__ void Cluster_Feedback_Kernel( density[indx] * dV; if (abs(momentum_x[indx] / density[indx]) >= C_L) { - printf( + kernel_printf( "%d, Ua: (%d, %d, %d) vx = %.3e, d = %.3e, n_0 = " "%.3e\n", direction, indx_x + i, indx_y + j, indx_z + k, @@ -546,7 +597,7 @@ __global__ void Cluster_Feedback_Kernel( density[indx] * DENSITY_UNIT / 0.6 / MP, n_0); } if (abs(momentum_y[indx] / density[indx]) >= C_L) { - printf( + kernel_printf( "%d, Ua: (%d, %d, %d) vy = %.3e, d = %.3e, n_0 = " "%.3e\n", direction, indx_x + i, indx_y + j, indx_z + k, @@ -554,7 +605,7 @@ __global__ void Cluster_Feedback_Kernel( density[indx] * DENSITY_UNIT / 0.6 / MP, n_0); } if (abs(momentum_z[indx] / density[indx]) >= C_L) { - printf( + kernel_printf( "%d, Ua: (%d, %d, %d) vz = %.3e, d = %.3e, n_0 = " "%.3e\n", direction, indx_x + i, indx_y + j, indx_z + k, @@ -576,22 +627,22 @@ __global__ void Cluster_Feedback_Kernel( // momentum_y[indx]*momentum_y[indx] + // momentum_z[indx]*momentum_z[indx])*VELOCITY_UNIT/1e5; - // printf("(%d, %d, %d):(CM: %.2e, SN: %d) (i:%d, j:%d, k:%d) + // kernel_printf("(%d, %d, %d):(CM: %.2e, SN: %d) (i:%d, j:%d, k:%d) // v_1: %.5e v_2: %.5e V_DIFF-> %.4f %%\n", indx_x, indx_y, // indx_z, mass_dev[gtid], N, i, j, k, v_1, v_2, - // (v_2-v_1)/v_1*100); printf(" (%d, %d, %d):(%d SN) (i:%d, + // (v_2-v_1)/v_1*100); kernel_printf(" (%d, %d, %d):(%d SN) (i:%d, // j:%d, k:%d) T_b: %.5e T_a: %.5e T_DIFF-> %.4f %%\n", // indx_x, indx_y, indx_z, N, i, j, k, t_b, t_a, - // (t_a-t_b)/t_b*100); printf(" (%d, %d, %d):(%d SN) + // (t_a-t_b)/t_b*100); kernel_printf(" (%d, %d, %d):(%d SN) // (i:%d, j:%d, k:%d) d_b: %.5e d_a: %.5e D_DIFF-> %.1f // %%\n", indx_x, indx_y, indx_z, N, i, j, k, d_b, d_a, - // (d_a-d_b)/d_b*100); printf(" (%d, %d, %d):(%d SN) + // (d_a-d_b)/d_b*100); kernel_printf(" (%d, %d, %d):(%d SN) // (i:%d, j:%d, k:%d) p_b: %.5e p_a: %.5e P_DIFF-> %.4f // %%\n", indx_x, indx_y, indx_z, N, i, j, k, p_b, p_a, // (p_a-p_b)/p_b*100); if (direction > 0) { - // printf("urs time:%.3e id:%d N:%d d:%.5e\n", t, id[gtid], + // kernel_printf("urs time:%.3e id:%d N:%d d:%.5e\n", t, id[gtid], // N, n_0); local_dti = fmax( local_dti, @@ -641,11 +692,13 @@ Real supernova::Cluster_Feedback(Grid3D& G, FeedbackAnalysis& analysis) if (G.H.dt == 0) return 0.0; + /* if (G.Particles.n_local > supernova::n_states) { - printf("ERROR: not enough cuRAND states (%d) for %d local particles\n", + printf("ERROR: not enough cuRAND states (%ld) for %ld local particles\n", supernova::n_states, G.Particles.n_local); exit(-1); } + */ Real h_dti = 0.0; int direction, ngrid; @@ -686,7 +739,7 @@ Real supernova::Cluster_Feedback(Grid3D& G, FeedbackAnalysis& analysis) d_info, G.C.d_density, G.C.d_GasEnergy, G.C.d_Energy, G.C.d_momentum_x, G.C.d_momentum_y, G.C.d_momentum_z, gama, supernova::randStates, d_prev_dens, d_prev_N, direction, dev_snr, - snr_dt, time_sn_start, time_sn_end); + snr_dt, time_sn_start, time_sn_end, G.H.n_step); CHECK(cudaMemcpy(&h_dti, d_dti, sizeof(Real), cudaMemcpyDeviceToHost)); } @@ -696,9 +749,9 @@ Real supernova::Cluster_Feedback(Grid3D& G, FeedbackAnalysis& analysis) MPI_Barrier(world); #endif // MPI_CHOLLA - if (h_dti != 0 && - (C_cfl / h_dti < - G.H.dt)) { // timestep too big: need to undo the last operation + if (h_dti != 0 && (C_cfl / h_dti < G.H.dt)) + { + // timestep too big: need to undo the last operation direction = -1; if (G.Particles.n_local > 0) { hipLaunchKernelGGL( @@ -711,7 +764,7 @@ Real supernova::Cluster_Feedback(Grid3D& G, FeedbackAnalysis& analysis) d_info, G.C.d_density, G.C.d_GasEnergy, G.C.d_Energy, G.C.d_momentum_x, G.C.d_momentum_y, G.C.d_momentum_z, gama, supernova::randStates, d_prev_dens, d_prev_N, direction, dev_snr, - snr_dt, time_sn_start, time_sn_end); + snr_dt, time_sn_start, time_sn_end, G.H.n_step); CHECK(cudaDeviceSynchronize()); } From dbfc625c9dd3560d6f163187a59adc518acae346 Mon Sep 17 00:00:00 2001 From: Alwin Date: Mon, 6 Feb 2023 05:04:29 -0500 Subject: [PATCH 191/694] make.type.debug_disk --- builds/make.type.debug_disk | 56 +++++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) create mode 100644 builds/make.type.debug_disk diff --git a/builds/make.type.debug_disk b/builds/make.type.debug_disk new file mode 100644 index 000000000..2050699dc --- /dev/null +++ b/builds/make.type.debug_disk @@ -0,0 +1,56 @@ +DFLAGS += -DDISABLE_KERNEL_PRINTF + +MPI_GPU = -DMPI_GPU +DFLAGS += -DPARTICLES +#DFLAGS += -DPARTICLES_CPU +DFLAGS += -DPARTICLES_GPU +#DFLAGS += -DONLY_PARTICLES +DFLAGS += -DPARTICLE_IDS +#DFLAGS += -DSINGLE_PARTICLE_MASS +DFLAGS += -DPARTICLE_AGE +DFLAGS += -DSUPERNOVA #this flag requires PARTICLE_AGE, PARTICLE_IDS +DFLAGS += -DANALYSIS +#DFLAGS += -DPARTICLES_KDK + + + +DFLAGS += -DGRAVITY +DFLAGS += -DGRAVITY_GPU +DFLAGS += -DGRAVITY_RESTART # For making gravity restarts more consistent +DFLAGS += -DDISABLE_DENSITY_CIC # For preventing atomicAdd inconsistency +# Use both -DSOR and -DPARIS_GALACTIC to run analytic test and compare solutions +#DFLAGS += -DSOR +DFLAGS += -DPARIS_GALACTIC +DFLAGS += -DGRAVITY_ANALYTIC_COMP +DFLAGS += -DGRAVITY_5_POINTS_GRADIENT + +#DFLAGS += -DSTATIC_GRAV + +#DFLAGS += -DOUTPUT_ALWAYS +DFLAGS += -DCUDA +DFLAGS += -DMPI_CHOLLA +DFLAGS += -DPRECISION=2 +DFLAGS += -DPPMC +DFLAGS += -DHLLC +DFLAGS += -DVL + +DFLAGS += -DDISK_ICS + +DFLAGS += -DDENSITY_FLOOR +DFLAGS += -DTEMPERATURE_FLOOR +DFLAGS += -DCOOLING_GPU +#DFLAGS += -DCLOUDY_COOL +DFLAGS += -DDE +DFLAGS += -DCPU_TIME +DFLAGS += -DAVERAGE_SLOW_CELLS +DFLAGS += -DHYDRO_GPU + +OUTPUT ?= -DOUTPUT -DHDF5 -DSLICES -DPROJECTION +DFLAGS += $(OUTPUT) + +DFLAGS += $(MPI_GPU) + +DFLAGS += -DPARALLEL_OMP +DFLAGS += -DN_OMP_THREADS=$(OMP_NUM_THREADS) + +#DFLAGS += -DCUDA_ERROR_CHECK From abbf983d162dc730f69a363177c8b18a0b978e8d Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Fri, 3 Feb 2023 17:01:19 -0500 Subject: [PATCH 192/694] Remove `using namespace` and `while(1)` - Remove all instances of `using namespace` and add proper namespace qualifiers to statements - Allows us to turn on `google-build-using-namespace` clang-tidy check - Replace `while(1)` with `while(true)` - Allows us to turn on `modernize-use-bool-literals` clang-tidy check - Permanantly disable `misc-unused-parameters` clang-tidy check since it causes too many issues with all our ifdefs --- .clang-tidy | 4 +--- src/analysis/analysis.h | 2 -- src/analysis/io_analysis.cpp | 2 -- src/chemistry_gpu/chemistry_io.cpp | 2 -- src/cosmology/io_cosmology.cpp | 6 ++---- src/grid/initial_conditions.cpp | 6 ++---- src/io/io.cpp | 16 +++++++--------- src/mpi/mpi_routines.cpp | 2 +- 8 files changed, 13 insertions(+), 27 deletions(-) diff --git a/.clang-tidy b/.clang-tidy index f7c2afe3f..4c0eacf8c 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -30,6 +30,7 @@ Checks: "*, -modernize-use-trailing-return-type, -readability-avoid-const-params-in-decls, -readability-static-accessed-through-instance, + -misc-unused-parameters, google-readability-avoid-underscore-in-googletest-name, google-upgrade-googletest-case, @@ -92,7 +93,6 @@ Checks: "*, -cppcoreguidelines-special-member-functions, -cppcoreguidelines-virtual-class-destructor, -google-build-namespaces, - -google-build-using-namespace, -google-explicit-constructor, -google-global-names-in-headers, -google-readability-braces-around-statements, @@ -124,14 +124,12 @@ Checks: "*, -misc-confusable-identifiers, -misc-const-correctness, -misc-non-private-member-variables-in-classes, - -misc-unused-parameters, -modernize-avoid-c-arrays, -modernize-deprecated-headers, -modernize-loop-convert, -modernize-macro-to-enum, -modernize-redundant-void-arg, -modernize-use-auto, - -modernize-use-bool-literals, -modernize-use-default-member-init, -modernize-use-emplace, -modernize-use-equals-default, diff --git a/src/analysis/analysis.h b/src/analysis/analysis.h index 7c7913b2d..7f157d6db 100644 --- a/src/analysis/analysis.h +++ b/src/analysis/analysis.h @@ -11,8 +11,6 @@ #include #endif -using namespace std; - class Analysis_Module { public: diff --git a/src/analysis/io_analysis.cpp b/src/analysis/io_analysis.cpp index 2b213aba5..c7df48f91 100644 --- a/src/analysis/io_analysis.cpp +++ b/src/analysis/io_analysis.cpp @@ -7,8 +7,6 @@ #include "../grid/grid3D.h" #include "../io/io.h" -using namespace std; - // #define OUTPUT_SKEWERS_TRANSMITTED_FLUX #ifdef OUTPUT_SKEWERS diff --git a/src/chemistry_gpu/chemistry_io.cpp b/src/chemistry_gpu/chemistry_io.cpp index 915bfcd65..a10729c10 100644 --- a/src/chemistry_gpu/chemistry_io.cpp +++ b/src/chemistry_gpu/chemistry_io.cpp @@ -10,8 +10,6 @@ #include "../io/io.h" #include "chemistry_gpu.h" -using namespace std; - void Chem_GPU::Load_UVB_Ionization_and_Heating_Rates(struct parameters *P) { char uvb_filename[100]; diff --git a/src/cosmology/io_cosmology.cpp b/src/cosmology/io_cosmology.cpp index b20657f7c..dbaeb983f 100644 --- a/src/cosmology/io_cosmology.cpp +++ b/src/cosmology/io_cosmology.cpp @@ -6,8 +6,6 @@ #include "../cosmology/cosmology.h" #include "../io/io.h" -using namespace std; - void Cosmology::Load_Scale_Outputs(struct parameters *P) { char filename_1[100]; @@ -15,8 +13,8 @@ void Cosmology::Load_Scale_Outputs(struct parameters *P) strcpy(filename_1, P->scale_outputs_file); chprintf(" Loading Scale_Factor Outpus: %s\n", filename_1); - ifstream file_out(filename_1); - string line; + std::ifstream file_out(filename_1); + std::string line; Real a_value; if (file_out.is_open()) { while (getline(file_out, line)) { diff --git a/src/grid/initial_conditions.cpp b/src/grid/initial_conditions.cpp index df487a7c4..d3b5a5317 100644 --- a/src/grid/initial_conditions.cpp +++ b/src/grid/initial_conditions.cpp @@ -22,8 +22,6 @@ #include "../utils/math_utilities.h" #include "../utils/mhd_utilities.h" -using namespace std; - /*! \fn void Set_Initial_Conditions(parameters P) * \brief Set the initial conditions based on info in the parameters structure. */ @@ -1533,8 +1531,8 @@ void Grid3D::Zeldovich_Pancake(struct parameters P) real_vector_t ics_values; - ifstream file_in(filename); - string line; + std::ifstream file_in(filename); + std::string line; Real ic_val; if (file_in.is_open()) { while (getline(file_in, line)) { diff --git a/src/io/io.cpp b/src/io/io.cpp index 6f7440b98..4049fc49b 100644 --- a/src/io/io.cpp +++ b/src/io/io.cpp @@ -23,8 +23,6 @@ #include "../cosmology/cosmology.h" #endif // COSMOLOGY -using namespace std; - // #define OUTPUT_ENERGY // #define OUTPUT_MOMENTUM @@ -39,7 +37,7 @@ void Create_Log_File(struct parameters P) if (procID != 0) return; #endif - string file_name(LOG_FILE_NAME); + std::string file_name(LOG_FILE_NAME); chprintf("\nCreating Log File: %s \n\n", file_name.c_str()); bool file_exists = false; @@ -54,8 +52,8 @@ void Create_Log_File(struct parameters P) // convert now to string form char *dt = ctime(&now); - ofstream out_file; - out_file.open(file_name.c_str(), ios::app); + std::ofstream out_file; + out_file.open(file_name.c_str(), std::ios::app); out_file << "\n"; out_file << "Run date: " << dt; out_file.close(); @@ -67,10 +65,10 @@ void Write_Message_To_Log_File(const char *message) if (procID != 0) return; #endif - string file_name(LOG_FILE_NAME); - ofstream out_file; - out_file.open(file_name.c_str(), ios::app); - out_file << message << endl; + std::string file_name(LOG_FILE_NAME); + std::ofstream out_file; + out_file.open(file_name.c_str(), std::ios::app); + out_file << message << std::endl; out_file.close(); } diff --git a/src/mpi/mpi_routines.cpp b/src/mpi/mpi_routines.cpp index 4b6a21474..66f3cd0ef 100644 --- a/src/mpi/mpi_routines.cpp +++ b/src/mpi/mpi_routines.cpp @@ -821,7 +821,7 @@ int greatest_prime_factor(int n) if (n == 1 || n == 2) return n; - while (1) { + while (true) { while (!(ns % np)) { ns = ns / np; } From 996270ab15691863be5540a112625742b69b093b Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Mon, 6 Feb 2023 10:30:00 -0500 Subject: [PATCH 193/694] Remove additional instance of bool non-literals --- src/particles/particles_boundaries_gpu.cu | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/particles/particles_boundaries_gpu.cu b/src/particles/particles_boundaries_gpu.cu index 2e1d552bc..c9730e10a 100644 --- a/src/particles/particles_boundaries_gpu.cu +++ b/src/particles/particles_boundaries_gpu.cu @@ -81,12 +81,12 @@ __global__ void Get_Transfer_Flags_Kernel(part_int_t n_total, int side, int tid = threadIdx.x + blockIdx.x * blockDim.x; if (tid >= n_total) return; - bool transfer = 0; + bool transfer = false; Real pos = pos_d[tid]; - if (side == 0 && pos < d_min) transfer = 1; - if (side == 1 && pos >= d_max) transfer = 1; + if (side == 0 && pos < d_min) transfer = true; + if (side == 1 && pos >= d_max) transfer = true; transfer_flags_d[tid] = transfer; } From 6bf39da464e17a06f2303684dd8305549cd4611b Mon Sep 17 00:00:00 2001 From: Alwin Date: Mon, 6 Feb 2023 21:45:49 -0800 Subject: [PATCH 194/694] format dev-restart, rename hdf5_dataset to write_hdf5_dataset --- src/global/global_cuda.h | 24 ++--- src/gravity/grav3D.cpp | 5 +- src/gravity/grav3D.h | 6 +- src/gravity/gravity_restart.cpp | 55 +++++----- src/grid/grid3D.cpp | 13 ++- src/grid/grid3D.h | 2 +- src/io/io.cpp | 176 ++++++++++++++++++-------------- src/io/io.h | 11 +- src/io/io_gpu.cu | 4 +- src/main.cpp | 4 +- 10 files changed, 161 insertions(+), 139 deletions(-) diff --git a/src/global/global_cuda.h b/src/global/global_cuda.h index eeb8b6cc7..1ef6750a4 100644 --- a/src/global/global_cuda.h +++ b/src/global/global_cuda.h @@ -116,23 +116,19 @@ __device__ double atomicAdd(double *address, double val) } #endif -// This helper function exists to easily enable/disable printfs inside kernels -// And makes it easier to find printfs inside kernels -// For any printf in kernel which is FATAL, use printf instead of kernel_printf, since crashing the GPU in fatal situation is ok -// If printf is not supported by the GPU for any reason, disable non-fatal kernel printfs to avoid crashing the GPU -// If debug printfs should be turned off during production, disable non-fatal kernel printfs - -#ifndef DISABLE_KERNEL_PRINTF -#define kernel_printf printf -#else -inline __device__ int kernel_printf(const char * format, ...) + // This helper function exists to easily enable/disable printfs inside kernels + // And makes it easier to find printfs inside kernels + + #ifndef DISABLE_KERNEL_PRINTF + #define kernel_printf printf + #else +inline __device__ int kernel_printf(const char *format, ...) { - // printf returns number of characters printed if success, negative value otherwise + // printf returns number of characters printed if success, negative value + // otherwise return 0; } -#endif //DISABLE_KERNEL_PRINTF - - + #endif // DISABLE_KERNEL_PRINTF #endif // GLOBAL_CUDA_H diff --git a/src/gravity/grav3D.cpp b/src/gravity/grav3D.cpp index d388806ea..8ab114059 100644 --- a/src/gravity/grav3D.cpp +++ b/src/gravity/grav3D.cpp @@ -122,9 +122,8 @@ void Grav3D::Initialize(Real x_min, Real y_min, Real z_min, Real x_max, #endif // At the end of initializing, set restart state if needed - - if ((strcmp(P->init, "Read_Grid") == 0) && (P->nfile > 0)) - { + + if ((strcmp(P->init, "Read_Grid") == 0) && (P->nfile > 0)) { Read_Restart_HDF5(P, P->nfile); } } diff --git a/src/gravity/grav3D.h b/src/gravity/grav3D.h index d5e0d4750..691a352e7 100644 --- a/src/gravity/grav3D.h +++ b/src/gravity/grav3D.h @@ -201,16 +201,14 @@ class Grav3D void Initialize_values_CPU(); void FreeMemory_CPU(void); - void Read_Restart_HDF5(struct parameters* P, int nfile); - void Write_Restart_HDF5(struct parameters* P, int nfile); + void Read_Restart_HDF5(struct parameters *P, int nfile); + void Write_Restart_HDF5(struct parameters *P, int nfile); Real Get_Average_Density(); Real Get_Average_Density_function(int g_start, int g_end); void Set_Boundary_Flags(int *flags); - - #ifdef SOR void Copy_Isolated_Boundary_To_GPU_buffer(Real *isolated_boundary_h, Real *isolated_boundary_d, diff --git a/src/gravity/gravity_restart.cpp b/src/gravity/gravity_restart.cpp index e3626735a..1eb660063 100644 --- a/src/gravity/gravity_restart.cpp +++ b/src/gravity/gravity_restart.cpp @@ -1,23 +1,24 @@ -// Special functions needed to make restart (init=Read_Grid) consistent with running continuously +// Special functions needed to make restart (init=Read_Grid) consistent with +// running continuously #include #ifdef GRAVITY -#include "../io/io.h" -#include "../gravity/grav3D.h" + #include "../gravity/grav3D.h" + #include "../io/io.h" #endif #ifdef HDF5 -#include + #include #endif void Gravity_Restart_Filename(char* filename, char* dirname, int nfile) { - #ifdef MPI_CHOLLA - sprintf(filename,"%s%d_gravity.h5.%d",dirname,nfile,procID); - #else - sprintf(filename,"%s%d_gravity.h5",dirname,nfile); - #endif +#ifdef MPI_CHOLLA + sprintf(filename, "%s%d_gravity.h5.%d", dirname, nfile, procID); +#else + sprintf(filename, "%s%d_gravity.h5", dirname, nfile); +#endif } #if defined(GRAVITY) && defined(GRAVITY_RESTART) && defined(HDF5) @@ -30,16 +31,17 @@ void Grav3D::Read_Restart_HDF5(struct parameters* P, int nfile) // Read dt_now hid_t attribute_id = H5Aopen(file_id, "dt_now", H5P_DEFAULT); - herr_t status = H5Aread(attribute_id, H5T_NATIVE_DOUBLE, &dt_now); - status = H5Aclose(attribute_id); + herr_t status = H5Aread(attribute_id, H5T_NATIVE_DOUBLE, &dt_now); + status = H5Aclose(attribute_id); // Read potential and copy to device to be used as potential n-1 Read_HDF5_Dataset(file_id, F.potential_1_h, "/potential"); #ifdef GRAVITY_GPU - CudaSafeCall( cudaMemcpy(F.potential_1_d, F.potential_1_h, n_cells_potential*sizeof(Real), cudaMemcpyHostToDevice) ); + CudaSafeCall(cudaMemcpy(F.potential_1_d, F.potential_1_h, + n_cells_potential * sizeof(Real), + cudaMemcpyHostToDevice)); #endif - H5Fclose(file_id); H5close(); @@ -55,20 +57,23 @@ void Grav3D::Write_Restart_HDF5(struct parameters* P, int nfile) hid_t file_id = H5Fcreate(filename, H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT); // Write dt_now - hsize_t attr_dims = 1; + hsize_t attr_dims = 1; hid_t dataspace_id = H5Screate_simple(1, &attr_dims, NULL); - hid_t attribute_id = H5Acreate(file_id, "dt_now", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT); - herr_t status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &dt_now); - status = H5Aclose(attribute_id); + hid_t attribute_id = H5Acreate(file_id, "dt_now", H5T_IEEE_F64BE, + dataspace_id, H5P_DEFAULT, H5P_DEFAULT); + herr_t status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &dt_now); + status = H5Aclose(attribute_id); status = H5Sclose(dataspace_id); // Copy device to host if needed #ifdef GRAVITY_GPU - CudaSafeCall( cudaMemcpy(F.potential_1_h, F.potential_1_d, n_cells_potential*sizeof(Real), cudaMemcpyDeviceToHost) ); + CudaSafeCall(cudaMemcpy(F.potential_1_h, F.potential_1_d, + n_cells_potential * sizeof(Real), + cudaMemcpyDeviceToHost)); #endif - + // Write potential hsize_t dims[1]; dims[0] = n_cells_potential; @@ -80,15 +85,11 @@ void Grav3D::Write_Restart_HDF5(struct parameters* P, int nfile) H5Fclose(file_id); H5close(); - } +} #elif defined(GRAVITY) -// Do nothing -void Grav3D::Read_Restart_HDF5(struct parameters* P, int nfile) -{ -} +// Do nothing +void Grav3D::Read_Restart_HDF5(struct parameters* P, int nfile) {} -void Grav3D::Write_Restart_HDF5(struct parameters P, int nfile) -{ -} +void Grav3D::Write_Restart_HDF5(struct parameters P, int nfile) {} #endif diff --git a/src/grid/grid3D.cpp b/src/grid/grid3D.cpp index 9c45492c0..c27563534 100644 --- a/src/grid/grid3D.cpp +++ b/src/grid/grid3D.cpp @@ -120,15 +120,14 @@ void Grid3D::Get_Position(long i, long j, long k, Real *x_pos, Real *y_pos, #endif /*MPI_CHOLLA*/ } - - -Real Grid3D::Calc_DTI() +Real Grid3D::Calc_Inverse_Timestep() { - // ==Calculate the next inverse time step using Calc_dt_GPU from hydro/hydro_cuda.h== - return Calc_dt_GPU(C.device, H.nx, H.ny, H.nz, H.n_ghost, H.n_cells, H.dx, H.dy, H.dz, gama ); + // ==Calculate the next inverse time step using Calc_dt_GPU from + // hydro/hydro_cuda.h== + return Calc_dt_GPU(C.device, H.nx, H.ny, H.nz, H.n_ghost, H.n_cells, H.dx, + H.dy, H.dz, gama); } - /*! \fn void Initialize(int nx_in, int ny_in, int nz_in) * \brief Initialize the grid. */ void Grid3D::Initialize(struct parameters *P) @@ -510,7 +509,7 @@ Real Grid3D::Update_Grid(void) #endif // AVERAGE_SLOW_CELLS // ==Calculate the next time step using Calc_dt_GPU from hydro/hydro_cuda.h== - max_dti = Calc_DTI(); + max_dti = Calc_Inverse_Timestep(); #endif // CUDA diff --git a/src/grid/grid3D.h b/src/grid/grid3D.h index 0b774bee7..5d580c209 100644 --- a/src/grid/grid3D.h +++ b/src/grid/grid3D.h @@ -434,7 +434,7 @@ class Grid3D * *zpos) \brief Get the cell-centered position based on cell index */ void Get_Position(long i, long j, long k, Real *xpos, Real *ypos, Real *zpos); - Real Calc_DTI(); + Real Calc_Inverse_Timestep(); /*! \fn void Set_Domain_Properties(struct parameters P) * \brief Set local domain properties */ diff --git a/src/io/io.cpp b/src/io/io.cpp index 009f877d0..97efb972f 100644 --- a/src/io/io.cpp +++ b/src/io/io.cpp @@ -157,7 +157,7 @@ void WriteData(Grid3D &G, struct parameters P, int nfile) H5close(); #endif -#if defined(GRAVITY) && defined(GRAVITY_RESTART) && defined(HDF5) +#if defined(GRAVITY) && defined(GRAVITY_RESTART) && defined(HDF5) G.Grav.Write_Restart_HDF5(&P, nfile); #endif @@ -1302,17 +1302,19 @@ void Grid3D::Write_Grid_Binary(FILE *fp) #ifdef HDF5 -herr_t Read_HDF5_Dataset(hid_t file_id, double* dataset_buffer, const char* name) +herr_t Read_HDF5_Dataset(hid_t file_id, double *dataset_buffer, + const char *name) { hid_t dataset_id = H5Dopen(file_id, name, H5P_DEFAULT); - herr_t status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer); - status = H5Dclose(dataset_id); + herr_t status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, + H5P_DEFAULT, dataset_buffer); + status = H5Dclose(dataset_id); return status; } // Helper function which uses the correct HDF5 arguments based on the type of // dataset_buffer to avoid writing garbage -herr_t HDF5_Dataset(hid_t file_id, hid_t dataspace_id, double *dataset_buffer, - const char *name) +herr_t Write_HDF5_Dataset(hid_t file_id, hid_t dataspace_id, + double *dataset_buffer, const char *name) { // Create a dataset id for density hid_t dataset_id = H5Dcreate(file_id, name, H5T_IEEE_F64BE, dataspace_id, @@ -1325,8 +1327,8 @@ herr_t HDF5_Dataset(hid_t file_id, hid_t dataspace_id, double *dataset_buffer, return status; } -herr_t HDF5_Dataset(hid_t file_id, hid_t dataspace_id, float *dataset_buffer, - const char *name) +herr_t Write_HDF5_Dataset(hid_t file_id, hid_t dataspace_id, + float *dataset_buffer, const char *name) { // Create a dataset id for density hid_t dataset_id = H5Dcreate(file_id, name, H5T_IEEE_F32BE, dataspace_id, @@ -1347,7 +1349,8 @@ void Write_HDF5_Field_1D_CPU(Header H, hid_t file_id, hid_t dataspace_id, int id = H.n_ghost; memcpy(&dataset_buffer[0], &(source[id]), H.nx_real * sizeof(Real)); // Buffer write to HDF5 Dataset - herr_t status = HDF5_Dataset(file_id, dataspace_id, dataset_buffer, name); + herr_t status = + Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer, name); } void Write_HDF5_Field_1D_CPU(Header H, hid_t file_id, hid_t dataspace_id, @@ -1360,7 +1363,8 @@ void Write_HDF5_Field_1D_CPU(Header H, hid_t file_id, hid_t dataspace_id, dataset_buffer[i] = (float)source[i + H.n_ghost]; } // Buffer write to HDF5 Dataset - herr_t status = HDF5_Dataset(file_id, dataspace_id, dataset_buffer, name); + herr_t status = + Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer, name); } void Write_HDF5_Field_2D_CPU(Header H, hid_t file_id, hid_t dataspace_id, @@ -1377,7 +1381,8 @@ void Write_HDF5_Field_2D_CPU(Header H, hid_t file_id, hid_t dataspace_id, } } // Buffer write to HDF5 Dataset - herr_t status = HDF5_Dataset(file_id, dataspace_id, dataset_buffer, name); + herr_t status = + Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer, name); } // Convert double to float if necessary @@ -1395,7 +1400,8 @@ void Write_HDF5_Field_2D_CPU(Header H, hid_t file_id, hid_t dataspace_id, } } // Buffer write to HDF5 Dataset - herr_t status = HDF5_Dataset(file_id, dataspace_id, dataset_buffer, name); + herr_t status = + Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer, name); } /*! \fn void Write_Grid_HDF5(hid_t file_id) @@ -1488,7 +1494,8 @@ void Grid3D::Write_Grid_HDF5(hid_t file_id) memcpy(&dataset_buffer[0], &(C.scalar[id + s * H.n_cells]), H.nx_real * sizeof(Real)); // dataset here is just a name - status = HDF5_Dataset(file_id, dataspace_id, dataset_buffer, dataset); + status = + Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer, dataset); } #endif // SCALAR @@ -1547,7 +1554,8 @@ void Grid3D::Write_Grid_HDF5(hid_t file_id) } } - status = HDF5_Dataset(file_id, dataspace_id, dataset_buffer, dataset); + status = + Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer, dataset); } #endif // SCALAR @@ -1634,7 +1642,8 @@ void Grid3D::Write_Grid_HDF5(hid_t file_id) } } } - status = HDF5_Dataset(file_id, dataspace_id, dataset_buffer, dataset); + status = + Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer, dataset); } #else // COOLING_GRACKLE or CHEMISTRY_GPU. Write Chemistry when using // GRACKLE @@ -1654,7 +1663,8 @@ void Grid3D::Write_Grid_HDF5(hid_t file_id) } } } - status = HDF5_Dataset(file_id, dataspace_id, dataset_buffer, "/HI_density"); + status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer, + "/HI_density"); for (k = 0; k < H.nz_real; k++) { for (j = 0; j < H.ny_real; j++) { @@ -1672,8 +1682,8 @@ void Grid3D::Write_Grid_HDF5(hid_t file_id) } } if (output_full_ionization || H.Output_Complete_Data) { - status = - HDF5_Dataset(file_id, dataspace_id, dataset_buffer, "/HII_density"); + status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer, + "/HII_density"); } for (k = 0; k < H.nz_real; k++) { @@ -1692,8 +1702,8 @@ void Grid3D::Write_Grid_HDF5(hid_t file_id) } } if (output_full_ionization || H.Output_Complete_Data) { - status = - HDF5_Dataset(file_id, dataspace_id, dataset_buffer, "/HeI_density"); + status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer, + "/HeI_density"); } for (k = 0; k < H.nz_real; k++) { for (j = 0; j < H.ny_real; j++) { @@ -1710,8 +1720,8 @@ void Grid3D::Write_Grid_HDF5(hid_t file_id) } } } - status = - HDF5_Dataset(file_id, dataspace_id, dataset_buffer, "/HeII_density"); + status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer, + "/HeII_density"); for (k = 0; k < H.nz_real; k++) { for (j = 0; j < H.ny_real; j++) { @@ -1728,8 +1738,8 @@ void Grid3D::Write_Grid_HDF5(hid_t file_id) } } } - status = - HDF5_Dataset(file_id, dataspace_id, dataset_buffer, "/HeIII_density"); + status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer, + "/HeIII_density"); for (k = 0; k < H.nz_real; k++) { for (j = 0; j < H.ny_real; j++) { @@ -1747,8 +1757,8 @@ void Grid3D::Write_Grid_HDF5(hid_t file_id) } } if (output_electrons || H.Output_Complete_Data) { - status = - HDF5_Dataset(file_id, dataspace_id, dataset_buffer, "/e_density"); + status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer, + "/e_density"); } #ifdef GRACKLE_METALS @@ -1763,8 +1773,8 @@ void Grid3D::Write_Grid_HDF5(hid_t file_id) } } if (output_metals || H.Output_Complete_Data) { - status = - HDF5_Dataset(file_id, dataspace_id, dataset_buffer, "/metal_density"); + status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer, + "/metal_density"); } #endif // GRACKLE_METALS @@ -1793,8 +1803,8 @@ void Grid3D::Write_Grid_HDF5(hid_t file_id) } } - status = - HDF5_Dataset(file_id, dataspace_id, dataset_buffer, "/temperature"); + status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer, + "/temperature"); #endif // OUTPUT_TEMPERATURE @@ -1827,8 +1837,8 @@ void Grid3D::Write_Grid_HDF5(hid_t file_id) } } } - status = - HDF5_Dataset(file_id, dataspace_id, dataset_buffer, "/grav_potential"); + status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer, + "/grav_potential"); #endif // GRAVITY and OUTPUT_POTENTIAL #ifdef MHD @@ -1956,14 +1966,14 @@ void Grid3D::Write_Projection_HDF5(hid_t file_id) } // Write the projected density and temperature arrays to file - status = - HDF5_Dataset(file_id, dataspace_xy_id, dataset_buffer_dxy, "/d_xy"); - status = - HDF5_Dataset(file_id, dataspace_xz_id, dataset_buffer_dxz, "/d_xz"); - status = - HDF5_Dataset(file_id, dataspace_xy_id, dataset_buffer_Txy, "/T_xy"); - status = - HDF5_Dataset(file_id, dataspace_xy_id, dataset_buffer_Txz, "/T_xz"); + status = Write_HDF5_Dataset(file_id, dataspace_xy_id, dataset_buffer_dxy, + "/d_xy"); + status = Write_HDF5_Dataset(file_id, dataspace_xz_id, dataset_buffer_dxz, + "/d_xz"); + status = Write_HDF5_Dataset(file_id, dataspace_xy_id, dataset_buffer_Txy, + "/T_xy"); + status = Write_HDF5_Dataset(file_id, dataspace_xy_id, dataset_buffer_Txz, + "/T_xz"); // Free the dataspace ids status = H5Sclose(dataspace_xz_id); @@ -2112,16 +2122,16 @@ void Grid3D::Write_Rotated_Projection_HDF5(hid_t file_id) } // Write projected d,T,vx,vy,vz - status = - HDF5_Dataset(file_id, dataspace_xzr_id, dataset_buffer_dxzr, "/d_xzr"); - status = - HDF5_Dataset(file_id, dataspace_xzr_id, dataset_buffer_Txzr, "/T_xzr"); - status = HDF5_Dataset(file_id, dataspace_xzr_id, dataset_buffer_vxxzr, - "/vx_xzr"); - status = HDF5_Dataset(file_id, dataspace_xzr_id, dataset_buffer_vyxzr, - "/vy_xzr"); - status = HDF5_Dataset(file_id, dataspace_xzr_id, dataset_buffer_vzxzr, - "/vz_xzr"); + status = Write_HDF5_Dataset(file_id, dataspace_xzr_id, dataset_buffer_dxzr, + "/d_xzr"); + status = Write_HDF5_Dataset(file_id, dataspace_xzr_id, dataset_buffer_Txzr, + "/T_xzr"); + status = Write_HDF5_Dataset(file_id, dataspace_xzr_id, dataset_buffer_vxxzr, + "/vx_xzr"); + status = Write_HDF5_Dataset(file_id, dataspace_xzr_id, dataset_buffer_vyxzr, + "/vy_xzr"); + status = Write_HDF5_Dataset(file_id, dataspace_xzr_id, dataset_buffer_vzxzr, + "/vz_xzr"); // Free the dataspace id status = H5Sclose(dataspace_xzr_id); @@ -2243,17 +2253,23 @@ void Grid3D::Write_Slices_HDF5(hid_t file_id) } // Write out the xy datasets for each variable - status = HDF5_Dataset(file_id, dataspace_id, dataset_buffer_d, "/d_xy"); - status = HDF5_Dataset(file_id, dataspace_id, dataset_buffer_mx, "/mx_xy"); - status = HDF5_Dataset(file_id, dataspace_id, dataset_buffer_my, "/my_xy"); - status = HDF5_Dataset(file_id, dataspace_id, dataset_buffer_mz, "/mz_xy"); - status = HDF5_Dataset(file_id, dataspace_id, dataset_buffer_E, "/E_xy"); + status = + Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_d, "/d_xy"); + status = + Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_mx, "/mx_xy"); + status = + Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_my, "/my_xy"); + status = + Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_mz, "/mz_xy"); + status = + Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_E, "/E_xy"); #ifdef DE - status = HDF5_Dataset(file_id, dataspace_id, dataset_buffer_GE, "/GE_xy"); + status = + Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_GE, "/GE_xy"); #endif #ifdef SCALAR - status = HDF5_Dataset(file_id, dataspace_id, dataset_buffer_scalar, - "/scalar_xy"); + status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_scalar, + "/scalar_xy"); #endif // Free the dataspace id status = H5Sclose(dataspace_id); @@ -2339,17 +2355,23 @@ void Grid3D::Write_Slices_HDF5(hid_t file_id) } // Write out the xz datasets for each variable - status = HDF5_Dataset(file_id, dataspace_id, dataset_buffer_d, "/d_xz"); - status = HDF5_Dataset(file_id, dataspace_id, dataset_buffer_mx, "/mx_xz"); - status = HDF5_Dataset(file_id, dataspace_id, dataset_buffer_my, "/my_xz"); - status = HDF5_Dataset(file_id, dataspace_id, dataset_buffer_mz, "/mz_xz"); - status = HDF5_Dataset(file_id, dataspace_id, dataset_buffer_E, "/E_xz"); + status = + Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_d, "/d_xz"); + status = + Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_mx, "/mx_xz"); + status = + Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_my, "/my_xz"); + status = + Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_mz, "/mz_xz"); + status = + Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_E, "/E_xz"); #ifdef DE - status = HDF5_Dataset(file_id, dataspace_id, dataset_buffer_GE, "/GE_xz"); + status = + Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_GE, "/GE_xz"); #endif #ifdef SCALAR - status = HDF5_Dataset(file_id, dataspace_id, dataset_buffer_scalar, - "/scalar_xz"); + status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_scalar, + "/scalar_xz"); #endif // Free the dataspace id @@ -2436,17 +2458,23 @@ void Grid3D::Write_Slices_HDF5(hid_t file_id) } // Write out the yz datasets for each variable - status = HDF5_Dataset(file_id, dataspace_id, dataset_buffer_d, "/d_yz"); - status = HDF5_Dataset(file_id, dataspace_id, dataset_buffer_mx, "/mx_yz"); - status = HDF5_Dataset(file_id, dataspace_id, dataset_buffer_my, "/my_yz"); - status = HDF5_Dataset(file_id, dataspace_id, dataset_buffer_mz, "/mz_yz"); - status = HDF5_Dataset(file_id, dataspace_id, dataset_buffer_E, "/E_yz"); + status = + Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_d, "/d_yz"); + status = + Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_mx, "/mx_yz"); + status = + Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_my, "/my_yz"); + status = + Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_mz, "/mz_yz"); + status = + Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_E, "/E_yz"); #ifdef DE - status = HDF5_Dataset(file_id, dataspace_id, dataset_buffer_GE, "/GE_yz"); + status = + Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_GE, "/GE_yz"); #endif #ifdef SCALAR - status = HDF5_Dataset(file_id, dataspace_id, dataset_buffer_scalar, - "/scalar_yz"); + status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_scalar, + "/scalar_yz"); #endif // Free the dataspace id diff --git a/src/io/io.h b/src/io/io.h index 13919351e..2ce0e0284 100644 --- a/src/io/io.h +++ b/src/io/io.h @@ -53,12 +53,13 @@ void write_debug(Real* Value, const char* fname, int nValues, int iProc); #ifdef HDF5 // From io/io.cpp -herr_t Read_HDF5_Dataset(hid_t file_id, double* dataset_buffer, const char* name); +herr_t Read_HDF5_Dataset(hid_t file_id, double* dataset_buffer, + const char* name); -herr_t HDF5_Dataset(hid_t file_id, hid_t dataspace_id, double* dataset_buffer, - const char* name); -herr_t HDF5_Dataset(hid_t file_id, hid_t dataspace_id, float* dataset_buffer, - const char* name); +herr_t Write_HDF5_Dataset(hid_t file_id, hid_t dataspace_id, + double* dataset_buffer, const char* name); +herr_t Write_HDF5_Dataset(hid_t file_id, hid_t dataspace_id, + float* dataset_buffer, const char* name); // From io/io_gpu.cu // Use GPU to pack source -> device_buffer, then copy device_buffer -> buffer, diff --git a/src/io/io_gpu.cu b/src/io/io_gpu.cu index 253d9ae79..ddf51c946 100644 --- a/src/io/io_gpu.cu +++ b/src/io/io_gpu.cu @@ -85,7 +85,7 @@ void WriteHDF5Field3D(int nx, int ny, int nx_real, int ny_real, int nz_real, cudaMemcpyDeviceToHost)); // Write Buffer to HDF5 - status = HDF5_Dataset(file_id, dataspace_id, buffer, name); + status = Write_HDF5_Dataset(file_id, dataspace_id, buffer, name); status = H5Sclose(dataspace_id); if (status < 0) { @@ -118,7 +118,7 @@ void WriteHDF5Field3D(int nx, int ny, int nx_real, int ny_real, int nz_real, cudaMemcpyDeviceToHost)); // Write Buffer to HDF5 - status = HDF5_Dataset(file_id, dataspace_id, buffer, name); + status = Write_HDF5_Dataset(file_id, dataspace_id, buffer, name); status = H5Sclose(dataspace_id); if (status < 0) { diff --git a/src/main.cpp b/src/main.cpp index b5913f7ae..ba8f8786c 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -226,9 +226,9 @@ int main(int argc, char *argv[]) chprintf("Starting calculations.\n"); message = "Starting calculations."; Write_Message_To_Log_File(message.c_str()); - + // Compute inverse timestep for the first time - dti = G.Calc_DTI(); + dti = G.Calc_Inverse_Timestep(); while (G.H.t < P.tout) { // get the start time From 3a6068d6c358231c9bcefe3c36b27397e450b7b3 Mon Sep 17 00:00:00 2001 From: Alwin Date: Mon, 6 Feb 2023 21:51:11 -0800 Subject: [PATCH 195/694] format some more --- src/particles/feedback_CIC_gpu.cu | 117 +++++++++++++++--------------- 1 file changed, 60 insertions(+), 57 deletions(-) diff --git a/src/particles/feedback_CIC_gpu.cu b/src/particles/feedback_CIC_gpu.cu index a9dad701d..a8f26bd98 100644 --- a/src/particles/feedback_CIC_gpu.cu +++ b/src/particles/feedback_CIC_gpu.cu @@ -26,7 +26,7 @@ #define i_UNRES_ENERGY 5 typedef curandStateMRG32k3a_t feedback_prng_t; -//typedef curandStatePhilox4_32_10_t feedback_prng_t; +// typedef curandStatePhilox4_32_10_t feedback_prng_t; namespace supernova { @@ -51,8 +51,7 @@ __device__ double atomicMax(double* address, double val) } #endif // O_HIP -__global__ void initState_kernel(unsigned int seed, - feedback_prng_t* states) +__global__ void initState_kernel(unsigned int seed, feedback_prng_t* states) { int id = blockIdx.x * blockDim.x + threadIdx.x; curand_init(seed, id, 0, &states[id]); @@ -144,7 +143,7 @@ void supernova::initState(struct parameters* P, part_int_t n_local, randStates); CHECK(cudaDeviceSynchronize()); chprintf("supernova::initState end: n_states=%ld, ngrid=%d, threads=%d\n", - n_states, ngrid, TPB_FEEDBACK); + n_states, ngrid, TPB_FEEDBACK); } __device__ Real GetSNRate(Real t, Real* dev_snr, Real snr_dt, Real t_start, @@ -209,19 +208,22 @@ __device__ Real GetAverageDensity(Real* density, int xi, int yi, int zi, } __device__ Real GetAverageNumberDensity_CGS(Real* density, int xi, int yi, - int zi, int nx_grid, int ny_grid, int n_ghost) + int zi, int nx_grid, int ny_grid, + int n_ghost) { - return GetAverageDensity(density, xi, yi, zi, nx_grid, ny_grid, n_ghost) * DENSITY_UNIT / - (supernova::MU * MP); + return GetAverageDensity(density, xi, yi, zi, nx_grid, ny_grid, n_ghost) * + DENSITY_UNIT / (supernova::MU * MP); } -__device__ bool Particle_Is_Alone(Real* pos_x_dev, Real* pos_y_dev, Real* pos_z_dev, part_int_t n_local, int gtid, Real dx) +__device__ bool Particle_Is_Alone(Real* pos_x_dev, Real* pos_y_dev, + Real* pos_z_dev, part_int_t n_local, int gtid, + Real dx) { Real x0 = pos_x_dev[gtid]; Real y0 = pos_y_dev[gtid]; Real z0 = pos_z_dev[gtid]; // Brute force loop to see if particle is alone - for (int i=0;i dx) continue; if (abs(y0 - pos_y_dev[i]) > dx) continue; @@ -238,10 +240,9 @@ __global__ void Cluster_Feedback_Kernel( Real zMin, Real xMax, Real yMax, Real zMax, Real dx, Real dy, Real dz, int nx_g, int ny_g, int nz_g, int n_ghost, Real t, Real dt, Real* dti, Real* info, Real* density, Real* gasEnergy, Real* energy, Real* momentum_x, - Real* momentum_y, Real* momentum_z, Real gamma, - feedback_prng_t* states, Real* prev_dens, int* prev_N, - short direction, Real* dev_snr, Real snr_dt, Real time_sn_start, - Real time_sn_end, int n_step) + Real* momentum_y, Real* momentum_z, Real gamma, feedback_prng_t* states, + Real* prev_dens, int* prev_N, short direction, Real* dev_snr, Real snr_dt, + Real time_sn_start, Real time_sn_end, int n_step) { __shared__ Real s_info[FEED_INFO_N * @@ -274,8 +275,9 @@ __global__ void Cluster_Feedback_Kernel( pos_x = pos_x_dev[gtid]; pos_y = pos_y_dev[gtid]; pos_z = pos_z_dev[gtid]; - // kernel_printf("(%d): pos:(%.4e, %.4e, %.4e)\n", gtid, pos_x, pos_y, pos_z); - // kernel_printf("(%d): MIN:(%.4e, %.4e, %.4e)\n", gtid, xMin, yMin, xMin); + // kernel_printf("(%d): pos:(%.4e, %.4e, %.4e)\n", gtid, pos_x, pos_y, + // pos_z); kernel_printf("(%d): MIN:(%.4e, %.4e, %.4e)\n", gtid, xMin, yMin, + // xMin); bool in_local = (pos_x >= xMin && pos_x < xMax) && (pos_y >= yMin && pos_y < yMax) && @@ -303,7 +305,8 @@ __global__ void Cluster_Feedback_Kernel( } // Avoid overlap issues for now - bool is_alone = Particle_Is_Alone(pos_x_dev, pos_y_dev, pos_z_dev, n_local, gtid, 6*dx) ; + bool is_alone = Particle_Is_Alone(pos_x_dev, pos_y_dev, pos_z_dev, n_local, + gtid, 6 * dx); if (!ignore && in_local && is_alone) { int N = 0; @@ -313,29 +316,28 @@ __global__ void Cluster_Feedback_Kernel( N = -prev_N[gtid]; else { Real average_num_sn = GetSNRate(t - age_dev[gtid], dev_snr, snr_dt, - time_sn_start, time_sn_end) * mass_dev[gtid] * dt; + time_sn_start, time_sn_end) * + mass_dev[gtid] * dt; - //N = (int) (average_num_sn + 0.5); + // N = (int) (average_num_sn + 0.5); + feedback_prng_t state; // = states[0]; // load initial state - feedback_prng_t state;// = states[0]; // load initial state - - curand_init(42,0,0,&state); + curand_init(42, 0, 0, &state); unsigned long long skip = n_step * 10000 + id[gtid]; - skipahead(skip, &state); // provided by curand + skipahead(skip, &state); // provided by curand unsigned int debug_state = curand(&state); + // state = states[gtid]; - //state = states[gtid]; - - - - - N = (int) curand_poisson(&state, average_num_sn); - printf("PRNG DEBUG: n_step: %d id: %d skip: %llu debug_state: %u N: %d \n", - n_step, (int) id[gtid], skip, debug_state, N); - //states[gtid] = state; // don't write back to state, keep it pristine - prev_N[gtid] = N; + N = (int)curand_poisson(&state, average_num_sn); + printf( + "PRNG DEBUG: n_step: %d id: %d skip: %llu debug_state: %u N: %d " + "\n", + n_step, (int)id[gtid], skip, debug_state, N); + // states[gtid] = state; // don't write back to state, keep it + // pristine + prev_N[gtid] = N; } if (N != 0) { mass_dev[gtid] -= N * supernova::MASS_PER_SN; @@ -476,8 +478,9 @@ __global__ void Cluster_Feedback_Kernel( delta_x = (pos_x - xMin - indx_x * dx) / dx; delta_y = (pos_y - yMin - indx_y * dy) / dy; delta_z = (pos_z - zMin - indx_z * dz) / dz; - // kernel_printf("(%d):indx:(%d, %d, %d)\n", gtid, indx_x, indx_y, indx_z); - // kernel_printf("(%d): pos:(%.4e, %.4e, %.4e), delta_x (%.2e, %.2e, + // kernel_printf("(%d):indx:(%d, %d, %d)\n", gtid, indx_x, indx_y, + // indx_z); kernel_printf("(%d): pos:(%.4e, %.4e, %.4e), delta_x + // (%.2e, %.2e, // %.2e)\n", gtid, pos_x, pos_y, pos_z, delta_x, delta_y, delta_z); indx_x += n_ghost; @@ -517,12 +520,12 @@ __global__ void Cluster_Feedback_Kernel( // d = frac(i, delta_x) * frac(j, delta_y) * frac(k, delta_z) // * feedback_density; e = frac(i, delta_x) * frac(j, - // delta_y) * frac(k, delta_z) * feedback_energy; kernel_printf("(%d, - // %d, %d): delta:(%.4e, %.4e, %.4e), frac: %.4e\n", indx_x, - // indx_y, indx_z, delta_x, delta_y, delta_z, frac(i, - // delta_x)*frac(j, delta_y)*frac(k, delta_z)); kernel_printf("(%d, - // %d, %d):(%d SN) (i:%d, j:%d, k:%d) before: %.4e\n", indx_x, - // indx_y, indx_z, N, i, j, k, + // delta_y) * frac(k, delta_z) * feedback_energy; + // kernel_printf("(%d, %d, %d): delta:(%.4e, %.4e, %.4e), + // frac: %.4e\n", indx_x, indx_y, indx_z, delta_x, delta_y, + // delta_z, frac(i, delta_x)*frac(j, delta_y)*frac(k, + // delta_z)); kernel_printf("(%d, %d, %d):(%d SN) (i:%d, j:%d, + // k:%d) before: %.4e\n", indx_x, indx_y, indx_z, N, i, j, k, // density[indx]*DENSITY_UNIT/0.6/MP); // v_1 = sqrt((momentum_x[indx]*momentum_x[indx] + @@ -560,8 +563,8 @@ __global__ void Cluster_Feedback_Kernel( density[indx] * DENSITY_UNIT / 0.6 / MP, n_0); } - - //printf("INDX DEBUG: n_step: %d id: %d indx: %d \n", n_step, (int) id[gtid], indx); + // printf("INDX DEBUG: n_step: %d id: %d indx: %d \n", n_step, + // (int) id[gtid], indx); if (indx >= nx_g * ny_g * nz_g) { printf("INDX DEBUG\n"); @@ -570,7 +573,7 @@ __global__ void Cluster_Feedback_Kernel( atomicAdd(&momentum_x[indx], px); atomicAdd(&momentum_y[indx], py); atomicAdd(&momentum_z[indx], pz); - + /* density[indx] = d; energy[indx] = (momentum_x[indx] * momentum_x[indx] + @@ -627,23 +630,24 @@ __global__ void Cluster_Feedback_Kernel( // momentum_y[indx]*momentum_y[indx] + // momentum_z[indx]*momentum_z[indx])*VELOCITY_UNIT/1e5; - // kernel_printf("(%d, %d, %d):(CM: %.2e, SN: %d) (i:%d, j:%d, k:%d) - // v_1: %.5e v_2: %.5e V_DIFF-> %.4f %%\n", indx_x, indx_y, - // indx_z, mass_dev[gtid], N, i, j, k, v_1, v_2, - // (v_2-v_1)/v_1*100); kernel_printf(" (%d, %d, %d):(%d SN) (i:%d, - // j:%d, k:%d) T_b: %.5e T_a: %.5e T_DIFF-> %.4f %%\n", - // indx_x, indx_y, indx_z, N, i, j, k, t_b, t_a, - // (t_a-t_b)/t_b*100); kernel_printf(" (%d, %d, %d):(%d SN) - // (i:%d, j:%d, k:%d) d_b: %.5e d_a: %.5e D_DIFF-> %.1f + // kernel_printf("(%d, %d, %d):(CM: %.2e, SN: %d) (i:%d, j:%d, + // k:%d) v_1: %.5e v_2: %.5e V_DIFF-> %.4f %%\n", indx_x, + // indx_y, indx_z, mass_dev[gtid], N, i, j, k, v_1, v_2, + // (v_2-v_1)/v_1*100); kernel_printf(" (%d, %d, %d):(%d SN) + // (i:%d, j:%d, k:%d) T_b: %.5e T_a: %.5e T_DIFF-> %.4f + // %%\n", indx_x, indx_y, indx_z, N, i, j, k, t_b, t_a, + // (t_a-t_b)/t_b*100); kernel_printf(" (%d, %d, %d):(%d + // SN) (i:%d, j:%d, k:%d) d_b: %.5e d_a: %.5e D_DIFF-> %.1f // %%\n", indx_x, indx_y, indx_z, N, i, j, k, d_b, d_a, - // (d_a-d_b)/d_b*100); kernel_printf(" (%d, %d, %d):(%d SN) - // (i:%d, j:%d, k:%d) p_b: %.5e p_a: %.5e P_DIFF-> %.4f + // (d_a-d_b)/d_b*100); kernel_printf(" (%d, %d, + // %d):(%d SN) (i:%d, j:%d, k:%d) p_b: %.5e p_a: %.5e P_DIFF-> + // %.4f // %%\n", indx_x, indx_y, indx_z, N, i, j, k, p_b, p_a, // (p_a-p_b)/p_b*100); if (direction > 0) { - // kernel_printf("urs time:%.3e id:%d N:%d d:%.5e\n", t, id[gtid], - // N, n_0); + // kernel_printf("urs time:%.3e id:%d N:%d d:%.5e\n", t, + // id[gtid], N, n_0); local_dti = fmax( local_dti, Calc_Timestep(gamma, density, momentum_x, momentum_y, @@ -749,8 +753,7 @@ Real supernova::Cluster_Feedback(Grid3D& G, FeedbackAnalysis& analysis) MPI_Barrier(world); #endif // MPI_CHOLLA - if (h_dti != 0 && (C_cfl / h_dti < G.H.dt)) - { + if (h_dti != 0 && (C_cfl / h_dti < G.H.dt)) { // timestep too big: need to undo the last operation direction = -1; if (G.Particles.n_local > 0) { From 6a145997e27de75881fe6927a2180ef3ff7ab9bc Mon Sep 17 00:00:00 2001 From: Alwin Date: Mon, 6 Feb 2023 21:57:50 -0800 Subject: [PATCH 196/694] format some more --- src/gravity/gravity_restart.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/gravity/gravity_restart.cpp b/src/gravity/gravity_restart.cpp index 1eb660063..12f9d9271 100644 --- a/src/gravity/gravity_restart.cpp +++ b/src/gravity/gravity_restart.cpp @@ -8,6 +8,11 @@ #include "../io/io.h" #endif +#ifdef MPI_CHOLLA +// provides procID + #include "../mpi/mpi_routines.h" +#endif // MPI_CHOLLA + #ifdef HDF5 #include #endif From c5336cc62dd29dc05fe859c78cb66135a0a50173 Mon Sep 17 00:00:00 2001 From: Alwin Date: Mon, 6 Feb 2023 22:44:52 -0800 Subject: [PATCH 197/694] fix minor typo --- src/gravity/gravity_restart.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gravity/gravity_restart.cpp b/src/gravity/gravity_restart.cpp index 12f9d9271..cbd0f94df 100644 --- a/src/gravity/gravity_restart.cpp +++ b/src/gravity/gravity_restart.cpp @@ -96,5 +96,5 @@ void Grav3D::Write_Restart_HDF5(struct parameters* P, int nfile) // Do nothing void Grav3D::Read_Restart_HDF5(struct parameters* P, int nfile) {} -void Grav3D::Write_Restart_HDF5(struct parameters P, int nfile) {} +void Grav3D::Write_Restart_HDF5(struct parameters* P, int nfile) {} #endif From 1360271ef9e54db8069a5b63a3e813da7e61d9af Mon Sep 17 00:00:00 2001 From: Alwin Date: Mon, 6 Feb 2023 22:50:15 -0800 Subject: [PATCH 198/694] fix minor typo --- src/utils/gpu.hpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/utils/gpu.hpp b/src/utils/gpu.hpp index c7968abab..52acbca24 100644 --- a/src/utils/gpu.hpp +++ b/src/utils/gpu.hpp @@ -107,6 +107,7 @@ static constexpr int maxWarpsPerBlock = 1024 / WARPSIZE; #define curandStateMRG32k3a_t hiprandStateMRG32k3a_t #define curand_init hiprand_init + #define curand hiprand #define curand_poisson hiprand_poisson static void __attribute__((unused)) From 9e97aeb157c8ce085ef07d3c039f59d96decad3b Mon Sep 17 00:00:00 2001 From: Alwin Date: Mon, 6 Feb 2023 23:04:00 -0800 Subject: [PATCH 199/694] undo experimental changes --- src/particles/feedback_CIC_gpu.cu | 26 +++++++------------------- 1 file changed, 7 insertions(+), 19 deletions(-) diff --git a/src/particles/feedback_CIC_gpu.cu b/src/particles/feedback_CIC_gpu.cu index a8f26bd98..e4c95e7b5 100644 --- a/src/particles/feedback_CIC_gpu.cu +++ b/src/particles/feedback_CIC_gpu.cu @@ -305,10 +305,10 @@ __global__ void Cluster_Feedback_Kernel( } // Avoid overlap issues for now - bool is_alone = Particle_Is_Alone(pos_x_dev, pos_y_dev, pos_z_dev, n_local, - gtid, 6 * dx); + //bool is_alone = Particle_Is_Alone(pos_x_dev, pos_y_dev, pos_z_dev, n_local, + // gtid, 6 * dx); - if (!ignore && in_local && is_alone) { + if (!ignore && in_local) { int N = 0; // only calculate this if there will be SN feedback if ((t - age_dev[gtid]) <= time_sn_end) { @@ -326,15 +326,10 @@ __global__ void Cluster_Feedback_Kernel( curand_init(42, 0, 0, &state); unsigned long long skip = n_step * 10000 + id[gtid]; skipahead(skip, &state); // provided by curand - unsigned int debug_state = curand(&state); - - // state = states[gtid]; + // unsigned int debug_state = curand(&state); N = (int)curand_poisson(&state, average_num_sn); - printf( - "PRNG DEBUG: n_step: %d id: %d skip: %llu debug_state: %u N: %d " - "\n", - n_step, (int)id[gtid], skip, debug_state, N); + // states[gtid] = state; // don't write back to state, keep it // pristine prev_N[gtid] = N; @@ -563,18 +558,11 @@ __global__ void Cluster_Feedback_Kernel( density[indx] * DENSITY_UNIT / 0.6 / MP, n_0); } - // printf("INDX DEBUG: n_step: %d id: %d indx: %d \n", n_step, - // (int) id[gtid], indx); - - if (indx >= nx_g * ny_g * nz_g) { - printf("INDX DEBUG\n"); - } - atomicAdd(&momentum_x[indx], px); atomicAdd(&momentum_y[indx], py); atomicAdd(&momentum_z[indx], pz); - /* + density[indx] = d; energy[indx] = (momentum_x[indx] * momentum_x[indx] + momentum_y[indx] * momentum_y[indx] + @@ -582,7 +570,7 @@ __global__ void Cluster_Feedback_Kernel( 2 / density[indx] + gasEnergy[indx]; - */ + // atomicAdd( &energy[indx], e ); // atomicAdd( &density[indx], d ); From 513be879f651824b9f3316b39cc07c673a9aaad3 Mon Sep 17 00:00:00 2001 From: Alwin Date: Mon, 6 Feb 2023 23:10:34 -0800 Subject: [PATCH 200/694] format --- src/particles/feedback_CIC_gpu.cu | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/particles/feedback_CIC_gpu.cu b/src/particles/feedback_CIC_gpu.cu index e4c95e7b5..1a55479ce 100644 --- a/src/particles/feedback_CIC_gpu.cu +++ b/src/particles/feedback_CIC_gpu.cu @@ -305,7 +305,8 @@ __global__ void Cluster_Feedback_Kernel( } // Avoid overlap issues for now - //bool is_alone = Particle_Is_Alone(pos_x_dev, pos_y_dev, pos_z_dev, n_local, + // bool is_alone = Particle_Is_Alone(pos_x_dev, pos_y_dev, pos_z_dev, + // n_local, // gtid, 6 * dx); if (!ignore && in_local) { @@ -562,7 +563,6 @@ __global__ void Cluster_Feedback_Kernel( atomicAdd(&momentum_y[indx], py); atomicAdd(&momentum_z[indx], pz); - density[indx] = d; energy[indx] = (momentum_x[indx] * momentum_x[indx] + momentum_y[indx] * momentum_y[indx] + @@ -570,8 +570,6 @@ __global__ void Cluster_Feedback_Kernel( 2 / density[indx] + gasEnergy[indx]; - - // atomicAdd( &energy[indx], e ); // atomicAdd( &density[indx], d ); From 1b646b831bdeb9a5264d9bcf2d3f9b0d8dc0f82e Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Tue, 7 Feb 2023 09:33:31 -0500 Subject: [PATCH 201/694] Update test data for MHD sod test --- cholla-tests-data | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cholla-tests-data b/cholla-tests-data index c069bb7a6..4f3087125 160000 --- a/cholla-tests-data +++ b/cholla-tests-data @@ -1 +1 @@ -Subproject commit c069bb7a6de79546f60d3ea47f6c10ba19df3c76 +Subproject commit 4f3087125f6bf3fee07221c29a59b962b4b4c39e From a068453213992ea96163aaf9f9f209ddd7290928 Mon Sep 17 00:00:00 2001 From: Alwin Date: Tue, 7 Feb 2023 14:40:10 -0800 Subject: [PATCH 202/694] remove flags --- builds/make.type.debug_disk | 56 --------------------------------- src/global/global_cuda.h | 16 ++-------- src/gravity/gravity_restart.cpp | 10 ++++-- src/io/io.cpp | 2 +- src/particles/density_CIC.cpp | 2 -- 5 files changed, 12 insertions(+), 74 deletions(-) delete mode 100644 builds/make.type.debug_disk diff --git a/builds/make.type.debug_disk b/builds/make.type.debug_disk deleted file mode 100644 index 2050699dc..000000000 --- a/builds/make.type.debug_disk +++ /dev/null @@ -1,56 +0,0 @@ -DFLAGS += -DDISABLE_KERNEL_PRINTF - -MPI_GPU = -DMPI_GPU -DFLAGS += -DPARTICLES -#DFLAGS += -DPARTICLES_CPU -DFLAGS += -DPARTICLES_GPU -#DFLAGS += -DONLY_PARTICLES -DFLAGS += -DPARTICLE_IDS -#DFLAGS += -DSINGLE_PARTICLE_MASS -DFLAGS += -DPARTICLE_AGE -DFLAGS += -DSUPERNOVA #this flag requires PARTICLE_AGE, PARTICLE_IDS -DFLAGS += -DANALYSIS -#DFLAGS += -DPARTICLES_KDK - - - -DFLAGS += -DGRAVITY -DFLAGS += -DGRAVITY_GPU -DFLAGS += -DGRAVITY_RESTART # For making gravity restarts more consistent -DFLAGS += -DDISABLE_DENSITY_CIC # For preventing atomicAdd inconsistency -# Use both -DSOR and -DPARIS_GALACTIC to run analytic test and compare solutions -#DFLAGS += -DSOR -DFLAGS += -DPARIS_GALACTIC -DFLAGS += -DGRAVITY_ANALYTIC_COMP -DFLAGS += -DGRAVITY_5_POINTS_GRADIENT - -#DFLAGS += -DSTATIC_GRAV - -#DFLAGS += -DOUTPUT_ALWAYS -DFLAGS += -DCUDA -DFLAGS += -DMPI_CHOLLA -DFLAGS += -DPRECISION=2 -DFLAGS += -DPPMC -DFLAGS += -DHLLC -DFLAGS += -DVL - -DFLAGS += -DDISK_ICS - -DFLAGS += -DDENSITY_FLOOR -DFLAGS += -DTEMPERATURE_FLOOR -DFLAGS += -DCOOLING_GPU -#DFLAGS += -DCLOUDY_COOL -DFLAGS += -DDE -DFLAGS += -DCPU_TIME -DFLAGS += -DAVERAGE_SLOW_CELLS -DFLAGS += -DHYDRO_GPU - -OUTPUT ?= -DOUTPUT -DHDF5 -DSLICES -DPROJECTION -DFLAGS += $(OUTPUT) - -DFLAGS += $(MPI_GPU) - -DFLAGS += -DPARALLEL_OMP -DFLAGS += -DN_OMP_THREADS=$(OMP_NUM_THREADS) - -#DFLAGS += -DCUDA_ERROR_CHECK diff --git a/src/global/global_cuda.h b/src/global/global_cuda.h index 1ef6750a4..214e49e23 100644 --- a/src/global/global_cuda.h +++ b/src/global/global_cuda.h @@ -116,19 +116,9 @@ __device__ double atomicAdd(double *address, double val) } #endif - // This helper function exists to easily enable/disable printfs inside kernels - // And makes it easier to find printfs inside kernels - - #ifndef DISABLE_KERNEL_PRINTF - #define kernel_printf printf - #else -inline __device__ int kernel_printf(const char *format, ...) -{ - // printf returns number of characters printed if success, negative value - // otherwise - return 0; -} - #endif // DISABLE_KERNEL_PRINTF + // This helper function exists to make it easier to find printfs inside + // kernels + #define kernel_printf printf #endif // GLOBAL_CUDA_H diff --git a/src/gravity/gravity_restart.cpp b/src/gravity/gravity_restart.cpp index cbd0f94df..a283b4441 100644 --- a/src/gravity/gravity_restart.cpp +++ b/src/gravity/gravity_restart.cpp @@ -26,7 +26,7 @@ void Gravity_Restart_Filename(char* filename, char* dirname, int nfile) #endif } -#if defined(GRAVITY) && defined(GRAVITY_RESTART) && defined(HDF5) +#if defined(GRAVITY) && defined(HDF5) void Grav3D::Read_Restart_HDF5(struct parameters* P, int nfile) { H5open(); @@ -94,7 +94,13 @@ void Grav3D::Write_Restart_HDF5(struct parameters* P, int nfile) #elif defined(GRAVITY) // Do nothing -void Grav3D::Read_Restart_HDF5(struct parameters* P, int nfile) {} +void Grav3D::Read_Restart_HDF5(struct parameters* P, int nfile) +{ + chprintf("WARNING from file %s line %d: Read_Restart_HDF5 did nothing", + __FILE__, __LINE__); +} void Grav3D::Write_Restart_HDF5(struct parameters* P, int nfile) {} +chprintf("WARNING from file %s line %d: Write_Restart_HDF5 did nothing", + __FILE__, __LINE__); #endif diff --git a/src/io/io.cpp b/src/io/io.cpp index 97efb972f..113a091bb 100644 --- a/src/io/io.cpp +++ b/src/io/io.cpp @@ -157,7 +157,7 @@ void WriteData(Grid3D &G, struct parameters P, int nfile) H5close(); #endif -#if defined(GRAVITY) && defined(GRAVITY_RESTART) && defined(HDF5) +#if defined(GRAVITY) && defined(HDF5) G.Grav.Write_Restart_HDF5(&P, nfile); #endif diff --git a/src/particles/density_CIC.cpp b/src/particles/density_CIC.cpp index 133c41704..aac2a2056 100644 --- a/src/particles/density_CIC.cpp +++ b/src/particles/density_CIC.cpp @@ -41,9 +41,7 @@ void Grid3D::Copy_Particles_Density_to_Gravity(struct parameters P) // Step 1: Get Particles CIC Density Particles.Clear_Density(); - #ifndef DISABLE_DENSITY_CIC Particles.Get_Density_CIC(); - #endif #ifdef CPU_TIME Timer.Part_Density.End(); From fd742983dfd323207eee0da436360ba8f9763ef3 Mon Sep 17 00:00:00 2001 From: Alwin Date: Tue, 7 Feb 2023 14:44:31 -0800 Subject: [PATCH 203/694] remove flags --- src/gravity/gravity_restart.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gravity/gravity_restart.cpp b/src/gravity/gravity_restart.cpp index a283b4441..fbf300807 100644 --- a/src/gravity/gravity_restart.cpp +++ b/src/gravity/gravity_restart.cpp @@ -84,7 +84,7 @@ void Grav3D::Write_Restart_HDF5(struct parameters* P, int nfile) dims[0] = n_cells_potential; dataspace_id = H5Screate_simple(1, dims, NULL); - HDF5_Dataset(file_id, dataspace_id, F.potential_1_h, "/potential"); + Write_HDF5_Dataset(file_id, dataspace_id, F.potential_1_h, "/potential"); H5Sclose(dataspace_id); H5Fclose(file_id); From 312f96a4f1b70ee194de44adc1b470d4f4c0c61c Mon Sep 17 00:00:00 2001 From: Alwin Date: Tue, 7 Feb 2023 19:17:31 -0800 Subject: [PATCH 204/694] fix minor typo --- src/gravity/gravity_restart.cpp | 8 +++++--- src/hydro/hydro_cuda.cu | 2 +- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/src/gravity/gravity_restart.cpp b/src/gravity/gravity_restart.cpp index fbf300807..0733ef0c9 100644 --- a/src/gravity/gravity_restart.cpp +++ b/src/gravity/gravity_restart.cpp @@ -100,7 +100,9 @@ void Grav3D::Read_Restart_HDF5(struct parameters* P, int nfile) __FILE__, __LINE__); } -void Grav3D::Write_Restart_HDF5(struct parameters* P, int nfile) {} -chprintf("WARNING from file %s line %d: Write_Restart_HDF5 did nothing", - __FILE__, __LINE__); +void Grav3D::Write_Restart_HDF5(struct parameters* P, int nfile) +{ + chprintf("WARNING from file %s line %d: Write_Restart_HDF5 did nothing", + __FILE__, __LINE__); +} #endif diff --git a/src/hydro/hydro_cuda.cu b/src/hydro/hydro_cuda.cu index d9dc46e68..fb401cb0b 100644 --- a/src/hydro/hydro_cuda.cu +++ b/src/hydro/hydro_cuda.cu @@ -720,7 +720,7 @@ __global__ void Average_Slow_Cells_3D(Real *dev_conserved, int nx, int ny, P = (E - 0.5 * d * (vx * vx + vy * vy + vz * vz)) * (gamma - 1.0); cs = sqrt(d_inv * gamma * P) * VELOCITY_UNIT * 1e-5; // Average this cell - printf( + kernel_printf( " Average Slow Cell [ %d %d %d ] -> dt_cell=%f dt_min=%f, n=%.3e, " "T=%.3e, v=%.3e (%.3e, %.3e, %.3e), cs=%.3e\n", xid, yid, zid, 1. / max_dti, 1. / max_dti_slow, From 4408600dd4fd38a61413568ac34a6eb38dce3fa2 Mon Sep 17 00:00:00 2001 From: Alwin Date: Thu, 9 Feb 2023 12:00:34 -0800 Subject: [PATCH 205/694] hdf5 attribute refactor --- src/io/io.cpp | 266 +++++++++++++++----------------------------------- src/io/io.h | 5 + 2 files changed, 84 insertions(+), 187 deletions(-) diff --git a/src/io/io.cpp b/src/io/io.cpp index 55175b452..9b84ca509 100644 --- a/src/io/io.cpp +++ b/src/io/io.cpp @@ -655,82 +655,43 @@ void Grid3D::Write_Header_HDF5(hid_t file_id) H5Aclose(attribute_id); // Numeric Attributes - attribute_id = H5Acreate(file_id, "t", H5T_IEEE_F64BE, dataspace_id, - H5P_DEFAULT, H5P_DEFAULT); - status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &H.t); - status = H5Aclose(attribute_id); - attribute_id = H5Acreate(file_id, "dt", H5T_IEEE_F64BE, dataspace_id, - H5P_DEFAULT, H5P_DEFAULT); - status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &H.dt); - status = H5Aclose(attribute_id); - attribute_id = H5Acreate(file_id, "n_step", H5T_STD_I32BE, dataspace_id, - H5P_DEFAULT, H5P_DEFAULT); - status = H5Awrite(attribute_id, H5T_NATIVE_INT, &H.n_step); - status = H5Aclose(attribute_id); - attribute_id = H5Acreate(file_id, "n_fields", H5T_STD_I32BE, dataspace_id, - H5P_DEFAULT, H5P_DEFAULT); - status = H5Awrite(attribute_id, H5T_NATIVE_INT, &H.n_fields); - status = H5Aclose(attribute_id); + status = Write_HDF5_Attribute(file_id, dataspace_id, &H.t, "t"); + status = Write_HDF5_Attribute(file_id, dataspace_id, &H.dt, "dt"); + status = Write_HDF5_Attribute(file_id, dataspace_id, &H.n_step, "n_step"); + status = Write_HDF5_Attribute(file_id, dataspace_id, &H.n_fields, "n_fields"); double time_unit = TIME_UNIT; - attribute_id = H5Acreate(file_id, "time_unit", H5T_IEEE_F64BE, dataspace_id, - H5P_DEFAULT, H5P_DEFAULT); - status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &time_unit); - status = H5Aclose(attribute_id); + status = Write_HDF5_Attribute(file_id, dataspace_id, &time_unit, "time_unit"); double length_unit = LENGTH_UNIT; - attribute_id = H5Acreate(file_id, "length_unit", H5T_IEEE_F64BE, dataspace_id, - H5P_DEFAULT, H5P_DEFAULT); - status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &length_unit); - status = H5Aclose(attribute_id); + status = + Write_HDF5_Attribute(file_id, dataspace_id, &length_unit, "length_unit"); double mass_unit = MASS_UNIT; - attribute_id = H5Acreate(file_id, "mass_unit", H5T_IEEE_F64BE, dataspace_id, - H5P_DEFAULT, H5P_DEFAULT); - status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &mass_unit); - status = H5Aclose(attribute_id); + status = Write_HDF5_Attribute(file_id, dataspace_id, &mass_unit, "mass_unit"); double velocity_unit = VELOCITY_UNIT; - attribute_id = H5Acreate(file_id, "velocity_unit", H5T_IEEE_F64BE, - dataspace_id, H5P_DEFAULT, H5P_DEFAULT); - status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &velocity_unit); - status = H5Aclose(attribute_id); + status = Write_HDF5_Attribute(file_id, dataspace_id, &velocity_unit, + "velocity_unit"); double density_unit = DENSITY_UNIT; - attribute_id = H5Acreate(file_id, "density_unit", H5T_IEEE_F64BE, - dataspace_id, H5P_DEFAULT, H5P_DEFAULT); - status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &density_unit); - status = H5Aclose(attribute_id); + status = Write_HDF5_Attribute(file_id, dataspace_id, &density_unit, + "density_unit"); double energy_unit = ENERGY_UNIT; - attribute_id = H5Acreate(file_id, "energy_unit", H5T_IEEE_F64BE, dataspace_id, - H5P_DEFAULT, H5P_DEFAULT); - status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &energy_unit); - status = H5Aclose(attribute_id); + status = + Write_HDF5_Attribute(file_id, dataspace_id, &energy_unit, "energy_unit"); #ifdef MHD double magnetic_field_unit = MAGNETIC_FIELD_UNIT; - attribute_id = H5Acreate(file_id, "magnetic_field_unit", H5T_IEEE_F64BE, - dataspace_id, H5P_DEFAULT, H5P_DEFAULT); - status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &magnetic_field_unit); - status = H5Aclose(attribute_id); + status = Write_HDF5_Attribute(file_id, dataspace_id, &magnetic_field_unit, + "magnetic_field_unit"); #endif // MHD #ifdef COSMOLOGY - attribute_id = H5Acreate(file_id, "H0", H5T_IEEE_F64BE, dataspace_id, - H5P_DEFAULT, H5P_DEFAULT); - status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &Cosmo.H0); - status = H5Aclose(attribute_id); - attribute_id = H5Acreate(file_id, "Omega_M", H5T_IEEE_F64BE, dataspace_id, - H5P_DEFAULT, H5P_DEFAULT); - status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &Cosmo.Omega_M); - status = H5Aclose(attribute_id); - attribute_id = H5Acreate(file_id, "Omega_L", H5T_IEEE_F64BE, dataspace_id, - H5P_DEFAULT, H5P_DEFAULT); - status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &Cosmo.Omega_L); - status = H5Aclose(attribute_id); - attribute_id = H5Acreate(file_id, "Current_z", H5T_IEEE_F64BE, dataspace_id, - H5P_DEFAULT, H5P_DEFAULT); - status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &Cosmo.current_z); - status = H5Aclose(attribute_id); - attribute_id = H5Acreate(file_id, "Current_a", H5T_IEEE_F64BE, dataspace_id, - H5P_DEFAULT, H5P_DEFAULT); - status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &Cosmo.current_a); - status = H5Aclose(attribute_id); + status = Write_HDF5_Attribute(file_id, dataspace_id, &Cosmo.H0, "H0"); + status = + Write_HDF5_Attribute(file_id, dataspace_id, &Cosmo.Omega_M, "Omega_M"); + status = + Write_HDF5_Attribute(file_id, dataspace_id, &Cosmo.Omega_L, "Omega_L"); + status = Write_HDF5_Attribute(file_id, dataspace_id, &Cosmo.current_z, + "Current_z"); + status = Write_HDF5_Attribute(file_id, dataspace_id, &Cosmo.current_a, + "Current_a"); #endif // Close the dataspace @@ -752,20 +713,15 @@ void Grid3D::Write_Header_HDF5(hid_t file_id) int_data[2] = nz_global; #endif - attribute_id = H5Acreate(file_id, "dims", H5T_STD_I32BE, dataspace_id, - H5P_DEFAULT, H5P_DEFAULT); - status = H5Awrite(attribute_id, H5T_NATIVE_INT, int_data); - status = H5Aclose(attribute_id); + status = Write_HDF5_Attribute(file_id, dataspace_id, int_data, "dims"); #ifdef MHD for (size_t i = 0; i < 3; i++) { int_data[i]++; } - attribute_id = H5Acreate(file_id, "magnetic_field_dims", H5T_STD_I32BE, - dataspace_id, H5P_DEFAULT, H5P_DEFAULT); - status = H5Awrite(attribute_id, H5T_NATIVE_INT, int_data); - status = H5Aclose(attribute_id); + status = Write_HDF5_Attribute(file_id, dataspace_id, int_data, + "magnetic_field_dims"); #endif // MHD #ifdef MPI_CHOLLA @@ -773,67 +729,47 @@ void Grid3D::Write_Header_HDF5(hid_t file_id) int_data[1] = H.ny_real; int_data[2] = H.nz_real; - attribute_id = H5Acreate(file_id, "dims_local", H5T_STD_I32BE, dataspace_id, - H5P_DEFAULT, H5P_DEFAULT); - status = H5Awrite(attribute_id, H5T_NATIVE_INT, int_data); - status = H5Aclose(attribute_id); + status = Write_HDF5_Attribute(file_id, dataspace_id, int_data, "dims_local"); #ifdef MHD int_data[0] = H.nx_real + 1; int_data[1] = H.ny_real + 1; int_data[2] = H.nz_real + 1; - attribute_id = H5Acreate(file_id, "magnetic_field_dims_local", H5T_STD_I32BE, - dataspace_id, H5P_DEFAULT, H5P_DEFAULT); - status = H5Awrite(attribute_id, H5T_NATIVE_INT, int_data); - status = H5Aclose(attribute_id); + status = Write_HDF5_Attribute(file_id, dataspace_id, int_data, + "magnetic_field_dims_local"); #endif // MHD int_data[0] = nx_local_start; int_data[1] = ny_local_start; int_data[2] = nz_local_start; - attribute_id = H5Acreate(file_id, "offset", H5T_STD_I32BE, dataspace_id, - H5P_DEFAULT, H5P_DEFAULT); - status = H5Awrite(attribute_id, H5T_NATIVE_INT, int_data); - status = H5Aclose(attribute_id); + status = Write_HDF5_Attribute(file_id, dataspace_id, int_data, "offset"); int_data[0] = nproc_x; int_data[1] = nproc_y; int_data[2] = nproc_z; - attribute_id = H5Acreate(file_id, "nprocs", H5T_STD_I32BE, dataspace_id, - H5P_DEFAULT, H5P_DEFAULT); - status = H5Awrite(attribute_id, H5T_NATIVE_INT, int_data); - status = H5Aclose(attribute_id); + status = Write_HDF5_Attribute(file_id, dataspace_id, int_data, "nprocs"); #endif Real_data[0] = H.xbound; Real_data[1] = H.ybound; Real_data[2] = H.zbound; - attribute_id = H5Acreate(file_id, "bounds", H5T_IEEE_F64BE, dataspace_id, - H5P_DEFAULT, H5P_DEFAULT); - status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, Real_data); - status = H5Aclose(attribute_id); + status = Write_HDF5_Attribute(file_id, dataspace_id, Real_data, "bounds"); Real_data[0] = H.xdglobal; Real_data[1] = H.ydglobal; Real_data[2] = H.zdglobal; - attribute_id = H5Acreate(file_id, "domain", H5T_IEEE_F64BE, dataspace_id, - H5P_DEFAULT, H5P_DEFAULT); - status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, Real_data); - status = H5Aclose(attribute_id); + status = Write_HDF5_Attribute(file_id, dataspace_id, Real_data, "domain"); Real_data[0] = H.dx; Real_data[1] = H.dy; Real_data[2] = H.dz; - attribute_id = H5Acreate(file_id, "dx", H5T_IEEE_F64BE, dataspace_id, - H5P_DEFAULT, H5P_DEFAULT); - status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, Real_data); - status = H5Aclose(attribute_id); + status = Write_HDF5_Attribute(file_id, dataspace_id, Real_data, "dx"); // Close the dataspace status = H5Sclose(dataspace_id); @@ -919,71 +855,26 @@ void Grid3D::Write_Header_Rotated_HDF5(hid_t file_id) H5Aclose(attribute_id); // Numeric Attributes - attribute_id = H5Acreate(file_id, "t", H5T_IEEE_F64BE, dataspace_id, - H5P_DEFAULT, H5P_DEFAULT); - status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &H.t); - status = H5Aclose(attribute_id); - attribute_id = H5Acreate(file_id, "dt", H5T_IEEE_F64BE, dataspace_id, - H5P_DEFAULT, H5P_DEFAULT); - status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &H.dt); - status = H5Aclose(attribute_id); - attribute_id = H5Acreate(file_id, "n_step", H5T_STD_I32BE, dataspace_id, - H5P_DEFAULT, H5P_DEFAULT); - status = H5Awrite(attribute_id, H5T_NATIVE_INT, &H.n_step); - status = H5Aclose(attribute_id); - attribute_id = H5Acreate(file_id, "n_fields", H5T_STD_I32BE, dataspace_id, - H5P_DEFAULT, H5P_DEFAULT); - status = H5Awrite(attribute_id, H5T_NATIVE_INT, &H.n_fields); - status = H5Aclose(attribute_id); + status = Write_HDF5_Attribute(file_id, dataspace_id, &H.t, "t"); + status = Write_HDF5_Attribute(file_id, dataspace_id, &H.dt, "dt"); + status = Write_HDF5_Attribute(file_id, dataspace_id, &H.n_step, "n_step"); + status = Write_HDF5_Attribute(file_id, dataspace_id, &H.n_fields, "n_fields"); // Rotation data - attribute_id = H5Acreate(file_id, "nxr", H5T_STD_I32BE, dataspace_id, - H5P_DEFAULT, H5P_DEFAULT); - status = H5Awrite(attribute_id, H5T_NATIVE_INT, &R.nx); - status = H5Aclose(attribute_id); - attribute_id = H5Acreate(file_id, "nzr", H5T_STD_I32BE, dataspace_id, - H5P_DEFAULT, H5P_DEFAULT); - status = H5Awrite(attribute_id, H5T_NATIVE_INT, &R.nz); - status = H5Aclose(attribute_id); - attribute_id = H5Acreate(file_id, "nx_min", H5T_STD_I32BE, dataspace_id, - H5P_DEFAULT, H5P_DEFAULT); - status = H5Awrite(attribute_id, H5T_NATIVE_INT, &R.nx_min); - status = H5Aclose(attribute_id); - attribute_id = H5Acreate(file_id, "nz_min", H5T_STD_I32BE, dataspace_id, - H5P_DEFAULT, H5P_DEFAULT); - status = H5Awrite(attribute_id, H5T_NATIVE_INT, &R.nz_min); - status = H5Aclose(attribute_id); - attribute_id = H5Acreate(file_id, "nx_max", H5T_STD_I32BE, dataspace_id, - H5P_DEFAULT, H5P_DEFAULT); - status = H5Awrite(attribute_id, H5T_NATIVE_INT, &R.nx_max); - status = H5Aclose(attribute_id); - attribute_id = H5Acreate(file_id, "nz_max", H5T_STD_I32BE, dataspace_id, - H5P_DEFAULT, H5P_DEFAULT); - status = H5Awrite(attribute_id, H5T_NATIVE_INT, &R.nz_max); - status = H5Aclose(attribute_id); - delta = 180. * R.delta / M_PI; - attribute_id = H5Acreate(file_id, "delta", H5T_IEEE_F64BE, dataspace_id, - H5P_DEFAULT, H5P_DEFAULT); - status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &delta); - status = H5Aclose(attribute_id); - theta = 180. * R.theta / M_PI; - attribute_id = H5Acreate(file_id, "theta", H5T_IEEE_F64BE, dataspace_id, - H5P_DEFAULT, H5P_DEFAULT); - status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &theta); - status = H5Aclose(attribute_id); - phi = 180. * R.phi / M_PI; - attribute_id = H5Acreate(file_id, "phi", H5T_IEEE_F64BE, dataspace_id, - H5P_DEFAULT, H5P_DEFAULT); - status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &phi); - status = H5Aclose(attribute_id); - attribute_id = H5Acreate(file_id, "Lx", H5T_IEEE_F64BE, dataspace_id, - H5P_DEFAULT, H5P_DEFAULT); - status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &R.Lx); - status = H5Aclose(attribute_id); - attribute_id = H5Acreate(file_id, "Lz", H5T_IEEE_F64BE, dataspace_id, - H5P_DEFAULT, H5P_DEFAULT); - status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &R.Lz); - status = H5Aclose(attribute_id); + status = Write_HDF5_Attribute(file_id, dataspace_id, &R.nx, "nxr"); + status = Write_HDF5_Attribute(file_id, dataspace_id, &R.nz, "nzr"); + status = Write_HDF5_Attribute(file_id, dataspace_id, &R.nx_min, "nx_min"); + status = Write_HDF5_Attribute(file_id, dataspace_id, &R.nz_min, "nz_min"); + status = Write_HDF5_Attribute(file_id, dataspace_id, &R.nx_max, "nx_max"); + status = Write_HDF5_Attribute(file_id, dataspace_id, &R.nz_max, "nz_max"); + delta = 180. * R.delta / M_PI; + status = Write_HDF5_Attribute(file_id, dataspace_id, &delta, "delta"); + theta = 180. * R.theta / M_PI; + status = Write_HDF5_Attribute(file_id, dataspace_id, &theta, "theta"); + phi = 180. * R.phi / M_PI; + status = Write_HDF5_Attribute(file_id, dataspace_id, &phi, "phi"); + status = Write_HDF5_Attribute(file_id, dataspace_id, &R.Lx, "Lx"); + status = Write_HDF5_Attribute(file_id, dataspace_id, &R.Lz, "Lz"); // Close the dataspace status = H5Sclose(dataspace_id); @@ -1003,57 +894,39 @@ void Grid3D::Write_Header_Rotated_HDF5(hid_t file_id) int_data[2] = nz_global; #endif - attribute_id = H5Acreate(file_id, "dims", H5T_STD_I32BE, dataspace_id, - H5P_DEFAULT, H5P_DEFAULT); - status = H5Awrite(attribute_id, H5T_NATIVE_INT, int_data); - status = H5Aclose(attribute_id); + status = Write_HDF5_Attribute(file_id, dataspace_id, int_data, "dims"); #ifdef MPI_CHOLLA int_data[0] = H.nx_real; int_data[1] = H.ny_real; int_data[2] = H.nz_real; - attribute_id = H5Acreate(file_id, "dims_local", H5T_STD_I32BE, dataspace_id, - H5P_DEFAULT, H5P_DEFAULT); - status = H5Awrite(attribute_id, H5T_NATIVE_INT, int_data); - status = H5Aclose(attribute_id); + status = Write_HDF5_Attribute(file_id, dataspace_id, int_data, "dims_local"); int_data[0] = nx_local_start; int_data[1] = ny_local_start; int_data[2] = nz_local_start; - attribute_id = H5Acreate(file_id, "offset", H5T_STD_I32BE, dataspace_id, - H5P_DEFAULT, H5P_DEFAULT); - status = H5Awrite(attribute_id, H5T_NATIVE_INT, int_data); - status = H5Aclose(attribute_id); + status = Write_HDF5_Attribute(file_id, dataspace_id, int_data, "offset"); #endif Real_data[0] = H.xbound; Real_data[1] = H.ybound; Real_data[2] = H.zbound; - attribute_id = H5Acreate(file_id, "bounds", H5T_IEEE_F64BE, dataspace_id, - H5P_DEFAULT, H5P_DEFAULT); - status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, Real_data); - status = H5Aclose(attribute_id); + status = Write_HDF5_Attribute(file_id, dataspace_id, Real_data, "bounds"); Real_data[0] = H.xdglobal; Real_data[1] = H.ydglobal; Real_data[2] = H.zdglobal; - attribute_id = H5Acreate(file_id, "domain", H5T_IEEE_F64BE, dataspace_id, - H5P_DEFAULT, H5P_DEFAULT); - status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, Real_data); - status = H5Aclose(attribute_id); + status = Write_HDF5_Attribute(file_id, dataspace_id, Real_data, "domain"); Real_data[0] = H.dx; Real_data[1] = H.dy; Real_data[2] = H.dz; - attribute_id = H5Acreate(file_id, "dx", H5T_IEEE_F64BE, dataspace_id, - H5P_DEFAULT, H5P_DEFAULT); - status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, Real_data); - status = H5Aclose(attribute_id); + status = Write_HDF5_Attribute(file_id, dataspace_id, Real_data, "dx"); // Close the dataspace status = H5Sclose(dataspace_id); @@ -1299,6 +1172,25 @@ void Grid3D::Write_Grid_Binary(FILE *fp) } #ifdef HDF5 +herr_t Write_HDF5_Attribute(hid_t file_id, hid_t dataspace_id, + double *attribute, const char *name) +{ + hid_t attribute_id = H5Acreate(file_id, name, H5T_IEEE_F64BE, dataspace_id, + H5P_DEFAULT, H5P_DEFAULT); + herr_t status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, attribute); + status = H5Aclose(attribute_id); + return status; +} + +herr_t Write_HDF5_Attribute(hid_t file_id, hid_t dataspace_id, int *attribute, + const char *name) +{ + hid_t attribute_id = H5Acreate(file_id, name, H5T_STD_I32BE, dataspace_id, + H5P_DEFAULT, H5P_DEFAULT); + herr_t status = H5Awrite(attribute_id, H5T_NATIVE_INT, attribute); + status = H5Aclose(attribute_id); + return status; +} herr_t Read_HDF5_Dataset(hid_t file_id, double *dataset_buffer, const char *name) diff --git a/src/io/io.h b/src/io/io.h index 2ce0e0284..b7c501543 100644 --- a/src/io/io.h +++ b/src/io/io.h @@ -53,6 +53,11 @@ void write_debug(Real* Value, const char* fname, int nValues, int iProc); #ifdef HDF5 // From io/io.cpp +herr_t Write_HDF5_Attribute(hid_t file_id, hid_t dataspace_id, + double* attribute, const char* name); +herr_t Write_HDF5_Attribute(hid_t file_id, hid_t dataspace_id, int* attribute, + const char* name); + herr_t Read_HDF5_Dataset(hid_t file_id, double* dataset_buffer, const char* name); From 8e5b4619734e0922d815f4d259323c68002af6db Mon Sep 17 00:00:00 2001 From: Alwin Date: Thu, 9 Feb 2023 12:08:57 -0800 Subject: [PATCH 206/694] widen clang format columns to 120 --- .clang-format | 2 +- src/analysis/analysis.cpp | 20 +- src/analysis/analysis.h | 7 +- src/analysis/feedback_analysis.cpp | 48 +- src/analysis/feedback_analysis_gpu.cu | 47 +- src/analysis/io_analysis.cpp | 511 ++-- src/analysis/lya_statistics.cpp | 603 ++--- src/analysis/phase_diagram.cpp | 27 +- src/chemistry_gpu/chemistry_functions.cpp | 104 +- src/chemistry_gpu/chemistry_functions_gpu.cu | 437 ++-- src/chemistry_gpu/chemistry_gpu.h | 9 +- src/chemistry_gpu/chemistry_io.cpp | 3 +- src/cooling/cooling_cuda.cu | 83 +- src/cooling/cooling_cuda.h | 12 +- src/cooling/load_cloudy_texture.cu | 51 +- src/cooling/texture_utilities.h | 8 +- src/cooling_grackle/cool_grackle.cpp | 24 +- src/cooling_grackle/grackle_functions.cpp | 28 +- src/cosmology/cosmology.cpp | 14 +- src/cosmology/cosmology_functions.cpp | 6 +- src/cosmology/cosmology_functions_gpu.cu | 20 +- src/dust/dust_cuda.cu | 14 +- src/dust/dust_cuda.h | 7 +- src/dust/dust_cuda_tests.cpp | 34 +- src/global/global.cpp | 20 +- src/global/global.h | 30 +- src/global/global_cuda.h | 15 +- src/gravity/grav3D.cpp | 64 +- src/gravity/grav3D.h | 12 +- src/gravity/gravity_boundaries.cpp | 82 +- src/gravity/gravity_boundaries_gpu.cu | 173 +- src/gravity/gravity_functions.cpp | 162 +- src/gravity/gravity_functions_gpu.cu | 79 +- src/gravity/gravity_restart.cpp | 17 +- src/gravity/paris/HenryPeriodic.cu | 31 +- src/gravity/paris/ParisPeriodic.cu | 33 +- src/gravity/paris/PoissonZero3DBlockedGPU.cu | 209 +- src/gravity/potential_SOR_3D.cpp | 242 +- src/gravity/potential_SOR_3D.h | 70 +- src/gravity/potential_SOR_3D_gpu.cu | 350 +-- src/gravity/potential_paris_3D.cu | 59 +- src/gravity/potential_paris_3D.h | 8 +- src/gravity/potential_paris_galactic.cu | 48 +- src/gravity/potential_paris_galactic.h | 8 +- src/gravity/static_grav.h | 30 +- src/grid/boundary_conditions.cpp | 27 +- src/grid/cuda_boundaries.cu | 104 +- src/grid/cuda_boundaries.h | 27 +- src/grid/grid3D.cpp | 56 +- src/grid/grid3D.h | 106 +- src/grid/grid_enum.h | 9 +- src/grid/initial_conditions.cpp | 264 +- src/grid/mpi_boundaries.cpp | 429 ++- src/h_correction/flux_correction.h | 39 +- src/h_correction/h_correction_2D_cuda.cu | 84 +- src/h_correction/h_correction_2D_cuda.h | 12 +- src/h_correction/h_correction_3D_cuda.cu | 147 +- src/h_correction/h_correction_3D_cuda.h | 18 +- src/hydro/hydro_cuda.cu | 413 ++- src/hydro/hydro_cuda.h | 111 +- src/hydro/hydro_cuda_tests.cu | 37 +- src/integrators/VL_1D_cuda.cu | 115 +- src/integrators/VL_1D_cuda.h | 4 +- src/integrators/VL_2D_cuda.cu | 189 +- src/integrators/VL_2D_cuda.h | 5 +- src/integrators/VL_3D_cuda.cu | 365 ++- src/integrators/VL_3D_cuda.h | 9 +- src/integrators/simple_1D_cuda.cu | 52 +- src/integrators/simple_1D_cuda.h | 4 +- src/integrators/simple_2D_cuda.cu | 88 +- src/integrators/simple_2D_cuda.h | 3 +- src/integrators/simple_3D_cuda.cu | 155 +- src/integrators/simple_3D_cuda.h | 9 +- src/io/io.cpp | 732 ++---- src/io/io.h | 21 +- src/io/io_gpu.cu | 36 +- src/main.cpp | 21 +- src/main_tests.cpp | 6 +- src/mhd/ct_electric_fields.cu | 267 +- src/mhd/ct_electric_fields.h | 71 +- src/mhd/ct_electric_fields_tests.cu | 44 +- src/mhd/magnetic_divergence.cu | 47 +- src/mhd/magnetic_divergence.h | 8 +- src/mhd/magnetic_divergence_tests.cu | 8 +- src/mhd/magnetic_update.cu | 49 +- src/mhd/magnetic_update.h | 7 +- src/mhd/magnetic_update_tests.cu | 41 +- src/model/disk_ICs.cpp | 87 +- src/model/disk_galaxy.h | 54 +- src/mpi/mpi_routines.cpp | 172 +- src/mpi/mpi_routines.h | 12 +- src/particles/density_CIC.cpp | 36 +- src/particles/density_CIC.h | 5 +- src/particles/density_CIC_gpu.cu | 60 +- src/particles/density_boundaries.cpp | 30 +- src/particles/density_boundaries_gpu.cu | 97 +- src/particles/feedback_CIC_gpu.cu | 243 +- src/particles/gravity_CIC.cpp | 147 +- src/particles/gravity_CIC_gpu.cu | 183 +- src/particles/io_particles.cpp | 246 +- src/particles/particles_3D.cpp | 107 +- src/particles/particles_3D.h | 129 +- src/particles/particles_3D_gpu.cu | 66 +- src/particles/particles_boundaries.cpp | 533 ++-- src/particles/particles_boundaries_cpu.cpp | 129 +- src/particles/particles_boundaries_gpu.cu | 239 +- src/particles/particles_boundaries_gpu.h | 67 +- src/particles/particles_dynamics.cpp | 84 +- src/particles/particles_dynamics_gpu.cu | 122 +- src/particles/supernova.h | 27 +- src/reconstruction/pcm_cuda.cu | 27 +- src/reconstruction/pcm_cuda.h | 18 +- src/reconstruction/plmc_cuda.cu | 69 +- src/reconstruction/plmc_cuda.h | 6 +- src/reconstruction/plmp_cuda.cu | 37 +- src/reconstruction/plmp_cuda.h | 9 +- src/reconstruction/ppmc_cuda.cu | 278 +- src/reconstruction/ppmc_cuda.h | 6 +- src/reconstruction/ppmp_cuda.cu | 136 +- src/reconstruction/ppmp_cuda.h | 14 +- src/riemann_solvers/exact_cuda.cu | 62 +- src/riemann_solvers/exact_cuda.h | 20 +- src/riemann_solvers/hll_cuda.cu | 29 +- src/riemann_solvers/hll_cuda.h | 6 +- src/riemann_solvers/hllc_cuda.cu | 37 +- src/riemann_solvers/hllc_cuda.h | 6 +- src/riemann_solvers/hllc_cuda_tests.cu | 49 +- src/riemann_solvers/hlld_cuda.cu | 362 +-- src/riemann_solvers/hlld_cuda.h | 82 +- src/riemann_solvers/hlld_cuda_tests.cu | 2433 +++++++----------- src/riemann_solvers/roe_cuda.cu | 48 +- src/riemann_solvers/roe_cuda.h | 6 +- src/system_tests/hydro_system_tests.cpp | 108 +- src/system_tests/mhd_system_tests.cpp | 281 +- src/system_tests/system_tester.cpp | 268 +- src/system_tests/system_tester.h | 52 +- src/utils/DeviceVector.h | 31 +- src/utils/DeviceVector_tests.cu | 20 +- src/utils/cuda_utilities.h | 23 +- src/utils/cuda_utilities_tests.cpp | 19 +- src/utils/error_check_cuda.cu | 17 +- src/utils/error_check_cuda.h | 7 +- src/utils/gpu_arrays_functions.cu | 17 +- src/utils/gpu_arrays_functions.h | 10 +- src/utils/hydro_utilities.h | 39 +- src/utils/hydro_utilities_tests.cpp | 83 +- src/utils/math_utilities.h | 12 +- src/utils/math_utilities_tests.cpp | 12 +- src/utils/mhd_utilities.h | 163 +- src/utils/mhd_utilities_tests.cu | 218 +- src/utils/parallel_omp.cpp | 7 +- src/utils/parallel_omp.h | 7 +- src/utils/prng_utilities.h | 9 +- src/utils/reduction_utilities.cu | 3 +- src/utils/reduction_utilities_tests.cu | 12 +- src/utils/testing_utilities.cpp | 33 +- src/utils/testing_utilities.h | 37 +- src/utils/timing_functions.cpp | 17 +- 158 files changed, 5913 insertions(+), 10307 deletions(-) diff --git a/.clang-format b/.clang-format index 8a9f6dc6f..7cf221dc1 100644 --- a/.clang-format +++ b/.clang-format @@ -77,7 +77,7 @@ BreakConstructorInitializersBeforeComma: false BreakConstructorInitializers: BeforeColon BreakAfterJavaFieldAnnotations: false BreakStringLiterals: true -ColumnLimit: 80 +ColumnLimit: 120 CommentPragmas: '^ IWYU pragma:' QualifierAlignment: Leave CompactNamespaces: false diff --git a/src/analysis/analysis.cpp b/src/analysis/analysis.cpp index ec2eba059..3c5991e71 100644 --- a/src/analysis/analysis.cpp +++ b/src/analysis/analysis.cpp @@ -84,8 +84,7 @@ void Grid3D::Compute_and_Output_Analysis(struct parameters *P) chprintf("\nComputing Analysis \n"); #endif - cudaMemcpy(C.density, C.device, H.n_fields * H.n_cells * sizeof(Real), - cudaMemcpyDeviceToHost); + cudaMemcpy(C.density, C.device, H.n_fields * H.n_cells * sizeof(Real), cudaMemcpyDeviceToHost); #ifdef PHASE_DIAGRAM #ifdef CHEMISTRY_GPU @@ -108,8 +107,7 @@ void Grid3D::Compute_and_Output_Analysis(struct parameters *P) #endif #ifdef LYA_STATISTICS - if (Analysis.Computed_Flux_Power_Spectrum == 1) - Analysis.Clear_Power_Spectrum_Measurements(); + if (Analysis.Computed_Flux_Power_Spectrum == 1) Analysis.Clear_Power_Spectrum_Measurements(); #endif #ifdef COSMOLOGY @@ -139,17 +137,13 @@ void Grid3D::Initialize_Analysis_Module(struct parameters *P) z_now = 0; #endif - Analysis.Initialize(H.xdglobal, H.ydglobal, H.zdglobal, H.xblocal, H.yblocal, - H.zblocal, P->nx, P->ny, P->nz, H.nx_real, H.ny_real, - H.nz_real, H.dx, H.dy, H.dz, H.n_ghost, z_now, P); + Analysis.Initialize(H.xdglobal, H.ydglobal, H.zdglobal, H.xblocal, H.yblocal, H.zblocal, P->nx, P->ny, P->nz, + H.nx_real, H.ny_real, H.nz_real, H.dx, H.dy, H.dz, H.n_ghost, z_now, P); } -void Analysis_Module::Initialize(Real Lx, Real Ly, Real Lz, Real x_min, - Real y_min, Real z_min, int nx, int ny, int nz, - int nx_real, int ny_real, int nz_real, - Real dx_real, Real dy_real, Real dz_real, - int n_ghost_hydro, Real z_now, - struct parameters *P) +void Analysis_Module::Initialize(Real Lx, Real Ly, Real Lz, Real x_min, Real y_min, Real z_min, int nx, int ny, int nz, + int nx_real, int ny_real, int nz_real, Real dx_real, Real dy_real, Real dz_real, + int n_ghost_hydro, Real z_now, struct parameters *P) { // Domain Length Lbox_x = Lx; diff --git a/src/analysis/analysis.h b/src/analysis/analysis.h index 7f157d6db..dcc2b9b24 100644 --- a/src/analysis/analysis.h +++ b/src/analysis/analysis.h @@ -291,10 +291,9 @@ class Analysis_Module #endif Analysis_Module(void); - void Initialize(Real Lx, Real Ly, Real Lz, Real x_min, Real y_min, Real z_min, - int nx, int ny, int nz, int nx_real, int ny_real, int nz_real, - Real dx_real, Real dy_real, Real dz_real, int n_ghost_hydro, - Real z_now, struct parameters *P); + void Initialize(Real Lx, Real Ly, Real Lz, Real x_min, Real y_min, Real z_min, int nx, int ny, int nz, int nx_real, + int ny_real, int nz_real, Real dx_real, Real dy_real, Real dz_real, int n_ghost_hydro, Real z_now, + struct parameters *P); void Reset(void); void Load_Scale_Outputs(struct parameters *P); diff --git a/src/analysis/feedback_analysis.cpp b/src/analysis/feedback_analysis.cpp index 0ea688a97..3164abb04 100644 --- a/src/analysis/feedback_analysis.cpp +++ b/src/analysis/feedback_analysis.cpp @@ -40,10 +40,8 @@ FeedbackAnalysis::FeedbackAnalysis(Grid3D& G) } #ifdef PARTICLES_GPU - CHECK(cudaMemcpy(d_circ_vel_x, h_circ_vel_x, G.H.n_cells * sizeof(Real), - cudaMemcpyHostToDevice)); - CHECK(cudaMemcpy(d_circ_vel_y, h_circ_vel_y, G.H.n_cells * sizeof(Real), - cudaMemcpyHostToDevice)); + CHECK(cudaMemcpy(d_circ_vel_x, h_circ_vel_x, G.H.n_cells * sizeof(Real), cudaMemcpyHostToDevice)); + CHECK(cudaMemcpy(d_circ_vel_y, h_circ_vel_y, G.H.n_cells * sizeof(Real), cudaMemcpyHostToDevice)); #endif } @@ -69,10 +67,8 @@ void FeedbackAnalysis::Compute_Gas_Velocity_Dispersion(Grid3D& G) Real x, y, z, r, xpm, xpp, ypm, ypp, zpm, zpp; Real Pm, Pp; Real dPdx, dPdy, dPdr; - Real vx, vy, vz, vrms_poisson, vrms_analytic, vcp, vca, vcxp, vcyp, vcxa, - vcya; - Real total_mass, partial_mass = 0, total_var_analytic = 0, - total_var_poisson = 0, partial_var_poisson = 0, + Real vx, vy, vz, vrms_poisson, vrms_analytic, vcp, vca, vcxp, vcyp, vcxa, vcya; + Real total_mass, partial_mass = 0, total_var_analytic = 0, total_var_poisson = 0, partial_var_poisson = 0, partial_var_analytic = 0; int n_ghost_grav = G.Particles.G.n_ghost_particles_grid; @@ -83,8 +79,7 @@ void FeedbackAnalysis::Compute_Gas_Velocity_Dispersion(Grid3D& G) for (k = 0; k < G.H.nz_real; k++) { for (j = 0; j < G.H.ny_real; j++) { for (i = 0; i < G.H.nx_real; i++) { - id = (i + G.H.n_ghost) + (j + G.H.n_ghost) * G.H.nx + - (k + G.H.n_ghost) * G.H.nx * G.H.ny; + id = (i + G.H.n_ghost) + (j + G.H.n_ghost) * G.H.nx + (k + G.H.n_ghost) * G.H.nx * G.H.ny; partial_mass += G.C.density[id]; } } @@ -99,30 +94,24 @@ void FeedbackAnalysis::Compute_Gas_Velocity_Dispersion(Grid3D& G) for (j = G.H.n_ghost; j < G.H.ny - G.H.n_ghost; j++) { for (i = G.H.n_ghost; i < G.H.nx - G.H.n_ghost; i++) { id = i + j * G.H.nx + k * G.H.nx * G.H.ny; - id_grav = (i + ghost_diff) + (j + ghost_diff) * nx_grav + - (k + ghost_diff) * nx_grav * ny_grav; + id_grav = (i + ghost_diff) + (j + ghost_diff) * nx_grav + (k + ghost_diff) * nx_grav * ny_grav; - if (G.C.density[id] < VRMS_CUTOFF_DENSITY) - continue; // in cgs, this is 0.01 cm^{-3} + if (G.C.density[id] < VRMS_CUTOFF_DENSITY) continue; // in cgs, this is 0.01 cm^{-3} G.Get_Position(i, j, k, &x, &y, &z); r = sqrt(x * x + y * y); - vcp = sqrt(r * fabs(G.Particles.G.gravity_x[id_grav] * x / r + - G.Particles.G.gravity_y[id_grav] * y / r)); + vcp = sqrt(r * fabs(G.Particles.G.gravity_x[id_grav] * x / r + G.Particles.G.gravity_y[id_grav] * y / r)); vcxp = -y / r * vcp; vcyp = x / r * vcp; vx = G.C.momentum_x[id] / G.C.density[id]; vy = G.C.momentum_y[id] / G.C.density[id]; vz = G.C.momentum_z[id] / G.C.density[id]; - partial_var_poisson += - ((vx - vcxp) * (vx - vcxp) + (vy - vcyp) * (vy - vcyp) + vz * vz) * - G.C.density[id]; - partial_var_analytic += - ((vx - h_circ_vel_x[id]) * (vx - h_circ_vel_x[id]) + - (vy - h_circ_vel_y[id]) * (vy - h_circ_vel_y[id]) + (vz * vz)) * - G.C.density[id]; + partial_var_poisson += ((vx - vcxp) * (vx - vcxp) + (vy - vcyp) * (vy - vcyp) + vz * vz) * G.C.density[id]; + partial_var_analytic += ((vx - h_circ_vel_x[id]) * (vx - h_circ_vel_x[id]) + + (vy - h_circ_vel_y[id]) * (vy - h_circ_vel_y[id]) + (vz * vz)) * + G.C.density[id]; } } } @@ -130,22 +119,19 @@ void FeedbackAnalysis::Compute_Gas_Velocity_Dispersion(Grid3D& G) partial_var_analytic /= total_mass; #ifdef MPI_CHOLLA - MPI_Reduce(&partial_var_poisson, &total_var_poisson, 1, MPI_CHREAL, MPI_SUM, - root, world); - MPI_Reduce(&partial_var_analytic, &total_var_analytic, 1, MPI_CHREAL, MPI_SUM, - root, world); + MPI_Reduce(&partial_var_poisson, &total_var_poisson, 1, MPI_CHREAL, MPI_SUM, root, world); + MPI_Reduce(&partial_var_analytic, &total_var_analytic, 1, MPI_CHREAL, MPI_SUM, root, world); #else total_var_poisson = partial_var_poisson; total_var_analytic = partial_var_analytic; #endif - vrms_poisson = - sqrt(total_var_poisson) * VELOCITY_UNIT / 1e5; // output in km/s + vrms_poisson = sqrt(total_var_poisson) * VELOCITY_UNIT / 1e5; // output in km/s vrms_analytic = sqrt(total_var_analytic) * VELOCITY_UNIT / 1e5; - chprintf("feedback: time %f, dt=%f, vrms_p = %f km/s, vrms_a = %f km/s\n", - G.H.t, G.H.dt, vrms_poisson, vrms_analytic); + chprintf("feedback: time %f, dt=%f, vrms_p = %f km/s, vrms_a = %f km/s\n", G.H.t, G.H.dt, vrms_poisson, + vrms_analytic); #elif defined(PARTICLES_GPU) Compute_Gas_Velocity_Dispersion_GPU(G); diff --git a/src/analysis/feedback_analysis_gpu.cu b/src/analysis/feedback_analysis_gpu.cu index a934e52d0..f18b33f59 100644 --- a/src/analysis/feedback_analysis_gpu.cu +++ b/src/analysis/feedback_analysis_gpu.cu @@ -8,9 +8,7 @@ #define MU 0.6 // in cgs, this is 0.01 cm^{-3} - #define MIN_DENSITY \ - 0.01 * MP *MU *LENGTH_UNIT *LENGTH_UNIT *LENGTH_UNIT / \ - MASS_UNIT // 148279.7 + #define MIN_DENSITY 0.01 * MP *MU *LENGTH_UNIT *LENGTH_UNIT *LENGTH_UNIT / MASS_UNIT // 148279.7 #define TPB_ANALYSIS 1024 __device__ void warpReduce(volatile Real *buff, size_t tid) @@ -23,10 +21,8 @@ __device__ void warpReduce(volatile Real *buff, size_t tid) if (TPB_ANALYSIS >= 2) buff[tid] += buff[tid + 1]; } -void __global__ Reduce_Tubulence_kernel(int nx, int ny, int nz, int n_ghost, - Real *density, Real *momentum_x, - Real *momentum_y, Real *momentum_z, - Real *circ_vel_x, Real *circ_vel_y, +void __global__ Reduce_Tubulence_kernel(int nx, int ny, int nz, int n_ghost, Real *density, Real *momentum_x, + Real *momentum_y, Real *momentum_z, Real *circ_vel_x, Real *circ_vel_y, Real *partial_mass, Real *partial_vel) { __shared__ Real s_mass[TPB_ANALYSIS]; @@ -42,16 +38,15 @@ void __global__ Reduce_Tubulence_kernel(int nx, int ny, int nz, int n_ghost, s_mass[tid] = 0; s_vel[tid] = 0; Real vx, vy, vz; - if (xid > n_ghost - 1 && xid < nx - n_ghost && yid > n_ghost - 1 && - yid < ny - n_ghost && zid > n_ghost - 1 && zid < nz - n_ghost && - density[id] > MIN_DENSITY) { + if (xid > n_ghost - 1 && xid < nx - n_ghost && yid > n_ghost - 1 && yid < ny - n_ghost && zid > n_ghost - 1 && + zid < nz - n_ghost && density[id] > MIN_DENSITY) { s_mass[tid] = density[id]; vx = momentum_x[id] / density[id]; vy = momentum_y[id] / density[id]; vz = momentum_z[id] / density[id]; - s_vel[tid] = ((vx - circ_vel_x[id]) * (vx - circ_vel_x[id]) + - (vy - circ_vel_y[id]) * (vy - circ_vel_y[id]) + (vz * vz)) * - density[id]; + s_vel[tid] = + ((vx - circ_vel_x[id]) * (vx - circ_vel_x[id]) + (vy - circ_vel_y[id]) * (vy - circ_vel_y[id]) + (vz * vz)) * + density[id]; } __syncthreads(); @@ -70,8 +65,7 @@ void __global__ Reduce_Tubulence_kernel(int nx, int ny, int nz, int n_ghost, } } -void __global__ Reduce_Tubulence_kernel_2(Real *input_m, Real *input_v, - Real *output_m, Real *output_v, int n) +void __global__ Reduce_Tubulence_kernel_2(Real *input_m, Real *input_v, Real *output_m, Real *output_v, int n) { __shared__ Real s_mass[TPB_ANALYSIS]; __shared__ Real s_vel[TPB_ANALYSIS]; @@ -147,10 +141,9 @@ void FeedbackAnalysis::Compute_Gas_Velocity_Dispersion_GPU(Grid3D &G) Real total_mass = 0; Real total_vel = 0; - hipLaunchKernelGGL(Reduce_Tubulence_kernel, ngrid, TPB_ANALYSIS, 0, 0, G.H.nx, - G.H.ny, G.H.nz, G.H.n_ghost, G.C.d_density, - G.C.d_momentum_x, G.C.d_momentum_y, G.C.d_momentum_z, - d_circ_vel_x, d_circ_vel_y, d_partial_mass, d_partial_vel); + hipLaunchKernelGGL(Reduce_Tubulence_kernel, ngrid, TPB_ANALYSIS, 0, 0, G.H.nx, G.H.ny, G.H.nz, G.H.n_ghost, + G.C.d_density, G.C.d_momentum_x, G.C.d_momentum_y, G.C.d_momentum_z, d_circ_vel_x, d_circ_vel_y, + d_partial_mass, d_partial_vel); size_t n = ngrid; Real *mass_input = d_partial_mass; @@ -158,25 +151,22 @@ void FeedbackAnalysis::Compute_Gas_Velocity_Dispersion_GPU(Grid3D &G) while (n > TPB_ANALYSIS) { ngrid = std::ceil((n * 1.) / TPB_ANALYSIS); // printf("Reduce_Tubulence: Next kernel call grid size is %d\n", ngrid); - hipLaunchKernelGGL(Reduce_Tubulence_kernel_2, ngrid, TPB_ANALYSIS, 0, 0, - mass_input, vel_input, d_partial_mass, d_partial_vel, n); + hipLaunchKernelGGL(Reduce_Tubulence_kernel_2, ngrid, TPB_ANALYSIS, 0, 0, mass_input, vel_input, d_partial_mass, + d_partial_vel, n); mass_input = d_partial_mass; vel_input = d_partial_vel; n = ngrid; } if (n > 1) { - hipLaunchKernelGGL(Reduce_Tubulence_kernel_2, 1, TPB_ANALYSIS, 0, 0, - d_partial_mass, d_partial_vel, d_partial_mass, + hipLaunchKernelGGL(Reduce_Tubulence_kernel_2, 1, TPB_ANALYSIS, 0, 0, d_partial_mass, d_partial_vel, d_partial_mass, d_partial_vel, n); } // cudaDeviceSynchronize(); - CHECK(cudaMemcpy(h_partial_mass, d_partial_mass, ngrid * sizeof(Real), - cudaMemcpyDeviceToHost)); - CHECK(cudaMemcpy(h_partial_vel, d_partial_vel, ngrid * sizeof(Real), - cudaMemcpyDeviceToHost)); + CHECK(cudaMemcpy(h_partial_mass, d_partial_mass, ngrid * sizeof(Real), cudaMemcpyDeviceToHost)); + CHECK(cudaMemcpy(h_partial_vel, d_partial_vel, ngrid * sizeof(Real), cudaMemcpyDeviceToHost)); #ifdef MPI_CHOLLA MPI_Allreduce(h_partial_mass, &total_mass, 1, MPI_CHREAL, MPI_SUM, world); @@ -187,8 +177,7 @@ void FeedbackAnalysis::Compute_Gas_Velocity_Dispersion_GPU(Grid3D &G) #endif if (total_vel < 0 || total_mass < 0) { - chprintf("feedback trouble. total_vel = %.3e, total_mass = %.3e\n", - total_vel, total_mass); + chprintf("feedback trouble. total_vel = %.3e, total_mass = %.3e\n", total_vel, total_mass); } chprintf("feedback: time %f, dt=%f, vrms = %f km/s\n", G.H.t, G.H.dt, diff --git a/src/analysis/io_analysis.cpp b/src/analysis/io_analysis.cpp index c7df48f91..70dec2a7b 100644 --- a/src/analysis/io_analysis.cpp +++ b/src/analysis/io_analysis.cpp @@ -56,28 +56,22 @@ void Grid3D::Write_Skewers_Header_HDF5(hid_t file_id) // Create the data space for the attribute dataspace_id = H5Screate_simple(1, &attr_dims, NULL); #ifdef COSMOLOGY - attribute_id = H5Acreate(file_id, "current_a", H5T_IEEE_F64BE, dataspace_id, - H5P_DEFAULT, H5P_DEFAULT); - status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &Particles.current_a); - status = H5Aclose(attribute_id); - attribute_id = H5Acreate(file_id, "current_z", H5T_IEEE_F64BE, dataspace_id, - H5P_DEFAULT, H5P_DEFAULT); - status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &Particles.current_z); - status = H5Aclose(attribute_id); - attribute_id = H5Acreate(file_id, "H0", H5T_IEEE_F64BE, dataspace_id, - H5P_DEFAULT, H5P_DEFAULT); + attribute_id = H5Acreate(file_id, "current_a", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT); + status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &Particles.current_a); + status = H5Aclose(attribute_id); + attribute_id = H5Acreate(file_id, "current_z", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT); + status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &Particles.current_z); + status = H5Aclose(attribute_id); + attribute_id = H5Acreate(file_id, "H0", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT); status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &H0); status = H5Aclose(attribute_id); - attribute_id = H5Acreate(file_id, "Omega_M", H5T_IEEE_F64BE, dataspace_id, - H5P_DEFAULT, H5P_DEFAULT); + attribute_id = H5Acreate(file_id, "Omega_M", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT); status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &Cosmo.Omega_M); status = H5Aclose(attribute_id); - attribute_id = H5Acreate(file_id, "Omega_L", H5T_IEEE_F64BE, dataspace_id, - H5P_DEFAULT, H5P_DEFAULT); + attribute_id = H5Acreate(file_id, "Omega_L", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT); status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &Cosmo.Omega_L); status = H5Aclose(attribute_id); - attribute_id = H5Acreate(file_id, "Omega_b", H5T_IEEE_F64BE, dataspace_id, - H5P_DEFAULT, H5P_DEFAULT); + attribute_id = H5Acreate(file_id, "Omega_b", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT); status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &Cosmo.Omega_b); status = H5Aclose(attribute_id); #endif @@ -93,8 +87,7 @@ void Grid3D::Write_Skewers_Header_HDF5(hid_t file_id) Real_data[1] = Analysis.Lbox_y; Real_data[2] = Analysis.Lbox_z; - attribute_id = H5Acreate(file_id, "Lbox", H5T_IEEE_F64BE, dataspace_id, - H5P_DEFAULT, H5P_DEFAULT); + attribute_id = H5Acreate(file_id, "Lbox", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT); status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, Real_data); status = H5Aclose(attribute_id); @@ -128,8 +121,7 @@ void Grid3D::Write_Skewers_Data_HDF5(hid_t file_id) dims_x[1] = n_los_x; hid_t skewers_group_x, dataspace_id_skewers_x; dataspace_id_skewers_x = H5Screate_simple(2, dims_x, NULL); - skewers_group_x = - H5Gcreate(file_id, "skewers_x", H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + skewers_group_x = H5Gcreate(file_id, "skewers_x", H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); for (int skewer_id = 0; skewer_id < n_global_x; skewer_id++) { for (int los_id = 0; los_id < n_los_x; los_id++) { @@ -138,103 +130,83 @@ void Grid3D::Write_Skewers_Data_HDF5(hid_t file_id) dataset_buffer_x[buffer_id] = Analysis.skewers_density_x_global[data_id]; } } - dataset_id = - H5Dcreate(skewers_group_x, "density", H5T_IEEE_F64BE, - dataspace_id_skewers_x, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); - status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, - H5P_DEFAULT, dataset_buffer_x); - status = H5Dclose(dataset_id); + dataset_id = H5Dcreate(skewers_group_x, "density", H5T_IEEE_F64BE, dataspace_id_skewers_x, H5P_DEFAULT, H5P_DEFAULT, + H5P_DEFAULT); + status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer_x); + status = H5Dclose(dataset_id); for (int skewer_id = 0; skewer_id < n_global_x; skewer_id++) { for (int los_id = 0; los_id < n_los_x; los_id++) { - data_id = skewer_id * n_los_x + los_id; - buffer_id = skewer_id * n_los_x + los_id; - dataset_buffer_x[buffer_id] = - Analysis.skewers_HI_density_x_global[data_id]; + data_id = skewer_id * n_los_x + los_id; + buffer_id = skewer_id * n_los_x + los_id; + dataset_buffer_x[buffer_id] = Analysis.skewers_HI_density_x_global[data_id]; } } - dataset_id = - H5Dcreate(skewers_group_x, "HI_density", H5T_IEEE_F64BE, - dataspace_id_skewers_x, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); - status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, - H5P_DEFAULT, dataset_buffer_x); - status = H5Dclose(dataset_id); + dataset_id = H5Dcreate(skewers_group_x, "HI_density", H5T_IEEE_F64BE, dataspace_id_skewers_x, H5P_DEFAULT, + H5P_DEFAULT, H5P_DEFAULT); + status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer_x); + status = H5Dclose(dataset_id); for (int skewer_id = 0; skewer_id < n_global_x; skewer_id++) { for (int los_id = 0; los_id < n_los_x; los_id++) { - data_id = skewer_id * n_los_x + los_id; - buffer_id = skewer_id * n_los_x + los_id; - dataset_buffer_x[buffer_id] = - Analysis.skewers_HeII_density_x_global[data_id]; + data_id = skewer_id * n_los_x + los_id; + buffer_id = skewer_id * n_los_x + los_id; + dataset_buffer_x[buffer_id] = Analysis.skewers_HeII_density_x_global[data_id]; } } - dataset_id = - H5Dcreate(skewers_group_x, "HeII_density", H5T_IEEE_F64BE, - dataspace_id_skewers_x, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); - status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, - H5P_DEFAULT, dataset_buffer_x); - status = H5Dclose(dataset_id); + dataset_id = H5Dcreate(skewers_group_x, "HeII_density", H5T_IEEE_F64BE, dataspace_id_skewers_x, H5P_DEFAULT, + H5P_DEFAULT, H5P_DEFAULT); + status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer_x); + status = H5Dclose(dataset_id); for (int skewer_id = 0; skewer_id < n_global_x; skewer_id++) { for (int los_id = 0; los_id < n_los_x; los_id++) { - data_id = skewer_id * n_los_x + los_id; - buffer_id = skewer_id * n_los_x + los_id; - dataset_buffer_x[buffer_id] = - Analysis.skewers_temperature_x_global[data_id]; + data_id = skewer_id * n_los_x + los_id; + buffer_id = skewer_id * n_los_x + los_id; + dataset_buffer_x[buffer_id] = Analysis.skewers_temperature_x_global[data_id]; } } - dataset_id = - H5Dcreate(skewers_group_x, "temperature", H5T_IEEE_F64BE, - dataspace_id_skewers_x, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); - status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, - H5P_DEFAULT, dataset_buffer_x); - status = H5Dclose(dataset_id); + dataset_id = H5Dcreate(skewers_group_x, "temperature", H5T_IEEE_F64BE, dataspace_id_skewers_x, H5P_DEFAULT, + H5P_DEFAULT, H5P_DEFAULT); + status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer_x); + status = H5Dclose(dataset_id); for (int skewer_id = 0; skewer_id < n_global_x; skewer_id++) { for (int los_id = 0; los_id < n_los_x; los_id++) { - data_id = skewer_id * n_los_x + los_id; - buffer_id = skewer_id * n_los_x + los_id; - dataset_buffer_x[buffer_id] = - Analysis.skewers_los_velocity_x_global[data_id]; + data_id = skewer_id * n_los_x + los_id; + buffer_id = skewer_id * n_los_x + los_id; + dataset_buffer_x[buffer_id] = Analysis.skewers_los_velocity_x_global[data_id]; } } - dataset_id = - H5Dcreate(skewers_group_x, "los_velocity", H5T_IEEE_F64BE, - dataspace_id_skewers_x, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); - status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, - H5P_DEFAULT, dataset_buffer_x); - status = H5Dclose(dataset_id); + dataset_id = H5Dcreate(skewers_group_x, "los_velocity", H5T_IEEE_F64BE, dataspace_id_skewers_x, H5P_DEFAULT, + H5P_DEFAULT, H5P_DEFAULT); + status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer_x); + status = H5Dclose(dataset_id); #ifdef OUTPUT_SKEWERS_TRANSMITTED_FLUX for (int skewer_id = 0; skewer_id < n_global_x; skewer_id++) { for (int los_id = 0; los_id < n_los_x; los_id++) { - data_id = skewer_id * n_los_x + los_id; - buffer_id = skewer_id * n_los_x + los_id; - dataset_buffer_x[buffer_id] = - Analysis.skewers_transmitted_flux_HI_x_global[data_id]; + data_id = skewer_id * n_los_x + los_id; + buffer_id = skewer_id * n_los_x + los_id; + dataset_buffer_x[buffer_id] = Analysis.skewers_transmitted_flux_HI_x_global[data_id]; } } - dataset_id = - H5Dcreate(skewers_group_x, "los_transmitted_flux_HI", H5T_IEEE_F64BE, - dataspace_id_skewers_x, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); - status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, - H5P_DEFAULT, dataset_buffer_x); - status = H5Dclose(dataset_id); + dataset_id = H5Dcreate(skewers_group_x, "los_transmitted_flux_HI", H5T_IEEE_F64BE, dataspace_id_skewers_x, + H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer_x); + status = H5Dclose(dataset_id); for (int skewer_id = 0; skewer_id < n_global_x; skewer_id++) { for (int los_id = 0; los_id < n_los_x; los_id++) { - data_id = skewer_id * n_los_x + los_id; - buffer_id = skewer_id * n_los_x + los_id; - dataset_buffer_x[buffer_id] = - Analysis.skewers_transmitted_flux_HeII_x_global[data_id]; + data_id = skewer_id * n_los_x + los_id; + buffer_id = skewer_id * n_los_x + los_id; + dataset_buffer_x[buffer_id] = Analysis.skewers_transmitted_flux_HeII_x_global[data_id]; } } - dataset_id = - H5Dcreate(skewers_group_x, "los_transmitted_flux_HeII", H5T_IEEE_F64BE, - dataspace_id_skewers_x, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); - status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, - H5P_DEFAULT, dataset_buffer_x); - status = H5Dclose(dataset_id); + dataset_id = H5Dcreate(skewers_group_x, "los_transmitted_flux_HeII", H5T_IEEE_F64BE, dataspace_id_skewers_x, + H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer_x); + status = H5Dclose(dataset_id); #endif free(dataset_buffer_x); @@ -246,8 +218,7 @@ void Grid3D::Write_Skewers_Data_HDF5(hid_t file_id) dims_y[1] = n_los_y; hid_t skewers_group_y, dataspace_id_skewers_y; dataspace_id_skewers_y = H5Screate_simple(2, dims_y, NULL); - skewers_group_y = - H5Gcreate(file_id, "skewers_y", H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + skewers_group_y = H5Gcreate(file_id, "skewers_y", H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); for (int skewer_id = 0; skewer_id < n_global_y; skewer_id++) { for (int los_id = 0; los_id < n_los_y; los_id++) { @@ -256,103 +227,83 @@ void Grid3D::Write_Skewers_Data_HDF5(hid_t file_id) dataset_buffer_y[buffer_id] = Analysis.skewers_density_y_global[data_id]; } } - dataset_id = - H5Dcreate(skewers_group_y, "density", H5T_IEEE_F64BE, - dataspace_id_skewers_y, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); - status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, - H5P_DEFAULT, dataset_buffer_y); - status = H5Dclose(dataset_id); + dataset_id = H5Dcreate(skewers_group_y, "density", H5T_IEEE_F64BE, dataspace_id_skewers_y, H5P_DEFAULT, H5P_DEFAULT, + H5P_DEFAULT); + status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer_y); + status = H5Dclose(dataset_id); for (int skewer_id = 0; skewer_id < n_global_y; skewer_id++) { for (int los_id = 0; los_id < n_los_y; los_id++) { - data_id = skewer_id * n_los_y + los_id; - buffer_id = skewer_id * n_los_y + los_id; - dataset_buffer_y[buffer_id] = - Analysis.skewers_HI_density_y_global[data_id]; + data_id = skewer_id * n_los_y + los_id; + buffer_id = skewer_id * n_los_y + los_id; + dataset_buffer_y[buffer_id] = Analysis.skewers_HI_density_y_global[data_id]; } } - dataset_id = - H5Dcreate(skewers_group_y, "HI_density", H5T_IEEE_F64BE, - dataspace_id_skewers_y, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); - status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, - H5P_DEFAULT, dataset_buffer_y); - status = H5Dclose(dataset_id); + dataset_id = H5Dcreate(skewers_group_y, "HI_density", H5T_IEEE_F64BE, dataspace_id_skewers_y, H5P_DEFAULT, + H5P_DEFAULT, H5P_DEFAULT); + status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer_y); + status = H5Dclose(dataset_id); for (int skewer_id = 0; skewer_id < n_global_y; skewer_id++) { for (int los_id = 0; los_id < n_los_y; los_id++) { - data_id = skewer_id * n_los_y + los_id; - buffer_id = skewer_id * n_los_y + los_id; - dataset_buffer_y[buffer_id] = - Analysis.skewers_HeII_density_y_global[data_id]; + data_id = skewer_id * n_los_y + los_id; + buffer_id = skewer_id * n_los_y + los_id; + dataset_buffer_y[buffer_id] = Analysis.skewers_HeII_density_y_global[data_id]; } } - dataset_id = - H5Dcreate(skewers_group_y, "HeII_density", H5T_IEEE_F64BE, - dataspace_id_skewers_y, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); - status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, - H5P_DEFAULT, dataset_buffer_y); - status = H5Dclose(dataset_id); + dataset_id = H5Dcreate(skewers_group_y, "HeII_density", H5T_IEEE_F64BE, dataspace_id_skewers_y, H5P_DEFAULT, + H5P_DEFAULT, H5P_DEFAULT); + status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer_y); + status = H5Dclose(dataset_id); for (int skewer_id = 0; skewer_id < n_global_y; skewer_id++) { for (int los_id = 0; los_id < n_los_y; los_id++) { - data_id = skewer_id * n_los_y + los_id; - buffer_id = skewer_id * n_los_y + los_id; - dataset_buffer_y[buffer_id] = - Analysis.skewers_temperature_y_global[data_id]; + data_id = skewer_id * n_los_y + los_id; + buffer_id = skewer_id * n_los_y + los_id; + dataset_buffer_y[buffer_id] = Analysis.skewers_temperature_y_global[data_id]; } } - dataset_id = - H5Dcreate(skewers_group_y, "temperature", H5T_IEEE_F64BE, - dataspace_id_skewers_y, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); - status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, - H5P_DEFAULT, dataset_buffer_y); - status = H5Dclose(dataset_id); + dataset_id = H5Dcreate(skewers_group_y, "temperature", H5T_IEEE_F64BE, dataspace_id_skewers_y, H5P_DEFAULT, + H5P_DEFAULT, H5P_DEFAULT); + status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer_y); + status = H5Dclose(dataset_id); for (int skewer_id = 0; skewer_id < n_global_y; skewer_id++) { for (int los_id = 0; los_id < n_los_y; los_id++) { - data_id = skewer_id * n_los_y + los_id; - buffer_id = skewer_id * n_los_y + los_id; - dataset_buffer_y[buffer_id] = - Analysis.skewers_los_velocity_y_global[data_id]; + data_id = skewer_id * n_los_y + los_id; + buffer_id = skewer_id * n_los_y + los_id; + dataset_buffer_y[buffer_id] = Analysis.skewers_los_velocity_y_global[data_id]; } } - dataset_id = - H5Dcreate(skewers_group_y, "los_velocity", H5T_IEEE_F64BE, - dataspace_id_skewers_y, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); - status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, - H5P_DEFAULT, dataset_buffer_y); - status = H5Dclose(dataset_id); + dataset_id = H5Dcreate(skewers_group_y, "los_velocity", H5T_IEEE_F64BE, dataspace_id_skewers_y, H5P_DEFAULT, + H5P_DEFAULT, H5P_DEFAULT); + status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer_y); + status = H5Dclose(dataset_id); #ifdef OUTPUT_SKEWERS_TRANSMITTED_FLUX for (int skewer_id = 0; skewer_id < n_global_y; skewer_id++) { for (int los_id = 0; los_id < n_los_y; los_id++) { - data_id = skewer_id * n_los_y + los_id; - buffer_id = skewer_id * n_los_y + los_id; - dataset_buffer_y[buffer_id] = - Analysis.skewers_transmitted_flux_HI_y_global[data_id]; + data_id = skewer_id * n_los_y + los_id; + buffer_id = skewer_id * n_los_y + los_id; + dataset_buffer_y[buffer_id] = Analysis.skewers_transmitted_flux_HI_y_global[data_id]; } } - dataset_id = - H5Dcreate(skewers_group_y, "los_transmitted_flux_HI", H5T_IEEE_F64BE, - dataspace_id_skewers_y, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); - status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, - H5P_DEFAULT, dataset_buffer_y); - status = H5Dclose(dataset_id); + dataset_id = H5Dcreate(skewers_group_y, "los_transmitted_flux_HI", H5T_IEEE_F64BE, dataspace_id_skewers_y, + H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer_y); + status = H5Dclose(dataset_id); for (int skewer_id = 0; skewer_id < n_global_y; skewer_id++) { for (int los_id = 0; los_id < n_los_y; los_id++) { - data_id = skewer_id * n_los_y + los_id; - buffer_id = skewer_id * n_los_y + los_id; - dataset_buffer_y[buffer_id] = - Analysis.skewers_transmitted_flux_HeII_y_global[data_id]; + data_id = skewer_id * n_los_y + los_id; + buffer_id = skewer_id * n_los_y + los_id; + dataset_buffer_y[buffer_id] = Analysis.skewers_transmitted_flux_HeII_y_global[data_id]; } } - dataset_id = - H5Dcreate(skewers_group_y, "los_transmitted_flux_HeII", H5T_IEEE_F64BE, - dataspace_id_skewers_y, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); - status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, - H5P_DEFAULT, dataset_buffer_y); - status = H5Dclose(dataset_id); + dataset_id = H5Dcreate(skewers_group_y, "los_transmitted_flux_HeII", H5T_IEEE_F64BE, dataspace_id_skewers_y, + H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer_y); + status = H5Dclose(dataset_id); #endif free(dataset_buffer_y); @@ -364,8 +315,7 @@ void Grid3D::Write_Skewers_Data_HDF5(hid_t file_id) dims_z[1] = n_los_z; hid_t skewers_group_z, dataspace_id_skewers_z; dataspace_id_skewers_z = H5Screate_simple(2, dims_z, NULL); - skewers_group_z = - H5Gcreate(file_id, "skewers_z", H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + skewers_group_z = H5Gcreate(file_id, "skewers_z", H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); for (int skewer_id = 0; skewer_id < n_global_z; skewer_id++) { for (int los_id = 0; los_id < n_los_z; los_id++) { @@ -374,103 +324,83 @@ void Grid3D::Write_Skewers_Data_HDF5(hid_t file_id) dataset_buffer_z[buffer_id] = Analysis.skewers_density_z_global[data_id]; } } - dataset_id = - H5Dcreate(skewers_group_z, "density", H5T_IEEE_F64BE, - dataspace_id_skewers_z, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); - status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, - H5P_DEFAULT, dataset_buffer_z); - status = H5Dclose(dataset_id); + dataset_id = H5Dcreate(skewers_group_z, "density", H5T_IEEE_F64BE, dataspace_id_skewers_z, H5P_DEFAULT, H5P_DEFAULT, + H5P_DEFAULT); + status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer_z); + status = H5Dclose(dataset_id); for (int skewer_id = 0; skewer_id < n_global_z; skewer_id++) { for (int los_id = 0; los_id < n_los_z; los_id++) { - data_id = skewer_id * n_los_z + los_id; - buffer_id = skewer_id * n_los_z + los_id; - dataset_buffer_z[buffer_id] = - Analysis.skewers_HI_density_z_global[data_id]; + data_id = skewer_id * n_los_z + los_id; + buffer_id = skewer_id * n_los_z + los_id; + dataset_buffer_z[buffer_id] = Analysis.skewers_HI_density_z_global[data_id]; } } - dataset_id = - H5Dcreate(skewers_group_z, "HI_density", H5T_IEEE_F64BE, - dataspace_id_skewers_z, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); - status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, - H5P_DEFAULT, dataset_buffer_z); - status = H5Dclose(dataset_id); + dataset_id = H5Dcreate(skewers_group_z, "HI_density", H5T_IEEE_F64BE, dataspace_id_skewers_z, H5P_DEFAULT, + H5P_DEFAULT, H5P_DEFAULT); + status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer_z); + status = H5Dclose(dataset_id); for (int skewer_id = 0; skewer_id < n_global_z; skewer_id++) { for (int los_id = 0; los_id < n_los_z; los_id++) { - data_id = skewer_id * n_los_z + los_id; - buffer_id = skewer_id * n_los_z + los_id; - dataset_buffer_z[buffer_id] = - Analysis.skewers_HeII_density_z_global[data_id]; + data_id = skewer_id * n_los_z + los_id; + buffer_id = skewer_id * n_los_z + los_id; + dataset_buffer_z[buffer_id] = Analysis.skewers_HeII_density_z_global[data_id]; } } - dataset_id = - H5Dcreate(skewers_group_z, "HeII_density", H5T_IEEE_F64BE, - dataspace_id_skewers_z, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); - status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, - H5P_DEFAULT, dataset_buffer_z); - status = H5Dclose(dataset_id); + dataset_id = H5Dcreate(skewers_group_z, "HeII_density", H5T_IEEE_F64BE, dataspace_id_skewers_z, H5P_DEFAULT, + H5P_DEFAULT, H5P_DEFAULT); + status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer_z); + status = H5Dclose(dataset_id); for (int skewer_id = 0; skewer_id < n_global_z; skewer_id++) { for (int los_id = 0; los_id < n_los_z; los_id++) { - data_id = skewer_id * n_los_z + los_id; - buffer_id = skewer_id * n_los_z + los_id; - dataset_buffer_z[buffer_id] = - Analysis.skewers_temperature_z_global[data_id]; + data_id = skewer_id * n_los_z + los_id; + buffer_id = skewer_id * n_los_z + los_id; + dataset_buffer_z[buffer_id] = Analysis.skewers_temperature_z_global[data_id]; } } - dataset_id = - H5Dcreate(skewers_group_z, "temperature", H5T_IEEE_F64BE, - dataspace_id_skewers_z, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); - status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, - H5P_DEFAULT, dataset_buffer_z); - status = H5Dclose(dataset_id); + dataset_id = H5Dcreate(skewers_group_z, "temperature", H5T_IEEE_F64BE, dataspace_id_skewers_z, H5P_DEFAULT, + H5P_DEFAULT, H5P_DEFAULT); + status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer_z); + status = H5Dclose(dataset_id); for (int skewer_id = 0; skewer_id < n_global_z; skewer_id++) { for (int los_id = 0; los_id < n_los_z; los_id++) { - data_id = skewer_id * n_los_z + los_id; - buffer_id = skewer_id * n_los_z + los_id; - dataset_buffer_z[buffer_id] = - Analysis.skewers_los_velocity_z_global[data_id]; + data_id = skewer_id * n_los_z + los_id; + buffer_id = skewer_id * n_los_z + los_id; + dataset_buffer_z[buffer_id] = Analysis.skewers_los_velocity_z_global[data_id]; } } - dataset_id = - H5Dcreate(skewers_group_z, "los_velocity", H5T_IEEE_F64BE, - dataspace_id_skewers_z, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); - status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, - H5P_DEFAULT, dataset_buffer_z); - status = H5Dclose(dataset_id); + dataset_id = H5Dcreate(skewers_group_z, "los_velocity", H5T_IEEE_F64BE, dataspace_id_skewers_z, H5P_DEFAULT, + H5P_DEFAULT, H5P_DEFAULT); + status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer_z); + status = H5Dclose(dataset_id); #ifdef OUTPUT_SKEWERS_TRANSMITTED_FLUX for (int skewer_id = 0; skewer_id < n_global_z; skewer_id++) { for (int los_id = 0; los_id < n_los_z; los_id++) { - data_id = skewer_id * n_los_z + los_id; - buffer_id = skewer_id * n_los_z + los_id; - dataset_buffer_z[buffer_id] = - Analysis.skewers_transmitted_flux_HI_z_global[data_id]; + data_id = skewer_id * n_los_z + los_id; + buffer_id = skewer_id * n_los_z + los_id; + dataset_buffer_z[buffer_id] = Analysis.skewers_transmitted_flux_HI_z_global[data_id]; } } - dataset_id = - H5Dcreate(skewers_group_z, "los_transmitted_flux_HI", H5T_IEEE_F64BE, - dataspace_id_skewers_z, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); - status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, - H5P_DEFAULT, dataset_buffer_z); - status = H5Dclose(dataset_id); + dataset_id = H5Dcreate(skewers_group_z, "los_transmitted_flux_HI", H5T_IEEE_F64BE, dataspace_id_skewers_z, + H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer_z); + status = H5Dclose(dataset_id); for (int skewer_id = 0; skewer_id < n_global_z; skewer_id++) { for (int los_id = 0; los_id < n_los_z; los_id++) { - data_id = skewer_id * n_los_z + los_id; - buffer_id = skewer_id * n_los_z + los_id; - dataset_buffer_z[buffer_id] = - Analysis.skewers_transmitted_flux_HeII_z_global[data_id]; + data_id = skewer_id * n_los_z + los_id; + buffer_id = skewer_id * n_los_z + los_id; + dataset_buffer_z[buffer_id] = Analysis.skewers_transmitted_flux_HeII_z_global[data_id]; } } - dataset_id = - H5Dcreate(skewers_group_z, "los_transmitted_flux_HeII", H5T_IEEE_F64BE, - dataspace_id_skewers_z, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); - status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, - H5P_DEFAULT, dataset_buffer_z); - status = H5Dclose(dataset_id); + dataset_id = H5Dcreate(skewers_group_z, "los_transmitted_flux_HeII", H5T_IEEE_F64BE, dataspace_id_skewers_z, + H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer_z); + status = H5Dclose(dataset_id); #endif free(dataset_buffer_z); @@ -484,15 +414,12 @@ void Grid3D::Write_Skewers_Data_HDF5(hid_t file_id) dataspace_id_skewer_x = H5Screate_simple(1, dims1d_x, NULL); Real *buffer_skewer_x = (Real *)malloc(n_los_x * sizeof(Real)); for (int los_id = 0; los_id < n_los_x; los_id++) { - buffer_skewer_x[los_id] = - Analysis.full_vel_Hubble_x[los_id + n_ghost] / 1e5; // km/s + buffer_skewer_x[los_id] = Analysis.full_vel_Hubble_x[los_id + n_ghost] / 1e5; // km/s } - dataset_id = - H5Dcreate(skewers_group_x, "vel_Hubble", H5T_IEEE_F64BE, - dataspace_id_skewer_x, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); - status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, - H5P_DEFAULT, buffer_skewer_x); - status = H5Dclose(dataset_id); + dataset_id = H5Dcreate(skewers_group_x, "vel_Hubble", H5T_IEEE_F64BE, dataspace_id_skewer_x, H5P_DEFAULT, H5P_DEFAULT, + H5P_DEFAULT); + status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, buffer_skewer_x); + status = H5Dclose(dataset_id); hid_t dataspace_id_skewer_y; hsize_t dims1d_y[1]; @@ -500,15 +427,12 @@ void Grid3D::Write_Skewers_Data_HDF5(hid_t file_id) dataspace_id_skewer_y = H5Screate_simple(1, dims1d_y, NULL); Real *buffer_skewer_y = (Real *)malloc(n_los_y * sizeof(Real)); for (int los_id = 0; los_id < n_los_y; los_id++) { - buffer_skewer_y[los_id] = - Analysis.full_vel_Hubble_y[los_id + n_ghost] / 1e5; // km/s + buffer_skewer_y[los_id] = Analysis.full_vel_Hubble_y[los_id + n_ghost] / 1e5; // km/s } - dataset_id = - H5Dcreate(skewers_group_y, "vel_Hubble", H5T_IEEE_F64BE, - dataspace_id_skewer_y, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); - status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, - H5P_DEFAULT, buffer_skewer_y); - status = H5Dclose(dataset_id); + dataset_id = H5Dcreate(skewers_group_y, "vel_Hubble", H5T_IEEE_F64BE, dataspace_id_skewer_y, H5P_DEFAULT, H5P_DEFAULT, + H5P_DEFAULT); + status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, buffer_skewer_y); + status = H5Dclose(dataset_id); hid_t dataspace_id_skewer_z; hsize_t dims1d_z[1]; @@ -516,15 +440,12 @@ void Grid3D::Write_Skewers_Data_HDF5(hid_t file_id) dataspace_id_skewer_z = H5Screate_simple(1, dims1d_z, NULL); Real *buffer_skewer_z = (Real *)malloc(n_los_z * sizeof(Real)); for (int los_id = 0; los_id < n_los_z; los_id++) { - buffer_skewer_z[los_id] = - Analysis.full_vel_Hubble_z[los_id + n_ghost] / 1e5; // km/s + buffer_skewer_z[los_id] = Analysis.full_vel_Hubble_z[los_id + n_ghost] / 1e5; // km/s } - dataset_id = - H5Dcreate(skewers_group_z, "vel_Hubble", H5T_IEEE_F64BE, - dataspace_id_skewer_z, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); - status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, - H5P_DEFAULT, buffer_skewer_z); - status = H5Dclose(dataset_id); + dataset_id = H5Dcreate(skewers_group_z, "vel_Hubble", H5T_IEEE_F64BE, dataspace_id_skewer_z, H5P_DEFAULT, H5P_DEFAULT, + H5P_DEFAULT); + status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, buffer_skewer_z); + status = H5Dclose(dataset_id); free(buffer_skewer_x); free(buffer_skewer_y); @@ -586,28 +507,22 @@ void Grid3D::Write_Analysis_Header_HDF5(hid_t file_id) dataspace_id = H5Screate_simple(1, &attr_dims, NULL); #ifdef COSMOLOGY Real H0 = Cosmo.cosmo_h * 100; - attribute_id = H5Acreate(file_id, "current_a", H5T_IEEE_F64BE, dataspace_id, - H5P_DEFAULT, H5P_DEFAULT); - status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &Particles.current_a); - status = H5Aclose(attribute_id); - attribute_id = H5Acreate(file_id, "current_z", H5T_IEEE_F64BE, dataspace_id, - H5P_DEFAULT, H5P_DEFAULT); - status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &Particles.current_z); - status = H5Aclose(attribute_id); - attribute_id = H5Acreate(file_id, "H0", H5T_IEEE_F64BE, dataspace_id, - H5P_DEFAULT, H5P_DEFAULT); + attribute_id = H5Acreate(file_id, "current_a", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT); + status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &Particles.current_a); + status = H5Aclose(attribute_id); + attribute_id = H5Acreate(file_id, "current_z", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT); + status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &Particles.current_z); + status = H5Aclose(attribute_id); + attribute_id = H5Acreate(file_id, "H0", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT); status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &H0); status = H5Aclose(attribute_id); - attribute_id = H5Acreate(file_id, "Omega_M", H5T_IEEE_F64BE, dataspace_id, - H5P_DEFAULT, H5P_DEFAULT); + attribute_id = H5Acreate(file_id, "Omega_M", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT); status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &Cosmo.Omega_M); status = H5Aclose(attribute_id); - attribute_id = H5Acreate(file_id, "Omega_L", H5T_IEEE_F64BE, dataspace_id, - H5P_DEFAULT, H5P_DEFAULT); + attribute_id = H5Acreate(file_id, "Omega_L", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT); status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &Cosmo.Omega_L); status = H5Aclose(attribute_id); - attribute_id = H5Acreate(file_id, "Omega_b", H5T_IEEE_F64BE, dataspace_id, - H5P_DEFAULT, H5P_DEFAULT); + attribute_id = H5Acreate(file_id, "Omega_b", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT); status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &Cosmo.Omega_b); status = H5Aclose(attribute_id); #endif @@ -623,8 +538,7 @@ void Grid3D::Write_Analysis_Header_HDF5(hid_t file_id) Real_data[1] = Analysis.Lbox_y; Real_data[2] = Analysis.Lbox_z; - attribute_id = H5Acreate(file_id, "Lbox", H5T_IEEE_F64BE, dataspace_id, - H5P_DEFAULT, H5P_DEFAULT); + attribute_id = H5Acreate(file_id, "Lbox", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT); status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, Real_data); status = H5Aclose(attribute_id); @@ -649,8 +563,7 @@ void Grid3D::Write_Analysis_Data_HDF5(hid_t file_id) dims2d[1] = ny_dset; dataspace_id = H5Screate_simple(2, dims2d, NULL); - group_id = H5Gcreate(file_id, "/phase_diagram", H5P_DEFAULT, H5P_DEFAULT, - H5P_DEFAULT); + group_id = H5Gcreate(file_id, "/phase_diagram", H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); for (j = 0; j < ny_dset; j++) { for (i = 0; i < nx_dset; i++) { id = i + j * nx_dset; @@ -658,36 +571,28 @@ void Grid3D::Write_Analysis_Data_HDF5(hid_t file_id) dataset_buffer[buf_id] = Analysis.phase_diagram[id]; } } - dataset_id = H5Dcreate(group_id, "data", H5T_IEEE_F32BE, dataspace_id, - H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); - status = H5Dwrite(dataset_id, H5T_NATIVE_FLOAT, H5S_ALL, H5S_ALL, H5P_DEFAULT, - dataset_buffer); - status = H5Dclose(dataset_id); + dataset_id = H5Dcreate(group_id, "data", H5T_IEEE_F32BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + status = H5Dwrite(dataset_id, H5T_NATIVE_FLOAT, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer); + status = H5Dclose(dataset_id); attr_dims = 1; dataspace_id = H5Screate_simple(1, &attr_dims, NULL); - attribute_id = H5Acreate(group_id, "n_temp", H5T_STD_I32BE, dataspace_id, - H5P_DEFAULT, H5P_DEFAULT); + attribute_id = H5Acreate(group_id, "n_temp", H5T_STD_I32BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT); status = H5Awrite(attribute_id, H5T_NATIVE_INT, &Analysis.n_temp); status = H5Aclose(attribute_id); - attribute_id = H5Acreate(group_id, "n_dens", H5T_STD_I32BE, dataspace_id, - H5P_DEFAULT, H5P_DEFAULT); + attribute_id = H5Acreate(group_id, "n_dens", H5T_STD_I32BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT); status = H5Awrite(attribute_id, H5T_NATIVE_INT, &Analysis.n_dens); status = H5Aclose(attribute_id); - attribute_id = H5Acreate(group_id, "temp_min", H5T_IEEE_F64BE, dataspace_id, - H5P_DEFAULT, H5P_DEFAULT); + attribute_id = H5Acreate(group_id, "temp_min", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT); status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &Analysis.temp_min); status = H5Aclose(attribute_id); - attribute_id = H5Acreate(group_id, "temp_max", H5T_IEEE_F64BE, dataspace_id, - H5P_DEFAULT, H5P_DEFAULT); + attribute_id = H5Acreate(group_id, "temp_max", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT); status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &Analysis.temp_max); status = H5Aclose(attribute_id); - attribute_id = H5Acreate(group_id, "dens_min", H5T_IEEE_F64BE, dataspace_id, - H5P_DEFAULT, H5P_DEFAULT); + attribute_id = H5Acreate(group_id, "dens_min", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT); status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &Analysis.dens_min); status = H5Aclose(attribute_id); - attribute_id = H5Acreate(group_id, "dens_max", H5T_IEEE_F64BE, dataspace_id, - H5P_DEFAULT, H5P_DEFAULT); + attribute_id = H5Acreate(group_id, "dens_max", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT); status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &Analysis.dens_max); status = H5Aclose(attribute_id); @@ -698,24 +603,19 @@ void Grid3D::Write_Analysis_Data_HDF5(hid_t file_id) #ifdef LYA_STATISTICS - group_id = H5Gcreate(file_id, "/lya_statistics", H5P_DEFAULT, H5P_DEFAULT, - H5P_DEFAULT); + group_id = H5Gcreate(file_id, "/lya_statistics", H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); - attribute_id = H5Acreate(group_id, "n_skewers", H5T_STD_I32BE, dataspace_id, - H5P_DEFAULT, H5P_DEFAULT); - status = - H5Awrite(attribute_id, H5T_NATIVE_INT, &Analysis.n_skewers_processed); - status = H5Aclose(attribute_id); + attribute_id = H5Acreate(group_id, "n_skewers", H5T_STD_I32BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT); + status = H5Awrite(attribute_id, H5T_NATIVE_INT, &Analysis.n_skewers_processed); + status = H5Aclose(attribute_id); - attribute_id = H5Acreate(group_id, "Flux_mean_HI", H5T_IEEE_F64BE, - dataspace_id, H5P_DEFAULT, H5P_DEFAULT); - status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &Analysis.Flux_mean_HI); - status = H5Aclose(attribute_id); + attribute_id = H5Acreate(group_id, "Flux_mean_HI", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT); + status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &Analysis.Flux_mean_HI); + status = H5Aclose(attribute_id); - attribute_id = H5Acreate(group_id, "Flux_mean_HeII", H5T_IEEE_F64BE, - dataspace_id, H5P_DEFAULT, H5P_DEFAULT); - status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &Analysis.Flux_mean_HeII); - status = H5Aclose(attribute_id); + attribute_id = H5Acreate(group_id, "Flux_mean_HeII", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT); + status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &Analysis.Flux_mean_HeII); + status = H5Aclose(attribute_id); if (Analysis.Computed_Flux_Power_Spectrum == 1) { hid_t ps_group, dataspace_id_ps; @@ -724,27 +624,22 @@ void Grid3D::Write_Analysis_Data_HDF5(hid_t file_id) dims1d_ps[0] = n_bins; dataspace_id_ps = H5Screate_simple(1, dims1d_ps, NULL); - ps_group = H5Gcreate(group_id, "power_spectrum", H5P_DEFAULT, H5P_DEFAULT, - H5P_DEFAULT); + ps_group = H5Gcreate(group_id, "power_spectrum", H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); Real *buffer_ps = (Real *)malloc(n_bins * sizeof(Real)); for (int bin_id = 0; bin_id < n_bins; bin_id++) { buffer_ps[bin_id] = Analysis.k_centers[bin_id]; } - dataset_id = H5Dcreate(ps_group, "k_vals", H5T_IEEE_F64BE, dataspace_id_ps, - H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); - status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, - H5P_DEFAULT, buffer_ps); + dataset_id = H5Dcreate(ps_group, "k_vals", H5T_IEEE_F64BE, dataspace_id_ps, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, buffer_ps); status = H5Dclose(dataset_id); for (int bin_id = 0; bin_id < n_bins; bin_id++) { buffer_ps[bin_id] = Analysis.ps_mean[bin_id]; } - dataset_id = H5Dcreate(ps_group, "p(k)", H5T_IEEE_F64BE, dataspace_id_ps, - H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); - status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, - H5P_DEFAULT, buffer_ps); + dataset_id = H5Dcreate(ps_group, "p(k)", H5T_IEEE_F64BE, dataspace_id_ps, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, buffer_ps); status = H5Dclose(dataset_id); free(buffer_ps); diff --git a/src/analysis/lya_statistics.cpp b/src/analysis/lya_statistics.cpp index afea84fa7..3a22149a1 100644 --- a/src/analysis/lya_statistics.cpp +++ b/src/analysis/lya_statistics.cpp @@ -111,28 +111,24 @@ void Analysis_Module::Transfer_Skewers_Global_Axis(int axis) // Write the local data into the global array for (int skewer_id = 0; skewer_id < n_skewers_root; skewer_id++) { for (int los_id = 0; los_id < n_los; los_id++) { - skewers_density_global[skewer_id * n_los + los_id] = - skewers_density_root[skewer_id * n_los + los_id]; + skewers_density_global[skewer_id * n_los + los_id] = skewers_density_root[skewer_id * n_los + los_id]; } } // Write the remote data into the global array n_added = 1; for (int p_id = 1; p_id < nproc; p_id++) { if (!root_procs[p_id]) continue; - MPI_Recv(transfer_buffer, n_skewers_root * n_los, MPI_CHREAL, p_id, 0, - world, &mpi_status); + MPI_Recv(transfer_buffer, n_skewers_root * n_los, MPI_CHREAL, p_id, 0, world, &mpi_status); offset = n_added * n_skewers_root * n_los; for (int skewer_id = 0; skewer_id < n_skewers_root; skewer_id++) { for (int los_id = 0; los_id < n_los; los_id++) { - skewers_density_global[offset + skewer_id * n_los + los_id] = - transfer_buffer[skewer_id * n_los + los_id]; + skewers_density_global[offset + skewer_id * n_los + los_id] = transfer_buffer[skewer_id * n_los + los_id]; } } n_added += 1; } } else { - MPI_Send(skewers_density_root, n_skewers_root * n_los, MPI_CHREAL, 0, 0, - world); + MPI_Send(skewers_density_root, n_skewers_root * n_los, MPI_CHREAL, 0, 0, world); } #endif @@ -141,28 +137,24 @@ void Analysis_Module::Transfer_Skewers_Global_Axis(int axis) // Write the local data into the global array for (int skewer_id = 0; skewer_id < n_skewers_root; skewer_id++) { for (int los_id = 0; los_id < n_los; los_id++) { - skewers_HI_density_global[skewer_id * n_los + los_id] = - skewers_HI_density_root[skewer_id * n_los + los_id]; + skewers_HI_density_global[skewer_id * n_los + los_id] = skewers_HI_density_root[skewer_id * n_los + los_id]; } } // Write the remote data into the global array n_added = 1; for (int p_id = 1; p_id < nproc; p_id++) { if (!root_procs[p_id]) continue; - MPI_Recv(transfer_buffer, n_skewers_root * n_los, MPI_CHREAL, p_id, 0, - world, &mpi_status); + MPI_Recv(transfer_buffer, n_skewers_root * n_los, MPI_CHREAL, p_id, 0, world, &mpi_status); offset = n_added * n_skewers_root * n_los; for (int skewer_id = 0; skewer_id < n_skewers_root; skewer_id++) { for (int los_id = 0; los_id < n_los; los_id++) { - skewers_HI_density_global[offset + skewer_id * n_los + los_id] = - transfer_buffer[skewer_id * n_los + los_id]; + skewers_HI_density_global[offset + skewer_id * n_los + los_id] = transfer_buffer[skewer_id * n_los + los_id]; } } n_added += 1; } } else { - MPI_Send(skewers_HI_density_root, n_skewers_root * n_los, MPI_CHREAL, 0, 0, - world); + MPI_Send(skewers_HI_density_root, n_skewers_root * n_los, MPI_CHREAL, 0, 0, world); } // Set the HeII density array @@ -170,16 +162,14 @@ void Analysis_Module::Transfer_Skewers_Global_Axis(int axis) // Write the local data into the global array for (int skewer_id = 0; skewer_id < n_skewers_root; skewer_id++) { for (int los_id = 0; los_id < n_los; los_id++) { - skewers_HeII_density_global[skewer_id * n_los + los_id] = - skewers_HeII_density_root[skewer_id * n_los + los_id]; + skewers_HeII_density_global[skewer_id * n_los + los_id] = skewers_HeII_density_root[skewer_id * n_los + los_id]; } } // Write the remote data into the global array n_added = 1; for (int p_id = 1; p_id < nproc; p_id++) { if (!root_procs[p_id]) continue; - MPI_Recv(transfer_buffer, n_skewers_root * n_los, MPI_CHREAL, p_id, 0, - world, &mpi_status); + MPI_Recv(transfer_buffer, n_skewers_root * n_los, MPI_CHREAL, p_id, 0, world, &mpi_status); offset = n_added * n_skewers_root * n_los; for (int skewer_id = 0; skewer_id < n_skewers_root; skewer_id++) { for (int los_id = 0; los_id < n_los; los_id++) { @@ -190,8 +180,7 @@ void Analysis_Module::Transfer_Skewers_Global_Axis(int axis) n_added += 1; } } else { - MPI_Send(skewers_HeII_density_root, n_skewers_root * n_los, MPI_CHREAL, 0, - 0, world); + MPI_Send(skewers_HeII_density_root, n_skewers_root * n_los, MPI_CHREAL, 0, 0, world); } // Set the temeprature array @@ -199,28 +188,24 @@ void Analysis_Module::Transfer_Skewers_Global_Axis(int axis) // Write the local data into the global array for (int skewer_id = 0; skewer_id < n_skewers_root; skewer_id++) { for (int los_id = 0; los_id < n_los; los_id++) { - skewers_temperature_global[skewer_id * n_los + los_id] = - skewers_temperature_root[skewer_id * n_los + los_id]; + skewers_temperature_global[skewer_id * n_los + los_id] = skewers_temperature_root[skewer_id * n_los + los_id]; } } // Write the remote data into the global array n_added = 1; for (int p_id = 1; p_id < nproc; p_id++) { if (!root_procs[p_id]) continue; - MPI_Recv(transfer_buffer, n_skewers_root * n_los, MPI_CHREAL, p_id, 0, - world, &mpi_status); + MPI_Recv(transfer_buffer, n_skewers_root * n_los, MPI_CHREAL, p_id, 0, world, &mpi_status); offset = n_added * n_skewers_root * n_los; for (int skewer_id = 0; skewer_id < n_skewers_root; skewer_id++) { for (int los_id = 0; los_id < n_los; los_id++) { - skewers_temperature_global[offset + skewer_id * n_los + los_id] = - transfer_buffer[skewer_id * n_los + los_id]; + skewers_temperature_global[offset + skewer_id * n_los + los_id] = transfer_buffer[skewer_id * n_los + los_id]; } } n_added += 1; } } else { - MPI_Send(skewers_temperature_root, n_skewers_root * n_los, MPI_CHREAL, 0, 0, - world); + MPI_Send(skewers_temperature_root, n_skewers_root * n_los, MPI_CHREAL, 0, 0, world); } // Set the los_velocity array @@ -228,16 +213,14 @@ void Analysis_Module::Transfer_Skewers_Global_Axis(int axis) // Write the local data into the global array for (int skewer_id = 0; skewer_id < n_skewers_root; skewer_id++) { for (int los_id = 0; los_id < n_los; los_id++) { - skewers_los_velocity_global[skewer_id * n_los + los_id] = - skewers_los_velocity_root[skewer_id * n_los + los_id]; + skewers_los_velocity_global[skewer_id * n_los + los_id] = skewers_los_velocity_root[skewer_id * n_los + los_id]; } } // Write the remote data into the global array n_added = 1; for (int p_id = 1; p_id < nproc; p_id++) { if (!root_procs[p_id]) continue; - MPI_Recv(transfer_buffer, n_skewers_root * n_los, MPI_CHREAL, p_id, 0, - world, &mpi_status); + MPI_Recv(transfer_buffer, n_skewers_root * n_los, MPI_CHREAL, p_id, 0, world, &mpi_status); offset = n_added * n_skewers_root * n_los; for (int skewer_id = 0; skewer_id < n_skewers_root; skewer_id++) { for (int los_id = 0; los_id < n_los; los_id++) { @@ -248,8 +231,7 @@ void Analysis_Module::Transfer_Skewers_Global_Axis(int axis) n_added += 1; } } else { - MPI_Send(skewers_los_velocity_root, n_skewers_root * n_los, MPI_CHREAL, 0, - 0, world); + MPI_Send(skewers_los_velocity_root, n_skewers_root * n_los, MPI_CHREAL, 0, 0, world); } // Set the HI Flux array @@ -257,28 +239,24 @@ void Analysis_Module::Transfer_Skewers_Global_Axis(int axis) // Write the local data into the global array for (int skewer_id = 0; skewer_id < n_skewers_root; skewer_id++) { for (int los_id = 0; los_id < n_los; los_id++) { - skewers_F_HI_global[skewer_id * n_los + los_id] = - skewers_F_HI_root[skewer_id * n_los + los_id]; + skewers_F_HI_global[skewer_id * n_los + los_id] = skewers_F_HI_root[skewer_id * n_los + los_id]; } } // Write the remote data into the global array n_added = 1; for (int p_id = 1; p_id < nproc; p_id++) { if (!root_procs[p_id]) continue; - MPI_Recv(transfer_buffer, n_skewers_root * n_los, MPI_CHREAL, p_id, 0, - world, &mpi_status); + MPI_Recv(transfer_buffer, n_skewers_root * n_los, MPI_CHREAL, p_id, 0, world, &mpi_status); offset = n_added * n_skewers_root * n_los; for (int skewer_id = 0; skewer_id < n_skewers_root; skewer_id++) { for (int los_id = 0; los_id < n_los; los_id++) { - skewers_F_HI_global[offset + skewer_id * n_los + los_id] = - transfer_buffer[skewer_id * n_los + los_id]; + skewers_F_HI_global[offset + skewer_id * n_los + los_id] = transfer_buffer[skewer_id * n_los + los_id]; } } n_added += 1; } } else { - MPI_Send(skewers_F_HI_root, n_skewers_root * n_los, MPI_CHREAL, 0, 0, - world); + MPI_Send(skewers_F_HI_root, n_skewers_root * n_los, MPI_CHREAL, 0, 0, world); } // Set the HeII Flux array @@ -286,28 +264,24 @@ void Analysis_Module::Transfer_Skewers_Global_Axis(int axis) // Write the local data into the global array for (int skewer_id = 0; skewer_id < n_skewers_root; skewer_id++) { for (int los_id = 0; los_id < n_los; los_id++) { - skewers_F_HeII_global[skewer_id * n_los + los_id] = - skewers_F_HeII_root[skewer_id * n_los + los_id]; + skewers_F_HeII_global[skewer_id * n_los + los_id] = skewers_F_HeII_root[skewer_id * n_los + los_id]; } } // Write the remote data into the global array n_added = 1; for (int p_id = 1; p_id < nproc; p_id++) { if (!root_procs[p_id]) continue; - MPI_Recv(transfer_buffer, n_skewers_root * n_los, MPI_CHREAL, p_id, 0, - world, &mpi_status); + MPI_Recv(transfer_buffer, n_skewers_root * n_los, MPI_CHREAL, p_id, 0, world, &mpi_status); offset = n_added * n_skewers_root * n_los; for (int skewer_id = 0; skewer_id < n_skewers_root; skewer_id++) { for (int los_id = 0; los_id < n_los; los_id++) { - skewers_F_HeII_global[offset + skewer_id * n_los + los_id] = - transfer_buffer[skewer_id * n_los + los_id]; + skewers_F_HeII_global[offset + skewer_id * n_los + los_id] = transfer_buffer[skewer_id * n_los + los_id]; } } n_added += 1; } } else { - MPI_Send(skewers_F_HeII_root, n_skewers_root * n_los, MPI_CHREAL, 0, 0, - world); + MPI_Send(skewers_F_HeII_root, n_skewers_root * n_los, MPI_CHREAL, 0, 0, world); } } @@ -325,13 +299,11 @@ int Locate_Index(Real val, Real *values, int N) } if (val < values[index - 1]) { - chprintf("ERROR; Value less than left edge: val=%f left=%f \n", val, - values[index - 1]); + chprintf("ERROR; Value less than left edge: val=%f left=%f \n", val, values[index - 1]); exit(-1); } if (val > values[index]) { - chprintf("ERROR; Value grater than right edge: val=%f right=%f \n", val, - values[index]); + chprintf("ERROR; Value grater than right edge: val=%f right=%f \n", val, values[index]); exit(-1); } @@ -421,8 +393,7 @@ void Grid3D::Initialize_Power_Spectrum_Measurements(int axis) k_start = log10(0.99 * k_vals[1]); if (d_log_k == 0) { - chprintf( - "ERROR: d_log_k = 0 Set lya_Pk_d_log_k in the parameter file \n"); + chprintf("ERROR: d_log_k = 0 Set lya_Pk_d_log_k in the parameter file \n"); exit(-1); } @@ -491,8 +462,7 @@ void Grid3D::Initialize_Power_Spectrum_Measurements(int axis) // Create array for global PS if (axis == 2) { - if (Analysis.n_hist_edges_x != Analysis.n_hist_edges_y || - Analysis.n_hist_edges_x != Analysis.n_hist_edges_z) { + if (Analysis.n_hist_edges_x != Analysis.n_hist_edges_y || Analysis.n_hist_edges_x != Analysis.n_hist_edges_z) { chprintf("ERROR: PS Histogram sizes dont match \n"); exit(-1); } else { @@ -500,8 +470,7 @@ void Grid3D::Initialize_Power_Spectrum_Measurements(int axis) Analysis.k_centers = (Real *)malloc(n_bins * sizeof(Real)); for (int bin_id = 0; bin_id < n_bins; bin_id++) { - Analysis.k_centers[bin_id] = sqrt(Analysis.hist_k_edges_x[bin_id] * - Analysis.hist_k_edges_x[bin_id + 1]); + Analysis.k_centers[bin_id] = sqrt(Analysis.hist_k_edges_x[bin_id] * Analysis.hist_k_edges_x[bin_id + 1]); } } } @@ -596,8 +565,7 @@ void Grid3D::Compute_Flux_Power_Spectrum_Skewer(int skewer_id, int axis) } for (int los_id = 0; los_id < n_los; los_id++) { - delta_F[los_id] = skewers_transmitted_flux[skewer_id * n_los + los_id] / - Analysis.Flux_mean_HI; + delta_F[los_id] = skewers_transmitted_flux[skewer_id * n_los + los_id] / Analysis.Flux_mean_HI; } // Compute the r2c FFT @@ -613,9 +581,7 @@ void Grid3D::Compute_Flux_Power_Spectrum_Skewer(int skewer_id, int axis) // Compute the amplitude of the FFT for (int i = 0; i < n_fft; i++) { - fft2_delta_F[i] = (fft_delta_F[i][0] * fft_delta_F[i][0] + - fft_delta_F[i][1] * fft_delta_F[i][1]) / - n_los / n_los; + fft2_delta_F[i] = (fft_delta_F[i][0] * fft_delta_F[i][0] + fft_delta_F[i][1] * fft_delta_F[i][1]) / n_los / n_los; } // Arrange in k-bins @@ -630,9 +596,7 @@ void Grid3D::Compute_Flux_Power_Spectrum_Skewer(int skewer_id, int axis) k_val = k_vals[i]; if (k_val == 0) continue; bin_id = Locate_Index(k_val, hist_k_edges, n_hist_edges); - if (bin_id < 0) - chprintf(" %d: %e %e %e \n", bin_id, hist_k_edges[0], k_val, - hist_k_edges[1]); + if (bin_id < 0) chprintf(" %d: %e %e %e \n", bin_id, hist_k_edges[0], k_val, hist_k_edges[1]); if (bin_id < 0 || bin_id >= n_bins) continue; hist_PS[bin_id] += fft2_delta_F[i]; hist_n[bin_id] += 1; @@ -654,8 +618,7 @@ void Grid3D::Compute_Flux_Power_Spectrum_Skewer(int skewer_id, int axis) } if (hist_sum != n_fft - 1) { - printf("ERROR: Histogram sum doesn't match n_pfft: sum=%d n_fft=%d \n", - hist_sum, n_fft - 1); + printf("ERROR: Histogram sum doesn't match n_pfft: sum=%d n_fft=%d \n", hist_sum, n_fft - 1); exit(-1); } @@ -711,9 +674,7 @@ void Analysis_Module::Reduce_Power_Spectrum_Global() int n_bins = n_hist_edges_x - 1; for (int bin_id = 0; bin_id < n_bins; bin_id++) { - ps_mean[bin_id] = - (ps_global_x[bin_id] + ps_global_y[bin_id] + ps_global_z[bin_id]) / - n_PS_total; + ps_mean[bin_id] = (ps_global_x[bin_id] + ps_global_y[bin_id] + ps_global_z[bin_id]) / n_PS_total; } chprintf(" PS Bins: %d N_Skewers_Processed: %d \n", n_bins, n_PS_total); @@ -726,21 +687,17 @@ void Analysis_Module::Reduce_Power_Spectrum_Global() void Analysis_Module::Reduce_Lya_Mean_Flux_Global() { - n_skewers_processed = - n_skewers_processed_x + n_skewers_processed_y + n_skewers_processed_z; - Flux_mean_HI = (Flux_mean_HI_x * n_skewers_processed_x + - Flux_mean_HI_y * n_skewers_processed_y + + n_skewers_processed = n_skewers_processed_x + n_skewers_processed_y + n_skewers_processed_z; + Flux_mean_HI = (Flux_mean_HI_x * n_skewers_processed_x + Flux_mean_HI_y * n_skewers_processed_y + Flux_mean_HI_z * n_skewers_processed_z) / n_skewers_processed; ; - Flux_mean_HeII = (Flux_mean_HeII_x * n_skewers_processed_x + - Flux_mean_HeII_y * n_skewers_processed_y + + Flux_mean_HeII = (Flux_mean_HeII_x * n_skewers_processed_x + Flux_mean_HeII_y * n_skewers_processed_y + Flux_mean_HeII_z * n_skewers_processed_z) / n_skewers_processed; ; - chprintf( - " N_Skewers_Processed Global: %d F_Mean_HI: %e F_Mean_HeII: %e\n", - n_skewers_processed, Flux_mean_HI, Flux_mean_HeII); + chprintf(" N_Skewers_Processed Global: %d F_Mean_HI: %e F_Mean_HeII: %e\n", n_skewers_processed, Flux_mean_HI, + Flux_mean_HeII); } void Analysis_Module::Reduce_Lya_Mean_Flux_Axis(int axis) @@ -784,18 +741,16 @@ void Analysis_Module::Reduce_Lya_Mean_Flux_Axis(int axis) #ifdef PRINT_ANALYSIS_LOG for (int i = 0; i < nproc; i++) { if (procID == i) - printf(" procID:%d Flux_HI_Sum: %e N_Skewers_Processed: %d \n", - procID, (*Flux_mean_root_HI), *n_skewers_processed_root); + printf(" procID:%d Flux_HI_Sum: %e N_Skewers_Processed: %d \n", procID, (*Flux_mean_root_HI), + *n_skewers_processed_root); MPI_Barrier(world); sleep(1); } #endif MPI_Allreduce(Flux_mean_root_HI, Flux_mean_HI, 1, MPI_CHREAL, MPI_SUM, world); - MPI_Allreduce(Flux_mean_root_HeII, Flux_mean_HeII, 1, MPI_CHREAL, MPI_SUM, - world); - MPI_Allreduce(n_skewers_processed_root, n_skewers_processed, 1, MPI_INT, - MPI_SUM, world); + MPI_Allreduce(Flux_mean_root_HeII, Flux_mean_HeII, 1, MPI_CHREAL, MPI_SUM, world); + MPI_Allreduce(n_skewers_processed_root, n_skewers_processed, 1, MPI_INT, MPI_SUM, world); #else @@ -807,8 +762,8 @@ void Analysis_Module::Reduce_Lya_Mean_Flux_Axis(int axis) *Flux_mean_HI = *Flux_mean_HI / *n_skewers_processed; *Flux_mean_HeII = *Flux_mean_HeII / *n_skewers_processed; - chprintf(" N_Skewers_Processed: %d Flux Mean HI: %e Flux_mean_HeII: %e \n", - *n_skewers_processed, *Flux_mean_HI, *Flux_mean_HeII); + chprintf(" N_Skewers_Processed: %d Flux Mean HI: %e Flux_mean_HeII: %e \n", *n_skewers_processed, *Flux_mean_HI, + *Flux_mean_HeII); } void Analysis_Module::Compute_Lya_Mean_Flux_Skewer(int skewer_id, int axis) @@ -857,10 +812,8 @@ void Analysis_Module::Compute_Lya_Mean_Flux_Skewer(int skewer_id, int axis) F_mean_HI = 0; F_mean_HeII = 0; for (int los_id = 0; los_id < n_los; los_id++) { - F_mean_HI += - skewers_transmitted_flux_HI[skewer_id * n_los + los_id] / n_los; - F_mean_HeII += - skewers_transmitted_flux_HeII[skewer_id * n_los + los_id] / n_los; + F_mean_HI += skewers_transmitted_flux_HI[skewer_id * n_los + los_id] / n_los; + F_mean_HeII += skewers_transmitted_flux_HeII[skewer_id * n_los + los_id] / n_los; } *F_mean_root_HI += F_mean_HI; @@ -979,8 +932,7 @@ void Grid3D::Compute_Transmitted_Flux_Skewer(int skewer_id, int axis) // printf( " Computing Skewer ID: %d \n", skewer_id ); - Real density_HI, density_HeII, velocity, temperature, Msun, kpc, Mp, kpc3, Me, - e_charge, c, Kb; + Real density_HI, density_HeII, velocity, temperature, Msun, kpc, Mp, kpc3, Me, e_charge, c, Kb; // Constants in CGS Kb = 1.38064852e-16; // g (cm/s)^2 K-1 @@ -994,10 +946,10 @@ void Grid3D::Compute_Transmitted_Flux_Skewer(int skewer_id, int axis) // Fill the Real cells first for (int los_id = 0; los_id < n_los_total; los_id++) { - density_HI = skewers_HI_density_root[skewer_id * n_los_total + los_id]; - density_HeII = skewers_HeII_density_root[skewer_id * n_los_total + los_id]; - velocity = skewers_velocity_root[skewer_id * n_los_total + los_id]; - temperature = skewers_temperature_root[skewer_id * n_los_total + los_id]; + density_HI = skewers_HI_density_root[skewer_id * n_los_total + los_id]; + density_HeII = skewers_HeII_density_root[skewer_id * n_los_total + los_id]; + velocity = skewers_velocity_root[skewer_id * n_los_total + los_id]; + temperature = skewers_temperature_root[skewer_id * n_los_total + los_id]; full_density_HI[los_id + n_ghost] = density_HI; full_density_HeII[los_id + n_ghost] = density_HeII; full_velocity[los_id + n_ghost] = velocity; @@ -1006,23 +958,18 @@ void Grid3D::Compute_Transmitted_Flux_Skewer(int skewer_id, int axis) // Fill the ghost cells for (int los_id = 0; los_id < n_ghost; los_id++) { - full_density_HI[los_id] = full_density_HI[n_los_total + los_id]; - full_density_HeII[los_id] = full_density_HeII[n_los_total + los_id]; - full_velocity[los_id] = full_velocity[n_los_total + los_id]; - full_temperature[los_id] = full_temperature[n_los_total + los_id]; - full_density_HI[n_los_total + n_ghost + los_id] = - full_density_HI[n_ghost + los_id]; - full_density_HeII[n_los_total + n_ghost + los_id] = - full_density_HeII[n_ghost + los_id]; - full_velocity[n_los_total + n_ghost + los_id] = - full_velocity[n_ghost + los_id]; - full_temperature[n_los_total + n_ghost + los_id] = - full_temperature[n_ghost + los_id]; + full_density_HI[los_id] = full_density_HI[n_los_total + los_id]; + full_density_HeII[los_id] = full_density_HeII[n_los_total + los_id]; + full_velocity[los_id] = full_velocity[n_los_total + los_id]; + full_temperature[los_id] = full_temperature[n_los_total + los_id]; + full_density_HI[n_los_total + n_ghost + los_id] = full_density_HI[n_ghost + los_id]; + full_density_HeII[n_los_total + n_ghost + los_id] = full_density_HeII[n_ghost + los_id]; + full_velocity[n_los_total + n_ghost + los_id] = full_velocity[n_ghost + los_id]; + full_temperature[n_los_total + n_ghost + los_id] = full_temperature[n_ghost + los_id]; } Real dens_factor, dens_factor_HI, dens_factor_HeII, vel_factor; - dens_factor = 1. / (Cosmo.current_a * Cosmo.current_a * Cosmo.current_a) * - Cosmo.cosmo_h * Cosmo.cosmo_h; + dens_factor = 1. / (Cosmo.current_a * Cosmo.current_a * Cosmo.current_a) * Cosmo.cosmo_h * Cosmo.cosmo_h; dens_factor_HI = dens_factor * Msun / (kpc3) / Mp; dens_factor_HeII = dens_factor * Msun / (kpc3) / (4 * Mp); vel_factor = 1e5; // cm/s @@ -1042,18 +989,14 @@ void Grid3D::Compute_Transmitted_Flux_Skewer(int skewer_id, int axis) full_vel_Hubble[los_id] = (los_id - n_ghost + 0.5) * dv_Hubble; } - Lya_lambda_HI = 1.21567e-5; // cm Rest wave length of the Lyman Alpha - // Transition Hydrogen - Lya_lambda_HeII = - Lya_lambda_HI / - 4; // cm Rest wave length of the Lyman Alpha Transition Helium II - f_12 = 0.416; // Lya transition Oscillator strength - H_cgs = H * 1e5 / kpc; + Lya_lambda_HI = 1.21567e-5; // cm Rest wave length of the Lyman Alpha + // Transition Hydrogen + Lya_lambda_HeII = Lya_lambda_HI / 4; // cm Rest wave length of the Lyman Alpha Transition Helium II + f_12 = 0.416; // Lya transition Oscillator strength + H_cgs = H * 1e5 / kpc; - Lya_sigma_HI = - M_PI * e_charge * e_charge / Me / c * Lya_lambda_HI * f_12 / H_cgs; - Lya_sigma_HeII = - M_PI * e_charge * e_charge / Me / c * Lya_lambda_HeII * f_12 / H_cgs; + Lya_sigma_HI = M_PI * e_charge * e_charge / Me / c * Lya_lambda_HI * f_12 / H_cgs; + Lya_sigma_HeII = M_PI * e_charge * e_charge / Me / c * Lya_lambda_HeII * f_12 / H_cgs; // Compute the optical depth Real b_HI_j, n_HI_j; @@ -1085,10 +1028,8 @@ void Grid3D::Compute_Transmitted_Flux_Skewer(int skewer_id, int axis) // Compute the transmitted_flux for (int los_id = 0; los_id < n_los_total; los_id++) { - skewers_transmitted_flux_HI[skewer_id * n_los_total + los_id] = - exp(-full_optical_depth_HI[los_id + n_ghost]); - skewers_transmitted_flux_HeII[skewer_id * n_los_total + los_id] = - exp(-full_optical_depth_HeII[los_id + n_ghost]); + skewers_transmitted_flux_HI[skewer_id * n_los_total + los_id] = exp(-full_optical_depth_HI[los_id + n_ghost]); + skewers_transmitted_flux_HeII[skewer_id * n_los_total + los_id] = exp(-full_optical_depth_HeII[los_id + n_ghost]); } } @@ -1199,18 +1140,14 @@ void Analysis_Module::Transfer_Skewers_Data(int axis) for (int skewer_id = 0; skewer_id < n_skewers; skewer_id++) { for (int los_id = 0; los_id < n_los_local; los_id++) { - HI_density = skewers_HI_density_local[skewer_id * n_los_local + los_id]; - HeII_density = - skewers_HeII_density_local[skewer_id * n_los_local + los_id]; - velocity = skewers_velocity_local[skewer_id * n_los_local + los_id]; - temperature = - skewers_temperature_local[skewer_id * n_los_local + los_id]; - skewers_HI_density_root[skewer_id * n_los_total + los_id] = HI_density; - skewers_HeII_density_root[skewer_id * n_los_total + los_id] = - HeII_density; - skewers_velocity_root[skewer_id * n_los_total + los_id] = velocity; - skewers_temperature_root[skewer_id * n_los_total + los_id] = - temperature; + HI_density = skewers_HI_density_local[skewer_id * n_los_local + los_id]; + HeII_density = skewers_HeII_density_local[skewer_id * n_los_local + los_id]; + velocity = skewers_velocity_local[skewer_id * n_los_local + los_id]; + temperature = skewers_temperature_local[skewer_id * n_los_local + los_id]; + skewers_HI_density_root[skewer_id * n_los_total + los_id] = HI_density; + skewers_HeII_density_root[skewer_id * n_los_total + los_id] = HeII_density; + skewers_velocity_root[skewer_id * n_los_total + los_id] = velocity; + skewers_temperature_root[skewer_id * n_los_total + los_id] = temperature; #ifdef OUTPUT_SKEWERS density = skewers_density_local[skewer_id * n_los_local + los_id]; skewers_density_root[skewer_id * n_los_total + los_id] = density; @@ -1239,37 +1176,27 @@ void Analysis_Module::Transfer_Skewers_Data(int axis) printf(" Receiving Skewers From pID: %d\n", mpi_id); #endif - MPI_Recv(skewers_HI_density_local, n_skewers * n_los_local, MPI_CHREAL, - mpi_id, 0, world, &mpi_status); - MPI_Recv(skewers_velocity_local, n_skewers * n_los_local, MPI_CHREAL, - mpi_id, 1, world, &mpi_status); - MPI_Recv(skewers_temperature_local, n_skewers * n_los_local, MPI_CHREAL, - mpi_id, 2, world, &mpi_status); - MPI_Recv(skewers_HeII_density_local, n_skewers * n_los_local, MPI_CHREAL, - mpi_id, 3, world, &mpi_status); + MPI_Recv(skewers_HI_density_local, n_skewers * n_los_local, MPI_CHREAL, mpi_id, 0, world, &mpi_status); + MPI_Recv(skewers_velocity_local, n_skewers * n_los_local, MPI_CHREAL, mpi_id, 1, world, &mpi_status); + MPI_Recv(skewers_temperature_local, n_skewers * n_los_local, MPI_CHREAL, mpi_id, 2, world, &mpi_status); + MPI_Recv(skewers_HeII_density_local, n_skewers * n_los_local, MPI_CHREAL, mpi_id, 3, world, &mpi_status); #ifdef OUTPUT_SKEWERS - MPI_Recv(skewers_density_local, n_skewers * n_los_local, MPI_CHREAL, - mpi_id, 4, world, &mpi_status); + MPI_Recv(skewers_density_local, n_skewers * n_los_local, MPI_CHREAL, mpi_id, 4, world, &mpi_status); #endif for (int skewer_id = 0; skewer_id < n_skewers; skewer_id++) { for (int los_id = 0; los_id < n_los_local; los_id++) { - skewers_HI_density_root[skewer_id * n_los_total + indx * n_los_local + - los_id] = + skewers_HI_density_root[skewer_id * n_los_total + indx * n_los_local + los_id] = skewers_HI_density_local[skewer_id * n_los_local + los_id]; - skewers_HeII_density_root[skewer_id * n_los_total + - indx * n_los_local + los_id] = + skewers_HeII_density_root[skewer_id * n_los_total + indx * n_los_local + los_id] = skewers_HeII_density_local[skewer_id * n_los_local + los_id]; - skewers_velocity_root[skewer_id * n_los_total + indx * n_los_local + - los_id] = + skewers_velocity_root[skewer_id * n_los_total + indx * n_los_local + los_id] = skewers_velocity_local[skewer_id * n_los_local + los_id]; - skewers_temperature_root[skewer_id * n_los_total + - indx * n_los_local + los_id] = + skewers_temperature_root[skewer_id * n_los_total + indx * n_los_local + los_id] = skewers_temperature_local[skewer_id * n_los_local + los_id]; #ifdef OUTPUT_SKEWERS - skewers_density_root[skewer_id * n_los_total + indx * n_los_local + - los_id] = + skewers_density_root[skewer_id * n_los_total + indx * n_los_local + los_id] = skewers_density_local[skewer_id * n_los_local + los_id]; #endif } @@ -1278,17 +1205,12 @@ void Analysis_Module::Transfer_Skewers_Data(int axis) } else { - MPI_Send(skewers_HI_density_local, n_skewers * n_los_local, MPI_CHREAL, - root_id, 0, world); - MPI_Send(skewers_velocity_local, n_skewers * n_los_local, MPI_CHREAL, - root_id, 1, world); - MPI_Send(skewers_temperature_local, n_skewers * n_los_local, MPI_CHREAL, - root_id, 2, world); - MPI_Send(skewers_HeII_density_local, n_skewers * n_los_local, MPI_CHREAL, - root_id, 3, world); + MPI_Send(skewers_HI_density_local, n_skewers * n_los_local, MPI_CHREAL, root_id, 0, world); + MPI_Send(skewers_velocity_local, n_skewers * n_los_local, MPI_CHREAL, root_id, 1, world); + MPI_Send(skewers_temperature_local, n_skewers * n_los_local, MPI_CHREAL, root_id, 2, world); + MPI_Send(skewers_HeII_density_local, n_skewers * n_los_local, MPI_CHREAL, root_id, 3, world); #ifdef OUTPUT_SKEWERS - MPI_Send(skewers_density_local, n_skewers * n_los_local, MPI_CHREAL, - root_id, 4, world); + MPI_Send(skewers_density_local, n_skewers * n_los_local, MPI_CHREAL, root_id, 4, world); #endif } @@ -1380,18 +1302,11 @@ void Grid3D::Populate_Lya_Skewers_Local(int axis) for (int id_los = 0; id_los < n_los; id_los++) { id_i = i * stride; id_j = j * stride; - if (axis == 0) - id_grid = (id_los + n_ghost) + (id_i + n_ghost) * nx_grid + - (id_j + n_ghost) * nx_grid * nz_grid; - if (axis == 1) - id_grid = (id_i + n_ghost) + (id_los + n_ghost) * nx_grid + - (id_j + n_ghost) * nx_grid * nz_grid; - if (axis == 2) - id_grid = (id_i + n_ghost) + (id_j + n_ghost) * nx_grid + - (id_los + n_ghost) * nx_grid * nz_grid; + if (axis == 0) id_grid = (id_los + n_ghost) + (id_i + n_ghost) * nx_grid + (id_j + n_ghost) * nx_grid * nz_grid; + if (axis == 1) id_grid = (id_i + n_ghost) + (id_los + n_ghost) * nx_grid + (id_j + n_ghost) * nx_grid * nz_grid; + if (axis == 2) id_grid = (id_i + n_ghost) + (id_j + n_ghost) * nx_grid + (id_los + n_ghost) * nx_grid * nz_grid; density = C.density[id_grid] * Cosmo.rho_0_gas; - velocity = momentum_los[id_grid] * Cosmo.rho_0_gas * Cosmo.v_0_gas / - Cosmo.current_a / density; + velocity = momentum_los[id_grid] * Cosmo.rho_0_gas * Cosmo.v_0_gas / Cosmo.current_a / density; #ifdef COOLING_GRACKLE HI_density = Cool.fields.HI_density[id_grid] * Cosmo.rho_0_gas; HeII_density = Cool.fields.HeII_density[id_grid] * Cosmo.rho_0_gas; @@ -1419,8 +1334,7 @@ void Grid3D::Populate_Lya_Skewers_Local(int axis) } if (skewer_id != n_skewers_local) { - printf("ERROR: Skewers numbers don't match. ID: %d N_skewers: %d \n ", - skewer_id, n_skewers_local); + printf("ERROR: Skewers numbers don't match. ID: %d N_skewers: %d \n ", skewer_id, n_skewers_local); exit(-1); } } @@ -1461,41 +1375,26 @@ void Analysis_Module::Initialize_Lya_Statistics(struct parameters *P) #endif // Alocate Memory For Properties of Local Skewers - skewers_HI_density_local_x = - (Real *)malloc(n_skewers_local_x * nx_local * sizeof(Real)); - skewers_HI_density_local_y = - (Real *)malloc(n_skewers_local_y * ny_local * sizeof(Real)); - skewers_HI_density_local_z = - (Real *)malloc(n_skewers_local_z * nz_local * sizeof(Real)); - - skewers_HeII_density_local_x = - (Real *)malloc(n_skewers_local_x * nx_local * sizeof(Real)); - skewers_HeII_density_local_y = - (Real *)malloc(n_skewers_local_y * ny_local * sizeof(Real)); - skewers_HeII_density_local_z = - (Real *)malloc(n_skewers_local_z * nz_local * sizeof(Real)); - - skewers_velocity_local_x = - (Real *)malloc(n_skewers_local_x * nx_local * sizeof(Real)); - skewers_velocity_local_y = - (Real *)malloc(n_skewers_local_y * ny_local * sizeof(Real)); - skewers_velocity_local_z = - (Real *)malloc(n_skewers_local_z * nz_local * sizeof(Real)); - - skewers_temperature_local_x = - (Real *)malloc(n_skewers_local_x * nx_local * sizeof(Real)); - skewers_temperature_local_y = - (Real *)malloc(n_skewers_local_y * ny_local * sizeof(Real)); - skewers_temperature_local_z = - (Real *)malloc(n_skewers_local_z * nz_local * sizeof(Real)); + skewers_HI_density_local_x = (Real *)malloc(n_skewers_local_x * nx_local * sizeof(Real)); + skewers_HI_density_local_y = (Real *)malloc(n_skewers_local_y * ny_local * sizeof(Real)); + skewers_HI_density_local_z = (Real *)malloc(n_skewers_local_z * nz_local * sizeof(Real)); + + skewers_HeII_density_local_x = (Real *)malloc(n_skewers_local_x * nx_local * sizeof(Real)); + skewers_HeII_density_local_y = (Real *)malloc(n_skewers_local_y * ny_local * sizeof(Real)); + skewers_HeII_density_local_z = (Real *)malloc(n_skewers_local_z * nz_local * sizeof(Real)); + + skewers_velocity_local_x = (Real *)malloc(n_skewers_local_x * nx_local * sizeof(Real)); + skewers_velocity_local_y = (Real *)malloc(n_skewers_local_y * ny_local * sizeof(Real)); + skewers_velocity_local_z = (Real *)malloc(n_skewers_local_z * nz_local * sizeof(Real)); + + skewers_temperature_local_x = (Real *)malloc(n_skewers_local_x * nx_local * sizeof(Real)); + skewers_temperature_local_y = (Real *)malloc(n_skewers_local_y * ny_local * sizeof(Real)); + skewers_temperature_local_z = (Real *)malloc(n_skewers_local_z * nz_local * sizeof(Real)); #ifdef OUTPUT_SKEWERS - skewers_density_local_x = - (Real *)malloc(n_skewers_local_x * nx_local * sizeof(Real)); - skewers_density_local_y = - (Real *)malloc(n_skewers_local_y * ny_local * sizeof(Real)); - skewers_density_local_z = - (Real *)malloc(n_skewers_local_z * nz_local * sizeof(Real)); + skewers_density_local_x = (Real *)malloc(n_skewers_local_x * nx_local * sizeof(Real)); + skewers_density_local_y = (Real *)malloc(n_skewers_local_y * ny_local * sizeof(Real)); + skewers_density_local_z = (Real *)malloc(n_skewers_local_z * nz_local * sizeof(Real)); #endif // for (int i=0; i - mpi_domain_boundary_x[mpi_indices_x[i + 1]]) { + if (mpi_domain_boundary_x[mpi_indices_x[i]] > mpi_domain_boundary_x[mpi_indices_x[i + 1]]) { temp_indx = mpi_indices_x[i]; mpi_indices_x[i] = mpi_indices_x[i + 1]; mpi_indices_x[i + 1] = temp_indx; @@ -1578,8 +1467,7 @@ void Analysis_Module::Initialize_Lya_Statistics(struct parameters *P) while (!sorted) { sorted = true; for (int i = 0; i < n_mpi_y - 1; i++) { - if (mpi_domain_boundary_y[mpi_indices_y[i]] > - mpi_domain_boundary_y[mpi_indices_y[i + 1]]) { + if (mpi_domain_boundary_y[mpi_indices_y[i]] > mpi_domain_boundary_y[mpi_indices_y[i + 1]]) { temp_indx = mpi_indices_y[i]; mpi_indices_y[i] = mpi_indices_y[i + 1]; mpi_indices_y[i + 1] = temp_indx; @@ -1594,8 +1482,7 @@ void Analysis_Module::Initialize_Lya_Statistics(struct parameters *P) while (!sorted) { sorted = true; for (int i = 0; i < n_mpi_z - 1; i++) { - if (mpi_domain_boundary_z[mpi_indices_z[i]] > - mpi_domain_boundary_z[mpi_indices_z[i + 1]]) { + if (mpi_domain_boundary_z[mpi_indices_z[i]] > mpi_domain_boundary_z[mpi_indices_z[i + 1]]) { temp_indx = mpi_indices_z[i]; mpi_indices_z[i] = mpi_indices_z[i + 1]; mpi_indices_z[i + 1] = temp_indx; @@ -1660,12 +1547,9 @@ void Analysis_Module::Initialize_Lya_Statistics(struct parameters *P) } // Gather the root processes - MPI_Gather(&am_I_root_x, 1, MPI_C_BOOL, root_procs_x, 1, MPI_C_BOOL, 0, - world); - MPI_Gather(&am_I_root_y, 1, MPI_C_BOOL, root_procs_y, 1, MPI_C_BOOL, 0, - world); - MPI_Gather(&am_I_root_z, 1, MPI_C_BOOL, root_procs_z, 1, MPI_C_BOOL, 0, - world); + MPI_Gather(&am_I_root_x, 1, MPI_C_BOOL, root_procs_x, 1, MPI_C_BOOL, 0, world); + MPI_Gather(&am_I_root_y, 1, MPI_C_BOOL, root_procs_y, 1, MPI_C_BOOL, 0, world); + MPI_Gather(&am_I_root_z, 1, MPI_C_BOOL, root_procs_z, 1, MPI_C_BOOL, 0, world); int n_skewers_global_x, n_skewers_global_y, n_skewers_global_z; if (procID == 0) { @@ -1678,67 +1562,42 @@ void Analysis_Module::Initialize_Lya_Statistics(struct parameters *P) n_skewers_global_z += n_skewers_local_z * root_procs_z[p_id]; } } - chprintf(" N Skewers Global: x:%d y:%d z:%d \n", n_skewers_global_x, - n_skewers_global_y, n_skewers_global_z); + chprintf(" N Skewers Global: x:%d y:%d z:%d \n", n_skewers_global_x, n_skewers_global_y, n_skewers_global_z); // Allocate Memory for Global Skewers Data #ifdef OUTPUT_SKEWERS if (procID == 0) { - skewers_transmitted_flux_HI_x_global = - (Real *)malloc(n_skewers_global_x * nx_total * sizeof(Real)); - skewers_transmitted_flux_HI_y_global = - (Real *)malloc(n_skewers_global_y * ny_total * sizeof(Real)); - skewers_transmitted_flux_HI_z_global = - (Real *)malloc(n_skewers_global_z * nz_total * sizeof(Real)); - - skewers_transmitted_flux_HeII_x_global = - (Real *)malloc(n_skewers_global_x * nx_total * sizeof(Real)); - skewers_transmitted_flux_HeII_y_global = - (Real *)malloc(n_skewers_global_y * ny_total * sizeof(Real)); - skewers_transmitted_flux_HeII_z_global = - (Real *)malloc(n_skewers_global_z * nz_total * sizeof(Real)); - - skewers_density_x_global = - (Real *)malloc(n_skewers_global_x * nx_total * sizeof(Real)); - skewers_density_y_global = - (Real *)malloc(n_skewers_global_y * ny_total * sizeof(Real)); - skewers_density_z_global = - (Real *)malloc(n_skewers_global_z * nz_total * sizeof(Real)); - - skewers_HI_density_x_global = - (Real *)malloc(n_skewers_global_x * nx_total * sizeof(Real)); - skewers_HI_density_y_global = - (Real *)malloc(n_skewers_global_y * ny_total * sizeof(Real)); - skewers_HI_density_z_global = - (Real *)malloc(n_skewers_global_z * nz_total * sizeof(Real)); - - skewers_HeII_density_x_global = - (Real *)malloc(n_skewers_global_x * nx_total * sizeof(Real)); - skewers_HeII_density_y_global = - (Real *)malloc(n_skewers_global_y * ny_total * sizeof(Real)); - skewers_HeII_density_z_global = - (Real *)malloc(n_skewers_global_z * nz_total * sizeof(Real)); - - skewers_temperature_x_global = - (Real *)malloc(n_skewers_global_x * nx_total * sizeof(Real)); - skewers_temperature_y_global = - (Real *)malloc(n_skewers_global_y * ny_total * sizeof(Real)); - skewers_temperature_z_global = - (Real *)malloc(n_skewers_global_z * nz_total * sizeof(Real)); - - skewers_los_velocity_x_global = - (Real *)malloc(n_skewers_global_x * nx_total * sizeof(Real)); - skewers_los_velocity_y_global = - (Real *)malloc(n_skewers_global_y * ny_total * sizeof(Real)); - skewers_los_velocity_z_global = - (Real *)malloc(n_skewers_global_z * nz_total * sizeof(Real)); - - transfer_buffer_root_x = - (Real *)malloc(n_skewers_local_x * nx_total * sizeof(Real)); - transfer_buffer_root_y = - (Real *)malloc(n_skewers_local_y * ny_total * sizeof(Real)); - transfer_buffer_root_z = - (Real *)malloc(n_skewers_local_z * nz_total * sizeof(Real)); + skewers_transmitted_flux_HI_x_global = (Real *)malloc(n_skewers_global_x * nx_total * sizeof(Real)); + skewers_transmitted_flux_HI_y_global = (Real *)malloc(n_skewers_global_y * ny_total * sizeof(Real)); + skewers_transmitted_flux_HI_z_global = (Real *)malloc(n_skewers_global_z * nz_total * sizeof(Real)); + + skewers_transmitted_flux_HeII_x_global = (Real *)malloc(n_skewers_global_x * nx_total * sizeof(Real)); + skewers_transmitted_flux_HeII_y_global = (Real *)malloc(n_skewers_global_y * ny_total * sizeof(Real)); + skewers_transmitted_flux_HeII_z_global = (Real *)malloc(n_skewers_global_z * nz_total * sizeof(Real)); + + skewers_density_x_global = (Real *)malloc(n_skewers_global_x * nx_total * sizeof(Real)); + skewers_density_y_global = (Real *)malloc(n_skewers_global_y * ny_total * sizeof(Real)); + skewers_density_z_global = (Real *)malloc(n_skewers_global_z * nz_total * sizeof(Real)); + + skewers_HI_density_x_global = (Real *)malloc(n_skewers_global_x * nx_total * sizeof(Real)); + skewers_HI_density_y_global = (Real *)malloc(n_skewers_global_y * ny_total * sizeof(Real)); + skewers_HI_density_z_global = (Real *)malloc(n_skewers_global_z * nz_total * sizeof(Real)); + + skewers_HeII_density_x_global = (Real *)malloc(n_skewers_global_x * nx_total * sizeof(Real)); + skewers_HeII_density_y_global = (Real *)malloc(n_skewers_global_y * ny_total * sizeof(Real)); + skewers_HeII_density_z_global = (Real *)malloc(n_skewers_global_z * nz_total * sizeof(Real)); + + skewers_temperature_x_global = (Real *)malloc(n_skewers_global_x * nx_total * sizeof(Real)); + skewers_temperature_y_global = (Real *)malloc(n_skewers_global_y * ny_total * sizeof(Real)); + skewers_temperature_z_global = (Real *)malloc(n_skewers_global_z * nz_total * sizeof(Real)); + + skewers_los_velocity_x_global = (Real *)malloc(n_skewers_global_x * nx_total * sizeof(Real)); + skewers_los_velocity_y_global = (Real *)malloc(n_skewers_global_y * ny_total * sizeof(Real)); + skewers_los_velocity_z_global = (Real *)malloc(n_skewers_global_z * nz_total * sizeof(Real)); + + transfer_buffer_root_x = (Real *)malloc(n_skewers_local_x * nx_total * sizeof(Real)); + transfer_buffer_root_y = (Real *)malloc(n_skewers_local_y * ny_total * sizeof(Real)); + transfer_buffer_root_z = (Real *)malloc(n_skewers_local_z * nz_total * sizeof(Real)); } #endif @@ -1792,107 +1651,83 @@ void Analysis_Module::Initialize_Lya_Statistics(struct parameters *P) #endif if (am_I_root_x) { - skewers_HI_density_root_x = - (Real *)malloc(n_skewers_local_x * nx_total * sizeof(Real)); - skewers_HeII_density_root_x = - (Real *)malloc(n_skewers_local_x * nx_total * sizeof(Real)); - skewers_velocity_root_x = - (Real *)malloc(n_skewers_local_x * nx_total * sizeof(Real)); - skewers_temperature_root_x = - (Real *)malloc(n_skewers_local_x * nx_total * sizeof(Real)); - full_HI_density_x = (Real *)malloc(n_los_full_x * sizeof(Real)); - full_HeII_density_x = (Real *)malloc(n_los_full_x * sizeof(Real)); - full_velocity_x = (Real *)malloc(n_los_full_x * sizeof(Real)); - full_temperature_x = (Real *)malloc(n_los_full_x * sizeof(Real)); - full_optical_depth_HI_x = (Real *)malloc(n_los_full_x * sizeof(Real)); - full_optical_depth_HeII_x = (Real *)malloc(n_los_full_x * sizeof(Real)); - full_vel_Hubble_x = (Real *)malloc(n_los_full_x * sizeof(Real)); - skewers_transmitted_flux_HI_x = - (Real *)malloc(n_skewers_local_x * nx_total * sizeof(Real)); - skewers_transmitted_flux_HeII_x = - (Real *)malloc(n_skewers_local_x * nx_total * sizeof(Real)); + skewers_HI_density_root_x = (Real *)malloc(n_skewers_local_x * nx_total * sizeof(Real)); + skewers_HeII_density_root_x = (Real *)malloc(n_skewers_local_x * nx_total * sizeof(Real)); + skewers_velocity_root_x = (Real *)malloc(n_skewers_local_x * nx_total * sizeof(Real)); + skewers_temperature_root_x = (Real *)malloc(n_skewers_local_x * nx_total * sizeof(Real)); + full_HI_density_x = (Real *)malloc(n_los_full_x * sizeof(Real)); + full_HeII_density_x = (Real *)malloc(n_los_full_x * sizeof(Real)); + full_velocity_x = (Real *)malloc(n_los_full_x * sizeof(Real)); + full_temperature_x = (Real *)malloc(n_los_full_x * sizeof(Real)); + full_optical_depth_HI_x = (Real *)malloc(n_los_full_x * sizeof(Real)); + full_optical_depth_HeII_x = (Real *)malloc(n_los_full_x * sizeof(Real)); + full_vel_Hubble_x = (Real *)malloc(n_los_full_x * sizeof(Real)); + skewers_transmitted_flux_HI_x = (Real *)malloc(n_skewers_local_x * nx_total * sizeof(Real)); + skewers_transmitted_flux_HeII_x = (Real *)malloc(n_skewers_local_x * nx_total * sizeof(Real)); #if OUTPUT_SKEWERS - skewers_density_root_x = - (Real *)malloc(n_skewers_local_x * nx_total * sizeof(Real)); + skewers_density_root_x = (Real *)malloc(n_skewers_local_x * nx_total * sizeof(Real)); #endif // Alocate Memory For Power Spectrum Calculation - delta_F_x = (Real *)malloc(nx_total * sizeof(Real)); - vel_Hubble_x = (Real *)malloc(nx_total * sizeof(Real)); - fft_delta_F_x = (fftw_complex *)fftw_malloc(n_fft_x * sizeof(fftw_complex)); + delta_F_x = (Real *)malloc(nx_total * sizeof(Real)); + vel_Hubble_x = (Real *)malloc(nx_total * sizeof(Real)); + fft_delta_F_x = (fftw_complex *)fftw_malloc(n_fft_x * sizeof(fftw_complex)); fft2_delta_F_x = (Real *)malloc(n_fft_x * sizeof(Real)); - fftw_plan_x = - fftw_plan_dft_r2c_1d(nx_total, delta_F_x, fft_delta_F_x, FFTW_ESTIMATE); + fftw_plan_x = fftw_plan_dft_r2c_1d(nx_total, delta_F_x, fft_delta_F_x, FFTW_ESTIMATE); } k_vals_x = (Real *)malloc(n_fft_x * sizeof(Real)); if (am_I_root_y) { - skewers_HI_density_root_y = - (Real *)malloc(n_skewers_local_y * ny_total * sizeof(Real)); - skewers_HeII_density_root_y = - (Real *)malloc(n_skewers_local_y * ny_total * sizeof(Real)); - skewers_velocity_root_y = - (Real *)malloc(n_skewers_local_y * ny_total * sizeof(Real)); - skewers_temperature_root_y = - (Real *)malloc(n_skewers_local_y * ny_total * sizeof(Real)); - full_HI_density_y = (Real *)malloc(n_los_full_y * sizeof(Real)); - full_HeII_density_y = (Real *)malloc(n_los_full_y * sizeof(Real)); - full_velocity_y = (Real *)malloc(n_los_full_y * sizeof(Real)); - full_temperature_y = (Real *)malloc(n_los_full_y * sizeof(Real)); - full_optical_depth_HI_y = (Real *)malloc(n_los_full_y * sizeof(Real)); - full_optical_depth_HeII_y = (Real *)malloc(n_los_full_y * sizeof(Real)); - full_vel_Hubble_y = (Real *)malloc(n_los_full_y * sizeof(Real)); - skewers_transmitted_flux_HI_y = - (Real *)malloc(n_skewers_local_y * ny_total * sizeof(Real)); - skewers_transmitted_flux_HeII_y = - (Real *)malloc(n_skewers_local_y * ny_total * sizeof(Real)); + skewers_HI_density_root_y = (Real *)malloc(n_skewers_local_y * ny_total * sizeof(Real)); + skewers_HeII_density_root_y = (Real *)malloc(n_skewers_local_y * ny_total * sizeof(Real)); + skewers_velocity_root_y = (Real *)malloc(n_skewers_local_y * ny_total * sizeof(Real)); + skewers_temperature_root_y = (Real *)malloc(n_skewers_local_y * ny_total * sizeof(Real)); + full_HI_density_y = (Real *)malloc(n_los_full_y * sizeof(Real)); + full_HeII_density_y = (Real *)malloc(n_los_full_y * sizeof(Real)); + full_velocity_y = (Real *)malloc(n_los_full_y * sizeof(Real)); + full_temperature_y = (Real *)malloc(n_los_full_y * sizeof(Real)); + full_optical_depth_HI_y = (Real *)malloc(n_los_full_y * sizeof(Real)); + full_optical_depth_HeII_y = (Real *)malloc(n_los_full_y * sizeof(Real)); + full_vel_Hubble_y = (Real *)malloc(n_los_full_y * sizeof(Real)); + skewers_transmitted_flux_HI_y = (Real *)malloc(n_skewers_local_y * ny_total * sizeof(Real)); + skewers_transmitted_flux_HeII_y = (Real *)malloc(n_skewers_local_y * ny_total * sizeof(Real)); #if OUTPUT_SKEWERS - skewers_density_root_y = - (Real *)malloc(n_skewers_local_y * ny_total * sizeof(Real)); + skewers_density_root_y = (Real *)malloc(n_skewers_local_y * ny_total * sizeof(Real)); #endif // Alocate Memory For Power Spectrum Calculation - delta_F_y = (Real *)malloc(ny_total * sizeof(Real)); - vel_Hubble_y = (Real *)malloc(ny_total * sizeof(Real)); - fft_delta_F_y = (fftw_complex *)fftw_malloc(n_fft_y * sizeof(fftw_complex)); + delta_F_y = (Real *)malloc(ny_total * sizeof(Real)); + vel_Hubble_y = (Real *)malloc(ny_total * sizeof(Real)); + fft_delta_F_y = (fftw_complex *)fftw_malloc(n_fft_y * sizeof(fftw_complex)); fft2_delta_F_y = (Real *)malloc(n_fft_y * sizeof(Real)); - fftw_plan_y = - fftw_plan_dft_r2c_1d(ny_total, delta_F_y, fft_delta_F_y, FFTW_ESTIMATE); + fftw_plan_y = fftw_plan_dft_r2c_1d(ny_total, delta_F_y, fft_delta_F_y, FFTW_ESTIMATE); } k_vals_y = (Real *)malloc(n_fft_y * sizeof(Real)); if (am_I_root_z) { - skewers_HI_density_root_z = - (Real *)malloc(n_skewers_local_z * nz_total * sizeof(Real)); - skewers_HeII_density_root_z = - (Real *)malloc(n_skewers_local_z * nz_total * sizeof(Real)); - skewers_velocity_root_z = - (Real *)malloc(n_skewers_local_z * nz_total * sizeof(Real)); - skewers_temperature_root_z = - (Real *)malloc(n_skewers_local_z * nz_total * sizeof(Real)); - full_HI_density_z = (Real *)malloc(n_los_full_z * sizeof(Real)); - full_HeII_density_z = (Real *)malloc(n_los_full_z * sizeof(Real)); - full_velocity_z = (Real *)malloc(n_los_full_z * sizeof(Real)); - full_temperature_z = (Real *)malloc(n_los_full_z * sizeof(Real)); - full_optical_depth_HI_z = (Real *)malloc(n_los_full_z * sizeof(Real)); - full_optical_depth_HeII_z = (Real *)malloc(n_los_full_z * sizeof(Real)); - full_vel_Hubble_z = (Real *)malloc(n_los_full_z * sizeof(Real)); - skewers_transmitted_flux_HI_z = - (Real *)malloc(n_skewers_local_z * nz_total * sizeof(Real)); - skewers_transmitted_flux_HeII_z = - (Real *)malloc(n_skewers_local_z * nz_total * sizeof(Real)); + skewers_HI_density_root_z = (Real *)malloc(n_skewers_local_z * nz_total * sizeof(Real)); + skewers_HeII_density_root_z = (Real *)malloc(n_skewers_local_z * nz_total * sizeof(Real)); + skewers_velocity_root_z = (Real *)malloc(n_skewers_local_z * nz_total * sizeof(Real)); + skewers_temperature_root_z = (Real *)malloc(n_skewers_local_z * nz_total * sizeof(Real)); + full_HI_density_z = (Real *)malloc(n_los_full_z * sizeof(Real)); + full_HeII_density_z = (Real *)malloc(n_los_full_z * sizeof(Real)); + full_velocity_z = (Real *)malloc(n_los_full_z * sizeof(Real)); + full_temperature_z = (Real *)malloc(n_los_full_z * sizeof(Real)); + full_optical_depth_HI_z = (Real *)malloc(n_los_full_z * sizeof(Real)); + full_optical_depth_HeII_z = (Real *)malloc(n_los_full_z * sizeof(Real)); + full_vel_Hubble_z = (Real *)malloc(n_los_full_z * sizeof(Real)); + skewers_transmitted_flux_HI_z = (Real *)malloc(n_skewers_local_z * nz_total * sizeof(Real)); + skewers_transmitted_flux_HeII_z = (Real *)malloc(n_skewers_local_z * nz_total * sizeof(Real)); #if OUTPUT_SKEWERS - skewers_density_root_z = - (Real *)malloc(n_skewers_local_z * nz_total * sizeof(Real)); + skewers_density_root_z = (Real *)malloc(n_skewers_local_z * nz_total * sizeof(Real)); #endif // Alocate Memory For Power Spectrum Calculation - delta_F_z = (Real *)malloc(nz_total * sizeof(Real)); - vel_Hubble_z = (Real *)malloc(nz_total * sizeof(Real)); - fft_delta_F_z = (fftw_complex *)fftw_malloc(n_fft_z * sizeof(fftw_complex)); + delta_F_z = (Real *)malloc(nz_total * sizeof(Real)); + vel_Hubble_z = (Real *)malloc(nz_total * sizeof(Real)); + fft_delta_F_z = (fftw_complex *)fftw_malloc(n_fft_z * sizeof(fftw_complex)); fft2_delta_F_z = (Real *)malloc(n_fft_z * sizeof(Real)); - fftw_plan_z = - fftw_plan_dft_r2c_1d(nz_total, delta_F_z, fft_delta_F_z, FFTW_ESTIMATE); + fftw_plan_z = fftw_plan_dft_r2c_1d(nz_total, delta_F_z, fft_delta_F_z, FFTW_ESTIMATE); } k_vals_z = (Real *)malloc(n_fft_z * sizeof(Real)); diff --git a/src/analysis/phase_diagram.cpp b/src/analysis/phase_diagram.cpp index 982d1707e..760179ccd 100644 --- a/src/analysis/phase_diagram.cpp +++ b/src/analysis/phase_diagram.cpp @@ -47,16 +47,13 @@ void Grid3D::Compute_Phase_Diagram() int indx_dens, indx_temp, indx_phase; // Clear Phase Dikagram - for (indx_phase = 0; indx_phase < n_temp * n_dens; indx_phase++) - Analysis.phase_diagram[indx_phase] = 0; + for (indx_phase = 0; indx_phase < n_temp * n_dens; indx_phase++) Analysis.phase_diagram[indx_phase] = 0; for (k = 0; k < nz_local; k++) { for (j = 0; j < ny_local; j++) { for (i = 0; i < nx_local; i++) { - id_grid = (i + n_ghost) + (j + n_ghost) * nx_grid + - (k + n_ghost) * nx_grid * ny_grid; - dens = C.density[id_grid] * Cosmo.rho_0_gas / - Cosmo.rho_mean_baryon; // Baryonic overdensity + id_grid = (i + n_ghost) + (j + n_ghost) * nx_grid + (k + n_ghost) * nx_grid * ny_grid; + dens = C.density[id_grid] * Cosmo.rho_0_gas / Cosmo.rho_mean_baryon; // Baryonic overdensity // chprintf( "%f %f \n", dens, temp); #ifdef COOLING_GRACKLE temp = Cool.temperature[id_grid]; @@ -69,8 +66,7 @@ void Grid3D::Compute_Phase_Diagram() exit(-1); #endif - if (dens < dens_min || dens > dens_max || temp < temp_min || - temp > temp_max) { + if (dens < dens_min || dens > dens_max || temp < temp_min || temp > temp_max) { // printf("Outside Phase Diagram: dens:%e temp:%e \n", dens, temp // ); continue; @@ -99,23 +95,19 @@ void Grid3D::Compute_Phase_Diagram() // %f\n", phase_sum_local ); #ifdef MPI_CHOLLA - MPI_Reduce(Analysis.phase_diagram, Analysis.phase_diagram_global, - n_temp * n_dens, MPI_FLOAT, MPI_SUM, 0, world); + MPI_Reduce(Analysis.phase_diagram, Analysis.phase_diagram_global, n_temp * n_dens, MPI_FLOAT, MPI_SUM, 0, world); if (procID == 0) for (indx_phase = 0; indx_phase < n_temp * n_dens; indx_phase++) - Analysis.phase_diagram[indx_phase] = - Analysis.phase_diagram_global[indx_phase]; + Analysis.phase_diagram[indx_phase] = Analysis.phase_diagram_global[indx_phase]; #endif // Compute the sum for normalization Real phase_sum = 0; - for (indx_phase = 0; indx_phase < n_temp * n_dens; indx_phase++) - phase_sum += Analysis.phase_diagram[indx_phase]; + for (indx_phase = 0; indx_phase < n_temp * n_dens; indx_phase++) phase_sum += Analysis.phase_diagram[indx_phase]; chprintf(" Phase Diagram Sum Global: %f\n", phase_sum); // Normalize the Phase Diagram - for (indx_phase = 0; indx_phase < n_temp * n_dens; indx_phase++) - Analysis.phase_diagram[indx_phase] /= phase_sum; + for (indx_phase = 0; indx_phase < n_temp * n_dens; indx_phase++) Analysis.phase_diagram[indx_phase] /= phase_sum; } void Analysis_Module::Initialize_Phase_Diagram(struct parameters *P) @@ -131,8 +123,7 @@ void Analysis_Module::Initialize_Phase_Diagram(struct parameters *P) phase_diagram = (float *)malloc(n_dens * n_temp * sizeof(float)); #ifdef MPI_CHOLLA - if (procID == 0) - phase_diagram_global = (float *)malloc(n_dens * n_temp * sizeof(float)); + if (procID == 0) phase_diagram_global = (float *)malloc(n_dens * n_temp * sizeof(float)); #endif chprintf(" Phase Diagram Initialized.\n"); } diff --git a/src/chemistry_gpu/chemistry_functions.cpp b/src/chemistry_gpu/chemistry_functions.cpp index aa240f709..18999fe2d 100644 --- a/src/chemistry_gpu/chemistry_functions.cpp +++ b/src/chemistry_gpu/chemistry_functions.cpp @@ -53,11 +53,10 @@ void Grid3D::Initialize_Chemistry(struct parameters *P) Chem.H.time_units = kpc_km; Chem.H.dens_number_conv = Chem.H.density_units / MH; #ifdef COSMOLOGY - Chem.H.a_value = Cosmo.current_a; - Chem.H.density_units = - Chem.H.density_units / Chem.H.a_value / Chem.H.a_value / Chem.H.a_value; - Chem.H.length_units = Chem.H.length_units / Cosmo.cosmo_h * Chem.H.a_value; - Chem.H.time_units = Chem.H.time_units / Cosmo.cosmo_h; + Chem.H.a_value = Cosmo.current_a; + Chem.H.density_units = Chem.H.density_units / Chem.H.a_value / Chem.H.a_value / Chem.H.a_value; + Chem.H.length_units = Chem.H.length_units / Cosmo.cosmo_h * Chem.H.a_value; + Chem.H.time_units = Chem.H.time_units / Cosmo.cosmo_h; Chem.H.dens_number_conv = Chem.H.density_number_conv * pow(Chem.H.a_value, 3); #endif // COSMOLOGY Chem.H.velocity_units = Chem.H.length_units / Chem.H.time_units; @@ -70,9 +69,8 @@ void Grid3D::Initialize_Chemistry(struct parameters *P) length_base = length_base / Chem.H.a_value; #endif // COSMOLOGY - time_base = Chem.H.time_units; - Chem.H.cooling_units = - (pow(length_base, 2) * pow(MH, 2)) / (dens_base * pow(time_base, 3)); + time_base = Chem.H.time_units; + Chem.H.cooling_units = (pow(length_base, 2) * pow(MH, 2)) / (dens_base * pow(time_base, 3)); Chem.H.reaction_units = MH / (dens_base * time_base); // printf(" cooling_units: %e\n", Chem.H.cooling_units ); // printf(" reaction_units: %e\n", Chem.H.reaction_units ); @@ -84,11 +82,9 @@ void Grid3D::Initialize_Chemistry(struct parameters *P) #ifdef COSMOLOGY // Real kpc_cgs = KPC_CGS; - Chem.H.density_conversion = Cosmo.rho_0_gas * Cosmo.cosmo_h * Cosmo.cosmo_h / - pow(kpc_cgs, 3) * MSUN_CGS; - Chem.H.energy_conversion = - Cosmo.v_0_gas * Cosmo.v_0_gas * 1e10; // km^2 -> cm^2 ; - #else // Not COSMOLOGY + Chem.H.density_conversion = Cosmo.rho_0_gas * Cosmo.cosmo_h * Cosmo.cosmo_h / pow(kpc_cgs, 3) * MSUN_CGS; + Chem.H.energy_conversion = Cosmo.v_0_gas * Cosmo.v_0_gas * 1e10; // km^2 -> cm^2 ; + #else // Not COSMOLOGY Chem.H.density_conversion = 1.0; Chem.H.energy_conversion = 1.0; #endif @@ -108,9 +104,7 @@ void Grid3D::Initialize_Chemistry(struct parameters *P) chprintf("Chemistry Solver Successfully Initialized. \n\n"); } -void Chem_GPU::Generate_Reaction_Rate_Table(Real **rate_table_array_d, - Rate_Function_T rate_function, - Real units) +void Chem_GPU::Generate_Reaction_Rate_Table(Real **rate_table_array_d, Rate_Function_T rate_function, Real units) { // Host array for storing the rates Real *rate_table_array_h = (Real *)malloc(H.N_Temp_bins * sizeof(Real)); @@ -130,8 +124,7 @@ void Chem_GPU::Generate_Reaction_Rate_Table(Real **rate_table_array_d, // Allocate the device array for the rate and copy from host Allocate_Array_GPU_Real(rate_table_array_d, H.N_Temp_bins); - Copy_Real_Array_to_Device(H.N_Temp_bins, rate_table_array_h, - *rate_table_array_d); + Copy_Real_Array_to_Device(H.N_Temp_bins, rate_table_array_h, *rate_table_array_d); // Free the host array free(rate_table_array_h); @@ -160,19 +153,13 @@ void Chem_GPU::Initialize_Cooling_Rates() Generate_Reaction_Rate_Table(&H.cool_ciHeIS_d, cool_ciHeIS_rate, units); if (!use_case_B_recombination) { - Generate_Reaction_Rate_Table(&H.cool_reHII_d, cool_reHII_rate_case_A, - units); - Generate_Reaction_Rate_Table(&H.cool_reHeII1_d, cool_reHeII1_rate_case_A, - units); - Generate_Reaction_Rate_Table(&H.cool_reHeIII_d, cool_reHeIII_rate_case_A, - units); + Generate_Reaction_Rate_Table(&H.cool_reHII_d, cool_reHII_rate_case_A, units); + Generate_Reaction_Rate_Table(&H.cool_reHeII1_d, cool_reHeII1_rate_case_A, units); + Generate_Reaction_Rate_Table(&H.cool_reHeIII_d, cool_reHeIII_rate_case_A, units); } else { - Generate_Reaction_Rate_Table(&H.cool_reHII_d, cool_reHII_rate_case_B, - units); - Generate_Reaction_Rate_Table(&H.cool_reHeII1_d, cool_reHeII1_rate_case_B, - units); - Generate_Reaction_Rate_Table(&H.cool_reHeIII_d, cool_reHeIII_rate_case_B, - units); + Generate_Reaction_Rate_Table(&H.cool_reHII_d, cool_reHII_rate_case_B, units); + Generate_Reaction_Rate_Table(&H.cool_reHeII1_d, cool_reHeII1_rate_case_B, units); + Generate_Reaction_Rate_Table(&H.cool_reHeIII_d, cool_reHeIII_rate_case_B, units); } Generate_Reaction_Rate_Table(&H.cool_reHeII2_d, cool_reHeII2_rate, units); @@ -192,19 +179,13 @@ void Chem_GPU::Initialize_Reaction_Rates() Generate_Reaction_Rate_Table(&H.k_coll_i_HI_HeI_d, coll_i_HI_HeI_rate, units); if (!use_case_B_recombination) { - Generate_Reaction_Rate_Table(&H.k_recomb_HII_d, recomb_HII_rate_case_A, - units); - Generate_Reaction_Rate_Table(&H.k_recomb_HeII_d, recomb_HeII_rate_case_A, - units); - Generate_Reaction_Rate_Table(&H.k_recomb_HeIII_d, recomb_HeIII_rate_case_A, - units); + Generate_Reaction_Rate_Table(&H.k_recomb_HII_d, recomb_HII_rate_case_A, units); + Generate_Reaction_Rate_Table(&H.k_recomb_HeII_d, recomb_HeII_rate_case_A, units); + Generate_Reaction_Rate_Table(&H.k_recomb_HeIII_d, recomb_HeIII_rate_case_A, units); } else { - Generate_Reaction_Rate_Table(&H.k_recomb_HII_d, recomb_HII_rate_case_B, - units); - Generate_Reaction_Rate_Table(&H.k_recomb_HeII_d, recomb_HeII_rate_case_B, - units); - Generate_Reaction_Rate_Table(&H.k_recomb_HeIII_d, recomb_HeIII_rate_case_B, - units); + Generate_Reaction_Rate_Table(&H.k_recomb_HII_d, recomb_HII_rate_case_B, units); + Generate_Reaction_Rate_Table(&H.k_recomb_HeII_d, recomb_HeII_rate_case_B, units); + Generate_Reaction_Rate_Table(&H.k_recomb_HeIII_d, recomb_HeIII_rate_case_B, units); } } @@ -216,9 +197,8 @@ void Chem_GPU::Initialize_UVB_Ionization_and_Heating_Rates(struct parameters *P) Copy_UVB_Rates_to_GPU(); #ifdef TEXTURES_UVB_INTERPOLATION - Bind_GPU_Textures(n_uvb_rates_samples, Heat_rates_HI_h, Heat_rates_HeI_h, - Heat_rates_HeII_h, Ion_rates_HI_h, Ion_rates_HeI_h, - Ion_rates_HeII_h); + Bind_GPU_Textures(n_uvb_rates_samples, Heat_rates_HI_h, Heat_rates_HeI_h, Heat_rates_HeII_h, Ion_rates_HI_h, + Ion_rates_HeI_h, Ion_rates_HeII_h); #endif } @@ -233,18 +213,12 @@ void Chem_GPU::Copy_UVB_Rates_to_GPU() Allocate_Array_GPU_float(&Ion_rates_HeII_d, n_uvb_rates_samples); Copy_Float_Array_to_Device(n_uvb_rates_samples, rates_z_h, rates_z_d); - Copy_Float_Array_to_Device(n_uvb_rates_samples, Heat_rates_HI_h, - Heat_rates_HI_d); - Copy_Float_Array_to_Device(n_uvb_rates_samples, Heat_rates_HeI_h, - Heat_rates_HeI_d); - Copy_Float_Array_to_Device(n_uvb_rates_samples, Heat_rates_HeII_h, - Heat_rates_HeII_d); - Copy_Float_Array_to_Device(n_uvb_rates_samples, Ion_rates_HI_h, - Ion_rates_HI_d); - Copy_Float_Array_to_Device(n_uvb_rates_samples, Ion_rates_HeI_h, - Ion_rates_HeI_d); - Copy_Float_Array_to_Device(n_uvb_rates_samples, Ion_rates_HeII_h, - Ion_rates_HeII_d); + Copy_Float_Array_to_Device(n_uvb_rates_samples, Heat_rates_HI_h, Heat_rates_HI_d); + Copy_Float_Array_to_Device(n_uvb_rates_samples, Heat_rates_HeI_h, Heat_rates_HeI_d); + Copy_Float_Array_to_Device(n_uvb_rates_samples, Heat_rates_HeII_h, Heat_rates_HeII_d); + Copy_Float_Array_to_Device(n_uvb_rates_samples, Ion_rates_HI_h, Ion_rates_HI_d); + Copy_Float_Array_to_Device(n_uvb_rates_samples, Ion_rates_HeI_h, Ion_rates_HeI_d); + Copy_Float_Array_to_Device(n_uvb_rates_samples, Ion_rates_HeII_h, Ion_rates_HeII_d); } void Grid3D::Update_Chemistry() @@ -255,12 +229,10 @@ void Grid3D::Update_Chemistry() Chem.H.current_z = 0; #endif - Do_Chemistry_Update(C.device, H.nx, H.ny, H.nz, H.n_ghost, H.n_fields, H.dt, - Chem.H); + Do_Chemistry_Update(C.device, H.nx, H.ny, H.nz, H.n_ghost, H.n_fields, H.dt, Chem.H); } -void Grid3D::Compute_Gas_Temperature(Real *temperature, - bool convert_cosmo_units) +void Grid3D::Compute_Gas_Temperature(Real *temperature, bool convert_cosmo_units) { int k, j, i, id; Real dens_HI, dens_HII, dens_HeI, dens_HeII, dens_HeIII, dens_e, gamma; @@ -282,10 +254,7 @@ void Grid3D::Compute_Gas_Temperature(Real *temperature, #ifdef DE GE = C.GasEnergy[id]; #else - GE = (E - - 0.5 * d * - (vx * vx + vy * vy + - vz * vz)); // TODO: this probably needs to be fixed for MHD + GE = (E - 0.5 * d * (vx * vx + vy * vy + vz * vz)); // TODO: this probably needs to be fixed for MHD #endif dens_HI = C.HI_density[id]; @@ -296,9 +265,8 @@ void Grid3D::Compute_Gas_Temperature(Real *temperature, dens_e = C.e_density[id]; cell_dens = dens_HI + dens_HII + dens_HeI + dens_HeII + dens_HeIII; - cell_n = dens_HI + dens_HII + (dens_HeI + dens_HeII + dens_HeIII) / 4 + - dens_e; - mu = cell_dens / cell_n; + cell_n = dens_HI + dens_HII + (dens_HeI + dens_HeII + dens_HeIII) / 4 + dens_e; + mu = cell_dens / cell_n; #ifdef COSMOLOGY if (convert_cosmo_units) { diff --git a/src/chemistry_gpu/chemistry_functions_gpu.cu b/src/chemistry_gpu/chemistry_functions_gpu.cu index b5ecddb45..df886f7b7 100644 --- a/src/chemistry_gpu/chemistry_functions_gpu.cu +++ b/src/chemistry_gpu/chemistry_functions_gpu.cu @@ -21,11 +21,9 @@ void Chem_GPU::Allocate_Array_GPU_float(float **array_dev, int size) CudaCheckError(); } -void Chem_GPU::Copy_Float_Array_to_Device(int size, float *array_h, - float *array_d) +void Chem_GPU::Copy_Float_Array_to_Device(int size, float *array_h, float *array_d) { - CudaSafeCall(cudaMemcpy(array_d, array_h, size * sizeof(float), - cudaMemcpyHostToDevice)); + CudaSafeCall(cudaMemcpy(array_d, array_h, size * sizeof(float), cudaMemcpyHostToDevice)); cudaDeviceSynchronize(); } @@ -43,8 +41,7 @@ void Chem_GPU::Allocate_Array_GPU_Real(Real **array_dev, int size) void Chem_GPU::Copy_Real_Array_to_Device(int size, Real *array_h, Real *array_d) { - CudaSafeCall(cudaMemcpy(array_d, array_h, size * sizeof(Real), - cudaMemcpyHostToDevice)); + CudaSafeCall(cudaMemcpy(array_d, array_h, size * sizeof(Real), cudaMemcpyHostToDevice)); cudaDeviceSynchronize(); } @@ -67,18 +64,9 @@ class Thermal_State Real d_e; // Constructor - __host__ __device__ Thermal_State(Real U_0 = 1, Real d_0 = 1, Real d_HI_0 = 1, - Real d_HII_0 = 0, Real d_HeI_0 = 1, - Real d_HeII_0 = 0, Real d_HeIII_0 = 1, - Real d_e_0 = 0) - : U(U_0), - d(d_0), - d_HI(d_HI_0), - d_HII(d_HII_0), - d_HeI(d_HeI_0), - d_HeII(d_HeII_0), - d_HeIII(d_HeIII_0), - d_e(d_e_0) + __host__ __device__ Thermal_State(Real U_0 = 1, Real d_0 = 1, Real d_HI_0 = 1, Real d_HII_0 = 0, Real d_HeI_0 = 1, + Real d_HeII_0 = 0, Real d_HeIII_0 = 1, Real d_e_0 = 0) + : U(U_0), d(d_0), d_HI(d_HI_0), d_HII(d_HII_0), d_HeI(d_HeI_0), d_HeII(d_HeII_0), d_HeIII(d_HeIII_0), d_e(d_e_0) { } @@ -107,9 +95,8 @@ class Thermal_State } }; -__device__ void get_temperature_indx(Real T, Chemistry_Header &Chem_H, - int &temp_indx, Real &delta_T, - Real temp_old, bool print) +__device__ void get_temperature_indx(Real T, Chemistry_Header &Chem_H, int &temp_indx, Real &delta_T, Real temp_old, + bool print) { Real logT, logT_start, d_logT, logT_l, logT_r; logT = log(0.5 * (T + temp_old)); @@ -136,73 +123,56 @@ __device__ Real interpolate_rate(Real *rate_table, int indx, Real delta) return rate_val; } -__device__ Real Get_Cooling_Rates(Thermal_State &TS, Chemistry_Header &Chem_H, - Real dens_number_conv, Real current_z, - Real temp_prev, float photo_h_HI, - float photo_h_HeI, float photo_h_HeII, - bool print) +__device__ Real Get_Cooling_Rates(Thermal_State &TS, Chemistry_Header &Chem_H, Real dens_number_conv, Real current_z, + Real temp_prev, float photo_h_HI, float photo_h_HeI, float photo_h_HeII, bool print) { int temp_indx; Real temp, delta_T, U_dot; temp = TS.get_temperature(Chem_H.gamma); get_temperature_indx(temp, Chem_H, temp_indx, delta_T, temp_prev, print); - if (print) - printf("mu: %f temp: %f temp_indx: %d delta_T: %f \n", TS.get_MMW(), - temp, temp_indx, delta_T); + if (print) printf("mu: %f temp: %f temp_indx: %d delta_T: %f \n", TS.get_MMW(), temp, temp_indx, delta_T); U_dot = 0.0; // Collisional excitation cooling Real cool_ceHI, cool_ceHeI, cool_ceHeII; - cool_ceHI = interpolate_rate(Chem_H.cool_ceHI_d, temp_indx, delta_T) * - TS.d_HI * TS.d_e; - cool_ceHeI = interpolate_rate(Chem_H.cool_ceHeI_d, temp_indx, delta_T) * - TS.d_HeII * TS.d_e * TS.d_e * dens_number_conv / 4.0; - cool_ceHeII = interpolate_rate(Chem_H.cool_ceHeII_d, temp_indx, delta_T) * - TS.d_HeII * TS.d_e / 4.0; + cool_ceHI = interpolate_rate(Chem_H.cool_ceHI_d, temp_indx, delta_T) * TS.d_HI * TS.d_e; + cool_ceHeI = + interpolate_rate(Chem_H.cool_ceHeI_d, temp_indx, delta_T) * TS.d_HeII * TS.d_e * TS.d_e * dens_number_conv / 4.0; + cool_ceHeII = interpolate_rate(Chem_H.cool_ceHeII_d, temp_indx, delta_T) * TS.d_HeII * TS.d_e / 4.0; U_dot -= cool_ceHI + cool_ceHeI + cool_ceHeII; // Collisional excitation cooling Real cool_ciHI, cool_ciHeI, cool_ciHeII, cool_ciHeIS; - cool_ciHI = interpolate_rate(Chem_H.cool_ciHI_d, temp_indx, delta_T) * - TS.d_HI * TS.d_e; - cool_ciHeI = interpolate_rate(Chem_H.cool_ciHeI_d, temp_indx, delta_T) * - TS.d_HeI * TS.d_e / 4.0; - cool_ciHeII = interpolate_rate(Chem_H.cool_ciHeII_d, temp_indx, delta_T) * - TS.d_HeII * TS.d_e / 4.0; - cool_ciHeIS = interpolate_rate(Chem_H.cool_ciHeIS_d, temp_indx, delta_T) * - TS.d_HeII * TS.d_e * TS.d_e * dens_number_conv / 4.0; + cool_ciHI = interpolate_rate(Chem_H.cool_ciHI_d, temp_indx, delta_T) * TS.d_HI * TS.d_e; + cool_ciHeI = interpolate_rate(Chem_H.cool_ciHeI_d, temp_indx, delta_T) * TS.d_HeI * TS.d_e / 4.0; + cool_ciHeII = interpolate_rate(Chem_H.cool_ciHeII_d, temp_indx, delta_T) * TS.d_HeII * TS.d_e / 4.0; + cool_ciHeIS = + interpolate_rate(Chem_H.cool_ciHeIS_d, temp_indx, delta_T) * TS.d_HeII * TS.d_e * TS.d_e * dens_number_conv / 4.0; U_dot -= cool_ciHI + cool_ciHeI + cool_ciHeII + cool_ciHeIS; // Recombination cooling Real cool_reHII, cool_reHeII1, cool_reHeII2, cool_reHeIII; - cool_reHII = interpolate_rate(Chem_H.cool_reHII_d, temp_indx, delta_T) * - TS.d_HII * TS.d_e; - cool_reHeII1 = interpolate_rate(Chem_H.cool_reHeII1_d, temp_indx, delta_T) * - TS.d_HeII * TS.d_e / 4.0; - cool_reHeII2 = interpolate_rate(Chem_H.cool_reHeII2_d, temp_indx, delta_T) * - TS.d_HeII * TS.d_e / 4.0; - cool_reHeIII = interpolate_rate(Chem_H.cool_reHeIII_d, temp_indx, delta_T) * - TS.d_HeIII * TS.d_e / 4.0; + cool_reHII = interpolate_rate(Chem_H.cool_reHII_d, temp_indx, delta_T) * TS.d_HII * TS.d_e; + cool_reHeII1 = interpolate_rate(Chem_H.cool_reHeII1_d, temp_indx, delta_T) * TS.d_HeII * TS.d_e / 4.0; + cool_reHeII2 = interpolate_rate(Chem_H.cool_reHeII2_d, temp_indx, delta_T) * TS.d_HeII * TS.d_e / 4.0; + cool_reHeIII = interpolate_rate(Chem_H.cool_reHeIII_d, temp_indx, delta_T) * TS.d_HeIII * TS.d_e / 4.0; U_dot -= cool_reHII + cool_reHeII1 + cool_reHeII2 + cool_reHeIII; // Bremsstrahlung cooling Real cool_brem; - cool_brem = interpolate_rate(Chem_H.cool_brem_d, temp_indx, delta_T) * - (TS.d_HII + TS.d_HeII / 4.0 + TS.d_HeIII) * TS.d_e; + cool_brem = + interpolate_rate(Chem_H.cool_brem_d, temp_indx, delta_T) * (TS.d_HII + TS.d_HeII / 4.0 + TS.d_HeIII) * TS.d_e; U_dot -= cool_brem; // Compton cooling or heating Real cool_compton, temp_cmb; temp_cmb = 2.73 * (1.0 + current_z); - cool_compton = Chem_H.cool_compton * pow(1.0 + current_z, 4) * - (temp - temp_cmb) * TS.d_e / dens_number_conv; + cool_compton = Chem_H.cool_compton * pow(1.0 + current_z, 4) * (temp - temp_cmb) * TS.d_e / dens_number_conv; U_dot -= cool_compton; // Phothoheating Real photo_heat; - photo_heat = (photo_h_HI * TS.d_HI + - 0.25 * (photo_h_HeI * TS.d_HeI + photo_h_HeII * TS.d_HeII)) / - dens_number_conv; + photo_heat = (photo_h_HI * TS.d_HI + 0.25 * (photo_h_HeI * TS.d_HeI + photo_h_HeII * TS.d_HeII)) / dens_number_conv; U_dot += photo_heat; if (temp <= 1.01 * Chem_H.Temp_start && fabs(U_dot) < 0) U_dot = tiny; @@ -226,15 +196,9 @@ __device__ Real Get_Cooling_Rates(Thermal_State &TS, Chemistry_Header &Chem_H, if (print) printf("Cooling reHeII2: %e \n", cool_reHeII2); if (print) printf("Cooling reHeIII: %e \n", cool_reHeIII); if (print) printf("Cooling brem: %e \n", cool_brem); - if (print) - printf("Cooling piHI: %e rate: %e \n", photo_h_HI, - photo_h_HI * TS.d_HI / dens_number_conv); - if (print) - printf("Cooling piHeI: %e rate: %e \n", photo_h_HeI, - photo_h_HeI * TS.d_HeI / dens_number_conv * 0.25); - if (print) - printf("Cooling piHeII: %e rate: %e \n", photo_h_HeII, - photo_h_HeII * TS.d_HeII / dens_number_conv * 0.25); + if (print) printf("Cooling piHI: %e rate: %e \n", photo_h_HI, photo_h_HI * TS.d_HI / dens_number_conv); + if (print) printf("Cooling piHeI: %e rate: %e \n", photo_h_HeI, photo_h_HeI * TS.d_HeI / dens_number_conv * 0.25); + if (print) printf("Cooling piHeII: %e rate: %e \n", photo_h_HeII, photo_h_HeII * TS.d_HeII / dens_number_conv * 0.25); if (print) printf("Cooling DOM: %e \n", dens_number_conv); if (print) printf("Cooling compton: %e \n", cool_compton); if (print) printf("Cooling U_dot: %e \n", U_dot); @@ -242,12 +206,9 @@ __device__ Real Get_Cooling_Rates(Thermal_State &TS, Chemistry_Header &Chem_H, return U_dot; } -__device__ void Get_Reaction_Rates(Thermal_State &TS, Chemistry_Header &Chem_H, - Real &k_coll_i_HI, Real &k_coll_i_HeI, - Real &k_coll_i_HeII, Real &k_coll_i_HI_HI, - Real &k_coll_i_HI_HeI, Real &k_recomb_HII, - Real &k_recomb_HeII, Real &k_recomb_HeIII, - bool print) +__device__ void Get_Reaction_Rates(Thermal_State &TS, Chemistry_Header &Chem_H, Real &k_coll_i_HI, Real &k_coll_i_HeI, + Real &k_coll_i_HeII, Real &k_coll_i_HI_HI, Real &k_coll_i_HI_HeI, Real &k_recomb_HII, + Real &k_recomb_HeII, Real &k_recomb_HeIII, bool print) { int temp_indx; Real temp, delta_T; @@ -258,15 +219,12 @@ __device__ void Get_Reaction_Rates(Thermal_State &TS, Chemistry_Header &Chem_H, k_coll_i_HeI = interpolate_rate(Chem_H.k_coll_i_HeI_d, temp_indx, delta_T); k_coll_i_HeII = interpolate_rate(Chem_H.k_coll_i_HeII_d, temp_indx, delta_T); - k_coll_i_HI_HI = - interpolate_rate(Chem_H.k_coll_i_HI_HI_d, temp_indx, delta_T); - k_coll_i_HI_HeI = - interpolate_rate(Chem_H.k_coll_i_HI_HeI_d, temp_indx, delta_T); + k_coll_i_HI_HI = interpolate_rate(Chem_H.k_coll_i_HI_HI_d, temp_indx, delta_T); + k_coll_i_HI_HeI = interpolate_rate(Chem_H.k_coll_i_HI_HeI_d, temp_indx, delta_T); - k_recomb_HII = interpolate_rate(Chem_H.k_recomb_HII_d, temp_indx, delta_T); - k_recomb_HeII = interpolate_rate(Chem_H.k_recomb_HeII_d, temp_indx, delta_T); - k_recomb_HeIII = - interpolate_rate(Chem_H.k_recomb_HeIII_d, temp_indx, delta_T); + k_recomb_HII = interpolate_rate(Chem_H.k_recomb_HII_d, temp_indx, delta_T); + k_recomb_HeII = interpolate_rate(Chem_H.k_recomb_HeII_d, temp_indx, delta_T); + k_recomb_HeIII = interpolate_rate(Chem_H.k_recomb_HeIII_d, temp_indx, delta_T); if (print) printf("logT: %f temp_indx: %d\n", log(temp), temp_indx); if (print) printf("k_coll_i_HI: %e \n", k_coll_i_HI); @@ -279,8 +237,7 @@ __device__ void Get_Reaction_Rates(Thermal_State &TS, Chemistry_Header &Chem_H, if (print) printf("k_recomb_HeIII: %e \n", k_recomb_HeIII); } -__device__ int Binary_Search(int N, Real val, float *data, int indx_l, - int indx_r) +__device__ int Binary_Search(int N, Real val, float *data, int indx_l, int indx_r) { int n, indx; n = indx_r - indx_l; @@ -295,8 +252,7 @@ __device__ int Binary_Search(int N, Real val, float *data, int indx_l, return Binary_Search(N, val, data, indx_l, indx_r); } -__device__ Real linear_interpolation(Real delta_x, int indx_l, int indx_r, - float *array) +__device__ Real linear_interpolation(Real delta_x, int indx_l, int indx_r, float *array) { float v_l, v_r; Real v; @@ -306,10 +262,8 @@ __device__ Real linear_interpolation(Real delta_x, int indx_l, int indx_r, return v; } -__device__ void Get_Current_UVB_Rates(Real current_z, Chemistry_Header &Chem_H, - float &photo_i_HI, float &photo_i_HeI, - float &photo_i_HeII, float &photo_h_HI, - float &photo_h_HeI, float &photo_h_HeII, +__device__ void Get_Current_UVB_Rates(Real current_z, Chemistry_Header &Chem_H, float &photo_i_HI, float &photo_i_HeI, + float &photo_i_HeII, float &photo_h_HI, float &photo_h_HeI, float &photo_h_HeII, bool print) { if (current_z > Chem_H.uvb_rates_redshift_d[Chem_H.n_uvb_rates_samples - 1]) { @@ -324,50 +278,36 @@ __device__ void Get_Current_UVB_Rates(Real current_z, Chemistry_Header &Chem_H, // Find closest value of z in rates_z such that z<=current_z int indx_l; Real z_l, z_r, delta_x; - indx_l = Binary_Search(Chem_H.n_uvb_rates_samples, current_z, - Chem_H.uvb_rates_redshift_d, 0, + indx_l = Binary_Search(Chem_H.n_uvb_rates_samples, current_z, Chem_H.uvb_rates_redshift_d, 0, Chem_H.n_uvb_rates_samples - 1); z_l = Chem_H.uvb_rates_redshift_d[indx_l]; z_r = Chem_H.uvb_rates_redshift_d[indx_l + 1]; delta_x = (current_z - z_l) / (z_r - z_l); - photo_i_HI = linear_interpolation(delta_x, indx_l, indx_l + 1, - Chem_H.photo_ion_HI_rate_d); - photo_i_HeI = linear_interpolation(delta_x, indx_l, indx_l + 1, - Chem_H.photo_ion_HeI_rate_d); - photo_i_HeII = linear_interpolation(delta_x, indx_l, indx_l + 1, - Chem_H.photo_ion_HeII_rate_d); - photo_h_HI = linear_interpolation(delta_x, indx_l, indx_l + 1, - Chem_H.photo_heat_HI_rate_d); - photo_h_HeI = linear_interpolation(delta_x, indx_l, indx_l + 1, - Chem_H.photo_heat_HeI_rate_d); - photo_h_HeII = linear_interpolation(delta_x, indx_l, indx_l + 1, - Chem_H.photo_heat_HeII_rate_d); + photo_i_HI = linear_interpolation(delta_x, indx_l, indx_l + 1, Chem_H.photo_ion_HI_rate_d); + photo_i_HeI = linear_interpolation(delta_x, indx_l, indx_l + 1, Chem_H.photo_ion_HeI_rate_d); + photo_i_HeII = linear_interpolation(delta_x, indx_l, indx_l + 1, Chem_H.photo_ion_HeII_rate_d); + photo_h_HI = linear_interpolation(delta_x, indx_l, indx_l + 1, Chem_H.photo_heat_HI_rate_d); + photo_h_HeI = linear_interpolation(delta_x, indx_l, indx_l + 1, Chem_H.photo_heat_HeI_rate_d); + photo_h_HeII = linear_interpolation(delta_x, indx_l, indx_l + 1, Chem_H.photo_heat_HeII_rate_d); } -__device__ Real Get_Chemistry_dt( - Thermal_State &TS, Chemistry_Header &Chem_H, Real &HI_dot, Real &e_dot, - Real U_dot, Real k_coll_i_HI, Real k_coll_i_HeI, Real k_coll_i_HeII, - Real k_coll_i_HI_HI, Real k_coll_i_HI_HeI, Real k_recomb_HII, - Real k_recomb_HeII, Real k_recomb_HeIII, float photo_i_HI, - float photo_i_HeI, float photo_i_HeII, int n_iter, Real HI_dot_prev, - Real e_dot_prev, Real t_chem, Real dt_hydro, bool print) +__device__ Real Get_Chemistry_dt(Thermal_State &TS, Chemistry_Header &Chem_H, Real &HI_dot, Real &e_dot, Real U_dot, + Real k_coll_i_HI, Real k_coll_i_HeI, Real k_coll_i_HeII, Real k_coll_i_HI_HI, + Real k_coll_i_HI_HeI, Real k_recomb_HII, Real k_recomb_HeII, Real k_recomb_HeIII, + float photo_i_HI, float photo_i_HeI, float photo_i_HeII, int n_iter, Real HI_dot_prev, + Real e_dot_prev, Real t_chem, Real dt_hydro, bool print) { Real dt, energy; // Rate of change of HI - HI_dot = k_recomb_HII * TS.d_HII * TS.d_e - k_coll_i_HI * TS.d_HI * TS.d_e - - k_coll_i_HI_HI * TS.d_HI * TS.d_HI - + HI_dot = k_recomb_HII * TS.d_HII * TS.d_e - k_coll_i_HI * TS.d_HI * TS.d_e - k_coll_i_HI_HI * TS.d_HI * TS.d_HI - k_coll_i_HI_HeI * TS.d_HI * TS.d_HeI / 4.0 - photo_i_HI * TS.d_HI; // Rate of change of electron - e_dot = k_coll_i_HI * TS.d_HI * TS.d_e + - k_coll_i_HeI * TS.d_HeI / 4.0 * TS.d_e + - k_coll_i_HeII * TS.d_HeII / 4.0 * TS.d_e + - k_coll_i_HI_HI * TS.d_HI * TS.d_HI + - +k_coll_i_HI_HeI * TS.d_HI * TS.d_HeI / 4.0 - - k_recomb_HII * TS.d_HII * TS.d_e - - k_recomb_HeII * TS.d_HeII / 4.0 * TS.d_e - - k_recomb_HeIII * TS.d_HeIII / 4.0 * TS.d_e + photo_i_HI * TS.d_HI + + e_dot = k_coll_i_HI * TS.d_HI * TS.d_e + k_coll_i_HeI * TS.d_HeI / 4.0 * TS.d_e + + k_coll_i_HeII * TS.d_HeII / 4.0 * TS.d_e + k_coll_i_HI_HI * TS.d_HI * TS.d_HI + + +k_coll_i_HI_HeI * TS.d_HI * TS.d_HeI / 4.0 - k_recomb_HII * TS.d_HII * TS.d_e - + k_recomb_HeII * TS.d_HeII / 4.0 * TS.d_e - k_recomb_HeIII * TS.d_HeIII / 4.0 * TS.d_e + photo_i_HI * TS.d_HI + photo_i_HeI * TS.d_HeI / 4.0 + photo_i_HeII * TS.d_HeII / 4.0; // Bound from below to prevent numerical errors @@ -376,8 +316,7 @@ __device__ Real Get_Chemistry_dt( // If the net rate is almost perfectly balanced then set // it to zero (since it is zero to available precision) - if (fmin(fabs(k_coll_i_HI * TS.d_HI * TS.d_e), - fabs(k_recomb_HII * TS.d_HII * TS.d_e)) / + if (fmin(fabs(k_coll_i_HI * TS.d_HI * TS.d_e), fabs(k_recomb_HII * TS.d_HII * TS.d_e)) / fmax(fabs(HI_dot), fabs(e_dot)) > 1e6) { HI_dot = tiny; @@ -394,8 +333,7 @@ __device__ Real Get_Chemistry_dt( } #ifdef TEMPERATURE_FLOOR - if (TS.get_temperature(Chem_H.gamma) < TEMP_FLOOR) - TS.U = TS.compute_U(TEMP_FLOOR, Chem_H.gamma); + if (TS.get_temperature(Chem_H.gamma) < TEMP_FLOOR) TS.U = TS.compute_U(TEMP_FLOOR, Chem_H.gamma); #endif energy = fmax(TS.U * TS.d, tiny); @@ -408,9 +346,8 @@ __device__ Real Get_Chemistry_dt( printf( "##### Chem_GPU: dt_hydro: %e t_chem: %e dens: %e temp: %e GE: " "%e U_dot: %e dt_HI: %e dt_e: %e dt_U: %e \n", - dt_hydro, t_chem, TS.d, TS.get_temperature(Chem_H.gamma), energy, U_dot, - fabs(0.1 * TS.d_HI / HI_dot), fabs(0.1 * TS.d_e / e_dot), - fabs(0.1 * TS.U * TS.d / U_dot)); + dt_hydro, t_chem, TS.d, TS.get_temperature(Chem_H.gamma), energy, U_dot, fabs(0.1 * TS.d_HI / HI_dot), + fabs(0.1 * TS.d_e / e_dot), fabs(0.1 * TS.U * TS.d / U_dot)); } if (print) printf("HIdot: %e\n", HI_dot); @@ -423,73 +360,53 @@ __device__ Real Get_Chemistry_dt( return dt; } -__device__ void Update_Step(Thermal_State &TS, Chemistry_Header &Chem_H, - Real dt, Real U_dot, Real k_coll_i_HI, - Real k_coll_i_HeI, Real k_coll_i_HeII, - Real k_coll_i_HI_HI, Real k_coll_i_HI_HeI, - Real k_recomb_HII, Real k_recomb_HeII, - Real k_recomb_HeIII, float photo_i_HI, - float photo_i_HeI, float photo_i_HeII, - Real &HI_dot_prev, Real &e_dot_prev, - Real &temp_prev, bool print) +__device__ void Update_Step(Thermal_State &TS, Chemistry_Header &Chem_H, Real dt, Real U_dot, Real k_coll_i_HI, + Real k_coll_i_HeI, Real k_coll_i_HeII, Real k_coll_i_HI_HI, Real k_coll_i_HI_HeI, + Real k_recomb_HII, Real k_recomb_HeII, Real k_recomb_HeIII, float photo_i_HI, + float photo_i_HeI, float photo_i_HeII, Real &HI_dot_prev, Real &e_dot_prev, Real &temp_prev, + bool print) { Real d_HI_p, d_HII_p, d_HeI_p, d_HeII_p, d_HeIII_p, d_e_p; Real s_coef, a_coef; // Update HI s_coef = k_recomb_HII * TS.d_HII * TS.d_e; - a_coef = k_coll_i_HI * TS.d_e + k_coll_i_HI_HI * TS.d_HI + - k_coll_i_HI_HeI * TS.d_HeI / 4.0 + photo_i_HI; + a_coef = k_coll_i_HI * TS.d_e + k_coll_i_HI_HI * TS.d_HI + k_coll_i_HI_HeI * TS.d_HeI / 4.0 + photo_i_HI; d_HI_p = (dt * s_coef + TS.d_HI) / (1.0 + dt * a_coef); - if (print) - printf("Update HI s_coef: %e a_coef: %e HIp: %e \n", s_coef, a_coef, - d_HI_p); + if (print) printf("Update HI s_coef: %e a_coef: %e HIp: %e \n", s_coef, a_coef, d_HI_p); // Update HII s_coef = k_coll_i_HI * d_HI_p * TS.d_e + k_coll_i_HI_HI * d_HI_p * d_HI_p + k_coll_i_HI_HeI * d_HI_p * TS.d_HeI / 4.0 + photo_i_HI * d_HI_p; a_coef = k_recomb_HII * TS.d_e; d_HII_p = (dt * s_coef + TS.d_HII) / (1.0 + dt * a_coef); - if (print) - printf("Update HII s_coef: %e a_coef: %e HIIp: %e \n", s_coef, a_coef, - d_HII_p); + if (print) printf("Update HII s_coef: %e a_coef: %e HIIp: %e \n", s_coef, a_coef, d_HII_p); // Update electron - s_coef = k_coll_i_HI_HI * d_HI_p * d_HI_p + - k_coll_i_HI_HeI * d_HI_p * TS.d_HeI / 4.0 + photo_i_HI * TS.d_HI + + s_coef = k_coll_i_HI_HI * d_HI_p * d_HI_p + k_coll_i_HI_HeI * d_HI_p * TS.d_HeI / 4.0 + photo_i_HI * TS.d_HI + photo_i_HeI * TS.d_HeI / 4.0 + photo_i_HeII * TS.d_HeII / 4.0; - a_coef = -k_coll_i_HI * TS.d_HI + k_recomb_HII * TS.d_HII - - k_coll_i_HeI * TS.d_HeI / 4.0 + k_recomb_HeII * TS.d_HeII / 4.0 - - k_coll_i_HeII * TS.d_HeII / 4.0 + k_recomb_HeIII * TS.d_HeIII / 4.0; + a_coef = -k_coll_i_HI * TS.d_HI + k_recomb_HII * TS.d_HII - k_coll_i_HeI * TS.d_HeI / 4.0 + + k_recomb_HeII * TS.d_HeII / 4.0 - k_coll_i_HeII * TS.d_HeII / 4.0 + k_recomb_HeIII * TS.d_HeIII / 4.0; d_e_p = (dt * s_coef + TS.d_e) / (1.0 + dt * a_coef); - if (print) - printf("Update e s_coef: %e a_coef: %e ep: %e \n", s_coef, a_coef, - d_e_p); + if (print) printf("Update e s_coef: %e a_coef: %e ep: %e \n", s_coef, a_coef, d_e_p); // Update HeI s_coef = k_recomb_HeII * TS.d_HeII * TS.d_e; a_coef = k_coll_i_HeI * TS.d_e + photo_i_HeI; d_HeI_p = (dt * s_coef + TS.d_HeI) / (1.0 + dt * a_coef); - if (print) - printf("Update HeI s_coef: %e a_coef: %e HeIp: %e \n", s_coef, a_coef, - d_HeI_p); + if (print) printf("Update HeI s_coef: %e a_coef: %e HeIp: %e \n", s_coef, a_coef, d_HeI_p); // Update HeII - s_coef = k_coll_i_HeI * d_HeI_p * TS.d_e + - k_recomb_HeIII * TS.d_HeIII * TS.d_e + photo_i_HeI * d_HeI_p; + s_coef = k_coll_i_HeI * d_HeI_p * TS.d_e + k_recomb_HeIII * TS.d_HeIII * TS.d_e + photo_i_HeI * d_HeI_p; a_coef = k_recomb_HeII * TS.d_e + k_coll_i_HeII * TS.d_e + photo_i_HeII; d_HeII_p = (dt * s_coef + TS.d_HeII) / (1.0 + dt * a_coef); - if (print) - printf("Update HeII s_coef: %e a_coef: %e HeIIp: %e \n", s_coef, - a_coef, d_HeII_p); + if (print) printf("Update HeII s_coef: %e a_coef: %e HeIIp: %e \n", s_coef, a_coef, d_HeII_p); // Update HeIII s_coef = k_coll_i_HeII * d_HeII_p * TS.d_e + photo_i_HeII * d_HeII_p; a_coef = k_recomb_HeIII * TS.d_e; d_HeIII_p = (dt * s_coef + TS.d_HeIII) / (1.0 + dt * a_coef); - if (print) - printf("Update HeIII s_coef: %e a_coef: %e HeIIIp: %e \n", s_coef, - a_coef, d_HeIII_p); + if (print) printf("Update HeIII s_coef: %e a_coef: %e HeIIIp: %e \n", s_coef, a_coef, d_HeIII_p); // Record the temperature for the next step temp_prev = TS.get_temperature(Chem_H.gamma); @@ -509,14 +426,12 @@ __device__ void Update_Step(Thermal_State &TS, Chemistry_Header &Chem_H, // Update internal energy TS.U += U_dot / TS.d * dt; #ifdef TEMPERATURE_FLOOR - if (TS.get_temperature(Chem_H.gamma) < TEMP_FLOOR) - TS.U = TS.compute_U(TEMP_FLOOR, Chem_H.gamma); + if (TS.get_temperature(Chem_H.gamma) < TEMP_FLOOR) TS.U = TS.compute_U(TEMP_FLOOR, Chem_H.gamma); #endif if (print) printf("Updated U: %e \n", TS.U); } -__global__ void Update_Chemistry_kernel(Real *dev_conserved, int nx, int ny, - int nz, int n_ghost, int n_fields, +__global__ void Update_Chemistry_kernel(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dt_hydro, Chemistry_Header Chem_H) { int id, xid, yid, zid, n_cells, n_iter; @@ -530,8 +445,7 @@ __global__ void Update_Chemistry_kernel(Real *dev_conserved, int nx, int ny, energy_conv = Chem_H.energy_conversion; Real U_dot, HI_dot, e_dot, HI_dot_prev, e_dot_prev, temp_prev; - Real k_coll_i_HI, k_coll_i_HeI, k_coll_i_HeII, k_coll_i_HI_HI, - k_coll_i_HI_HeI; + Real k_coll_i_HI, k_coll_i_HeI, k_coll_i_HeII, k_coll_i_HI_HI, k_coll_i_HI_HeI; Real k_recomb_HII, k_recomb_HeII, k_recomb_HeIII; float photo_i_HI, photo_i_HeI, photo_i_HeII; float photo_h_HI, photo_h_HeI, photo_h_HeII; @@ -547,8 +461,8 @@ __global__ void Update_Chemistry_kernel(Real *dev_conserved, int nx, int ny, bool print; // threads corresponding to real cells do the calculation - if (xid > n_ghost - 1 && xid < nx - n_ghost && yid > n_ghost - 1 && - yid < ny - n_ghost && zid > n_ghost - 1 && zid < nz - n_ghost) { + if (xid > n_ghost - 1 && xid < nx - n_ghost && yid > n_ghost - 1 && yid < ny - n_ghost && zid > n_ghost - 1 && + zid < nz - n_ghost) { d = dev_conserved[id]; d_inv = 1.0 / d; vx = dev_conserved[1 * n_cells + id] * d_inv; @@ -624,36 +538,30 @@ __global__ void Update_Chemistry_kernel(Real *dev_conserved, int nx, int ny, // } // Get the photoheating and photoionization rates at z=current_z - Get_Current_UVB_Rates(current_z, Chem_H, photo_i_HI, photo_i_HeI, - photo_i_HeII, photo_h_HI, photo_h_HeI, photo_h_HeII, - print); + Get_Current_UVB_Rates(current_z, Chem_H, photo_i_HI, photo_i_HeI, photo_i_HeII, photo_h_HI, photo_h_HeI, + photo_h_HeII, print); HI_dot_prev = 0; e_dot_prev = 0; n_iter = 0; t_chem = 0; while (t_chem < dt_hydro) { - if (print) - printf("########################################## Iter %d \n", n_iter); + if (print) printf("########################################## Iter %d \n", n_iter); - U_dot = Get_Cooling_Rates(TS, Chem_H, Chem_H.dens_number_conv, current_z, - temp_prev, photo_h_HI, photo_h_HeI, + U_dot = Get_Cooling_Rates(TS, Chem_H, Chem_H.dens_number_conv, current_z, temp_prev, photo_h_HI, photo_h_HeI, photo_h_HeII, print); - Get_Reaction_Rates(TS, Chem_H, k_coll_i_HI, k_coll_i_HeI, k_coll_i_HeII, - k_coll_i_HI_HI, k_coll_i_HI_HeI, k_recomb_HII, - k_recomb_HeII, k_recomb_HeIII, print); + Get_Reaction_Rates(TS, Chem_H, k_coll_i_HI, k_coll_i_HeI, k_coll_i_HeII, k_coll_i_HI_HI, k_coll_i_HI_HeI, + k_recomb_HII, k_recomb_HeII, k_recomb_HeIII, print); - dt_chem = Get_Chemistry_dt( - TS, Chem_H, HI_dot, e_dot, U_dot, k_coll_i_HI, k_coll_i_HeI, - k_coll_i_HeII, k_coll_i_HI_HI, k_coll_i_HI_HeI, k_recomb_HII, - k_recomb_HeII, k_recomb_HeIII, photo_i_HI, photo_i_HeI, photo_i_HeII, - n_iter, HI_dot_prev, e_dot_prev, t_chem, dt_hydro, print); + dt_chem = + Get_Chemistry_dt(TS, Chem_H, HI_dot, e_dot, U_dot, k_coll_i_HI, k_coll_i_HeI, k_coll_i_HeII, k_coll_i_HI_HI, + k_coll_i_HI_HeI, k_recomb_HII, k_recomb_HeII, k_recomb_HeIII, photo_i_HI, photo_i_HeI, + photo_i_HeII, n_iter, HI_dot_prev, e_dot_prev, t_chem, dt_hydro, print); - Update_Step(TS, Chem_H, dt_chem, U_dot, k_coll_i_HI, k_coll_i_HeI, - k_coll_i_HeII, k_coll_i_HI_HI, k_coll_i_HI_HeI, k_recomb_HII, - k_recomb_HeII, k_recomb_HeIII, photo_i_HI, photo_i_HeI, - photo_i_HeII, HI_dot_prev, e_dot_prev, temp_prev, print); + Update_Step(TS, Chem_H, dt_chem, U_dot, k_coll_i_HI, k_coll_i_HeI, k_coll_i_HeII, k_coll_i_HI_HI, k_coll_i_HI_HeI, + k_recomb_HII, k_recomb_HeII, k_recomb_HeIII, photo_i_HI, photo_i_HeI, photo_i_HeII, HI_dot_prev, + e_dot_prev, temp_prev, print); t_chem += dt_chem; n_iter += 1; @@ -662,9 +570,8 @@ __global__ void Update_Chemistry_kernel(Real *dev_conserved, int nx, int ny, if (print) printf("Chem_GPU: N Iter: %d\n", n_iter); // Make consistent abundances with the H and He density - correct_H = Chem_H.H_fraction * TS.d / (TS.d_HI + TS.d_HII); - correct_He = - (1.0 - Chem_H.H_fraction) * TS.d / (TS.d_HeI + TS.d_HeII + TS.d_HeIII); + correct_H = Chem_H.H_fraction * TS.d / (TS.d_HI + TS.d_HII); + correct_He = (1.0 - Chem_H.H_fraction) * TS.d / (TS.d_HeI + TS.d_HeII + TS.d_HeIII); TS.d_HI *= correct_H; TS.d_HII *= correct_H; TS.d_HeI *= correct_He; @@ -681,9 +588,9 @@ __global__ void Update_Chemistry_kernel(Real *dev_conserved, int nx, int ny, dev_conserved[id + n_cells * grid_enum::HeII_density] = TS.d_HeII * a3; dev_conserved[id + n_cells * grid_enum::HeIII_density] = TS.d_HeIII * a3; dev_conserved[id + n_cells * grid_enum::e_density] = TS.d_e * a3; - d = d / density_conv * a3; - GE = TS.U / d_inv / energy_conv * a2 / 1e-10; - dev_conserved[4 * n_cells + id] = GE + E_kin; + d = d / density_conv * a3; + GE = TS.U / d_inv / energy_conv * a2 / 1e-10; + dev_conserved[4 * n_cells + id] = GE + E_kin; #ifdef DE dev_conserved[(n_fields - 1) * n_cells + id] = GE; #endif @@ -695,14 +602,12 @@ __global__ void Update_Chemistry_kernel(Real *dev_conserved, int nx, int ny, if (print) printf("Updated HeII: %e\n", TS.d_HeII * a3); if (print) printf("Updated HeIII: %e\n", TS.d_HeIII * a3); if (print) printf("Updated e: %e\n", TS.d_e * a3); - if (print) - printf("Updated GE: %e\n", dev_conserved[(n_fields - 1) * n_cells + id]); + if (print) printf("Updated GE: %e\n", dev_conserved[(n_fields - 1) * n_cells + id]); if (print) printf("Updated E: %e\n", dev_conserved[4 * n_cells + id]); } } -void Do_Chemistry_Update(Real *dev_conserved, int nx, int ny, int nz, - int n_ghost, int n_fields, Real dt, +void Do_Chemistry_Update(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dt, Chemistry_Header &Chem_H) { float time; @@ -714,8 +619,8 @@ void Do_Chemistry_Update(Real *dev_conserved, int nx, int ny, int nz, int ngrid = (nx * ny * nz - 1) / TPB_CHEM + 1; dim3 dim1dGrid(ngrid, 1, 1); dim3 dim1dBlock(TPB_CHEM, 1, 1); - hipLaunchKernelGGL(Update_Chemistry_kernel, dim1dGrid, dim1dBlock, 0, 0, - dev_conserved, nx, ny, nz, n_ghost, n_fields, dt, Chem_H); + hipLaunchKernelGGL(Update_Chemistry_kernel, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, nz, n_ghost, n_fields, + dt, Chem_H); CudaCheckError(); cudaEventRecord(stop, 0); @@ -743,8 +648,7 @@ void Do_Chemistry_Update(Real *dev_conserved, int nx, int ny, int nz, #endif // Boltzmann's constant #ifndef kboltz - #define kboltz \ - 1.3806504e-16 // Boltzmann's constant [cm2gs-2K-1] or [ergK-1] + #define kboltz 1.3806504e-16 // Boltzmann's constant [cm2gs-2K-1] or [ergK-1] #endif // Calculation of k1 (HI + e --> HII + 2e) @@ -754,15 +658,11 @@ __device__ Real coll_i_HI_rate(Real T, Real units) Real T_ev = T / 11605.0; Real logT_ev = log(T_ev); - Real k1 = - exp(-32.71396786375 + 13.53655609057 * logT_ev - - 5.739328757388 * pow(logT_ev, 2) + 1.563154982022 * pow(logT_ev, 3) - - 0.2877056004391 * pow(logT_ev, 4) + - 0.03482559773736999 * pow(logT_ev, 5) - - 0.00263197617559 * pow(logT_ev, 6) + - 0.0001119543953861 * pow(logT_ev, 7) - - 2.039149852002e-6 * pow(logT_ev, 8)) / - units; + Real k1 = exp(-32.71396786375 + 13.53655609057 * logT_ev - 5.739328757388 * pow(logT_ev, 2) + + 1.563154982022 * pow(logT_ev, 3) - 0.2877056004391 * pow(logT_ev, 4) + + 0.03482559773736999 * pow(logT_ev, 5) - 0.00263197617559 * pow(logT_ev, 6) + + 0.0001119543953861 * pow(logT_ev, 7) - 2.039149852002e-6 * pow(logT_ev, 8)) / + units; if (T_ev <= 0.8) { k1 = fmax(tiny, k1); } @@ -777,14 +677,10 @@ __device__ Real coll_i_HeI_rate(Real T, Real units) Real logT_ev = log(T_ev); if (T_ev > 0.8) { - return exp(-44.09864886561001 + 23.91596563469 * logT_ev - - 10.75323019821 * pow(logT_ev, 2) + - 3.058038757198 * pow(logT_ev, 3) - - 0.5685118909884001 * pow(logT_ev, 4) + - 0.06795391233790001 * pow(logT_ev, 5) - - 0.005009056101857001 * pow(logT_ev, 6) + - 0.0002067236157507 * pow(logT_ev, 7) - - 3.649161410833e-6 * pow(logT_ev, 8)) / + return exp(-44.09864886561001 + 23.91596563469 * logT_ev - 10.75323019821 * pow(logT_ev, 2) + + 3.058038757198 * pow(logT_ev, 3) - 0.5685118909884001 * pow(logT_ev, 4) + + 0.06795391233790001 * pow(logT_ev, 5) - 0.005009056101857001 * pow(logT_ev, 6) + + 0.0002067236157507 * pow(logT_ev, 7) - 3.649161410833e-6 * pow(logT_ev, 8)) / units; } else { return tiny; @@ -804,8 +700,7 @@ __device__ Real recomb_HeII_rate(Real T, Real units, bool use_case_B) // If case B recombination off. if (T_ev > 0.8) { - return (1.54e-9 * (1.0 + 0.3 / exp(8.099328789667 / T_ev)) / - (exp(40.49664394833662 / T_ev) * pow(T_ev, 1.5)) + + return (1.54e-9 * (1.0 + 0.3 / exp(8.099328789667 / T_ev)) / (exp(40.49664394833662 / T_ev) * pow(T_ev, 1.5)) + 3.92e-13 / pow(T_ev, 0.6353)) / units; } else { @@ -818,8 +713,7 @@ __device__ Real recomb_HeII_rate_case_A(Real T, Real units) Real T_ev = T / 11605.0; Real logT_ev = log(T_ev); if (T_ev > 0.8) { - return (1.54e-9 * (1.0 + 0.3 / exp(8.099328789667 / T_ev)) / - (exp(40.49664394833662 / T_ev) * pow(T_ev, 1.5)) + + return (1.54e-9 * (1.0 + 0.3 / exp(8.099328789667 / T_ev)) / (exp(40.49664394833662 / T_ev) * pow(T_ev, 1.5)) + 3.92e-13 / pow(T_ev, 0.6353)) / units; } else { @@ -839,8 +733,7 @@ __device__ Real recomb_HII_rate(Real T, Real units, bool use_case_B) { if (use_case_B) { if (T < 1.0e9) { - return 4.881357e-6 * pow(T, -1.5) * - pow((1.0 + 1.14813e2 * pow(T, -0.407)), -2.242) / units; + return 4.881357e-6 * pow(T, -1.5) * pow((1.0 + 1.14813e2 * pow(T, -0.407)), -2.242) / units; } else { return tiny; } @@ -850,14 +743,10 @@ __device__ Real recomb_HII_rate(Real T, Real units, bool use_case_B) Real T_ev = T / tevk; Real logT_ev = log(T_ev); - return exp(-28.61303380689232 - 0.7241125657826851 * logT_ev - - 0.02026044731984691 * pow(logT_ev, 2) - - 0.002380861877349834 * pow(logT_ev, 3) - - 0.0003212605213188796 * pow(logT_ev, 4) - - 0.00001421502914054107 * pow(logT_ev, 5) + - 4.989108920299513e-6 * pow(logT_ev, 6) + - 5.755614137575758e-7 * pow(logT_ev, 7) - - 1.856767039775261e-8 * pow(logT_ev, 8) - + return exp(-28.61303380689232 - 0.7241125657826851 * logT_ev - 0.02026044731984691 * pow(logT_ev, 2) - + 0.002380861877349834 * pow(logT_ev, 3) - 0.0003212605213188796 * pow(logT_ev, 4) - + 0.00001421502914054107 * pow(logT_ev, 5) + 4.989108920299513e-6 * pow(logT_ev, 6) + + 5.755614137575758e-7 * pow(logT_ev, 7) - 1.856767039775261e-8 * pow(logT_ev, 8) - 3.071135243196595e-9 * pow(logT_ev, 9)) / units; } else { @@ -873,14 +762,10 @@ __device__ Real recomb_HII_rate_case_A(Real T, Real units) Real T_ev = T / tevk; Real logT_ev = log(T_ev); - return exp(-28.61303380689232 - 0.7241125657826851 * logT_ev - - 0.02026044731984691 * pow(logT_ev, 2) - - 0.002380861877349834 * pow(logT_ev, 3) - - 0.0003212605213188796 * pow(logT_ev, 4) - - 0.00001421502914054107 * pow(logT_ev, 5) + - 4.989108920299513e-6 * pow(logT_ev, 6) + - 5.755614137575758e-7 * pow(logT_ev, 7) - - 1.856767039775261e-8 * pow(logT_ev, 8) - + return exp(-28.61303380689232 - 0.7241125657826851 * logT_ev - 0.02026044731984691 * pow(logT_ev, 2) - + 0.002380861877349834 * pow(logT_ev, 3) - 0.0003212605213188796 * pow(logT_ev, 4) - + 0.00001421502914054107 * pow(logT_ev, 5) + 4.989108920299513e-6 * pow(logT_ev, 6) + + 5.755614137575758e-7 * pow(logT_ev, 7) - 1.856767039775261e-8 * pow(logT_ev, 8) - 3.071135243196595e-9 * pow(logT_ev, 9)) / units; } else { @@ -892,8 +777,7 @@ __device__ Real recomb_HII_rate_case_A(Real T, Real units) __device__ Real recomb_HII_rate_case_B(Real T, Real units) { if (T < 1.0e9) { - return 4.881357e-6 * pow(T, -1.5) * - pow((1.0 + 1.14813e2 * pow(T, -0.407)), -2.242) / units; + return 4.881357e-6 * pow(T, -1.5) * pow((1.0 + 1.14813e2 * pow(T, -0.407)), -2.242) / units; } else { return tiny; } @@ -908,14 +792,10 @@ __device__ Real coll_i_HeII_rate(Real T, Real units) Real k5; if (T_ev > 0.8) { - k5 = exp(-68.71040990212001 + 43.93347632635 * logT_ev - - 18.48066993568 * pow(logT_ev, 2) + - 4.701626486759002 * pow(logT_ev, 3) - - 0.7692466334492 * pow(logT_ev, 4) + - 0.08113042097303 * pow(logT_ev, 5) - - 0.005324020628287001 * pow(logT_ev, 6) + - 0.0001975705312221 * pow(logT_ev, 7) - - 3.165581065665e-6 * pow(logT_ev, 8)) / + k5 = exp(-68.71040990212001 + 43.93347632635 * logT_ev - 18.48066993568 * pow(logT_ev, 2) + + 4.701626486759002 * pow(logT_ev, 3) - 0.7692466334492 * pow(logT_ev, 4) + + 0.08113042097303 * pow(logT_ev, 5) - 0.005324020628287001 * pow(logT_ev, 6) + + 0.0001975705312221 * pow(logT_ev, 7) - 3.165581065665e-6 * pow(logT_ev, 8)) / units; } else { k5 = tiny; @@ -931,14 +811,12 @@ __device__ Real recomb_HeIII_rate(Real T, Real units, bool use_case_B) // Has case B recombination setting. if (use_case_B) { if (T < 1.0e9) { - k6 = 7.8155e-5 * pow(T, -1.5) * - pow((1.0 + 2.0189e2 * pow(T, -0.407)), -2.242) / units; + k6 = 7.8155e-5 * pow(T, -1.5) * pow((1.0 + 2.0189e2 * pow(T, -0.407)), -2.242) / units; } else { k6 = tiny; } } else { - k6 = 3.36e-10 / sqrt(T) / pow(T / 1.0e3, 0.2) / - (1.0 + pow(T / 1.0e6, 0.7)) / units; + k6 = 3.36e-10 / sqrt(T) / pow(T / 1.0e3, 0.2) / (1.0 + pow(T / 1.0e6, 0.7)) / units; } return k6; } @@ -947,8 +825,7 @@ __device__ Real recomb_HeIII_rate_case_A(Real T, Real units) { Real k6; // Has case B recombination setting. - k6 = 3.36e-10 / sqrt(T) / pow(T / 1.0e3, 0.2) / (1.0 + pow(T / 1.0e6, 0.7)) / - units; + k6 = 3.36e-10 / sqrt(T) / pow(T / 1.0e3, 0.2) / (1.0 + pow(T / 1.0e6, 0.7)) / units; return k6; } // k6_rate Case B @@ -957,8 +834,7 @@ __device__ Real recomb_HeIII_rate_case_B(Real T, Real units) Real k6; // Has case B recombination setting. if (T < 1.0e9) { - k6 = 7.8155e-5 * pow(T, -1.5) * - pow((1.0 + 2.0189e2 * pow(T, -0.407)), -2.242) / units; + k6 = 7.8155e-5 * pow(T, -1.5) * pow((1.0 + 2.0189e2 * pow(T, -0.407)), -2.242) / units; } else { k6 = tiny; } @@ -995,32 +871,28 @@ __device__ Real coll_i_HI_HeI_rate(Real T, Real units) // Cooling collisional excitation HI __host__ __device__ Real cool_ceHI_rate(Real T, Real units) { - return 7.5e-19 * exp(-fmin(log(dhuge), 118348.0 / T)) / - (1.0 + sqrt(T / 1.0e5)) / units; + return 7.5e-19 * exp(-fmin(log(dhuge), 118348.0 / T)) / (1.0 + sqrt(T / 1.0e5)) / units; } // Calculation of ceHeI. // Cooling collisional ionization HeI __host__ __device__ Real cool_ceHeI_rate(Real T, Real units) { - return 9.1e-27 * exp(-fmin(log(dhuge), 13179.0 / T)) * pow(T, -0.1687) / - (1.0 + sqrt(T / 1.0e5)) / units; + return 9.1e-27 * exp(-fmin(log(dhuge), 13179.0 / T)) * pow(T, -0.1687) / (1.0 + sqrt(T / 1.0e5)) / units; } // Calculation of ceHeII. // Cooling collisional excitation HeII __host__ __device__ Real cool_ceHeII_rate(Real T, Real units) { - return 5.54e-17 * exp(-fmin(log(dhuge), 473638.0 / T)) * pow(T, -0.3970) / - (1.0 + sqrt(T / 1.0e5)) / units; + return 5.54e-17 * exp(-fmin(log(dhuge), 473638.0 / T)) * pow(T, -0.3970) / (1.0 + sqrt(T / 1.0e5)) / units; } // Calculation of ciHeIS. // Cooling collisional ionization HeIS __host__ __device__ Real cool_ciHeIS_rate(Real T, Real units) { - return 5.01e-27 * pow(T, -0.1687) / (1.0 + sqrt(T / 1.0e5)) * - exp(-fmin(log(dhuge), 55338.0 / T)) / units; + return 5.01e-27 * pow(T, -0.1687) / (1.0 + sqrt(T / 1.0e5)) * exp(-fmin(log(dhuge), 55338.0 / T)) / units; } // Calculation of ciHI. @@ -1053,11 +925,9 @@ __host__ __device__ Real cool_reHII_rate(Real T, Real units, bool use_case_B) { Real lambdaHI = 2.0 * 157807.0 / T; if (use_case_B) { - return 3.435e-30 * T * pow(lambdaHI, 1.970) / - pow(1.0 + pow(lambdaHI / 2.25, 0.376), 3.720) / units; + return 3.435e-30 * T * pow(lambdaHI, 1.970) / pow(1.0 + pow(lambdaHI / 2.25, 0.376), 3.720) / units; } else { - return 1.778e-29 * T * pow(lambdaHI, 1.965) / - pow(1.0 + pow(lambdaHI / 0.541, 0.502), 2.697) / units; + return 1.778e-29 * T * pow(lambdaHI, 1.965) / pow(1.0 + pow(lambdaHI / 0.541, 0.502), 2.697) / units; } } @@ -1066,8 +936,7 @@ __host__ __device__ Real cool_reHII_rate(Real T, Real units, bool use_case_B) __host__ __device__ Real cool_reHII_rate_case_A(Real T, Real units) { Real lambdaHI = 2.0 * 157807.0 / T; - return 1.778e-29 * T * pow(lambdaHI, 1.965) / - pow(1.0 + pow(lambdaHI / 0.541, 0.502), 2.697) / units; + return 1.778e-29 * T * pow(lambdaHI, 1.965) / pow(1.0 + pow(lambdaHI / 0.541, 0.502), 2.697) / units; } // Calculation of reHII. @@ -1075,8 +944,7 @@ __host__ __device__ Real cool_reHII_rate_case_A(Real T, Real units) __host__ __device__ Real cool_reHII_rate_case_B(Real T, Real units) { Real lambdaHI = 2.0 * 157807.0 / T; - return 3.435e-30 * T * pow(lambdaHI, 1.970) / - pow(1.0 + pow(lambdaHI / 2.25, 0.376), 3.720) / units; + return 3.435e-30 * T * pow(lambdaHI, 1.970) / pow(1.0 + pow(lambdaHI / 2.25, 0.376), 3.720) / units; } // Calculation of reHII. @@ -1122,11 +990,9 @@ __host__ __device__ Real cool_reHeIII_rate(Real T, Real units, bool use_case_B) { Real lambdaHeIII = 2.0 * 631515.0 / T; if (use_case_B) { - return 8.0 * 3.435e-30 * T * pow(lambdaHeIII, 1.970) / - pow(1.0 + pow(lambdaHeIII / 2.25, 0.376), 3.720) / units; + return 8.0 * 3.435e-30 * T * pow(lambdaHeIII, 1.970) / pow(1.0 + pow(lambdaHeIII / 2.25, 0.376), 3.720) / units; } else { - return 8.0 * 1.778e-29 * T * pow(lambdaHeIII, 1.965) / - pow(1.0 + pow(lambdaHeIII / 0.541, 0.502), 2.697) / units; + return 8.0 * 1.778e-29 * T * pow(lambdaHeIII, 1.965) / pow(1.0 + pow(lambdaHeIII / 0.541, 0.502), 2.697) / units; } } @@ -1135,8 +1001,7 @@ __host__ __device__ Real cool_reHeIII_rate(Real T, Real units, bool use_case_B) __host__ __device__ Real cool_reHeIII_rate_case_A(Real T, Real units) { Real lambdaHeIII = 2.0 * 631515.0 / T; - return 8.0 * 1.778e-29 * T * pow(lambdaHeIII, 1.965) / - pow(1.0 + pow(lambdaHeIII / 0.541, 0.502), 2.697) / units; + return 8.0 * 1.778e-29 * T * pow(lambdaHeIII, 1.965) / pow(1.0 + pow(lambdaHeIII / 0.541, 0.502), 2.697) / units; } // Calculation of reHIII. @@ -1144,15 +1009,13 @@ __host__ __device__ Real cool_reHeIII_rate_case_A(Real T, Real units) __host__ __device__ Real cool_reHeIII_rate_case_B(Real T, Real units) { Real lambdaHeIII = 2.0 * 631515.0 / T; - return 8.0 * 3.435e-30 * T * pow(lambdaHeIII, 1.970) / - pow(1.0 + pow(lambdaHeIII / 2.25, 0.376), 3.720) / units; + return 8.0 * 3.435e-30 * T * pow(lambdaHeIII, 1.970) / pow(1.0 + pow(lambdaHeIII / 2.25, 0.376), 3.720) / units; } // Calculation of brem. // Cooling Bremsstrahlung __host__ __device__ Real cool_brem_rate(Real T, Real units) { - return 1.43e-27 * sqrt(T) * - (1.1 + 0.34 * exp(-pow(5.5 - log10(T), 2) / 3.0)) / units; + return 1.43e-27 * sqrt(T) * (1.1 + 0.34 * exp(-pow(5.5 - log10(T), 2) / 3.0)) / units; } #endif diff --git a/src/chemistry_gpu/chemistry_gpu.h b/src/chemistry_gpu/chemistry_gpu.h index 12b0c0364..9d0790a4e 100644 --- a/src/chemistry_gpu/chemistry_gpu.h +++ b/src/chemistry_gpu/chemistry_gpu.h @@ -126,8 +126,7 @@ class Chem_GPU void Initialize(struct parameters *P); - void Generate_Reaction_Rate_Table(Real **rate_table_array_d, - Rate_Function_T rate_function, Real units); + void Generate_Reaction_Rate_Table(Real **rate_table_array_d, Rate_Function_T rate_function, Real units); void Initialize_Cooling_Rates(); @@ -142,8 +141,7 @@ class Chem_GPU void Reset(); #ifdef TEXTURES_UVB_INTERPOLATION - void Bind_GPU_Textures(int size, float *H_HI_h, float *H_HeI_h, - float *H_HeII_h, float *I_HI_h, float *I_HeI_h, + void Bind_GPU_Textures(int size, float *H_HI_h, float *H_HeI_h, float *H_HeII_h, float *I_HI_h, float *I_HeI_h, float *I_HeII_h); #endif }; @@ -153,8 +151,7 @@ n_ghost, int n_fields, Real dt, Real gamma) * \brief When passed an array of conserved variables and a timestep, update the ionization fractions of H and He and update the internal energy to account for radiative cooling and photoheating from the UV background. */ -void Do_Chemistry_Update(Real *dev_conserved, int nx, int ny, int nz, - int n_ghost, int n_fields, Real dt, +void Do_Chemistry_Update(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dt, Chemistry_Header &Chem_H); #endif diff --git a/src/chemistry_gpu/chemistry_io.cpp b/src/chemistry_gpu/chemistry_io.cpp index a10729c10..458042968 100644 --- a/src/chemistry_gpu/chemistry_io.cpp +++ b/src/chemistry_gpu/chemistry_io.cpp @@ -85,8 +85,7 @@ void Chem_GPU::Load_UVB_Ionization_and_Heating_Rates(struct parameters *P) scale_factor_UVB_on = 1 / (rates_z_h[n_uvb_rates_samples - 1] + 1); chprintf(" Loaded UVB rates: \n"); chprintf(" N redshift values: %d \n", n_uvb_rates_samples); - chprintf(" z_min = %f z_max = %f \n", rates_z_h[0], - rates_z_h[n_uvb_rates_samples - 1]); + chprintf(" z_min = %f z_max = %f \n", rates_z_h[0], rates_z_h[n_uvb_rates_samples - 1]); chprintf(" UVB on: a=%f \n", scale_factor_UVB_on); } diff --git a/src/cooling/cooling_cuda.cu b/src/cooling/cooling_cuda.cu index 0fa3f3c36..dd076b839 100644 --- a/src/cooling/cooling_cuda.cu +++ b/src/cooling/cooling_cuda.cu @@ -18,16 +18,14 @@ cudaTextureObject_t coolTexObj = 0; cudaTextureObject_t heatTexObj = 0; -void Cooling_Update(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, - int n_fields, Real dt, Real gamma) +void Cooling_Update(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dt, Real gamma) { int n_cells = nx * ny * nz; int ngrid = (n_cells + TPB - 1) / TPB; dim3 dim1dGrid(ngrid, 1, 1); dim3 dim1dBlock(TPB, 1, 1); - hipLaunchKernelGGL(cooling_kernel, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, - nx, ny, nz, n_ghost, n_fields, dt, gama, coolTexObj, - heatTexObj); + hipLaunchKernelGGL(cooling_kernel, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, nz, n_ghost, n_fields, dt, + gama, coolTexObj, heatTexObj); CudaCheckError(); } @@ -37,10 +35,8 @@ void Cooling_Update(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, * \brief When passed an array of conserved variables and a timestep, adjust the value of the total energy for each cell according to the specified cooling function. */ -__global__ void cooling_kernel(Real *dev_conserved, int nx, int ny, int nz, - int n_ghost, int n_fields, Real dt, Real gamma, - cudaTextureObject_t coolTexObj, - cudaTextureObject_t heatTexObj) +__global__ void cooling_kernel(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dt, + Real gamma, cudaTextureObject_t coolTexObj, cudaTextureObject_t heatTexObj) { int n_cells = nx * ny * nz; int is, ie, js, je, ks, ke; @@ -200,10 +196,8 @@ __device__ Real primordial_cool(Real n, Real T) { Real n_h, Y, y, g_ff, cool; Real n_h0, n_hp, n_he0, n_hep, n_hepp, n_e, n_e_old; - Real alpha_hp, alpha_hep, alpha_d, alpha_hepp, gamma_eh0, gamma_ehe0, - gamma_ehep; - Real le_h0, le_hep, li_h0, li_he0, li_hep, lr_hp, lr_hep, lr_hepp, ld_hep, - l_ff; + Real alpha_hp, alpha_hep, alpha_d, alpha_hepp, gamma_eh0, gamma_ehe0, gamma_ehep; + Real le_h0, le_hep, li_h0, li_he0, li_hep, lr_hp, lr_hep, lr_hepp, ld_hep, l_ff; Real gamma_lh0, gamma_lhe0, gamma_lhep, e_h0, e_he0, e_hep, H; int heat_flag, n_iter; Real diff, tol; @@ -220,19 +214,13 @@ __device__ Real primordial_cool(Real n, Real T) // calculate the recombination and collisional ionization rates // (Table 2 from Katz 1996) - alpha_hp = (8.4e-11) * (1.0 / sqrt(T)) * pow((T / 1e3), (-0.2)) * - (1.0 / (1.0 + pow((T / 1e6), (0.7)))); - alpha_hep = (1.5e-10) * (pow(T, (-0.6353))); - alpha_d = (1.9e-3) * (pow(T, (-1.5))) * exp(-470000.0 / T) * - (1.0 + 0.3 * exp(-94000.0 / T)); - alpha_hepp = (3.36e-10) * (1.0 / sqrt(T)) * pow((T / 1e3), (-0.2)) * - (1.0 / (1.0 + pow((T / 1e6), (0.7)))); - gamma_eh0 = - (5.85e-11) * sqrt(T) * exp(-157809.1 / T) * (1.0 / (1.0 + sqrt(T / 1e5))); - gamma_ehe0 = - (2.38e-11) * sqrt(T) * exp(-285335.4 / T) * (1.0 / (1.0 + sqrt(T / 1e5))); - gamma_ehep = - (5.68e-12) * sqrt(T) * exp(-631515.0 / T) * (1.0 / (1.0 + sqrt(T / 1e5))); + alpha_hp = (8.4e-11) * (1.0 / sqrt(T)) * pow((T / 1e3), (-0.2)) * (1.0 / (1.0 + pow((T / 1e6), (0.7)))); + alpha_hep = (1.5e-10) * (pow(T, (-0.6353))); + alpha_d = (1.9e-3) * (pow(T, (-1.5))) * exp(-470000.0 / T) * (1.0 + 0.3 * exp(-94000.0 / T)); + alpha_hepp = (3.36e-10) * (1.0 / sqrt(T)) * pow((T / 1e3), (-0.2)) * (1.0 / (1.0 + pow((T / 1e6), (0.7)))); + gamma_eh0 = (5.85e-11) * sqrt(T) * exp(-157809.1 / T) * (1.0 / (1.0 + sqrt(T / 1e5))); + gamma_ehe0 = (2.38e-11) * sqrt(T) * exp(-285335.4 / T) * (1.0 / (1.0 + sqrt(T / 1e5))); + gamma_ehep = (5.68e-12) * sqrt(T) * exp(-631515.0 / T) * (1.0 / (1.0 + sqrt(T / 1e5))); // externally evaluated integrals for photoionization rates // assumed J(nu) = 10^-22 (nu_L/nu) gamma_lh0 = 3.19851e-13; @@ -264,11 +252,9 @@ __device__ Real primordial_cool(Real n, Real T) if (diff < tol) break; } } else { - n_h0 = n_h * alpha_hp / (alpha_hp + gamma_eh0); - n_hp = n_h - n_h0; - n_hep = y * n_h / - (1.0 + (alpha_hep + alpha_d) / (gamma_ehe0) + - (gamma_ehep) / alpha_hepp); + n_h0 = n_h * alpha_hp / (alpha_hp + gamma_eh0); + n_hp = n_h - n_h0; + n_hep = y * n_h / (1.0 + (alpha_hep + alpha_d) / (gamma_ehe0) + (gamma_ehep) / alpha_hepp); n_he0 = n_hep * (alpha_hep + alpha_d) / (gamma_ehe0); n_hepp = n_hep * (gamma_ehep) / alpha_hepp; n_e = n_hp + n_hep + 2 * n_hepp; @@ -276,30 +262,20 @@ __device__ Real primordial_cool(Real n, Real T) // using number densities, calculate cooling rates for // various processes (Table 1 from Katz 1996) - le_h0 = (7.50e-19) * exp(-118348.0 / T) * (1.0 / (1.0 + sqrt(T / 1e5))) * - n_e * n_h0; - le_hep = (5.54e-17) * pow(T, (-0.397)) * exp(-473638.0 / T) * - (1.0 / (1.0 + sqrt(T / 1e5))) * n_e * n_hep; - li_h0 = (1.27e-21) * sqrt(T) * exp(-157809.1 / T) * - (1.0 / (1.0 + sqrt(T / 1e5))) * n_e * n_h0; - li_he0 = (9.38e-22) * sqrt(T) * exp(-285335.4 / T) * - (1.0 / (1.0 + sqrt(T / 1e5))) * n_e * n_he0; - li_hep = (4.95e-22) * sqrt(T) * exp(-631515.0 / T) * - (1.0 / (1.0 + sqrt(T / 1e5))) * n_e * n_hep; - lr_hp = (8.70e-27) * sqrt(T) * pow((T / 1e3), (-0.2)) * - (1.0 / (1.0 + pow((T / 1e6), (0.7)))) * n_e * n_hp; + le_h0 = (7.50e-19) * exp(-118348.0 / T) * (1.0 / (1.0 + sqrt(T / 1e5))) * n_e * n_h0; + le_hep = (5.54e-17) * pow(T, (-0.397)) * exp(-473638.0 / T) * (1.0 / (1.0 + sqrt(T / 1e5))) * n_e * n_hep; + li_h0 = (1.27e-21) * sqrt(T) * exp(-157809.1 / T) * (1.0 / (1.0 + sqrt(T / 1e5))) * n_e * n_h0; + li_he0 = (9.38e-22) * sqrt(T) * exp(-285335.4 / T) * (1.0 / (1.0 + sqrt(T / 1e5))) * n_e * n_he0; + li_hep = (4.95e-22) * sqrt(T) * exp(-631515.0 / T) * (1.0 / (1.0 + sqrt(T / 1e5))) * n_e * n_hep; + lr_hp = (8.70e-27) * sqrt(T) * pow((T / 1e3), (-0.2)) * (1.0 / (1.0 + pow((T / 1e6), (0.7)))) * n_e * n_hp; lr_hep = (1.55e-26) * pow(T, (0.3647)) * n_e * n_hep; - lr_hepp = (3.48e-26) * sqrt(T) * pow((T / 1e3), (-0.2)) * - (1.0 / (1.0 + pow((T / 1e6), (0.7)))) * n_e * n_hepp; - ld_hep = (1.24e-13) * pow(T, (-1.5)) * exp(-470000.0 / T) * - (1.0 + 0.3 * exp(-94000.0 / T)) * n_e * n_hep; - g_ff = - 1.1 + 0.34 * exp(-(5.5 - log(T)) * (5.5 - log(T)) / 3.0); // Gaunt factor - l_ff = (1.42e-27) * g_ff * sqrt(T) * (n_hp + n_hep + 4 * n_hepp) * n_e; + lr_hepp = (3.48e-26) * sqrt(T) * pow((T / 1e3), (-0.2)) * (1.0 / (1.0 + pow((T / 1e6), (0.7)))) * n_e * n_hepp; + ld_hep = (1.24e-13) * pow(T, (-1.5)) * exp(-470000.0 / T) * (1.0 + 0.3 * exp(-94000.0 / T)) * n_e * n_hep; + g_ff = 1.1 + 0.34 * exp(-(5.5 - log(T)) * (5.5 - log(T)) / 3.0); // Gaunt factor + l_ff = (1.42e-27) * g_ff * sqrt(T) * (n_hp + n_hep + 4 * n_hepp) * n_e; // calculate total cooling rate (erg s^-1 cm^-3) - cool = le_h0 + le_hep + li_h0 + li_he0 + li_hep + lr_hp + lr_hep + lr_hepp + - ld_hep + l_ff; + cool = le_h0 + le_hep + li_h0 + li_he0 + li_hep + lr_hp + lr_hep + lr_hepp + ld_hep + l_ff; // calculate total photoionization heating rate H = 0.0; @@ -342,8 +318,7 @@ __device__ Real CIE_cool(Real n, Real T) coolTexObj, cudaTextureObject_t heatTexObj) * \brief Uses texture mapping to interpolate Cloudy cooling/heating tables at z = 0 with solar metallicity and an HM05 UV background. */ -__device__ Real Cloudy_cool(Real n, Real T, cudaTextureObject_t coolTexObj, - cudaTextureObject_t heatTexObj) +__device__ Real Cloudy_cool(Real n, Real T, cudaTextureObject_t coolTexObj, cudaTextureObject_t heatTexObj) { Real lambda = 0.0; // cooling rate, erg s^-1 cm^3 Real H = 0.0; // heating rate, erg s^-1 cm^3 diff --git a/src/cooling/cooling_cuda.h b/src/cooling/cooling_cuda.h index 68b8d7e04..32fa3207a 100644 --- a/src/cooling/cooling_cuda.h +++ b/src/cooling/cooling_cuda.h @@ -18,18 +18,15 @@ extern cudaTextureObject_t heatTexObj; * \brief When passed an array of conserved variables and a timestep, adjust the value of the total energy for each cell according to the specified cooling function. */ -void Cooling_Update(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, - int n_fields, Real dt, Real gamma); +void Cooling_Update(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dt, Real gamma); /*! \fn void cooling_kernel(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, Real dt, Real gamma) * \brief When passed an array of conserved variables and a timestep, adjust the value of the total energy for each cell according to the specified cooling function. */ -__global__ void cooling_kernel(Real *dev_conserved, int nx, int ny, int nz, - int n_ghost, int n_fields, Real dt, Real gamma, - cudaTextureObject_t coolTexObj, - cudaTextureObject_t heatTexObj); +__global__ void cooling_kernel(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dt, + Real gamma, cudaTextureObject_t coolTexObj, cudaTextureObject_t heatTexObj); /* \fn __device__ Real test_cool(Real n, Real T) * \brief Cooling function from Creasey 2011. */ @@ -49,8 +46,7 @@ __device__ Real CIE_cool(Real n, Real T); coolTexObj, cudaTextureObject_t heatTexObj) * \brief Uses texture mapping to interpolate Cloudy cooling/heating tables at z = 0 with solar metallicity and an HM05 UV background. */ -__device__ Real Cloudy_cool(Real n, Real T, cudaTextureObject_t coolTexObj, - cudaTextureObject_t heatTexObj); +__device__ Real Cloudy_cool(Real n, Real T, cudaTextureObject_t coolTexObj, cudaTextureObject_t heatTexObj); #endif // COOLING_GPU #endif // CUDA diff --git a/src/cooling/load_cloudy_texture.cu b/src/cooling/load_cloudy_texture.cu index ce4839425..e0022fbb5 100644 --- a/src/cooling/load_cloudy_texture.cu +++ b/src/cooling/load_cloudy_texture.cu @@ -110,17 +110,14 @@ void Load_Cuda_Textures() // allocate host arrays to be copied to textures // these arrays are declared as external pointers in global.h - CudaSafeCall(cudaHostAlloc(&cooling_table, nx * ny * sizeof(float), - cudaHostAllocDefault)); - CudaSafeCall(cudaHostAlloc(&heating_table, nx * ny * sizeof(float), - cudaHostAllocDefault)); + CudaSafeCall(cudaHostAlloc(&cooling_table, nx * ny * sizeof(float), cudaHostAllocDefault)); + CudaSafeCall(cudaHostAlloc(&heating_table, nx * ny * sizeof(float), cudaHostAllocDefault)); // Read cooling tables into the host arrays Host_Read_Cooling_Tables(cooling_table, heating_table); // Allocate CUDA arrays in device memory - cudaChannelFormatDesc channelDesc = - cudaCreateChannelDesc(32, 0, 0, 0, cudaChannelFormatKindFloat); + cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(32, 0, 0, 0, cudaChannelFormatKindFloat); cudaMallocArray(&cuCoolArray, &channelDesc, nx, ny); cudaMallocArray(&cuHeatArray, &channelDesc, nx, ny); @@ -131,10 +128,10 @@ void Load_Cuda_Textures() // cudaMemcpyHostToDevice); cudaMemcpyToArray(cuHeatArray, 0, 0, // heating_table, nx*ny*sizeof(float), cudaMemcpyHostToDevice); - cudaMemcpy2DToArray(cuCoolArray, 0, 0, cooling_table, nx * sizeof(float), - nx * sizeof(float), ny, cudaMemcpyHostToDevice); - cudaMemcpy2DToArray(cuHeatArray, 0, 0, heating_table, nx * sizeof(float), - nx * sizeof(float), ny, cudaMemcpyHostToDevice); + cudaMemcpy2DToArray(cuCoolArray, 0, 0, cooling_table, nx * sizeof(float), nx * sizeof(float), ny, + cudaMemcpyHostToDevice); + cudaMemcpy2DToArray(cuHeatArray, 0, 0, heating_table, nx * sizeof(float), nx * sizeof(float), ny, + cudaMemcpyHostToDevice); // Specify textures struct cudaResourceDesc coolResDesc; @@ -149,12 +146,10 @@ void Load_Cuda_Textures() // Specify texture object parameters (same for both tables) struct cudaTextureDesc texDesc; memset(&texDesc, 0, sizeof(texDesc)); - texDesc.addressMode[0] = - cudaAddressModeClamp; // out-of-bounds fetches return border values - // dimension 0 - texDesc.addressMode[1] = - cudaAddressModeClamp; // out-of-bounds fetches return border values - // dimension 1 + texDesc.addressMode[0] = cudaAddressModeClamp; // out-of-bounds fetches return border values + // dimension 0 + texDesc.addressMode[1] = cudaAddressModeClamp; // out-of-bounds fetches return border values + // dimension 1 texDesc.filterMode = cudaFilterModePoint; // We use point mode instead of Linear mode in order to do the interpolation // ourselves. Linear mode introduces errors since it only uses 8 bits. @@ -190,8 +185,7 @@ void Free_Cuda_Textures() /* Consider this function only to be used at the end of Load_Cuda_Textures when * testing Evaluate texture on grid of size num_n num_T for variables n,T */ -__global__ void Test_Cloudy_Textures_Kernel(int num_n, int num_T, - cudaTextureObject_t coolTexObj, +__global__ void Test_Cloudy_Textures_Kernel(int num_n, int num_T, cudaTextureObject_t coolTexObj, cudaTextureObject_t heatTexObj) { int id, id_n, id_T; @@ -215,10 +209,8 @@ __global__ void Test_Cloudy_Textures_Kernel(int num_n, int num_T, float rlog_n = (log_n + 6.0) * 10; // Evaluate - float lambda = Bilinear_Texture( - coolTexObj, rlog_T, rlog_n); // tex2D(coolTexObj, rlog_T, rlog_n); - float heat = Bilinear_Texture( - heatTexObj, rlog_T, rlog_n); // tex2D(heatTexObj, rlog_T, rlog_n); + float lambda = Bilinear_Texture(coolTexObj, rlog_T, rlog_n); // tex2D(coolTexObj, rlog_T, rlog_n); + float heat = Bilinear_Texture(heatTexObj, rlog_T, rlog_n); // tex2D(heatTexObj, rlog_T, rlog_n); // Hackfully print it out for processing for correctness printf("TEST_Cloudy: %.17e %.17e %.17e %.17e \n", log_T, log_n, lambda, heat); @@ -226,8 +218,7 @@ __global__ void Test_Cloudy_Textures_Kernel(int num_n, int num_T, /* Consider this function only to be used at the end of Load_Cuda_Textures when * testing Evaluate texture on grid of size num_n num_T for variables n,T */ -__global__ void Test_Cloudy_Speed_Kernel(int num_n, int num_T, - cudaTextureObject_t coolTexObj, +__global__ void Test_Cloudy_Speed_Kernel(int num_n, int num_T, cudaTextureObject_t coolTexObj, cudaTextureObject_t heatTexObj) { int id, id_n, id_T; @@ -253,10 +244,8 @@ __global__ void Test_Cloudy_Speed_Kernel(int num_n, int num_T, float rlog_n = (id_n - 1) * 0.0125; // Evaluate - float lambda = Bilinear_Texture( - coolTexObj, rlog_T, rlog_n); // tex2D(coolTexObj, rlog_T, rlog_n); - float heat = Bilinear_Texture( - heatTexObj, rlog_T, rlog_n); // tex2D(heatTexObj, rlog_T, rlog_n); + float lambda = Bilinear_Texture(coolTexObj, rlog_T, rlog_n); // tex2D(coolTexObj, rlog_T, rlog_n); + float heat = Bilinear_Texture(heatTexObj, rlog_T, rlog_n); // tex2D(heatTexObj, rlog_T, rlog_n); // Hackfully print it out for processing for correctness // printf("TEST_Cloudy: %.17e %.17e %.17e %.17e \n",log_T, log_n, lambda, @@ -271,8 +260,7 @@ void Test_Cloudy_Textures() int num_T = 1 + 2 * 81; dim3 dim1dGrid((num_n * num_T + TPB - 1) / TPB, 1, 1); dim3 dim1dBlock(TPB, 1, 1); - hipLaunchKernelGGL(Test_Cloudy_Textures_Kernel, dim1dGrid, dim1dBlock, 0, 0, - num_n, num_T, coolTexObj, heatTexObj); + hipLaunchKernelGGL(Test_Cloudy_Textures_Kernel, dim1dGrid, dim1dBlock, 0, 0, num_n, num_T, coolTexObj, heatTexObj); CHECK(cudaDeviceSynchronize()); printf("Exiting due to Test_Cloudy_Textures() being called \n"); exit(0); @@ -287,8 +275,7 @@ void Test_Cloudy_Speed() CHECK(cudaDeviceSynchronize()); Real time_start = get_time(); for (int i = 0; i < 100; i++) { - hipLaunchKernelGGL(Test_Cloudy_Speed_Kernel, dim1dGrid, dim1dBlock, 0, 0, - num_n, num_T, coolTexObj, heatTexObj); + hipLaunchKernelGGL(Test_Cloudy_Speed_Kernel, dim1dGrid, dim1dBlock, 0, 0, num_n, num_T, coolTexObj, heatTexObj); } CHECK(cudaDeviceSynchronize()); Real time_end = get_time(); diff --git a/src/cooling/texture_utilities.h b/src/cooling/texture_utilities.h index 70b1baebf..56d1ac82b 100644 --- a/src/cooling/texture_utilities.h +++ b/src/cooling/texture_utilities.h @@ -13,17 +13,13 @@ #include "../global/global.h" #include "../utils/gpu.hpp" -inline __device__ float lerp(float v0, float v1, float f) -{ - return fma(f, v1, fma(-f, v0, v0)); -} +inline __device__ float lerp(float v0, float v1, float f) { return fma(f, v1, fma(-f, v0, v0)); } /* \fn float Bilinear_Texture(cudaTextureObject_t tex, float x, float y) \brief Access texture values from tex at coordinates (x,y) using bilinear interpolation */ -inline __device__ float Bilinear_Texture(cudaTextureObject_t tex, float x, - float y) +inline __device__ float Bilinear_Texture(cudaTextureObject_t tex, float x, float y) { // Split coordinates into integer px/py and fractional fx/fy parts float px = floorf(x); diff --git a/src/cooling_grackle/cool_grackle.cpp b/src/cooling_grackle/cool_grackle.cpp index bcb829f58..f57edcdb4 100644 --- a/src/cooling_grackle/cool_grackle.cpp +++ b/src/cooling_grackle/cool_grackle.cpp @@ -45,22 +45,19 @@ void Cool_GK::Initialize(struct parameters *P, Cosmology &Cosmo) Real kpc = KPC_CGS; Real km = KM_CGS - dens_to_CGS = - dens_conv * Msun / kpc / kpc / kpc * Cosmo.cosmo_h * Cosmo.cosmo_h; - vel_to_CGS = km; - energy_to_CGS = km * km; + dens_to_CGS = dens_conv * Msun / kpc / kpc / kpc * Cosmo.cosmo_h * Cosmo.cosmo_h; + vel_to_CGS = km; + energy_to_CGS = km * km; // First, set up the units system. // These are conversions from code units to cgs. units.comoving_coordinates = 1; // 1 if cosmological sim, 0 if not units.a_units = 1.0; // units for the expansion factor units.a_value = Cosmo.current_a / units.a_units; - units.density_units = - dens_to_CGS / Cosmo.current_a / Cosmo.current_a / Cosmo.current_a; - units.length_units = kpc / Cosmo.cosmo_h * Cosmo.current_a; - units.time_units = KPC / Cosmo.cosmo_h; - units.velocity_units = units.length_units / Cosmo.current_a / - units.time_units; // since u = a * dx/dt + units.density_units = dens_to_CGS / Cosmo.current_a / Cosmo.current_a / Cosmo.current_a; + units.length_units = kpc / Cosmo.cosmo_h * Cosmo.current_a; + units.time_units = KPC / Cosmo.cosmo_h; + units.velocity_units = units.length_units / Cosmo.current_a / units.time_units; // since u = a * dx/dt // Second, create a chemistry object for parameters. This needs to be a // pointer. @@ -99,9 +96,7 @@ void Cool_GK::Initialize(struct parameters *P, Cosmology &Cosmo) data->omp_nthreads = N_OMP_THREADS_GRACKLE; #endif - if (data->UVbackground == 1) - chprintf("GRACKLE: Loading UV Background File: %s\n", - data->grackle_data_file); + if (data->UVbackground == 1) chprintf("GRACKLE: Loading UV Background File: %s\n", data->grackle_data_file); // Finally, initialize the chemistry object. if (initialize_chemistry_data(&units) == 0) { @@ -150,8 +145,7 @@ void Grid3D::Allocate_Memory_Grackle() Cool.fields.y_velocity = NULL; Cool.fields.z_velocity = NULL; - chprintf( - " Allocating memory for: HI, HII, HeI, HeII, HeIII, e densities\n"); + chprintf(" Allocating memory for: HI, HII, HeI, HeII, HeIII, e densities\n"); Cool.fields.HI_density = &C.host[H.n_cells * grid_enum::HI_density]; Cool.fields.HII_density = &C.host[H.n_cells * grid_enum::HII_density]; Cool.fields.HeI_density = &C.host[H.n_cells * grid_enum::HeI_density]; diff --git a/src/cooling_grackle/grackle_functions.cpp b/src/cooling_grackle/grackle_functions.cpp index e3ba1213b..d68281c3d 100644 --- a/src/cooling_grackle/grackle_functions.cpp +++ b/src/cooling_grackle/grackle_functions.cpp @@ -33,9 +33,8 @@ void Grid3D::Initialize_Fields_Grackle() // Cool.fields.y_velocity[id] = 0.0; // Cool.fields.z_velocity[id] = 0.0; - Cool.fields.internal_energy[id] = C.GasEnergy[id] / C.density[id] * - Cool.energy_conv / Cosmo.current_a / - Cosmo.current_a; + Cool.fields.internal_energy[id] = + C.GasEnergy[id] / C.density[id] * Cool.energy_conv / Cosmo.current_a / Cosmo.current_a; } } } @@ -130,9 +129,8 @@ void Grid3D::Copy_Fields_To_Grackle_function(int g_start, int g_end) // if ( flag_DE ) U = GE; // else U = E - Ekin; - U = GE; - Cool.fields.internal_energy[id] = - U / d * Cool.energy_conv / Cosmo.current_a / Cosmo.current_a; + U = GE; + Cool.fields.internal_energy[id] = U / d * Cool.energy_conv / Cosmo.current_a / Cosmo.current_a; } } } @@ -171,9 +169,8 @@ void Grid3D::Update_Internal_Energy_function(int g_start, int g_end) // else if ( flag_DE == 1 ) U_0 = GE; // else std::cout << " ### Frag_DE ERROR: Flag_DE: " << flag_DE << // std::endl; - U_0 = GE; - U_1 = Cool.fields.internal_energy[id] * dens / Cool.energy_conv * - Cosmo.current_a * Cosmo.current_a; + U_0 = GE; + U_1 = Cool.fields.internal_energy[id] * dens / Cool.energy_conv * Cosmo.current_a * Cosmo.current_a; delta_U = U_1 - U_0; C.GasEnergy[id] += delta_U; C.Energy[id] += delta_U; @@ -186,17 +183,15 @@ void Grid3D::Do_Cooling_Step_Grackle() { Real kpc_cgs = KPC_CGS; // Update the units conversion - Cool.units.a_value = Cosmo.current_a / Cool.units.a_units; - Cool.units.density_units = - Cool.dens_to_CGS / Cosmo.current_a / Cosmo.current_a / Cosmo.current_a; - Cool.units.length_units = kpc_cgs / Cosmo.cosmo_h * Cosmo.current_a; + Cool.units.a_value = Cosmo.current_a / Cool.units.a_units; + Cool.units.density_units = Cool.dens_to_CGS / Cosmo.current_a / Cosmo.current_a / Cosmo.current_a; + Cool.units.length_units = kpc_cgs / Cosmo.cosmo_h * Cosmo.current_a; Copy_Fields_To_Grackle(); Real dt_cool = Cosmo.dt_secs; chprintf(" dt_cool: %e s\n", dt_cool); - if (solve_chemistry(&Cool.units, &Cool.fields, - dt_cool / Cool.units.time_units) == 0) { + if (solve_chemistry(&Cool.units, &Cool.fields, dt_cool / Cool.units.time_units) == 0) { chprintf("GRACKLE: Error in solve_chemistry.\n"); return; } @@ -222,8 +217,7 @@ Real Cool_GK::Get_Mean_Molecular_Weight(int cell_id) HeII_dens = fields.HeII_density[cell_id]; HeIII_dens = fields.HeIII_density[cell_id]; - mu = dens / (HI_dens + 2 * HII_dens + - (HeI_dens + 2 * HeII_dens + 3 * HeIII_dens) / 4); + mu = dens / (HI_dens + 2 * HII_dens + (HeI_dens + 2 * HeII_dens + 3 * HeIII_dens) / 4); return mu; } diff --git a/src/cosmology/cosmology.cpp b/src/cosmology/cosmology.cpp index 4c6821e71..ac0045b6b 100644 --- a/src/cosmology/cosmology.cpp +++ b/src/cosmology/cosmology.cpp @@ -6,8 +6,7 @@ Cosmology::Cosmology(void) {} -void Cosmology::Initialize(struct parameters *P, Grav3D &Grav, - Particles_3D &Particles) +void Cosmology::Initialize(struct parameters *P, Grav3D &Grav, Particles_3D &Particles) { chprintf("Cosmological Simulation\n"); @@ -47,12 +46,11 @@ void Cosmology::Initialize(struct parameters *P, Grav3D &Grav, time_conversion = KPC; // Set Normalization factors - r_0_dm = P->xlen / P->nx; - t_0_dm = 1. / H0; - v_0_dm = r_0_dm / t_0_dm / cosmo_h; - rho_0_dm = 3 * H0 * H0 / (8 * M_PI * cosmo_G) * Omega_M / cosmo_h / cosmo_h; - rho_mean_baryon = - 3 * H0 * H0 / (8 * M_PI * cosmo_G) * Omega_b / cosmo_h / cosmo_h; + r_0_dm = P->xlen / P->nx; + t_0_dm = 1. / H0; + v_0_dm = r_0_dm / t_0_dm / cosmo_h; + rho_0_dm = 3 * H0 * H0 / (8 * M_PI * cosmo_G) * Omega_M / cosmo_h / cosmo_h; + rho_mean_baryon = 3 * H0 * H0 / (8 * M_PI * cosmo_G) * Omega_b / cosmo_h / cosmo_h; // dens_avrg = 0; r_0_gas = 1.0; diff --git a/src/cosmology/cosmology_functions.cpp b/src/cosmology/cosmology_functions.cpp index d58ae9c2d..517549dd6 100644 --- a/src/cosmology/cosmology_functions.cpp +++ b/src/cosmology/cosmology_functions.cpp @@ -81,13 +81,11 @@ void Grid3D::Change_GAS_Frame_System(bool forward) if (forward) { dens_factor = 1 / Cosmo.rho_0_gas; momentum_factor = 1 / Cosmo.rho_0_gas / Cosmo.v_0_gas * Cosmo.current_a; - energy_factor = 1 / Cosmo.rho_0_gas / Cosmo.v_0_gas / Cosmo.v_0_gas * - Cosmo.current_a * Cosmo.current_a; + energy_factor = 1 / Cosmo.rho_0_gas / Cosmo.v_0_gas / Cosmo.v_0_gas * Cosmo.current_a * Cosmo.current_a; } else { dens_factor = Cosmo.rho_0_gas; momentum_factor = Cosmo.rho_0_gas * Cosmo.v_0_gas / Cosmo.current_a; - energy_factor = Cosmo.rho_0_gas * Cosmo.v_0_gas * Cosmo.v_0_gas / - Cosmo.current_a / Cosmo.current_a; + energy_factor = Cosmo.rho_0_gas * Cosmo.v_0_gas * Cosmo.v_0_gas / Cosmo.current_a / Cosmo.current_a; } int k, j, i, id; for (k = 0; k < H.nz; k++) { diff --git a/src/cosmology/cosmology_functions_gpu.cu b/src/cosmology/cosmology_functions_gpu.cu index e73630a99..e6da6dc66 100644 --- a/src/cosmology/cosmology_functions_gpu.cu +++ b/src/cosmology/cosmology_functions_gpu.cu @@ -11,10 +11,9 @@ // // } -void __global__ Change_GAS_Frame_System_kernel( - Real dens_factor, Real momentum_factor, Real energy_factor, int nx, int ny, - int nz, Real *density_d, Real *momentum_x_d, Real *momentum_y_d, - Real *momentum_z_d, Real *Energy_d, Real *GasEnergy_d) +void __global__ Change_GAS_Frame_System_kernel(Real dens_factor, Real momentum_factor, Real energy_factor, int nx, + int ny, int nz, Real *density_d, Real *momentum_x_d, Real *momentum_y_d, + Real *momentum_z_d, Real *Energy_d, Real *GasEnergy_d) { int tid_x, tid_y, tid_z, tid_grid; tid_x = blockIdx.x * blockDim.x + threadIdx.x; @@ -44,13 +43,11 @@ void Grid3D::Change_GAS_Frame_System_GPU(bool forward) if (forward) { dens_factor = 1 / Cosmo.rho_0_gas; momentum_factor = 1 / Cosmo.rho_0_gas / Cosmo.v_0_gas * Cosmo.current_a; - energy_factor = 1 / Cosmo.rho_0_gas / Cosmo.v_0_gas / Cosmo.v_0_gas * - Cosmo.current_a * Cosmo.current_a; + energy_factor = 1 / Cosmo.rho_0_gas / Cosmo.v_0_gas / Cosmo.v_0_gas * Cosmo.current_a * Cosmo.current_a; } else { dens_factor = Cosmo.rho_0_gas; momentum_factor = Cosmo.rho_0_gas * Cosmo.v_0_gas / Cosmo.current_a; - energy_factor = Cosmo.rho_0_gas * Cosmo.v_0_gas * Cosmo.v_0_gas / - Cosmo.current_a / Cosmo.current_a; + energy_factor = Cosmo.rho_0_gas * Cosmo.v_0_gas * Cosmo.v_0_gas / Cosmo.current_a / Cosmo.current_a; } int nx, ny, nz; @@ -77,10 +74,9 @@ void Grid3D::Change_GAS_Frame_System_GPU(bool forward) GasEnergy_d = NULL; #endif - hipLaunchKernelGGL(Change_GAS_Frame_System_kernel, dim3dGrid, dim3dBlock, 0, - 0, dens_factor, momentum_factor, energy_factor, nx, ny, nz, - C.d_density, C.d_momentum_x, C.d_momentum_y, - C.d_momentum_z, C.d_Energy, GasEnergy_d); + hipLaunchKernelGGL(Change_GAS_Frame_System_kernel, dim3dGrid, dim3dBlock, 0, 0, dens_factor, momentum_factor, + energy_factor, nx, ny, nz, C.d_density, C.d_momentum_x, C.d_momentum_y, C.d_momentum_z, C.d_Energy, + GasEnergy_d); } #endif // COSMOLOGY diff --git a/src/dust/dust_cuda.cu b/src/dust/dust_cuda.cu index 5df251725..6d158dd72 100644 --- a/src/dust/dust_cuda.cu +++ b/src/dust/dust_cuda.cu @@ -16,20 +16,17 @@ #include "../utils/hydro_utilities.h" #include "dust_cuda.h" -void Dust_Update(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, - int n_fields, Real dt, Real gamma) +void Dust_Update(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dt, Real gamma) { int n_cells = nx * ny * nz; int ngrid = (n_cells + TPB - 1) / TPB; dim3 dim1dGrid(ngrid, 1, 1); dim3 dim1dBlock(TPB, 1, 1); - hipLaunchKernelGGL(Dust_Kernel, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, - nx, ny, nz, n_ghost, n_fields, dt, gamma); + hipLaunchKernelGGL(Dust_Kernel, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, nz, n_ghost, n_fields, dt, gamma); CudaCheckError(); } -__global__ void Dust_Kernel(Real *dev_conserved, int nx, int ny, int nz, - int n_ghost, int n_fields, Real dt, Real gamma) +__global__ void Dust_Kernel(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dt, Real gamma) { // get grid indices int n_cells = nx * ny * nz; @@ -130,10 +127,7 @@ __device__ __host__ Real calc_tau_sp(Real n, Real T) } // McKinnon et al. (2017) -__device__ __host__ Real calc_dd_dt(Real d_dust, Real tau_sp) -{ - return -d_dust / (tau_sp / 3); -} +__device__ __host__ Real calc_dd_dt(Real d_dust, Real tau_sp) { return -d_dust / (tau_sp / 3); } #endif // DUST #endif // CUDA diff --git a/src/dust/dust_cuda.h b/src/dust/dust_cuda.h index 5fd9ffb33..0377b645b 100644 --- a/src/dust/dust_cuda.h +++ b/src/dust/dust_cuda.h @@ -9,11 +9,10 @@ #include "../global/global.h" #include "../utils/gpu.hpp" -void Dust_Update(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, - int n_fields, Real dt, Real gamma); +void Dust_Update(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dt, Real gamma); -__global__ void Dust_Kernel(Real *dev_conserved, int nx, int ny, int nz, - int n_ghost, int n_fields, Real dt, Real gamma); +__global__ void Dust_Kernel(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dt, + Real gamma); __device__ __host__ Real calc_tau_sp(Real n, Real T); diff --git a/src/dust/dust_cuda_tests.cpp b/src/dust/dust_cuda_tests.cpp index b6a56a292..e3db8dac1 100644 --- a/src/dust/dust_cuda_tests.cpp +++ b/src/dust/dust_cuda_tests.cpp @@ -39,17 +39,12 @@ TEST(tDUSTTestSputteringTimescale, bool is_true; - is_true = testingUtilities::nearlyEqualDbl(k_fiducial_num, test_num, abs_diff, - ulps_diff); - - EXPECT_TRUE(is_true) << "The fiducial value is: " << k_fiducial_num - << std::endl - << "The test value is: " << test_num - << std::endl - << "The absolute difference is: " << abs_diff - << std::endl - << "The ULP difference is: " << ulps_diff - << std::endl; + is_true = testingUtilities::nearlyEqualDbl(k_fiducial_num, test_num, abs_diff, ulps_diff); + + EXPECT_TRUE(is_true) << "The fiducial value is: " << k_fiducial_num << std::endl + << "The test value is: " << test_num << std::endl + << "The absolute difference is: " << abs_diff << std::endl + << "The ULP difference is: " << ulps_diff << std::endl; } TEST(tDUSTTestSputteringGrowthRate, @@ -69,17 +64,12 @@ TEST(tDUSTTestSputteringGrowthRate, bool is_true; - is_true = testingUtilities::nearlyEqualDbl(k_fiducial_num, test_num, abs_diff, - ulps_diff); - - EXPECT_TRUE(is_true) << "The fiducial value is: " << k_fiducial_num - << std::endl - << "The test value is: " << test_num - << std::endl - << "The absolute difference is: " << abs_diff - << std::endl - << "The ULP difference is: " << ulps_diff - << std::endl; + is_true = testingUtilities::nearlyEqualDbl(k_fiducial_num, test_num, abs_diff, ulps_diff); + + EXPECT_TRUE(is_true) << "The fiducial value is: " << k_fiducial_num << std::endl + << "The test value is: " << test_num << std::endl + << "The absolute difference is: " << abs_diff << std::endl + << "The ULP difference is: " << ulps_diff << std::endl; } #endif // DUST \ No newline at end of file diff --git a/src/global/global.cpp b/src/global/global.cpp index 9447f2548..ec28208e5 100644 --- a/src/global/global.cpp +++ b/src/global/global.cpp @@ -61,11 +61,9 @@ Real calc_eta(Real cW[], Real gamma) { Real pl, pr, al, ar; - pl = (cW[8] - 0.5 * (cW[2] * cW[2] + cW[4] * cW[4] + cW[6] * cW[6]) / cW[0]) * - (gamma - 1.0); + pl = (cW[8] - 0.5 * (cW[2] * cW[2] + cW[4] * cW[4] + cW[6] * cW[6]) / cW[0]) * (gamma - 1.0); pl = fmax(pl, TINY_NUMBER); - pr = (cW[9] - 0.5 * (cW[3] * cW[3] + cW[5] * cW[5] + cW[7] * cW[7]) / cW[1]) * - (gamma - 1.0); + pr = (cW[9] - 0.5 * (cW[3] * cW[3] + cW[5] * cW[5] + cW[7] * cW[7]) / cW[1]) * (gamma - 1.0); pr = fmax(pr, TINY_NUMBER); al = sqrt(gamma * pl / cW[0]); @@ -95,11 +93,9 @@ char *trim(char *s) } const std::set optionalParams = { - "flag_delta", "ddelta_dt", "n_delta", "Lz", - "Lx", "phi", "theta", "delta", - "nzr", "nxr", "H0", "Omega_M", - "Omega_L", "Init_redshift", "End_redshift", "tile_length", - "n_proc_x", "n_proc_y", "n_proc_z"}; + "flag_delta", "ddelta_dt", "n_delta", "Lz", "Lx", "phi", "theta", + "delta", "nzr", "nxr", "H0", "Omega_M", "Omega_L", "Init_redshift", + "End_redshift", "tile_length", "n_proc_x", "n_proc_y", "n_proc_z"}; /*! \fn int is_param_valid(char *name); * \brief Verifies that a param is valid (even if not needed). Avoids @@ -116,15 +112,13 @@ void parse_param(char *name, char *value, struct parameters *parms); /*! \fn void parse_params(char *param_file, struct parameters * parms); * \brief Reads the parameters in the given file into a structure. */ -void parse_params(char *param_file, struct parameters *parms, int argc, - char **argv) +void parse_params(char *param_file, struct parameters *parms, int argc, char **argv) { int buf; char *s, buff[256]; FILE *fp = fopen(param_file, "r"); if (fp == NULL) { - chprintf("Exiting at file %s line %d: failed to read param file %s \n", - __FILE__, __LINE__, param_file); + chprintf("Exiting at file %s line %d: failed to read param file %s \n", __FILE__, __LINE__, param_file); exit(1); return; } diff --git a/src/global/global.h b/src/global/global.h index cbcb4cf43..59a19e5c2 100644 --- a/src/global/global.h +++ b/src/global/global.h @@ -35,14 +35,13 @@ typedef double Real; #define GN 4.49451e-18 // gravitational constant, kpc^3 / M_sun / kyr^2 #define C_L 0.306594593 // speed of light in kpc/kyr -#define MYR 31.536e12 // Myears in secs -#define KPC 3.086e16 // kpc in km -#define G_COSMO \ - 4.300927161e-06; // gravitational constant, kpc km^2 s^-2 Msun^-1 -#define MSUN_CGS 1.98847e33; // Msun in gr -#define KPC_CGS 3.086e21; // kpc in cm -#define KM_CGS 1e5; // km in cm -#define MH 1.67262171e-24 // Mass of hydrogen [g] +#define MYR 31.536e12 // Myears in secs +#define KPC 3.086e16 // kpc in km +#define G_COSMO 4.300927161e-06; // gravitational constant, kpc km^2 s^-2 Msun^-1 +#define MSUN_CGS 1.98847e33; // Msun in gr +#define KPC_CGS 3.086e21; // kpc in cm +#define KM_CGS 1e5; // km in cm +#define MH 1.67262171e-24 // Mass of hydrogen [g] #define TIME_UNIT 3.15569e10 // 1 kyr in s #define LENGTH_UNIT 3.08567758e21 // 1 kpc in cm @@ -98,9 +97,8 @@ typedef double Real; 3 // 3 ghost cells are needed for 5 point gradient, ( one is for the // CIC interpolation of the potential ) #else - #define N_GHOST_POTENTIAL \ - 2 // 2 ghost cells are needed for 5 point gradient - #endif // PARTICLES + #define N_GHOST_POTENTIAL 2 // 2 ghost cells are needed for 5 point gradient + #endif // PARTICLES #else #ifdef PARTICLES @@ -108,10 +106,9 @@ typedef double Real; 2 // 2 ghost cells are needed for 3 point gradient, ( one is for the // CIC interpolation of the potential ) #else - #define N_GHOST_POTENTIAL \ - 1 // 1 ghost cells are needed for 3 point gradient - #endif // PARTICLES - #endif // GRAVITY_5_POINTS_GRADIENT + #define N_GHOST_POTENTIAL 1 // 1 ghost cells are needed for 3 point gradient + #endif // PARTICLES + #endif // GRAVITY_5_POINTS_GRADIENT typedef long int grav_int_t; #endif @@ -326,8 +323,7 @@ struct parameters { /*! \fn void parse_params(char *param_file, struct parameters * parms); * \brief Reads the parameters in the given file into a structure. */ -extern void parse_params(char *param_file, struct parameters *parms, int argc, - char **argv); +extern void parse_params(char *param_file, struct parameters *parms, int argc, char **argv); /*! \fn int is_param_valid(char *name); * \brief Verifies that a param is valid (even if not needed). Avoids diff --git a/src/global/global_cuda.h b/src/global/global_cuda.h index 214e49e23..c0d71dd22 100644 --- a/src/global/global_cuda.h +++ b/src/global/global_cuda.h @@ -45,8 +45,7 @@ inline void __cudaSafeCall(cudaError err, const char *file, const int line) { #ifdef CUDA_ERROR_CHECK if (cudaSuccess != err) { - fprintf(stderr, "cudaSafeCall() failed at %s:%i : %s\n", file, line, - cudaGetErrorString(err)); + fprintf(stderr, "cudaSafeCall() failed at %s:%i : %s\n", file, line, cudaGetErrorString(err)); exit(-1); } #endif @@ -59,8 +58,7 @@ inline void __cudaCheckError(const char *file, const int line) #ifdef CUDA_ERROR_CHECK cudaError err = cudaGetLastError(); if (cudaSuccess != err) { - fprintf(stderr, "cudaCheckError() failed at %s:%i : %s\n", file, line, - cudaGetErrorString(err)); + fprintf(stderr, "cudaCheckError() failed at %s:%i : %s\n", file, line, cudaGetErrorString(err)); exit(-1); } @@ -68,8 +66,7 @@ inline void __cudaCheckError(const char *file, const int line) // Comment away if needed. err = cudaDeviceSynchronize(); if (cudaSuccess != err) { - fprintf(stderr, "cudaCheckError() with sync failed at %s:%i : %s\n", file, - line, cudaGetErrorString(err)); + fprintf(stderr, "cudaCheckError() with sync failed at %s:%i : %s\n", file, line, cudaGetErrorString(err)); exit(-1); } #endif @@ -84,8 +81,7 @@ inline void __cudaCheckError(const char *file, const int line) inline void gpuAssert(cudaError_t code, char *file, int line, bool abort = true) { if (code != cudaSuccess) { - fprintf(stderr, "GPUassert: %s %s %d\n", cudaGetErrorString(code), file, - line); + fprintf(stderr, "GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line); if (abort) exit(code); } } @@ -109,8 +105,7 @@ __device__ double atomicAdd(double *address, double val) unsigned long long int old = *address_as_ull, assumed; do { assumed = old; - old = atomicCAS(address_as_ull, assumed, - __double_as_longlong(val + __longlong_as_double(assumed))); + old = atomicCAS(address_as_ull, assumed, __double_as_longlong(val + __longlong_as_double(assumed))); } while (assumed != old); return __longlong_as_double(old); } diff --git a/src/gravity/grav3D.cpp b/src/gravity/grav3D.cpp index 8ab114059..e5032e0f3 100644 --- a/src/gravity/grav3D.cpp +++ b/src/gravity/grav3D.cpp @@ -16,11 +16,9 @@ Grav3D::Grav3D(void) {} -void Grav3D::Initialize(Real x_min, Real y_min, Real z_min, Real x_max, - Real y_max, Real z_max, Real Lx, Real Ly, Real Lz, - int nx, int ny, int nz, int nx_real, int ny_real, - int nz_real, Real dx_real, Real dy_real, Real dz_real, - int n_ghost_pot_offset, struct parameters *P) +void Grav3D::Initialize(Real x_min, Real y_min, Real z_min, Real x_max, Real y_max, Real z_max, Real Lx, Real Ly, + Real Lz, int nx, int ny, int nz, int nx_real, int ny_real, int nz_real, Real dx_real, + Real dy_real, Real dz_real, int n_ghost_pot_offset, struct parameters *P) { // Set Box Size Lbox_x = Lx; @@ -55,9 +53,8 @@ void Grav3D::Initialize(Real x_min, Real y_min, Real z_min, Real x_max, // Local n_cells without ghost cells n_cells = nx_local * ny_local * nz_local; // Local n_cells including ghost cells for the potential array - n_cells_potential = (nx_local + 2 * N_GHOST_POTENTIAL) * - (ny_local + 2 * N_GHOST_POTENTIAL) * - (nz_local + 2 * N_GHOST_POTENTIAL); + n_cells_potential = + (nx_local + 2 * N_GHOST_POTENTIAL) * (ny_local + 2 * N_GHOST_POTENTIAL) * (nz_local + 2 * N_GHOST_POTENTIAL); // Set Initial and dt used for the extrapolation of the potential; // The first timestep the potential in not extrapolated ( INITIAL = TRUE ) @@ -98,8 +95,7 @@ void Grav3D::Initialize(Real x_min, Real y_min, Real z_min, Real x_max, chprintf( "Gravity Initialized: \n Lbox: %0.2f %0.2f %0.2f \n Local: %d %d %d \n " "Global: %d %d %d \n", - Lbox_x, Lbox_y, Lbox_z, nx_local, ny_local, nz_local, nx_total, ny_total, - nz_total); + Lbox_x, Lbox_y, Lbox_z, nx_local, ny_local, nz_local, nx_total, ny_total, nz_total); chprintf(" dx:%f dy:%f dz:%f\n", dx, dy, dz); chprintf(" N ghost potential: %d\n", N_GHOST_POTENTIAL); @@ -112,12 +108,10 @@ void Grav3D::Initialize(Real x_min, Real y_min, Real z_min, Real x_max, chprintf(" N OMP Threads per MPI process: %d\n", N_OMP_THREADS); #endif - Poisson_solver.Initialize(Lbox_x, Lbox_y, Lbox_z, xMin, yMin, zMin, nx_total, - ny_total, nz_total, nx_local, ny_local, nz_local, - dx, dy, dz); + Poisson_solver.Initialize(Lbox_x, Lbox_y, Lbox_z, xMin, yMin, zMin, nx_total, ny_total, nz_total, nx_local, ny_local, + nz_local, dx, dy, dz); #if defined(PARIS_TEST) || defined(PARIS_GALACTIC_TEST) - Poisson_solver_test.Initialize(Lbox_x, Lbox_y, Lbox_z, xMin, yMin, zMin, - nx_total, ny_total, nz_total, nx_local, + Poisson_solver_test.Initialize(Lbox_x, Lbox_y, Lbox_z, xMin, yMin, zMin, nx_total, ny_total, nz_total, nx_local, ny_local, nz_local, dx, dy, dz); #endif @@ -131,37 +125,27 @@ void Grav3D::Initialize(Real x_min, Real y_min, Real z_min, Real x_max, void Grav3D::AllocateMemory_CPU(void) { // allocate memory for the density and potential arrays - F.density_h = (Real *)malloc(n_cells * sizeof(Real)); // array for the - // density - F.potential_h = (Real *)malloc( - n_cells_potential * - sizeof(Real)); // array for the potential at the n-th timestep - F.potential_1_h = (Real *)malloc( - n_cells_potential * - sizeof(Real)); // array for the potential at the (n-1)-th timestep - boundary_flags = - (int *)malloc(6 * sizeof(int)); // array for the gravity boundary flags + F.density_h = (Real *)malloc(n_cells * sizeof(Real)); // array for the + // density + F.potential_h = (Real *)malloc(n_cells_potential * sizeof(Real)); // array for the potential at the n-th timestep + F.potential_1_h = + (Real *)malloc(n_cells_potential * sizeof(Real)); // array for the potential at the (n-1)-th timestep + boundary_flags = (int *)malloc(6 * sizeof(int)); // array for the gravity boundary flags #ifdef GRAV_ISOLATED_BOUNDARY_X - F.pot_boundary_x0 = (Real *)malloc( - N_GHOST_POTENTIAL * ny_local * nz_local * - sizeof(Real)); // array for the potential isolated boundary - F.pot_boundary_x1 = - (Real *)malloc(N_GHOST_POTENTIAL * ny_local * nz_local * sizeof(Real)); + F.pot_boundary_x0 = (Real *)malloc(N_GHOST_POTENTIAL * ny_local * nz_local * + sizeof(Real)); // array for the potential isolated boundary + F.pot_boundary_x1 = (Real *)malloc(N_GHOST_POTENTIAL * ny_local * nz_local * sizeof(Real)); #endif #ifdef GRAV_ISOLATED_BOUNDARY_Y - F.pot_boundary_y0 = (Real *)malloc( - N_GHOST_POTENTIAL * nx_local * nz_local * - sizeof(Real)); // array for the potential isolated boundary - F.pot_boundary_y1 = - (Real *)malloc(N_GHOST_POTENTIAL * nx_local * nz_local * sizeof(Real)); + F.pot_boundary_y0 = (Real *)malloc(N_GHOST_POTENTIAL * nx_local * nz_local * + sizeof(Real)); // array for the potential isolated boundary + F.pot_boundary_y1 = (Real *)malloc(N_GHOST_POTENTIAL * nx_local * nz_local * sizeof(Real)); #endif #ifdef GRAV_ISOLATED_BOUNDARY_Z - F.pot_boundary_z0 = (Real *)malloc( - N_GHOST_POTENTIAL * nx_local * ny_local * - sizeof(Real)); // array for the potential isolated boundary - F.pot_boundary_z1 = - (Real *)malloc(N_GHOST_POTENTIAL * nx_local * ny_local * sizeof(Real)); + F.pot_boundary_z0 = (Real *)malloc(N_GHOST_POTENTIAL * nx_local * ny_local * + sizeof(Real)); // array for the potential isolated boundary + F.pot_boundary_z1 = (Real *)malloc(N_GHOST_POTENTIAL * nx_local * ny_local * sizeof(Real)); #endif #ifdef GRAVITY_ANALYTIC_COMP diff --git a/src/gravity/grav3D.h b/src/gravity/grav3D.h index 691a352e7..a751f1652 100644 --- a/src/gravity/grav3D.h +++ b/src/gravity/grav3D.h @@ -191,11 +191,9 @@ class Grav3D /*! \fn void Initialize(int nx_in, int ny_in, int nz_in) * \brief Initialize the grid. */ - void Initialize(Real x_min, Real y_min, Real z_min, Real x_max, Real y_max, - Real z_max, Real Lx, Real Ly, Real Lz, int nx_total, - int ny_total, int nz_total, int nx_real, int ny_real, - int nz_real, Real dx_real, Real dy_real, Real dz_real, - int n_ghost_pot_offset, struct parameters *P); + void Initialize(Real x_min, Real y_min, Real z_min, Real x_max, Real y_max, Real z_max, Real Lx, Real Ly, Real Lz, + int nx_total, int ny_total, int nz_total, int nx_real, int ny_real, int nz_real, Real dx_real, + Real dy_real, Real dz_real, int n_ghost_pot_offset, struct parameters *P); void AllocateMemory_CPU(void); void Initialize_values_CPU(); @@ -210,9 +208,7 @@ class Grav3D void Set_Boundary_Flags(int *flags); #ifdef SOR - void Copy_Isolated_Boundary_To_GPU_buffer(Real *isolated_boundary_h, - Real *isolated_boundary_d, - int boundary_size); + void Copy_Isolated_Boundary_To_GPU_buffer(Real *isolated_boundary_h, Real *isolated_boundary_d, int boundary_size); void Copy_Isolated_Boundaries_To_GPU(struct parameters *P); #endif diff --git a/src/gravity/gravity_boundaries.cpp b/src/gravity/gravity_boundaries.cpp index ad942c859..a4291316a 100644 --- a/src/gravity/gravity_boundaries.cpp +++ b/src/gravity/gravity_boundaries.cpp @@ -7,11 +7,9 @@ #include "../io/io.h" #include "../model/disk_galaxy.h" - #if defined(GRAV_ISOLATED_BOUNDARY_X) || \ - defined(GRAV_ISOLATED_BOUNDARY_Y) || defined(GRAV_ISOLATED_BOUNDARY_Z) + #if defined(GRAV_ISOLATED_BOUNDARY_X) || defined(GRAV_ISOLATED_BOUNDARY_Y) || defined(GRAV_ISOLATED_BOUNDARY_Z) -void Grid3D::Compute_Potential_Boundaries_Isolated(int dir, - struct parameters *P) +void Grid3D::Compute_Potential_Boundaries_Isolated(int dir, struct parameters *P) { // Set Isolated Boundaries for the ghost cells. int bc_potential_type = P->bc_potential_type; @@ -24,8 +22,7 @@ void Grid3D::Compute_Potential_Boundaries_Isolated(int dir, if (dir == 5) Compute_Potential_Isolated_Boundary(2, 1, bc_potential_type); } -void Grid3D::Set_Potential_Boundaries_Isolated(int direction, int side, - int *flags) +void Grid3D::Set_Potential_Boundaries_Isolated(int direction, int side, int *flags) { Real *pot_boundary; int n_i, n_j, nGHST; @@ -72,25 +69,16 @@ void Grid3D::Set_Potential_Boundaries_Isolated(int direction, int side, id_buffer = i + j * n_i + k * n_i * n_j; if (direction == 0) { - if (side == 0) - id_grid = (k) + (i + nGHST) * nx_g + (j + nGHST) * nx_g * ny_g; - if (side == 1) - id_grid = (k + nx_local + nGHST) + (i + nGHST) * nx_g + - (j + nGHST) * nx_g * ny_g; + if (side == 0) id_grid = (k) + (i + nGHST) * nx_g + (j + nGHST) * nx_g * ny_g; + if (side == 1) id_grid = (k + nx_local + nGHST) + (i + nGHST) * nx_g + (j + nGHST) * nx_g * ny_g; } if (direction == 1) { - if (side == 0) - id_grid = (i + nGHST) + (k)*nx_g + (j + nGHST) * nx_g * ny_g; - if (side == 1) - id_grid = (i + nGHST) + (k + ny_local + nGHST) * nx_g + - (j + nGHST) * nx_g * ny_g; + if (side == 0) id_grid = (i + nGHST) + (k)*nx_g + (j + nGHST) * nx_g * ny_g; + if (side == 1) id_grid = (i + nGHST) + (k + ny_local + nGHST) * nx_g + (j + nGHST) * nx_g * ny_g; } if (direction == 2) { - if (side == 0) - id_grid = (i + nGHST) + (j + nGHST) * nx_g + (k)*nx_g * ny_g; - if (side == 1) - id_grid = (i + nGHST) + (j + nGHST) * nx_g + - (k + nz_local + nGHST) * nx_g * ny_g; + if (side == 0) id_grid = (i + nGHST) + (j + nGHST) * nx_g + (k)*nx_g * ny_g; + if (side == 1) id_grid = (i + nGHST) + (j + nGHST) * nx_g + (k + nz_local + nGHST) * nx_g * ny_g; } Grav.F.potential_h[id_grid] = pot_boundary[id_buffer]; @@ -99,8 +87,7 @@ void Grid3D::Set_Potential_Boundaries_Isolated(int direction, int side, } } -void Grid3D::Compute_Potential_Isolated_Boundary(int direction, int side, - int bc_potential_type) +void Grid3D::Compute_Potential_Isolated_Boundary(int direction, int side, int bc_potential_type) { Real domain_l, Lx_local, Ly_local, Lz_local; Real *pot_boundary; @@ -139,16 +126,14 @@ void Grid3D::Compute_Potential_Isolated_Boundary(int direction, int side, } #endif - Real M, cm_pos_x, cm_pos_y, cm_pos_z, pos_x, pos_y, pos_z, r, delta_x, - delta_y, delta_z; + Real M, cm_pos_x, cm_pos_y, cm_pos_z, pos_x, pos_y, pos_z, r, delta_x, delta_y, delta_z; if (bc_potential_type == 0) { const Real r0 = H.sphere_radius; - M = (H.sphere_density - H.sphere_background_density) * 4.0 * M_PI * r0 * - r0 * r0 / 3.0; - cm_pos_x = H.sphere_center_x; - cm_pos_y = H.sphere_center_y; - cm_pos_z = H.sphere_center_z; + M = (H.sphere_density - H.sphere_background_density) * 4.0 * M_PI * r0 * r0 * r0 / 3.0; + cm_pos_x = H.sphere_center_x; + cm_pos_y = H.sphere_center_y; + cm_pos_z = H.sphere_center_z; } // for bc_pontential_type = 1 the mod_frac is the fraction @@ -190,8 +175,7 @@ void Grid3D::Compute_Potential_Isolated_Boundary(int direction, int side, delta_x = pos_x - cm_pos_x; delta_y = pos_y - cm_pos_y; delta_z = pos_z - cm_pos_z; - r = sqrt((delta_x * delta_x) + (delta_y * delta_y) + - (delta_z * delta_z)); + r = sqrt((delta_x * delta_x) + (delta_y * delta_y) + (delta_z * delta_z)); pot_val = -Grav.Gconst * M / r; } else if (bc_potential_type == 1) { // M-W disk potential @@ -211,8 +195,7 @@ void Grid3D::Compute_Potential_Isolated_Boundary(int direction, int side, #endif // GRAV_ISOLATED_BOUNDARY_X -void Grid3D::Set_Potential_Boundaries_Periodic(int direction, int side, - int *flags) +void Grid3D::Set_Potential_Boundaries_Periodic(int direction, int side, int *flags) { // Flags: 1 (periodic), 2 (reflective), 3 (transmissive), 4 (custom), 5 (mpi) @@ -229,8 +212,7 @@ void Grid3D::Set_Potential_Boundaries_Periodic(int direction, int side, for (j = 0; j < ny_g; j++) { for (i = 0; i < nGHST; i++) { if (side == 0) { - indx_src = (nx_g - 2 * nGHST + i) + (j)*nx_g + - (k)*nx_g * ny_g; // Periodic + indx_src = (nx_g - 2 * nGHST + i) + (j)*nx_g + (k)*nx_g * ny_g; // Periodic indx_dst = (i) + (j)*nx_g + (k)*nx_g * ny_g; } if (side == 1) { @@ -249,8 +231,7 @@ void Grid3D::Set_Potential_Boundaries_Periodic(int direction, int side, for (j = 0; j < nGHST; j++) { for (i = 0; i < nx_g; i++) { if (side == 0) { - indx_src = (i) + (ny_g - 2 * nGHST + j) * nx_g + - (k)*nx_g * ny_g; // Periodic + indx_src = (i) + (ny_g - 2 * nGHST + j) * nx_g + (k)*nx_g * ny_g; // Periodic indx_dst = (i) + (j)*nx_g + (k)*nx_g * ny_g; } if (side == 1) { @@ -269,8 +250,7 @@ void Grid3D::Set_Potential_Boundaries_Periodic(int direction, int side, for (j = 0; j < ny_g; j++) { for (i = 0; i < nx_g; i++) { if (side == 0) { - indx_src = (i) + (j)*nx_g + - (nz_g - 2 * nGHST + k) * nx_g * ny_g; // Periodic + indx_src = (i) + (j)*nx_g + (nz_g - 2 * nGHST + k) * nx_g * ny_g; // Periodic indx_dst = (i) + (j)*nx_g + (k)*nx_g * ny_g; } if (side == 1) { @@ -285,8 +265,7 @@ void Grid3D::Set_Potential_Boundaries_Periodic(int direction, int side, } #ifdef MPI_CHOLLA -int Grid3D::Load_Gravity_Potential_To_Buffer(int direction, int side, - Real *buffer, int buffer_start) +int Grid3D::Load_Gravity_Potential_To_Buffer(int direction, int side, Real *buffer, int buffer_start) { int i, j, k, indx, indx_buff, length; int nGHST, nx_g, ny_g, nz_g; @@ -302,8 +281,7 @@ int Grid3D::Load_Gravity_Potential_To_Buffer(int direction, int side, for (j = 0; j < ny_g; j++) { for (i = 0; i < nGHST; i++) { if (side == 0) indx = (i + nGHST) + (j)*nx_g + (k)*nx_g * ny_g; - if (side == 1) - indx = (nx_g - 2 * nGHST + i) + (j)*nx_g + (k)*nx_g * ny_g; + if (side == 1) indx = (nx_g - 2 * nGHST + i) + (j)*nx_g + (k)*nx_g * ny_g; indx_buff = (j) + (k)*ny_g + i * ny_g * nz_g; buffer[buffer_start + indx_buff] = Grav.F.potential_h[indx]; } @@ -318,8 +296,7 @@ int Grid3D::Load_Gravity_Potential_To_Buffer(int direction, int side, for (j = 0; j < nGHST; j++) { for (i = 0; i < nx_g; i++) { if (side == 0) indx = (i) + (j + nGHST) * nx_g + (k)*nx_g * ny_g; - if (side == 1) - indx = (i) + (ny_g - 2 * nGHST + j) * nx_g + (k)*nx_g * ny_g; + if (side == 1) indx = (i) + (ny_g - 2 * nGHST + j) * nx_g + (k)*nx_g * ny_g; indx_buff = (i) + (k)*nx_g + j * nx_g * nz_g; buffer[buffer_start + indx_buff] = Grav.F.potential_h[indx]; } @@ -334,8 +311,7 @@ int Grid3D::Load_Gravity_Potential_To_Buffer(int direction, int side, for (j = 0; j < ny_g; j++) { for (i = 0; i < nx_g; i++) { if (side == 0) indx = (i) + (j)*nx_g + (k + nGHST) * nx_g * ny_g; - if (side == 1) - indx = (i) + (j)*nx_g + (nz_g - 2 * nGHST + k) * nx_g * ny_g; + if (side == 1) indx = (i) + (j)*nx_g + (nz_g - 2 * nGHST + k) * nx_g * ny_g; indx_buff = (i) + (j)*nx_g + k * nx_g * ny_g; buffer[buffer_start + indx_buff] = Grav.F.potential_h[indx]; } @@ -345,9 +321,7 @@ int Grid3D::Load_Gravity_Potential_To_Buffer(int direction, int side, return length; } -void Grid3D::Unload_Gravity_Potential_from_Buffer(int direction, int side, - Real *buffer, - int buffer_start) +void Grid3D::Unload_Gravity_Potential_from_Buffer(int direction, int side, Real *buffer, int buffer_start) { int i, j, k, indx, indx_buff; int nGHST, nx_g, ny_g, nz_g; @@ -376,8 +350,7 @@ void Grid3D::Unload_Gravity_Potential_from_Buffer(int direction, int side, for (j = 0; j < nGHST; j++) { for (i = 0; i < nx_g; i++) { if (side == 0) indx = (i) + (j)*nx_g + (k)*nx_g * ny_g; - if (side == 1) - indx = (i) + (ny_g - nGHST + j) * nx_g + (k)*nx_g * ny_g; + if (side == 1) indx = (i) + (ny_g - nGHST + j) * nx_g + (k)*nx_g * ny_g; indx_buff = (i) + (k)*nx_g + j * nx_g * nz_g; Grav.F.potential_h[indx] = buffer[buffer_start + indx_buff]; } @@ -391,8 +364,7 @@ void Grid3D::Unload_Gravity_Potential_from_Buffer(int direction, int side, for (j = 0; j < ny_g; j++) { for (i = 0; i < nx_g; i++) { if (side == 0) indx = (i) + (j)*nx_g + (k)*nx_g * ny_g; - if (side == 1) - indx = (i) + (j)*nx_g + (nz_g - nGHST + k) * nx_g * ny_g; + if (side == 1) indx = (i) + (j)*nx_g + (nz_g - nGHST + k) * nx_g * ny_g; indx_buff = (i) + (j)*nx_g + k * nx_g * ny_g; Grav.F.potential_h[indx] = buffer[buffer_start + indx_buff]; } diff --git a/src/gravity/gravity_boundaries_gpu.cu b/src/gravity/gravity_boundaries_gpu.cu index 9d67e49f6..b72bb1701 100644 --- a/src/gravity/gravity_boundaries_gpu.cu +++ b/src/gravity/gravity_boundaries_gpu.cu @@ -6,12 +6,11 @@ #include "../grid/grid3D.h" #include "../io/io.h" - #if defined(GRAV_ISOLATED_BOUNDARY_X) || \ - defined(GRAV_ISOLATED_BOUNDARY_Y) || defined(GRAV_ISOLATED_BOUNDARY_Z) + #if defined(GRAV_ISOLATED_BOUNDARY_X) || defined(GRAV_ISOLATED_BOUNDARY_Y) || defined(GRAV_ISOLATED_BOUNDARY_Z) -void __global__ Set_Potential_Boundaries_Isolated_kernel( - int direction, int side, int size_buffer, int n_i, int n_j, int nx, int ny, - int nz, int n_ghost, Real *potential_d, Real *pot_boundary_d) +void __global__ Set_Potential_Boundaries_Isolated_kernel(int direction, int side, int size_buffer, int n_i, int n_j, + int nx, int ny, int nz, int n_ghost, Real *potential_d, + Real *pot_boundary_d) { // get a global thread ID int tid, tid_i, tid_j, tid_k, tid_buffer, tid_pot; @@ -20,39 +19,27 @@ void __global__ Set_Potential_Boundaries_Isolated_kernel( tid_j = (tid - tid_k * n_i * n_j) / n_i; tid_i = tid - tid_k * n_i * n_j - tid_j * n_i; - if (tid_i < 0 || tid_i >= n_i || tid_j < 0 || tid_j >= n_j || tid_k < 0 || - tid_k >= n_ghost) - return; + if (tid_i < 0 || tid_i >= n_i || tid_j < 0 || tid_j >= n_j || tid_k < 0 || tid_k >= n_ghost) return; tid_buffer = tid_i + tid_j * n_i + tid_k * n_i * n_j; if (direction == 0) { - if (side == 0) - tid_pot = (tid_k) + (tid_i + n_ghost) * nx + (tid_j + n_ghost) * nx * ny; - if (side == 1) - tid_pot = (nx - n_ghost + tid_k) + (tid_i + n_ghost) * nx + - (tid_j + n_ghost) * nx * ny; + if (side == 0) tid_pot = (tid_k) + (tid_i + n_ghost) * nx + (tid_j + n_ghost) * nx * ny; + if (side == 1) tid_pot = (nx - n_ghost + tid_k) + (tid_i + n_ghost) * nx + (tid_j + n_ghost) * nx * ny; } if (direction == 1) { - if (side == 0) - tid_pot = (tid_i + n_ghost) + (tid_k)*nx + (tid_j + n_ghost) * nx * ny; - if (side == 1) - tid_pot = (tid_i + n_ghost) + (ny - n_ghost + tid_k) * nx + - (tid_j + n_ghost) * nx * ny; + if (side == 0) tid_pot = (tid_i + n_ghost) + (tid_k)*nx + (tid_j + n_ghost) * nx * ny; + if (side == 1) tid_pot = (tid_i + n_ghost) + (ny - n_ghost + tid_k) * nx + (tid_j + n_ghost) * nx * ny; } if (direction == 2) { - if (side == 0) - tid_pot = (tid_i + n_ghost) + (tid_j + n_ghost) * nx + (tid_k)*nx * ny; - if (side == 1) - tid_pot = (tid_i + n_ghost) + (tid_j + n_ghost) * nx + - (nz - n_ghost + tid_k) * nx * ny; + if (side == 0) tid_pot = (tid_i + n_ghost) + (tid_j + n_ghost) * nx + (tid_k)*nx * ny; + if (side == 1) tid_pot = (tid_i + n_ghost) + (tid_j + n_ghost) * nx + (nz - n_ghost + tid_k) * nx * ny; } potential_d[tid_pot] = pot_boundary_d[tid_buffer]; } -void Grid3D::Set_Potential_Boundaries_Isolated_GPU(int direction, int side, - int *flags) +void Grid3D::Set_Potential_Boundaries_Isolated_GPU(int direction, int side, int *flags) { int n_i, n_j, n_ghost, size_buffer; int nx_g, ny_g, nz_g; @@ -103,22 +90,18 @@ void Grid3D::Set_Potential_Boundaries_Isolated_GPU(int direction, int side, dim3 dim1dBlock(TPB_GRAV, 1, 1); // Copy the boundary array from host to device - cudaMemcpy(pot_boundary_d, pot_boundary_h, size_buffer * sizeof(Real), - cudaMemcpyHostToDevice); + cudaMemcpy(pot_boundary_d, pot_boundary_h, size_buffer * sizeof(Real), cudaMemcpyHostToDevice); cudaDeviceSynchronize(); // Copy the potential boundary from buffer to potential array - hipLaunchKernelGGL(Set_Potential_Boundaries_Isolated_kernel, dim1dGrid, - dim1dBlock, 0, 0, direction, side, size_buffer, n_i, n_j, - nx_g, ny_g, nz_g, n_ghost, Grav.F.potential_d, - pot_boundary_d); + hipLaunchKernelGGL(Set_Potential_Boundaries_Isolated_kernel, dim1dGrid, dim1dBlock, 0, 0, direction, side, + size_buffer, n_i, n_j, nx_g, ny_g, nz_g, n_ghost, Grav.F.potential_d, pot_boundary_d); } #endif // GRAV_ISOLATED_BOUNDARY -void __global__ Set_Potential_Boundaries_Periodic_kernel( - int direction, int side, int n_i, int n_j, int nx, int ny, int nz, - int n_ghost, Real *potential_d) +void __global__ Set_Potential_Boundaries_Periodic_kernel(int direction, int side, int n_i, int n_j, int nx, int ny, + int nz, int n_ghost, Real *potential_d) { // get a global thread ID int tid, tid_i, tid_j, tid_k, tid_src, tid_dst; @@ -127,40 +110,31 @@ void __global__ Set_Potential_Boundaries_Periodic_kernel( tid_j = (tid - tid_k * n_i * n_j) / n_i; tid_i = tid - tid_k * n_i * n_j - tid_j * n_i; - if (tid_i < 0 || tid_i >= n_i || tid_j < 0 || tid_j >= n_j || tid_k < 0 || - tid_k >= n_ghost) - return; + if (tid_i < 0 || tid_i >= n_i || tid_j < 0 || tid_j >= n_j || tid_k < 0 || tid_k >= n_ghost) return; if (direction == 0) { - if (side == 0) - tid_src = (nx - 2 * n_ghost + tid_k) + (tid_i)*nx + (tid_j)*nx * ny; + if (side == 0) tid_src = (nx - 2 * n_ghost + tid_k) + (tid_i)*nx + (tid_j)*nx * ny; if (side == 0) tid_dst = (tid_k) + (tid_i)*nx + (tid_j)*nx * ny; if (side == 1) tid_src = (n_ghost + tid_k) + (tid_i)*nx + (tid_j)*nx * ny; - if (side == 1) - tid_dst = (nx - n_ghost + tid_k) + (tid_i)*nx + (tid_j)*nx * ny; + if (side == 1) tid_dst = (nx - n_ghost + tid_k) + (tid_i)*nx + (tid_j)*nx * ny; } if (direction == 1) { - if (side == 0) - tid_src = (tid_i) + (ny - 2 * n_ghost + tid_k) * nx + (tid_j)*nx * ny; + if (side == 0) tid_src = (tid_i) + (ny - 2 * n_ghost + tid_k) * nx + (tid_j)*nx * ny; if (side == 0) tid_dst = (tid_i) + (tid_k)*nx + (tid_j)*nx * ny; if (side == 1) tid_src = (tid_i) + (n_ghost + tid_k) * nx + (tid_j)*nx * ny; - if (side == 1) - tid_dst = (tid_i) + (ny - n_ghost + tid_k) * nx + (tid_j)*nx * ny; + if (side == 1) tid_dst = (tid_i) + (ny - n_ghost + tid_k) * nx + (tid_j)*nx * ny; } if (direction == 2) { - if (side == 0) - tid_src = (tid_i) + (tid_j)*nx + (nz - 2 * n_ghost + tid_k) * nx * ny; + if (side == 0) tid_src = (tid_i) + (tid_j)*nx + (nz - 2 * n_ghost + tid_k) * nx * ny; if (side == 0) tid_dst = (tid_i) + (tid_j)*nx + (tid_k)*nx * ny; if (side == 1) tid_src = (tid_i) + (tid_j)*nx + (n_ghost + tid_k) * nx * ny; - if (side == 1) - tid_dst = (tid_i) + (tid_j)*nx + (nz - n_ghost + tid_k) * nx * ny; + if (side == 1) tid_dst = (tid_i) + (tid_j)*nx + (nz - n_ghost + tid_k) * nx * ny; } potential_d[tid_dst] = potential_d[tid_src]; } -void Grid3D::Set_Potential_Boundaries_Periodic_GPU(int direction, int side, - int *flags) +void Grid3D::Set_Potential_Boundaries_Periodic_GPU(int direction, int side, int *flags) { int n_i, n_j, n_ghost, size; int nx_g, ny_g, nz_g; @@ -192,15 +166,13 @@ void Grid3D::Set_Potential_Boundaries_Periodic_GPU(int direction, int side, dim3 dim1dBlock(TPB_GRAV, 1, 1); // Copy the potential boundary from buffer to potential array - hipLaunchKernelGGL(Set_Potential_Boundaries_Periodic_kernel, dim1dGrid, - dim1dBlock, 0, 0, direction, side, n_i, n_j, nx_g, ny_g, - nz_g, n_ghost, Grav.F.potential_d); + hipLaunchKernelGGL(Set_Potential_Boundaries_Periodic_kernel, dim1dGrid, dim1dBlock, 0, 0, direction, side, n_i, n_j, + nx_g, ny_g, nz_g, n_ghost, Grav.F.potential_d); } -__global__ void Load_Transfer_Buffer_GPU_kernel( - int direction, int side, int size_buffer, int n_i, int n_j, int nx, int ny, - int nz, int n_ghost_transfer, int n_ghost_potential, Real *potential_d, - Real *transfer_buffer_d) +__global__ void Load_Transfer_Buffer_GPU_kernel(int direction, int side, int size_buffer, int n_i, int n_j, int nx, + int ny, int nz, int n_ghost_transfer, int n_ghost_potential, + Real *potential_d, Real *transfer_buffer_d) { // get a global thread ID int tid, tid_i, tid_j, tid_k, tid_buffer, tid_pot; @@ -209,43 +181,29 @@ __global__ void Load_Transfer_Buffer_GPU_kernel( tid_j = (tid - tid_k * n_i * n_j) / n_i; tid_i = tid - tid_k * n_i * n_j - tid_j * n_i; - if (tid_i < 0 || tid_i >= n_i || tid_j < 0 || tid_j >= n_j || tid_k < 0 || - tid_k >= n_ghost_transfer) - return; + if (tid_i < 0 || tid_i >= n_i || tid_j < 0 || tid_j >= n_j || tid_k < 0 || tid_k >= n_ghost_transfer) return; tid_buffer = tid_i + tid_j * n_i + tid_k * n_i * n_j; if (direction == 0) { - if (side == 0) - tid_pot = (n_ghost_potential + tid_k) + (tid_i)*nx + (tid_j)*nx * ny; - if (side == 1) - tid_pot = (nx - n_ghost_potential - n_ghost_transfer + tid_k) + - (tid_i)*nx + (tid_j)*nx * ny; + if (side == 0) tid_pot = (n_ghost_potential + tid_k) + (tid_i)*nx + (tid_j)*nx * ny; + if (side == 1) tid_pot = (nx - n_ghost_potential - n_ghost_transfer + tid_k) + (tid_i)*nx + (tid_j)*nx * ny; } if (direction == 1) { - if (side == 0) - tid_pot = (tid_i) + (n_ghost_potential + tid_k) * nx + (tid_j)*nx * ny; - if (side == 1) - tid_pot = (tid_i) + - (ny - n_ghost_potential - n_ghost_transfer + tid_k) * nx + - (tid_j)*nx * ny; + if (side == 0) tid_pot = (tid_i) + (n_ghost_potential + tid_k) * nx + (tid_j)*nx * ny; + if (side == 1) tid_pot = (tid_i) + (ny - n_ghost_potential - n_ghost_transfer + tid_k) * nx + (tid_j)*nx * ny; } if (direction == 2) { - if (side == 0) - tid_pot = (tid_i) + (tid_j)*nx + (n_ghost_potential + tid_k) * nx * ny; - if (side == 1) - tid_pot = (tid_i) + (tid_j)*nx + - (nz - n_ghost_potential - n_ghost_transfer + tid_k) * nx * ny; + if (side == 0) tid_pot = (tid_i) + (tid_j)*nx + (n_ghost_potential + tid_k) * nx * ny; + if (side == 1) tid_pot = (tid_i) + (tid_j)*nx + (nz - n_ghost_potential - n_ghost_transfer + tid_k) * nx * ny; } transfer_buffer_d[tid_buffer] = potential_d[tid_pot]; } -int Grid3D::Load_Gravity_Potential_To_Buffer_GPU(int direction, int side, - Real *buffer, int buffer_start) +int Grid3D::Load_Gravity_Potential_To_Buffer_GPU(int direction, int side, Real *buffer, int buffer_start) { // printf( "Loading Gravity Buffer: Dir %d side: %d \n", direction, side ); - int nx_pot, ny_pot, nz_pot, size_buffer, n_ghost_potential, n_ghost_transfer, - n_i, n_j, ngrid; + int nx_pot, ny_pot, nz_pot, size_buffer, n_ghost_potential, n_ghost_transfer, n_i, n_j, ngrid; ; n_ghost_potential = N_GHOST_POTENTIAL; n_ghost_transfer = N_GHOST_POTENTIAL; @@ -281,19 +239,16 @@ int Grid3D::Load_Gravity_Potential_To_Buffer_GPU(int direction, int side, Real *send_buffer_d; send_buffer_d = buffer; - hipLaunchKernelGGL(Load_Transfer_Buffer_GPU_kernel, dim1dGrid, dim1dBlock, 0, - 0, direction, side, size_buffer, n_i, n_j, nx_pot, ny_pot, - nz_pot, n_ghost_transfer, n_ghost_potential, potential_d, - send_buffer_d); + hipLaunchKernelGGL(Load_Transfer_Buffer_GPU_kernel, dim1dGrid, dim1dBlock, 0, 0, direction, side, size_buffer, n_i, + n_j, nx_pot, ny_pot, nz_pot, n_ghost_transfer, n_ghost_potential, potential_d, send_buffer_d); CHECK(cudaDeviceSynchronize()); return size_buffer; } -__global__ void Unload_Transfer_Buffer_GPU_kernel( - int direction, int side, int size_buffer, int n_i, int n_j, int nx, int ny, - int nz, int n_ghost_transfer, int n_ghost_potential, Real *potential_d, - Real *transfer_buffer_d) +__global__ void Unload_Transfer_Buffer_GPU_kernel(int direction, int side, int size_buffer, int n_i, int n_j, int nx, + int ny, int nz, int n_ghost_transfer, int n_ghost_potential, + Real *potential_d, Real *transfer_buffer_d) { // get a global thread ID int tid, tid_i, tid_j, tid_k, tid_buffer, tid_pot; @@ -302,45 +257,29 @@ __global__ void Unload_Transfer_Buffer_GPU_kernel( tid_j = (tid - tid_k * n_i * n_j) / n_i; tid_i = tid - tid_k * n_i * n_j - tid_j * n_i; - if (tid_i < 0 || tid_i >= n_i || tid_j < 0 || tid_j >= n_j || tid_k < 0 || - tid_k >= n_ghost_transfer) - return; + if (tid_i < 0 || tid_i >= n_i || tid_j < 0 || tid_j >= n_j || tid_k < 0 || tid_k >= n_ghost_transfer) return; tid_buffer = tid_i + tid_j * n_i + tid_k * n_i * n_j; if (direction == 0) { - if (side == 0) - tid_pot = (n_ghost_potential - n_ghost_transfer + tid_k) + (tid_i)*nx + - (tid_j)*nx * ny; - if (side == 1) - tid_pot = (nx - n_ghost_potential + tid_k) + (tid_i)*nx + (tid_j)*nx * ny; + if (side == 0) tid_pot = (n_ghost_potential - n_ghost_transfer + tid_k) + (tid_i)*nx + (tid_j)*nx * ny; + if (side == 1) tid_pot = (nx - n_ghost_potential + tid_k) + (tid_i)*nx + (tid_j)*nx * ny; } if (direction == 1) { - if (side == 0) - tid_pot = (tid_i) + (n_ghost_potential - n_ghost_transfer + tid_k) * nx + - (tid_j)*nx * ny; - if (side == 1) - tid_pot = - (tid_i) + (ny - n_ghost_potential + tid_k) * nx + (tid_j)*nx * ny; + if (side == 0) tid_pot = (tid_i) + (n_ghost_potential - n_ghost_transfer + tid_k) * nx + (tid_j)*nx * ny; + if (side == 1) tid_pot = (tid_i) + (ny - n_ghost_potential + tid_k) * nx + (tid_j)*nx * ny; } if (direction == 2) { - if (side == 0) - tid_pot = (tid_i) + (tid_j)*nx + - (n_ghost_potential - n_ghost_transfer + tid_k) * nx * ny; - if (side == 1) - tid_pot = - (tid_i) + (tid_j)*nx + (nz - n_ghost_potential + tid_k) * nx * ny; + if (side == 0) tid_pot = (tid_i) + (tid_j)*nx + (n_ghost_potential - n_ghost_transfer + tid_k) * nx * ny; + if (side == 1) tid_pot = (tid_i) + (tid_j)*nx + (nz - n_ghost_potential + tid_k) * nx * ny; } potential_d[tid_pot] = transfer_buffer_d[tid_buffer]; } -void Grid3D::Unload_Gravity_Potential_from_Buffer_GPU(int direction, int side, - Real *buffer, - int buffer_start) +void Grid3D::Unload_Gravity_Potential_from_Buffer_GPU(int direction, int side, Real *buffer, int buffer_start) { // printf( "Loading Gravity Buffer: Dir %d side: %d \n", direction, side ); - int nx_pot, ny_pot, nz_pot, size_buffer, n_ghost_potential, n_ghost_transfer, - n_i, n_j, ngrid; + int nx_pot, ny_pot, nz_pot, size_buffer, n_ghost_potential, n_ghost_transfer, n_i, n_j, ngrid; ; n_ghost_potential = N_GHOST_POTENTIAL; n_ghost_transfer = N_GHOST_POTENTIAL; @@ -376,10 +315,8 @@ void Grid3D::Unload_Gravity_Potential_from_Buffer_GPU(int direction, int side, Real *recv_buffer_d; recv_buffer_d = buffer; - hipLaunchKernelGGL(Unload_Transfer_Buffer_GPU_kernel, dim1dGrid, dim1dBlock, - 0, 0, direction, side, size_buffer, n_i, n_j, nx_pot, - ny_pot, nz_pot, n_ghost_transfer, n_ghost_potential, - potential_d, recv_buffer_d); + hipLaunchKernelGGL(Unload_Transfer_Buffer_GPU_kernel, dim1dGrid, dim1dBlock, 0, 0, direction, side, size_buffer, n_i, + n_j, nx_pot, ny_pot, nz_pot, n_ghost_transfer, n_ghost_potential, potential_d, recv_buffer_d); } #endif // GRAVITY diff --git a/src/gravity/gravity_functions.cpp b/src/gravity/gravity_functions.cpp index 95e3252d8..16dcfd6f9 100644 --- a/src/gravity/gravity_functions.cpp +++ b/src/gravity/gravity_functions.cpp @@ -56,11 +56,10 @@ void Grid3D::set_dt_Gravity() #else // NOT ONLY_PARTICLES // Here da_min is the minumum between da_particles and da_hydro Real da_hydro; - da_hydro = Cosmo.Get_da_from_dt(dt_hydro) * Cosmo.current_a * - Cosmo.current_a / Cosmo.H0; // Convet delta_t to delta_a - da_min = fmin(da_hydro, da_particles); // Find the minumum delta_a - chprintf(" Delta_a_particles: %f Delta_a_gas: %f \n", da_particles, - da_hydro); + da_hydro = + Cosmo.Get_da_from_dt(dt_hydro) * Cosmo.current_a * Cosmo.current_a / Cosmo.H0; // Convet delta_t to delta_a + da_min = fmin(da_hydro, da_particles); // Find the minumum delta_a + chprintf(" Delta_a_particles: %f Delta_a_gas: %f \n", da_particles, da_hydro); #endif // ONLY_PARTICLES @@ -94,8 +93,7 @@ void Grid3D::set_dt_Gravity() #ifdef ANALYSIS // Limit delta_a if it's time to run analysis if (Analysis.next_output_indx < Analysis.n_outputs) { - if (H.Output_Now && - fabs(Cosmo.current_a + da_min - Analysis.next_output) < 1e-6) + if (H.Output_Now && fabs(Cosmo.current_a + da_min - Analysis.next_output) < 1e-6) Analysis.Output_Now = true; else if (Cosmo.current_a + da_min > Analysis.next_output) { da_min = Analysis.next_output - Cosmo.current_a; @@ -112,18 +110,16 @@ void Grid3D::set_dt_Gravity() // Set delta_a after it has been computed Cosmo.delta_a = da_min; // Convert delta_a back to delta_t - dt_min = Cosmo.Get_dt_from_da(Cosmo.delta_a) * Cosmo.H0 / - (Cosmo.current_a * Cosmo.current_a); + dt_min = Cosmo.Get_dt_from_da(Cosmo.delta_a) * Cosmo.H0 / (Cosmo.current_a * Cosmo.current_a); // Set the new delta_t for the hydro step H.dt = dt_min; - chprintf(" Current_a: %f delta_a: %f dt: %f\n", Cosmo.current_a, - Cosmo.delta_a, H.dt); + chprintf(" Current_a: %f delta_a: %f dt: %f\n", Cosmo.current_a, Cosmo.delta_a, H.dt); #ifdef AVERAGE_SLOW_CELLS // Set the min_delta_t for averaging a slow cell da_particles = fmin(da_particles, Cosmo.max_delta_a); - min_dt_slow = Cosmo.Get_dt_from_da(da_particles) / Particles.C_cfl * - Cosmo.H0 / (Cosmo.current_a * Cosmo.current_a) / SLOW_FACTOR; + min_dt_slow = Cosmo.Get_dt_from_da(da_particles) / Particles.C_cfl * Cosmo.H0 / (Cosmo.current_a * Cosmo.current_a) / + SLOW_FACTOR; H.min_dt_slow = min_dt_slow; #endif @@ -131,8 +127,7 @@ void Grid3D::set_dt_Gravity() dt_physical = Cosmo.Get_dt_from_da(Cosmo.delta_a); Cosmo.dt_secs = dt_physical * Cosmo.time_conversion; Cosmo.t_secs += Cosmo.dt_secs; - chprintf(" t_physical: %f Myr dt_physical: %f Myr\n", Cosmo.t_secs / MYR, - Cosmo.dt_secs / MYR); + chprintf(" t_physical: %f Myr dt_physical: %f Myr\n", Cosmo.t_secs / MYR, Cosmo.dt_secs / MYR); Particles.dt = dt_physical; #else // Not Cosmology @@ -247,29 +242,21 @@ Real Grav3D::Get_Average_Density_function(int g_start, int g_end) static inline Real sqr(const Real x) { return x * x; } -static inline Real f1(const Real x) -{ - return exp(-10.0 * sqr(2.0 * x - 1.0)) * sin(8.0 * M_PI * x); -} +static inline Real f1(const Real x) { return exp(-10.0 * sqr(2.0 * x - 1.0)) * sin(8.0 * M_PI * x); } static inline Real d1(const Real x) { return 16.0 * exp(-10.0 * sqr(2.0 * x - 1.0)) * - ((400.0 * x * x - 400.0 * x - 4.0 * M_PI * M_PI + 95.0) * - sin(8.0 * M_PI * x) + + ((400.0 * x * x - 400.0 * x - 4.0 * M_PI * M_PI + 95.0) * sin(8.0 * M_PI * x) + (40.0 * M_PI - 80.0 * M_PI * x) * cos(8.0 * M_PI * x)); } -static inline Real periodicF(const Real x, const Real y, const Real z) -{ - return f1(x) * f1(y) * f1(z); -} +static inline Real periodicF(const Real x, const Real y, const Real z) { return f1(x) * f1(y) * f1(z); } -static inline Real periodicD(const Real x, const Real y, const Real z, - const Real ddlx, const Real ddly, const Real ddlz) +static inline Real periodicD(const Real x, const Real y, const Real z, const Real ddlx, const Real ddly, + const Real ddlz) { - return ddlx * d1(x) * f1(y) * f1(z) + ddly * f1(x) * d1(y) * f1(z) + - ddlz * f1(x) * f1(y) * d1(z); + return ddlx * d1(x) * f1(y) * f1(z) + ddly * f1(x) * d1(y) * f1(z) + ddlz * f1(x) * f1(y) * d1(z); } static constexpr Real twoPi = 2.0 * M_PI; @@ -285,8 +272,7 @@ static inline Real nonzeroF(const Real x, const Real y, const Real z) return sx * sx * sx * sy * sy * sy * sz * sz * sz + f; } -static inline Real nonzeroD(const Real x, const Real y, const Real z, - const Real ddlx, const Real ddly, const Real ddlz) +static inline Real nonzeroD(const Real x, const Real y, const Real z, const Real ddlx, const Real ddly, const Real ddlz) { const Real sx = sin(twoPi * x); const Real sy = sin(twoPi * y); @@ -295,8 +281,7 @@ static inline Real nonzeroD(const Real x, const Real y, const Real z, const Real sy3 = sy * sy * sy; const Real sz3 = sz * sz * sz; const Real f = exp(-x * x - y * y - z * z); - const Real df = ddlx * (4.0 * x * x - 2.0) + ddly * (4.0 * y * y - 2.0) + - ddlz * (4.0 * z * z - 2.0); + const Real df = ddlx * (4.0 * x * x - 2.0) + ddly * (4.0 * y * y - 2.0) + ddlz * (4.0 * z * z - 2.0); return (ddlx * sx * (3.0 * cos(fourPi * x) + 1.0) * sy3 * sz3 + ddly * sx3 * sy * (3.0 * cos(fourPi * y) + 1.0) * sz3 + ddlz * sx3 * sy3 * sz * (3.0 * cos(fourPi * z) + 1.0)) * @@ -306,18 +291,16 @@ static inline Real nonzeroD(const Real x, const Real y, const Real z, #endif #if defined(PARIS_TEST) || defined(PARIS_GALACTIC_TEST) -static void printDiff(const Real *p, const Real *q, const int nx, const int ny, - const int nz, const int ng = N_GHOST_POTENTIAL, - const bool plot = false) +static void printDiff(const Real *p, const Real *q, const int nx, const int ny, const int nz, + const int ng = N_GHOST_POTENTIAL, const bool plot = false) { Real dMax = 0, dSum = 0, dSum2 = 0; Real qMax = 0, qSum = 0, qSum2 = 0; - #pragma omp parallel for reduction(max:dMax,qMax) reduction(+:dSum,dSum2,qSum,qSum2) + #pragma omp parallel for reduction(max : dMax, qMax) reduction(+ : dSum, dSum2, qSum, qSum2) for (int k = 0; k < nz; k++) { for (int j = 0; j < ny; j++) { for (int i = 0; i < nx; i++) { - const long ijk = - i + ng + (nx + ng + ng) * (j + ng + (ny + ng + ng) * (k + ng)); + const long ijk = i + ng + (nx + ng + ng) * (j + ng + (ny + ng + ng) * (k + ng)); const Real qAbs = fabs(q[ijk]); qMax = std::max(qMax, qAbs); qSum += qAbs; @@ -333,8 +316,8 @@ static void printDiff(const Real *p, const Real *q, const int nx, const int ny, Real sums[4] = {qSum, qSum2, dSum, dSum2}; MPI_Allreduce(MPI_IN_PLACE, &maxs, 2, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD); MPI_Allreduce(MPI_IN_PLACE, &sums, 4, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); - chprintf(" Poisson-Solver Diff: L1 %g L2 %g Linf %g\n", sums[2] / sums[0], - sqrt(sums[3] / sums[1]), maxs[1] / maxs[0]); + chprintf(" Poisson-Solver Diff: L1 %g L2 %g Linf %g\n", sums[2] / sums[0], sqrt(sums[3] / sums[1]), + maxs[1] / maxs[0]); fflush(stdout); if (!plot) return; @@ -374,10 +357,9 @@ static void printDiff(const Real *p, const Real *q, const int nx, const int ny, void Grid3D::Initialize_Gravity(struct parameters *P) { chprintf("\nInitializing Gravity... \n"); - Grav.Initialize(H.xblocal, H.yblocal, H.zblocal, H.xblocal_max, H.yblocal_max, - H.zblocal_max, H.xdglobal, H.ydglobal, H.zdglobal, P->nx, - P->ny, P->nz, H.nx_real, H.ny_real, H.nz_real, H.dx, H.dy, - H.dz, H.n_ghost_potential_offset, P); + Grav.Initialize(H.xblocal, H.yblocal, H.zblocal, H.xblocal_max, H.yblocal_max, H.zblocal_max, H.xdglobal, H.ydglobal, + H.zdglobal, P->nx, P->ny, P->nz, H.nx_real, H.ny_real, H.nz_real, H.dx, H.dy, H.dz, + H.n_ghost_potential_offset, P); chprintf("Gravity Successfully Initialized. \n\n"); if (P->bc_potential_type == 1) { @@ -412,8 +394,7 @@ void Grid3D::Initialize_Gravity(struct parameters *P) const Real x = Grav.xMin + Grav.dx * (i + dr); const Real r = sqrt(x * x + yy); const int ijk = i + nijk; - exact[ijk] = potential[ijk] = Grav.F.potential_h[ijk] = - Galaxies::MW.phi_disk_D3D(r, z); + exact[ijk] = potential[ijk] = Grav.F.potential_h[ijk] = Galaxies::MW.phi_disk_D3D(r, z); } } } @@ -439,15 +420,12 @@ void Grid3D::Initialize_Gravity(struct parameters *P) } } } - Grav.Poisson_solver_test.Get_Potential(Grav.F.density_h, Grav.F.potential_h, - Grav.Gconst, Galaxies::MW); + Grav.Poisson_solver_test.Get_Potential(Grav.F.density_h, Grav.F.potential_h, Grav.Gconst, Galaxies::MW); chprintf(" Paris Galactic"); - printDiff(Grav.F.potential_h, exact.data(), Grav.nx_local, Grav.ny_local, - Grav.nz_local); + printDiff(Grav.F.potential_h, exact.data(), Grav.nx_local, Grav.ny_local, Grav.nz_local); Get_Potential_SOR(Grav.Gconst, 0, 0, P); chprintf(" SOR"); - printDiff(Grav.F.potential_h, exact.data(), Grav.nx_local, Grav.ny_local, - Grav.nz_local); + printDiff(Grav.F.potential_h, exact.data(), Grav.nx_local, Grav.ny_local, Grav.nz_local); #endif #ifdef SOR @@ -498,12 +476,10 @@ void Grid3D::Compute_Gravitational_Potential(struct parameters *P) #else const Real Grav_Constant = Grav.Gconst; // If slowing the Sphere Collapse problem ( bc_potential_type=0 ) - const Real dens_avrg = - (P->bc_potential_type == 0) ? H.sphere_background_density : 0; - const Real r0 = H.sphere_radius; + const Real dens_avrg = (P->bc_potential_type == 0) ? H.sphere_background_density : 0; + const Real r0 = H.sphere_radius; // Re-use current_a as the total mass of the sphere - const Real current_a = - (H.sphere_density - dens_avrg) * 4.0 * M_PI * r0 * r0 * r0 / 3.0; + const Real current_a = (H.sphere_density - dens_avrg) * 4.0 * M_PI * r0 * r0 * r0 / 3.0; #endif if (!Grav.BC_FLAGS_SET) { @@ -542,8 +518,8 @@ void Grid3D::Compute_Gravitational_Potential(struct parameters *P) input_density = Grav.F.density_d; output_potential = Grav.F.potential_d; #else - input_density = Grav.F.density_h; - output_potential = Grav.F.potential_h; + input_density = Grav.F.density_h; + output_potential = Grav.F.potential_h; #endif #ifdef SOR @@ -552,24 +528,19 @@ void Grid3D::Compute_Gravitational_Potential(struct parameters *P) #ifdef GRAVITY_GPU #error "GRAVITY_GPU not yet supported with PARIS_GALACTIC_TEST" #endif - Grav.Poisson_solver_test.Get_Potential(input_density, output_potential, - Grav_Constant, Galaxies::MW); - std::vector p(output_potential, - output_potential + Grav.n_cells_potential); + Grav.Poisson_solver_test.Get_Potential(input_density, output_potential, Grav_Constant, Galaxies::MW); + std::vector p(output_potential, output_potential + Grav.n_cells_potential); Get_Potential_SOR(Grav_Constant, dens_avrg, current_a, P); chprintf(" Paris vs SOR"); - printDiff(p.data(), output_potential, Grav.nx_local, Grav.ny_local, - Grav.nz_local, N_GHOST_POTENTIAL, false); + printDiff(p.data(), output_potential, Grav.nx_local, Grav.ny_local, Grav.nz_local, N_GHOST_POTENTIAL, false); #else Get_Potential_SOR(Grav_Constant, dens_avrg, current_a, P); #endif #elif defined PARIS_GALACTIC - Grav.Poisson_solver.Get_Potential(input_density, output_potential, - Grav_Constant, Galaxies::MW); + Grav.Poisson_solver.Get_Potential(input_density, output_potential, Grav_Constant, Galaxies::MW); #else - Grav.Poisson_solver.Get_Potential(input_density, output_potential, - Grav_Constant, dens_avrg, current_a); + Grav.Poisson_solver.Get_Potential(input_density, output_potential, Grav_Constant, dens_avrg, current_a); #endif // SOR #ifdef CPU_TIME @@ -581,8 +552,7 @@ void Grid3D::Compute_Gravitational_Potential(struct parameters *P) void Grid3D::Setup_Analytic_Potential(struct parameters *P) { #ifndef PARALLEL_OMP - Setup_Analytic_Galaxy_Potential(0, Grav.nz_local + 2 * N_GHOST_POTENTIAL, - Galaxies::MW); + Setup_Analytic_Galaxy_Potential(0, Grav.nz_local + 2 * N_GHOST_POTENTIAL, Galaxies::MW); #else #pragma omp parallel num_threads(N_OMP_THREADS) { @@ -591,17 +561,15 @@ void Grid3D::Setup_Analytic_Potential(struct parameters *P) omp_id = omp_get_thread_num(); n_omp_procs = omp_get_num_threads(); - Get_OMP_Grid_Indxs(Grav.nz_local + 2 * N_GHOST_POTENTIAL, n_omp_procs, - omp_id, &g_start, &g_end); + Get_OMP_Grid_Indxs(Grav.nz_local + 2 * N_GHOST_POTENTIAL, n_omp_procs, omp_id, &g_start, &g_end); Setup_Analytic_Galaxy_Potential(g_start, g_end, Galaxies::MW); } #endif #ifdef GRAVITY_GPU - CudaSafeCall(cudaMemcpy( - Grav.F.analytic_potential_d, Grav.F.analytic_potential_h, - Grav.n_cells_potential * sizeof(Real), cudaMemcpyHostToDevice)); + CudaSafeCall(cudaMemcpy(Grav.F.analytic_potential_d, Grav.F.analytic_potential_h, + Grav.n_cells_potential * sizeof(Real), cudaMemcpyHostToDevice)); #endif } @@ -620,8 +588,7 @@ void Grid3D::Add_Analytic_Potential() omp_id = omp_get_thread_num(); n_omp_procs = omp_get_num_threads(); - Get_OMP_Grid_Indxs(Grav.nz_local + 2 * N_GHOST_POTENTIAL, n_omp_procs, - omp_id, &g_start, &g_end); + Get_OMP_Grid_Indxs(Grav.nz_local + 2 * N_GHOST_POTENTIAL, n_omp_procs, omp_id, &g_start, &g_end); Add_Analytic_Potential(g_start, g_end); } @@ -639,8 +606,7 @@ void Grid3D::Copy_Hydro_Density_to_Gravity_Function(int g_start, int g_end) for (k = g_start; k < g_end; k++) { for (j = 0; j < Grav.ny_local; j++) { for (i = 0; i < Grav.nx_local; i++) { - id = (i + H.n_ghost) + (j + H.n_ghost) * H.nx + - (k + H.n_ghost) * H.nx * H.ny; + id = (i + H.n_ghost) + (j + H.n_ghost) * H.nx + (k + H.n_ghost) * H.nx * H.ny; id_grav = (i) + (j)*Grav.nx_local + (k)*Grav.nx_local * Grav.ny_local; dens = C.density[id]; @@ -651,8 +617,7 @@ void Grid3D::Copy_Hydro_Density_to_Gravity_Function(int g_start, int g_end) #endif #ifdef PARTICLES - Grav.F.density_h[id_grav] += - dens; // Hydro density is added AFTER partices density + Grav.F.density_h[id_grav] += dens; // Hydro density is added AFTER partices density #else Grav.F.density_h[id_grav] = dens; #endif @@ -688,8 +653,7 @@ void Grid3D::Copy_Hydro_Density_to_Gravity() } #ifdef GRAVITY_ANALYTIC_COMP -void Grid3D::Setup_Analytic_Galaxy_Potential(int g_start, int g_end, - DiskGalaxy &gal) +void Grid3D::Setup_Analytic_Galaxy_Potential(int g_start, int g_end, DiskGalaxy &gal) { int nx = Grav.nx_local + 2 * N_GHOST_POTENTIAL; int ny = Grav.ny_local + 2 * N_GHOST_POTENTIAL; @@ -704,14 +668,12 @@ void Grid3D::Setup_Analytic_Galaxy_Potential(int g_start, int g_end, for (k = g_start; k < g_end; k++) { for (j = 0; j < ny; j++) { for (i = 0; i < nx; i++) { - id = i + j * nx + k * nx * ny; - x_pos = Grav.xMin + Grav.dx * (i - N_GHOST_POTENTIAL) + 0.5 * Grav.dx; - y_pos = Grav.yMin + Grav.dy * (j - N_GHOST_POTENTIAL) + 0.5 * Grav.dy; - z_pos = Grav.zMin + Grav.dz * (k - N_GHOST_POTENTIAL) + 0.5 * Grav.dz; - R = sqrt(x_pos * x_pos + y_pos * y_pos); - Grav.F.analytic_potential_h[id] = - non_mod_frac * gal.phi_disk_D3D(R, z_pos) + - gal.phi_halo_D3D(R, z_pos); + id = i + j * nx + k * nx * ny; + x_pos = Grav.xMin + Grav.dx * (i - N_GHOST_POTENTIAL) + 0.5 * Grav.dx; + y_pos = Grav.yMin + Grav.dy * (j - N_GHOST_POTENTIAL) + 0.5 * Grav.dy; + z_pos = Grav.zMin + Grav.dz * (k - N_GHOST_POTENTIAL) + 0.5 * Grav.dz; + R = sqrt(x_pos * x_pos + y_pos * y_pos); + Grav.F.analytic_potential_h[id] = non_mod_frac * gal.phi_disk_D3D(R, z_pos) + gal.phi_halo_D3D(R, z_pos); } } } @@ -772,19 +734,16 @@ void Grid3D::Extrapolate_Grav_Potential_Function(int g_start, int g_end) for (j = 0; j < ny_pot; j++) { for (i = 0; i < nx_pot; i++) { id_pot = i + j * nx_pot + k * nx_pot * ny_pot; - id_grid = (i + nGHST) + (j + nGHST) * nx_grid + - (k + nGHST) * nx_grid * ny_grid; + id_grid = (i + nGHST) + (j + nGHST) * nx_grid + (k + nGHST) * nx_grid * ny_grid; pot_now = potential_in[id_pot]; // Potential at the n-th timestep if (Grav.INITIAL) { pot_extrp = pot_now; // The first timestep the extrapolated potential // is phi_0 } else { - pot_prev = - Grav.F.potential_1_h[id_pot]; // Potential at the (n-1)-th - // timestep ( previous step ) + pot_prev = Grav.F.potential_1_h[id_pot]; // Potential at the (n-1)-th + // timestep ( previous step ) // Compute the extrapolated potential from phi_n-1 and phi_n - pot_extrp = - pot_now + 0.5 * Grav.dt_now * (pot_now - pot_prev) / Grav.dt_prev; + pot_extrp = pot_now + 0.5 * Grav.dt_now * (pot_now - pot_prev) / Grav.dt_prev; } #ifdef COSMOLOGY @@ -820,8 +779,7 @@ void Grid3D::Extrapolate_Grav_Potential() omp_id = omp_get_thread_num(); n_omp_procs = omp_get_num_threads(); - Get_OMP_Grid_Indxs(Grav.nz_local + 2 * N_GHOST_POTENTIAL, n_omp_procs, - omp_id, &g_start, &g_end); + Get_OMP_Grid_Indxs(Grav.nz_local + 2 * N_GHOST_POTENTIAL, n_omp_procs, omp_id, &g_start, &g_end); Extrapolate_Grav_Potential_Function(g_start, g_end); } diff --git a/src/gravity/gravity_functions_gpu.cu b/src/gravity/gravity_functions_gpu.cu index 036b2974d..93533b46f 100644 --- a/src/gravity/gravity_functions_gpu.cu +++ b/src/gravity/gravity_functions_gpu.cu @@ -10,41 +10,26 @@ void Grav3D::AllocateMemory_GPU() { CudaSafeCall(cudaMalloc((void **)&F.density_d, n_cells * sizeof(Real))); - CudaSafeCall( - cudaMalloc((void **)&F.potential_d, n_cells_potential * sizeof(Real))); - CudaSafeCall( - cudaMalloc((void **)&F.potential_1_d, n_cells_potential * sizeof(Real))); + CudaSafeCall(cudaMalloc((void **)&F.potential_d, n_cells_potential * sizeof(Real))); + CudaSafeCall(cudaMalloc((void **)&F.potential_1_d, n_cells_potential * sizeof(Real))); #ifdef GRAVITY_GPU #ifdef GRAVITY_ANALYTIC_COMP - CudaSafeCall(cudaMalloc((void **)&F.analytic_potential_d, - n_cells_potential * sizeof(Real))); + CudaSafeCall(cudaMalloc((void **)&F.analytic_potential_d, n_cells_potential * sizeof(Real))); #endif #ifdef GRAV_ISOLATED_BOUNDARY_X - CudaSafeCall( - cudaMalloc((void **)&F.pot_boundary_x0_d, - N_GHOST_POTENTIAL * ny_local * nz_local * sizeof(Real))); - CudaSafeCall( - cudaMalloc((void **)&F.pot_boundary_x1_d, - N_GHOST_POTENTIAL * ny_local * nz_local * sizeof(Real))); + CudaSafeCall(cudaMalloc((void **)&F.pot_boundary_x0_d, N_GHOST_POTENTIAL * ny_local * nz_local * sizeof(Real))); + CudaSafeCall(cudaMalloc((void **)&F.pot_boundary_x1_d, N_GHOST_POTENTIAL * ny_local * nz_local * sizeof(Real))); #endif #ifdef GRAV_ISOLATED_BOUNDARY_Y - CudaSafeCall( - cudaMalloc((void **)&F.pot_boundary_y0_d, - N_GHOST_POTENTIAL * nx_local * nz_local * sizeof(Real))); - CudaSafeCall( - cudaMalloc((void **)&F.pot_boundary_y1_d, - N_GHOST_POTENTIAL * nx_local * nz_local * sizeof(Real))); + CudaSafeCall(cudaMalloc((void **)&F.pot_boundary_y0_d, N_GHOST_POTENTIAL * nx_local * nz_local * sizeof(Real))); + CudaSafeCall(cudaMalloc((void **)&F.pot_boundary_y1_d, N_GHOST_POTENTIAL * nx_local * nz_local * sizeof(Real))); #endif #ifdef GRAV_ISOLATED_BOUNDARY_Z - CudaSafeCall( - cudaMalloc((void **)&F.pot_boundary_z0_d, - N_GHOST_POTENTIAL * nx_local * ny_local * sizeof(Real))); - CudaSafeCall( - cudaMalloc((void **)&F.pot_boundary_z1_d, - N_GHOST_POTENTIAL * nx_local * ny_local * sizeof(Real))); + CudaSafeCall(cudaMalloc((void **)&F.pot_boundary_z0_d, N_GHOST_POTENTIAL * nx_local * ny_local * sizeof(Real))); + CudaSafeCall(cudaMalloc((void **)&F.pot_boundary_z1_d, N_GHOST_POTENTIAL * nx_local * ny_local * sizeof(Real))); #endif #endif // GRAVITY_GPU @@ -80,11 +65,8 @@ void Grav3D::FreeMemory_GPU(void) #endif // GRAVITY_GPU } -void __global__ Copy_Hydro_Density_to_Gravity_Kernel(Real *src_density_d, - Real *dst_density_d, - int nx_local, int ny_local, - int nz_local, int n_ghost, - Real cosmo_rho_0_gas) +void __global__ Copy_Hydro_Density_to_Gravity_Kernel(Real *src_density_d, Real *dst_density_d, int nx_local, + int ny_local, int nz_local, int n_ghost, Real cosmo_rho_0_gas) { int tid_x, tid_y, tid_z, tid_grid, tid_dens; tid_x = blockIdx.x * blockDim.x + threadIdx.x; @@ -112,8 +94,7 @@ void __global__ Copy_Hydro_Density_to_Gravity_Kernel(Real *src_density_d, #endif #ifdef PARTICLES - dst_density_d[tid_dens] += - dens; // Hydro density is added AFTER partices density + dst_density_d[tid_dens] += dens; // Hydro density is added AFTER partices density #else dst_density_d[tid_dens] = dens; #endif @@ -148,15 +129,12 @@ void Grid3D::Copy_Hydro_Density_to_Gravity_GPU() #endif // Copy the density from the device array to the Poisson input density array - hipLaunchKernelGGL(Copy_Hydro_Density_to_Gravity_Kernel, dim3dGrid, - dim3dBlock, 0, 0, C.d_density, Grav.F.density_d, nx_local, - ny_local, nz_local, n_ghost, cosmo_rho_0_gas); + hipLaunchKernelGGL(Copy_Hydro_Density_to_Gravity_Kernel, dim3dGrid, dim3dBlock, 0, 0, C.d_density, Grav.F.density_d, + nx_local, ny_local, nz_local, n_ghost, cosmo_rho_0_gas); } #if defined(GRAVITY_ANALYTIC_COMP) -void __global__ Add_Analytic_Potential_Kernel(Real *analytic_d, - Real *potential_d, int nx_pot, - int ny_pot, int nz_pot) +void __global__ Add_Analytic_Potential_Kernel(Real *analytic_d, Real *potential_d, int nx_pot, int ny_pot, int nz_pot) { int tid_x, tid_y, tid_z, tid; tid_x = blockIdx.x * blockDim.x + threadIdx.x; @@ -200,9 +178,8 @@ void Grid3D::Add_Analytic_Potential_GPU() // Copy the analytic potential from the device array to the device potential // array - hipLaunchKernelGGL(Add_Analytic_Potential_Kernel, dim3dGrid, dim3dBlock, 0, 0, - Grav.F.analytic_potential_d, Grav.F.potential_d, nx_pot, - ny_pot, nz_pot); + hipLaunchKernelGGL(Add_Analytic_Potential_Kernel, dim3dGrid, dim3dBlock, 0, 0, Grav.F.analytic_potential_d, + Grav.F.potential_d, nx_pot, ny_pot, nz_pot); cudaDeviceSynchronize(); /*gpuFor(10, GPU_LAMBDA(const int i) { @@ -213,10 +190,10 @@ void Grid3D::Add_Analytic_Potential_GPU() } #endif // GRAVITY_ANALYTIC_COMP -void __global__ Extrapolate_Grav_Potential_Kernel( - Real *dst_potential, Real *src_potential_0, Real *src_potential_1, - int nx_pot, int ny_pot, int nz_pot, int nx_grid, int ny_grid, int nz_grid, - int n_offset, Real dt_now, Real dt_prev, bool INITIAL, Real cosmo_factor) +void __global__ Extrapolate_Grav_Potential_Kernel(Real *dst_potential, Real *src_potential_0, Real *src_potential_1, + int nx_pot, int ny_pot, int nz_pot, int nx_grid, int ny_grid, + int nz_grid, int n_offset, Real dt_now, Real dt_prev, bool INITIAL, + Real cosmo_factor) { int tid_x, tid_y, tid_z, tid_grid, tid_pot; tid_x = blockIdx.x * blockDim.x + threadIdx.x; @@ -236,8 +213,7 @@ void __global__ Extrapolate_Grav_Potential_Kernel( Real pot_now, pot_prev, pot_extrp; pot_now = src_potential_0[tid_pot]; // Potential at the n-th timestep if (INITIAL) { - pot_extrp = - pot_now; // The first timestep the extrapolated potential is phi_0 + pot_extrp = pot_now; // The first timestep the extrapolated potential is phi_0 } else { pot_prev = src_potential_1[tid_pot]; // Potential at the (n-1)-th timestep // ( previous step ) @@ -294,18 +270,15 @@ void Grid3D::Extrapolate_Grav_Potential_GPU() // number of threads per 1D block dim3 dim3dBlock(tpb_x, tpb_y, tpb_z); - hipLaunchKernelGGL(Extrapolate_Grav_Potential_Kernel, dim3dGrid, dim3dBlock, - 0, 0, C.d_Grav_potential, Grav.F.potential_d, - Grav.F.potential_1_d, nx_pot, ny_pot, nz_pot, nx_grid, - ny_grid, nz_grid, n_offset, dt_now, dt_prev, Grav.INITIAL, - cosmo_factor); + hipLaunchKernelGGL(Extrapolate_Grav_Potential_Kernel, dim3dGrid, dim3dBlock, 0, 0, C.d_Grav_potential, + Grav.F.potential_d, Grav.F.potential_1_d, nx_pot, ny_pot, nz_pot, nx_grid, ny_grid, nz_grid, + n_offset, dt_now, dt_prev, Grav.INITIAL, cosmo_factor); } #ifdef PARTICLES_CPU void Grid3D::Copy_Potential_From_GPU() { - CudaSafeCall(cudaMemcpy(Grav.F.potential_h, Grav.F.potential_d, - Grav.n_cells_potential * sizeof(Real), + CudaSafeCall(cudaMemcpy(Grav.F.potential_h, Grav.F.potential_d, Grav.n_cells_potential * sizeof(Real), cudaMemcpyDeviceToHost)); cudaDeviceSynchronize(); } diff --git a/src/gravity/gravity_restart.cpp b/src/gravity/gravity_restart.cpp index 0733ef0c9..1944c7b10 100644 --- a/src/gravity/gravity_restart.cpp +++ b/src/gravity/gravity_restart.cpp @@ -42,9 +42,7 @@ void Grav3D::Read_Restart_HDF5(struct parameters* P, int nfile) // Read potential and copy to device to be used as potential n-1 Read_HDF5_Dataset(file_id, F.potential_1_h, "/potential"); #ifdef GRAVITY_GPU - CudaSafeCall(cudaMemcpy(F.potential_1_d, F.potential_1_h, - n_cells_potential * sizeof(Real), - cudaMemcpyHostToDevice)); + CudaSafeCall(cudaMemcpy(F.potential_1_d, F.potential_1_h, n_cells_potential * sizeof(Real), cudaMemcpyHostToDevice)); #endif H5Fclose(file_id); @@ -65,8 +63,7 @@ void Grav3D::Write_Restart_HDF5(struct parameters* P, int nfile) hsize_t attr_dims = 1; hid_t dataspace_id = H5Screate_simple(1, &attr_dims, NULL); - hid_t attribute_id = H5Acreate(file_id, "dt_now", H5T_IEEE_F64BE, - dataspace_id, H5P_DEFAULT, H5P_DEFAULT); + hid_t attribute_id = H5Acreate(file_id, "dt_now", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT); herr_t status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &dt_now); status = H5Aclose(attribute_id); @@ -74,9 +71,7 @@ void Grav3D::Write_Restart_HDF5(struct parameters* P, int nfile) // Copy device to host if needed #ifdef GRAVITY_GPU - CudaSafeCall(cudaMemcpy(F.potential_1_h, F.potential_1_d, - n_cells_potential * sizeof(Real), - cudaMemcpyDeviceToHost)); + CudaSafeCall(cudaMemcpy(F.potential_1_h, F.potential_1_d, n_cells_potential * sizeof(Real), cudaMemcpyDeviceToHost)); #endif // Write potential @@ -96,13 +91,11 @@ void Grav3D::Write_Restart_HDF5(struct parameters* P, int nfile) // Do nothing void Grav3D::Read_Restart_HDF5(struct parameters* P, int nfile) { - chprintf("WARNING from file %s line %d: Read_Restart_HDF5 did nothing", - __FILE__, __LINE__); + chprintf("WARNING from file %s line %d: Read_Restart_HDF5 did nothing", __FILE__, __LINE__); } void Grav3D::Write_Restart_HDF5(struct parameters* P, int nfile) { - chprintf("WARNING from file %s line %d: Write_Restart_HDF5 did nothing", - __FILE__, __LINE__); + chprintf("WARNING from file %s line %d: Write_Restart_HDF5 did nothing", __FILE__, __LINE__); } #endif diff --git a/src/gravity/paris/HenryPeriodic.cu b/src/gravity/paris/HenryPeriodic.cu index 8924e4541..8053dfecb 100644 --- a/src/gravity/paris/HenryPeriodic.cu +++ b/src/gravity/paris/HenryPeriodic.cu @@ -7,9 +7,7 @@ #include "HenryPeriodic.hpp" -HenryPeriodic::HenryPeriodic(const int n[3], const double lo[3], - const double hi[3], const int m[3], - const int id[3]) +HenryPeriodic::HenryPeriodic(const int n[3], const double lo[3], const double hi[3], const int m[3], const int id[3]) : idi_(id[0]), idj_(id[1]), idk_(id[2]), @@ -64,26 +62,21 @@ HenryPeriodic::HenryPeriodic(const int n[3], const double lo[3], // Maximum memory needed by work arrays - const long nMax = std::max( - {long(di_) * long(dj_) * long(dk_), - long(mp_) * long(mq_) * long(dip_) * long(djq_) * long(dk_), - long(2) * long(dip_) * long(djq_) * long(mk_) * long(dh_), - long(2) * long(dip_) * long(mp_) * long(djq_) * long(mq_) * long(dh_), - long(2) * long(dip_) * long(djq_) * long(mjq) * long(dhq_), - long(2) * long(dip_) * long(dhq_) * long(mip) * long(djp_), - long(2) * djp_ * long(dhq_) * long(mip) * long(dip_)}); + const long nMax = + std::max({long(di_) * long(dj_) * long(dk_), long(mp_) * long(mq_) * long(dip_) * long(djq_) * long(dk_), + long(2) * long(dip_) * long(djq_) * long(mk_) * long(dh_), + long(2) * long(dip_) * long(mp_) * long(djq_) * long(mq_) * long(dh_), + long(2) * long(dip_) * long(djq_) * long(mjq) * long(dhq_), + long(2) * long(dip_) * long(dhq_) * long(mip) * long(djp_), + long(2) * djp_ * long(dhq_) * long(mip) * long(dip_)}); assert(nMax <= INT_MAX); bytes_ = nMax * sizeof(double); // FFT objects - CHECK(cufftPlanMany(&c2ci_, 1, &ni_, &ni_, 1, ni_, &ni_, 1, ni_, CUFFT_Z2Z, - djp_ * dhq_)); - CHECK(cufftPlanMany(&c2cj_, 1, &nj_, &nj_, 1, nj_, &nj_, 1, nj_, CUFFT_Z2Z, - dip_ * dhq_)); - CHECK(cufftPlanMany(&c2rk_, 1, &nk_, &nh_, 1, nh_, &nk_, 1, nk_, CUFFT_Z2D, - dip_ * djq_)); - CHECK(cufftPlanMany(&r2ck_, 1, &nk_, &nk_, 1, nk_, &nh_, 1, nh_, CUFFT_D2Z, - dip_ * djq_)); + CHECK(cufftPlanMany(&c2ci_, 1, &ni_, &ni_, 1, ni_, &ni_, 1, ni_, CUFFT_Z2Z, djp_ * dhq_)); + CHECK(cufftPlanMany(&c2cj_, 1, &nj_, &nj_, 1, nj_, &nj_, 1, nj_, CUFFT_Z2Z, dip_ * dhq_)); + CHECK(cufftPlanMany(&c2rk_, 1, &nk_, &nh_, 1, nh_, &nk_, 1, nk_, CUFFT_Z2D, dip_ * djq_)); + CHECK(cufftPlanMany(&r2ck_, 1, &nk_, &nk_, 1, nk_, &nh_, 1, nh_, CUFFT_D2Z, dip_ * djq_)); #ifndef MPI_GPU // Host arrays for MPI communication diff --git a/src/gravity/paris/ParisPeriodic.cu b/src/gravity/paris/ParisPeriodic.cu index b7e081f78..0686626c6 100644 --- a/src/gravity/paris/ParisPeriodic.cu +++ b/src/gravity/paris/ParisPeriodic.cu @@ -6,9 +6,7 @@ __host__ __device__ static inline double sqr(const double x) { return x * x; } -ParisPeriodic::ParisPeriodic(const int n[3], const double lo[3], - const double hi[3], const int m[3], - const int id[3]) +ParisPeriodic::ParisPeriodic(const int n[3], const double lo[3], const double hi[3], const int m[3], const int id[3]) : ni_(n[0]), nj_(n[1]), #ifdef PARIS_3PT @@ -30,8 +28,7 @@ ParisPeriodic::ParisPeriodic(const int n[3], const double lo[3], { } -void ParisPeriodic::solve(const size_t bytes, double *const density, - double *const potential) const +void ParisPeriodic::solve(const size_t bytes, double *const density, double *const potential) const { // Local copies of members for lambda capture const int ni = ni_, nj = nj_; @@ -51,15 +48,13 @@ void ParisPeriodic::solve(const size_t bytes, double *const density, #endif // Provide FFT filter with a lambda that does Poisson solve in frequency space - henry.filter( - bytes, density, potential, - [=] __device__(const int i, const int j, const int k, - const cufftDoubleComplex b) { - if (i || j || k) { + henry.filter(bytes, density, potential, + [=] __device__(const int i, const int j, const int k, const cufftDoubleComplex b) { + if (i || j || k) { #ifdef PARIS_3PT - const double i2 = sqr(sin(double(min(i, ni - i)) * si) * ddi); - const double j2 = sqr(sin(double(min(j, nj - j)) * sj) * ddj); - const double k2 = sqr(sin(double(k) * sk) * ddk); + const double i2 = sqr(sin(double(min(i, ni - i)) * si) * ddi); + const double j2 = sqr(sin(double(min(j, nj - j)) * sj) * ddj); + const double k2 = sqr(sin(double(k) * sk) * ddk); #elif defined PARIS_5PT const double ci = cos(double(min(i, ni - i)) * si); const double cj = cos(double(min(j, nj - j)) * sj); @@ -72,12 +67,12 @@ void ParisPeriodic::solve(const size_t bytes, double *const density, const double j2 = sqr(double(min(j, nj - j)) * ddj); const double k2 = sqr(double(k) * ddk); #endif - const double d = -1.0 / (i2 + j2 + k2); - return cufftDoubleComplex{d * b.x, d * b.y}; - } else { - return cufftDoubleComplex{0.0, 0.0}; - } - }); + const double d = -1.0 / (i2 + j2 + k2); + return cufftDoubleComplex{d * b.x, d * b.y}; + } else { + return cufftDoubleComplex{0.0, 0.0}; + } + }); } #endif diff --git a/src/gravity/paris/PoissonZero3DBlockedGPU.cu b/src/gravity/paris/PoissonZero3DBlockedGPU.cu index 57d068e43..a741b567e 100644 --- a/src/gravity/paris/PoissonZero3DBlockedGPU.cu +++ b/src/gravity/paris/PoissonZero3DBlockedGPU.cu @@ -8,15 +8,11 @@ #include "PoissonZero3DBlockedGPU.hpp" -static constexpr double sqrt2 = - 0.4142135623730950488016887242096980785696718753769480731766797379; +static constexpr double sqrt2 = 0.4142135623730950488016887242096980785696718753769480731766797379; static inline __host__ __device__ double sqr(const double x) { return x * x; } -PoissonZero3DBlockedGPU::PoissonZero3DBlockedGPU(const int n[3], - const double lo[3], - const double hi[3], - const int m[3], +PoissonZero3DBlockedGPU::PoissonZero3DBlockedGPU(const int n[3], const double lo[3], const double hi[3], const int m[3], const int id[3]) : #ifdef PARIS_GALACTIC_3PT @@ -80,21 +76,17 @@ PoissonZero3DBlockedGPU::PoissonZero3DBlockedGPU(const int n[3], nj2_ = 2 * (nj_ / 2 + 1); nk2_ = 2 * (nk_ / 2 + 1); - const long nMax = std::max( - {di_ * dj_ * dk_, dip_ * djq_ * mk_ * dk_, dip_ * mp_ * djq_ * mq_ * dk_, - dip_ * djq_ * nk2_, dip_ * djq_ * mjq * dkq_, dip_ * dkq_ * nj2_, - dip_ * dkq_ * mip * djp_, dkq_ * djp_ * mip * dip_, dkq_ * djp_ * ni2_}); - bytes_ = nMax * sizeof(double); + const long nMax = std::max({di_ * dj_ * dk_, dip_ * djq_ * mk_ * dk_, dip_ * mp_ * djq_ * mq_ * dk_, + dip_ * djq_ * nk2_, dip_ * djq_ * mjq * dkq_, dip_ * dkq_ * nj2_, + dip_ * dkq_ * mip * djp_, dkq_ * djp_ * mip * dip_, dkq_ * djp_ * ni2_}); + bytes_ = nMax * sizeof(double); int nkh = nk_ / 2 + 1; - CHECK(cufftPlanMany(&d2zk_, 1, &nk_, &nk_, 1, nk_, &nkh, 1, nkh, CUFFT_D2Z, - dip_ * djq_)); + CHECK(cufftPlanMany(&d2zk_, 1, &nk_, &nk_, 1, nk_, &nkh, 1, nkh, CUFFT_D2Z, dip_ * djq_)); int njh = nj_ / 2 + 1; - CHECK(cufftPlanMany(&d2zj_, 1, &nj_, &nj_, 1, nj_, &njh, 1, njh, CUFFT_D2Z, - dip_ * dkq_)); + CHECK(cufftPlanMany(&d2zj_, 1, &nj_, &nj_, 1, nj_, &njh, 1, njh, CUFFT_D2Z, dip_ * dkq_)); int nih = ni_ / 2 + 1; - CHECK(cufftPlanMany(&d2zi_, 1, &ni_, &ni_, 1, ni_, &nih, 1, nih, CUFFT_D2Z, - dkq_ * djp_)); + CHECK(cufftPlanMany(&d2zi_, 1, &ni_, &ni_, 1, ni_, &nih, 1, nih, CUFFT_D2Z, dkq_ * djp_)); #ifndef MPI_GPU CHECK(cudaHostAlloc(&ha_, bytes_ + bytes_, cudaHostAllocDefault)); assert(ha_); @@ -116,8 +108,7 @@ PoissonZero3DBlockedGPU::~PoissonZero3DBlockedGPU() MPI_Comm_free(&commK_); } -void print(const char *const title, const int ni, const int nj, const int nk, - const double *const v) +void print(const char *const title, const int ni, const int nj, const int nk, const double *const v) { printf("%s:\n", title); for (int i = 0; i < ni; i++) { @@ -130,8 +121,7 @@ void print(const char *const title, const int ni, const int nj, const int nk, printf("\n"); } -void PoissonZero3DBlockedGPU::solve(const long bytes, double *const density, - double *const potential) const +void PoissonZero3DBlockedGPU::solve(const long bytes, double *const density, double *const potential) const { assert(bytes >= bytes_); assert(density); @@ -165,24 +155,19 @@ void PoissonZero3DBlockedGPU::solve(const long bytes, double *const density, const int nk2 = nk2_; gpuFor( - mp, mq, dip, djq, dk, - GPU_LAMBDA(const int p, const int q, const int i, const int j, - const int k) { + mp, mq, dip, djq, dk, GPU_LAMBDA(const int p, const int q, const int i, const int j, const int k) { const int iLo = p * dip; const int jLo = q * djq; if ((i + iLo < di) && (j + jLo < dj)) - ua[(((p * mq + q) * dip + i) * djq + j) * dk + k] = - ub[((i + iLo) * dj + j + jLo) * dk + k]; + ua[(((p * mq + q) * dip + i) * djq + j) * dk + k] = ub[((i + iLo) * dj + j + jLo) * dk + k]; }); #ifndef MPI_GPU CHECK(cudaMemcpy(ha_, ua, bytes_, cudaMemcpyDeviceToHost)); - MPI_Alltoall(ha_, dip * djq * dk, MPI_DOUBLE, hb_, dip * djq * dk, MPI_DOUBLE, - commK_); + MPI_Alltoall(ha_, dip * djq * dk, MPI_DOUBLE, hb_, dip * djq * dk, MPI_DOUBLE, commK_); CHECK(cudaMemcpyAsync(ub, hb_, bytes_, cudaMemcpyHostToDevice, 0)); #else CHECK(cudaDeviceSynchronize()); - MPI_Alltoall(ua, dip * djq * dk, MPI_DOUBLE, ub, dip * djq * dk, MPI_DOUBLE, - commK_); + MPI_Alltoall(ua, dip * djq * dk, MPI_DOUBLE, ub, dip * djq * dk, MPI_DOUBLE, commK_); #endif gpuFor( dip, djq, nk / 2 + 1, GPU_LAMBDA(const int i, const int j, const int k) { @@ -207,15 +192,13 @@ void PoissonZero3DBlockedGPU::solve(const long bytes, double *const density, gpuFor( dip, nk / 2 + 1, djq, GPU_LAMBDA(const int i, const int k, const int j) { if (k == 0) { - const int q0 = (nk - 1) / dkq; - const int k0 = (nk - 1) % dkq; - ua[((q0 * dip + i) * dkq + k0) * djq + j] = - 2.0 * ub[(i * djq + j) * nk2]; + const int q0 = (nk - 1) / dkq; + const int k0 = (nk - 1) % dkq; + ua[((q0 * dip + i) * dkq + k0) * djq + j] = 2.0 * ub[(i * djq + j) * nk2]; } else if (k + k == nk) { - const int qa = (nk / 2 - 1) / dkq; - const int ka = (nk / 2 - 1) % dkq; - ua[((qa * dip + i) * dkq + ka) * djq + j] = - sqrt2 * ub[(i * djq + j) * nk2 + nk]; + const int qa = (nk / 2 - 1) / dkq; + const int ka = (nk / 2 - 1) % dkq; + ua[((qa * dip + i) * dkq + ka) * djq + j] = sqrt2 * ub[(i * djq + j) * nk2 + nk]; } else { const int qa = (nk - k - 1) / dkq; const int ka = (nk - k - 1) % dkq; @@ -231,13 +214,11 @@ void PoissonZero3DBlockedGPU::solve(const long bytes, double *const density, }); #ifndef MPI_GPU CHECK(cudaMemcpy(ha_, ua, bytes_, cudaMemcpyDeviceToHost)); - MPI_Alltoall(ha_, dip * dkq * djq, MPI_DOUBLE, hb_, dip * dkq * djq, - MPI_DOUBLE, commJ_); + MPI_Alltoall(ha_, dip * dkq * djq, MPI_DOUBLE, hb_, dip * dkq * djq, MPI_DOUBLE, commJ_); CHECK(cudaMemcpyAsync(ub, hb_, bytes_, cudaMemcpyHostToDevice, 0)); #else CHECK(cudaDeviceSynchronize()); - MPI_Alltoall(ua, dip * dkq * djq, MPI_DOUBLE, ub, dip * dkq * djq, MPI_DOUBLE, - commJ_); + MPI_Alltoall(ua, dip * dkq * djq, MPI_DOUBLE, ub, dip * dkq * djq, MPI_DOUBLE, commJ_); #endif gpuFor( dip, dkq, nj / 2 + 1, GPU_LAMBDA(const int i, const int k, const int j) { @@ -261,15 +242,13 @@ void PoissonZero3DBlockedGPU::solve(const long bytes, double *const density, gpuFor( dkq, nj / 2 + 1, dip, GPU_LAMBDA(const int k, const int j, const int i) { if (j == 0) { - const int pa = (nj - 1) / djp; - const int ja = (nj - 1) % djp; - ua[((pa * dkq + k) * djp + ja) * dip + i] = - 2.0 * ub[(i * dkq + k) * nj2]; + const int pa = (nj - 1) / djp; + const int ja = (nj - 1) % djp; + ua[((pa * dkq + k) * djp + ja) * dip + i] = 2.0 * ub[(i * dkq + k) * nj2]; } else if (j + j == nj) { - const int pa = (nj / 2 - 1) / djp; - const int ja = (nj / 2 - 1) % djp; - ua[((pa * dkq + k) * djp + ja) * dip + i] = - sqrt2 * ub[(i * dkq + k) * nj2 + nj]; + const int pa = (nj / 2 - 1) / djp; + const int ja = (nj / 2 - 1) % djp; + ua[((pa * dkq + k) * djp + ja) * dip + i] = sqrt2 * ub[(i * dkq + k) * nj2 + nj]; } else { const double aj = 2.0 * ub[(i * dkq + k) * nj2 + 2 * j]; const double bj = 2.0 * ub[(i * dkq + k) * nj2 + 2 * j + 1]; @@ -285,13 +264,11 @@ void PoissonZero3DBlockedGPU::solve(const long bytes, double *const density, }); #ifndef MPI_GPU CHECK(cudaMemcpy(ha_, ua, bytes_, cudaMemcpyDeviceToHost)); - MPI_Alltoall(ha_, dkq * djp * dip, MPI_DOUBLE, hb_, dkq * djp * dip, - MPI_DOUBLE, commI_); + MPI_Alltoall(ha_, dkq * djp * dip, MPI_DOUBLE, hb_, dkq * djp * dip, MPI_DOUBLE, commI_); CHECK(cudaMemcpyAsync(ub, hb_, bytes_, cudaMemcpyHostToDevice, 0)); #else CHECK(cudaDeviceSynchronize()); - MPI_Alltoall(ua, dkq * djp * dip, MPI_DOUBLE, ub, dkq * djp * dip, MPI_DOUBLE, - commI_); + MPI_Alltoall(ua, dkq * djp * dip, MPI_DOUBLE, ub, dkq * djp * dip, MPI_DOUBLE, commI_); #endif gpuFor( dkq, djp, ni / 2 + 1, GPU_LAMBDA(const int k, const int j, const int i) { @@ -299,21 +276,19 @@ void PoissonZero3DBlockedGPU::solve(const long bytes, double *const density, if (i == 0) { ua[kj] = ub[(k * djp + j) * dip]; } else if (i + i == ni) { - const int ida = (ni - 1) / di; - const int pa = (ni - 1) % di / dip; - const int ia = ni - 1 - ida * di - pa * dip; - ua[kj + ni / 2] = - -ub[(((ida * mp + pa) * dkq + k) * djp + j) * dip + ia]; + const int ida = (ni - 1) / di; + const int pa = (ni - 1) % di / dip; + const int ia = ni - 1 - ida * di - pa * dip; + ua[kj + ni / 2] = -ub[(((ida * mp + pa) * dkq + k) * djp + j) * dip + ia]; } else { - const int ida = (i + i - 1) / di; - const int pa = (i + i - 1) % di / dip; - const int ia = i + i - 1 - ida * di - pa * dip; - ua[kj + ni - i] = - -ub[(((ida * mp + pa) * dkq + k) * djp + j) * dip + ia]; - const int idb = (i + i) / di; - const int pb = (i + i) % di / dip; - const int ib = i + i - idb * di - pb * dip; - ua[kj + i] = ub[(((idb * mp + pb) * dkq + k) * djp + j) * dip + ib]; + const int ida = (i + i - 1) / di; + const int pa = (i + i - 1) % di / dip; + const int ia = i + i - 1 - ida * di - pa * dip; + ua[kj + ni - i] = -ub[(((ida * mp + pa) * dkq + k) * djp + j) * dip + ia]; + const int idb = (i + i) / di; + const int pb = (i + i) % di / dip; + const int ib = i + i - idb * di - pb * dip; + ua[kj + i] = ub[(((idb * mp + pb) * dkq + k) * djp + j) * dip + ib]; } }); CHECK(cufftExecD2Z(d2zi_, ua, uc)); @@ -335,13 +310,11 @@ void PoissonZero3DBlockedGPU::solve(const long bytes, double *const density, const int jLo = (idi * mp + idp) * djp; const int kLo = (idj * mq + idq) * dkq; gpuFor( - dkq, djp, ni / 2 + 1, - GPU_LAMBDA(const int k, const int j, const int i) { + dkq, djp, ni / 2 + 1, GPU_LAMBDA(const int k, const int j, const int i) { const int kj = (k * djp + j) * ni; const int kj2 = (k * djp + j) * ni2; #ifdef PARIS_GALACTIC_3PT - const double jjkk = sqr(sin(double(jLo + j + 1) * sj) * ddj) + - sqr(sin(double(kLo + k + 1) * sk) * ddk); + const double jjkk = sqr(sin(double(jLo + j + 1) * sj) * ddj) + sqr(sin(double(kLo + k + 1) * sk) * ddk); #elif defined PARIS_GALACTIC_5PT const double cj = cos(double(jLo + j + 1) * sj); const double jj = ddj * (2.0 * cj * cj - 16.0 * cj + 14.0); @@ -394,33 +367,30 @@ void PoissonZero3DBlockedGPU::solve(const long bytes, double *const density, if (i == 0) { ua[k * dip * djp + j] = ub[(k * djp + j) * ni2]; } else if (i + i == ni) { - const int ida = (ni - 1) / di; - const int pa = (ni - 1) % di / dip; - const int ia = ni - 1 - ida * di - pa * dip; - ua[(((ida * mp + pa) * dkq + k) * dip + ia) * djp + j] = - -ub[(k * djp + j) * ni2 + ni]; + const int ida = (ni - 1) / di; + const int pa = (ni - 1) % di / dip; + const int ia = ni - 1 - ida * di - pa * dip; + ua[(((ida * mp + pa) * dkq + k) * dip + ia) * djp + j] = -ub[(k * djp + j) * ni2 + ni]; } else { - const double ai = ub[(k * djp + j) * ni2 + i + i]; - const double bi = ub[(k * djp + j) * ni2 + i + i + 1]; - const int ida = (i + i - 1) / di; - const int pa = (i + i - 1) % di / dip; - const int ia = i + i - 1 - ida * di - pa * dip; + const double ai = ub[(k * djp + j) * ni2 + i + i]; + const double bi = ub[(k * djp + j) * ni2 + i + i + 1]; + const int ida = (i + i - 1) / di; + const int pa = (i + i - 1) % di / dip; + const int ia = i + i - 1 - ida * di - pa * dip; ua[(((ida * mp + pa) * dkq + k) * dip + ia) * djp + j] = bi - ai; const int idb = (i + i) / di; - const int pb = (i + i) % di / dip; - const int ib = i + i - idb * di - pb * dip; + const int pb = (i + i) % di / dip; + const int ib = i + i - idb * di - pb * dip; ua[(((idb * mp + pb) * dkq + k) * dip + ib) * djp + j] = ai + bi; } }); #ifndef MPI_GPU CHECK(cudaMemcpy(ha_, ua, bytes_, cudaMemcpyDeviceToHost)); - MPI_Alltoall(ha_, dkq * djp * dip, MPI_DOUBLE, hb_, dkq * djp * dip, - MPI_DOUBLE, commI_); + MPI_Alltoall(ha_, dkq * djp * dip, MPI_DOUBLE, hb_, dkq * djp * dip, MPI_DOUBLE, commI_); CHECK(cudaMemcpyAsync(ub, hb_, bytes_, cudaMemcpyHostToDevice, 0)); #else CHECK(cudaDeviceSynchronize()); - MPI_Alltoall(ua, dkq * djp * dip, MPI_DOUBLE, ub, dkq * djp * dip, MPI_DOUBLE, - commI_); + MPI_Alltoall(ua, dkq * djp * dip, MPI_DOUBLE, ub, dkq * djp * dip, MPI_DOUBLE, commI_); #endif gpuFor( dkq, dip, nj / 2 + 1, GPU_LAMBDA(const int k, const int i, const int j) { @@ -454,34 +424,31 @@ void PoissonZero3DBlockedGPU::solve(const long bytes, double *const density, if (j == 0) { ua[i * djq * dkq + k] = ub[(k * dip + i) * nj2]; } else if (j + j == nj) { - const int ida = (nj - 1) / dj; - const int qa = (nj - 1) % dj / djq; - const int ja = nj - 1 - ida * dj - qa * djq; - ua[(((ida * mq + qa) * dip + i) * djq + ja) * dkq + k] = - -ub[(k * dip + i) * nj2 + nj]; + const int ida = (nj - 1) / dj; + const int qa = (nj - 1) % dj / djq; + const int ja = nj - 1 - ida * dj - qa * djq; + ua[(((ida * mq + qa) * dip + i) * djq + ja) * dkq + k] = -ub[(k * dip + i) * nj2 + nj]; } else { - const int jj = j + j; - const int ida = (jj - 1) / dj; - const int qa = (jj - 1) % dj / djq; - const int ja = jj - 1 - ida * dj - qa * djq; - const int idb = jj / dj; - const int qb = jj % dj / djq; - const int jb = jj - idb * dj - qb * djq; - const double aj = ub[(k * dip + i) * nj2 + jj]; - const double bj = ub[(k * dip + i) * nj2 + jj + 1]; + const int jj = j + j; + const int ida = (jj - 1) / dj; + const int qa = (jj - 1) % dj / djq; + const int ja = jj - 1 - ida * dj - qa * djq; + const int idb = jj / dj; + const int qb = jj % dj / djq; + const int jb = jj - idb * dj - qb * djq; + const double aj = ub[(k * dip + i) * nj2 + jj]; + const double bj = ub[(k * dip + i) * nj2 + jj + 1]; ua[(((ida * mq + qa) * dip + i) * djq + ja) * dkq + k] = bj - aj; ua[(((idb * mq + qb) * dip + i) * djq + jb) * dkq + k] = aj + bj; } }); #ifndef MPI_GPU CHECK(cudaMemcpy(ha_, ua, bytes_, cudaMemcpyDeviceToHost)); - MPI_Alltoall(ha_, dip * djq * dkq, MPI_DOUBLE, hb_, dip * djq * dkq, - MPI_DOUBLE, commJ_); + MPI_Alltoall(ha_, dip * djq * dkq, MPI_DOUBLE, hb_, dip * djq * dkq, MPI_DOUBLE, commJ_); CHECK(cudaMemcpyAsync(ub, hb_, bytes_, cudaMemcpyHostToDevice, 0)); #else CHECK(cudaDeviceSynchronize()); - MPI_Alltoall(ua, dip * djq * dkq, MPI_DOUBLE, ub, dip * djq * dkq, MPI_DOUBLE, - commJ_); + MPI_Alltoall(ua, dip * djq * dkq, MPI_DOUBLE, ub, dip * djq * dkq, MPI_DOUBLE, commJ_); #endif gpuFor( dip, djq, nk / 2 + 1, GPU_LAMBDA(const int i, const int j, const int k) { @@ -516,16 +483,15 @@ void PoissonZero3DBlockedGPU::solve(const long bytes, double *const density, if (k == 0) { ua[(i * djq + j) * dk] = divN * ub[(i * djq + j) * nk2]; } else if (k + k == nk) { - const int pqa = (nk - 1) / dk; - const int ka = nk - 1 - pqa * dk; - ua[((pqa * dip + i) * djq + j) * dk + ka] = - -divN * ub[(i * djq + j) * nk2 + nk]; + const int pqa = (nk - 1) / dk; + const int ka = nk - 1 - pqa * dk; + ua[((pqa * dip + i) * djq + j) * dk + ka] = -divN * ub[(i * djq + j) * nk2 + nk]; } else { - const int kk = k + k; - const double ak = ub[(i * djq + j) * nk2 + kk]; - const double bk = ub[(i * djq + j) * nk2 + kk + 1]; - const int pqa = (kk - 1) / dk; - const int ka = kk - 1 - pqa * dk; + const int kk = k + k; + const double ak = ub[(i * djq + j) * nk2 + kk]; + const double bk = ub[(i * djq + j) * nk2 + kk + 1]; + const int pqa = (kk - 1) / dk; + const int ka = kk - 1 - pqa * dk; ua[((pqa * dip + i) * djq + j) * dk + ka] = divN * (bk - ak); const int pqb = kk / dk; const int kb = kk - pqb * dk; @@ -534,23 +500,18 @@ void PoissonZero3DBlockedGPU::solve(const long bytes, double *const density, }); #ifndef MPI_GPU CHECK(cudaMemcpy(ha_, ua, bytes_, cudaMemcpyDeviceToHost)); - MPI_Alltoall(ha_, dip * djq * dk, MPI_DOUBLE, hb_, dip * djq * dk, MPI_DOUBLE, - commK_); + MPI_Alltoall(ha_, dip * djq * dk, MPI_DOUBLE, hb_, dip * djq * dk, MPI_DOUBLE, commK_); CHECK(cudaMemcpyAsync(ub, hb_, bytes_, cudaMemcpyHostToDevice, 0)); #else CHECK(cudaDeviceSynchronize()); - MPI_Alltoall(ua, dip * djq * dk, MPI_DOUBLE, ub, dip * djq * dk, MPI_DOUBLE, - commK_); + MPI_Alltoall(ua, dip * djq * dk, MPI_DOUBLE, ub, dip * djq * dk, MPI_DOUBLE, commK_); #endif gpuFor( - mp, dip, mq, djq, dk, - GPU_LAMBDA(const int p, const int i, const int q, const int j, - const int k) { + mp, dip, mq, djq, dk, GPU_LAMBDA(const int p, const int i, const int q, const int j, const int k) { const int iLo = p * dip; const int jLo = q * djq; if ((iLo + i < di) && (jLo + j < dj)) - ua[((i + iLo) * dj + j + jLo) * dk + k] = - ub[(((p * mq + q) * dip + i) * djq + j) * dk + k]; + ua[((i + iLo) * dj + j + jLo) * dk + k] = ub[(((p * mq + q) * dip + i) * djq + j) * dk + k]; }); } diff --git a/src/gravity/potential_SOR_3D.cpp b/src/gravity/potential_SOR_3D.cpp index c5cea83dd..016a33c54 100644 --- a/src/gravity/potential_SOR_3D.cpp +++ b/src/gravity/potential_SOR_3D.cpp @@ -14,10 +14,8 @@ Potential_SOR_3D::Potential_SOR_3D(void) {} -void Potential_SOR_3D::Initialize(Real Lx, Real Ly, Real Lz, Real x_min, - Real y_min, Real z_min, int nx, int ny, - int nz, int nx_real, int ny_real, int nz_real, - Real dx_real, Real dy_real, Real dz_real) +void Potential_SOR_3D::Initialize(Real Lx, Real Ly, Real Lz, Real x_min, Real y_min, Real z_min, int nx, int ny, int nz, + int nx_real, int ny_real, int nz_real, Real dx_real, Real dy_real, Real dz_real) { Lbox_x = Lx; Lbox_y = Ly; @@ -79,8 +77,8 @@ void Potential_SOR_3D::Initialize(Real Lx, Real Ly, Real Lz, Real x_min, TRANSFER_POISSON_BOUNDARIES = false; chprintf(" Using Poisson Solver: SOR\n"); - chprintf(" SOR: L[ %f %f %f ] N[ %d %d %d ] dx[ %f %f %f ]\n", Lbox_x, - Lbox_y, Lbox_z, nx_local, ny_local, nz_local, dx, dy, dz); + chprintf(" SOR: L[ %f %f %f ] N[ %d %d %d ] dx[ %f %f %f ]\n", Lbox_x, Lbox_y, Lbox_z, nx_local, ny_local, nz_local, + dx, dy, dz); chprintf(" SOR: Allocating memory...\n"); AllocateMemory_CPU(); @@ -118,66 +116,52 @@ void Potential_SOR_3D::AllocateMemory_GPU(void) #endif #ifdef GRAV_ISOLATED_BOUNDARY_X - Allocate_Array_GPU_Real(&F.boundary_isolated_x0_d, - n_ghost * ny_local * nz_local); - Allocate_Array_GPU_Real(&F.boundary_isolated_x1_d, - n_ghost * ny_local * nz_local); + Allocate_Array_GPU_Real(&F.boundary_isolated_x0_d, n_ghost * ny_local * nz_local); + Allocate_Array_GPU_Real(&F.boundary_isolated_x1_d, n_ghost * ny_local * nz_local); #endif #ifdef GRAV_ISOLATED_BOUNDARY_X - Allocate_Array_GPU_Real(&F.boundary_isolated_y0_d, - n_ghost * nx_local * nz_local); - Allocate_Array_GPU_Real(&F.boundary_isolated_y1_d, - n_ghost * nx_local * nz_local); + Allocate_Array_GPU_Real(&F.boundary_isolated_y0_d, n_ghost * nx_local * nz_local); + Allocate_Array_GPU_Real(&F.boundary_isolated_y1_d, n_ghost * nx_local * nz_local); #endif #ifdef GRAV_ISOLATED_BOUNDARY_Z - Allocate_Array_GPU_Real(&F.boundary_isolated_z0_d, - n_ghost * nx_local * ny_local); - Allocate_Array_GPU_Real(&F.boundary_isolated_z1_d, - n_ghost * nx_local * ny_local); + Allocate_Array_GPU_Real(&F.boundary_isolated_z0_d, n_ghost * nx_local * ny_local); + Allocate_Array_GPU_Real(&F.boundary_isolated_z1_d, n_ghost * nx_local * ny_local); #endif } -void Potential_SOR_3D::Copy_Input_And_Initialize( - Real *input_density, const Real *const input_potential, Real Grav_Constant, - Real dens_avrg, Real current_a) +void Potential_SOR_3D::Copy_Input_And_Initialize(Real *input_density, const Real *const input_potential, + Real Grav_Constant, Real dens_avrg, Real current_a) { - Copy_Input(n_cells_local, F.input_d, input_density, Grav_Constant, dens_avrg, - current_a); + Copy_Input(n_cells_local, F.input_d, input_density, Grav_Constant, dens_avrg, current_a); if (!potential_initialized) { chprintf("SOR: Initializing Potential \n"); - CHECK(cudaMemcpy(F.potential_d, input_potential, - n_cells_potential * sizeof(Real), cudaMemcpyHostToDevice)); + CHECK(cudaMemcpy(F.potential_d, input_potential, n_cells_potential * sizeof(Real), cudaMemcpyHostToDevice)); // Initialize_Potential( nx_local, ny_local, nz_local, n_ghost, // F.potential_d, F.density_d ); potential_initialized = true; } } -void Potential_SOR_3D::Poisson_Partial_Iteration(int n_step, Real omega, - Real epsilon) +void Potential_SOR_3D::Poisson_Partial_Iteration(int n_step, Real omega, Real epsilon) { if (n_step == 0) - Poisson_iteration_Patial_1(n_cells_local, nx_local, ny_local, nz_local, - n_ghost, dx, dy, dz, omega, epsilon, F.density_d, - F.potential_d, F.converged_h, F.converged_d); + Poisson_iteration_Patial_1(n_cells_local, nx_local, ny_local, nz_local, n_ghost, dx, dy, dz, omega, epsilon, + F.density_d, F.potential_d, F.converged_h, F.converged_d); if (n_step == 1) - Poisson_iteration_Patial_2(n_cells_local, nx_local, ny_local, nz_local, - n_ghost, dx, dy, dz, omega, epsilon, F.density_d, - F.potential_d, F.converged_h, F.converged_d); + Poisson_iteration_Patial_2(n_cells_local, nx_local, ny_local, nz_local, n_ghost, dx, dy, dz, omega, epsilon, + F.density_d, F.potential_d, F.converged_h, F.converged_d); } -void Grid3D::Get_Potential_SOR(Real Grav_Constant, Real dens_avrg, - Real current_a, struct parameters *P) +void Grid3D::Get_Potential_SOR(Real Grav_Constant, Real dens_avrg, Real current_a, struct parameters *P) { #ifdef TIME_SOR Real time_start, time_end, time; time_start = get_time(); #endif - Grav.Poisson_solver.Copy_Input_And_Initialize( - Grav.F.density_h, Grav.F.potential_h, Grav_Constant, dens_avrg, - current_a); + Grav.Poisson_solver.Copy_Input_And_Initialize(Grav.F.density_h, Grav.F.potential_h, Grav_Constant, dens_avrg, + current_a); // Set Isolated Boundary Conditions Grav.Copy_Isolated_Boundaries_To_GPU(P); @@ -213,8 +197,7 @@ void Grid3D::Get_Potential_SOR(Real Grav_Constant, Real dens_avrg, Set_Boundary_Conditions(*P); Grav.Poisson_solver.TRANSFER_POISSON_BOUNDARIES = false; } - Grav.Poisson_solver.Poisson_Partial_Iteration( - Grav.Poisson_solver.iteration_parity, omega, epsilon); + Grav.Poisson_solver.Poisson_Partial_Iteration(Grav.Poisson_solver.iteration_parity, omega, epsilon); // Second Partial Iteration Grav.Poisson_solver.iteration_parity = 1; @@ -223,14 +206,12 @@ void Grid3D::Get_Potential_SOR(Real Grav_Constant, Real dens_avrg, Set_Boundary_Conditions(*P); Grav.Poisson_solver.TRANSFER_POISSON_BOUNDARIES = false; } - Grav.Poisson_solver.Poisson_Partial_Iteration( - Grav.Poisson_solver.iteration_parity, omega, epsilon); + Grav.Poisson_solver.Poisson_Partial_Iteration(Grav.Poisson_solver.iteration_parity, omega, epsilon); // Get convergence state #ifdef MPI_CHOLLA Grav.Poisson_solver.F.converged_h[0] = - Grav.Poisson_solver.Get_Global_Converged( - Grav.Poisson_solver.F.converged_h[0]); + Grav.Poisson_solver.Get_Global_Converged(Grav.Poisson_solver.F.converged_h[0]); #endif // Only aloow to connverge after the boundaries have been transfere to avoid @@ -260,57 +241,42 @@ void Grid3D::Get_Potential_SOR(Real Grav_Constant, Real dens_avrg, void Grav3D::Copy_Isolated_Boundaries_To_GPU(struct parameters *P) { - if (P->xl_bcnd != 3 && P->xu_bcnd != 3 && P->yl_bcnd != 3 && - P->yu_bcnd != 3 && P->zl_bcnd != 3 && P->zu_bcnd != 3) + if (P->xl_bcnd != 3 && P->xu_bcnd != 3 && P->yl_bcnd != 3 && P->yu_bcnd != 3 && P->zl_bcnd != 3 && P->zu_bcnd != 3) return; // chprintf( " Copying Isolated Boundaries \n"); if (boundary_flags[0] == 3) - Copy_Isolated_Boundary_To_GPU_buffer( - F.pot_boundary_x0, Poisson_solver.F.boundary_isolated_x0_d, - Poisson_solver.n_ghost * ny_local * nz_local); + Copy_Isolated_Boundary_To_GPU_buffer(F.pot_boundary_x0, Poisson_solver.F.boundary_isolated_x0_d, + Poisson_solver.n_ghost * ny_local * nz_local); if (boundary_flags[1] == 3) - Copy_Isolated_Boundary_To_GPU_buffer( - F.pot_boundary_x1, Poisson_solver.F.boundary_isolated_x1_d, - Poisson_solver.n_ghost * ny_local * nz_local); + Copy_Isolated_Boundary_To_GPU_buffer(F.pot_boundary_x1, Poisson_solver.F.boundary_isolated_x1_d, + Poisson_solver.n_ghost * ny_local * nz_local); if (boundary_flags[2] == 3) - Copy_Isolated_Boundary_To_GPU_buffer( - F.pot_boundary_y0, Poisson_solver.F.boundary_isolated_y0_d, - Poisson_solver.n_ghost * nx_local * nz_local); + Copy_Isolated_Boundary_To_GPU_buffer(F.pot_boundary_y0, Poisson_solver.F.boundary_isolated_y0_d, + Poisson_solver.n_ghost * nx_local * nz_local); if (boundary_flags[3] == 3) - Copy_Isolated_Boundary_To_GPU_buffer( - F.pot_boundary_y1, Poisson_solver.F.boundary_isolated_y1_d, - Poisson_solver.n_ghost * nx_local * nz_local); + Copy_Isolated_Boundary_To_GPU_buffer(F.pot_boundary_y1, Poisson_solver.F.boundary_isolated_y1_d, + Poisson_solver.n_ghost * nx_local * nz_local); if (boundary_flags[4] == 3) - Copy_Isolated_Boundary_To_GPU_buffer( - F.pot_boundary_z0, Poisson_solver.F.boundary_isolated_z0_d, - Poisson_solver.n_ghost * nx_local * ny_local); + Copy_Isolated_Boundary_To_GPU_buffer(F.pot_boundary_z0, Poisson_solver.F.boundary_isolated_z0_d, + Poisson_solver.n_ghost * nx_local * ny_local); if (boundary_flags[5] == 3) - Copy_Isolated_Boundary_To_GPU_buffer( - F.pot_boundary_z1, Poisson_solver.F.boundary_isolated_z1_d, - Poisson_solver.n_ghost * nx_local * ny_local); + Copy_Isolated_Boundary_To_GPU_buffer(F.pot_boundary_z1, Poisson_solver.F.boundary_isolated_z1_d, + Poisson_solver.n_ghost * nx_local * ny_local); } -void Potential_SOR_3D::Set_Isolated_Boundary_Conditions(int *boundary_flags, - struct parameters *P) +void Potential_SOR_3D::Set_Isolated_Boundary_Conditions(int *boundary_flags, struct parameters *P) { - if (P->xl_bcnd != 3 && P->xu_bcnd != 3 && P->yl_bcnd != 3 && - P->yu_bcnd != 3 && P->zl_bcnd != 3 && P->zu_bcnd != 3) + if (P->xl_bcnd != 3 && P->xu_bcnd != 3 && P->yl_bcnd != 3 && P->yu_bcnd != 3 && P->zl_bcnd != 3 && P->zu_bcnd != 3) return; chprintf(" Setting Isolated Boundaries \n"); - if (boundary_flags[0] == 3) - Set_Isolated_Boundary_GPU(0, 0, F.boundary_isolated_x0_d); - if (boundary_flags[1] == 3) - Set_Isolated_Boundary_GPU(0, 1, F.boundary_isolated_x1_d); - if (boundary_flags[2] == 3) - Set_Isolated_Boundary_GPU(1, 0, F.boundary_isolated_y0_d); - if (boundary_flags[3] == 3) - Set_Isolated_Boundary_GPU(1, 1, F.boundary_isolated_y1_d); - if (boundary_flags[4] == 3) - Set_Isolated_Boundary_GPU(2, 0, F.boundary_isolated_z0_d); - if (boundary_flags[5] == 3) - Set_Isolated_Boundary_GPU(2, 1, F.boundary_isolated_z1_d); + if (boundary_flags[0] == 3) Set_Isolated_Boundary_GPU(0, 0, F.boundary_isolated_x0_d); + if (boundary_flags[1] == 3) Set_Isolated_Boundary_GPU(0, 1, F.boundary_isolated_x1_d); + if (boundary_flags[2] == 3) Set_Isolated_Boundary_GPU(1, 0, F.boundary_isolated_y0_d); + if (boundary_flags[3] == 3) Set_Isolated_Boundary_GPU(1, 1, F.boundary_isolated_y1_d); + if (boundary_flags[4] == 3) Set_Isolated_Boundary_GPU(2, 0, F.boundary_isolated_z0_d); + if (boundary_flags[5] == 3) Set_Isolated_Boundary_GPU(2, 1, F.boundary_isolated_z1_d); } void Potential_SOR_3D::Copy_Poisson_Boundary_Periodic(int direction, int side) @@ -334,12 +300,10 @@ void Potential_SOR_3D::Copy_Poisson_Boundary_Periodic(int direction, int side) side_load = side; side_unload = (side_load + 1) % 2; - Load_Transfer_Buffer_GPU(direction, side_load, nx_local, ny_local, nz_local, - n_ghost_transfer, n_ghost, F.potential_d, + Load_Transfer_Buffer_GPU(direction, side_load, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d, boundaries_buffer); - Unload_Transfer_Buffer_GPU(direction, side_unload, nx_local, ny_local, - nz_local, n_ghost_transfer, n_ghost, F.potential_d, - boundaries_buffer); + Unload_Transfer_Buffer_GPU(direction, side_unload, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, + F.potential_d, boundaries_buffer); } void Potential_SOR_3D::FreeMemory_GPU(void) @@ -385,8 +349,7 @@ void Potential_SOR_3D::Reset(void) #ifdef MPI_CHOLLA -int Grid3D::Load_Poisson_Boundary_To_Buffer(int direction, int side, - Real *buffer_host) +int Grid3D::Load_Poisson_Boundary_To_Buffer(int direction, int side, Real *buffer_host) { int size_buffer; @@ -423,14 +386,12 @@ int Grid3D::Load_Poisson_Boundary_To_Buffer(int direction, int side, if (side == 1) buffer_dev = Grav.Poisson_solver.F.boundaries_buffer_z1_d; } - Grav.Poisson_solver.Copy_Transfer_Buffer_To_Host(size_buffer, buffer_host, - buffer_dev); + Grav.Poisson_solver.Copy_Transfer_Buffer_To_Host(size_buffer, buffer_host, buffer_dev); return size_buffer; } -void Grid3D::Unload_Poisson_Boundary_From_Buffer(int direction, int side, - Real *buffer_host) +void Grid3D::Unload_Poisson_Boundary_From_Buffer(int direction, int side, Real *buffer_host) { int size_buffer; @@ -441,26 +402,19 @@ void Grid3D::Unload_Poisson_Boundary_From_Buffer(int direction, int side, // Copy the host_buffer to the device_buffer Real *buffer_dev; if (direction == 0) { - if (side == 0) - buffer_dev = Grav.Poisson_solver.F.recv_boundaries_buffer_x0_d; - if (side == 1) - buffer_dev = Grav.Poisson_solver.F.recv_boundaries_buffer_x1_d; + if (side == 0) buffer_dev = Grav.Poisson_solver.F.recv_boundaries_buffer_x0_d; + if (side == 1) buffer_dev = Grav.Poisson_solver.F.recv_boundaries_buffer_x1_d; } if (direction == 1) { - if (side == 0) - buffer_dev = Grav.Poisson_solver.F.recv_boundaries_buffer_y0_d; - if (side == 1) - buffer_dev = Grav.Poisson_solver.F.recv_boundaries_buffer_y1_d; + if (side == 0) buffer_dev = Grav.Poisson_solver.F.recv_boundaries_buffer_y0_d; + if (side == 1) buffer_dev = Grav.Poisson_solver.F.recv_boundaries_buffer_y1_d; } if (direction == 2) { - if (side == 0) - buffer_dev = Grav.Poisson_solver.F.recv_boundaries_buffer_z0_d; - if (side == 1) - buffer_dev = Grav.Poisson_solver.F.recv_boundaries_buffer_z1_d; + if (side == 0) buffer_dev = Grav.Poisson_solver.F.recv_boundaries_buffer_z0_d; + if (side == 1) buffer_dev = Grav.Poisson_solver.F.recv_boundaries_buffer_z1_d; } - Grav.Poisson_solver.Copy_Transfer_Buffer_To_Device(size_buffer, buffer_host, - buffer_dev); + Grav.Poisson_solver.Copy_Transfer_Buffer_To_Device(size_buffer, buffer_host, buffer_dev); // Unload the transfer buffer in the GPU if (direction == 0) { @@ -480,84 +434,76 @@ void Grid3D::Unload_Poisson_Boundary_From_Buffer(int direction, int side, void Potential_SOR_3D::Load_Transfer_Buffer_GPU_x0() { #ifdef HALF_SIZE_BOUNDARIES - Load_Transfer_Buffer_Half_GPU(0, 0, nx_local, ny_local, nz_local, - n_ghost_transfer, n_ghost, F.potential_d, + Load_Transfer_Buffer_Half_GPU(0, 0, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d, F.boundaries_buffer_x0_d); #else - Load_Transfer_Buffer_GPU(0, 0, nx_local, ny_local, nz_local, n_ghost_transfer, - n_ghost, F.potential_d, F.boundaries_buffer_x0_d); + Load_Transfer_Buffer_GPU(0, 0, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d, + F.boundaries_buffer_x0_d); #endif } void Potential_SOR_3D::Load_Transfer_Buffer_GPU_x1() { #ifdef HALF_SIZE_BOUNDARIES - Load_Transfer_Buffer_Half_GPU(0, 1, nx_local, ny_local, nz_local, - n_ghost_transfer, n_ghost, F.potential_d, + Load_Transfer_Buffer_Half_GPU(0, 1, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d, F.boundaries_buffer_x1_d); #else - Load_Transfer_Buffer_GPU(0, 1, nx_local, ny_local, nz_local, n_ghost_transfer, - n_ghost, F.potential_d, F.boundaries_buffer_x1_d); + Load_Transfer_Buffer_GPU(0, 1, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d, + F.boundaries_buffer_x1_d); #endif } void Potential_SOR_3D::Load_Transfer_Buffer_GPU_y0() { #ifdef HALF_SIZE_BOUNDARIES - Load_Transfer_Buffer_Half_GPU(1, 0, nx_local, ny_local, nz_local, - n_ghost_transfer, n_ghost, F.potential_d, + Load_Transfer_Buffer_Half_GPU(1, 0, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d, F.boundaries_buffer_y0_d); #else - Load_Transfer_Buffer_GPU(1, 0, nx_local, ny_local, nz_local, n_ghost_transfer, - n_ghost, F.potential_d, F.boundaries_buffer_y0_d); + Load_Transfer_Buffer_GPU(1, 0, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d, + F.boundaries_buffer_y0_d); #endif } void Potential_SOR_3D::Load_Transfer_Buffer_GPU_y1() { #ifdef HALF_SIZE_BOUNDARIES - Load_Transfer_Buffer_Half_GPU(1, 1, nx_local, ny_local, nz_local, - n_ghost_transfer, n_ghost, F.potential_d, + Load_Transfer_Buffer_Half_GPU(1, 1, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d, F.boundaries_buffer_y1_d); #else - Load_Transfer_Buffer_GPU(1, 1, nx_local, ny_local, nz_local, n_ghost_transfer, - n_ghost, F.potential_d, F.boundaries_buffer_y1_d); + Load_Transfer_Buffer_GPU(1, 1, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d, + F.boundaries_buffer_y1_d); #endif } void Potential_SOR_3D::Load_Transfer_Buffer_GPU_z0() { #ifdef HALF_SIZE_BOUNDARIES - Load_Transfer_Buffer_Half_GPU(2, 0, nx_local, ny_local, nz_local, - n_ghost_transfer, n_ghost, F.potential_d, + Load_Transfer_Buffer_Half_GPU(2, 0, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d, F.boundaries_buffer_z0_d); #else - Load_Transfer_Buffer_GPU(2, 0, nx_local, ny_local, nz_local, n_ghost_transfer, - n_ghost, F.potential_d, F.boundaries_buffer_z0_d); + Load_Transfer_Buffer_GPU(2, 0, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d, + F.boundaries_buffer_z0_d); #endif } void Potential_SOR_3D::Load_Transfer_Buffer_GPU_z1() { #ifdef HALF_SIZE_BOUNDARIES - Load_Transfer_Buffer_Half_GPU(2, 1, nx_local, ny_local, nz_local, - n_ghost_transfer, n_ghost, F.potential_d, + Load_Transfer_Buffer_Half_GPU(2, 1, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d, F.boundaries_buffer_z1_d); #else - Load_Transfer_Buffer_GPU(2, 1, nx_local, ny_local, nz_local, n_ghost_transfer, - n_ghost, F.potential_d, F.boundaries_buffer_z1_d); + Load_Transfer_Buffer_GPU(2, 1, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d, + F.boundaries_buffer_z1_d); #endif } void Potential_SOR_3D::Unload_Transfer_Buffer_GPU_x0() { #ifdef HALF_SIZE_BOUNDARIES - Unload_Transfer_Buffer_Half_GPU(0, 0, nx_local, ny_local, nz_local, - n_ghost_transfer, n_ghost, F.potential_d, + Unload_Transfer_Buffer_Half_GPU(0, 0, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d, F.recv_boundaries_buffer_x0_d); #else - Unload_Transfer_Buffer_GPU(0, 0, nx_local, ny_local, nz_local, - n_ghost_transfer, n_ghost, F.potential_d, + Unload_Transfer_Buffer_GPU(0, 0, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d, F.recv_boundaries_buffer_x0_d); #endif } @@ -565,12 +511,10 @@ void Potential_SOR_3D::Unload_Transfer_Buffer_GPU_x0() void Potential_SOR_3D::Unload_Transfer_Buffer_GPU_x1() { #ifdef HALF_SIZE_BOUNDARIES - Unload_Transfer_Buffer_Half_GPU(0, 1, nx_local, ny_local, nz_local, - n_ghost_transfer, n_ghost, F.potential_d, + Unload_Transfer_Buffer_Half_GPU(0, 1, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d, F.recv_boundaries_buffer_x1_d); #else - Unload_Transfer_Buffer_GPU(0, 1, nx_local, ny_local, nz_local, - n_ghost_transfer, n_ghost, F.potential_d, + Unload_Transfer_Buffer_GPU(0, 1, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d, F.recv_boundaries_buffer_x1_d); #endif } @@ -578,12 +522,10 @@ void Potential_SOR_3D::Unload_Transfer_Buffer_GPU_x1() void Potential_SOR_3D::Unload_Transfer_Buffer_GPU_y0() { #ifdef HALF_SIZE_BOUNDARIES - Unload_Transfer_Buffer_Half_GPU(1, 0, nx_local, ny_local, nz_local, - n_ghost_transfer, n_ghost, F.potential_d, + Unload_Transfer_Buffer_Half_GPU(1, 0, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d, F.recv_boundaries_buffer_y0_d); #else - Unload_Transfer_Buffer_GPU(1, 0, nx_local, ny_local, nz_local, - n_ghost_transfer, n_ghost, F.potential_d, + Unload_Transfer_Buffer_GPU(1, 0, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d, F.recv_boundaries_buffer_y0_d); #endif } @@ -591,12 +533,10 @@ void Potential_SOR_3D::Unload_Transfer_Buffer_GPU_y0() void Potential_SOR_3D::Unload_Transfer_Buffer_GPU_y1() { #ifdef HALF_SIZE_BOUNDARIES - Unload_Transfer_Buffer_Half_GPU(1, 1, nx_local, ny_local, nz_local, - n_ghost_transfer, n_ghost, F.potential_d, + Unload_Transfer_Buffer_Half_GPU(1, 1, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d, F.recv_boundaries_buffer_y1_d); #else - Unload_Transfer_Buffer_GPU(1, 1, nx_local, ny_local, nz_local, - n_ghost_transfer, n_ghost, F.potential_d, + Unload_Transfer_Buffer_GPU(1, 1, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d, F.recv_boundaries_buffer_y1_d); #endif } @@ -604,12 +544,10 @@ void Potential_SOR_3D::Unload_Transfer_Buffer_GPU_y1() void Potential_SOR_3D::Unload_Transfer_Buffer_GPU_z0() { #ifdef HALF_SIZE_BOUNDARIES - Unload_Transfer_Buffer_Half_GPU(2, 0, nx_local, ny_local, nz_local, - n_ghost_transfer, n_ghost, F.potential_d, + Unload_Transfer_Buffer_Half_GPU(2, 0, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d, F.recv_boundaries_buffer_z0_d); #else - Unload_Transfer_Buffer_GPU(2, 0, nx_local, ny_local, nz_local, - n_ghost_transfer, n_ghost, F.potential_d, + Unload_Transfer_Buffer_GPU(2, 0, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d, F.recv_boundaries_buffer_z0_d); #endif } @@ -617,12 +555,10 @@ void Potential_SOR_3D::Unload_Transfer_Buffer_GPU_z0() void Potential_SOR_3D::Unload_Transfer_Buffer_GPU_z1() { #ifdef HALF_SIZE_BOUNDARIES - Unload_Transfer_Buffer_Half_GPU(2, 1, nx_local, ny_local, nz_local, - n_ghost_transfer, n_ghost, F.potential_d, + Unload_Transfer_Buffer_Half_GPU(2, 1, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d, F.recv_boundaries_buffer_z1_d); #else - Unload_Transfer_Buffer_GPU(2, 1, nx_local, ny_local, nz_local, - n_ghost_transfer, n_ghost, F.potential_d, + Unload_Transfer_Buffer_GPU(2, 1, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d, F.recv_boundaries_buffer_z1_d); #endif } diff --git a/src/gravity/potential_SOR_3D.h b/src/gravity/potential_SOR_3D.h index 9c4a5f28c..048b08f33 100644 --- a/src/gravity/potential_SOR_3D.h +++ b/src/gravity/potential_SOR_3D.h @@ -89,16 +89,15 @@ class Potential_SOR_3D Potential_SOR_3D(void); - void Initialize(Real Lx, Real Ly, Real Lz, Real x_min, Real y_min, Real z_min, - int nx, int ny, int nz, int nx_real, int ny_real, int nz_real, - Real dx, Real dy, Real dz); + void Initialize(Real Lx, Real Ly, Real Lz, Real x_min, Real y_min, Real z_min, int nx, int ny, int nz, int nx_real, + int ny_real, int nz_real, Real dx, Real dy, Real dz); void AllocateMemory_CPU(void); void AllocateMemory_GPU(void); void FreeMemory_GPU(void); void Reset(void); - void Copy_Input(int n_cells, Real *input_d, Real *input_density_h, - Real Grav_Constant, Real dens_avrg, Real current_a); + void Copy_Input(int n_cells, Real *input_d, Real *input_density_h, Real Grav_Constant, Real dens_avrg, + Real current_a); void Copy_Output(Real *output_potential); void Copy_Potential_From_Host(Real *output_potential); @@ -113,51 +112,35 @@ class Potential_SOR_3D void Free_Array_GPU_Real(Real *array_dev); void Free_Array_GPU_bool(bool *array_dev); - void Initialize_Potential(int nx, int ny, int nz, int n_ghost_potential, - Real *potential_d, Real *density_d); - void Copy_Input_And_Initialize(Real *input_density, - const Real *input_potential, - Real Grav_Constant, Real dens_avrg, + void Initialize_Potential(int nx, int ny, int nz, int n_ghost_potential, Real *potential_d, Real *density_d); + void Copy_Input_And_Initialize(Real *input_density, const Real *input_potential, Real Grav_Constant, Real dens_avrg, Real current_a); - void Poisson_iteration(int n_cells, int nx, int ny, int nz, - int n_ghost_potential, Real dx, Real dy, Real dz, - Real omega, Real epsilon, Real *density_d, - Real *potential_d, bool *converged_h, + void Poisson_iteration(int n_cells, int nx, int ny, int nz, int n_ghost_potential, Real dx, Real dy, Real dz, + Real omega, Real epsilon, Real *density_d, Real *potential_d, bool *converged_h, bool *converged_d); - void Poisson_iteration_Patial_1(int n_cells, int nx, int ny, int nz, - int n_ghost_potential, Real dx, Real dy, - Real dz, Real omega, Real epsilon, - Real *density_d, Real *potential_d, - bool *converged_h, bool *converged_d); - void Poisson_iteration_Patial_2(int n_cells, int nx, int ny, int nz, - int n_ghost_potential, Real dx, Real dy, - Real dz, Real omega, Real epsilon, - Real *density_d, Real *potential_d, - bool *converged_h, bool *converged_d); + void Poisson_iteration_Patial_1(int n_cells, int nx, int ny, int nz, int n_ghost_potential, Real dx, Real dy, Real dz, + Real omega, Real epsilon, Real *density_d, Real *potential_d, bool *converged_h, + bool *converged_d); + void Poisson_iteration_Patial_2(int n_cells, int nx, int ny, int nz, int n_ghost_potential, Real dx, Real dy, Real dz, + Real omega, Real epsilon, Real *density_d, Real *potential_d, bool *converged_h, + bool *converged_d); void Poisson_Partial_Iteration(int n_step, Real omega, Real epsilon); - void Load_Transfer_Buffer_GPU(int direction, int side, int nx, int ny, int nz, - int n_ghost_transfer, int n_ghost_potential, - Real *potential_d, Real *transfer_buffer_d); - void Load_Transfer_Buffer_Half_GPU(int direction, int side, int nx, int ny, - int nz, int n_ghost_transfer, - int n_ghost_potential, Real *potential_d, - Real *transfer_buffer_d); + void Load_Transfer_Buffer_GPU(int direction, int side, int nx, int ny, int nz, int n_ghost_transfer, + int n_ghost_potential, Real *potential_d, Real *transfer_buffer_d); + void Load_Transfer_Buffer_Half_GPU(int direction, int side, int nx, int ny, int nz, int n_ghost_transfer, + int n_ghost_potential, Real *potential_d, Real *transfer_buffer_d); void Load_Transfer_Buffer_GPU_x0(); void Load_Transfer_Buffer_GPU_x1(); void Load_Transfer_Buffer_GPU_y0(); void Load_Transfer_Buffer_GPU_y1(); void Load_Transfer_Buffer_GPU_z0(); void Load_Transfer_Buffer_GPU_z1(); - void Unload_Transfer_Buffer_GPU(int direction, int side, int nx, int ny, - int nz, int n_ghost_transfer, - int n_ghost_potential, Real *potential_d, - Real *transfer_buffer_d); - void Unload_Transfer_Buffer_Half_GPU(int direction, int side, int nx, int ny, - int nz, int n_ghost_transfer, - int n_ghost_potential, Real *potential_d, - Real *transfer_buffer_d); + void Unload_Transfer_Buffer_GPU(int direction, int side, int nx, int ny, int nz, int n_ghost_transfer, + int n_ghost_potential, Real *potential_d, Real *transfer_buffer_d); + void Unload_Transfer_Buffer_Half_GPU(int direction, int side, int nx, int ny, int nz, int n_ghost_transfer, + int n_ghost_potential, Real *potential_d, Real *transfer_buffer_d); void Unload_Transfer_Buffer_GPU_x0(); void Unload_Transfer_Buffer_GPU_x1(); void Unload_Transfer_Buffer_GPU_y0(); @@ -172,13 +155,10 @@ class Potential_SOR_3D // void Load_Transfer_Buffer_GPU_All(); // void Unload_Transfer_Buffer_GPU_All(); - void Copy_Transfer_Buffer_To_Host(int size_buffer, Real *transfer_bufer_h, - Real *transfer_buffer_d); - void Copy_Transfer_Buffer_To_Device(int size_buffer, Real *transfer_bufer_h, - Real *transfer_buffer_d); + void Copy_Transfer_Buffer_To_Host(int size_buffer, Real *transfer_bufer_h, Real *transfer_buffer_d); + void Copy_Transfer_Buffer_To_Device(int size_buffer, Real *transfer_bufer_h, Real *transfer_buffer_d); - void Set_Isolated_Boundary_Conditions(int *boundary_flags, - struct parameters *P); + void Set_Isolated_Boundary_Conditions(int *boundary_flags, struct parameters *P); void Set_Isolated_Boundary_GPU(int direction, int side, Real *boundary_d); #ifdef MPI_CHOLLA diff --git a/src/gravity/potential_SOR_3D_gpu.cu b/src/gravity/potential_SOR_3D_gpu.cu index 93883af5f..9910b62cd 100644 --- a/src/gravity/potential_SOR_3D_gpu.cu +++ b/src/gravity/potential_SOR_3D_gpu.cu @@ -6,15 +6,13 @@ #define TPB_SOR 1024 -void Potential_SOR_3D::Allocate_Array_GPU_Real(Real **array_dev, - grav_int_t size) +void Potential_SOR_3D::Allocate_Array_GPU_Real(Real **array_dev, grav_int_t size) { cudaMalloc((void **)array_dev, size * sizeof(Real)); CudaCheckError(); } -void Potential_SOR_3D::Allocate_Array_GPU_bool(bool **array_dev, - grav_int_t size) +void Potential_SOR_3D::Allocate_Array_GPU_bool(bool **array_dev, grav_int_t size) { cudaMalloc((void **)array_dev, size * sizeof(bool)); CudaCheckError(); @@ -32,28 +30,24 @@ void Potential_SOR_3D::Free_Array_GPU_bool(bool *array_dev) CudaCheckError(); } -__global__ void Copy_Input_Kernel(int n_cells, Real *input_d, Real *density_d, - Real Grav_Constant, Real dens_avrg, +__global__ void Copy_Input_Kernel(int n_cells, Real *input_d, Real *density_d, Real Grav_Constant, Real dens_avrg, Real current_a) { int tid = threadIdx.x + blockIdx.x * blockDim.x; if (tid >= n_cells) return; #ifdef COSMOLOGY - density_d[tid] = - 4 * M_PI * Grav_Constant * (input_d[tid] - dens_avrg) / current_a; + density_d[tid] = 4 * M_PI * Grav_Constant * (input_d[tid] - dens_avrg) / current_a; #else density_d[tid] = 4 * M_PI * Grav_Constant * (input_d[tid] - dens_avrg); #endif // if (tid == 0) printf("dens: %f\n", density_d[tid]); } -void Potential_SOR_3D::Copy_Input(int n_cells, Real *input_d, - Real *input_density_h, Real Grav_Constant, - Real dens_avrg, Real current_a) +void Potential_SOR_3D::Copy_Input(int n_cells, Real *input_d, Real *input_density_h, Real Grav_Constant, Real dens_avrg, + Real current_a) { - cudaMemcpy(input_d, input_density_h, n_cells * sizeof(Real), - cudaMemcpyHostToDevice); + cudaMemcpy(input_d, input_density_h, n_cells * sizeof(Real), cudaMemcpyHostToDevice); // set values for GPU kernels int ngrid = (n_cells_local + TPB_SOR - 1) / TPB_SOR; @@ -64,22 +58,18 @@ void Potential_SOR_3D::Copy_Input(int n_cells, Real *input_d, // Copy_Input_Kernel<<>>( n_cells_local, F.input_d, // F.density_d, Grav_Constant, dens_avrg, current_a ); - hipLaunchKernelGGL(Copy_Input_Kernel, dim1dGrid, dim1dBlock, 0, 0, - n_cells_local, F.input_d, F.density_d, Grav_Constant, - dens_avrg, current_a); + hipLaunchKernelGGL(Copy_Input_Kernel, dim1dGrid, dim1dBlock, 0, 0, n_cells_local, F.input_d, F.density_d, + Grav_Constant, dens_avrg, current_a); } -void Grav3D::Copy_Isolated_Boundary_To_GPU_buffer(Real *isolated_boundary_h, - Real *isolated_boundary_d, +void Grav3D::Copy_Isolated_Boundary_To_GPU_buffer(Real *isolated_boundary_h, Real *isolated_boundary_d, int boundary_size) { - cudaMemcpy(isolated_boundary_d, isolated_boundary_h, - boundary_size * sizeof(Real), cudaMemcpyHostToDevice); + cudaMemcpy(isolated_boundary_d, isolated_boundary_h, boundary_size * sizeof(Real), cudaMemcpyHostToDevice); } -__global__ void Initialize_Potential_Kernel(Real init_val, Real *potential_d, - Real *density_d, int nx, int ny, - int nz, int n_ghost) +__global__ void Initialize_Potential_Kernel(Real init_val, Real *potential_d, Real *density_d, int nx, int ny, int nz, + int n_ghost) { int tid_x, tid_y, tid_z, tid_pot; tid_x = blockIdx.x * blockDim.x + threadIdx.x; @@ -108,9 +98,8 @@ __global__ void Initialize_Potential_Kernel(Real init_val, Real *potential_d, // potential_d[tid_pot] = -dens; } -void Potential_SOR_3D::Initialize_Potential(int nx, int ny, int nz, - int n_ghost_potential, - Real *potential_d, Real *density_d) +void Potential_SOR_3D::Initialize_Potential(int nx, int ny, int nz, int n_ghost_potential, Real *potential_d, + Real *density_d) { // set values for GPU kernels int tpb_x = 16; @@ -126,15 +115,12 @@ void Potential_SOR_3D::Initialize_Potential(int nx, int ny, int nz, // Initialize_Potential_Kernel<<>>( 1, potential_d, // density_d, nx, ny, nz, n_ghost_potential ); - hipLaunchKernelGGL(Initialize_Potential_Kernel, dim3dGrid, dim3dBlock, 0, 0, - 1, potential_d, density_d, nx, ny, nz, n_ghost_potential); + hipLaunchKernelGGL(Initialize_Potential_Kernel, dim3dGrid, dim3dBlock, 0, 0, 1, potential_d, density_d, nx, ny, nz, + n_ghost_potential); } -__global__ void Iteration_Step_SOR(int n_cells, Real *density_d, - Real *potential_d, int nx, int ny, int nz, - int n_ghost, Real dx, Real dy, Real dz, - Real omega, int parity, Real epsilon, - bool *converged_d) +__global__ void Iteration_Step_SOR(int n_cells, Real *density_d, Real *potential_d, int nx, int ny, int nz, int n_ghost, + Real dx, Real dy, Real dz, Real omega, int parity, Real epsilon, bool *converged_d) { int tid_x, tid_y, tid_z, tid, tid_pot; tid_x = 2 * (blockIdx.x * blockDim.x + threadIdx.x); @@ -201,9 +187,7 @@ __global__ void Iteration_Step_SOR(int n_cells, Real *density_d, phi_b = potential_d[tid_x + tid_y * nx_pot + indx_b * nx_pot * ny_pot]; phi_t = potential_d[tid_x + tid_y * nx_pot + indx_t * nx_pot * ny_pot]; - phi_new = (1 - omega) * phi_c + - omega / 6 * - (phi_l + phi_r + phi_d + phi_u + phi_b + phi_t - dx * dx * rho); + phi_new = (1 - omega) * phi_c + omega / 6 * (phi_l + phi_r + phi_d + phi_u + phi_b + phi_t - dx * dx * rho); potential_d[tid_pot] = phi_new; // potential_d[tid_pot] = parity + 1; @@ -214,12 +198,9 @@ __global__ void Iteration_Step_SOR(int n_cells, Real *density_d, // epsilon ) ) converged_d[0] = 0; } -void Potential_SOR_3D::Poisson_iteration(int n_cells, int nx, int ny, int nz, - int n_ghost_potential, Real dx, - Real dy, Real dz, Real omega, - Real epsilon, Real *density_d, - Real *potential_d, bool *converged_h, - bool *converged_d) +void Potential_SOR_3D::Poisson_iteration(int n_cells, int nx, int ny, int nz, int n_ghost_potential, Real dx, Real dy, + Real dz, Real omega, Real epsilon, Real *density_d, Real *potential_d, + bool *converged_h, bool *converged_d) { // set values for GPU kernels int tpb_x = 16; @@ -240,26 +221,21 @@ void Potential_SOR_3D::Poisson_iteration(int n_cells, int nx, int ny, int nz, // Iteration_Step_SOR<<>>( n_cells, density_d, // potential_d, nx, ny, nz, n_ghost_potential, dx, dy, dz, omega, 0, epsilon, // converged_d ); - hipLaunchKernelGGL(Iteration_Step_SOR, dim3dGrid_half, dim3dBlock, 0, 0, - n_cells, density_d, potential_d, nx, ny, nz, - n_ghost_potential, dx, dy, dz, omega, 0, epsilon, - converged_d); + hipLaunchKernelGGL(Iteration_Step_SOR, dim3dGrid_half, dim3dBlock, 0, 0, n_cells, density_d, potential_d, nx, ny, nz, + n_ghost_potential, dx, dy, dz, omega, 0, epsilon, converged_d); // Iteration_Step_SOR<<>>( n_cells, density_d, // potential_d, nx, ny, nz, n_ghost_potential, dx, dy, dz, omega, 1, epsilon, // converged_d ); - hipLaunchKernelGGL(Iteration_Step_SOR, dim3dGrid_half, dim3dBlock, 0, 0, - n_cells, density_d, potential_d, nx, ny, nz, - n_ghost_potential, dx, dy, dz, omega, 1, epsilon, - converged_d); + hipLaunchKernelGGL(Iteration_Step_SOR, dim3dGrid_half, dim3dBlock, 0, 0, n_cells, density_d, potential_d, nx, ny, nz, + n_ghost_potential, dx, dy, dz, omega, 1, epsilon, converged_d); cudaMemcpy(converged_h, converged_d, sizeof(bool), cudaMemcpyDeviceToHost); } -void Potential_SOR_3D::Poisson_iteration_Patial_1( - int n_cells, int nx, int ny, int nz, int n_ghost_potential, Real dx, - Real dy, Real dz, Real omega, Real epsilon, Real *density_d, - Real *potential_d, bool *converged_h, bool *converged_d) +void Potential_SOR_3D::Poisson_iteration_Patial_1(int n_cells, int nx, int ny, int nz, int n_ghost_potential, Real dx, + Real dy, Real dz, Real omega, Real epsilon, Real *density_d, + Real *potential_d, bool *converged_h, bool *converged_d) { // set values for GPU kernels int tpb_x = 16; @@ -280,16 +256,13 @@ void Potential_SOR_3D::Poisson_iteration_Patial_1( // Iteration_Step_SOR<<>>( n_cells, density_d, // potential_d, nx, ny, nz, n_ghost_potential, dx, dy, dz, omega, 0, epsilon, // converged_d ); - hipLaunchKernelGGL(Iteration_Step_SOR, dim3dGrid_half, dim3dBlock, 0, 0, - n_cells, density_d, potential_d, nx, ny, nz, - n_ghost_potential, dx, dy, dz, omega, 0, epsilon, - converged_d); + hipLaunchKernelGGL(Iteration_Step_SOR, dim3dGrid_half, dim3dBlock, 0, 0, n_cells, density_d, potential_d, nx, ny, nz, + n_ghost_potential, dx, dy, dz, omega, 0, epsilon, converged_d); } -void Potential_SOR_3D::Poisson_iteration_Patial_2( - int n_cells, int nx, int ny, int nz, int n_ghost_potential, Real dx, - Real dy, Real dz, Real omega, Real epsilon, Real *density_d, - Real *potential_d, bool *converged_h, bool *converged_d) +void Potential_SOR_3D::Poisson_iteration_Patial_2(int n_cells, int nx, int ny, int nz, int n_ghost_potential, Real dx, + Real dy, Real dz, Real omega, Real epsilon, Real *density_d, + Real *potential_d, bool *converged_h, bool *converged_d) { // set values for GPU kernels int tpb_x = 16; @@ -308,17 +281,15 @@ void Potential_SOR_3D::Poisson_iteration_Patial_2( // Iteration_Step_SOR<<>>( n_cells, density_d, // potential_d, nx, ny, nz, n_ghost_potential, dx, dy, dz, omega, 1, epsilon, // converged_d ); - hipLaunchKernelGGL(Iteration_Step_SOR, dim3dGrid_half, dim3dBlock, 0, 0, - n_cells, density_d, potential_d, nx, ny, nz, - n_ghost_potential, dx, dy, dz, omega, 1, epsilon, - converged_d); + hipLaunchKernelGGL(Iteration_Step_SOR, dim3dGrid_half, dim3dBlock, 0, 0, n_cells, density_d, potential_d, nx, ny, nz, + n_ghost_potential, dx, dy, dz, omega, 1, epsilon, converged_d); cudaMemcpy(converged_h, converged_d, sizeof(bool), cudaMemcpyDeviceToHost); } -__global__ void Set_Isolated_Boundary_GPU_kernel( - int direction, int side, int size_buffer, int n_i, int n_j, int n_ghost, - int nx_pot, int ny_pot, int nz_pot, Real *potential_d, Real *boundary_d) +__global__ void Set_Isolated_Boundary_GPU_kernel(int direction, int side, int size_buffer, int n_i, int n_j, + int n_ghost, int nx_pot, int ny_pot, int nz_pot, Real *potential_d, + Real *boundary_d) { // get a global thread ID int nx_local, ny_local, nz_local; @@ -331,42 +302,30 @@ __global__ void Set_Isolated_Boundary_GPU_kernel( tid_j = (tid - tid_k * n_i * n_j) / n_i; tid_i = tid - tid_k * n_i * n_j - tid_j * n_i; - if (tid_i < 0 || tid_i >= n_i || tid_j < 0 || tid_j >= n_j || tid_k < 0 || - tid_k >= n_ghost) - return; + if (tid_i < 0 || tid_i >= n_i || tid_j < 0 || tid_j >= n_j || tid_k < 0 || tid_k >= n_ghost) return; tid_buffer = tid_i + tid_j * n_i + tid_k * n_i * n_j; if (direction == 0) { - if (side == 0) - tid_pot = (tid_k) + (tid_i + n_ghost) * nx_pot + - (tid_j + n_ghost) * nx_pot * ny_pot; + if (side == 0) tid_pot = (tid_k) + (tid_i + n_ghost) * nx_pot + (tid_j + n_ghost) * nx_pot * ny_pot; if (side == 1) - tid_pot = (tid_k + nx_local + n_ghost) + (tid_i + n_ghost) * nx_pot + - (tid_j + n_ghost) * nx_pot * ny_pot; + tid_pot = (tid_k + nx_local + n_ghost) + (tid_i + n_ghost) * nx_pot + (tid_j + n_ghost) * nx_pot * ny_pot; } if (direction == 1) { - if (side == 0) - tid_pot = (tid_i + n_ghost) + (tid_k)*nx_pot + - (tid_j + n_ghost) * nx_pot * ny_pot; + if (side == 0) tid_pot = (tid_i + n_ghost) + (tid_k)*nx_pot + (tid_j + n_ghost) * nx_pot * ny_pot; if (side == 1) - tid_pot = (tid_i + n_ghost) + (tid_k + ny_local + n_ghost) * nx_pot + - (tid_j + n_ghost) * nx_pot * ny_pot; + tid_pot = (tid_i + n_ghost) + (tid_k + ny_local + n_ghost) * nx_pot + (tid_j + n_ghost) * nx_pot * ny_pot; } if (direction == 2) { - if (side == 0) - tid_pot = (tid_i + n_ghost) + (tid_j + n_ghost) * nx_pot + - (tid_k)*nx_pot * ny_pot; + if (side == 0) tid_pot = (tid_i + n_ghost) + (tid_j + n_ghost) * nx_pot + (tid_k)*nx_pot * ny_pot; if (side == 1) - tid_pot = (tid_i + n_ghost) + (tid_j + n_ghost) * nx_pot + - (tid_k + nz_local + n_ghost) * nx_pot * ny_pot; + tid_pot = (tid_i + n_ghost) + (tid_j + n_ghost) * nx_pot + (tid_k + nz_local + n_ghost) * nx_pot * ny_pot; } potential_d[tid_pot] = boundary_d[tid_buffer]; } -void Potential_SOR_3D::Set_Isolated_Boundary_GPU(int direction, int side, - Real *boundary_d) +void Potential_SOR_3D::Set_Isolated_Boundary_GPU(int direction, int side, Real *boundary_d) { // #ifdef MPI_CHOLLA // printf("Pid: %d Setting Isolated Boundary: %d %d \n",procID, direction, @@ -402,27 +361,23 @@ void Potential_SOR_3D::Set_Isolated_Boundary_GPU(int direction, int side, // Set_Isolated_Boundary_GPU_kernel<<>>( direction, // side, size_buffer, n_i, n_j, n_ghost, nx_pot, ny_pot, nz_pot, // F.potential_d, boundary_d ); - hipLaunchKernelGGL(Set_Isolated_Boundary_GPU_kernel, dim1dGrid, dim1dBlock, 0, - 0, direction, side, size_buffer, n_i, n_j, n_ghost, nx_pot, - ny_pot, nz_pot, F.potential_d, boundary_d); + hipLaunchKernelGGL(Set_Isolated_Boundary_GPU_kernel, dim1dGrid, dim1dBlock, 0, 0, direction, side, size_buffer, n_i, + n_j, n_ghost, nx_pot, ny_pot, nz_pot, F.potential_d, boundary_d); } void Potential_SOR_3D::Copy_Output(Real *output_potential) { - cudaMemcpy(output_potential, F.potential_d, n_cells_potential * sizeof(Real), - cudaMemcpyDeviceToHost); + cudaMemcpy(output_potential, F.potential_d, n_cells_potential * sizeof(Real), cudaMemcpyDeviceToHost); } void Potential_SOR_3D::Copy_Potential_From_Host(Real *output_potential) { - cudaMemcpy(F.potential_d, output_potential, n_cells_potential * sizeof(Real), - cudaMemcpyHostToDevice); + cudaMemcpy(F.potential_d, output_potential, n_cells_potential * sizeof(Real), cudaMemcpyHostToDevice); } -__global__ void Load_Transfer_Buffer_GPU_kernel_SOR( - int direction, int side, int size_buffer, int n_i, int n_j, int nx, int ny, - int nz, int n_ghost_transfer, int n_ghost_potential, Real *potential_d, - Real *transfer_buffer_d) +__global__ void Load_Transfer_Buffer_GPU_kernel_SOR(int direction, int side, int size_buffer, int n_i, int n_j, int nx, + int ny, int nz, int n_ghost_transfer, int n_ghost_potential, + Real *potential_d, Real *transfer_buffer_d) { // get a global thread ID int tid, tid_i, tid_j, tid_k, tid_buffer, tid_pot; @@ -431,41 +386,28 @@ __global__ void Load_Transfer_Buffer_GPU_kernel_SOR( tid_j = (tid - tid_k * n_i * n_j) / n_i; tid_i = tid - tid_k * n_i * n_j - tid_j * n_i; - if (tid_i < 0 || tid_i >= n_i || tid_j < 0 || tid_j >= n_j || tid_k < 0 || - tid_k >= n_ghost_transfer) - return; + if (tid_i < 0 || tid_i >= n_i || tid_j < 0 || tid_j >= n_j || tid_k < 0 || tid_k >= n_ghost_transfer) return; tid_buffer = tid_i + tid_j * n_i + tid_k * n_i * n_j; if (direction == 0) { - if (side == 0) - tid_pot = (n_ghost_potential + tid_k) + (tid_i)*nx + (tid_j)*nx * ny; - if (side == 1) - tid_pot = (nx - n_ghost_potential - n_ghost_transfer + tid_k) + - (tid_i)*nx + (tid_j)*nx * ny; + if (side == 0) tid_pot = (n_ghost_potential + tid_k) + (tid_i)*nx + (tid_j)*nx * ny; + if (side == 1) tid_pot = (nx - n_ghost_potential - n_ghost_transfer + tid_k) + (tid_i)*nx + (tid_j)*nx * ny; } if (direction == 1) { - if (side == 0) - tid_pot = (tid_i) + (n_ghost_potential + tid_k) * nx + (tid_j)*nx * ny; - if (side == 1) - tid_pot = (tid_i) + - (ny - n_ghost_potential - n_ghost_transfer + tid_k) * nx + - (tid_j)*nx * ny; + if (side == 0) tid_pot = (tid_i) + (n_ghost_potential + tid_k) * nx + (tid_j)*nx * ny; + if (side == 1) tid_pot = (tid_i) + (ny - n_ghost_potential - n_ghost_transfer + tid_k) * nx + (tid_j)*nx * ny; } if (direction == 2) { - if (side == 0) - tid_pot = (tid_i) + (tid_j)*nx + (n_ghost_potential + tid_k) * nx * ny; - if (side == 1) - tid_pot = (tid_i) + (tid_j)*nx + - (nz - n_ghost_potential - n_ghost_transfer + tid_k) * nx * ny; + if (side == 0) tid_pot = (tid_i) + (tid_j)*nx + (n_ghost_potential + tid_k) * nx * ny; + if (side == 1) tid_pot = (tid_i) + (tid_j)*nx + (nz - n_ghost_potential - n_ghost_transfer + tid_k) * nx * ny; } transfer_buffer_d[tid_buffer] = potential_d[tid_pot]; } -__global__ void Load_Transfer_Buffer_GPU_Half_kernel( - int direction, int side, int size_buffer, int n_i, int n_j, int nx, int ny, - int nz, int n_ghost_transfer, int n_ghost_potential, Real *potential_d, - Real *transfer_buffer_d, int parity) +__global__ void Load_Transfer_Buffer_GPU_Half_kernel(int direction, int side, int size_buffer, int n_i, int n_j, int nx, + int ny, int nz, int n_ghost_transfer, int n_ghost_potential, + Real *potential_d, Real *transfer_buffer_d, int parity) { // get a global thread ID int tid, tid_i, tid_j, tid_k, tid_buffer, tid_pot; @@ -488,39 +430,25 @@ __global__ void Load_Transfer_Buffer_GPU_Half_kernel( // } // else if ( (tid_k+1)%2 == parity ) tid_i +=1; - if (tid_i < 0 || tid_i >= n_i || tid_j < 0 || tid_j >= n_j || tid_k < 0 || - tid_k >= n_ghost_transfer) - return; + if (tid_i < 0 || tid_i >= n_i || tid_j < 0 || tid_j >= n_j || tid_k < 0 || tid_k >= n_ghost_transfer) return; tid_i += n_ghost_potential; tid_j += n_ghost_potential; if (direction == 0) { - if (side == 0) - tid_pot = (n_ghost_potential + tid_k) + (tid_i)*nx_pot + - (tid_j)*nx_pot * ny_pot; + if (side == 0) tid_pot = (n_ghost_potential + tid_k) + (tid_i)*nx_pot + (tid_j)*nx_pot * ny_pot; if (side == 1) - tid_pot = (nx_pot - n_ghost_potential - n_ghost_transfer + tid_k) + - (tid_i)*nx_pot + (tid_j)*nx_pot * ny_pot; + tid_pot = (nx_pot - n_ghost_potential - n_ghost_transfer + tid_k) + (tid_i)*nx_pot + (tid_j)*nx_pot * ny_pot; } if (direction == 1) { - if (side == 0) - tid_pot = (tid_i) + (n_ghost_potential + tid_k) * nx_pot + - (tid_j)*nx_pot * ny_pot; + if (side == 0) tid_pot = (tid_i) + (n_ghost_potential + tid_k) * nx_pot + (tid_j)*nx_pot * ny_pot; if (side == 1) - tid_pot = - (tid_i) + - (ny_pot - n_ghost_potential - n_ghost_transfer + tid_k) * nx_pot + - (tid_j)*nx_pot * ny_pot; + tid_pot = (tid_i) + (ny_pot - n_ghost_potential - n_ghost_transfer + tid_k) * nx_pot + (tid_j)*nx_pot * ny_pot; } if (direction == 2) { - if (side == 0) - tid_pot = (tid_i) + (tid_j)*nx_pot + - (n_ghost_potential + tid_k) * nx_pot * ny_pot; + if (side == 0) tid_pot = (tid_i) + (tid_j)*nx_pot + (n_ghost_potential + tid_k) * nx_pot * ny_pot; if (side == 1) - tid_pot = (tid_i) + (tid_j)*nx_pot + - (nz_pot - n_ghost_potential - n_ghost_transfer + tid_k) * - nx_pot * ny_pot; + tid_pot = (tid_i) + (tid_j)*nx_pot + (nz_pot - n_ghost_potential - n_ghost_transfer + tid_k) * nx_pot * ny_pot; } // printf( "Loading Buffer Half: val= %d pot= %f \n", parity+1, @@ -528,10 +456,9 @@ __global__ void Load_Transfer_Buffer_GPU_Half_kernel( transfer_buffer_d[tid_buffer] = potential_d[tid_pot]; } -__global__ void Unload_Transfer_Buffer_GPU_kernel_SOR( - int direction, int side, int size_buffer, int n_i, int n_j, int nx, int ny, - int nz, int n_ghost_transfer, int n_ghost_potential, Real *potential_d, - Real *transfer_buffer_d) +__global__ void Unload_Transfer_Buffer_GPU_kernel_SOR(int direction, int side, int size_buffer, int n_i, int n_j, + int nx, int ny, int nz, int n_ghost_transfer, + int n_ghost_potential, Real *potential_d, Real *transfer_buffer_d) { // get a global thread ID int tid, tid_i, tid_j, tid_k, tid_buffer, tid_pot; @@ -540,42 +467,29 @@ __global__ void Unload_Transfer_Buffer_GPU_kernel_SOR( tid_j = (tid - tid_k * n_i * n_j) / n_i; tid_i = tid - tid_k * n_i * n_j - tid_j * n_i; - if (tid_i < 0 || tid_i >= n_i || tid_j < 0 || tid_j >= n_j || tid_k < 0 || - tid_k >= n_ghost_transfer) - return; + if (tid_i < 0 || tid_i >= n_i || tid_j < 0 || tid_j >= n_j || tid_k < 0 || tid_k >= n_ghost_transfer) return; tid_buffer = tid_i + tid_j * n_i + tid_k * n_i * n_j; if (direction == 0) { - if (side == 0) - tid_pot = (n_ghost_potential - n_ghost_transfer + tid_k) + (tid_i)*nx + - (tid_j)*nx * ny; - if (side == 1) - tid_pot = (nx - n_ghost_potential + tid_k) + (tid_i)*nx + (tid_j)*nx * ny; + if (side == 0) tid_pot = (n_ghost_potential - n_ghost_transfer + tid_k) + (tid_i)*nx + (tid_j)*nx * ny; + if (side == 1) tid_pot = (nx - n_ghost_potential + tid_k) + (tid_i)*nx + (tid_j)*nx * ny; } if (direction == 1) { - if (side == 0) - tid_pot = (tid_i) + (n_ghost_potential - n_ghost_transfer + tid_k) * nx + - (tid_j)*nx * ny; - if (side == 1) - tid_pot = - (tid_i) + (ny - n_ghost_potential + tid_k) * nx + (tid_j)*nx * ny; + if (side == 0) tid_pot = (tid_i) + (n_ghost_potential - n_ghost_transfer + tid_k) * nx + (tid_j)*nx * ny; + if (side == 1) tid_pot = (tid_i) + (ny - n_ghost_potential + tid_k) * nx + (tid_j)*nx * ny; } if (direction == 2) { - if (side == 0) - tid_pot = (tid_i) + (tid_j)*nx + - (n_ghost_potential - n_ghost_transfer + tid_k) * nx * ny; - if (side == 1) - tid_pot = - (tid_i) + (tid_j)*nx + (nz - n_ghost_potential + tid_k) * nx * ny; + if (side == 0) tid_pot = (tid_i) + (tid_j)*nx + (n_ghost_potential - n_ghost_transfer + tid_k) * nx * ny; + if (side == 1) tid_pot = (tid_i) + (tid_j)*nx + (nz - n_ghost_potential + tid_k) * nx * ny; } potential_d[tid_pot] = transfer_buffer_d[tid_buffer]; } -__global__ void Unload_Transfer_Buffer_GPU_Half_kernel( - int direction, int side, int size_buffer, int n_i, int n_j, int nx, int ny, - int nz, int n_ghost_transfer, int n_ghost_potential, Real *potential_d, - Real *transfer_buffer_d, int parity) +__global__ void Unload_Transfer_Buffer_GPU_Half_kernel(int direction, int side, int size_buffer, int n_i, int n_j, + int nx, int ny, int nz, int n_ghost_transfer, + int n_ghost_potential, Real *potential_d, + Real *transfer_buffer_d, int parity) { // get a global thread ID int tid, tid_i, tid_j, tid_k, tid_buffer, tid_pot; @@ -598,45 +512,30 @@ __global__ void Unload_Transfer_Buffer_GPU_Half_kernel( // } // else if ( (tid_k+1)%2 == parity ) tid_i +=1; - if (tid_i < 0 || tid_i >= n_i || tid_j < 0 || tid_j >= n_j || tid_k < 0 || - tid_k >= n_ghost_transfer) - return; + if (tid_i < 0 || tid_i >= n_i || tid_j < 0 || tid_j >= n_j || tid_k < 0 || tid_k >= n_ghost_transfer) return; tid_i += n_ghost_potential; tid_j += n_ghost_potential; if (direction == 0) { - if (side == 0) - tid_pot = (n_ghost_potential - n_ghost_transfer + tid_k) + - (tid_i)*nx_pot + (tid_j)*nx_pot * ny_pot; - if (side == 1) - tid_pot = (nx_pot - n_ghost_potential + tid_k) + (tid_i)*nx_pot + - (tid_j)*nx_pot * ny_pot; + if (side == 0) tid_pot = (n_ghost_potential - n_ghost_transfer + tid_k) + (tid_i)*nx_pot + (tid_j)*nx_pot * ny_pot; + if (side == 1) tid_pot = (nx_pot - n_ghost_potential + tid_k) + (tid_i)*nx_pot + (tid_j)*nx_pot * ny_pot; } if (direction == 1) { if (side == 0) - tid_pot = (tid_i) + - (n_ghost_potential - n_ghost_transfer + tid_k) * nx_pot + - (tid_j)*nx_pot * ny_pot; - if (side == 1) - tid_pot = (tid_i) + (ny_pot - n_ghost_potential + tid_k) * nx_pot + - (tid_j)*nx_pot * ny_pot; + tid_pot = (tid_i) + (n_ghost_potential - n_ghost_transfer + tid_k) * nx_pot + (tid_j)*nx_pot * ny_pot; + if (side == 1) tid_pot = (tid_i) + (ny_pot - n_ghost_potential + tid_k) * nx_pot + (tid_j)*nx_pot * ny_pot; } if (direction == 2) { if (side == 0) - tid_pot = - (tid_i) + (tid_j)*nx_pot + - (n_ghost_potential - n_ghost_transfer + tid_k) * nx_pot * ny_pot; - if (side == 1) - tid_pot = (tid_i) + (tid_j)*nx_pot + - (nz_pot - n_ghost_potential + tid_k) * nx_pot * ny_pot; + tid_pot = (tid_i) + (tid_j)*nx_pot + (n_ghost_potential - n_ghost_transfer + tid_k) * nx_pot * ny_pot; + if (side == 1) tid_pot = (tid_i) + (tid_j)*nx_pot + (nz_pot - n_ghost_potential + tid_k) * nx_pot * ny_pot; } potential_d[tid_pot] = transfer_buffer_d[tid_buffer]; } -void Potential_SOR_3D::Load_Transfer_Buffer_GPU( - int direction, int side, int nx, int ny, int nz, int n_ghost_transfer, - int n_ghost_potential, Real *potential_d, Real *transfer_buffer_d) +void Potential_SOR_3D::Load_Transfer_Buffer_GPU(int direction, int side, int nx, int ny, int nz, int n_ghost_transfer, + int n_ghost_potential, Real *potential_d, Real *transfer_buffer_d) { int nx_pot, ny_pot, nz_pot, size_buffer, n_i, n_j, ngrid; nx_pot = nx + 2 * n_ghost_potential; @@ -668,15 +567,14 @@ void Potential_SOR_3D::Load_Transfer_Buffer_GPU( // Load_Transfer_Buffer_GPU_kernel<<>>( direction, side, // size_buffer, n_i, n_j, nx_pot, ny_pot, nz_pot, n_ghost_transfer, // n_ghost_potential, potential_d, transfer_buffer_d ); - hipLaunchKernelGGL(Load_Transfer_Buffer_GPU_kernel_SOR, dim1dGrid, dim1dBlock, - 0, 0, direction, side, size_buffer, n_i, n_j, nx_pot, - ny_pot, nz_pot, n_ghost_transfer, n_ghost_potential, - potential_d, transfer_buffer_d); + hipLaunchKernelGGL(Load_Transfer_Buffer_GPU_kernel_SOR, dim1dGrid, dim1dBlock, 0, 0, direction, side, size_buffer, + n_i, n_j, nx_pot, ny_pot, nz_pot, n_ghost_transfer, n_ghost_potential, potential_d, + transfer_buffer_d); } -void Potential_SOR_3D::Load_Transfer_Buffer_Half_GPU( - int direction, int side, int nx, int ny, int nz, int n_ghost_transfer, - int n_ghost_potential, Real *potential_d, Real *transfer_buffer_d) +void Potential_SOR_3D::Load_Transfer_Buffer_Half_GPU(int direction, int side, int nx, int ny, int nz, + int n_ghost_transfer, int n_ghost_potential, Real *potential_d, + Real *transfer_buffer_d) { int size_buffer, n_i, n_j, ngrid; nz_pot = nz + 2 * n_ghost_potential; @@ -707,15 +605,13 @@ void Potential_SOR_3D::Load_Transfer_Buffer_Half_GPU( // Load_Transfer_Buffer_GPU_Half_kernel<<>>( direction, // side, size_buffer, n_i, n_j, nx, ny, nz, n_ghost_transfer, // n_ghost_potential, potential_d, transfer_buffer_d, iteration_parity ); - hipLaunchKernelGGL(Load_Transfer_Buffer_GPU_Half_kernel, dim1dGrid, - dim1dBlock, 0, 0, direction, side, size_buffer, n_i, n_j, - nx, ny, nz, n_ghost_transfer, n_ghost_potential, - potential_d, transfer_buffer_d, iteration_parity); + hipLaunchKernelGGL(Load_Transfer_Buffer_GPU_Half_kernel, dim1dGrid, dim1dBlock, 0, 0, direction, side, size_buffer, + n_i, n_j, nx, ny, nz, n_ghost_transfer, n_ghost_potential, potential_d, transfer_buffer_d, + iteration_parity); } -void Potential_SOR_3D::Unload_Transfer_Buffer_GPU( - int direction, int side, int nx, int ny, int nz, int n_ghost_transfer, - int n_ghost_potential, Real *potential_d, Real *transfer_buffer_d) +void Potential_SOR_3D::Unload_Transfer_Buffer_GPU(int direction, int side, int nx, int ny, int nz, int n_ghost_transfer, + int n_ghost_potential, Real *potential_d, Real *transfer_buffer_d) { int nx_pot, ny_pot, nz_pot, size_buffer, n_i, n_j, ngrid; nx_pot = nx + 2 * n_ghost_potential; @@ -747,15 +643,14 @@ void Potential_SOR_3D::Unload_Transfer_Buffer_GPU( // Unload_Transfer_Buffer_GPU_kernel<<>>( direction, // side, size_buffer, n_i, n_j, nx_pot, ny_pot, nz_pot, n_ghost_transfer, // n_ghost_potential, potential_d, transfer_buffer_d ); - hipLaunchKernelGGL(Unload_Transfer_Buffer_GPU_kernel_SOR, dim1dGrid, - dim1dBlock, 0, 0, direction, side, size_buffer, n_i, n_j, - nx_pot, ny_pot, nz_pot, n_ghost_transfer, - n_ghost_potential, potential_d, transfer_buffer_d); + hipLaunchKernelGGL(Unload_Transfer_Buffer_GPU_kernel_SOR, dim1dGrid, dim1dBlock, 0, 0, direction, side, size_buffer, + n_i, n_j, nx_pot, ny_pot, nz_pot, n_ghost_transfer, n_ghost_potential, potential_d, + transfer_buffer_d); } -void Potential_SOR_3D::Unload_Transfer_Buffer_Half_GPU( - int direction, int side, int nx, int ny, int nz, int n_ghost_transfer, - int n_ghost_potential, Real *potential_d, Real *transfer_buffer_d) +void Potential_SOR_3D::Unload_Transfer_Buffer_Half_GPU(int direction, int side, int nx, int ny, int nz, + int n_ghost_transfer, int n_ghost_potential, Real *potential_d, + Real *transfer_buffer_d) { int size_buffer, n_i, n_j, ngrid; @@ -785,25 +680,18 @@ void Potential_SOR_3D::Unload_Transfer_Buffer_Half_GPU( // Unload_Transfer_Buffer_GPU_Half_kernel<<>>( // direction, side, size_buffer, n_i, n_j, nx, ny, nz, n_ghost_transfer, // n_ghost_potential, potential_d, transfer_buffer_d, iteration_parity ); - hipLaunchKernelGGL(Unload_Transfer_Buffer_GPU_Half_kernel, dim1dGrid, - dim1dBlock, 0, 0, direction, side, size_buffer, n_i, n_j, - nx, ny, nz, n_ghost_transfer, n_ghost_potential, - potential_d, transfer_buffer_d, iteration_parity); + hipLaunchKernelGGL(Unload_Transfer_Buffer_GPU_Half_kernel, dim1dGrid, dim1dBlock, 0, 0, direction, side, size_buffer, + n_i, n_j, nx, ny, nz, n_ghost_transfer, n_ghost_potential, potential_d, transfer_buffer_d, + iteration_parity); } -void Potential_SOR_3D::Copy_Transfer_Buffer_To_Host(int size_buffer, - Real *transfer_buffer_h, - Real *transfer_buffer_d) +void Potential_SOR_3D::Copy_Transfer_Buffer_To_Host(int size_buffer, Real *transfer_buffer_h, Real *transfer_buffer_d) { - CudaSafeCall(cudaMemcpy(transfer_buffer_h, transfer_buffer_d, - size_buffer * sizeof(Real), cudaMemcpyDeviceToHost)); + CudaSafeCall(cudaMemcpy(transfer_buffer_h, transfer_buffer_d, size_buffer * sizeof(Real), cudaMemcpyDeviceToHost)); } -void Potential_SOR_3D::Copy_Transfer_Buffer_To_Device(int size_buffer, - Real *transfer_buffer_h, - Real *transfer_buffer_d) +void Potential_SOR_3D::Copy_Transfer_Buffer_To_Device(int size_buffer, Real *transfer_buffer_h, Real *transfer_buffer_d) { - CudaSafeCall(cudaMemcpy(transfer_buffer_d, transfer_buffer_h, - size_buffer * sizeof(Real), cudaMemcpyHostToDevice)); + CudaSafeCall(cudaMemcpy(transfer_buffer_d, transfer_buffer_h, size_buffer * sizeof(Real), cudaMemcpyHostToDevice)); } #endif // GRAVITY diff --git a/src/gravity/potential_paris_3D.cu b/src/gravity/potential_paris_3D.cu index 1306e1ea3..26de6a619 100644 --- a/src/gravity/potential_paris_3D.cu +++ b/src/gravity/potential_paris_3D.cu @@ -9,17 +9,15 @@ #include "../utils/gpu.hpp" static void __attribute__((unused)) -printDiff(const Real *p, const Real *q, const int ng, const int nx, - const int ny, const int nz, const bool plot = false) +printDiff(const Real *p, const Real *q, const int ng, const int nx, const int ny, const int nz, const bool plot = false) { Real dMax = 0, dSum = 0, dSum2 = 0; Real qMax = 0, qSum = 0, qSum2 = 0; - #pragma omp parallel for reduction(max:dMax,qMax) reduction(+:dSum,dSum2,qSum,qSum2) + #pragma omp parallel for reduction(max : dMax, qMax) reduction(+ : dSum, dSum2, qSum, qSum2) for (int k = 0; k < nz; k++) { for (int j = 0; j < ny; j++) { for (int i = 0; i < nx; i++) { - const int ijk = - i + ng + (nx + ng + ng) * (j + ng + (ny + ng + ng) * (k + ng)); + const int ijk = i + ng + (nx + ng + ng) * (j + ng + (ny + ng + ng) * (k + ng)); const Real qAbs = fabs(q[ijk]); qMax = std::max(qMax, qAbs); qSum += qAbs; @@ -35,8 +33,8 @@ printDiff(const Real *p, const Real *q, const int ng, const int nx, Real sums[4] = {qSum, qSum2, dSum, dSum2}; MPI_Allreduce(MPI_IN_PLACE, &maxs, 2, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD); MPI_Allreduce(MPI_IN_PLACE, &sums, 4, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); - chprintf(" Poisson-Solver Diff: L1 %g L2 %g Linf %g\n", sums[2] / sums[0], - sqrt(sums[3] / sums[1]), maxs[1] / maxs[0]); + chprintf(" Poisson-Solver Diff: L1 %g L2 %g Linf %g\n", sums[2] / sums[0], sqrt(sums[3] / sums[1]), + maxs[1] / maxs[0]); fflush(stdout); if (!plot) return; @@ -45,8 +43,7 @@ printDiff(const Real *p, const Real *q, const int ng, const int nx, // for (int j = 0; j < ny; j++) { const int j = ny / 2; for (int i = 0; i < nx; i++) { - const int ijk = - i + ng + (nx + ng + ng) * (j + ng + (ny + ng + ng) * (k + ng)); + const int ijk = i + ng + (nx + ng + ng) * (j + ng + (ny + ng + ng) * (k + ng)); // printf("%d %d %g %g %g\n",j,i,q[ijk],p[ijk],q[ijk]-p[ijk]); printf("%d %g %g %g\n", i, q[ijk], p[ijk], q[ijk] - p[ijk]); } @@ -74,8 +71,7 @@ Potential_Paris_3D::Potential_Paris_3D() Potential_Paris_3D::~Potential_Paris_3D() { Reset(); } -void Potential_Paris_3D::Get_Potential(const Real *const density, - Real *const potential, const Real g, +void Potential_Paris_3D::Get_Potential(const Real *const density, Real *const potential, const Real g, const Real offset, const Real a) { #ifdef COSMOLOGY @@ -107,10 +103,8 @@ void Potential_Paris_3D::Get_Potential(const Real *const density, gpuFor( nk, nj, ni, GPU_LAMBDA(const int k, const int j, const int i) { const int ia = i + ni * (j + nj * k); - const int ib = - i + N_GHOST_POTENTIAL + - ngi * (j + N_GHOST_POTENTIAL + ngj * (k + N_GHOST_POTENTIAL)); - db[ib] = da[ia]; + const int ib = i + N_GHOST_POTENTIAL + ngi * (j + N_GHOST_POTENTIAL + ngj * (k + N_GHOST_POTENTIAL)); + db[ib] = da[ia]; }); assert(potential); @@ -121,12 +115,9 @@ void Potential_Paris_3D::Get_Potential(const Real *const density, #endif } -void Potential_Paris_3D::Initialize(const Real lx, const Real ly, const Real lz, - const Real xMin, const Real yMin, - const Real zMin, const int nx, const int ny, - const int nz, const int nxReal, - const int nyReal, const int nzReal, - const Real dx, const Real dy, const Real dz) +void Potential_Paris_3D::Initialize(const Real lx, const Real ly, const Real lz, const Real xMin, const Real yMin, + const Real zMin, const int nx, const int ny, const int nz, const int nxReal, + const int nyReal, const int nzReal, const Real dx, const Real dy, const Real dz) { chprintf(" Using Poisson Solver: Paris Periodic"); #ifdef PARIS_5PT @@ -157,18 +148,15 @@ void Potential_Paris_3D::Initialize(const Real lx, const Real ly, const Real lz, myLo_[2] = xMin; MPI_Allreduce(myLo_, lo_, 3, MPI_DOUBLE, MPI_MIN, MPI_COMM_WORLD); - const Real hi[3] = {lo_[0] + lz - dr_[0], lo_[1] + ly - dr_[1], - lo_[2] + lx - dr_[2]}; + const Real hi[3] = {lo_[0] + lz - dr_[0], lo_[1] + ly - dr_[1], lo_[2] + lx - dr_[2]}; const int n[3] = {nz, ny, nx}; const int m[3] = {n[0] / nzReal, n[1] / nyReal, n[2] / nxReal}; - const int id[3] = {int(round((zMin - lo_[0]) / (dn_[0] * dr_[0]))), - int(round((yMin - lo_[1]) / (dn_[1] * dr_[1]))), + const int id[3] = {int(round((zMin - lo_[0]) / (dn_[0] * dr_[0]))), int(round((yMin - lo_[1]) / (dn_[1] * dr_[1]))), int(round((xMin - lo_[2]) / (dn_[2] * dr_[2])))}; chprintf( " Paris: [ %g %g %g ]-[ %g %g %g ] N_local[ %d %d %d ] Tasks[ %d %d %d " "]\n", - lo_[2], lo_[1], lo_[0], lo_[2] + lx, lo_[1] + ly, lo_[0] + lz, dn_[2], - dn_[1], dn_[0], m[2], m[1], m[0]); + lo_[2], lo_[1], lo_[0], lo_[2] + lx, lo_[1] + ly, lo_[0] + lz, dn_[2], dn_[1], dn_[0], m[2], m[1], m[0]); assert(dn_[0] == n[0] / m[0]); assert(dn_[1] == n[1] / m[1]); @@ -176,18 +164,15 @@ void Potential_Paris_3D::Initialize(const Real lx, const Real ly, const Real lz, pp_ = new ParisPeriodic(n, lo_, hi, m, id); assert(pp_); - minBytes_ = pp_->bytes(); - densityBytes_ = long(sizeof(Real)) * dn_[0] * dn_[1] * dn_[2]; - const long gg = N_GHOST_POTENTIAL + N_GHOST_POTENTIAL; - potentialBytes_ = - long(sizeof(Real)) * (dn_[0] + gg) * (dn_[1] + gg) * (dn_[2] + gg); - - CHECK(cudaMalloc(reinterpret_cast(&da_), - std::max(minBytes_, densityBytes_))); + minBytes_ = pp_->bytes(); + densityBytes_ = long(sizeof(Real)) * dn_[0] * dn_[1] * dn_[2]; + const long gg = N_GHOST_POTENTIAL + N_GHOST_POTENTIAL; + potentialBytes_ = long(sizeof(Real)) * (dn_[0] + gg) * (dn_[1] + gg) * (dn_[2] + gg); + + CHECK(cudaMalloc(reinterpret_cast(&da_), std::max(minBytes_, densityBytes_))); assert(da_); - CHECK(cudaMalloc(reinterpret_cast(&db_), - std::max(minBytes_, potentialBytes_))); + CHECK(cudaMalloc(reinterpret_cast(&db_), std::max(minBytes_, potentialBytes_))); assert(db_); } diff --git a/src/gravity/potential_paris_3D.h b/src/gravity/potential_paris_3D.h index 06c1d7db6..9cc2828c2 100644 --- a/src/gravity/potential_paris_3D.h +++ b/src/gravity/potential_paris_3D.h @@ -10,11 +10,9 @@ class Potential_Paris_3D public: Potential_Paris_3D(); ~Potential_Paris_3D(); - void Get_Potential(const Real *density, Real *potential, Real g, - Real massInfo, Real a); - void Initialize(Real lx, Real ly, Real lz, Real xMin, Real yMin, Real zMin, - int nx, int ny, int nz, int nxReal, int nyReal, int nzReal, - Real dx, Real dy, Real dz); + void Get_Potential(const Real *density, Real *potential, Real g, Real massInfo, Real a); + void Initialize(Real lx, Real ly, Real lz, Real xMin, Real yMin, Real zMin, int nx, int ny, int nz, int nxReal, + int nyReal, int nzReal, Real dx, Real dy, Real dz); void Reset(); protected: diff --git a/src/gravity/potential_paris_galactic.cu b/src/gravity/potential_paris_galactic.cu index c233ad93c..c6c341aa6 100644 --- a/src/gravity/potential_paris_galactic.cu +++ b/src/gravity/potential_paris_galactic.cu @@ -27,9 +27,7 @@ Potential_Paris_Galactic::Potential_Paris_Galactic() Potential_Paris_Galactic::~Potential_Paris_Galactic() { Reset(); } -void Potential_Paris_Galactic::Get_Potential(const Real *const density, - Real *const potential, - const Real g, +void Potential_Paris_Galactic::Get_Potential(const Real *const density, Real *const potential, const Real g, const DiskGalaxy &galaxy) { const Real scale = Real(4) * M_PI * g; @@ -51,8 +49,7 @@ void Potential_Paris_Galactic::Get_Potential(const Real *const density, Real *const phi = potential; #else CHECK(cudaMemcpyAsync(da, density, densityBytes_, cudaMemcpyHostToDevice, 0)); - CHECK(cudaMemcpyAsync(dc_, potential, potentialBytes_, cudaMemcpyHostToDevice, - 0)); + CHECK(cudaMemcpyAsync(dc_, potential, potentialBytes_, cudaMemcpyHostToDevice, 0)); const Real *const rho = da; Real *const phi = dc_; #endif @@ -78,12 +75,11 @@ void Potential_Paris_Galactic::Get_Potential(const Real *const density, const Real y = yMin + j * dy; const Real z = zMin + k * dz; - const Real r = sqrt(x * x + y * y); - const Real a = sqrt(z * z + zd * zd); - const Real b = rd + a; - const Real c = r * r + b * b; - const Real dRho = - rho0 * (rd * c + 3.0 * a * b * b) / (a * a * a * pow(c, 2.5)); + const Real r = sqrt(x * x + y * y); + const Real a = sqrt(z * z + zd * zd); + const Real b = rd + a; + const Real c = r * r + b * b; + const Real dRho = rho0 * (rd * c + 3.0 * a * b * b) / (a * a * a * pow(c, 2.5)); da[ia] = scale * (rho[ia] - dRho); }); @@ -94,9 +90,7 @@ void Potential_Paris_Galactic::Get_Potential(const Real *const density, gpuFor( nk, nj, ni, GPU_LAMBDA(const int k, const int j, const int i) { const int ia = i + ni * (j + nj * k); - const int ib = - i + N_GHOST_POTENTIAL + - ngi * (j + N_GHOST_POTENTIAL + ngj * (k + N_GHOST_POTENTIAL)); + const int ib = i + N_GHOST_POTENTIAL + ngi * (j + N_GHOST_POTENTIAL + ngj * (k + N_GHOST_POTENTIAL)); const Real x = xMin + i * dx; const Real y = yMin + j * dy; @@ -116,11 +110,10 @@ void Potential_Paris_Galactic::Get_Potential(const Real *const density, #endif } -void Potential_Paris_Galactic::Initialize( - const Real lx, const Real ly, const Real lz, const Real xMin, - const Real yMin, const Real zMin, const int nx, const int ny, const int nz, - const int nxReal, const int nyReal, const int nzReal, const Real dx, - const Real dy, const Real dz) +void Potential_Paris_Galactic::Initialize(const Real lx, const Real ly, const Real lz, const Real xMin, const Real yMin, + const Real zMin, const int nx, const int ny, const int nz, const int nxReal, + const int nyReal, const int nzReal, const Real dx, const Real dy, + const Real dz) { const long nl012 = long(nxReal) * long(nyReal) * long(nzReal); assert(nl012 <= INT_MAX); @@ -142,8 +135,7 @@ void Potential_Paris_Galactic::Initialize( myLo_[2] = xMin + 0.5 * dr_[2]; MPI_Allreduce(myLo_, lo_, 3, MPI_DOUBLE, MPI_MIN, MPI_COMM_WORLD); - const Real hi[3] = {lo_[0] + lr_[0] - dr_[0], lo_[1] + lr_[1] - dr_[1], - lo_[2] + lr_[1] - dr_[2]}; + const Real hi[3] = {lo_[0] + lr_[0] - dr_[0], lo_[1] + lr_[1] - dr_[1], lo_[2] + lr_[1] - dr_[2]}; const int n[3] = {nz, ny, nx}; const int m[3] = {n[0] / nzReal, n[1] / nyReal, n[2] / nxReal}; const int id[3] = {int(round((myLo_[0] - lo_[0]) / (dn_[0] * dr_[0]))), @@ -152,8 +144,7 @@ void Potential_Paris_Galactic::Initialize( chprintf( " Paris Galactic: [ %g %g %g ]-[ %g %g %g ] n_local[ %d %d %d ] tasks[ " "%d %d %d ]\n", - lo_[2], lo_[1], lo_[0], hi[2], hi[1], hi[0], dn_[2], dn_[1], dn_[0], m[2], - m[1], m[0]); + lo_[2], lo_[1], lo_[0], hi[2], hi[1], hi[0], dn_[2], dn_[1], dn_[0], m[2], m[1], m[0]); assert(dn_[0] == n[0] / m[0]); assert(dn_[1] == n[1] / m[1]); @@ -164,15 +155,12 @@ void Potential_Paris_Galactic::Initialize( minBytes_ = pp_->bytes(); densityBytes_ = long(sizeof(Real)) * dn_[0] * dn_[1] * dn_[2]; - CHECK(cudaMalloc(reinterpret_cast(&da_), - std::max(minBytes_, densityBytes_))); - CHECK(cudaMalloc(reinterpret_cast(&db_), - std::max(minBytes_, densityBytes_))); + CHECK(cudaMalloc(reinterpret_cast(&da_), std::max(minBytes_, densityBytes_))); + CHECK(cudaMalloc(reinterpret_cast(&db_), std::max(minBytes_, densityBytes_))); #ifndef GRAVITY_GPU - const long gg = N_GHOST_POTENTIAL + N_GHOST_POTENTIAL; - potentialBytes_ = - long(sizeof(Real)) * (dn_[0] + gg) * (dn_[1] + gg) * (dn_[2] + gg); + const long gg = N_GHOST_POTENTIAL + N_GHOST_POTENTIAL; + potentialBytes_ = long(sizeof(Real)) * (dn_[0] + gg) * (dn_[1] + gg) * (dn_[2] + gg); CHECK(cudaMalloc(reinterpret_cast(&dc_), potentialBytes_)); #endif } diff --git a/src/gravity/potential_paris_galactic.h b/src/gravity/potential_paris_galactic.h index 9e5df6adf..999e4d56c 100644 --- a/src/gravity/potential_paris_galactic.h +++ b/src/gravity/potential_paris_galactic.h @@ -11,11 +11,9 @@ class Potential_Paris_Galactic public: Potential_Paris_Galactic(); ~Potential_Paris_Galactic(); - void Get_Potential(const Real *density, Real *potential, Real g, - const DiskGalaxy &galaxy); - void Initialize(Real lx, Real ly, Real lz, Real xMin, Real yMin, Real zMin, - int nx, int ny, int nz, int nxReal, int nyReal, int nzReal, - Real dx, Real dy, Real dz); + void Get_Potential(const Real *density, Real *potential, Real g, const DiskGalaxy &galaxy); + void Initialize(Real lx, Real ly, Real lz, Real xMin, Real yMin, Real zMin, int nx, int ny, int nz, int nxReal, + int nyReal, int nzReal, Real dx, Real dy, Real dz); void Reset(); protected: diff --git a/src/gravity/static_grav.h b/src/gravity/static_grav.h index d36045148..9a4a30f6a 100644 --- a/src/gravity/static_grav.h +++ b/src/gravity/static_grav.h @@ -14,8 +14,7 @@ // Work around lack of pow(Real,int) in Hip Clang for Rocm 3.5 static inline __device__ Real pow2(const Real x) { return x * x; } -inline __device__ void calc_g_1D(int xid, int x_off, int n_ghost, Real dx, - Real xbound, Real *gx) +inline __device__ void calc_g_1D(int xid, int x_off, int n_ghost, Real dx, Real xbound, Real *gx) { Real x_pos, r_disk, r_halo; x_pos = (x_off + xid - n_ghost + 0.5) * dx + xbound; @@ -29,8 +28,7 @@ inline __device__ void calc_g_1D(int xid, int x_off, int n_ghost, Real dx, r_halo = sqrt(x_pos * x_pos + r_disk * r_disk); // set properties of halo and disk (these must match initial conditions) - Real a_disk_z, a_halo, M_vir, M_d, R_vir, R_d, z_d, R_h, M_h, c_vir, phi_0_h, - x; + Real a_disk_z, a_halo, M_vir, M_d, R_vir, R_d, z_d, R_h, M_h, c_vir, phi_0_h, x; M_vir = 1.0e12; // viral mass of MW in M_sun M_d = 6.5e10; // mass of disk in M_sun M_h = M_vir - M_d; // halo mass in M_sun @@ -46,8 +44,7 @@ inline __device__ void calc_g_1D(int xid, int x_off, int n_ghost, Real dx, a_halo = -phi_0_h * (log(1 + x) - x / (1 + x)) / (r_halo * r_halo); a_disk_z = -GN * M_d * x_pos * (R_d + sqrt(x_pos * x_pos + z_d * z_d)) / - (pow(r_disk * r_disk + pow2(R_d + sqrt(x_pos * x_pos + z_d * z_d)), 1.5) * - sqrt(x_pos * x_pos + z_d * z_d)); + (pow(r_disk * r_disk + pow2(R_d + sqrt(x_pos * x_pos + z_d * z_d)), 1.5) * sqrt(x_pos * x_pos + z_d * z_d)); // total acceleration is the sum of the halo + disk components *gx = (x_pos / r_halo) * a_halo + a_disk_z; @@ -55,8 +52,7 @@ inline __device__ void calc_g_1D(int xid, int x_off, int n_ghost, Real dx, return; } -inline __device__ void calc_g_2D(int xid, int yid, int x_off, int y_off, - int n_ghost, Real dx, Real dy, Real xbound, +inline __device__ void calc_g_2D(int xid, int yid, int x_off, int y_off, int n_ghost, Real dx, Real dy, Real xbound, Real ybound, Real *gx, Real *gy) { Real x_pos, y_pos, r, phi; @@ -104,9 +100,8 @@ inline __device__ void calc_g_2D(int xid, int yid, int x_off, int y_off, // calculate acceleration x = r / R_s; a_d = GN * M_d * r * pow(r * r + R_d * R_d, -1.5); - a_h = GN * M_h * (log(1 + x) - x / (1 + x)) / - ((log(1 + c_vir) - c_vir / (1 + c_vir)) * r * r); - a = a_d + a_h; + a_h = GN * M_h * (log(1 + x) - x / (1 + x)) / ((log(1 + c_vir) - c_vir / (1 + c_vir)) * r * r); + a = a_d + a_h; *gx = -cos(phi) * a; *gy = -sin(phi) * a; @@ -114,10 +109,8 @@ inline __device__ void calc_g_2D(int xid, int yid, int x_off, int y_off, return; } -inline __device__ void calc_g_3D(int xid, int yid, int zid, int x_off, - int y_off, int z_off, int n_ghost, Real dx, - Real dy, Real dz, Real xbound, Real ybound, - Real zbound, Real *gx, Real *gy, Real *gz) +inline __device__ void calc_g_3D(int xid, int yid, int zid, int x_off, int y_off, int z_off, int n_ghost, Real dx, + Real dy, Real dz, Real xbound, Real ybound, Real zbound, Real *gx, Real *gy, Real *gz) { Real x_pos, y_pos, z_pos, r_disk, r_halo; // use the subgrid offset and global boundaries to calculate absolute @@ -158,13 +151,10 @@ inline __device__ void calc_g_3D(int xid, int yid, int zid, int x_off, a_halo = -phi_0_h * (log(1 + x) - x / (1 + x)) / (r_halo * r_halo); a_halo_r = a_halo * (r_disk / r_halo); a_halo_z = a_halo * (z_pos / r_halo); - a_disk_r = - -GN * M_d * r_disk * - pow(r_disk * r_disk + pow2(R_d + sqrt(z_pos * z_pos + z_d * z_d)), -1.5); + a_disk_r = -GN * M_d * r_disk * pow(r_disk * r_disk + pow2(R_d + sqrt(z_pos * z_pos + z_d * z_d)), -1.5); a_disk_z = -GN * M_d * z_pos * (R_d + sqrt(z_pos * z_pos + z_d * z_d)) / - (pow(r_disk * r_disk + pow2(R_d + sqrt(z_pos * z_pos + z_d * z_d)), 1.5) * - sqrt(z_pos * z_pos + z_d * z_d)); + (pow(r_disk * r_disk + pow2(R_d + sqrt(z_pos * z_pos + z_d * z_d)), 1.5) * sqrt(z_pos * z_pos + z_d * z_d)); // total acceleration is the sum of the halo + disk components *gx = (x_pos / r_disk) * (a_disk_r + a_halo_r); diff --git a/src/grid/boundary_conditions.cpp b/src/grid/boundary_conditions.cpp index 91d08dbe7..a1dfd7132 100644 --- a/src/grid/boundary_conditions.cpp +++ b/src/grid/boundary_conditions.cpp @@ -74,18 +74,14 @@ void Grid3D::Set_Boundary_Conditions(parameters P) n_bounds); printf(" Boundary Hydro: %d\n", (int)H.TRANSFER_HYDRO_BOUNDARIES); #ifdef GRAVITY - printf(" Boundary Potential: %d\n", - (int)Grav.TRANSFER_POTENTIAL_BOUNDARIES); + printf(" Boundary Potential: %d\n", (int)Grav.TRANSFER_POTENTIAL_BOUNDARIES); #ifdef SOR - printf(" Boundary Poisson: %d\n", - (int)Grav.Poisson_solver.TRANSFER_POISSON_BOUNDARIES); + printf(" Boundary Poisson: %d\n", (int)Grav.Poisson_solver.TRANSFER_POISSON_BOUNDARIES); #endif // SOR #endif // GRAVITY #ifdef PARTICLES - printf(" Boundary Particles: %d\n", - (int)Particles.TRANSFER_PARTICLES_BOUNDARIES); - printf(" Boundary Particles Density: %d\n", - (int)Particles.TRANSFER_DENSITY_BOUNDARIES); + printf(" Boundary Particles: %d\n", (int)Particles.TRANSFER_PARTICLES_BOUNDARIES); + printf(" Boundary Particles Density: %d\n", (int)Particles.TRANSFER_DENSITY_BOUNDARIES); #endif // PARTICLES exit(-1); } @@ -308,9 +304,8 @@ void Grid3D::Set_Boundaries(int dir, int flags[]) Set_Boundary_Extents(dir, &imin[0], &imax[0]); // from grid/cuda_boundaries.cu - SetGhostCells(C.device, H.nx, H.ny, H.nz, H.n_fields, H.n_cells, H.n_ghost, - flags, imax[0] - imin[0], imax[1] - imin[1], imax[2] - imin[2], - imin[0], imin[1], imin[2], dir); + SetGhostCells(C.device, H.nx, H.ny, H.nz, H.n_fields, H.n_cells, H.n_ghost, flags, imax[0] - imin[0], + imax[1] - imin[1], imax[2] - imin[2], imin[0], imin[1], imin[2], dir); } /*! \fn Set_Boundary_Extents(int dir, int *imin, int *imax) @@ -434,9 +429,8 @@ void Grid3D::Wind_Boundary() z_off = nz_local_start; #endif - Wind_Boundary_CUDA(C.device, H.nx, H.ny, H.nz, H.n_cells, H.n_ghost, x_off, - y_off, z_off, H.dx, H.dy, H.dz, H.xbound, H.ybound, - H.zbound, gama, H.t); + Wind_Boundary_CUDA(C.device, H.nx, H.ny, H.nz, H.n_cells, H.n_ghost, x_off, y_off, z_off, H.dx, H.dy, H.dz, H.xbound, + H.ybound, H.zbound, gama, H.t); } /*! \fn void Noh_Boundary() @@ -457,9 +451,8 @@ void Grid3D::Noh_Boundary() z_off = nz_local_start; #endif - Noh_Boundary_CUDA(C.device, H.nx, H.ny, H.nz, H.n_cells, H.n_ghost, x_off, - y_off, z_off, H.dx, H.dy, H.dz, H.xbound, H.ybound, - H.zbound, gama, H.t); + Noh_Boundary_CUDA(C.device, H.nx, H.ny, H.nz, H.n_cells, H.n_ghost, x_off, y_off, z_off, H.dx, H.dy, H.dz, H.xbound, + H.ybound, H.zbound, gama, H.t); /* int i, j, k, id; diff --git a/src/grid/cuda_boundaries.cu b/src/grid/cuda_boundaries.cu index ac95c2874..8261c63b5 100644 --- a/src/grid/cuda_boundaries.cu +++ b/src/grid/cuda_boundaries.cu @@ -3,16 +3,12 @@ #include "../utils/gpu.hpp" #include "cuda_boundaries.h" -__device__ int FindIndex(int ig, int nx, int flag, int face, int n_ghost, - Real *a); +__device__ int FindIndex(int ig, int nx, int flag, int face, int n_ghost, Real *a); -__device__ int SetBoundaryMapping(int ig, int jg, int kg, Real *a, int flags[], - int nx, int ny, int nz, int n_ghost); +__device__ int SetBoundaryMapping(int ig, int jg, int kg, Real *a, int flags[], int nx, int ny, int nz, int n_ghost); -__global__ void PackBuffers3DKernel(Real *buffer, Real *c_head, int isize, - int jsize, int ksize, int nx, int ny, - int idxoffset, int buffer_ncells, - int n_fields, int n_cells) +__global__ void PackBuffers3DKernel(Real *buffer, Real *c_head, int isize, int jsize, int ksize, int nx, int ny, + int idxoffset, int buffer_ncells, int n_fields, int n_cells) { int id, i, j, k, idx, ii; id = threadIdx.x + blockIdx.x * blockDim.x; @@ -30,22 +26,19 @@ __global__ void PackBuffers3DKernel(Real *buffer, Real *c_head, int isize, } } -void PackBuffers3D(Real *buffer, Real *c_head, int nx, int ny, int n_fields, - int n_cells, int idxoffset, int isize, int jsize, int ksize) +void PackBuffers3D(Real *buffer, Real *c_head, int nx, int ny, int n_fields, int n_cells, int idxoffset, int isize, + int jsize, int ksize) { int buffer_ncells = isize * jsize * ksize; dim3 dim1dGrid((buffer_ncells + TPB - 1) / TPB, 1, 1); dim3 dim1dBlock(TPB, 1, 1); - hipLaunchKernelGGL(PackBuffers3DKernel, dim1dGrid, dim1dBlock, 0, 0, buffer, - c_head, isize, jsize, ksize, nx, ny, idxoffset, - buffer_ncells, n_fields, n_cells); + hipLaunchKernelGGL(PackBuffers3DKernel, dim1dGrid, dim1dBlock, 0, 0, buffer, c_head, isize, jsize, ksize, nx, ny, + idxoffset, buffer_ncells, n_fields, n_cells); CHECK(cudaDeviceSynchronize()); } -__global__ void UnpackBuffers3DKernel(Real *buffer, Real *c_head, int isize, - int jsize, int ksize, int nx, int ny, - int idxoffset, int buffer_ncells, - int n_fields, int n_cells) +__global__ void UnpackBuffers3DKernel(Real *buffer, Real *c_head, int isize, int jsize, int ksize, int nx, int ny, + int idxoffset, int buffer_ncells, int n_fields, int n_cells) { int id, i, j, k, idx, ii; id = threadIdx.x + blockIdx.x * blockDim.x; @@ -61,9 +54,8 @@ __global__ void UnpackBuffers3DKernel(Real *buffer, Real *c_head, int isize, } } -void UnpackBuffers3D(Real *buffer, Real *c_head, int nx, int ny, int n_fields, - int n_cells, int idxoffset, int isize, int jsize, - int ksize) +void UnpackBuffers3D(Real *buffer, Real *c_head, int nx, int ny, int n_fields, int n_cells, int idxoffset, int isize, + int jsize, int ksize) { // void UnpackBuffers3D(Real * buffer, Real * c_head, int isize, int jsize, // int ksize, int nx, int ny, int idxoffset, int offset, int n_fields, int @@ -71,15 +63,12 @@ void UnpackBuffers3D(Real *buffer, Real *c_head, int nx, int ny, int n_fields, int buffer_ncells = isize * jsize * ksize; dim3 dim1dGrid((buffer_ncells + TPB - 1) / TPB, 1, 1); dim3 dim1dBlock(TPB, 1, 1); - hipLaunchKernelGGL(UnpackBuffers3DKernel, dim1dGrid, dim1dBlock, 0, 0, buffer, - c_head, isize, jsize, ksize, nx, ny, idxoffset, - buffer_ncells, n_fields, n_cells); + hipLaunchKernelGGL(UnpackBuffers3DKernel, dim1dGrid, dim1dBlock, 0, 0, buffer, c_head, isize, jsize, ksize, nx, ny, + idxoffset, buffer_ncells, n_fields, n_cells); } -__global__ void SetGhostCellsKernel(Real *c_head, int nx, int ny, int nz, - int n_fields, int n_cells, int n_ghost, - int f0, int f1, int f2, int f3, int f4, - int f5, int isize, int jsize, int ksize, +__global__ void SetGhostCellsKernel(Real *c_head, int nx, int ny, int nz, int n_fields, int n_cells, int n_ghost, + int f0, int f1, int f2, int f3, int f4, int f5, int isize, int jsize, int ksize, int imin, int jmin, int kmin, int dir) { int id, i, j, k, gidx, idx, ii; @@ -140,16 +129,14 @@ __global__ void SetGhostCellsKernel(Real *c_head, int nx, int ny, int nz, // Direction 0,2,4 are left-side, don't allow inflow with positive // momentum if (c_head[momdex] > 0.0) { - c_head[gidx + 4 * n_cells] -= - 0.5 * (c_head[momdex] * c_head[momdex]) / c_head[gidx]; + c_head[gidx + 4 * n_cells] -= 0.5 * (c_head[momdex] * c_head[momdex]) / c_head[gidx]; c_head[momdex] = 0.0; } } else { // Direction 1,3,5 are right-side, don't allow inflow with negative // momentum if (c_head[momdex] < 0.0) { - c_head[gidx + 4 * n_cells] -= - 0.5 * (c_head[momdex] * c_head[momdex]) / c_head[gidx]; + c_head[gidx + 4 * n_cells] -= 0.5 * (c_head[momdex] * c_head[momdex]) / c_head[gidx]; c_head[momdex] = 0.0; } } @@ -157,20 +144,17 @@ __global__ void SetGhostCellsKernel(Real *c_head, int nx, int ny, int nz, } // end idx>=0 } // end function -void SetGhostCells(Real *c_head, int nx, int ny, int nz, int n_fields, - int n_cells, int n_ghost, int flags[], int isize, int jsize, - int ksize, int imin, int jmin, int kmin, int dir) +void SetGhostCells(Real *c_head, int nx, int ny, int nz, int n_fields, int n_cells, int n_ghost, int flags[], int isize, + int jsize, int ksize, int imin, int jmin, int kmin, int dir) { dim3 dim1dGrid((isize * jsize * ksize + TPB - 1) / TPB, 1, 1); dim3 dim1dBlock(TPB, 1, 1); - hipLaunchKernelGGL(SetGhostCellsKernel, dim1dGrid, dim1dBlock, 0, 0, c_head, - nx, ny, nz, n_fields, n_cells, n_ghost, flags[0], flags[1], - flags[2], flags[3], flags[4], flags[5], isize, jsize, - ksize, imin, jmin, kmin, dir); + hipLaunchKernelGGL(SetGhostCellsKernel, dim1dGrid, dim1dBlock, 0, 0, c_head, nx, ny, nz, n_fields, n_cells, n_ghost, + flags[0], flags[1], flags[2], flags[3], flags[4], flags[5], isize, jsize, ksize, imin, jmin, kmin, + dir); } -__device__ int SetBoundaryMapping(int ig, int jg, int kg, Real *a, int flags[], - int nx, int ny, int nz, int n_ghost) +__device__ int SetBoundaryMapping(int ig, int jg, int kg, Real *a, int flags[], int nx, int ny, int nz, int n_ghost) { // nx, ny, nz, n_ghost /* 1D */ @@ -249,8 +233,7 @@ __device__ int SetBoundaryMapping(int ig, int jg, int kg, Real *a, int flags[], return idx; } -__device__ int FindIndex(int ig, int nx, int flag, int face, int n_ghost, - Real *a) +__device__ int FindIndex(int ig, int nx, int flag, int face, int n_ghost, Real *a) { int id; @@ -315,10 +298,8 @@ __device__ int FindIndex(int ig, int nx, int flag, int face, int n_ghost, return id; } -__global__ void Wind_Boundary_kernel(Real *c_device, int nx, int ny, int nz, - int n_cells, int n_ghost, int x_off, - int y_off, int z_off, Real dx, Real dy, - Real dz, Real xbound, Real ybound, +__global__ void Wind_Boundary_kernel(Real *c_device, int nx, int ny, int nz, int n_cells, int n_ghost, int x_off, + int y_off, int z_off, Real dx, Real dy, Real dz, Real xbound, Real ybound, Real zbound, Real gamma, Real t) { int id, xid, yid, zid, gid; @@ -362,16 +343,13 @@ __global__ void Wind_Boundary_kernel(Real *c_device, int nx, int ny, int nz, c_device[gid + 1 * n_cells] = vx * d_0; c_device[gid + 2 * n_cells] = vy * d_0; c_device[gid + 3 * n_cells] = vz * d_0; - c_device[gid + 4 * n_cells] = - P_0 / (gamma - 1.0) + 0.5 * d_0 * (vx * vx + vy * vy + vz * vz); + c_device[gid + 4 * n_cells] = P_0 / (gamma - 1.0) + 0.5 * d_0 * (vx * vx + vy * vy + vz * vz); } __syncthreads(); } -__global__ void Noh_Boundary_kernel(Real *c_device, int nx, int ny, int nz, - int n_cells, int n_ghost, int x_off, - int y_off, int z_off, Real dx, Real dy, - Real dz, Real xbound, Real ybound, +__global__ void Noh_Boundary_kernel(Real *c_device, int nx, int ny, int nz, int n_cells, int n_ghost, int x_off, + int y_off, int z_off, Real dx, Real dy, Real dz, Real xbound, Real ybound, Real zbound, Real gamma, Real t) { int id, xid, yid, zid, gid; @@ -526,10 +504,8 @@ __global__ void Noh_Boundary_kernel(Real *c_device, int nx, int ny, int nz, } } -void Wind_Boundary_CUDA(Real *c_device, int nx, int ny, int nz, int n_cells, - int n_ghost, int x_off, int y_off, int z_off, Real dx, - Real dy, Real dz, Real xbound, Real ybound, Real zbound, - Real gamma, Real t) +void Wind_Boundary_CUDA(Real *c_device, int nx, int ny, int nz, int n_cells, int n_ghost, int x_off, int y_off, + int z_off, Real dx, Real dy, Real dz, Real xbound, Real ybound, Real zbound, Real gamma, Real t) { // determine the size of the grid to launch // need at least as many threads as the largest boundary face @@ -543,15 +519,12 @@ void Wind_Boundary_CUDA(Real *c_device, int nx, int ny, int nz, int n_cells, dim3 dim1dBlock(TPB, 1, 1); // launch the boundary kernel - hipLaunchKernelGGL(Wind_Boundary_kernel, dim1dGrid, dim1dBlock, 0, 0, - c_device, nx, ny, nz, n_cells, n_ghost, x_off, y_off, - z_off, dx, dy, dz, xbound, ybound, zbound, gamma, t); + hipLaunchKernelGGL(Wind_Boundary_kernel, dim1dGrid, dim1dBlock, 0, 0, c_device, nx, ny, nz, n_cells, n_ghost, x_off, + y_off, z_off, dx, dy, dz, xbound, ybound, zbound, gamma, t); } -void Noh_Boundary_CUDA(Real *c_device, int nx, int ny, int nz, int n_cells, - int n_ghost, int x_off, int y_off, int z_off, Real dx, - Real dy, Real dz, Real xbound, Real ybound, Real zbound, - Real gamma, Real t) +void Noh_Boundary_CUDA(Real *c_device, int nx, int ny, int nz, int n_cells, int n_ghost, int x_off, int y_off, + int z_off, Real dx, Real dy, Real dz, Real xbound, Real ybound, Real zbound, Real gamma, Real t) { // determine the size of the grid to launch // need at least as many threads as the largest boundary face @@ -565,7 +538,6 @@ void Noh_Boundary_CUDA(Real *c_device, int nx, int ny, int nz, int n_cells, dim3 dim1dBlock(TPB, 1, 1); // launch the boundary kernel - hipLaunchKernelGGL(Noh_Boundary_kernel, dim1dGrid, dim1dBlock, 0, 0, c_device, - nx, ny, nz, n_cells, n_ghost, x_off, y_off, z_off, dx, dy, - dz, xbound, ybound, zbound, gamma, t); + hipLaunchKernelGGL(Noh_Boundary_kernel, dim1dGrid, dim1dBlock, 0, 0, c_device, nx, ny, nz, n_cells, n_ghost, x_off, + y_off, z_off, dx, dy, dz, xbound, ybound, zbound, gamma, t); } \ No newline at end of file diff --git a/src/grid/cuda_boundaries.h b/src/grid/cuda_boundaries.h index 8418264bd..0c2617720 100644 --- a/src/grid/cuda_boundaries.h +++ b/src/grid/cuda_boundaries.h @@ -5,27 +5,22 @@ // void PackBuffers3D(Real * buffer, Real * c_head, int isize, int jsize, int // ksize, int nx, int ny, int idxoffset, int offset, int n_fields, int n_cells); -void PackBuffers3D(Real* buffer, Real* c_head, int nx, int ny, int n_fields, - int n_cells, int idxoffset, int isize, int jsize, int ksize); +void PackBuffers3D(Real* buffer, Real* c_head, int nx, int ny, int n_fields, int n_cells, int idxoffset, int isize, + int jsize, int ksize); -void UnpackBuffers3D(Real* buffer, Real* c_head, int nx, int ny, int n_fields, - int n_cells, int idxoffset, int isize, int jsize, - int ksize); +void UnpackBuffers3D(Real* buffer, Real* c_head, int nx, int ny, int n_fields, int n_cells, int idxoffset, int isize, + int jsize, int ksize); // void UnpackBuffers3D(Real * buffer, Real * c_head, int isize, int jsize, int // ksize, int nx, int ny, int idxoffset, int offset, int n_fields, int n_cells); -void SetGhostCells(Real* c_head, int nx, int ny, int nz, int n_fields, - int n_cells, int n_ghost, int flags[], int isize, int jsize, - int ksize, int imin, int jmin, int kmin, int dir); +void SetGhostCells(Real* c_head, int nx, int ny, int nz, int n_fields, int n_cells, int n_ghost, int flags[], int isize, + int jsize, int ksize, int imin, int jmin, int kmin, int dir); -void Wind_Boundary_CUDA(Real* c_device, int nx, int ny, int nz, int n_cells, - int n_ghost, int x_off, int y_off, int z_off, Real dx, - Real dy, Real dz, Real xbound, Real ybound, Real zbound, - Real gamma, Real t); +void Wind_Boundary_CUDA(Real* c_device, int nx, int ny, int nz, int n_cells, int n_ghost, int x_off, int y_off, + int z_off, Real dx, Real dy, Real dz, Real xbound, Real ybound, Real zbound, Real gamma, + Real t); -void Noh_Boundary_CUDA(Real* c_device, int nx, int ny, int nz, int n_cells, - int n_ghost, int x_off, int y_off, int z_off, Real dx, - Real dy, Real dz, Real xbound, Real ybound, Real zbound, - Real gamma, Real t); +void Noh_Boundary_CUDA(Real* c_device, int nx, int ny, int nz, int n_cells, int n_ghost, int x_off, int y_off, + int z_off, Real dx, Real dy, Real dz, Real xbound, Real ybound, Real zbound, Real gamma, Real t); #endif diff --git a/src/grid/grid3D.cpp b/src/grid/grid3D.cpp index c27563534..c5101c06a 100644 --- a/src/grid/grid3D.cpp +++ b/src/grid/grid3D.cpp @@ -85,8 +85,7 @@ Grid3D::Grid3D(void) /*! \fn void Get_Position(long i, long j, long k, Real *xpos, Real *ypos, Real * *zpos) \brief Get the cell-centered position based on cell index */ -void Grid3D::Get_Position(long i, long j, long k, Real *x_pos, Real *y_pos, - Real *z_pos) +void Grid3D::Get_Position(long i, long j, long k, Real *x_pos, Real *y_pos, Real *z_pos) { #ifndef MPI_CHOLLA @@ -124,8 +123,7 @@ Real Grid3D::Calc_Inverse_Timestep() { // ==Calculate the next inverse time step using Calc_dt_GPU from // hydro/hydro_cuda.h== - return Calc_dt_GPU(C.device, H.nx, H.ny, H.nz, H.n_ghost, H.n_cells, H.dx, - H.dy, H.dz, gama); + return Calc_dt_GPU(C.device, H.nx, H.ny, H.nz, H.n_ghost, H.n_cells, H.dx, H.dy, H.dz, gama); } /*! \fn void Initialize(int nx_in, int ny_in, int nz_in) @@ -288,9 +286,7 @@ void Grid3D::AllocateMemory(void) { // allocate memory for the conserved variable arrays // allocate all the memory to density, to insure contiguous memory - CudaSafeCall(cudaHostAlloc((void **)&C.host, - H.n_fields * H.n_cells * sizeof(Real), - cudaHostAllocDefault)); + CudaSafeCall(cudaHostAlloc((void **)&C.host, H.n_fields * H.n_cells * sizeof(Real), cudaHostAllocDefault)); // point conserved variables to the appropriate locations C.density = C.host; @@ -314,10 +310,8 @@ void Grid3D::AllocateMemory(void) #endif // DE // allocate memory for the conserved variable arrays on the device - CudaSafeCall( - cudaMalloc((void **)&C.device, H.n_fields * H.n_cells * sizeof(Real))); - cuda_utilities::initGpuMemory(C.device, - H.n_fields * H.n_cells * sizeof(Real)); + CudaSafeCall(cudaMalloc((void **)&C.device, H.n_fields * H.n_cells * sizeof(Real))); + cuda_utilities::initGpuMemory(C.device, H.n_fields * H.n_cells * sizeof(Real)); C.d_density = C.device; C.d_momentum_x = &(C.device[H.n_cells]); C.d_momentum_y = &(C.device[2 * H.n_cells]); @@ -339,10 +333,8 @@ void Grid3D::AllocateMemory(void) #endif // DE #if defined(GRAVITY) - CudaSafeCall(cudaHostAlloc(&C.Grav_potential, H.n_cells * sizeof(Real), - cudaHostAllocDefault)); - CudaSafeCall( - cudaMalloc((void **)&C.d_Grav_potential, H.n_cells * sizeof(Real))); + CudaSafeCall(cudaHostAlloc(&C.Grav_potential, H.n_cells * sizeof(Real), cudaHostAllocDefault)); + CudaSafeCall(cudaMalloc((void **)&C.d_Grav_potential, H.n_cells * sizeof(Real))); #else C.Grav_potential = NULL; C.d_Grav_potential = NULL; @@ -438,45 +430,40 @@ Real Grid3D::Update_Grid(void) { #ifdef CUDA #ifdef VL - VL_Algorithm_1D_CUDA(C.device, H.nx, x_off, H.n_ghost, H.dx, H.xbound, H.dt, - H.n_fields); + VL_Algorithm_1D_CUDA(C.device, H.nx, x_off, H.n_ghost, H.dx, H.xbound, H.dt, H.n_fields); #endif // VL #ifdef SIMPLE - Simple_Algorithm_1D_CUDA(C.device, H.nx, x_off, H.n_ghost, H.dx, H.xbound, - H.dt, H.n_fields); + Simple_Algorithm_1D_CUDA(C.device, H.nx, x_off, H.n_ghost, H.dx, H.xbound, H.dt, H.n_fields); #endif // SIMPLE #endif // CUDA } else if (H.nx > 1 && H.ny > 1 && H.nz == 1) // 2D { #ifdef CUDA #ifdef VL - VL_Algorithm_2D_CUDA(C.device, H.nx, H.ny, x_off, y_off, H.n_ghost, H.dx, - H.dy, H.xbound, H.ybound, H.dt, H.n_fields); + VL_Algorithm_2D_CUDA(C.device, H.nx, H.ny, x_off, y_off, H.n_ghost, H.dx, H.dy, H.xbound, H.ybound, H.dt, + H.n_fields); #endif // VL #ifdef SIMPLE - Simple_Algorithm_2D_CUDA(C.device, H.nx, H.ny, x_off, y_off, H.n_ghost, - H.dx, H.dy, H.xbound, H.ybound, H.dt, H.n_fields); + Simple_Algorithm_2D_CUDA(C.device, H.nx, H.ny, x_off, y_off, H.n_ghost, H.dx, H.dy, H.xbound, H.ybound, H.dt, + H.n_fields); #endif // SIMPLE #endif // CUDA } else if (H.nx > 1 && H.ny > 1 && H.nz > 1) // 3D { #ifdef CUDA #ifdef VL - VL_Algorithm_3D_CUDA(C.device, C.d_Grav_potential, H.nx, H.ny, H.nz, x_off, - y_off, z_off, H.n_ghost, H.dx, H.dy, H.dz, H.xbound, - H.ybound, H.zbound, H.dt, H.n_fields, density_floor, - U_floor, C.Grav_potential); + VL_Algorithm_3D_CUDA(C.device, C.d_Grav_potential, H.nx, H.ny, H.nz, x_off, y_off, z_off, H.n_ghost, H.dx, H.dy, + H.dz, H.xbound, H.ybound, H.zbound, H.dt, H.n_fields, density_floor, U_floor, + C.Grav_potential); #endif // VL #ifdef SIMPLE - Simple_Algorithm_3D_CUDA(C.device, C.d_Grav_potential, H.nx, H.ny, H.nz, - x_off, y_off, z_off, H.n_ghost, H.dx, H.dy, H.dz, - H.xbound, H.ybound, H.zbound, H.dt, H.n_fields, - density_floor, U_floor, C.Grav_potential); + Simple_Algorithm_3D_CUDA(C.device, C.d_Grav_potential, H.nx, H.ny, H.nz, x_off, y_off, z_off, H.n_ghost, H.dx, H.dy, + H.dz, H.xbound, H.ybound, H.zbound, H.dt, H.n_fields, density_floor, U_floor, + C.Grav_potential); #endif // SIMPLE #endif } else { - chprintf("Error: Grid dimensions nx: %d ny: %d nz: %d not supported.\n", - H.nx, H.ny, H.nz); + chprintf("Error: Grid dimensions nx: %d ny: %d nz: %d not supported.\n", H.nx, H.ny, H.nz); chexit(-1); } @@ -504,8 +491,7 @@ Real Grid3D::Update_Grid(void) // Set the min_delta_t for averaging a slow cell Real max_dti_slow; max_dti_slow = 1 / H.min_dt_slow; - Average_Slow_Cells(C.device, H.nx, H.ny, H.nz, H.n_ghost, H.n_fields, H.dx, - H.dy, H.dz, gama, max_dti_slow); + Average_Slow_Cells(C.device, H.nx, H.ny, H.nz, H.n_ghost, H.n_fields, H.dx, H.dy, H.dz, gama, max_dti_slow); #endif // AVERAGE_SLOW_CELLS // ==Calculate the next time step using Calc_dt_GPU from hydro/hydro_cuda.h== diff --git a/src/grid/grid3D.h b/src/grid/grid3D.h index 5d580c209..34c470399 100644 --- a/src/grid/grid3D.h +++ b/src/grid/grid3D.h @@ -405,9 +405,8 @@ class Grid3D /*! pointer to conserved variable on device */ Real *device; - Real *d_density, *d_momentum_x, *d_momentum_y, *d_momentum_z, *d_Energy, - *d_scalar, *d_basic_scalar, *d_magnetic_x, *d_magnetic_y, *d_magnetic_z, - *d_GasEnergy; + Real *d_density, *d_momentum_x, *d_momentum_y, *d_momentum_z, *d_Energy, *d_scalar, *d_basic_scalar, *d_magnetic_x, + *d_magnetic_y, *d_magnetic_z, *d_GasEnergy; /*! pointer to gravitational potential on device */ Real *d_Grav_potential; @@ -528,8 +527,7 @@ class Grid3D /*! \fn void Constant(Real rho, Real vx, Real vy, Real vz, Real P) * \brief Constant gas properties. */ - void Constant(Real rho, Real vx, Real vy, Real vz, Real P, Real Bx, Real By, - Real Bz); + void Constant(Real rho, Real vx, Real vy, Real vz, Real P, Real Bx, Real By, Real Bz); /*! \fn void Sound_Wave(Real rho, Real vx, Real vy, Real vz, Real P, Real A) * \brief Sine wave perturbation. */ @@ -560,12 +558,9 @@ class Grid3D * Z-direction \param[in] pitch The pitch angle of the linear wave \param[in] * yaw The yaw angle of the linear wave */ - void Linear_Wave(Real rho, Real vx, Real vy, Real vz, Real P, Real A, Real Bx, - Real By, Real Bz, Real rEigenVec_rho, - Real rEigenVec_MomentumX, Real rEigenVec_MomentumY, - Real rEigenVec_MomentumZ, Real rEigenVec_E, - Real rEigenVec_Bx, Real rEigenVec_By, Real rEigenVec_Bz, - Real pitch, Real yaw); + void Linear_Wave(Real rho, Real vx, Real vy, Real vz, Real P, Real A, Real Bx, Real By, Real Bz, Real rEigenVec_rho, + Real rEigenVec_MomentumX, Real rEigenVec_MomentumY, Real rEigenVec_MomentumZ, Real rEigenVec_E, + Real rEigenVec_Bx, Real rEigenVec_By, Real rEigenVec_Bz, Real pitch, Real yaw); /*! \fn void Square_Wave(Real rho, Real vx, Real vy, Real vz, Real P, Real A) * \brief Square wave density perturbation with amplitude A*rho in pressure @@ -576,10 +571,8 @@ class Grid3D Real Bx_l, Real By_l, Real Bz_l, Real rho_r, Real vx_r, Real vy_r, Real vz_r, Real P_r, Real Bx_r, Real By_r, Real Bz_r, Real diaph) * \brief Initialize the grid with a Riemann problem. */ - void Riemann(Real rho_l, Real vx_l, Real vy_l, Real vz_l, Real P_l, Real Bx_l, - Real By_l, Real Bz_l, Real rho_r, Real vx_r, Real vy_r, - Real vz_r, Real P_r, Real Bx_r, Real By_r, Real Bz_r, - Real diaph); + void Riemann(Real rho_l, Real vx_l, Real vy_l, Real vz_l, Real P_l, Real Bx_l, Real By_l, Real Bz_l, Real rho_r, + Real vx_r, Real vy_r, Real vz_r, Real P_r, Real Bx_r, Real By_r, Real Bz_r, Real diaph); /*! \fn void Shu_Osher() * \brief Initialize the grid with the Shu-Osher shock tube problem. See @@ -718,32 +711,23 @@ class Grid3D void Extrapolate_Grav_Potential_Function(int g_start, int g_end); void Extrapolate_Grav_Potential(); void Set_Potential_Boundaries_Periodic(int direction, int side, int *flags); - int Load_Gravity_Potential_To_Buffer(int direction, int side, Real *buffer, - int buffer_start); - void Unload_Gravity_Potential_from_Buffer(int direction, int side, - Real *buffer, int buffer_start); + int Load_Gravity_Potential_To_Buffer(int direction, int side, Real *buffer, int buffer_start); + void Unload_Gravity_Potential_from_Buffer(int direction, int side, Real *buffer, int buffer_start); void Set_Potential_Boundaries_Isolated(int direction, int side, int *flags); void Compute_Potential_Boundaries_Isolated(int dir, struct parameters *P); - void Compute_Potential_Isolated_Boundary(int direction, int side, - int bc_potential_type); + void Compute_Potential_Isolated_Boundary(int direction, int side, int bc_potential_type); #ifdef SOR - void Get_Potential_SOR(Real Grav_Constant, Real dens_avrg, Real current_a, - struct parameters *P); + void Get_Potential_SOR(Real Grav_Constant, Real dens_avrg, Real current_a, struct parameters *P); int Load_Poisson_Boundary_To_Buffer(int direction, int side, Real *buffer); - void Unload_Poisson_Boundary_From_Buffer(int direction, int side, - Real *buffer_host); + void Unload_Poisson_Boundary_From_Buffer(int direction, int side, Real *buffer_host); #endif #ifdef GRAVITY_GPU void Copy_Hydro_Density_to_Gravity_GPU(); void Extrapolate_Grav_Potential_GPU(); - int Load_Gravity_Potential_To_Buffer_GPU(int direction, int side, - Real *buffer, int buffer_start); - void Unload_Gravity_Potential_from_Buffer_GPU(int direction, int side, - Real *buffer, int buffer_start); - void Set_Potential_Boundaries_Isolated_GPU(int direction, int side, - int *flags); - void Set_Potential_Boundaries_Periodic_GPU(int direction, int side, - int *flags); + int Load_Gravity_Potential_To_Buffer_GPU(int direction, int side, Real *buffer, int buffer_start); + void Unload_Gravity_Potential_from_Buffer_GPU(int direction, int side, Real *buffer, int buffer_start); + void Set_Potential_Boundaries_Isolated_GPU(int direction, int side, int *flags); + void Set_Potential_Boundaries_Periodic_GPU(int direction, int side, int *flags); #endif #endif // GRAVITY @@ -772,40 +756,28 @@ class Grid3D void Set_Particles_Open_Boundary_CPU(int dir, int side); #endif #ifdef MPI_CHOLLA - int Load_Particles_Density_Boundary_to_Buffer(int direction, int side, - Real *buffer); - void Unload_Particles_Density_Boundary_From_Buffer(int direction, int side, - Real *buffer); - void Load_and_Send_Particles_X0(int ireq_n_particles, - int ireq_particles_transfer); - void Load_and_Send_Particles_X1(int ireq_n_particles, - int ireq_particles_transfer); - void Load_and_Send_Particles_Y0(int ireq_n_particles, - int ireq_particles_transfer); - void Load_and_Send_Particles_Y1(int ireq_n_particles, - int ireq_particles_transfer); - void Load_and_Send_Particles_Z0(int ireq_n_particles, - int ireq_particles_transfer); - void Load_and_Send_Particles_Z1(int ireq_n_particles, - int ireq_particles_transfer); + int Load_Particles_Density_Boundary_to_Buffer(int direction, int side, Real *buffer); + void Unload_Particles_Density_Boundary_From_Buffer(int direction, int side, Real *buffer); + void Load_and_Send_Particles_X0(int ireq_n_particles, int ireq_particles_transfer); + void Load_and_Send_Particles_X1(int ireq_n_particles, int ireq_particles_transfer); + void Load_and_Send_Particles_Y0(int ireq_n_particles, int ireq_particles_transfer); + void Load_and_Send_Particles_Y1(int ireq_n_particles, int ireq_particles_transfer); + void Load_and_Send_Particles_Z0(int ireq_n_particles, int ireq_particles_transfer); + void Load_and_Send_Particles_Z1(int ireq_n_particles, int ireq_particles_transfer); void Unload_Particles_from_Buffer_X0(int *flags); void Unload_Particles_from_Buffer_X1(int *flags); void Unload_Particles_from_Buffer_Y0(int *flags); void Unload_Particles_from_Buffer_Y1(int *flags); void Unload_Particles_from_Buffer_Z0(int *flags); void Unload_Particles_from_Buffer_Z1(int *flags); - void Wait_NTransfer_and_Request_Recv_Particles_Transfer_BLOCK(int dir, - int *flags); - void Load_NTtransfer_and_Request_Receive_Particles_Transfer( - int index, int *ireq_particles_transfer); + void Wait_NTransfer_and_Request_Recv_Particles_Transfer_BLOCK(int dir, int *flags); + void Load_NTtransfer_and_Request_Receive_Particles_Transfer(int index, int *ireq_particles_transfer); void Wait_and_Unload_MPI_Comm_Particles_Buffers_BLOCK(int dir, int *flags); void Unload_Particles_From_Buffers_BLOCK(int index, int *flags); void Finish_Particles_Transfer(); #endif // MPI_CHOLLA void Transfer_Particles_Density_Boundaries(struct parameters P); - void Copy_Particles_Density_Buffer_Device_to_Host(int direction, int side, - Real *buffer_d, - Real *buffer_h); + void Copy_Particles_Density_Buffer_Device_to_Host(int direction, int side, Real *buffer_d, Real *buffer_h); // void Transfer_Particles_Boundaries( struct parameters P ); void WriteData_Particles(struct parameters P, int nfile); void OutputData_Particles(struct parameters P, int nfile); @@ -821,10 +793,8 @@ class Grid3D void Get_Gravity_CIC(); void Advance_Particles_KDK_Step1(); void Advance_Particles_KDK_Step2(); - void Advance_Particles_KDK_Step1_function(part_int_t p_start, - part_int_t p_end); - void Advance_Particles_KDK_Step2_function(part_int_t p_start, - part_int_t p_end); + void Advance_Particles_KDK_Step1_function(part_int_t p_start, part_int_t p_end); + void Advance_Particles_KDK_Step2_function(part_int_t p_start, part_int_t p_end); void Get_Particles_Acceleration(); void Advance_Particles(int N_KDK_step); Real Calc_Particles_dt_function(part_int_t p_start, part_int_t p_end); @@ -840,11 +810,8 @@ class Grid3D void Copy_Potential_From_GPU(); void Copy_Particles_Density_to_GPU(); void Copy_Particles_Density_GPU(); - int Load_Particles_Density_Boundary_to_Buffer_GPU(int direction, int side, - Real *buffer); - void Unload_Particles_Density_Boundary_From_Buffer_GPU(int direction, - int side, - Real *buffer); + int Load_Particles_Density_Boundary_to_Buffer_GPU(int direction, int side, Real *buffer); + void Unload_Particles_Density_Boundary_From_Buffer_GPU(int direction, int side, Real *buffer); #endif // GRAVITY_GPU #endif // PARTICLES @@ -854,10 +821,8 @@ class Grid3D void Change_GAS_Frame_System(bool forward); void Change_GAS_Frame_System_GPU(bool forward); void Change_Cosmological_Frame_Sytem(bool forward); - void Advance_Particles_KDK_Cosmo_Step1_function(part_int_t p_start, - part_int_t p_end); - void Advance_Particles_KDK_Cosmo_Step2_function(part_int_t p_start, - part_int_t p_end); + void Advance_Particles_KDK_Cosmo_Step1_function(part_int_t p_start, part_int_t p_end); + void Advance_Particles_KDK_Cosmo_Step2_function(part_int_t p_start, part_int_t p_end); Real Calc_Particles_dt_Cosmo_function(part_int_t p_start, part_int_t p_end); Real Calc_Particles_dt_Cosmo(); #ifdef PARTICLES_GPU @@ -911,8 +876,7 @@ class Grid3D // typedef for Grid3D_PointerMemberFunction typedef void (Grid3D::*Grid3D_PMF_UnloadHydroBuffer)(Real *); -typedef void (Grid3D::*Grid3D_PMF_UnloadGravityPotential)(int, int, Real *, - int); +typedef void (Grid3D::*Grid3D_PMF_UnloadGravityPotential)(int, int, Real *, int); typedef void (Grid3D::*Grid3D_PMF_UnloadParticleDensity)(int, int, Real *); #endif // GRID3D_H diff --git a/src/grid/grid_enum.h b/src/grid/grid_enum.h index 268e66226..6df1d6e26 100644 --- a/src/grid/grid_enum.h +++ b/src/grid/grid_enum.h @@ -31,8 +31,7 @@ enum : int { // Always define scalar, scalar_minus_1, finalscalar_plus_1, finalscalar to // compute NSCALARS scalar, - scalar_minus_1 = - scalar - 1, // so that next enum item starts at same index as scalar + scalar_minus_1 = scalar - 1, // so that next enum item starts at same index as scalar #ifdef SCALAR // Add scalars here, wrapped appropriately with ifdefs: @@ -58,10 +57,8 @@ enum : int { #endif // SCALAR - finalscalar_plus_1, // needed to calculate NSCALARS - finalscalar = - finalscalar_plus_1 - - 1, // resets enum to finalscalar so fields afterwards are correct + finalscalar_plus_1, // needed to calculate NSCALARS + finalscalar = finalscalar_plus_1 - 1, // resets enum to finalscalar so fields afterwards are correct // so that anything after starts with scalar + NSCALARS #ifdef MHD diff --git a/src/grid/initial_conditions.cpp b/src/grid/initial_conditions.cpp index d3b5a5317..6331399e0 100644 --- a/src/grid/initial_conditions.cpp +++ b/src/grid/initial_conditions.cpp @@ -35,16 +35,14 @@ void Grid3D::Set_Initial_Conditions(parameters P) } else if (strcmp(P.init, "Sound_Wave") == 0) { Sound_Wave(P.rho, P.vx, P.vy, P.vz, P.P, P.A); } else if (strcmp(P.init, "Linear_Wave") == 0) { - Linear_Wave(P.rho, P.vx, P.vy, P.vz, P.P, P.A, P.Bx, P.By, P.Bz, - P.rEigenVec_rho, P.rEigenVec_MomentumX, P.rEigenVec_MomentumY, - P.rEigenVec_MomentumZ, P.rEigenVec_E, P.rEigenVec_Bx, - P.rEigenVec_By, P.rEigenVec_Bz, P.pitch, P.yaw); + Linear_Wave(P.rho, P.vx, P.vy, P.vz, P.P, P.A, P.Bx, P.By, P.Bz, P.rEigenVec_rho, P.rEigenVec_MomentumX, + P.rEigenVec_MomentumY, P.rEigenVec_MomentumZ, P.rEigenVec_E, P.rEigenVec_Bx, P.rEigenVec_By, + P.rEigenVec_Bz, P.pitch, P.yaw); } else if (strcmp(P.init, "Square_Wave") == 0) { Square_Wave(P.rho, P.vx, P.vy, P.vz, P.P, P.A); } else if (strcmp(P.init, "Riemann") == 0) { - Riemann(P.rho_l, P.vx_l, P.vy_l, P.vz_l, P.P_l, P.Bx_l, P.By_l, P.Bz_l, - P.rho_r, P.vx_r, P.vy_r, P.vz_r, P.P_r, P.Bx_r, P.By_r, P.Bz_r, - P.diaph); + Riemann(P.rho_l, P.vx_l, P.vy_l, P.vz_l, P.P_l, P.Bx_l, P.By_l, P.Bz_l, P.rho_r, P.vx_r, P.vy_r, P.vz_r, P.P_r, + P.Bx_r, P.By_r, P.Bz_r, P.diaph); } else if (strcmp(P.init, "Shu_Osher") == 0) { Shu_Osher(); } else if (strcmp(P.init, "Blast_1D") == 0) { @@ -65,8 +63,7 @@ void Grid3D::Set_Initial_Conditions(parameters P) Noh_3D(); } else if (strcmp(P.init, "Disk_2D") == 0) { Disk_2D(); - } else if (strcmp(P.init, "Disk_3D") == 0 || - strcmp(P.init, "Disk_3D_particles") == 0) { + } else if (strcmp(P.init, "Disk_3D") == 0 || strcmp(P.init, "Disk_3D_particles") == 0) { Disk_3D(P); } else if (strcmp(P.init, "Spherical_Overpressure_3D") == 0) { Spherical_Overpressure_3D(); @@ -93,9 +90,7 @@ void Grid3D::Set_Initial_Conditions(parameters P) } if (C.device != NULL) { - CudaSafeCall(cudaMemcpy(C.device, C.density, - H.n_fields * H.n_cells * sizeof(Real), - cudaMemcpyHostToDevice)); + CudaSafeCall(cudaMemcpy(C.device, C.density, H.n_fields * H.n_cells * sizeof(Real), cudaMemcpyHostToDevice)); } } @@ -142,12 +137,9 @@ void Grid3D::Set_Domain_Properties(struct parameters P) H.yblocal = H.ybound + ((Real)ny_local_start) * (P.ylen / ny_param); H.zblocal = H.zbound + ((Real)nz_local_start) * (P.zlen / nz_param); - H.xblocal_max = H.xbound + ((Real)(nx_local_start + H.nx - 2 * H.n_ghost)) * - (P.xlen / nx_param); - H.yblocal_max = H.ybound + ((Real)(ny_local_start + H.ny - 2 * H.n_ghost)) * - (P.ylen / ny_param); - H.zblocal_max = H.zbound + ((Real)(nz_local_start + H.nz - 2 * H.n_ghost)) * - (P.zlen / nz_param); + H.xblocal_max = H.xbound + ((Real)(nx_local_start + H.nx - 2 * H.n_ghost)) * (P.xlen / nx_param); + H.yblocal_max = H.ybound + ((Real)(ny_local_start + H.ny - 2 * H.n_ghost)) * (P.ylen / ny_param); + H.zblocal_max = H.zbound + ((Real)(nz_local_start + H.nz - 2 * H.n_ghost)) * (P.zlen / nz_param); #endif @@ -175,8 +167,7 @@ void Grid3D::Set_Domain_Properties(struct parameters P) /*! \fn void Constant(Real rho, Real vx, Real vy, Real vz, Real P, Real Bx, Real * By, Real Bz) \brief Constant gas properties. */ -void Grid3D::Constant(Real rho, Real vx, Real vy, Real vz, Real P, Real Bx, - Real By, Real Bz) +void Grid3D::Constant(Real rho, Real vx, Real vy, Real vz, Real P, Real Bx, Real By, Real Bz) { int i, j, k, id; int istart, jstart, kstart, iend, jend, kend; @@ -223,8 +214,7 @@ void Grid3D::Constant(Real rho, Real vx, Real vy, Real vz, Real P, Real Bx, C.momentum_x[id] = rho * vx; C.momentum_y[id] = rho * vy; C.momentum_z[id] = rho * vz; - C.Energy[id] = - P / (gama - 1.0) + 0.5 * rho * (vx * vx + vy * vy + vz * vz); + C.Energy[id] = P / (gama - 1.0) + 0.5 * rho * (vx * vx + vy * vy + vz * vz); #ifdef DE C.GasEnergy[id] = P / (gama - 1.0); #endif // DE @@ -279,8 +269,7 @@ void Grid3D::Sound_Wave(Real rho, Real vx, Real vy, Real vz, Real P, Real A) C.momentum_x[id] = rho * vx; C.momentum_y[id] = rho * vy; C.momentum_z[id] = rho * vz; - C.Energy[id] = - P / (gama - 1.0) + 0.5 * rho * (vx * vx + vy * vy + vz * vz); + C.Energy[id] = P / (gama - 1.0) + 0.5 * rho * (vx * vx + vy * vy + vz * vz); // add small-amplitude perturbations C.density[id] = C.density[id] + A * sin(2.0 * PI * x_pos); C.momentum_x[id] = C.momentum_x[id] + A * sin(2.0 * PI * x_pos); @@ -300,23 +289,19 @@ void Grid3D::Sound_Wave(Real rho, Real vx, Real vy, Real vz, Real P, Real A) /*! \fn void Linear_Wave(Real rho, Real vx, Real vy, Real vz, Real P, Real A) * \brief Sine wave perturbation. */ -void Grid3D::Linear_Wave(Real rho, Real vx, Real vy, Real vz, Real P, Real A, - Real Bx, Real By, Real Bz, Real rEigenVec_rho, - Real rEigenVec_MomentumX, Real rEigenVec_MomentumY, - Real rEigenVec_MomentumZ, Real rEigenVec_E, - Real rEigenVec_Bx, Real rEigenVec_By, +void Grid3D::Linear_Wave(Real rho, Real vx, Real vy, Real vz, Real P, Real A, Real Bx, Real By, Real Bz, + Real rEigenVec_rho, Real rEigenVec_MomentumX, Real rEigenVec_MomentumY, + Real rEigenVec_MomentumZ, Real rEigenVec_E, Real rEigenVec_Bx, Real rEigenVec_By, Real rEigenVec_Bz, Real pitch, Real yaw) { - auto [stagger, junk1, junk2] = - math_utils::rotateCoords(H.dx / 2, H.dy / 2, H.dz / 2, pitch, yaw); + auto [stagger, junk1, junk2] = math_utils::rotateCoords(H.dx / 2, H.dy / 2, H.dz / 2, pitch, yaw); // set initial values of conserved variables for (int k = H.n_ghost; k < H.nz - H.n_ghost; k++) { for (int j = H.n_ghost; j < H.ny - H.n_ghost; j++) { for (int i = H.n_ghost; i < H.nx - H.n_ghost; i++) { // Rotate the indices - auto [i_rot, j_rot, k_rot] = - math_utils::rotateCoords(i, j, k, pitch, yaw); + auto [i_rot, j_rot, k_rot] = math_utils::rotateCoords(i, j, k, pitch, yaw); // get cell index int id = i + j * H.nx + k * H.nx * H.ny; @@ -333,8 +318,7 @@ void Grid3D::Linear_Wave(Real rho, Real vx, Real vy, Real vz, Real P, Real A, C.momentum_x[id] = rho * vx; C.momentum_y[id] = rho * vy; C.momentum_z[id] = rho * vz; - C.Energy[id] = - mhd::utils::computeEnergy(P, rho, vx, vy, vz, Bx, By, Bz, gama); + C.Energy[id] = mhd::utils::computeEnergy(P, rho, vx, vy, vz, Bx, By, Bz, gama); // add small-amplitude perturbations C.density[id] += A * rEigenVec_rho * sine_wave; C.momentum_x[id] += A * rEigenVec_MomentumX * sine_wave; @@ -395,8 +379,7 @@ void Grid3D::Square_Wave(Real rho, Real vx, Real vy, Real vz, Real P, Real A) C.momentum_y[id] = rho * vy; C.momentum_z[id] = rho * vz; // C.momentum_z[id] = rho_l * v_l; - C.Energy[id] = - P / (gama - 1.0) + 0.5 * rho * (vx * vx + vy * vy + vz * vz); + C.Energy[id] = P / (gama - 1.0) + 0.5 * rho * (vx * vx + vy * vy + vz * vz); #ifdef DE C.GasEnergy[id] = P / (gama - 1.0); #endif @@ -410,8 +393,7 @@ void Grid3D::Square_Wave(Real rho, Real vx, Real vy, Real vz, Real P, Real A) C.momentum_x[id] = rho * A * vx; C.momentum_y[id] = rho * A * vy; C.momentum_z[id] = rho * A * vz; - C.Energy[id] = - P / (gama - 1.0) + 0.5 * rho * A * (vx * vx + vy * vy + vz * vz); + C.Energy[id] = P / (gama - 1.0) + 0.5 * rho * A * (vx * vx + vy * vy + vz * vz); #ifdef DE C.GasEnergy[id] = P / (gama - 1.0); #endif @@ -430,10 +412,8 @@ void Grid3D::Square_Wave(Real rho, Real vx, Real vy, Real vz, Real P, Real A) Bx_l, Real By_l, Real Bz_l, Real rho_r, Real vx_r, Real vy_r, Real vz_r, Real P_r, Real Bx_r, Real By_r, Real Bz_r, Real diaph) * \brief Initialize the grid with a Riemann problem. */ -void Grid3D::Riemann(Real rho_l, Real vx_l, Real vy_l, Real vz_l, Real P_l, - Real Bx_l, Real By_l, Real Bz_l, Real rho_r, Real vx_r, - Real vy_r, Real vz_r, Real P_r, Real Bx_r, Real By_r, - Real Bz_r, Real diaph) +void Grid3D::Riemann(Real rho_l, Real vx_l, Real vy_l, Real vz_l, Real P_l, Real Bx_l, Real By_l, Real Bz_l, Real rho_r, + Real vx_r, Real vy_r, Real vz_r, Real P_r, Real Bx_r, Real By_r, Real Bz_r, Real diaph) { int i, j, k, id; int istart, jstart, kstart, iend, jend, kend; @@ -488,8 +468,7 @@ void Grid3D::Riemann(Real rho_l, Real vx_l, Real vy_l, Real vz_l, Real P_l, C.momentum_x[id] = rho_l * vx_l; C.momentum_y[id] = rho_l * vy_l; C.momentum_z[id] = rho_l * vz_l; - C.Energy[id] = mhd::utils::computeEnergy( - P_l, rho_l, vx_l, vy_l, vz_l, Bx_l, By_l, Bz_l, gama); + C.Energy[id] = mhd::utils::computeEnergy(P_l, rho_l, vx_l, vy_l, vz_l, Bx_l, By_l, Bz_l, gama); #ifdef SCALAR #ifdef BASIC_SCALAR C.basic_scalar[id] = 1.0 * rho_l; @@ -503,8 +482,7 @@ void Grid3D::Riemann(Real rho_l, Real vx_l, Real vy_l, Real vz_l, Real P_l, C.momentum_x[id] = rho_r * vx_r; C.momentum_y[id] = rho_r * vy_r; C.momentum_z[id] = rho_r * vz_r; - C.Energy[id] = mhd::utils::computeEnergy( - P_r, rho_r, vx_r, vy_r, vz_r, Bx_r, By_r, Bz_r, gama); + C.Energy[id] = mhd::utils::computeEnergy(P_r, rho_r, vx_r, vy_r, vz_r, Bx_r, By_r, Bz_r, gama); #ifdef SCALAR #ifdef BASIC_SCALAR C.basic_scalar[id] = 0.0 * rho_r; @@ -675,10 +653,8 @@ void Grid3D::KH() #endif } C.Energy[id] = - P / (gama - 1.0) + 0.5 * - (C.momentum_x[id] * C.momentum_x[id] + - C.momentum_y[id] * C.momentum_y[id]) / - C.density[id]; + P / (gama - 1.0) + + 0.5 * (C.momentum_x[id] * C.momentum_x[id] + C.momentum_y[id] * C.momentum_y[id]) / C.density[id]; #ifdef DE C.GasEnergy[id] = P / (gama - 1.0); #endif // DE @@ -737,134 +713,67 @@ void Grid3D::KH_res_ind() if (fabs(y_pos - 0.5) < 0.25) { if (y_pos > 0.5) { C.density[id] = - d1 - - (d1 - d2) * - exp(-0.5 * - pow(y_pos - 0.75 - sqrt(-2.0 * dy * dy * log(0.5)), 2) / - (dy * dy)); + d1 - (d1 - d2) * exp(-0.5 * pow(y_pos - 0.75 - sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy)); C.momentum_x[id] = - v1 * C.density[id] - - C.density[id] * (v1 - v2) * - exp(-0.5 * - pow(y_pos - 0.75 - sqrt(-2.0 * dy * dy * log(0.5)), 2) / - (dy * dy)); - C.momentum_y[id] = - C.density[id] * A * sin(4 * PI * x_pos) * - exp(-0.5 * - pow(y_pos - 0.75 - sqrt(-2.0 * dy * dy * log(0.5)), 2) / - (dy * dy)); + v1 * C.density[id] - C.density[id] * (v1 - v2) * + exp(-0.5 * pow(y_pos - 0.75 - sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy)); + C.momentum_y[id] = C.density[id] * A * sin(4 * PI * x_pos) * + exp(-0.5 * pow(y_pos - 0.75 - sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy)); } else { C.density[id] = - d1 - - (d1 - d2) * - exp(-0.5 * - pow(y_pos - 0.25 + sqrt(-2.0 * dy * dy * log(0.5)), 2) / - (dy * dy)); + d1 - (d1 - d2) * exp(-0.5 * pow(y_pos - 0.25 + sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy)); C.momentum_x[id] = - v1 * C.density[id] - - C.density[id] * (v1 - v2) * - exp(-0.5 * - pow(y_pos - 0.25 + sqrt(-2.0 * dy * dy * log(0.5)), 2) / - (dy * dy)); - C.momentum_y[id] = - C.density[id] * A * sin(4 * PI * x_pos) * - exp(-0.5 * - pow(y_pos - 0.25 + sqrt(-2.0 * dy * dy * log(0.5)), 2) / - (dy * dy)); + v1 * C.density[id] - C.density[id] * (v1 - v2) * + exp(-0.5 * pow(y_pos - 0.25 + sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy)); + C.momentum_y[id] = C.density[id] * A * sin(4 * PI * x_pos) * + exp(-0.5 * pow(y_pos - 0.25 + sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy)); } } // outer fluid else { if (y_pos > 0.5) { C.density[id] = - d2 + - (d1 - d2) * - exp(-0.5 * - pow(y_pos - 0.75 + sqrt(-2.0 * dy * dy * log(0.5)), 2) / - (dy * dy)); + d2 + (d1 - d2) * exp(-0.5 * pow(y_pos - 0.75 + sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy)); C.momentum_x[id] = - v2 * C.density[id] + - C.density[id] * (v1 - v2) * - exp(-0.5 * - pow(y_pos - 0.75 + sqrt(-2.0 * dy * dy * log(0.5)), 2) / - (dy * dy)); - C.momentum_y[id] = - C.density[id] * A * sin(4 * PI * x_pos) * - exp(-0.5 * - pow(y_pos - 0.75 + sqrt(-2.0 * dy * dy * log(0.5)), 2) / - (dy * dy)); + v2 * C.density[id] + C.density[id] * (v1 - v2) * + exp(-0.5 * pow(y_pos - 0.75 + sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy)); + C.momentum_y[id] = C.density[id] * A * sin(4 * PI * x_pos) * + exp(-0.5 * pow(y_pos - 0.75 + sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy)); } else { C.density[id] = - d2 + - (d1 - d2) * - exp(-0.5 * - pow(y_pos - 0.25 - sqrt(-2.0 * dy * dy * log(0.5)), 2) / - (dy * dy)); + d2 + (d1 - d2) * exp(-0.5 * pow(y_pos - 0.25 - sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy)); C.momentum_x[id] = - v2 * C.density[id] + - C.density[id] * (v1 - v2) * - exp(-0.5 * - pow(y_pos - 0.25 - sqrt(-2.0 * dy * dy * log(0.5)), 2) / - (dy * dy)); - C.momentum_y[id] = - C.density[id] * A * sin(4 * PI * x_pos) * - exp(-0.5 * - pow(y_pos - 0.25 - sqrt(-2.0 * dy * dy * log(0.5)), 2) / - (dy * dy)); + v2 * C.density[id] + C.density[id] * (v1 - v2) * + exp(-0.5 * pow(y_pos - 0.25 - sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy)); + C.momentum_y[id] = C.density[id] * A * sin(4 * PI * x_pos) * + exp(-0.5 * pow(y_pos - 0.25 - sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy)); } } // C.momentum_y[id] = C.density[id] * A*sin(4*PI*x_pos); C.momentum_z[id] = 0.0; // cylindrical version (3D only) - r = sqrt((z_pos - zc) * (z_pos - zc) + - (y_pos - yc) * (y_pos - yc)); // center the cylinder at yc, zc + r = sqrt((z_pos - zc) * (z_pos - zc) + (y_pos - yc) * (y_pos - yc)); // center the cylinder at yc, zc phi = atan2((z_pos - zc), (y_pos - yc)); if (r < 0.25) // inside the cylinder { - C.density[id] = - d1 - (d1 - d2) * - exp(-0.5 * - pow(r - 0.25 - sqrt(-2.0 * dy * dy * log(0.5)), 2) / - (dy * dy)); - C.momentum_x[id] = - v1 * C.density[id] - - C.density[id] * - exp(-0.5 * - pow(r - 0.25 - sqrt(-2.0 * dy * dy * log(0.5)), 2) / - (dy * dy)); - C.momentum_y[id] = - cos(phi) * C.density[id] * A * sin(4 * PI * x_pos) * - exp(-0.5 * pow(r - 0.25 + sqrt(-2.0 * dy * dy * log(0.5)), 2) / - (dy * dy)); - C.momentum_z[id] = - sin(phi) * C.density[id] * A * sin(4 * PI * x_pos) * - exp(-0.5 * pow(r - 0.25 + sqrt(-2.0 * dy * dy * log(0.5)), 2) / - (dy * dy)); + C.density[id] = d1 - (d1 - d2) * exp(-0.5 * pow(r - 0.25 - sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy)); + C.momentum_x[id] = v1 * C.density[id] - + C.density[id] * exp(-0.5 * pow(r - 0.25 - sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy)); + C.momentum_y[id] = cos(phi) * C.density[id] * A * sin(4 * PI * x_pos) * + exp(-0.5 * pow(r - 0.25 + sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy)); + C.momentum_z[id] = sin(phi) * C.density[id] * A * sin(4 * PI * x_pos) * + exp(-0.5 * pow(r - 0.25 + sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy)); } else // outside the cylinder { - C.density[id] = - d2 + (d1 - d2) * - exp(-0.5 * - pow(r - 0.25 + sqrt(-2.0 * dy * dy * log(0.5)), 2) / - (dy * dy)); - C.momentum_x[id] = - v2 * C.density[id] + - C.density[id] * - exp(-0.5 * - pow(r - 0.25 + sqrt(-2.0 * dy * dy * log(0.5)), 2) / - (dy * dy)); - C.momentum_y[id] = - cos(phi) * C.density[id] * A * sin(4 * PI * x_pos) * - (1.0 - - exp(-0.5 * pow(r - 0.25 + sqrt(-2.0 * dy * dy * log(0.5)), 2) / - (dy * dy))); - C.momentum_z[id] = - sin(phi) * C.density[id] * A * sin(4 * PI * x_pos) * - (1.0 - - exp(-0.5 * pow(r - 0.25 + sqrt(-2.0 * dy * dy * log(0.5)), 2) / - (dy * dy))); + C.density[id] = d2 + (d1 - d2) * exp(-0.5 * pow(r - 0.25 + sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy)); + C.momentum_x[id] = v2 * C.density[id] + + C.density[id] * exp(-0.5 * pow(r - 0.25 + sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy)); + C.momentum_y[id] = cos(phi) * C.density[id] * A * sin(4 * PI * x_pos) * + (1.0 - exp(-0.5 * pow(r - 0.25 + sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy))); + C.momentum_z[id] = sin(phi) * C.density[id] * A * sin(4 * PI * x_pos) * + (1.0 - exp(-0.5 * pow(r - 0.25 + sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy))); } // No matter what we do with the density and momentum, set the Energy @@ -872,8 +781,7 @@ void Grid3D::KH_res_ind() mx = C.momentum_x[id]; my = C.momentum_y[id]; mz = C.momentum_z[id]; - C.Energy[id] = P / (gama - 1.0) + - 0.5 * (mx * mx + my * my + mz * mz) / C.density[id]; + C.Energy[id] = P / (gama - 1.0) + 0.5 * (mx * mx + my * my + mz * mz) / C.density[id]; #ifdef DE C.GasEnergy[id] = P / (gama - 1.0); @@ -903,8 +811,7 @@ void Grid3D::Rayleigh_Taylor() Get_Position(i, j, H.n_ghost, &x_pos, &y_pos, &z_pos); // set the y velocities (small perturbation tapering off from center) - vy = 0.01 * cos(6 * PI * x_pos + PI) * - exp(-(y_pos - 0.5 * H.ydglobal) * (y_pos - 0.5 * H.ydglobal) / 0.1); + vy = 0.01 * cos(6 * PI * x_pos + PI) * exp(-(y_pos - 0.5 * H.ydglobal) * (y_pos - 0.5 * H.ydglobal) / 0.1); // vy = 0.0; // lower half of slab @@ -926,9 +833,7 @@ void Grid3D::Rayleigh_Taylor() C.momentum_z[id] = 0.0; } - C.Energy[id] = - P / (gama - 1.0) + - 0.5 * (C.momentum_y[id] * C.momentum_y[id]) / C.density[id]; + C.Energy[id] = P / (gama - 1.0) + 0.5 * (C.momentum_y[id] * C.momentum_y[id]) / C.density[id]; #ifdef DE C.GasEnergy[id] = P / (gama - 1.0); #endif // DE @@ -1012,7 +917,7 @@ void Grid3D::Gresho() } else if (r >= 0.2 && r < 0.4) { vx = -sin(phi) * (2.0 - 5.0 * r) + v_boost; vy = cos(phi) * (2.0 - 5.0 * r); - P = 9.0 - 4.0 * log(0.2) + 0.5 * 25.0 * r * r - 20.0 * r + 4.0 * log(r); + P = 9.0 - 4.0 * log(0.2) + 0.5 * 25.0 * r * r - 20.0 * r + 4.0 * log(r); } else { vx = 0.0; vy = 0.0; @@ -1176,16 +1081,15 @@ void Grid3D::Disk_2D() // Assume gas surface density is exponential with scale length 2*R_d and // mass 0.25*M_d Sigma = 0.25 * M_d * exp(-r / (2 * R_d)) / (8 * PI * R_d * R_d); - d = Sigma; // just use sigma for mass density since height is arbitrary - n = d * DENSITY_UNIT / MP; // number density, cgs - P = n * KB * T_d / PRESSURE_UNIT; // disk pressure, code units + d = Sigma; // just use sigma for mass density since height is arbitrary + n = d * DENSITY_UNIT / MP; // number density, cgs + P = n * KB * T_d / PRESSURE_UNIT; // disk pressure, code units // radial acceleration due to Kuzmin disk + NFW halo x = r / R_h; a_d = GN * M_d * r * pow(r * r + R_d * R_d, -1.5); - a_h = GN * M_h * (log(1 + x) - x / (1 + x)) / - ((log(1 + c_vir) - c_vir / (1 + c_vir)) * r * r); - a = a_d + a_h; + a_h = GN * M_h * (log(1 + x) - x / (1 + x)) / ((log(1 + c_vir) - c_vir / (1 + c_vir)) * r * r); + a = a_d + a_h; // circular velocity v = sqrt(r * a); @@ -1236,8 +1140,7 @@ void Grid3D::Spherical_Overpressure_3D() density = 0.1; pressure = 1; - r = sqrt((x_pos - center_x) * (x_pos - center_x) + - (y_pos - center_y) * (y_pos - center_y) + + r = sqrt((x_pos - center_x) * (x_pos - center_x) + (y_pos - center_y) * (y_pos - center_y) + (z_pos - center_z) * (z_pos - center_z)); if (r < 0.2) { density = overDensity; @@ -1265,8 +1168,7 @@ void Grid3D::Spherical_Overdensity_3D() { int i, j, k, id; Real x_pos, y_pos, z_pos, r, center_x, center_y, center_z; - Real density, pressure, overDensity, overPressure, energy, radius, - background_density; + Real density, pressure, overDensity, overPressure, energy, radius, background_density; Real vx, vy, vz, v2; center_x = 0.5; center_y = 0.5; @@ -1298,8 +1200,7 @@ void Grid3D::Spherical_Overdensity_3D() density = background_density; pressure = 0.0005; - r = sqrt((x_pos - center_x) * (x_pos - center_x) + - (y_pos - center_y) * (y_pos - center_y) + + r = sqrt((x_pos - center_x) * (x_pos - center_x) + (y_pos - center_y) * (y_pos - center_y) + (z_pos - center_z) * (z_pos - center_z)); if (r < radius) { density = overDensity; @@ -1355,8 +1256,7 @@ void Grid3D::Clouds() cl_pos[nn][0] = 0.5 * H.xdglobal; cl_pos[nn][1] = 0.5 * H.ydglobal; cl_pos[nn][2] = 0.5 * H.zdglobal; - printf("Cloud positions: %f %f %f\n", cl_pos[nn][0], cl_pos[nn][1], - cl_pos[nn][2]); + printf("Cloud positions: %f %f %f\n", cl_pos[nn][0], cl_pos[nn][1], cl_pos[nn][2]); } n_bg = 1.68e-4; @@ -1405,9 +1305,7 @@ void Grid3D::Clouds() C.momentum_x[id] = rho_bg * vx_bg; C.momentum_y[id] = rho_bg * vy_bg; C.momentum_z[id] = rho_bg * vz_bg; - C.Energy[id] = - p_bg / (gama - 1.0) + - 0.5 * rho_bg * (vx_bg * vx_bg + vy_bg * vy_bg + vz_bg * vz_bg); + C.Energy[id] = p_bg / (gama - 1.0) + 0.5 * rho_bg * (vx_bg * vx_bg + vy_bg * vy_bg + vz_bg * vz_bg); #ifdef DE C.GasEnergy[id] = p_bg / (gama - 1.0); #endif @@ -1426,9 +1324,7 @@ void Grid3D::Clouds() C.momentum_x[id] = rho_cl * vx_cl; C.momentum_y[id] = rho_cl * vy_cl; C.momentum_z[id] = rho_cl * vz_cl; - C.Energy[id] = - p_cl / (gama - 1.0) + - 0.5 * rho_cl * (vx_cl * vx_cl + vy_cl * vy_cl + vz_cl * vz_cl); + C.Energy[id] = p_cl / (gama - 1.0) + 0.5 * rho_cl * (vx_cl * vx_cl + vy_cl * vy_cl + vz_cl * vz_cl); #ifdef DE C.GasEnergy[id] = p_cl / (gama - 1.0); #endif // DE @@ -1597,10 +1493,8 @@ void Grid3D::Chemistry_Test(struct parameters P) chprintf("Initializing Chemistry Test...\n"); #ifdef COSMOLOGY - Real H0, Omega_M, Omega_L, Omega_b, current_z, rho_gas_mean, kpc_cgs, G, z, h, - mu, T0, U, rho_gas; - Real HI_frac, HII_frac, HeI_frac, HeII_frac, HeIII_frac, e_frac, metal_frac, - _min; + Real H0, Omega_M, Omega_L, Omega_b, current_z, rho_gas_mean, kpc_cgs, G, z, h, mu, T0, U, rho_gas; + Real HI_frac, HII_frac, HeI_frac, HeII_frac, HeIII_frac, e_frac, metal_frac, _min; H0 = P.H0; Omega_M = P.Omega_M; diff --git a/src/grid/mpi_boundaries.cpp b/src/grid/mpi_boundaries.cpp index 1261d4582..9f6f6d3de 100644 --- a/src/grid/mpi_boundaries.cpp +++ b/src/grid/mpi_boundaries.cpp @@ -1,6 +1,6 @@ #include -#include "../global/global_cuda.h" //provides TPB +#include "../global/global_cuda.h" //provides TPB #include "../grid/cuda_boundaries.h" // provides PackBuffers3D and UnpackBuffers3D #include "../io/io.h" #include "../mpi/mpi_routines.h" @@ -52,8 +52,7 @@ void Grid3D::Set_Boundaries_MPI_BLOCK(int *flags, struct parameters P) Wait_and_Unload_MPI_Comm_Buffers(0, flags); #ifdef PARTICLES // Unload Particles buffers when transfering Particles - if (Particles.TRANSFER_PARTICLES_BOUNDARIES) - Wait_and_Unload_MPI_Comm_Particles_Buffers_BLOCK(0, flags); + if (Particles.TRANSFER_PARTICLES_BOUNDARIES) Wait_and_Unload_MPI_Comm_Particles_Buffers_BLOCK(0, flags); #endif } } @@ -73,8 +72,7 @@ void Grid3D::Set_Boundaries_MPI_BLOCK(int *flags, struct parameters P) Wait_and_Unload_MPI_Comm_Buffers(1, flags); #ifdef PARTICLES // Unload Particles buffers when transfering Particles - if (Particles.TRANSFER_PARTICLES_BOUNDARIES) - Wait_and_Unload_MPI_Comm_Particles_Buffers_BLOCK(1, flags); + if (Particles.TRANSFER_PARTICLES_BOUNDARIES) Wait_and_Unload_MPI_Comm_Particles_Buffers_BLOCK(1, flags); #endif } } @@ -94,8 +92,7 @@ void Grid3D::Set_Boundaries_MPI_BLOCK(int *flags, struct parameters P) Wait_and_Unload_MPI_Comm_Buffers(2, flags); #ifdef PARTICLES // Unload Particles buffers when transfering Particles - if (Particles.TRANSFER_PARTICLES_BOUNDARIES) - Wait_and_Unload_MPI_Comm_Particles_Buffers_BLOCK(2, flags); + if (Particles.TRANSFER_PARTICLES_BOUNDARIES) Wait_and_Unload_MPI_Comm_Particles_Buffers_BLOCK(2, flags); #endif } } @@ -110,21 +107,19 @@ int Grid3D::Load_Hydro_DeviceBuffer_X0(Real *send_buffer_x0) // 1D if (H.ny == 1 && H.nz == 1) { int idxoffset = H.n_ghost; - PackBuffers3D(send_buffer_x0, C.device, H.nx, H.ny, H.n_fields, H.n_cells, - idxoffset, H.n_ghost, 1, 1); + PackBuffers3D(send_buffer_x0, C.device, H.nx, H.ny, H.n_fields, H.n_cells, idxoffset, H.n_ghost, 1, 1); } // 2D if (H.ny > 1 && H.nz == 1) { int idxoffset = H.n_ghost + H.n_ghost * H.nx; - PackBuffers3D(send_buffer_x0, C.device, H.nx, H.ny, H.n_fields, H.n_cells, - idxoffset, H.n_ghost, H.ny - 2 * H.n_ghost, 1); + PackBuffers3D(send_buffer_x0, C.device, H.nx, H.ny, H.n_fields, H.n_cells, idxoffset, H.n_ghost, + H.ny - 2 * H.n_ghost, 1); } // 3D if (H.ny > 1 && H.nz > 1) { int idxoffset = H.n_ghost + H.n_ghost * H.nx + H.n_ghost * H.nx * H.ny; - PackBuffers3D(send_buffer_x0, C.device, H.nx, H.ny, H.n_fields, H.n_cells, - idxoffset, H.n_ghost, H.ny - 2 * H.n_ghost, - H.nz - 2 * H.n_ghost); + PackBuffers3D(send_buffer_x0, C.device, H.nx, H.ny, H.n_fields, H.n_cells, idxoffset, H.n_ghost, + H.ny - 2 * H.n_ghost, H.nz - 2 * H.n_ghost); } return x_buffer_length; @@ -136,22 +131,19 @@ int Grid3D::Load_Hydro_DeviceBuffer_X1(Real *send_buffer_x1) // 1D if (H.ny == 1 && H.nz == 1) { int idxoffset = H.nx - 2 * H.n_ghost; - PackBuffers3D(send_buffer_x1, C.device, H.nx, H.ny, H.n_fields, H.n_cells, - idxoffset, H.n_ghost, 1, 1); + PackBuffers3D(send_buffer_x1, C.device, H.nx, H.ny, H.n_fields, H.n_cells, idxoffset, H.n_ghost, 1, 1); } // 2D if (H.ny > 1 && H.nz == 1) { int idxoffset = H.nx - 2 * H.n_ghost + H.n_ghost * H.nx; - PackBuffers3D(send_buffer_x1, C.device, H.nx, H.ny, H.n_fields, H.n_cells, - idxoffset, H.n_ghost, H.ny - 2 * H.n_ghost, 1); + PackBuffers3D(send_buffer_x1, C.device, H.nx, H.ny, H.n_fields, H.n_cells, idxoffset, H.n_ghost, + H.ny - 2 * H.n_ghost, 1); } // 3D if (H.ny > 1 && H.nz > 1) { - int idxoffset = - H.nx - 2 * H.n_ghost + H.n_ghost * H.nx + H.n_ghost * H.nx * H.ny; - PackBuffers3D(send_buffer_x1, C.device, H.nx, H.ny, H.n_fields, H.n_cells, - idxoffset, H.n_ghost, H.ny - 2 * H.n_ghost, - H.nz - 2 * H.n_ghost); + int idxoffset = H.nx - 2 * H.n_ghost + H.n_ghost * H.nx + H.n_ghost * H.nx * H.ny; + PackBuffers3D(send_buffer_x1, C.device, H.nx, H.ny, H.n_fields, H.n_cells, idxoffset, H.n_ghost, + H.ny - 2 * H.n_ghost, H.nz - 2 * H.n_ghost); } return x_buffer_length; @@ -163,14 +155,13 @@ int Grid3D::Load_Hydro_DeviceBuffer_Y0(Real *send_buffer_y0) // 2D if (H.nz == 1) { int idxoffset = H.n_ghost * H.nx; - PackBuffers3D(send_buffer_y0, C.device, H.nx, H.ny, H.n_fields, H.n_cells, - idxoffset, H.nx, H.n_ghost, 1); + PackBuffers3D(send_buffer_y0, C.device, H.nx, H.ny, H.n_fields, H.n_cells, idxoffset, H.nx, H.n_ghost, 1); } // 3D if (H.nz > 1) { int idxoffset = H.n_ghost * H.nx + H.n_ghost * H.nx * H.ny; - PackBuffers3D(send_buffer_y0, C.device, H.nx, H.ny, H.n_fields, H.n_cells, - idxoffset, H.nx, H.n_ghost, H.nz - 2 * H.n_ghost); + PackBuffers3D(send_buffer_y0, C.device, H.nx, H.ny, H.n_fields, H.n_cells, idxoffset, H.nx, H.n_ghost, + H.nz - 2 * H.n_ghost); } return y_buffer_length; @@ -181,14 +172,13 @@ int Grid3D::Load_Hydro_DeviceBuffer_Y1(Real *send_buffer_y1) // 2D if (H.nz == 1) { int idxoffset = (H.ny - 2 * H.n_ghost) * H.nx; - PackBuffers3D(send_buffer_y1, C.device, H.nx, H.ny, H.n_fields, H.n_cells, - idxoffset, H.nx, H.n_ghost, 1); + PackBuffers3D(send_buffer_y1, C.device, H.nx, H.ny, H.n_fields, H.n_cells, idxoffset, H.nx, H.n_ghost, 1); } // 3D if (H.nz > 1) { int idxoffset = (H.ny - 2 * H.n_ghost) * H.nx + H.n_ghost * H.nx * H.ny; - PackBuffers3D(send_buffer_y1, C.device, H.nx, H.ny, H.n_fields, H.n_cells, - idxoffset, H.nx, H.n_ghost, H.nz - 2 * H.n_ghost); + PackBuffers3D(send_buffer_y1, C.device, H.nx, H.ny, H.n_fields, H.n_cells, idxoffset, H.nx, H.n_ghost, + H.nz - 2 * H.n_ghost); } return y_buffer_length; @@ -199,8 +189,7 @@ int Grid3D::Load_Hydro_DeviceBuffer_Z0(Real *send_buffer_z0) { // 3D int idxoffset = H.n_ghost * H.nx * H.ny; - PackBuffers3D(send_buffer_z0, C.device, H.nx, H.ny, H.n_fields, H.n_cells, - idxoffset, H.nx, H.ny, H.n_ghost); + PackBuffers3D(send_buffer_z0, C.device, H.nx, H.ny, H.n_fields, H.n_cells, idxoffset, H.nx, H.ny, H.n_ghost); return z_buffer_length; } @@ -209,8 +198,7 @@ int Grid3D::Load_Hydro_DeviceBuffer_Z1(Real *send_buffer_z1) { // 3D int idxoffset = (H.nz - 2 * H.n_ghost) * H.nx * H.ny; - PackBuffers3D(send_buffer_z1, C.device, H.nx, H.ny, H.n_fields, H.n_cells, - idxoffset, H.nx, H.ny, H.n_ghost); + PackBuffers3D(send_buffer_z1, C.device, H.nx, H.ny, H.n_fields, H.n_cells, idxoffset, H.nx, H.ny, H.n_ghost); return z_buffer_length; } @@ -220,21 +208,19 @@ void Grid3D::Unload_Hydro_DeviceBuffer_X0(Real *recv_buffer_x0) // 1D if (H.ny == 1 && H.nz == 1) { int idxoffset = 0; - UnpackBuffers3D(recv_buffer_x0, C.device, H.nx, H.ny, H.n_fields, H.n_cells, - idxoffset, H.n_ghost, 1, 1); + UnpackBuffers3D(recv_buffer_x0, C.device, H.nx, H.ny, H.n_fields, H.n_cells, idxoffset, H.n_ghost, 1, 1); } // 2D if (H.ny > 1 && H.nz == 1) { int idxoffset = H.n_ghost * H.nx; - UnpackBuffers3D(recv_buffer_x0, C.device, H.nx, H.ny, H.n_fields, H.n_cells, - idxoffset, H.n_ghost, H.ny - 2 * H.n_ghost, 1); + UnpackBuffers3D(recv_buffer_x0, C.device, H.nx, H.ny, H.n_fields, H.n_cells, idxoffset, H.n_ghost, + H.ny - 2 * H.n_ghost, 1); } // 3D if (H.nz > 1) { int idxoffset = H.n_ghost * (H.nx + H.nx * H.ny); - UnpackBuffers3D(recv_buffer_x0, C.device, H.nx, H.ny, H.n_fields, H.n_cells, - idxoffset, H.n_ghost, H.ny - 2 * H.n_ghost, - H.nz - 2 * H.n_ghost); + UnpackBuffers3D(recv_buffer_x0, C.device, H.nx, H.ny, H.n_fields, H.n_cells, idxoffset, H.n_ghost, + H.ny - 2 * H.n_ghost, H.nz - 2 * H.n_ghost); } } @@ -243,21 +229,19 @@ void Grid3D::Unload_Hydro_DeviceBuffer_X1(Real *recv_buffer_x1) // 1D if (H.ny == 1 && H.nz == 1) { int idxoffset = H.nx - H.n_ghost; - UnpackBuffers3D(recv_buffer_x1, C.device, H.nx, H.ny, H.n_fields, H.n_cells, - idxoffset, H.n_ghost, 1, 1); + UnpackBuffers3D(recv_buffer_x1, C.device, H.nx, H.ny, H.n_fields, H.n_cells, idxoffset, H.n_ghost, 1, 1); } // 2D if (H.ny > 1 && H.nz == 1) { int idxoffset = H.nx - H.n_ghost + H.n_ghost * H.nx; - UnpackBuffers3D(recv_buffer_x1, C.device, H.nx, H.ny, H.n_fields, H.n_cells, - idxoffset, H.n_ghost, H.ny - 2 * H.n_ghost, 1); + UnpackBuffers3D(recv_buffer_x1, C.device, H.nx, H.ny, H.n_fields, H.n_cells, idxoffset, H.n_ghost, + H.ny - 2 * H.n_ghost, 1); } // 3D if (H.nz > 1) { int idxoffset = H.nx - H.n_ghost + H.n_ghost * (H.nx + H.nx * H.ny); - UnpackBuffers3D(recv_buffer_x1, C.device, H.nx, H.ny, H.n_fields, H.n_cells, - idxoffset, H.n_ghost, H.ny - 2 * H.n_ghost, - H.nz - 2 * H.n_ghost); + UnpackBuffers3D(recv_buffer_x1, C.device, H.nx, H.ny, H.n_fields, H.n_cells, idxoffset, H.n_ghost, + H.ny - 2 * H.n_ghost, H.nz - 2 * H.n_ghost); } } @@ -266,14 +250,13 @@ void Grid3D::Unload_Hydro_DeviceBuffer_Y0(Real *recv_buffer_y0) // 2D if (H.nz == 1) { int idxoffset = 0; - UnpackBuffers3D(recv_buffer_y0, C.device, H.nx, H.ny, H.n_fields, H.n_cells, - idxoffset, H.nx, H.n_ghost, 1); + UnpackBuffers3D(recv_buffer_y0, C.device, H.nx, H.ny, H.n_fields, H.n_cells, idxoffset, H.nx, H.n_ghost, 1); } // 3D if (H.nz > 1) { int idxoffset = H.n_ghost * H.nx * H.ny; - UnpackBuffers3D(recv_buffer_y0, C.device, H.nx, H.ny, H.n_fields, H.n_cells, - idxoffset, H.nx, H.n_ghost, H.nz - 2 * H.n_ghost); + UnpackBuffers3D(recv_buffer_y0, C.device, H.nx, H.ny, H.n_fields, H.n_cells, idxoffset, H.nx, H.n_ghost, + H.nz - 2 * H.n_ghost); } } @@ -282,14 +265,13 @@ void Grid3D::Unload_Hydro_DeviceBuffer_Y1(Real *recv_buffer_y1) // 2D if (H.nz == 1) { int idxoffset = (H.ny - H.n_ghost) * H.nx; - UnpackBuffers3D(recv_buffer_y1, C.device, H.nx, H.ny, H.n_fields, H.n_cells, - idxoffset, H.nx, H.n_ghost, 1); + UnpackBuffers3D(recv_buffer_y1, C.device, H.nx, H.ny, H.n_fields, H.n_cells, idxoffset, H.nx, H.n_ghost, 1); } // 3D if (H.nz > 1) { int idxoffset = (H.ny - H.n_ghost) * H.nx + H.n_ghost * H.nx * H.ny; - UnpackBuffers3D(recv_buffer_y1, C.device, H.nx, H.ny, H.n_fields, H.n_cells, - idxoffset, H.nx, H.n_ghost, H.nz - 2 * H.n_ghost); + UnpackBuffers3D(recv_buffer_y1, C.device, H.nx, H.ny, H.n_fields, H.n_cells, idxoffset, H.nx, H.n_ghost, + H.nz - 2 * H.n_ghost); } } @@ -297,16 +279,14 @@ void Grid3D::Unload_Hydro_DeviceBuffer_Z0(Real *recv_buffer_z0) { // 3D int idxoffset = 0; - UnpackBuffers3D(recv_buffer_z0, C.device, H.nx, H.ny, H.n_fields, H.n_cells, - idxoffset, H.nx, H.ny, H.n_ghost); + UnpackBuffers3D(recv_buffer_z0, C.device, H.nx, H.ny, H.n_fields, H.n_cells, idxoffset, H.nx, H.ny, H.n_ghost); } void Grid3D::Unload_Hydro_DeviceBuffer_Z1(Real *recv_buffer_z1) { // 3D int idxoffset = (H.nz - H.n_ghost) * H.nx * H.ny; - UnpackBuffers3D(recv_buffer_z1, C.device, H.nx, H.ny, H.n_fields, H.n_cells, - idxoffset, H.nx, H.ny, H.n_ghost); + UnpackBuffers3D(recv_buffer_z1, C.device, H.nx, H.ny, H.n_fields, H.n_cells, idxoffset, H.nx, H.ny, H.n_ghost); } void Grid3D::Load_and_Send_MPI_Comm_Buffers(int dir, int *flags) @@ -325,8 +305,7 @@ void Grid3D::Load_and_Send_MPI_Comm_Buffers(int dir, int *flags) int ireq; ireq = 0; - int xbsize = x_buffer_length, ybsize = y_buffer_length, - zbsize = z_buffer_length; + int xbsize = x_buffer_length, ybsize = y_buffer_length, zbsize = z_buffer_length; int buffer_length; @@ -341,23 +320,19 @@ void Grid3D::Load_and_Send_MPI_Comm_Buffers(int dir, int *flags) if (H.TRANSFER_HYDRO_BOUNDARIES) { buffer_length = Load_Hydro_DeviceBuffer_X0(d_send_buffer_x0); #ifndef MPI_GPU - cudaMemcpy(h_send_buffer_x0, d_send_buffer_x0, xbsize * sizeof(Real), - cudaMemcpyDeviceToHost); + cudaMemcpy(h_send_buffer_x0, d_send_buffer_x0, xbsize * sizeof(Real), cudaMemcpyDeviceToHost); #endif } #ifdef GRAVITY if (Grav.TRANSFER_POTENTIAL_BOUNDARIES) { #ifdef GRAVITY_GPU - buffer_length = - Load_Gravity_Potential_To_Buffer_GPU(0, 0, d_send_buffer_x0, 0); + buffer_length = Load_Gravity_Potential_To_Buffer_GPU(0, 0, d_send_buffer_x0, 0); #ifndef MPI_GPU - cudaMemcpy(h_send_buffer_x0, d_send_buffer_x0, xbsize * sizeof(Real), - cudaMemcpyDeviceToHost); + cudaMemcpy(h_send_buffer_x0, d_send_buffer_x0, xbsize * sizeof(Real), cudaMemcpyDeviceToHost); #endif #else - buffer_length = - Load_Gravity_Potential_To_Buffer(0, 0, h_send_buffer_x0, 0); + buffer_length = Load_Gravity_Potential_To_Buffer(0, 0, h_send_buffer_x0, 0); #endif } #ifdef SOR @@ -369,21 +344,16 @@ void Grid3D::Load_and_Send_MPI_Comm_Buffers(int dir, int *flags) #ifdef PARTICLES if (Particles.TRANSFER_DENSITY_BOUNDARIES) { #ifdef PARTICLES_GPU - buffer_length = Load_Particles_Density_Boundary_to_Buffer_GPU( - 0, 0, d_send_buffer_x0); + buffer_length = Load_Particles_Density_Boundary_to_Buffer_GPU(0, 0, d_send_buffer_x0); #ifndef MPI_GPU - cudaMemcpy(h_send_buffer_x0, d_send_buffer_x0, xbsize * sizeof(Real), - cudaMemcpyDeviceToHost); + cudaMemcpy(h_send_buffer_x0, d_send_buffer_x0, xbsize * sizeof(Real), cudaMemcpyDeviceToHost); #endif #else #ifndef MPI_GPU - buffer_length = - Load_Particles_Density_Boundary_to_Buffer(0, 0, h_send_buffer_x0); + buffer_length = Load_Particles_Density_Boundary_to_Buffer(0, 0, h_send_buffer_x0); #else - buffer_length = Load_Particles_Density_Boundary_to_Buffer( - 0, 0, h_send_buffer_x0_particles); - cudaMemcpy(d_send_buffer_x0, h_send_buffer_x0_particles, - buffer_length * sizeof(Real), cudaMemcpyHostToDevice); + buffer_length = Load_Particles_Density_Boundary_to_Buffer(0, 0, h_send_buffer_x0_particles); + cudaMemcpy(d_send_buffer_x0, h_send_buffer_x0_particles, buffer_length * sizeof(Real), cudaMemcpyHostToDevice); #endif #endif } else if (Particles.TRANSFER_PARTICLES_BOUNDARIES) { @@ -397,20 +367,16 @@ void Grid3D::Load_and_Send_MPI_Comm_Buffers(int dir, int *flags) if (transfer_main_buffer) { #if defined(MPI_GPU) // post non-blocking receive left x communication buffer - MPI_Irecv(d_recv_buffer_x0, buffer_length, MPI_CHREAL, source[0], 0, - world, &recv_request[ireq]); + MPI_Irecv(d_recv_buffer_x0, buffer_length, MPI_CHREAL, source[0], 0, world, &recv_request[ireq]); // non-blocking send left x communication buffer - MPI_Isend(d_send_buffer_x0, buffer_length, MPI_CHREAL, dest[0], 1, - world, &send_request[0]); + MPI_Isend(d_send_buffer_x0, buffer_length, MPI_CHREAL, dest[0], 1, world, &send_request[0]); #else // post non-blocking receive left x communication buffer - MPI_Irecv(h_recv_buffer_x0, buffer_length, MPI_CHREAL, source[0], 0, - world, &recv_request[ireq]); + MPI_Irecv(h_recv_buffer_x0, buffer_length, MPI_CHREAL, source[0], 0, world, &recv_request[ireq]); // non-blocking send left x communication buffer - MPI_Isend(h_send_buffer_x0, buffer_length, MPI_CHREAL, dest[0], 1, - world, &send_request[0]); + MPI_Isend(h_send_buffer_x0, buffer_length, MPI_CHREAL, dest[0], 1, world, &send_request[0]); #endif MPI_Request_free(send_request); @@ -424,8 +390,7 @@ void Grid3D::Load_and_Send_MPI_Comm_Buffers(int dir, int *flags) if (H.TRANSFER_HYDRO_BOUNDARIES) { buffer_length = Load_Hydro_DeviceBuffer_X1(d_send_buffer_x1); #ifndef MPI_GPU - cudaMemcpy(h_send_buffer_x1, d_send_buffer_x1, xbsize * sizeof(Real), - cudaMemcpyDeviceToHost); + cudaMemcpy(h_send_buffer_x1, d_send_buffer_x1, xbsize * sizeof(Real), cudaMemcpyDeviceToHost); #endif // printf("X1 len: %d\n", buffer_length); } @@ -433,15 +398,12 @@ void Grid3D::Load_and_Send_MPI_Comm_Buffers(int dir, int *flags) #ifdef GRAVITY if (Grav.TRANSFER_POTENTIAL_BOUNDARIES) { #ifdef GRAVITY_GPU - buffer_length = - Load_Gravity_Potential_To_Buffer_GPU(0, 1, d_send_buffer_x1, 0); + buffer_length = Load_Gravity_Potential_To_Buffer_GPU(0, 1, d_send_buffer_x1, 0); #ifndef MPI_GPU - cudaMemcpy(h_send_buffer_x1, d_send_buffer_x1, xbsize * sizeof(Real), - cudaMemcpyDeviceToHost); + cudaMemcpy(h_send_buffer_x1, d_send_buffer_x1, xbsize * sizeof(Real), cudaMemcpyDeviceToHost); #endif #else - buffer_length = - Load_Gravity_Potential_To_Buffer(0, 1, h_send_buffer_x1, 0); + buffer_length = Load_Gravity_Potential_To_Buffer(0, 1, h_send_buffer_x1, 0); #endif } #ifdef SOR @@ -453,21 +415,16 @@ void Grid3D::Load_and_Send_MPI_Comm_Buffers(int dir, int *flags) #ifdef PARTICLES if (Particles.TRANSFER_DENSITY_BOUNDARIES) { #ifdef PARTICLES_GPU - buffer_length = Load_Particles_Density_Boundary_to_Buffer_GPU( - 0, 1, d_send_buffer_x1); + buffer_length = Load_Particles_Density_Boundary_to_Buffer_GPU(0, 1, d_send_buffer_x1); #ifndef MPI_GPU - cudaMemcpy(h_send_buffer_x1, d_send_buffer_x1, xbsize * sizeof(Real), - cudaMemcpyDeviceToHost); + cudaMemcpy(h_send_buffer_x1, d_send_buffer_x1, xbsize * sizeof(Real), cudaMemcpyDeviceToHost); #endif #else #ifndef MPI_GPU - buffer_length = - Load_Particles_Density_Boundary_to_Buffer(0, 1, h_send_buffer_x1); + buffer_length = Load_Particles_Density_Boundary_to_Buffer(0, 1, h_send_buffer_x1); #else - buffer_length = Load_Particles_Density_Boundary_to_Buffer( - 0, 1, h_send_buffer_x1_particles); - cudaMemcpy(d_send_buffer_x1, h_send_buffer_x1_particles, - buffer_length * sizeof(Real), cudaMemcpyHostToDevice); + buffer_length = Load_Particles_Density_Boundary_to_Buffer(0, 1, h_send_buffer_x1_particles); + cudaMemcpy(d_send_buffer_x1, h_send_buffer_x1_particles, buffer_length * sizeof(Real), cudaMemcpyHostToDevice); #endif #endif } else if (Particles.TRANSFER_PARTICLES_BOUNDARIES) { @@ -481,20 +438,16 @@ void Grid3D::Load_and_Send_MPI_Comm_Buffers(int dir, int *flags) if (transfer_main_buffer) { #if defined(MPI_GPU) // post non-blocking receive right x communication buffer - MPI_Irecv(d_recv_buffer_x1, buffer_length, MPI_CHREAL, source[1], 1, - world, &recv_request[ireq]); + MPI_Irecv(d_recv_buffer_x1, buffer_length, MPI_CHREAL, source[1], 1, world, &recv_request[ireq]); // non-blocking send right x communication buffer - MPI_Isend(d_send_buffer_x1, buffer_length, MPI_CHREAL, dest[1], 0, - world, &send_request[1]); + MPI_Isend(d_send_buffer_x1, buffer_length, MPI_CHREAL, dest[1], 0, world, &send_request[1]); #else // post non-blocking receive right x communication buffer - MPI_Irecv(h_recv_buffer_x1, buffer_length, MPI_CHREAL, source[1], 1, - world, &recv_request[ireq]); + MPI_Irecv(h_recv_buffer_x1, buffer_length, MPI_CHREAL, source[1], 1, world, &recv_request[ireq]); // non-blocking send right x communication buffer - MPI_Isend(h_send_buffer_x1, buffer_length, MPI_CHREAL, dest[1], 0, - world, &send_request[1]); + MPI_Isend(h_send_buffer_x1, buffer_length, MPI_CHREAL, dest[1], 0, world, &send_request[1]); #endif MPI_Request_free(send_request + 1); @@ -505,8 +458,7 @@ void Grid3D::Load_and_Send_MPI_Comm_Buffers(int dir, int *flags) } // Receive the number of particles transfer for X #ifdef PARTICLES - if (Particles.TRANSFER_PARTICLES_BOUNDARIES) - Wait_NTransfer_and_Request_Recv_Particles_Transfer_BLOCK(dir, flags); + if (Particles.TRANSFER_PARTICLES_BOUNDARIES) Wait_NTransfer_and_Request_Recv_Particles_Transfer_BLOCK(dir, flags); #endif } @@ -517,8 +469,7 @@ void Grid3D::Load_and_Send_MPI_Comm_Buffers(int dir, int *flags) if (H.TRANSFER_HYDRO_BOUNDARIES) { buffer_length = Load_Hydro_DeviceBuffer_Y0(d_send_buffer_y0); #ifndef MPI_GPU - cudaMemcpy(h_send_buffer_y0, d_send_buffer_y0, ybsize * sizeof(Real), - cudaMemcpyDeviceToHost); + cudaMemcpy(h_send_buffer_y0, d_send_buffer_y0, ybsize * sizeof(Real), cudaMemcpyDeviceToHost); #endif // printf("Y0 len: %d\n", buffer_length); } @@ -526,15 +477,12 @@ void Grid3D::Load_and_Send_MPI_Comm_Buffers(int dir, int *flags) #ifdef GRAVITY if (Grav.TRANSFER_POTENTIAL_BOUNDARIES) { #ifdef GRAVITY_GPU - buffer_length = - Load_Gravity_Potential_To_Buffer_GPU(1, 0, d_send_buffer_y0, 0); + buffer_length = Load_Gravity_Potential_To_Buffer_GPU(1, 0, d_send_buffer_y0, 0); #ifndef MPI_GPU - cudaMemcpy(h_send_buffer_y0, d_send_buffer_y0, ybsize * sizeof(Real), - cudaMemcpyDeviceToHost); + cudaMemcpy(h_send_buffer_y0, d_send_buffer_y0, ybsize * sizeof(Real), cudaMemcpyDeviceToHost); #endif #else - buffer_length = - Load_Gravity_Potential_To_Buffer(1, 0, h_send_buffer_y0, 0); + buffer_length = Load_Gravity_Potential_To_Buffer(1, 0, h_send_buffer_y0, 0); #endif } #ifdef SOR @@ -546,21 +494,16 @@ void Grid3D::Load_and_Send_MPI_Comm_Buffers(int dir, int *flags) #ifdef PARTICLES if (Particles.TRANSFER_DENSITY_BOUNDARIES) { #ifdef PARTICLES_GPU - buffer_length = Load_Particles_Density_Boundary_to_Buffer_GPU( - 1, 0, d_send_buffer_y0); + buffer_length = Load_Particles_Density_Boundary_to_Buffer_GPU(1, 0, d_send_buffer_y0); #ifndef MPI_GPU - cudaMemcpy(h_send_buffer_y0, d_send_buffer_y0, ybsize * sizeof(Real), - cudaMemcpyDeviceToHost); + cudaMemcpy(h_send_buffer_y0, d_send_buffer_y0, ybsize * sizeof(Real), cudaMemcpyDeviceToHost); #endif #else #ifndef MPI_GPU - buffer_length = - Load_Particles_Density_Boundary_to_Buffer(1, 0, h_send_buffer_y0); + buffer_length = Load_Particles_Density_Boundary_to_Buffer(1, 0, h_send_buffer_y0); #else - buffer_length = Load_Particles_Density_Boundary_to_Buffer( - 1, 0, h_send_buffer_y0_particles); - cudaMemcpy(d_send_buffer_y0, h_send_buffer_y0_particles, - buffer_length * sizeof(Real), cudaMemcpyHostToDevice); + buffer_length = Load_Particles_Density_Boundary_to_Buffer(1, 0, h_send_buffer_y0_particles); + cudaMemcpy(d_send_buffer_y0, h_send_buffer_y0_particles, buffer_length * sizeof(Real), cudaMemcpyHostToDevice); #endif #endif } else if (Particles.TRANSFER_PARTICLES_BOUNDARIES) { @@ -574,20 +517,16 @@ void Grid3D::Load_and_Send_MPI_Comm_Buffers(int dir, int *flags) if (transfer_main_buffer) { #if defined(MPI_GPU) // post non-blocking receive left y communication buffer - MPI_Irecv(d_recv_buffer_y0, buffer_length, MPI_CHREAL, source[2], 2, - world, &recv_request[ireq]); + MPI_Irecv(d_recv_buffer_y0, buffer_length, MPI_CHREAL, source[2], 2, world, &recv_request[ireq]); // non-blocking send left y communication buffer - MPI_Isend(d_send_buffer_y0, buffer_length, MPI_CHREAL, dest[2], 3, - world, &send_request[0]); + MPI_Isend(d_send_buffer_y0, buffer_length, MPI_CHREAL, dest[2], 3, world, &send_request[0]); #else // post non-blocking receive left y communication buffer - MPI_Irecv(h_recv_buffer_y0, buffer_length, MPI_CHREAL, source[2], 2, - world, &recv_request[ireq]); + MPI_Irecv(h_recv_buffer_y0, buffer_length, MPI_CHREAL, source[2], 2, world, &recv_request[ireq]); // non-blocking send left y communication buffer - MPI_Isend(h_send_buffer_y0, buffer_length, MPI_CHREAL, dest[2], 3, - world, &send_request[0]); + MPI_Isend(h_send_buffer_y0, buffer_length, MPI_CHREAL, dest[2], 3, world, &send_request[0]); #endif MPI_Request_free(send_request); @@ -602,8 +541,7 @@ void Grid3D::Load_and_Send_MPI_Comm_Buffers(int dir, int *flags) if (H.TRANSFER_HYDRO_BOUNDARIES) { buffer_length = Load_Hydro_DeviceBuffer_Y1(d_send_buffer_y1); #ifndef MPI_GPU - cudaMemcpy(h_send_buffer_y1, d_send_buffer_y1, ybsize * sizeof(Real), - cudaMemcpyDeviceToHost); + cudaMemcpy(h_send_buffer_y1, d_send_buffer_y1, ybsize * sizeof(Real), cudaMemcpyDeviceToHost); #endif // printf("Y1 len: %d\n", buffer_length); } @@ -611,15 +549,12 @@ void Grid3D::Load_and_Send_MPI_Comm_Buffers(int dir, int *flags) #ifdef GRAVITY if (Grav.TRANSFER_POTENTIAL_BOUNDARIES) { #ifdef GRAVITY_GPU - buffer_length = - Load_Gravity_Potential_To_Buffer_GPU(1, 1, d_send_buffer_y1, 0); + buffer_length = Load_Gravity_Potential_To_Buffer_GPU(1, 1, d_send_buffer_y1, 0); #ifndef MPI_GPU - cudaMemcpy(h_send_buffer_y1, d_send_buffer_y1, ybsize * sizeof(Real), - cudaMemcpyDeviceToHost); + cudaMemcpy(h_send_buffer_y1, d_send_buffer_y1, ybsize * sizeof(Real), cudaMemcpyDeviceToHost); #endif #else - buffer_length = - Load_Gravity_Potential_To_Buffer(1, 1, h_send_buffer_y1, 0); + buffer_length = Load_Gravity_Potential_To_Buffer(1, 1, h_send_buffer_y1, 0); #endif } #ifdef SOR @@ -631,21 +566,16 @@ void Grid3D::Load_and_Send_MPI_Comm_Buffers(int dir, int *flags) #ifdef PARTICLES if (Particles.TRANSFER_DENSITY_BOUNDARIES) { #ifdef PARTICLES_GPU - buffer_length = Load_Particles_Density_Boundary_to_Buffer_GPU( - 1, 1, d_send_buffer_y1); + buffer_length = Load_Particles_Density_Boundary_to_Buffer_GPU(1, 1, d_send_buffer_y1); #ifndef MPI_GPU - cudaMemcpy(h_send_buffer_y1, d_send_buffer_y1, ybsize * sizeof(Real), - cudaMemcpyDeviceToHost); + cudaMemcpy(h_send_buffer_y1, d_send_buffer_y1, ybsize * sizeof(Real), cudaMemcpyDeviceToHost); #endif #else #ifndef MPI_GPU - buffer_length = - Load_Particles_Density_Boundary_to_Buffer(1, 1, h_send_buffer_y1); + buffer_length = Load_Particles_Density_Boundary_to_Buffer(1, 1, h_send_buffer_y1); #else - buffer_length = Load_Particles_Density_Boundary_to_Buffer( - 1, 1, h_send_buffer_y1_particles); - cudaMemcpy(d_send_buffer_y1, h_send_buffer_y1_particles, - buffer_length * sizeof(Real), cudaMemcpyHostToDevice); + buffer_length = Load_Particles_Density_Boundary_to_Buffer(1, 1, h_send_buffer_y1_particles); + cudaMemcpy(d_send_buffer_y1, h_send_buffer_y1_particles, buffer_length * sizeof(Real), cudaMemcpyHostToDevice); #endif #endif } else if (Particles.TRANSFER_PARTICLES_BOUNDARIES) { @@ -659,20 +589,16 @@ void Grid3D::Load_and_Send_MPI_Comm_Buffers(int dir, int *flags) if (transfer_main_buffer) { #if defined(MPI_GPU) // post non-blocking receive right y communication buffer - MPI_Irecv(d_recv_buffer_y1, buffer_length, MPI_CHREAL, source[3], 3, - world, &recv_request[ireq]); + MPI_Irecv(d_recv_buffer_y1, buffer_length, MPI_CHREAL, source[3], 3, world, &recv_request[ireq]); // non-blocking send right y communication buffer - MPI_Isend(d_send_buffer_y1, buffer_length, MPI_CHREAL, dest[3], 2, - world, &send_request[1]); + MPI_Isend(d_send_buffer_y1, buffer_length, MPI_CHREAL, dest[3], 2, world, &send_request[1]); #else // post non-blocking receive right y communication buffer - MPI_Irecv(h_recv_buffer_y1, buffer_length, MPI_CHREAL, source[3], 3, - world, &recv_request[ireq]); + MPI_Irecv(h_recv_buffer_y1, buffer_length, MPI_CHREAL, source[3], 3, world, &recv_request[ireq]); // non-blocking send right y communication buffer - MPI_Isend(h_send_buffer_y1, buffer_length, MPI_CHREAL, dest[3], 2, - world, &send_request[1]); + MPI_Isend(h_send_buffer_y1, buffer_length, MPI_CHREAL, dest[3], 2, world, &send_request[1]); #endif MPI_Request_free(send_request + 1); @@ -682,8 +608,7 @@ void Grid3D::Load_and_Send_MPI_Comm_Buffers(int dir, int *flags) } // Receive the number of particles transfer for Y #ifdef PARTICLES - if (Particles.TRANSFER_PARTICLES_BOUNDARIES) - Wait_NTransfer_and_Request_Recv_Particles_Transfer_BLOCK(dir, flags); + if (Particles.TRANSFER_PARTICLES_BOUNDARIES) Wait_NTransfer_and_Request_Recv_Particles_Transfer_BLOCK(dir, flags); #endif } @@ -694,8 +619,7 @@ void Grid3D::Load_and_Send_MPI_Comm_Buffers(int dir, int *flags) if (H.TRANSFER_HYDRO_BOUNDARIES) { buffer_length = Load_Hydro_DeviceBuffer_Z0(d_send_buffer_z0); #ifndef MPI_GPU - cudaMemcpy(h_send_buffer_z0, d_send_buffer_z0, zbsize * sizeof(Real), - cudaMemcpyDeviceToHost); + cudaMemcpy(h_send_buffer_z0, d_send_buffer_z0, zbsize * sizeof(Real), cudaMemcpyDeviceToHost); #endif // printf("Z0 len: %d\n", buffer_length); } @@ -703,15 +627,12 @@ void Grid3D::Load_and_Send_MPI_Comm_Buffers(int dir, int *flags) #ifdef GRAVITY if (Grav.TRANSFER_POTENTIAL_BOUNDARIES) { #ifdef GRAVITY_GPU - buffer_length = - Load_Gravity_Potential_To_Buffer_GPU(2, 0, d_send_buffer_z0, 0); + buffer_length = Load_Gravity_Potential_To_Buffer_GPU(2, 0, d_send_buffer_z0, 0); #ifndef MPI_GPU - cudaMemcpy(h_send_buffer_z0, d_send_buffer_z0, zbsize * sizeof(Real), - cudaMemcpyDeviceToHost); + cudaMemcpy(h_send_buffer_z0, d_send_buffer_z0, zbsize * sizeof(Real), cudaMemcpyDeviceToHost); #endif #else - buffer_length = - Load_Gravity_Potential_To_Buffer(2, 0, h_send_buffer_z0, 0); + buffer_length = Load_Gravity_Potential_To_Buffer(2, 0, h_send_buffer_z0, 0); #endif } #ifdef SOR @@ -723,21 +644,16 @@ void Grid3D::Load_and_Send_MPI_Comm_Buffers(int dir, int *flags) #ifdef PARTICLES if (Particles.TRANSFER_DENSITY_BOUNDARIES) { #ifdef PARTICLES_GPU - buffer_length = Load_Particles_Density_Boundary_to_Buffer_GPU( - 2, 0, d_send_buffer_z0); + buffer_length = Load_Particles_Density_Boundary_to_Buffer_GPU(2, 0, d_send_buffer_z0); #ifndef MPI_GPU - cudaMemcpy(h_send_buffer_z0, d_send_buffer_z0, zbsize * sizeof(Real), - cudaMemcpyDeviceToHost); + cudaMemcpy(h_send_buffer_z0, d_send_buffer_z0, zbsize * sizeof(Real), cudaMemcpyDeviceToHost); #endif #else #ifndef MPI_GPU - buffer_length = - Load_Particles_Density_Boundary_to_Buffer(2, 0, h_send_buffer_z0); + buffer_length = Load_Particles_Density_Boundary_to_Buffer(2, 0, h_send_buffer_z0); #else - buffer_length = Load_Particles_Density_Boundary_to_Buffer( - 2, 0, h_send_buffer_z0_particles); - cudaMemcpy(d_send_buffer_z0, h_send_buffer_z0_particles, - buffer_length * sizeof(Real), cudaMemcpyHostToDevice); + buffer_length = Load_Particles_Density_Boundary_to_Buffer(2, 0, h_send_buffer_z0_particles); + cudaMemcpy(d_send_buffer_z0, h_send_buffer_z0_particles, buffer_length * sizeof(Real), cudaMemcpyHostToDevice); #endif #endif } else if (Particles.TRANSFER_PARTICLES_BOUNDARIES) { @@ -751,19 +667,15 @@ void Grid3D::Load_and_Send_MPI_Comm_Buffers(int dir, int *flags) if (transfer_main_buffer) { #if defined(MPI_GPU) // post non-blocking receive left z communication buffer - MPI_Irecv(d_recv_buffer_z0, buffer_length, MPI_CHREAL, source[4], 4, - world, &recv_request[ireq]); + MPI_Irecv(d_recv_buffer_z0, buffer_length, MPI_CHREAL, source[4], 4, world, &recv_request[ireq]); // non-blocking send left z communication buffer - MPI_Isend(d_send_buffer_z0, buffer_length, MPI_CHREAL, dest[4], 5, - world, &send_request[0]); + MPI_Isend(d_send_buffer_z0, buffer_length, MPI_CHREAL, dest[4], 5, world, &send_request[0]); #else // post non-blocking receive left z communication buffer - MPI_Irecv(h_recv_buffer_z0, buffer_length, MPI_CHREAL, source[4], 4, - world, &recv_request[ireq]); + MPI_Irecv(h_recv_buffer_z0, buffer_length, MPI_CHREAL, source[4], 4, world, &recv_request[ireq]); // non-blocking send left z communication buffer - MPI_Isend(h_send_buffer_z0, buffer_length, MPI_CHREAL, dest[4], 5, - world, &send_request[0]); + MPI_Isend(h_send_buffer_z0, buffer_length, MPI_CHREAL, dest[4], 5, world, &send_request[0]); #endif MPI_Request_free(send_request); @@ -778,8 +690,7 @@ void Grid3D::Load_and_Send_MPI_Comm_Buffers(int dir, int *flags) if (H.TRANSFER_HYDRO_BOUNDARIES) { buffer_length = Load_Hydro_DeviceBuffer_Z1(d_send_buffer_z1); #ifndef MPI_GPU - cudaMemcpy(h_send_buffer_z1, d_send_buffer_z1, zbsize * sizeof(Real), - cudaMemcpyDeviceToHost); + cudaMemcpy(h_send_buffer_z1, d_send_buffer_z1, zbsize * sizeof(Real), cudaMemcpyDeviceToHost); #endif // printf("Z1 len: %d\n", buffer_length); } @@ -787,15 +698,12 @@ void Grid3D::Load_and_Send_MPI_Comm_Buffers(int dir, int *flags) #ifdef GRAVITY if (Grav.TRANSFER_POTENTIAL_BOUNDARIES) { #ifdef GRAVITY_GPU - buffer_length = - Load_Gravity_Potential_To_Buffer_GPU(2, 1, d_send_buffer_z1, 0); + buffer_length = Load_Gravity_Potential_To_Buffer_GPU(2, 1, d_send_buffer_z1, 0); #ifndef MPI_GPU - cudaMemcpy(h_send_buffer_z1, d_send_buffer_z1, zbsize * sizeof(Real), - cudaMemcpyDeviceToHost); + cudaMemcpy(h_send_buffer_z1, d_send_buffer_z1, zbsize * sizeof(Real), cudaMemcpyDeviceToHost); #endif #else - buffer_length = - Load_Gravity_Potential_To_Buffer(2, 1, h_send_buffer_z1, 0); + buffer_length = Load_Gravity_Potential_To_Buffer(2, 1, h_send_buffer_z1, 0); #endif } #ifdef SOR @@ -807,21 +715,16 @@ void Grid3D::Load_and_Send_MPI_Comm_Buffers(int dir, int *flags) #ifdef PARTICLES if (Particles.TRANSFER_DENSITY_BOUNDARIES) { #ifdef PARTICLES_GPU - buffer_length = Load_Particles_Density_Boundary_to_Buffer_GPU( - 2, 1, d_send_buffer_z1); + buffer_length = Load_Particles_Density_Boundary_to_Buffer_GPU(2, 1, d_send_buffer_z1); #ifndef MPI_GPU - cudaMemcpy(h_send_buffer_z1, d_send_buffer_z1, zbsize * sizeof(Real), - cudaMemcpyDeviceToHost); + cudaMemcpy(h_send_buffer_z1, d_send_buffer_z1, zbsize * sizeof(Real), cudaMemcpyDeviceToHost); #endif #else #ifndef MPI_GPU - buffer_length = - Load_Particles_Density_Boundary_to_Buffer(2, 1, h_send_buffer_z1); + buffer_length = Load_Particles_Density_Boundary_to_Buffer(2, 1, h_send_buffer_z1); #else - buffer_length = Load_Particles_Density_Boundary_to_Buffer( - 2, 1, h_send_buffer_z1_particles); - cudaMemcpy(d_send_buffer_z1, h_send_buffer_z1_particles, - buffer_length * sizeof(Real), cudaMemcpyHostToDevice); + buffer_length = Load_Particles_Density_Boundary_to_Buffer(2, 1, h_send_buffer_z1_particles); + cudaMemcpy(d_send_buffer_z1, h_send_buffer_z1_particles, buffer_length * sizeof(Real), cudaMemcpyHostToDevice); #endif #endif } else if (Particles.TRANSFER_PARTICLES_BOUNDARIES) { @@ -835,20 +738,16 @@ void Grid3D::Load_and_Send_MPI_Comm_Buffers(int dir, int *flags) if (transfer_main_buffer) { #if defined(MPI_GPU) // post non-blocking receive right x communication buffer - MPI_Irecv(d_recv_buffer_z1, buffer_length, MPI_CHREAL, source[5], 5, - world, &recv_request[ireq]); + MPI_Irecv(d_recv_buffer_z1, buffer_length, MPI_CHREAL, source[5], 5, world, &recv_request[ireq]); // non-blocking send right x communication buffer - MPI_Isend(d_send_buffer_z1, buffer_length, MPI_CHREAL, dest[5], 4, - world, &send_request[1]); + MPI_Isend(d_send_buffer_z1, buffer_length, MPI_CHREAL, dest[5], 4, world, &send_request[1]); #else // post non-blocking receive right x communication buffer - MPI_Irecv(h_recv_buffer_z1, buffer_length, MPI_CHREAL, source[5], 5, - world, &recv_request[ireq]); + MPI_Irecv(h_recv_buffer_z1, buffer_length, MPI_CHREAL, source[5], 5, world, &recv_request[ireq]); // non-blocking send right x communication buffer - MPI_Isend(h_send_buffer_z1, buffer_length, MPI_CHREAL, dest[5], 4, - world, &send_request[1]); + MPI_Isend(h_send_buffer_z1, buffer_length, MPI_CHREAL, dest[5], 4, world, &send_request[1]); #endif MPI_Request_free(send_request + 1); @@ -858,8 +757,7 @@ void Grid3D::Load_and_Send_MPI_Comm_Buffers(int dir, int *flags) } // Receive the number of particles transfer for Z #ifdef PARTICLES - if (Particles.TRANSFER_PARTICLES_BOUNDARIES) - Wait_NTransfer_and_Request_Recv_Particles_Transfer_BLOCK(dir, flags); + if (Particles.TRANSFER_PARTICLES_BOUNDARIES) Wait_NTransfer_and_Request_Recv_Particles_Transfer_BLOCK(dir, flags); #endif } } @@ -912,13 +810,10 @@ void Grid3D::Wait_and_Unload_MPI_Comm_Buffers(int dir, int *flags) void Grid3D::Unload_MPI_Comm_Buffers(int index) { // local recv buffers - Real *l_recv_buffer_x0, *l_recv_buffer_x1, *l_recv_buffer_y0, - *l_recv_buffer_y1, *l_recv_buffer_z0, *l_recv_buffer_z1; + Real *l_recv_buffer_x0, *l_recv_buffer_x1, *l_recv_buffer_y0, *l_recv_buffer_y1, *l_recv_buffer_z0, *l_recv_buffer_z1; - Grid3D_PMF_UnloadHydroBuffer Fptr_Unload_Hydro_Buffer_X0, - Fptr_Unload_Hydro_Buffer_X1, Fptr_Unload_Hydro_Buffer_Y0, - Fptr_Unload_Hydro_Buffer_Y1, Fptr_Unload_Hydro_Buffer_Z0, - Fptr_Unload_Hydro_Buffer_Z1; + Grid3D_PMF_UnloadHydroBuffer Fptr_Unload_Hydro_Buffer_X0, Fptr_Unload_Hydro_Buffer_X1, Fptr_Unload_Hydro_Buffer_Y0, + Fptr_Unload_Hydro_Buffer_Y1, Fptr_Unload_Hydro_Buffer_Z0, Fptr_Unload_Hydro_Buffer_Z1; Grid3D_PMF_UnloadGravityPotential Fptr_Unload_Gravity_Potential; Grid3D_PMF_UnloadParticleDensity Fptr_Unload_Particle_Density; @@ -977,8 +872,7 @@ void Grid3D::Unload_MPI_Comm_Buffers(int index) l_recv_buffer_z0 = d_recv_buffer_z0; l_recv_buffer_z1 = d_recv_buffer_z1; - Fptr_Unload_Gravity_Potential = - &Grid3D::Unload_Gravity_Potential_from_Buffer_GPU; + Fptr_Unload_Gravity_Potential = &Grid3D::Unload_Gravity_Potential_from_Buffer_GPU; #else @@ -989,23 +883,16 @@ void Grid3D::Unload_MPI_Comm_Buffers(int index) l_recv_buffer_z0 = h_recv_buffer_z0; l_recv_buffer_z1 = h_recv_buffer_z1; - Fptr_Unload_Gravity_Potential = - &Grid3D::Unload_Gravity_Potential_from_Buffer; + Fptr_Unload_Gravity_Potential = &Grid3D::Unload_Gravity_Potential_from_Buffer; #endif // GRAVITY_GPU - if (index == 0) - (this->*Fptr_Unload_Gravity_Potential)(0, 0, l_recv_buffer_x0, 0); - if (index == 1) - (this->*Fptr_Unload_Gravity_Potential)(0, 1, l_recv_buffer_x1, 0); - if (index == 2) - (this->*Fptr_Unload_Gravity_Potential)(1, 0, l_recv_buffer_y0, 0); - if (index == 3) - (this->*Fptr_Unload_Gravity_Potential)(1, 1, l_recv_buffer_y1, 0); - if (index == 4) - (this->*Fptr_Unload_Gravity_Potential)(2, 0, l_recv_buffer_z0, 0); - if (index == 5) - (this->*Fptr_Unload_Gravity_Potential)(2, 1, l_recv_buffer_z1, 0); + if (index == 0) (this->*Fptr_Unload_Gravity_Potential)(0, 0, l_recv_buffer_x0, 0); + if (index == 1) (this->*Fptr_Unload_Gravity_Potential)(0, 1, l_recv_buffer_x1, 0); + if (index == 2) (this->*Fptr_Unload_Gravity_Potential)(1, 0, l_recv_buffer_y0, 0); + if (index == 3) (this->*Fptr_Unload_Gravity_Potential)(1, 1, l_recv_buffer_y1, 0); + if (index == 4) (this->*Fptr_Unload_Gravity_Potential)(2, 0, l_recv_buffer_z0, 0); + if (index == 5) (this->*Fptr_Unload_Gravity_Potential)(2, 1, l_recv_buffer_z1, 0); } #ifdef SOR @@ -1042,30 +929,17 @@ void Grid3D::Unload_MPI_Comm_Buffers(int index) l_recv_buffer_z0 = d_recv_buffer_z0; l_recv_buffer_z1 = d_recv_buffer_z1; - Fptr_Unload_Particle_Density = - &Grid3D::Unload_Particles_Density_Boundary_From_Buffer_GPU; + Fptr_Unload_Particle_Density = &Grid3D::Unload_Particles_Density_Boundary_From_Buffer_GPU; #else #ifdef MPI_GPU - if (index == 0) - Copy_Particles_Density_Buffer_Device_to_Host(0, 0, d_recv_buffer_x0, - h_recv_buffer_x0_particles); - if (index == 1) - Copy_Particles_Density_Buffer_Device_to_Host(0, 1, d_recv_buffer_x1, - h_recv_buffer_x1_particles); - if (index == 2) - Copy_Particles_Density_Buffer_Device_to_Host(1, 0, d_recv_buffer_y0, - h_recv_buffer_y0_particles); - if (index == 3) - Copy_Particles_Density_Buffer_Device_to_Host(1, 1, d_recv_buffer_y1, - h_recv_buffer_y1_particles); - if (index == 4) - Copy_Particles_Density_Buffer_Device_to_Host(2, 0, d_recv_buffer_z0, - h_recv_buffer_z0_particles); - if (index == 5) - Copy_Particles_Density_Buffer_Device_to_Host(2, 1, d_recv_buffer_z1, - h_recv_buffer_z1_particles); + if (index == 0) Copy_Particles_Density_Buffer_Device_to_Host(0, 0, d_recv_buffer_x0, h_recv_buffer_x0_particles); + if (index == 1) Copy_Particles_Density_Buffer_Device_to_Host(0, 1, d_recv_buffer_x1, h_recv_buffer_x1_particles); + if (index == 2) Copy_Particles_Density_Buffer_Device_to_Host(1, 0, d_recv_buffer_y0, h_recv_buffer_y0_particles); + if (index == 3) Copy_Particles_Density_Buffer_Device_to_Host(1, 1, d_recv_buffer_y1, h_recv_buffer_y1_particles); + if (index == 4) Copy_Particles_Density_Buffer_Device_to_Host(2, 0, d_recv_buffer_z0, h_recv_buffer_z0_particles); + if (index == 5) Copy_Particles_Density_Buffer_Device_to_Host(2, 1, d_recv_buffer_z1, h_recv_buffer_z1_particles); l_recv_buffer_x0 = h_recv_buffer_x0_particles; l_recv_buffer_x1 = h_recv_buffer_x1_particles; l_recv_buffer_y0 = h_recv_buffer_y0_particles; @@ -1081,23 +955,16 @@ void Grid3D::Unload_MPI_Comm_Buffers(int index) l_recv_buffer_z1 = h_recv_buffer_z1; #endif // MPI_GPU - Fptr_Unload_Particle_Density = - &Grid3D::Unload_Particles_Density_Boundary_From_Buffer; + Fptr_Unload_Particle_Density = &Grid3D::Unload_Particles_Density_Boundary_From_Buffer; #endif // PARTICLES_GPU - if (index == 0) - (this->*Fptr_Unload_Particle_Density)(0, 0, l_recv_buffer_x0); - if (index == 1) - (this->*Fptr_Unload_Particle_Density)(0, 1, l_recv_buffer_x1); - if (index == 2) - (this->*Fptr_Unload_Particle_Density)(1, 0, l_recv_buffer_y0); - if (index == 3) - (this->*Fptr_Unload_Particle_Density)(1, 1, l_recv_buffer_y1); - if (index == 4) - (this->*Fptr_Unload_Particle_Density)(2, 0, l_recv_buffer_z0); - if (index == 5) - (this->*Fptr_Unload_Particle_Density)(2, 1, l_recv_buffer_z1); + if (index == 0) (this->*Fptr_Unload_Particle_Density)(0, 0, l_recv_buffer_x0); + if (index == 1) (this->*Fptr_Unload_Particle_Density)(0, 1, l_recv_buffer_x1); + if (index == 2) (this->*Fptr_Unload_Particle_Density)(1, 0, l_recv_buffer_y0); + if (index == 3) (this->*Fptr_Unload_Particle_Density)(1, 1, l_recv_buffer_y1); + if (index == 4) (this->*Fptr_Unload_Particle_Density)(2, 0, l_recv_buffer_z0); + if (index == 5) (this->*Fptr_Unload_Particle_Density)(2, 1, l_recv_buffer_z1); } #endif // PARTICLES diff --git a/src/h_correction/flux_correction.h b/src/h_correction/flux_correction.h index f4294c73a..6b1a2e055 100644 --- a/src/h_correction/flux_correction.h +++ b/src/h_correction/flux_correction.h @@ -5,35 +5,26 @@ #ifndef FLUX_CORRECTION_H #define FLUX_CORRECTION_H -void Flux_Correction_3D(Real *C1, Real *C2, int nx, int ny, int nz, int x_off, - int y_off, int z_off, int n_ghost, Real dx, Real dy, - Real dz, Real xbound, Real ybound, Real zbound, - Real dt); - -void fill_flux_array_pcm(Real *C1, int idl, int idr, Real cW[], int n_cells, - int dir); - -void second_order_fluxes(Real *C1, Real *C2, Real C_i[], Real C_imo[], - Real C_imt[], Real C_ipo[], Real C_ipt[], Real C_jmo[], - Real C_jmt[], Real C_jpo[], Real C_jpt[], Real C_kmo[], - Real C_kmt[], Real C_kpo[], Real C_kpt[], int i, int j, - int k, Real dx, Real dy, Real dz, Real dt, +void Flux_Correction_3D(Real *C1, Real *C2, int nx, int ny, int nz, int x_off, int y_off, int z_off, int n_ghost, + Real dx, Real dy, Real dz, Real xbound, Real ybound, Real zbound, Real dt); + +void fill_flux_array_pcm(Real *C1, int idl, int idr, Real cW[], int n_cells, int dir); + +void second_order_fluxes(Real *C1, Real *C2, Real C_i[], Real C_imo[], Real C_imt[], Real C_ipo[], Real C_ipt[], + Real C_jmo[], Real C_jmt[], Real C_jpo[], Real C_jpt[], Real C_kmo[], Real C_kmt[], + Real C_kpo[], Real C_kpt[], int i, int j, int k, Real dx, Real dy, Real dz, Real dt, int n_fields, int nx, int ny, int nz, int n_cells); -void average_cell(Real *C1, int i, int j, int k, int nx, int ny, int nz, - int n_cells, int n_fields); +void average_cell(Real *C1, int i, int j, int k, int nx, int ny, int nz, int n_cells, int n_fields); -void first_order_fluxes(Real *C1, Real *C2, int i, int j, int k, Real dtodx, - Real dtody, Real dtodz, int nfields, int nx, int ny, - int nz, int n_cells); +void first_order_fluxes(Real *C1, Real *C2, int i, int j, int k, Real dtodx, Real dtody, Real dtodz, int nfields, + int nx, int ny, int nz, int n_cells); -void first_order_update(Real *C1, Real *C_half, int i, int j, int k, Real dtodx, - Real dtody, Real dtodz, int nfields, int nx, int ny, - int nz, int n_cells); +void first_order_update(Real *C1, Real *C_half, int i, int j, int k, Real dtodx, Real dtody, Real dtodz, int nfields, + int nx, int ny, int nz, int n_cells); -void calc_g_3D(int xid, int yid, int zid, int x_off, int y_off, int z_off, - int n_ghost, Real dx, Real dy, Real dz, Real xbound, Real ybound, - Real zbound, Real *gx, Real *gy, Real *gz); +void calc_g_3D(int xid, int yid, int zid, int x_off, int y_off, int z_off, int n_ghost, Real dx, Real dy, Real dz, + Real xbound, Real ybound, Real zbound, Real *gx, Real *gy, Real *gz); void cooling_CPU(Real *C2, int id, int n_cells, Real dt); diff --git a/src/h_correction/h_correction_2D_cuda.cu b/src/h_correction/h_correction_2D_cuda.cu index 3ee7c74e5..f13827644 100644 --- a/src/h_correction/h_correction_2D_cuda.cu +++ b/src/h_correction/h_correction_2D_cuda.cu @@ -16,8 +16,7 @@ * \brief When passed the left and right boundary values at an interface, calculates the eta value for the interface according to the forumulation in Sanders et al, 1998. */ -__global__ void calc_eta_x_2D(Real *dev_bounds_L, Real *dev_bounds_R, - Real *eta_x, int nx, int ny, int n_ghost, +__global__ void calc_eta_x_2D(Real *dev_bounds_L, Real *dev_bounds_R, Real *eta_x, int nx, int ny, int n_ghost, Real gamma) { int n_cells = nx * ny; @@ -34,35 +33,29 @@ __global__ void calc_eta_x_2D(Real *dev_bounds_L, Real *dev_bounds_R, int yid = (tid - zid * nx * ny) / nx; int xid = tid - zid * nx * ny - yid * nx; - if (xid > n_ghost - 2 && xid < nx - n_ghost && yid > n_ghost - 2 && - yid < ny - n_ghost + 1) { + if (xid > n_ghost - 2 && xid < nx - n_ghost && yid > n_ghost - 2 && yid < ny - n_ghost + 1) { // load the interface values into registers id = xid + yid * nx; - pl = - (dev_bounds_L[4 * n_cells + id] - - 0.5 * - (dev_bounds_L[n_cells + id] * dev_bounds_L[n_cells + id] + - dev_bounds_L[2 * n_cells + id] * dev_bounds_L[2 * n_cells + id] + - dev_bounds_L[3 * n_cells + id] * dev_bounds_L[3 * n_cells + id]) / - dev_bounds_L[id]) * - (gamma - 1.0); + pl = (dev_bounds_L[4 * n_cells + id] - 0.5 * + (dev_bounds_L[n_cells + id] * dev_bounds_L[n_cells + id] + + dev_bounds_L[2 * n_cells + id] * dev_bounds_L[2 * n_cells + id] + + dev_bounds_L[3 * n_cells + id] * dev_bounds_L[3 * n_cells + id]) / + dev_bounds_L[id]) * + (gamma - 1.0); pl = fmax(pl, (Real)1.0e-20); - pr = - (dev_bounds_R[4 * n_cells + id] - - 0.5 * - (dev_bounds_R[n_cells + id] * dev_bounds_R[n_cells + id] + - dev_bounds_R[2 * n_cells + id] * dev_bounds_R[2 * n_cells + id] + - dev_bounds_R[3 * n_cells + id] * dev_bounds_R[3 * n_cells + id]) / - dev_bounds_R[id]) * - (gamma - 1.0); + pr = (dev_bounds_R[4 * n_cells + id] - 0.5 * + (dev_bounds_R[n_cells + id] * dev_bounds_R[n_cells + id] + + dev_bounds_R[2 * n_cells + id] * dev_bounds_R[2 * n_cells + id] + + dev_bounds_R[3 * n_cells + id] * dev_bounds_R[3 * n_cells + id]) / + dev_bounds_R[id]) * + (gamma - 1.0); pr = fmax(pr, (Real)1.0e-20); al = sqrt(gamma * pl / dev_bounds_L[id]); ar = sqrt(gamma * pl / dev_bounds_R[id]); - eta_x[id] = - 0.5 * fabs((dev_bounds_R[n_cells + id] / dev_bounds_R[id] + ar) - - (dev_bounds_L[n_cells + id] / dev_bounds_L[id] - al)); + eta_x[id] = 0.5 * fabs((dev_bounds_R[n_cells + id] / dev_bounds_R[id] + ar) - + (dev_bounds_L[n_cells + id] / dev_bounds_L[id] - al)); } } @@ -71,8 +64,7 @@ __global__ void calc_eta_x_2D(Real *dev_bounds_L, Real *dev_bounds_R, * \brief When passed the left and right boundary values at an interface, calculates the eta value for the interface according to the forumulation in Sanders et al, 1998. */ -__global__ void calc_eta_y_2D(Real *dev_bounds_L, Real *dev_bounds_R, - Real *eta_y, int nx, int ny, int n_ghost, +__global__ void calc_eta_y_2D(Real *dev_bounds_L, Real *dev_bounds_R, Real *eta_y, int nx, int ny, int n_ghost, Real gamma) { int n_cells = nx * ny; @@ -89,33 +81,29 @@ __global__ void calc_eta_y_2D(Real *dev_bounds_L, Real *dev_bounds_R, int yid = (tid - zid * nx * ny) / nx; int xid = tid - zid * nx * ny - yid * nx; - if (yid > n_ghost - 2 && yid < ny - n_ghost && xid > n_ghost - 2 && - xid < nx - n_ghost + 1) { + if (yid > n_ghost - 2 && yid < ny - n_ghost && xid > n_ghost - 2 && xid < nx - n_ghost + 1) { // load the interface values into registers id = xid + yid * nx; - pl = (dev_bounds_L[4 * n_cells + id] - - 0.5 * - (dev_bounds_L[2 * n_cells + id] * dev_bounds_L[2 * n_cells + id] + - dev_bounds_L[3 * n_cells + id] * dev_bounds_L[3 * n_cells + id] + - dev_bounds_L[n_cells + id] * dev_bounds_L[n_cells + id]) / - dev_bounds_L[id]) * + pl = (dev_bounds_L[4 * n_cells + id] - 0.5 * + (dev_bounds_L[2 * n_cells + id] * dev_bounds_L[2 * n_cells + id] + + dev_bounds_L[3 * n_cells + id] * dev_bounds_L[3 * n_cells + id] + + dev_bounds_L[n_cells + id] * dev_bounds_L[n_cells + id]) / + dev_bounds_L[id]) * (gamma - 1.0); pl = fmax(pl, (Real)1.0e-20); - pr = (dev_bounds_R[4 * n_cells + id] - - 0.5 * - (dev_bounds_R[2 * n_cells + id] * dev_bounds_R[2 * n_cells + id] + - dev_bounds_R[3 * n_cells + id] * dev_bounds_R[3 * n_cells + id] + - dev_bounds_R[n_cells + id] * dev_bounds_R[n_cells + id]) / - dev_bounds_R[id]) * + pr = (dev_bounds_R[4 * n_cells + id] - 0.5 * + (dev_bounds_R[2 * n_cells + id] * dev_bounds_R[2 * n_cells + id] + + dev_bounds_R[3 * n_cells + id] * dev_bounds_R[3 * n_cells + id] + + dev_bounds_R[n_cells + id] * dev_bounds_R[n_cells + id]) / + dev_bounds_R[id]) * (gamma - 1.0); pr = fmax(pr, (Real)1.0e-20); al = sqrt(gamma * pl / dev_bounds_L[id]); ar = sqrt(gamma * pl / dev_bounds_R[id]); - eta_y[id] = - 0.5 * fabs((dev_bounds_R[2 * n_cells + id] / dev_bounds_R[id] + ar) - - (dev_bounds_L[2 * n_cells + id] / dev_bounds_L[id] - al)); + eta_y[id] = 0.5 * fabs((dev_bounds_R[2 * n_cells + id] / dev_bounds_R[id] + ar) - + (dev_bounds_L[2 * n_cells + id] / dev_bounds_L[id] - al)); } } @@ -124,8 +112,7 @@ __global__ void calc_eta_y_2D(Real *dev_bounds_L, Real *dev_bounds_R, * \brief When passed the eta values at every interface, calculates the eta_h value for the interface according to the forumulation in Sanders et al, 1998. */ -__global__ void calc_etah_x_2D(Real *eta_x, Real *eta_y, Real *etah_x, int nx, - int ny, int n_ghost) +__global__ void calc_etah_x_2D(Real *eta_x, Real *eta_y, Real *etah_x, int nx, int ny, int n_ghost) { // get a thread ID int blockId = blockIdx.x + blockIdx.y * gridDim.x; @@ -137,8 +124,7 @@ __global__ void calc_etah_x_2D(Real *eta_x, Real *eta_y, Real *etah_x, int nx, Real etah; - if (xid > n_ghost - 2 && xid < nx - n_ghost && yid > n_ghost - 1 && - yid < ny - n_ghost) { + if (xid > n_ghost - 2 && xid < nx - n_ghost && yid > n_ghost - 1 && yid < ny - n_ghost) { id = xid + yid * nx; etah = fmax(eta_y[xid + (yid - 1) * nx], eta_y[id]); @@ -155,8 +141,7 @@ __global__ void calc_etah_x_2D(Real *eta_x, Real *eta_y, Real *etah_x, int nx, * \brief When passed the eta values at every interface, calculates the eta_h value for the interface according to the forumulation in Sanders et al, 1998. */ -__global__ void calc_etah_y_2D(Real *eta_x, Real *eta_y, Real *etah_y, int nx, - int ny, int n_ghost) +__global__ void calc_etah_y_2D(Real *eta_x, Real *eta_y, Real *etah_y, int nx, int ny, int n_ghost) { // get a thread ID int blockId = blockIdx.x + blockIdx.y * gridDim.x; @@ -168,8 +153,7 @@ __global__ void calc_etah_y_2D(Real *eta_x, Real *eta_y, Real *etah_y, int nx, Real etah; - if (yid > n_ghost - 2 && yid < ny - n_ghost && xid > n_ghost - 1 && - xid < nx - n_ghost) { + if (yid > n_ghost - 2 && yid < ny - n_ghost && xid > n_ghost - 1 && xid < nx - n_ghost) { id = xid + yid * nx; etah = fmax(eta_x[xid - 1 + yid * nx], eta_x[id]); diff --git a/src/h_correction/h_correction_2D_cuda.h b/src/h_correction/h_correction_2D_cuda.h index 8fea8200f..4fc213133 100644 --- a/src/h_correction/h_correction_2D_cuda.h +++ b/src/h_correction/h_correction_2D_cuda.h @@ -17,8 +17,7 @@ * \brief When passed the left and right boundary values at an interface, calculates the eta value for the interface according to the forumulation in Sanders et al, 1998. */ -__global__ void calc_eta_x_2D(Real *dev_bounds_L, Real *dev_bounds_R, - Real *eta_x, int nx, int ny, int n_ghost, +__global__ void calc_eta_x_2D(Real *dev_bounds_L, Real *dev_bounds_R, Real *eta_x, int nx, int ny, int n_ghost, Real gamma); /*! \fn void calc_eta_y(Real *dev_bounds_L, Real *dev_bounds_R, Real *eta_y, int @@ -26,8 +25,7 @@ __global__ void calc_eta_x_2D(Real *dev_bounds_L, Real *dev_bounds_R, * \brief When passed the left and right boundary values at an interface, calculates the eta value for the interface according to the forumulation in Sanders et al, 1998. */ -__global__ void calc_eta_y_2D(Real *dev_bounds_L, Real *dev_bounds_R, - Real *eta_y, int nx, int ny, int n_ghost, +__global__ void calc_eta_y_2D(Real *dev_bounds_L, Real *dev_bounds_R, Real *eta_y, int nx, int ny, int n_ghost, Real gamma); /*! \fn void calc_etah_x_2D(Real *eta_x, Real *eta_y, Real *etah_x, int nx, int @@ -35,16 +33,14 @@ __global__ void calc_eta_y_2D(Real *dev_bounds_L, Real *dev_bounds_R, * \brief When passed the eta values at every interface, calculates the eta_h value for the interface according to the forumulation in Sanders et al, 1998. */ -__global__ void calc_etah_x_2D(Real *eta_x, Real *eta_y, Real *etah_x, int nx, - int ny, int n_ghost); +__global__ void calc_etah_x_2D(Real *eta_x, Real *eta_y, Real *etah_x, int nx, int ny, int n_ghost); /*! \fn void calc_etah_y_2D(Real *eta_x, Real *eta_y, Real *etah_y, int nx, int ny, int n_ghost) * \brief When passed the eta values at every interface, calculates the eta_h value for the interface according to the forumulation in Sanders et al, 1998. */ -__global__ void calc_etah_y_2D(Real *eta_x, Real *eta_y, Real *etah_y, int nx, - int ny, int n_ghost); +__global__ void calc_etah_y_2D(Real *eta_x, Real *eta_y, Real *etah_y, int nx, int ny, int n_ghost); #endif // H_CORRECTION_2D_H #endif // H_CORRECTION diff --git a/src/h_correction/h_correction_3D_cuda.cu b/src/h_correction/h_correction_3D_cuda.cu index 089665bdc..716332607 100644 --- a/src/h_correction/h_correction_3D_cuda.cu +++ b/src/h_correction/h_correction_3D_cuda.cu @@ -15,8 +15,7 @@ * \brief When passed the left and right boundary values at an interface, calculates the eta value for the interface according to the forumulation in Sanders et al, 1998. */ -__global__ void calc_eta_x_3D(Real *dev_bounds_L, Real *dev_bounds_R, - Real *eta_x, int nx, int ny, int nz, int n_ghost, +__global__ void calc_eta_x_3D(Real *dev_bounds_L, Real *dev_bounds_R, Real *eta_x, int nx, int ny, int nz, int n_ghost, Real gamma) { int n_cells = nx * ny * nz; @@ -33,35 +32,30 @@ __global__ void calc_eta_x_3D(Real *dev_bounds_L, Real *dev_bounds_R, int xid = tid - zid * nx * ny - yid * nx; // x-direction - if (xid > n_ghost - 2 && xid < nx - n_ghost && yid > n_ghost - 2 && - yid < ny - n_ghost + 1 && zid > n_ghost - 2 && zid < nz - n_ghost + 1) { + if (xid > n_ghost - 2 && xid < nx - n_ghost && yid > n_ghost - 2 && yid < ny - n_ghost + 1 && zid > n_ghost - 2 && + zid < nz - n_ghost + 1) { // load the interface values into registers id = xid + yid * nx + zid * nx * ny; - pl = - (dev_bounds_L[4 * n_cells + id] - - 0.5 * - (dev_bounds_L[n_cells + id] * dev_bounds_L[n_cells + id] + - dev_bounds_L[2 * n_cells + id] * dev_bounds_L[2 * n_cells + id] + - dev_bounds_L[3 * n_cells + id] * dev_bounds_L[3 * n_cells + id]) / - dev_bounds_L[id]) * - (gamma - 1.0); + pl = (dev_bounds_L[4 * n_cells + id] - 0.5 * + (dev_bounds_L[n_cells + id] * dev_bounds_L[n_cells + id] + + dev_bounds_L[2 * n_cells + id] * dev_bounds_L[2 * n_cells + id] + + dev_bounds_L[3 * n_cells + id] * dev_bounds_L[3 * n_cells + id]) / + dev_bounds_L[id]) * + (gamma - 1.0); pl = fmax(pl, (Real)1.0e-20); - pr = - (dev_bounds_R[4 * n_cells + id] - - 0.5 * - (dev_bounds_R[n_cells + id] * dev_bounds_R[n_cells + id] + - dev_bounds_R[2 * n_cells + id] * dev_bounds_R[2 * n_cells + id] + - dev_bounds_R[3 * n_cells + id] * dev_bounds_R[3 * n_cells + id]) / - dev_bounds_R[id]) * - (gamma - 1.0); + pr = (dev_bounds_R[4 * n_cells + id] - 0.5 * + (dev_bounds_R[n_cells + id] * dev_bounds_R[n_cells + id] + + dev_bounds_R[2 * n_cells + id] * dev_bounds_R[2 * n_cells + id] + + dev_bounds_R[3 * n_cells + id] * dev_bounds_R[3 * n_cells + id]) / + dev_bounds_R[id]) * + (gamma - 1.0); pr = fmax(pr, (Real)1.0e-20); al = sqrt(gamma * pl / dev_bounds_L[id]); ar = sqrt(gamma * pl / dev_bounds_R[id]); - eta_x[id] = - 0.5 * fabs((dev_bounds_R[n_cells + id] / dev_bounds_R[id] + ar) - - (dev_bounds_L[n_cells + id] / dev_bounds_L[id] - al)); + eta_x[id] = 0.5 * fabs((dev_bounds_R[n_cells + id] / dev_bounds_R[id] + ar) - + (dev_bounds_L[n_cells + id] / dev_bounds_L[id] - al)); } } @@ -70,8 +64,7 @@ __global__ void calc_eta_x_3D(Real *dev_bounds_L, Real *dev_bounds_R, * \brief When passed the left and right boundary values at an interface, calculates the eta value for the interface according to the forumulation in Sanders et al, 1998. */ -__global__ void calc_eta_y_3D(Real *dev_bounds_L, Real *dev_bounds_R, - Real *eta_y, int nx, int ny, int nz, int n_ghost, +__global__ void calc_eta_y_3D(Real *dev_bounds_L, Real *dev_bounds_R, Real *eta_y, int nx, int ny, int nz, int n_ghost, Real gamma) { int n_cells = nx * ny * nz; @@ -88,33 +81,30 @@ __global__ void calc_eta_y_3D(Real *dev_bounds_L, Real *dev_bounds_R, int xid = tid - zid * nx * ny - yid * nx; // y-direction - if (yid > n_ghost - 2 && yid < ny - n_ghost && xid > n_ghost - 2 && - xid < nx - n_ghost + 1 && zid > n_ghost - 2 && zid < nz - n_ghost + 1) { + if (yid > n_ghost - 2 && yid < ny - n_ghost && xid > n_ghost - 2 && xid < nx - n_ghost + 1 && zid > n_ghost - 2 && + zid < nz - n_ghost + 1) { // load the interface values into registers id = xid + yid * nx + zid * nx * ny; - pl = (dev_bounds_L[4 * n_cells + id] - - 0.5 * - (dev_bounds_L[2 * n_cells + id] * dev_bounds_L[2 * n_cells + id] + - dev_bounds_L[3 * n_cells + id] * dev_bounds_L[3 * n_cells + id] + - dev_bounds_L[n_cells + id] * dev_bounds_L[n_cells + id]) / - dev_bounds_L[id]) * + pl = (dev_bounds_L[4 * n_cells + id] - 0.5 * + (dev_bounds_L[2 * n_cells + id] * dev_bounds_L[2 * n_cells + id] + + dev_bounds_L[3 * n_cells + id] * dev_bounds_L[3 * n_cells + id] + + dev_bounds_L[n_cells + id] * dev_bounds_L[n_cells + id]) / + dev_bounds_L[id]) * (gamma - 1.0); pl = fmax(pl, (Real)1.0e-20); - pr = (dev_bounds_R[4 * n_cells + id] - - 0.5 * - (dev_bounds_R[2 * n_cells + id] * dev_bounds_R[2 * n_cells + id] + - dev_bounds_R[3 * n_cells + id] * dev_bounds_R[3 * n_cells + id] + - dev_bounds_R[n_cells + id] * dev_bounds_R[n_cells + id]) / - dev_bounds_R[id]) * + pr = (dev_bounds_R[4 * n_cells + id] - 0.5 * + (dev_bounds_R[2 * n_cells + id] * dev_bounds_R[2 * n_cells + id] + + dev_bounds_R[3 * n_cells + id] * dev_bounds_R[3 * n_cells + id] + + dev_bounds_R[n_cells + id] * dev_bounds_R[n_cells + id]) / + dev_bounds_R[id]) * (gamma - 1.0); pr = fmax(pr, (Real)1.0e-20); al = sqrt(gamma * pl / dev_bounds_L[id]); ar = sqrt(gamma * pl / dev_bounds_R[id]); - eta_y[id] = - 0.5 * fabs((dev_bounds_R[2 * n_cells + id] / dev_bounds_R[id] + ar) - - (dev_bounds_L[2 * n_cells + id] / dev_bounds_L[id] - al)); + eta_y[id] = 0.5 * fabs((dev_bounds_R[2 * n_cells + id] / dev_bounds_R[id] + ar) - + (dev_bounds_L[2 * n_cells + id] / dev_bounds_L[id] - al)); } } @@ -123,8 +113,7 @@ __global__ void calc_eta_y_3D(Real *dev_bounds_L, Real *dev_bounds_R, * \brief When passed the left and right boundary values at an interface, calculates the eta value for the interface according to the forumulation in Sanders et al, 1998. */ -__global__ void calc_eta_z_3D(Real *dev_bounds_L, Real *dev_bounds_R, - Real *eta_z, int nx, int ny, int nz, int n_ghost, +__global__ void calc_eta_z_3D(Real *dev_bounds_L, Real *dev_bounds_R, Real *eta_z, int nx, int ny, int nz, int n_ghost, Real gamma) { int n_cells = nx * ny * nz; @@ -141,35 +130,30 @@ __global__ void calc_eta_z_3D(Real *dev_bounds_L, Real *dev_bounds_R, int xid = tid - zid * nx * ny - yid * nx; // z-direction - if (zid > n_ghost - 2 && zid < nz - n_ghost && xid > n_ghost - 2 && - xid < nx - n_ghost + 1 && yid > n_ghost - 2 && yid < ny - n_ghost + 1) { + if (zid > n_ghost - 2 && zid < nz - n_ghost && xid > n_ghost - 2 && xid < nx - n_ghost + 1 && yid > n_ghost - 2 && + yid < ny - n_ghost + 1) { // load the interface values into registers id = xid + yid * nx + zid * nx * ny; - pl = - (dev_bounds_L[4 * n_cells + id] - - 0.5 * - (dev_bounds_L[3 * n_cells + id] * dev_bounds_L[3 * n_cells + id] + - dev_bounds_L[n_cells + id] * dev_bounds_L[n_cells + id] + - dev_bounds_L[2 * n_cells + id] * dev_bounds_L[2 * n_cells + id]) / - dev_bounds_L[id]) * - (gamma - 1.0); + pl = (dev_bounds_L[4 * n_cells + id] - 0.5 * + (dev_bounds_L[3 * n_cells + id] * dev_bounds_L[3 * n_cells + id] + + dev_bounds_L[n_cells + id] * dev_bounds_L[n_cells + id] + + dev_bounds_L[2 * n_cells + id] * dev_bounds_L[2 * n_cells + id]) / + dev_bounds_L[id]) * + (gamma - 1.0); pl = fmax(pl, (Real)1.0e-20); - pr = - (dev_bounds_R[4 * n_cells + id] - - 0.5 * - (dev_bounds_R[3 * n_cells + id] * dev_bounds_R[3 * n_cells + id] + - dev_bounds_R[n_cells + id] * dev_bounds_R[n_cells + id] + - dev_bounds_R[2 * n_cells + id] * dev_bounds_R[2 * n_cells + id]) / - dev_bounds_R[id]) * - (gamma - 1.0); + pr = (dev_bounds_R[4 * n_cells + id] - 0.5 * + (dev_bounds_R[3 * n_cells + id] * dev_bounds_R[3 * n_cells + id] + + dev_bounds_R[n_cells + id] * dev_bounds_R[n_cells + id] + + dev_bounds_R[2 * n_cells + id] * dev_bounds_R[2 * n_cells + id]) / + dev_bounds_R[id]) * + (gamma - 1.0); pr = fmax(pr, (Real)1.0e-20); al = sqrt(gamma * pl / dev_bounds_L[id]); ar = sqrt(gamma * pl / dev_bounds_R[id]); - eta_z[id] = - 0.5 * fabs((dev_bounds_R[3 * n_cells + id] / dev_bounds_R[id] + ar) - - (dev_bounds_L[3 * n_cells + id] / dev_bounds_L[id] - al)); + eta_z[id] = 0.5 * fabs((dev_bounds_R[3 * n_cells + id] / dev_bounds_R[id] + ar) - + (dev_bounds_L[3 * n_cells + id] / dev_bounds_L[id] - al)); } } @@ -178,9 +162,7 @@ __global__ void calc_eta_z_3D(Real *dev_bounds_L, Real *dev_bounds_R, * \brief When passed the eta values at every interface, calculates the eta_h value for the interface according to the forumulation in Sanders et al, 1998. */ -__global__ void calc_etah_x_3D(Real *eta_x, Real *eta_y, Real *eta_z, - Real *etah_x, int nx, int ny, int nz, - int n_ghost) +__global__ void calc_etah_x_3D(Real *eta_x, Real *eta_y, Real *eta_z, Real *etah_x, int nx, int ny, int nz, int n_ghost) { // get a thread ID int tid = threadIdx.x + blockIdx.x * blockDim.x; @@ -192,12 +174,11 @@ __global__ void calc_etah_x_3D(Real *eta_x, Real *eta_y, Real *eta_z, Real etah; // x-direction - if (xid > n_ghost - 2 && xid < nx - n_ghost && yid > n_ghost - 1 && - yid < ny - n_ghost && zid > n_ghost - 1 && zid < nz - n_ghost) { + if (xid > n_ghost - 2 && xid < nx - n_ghost && yid > n_ghost - 1 && yid < ny - n_ghost && zid > n_ghost - 1 && + zid < nz - n_ghost) { id = xid + yid * nx + zid * nx * ny; - etah = fmax(eta_y[xid + (yid - 1) * nx + zid * nx * ny], - eta_y[xid + 1 + (yid - 1) * nx + zid * nx * ny]); + etah = fmax(eta_y[xid + (yid - 1) * nx + zid * nx * ny], eta_y[xid + 1 + (yid - 1) * nx + zid * nx * ny]); etah = fmax(etah, eta_y[id]); etah = fmax(etah, eta_y[xid + 1 + yid * nx + zid * nx * ny]); @@ -217,9 +198,7 @@ __global__ void calc_etah_x_3D(Real *eta_x, Real *eta_y, Real *eta_z, * \brief When passed the eta values at every interface, calculates the eta_h value for the interface according to the forumulation in Sanders et al, 1998. */ -__global__ void calc_etah_y_3D(Real *eta_x, Real *eta_y, Real *eta_z, - Real *etah_y, int nx, int ny, int nz, - int n_ghost) +__global__ void calc_etah_y_3D(Real *eta_x, Real *eta_y, Real *eta_z, Real *etah_y, int nx, int ny, int nz, int n_ghost) { // get a thread ID int tid = threadIdx.x + blockIdx.x * blockDim.x; @@ -231,12 +210,11 @@ __global__ void calc_etah_y_3D(Real *eta_x, Real *eta_y, Real *eta_z, Real etah; // y-direction - if (yid > n_ghost - 2 && yid < ny - n_ghost && xid > n_ghost - 1 && - xid < nx - n_ghost && zid > n_ghost - 1 && zid < nz - n_ghost) { + if (yid > n_ghost - 2 && yid < ny - n_ghost && xid > n_ghost - 1 && xid < nx - n_ghost && zid > n_ghost - 1 && + zid < nz - n_ghost) { id = xid + yid * nx + zid * nx * ny; - etah = fmax(eta_z[xid + yid * nx + (zid - 1) * nx * ny], - eta_z[xid + (yid + 1) * nx + (zid - 1) * nx * ny]); + etah = fmax(eta_z[xid + yid * nx + (zid - 1) * nx * ny], eta_z[xid + (yid + 1) * nx + (zid - 1) * nx * ny]); etah = fmax(etah, eta_z[id]); etah = fmax(etah, eta_z[xid + (yid + 1) * nx + zid * nx * ny]); @@ -256,9 +234,7 @@ __global__ void calc_etah_y_3D(Real *eta_x, Real *eta_y, Real *eta_z, * \brief When passed the eta values at every interface, calculates the eta_h value for the interface according to the forumulation in Sanders et al, 1998. */ -__global__ void calc_etah_z_3D(Real *eta_x, Real *eta_y, Real *eta_z, - Real *etah_z, int nx, int ny, int nz, - int n_ghost) +__global__ void calc_etah_z_3D(Real *eta_x, Real *eta_y, Real *eta_z, Real *etah_z, int nx, int ny, int nz, int n_ghost) { // get a thread ID int tid = threadIdx.x + blockIdx.x * blockDim.x; @@ -270,12 +246,11 @@ __global__ void calc_etah_z_3D(Real *eta_x, Real *eta_y, Real *eta_z, Real etah; // z-direction - if (zid > n_ghost - 2 && zid < nz - n_ghost && xid > n_ghost - 1 && - xid < nx - n_ghost && yid > n_ghost - 1 && yid < ny - n_ghost) { + if (zid > n_ghost - 2 && zid < nz - n_ghost && xid > n_ghost - 1 && xid < nx - n_ghost && yid > n_ghost - 1 && + yid < ny - n_ghost) { id = xid + yid * nx + zid * nx * ny; - etah = fmax(eta_x[xid - 1 + yid * nx + zid * nx * ny], - eta_x[xid - 1 + yid * nx + (zid + 1) * nx * ny]); + etah = fmax(eta_x[xid - 1 + yid * nx + zid * nx * ny], eta_x[xid - 1 + yid * nx + (zid + 1) * nx * ny]); etah = fmax(etah, eta_x[id]); etah = fmax(etah, eta_x[xid + yid * nx + (zid + 1) * nx * ny]); diff --git a/src/h_correction/h_correction_3D_cuda.h b/src/h_correction/h_correction_3D_cuda.h index 6277ca264..bb2aedd89 100644 --- a/src/h_correction/h_correction_3D_cuda.h +++ b/src/h_correction/h_correction_3D_cuda.h @@ -13,8 +13,7 @@ * \brief When passed the left and right boundary values at an interface, calculates the eta value for the interface according to the forumulation in Sanders et al, 1998. */ -__global__ void calc_eta_x_3D(Real *dev_bounds_L, Real *dev_bounds_R, - Real *eta_x, int nx, int ny, int nz, int n_ghost, +__global__ void calc_eta_x_3D(Real *dev_bounds_L, Real *dev_bounds_R, Real *eta_x, int nx, int ny, int nz, int n_ghost, Real gamma); /*! \fn void calc_eta_y(Real *dev_bounds_L, Real *dev_bounds_R, Real *eta_y, int @@ -22,8 +21,7 @@ __global__ void calc_eta_x_3D(Real *dev_bounds_L, Real *dev_bounds_R, * \brief When passed the left and right boundary values at an interface, calculates the eta value for the interface according to the forumulation in Sanders et al, 1998. */ -__global__ void calc_eta_y_3D(Real *dev_bounds_L, Real *dev_bounds_R, - Real *eta_y, int nx, int ny, int nz, int n_ghost, +__global__ void calc_eta_y_3D(Real *dev_bounds_L, Real *dev_bounds_R, Real *eta_y, int nx, int ny, int nz, int n_ghost, Real gamma); /*! \fn void calc_eta_z(Real *dev_bounds_L, Real *dev_bounds_R, Real *eta_z, int @@ -31,8 +29,7 @@ __global__ void calc_eta_y_3D(Real *dev_bounds_L, Real *dev_bounds_R, * \brief When passed the left and right boundary values at an interface, calculates the eta value for the interface according to the forumulation in Sanders et al, 1998. */ -__global__ void calc_eta_z_3D(Real *dev_bounds_L, Real *dev_bounds_R, - Real *eta_z, int nx, int ny, int nz, int n_ghost, +__global__ void calc_eta_z_3D(Real *dev_bounds_L, Real *dev_bounds_R, Real *eta_z, int nx, int ny, int nz, int n_ghost, Real gamma); /*! \fn void calc_etah_x_3D(Real *eta_x, Real *eta_y, Real *eta_z, Real *etah_x, @@ -40,8 +37,7 @@ __global__ void calc_eta_z_3D(Real *dev_bounds_L, Real *dev_bounds_R, * \brief When passed the eta values at every interface, calculates the eta_h value for the interface according to the forumulation in Sanders et al, 1998. */ -__global__ void calc_etah_x_3D(Real *eta_x, Real *eta_y, Real *eta_z, - Real *etah_x, int nx, int ny, int nz, +__global__ void calc_etah_x_3D(Real *eta_x, Real *eta_y, Real *eta_z, Real *etah_x, int nx, int ny, int nz, int n_ghost); /*! \fn void calc_etah_y_3D(Real *eta_x, Real *eta_y, Real *eta_z, Real *etah_y, @@ -49,8 +45,7 @@ __global__ void calc_etah_x_3D(Real *eta_x, Real *eta_y, Real *eta_z, * \brief When passed the eta values at every interface, calculates the eta_h value for the interface according to the forumulation in Sanders et al, 1998. */ -__global__ void calc_etah_y_3D(Real *eta_x, Real *eta_y, Real *eta_z, - Real *etah_y, int nx, int ny, int nz, +__global__ void calc_etah_y_3D(Real *eta_x, Real *eta_y, Real *eta_z, Real *etah_y, int nx, int ny, int nz, int n_ghost); /*! \fn void calc_etah_z_3D(Real *eta_x, Real *eta_y, Real *eta_z, Real *etah_z, @@ -58,8 +53,7 @@ __global__ void calc_etah_y_3D(Real *eta_x, Real *eta_y, Real *eta_z, * \brief When passed the eta values at every interface, calculates the eta_h value for the interface according to the forumulation in Sanders et al, 1998. */ -__global__ void calc_etah_z_3D(Real *eta_x, Real *eta_y, Real *eta_z, - Real *etah_z, int nx, int ny, int nz, +__global__ void calc_etah_z_3D(Real *eta_x, Real *eta_y, Real *eta_z, Real *etah_z, int nx, int ny, int nz, int n_ghost); #endif // H_CORRECTION_3D_H diff --git a/src/hydro/hydro_cuda.cu b/src/hydro/hydro_cuda.cu index fb401cb0b..cb59cea22 100644 --- a/src/hydro/hydro_cuda.cu +++ b/src/hydro/hydro_cuda.cu @@ -18,10 +18,8 @@ #include "../utils/hydro_utilities.h" #include "../utils/reduction_utilities.h" -__global__ void Update_Conserved_Variables_1D(Real *dev_conserved, Real *dev_F, - int n_cells, int x_off, - int n_ghost, Real dx, Real xbound, - Real dt, Real gamma, int n_fields) +__global__ void Update_Conserved_Variables_1D(Real *dev_conserved, Real *dev_F, int n_cells, int x_off, int n_ghost, + Real dx, Real xbound, Real dt, Real gamma, int n_fields) { int id; #ifdef STATIC_GRAV @@ -45,25 +43,19 @@ __global__ void Update_Conserved_Variables_1D(Real *dev_conserved, Real *dev_F, // update the conserved variable array dev_conserved[id] += dtodx * (dev_F[id - 1] - dev_F[id]); - dev_conserved[n_cells + id] += - dtodx * (dev_F[n_cells + id - 1] - dev_F[n_cells + id]); - dev_conserved[2 * n_cells + id] += - dtodx * (dev_F[2 * n_cells + id - 1] - dev_F[2 * n_cells + id]); - dev_conserved[3 * n_cells + id] += - dtodx * (dev_F[3 * n_cells + id - 1] - dev_F[3 * n_cells + id]); - dev_conserved[4 * n_cells + id] += - dtodx * (dev_F[4 * n_cells + id - 1] - dev_F[4 * n_cells + id]); + dev_conserved[n_cells + id] += dtodx * (dev_F[n_cells + id - 1] - dev_F[n_cells + id]); + dev_conserved[2 * n_cells + id] += dtodx * (dev_F[2 * n_cells + id - 1] - dev_F[2 * n_cells + id]); + dev_conserved[3 * n_cells + id] += dtodx * (dev_F[3 * n_cells + id - 1] - dev_F[3 * n_cells + id]); + dev_conserved[4 * n_cells + id] += dtodx * (dev_F[4 * n_cells + id - 1] - dev_F[4 * n_cells + id]); #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { dev_conserved[(5 + i) * n_cells + id] += - dtodx * - (dev_F[(5 + i) * n_cells + id - 1] - dev_F[(5 + i) * n_cells + id]); + dtodx * (dev_F[(5 + i) * n_cells + id - 1] - dev_F[(5 + i) * n_cells + id]); } #endif #ifdef DE dev_conserved[(n_fields - 1) * n_cells + id] += - dtodx * (dev_F[(n_fields - 1) * n_cells + id - 1] - - dev_F[(n_fields - 1) * n_cells + id]); + dtodx * (dev_F[(n_fields - 1) * n_cells + id - 1] - dev_F[(n_fields - 1) * n_cells + id]); #endif #ifdef STATIC_GRAV // add gravitational source terms, time averaged from n to // n+1 @@ -89,12 +81,9 @@ __global__ void Update_Conserved_Variables_1D(Real *dev_conserved, Real *dev_F, } } -__global__ void Update_Conserved_Variables_2D(Real *dev_conserved, - Real *dev_F_x, Real *dev_F_y, - int nx, int ny, int x_off, - int y_off, int n_ghost, Real dx, - Real dy, Real xbound, Real ybound, - Real dt, Real gamma, int n_fields) +__global__ void Update_Conserved_Variables_2D(Real *dev_conserved, Real *dev_F_x, Real *dev_F_y, int nx, int ny, + int x_off, int y_off, int n_ghost, Real dx, Real dy, Real xbound, + Real ybound, Real dt, Real gamma, int n_fields) { int id, xid, yid, n_cells; int imo, jmo; @@ -120,8 +109,7 @@ __global__ void Update_Conserved_Variables_2D(Real *dev_conserved, jmo = xid + (yid - 1) * nx; // threads corresponding to real cells do the calculation - if (xid > n_ghost - 1 && xid < nx - n_ghost && yid > n_ghost - 1 && - yid < ny - n_ghost) { + if (xid > n_ghost - 1 && xid < nx - n_ghost && yid > n_ghost - 1 && yid < ny - n_ghost) { #ifdef STATIC_GRAV d = dev_conserved[id]; d_inv = 1.0 / d; @@ -129,40 +117,30 @@ __global__ void Update_Conserved_Variables_2D(Real *dev_conserved, vy = dev_conserved[2 * n_cells + id] * d_inv; #endif // update the conserved variable array - dev_conserved[id] += dtodx * (dev_F_x[imo] - dev_F_x[id]) + - dtody * (dev_F_y[jmo] - dev_F_y[id]); - dev_conserved[n_cells + id] += - dtodx * (dev_F_x[n_cells + imo] - dev_F_x[n_cells + id]) + - dtody * (dev_F_y[n_cells + jmo] - dev_F_y[n_cells + id]); - dev_conserved[2 * n_cells + id] += - dtodx * (dev_F_x[2 * n_cells + imo] - dev_F_x[2 * n_cells + id]) + - dtody * (dev_F_y[2 * n_cells + jmo] - dev_F_y[2 * n_cells + id]); - dev_conserved[3 * n_cells + id] += - dtodx * (dev_F_x[3 * n_cells + imo] - dev_F_x[3 * n_cells + id]) + - dtody * (dev_F_y[3 * n_cells + jmo] - dev_F_y[3 * n_cells + id]); - dev_conserved[4 * n_cells + id] += - dtodx * (dev_F_x[4 * n_cells + imo] - dev_F_x[4 * n_cells + id]) + - dtody * (dev_F_y[4 * n_cells + jmo] - dev_F_y[4 * n_cells + id]); + dev_conserved[id] += dtodx * (dev_F_x[imo] - dev_F_x[id]) + dtody * (dev_F_y[jmo] - dev_F_y[id]); + dev_conserved[n_cells + id] += dtodx * (dev_F_x[n_cells + imo] - dev_F_x[n_cells + id]) + + dtody * (dev_F_y[n_cells + jmo] - dev_F_y[n_cells + id]); + dev_conserved[2 * n_cells + id] += dtodx * (dev_F_x[2 * n_cells + imo] - dev_F_x[2 * n_cells + id]) + + dtody * (dev_F_y[2 * n_cells + jmo] - dev_F_y[2 * n_cells + id]); + dev_conserved[3 * n_cells + id] += dtodx * (dev_F_x[3 * n_cells + imo] - dev_F_x[3 * n_cells + id]) + + dtody * (dev_F_y[3 * n_cells + jmo] - dev_F_y[3 * n_cells + id]); + dev_conserved[4 * n_cells + id] += dtodx * (dev_F_x[4 * n_cells + imo] - dev_F_x[4 * n_cells + id]) + + dtody * (dev_F_y[4 * n_cells + jmo] - dev_F_y[4 * n_cells + id]); #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { dev_conserved[(5 + i) * n_cells + id] += - dtodx * (dev_F_x[(5 + i) * n_cells + imo] - - dev_F_x[(5 + i) * n_cells + id]) + - dtody * (dev_F_y[(5 + i) * n_cells + jmo] - - dev_F_y[(5 + i) * n_cells + id]); + dtodx * (dev_F_x[(5 + i) * n_cells + imo] - dev_F_x[(5 + i) * n_cells + id]) + + dtody * (dev_F_y[(5 + i) * n_cells + jmo] - dev_F_y[(5 + i) * n_cells + id]); } #endif #ifdef DE dev_conserved[(n_fields - 1) * n_cells + id] += - dtodx * (dev_F_x[(n_fields - 1) * n_cells + imo] - - dev_F_x[(n_fields - 1) * n_cells + id]) + - dtody * (dev_F_y[(n_fields - 1) * n_cells + jmo] - - dev_F_y[(n_fields - 1) * n_cells + id]); + dtodx * (dev_F_x[(n_fields - 1) * n_cells + imo] - dev_F_x[(n_fields - 1) * n_cells + id]) + + dtody * (dev_F_y[(n_fields - 1) * n_cells + jmo] - dev_F_y[(n_fields - 1) * n_cells + id]); #endif #ifdef STATIC_GRAV // calculate the gravitational acceleration as a function of x & y position - calc_g_2D(xid, yid, x_off, y_off, n_ghost, dx, dy, xbound, ybound, &gx, - &gy); + calc_g_2D(xid, yid, x_off, y_off, n_ghost, dx, dy, xbound, ybound, &gx, &gy); // add gravitational source terms, time averaged from n to n+1 d_n = dev_conserved[id]; d_inv_n = 1.0 / d_n; @@ -171,12 +149,10 @@ __global__ void Update_Conserved_Variables_2D(Real *dev_conserved, dev_conserved[n_cells + id] += 0.5 * dt * gx * (d + d_n); dev_conserved[2 * n_cells + id] += 0.5 * dt * gy * (d + d_n); dev_conserved[4 * n_cells + id] += - 0.25 * dt * gx * (d + d_n) * (vx + vx_n) + - 0.25 * dt * gy * (d + d_n) * (vy + vy_n); + 0.25 * dt * gx * (d + d_n) * (vx + vx_n) + 0.25 * dt * gy * (d + d_n) * (vy + vy_n); #endif if (dev_conserved[id] < 0.0 || dev_conserved[id] != dev_conserved[id]) { - printf("%3d %3d Thread crashed in final update. %f %f %f\n", xid, yid, - dtodx * (dev_F_x[imo] - dev_F_x[id]), + printf("%3d %3d Thread crashed in final update. %f %f %f\n", xid, yid, dtodx * (dev_F_x[imo] - dev_F_x[id]), dtody * (dev_F_y[jmo] - dev_F_y[id]), dev_conserved[id]); } /* @@ -193,12 +169,11 @@ __global__ void Update_Conserved_Variables_2D(Real *dev_conserved, } } -__global__ void Update_Conserved_Variables_3D( - Real *dev_conserved, Real *Q_Lx, Real *Q_Rx, Real *Q_Ly, Real *Q_Ry, - Real *Q_Lz, Real *Q_Rz, Real *dev_F_x, Real *dev_F_y, Real *dev_F_z, int nx, - int ny, int nz, int x_off, int y_off, int z_off, int n_ghost, Real dx, - Real dy, Real dz, Real xbound, Real ybound, Real zbound, Real dt, - Real gamma, int n_fields, Real density_floor, Real *dev_potential) +__global__ void Update_Conserved_Variables_3D(Real *dev_conserved, Real *Q_Lx, Real *Q_Rx, Real *Q_Ly, Real *Q_Ry, + Real *Q_Lz, Real *Q_Rz, Real *dev_F_x, Real *dev_F_y, Real *dev_F_z, + int nx, int ny, int nz, int x_off, int y_off, int z_off, int n_ghost, + Real dx, Real dy, Real dz, Real xbound, Real ybound, Real zbound, Real dt, + Real gamma, int n_fields, Real density_floor, Real *dev_potential) { int id, xid, yid, zid, n_cells; int imo, jmo, kmo; @@ -246,8 +221,8 @@ __global__ void Update_Conserved_Variables_3D( kmo = xid + yid * nx + (zid - 1) * nx * ny; // threads corresponding to real cells do the calculation - if (xid > n_ghost - 1 && xid < nx - n_ghost && yid > n_ghost - 1 && - yid < ny - n_ghost && zid > n_ghost - 1 && zid < nz - n_ghost) { + if (xid > n_ghost - 1 && xid < nx - n_ghost && yid > n_ghost - 1 && yid < ny - n_ghost && zid > n_ghost - 1 && + zid < nz - n_ghost) { #if defined(STATIC_GRAV) || defined(GRAVITY) d = dev_conserved[id]; d_inv = 1.0 / d; @@ -257,57 +232,43 @@ __global__ void Update_Conserved_Variables_3D( #endif // update the conserved variable array - dev_conserved[id] += dtodx * (dev_F_x[imo] - dev_F_x[id]) + - dtody * (dev_F_y[jmo] - dev_F_y[id]) + + dev_conserved[id] += dtodx * (dev_F_x[imo] - dev_F_x[id]) + dtody * (dev_F_y[jmo] - dev_F_y[id]) + dtodz * (dev_F_z[kmo] - dev_F_z[id]); - dev_conserved[n_cells + id] += - dtodx * (dev_F_x[n_cells + imo] - dev_F_x[n_cells + id]) + - dtody * (dev_F_y[n_cells + jmo] - dev_F_y[n_cells + id]) + - dtodz * (dev_F_z[n_cells + kmo] - dev_F_z[n_cells + id]); - dev_conserved[2 * n_cells + id] += - dtodx * (dev_F_x[2 * n_cells + imo] - dev_F_x[2 * n_cells + id]) + - dtody * (dev_F_y[2 * n_cells + jmo] - dev_F_y[2 * n_cells + id]) + - dtodz * (dev_F_z[2 * n_cells + kmo] - dev_F_z[2 * n_cells + id]); - dev_conserved[3 * n_cells + id] += - dtodx * (dev_F_x[3 * n_cells + imo] - dev_F_x[3 * n_cells + id]) + - dtody * (dev_F_y[3 * n_cells + jmo] - dev_F_y[3 * n_cells + id]) + - dtodz * (dev_F_z[3 * n_cells + kmo] - dev_F_z[3 * n_cells + id]); - dev_conserved[4 * n_cells + id] += - dtodx * (dev_F_x[4 * n_cells + imo] - dev_F_x[4 * n_cells + id]) + - dtody * (dev_F_y[4 * n_cells + jmo] - dev_F_y[4 * n_cells + id]) + - dtodz * (dev_F_z[4 * n_cells + kmo] - dev_F_z[4 * n_cells + id]); + dev_conserved[n_cells + id] += dtodx * (dev_F_x[n_cells + imo] - dev_F_x[n_cells + id]) + + dtody * (dev_F_y[n_cells + jmo] - dev_F_y[n_cells + id]) + + dtodz * (dev_F_z[n_cells + kmo] - dev_F_z[n_cells + id]); + dev_conserved[2 * n_cells + id] += dtodx * (dev_F_x[2 * n_cells + imo] - dev_F_x[2 * n_cells + id]) + + dtody * (dev_F_y[2 * n_cells + jmo] - dev_F_y[2 * n_cells + id]) + + dtodz * (dev_F_z[2 * n_cells + kmo] - dev_F_z[2 * n_cells + id]); + dev_conserved[3 * n_cells + id] += dtodx * (dev_F_x[3 * n_cells + imo] - dev_F_x[3 * n_cells + id]) + + dtody * (dev_F_y[3 * n_cells + jmo] - dev_F_y[3 * n_cells + id]) + + dtodz * (dev_F_z[3 * n_cells + kmo] - dev_F_z[3 * n_cells + id]); + dev_conserved[4 * n_cells + id] += dtodx * (dev_F_x[4 * n_cells + imo] - dev_F_x[4 * n_cells + id]) + + dtody * (dev_F_y[4 * n_cells + jmo] - dev_F_y[4 * n_cells + id]) + + dtodz * (dev_F_z[4 * n_cells + kmo] - dev_F_z[4 * n_cells + id]); #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { dev_conserved[(5 + i) * n_cells + id] += - dtodx * (dev_F_x[(5 + i) * n_cells + imo] - - dev_F_x[(5 + i) * n_cells + id]) + - dtody * (dev_F_y[(5 + i) * n_cells + jmo] - - dev_F_y[(5 + i) * n_cells + id]) + - dtodz * (dev_F_z[(5 + i) * n_cells + kmo] - - dev_F_z[(5 + i) * n_cells + id]); + dtodx * (dev_F_x[(5 + i) * n_cells + imo] - dev_F_x[(5 + i) * n_cells + id]) + + dtody * (dev_F_y[(5 + i) * n_cells + jmo] - dev_F_y[(5 + i) * n_cells + id]) + + dtodz * (dev_F_z[(5 + i) * n_cells + kmo] - dev_F_z[(5 + i) * n_cells + id]); #ifdef COOLING_GRACKLE // If the updated value is negative, then revert to the value before the // update if (dev_conserved[(5 + i) * n_cells + id] < 0) { dev_conserved[(5 + i) * n_cells + id] -= - dtodx * (dev_F_x[(5 + i) * n_cells + imo] - - dev_F_x[(5 + i) * n_cells + id]) + - dtody * (dev_F_y[(5 + i) * n_cells + jmo] - - dev_F_y[(5 + i) * n_cells + id]) + - dtodz * (dev_F_z[(5 + i) * n_cells + kmo] - - dev_F_z[(5 + i) * n_cells + id]); + dtodx * (dev_F_x[(5 + i) * n_cells + imo] - dev_F_x[(5 + i) * n_cells + id]) + + dtody * (dev_F_y[(5 + i) * n_cells + jmo] - dev_F_y[(5 + i) * n_cells + id]) + + dtodz * (dev_F_z[(5 + i) * n_cells + kmo] - dev_F_z[(5 + i) * n_cells + id]); } #endif } #endif #ifdef DE dev_conserved[(n_fields - 1) * n_cells + id] += - dtodx * (dev_F_x[(n_fields - 1) * n_cells + imo] - - dev_F_x[(n_fields - 1) * n_cells + id]) + - dtody * (dev_F_y[(n_fields - 1) * n_cells + jmo] - - dev_F_y[(n_fields - 1) * n_cells + id]) + - dtodz * (dev_F_z[(n_fields - 1) * n_cells + kmo] - - dev_F_z[(n_fields - 1) * n_cells + id]); + dtodx * (dev_F_x[(n_fields - 1) * n_cells + imo] - dev_F_x[(n_fields - 1) * n_cells + id]) + + dtody * (dev_F_y[(n_fields - 1) * n_cells + jmo] - dev_F_y[(n_fields - 1) * n_cells + id]) + + dtodz * (dev_F_z[(n_fields - 1) * n_cells + kmo] - dev_F_z[(n_fields - 1) * n_cells + id]); // + 0.5*P*(dtodx*(vx_imo-vx_ipo) + dtody*(vy_jmo-vy_jpo) + // dtodz*(vz_kmo-vz_kpo)); // Note: this term is added in a separate kernel to avoid synchronization @@ -326,21 +287,18 @@ __global__ void Update_Conserved_Variables_3D( dev_conserved[3 * n_cells + id] *= (density_floor / dens_0); dev_conserved[4 * n_cells + id] *= (density_floor / dens_0); #ifdef DE - dev_conserved[(n_fields - 1) * n_cells + id] *= - (density_floor / dens_0); + dev_conserved[(n_fields - 1) * n_cells + id] *= (density_floor / dens_0); #endif } else { // If the density is negative: average the density on that cell dens_0 = dev_conserved[id]; - Average_Cell_Single_Field(0, xid, yid, zid, nx, ny, nz, n_cells, - dev_conserved); + Average_Cell_Single_Field(0, xid, yid, zid, nx, ny, nz, n_cells, dev_conserved); } } #endif // DENSITY_FLOOR #ifdef STATIC_GRAV - calc_g_3D(xid, yid, zid, x_off, y_off, z_off, n_ghost, dx, dy, dz, xbound, - ybound, zbound, &gx, &gy, &gz); + calc_g_3D(xid, yid, zid, x_off, y_off, z_off, n_ghost, dx, dy, dz, xbound, ybound, zbound, &gx, &gy, &gz); d_n = dev_conserved[id]; d_inv_n = 1.0 / d_n; vx_n = dev_conserved[1 * n_cells + id] * d_inv_n; @@ -349,10 +307,9 @@ __global__ void Update_Conserved_Variables_3D( dev_conserved[n_cells + id] += 0.5 * dt * gx * (d + d_n); dev_conserved[2 * n_cells + id] += 0.5 * dt * gy * (d + d_n); dev_conserved[3 * n_cells + id] += 0.5 * dt * gz * (d + d_n); - dev_conserved[4 * n_cells + id] += - 0.25 * dt * gx * (d + d_n) * (vx + vx_n) + - 0.25 * dt * gy * (d + d_n) * (vy + vy_n) + - 0.25 * dt * gz * (d + d_n) * (vz + vz_n); + dev_conserved[4 * n_cells + id] += 0.25 * dt * gx * (d + d_n) * (vx + vx_n) + + 0.25 * dt * gy * (d + d_n) * (vy + vy_n) + + 0.25 * dt * gz * (d + d_n) * (vz + vz_n); #endif #ifdef GRAVITY @@ -415,22 +372,16 @@ __global__ void Update_Conserved_Variables_3D( // Add gravity term to Total Energy // Add the work done by the gravitational force dev_conserved[4 * n_cells + id] += - 0.5 * dt * - (gx * (d * vx + d_n * vx_n) + gy * (d * vy + d_n * vy_n) + - gz * (d * vz + d_n * vz_n)); + 0.5 * dt * (gx * (d * vx + d_n * vx_n) + gy * (d * vy + d_n * vy_n) + gz * (d * vz + d_n * vz_n)); #endif // GRAVITY #if !(defined(DENSITY_FLOOR) && defined(TEMPERATURE_FLOOR)) - if (dev_conserved[id] < 0.0 || dev_conserved[id] != dev_conserved[id] || - dev_conserved[4 * n_cells + id] < 0.0 || + if (dev_conserved[id] < 0.0 || dev_conserved[id] != dev_conserved[id] || dev_conserved[4 * n_cells + id] < 0.0 || dev_conserved[4 * n_cells + id] != dev_conserved[4 * n_cells + id]) { - printf("%3d %3d %3d Thread crashed in final update. %e %e %e %e %e\n", - xid + x_off, yid + y_off, zid + z_off, dev_conserved[id], - dtodx * (dev_F_x[imo] - dev_F_x[id]), - dtody * (dev_F_y[jmo] - dev_F_y[id]), - dtodz * (dev_F_z[kmo] - dev_F_z[id]), - dev_conserved[4 * n_cells + id]); + printf("%3d %3d %3d Thread crashed in final update. %e %e %e %e %e\n", xid + x_off, yid + y_off, zid + z_off, + dev_conserved[id], dtodx * (dev_F_x[imo] - dev_F_x[id]), dtody * (dev_F_y[jmo] - dev_F_y[id]), + dtodz * (dev_F_z[kmo] - dev_F_z[id]), dev_conserved[4 * n_cells + id]); } #endif // DENSITY_FLOOR /* @@ -447,10 +398,9 @@ __global__ void Update_Conserved_Variables_3D( } } -__device__ __host__ Real hydroInverseCrossingTime( - Real const &E, Real const &d, Real const &d_inv, Real const &vx, - Real const &vy, Real const &vz, Real const &dx, Real const &dy, - Real const &dz, Real const &gamma) +__device__ __host__ Real hydroInverseCrossingTime(Real const &E, Real const &d, Real const &d_inv, Real const &vx, + Real const &vy, Real const &vz, Real const &dx, Real const &dy, + Real const &dz, Real const &gamma) { // Compute pressure and sound speed Real P = (E - 0.5 * d * (vx * vx + vy * vy + vz * vz)) * (gamma - 1.0); @@ -464,17 +414,14 @@ __device__ __host__ Real hydroInverseCrossingTime( return cellMaxInverseDt; } -__device__ __host__ Real mhdInverseCrossingTime( - Real const &E, Real const &d, Real const &d_inv, Real const &vx, - Real const &vy, Real const &vz, Real const &avgBx, Real const &avgBy, - Real const &avgBz, Real const &dx, Real const &dy, Real const &dz, - Real const &gamma) +__device__ __host__ Real mhdInverseCrossingTime(Real const &E, Real const &d, Real const &d_inv, Real const &vx, + Real const &vy, Real const &vz, Real const &avgBx, Real const &avgBy, + Real const &avgBz, Real const &dx, Real const &dy, Real const &dz, + Real const &gamma) { // Compute the gas pressure and fast magnetosonic speed - Real gasP = mhd::utils::computeGasPressure(E, d, vx * d, vy * d, vz * d, - avgBx, avgBy, avgBz, gamma); - Real cf = - mhd::utils::fastMagnetosonicSpeed(d, gasP, avgBx, avgBy, avgBz, gamma); + Real gasP = mhd::utils::computeGasPressure(E, d, vx * d, vy * d, vz * d, avgBx, avgBy, avgBz, gamma); + Real cf = mhd::utils::fastMagnetosonicSpeed(d, gasP, avgBx, avgBy, avgBz, gamma); // Find maximum inverse crossing time in the cell (i.e. minimum crossing time) Real cellMaxInverseDt = fmax((fabs(vx) + cf) / dx, (fabs(vy) + cf) / dy); @@ -484,8 +431,7 @@ __device__ __host__ Real mhdInverseCrossingTime( return cellMaxInverseDt; } -__global__ void Calc_dt_1D(Real *dev_conserved, Real *dev_dti, Real gamma, - int n_ghost, int nx, Real dx) +__global__ void Calc_dt_1D(Real *dev_conserved, Real *dev_dti, Real gamma, int n_ghost, int nx, Real dx) { Real max_dti = -DBL_MAX; @@ -497,8 +443,7 @@ __global__ void Calc_dt_1D(Real *dev_conserved, Real *dev_dti, Real gamma, // but setting it to int results in some kind of silent over/underflow issue // even though we're not hitting those kinds of numbers. Setting it to type // uint or size_t fixes them - for (size_t id = threadIdx.x + blockIdx.x * blockDim.x; id < n_cells; - id += blockDim.x * gridDim.x) { + for (size_t id = threadIdx.x + blockIdx.x * blockDim.x; id < n_cells; id += blockDim.x * gridDim.x) { // threads corresponding to real cells do the calculation if (id > n_ghost - 1 && id < n_cells - n_ghost) { // start timestep calculation here @@ -509,10 +454,8 @@ __global__ void Calc_dt_1D(Real *dev_conserved, Real *dev_dti, Real gamma, vx = dev_conserved[1 * n_cells + id] * d_inv; vy = dev_conserved[2 * n_cells + id] * d_inv; vz = dev_conserved[3 * n_cells + id] * d_inv; - P = (dev_conserved[4 * n_cells + id] - - 0.5 * d * (vx * vx + vy * vy + vz * vz)) * - (gamma - 1.0); - P = fmax(P, (Real)TINY_NUMBER); + P = (dev_conserved[4 * n_cells + id] - 0.5 * d * (vx * vx + vy * vy + vz * vz)) * (gamma - 1.0); + P = fmax(P, (Real)TINY_NUMBER); // find the max wavespeed in that cell, use it to calculate the inverse // timestep cs = sqrt(d_inv * gamma * P); @@ -524,8 +467,8 @@ __global__ void Calc_dt_1D(Real *dev_conserved, Real *dev_dti, Real gamma, reduction_utilities::gridReduceMax(max_dti, dev_dti); } -__global__ void Calc_dt_2D(Real *dev_conserved, Real *dev_dti, Real gamma, - int n_ghost, int nx, int ny, Real dx, Real dy) +__global__ void Calc_dt_2D(Real *dev_conserved, Real *dev_dti, Real gamma, int n_ghost, int nx, int ny, Real dx, + Real dy) { Real max_dti = -DBL_MAX; @@ -538,15 +481,13 @@ __global__ void Calc_dt_2D(Real *dev_conserved, Real *dev_dti, Real gamma, // but setting it to int results in some kind of silent over/underflow issue // even though we're not hitting those kinds of numbers. Setting it to type // uint or size_t fixes them - for (size_t id = threadIdx.x + blockIdx.x * blockDim.x; id < n_cells; - id += blockDim.x * gridDim.x) { + for (size_t id = threadIdx.x + blockIdx.x * blockDim.x; id < n_cells; id += blockDim.x * gridDim.x) { // get a global thread ID yid = id / nx; xid = id - yid * nx; // threads corresponding to real cells do the calculation - if (xid > n_ghost - 1 && xid < nx - n_ghost && yid > n_ghost - 1 && - yid < ny - n_ghost) { + if (xid > n_ghost - 1 && xid < nx - n_ghost && yid > n_ghost - 1 && yid < ny - n_ghost) { // every thread collects the conserved variables it needs from global // memory d = dev_conserved[id]; @@ -554,10 +495,8 @@ __global__ void Calc_dt_2D(Real *dev_conserved, Real *dev_dti, Real gamma, vx = dev_conserved[1 * n_cells + id] * d_inv; vy = dev_conserved[2 * n_cells + id] * d_inv; vz = dev_conserved[3 * n_cells + id] * d_inv; - P = (dev_conserved[4 * n_cells + id] - - 0.5 * d * (vx * vx + vy * vy + vz * vz)) * - (gamma - 1.0); - P = fmax(P, (Real)1.0e-20); + P = (dev_conserved[4 * n_cells + id] - 0.5 * d * (vx * vx + vy * vy + vz * vz)) * (gamma - 1.0); + P = fmax(P, (Real)1.0e-20); // find the max wavespeed in that cell, use it to calculate the inverse // timestep cs = sqrt(d_inv * gamma * P); @@ -569,9 +508,8 @@ __global__ void Calc_dt_2D(Real *dev_conserved, Real *dev_dti, Real gamma, reduction_utilities::gridReduceMax(max_dti, dev_dti); } -__global__ void Calc_dt_3D(Real *dev_conserved, Real *dev_dti, Real gamma, - int n_ghost, int n_fields, int nx, int ny, int nz, - Real dx, Real dy, Real dz) +__global__ void Calc_dt_3D(Real *dev_conserved, Real *dev_dti, Real gamma, int n_ghost, int n_fields, int nx, int ny, + int nz, Real dx, Real dy, Real dz) { Real max_dti = -DBL_MAX; @@ -585,14 +523,13 @@ __global__ void Calc_dt_3D(Real *dev_conserved, Real *dev_dti, Real gamma, // but setting it to int results in some kind of silent over/underflow issue // even though we're not hitting those kinds of numbers. Setting it to type // uint or size_t fixes them - for (size_t id = threadIdx.x + blockIdx.x * blockDim.x; id < n_cells; - id += blockDim.x * gridDim.x) { + for (size_t id = threadIdx.x + blockIdx.x * blockDim.x; id < n_cells; id += blockDim.x * gridDim.x) { // get a global thread ID cuda_utilities::compute3DIndices(id, nx, ny, xid, yid, zid); // threads corresponding to real cells do the calculation - if (xid > n_ghost - 1 && xid < nx - n_ghost && yid > n_ghost - 1 && - yid < ny - n_ghost && zid > n_ghost - 1 && zid < nz - n_ghost) { + if (xid > n_ghost - 1 && xid < nx - n_ghost && yid > n_ghost - 1 && yid < ny - n_ghost && zid > n_ghost - 1 && + zid < nz - n_ghost) { // every thread collects the conserved variables it needs from global // memory d = dev_conserved[id]; @@ -606,14 +543,11 @@ __global__ void Calc_dt_3D(Real *dev_conserved, Real *dev_dti, Real gamma, #ifdef MHD // Compute the cell centered magnetic field using a straight average of // the faces - auto const [avgBx, avgBy, avgBz] = mhd::utils::cellCenteredMagneticFields( - dev_conserved, id, xid, yid, zid, n_cells, nx, ny); - max_dti = fmax( - max_dti, mhdInverseCrossingTime(E, d, d_inv, vx, vy, vz, avgBx, avgBy, - avgBz, dx, dy, dz, gamma)); + auto const [avgBx, avgBy, avgBz] = + mhd::utils::cellCenteredMagneticFields(dev_conserved, id, xid, yid, zid, n_cells, nx, ny); + max_dti = fmax(max_dti, mhdInverseCrossingTime(E, d, d_inv, vx, vy, vz, avgBx, avgBy, avgBz, dx, dy, dz, gamma)); #else // not MHD - max_dti = fmax(max_dti, hydroInverseCrossingTime(E, d, d_inv, vx, vy, vz, - dx, dy, dz, gamma)); + max_dti = fmax(max_dti, hydroInverseCrossingTime(E, d, d_inv, vx, vy, vz, dx, dy, dz, gamma)); #endif // MHD } } @@ -622,8 +556,8 @@ __global__ void Calc_dt_3D(Real *dev_conserved, Real *dev_dti, Real gamma, reduction_utilities::gridReduceMax(max_dti, dev_dti); } -Real Calc_dt_GPU(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, - int n_fields, Real dx, Real dy, Real dz, Real gamma) +Real Calc_dt_GPU(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dx, Real dy, Real dz, + Real gamma) { // Allocate the device memory cuda_utilities::DeviceVector static dev_dti(1); @@ -637,24 +571,20 @@ Real Calc_dt_GPU(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, { // set launch parameters for GPU kernels. cuda_utilities::AutomaticLaunchParams static const launchParams(Calc_dt_1D); - hipLaunchKernelGGL(Calc_dt_1D, launchParams.numBlocks, - launchParams.threadsPerBlock, 0, 0, dev_conserved, + hipLaunchKernelGGL(Calc_dt_1D, launchParams.numBlocks, launchParams.threadsPerBlock, 0, 0, dev_conserved, dev_dti.data(), gamma, n_ghost, nx, dx); } else if (nx > 1 && ny > 1 && nz == 1) // 2D { // set launch parameters for GPU kernels. cuda_utilities::AutomaticLaunchParams static const launchParams(Calc_dt_2D); - hipLaunchKernelGGL(Calc_dt_2D, launchParams.numBlocks, - launchParams.threadsPerBlock, 0, 0, dev_conserved, + hipLaunchKernelGGL(Calc_dt_2D, launchParams.numBlocks, launchParams.threadsPerBlock, 0, 0, dev_conserved, dev_dti.data(), gamma, n_ghost, nx, ny, dx, dy); } else if (nx > 1 && ny > 1 && nz > 1) // 3D { // set launch parameters for GPU kernels. cuda_utilities::AutomaticLaunchParams static const launchParams(Calc_dt_3D); - hipLaunchKernelGGL(Calc_dt_3D, launchParams.numBlocks, - launchParams.threadsPerBlock, 0, 0, dev_conserved, - dev_dti.data(), gamma, n_ghost, n_fields, nx, ny, nz, dx, - dy, dz); + hipLaunchKernelGGL(Calc_dt_3D, launchParams.numBlocks, launchParams.threadsPerBlock, 0, 0, dev_conserved, + dev_dti.data(), gamma, n_ghost, n_fields, nx, ny, nz, dx, dy, dz); } CudaCheckError(); @@ -665,9 +595,8 @@ Real Calc_dt_GPU(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, #ifdef AVERAGE_SLOW_CELLS -void Average_Slow_Cells(Real *dev_conserved, int nx, int ny, int nz, - int n_ghost, int n_fields, Real dx, Real dy, Real dz, - Real gamma, Real max_dti_slow) +void Average_Slow_Cells(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dx, Real dy, + Real dz, Real gamma, Real max_dti_slow) { // set values for GPU kernels int n_cells = nx * ny * nz; @@ -678,16 +607,13 @@ void Average_Slow_Cells(Real *dev_conserved, int nx, int ny, int nz, dim3 dim1dBlock(TPB, 1, 1); if (nx > 1 && ny > 1 && nz > 1) { // 3D - hipLaunchKernelGGL(Average_Slow_Cells_3D, dim1dGrid, dim1dBlock, 0, 0, - dev_conserved, nx, ny, nz, n_ghost, n_fields, dx, dy, dz, - gamma, max_dti_slow); + hipLaunchKernelGGL(Average_Slow_Cells_3D, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, nz, n_ghost, n_fields, + dx, dy, dz, gamma, max_dti_slow); } } -__global__ void Average_Slow_Cells_3D(Real *dev_conserved, int nx, int ny, - int nz, int n_ghost, int n_fields, - Real dx, Real dy, Real dz, Real gamma, - Real max_dti_slow) +__global__ void Average_Slow_Cells_3D(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dx, + Real dy, Real dz, Real gamma, Real max_dti_slow) { int id, xid, yid, zid, n_cells; Real d, d_inv, vx, vy, vz, E, max_dti; @@ -700,8 +626,8 @@ __global__ void Average_Slow_Cells_3D(Real *dev_conserved, int nx, int ny, cuda_utilities::compute3DIndices(id, nx, ny, xid, yid, zid); // threads corresponding to real cells do the calculation - if (xid > n_ghost - 1 && xid < nx - n_ghost && yid > n_ghost - 1 && - yid < ny - n_ghost && zid > n_ghost - 1 && zid < nz - n_ghost) { + if (xid > n_ghost - 1 && xid < nx - n_ghost && yid > n_ghost - 1 && yid < ny - n_ghost && zid > n_ghost - 1 && + zid < nz - n_ghost) { d = dev_conserved[id]; d_inv = 1.0 / d; vx = dev_conserved[1 * n_cells + id] * d_inv; @@ -710,34 +636,29 @@ __global__ void Average_Slow_Cells_3D(Real *dev_conserved, int nx, int ny, E = dev_conserved[4 * n_cells + id]; // Compute the maximum inverse crossing time in the cell - max_dti = - hydroInverseCrossingTime(E, d, d_inv, vx, vy, vz, dx, dy, dz, gamma); + max_dti = hydroInverseCrossingTime(E, d, d_inv, vx, vy, vz, dx, dy, dz, gamma); if (max_dti > max_dti_slow) { speed = sqrt(vx * vx + vy * vy + vz * vz); - temp = (gamma - 1) * (E - 0.5 * (speed * speed) * d) * ENERGY_UNIT / - (d * DENSITY_UNIT / 0.6 / MP) / KB; - P = (E - 0.5 * d * (vx * vx + vy * vy + vz * vz)) * (gamma - 1.0); - cs = sqrt(d_inv * gamma * P) * VELOCITY_UNIT * 1e-5; + temp = (gamma - 1) * (E - 0.5 * (speed * speed) * d) * ENERGY_UNIT / (d * DENSITY_UNIT / 0.6 / MP) / KB; + P = (E - 0.5 * d * (vx * vx + vy * vy + vz * vz)) * (gamma - 1.0); + cs = sqrt(d_inv * gamma * P) * VELOCITY_UNIT * 1e-5; // Average this cell kernel_printf( " Average Slow Cell [ %d %d %d ] -> dt_cell=%f dt_min=%f, n=%.3e, " "T=%.3e, v=%.3e (%.3e, %.3e, %.3e), cs=%.3e\n", - xid, yid, zid, 1. / max_dti, 1. / max_dti_slow, - dev_conserved[id] * DENSITY_UNIT / 0.6 / MP, temp, - speed * VELOCITY_UNIT * 1e-5, vx * VELOCITY_UNIT * 1e-5, - vy * VELOCITY_UNIT * 1e-5, vz * VELOCITY_UNIT * 1e-5, cs); - Average_Cell_All_Fields(xid, yid, zid, nx, ny, nz, n_cells, n_fields, - dev_conserved); + xid, yid, zid, 1. / max_dti, 1. / max_dti_slow, dev_conserved[id] * DENSITY_UNIT / 0.6 / MP, temp, + speed * VELOCITY_UNIT * 1e-5, vx * VELOCITY_UNIT * 1e-5, vy * VELOCITY_UNIT * 1e-5, vz * VELOCITY_UNIT * 1e-5, + cs); + Average_Cell_All_Fields(xid, yid, zid, nx, ny, nz, n_cells, n_fields, dev_conserved); } } } #endif // AVERAGE_SLOW_CELLS #ifdef DE -__global__ void Partial_Update_Advected_Internal_Energy_1D( - Real *dev_conserved, Real *Q_Lx, Real *Q_Rx, int nx, int n_ghost, Real dx, - Real dt, Real gamma, int n_fields) +__global__ void Partial_Update_Advected_Internal_Energy_1D(Real *dev_conserved, Real *Q_Lx, Real *Q_Rx, int nx, + int n_ghost, Real dx, Real dt, Real gamma, int n_fields) { int id, xid, n_cells; int imo, ipo; @@ -773,14 +694,13 @@ __global__ void Partial_Update_Advected_Internal_Energy_1D( vx_ipo = dev_conserved[1 * n_cells + ipo] / dev_conserved[ipo]; // Use center values of neighbor cells for the divergence of velocity - dev_conserved[(n_fields - 1) * n_cells + id] += - 0.5 * P * (dtodx * (vx_imo - vx_ipo)); + dev_conserved[(n_fields - 1) * n_cells + id] += 0.5 * P * (dtodx * (vx_imo - vx_ipo)); } } -__global__ void Partial_Update_Advected_Internal_Energy_2D( - Real *dev_conserved, Real *Q_Lx, Real *Q_Rx, Real *Q_Ly, Real *Q_Ry, int nx, - int ny, int n_ghost, Real dx, Real dy, Real dt, Real gamma, int n_fields) +__global__ void Partial_Update_Advected_Internal_Energy_2D(Real *dev_conserved, Real *Q_Lx, Real *Q_Rx, Real *Q_Ly, + Real *Q_Ry, int nx, int ny, int n_ghost, Real dx, Real dy, + Real dt, Real gamma, int n_fields) { int id, xid, yid, n_cells; int imo, jmo; @@ -800,8 +720,7 @@ __global__ void Partial_Update_Advected_Internal_Energy_2D( xid = id - yid * nx; // threads corresponding to real cells do the calculation - if (xid > n_ghost - 1 && xid < nx - n_ghost && yid > n_ghost - 1 && - yid < ny - n_ghost) { + if (xid > n_ghost - 1 && xid < nx - n_ghost && yid > n_ghost - 1 && yid < ny - n_ghost) { d = dev_conserved[id]; d_inv = 1.0 / d; vx = dev_conserved[1 * n_cells + id] * d_inv; @@ -825,15 +744,14 @@ __global__ void Partial_Update_Advected_Internal_Energy_2D( vy_jpo = dev_conserved[2 * n_cells + jpo] / dev_conserved[jpo]; // Use center values of neighbor cells for the divergence of velocity - dev_conserved[(n_fields - 1) * n_cells + id] += - 0.5 * P * (dtodx * (vx_imo - vx_ipo) + dtody * (vy_jmo - vy_jpo)); + dev_conserved[(n_fields - 1) * n_cells + id] += 0.5 * P * (dtodx * (vx_imo - vx_ipo) + dtody * (vy_jmo - vy_jpo)); } } -__global__ void Partial_Update_Advected_Internal_Energy_3D( - Real *dev_conserved, Real *Q_Lx, Real *Q_Rx, Real *Q_Ly, Real *Q_Ry, - Real *Q_Lz, Real *Q_Rz, int nx, int ny, int nz, int n_ghost, Real dx, - Real dy, Real dz, Real dt, Real gamma, int n_fields) +__global__ void Partial_Update_Advected_Internal_Energy_3D(Real *dev_conserved, Real *Q_Lx, Real *Q_Rx, Real *Q_Ly, + Real *Q_Ry, Real *Q_Lz, Real *Q_Rz, int nx, int ny, int nz, + int n_ghost, Real dx, Real dy, Real dz, Real dt, Real gamma, + int n_fields) { int id, xid, yid, zid, n_cells; int imo, jmo, kmo; @@ -855,8 +773,8 @@ __global__ void Partial_Update_Advected_Internal_Energy_3D( xid = id - zid * nx * ny - yid * nx; // threads corresponding to real cells do the calculation - if (xid > n_ghost - 1 && xid < nx - n_ghost && yid > n_ghost - 1 && - yid < ny - n_ghost && zid > n_ghost - 1 && zid < nz - n_ghost) { + if (xid > n_ghost - 1 && xid < nx - n_ghost && yid > n_ghost - 1 && yid < ny - n_ghost && zid > n_ghost - 1 && + zid < nz - n_ghost) { d = dev_conserved[id]; d_inv = 1.0 / d; vx = dev_conserved[1 * n_cells + id] * d_inv; @@ -868,10 +786,8 @@ __global__ void Partial_Update_Advected_Internal_Energy_3D( E_kin = hydro_utilities::Calc_Kinetic_Energy_From_Velocity(d, vx, vy, vz); #ifdef MHD // Add the magnetic energy - auto [centeredBx, centeredBy, centeredBz] = - mhd::utils::cellCenteredMagneticFields(dev_conserved, id, xid, yid, zid, - n_cells, nx, ny) E_kin += - mhd::utils::computeMagneticEnergy(magX, magY, magZ); + auto [centeredBx, centeredBy, centeredBz] = mhd::utils::cellCenteredMagneticFields( + dev_conserved, id, xid, yid, zid, n_cells, nx, ny) E_kin += mhd::utils::computeMagneticEnergy(magX, magY, magZ); #endif // MHD P = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, GE, gamma); P = fmax(P, (Real)TINY_NUMBER); @@ -893,9 +809,7 @@ __global__ void Partial_Update_Advected_Internal_Energy_3D( // Use center values of neighbor cells for the divergence of velocity dev_conserved[(n_fields - 1) * n_cells + id] += - 0.5 * P * - (dtodx * (vx_imo - vx_ipo) + dtody * (vy_jmo - vy_jpo) + - dtodz * (vz_kmo - vz_kpo)); + 0.5 * P * (dtodx * (vx_imo - vx_ipo) + dtody * (vy_jmo - vy_jpo) + dtodz * (vz_kmo - vz_kpo)); // OPTION 2: Use the reconstructed velocities to compute the velocity // gradient @@ -915,8 +829,7 @@ __global__ void Partial_Update_Advected_Internal_Energy_3D( } } -__global__ void Select_Internal_Energy_1D(Real *dev_conserved, int nx, - int n_ghost, int n_fields) +__global__ void Select_Internal_Energy_1D(Real *dev_conserved, int nx, int n_ghost, int n_fields) { int id, xid, n_cells; Real d, d_inv, vx, vy, vz, E, U_total, U_advected, U, Emax; @@ -963,8 +876,7 @@ __global__ void Select_Internal_Energy_1D(Real *dev_conserved, int nx, } } -__global__ void Select_Internal_Energy_2D(Real *dev_conserved, int nx, int ny, - int n_ghost, int n_fields) +__global__ void Select_Internal_Energy_2D(Real *dev_conserved, int nx, int ny, int n_ghost, int n_fields) { int id, xid, yid, n_cells; Real d, d_inv, vx, vy, vz, E, U_total, U_advected, U, Emax; @@ -989,8 +901,7 @@ __global__ void Select_Internal_Energy_2D(Real *dev_conserved, int nx, int ny, jpo = xid + jpo * nx; // threads corresponding to real cells do the calculation - if (xid > n_ghost - 1 && xid < nx - n_ghost && yid > n_ghost - 1 && - yid < ny - n_ghost) { + if (xid > n_ghost - 1 && xid < nx - n_ghost && yid > n_ghost - 1 && yid < ny - n_ghost) { // every thread collects the conserved variables it needs from global memory d = dev_conserved[id]; d_inv = 1.0 / d; @@ -1022,8 +933,7 @@ __global__ void Select_Internal_Energy_2D(Real *dev_conserved, int nx, int ny, } } -__global__ void Select_Internal_Energy_3D(Real *dev_conserved, int nx, int ny, - int nz, int n_ghost, int n_fields) +__global__ void Select_Internal_Energy_3D(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields) { int id, xid, yid, zid, n_cells; Real d, d_inv, vx, vy, vz, E, U_total, U_advected, U, Emax; @@ -1052,8 +962,8 @@ __global__ void Select_Internal_Energy_3D(Real *dev_conserved, int nx, int ny, kpo = xid + yid * nx + kpo * nx * ny; // threads corresponding to real cells do the calculation - if (xid > n_ghost - 1 && xid < nx - n_ghost && yid > n_ghost - 1 && - yid < ny - n_ghost && zid > n_ghost - 1 && zid < nz - n_ghost) { + if (xid > n_ghost - 1 && xid < nx - n_ghost && yid > n_ghost - 1 && yid < ny - n_ghost && zid > n_ghost - 1 && + zid < nz - n_ghost) { // every thread collects the conserved variables it needs from global memory d = dev_conserved[id]; d_inv = 1.0 / d; @@ -1087,8 +997,7 @@ __global__ void Select_Internal_Energy_3D(Real *dev_conserved, int nx, int ny, } } -__global__ void Sync_Energies_1D(Real *dev_conserved, int nx, int n_ghost, - Real gamma, int n_fields) +__global__ void Sync_Energies_1D(Real *dev_conserved, int nx, int n_ghost, Real gamma, int n_fields) { int id, xid, n_cells; Real d, d_inv, vx, vy, vz, U; @@ -1109,13 +1018,11 @@ __global__ void Sync_Energies_1D(Real *dev_conserved, int nx, int n_ghost, U = dev_conserved[(n_fields - 1) * n_cells + id]; // Use the previously selected Internal Energy to update the total energy - dev_conserved[4 * n_cells + id] = - 0.5 * d * (vx * vx + vy * vy + vz * vz) + U; + dev_conserved[4 * n_cells + id] = 0.5 * d * (vx * vx + vy * vy + vz * vz) + U; } } -__global__ void Sync_Energies_2D(Real *dev_conserved, int nx, int ny, - int n_ghost, Real gamma, int n_fields) +__global__ void Sync_Energies_2D(Real *dev_conserved, int nx, int ny, int n_ghost, Real gamma, int n_fields) { int id, xid, yid, n_cells; Real d, d_inv, vx, vy, vz, U; @@ -1128,8 +1035,7 @@ __global__ void Sync_Energies_2D(Real *dev_conserved, int nx, int ny, xid = id - yid * nx; // threads corresponding to real cells do the calculation - if (xid > n_ghost - 1 && xid < nx - n_ghost && yid > n_ghost - 1 && - yid < ny - n_ghost) { + if (xid > n_ghost - 1 && xid < nx - n_ghost && yid > n_ghost - 1 && yid < ny - n_ghost) { // every thread collects the conserved variables it needs from global memory d = dev_conserved[id]; d_inv = 1.0 / d; @@ -1139,13 +1045,11 @@ __global__ void Sync_Energies_2D(Real *dev_conserved, int nx, int ny, U = dev_conserved[(n_fields - 1) * n_cells + id]; // Use the previously selected Internal Energy to update the total energy - dev_conserved[4 * n_cells + id] = - 0.5 * d * (vx * vx + vy * vy + vz * vz) + U; + dev_conserved[4 * n_cells + id] = 0.5 * d * (vx * vx + vy * vy + vz * vz) + U; } } -__global__ void Sync_Energies_3D(Real *dev_conserved, int nx, int ny, int nz, - int n_ghost, Real gamma, int n_fields) +__global__ void Sync_Energies_3D(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, Real gamma, int n_fields) { // Called in a separate kernel to avoid interfering with energy selection in // Select_Internal_Energy @@ -1161,8 +1065,8 @@ __global__ void Sync_Energies_3D(Real *dev_conserved, int nx, int ny, int nz, xid = id - zid * nx * ny - yid * nx; // threads corresponding to real cells do the calculation - if (xid > n_ghost - 1 && xid < nx - n_ghost && yid > n_ghost - 1 && - yid < ny - n_ghost && zid > n_ghost - 1 && zid < nz - n_ghost) { + if (xid > n_ghost - 1 && xid < nx - n_ghost && yid > n_ghost - 1 && yid < ny - n_ghost && zid > n_ghost - 1 && + zid < nz - n_ghost) { // every thread collects the conserved variables it needs from global memory d = dev_conserved[id]; d_inv = 1.0 / d; @@ -1172,16 +1076,14 @@ __global__ void Sync_Energies_3D(Real *dev_conserved, int nx, int ny, int nz, U = dev_conserved[(n_fields - 1) * n_cells + id]; // Use the previously selected Internal Energy to update the total energy - dev_conserved[4 * n_cells + id] = - 0.5 * d * (vx * vx + vy * vy + vz * vz) + U; + dev_conserved[4 * n_cells + id] = 0.5 * d * (vx * vx + vy * vy + vz * vz) + U; } } #endif // DE #ifdef TEMPERATURE_FLOOR -__global__ void Apply_Temperature_Floor(Real *dev_conserved, int nx, int ny, - int nz, int n_ghost, int n_fields, +__global__ void Apply_Temperature_Floor(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real U_floor) { int id, xid, yid, zid, n_cells; @@ -1195,8 +1097,8 @@ __global__ void Apply_Temperature_Floor(Real *dev_conserved, int nx, int ny, xid = id - zid * nx * ny - yid * nx; // threads corresponding to real cells do the calculation - if (xid > n_ghost - 1 && xid < nx - n_ghost && yid > n_ghost - 1 && - yid < ny - n_ghost && zid > n_ghost - 1 && zid < nz - n_ghost) { + if (xid > n_ghost - 1 && xid < nx - n_ghost && yid > n_ghost - 1 && yid < ny - n_ghost && zid > n_ghost - 1 && + zid < nz - n_ghost) { d = dev_conserved[id]; d_inv = 1.0 / d; vx = dev_conserved[1 * n_cells + id] * d_inv; @@ -1216,8 +1118,7 @@ __global__ void Apply_Temperature_Floor(Real *dev_conserved, int nx, int ny, } #endif // TEMPERATURE_FLOOR -__device__ Real Average_Cell_Single_Field(int field_indx, int i, int j, int k, - int nx, int ny, int nz, int ncells, +__device__ Real Average_Cell_Single_Field(int field_indx, int i, int j, int k, int nx, int ny, int nz, int ncells, Real *conserved) { Real v_l, v_r, v_d, v_u, v_b, v_t, v_avrg; @@ -1241,8 +1142,7 @@ __device__ Real Average_Cell_Single_Field(int field_indx, int i, int j, int k, return v_avrg; } -__device__ void Average_Cell_All_Fields(int i, int j, int k, int nx, int ny, - int nz, int ncells, int n_fields, +__device__ void Average_Cell_All_Fields(int i, int j, int k, int nx, int ny, int nz, int ncells, int n_fields, Real *conserved) { // Average Density @@ -1258,8 +1158,7 @@ __device__ void Average_Cell_All_Fields(int i, int j, int k, int nx, int ny, #ifdef DE // Average GasEnergy - Average_Cell_Single_Field(n_fields - 1, i, j, k, nx, ny, nz, ncells, - conserved); + Average_Cell_Single_Field(n_fields - 1, i, j, k, nx, ny, nz, ncells, conserved); #endif // DE } diff --git a/src/hydro/hydro_cuda.h b/src/hydro/hydro_cuda.h index 99bb98820..a5c4ab713 100644 --- a/src/hydro/hydro_cuda.h +++ b/src/hydro/hydro_cuda.h @@ -8,23 +8,18 @@ #include "../global/global.h" #include "../utils/mhd_utilities.h" -__global__ void Update_Conserved_Variables_1D(Real *dev_conserved, Real *dev_F, - int n_cells, int x_off, - int n_ghost, Real dx, Real xbound, - Real dt, Real gamma, - int n_fields); - -__global__ void Update_Conserved_Variables_2D( - Real *dev_conserved, Real *dev_F_x, Real *dev_F_y, int nx, int ny, - int x_off, int y_off, int n_ghost, Real dx, Real dy, Real xbound, - Real ybound, Real dt, Real gamma, int n_fields); - -__global__ void Update_Conserved_Variables_3D( - Real *dev_conserved, Real *Q_Lx, Real *Q_Rx, Real *Q_Ly, Real *Q_Ry, - Real *Q_Lz, Real *Q_Rz, Real *dev_F_x, Real *dev_F_y, Real *dev_F_z, int nx, - int ny, int nz, int x_off, int y_off, int z_off, int n_ghost, Real dx, - Real dy, Real dz, Real xbound, Real ybound, Real zbound, Real dt, - Real gamma, int n_fields, Real density_floor, Real *dev_potential); +__global__ void Update_Conserved_Variables_1D(Real *dev_conserved, Real *dev_F, int n_cells, int x_off, int n_ghost, + Real dx, Real xbound, Real dt, Real gamma, int n_fields); + +__global__ void Update_Conserved_Variables_2D(Real *dev_conserved, Real *dev_F_x, Real *dev_F_y, int nx, int ny, + int x_off, int y_off, int n_ghost, Real dx, Real dy, Real xbound, + Real ybound, Real dt, Real gamma, int n_fields); + +__global__ void Update_Conserved_Variables_3D(Real *dev_conserved, Real *Q_Lx, Real *Q_Rx, Real *Q_Ly, Real *Q_Ry, + Real *Q_Lz, Real *Q_Rz, Real *dev_F_x, Real *dev_F_y, Real *dev_F_z, + int nx, int ny, int nz, int x_off, int y_off, int z_off, int n_ghost, + Real dx, Real dy, Real dz, Real xbound, Real ybound, Real zbound, Real dt, + Real gamma, int n_fields, Real density_floor, Real *dev_potential); /*! * \brief Determine the maximum inverse crossing time in a specific cell @@ -41,10 +36,9 @@ __global__ void Update_Conserved_Variables_3D( * \param[in] gamma The adiabatic index * \return Real The maximum inverse crossing time in the cell */ -__device__ __host__ Real hydroInverseCrossingTime( - Real const &E, Real const &d, Real const &d_inv, Real const &vx, - Real const &vy, Real const &vz, Real const &dx, Real const &dy, - Real const &dz, Real const &gamma); +__device__ __host__ Real hydroInverseCrossingTime(Real const &E, Real const &d, Real const &d_inv, Real const &vx, + Real const &vy, Real const &vz, Real const &dx, Real const &dy, + Real const &dz, Real const &gamma); /*! * \brief Determine the maximum inverse crossing time in a specific cell @@ -64,74 +58,59 @@ __device__ __host__ Real hydroInverseCrossingTime( * \param[in] gamma The adiabatic index * \return Real The maximum inverse crossing time in the cell */ -__device__ __host__ Real mhdInverseCrossingTime( - Real const &E, Real const &d, Real const &d_inv, Real const &vx, - Real const &vy, Real const &vz, Real const &avgBx, Real const &avgBy, - Real const &avgBz, Real const &dx, Real const &dy, Real const &dz, - Real const &gamma); +__device__ __host__ Real mhdInverseCrossingTime(Real const &E, Real const &d, Real const &d_inv, Real const &vx, + Real const &vy, Real const &vz, Real const &avgBx, Real const &avgBy, + Real const &avgBz, Real const &dx, Real const &dy, Real const &dz, + Real const &gamma); -__global__ void Calc_dt_3D(Real *dev_conserved, Real *dev_dti, Real gamma, - int n_ghost, int n_fields, int nx, int ny, int nz, - Real dx, Real dy, Real dz); +__global__ void Calc_dt_3D(Real *dev_conserved, Real *dev_dti, Real gamma, int n_ghost, int n_fields, int nx, int ny, + int nz, Real dx, Real dy, Real dz); -Real Calc_dt_GPU(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, - int n_fields, Real dx, Real dy, Real dz, Real gamma); +Real Calc_dt_GPU(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dx, Real dy, Real dz, + Real gamma); -__global__ void Sync_Energies_1D(Real *dev_conserved, int nx, int n_ghost, - Real gamma, int n_fields); +__global__ void Sync_Energies_1D(Real *dev_conserved, int nx, int n_ghost, Real gamma, int n_fields); -__global__ void Sync_Energies_2D(Real *dev_conserved, int nx, int ny, - int n_ghost, Real gamma, int n_fields); +__global__ void Sync_Energies_2D(Real *dev_conserved, int nx, int ny, int n_ghost, Real gamma, int n_fields); -__global__ void Sync_Energies_3D(Real *dev_conserved, int nx, int ny, int nz, - int n_ghost, Real gamma, int n_fields); +__global__ void Sync_Energies_3D(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, Real gamma, int n_fields); #ifdef AVERAGE_SLOW_CELLS -void Average_Slow_Cells(Real *dev_conserved, int nx, int ny, int nz, - int n_ghost, int n_fields, Real dx, Real dy, Real dz, - Real gamma, Real max_dti_slow); +void Average_Slow_Cells(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dx, Real dy, + Real dz, Real gamma, Real max_dti_slow); -__global__ void Average_Slow_Cells_3D(Real *dev_conserved, int nx, int ny, - int nz, int n_ghost, int n_fields, - Real dx, Real dy, Real dz, Real gamma, - Real max_dti_slow); +__global__ void Average_Slow_Cells_3D(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dx, + Real dy, Real dz, Real gamma, Real max_dti_slow); #endif #ifdef TEMPERATURE_FLOOR -__global__ void Apply_Temperature_Floor(Real *dev_conserved, int nx, int ny, - int nz, int n_ghost, int n_fields, +__global__ void Apply_Temperature_Floor(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real U_floor); #endif -__global__ void Partial_Update_Advected_Internal_Energy_1D( - Real *dev_conserved, Real *Q_Lx, Real *Q_Rx, int nx, int n_ghost, Real dx, - Real dt, Real gamma, int n_fields); +__global__ void Partial_Update_Advected_Internal_Energy_1D(Real *dev_conserved, Real *Q_Lx, Real *Q_Rx, int nx, + int n_ghost, Real dx, Real dt, Real gamma, int n_fields); -__global__ void Partial_Update_Advected_Internal_Energy_2D( - Real *dev_conserved, Real *Q_Lx, Real *Q_Rx, Real *Q_Ly, Real *Q_Ry, int nx, - int ny, int n_ghost, Real dx, Real dy, Real dt, Real gamma, int n_fields); +__global__ void Partial_Update_Advected_Internal_Energy_2D(Real *dev_conserved, Real *Q_Lx, Real *Q_Rx, Real *Q_Ly, + Real *Q_Ry, int nx, int ny, int n_ghost, Real dx, Real dy, + Real dt, Real gamma, int n_fields); -__global__ void Partial_Update_Advected_Internal_Energy_3D( - Real *dev_conserved, Real *Q_Lx, Real *Q_Rx, Real *Q_Ly, Real *Q_Ry, - Real *Q_Lz, Real *Q_Rz, int nx, int ny, int nz, int n_ghost, Real dx, - Real dy, Real dz, Real dt, Real gamma, int n_fields); +__global__ void Partial_Update_Advected_Internal_Energy_3D(Real *dev_conserved, Real *Q_Lx, Real *Q_Rx, Real *Q_Ly, + Real *Q_Ry, Real *Q_Lz, Real *Q_Rz, int nx, int ny, int nz, + int n_ghost, Real dx, Real dy, Real dz, Real dt, Real gamma, + int n_fields); -__global__ void Select_Internal_Energy_1D(Real *dev_conserved, int nx, - int n_ghost, int n_fields); +__global__ void Select_Internal_Energy_1D(Real *dev_conserved, int nx, int n_ghost, int n_fields); -__global__ void Select_Internal_Energy_2D(Real *dev_conserved, int nx, int ny, - int n_ghost, int n_fields); +__global__ void Select_Internal_Energy_2D(Real *dev_conserved, int nx, int ny, int n_ghost, int n_fields); -__global__ void Select_Internal_Energy_3D(Real *dev_conserved, int nx, int ny, - int nz, int n_ghost, int n_fields); +__global__ void Select_Internal_Energy_3D(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields); -__device__ void Average_Cell_All_Fields(int i, int j, int k, int nx, int ny, - int nz, int ncells, int n_fields, +__device__ void Average_Cell_All_Fields(int i, int j, int k, int nx, int ny, int nz, int ncells, int n_fields, Real *conserved); -__device__ Real Average_Cell_Single_Field(int field_indx, int i, int j, int k, - int nx, int ny, int nz, int ncells, +__device__ Real Average_Cell_Single_Field(int field_indx, int i, int j, int k, int nx, int ny, int nz, int ncells, Real *conserved); #endif // HYDRO_CUDA_H diff --git a/src/hydro/hydro_cuda_tests.cu b/src/hydro/hydro_cuda_tests.cu index 4d4752ad9..482564462 100644 --- a/src/hydro/hydro_cuda_tests.cu +++ b/src/hydro/hydro_cuda_tests.cu @@ -61,8 +61,7 @@ TEST(tHYDROCalcDt3D, CorrectInputExpectCorrectOutput) // dz) // Run the kernel - hipLaunchKernelGGL(Calc_dt_3D, dim1dGrid, dim1dBlock, 0, 0, - dev_conserved.data(), dev_dti.data(), gamma, n_ghost, + hipLaunchKernelGGL(Calc_dt_3D, dim1dGrid, dim1dBlock, 0, 0, dev_conserved.data(), dev_dti.data(), gamma, n_ghost, n_fields, nx, ny, nz, dx, dy, dz); CudaCheckError(); @@ -73,16 +72,11 @@ TEST(tHYDROCalcDt3D, CorrectInputExpectCorrectOutput) double absoluteDiff; int64_t ulpsDiff; bool areEqual; - areEqual = testingUtilities::nearlyEqualDbl(fiducialDt, testData, - absoluteDiff, ulpsDiff); - EXPECT_TRUE(areEqual) << "The fiducial value is: " << fiducialDt - << std::endl - << "The test value is: " << testData - << std::endl - << "The absolute difference is: " << absoluteDiff - << std::endl - << "The ULP difference is: " << ulpsDiff - << std::endl; + areEqual = testingUtilities::nearlyEqualDbl(fiducialDt, testData, absoluteDiff, ulpsDiff); + EXPECT_TRUE(areEqual) << "The fiducial value is: " << fiducialDt << std::endl + << "The test value is: " << testData << std::endl + << "The absolute difference is: " << absoluteDiff << std::endl + << "The ULP difference is: " << ulpsDiff << std::endl; } // ============================================================================= // End of tests for the Calc_dt_GPU function @@ -108,14 +102,11 @@ TEST(tHYDROHydroInverseCrossingTime, CorrectInputExpectCorrectOutput) double const fiducialInverseCrossingTime = 0.038751126881804446; // Function to test - double testInverseCrossingTime = hydroInverseCrossingTime( - energy, density, 1. / density, velocityX, velocityY, velocityZ, cellSizeX, - cellSizeY, cellSizeZ, gamma); + double testInverseCrossingTime = hydroInverseCrossingTime(energy, density, 1. / density, velocityX, velocityY, + velocityZ, cellSizeX, cellSizeY, cellSizeZ, gamma); // Check results - testingUtilities::checkResults(fiducialInverseCrossingTime, - testInverseCrossingTime, - "inverse crossing time"); + testingUtilities::checkResults(fiducialInverseCrossingTime, testInverseCrossingTime, "inverse crossing time"); } // ============================================================================= // End of tests for the hydroInverseCrossingTime function @@ -144,14 +135,12 @@ TEST(tMHDMhdInverseCrossingTime, CorrectInputExpectCorrectOutput) double const fiducialInverseCrossingTime = 0.038688028391959103; // Function to test - double testInverseCrossingTime = mhdInverseCrossingTime( - energy, density, 1. / density, velocityX, velocityY, velocityZ, magneticX, - magneticY, magneticZ, cellSizeX, cellSizeY, cellSizeZ, gamma); + double testInverseCrossingTime = + mhdInverseCrossingTime(energy, density, 1. / density, velocityX, velocityY, velocityZ, magneticX, magneticY, + magneticZ, cellSizeX, cellSizeY, cellSizeZ, gamma); // Check results - testingUtilities::checkResults(fiducialInverseCrossingTime, - testInverseCrossingTime, - "inverse crossing time"); + testingUtilities::checkResults(fiducialInverseCrossingTime, testInverseCrossingTime, "inverse crossing time"); } // ============================================================================= // End of tests for the mhdInverseCrossingTime function diff --git a/src/integrators/VL_1D_cuda.cu b/src/integrators/VL_1D_cuda.cu index bc40da90d..e1764e386 100644 --- a/src/integrators/VL_1D_cuda.cu +++ b/src/integrators/VL_1D_cuda.cu @@ -24,12 +24,12 @@ #include "../utils/error_handling.h" #include "../utils/gpu.hpp" -__global__ void Update_Conserved_Variables_1D_half( - Real *dev_conserved, Real *dev_conserved_half, Real *dev_F, int n_cells, - int n_ghost, Real dx, Real dt, Real gamma, int n_fields); +__global__ void Update_Conserved_Variables_1D_half(Real *dev_conserved, Real *dev_conserved_half, Real *dev_F, + int n_cells, int n_ghost, Real dx, Real dt, Real gamma, + int n_fields); -void VL_Algorithm_1D_CUDA(Real *d_conserved, int nx, int x_off, int n_ghost, - Real dx, Real xbound, Real dt, int n_fields) +void VL_Algorithm_1D_CUDA(Real *d_conserved, int nx, int x_off, int n_ghost, Real dx, Real xbound, Real dt, + int n_fields) { // Here, *dev_conserved contains the entire // set of conserved variables on the grid @@ -48,8 +48,7 @@ void VL_Algorithm_1D_CUDA(Real *d_conserved, int nx, int x_off, int n_ghost, dev_conserved = d_conserved; // CudaSafeCall( cudaMalloc((void**)&dev_conserved, // n_fields*n_cells*sizeof(Real)) ); - CudaSafeCall(cudaMalloc((void **)&dev_conserved_half, - n_fields * n_cells * sizeof(Real))); + CudaSafeCall(cudaMalloc((void **)&dev_conserved_half, n_fields * n_cells * sizeof(Real))); CudaSafeCall(cudaMalloc((void **)&Q_Lx, n_fields * n_cells * sizeof(Real))); CudaSafeCall(cudaMalloc((void **)&Q_Rx, n_fields * n_cells * sizeof(Real))); CudaSafeCall(cudaMalloc((void **)&F_x, n_fields * n_cells * sizeof(Real))); @@ -63,72 +62,66 @@ void VL_Algorithm_1D_CUDA(Real *d_conserved, int nx, int x_off, int n_ghost, // Step 1: Use PCM reconstruction to put conserved variables into interface // arrays - hipLaunchKernelGGL(PCM_Reconstruction_1D, dimGrid, dimBlock, 0, 0, - dev_conserved, Q_Lx, Q_Rx, nx, n_ghost, gama, n_fields); + hipLaunchKernelGGL(PCM_Reconstruction_1D, dimGrid, dimBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, n_ghost, gama, + n_fields); CudaCheckError(); // Step 2: Calculate first-order upwind fluxes #ifdef EXACT - hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dimGrid, dimBlock, 0, 0, Q_Lx, - Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); + hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dimGrid, dimBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, + 0, n_fields); #endif #ifdef ROE - hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dimGrid, dimBlock, 0, 0, Q_Lx, - Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); + hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dimGrid, dimBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, + n_fields); #endif #ifdef HLLC - hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dimGrid, dimBlock, 0, 0, Q_Lx, - Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); + hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dimGrid, dimBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, + n_fields); #endif CudaCheckError(); // Step 3: Update the conserved variables half a timestep - hipLaunchKernelGGL(Update_Conserved_Variables_1D_half, dimGrid, dimBlock, 0, - 0, dev_conserved, dev_conserved_half, F_x, n_cells, - n_ghost, dx, 0.5 * dt, gama, n_fields); + hipLaunchKernelGGL(Update_Conserved_Variables_1D_half, dimGrid, dimBlock, 0, 0, dev_conserved, dev_conserved_half, + F_x, n_cells, n_ghost, dx, 0.5 * dt, gama, n_fields); CudaCheckError(); // Step 4: Construct left and right interface values using updated conserved // variables #ifdef PCM - hipLaunchKernelGGL(PCM_Reconstruction_1D, dimGrid, dimBlock, 0, 0, - dev_conserved_half, Q_Lx, Q_Rx, nx, n_ghost, gama, + hipLaunchKernelGGL(PCM_Reconstruction_1D, dimGrid, dimBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, n_ghost, gama, n_fields); #endif #ifdef PLMC - hipLaunchKernelGGL(PLMC_cuda, dimGrid, dimBlock, 0, 0, dev_conserved_half, - Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, - n_fields); + hipLaunchKernelGGL(PLMC_cuda, dimGrid, dimBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, + gama, 0, n_fields); #endif #ifdef PLMP - hipLaunchKernelGGL(PLMP_cuda, dimGrid, dimBlock, 0, 0, dev_conserved_half, - Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, - n_fields); + hipLaunchKernelGGL(PLMP_cuda, dimGrid, dimBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, + gama, 0, n_fields); #endif #ifdef PPMP - hipLaunchKernelGGL(PPMP_cuda, dimGrid, dimBlock, 0, 0, dev_conserved_half, - Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, - n_fields); + hipLaunchKernelGGL(PPMP_cuda, dimGrid, dimBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, + gama, 0, n_fields); #endif #ifdef PPMC - hipLaunchKernelGGL(PPMC_cuda, dimGrid, dimBlock, 0, 0, dev_conserved_half, - Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, - n_fields); + hipLaunchKernelGGL(PPMC_cuda, dimGrid, dimBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, + gama, 0, n_fields); #endif CudaCheckError(); // Step 5: Calculate the fluxes again #ifdef EXACT - hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dimGrid, dimBlock, 0, 0, Q_Lx, - Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); + hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dimGrid, dimBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, + 0, n_fields); #endif #ifdef ROE - hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dimGrid, dimBlock, 0, 0, Q_Lx, - Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); + hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dimGrid, dimBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, + n_fields); #endif #ifdef HLLC - hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dimGrid, dimBlock, 0, 0, Q_Lx, - Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); + hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dimGrid, dimBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, + n_fields); #endif CudaCheckError(); @@ -136,22 +129,18 @@ void VL_Algorithm_1D_CUDA(Real *d_conserved, int nx, int x_off, int n_ghost, // Compute the divergence of velocity before updating the conserved array, // this solves synchronization issues when adding this term on // Update_Conserved_Variables - hipLaunchKernelGGL(Partial_Update_Advected_Internal_Energy_1D, dimGrid, - dimBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, n_ghost, dx, - dt, gama, n_fields); + hipLaunchKernelGGL(Partial_Update_Advected_Internal_Energy_1D, dimGrid, dimBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, + n_ghost, dx, dt, gama, n_fields); #endif // Step 6: Update the conserved variable array - hipLaunchKernelGGL(Update_Conserved_Variables_1D, dimGrid, dimBlock, 0, 0, - dev_conserved, F_x, n_cells, x_off, n_ghost, dx, xbound, - dt, gama, n_fields); + hipLaunchKernelGGL(Update_Conserved_Variables_1D, dimGrid, dimBlock, 0, 0, dev_conserved, F_x, n_cells, x_off, + n_ghost, dx, xbound, dt, gama, n_fields); CudaCheckError(); #ifdef DE - hipLaunchKernelGGL(Select_Internal_Energy_1D, dimGrid, dimBlock, 0, 0, - dev_conserved, nx, n_ghost, n_fields); - hipLaunchKernelGGL(Sync_Energies_1D, dimGrid, dimBlock, 0, 0, dev_conserved, - nx, n_ghost, gama, n_fields); + hipLaunchKernelGGL(Select_Internal_Energy_1D, dimGrid, dimBlock, 0, 0, dev_conserved, nx, n_ghost, n_fields); + hipLaunchKernelGGL(Sync_Energies_1D, dimGrid, dimBlock, 0, 0, dev_conserved, nx, n_ghost, gama, n_fields); CudaCheckError(); #endif @@ -168,9 +157,8 @@ void Free_Memory_VL_1D() cudaFree(F_x); } -__global__ void Update_Conserved_Variables_1D_half( - Real *dev_conserved, Real *dev_conserved_half, Real *dev_F, int n_cells, - int n_ghost, Real dx, Real dt, Real gamma, int n_fields) +__global__ void Update_Conserved_Variables_1D_half(Real *dev_conserved, Real *dev_conserved_half, Real *dev_F, + int n_cells, int n_ghost, Real dx, Real dt, Real gamma, int n_fields) { int id, imo; Real dtodx = dt / dx; @@ -194,9 +182,7 @@ __global__ void Update_Conserved_Variables_1D_half( vx = dev_conserved[1 * n_cells + id] * d_inv; vy = dev_conserved[2 * n_cells + id] * d_inv; vz = dev_conserved[3 * n_cells + id] * d_inv; - P = (dev_conserved[4 * n_cells + id] - - 0.5 * d * (vx * vx + vy * vy + vz * vz)) * - (gamma - 1.0); + P = (dev_conserved[4 * n_cells + id] - 0.5 * d * (vx * vx + vy * vy + vz * vz)) * (gamma - 1.0); // if (d < 0.0 || d != d) printf("Negative density before half step // update.\n"); if (P < 0.0) printf("%d Negative pressure before half step // update.\n", id); @@ -205,33 +191,26 @@ __global__ void Update_Conserved_Variables_1D_half( vx_ipo = dev_conserved[1 * n_cells + ipo] / dev_conserved[ipo]; #endif // update the conserved variable array - dev_conserved_half[id] = - dev_conserved[id] + dtodx * (dev_F[imo] - dev_F[id]); + dev_conserved_half[id] = dev_conserved[id] + dtodx * (dev_F[imo] - dev_F[id]); dev_conserved_half[n_cells + id] = - dev_conserved[n_cells + id] + - dtodx * (dev_F[n_cells + imo] - dev_F[n_cells + id]); + dev_conserved[n_cells + id] + dtodx * (dev_F[n_cells + imo] - dev_F[n_cells + id]); dev_conserved_half[2 * n_cells + id] = - dev_conserved[2 * n_cells + id] + - dtodx * (dev_F[2 * n_cells + imo] - dev_F[2 * n_cells + id]); + dev_conserved[2 * n_cells + id] + dtodx * (dev_F[2 * n_cells + imo] - dev_F[2 * n_cells + id]); dev_conserved_half[3 * n_cells + id] = - dev_conserved[3 * n_cells + id] + - dtodx * (dev_F[3 * n_cells + imo] - dev_F[3 * n_cells + id]); + dev_conserved[3 * n_cells + id] + dtodx * (dev_F[3 * n_cells + imo] - dev_F[3 * n_cells + id]); dev_conserved_half[4 * n_cells + id] = - dev_conserved[4 * n_cells + id] + - dtodx * (dev_F[4 * n_cells + imo] - dev_F[4 * n_cells + id]); + dev_conserved[4 * n_cells + id] + dtodx * (dev_F[4 * n_cells + imo] - dev_F[4 * n_cells + id]); #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { dev_conserved_half[(5 + i) * n_cells + id] = dev_conserved[(5 + i) * n_cells + id] + - dtodx * - (dev_F[(5 + i) * n_cells + imo] - dev_F[(5 + i) * n_cells + id]); + dtodx * (dev_F[(5 + i) * n_cells + imo] - dev_F[(5 + i) * n_cells + id]); } #endif #ifdef DE dev_conserved_half[(n_fields - 1) * n_cells + id] = dev_conserved[(n_fields - 1) * n_cells + id] + - dtodx * (dev_F[(n_fields - 1) * n_cells + imo] - - dev_F[(n_fields - 1) * n_cells + id]) + + dtodx * (dev_F[(n_fields - 1) * n_cells + imo] - dev_F[(n_fields - 1) * n_cells + id]) + 0.5 * P * (dtodx * (vx_imo - vx_ipo)); #endif } diff --git a/src/integrators/VL_1D_cuda.h b/src/integrators/VL_1D_cuda.h index bbbfc12b8..da8837956 100644 --- a/src/integrators/VL_1D_cuda.h +++ b/src/integrators/VL_1D_cuda.h @@ -8,8 +8,8 @@ #include "../global/global.h" -void VL_Algorithm_1D_CUDA(Real *d_conserved, int nx, int x_off, int n_ghost, - Real dx, Real xbound, Real dt, int n_fields); +void VL_Algorithm_1D_CUDA(Real *d_conserved, int nx, int x_off, int n_ghost, Real dx, Real xbound, Real dt, + int n_fields); void Free_Memory_VL_1D(); diff --git a/src/integrators/VL_2D_cuda.cu b/src/integrators/VL_2D_cuda.cu index a3c851c11..05bbbec6f 100644 --- a/src/integrators/VL_2D_cuda.cu +++ b/src/integrators/VL_2D_cuda.cu @@ -21,16 +21,12 @@ #include "../riemann_solvers/roe_cuda.h" #include "../utils/gpu.hpp" -__global__ void Update_Conserved_Variables_2D_half(Real *dev_conserved, - Real *dev_conserved_half, - Real *dev_F_x, Real *dev_F_y, - int nx, int ny, int n_ghost, - Real dx, Real dy, Real dt, - Real gamma, int n_fields); +__global__ void Update_Conserved_Variables_2D_half(Real *dev_conserved, Real *dev_conserved_half, Real *dev_F_x, + Real *dev_F_y, int nx, int ny, int n_ghost, Real dx, Real dy, + Real dt, Real gamma, int n_fields); -void VL_Algorithm_2D_CUDA(Real *d_conserved, int nx, int ny, int x_off, - int y_off, int n_ghost, Real dx, Real dy, Real xbound, - Real ybound, Real dt, int n_fields) +void VL_Algorithm_2D_CUDA(Real *d_conserved, int nx, int ny, int x_off, int y_off, int n_ghost, Real dx, Real dy, + Real xbound, Real ybound, Real dt, int n_fields) { // Here, *dev_conserved contains the entire // set of conserved variables on the grid @@ -51,8 +47,7 @@ void VL_Algorithm_2D_CUDA(Real *d_conserved, int nx, int ny, int x_off, // CudaSafeCall( cudaMalloc((void**)&dev_conserved, // n_fields*n_cells*sizeof(Real)) ); dev_conserved = d_conserved; - CudaSafeCall(cudaMalloc((void **)&dev_conserved_half, - n_fields * n_cells * sizeof(Real))); + CudaSafeCall(cudaMalloc((void **)&dev_conserved_half, n_fields * n_cells * sizeof(Real))); CudaSafeCall(cudaMalloc((void **)&Q_Lx, n_fields * n_cells * sizeof(Real))); CudaSafeCall(cudaMalloc((void **)&Q_Rx, n_fields * n_cells * sizeof(Real))); CudaSafeCall(cudaMalloc((void **)&Q_Ly, n_fields * n_cells * sizeof(Real))); @@ -69,92 +64,82 @@ void VL_Algorithm_2D_CUDA(Real *d_conserved, int nx, int ny, int x_off, // Step 1: Use PCM reconstruction to put conserved variables into interface // arrays - hipLaunchKernelGGL(PCM_Reconstruction_2D, dim2dGrid, dim1dBlock, 0, 0, - dev_conserved, Q_Lx, Q_Rx, Q_Ly, Q_Ry, nx, ny, n_ghost, - gama, n_fields); + hipLaunchKernelGGL(PCM_Reconstruction_2D, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, Q_Ly, Q_Ry, nx, ny, + n_ghost, gama, n_fields); CudaCheckError(); // Step 2: Calculate first-order upwind fluxes #ifdef EXACT - hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, - Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); - hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, - Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields); + hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, + gama, 0, n_fields); + hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, + gama, 1, n_fields); #endif #ifdef ROE - hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, - Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); - hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, - Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields); + hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, + 0, n_fields); + hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, + 1, n_fields); #endif #ifdef HLLC - hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, - Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); - hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, - Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields); + hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, + gama, 0, n_fields); + hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, + gama, 1, n_fields); #endif CudaCheckError(); // Step 3: Update the conserved variables half a timestep - hipLaunchKernelGGL(Update_Conserved_Variables_2D_half, dim2dGrid, dim1dBlock, - 0, 0, dev_conserved, dev_conserved_half, F_x, F_y, nx, ny, - n_ghost, dx, dy, 0.5 * dt, gama, n_fields); + hipLaunchKernelGGL(Update_Conserved_Variables_2D_half, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, dev_conserved_half, + F_x, F_y, nx, ny, n_ghost, dx, dy, 0.5 * dt, gama, n_fields); CudaCheckError(); // Step 4: Construct left and right interface values using updated conserved // variables #ifdef PLMP - hipLaunchKernelGGL(PLMP_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved_half, - Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, - n_fields); - hipLaunchKernelGGL(PLMP_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved_half, - Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, dt, gama, 1, - n_fields); + hipLaunchKernelGGL(PLMP_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, + dt, gama, 0, n_fields); + hipLaunchKernelGGL(PLMP_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, + dt, gama, 1, n_fields); #endif #ifdef PLMC - hipLaunchKernelGGL(PLMC_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved_half, - Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, - n_fields); - hipLaunchKernelGGL(PLMC_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved_half, - Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, dt, gama, 1, - n_fields); + hipLaunchKernelGGL(PLMC_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, + dt, gama, 0, n_fields); + hipLaunchKernelGGL(PLMC_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, + dt, gama, 1, n_fields); #endif #ifdef PPMP - hipLaunchKernelGGL(PPMP_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved_half, - Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, - n_fields); - hipLaunchKernelGGL(PPMP_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved_half, - Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, dt, gama, 1, - n_fields); + hipLaunchKernelGGL(PPMP_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, + dt, gama, 0, n_fields); + hipLaunchKernelGGL(PPMP_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, + dt, gama, 1, n_fields); #endif // PPMP #ifdef PPMC - hipLaunchKernelGGL(PPMC_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved_half, - Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, - n_fields); - hipLaunchKernelGGL(PPMC_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved_half, - Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, dt, gama, 1, - n_fields); + hipLaunchKernelGGL(PPMC_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, + dt, gama, 0, n_fields); + hipLaunchKernelGGL(PPMC_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, + dt, gama, 1, n_fields); #endif // PPMC CudaCheckError(); // Step 5: Calculate the fluxes again #ifdef EXACT - hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, - Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); - hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, - Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields); + hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, + gama, 0, n_fields); + hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, + gama, 1, n_fields); #endif #ifdef ROE - hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, - Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); - hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, - Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields); + hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, + 0, n_fields); + hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, + 1, n_fields); #endif #ifdef HLLC - hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, - Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); - hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, - Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields); + hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, + gama, 0, n_fields); + hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, + gama, 1, n_fields); #endif CudaCheckError(); @@ -162,22 +147,18 @@ void VL_Algorithm_2D_CUDA(Real *d_conserved, int nx, int ny, int x_off, // Compute the divergence of velocity before updating the conserved array, // this solves synchronization issues when adding this term on // Update_Conserved_Variables - hipLaunchKernelGGL(Partial_Update_Advected_Internal_Energy_2D, dim2dGrid, - dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, Q_Ly, Q_Ry, - nx, ny, n_ghost, dx, dy, dt, gama, n_fields); + hipLaunchKernelGGL(Partial_Update_Advected_Internal_Energy_2D, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, + Q_Ly, Q_Ry, nx, ny, n_ghost, dx, dy, dt, gama, n_fields); #endif // Step 6: Update the conserved variable array - hipLaunchKernelGGL(Update_Conserved_Variables_2D, dim2dGrid, dim1dBlock, 0, 0, - dev_conserved, F_x, F_y, nx, ny, x_off, y_off, n_ghost, dx, - dy, xbound, ybound, dt, gama, n_fields); + hipLaunchKernelGGL(Update_Conserved_Variables_2D, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, F_x, F_y, nx, ny, x_off, + y_off, n_ghost, dx, dy, xbound, ybound, dt, gama, n_fields); CudaCheckError(); #ifdef DE - hipLaunchKernelGGL(Select_Internal_Energy_2D, dim2dGrid, dim1dBlock, 0, 0, - dev_conserved, nx, ny, n_ghost, n_fields); - hipLaunchKernelGGL(Sync_Energies_2D, dim2dGrid, dim1dBlock, 0, 0, - dev_conserved, nx, ny, n_ghost, gama, n_fields); + hipLaunchKernelGGL(Select_Internal_Energy_2D, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, n_ghost, n_fields); + hipLaunchKernelGGL(Sync_Energies_2D, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, n_ghost, gama, n_fields); CudaCheckError(); #endif @@ -197,12 +178,9 @@ void Free_Memory_VL_2D() cudaFree(F_y); } -__global__ void Update_Conserved_Variables_2D_half(Real *dev_conserved, - Real *dev_conserved_half, - Real *dev_F_x, Real *dev_F_y, - int nx, int ny, int n_ghost, - Real dx, Real dy, Real dt, - Real gamma, int n_fields) +__global__ void Update_Conserved_Variables_2D_half(Real *dev_conserved, Real *dev_conserved_half, Real *dev_F_x, + Real *dev_F_y, int nx, int ny, int n_ghost, Real dx, Real dy, + Real dt, Real gamma, int n_fields) { int id, xid, yid, n_cells; int imo, jmo; @@ -234,9 +212,7 @@ __global__ void Update_Conserved_Variables_2D_half(Real *dev_conserved, vx = dev_conserved[1 * n_cells + id] * d_inv; vy = dev_conserved[2 * n_cells + id] * d_inv; vz = dev_conserved[3 * n_cells + id] * d_inv; - P = (dev_conserved[4 * n_cells + id] - - 0.5 * d * (vx * vx + vy * vy + vz * vz)) * - (gamma - 1.0); + P = (dev_conserved[4 * n_cells + id] - 0.5 * d * (vx * vx + vy * vy + vz * vz)) * (gamma - 1.0); // if (d < 0.0 || d != d) printf("Negative density before half step // update.\n"); if (P < 0.0) printf("%d Negative pressure before half step // update.\n", id); @@ -248,42 +224,33 @@ __global__ void Update_Conserved_Variables_2D_half(Real *dev_conserved, vy_jpo = dev_conserved[2 * n_cells + jpo] / dev_conserved[jpo]; #endif // update the conserved variable array - dev_conserved_half[id] = dev_conserved[id] + - dtodx * (dev_F_x[imo] - dev_F_x[id]) + - dtody * (dev_F_y[jmo] - dev_F_y[id]); - dev_conserved_half[n_cells + id] = - dev_conserved[n_cells + id] + - dtodx * (dev_F_x[n_cells + imo] - dev_F_x[n_cells + id]) + - dtody * (dev_F_y[n_cells + jmo] - dev_F_y[n_cells + id]); - dev_conserved_half[2 * n_cells + id] = - dev_conserved[2 * n_cells + id] + - dtodx * (dev_F_x[2 * n_cells + imo] - dev_F_x[2 * n_cells + id]) + - dtody * (dev_F_y[2 * n_cells + jmo] - dev_F_y[2 * n_cells + id]); - dev_conserved_half[3 * n_cells + id] = - dev_conserved[3 * n_cells + id] + - dtodx * (dev_F_x[3 * n_cells + imo] - dev_F_x[3 * n_cells + id]) + - dtody * (dev_F_y[3 * n_cells + jmo] - dev_F_y[3 * n_cells + id]); - dev_conserved_half[4 * n_cells + id] = - dev_conserved[4 * n_cells + id] + - dtodx * (dev_F_x[4 * n_cells + imo] - dev_F_x[4 * n_cells + id]) + - dtody * (dev_F_y[4 * n_cells + jmo] - dev_F_y[4 * n_cells + id]); + dev_conserved_half[id] = + dev_conserved[id] + dtodx * (dev_F_x[imo] - dev_F_x[id]) + dtody * (dev_F_y[jmo] - dev_F_y[id]); + dev_conserved_half[n_cells + id] = dev_conserved[n_cells + id] + + dtodx * (dev_F_x[n_cells + imo] - dev_F_x[n_cells + id]) + + dtody * (dev_F_y[n_cells + jmo] - dev_F_y[n_cells + id]); + dev_conserved_half[2 * n_cells + id] = dev_conserved[2 * n_cells + id] + + dtodx * (dev_F_x[2 * n_cells + imo] - dev_F_x[2 * n_cells + id]) + + dtody * (dev_F_y[2 * n_cells + jmo] - dev_F_y[2 * n_cells + id]); + dev_conserved_half[3 * n_cells + id] = dev_conserved[3 * n_cells + id] + + dtodx * (dev_F_x[3 * n_cells + imo] - dev_F_x[3 * n_cells + id]) + + dtody * (dev_F_y[3 * n_cells + jmo] - dev_F_y[3 * n_cells + id]); + dev_conserved_half[4 * n_cells + id] = dev_conserved[4 * n_cells + id] + + dtodx * (dev_F_x[4 * n_cells + imo] - dev_F_x[4 * n_cells + id]) + + dtody * (dev_F_y[4 * n_cells + jmo] - dev_F_y[4 * n_cells + id]); #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { dev_conserved_half[(5 + i) * n_cells + id] = dev_conserved[(5 + i) * n_cells + id] + - dtodx * (dev_F_x[(5 + i) * n_cells + imo] - - dev_F_x[(5 + i) * n_cells + id]) + - dtody * (dev_F_y[(5 + i) * n_cells + jmo] - - dev_F_y[(5 + i) * n_cells + id]); + dtodx * (dev_F_x[(5 + i) * n_cells + imo] - dev_F_x[(5 + i) * n_cells + id]) + + dtody * (dev_F_y[(5 + i) * n_cells + jmo] - dev_F_y[(5 + i) * n_cells + id]); } #endif #ifdef DE dev_conserved_half[(n_fields - 1) * n_cells + id] = dev_conserved[(n_fields - 1) * n_cells + id] + - dtodx * (dev_F_x[(n_fields - 1) * n_cells + imo] - - dev_F_x[(n_fields - 1) * n_cells + id]) + - dtody * (dev_F_y[(n_fields - 1) * n_cells + jmo] - - dev_F_y[(n_fields - 1) * n_cells + id]) + + dtodx * (dev_F_x[(n_fields - 1) * n_cells + imo] - dev_F_x[(n_fields - 1) * n_cells + id]) + + dtody * (dev_F_y[(n_fields - 1) * n_cells + jmo] - dev_F_y[(n_fields - 1) * n_cells + id]) + 0.5 * P * (dtodx * (vx_imo - vx_ipo) + dtody * (vy_jmo - vy_jpo)); #endif } diff --git a/src/integrators/VL_2D_cuda.h b/src/integrators/VL_2D_cuda.h index 980defa3e..0231f4582 100644 --- a/src/integrators/VL_2D_cuda.h +++ b/src/integrators/VL_2D_cuda.h @@ -8,9 +8,8 @@ #include "../global/global.h" -void VL_Algorithm_2D_CUDA(Real *d_conserved, int nx, int ny, int x_off, - int y_off, int n_ghost, Real dx, Real dy, Real xbound, - Real ybound, Real dt, int n_fields); +void VL_Algorithm_2D_CUDA(Real *d_conserved, int nx, int ny, int x_off, int y_off, int n_ghost, Real dx, Real dy, + Real xbound, Real ybound, Real dt, int n_fields); void Free_Memory_VL_2D(); diff --git a/src/integrators/VL_3D_cuda.cu b/src/integrators/VL_3D_cuda.cu index 561eece53..52d8124ff 100644 --- a/src/integrators/VL_3D_cuda.cu +++ b/src/integrators/VL_3D_cuda.cu @@ -30,17 +30,14 @@ #include "../utils/gpu.hpp" #include "../utils/hydro_utilities.h" -__global__ void Update_Conserved_Variables_3D_half( - Real *dev_conserved, Real *dev_conserved_half, Real *dev_F_x, Real *dev_F_y, - Real *dev_F_z, int nx, int ny, int nz, int n_ghost, Real dx, Real dy, - Real dz, Real dt, Real gamma, int n_fields, Real density_floor); - -void VL_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, - int ny, int nz, int x_off, int y_off, int z_off, - int n_ghost, Real dx, Real dy, Real dz, Real xbound, - Real ybound, Real zbound, Real dt, int n_fields, - Real density_floor, Real U_floor, - Real *host_grav_potential) +__global__ void Update_Conserved_Variables_3D_half(Real *dev_conserved, Real *dev_conserved_half, Real *dev_F_x, + Real *dev_F_y, Real *dev_F_z, int nx, int ny, int nz, int n_ghost, + Real dx, Real dy, Real dz, Real dt, Real gamma, int n_fields, + Real density_floor); + +void VL_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, int ny, int nz, int x_off, int y_off, + int z_off, int n_ghost, Real dx, Real dy, Real dz, Real xbound, Real ybound, Real zbound, + Real dt, int n_fields, Real density_floor, Real U_floor, Real *host_grav_potential) { // Here, *dev_conserved contains the entire // set of conserved variables on the grid @@ -96,8 +93,7 @@ void VL_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, #else // not MHD size_t const arraySize = n_fields * n_cells * sizeof(Real); #endif // MHD - CudaSafeCall(cudaMalloc((void **)&dev_conserved_half, - n_fields * n_cells * sizeof(Real))); + CudaSafeCall(cudaMalloc((void **)&dev_conserved_half, n_fields * n_cells * sizeof(Real))); CudaSafeCall(cudaMalloc((void **)&Q_Lx, arraySize)); CudaSafeCall(cudaMalloc((void **)&Q_Rx, arraySize)); CudaSafeCall(cudaMalloc((void **)&Q_Ly, arraySize)); @@ -108,8 +104,7 @@ void VL_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, CudaSafeCall(cudaMalloc((void **)&F_y, arraySize)); CudaSafeCall(cudaMalloc((void **)&F_z, arraySize)); - cuda_utilities::initGpuMemory(dev_conserved_half, - n_fields * n_cells * sizeof(Real)); + cuda_utilities::initGpuMemory(dev_conserved_half, n_fields * n_cells * sizeof(Real)); cuda_utilities::initGpuMemory(Q_Lx, arraySize); cuda_utilities::initGpuMemory(Q_Rx, arraySize); cuda_utilities::initGpuMemory(Q_Ly, arraySize); @@ -139,187 +134,159 @@ void VL_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, } #if defined(GRAVITY) && !defined(GRAVITY_GPU) - CudaSafeCall(cudaMemcpy(dev_grav_potential, temp_potential, - n_cells * sizeof(Real), cudaMemcpyHostToDevice)); + CudaSafeCall(cudaMemcpy(dev_grav_potential, temp_potential, n_cells * sizeof(Real), cudaMemcpyHostToDevice)); #endif // GRAVITY and GRAVITY_GPU // Step 1: Use PCM reconstruction to put primitive variables into interface // arrays - hipLaunchKernelGGL(PCM_Reconstruction_3D, dim1dGrid, dim1dBlock, 0, 0, - dev_conserved, Q_Lx, Q_Rx, Q_Ly, Q_Ry, Q_Lz, Q_Rz, nx, ny, - nz, n_ghost, gama, n_fields); + hipLaunchKernelGGL(PCM_Reconstruction_3D, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, Q_Ly, Q_Ry, Q_Lz, + Q_Rz, nx, ny, nz, n_ghost, gama, n_fields); CudaCheckError(); // Step 2: Calculate first-order upwind fluxes #ifdef EXACT - hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, - Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); - hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, - Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields); - hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, - Q_Lz, Q_Rz, F_z, nx, ny, nz, n_ghost, gama, 2, n_fields); + hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, + gama, 0, n_fields); + hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, + gama, 1, n_fields); + hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lz, Q_Rz, F_z, nx, ny, nz, n_ghost, + gama, 2, n_fields); #endif // EXACT #ifdef ROE - hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, - Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); - hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, - Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields); - hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, - Q_Lz, Q_Rz, F_z, nx, ny, nz, n_ghost, gama, 2, n_fields); + hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, + 0, n_fields); + hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, + 1, n_fields); + hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lz, Q_Rz, F_z, nx, ny, nz, n_ghost, gama, + 2, n_fields); #endif // ROE #ifdef HLLC - hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, - Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); - hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, - Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields); - hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, - Q_Lz, Q_Rz, F_z, nx, ny, nz, n_ghost, gama, 2, n_fields); + hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, + gama, 0, n_fields); + hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, + gama, 1, n_fields); + hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lz, Q_Rz, F_z, nx, ny, nz, n_ghost, + gama, 2, n_fields); #endif // HLLC #ifdef HLL - hipLaunchKernelGGL(Calculate_HLL_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, - Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); - hipLaunchKernelGGL(Calculate_HLL_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, - Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields); - hipLaunchKernelGGL(Calculate_HLL_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, - Q_Lz, Q_Rz, F_z, nx, ny, nz, n_ghost, gama, 2, n_fields); + hipLaunchKernelGGL(Calculate_HLL_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, + 0, n_fields); + hipLaunchKernelGGL(Calculate_HLL_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, + 1, n_fields); + hipLaunchKernelGGL(Calculate_HLL_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lz, Q_Rz, F_z, nx, ny, nz, n_ghost, gama, + 2, n_fields); #endif // HLL #ifdef HLLD - hipLaunchKernelGGL(mhd::Calculate_HLLD_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, - 0, Q_Lx, Q_Rx, - &(dev_conserved[(grid_enum::magnetic_x)*n_cells]), F_x, nx, - ny, nz, n_ghost, gama, 0, n_fields); - hipLaunchKernelGGL(mhd::Calculate_HLLD_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, - 0, Q_Ly, Q_Ry, - &(dev_conserved[(grid_enum::magnetic_y)*n_cells]), F_y, nx, - ny, nz, n_ghost, gama, 1, n_fields); - hipLaunchKernelGGL(mhd::Calculate_HLLD_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, - 0, Q_Lz, Q_Rz, - &(dev_conserved[(grid_enum::magnetic_z)*n_cells]), F_z, nx, - ny, nz, n_ghost, gama, 2, n_fields); + hipLaunchKernelGGL(mhd::Calculate_HLLD_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, + &(dev_conserved[(grid_enum::magnetic_x)*n_cells]), F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); + hipLaunchKernelGGL(mhd::Calculate_HLLD_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, + &(dev_conserved[(grid_enum::magnetic_y)*n_cells]), F_y, nx, ny, nz, n_ghost, gama, 1, n_fields); + hipLaunchKernelGGL(mhd::Calculate_HLLD_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lz, Q_Rz, + &(dev_conserved[(grid_enum::magnetic_z)*n_cells]), F_z, nx, ny, nz, n_ghost, gama, 2, n_fields); #endif // HLLD CudaCheckError(); #ifdef MHD // Step 2.5: Compute the Constrained transport electric fields - hipLaunchKernelGGL(mhd::Calculate_CT_Electric_Fields, dim1dGrid, dim1dBlock, - 0, 0, F_x, F_y, F_z, dev_conserved, ctElectricFields, nx, - ny, nz, n_cells); + hipLaunchKernelGGL(mhd::Calculate_CT_Electric_Fields, dim1dGrid, dim1dBlock, 0, 0, F_x, F_y, F_z, dev_conserved, + ctElectricFields, nx, ny, nz, n_cells); CudaCheckError(); #endif // MHD // Step 3: Update the conserved variables half a timestep - hipLaunchKernelGGL(Update_Conserved_Variables_3D_half, dim1dGrid, dim1dBlock, - 0, 0, dev_conserved, dev_conserved_half, F_x, F_y, F_z, nx, - ny, nz, n_ghost, dx, dy, dz, 0.5 * dt, gama, n_fields, - density_floor); + hipLaunchKernelGGL(Update_Conserved_Variables_3D_half, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, dev_conserved_half, + F_x, F_y, F_z, nx, ny, nz, n_ghost, dx, dy, dz, 0.5 * dt, gama, n_fields, density_floor); CudaCheckError(); #ifdef MHD // Update the magnetic fields - hipLaunchKernelGGL(mhd::Update_Magnetic_Field_3D, dim1dGrid, dim1dBlock, 0, 0, - dev_conserved, dev_conserved_half, ctElectricFields, nx, - ny, nz, n_cells, 0.5 * dt, dx, dy, dz); + hipLaunchKernelGGL(mhd::Update_Magnetic_Field_3D, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, dev_conserved_half, + ctElectricFields, nx, ny, nz, n_cells, 0.5 * dt, dx, dy, dz); CudaCheckError(); #endif // MHD // Step 4: Construct left and right interface values using updated conserved // variables #ifdef PCM - hipLaunchKernelGGL(PCM_Reconstruction_3D, dim1dGrid, dim1dBlock, 0, 0, - dev_conserved_half, Q_Lx, Q_Rx, Q_Ly, Q_Ry, Q_Lz, Q_Rz, nx, - ny, nz, n_ghost, gama, n_fields); + hipLaunchKernelGGL(PCM_Reconstruction_3D, dim1dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, Q_Ly, Q_Ry, + Q_Lz, Q_Rz, nx, ny, nz, n_ghost, gama, n_fields); #endif // PCM #ifdef PLMP - hipLaunchKernelGGL(PLMP_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved_half, - Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, - n_fields); - hipLaunchKernelGGL(PLMP_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved_half, - Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, dt, gama, 1, - n_fields); - hipLaunchKernelGGL(PLMP_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved_half, - Q_Lz, Q_Rz, nx, ny, nz, n_ghost, dz, dt, gama, 2, - n_fields); + hipLaunchKernelGGL(PLMP_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, + dt, gama, 0, n_fields); + hipLaunchKernelGGL(PLMP_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, + dt, gama, 1, n_fields); + hipLaunchKernelGGL(PLMP_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Lz, Q_Rz, nx, ny, nz, n_ghost, dz, + dt, gama, 2, n_fields); #endif // PLMP #ifdef PLMC - hipLaunchKernelGGL(PLMC_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved_half, - Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, - n_fields); - hipLaunchKernelGGL(PLMC_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved_half, - Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, dt, gama, 1, - n_fields); - hipLaunchKernelGGL(PLMC_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved_half, - Q_Lz, Q_Rz, nx, ny, nz, n_ghost, dz, dt, gama, 2, - n_fields); + hipLaunchKernelGGL(PLMC_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, + dt, gama, 0, n_fields); + hipLaunchKernelGGL(PLMC_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, + dt, gama, 1, n_fields); + hipLaunchKernelGGL(PLMC_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Lz, Q_Rz, nx, ny, nz, n_ghost, dz, + dt, gama, 2, n_fields); #endif // PLMC #ifdef PPMP - hipLaunchKernelGGL(PPMP_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved_half, - Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, - n_fields); - hipLaunchKernelGGL(PPMP_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved_half, - Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, dt, gama, 1, - n_fields); - hipLaunchKernelGGL(PPMP_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved_half, - Q_Lz, Q_Rz, nx, ny, nz, n_ghost, dz, dt, gama, 2, - n_fields); + hipLaunchKernelGGL(PPMP_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, + dt, gama, 0, n_fields); + hipLaunchKernelGGL(PPMP_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, + dt, gama, 1, n_fields); + hipLaunchKernelGGL(PPMP_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Lz, Q_Rz, nx, ny, nz, n_ghost, dz, + dt, gama, 2, n_fields); #endif // PPMP #ifdef PPMC - hipLaunchKernelGGL(PPMC_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved_half, - Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, - n_fields); - hipLaunchKernelGGL(PPMC_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved_half, - Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, dt, gama, 1, - n_fields); - hipLaunchKernelGGL(PPMC_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved_half, - Q_Lz, Q_Rz, nx, ny, nz, n_ghost, dz, dt, gama, 2, - n_fields); + hipLaunchKernelGGL(PPMC_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, + dt, gama, 0, n_fields); + hipLaunchKernelGGL(PPMC_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, + dt, gama, 1, n_fields); + hipLaunchKernelGGL(PPMC_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Lz, Q_Rz, nx, ny, nz, n_ghost, dz, + dt, gama, 2, n_fields); #endif // PPMC CudaCheckError(); // Step 5: Calculate the fluxes again #ifdef EXACT - hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, - Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); - hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, - Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields); - hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, - Q_Lz, Q_Rz, F_z, nx, ny, nz, n_ghost, gama, 2, n_fields); + hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, + gama, 0, n_fields); + hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, + gama, 1, n_fields); + hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lz, Q_Rz, F_z, nx, ny, nz, n_ghost, + gama, 2, n_fields); #endif // EXACT #ifdef ROE - hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, - Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); - hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, - Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields); - hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, - Q_Lz, Q_Rz, F_z, nx, ny, nz, n_ghost, gama, 2, n_fields); + hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, + 0, n_fields); + hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, + 1, n_fields); + hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lz, Q_Rz, F_z, nx, ny, nz, n_ghost, gama, + 2, n_fields); #endif // ROE #ifdef HLLC - hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, - Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); - hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, - Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields); - hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, - Q_Lz, Q_Rz, F_z, nx, ny, nz, n_ghost, gama, 2, n_fields); + hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, + gama, 0, n_fields); + hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, + gama, 1, n_fields); + hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lz, Q_Rz, F_z, nx, ny, nz, n_ghost, + gama, 2, n_fields); #endif // HLLC #ifdef HLL - hipLaunchKernelGGL(Calculate_HLL_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, - Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); - hipLaunchKernelGGL(Calculate_HLL_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, - Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields); - hipLaunchKernelGGL(Calculate_HLL_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, - Q_Lz, Q_Rz, F_z, nx, ny, nz, n_ghost, gama, 2, n_fields); + hipLaunchKernelGGL(Calculate_HLL_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, + 0, n_fields); + hipLaunchKernelGGL(Calculate_HLL_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, + 1, n_fields); + hipLaunchKernelGGL(Calculate_HLL_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lz, Q_Rz, F_z, nx, ny, nz, n_ghost, gama, + 2, n_fields); #endif // HLLC #ifdef HLLD - hipLaunchKernelGGL(mhd::Calculate_HLLD_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, - 0, Q_Lx, Q_Rx, - &(dev_conserved_half[(grid_enum::magnetic_x)*n_cells]), - F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); - hipLaunchKernelGGL(mhd::Calculate_HLLD_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, - 0, Q_Ly, Q_Ry, - &(dev_conserved_half[(grid_enum::magnetic_y)*n_cells]), - F_y, nx, ny, nz, n_ghost, gama, 1, n_fields); - hipLaunchKernelGGL(mhd::Calculate_HLLD_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, - 0, Q_Lz, Q_Rz, - &(dev_conserved_half[(grid_enum::magnetic_z)*n_cells]), - F_z, nx, ny, nz, n_ghost, gama, 2, n_fields); + hipLaunchKernelGGL(mhd::Calculate_HLLD_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, + &(dev_conserved_half[(grid_enum::magnetic_x)*n_cells]), F_x, nx, ny, nz, n_ghost, gama, 0, + n_fields); + hipLaunchKernelGGL(mhd::Calculate_HLLD_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, + &(dev_conserved_half[(grid_enum::magnetic_y)*n_cells]), F_y, nx, ny, nz, n_ghost, gama, 1, + n_fields); + hipLaunchKernelGGL(mhd::Calculate_HLLD_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lz, Q_Rz, + &(dev_conserved_half[(grid_enum::magnetic_z)*n_cells]), F_z, nx, ny, nz, n_ghost, gama, 2, + n_fields); #endif // HLLD CudaCheckError(); @@ -327,48 +294,41 @@ void VL_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, // Compute the divergence of Vel before updating the conserved array, this // solves synchronization issues when adding this term on // Update_Conserved_Variables_3D - hipLaunchKernelGGL(Partial_Update_Advected_Internal_Energy_3D, dim1dGrid, - dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, Q_Ly, Q_Ry, - Q_Lz, Q_Rz, nx, ny, nz, n_ghost, dx, dy, dz, dt, gama, - n_fields); + hipLaunchKernelGGL(Partial_Update_Advected_Internal_Energy_3D, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, + Q_Ly, Q_Ry, Q_Lz, Q_Rz, nx, ny, nz, n_ghost, dx, dy, dz, dt, gama, n_fields); CudaCheckError(); #endif // DE #ifdef MHD // Step 5.5: Compute the Constrained transport electric fields - hipLaunchKernelGGL(mhd::Calculate_CT_Electric_Fields, dim1dGrid, dim1dBlock, - 0, 0, F_x, F_y, F_z, dev_conserved_half, ctElectricFields, - nx, ny, nz, n_cells); + hipLaunchKernelGGL(mhd::Calculate_CT_Electric_Fields, dim1dGrid, dim1dBlock, 0, 0, F_x, F_y, F_z, dev_conserved_half, + ctElectricFields, nx, ny, nz, n_cells); CudaCheckError(); #endif // MHD // Step 6: Update the conserved variable array - hipLaunchKernelGGL(Update_Conserved_Variables_3D, dim1dGrid, dim1dBlock, 0, 0, - dev_conserved, Q_Lx, Q_Rx, Q_Ly, Q_Ry, Q_Lz, Q_Rz, F_x, - F_y, F_z, nx, ny, nz, x_off, y_off, z_off, n_ghost, dx, dy, - dz, xbound, ybound, zbound, dt, gama, n_fields, - density_floor, dev_grav_potential); + hipLaunchKernelGGL(Update_Conserved_Variables_3D, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, Q_Ly, Q_Ry, + Q_Lz, Q_Rz, F_x, F_y, F_z, nx, ny, nz, x_off, y_off, z_off, n_ghost, dx, dy, dz, xbound, ybound, + zbound, dt, gama, n_fields, density_floor, dev_grav_potential); CudaCheckError(); #ifdef MHD // Update the magnetic fields - hipLaunchKernelGGL(mhd::Update_Magnetic_Field_3D, dim1dGrid, dim1dBlock, 0, 0, - dev_conserved, dev_conserved, ctElectricFields, nx, ny, nz, - n_cells, dt, dx, dy, dz); + hipLaunchKernelGGL(mhd::Update_Magnetic_Field_3D, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, dev_conserved, + ctElectricFields, nx, ny, nz, n_cells, dt, dx, dy, dz); CudaCheckError(); #endif // MHD #ifdef DE - hipLaunchKernelGGL(Select_Internal_Energy_3D, dim1dGrid, dim1dBlock, 0, 0, - dev_conserved, nx, ny, nz, n_ghost, n_fields); - hipLaunchKernelGGL(Sync_Energies_3D, dim1dGrid, dim1dBlock, 0, 0, - dev_conserved, nx, ny, nz, n_ghost, gama, n_fields); + hipLaunchKernelGGL(Select_Internal_Energy_3D, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, nz, n_ghost, + n_fields); + hipLaunchKernelGGL(Sync_Energies_3D, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, nz, n_ghost, gama, n_fields); CudaCheckError(); #endif // DE #ifdef TEMPERATURE_FLOOR - hipLaunchKernelGGL(Apply_Temperature_Floor, dim1dGrid, dim1dBlock, 0, 0, - dev_conserved, nx, ny, nz, n_ghost, n_fields, U_floor); + hipLaunchKernelGGL(Apply_Temperature_Floor, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, nz, n_ghost, n_fields, + U_floor); CudaCheckError(); #endif // TEMPERATURE_FLOOR @@ -392,10 +352,10 @@ void Free_Memory_VL_3D() cudaFree(ctElectricFields); } -__global__ void Update_Conserved_Variables_3D_half( - Real *dev_conserved, Real *dev_conserved_half, Real *dev_F_x, Real *dev_F_y, - Real *dev_F_z, int nx, int ny, int nz, int n_ghost, Real dx, Real dy, - Real dz, Real dt, Real gamma, int n_fields, Real density_floor) +__global__ void Update_Conserved_Variables_3D_half(Real *dev_conserved, Real *dev_conserved_half, Real *dev_F_x, + Real *dev_F_y, Real *dev_F_z, int nx, int ny, int nz, int n_ghost, + Real dx, Real dy, Real dz, Real dt, Real gamma, int n_fields, + Real density_floor) { Real dtodx = dt / dx; Real dtody = dt / dy; @@ -425,8 +385,7 @@ __global__ void Update_Conserved_Variables_3D_half( // threads corresponding to all cells except outer ring of ghost cells do the // calculation - if (xid > 0 && xid < nx - 1 && yid > 0 && yid < ny - 1 && zid > 0 && - zid < nz - 1) { + if (xid > 0 && xid < nx - 1 && yid > 0 && yid < ny - 1 && zid > 0 && zid < nz - 1) { #ifdef DE d = dev_conserved[id]; d_inv = 1.0 / d; @@ -440,8 +399,7 @@ __global__ void Update_Conserved_Variables_3D_half( #ifdef MHD // Add the magnetic energy auto const [centeredBx, centeredBy, centeredBz] = - mhd::utils::cellCenteredMagneticFields(dev_conserved, id, xid, yid, zid, - n_cells, nx, ny) E_kin += + mhd::utils::cellCenteredMagneticFields(dev_conserved, id, xid, yid, zid, n_cells, nx, ny) E_kin += mhd::utils::computeMagneticEnergy(centeredBx, centeredBy, centeredBz); #endif // MHD P = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, GE, gamma); @@ -463,54 +421,40 @@ __global__ void Update_Conserved_Variables_3D_half( #endif // DE // update the conserved variable array - dev_conserved_half[id] = dev_conserved[id] + - dtodx * (dev_F_x[imo] - dev_F_x[id]) + - dtody * (dev_F_y[jmo] - dev_F_y[id]) + - dtodz * (dev_F_z[kmo] - dev_F_z[id]); - dev_conserved_half[n_cells + id] = - dev_conserved[n_cells + id] + - dtodx * (dev_F_x[n_cells + imo] - dev_F_x[n_cells + id]) + - dtody * (dev_F_y[n_cells + jmo] - dev_F_y[n_cells + id]) + - dtodz * (dev_F_z[n_cells + kmo] - dev_F_z[n_cells + id]); - dev_conserved_half[2 * n_cells + id] = - dev_conserved[2 * n_cells + id] + - dtodx * (dev_F_x[2 * n_cells + imo] - dev_F_x[2 * n_cells + id]) + - dtody * (dev_F_y[2 * n_cells + jmo] - dev_F_y[2 * n_cells + id]) + - dtodz * (dev_F_z[2 * n_cells + kmo] - dev_F_z[2 * n_cells + id]); - dev_conserved_half[3 * n_cells + id] = - dev_conserved[3 * n_cells + id] + - dtodx * (dev_F_x[3 * n_cells + imo] - dev_F_x[3 * n_cells + id]) + - dtody * (dev_F_y[3 * n_cells + jmo] - dev_F_y[3 * n_cells + id]) + - dtodz * (dev_F_z[3 * n_cells + kmo] - dev_F_z[3 * n_cells + id]); - dev_conserved_half[4 * n_cells + id] = - dev_conserved[4 * n_cells + id] + - dtodx * (dev_F_x[4 * n_cells + imo] - dev_F_x[4 * n_cells + id]) + - dtody * (dev_F_y[4 * n_cells + jmo] - dev_F_y[4 * n_cells + id]) + - dtodz * (dev_F_z[4 * n_cells + kmo] - dev_F_z[4 * n_cells + id]); + dev_conserved_half[id] = dev_conserved[id] + dtodx * (dev_F_x[imo] - dev_F_x[id]) + + dtody * (dev_F_y[jmo] - dev_F_y[id]) + dtodz * (dev_F_z[kmo] - dev_F_z[id]); + dev_conserved_half[n_cells + id] = dev_conserved[n_cells + id] + + dtodx * (dev_F_x[n_cells + imo] - dev_F_x[n_cells + id]) + + dtody * (dev_F_y[n_cells + jmo] - dev_F_y[n_cells + id]) + + dtodz * (dev_F_z[n_cells + kmo] - dev_F_z[n_cells + id]); + dev_conserved_half[2 * n_cells + id] = dev_conserved[2 * n_cells + id] + + dtodx * (dev_F_x[2 * n_cells + imo] - dev_F_x[2 * n_cells + id]) + + dtody * (dev_F_y[2 * n_cells + jmo] - dev_F_y[2 * n_cells + id]) + + dtodz * (dev_F_z[2 * n_cells + kmo] - dev_F_z[2 * n_cells + id]); + dev_conserved_half[3 * n_cells + id] = dev_conserved[3 * n_cells + id] + + dtodx * (dev_F_x[3 * n_cells + imo] - dev_F_x[3 * n_cells + id]) + + dtody * (dev_F_y[3 * n_cells + jmo] - dev_F_y[3 * n_cells + id]) + + dtodz * (dev_F_z[3 * n_cells + kmo] - dev_F_z[3 * n_cells + id]); + dev_conserved_half[4 * n_cells + id] = dev_conserved[4 * n_cells + id] + + dtodx * (dev_F_x[4 * n_cells + imo] - dev_F_x[4 * n_cells + id]) + + dtody * (dev_F_y[4 * n_cells + jmo] - dev_F_y[4 * n_cells + id]) + + dtodz * (dev_F_z[4 * n_cells + kmo] - dev_F_z[4 * n_cells + id]); #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { dev_conserved_half[(5 + i) * n_cells + id] = dev_conserved[(5 + i) * n_cells + id] + - dtodx * (dev_F_x[(5 + i) * n_cells + imo] - - dev_F_x[(5 + i) * n_cells + id]) + - dtody * (dev_F_y[(5 + i) * n_cells + jmo] - - dev_F_y[(5 + i) * n_cells + id]) + - dtodz * (dev_F_z[(5 + i) * n_cells + kmo] - - dev_F_z[(5 + i) * n_cells + id]); + dtodx * (dev_F_x[(5 + i) * n_cells + imo] - dev_F_x[(5 + i) * n_cells + id]) + + dtody * (dev_F_y[(5 + i) * n_cells + jmo] - dev_F_y[(5 + i) * n_cells + id]) + + dtodz * (dev_F_z[(5 + i) * n_cells + kmo] - dev_F_z[(5 + i) * n_cells + id]); } #endif // SCALAR #ifdef DE dev_conserved_half[(n_fields - 1) * n_cells + id] = dev_conserved[(n_fields - 1) * n_cells + id] + - dtodx * (dev_F_x[(n_fields - 1) * n_cells + imo] - - dev_F_x[(n_fields - 1) * n_cells + id]) + - dtody * (dev_F_y[(n_fields - 1) * n_cells + jmo] - - dev_F_y[(n_fields - 1) * n_cells + id]) + - dtodz * (dev_F_z[(n_fields - 1) * n_cells + kmo] - - dev_F_z[(n_fields - 1) * n_cells + id]) + - 0.5 * P * - (dtodx * (vx_imo - vx_ipo) + dtody * (vy_jmo - vy_jpo) + - dtodz * (vz_kmo - vz_kpo)); + dtodx * (dev_F_x[(n_fields - 1) * n_cells + imo] - dev_F_x[(n_fields - 1) * n_cells + id]) + + dtody * (dev_F_y[(n_fields - 1) * n_cells + jmo] - dev_F_y[(n_fields - 1) * n_cells + id]) + + dtodz * (dev_F_z[(n_fields - 1) * n_cells + kmo] - dev_F_z[(n_fields - 1) * n_cells + id]) + + 0.5 * P * (dtodx * (vx_imo - vx_ipo) + dtody * (vy_jmo - vy_jpo) + dtodz * (vz_kmo - vz_kpo)); #endif // DE #ifdef DENSITY_FLOOR @@ -524,8 +468,7 @@ __global__ void Update_Conserved_Variables_3D_half( dev_conserved_half[3 * n_cells + id] *= (density_floor / dens_0); dev_conserved_half[4 * n_cells + id] *= (density_floor / dens_0); #ifdef DE - dev_conserved_half[(n_fields - 1) * n_cells + id] *= - (density_floor / dens_0); + dev_conserved_half[(n_fields - 1) * n_cells + id] *= (density_floor / dens_0); #endif // DE } #endif // DENSITY_FLOOR diff --git a/src/integrators/VL_3D_cuda.h b/src/integrators/VL_3D_cuda.h index 6446bb2cb..ab52cba85 100644 --- a/src/integrators/VL_3D_cuda.h +++ b/src/integrators/VL_3D_cuda.h @@ -8,12 +8,9 @@ #include "../global/global.h" -void VL_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, - int ny, int nz, int x_off, int y_off, int z_off, - int n_ghost, Real dx, Real dy, Real dz, Real xbound, - Real ybound, Real zbound, Real dt, int n_fields, - Real density_floor, Real U_floor, - Real *host_grav_potential); +void VL_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, int ny, int nz, int x_off, int y_off, + int z_off, int n_ghost, Real dx, Real dy, Real dz, Real xbound, Real ybound, Real zbound, + Real dt, int n_fields, Real density_floor, Real U_floor, Real *host_grav_potential); void Free_Memory_VL_3D(); diff --git a/src/integrators/simple_1D_cuda.cu b/src/integrators/simple_1D_cuda.cu index 1560be9e0..415edceef 100644 --- a/src/integrators/simple_1D_cuda.cu +++ b/src/integrators/simple_1D_cuda.cu @@ -23,8 +23,8 @@ #include "../utils/error_handling.h" #include "../utils/gpu.hpp" -void Simple_Algorithm_1D_CUDA(Real *d_conserved, int nx, int x_off, int n_ghost, - Real dx, Real xbound, Real dt, int n_fields) +void Simple_Algorithm_1D_CUDA(Real *d_conserved, int nx, int x_off, int n_ghost, Real dx, Real xbound, Real dt, + int n_fields) { // Here, *dev_conserved contains the entire // set of conserved variables on the grid @@ -56,43 +56,43 @@ void Simple_Algorithm_1D_CUDA(Real *d_conserved, int nx, int x_off, int n_ghost, // Step 1: Do the reconstruction #ifdef PCM - hipLaunchKernelGGL(PCM_Reconstruction_1D, dimGrid, dimBlock, 0, 0, - dev_conserved, Q_Lx, Q_Rx, nx, n_ghost, gama, n_fields); + hipLaunchKernelGGL(PCM_Reconstruction_1D, dimGrid, dimBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, n_ghost, gama, + n_fields); CudaCheckError(); #endif #ifdef PLMP - hipLaunchKernelGGL(PLMP_cuda, dimGrid, dimBlock, 0, 0, dev_conserved, Q_Lx, - Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, n_fields); + hipLaunchKernelGGL(PLMP_cuda, dimGrid, dimBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, + 0, n_fields); CudaCheckError(); #endif #ifdef PLMC - hipLaunchKernelGGL(PLMC_cuda, dimGrid, dimBlock, 0, 0, dev_conserved, Q_Lx, - Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, n_fields); + hipLaunchKernelGGL(PLMC_cuda, dimGrid, dimBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, + 0, n_fields); CudaCheckError(); #endif #ifdef PPMP - hipLaunchKernelGGL(PPMP_cuda, dimGrid, dimBlock, 0, 0, dev_conserved, Q_Lx, - Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, n_fields); + hipLaunchKernelGGL(PPMP_cuda, dimGrid, dimBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, + 0, n_fields); CudaCheckError(); #endif #ifdef PPMC - hipLaunchKernelGGL(PPMC_cuda, dimGrid, dimBlock, 0, 0, dev_conserved, Q_Lx, - Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, n_fields); + hipLaunchKernelGGL(PPMC_cuda, dimGrid, dimBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, + 0, n_fields); CudaCheckError(); #endif // Step 2: Calculate the fluxes #ifdef EXACT - hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dimGrid, dimBlock, 0, 0, Q_Lx, - Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); + hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dimGrid, dimBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, + 0, n_fields); #endif #ifdef ROE - hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dimGrid, dimBlock, 0, 0, Q_Lx, - Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); + hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dimGrid, dimBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, + n_fields); #endif #ifdef HLLC - hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dimGrid, dimBlock, 0, 0, Q_Lx, - Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); + hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dimGrid, dimBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, + n_fields); #endif CudaCheckError(); @@ -100,23 +100,19 @@ void Simple_Algorithm_1D_CUDA(Real *d_conserved, int nx, int x_off, int n_ghost, // Compute the divergence of Vel before updating the conserved array, this // solves synchronization issues when adding this term on // Update_Conserved_Variables - hipLaunchKernelGGL(Partial_Update_Advected_Internal_Energy_1D, dimGrid, - dimBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, n_ghost, dx, - dt, gama, n_fields); + hipLaunchKernelGGL(Partial_Update_Advected_Internal_Energy_1D, dimGrid, dimBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, + n_ghost, dx, dt, gama, n_fields); #endif // Step 3: Update the conserved variable array - hipLaunchKernelGGL(Update_Conserved_Variables_1D, dimGrid, dimBlock, 0, 0, - dev_conserved, F_x, n_cells, x_off, n_ghost, dx, xbound, - dt, gama, n_fields); + hipLaunchKernelGGL(Update_Conserved_Variables_1D, dimGrid, dimBlock, 0, 0, dev_conserved, F_x, n_cells, x_off, + n_ghost, dx, xbound, dt, gama, n_fields); CudaCheckError(); // Synchronize the total and internal energy, if using dual-energy formalism #ifdef DE - hipLaunchKernelGGL(Select_Internal_Energy_1D, dimGrid, dimBlock, 0, 0, - dev_conserved, nx, n_ghost, n_fields); - hipLaunchKernelGGL(Sync_Energies_1D, dimGrid, dimBlock, 0, 0, dev_conserved, - n_cells, n_ghost, gama, n_fields); + hipLaunchKernelGGL(Select_Internal_Energy_1D, dimGrid, dimBlock, 0, 0, dev_conserved, nx, n_ghost, n_fields); + hipLaunchKernelGGL(Sync_Energies_1D, dimGrid, dimBlock, 0, 0, dev_conserved, n_cells, n_ghost, gama, n_fields); CudaCheckError(); #endif diff --git a/src/integrators/simple_1D_cuda.h b/src/integrators/simple_1D_cuda.h index 03a7a8f7c..69c38cae7 100644 --- a/src/integrators/simple_1D_cuda.h +++ b/src/integrators/simple_1D_cuda.h @@ -8,8 +8,8 @@ #include "../global/global.h" -void Simple_Algorithm_1D_CUDA(Real *d_conserved, int nx, int x_off, int n_ghost, - Real dx, Real xbound, Real dt, int n_fields); +void Simple_Algorithm_1D_CUDA(Real *d_conserved, int nx, int x_off, int n_ghost, Real dx, Real xbound, Real dt, + int n_fields); void Free_Memory_Simple_1D(); diff --git a/src/integrators/simple_2D_cuda.cu b/src/integrators/simple_2D_cuda.cu index eb78e86d7..295c955a8 100644 --- a/src/integrators/simple_2D_cuda.cu +++ b/src/integrators/simple_2D_cuda.cu @@ -20,8 +20,7 @@ #include "../riemann_solvers/roe_cuda.h" #include "../utils/gpu.hpp" -void Simple_Algorithm_2D_CUDA(Real *d_conserved, int nx, int ny, int x_off, - int y_off, int n_ghost, Real dx, Real dy, +void Simple_Algorithm_2D_CUDA(Real *d_conserved, int nx, int ny, int x_off, int y_off, int n_ghost, Real dx, Real dy, Real xbound, Real ybound, Real dt, int n_fields) { // Here, *dev_conserved contains the entire @@ -58,62 +57,53 @@ void Simple_Algorithm_2D_CUDA(Real *d_conserved, int nx, int ny, int x_off, // Step 1: Do the reconstruction #ifdef PCM - hipLaunchKernelGGL(PCM_Reconstruction_2D, dim2dGrid, dim1dBlock, 0, 0, - dev_conserved, Q_Lx, Q_Rx, Q_Ly, Q_Ry, nx, ny, n_ghost, - gama, n_fields); + hipLaunchKernelGGL(PCM_Reconstruction_2D, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, Q_Ly, Q_Ry, nx, ny, + n_ghost, gama, n_fields); #endif #ifdef PLMP - hipLaunchKernelGGL(PLMP_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, - Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, - n_fields); - hipLaunchKernelGGL(PLMP_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, - Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, dt, gama, 1, - n_fields); + hipLaunchKernelGGL(PLMP_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, + gama, 0, n_fields); + hipLaunchKernelGGL(PLMP_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, dt, + gama, 1, n_fields); #endif #ifdef PLMC - hipLaunchKernelGGL(PLMC_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, - Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, - n_fields); - hipLaunchKernelGGL(PLMC_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, - Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, dt, gama, 1, - n_fields); + hipLaunchKernelGGL(PLMC_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, + gama, 0, n_fields); + hipLaunchKernelGGL(PLMC_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, dt, + gama, 1, n_fields); #endif #ifdef PPMP - hipLaunchKernelGGL(PPMP_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, - Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, - n_fields); - hipLaunchKernelGGL(PPMP_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, - Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, dt, gama, 1, - n_fields); + hipLaunchKernelGGL(PPMP_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, + gama, 0, n_fields); + hipLaunchKernelGGL(PPMP_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, dt, + gama, 1, n_fields); #endif #ifdef PPMC - hipLaunchKernelGGL(PPMC_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, - Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, - n_fields); - hipLaunchKernelGGL(PPMC_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, - Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, dt, gama, 1, - n_fields); + hipLaunchKernelGGL(PPMC_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, + gama, 0, n_fields); + hipLaunchKernelGGL(PPMC_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, dt, + gama, 1, n_fields); #endif CudaCheckError(); // Step 2: Calculate the fluxes #ifdef EXACT - hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, - Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); - hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, - Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields); + hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, + gama, 0, n_fields); + hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, + gama, 1, n_fields); #endif #ifdef ROE - hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, - Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); - hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, - Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields); + hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, + 0, n_fields); + hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, + 1, n_fields); #endif #ifdef HLLC - hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, - Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); - hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, - Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields); + hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, + gama, 0, n_fields); + hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, + gama, 1, n_fields); #endif CudaCheckError(); @@ -121,23 +111,19 @@ void Simple_Algorithm_2D_CUDA(Real *d_conserved, int nx, int ny, int x_off, // Compute the divergence of Vel before updating the conserved array, this // solves synchronization issues when adding this term on // Update_Conserved_Variables - hipLaunchKernelGGL(Partial_Update_Advected_Internal_Energy_2D, dim2dGrid, - dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, Q_Ly, Q_Ry, - nx, ny, n_ghost, dx, dy, dt, gama, n_fields); + hipLaunchKernelGGL(Partial_Update_Advected_Internal_Energy_2D, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, + Q_Ly, Q_Ry, nx, ny, n_ghost, dx, dy, dt, gama, n_fields); #endif // Step 3: Update the conserved variable array - hipLaunchKernelGGL(Update_Conserved_Variables_2D, dim2dGrid, dim1dBlock, 0, 0, - dev_conserved, F_x, F_y, nx, ny, x_off, y_off, n_ghost, dx, - dy, xbound, ybound, dt, gama, n_fields); + hipLaunchKernelGGL(Update_Conserved_Variables_2D, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, F_x, F_y, nx, ny, x_off, + y_off, n_ghost, dx, dy, xbound, ybound, dt, gama, n_fields); CudaCheckError(); // Synchronize the total and internal energy #ifdef DE - hipLaunchKernelGGL(Select_Internal_Energy_2D, dim2dGrid, dim1dBlock, 0, 0, - dev_conserved, nx, ny, n_ghost, n_fields); - hipLaunchKernelGGL(Sync_Energies_2D, dim2dGrid, dim1dBlock, 0, 0, - dev_conserved, nx, ny, n_ghost, gama, n_fields); + hipLaunchKernelGGL(Select_Internal_Energy_2D, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, n_ghost, n_fields); + hipLaunchKernelGGL(Sync_Energies_2D, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, n_ghost, gama, n_fields); CudaCheckError(); #endif diff --git a/src/integrators/simple_2D_cuda.h b/src/integrators/simple_2D_cuda.h index 58fc7077a..297800b10 100644 --- a/src/integrators/simple_2D_cuda.h +++ b/src/integrators/simple_2D_cuda.h @@ -8,8 +8,7 @@ #include "../global/global.h" -void Simple_Algorithm_2D_CUDA(Real *d_conserved, int nx, int ny, int x_off, - int y_off, int n_ghost, Real dx, Real dy, +void Simple_Algorithm_2D_CUDA(Real *d_conserved, int nx, int ny, int x_off, int y_off, int n_ghost, Real dx, Real dy, Real xbound, Real ybound, Real dt, int n_fields); void Free_Memory_Simple_2D(); diff --git a/src/integrators/simple_3D_cuda.cu b/src/integrators/simple_3D_cuda.cu index 24a40562f..01c9c6ac1 100644 --- a/src/integrators/simple_3D_cuda.cu +++ b/src/integrators/simple_3D_cuda.cu @@ -24,12 +24,9 @@ #include "../riemann_solvers/roe_cuda.h" #include "../utils/gpu.hpp" -void Simple_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, - int ny, int nz, int x_off, int y_off, int z_off, - int n_ghost, Real dx, Real dy, Real dz, - Real xbound, Real ybound, Real zbound, Real dt, - int n_fields, Real density_floor, Real U_floor, - Real *host_grav_potential) +void Simple_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, int ny, int nz, int x_off, int y_off, + int z_off, int n_ghost, Real dx, Real dy, Real dz, Real xbound, Real ybound, Real zbound, + Real dt, int n_fields, Real density_floor, Real U_floor, Real *host_grav_potential) { // Here, *dev_conserved contains the entire // set of conserved variables on the grid @@ -55,9 +52,8 @@ void Simple_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, " Allocating Hydro Memory: nfields: %d n_cells: %d nx: %d ny: %d " "nz: %d \n", n_fields, n_cells, nx, ny, nz); - chprintf(" Memory needed: %f GB Free: %f GB Total: %f GB \n", - n_fields * n_cells * sizeof(Real) / 1e9, global_free / 1e9, - global_total / 1e9); + chprintf(" Memory needed: %f GB Free: %f GB Total: %f GB \n", n_fields * n_cells * sizeof(Real) / 1e9, + global_free / 1e9, global_total / 1e9); dev_conserved = d_conserved; CudaSafeCall(cudaMalloc((void **)&Q_Lx, n_fields * n_cells * sizeof(Real))); CudaSafeCall(cudaMalloc((void **)&Q_Rx, n_fields * n_cells * sizeof(Real))); @@ -86,95 +82,81 @@ void Simple_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, } #if defined(GRAVITY) && !defined(GRAVITY_GPU) - CudaSafeCall(cudaMemcpy(dev_grav_potential, temp_potential, - n_cells * sizeof(Real), cudaMemcpyHostToDevice)); + CudaSafeCall(cudaMemcpy(dev_grav_potential, temp_potential, n_cells * sizeof(Real), cudaMemcpyHostToDevice)); #endif // Step 1: Construct left and right interface values using updated conserved // variables #ifdef PCM - hipLaunchKernelGGL(PCM_Reconstruction_3D, dim1dGrid, dim1dBlock, 0, 0, - dev_conserved, Q_Lx, Q_Rx, Q_Ly, Q_Ry, Q_Lz, Q_Rz, nx, ny, - nz, n_ghost, gama, n_fields); + hipLaunchKernelGGL(PCM_Reconstruction_3D, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, Q_Ly, Q_Ry, Q_Lz, + Q_Rz, nx, ny, nz, n_ghost, gama, n_fields); #endif #ifdef PLMP - hipLaunchKernelGGL(PLMP_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, - Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, - n_fields); - hipLaunchKernelGGL(PLMP_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, - Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, dt, gama, 1, - n_fields); - hipLaunchKernelGGL(PLMP_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, - Q_Lz, Q_Rz, nx, ny, nz, n_ghost, dz, dt, gama, 2, - n_fields); + hipLaunchKernelGGL(PLMP_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, + gama, 0, n_fields); + hipLaunchKernelGGL(PLMP_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, dt, + gama, 1, n_fields); + hipLaunchKernelGGL(PLMP_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lz, Q_Rz, nx, ny, nz, n_ghost, dz, dt, + gama, 2, n_fields); #endif // PLMP #ifdef PLMC - hipLaunchKernelGGL(PLMC_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, - Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, - n_fields); - hipLaunchKernelGGL(PLMC_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, - Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, dt, gama, 1, - n_fields); - hipLaunchKernelGGL(PLMC_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, - Q_Lz, Q_Rz, nx, ny, nz, n_ghost, dz, dt, gama, 2, - n_fields); + hipLaunchKernelGGL(PLMC_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, + gama, 0, n_fields); + hipLaunchKernelGGL(PLMC_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, dt, + gama, 1, n_fields); + hipLaunchKernelGGL(PLMC_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lz, Q_Rz, nx, ny, nz, n_ghost, dz, dt, + gama, 2, n_fields); #endif #ifdef PPMP - hipLaunchKernelGGL(PPMP_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, - Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, - n_fields); - hipLaunchKernelGGL(PPMP_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, - Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, dt, gama, 1, - n_fields); - hipLaunchKernelGGL(PPMP_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, - Q_Lz, Q_Rz, nx, ny, nz, n_ghost, dz, dt, gama, 2, - n_fields); + hipLaunchKernelGGL(PPMP_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, + gama, 0, n_fields); + hipLaunchKernelGGL(PPMP_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, dt, + gama, 1, n_fields); + hipLaunchKernelGGL(PPMP_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lz, Q_Rz, nx, ny, nz, n_ghost, dz, dt, + gama, 2, n_fields); #endif // PPMP #ifdef PPMC - hipLaunchKernelGGL(PPMC_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, - Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, - n_fields); - hipLaunchKernelGGL(PPMC_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, - Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, dt, gama, 1, - n_fields); - hipLaunchKernelGGL(PPMC_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, - Q_Lz, Q_Rz, nx, ny, nz, n_ghost, dz, dt, gama, 2, - n_fields); + hipLaunchKernelGGL(PPMC_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, + gama, 0, n_fields); + hipLaunchKernelGGL(PPMC_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, dt, + gama, 1, n_fields); + hipLaunchKernelGGL(PPMC_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lz, Q_Rz, nx, ny, nz, n_ghost, dz, dt, + gama, 2, n_fields); CudaCheckError(); #endif // PPMC // Step 2: Calculate the fluxes #ifdef EXACT - hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, - Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); - hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, - Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields); - hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, - Q_Lz, Q_Rz, F_z, nx, ny, nz, n_ghost, gama, 2, n_fields); + hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, + gama, 0, n_fields); + hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, + gama, 1, n_fields); + hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lz, Q_Rz, F_z, nx, ny, nz, n_ghost, + gama, 2, n_fields); #endif // EXACT #ifdef ROE - hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, - Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); - hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, - Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields); - hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, - Q_Lz, Q_Rz, F_z, nx, ny, nz, n_ghost, gama, 2, n_fields); + hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, + 0, n_fields); + hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, + 1, n_fields); + hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lz, Q_Rz, F_z, nx, ny, nz, n_ghost, gama, + 2, n_fields); #endif // ROE #ifdef HLLC - hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, - Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); - hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, - Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields); - hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, - Q_Lz, Q_Rz, F_z, nx, ny, nz, n_ghost, gama, 2, n_fields); + hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, + gama, 0, n_fields); + hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, + gama, 1, n_fields); + hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lz, Q_Rz, F_z, nx, ny, nz, n_ghost, + gama, 2, n_fields); #endif // HLLC #ifdef HLL - hipLaunchKernelGGL(Calculate_HLL_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, - Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); - hipLaunchKernelGGL(Calculate_HLL_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, - Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields); - hipLaunchKernelGGL(Calculate_HLL_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, - Q_Lz, Q_Rz, F_z, nx, ny, nz, n_ghost, gama, 2, n_fields); + hipLaunchKernelGGL(Calculate_HLL_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, + 0, n_fields); + hipLaunchKernelGGL(Calculate_HLL_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, + 1, n_fields); + hipLaunchKernelGGL(Calculate_HLL_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lz, Q_Rz, F_z, nx, ny, nz, n_ghost, gama, + 2, n_fields); #endif // HLL CudaCheckError(); @@ -182,32 +164,27 @@ void Simple_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, // Compute the divergence of Vel before updating the conserved array, this // solves synchronization issues when adding this term on // Update_Conserved_Variables_3D - hipLaunchKernelGGL(Partial_Update_Advected_Internal_Energy_3D, dim1dGrid, - dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, Q_Ly, Q_Ry, - Q_Lz, Q_Rz, nx, ny, nz, n_ghost, dx, dy, dz, dt, gama, - n_fields); + hipLaunchKernelGGL(Partial_Update_Advected_Internal_Energy_3D, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, + Q_Ly, Q_Ry, Q_Lz, Q_Rz, nx, ny, nz, n_ghost, dx, dy, dz, dt, gama, n_fields); CudaCheckError(); #endif // Step 3: Update the conserved variable array - hipLaunchKernelGGL(Update_Conserved_Variables_3D, dim1dGrid, dim1dBlock, 0, 0, - dev_conserved, Q_Lx, Q_Rx, Q_Ly, Q_Ry, Q_Lz, Q_Rz, F_x, - F_y, F_z, nx, ny, nz, x_off, y_off, z_off, n_ghost, dx, dy, - dz, xbound, ybound, zbound, dt, gama, n_fields, - density_floor, dev_grav_potential); + hipLaunchKernelGGL(Update_Conserved_Variables_3D, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, Q_Ly, Q_Ry, + Q_Lz, Q_Rz, F_x, F_y, F_z, nx, ny, nz, x_off, y_off, z_off, n_ghost, dx, dy, dz, xbound, ybound, + zbound, dt, gama, n_fields, density_floor, dev_grav_potential); CudaCheckError(); #ifdef DE - hipLaunchKernelGGL(Select_Internal_Energy_3D, dim1dGrid, dim1dBlock, 0, 0, - dev_conserved, nx, ny, nz, n_ghost, n_fields); - hipLaunchKernelGGL(Sync_Energies_3D, dim1dGrid, dim1dBlock, 0, 0, - dev_conserved, nx, ny, nz, n_ghost, gama, n_fields); + hipLaunchKernelGGL(Select_Internal_Energy_3D, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, nz, n_ghost, + n_fields); + hipLaunchKernelGGL(Sync_Energies_3D, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, nz, n_ghost, gama, n_fields); CudaCheckError(); #endif #ifdef TEMPERATURE_FLOOR - hipLaunchKernelGGL(Apply_Temperature_Floor, dim1dGrid, dim1dBlock, 0, 0, - dev_conserved, nx, ny, nz, n_ghost, n_fields, U_floor); + hipLaunchKernelGGL(Apply_Temperature_Floor, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, nz, n_ghost, n_fields, + U_floor); CudaCheckError(); #endif // TEMPERATURE_FLOOR diff --git a/src/integrators/simple_3D_cuda.h b/src/integrators/simple_3D_cuda.h index c1a5f5126..dc83e044c 100644 --- a/src/integrators/simple_3D_cuda.h +++ b/src/integrators/simple_3D_cuda.h @@ -9,12 +9,9 @@ #include "../chemistry_gpu/chemistry_gpu.h" #include "../global/global.h" -void Simple_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, - int ny, int nz, int x_off, int y_off, int z_off, - int n_ghost, Real dx, Real dy, Real dz, - Real xbound, Real ybound, Real zbound, Real dt, - int n_fields, Real density_floor, Real U_floor, - Real *host_grav_potential); +void Simple_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, int ny, int nz, int x_off, int y_off, + int z_off, int n_ghost, Real dx, Real dy, Real dz, Real xbound, Real ybound, Real zbound, + Real dt, int n_fields, Real density_floor, Real U_floor, Real *host_grav_potential); void Free_Memory_Simple_3D(); diff --git a/src/io/io.cpp b/src/io/io.cpp index 9b84ca509..cd1af83a7 100644 --- a/src/io/io.cpp +++ b/src/io/io.cpp @@ -28,8 +28,7 @@ /* function used to rotate points about an axis in 3D for the rotated projection * output routine */ -void rotate_point(Real x, Real y, Real z, Real delta, Real phi, Real theta, - Real *xp, Real *yp, Real *zp); +void rotate_point(Real x, Real y, Real z, Real delta, Real phi, Real theta, Real *xp, Real *yp, Real *zp); void Create_Log_File(struct parameters P) { @@ -75,8 +74,7 @@ void Write_Message_To_Log_File(const char *message) /* Write Cholla Output Data */ void WriteData(Grid3D &G, struct parameters P, int nfile) { - cudaMemcpy(G.C.density, G.C.device, G.H.n_fields * G.H.n_cells * sizeof(Real), - cudaMemcpyDeviceToHost); + cudaMemcpy(G.C.density, G.C.device, G.H.n_fields * G.H.n_cells * sizeof(Real), cudaMemcpyDeviceToHost); chprintf("\nSaving Snapshot: %d \n", nfile); @@ -110,8 +108,7 @@ void WriteData(Grid3D &G, struct parameters P, int nfile) // This function does other checks to make sure it is valid (3D only) #ifdef HDF5 - if (P.n_out_float32 && nfile % P.n_out_float32 == 0) - OutputFloat32(G, P, nfile); + if (P.n_out_float32 && nfile % P.n_out_float32 == 0) OutputFloat32(G, P, nfile); #endif #ifdef PROJECTION @@ -119,8 +116,7 @@ void WriteData(Grid3D &G, struct parameters P, int nfile) #endif /*PROJECTION*/ #ifdef ROTATED_PROJECTION - if (nfile % P.n_rotated_projection == 0) - OutputRotatedProjectedData(G, P, nfile); + if (nfile % P.n_rotated_projection == 0) OutputRotatedProjectedData(G, P, nfile); #endif /*ROTATED_PROJECTION*/ #ifdef SLICES @@ -135,12 +131,11 @@ void WriteData(Grid3D &G, struct parameters P, int nfile) if (G.H.OUTPUT_SCALE_FACOR || G.H.Output_Initial) { G.Cosmo.Set_Next_Scale_Output(); if (!G.Cosmo.exit_now) { - chprintf(" Saved Snapshot: %d z:%f next_output: %f\n", nfile, - G.Cosmo.current_z, 1 / G.Cosmo.next_output - 1); + chprintf(" Saved Snapshot: %d z:%f next_output: %f\n", nfile, G.Cosmo.current_z, + 1 / G.Cosmo.next_output - 1); G.H.Output_Initial = false; } else { - chprintf(" Saved Snapshot: %d z:%f Exiting now\n", nfile, - G.Cosmo.current_z); + chprintf(" Saved Snapshot: %d z:%f Exiting now\n", nfile, G.Cosmo.current_z); } } else @@ -180,8 +175,7 @@ void OutputData(Grid3D &G, struct parameters P, int nfile) strcat(filename, ".h5"); #else strcat(filename, ".txt"); - if (G.H.nx * G.H.ny * G.H.nz > 1000) - printf("Ascii outputs only recommended for small problems!\n"); + if (G.H.nx * G.H.ny * G.H.nz > 1000) printf("Ascii outputs only recommended for small problems!\n"); #endif #ifdef MPI_CHOLLA sprintf(filename, "%s.%d", filename, procID); @@ -304,49 +298,39 @@ void OutputFloat32(Grid3D &G, struct parameters P, int nfile) // Using static DeviceVector here automatically allocates the buffer the // first time it is needed It persists until program exit, and then calls // Free upon destruction - cuda_utilities::DeviceVector static device_dataset_vector{ - buffer_size}; + cuda_utilities::DeviceVector static device_dataset_vector{buffer_size}; float *device_dataset_buffer = device_dataset_vector.data(); float *dataset_buffer = (float *)malloc(buffer_size * sizeof(float)); if (P.out_float32_density > 0) - WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, - file_id, dataset_buffer, device_dataset_buffer, + WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, device_dataset_buffer, G.C.d_density, "/density"); if (P.out_float32_momentum_x > 0) - WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, - file_id, dataset_buffer, device_dataset_buffer, + WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, device_dataset_buffer, G.C.d_momentum_x, "/momentum_x"); if (P.out_float32_momentum_y > 0) - WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, - file_id, dataset_buffer, device_dataset_buffer, + WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, device_dataset_buffer, G.C.d_momentum_y, "/momentum_y"); if (P.out_float32_momentum_z > 0) - WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, - file_id, dataset_buffer, device_dataset_buffer, + WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, device_dataset_buffer, G.C.d_momentum_z, "/momentum_z"); if (P.out_float32_Energy > 0) - WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, - file_id, dataset_buffer, device_dataset_buffer, + WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, device_dataset_buffer, G.C.d_Energy, "/Energy"); #ifdef DE if (P.out_float32_GasEnergy > 0) - WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, - file_id, dataset_buffer, device_dataset_buffer, + WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, device_dataset_buffer, G.C.d_GasEnergy, "/GasEnergy"); #endif // DE #ifdef MHD if (P.out_float32_magnetic_x > 0) - WriteHDF5Field3D(H.nx, H.ny, nx_dset + 1, ny_dset + 1, nz_dset + 1, - H.n_ghost - 1, file_id, dataset_buffer, + WriteHDF5Field3D(H.nx, H.ny, nx_dset + 1, ny_dset + 1, nz_dset + 1, H.n_ghost - 1, file_id, dataset_buffer, device_dataset_buffer, G.C.d_magnetic_x, "/magnetic_x"); if (P.out_float32_magnetic_y > 0) - WriteHDF5Field3D(H.nx, H.ny, nx_dset + 1, ny_dset + 1, nz_dset + 1, - H.n_ghost - 1, file_id, dataset_buffer, + WriteHDF5Field3D(H.nx, H.ny, nx_dset + 1, ny_dset + 1, nz_dset + 1, H.n_ghost - 1, file_id, dataset_buffer, device_dataset_buffer, G.C.d_magnetic_y, "/magnetic_y"); if (P.out_float32_magnetic_z > 0) - WriteHDF5Field3D(H.nx, H.ny, nx_dset + 1, ny_dset + 1, nz_dset + 1, - H.n_ghost - 1, file_id, dataset_buffer, + WriteHDF5Field3D(H.nx, H.ny, nx_dset + 1, ny_dset + 1, nz_dset + 1, H.n_ghost - 1, file_id, dataset_buffer, device_dataset_buffer, G.C.d_magnetic_z, "/magnetic_z"); #endif @@ -455,8 +439,7 @@ void OutputRotatedProjectedData(Grid3D &G, struct parameters P, int nfile) status = H5Fclose(file_id); #ifdef MPI_CHOLLA if (status < 0) { - printf("OutputRotatedProjectedData: File write failed. ProcID: %d\n", - procID); + printf("OutputRotatedProjectedData: File write failed. ProcID: %d\n", procID); chexit(-1); } #else @@ -504,8 +487,7 @@ void OutputRotatedProjectedData(Grid3D &G, struct parameters P, int nfile) #ifdef MPI_CHOLLA if (status < 0) { - printf("OutputRotatedProjectedData: File write failed. ProcID: %d\n", - procID); + printf("OutputRotatedProjectedData: File write failed. ProcID: %d\n", procID); chexit(-1); } #else @@ -576,8 +558,7 @@ void Grid3D::Write_Header_Text(FILE *fp) fprintf(fp, "Git Commit Hash = %s\n", GIT_HASH); fprintf(fp, "Macro Flags = %s\n", MACRO_FLAGS); fprintf(fp, "n_step: %d sim t: %f sim dt: %f\n", H.n_step, H.t, H.dt); - fprintf(fp, "mass unit: %e length unit: %e time unit: %e\n", MASS_UNIT, - LENGTH_UNIT, TIME_UNIT); + fprintf(fp, "mass unit: %e length unit: %e time unit: %e\n", MASS_UNIT, LENGTH_UNIT, TIME_UNIT); fprintf(fp, "nx: %d ny: %d nz: %d\n", H.nx, H.ny, H.nz); fprintf(fp, "xmin: %f ymin: %f zmin: %f\n", H.xbound, H.ybound, H.zbound); fprintf(fp, "t: %f\n", H.t); @@ -631,8 +612,7 @@ void Grid3D::Write_Header_HDF5(hid_t file_id) // Create the data space for the attribute dataspace_id = H5Screate_simple(1, &attr_dims, NULL); // Create a group attribute - attribute_id = H5Acreate(file_id, "gamma", H5T_IEEE_F64BE, dataspace_id, - H5P_DEFAULT, H5P_DEFAULT); + attribute_id = H5Acreate(file_id, "gamma", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT); // Write the attribute data status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &gama); // Close the attribute @@ -642,56 +622,45 @@ void Grid3D::Write_Header_HDF5(hid_t file_id) hid_t stringType = H5Tcopy(H5T_C_S1); H5Tset_size(stringType, H5T_VARIABLE); - attribute_id = H5Acreate(file_id, "Git Commit Hash", stringType, dataspace_id, - H5P_DEFAULT, H5P_DEFAULT); + attribute_id = H5Acreate(file_id, "Git Commit Hash", stringType, dataspace_id, H5P_DEFAULT, H5P_DEFAULT); const char *gitHash = GIT_HASH; status = H5Awrite(attribute_id, stringType, &gitHash); H5Aclose(attribute_id); - attribute_id = H5Acreate(file_id, "Macro Flags", stringType, dataspace_id, - H5P_DEFAULT, H5P_DEFAULT); + attribute_id = H5Acreate(file_id, "Macro Flags", stringType, dataspace_id, H5P_DEFAULT, H5P_DEFAULT); const char *macroFlags = MACRO_FLAGS; status = H5Awrite(attribute_id, stringType, ¯oFlags); H5Aclose(attribute_id); // Numeric Attributes - status = Write_HDF5_Attribute(file_id, dataspace_id, &H.t, "t"); - status = Write_HDF5_Attribute(file_id, dataspace_id, &H.dt, "dt"); - status = Write_HDF5_Attribute(file_id, dataspace_id, &H.n_step, "n_step"); - status = Write_HDF5_Attribute(file_id, dataspace_id, &H.n_fields, "n_fields"); - double time_unit = TIME_UNIT; - status = Write_HDF5_Attribute(file_id, dataspace_id, &time_unit, "time_unit"); - double length_unit = LENGTH_UNIT; - status = - Write_HDF5_Attribute(file_id, dataspace_id, &length_unit, "length_unit"); - double mass_unit = MASS_UNIT; - status = Write_HDF5_Attribute(file_id, dataspace_id, &mass_unit, "mass_unit"); + status = Write_HDF5_Attribute(file_id, dataspace_id, &H.t, "t"); + status = Write_HDF5_Attribute(file_id, dataspace_id, &H.dt, "dt"); + status = Write_HDF5_Attribute(file_id, dataspace_id, &H.n_step, "n_step"); + status = Write_HDF5_Attribute(file_id, dataspace_id, &H.n_fields, "n_fields"); + double time_unit = TIME_UNIT; + status = Write_HDF5_Attribute(file_id, dataspace_id, &time_unit, "time_unit"); + double length_unit = LENGTH_UNIT; + status = Write_HDF5_Attribute(file_id, dataspace_id, &length_unit, "length_unit"); + double mass_unit = MASS_UNIT; + status = Write_HDF5_Attribute(file_id, dataspace_id, &mass_unit, "mass_unit"); double velocity_unit = VELOCITY_UNIT; - status = Write_HDF5_Attribute(file_id, dataspace_id, &velocity_unit, - "velocity_unit"); - double density_unit = DENSITY_UNIT; - status = Write_HDF5_Attribute(file_id, dataspace_id, &density_unit, - "density_unit"); - double energy_unit = ENERGY_UNIT; - status = - Write_HDF5_Attribute(file_id, dataspace_id, &energy_unit, "energy_unit"); + status = Write_HDF5_Attribute(file_id, dataspace_id, &velocity_unit, "velocity_unit"); + double density_unit = DENSITY_UNIT; + status = Write_HDF5_Attribute(file_id, dataspace_id, &density_unit, "density_unit"); + double energy_unit = ENERGY_UNIT; + status = Write_HDF5_Attribute(file_id, dataspace_id, &energy_unit, "energy_unit"); #ifdef MHD double magnetic_field_unit = MAGNETIC_FIELD_UNIT; - status = Write_HDF5_Attribute(file_id, dataspace_id, &magnetic_field_unit, - "magnetic_field_unit"); + status = Write_HDF5_Attribute(file_id, dataspace_id, &magnetic_field_unit, "magnetic_field_unit"); #endif // MHD #ifdef COSMOLOGY status = Write_HDF5_Attribute(file_id, dataspace_id, &Cosmo.H0, "H0"); - status = - Write_HDF5_Attribute(file_id, dataspace_id, &Cosmo.Omega_M, "Omega_M"); - status = - Write_HDF5_Attribute(file_id, dataspace_id, &Cosmo.Omega_L, "Omega_L"); - status = Write_HDF5_Attribute(file_id, dataspace_id, &Cosmo.current_z, - "Current_z"); - status = Write_HDF5_Attribute(file_id, dataspace_id, &Cosmo.current_a, - "Current_a"); + status = Write_HDF5_Attribute(file_id, dataspace_id, &Cosmo.Omega_M, "Omega_M"); + status = Write_HDF5_Attribute(file_id, dataspace_id, &Cosmo.Omega_L, "Omega_L"); + status = Write_HDF5_Attribute(file_id, dataspace_id, &Cosmo.current_z, "Current_z"); + status = Write_HDF5_Attribute(file_id, dataspace_id, &Cosmo.current_a, "Current_a"); #endif // Close the dataspace @@ -720,8 +689,7 @@ void Grid3D::Write_Header_HDF5(hid_t file_id) int_data[i]++; } - status = Write_HDF5_Attribute(file_id, dataspace_id, int_data, - "magnetic_field_dims"); + status = Write_HDF5_Attribute(file_id, dataspace_id, int_data, "magnetic_field_dims"); #endif // MHD #ifdef MPI_CHOLLA @@ -736,8 +704,7 @@ void Grid3D::Write_Header_HDF5(hid_t file_id) int_data[1] = H.ny_real + 1; int_data[2] = H.nz_real + 1; - status = Write_HDF5_Attribute(file_id, dataspace_id, int_data, - "magnetic_field_dims_local"); + status = Write_HDF5_Attribute(file_id, dataspace_id, int_data, "magnetic_field_dims_local"); #endif // MHD int_data[0] = nx_local_start; @@ -800,8 +767,7 @@ void Grid3D::Write_Header_Rotated_HDF5(hid_t file_id) for (int j = 0; j < 2; j++) { for (int k = 0; k < 2; k++) { // find the corners of this domain in the rotated position - Get_Position(H.n_ghost + i * (H.nx - 2 * H.n_ghost), - H.n_ghost + j * (H.ny - 2 * H.n_ghost), + Get_Position(H.n_ghost + i * (H.nx - 2 * H.n_ghost), H.n_ghost + j * (H.ny - 2 * H.n_ghost), H.n_ghost + k * (H.nz - 2 * H.n_ghost), &x, &y, &z); // rotate cell position rotate_point(x, y, z, R.delta, R.phi, R.theta, &xp, &yp, &zp); @@ -831,8 +797,7 @@ void Grid3D::Write_Header_Rotated_HDF5(hid_t file_id) // Create the data space for the attribute dataspace_id = H5Screate_simple(1, &attr_dims, NULL); // Create a group attribute - attribute_id = H5Acreate(file_id, "gamma", H5T_IEEE_F64BE, dataspace_id, - H5P_DEFAULT, H5P_DEFAULT); + attribute_id = H5Acreate(file_id, "gamma", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT); // Write the attribute data status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &gama); // Close the attribute @@ -842,14 +807,12 @@ void Grid3D::Write_Header_Rotated_HDF5(hid_t file_id) hid_t stringType = H5Tcopy(H5T_C_S1); H5Tset_size(stringType, H5T_VARIABLE); - attribute_id = H5Acreate(file_id, "Git Commit Hash", stringType, dataspace_id, - H5P_DEFAULT, H5P_DEFAULT); + attribute_id = H5Acreate(file_id, "Git Commit Hash", stringType, dataspace_id, H5P_DEFAULT, H5P_DEFAULT); const char *gitHash = GIT_HASH; status = H5Awrite(attribute_id, stringType, &gitHash); H5Aclose(attribute_id); - attribute_id = H5Acreate(file_id, "Macro Flags", stringType, dataspace_id, - H5P_DEFAULT, H5P_DEFAULT); + attribute_id = H5Acreate(file_id, "Macro Flags", stringType, dataspace_id, H5P_DEFAULT, H5P_DEFAULT); const char *macroFlags = MACRO_FLAGS; status = H5Awrite(attribute_id, stringType, ¯oFlags); H5Aclose(attribute_id); @@ -958,12 +921,10 @@ void Grid3D::Write_Grid_Text(FILE *fp) fprintf(fp, "\n"); for (i = H.n_ghost; i < H.nx - H.n_ghost; i++) { id = i; - fprintf(fp, "%d\t%f\t%f\t%f\t%f\t%f", i - H.n_ghost, C.density[id], - C.momentum_x[id], C.momentum_y[id], C.momentum_z[id], - C.Energy[id]); + fprintf(fp, "%d\t%f\t%f\t%f\t%f\t%f", i - H.n_ghost, C.density[id], C.momentum_x[id], C.momentum_y[id], + C.momentum_z[id], C.Energy[id]); #ifdef MHD - fprintf(fp, "\t%f\t%f\t%f", C.magnetic_x[id], C.magnetic_y[id], - C.magnetic_z[id]); + fprintf(fp, "\t%f\t%f\t%f", C.magnetic_x[id], C.magnetic_y[id], C.magnetic_z[id]); #endif // MHD #ifdef DE fprintf(fp, "\t%f", C.GasEnergy[id]); @@ -973,8 +934,7 @@ void Grid3D::Write_Grid_Text(FILE *fp) #ifdef MHD // Save the last line of magnetic fields id = H.nx - H.n_ghost; - fprintf(fp, "%d\tNan\tNan\tNan\tNan\tNan\t%f\t%f\t%f", id, C.magnetic_x[id], - C.magnetic_y[id], C.magnetic_z[id]); + fprintf(fp, "%d\tNan\tNan\tNan\tNan\tNan\t%f\t%f\t%f", id, C.magnetic_x[id], C.magnetic_y[id], C.magnetic_z[id]); #ifdef DE fprintf(fp, "\tNan"); #endif // DE @@ -995,12 +955,10 @@ void Grid3D::Write_Grid_Text(FILE *fp) for (i = H.n_ghost; i < H.nx - H.n_ghost; i++) { for (j = H.n_ghost; j < H.ny - H.n_ghost; j++) { id = i + j * H.nx; - fprintf(fp, "%d\t%d\t%f\t%f\t%f\t%f\t%f", i - H.n_ghost, j - H.n_ghost, - C.density[id], C.momentum_x[id], C.momentum_y[id], - C.momentum_z[id], C.Energy[id]); + fprintf(fp, "%d\t%d\t%f\t%f\t%f\t%f\t%f", i - H.n_ghost, j - H.n_ghost, C.density[id], C.momentum_x[id], + C.momentum_y[id], C.momentum_z[id], C.Energy[id]); #ifdef MHD - fprintf(fp, "\t%f\t%f\t%f", C.magnetic_x[id], C.magnetic_y[id], - C.magnetic_z[id]); + fprintf(fp, "\t%f\t%f\t%f", C.magnetic_x[id], C.magnetic_y[id], C.magnetic_z[id]); #endif // MHD #ifdef DE fprintf(fp, "\t%f", C.GasEnergy[id]); @@ -1010,9 +968,8 @@ void Grid3D::Write_Grid_Text(FILE *fp) #ifdef MHD // Save the last line of magnetic fields id = i + (H.ny - H.n_ghost) * H.nx; - fprintf(fp, "%d\t%d\tNan\tNan\tNan\tNan\tNan\t%f\t%f\t%f", i - H.n_ghost, - H.ny - 2 * H.n_ghost, C.magnetic_x[id], C.magnetic_y[id], - C.magnetic_z[id]); + fprintf(fp, "%d\t%d\tNan\tNan\tNan\tNan\tNan\t%f\t%f\t%f", i - H.n_ghost, H.ny - 2 * H.n_ghost, C.magnetic_x[id], + C.magnetic_y[id], C.magnetic_z[id]); #ifdef DE fprintf(fp, "\tNan"); #endif // DE @@ -1022,9 +979,8 @@ void Grid3D::Write_Grid_Text(FILE *fp) #ifdef MHD // Save the last line of magnetic fields id = H.nx - H.n_ghost + (H.ny - H.n_ghost) * H.nx; - fprintf(fp, "%d\t%d\tNan\tNan\tNan\tNan\tNan\t%f\t%f\t%f", - H.nx - 2 * H.n_ghost, H.ny - 2 * H.n_ghost, C.magnetic_x[id], - C.magnetic_y[id], C.magnetic_z[id]); + fprintf(fp, "%d\t%d\tNan\tNan\tNan\tNan\tNan\t%f\t%f\t%f", H.nx - 2 * H.n_ghost, H.ny - 2 * H.n_ghost, + C.magnetic_x[id], C.magnetic_y[id], C.magnetic_z[id]); #ifdef DE fprintf(fp, "\tNan"); #endif // DE @@ -1050,23 +1006,19 @@ void Grid3D::Write_Grid_Text(FILE *fp) // Exclude the rightmost ghost cell on the "left" side for the hydro // variables if ((i >= H.n_ghost) and (j >= H.n_ghost) and (k >= H.n_ghost)) { - fprintf(fp, "%d\t%d\t%d\t%f\t%f\t%f\t%f\t%f", i - H.n_ghost, - j - H.n_ghost, k - H.n_ghost, C.density[id], - C.momentum_x[id], C.momentum_y[id], C.momentum_z[id], - C.Energy[id]); + fprintf(fp, "%d\t%d\t%d\t%f\t%f\t%f\t%f\t%f", i - H.n_ghost, j - H.n_ghost, k - H.n_ghost, C.density[id], + C.momentum_x[id], C.momentum_y[id], C.momentum_z[id], C.Energy[id]); #ifdef DE fprintf(fp, "\t%f", C.GasEnergy[id]); #endif // DE } else { - fprintf(fp, "%d\t%d\t%d\tn/a\tn/a\tn/a\tn/a\tn/a", i - H.n_ghost, - j - H.n_ghost, k - H.n_ghost); + fprintf(fp, "%d\t%d\t%d\tn/a\tn/a\tn/a\tn/a\tn/a", i - H.n_ghost, j - H.n_ghost, k - H.n_ghost); #ifdef DE fprintf(fp, "\tn/a"); #endif // DE } #ifdef MHD - fprintf(fp, "\t%f\t%f\t%f", C.magnetic_x[id], C.magnetic_y[id], - C.magnetic_z[id]); + fprintf(fp, "\t%f\t%f\t%f", C.magnetic_x[id], C.magnetic_y[id], C.magnetic_z[id]); #endif // MHD fprintf(fp, "\n"); } @@ -1172,79 +1124,64 @@ void Grid3D::Write_Grid_Binary(FILE *fp) } #ifdef HDF5 -herr_t Write_HDF5_Attribute(hid_t file_id, hid_t dataspace_id, - double *attribute, const char *name) +herr_t Write_HDF5_Attribute(hid_t file_id, hid_t dataspace_id, double *attribute, const char *name) { - hid_t attribute_id = H5Acreate(file_id, name, H5T_IEEE_F64BE, dataspace_id, - H5P_DEFAULT, H5P_DEFAULT); + hid_t attribute_id = H5Acreate(file_id, name, H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT); herr_t status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, attribute); status = H5Aclose(attribute_id); return status; } -herr_t Write_HDF5_Attribute(hid_t file_id, hid_t dataspace_id, int *attribute, - const char *name) +herr_t Write_HDF5_Attribute(hid_t file_id, hid_t dataspace_id, int *attribute, const char *name) { - hid_t attribute_id = H5Acreate(file_id, name, H5T_STD_I32BE, dataspace_id, - H5P_DEFAULT, H5P_DEFAULT); + hid_t attribute_id = H5Acreate(file_id, name, H5T_STD_I32BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT); herr_t status = H5Awrite(attribute_id, H5T_NATIVE_INT, attribute); status = H5Aclose(attribute_id); return status; } -herr_t Read_HDF5_Dataset(hid_t file_id, double *dataset_buffer, - const char *name) +herr_t Read_HDF5_Dataset(hid_t file_id, double *dataset_buffer, const char *name) { hid_t dataset_id = H5Dopen(file_id, name, H5P_DEFAULT); - herr_t status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, - H5P_DEFAULT, dataset_buffer); + herr_t status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer); status = H5Dclose(dataset_id); return status; } // Helper function which uses the correct HDF5 arguments based on the type of // dataset_buffer to avoid writing garbage -herr_t Write_HDF5_Dataset(hid_t file_id, hid_t dataspace_id, - double *dataset_buffer, const char *name) +herr_t Write_HDF5_Dataset(hid_t file_id, hid_t dataspace_id, double *dataset_buffer, const char *name) { // Create a dataset id for density - hid_t dataset_id = H5Dcreate(file_id, name, H5T_IEEE_F64BE, dataspace_id, - H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + hid_t dataset_id = H5Dcreate(file_id, name, H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); // Write the density array to file - herr_t status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, - H5P_DEFAULT, dataset_buffer); + herr_t status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer); // Free the dataset id status = H5Dclose(dataset_id); return status; } -herr_t Write_HDF5_Dataset(hid_t file_id, hid_t dataspace_id, - float *dataset_buffer, const char *name) +herr_t Write_HDF5_Dataset(hid_t file_id, hid_t dataspace_id, float *dataset_buffer, const char *name) { // Create a dataset id for density - hid_t dataset_id = H5Dcreate(file_id, name, H5T_IEEE_F32BE, dataspace_id, - H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + hid_t dataset_id = H5Dcreate(file_id, name, H5T_IEEE_F32BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); // Write the density array to file - herr_t status = H5Dwrite(dataset_id, H5T_NATIVE_FLOAT, H5S_ALL, H5S_ALL, - H5P_DEFAULT, dataset_buffer); + herr_t status = H5Dwrite(dataset_id, H5T_NATIVE_FLOAT, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer); // Free the dataset id status = H5Dclose(dataset_id); return status; } -void Write_HDF5_Field_1D_CPU(Header H, hid_t file_id, hid_t dataspace_id, - Real *dataset_buffer, Real *source, +void Write_HDF5_Field_1D_CPU(Header H, hid_t file_id, hid_t dataspace_id, Real *dataset_buffer, Real *source, const char *name) { // Copy non-ghost source to Buffer int id = H.n_ghost; memcpy(&dataset_buffer[0], &(source[id]), H.nx_real * sizeof(Real)); // Buffer write to HDF5 Dataset - herr_t status = - Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer, name); + herr_t status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer, name); } -void Write_HDF5_Field_1D_CPU(Header H, hid_t file_id, hid_t dataspace_id, - float *dataset_buffer, double *source, +void Write_HDF5_Field_1D_CPU(Header H, hid_t file_id, hid_t dataspace_id, float *dataset_buffer, double *source, const char *name) { // Copy non-ghost source to Buffer with conversion from double to float @@ -1253,12 +1190,10 @@ void Write_HDF5_Field_1D_CPU(Header H, hid_t file_id, hid_t dataspace_id, dataset_buffer[i] = (float)source[i + H.n_ghost]; } // Buffer write to HDF5 Dataset - herr_t status = - Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer, name); + herr_t status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer, name); } -void Write_HDF5_Field_2D_CPU(Header H, hid_t file_id, hid_t dataspace_id, - Real *dataset_buffer, Real *source, +void Write_HDF5_Field_2D_CPU(Header H, hid_t file_id, hid_t dataspace_id, Real *dataset_buffer, Real *source, const char *name) { int i, j, id, buf_id; @@ -1271,13 +1206,11 @@ void Write_HDF5_Field_2D_CPU(Header H, hid_t file_id, hid_t dataspace_id, } } // Buffer write to HDF5 Dataset - herr_t status = - Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer, name); + herr_t status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer, name); } // Convert double to float if necessary -void Write_HDF5_Field_2D_CPU(Header H, hid_t file_id, hid_t dataspace_id, - float *dataset_buffer, double *source, +void Write_HDF5_Field_2D_CPU(Header H, hid_t file_id, hid_t dataspace_id, float *dataset_buffer, double *source, const char *name) { int i, j, id, buf_id; @@ -1290,8 +1223,7 @@ void Write_HDF5_Field_2D_CPU(Header H, hid_t file_id, hid_t dataspace_id, } } // Buffer write to HDF5 Dataset - herr_t status = - Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer, name); + herr_t status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer, name); } /*! \fn void Write_Grid_HDF5(hid_t file_id) @@ -1340,8 +1272,7 @@ void Grid3D::Write_Grid_HDF5(hid_t file_id) #endif // COOLING_GRACKLE or CHEMISTRY_GPU #if defined(GRAVITY_GPU) && defined(OUTPUT_POTENTIAL) - CudaSafeCall(cudaMemcpy(Grav.F.potential_h, Grav.F.potential_d, - Grav.n_cells_potential * sizeof(Real), + CudaSafeCall(cudaMemcpy(Grav.F.potential_h, Grav.F.potential_d, Grav.n_cells_potential * sizeof(Real), cudaMemcpyDeviceToHost)); #endif // GRAVITY_GPU and OUTPUT_POTENTIAL @@ -1355,16 +1286,11 @@ void Grid3D::Write_Grid_HDF5(hid_t file_id) dims[0] = nx_dset; dataspace_id = H5Screate_simple(1, dims, NULL); - Write_HDF5_Field_1D_CPU(H, file_id, dataspace_id, dataset_buffer, C.density, - "/density"); - Write_HDF5_Field_1D_CPU(H, file_id, dataspace_id, dataset_buffer, - C.momentum_x, "/momentum_x"); - Write_HDF5_Field_1D_CPU(H, file_id, dataspace_id, dataset_buffer, - C.momentum_y, "/momentum_y"); - Write_HDF5_Field_1D_CPU(H, file_id, dataspace_id, dataset_buffer, - C.momentum_z, "/momentum_z"); - Write_HDF5_Field_1D_CPU(H, file_id, dataspace_id, dataset_buffer, C.Energy, - "/Energy"); + Write_HDF5_Field_1D_CPU(H, file_id, dataspace_id, dataset_buffer, C.density, "/density"); + Write_HDF5_Field_1D_CPU(H, file_id, dataspace_id, dataset_buffer, C.momentum_x, "/momentum_x"); + Write_HDF5_Field_1D_CPU(H, file_id, dataspace_id, dataset_buffer, C.momentum_y, "/momentum_y"); + Write_HDF5_Field_1D_CPU(H, file_id, dataspace_id, dataset_buffer, C.momentum_z, "/momentum_z"); + Write_HDF5_Field_1D_CPU(H, file_id, dataspace_id, dataset_buffer, C.Energy, "/Energy"); #ifdef SCALAR for (int s = 0; s < NSCALARS; s++) { @@ -1381,18 +1307,15 @@ void Grid3D::Write_Grid_HDF5(hid_t file_id) // dataspace_id, dataset_buffer, &(C.scalar[s*H.n_cells]), dataset); id = H.n_ghost; - memcpy(&dataset_buffer[0], &(C.scalar[id + s * H.n_cells]), - H.nx_real * sizeof(Real)); + memcpy(&dataset_buffer[0], &(C.scalar[id + s * H.n_cells]), H.nx_real * sizeof(Real)); // dataset here is just a name - status = - Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer, dataset); + status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer, dataset); } #endif // SCALAR #ifdef DE - Write_HDF5_Field_1D_CPU(H, file_id, dataspace_id, dataset_buffer, - C.GasEnergy, "/GasEnergy"); + Write_HDF5_Field_1D_CPU(H, file_id, dataspace_id, dataset_buffer, C.GasEnergy, "/GasEnergy"); #endif // DE // Free the dataspace id @@ -1411,16 +1334,11 @@ void Grid3D::Write_Grid_HDF5(hid_t file_id) dims[1] = ny_dset; dataspace_id = H5Screate_simple(2, dims, NULL); - Write_HDF5_Field_2D_CPU(H, file_id, dataspace_id, dataset_buffer, C.density, - "/density"); - Write_HDF5_Field_2D_CPU(H, file_id, dataspace_id, dataset_buffer, - C.momentum_x, "/momentum_x"); - Write_HDF5_Field_2D_CPU(H, file_id, dataspace_id, dataset_buffer, - C.momentum_y, "/momentum_y"); - Write_HDF5_Field_2D_CPU(H, file_id, dataspace_id, dataset_buffer, - C.momentum_z, "/momentum_z"); - Write_HDF5_Field_2D_CPU(H, file_id, dataspace_id, dataset_buffer, C.Energy, - "/Energy"); + Write_HDF5_Field_2D_CPU(H, file_id, dataspace_id, dataset_buffer, C.density, "/density"); + Write_HDF5_Field_2D_CPU(H, file_id, dataspace_id, dataset_buffer, C.momentum_x, "/momentum_x"); + Write_HDF5_Field_2D_CPU(H, file_id, dataspace_id, dataset_buffer, C.momentum_y, "/momentum_y"); + Write_HDF5_Field_2D_CPU(H, file_id, dataspace_id, dataset_buffer, C.momentum_z, "/momentum_z"); + Write_HDF5_Field_2D_CPU(H, file_id, dataspace_id, dataset_buffer, C.Energy, "/Energy"); #ifdef SCALAR for (int s = 0; s < NSCALARS; s++) { @@ -1444,14 +1362,12 @@ void Grid3D::Write_Grid_HDF5(hid_t file_id) } } - status = - Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer, dataset); + status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer, dataset); } #endif // SCALAR #ifdef DE - Write_HDF5_Field_2D_CPU(H, file_id, dataspace_id, dataset_buffer, - C.GasEnergy, "/GasEnergy"); + Write_HDF5_Field_2D_CPU(H, file_id, dataspace_id, dataset_buffer, C.GasEnergy, "/GasEnergy"); #endif // DE // Free the dataspace id @@ -1478,8 +1394,7 @@ void Grid3D::Write_Grid_HDF5(hid_t file_id) // Using static DeviceVector here automatically allocates the buffer the // first time it is needed It persists until program exit, and then calls // Free upon destruction - cuda_utilities::DeviceVector static device_dataset_vector{ - buffer_size}; + cuda_utilities::DeviceVector static device_dataset_vector{buffer_size}; double *device_dataset_buffer = device_dataset_vector.data(); dataset_buffer = (Real *)malloc(buffer_size * sizeof(Real)); // CudaSafeCall(cudaMalloc(&device_dataset_buffer,nx_dset*ny_dset*nz_dset*sizeof(double))); @@ -1490,30 +1405,24 @@ void Grid3D::Write_Grid_HDF5(hid_t file_id) dims[1] = ny_dset; dims[2] = nz_dset; dataspace_id = H5Screate_simple(3, dims, NULL); - WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, - dataset_buffer, device_dataset_buffer, C.d_density, - "/density"); + WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, device_dataset_buffer, + C.d_density, "/density"); if (output_momentum || H.Output_Complete_Data) { - WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, - file_id, dataset_buffer, device_dataset_buffer, + WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, device_dataset_buffer, C.d_momentum_x, "/momentum_x"); - WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, - file_id, dataset_buffer, device_dataset_buffer, + WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, device_dataset_buffer, C.d_momentum_y, "/momentum_y"); - WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, - file_id, dataset_buffer, device_dataset_buffer, + WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, device_dataset_buffer, C.d_momentum_z, "/momentum_z"); } if (output_energy || H.Output_Complete_Data) { - WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, - file_id, dataset_buffer, device_dataset_buffer, + WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, device_dataset_buffer, C.d_Energy, "/Energy"); } #ifdef SCALAR - #if !defined(COOLING_GRACKLE) && \ - !defined(CHEMISTRY_GPU) // Dont write scalars when using grackle + #if !defined(COOLING_GRACKLE) && !defined(CHEMISTRY_GPU) // Dont write scalars when using grackle for (int s = 0; s < NSCALARS; s++) { // create the name of the dataset char dataset[100]; @@ -1525,15 +1434,13 @@ void Grid3D::Write_Grid_HDF5(hid_t file_id) for (k = 0; k < H.nz_real; k++) { for (j = 0; j < H.ny_real; j++) { for (i = 0; i < H.nx_real; i++) { - id = (i + H.n_ghost) + (j + H.n_ghost) * H.nx + - (k + H.n_ghost) * H.nx * H.ny; - buf_id = k + j * H.nz_real + i * H.nz_real * H.ny_real; + id = (i + H.n_ghost) + (j + H.n_ghost) * H.nx + (k + H.n_ghost) * H.nx * H.ny; + buf_id = k + j * H.nz_real + i * H.nz_real * H.ny_real; dataset_buffer[buf_id] = C.scalar[id + s * H.n_cells]; } } } - status = - Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer, dataset); + status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer, dataset); } #else // COOLING_GRACKLE or CHEMISTRY_GPU. Write Chemistry when using // GRACKLE @@ -1541,9 +1448,8 @@ void Grid3D::Write_Grid_HDF5(hid_t file_id) for (k = 0; k < H.nz_real; k++) { for (j = 0; j < H.ny_real; j++) { for (i = 0; i < H.nx_real; i++) { - id = (i + H.n_ghost) + (j + H.n_ghost) * H.nx + - (k + H.n_ghost) * H.nx * H.ny; - buf_id = k + j * H.nz_real + i * H.nz_real * H.ny_real; + id = (i + H.n_ghost) + (j + H.n_ghost) * H.nx + (k + H.n_ghost) * H.nx * H.ny; + buf_id = k + j * H.nz_real + i * H.nz_real * H.ny_real; #ifdef COOLING_GRACKLE dataset_buffer[buf_id] = Cool.fields.HI_density[id]; #endif // COOLING_GRACKLE @@ -1553,15 +1459,13 @@ void Grid3D::Write_Grid_HDF5(hid_t file_id) } } } - status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer, - "/HI_density"); + status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer, "/HI_density"); for (k = 0; k < H.nz_real; k++) { for (j = 0; j < H.ny_real; j++) { for (i = 0; i < H.nx_real; i++) { - id = (i + H.n_ghost) + (j + H.n_ghost) * H.nx + - (k + H.n_ghost) * H.nx * H.ny; - buf_id = k + j * H.nz_real + i * H.nz_real * H.ny_real; + id = (i + H.n_ghost) + (j + H.n_ghost) * H.nx + (k + H.n_ghost) * H.nx * H.ny; + buf_id = k + j * H.nz_real + i * H.nz_real * H.ny_real; #ifdef COOLING_GRACKLE dataset_buffer[buf_id] = Cool.fields.HII_density[id]; #endif // COOLING_GRACKLE @@ -1572,16 +1476,14 @@ void Grid3D::Write_Grid_HDF5(hid_t file_id) } } if (output_full_ionization || H.Output_Complete_Data) { - status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer, - "/HII_density"); + status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer, "/HII_density"); } for (k = 0; k < H.nz_real; k++) { for (j = 0; j < H.ny_real; j++) { for (i = 0; i < H.nx_real; i++) { - id = (i + H.n_ghost) + (j + H.n_ghost) * H.nx + - (k + H.n_ghost) * H.nx * H.ny; - buf_id = k + j * H.nz_real + i * H.nz_real * H.ny_real; + id = (i + H.n_ghost) + (j + H.n_ghost) * H.nx + (k + H.n_ghost) * H.nx * H.ny; + buf_id = k + j * H.nz_real + i * H.nz_real * H.ny_real; #ifdef COOLING_GRACKLE dataset_buffer[buf_id] = Cool.fields.HeI_density[id]; #endif // COOLING_GRACKLE @@ -1592,15 +1494,13 @@ void Grid3D::Write_Grid_HDF5(hid_t file_id) } } if (output_full_ionization || H.Output_Complete_Data) { - status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer, - "/HeI_density"); + status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer, "/HeI_density"); } for (k = 0; k < H.nz_real; k++) { for (j = 0; j < H.ny_real; j++) { for (i = 0; i < H.nx_real; i++) { - id = (i + H.n_ghost) + (j + H.n_ghost) * H.nx + - (k + H.n_ghost) * H.nx * H.ny; - buf_id = k + j * H.nz_real + i * H.nz_real * H.ny_real; + id = (i + H.n_ghost) + (j + H.n_ghost) * H.nx + (k + H.n_ghost) * H.nx * H.ny; + buf_id = k + j * H.nz_real + i * H.nz_real * H.ny_real; #ifdef COOLING_GRACKLE dataset_buffer[buf_id] = Cool.fields.HeII_density[id]; #endif // COOLING_GRACKLE @@ -1610,15 +1510,13 @@ void Grid3D::Write_Grid_HDF5(hid_t file_id) } } } - status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer, - "/HeII_density"); + status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer, "/HeII_density"); for (k = 0; k < H.nz_real; k++) { for (j = 0; j < H.ny_real; j++) { for (i = 0; i < H.nx_real; i++) { - id = (i + H.n_ghost) + (j + H.n_ghost) * H.nx + - (k + H.n_ghost) * H.nx * H.ny; - buf_id = k + j * H.nz_real + i * H.nz_real * H.ny_real; + id = (i + H.n_ghost) + (j + H.n_ghost) * H.nx + (k + H.n_ghost) * H.nx * H.ny; + buf_id = k + j * H.nz_real + i * H.nz_real * H.ny_real; #ifdef COOLING_GRACKLE dataset_buffer[buf_id] = Cool.fields.HeIII_density[id]; #endif // COOLING_GRACKLE @@ -1628,15 +1526,13 @@ void Grid3D::Write_Grid_HDF5(hid_t file_id) } } } - status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer, - "/HeIII_density"); + status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer, "/HeIII_density"); for (k = 0; k < H.nz_real; k++) { for (j = 0; j < H.ny_real; j++) { for (i = 0; i < H.nx_real; i++) { - id = (i + H.n_ghost) + (j + H.n_ghost) * H.nx + - (k + H.n_ghost) * H.nx * H.ny; - buf_id = k + j * H.nz_real + i * H.nz_real * H.ny_real; + id = (i + H.n_ghost) + (j + H.n_ghost) * H.nx + (k + H.n_ghost) * H.nx * H.ny; + buf_id = k + j * H.nz_real + i * H.nz_real * H.ny_real; #ifdef COOLING_GRACKLE dataset_buffer[buf_id] = Cool.fields.e_density[id]; #endif // COOLING_GRACKLE @@ -1647,24 +1543,21 @@ void Grid3D::Write_Grid_HDF5(hid_t file_id) } } if (output_electrons || H.Output_Complete_Data) { - status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer, - "/e_density"); + status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer, "/e_density"); } #ifdef GRACKLE_METALS for (k = 0; k < H.nz_real; k++) { for (j = 0; j < H.ny_real; j++) { for (i = 0; i < H.nx_real; i++) { - id = (i + H.n_ghost) + (j + H.n_ghost) * H.nx + - (k + H.n_ghost) * H.nx * H.ny; - buf_id = k + j * H.nz_real + i * H.nz_real * H.ny_real; + id = (i + H.n_ghost) + (j + H.n_ghost) * H.nx + (k + H.n_ghost) * H.nx * H.ny; + buf_id = k + j * H.nz_real + i * H.nz_real * H.ny_real; dataset_buffer[buf_id] = Cool.fields.metal_density[id]; } } } if (output_metals || H.Output_Complete_Data) { - status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer, - "/metal_density"); + status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer, "/metal_density"); } #endif // GRACKLE_METALS @@ -1680,9 +1573,8 @@ void Grid3D::Write_Grid_HDF5(hid_t file_id) for (k = 0; k < H.nz_real; k++) { for (j = 0; j < H.ny_real; j++) { for (i = 0; i < H.nx_real; i++) { - id = (i + H.n_ghost) + (j + H.n_ghost) * H.nx + - (k + H.n_ghost) * H.nx * H.ny; - buf_id = k + j * H.nz_real + i * H.nz_real * H.ny_real; + id = (i + H.n_ghost) + (j + H.n_ghost) * H.nx + (k + H.n_ghost) * H.nx * H.ny; + buf_id = k + j * H.nz_real + i * H.nz_real * H.ny_real; #ifdef COOLING_GRACKLE dataset_buffer[buf_id] = Cool.temperature[id]; #endif @@ -1693,8 +1585,7 @@ void Grid3D::Write_Grid_HDF5(hid_t file_id) } } - status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer, - "/temperature"); + status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer, "/temperature"); #endif // OUTPUT_TEMPERATURE @@ -1703,8 +1594,7 @@ void Grid3D::Write_Grid_HDF5(hid_t file_id) #ifdef DE if (output_energy || H.Output_Complete_Data) { - WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, - file_id, dataset_buffer, device_dataset_buffer, + WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, device_dataset_buffer, C.d_GasEnergy, "/GasEnergy"); } #endif // DE @@ -1716,32 +1606,25 @@ void Grid3D::Write_Grid_HDF5(hid_t file_id) for (i = 0; i < Grav.nx_local; i++) { // id = (i+H.n_ghost) + (j+H.n_ghost)*H.nx + (k+H.n_ghost)*H.nx*H.ny; // buf_id = k + j*H.nz_real + i*H.nz_real*H.ny_real; - id = (i + N_GHOST_POTENTIAL) + - (j + N_GHOST_POTENTIAL) * - (Grav.nx_local + 2 * N_GHOST_POTENTIAL) + - (k + N_GHOST_POTENTIAL) * - (Grav.nx_local + 2 * N_GHOST_POTENTIAL) * + id = (i + N_GHOST_POTENTIAL) + (j + N_GHOST_POTENTIAL) * (Grav.nx_local + 2 * N_GHOST_POTENTIAL) + + (k + N_GHOST_POTENTIAL) * (Grav.nx_local + 2 * N_GHOST_POTENTIAL) * (Grav.ny_local + 2 * N_GHOST_POTENTIAL); - buf_id = k + j * Grav.nz_local + i * Grav.nz_local * Grav.ny_local; + buf_id = k + j * Grav.nz_local + i * Grav.nz_local * Grav.ny_local; dataset_buffer[buf_id] = Grav.F.potential_h[id]; } } } - status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer, - "/grav_potential"); + status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer, "/grav_potential"); #endif // GRAVITY and OUTPUT_POTENTIAL #ifdef MHD if (H.Output_Complete_Data) { // Note: for WriteHDF5Field3D, use the left side n_ghost - WriteHDF5Field3D(H.nx, H.ny, nx_dset + 1, ny_dset + 1, nz_dset + 1, - H.n_ghost - 1, file_id, dataset_buffer, + WriteHDF5Field3D(H.nx, H.ny, nx_dset + 1, ny_dset + 1, nz_dset + 1, H.n_ghost - 1, file_id, dataset_buffer, device_dataset_buffer, C.d_magnetic_x, "/magnetic_x"); - WriteHDF5Field3D(H.nx, H.ny, nx_dset + 1, ny_dset + 1, nz_dset + 1, - H.n_ghost - 1, file_id, dataset_buffer, + WriteHDF5Field3D(H.nx, H.ny, nx_dset + 1, ny_dset + 1, nz_dset + 1, H.n_ghost - 1, file_id, dataset_buffer, device_dataset_buffer, C.d_magnetic_y, "/magnetic_y"); - WriteHDF5Field3D(H.nx, H.ny, nx_dset + 1, ny_dset + 1, nz_dset + 1, - H.n_ghost - 1, file_id, dataset_buffer, + WriteHDF5Field3D(H.nx, H.ny, nx_dset + 1, ny_dset + 1, nz_dset + 1, H.n_ghost - 1, file_id, dataset_buffer, device_dataset_buffer, C.d_magnetic_z, "/magnetic_z"); } #endif // MHD @@ -1796,8 +1679,7 @@ void Grid3D::Write_Projection_HDF5(hid_t file_id) Txy = 0; // for each xy element, sum over the z column for (k = 0; k < H.nz_real; k++) { - id = (i + H.n_ghost) + (j + H.n_ghost) * H.nx + - (k + H.n_ghost) * H.nx * H.ny; + id = (i + H.n_ghost) + (j + H.n_ghost) * H.nx + (k + H.n_ghost) * H.nx * H.ny; // sum density dxy += C.density[id] * H.dz; // calculate number density @@ -1808,8 +1690,7 @@ void Grid3D::Write_Projection_HDF5(hid_t file_id) Real my = C.momentum_y[id]; Real mz = C.momentum_z[id]; Real E = C.Energy[id]; - T = (E - 0.5 * (mx * mx + my * my + mz * mz) / C.density[id]) * - (gama - 1.0) * PRESSURE_UNIT / (n * KB); + T = (E - 0.5 * (mx * mx + my * my + mz * mz) / C.density[id]) * (gama - 1.0) * PRESSURE_UNIT / (n * KB); #endif #ifdef DE T = C.GasEnergy[id] * PRESSURE_UNIT * (gama - 1.0) / (n * KB); @@ -1829,8 +1710,7 @@ void Grid3D::Write_Projection_HDF5(hid_t file_id) Txz = 0; // for each xz element, sum over the y column for (j = 0; j < H.ny_real; j++) { - id = (i + H.n_ghost) + (j + H.n_ghost) * H.nx + - (k + H.n_ghost) * H.nx * H.ny; + id = (i + H.n_ghost) + (j + H.n_ghost) * H.nx + (k + H.n_ghost) * H.nx * H.ny; // sum density dxz += C.density[id] * H.dy; // calculate number density @@ -1841,8 +1721,7 @@ void Grid3D::Write_Projection_HDF5(hid_t file_id) Real my = C.momentum_y[id]; Real mz = C.momentum_z[id]; Real E = C.Energy[id]; - T = (E - 0.5 * (mx * mx + my * my + mz * mz) / C.density[id]) * - (gama - 1.0) * PRESSURE_UNIT / (n * KB); + T = (E - 0.5 * (mx * mx + my * my + mz * mz) / C.density[id]) * (gama - 1.0) * PRESSURE_UNIT / (n * KB); #endif #ifdef DE T = C.GasEnergy[id] * PRESSURE_UNIT * (gama - 1.0) / (n * KB); @@ -1856,14 +1735,10 @@ void Grid3D::Write_Projection_HDF5(hid_t file_id) } // Write the projected density and temperature arrays to file - status = Write_HDF5_Dataset(file_id, dataspace_xy_id, dataset_buffer_dxy, - "/d_xy"); - status = Write_HDF5_Dataset(file_id, dataspace_xz_id, dataset_buffer_dxz, - "/d_xz"); - status = Write_HDF5_Dataset(file_id, dataspace_xy_id, dataset_buffer_Txy, - "/T_xy"); - status = Write_HDF5_Dataset(file_id, dataspace_xy_id, dataset_buffer_Txz, - "/T_xz"); + status = Write_HDF5_Dataset(file_id, dataspace_xy_id, dataset_buffer_dxy, "/d_xy"); + status = Write_HDF5_Dataset(file_id, dataspace_xz_id, dataset_buffer_dxz, "/d_xz"); + status = Write_HDF5_Dataset(file_id, dataspace_xy_id, dataset_buffer_Txy, "/T_xy"); + status = Write_HDF5_Dataset(file_id, dataspace_xy_id, dataset_buffer_Txz, "/T_xz"); // Free the dataspace ids status = H5Sclose(dataspace_xz_id); @@ -1952,8 +1827,7 @@ void Grid3D::Write_Rotated_Projection_HDF5(hid_t file_id) for (i = 0; i < H.nx_real; i++) { for (j = 0; j < H.ny_real; j++) { // get cell index - id = (i + H.n_ghost) + (j + H.n_ghost) * H.nx + - (k + H.n_ghost) * H.nx * H.ny; + id = (i + H.n_ghost) + (j + H.n_ghost) * H.nx + (k + H.n_ghost) * H.nx * H.ny; // get cell positions Get_Position(i + H.n_ghost, j + H.n_ghost, k + H.n_ghost, &x, &y, &z); @@ -1990,8 +1864,7 @@ void Grid3D::Write_Rotated_Projection_HDF5(hid_t file_id) Real my = C.momentum_y[id]; Real mz = C.momentum_z[id]; Real E = C.Energy[id]; - T = (E - 0.5 * (mx * mx + my * my + mz * mz) / C.density[id]) * - (gama - 1.0) * PRESSURE_UNIT / (n * KB); + T = (E - 0.5 * (mx * mx + my * my + mz * mz) / C.density[id]) * (gama - 1.0) * PRESSURE_UNIT / (n * KB); #endif #ifdef DE T = C.GasEnergy[id] * PRESSURE_UNIT * (gama - 1.0) / (n * KB); @@ -2012,16 +1885,11 @@ void Grid3D::Write_Rotated_Projection_HDF5(hid_t file_id) } // Write projected d,T,vx,vy,vz - status = Write_HDF5_Dataset(file_id, dataspace_xzr_id, dataset_buffer_dxzr, - "/d_xzr"); - status = Write_HDF5_Dataset(file_id, dataspace_xzr_id, dataset_buffer_Txzr, - "/T_xzr"); - status = Write_HDF5_Dataset(file_id, dataspace_xzr_id, dataset_buffer_vxxzr, - "/vx_xzr"); - status = Write_HDF5_Dataset(file_id, dataspace_xzr_id, dataset_buffer_vyxzr, - "/vy_xzr"); - status = Write_HDF5_Dataset(file_id, dataspace_xzr_id, dataset_buffer_vzxzr, - "/vz_xzr"); + status = Write_HDF5_Dataset(file_id, dataspace_xzr_id, dataset_buffer_dxzr, "/d_xzr"); + status = Write_HDF5_Dataset(file_id, dataspace_xzr_id, dataset_buffer_Txzr, "/T_xzr"); + status = Write_HDF5_Dataset(file_id, dataspace_xzr_id, dataset_buffer_vxxzr, "/vx_xzr"); + status = Write_HDF5_Dataset(file_id, dataspace_xzr_id, dataset_buffer_vyxzr, "/vy_xzr"); + status = Write_HDF5_Dataset(file_id, dataspace_xzr_id, dataset_buffer_vzxzr, "/vz_xzr"); // Free the dataspace id status = H5Sclose(dataspace_xzr_id); @@ -2090,21 +1958,19 @@ void Grid3D::Write_Slices_HDF5(hid_t file_id) dataset_buffer_GE = (Real *)malloc(H.nx_real * H.ny_real * sizeof(Real)); #endif #ifdef SCALAR - dataset_buffer_scalar = - (Real *)malloc(NSCALARS * H.nx_real * H.ny_real * sizeof(Real)); + dataset_buffer_scalar = (Real *)malloc(NSCALARS * H.nx_real * H.ny_real * sizeof(Real)); #endif // Copy the xy slices to the memory buffers for (j = 0; j < H.ny_real; j++) { for (i = 0; i < H.nx_real; i++) { - id = (i + H.n_ghost) + (j + H.n_ghost) * H.nx + zslice * H.nx * H.ny; + id = (i + H.n_ghost) + (j + H.n_ghost) * H.nx + zslice * H.nx * H.ny; buf_id = j + i * H.ny_real; #ifdef MPI_CHOLLA // When there are multiple processes, check whether this slice is in // your domain if (zslice >= nz_local_start && zslice < nz_local_start + nz_local) { - id = (i + H.n_ghost) + (j + H.n_ghost) * H.nx + - (zslice - nz_local_start + H.n_ghost) * H.nx * H.ny; + id = (i + H.n_ghost) + (j + H.n_ghost) * H.nx + (zslice - nz_local_start + H.n_ghost) * H.nx * H.ny; #endif // MPI_CHOLLA dataset_buffer_d[buf_id] = C.density[id]; dataset_buffer_mx[buf_id] = C.momentum_x[id]; @@ -2116,8 +1982,7 @@ void Grid3D::Write_Slices_HDF5(hid_t file_id) #endif #ifdef SCALAR for (int ii = 0; ii < NSCALARS; ii++) { - dataset_buffer_scalar[buf_id + ii * H.nx * H.ny] = - C.scalar[id + ii * H.n_cells]; + dataset_buffer_scalar[buf_id + ii * H.nx * H.ny] = C.scalar[id + ii * H.n_cells]; } #endif #ifdef MPI_CHOLLA @@ -2143,23 +2008,16 @@ void Grid3D::Write_Slices_HDF5(hid_t file_id) } // Write out the xy datasets for each variable - status = - Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_d, "/d_xy"); - status = - Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_mx, "/mx_xy"); - status = - Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_my, "/my_xy"); - status = - Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_mz, "/mz_xy"); - status = - Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_E, "/E_xy"); + status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_d, "/d_xy"); + status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_mx, "/mx_xy"); + status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_my, "/my_xy"); + status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_mz, "/mz_xy"); + status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_E, "/E_xy"); #ifdef DE - status = - Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_GE, "/GE_xy"); + status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_GE, "/GE_xy"); #endif #ifdef SCALAR - status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_scalar, - "/scalar_xy"); + status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_scalar, "/scalar_xy"); #endif // Free the dataspace id status = H5Sclose(dataspace_id); @@ -2192,21 +2050,19 @@ void Grid3D::Write_Slices_HDF5(hid_t file_id) dataset_buffer_GE = (Real *)malloc(H.nx_real * H.nz_real * sizeof(Real)); #endif #ifdef SCALAR - dataset_buffer_scalar = - (Real *)malloc(NSCALARS * H.nx_real * H.nz_real * sizeof(Real)); + dataset_buffer_scalar = (Real *)malloc(NSCALARS * H.nx_real * H.nz_real * sizeof(Real)); #endif // Copy the xz slices to the memory buffers for (k = 0; k < H.nz_real; k++) { for (i = 0; i < H.nx_real; i++) { - id = (i + H.n_ghost) + yslice * H.nx + (k + H.n_ghost) * H.nx * H.ny; + id = (i + H.n_ghost) + yslice * H.nx + (k + H.n_ghost) * H.nx * H.ny; buf_id = k + i * H.nz_real; #ifdef MPI_CHOLLA // When there are multiple processes, check whether this slice is in // your domain if (yslice >= ny_local_start && yslice < ny_local_start + ny_local) { - id = (i + H.n_ghost) + (yslice - ny_local_start + H.n_ghost) * H.nx + - (k + H.n_ghost) * H.nx * H.ny; + id = (i + H.n_ghost) + (yslice - ny_local_start + H.n_ghost) * H.nx + (k + H.n_ghost) * H.nx * H.ny; #endif // MPI_CHOLLA dataset_buffer_d[buf_id] = C.density[id]; dataset_buffer_mx[buf_id] = C.momentum_x[id]; @@ -2218,8 +2074,7 @@ void Grid3D::Write_Slices_HDF5(hid_t file_id) #endif #ifdef SCALAR for (int ii = 0; ii < NSCALARS; ii++) { - dataset_buffer_scalar[buf_id + ii * H.nx * H.nz] = - C.scalar[id + ii * H.n_cells]; + dataset_buffer_scalar[buf_id + ii * H.nx * H.nz] = C.scalar[id + ii * H.n_cells]; } #endif #ifdef MPI_CHOLLA @@ -2245,23 +2100,16 @@ void Grid3D::Write_Slices_HDF5(hid_t file_id) } // Write out the xz datasets for each variable - status = - Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_d, "/d_xz"); - status = - Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_mx, "/mx_xz"); - status = - Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_my, "/my_xz"); - status = - Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_mz, "/mz_xz"); - status = - Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_E, "/E_xz"); + status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_d, "/d_xz"); + status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_mx, "/mx_xz"); + status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_my, "/my_xz"); + status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_mz, "/mz_xz"); + status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_E, "/E_xz"); #ifdef DE - status = - Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_GE, "/GE_xz"); + status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_GE, "/GE_xz"); #endif #ifdef SCALAR - status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_scalar, - "/scalar_xz"); + status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_scalar, "/scalar_xz"); #endif // Free the dataspace id @@ -2295,21 +2143,19 @@ void Grid3D::Write_Slices_HDF5(hid_t file_id) dataset_buffer_GE = (Real *)malloc(H.ny_real * H.nz_real * sizeof(Real)); #endif #ifdef SCALAR - dataset_buffer_scalar = - (Real *)malloc(NSCALARS * H.ny_real * H.nz_real * sizeof(Real)); + dataset_buffer_scalar = (Real *)malloc(NSCALARS * H.ny_real * H.nz_real * sizeof(Real)); #endif // Copy the yz slices to the memory buffers for (k = 0; k < H.nz_real; k++) { for (j = 0; j < H.ny_real; j++) { - id = xslice + (j + H.n_ghost) * H.nx + (k + H.n_ghost) * H.nx * H.ny; + id = xslice + (j + H.n_ghost) * H.nx + (k + H.n_ghost) * H.nx * H.ny; buf_id = k + j * H.nz_real; #ifdef MPI_CHOLLA // When there are multiple processes, check whether this slice is in // your domain if (xslice >= nx_local_start && xslice < nx_local_start + nx_local) { - id = (xslice - nx_local_start) + (j + H.n_ghost) * H.nx + - (k + H.n_ghost) * H.nx * H.ny; + id = (xslice - nx_local_start) + (j + H.n_ghost) * H.nx + (k + H.n_ghost) * H.nx * H.ny; #endif // MPI_CHOLLA dataset_buffer_d[buf_id] = C.density[id]; dataset_buffer_mx[buf_id] = C.momentum_x[id]; @@ -2321,8 +2167,7 @@ void Grid3D::Write_Slices_HDF5(hid_t file_id) #endif #ifdef SCALAR for (int ii = 0; ii < NSCALARS; ii++) { - dataset_buffer_scalar[buf_id + ii * H.ny * H.nz] = - C.scalar[id + ii * H.n_cells]; + dataset_buffer_scalar[buf_id + ii * H.ny * H.nz] = C.scalar[id + ii * H.n_cells]; } #endif #ifdef MPI_CHOLLA @@ -2348,23 +2193,16 @@ void Grid3D::Write_Slices_HDF5(hid_t file_id) } // Write out the yz datasets for each variable - status = - Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_d, "/d_yz"); - status = - Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_mx, "/mx_yz"); - status = - Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_my, "/my_yz"); - status = - Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_mz, "/mz_yz"); - status = - Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_E, "/E_yz"); + status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_d, "/d_yz"); + status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_mx, "/mx_yz"); + status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_my, "/my_yz"); + status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_mz, "/mz_yz"); + status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_E, "/E_yz"); #ifdef DE - status = - Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_GE, "/GE_yz"); + status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_GE, "/GE_yz"); #endif #ifdef SCALAR - status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_scalar, - "/scalar_yz"); + status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_scalar, "/scalar_yz"); #endif // Free the dataspace id @@ -2616,8 +2454,7 @@ void Grid3D::Read_Grid_HDF5(hid_t file_id, struct parameters P) dataset_id = H5Dopen(file_id, "/density", H5P_DEFAULT); // Read the density array into the dataset buffer // NOTE: NEED TO FIX FOR // FLOAT REAL!!! - status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, - H5P_DEFAULT, dataset_buffer); + status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer); // Free the dataset id status = H5Dclose(dataset_id); @@ -2629,8 +2466,7 @@ void Grid3D::Read_Grid_HDF5(hid_t file_id, struct parameters P) dataset_id = H5Dopen(file_id, "/momentum_x", H5P_DEFAULT); // Read the x momentum array into the dataset buffer // NOTE: NEED TO FIX // FOR FLOAT REAL!!! - status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, - H5P_DEFAULT, dataset_buffer); + status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer); // Free the dataset id status = H5Dclose(dataset_id); @@ -2642,8 +2478,7 @@ void Grid3D::Read_Grid_HDF5(hid_t file_id, struct parameters P) dataset_id = H5Dopen(file_id, "/momentum_y", H5P_DEFAULT); // Read the x momentum array into the dataset buffer // NOTE: NEED TO FIX // FOR FLOAT REAL!!! - status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, - H5P_DEFAULT, dataset_buffer); + status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer); // Free the dataset id status = H5Dclose(dataset_id); @@ -2655,8 +2490,7 @@ void Grid3D::Read_Grid_HDF5(hid_t file_id, struct parameters P) dataset_id = H5Dopen(file_id, "/momentum_z", H5P_DEFAULT); // Read the x momentum array into the dataset buffer // NOTE: NEED TO FIX // FOR FLOAT REAL!!! - status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, - H5P_DEFAULT, dataset_buffer); + status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer); // Free the dataset id status = H5Dclose(dataset_id); @@ -2668,8 +2502,7 @@ void Grid3D::Read_Grid_HDF5(hid_t file_id, struct parameters P) dataset_id = H5Dopen(file_id, "/Energy", H5P_DEFAULT); // Read the Energy array into the dataset buffer // NOTE: NEED TO FIX FOR // FLOAT REAL!!! - status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, - H5P_DEFAULT, dataset_buffer); + status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer); // Free the dataset id status = H5Dclose(dataset_id); @@ -2682,8 +2515,7 @@ void Grid3D::Read_Grid_HDF5(hid_t file_id, struct parameters P) dataset_id = H5Dopen(file_id, "/GasEnergy", H5P_DEFAULT); // Read the Energy array into the dataset buffer // NOTE: NEED TO FIX FOR // FLOAT REAL!!! - status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, - H5P_DEFAULT, dataset_buffer); + status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer); // Free the dataset id status = H5Dclose(dataset_id); @@ -2705,15 +2537,13 @@ void Grid3D::Read_Grid_HDF5(hid_t file_id, struct parameters P) dataset_id = H5Dopen(file_id, dataset, H5P_DEFAULT); // Read the scalar array into the dataset buffer // NOTE: NEED TO FIX FOR // FLOAT REAL!!! - status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, - H5P_DEFAULT, dataset_buffer); + status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer); // Free the dataset id status = H5Dclose(dataset_id); // Copy the scalar array to the grid id = H.n_ghost; - memcpy(&(C.scalar[id + s * H.n_cells]), &dataset_buffer[0], - H.nx_real * sizeof(Real)); + memcpy(&(C.scalar[id + s * H.n_cells]), &dataset_buffer[0], H.nx_real * sizeof(Real)); } #endif // SCALAR } @@ -2727,8 +2557,7 @@ void Grid3D::Read_Grid_HDF5(hid_t file_id, struct parameters P) dataset_id = H5Dopen(file_id, "/density", H5P_DEFAULT); // Read the density array into the dataset buffer // NOTE: NEED TO FIX FOR // FLOAT REAL!!! - status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, - H5P_DEFAULT, dataset_buffer); + status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer); // Free the dataset id status = H5Dclose(dataset_id); @@ -2745,8 +2574,7 @@ void Grid3D::Read_Grid_HDF5(hid_t file_id, struct parameters P) dataset_id = H5Dopen(file_id, "/momentum_x", H5P_DEFAULT); // Read the x momentum array into the dataset buffer // NOTE: NEED TO FIX // FOR FLOAT REAL!!! - status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, - H5P_DEFAULT, dataset_buffer); + status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer); // Free the dataset id status = H5Dclose(dataset_id); @@ -2763,8 +2591,7 @@ void Grid3D::Read_Grid_HDF5(hid_t file_id, struct parameters P) dataset_id = H5Dopen(file_id, "/momentum_y", H5P_DEFAULT); // Read the y momentum array into the dataset buffer // NOTE: NEED TO FIX // FOR FLOAT REAL!!! - status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, - H5P_DEFAULT, dataset_buffer); + status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer); // Free the dataset id status = H5Dclose(dataset_id); @@ -2781,8 +2608,7 @@ void Grid3D::Read_Grid_HDF5(hid_t file_id, struct parameters P) dataset_id = H5Dopen(file_id, "/momentum_z", H5P_DEFAULT); // Read the z momentum array into the dataset buffer // NOTE: NEED TO FIX // FOR FLOAT REAL!!! - status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, - H5P_DEFAULT, dataset_buffer); + status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer); // Free the dataset id status = H5Dclose(dataset_id); @@ -2799,8 +2625,7 @@ void Grid3D::Read_Grid_HDF5(hid_t file_id, struct parameters P) dataset_id = H5Dopen(file_id, "/Energy", H5P_DEFAULT); // Read the Energy array into the dataset buffer // NOTE: NEED TO FIX FOR // FLOAT REAL!!! - status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, - H5P_DEFAULT, dataset_buffer); + status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer); // Free the dataset id status = H5Dclose(dataset_id); @@ -2818,8 +2643,7 @@ void Grid3D::Read_Grid_HDF5(hid_t file_id, struct parameters P) dataset_id = H5Dopen(file_id, "/GasEnergy", H5P_DEFAULT); // Read the internal energy array into the dataset buffer // NOTE: NEED TO // FIX FOR FLOAT REAL!!! - status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, - H5P_DEFAULT, dataset_buffer); + status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer); // Free the dataset id status = H5Dclose(dataset_id); @@ -2846,16 +2670,15 @@ void Grid3D::Read_Grid_HDF5(hid_t file_id, struct parameters P) dataset_id = H5Dopen(file_id, dataset, H5P_DEFAULT); // Read the scalar array into the dataset buffer // NOTE: NEED TO FIX FOR // FLOAT REAL!!! - status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, - H5P_DEFAULT, dataset_buffer); + status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer); // Free the dataset id status = H5Dclose(dataset_id); // Copy the scalar array to the grid for (j = 0; j < H.ny_real; j++) { for (i = 0; i < H.nx_real; i++) { - id = (i + H.n_ghost) + (j + H.n_ghost) * H.nx; - buf_id = j + i * H.ny_real; + id = (i + H.n_ghost) + (j + H.n_ghost) * H.nx; + buf_id = j + i * H.ny_real; C.scalar[id + s * H.n_cells] = dataset_buffer[buf_id]; } } @@ -2870,15 +2693,13 @@ void Grid3D::Read_Grid_HDF5(hid_t file_id, struct parameters P) Real mean_g, min_g, max_g; // need a dataset buffer to remap fastest index - dataset_buffer = - (Real *)malloc(H.nz_real * H.ny_real * H.nx_real * sizeof(Real)); + dataset_buffer = (Real *)malloc(H.nz_real * H.ny_real * H.nx_real * sizeof(Real)); // Open the density dataset dataset_id = H5Dopen(file_id, "/density", H5P_DEFAULT); // Read the density array into the dataset buffer // NOTE: NEED TO FIX FOR // FLOAT REAL!!! - status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, - H5P_DEFAULT, dataset_buffer); + status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer); // Free the dataset id status = H5Dclose(dataset_id); @@ -2890,8 +2711,7 @@ void Grid3D::Read_Grid_HDF5(hid_t file_id, struct parameters P) for (k = 0; k < H.nz_real; k++) { for (j = 0; j < H.ny_real; j++) { for (i = 0; i < H.nx_real; i++) { - id = (i + H.n_ghost) + (j + H.n_ghost) * H.nx + - (k + H.n_ghost) * H.nx * H.ny; + id = (i + H.n_ghost) + (j + H.n_ghost) * H.nx + (k + H.n_ghost) * H.nx * H.ny; buf_id = k + j * H.nz_real + i * H.nz_real * H.ny_real; C.density[id] = dataset_buffer[buf_id]; mean_l += C.density[id]; @@ -2912,17 +2732,14 @@ void Grid3D::Read_Grid_HDF5(hid_t file_id, struct parameters P) #endif // MPI_CHOLLA #if defined(PRINT_INITIAL_STATS) && defined(COSMOLOGY) - chprintf( - " Density Mean: %f Min: %f Max: %f [ h^2 Msun kpc^-3] \n", - mean_l, min_l, max_l); + chprintf(" Density Mean: %f Min: %f Max: %f [ h^2 Msun kpc^-3] \n", mean_l, min_l, max_l); #endif // PRINT_INITIAL_STATS and COSMOLOGY // Open the x momentum dataset dataset_id = H5Dopen(file_id, "/momentum_x", H5P_DEFAULT); // Read the x momentum array into the dataset buffer // NOTE: NEED TO FIX // FOR FLOAT REAL!!! - status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, - H5P_DEFAULT, dataset_buffer); + status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer); // Free the dataset id status = H5Dclose(dataset_id); @@ -2933,8 +2750,7 @@ void Grid3D::Read_Grid_HDF5(hid_t file_id, struct parameters P) for (k = 0; k < H.nz_real; k++) { for (j = 0; j < H.ny_real; j++) { for (i = 0; i < H.nx_real; i++) { - id = (i + H.n_ghost) + (j + H.n_ghost) * H.nx + - (k + H.n_ghost) * H.nx * H.ny; + id = (i + H.n_ghost) + (j + H.n_ghost) * H.nx + (k + H.n_ghost) * H.nx * H.ny; buf_id = k + j * H.nz_real + i * H.nz_real * H.ny_real; C.momentum_x[id] = dataset_buffer[buf_id]; mean_l += fabs(C.momentum_x[id]); @@ -2965,8 +2781,7 @@ void Grid3D::Read_Grid_HDF5(hid_t file_id, struct parameters P) dataset_id = H5Dopen(file_id, "/momentum_y", H5P_DEFAULT); // Read the y momentum array into the dataset buffer // NOTE: NEED TO FIX // FOR FLOAT REAL!!! - status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, - H5P_DEFAULT, dataset_buffer); + status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer); // Free the dataset id status = H5Dclose(dataset_id); @@ -2977,8 +2792,7 @@ void Grid3D::Read_Grid_HDF5(hid_t file_id, struct parameters P) for (k = 0; k < H.nz_real; k++) { for (j = 0; j < H.ny_real; j++) { for (i = 0; i < H.nx_real; i++) { - id = (i + H.n_ghost) + (j + H.n_ghost) * H.nx + - (k + H.n_ghost) * H.nx * H.ny; + id = (i + H.n_ghost) + (j + H.n_ghost) * H.nx + (k + H.n_ghost) * H.nx * H.ny; buf_id = k + j * H.nz_real + i * H.nz_real * H.ny_real; C.momentum_y[id] = dataset_buffer[buf_id]; mean_l += fabs(C.momentum_y[id]); @@ -3009,8 +2823,7 @@ void Grid3D::Read_Grid_HDF5(hid_t file_id, struct parameters P) dataset_id = H5Dopen(file_id, "/momentum_z", H5P_DEFAULT); // Read the z momentum array into the dataset buffer // NOTE: NEED TO FIX // FOR FLOAT REAL!!! - status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, - H5P_DEFAULT, dataset_buffer); + status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer); // Free the dataset id status = H5Dclose(dataset_id); @@ -3021,8 +2834,7 @@ void Grid3D::Read_Grid_HDF5(hid_t file_id, struct parameters P) for (k = 0; k < H.nz_real; k++) { for (j = 0; j < H.ny_real; j++) { for (i = 0; i < H.nx_real; i++) { - id = (i + H.n_ghost) + (j + H.n_ghost) * H.nx + - (k + H.n_ghost) * H.nx * H.ny; + id = (i + H.n_ghost) + (j + H.n_ghost) * H.nx + (k + H.n_ghost) * H.nx * H.ny; buf_id = k + j * H.nz_real + i * H.nz_real * H.ny_real; C.momentum_z[id] = dataset_buffer[buf_id]; mean_l += fabs(C.momentum_z[id]); @@ -3053,8 +2865,7 @@ void Grid3D::Read_Grid_HDF5(hid_t file_id, struct parameters P) dataset_id = H5Dopen(file_id, "/Energy", H5P_DEFAULT); // Read the Energy array into the dataset buffer // NOTE: NEED TO FIX FOR // FLOAT REAL!!! - status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, - H5P_DEFAULT, dataset_buffer); + status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer); // Free the dataset id status = H5Dclose(dataset_id); @@ -3065,8 +2876,7 @@ void Grid3D::Read_Grid_HDF5(hid_t file_id, struct parameters P) for (k = 0; k < H.nz_real; k++) { for (j = 0; j < H.ny_real; j++) { for (i = 0; i < H.nx_real; i++) { - id = (i + H.n_ghost) + (j + H.n_ghost) * H.nx + - (k + H.n_ghost) * H.nx * H.ny; + id = (i + H.n_ghost) + (j + H.n_ghost) * H.nx + (k + H.n_ghost) * H.nx * H.ny; buf_id = k + j * H.nz_real + i * H.nz_real * H.ny_real; C.Energy[id] = dataset_buffer[buf_id]; mean_l += C.Energy[id]; @@ -3098,8 +2908,7 @@ void Grid3D::Read_Grid_HDF5(hid_t file_id, struct parameters P) dataset_id = H5Dopen(file_id, "/GasEnergy", H5P_DEFAULT); // Read the internal Energy array into the dataset buffer // NOTE: NEED TO // FIX FOR FLOAT REAL!!! - status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, - H5P_DEFAULT, dataset_buffer); + status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer); // Free the dataset id status = H5Dclose(dataset_id); @@ -3115,8 +2924,7 @@ void Grid3D::Read_Grid_HDF5(hid_t file_id, struct parameters P) for (k = 0; k < H.nz_real; k++) { for (j = 0; j < H.ny_real; j++) { for (i = 0; i < H.nx_real; i++) { - id = (i + H.n_ghost) + (j + H.n_ghost) * H.nx + - (k + H.n_ghost) * H.nx * H.ny; + id = (i + H.n_ghost) + (j + H.n_ghost) * H.nx + (k + H.n_ghost) * H.nx * H.ny; buf_id = k + j * H.nz_real + i * H.nz_real * H.ny_real; C.GasEnergy[id] = dataset_buffer[buf_id]; mean_l += C.GasEnergy[id]; @@ -3153,16 +2961,14 @@ void Grid3D::Read_Grid_HDF5(hid_t file_id, struct parameters P) " GasEnergy Mean: %f Min: %f Max: %f [ h^2 Msun kpc^-3 km^2 " "s^-2 ] \n", mean_l, min_l, max_l); - chprintf(" Temperature Mean: %f Min: %f Max: %f [ K ] \n", - temp_mean_l, temp_min_l, temp_max_l); + chprintf(" Temperature Mean: %f Min: %f Max: %f [ K ] \n", temp_mean_l, temp_min_l, temp_max_l); #endif // PRINT_INITIAL_STATS and COSMOLOGY #endif // DE #ifdef SCALAR - #if !defined(COOLING_GRACKLE) && \ - !defined(CHEMISTRY_GPU) // Dont Load scalars when using grackle or - // CHEMISTRY_GPU + #if !defined(COOLING_GRACKLE) && !defined(CHEMISTRY_GPU) // Dont Load scalars when using grackle or + // CHEMISTRY_GPU for (int s = 0; s < NSCALARS; s++) { // create the name of the dataset char dataset[100]; @@ -3175,8 +2981,7 @@ void Grid3D::Read_Grid_HDF5(hid_t file_id, struct parameters P) dataset_id = H5Dopen(file_id, dataset, H5P_DEFAULT); // Read the scalar array into the dataset buffer // NOTE: NEED TO FIX FOR // FLOAT REAL!!! - status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, - H5P_DEFAULT, dataset_buffer); + status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer); // Free the dataset id status = H5Dclose(dataset_id); @@ -3184,9 +2989,8 @@ void Grid3D::Read_Grid_HDF5(hid_t file_id, struct parameters P) for (k = 0; k < H.nz_real; k++) { for (j = 0; j < H.ny_real; j++) { for (i = 0; i < H.nx_real; i++) { - id = (i + H.n_ghost) + (j + H.n_ghost) * H.nx + - (k + H.n_ghost) * H.nx * H.ny; - buf_id = k + j * H.nz_real + i * H.nz_real * H.ny_real; + id = (i + H.n_ghost) + (j + H.n_ghost) * H.nx + (k + H.n_ghost) * H.nx * H.ny; + buf_id = k + j * H.nz_real + i * H.nz_real * H.ny_real; C.scalar[id + s * H.n_cells] = dataset_buffer[buf_id]; } } @@ -3214,8 +3018,7 @@ void Grid3D::Read_Grid_HDF5(hid_t file_id, struct parameters P) for (k = 0; k < H.nz_real; k++) { for (j = 0; j < H.ny_real; j++) { for (i = 0; i < H.nx_real; i++) { - id = (i + H.n_ghost) + (j + H.n_ghost) * H.nx + - (k + H.n_ghost) * H.nx * H.ny; + id = (i + H.n_ghost) + (j + H.n_ghost) * H.nx + (k + H.n_ghost) * H.nx * H.ny; dens = C.density[id]; C.HI_density[id] = HI_frac * dens; C.HII_density[id] = HII_frac * dens; @@ -3231,14 +3034,12 @@ void Grid3D::Read_Grid_HDF5(hid_t file_id, struct parameters P) } } else { dataset_id = H5Dopen(file_id, "/HI_density", H5P_DEFAULT); - status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, - H5P_DEFAULT, dataset_buffer); + status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer); status = H5Dclose(dataset_id); for (k = 0; k < H.nz_real; k++) { for (j = 0; j < H.ny_real; j++) { for (i = 0; i < H.nx_real; i++) { - id = (i + H.n_ghost) + (j + H.n_ghost) * H.nx + - (k + H.n_ghost) * H.nx * H.ny; + id = (i + H.n_ghost) + (j + H.n_ghost) * H.nx + (k + H.n_ghost) * H.nx * H.ny; buf_id = k + j * H.nz_real + i * H.nz_real * H.ny_real; C.HI_density[id] = dataset_buffer[buf_id]; // chprintf("%f \n", C.scalar[0*H.n_cells + id] / C.density[id]); @@ -3246,70 +3047,60 @@ void Grid3D::Read_Grid_HDF5(hid_t file_id, struct parameters P) } } dataset_id = H5Dopen(file_id, "/HII_density", H5P_DEFAULT); - status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, - H5P_DEFAULT, dataset_buffer); + status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer); status = H5Dclose(dataset_id); for (k = 0; k < H.nz_real; k++) { for (j = 0; j < H.ny_real; j++) { for (i = 0; i < H.nx_real; i++) { - id = (i + H.n_ghost) + (j + H.n_ghost) * H.nx + - (k + H.n_ghost) * H.nx * H.ny; + id = (i + H.n_ghost) + (j + H.n_ghost) * H.nx + (k + H.n_ghost) * H.nx * H.ny; buf_id = k + j * H.nz_real + i * H.nz_real * H.ny_real; C.HII_density[id] = dataset_buffer[buf_id]; } } } dataset_id = H5Dopen(file_id, "/HeI_density", H5P_DEFAULT); - status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, - H5P_DEFAULT, dataset_buffer); + status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer); status = H5Dclose(dataset_id); for (k = 0; k < H.nz_real; k++) { for (j = 0; j < H.ny_real; j++) { for (i = 0; i < H.nx_real; i++) { - id = (i + H.n_ghost) + (j + H.n_ghost) * H.nx + - (k + H.n_ghost) * H.nx * H.ny; + id = (i + H.n_ghost) + (j + H.n_ghost) * H.nx + (k + H.n_ghost) * H.nx * H.ny; buf_id = k + j * H.nz_real + i * H.nz_real * H.ny_real; C.HeI_density[id] = dataset_buffer[buf_id]; } } } dataset_id = H5Dopen(file_id, "/HeII_density", H5P_DEFAULT); - status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, - H5P_DEFAULT, dataset_buffer); + status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer); status = H5Dclose(dataset_id); for (k = 0; k < H.nz_real; k++) { for (j = 0; j < H.ny_real; j++) { for (i = 0; i < H.nx_real; i++) { - id = (i + H.n_ghost) + (j + H.n_ghost) * H.nx + - (k + H.n_ghost) * H.nx * H.ny; + id = (i + H.n_ghost) + (j + H.n_ghost) * H.nx + (k + H.n_ghost) * H.nx * H.ny; buf_id = k + j * H.nz_real + i * H.nz_real * H.ny_real; C.HeII_density[id] = dataset_buffer[buf_id]; } } } dataset_id = H5Dopen(file_id, "/HeIII_density", H5P_DEFAULT); - status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, - H5P_DEFAULT, dataset_buffer); + status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer); status = H5Dclose(dataset_id); for (k = 0; k < H.nz_real; k++) { for (j = 0; j < H.ny_real; j++) { for (i = 0; i < H.nx_real; i++) { - id = (i + H.n_ghost) + (j + H.n_ghost) * H.nx + - (k + H.n_ghost) * H.nx * H.ny; + id = (i + H.n_ghost) + (j + H.n_ghost) * H.nx + (k + H.n_ghost) * H.nx * H.ny; buf_id = k + j * H.nz_real + i * H.nz_real * H.ny_real; C.HeIII_density[id] = dataset_buffer[buf_id]; } } } dataset_id = H5Dopen(file_id, "/e_density", H5P_DEFAULT); - status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, - H5P_DEFAULT, dataset_buffer); + status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer); status = H5Dclose(dataset_id); for (k = 0; k < H.nz_real; k++) { for (j = 0; j < H.ny_real; j++) { for (i = 0; i < H.nx_real; i++) { - id = (i + H.n_ghost) + (j + H.n_ghost) * H.nx + - (k + H.n_ghost) * H.nx * H.ny; + id = (i + H.n_ghost) + (j + H.n_ghost) * H.nx + (k + H.n_ghost) * H.nx * H.ny; buf_id = k + j * H.nz_real + i * H.nz_real * H.ny_real; C.e_density[id] = dataset_buffer[buf_id]; } @@ -3317,14 +3108,12 @@ void Grid3D::Read_Grid_HDF5(hid_t file_id, struct parameters P) } #ifdef GRACKLE_METALS dataset_id = H5Dopen(file_id, "/metal_density", H5P_DEFAULT); - status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, - H5P_DEFAULT, dataset_buffer); + status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer); status = H5Dclose(dataset_id); for (k = 0; k < H.nz_real; k++) { for (j = 0; j < H.ny_real; j++) { for (i = 0; i < H.nx_real; i++) { - id = (i + H.n_ghost) + (j + H.n_ghost) * H.nx + - (k + H.n_ghost) * H.nx * H.ny; + id = (i + H.n_ghost) + (j + H.n_ghost) * H.nx + (k + H.n_ghost) * H.nx * H.ny; buf_id = k + j * H.nz_real + i * H.nz_real * H.ny_real; C.metal_density[id] = dataset_buffer[buf_id]; } @@ -3339,15 +3128,13 @@ void Grid3D::Read_Grid_HDF5(hid_t file_id, struct parameters P) // Start by creating a dataspace and buffer that is large enough for the // magnetic field since it's one larger than the rest free(dataset_buffer); - dataset_buffer = (Real *)malloc((H.nz_real + 1) * (H.ny_real + 1) * - (H.nx_real + 1) * sizeof(Real)); + dataset_buffer = (Real *)malloc((H.nz_real + 1) * (H.ny_real + 1) * (H.nx_real + 1) * sizeof(Real)); // Open the x magnetic field dataset dataset_id = H5Dopen(file_id, "/magnetic_x", H5P_DEFAULT); // Read the x magnetic field array into the dataset buffer // NOTE: NEED TO // FIX FOR FLOAT REAL!!! - status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, - H5P_DEFAULT, dataset_buffer); + status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer); // Free the dataset id status = H5Dclose(dataset_id); @@ -3358,10 +3145,8 @@ void Grid3D::Read_Grid_HDF5(hid_t file_id, struct parameters P) for (k = 0; k < H.nz_real + 1; k++) { for (j = 0; j < H.ny_real + 1; j++) { for (i = 0; i < H.nx_real + 1; i++) { - id = (i + H.n_ghost - 1) + (j + H.n_ghost - 1) * H.nx + - (k + H.n_ghost - 1) * H.nx * H.ny; - buf_id = - k + j * (H.nz_real + 1) + i * (H.nz_real + 1) * (H.ny_real + 1); + id = (i + H.n_ghost - 1) + (j + H.n_ghost - 1) * H.nx + (k + H.n_ghost - 1) * H.nx * H.ny; + buf_id = k + j * (H.nz_real + 1) + i * (H.nz_real + 1) * (H.ny_real + 1); C.magnetic_x[id] = dataset_buffer[buf_id]; mean_l += fabs(C.magnetic_x[id]); if (fabs(C.magnetic_x[id]) > max_l) max_l = fabs(C.magnetic_x[id]); @@ -3391,8 +3176,7 @@ void Grid3D::Read_Grid_HDF5(hid_t file_id, struct parameters P) dataset_id = H5Dopen(file_id, "/magnetic_y", H5P_DEFAULT); // Read the y magnetic field array into the dataset buffer // NOTE: NEED TO // FIX FOR FLOAT REAL!!! - status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, - H5P_DEFAULT, dataset_buffer); + status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer); // Free the dataset id status = H5Dclose(dataset_id); @@ -3403,10 +3187,8 @@ void Grid3D::Read_Grid_HDF5(hid_t file_id, struct parameters P) for (k = 0; k < H.nz_real + 1; k++) { for (j = 0; j < H.ny_real + 1; j++) { for (i = 0; i < H.nx_real + 1; i++) { - id = (i + H.n_ghost - 1) + (j + H.n_ghost - 1) * H.nx + - (k + H.n_ghost - 1) * H.nx * H.ny; - buf_id = - k + j * (H.nz_real + 1) + i * (H.nz_real + 1) * (H.ny_real + 1); + id = (i + H.n_ghost - 1) + (j + H.n_ghost - 1) * H.nx + (k + H.n_ghost - 1) * H.nx * H.ny; + buf_id = k + j * (H.nz_real + 1) + i * (H.nz_real + 1) * (H.ny_real + 1); C.magnetic_y[id] = dataset_buffer[buf_id]; mean_l += fabs(C.magnetic_y[id]); if (fabs(C.magnetic_y[id]) > max_l) max_l = fabs(C.magnetic_y[id]); @@ -3436,8 +3218,7 @@ void Grid3D::Read_Grid_HDF5(hid_t file_id, struct parameters P) dataset_id = H5Dopen(file_id, "/magnetic_z", H5P_DEFAULT); // Read the z magnetic field array into the dataset buffer // NOTE: NEED TO // FIX FOR FLOAT REAL!!! - status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, - H5P_DEFAULT, dataset_buffer); + status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer); // Free the dataset id status = H5Dclose(dataset_id); @@ -3448,10 +3229,8 @@ void Grid3D::Read_Grid_HDF5(hid_t file_id, struct parameters P) for (k = 0; k < H.nz_real + 1; k++) { for (j = 0; j < H.ny_real + 1; j++) { for (i = 0; i < H.nx_real + 1; i++) { - id = (i + H.n_ghost - 1) + (j + H.n_ghost - 1) * H.nx + - (k + H.n_ghost - 1) * H.nx * H.ny; - buf_id = - k + j * (H.nz_real + 1) + i * (H.nz_real + 1) * (H.ny_real + 1); + id = (i + H.n_ghost - 1) + (j + H.n_ghost - 1) * H.nx + (k + H.n_ghost - 1) * H.nx * H.ny; + buf_id = k + j * (H.nz_real + 1) + i * (H.nz_real + 1) * (H.ny_real + 1); C.magnetic_z[id] = dataset_buffer[buf_id]; mean_l += fabs(C.magnetic_z[id]); if (fabs(C.magnetic_z[id]) > max_l) max_l = fabs(C.magnetic_z[id]); @@ -3504,8 +3283,7 @@ int chprintf(const char *__restrict sdata, ...) return code; } -void rotate_point(Real x, Real y, Real z, Real delta, Real phi, Real theta, - Real *xp, Real *yp, Real *zp) +void rotate_point(Real x, Real y, Real z, Real delta, Real phi, Real theta, Real *xp, Real *yp, Real *zp) { Real cd, sd, cp, sp, ct, st; // sines and cosines Real a00, a01, a02; // rotation matrix elements diff --git a/src/io/io.h b/src/io/io.h index b7c501543..fb47246a8 100644 --- a/src/io/io.h +++ b/src/io/io.h @@ -53,26 +53,19 @@ void write_debug(Real* Value, const char* fname, int nValues, int iProc); #ifdef HDF5 // From io/io.cpp -herr_t Write_HDF5_Attribute(hid_t file_id, hid_t dataspace_id, - double* attribute, const char* name); -herr_t Write_HDF5_Attribute(hid_t file_id, hid_t dataspace_id, int* attribute, - const char* name); +herr_t Write_HDF5_Attribute(hid_t file_id, hid_t dataspace_id, double* attribute, const char* name); +herr_t Write_HDF5_Attribute(hid_t file_id, hid_t dataspace_id, int* attribute, const char* name); -herr_t Read_HDF5_Dataset(hid_t file_id, double* dataset_buffer, - const char* name); +herr_t Read_HDF5_Dataset(hid_t file_id, double* dataset_buffer, const char* name); -herr_t Write_HDF5_Dataset(hid_t file_id, hid_t dataspace_id, - double* dataset_buffer, const char* name); -herr_t Write_HDF5_Dataset(hid_t file_id, hid_t dataspace_id, - float* dataset_buffer, const char* name); +herr_t Write_HDF5_Dataset(hid_t file_id, hid_t dataspace_id, double* dataset_buffer, const char* name); +herr_t Write_HDF5_Dataset(hid_t file_id, hid_t dataspace_id, float* dataset_buffer, const char* name); // From io/io_gpu.cu // Use GPU to pack source -> device_buffer, then copy device_buffer -> buffer, // then write HDF5 field -void WriteHDF5Field3D(int nx, int ny, int nx_real, int ny_real, int nz_real, - int n_ghost, hid_t file_id, float* buffer, +void WriteHDF5Field3D(int nx, int ny, int nx_real, int ny_real, int nz_real, int n_ghost, hid_t file_id, float* buffer, float* device_buffer, Real* source, const char* name); -void WriteHDF5Field3D(int nx, int ny, int nx_real, int ny_real, int nz_real, - int n_ghost, hid_t file_id, double* buffer, +void WriteHDF5Field3D(int nx, int ny, int nx_real, int ny_real, int nz_real, int n_ghost, hid_t file_id, double* buffer, double* device_buffer, Real* source, const char* name); #endif diff --git a/src/io/io_gpu.cu b/src/io/io_gpu.cu index ddf51c946..62f0473a8 100644 --- a/src/io/io_gpu.cu +++ b/src/io/io_gpu.cu @@ -14,8 +14,7 @@ // Copy Real (non-ghost) cells from source to a double destination (for writing // HDF5 in double precision) -__global__ void CopyReal3D_GPU_Kernel(int nx, int ny, int nx_real, int ny_real, - int nz_real, int n_ghost, +__global__ void CopyReal3D_GPU_Kernel(int nx, int ny, int nx_real, int ny_real, int nz_real, int n_ghost, double* destination, Real* source) { int dest_id, source_id, id, i, j, k; @@ -38,8 +37,7 @@ __global__ void CopyReal3D_GPU_Kernel(int nx, int ny, int nx_real, int ny_real, // Copy Real (non-ghost) cells from source to a float destination (for writing // HDF5 in float precision) -__global__ void CopyReal3D_GPU_Kernel(int nx, int ny, int nx_real, int ny_real, - int nz_real, int n_ghost, +__global__ void CopyReal3D_GPU_Kernel(int nx, int ny, int nx_real, int ny_real, int nz_real, int n_ghost, float* destination, Real* source) { int dest_id, source_id, id, i, j, k; @@ -62,10 +60,8 @@ __global__ void CopyReal3D_GPU_Kernel(int nx, int ny, int nx_real, int ny_real, // When buffer is double, automatically use the double version of everything // using function overloading -void WriteHDF5Field3D(int nx, int ny, int nx_real, int ny_real, int nz_real, - int n_ghost, hid_t file_id, double* buffer, - double* device_buffer, Real* device_source, - const char* name) +void WriteHDF5Field3D(int nx, int ny, int nx_real, int ny_real, int nz_real, int n_ghost, hid_t file_id, double* buffer, + double* device_buffer, Real* device_source, const char* name) { herr_t status; hsize_t dims[3]; @@ -77,12 +73,9 @@ void WriteHDF5Field3D(int nx, int ny, int nx_real, int ny_real, int nz_real, // Copy non-ghost parts of source to buffer dim3 dim1dGrid((nx_real * ny_real * nz_real + TPB - 1) / TPB, 1, 1); dim3 dim1dBlock(TPB, 1, 1); - hipLaunchKernelGGL(CopyReal3D_GPU_Kernel, dim1dGrid, dim1dBlock, 0, 0, nx, ny, - nx_real, ny_real, nz_real, n_ghost, device_buffer, - device_source); - CudaSafeCall(cudaMemcpy(buffer, device_buffer, - nx_real * ny_real * nz_real * sizeof(double), - cudaMemcpyDeviceToHost)); + hipLaunchKernelGGL(CopyReal3D_GPU_Kernel, dim1dGrid, dim1dBlock, 0, 0, nx, ny, nx_real, ny_real, nz_real, n_ghost, + device_buffer, device_source); + CudaSafeCall(cudaMemcpy(buffer, device_buffer, nx_real * ny_real * nz_real * sizeof(double), cudaMemcpyDeviceToHost)); // Write Buffer to HDF5 status = Write_HDF5_Dataset(file_id, dataspace_id, buffer, name); @@ -95,10 +88,8 @@ void WriteHDF5Field3D(int nx, int ny, int nx_real, int ny_real, int nz_real, // When buffer is float, automatically use the float version of everything using // function overloading -void WriteHDF5Field3D(int nx, int ny, int nx_real, int ny_real, int nz_real, - int n_ghost, hid_t file_id, float* buffer, - float* device_buffer, Real* device_source, - const char* name) +void WriteHDF5Field3D(int nx, int ny, int nx_real, int ny_real, int nz_real, int n_ghost, hid_t file_id, float* buffer, + float* device_buffer, Real* device_source, const char* name) { herr_t status; hsize_t dims[3]; @@ -110,12 +101,9 @@ void WriteHDF5Field3D(int nx, int ny, int nx_real, int ny_real, int nz_real, // Copy non-ghost parts of source to buffer dim3 dim1dGrid((nx_real * ny_real * nz_real + TPB - 1) / TPB, 1, 1); dim3 dim1dBlock(TPB, 1, 1); - hipLaunchKernelGGL(CopyReal3D_GPU_Kernel, dim1dGrid, dim1dBlock, 0, 0, nx, ny, - nx_real, ny_real, nz_real, n_ghost, device_buffer, - device_source); - CudaSafeCall(cudaMemcpy(buffer, device_buffer, - nx_real * ny_real * nz_real * sizeof(float), - cudaMemcpyDeviceToHost)); + hipLaunchKernelGGL(CopyReal3D_GPU_Kernel, dim1dGrid, dim1dBlock, 0, 0, nx, ny, nx_real, ny_real, nz_real, n_ghost, + device_buffer, device_source); + CudaSafeCall(cudaMemcpy(buffer, device_buffer, nx_real * ny_real * nz_real * sizeof(float), cudaMemcpyDeviceToHost)); // Write Buffer to HDF5 status = Write_HDF5_Dataset(file_id, dataspace_id, buffer, name); diff --git a/src/main.cpp b/src/main.cpp index ba8f8786c..bcda7a32a 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -79,10 +79,8 @@ int main(int argc, char *argv[]) chprintf( "Parameter values: nx = %d, ny = %d, nz = %d, tout = %f, init = %s, " "boundaries = %d %d %d %d %d %d\n", - P.nx, P.ny, P.nz, P.tout, P.init, P.xl_bcnd, P.xu_bcnd, P.yl_bcnd, - P.yu_bcnd, P.zl_bcnd, P.zu_bcnd); - if (strcmp(P.init, "Read_Grid") == 0) - chprintf("Input directory: %s\n", P.indir); + P.nx, P.ny, P.nz, P.tout, P.init, P.xl_bcnd, P.xu_bcnd, P.yl_bcnd, P.yu_bcnd, P.zl_bcnd, P.zu_bcnd); + if (strcmp(P.init, "Read_Grid") == 0) chprintf("Input directory: %s\n", P.indir); chprintf("Output directory: %s\n", P.outdir); // Create a Log file to output run-time messages and output the git hash and @@ -95,8 +93,7 @@ int main(int argc, char *argv[]) // initialize the grid G.Initialize(&P); - chprintf("Local number of grid cells: %d %d %d %d\n", G.H.nx_real, - G.H.ny_real, G.H.nz_real, G.H.n_cells); + chprintf("Local number of grid cells: %d %d %d %d\n", G.H.nx_real, G.H.ny_real, G.H.nz_real, G.H.n_cells); message = "Initializing Simulation"; Write_Message_To_Log_File(message.c_str()); @@ -112,10 +109,8 @@ int main(int argc, char *argv[]) } #ifdef DE - chprintf("\nUsing Dual Energy Formalism:\n eta_1: %0.3f eta_2: %0.4f\n", - DE_ETA_1, DE_ETA_2); - message = " eta_1: " + std::to_string(DE_ETA_1) + - " eta_2: " + std::to_string(DE_ETA_2); + chprintf("\nUsing Dual Energy Formalism:\n eta_1: %0.3f eta_2: %0.4f\n", DE_ETA_1, DE_ETA_2); + message = " eta_1: " + std::to_string(DE_ETA_1) + " eta_2: " + std::to_string(DE_ETA_2); Write_Message_To_Log_File(message.c_str()); #endif @@ -185,8 +180,7 @@ int main(int argc, char *argv[]) G.Get_Particles_Acceleration(); #endif - chprintf("Dimensions of each cell: dx = %f dy = %f dz = %f\n", G.H.dx, G.H.dy, - G.H.dz); + chprintf("Dimensions of each cell: dx = %f dy = %f dz = %f\n", G.H.dx, G.H.dy, G.H.dz); chprintf("Ratio of specific heats gamma = %f\n", gama); chprintf("Nstep = %d Simulation time = %f\n", G.H.n_step, G.H.t); @@ -215,8 +209,7 @@ int main(int argc, char *argv[]) init_min = ReduceRealMin(init); init_max = ReduceRealMax(init); init_avg = ReduceRealAvg(init); - chprintf("Init min: %9.4f max: %9.4f avg: %9.4f\n", init_min, init_max, - init_avg); + chprintf("Init min: %9.4f max: %9.4f avg: %9.4f\n", init_min, init_max, init_avg); #else printf("Init %9.4f\n", init); #endif // MPI_CHOLLA diff --git a/src/main_tests.cpp b/src/main_tests.cpp index d88f5a50f..5c3a58be6 100644 --- a/src/main_tests.cpp +++ b/src/main_tests.cpp @@ -71,8 +71,7 @@ class InputParser */ bool cmdOptionExists(const std::string &option) const { - return std::find(this->_tokens.begin(), this->_tokens.end(), option) != - this->_tokens.end(); + return std::find(this->_tokens.begin(), this->_tokens.end(), option) != this->_tokens.end(); } // ===================================================================== @@ -86,8 +85,7 @@ class InputParser */ InputParser(int &argc, char **argv) { - for (int i = 1; i < argc; ++i) - this->_tokens.push_back(std::string(argv[i])); + for (int i = 1; i < argc; ++i) this->_tokens.push_back(std::string(argv[i])); } ~InputParser() = default; // ===================================================================== diff --git a/src/mhd/ct_electric_fields.cu b/src/mhd/ct_electric_fields.cu index e816c0a65..349779f34 100644 --- a/src/mhd/ct_electric_fields.cu +++ b/src/mhd/ct_electric_fields.cu @@ -17,10 +17,9 @@ namespace mhd { // ========================================================================= -__global__ void Calculate_CT_Electric_Fields( - Real const *fluxX, Real const *fluxY, Real const *fluxZ, - Real const *dev_conserved, Real *ctElectricFields, int const nx, - int const ny, int const nz, int const n_cells) +__global__ void Calculate_CT_Electric_Fields(Real const *fluxX, Real const *fluxY, Real const *fluxZ, + Real const *dev_conserved, Real *ctElectricFields, int const nx, + int const ny, int const nz, int const n_cells) { // get a thread index int const threadId = threadIdx.x + blockIdx.x * blockDim.x; @@ -64,318 +63,218 @@ __global__ void Calculate_CT_Electric_Fields( // being computed. Note that the direction for the face is parallel // to the face and the other direction that is parallel to that face // is the direction of the electric field being calculated - Real slope_x_pos, slope_x_neg, slope_y_pos, slope_y_neg, slope_z_pos, - slope_z_neg, face_x_pos, face_x_neg, face_y_pos, face_y_neg, face_z_pos, - face_z_neg; + Real slope_x_pos, slope_x_neg, slope_y_pos, slope_y_neg, slope_z_pos, slope_z_neg, face_x_pos, face_x_neg, + face_y_pos, face_y_neg, face_z_pos, face_z_neg; // ================ // X electric field // ================ // Y-direction slope on the positive Y side. S&G 2009 equation 23 - signUpwind = - fluxZ[cuda_utilities::compute1DIndex(xid, yid, zid - 1, nx, ny)]; + signUpwind = fluxZ[cuda_utilities::compute1DIndex(xid, yid, zid - 1, nx, ny)]; if (signUpwind > 0.0) { - slope_y_pos = - mhd::_internal::_ctSlope(fluxY, dev_conserved, -1, 0, 2, -1, 1, 2, - xid, yid, zid, nx, ny, n_cells); + slope_y_pos = mhd::_internal::_ctSlope(fluxY, dev_conserved, -1, 0, 2, -1, 1, 2, xid, yid, zid, nx, ny, n_cells); } else if (signUpwind < 0.0) { slope_y_pos = - mhd::_internal::_ctSlope(fluxY, dev_conserved, -1, 0, -1, -1, 1, -1, - xid, yid, zid, nx, ny, n_cells); + mhd::_internal::_ctSlope(fluxY, dev_conserved, -1, 0, -1, -1, 1, -1, xid, yid, zid, nx, ny, n_cells); } else { slope_y_pos = - 0.5 * - (mhd::_internal::_ctSlope(fluxY, dev_conserved, -1, 0, 2, -1, 1, 2, - xid, yid, zid, nx, ny, n_cells) + - mhd::_internal::_ctSlope(fluxY, dev_conserved, -1, 0, -1, -1, 1, -1, - xid, yid, zid, nx, ny, n_cells)); + 0.5 * (mhd::_internal::_ctSlope(fluxY, dev_conserved, -1, 0, 2, -1, 1, 2, xid, yid, zid, nx, ny, n_cells) + + mhd::_internal::_ctSlope(fluxY, dev_conserved, -1, 0, -1, -1, 1, -1, xid, yid, zid, nx, ny, n_cells)); } // Y-direction slope on the negative Y side. S&G 2009 equation 23 - signUpwind = - fluxZ[cuda_utilities::compute1DIndex(xid, yid - 1, zid - 1, nx, ny)]; + signUpwind = fluxZ[cuda_utilities::compute1DIndex(xid, yid - 1, zid - 1, nx, ny)]; if (signUpwind > 0.0) { - slope_y_neg = - mhd::_internal::_ctSlope(fluxY, dev_conserved, -1, 0, 1, 2, 1, 2, xid, - yid, zid, nx, ny, n_cells); + slope_y_neg = mhd::_internal::_ctSlope(fluxY, dev_conserved, -1, 0, 1, 2, 1, 2, xid, yid, zid, nx, ny, n_cells); } else if (signUpwind < 0.0) { - slope_y_neg = - mhd::_internal::_ctSlope(fluxY, dev_conserved, -1, 0, 1, -1, 1, -1, - xid, yid, zid, nx, ny, n_cells); + slope_y_neg = mhd::_internal::_ctSlope(fluxY, dev_conserved, -1, 0, 1, -1, 1, -1, xid, yid, zid, nx, ny, n_cells); } else { slope_y_neg = - 0.5 * (mhd::_internal::_ctSlope(fluxY, dev_conserved, -1, 0, 1, 2, 1, - 2, xid, yid, zid, nx, ny, n_cells) + - mhd::_internal::_ctSlope(fluxY, dev_conserved, -1, 0, 1, -1, 1, - -1, xid, yid, zid, nx, ny, n_cells)); + 0.5 * (mhd::_internal::_ctSlope(fluxY, dev_conserved, -1, 0, 1, 2, 1, 2, xid, yid, zid, nx, ny, n_cells) + + mhd::_internal::_ctSlope(fluxY, dev_conserved, -1, 0, 1, -1, 1, -1, xid, yid, zid, nx, ny, n_cells)); } // Z-direction slope on the positive Z side. S&G 2009 equation 23 - signUpwind = - fluxY[cuda_utilities::compute1DIndex(xid, yid - 1, zid, nx, ny)]; + signUpwind = fluxY[cuda_utilities::compute1DIndex(xid, yid - 1, zid, nx, ny)]; if (signUpwind > 0.0) { - slope_z_pos = - mhd::_internal::_ctSlope(fluxZ, dev_conserved, 1, 0, 1, -1, 1, 2, xid, - yid, zid, nx, ny, n_cells); + slope_z_pos = mhd::_internal::_ctSlope(fluxZ, dev_conserved, 1, 0, 1, -1, 1, 2, xid, yid, zid, nx, ny, n_cells); } else if (signUpwind < 0.0) { - slope_z_pos = - mhd::_internal::_ctSlope(fluxZ, dev_conserved, 1, 0, -1, -1, 2, -1, - xid, yid, zid, nx, ny, n_cells); + slope_z_pos = mhd::_internal::_ctSlope(fluxZ, dev_conserved, 1, 0, -1, -1, 2, -1, xid, yid, zid, nx, ny, n_cells); } else { slope_z_pos = - 0.5 * (mhd::_internal::_ctSlope(fluxZ, dev_conserved, 1, 0, 1, -1, 1, - 2, xid, yid, zid, nx, ny, n_cells) + - mhd::_internal::_ctSlope(fluxZ, dev_conserved, 1, 0, -1, -1, 2, - -1, xid, yid, zid, nx, ny, n_cells)); + 0.5 * (mhd::_internal::_ctSlope(fluxZ, dev_conserved, 1, 0, 1, -1, 1, 2, xid, yid, zid, nx, ny, n_cells) + + mhd::_internal::_ctSlope(fluxZ, dev_conserved, 1, 0, -1, -1, 2, -1, xid, yid, zid, nx, ny, n_cells)); } // Z-direction slope on the negative Z side. S&G 2009 equation 23 - signUpwind = - fluxY[cuda_utilities::compute1DIndex(xid, yid - 1, zid - 1, nx, ny)]; + signUpwind = fluxY[cuda_utilities::compute1DIndex(xid, yid - 1, zid - 1, nx, ny)]; if (signUpwind > 0.0) { - slope_z_neg = - mhd::_internal::_ctSlope(fluxZ, dev_conserved, 1, 0, 1, 2, 1, 2, xid, - yid, zid, nx, ny, n_cells); + slope_z_neg = mhd::_internal::_ctSlope(fluxZ, dev_conserved, 1, 0, 1, 2, 1, 2, xid, yid, zid, nx, ny, n_cells); } else if (signUpwind < 0.0) { - slope_z_neg = - mhd::_internal::_ctSlope(fluxZ, dev_conserved, 1, 0, 2, -1, -1, 2, - xid, yid, zid, nx, ny, n_cells); + slope_z_neg = mhd::_internal::_ctSlope(fluxZ, dev_conserved, 1, 0, 2, -1, -1, 2, xid, yid, zid, nx, ny, n_cells); } else { slope_z_neg = - 0.5 * (mhd::_internal::_ctSlope(fluxZ, dev_conserved, 1, 0, 1, 2, 1, - 2, xid, yid, zid, nx, ny, n_cells) + - mhd::_internal::_ctSlope(fluxZ, dev_conserved, 1, 0, 2, -1, -1, - 2, xid, yid, zid, nx, ny, n_cells)); + 0.5 * (mhd::_internal::_ctSlope(fluxZ, dev_conserved, 1, 0, 1, 2, 1, 2, xid, yid, zid, nx, ny, n_cells) + + mhd::_internal::_ctSlope(fluxZ, dev_conserved, 1, 0, 2, -1, -1, 2, xid, yid, zid, nx, ny, n_cells)); } // Load the face centered electric fields Note the negative signs to // convert from magnetic flux to electric field face_y_pos = - +fluxZ[cuda_utilities::compute1DIndex(xid, yid, zid - 1, nx, ny) + - (grid_enum::fluxZ_magnetic_x)*n_cells]; + +fluxZ[cuda_utilities::compute1DIndex(xid, yid, zid - 1, nx, ny) + (grid_enum::fluxZ_magnetic_x)*n_cells]; face_y_neg = - +fluxZ[cuda_utilities::compute1DIndex(xid, yid - 1, zid - 1, nx, ny) + - (grid_enum::fluxZ_magnetic_x)*n_cells]; + +fluxZ[cuda_utilities::compute1DIndex(xid, yid - 1, zid - 1, nx, ny) + (grid_enum::fluxZ_magnetic_x)*n_cells]; face_z_pos = - -fluxY[cuda_utilities::compute1DIndex(xid, yid - 1, zid, nx, ny) + - (grid_enum::fluxY_magnetic_x)*n_cells]; + -fluxY[cuda_utilities::compute1DIndex(xid, yid - 1, zid, nx, ny) + (grid_enum::fluxY_magnetic_x)*n_cells]; face_z_neg = - -fluxY[cuda_utilities::compute1DIndex(xid, yid - 1, zid - 1, nx, ny) + - (grid_enum::fluxY_magnetic_x)*n_cells]; + -fluxY[cuda_utilities::compute1DIndex(xid, yid - 1, zid - 1, nx, ny) + (grid_enum::fluxY_magnetic_x)*n_cells]; // sum and average face centered electric fields and slopes to get the // edge averaged electric field. // S&G 2009 equation 22 - ctElectricFields[threadId + 0 * n_cells] = - 0.25 * (+face_y_pos + face_y_neg + face_z_pos + face_z_neg + - slope_y_pos + slope_y_neg + slope_z_pos + slope_z_neg); + ctElectricFields[threadId + 0 * n_cells] = 0.25 * (+face_y_pos + face_y_neg + face_z_pos + face_z_neg + + slope_y_pos + slope_y_neg + slope_z_pos + slope_z_neg); // ================ // Y electric field // ================ // X-direction slope on the positive X side. S&G 2009 equation 23 - signUpwind = - fluxZ[cuda_utilities::compute1DIndex(xid, yid, zid - 1, nx, ny)]; + signUpwind = fluxZ[cuda_utilities::compute1DIndex(xid, yid, zid - 1, nx, ny)]; if (signUpwind > 0.0) { - slope_x_pos = - mhd::_internal::_ctSlope(fluxX, dev_conserved, 1, 1, 2, -1, 0, 2, xid, - yid, zid, nx, ny, n_cells); + slope_x_pos = mhd::_internal::_ctSlope(fluxX, dev_conserved, 1, 1, 2, -1, 0, 2, xid, yid, zid, nx, ny, n_cells); } else if (signUpwind < 0.0) { - slope_x_pos = - mhd::_internal::_ctSlope(fluxX, dev_conserved, 1, 1, -1, -1, 0, -1, - xid, yid, zid, nx, ny, n_cells); + slope_x_pos = mhd::_internal::_ctSlope(fluxX, dev_conserved, 1, 1, -1, -1, 0, -1, xid, yid, zid, nx, ny, n_cells); } else { slope_x_pos = - 0.5 * (mhd::_internal::_ctSlope(fluxX, dev_conserved, 1, 1, 2, -1, 0, - 2, xid, yid, zid, nx, ny, n_cells) + - mhd::_internal::_ctSlope(fluxX, dev_conserved, 1, 1, -1, -1, 0, - -1, xid, yid, zid, nx, ny, n_cells)); + 0.5 * (mhd::_internal::_ctSlope(fluxX, dev_conserved, 1, 1, 2, -1, 0, 2, xid, yid, zid, nx, ny, n_cells) + + mhd::_internal::_ctSlope(fluxX, dev_conserved, 1, 1, -1, -1, 0, -1, xid, yid, zid, nx, ny, n_cells)); } // X-direction slope on the negative X side. S&G 2009 equation 23 - signUpwind = - fluxZ[cuda_utilities::compute1DIndex(xid - 1, yid, zid - 1, nx, ny)]; + signUpwind = fluxZ[cuda_utilities::compute1DIndex(xid - 1, yid, zid - 1, nx, ny)]; if (signUpwind > 0.0) { - slope_x_neg = - mhd::_internal::_ctSlope(fluxX, dev_conserved, 1, 1, 0, 2, 0, 2, xid, - yid, zid, nx, ny, n_cells); + slope_x_neg = mhd::_internal::_ctSlope(fluxX, dev_conserved, 1, 1, 0, 2, 0, 2, xid, yid, zid, nx, ny, n_cells); } else if (signUpwind < 0.0) { - slope_x_neg = - mhd::_internal::_ctSlope(fluxX, dev_conserved, 1, 1, 0, -1, 0, -1, - xid, yid, zid, nx, ny, n_cells); + slope_x_neg = mhd::_internal::_ctSlope(fluxX, dev_conserved, 1, 1, 0, -1, 0, -1, xid, yid, zid, nx, ny, n_cells); } else { slope_x_neg = - 0.5 * (mhd::_internal::_ctSlope(fluxX, dev_conserved, 1, 1, 0, 2, 0, - 2, xid, yid, zid, nx, ny, n_cells) + - mhd::_internal::_ctSlope(fluxX, dev_conserved, 1, 1, 0, -1, 0, - -1, xid, yid, zid, nx, ny, n_cells)); + 0.5 * (mhd::_internal::_ctSlope(fluxX, dev_conserved, 1, 1, 0, 2, 0, 2, xid, yid, zid, nx, ny, n_cells) + + mhd::_internal::_ctSlope(fluxX, dev_conserved, 1, 1, 0, -1, 0, -1, xid, yid, zid, nx, ny, n_cells)); } // Z-direction slope on the positive Z side. S&G 2009 equation 23 - signUpwind = - fluxX[cuda_utilities::compute1DIndex(xid - 1, yid, zid, nx, ny)]; + signUpwind = fluxX[cuda_utilities::compute1DIndex(xid - 1, yid, zid, nx, ny)]; if (signUpwind > 0.0) { - slope_z_pos = - mhd::_internal::_ctSlope(fluxZ, dev_conserved, -1, 1, 0, -1, 0, 2, - xid, yid, zid, nx, ny, n_cells); + slope_z_pos = mhd::_internal::_ctSlope(fluxZ, dev_conserved, -1, 1, 0, -1, 0, 2, xid, yid, zid, nx, ny, n_cells); } else if (signUpwind < 0.0) { slope_z_pos = - mhd::_internal::_ctSlope(fluxZ, dev_conserved, -1, 1, -1, -1, 2, -1, - xid, yid, zid, nx, ny, n_cells); + mhd::_internal::_ctSlope(fluxZ, dev_conserved, -1, 1, -1, -1, 2, -1, xid, yid, zid, nx, ny, n_cells); } else { slope_z_pos = - 0.5 * - (mhd::_internal::_ctSlope(fluxZ, dev_conserved, -1, 1, 0, -1, 0, 2, - xid, yid, zid, nx, ny, n_cells) + - mhd::_internal::_ctSlope(fluxZ, dev_conserved, -1, 1, -1, -1, 2, -1, - xid, yid, zid, nx, ny, n_cells)); + 0.5 * (mhd::_internal::_ctSlope(fluxZ, dev_conserved, -1, 1, 0, -1, 0, 2, xid, yid, zid, nx, ny, n_cells) + + mhd::_internal::_ctSlope(fluxZ, dev_conserved, -1, 1, -1, -1, 2, -1, xid, yid, zid, nx, ny, n_cells)); } // Z-direction slope on the negative Z side. S&G 2009 equation 23 - signUpwind = - fluxX[cuda_utilities::compute1DIndex(xid - 1, yid, zid - 1, nx, ny)]; + signUpwind = fluxX[cuda_utilities::compute1DIndex(xid - 1, yid, zid - 1, nx, ny)]; if (signUpwind > 0.0) { - slope_z_neg = - mhd::_internal::_ctSlope(fluxZ, dev_conserved, -1, 1, 0, 2, 0, 2, xid, - yid, zid, nx, ny, n_cells); + slope_z_neg = mhd::_internal::_ctSlope(fluxZ, dev_conserved, -1, 1, 0, 2, 0, 2, xid, yid, zid, nx, ny, n_cells); } else if (signUpwind < 0.0) { - slope_z_neg = - mhd::_internal::_ctSlope(fluxZ, dev_conserved, -1, 1, 2, -1, 2, -1, - xid, yid, zid, nx, ny, n_cells); + slope_z_neg = mhd::_internal::_ctSlope(fluxZ, dev_conserved, -1, 1, 2, -1, 2, -1, xid, yid, zid, nx, ny, n_cells); } else { slope_z_neg = - 0.5 * (mhd::_internal::_ctSlope(fluxZ, dev_conserved, -1, 1, 0, 2, 0, - 2, xid, yid, zid, nx, ny, n_cells) + - mhd::_internal::_ctSlope(fluxZ, dev_conserved, -1, 1, 2, -1, 2, - -1, xid, yid, zid, nx, ny, n_cells)); + 0.5 * (mhd::_internal::_ctSlope(fluxZ, dev_conserved, -1, 1, 0, 2, 0, 2, xid, yid, zid, nx, ny, n_cells) + + mhd::_internal::_ctSlope(fluxZ, dev_conserved, -1, 1, 2, -1, 2, -1, xid, yid, zid, nx, ny, n_cells)); } // Load the face centered electric fields Note the negative signs to // convert from magnetic flux to electric field face_x_pos = - -fluxZ[cuda_utilities::compute1DIndex(xid, yid, zid - 1, nx, ny) + - (grid_enum::fluxZ_magnetic_y)*n_cells]; + -fluxZ[cuda_utilities::compute1DIndex(xid, yid, zid - 1, nx, ny) + (grid_enum::fluxZ_magnetic_y)*n_cells]; face_x_neg = - -fluxZ[cuda_utilities::compute1DIndex(xid - 1, yid, zid - 1, nx, ny) + - (grid_enum::fluxZ_magnetic_y)*n_cells]; + -fluxZ[cuda_utilities::compute1DIndex(xid - 1, yid, zid - 1, nx, ny) + (grid_enum::fluxZ_magnetic_y)*n_cells]; face_z_pos = - +fluxX[cuda_utilities::compute1DIndex(xid - 1, yid, zid, nx, ny) + - (grid_enum::fluxX_magnetic_y)*n_cells]; + +fluxX[cuda_utilities::compute1DIndex(xid - 1, yid, zid, nx, ny) + (grid_enum::fluxX_magnetic_y)*n_cells]; face_z_neg = - +fluxX[cuda_utilities::compute1DIndex(xid - 1, yid, zid - 1, nx, ny) + - (grid_enum::fluxX_magnetic_y)*n_cells]; + +fluxX[cuda_utilities::compute1DIndex(xid - 1, yid, zid - 1, nx, ny) + (grid_enum::fluxX_magnetic_y)*n_cells]; // sum and average face centered electric fields and slopes to get the // edge averaged electric field. // S&G 2009 equation 22 - ctElectricFields[threadId + 1 * n_cells] = - 0.25 * (+face_x_pos + face_x_neg + face_z_pos + face_z_neg + - slope_x_pos + slope_x_neg + slope_z_pos + slope_z_neg); + ctElectricFields[threadId + 1 * n_cells] = 0.25 * (+face_x_pos + face_x_neg + face_z_pos + face_z_neg + + slope_x_pos + slope_x_neg + slope_z_pos + slope_z_neg); // ================ // Z electric field // ================ // Y-direction slope on the positive Y side. S&G 2009 equation 23 - signUpwind = - fluxX[cuda_utilities::compute1DIndex(xid - 1, yid, zid, nx, ny)]; + signUpwind = fluxX[cuda_utilities::compute1DIndex(xid - 1, yid, zid, nx, ny)]; if (signUpwind > 0.0) { - slope_y_pos = - mhd::_internal::_ctSlope(fluxY, dev_conserved, 1, 2, 0, -1, 0, 1, xid, - yid, zid, nx, ny, n_cells); + slope_y_pos = mhd::_internal::_ctSlope(fluxY, dev_conserved, 1, 2, 0, -1, 0, 1, xid, yid, zid, nx, ny, n_cells); } else if (signUpwind < 0.0) { - slope_y_pos = - mhd::_internal::_ctSlope(fluxY, dev_conserved, 1, 2, -1, -1, 1, -1, - xid, yid, zid, nx, ny, n_cells); + slope_y_pos = mhd::_internal::_ctSlope(fluxY, dev_conserved, 1, 2, -1, -1, 1, -1, xid, yid, zid, nx, ny, n_cells); } else { slope_y_pos = - 0.5 * (mhd::_internal::_ctSlope(fluxY, dev_conserved, 1, 2, 0, -1, 0, - 1, xid, yid, zid, nx, ny, n_cells) + - mhd::_internal::_ctSlope(fluxY, dev_conserved, 1, 2, -1, -1, 1, - -1, xid, yid, zid, nx, ny, n_cells)); + 0.5 * (mhd::_internal::_ctSlope(fluxY, dev_conserved, 1, 2, 0, -1, 0, 1, xid, yid, zid, nx, ny, n_cells) + + mhd::_internal::_ctSlope(fluxY, dev_conserved, 1, 2, -1, -1, 1, -1, xid, yid, zid, nx, ny, n_cells)); } // Y-direction slope on the negative Y side. S&G 2009 equation 23 - signUpwind = - fluxX[cuda_utilities::compute1DIndex(xid - 1, yid - 1, zid, nx, ny)]; + signUpwind = fluxX[cuda_utilities::compute1DIndex(xid - 1, yid - 1, zid, nx, ny)]; if (signUpwind > 0.0) { - slope_y_neg = - mhd::_internal::_ctSlope(fluxY, dev_conserved, 1, 2, 0, 1, 0, 1, xid, - yid, zid, nx, ny, n_cells); + slope_y_neg = mhd::_internal::_ctSlope(fluxY, dev_conserved, 1, 2, 0, 1, 0, 1, xid, yid, zid, nx, ny, n_cells); } else if (signUpwind < 0.0) { - slope_y_neg = - mhd::_internal::_ctSlope(fluxY, dev_conserved, 1, 2, 1, -1, 1, -1, - xid, yid, zid, nx, ny, n_cells); + slope_y_neg = mhd::_internal::_ctSlope(fluxY, dev_conserved, 1, 2, 1, -1, 1, -1, xid, yid, zid, nx, ny, n_cells); } else { slope_y_neg = - 0.5 * (mhd::_internal::_ctSlope(fluxY, dev_conserved, 1, 2, 0, 1, 0, - 1, xid, yid, zid, nx, ny, n_cells) + - mhd::_internal::_ctSlope(fluxY, dev_conserved, 1, 2, 1, -1, 1, - -1, xid, yid, zid, nx, ny, n_cells)); + 0.5 * (mhd::_internal::_ctSlope(fluxY, dev_conserved, 1, 2, 0, 1, 0, 1, xid, yid, zid, nx, ny, n_cells) + + mhd::_internal::_ctSlope(fluxY, dev_conserved, 1, 2, 1, -1, 1, -1, xid, yid, zid, nx, ny, n_cells)); } // X-direction slope on the positive X side. S&G 2009 equation 23 - signUpwind = - fluxY[cuda_utilities::compute1DIndex(xid, yid - 1, zid, nx, ny)]; + signUpwind = fluxY[cuda_utilities::compute1DIndex(xid, yid - 1, zid, nx, ny)]; if (signUpwind > 0.0) { - slope_x_pos = - mhd::_internal::_ctSlope(fluxX, dev_conserved, -1, 2, 1, -1, 0, 1, - xid, yid, zid, nx, ny, n_cells); + slope_x_pos = mhd::_internal::_ctSlope(fluxX, dev_conserved, -1, 2, 1, -1, 0, 1, xid, yid, zid, nx, ny, n_cells); } else if (signUpwind < 0.0) { slope_x_pos = - mhd::_internal::_ctSlope(fluxX, dev_conserved, -1, 2, -1, -1, 0, -1, - xid, yid, zid, nx, ny, n_cells); + mhd::_internal::_ctSlope(fluxX, dev_conserved, -1, 2, -1, -1, 0, -1, xid, yid, zid, nx, ny, n_cells); } else { slope_x_pos = - 0.5 * - (mhd::_internal::_ctSlope(fluxX, dev_conserved, -1, 2, 1, -1, 0, 1, - xid, yid, zid, nx, ny, n_cells) + - mhd::_internal::_ctSlope(fluxX, dev_conserved, -1, 2, -1, -1, 0, -1, - xid, yid, zid, nx, ny, n_cells)); + 0.5 * (mhd::_internal::_ctSlope(fluxX, dev_conserved, -1, 2, 1, -1, 0, 1, xid, yid, zid, nx, ny, n_cells) + + mhd::_internal::_ctSlope(fluxX, dev_conserved, -1, 2, -1, -1, 0, -1, xid, yid, zid, nx, ny, n_cells)); } // X-direction slope on the negative X side. S&G 2009 equation 23 - signUpwind = - fluxY[cuda_utilities::compute1DIndex(xid - 1, yid - 1, zid, nx, ny)]; + signUpwind = fluxY[cuda_utilities::compute1DIndex(xid - 1, yid - 1, zid, nx, ny)]; if (signUpwind > 0.0) { - slope_x_neg = - mhd::_internal::_ctSlope(fluxX, dev_conserved, -1, 2, 0, 1, 0, 1, xid, - yid, zid, nx, ny, n_cells); + slope_x_neg = mhd::_internal::_ctSlope(fluxX, dev_conserved, -1, 2, 0, 1, 0, 1, xid, yid, zid, nx, ny, n_cells); } else if (signUpwind < 0.0) { - slope_x_neg = - mhd::_internal::_ctSlope(fluxX, dev_conserved, -1, 2, 0, -1, 0, -1, - xid, yid, zid, nx, ny, n_cells); + slope_x_neg = mhd::_internal::_ctSlope(fluxX, dev_conserved, -1, 2, 0, -1, 0, -1, xid, yid, zid, nx, ny, n_cells); } else { slope_x_neg = - 0.5 * (mhd::_internal::_ctSlope(fluxX, dev_conserved, -1, 2, 0, 1, 0, - 1, xid, yid, zid, nx, ny, n_cells) + - mhd::_internal::_ctSlope(fluxX, dev_conserved, -1, 2, 0, -1, 0, - -1, xid, yid, zid, nx, ny, n_cells)); + 0.5 * (mhd::_internal::_ctSlope(fluxX, dev_conserved, -1, 2, 0, 1, 0, 1, xid, yid, zid, nx, ny, n_cells) + + mhd::_internal::_ctSlope(fluxX, dev_conserved, -1, 2, 0, -1, 0, -1, xid, yid, zid, nx, ny, n_cells)); } // Load the face centered electric fields Note the negative signs to // convert from magnetic flux to electric field face_x_pos = - +fluxY[cuda_utilities::compute1DIndex(xid, yid - 1, zid, nx, ny) + - (grid_enum::fluxY_magnetic_z)*n_cells]; + +fluxY[cuda_utilities::compute1DIndex(xid, yid - 1, zid, nx, ny) + (grid_enum::fluxY_magnetic_z)*n_cells]; face_x_neg = - +fluxY[cuda_utilities::compute1DIndex(xid - 1, yid - 1, zid, nx, ny) + - (grid_enum::fluxY_magnetic_z)*n_cells]; + +fluxY[cuda_utilities::compute1DIndex(xid - 1, yid - 1, zid, nx, ny) + (grid_enum::fluxY_magnetic_z)*n_cells]; face_y_pos = - -fluxX[cuda_utilities::compute1DIndex(xid - 1, yid, zid, nx, ny) + - (grid_enum::fluxX_magnetic_z)*n_cells]; + -fluxX[cuda_utilities::compute1DIndex(xid - 1, yid, zid, nx, ny) + (grid_enum::fluxX_magnetic_z)*n_cells]; face_y_neg = - -fluxX[cuda_utilities::compute1DIndex(xid - 1, yid - 1, zid, nx, ny) + - (grid_enum::fluxX_magnetic_z)*n_cells]; + -fluxX[cuda_utilities::compute1DIndex(xid - 1, yid - 1, zid, nx, ny) + (grid_enum::fluxX_magnetic_z)*n_cells]; // sum and average face centered electric fields and slopes to get the // edge averaged electric field. // S&G 2009 equation 22 - ctElectricFields[threadId + 2 * n_cells] = - 0.25 * (+face_x_pos + face_x_neg + face_y_pos + face_y_neg + - slope_x_pos + slope_x_neg + slope_y_pos + slope_y_neg); + ctElectricFields[threadId + 2 * n_cells] = 0.25 * (+face_x_pos + face_x_neg + face_y_pos + face_y_neg + + slope_x_pos + slope_x_neg + slope_y_pos + slope_y_neg); } } // ========================================================================= diff --git a/src/mhd/ct_electric_fields.h b/src/mhd/ct_electric_fields.h index eb79fa17e..349aa42c8 100644 --- a/src/mhd/ct_electric_fields.h +++ b/src/mhd/ct_electric_fields.h @@ -63,12 +63,11 @@ namespace _internal * \param[in] n_cells The total number of cells * \return Real The slope of the electric field */ -inline __host__ __device__ Real -_ctSlope(Real const *flux, Real const *dev_conserved, Real const &fluxSign, - int const &ctDirection, int const &conservedQuadrent1, - int const &conservedQuadrent2, int const &fluxQuadrent1, - int const &fluxQuadrent2, int const &xid, int const &yid, - int const &zid, int const &nx, int const &ny, int const &n_cells) +inline __host__ __device__ Real _ctSlope(Real const *flux, Real const *dev_conserved, Real const &fluxSign, + int const &ctDirection, int const &conservedQuadrent1, + int const &conservedQuadrent2, int const &fluxQuadrent1, + int const &fluxQuadrent2, int const &xid, int const &yid, int const &zid, + int const &nx, int const &ny, int const &n_cells) { // Compute the various required indices @@ -77,59 +76,42 @@ _ctSlope(Real const *flux, Real const *dev_conserved, Real const &fluxSign, int const modPlus2 = (ctDirection + 2) % 3; // Indices for the cell centered values - int const xidCentered = - xid - int(conservedQuadrent1 == 0) - int(conservedQuadrent2 == 0); - int const yidCentered = - yid - int(conservedQuadrent1 == 1) - int(conservedQuadrent2 == 1); - int const zidCentered = - zid - int(conservedQuadrent1 == 2) - int(conservedQuadrent2 == 2); - int const idxCentered = cuda_utilities::compute1DIndex( - xidCentered, yidCentered, zidCentered, nx, ny); + int const xidCentered = xid - int(conservedQuadrent1 == 0) - int(conservedQuadrent2 == 0); + int const yidCentered = yid - int(conservedQuadrent1 == 1) - int(conservedQuadrent2 == 1); + int const zidCentered = zid - int(conservedQuadrent1 == 2) - int(conservedQuadrent2 == 2); + int const idxCentered = cuda_utilities::compute1DIndex(xidCentered, yidCentered, zidCentered, nx, ny); // Index for the flux - int const idxFlux = cuda_utilities::compute1DIndex( - xid - int(fluxQuadrent1 == 0) - int(fluxQuadrent2 == 0), - yid - int(fluxQuadrent1 == 1) - int(fluxQuadrent2 == 1), - zid - int(fluxQuadrent1 == 2) - int(fluxQuadrent2 == 2), nx, ny); + int const idxFlux = cuda_utilities::compute1DIndex(xid - int(fluxQuadrent1 == 0) - int(fluxQuadrent2 == 0), + yid - int(fluxQuadrent1 == 1) - int(fluxQuadrent2 == 1), + zid - int(fluxQuadrent1 == 2) - int(fluxQuadrent2 == 2), nx, ny); // Indices for the face centered magnetic fields that need to be averaged int const idxB2Shift = cuda_utilities::compute1DIndex( - xidCentered - int(modPlus1 == 0), yidCentered - int(modPlus1 == 1), - zidCentered - int(modPlus1 == 2), nx, ny); + xidCentered - int(modPlus1 == 0), yidCentered - int(modPlus1 == 1), zidCentered - int(modPlus1 == 2), nx, ny); int const idxB3Shift = cuda_utilities::compute1DIndex( - xidCentered - int(modPlus2 == 0), yidCentered - int(modPlus2 == 1), - zidCentered - int(modPlus2 == 2), nx, ny); + xidCentered - int(modPlus2 == 0), yidCentered - int(modPlus2 == 1), zidCentered - int(modPlus2 == 2), nx, ny); // Load values for cell centered electric field. B1 (not present) is // the magnetic field in the same direction as the `ctDirection` // variable, B2 and B3 are the next two fields cyclically. i.e. if // B1=Bx then B2=By and B3=Bz, if B1=By then B2=Bz and B3=Bx. The // same rules apply for the momentum - Real const density = dev_conserved[idxCentered]; - Real const Momentum2 = dev_conserved[idxCentered + (modPlus1 + 1) * n_cells]; - Real const Momentum3 = dev_conserved[idxCentered + (modPlus2 + 1) * n_cells]; - Real const B2Centered = - 0.5 * (dev_conserved[idxCentered + - (modPlus1 + grid_enum::magnetic_start) * n_cells] + - dev_conserved[idxB2Shift + - (modPlus1 + grid_enum::magnetic_start) * n_cells]); - Real const B3Centered = - 0.5 * (dev_conserved[idxCentered + - (modPlus2 + grid_enum::magnetic_start) * n_cells] + - dev_conserved[idxB3Shift + - (modPlus2 + grid_enum::magnetic_start) * n_cells]); + Real const density = dev_conserved[idxCentered]; + Real const Momentum2 = dev_conserved[idxCentered + (modPlus1 + 1) * n_cells]; + Real const Momentum3 = dev_conserved[idxCentered + (modPlus2 + 1) * n_cells]; + Real const B2Centered = 0.5 * (dev_conserved[idxCentered + (modPlus1 + grid_enum::magnetic_start) * n_cells] + + dev_conserved[idxB2Shift + (modPlus1 + grid_enum::magnetic_start) * n_cells]); + Real const B3Centered = 0.5 * (dev_conserved[idxCentered + (modPlus2 + grid_enum::magnetic_start) * n_cells] + + dev_conserved[idxB3Shift + (modPlus2 + grid_enum::magnetic_start) * n_cells]); // Compute the electric field in the center with a cross product - Real const electric_centered = - (Momentum3 * B2Centered - Momentum2 * B3Centered) / density; + Real const electric_centered = (Momentum3 * B2Centered - Momentum2 * B3Centered) / density; // Load face centered electric field, note fluxSign to correctly do // the shift from magnetic flux to EMF/electric field and to choose // which field to use - Real const electric_face = - fluxSign * - flux[idxFlux + - (int(fluxSign == 1) + grid_enum::magnetic_start) * n_cells]; + Real const electric_face = fluxSign * flux[idxFlux + (int(fluxSign == 1) + grid_enum::magnetic_start) * n_cells]; // Compute the slope and return it // S&G 2009 equation 24 @@ -154,10 +136,9 @@ _ctSlope(Real const *flux, Real const *dev_conserved, Real const &fluxSign, * \param[in] nz The number of cells in the z-direction * \param[in] n_cells The total number of cells */ -__global__ void Calculate_CT_Electric_Fields( - Real const *fluxX, Real const *fluxY, Real const *fluxZ, - Real const *dev_conserved, Real *ctElectricFields, int const nx, - int const ny, int const nz, int const n_cells); +__global__ void Calculate_CT_Electric_Fields(Real const *fluxX, Real const *fluxY, Real const *fluxZ, + Real const *dev_conserved, Real *ctElectricFields, int const nx, + int const ny, int const nz, int const n_cells); // ========================================================================= } // end namespace mhd #endif // MHD \ No newline at end of file diff --git a/src/mhd/ct_electric_fields_tests.cu b/src/mhd/ct_electric_fields_tests.cu index 3dbcc008a..05b610805 100644 --- a/src/mhd/ct_electric_fields_tests.cu +++ b/src/mhd/ct_electric_fields_tests.cu @@ -57,8 +57,7 @@ class tMHDCalculateCTElectricFields : public ::testing::Test CudaSafeCall(cudaMalloc(&dev_fluxY, fluxY.size() * sizeof(double))); CudaSafeCall(cudaMalloc(&dev_fluxZ, fluxZ.size() * sizeof(double))); CudaSafeCall(cudaMalloc(&dev_grid, grid.size() * sizeof(double))); - CudaSafeCall(cudaMalloc(&dev_testCTElectricFields, - testCTElectricFields.size() * sizeof(double))); + CudaSafeCall(cudaMalloc(&dev_testCTElectricFields, testCTElectricFields.size() * sizeof(double))); // Populate the grids with values where vector.at(i) = double(i). The // values chosen aren't that important, just that every cell has a unique @@ -89,8 +88,7 @@ class tMHDCalculateCTElectricFields : public ::testing::Test std::vector fiducialData; // device pointers - double *dev_fluxX, *dev_fluxY, *dev_fluxZ, *dev_grid, - *dev_testCTElectricFields; + double *dev_fluxX, *dev_fluxY, *dev_fluxZ, *dev_grid, *dev_testCTElectricFields; /*! * \brief Launch the kernel and check results @@ -99,42 +97,30 @@ class tMHDCalculateCTElectricFields : public ::testing::Test void runTest() { // Copy values to GPU - CudaSafeCall(cudaMemcpy(dev_fluxX, fluxX.data(), - fluxX.size() * sizeof(Real), - cudaMemcpyHostToDevice)); - CudaSafeCall(cudaMemcpy(dev_fluxY, fluxY.data(), - fluxY.size() * sizeof(Real), - cudaMemcpyHostToDevice)); - CudaSafeCall(cudaMemcpy(dev_fluxZ, fluxZ.data(), - fluxZ.size() * sizeof(Real), - cudaMemcpyHostToDevice)); - CudaSafeCall(cudaMemcpy(dev_grid, grid.data(), grid.size() * sizeof(Real), - cudaMemcpyHostToDevice)); - CudaSafeCall(cudaMemcpy( - dev_testCTElectricFields, testCTElectricFields.data(), - testCTElectricFields.size() * sizeof(Real), cudaMemcpyHostToDevice)); + CudaSafeCall(cudaMemcpy(dev_fluxX, fluxX.data(), fluxX.size() * sizeof(Real), cudaMemcpyHostToDevice)); + CudaSafeCall(cudaMemcpy(dev_fluxY, fluxY.data(), fluxY.size() * sizeof(Real), cudaMemcpyHostToDevice)); + CudaSafeCall(cudaMemcpy(dev_fluxZ, fluxZ.data(), fluxZ.size() * sizeof(Real), cudaMemcpyHostToDevice)); + CudaSafeCall(cudaMemcpy(dev_grid, grid.data(), grid.size() * sizeof(Real), cudaMemcpyHostToDevice)); + CudaSafeCall(cudaMemcpy(dev_testCTElectricFields, testCTElectricFields.data(), + testCTElectricFields.size() * sizeof(Real), cudaMemcpyHostToDevice)); // Call the kernel to test - hipLaunchKernelGGL(mhd::Calculate_CT_Electric_Fields, dimGrid, dimBlock, 0, - 0, dev_fluxX, dev_fluxY, dev_fluxZ, dev_grid, - dev_testCTElectricFields, nx, ny, nz, n_cells); + hipLaunchKernelGGL(mhd::Calculate_CT_Electric_Fields, dimGrid, dimBlock, 0, 0, dev_fluxX, dev_fluxY, dev_fluxZ, + dev_grid, dev_testCTElectricFields, nx, ny, nz, n_cells); CudaCheckError(); // Copy test data back - CudaSafeCall(cudaMemcpy( - testCTElectricFields.data(), dev_testCTElectricFields, - testCTElectricFields.size() * sizeof(Real), cudaMemcpyDeviceToHost)); + CudaSafeCall(cudaMemcpy(testCTElectricFields.data(), dev_testCTElectricFields, + testCTElectricFields.size() * sizeof(Real), cudaMemcpyDeviceToHost)); cudaDeviceSynchronize(); // Check the results for (size_t i = 0; i < fiducialData.size(); i++) { int xid, yid, zid; cuda_utilities::compute3DIndices(i, nx, ny, xid, yid, zid); - testingUtilities::checkResults( - fiducialData.at(i), testCTElectricFields.at(i), - "value at i = " + std::to_string(i) + ", xid = " + - std::to_string(xid) + ", yid = " + std::to_string(yid) + - ", zid = " + std::to_string(zid)); + testingUtilities::checkResults(fiducialData.at(i), testCTElectricFields.at(i), + "value at i = " + std::to_string(i) + ", xid = " + std::to_string(xid) + + ", yid = " + std::to_string(yid) + ", zid = " + std::to_string(zid)); } } }; diff --git a/src/mhd/magnetic_divergence.cu b/src/mhd/magnetic_divergence.cu index b51d7bfee..5c154262b 100644 --- a/src/mhd/magnetic_divergence.cu +++ b/src/mhd/magnetic_divergence.cu @@ -30,11 +30,8 @@ namespace mhd { // ========================================================================= -__global__ void calculateMagneticDivergence(Real const *dev_conserved, - Real *dev_maxDivergence, - Real const dx, Real const dy, - Real const dz, int const nx, - int const ny, int const nz, +__global__ void calculateMagneticDivergence(Real const *dev_conserved, Real *dev_maxDivergence, Real const dx, + Real const dy, Real const dz, int const nx, int const ny, int const nz, int const n_cells) { // Variables to store the divergence @@ -45,15 +42,13 @@ __global__ void calculateMagneticDivergence(Real const *dev_conserved, int xid, yid, zid, id_xMin1, id_yMin1, id_zMin1; // Grid stride loop to perform as much of the reduction as possible - for (size_t id = threadIdx.x + blockIdx.x * blockDim.x; id < n_cells; - id += blockDim.x * gridDim.x) { + for (size_t id = threadIdx.x + blockIdx.x * blockDim.x; id < n_cells; id += blockDim.x * gridDim.x) { // compute the real indices cuda_utilities::compute3DIndices(id, nx, ny, xid, yid, zid); // Thread guard to avoid overrun and to skip ghost cells that cannot // have their divergences computed due to a missing face; - if (xid > 1 and yid > 1 and zid > 1 and xid < nx and yid < ny and - zid < nz) { + if (xid > 1 and yid > 1 and zid > 1 and xid < nx and yid < ny and zid < nz) { // Compute the various offset indices id_xMin1 = cuda_utilities::compute1DIndex(xid - 1, yid, zid, nx, ny); id_yMin1 = cuda_utilities::compute1DIndex(xid, yid - 1, zid, nx, ny); @@ -61,16 +56,15 @@ __global__ void calculateMagneticDivergence(Real const *dev_conserved, // Compute divergence // Stone et al. 2008 equation 25 - cellDivergence = - ((dev_conserved[id + (grid_enum::magnetic_x)*n_cells] - - dev_conserved[id_xMin1 + (grid_enum::magnetic_x)*n_cells]) / - dx) + - ((dev_conserved[id + (grid_enum::magnetic_y)*n_cells] - - dev_conserved[id_yMin1 + (grid_enum::magnetic_y)*n_cells]) / - dy) + - ((dev_conserved[id + (grid_enum::magnetic_z)*n_cells] - - dev_conserved[id_zMin1 + (grid_enum::magnetic_z)*n_cells]) / - dz); + cellDivergence = ((dev_conserved[id + (grid_enum::magnetic_x)*n_cells] - + dev_conserved[id_xMin1 + (grid_enum::magnetic_x)*n_cells]) / + dx) + + ((dev_conserved[id + (grid_enum::magnetic_y)*n_cells] - + dev_conserved[id_yMin1 + (grid_enum::magnetic_y)*n_cells]) / + dy) + + ((dev_conserved[id + (grid_enum::magnetic_z)*n_cells] - + dev_conserved[id_zMin1 + (grid_enum::magnetic_z)*n_cells]) / + dz); maxDivergence = max(maxDivergence, fabs(cellDivergence)); } @@ -86,8 +80,7 @@ Real checkMagneticDivergence(Grid3D const &G) { // Compute the local value of the divergence // First let's create some variables we'll need. - cuda_utilities::AutomaticLaunchParams static const launchParams( - mhd::calculateMagneticDivergence); + cuda_utilities::AutomaticLaunchParams static const launchParams(mhd::calculateMagneticDivergence); cuda_utilities::DeviceVector static dev_maxDivergence(1); // Set the device side divergence to the smallest possible double so that @@ -95,10 +88,8 @@ Real checkMagneticDivergence(Grid3D const &G) dev_maxDivergence.assign(std::numeric_limits::lowest()); // Now lets get the local maximum divergence - hipLaunchKernelGGL(mhd::calculateMagneticDivergence, launchParams.numBlocks, - launchParams.threadsPerBlock, 0, 0, G.C.device, - dev_maxDivergence.data(), G.H.dx, G.H.dy, G.H.dz, G.H.nx, - G.H.ny, G.H.nz, G.H.n_cells); + hipLaunchKernelGGL(mhd::calculateMagneticDivergence, launchParams.numBlocks, launchParams.threadsPerBlock, 0, 0, + G.C.device, dev_maxDivergence.data(), G.H.dx, G.H.dy, G.H.dz, G.H.nx, G.H.ny, G.H.nz, G.H.n_cells); CudaCheckError(); Real max_magnetic_divergence = dev_maxDivergence[0]; @@ -119,13 +110,11 @@ Real checkMagneticDivergence(Grid3D const &G) chexit(-1); } else if (max_magnetic_divergence < 0.0) { // Report the error and exit - chprintf("The magnetic divergence is negative. Divergence = %7.4e\n", - max_magnetic_divergence); + chprintf("The magnetic divergence is negative. Divergence = %7.4e\n", max_magnetic_divergence); chexit(-1); } else // The magnetic divergence is within acceptable bounds { - chprintf("Global maximum magnetic divergence = %7.4e\n", - max_magnetic_divergence); + chprintf("Global maximum magnetic divergence = %7.4e\n", max_magnetic_divergence); } return max_magnetic_divergence; diff --git a/src/mhd/magnetic_divergence.h b/src/mhd/magnetic_divergence.h index af324bb47..605a50dae 100644 --- a/src/mhd/magnetic_divergence.h +++ b/src/mhd/magnetic_divergence.h @@ -39,11 +39,9 @@ namespace mhd * \param[in] nz Number of cells in the Z-direction * \param[in] n_cells Total number of cells */ -__global__ void calculateMagneticDivergence(Real const *dev_conserved, - Real *maxDivergence, Real const dx, - Real const dy, Real const dz, - int const nx, int const ny, - int const nz, int const n_cells); +__global__ void calculateMagneticDivergence(Real const *dev_conserved, Real *maxDivergence, Real const dx, + Real const dy, Real const dz, int const nx, int const ny, int const nz, + int const n_cells); // ========================================================================= // ========================================================================= diff --git a/src/mhd/magnetic_divergence_tests.cu b/src/mhd/magnetic_divergence_tests.cu index 4fcdb3447..9393c2498 100644 --- a/src/mhd/magnetic_divergence_tests.cu +++ b/src/mhd/magnetic_divergence_tests.cu @@ -27,9 +27,8 @@ TEST(tMHDGrid3DcheckMagneticDivergence, CorrectInputExpectCorrectOutput) { // Grid Parameters & testing parameters - size_t const gridSize = - 96; // Needs to be at least 64 so that each thread has a value - size_t const n_ghost = 4; + size_t const gridSize = 96; // Needs to be at least 64 so that each thread has a value + size_t const n_ghost = 4; // Instantiate Grid3D object Grid3D G; @@ -62,8 +61,7 @@ TEST(tMHDGrid3DcheckMagneticDivergence, CorrectInputExpectCorrectOutput) MPI_Finalize(); // Perform Comparison Real const fiducialDivergence = 3.6318132783263106 / 1E15; - testingUtilities::checkResults(fiducialDivergence, max_magnetic_divergence, - "maximum divergence"); + testingUtilities::checkResults(fiducialDivergence, max_magnetic_divergence, "maximum divergence"); } // ============================================================================= // End of tests for the magnetic field divergence functions diff --git a/src/mhd/magnetic_update.cu b/src/mhd/magnetic_update.cu index 2077d3656..7d1b48086 100644 --- a/src/mhd/magnetic_update.cu +++ b/src/mhd/magnetic_update.cu @@ -18,10 +18,9 @@ namespace mhd { // ========================================================================= -__global__ void Update_Magnetic_Field_3D( - Real *sourceGrid, Real *destinationGrid, Real *ctElectricFields, - int const nx, int const ny, int const nz, int const n_cells, Real const dt, - Real const dx, Real const dy, Real const dz) +__global__ void Update_Magnetic_Field_3D(Real *sourceGrid, Real *destinationGrid, Real *ctElectricFields, int const nx, + int const ny, int const nz, int const n_cells, Real const dt, Real const dx, + Real const dy, Real const dz) { // get a thread index int const blockId = blockIdx.x + blockIdx.y * gridDim.x; @@ -40,52 +39,34 @@ __global__ void Update_Magnetic_Field_3D( // Load the various edge electric fields required. The '1' and '2' // fields are not shared and the '3' fields are shared by two of the // updates - Real electric_x_1 = ctElectricFields[( - cuda_utilities::compute1DIndex(xid, yid + 1, zid, nx, ny))]; - Real electric_x_2 = ctElectricFields[( - cuda_utilities::compute1DIndex(xid, yid, zid + 1, nx, ny))]; - Real electric_x_3 = ctElectricFields[( - cuda_utilities::compute1DIndex(xid, yid + 1, zid + 1, nx, ny))]; - Real electric_y_1 = ctElectricFields[(cuda_utilities::compute1DIndex( - xid + 1, yid, zid, nx, ny)) + - n_cells]; - Real electric_y_2 = ctElectricFields[(cuda_utilities::compute1DIndex( - xid, yid, zid + 1, nx, ny)) + - n_cells]; - Real electric_y_3 = ctElectricFields[(cuda_utilities::compute1DIndex( - xid + 1, yid, zid + 1, nx, ny)) + - n_cells]; - Real electric_z_1 = ctElectricFields[(cuda_utilities::compute1DIndex( - xid + 1, yid, zid, nx, ny)) + - 2 * n_cells]; - Real electric_z_2 = ctElectricFields[(cuda_utilities::compute1DIndex( - xid, yid + 1, zid, nx, ny)) + - 2 * n_cells]; - Real electric_z_3 = ctElectricFields[(cuda_utilities::compute1DIndex( - xid + 1, yid + 1, zid, nx, ny)) + - 2 * n_cells]; + Real electric_x_1 = ctElectricFields[(cuda_utilities::compute1DIndex(xid, yid + 1, zid, nx, ny))]; + Real electric_x_2 = ctElectricFields[(cuda_utilities::compute1DIndex(xid, yid, zid + 1, nx, ny))]; + Real electric_x_3 = ctElectricFields[(cuda_utilities::compute1DIndex(xid, yid + 1, zid + 1, nx, ny))]; + Real electric_y_1 = ctElectricFields[(cuda_utilities::compute1DIndex(xid + 1, yid, zid, nx, ny)) + n_cells]; + Real electric_y_2 = ctElectricFields[(cuda_utilities::compute1DIndex(xid, yid, zid + 1, nx, ny)) + n_cells]; + Real electric_y_3 = ctElectricFields[(cuda_utilities::compute1DIndex(xid + 1, yid, zid + 1, nx, ny)) + n_cells]; + Real electric_z_1 = ctElectricFields[(cuda_utilities::compute1DIndex(xid + 1, yid, zid, nx, ny)) + 2 * n_cells]; + Real electric_z_2 = ctElectricFields[(cuda_utilities::compute1DIndex(xid, yid + 1, zid, nx, ny)) + 2 * n_cells]; + Real electric_z_3 = ctElectricFields[(cuda_utilities::compute1DIndex(xid + 1, yid + 1, zid, nx, ny)) + 2 * n_cells]; // Perform Updates // X field update // S&G 2009 equation 10 destinationGrid[threadId + (grid_enum::magnetic_x)*n_cells] = - sourceGrid[threadId + (grid_enum::magnetic_x)*n_cells] + - dtodz * (electric_y_3 - electric_y_1) + + sourceGrid[threadId + (grid_enum::magnetic_x)*n_cells] + dtodz * (electric_y_3 - electric_y_1) + dtody * (electric_z_1 - electric_z_3); // Y field update // S&G 2009 equation 11 destinationGrid[threadId + (grid_enum::magnetic_y)*n_cells] = - sourceGrid[threadId + (grid_enum::magnetic_y)*n_cells] + - dtodx * (electric_z_3 - electric_z_2) + + sourceGrid[threadId + (grid_enum::magnetic_y)*n_cells] + dtodx * (electric_z_3 - electric_z_2) + dtodz * (electric_x_1 - electric_x_3); // Z field update // S&G 2009 equation 12 destinationGrid[threadId + (grid_enum::magnetic_z)*n_cells] = - sourceGrid[threadId + (grid_enum::magnetic_z)*n_cells] + - dtody * (electric_x_3 - electric_x_2) + + sourceGrid[threadId + (grid_enum::magnetic_z)*n_cells] + dtody * (electric_x_3 - electric_x_2) + dtodx * (electric_y_2 - electric_y_3); } } diff --git a/src/mhd/magnetic_update.h b/src/mhd/magnetic_update.h index b0398fec3..2601abdb7 100644 --- a/src/mhd/magnetic_update.h +++ b/src/mhd/magnetic_update.h @@ -44,9 +44,8 @@ namespace mhd * \param[in] dy The size of each cell in the y-direction * \param[in] dz The size of each cell in the z-direction */ -__global__ void Update_Magnetic_Field_3D( - Real *sourceGrid, Real *destinationGrid, Real *ctElectricFields, - int const nx, int const ny, int const nz, int const n_cells, Real const dt, - Real const dx, Real const dy, Real const dz); +__global__ void Update_Magnetic_Field_3D(Real *sourceGrid, Real *destinationGrid, Real *ctElectricFields, int const nx, + int const ny, int const nz, int const n_cells, Real const dt, Real const dx, + Real const dy, Real const dz); // ========================================================================= } // end namespace mhd \ No newline at end of file diff --git a/src/mhd/magnetic_update_tests.cu b/src/mhd/magnetic_update_tests.cu index ecf76ade6..f311271b2 100644 --- a/src/mhd/magnetic_update_tests.cu +++ b/src/mhd/magnetic_update_tests.cu @@ -50,19 +50,15 @@ class tMHDUpdateMagneticField3D : public ::testing::Test dimBlock(TPB, 1, 1) { // Allocate device arrays - CudaSafeCall( - cudaMalloc(&dev_sourceGrid, sourceGrid.size() * sizeof(double))); - CudaSafeCall(cudaMalloc(&dev_destinationGrid, - destinationGrid.size() * sizeof(double))); - CudaSafeCall(cudaMalloc(&dev_ctElectricFields, - ctElectricFields.size() * sizeof(double))); + CudaSafeCall(cudaMalloc(&dev_sourceGrid, sourceGrid.size() * sizeof(double))); + CudaSafeCall(cudaMalloc(&dev_destinationGrid, destinationGrid.size() * sizeof(double))); + CudaSafeCall(cudaMalloc(&dev_ctElectricFields, ctElectricFields.size() * sizeof(double))); // Populate the grids with values where vector.at(i) = double(i). The // values chosen aren't that important, just that every cell has a unique // value std::iota(std::begin(sourceGrid), std::end(sourceGrid), 0.); - std::iota(std::begin(ctElectricFields), std::end(ctElectricFields), - sourceGrid.back() + 1); + std::iota(std::begin(ctElectricFields), std::end(ctElectricFields), sourceGrid.back() + 1); } ~tMHDUpdateMagneticField3D() = default; @@ -84,8 +80,7 @@ class tMHDUpdateMagneticField3D : public ::testing::Test std::vector fiducialData; // device pointers - double *dev_sourceGrid, *dev_destinationGrid, *dev_ctElectricFields, - *dev_fiducialData; + double *dev_sourceGrid, *dev_destinationGrid, *dev_ctElectricFields, *dev_fiducialData; /*! * \brief Launch the kernel and check results @@ -94,26 +89,20 @@ class tMHDUpdateMagneticField3D : public ::testing::Test void runTest() { // Copy values to GPU - CudaSafeCall(cudaMemcpy(dev_sourceGrid, sourceGrid.data(), - sourceGrid.size() * sizeof(Real), - cudaMemcpyHostToDevice)); - CudaSafeCall(cudaMemcpy(dev_destinationGrid, destinationGrid.data(), - destinationGrid.size() * sizeof(Real), + CudaSafeCall( + cudaMemcpy(dev_sourceGrid, sourceGrid.data(), sourceGrid.size() * sizeof(Real), cudaMemcpyHostToDevice)); + CudaSafeCall(cudaMemcpy(dev_destinationGrid, destinationGrid.data(), destinationGrid.size() * sizeof(Real), cudaMemcpyHostToDevice)); - CudaSafeCall(cudaMemcpy(dev_ctElectricFields, ctElectricFields.data(), - ctElectricFields.size() * sizeof(Real), + CudaSafeCall(cudaMemcpy(dev_ctElectricFields, ctElectricFields.data(), ctElectricFields.size() * sizeof(Real), cudaMemcpyHostToDevice)); // Call the kernel to test - hipLaunchKernelGGL(mhd::Update_Magnetic_Field_3D, dimGrid, dimBlock, 0, 0, - dev_sourceGrid, dev_destinationGrid, - dev_ctElectricFields, nx, ny, nz, n_cells, dt, dx, dy, - dz); + hipLaunchKernelGGL(mhd::Update_Magnetic_Field_3D, dimGrid, dimBlock, 0, 0, dev_sourceGrid, dev_destinationGrid, + dev_ctElectricFields, nx, ny, nz, n_cells, dt, dx, dy, dz); CudaCheckError(); // Copy test data back - CudaSafeCall(cudaMemcpy(destinationGrid.data(), dev_destinationGrid, - destinationGrid.size() * sizeof(Real), + CudaSafeCall(cudaMemcpy(destinationGrid.data(), dev_destinationGrid, destinationGrid.size() * sizeof(Real), cudaMemcpyDeviceToHost)); cudaDeviceSynchronize(); @@ -122,10 +111,8 @@ class tMHDUpdateMagneticField3D : public ::testing::Test int xid, yid, zid; cuda_utilities::compute3DIndices(i, nx, ny, xid, yid, zid); testingUtilities::checkResults(fiducialData.at(i), destinationGrid.at(i), - "value at i = " + std::to_string(i) + - ", xid = " + std::to_string(xid) + - ", yid = " + std::to_string(yid) + - ", zid = " + std::to_string(zid)); + "value at i = " + std::to_string(i) + ", xid = " + std::to_string(xid) + + ", yid = " + std::to_string(yid) + ", zid = " + std::to_string(zid)); } } }; diff --git a/src/model/disk_ICs.cpp b/src/model/disk_ICs.cpp index f834cd015..fb2b9fa96 100644 --- a/src/model/disk_ICs.cpp +++ b/src/model/disk_ICs.cpp @@ -188,8 +188,7 @@ Real r_hc_D3D(int i, Real dr) * \brief Calculate the density at spherical radius r due to a hydrostatic halo. Uses an analytic expression normalized by the value of the potential at the cooling radius. */ -void hydrostatic_ray_analytical_D3D(Real *rho, Real *r, Real *hdp, Real dr, - int nr) +void hydrostatic_ray_analytical_D3D(Real *rho, Real *r, Real *hdp, Real dr, int nr) { // Routine to determine the hydrostatic density profile // along a ray from the galaxy center @@ -216,9 +215,7 @@ void hydrostatic_ray_analytical_D3D(Real *rho, Real *r, Real *hdp, Real dr, // store densities for (i = 0; i < nr; i++) { r[i] = r_hc_D3D(i, dr); - rho[i] = rho_eos * pow(D_rho - gmo * (phi_hot_halo_D3D(r[i], hdp) - Phi_0) / - (cs * cs), - 1. / gmo); + rho[i] = rho_eos * pow(D_rho - gmo * (phi_hot_halo_D3D(r[i], hdp) - Phi_0) / (cs * cs), 1. / gmo); } } @@ -228,8 +225,7 @@ void hydrostatic_ray_analytical_D3D(Real *rho, Real *r, Real *hdp, Real dr, assuming an isothermal gas. Uses an iterative to scheme to determine the density at (R, z=0) relative to (R=0,z=0), then sets the densities according to an analytic expression. */ -void hydrostatic_column_isothermal_D3D(Real *rho, Real R, Real *hdp, Real dz, - int nz, int ng) +void hydrostatic_column_isothermal_D3D(Real *rho, Real R, Real *hdp, Real dz, int nz, int ng) { // x is cell center in x direction // y is cell center in y direction @@ -399,8 +395,7 @@ void hydrostatic_column_isothermal_D3D(Real *rho, Real R, Real *hdp, Real dz, * \brief Calculate the 1D density distribution in a hydrostatic column. Uses an iterative to scheme to determine the density at (R, z=0) relative to (R=0,z=0), then sets the densities according to an analytic expression. */ -void hydrostatic_column_analytical_D3D(Real *rho, Real R, Real *hdp, Real dz, - int nz, int ng) +void hydrostatic_column_analytical_D3D(Real *rho, Real R, Real *hdp, Real dz, int nz, int ng) { // x is cell center in x direction // y is cell center in y direction @@ -519,9 +514,8 @@ void hydrostatic_column_analytical_D3D(Real *rho, Real R, Real *hdp, Real dz, if (iter_phi > 1000) { printf("Something wrong in determining central density...\n"); printf("iter_phi = %d\n", iter_phi); - printf("z_0 %e z_1 %e z_2 %e A_0 %e A_1 %e phi_0 %e phi_1 %e\n", z_0, - z_1, z_2, A_0, A_1, phi_total_D3D(R, z_0, hdp), - phi_total_D3D(R, z_1, hdp)); + printf("z_0 %e z_1 %e z_2 %e A_0 %e A_1 %e phi_0 %e phi_1 %e\n", z_0, z_1, z_2, A_0, A_1, + phi_total_D3D(R, z_0, hdp), phi_total_D3D(R, z_1, hdp)); #ifdef MPI_CHOLLA MPI_Finalize(); #endif @@ -601,9 +595,8 @@ void hydrostatic_column_analytical_D3D(Real *rho, Real R, Real *hdp, Real dz, } else { km = ng + nz / 2 - (k - ks) - 1; } - rho[km] = rho[k]; - Delta_phi = - (phi_total_D3D(R, z_hc_D3D(k, dz, nz, ng), hdp) - Phi_0) / (cs * cs); + rho[km] = rho[k]; + Delta_phi = (phi_total_D3D(R, z_hc_D3D(k, dz, nz, ng), hdp) - Phi_0) / (cs * cs); } // check the surface density @@ -657,9 +650,8 @@ Real determine_rho_eos_D3D(Real cs, Real Sigma_0, Real *hdp) if (iter_phi > 1000) { printf("Something wrong in determining central density...\n"); printf("iter_phi = %d\n", iter_phi); - printf("z_0 %e z_1 %e z_2 %e A_0 %e A_1 %e phi_0 %e phi_1 %e\n", z_0, z_1, - z_2, A_0, A_1, phi_total_D3D(0, z_0, hdp), - phi_total_D3D(0, z_1, hdp)); + printf("z_0 %e z_1 %e z_2 %e A_0 %e A_1 %e phi_0 %e phi_1 %e\n", z_0, z_1, z_2, A_0, A_1, + phi_total_D3D(0, z_0, hdp), phi_total_D3D(0, z_1, hdp)); #ifdef MPI_CHOLLA MPI_Finalize(); #endif @@ -716,9 +708,7 @@ Real halo_density_D3D(Real r, Real *r_halo, Real *rho_halo, Real dr, int nr) } } // return the interpolated density profile - return (rho_halo[i + 1] - rho_halo[i]) * (r - r_halo[i]) / - (r_halo[i + 1] - r_halo[i]) + - rho_halo[i]; + return (rho_halo[i + 1] - rho_halo[i]) * (r - r_halo[i]) / (r_halo[i + 1] - r_halo[i]) + rho_halo[i]; } /*! \fn void Disk_3D(parameters P) @@ -753,27 +743,23 @@ void Grid3D::Disk_3D(parameters p) R_s = R_vir / c_vir; // halo scale length in kpc // T_d = 5.9406e5; // SET TO MATCH K_EOS SET BY HAND for K_eos = 1.859984e-14 // T_d = 2.0e5; - T_d = 1.0e4; // CHANGED FOR ISOTHERMAL - T_h = 1.0e6; // halo temperature, at density floor - rho_eos = 1.0e7; // gas eos normalized at 1e7 Msun/kpc^3 - rho_eos_h = - 3.0e3; // gas eos normalized at 3e3 Msun/kpc^3 (about n_h = 10^-3.5) - mu = 0.6; - - R_g = 2.0 * R_d; // gas scale length in kpc - Sigma_0 = 0.25 * M_d / - (2 * M_PI * R_g * R_g); // central surface density in Msun/kpc^2 - H_g = z_d; // initial guess for gas scale height + T_d = 1.0e4; // CHANGED FOR ISOTHERMAL + T_h = 1.0e6; // halo temperature, at density floor + rho_eos = 1.0e7; // gas eos normalized at 1e7 Msun/kpc^3 + rho_eos_h = 3.0e3; // gas eos normalized at 3e3 Msun/kpc^3 (about n_h = 10^-3.5) + mu = 0.6; + + R_g = 2.0 * R_d; // gas scale length in kpc + Sigma_0 = 0.25 * M_d / (2 * M_PI * R_g * R_g); // central surface density in Msun/kpc^2 + H_g = z_d; // initial guess for gas scale height // rho_floor = 1.0e3; //ICs minimum density in Msun/kpc^3 // EOS info - cs = sqrt(KB * T_d / (mu * MP)) * TIME_UNIT / - LENGTH_UNIT; // sound speed in kpc/kyr - cs_h = sqrt(KB * T_h / (mu * MP)) * TIME_UNIT / - LENGTH_UNIT; // sound speed in kpc/kyr + cs = sqrt(KB * T_d / (mu * MP)) * TIME_UNIT / LENGTH_UNIT; // sound speed in kpc/kyr + cs_h = sqrt(KB * T_h / (mu * MP)) * TIME_UNIT / LENGTH_UNIT; // sound speed in kpc/kyr // set some initial parameters - int nhdp = 21; // number of parameters to pass hydrostatic column + int nhdp = 21; // number of parameters to pass hydrostatic column Real *hdp = (Real *)calloc(nhdp, sizeof(Real)); // parameters hdp[0] = M_vir; hdp[1] = M_d; @@ -928,10 +914,8 @@ void Grid3D::Disk_3D(parameters p) idp = (i + 1) + j * H.nx + k * H.nx * H.ny; Get_Position(i - 1, j, k, &xpm, &ypm, &zpm); Get_Position(i + 1, j, k, &xpp, &ypp, &zpp); - Pm = C.Energy[idm] * - (gama - 1.0); // only internal energy stored in energy currently - Pp = C.Energy[idp] * - (gama - 1.0); // only internal energy stored in energy currently + Pm = C.Energy[idm] * (gama - 1.0); // only internal energy stored in energy currently + Pp = C.Energy[idp] * (gama - 1.0); // only internal energy stored in energy currently dPdx = (Pp - Pm) / (xpp - xpm); // pressure gradient along y direction @@ -945,10 +929,8 @@ void Grid3D::Disk_3D(parameters p) idp = i + (j + 1) * H.nx + k * H.nx * H.ny; Get_Position(i, j - 1, k, &xpm, &ypm, &zpm); Get_Position(i, j + 1, k, &xpp, &ypp, &zpm); - Pm = C.Energy[idm] * - (gama - 1.0); // only internal energy stored in energy currently - Pp = C.Energy[idp] * - (gama - 1.0); // only internal energy stored in energy currently + Pm = C.Energy[idm] * (gama - 1.0); // only internal energy stored in energy currently + Pp = C.Energy[idp] * (gama - 1.0); // only internal energy stored in energy currently dPdy = (Pp - Pm) / (ypp - ypm); // radial pressure gradient @@ -980,20 +962,16 @@ void Grid3D::Disk_3D(parameters p) // sheepishly check for NaN's! - if ((d < 0) || (P < 0) || (isnan(d)) || (isnan(P)) || (d != d) || - (P != P)) + if ((d < 0) || (P < 0) || (isnan(d)) || (isnan(P)) || (d != d) || (P != P)) printf("d %e P %e i %d j %d k %d id %d\n", d, P, i, j, k, id); - if ((isnan(vx)) || (isnan(vy)) || (isnan(vz)) || (vx != vx) || - (vy != vy) || (vz != vz)) { - printf("vx %e vy %e vz %e i %d j %d k %d id %d\n", vx, vy, vz, i, - j, k, id); + if ((isnan(vx)) || (isnan(vy)) || (isnan(vz)) || (vx != vx) || (vy != vy) || (vz != vz)) { + printf("vx %e vy %e vz %e i %d j %d k %d id %d\n", vx, vy, vz, i, j, k, id); } else { // if the density is negative, there // is a bigger problem! if (d < 0) { - printf("pid %d error negative density i %d j %d k %d d %e\n", - -1, i, j, k, d); + printf("pid %d error negative density i %d j %d k %d d %e\n", -1, i, j, k, d); } } } @@ -1053,8 +1031,7 @@ void Grid3D::Disk_3D(parameters p) // add kinetic contribution to total energy C.Energy[id] += 0.5 * - (C.momentum_x[id] * C.momentum_x[id] + - C.momentum_y[id] * C.momentum_y[id] + + (C.momentum_x[id] * C.momentum_x[id] + C.momentum_y[id] * C.momentum_y[id] + C.momentum_z[id] * C.momentum_z[id]) / C.density[id]; } diff --git a/src/model/disk_galaxy.h b/src/model/disk_galaxy.h index e13cc4fa2..08e1190be 100644 --- a/src/model/disk_galaxy.h +++ b/src/model/disk_galaxy.h @@ -16,8 +16,7 @@ class DiskGalaxy Real log_func(Real y) { return log(1 + y) - y / (1 + y); }; public: - DiskGalaxy(Real md, Real rd, Real zd, Real mvir, Real rvir, Real cvir, - Real rcool) + DiskGalaxy(Real md, Real rd, Real zd, Real mvir, Real rvir, Real cvir, Real rcool) { M_d = md; R_d = rd; @@ -65,10 +64,7 @@ class DiskGalaxy * point * @return */ - Real gr_total_D3D(Real R, Real z) - { - return gr_disk_D3D(R, z) + gr_halo_D3D(R, z); - }; + Real gr_total_D3D(Real R, Real z) { return gr_disk_D3D(R, z) + gr_halo_D3D(R, z); }; /** * Potential of NFW halo @@ -105,18 +101,14 @@ class DiskGalaxy const Real b = sqrt(z * z + c * c); const Real d = a + b; const Real s = r * r + d * d; - return M_d * c * c * (a * (d * d + r * r) + 3.0 * b * d * d) / - (4.0 * M_PI * b * b * b * pow(s, 2.5)); + return M_d * c * c * (a * (d * d + r * r) + 3.0 * b * d * d) / (4.0 * M_PI * b * b * b * pow(s, 2.5)); } /** * Convenience method that returns the combined gravitational potential * of the disk and halo. */ - Real phi_total_D3D(Real R, Real z) - { - return phi_halo_D3D(R, z) + phi_disk_D3D(R, z); - }; + Real phi_total_D3D(Real R, Real z) { return phi_halo_D3D(R, z) + phi_disk_D3D(R, z); }; /** * epicyclic frequency @@ -129,28 +121,19 @@ class DiskGalaxy Real A = R_d + sqrt(z * z + Z_d * Z_d); Real B = sqrt(R * R + A * A); - Real phiH_prime = -C * R / (r * r) / (1 + x) + - C * log(1 + x) * R_h * R / (r * r * r) + - GN * M_d * R / (B * B * B); - Real phiH_prime_prime = - -C / (r * r) / (1 + x) + 2 * C * R * R / (r * r * r * r) / (1 + x) + - C / ((1 + x) * (1 + x)) * R * R / R_h / (r * r * r) + - C * R * R / (1 + x) / (r * r * r * r) + - C * log(1 + x) * R_h / (r * r * r) * (1 - 3 * R * R / (r * r)) + - GN * M_d / (B * B * B) * (1 - 3 * R * R / (B * B)); + Real phiH_prime = -C * R / (r * r) / (1 + x) + C * log(1 + x) * R_h * R / (r * r * r) + GN * M_d * R / (B * B * B); + Real phiH_prime_prime = -C / (r * r) / (1 + x) + 2 * C * R * R / (r * r * r * r) / (1 + x) + + C / ((1 + x) * (1 + x)) * R * R / R_h / (r * r * r) + + C * R * R / (1 + x) / (r * r * r * r) + + C * log(1 + x) * R_h / (r * r * r) * (1 - 3 * R * R / (r * r)) + + GN * M_d / (B * B * B) * (1 - 3 * R * R / (B * B)); return 3 / R * phiH_prime + phiH_prime_prime; }; - Real surface_density(Real R) - { - return M_d / (2 * M_PI) / (R_d * R_d) * exp(-R / R_d); - }; + Real surface_density(Real R) { return M_d / (2 * M_PI) / (R_d * R_d) * exp(-R / R_d); }; - Real sigma_crit(Real R) - { - return 3.36 * GN * surface_density(R) / sqrt(kappa2(R, 0.0)); - }; + Real sigma_crit(Real R) { return 3.36 * GN * surface_density(R) / sqrt(kappa2(R, 0.0)); }; Real getM_d() const { return M_d; }; Real getR_d() const { return R_d; }; @@ -168,11 +151,8 @@ class ClusteredDiskGalaxy : public DiskGalaxy Real normalization; public: - ClusteredDiskGalaxy(Real lm, Real hm, Real md, Real rd, Real zd, Real mvir, - Real rvir, Real cvir, Real rcool) - : DiskGalaxy{md, rd, zd, mvir, rvir, cvir, rcool}, - lower_cluster_mass{lm}, - higher_cluster_mass{hm} + ClusteredDiskGalaxy(Real lm, Real hm, Real md, Real rd, Real zd, Real mvir, Real rvir, Real cvir, Real rcool) + : DiskGalaxy{md, rd, zd, mvir, rvir, cvir, rcool}, lower_cluster_mass{lm}, higher_cluster_mass{hm} { // if (lower_cluster_mass >= higher_cluster_mass) normalization = 1 / log(higher_cluster_mass / lower_cluster_mass); @@ -182,8 +162,7 @@ class ClusteredDiskGalaxy : public DiskGalaxy Real getHigherClusterMass() const { return higher_cluster_mass; } Real getNormalization() const { return normalization; } - std::vector generateClusterPopulationMasses(int N, - std::mt19937_64 generator) + std::vector generateClusterPopulationMasses(int N, std::mt19937_64 generator) { std::vector population; for (int i = 0; i < N; i++) { @@ -203,8 +182,7 @@ namespace Galaxies { // all masses in M_sun and all distances in kpc // static DiskGalaxy MW(6.5e10, 3.5, (3.5/5.0), 1.0e12, 261, 20, 157.0); -static ClusteredDiskGalaxy MW(1e4, 5e5, 6.5e10, 2.7, 0.7, 1.077e12, 261, 18, - 157.0); +static ClusteredDiskGalaxy MW(1e4, 5e5, 6.5e10, 2.7, 0.7, 1.077e12, 261, 18, 157.0); static DiskGalaxy M82(1.0e10, 0.8, 0.15, 5.0e10, 0.8 / 0.015, 10, 100.0); }; // namespace Galaxies diff --git a/src/mpi/mpi_routines.cpp b/src/mpi/mpi_routines.cpp index 66f3cd0ef..e11b5a257 100644 --- a/src/mpi/mpi_routines.cpp +++ b/src/mpi/mpi_routines.cpp @@ -193,28 +193,24 @@ void InitializeChollaMPI(int *pargc, char **pargv[]) } #ifdef PARTICLES - if (!(send_request_n_particles = - (MPI_Request *)malloc(2 * sizeof(MPI_Request)))) { + if (!(send_request_n_particles = (MPI_Request *)malloc(2 * sizeof(MPI_Request)))) { chprintf( "Error allocating send_request for number of particles for " "transfer.\n"); chexit(-2); } - if (!(recv_request_n_particles = - (MPI_Request *)malloc(2 * sizeof(MPI_Request)))) { + if (!(recv_request_n_particles = (MPI_Request *)malloc(2 * sizeof(MPI_Request)))) { chprintf( "Error allocating recv_request for number of particles for " "transfer.\n"); chexit(-2); } - if (!(send_request_particles_transfer = - (MPI_Request *)malloc(2 * sizeof(MPI_Request)))) { + if (!(send_request_particles_transfer = (MPI_Request *)malloc(2 * sizeof(MPI_Request)))) { chprintf("Error allocating send_request for particles transfer.\n"); chexit(-2); } - if (!(recv_request_particles_transfer = - (MPI_Request *)malloc(2 * sizeof(MPI_Request)))) { + if (!(recv_request_particles_transfer = (MPI_Request *)malloc(2 * sizeof(MPI_Request)))) { chprintf("Error allocating recv_request for particles transfer.\n"); chexit(-2); } @@ -238,8 +234,7 @@ void InitializeChollaMPI(int *pargc, char **pargv[]) } /* Perform domain decomposition */ -void DomainDecomposition(struct parameters *P, struct Header *H, int nx_gin, - int ny_gin, int nz_gin) +void DomainDecomposition(struct parameters *P, struct Header *H, int nx_gin, int ny_gin, int nz_gin) { DomainDecompositionBLOCK(P, H, nx_gin, ny_gin, nz_gin); @@ -268,8 +263,7 @@ void DomainDecomposition(struct parameters *P, struct Header *H, int nx_gin, } /* Perform domain decomposition */ -void DomainDecompositionBLOCK(struct parameters *P, struct Header *H, - int nx_gin, int ny_gin, int nz_gin) +void DomainDecompositionBLOCK(struct parameters *P, struct Header *H, int nx_gin, int ny_gin, int nz_gin) { int n; int i, j, k; @@ -279,8 +273,7 @@ void DomainDecompositionBLOCK(struct parameters *P, struct Header *H, // enforce an even number of processes if (nproc % 2 && nproc > 1) { - chprintf( - "WARNING: Odd number of processors > 1 is not officially supported\n"); + chprintf("WARNING: Odd number of processors > 1 is not officially supported\n"); } /* record global size */ @@ -311,8 +304,7 @@ void DomainDecompositionBLOCK(struct parameters *P, struct Header *H, nproc_x = P->n_proc_x; nproc_y = P->n_proc_y; nproc_z = P->n_proc_z; - chprintf("Setting MPI grid: nx=%d ny=%d nz=%d\n", nproc_x, nproc_y, - nproc_z); + chprintf("Setting MPI grid: nx=%d ny=%d nz=%d\n", nproc_x, nproc_y, nproc_z); // chprintf("Setting MPI grid: nx=%d ny=%d nz=%d\n", P->n_proc_x, // P->n_proc_y, P->n_proc_z); #endif @@ -529,8 +521,7 @@ void Allocate_MPI_DeviceBuffers(struct Header *H) } // 3D if (H->ny > 1 && H->nz > 1) { - xbsize = H->n_fields * H->n_ghost * (H->ny - 2 * H->n_ghost) * - (H->nz - 2 * H->n_ghost); + xbsize = H->n_fields * H->n_ghost * (H->ny - 2 * H->n_ghost) * (H->nz - 2 * H->n_ghost); ybsize = H->n_fields * H->n_ghost * (H->nx) * (H->nz - 2 * H->n_ghost); zbsize = H->n_fields * H->n_ghost * (H->nx) * (H->ny); } @@ -558,30 +549,18 @@ void Allocate_MPI_DeviceBuffers(struct Header *H) N_DATA_PER_PARTICLE_TRANSFER += 1; // one more for the particle age #endif - buffer_length_particles_x0_send = - N_PARTICLES_TRANSFER * N_DATA_PER_PARTICLE_TRANSFER; - buffer_length_particles_x0_recv = - N_PARTICLES_TRANSFER * N_DATA_PER_PARTICLE_TRANSFER; - buffer_length_particles_x1_send = - N_PARTICLES_TRANSFER * N_DATA_PER_PARTICLE_TRANSFER; - buffer_length_particles_x1_recv = - N_PARTICLES_TRANSFER * N_DATA_PER_PARTICLE_TRANSFER; - buffer_length_particles_y0_send = - N_PARTICLES_TRANSFER * N_DATA_PER_PARTICLE_TRANSFER; - buffer_length_particles_y0_recv = - N_PARTICLES_TRANSFER * N_DATA_PER_PARTICLE_TRANSFER; - buffer_length_particles_y1_send = - N_PARTICLES_TRANSFER * N_DATA_PER_PARTICLE_TRANSFER; - buffer_length_particles_y1_recv = - N_PARTICLES_TRANSFER * N_DATA_PER_PARTICLE_TRANSFER; - buffer_length_particles_z0_send = - N_PARTICLES_TRANSFER * N_DATA_PER_PARTICLE_TRANSFER; - buffer_length_particles_z0_recv = - N_PARTICLES_TRANSFER * N_DATA_PER_PARTICLE_TRANSFER; - buffer_length_particles_z1_send = - N_PARTICLES_TRANSFER * N_DATA_PER_PARTICLE_TRANSFER; - buffer_length_particles_z1_recv = - N_PARTICLES_TRANSFER * N_DATA_PER_PARTICLE_TRANSFER; + buffer_length_particles_x0_send = N_PARTICLES_TRANSFER * N_DATA_PER_PARTICLE_TRANSFER; + buffer_length_particles_x0_recv = N_PARTICLES_TRANSFER * N_DATA_PER_PARTICLE_TRANSFER; + buffer_length_particles_x1_send = N_PARTICLES_TRANSFER * N_DATA_PER_PARTICLE_TRANSFER; + buffer_length_particles_x1_recv = N_PARTICLES_TRANSFER * N_DATA_PER_PARTICLE_TRANSFER; + buffer_length_particles_y0_send = N_PARTICLES_TRANSFER * N_DATA_PER_PARTICLE_TRANSFER; + buffer_length_particles_y0_recv = N_PARTICLES_TRANSFER * N_DATA_PER_PARTICLE_TRANSFER; + buffer_length_particles_y1_send = N_PARTICLES_TRANSFER * N_DATA_PER_PARTICLE_TRANSFER; + buffer_length_particles_y1_recv = N_PARTICLES_TRANSFER * N_DATA_PER_PARTICLE_TRANSFER; + buffer_length_particles_z0_send = N_PARTICLES_TRANSFER * N_DATA_PER_PARTICLE_TRANSFER; + buffer_length_particles_z0_recv = N_PARTICLES_TRANSFER * N_DATA_PER_PARTICLE_TRANSFER; + buffer_length_particles_z1_send = N_PARTICLES_TRANSFER * N_DATA_PER_PARTICLE_TRANSFER; + buffer_length_particles_z1_recv = N_PARTICLES_TRANSFER * N_DATA_PER_PARTICLE_TRANSFER; #endif // PARTICLES chprintf("Allocating MPI communication buffers on GPU "); @@ -627,30 +606,18 @@ void Allocate_MPI_DeviceBuffers(struct Header *H) "Allocating MPI communication buffers on GPU for particle transfers ( " "N_Particles: %d ).\n", N_PARTICLES_TRANSFER); - CudaSafeCall(cudaMalloc(&d_send_buffer_x0_particles, - buffer_length_particles_x0_send * sizeof(Real))); - CudaSafeCall(cudaMalloc(&d_send_buffer_x1_particles, - buffer_length_particles_x1_send * sizeof(Real))); - CudaSafeCall(cudaMalloc(&d_send_buffer_y0_particles, - buffer_length_particles_y0_send * sizeof(Real))); - CudaSafeCall(cudaMalloc(&d_send_buffer_y1_particles, - buffer_length_particles_y1_send * sizeof(Real))); - CudaSafeCall(cudaMalloc(&d_send_buffer_z0_particles, - buffer_length_particles_z0_send * sizeof(Real))); - CudaSafeCall(cudaMalloc(&d_send_buffer_z1_particles, - buffer_length_particles_z1_send * sizeof(Real))); - CudaSafeCall(cudaMalloc(&d_recv_buffer_x0_particles, - buffer_length_particles_x0_recv * sizeof(Real))); - CudaSafeCall(cudaMalloc(&d_recv_buffer_x1_particles, - buffer_length_particles_x1_recv * sizeof(Real))); - CudaSafeCall(cudaMalloc(&d_recv_buffer_y0_particles, - buffer_length_particles_y0_recv * sizeof(Real))); - CudaSafeCall(cudaMalloc(&d_recv_buffer_y1_particles, - buffer_length_particles_y1_recv * sizeof(Real))); - CudaSafeCall(cudaMalloc(&d_recv_buffer_z0_particles, - buffer_length_particles_z0_recv * sizeof(Real))); - CudaSafeCall(cudaMalloc(&d_recv_buffer_z1_particles, - buffer_length_particles_z1_recv * sizeof(Real))); + CudaSafeCall(cudaMalloc(&d_send_buffer_x0_particles, buffer_length_particles_x0_send * sizeof(Real))); + CudaSafeCall(cudaMalloc(&d_send_buffer_x1_particles, buffer_length_particles_x1_send * sizeof(Real))); + CudaSafeCall(cudaMalloc(&d_send_buffer_y0_particles, buffer_length_particles_y0_send * sizeof(Real))); + CudaSafeCall(cudaMalloc(&d_send_buffer_y1_particles, buffer_length_particles_y1_send * sizeof(Real))); + CudaSafeCall(cudaMalloc(&d_send_buffer_z0_particles, buffer_length_particles_z0_send * sizeof(Real))); + CudaSafeCall(cudaMalloc(&d_send_buffer_z1_particles, buffer_length_particles_z1_send * sizeof(Real))); + CudaSafeCall(cudaMalloc(&d_recv_buffer_x0_particles, buffer_length_particles_x0_recv * sizeof(Real))); + CudaSafeCall(cudaMalloc(&d_recv_buffer_x1_particles, buffer_length_particles_x1_recv * sizeof(Real))); + CudaSafeCall(cudaMalloc(&d_recv_buffer_y0_particles, buffer_length_particles_y0_recv * sizeof(Real))); + CudaSafeCall(cudaMalloc(&d_recv_buffer_y1_particles, buffer_length_particles_y1_recv * sizeof(Real))); + CudaSafeCall(cudaMalloc(&d_recv_buffer_z0_particles, buffer_length_particles_z0_recv * sizeof(Real))); + CudaSafeCall(cudaMalloc(&d_recv_buffer_z1_particles, buffer_length_particles_z1_recv * sizeof(Real))); #endif // PARTICLES && PARTICLES_GPU // CPU relies on host buffers, GPU without MPI_GPU relies on host buffers @@ -661,30 +628,18 @@ void Allocate_MPI_DeviceBuffers(struct Header *H) "Allocating MPI communication buffers on Host for particle transfers ( " "N_Particles: %d ).\n", N_PARTICLES_TRANSFER); - h_send_buffer_x0_particles = - (Real *)malloc(buffer_length_particles_x0_send * sizeof(Real)); - h_send_buffer_x1_particles = - (Real *)malloc(buffer_length_particles_x1_send * sizeof(Real)); - h_send_buffer_y0_particles = - (Real *)malloc(buffer_length_particles_y0_send * sizeof(Real)); - h_send_buffer_y1_particles = - (Real *)malloc(buffer_length_particles_y1_send * sizeof(Real)); - h_send_buffer_z0_particles = - (Real *)malloc(buffer_length_particles_z0_send * sizeof(Real)); - h_send_buffer_z1_particles = - (Real *)malloc(buffer_length_particles_z1_send * sizeof(Real)); - h_recv_buffer_x0_particles = - (Real *)malloc(buffer_length_particles_x0_recv * sizeof(Real)); - h_recv_buffer_x1_particles = - (Real *)malloc(buffer_length_particles_x1_recv * sizeof(Real)); - h_recv_buffer_y0_particles = - (Real *)malloc(buffer_length_particles_y0_recv * sizeof(Real)); - h_recv_buffer_y1_particles = - (Real *)malloc(buffer_length_particles_y1_recv * sizeof(Real)); - h_recv_buffer_z0_particles = - (Real *)malloc(buffer_length_particles_z0_recv * sizeof(Real)); - h_recv_buffer_z1_particles = - (Real *)malloc(buffer_length_particles_z1_recv * sizeof(Real)); + h_send_buffer_x0_particles = (Real *)malloc(buffer_length_particles_x0_send * sizeof(Real)); + h_send_buffer_x1_particles = (Real *)malloc(buffer_length_particles_x1_send * sizeof(Real)); + h_send_buffer_y0_particles = (Real *)malloc(buffer_length_particles_y0_send * sizeof(Real)); + h_send_buffer_y1_particles = (Real *)malloc(buffer_length_particles_y1_send * sizeof(Real)); + h_send_buffer_z0_particles = (Real *)malloc(buffer_length_particles_z0_send * sizeof(Real)); + h_send_buffer_z1_particles = (Real *)malloc(buffer_length_particles_z1_send * sizeof(Real)); + h_recv_buffer_x0_particles = (Real *)malloc(buffer_length_particles_x0_recv * sizeof(Real)); + h_recv_buffer_x1_particles = (Real *)malloc(buffer_length_particles_x1_recv * sizeof(Real)); + h_recv_buffer_y0_particles = (Real *)malloc(buffer_length_particles_y0_recv * sizeof(Real)); + h_recv_buffer_y1_particles = (Real *)malloc(buffer_length_particles_y1_recv * sizeof(Real)); + h_recv_buffer_z0_particles = (Real *)malloc(buffer_length_particles_z0_recv * sizeof(Real)); + h_recv_buffer_z1_particles = (Real *)malloc(buffer_length_particles_z1_recv * sizeof(Real)); #endif // (defined(PARTICLES_GPU) && !defined(MPI_GPU)) || // defined(PARTICLES_CPU) #endif // PARTICLES @@ -753,8 +708,7 @@ part_int_t Get_Particles_IDs_Global_MPI_Offset(part_int_t n_local) n_local_all = (part_int_t *)malloc(nproc * sizeof(part_int_t)); n_local_send[0] = n_local; - MPI_Allgather(n_local_send, 1, MPI_PART_INT, n_local_all, 1, MPI_PART_INT, - world); + MPI_Allgather(n_local_send, 1, MPI_PART_INT, n_local_all, 1, MPI_PART_INT, world); global_offset = 0; for (int other_rank = 0; other_rank < nproc; other_rank++) { if (other_rank < procID) global_offset += n_local_all[other_rank]; @@ -776,8 +730,7 @@ void Print_Domain_Properties(struct Header H) for (i = 0; i < nproc; i++) { if (i == procID) { printf("procID %d nxl %ld nxls %ld\n", procID, nx_local, nx_local_start); - printf("xb %e yb %e zb %e xbl %e ybl %e zbl %e\n", H.xbound, H.ybound, - H.zbound, H.xblocal, H.yblocal, H.zblocal); + printf("xb %e yb %e zb %e xbl %e ybl %e zbl %e\n", H.xbound, H.ybound, H.zbound, H.xblocal, H.yblocal, H.zblocal); printf("dx %e\n", H.dx); printf("dy %e\n", H.dy); printf("dz %e\n", H.dz); @@ -791,21 +744,19 @@ void Print_Domain_Properties(struct Header H) #ifdef PARTICLES // Funtion that checks if the buffer size For the particles transfer is large // enough, and grows the buffer if needed. -void Check_and_Grow_Particles_Buffer(Real **part_buffer, int *current_size_ptr, - int new_size) +void Check_and_Grow_Particles_Buffer(Real **part_buffer, int *current_size_ptr, int new_size) { int current_size = *current_size_ptr; if (new_size <= current_size) return; new_size = (int)2 * new_size; - std::cout << " ####### Growing Particles Transfer Buffer, size: " - << current_size << " new_size: " << new_size << std::endl; + std::cout << " ####### Growing Particles Transfer Buffer, size: " << current_size << " new_size: " << new_size + << std::endl; Real *new_buffer; new_buffer = (Real *)realloc(*part_buffer, new_size * sizeof(Real)); if (new_buffer == NULL) { - std::cout << " Error When Allocating New Particles Transfer Buffer" - << std::endl; + std::cout << " Error When Allocating New Particles Transfer Buffer" << std::endl; chexit(-1); } *part_buffer = new_buffer; @@ -961,33 +912,26 @@ void deallocate_three_dimensional_int_array(int ***x, int n, int l, int m) void copyHostToDeviceReceiveBuffer(int direction) { - int xbsize = x_buffer_length, ybsize = y_buffer_length, - zbsize = z_buffer_length; + int xbsize = x_buffer_length, ybsize = y_buffer_length, zbsize = z_buffer_length; switch (direction) { case (0): - cudaMemcpy(d_recv_buffer_x0, h_recv_buffer_x0, xbsize * sizeof(Real), - cudaMemcpyHostToDevice); + cudaMemcpy(d_recv_buffer_x0, h_recv_buffer_x0, xbsize * sizeof(Real), cudaMemcpyHostToDevice); break; case (1): - cudaMemcpy(d_recv_buffer_x1, h_recv_buffer_x1, xbsize * sizeof(Real), - cudaMemcpyHostToDevice); + cudaMemcpy(d_recv_buffer_x1, h_recv_buffer_x1, xbsize * sizeof(Real), cudaMemcpyHostToDevice); break; case (2): - cudaMemcpy(d_recv_buffer_y0, h_recv_buffer_y0, ybsize * sizeof(Real), - cudaMemcpyHostToDevice); + cudaMemcpy(d_recv_buffer_y0, h_recv_buffer_y0, ybsize * sizeof(Real), cudaMemcpyHostToDevice); break; case (3): - cudaMemcpy(d_recv_buffer_y1, h_recv_buffer_y1, ybsize * sizeof(Real), - cudaMemcpyHostToDevice); + cudaMemcpy(d_recv_buffer_y1, h_recv_buffer_y1, ybsize * sizeof(Real), cudaMemcpyHostToDevice); break; case (4): - cudaMemcpy(d_recv_buffer_z0, h_recv_buffer_z0, zbsize * sizeof(Real), - cudaMemcpyHostToDevice); + cudaMemcpy(d_recv_buffer_z0, h_recv_buffer_z0, zbsize * sizeof(Real), cudaMemcpyHostToDevice); break; case (5): - cudaMemcpy(d_recv_buffer_z1, h_recv_buffer_z1, zbsize * sizeof(Real), - cudaMemcpyHostToDevice); + cudaMemcpy(d_recv_buffer_z1, h_recv_buffer_z1, zbsize * sizeof(Real), cudaMemcpyHostToDevice); break; } } diff --git a/src/mpi/mpi_routines.h b/src/mpi/mpi_routines.h index 5945fd95f..f1170a6a9 100644 --- a/src/mpi/mpi_routines.h +++ b/src/mpi/mpi_routines.h @@ -25,8 +25,7 @@ extern MPI_Comm node; /*communicator for each node*/ extern MPI_Datatype MPI_CHREAL; /*data type describing float precision*/ #ifdef PARTICLES -extern MPI_Datatype - MPI_PART_INT; /*data type describing interger for particles precision*/ +extern MPI_Datatype MPI_PART_INT; /*data type describing interger for particles precision*/ #endif // extern MPI_Request send_request[6]; @@ -149,11 +148,9 @@ extern int nproc_z; void InitializeChollaMPI(int *pargc, char **pargv[]); /* Perform domain decomposition */ -void DomainDecomposition(struct parameters *P, struct Header *H, int nx_global, - int ny_global, int nz_global); +void DomainDecomposition(struct parameters *P, struct Header *H, int nx_global, int ny_global, int nz_global); -void DomainDecompositionBLOCK(struct parameters *P, struct Header *H, - int nx_global, int ny_global, int nz_global); +void DomainDecompositionBLOCK(struct parameters *P, struct Header *H, int nx_global, int ny_global, int nz_global); /*tile MPI processes in a block decomposition*/ void TileBlockDecomposition(void); @@ -177,8 +174,7 @@ part_int_t Get_Particles_IDs_Global_MPI_Offset(part_int_t n_local); // Function that checks if the buffer size For the particles transfer is large // enough, and grows the buffer if needed. -void Check_and_Grow_Particles_Buffer(Real **part_buffer, int *current_size_ptr, - int new_size); +void Check_and_Grow_Particles_Buffer(Real **part_buffer, int *current_size_ptr, int new_size); #endif /* Print information about the domain properties */ diff --git a/src/particles/density_CIC.cpp b/src/particles/density_CIC.cpp index aac2a2056..3e28f55de 100644 --- a/src/particles/density_CIC.cpp +++ b/src/particles/density_CIC.cpp @@ -110,8 +110,7 @@ void Grid3D::Copy_Particles_Density_function(int g_start, int g_end) for (k = g_start; k < g_end; k++) { for (j = 0; j < ny_dens; j++) { for (i = 0; i < nx_dens; i++) { - id_CIC = (i + nGHST) + (j + nGHST) * nx_part + - (k + nGHST) * nx_part * ny_part; + id_CIC = (i + nGHST) + (j + nGHST) * nx_part + (k + nGHST) * nx_part * ny_part; id_grid = i + j * nx_dens + k * nx_dens * ny_dens; Grav.F.density_h[id_grid] = Particles.G.density[id_CIC]; } @@ -133,27 +132,21 @@ void ::Particles_3D::Clear_Density() #ifdef PARTICLES_GPU -void Particles_3D::Clear_Density_GPU() -{ - Clear_Density_GPU_function(G.density_dev, G.n_cells); -} +void Particles_3D::Clear_Density_GPU() { Clear_Density_GPU_function(G.density_dev, G.n_cells); } void Particles_3D::Get_Density_CIC_GPU() { - Get_Density_CIC_GPU_function( - n_local, particle_mass, G.xMin, G.xMax, G.yMin, G.yMax, G.zMin, G.zMax, - G.dx, G.dy, G.dz, G.nx_local, G.ny_local, G.nz_local, - G.n_ghost_particles_grid, G.n_cells, G.density, G.density_dev, pos_x_dev, - pos_y_dev, pos_z_dev, mass_dev); + Get_Density_CIC_GPU_function(n_local, particle_mass, G.xMin, G.xMax, G.yMin, G.yMax, G.zMin, G.zMax, G.dx, G.dy, G.dz, + G.nx_local, G.ny_local, G.nz_local, G.n_ghost_particles_grid, G.n_cells, G.density, + G.density_dev, pos_x_dev, pos_y_dev, pos_z_dev, mass_dev); } #endif // PARTICLES_GPU #ifdef PARTICLES_CPU // Get the CIC index from the particle position -void Get_Indexes_CIC(Real xMin, Real yMin, Real zMin, Real dx, Real dy, Real dz, - Real pos_x, Real pos_y, Real pos_z, int &indx_x, - int &indx_y, int &indx_z) +void Get_Indexes_CIC(Real xMin, Real yMin, Real zMin, Real dx, Real dy, Real dz, Real pos_x, Real pos_y, Real pos_z, + int &indx_x, int &indx_y, int &indx_z) { indx_x = (int)floor((pos_x - xMin - 0.5 * dx) / dx); indx_y = (int)floor((pos_y - yMin - 0.5 * dy) / dy); @@ -197,8 +190,7 @@ void Particles_3D::Get_Density_CIC_Serial() x_pos = pos_x[pIndx]; y_pos = pos_y[pIndx]; z_pos = pos_z[pIndx]; - Get_Indexes_CIC(xMin, yMin, zMin, dx, dy, dz, x_pos, y_pos, z_pos, indx_x, - indx_y, indx_z); + Get_Indexes_CIC(xMin, yMin, zMin, dx, dy, dz, x_pos, y_pos, z_pos, indx_x, indx_y, indx_z); if (indx_x < -1) ignore = true; if (indx_y < -1) ignore = true; if (indx_z < -1) ignore = true; @@ -211,8 +203,7 @@ void Particles_3D::Get_Density_CIC_Serial() if (!in_local) { std::cout << " Density CIC Error:" << std::endl; #ifdef PARTICLE_IDS - std::cout << " Particle outside Local domain pID: " << partIDs[pIndx] - << std::endl; + std::cout << " Particle outside Local domain pID: " << partIDs[pIndx] << std::endl; #else std::cout << " Particle outside Local domain " << std::endl; #endif @@ -226,8 +217,7 @@ void Particles_3D::Get_Density_CIC_Serial() } if (ignore) { #ifdef PARTICLE_IDS - std::cout << "ERROR Density CIC Index pID: " << partIDs[pIndx] - << std::endl; + std::cout << "ERROR Density CIC Index pID: " << partIDs[pIndx] << std::endl; #else std::cout << "ERROR Density CIC Index " << std::endl; #endif @@ -364,8 +354,7 @@ void Particles_3D::Get_Density_CIC_OMP() if (!in_local) { std::cout << " Density CIC Error:" << std::endl; #ifdef PARTICLE_IDS - std::cout << " Particle outside Local domain pID: " - << partIDs[pIndx] << std::endl; + std::cout << " Particle outside Local domain pID: " << partIDs[pIndx] << std::endl; #else std::cout << " Particle outside Local domain " << std::endl; #endif @@ -419,8 +408,7 @@ void Particles_3D::Get_Density_CIC_OMP() G.density[indx] += pMass * delta_x * (1 - delta_y) * (1 - delta_z); indx = (indx_x + 1) + (indx_y + 1) * nx_g + (indx_z + 1) * nx_g * ny_g; - G.density[indx] += - pMass * (1 - delta_x) * (1 - delta_y) * (1 - delta_z); + G.density[indx] += pMass * (1 - delta_x) * (1 - delta_y) * (1 - delta_z); } } } diff --git a/src/particles/density_CIC.h b/src/particles/density_CIC.h index d8ea2bef6..b7181e68d 100644 --- a/src/particles/density_CIC.h +++ b/src/particles/density_CIC.h @@ -3,9 +3,8 @@ #ifndef DENSITY_CIC_H #define DENSITY_CIC_H -void Get_Indexes_CIC(Real xMin, Real yMin, Real zMin, Real dx, Real dy, Real dz, - Real pos_x, Real pos_y, Real pos_z, int &indx_x, - int &indx_y, int &indx_z); +void Get_Indexes_CIC(Real xMin, Real yMin, Real zMin, Real dx, Real dy, Real dz, Real pos_x, Real pos_y, Real pos_z, + int &indx_x, int &indx_y, int &indx_z); #endif #endif \ No newline at end of file diff --git a/src/particles/density_CIC_gpu.cu b/src/particles/density_CIC_gpu.cu index 835110237..e4d8a0e52 100644 --- a/src/particles/density_CIC_gpu.cu +++ b/src/particles/density_CIC_gpu.cu @@ -14,8 +14,7 @@ #ifdef GRAVITY_GPU void Grid3D::Copy_Particles_Density_to_GPU() { - CudaSafeCall(cudaMemcpy(Particles.G.density_dev, Particles.G.density, - Particles.G.n_cells * sizeof(Real), + CudaSafeCall(cudaMemcpy(Particles.G.density_dev, Particles.G.density, Particles.G.n_cells * sizeof(Real), cudaMemcpyHostToDevice)); } @@ -32,18 +31,15 @@ __device__ double atomicAdd(double *address, double val) unsigned long long int old = *address_as_ull, assumed; do { assumed = old; - old = atomicCAS(address_as_ull, assumed, - __double_as_longlong(val + __longlong_as_double(assumed))); + old = atomicCAS(address_as_ull, assumed, __double_as_longlong(val + __longlong_as_double(assumed))); } while (assumed != old); return __longlong_as_double(old); } #endif // Get the CIC index from the particle position ( device function ) -__device__ void Get_Indexes_CIC(Real xMin, Real yMin, Real zMin, Real dx, - Real dy, Real dz, Real pos_x, Real pos_y, - Real pos_z, int &indx_x, int &indx_y, - int &indx_z) +__device__ void Get_Indexes_CIC(Real xMin, Real yMin, Real zMin, Real dx, Real dy, Real dz, Real pos_x, Real pos_y, + Real pos_z, int &indx_x, int &indx_y, int &indx_z) { indx_x = (int)floor((pos_x - xMin - 0.5 * dx) / dx); indx_y = (int)floor((pos_y - yMin - 0.5 * dy) / dy); @@ -51,13 +47,10 @@ __device__ void Get_Indexes_CIC(Real xMin, Real yMin, Real zMin, Real dx, } // CUDA Kernel to compute the CIC density from the particles positions -__global__ void Get_Density_CIC_Kernel(part_int_t n_local, Real particle_mass, - Real *density_dev, Real *pos_x_dev, - Real *pos_y_dev, Real *pos_z_dev, - Real *mass_dev, Real xMin, Real yMin, - Real zMin, Real xMax, Real yMax, - Real zMax, Real dx, Real dy, Real dz, - int nx, int ny, int nz, int n_ghost) +__global__ void Get_Density_CIC_Kernel(part_int_t n_local, Real particle_mass, Real *density_dev, Real *pos_x_dev, + Real *pos_y_dev, Real *pos_z_dev, Real *mass_dev, Real xMin, Real yMin, + Real zMin, Real xMax, Real yMax, Real zMax, Real dx, Real dy, Real dz, int nx, + int ny, int nz, int n_ghost) { int tid = blockIdx.x * blockDim.x + threadIdx.x; if (tid >= n_local) return; @@ -82,8 +75,7 @@ __global__ void Get_Density_CIC_Kernel(part_int_t n_local, Real particle_mass, #endif int indx_x, indx_y, indx_z, indx; - Get_Indexes_CIC(xMin, yMin, zMin, dx, dy, dz, pos_x, pos_y, pos_z, indx_x, - indx_y, indx_z); + Get_Indexes_CIC(xMin, yMin, zMin, dx, dy, dz, pos_x, pos_y, pos_z, indx_x, indx_y, indx_z); bool in_local = true; @@ -126,23 +118,19 @@ __global__ void Get_Density_CIC_Kernel(part_int_t n_local, Real particle_mass, indx = (indx_x + 1) + (indx_y + 1) * nx_g + indx_z * nx_g * ny_g; // density_dev[indx] += pMass * (1-delta_x) * (1-delta_y) * delta_z; - atomicAdd(&density_dev[indx], - pMass * (1 - delta_x) * (1 - delta_y) * delta_z); + atomicAdd(&density_dev[indx], pMass * (1 - delta_x) * (1 - delta_y) * delta_z); indx = (indx_x + 1) + indx_y * nx_g + (indx_z + 1) * nx_g * ny_g; // density_dev[indx] += pMass * (1-delta_x) * delta_y * (1-delta_z); - atomicAdd(&density_dev[indx], - pMass * (1 - delta_x) * delta_y * (1 - delta_z)); + atomicAdd(&density_dev[indx], pMass * (1 - delta_x) * delta_y * (1 - delta_z)); indx = indx_x + (indx_y + 1) * nx_g + (indx_z + 1) * nx_g * ny_g; // density_dev[indx] += pMass * delta_x * (1-delta_y) * (1-delta_z); - atomicAdd(&density_dev[indx], - pMass * delta_x * (1 - delta_y) * (1 - delta_z)); + atomicAdd(&density_dev[indx], pMass * delta_x * (1 - delta_y) * (1 - delta_z)); indx = (indx_x + 1) + (indx_y + 1) * nx_g + (indx_z + 1) * nx_g * ny_g; // density_dev[indx] += pMass * (1-delta_x) * (1-delta_y) * (1-delta_z); - atomicAdd(&density_dev[indx], - pMass * (1 - delta_x) * (1 - delta_y) * (1 - delta_z)); + atomicAdd(&density_dev[indx], pMass * (1 - delta_x) * (1 - delta_y) * (1 - delta_z)); } // Clear the density array: density=0 @@ -152,12 +140,11 @@ void Particles_3D::Clear_Density_GPU_function(Real *density_dev, int n_cells) } // Call the CIC density kernel to get the particles density -void Particles_3D::Get_Density_CIC_GPU_function( - part_int_t n_local, Real particle_mass, Real xMin, Real xMax, Real yMin, - Real yMax, Real zMin, Real zMax, Real dx, Real dy, Real dz, int nx_local, - int ny_local, int nz_local, int n_ghost_particles_grid, int n_cells, - Real *density_h, Real *density_dev, Real *pos_x_dev, Real *pos_y_dev, - Real *pos_z_dev, Real *mass_dev) +void Particles_3D::Get_Density_CIC_GPU_function(part_int_t n_local, Real particle_mass, Real xMin, Real xMax, Real yMin, + Real yMax, Real zMin, Real zMax, Real dx, Real dy, Real dz, + int nx_local, int ny_local, int nz_local, int n_ghost_particles_grid, + int n_cells, Real *density_h, Real *density_dev, Real *pos_x_dev, + Real *pos_y_dev, Real *pos_z_dev, Real *mass_dev) { // set values for GPU kernels int ngrid = (n_local + TPB_PARTICLES - 1) / TPB_PARTICLES; @@ -168,19 +155,16 @@ void Particles_3D::Get_Density_CIC_GPU_function( // Only runs if there are local particles if (n_local > 0) { - hipLaunchKernelGGL(Get_Density_CIC_Kernel, dim1dGrid, dim1dBlock, 0, 0, - n_local, particle_mass, density_dev, pos_x_dev, - pos_y_dev, pos_z_dev, mass_dev, xMin, yMin, zMin, xMax, - yMax, zMax, dx, dy, dz, nx_local, ny_local, nz_local, - n_ghost_particles_grid); + hipLaunchKernelGGL(Get_Density_CIC_Kernel, dim1dGrid, dim1dBlock, 0, 0, n_local, particle_mass, density_dev, + pos_x_dev, pos_y_dev, pos_z_dev, mass_dev, xMin, yMin, zMin, xMax, yMax, zMax, dx, dy, dz, + nx_local, ny_local, nz_local, n_ghost_particles_grid); CudaCheckError(); cudaDeviceSynchronize(); } #if !defined(GRAVITY_GPU) // Copy the density from device to host - CudaSafeCall(cudaMemcpy(density_h, density_dev, n_cells * sizeof(Real), - cudaMemcpyDeviceToHost)); + CudaSafeCall(cudaMemcpy(density_h, density_dev, n_cells * sizeof(Real), cudaMemcpyDeviceToHost)); #endif } diff --git a/src/particles/density_boundaries.cpp b/src/particles/density_boundaries.cpp index 60bc8131a..9f3c73f89 100644 --- a/src/particles/density_boundaries.cpp +++ b/src/particles/density_boundaries.cpp @@ -85,10 +85,7 @@ void Grid3D::Transfer_Particles_Density_Boundaries(struct parameters P) #ifdef MPI_CHOLLA -void Grid3D::Copy_Particles_Density_Buffer_Device_to_Host(int direction, - int side, - Real *buffer_d, - Real *buffer_h) +void Grid3D::Copy_Particles_Density_Buffer_Device_to_Host(int direction, int side, Real *buffer_d, Real *buffer_h) { int nGHST, nx_g, ny_g, nz_g, buffer_length; nGHST = Particles.G.n_ghost_particles_grid; @@ -100,14 +97,12 @@ void Grid3D::Copy_Particles_Density_Buffer_Device_to_Host(int direction, if (direction == 1) buffer_length = nGHST * nx_g * nz_g; if (direction == 2) buffer_length = nGHST * nx_g * ny_g; - cudaMemcpy(buffer_h, buffer_d, buffer_length * sizeof(Real), - cudaMemcpyDeviceToHost); + cudaMemcpy(buffer_h, buffer_d, buffer_length * sizeof(Real), cudaMemcpyDeviceToHost); } // Load the particles density boundaries to the MPI buffers for transfer, return // the size of the transfer buffer -int Grid3D::Load_Particles_Density_Boundary_to_Buffer(int direction, int side, - Real *buffer) +int Grid3D::Load_Particles_Density_Boundary_to_Buffer(int direction, int side, Real *buffer) { int i, j, k, indx, indx_buff, buffer_length; int nGHST, nx_g, ny_g, nz_g; @@ -122,8 +117,7 @@ int Grid3D::Load_Particles_Density_Boundary_to_Buffer(int direction, int side, for (j = 0; j < ny_g; j++) { for (i = 0; i < nx_g; i++) { if (side == 0) indx = (i) + (j)*nx_g + (k)*nx_g * ny_g; - if (side == 1) - indx = (i) + (j)*nx_g + (nz_g - nGHST + k) * nx_g * ny_g; + if (side == 1) indx = (i) + (j)*nx_g + (nz_g - nGHST + k) * nx_g * ny_g; indx_buff = i + j * nx_g + k * nx_g * ny_g; buffer[indx_buff] = Particles.G.density[indx]; } @@ -138,8 +132,7 @@ int Grid3D::Load_Particles_Density_Boundary_to_Buffer(int direction, int side, for (j = 0; j < nGHST; j++) { for (i = 0; i < nx_g; i++) { if (side == 0) indx = (i) + (j)*nx_g + (k)*nx_g * ny_g; - if (side == 1) - indx = (i) + (ny_g - nGHST + j) * nx_g + (k)*nx_g * ny_g; + if (side == 1) indx = (i) + (ny_g - nGHST + j) * nx_g + (k)*nx_g * ny_g; indx_buff = i + k * nx_g + j * nx_g * nz_g; buffer[indx_buff] = Particles.G.density[indx]; } @@ -167,9 +160,7 @@ int Grid3D::Load_Particles_Density_Boundary_to_Buffer(int direction, int side, } // Unload the particles density boundaries from the MPI buffers after transfer -void Grid3D::Unload_Particles_Density_Boundary_From_Buffer(int direction, - int side, - Real *buffer) +void Grid3D::Unload_Particles_Density_Boundary_From_Buffer(int direction, int side, Real *buffer) { int i, j, k, indx, indx_buff, buffer_length; int nGHST, nx_g, ny_g, nz_g; @@ -184,8 +175,7 @@ void Grid3D::Unload_Particles_Density_Boundary_From_Buffer(int direction, for (j = 0; j < ny_g; j++) { for (i = 0; i < nx_g; i++) { if (side == 0) indx = (i) + (j)*nx_g + (k + nGHST) * nx_g * ny_g; - if (side == 1) - indx = (i) + (j)*nx_g + (nz_g - 2 * nGHST + k) * nx_g * ny_g; + if (side == 1) indx = (i) + (j)*nx_g + (nz_g - 2 * nGHST + k) * nx_g * ny_g; indx_buff = i + j * nx_g + k * nx_g * ny_g; Particles.G.density[indx] += buffer[indx_buff]; } @@ -199,8 +189,7 @@ void Grid3D::Unload_Particles_Density_Boundary_From_Buffer(int direction, for (j = 0; j < nGHST; j++) { for (i = 0; i < nx_g; i++) { if (side == 0) indx = (i) + (j + nGHST) * nx_g + (k)*nx_g * ny_g; - if (side == 1) - indx = (i) + (ny_g - 2 * nGHST + j) * nx_g + (k)*nx_g * ny_g; + if (side == 1) indx = (i) + (ny_g - 2 * nGHST + j) * nx_g + (k)*nx_g * ny_g; indx_buff = i + k * nx_g + j * nx_g * nz_g; Particles.G.density[indx] += buffer[indx_buff]; } @@ -214,8 +203,7 @@ void Grid3D::Unload_Particles_Density_Boundary_From_Buffer(int direction, for (j = 0; j < ny_g; j++) { for (i = 0; i < nGHST; i++) { if (side == 0) indx = (i + nGHST) + (j)*nx_g + (k)*nx_g * ny_g; - if (side == 1) - indx = (nx_g - 2 * nGHST + i) + (j)*nx_g + (k)*nx_g * ny_g; + if (side == 1) indx = (nx_g - 2 * nGHST + i) + (j)*nx_g + (k)*nx_g * ny_g; indx_buff = j + k * ny_g + i * ny_g * nz_g; Particles.G.density[indx] += buffer[indx_buff]; } diff --git a/src/particles/density_boundaries_gpu.cu b/src/particles/density_boundaries_gpu.cu index 80f8bc6b8..c13a27347 100644 --- a/src/particles/density_boundaries_gpu.cu +++ b/src/particles/density_boundaries_gpu.cu @@ -6,9 +6,8 @@ #include "../io/io.h" #include "particles_3D.h" -__global__ void Set_Particles_Density_Boundaries_Periodic_kernel( - int direction, int side, int n_i, int n_j, int nx, int ny, int nz, - int n_ghost, Real *density_d) +__global__ void Set_Particles_Density_Boundaries_Periodic_kernel(int direction, int side, int n_i, int n_j, int nx, + int ny, int nz, int n_ghost, Real *density_d) { // get a global thread ID int tid, tid_i, tid_j, tid_k, tid_src, tid_dst; @@ -17,40 +16,31 @@ __global__ void Set_Particles_Density_Boundaries_Periodic_kernel( tid_j = (tid - tid_k * n_i * n_j) / n_i; tid_i = tid - tid_k * n_i * n_j - tid_j * n_i; - if (tid_i < 0 || tid_i >= n_i || tid_j < 0 || tid_j >= n_j || tid_k < 0 || - tid_k >= n_ghost) - return; + if (tid_i < 0 || tid_i >= n_i || tid_j < 0 || tid_j >= n_j || tid_k < 0 || tid_k >= n_ghost) return; if (direction == 0) { - if (side == 0) - tid_src = (nx - n_ghost + tid_k) + (tid_i)*nx + (tid_j)*nx * ny; + if (side == 0) tid_src = (nx - n_ghost + tid_k) + (tid_i)*nx + (tid_j)*nx * ny; if (side == 0) tid_dst = (n_ghost + tid_k) + (tid_i)*nx + (tid_j)*nx * ny; if (side == 1) tid_src = (tid_k) + (tid_i)*nx + (tid_j)*nx * ny; - if (side == 1) - tid_dst = (nx - 2 * n_ghost + tid_k) + (tid_i)*nx + (tid_j)*nx * ny; + if (side == 1) tid_dst = (nx - 2 * n_ghost + tid_k) + (tid_i)*nx + (tid_j)*nx * ny; } if (direction == 1) { - if (side == 0) - tid_src = (tid_i) + (ny - n_ghost + tid_k) * nx + (tid_j)*nx * ny; + if (side == 0) tid_src = (tid_i) + (ny - n_ghost + tid_k) * nx + (tid_j)*nx * ny; if (side == 0) tid_dst = (tid_i) + (n_ghost + tid_k) * nx + (tid_j)*nx * ny; if (side == 1) tid_src = (tid_i) + (tid_k)*nx + (tid_j)*nx * ny; - if (side == 1) - tid_dst = (tid_i) + (ny - 2 * n_ghost + tid_k) * nx + (tid_j)*nx * ny; + if (side == 1) tid_dst = (tid_i) + (ny - 2 * n_ghost + tid_k) * nx + (tid_j)*nx * ny; } if (direction == 2) { - if (side == 0) - tid_src = (tid_i) + (tid_j)*nx + (nz - n_ghost + tid_k) * nx * ny; + if (side == 0) tid_src = (tid_i) + (tid_j)*nx + (nz - n_ghost + tid_k) * nx * ny; if (side == 0) tid_dst = (tid_i) + (tid_j)*nx + (n_ghost + tid_k) * nx * ny; if (side == 1) tid_src = (tid_i) + (tid_j)*nx + (tid_k)*nx * ny; - if (side == 1) - tid_dst = (tid_i) + (tid_j)*nx + (nz - 2 * n_ghost + tid_k) * nx * ny; + if (side == 1) tid_dst = (tid_i) + (tid_j)*nx + (nz - 2 * n_ghost + tid_k) * nx * ny; } density_d[tid_dst] += density_d[tid_src]; } -void Grid3D::Set_Particles_Density_Boundaries_Periodic_GPU(int direction, - int side) +void Grid3D::Set_Particles_Density_Boundaries_Periodic_GPU(int direction, int side) { int n_ghost, nx_g, ny_g, nz_g, size, ngrid, n_i, n_j; n_ghost = Particles.G.n_ghost_particles_grid; @@ -80,16 +70,15 @@ void Grid3D::Set_Particles_Density_Boundaries_Periodic_GPU(int direction, // number of threads per 1D block dim3 dim1dBlock(TPB_PARTICLES, 1, 1); - hipLaunchKernelGGL(Set_Particles_Density_Boundaries_Periodic_kernel, - dim1dGrid, dim1dBlock, 0, 0, direction, side, n_i, n_j, - nx_g, ny_g, nz_g, n_ghost, Particles.G.density_dev); + hipLaunchKernelGGL(Set_Particles_Density_Boundaries_Periodic_kernel, dim1dGrid, dim1dBlock, 0, 0, direction, side, + n_i, n_j, nx_g, ny_g, nz_g, n_ghost, Particles.G.density_dev); } #ifdef MPI_CHOLLA -__global__ void Load_Particles_Density_Boundary_to_Buffer_kernel( - int direction, int side, int n_i, int n_j, int nx, int ny, int nz, - int n_ghost, Real *density_d, Real *transfer_buffer_d) +__global__ void Load_Particles_Density_Boundary_to_Buffer_kernel(int direction, int side, int n_i, int n_j, int nx, + int ny, int nz, int n_ghost, Real *density_d, + Real *transfer_buffer_d) { // get a global thread ID int tid, tid_i, tid_j, tid_k, tid_buffer, tid_dens; @@ -98,33 +87,26 @@ __global__ void Load_Particles_Density_Boundary_to_Buffer_kernel( tid_j = (tid - tid_k * n_i * n_j) / n_i; tid_i = tid - tid_k * n_i * n_j - tid_j * n_i; - if (tid_i < 0 || tid_i >= n_i || tid_j < 0 || tid_j >= n_j || tid_k < 0 || - tid_k >= n_ghost) - return; + if (tid_i < 0 || tid_i >= n_i || tid_j < 0 || tid_j >= n_j || tid_k < 0 || tid_k >= n_ghost) return; tid_buffer = tid_i + tid_j * n_i + tid_k * n_i * n_j; if (direction == 0) { if (side == 0) tid_dens = (tid_k) + (tid_i)*nx + (tid_j)*nx * ny; - if (side == 1) - tid_dens = (nx - n_ghost + tid_k) + (tid_i)*nx + (tid_j)*nx * ny; + if (side == 1) tid_dens = (nx - n_ghost + tid_k) + (tid_i)*nx + (tid_j)*nx * ny; } if (direction == 1) { if (side == 0) tid_dens = (tid_i) + (tid_k)*nx + (tid_j)*nx * ny; - if (side == 1) - tid_dens = (tid_i) + (ny - n_ghost + tid_k) * nx + (tid_j)*nx * ny; + if (side == 1) tid_dens = (tid_i) + (ny - n_ghost + tid_k) * nx + (tid_j)*nx * ny; } if (direction == 2) { if (side == 0) tid_dens = (tid_i) + (tid_j)*nx + (tid_k)*nx * ny; - if (side == 1) - tid_dens = (tid_i) + (tid_j)*nx + (nz - n_ghost + tid_k) * nx * ny; + if (side == 1) tid_dens = (tid_i) + (tid_j)*nx + (nz - n_ghost + tid_k) * nx * ny; } transfer_buffer_d[tid_buffer] = density_d[tid_dens]; } -int Grid3D::Load_Particles_Density_Boundary_to_Buffer_GPU(int direction, - int side, - Real *buffer) +int Grid3D::Load_Particles_Density_Boundary_to_Buffer_GPU(int direction, int side, Real *buffer) { int n_ghost, nx_g, ny_g, nz_g, size_buffer, ngrid, n_i, n_j; n_ghost = Particles.G.n_ghost_particles_grid; @@ -160,18 +142,17 @@ int Grid3D::Load_Particles_Density_Boundary_to_Buffer_GPU(int direction, Real *send_buffer_d; send_buffer_d = buffer; - hipLaunchKernelGGL(Load_Particles_Density_Boundary_to_Buffer_kernel, - dim1dGrid, dim1dBlock, 0, 0, direction, side, n_i, n_j, - nx_g, ny_g, nz_g, n_ghost, density_d, send_buffer_d); + hipLaunchKernelGGL(Load_Particles_Density_Boundary_to_Buffer_kernel, dim1dGrid, dim1dBlock, 0, 0, direction, side, + n_i, n_j, nx_g, ny_g, nz_g, n_ghost, density_d, send_buffer_d); cudaDeviceSynchronize(); return size_buffer; } -__global__ void Unload_Particles_Density_Boundary_to_Buffer_kernel( - int direction, int side, int n_i, int n_j, int nx, int ny, int nz, - int n_ghost, Real *density_d, Real *transfer_buffer_d) +__global__ void Unload_Particles_Density_Boundary_to_Buffer_kernel(int direction, int side, int n_i, int n_j, int nx, + int ny, int nz, int n_ghost, Real *density_d, + Real *transfer_buffer_d) { // get a global thread ID int tid, tid_i, tid_j, tid_k, tid_buffer, tid_dens; @@ -180,35 +161,26 @@ __global__ void Unload_Particles_Density_Boundary_to_Buffer_kernel( tid_j = (tid - tid_k * n_i * n_j) / n_i; tid_i = tid - tid_k * n_i * n_j - tid_j * n_i; - if (tid_i < 0 || tid_i >= n_i || tid_j < 0 || tid_j >= n_j || tid_k < 0 || - tid_k >= n_ghost) - return; + if (tid_i < 0 || tid_i >= n_i || tid_j < 0 || tid_j >= n_j || tid_k < 0 || tid_k >= n_ghost) return; tid_buffer = tid_i + tid_j * n_i + tid_k * n_i * n_j; if (direction == 0) { if (side == 0) tid_dens = (n_ghost + tid_k) + (tid_i)*nx + (tid_j)*nx * ny; - if (side == 1) - tid_dens = (nx - 2 * n_ghost + tid_k) + (tid_i)*nx + (tid_j)*nx * ny; + if (side == 1) tid_dens = (nx - 2 * n_ghost + tid_k) + (tid_i)*nx + (tid_j)*nx * ny; } if (direction == 1) { - if (side == 0) - tid_dens = (tid_i) + (n_ghost + tid_k) * nx + (tid_j)*nx * ny; - if (side == 1) - tid_dens = (tid_i) + (ny - 2 * n_ghost + tid_k) * nx + (tid_j)*nx * ny; + if (side == 0) tid_dens = (tid_i) + (n_ghost + tid_k) * nx + (tid_j)*nx * ny; + if (side == 1) tid_dens = (tid_i) + (ny - 2 * n_ghost + tid_k) * nx + (tid_j)*nx * ny; } if (direction == 2) { - if (side == 0) - tid_dens = (tid_i) + (tid_j)*nx + (n_ghost + tid_k) * nx * ny; - if (side == 1) - tid_dens = (tid_i) + (tid_j)*nx + (nz - 2 * n_ghost + tid_k) * nx * ny; + if (side == 0) tid_dens = (tid_i) + (tid_j)*nx + (n_ghost + tid_k) * nx * ny; + if (side == 1) tid_dens = (tid_i) + (tid_j)*nx + (nz - 2 * n_ghost + tid_k) * nx * ny; } density_d[tid_dens] += transfer_buffer_d[tid_buffer]; } -void Grid3D::Unload_Particles_Density_Boundary_From_Buffer_GPU(int direction, - int side, - Real *buffer) +void Grid3D::Unload_Particles_Density_Boundary_From_Buffer_GPU(int direction, int side, Real *buffer) { int n_ghost, nx_g, ny_g, nz_g, size_buffer, ngrid, n_i, n_j; n_ghost = Particles.G.n_ghost_particles_grid; @@ -244,9 +216,8 @@ void Grid3D::Unload_Particles_Density_Boundary_From_Buffer_GPU(int direction, Real *recv_buffer_d; recv_buffer_d = buffer; - hipLaunchKernelGGL(Unload_Particles_Density_Boundary_to_Buffer_kernel, - dim1dGrid, dim1dBlock, 0, 0, direction, side, n_i, n_j, - nx_g, ny_g, nz_g, n_ghost, density_d, recv_buffer_d); + hipLaunchKernelGGL(Unload_Particles_Density_Boundary_to_Buffer_kernel, dim1dGrid, dim1dBlock, 0, 0, direction, side, + n_i, n_j, nx_g, ny_g, nz_g, n_ghost, density_d, recv_buffer_d); } #endif // MPI_CHOLLA diff --git a/src/particles/feedback_CIC_gpu.cu b/src/particles/feedback_CIC_gpu.cu index 1a55479ce..18c5cd185 100644 --- a/src/particles/feedback_CIC_gpu.cu +++ b/src/particles/feedback_CIC_gpu.cu @@ -1,5 +1,4 @@ -#if defined(SUPERNOVA) && defined(PARTICLES_GPU) && defined(PARTICLE_AGE) && \ - defined(PARTICLE_IDS) +#if defined(SUPERNOVA) && defined(PARTICLES_GPU) && defined(PARTICLE_AGE) && defined(PARTICLE_IDS) #include #include @@ -43,9 +42,7 @@ __device__ double atomicMax(double* address, double val) unsigned long long int old = *address_as_ull, assumed; do { assumed = old; - old = atomicCAS( - address_as_ull, assumed, - __double_as_longlong(fmax(val, __longlong_as_double(assumed)))); + old = atomicCAS(address_as_ull, assumed, __double_as_longlong(fmax(val, __longlong_as_double(assumed)))); } while (assumed != old); return __longlong_as_double(old); } @@ -71,8 +68,7 @@ __global__ void initState_kernel(unsigned int seed, feedback_prng_t* states) * @param n_local number of local particles on the GPU * @param allocation_factor */ -void supernova::initState(struct parameters* P, part_int_t n_local, - Real allocation_factor) +void supernova::initState(struct parameters* P, part_int_t n_local, Real allocation_factor) { chprintf("supernova::initState start\n"); std::string snr_filename(P->snr_filename); @@ -122,8 +118,7 @@ void supernova::initState(struct parameters* P, part_int_t n_local, snr_dt = (time_sn_end - time_sn_start) / (snr.size() - 1); CHECK(cudaMalloc((void**)&dev_snr, snr.size() * sizeof(Real))); - CHECK(cudaMemcpy(dev_snr, snr.data(), snr.size() * sizeof(Real), - cudaMemcpyHostToDevice)); + CHECK(cudaMemcpy(dev_snr, snr.data(), snr.size() * sizeof(Real), cudaMemcpyHostToDevice)); } else { chprintf("No SN rate file specified. Using constant rate\n"); @@ -139,39 +134,31 @@ void supernova::initState(struct parameters* P, part_int_t n_local, dim3 grid(ngrid); dim3 block(TPB_FEEDBACK); - hipLaunchKernelGGL(initState_kernel, grid, block, 0, 0, P->prng_seed, - randStates); + hipLaunchKernelGGL(initState_kernel, grid, block, 0, 0, P->prng_seed, randStates); CHECK(cudaDeviceSynchronize()); - chprintf("supernova::initState end: n_states=%ld, ngrid=%d, threads=%d\n", - n_states, ngrid, TPB_FEEDBACK); + chprintf("supernova::initState end: n_states=%ld, ngrid=%d, threads=%d\n", n_states, ngrid, TPB_FEEDBACK); } -__device__ Real GetSNRate(Real t, Real* dev_snr, Real snr_dt, Real t_start, - Real t_end) +__device__ Real GetSNRate(Real t, Real* dev_snr, Real snr_dt, Real t_start, Real t_end) { if (t < t_start || t >= t_end) return 0; if (dev_snr == nullptr) return supernova::DEFAULT_SNR; int index = (int)((t - t_start) / snr_dt); - return dev_snr[index] + - (t - index * snr_dt) * (dev_snr[index + 1] - dev_snr[index]) / snr_dt; + return dev_snr[index] + (t - index * snr_dt) * (dev_snr[index + 1] - dev_snr[index]) / snr_dt; } -__device__ Real Calc_Timestep(Real gamma, Real* density, Real* momentum_x, - Real* momentum_y, Real* momentum_z, Real* energy, - int index, Real dx, Real dy, Real dz) +__device__ Real Calc_Timestep(Real gamma, Real* density, Real* momentum_x, Real* momentum_y, Real* momentum_z, + Real* energy, int index, Real dx, Real dy, Real dz) { Real dens = fmax(density[index], DENS_FLOOR); Real d_inv = 1.0 / dens; Real vx = momentum_x[index] * d_inv; Real vy = momentum_y[index] * d_inv; Real vz = momentum_z[index] * d_inv; - Real P = fmax((energy[index] - 0.5 * dens * (vx * vx + vy * vy + vz * vz)) * - (gamma - 1.0), - TINY_NUMBER); - Real cs = sqrt(gamma * P * d_inv); - return fmax(fmax((fabs(vx) + cs) / dx, (fabs(vy) + cs) / dy), - (fabs(vz) + cs) / dz); + Real P = fmax((energy[index] - 0.5 * dens * (vx * vx + vy * vy + vz * vz)) * (gamma - 1.0), TINY_NUMBER); + Real cs = sqrt(gamma * P * d_inv); + return fmax(fmax((fabs(vx) + cs) / dx, (fabs(vy) + cs) / dy), (fabs(vz) + cs) / dz); } /** the prescription for dividing a scalar quantity between 3x3x3 cells is done @@ -181,42 +168,34 @@ __device__ Real Calc_Timestep(Real gamma, Real* density, Real* momentum_x, should be dx*1/2. In the above the 1/2 factor is normalize over 2 cells/direction. */ -__device__ Real frac(int i, Real dx) -{ - return (-0.5 * i * i - 0.5 * i + 1 + i * dx) * 0.5; -} +__device__ Real frac(int i, Real dx) { return (-0.5 * i * i - 0.5 * i + 1 + i * dx) * 0.5; } __device__ Real d_fr(int i, Real dx) { - return (dx > 0.5) * i * (1 - 2 * dx) + ((i + 1) * dx + 0.5 * (i - 1)) - - 3 * (i - 1) * (i + 1) * (0.5 - dx); + return (dx > 0.5) * i * (1 - 2 * dx) + ((i + 1) * dx + 0.5 * (i - 1)) - 3 * (i - 1) * (i + 1) * (0.5 - dx); } -__device__ Real GetAverageDensity(Real* density, int xi, int yi, int zi, - int nx_grid, int ny_grid, int n_ghost) +__device__ Real GetAverageDensity(Real* density, int xi, int yi, int zi, int nx_grid, int ny_grid, int n_ghost) { Real d_average = 0.0; for (int i = -1; i < 2; i++) { for (int j = -1; j < 2; j++) { for (int k = -1; k < 2; k++) { - d_average += density[(xi + n_ghost + i) + (yi + n_ghost + j) * nx_grid + - (zi + n_ghost + k) * nx_grid * ny_grid]; + d_average += + density[(xi + n_ghost + i) + (yi + n_ghost + j) * nx_grid + (zi + n_ghost + k) * nx_grid * ny_grid]; } } } return d_average / 27; } -__device__ Real GetAverageNumberDensity_CGS(Real* density, int xi, int yi, - int zi, int nx_grid, int ny_grid, +__device__ Real GetAverageNumberDensity_CGS(Real* density, int xi, int yi, int zi, int nx_grid, int ny_grid, int n_ghost) { - return GetAverageDensity(density, xi, yi, zi, nx_grid, ny_grid, n_ghost) * - DENSITY_UNIT / (supernova::MU * MP); + return GetAverageDensity(density, xi, yi, zi, nx_grid, ny_grid, n_ghost) * DENSITY_UNIT / (supernova::MU * MP); } -__device__ bool Particle_Is_Alone(Real* pos_x_dev, Real* pos_y_dev, - Real* pos_z_dev, part_int_t n_local, int gtid, +__device__ bool Particle_Is_Alone(Real* pos_x_dev, Real* pos_y_dev, Real* pos_z_dev, part_int_t n_local, int gtid, Real dx) { Real x0 = pos_x_dev[gtid]; @@ -234,20 +213,17 @@ __device__ bool Particle_Is_Alone(Real* pos_x_dev, Real* pos_y_dev, return true; } -__global__ void Cluster_Feedback_Kernel( - part_int_t n_local, part_int_t* id, Real* pos_x_dev, Real* pos_y_dev, - Real* pos_z_dev, Real* mass_dev, Real* age_dev, Real xMin, Real yMin, - Real zMin, Real xMax, Real yMax, Real zMax, Real dx, Real dy, Real dz, - int nx_g, int ny_g, int nz_g, int n_ghost, Real t, Real dt, Real* dti, - Real* info, Real* density, Real* gasEnergy, Real* energy, Real* momentum_x, - Real* momentum_y, Real* momentum_z, Real gamma, feedback_prng_t* states, - Real* prev_dens, int* prev_N, short direction, Real* dev_snr, Real snr_dt, - Real time_sn_start, Real time_sn_end, int n_step) +__global__ void Cluster_Feedback_Kernel(part_int_t n_local, part_int_t* id, Real* pos_x_dev, Real* pos_y_dev, + Real* pos_z_dev, Real* mass_dev, Real* age_dev, Real xMin, Real yMin, Real zMin, + Real xMax, Real yMax, Real zMax, Real dx, Real dy, Real dz, int nx_g, int ny_g, + int nz_g, int n_ghost, Real t, Real dt, Real* dti, Real* info, Real* density, + Real* gasEnergy, Real* energy, Real* momentum_x, Real* momentum_y, + Real* momentum_z, Real gamma, feedback_prng_t* states, Real* prev_dens, + int* prev_N, short direction, Real* dev_snr, Real snr_dt, Real time_sn_start, + Real time_sn_end, int n_step) { - __shared__ Real - s_info[FEED_INFO_N * - TPB_FEEDBACK]; // for collecting SN feedback information, like # - // of SNe or # resolved. + __shared__ Real s_info[FEED_INFO_N * TPB_FEEDBACK]; // for collecting SN feedback information, like # + // of SNe or # resolved. int tid = threadIdx.x; int gtid = blockIdx.x * blockDim.x + tid; @@ -256,8 +232,7 @@ __global__ void Cluster_Feedback_Kernel( s_info[FEED_INFO_N * tid + 2] = 0; // number of unresolved events s_info[FEED_INFO_N * tid + 3] = 0; // resolved energy s_info[FEED_INFO_N * tid + 4] = 0; // unresolved momentum - s_info[FEED_INFO_N * tid + 5] = - 0; // unresolved KE added via momentum injection + s_info[FEED_INFO_N * tid + 5] = 0; // unresolved KE added via momentum injection if (gtid < n_local) { Real pos_x, pos_y, pos_z; @@ -266,8 +241,7 @@ __global__ void Cluster_Feedback_Kernel( Real x_frac, y_frac, z_frac; Real px, py, pz, d; // Real t_b, t_a, v_1, v_2, d_b, d_a, p_b, p_a, e; - Real feedback_energy = 0, feedback_density = 0, feedback_momentum = 0, n_0, - shell_radius; + Real feedback_energy = 0, feedback_density = 0, feedback_momentum = 0, n_0, shell_radius; bool is_resolved = false; Real dV = dx * dy * dz; Real local_dti = 0.0; @@ -279,9 +253,8 @@ __global__ void Cluster_Feedback_Kernel( // pos_z); kernel_printf("(%d): MIN:(%.4e, %.4e, %.4e)\n", gtid, xMin, yMin, // xMin); - bool in_local = (pos_x >= xMin && pos_x < xMax) && - (pos_y >= yMin && pos_y < yMax) && - (pos_z >= zMin && pos_z < zMax); + bool in_local = + (pos_x >= xMin && pos_x < xMax) && (pos_y >= yMin && pos_y < yMax) && (pos_z >= zMin && pos_z < zMax); if (!in_local) { kernel_printf( " Feedback GPU: Particle outside local domain [%f %f %f] [%f %f] " @@ -294,8 +267,7 @@ __global__ void Cluster_Feedback_Kernel( int indx_z = (int)floor((pos_z - zMin) / dz); // kernel_printf("(%d): indx:(%d, %d, %d)\n", gtid, indx_x, indx_y, indx_z); - bool ignore = indx_x < 0 || indx_y < 0 || indx_z < 0 || - indx_x >= nx_g - 2 * n_ghost || + bool ignore = indx_x < 0 || indx_y < 0 || indx_z < 0 || indx_x >= nx_g - 2 * n_ghost || indx_y >= ny_g - 2 * n_ghost || indx_z >= nz_g - 2 * n_ghost; if (ignore) { kernel_printf( @@ -316,9 +288,8 @@ __global__ void Cluster_Feedback_Kernel( if (direction == -1) N = -prev_N[gtid]; else { - Real average_num_sn = GetSNRate(t - age_dev[gtid], dev_snr, snr_dt, - time_sn_start, time_sn_end) * - mass_dev[gtid] * dt; + Real average_num_sn = + GetSNRate(t - age_dev[gtid], dev_snr, snr_dt, time_sn_start, time_sn_end) * mass_dev[gtid] * dt; // N = (int) (average_num_sn + 0.5); @@ -342,8 +313,7 @@ __global__ void Cluster_Feedback_Kernel( if (direction == -1) n_0 = prev_dens[gtid]; else { - n_0 = GetAverageNumberDensity_CGS(density, indx_x, indx_y, indx_z, - nx_g, ny_g, n_ghost); + n_0 = GetAverageNumberDensity_CGS(density, indx_x, indx_y, indx_z, nx_g, ny_g, n_ghost); prev_dens[gtid] = n_0; } // int devcount; @@ -353,17 +323,14 @@ __global__ void Cluster_Feedback_Kernel( // kernel_printf("[%d: %d] N: %d, time: %.4e, dt: %.4e, e: %.4e, n_0: // %.4e\n", devId, gtid, N, t, dt, feedback_energy, n_0); - feedback_momentum = direction * supernova::FINAL_MOMENTUM * - pow(n_0, -0.17) * pow(fabsf(N), 0.93) / dV; - shell_radius = - supernova::R_SH * pow(n_0, -0.46) * pow(fabsf(N), 0.29); - is_resolved = 3 * max(dx, max(dy, dz)) <= shell_radius; + feedback_momentum = direction * supernova::FINAL_MOMENTUM * pow(n_0, -0.17) * pow(fabsf(N), 0.93) / dV; + shell_radius = supernova::R_SH * pow(n_0, -0.46) * pow(fabsf(N), 0.29); + is_resolved = 3 * max(dx, max(dy, dz)) <= shell_radius; if (!is_resolved) kernel_printf( "UR[%f] at (%d, %d, %d) id=%d, N=%d, shell_rad=%0.4e, " "n_0=%0.4e\n", - t, indx_x + n_ghost, indx_y + n_ghost, indx_z + n_ghost, - (int)id[gtid], N, shell_radius, n_0); + t, indx_x + n_ghost, indx_y + n_ghost, indx_z + n_ghost, (int)id[gtid], N, shell_radius, n_0); s_info[FEED_INFO_N * tid] = 1. * N; if (is_resolved) @@ -394,8 +361,7 @@ __global__ void Cluster_Feedback_Kernel( for (int i = 0; i < 2; i++) { for (int j = 0; j < 2; j++) { for (int k = 0; k < 2; k++) { - indx = (indx_x + i) + (indx_y + j) * nx_g + - (indx_z + k) * nx_g * ny_g; + indx = (indx_x + i) + (indx_y + j) * nx_g + (indx_z + k) * nx_g * ny_g; if (abs(momentum_x[indx] / density[indx]) >= C_L) { kernel_printf( @@ -428,12 +394,9 @@ __global__ void Cluster_Feedback_Kernel( y_frac = j * (1 - delta_y) + (1 - j) * delta_y; z_frac = k * (1 - delta_z) + (1 - k) * delta_z; - atomicAdd(&density[indx], - x_frac * y_frac * z_frac * feedback_density); - atomicAdd(&gasEnergy[indx], - x_frac * y_frac * z_frac * feedback_energy); - atomicAdd(&energy[indx], - x_frac * y_frac * z_frac * feedback_energy); + atomicAdd(&density[indx], x_frac * y_frac * z_frac * feedback_density); + atomicAdd(&gasEnergy[indx], x_frac * y_frac * z_frac * feedback_energy); + atomicAdd(&energy[indx], x_frac * y_frac * z_frac * feedback_energy); if (abs(momentum_x[indx] / density[indx]) >= C_L) { kernel_printf( @@ -461,10 +424,8 @@ __global__ void Cluster_Feedback_Kernel( } if (direction > 0) - local_dti = fmax( - local_dti, - Calc_Timestep(gamma, density, momentum_x, momentum_y, - momentum_z, energy, indx, dx, dy, dz)); + local_dti = fmax(local_dti, Calc_Timestep(gamma, density, momentum_x, momentum_y, momentum_z, + energy, indx, dx, dy, dz)); } } } @@ -483,13 +444,10 @@ __global__ void Cluster_Feedback_Kernel( indx_y += n_ghost; indx_z += n_ghost; - if (abs(feedback_momentum / feedback_density * VELOCITY_UNIT * - 1e-5) > + if (abs(feedback_momentum / feedback_density * VELOCITY_UNIT * 1e-5) > 40000) { // injected speeds are greater than 4e4 km/s - kernel_printf( - "**** (%d, %d, %d) injected speeds are %.3e km/s\n", indx_x, - indx_y, indx_z, - feedback_momentum / feedback_density * VELOCITY_UNIT * 1e-5); + kernel_printf("**** (%d, %d, %d) injected speeds are %.3e km/s\n", indx_x, indx_y, indx_z, + feedback_momentum / feedback_density * VELOCITY_UNIT * 1e-5); } feedback_momentum /= sqrt(3.0); @@ -497,21 +455,16 @@ __global__ void Cluster_Feedback_Kernel( for (int j = -1; j < 2; j++) { for (int k = -1; k < 2; k++) { // index in array of conserved quantities - indx = (indx_x + i) + (indx_y + j) * nx_g + - (indx_z + k) * nx_g * ny_g; + indx = (indx_x + i) + (indx_y + j) * nx_g + (indx_z + k) * nx_g * ny_g; - x_frac = - d_fr(i, delta_x) * frac(j, delta_y) * frac(k, delta_z); - y_frac = - frac(i, delta_x) * d_fr(j, delta_y) * frac(k, delta_z); - z_frac = - frac(i, delta_x) * frac(j, delta_y) * d_fr(k, delta_z); + x_frac = d_fr(i, delta_x) * frac(j, delta_y) * frac(k, delta_z); + y_frac = frac(i, delta_x) * d_fr(j, delta_y) * frac(k, delta_z); + z_frac = frac(i, delta_x) * frac(j, delta_y) * d_fr(k, delta_z); px = x_frac * feedback_momentum; py = y_frac * feedback_momentum; pz = z_frac * feedback_momentum; - d = (abs(x_frac) + abs(y_frac) + abs(z_frac)) / 6 * - feedback_density + + d = (abs(x_frac) + abs(y_frac) + abs(z_frac)) / 6 * feedback_density + n_0 * supernova::MU * MP / DENSITY_UNIT; // d = frac(i, delta_x) * frac(j, delta_y) * frac(k, delta_z) @@ -564,8 +517,7 @@ __global__ void Cluster_Feedback_Kernel( atomicAdd(&momentum_z[indx], pz); density[indx] = d; - energy[indx] = (momentum_x[indx] * momentum_x[indx] + - momentum_y[indx] * momentum_y[indx] + + energy[indx] = (momentum_x[indx] * momentum_x[indx] + momentum_y[indx] * momentum_y[indx] + momentum_z[indx] * momentum_z[indx]) / 2 / density[indx] + gasEnergy[indx]; @@ -574,8 +526,7 @@ __global__ void Cluster_Feedback_Kernel( // atomicAdd( &density[indx], d ); s_info[FEED_INFO_N * tid + i_UNRES_ENERGY] += - direction * (px * px + py * py + pz * pz) / 2 / - density[indx] * dV; + direction * (px * px + py * py + pz * pz) / 2 / density[indx] * dV; if (abs(momentum_x[indx] / density[indx]) >= C_L) { kernel_printf( @@ -634,10 +585,8 @@ __global__ void Cluster_Feedback_Kernel( if (direction > 0) { // kernel_printf("urs time:%.3e id:%d N:%d d:%.5e\n", t, // id[gtid], N, n_0); - local_dti = fmax( - local_dti, - Calc_Timestep(gamma, density, momentum_x, momentum_y, - momentum_z, energy, indx, dx, dy, dz)); + local_dti = fmax(local_dti, Calc_Timestep(gamma, density, momentum_x, momentum_y, momentum_z, + energy, indx, dx, dy, dz)); } } } @@ -719,17 +668,14 @@ Real supernova::Cluster_Feedback(Grid3D& G, FeedbackAnalysis& analysis) do { direction = 1; if (G.Particles.n_local > 0) { - hipLaunchKernelGGL( - Cluster_Feedback_Kernel, ngrid, TPB_FEEDBACK, 0, 0, - G.Particles.n_local, G.Particles.partIDs_dev, G.Particles.pos_x_dev, - G.Particles.pos_y_dev, G.Particles.pos_z_dev, G.Particles.mass_dev, - G.Particles.age_dev, G.H.xblocal, G.H.yblocal, G.H.zblocal, - G.H.xblocal_max, G.H.yblocal_max, G.H.zblocal_max, G.H.dx, G.H.dy, - G.H.dz, G.H.nx, G.H.ny, G.H.nz, G.H.n_ghost, G.H.t, G.H.dt, d_dti, - d_info, G.C.d_density, G.C.d_GasEnergy, G.C.d_Energy, - G.C.d_momentum_x, G.C.d_momentum_y, G.C.d_momentum_z, gama, - supernova::randStates, d_prev_dens, d_prev_N, direction, dev_snr, - snr_dt, time_sn_start, time_sn_end, G.H.n_step); + hipLaunchKernelGGL(Cluster_Feedback_Kernel, ngrid, TPB_FEEDBACK, 0, 0, G.Particles.n_local, + G.Particles.partIDs_dev, G.Particles.pos_x_dev, G.Particles.pos_y_dev, G.Particles.pos_z_dev, + G.Particles.mass_dev, G.Particles.age_dev, G.H.xblocal, G.H.yblocal, G.H.zblocal, + G.H.xblocal_max, G.H.yblocal_max, G.H.zblocal_max, G.H.dx, G.H.dy, G.H.dz, G.H.nx, G.H.ny, + G.H.nz, G.H.n_ghost, G.H.t, G.H.dt, d_dti, d_info, G.C.d_density, G.C.d_GasEnergy, + G.C.d_Energy, G.C.d_momentum_x, G.C.d_momentum_y, G.C.d_momentum_z, gama, + supernova::randStates, d_prev_dens, d_prev_N, direction, dev_snr, snr_dt, time_sn_start, + time_sn_end, G.H.n_step); CHECK(cudaMemcpy(&h_dti, d_dti, sizeof(Real), cudaMemcpyDeviceToHost)); } @@ -743,17 +689,14 @@ Real supernova::Cluster_Feedback(Grid3D& G, FeedbackAnalysis& analysis) // timestep too big: need to undo the last operation direction = -1; if (G.Particles.n_local > 0) { - hipLaunchKernelGGL( - Cluster_Feedback_Kernel, ngrid, TPB_FEEDBACK, 0, 0, - G.Particles.n_local, G.Particles.partIDs_dev, G.Particles.pos_x_dev, - G.Particles.pos_y_dev, G.Particles.pos_z_dev, G.Particles.mass_dev, - G.Particles.age_dev, G.H.xblocal, G.H.yblocal, G.H.zblocal, - G.H.xblocal_max, G.H.yblocal_max, G.H.zblocal_max, G.H.dx, G.H.dy, - G.H.dz, G.H.nx, G.H.ny, G.H.nz, G.H.n_ghost, G.H.t, G.H.dt, d_dti, - d_info, G.C.d_density, G.C.d_GasEnergy, G.C.d_Energy, - G.C.d_momentum_x, G.C.d_momentum_y, G.C.d_momentum_z, gama, - supernova::randStates, d_prev_dens, d_prev_N, direction, dev_snr, - snr_dt, time_sn_start, time_sn_end, G.H.n_step); + hipLaunchKernelGGL(Cluster_Feedback_Kernel, ngrid, TPB_FEEDBACK, 0, 0, G.Particles.n_local, + G.Particles.partIDs_dev, G.Particles.pos_x_dev, G.Particles.pos_y_dev, G.Particles.pos_z_dev, + G.Particles.mass_dev, G.Particles.age_dev, G.H.xblocal, G.H.yblocal, G.H.zblocal, + G.H.xblocal_max, G.H.yblocal_max, G.H.zblocal_max, G.H.dx, G.H.dy, G.H.dz, G.H.nx, G.H.ny, + G.H.nz, G.H.n_ghost, G.H.t, G.H.dt, d_dti, d_info, G.C.d_density, G.C.d_GasEnergy, + G.C.d_Energy, G.C.d_momentum_x, G.C.d_momentum_y, G.C.d_momentum_z, gama, + supernova::randStates, d_prev_dens, d_prev_N, direction, dev_snr, snr_dt, time_sn_start, + time_sn_end, G.H.n_step); CHECK(cudaDeviceSynchronize()); } @@ -763,8 +706,7 @@ Real supernova::Cluster_Feedback(Grid3D& G, FeedbackAnalysis& analysis) } while (direction == -1); if (G.Particles.n_local > 0) { - CHECK(cudaMemcpy(&h_info, d_info, FEED_INFO_N * sizeof(Real), - cudaMemcpyDeviceToHost)); + CHECK(cudaMemcpy(&h_info, d_info, FEED_INFO_N * sizeof(Real), cudaMemcpyDeviceToHost)); CHECK(cudaFree(d_dti)); CHECK(cudaFree(d_info)); CHECK(cudaFree(d_prev_dens)); @@ -786,38 +728,29 @@ Real supernova::Cluster_Feedback(Grid3D& G, FeedbackAnalysis& analysis) Real resolved_ratio = 0.0; if (info[supernova::RESOLVED] > 0 || info[supernova::NOT_RESOLVED] > 0) { - resolved_ratio = - info[supernova::RESOLVED] / - (info[supernova::RESOLVED] + info[supernova::NOT_RESOLVED]); + resolved_ratio = info[supernova::RESOLVED] / (info[supernova::RESOLVED] + info[supernova::NOT_RESOLVED]); } Real global_resolved_ratio = 0.0; if (analysis.countResolved > 0 || analysis.countUnresolved > 0) { - global_resolved_ratio = analysis.countResolved / - (analysis.countResolved + analysis.countUnresolved); + global_resolved_ratio = analysis.countResolved / (analysis.countResolved + analysis.countUnresolved); } - chprintf("iteration %d: number of SN: %d, ratio of resolved %.3e\n", - G.H.n_step, (long)info[supernova::SN], resolved_ratio); + chprintf("iteration %d: number of SN: %d, ratio of resolved %.3e\n", G.H.n_step, (long)info[supernova::SN], + resolved_ratio); chprintf( " this iteration: energy: %.5e erg. momentum: %.5e S.M. km/s " "unres_energy: %.5e erg\n", - info[supernova::ENERGY] * MASS_UNIT * LENGTH_UNIT * LENGTH_UNIT / - TIME_UNIT / TIME_UNIT, + info[supernova::ENERGY] * MASS_UNIT * LENGTH_UNIT * LENGTH_UNIT / TIME_UNIT / TIME_UNIT, info[supernova::MOMENTUM] * VELOCITY_UNIT / 1e5, - info[supernova::UNRES_ENERGY] * MASS_UNIT * LENGTH_UNIT * LENGTH_UNIT / - TIME_UNIT / TIME_UNIT); - chprintf( - " cummulative: #SN: %d, ratio of resolved (R: %d, UR: %d) = %.3e\n", - (long)analysis.countSN, (long)analysis.countResolved, - (long)analysis.countUnresolved, global_resolved_ratio); + info[supernova::UNRES_ENERGY] * MASS_UNIT * LENGTH_UNIT * LENGTH_UNIT / TIME_UNIT / TIME_UNIT); + chprintf(" cummulative: #SN: %d, ratio of resolved (R: %d, UR: %d) = %.3e\n", (long)analysis.countSN, + (long)analysis.countResolved, (long)analysis.countUnresolved, global_resolved_ratio); chprintf( " energy: %.5e erg. Total momentum: %.5e S.M. km/s, Total unres " "energy: %.5e\n", - analysis.totalEnergy * MASS_UNIT * LENGTH_UNIT * LENGTH_UNIT / TIME_UNIT / - TIME_UNIT, + analysis.totalEnergy * MASS_UNIT * LENGTH_UNIT * LENGTH_UNIT / TIME_UNIT / TIME_UNIT, analysis.totalMomentum * VELOCITY_UNIT / 1e5, - analysis.totalUnresEnergy * MASS_UNIT * LENGTH_UNIT * LENGTH_UNIT / - TIME_UNIT / TIME_UNIT); + analysis.totalUnresEnergy * MASS_UNIT * LENGTH_UNIT * LENGTH_UNIT / TIME_UNIT / TIME_UNIT); #ifdef CPU_TIME G.Timer.Feedback.End(); diff --git a/src/particles/gravity_CIC.cpp b/src/particles/gravity_CIC.cpp index ceb786c6b..13938942e 100644 --- a/src/particles/gravity_CIC.cpp +++ b/src/particles/gravity_CIC.cpp @@ -27,8 +27,7 @@ void Grid3D::Get_Gravity_Field_Particles() #endif #ifndef PARALLEL_OMP - Get_Gravity_Field_Particles_function( - 0, Particles.G.nz_local + 2 * Particles.G.n_ghost_particles_grid); + Get_Gravity_Field_Particles_function(0, Particles.G.nz_local + 2 * Particles.G.n_ghost_particles_grid); #else #pragma omp parallel num_threads(N_OMP_THREADS) @@ -39,9 +38,8 @@ void Grid3D::Get_Gravity_Field_Particles() omp_id = omp_get_thread_num(); n_omp_procs = omp_get_num_threads(); - Get_OMP_Grid_Indxs( - Particles.G.nz_local + 2 * Particles.G.n_ghost_particles_grid, - N_OMP_THREADS, omp_id, &g_start, &g_end); + Get_OMP_Grid_Indxs(Particles.G.nz_local + 2 * Particles.G.n_ghost_particles_grid, N_OMP_THREADS, omp_id, &g_start, + &g_end); Get_Gravity_Field_Particles_function(g_start, g_end); } @@ -69,8 +67,7 @@ void Grid3D::Get_Gravity_CIC() omp_id = omp_get_thread_num(); n_omp_procs = omp_get_num_threads(); - Get_OMP_Particles_Indxs(Particles.n_local, N_OMP_THREADS, omp_id, &p_start, - &p_end); + Get_OMP_Particles_Indxs(Particles.n_local, N_OMP_THREADS, omp_id, &p_start, &p_end); Get_Gravity_CIC_function(p_start, p_end); } @@ -85,19 +82,16 @@ void Grid3D::Get_Gravity_CIC() #ifdef PARTICLES_GPU void Particles_3D::Get_Gravity_Field_Particles_GPU(Real *potential_host) { - Get_Gravity_Field_Particles_GPU_function( - G.nx_local, G.ny_local, G.nz_local, G.n_ghost_particles_grid, - G.n_cells_potential, G.dx, G.dy, G.dz, potential_host, G.potential_dev, - G.gravity_x_dev, G.gravity_y_dev, G.gravity_z_dev); + Get_Gravity_Field_Particles_GPU_function(G.nx_local, G.ny_local, G.nz_local, G.n_ghost_particles_grid, + G.n_cells_potential, G.dx, G.dy, G.dz, potential_host, G.potential_dev, + G.gravity_x_dev, G.gravity_y_dev, G.gravity_z_dev); } void Particles_3D::Get_Gravity_CIC_GPU() { - Get_Gravity_CIC_GPU_function( - n_local, G.nx_local, G.ny_local, G.nz_local, G.n_ghost_particles_grid, - G.xMin, G.xMax, G.yMin, G.yMax, G.zMin, G.zMax, G.dx, G.dy, G.dz, - pos_x_dev, pos_y_dev, pos_z_dev, grav_x_dev, grav_y_dev, grav_z_dev, - G.gravity_x_dev, G.gravity_y_dev, G.gravity_z_dev); + Get_Gravity_CIC_GPU_function(n_local, G.nx_local, G.ny_local, G.nz_local, G.n_ghost_particles_grid, G.xMin, G.xMax, + G.yMin, G.yMax, G.zMin, G.zMax, G.dx, G.dy, G.dz, pos_x_dev, pos_y_dev, pos_z_dev, + grav_x_dev, grav_y_dev, grav_z_dev, G.gravity_x_dev, G.gravity_y_dev, G.gravity_z_dev); } #endif // PARTICLES_GPU @@ -140,22 +134,17 @@ void Grid3D::Get_Gravity_Field_Particles_function(int g_start, int g_end) for (k = g_start; k < g_end; k++) { for (j = 0; j < ny_grav; j++) { for (i = 0; i < nx_grav; i++) { - id = (i) + (j)*nx_grav + (k)*ny_grav * nx_grav; - id_l = (i - 1 + nGHST) + (j + nGHST) * nx_grid + - (k + nGHST) * ny_grid * nx_grid; - id_r = (i + 1 + nGHST) + (j + nGHST) * nx_grid + - (k + nGHST) * ny_grid * nx_grid; + id = (i) + (j)*nx_grav + (k)*ny_grav * nx_grav; + id_l = (i - 1 + nGHST) + (j + nGHST) * nx_grid + (k + nGHST) * ny_grid * nx_grid; + id_r = (i + 1 + nGHST) + (j + nGHST) * nx_grid + (k + nGHST) * ny_grid * nx_grid; phi_l = potential[id_l]; phi_r = potential[id_r]; #ifdef GRAVITY_5_POINTS_GRADIENT - id_ll = (i - 2 + nGHST) + (j + nGHST) * nx_grid + - (k + nGHST) * ny_grid * nx_grid; - id_rr = (i + 2 + nGHST) + (j + nGHST) * nx_grid + - (k + nGHST) * ny_grid * nx_grid; - phi_ll = potential[id_ll]; - phi_rr = potential[id_rr]; - Particles.G.gravity_x[id] = - -1 * (-phi_rr + 8 * phi_r - 8 * phi_l + phi_ll) / (12 * dx); + id_ll = (i - 2 + nGHST) + (j + nGHST) * nx_grid + (k + nGHST) * ny_grid * nx_grid; + id_rr = (i + 2 + nGHST) + (j + nGHST) * nx_grid + (k + nGHST) * ny_grid * nx_grid; + phi_ll = potential[id_ll]; + phi_rr = potential[id_rr]; + Particles.G.gravity_x[id] = -1 * (-phi_rr + 8 * phi_r - 8 * phi_l + phi_ll) / (12 * dx); #else Particles.G.gravity_x[id] = -0.5 * (phi_r - phi_l) / dx; #endif @@ -166,22 +155,17 @@ void Grid3D::Get_Gravity_Field_Particles_function(int g_start, int g_end) for (k = g_start; k < g_end; k++) { for (j = 0; j < ny_grav; j++) { for (i = 0; i < nx_grav; i++) { - id = (i) + (j)*nx_grav + (k)*ny_grav * nx_grav; - id_l = (i + nGHST) + (j - 1 + nGHST) * nx_grid + - (k + nGHST) * ny_grid * nx_grid; - id_r = (i + nGHST) + (j + 1 + nGHST) * nx_grid + - (k + nGHST) * ny_grid * nx_grid; + id = (i) + (j)*nx_grav + (k)*ny_grav * nx_grav; + id_l = (i + nGHST) + (j - 1 + nGHST) * nx_grid + (k + nGHST) * ny_grid * nx_grid; + id_r = (i + nGHST) + (j + 1 + nGHST) * nx_grid + (k + nGHST) * ny_grid * nx_grid; phi_l = potential[id_l]; phi_r = potential[id_r]; #ifdef GRAVITY_5_POINTS_GRADIENT - id_ll = (i + nGHST) + (j - 2 + nGHST) * nx_grid + - (k + nGHST) * ny_grid * nx_grid; - id_rr = (i + nGHST) + (j + 2 + nGHST) * nx_grid + - (k + nGHST) * ny_grid * nx_grid; - phi_ll = potential[id_ll]; - phi_rr = potential[id_rr]; - Particles.G.gravity_y[id] = - -1 * (-phi_rr + 8 * phi_r - 8 * phi_l + phi_ll) / (12 * dy); + id_ll = (i + nGHST) + (j - 2 + nGHST) * nx_grid + (k + nGHST) * ny_grid * nx_grid; + id_rr = (i + nGHST) + (j + 2 + nGHST) * nx_grid + (k + nGHST) * ny_grid * nx_grid; + phi_ll = potential[id_ll]; + phi_rr = potential[id_rr]; + Particles.G.gravity_y[id] = -1 * (-phi_rr + 8 * phi_r - 8 * phi_l + phi_ll) / (12 * dy); // if (i == 0) { // std::cout << "phi_ll[" << id_ll << "] = " << phi_ll << std::endl; // } @@ -195,22 +179,17 @@ void Grid3D::Get_Gravity_Field_Particles_function(int g_start, int g_end) for (k = g_start; k < g_end; k++) { for (j = 0; j < ny_grav; j++) { for (i = 0; i < nx_grav; i++) { - id = (i) + (j)*nx_grav + (k)*ny_grav * nx_grav; - id_l = (i + nGHST) + (j + nGHST) * nx_grid + - (k - 1 + nGHST) * ny_grid * nx_grid; - id_r = (i + nGHST) + (j + nGHST) * nx_grid + - (k + 1 + nGHST) * ny_grid * nx_grid; + id = (i) + (j)*nx_grav + (k)*ny_grav * nx_grav; + id_l = (i + nGHST) + (j + nGHST) * nx_grid + (k - 1 + nGHST) * ny_grid * nx_grid; + id_r = (i + nGHST) + (j + nGHST) * nx_grid + (k + 1 + nGHST) * ny_grid * nx_grid; phi_l = potential[id_l]; phi_r = potential[id_r]; #ifdef GRAVITY_5_POINTS_GRADIENT - id_ll = (i + nGHST) + (j + nGHST) * nx_grid + - (k - 2 + nGHST) * ny_grid * nx_grid; - id_rr = (i + nGHST) + (j + nGHST) * nx_grid + - (k + 2 + nGHST) * ny_grid * nx_grid; - phi_ll = potential[id_ll]; - phi_rr = potential[id_rr]; - Particles.G.gravity_z[id] = - -1 * (-phi_rr + 8 * phi_r - 8 * phi_l + phi_ll) / (12 * dz); + id_ll = (i + nGHST) + (j + nGHST) * nx_grid + (k - 2 + nGHST) * ny_grid * nx_grid; + id_rr = (i + nGHST) + (j + nGHST) * nx_grid + (k + 2 + nGHST) * ny_grid * nx_grid; + phi_ll = potential[id_ll]; + phi_rr = potential[id_rr]; + Particles.G.gravity_z[id] = -1 * (-phi_rr + 8 * phi_r - 8 * phi_l + phi_ll) / (12 * dz); #else Particles.G.gravity_z[id] = -0.5 * (phi_r - phi_l) / dz; #endif @@ -254,8 +233,7 @@ void Grid3D::Get_Gravity_CIC_function(part_int_t p_start, part_int_t p_end) x_pos = Particles.pos_x[pIndx]; y_pos = Particles.pos_y[pIndx]; z_pos = Particles.pos_z[pIndx]; - Get_Indexes_CIC(xMin, yMin, zMin, dx, dy, dz, x_pos, y_pos, z_pos, indx_x, - indx_y, indx_z); + Get_Indexes_CIC(xMin, yMin, zMin, dx, dy, dz, x_pos, y_pos, z_pos, indx_x, indx_y, indx_z); if (indx_x < -1) ignore = true; if (indx_y < -1) ignore = true; if (indx_z < -1) ignore = true; @@ -268,17 +246,13 @@ void Grid3D::Get_Gravity_CIC_function(part_int_t p_start, part_int_t p_end) if (!in_local) { std::cout << " Gravity CIC Error:" << std::endl; #ifdef PARTICLE_IDS - std::cout << " Particle outside Local domain pID: " - << Particles.partIDs[pIndx] << std::endl; + std::cout << " Particle outside Local domain pID: " << Particles.partIDs[pIndx] << std::endl; #else std::cout << " Particle outside Local domain " << std::endl; #endif - std::cout << " Domain X: " << Particles.G.xMin << " " - << Particles.G.xMax << std::endl; - std::cout << " Domain Y: " << Particles.G.yMin << " " - << Particles.G.yMax << std::endl; - std::cout << " Domain Z: " << Particles.G.zMin << " " - << Particles.G.zMax << std::endl; + std::cout << " Domain X: " << Particles.G.xMin << " " << Particles.G.xMax << std::endl; + std::cout << " Domain Y: " << Particles.G.yMin << " " << Particles.G.yMax << std::endl; + std::cout << " Domain Z: " << Particles.G.zMin << " " << Particles.G.zMax << std::endl; std::cout << " Particle X: " << x_pos << std::endl; std::cout << " Particle Y: " << y_pos << std::endl; std::cout << " Particle Z: " << z_pos << std::endl; @@ -289,8 +263,7 @@ void Grid3D::Get_Gravity_CIC_function(part_int_t p_start, part_int_t p_end) } if (ignore) { #ifdef PARTICLE_IDS - std::cout << "ERROR GRAVITY_CIC Index pID: " - << Particles.partIDs[pIndx] << std::endl; + std::cout << "ERROR GRAVITY_CIC Index pID: " << Particles.partIDs[pIndx] << std::endl; #else std::cout << "ERROR GRAVITY_CIC Index " << std::endl; #endif @@ -354,32 +327,20 @@ void Grid3D::Get_Gravity_CIC_function(part_int_t p_start, part_int_t p_end) g_y_tru = Particles.G.gravity_y[indx]; g_z_tru = Particles.G.gravity_z[indx]; - g_x = g_x_bl * (delta_x) * (delta_y) * (delta_z) + - g_x_br * (1 - delta_x) * (delta_y) * (delta_z) + - g_x_bu * (delta_x) * (1 - delta_y) * (delta_z) + - g_x_bru * (1 - delta_x) * (1 - delta_y) * (delta_z) + - g_x_tl * (delta_x) * (delta_y) * (1 - delta_z) + - g_x_tr * (1 - delta_x) * (delta_y) * (1 - delta_z) + - g_x_tu * (delta_x) * (1 - delta_y) * (1 - delta_z) + - g_x_tru * (1 - delta_x) * (1 - delta_y) * (1 - delta_z); - - g_y = g_y_bl * (delta_x) * (delta_y) * (delta_z) + - g_y_br * (1 - delta_x) * (delta_y) * (delta_z) + - g_y_bu * (delta_x) * (1 - delta_y) * (delta_z) + - g_y_bru * (1 - delta_x) * (1 - delta_y) * (delta_z) + - g_y_tl * (delta_x) * (delta_y) * (1 - delta_z) + - g_y_tr * (1 - delta_x) * (delta_y) * (1 - delta_z) + - g_y_tu * (delta_x) * (1 - delta_y) * (1 - delta_z) + - g_y_tru * (1 - delta_x) * (1 - delta_y) * (1 - delta_z); - - g_z = g_z_bl * (delta_x) * (delta_y) * (delta_z) + - g_z_br * (1 - delta_x) * (delta_y) * (delta_z) + - g_z_bu * (delta_x) * (1 - delta_y) * (delta_z) + - g_z_bru * (1 - delta_x) * (1 - delta_y) * (delta_z) + - g_z_tl * (delta_x) * (delta_y) * (1 - delta_z) + - g_z_tr * (1 - delta_x) * (delta_y) * (1 - delta_z) + - g_z_tu * (delta_x) * (1 - delta_y) * (1 - delta_z) + - g_z_tru * (1 - delta_x) * (1 - delta_y) * (1 - delta_z); + g_x = g_x_bl * (delta_x) * (delta_y) * (delta_z) + g_x_br * (1 - delta_x) * (delta_y) * (delta_z) + + g_x_bu * (delta_x) * (1 - delta_y) * (delta_z) + g_x_bru * (1 - delta_x) * (1 - delta_y) * (delta_z) + + g_x_tl * (delta_x) * (delta_y) * (1 - delta_z) + g_x_tr * (1 - delta_x) * (delta_y) * (1 - delta_z) + + g_x_tu * (delta_x) * (1 - delta_y) * (1 - delta_z) + g_x_tru * (1 - delta_x) * (1 - delta_y) * (1 - delta_z); + + g_y = g_y_bl * (delta_x) * (delta_y) * (delta_z) + g_y_br * (1 - delta_x) * (delta_y) * (delta_z) + + g_y_bu * (delta_x) * (1 - delta_y) * (delta_z) + g_y_bru * (1 - delta_x) * (1 - delta_y) * (delta_z) + + g_y_tl * (delta_x) * (delta_y) * (1 - delta_z) + g_y_tr * (1 - delta_x) * (delta_y) * (1 - delta_z) + + g_y_tu * (delta_x) * (1 - delta_y) * (1 - delta_z) + g_y_tru * (1 - delta_x) * (1 - delta_y) * (1 - delta_z); + + g_z = g_z_bl * (delta_x) * (delta_y) * (delta_z) + g_z_br * (1 - delta_x) * (delta_y) * (delta_z) + + g_z_bu * (delta_x) * (1 - delta_y) * (delta_z) + g_z_bru * (1 - delta_x) * (1 - delta_y) * (delta_z) + + g_z_tl * (delta_x) * (delta_y) * (1 - delta_z) + g_z_tr * (1 - delta_x) * (delta_y) * (1 - delta_z) + + g_z_tu * (delta_x) * (1 - delta_y) * (1 - delta_z) + g_z_tru * (1 - delta_x) * (1 - delta_y) * (1 - delta_z); Particles.grav_x[pIndx] = g_x; Particles.grav_y[pIndx] = g_y; diff --git a/src/particles/gravity_CIC_gpu.cu b/src/particles/gravity_CIC_gpu.cu index 05db4795c..6da73f28a 100644 --- a/src/particles/gravity_CIC_gpu.cu +++ b/src/particles/gravity_CIC_gpu.cu @@ -17,20 +17,16 @@ #ifdef PARTICLES_GPU // Copy the potential from host to device -void Particles_3D::Copy_Potential_To_GPU(Real *potential_host, - Real *potential_dev, - int n_cells_potential) +void Particles_3D::Copy_Potential_To_GPU(Real *potential_host, Real *potential_dev, int n_cells_potential) { - CudaSafeCall(cudaMemcpy(potential_dev, potential_host, - n_cells_potential * sizeof(Real), - cudaMemcpyHostToDevice)); + CudaSafeCall(cudaMemcpy(potential_dev, potential_host, n_cells_potential * sizeof(Real), cudaMemcpyHostToDevice)); } // Kernel to compute the gradient of the potential -__global__ void Get_Gravity_Field_Particles_Kernel( - Real *potential_dev, Real *gravity_x_dev, Real *gravity_y_dev, - Real *gravity_z_dev, int nx, int ny, int nz, int n_ghost_particles_grid, - int n_ghost_potential, Real dx, Real dy, Real dz) +__global__ void Get_Gravity_Field_Particles_Kernel(Real *potential_dev, Real *gravity_x_dev, Real *gravity_y_dev, + Real *gravity_z_dev, int nx, int ny, int nz, + int n_ghost_particles_grid, int n_ghost_potential, Real dx, Real dy, + Real dz) { int tid_x = blockIdx.x * blockDim.x + threadIdx.x; int tid_y = blockIdx.y * blockDim.y + threadIdx.y; @@ -58,72 +54,57 @@ __global__ void Get_Gravity_Field_Particles_Kernel( #endif // Get Potential Gradient X - id_l = (tid_x - 1 + nGHST) + (tid_y + nGHST) * nx_pot + - (tid_z + nGHST) * ny_pot * nx_pot; - id_r = (tid_x + 1 + nGHST) + (tid_y + nGHST) * nx_pot + - (tid_z + nGHST) * ny_pot * nx_pot; + id_l = (tid_x - 1 + nGHST) + (tid_y + nGHST) * nx_pot + (tid_z + nGHST) * ny_pot * nx_pot; + id_r = (tid_x + 1 + nGHST) + (tid_y + nGHST) * nx_pot + (tid_z + nGHST) * ny_pot * nx_pot; phi_l = potential_dev[id_l]; phi_r = potential_dev[id_r]; #ifdef GRAVITY_5_POINTS_GRADIENT - id_ll = (tid_x - 2 + nGHST) + (tid_y + nGHST) * nx_pot + - (tid_z + nGHST) * ny_pot * nx_pot; - id_rr = (tid_x + 2 + nGHST) + (tid_y + nGHST) * nx_pot + - (tid_z + nGHST) * ny_pot * nx_pot; - phi_ll = potential_dev[id_ll]; - phi_rr = potential_dev[id_rr]; - gravity_x_dev[tid] = - -1 * (-phi_rr + 8 * phi_r - 8 * phi_l + phi_ll) / (12 * dx); + id_ll = (tid_x - 2 + nGHST) + (tid_y + nGHST) * nx_pot + (tid_z + nGHST) * ny_pot * nx_pot; + id_rr = (tid_x + 2 + nGHST) + (tid_y + nGHST) * nx_pot + (tid_z + nGHST) * ny_pot * nx_pot; + phi_ll = potential_dev[id_ll]; + phi_rr = potential_dev[id_rr]; + gravity_x_dev[tid] = -1 * (-phi_rr + 8 * phi_r - 8 * phi_l + phi_ll) / (12 * dx); #else gravity_x_dev[tid] = -0.5 * (phi_r - phi_l) / dx; #endif // Get Potential Gradient Y - id_l = (tid_x + nGHST) + (tid_y - 1 + nGHST) * nx_pot + - (tid_z + nGHST) * ny_pot * nx_pot; - id_r = (tid_x + nGHST) + (tid_y + 1 + nGHST) * nx_pot + - (tid_z + nGHST) * ny_pot * nx_pot; + id_l = (tid_x + nGHST) + (tid_y - 1 + nGHST) * nx_pot + (tid_z + nGHST) * ny_pot * nx_pot; + id_r = (tid_x + nGHST) + (tid_y + 1 + nGHST) * nx_pot + (tid_z + nGHST) * ny_pot * nx_pot; phi_l = potential_dev[id_l]; phi_r = potential_dev[id_r]; #ifdef GRAVITY_5_POINTS_GRADIENT - id_ll = (tid_x + nGHST) + (tid_y - 2 + nGHST) * nx_pot + - (tid_z + nGHST) * ny_pot * nx_pot; - id_rr = (tid_x + nGHST) + (tid_y + 2 + nGHST) * nx_pot + - (tid_z + nGHST) * ny_pot * nx_pot; - phi_ll = potential_dev[id_ll]; - phi_rr = potential_dev[id_rr]; - gravity_y_dev[tid] = - -1 * (-phi_rr + 8 * phi_r - 8 * phi_l + phi_ll) / (12 * dy); + id_ll = (tid_x + nGHST) + (tid_y - 2 + nGHST) * nx_pot + (tid_z + nGHST) * ny_pot * nx_pot; + id_rr = (tid_x + nGHST) + (tid_y + 2 + nGHST) * nx_pot + (tid_z + nGHST) * ny_pot * nx_pot; + phi_ll = potential_dev[id_ll]; + phi_rr = potential_dev[id_rr]; + gravity_y_dev[tid] = -1 * (-phi_rr + 8 * phi_r - 8 * phi_l + phi_ll) / (12 * dy); #else gravity_y_dev[tid] = -0.5 * (phi_r - phi_l) / dy; #endif // Get Potential Gradient Z - id_l = (tid_x + nGHST) + (tid_y + nGHST) * nx_pot + - (tid_z - 1 + nGHST) * ny_pot * nx_pot; - id_r = (tid_x + nGHST) + (tid_y + nGHST) * nx_pot + - (tid_z + 1 + nGHST) * ny_pot * nx_pot; + id_l = (tid_x + nGHST) + (tid_y + nGHST) * nx_pot + (tid_z - 1 + nGHST) * ny_pot * nx_pot; + id_r = (tid_x + nGHST) + (tid_y + nGHST) * nx_pot + (tid_z + 1 + nGHST) * ny_pot * nx_pot; phi_l = potential_dev[id_l]; phi_r = potential_dev[id_r]; #ifdef GRAVITY_5_POINTS_GRADIENT - id_ll = (tid_x + nGHST) + (tid_y + nGHST) * nx_pot + - (tid_z - 2 + nGHST) * ny_pot * nx_pot; - id_rr = (tid_x + nGHST) + (tid_y + nGHST) * nx_pot + - (tid_z + 2 + nGHST) * ny_pot * nx_pot; - phi_ll = potential_dev[id_ll]; - phi_rr = potential_dev[id_rr]; - gravity_z_dev[tid] = - -1 * (-phi_rr + 8 * phi_r - 8 * phi_l + phi_ll) / (12 * dz); + id_ll = (tid_x + nGHST) + (tid_y + nGHST) * nx_pot + (tid_z - 2 + nGHST) * ny_pot * nx_pot; + id_rr = (tid_x + nGHST) + (tid_y + nGHST) * nx_pot + (tid_z + 2 + nGHST) * ny_pot * nx_pot; + phi_ll = potential_dev[id_ll]; + phi_rr = potential_dev[id_rr]; + gravity_z_dev[tid] = -1 * (-phi_rr + 8 * phi_r - 8 * phi_l + phi_ll) / (12 * dz); #else gravity_z_dev[tid] = -0.5 * (phi_r - phi_l) / dz; #endif } // Call the kernel to compute the gradient of the potential -void Particles_3D::Get_Gravity_Field_Particles_GPU_function( - int nx_local, int ny_local, int nz_local, int n_ghost_particles_grid, - int n_cells_potential, Real dx, Real dy, Real dz, Real *potential_host, - Real *potential_dev, Real *gravity_x_dev, Real *gravity_y_dev, - Real *gravity_z_dev) +void Particles_3D::Get_Gravity_Field_Particles_GPU_function(int nx_local, int ny_local, int nz_local, + int n_ghost_particles_grid, int n_cells_potential, Real dx, + Real dy, Real dz, Real *potential_host, Real *potential_dev, + Real *gravity_x_dev, Real *gravity_y_dev, + Real *gravity_z_dev) { #ifndef GRAVITY_GPU Copy_Potential_To_GPU(potential_host, potential_dev, n_cells_potential); @@ -146,18 +127,15 @@ void Particles_3D::Get_Gravity_Field_Particles_GPU_function( // number of threads per 1D block dim3 dim3dBlock(tpb_x, tpb_y, tpb_z); - hipLaunchKernelGGL(Get_Gravity_Field_Particles_Kernel, dim3dGrid, dim3dBlock, - 0, 0, potential_dev, gravity_x_dev, gravity_y_dev, - gravity_z_dev, nx_local, ny_local, nz_local, - n_ghost_particles_grid, N_GHOST_POTENTIAL, dx, dy, dz); + hipLaunchKernelGGL(Get_Gravity_Field_Particles_Kernel, dim3dGrid, dim3dBlock, 0, 0, potential_dev, gravity_x_dev, + gravity_y_dev, gravity_z_dev, nx_local, ny_local, nz_local, n_ghost_particles_grid, + N_GHOST_POTENTIAL, dx, dy, dz); CudaCheckError(); } // Get CIC indexes from the particles positions -__device__ void Get_Indexes_CIC_Gravity(Real xMin, Real yMin, Real zMin, - Real dx, Real dy, Real dz, Real pos_x, - Real pos_y, Real pos_z, int &indx_x, - int &indx_y, int &indx_z) +__device__ void Get_Indexes_CIC_Gravity(Real xMin, Real yMin, Real zMin, Real dx, Real dy, Real dz, Real pos_x, + Real pos_y, Real pos_z, int &indx_x, int &indx_y, int &indx_z) { indx_x = (int)floor((pos_x - xMin - 0.5 * dx) / dx); indx_y = (int)floor((pos_y - yMin - 0.5 * dy) / dy); @@ -166,12 +144,11 @@ __device__ void Get_Indexes_CIC_Gravity(Real xMin, Real yMin, Real zMin, // Kernel to compute the gravitational field at the particles positions via // Cloud-In-Cell -__global__ void Get_Gravity_CIC_Kernel( - part_int_t n_local, Real *gravity_x_dev, Real *gravity_y_dev, - Real *gravity_z_dev, Real *pos_x_dev, Real *pos_y_dev, Real *pos_z_dev, - Real *grav_x_dev, Real *grav_y_dev, Real *grav_z_dev, Real xMin, Real yMin, - Real zMin, Real xMax, Real yMax, Real zMax, Real dx, Real dy, Real dz, - int nx, int ny, int nz, int n_ghost) +__global__ void Get_Gravity_CIC_Kernel(part_int_t n_local, Real *gravity_x_dev, Real *gravity_y_dev, + Real *gravity_z_dev, Real *pos_x_dev, Real *pos_y_dev, Real *pos_z_dev, + Real *grav_x_dev, Real *grav_y_dev, Real *grav_z_dev, Real xMin, Real yMin, + Real zMin, Real xMax, Real yMax, Real zMax, Real dx, Real dy, Real dz, int nx, + int ny, int nz, int n_ghost) { part_int_t tid = blockIdx.x * blockDim.x + threadIdx.x; @@ -194,8 +171,7 @@ __global__ void Get_Gravity_CIC_Kernel( pos_z = pos_z_dev[tid]; int indx_x, indx_y, indx_z, indx; - Get_Indexes_CIC_Gravity(xMin, yMin, zMin, dx, dy, dz, pos_x, pos_y, pos_z, - indx_x, indx_y, indx_z); + Get_Indexes_CIC_Gravity(xMin, yMin, zMin, dx, dy, dz, pos_x, pos_y, pos_z, indx_x, indx_y, indx_z); bool in_local = true; @@ -257,32 +233,20 @@ __global__ void Get_Gravity_CIC_Kernel( g_y_tru = gravity_y_dev[indx]; g_z_tru = gravity_z_dev[indx]; - g_x = g_x_bl * (delta_x) * (delta_y) * (delta_z) + - g_x_br * (1 - delta_x) * (delta_y) * (delta_z) + - g_x_bu * (delta_x) * (1 - delta_y) * (delta_z) + - g_x_bru * (1 - delta_x) * (1 - delta_y) * (delta_z) + - g_x_tl * (delta_x) * (delta_y) * (1 - delta_z) + - g_x_tr * (1 - delta_x) * (delta_y) * (1 - delta_z) + - g_x_tu * (delta_x) * (1 - delta_y) * (1 - delta_z) + - g_x_tru * (1 - delta_x) * (1 - delta_y) * (1 - delta_z); - - g_y = g_y_bl * (delta_x) * (delta_y) * (delta_z) + - g_y_br * (1 - delta_x) * (delta_y) * (delta_z) + - g_y_bu * (delta_x) * (1 - delta_y) * (delta_z) + - g_y_bru * (1 - delta_x) * (1 - delta_y) * (delta_z) + - g_y_tl * (delta_x) * (delta_y) * (1 - delta_z) + - g_y_tr * (1 - delta_x) * (delta_y) * (1 - delta_z) + - g_y_tu * (delta_x) * (1 - delta_y) * (1 - delta_z) + - g_y_tru * (1 - delta_x) * (1 - delta_y) * (1 - delta_z); - - g_z = g_z_bl * (delta_x) * (delta_y) * (delta_z) + - g_z_br * (1 - delta_x) * (delta_y) * (delta_z) + - g_z_bu * (delta_x) * (1 - delta_y) * (delta_z) + - g_z_bru * (1 - delta_x) * (1 - delta_y) * (delta_z) + - g_z_tl * (delta_x) * (delta_y) * (1 - delta_z) + - g_z_tr * (1 - delta_x) * (delta_y) * (1 - delta_z) + - g_z_tu * (delta_x) * (1 - delta_y) * (1 - delta_z) + - g_z_tru * (1 - delta_x) * (1 - delta_y) * (1 - delta_z); + g_x = g_x_bl * (delta_x) * (delta_y) * (delta_z) + g_x_br * (1 - delta_x) * (delta_y) * (delta_z) + + g_x_bu * (delta_x) * (1 - delta_y) * (delta_z) + g_x_bru * (1 - delta_x) * (1 - delta_y) * (delta_z) + + g_x_tl * (delta_x) * (delta_y) * (1 - delta_z) + g_x_tr * (1 - delta_x) * (delta_y) * (1 - delta_z) + + g_x_tu * (delta_x) * (1 - delta_y) * (1 - delta_z) + g_x_tru * (1 - delta_x) * (1 - delta_y) * (1 - delta_z); + + g_y = g_y_bl * (delta_x) * (delta_y) * (delta_z) + g_y_br * (1 - delta_x) * (delta_y) * (delta_z) + + g_y_bu * (delta_x) * (1 - delta_y) * (delta_z) + g_y_bru * (1 - delta_x) * (1 - delta_y) * (delta_z) + + g_y_tl * (delta_x) * (delta_y) * (1 - delta_z) + g_y_tr * (1 - delta_x) * (delta_y) * (1 - delta_z) + + g_y_tu * (delta_x) * (1 - delta_y) * (1 - delta_z) + g_y_tru * (1 - delta_x) * (1 - delta_y) * (1 - delta_z); + + g_z = g_z_bl * (delta_x) * (delta_y) * (delta_z) + g_z_br * (1 - delta_x) * (delta_y) * (delta_z) + + g_z_bu * (delta_x) * (1 - delta_y) * (delta_z) + g_z_bru * (1 - delta_x) * (1 - delta_y) * (delta_z) + + g_z_tl * (delta_x) * (delta_y) * (1 - delta_z) + g_z_tr * (1 - delta_x) * (delta_y) * (1 - delta_z) + + g_z_tu * (delta_x) * (1 - delta_y) * (1 - delta_z) + g_z_tru * (1 - delta_x) * (1 - delta_y) * (1 - delta_z); grav_x_dev[tid] = g_x; grav_y_dev[tid] = g_y; @@ -291,13 +255,12 @@ __global__ void Get_Gravity_CIC_Kernel( // Call the kernel to compote the gravitational field at the particles positions // ( CIC ) -void Particles_3D::Get_Gravity_CIC_GPU_function( - part_int_t n_local, int nx_local, int ny_local, int nz_local, - int n_ghost_particles_grid, Real xMin, Real xMax, Real yMin, Real yMax, - Real zMin, Real zMax, Real dx, Real dy, Real dz, Real *pos_x_dev, - Real *pos_y_dev, Real *pos_z_dev, Real *grav_x_dev, Real *grav_y_dev, - Real *grav_z_dev, Real *gravity_x_dev, Real *gravity_y_dev, - Real *gravity_z_dev) +void Particles_3D::Get_Gravity_CIC_GPU_function(part_int_t n_local, int nx_local, int ny_local, int nz_local, + int n_ghost_particles_grid, Real xMin, Real xMax, Real yMin, Real yMax, + Real zMin, Real zMax, Real dx, Real dy, Real dz, Real *pos_x_dev, + Real *pos_y_dev, Real *pos_z_dev, Real *grav_x_dev, Real *grav_y_dev, + Real *grav_z_dev, Real *gravity_x_dev, Real *gravity_y_dev, + Real *gravity_z_dev) { // set values for GPU kernels int ngrid = (n_local + TPB_PARTICLES - 1) / TPB_PARTICLES; @@ -308,12 +271,9 @@ void Particles_3D::Get_Gravity_CIC_GPU_function( // Only runs if there are local particles if (n_local > 0) { - hipLaunchKernelGGL(Get_Gravity_CIC_Kernel, dim1dGrid, dim1dBlock, 0, 0, - n_local, gravity_x_dev, gravity_y_dev, gravity_z_dev, - pos_x_dev, pos_y_dev, pos_z_dev, grav_x_dev, grav_y_dev, - grav_z_dev, xMin, yMin, zMin, xMax, yMax, zMax, dx, dy, - dz, nx_local, ny_local, nz_local, - n_ghost_particles_grid); + hipLaunchKernelGGL(Get_Gravity_CIC_Kernel, dim1dGrid, dim1dBlock, 0, 0, n_local, gravity_x_dev, gravity_y_dev, + gravity_z_dev, pos_x_dev, pos_y_dev, pos_z_dev, grav_x_dev, grav_y_dev, grav_z_dev, xMin, yMin, + zMin, xMax, yMax, zMax, dx, dy, dz, nx_local, ny_local, nz_local, n_ghost_particles_grid); CudaCheckError(); } } @@ -322,10 +282,8 @@ void Particles_3D::Get_Gravity_CIC_GPU_function( #ifdef GRAVITY_GPU -void __global__ Copy_Particles_Density_Kernel(Real *dst_density, - Real *src_density, int nx_local, - int ny_local, int nz_local, - int n_ghost) +void __global__ Copy_Particles_Density_Kernel(Real *dst_density, Real *src_density, int nx_local, int ny_local, + int nz_local, int n_ghost) { int tid_x, tid_y, tid_z, tid_CIC, tid_dens; tid_x = blockIdx.x * blockDim.x + threadIdx.x; @@ -370,9 +328,8 @@ void Grid3D::Copy_Particles_Density_GPU() // number of threads per 1D block dim3 dim3dBlock(tpb_x, tpb_y, tpb_z); - hipLaunchKernelGGL(Copy_Particles_Density_Kernel, dim3dGrid, dim3dBlock, 0, 0, - Grav.F.density_d, Particles.G.density_dev, nx_local, - ny_local, nz_local, n_ghost); + hipLaunchKernelGGL(Copy_Particles_Density_Kernel, dim3dGrid, dim3dBlock, 0, 0, Grav.F.density_d, + Particles.G.density_dev, nx_local, ny_local, nz_local, n_ghost); } #endif // GRAVITY_GPU diff --git a/src/particles/io_particles.cpp b/src/particles/io_particles.cpp index 0032d4779..66709bea8 100644 --- a/src/particles/io_particles.cpp +++ b/src/particles/io_particles.cpp @@ -42,8 +42,7 @@ void Particles_3D::Load_Particles_Data(struct parameters *P) #ifdef MPI_CHOLLA #ifdef TILED_INITIAL_CONDITIONS - sprintf(filename, "%sics_%dMpc_%d_particles.h5", P->indir, - (int)P->tile_length / 1000, + sprintf(filename, "%sics_%dMpc_%d_particles.h5", P->indir, (int)P->tile_length / 1000, G.nx_local); // Everyone reads the same file #else sprintf(filename, "%s.%d", filename, procID); @@ -76,8 +75,7 @@ void Grid3D::WriteData_Particles(struct parameters P, int nfile) #ifdef HDF5 -void Particles_3D::Load_Particles_Data_HDF5(hid_t file_id, int nfile, - struct parameters *P) +void Particles_3D::Load_Particles_Data_HDF5(hid_t file_id, int nfile, struct parameters *P) { int i, j, k, id, buf_id; hid_t attribute_id, dataset_id; @@ -132,63 +130,54 @@ void Particles_3D::Load_Particles_Data_HDF5(hid_t file_id, int nfile, dataset_buffer_px = (Real *)malloc(n_to_load * sizeof(Real)); dataset_id = H5Dopen(file_id, "/pos_x", H5P_DEFAULT); - status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, - dataset_buffer_px); - status = H5Dclose(dataset_id); + status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer_px); + status = H5Dclose(dataset_id); dataset_buffer_py = (Real *)malloc(n_to_load * sizeof(Real)); dataset_id = H5Dopen(file_id, "/pos_y", H5P_DEFAULT); - status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, - dataset_buffer_py); - status = H5Dclose(dataset_id); + status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer_py); + status = H5Dclose(dataset_id); dataset_buffer_pz = (Real *)malloc(n_to_load * sizeof(Real)); dataset_id = H5Dopen(file_id, "/pos_z", H5P_DEFAULT); - status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, - dataset_buffer_pz); - status = H5Dclose(dataset_id); + status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer_pz); + status = H5Dclose(dataset_id); dataset_buffer_vx = (Real *)malloc(n_to_load * sizeof(Real)); dataset_id = H5Dopen(file_id, "/vel_x", H5P_DEFAULT); - status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, - dataset_buffer_vx); - status = H5Dclose(dataset_id); + status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer_vx); + status = H5Dclose(dataset_id); dataset_buffer_vy = (Real *)malloc(n_to_load * sizeof(Real)); dataset_id = H5Dopen(file_id, "/vel_y", H5P_DEFAULT); - status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, - dataset_buffer_vy); - status = H5Dclose(dataset_id); + status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer_vy); + status = H5Dclose(dataset_id); dataset_buffer_vz = (Real *)malloc(n_to_load * sizeof(Real)); dataset_id = H5Dopen(file_id, "/vel_z", H5P_DEFAULT); - status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, - dataset_buffer_vz); - status = H5Dclose(dataset_id); + status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer_vz); + status = H5Dclose(dataset_id); #ifndef SINGLE_PARTICLE_MASS dataset_buffer_m = (Real *)malloc(n_to_load * sizeof(Real)); dataset_id = H5Dopen(file_id, "/mass", H5P_DEFAULT); - status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, - dataset_buffer_m); - status = H5Dclose(dataset_id); + status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer_m); + status = H5Dclose(dataset_id); #endif #ifdef PARTICLE_IDS part_int_t *dataset_buffer_IDs; dataset_buffer_IDs = (part_int_t *)malloc(n_to_load * sizeof(part_int_t)); dataset_id = H5Dopen(file_id, "/particle_IDs", H5P_DEFAULT); - status = H5Dread(dataset_id, H5T_NATIVE_LONG, H5S_ALL, H5S_ALL, H5P_DEFAULT, - dataset_buffer_IDs); - status = H5Dclose(dataset_id); + status = H5Dread(dataset_id, H5T_NATIVE_LONG, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer_IDs); + status = H5Dclose(dataset_id); #endif #ifdef PARTICLE_AGE dataset_buffer_age = (Real *)malloc(n_to_load * sizeof(Real)); dataset_id = H5Dopen(file_id, "/age", H5P_DEFAULT); - status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, - dataset_buffer_age); - status = H5Dclose(dataset_id); + status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer_age); + status = H5Dclose(dataset_id); #endif // Initialize min and max values for position and velocity to print initial @@ -237,18 +226,13 @@ void Particles_3D::Load_Particles_Data_HDF5(hid_t file_id, int nfile, chprintf(" N_Procs Z: %d Y: %d X: %d \n", nproc_z, nproc_y, nproc_x); bool tile_length_difference = false; - if (fabs(Lx_local - tile_length) / Lx_local > 1e-2) - tile_length_difference = true; - if (fabs(Ly_local - tile_length) / Ly_local > 1e-2) - tile_length_difference = true; - if (fabs(Lz_local - tile_length) / Lz_local > 1e-2) - tile_length_difference = true; + if (fabs(Lx_local - tile_length) / Lx_local > 1e-2) tile_length_difference = true; + if (fabs(Ly_local - tile_length) / Ly_local > 1e-2) tile_length_difference = true; + if (fabs(Lz_local - tile_length) / Lz_local > 1e-2) tile_length_difference = true; if (tile_length_difference) { - std::cout << " WARNING: Local Domain Length Different to Tile Length " - << std::endl; - printf(" Domain Length: [ %f %f %f ]\n", Lz_local, Ly_local, - Lx_local); + std::cout << " WARNING: Local Domain Length Different to Tile Length " << std::endl; + printf(" Domain Length: [ %f %f %f ]\n", Lz_local, Ly_local, Lx_local); printf(" Tile Length: %f \n", tile_length); } @@ -303,8 +287,7 @@ void Particles_3D::Load_Particles_Data_HDF5(hid_t file_id, int nfile, if (pPos_z < G.zMin || pPos_z >= G.zMax) in_local = false; if (!in_local) { #ifdef PARTICLE_IDS - std::cout << " Particle outside Local domain pID: " << pID - << std::endl; + std::cout << " Particle outside Local domain pID: " << pID << std::endl; #else std::cout << " Particle outside Local domain " << std::endl; #endif @@ -363,8 +346,7 @@ void Particles_3D::Load_Particles_Data_HDF5(hid_t file_id, int nfile, // Alocate memory in GPU for particle data // particles_array_size = (part_int_t) n_to_load; particles_array_size = Compute_Particles_GPU_Array_Size(n_to_load); - chprintf(" Allocating GPU buffer size: %ld * %f = %ld \n", n_to_load, - G.gpu_allocation_factor, particles_array_size); + chprintf(" Allocating GPU buffer size: %ld * %f = %ld \n", n_to_load, G.gpu_allocation_factor, particles_array_size); Allocate_Particles_GPU_Array_Real(&pos_x_dev, particles_array_size); Allocate_Particles_GPU_Array_Real(&pos_y_dev, particles_array_size); Allocate_Particles_GPU_Array_Real(&pos_z_dev, particles_array_size); @@ -390,28 +372,20 @@ void Particles_3D::Load_Particles_Data_HDF5(hid_t file_id, int nfile, // printf( " Loaded %ld particles ", n_to_load); // Copy the particle data to GPU memory - Copy_Particles_Array_Real_Host_to_Device(dataset_buffer_px, pos_x_dev, - n_local); - Copy_Particles_Array_Real_Host_to_Device(dataset_buffer_py, pos_y_dev, - n_local); - Copy_Particles_Array_Real_Host_to_Device(dataset_buffer_pz, pos_z_dev, - n_local); - Copy_Particles_Array_Real_Host_to_Device(dataset_buffer_vx, vel_x_dev, - n_local); - Copy_Particles_Array_Real_Host_to_Device(dataset_buffer_vy, vel_y_dev, - n_local); - Copy_Particles_Array_Real_Host_to_Device(dataset_buffer_vz, vel_z_dev, - n_local); + Copy_Particles_Array_Real_Host_to_Device(dataset_buffer_px, pos_x_dev, n_local); + Copy_Particles_Array_Real_Host_to_Device(dataset_buffer_py, pos_y_dev, n_local); + Copy_Particles_Array_Real_Host_to_Device(dataset_buffer_pz, pos_z_dev, n_local); + Copy_Particles_Array_Real_Host_to_Device(dataset_buffer_vx, vel_x_dev, n_local); + Copy_Particles_Array_Real_Host_to_Device(dataset_buffer_vy, vel_y_dev, n_local); + Copy_Particles_Array_Real_Host_to_Device(dataset_buffer_vz, vel_z_dev, n_local); #ifndef SINGLE_PARTICLE_MASS Copy_Particles_Array_Real_Host_to_Device(dataset_buffer_m, mass_dev, n_local); #endif #ifdef PARTICLE_IDS - Copy_Particles_Array_Int_Host_to_Device(dataset_buffer_IDs, partIDs_dev, - n_local); + Copy_Particles_Array_Int_Host_to_Device(dataset_buffer_IDs, partIDs_dev, n_local); #endif #ifdef PARTICLE_AGE - Copy_Particles_Array_Real_Host_to_Device(dataset_buffer_age, age_dev, - n_local); + Copy_Particles_Array_Real_Host_to_Device(dataset_buffer_age, age_dev, n_local); #endif #endif // PARTICLES_GPU @@ -498,36 +472,30 @@ void Grid3D::Write_Particles_Header_HDF5(hid_t file_id) // Create the data space for the attribute dataspace_id = H5Screate_simple(1, &attr_dims, NULL); // Create a group attribute - attribute_id = H5Acreate(file_id, "t_particles", H5T_IEEE_F64BE, dataspace_id, - H5P_DEFAULT, H5P_DEFAULT); + attribute_id = H5Acreate(file_id, "t_particles", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT); // Write the attribute data status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &Particles.t); // Close the attribute status = H5Aclose(attribute_id); - attribute_id = H5Acreate(file_id, "dt_particles", H5T_IEEE_F64BE, - dataspace_id, H5P_DEFAULT, H5P_DEFAULT); + attribute_id = H5Acreate(file_id, "dt_particles", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT); status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &Particles.dt); status = H5Aclose(attribute_id); - attribute_id = H5Acreate(file_id, "n_particles_local", H5T_STD_I64BE, - dataspace_id, H5P_DEFAULT, H5P_DEFAULT); + attribute_id = H5Acreate(file_id, "n_particles_local", H5T_STD_I64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT); status = H5Awrite(attribute_id, H5T_NATIVE_LONG, &Particles.n_local); status = H5Aclose(attribute_id); #ifdef SINGLE_PARTICLE_MASS - attribute_id = H5Acreate(file_id, "particle_mass", H5T_IEEE_F64BE, - dataspace_id, H5P_DEFAULT, H5P_DEFAULT); - status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &Particles.particle_mass); - status = H5Aclose(attribute_id); + attribute_id = H5Acreate(file_id, "particle_mass", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT); + status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &Particles.particle_mass); + status = H5Aclose(attribute_id); #endif #ifdef COSMOLOGY - attribute_id = H5Acreate(file_id, "current_z", H5T_IEEE_F64BE, dataspace_id, - H5P_DEFAULT, H5P_DEFAULT); + attribute_id = H5Acreate(file_id, "current_z", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT); status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &Cosmo.current_z); status = H5Aclose(attribute_id); - attribute_id = H5Acreate(file_id, "current_a", H5T_IEEE_F64BE, dataspace_id, - H5P_DEFAULT, H5P_DEFAULT); + attribute_id = H5Acreate(file_id, "current_a", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT); status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &Cosmo.current_a); status = H5Aclose(attribute_id); #endif @@ -558,14 +526,11 @@ void Grid3D::Write_Particles_Data_HDF5(hid_t file_id) #ifdef PARTICLES_GPU // Copy the device arrays from the device to the host - CudaSafeCall(cudaMemcpy(Particles.G.density, Particles.G.density_dev, - Particles.G.n_cells * sizeof(Real), + CudaSafeCall(cudaMemcpy(Particles.G.density, Particles.G.density_dev, Particles.G.n_cells * sizeof(Real), cudaMemcpyDeviceToHost)); #endif // PARTICLES_GPU - #if defined(OUTPUT_POTENTIAL) && defined(ONLY_PARTICLES) && \ - defined(GRAVITY_GPU) - CudaSafeCall(cudaMemcpy(Grav.F.potential_h, Grav.F.potential_d, - Grav.n_cells_potential * sizeof(Real), + #if defined(OUTPUT_POTENTIAL) && defined(ONLY_PARTICLES) && defined(GRAVITY_GPU) + CudaSafeCall(cudaMemcpy(Grav.F.potential_h, Grav.F.potential_d, Grav.n_cells_potential * sizeof(Real), cudaMemcpyDeviceToHost)); #endif // OUTPUT_POTENTIAL @@ -584,8 +549,7 @@ void Grid3D::Write_Particles_Data_HDF5(hid_t file_id) // number of particles. This will indicate an error on the Particles // transfers. if (N_particles_total != Particles.n_total_initial) - chprintf(" WARNING: Lost Particles: %d \n", - Particles.n_total_initial - N_particles_total); + chprintf(" WARNING: Lost Particles: %d \n", Particles.n_total_initial - N_particles_total); // Create the data space for the datasets dims[0] = n_local; @@ -598,14 +562,11 @@ void Grid3D::Write_Particles_Data_HDF5(hid_t file_id) for (i = 0; i < n_local; i++) dataset_buffer[i] = Particles.pos_x[i]; #endif // PARTICLES_CPU #ifdef PARTICLES_GPU - Particles.Copy_Particles_Array_Real_Device_to_Host( - Particles.pos_x_dev, dataset_buffer, Particles.n_local); + Particles.Copy_Particles_Array_Real_Device_to_Host(Particles.pos_x_dev, dataset_buffer, Particles.n_local); #endif // PARTICLES_GPU if (output_particle_data || H.Output_Complete_Data) { - dataset_id = H5Dcreate(file_id, "/pos_x", H5T_IEEE_F64BE, dataspace_id, - H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); - status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, - H5P_DEFAULT, dataset_buffer); + dataset_id = H5Dcreate(file_id, "/pos_x", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer); status = H5Dclose(dataset_id); } @@ -614,14 +575,11 @@ void Grid3D::Write_Particles_Data_HDF5(hid_t file_id) for (i = 0; i < n_local; i++) dataset_buffer[i] = Particles.pos_y[i]; #endif // PARTICLES_CPU #ifdef PARTICLES_GPU - Particles.Copy_Particles_Array_Real_Device_to_Host( - Particles.pos_y_dev, dataset_buffer, Particles.n_local); + Particles.Copy_Particles_Array_Real_Device_to_Host(Particles.pos_y_dev, dataset_buffer, Particles.n_local); #endif // PARTICLES_GPU if (output_particle_data || H.Output_Complete_Data) { - dataset_id = H5Dcreate(file_id, "/pos_y", H5T_IEEE_F64BE, dataspace_id, - H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); - status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, - H5P_DEFAULT, dataset_buffer); + dataset_id = H5Dcreate(file_id, "/pos_y", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer); status = H5Dclose(dataset_id); } @@ -630,14 +588,11 @@ void Grid3D::Write_Particles_Data_HDF5(hid_t file_id) for (i = 0; i < n_local; i++) dataset_buffer[i] = Particles.pos_z[i]; #endif // PARTICLES_CPU #ifdef PARTICLES_GPU - Particles.Copy_Particles_Array_Real_Device_to_Host( - Particles.pos_z_dev, dataset_buffer, Particles.n_local); + Particles.Copy_Particles_Array_Real_Device_to_Host(Particles.pos_z_dev, dataset_buffer, Particles.n_local); #endif // PARTICLES_GPU if (output_particle_data || H.Output_Complete_Data) { - dataset_id = H5Dcreate(file_id, "/pos_z", H5T_IEEE_F64BE, dataspace_id, - H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); - status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, - H5P_DEFAULT, dataset_buffer); + dataset_id = H5Dcreate(file_id, "/pos_z", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer); status = H5Dclose(dataset_id); } @@ -646,14 +601,11 @@ void Grid3D::Write_Particles_Data_HDF5(hid_t file_id) for (i = 0; i < n_local; i++) dataset_buffer[i] = Particles.vel_x[i]; #endif // PARTICLES_CPU #ifdef PARTICLES_GPU - Particles.Copy_Particles_Array_Real_Device_to_Host( - Particles.vel_x_dev, dataset_buffer, Particles.n_local); + Particles.Copy_Particles_Array_Real_Device_to_Host(Particles.vel_x_dev, dataset_buffer, Particles.n_local); #endif // PARTICLES_GPU if (output_particle_data || H.Output_Complete_Data) { - dataset_id = H5Dcreate(file_id, "/vel_x", H5T_IEEE_F64BE, dataspace_id, - H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); - status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, - H5P_DEFAULT, dataset_buffer); + dataset_id = H5Dcreate(file_id, "/vel_x", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer); status = H5Dclose(dataset_id); } @@ -662,14 +614,11 @@ void Grid3D::Write_Particles_Data_HDF5(hid_t file_id) for (i = 0; i < n_local; i++) dataset_buffer[i] = Particles.vel_y[i]; #endif // PARTICLES_CPU #ifdef PARTICLES_GPU - Particles.Copy_Particles_Array_Real_Device_to_Host( - Particles.vel_y_dev, dataset_buffer, Particles.n_local); + Particles.Copy_Particles_Array_Real_Device_to_Host(Particles.vel_y_dev, dataset_buffer, Particles.n_local); #endif // PARTICLES_GPU if (output_particle_data || H.Output_Complete_Data) { - dataset_id = H5Dcreate(file_id, "/vel_y", H5T_IEEE_F64BE, dataspace_id, - H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); - status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, - H5P_DEFAULT, dataset_buffer); + dataset_id = H5Dcreate(file_id, "/vel_y", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer); status = H5Dclose(dataset_id); } @@ -678,14 +627,11 @@ void Grid3D::Write_Particles_Data_HDF5(hid_t file_id) for (i = 0; i < n_local; i++) dataset_buffer[i] = Particles.vel_z[i]; #endif // PARTICLES_CPU #ifdef PARTICLES_GPU - Particles.Copy_Particles_Array_Real_Device_to_Host( - Particles.vel_z_dev, dataset_buffer, Particles.n_local); + Particles.Copy_Particles_Array_Real_Device_to_Host(Particles.vel_z_dev, dataset_buffer, Particles.n_local); #endif // PARTICLES_GPU if (output_particle_data || H.Output_Complete_Data) { - dataset_id = H5Dcreate(file_id, "/vel_z", H5T_IEEE_F64BE, dataspace_id, - H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); - status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, - H5P_DEFAULT, dataset_buffer); + dataset_id = H5Dcreate(file_id, "/vel_z", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer); status = H5Dclose(dataset_id); } @@ -695,13 +641,10 @@ void Grid3D::Write_Particles_Data_HDF5(hid_t file_id) for (i = 0; i < n_local; i++) dataset_buffer[i] = Particles.mass[i]; #endif // PARTICLES_CPU #ifdef PARTICLES_GPU - Particles.Copy_Particles_Array_Real_Device_to_Host( - Particles.mass_dev, dataset_buffer, Particles.n_local); + Particles.Copy_Particles_Array_Real_Device_to_Host(Particles.mass_dev, dataset_buffer, Particles.n_local); #endif // PARTICLES_GPU - dataset_id = H5Dcreate(file_id, "/mass", H5T_IEEE_F64BE, dataspace_id, - H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); - status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, - H5P_DEFAULT, dataset_buffer); + dataset_id = H5Dcreate(file_id, "/mass", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer); status = H5Dclose(dataset_id); #endif @@ -711,14 +654,11 @@ void Grid3D::Write_Particles_Data_HDF5(hid_t file_id) for (i = 0; i < n_local; i++) dataset_buffer_IDs[i] = Particles.partIDs[i]; #endif // PARTICLES_CPU #ifdef PARTICLES_GPU - Particles.Copy_Particles_Array_Int_Device_to_Host( - Particles.partIDs_dev, dataset_buffer_IDs, Particles.n_local); + Particles.Copy_Particles_Array_Int_Device_to_Host(Particles.partIDs_dev, dataset_buffer_IDs, Particles.n_local); #endif // PARTICLES_GPU - dataset_id = H5Dcreate(file_id, "/particle_IDs", H5T_STD_I64LE, dataspace_id, - H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); - status = H5Dwrite(dataset_id, H5T_NATIVE_LONG, H5S_ALL, H5S_ALL, H5P_DEFAULT, - dataset_buffer_IDs); - status = H5Dclose(dataset_id); + dataset_id = H5Dcreate(file_id, "/particle_IDs", H5T_STD_I64LE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + status = H5Dwrite(dataset_id, H5T_NATIVE_LONG, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer_IDs); + status = H5Dclose(dataset_id); free(dataset_buffer_IDs); #endif @@ -727,13 +667,10 @@ void Grid3D::Write_Particles_Data_HDF5(hid_t file_id) for (i = 0; i < n_local; i++) dataset_buffer[i] = Particles.age[i]; #endif // PARTICLES_CPU #ifdef PARTICLES_GPU - Particles.Copy_Particles_Array_Real_Device_to_Host( - Particles.age_dev, dataset_buffer, Particles.n_local); + Particles.Copy_Particles_Array_Real_Device_to_Host(Particles.age_dev, dataset_buffer, Particles.n_local); #endif // PARTICLES_GPU - dataset_id = H5Dcreate(file_id, "/age", H5T_IEEE_F64BE, dataspace_id, - H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); - status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, - H5P_DEFAULT, dataset_buffer); + dataset_id = H5Dcreate(file_id, "/age", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer); status = H5Dclose(dataset_id); #endif @@ -744,8 +681,7 @@ void Grid3D::Write_Particles_Data_HDF5(hid_t file_id) int ny_dset = Particles.G.ny_local; int nz_dset = Particles.G.nz_local; hsize_t dims3d[3]; - dataset_buffer = (Real *)malloc(Particles.G.nz_local * Particles.G.ny_local * - Particles.G.nx_local * sizeof(Real)); + dataset_buffer = (Real *)malloc(Particles.G.nz_local * Particles.G.ny_local * Particles.G.nx_local * sizeof(Real)); // Create the data space for the datasets dims3d[0] = nx_dset; @@ -759,19 +695,15 @@ void Grid3D::Write_Particles_Data_HDF5(hid_t file_id) for (j = 0; j < Particles.G.ny_local; j++) { for (i = 0; i < Particles.G.nx_local; i++) { id = (i + nGHST) + (j + nGHST) * (Particles.G.nx_local + 2 * nGHST) + - (k + nGHST) * (Particles.G.nx_local + 2 * nGHST) * - (Particles.G.ny_local + 2 * nGHST); - buf_id = k + j * Particles.G.nz_local + - i * Particles.G.nz_local * Particles.G.ny_local; + (k + nGHST) * (Particles.G.nx_local + 2 * nGHST) * (Particles.G.ny_local + 2 * nGHST); + buf_id = k + j * Particles.G.nz_local + i * Particles.G.nz_local * Particles.G.ny_local; dataset_buffer[buf_id] = Particles.G.density[id]; } } } - dataset_id = H5Dcreate(file_id, "/density", H5T_IEEE_F64BE, dataspace_id, - H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); - status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, - H5P_DEFAULT, dataset_buffer); + dataset_id = H5Dcreate(file_id, "/density", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer); status = H5Dclose(dataset_id); #if defined(OUTPUT_POTENTIAL) && defined(ONLY_PARTICLES) @@ -779,20 +711,18 @@ void Grid3D::Write_Particles_Data_HDF5(hid_t file_id) for (k = 0; k < Grav.nz_local; k++) { for (j = 0; j < Grav.ny_local; j++) { for (i = 0; i < Grav.nx_local; i++) { - id = (i + N_GHOST_POTENTIAL) + - (j + N_GHOST_POTENTIAL) * (Grav.nx_local + 2 * N_GHOST_POTENTIAL) + - (k + N_GHOST_POTENTIAL) * (Grav.nx_local + 2 * N_GHOST_POTENTIAL) * - (Grav.ny_local + 2 * N_GHOST_POTENTIAL); - buf_id = k + j * Grav.nz_local + i * Grav.nz_local * Grav.ny_local; + id = + (i + N_GHOST_POTENTIAL) + (j + N_GHOST_POTENTIAL) * (Grav.nx_local + 2 * N_GHOST_POTENTIAL) + + (k + N_GHOST_POTENTIAL) * (Grav.nx_local + 2 * N_GHOST_POTENTIAL) * (Grav.ny_local + 2 * N_GHOST_POTENTIAL); + buf_id = k + j * Grav.nz_local + i * Grav.nz_local * Grav.ny_local; dataset_buffer[buf_id] = Grav.F.potential_h[id]; } } } - dataset_id = H5Dcreate(file_id, "/grav_potential", H5T_IEEE_F64BE, - dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); - status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, - H5P_DEFAULT, dataset_buffer); - status = H5Dclose(dataset_id); + dataset_id = + H5Dcreate(file_id, "/grav_potential", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer); + status = H5Dclose(dataset_id); #endif // OUTPUT_POTENTIAL free(dataset_buffer); diff --git a/src/particles/particles_3D.cpp b/src/particles/particles_3D.cpp index 36ffcf2ce..b68a966a8 100644 --- a/src/particles/particles_3D.cpp +++ b/src/particles/particles_3D.cpp @@ -23,17 +23,13 @@ #include "../utils/parallel_omp.h" #endif -Particles_3D::Particles_3D(void) - : TRANSFER_DENSITY_BOUNDARIES(false), TRANSFER_PARTICLES_BOUNDARIES(false) -{ -} +Particles_3D::Particles_3D(void) : TRANSFER_DENSITY_BOUNDARIES(false), TRANSFER_PARTICLES_BOUNDARIES(false) {} void Grid3D::Initialize_Particles(struct parameters *P) { chprintf("\nInitializing Particles...\n"); - Particles.Initialize(P, Grav, H.xbound, H.ybound, H.zbound, H.xdglobal, - H.ydglobal, H.zdglobal); + Particles.Initialize(P, Grav, H.xbound, H.ybound, H.zbound, H.xdglobal, H.ydglobal, H.zdglobal); #if defined(PARTICLES_GPU) && defined(GRAVITY_GPU) // Set the GPU array for the particles potential equal to the Gravity GPU @@ -49,8 +45,7 @@ void Grid3D::Initialize_Particles(struct parameters *P) chprintf("Particles Initialized Successfully. \n\n"); } -void Particles_3D::Initialize(struct parameters *P, Grav3D &Grav, Real xbound, - Real ybound, Real zbound, Real xdglobal, +void Particles_3D::Initialize(struct parameters *P, Grav3D &Grav, Real xbound, Real ybound, Real zbound, Real xdglobal, Real ydglobal, Real zdglobal) { // Initialize local and total number of particles to 0 @@ -148,8 +143,7 @@ void Particles_3D::Initialize(struct parameters *P, Grav3D &Grav, Real xbound, G.n_ghost_particles_grid = 1; // Number of cells for the particles grid including ghost cells - G.n_cells = (G.nx_local + 2 * G.n_ghost_particles_grid) * - (G.ny_local + 2 * G.n_ghost_particles_grid) * + G.n_cells = (G.nx_local + 2 * G.n_ghost_particles_grid) * (G.ny_local + 2 * G.n_ghost_particles_grid) * (G.nz_local + 2 * G.n_ghost_particles_grid); // Set the boundary types @@ -180,8 +174,7 @@ void Particles_3D::Initialize(struct parameters *P, Grav3D &Grav, Real xbound, #endif G.size_blocks_array = 1024 * 128; - G.n_cells_potential = (G.nx_local + 2 * N_GHOST_POTENTIAL) * - (G.ny_local + 2 * N_GHOST_POTENTIAL) * + G.n_cells_potential = (G.nx_local + 2 * N_GHOST_POTENTIAL) * (G.ny_local + 2 * N_GHOST_POTENTIAL) * (G.nz_local + 2 * N_GHOST_POTENTIAL); #ifdef SINGLE_PARTICLE_MASS @@ -207,8 +200,7 @@ void Particles_3D::Initialize(struct parameters *P, Grav3D &Grav, Real xbound, Initialize_Zeldovich_Pancake(P); else if (strcmp(P->init, "Read_Grid") == 0) Load_Particles_Data(P); - #if defined(PARTICLE_AGE) && !defined(SINGLE_PARTICLE_MASS) && \ - defined(PARTICLE_IDS) + #if defined(PARTICLE_AGE) && !defined(SINGLE_PARTICLE_MASS) && defined(PARTICLE_IDS) else if (strcmp(P->init, "Disk_3D_particles") == 0) Initialize_Disk_Stellar_Clusters(P); #endif @@ -221,11 +213,9 @@ void Particles_3D::Initialize(struct parameters *P, Grav3D &Grav, Real xbound, chprintf("Particles Initialized: \n n_local: %lu \n", n_local); chprintf(" n_total: %lu \n", n_total_initial); - chprintf(" xDomain_local: [%.4f %.4f ] [%.4f %.4f ] [%.4f %.4f ]\n", G.xMin, - G.xMax, G.yMin, G.yMax, G.zMin, G.zMax); - chprintf(" xDomain_global: [%.4f %.4f ] [%.4f %.4f ] [%.4f %.4f ]\n", - G.domainMin_x, G.domainMax_x, G.domainMin_y, G.domainMax_y, - G.domainMin_z, G.domainMax_z); + chprintf(" xDomain_local: [%.4f %.4f ] [%.4f %.4f ] [%.4f %.4f ]\n", G.xMin, G.xMax, G.yMin, G.yMax, G.zMin, G.zMax); + chprintf(" xDomain_global: [%.4f %.4f ] [%.4f %.4f ] [%.4f %.4f ]\n", G.domainMin_x, G.domainMax_x, G.domainMin_y, + G.domainMax_y, G.domainMin_z, G.domainMax_z); chprintf(" dx: %f %f %f\n", G.dx, G.dy, G.dz); #ifdef PARTICLE_IDS @@ -256,13 +246,11 @@ void Particles_3D::Initialize(struct parameters *P, Grav3D &Grav, Real xbound, omp_id = omp_get_thread_num(); n_omp_procs = omp_get_num_threads(); #pragma omp barrier - Get_OMP_Particles_Indxs(n_local, n_omp_procs, omp_id, &omp_pIndx_start, - &omp_pIndx_end); + Get_OMP_Particles_Indxs(n_local, n_omp_procs, omp_id, &omp_pIndx_start, &omp_pIndx_end); for (int omp_indx = 0; omp_indx < n_omp_procs; omp_indx++) { if (omp_id == omp_indx) - chprintf(" omp_id:%d p_start:%ld p_end:%ld \n", omp_id, - omp_pIndx_start, omp_pIndx_end); + chprintf(" omp_id:%d p_start:%ld p_end:%ld \n", omp_id, omp_pIndx_start, omp_pIndx_end); } } #endif // PRINT_OMP_DOMAIN @@ -338,15 +326,11 @@ void Particles_3D::ReAllocate_Memory_GPU_MPI() buffer_size = particles_array_size; half_blocks_size = ((buffer_size - 1) / 2) / TPB_PARTICLES + 1; Allocate_Particles_GPU_Array_bool(&G.transfer_particles_flags_d, buffer_size); - Allocate_Particles_GPU_Array_int(&G.transfer_particles_indices_d, - buffer_size); + Allocate_Particles_GPU_Array_int(&G.transfer_particles_indices_d, buffer_size); Allocate_Particles_GPU_Array_int(&G.replace_particles_indices_d, buffer_size); - Allocate_Particles_GPU_Array_int(&G.transfer_particles_prefix_sum_d, - buffer_size); - Allocate_Particles_GPU_Array_int(&G.transfer_particles_prefix_sum_blocks_d, - half_blocks_size); - printf(" New allocation of arrays for particles transfers new_size: %d \n", - (int)buffer_size); + Allocate_Particles_GPU_Array_int(&G.transfer_particles_prefix_sum_d, buffer_size); + Allocate_Particles_GPU_Array_int(&G.transfer_particles_prefix_sum_blocks_d, half_blocks_size); + printf(" New allocation of arrays for particles transfers new_size: %d \n", (int)buffer_size); } void Particles_3D::Allocate_Memory_GPU_MPI() @@ -358,13 +342,10 @@ void Particles_3D::Allocate_Memory_GPU_MPI() half_blocks_size = ((buffer_size - 1) / 2) / TPB_PARTICLES + 1; Allocate_Particles_GPU_Array_bool(&G.transfer_particles_flags_d, buffer_size); - Allocate_Particles_GPU_Array_int(&G.transfer_particles_indices_d, - buffer_size); + Allocate_Particles_GPU_Array_int(&G.transfer_particles_indices_d, buffer_size); Allocate_Particles_GPU_Array_int(&G.replace_particles_indices_d, buffer_size); - Allocate_Particles_GPU_Array_int(&G.transfer_particles_prefix_sum_d, - buffer_size); - Allocate_Particles_GPU_Array_int(&G.transfer_particles_prefix_sum_blocks_d, - half_blocks_size); + Allocate_Particles_GPU_Array_int(&G.transfer_particles_prefix_sum_d, buffer_size); + Allocate_Particles_GPU_Array_int(&G.transfer_particles_prefix_sum_blocks_d, half_blocks_size); Allocate_Particles_GPU_Array_int(&G.n_transfer_d, 1); G.n_transfer_h = (int *)malloc(sizeof(int)); @@ -532,8 +513,7 @@ void Particles_3D::Initialize_Sphere(struct parameters *P) Real *temp_mass = (Real *)malloc(particles_array_size * sizeof(Real)); #endif #ifdef PARTICLE_IDS - part_int_t *temp_id = - (part_int_t *)malloc(particles_array_size * sizeof(part_int_t)); + part_int_t *temp_id = (part_int_t *)malloc(particles_array_size * sizeof(part_int_t)); #endif chprintf(" Allocated GPU memory for particle data\n"); @@ -552,8 +532,7 @@ void Particles_3D::Initialize_Sphere(struct parameters *P) pPos_y = yPositionPrng(generator); pPos_z = zPositionPrng(generator); - r = sqrt((pPos_x - center_x) * (pPos_x - center_x) + - (pPos_y - center_y) * (pPos_y - center_y) + + r = sqrt((pPos_x - center_x) * (pPos_x - center_x) + (pPos_y - center_y) * (pPos_y - center_y) + (pPos_z - center_z) * (pPos_z - center_z)); if (r > sphereR) continue; @@ -650,8 +629,7 @@ void Particles_3D::Initialize_Sphere(struct parameters *P) chprintf(" Particles Uniform Sphere Initialized, n_local: %lu\n", n_local); } - #if defined(PARTICLE_AGE) && !defined(SINGLE_PARTICLE_MASS) && \ - defined(PARTICLE_IDS) + #if defined(PARTICLE_AGE) && !defined(SINGLE_PARTICLE_MASS) && defined(PARTICLE_IDS) /** * Initializes a disk population of uniform mass stellar clusters */ @@ -662,8 +640,7 @@ void Particles_3D::Initialize_Disk_Stellar_Clusters(struct parameters *P) // Set up the PRNG std::mt19937_64 generator(P->prng_seed); - std::gamma_distribution radialDist( - 2, 1); // for generating cyclindrical radii + std::gamma_distribution radialDist(2, 1); // for generating cyclindrical radii std::uniform_real_distribution zDist(-0.005, 0.005); std::uniform_real_distribution vzDist(-1e-8, 1e-8); std::uniform_real_distribution phiDist(0, @@ -671,9 +648,7 @@ void Particles_3D::Initialize_Disk_Stellar_Clusters(struct parameters *P) std::normal_distribution speedDist(0, 1); // for generating random speeds. - Real M_d = - Galaxies::MW - .getM_d(); // MW disk mass in M_sun (assumed to be all in stars) + Real M_d = Galaxies::MW.getM_d(); // MW disk mass in M_sun (assumed to be all in stars) Real R_d = Galaxies::MW.getR_d(); // MW stellar disk scale length in kpc Real Z_d = Galaxies::MW.getZ_d(); // MW stellar height scale length in kpc Real R_max = sqrt(P->xlen * P->xlen + P->ylen * P->ylen) / 2; @@ -720,7 +695,7 @@ void Particles_3D::Initialize_Disk_Stellar_Clusters(struct parameters *P) if (y < G.yMin || y >= G.yMax) continue; if (z < G.zMin || z >= G.zMax) continue; - ac = fabs(Galaxies::MW.gr_disk_D3D(R, 0) + Galaxies::MW.gr_halo_D3D(R, 0)); + ac = fabs(Galaxies::MW.gr_disk_D3D(R, 0) + Galaxies::MW.gr_halo_D3D(R, 0)); vPhi = sqrt(R * ac); vx = -vPhi * sin(phi); @@ -774,37 +749,27 @@ void Particles_3D::Initialize_Disk_Stellar_Clusters(struct parameters *P) #ifdef PARTICLES_GPU particles_array_size = Compute_Particles_GPU_Array_Size(n_local); Allocate_Particles_GPU_Array_Real(&pos_x_dev, particles_array_size); - Copy_Particles_Array_Real_Host_to_Device(temp_pos_x.data(), pos_x_dev, - n_local); + Copy_Particles_Array_Real_Host_to_Device(temp_pos_x.data(), pos_x_dev, n_local); Allocate_Particles_GPU_Array_Real(&pos_y_dev, particles_array_size); - Copy_Particles_Array_Real_Host_to_Device(temp_pos_y.data(), pos_y_dev, - n_local); + Copy_Particles_Array_Real_Host_to_Device(temp_pos_y.data(), pos_y_dev, n_local); Allocate_Particles_GPU_Array_Real(&pos_z_dev, particles_array_size); - Copy_Particles_Array_Real_Host_to_Device(temp_pos_z.data(), pos_z_dev, - n_local); + Copy_Particles_Array_Real_Host_to_Device(temp_pos_z.data(), pos_z_dev, n_local); Allocate_Particles_GPU_Array_Real(&vel_x_dev, particles_array_size); - Copy_Particles_Array_Real_Host_to_Device(temp_vel_x.data(), vel_x_dev, - n_local); + Copy_Particles_Array_Real_Host_to_Device(temp_vel_x.data(), vel_x_dev, n_local); Allocate_Particles_GPU_Array_Real(&vel_y_dev, particles_array_size); - Copy_Particles_Array_Real_Host_to_Device(temp_vel_y.data(), vel_y_dev, - n_local); + Copy_Particles_Array_Real_Host_to_Device(temp_vel_y.data(), vel_y_dev, n_local); Allocate_Particles_GPU_Array_Real(&vel_z_dev, particles_array_size); - Copy_Particles_Array_Real_Host_to_Device(temp_vel_z.data(), vel_z_dev, - n_local); + Copy_Particles_Array_Real_Host_to_Device(temp_vel_z.data(), vel_z_dev, n_local); Allocate_Particles_GPU_Array_Real(&grav_x_dev, particles_array_size); - Copy_Particles_Array_Real_Host_to_Device(temp_grav_x.data(), grav_x_dev, - n_local); + Copy_Particles_Array_Real_Host_to_Device(temp_grav_x.data(), grav_x_dev, n_local); Allocate_Particles_GPU_Array_Real(&grav_y_dev, particles_array_size); - Copy_Particles_Array_Real_Host_to_Device(temp_grav_y.data(), grav_y_dev, - n_local); + Copy_Particles_Array_Real_Host_to_Device(temp_grav_y.data(), grav_y_dev, n_local); Allocate_Particles_GPU_Array_Real(&grav_z_dev, particles_array_size); - Copy_Particles_Array_Real_Host_to_Device(temp_grav_z.data(), grav_z_dev, - n_local); + Copy_Particles_Array_Real_Host_to_Device(temp_grav_z.data(), grav_z_dev, n_local); Allocate_Particles_GPU_Array_Real(&mass_dev, particles_array_size); Copy_Particles_Array_Real_Host_to_Device(temp_mass.data(), mass_dev, n_local); Allocate_Particles_GPU_Array_Part_Int(&partIDs_dev, particles_array_size); - Copy_Particles_Array_Int_Host_to_Device(temp_ids.data(), partIDs_dev, - n_local); + Copy_Particles_Array_Int_Host_to_Device(temp_ids.data(), partIDs_dev, n_local); Allocate_Particles_GPU_Array_Real(&age_dev, particles_array_size); Copy_Particles_Array_Real_Host_to_Device(temp_age.data(), age_dev, n_local); #endif // PARTICLES_GPU @@ -886,8 +851,8 @@ void Grid3D::Initialize_Uniform_Particles() Particles.n_total_initial = Particles.n_local; #endif - chprintf(" Particles Uniform Grid Initialized, n_local: %lu, n_total: %lu\n", - Particles.n_local, Particles.n_total_initial); + chprintf(" Particles Uniform Grid Initialized, n_local: %lu, n_total: %lu\n", Particles.n_local, + Particles.n_total_initial); } void Particles_3D::Free_Memory(void) diff --git a/src/particles/particles_3D.h b/src/particles/particles_3D.h index 2318ec360..b52f85bfc 100644 --- a/src/particles/particles_3D.h +++ b/src/particles/particles_3D.h @@ -216,8 +216,8 @@ class Particles_3D Particles_3D(void); - void Initialize(struct parameters *P, Grav3D &Grav, Real xbound, Real ybound, - Real zbound, Real xdglobal, Real ydglobal, Real zdglobal); + void Initialize(struct parameters *P, Grav3D &Grav, Real xbound, Real ybound, Real zbound, Real xdglobal, + Real ydglobal, Real zdglobal); void Allocate_Particles_Grid_Field_Real(Real **array_dev, int size); void Free_GPU_Array_Real(Real *array); @@ -235,78 +235,56 @@ class Particles_3D void Allocate_Particles_GPU_Array_Real(Real **array_dev, part_int_t size); void Allocate_Particles_GPU_Array_bool(bool **array_dev, part_int_t size); void Allocate_Particles_GPU_Array_int(int **array_dev, part_int_t size); - void Allocate_Particles_GPU_Array_Part_Int(part_int_t **array_dev, - part_int_t size); - void Copy_Particles_Array_Real_Host_to_Device(Real *array_host, - Real *array_dev, - part_int_t size); - void Copy_Particles_Array_Real_Device_to_Host(Real *array_dev, - Real *array_host, - part_int_t size); - void Copy_Particles_Array_Int_Host_to_Device(part_int_t *array_host, - part_int_t *array_dev, - part_int_t size); - void Copy_Particles_Array_Int_Device_to_Host(part_int_t *array_dev, - part_int_t *array_host, - part_int_t size); + void Allocate_Particles_GPU_Array_Part_Int(part_int_t **array_dev, part_int_t size); + void Copy_Particles_Array_Real_Host_to_Device(Real *array_host, Real *array_dev, part_int_t size); + void Copy_Particles_Array_Real_Device_to_Host(Real *array_dev, Real *array_host, part_int_t size); + void Copy_Particles_Array_Int_Host_to_Device(part_int_t *array_host, part_int_t *array_dev, part_int_t size); + void Copy_Particles_Array_Int_Device_to_Host(part_int_t *array_dev, part_int_t *array_host, part_int_t size); void Set_Particles_Array_Real(Real value, Real *array_dev, part_int_t size); void Free_Memory_GPU(); void Initialize_Grid_Values_GPU(); void Get_Density_CIC_GPU(); - void Get_Density_CIC_GPU_function( - part_int_t n_local, Real particle_mass, Real xMin, Real xMax, Real yMin, - Real yMax, Real zMin, Real zMax, Real dx, Real dy, Real dz, int nx_local, - int ny_local, int nz_local, int n_ghost_particles_grid, int n_cells, - Real *density_h, Real *density_dev, Real *pos_x_dev, Real *pos_y_dev, - Real *pos_z_dev, Real *mass_dev); + void Get_Density_CIC_GPU_function(part_int_t n_local, Real particle_mass, Real xMin, Real xMax, Real yMin, Real yMax, + Real zMin, Real zMax, Real dx, Real dy, Real dz, int nx_local, int ny_local, + int nz_local, int n_ghost_particles_grid, int n_cells, Real *density_h, + Real *density_dev, Real *pos_x_dev, Real *pos_y_dev, Real *pos_z_dev, + Real *mass_dev); void Clear_Density_GPU(); void Clear_Density_GPU_function(Real *density_dev, int n_cells); - void Copy_Potential_To_GPU(Real *potential_host, Real *potential_dev, - int n_cells_potential); + void Copy_Potential_To_GPU(Real *potential_host, Real *potential_dev, int n_cells_potential); void Get_Gravity_Field_Particles_GPU(Real *potential_host); - void Get_Gravity_Field_Particles_GPU_function( - int nx_local, int ny_local, int nz_local, int n_ghost_particles_grid, - int n_cells_potential, Real dx, Real dy, Real dz, Real *potential_host, - Real *potential_dev, Real *gravity_x_dev, Real *gravity_y_dev, - Real *gravity_z_dev); + void Get_Gravity_Field_Particles_GPU_function(int nx_local, int ny_local, int nz_local, int n_ghost_particles_grid, + int n_cells_potential, Real dx, Real dy, Real dz, Real *potential_host, + Real *potential_dev, Real *gravity_x_dev, Real *gravity_y_dev, + Real *gravity_z_dev); void Get_Gravity_CIC_GPU(); - void Get_Gravity_CIC_GPU_function( - part_int_t n_local, int nx_local, int ny_local, int nz_local, - int n_ghost_particles_grid, Real xMin, Real xMax, Real yMin, Real yMax, - Real zMin, Real zMax, Real dx, Real dy, Real dz, Real *pos_x_dev, - Real *pos_y_dev, Real *pos_z_dev, Real *grav_x_dev, Real *grav_y_dev, - Real *grav_z_dev, Real *gravity_x_dev, Real *gravity_y_dev, - Real *gravity_z_dev); - Real Calc_Particles_dt_GPU_function(int ngrid, part_int_t n_local, Real dx, - Real dy, Real dz, Real *vel_x_dev, - Real *vel_y_dev, Real *vel_z_dev, - Real *dti_array_host, - Real *dti_array_dev); - void Advance_Particles_KDK_Step1_GPU_function( - part_int_t n_local, Real dt, Real *pos_x_dev, Real *pos_y_dev, - Real *pos_z_dev, Real *vel_x_dev, Real *vel_y_dev, Real *vel_z_dev, - Real *grav_x_dev, Real *grav_y_dev, Real *grav_z_dev); - void Advance_Particles_KDK_Step1_Cosmo_GPU_function( - part_int_t n_local, Real delta_a, Real *pos_x_dev, Real *pos_y_dev, - Real *pos_z_dev, Real *vel_x_dev, Real *vel_y_dev, Real *vel_z_dev, - Real *grav_x_dev, Real *grav_y_dev, Real *grav_z_dev, Real current_a, - Real H0, Real cosmo_h, Real Omega_M, Real Omega_L, Real Omega_K); - void Advance_Particles_KDK_Step2_GPU_function( - part_int_t n_local, Real dt, Real *vel_x_dev, Real *vel_y_dev, - Real *vel_z_dev, Real *grav_x_dev, Real *grav_y_dev, Real *grav_z_dev); - void Advance_Particles_KDK_Step2_Cosmo_GPU_function( - part_int_t n_local, Real delta_a, Real *vel_x_dev, Real *vel_y_dev, - Real *vel_z_dev, Real *grav_x_dev, Real *grav_y_dev, Real *grav_z_dev, - Real current_a, Real H0, Real cosmo_h, Real Omega_M, Real Omega_L, - Real Omega_K); + void Get_Gravity_CIC_GPU_function(part_int_t n_local, int nx_local, int ny_local, int nz_local, + int n_ghost_particles_grid, Real xMin, Real xMax, Real yMin, Real yMax, Real zMin, + Real zMax, Real dx, Real dy, Real dz, Real *pos_x_dev, Real *pos_y_dev, + Real *pos_z_dev, Real *grav_x_dev, Real *grav_y_dev, Real *grav_z_dev, + Real *gravity_x_dev, Real *gravity_y_dev, Real *gravity_z_dev); + Real Calc_Particles_dt_GPU_function(int ngrid, part_int_t n_local, Real dx, Real dy, Real dz, Real *vel_x_dev, + Real *vel_y_dev, Real *vel_z_dev, Real *dti_array_host, Real *dti_array_dev); + void Advance_Particles_KDK_Step1_GPU_function(part_int_t n_local, Real dt, Real *pos_x_dev, Real *pos_y_dev, + Real *pos_z_dev, Real *vel_x_dev, Real *vel_y_dev, Real *vel_z_dev, + Real *grav_x_dev, Real *grav_y_dev, Real *grav_z_dev); + void Advance_Particles_KDK_Step1_Cosmo_GPU_function(part_int_t n_local, Real delta_a, Real *pos_x_dev, + Real *pos_y_dev, Real *pos_z_dev, Real *vel_x_dev, + Real *vel_y_dev, Real *vel_z_dev, Real *grav_x_dev, + Real *grav_y_dev, Real *grav_z_dev, Real current_a, Real H0, + Real cosmo_h, Real Omega_M, Real Omega_L, Real Omega_K); + void Advance_Particles_KDK_Step2_GPU_function(part_int_t n_local, Real dt, Real *vel_x_dev, Real *vel_y_dev, + Real *vel_z_dev, Real *grav_x_dev, Real *grav_y_dev, Real *grav_z_dev); + void Advance_Particles_KDK_Step2_Cosmo_GPU_function(part_int_t n_local, Real delta_a, Real *vel_x_dev, + Real *vel_y_dev, Real *vel_z_dev, Real *grav_x_dev, + Real *grav_y_dev, Real *grav_z_dev, Real current_a, Real H0, + Real cosmo_h, Real Omega_M, Real Omega_L, Real Omega_K); part_int_t Compute_Particles_GPU_Array_Size(part_int_t n); int Select_Particles_to_Transfer_GPU(int direction, int side); - void Copy_Transfer_Particles_to_Buffer_GPU(int n_transfer, int direction, - int side, Real *send_buffer, + void Copy_Transfer_Particles_to_Buffer_GPU(int n_transfer, int direction, int side, Real *send_buffer, int buffer_length); void Replace_Tranfered_Particles_GPU(int n_transfer); - void Unload_Particles_from_Buffer_GPU(int direction, int side, - Real *recv_buffer_h, int n_recv); + void Unload_Particles_from_Buffer_GPU(int direction, int side, Real *recv_buffer_h, int n_recv); void Copy_Transfer_Particles_from_Buffer_GPU(int n_recv, Real *recv_buffer_d); void Set_Particles_Open_Boundary_GPU(int dir, int side); #ifdef PRINT_MAX_MEMORY_USAGE @@ -321,8 +299,7 @@ class Particles_3D void Initialize_Sphere(struct parameters *P); - #if defined(PARTICLE_AGE) && !defined(SINGLE_PARTICLE_MASS) && \ - defined(PARTICLE_IDS) + #if defined(PARTICLE_AGE) && !defined(SINGLE_PARTICLE_MASS) && defined(PARTICLE_IDS) void Initialize_Disk_Stellar_Clusters(struct parameters *P); #endif @@ -351,32 +328,26 @@ class Particles_3D #ifdef MPI_CHOLLA void Clear_Particles_For_Transfer(void); void Select_Particles_to_Transfer_All(int *flags); - void Add_Particle_To_Buffer(Real *buffer, part_int_t n_in_buffer, - int buffer_length, Real pId, Real pMass, - Real pAge, Real pPos_x, Real pPos_y, Real pPos_z, - Real pVel_x, Real pVel_y, Real pVel_z); + void Add_Particle_To_Buffer(Real *buffer, part_int_t n_in_buffer, int buffer_length, Real pId, Real pMass, Real pAge, + Real pPos_x, Real pPos_y, Real pPos_z, Real pVel_x, Real pVel_y, Real pVel_z); void Remove_Transfered_Particles(); #ifdef PARTICLES_CPU void Clear_Vectors_For_Transfers(void); - void Add_Particle_To_Vectors(Real pId, Real pMass, Real pAge, Real pPos_x, - Real pPos_y, Real pPos_z, Real pVel_x, + void Add_Particle_To_Vectors(Real pId, Real pMass, Real pAge, Real pPos_x, Real pPos_y, Real pPos_z, Real pVel_x, Real pVel_y, Real pVel_z, int *flags); void Select_Particles_to_Transfer_All_CPU(int *flags); - void Load_Particles_to_Buffer_CPU(int direction, int side, Real *send_buffer, - int buffer_length); - void Unload_Particles_from_Buffer_CPU( - int direction, int side, Real *recv_buffer, part_int_t n_recv, - Real *send_buffer_y0, Real *send_buffer_y1, Real *send_buffer_z0, - Real *send_buffer_z1, int buffer_length_y0, int buffer_length_y1, - int buffer_length_z0, int buffer_length_z1, int *flags); + void Load_Particles_to_Buffer_CPU(int direction, int side, Real *send_buffer, int buffer_length); + void Unload_Particles_from_Buffer_CPU(int direction, int side, Real *recv_buffer, part_int_t n_recv, + Real *send_buffer_y0, Real *send_buffer_y1, Real *send_buffer_z0, + Real *send_buffer_z1, int buffer_length_y0, int buffer_length_y1, + int buffer_length_z0, int buffer_length_z1, int *flags); #endif // PARTICLES_CPU #ifdef PARTICLES_GPU void Allocate_Memory_GPU_MPI(); void ReAllocate_Memory_GPU_MPI(); - void Load_Particles_to_Buffer_GPU(int direction, int side, Real *send_buffer, - int buffer_length); + void Load_Particles_to_Buffer_GPU(int direction, int side, Real *send_buffer, int buffer_length); #endif // PARTICLES_GPU #endif }; diff --git a/src/particles/particles_3D_gpu.cu b/src/particles/particles_3D_gpu.cu index 490e2d391..8a8898718 100644 --- a/src/particles/particles_3D_gpu.cu +++ b/src/particles/particles_3D_gpu.cu @@ -13,8 +13,7 @@ void Particles_3D::Free_GPU_Array_Real(Real *array) { cudaFree(array); } -void Particles_3D::Allocate_Particles_Grid_Field_Real(Real **array_dev, - int size) +void Particles_3D::Allocate_Particles_Grid_Field_Real(Real **array_dev, int size) { size_t global_free, global_total; CudaSafeCall(cudaMemGetInfo(&global_free, &global_total)); @@ -48,15 +47,14 @@ void Particles_3D::Print_Max_Memory_Usage() n_local_max = (part_int_t)ReduceRealMax((Real)n_local); n_total = ReducePartIntSum(n_local); fraction_max = (Real)n_local_max / (Real)n_total; - mem_usage = n_local_max * 9 * sizeof(Real); // Usage for pos, vel ans accel. + mem_usage = n_local_max * 9 * sizeof(Real); // Usage for pos, vel ans accel. global_free_min = ReduceRealMin((Real)global_free); chprintf( " Particles GPU Memory: N_local_max: %ld (%.1f %) mem_usage: %ld MB " " global_free_min: %.1f MB \n", - n_local_max, fraction_max * 100, mem_usage / 1000000, - global_free_min / 1000000); + n_local_max, fraction_max * 100, mem_usage / 1000000, global_free_min / 1000000); } #endif @@ -65,8 +63,7 @@ void Particles_3D::Free_GPU_Array_int(int *array) { cudaFree(array); } void Particles_3D::Free_GPU_Array_bool(bool *array) { cudaFree(array); } template -void __global__ Copy_Device_to_Device_Kernel(T *src_array_dev, T *dst_array_dev, - part_int_t size) +void __global__ Copy_Device_to_Device_Kernel(T *src_array_dev, T *dst_array_dev, part_int_t size) { int tid = blockIdx.x * blockDim.x + threadIdx.x; if (tid < size) dst_array_dev[tid] = src_array_dev[tid]; @@ -78,13 +75,11 @@ void Copy_Device_to_Device(T *src_array_dev, T *dst_array_dev, part_int_t size) int ngrid = (size + TPB_PARTICLES - 1) / TPB_PARTICLES; dim3 dim1dGrid(ngrid, 1, 1); dim3 dim1dBlock(TPB_PARTICLES, 1, 1); - hipLaunchKernelGGL(Copy_Device_to_Device_Kernel, dim1dGrid, dim1dBlock, 0, 0, - src_array_dev, dst_array_dev, size); + hipLaunchKernelGGL(Copy_Device_to_Device_Kernel, dim1dGrid, dim1dBlock, 0, 0, src_array_dev, dst_array_dev, size); CudaCheckError(); } -void Particles_3D::Allocate_Particles_GPU_Array_Real(Real **array_dev, - part_int_t size) +void Particles_3D::Allocate_Particles_GPU_Array_Real(Real **array_dev, part_int_t size) { size_t global_free, global_total; CudaSafeCall(cudaMemGetInfo(&global_free, &global_total)); @@ -101,8 +96,7 @@ void Particles_3D::Allocate_Particles_GPU_Array_Real(Real **array_dev, cudaDeviceSynchronize(); } -void Particles_3D::Allocate_Particles_GPU_Array_int(int **array_dev, - part_int_t size) +void Particles_3D::Allocate_Particles_GPU_Array_int(int **array_dev, part_int_t size) { size_t global_free, global_total; CudaSafeCall(cudaMemGetInfo(&global_free, &global_total)); @@ -119,8 +113,7 @@ void Particles_3D::Allocate_Particles_GPU_Array_int(int **array_dev, cudaDeviceSynchronize(); } -void Particles_3D::Allocate_Particles_GPU_Array_Part_Int(part_int_t **array_dev, - part_int_t size) +void Particles_3D::Allocate_Particles_GPU_Array_Part_Int(part_int_t **array_dev, part_int_t size) { size_t global_free, global_total; CudaSafeCall(cudaMemGetInfo(&global_free, &global_total)); @@ -130,16 +123,14 @@ void Particles_3D::Allocate_Particles_GPU_Array_Part_Int(part_int_t **array_dev, if (global_free < size * sizeof(part_int_t)) { printf("ERROR: Not enough global device memory \n"); printf(" Available Memory: %ld MB \n", global_free / 1000000); - printf(" Requested Memory: %ld MB \n", - size * sizeof(part_int_t) / 1000000); + printf(" Requested Memory: %ld MB \n", size * sizeof(part_int_t) / 1000000); exit(-1); } CudaSafeCall(cudaMalloc((void **)array_dev, size * sizeof(part_int_t))); cudaDeviceSynchronize(); } -void Particles_3D::Allocate_Particles_GPU_Array_bool(bool **array_dev, - part_int_t size) +void Particles_3D::Allocate_Particles_GPU_Array_bool(bool **array_dev, part_int_t size) { size_t global_free, global_total; CudaSafeCall(cudaMemGetInfo(&global_free, &global_total)); @@ -156,49 +147,39 @@ void Particles_3D::Allocate_Particles_GPU_Array_bool(bool **array_dev, cudaDeviceSynchronize(); } -void Particles_3D::Copy_Particles_Array_Real_Host_to_Device(Real *array_host, - Real *array_dev, - part_int_t size) +void Particles_3D::Copy_Particles_Array_Real_Host_to_Device(Real *array_host, Real *array_dev, part_int_t size) { - CudaSafeCall(cudaMemcpy(array_dev, array_host, size * sizeof(Real), - cudaMemcpyHostToDevice)); + CudaSafeCall(cudaMemcpy(array_dev, array_host, size * sizeof(Real), cudaMemcpyHostToDevice)); cudaDeviceSynchronize(); } -void Particles_3D::Copy_Particles_Array_Real_Device_to_Host(Real *array_dev, - Real *array_host, - part_int_t size) +void Particles_3D::Copy_Particles_Array_Real_Device_to_Host(Real *array_dev, Real *array_host, part_int_t size) { - CudaSafeCall(cudaMemcpy(array_host, array_dev, size * sizeof(Real), - cudaMemcpyDeviceToHost)); + CudaSafeCall(cudaMemcpy(array_host, array_dev, size * sizeof(Real), cudaMemcpyDeviceToHost)); cudaDeviceSynchronize(); } -void Particles_3D::Copy_Particles_Array_Int_Host_to_Device( - part_int_t *array_host, part_int_t *array_dev, part_int_t size) +void Particles_3D::Copy_Particles_Array_Int_Host_to_Device(part_int_t *array_host, part_int_t *array_dev, + part_int_t size) { - CudaSafeCall(cudaMemcpy(array_dev, array_host, size * sizeof(part_int_t), - cudaMemcpyHostToDevice)); + CudaSafeCall(cudaMemcpy(array_dev, array_host, size * sizeof(part_int_t), cudaMemcpyHostToDevice)); cudaDeviceSynchronize(); } -void Particles_3D::Copy_Particles_Array_Int_Device_to_Host( - part_int_t *array_dev, part_int_t *array_host, part_int_t size) +void Particles_3D::Copy_Particles_Array_Int_Device_to_Host(part_int_t *array_dev, part_int_t *array_host, + part_int_t size) { - CudaSafeCall(cudaMemcpy(array_host, array_dev, size * sizeof(part_int_t), - cudaMemcpyDeviceToHost)); + CudaSafeCall(cudaMemcpy(array_host, array_dev, size * sizeof(part_int_t), cudaMemcpyDeviceToHost)); cudaDeviceSynchronize(); } -__global__ void Set_Particles_Array_Real_Kernel(Real value, Real *array_dev, - part_int_t size) +__global__ void Set_Particles_Array_Real_Kernel(Real value, Real *array_dev, part_int_t size) { int tid = blockIdx.x * blockDim.x + threadIdx.x; if (tid < size) array_dev[tid] = value; } -void Particles_3D::Set_Particles_Array_Real(Real value, Real *array_dev, - part_int_t size) +void Particles_3D::Set_Particles_Array_Real(Real value, Real *array_dev, part_int_t size) { // set values for GPU kernels int ngrid = (size + TPB_PARTICLES - 1) / TPB_PARTICLES; @@ -206,8 +187,7 @@ void Particles_3D::Set_Particles_Array_Real(Real value, Real *array_dev, dim3 dim1dGrid(ngrid, 1, 1); // number of threads per 1D block dim3 dim1dBlock(TPB_PARTICLES, 1, 1); - hipLaunchKernelGGL(Set_Particles_Array_Real_Kernel, dim1dGrid, dim1dBlock, 0, - 0, value, array_dev, size); + hipLaunchKernelGGL(Set_Particles_Array_Real_Kernel, dim1dGrid, dim1dBlock, 0, 0, value, array_dev, size); CudaCheckError(); } diff --git a/src/particles/particles_boundaries.cpp b/src/particles/particles_boundaries.cpp index b8c12c7cf..24692be02 100644 --- a/src/particles/particles_boundaries.cpp +++ b/src/particles/particles_boundaries.cpp @@ -44,8 +44,7 @@ void Grid3D::Finish_Particles_Transfer(void) } // Wait for the MPI request and unload the transferred particles -void Grid3D::Wait_and_Unload_MPI_Comm_Particles_Buffers_BLOCK(int dir, - int *flags) +void Grid3D::Wait_and_Unload_MPI_Comm_Particles_Buffers_BLOCK(int dir, int *flags) { int iwait; int index = 0; @@ -99,8 +98,7 @@ void Grid3D::Unload_Particles_From_Buffers_BLOCK(int index, int *flags) // Wait for the Number of particles that will be transferred, and request the // MPI_Recv to receive the MPI buffer -void Grid3D::Wait_NTransfer_and_Request_Recv_Particles_Transfer_BLOCK( - int dir, int *flags) +void Grid3D::Wait_NTransfer_and_Request_Recv_Particles_Transfer_BLOCK(int dir, int *flags) { #ifdef PARTICLES if (!Particles.TRANSFER_PARTICLES_BOUNDARIES) return; @@ -137,15 +135,13 @@ void Grid3D::Wait_NTransfer_and_Request_Recv_Particles_Transfer_BLOCK( // wait for recv completion MPI_Waitany(wait_max, recv_request_n_particles, &index, &status); // depending on which face arrived, load the buffer into the ghost grid - Load_NTtransfer_and_Request_Receive_Particles_Transfer( - status.MPI_TAG, &ireq_particles_transfer); + Load_NTtransfer_and_Request_Receive_Particles_Transfer(status.MPI_TAG, &ireq_particles_transfer); } } // Load the Number of particles that will be received (Particles.n_recv) and // make the MPI_Irecv request for that buffer size -void Grid3D::Load_NTtransfer_and_Request_Receive_Particles_Transfer( - int index, int *ireq_particles_transfer) +void Grid3D::Load_NTtransfer_and_Request_Receive_Particles_Transfer(int index, int *ireq_particles_transfer) { int buffer_length; @@ -182,27 +178,20 @@ void Grid3D::Load_NTtransfer_and_Request_Receive_Particles_Transfer( #ifdef MPI_GPU if (buffer_length > Particles.G.recv_buffer_size_x0) { printf("Extending Particles Transfer Buffer "); - Extend_GPU_Array(&recv_buffer_x0_particles, - Particles.G.recv_buffer_size_x0, + Extend_GPU_Array(&recv_buffer_x0_particles, Particles.G.recv_buffer_size_x0, Particles.G.gpu_allocation_factor * buffer_length, true); - Particles.G.recv_buffer_size_x0 = - (part_int_t)Particles.G.gpu_allocation_factor * buffer_length; + Particles.G.recv_buffer_size_x0 = (part_int_t)Particles.G.gpu_allocation_factor * buffer_length; } #else - Check_and_Grow_Particles_Buffer(&recv_buffer_x0_particles, - &buffer_length_particles_x0_recv, - buffer_length); + Check_and_Grow_Particles_Buffer(&recv_buffer_x0_particles, &buffer_length_particles_x0_recv, buffer_length); #endif #endif #ifdef PARTICLES_CPU - Check_and_Grow_Particles_Buffer(&recv_buffer_x0_particles, - &buffer_length_particles_x0_recv, - buffer_length); + Check_and_Grow_Particles_Buffer(&recv_buffer_x0_particles, &buffer_length_particles_x0_recv, buffer_length); #endif // if ( Particles.n_recv_x0 > 0 ) std::cout << " Recv X0: " << // Particles.n_recv_x0 << std::endl; - MPI_Irecv(recv_buffer_x0_particles, buffer_length, MPI_CHREAL, source[0], 0, - world, + MPI_Irecv(recv_buffer_x0_particles, buffer_length, MPI_CHREAL, source[0], 0, world, &recv_request_particles_transfer[*ireq_particles_transfer]); } if (index == 1) { @@ -211,28 +200,21 @@ void Grid3D::Load_NTtransfer_and_Request_Receive_Particles_Transfer( #ifdef MPI_GPU if (buffer_length > Particles.G.recv_buffer_size_x1) { printf("Extending Particles Transfer Buffer "); - Extend_GPU_Array(&recv_buffer_x1_particles, - Particles.G.recv_buffer_size_x1, + Extend_GPU_Array(&recv_buffer_x1_particles, Particles.G.recv_buffer_size_x1, Particles.G.gpu_allocation_factor * buffer_length, true); - Particles.G.recv_buffer_size_x1 = - (part_int_t)Particles.G.gpu_allocation_factor * buffer_length; + Particles.G.recv_buffer_size_x1 = (part_int_t)Particles.G.gpu_allocation_factor * buffer_length; } #else - Check_and_Grow_Particles_Buffer(&recv_buffer_x1_particles, - &buffer_length_particles_x1_recv, - buffer_length); + Check_and_Grow_Particles_Buffer(&recv_buffer_x1_particles, &buffer_length_particles_x1_recv, buffer_length); #endif #endif #ifdef PARTICLES_CPU - Check_and_Grow_Particles_Buffer(&recv_buffer_x1_particles, - &buffer_length_particles_x1_recv, - buffer_length); + Check_and_Grow_Particles_Buffer(&recv_buffer_x1_particles, &buffer_length_particles_x1_recv, buffer_length); #endif // if ( Particles.n_recv_x1 > 0 ) if ( Particles.n_recv_x1 > 0 ) std::cout // << " Recv X1: " << Particles.n_recv_x1 << " " << procID << " from " // << source[1] << std::endl; - MPI_Irecv(recv_buffer_x1_particles, buffer_length, MPI_CHREAL, source[1], 1, - world, + MPI_Irecv(recv_buffer_x1_particles, buffer_length, MPI_CHREAL, source[1], 1, world, &recv_request_particles_transfer[*ireq_particles_transfer]); } if (index == 2) { @@ -241,27 +223,20 @@ void Grid3D::Load_NTtransfer_and_Request_Receive_Particles_Transfer( #ifdef MPI_GPU if (buffer_length > Particles.G.recv_buffer_size_y0) { printf("Extending Particles Transfer Buffer "); - Extend_GPU_Array(&recv_buffer_y0_particles, - Particles.G.recv_buffer_size_y0, + Extend_GPU_Array(&recv_buffer_y0_particles, Particles.G.recv_buffer_size_y0, Particles.G.gpu_allocation_factor * buffer_length, true); - Particles.G.recv_buffer_size_y0 = - (part_int_t)Particles.G.gpu_allocation_factor * buffer_length; + Particles.G.recv_buffer_size_y0 = (part_int_t)Particles.G.gpu_allocation_factor * buffer_length; } #else - Check_and_Grow_Particles_Buffer(&recv_buffer_y0_particles, - &buffer_length_particles_y0_recv, - buffer_length); + Check_and_Grow_Particles_Buffer(&recv_buffer_y0_particles, &buffer_length_particles_y0_recv, buffer_length); #endif #endif #ifdef PARTICLES_CPU - Check_and_Grow_Particles_Buffer(&recv_buffer_y0_particles, - &buffer_length_particles_y0_recv, - buffer_length); + Check_and_Grow_Particles_Buffer(&recv_buffer_y0_particles, &buffer_length_particles_y0_recv, buffer_length); #endif // if ( Particles.n_recv_y0 > 0 ) std::cout << " Recv Y0: " << // Particles.n_recv_y0 << std::endl; - MPI_Irecv(recv_buffer_y0_particles, buffer_length, MPI_CHREAL, source[2], 2, - world, + MPI_Irecv(recv_buffer_y0_particles, buffer_length, MPI_CHREAL, source[2], 2, world, &recv_request_particles_transfer[*ireq_particles_transfer]); } if (index == 3) { @@ -270,27 +245,20 @@ void Grid3D::Load_NTtransfer_and_Request_Receive_Particles_Transfer( #ifdef MPI_GPU if (buffer_length > Particles.G.recv_buffer_size_y1) { printf("Extending Particles Transfer Buffer "); - Extend_GPU_Array(&recv_buffer_y1_particles, - Particles.G.recv_buffer_size_y1, + Extend_GPU_Array(&recv_buffer_y1_particles, Particles.G.recv_buffer_size_y1, Particles.G.gpu_allocation_factor * buffer_length, true); - Particles.G.recv_buffer_size_y1 = - (part_int_t)Particles.G.gpu_allocation_factor * buffer_length; + Particles.G.recv_buffer_size_y1 = (part_int_t)Particles.G.gpu_allocation_factor * buffer_length; } #else - Check_and_Grow_Particles_Buffer(&recv_buffer_y1_particles, - &buffer_length_particles_y1_recv, - buffer_length); + Check_and_Grow_Particles_Buffer(&recv_buffer_y1_particles, &buffer_length_particles_y1_recv, buffer_length); #endif #endif #ifdef PARTICLES_CPU - Check_and_Grow_Particles_Buffer(&recv_buffer_y1_particles, - &buffer_length_particles_y1_recv, - buffer_length); + Check_and_Grow_Particles_Buffer(&recv_buffer_y1_particles, &buffer_length_particles_y1_recv, buffer_length); #endif // if ( Particles.n_recv_y1 > 0 ) std::cout << " Recv Y1: " << // Particles.n_recv_y1 << std::endl; - MPI_Irecv(recv_buffer_y1_particles, buffer_length, MPI_CHREAL, source[3], 3, - world, + MPI_Irecv(recv_buffer_y1_particles, buffer_length, MPI_CHREAL, source[3], 3, world, &recv_request_particles_transfer[*ireq_particles_transfer]); } if (index == 4) { @@ -299,27 +267,20 @@ void Grid3D::Load_NTtransfer_and_Request_Receive_Particles_Transfer( #ifdef MPI_GPU if (buffer_length > Particles.G.recv_buffer_size_z0) { printf("Extending Particles Transfer Buffer "); - Extend_GPU_Array(&recv_buffer_z0_particles, - Particles.G.recv_buffer_size_z0, + Extend_GPU_Array(&recv_buffer_z0_particles, Particles.G.recv_buffer_size_z0, Particles.G.gpu_allocation_factor * buffer_length, true); - Particles.G.recv_buffer_size_z0 = - (part_int_t)Particles.G.gpu_allocation_factor * buffer_length; + Particles.G.recv_buffer_size_z0 = (part_int_t)Particles.G.gpu_allocation_factor * buffer_length; } #else - Check_and_Grow_Particles_Buffer(&recv_buffer_z0_particles, - &buffer_length_particles_z0_recv, - buffer_length); + Check_and_Grow_Particles_Buffer(&recv_buffer_z0_particles, &buffer_length_particles_z0_recv, buffer_length); #endif #endif #ifdef PARTICLES_CPU - Check_and_Grow_Particles_Buffer(&recv_buffer_z0_particles, - &buffer_length_particles_z0_recv, - buffer_length); + Check_and_Grow_Particles_Buffer(&recv_buffer_z0_particles, &buffer_length_particles_z0_recv, buffer_length); #endif // if ( Particles.n_recv_z0 > 0 ) std::cout << " Recv Z0: " << // Particles.n_recv_z0 << std::endl; - MPI_Irecv(recv_buffer_z0_particles, buffer_length, MPI_CHREAL, source[4], 4, - world, + MPI_Irecv(recv_buffer_z0_particles, buffer_length, MPI_CHREAL, source[4], 4, world, &recv_request_particles_transfer[*ireq_particles_transfer]); } if (index == 5) { @@ -328,27 +289,20 @@ void Grid3D::Load_NTtransfer_and_Request_Receive_Particles_Transfer( #ifdef MPI_GPU if (buffer_length > Particles.G.recv_buffer_size_z1) { printf("Extending Particles Transfer Buffer "); - Extend_GPU_Array(&recv_buffer_z1_particles, - Particles.G.recv_buffer_size_z1, + Extend_GPU_Array(&recv_buffer_z1_particles, Particles.G.recv_buffer_size_z1, Particles.G.gpu_allocation_factor * buffer_length, true); - Particles.G.recv_buffer_size_z1 = - (part_int_t)Particles.G.gpu_allocation_factor * buffer_length; + Particles.G.recv_buffer_size_z1 = (part_int_t)Particles.G.gpu_allocation_factor * buffer_length; } #else - Check_and_Grow_Particles_Buffer(&recv_buffer_z1_particles, - &buffer_length_particles_z1_recv, - buffer_length); + Check_and_Grow_Particles_Buffer(&recv_buffer_z1_particles, &buffer_length_particles_z1_recv, buffer_length); #endif #endif #ifdef PARTICLES_CPU - Check_and_Grow_Particles_Buffer(&recv_buffer_z1_particles, - &buffer_length_particles_z1_recv, - buffer_length); + Check_and_Grow_Particles_Buffer(&recv_buffer_z1_particles, &buffer_length_particles_z1_recv, buffer_length); #endif // if ( Particles.n_recv_z1 >0 ) std::cout << " Recv Z1: " << // Particles.n_recv_z1 << std::endl; - MPI_Irecv(recv_buffer_z1_particles, buffer_length, MPI_CHREAL, source[5], 5, - world, + MPI_Irecv(recv_buffer_z1_particles, buffer_length, MPI_CHREAL, source[5], 5, world, &recv_request_particles_transfer[*ireq_particles_transfer]); } @@ -357,22 +311,18 @@ void Grid3D::Load_NTtransfer_and_Request_Receive_Particles_Transfer( // Make Send and Receive request for the number of particles that will be // transferred, and then load and send the transfer particles -void Grid3D::Load_and_Send_Particles_X0(int ireq_n_particles, - int ireq_particles_transfer) +void Grid3D::Load_and_Send_Particles_X0(int ireq_n_particles, int ireq_particles_transfer) { int buffer_length; Real *send_buffer_x0_particles; #ifdef PARTICLES_GPU send_buffer_x0_particles = d_send_buffer_x0_particles; - Particles.Load_Particles_to_Buffer_GPU(0, 0, send_buffer_x0_particles, - buffer_length_particles_x0_send); + Particles.Load_Particles_to_Buffer_GPU(0, 0, send_buffer_x0_particles, buffer_length_particles_x0_send); #endif // PARTICLES_GPU - MPI_Irecv(&Particles.n_recv_x0, 1, MPI_PART_INT, source[0], 0, world, - &recv_request_n_particles[ireq_n_particles]); - MPI_Isend(&Particles.n_send_x0, 1, MPI_PART_INT, dest[0], 1, world, - &send_request_n_particles[0]); + MPI_Irecv(&Particles.n_recv_x0, 1, MPI_PART_INT, source[0], 0, world, &recv_request_n_particles[ireq_n_particles]); + MPI_Isend(&Particles.n_send_x0, 1, MPI_PART_INT, dest[0], 1, world, &send_request_n_particles[0]); MPI_Request_free(send_request_n_particles); // if ( Particles.n_send_x0 > 0 ) if ( Particles.n_send_x0 > 0 ) std::cout // << " Sent X0: " << Particles.n_send_x0 << " " << procID << " to " << @@ -380,221 +330,183 @@ void Grid3D::Load_and_Send_Particles_X0(int ireq_n_particles, buffer_length = Particles.n_send_x0 * N_DATA_PER_PARTICLE_TRANSFER; #ifdef PARTICLES_CPU send_buffer_x0_particles = h_send_buffer_x0_particles; - Check_and_Grow_Particles_Buffer(&send_buffer_x0_particles, - &buffer_length_particles_x0_send, - buffer_length); - Particles.Load_Particles_to_Buffer_CPU(0, 0, send_buffer_x0_particles, - buffer_length_particles_x0_send); + Check_and_Grow_Particles_Buffer(&send_buffer_x0_particles, &buffer_length_particles_x0_send, buffer_length); + Particles.Load_Particles_to_Buffer_CPU(0, 0, send_buffer_x0_particles, buffer_length_particles_x0_send); #endif // PARTICLES_CPU #if defined(PARTICLES_GPU) && !defined(MPI_GPU) - cudaMemcpy(h_send_buffer_x0_particles, d_send_buffer_x0_particles, - buffer_length * sizeof(Real), cudaMemcpyDeviceToHost); + cudaMemcpy(h_send_buffer_x0_particles, d_send_buffer_x0_particles, buffer_length * sizeof(Real), + cudaMemcpyDeviceToHost); send_buffer_x0_particles = h_send_buffer_x0_particles; #endif - MPI_Isend(send_buffer_x0_particles, buffer_length, MPI_CHREAL, dest[0], 1, - world, &send_request_particles_transfer[ireq_particles_transfer]); + MPI_Isend(send_buffer_x0_particles, buffer_length, MPI_CHREAL, dest[0], 1, world, + &send_request_particles_transfer[ireq_particles_transfer]); MPI_Request_free(send_request_particles_transfer + ireq_particles_transfer); } -void Grid3D::Load_and_Send_Particles_X1(int ireq_n_particles, - int ireq_particles_transfer) +void Grid3D::Load_and_Send_Particles_X1(int ireq_n_particles, int ireq_particles_transfer) { int buffer_length; Real *send_buffer_x1_particles; #ifdef PARTICLES_GPU send_buffer_x1_particles = d_send_buffer_x1_particles; - Particles.Load_Particles_to_Buffer_GPU(0, 1, send_buffer_x1_particles, - buffer_length_particles_x1_send); + Particles.Load_Particles_to_Buffer_GPU(0, 1, send_buffer_x1_particles, buffer_length_particles_x1_send); #endif // PARTICLES_GPU - MPI_Irecv(&Particles.n_recv_x1, 1, MPI_PART_INT, source[1], 1, world, - &recv_request_n_particles[ireq_n_particles]); - MPI_Isend(&Particles.n_send_x1, 1, MPI_PART_INT, dest[1], 0, world, - &send_request_n_particles[1]); + MPI_Irecv(&Particles.n_recv_x1, 1, MPI_PART_INT, source[1], 1, world, &recv_request_n_particles[ireq_n_particles]); + MPI_Isend(&Particles.n_send_x1, 1, MPI_PART_INT, dest[1], 0, world, &send_request_n_particles[1]); MPI_Request_free(send_request_n_particles + 1); // if ( Particles.n_send_x1 > 0 ) std::cout << " Sent X1: " << // Particles.n_send_x1 << std::endl; buffer_length = Particles.n_send_x1 * N_DATA_PER_PARTICLE_TRANSFER; #ifdef PARTICLES_CPU send_buffer_x1_particles = h_send_buffer_x1_particles; - Check_and_Grow_Particles_Buffer(&send_buffer_x1_particles, - &buffer_length_particles_x1_send, - buffer_length); - Particles.Load_Particles_to_Buffer_CPU(0, 1, send_buffer_x1_particles, - buffer_length_particles_x1_send); + Check_and_Grow_Particles_Buffer(&send_buffer_x1_particles, &buffer_length_particles_x1_send, buffer_length); + Particles.Load_Particles_to_Buffer_CPU(0, 1, send_buffer_x1_particles, buffer_length_particles_x1_send); #endif // PARTICLES_CPU #if defined(PARTICLES_GPU) && !defined(MPI_GPU) - cudaMemcpy(h_send_buffer_x1_particles, d_send_buffer_x1_particles, - buffer_length * sizeof(Real), cudaMemcpyDeviceToHost); + cudaMemcpy(h_send_buffer_x1_particles, d_send_buffer_x1_particles, buffer_length * sizeof(Real), + cudaMemcpyDeviceToHost); send_buffer_x1_particles = h_send_buffer_x1_particles; #endif - MPI_Isend(send_buffer_x1_particles, buffer_length, MPI_CHREAL, dest[1], 0, - world, &send_request_particles_transfer[ireq_particles_transfer]); + MPI_Isend(send_buffer_x1_particles, buffer_length, MPI_CHREAL, dest[1], 0, world, + &send_request_particles_transfer[ireq_particles_transfer]); MPI_Request_free(send_request_particles_transfer + ireq_particles_transfer); } -void Grid3D::Load_and_Send_Particles_Y0(int ireq_n_particles, - int ireq_particles_transfer) +void Grid3D::Load_and_Send_Particles_Y0(int ireq_n_particles, int ireq_particles_transfer) { int buffer_length; Real *send_buffer_y0_particles; #ifdef PARTICLES_GPU send_buffer_y0_particles = d_send_buffer_y0_particles; - Particles.Load_Particles_to_Buffer_GPU(1, 0, send_buffer_y0_particles, - buffer_length_particles_y0_send); + Particles.Load_Particles_to_Buffer_GPU(1, 0, send_buffer_y0_particles, buffer_length_particles_y0_send); #endif // PARTICLES_GPU - MPI_Isend(&Particles.n_send_y0, 1, MPI_PART_INT, dest[2], 3, world, - &send_request_n_particles[0]); + MPI_Isend(&Particles.n_send_y0, 1, MPI_PART_INT, dest[2], 3, world, &send_request_n_particles[0]); MPI_Request_free(send_request_n_particles); - MPI_Irecv(&Particles.n_recv_y0, 1, MPI_PART_INT, source[2], 2, world, - &recv_request_n_particles[ireq_n_particles]); + MPI_Irecv(&Particles.n_recv_y0, 1, MPI_PART_INT, source[2], 2, world, &recv_request_n_particles[ireq_n_particles]); // if ( Particles.n_send_y0 > 0 ) std::cout << " Sent Y0: " << // Particles.n_send_y0 << std::endl; buffer_length = Particles.n_send_y0 * N_DATA_PER_PARTICLE_TRANSFER; #ifdef PARTICLES_CPU send_buffer_y0_particles = h_send_buffer_y0_particles; - Check_and_Grow_Particles_Buffer(&send_buffer_y0_particles, - &buffer_length_particles_y0_send, - buffer_length); - Particles.Load_Particles_to_Buffer_CPU(1, 0, send_buffer_y0_particles, - buffer_length_particles_y0_send); + Check_and_Grow_Particles_Buffer(&send_buffer_y0_particles, &buffer_length_particles_y0_send, buffer_length); + Particles.Load_Particles_to_Buffer_CPU(1, 0, send_buffer_y0_particles, buffer_length_particles_y0_send); #endif // PARTICLES_CPU #if defined(PARTICLES_GPU) && !defined(MPI_GPU) - cudaMemcpy(h_send_buffer_y0_particles, d_send_buffer_y0_particles, - buffer_length * sizeof(Real), cudaMemcpyDeviceToHost); + cudaMemcpy(h_send_buffer_y0_particles, d_send_buffer_y0_particles, buffer_length * sizeof(Real), + cudaMemcpyDeviceToHost); send_buffer_y0_particles = h_send_buffer_y0_particles; #endif - MPI_Isend(send_buffer_y0_particles, buffer_length, MPI_CHREAL, dest[2], 3, - world, &send_request_particles_transfer[ireq_particles_transfer]); + MPI_Isend(send_buffer_y0_particles, buffer_length, MPI_CHREAL, dest[2], 3, world, + &send_request_particles_transfer[ireq_particles_transfer]); MPI_Request_free(send_request_particles_transfer + ireq_particles_transfer); } -void Grid3D::Load_and_Send_Particles_Y1(int ireq_n_particles, - int ireq_particles_transfer) +void Grid3D::Load_and_Send_Particles_Y1(int ireq_n_particles, int ireq_particles_transfer) { int buffer_length; Real *send_buffer_y1_particles; #ifdef PARTICLES_GPU send_buffer_y1_particles = d_send_buffer_y1_particles; - Particles.Load_Particles_to_Buffer_GPU(1, 1, send_buffer_y1_particles, - buffer_length_particles_y1_send); + Particles.Load_Particles_to_Buffer_GPU(1, 1, send_buffer_y1_particles, buffer_length_particles_y1_send); #endif // PARTICLES_GPU - MPI_Isend(&Particles.n_send_y1, 1, MPI_PART_INT, dest[3], 2, world, - &send_request_n_particles[1]); + MPI_Isend(&Particles.n_send_y1, 1, MPI_PART_INT, dest[3], 2, world, &send_request_n_particles[1]); MPI_Request_free(send_request_n_particles + 1); - MPI_Irecv(&Particles.n_recv_y1, 1, MPI_PART_INT, source[3], 3, world, - &recv_request_n_particles[ireq_n_particles]); + MPI_Irecv(&Particles.n_recv_y1, 1, MPI_PART_INT, source[3], 3, world, &recv_request_n_particles[ireq_n_particles]); // if ( Particles.n_send_y1 > 0 ) std::cout << " Sent Y1: " << // Particles.n_send_y1 << std::endl; buffer_length = Particles.n_send_y1 * N_DATA_PER_PARTICLE_TRANSFER; #ifdef PARTICLES_CPU send_buffer_y1_particles = h_send_buffer_y1_particles; - Check_and_Grow_Particles_Buffer(&send_buffer_y1_particles, - &buffer_length_particles_y1_send, - buffer_length); - Particles.Load_Particles_to_Buffer_CPU(1, 1, send_buffer_y1_particles, - buffer_length_particles_y1_send); + Check_and_Grow_Particles_Buffer(&send_buffer_y1_particles, &buffer_length_particles_y1_send, buffer_length); + Particles.Load_Particles_to_Buffer_CPU(1, 1, send_buffer_y1_particles, buffer_length_particles_y1_send); #endif // PARTICLES_CPU #if defined(PARTICLES_GPU) && !defined(MPI_GPU) - cudaMemcpy(h_send_buffer_y1_particles, d_send_buffer_y1_particles, - buffer_length * sizeof(Real), cudaMemcpyDeviceToHost); + cudaMemcpy(h_send_buffer_y1_particles, d_send_buffer_y1_particles, buffer_length * sizeof(Real), + cudaMemcpyDeviceToHost); send_buffer_y1_particles = h_send_buffer_y1_particles; #endif - MPI_Isend(send_buffer_y1_particles, buffer_length, MPI_CHREAL, dest[3], 2, - world, &send_request_particles_transfer[ireq_particles_transfer]); + MPI_Isend(send_buffer_y1_particles, buffer_length, MPI_CHREAL, dest[3], 2, world, + &send_request_particles_transfer[ireq_particles_transfer]); MPI_Request_free(send_request_particles_transfer + ireq_particles_transfer); } -void Grid3D::Load_and_Send_Particles_Z0(int ireq_n_particles, - int ireq_particles_transfer) +void Grid3D::Load_and_Send_Particles_Z0(int ireq_n_particles, int ireq_particles_transfer) { int buffer_length; Real *send_buffer_z0_particles; #ifdef PARTICLES_GPU send_buffer_z0_particles = d_send_buffer_z0_particles; - Particles.Load_Particles_to_Buffer_GPU(2, 0, send_buffer_z0_particles, - buffer_length_particles_z0_send); + Particles.Load_Particles_to_Buffer_GPU(2, 0, send_buffer_z0_particles, buffer_length_particles_z0_send); #endif // PARTICLES_GPU - MPI_Isend(&Particles.n_send_z0, 1, MPI_PART_INT, dest[4], 5, world, - &send_request_n_particles[0]); + MPI_Isend(&Particles.n_send_z0, 1, MPI_PART_INT, dest[4], 5, world, &send_request_n_particles[0]); MPI_Request_free(send_request_n_particles); - MPI_Irecv(&Particles.n_recv_z0, 1, MPI_PART_INT, source[4], 4, world, - &recv_request_n_particles[ireq_n_particles]); + MPI_Irecv(&Particles.n_recv_z0, 1, MPI_PART_INT, source[4], 4, world, &recv_request_n_particles[ireq_n_particles]); // if ( Particles.n_send_z0 > 0 ) std::cout << " Sent Z0: " << // Particles.n_send_z0 << std::endl; buffer_length = Particles.n_send_z0 * N_DATA_PER_PARTICLE_TRANSFER; #ifdef PARTICLES_CPU send_buffer_z0_particles = h_send_buffer_z0_particles; - Check_and_Grow_Particles_Buffer(&send_buffer_z0_particles, - &buffer_length_particles_z0_send, - buffer_length); - Particles.Load_Particles_to_Buffer_CPU(2, 0, send_buffer_z0_particles, - buffer_length_particles_z0_send); + Check_and_Grow_Particles_Buffer(&send_buffer_z0_particles, &buffer_length_particles_z0_send, buffer_length); + Particles.Load_Particles_to_Buffer_CPU(2, 0, send_buffer_z0_particles, buffer_length_particles_z0_send); #endif // PARTICLES_CPU #if defined(PARTICLES_GPU) && !defined(MPI_GPU) - cudaMemcpy(h_send_buffer_z0_particles, d_send_buffer_z0_particles, - buffer_length * sizeof(Real), cudaMemcpyDeviceToHost); + cudaMemcpy(h_send_buffer_z0_particles, d_send_buffer_z0_particles, buffer_length * sizeof(Real), + cudaMemcpyDeviceToHost); send_buffer_z0_particles = h_send_buffer_z0_particles; #endif - MPI_Isend(send_buffer_z0_particles, buffer_length, MPI_CHREAL, dest[4], 5, - world, &send_request_particles_transfer[ireq_particles_transfer]); + MPI_Isend(send_buffer_z0_particles, buffer_length, MPI_CHREAL, dest[4], 5, world, + &send_request_particles_transfer[ireq_particles_transfer]); MPI_Request_free(send_request_particles_transfer + ireq_particles_transfer); } -void Grid3D::Load_and_Send_Particles_Z1(int ireq_n_particles, - int ireq_particles_transfer) +void Grid3D::Load_and_Send_Particles_Z1(int ireq_n_particles, int ireq_particles_transfer) { int buffer_length; Real *send_buffer_z1_particles; #ifdef PARTICLES_GPU send_buffer_z1_particles = d_send_buffer_z1_particles; - Particles.Load_Particles_to_Buffer_GPU(2, 1, send_buffer_z1_particles, - buffer_length_particles_z1_send); + Particles.Load_Particles_to_Buffer_GPU(2, 1, send_buffer_z1_particles, buffer_length_particles_z1_send); #endif // PARTICLES_GPU - MPI_Isend(&Particles.n_send_z1, 1, MPI_PART_INT, dest[5], 4, world, - &send_request_n_particles[1]); + MPI_Isend(&Particles.n_send_z1, 1, MPI_PART_INT, dest[5], 4, world, &send_request_n_particles[1]); MPI_Request_free(send_request_n_particles + 1); - MPI_Irecv(&Particles.n_recv_z1, 1, MPI_PART_INT, source[5], 5, world, - &recv_request_n_particles[ireq_n_particles]); + MPI_Irecv(&Particles.n_recv_z1, 1, MPI_PART_INT, source[5], 5, world, &recv_request_n_particles[ireq_n_particles]); // if ( Particles.n_send_z1 > 0 ) std::cout << " Sent Z1: " << // Particles.n_send_z1 << std::endl; buffer_length = Particles.n_send_z1 * N_DATA_PER_PARTICLE_TRANSFER; #ifdef PARTICLES_CPU send_buffer_z1_particles = h_send_buffer_z1_particles; - Check_and_Grow_Particles_Buffer(&send_buffer_z1_particles, - &buffer_length_particles_z1_send, - buffer_length); - Particles.Load_Particles_to_Buffer_CPU(2, 1, send_buffer_z1_particles, - buffer_length_particles_z1_send); + Check_and_Grow_Particles_Buffer(&send_buffer_z1_particles, &buffer_length_particles_z1_send, buffer_length); + Particles.Load_Particles_to_Buffer_CPU(2, 1, send_buffer_z1_particles, buffer_length_particles_z1_send); #endif // PARTICLES_CPU #if defined(PARTICLES_GPU) && !defined(MPI_GPU) - cudaMemcpy(h_send_buffer_z1_particles, d_send_buffer_z1_particles, - buffer_length * sizeof(Real), cudaMemcpyDeviceToHost); + cudaMemcpy(h_send_buffer_z1_particles, d_send_buffer_z1_particles, buffer_length * sizeof(Real), + cudaMemcpyDeviceToHost); send_buffer_z1_particles = h_send_buffer_z1_particles; #endif - MPI_Isend(send_buffer_z1_particles, buffer_length, MPI_CHREAL, dest[5], 4, - world, &send_request_particles_transfer[ireq_particles_transfer]); + MPI_Isend(send_buffer_z1_particles, buffer_length, MPI_CHREAL, dest[5], 4, world, + &send_request_particles_transfer[ireq_particles_transfer]); MPI_Request_free(send_request_particles_transfer + ireq_particles_transfer); } @@ -604,20 +516,16 @@ void Grid3D::Unload_Particles_from_Buffer_X0(int *flags) { #ifdef PARTICLES_CPU Particles.Unload_Particles_from_Buffer_CPU( - 0, 0, h_recv_buffer_x0_particles, Particles.n_recv_x0, - h_send_buffer_y0_particles, h_send_buffer_y1_particles, - h_send_buffer_z0_particles, h_send_buffer_z1_particles, - buffer_length_particles_y0_send, buffer_length_particles_y1_send, - buffer_length_particles_z0_send, buffer_length_particles_z1_send, flags); + 0, 0, h_recv_buffer_x0_particles, Particles.n_recv_x0, h_send_buffer_y0_particles, h_send_buffer_y1_particles, + h_send_buffer_z0_particles, h_send_buffer_z1_particles, buffer_length_particles_y0_send, + buffer_length_particles_y1_send, buffer_length_particles_z0_send, buffer_length_particles_z1_send, flags); #endif // PARTICLES_CPU #ifdef PARTICLES_GPU #ifndef MPI_GPU - cudaMemcpy(d_recv_buffer_x0_particles, h_recv_buffer_x0_particles, - buffer_length_particles_x0_recv * sizeof(Real), + cudaMemcpy(d_recv_buffer_x0_particles, h_recv_buffer_x0_particles, buffer_length_particles_x0_recv * sizeof(Real), cudaMemcpyHostToDevice); #endif - Particles.Unload_Particles_from_Buffer_GPU(0, 0, d_recv_buffer_x0_particles, - Particles.n_recv_x0); + Particles.Unload_Particles_from_Buffer_GPU(0, 0, d_recv_buffer_x0_particles, Particles.n_recv_x0); #endif // PARTICLES_GPU } @@ -625,20 +533,16 @@ void Grid3D::Unload_Particles_from_Buffer_X1(int *flags) { #ifdef PARTICLES_CPU Particles.Unload_Particles_from_Buffer_CPU( - 0, 1, h_recv_buffer_x1_particles, Particles.n_recv_x1, - h_send_buffer_y0_particles, h_send_buffer_y1_particles, - h_send_buffer_z0_particles, h_send_buffer_z1_particles, - buffer_length_particles_y0_send, buffer_length_particles_y1_send, - buffer_length_particles_z0_send, buffer_length_particles_z1_send, flags); + 0, 1, h_recv_buffer_x1_particles, Particles.n_recv_x1, h_send_buffer_y0_particles, h_send_buffer_y1_particles, + h_send_buffer_z0_particles, h_send_buffer_z1_particles, buffer_length_particles_y0_send, + buffer_length_particles_y1_send, buffer_length_particles_z0_send, buffer_length_particles_z1_send, flags); #endif // PARTICLES_CPU #ifdef PARTICLES_GPU #ifndef MPI_GPU - cudaMemcpy(d_recv_buffer_x1_particles, h_recv_buffer_x1_particles, - buffer_length_particles_x1_recv * sizeof(Real), + cudaMemcpy(d_recv_buffer_x1_particles, h_recv_buffer_x1_particles, buffer_length_particles_x1_recv * sizeof(Real), cudaMemcpyHostToDevice); #endif - Particles.Unload_Particles_from_Buffer_GPU(0, 1, d_recv_buffer_x1_particles, - Particles.n_recv_x1); + Particles.Unload_Particles_from_Buffer_GPU(0, 1, d_recv_buffer_x1_particles, Particles.n_recv_x1); #endif // PARTICLES_GPU } @@ -646,20 +550,16 @@ void Grid3D::Unload_Particles_from_Buffer_Y0(int *flags) { #ifdef PARTICLES_CPU Particles.Unload_Particles_from_Buffer_CPU( - 1, 0, h_recv_buffer_y0_particles, Particles.n_recv_y0, - h_send_buffer_y0_particles, h_send_buffer_y1_particles, - h_send_buffer_z0_particles, h_send_buffer_z1_particles, - buffer_length_particles_y0_send, buffer_length_particles_y1_send, - buffer_length_particles_z0_send, buffer_length_particles_z1_send, flags); + 1, 0, h_recv_buffer_y0_particles, Particles.n_recv_y0, h_send_buffer_y0_particles, h_send_buffer_y1_particles, + h_send_buffer_z0_particles, h_send_buffer_z1_particles, buffer_length_particles_y0_send, + buffer_length_particles_y1_send, buffer_length_particles_z0_send, buffer_length_particles_z1_send, flags); #endif // PARTICLES_CPU #ifdef PARTICLES_GPU #ifndef MPI_GPU - cudaMemcpy(d_recv_buffer_y0_particles, h_recv_buffer_y0_particles, - buffer_length_particles_y0_recv * sizeof(Real), + cudaMemcpy(d_recv_buffer_y0_particles, h_recv_buffer_y0_particles, buffer_length_particles_y0_recv * sizeof(Real), cudaMemcpyHostToDevice); #endif - Particles.Unload_Particles_from_Buffer_GPU(1, 0, d_recv_buffer_y0_particles, - Particles.n_recv_y0); + Particles.Unload_Particles_from_Buffer_GPU(1, 0, d_recv_buffer_y0_particles, Particles.n_recv_y0); #endif // PARTICLES_GPU } @@ -667,20 +567,16 @@ void Grid3D::Unload_Particles_from_Buffer_Y1(int *flags) { #ifdef PARTICLES_CPU Particles.Unload_Particles_from_Buffer_CPU( - 1, 1, h_recv_buffer_y1_particles, Particles.n_recv_y1, - h_send_buffer_y0_particles, h_send_buffer_y1_particles, - h_send_buffer_z0_particles, h_send_buffer_z1_particles, - buffer_length_particles_y0_send, buffer_length_particles_y1_send, - buffer_length_particles_z0_send, buffer_length_particles_z1_send, flags); + 1, 1, h_recv_buffer_y1_particles, Particles.n_recv_y1, h_send_buffer_y0_particles, h_send_buffer_y1_particles, + h_send_buffer_z0_particles, h_send_buffer_z1_particles, buffer_length_particles_y0_send, + buffer_length_particles_y1_send, buffer_length_particles_z0_send, buffer_length_particles_z1_send, flags); #endif // PARTICLES_CPU #ifdef PARTICLES_GPU #ifndef MPI_GPU - cudaMemcpy(d_recv_buffer_y1_particles, h_recv_buffer_y1_particles, - buffer_length_particles_y1_recv * sizeof(Real), + cudaMemcpy(d_recv_buffer_y1_particles, h_recv_buffer_y1_particles, buffer_length_particles_y1_recv * sizeof(Real), cudaMemcpyHostToDevice); #endif - Particles.Unload_Particles_from_Buffer_GPU(1, 1, d_recv_buffer_y1_particles, - Particles.n_recv_y1); + Particles.Unload_Particles_from_Buffer_GPU(1, 1, d_recv_buffer_y1_particles, Particles.n_recv_y1); #endif // PARTICLES_GPU } @@ -688,20 +584,16 @@ void Grid3D::Unload_Particles_from_Buffer_Z0(int *flags) { #ifdef PARTICLES_CPU Particles.Unload_Particles_from_Buffer_CPU( - 2, 0, h_recv_buffer_z0_particles, Particles.n_recv_z0, - h_send_buffer_y0_particles, h_send_buffer_y1_particles, - h_send_buffer_z0_particles, h_send_buffer_z1_particles, - buffer_length_particles_y0_send, buffer_length_particles_y1_send, - buffer_length_particles_z0_send, buffer_length_particles_z1_send, flags); + 2, 0, h_recv_buffer_z0_particles, Particles.n_recv_z0, h_send_buffer_y0_particles, h_send_buffer_y1_particles, + h_send_buffer_z0_particles, h_send_buffer_z1_particles, buffer_length_particles_y0_send, + buffer_length_particles_y1_send, buffer_length_particles_z0_send, buffer_length_particles_z1_send, flags); #endif // PARTICLES_CPU #ifdef PARTICLES_GPU #ifndef MPI_GPU - cudaMemcpy(d_recv_buffer_z0_particles, h_recv_buffer_z0_particles, - buffer_length_particles_z0_recv * sizeof(Real), + cudaMemcpy(d_recv_buffer_z0_particles, h_recv_buffer_z0_particles, buffer_length_particles_z0_recv * sizeof(Real), cudaMemcpyHostToDevice); #endif - Particles.Unload_Particles_from_Buffer_GPU(2, 0, d_recv_buffer_z0_particles, - Particles.n_recv_z0); + Particles.Unload_Particles_from_Buffer_GPU(2, 0, d_recv_buffer_z0_particles, Particles.n_recv_z0); #endif // PARTICLES_GPU } @@ -709,20 +601,16 @@ void Grid3D::Unload_Particles_from_Buffer_Z1(int *flags) { #ifdef PARTICLES_CPU Particles.Unload_Particles_from_Buffer_CPU( - 2, 1, h_recv_buffer_z1_particles, Particles.n_recv_z1, - h_send_buffer_y0_particles, h_send_buffer_y1_particles, - h_send_buffer_z0_particles, h_send_buffer_z1_particles, - buffer_length_particles_y0_send, buffer_length_particles_y1_send, - buffer_length_particles_z0_send, buffer_length_particles_z1_send, flags); + 2, 1, h_recv_buffer_z1_particles, Particles.n_recv_z1, h_send_buffer_y0_particles, h_send_buffer_y1_particles, + h_send_buffer_z0_particles, h_send_buffer_z1_particles, buffer_length_particles_y0_send, + buffer_length_particles_y1_send, buffer_length_particles_z0_send, buffer_length_particles_z1_send, flags); #endif // PARTICLES_CPU #ifdef PARTICLES_GPU #ifndef MPI_GPU - cudaMemcpy(d_recv_buffer_z1_particles, h_recv_buffer_z1_particles, - buffer_length_particles_z1_recv * sizeof(Real), + cudaMemcpy(d_recv_buffer_z1_particles, h_recv_buffer_z1_particles, buffer_length_particles_z1_recv * sizeof(Real), cudaMemcpyHostToDevice); #endif - Particles.Unload_Particles_from_Buffer_GPU(2, 1, d_recv_buffer_z1_particles, - Particles.n_recv_z1); + Particles.Unload_Particles_from_Buffer_GPU(2, 1, d_recv_buffer_z1_particles, Particles.n_recv_z1); #endif // PARTICLES_GPU } @@ -807,19 +695,15 @@ int Particles_3D::Select_Particles_to_Transfer_GPU(int direction, int side) // particles that will be sent and load the particles data into the transfer // buffers n_transfer = Select_Particles_to_Transfer_GPU_function( - n_local, side, domainMin, domainMax, pos, G.n_transfer_d, G.n_transfer_h, - G.transfer_particles_flags_d, G.transfer_particles_indices_d, - G.replace_particles_indices_d, G.transfer_particles_prefix_sum_d, + n_local, side, domainMin, domainMax, pos, G.n_transfer_d, G.n_transfer_h, G.transfer_particles_flags_d, + G.transfer_particles_indices_d, G.replace_particles_indices_d, G.transfer_particles_prefix_sum_d, G.transfer_particles_prefix_sum_blocks_d); CHECK(cudaDeviceSynchronize()); return n_transfer; } -void Particles_3D::Copy_Transfer_Particles_to_Buffer_GPU(int n_transfer, - int direction, - int side, - Real *send_buffer_h, +void Particles_3D::Copy_Transfer_Particles_to_Buffer_GPU(int n_transfer, int direction, int side, Real *send_buffer_h, int buffer_length) { part_int_t *n_send; @@ -892,56 +776,44 @@ void Particles_3D::Copy_Transfer_Particles_to_Buffer_GPU(int n_transfer, if ((*n_send + n_transfer) * N_DATA_PER_PARTICLE_TRANSFER > *buffer_size) { printf("Extending Particles Transfer Buffer "); Extend_GPU_Array(&send_buffer_d, *buffer_size, - G.gpu_allocation_factor * (*n_send + n_transfer) * - N_DATA_PER_PARTICLE_TRANSFER, - true); - *buffer_size = (part_int_t)G.gpu_allocation_factor * - (*n_send + n_transfer) * N_DATA_PER_PARTICLE_TRANSFER; + G.gpu_allocation_factor * (*n_send + n_transfer) * N_DATA_PER_PARTICLE_TRANSFER, true); + *buffer_size = (part_int_t)G.gpu_allocation_factor * (*n_send + n_transfer) * N_DATA_PER_PARTICLE_TRANSFER; } // Load the particles that will be transferred into the buffers n_fields_to_transfer = N_DATA_PER_PARTICLE_TRANSFER; - Load_Particles_to_Transfer_GPU_function( - n_transfer, ++field_id, n_fields_to_transfer, pos_x_dev, - G.transfer_particles_indices_d, send_buffer_d, domainMin, domainMax, - bt_pos_x); - Load_Particles_to_Transfer_GPU_function( - n_transfer, ++field_id, n_fields_to_transfer, pos_y_dev, - G.transfer_particles_indices_d, send_buffer_d, domainMin, domainMax, - bt_pos_y); - Load_Particles_to_Transfer_GPU_function( - n_transfer, ++field_id, n_fields_to_transfer, pos_z_dev, - G.transfer_particles_indices_d, send_buffer_d, domainMin, domainMax, - bt_pos_z); - Load_Particles_to_Transfer_GPU_function( - n_transfer, ++field_id, n_fields_to_transfer, vel_x_dev, - G.transfer_particles_indices_d, send_buffer_d, domainMin, domainMax, - bt_non_pos); - Load_Particles_to_Transfer_GPU_function( - n_transfer, ++field_id, n_fields_to_transfer, vel_y_dev, - G.transfer_particles_indices_d, send_buffer_d, domainMin, domainMax, - bt_non_pos); - Load_Particles_to_Transfer_GPU_function( - n_transfer, ++field_id, n_fields_to_transfer, vel_z_dev, - G.transfer_particles_indices_d, send_buffer_d, domainMin, domainMax, - bt_non_pos); + Load_Particles_to_Transfer_GPU_function(n_transfer, ++field_id, n_fields_to_transfer, pos_x_dev, + G.transfer_particles_indices_d, send_buffer_d, domainMin, domainMax, + bt_pos_x); + Load_Particles_to_Transfer_GPU_function(n_transfer, ++field_id, n_fields_to_transfer, pos_y_dev, + G.transfer_particles_indices_d, send_buffer_d, domainMin, domainMax, + bt_pos_y); + Load_Particles_to_Transfer_GPU_function(n_transfer, ++field_id, n_fields_to_transfer, pos_z_dev, + G.transfer_particles_indices_d, send_buffer_d, domainMin, domainMax, + bt_pos_z); + Load_Particles_to_Transfer_GPU_function(n_transfer, ++field_id, n_fields_to_transfer, vel_x_dev, + G.transfer_particles_indices_d, send_buffer_d, domainMin, domainMax, + bt_non_pos); + Load_Particles_to_Transfer_GPU_function(n_transfer, ++field_id, n_fields_to_transfer, vel_y_dev, + G.transfer_particles_indices_d, send_buffer_d, domainMin, domainMax, + bt_non_pos); + Load_Particles_to_Transfer_GPU_function(n_transfer, ++field_id, n_fields_to_transfer, vel_z_dev, + G.transfer_particles_indices_d, send_buffer_d, domainMin, domainMax, + bt_non_pos); #ifndef SINGLE_PARTICLE_MASS - Load_Particles_to_Transfer_GPU_function( - n_transfer, ++field_id, n_fields_to_transfer, mass_dev, - G.transfer_particles_indices_d, send_buffer_d, domainMin, domainMax, - bt_non_pos); + Load_Particles_to_Transfer_GPU_function(n_transfer, ++field_id, n_fields_to_transfer, mass_dev, + G.transfer_particles_indices_d, send_buffer_d, domainMin, domainMax, + bt_non_pos); #endif #ifdef PARTICLE_IDS - Load_Particles_to_Transfer_Int_GPU_function( - n_transfer, ++field_id, n_fields_to_transfer, partIDs_dev, - G.transfer_particles_indices_d, send_buffer_d, domainMin, domainMax, - bt_non_pos); + Load_Particles_to_Transfer_Int_GPU_function(n_transfer, ++field_id, n_fields_to_transfer, partIDs_dev, + G.transfer_particles_indices_d, send_buffer_d, domainMin, domainMax, + bt_non_pos); #endif #ifdef PARTICLE_AGE - Load_Particles_to_Transfer_GPU_function( - n_transfer, ++field_id, n_fields_to_transfer, age_dev, - G.transfer_particles_indices_d, send_buffer_d, domainMin, domainMax, - bt_non_pos); + Load_Particles_to_Transfer_GPU_function(n_transfer, ++field_id, n_fields_to_transfer, age_dev, + G.transfer_particles_indices_d, send_buffer_d, domainMin, domainMax, + bt_non_pos); #endif CHECK(cudaDeviceSynchronize()); @@ -952,38 +824,29 @@ void Particles_3D::Copy_Transfer_Particles_to_Buffer_GPU(int n_transfer, void Particles_3D::Replace_Tranfered_Particles_GPU(int n_transfer) { // Replace the particles that were transferred - Replace_Transfered_Particles_GPU_function( - n_transfer, pos_x_dev, G.transfer_particles_indices_d, - G.replace_particles_indices_d, false); - Replace_Transfered_Particles_GPU_function( - n_transfer, pos_y_dev, G.transfer_particles_indices_d, - G.replace_particles_indices_d, false); - Replace_Transfered_Particles_GPU_function( - n_transfer, pos_z_dev, G.transfer_particles_indices_d, - G.replace_particles_indices_d, false); - Replace_Transfered_Particles_GPU_function( - n_transfer, vel_x_dev, G.transfer_particles_indices_d, - G.replace_particles_indices_d, false); - Replace_Transfered_Particles_GPU_function( - n_transfer, vel_y_dev, G.transfer_particles_indices_d, - G.replace_particles_indices_d, false); - Replace_Transfered_Particles_GPU_function( - n_transfer, vel_z_dev, G.transfer_particles_indices_d, - G.replace_particles_indices_d, false); + Replace_Transfered_Particles_GPU_function(n_transfer, pos_x_dev, G.transfer_particles_indices_d, + G.replace_particles_indices_d, false); + Replace_Transfered_Particles_GPU_function(n_transfer, pos_y_dev, G.transfer_particles_indices_d, + G.replace_particles_indices_d, false); + Replace_Transfered_Particles_GPU_function(n_transfer, pos_z_dev, G.transfer_particles_indices_d, + G.replace_particles_indices_d, false); + Replace_Transfered_Particles_GPU_function(n_transfer, vel_x_dev, G.transfer_particles_indices_d, + G.replace_particles_indices_d, false); + Replace_Transfered_Particles_GPU_function(n_transfer, vel_y_dev, G.transfer_particles_indices_d, + G.replace_particles_indices_d, false); + Replace_Transfered_Particles_GPU_function(n_transfer, vel_z_dev, G.transfer_particles_indices_d, + G.replace_particles_indices_d, false); #ifndef SINGLE_PARTICLE_MASS - Replace_Transfered_Particles_GPU_function( - n_transfer, mass_dev, G.transfer_particles_indices_d, - G.replace_particles_indices_d, false); + Replace_Transfered_Particles_GPU_function(n_transfer, mass_dev, G.transfer_particles_indices_d, + G.replace_particles_indices_d, false); #endif #ifdef PARTICLE_IDS - Replace_Transfered_Particles_Int_GPU_function( - n_transfer, partIDs_dev, G.transfer_particles_indices_d, - G.replace_particles_indices_d, false); + Replace_Transfered_Particles_Int_GPU_function(n_transfer, partIDs_dev, G.transfer_particles_indices_d, + G.replace_particles_indices_d, false); #endif #ifdef PARTICLE_AGE - Replace_Transfered_Particles_GPU_function( - n_transfer, age_dev, G.transfer_particles_indices_d, - G.replace_particles_indices_d, false); + Replace_Transfered_Particles_GPU_function(n_transfer, age_dev, G.transfer_particles_indices_d, + G.replace_particles_indices_d, false); #endif CHECK(cudaDeviceSynchronize()); @@ -991,15 +854,12 @@ void Particles_3D::Replace_Tranfered_Particles_GPU(int n_transfer) n_local -= n_transfer; } -void Particles_3D::Load_Particles_to_Buffer_GPU(int direction, int side, - Real *send_buffer_h, - int buffer_length) +void Particles_3D::Load_Particles_to_Buffer_GPU(int direction, int side, Real *send_buffer_h, int buffer_length) { int n_transfer; n_transfer = Select_Particles_to_Transfer_GPU(direction, side); - Copy_Transfer_Particles_to_Buffer_GPU(n_transfer, direction, side, - send_buffer_h, buffer_length); + Copy_Transfer_Particles_to_Buffer_GPU(n_transfer, direction, side, send_buffer_h, buffer_length); Replace_Tranfered_Particles_GPU(n_transfer); } @@ -1040,15 +900,13 @@ void Particles_3D::Set_Particles_Open_Boundary_GPU(int dir, int side) Replace_Tranfered_Particles_GPU(n_transfer); } -void Particles_3D::Copy_Transfer_Particles_from_Buffer_GPU(int n_recv, - Real *recv_buffer_d) +void Particles_3D::Copy_Transfer_Particles_from_Buffer_GPU(int n_recv, Real *recv_buffer_d) { int n_fields_to_transfer; part_int_t n_local_after = n_local + n_recv; if (n_local_after > particles_array_size) { - printf(" Reallocating GPU particles arrays. N local particles: %ld \n", - n_local_after); + printf(" Reallocating GPU particles arrays. N local particles: %ld \n", n_local_after); int new_size = G.gpu_allocation_factor * n_local_after; Extend_GPU_Array(&pos_x_dev, (int)particles_array_size, new_size, true); Extend_GPU_Array(&pos_y_dev, (int)particles_array_size, new_size, false); @@ -1075,47 +933,34 @@ void Particles_3D::Copy_Transfer_Particles_from_Buffer_GPU(int n_recv, // Unload the particles that were transferred from the buffers int field_id = -1; n_fields_to_transfer = N_DATA_PER_PARTICLE_TRANSFER; - Unload_Particles_to_Transfer_GPU_function(n_local, n_recv, ++field_id, - n_fields_to_transfer, pos_x_dev, + Unload_Particles_to_Transfer_GPU_function(n_local, n_recv, ++field_id, n_fields_to_transfer, pos_x_dev, recv_buffer_d); - Unload_Particles_to_Transfer_GPU_function(n_local, n_recv, ++field_id, - n_fields_to_transfer, pos_y_dev, + Unload_Particles_to_Transfer_GPU_function(n_local, n_recv, ++field_id, n_fields_to_transfer, pos_y_dev, recv_buffer_d); - Unload_Particles_to_Transfer_GPU_function(n_local, n_recv, ++field_id, - n_fields_to_transfer, pos_z_dev, + Unload_Particles_to_Transfer_GPU_function(n_local, n_recv, ++field_id, n_fields_to_transfer, pos_z_dev, recv_buffer_d); - Unload_Particles_to_Transfer_GPU_function(n_local, n_recv, ++field_id, - n_fields_to_transfer, vel_x_dev, + Unload_Particles_to_Transfer_GPU_function(n_local, n_recv, ++field_id, n_fields_to_transfer, vel_x_dev, recv_buffer_d); - Unload_Particles_to_Transfer_GPU_function(n_local, n_recv, ++field_id, - n_fields_to_transfer, vel_y_dev, + Unload_Particles_to_Transfer_GPU_function(n_local, n_recv, ++field_id, n_fields_to_transfer, vel_y_dev, recv_buffer_d); - Unload_Particles_to_Transfer_GPU_function(n_local, n_recv, ++field_id, - n_fields_to_transfer, vel_z_dev, + Unload_Particles_to_Transfer_GPU_function(n_local, n_recv, ++field_id, n_fields_to_transfer, vel_z_dev, recv_buffer_d); #ifndef SINGLE_PARTICLE_MASS - Unload_Particles_to_Transfer_GPU_function(n_local, n_recv, ++field_id, - n_fields_to_transfer, mass_dev, - recv_buffer_d); + Unload_Particles_to_Transfer_GPU_function(n_local, n_recv, ++field_id, n_fields_to_transfer, mass_dev, recv_buffer_d); #endif #ifdef PARTICLE_IDS - Unload_Particles_Int_to_Transfer_GPU_function(n_local, n_recv, ++field_id, - n_fields_to_transfer, - partIDs_dev, recv_buffer_d); + Unload_Particles_Int_to_Transfer_GPU_function(n_local, n_recv, ++field_id, n_fields_to_transfer, partIDs_dev, + recv_buffer_d); #endif #ifdef PARTICLE_AGE - Unload_Particles_to_Transfer_GPU_function(n_local, n_recv, ++field_id, - n_fields_to_transfer, age_dev, - recv_buffer_d); + Unload_Particles_to_Transfer_GPU_function(n_local, n_recv, ++field_id, n_fields_to_transfer, age_dev, recv_buffer_d); #endif n_local += n_recv; // if ( n_recv > 0 ) printf( "###Unloaded %d particles\n", n_recv ); } -void Particles_3D::Unload_Particles_from_Buffer_GPU(int direction, int side, - Real *recv_buffer_h, - int n_recv) +void Particles_3D::Unload_Particles_from_Buffer_GPU(int direction, int side, Real *recv_buffer_h, int n_recv) { int buffer_size; Real domainMin, domainMax; diff --git a/src/particles/particles_boundaries_cpu.cpp b/src/particles/particles_boundaries_cpu.cpp index 93ebc6099..19fc238ef 100644 --- a/src/particles/particles_boundaries_cpu.cpp +++ b/src/particles/particles_boundaries_cpu.cpp @@ -17,9 +17,9 @@ Real Get_and_Remove_Real(part_int_t indx, real_vector_t &vec) { Real value = vec[indx]; - vec[indx] = vec.back(); // The item at the specified index is replaced by the - // last item in the vector - vec.pop_back(); // The last item in the vector is discarded + vec[indx] = vec.back(); // The item at the specified index is replaced by the + // last item in the vector + vec.pop_back(); // The last item in the vector is discarded return value; } @@ -79,8 +79,7 @@ void Grid3D::Set_Particles_Boundary(int dir, int side) bool changed_pos; Real pos; #ifdef PARALLEL_OMP - #pragma omp parallel for private(pos, changed_pos) \ - num_threads(N_OMP_THREADS) + #pragma omp parallel for private(pos, changed_pos) num_threads(N_OMP_THREADS) #endif for (int i = 0; i < Particles.n_local; i++) { if (dir == 0) pos = Particles.pos_x[i]; @@ -139,8 +138,7 @@ void Grid3D::Set_Particles_Open_Boundary_CPU(int dir, int side) if (dir == 2) pos = Particles.pos_z[i]; // If the position is out of the region, remove. - if ((side == 0 && pos < d_min) || (side == 1 && pos > d_max)) - removed_indices.push_back(i); + if ((side == 0 && pos < d_min) || (side == 1 && pos > d_max)) removed_indices.push_back(i); } std::sort(removed_indices.begin(), removed_indices.end()); @@ -226,9 +224,7 @@ void Particles_3D::Select_Particles_to_Transfer_All_CPU(int *flags) } // Load the particles that need to be transferred to the MPI buffer -void Particles_3D::Load_Particles_to_Buffer_CPU(int direction, int side, - Real *send_buffer, - int buffer_length) +void Particles_3D::Load_Particles_to_Buffer_CPU(int direction, int side, Real *send_buffer, int buffer_length) { part_int_t n_out; part_int_t n_send; @@ -275,11 +271,10 @@ void Particles_3D::Load_Particles_to_Buffer_CPU(int direction, int side, } part_int_t offset, offset_extra; - n_out = out_indxs_vec->size(); // Number of particles to be transferred - offset = *n_in_buffer * - N_DATA_PER_PARTICLE_TRANSFER; // Offset in the array to take in to - // account the particles that already - // reside in the buffer array + n_out = out_indxs_vec->size(); // Number of particles to be transferred + offset = *n_in_buffer * N_DATA_PER_PARTICLE_TRANSFER; // Offset in the array to take in to + // account the particles that already + // reside in the buffer array part_int_t indx, pIndx; for (indx = 0; indx < n_out; indx++) { @@ -314,29 +309,22 @@ void Particles_3D::Load_Particles_to_Buffer_CPU(int direction, int side, send_buffer[offset_extra] = age[pIndx]; #endif - *n_in_buffer += - 1; // add one to the number of particles in the transfer_buffer + *n_in_buffer += 1; // add one to the number of particles in the transfer_buffer offset += N_DATA_PER_PARTICLE_TRANSFER; // Check that the offset doesn't exceed the buffer size - if (offset > buffer_length) - std::cout << "ERROR: Buffer length exceeded on particles transfer" - << std::endl; + if (offset > buffer_length) std::cout << "ERROR: Buffer length exceeded on particles transfer" << std::endl; } } // Add the data of a single particle to a transfer buffer -void Particles_3D::Add_Particle_To_Buffer(Real *buffer, part_int_t n_in_buffer, - int buffer_length, Real pId, - Real pMass, Real pAge, Real pPos_x, - Real pPos_y, Real pPos_z, Real pVel_x, - Real pVel_y, Real pVel_z) +void Particles_3D::Add_Particle_To_Buffer(Real *buffer, part_int_t n_in_buffer, int buffer_length, Real pId, Real pMass, + Real pAge, Real pPos_x, Real pPos_y, Real pPos_z, Real pVel_x, Real pVel_y, + Real pVel_z) { int offset, offset_extra; offset = n_in_buffer * N_DATA_PER_PARTICLE_TRANSFER; - if (offset > buffer_length) - std::cout << "ERROR: Buffer length exceeded on particles transfer" - << std::endl; + if (offset > buffer_length) std::cout << "ERROR: Buffer length exceeded on particles transfer" << std::endl; buffer[offset + 0] = pPos_x; buffer[offset + 1] = pPos_y; buffer[offset + 2] = pPos_z; @@ -361,21 +349,16 @@ void Particles_3D::Add_Particle_To_Buffer(Real *buffer, part_int_t n_in_buffer, // After a particle was transferred, add the transferred particle data to the // vectors that contain the data of the local particles -void Particles_3D::Add_Particle_To_Vectors(Real pId, Real pMass, Real pAge, - Real pPos_x, Real pPos_y, - Real pPos_z, Real pVel_x, - Real pVel_y, Real pVel_z, int *flags) +void Particles_3D::Add_Particle_To_Vectors(Real pId, Real pMass, Real pAge, Real pPos_x, Real pPos_y, Real pPos_z, + Real pVel_x, Real pVel_y, Real pVel_z, int *flags) { // Make sure that the particle position is inside the local domain bool in_local = true; if (pPos_x < G.xMin || pPos_x >= G.xMax) in_local = false; - if ((pPos_y < G.yMin && flags[2] == 5) || (pPos_y >= G.yMax && flags[3] == 5)) - in_local = false; - if ((pPos_z < G.zMin && flags[4] == 5) || (pPos_z >= G.zMax && flags[4] == 5)) - in_local = false; + if ((pPos_y < G.yMin && flags[2] == 5) || (pPos_y >= G.yMax && flags[3] == 5)) in_local = false; + if ((pPos_z < G.zMin && flags[4] == 5) || (pPos_z >= G.zMax && flags[4] == 5)) in_local = false; if (!in_local) { - std::cout << " Adding particle out of local domain to vectors Error:" - << std::endl; + std::cout << " Adding particle out of local domain to vectors Error:" << std::endl; #ifdef PARTICLE_IDS std::cout << " Particle outside Local domain pID: " << pId << std::endl; #else @@ -416,11 +399,10 @@ void Particles_3D::Add_Particle_To_Vectors(Real pId, Real pMass, Real pAge, } // After the MPI transfer, unload the particles data from the buffers -void Particles_3D::Unload_Particles_from_Buffer_CPU( - int direction, int side, Real *recv_buffer, part_int_t n_recv, - Real *send_buffer_y0, Real *send_buffer_y1, Real *send_buffer_z0, - Real *send_buffer_z1, int buffer_length_y0, int buffer_length_y1, - int buffer_length_z0, int buffer_length_z1, int *flags) +void Particles_3D::Unload_Particles_from_Buffer_CPU(int direction, int side, Real *recv_buffer, part_int_t n_recv, + Real *send_buffer_y0, Real *send_buffer_y1, Real *send_buffer_z0, + Real *send_buffer_z1, int buffer_length_y0, int buffer_length_y1, + int buffer_length_z0, int buffer_length_z1, int *flags) { // Loop over the data in the recv_buffer, get the data for each particle and // append the particle data to the local vecors @@ -469,8 +451,7 @@ void Particles_3D::Unload_Particles_from_Buffer_CPU( // If the particle x_position is outside the local domain there was an error if ((pPos_x < G.xMin) || (pPos_x >= G.xMax)) { #ifdef PARTICLE_IDS - std::cout << "ERROR Particle Transfer out of X domain pID: " << pId - << std::endl; + std::cout << "ERROR Particle Transfer out of X domain pID: " << pId << std::endl; #else std::cout << "ERROR Particle Transfer out of X domain" << std::endl; #endif @@ -487,17 +468,15 @@ void Particles_3D::Unload_Particles_from_Buffer_CPU( // domain, then the particles is added to the buffer for the Y_Transfer if (direction == 0) { if (pPos_y < G.yMin && flags[2] == 5) { - Add_Particle_To_Buffer(send_buffer_y0, n_in_buffer_y0, buffer_length_y0, - pId, pMass, pAge, pPos_x, pPos_y, pPos_z, pVel_x, - pVel_y, pVel_z); + Add_Particle_To_Buffer(send_buffer_y0, n_in_buffer_y0, buffer_length_y0, pId, pMass, pAge, pPos_x, pPos_y, + pPos_z, pVel_x, pVel_y, pVel_z); n_send_y0 += 1; n_in_buffer_y0 += 1; continue; } if (pPos_y >= G.yMax && flags[3] == 5) { - Add_Particle_To_Buffer(send_buffer_y1, n_in_buffer_y1, buffer_length_y1, - pId, pMass, pAge, pPos_x, pPos_y, pPos_z, pVel_x, - pVel_y, pVel_z); + Add_Particle_To_Buffer(send_buffer_y1, n_in_buffer_y1, buffer_length_y1, pId, pMass, pAge, pPos_x, pPos_y, + pPos_z, pVel_x, pVel_y, pVel_z); n_send_y1 += 1; n_in_buffer_y1 += 1; continue; @@ -512,11 +491,9 @@ void Particles_3D::Unload_Particles_from_Buffer_CPU( // If the particle y_position is outside the local domain after the // X-Transfer, there was an error - if ((direction == 1 || direction == 2) && - ((pPos_y < G.yMin) || (pPos_y >= G.yMax))) { + if ((direction == 1 || direction == 2) && ((pPos_y < G.yMin) || (pPos_y >= G.yMax))) { #ifdef PARTICLE_IDS - std::cout << "ERROR Particle Transfer out of Y domain pID: " << pId - << std::endl; + std::cout << "ERROR Particle Transfer out of Y domain pID: " << pId << std::endl; #else std::cout << "ERROR Particle Transfer out of Y domain" << std::endl; #endif @@ -533,17 +510,15 @@ void Particles_3D::Unload_Particles_from_Buffer_CPU( // domain, then the particles is added to the buffer for the Z_Transfer if (direction != 2) { if (pPos_z < G.zMin && flags[4] == 5) { - Add_Particle_To_Buffer(send_buffer_z0, n_in_buffer_z0, buffer_length_z0, - pId, pMass, pAge, pPos_x, pPos_y, pPos_z, pVel_x, - pVel_y, pVel_z); + Add_Particle_To_Buffer(send_buffer_z0, n_in_buffer_z0, buffer_length_z0, pId, pMass, pAge, pPos_x, pPos_y, + pPos_z, pVel_x, pVel_y, pVel_z); n_send_z0 += 1; n_in_buffer_z0 += 1; continue; } if (pPos_z >= G.zMax && flags[5] == 5) { - Add_Particle_To_Buffer(send_buffer_z1, n_in_buffer_z1, buffer_length_z1, - pId, pMass, pAge, pPos_x, pPos_y, pPos_z, pVel_x, - pVel_y, pVel_z); + Add_Particle_To_Buffer(send_buffer_z1, n_in_buffer_z1, buffer_length_z1, pId, pMass, pAge, pPos_x, pPos_y, + pPos_z, pVel_x, pVel_y, pVel_z); n_send_z1 += 1; n_in_buffer_z1 += 1; continue; @@ -560,8 +535,7 @@ void Particles_3D::Unload_Particles_from_Buffer_CPU( // X-Transfer and Y-Transfer, there was an error if ((direction == 2) && ((pPos_z < G.zMin) || (pPos_z >= G.zMax))) { #ifdef PARTICLE_IDS - std::cout << "ERROR Particle Transfer out of Z domain pID: " << pId - << std::endl; + std::cout << "ERROR Particle Transfer out of Z domain pID: " << pId << std::endl; #else std::cout << "ERROR Particle Transfer out of Z domain" << std::endl; #endif @@ -576,8 +550,7 @@ void Particles_3D::Unload_Particles_from_Buffer_CPU( // If the particle doesn't have to be transferred to the y_direction or // z_direction, then add the particle date to the local vectors - Add_Particle_To_Vectors(pId, pMass, pAge, pPos_x, pPos_y, pPos_z, pVel_x, - pVel_y, pVel_z, flags); + Add_Particle_To_Vectors(pId, pMass, pAge, pPos_x, pPos_y, pPos_z, pVel_x, pVel_y, pVel_z, flags); } } @@ -597,18 +570,12 @@ void Particles_3D::Remove_Transfered_Particles(void) // Concatenate the indices of all the particles that moved into a new vector // (delete_indxs_vec) int_vector_t delete_indxs_vec; - delete_indxs_vec.insert(delete_indxs_vec.end(), out_indxs_vec_x0.begin(), - out_indxs_vec_x0.end()); - delete_indxs_vec.insert(delete_indxs_vec.end(), out_indxs_vec_x1.begin(), - out_indxs_vec_x1.end()); - delete_indxs_vec.insert(delete_indxs_vec.end(), out_indxs_vec_y0.begin(), - out_indxs_vec_y0.end()); - delete_indxs_vec.insert(delete_indxs_vec.end(), out_indxs_vec_y1.begin(), - out_indxs_vec_y1.end()); - delete_indxs_vec.insert(delete_indxs_vec.end(), out_indxs_vec_z0.begin(), - out_indxs_vec_z0.end()); - delete_indxs_vec.insert(delete_indxs_vec.end(), out_indxs_vec_z1.begin(), - out_indxs_vec_z1.end()); + delete_indxs_vec.insert(delete_indxs_vec.end(), out_indxs_vec_x0.begin(), out_indxs_vec_x0.end()); + delete_indxs_vec.insert(delete_indxs_vec.end(), out_indxs_vec_x1.begin(), out_indxs_vec_x1.end()); + delete_indxs_vec.insert(delete_indxs_vec.end(), out_indxs_vec_y0.begin(), out_indxs_vec_y0.end()); + delete_indxs_vec.insert(delete_indxs_vec.end(), out_indxs_vec_y1.begin(), out_indxs_vec_y1.end()); + delete_indxs_vec.insert(delete_indxs_vec.end(), out_indxs_vec_z0.begin(), out_indxs_vec_z0.end()); + delete_indxs_vec.insert(delete_indxs_vec.end(), out_indxs_vec_z1.begin(), out_indxs_vec_z1.end()); // Clear the vectors that stored the transferred indices for each direction. // All these indices are now stored in delete_indxs_vec @@ -649,18 +616,16 @@ void Particles_3D::Remove_Transfered_Particles(void) delete_indxs_vec.pop_back(); // Discard the index of ther delted particle // from the delete_indxs_vector - n_local -= 1; // substract one to the local number of particles + n_local -= 1; // substract one to the local number of particles } // At the end the delete_indxs_vec must be empty - if (delete_indxs_vec.size() != 0) - std::cout << "ERROR: Deleting Transferred Particles " << std::endl; + if (delete_indxs_vec.size() != 0) std::cout << "ERROR: Deleting Transferred Particles " << std::endl; // Check that the size of the particles data vectors is consistent with the // local number of particles int n_in_out_vectors, n_in_vectors; - n_in_vectors = pos_x.size() + pos_y.size() + pos_z.size() + vel_x.size() + - vel_y.size() + vel_z.size(); + n_in_vectors = pos_x.size() + pos_y.size() + pos_z.size() + vel_x.size() + vel_y.size() + vel_z.size(); #ifndef SINGLE_PARTICLE_MASS n_in_vectors += mass.size(); #endif diff --git a/src/particles/particles_boundaries_gpu.cu b/src/particles/particles_boundaries_gpu.cu index c9730e10a..ba60916e9 100644 --- a/src/particles/particles_boundaries_gpu.cu +++ b/src/particles/particles_boundaries_gpu.cu @@ -17,9 +17,8 @@ #define SCAN_SHARED_SIZE 2 * TPB_PARTICLES -__global__ void Set_Particles_Boundary_Kernel(int side, part_int_t n_local, - Real *pos_dev, Real d_min, - Real d_max, Real d_length) +__global__ void Set_Particles_Boundary_Kernel(int side, part_int_t n_local, Real *pos_dev, Real d_min, Real d_max, + Real d_length) { part_int_t tid = blockIdx.x * blockDim.x + threadIdx.x; if (tid >= n_local) return; @@ -67,15 +66,14 @@ void Grid3D::Set_Particles_Boundary_GPU(int dir, int side) // number of threads per 1D block dim3 dim1dBlock(TPB_PARTICLES, 1, 1); - hipLaunchKernelGGL(Set_Particles_Boundary_Kernel, dim1dGrid, dim1dBlock, 0, 0, - side, Particles.n_local, pos_dev, d_min, d_max, L); + hipLaunchKernelGGL(Set_Particles_Boundary_Kernel, dim1dGrid, dim1dBlock, 0, 0, side, Particles.n_local, pos_dev, + d_min, d_max, L); CudaCheckError(); } // #ifdef MPI_CHOLLA -__global__ void Get_Transfer_Flags_Kernel(part_int_t n_total, int side, - Real d_min, Real d_max, Real *pos_d, +__global__ void Get_Transfer_Flags_Kernel(part_int_t n_total, int side, Real d_min, Real d_max, Real *pos_d, bool *transfer_flags_d) { int tid = threadIdx.x + blockIdx.x * blockDim.x; @@ -91,8 +89,7 @@ __global__ void Get_Transfer_Flags_Kernel(part_int_t n_total, int side, transfer_flags_d[tid] = transfer; } -__global__ void Scan_Kernel(part_int_t n_total, bool *transfer_flags_d, - int *prefix_sum_d, int *prefix_sum_block_d) +__global__ void Scan_Kernel(part_int_t n_total, bool *transfer_flags_d, int *prefix_sum_d, int *prefix_sum_block_d) { __shared__ int data_sh[SCAN_SHARED_SIZE]; @@ -103,13 +100,9 @@ __global__ void Scan_Kernel(part_int_t n_total, bool *transfer_flags_d, block_start = 2 * blockIdx.x * blockDim.x; data_sh[2 * tid_block] = - block_start + 2 * tid_block < n_total - ? (int)transfer_flags_d[block_start + 2 * tid_block] - : 0; + block_start + 2 * tid_block < n_total ? (int)transfer_flags_d[block_start + 2 * tid_block] : 0; data_sh[2 * tid_block + 1] = - block_start + 2 * tid_block + 1 < n_total - ? (int)transfer_flags_d[block_start + 2 * tid_block + 1] - : 0; + block_start + 2 * tid_block + 1 < n_total ? (int)transfer_flags_d[block_start + 2 * tid_block + 1] : 0; __syncthreads(); int offset = 1; @@ -147,17 +140,13 @@ __global__ void Scan_Kernel(part_int_t n_total, bool *transfer_flags_d, __syncthreads(); // Write results to device memory - if (block_start + 2 * tid_block < n_total) - prefix_sum_d[block_start + 2 * tid_block] = data_sh[2 * tid_block]; + if (block_start + 2 * tid_block < n_total) prefix_sum_d[block_start + 2 * tid_block] = data_sh[2 * tid_block]; if (block_start + 2 * tid_block + 1 < n_total) prefix_sum_d[block_start + 2 * tid_block + 1] = data_sh[2 * tid_block + 1]; // Write the block sum - int last_flag_block = - (int)transfer_flags_d[block_start + 2 * (blockDim.x - 1) + 1]; - if (tid_block == 0) - prefix_sum_block_d[blockIdx.x] = - data_sh[2 * (blockDim.x - 1) + 1] + last_flag_block; + int last_flag_block = (int)transfer_flags_d[block_start + 2 * (blockDim.x - 1) + 1]; + if (tid_block == 0) prefix_sum_block_d[blockIdx.x] = data_sh[2 * (blockDim.x - 1) + 1] + last_flag_block; } __global__ void Prefix_Sum_Blocks_Kernel(int n_partial, int *prefix_sum_block_d) @@ -172,9 +161,7 @@ __global__ void Prefix_Sum_Blocks_Kernel(int n_partial, int *prefix_sum_block_d) int n = 0; start_index = n * n_threads; while (start_index < n_partial) { - data_sh[tid_block] = start_index + tid_block < n_partial - ? prefix_sum_block_d[start_index + tid_block] - : 0; + data_sh[tid_block] = start_index + tid_block < n_partial ? prefix_sum_block_d[start_index + tid_block] : 0; __syncthreads(); if (tid_block == 0) { @@ -186,15 +173,13 @@ __global__ void Prefix_Sum_Blocks_Kernel(int n_partial, int *prefix_sum_block_d) } __syncthreads(); - if (start_index + tid_block < n_partial) - prefix_sum_block_d[start_index + tid_block] = data_sh[tid_block]; + if (start_index + tid_block < n_partial) prefix_sum_block_d[start_index + tid_block] = data_sh[tid_block]; n += 1; start_index = n * n_threads; } } -__global__ void Sum_Blocks_Kernel(part_int_t n_total, int *prefix_sum_d, - int *prefix_sum_block_d) +__global__ void Sum_Blocks_Kernel(part_int_t n_total, int *prefix_sum_d, int *prefix_sum_block_d) { int tid, tid_block, block_id, data_id; tid = threadIdx.x + blockIdx.x * blockDim.x; @@ -213,20 +198,15 @@ __global__ void Sum_Blocks_Kernel(part_int_t n_total, int *prefix_sum_d, if (tid < n_total) prefix_sum_d[tid] += block_sum_sh[0]; } -__global__ void Get_N_Transfer_Particles_Kernel(part_int_t n_total, - int *n_transfer_d, - bool *transfer_flags_d, +__global__ void Get_N_Transfer_Particles_Kernel(part_int_t n_total, int *n_transfer_d, bool *transfer_flags_d, int *prefix_sum_d) { - n_transfer_d[0] = - prefix_sum_d[n_total - 1] + (int)transfer_flags_d[n_total - 1]; + n_transfer_d[0] = prefix_sum_d[n_total - 1] + (int)transfer_flags_d[n_total - 1]; // if ( n_transfer_d[0] > 0 ) printf( "##Thread transfer: %d\n", // n_transfer_d[0]); } -__global__ void Get_Transfer_Indices_Kernel(part_int_t n_total, - bool *transfer_flags_d, - int *prefix_sum_d, +__global__ void Get_Transfer_Indices_Kernel(part_int_t n_total, bool *transfer_flags_d, int *prefix_sum_d, int *transfer_indices_d) { int tid, transfer_index; @@ -235,18 +215,15 @@ __global__ void Get_Transfer_Indices_Kernel(part_int_t n_total, transfer_index = prefix_sum_d[tid]; if (transfer_index < 0 || transfer_index >= n_total) { - printf( - "#### PARTICLE TRANSFER ERROR: transfer index outside domain: %d \n", - transfer_index); + printf("#### PARTICLE TRANSFER ERROR: transfer index outside domain: %d \n", transfer_index); return; } if (transfer_flags_d[tid]) transfer_indices_d[transfer_index] = tid; } -__global__ void Select_Indices_to_Replace_Transfered_Kernel( - part_int_t n_total, int n_transfer, bool *transfer_flags_d, - int *prefix_sum_d, int *replace_indices_d) +__global__ void Select_Indices_to_Replace_Transfered_Kernel(part_int_t n_total, int n_transfer, bool *transfer_flags_d, + int *prefix_sum_d, int *replace_indices_d) { int tid, tid_inv; tid = threadIdx.x + blockIdx.x * blockDim.x; @@ -262,18 +239,15 @@ __global__ void Select_Indices_to_Replace_Transfered_Kernel( replace_id = tid_inv - prefix_sum_inv; if (replace_id < 0 || replace_id >= n_total) { - printf("#### PARTICLE TRANSFER ERROR: replace index outside domain: %d \n", - replace_id); + printf("#### PARTICLE TRANSFER ERROR: replace index outside domain: %d \n", replace_id); return; } replace_indices_d[replace_id] = tid; } template -__global__ void Replace_Transfered_Particles_Kernel(int n_transfer, T *field_d, - int *transfer_indices_d, - int *replace_indices_d, - bool print_replace) +__global__ void Replace_Transfered_Particles_Kernel(int n_transfer, T *field_d, int *transfer_indices_d, + int *replace_indices_d, bool print_replace) { int tid; tid = threadIdx.x + blockIdx.x * blockDim.x; @@ -289,10 +263,8 @@ __global__ void Replace_Transfered_Particles_Kernel(int n_transfer, T *field_d, } } -void Replace_Transfered_Particles_GPU_function(int n_transfer, Real *field_d, - int *transfer_indices_d, - int *replace_indices_d, - bool print_replace) +void Replace_Transfered_Particles_GPU_function(int n_transfer, Real *field_d, int *transfer_indices_d, + int *replace_indices_d, bool print_replace) { int grid_size; grid_size = (n_transfer - 1) / TPB_PARTICLES + 1; @@ -301,17 +273,13 @@ void Replace_Transfered_Particles_GPU_function(int n_transfer, Real *field_d, // number of threads per 1D block dim3 dim1dBlock(TPB_PARTICLES, 1, 1); - hipLaunchKernelGGL(Replace_Transfered_Particles_Kernel, dim1dGrid, dim1dBlock, - 0, 0, n_transfer, field_d, transfer_indices_d, - replace_indices_d, print_replace); + hipLaunchKernelGGL(Replace_Transfered_Particles_Kernel, dim1dGrid, dim1dBlock, 0, 0, n_transfer, field_d, + transfer_indices_d, replace_indices_d, print_replace); CudaCheckError(); } -void Replace_Transfered_Particles_Int_GPU_function(int n_transfer, - part_int_t *field_d, - int *transfer_indices_d, - int *replace_indices_d, - bool print_replace) +void Replace_Transfered_Particles_Int_GPU_function(int n_transfer, part_int_t *field_d, int *transfer_indices_d, + int *replace_indices_d, bool print_replace) { int grid_size; grid_size = (n_transfer - 1) / TPB_PARTICLES + 1; @@ -320,17 +288,16 @@ void Replace_Transfered_Particles_Int_GPU_function(int n_transfer, // number of threads per 1D block dim3 dim1dBlock(TPB_PARTICLES, 1, 1); - hipLaunchKernelGGL(Replace_Transfered_Particles_Kernel, dim1dGrid, dim1dBlock, - 0, 0, n_transfer, field_d, transfer_indices_d, - replace_indices_d, print_replace); + hipLaunchKernelGGL(Replace_Transfered_Particles_Kernel, dim1dGrid, dim1dBlock, 0, 0, n_transfer, field_d, + transfer_indices_d, replace_indices_d, print_replace); CudaCheckError(); } -part_int_t Select_Particles_to_Transfer_GPU_function( - part_int_t n_local, int side, Real domainMin, Real domainMax, Real *pos_d, - int *n_transfer_d, int *n_transfer_h, bool *transfer_flags_d, - int *transfer_indices_d, int *replace_indices_d, int *transfer_prefix_sum_d, - int *transfer_prefix_sum_blocks_d) +part_int_t Select_Particles_to_Transfer_GPU_function(part_int_t n_local, int side, Real domainMin, Real domainMax, + Real *pos_d, int *n_transfer_d, int *n_transfer_h, + bool *transfer_flags_d, int *transfer_indices_d, + int *replace_indices_d, int *transfer_prefix_sum_d, + int *transfer_prefix_sum_blocks_d) { // set values for GPU kernels int grid_size, grid_size_half; @@ -350,51 +317,43 @@ part_int_t Select_Particles_to_Transfer_GPU_function( return 0; } - hipLaunchKernelGGL(Get_Transfer_Flags_Kernel, dim1dGrid, dim1dBlock, 0, 0, - n_local, side, domainMin, domainMax, pos_d, + hipLaunchKernelGGL(Get_Transfer_Flags_Kernel, dim1dGrid, dim1dBlock, 0, 0, n_local, side, domainMin, domainMax, pos_d, transfer_flags_d); CudaCheckError(); - hipLaunchKernelGGL(Scan_Kernel, dim1dGrid_half, dim1dBlock, 0, 0, n_local, - transfer_flags_d, transfer_prefix_sum_d, + hipLaunchKernelGGL(Scan_Kernel, dim1dGrid_half, dim1dBlock, 0, 0, n_local, transfer_flags_d, transfer_prefix_sum_d, transfer_prefix_sum_blocks_d); CudaCheckError(); - hipLaunchKernelGGL(Prefix_Sum_Blocks_Kernel, 1, dim1dBlock, 0, 0, - grid_size_half, transfer_prefix_sum_blocks_d); + hipLaunchKernelGGL(Prefix_Sum_Blocks_Kernel, 1, dim1dBlock, 0, 0, grid_size_half, transfer_prefix_sum_blocks_d); CudaCheckError(); - hipLaunchKernelGGL(Sum_Blocks_Kernel, dim1dGrid, dim1dBlock, 0, 0, n_local, - transfer_prefix_sum_d, transfer_prefix_sum_blocks_d); + hipLaunchKernelGGL(Sum_Blocks_Kernel, dim1dGrid, dim1dBlock, 0, 0, n_local, transfer_prefix_sum_d, + transfer_prefix_sum_blocks_d); CudaCheckError(); - hipLaunchKernelGGL(Get_N_Transfer_Particles_Kernel, 1, 1, 0, 0, n_local, - n_transfer_d, transfer_flags_d, transfer_prefix_sum_d); + hipLaunchKernelGGL(Get_N_Transfer_Particles_Kernel, 1, 1, 0, 0, n_local, n_transfer_d, transfer_flags_d, + transfer_prefix_sum_d); CudaCheckError(); - CudaSafeCall(cudaMemcpy(n_transfer_h, n_transfer_d, sizeof(int), - cudaMemcpyDeviceToHost)); + CudaSafeCall(cudaMemcpy(n_transfer_h, n_transfer_d, sizeof(int), cudaMemcpyDeviceToHost)); CudaCheckError(); - hipLaunchKernelGGL(Get_Transfer_Indices_Kernel, dim1dGrid, dim1dBlock, 0, 0, - n_local, transfer_flags_d, transfer_prefix_sum_d, - transfer_indices_d); + hipLaunchKernelGGL(Get_Transfer_Indices_Kernel, dim1dGrid, dim1dBlock, 0, 0, n_local, transfer_flags_d, + transfer_prefix_sum_d, transfer_indices_d); CudaCheckError(); - hipLaunchKernelGGL(Select_Indices_to_Replace_Transfered_Kernel, dim1dGrid, - dim1dBlock, 0, 0, n_local, n_transfer_h[0], - transfer_flags_d, transfer_prefix_sum_d, - replace_indices_d); + hipLaunchKernelGGL(Select_Indices_to_Replace_Transfered_Kernel, dim1dGrid, dim1dBlock, 0, 0, n_local, n_transfer_h[0], + transfer_flags_d, transfer_prefix_sum_d, replace_indices_d); CudaCheckError(); // if ( n_transfer_h[0] > 0 )printf( "N transfer: %d\n", n_transfer_h[0]); return n_transfer_h[0]; } -__global__ void Load_Transfered_Particles_to_Buffer_Kernel( - int n_transfer, int field_id, int n_fields_to_transfer, Real *field_d, - int *transfer_indices_d, Real *send_buffer_d, Real domainMin, - Real domainMax, int boundary_type) +__global__ void Load_Transfered_Particles_to_Buffer_Kernel(int n_transfer, int field_id, int n_fields_to_transfer, + Real *field_d, int *transfer_indices_d, Real *send_buffer_d, + Real domainMin, Real domainMax, int boundary_type) { int tid; tid = threadIdx.x + blockIdx.x * blockDim.x; @@ -407,17 +366,14 @@ __global__ void Load_Transfered_Particles_to_Buffer_Kernel( field_val = field_d[src_id]; // Set global periodic boundary conditions - if (boundary_type == 1 && field_val < domainMin) - field_val += (domainMax - domainMin); - if (boundary_type == 1 && field_val >= domainMax) - field_val -= (domainMax - domainMin); + if (boundary_type == 1 && field_val < domainMin) field_val += (domainMax - domainMin); + if (boundary_type == 1 && field_val >= domainMax) field_val -= (domainMax - domainMin); send_buffer_d[dst_id] = field_val; } -void Load_Particles_to_Transfer_GPU_function( - int n_transfer, int field_id, int n_fields_to_transfer, Real *field_d, - int *transfer_indices_d, Real *send_buffer_d, Real domainMin, - Real domainMax, int boundary_type) +void Load_Particles_to_Transfer_GPU_function(int n_transfer, int field_id, int n_fields_to_transfer, Real *field_d, + int *transfer_indices_d, Real *send_buffer_d, Real domainMin, + Real domainMax, int boundary_type) { // set values for GPU kernels int grid_size; @@ -427,17 +383,16 @@ void Load_Particles_to_Transfer_GPU_function( // number of threads per 1D block dim3 dim1dBlock(TPB_PARTICLES, 1, 1); - hipLaunchKernelGGL(Load_Transfered_Particles_to_Buffer_Kernel, dim1dGrid, - dim1dBlock, 0, 0, n_transfer, field_id, - n_fields_to_transfer, field_d, transfer_indices_d, - send_buffer_d, domainMin, domainMax, boundary_type); + hipLaunchKernelGGL(Load_Transfered_Particles_to_Buffer_Kernel, dim1dGrid, dim1dBlock, 0, 0, n_transfer, field_id, + n_fields_to_transfer, field_d, transfer_indices_d, send_buffer_d, domainMin, domainMax, + boundary_type); CudaCheckError(); } -__global__ void Load_Transfered_Particles_Ints_to_Buffer_Kernel( - int n_transfer, int field_id, int n_fields_to_transfer, part_int_t *field_d, - int *transfer_indices_d, Real *send_buffer_d, Real domainMin, - Real domainMax, int boundary_type) +__global__ void Load_Transfered_Particles_Ints_to_Buffer_Kernel(int n_transfer, int field_id, int n_fields_to_transfer, + part_int_t *field_d, int *transfer_indices_d, + Real *send_buffer_d, Real domainMin, Real domainMax, + int boundary_type) { int tid; tid = threadIdx.x + blockIdx.x * blockDim.x; @@ -450,17 +405,14 @@ __global__ void Load_Transfered_Particles_Ints_to_Buffer_Kernel( field_val = field_d[src_id]; // Set global periodic boundary conditions - if (boundary_type == 1 && field_val < domainMin) - field_val += (domainMax - domainMin); - if (boundary_type == 1 && field_val >= domainMax) - field_val -= (domainMax - domainMin); + if (boundary_type == 1 && field_val < domainMin) field_val += (domainMax - domainMin); + if (boundary_type == 1 && field_val >= domainMax) field_val -= (domainMax - domainMin); send_buffer_d[dst_id] = __longlong_as_double(field_val); } -void Load_Particles_to_Transfer_Int_GPU_function( - int n_transfer, int field_id, int n_fields_to_transfer, part_int_t *field_d, - int *transfer_indices_d, Real *send_buffer_d, Real domainMin, - Real domainMax, int boundary_type) +void Load_Particles_to_Transfer_Int_GPU_function(int n_transfer, int field_id, int n_fields_to_transfer, + part_int_t *field_d, int *transfer_indices_d, Real *send_buffer_d, + Real domainMin, Real domainMax, int boundary_type) { // set values for GPU kernels int grid_size; @@ -470,38 +422,33 @@ void Load_Particles_to_Transfer_Int_GPU_function( // number of threads per 1D block dim3 dim1dBlock(TPB_PARTICLES, 1, 1); - hipLaunchKernelGGL(Load_Transfered_Particles_Ints_to_Buffer_Kernel, dim1dGrid, - dim1dBlock, 0, 0, n_transfer, field_id, - n_fields_to_transfer, field_d, transfer_indices_d, - send_buffer_d, domainMin, domainMax, boundary_type); + hipLaunchKernelGGL(Load_Transfered_Particles_Ints_to_Buffer_Kernel, dim1dGrid, dim1dBlock, 0, 0, n_transfer, field_id, + n_fields_to_transfer, field_d, transfer_indices_d, send_buffer_d, domainMin, domainMax, + boundary_type); CudaCheckError(); } #ifdef MPI_CHOLLA -void Copy_Particles_GPU_Buffer_to_Host_Buffer(int n_transfer, Real *buffer_h, - Real *buffer_d) +void Copy_Particles_GPU_Buffer_to_Host_Buffer(int n_transfer, Real *buffer_h, Real *buffer_d) { int transfer_size; transfer_size = n_transfer * N_DATA_PER_PARTICLE_TRANSFER; - CudaSafeCall(cudaMemcpy(buffer_h, buffer_d, transfer_size * sizeof(Real), - cudaMemcpyDeviceToHost)); + CudaSafeCall(cudaMemcpy(buffer_h, buffer_d, transfer_size * sizeof(Real), cudaMemcpyDeviceToHost)); CudaCheckError(); } -void Copy_Particles_Host_Buffer_to_GPU_Buffer(int n_transfer, Real *buffer_h, - Real *buffer_d) +void Copy_Particles_Host_Buffer_to_GPU_Buffer(int n_transfer, Real *buffer_h, Real *buffer_d) { int transfer_size; transfer_size = n_transfer * N_DATA_PER_PARTICLE_TRANSFER; - CudaSafeCall(cudaMemcpy(buffer_d, buffer_h, transfer_size * sizeof(Real), - cudaMemcpyHostToDevice)); + CudaSafeCall(cudaMemcpy(buffer_d, buffer_h, transfer_size * sizeof(Real), cudaMemcpyHostToDevice)); CudaCheckError(); } #endif // MPI_CHOLLA -__global__ void Unload_Transfered_Particles_from_Buffer_Kernel( - int n_local, int n_transfer, int field_id, int n_fields_to_transfer, - Real *field_d, Real *recv_buffer_d) +__global__ void Unload_Transfered_Particles_from_Buffer_Kernel(int n_local, int n_transfer, int field_id, + int n_fields_to_transfer, Real *field_d, + Real *recv_buffer_d) { int tid; tid = threadIdx.x + blockIdx.x * blockDim.x; @@ -513,11 +460,8 @@ __global__ void Unload_Transfered_Particles_from_Buffer_Kernel( field_d[dst_id] = recv_buffer_d[src_id]; } -void Unload_Particles_to_Transfer_GPU_function(int n_local, int n_transfer, - int field_id, - int n_fields_to_transfer, - Real *field_d, - Real *recv_buffer_d) +void Unload_Particles_to_Transfer_GPU_function(int n_local, int n_transfer, int field_id, int n_fields_to_transfer, + Real *field_d, Real *recv_buffer_d) { // set values for GPU kernels int grid_size; @@ -527,15 +471,14 @@ void Unload_Particles_to_Transfer_GPU_function(int n_local, int n_transfer, // number of threads per 1D block dim3 dim1dBlock(TPB_PARTICLES, 1, 1); - hipLaunchKernelGGL(Unload_Transfered_Particles_from_Buffer_Kernel, dim1dGrid, - dim1dBlock, 0, 0, n_local, n_transfer, field_id, - n_fields_to_transfer, field_d, recv_buffer_d); + hipLaunchKernelGGL(Unload_Transfered_Particles_from_Buffer_Kernel, dim1dGrid, dim1dBlock, 0, 0, n_local, n_transfer, + field_id, n_fields_to_transfer, field_d, recv_buffer_d); CudaCheckError(); } -__global__ void Unload_Transfered_Particles_Int_from_Buffer_Kernel( - int n_local, int n_transfer, int field_id, int n_fields_to_transfer, - part_int_t *field_d, Real *recv_buffer_d) +__global__ void Unload_Transfered_Particles_Int_from_Buffer_Kernel(int n_local, int n_transfer, int field_id, + int n_fields_to_transfer, part_int_t *field_d, + Real *recv_buffer_d) { int tid; tid = threadIdx.x + blockIdx.x * blockDim.x; @@ -547,11 +490,8 @@ __global__ void Unload_Transfered_Particles_Int_from_Buffer_Kernel( field_d[dst_id] = __double_as_longlong(recv_buffer_d[src_id]); } -void Unload_Particles_Int_to_Transfer_GPU_function(int n_local, int n_transfer, - int field_id, - int n_fields_to_transfer, - part_int_t *field_d, - Real *recv_buffer_d) +void Unload_Particles_Int_to_Transfer_GPU_function(int n_local, int n_transfer, int field_id, int n_fields_to_transfer, + part_int_t *field_d, Real *recv_buffer_d) { // set values for GPU kernels int grid_size; @@ -561,9 +501,8 @@ void Unload_Particles_Int_to_Transfer_GPU_function(int n_local, int n_transfer, // number of threads per 1D block dim3 dim1dBlock(TPB_PARTICLES, 1, 1); - hipLaunchKernelGGL(Unload_Transfered_Particles_Int_from_Buffer_Kernel, - dim1dGrid, dim1dBlock, 0, 0, n_local, n_transfer, field_id, - n_fields_to_transfer, field_d, recv_buffer_d); + hipLaunchKernelGGL(Unload_Transfered_Particles_Int_from_Buffer_Kernel, dim1dGrid, dim1dBlock, 0, 0, n_local, + n_transfer, field_id, n_fields_to_transfer, field_d, recv_buffer_d); CudaCheckError(); } diff --git a/src/particles/particles_boundaries_gpu.h b/src/particles/particles_boundaries_gpu.h index 940121787..638102ad2 100644 --- a/src/particles/particles_boundaries_gpu.h +++ b/src/particles/particles_boundaries_gpu.h @@ -3,47 +3,32 @@ #ifndef PARTICLES_BOUNDARIES_H #define PARTICLES_BOUNDARIES_H -part_int_t Select_Particles_to_Transfer_GPU_function( - part_int_t n_local, int side, Real domainMin, Real domainMax, Real *pos_d, - int *n_transfer_d, int *n_transfer_h, bool *transfer_flags_d, - int *transfer_indices_d, int *replace_indices_d, int *transfer_prefix_sum_d, - int *transfer_prefix_sum_blocks_d); - -void Load_Particles_to_Transfer_GPU_function( - int n_transfer, int field_id, int n_fields_to_transfer, Real *field_d, - int *transfer_indices_d, Real *send_buffer_d, Real domainMin, - Real domainMax, int boundary_type); -void Load_Particles_to_Transfer_Int_GPU_function( - int n_transfer, int field_id, int n_fields_to_transfer, part_int_t *field_d, - int *transfer_indices_d, Real *send_buffer_d, Real domainMin, - Real domainMax, int boundary_type); - -void Replace_Transfered_Particles_GPU_function(int n_transfer, Real *field_d, - int *transfer_indices_d, - int *replace_indices_d, - bool print_replace); -void Replace_Transfered_Particles_Int_GPU_function(int n_transfer, - part_int_t *field_d, - int *transfer_indices_d, - int *replace_indices_d, - bool print_replace); - -void Copy_Particles_GPU_Buffer_to_Host_Buffer(int n_transfer, Real *buffer_h, - Real *buffer_d); - -void Copy_Particles_Host_Buffer_to_GPU_Buffer(int n_transfer, Real *buffer_h, - Real *buffer_d); - -void Unload_Particles_to_Transfer_GPU_function(int n_local, int n_transfer, - int field_id, - int n_fields_to_transfer, - Real *field_d, - Real *recv_buffer_d); -void Unload_Particles_Int_to_Transfer_GPU_function(int n_local, int n_transfer, - int field_id, - int n_fields_to_transfer, - part_int_t *field_d, - Real *recv_buffer_d); +part_int_t Select_Particles_to_Transfer_GPU_function(part_int_t n_local, int side, Real domainMin, Real domainMax, + Real *pos_d, int *n_transfer_d, int *n_transfer_h, + bool *transfer_flags_d, int *transfer_indices_d, + int *replace_indices_d, int *transfer_prefix_sum_d, + int *transfer_prefix_sum_blocks_d); + +void Load_Particles_to_Transfer_GPU_function(int n_transfer, int field_id, int n_fields_to_transfer, Real *field_d, + int *transfer_indices_d, Real *send_buffer_d, Real domainMin, + Real domainMax, int boundary_type); +void Load_Particles_to_Transfer_Int_GPU_function(int n_transfer, int field_id, int n_fields_to_transfer, + part_int_t *field_d, int *transfer_indices_d, Real *send_buffer_d, + Real domainMin, Real domainMax, int boundary_type); + +void Replace_Transfered_Particles_GPU_function(int n_transfer, Real *field_d, int *transfer_indices_d, + int *replace_indices_d, bool print_replace); +void Replace_Transfered_Particles_Int_GPU_function(int n_transfer, part_int_t *field_d, int *transfer_indices_d, + int *replace_indices_d, bool print_replace); + +void Copy_Particles_GPU_Buffer_to_Host_Buffer(int n_transfer, Real *buffer_h, Real *buffer_d); + +void Copy_Particles_Host_Buffer_to_GPU_Buffer(int n_transfer, Real *buffer_h, Real *buffer_d); + +void Unload_Particles_to_Transfer_GPU_function(int n_local, int n_transfer, int field_id, int n_fields_to_transfer, + Real *field_d, Real *recv_buffer_d); +void Unload_Particles_Int_to_Transfer_GPU_function(int n_local, int n_transfer, int field_id, int n_fields_to_transfer, + part_int_t *field_d, Real *recv_buffer_d); #endif // PARTICLES_H #endif // PARTICLES \ No newline at end of file diff --git a/src/particles/particles_dynamics.cpp b/src/particles/particles_dynamics.cpp index 72485acd9..97045a3a1 100644 --- a/src/particles/particles_dynamics.cpp +++ b/src/particles/particles_dynamics.cpp @@ -33,8 +33,7 @@ Real Grid3D::Calc_Particles_dt() part_int_t p_start, p_end; omp_id = omp_get_thread_num(); n_omp_procs = omp_get_num_threads(); - Get_OMP_Particles_Indxs(Particles.n_local, N_OMP_THREADS, omp_id, &p_start, - &p_end); + Get_OMP_Particles_Indxs(Particles.n_local, N_OMP_THREADS, omp_id, &p_start, &p_end); dt_particles_all[omp_id] = Calc_Particles_dt_function(p_start, p_end); } @@ -66,25 +65,21 @@ Real Grid3D::Calc_Particles_dt_GPU() // set values for GPU kernels int ngrid = (Particles.n_local + TPB_PARTICLES - 1) / TPB_PARTICLES; - if (ngrid > Particles.G.size_blocks_array) - chprintf(" Error: particles dt_array too small\n"); + if (ngrid > Particles.G.size_blocks_array) chprintf(" Error: particles dt_array too small\n"); Real max_dti; max_dti = Particles.Calc_Particles_dt_GPU_function( - ngrid, Particles.n_local, Particles.G.dx, Particles.G.dy, Particles.G.dz, - Particles.vel_x_dev, Particles.vel_y_dev, Particles.vel_z_dev, - Particles.G.dti_array_host, Particles.G.dti_array_dev); + ngrid, Particles.n_local, Particles.G.dx, Particles.G.dy, Particles.G.dz, Particles.vel_x_dev, + Particles.vel_y_dev, Particles.vel_z_dev, Particles.G.dti_array_host, Particles.G.dti_array_dev); Real dt_min; #ifdef COSMOLOGY Real scale_factor, vel_factor, da_min; - scale_factor = - 1 / (Cosmo.current_a * Cosmo.Get_Hubble_Parameter(Cosmo.current_a)) * - Cosmo.cosmo_h; - vel_factor = Cosmo.current_a / scale_factor; - da_min = vel_factor / max_dti; - dt_min = Cosmo.Get_dt_from_da(da_min); + scale_factor = 1 / (Cosmo.current_a * Cosmo.Get_Hubble_Parameter(Cosmo.current_a)) * Cosmo.cosmo_h; + vel_factor = Cosmo.current_a / scale_factor; + da_min = vel_factor / max_dti; + dt_min = Cosmo.Get_dt_from_da(da_min); #else dt_min = 1 / max_dti; #endif @@ -97,17 +92,14 @@ void Grid3D::Advance_Particles_KDK_Step1_GPU() { #ifdef COSMOLOGY Particles.Advance_Particles_KDK_Step1_Cosmo_GPU_function( - Particles.n_local, Cosmo.delta_a, Particles.pos_x_dev, - Particles.pos_y_dev, Particles.pos_z_dev, Particles.vel_x_dev, - Particles.vel_y_dev, Particles.vel_z_dev, Particles.grav_x_dev, - Particles.grav_y_dev, Particles.grav_z_dev, Cosmo.current_a, Cosmo.H0, - Cosmo.cosmo_h, Cosmo.Omega_M, Cosmo.Omega_L, Cosmo.Omega_K); + Particles.n_local, Cosmo.delta_a, Particles.pos_x_dev, Particles.pos_y_dev, Particles.pos_z_dev, + Particles.vel_x_dev, Particles.vel_y_dev, Particles.vel_z_dev, Particles.grav_x_dev, Particles.grav_y_dev, + Particles.grav_z_dev, Cosmo.current_a, Cosmo.H0, Cosmo.cosmo_h, Cosmo.Omega_M, Cosmo.Omega_L, Cosmo.Omega_K); #else - Particles.Advance_Particles_KDK_Step1_GPU_function( - Particles.n_local, Particles.dt, Particles.pos_x_dev, Particles.pos_y_dev, - Particles.pos_z_dev, Particles.vel_x_dev, Particles.vel_y_dev, - Particles.vel_z_dev, Particles.grav_x_dev, Particles.grav_y_dev, - Particles.grav_z_dev); + Particles.Advance_Particles_KDK_Step1_GPU_function(Particles.n_local, Particles.dt, Particles.pos_x_dev, + Particles.pos_y_dev, Particles.pos_z_dev, Particles.vel_x_dev, + Particles.vel_y_dev, Particles.vel_z_dev, Particles.grav_x_dev, + Particles.grav_y_dev, Particles.grav_z_dev); #endif } @@ -116,15 +108,13 @@ void Grid3D::Advance_Particles_KDK_Step2_GPU() { #ifdef COSMOLOGY Particles.Advance_Particles_KDK_Step2_Cosmo_GPU_function( - Particles.n_local, Cosmo.delta_a, Particles.vel_x_dev, - Particles.vel_y_dev, Particles.vel_z_dev, Particles.grav_x_dev, - Particles.grav_y_dev, Particles.grav_z_dev, Cosmo.current_a, Cosmo.H0, - Cosmo.cosmo_h, Cosmo.Omega_M, Cosmo.Omega_L, Cosmo.Omega_K); + Particles.n_local, Cosmo.delta_a, Particles.vel_x_dev, Particles.vel_y_dev, Particles.vel_z_dev, + Particles.grav_x_dev, Particles.grav_y_dev, Particles.grav_z_dev, Cosmo.current_a, Cosmo.H0, Cosmo.cosmo_h, + Cosmo.Omega_M, Cosmo.Omega_L, Cosmo.Omega_K); #else - Particles.Advance_Particles_KDK_Step2_GPU_function( - Particles.n_local, Particles.dt, Particles.vel_x_dev, Particles.vel_y_dev, - Particles.vel_z_dev, Particles.grav_x_dev, Particles.grav_y_dev, - Particles.grav_z_dev); + Particles.Advance_Particles_KDK_Step2_GPU_function(Particles.n_local, Particles.dt, Particles.vel_x_dev, + Particles.vel_y_dev, Particles.vel_z_dev, Particles.grav_x_dev, + Particles.grav_y_dev, Particles.grav_z_dev); #endif } @@ -220,8 +210,7 @@ void Grid3D::Advance_Particles_KDK_Step1() part_int_t p_start, p_end; omp_id = omp_get_thread_num(); n_omp_procs = omp_get_num_threads(); - Get_OMP_Particles_Indxs(Particles.n_local, N_OMP_THREADS, omp_id, &p_start, - &p_end); + Get_OMP_Particles_Indxs(Particles.n_local, N_OMP_THREADS, omp_id, &p_start, &p_end); #ifdef COSMOLOGY Advance_Particles_KDK_Cosmo_Step1_function(p_start, p_end); #else @@ -254,8 +243,7 @@ void Grid3D::Advance_Particles_KDK_Step2() part_int_t p_start, p_end; omp_id = omp_get_thread_num(); n_omp_procs = omp_get_num_threads(); - Get_OMP_Particles_Indxs(Particles.n_local, N_OMP_THREADS, omp_id, &p_start, - &p_end); + Get_OMP_Particles_Indxs(Particles.n_local, N_OMP_THREADS, omp_id, &p_start, &p_end); #ifdef COSMOLOGY Advance_Particles_KDK_Cosmo_Step2_function(p_start, p_end); #else @@ -272,8 +260,7 @@ void Grid3D::Advance_Particles_KDK_Step2() #ifdef PARTICLES_CPU // Update positions and velocities (step 1 of KDK scheme ) -void Grid3D::Advance_Particles_KDK_Step1_function(part_int_t p_start, - part_int_t p_end) +void Grid3D::Advance_Particles_KDK_Step1_function(part_int_t p_start, part_int_t p_end) { part_int_t pID; Real dt = Particles.dt; @@ -293,8 +280,7 @@ void Grid3D::Advance_Particles_KDK_Step1_function(part_int_t p_start, } // Update velocities (step 2 of KDK scheme ) -void Grid3D::Advance_Particles_KDK_Step2_function(part_int_t p_start, - part_int_t p_end) +void Grid3D::Advance_Particles_KDK_Step2_function(part_int_t p_start, part_int_t p_end) { part_int_t pID; Real dt = Particles.dt; @@ -327,8 +313,7 @@ Real Grid3D::Calc_Particles_dt_Cosmo() part_int_t p_start, p_end; omp_id = omp_get_thread_num(); n_omp_procs = omp_get_num_threads(); - Get_OMP_Particles_Indxs(Particles.n_local, N_OMP_THREADS, omp_id, &p_start, - &p_end); + Get_OMP_Particles_Indxs(Particles.n_local, N_OMP_THREADS, omp_id, &p_start, &p_end); dt_particles_all[omp_id] = Calc_Particles_dt_Cosmo_function(p_start, p_end); } @@ -354,16 +339,13 @@ Real Grid3D::Calc_Particles_dt_Cosmo() #ifdef PARTICLES_CPU // Loop over the particles anf compute dt_min for a cosmological simulation -Real Grid3D::Calc_Particles_dt_Cosmo_function(part_int_t p_start, - part_int_t p_end) +Real Grid3D::Calc_Particles_dt_Cosmo_function(part_int_t p_start, part_int_t p_end) { part_int_t pID; Real da, da_min, vel, dt_min; - da_min = 1e100; - Real scale_factor = - 1 / (Cosmo.current_a * Cosmo.Get_Hubble_Parameter(Cosmo.current_a)) * - Cosmo.cosmo_h; - Real a2 = (Cosmo.current_a) * (Cosmo.current_a); + da_min = 1e100; + Real scale_factor = 1 / (Cosmo.current_a * Cosmo.Get_Hubble_Parameter(Cosmo.current_a)) * Cosmo.cosmo_h; + Real a2 = (Cosmo.current_a) * (Cosmo.current_a); Real vel_factor; vel_factor = Cosmo.current_a / scale_factor; @@ -388,8 +370,7 @@ Real Grid3D::Calc_Particles_dt_Cosmo_function(part_int_t p_start, // Update positions and velocities (step 1 of KDK scheme ) COSMOLOGICAL // SIMULATION -void Grid3D::Advance_Particles_KDK_Cosmo_Step1_function(part_int_t p_start, - part_int_t p_end) +void Grid3D::Advance_Particles_KDK_Cosmo_Step1_function(part_int_t p_start, part_int_t p_end) { Real dt, dt_half; part_int_t pIndx; @@ -441,8 +422,7 @@ void Grid3D::Advance_Particles_KDK_Cosmo_Step1_function(part_int_t p_start, } // Update velocities (step 2 of KDK scheme ) COSMOLOGICAL SIMULATION -void Grid3D::Advance_Particles_KDK_Cosmo_Step2_function(part_int_t p_start, - part_int_t p_end) +void Grid3D::Advance_Particles_KDK_Cosmo_Step2_function(part_int_t p_start, part_int_t p_end) { Real dt; part_int_t pIndx; diff --git a/src/particles/particles_dynamics_gpu.cu b/src/particles/particles_dynamics_gpu.cu index 37d69c3b1..f4192ad62 100644 --- a/src/particles/particles_dynamics_gpu.cu +++ b/src/particles/particles_dynamics_gpu.cu @@ -18,8 +18,7 @@ // FUTURE FIX: The Hubble function was defined here because I couldn't get it // form other file, tried -dc flag when compiling buu paris broke. -__device__ Real Get_Hubble_Parameter_dev(Real a, Real H0, Real Omega_M, - Real Omega_L, Real Omega_K) +__device__ Real Get_Hubble_Parameter_dev(Real a, Real H0, Real Omega_M, Real Omega_L, Real Omega_K) { Real a2 = a * a; Real a3 = a2 * a; @@ -28,10 +27,8 @@ __device__ Real Get_Hubble_Parameter_dev(Real a, Real H0, Real Omega_M, } #endif -__global__ void Calc_Particles_dti_Kernel(part_int_t n_local, Real dx, Real dy, - Real dz, Real *vel_x_dev, - Real *vel_y_dev, Real *vel_z_dev, - Real *dti_array) +__global__ void Calc_Particles_dti_Kernel(part_int_t n_local, Real dx, Real dy, Real dz, Real *vel_x_dev, + Real *vel_y_dev, Real *vel_z_dev, Real *dti_array) { __shared__ Real max_dti[TPB_PARTICLES]; @@ -76,10 +73,9 @@ __global__ void Calc_Particles_dti_Kernel(part_int_t n_local, Real dx, Real dy, if (tid == 0) dti_array[blockIdx.x] = max_dti[0]; } -Real Particles_3D::Calc_Particles_dt_GPU_function( - int ngrid, part_int_t n_particles_local, Real dx, Real dy, Real dz, - Real *vel_x, Real *vel_y, Real *vel_z, Real *dti_array_host, - Real *dti_array_dev) +Real Particles_3D::Calc_Particles_dt_GPU_function(int ngrid, part_int_t n_particles_local, Real dx, Real dy, Real dz, + Real *vel_x, Real *vel_y, Real *vel_z, Real *dti_array_host, + Real *dti_array_dev) { // // set values for GPU kernels // int ngrid = (Particles.n_local + TPB_PARTICLES - 1) / TPB_PARTICLES; @@ -95,16 +91,14 @@ Real Particles_3D::Calc_Particles_dt_GPU_function( return 0; } - hipLaunchKernelGGL(Calc_Particles_dti_Kernel, dim1dGrid, dim1dBlock, 0, 0, - n_particles_local, dx, dy, dz, vel_x, vel_y, vel_z, - dti_array_dev); + hipLaunchKernelGGL(Calc_Particles_dti_Kernel, dim1dGrid, dim1dBlock, 0, 0, n_particles_local, dx, dy, dz, vel_x, + vel_y, vel_z, dti_array_dev); CudaCheckError(); // Initialize dt values Real max_dti = 0; // copy the dti array onto the CPU - CudaSafeCall(cudaMemcpy(dti_array_host, dti_array_dev, ngrid * sizeof(Real), - cudaMemcpyDeviceToHost)); + CudaSafeCall(cudaMemcpy(dti_array_host, dti_array_dev, ngrid * sizeof(Real), cudaMemcpyDeviceToHost)); // find maximum inverse timestep from CFL condition for (int i = 0; i < ngrid; i++) { max_dti = fmax(max_dti, dti_array_host[i]); @@ -113,10 +107,9 @@ Real Particles_3D::Calc_Particles_dt_GPU_function( return max_dti; } -__global__ void Advance_Particles_KDK_Step1_Kernel( - part_int_t n_local, Real dt, Real *pos_x_dev, Real *pos_y_dev, - Real *pos_z_dev, Real *vel_x_dev, Real *vel_y_dev, Real *vel_z_dev, - Real *grav_x_dev, Real *grav_y_dev, Real *grav_z_dev) +__global__ void Advance_Particles_KDK_Step1_Kernel(part_int_t n_local, Real dt, Real *pos_x_dev, Real *pos_y_dev, + Real *pos_z_dev, Real *vel_x_dev, Real *vel_y_dev, Real *vel_z_dev, + Real *grav_x_dev, Real *grav_y_dev, Real *grav_z_dev) { part_int_t tid = blockIdx.x * blockDim.x + threadIdx.x; if (tid >= n_local) return; @@ -132,9 +125,9 @@ __global__ void Advance_Particles_KDK_Step1_Kernel( pos_z_dev[tid] += dt * vel_z_dev[tid]; } -__global__ void Advance_Particles_KDK_Step2_Kernel( - part_int_t n_local, Real dt, Real *vel_x_dev, Real *vel_y_dev, - Real *vel_z_dev, Real *grav_x_dev, Real *grav_y_dev, Real *grav_z_dev) +__global__ void Advance_Particles_KDK_Step2_Kernel(part_int_t n_local, Real dt, Real *vel_x_dev, Real *vel_y_dev, + Real *vel_z_dev, Real *grav_x_dev, Real *grav_y_dev, + Real *grav_z_dev) { part_int_t tid = blockIdx.x * blockDim.x + threadIdx.x; if (tid >= n_local) return; @@ -145,10 +138,10 @@ __global__ void Advance_Particles_KDK_Step2_Kernel( vel_z_dev[tid] += 0.5 * dt * grav_z_dev[tid]; } -void Particles_3D::Advance_Particles_KDK_Step1_GPU_function( - part_int_t n_local, Real dt, Real *pos_x_dev, Real *pos_y_dev, - Real *pos_z_dev, Real *vel_x_dev, Real *vel_y_dev, Real *vel_z_dev, - Real *grav_x_dev, Real *grav_y_dev, Real *grav_z_dev) +void Particles_3D::Advance_Particles_KDK_Step1_GPU_function(part_int_t n_local, Real dt, Real *pos_x_dev, + Real *pos_y_dev, Real *pos_z_dev, Real *vel_x_dev, + Real *vel_y_dev, Real *vel_z_dev, Real *grav_x_dev, + Real *grav_y_dev, Real *grav_z_dev) { // set values for GPU kernels int ngrid = (n_local + TPB_PARTICLES - 1) / TPB_PARTICLES; @@ -159,17 +152,15 @@ void Particles_3D::Advance_Particles_KDK_Step1_GPU_function( // Only runs if there are local particles if (n_local > 0) { - hipLaunchKernelGGL(Advance_Particles_KDK_Step1_Kernel, dim1dGrid, - dim1dBlock, 0, 0, n_local, dt, pos_x_dev, pos_y_dev, - pos_z_dev, vel_x_dev, vel_y_dev, vel_z_dev, grav_x_dev, - grav_y_dev, grav_z_dev); + hipLaunchKernelGGL(Advance_Particles_KDK_Step1_Kernel, dim1dGrid, dim1dBlock, 0, 0, n_local, dt, pos_x_dev, + pos_y_dev, pos_z_dev, vel_x_dev, vel_y_dev, vel_z_dev, grav_x_dev, grav_y_dev, grav_z_dev); CudaCheckError(); } } -void Particles_3D::Advance_Particles_KDK_Step2_GPU_function( - part_int_t n_local, Real dt, Real *vel_x_dev, Real *vel_y_dev, - Real *vel_z_dev, Real *grav_x_dev, Real *grav_y_dev, Real *grav_z_dev) +void Particles_3D::Advance_Particles_KDK_Step2_GPU_function(part_int_t n_local, Real dt, Real *vel_x_dev, + Real *vel_y_dev, Real *vel_z_dev, Real *grav_x_dev, + Real *grav_y_dev, Real *grav_z_dev) { // set values for GPU kernels int ngrid = (n_local + TPB_PARTICLES - 1) / TPB_PARTICLES; @@ -180,20 +171,19 @@ void Particles_3D::Advance_Particles_KDK_Step2_GPU_function( // Only runs if there are local particles if (n_local > 0) { - hipLaunchKernelGGL(Advance_Particles_KDK_Step2_Kernel, dim1dGrid, - dim1dBlock, 0, 0, n_local, dt, vel_x_dev, vel_y_dev, - vel_z_dev, grav_x_dev, grav_y_dev, grav_z_dev); + hipLaunchKernelGGL(Advance_Particles_KDK_Step2_Kernel, dim1dGrid, dim1dBlock, 0, 0, n_local, dt, vel_x_dev, + vel_y_dev, vel_z_dev, grav_x_dev, grav_y_dev, grav_z_dev); CudaCheckError(); } } #ifdef COSMOLOGY -__global__ void Advance_Particles_KDK_Step1_Cosmo_Kernel( - part_int_t n_local, Real da, Real *pos_x_dev, Real *pos_y_dev, - Real *pos_z_dev, Real *vel_x_dev, Real *vel_y_dev, Real *vel_z_dev, - Real *grav_x_dev, Real *grav_y_dev, Real *grav_z_dev, Real current_a, - Real H0, Real cosmo_h, Real Omega_M, Real Omega_L, Real Omega_K) +__global__ void Advance_Particles_KDK_Step1_Cosmo_Kernel(part_int_t n_local, Real da, Real *pos_x_dev, Real *pos_y_dev, + Real *pos_z_dev, Real *vel_x_dev, Real *vel_y_dev, + Real *vel_z_dev, Real *grav_x_dev, Real *grav_y_dev, + Real *grav_z_dev, Real current_a, Real H0, Real cosmo_h, + Real Omega_M, Real Omega_L, Real Omega_K) { part_int_t tid = blockIdx.x * blockDim.x + threadIdx.x; if (tid >= n_local) return; @@ -232,11 +222,10 @@ __global__ void Advance_Particles_KDK_Step1_Cosmo_Kernel( pos_z_dev[tid] += dt_half * vel_z; } -__global__ void Advance_Particles_KDK_Step2_Cosmo_Kernel( - part_int_t n_local, Real da, Real *vel_x_dev, Real *vel_y_dev, - Real *vel_z_dev, Real *grav_x_dev, Real *grav_y_dev, Real *grav_z_dev, - Real current_a, Real H0, Real cosmo_h, Real Omega_M, Real Omega_L, - Real Omega_K) +__global__ void Advance_Particles_KDK_Step2_Cosmo_Kernel(part_int_t n_local, Real da, Real *vel_x_dev, Real *vel_y_dev, + Real *vel_z_dev, Real *grav_x_dev, Real *grav_y_dev, + Real *grav_z_dev, Real current_a, Real H0, Real cosmo_h, + Real Omega_M, Real Omega_L, Real Omega_K) { part_int_t tid = blockIdx.x * blockDim.x + threadIdx.x; if (tid >= n_local) return; @@ -250,10 +239,7 @@ __global__ void Advance_Particles_KDK_Step2_Cosmo_Kernel( da_half = da / 2; a_half = current_a - da_half; - dt = da / - (current_a * - Get_Hubble_Parameter_dev(current_a, H0, Omega_M, Omega_L, Omega_K)) * - cosmo_h; + dt = da / (current_a * Get_Hubble_Parameter_dev(current_a, H0, Omega_M, Omega_L, Omega_K)) * cosmo_h; // Advance velocities by the second half a step vel_x_dev[tid] = (a_half * vel_x + 0.5 * dt * grav_x_dev[tid]) / current_a; @@ -261,11 +247,12 @@ __global__ void Advance_Particles_KDK_Step2_Cosmo_Kernel( vel_z_dev[tid] = (a_half * vel_z + 0.5 * dt * grav_z_dev[tid]) / current_a; } -void Particles_3D::Advance_Particles_KDK_Step1_Cosmo_GPU_function( - part_int_t n_local, Real delta_a, Real *pos_x_dev, Real *pos_y_dev, - Real *pos_z_dev, Real *vel_x_dev, Real *vel_y_dev, Real *vel_z_dev, - Real *grav_x_dev, Real *grav_y_dev, Real *grav_z_dev, Real current_a, - Real H0, Real cosmo_h, Real Omega_M, Real Omega_L, Real Omega_K) +void Particles_3D::Advance_Particles_KDK_Step1_Cosmo_GPU_function(part_int_t n_local, Real delta_a, Real *pos_x_dev, + Real *pos_y_dev, Real *pos_z_dev, Real *vel_x_dev, + Real *vel_y_dev, Real *vel_z_dev, Real *grav_x_dev, + Real *grav_y_dev, Real *grav_z_dev, Real current_a, + Real H0, Real cosmo_h, Real Omega_M, Real Omega_L, + Real Omega_K) { // set values for GPU kernels int ngrid = (n_local + TPB_PARTICLES - 1) / TPB_PARTICLES; @@ -276,21 +263,19 @@ void Particles_3D::Advance_Particles_KDK_Step1_Cosmo_GPU_function( // Only runs if there are local particles if (n_local > 0) { - hipLaunchKernelGGL(Advance_Particles_KDK_Step1_Cosmo_Kernel, dim1dGrid, - dim1dBlock, 0, 0, n_local, delta_a, pos_x_dev, pos_y_dev, - pos_z_dev, vel_x_dev, vel_y_dev, vel_z_dev, grav_x_dev, - grav_y_dev, grav_z_dev, current_a, H0, cosmo_h, Omega_M, - Omega_L, Omega_K); + hipLaunchKernelGGL(Advance_Particles_KDK_Step1_Cosmo_Kernel, dim1dGrid, dim1dBlock, 0, 0, n_local, delta_a, + pos_x_dev, pos_y_dev, pos_z_dev, vel_x_dev, vel_y_dev, vel_z_dev, grav_x_dev, grav_y_dev, + grav_z_dev, current_a, H0, cosmo_h, Omega_M, Omega_L, Omega_K); CHECK(cudaDeviceSynchronize()); // CudaCheckError(); } } -void Particles_3D::Advance_Particles_KDK_Step2_Cosmo_GPU_function( - part_int_t n_local, Real delta_a, Real *vel_x_dev, Real *vel_y_dev, - Real *vel_z_dev, Real *grav_x_dev, Real *grav_y_dev, Real *grav_z_dev, - Real current_a, Real H0, Real cosmo_h, Real Omega_M, Real Omega_L, - Real Omega_K) +void Particles_3D::Advance_Particles_KDK_Step2_Cosmo_GPU_function(part_int_t n_local, Real delta_a, Real *vel_x_dev, + Real *vel_y_dev, Real *vel_z_dev, Real *grav_x_dev, + Real *grav_y_dev, Real *grav_z_dev, Real current_a, + Real H0, Real cosmo_h, Real Omega_M, Real Omega_L, + Real Omega_K) { // set values for GPU kernels int ngrid = (n_local + TPB_PARTICLES - 1) / TPB_PARTICLES; @@ -301,10 +286,9 @@ void Particles_3D::Advance_Particles_KDK_Step2_Cosmo_GPU_function( // Only runs if there are local particles if (n_local > 0) { - hipLaunchKernelGGL(Advance_Particles_KDK_Step2_Cosmo_Kernel, dim1dGrid, - dim1dBlock, 0, 0, n_local, delta_a, vel_x_dev, vel_y_dev, - vel_z_dev, grav_x_dev, grav_y_dev, grav_z_dev, current_a, - H0, cosmo_h, Omega_M, Omega_L, Omega_K); + hipLaunchKernelGGL(Advance_Particles_KDK_Step2_Cosmo_Kernel, dim1dGrid, dim1dBlock, 0, 0, n_local, delta_a, + vel_x_dev, vel_y_dev, vel_z_dev, grav_x_dev, grav_y_dev, grav_z_dev, current_a, H0, cosmo_h, + Omega_M, Omega_L, Omega_K); CHECK(cudaDeviceSynchronize()); // CudaCheckError(); } diff --git a/src/particles/supernova.h b/src/particles/supernova.h index 6d6fc7d11..e788ea0b2 100644 --- a/src/particles/supernova.h +++ b/src/particles/supernova.h @@ -13,27 +13,20 @@ namespace supernova { -const int SN = 0, RESOLVED = 1, NOT_RESOLVED = 2, ENERGY = 3, MOMENTUM = 4, - UNRES_ENERGY = 5; +const int SN = 0, RESOLVED = 1, NOT_RESOLVED = 2, ENERGY = 3, MOMENTUM = 4, UNRES_ENERGY = 5; // supernova rate: 1SN / 100 solar masses per 36 Myr -static const Real DEFAULT_SNR = 2.8e-7; -static const Real ENERGY_PER_SN = - 1e51 / MASS_UNIT * TIME_UNIT * TIME_UNIT / LENGTH_UNIT / LENGTH_UNIT; -static const Real MASS_PER_SN = 10.0; // 10 solarMasses per SN +static const Real DEFAULT_SNR = 2.8e-7; +static const Real ENERGY_PER_SN = 1e51 / MASS_UNIT * TIME_UNIT * TIME_UNIT / LENGTH_UNIT / LENGTH_UNIT; +static const Real MASS_PER_SN = 10.0; // 10 solarMasses per SN static const Real FINAL_MOMENTUM = - 2.8e5 / LENGTH_UNIT * 1e5 * - TIME_UNIT; // 2.8e5 M_s km/s * n_0^{-0.17} -> eq.(34) Kim & Ostriker (2015) -static const Real MU = 0.6; -static const Real R_SH = - 0.0302; // 30.2 pc * n_0^{-0.46} -> eq.(31) Kim & Ostriker (2015) -static const Real DEFAULT_SN_END = - 40000; // default value for when SNe stop (40 Myr) -static const Real DEFAULT_SN_START = - 4000; // default value for when SNe start (4 Myr) + 2.8e5 / LENGTH_UNIT * 1e5 * TIME_UNIT; // 2.8e5 M_s km/s * n_0^{-0.17} -> eq.(34) Kim & Ostriker (2015) +static const Real MU = 0.6; +static const Real R_SH = 0.0302; // 30.2 pc * n_0^{-0.46} -> eq.(31) Kim & Ostriker (2015) +static const Real DEFAULT_SN_END = 40000; // default value for when SNe stop (40 Myr) +static const Real DEFAULT_SN_START = 4000; // default value for when SNe start (4 Myr) -void initState(struct parameters* P, part_int_t n_local, - Real allocation_factor = 1); +void initState(struct parameters* P, part_int_t n_local, Real allocation_factor = 1); Real Cluster_Feedback(Grid3D& G, FeedbackAnalysis& sn_analysis); } // namespace supernova #endif // PARTICLES_GPU && SUPERNOVA diff --git a/src/reconstruction/pcm_cuda.cu b/src/reconstruction/pcm_cuda.cu index 56370e014..8d53832f1 100644 --- a/src/reconstruction/pcm_cuda.cu +++ b/src/reconstruction/pcm_cuda.cu @@ -11,8 +11,7 @@ #include "../utils/gpu.hpp" #include "../utils/mhd_utilities.h" -__global__ void PCM_Reconstruction_1D(Real *dev_conserved, Real *dev_bounds_L, - Real *dev_bounds_R, int n_cells, +__global__ void PCM_Reconstruction_1D(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int n_cells, int n_ghost, Real gamma, int n_fields) { // declare conserved variables for each stencil @@ -98,10 +97,9 @@ __global__ void PCM_Reconstruction_1D(Real *dev_conserved, Real *dev_bounds_L, } } -__global__ void PCM_Reconstruction_2D(Real *dev_conserved, Real *dev_bounds_Lx, - Real *dev_bounds_Rx, Real *dev_bounds_Ly, - Real *dev_bounds_Ry, int nx, int ny, - int n_ghost, Real gamma, int n_fields) +__global__ void PCM_Reconstruction_2D(Real *dev_conserved, Real *dev_bounds_Lx, Real *dev_bounds_Rx, + Real *dev_bounds_Ly, Real *dev_bounds_Ry, int nx, int ny, int n_ghost, Real gamma, + int n_fields) { // declare conserved variables for each stencil // these will be placed into registers for each thread @@ -256,11 +254,9 @@ __global__ void PCM_Reconstruction_2D(Real *dev_conserved, Real *dev_bounds_Lx, } } -__global__ void PCM_Reconstruction_3D(Real *dev_conserved, Real *dev_bounds_Lx, - Real *dev_bounds_Rx, Real *dev_bounds_Ly, - Real *dev_bounds_Ry, Real *dev_bounds_Lz, - Real *dev_bounds_Rz, int nx, int ny, - int nz, int n_ghost, Real gamma, +__global__ void PCM_Reconstruction_3D(Real *dev_conserved, Real *dev_bounds_Lx, Real *dev_bounds_Rx, + Real *dev_bounds_Ly, Real *dev_bounds_Ry, Real *dev_bounds_Lz, + Real *dev_bounds_Rz, int nx, int ny, int nz, int n_ghost, Real gamma, int n_fields) { // declare conserved variables for each stencil @@ -293,8 +289,7 @@ __global__ void PCM_Reconstruction_3D(Real *dev_conserved, Real *dev_bounds_Lx, #endif // SCALAR #ifdef MHD auto const [cellCenteredBx, cellCenteredBy, cellCenteredBz] = - mhd::utils::cellCenteredMagneticFields(dev_conserved, id, xid, yid, zid, - n_cells, nx, ny); + mhd::utils::cellCenteredMagneticFields(dev_conserved, id, xid, yid, zid, n_cells, nx, ny); #endif // MHD #ifdef DE Real const ge = dev_conserved[(n_fields - 1) * n_cells + id]; @@ -363,7 +358,7 @@ __global__ void PCM_Reconstruction_3D(Real *dev_conserved, Real *dev_bounds_Lx, // Send the x-1/2 Right interface if (xid > 0) { - id = cuda_utilities::compute1DIndex(xid - 1, yid, zid, nx, ny); + id = cuda_utilities::compute1DIndex(xid - 1, yid, zid, nx, ny); dev_bounds_Rx[id] = d; dev_bounds_Rx[n_cells + id] = mx; dev_bounds_Rx[2 * n_cells + id] = my; @@ -385,7 +380,7 @@ __global__ void PCM_Reconstruction_3D(Real *dev_conserved, Real *dev_bounds_Lx, if (yid > 0) { // Send the y-1/2 Right interface - id = cuda_utilities::compute1DIndex(xid, yid - 1, zid, nx, ny); + id = cuda_utilities::compute1DIndex(xid, yid - 1, zid, nx, ny); dev_bounds_Ry[id] = d; dev_bounds_Ry[n_cells + id] = mx; dev_bounds_Ry[2 * n_cells + id] = my; @@ -407,7 +402,7 @@ __global__ void PCM_Reconstruction_3D(Real *dev_conserved, Real *dev_bounds_Lx, if (zid > 0) { // Send the z-1/2 Right interface - id = cuda_utilities::compute1DIndex(xid, yid, zid - 1, nx, ny); + id = cuda_utilities::compute1DIndex(xid, yid, zid - 1, nx, ny); dev_bounds_Rz[id] = d; dev_bounds_Rz[n_cells + id] = mx; dev_bounds_Rz[2 * n_cells + id] = my; diff --git a/src/reconstruction/pcm_cuda.h b/src/reconstruction/pcm_cuda.h index 7050e4704..b6990c11b 100644 --- a/src/reconstruction/pcm_cuda.h +++ b/src/reconstruction/pcm_cuda.h @@ -6,20 +6,16 @@ #ifndef PCM_CUDA_H #define PCM_CUDA_H -__global__ void PCM_Reconstruction_1D(Real *dev_conserved, Real *dev_bounds_L, - Real *dev_bounds_R, int n_cells, +__global__ void PCM_Reconstruction_1D(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int n_cells, int n_ghost, Real gamma, int n_fields); -__global__ void PCM_Reconstruction_2D(Real *dev_conserved, Real *dev_bounds_Lx, - Real *dev_bounds_Rx, Real *dev_bounds_Ly, - Real *dev_bounds_Ry, int nx, int ny, - int n_ghost, Real gamma, int n_fields); +__global__ void PCM_Reconstruction_2D(Real *dev_conserved, Real *dev_bounds_Lx, Real *dev_bounds_Rx, + Real *dev_bounds_Ly, Real *dev_bounds_Ry, int nx, int ny, int n_ghost, Real gamma, + int n_fields); -__global__ void PCM_Reconstruction_3D(Real *dev_conserved, Real *dev_bounds_Lx, - Real *dev_bounds_Rx, Real *dev_bounds_Ly, - Real *dev_bounds_Ry, Real *dev_bounds_Lz, - Real *dev_bounds_Rz, int nx, int ny, - int nz, int n_ghost, Real gamma, +__global__ void PCM_Reconstruction_3D(Real *dev_conserved, Real *dev_bounds_Lx, Real *dev_bounds_Rx, + Real *dev_bounds_Ly, Real *dev_bounds_Ry, Real *dev_bounds_Lz, + Real *dev_bounds_Rz, int nx, int ny, int nz, int n_ghost, Real gamma, int n_fields); #endif // PCM_CUDA_H diff --git a/src/reconstruction/plmc_cuda.cu b/src/reconstruction/plmc_cuda.cu index 20007fcb1..787449f21 100644 --- a/src/reconstruction/plmc_cuda.cu +++ b/src/reconstruction/plmc_cuda.cu @@ -21,10 +21,8 @@ gamma, int dir) * \brief When passed a stencil of conserved variables, returns the left and right boundary values for the interface calculated using plm. */ -__global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, - Real *dev_bounds_R, int nx, int ny, int nz, - int n_ghost, Real dx, Real dt, Real gamma, int dir, - int n_fields) +__global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, int ny, int nz, + int n_ghost, Real dx, Real dt, Real gamma, int dir, int n_fields) { int n_cells = nx * ny * nz; int o1, o2, o3; @@ -85,8 +83,7 @@ __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, #endif // DE #ifdef SCALAR Real scalar_i[NSCALARS], scalar_imo[NSCALARS], scalar_ipo[NSCALARS]; - Real del_scalar_L[NSCALARS], del_scalar_R[NSCALARS], del_scalar_C[NSCALARS], - del_scalar_G[NSCALARS]; + Real del_scalar_L[NSCALARS], del_scalar_R[NSCALARS], del_scalar_C[NSCALARS], del_scalar_G[NSCALARS]; Real del_scalar_m_i[NSCALARS]; Real scalar_L_iph[NSCALARS], scalar_R_imh[NSCALARS]; #ifndef VL @@ -142,9 +139,7 @@ __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, dge = dev_conserved[(n_fields - 1) * n_cells + id]; p_i = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, dge, gamma); #else // not DE - p_i = (dev_conserved[4 * n_cells + id] - - 0.5 * d_i * (vx_i * vx_i + vy_i * vy_i + vz_i * vz_i)) * - (gamma - 1.0); + p_i = (dev_conserved[4 * n_cells + id] - 0.5 * d_i * (vx_i * vx_i + vy_i * vy_i + vz_i * vz_i)) * (gamma - 1.0); #endif // PRESSURE_DE p_i = fmax(p_i, (Real)TINY_NUMBER); #ifdef SCALAR @@ -169,10 +164,8 @@ __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, dge = dev_conserved[(n_fields - 1) * n_cells + id]; p_imo = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, dge, gamma); #else // not DE - p_imo = - (dev_conserved[4 * n_cells + id] - - 0.5 * d_imo * (vx_imo * vx_imo + vy_imo * vy_imo + vz_imo * vz_imo)) * - (gamma - 1.0); + p_imo = (dev_conserved[4 * n_cells + id] - 0.5 * d_imo * (vx_imo * vx_imo + vy_imo * vy_imo + vz_imo * vz_imo)) * + (gamma - 1.0); #endif // PRESSURE_DE p_imo = fmax(p_imo, (Real)TINY_NUMBER); #ifdef SCALAR @@ -197,10 +190,8 @@ __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, dge = dev_conserved[(n_fields - 1) * n_cells + id]; p_ipo = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, dge, gamma); #else // not DE - p_ipo = - (dev_conserved[4 * n_cells + id] - - 0.5 * d_ipo * (vx_ipo * vx_ipo + vy_ipo * vy_ipo + vz_ipo * vz_ipo)) * - (gamma - 1.0); + p_ipo = (dev_conserved[4 * n_cells + id] - 0.5 * d_ipo * (vx_ipo * vx_ipo + vy_ipo * vy_ipo + vz_ipo * vz_ipo)) * + (gamma - 1.0); #endif // PRESSURE_DE p_ipo = fmax(p_ipo, (Real)TINY_NUMBER); #ifdef SCALAR @@ -291,8 +282,7 @@ __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, del_scalar_R[i] = scalar_ipo[i] - scalar_i[i]; del_scalar_C[i] = 0.5 * (scalar_ipo[i] - scalar_imo[i]); if (del_scalar_L[i] * del_scalar_R[i] > 0.0) { - del_scalar_G[i] = 2.0 * del_scalar_L[i] * del_scalar_R[i] / - (del_scalar_L[i] + del_scalar_R[i]); + del_scalar_G[i] = 2.0 * del_scalar_L[i] * del_scalar_R[i] / (del_scalar_L[i] + del_scalar_R[i]); } else { del_scalar_G[i] = 0.0; } @@ -369,10 +359,9 @@ __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, for (int i = 0; i < NSCALARS; i++) { del_scalar_m_i[i] = 0.0; if (del_scalar_L[i] * del_scalar_R[i] > 0.0) { - lim_slope_a = fmin(fabs(del_scalar_L[i]), fabs(del_scalar_R[i])); - lim_slope_b = fmin(fabs(del_scalar_C[i]), fabs(del_scalar_G[i])); - del_scalar_m_i[i] = - sgn_CUDA(del_scalar_C[i]) * fmin(2.0 * lim_slope_a, lim_slope_b); + lim_slope_a = fmin(fabs(del_scalar_L[i]), fabs(del_scalar_R[i])); + lim_slope_b = fmin(fabs(del_scalar_C[i]), fabs(del_scalar_G[i])); + del_scalar_m_i[i] = sgn_CUDA(del_scalar_C[i]) * fmin(2.0 * lim_slope_a, lim_slope_b); } } #endif // SCALAR @@ -470,13 +459,13 @@ __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { - C = scalar_R_imh[i] + scalar_L_iph[i]; - scalar_R_imh[i] = fmax(fmin(scalar_i[i], scalar_imo[i]), scalar_R_imh[i]); - scalar_R_imh[i] = fmin(fmax(scalar_i[i], scalar_imo[i]), scalar_R_imh[i]); - scalar_L_iph[i] = C - scalar_R_imh[i]; - scalar_L_iph[i] = fmax(fmin(scalar_i[i], scalar_ipo[i]), scalar_L_iph[i]); - scalar_L_iph[i] = fmin(fmax(scalar_i[i], scalar_ipo[i]), scalar_L_iph[i]); - scalar_R_imh[i] = C - scalar_L_iph[i]; + C = scalar_R_imh[i] + scalar_L_iph[i]; + scalar_R_imh[i] = fmax(fmin(scalar_i[i], scalar_imo[i]), scalar_R_imh[i]); + scalar_R_imh[i] = fmin(fmax(scalar_i[i], scalar_imo[i]), scalar_R_imh[i]); + scalar_L_iph[i] = C - scalar_R_imh[i]; + scalar_L_iph[i] = fmax(fmin(scalar_i[i], scalar_ipo[i]), scalar_L_iph[i]); + scalar_L_iph[i] = fmin(fmax(scalar_i[i], scalar_ipo[i]), scalar_L_iph[i]); + scalar_R_imh[i] = C - scalar_L_iph[i]; del_scalar_m_i[i] = scalar_L_iph[i] - scalar_R_imh[i]; } #endif // SCALAR @@ -526,8 +515,7 @@ __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, if (lambda_m >= 0) { lamdiff = lambda_p - lambda_m; - sum_0 += lamdiff * - (-d_i * del_vx_m_i / (2 * a_i) + del_p_m_i / (2 * a_i * a_i)); + sum_0 += lamdiff * (-d_i * del_vx_m_i / (2 * a_i) + del_p_m_i / (2 * a_i * a_i)); sum_1 += lamdiff * (del_vx_m_i / 2.0 - del_p_m_i / (2 * a_i * d_i)); sum_4 += lamdiff * (-d_i * del_vx_m_i * a_i / 2.0 + del_p_m_i / 2.0); } @@ -549,8 +537,7 @@ __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, if (lambda_p >= 0) { lamdiff = lambda_p - lambda_p; - sum_0 += lamdiff * - (d_i * del_vx_m_i / (2 * a_i) + del_p_m_i / (2 * a_i * a_i)); + sum_0 += lamdiff * (d_i * del_vx_m_i / (2 * a_i) + del_p_m_i / (2 * a_i * a_i)); sum_1 += lamdiff * (del_vx_m_i / 2.0 + del_p_m_i / (2 * a_i * d_i)); sum_4 += lamdiff * (d_i * del_vx_m_i * a_i / 2.0 + del_p_m_i / 2.0); } @@ -583,8 +570,7 @@ __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, if (lambda_m <= 0) { lamdiff = lambda_m - lambda_m; - sum_0 += lamdiff * - (-d_i * del_vx_m_i / (2 * a_i) + del_p_m_i / (2 * a_i * a_i)); + sum_0 += lamdiff * (-d_i * del_vx_m_i / (2 * a_i) + del_p_m_i / (2 * a_i * a_i)); sum_1 += lamdiff * (del_vx_m_i / 2.0 - del_p_m_i / (2 * a_i * d_i)); sum_4 += lamdiff * (-d_i * del_vx_m_i * a_i / 2.0 + del_p_m_i / 2.0); } @@ -606,8 +592,7 @@ __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, if (lambda_p <= 0) { lamdiff = lambda_m - lambda_p; - sum_0 += lamdiff * - (d_i * del_vx_m_i / (2 * a_i) + del_p_m_i / (2 * a_i * a_i)); + sum_0 += lamdiff * (d_i * del_vx_m_i / (2 * a_i) + del_p_m_i / (2 * a_i * a_i)); sum_1 += lamdiff * (del_vx_m_i / 2.0 + del_p_m_i / (2 * a_i * d_i)); sum_4 += lamdiff * (d_i * del_vx_m_i * a_i / 2.0 + del_p_m_i / 2.0); } @@ -645,9 +630,7 @@ __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, dev_bounds_R[o2 * n_cells + id] = d_R_imh * vy_R_imh; dev_bounds_R[o3 * n_cells + id] = d_R_imh * vz_R_imh; dev_bounds_R[4 * n_cells + id] = - (p_R_imh / (gamma - 1.0)) + - 0.5 * d_R_imh * - (vx_R_imh * vx_R_imh + vy_R_imh * vy_R_imh + vz_R_imh * vz_R_imh); + (p_R_imh / (gamma - 1.0)) + 0.5 * d_R_imh * (vx_R_imh * vx_R_imh + vy_R_imh * vy_R_imh + vz_R_imh * vz_R_imh); #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { dev_bounds_R[(5 + i) * n_cells + id] = d_R_imh * scalar_R_imh[i]; @@ -663,9 +646,7 @@ __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, dev_bounds_L[o2 * n_cells + id] = d_L_iph * vy_L_iph; dev_bounds_L[o3 * n_cells + id] = d_L_iph * vz_L_iph; dev_bounds_L[4 * n_cells + id] = - (p_L_iph / (gamma - 1.0)) + - 0.5 * d_L_iph * - (vx_L_iph * vx_L_iph + vy_L_iph * vy_L_iph + vz_L_iph * vz_L_iph); + (p_L_iph / (gamma - 1.0)) + 0.5 * d_L_iph * (vx_L_iph * vx_L_iph + vy_L_iph * vy_L_iph + vz_L_iph * vz_L_iph); #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { dev_bounds_L[(5 + i) * n_cells + id] = d_L_iph * scalar_L_iph[i]; diff --git a/src/reconstruction/plmc_cuda.h b/src/reconstruction/plmc_cuda.h index 38ae7f815..36c707354 100644 --- a/src/reconstruction/plmc_cuda.h +++ b/src/reconstruction/plmc_cuda.h @@ -15,10 +15,8 @@ gamma, int dir) * \brief When passed a stencil of conserved variables, returns the left and right boundary values for the interface calculated using plm. */ -__global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, - Real *dev_bounds_R, int nx, int ny, int nz, - int n_ghost, Real dx, Real dt, Real gamma, int dir, - int n_fields); +__global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, int ny, int nz, + int n_ghost, Real dx, Real dt, Real gamma, int dir, int n_fields); #endif // PLMC_CUDA_H #endif // PLMC diff --git a/src/reconstruction/plmp_cuda.cu b/src/reconstruction/plmp_cuda.cu index 9be1230f6..1ca3f510b 100644 --- a/src/reconstruction/plmp_cuda.cu +++ b/src/reconstruction/plmp_cuda.cu @@ -19,10 +19,8 @@ gamma, int dir, int n_fields) * \brief When passed a stencil of conserved variables, returns the left and right boundary values for the interface calculated using plm. */ -__global__ void PLMP_cuda(Real *dev_conserved, Real *dev_bounds_L, - Real *dev_bounds_R, int nx, int ny, int nz, - int n_ghost, Real dx, Real dt, Real gamma, int dir, - int n_fields) +__global__ void PLMP_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, int ny, int nz, + int n_ghost, Real dx, Real dt, Real gamma, int dir, int n_fields) { int n_cells = nx * ny * nz; int o1, o2, o3; @@ -60,8 +58,7 @@ __global__ void PLMP_cuda(Real *dev_conserved, Real *dev_bounds_L, #endif // DE #ifdef SCALAR Real scalar_i[NSCALARS], scalar_imo[NSCALARS], scalar_ipo[NSCALARS]; - Real scalar_L[NSCALARS], scalar_R[NSCALARS], dscalar_L[NSCALARS], - dscalar_R[NSCALARS]; + Real scalar_L[NSCALARS], scalar_R[NSCALARS], dscalar_L[NSCALARS], dscalar_R[NSCALARS]; #endif // SCALAR #ifndef VL // Don't use velocities to reconstruct when using VL @@ -123,9 +120,7 @@ __global__ void PLMP_cuda(Real *dev_conserved, Real *dev_bounds_L, dge = dev_conserved[(n_fields - 1) * n_cells + id]; p_i = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, dge, gamma); #else - p_i = (dev_conserved[4 * n_cells + id] - - 0.5 * d_i * (vx_i * vx_i + vy_i * vy_i + vz_i * vz_i)) * - (gamma - 1.0); + p_i = (dev_conserved[4 * n_cells + id] - 0.5 * d_i * (vx_i * vx_i + vy_i * vy_i + vz_i * vz_i)) * (gamma - 1.0); #endif // PRESSURE_DE p_i = fmax(p_i, (Real)TINY_NUMBER); #ifdef SCALAR @@ -150,10 +145,8 @@ __global__ void PLMP_cuda(Real *dev_conserved, Real *dev_bounds_L, dge = dev_conserved[(n_fields - 1) * n_cells + id]; p_imo = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, dge, gamma); #else - p_imo = - (dev_conserved[4 * n_cells + id] - - 0.5 * d_imo * (vx_imo * vx_imo + vy_imo * vy_imo + vz_imo * vz_imo)) * - (gamma - 1.0); + p_imo = (dev_conserved[4 * n_cells + id] - 0.5 * d_imo * (vx_imo * vx_imo + vy_imo * vy_imo + vz_imo * vz_imo)) * + (gamma - 1.0); #endif // PRESSURE_DE p_imo = fmax(p_imo, (Real)TINY_NUMBER); #ifdef SCALAR @@ -178,10 +171,8 @@ __global__ void PLMP_cuda(Real *dev_conserved, Real *dev_bounds_L, dge = dev_conserved[(n_fields - 1) * n_cells + id]; p_ipo = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, dge, gamma); #else - p_ipo = - (dev_conserved[4 * n_cells + id] - - 0.5 * d_ipo * (vx_ipo * vx_ipo + vy_ipo * vy_ipo + vz_ipo * vz_ipo)) * - (gamma - 1.0); + p_ipo = (dev_conserved[4 * n_cells + id] - 0.5 * d_ipo * (vx_ipo * vx_ipo + vy_ipo * vy_ipo + vz_ipo * vz_ipo)) * + (gamma - 1.0); #endif // PRESSURE_DE p_ipo = fmax(p_ipo, (Real)TINY_NUMBER); #ifdef SCALAR @@ -204,8 +195,7 @@ __global__ void PLMP_cuda(Real *dev_conserved, Real *dev_bounds_L, #endif // DE #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { - Interface_Values_PLM(scalar_imo[i], scalar_i[i], scalar_ipo[i], - &scalar_L[i], &scalar_R[i]); + Interface_Values_PLM(scalar_imo[i], scalar_i[i], scalar_ipo[i], &scalar_L[i], &scalar_R[i]); } #endif // SCALAR @@ -222,10 +212,8 @@ __global__ void PLMP_cuda(Real *dev_conserved, Real *dev_bounds_L, my_R = d_R * vy_R; mz_L = d_L * vz_L; mz_R = d_R * vz_R; - E_L = p_L / (gamma - 1.0) + - 0.5 * d_L * (vx_L * vx_L + vy_L * vy_L + vz_L * vz_L); - E_R = p_R / (gamma - 1.0) + - 0.5 * d_R * (vx_R * vx_R + vy_R * vy_R + vz_R * vz_R); + E_L = p_L / (gamma - 1.0) + 0.5 * d_L * (vx_L * vx_L + vy_L * vy_L + vz_L * vz_L); + E_R = p_R / (gamma - 1.0) + 0.5 * d_R * (vx_R * vx_R + vy_R * vy_R + vz_R * vz_R); #ifdef DE dge_L = d_L * ge_L; dge_R = d_R * ge_R; @@ -322,8 +310,7 @@ __global__ void PLMP_cuda(Real *dev_conserved, Real *dev_bounds_L, } } -__device__ void Interface_Values_PLM(Real q_imo, Real q_i, Real q_ipo, - Real *q_L, Real *q_R) +__device__ void Interface_Values_PLM(Real q_imo, Real q_i, Real q_ipo, Real *q_L, Real *q_R) { Real del_q_L, del_q_R, del_q_C, del_q_G; Real lim_slope_a, lim_slope_b, del_q_m; diff --git a/src/reconstruction/plmp_cuda.h b/src/reconstruction/plmp_cuda.h index 627fb52a9..7768722d5 100644 --- a/src/reconstruction/plmp_cuda.h +++ b/src/reconstruction/plmp_cuda.h @@ -13,18 +13,15 @@ gamma, int dir, int n_fields) * \brief When passed a stencil of conserved variables, returns the left and right boundary values for the interface calculated using plmp. */ -__global__ void PLMP_cuda(Real *dev_conserved, Real *dev_bounds_L, - Real *dev_bounds_R, int nx, int ny, int nz, - int n_ghost, Real dx, Real dt, Real gamma, int dir, - int n_fields); +__global__ void PLMP_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, int ny, int nz, + int n_ghost, Real dx, Real dt, Real gamma, int dir, int n_fields); /*! \fn __device__ void Interface_Values_PLM(Real q_imo, Real q_i, Real q_ipo, Real *q_L, Real *q_R) * \brief Calculates the left and right interface values for a cell using linear reconstruction in the primitive variables with Van Leer or Minmod slope limiting. */ -__device__ void Interface_Values_PLM(Real q_imo, Real q_i, Real q_ipo, - Real *q_L, Real *q_R); +__device__ void Interface_Values_PLM(Real q_imo, Real q_i, Real q_ipo, Real *q_L, Real *q_R); #endif // PLMP_CUDA_H #endif // CUDA diff --git a/src/reconstruction/ppmc_cuda.cu b/src/reconstruction/ppmc_cuda.cu index d13dd0c60..6a4c4ef12 100644 --- a/src/reconstruction/ppmc_cuda.cu +++ b/src/reconstruction/ppmc_cuda.cu @@ -21,10 +21,8 @@ gamma, int dir, int n_fields) * \brief When passed a stencil of conserved variables, returns the left and right boundary values for the interface calculated using ppm. */ -__global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, - Real *dev_bounds_R, int nx, int ny, int nz, - int n_ghost, Real dx, Real dt, Real gamma, int dir, - int n_fields) +__global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, int ny, int nz, + int n_ghost, Real dx, Real dt, Real gamma, int dir, int n_fields) { int n_cells = nx * ny * nz; int o1, o2, o3; @@ -93,12 +91,9 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, #endif // VL #endif // DE #ifdef SCALAR - Real scalar_i[NSCALARS], scalar_imo[NSCALARS], scalar_ipo[NSCALARS], - scalar_imt[NSCALARS], scalar_ipt[NSCALARS]; - Real del_scalar_L[NSCALARS], del_scalar_R[NSCALARS], del_scalar_C[NSCALARS], - del_scalar_G[NSCALARS]; - Real del_scalar_m_imo[NSCALARS], del_scalar_m_i[NSCALARS], - del_scalar_m_ipo[NSCALARS]; + Real scalar_i[NSCALARS], scalar_imo[NSCALARS], scalar_ipo[NSCALARS], scalar_imt[NSCALARS], scalar_ipt[NSCALARS]; + Real del_scalar_L[NSCALARS], del_scalar_R[NSCALARS], del_scalar_C[NSCALARS], del_scalar_G[NSCALARS]; + Real del_scalar_m_imo[NSCALARS], del_scalar_m_i[NSCALARS], del_scalar_m_ipo[NSCALARS]; Real scalar_L[NSCALARS], scalar_R[NSCALARS]; // #ifdef CTU #ifndef VL @@ -154,9 +149,7 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, dge = dev_conserved[(n_fields - 1) * n_cells + id]; p_i = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, dge, gamma); #else // not DE - p_i = (dev_conserved[4 * n_cells + id] - - 0.5 * d_i * (vx_i * vx_i + vy_i * vy_i + vz_i * vz_i)) * - (gamma - 1.0); + p_i = (dev_conserved[4 * n_cells + id] - 0.5 * d_i * (vx_i * vx_i + vy_i * vy_i + vz_i * vz_i)) * (gamma - 1.0); #endif // PRESSURE_DE p_i = fmax(p_i, (Real)TINY_NUMBER); #ifdef DE @@ -181,10 +174,8 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, dge = dev_conserved[(n_fields - 1) * n_cells + id]; p_imo = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, dge, gamma); #else // not DE - p_imo = - (dev_conserved[4 * n_cells + id] - - 0.5 * d_imo * (vx_imo * vx_imo + vy_imo * vy_imo + vz_imo * vz_imo)) * - (gamma - 1.0); + p_imo = (dev_conserved[4 * n_cells + id] - 0.5 * d_imo * (vx_imo * vx_imo + vy_imo * vy_imo + vz_imo * vz_imo)) * + (gamma - 1.0); #endif // PRESSURE_DE p_imo = fmax(p_imo, (Real)TINY_NUMBER); #ifdef DE @@ -209,10 +200,8 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, dge = dev_conserved[(n_fields - 1) * n_cells + id]; p_ipo = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, dge, gamma); #else // not DE - p_ipo = - (dev_conserved[4 * n_cells + id] - - 0.5 * d_ipo * (vx_ipo * vx_ipo + vy_ipo * vy_ipo + vz_ipo * vz_ipo)) * - (gamma - 1.0); + p_ipo = (dev_conserved[4 * n_cells + id] - 0.5 * d_ipo * (vx_ipo * vx_ipo + vy_ipo * vy_ipo + vz_ipo * vz_ipo)) * + (gamma - 1.0); #endif // PRESSURE_DE p_ipo = fmax(p_ipo, (Real)TINY_NUMBER); #ifdef DE @@ -237,10 +226,8 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, dge = dev_conserved[(n_fields - 1) * n_cells + id]; p_imt = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, dge, gamma); #else // not DE - p_imt = - (dev_conserved[4 * n_cells + id] - - 0.5 * d_imt * (vx_imt * vx_imt + vy_imt * vy_imt + vz_imt * vz_imt)) * - (gamma - 1.0); + p_imt = (dev_conserved[4 * n_cells + id] - 0.5 * d_imt * (vx_imt * vx_imt + vy_imt * vy_imt + vz_imt * vz_imt)) * + (gamma - 1.0); #endif // PRESSURE_DE p_imt = fmax(p_imt, (Real)TINY_NUMBER); #ifdef DE @@ -265,10 +252,8 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, dge = dev_conserved[(n_fields - 1) * n_cells + id]; p_ipt = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, dge, gamma); #else // not DE - p_ipt = - (dev_conserved[4 * n_cells + id] - - 0.5 * d_ipt * (vx_ipt * vx_ipt + vy_ipt * vy_ipt + vz_ipt * vz_ipt)) * - (gamma - 1.0); + p_ipt = (dev_conserved[4 * n_cells + id] - 0.5 * d_ipt * (vx_ipt * vx_ipt + vy_ipt * vy_ipt + vz_ipt * vz_ipt)) * + (gamma - 1.0); #endif // PRESSURE_DE p_ipt = fmax(p_ipt, (Real)TINY_NUMBER); #ifdef DE @@ -356,8 +341,7 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, del_scalar_R[i] = scalar_i[i] - scalar_imo[i]; del_scalar_C[i] = 0.5 * (scalar_i[i] - scalar_imt[i]); if (del_scalar_L[i] * del_scalar_R[i] > 0.0) { - del_scalar_G[i] = 2.0 * del_scalar_L[i] * del_scalar_R[i] / - (del_scalar_L[i] + del_scalar_R[i]); + del_scalar_G[i] = 2.0 * del_scalar_L[i] * del_scalar_R[i] / (del_scalar_L[i] + del_scalar_R[i]); } else { del_scalar_G[i] = 0.0; } @@ -403,49 +387,42 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, if (del_a_0_L * del_a_0_R > 0.0) { lim_slope_a = fmin(fabs(del_a_0_L), fabs(del_a_0_R)); lim_slope_b = fmin(fabs(del_a_0_C), fabs(del_a_0_G)); - del_a_0_m = - sgn_CUDA(del_a_0_C) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); + del_a_0_m = sgn_CUDA(del_a_0_C) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); } if (del_a_1_L * del_a_1_R > 0.0) { lim_slope_a = fmin(fabs(del_a_1_L), fabs(del_a_1_R)); lim_slope_b = fmin(fabs(del_a_1_C), fabs(del_a_1_G)); - del_a_1_m = - sgn_CUDA(del_a_1_C) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); + del_a_1_m = sgn_CUDA(del_a_1_C) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); } if (del_a_2_L * del_a_2_R > 0.0) { lim_slope_a = fmin(fabs(del_a_2_L), fabs(del_a_2_R)); lim_slope_b = fmin(fabs(del_a_2_C), fabs(del_a_2_G)); - del_a_2_m = - sgn_CUDA(del_a_2_C) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); + del_a_2_m = sgn_CUDA(del_a_2_C) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); } if (del_a_3_L * del_a_3_R > 0.0) { lim_slope_a = fmin(fabs(del_a_3_L), fabs(del_a_3_R)); lim_slope_b = fmin(fabs(del_a_3_C), fabs(del_a_3_G)); - del_a_3_m = - sgn_CUDA(del_a_3_C) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); + del_a_3_m = sgn_CUDA(del_a_3_C) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); } if (del_a_4_L * del_a_4_R > 0.0) { lim_slope_a = fmin(fabs(del_a_4_L), fabs(del_a_4_R)); lim_slope_b = fmin(fabs(del_a_4_C), fabs(del_a_4_G)); - del_a_4_m = - sgn_CUDA(del_a_4_C) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); + del_a_4_m = sgn_CUDA(del_a_4_C) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); } #ifdef DE if (del_ge_L * del_ge_R > 0.0) { - lim_slope_a = fmin(fabs(del_ge_L), fabs(del_ge_R)); - lim_slope_b = fmin(fabs(del_ge_C), fabs(del_ge_G)); - del_ge_m_imo = - sgn_CUDA(del_ge_C) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); + lim_slope_a = fmin(fabs(del_ge_L), fabs(del_ge_R)); + lim_slope_b = fmin(fabs(del_ge_C), fabs(del_ge_G)); + del_ge_m_imo = sgn_CUDA(del_ge_C) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); } else del_ge_m_imo = 0.0; #endif // DE #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { if (del_scalar_L[i] * del_scalar_R[i] > 0.0) { - lim_slope_a = fmin(fabs(del_scalar_L[i]), fabs(del_scalar_R[i])); - lim_slope_b = fmin(fabs(del_scalar_C[i]), fabs(del_scalar_G[i])); - del_scalar_m_imo[i] = sgn_CUDA(del_scalar_C[i]) * - fmin((Real)2.0 * lim_slope_a, lim_slope_b); + lim_slope_a = fmin(fabs(del_scalar_L[i]), fabs(del_scalar_R[i])); + lim_slope_b = fmin(fabs(del_scalar_C[i]), fabs(del_scalar_G[i])); + del_scalar_m_imo[i] = sgn_CUDA(del_scalar_C[i]) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); } else del_scalar_m_imo[i] = 0.0; } @@ -535,8 +512,7 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, del_scalar_R[i] = scalar_ipo[i] - scalar_i[i]; del_scalar_C[i] = 0.5 * (scalar_ipo[i] - scalar_imo[i]); if (del_scalar_L[i] * del_scalar_R[i] > 0.0) { - del_scalar_G[i] = 2.0 * del_scalar_L[i] * del_scalar_R[i] / - (del_scalar_L[i] + del_scalar_R[i]); + del_scalar_G[i] = 2.0 * del_scalar_L[i] * del_scalar_R[i] / (del_scalar_L[i] + del_scalar_R[i]); } else { del_scalar_G[i] = 0.0; } @@ -582,39 +558,33 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, if (del_a_0_L * del_a_0_R > 0.0) { lim_slope_a = fmin(fabs(del_a_0_L), fabs(del_a_0_R)); lim_slope_b = fmin(fabs(del_a_0_C), fabs(del_a_0_G)); - del_a_0_m = - sgn_CUDA(del_a_0_C) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); + del_a_0_m = sgn_CUDA(del_a_0_C) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); } if (del_a_1_L * del_a_1_R > 0.0) { lim_slope_a = fmin(fabs(del_a_1_L), fabs(del_a_1_R)); lim_slope_b = fmin(fabs(del_a_1_C), fabs(del_a_1_G)); - del_a_1_m = - sgn_CUDA(del_a_1_C) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); + del_a_1_m = sgn_CUDA(del_a_1_C) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); } if (del_a_2_L * del_a_2_R > 0.0) { lim_slope_a = fmin(fabs(del_a_2_L), fabs(del_a_2_R)); lim_slope_b = fmin(fabs(del_a_2_C), fabs(del_a_2_G)); - del_a_2_m = - sgn_CUDA(del_a_2_C) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); + del_a_2_m = sgn_CUDA(del_a_2_C) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); } if (del_a_3_L * del_a_3_R > 0.0) { lim_slope_a = fmin(fabs(del_a_3_L), fabs(del_a_3_R)); lim_slope_b = fmin(fabs(del_a_3_C), fabs(del_a_3_G)); - del_a_3_m = - sgn_CUDA(del_a_3_C) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); + del_a_3_m = sgn_CUDA(del_a_3_C) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); } if (del_a_4_L * del_a_4_R > 0.0) { lim_slope_a = fmin(fabs(del_a_4_L), fabs(del_a_4_R)); lim_slope_b = fmin(fabs(del_a_4_C), fabs(del_a_4_G)); - del_a_4_m = - sgn_CUDA(del_a_4_C) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); + del_a_4_m = sgn_CUDA(del_a_4_C) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); } #ifdef DE if (del_ge_L * del_ge_R > 0.0) { lim_slope_a = fmin(fabs(del_ge_L), fabs(del_ge_R)); lim_slope_b = fmin(fabs(del_ge_C), fabs(del_ge_G)); - del_ge_m_i = - sgn_CUDA(del_ge_C) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); + del_ge_m_i = sgn_CUDA(del_ge_C) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); } else del_ge_m_i = 0.0; #endif // DE @@ -623,8 +593,7 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, if (del_scalar_L[i] * del_scalar_R[i] > 0.0) { lim_slope_a = fmin(fabs(del_scalar_L[i]), fabs(del_scalar_R[i])); lim_slope_b = fmin(fabs(del_scalar_C[i]), fabs(del_scalar_G[i])); - del_scalar_m_i[i] = sgn_CUDA(del_scalar_C[i]) * - fmin((Real)2.0 * lim_slope_a, lim_slope_b); + del_scalar_m_i[i] = sgn_CUDA(del_scalar_C[i]) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); } else del_scalar_m_i[i] = 0.0; } @@ -714,8 +683,7 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, del_scalar_R[i] = scalar_ipt[i] - scalar_ipo[i]; del_scalar_C[i] = 0.5 * (scalar_ipt[i] - scalar_i[i]); if (del_scalar_L[i] * del_scalar_R[i] > 0.0) { - del_scalar_G[i] = 2.0 * del_scalar_L[i] * del_scalar_R[i] / - (del_scalar_L[i] + del_scalar_R[i]); + del_scalar_G[i] = 2.0 * del_scalar_L[i] * del_scalar_R[i] / (del_scalar_L[i] + del_scalar_R[i]); } else { del_scalar_G[i] = 0.0; } @@ -761,49 +729,42 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, if (del_a_0_L * del_a_0_R > 0.0) { lim_slope_a = fmin(fabs(del_a_0_L), fabs(del_a_0_R)); lim_slope_b = fmin(fabs(del_a_0_C), fabs(del_a_0_G)); - del_a_0_m = - sgn_CUDA(del_a_0_C) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); + del_a_0_m = sgn_CUDA(del_a_0_C) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); } if (del_a_1_L * del_a_1_R > 0.0) { lim_slope_a = fmin(fabs(del_a_1_L), fabs(del_a_1_R)); lim_slope_b = fmin(fabs(del_a_1_C), fabs(del_a_1_G)); - del_a_1_m = - sgn_CUDA(del_a_1_C) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); + del_a_1_m = sgn_CUDA(del_a_1_C) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); } if (del_a_2_L * del_a_2_R > 0.0) { lim_slope_a = fmin(fabs(del_a_2_L), fabs(del_a_2_R)); lim_slope_b = fmin(fabs(del_a_2_C), fabs(del_a_2_G)); - del_a_2_m = - sgn_CUDA(del_a_2_C) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); + del_a_2_m = sgn_CUDA(del_a_2_C) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); } if (del_a_3_L * del_a_3_R > 0.0) { lim_slope_a = fmin(fabs(del_a_3_L), fabs(del_a_3_R)); lim_slope_b = fmin(fabs(del_a_3_C), fabs(del_a_3_G)); - del_a_3_m = - sgn_CUDA(del_a_3_C) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); + del_a_3_m = sgn_CUDA(del_a_3_C) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); } if (del_a_4_L * del_a_4_R > 0.0) { lim_slope_a = fmin(fabs(del_a_4_L), fabs(del_a_4_R)); lim_slope_b = fmin(fabs(del_a_4_C), fabs(del_a_4_G)); - del_a_4_m = - sgn_CUDA(del_a_4_C) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); + del_a_4_m = sgn_CUDA(del_a_4_C) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); } #ifdef DE if (del_ge_L * del_ge_R > 0.0) { - lim_slope_a = fmin(fabs(del_ge_L), fabs(del_ge_R)); - lim_slope_b = fmin(fabs(del_ge_C), fabs(del_ge_G)); - del_ge_m_ipo = - sgn_CUDA(del_ge_C) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); + lim_slope_a = fmin(fabs(del_ge_L), fabs(del_ge_R)); + lim_slope_b = fmin(fabs(del_ge_C), fabs(del_ge_G)); + del_ge_m_ipo = sgn_CUDA(del_ge_C) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); } else del_ge_m_ipo = 0.0; #endif // DE #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { if (del_scalar_L[i] * del_scalar_R[i] > 0.0) { - lim_slope_a = fmin(fabs(del_scalar_L[i]), fabs(del_scalar_R[i])); - lim_slope_b = fmin(fabs(del_scalar_C[i]), fabs(del_scalar_G[i])); - del_scalar_m_ipo[i] = sgn_CUDA(del_scalar_C[i]) * - fmin((Real)2.0 * lim_slope_a, lim_slope_b); + lim_slope_a = fmin(fabs(del_scalar_L[i]), fabs(del_scalar_R[i])); + lim_slope_b = fmin(fabs(del_scalar_C[i]), fabs(del_scalar_G[i])); + del_scalar_m_ipo[i] = sgn_CUDA(del_scalar_C[i]) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); } else del_scalar_m_ipo[i] = 0.0; } @@ -843,10 +804,8 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, #endif // DE #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { - scalar_L[i] = 0.5 * (scalar_i[i] + scalar_imo[i]) - - (del_scalar_m_i[i] - del_scalar_m_imo[i]) / 6.0; - scalar_R[i] = 0.5 * (scalar_ipo[i] + scalar_i[i]) - - (del_scalar_m_ipo[i] - del_scalar_m_i[i]) / 6.0; + scalar_L[i] = 0.5 * (scalar_i[i] + scalar_imo[i]) - (del_scalar_m_i[i] - del_scalar_m_imo[i]) / 6.0; + scalar_R[i] = 0.5 * (scalar_ipo[i] + scalar_i[i]) - (del_scalar_m_ipo[i] - del_scalar_m_i[i]) / 6.0; } #endif // SCALAR @@ -861,37 +820,23 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, if ((vz_R - vz_i) * (vz_i - vz_L) <= 0) vz_L = vz_R = vz_i; if ((p_R - p_i) * (p_i - p_L) <= 0) p_L = p_R = p_i; - if (6.0 * (d_R - d_L) * (d_i - 0.5 * (d_L + d_R)) > - (d_R - d_L) * (d_R - d_L)) - d_L = 3.0 * d_i - 2.0 * d_R; - if (6.0 * (vx_R - vx_L) * (vx_i - 0.5 * (vx_L + vx_R)) > - (vx_R - vx_L) * (vx_R - vx_L)) + if (6.0 * (d_R - d_L) * (d_i - 0.5 * (d_L + d_R)) > (d_R - d_L) * (d_R - d_L)) d_L = 3.0 * d_i - 2.0 * d_R; + if (6.0 * (vx_R - vx_L) * (vx_i - 0.5 * (vx_L + vx_R)) > (vx_R - vx_L) * (vx_R - vx_L)) vx_L = 3.0 * vx_i - 2.0 * vx_R; - if (6.0 * (vy_R - vy_L) * (vy_i - 0.5 * (vy_L + vy_R)) > - (vy_R - vy_L) * (vy_R - vy_L)) + if (6.0 * (vy_R - vy_L) * (vy_i - 0.5 * (vy_L + vy_R)) > (vy_R - vy_L) * (vy_R - vy_L)) vy_L = 3.0 * vy_i - 2.0 * vy_R; - if (6.0 * (vz_R - vz_L) * (vz_i - 0.5 * (vz_L + vz_R)) > - (vz_R - vz_L) * (vz_R - vz_L)) + if (6.0 * (vz_R - vz_L) * (vz_i - 0.5 * (vz_L + vz_R)) > (vz_R - vz_L) * (vz_R - vz_L)) vz_L = 3.0 * vz_i - 2.0 * vz_R; - if (6.0 * (p_R - p_L) * (p_i - 0.5 * (p_L + p_R)) > - (p_R - p_L) * (p_R - p_L)) - p_L = 3.0 * p_i - 2.0 * p_R; - - if (6.0 * (d_R - d_L) * (d_i - 0.5 * (d_L + d_R)) < - -(d_R - d_L) * (d_R - d_L)) - d_R = 3.0 * d_i - 2.0 * d_L; - if (6.0 * (vx_R - vx_L) * (vx_i - 0.5 * (vx_L + vx_R)) < - -(vx_R - vx_L) * (vx_R - vx_L)) + if (6.0 * (p_R - p_L) * (p_i - 0.5 * (p_L + p_R)) > (p_R - p_L) * (p_R - p_L)) p_L = 3.0 * p_i - 2.0 * p_R; + + if (6.0 * (d_R - d_L) * (d_i - 0.5 * (d_L + d_R)) < -(d_R - d_L) * (d_R - d_L)) d_R = 3.0 * d_i - 2.0 * d_L; + if (6.0 * (vx_R - vx_L) * (vx_i - 0.5 * (vx_L + vx_R)) < -(vx_R - vx_L) * (vx_R - vx_L)) vx_R = 3.0 * vx_i - 2.0 * vx_L; - if (6.0 * (vy_R - vy_L) * (vy_i - 0.5 * (vy_L + vy_R)) < - -(vy_R - vy_L) * (vy_R - vy_L)) + if (6.0 * (vy_R - vy_L) * (vy_i - 0.5 * (vy_L + vy_R)) < -(vy_R - vy_L) * (vy_R - vy_L)) vy_R = 3.0 * vy_i - 2.0 * vy_L; - if (6.0 * (vz_R - vz_L) * (vz_i - 0.5 * (vz_L + vz_R)) < - -(vz_R - vz_L) * (vz_R - vz_L)) + if (6.0 * (vz_R - vz_L) * (vz_i - 0.5 * (vz_L + vz_R)) < -(vz_R - vz_L) * (vz_R - vz_L)) vz_R = 3.0 * vz_i - 2.0 * vz_L; - if (6.0 * (p_R - p_L) * (p_i - 0.5 * (p_L + p_R)) < - -(p_R - p_L) * (p_R - p_L)) - p_R = 3.0 * p_i - 2.0 * p_L; + if (6.0 * (p_R - p_L) * (p_i - 0.5 * (p_L + p_R)) < -(p_R - p_L) * (p_R - p_L)) p_R = 3.0 * p_i - 2.0 * p_L; d_L = fmax(fmin(d_i, d_imo), d_L); d_L = fmin(fmax(d_i, d_imo), d_L); @@ -916,11 +861,9 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, #ifdef DE if ((ge_R - ge_i) * (ge_i - ge_L) <= 0) ge_L = ge_R = ge_i; - if (6.0 * (ge_R - ge_L) * (ge_i - 0.5 * (ge_L + ge_R)) > - (ge_R - ge_L) * (ge_R - ge_L)) + if (6.0 * (ge_R - ge_L) * (ge_i - 0.5 * (ge_L + ge_R)) > (ge_R - ge_L) * (ge_R - ge_L)) ge_L = 3.0 * ge_i - 2.0 * ge_R; - if (6.0 * (ge_R - ge_L) * (ge_i - 0.5 * (ge_L + ge_R)) < - -(ge_R - ge_L) * (ge_R - ge_L)) + if (6.0 * (ge_R - ge_L) * (ge_i - 0.5 * (ge_L + ge_R)) < -(ge_R - ge_L) * (ge_R - ge_L)) ge_R = 3.0 * ge_i - 2.0 * ge_L; ge_L = fmax(fmin(ge_i, ge_imo), ge_L); ge_L = fmin(fmax(ge_i, ge_imo), ge_L); @@ -930,14 +873,11 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { - if ((scalar_R[i] - scalar_i[i]) * (scalar_i[i] - scalar_L[i]) <= 0) - scalar_L[i] = scalar_R[i] = scalar_i[i]; - if (6.0 * (scalar_R[i] - scalar_L[i]) * - (scalar_i[i] - 0.5 * (scalar_L[i] + scalar_R[i])) > + if ((scalar_R[i] - scalar_i[i]) * (scalar_i[i] - scalar_L[i]) <= 0) scalar_L[i] = scalar_R[i] = scalar_i[i]; + if (6.0 * (scalar_R[i] - scalar_L[i]) * (scalar_i[i] - 0.5 * (scalar_L[i] + scalar_R[i])) > (scalar_R[i] - scalar_L[i]) * (scalar_R[i] - scalar_L[i])) scalar_L[i] = 3.0 * scalar_i[i] - 2.0 * scalar_R[i]; - if (6.0 * (scalar_R[i] - scalar_L[i]) * - (scalar_i[i] - 0.5 * (scalar_L[i] + scalar_R[i])) < + if (6.0 * (scalar_R[i] - scalar_L[i]) * (scalar_i[i] - 0.5 * (scalar_L[i] + scalar_R[i])) < -(scalar_R[i] - scalar_L[i]) * (scalar_R[i] - scalar_L[i])) scalar_R[i] = 3.0 * scalar_i[i] - 2.0 * scalar_L[i]; scalar_L[i] = fmax(fmin(scalar_i[i], scalar_imo[i]), scalar_L[i]); @@ -974,7 +914,7 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { del_scalar_m_i[i] = scalar_R[i] - scalar_L[i]; - scalar_6[i] = 6.0 * (scalar_i[i] - 0.5 * (scalar_L[i] + scalar_R[i])); + scalar_6[i] = 6.0 * (scalar_i[i] - 0.5 * (scalar_L[i] + scalar_R[i])); } #endif // SCALAR @@ -998,60 +938,30 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, lambda_min = fmin(lambda_m, (Real)0); // left interface value, i+1/2 - d_R = - d_R - lambda_max * (0.5 * dtodx) * - (del_d_m_i - (1.0 - (2.0 / 3.0) * lambda_max * dtodx) * d_6); - vx_R = vx_R - - lambda_max * (0.5 * dtodx) * - (del_vx_m_i - (1.0 - (2.0 / 3.0) * lambda_max * dtodx) * vx_6); - vy_R = vy_R - - lambda_max * (0.5 * dtodx) * - (del_vy_m_i - (1.0 - (2.0 / 3.0) * lambda_max * dtodx) * vy_6); - vz_R = vz_R - - lambda_max * (0.5 * dtodx) * - (del_vz_m_i - (1.0 - (2.0 / 3.0) * lambda_max * dtodx) * vz_6); - p_R = - p_R - lambda_max * (0.5 * dtodx) * - (del_p_m_i - (1.0 - (2.0 / 3.0) * lambda_max * dtodx) * p_6); + d_R = d_R - lambda_max * (0.5 * dtodx) * (del_d_m_i - (1.0 - (2.0 / 3.0) * lambda_max * dtodx) * d_6); + vx_R = vx_R - lambda_max * (0.5 * dtodx) * (del_vx_m_i - (1.0 - (2.0 / 3.0) * lambda_max * dtodx) * vx_6); + vy_R = vy_R - lambda_max * (0.5 * dtodx) * (del_vy_m_i - (1.0 - (2.0 / 3.0) * lambda_max * dtodx) * vy_6); + vz_R = vz_R - lambda_max * (0.5 * dtodx) * (del_vz_m_i - (1.0 - (2.0 / 3.0) * lambda_max * dtodx) * vz_6); + p_R = p_R - lambda_max * (0.5 * dtodx) * (del_p_m_i - (1.0 - (2.0 / 3.0) * lambda_max * dtodx) * p_6); // right interface value, i-1/2 - d_L = - d_L - lambda_min * (0.5 * dtodx) * - (del_d_m_i + (1.0 + (2.0 / 3.0) * lambda_min * dtodx) * d_6); - vx_L = vx_L - - lambda_min * (0.5 * dtodx) * - (del_vx_m_i + (1.0 + (2.0 / 3.0) * lambda_min * dtodx) * vx_6); - vy_L = vy_L - - lambda_min * (0.5 * dtodx) * - (del_vy_m_i + (1.0 + (2.0 / 3.0) * lambda_min * dtodx) * vy_6); - vz_L = vz_L - - lambda_min * (0.5 * dtodx) * - (del_vz_m_i + (1.0 + (2.0 / 3.0) * lambda_min * dtodx) * vz_6); - p_L = - p_L - lambda_min * (0.5 * dtodx) * - (del_p_m_i + (1.0 + (2.0 / 3.0) * lambda_min * dtodx) * p_6); + d_L = d_L - lambda_min * (0.5 * dtodx) * (del_d_m_i + (1.0 + (2.0 / 3.0) * lambda_min * dtodx) * d_6); + vx_L = vx_L - lambda_min * (0.5 * dtodx) * (del_vx_m_i + (1.0 + (2.0 / 3.0) * lambda_min * dtodx) * vx_6); + vy_L = vy_L - lambda_min * (0.5 * dtodx) * (del_vy_m_i + (1.0 + (2.0 / 3.0) * lambda_min * dtodx) * vy_6); + vz_L = vz_L - lambda_min * (0.5 * dtodx) * (del_vz_m_i + (1.0 + (2.0 / 3.0) * lambda_min * dtodx) * vz_6); + p_L = p_L - lambda_min * (0.5 * dtodx) * (del_p_m_i + (1.0 + (2.0 / 3.0) * lambda_min * dtodx) * p_6); #ifdef DE - ge_R = ge_R - - lambda_max * (0.5 * dtodx) * - (del_ge_m_i - (1.0 - (2.0 / 3.0) * lambda_max * dtodx) * ge_6); - ge_L = ge_L - - lambda_min * (0.5 * dtodx) * - (del_ge_m_i + (1.0 + (2.0 / 3.0) * lambda_min * dtodx) * ge_6); + ge_R = ge_R - lambda_max * (0.5 * dtodx) * (del_ge_m_i - (1.0 - (2.0 / 3.0) * lambda_max * dtodx) * ge_6); + ge_L = ge_L - lambda_min * (0.5 * dtodx) * (del_ge_m_i + (1.0 + (2.0 / 3.0) * lambda_min * dtodx) * ge_6); #endif // DE #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { - scalar_R[i] = - scalar_R[i] - - lambda_max * (0.5 * dtodx) * - (del_scalar_m_i[i] - - (1.0 - (2.0 / 3.0) * lambda_max * dtodx) * scalar_6[i]); - scalar_L[i] = - scalar_L[i] - - lambda_min * (0.5 * dtodx) * - (del_scalar_m_i[i] + - (1.0 + (2.0 / 3.0) * lambda_min * dtodx) * scalar_6[i]); + scalar_R[i] = scalar_R[i] - lambda_max * (0.5 * dtodx) * + (del_scalar_m_i[i] - (1.0 - (2.0 / 3.0) * lambda_max * dtodx) * scalar_6[i]); + scalar_L[i] = scalar_L[i] - lambda_min * (0.5 * dtodx) * + (del_scalar_m_i[i] + (1.0 + (2.0 / 3.0) * lambda_min * dtodx) * scalar_6[i]); } #endif // SCALAR @@ -1075,8 +985,7 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, if (lambda_m >= 0) { A = (0.5 * dtodx) * (lambda_p - lambda_m); - B = (1.0 / 3.0) * (dtodx) * (dtodx) * - (lambda_p * lambda_p - lambda_m * lambda_m); + B = (1.0 / 3.0) * (dtodx) * (dtodx) * (lambda_p * lambda_p - lambda_m * lambda_m); chi_1 = A * (del_d_m_i - d_6) + B * d_6; chi_2 = A * (del_vx_m_i - vx_6) + B * vx_6; @@ -1090,8 +999,7 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, } if (lambda_0 >= 0) { A = (0.5 * dtodx) * (lambda_p - lambda_0); - B = (1.0 / 3.0) * (dtodx) * (dtodx) * - (lambda_p * lambda_p - lambda_0 * lambda_0); + B = (1.0 / 3.0) * (dtodx) * (dtodx) * (lambda_p * lambda_p - lambda_0 * lambda_0); chi_1 = A * (del_d_m_i - d_6) + B * d_6; chi_2 = A * (del_vx_m_i - vx_6) + B * vx_6; @@ -1121,8 +1029,7 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, } if (lambda_p >= 0) { A = (0.5 * dtodx) * (lambda_p - lambda_p); - B = (1.0 / 3.0) * (dtodx) * (dtodx) * - (lambda_p * lambda_p - lambda_p * lambda_p); + B = (1.0 / 3.0) * (dtodx) * (dtodx) * (lambda_p * lambda_p - lambda_p * lambda_p); chi_1 = A * (del_d_m_i - d_6) + B * d_6; chi_2 = A * (del_vx_m_i - vx_6) + B * vx_6; @@ -1166,8 +1073,7 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, #endif // SCALAR if (lambda_m <= 0) { C = (0.5 * dtodx) * (lambda_m - lambda_m); - D = (1.0 / 3.0) * (dtodx) * (dtodx) * - (lambda_m * lambda_m - lambda_m * lambda_m); + D = (1.0 / 3.0) * (dtodx) * (dtodx) * (lambda_m * lambda_m - lambda_m * lambda_m); chi_1 = C * (del_d_m_i + d_6) + D * d_6; chi_2 = C * (del_vx_m_i + vx_6) + D * vx_6; @@ -1181,8 +1087,7 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, } if (lambda_0 <= 0) { C = (0.5 * dtodx) * (lambda_m - lambda_0); - D = (1.0 / 3.0) * (dtodx) * (dtodx) * - (lambda_m * lambda_m - lambda_0 * lambda_0); + D = (1.0 / 3.0) * (dtodx) * (dtodx) * (lambda_m * lambda_m - lambda_0 * lambda_0); chi_1 = C * (del_d_m_i + d_6) + D * d_6; chi_2 = C * (del_vx_m_i + vx_6) + D * vx_6; @@ -1212,8 +1117,7 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, } if (lambda_p <= 0) { C = (0.5 * dtodx) * (lambda_m - lambda_p); - D = (1.0 / 3.0) * (dtodx) * (dtodx) * - (lambda_m * lambda_m - lambda_p * lambda_p); + D = (1.0 / 3.0) * (dtodx) * (dtodx) * (lambda_m * lambda_m - lambda_p * lambda_p); chi_1 = C * (del_d_m_i + d_6) + D * d_6; chi_2 = C * (del_vx_m_i + vx_6) + D * vx_6; @@ -1259,9 +1163,7 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, dev_bounds_R[o1 * n_cells + id] = d_L * vx_L; dev_bounds_R[o2 * n_cells + id] = d_L * vy_L; dev_bounds_R[o3 * n_cells + id] = d_L * vz_L; - dev_bounds_R[4 * n_cells + id] = - p_L / (gamma - 1.0) + - 0.5 * d_L * (vx_L * vx_L + vy_L * vy_L + vz_L * vz_L); + dev_bounds_R[4 * n_cells + id] = p_L / (gamma - 1.0) + 0.5 * d_L * (vx_L * vx_L + vy_L * vy_L + vz_L * vz_L); #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { dev_bounds_R[(5 + i) * n_cells + id] = d_L * scalar_L[i]; @@ -1276,9 +1178,7 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, dev_bounds_L[o1 * n_cells + id] = d_R * vx_R; dev_bounds_L[o2 * n_cells + id] = d_R * vy_R; dev_bounds_L[o3 * n_cells + id] = d_R * vz_R; - dev_bounds_L[4 * n_cells + id] = - p_R / (gamma - 1.0) + - 0.5 * d_R * (vx_R * vx_R + vy_R * vy_R + vz_R * vz_R); + dev_bounds_L[4 * n_cells + id] = p_R / (gamma - 1.0) + 0.5 * d_R * (vx_R * vx_R + vy_R * vy_R + vz_R * vz_R); #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { dev_bounds_L[(5 + i) * n_cells + id] = d_R * scalar_R[i]; diff --git a/src/reconstruction/ppmc_cuda.h b/src/reconstruction/ppmc_cuda.h index dd7aa7e43..fc584ffb7 100644 --- a/src/reconstruction/ppmc_cuda.h +++ b/src/reconstruction/ppmc_cuda.h @@ -13,10 +13,8 @@ int nx, int ny, int nz, int n_ghost, Real dx, Real dt, Real gamma, int dir) * \brief When passed a stencil of conserved variables, returns the left and right boundary values for the interface calculated using ppm. */ -__global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, - Real *dev_bounds_R, int nx, int ny, int nz, - int n_ghost, Real dx, Real dt, Real gamma, int dir, - int n_fields); +__global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, int ny, int nz, + int n_ghost, Real dx, Real dt, Real gamma, int dir, int n_fields); #endif // PPMC_CUDA_H #endif // PPMC diff --git a/src/reconstruction/ppmp_cuda.cu b/src/reconstruction/ppmp_cuda.cu index a3f32197f..36b74aebf 100644 --- a/src/reconstruction/ppmp_cuda.cu +++ b/src/reconstruction/ppmp_cuda.cu @@ -25,10 +25,8 @@ * \brief When passed a stencil of conserved variables, returns the left and right boundary values for the interface calculated using ppm with limiting in the primitive variables. */ -__global__ void PPMP_cuda(Real *dev_conserved, Real *dev_bounds_L, - Real *dev_bounds_R, int nx, int ny, int nz, - int n_ghost, Real dx, Real dt, Real gamma, int dir, - int n_fields) +__global__ void PPMP_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, int ny, int nz, + int n_ghost, Real dx, Real dt, Real gamma, int dir, int n_fields) { int n_cells = nx * ny * nz; int o1, o2, o3; @@ -67,7 +65,7 @@ __global__ void PPMP_cuda(Real *dev_conserved, Real *dev_bounds_L, #ifndef VL // #ifdef CTU - Real cs, cl, cr; // sound speed in cell i, and at left and right boundaries + Real cs, cl, cr; // sound speed in cell i, and at left and right boundaries Real del_d, del_vx, del_vy, del_vz, del_p; // "slope" accross cell i Real d_6, vx_6, vy_6, vz_6, p_6; Real beta_m, beta_0, beta_p; @@ -92,13 +90,11 @@ __global__ void PPMP_cuda(Real *dev_conserved, Real *dev_bounds_L, #endif // DE #ifdef SCALAR - Real scalar_i[NSCALARS], scalar_imo[NSCALARS], scalar_ipo[NSCALARS], - scalar_imt[NSCALARS], scalar_ipt[NSCALARS]; + Real scalar_i[NSCALARS], scalar_imo[NSCALARS], scalar_ipo[NSCALARS], scalar_imt[NSCALARS], scalar_ipt[NSCALARS]; Real scalar_L[NSCALARS], scalar_R[NSCALARS]; #ifndef VL // #ifdef CTU - Real del_scalar[NSCALARS], scalar_6[NSCALARS], scalarL_0[NSCALARS], - scalarR_0[NSCALARS]; + Real del_scalar[NSCALARS], scalar_6[NSCALARS], scalarL_0[NSCALARS], scalarR_0[NSCALARS]; #endif // CTU #endif // SCALAR @@ -170,9 +166,7 @@ __global__ void PPMP_cuda(Real *dev_conserved, Real *dev_bounds_L, dge = dev_conserved[(n_fields - 1) * n_cells + id]; p_i = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, dge, gamma); #else - p_i = (dev_conserved[4 * n_cells + id] - - 0.5 * d_i * (vx_i * vx_i + vy_i * vy_i + vz_i * vz_i)) * - (gamma - 1.0); + p_i = (dev_conserved[4 * n_cells + id] - 0.5 * d_i * (vx_i * vx_i + vy_i * vy_i + vz_i * vz_i)) * (gamma - 1.0); #endif // PRESSURE_DE p_i = fmax(p_i, (Real)TINY_NUMBER); #ifdef DE @@ -197,10 +191,8 @@ __global__ void PPMP_cuda(Real *dev_conserved, Real *dev_bounds_L, dge = dev_conserved[(n_fields - 1) * n_cells + id]; p_imo = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, dge, gamma); #else - p_imo = - (dev_conserved[4 * n_cells + id] - - 0.5 * d_imo * (vx_imo * vx_imo + vy_imo * vy_imo + vz_imo * vz_imo)) * - (gamma - 1.0); + p_imo = (dev_conserved[4 * n_cells + id] - 0.5 * d_imo * (vx_imo * vx_imo + vy_imo * vy_imo + vz_imo * vz_imo)) * + (gamma - 1.0); #endif // PRESSURE_DE p_imo = fmax(p_imo, (Real)TINY_NUMBER); #ifdef DE @@ -225,10 +217,8 @@ __global__ void PPMP_cuda(Real *dev_conserved, Real *dev_bounds_L, dge = dev_conserved[(n_fields - 1) * n_cells + id]; p_ipo = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, dge, gamma); #else - p_ipo = - (dev_conserved[4 * n_cells + id] - - 0.5 * d_ipo * (vx_ipo * vx_ipo + vy_ipo * vy_ipo + vz_ipo * vz_ipo)) * - (gamma - 1.0); + p_ipo = (dev_conserved[4 * n_cells + id] - 0.5 * d_ipo * (vx_ipo * vx_ipo + vy_ipo * vy_ipo + vz_ipo * vz_ipo)) * + (gamma - 1.0); #endif // PRESSURE_DE p_ipo = fmax(p_ipo, (Real)TINY_NUMBER); #ifdef DE @@ -253,10 +243,8 @@ __global__ void PPMP_cuda(Real *dev_conserved, Real *dev_bounds_L, dge = dev_conserved[(n_fields - 1) * n_cells + id]; p_imt = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, dge, gamma); #else - p_imt = - (dev_conserved[4 * n_cells + id] - - 0.5 * d_imt * (vx_imt * vx_imt + vy_imt * vy_imt + vz_imt * vz_imt)) * - (gamma - 1.0); + p_imt = (dev_conserved[4 * n_cells + id] - 0.5 * d_imt * (vx_imt * vx_imt + vy_imt * vy_imt + vz_imt * vz_imt)) * + (gamma - 1.0); #endif // PRESSURE_DE p_imt = fmax(p_imt, (Real)TINY_NUMBER); #ifdef DE @@ -281,10 +269,8 @@ __global__ void PPMP_cuda(Real *dev_conserved, Real *dev_bounds_L, dge = dev_conserved[(n_fields - 1) * n_cells + id]; p_ipt = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, dge, gamma); #else - p_ipt = - (dev_conserved[4 * n_cells + id] - - 0.5 * d_ipt * (vx_ipt * vx_ipt + vy_ipt * vy_ipt + vz_ipt * vz_ipt)) * - (gamma - 1.0); + p_ipt = (dev_conserved[4 * n_cells + id] - 0.5 * d_ipt * (vx_ipt * vx_ipt + vy_ipt * vy_ipt + vz_ipt * vz_ipt)) * + (gamma - 1.0); #endif // PRESSURE_DE p_ipt = fmax(p_ipt, (Real)TINY_NUMBER); #ifdef DE @@ -300,31 +286,25 @@ __global__ void PPMP_cuda(Real *dev_conserved, Real *dev_bounds_L, if (dir == 0) id = xid - 3 + yid * nx + zid * nx * ny; if (dir == 1) id = xid + (yid - 3) * nx + zid * nx * ny; if (dir == 2) id = xid + yid * nx + (zid - 3) * nx * ny; - p_imth = (dev_conserved[4 * n_cells + id] - - 0.5 * - (dev_conserved[o1 * n_cells + id] * - dev_conserved[o1 * n_cells + id] + - dev_conserved[o2 * n_cells + id] * - dev_conserved[o2 * n_cells + id] + - dev_conserved[o3 * n_cells + id] * - dev_conserved[o3 * n_cells + id]) / - dev_conserved[id]) * - (gamma - 1.0); + p_imth = + (dev_conserved[4 * n_cells + id] - 0.5 * + (dev_conserved[o1 * n_cells + id] * dev_conserved[o1 * n_cells + id] + + dev_conserved[o2 * n_cells + id] * dev_conserved[o2 * n_cells + id] + + dev_conserved[o3 * n_cells + id] * dev_conserved[o3 * n_cells + id]) / + dev_conserved[id]) * + (gamma - 1.0); p_imth = fmax(p_imth, (Real)TINY_NUMBER); // cell i+3 if (dir == 0) id = xid + 3 + yid * nx + zid * nx * ny; if (dir == 1) id = xid + (yid + 3) * nx + zid * nx * ny; if (dir == 2) id = xid + yid * nx + (zid + 3) * nx * ny; - p_ipth = (dev_conserved[4 * n_cells + id] - - 0.5 * - (dev_conserved[o1 * n_cells + id] * - dev_conserved[o1 * n_cells + id] + - dev_conserved[o2 * n_cells + id] * - dev_conserved[o2 * n_cells + id] + - dev_conserved[o3 * n_cells + id] * - dev_conserved[o3 * n_cells + id]) / - dev_conserved[id]) * - (gamma - 1.0); + p_ipth = + (dev_conserved[4 * n_cells + id] - 0.5 * + (dev_conserved[o1 * n_cells + id] * dev_conserved[o1 * n_cells + id] + + dev_conserved[o2 * n_cells + id] * dev_conserved[o2 * n_cells + id] + + dev_conserved[o3 * n_cells + id] * dev_conserved[o3 * n_cells + id]) / + dev_conserved[id]) * + (gamma - 1.0); p_ipth = fmax(p_imth, (Real)TINY_NUMBER); #endif // FLATTENING @@ -336,8 +316,7 @@ __global__ void PPMP_cuda(Real *dev_conserved, Real *dev_bounds_L, del_q_ipo = Calculate_Slope(d_i, d_ipo, d_ipt); // Calculate the interface values for density - Interface_Values_PPM(d_imo, d_i, d_ipo, del_q_imo, del_q_i, del_q_ipo, &d_L, - &d_R); + Interface_Values_PPM(d_imo, d_i, d_ipo, del_q_imo, del_q_i, del_q_ipo, &d_L, &d_R); // Calculate the monotonized slopes for cells imo, i, ipo (x-velocity) del_q_imo = Calculate_Slope(vx_imt, vx_imo, vx_i); @@ -345,8 +324,7 @@ __global__ void PPMP_cuda(Real *dev_conserved, Real *dev_bounds_L, del_q_ipo = Calculate_Slope(vx_i, vx_ipo, vx_ipt); // Calculate the interface values for x-velocity - Interface_Values_PPM(vx_imo, vx_i, vx_ipo, del_q_imo, del_q_i, del_q_ipo, - &vx_L, &vx_R); + Interface_Values_PPM(vx_imo, vx_i, vx_ipo, del_q_imo, del_q_i, del_q_ipo, &vx_L, &vx_R); // Calculate the monotonized slopes for cells imo, i, ipo (y-velocity) del_q_imo = Calculate_Slope(vy_imt, vy_imo, vy_i); @@ -354,8 +332,7 @@ __global__ void PPMP_cuda(Real *dev_conserved, Real *dev_bounds_L, del_q_ipo = Calculate_Slope(vy_i, vy_ipo, vy_ipt); // Calculate the interface values for y-velocity - Interface_Values_PPM(vy_imo, vy_i, vy_ipo, del_q_imo, del_q_i, del_q_ipo, - &vy_L, &vy_R); + Interface_Values_PPM(vy_imo, vy_i, vy_ipo, del_q_imo, del_q_i, del_q_ipo, &vy_L, &vy_R); // Calculate the monotonized slopes for cells imo, i, ipo (z-velocity) del_q_imo = Calculate_Slope(vz_imt, vz_imo, vz_i); @@ -363,8 +340,7 @@ __global__ void PPMP_cuda(Real *dev_conserved, Real *dev_bounds_L, del_q_ipo = Calculate_Slope(vz_i, vz_ipo, vz_ipt); // Calculate the interface values for z-velocity - Interface_Values_PPM(vz_imo, vz_i, vz_ipo, del_q_imo, del_q_i, del_q_ipo, - &vz_L, &vz_R); + Interface_Values_PPM(vz_imo, vz_i, vz_ipo, del_q_imo, del_q_i, del_q_ipo, &vz_L, &vz_R); // Calculate the monotonized slopes for cells imo, i, ipo (pressure) del_q_imo = Calculate_Slope(p_imt, p_imo, p_i); @@ -372,8 +348,7 @@ __global__ void PPMP_cuda(Real *dev_conserved, Real *dev_bounds_L, del_q_ipo = Calculate_Slope(p_i, p_ipo, p_ipt); // Calculate the interface values for pressure - Interface_Values_PPM(p_imo, p_i, p_ipo, del_q_imo, del_q_i, del_q_ipo, &p_L, - &p_R); + Interface_Values_PPM(p_imo, p_i, p_ipo, del_q_imo, del_q_i, del_q_ipo, &p_L, &p_R); #ifdef DE // Calculate the monotonized slopes for cells imo, i, ipo (internal energy) @@ -382,8 +357,7 @@ __global__ void PPMP_cuda(Real *dev_conserved, Real *dev_bounds_L, del_q_ipo = Calculate_Slope(ge_i, ge_ipo, ge_ipt); // Calculate the interface values for internal energy - Interface_Values_PPM(ge_imo, ge_i, ge_ipo, del_q_imo, del_q_i, del_q_ipo, - &ge_L, &ge_R); + Interface_Values_PPM(ge_imo, ge_i, ge_ipo, del_q_imo, del_q_i, del_q_ipo, &ge_L, &ge_R); #endif // DE #ifdef SCALAR @@ -394,8 +368,8 @@ __global__ void PPMP_cuda(Real *dev_conserved, Real *dev_bounds_L, del_q_ipo = Calculate_Slope(scalar_i[i], scalar_ipo[i], scalar_ipt[i]); // Calculate the interface values for the passive scalars - Interface_Values_PPM(scalar_imo[i], scalar_i[i], scalar_ipo[i], del_q_imo, - del_q_i, del_q_ipo, &scalar_L[i], &scalar_R[i]); + Interface_Values_PPM(scalar_imo[i], scalar_i[i], scalar_ipo[i], del_q_imo, del_q_i, del_q_ipo, &scalar_L[i], + &scalar_R[i]); } #endif // SCALAR @@ -414,8 +388,7 @@ __global__ void PPMP_cuda(Real *dev_conserved, Real *dev_bounds_L, // calculate condition 5, pressure vs density jumps (Fryxell Eqn 39) // (Colella Eqn 3.2) if c5 is true, set value of eta for discontinuity // steepening - if ((fabs(p_ipo - p_imo) / fmin(p_ipo, p_imo)) < - 0.1 * gamma * (fabs(d_ipo - d_imo) / fmin(d_ipo, d_imo))) { + if ((fabs(p_ipo - p_imo) / fmin(p_ipo, p_imo)) < 0.1 * gamma * (fabs(d_ipo - d_imo) / fmin(d_ipo, d_imo))) { // calculate first eta value (Fryxell Eqn 36) (Colella Eqn 1.16.5) eta_i = calc_eta(d2_rho_imo, d2_rho_ipo, dx, d_imo, d_ipo); // calculate steepening coefficient (Fryxell Eqn 40) (Colella @@ -531,9 +504,7 @@ __global__ void PPMP_cuda(Real *dev_conserved, Real *dev_bounds_L, #endif // DE #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { - scalar_6[i] = - 6.0 * - (scalar_i[i] - 0.5 * (scalar_L[i] + scalar_R[i])); // Fryxell Eqn 30 + scalar_6[i] = 6.0 * (scalar_i[i] - 0.5 * (scalar_L[i] + scalar_R[i])); // Fryxell Eqn 30 } #endif // SCALAR @@ -565,10 +536,7 @@ __global__ void PPMP_cuda(Real *dev_conserved, Real *dev_bounds_L, #endif // DE #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { - scalarL_0[i] = - scalar_L[i] + - 0.5 * alpha_0 * - (del_scalar[i] + scalar_6[i] * (1 - (2. / 3.) * alpha_0)); + scalarL_0[i] = scalar_L[i] + 0.5 * alpha_0 * (del_scalar[i] + scalar_6[i] * (1 - (2. / 3.) * alpha_0)); } #endif // SCALAR pL_0 = p_L + 0.5 * alpha_0 * (del_p + p_6 * (1 - (2. / 3.) * alpha_0)); @@ -586,9 +554,7 @@ __global__ void PPMP_cuda(Real *dev_conserved, Real *dev_bounds_L, #endif // DE #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { - scalarR_0[i] = scalar_R[i] - 0.5 * beta_0 * - (del_scalar[i] - - scalar_6[i] * (1 - (2. / 3.) * beta_0)); + scalarR_0[i] = scalar_R[i] - 0.5 * beta_0 * (del_scalar[i] - scalar_6[i] * (1 - (2. / 3.) * beta_0)); } #endif // SCALAR pR_0 = p_R - 0.5 * beta_0 * (del_p - p_6 * (1 - (2. / 3.) * beta_0)); @@ -689,9 +655,7 @@ __global__ void PPMP_cuda(Real *dev_conserved, Real *dev_bounds_L, dev_bounds_R[o1 * n_cells + id] = d_L * vx_L; dev_bounds_R[o2 * n_cells + id] = d_L * vy_L; dev_bounds_R[o3 * n_cells + id] = d_L * vz_L; - dev_bounds_R[4 * n_cells + id] = - p_L / (gamma - 1.0) + - 0.5 * d_L * (vx_L * vx_L + vy_L * vy_L + vz_L * vz_L); + dev_bounds_R[4 * n_cells + id] = p_L / (gamma - 1.0) + 0.5 * d_L * (vx_L * vx_L + vy_L * vy_L + vz_L * vz_L); #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { dev_bounds_R[(5 + i) * n_cells + id] = d_L * scalar_L[i]; @@ -706,9 +670,7 @@ __global__ void PPMP_cuda(Real *dev_conserved, Real *dev_bounds_L, dev_bounds_L[o1 * n_cells + id] = d_R * vx_R; dev_bounds_L[o2 * n_cells + id] = d_R * vy_R; dev_bounds_L[o3 * n_cells + id] = d_R * vz_R; - dev_bounds_L[4 * n_cells + id] = - p_R / (gamma - 1.0) + - 0.5 * d_R * (vx_R * vx_R + vy_R * vy_R + vz_R * vz_R); + dev_bounds_L[4 * n_cells + id] = p_R / (gamma - 1.0) + 0.5 * d_R * (vx_R * vx_R + vy_R * vy_R + vz_R * vz_R); #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { dev_bounds_L[(5 + i) * n_cells + id] = d_R * scalar_R[i]; @@ -762,9 +724,8 @@ __device__ Real Calculate_Slope(Real q_imo, Real q_i, Real q_ipo) * \brief Calculates the left and right interface values for a cell using parabolic reconstruction in the primitive variables with limited slopes provided. Applies further monotonicity constraints.*/ -__device__ void Interface_Values_PPM(Real q_imo, Real q_i, Real q_ipo, - Real del_q_imo, Real del_q_i, - Real del_q_ipo, Real *q_L, Real *q_R) +__device__ void Interface_Values_PPM(Real q_imo, Real q_i, Real q_ipo, Real del_q_imo, Real del_q_i, Real del_q_ipo, + Real *q_L, Real *q_R) { // Calculate the left and right interface values using the limited slopes *q_L = 0.5 * (q_i + q_imo) - (1.0 / 6.0) * (del_q_i - del_q_imo); @@ -777,11 +738,9 @@ __device__ void Interface_Values_PPM(Real q_imo, Real q_i, Real q_ipo, if ((*q_R - q_i) * (q_i - *q_L) <= 0) *q_L = *q_R = q_i; // steep gradient criterion (Fryxell Eqn 53, Fig 12) - if (6.0 * (*q_R - *q_L) * (q_i - 0.5 * (*q_L + *q_R)) > - (*q_R - *q_L) * (*q_R - *q_L)) + if (6.0 * (*q_R - *q_L) * (q_i - 0.5 * (*q_L + *q_R)) > (*q_R - *q_L) * (*q_R - *q_L)) *q_L = 3.0 * q_i - 2.0 * (*q_R); - if (6.0 * (*q_R - *q_L) * (q_i - 0.5 * (*q_L + *q_R)) < - -(*q_R - *q_L) * (*q_R - *q_L)) + if (6.0 * (*q_R - *q_L) * (q_i - 0.5 * (*q_L + *q_R)) < -(*q_R - *q_L) * (*q_R - *q_L)) *q_R = 3.0 * q_i - 2.0 * (*q_L); *q_L = fmax(fmin(q_i, q_imo), *q_L); @@ -801,8 +760,7 @@ __device__ Real calc_d2_rho(Real rho_imo, Real rho_i, Real rho_ipo, Real dx) /*! \fn calc_eta * \brief Returns a dimensionless quantity relating the 1st and 3rd derivatives See Fryxell Eqn 36. */ -__device__ Real calc_eta(Real d2rho_imo, Real d2rho_ipo, Real dx, Real rho_imo, - Real rho_ipo) +__device__ Real calc_eta(Real d2rho_imo, Real d2rho_ipo, Real dx, Real rho_imo, Real rho_ipo) { Real A, B; diff --git a/src/reconstruction/ppmp_cuda.h b/src/reconstruction/ppmp_cuda.h index b6cf0d212..ca0bf1553 100644 --- a/src/reconstruction/ppmp_cuda.h +++ b/src/reconstruction/ppmp_cuda.h @@ -14,10 +14,8 @@ * \brief When passed a stencil of conserved variables, returns the left and right boundary values for the interface calculated using ppm with limiting in the primitive variables. */ -__global__ void PPMP_cuda(Real *dev_conserved, Real *dev_bounds_L, - Real *dev_bounds_R, int nx, int ny, int nz, - int n_ghost, Real dx, Real dt, Real gamma, int dir, - int n_fields); +__global__ void PPMP_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, int ny, int nz, + int n_ghost, Real dx, Real dt, Real gamma, int dir, int n_fields); /*! \fn __device__ Real Calculate_Slope(Real q_imo, Real q_i, Real q_ipo) * \brief Calculates the limited slope across a cell.*/ @@ -28,9 +26,8 @@ __device__ Real Calculate_Slope(Real q_imo, Real q_i, Real q_ipo); * \brief Calculates the left and right interface values for a cell using parabolic reconstruction in the primitive variables with limited slopes provided. Applies further monotonicity constraints.*/ -__device__ void Interface_Values_PPM(Real q_imo, Real q_i, Real q_ipo, - Real del_q_imo, Real del_q_i, - Real del_q_ipo, Real *q_L, Real *q_R); +__device__ void Interface_Values_PPM(Real q_imo, Real q_i, Real q_ipo, Real del_q_imo, Real del_q_i, Real del_q_ipo, + Real *q_L, Real *q_R); /*! \fn calc_d2_rho * \brief Returns the second derivative of rho across zone i. (Fryxell Eqn 35) @@ -40,8 +37,7 @@ __device__ Real calc_d2_rho(Real rho_imo, Real rho_i, Real rho_ipo, Real dx); /*! \fn calc_eta * \brief Returns a dimensionless quantity relating the 1st and 3rd derivatives See Fryxell Eqn 36. */ -__device__ Real calc_eta(Real d2rho_imo, Real d2rho_ipo, Real dx, Real rho_imo, - Real rho_ipo); +__device__ Real calc_eta(Real d2rho_imo, Real d2rho_ipo, Real dx, Real rho_imo, Real rho_ipo); #endif // PPMP_CUDA_H #endif // CUDA diff --git a/src/riemann_solvers/exact_cuda.cu b/src/riemann_solvers/exact_cuda.cu index 088166742..9e8b2298d 100644 --- a/src/riemann_solvers/exact_cuda.cu +++ b/src/riemann_solvers/exact_cuda.cu @@ -19,10 +19,8 @@ * *dev_flux, int nx, int ny, int nz, int n_ghost, Real gamma, int dir, int * n_fields) \brief Exact Riemann solver based on the Fortran code given in * Sec. 4.9 of Toro (1999). */ -__global__ void Calculate_Exact_Fluxes_CUDA(Real *dev_bounds_L, - Real *dev_bounds_R, Real *dev_flux, - int nx, int ny, int nz, int n_ghost, - Real gamma, int dir, int n_fields) +__global__ void Calculate_Exact_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_R, Real *dev_flux, int nx, int ny, + int nz, int n_ghost, Real gamma, int dir, int n_fields) { // get a thread index int blockId = blockIdx.x + blockIdx.y * gridDim.x; @@ -79,9 +77,7 @@ __global__ void Calculate_Exact_Fluxes_CUDA(Real *dev_bounds_L, dge = dev_bounds_L[(n_fields - 1) * n_cells + tid]; pl = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, dge, gamma); #else - pl = (dev_bounds_L[4 * n_cells + tid] - - 0.5 * dl * (vxl * vxl + vyl * vyl + vzl * vzl)) * - (gamma - 1.0); + pl = (dev_bounds_L[4 * n_cells + tid] - 0.5 * dl * (vxl * vxl + vyl * vyl + vzl * vzl)) * (gamma - 1.0); #endif // PRESSURE_DE pl = fmax(pl, (Real)TINY_NUMBER); #ifdef SCALAR @@ -102,9 +98,7 @@ __global__ void Calculate_Exact_Fluxes_CUDA(Real *dev_bounds_L, dge = dev_bounds_R[(n_fields - 1) * n_cells + tid]; pr = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, dge, gamma); #else - pr = (dev_bounds_R[4 * n_cells + tid] - - 0.5 * dr * (vxr * vxr + vyr * vyr + vzr * vzr)) * - (gamma - 1.0); + pr = (dev_bounds_R[4 * n_cells + tid] - 0.5 * dr * (vxr * vxr + vyr * vyr + vzr * vzr)) * (gamma - 1.0); #endif // PRESSURE_DE pr = fmax(pr, (Real)TINY_NUMBER); #ifdef SCALAR @@ -165,8 +159,7 @@ __global__ void Calculate_Exact_Fluxes_CUDA(Real *dev_bounds_L, } } -__device__ Real guessp_CUDA(Real dl, Real vxl, Real pl, Real cl, Real dr, - Real vxr, Real pr, Real cr, Real gamma) +__device__ Real guessp_CUDA(Real dl, Real vxl, Real pl, Real cl, Real dr, Real vxr, Real pr, Real cr, Real gamma) { // purpose: to provide a guessed value for pressure // pm in the Star Region. The choice is made @@ -182,10 +175,8 @@ __device__ Real guessp_CUDA(Real dl, Real vxl, Real pl, Real cl, Real dr, if (ppv < 0.0) ppv = 0.0; // Two-Shock Riemann solver with PVRS as estimate - gl = sqrt((2.0 / ((gamma + 1.0) * dl)) / - (((gamma - 1.0) / (gamma + 1.0)) * pl + ppv)); - gr = sqrt((2.0 / ((gamma + 1.0) * dr)) / - (((gamma - 1.0) / (gamma + 1.0)) * pr + ppv)); + gl = sqrt((2.0 / ((gamma + 1.0) * dl)) / (((gamma - 1.0) / (gamma + 1.0)) * pl + ppv)); + gr = sqrt((2.0 / ((gamma + 1.0) * dr)) / (((gamma - 1.0) / (gamma + 1.0)) * pr + ppv)); pm = (gl * pl + gr * pr - (vxr - vxl)) / (gl + gr); if (pm < 0.0) pm = TOL; @@ -193,8 +184,7 @@ __device__ Real guessp_CUDA(Real dl, Real vxl, Real pl, Real cl, Real dr, return pm; } -__device__ void prefun_CUDA(Real *f, Real *fd, Real p, Real dk, Real pk, - Real ck, Real gamma) +__device__ void prefun_CUDA(Real *f, Real *fd, Real p, Real dk, Real pk, Real ck, Real gamma) { // purpose: to evaluate the pressure functions // fl and fr in the exact Riemann solver @@ -204,22 +194,17 @@ __device__ void prefun_CUDA(Real *f, Real *fd, Real p, Real dk, Real pk, if (p <= pk) { // rarefaction wave - *f = (2.0 / (gamma - 1.0)) * ck * - (powf(p / pk, (gamma - 1.0) / (2.0 * gamma)) - 1.0); + *f = (2.0 / (gamma - 1.0)) * ck * (powf(p / pk, (gamma - 1.0) / (2.0 * gamma)) - 1.0); *fd = (1.0 / (dk * ck)) * powf((p / pk), -((gamma + 1.0) / (2.0 * gamma))); } else { // shock wave - qrt = sqrt(((2.0 / (gamma + 1.0)) / dk) / - ((((gamma - 1.0) / (gamma + 1.0)) * pk) + p)); + qrt = sqrt(((2.0 / (gamma + 1.0)) / dk) / ((((gamma - 1.0) / (gamma + 1.0)) * pk) + p)); *f = (p - pk) * qrt; - *fd = - (1.0 - 0.5 * (p - pk) / ((((gamma - 1.0) / (gamma + 1.0)) * pk) + p)) * - qrt; + *fd = (1.0 - 0.5 * (p - pk) / ((((gamma - 1.0) / (gamma + 1.0)) * pk) + p)) * qrt; } } -__device__ void starpv_CUDA(Real *p, Real *v, Real dl, Real vxl, Real pl, - Real cl, Real dr, Real vxr, Real pr, Real cr, +__device__ void starpv_CUDA(Real *p, Real *v, Real dl, Real vxl, Real pl, Real cl, Real dr, Real vxr, Real pr, Real cr, Real gamma) { // purpose: Uses Newton-Raphson iteration @@ -253,9 +238,8 @@ __device__ void starpv_CUDA(Real *p, Real *v, Real dl, Real vxl, Real pl, *v = 0.5 * (vxl + vxr + fr - fl); } -__device__ void sample_CUDA(const Real pm, const Real vm, Real *d, Real *v, - Real *p, Real dl, Real vxl, Real pl, Real cl, - Real dr, Real vxr, Real pr, Real cr, Real gamma) +__device__ void sample_CUDA(const Real pm, const Real vm, Real *d, Real *v, Real *p, Real dl, Real vxl, Real pl, + Real cl, Real dr, Real vxr, Real pr, Real cr, Real gamma) { // purpose: to sample the solution throughout the wave // pattern. Pressure pm and velocity vm in the @@ -274,8 +258,7 @@ __device__ void sample_CUDA(const Real pm, const Real vm, Real *d, Real *v, *v = vxl; *p = pl; } else { - if (vm - cl * powf(pm / pl, (gamma - 1.0) / (2.0 * gamma)) < - 0) // sampled point is in star left state + if (vm - cl * powf(pm / pl, (gamma - 1.0) / (2.0 * gamma)) < 0) // sampled point is in star left state { *d = dl * powf(pm / pl, 1.0 / gamma); *v = vm; @@ -290,8 +273,7 @@ __device__ void sample_CUDA(const Real pm, const Real vm, Real *d, Real *v, } } else // left shock { - sl = vxl - cl * sqrt(((gamma + 1.0) / (2.0 * gamma)) * (pm / pl) + - ((gamma - 1.0) / (2.0 * gamma))); + sl = vxl - cl * sqrt(((gamma + 1.0) / (2.0 * gamma)) * (pm / pl) + ((gamma - 1.0) / (2.0 * gamma))); if (sl >= 0) // sampled point is in left data state { *d = dl; @@ -299,8 +281,7 @@ __device__ void sample_CUDA(const Real pm, const Real vm, Real *d, Real *v, *p = pl; } else // sampled point is in star left state { - *d = dl * (pm / pl + ((gamma - 1.0) / (gamma + 1.0))) / - ((pm / pl) * ((gamma - 1.0) / (gamma + 1.0)) + 1.0); + *d = dl * (pm / pl + ((gamma - 1.0) / (gamma + 1.0))) / ((pm / pl) * ((gamma - 1.0) / (gamma + 1.0)) + 1.0); *v = vm; *p = pm; } @@ -309,8 +290,7 @@ __device__ void sample_CUDA(const Real pm, const Real vm, Real *d, Real *v, { if (pm > pr) // right shock { - sr = vxr + cr * sqrt(((gamma + 1.0) / (2.0 * gamma)) * (pm / pr) + - ((gamma - 1.0) / (2.0 * gamma))); + sr = vxr + cr * sqrt(((gamma + 1.0) / (2.0 * gamma)) * (pm / pr) + ((gamma - 1.0) / (2.0 * gamma))); if (sr <= 0) // sampled point is in right data state { *d = dr; @@ -318,8 +298,7 @@ __device__ void sample_CUDA(const Real pm, const Real vm, Real *d, Real *v, *p = pr; } else // sampled point is in star right state { - *d = dr * (pm / pr + ((gamma - 1.0) / (gamma + 1.0))) / - ((pm / pr) * ((gamma - 1.0) / (gamma + 1.0)) + 1.0); + *d = dr * (pm / pr + ((gamma - 1.0) / (gamma + 1.0))) / ((pm / pr) * ((gamma - 1.0) / (gamma + 1.0)) + 1.0); *v = vm; *p = pm; } @@ -331,8 +310,7 @@ __device__ void sample_CUDA(const Real pm, const Real vm, Real *d, Real *v, *v = vxr; *p = pr; } else { - if (vm + cr * powf(pm / pr, (gamma - 1.0) / (2.0 * gamma)) >= - 0) // sampled point is in star right state + if (vm + cr * powf(pm / pr, (gamma - 1.0) / (2.0 * gamma)) >= 0) // sampled point is in star right state { *d = dr * powf(pm / pr, (1.0 / gamma)); *v = vm; diff --git a/src/riemann_solvers/exact_cuda.h b/src/riemann_solvers/exact_cuda.h index ed62928c6..f1a3d3261 100644 --- a/src/riemann_solvers/exact_cuda.h +++ b/src/riemann_solvers/exact_cuda.h @@ -13,24 +13,18 @@ * *dev_flux, int nx, int ny, int nz, int n_ghost, Real gamma, int dir, int * n_fields) \brief Exact Riemann solver based on the Fortran code given in * Sec. 4.9 of Toro (1999). */ -__global__ void Calculate_Exact_Fluxes_CUDA(Real *dev_bounds_L, - Real *dev_bounds_R, Real *dev_flux, - int nx, int ny, int nz, int n_ghost, - Real gamma, int dir, int n_fields); +__global__ void Calculate_Exact_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_R, Real *dev_flux, int nx, int ny, + int nz, int n_ghost, Real gamma, int dir, int n_fields); -__device__ Real guessp_CUDA(Real dl, Real vxl, Real pl, Real cl, Real dr, - Real vxr, Real pr, Real cr, Real gamma); +__device__ Real guessp_CUDA(Real dl, Real vxl, Real pl, Real cl, Real dr, Real vxr, Real pr, Real cr, Real gamma); -__device__ void prefun_CUDA(Real *f, Real *fd, Real p, Real dk, Real pk, - Real ck, Real gamma); +__device__ void prefun_CUDA(Real *f, Real *fd, Real p, Real dk, Real pk, Real ck, Real gamma); -__device__ void starpv_CUDA(Real *p, Real *v, Real dl, Real vxl, Real pl, - Real cl, Real dr, Real vxr, Real pr, Real cr, +__device__ void starpv_CUDA(Real *p, Real *v, Real dl, Real vxl, Real pl, Real cl, Real dr, Real vxr, Real pr, Real cr, Real gamma); -__device__ void sample_CUDA(const Real pm, const Real vm, Real *d, Real *v, - Real *p, Real dl, Real vxl, Real pl, Real cl, - Real dr, Real vxr, Real pr, Real cr, Real gamma); +__device__ void sample_CUDA(const Real pm, const Real vm, Real *d, Real *v, Real *p, Real dl, Real vxl, Real pl, + Real cl, Real dr, Real vxr, Real pr, Real cr, Real gamma); #endif // EXACT_CUDA_H #endif // CUDA diff --git a/src/riemann_solvers/hll_cuda.cu b/src/riemann_solvers/hll_cuda.cu index 7540a218d..ad8dca3e1 100644 --- a/src/riemann_solvers/hll_cuda.cu +++ b/src/riemann_solvers/hll_cuda.cu @@ -18,10 +18,8 @@ * *dev_flux, int nx, int ny, int nz, int n_ghost, Real gamma, int dir, int * n_fields) \brief HLLC Riemann solver based on the version described in Toro * (2006), Sec. 10.4. */ -__global__ void Calculate_HLL_Fluxes_CUDA(Real *dev_bounds_L, - Real *dev_bounds_R, Real *dev_flux, - int nx, int ny, int nz, int n_ghost, - Real gamma, int dir, int n_fields) +__global__ void Calculate_HLL_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_R, Real *dev_flux, int nx, int ny, + int nz, int n_ghost, Real gamma, int dir, int n_fields) { // get a thread index int blockId = blockIdx.x + blockIdx.y * gridDim.x; @@ -49,8 +47,7 @@ __global__ void Calculate_HLL_Fluxes_CUDA(Real *dev_bounds_L, Real dgel, dger, f_ge_l, f_ge_r, f_ge, E_kin; #endif #ifdef SCALAR - Real dscl[NSCALARS], dscr[NSCALARS], f_sc_l[NSCALARS], f_sc_r[NSCALARS], - f_sc[NSCALARS]; + Real dscl[NSCALARS], dscr[NSCALARS], f_sc_l[NSCALARS], f_sc_r[NSCALARS], f_sc[NSCALARS]; #endif // Real etah = 0; @@ -240,23 +237,17 @@ __global__ void Calculate_HLL_Fluxes_CUDA(Real *dev_bounds_L, } // otherwise compute subsonic flux else { - f_d = ((Sr * f_d_l) - (Sl * f_d_r) + Sl * Sr * (dr - dl)) / (Sr - Sl); - f_mx = - ((Sr * f_mx_l) - (Sl * f_mx_r) + Sl * Sr * (mxr - mxl)) / (Sr - Sl); - f_my = - ((Sr * f_my_l) - (Sl * f_my_r) + Sl * Sr * (myr - myl)) / (Sr - Sl); - f_mz = - ((Sr * f_mz_l) - (Sl * f_mz_r) + Sl * Sr * (mzr - mzl)) / (Sr - Sl); - f_E = ((Sr * f_E_l) - (Sl * f_E_r) + Sl * Sr * (Er - El)) / (Sr - Sl); + f_d = ((Sr * f_d_l) - (Sl * f_d_r) + Sl * Sr * (dr - dl)) / (Sr - Sl); + f_mx = ((Sr * f_mx_l) - (Sl * f_mx_r) + Sl * Sr * (mxr - mxl)) / (Sr - Sl); + f_my = ((Sr * f_my_l) - (Sl * f_my_r) + Sl * Sr * (myr - myl)) / (Sr - Sl); + f_mz = ((Sr * f_mz_l) - (Sl * f_mz_r) + Sl * Sr * (mzr - mzl)) / (Sr - Sl); + f_E = ((Sr * f_E_l) - (Sl * f_E_r) + Sl * Sr * (Er - El)) / (Sr - Sl); #ifdef DE - f_ge = - ((Sr * f_ge_l) - (Sl * f_ge_r) + Sl * Sr * (dger - dgel)) / (Sr - Sl); + f_ge = ((Sr * f_ge_l) - (Sl * f_ge_r) + Sl * Sr * (dger - dgel)) / (Sr - Sl); #endif #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { - f_sc[i] = ((Sr * f_sc_l[i]) - (Sl * f_sc_r[i]) + - Sl * Sr * (dscr[i] - dscl[i])) / - (Sr - Sl); + f_sc[i] = ((Sr * f_sc_l[i]) - (Sl * f_sc_r[i]) + Sl * Sr * (dscr[i] - dscl[i])) / (Sr - Sl); } #endif diff --git a/src/riemann_solvers/hll_cuda.h b/src/riemann_solvers/hll_cuda.h index 644258b4b..8b4cceb10 100644 --- a/src/riemann_solvers/hll_cuda.h +++ b/src/riemann_solvers/hll_cuda.h @@ -12,10 +12,8 @@ * *dev_flux, int nx, int ny, int nz, int n_ghost, Real gamma, int dir, int * n_fields) \brief Roe Riemann solver based on the version described in Stone * et al, 2008. */ -__global__ void Calculate_HLL_Fluxes_CUDA(Real *dev_bounds_L, - Real *dev_bounds_R, Real *dev_flux, - int nx, int ny, int nz, int n_ghost, - Real gamma, int dir, int n_fields); +__global__ void Calculate_HLL_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_R, Real *dev_flux, int nx, int ny, + int nz, int n_ghost, Real gamma, int dir, int n_fields); #endif // HLLC_CUDA_H #endif // CUDA diff --git a/src/riemann_solvers/hllc_cuda.cu b/src/riemann_solvers/hllc_cuda.cu index e2cfa6ce8..912765d23 100644 --- a/src/riemann_solvers/hllc_cuda.cu +++ b/src/riemann_solvers/hllc_cuda.cu @@ -18,10 +18,8 @@ * *dev_flux, int nx, int ny, int nz, int n_ghost, Real gamma, int dir, int * n_fields) \brief HLLC Riemann solver based on the version described in Toro * (2006), Sec. 10.4. */ -__global__ void Calculate_HLLC_Fluxes_CUDA(Real *dev_bounds_L, - Real *dev_bounds_R, Real *dev_flux, - int nx, int ny, int nz, int n_ghost, - Real gamma, int dir, int n_fields) +__global__ void Calculate_HLLC_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_R, Real *dev_flux, int nx, int ny, + int nz, int n_ghost, Real gamma, int dir, int n_fields) { // get a thread index int blockId = blockIdx.x + blockIdx.y * gridDim.x; @@ -49,9 +47,8 @@ __global__ void Calculate_HLLC_Fluxes_CUDA(Real *dev_bounds_L, Real dgel, dger, gel, ger, gels, gers, f_ge_l, f_ge_r, f_ge, E_kin; #endif #ifdef SCALAR - Real dscl[NSCALARS], dscr[NSCALARS], scl[NSCALARS], scr[NSCALARS], - scls[NSCALARS], scrs[NSCALARS], f_sc_l[NSCALARS], f_sc_r[NSCALARS], - f_sc[NSCALARS]; + Real dscl[NSCALARS], dscr[NSCALARS], scl[NSCALARS], scr[NSCALARS], scls[NSCALARS], scrs[NSCALARS], f_sc_l[NSCALARS], + f_sc_r[NSCALARS], f_sc[NSCALARS]; #endif Real etah = 0; @@ -244,8 +241,7 @@ __global__ void Calculate_HLLC_Fluxes_CUDA(Real *dev_bounds_L, else { // compute contact wave speed and pressure in star region (Batten eqns 34 // & 36) - Sm = (dr * vxr * (Sr - vxr) - dl * vxl * (Sl - vxl) + pl - pr) / - (dr * (Sr - vxr) - dl * (Sl - vxl)); + Sm = (dr * vxr * (Sr - vxr) - dl * vxl * (Sl - vxl) + pl - pr) / (dr * (Sr - vxr) - dl * (Sl - vxl)); ps = dl * (vxl - Sl) * (vxl - Sm) + pl; // conserved variables in the left star state (Batten eqns 35 - 40) @@ -279,25 +275,18 @@ __global__ void Calculate_HLLC_Fluxes_CUDA(Real *dev_bounds_L, #endif // compute the hllc flux (Batten eqn 27) - f_d = 0.5 * (f_d_l + f_d_r + (Sr - fabs(Sm)) * drs + - (Sl + fabs(Sm)) * dls - Sl * dl - Sr * dr); - f_mx = 0.5 * (f_mx_l + f_mx_r + (Sr - fabs(Sm)) * mxrs + - (Sl + fabs(Sm)) * mxls - Sl * mxl - Sr * mxr); - f_my = 0.5 * (f_my_l + f_my_r + (Sr - fabs(Sm)) * myrs + - (Sl + fabs(Sm)) * myls - Sl * myl - Sr * myr); - f_mz = 0.5 * (f_mz_l + f_mz_r + (Sr - fabs(Sm)) * mzrs + - (Sl + fabs(Sm)) * mzls - Sl * mzl - Sr * mzr); - f_E = 0.5 * (f_E_l + f_E_r + (Sr - fabs(Sm)) * Ers + - (Sl + fabs(Sm)) * Els - Sl * El - Sr * Er); + f_d = 0.5 * (f_d_l + f_d_r + (Sr - fabs(Sm)) * drs + (Sl + fabs(Sm)) * dls - Sl * dl - Sr * dr); + f_mx = 0.5 * (f_mx_l + f_mx_r + (Sr - fabs(Sm)) * mxrs + (Sl + fabs(Sm)) * mxls - Sl * mxl - Sr * mxr); + f_my = 0.5 * (f_my_l + f_my_r + (Sr - fabs(Sm)) * myrs + (Sl + fabs(Sm)) * myls - Sl * myl - Sr * myr); + f_mz = 0.5 * (f_mz_l + f_mz_r + (Sr - fabs(Sm)) * mzrs + (Sl + fabs(Sm)) * mzls - Sl * mzl - Sr * mzr); + f_E = 0.5 * (f_E_l + f_E_r + (Sr - fabs(Sm)) * Ers + (Sl + fabs(Sm)) * Els - Sl * El - Sr * Er); #ifdef DE - f_ge = 0.5 * (f_ge_l + f_ge_r + (Sr - fabs(Sm)) * gers + - (Sl + fabs(Sm)) * gels - Sl * dgel - Sr * dger); + f_ge = 0.5 * (f_ge_l + f_ge_r + (Sr - fabs(Sm)) * gers + (Sl + fabs(Sm)) * gels - Sl * dgel - Sr * dger); #endif #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { - f_sc[i] = - 0.5 * (f_sc_l[i] + f_sc_r[i] + (Sr - fabs(Sm)) * scrs[i] + - (Sl + fabs(Sm)) * scls[i] - Sl * dscl[i] - Sr * dscr[i]); + f_sc[i] = 0.5 * (f_sc_l[i] + f_sc_r[i] + (Sr - fabs(Sm)) * scrs[i] + (Sl + fabs(Sm)) * scls[i] - Sl * dscl[i] - + Sr * dscr[i]); } #endif diff --git a/src/riemann_solvers/hllc_cuda.h b/src/riemann_solvers/hllc_cuda.h index ff6f26cb9..2268c3320 100644 --- a/src/riemann_solvers/hllc_cuda.h +++ b/src/riemann_solvers/hllc_cuda.h @@ -12,10 +12,8 @@ * *dev_flux, int nx, int ny, int nz, int n_ghost, Real gamma, int dir, int * n_fields) \brief Roe Riemann solver based on the version described in Stone * et al, 2008. */ -__global__ void Calculate_HLLC_Fluxes_CUDA(Real *dev_bounds_L, - Real *dev_bounds_R, Real *dev_flux, - int nx, int ny, int nz, int n_ghost, - Real gamma, int dir, int n_fields); +__global__ void Calculate_HLLC_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_R, Real *dev_flux, int nx, int ny, + int nz, int n_ghost, Real gamma, int dir, int n_fields); #endif // HLLC_CUDA_H #endif // CUDA diff --git a/src/riemann_solvers/hllc_cuda_tests.cu b/src/riemann_solvers/hllc_cuda_tests.cu index 25c11a73f..c3efe9d96 100644 --- a/src/riemann_solvers/hllc_cuda_tests.cu +++ b/src/riemann_solvers/hllc_cuda_tests.cu @@ -44,8 +44,7 @@ class tHYDROCalculateHLLCFluxesCUDA : public ::testing::Test * \param[in] gamma The adiabatic index * \return std::vector */ - std::vector computeFluxes(std::vector const &stateLeft, - std::vector const &stateRight, + std::vector computeFluxes(std::vector const &stateLeft, std::vector const &stateRight, Real const &gamma) { // Simulation Paramters @@ -72,21 +71,17 @@ class tHYDROCalculateHLLCFluxesCUDA : public ::testing::Test CudaSafeCall(cudaMalloc(&devConservedRight, nFields * sizeof(Real))); CudaSafeCall(cudaMalloc(&devTestFlux, nFields * sizeof(Real))); - CudaSafeCall(cudaMemcpy(devConservedLeft, stateLeft.data(), - nFields * sizeof(Real), cudaMemcpyHostToDevice)); - CudaSafeCall(cudaMemcpy(devConservedRight, stateRight.data(), - nFields * sizeof(Real), cudaMemcpyHostToDevice)); + CudaSafeCall(cudaMemcpy(devConservedLeft, stateLeft.data(), nFields * sizeof(Real), cudaMemcpyHostToDevice)); + CudaSafeCall(cudaMemcpy(devConservedRight, stateRight.data(), nFields * sizeof(Real), cudaMemcpyHostToDevice)); // Run kernel hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dimGrid, dimBlock, 0, 0, devConservedLeft, // the "left" interface devConservedRight, // the "right" interface - devTestFlux, nx, ny, nz, nGhost, gamma, direction, - nFields); + devTestFlux, nx, ny, nz, nGhost, gamma, direction, nFields); CudaCheckError(); - CudaSafeCall(cudaMemcpy(testFlux.data(), devTestFlux, - nFields * sizeof(Real), cudaMemcpyDeviceToHost)); + CudaSafeCall(cudaMemcpy(testFlux.data(), devTestFlux, nFields * sizeof(Real), cudaMemcpyDeviceToHost)); // Make sure to sync with the device so we have the results cudaDeviceSynchronize(); @@ -110,16 +105,13 @@ class tHYDROCalculateHLLCFluxesCUDA : public ::testing::Test * print. It will print after the default GTest output but before the * values that failed are printed */ - void checkResults(std::vector const &fiducialFlux, - std::vector const &testFlux, + void checkResults(std::vector const &fiducialFlux, std::vector const &testFlux, std::string const &customOutput = "") { // Field names - std::vector const fieldNames{ - "Densities", "X Momentum", "Y Momentum", "Z Momentum", "Energies"}; + std::vector const fieldNames{"Densities", "X Momentum", "Y Momentum", "Z Momentum", "Energies"}; - ASSERT_TRUE((fiducialFlux.size() == testFlux.size()) and - (fiducialFlux.size() == fieldNames.size())) + ASSERT_TRUE((fiducialFlux.size() == testFlux.size()) and (fiducialFlux.size() == fieldNames.size())) << "The fiducial flux, test flux, and field name vectors are not all " "the same length" << std::endl @@ -133,16 +125,14 @@ class tHYDROCalculateHLLCFluxesCUDA : public ::testing::Test double absoluteDiff; int64_t ulpsDiff; - bool areEqual = testingUtilities::nearlyEqualDbl( - fiducialFlux[i], testFlux[i], absoluteDiff, ulpsDiff); - EXPECT_TRUE(areEqual) - << std::endl - << customOutput << std::endl - << "There's a difference in " << fieldNames[i] << " Flux" << std::endl - << "The fiducial value is: " << fiducialFlux[i] << std::endl - << "The test value is: " << testFlux[i] << std::endl - << "The absolute difference is: " << absoluteDiff << std::endl - << "The ULP difference is: " << ulpsDiff << std::endl; + bool areEqual = testingUtilities::nearlyEqualDbl(fiducialFlux[i], testFlux[i], absoluteDiff, ulpsDiff); + EXPECT_TRUE(areEqual) << std::endl + << customOutput << std::endl + << "There's a difference in " << fieldNames[i] << " Flux" << std::endl + << "The fiducial value is: " << fiducialFlux[i] << std::endl + << "The test value is: " << testFlux[i] << std::endl + << "The absolute difference is: " << absoluteDiff << std::endl + << "The ULP difference is: " << ulpsDiff << std::endl; } } // ===================================================================== @@ -172,12 +162,9 @@ TEST_F(tHYDROCalculateHLLCFluxesCUDA, // Test suite name Real const momentumZ = density * velocityZ; Real const gamma = 1.4; Real const energy = (pressure / (gamma - 1)) + - 0.5 * density * - (velocityX * velocityX + velocityY * velocityY + - velocityZ * velocityZ); + 0.5 * density * (velocityX * velocityX + velocityY * velocityY + velocityZ * velocityZ); - std::vector const state{density, momentumX, momentumY, momentumZ, - energy}; + std::vector const state{density, momentumX, momentumY, momentumZ, energy}; std::vector const fiducialFluxes{0, 1, 0, 0, 0}; // Compute the fluxes diff --git a/src/riemann_solvers/hlld_cuda.cu b/src/riemann_solvers/hlld_cuda.cu index 72fc9cde8..0bc277b73 100644 --- a/src/riemann_solvers/hlld_cuda.cu +++ b/src/riemann_solvers/hlld_cuda.cu @@ -30,11 +30,8 @@ namespace mhd { // ========================================================================= -__global__ void Calculate_HLLD_Fluxes_CUDA(Real *dev_bounds_L, - Real *dev_bounds_R, - Real *dev_magnetic_face, - Real *dev_flux, int nx, int ny, - int nz, int n_ghost, Real gamma, +__global__ void Calculate_HLLD_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_R, Real *dev_magnetic_face, + Real *dev_flux, int nx, int ny, int nz, int n_ghost, Real gamma, int direction, int n_fields) { // get a thread index @@ -73,14 +70,13 @@ __global__ void Calculate_HLLD_Fluxes_CUDA(Real *dev_bounds_L, // The magnetic field in the X-direction Real const magneticX = dev_magnetic_face[threadId]; - mhd::_internal::State const stateL = mhd::_internal::loadState( - dev_bounds_L, magneticX, gamma, threadId, n_cells, o1, o2, o3); - mhd::_internal::State const stateR = mhd::_internal::loadState( - dev_bounds_R, magneticX, gamma, threadId, n_cells, o1, o2, o3); + mhd::_internal::State const stateL = + mhd::_internal::loadState(dev_bounds_L, magneticX, gamma, threadId, n_cells, o1, o2, o3); + mhd::_internal::State const stateR = + mhd::_internal::loadState(dev_bounds_R, magneticX, gamma, threadId, n_cells, o1, o2, o3); // Compute the approximate Left and Right wave speeds - mhd::_internal::Speeds speed = - mhd::_internal::approximateLRWaveSpeeds(stateL, stateR, magneticX, gamma); + mhd::_internal::Speeds speed = mhd::_internal::approximateLRWaveSpeeds(stateL, stateR, magneticX, gamma); // ================================================================= // Compute the fluxes in the non-star states @@ -92,8 +88,7 @@ __global__ void Calculate_HLLD_Fluxes_CUDA(Real *dev_bounds_L, // In this state the flow is supersonic // M&K 2005 equation 66 if (speed.L >= 0.0) { - mhd::_internal::returnFluxes(threadId, o1, o2, o3, n_cells, dev_flux, fluxL, - stateL); + mhd::_internal::returnFluxes(threadId, o1, o2, o3, n_cells, dev_flux, fluxL, stateL); return; } // Right state @@ -103,8 +98,7 @@ __global__ void Calculate_HLLD_Fluxes_CUDA(Real *dev_bounds_L, // In this state the flow is supersonic // M&K 2005 equation 66 if (speed.R <= 0.0) { - mhd::_internal::returnFluxes(threadId, o1, o2, o3, n_cells, dev_flux, fluxR, - stateR); + mhd::_internal::returnFluxes(threadId, o1, o2, o3, n_cells, dev_flux, fluxR, stateR); return; } @@ -114,45 +108,38 @@ __global__ void Calculate_HLLD_Fluxes_CUDA(Real *dev_bounds_L, // Shared quantities: // - velocityStarX = speedM // - totalPrssureStar is the same on both sides - speed.M = approximateMiddleWaveSpeed(stateL, stateR, speed); - Real const totalPressureStar = - mhd::_internal::starTotalPressure(stateL, stateR, speed); + speed.M = approximateMiddleWaveSpeed(stateL, stateR, speed); + Real const totalPressureStar = mhd::_internal::starTotalPressure(stateL, stateR, speed); // Left star state - mhd::_internal::StarState const starStateL = mhd::_internal::computeStarState( - stateL, speed, speed.L, magneticX, totalPressureStar); + mhd::_internal::StarState const starStateL = + mhd::_internal::computeStarState(stateL, speed, speed.L, magneticX, totalPressureStar); // Left star speed - speed.LStar = mhd::_internal::approximateStarWaveSpeed(starStateL, speed, - magneticX, -1); + speed.LStar = mhd::_internal::approximateStarWaveSpeed(starStateL, speed, magneticX, -1); // If we're in the L* state then assign fluxes and return. // In this state the flow is subsonic // M&K 2005 equation 66 if (speed.LStar >= 0.0) { - fluxL = - mhd::_internal::starFluxes(starStateL, stateL, fluxL, speed, speed.L); - mhd::_internal::returnFluxes(threadId, o1, o2, o3, n_cells, dev_flux, fluxL, - stateL); + fluxL = mhd::_internal::starFluxes(starStateL, stateL, fluxL, speed, speed.L); + mhd::_internal::returnFluxes(threadId, o1, o2, o3, n_cells, dev_flux, fluxL, stateL); return; } // Right star state - mhd::_internal::StarState const starStateR = mhd::_internal::computeStarState( - stateR, speed, speed.R, magneticX, totalPressureStar); + mhd::_internal::StarState const starStateR = + mhd::_internal::computeStarState(stateR, speed, speed.R, magneticX, totalPressureStar); // Right star speed - speed.RStar = - mhd::_internal::approximateStarWaveSpeed(starStateR, speed, magneticX, 1); + speed.RStar = mhd::_internal::approximateStarWaveSpeed(starStateR, speed, magneticX, 1); // If we're in the R* state then assign fluxes and return. // In this state the flow is subsonic // M&K 2005 equation 66 if (speed.RStar <= 0.0) { - fluxR = - mhd::_internal::starFluxes(starStateR, stateR, fluxR, speed, speed.R); - mhd::_internal::returnFluxes(threadId, o1, o2, o3, n_cells, dev_flux, fluxR, - stateR); + fluxR = mhd::_internal::starFluxes(starStateR, stateR, fluxR, speed, speed.R); + mhd::_internal::returnFluxes(threadId, o1, o2, o3, n_cells, dev_flux, fluxR, stateR); return; } @@ -160,27 +147,22 @@ __global__ void Calculate_HLLD_Fluxes_CUDA(Real *dev_bounds_L, // Compute the fluxes in the double star states // ================================================================= mhd::_internal::DoubleStarState const doubleStarState = - mhd::_internal::computeDoubleStarState(starStateL, starStateR, magneticX, - totalPressureStar, speed); + mhd::_internal::computeDoubleStarState(starStateL, starStateR, magneticX, totalPressureStar, speed); // Compute and return L** fluxes // M&K 2005 equation 66 if (speed.M >= 0.0) { - fluxL = mhd::_internal::computeDoubleStarFluxes( - doubleStarState, doubleStarState.energyL, starStateL, stateL, fluxL, - speed, speed.L, speed.LStar); - mhd::_internal::returnFluxes(threadId, o1, o2, o3, n_cells, dev_flux, fluxL, - stateL); + fluxL = mhd::_internal::computeDoubleStarFluxes(doubleStarState, doubleStarState.energyL, starStateL, stateL, fluxL, + speed, speed.L, speed.LStar); + mhd::_internal::returnFluxes(threadId, o1, o2, o3, n_cells, dev_flux, fluxL, stateL); return; } // Compute and return R** fluxes // M&K 2005 equation 66 else { // if (speedStarR >= 0.0) { - fluxR = mhd::_internal::computeDoubleStarFluxes( - doubleStarState, doubleStarState.energyR, starStateR, stateR, fluxR, - speed, speed.R, speed.RStar); - mhd::_internal::returnFluxes(threadId, o1, o2, o3, n_cells, dev_flux, fluxR, - stateR); + fluxR = mhd::_internal::computeDoubleStarFluxes(doubleStarState, doubleStarState.energyR, starStateR, stateR, fluxR, + speed, speed.R, speed.RStar); + mhd::_internal::returnFluxes(threadId, o1, o2, o3, n_cells, dev_flux, fluxR, stateR); return; } } @@ -189,10 +171,9 @@ __global__ void Calculate_HLLD_Fluxes_CUDA(Real *dev_bounds_L, namespace _internal { // ===================================================================== -__device__ __host__ mhd::_internal::State loadState( - Real const *interfaceArr, Real const &magneticX, Real const &gamma, - int const &threadId, int const &n_cells, int const &o1, int const &o2, - int const &o3) +__device__ __host__ mhd::_internal::State loadState(Real const *interfaceArr, Real const &magneticX, Real const &gamma, + int const &threadId, int const &n_cells, int const &o1, + int const &o2, int const &o3) { mhd::_internal::State state; state.density = interfaceArr[threadId + n_cells * grid_enum::density]; @@ -202,60 +183,49 @@ __device__ __host__ mhd::_internal::State loadState( state.velocityZ = interfaceArr[threadId + n_cells * o3] / state.density; state.energy = interfaceArr[threadId + n_cells * grid_enum::Energy]; state.energy = fmax(state.energy, (Real)TINY_NUMBER); - state.magneticY = - interfaceArr[threadId + n_cells * grid_enum::Q_x_magnetic_y]; - state.magneticZ = - interfaceArr[threadId + n_cells * grid_enum::Q_x_magnetic_z]; + state.magneticY = interfaceArr[threadId + n_cells * grid_enum::Q_x_magnetic_y]; + state.magneticZ = interfaceArr[threadId + n_cells * grid_enum::Q_x_magnetic_z]; #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { - state.scalarSpecific[i] = - interfaceArr[threadId + n_cells * (grid_enum::scalar + i)] / - state.density; + state.scalarSpecific[i] = interfaceArr[threadId + n_cells * (grid_enum::scalar + i)] / state.density; } #endif // SCALAR #ifdef DE - state.thermalEnergySpecific = - interfaceArr[threadId + n_cells * grid_enum::GasEnergy] / state.density; + state.thermalEnergySpecific = interfaceArr[threadId + n_cells * grid_enum::GasEnergy] / state.density; #endif // DE} #ifdef DE // PRESSURE_DE - Real energyNonThermal = - hydro_utilities::Calc_Kinetic_Energy_From_Velocity( - state.density, state.velocityX, state.velocityY, state.velocityZ) + - mhd::utils::computeMagneticEnergy(magneticX, state.magneticY, - state.magneticZ); - - state.gasPressure = - fmax(hydro_utilities::Get_Pressure_From_DE( - state.energy, state.energy - energyNonThermal, - state.thermalEnergySpecific * state.density, gamma), - (Real)TINY_NUMBER); + Real energyNonThermal = hydro_utilities::Calc_Kinetic_Energy_From_Velocity(state.density, state.velocityX, + state.velocityY, state.velocityZ) + + mhd::utils::computeMagneticEnergy(magneticX, state.magneticY, state.magneticZ); + + state.gasPressure = fmax(hydro_utilities::Get_Pressure_From_DE(state.energy, state.energy - energyNonThermal, + state.thermalEnergySpecific * state.density, gamma), + (Real)TINY_NUMBER); #else // Note that this function does the positive pressure check // internally state.gasPressure = mhd::utils::computeGasPressure(state, magneticX, gamma); #endif // PRESSURE_DE - state.totalPressure = mhd::utils::computeTotalPressure( - state.gasPressure, magneticX, state.magneticY, state.magneticZ); + state.totalPressure = + mhd::utils::computeTotalPressure(state.gasPressure, magneticX, state.magneticY, state.magneticZ); return state; } // ===================================================================== // ===================================================================== -__device__ __host__ mhd::_internal::Speeds approximateLRWaveSpeeds( - mhd::_internal::State const &stateL, mhd::_internal::State const &stateR, - Real const &magneticX, Real const &gamma) +__device__ __host__ mhd::_internal::Speeds approximateLRWaveSpeeds(mhd::_internal::State const &stateL, + mhd::_internal::State const &stateR, + Real const &magneticX, Real const &gamma) { // Get the fast magnetosonic wave speeds - Real magSonicL = mhd::utils::fastMagnetosonicSpeed( - stateL.density, stateL.gasPressure, magneticX, stateL.magneticY, - stateL.magneticZ, gamma); - Real magSonicR = mhd::utils::fastMagnetosonicSpeed( - stateR.density, stateR.gasPressure, magneticX, stateR.magneticY, - stateR.magneticZ, gamma); + Real magSonicL = mhd::utils::fastMagnetosonicSpeed(stateL.density, stateL.gasPressure, magneticX, stateL.magneticY, + stateL.magneticZ, gamma); + Real magSonicR = mhd::utils::fastMagnetosonicSpeed(stateR.density, stateR.gasPressure, magneticX, stateR.magneticY, + stateR.magneticZ, gamma); // Compute the S_L and S_R wave speeds. // Version suggested by Miyoshi & Kusano 2005 and used in Athena @@ -270,9 +240,9 @@ __device__ __host__ mhd::_internal::Speeds approximateLRWaveSpeeds( // ===================================================================== // ===================================================================== -__device__ __host__ Real approximateMiddleWaveSpeed( - mhd::_internal::State const &stateL, mhd::_internal::State const &stateR, - mhd::_internal::Speeds const &speed) +__device__ __host__ Real approximateMiddleWaveSpeed(mhd::_internal::State const &stateL, + mhd::_internal::State const &stateR, + mhd::_internal::Speeds const &speed) { // Compute the S_M wave speed // M&K 2005 equation 38 @@ -280,19 +250,17 @@ __device__ __host__ Real approximateMiddleWaveSpeed( Real const speed_l_diff = speed.L - stateL.velocityX; return // Numerator - (speed_r_diff * stateR.density * stateR.velocityX - - speed_l_diff * stateL.density * stateL.velocityX - stateR.totalPressure + - stateL.totalPressure) / + (speed_r_diff * stateR.density * stateR.velocityX - speed_l_diff * stateL.density * stateL.velocityX - + stateR.totalPressure + stateL.totalPressure) / // Denominator (speed_r_diff * stateR.density - speed_l_diff * stateL.density); } // ===================================================================== // ===================================================================== -__device__ __host__ Real -approximateStarWaveSpeed(mhd::_internal::StarState const &starState, - mhd::_internal::Speeds const &speed, - Real const &magneticX, Real const &side) +__device__ __host__ Real approximateStarWaveSpeed(mhd::_internal::StarState const &starState, + mhd::_internal::Speeds const &speed, Real const &magneticX, + Real const &side) { // Compute the S_L^* and S_R^* wave speeds // M&K 2005 equation 51 @@ -301,38 +269,31 @@ approximateStarWaveSpeed(mhd::_internal::StarState const &starState, // ===================================================================== // ===================================================================== -__device__ __host__ mhd::_internal::Flux nonStarFluxes( - mhd::_internal::State const &state, Real const &magneticX) +__device__ __host__ mhd::_internal::Flux nonStarFluxes(mhd::_internal::State const &state, Real const &magneticX) { mhd::_internal::Flux flux; // M&K 2005 equation 2 flux.density = state.density * state.velocityX; - flux.momentumX = flux.density * state.velocityX + state.totalPressure - - magneticX * magneticX; + flux.momentumX = flux.density * state.velocityX + state.totalPressure - magneticX * magneticX; flux.momentumY = flux.density * state.velocityY - magneticX * state.magneticY; flux.momentumZ = flux.density * state.velocityZ - magneticX * state.magneticZ; - flux.magneticY = - state.magneticY * state.velocityX - magneticX * state.velocityY; - flux.magneticZ = - state.magneticZ * state.velocityX - magneticX * state.velocityZ; + flux.magneticY = state.magneticY * state.velocityX - magneticX * state.velocityY; + flux.magneticZ = state.magneticZ * state.velocityX - magneticX * state.velocityZ; // Group transverse terms for FP associative symmetry flux.energy = state.velocityX * (state.energy + state.totalPressure) - magneticX * (state.velocityX * magneticX + - ((state.velocityY * state.magneticY) + - (state.velocityZ * state.magneticZ))); + ((state.velocityY * state.magneticY) + (state.velocityZ * state.magneticZ))); return flux; } // ===================================================================== // ===================================================================== -__device__ __host__ void returnFluxes(int const &threadId, int const &o1, - int const &o2, int const &o3, - int const &n_cells, Real *dev_flux, - mhd::_internal::Flux const &flux, +__device__ __host__ void returnFluxes(int const &threadId, int const &o1, int const &o2, int const &o3, + int const &n_cells, Real *dev_flux, mhd::_internal::Flux const &flux, mhd::_internal::State const &state) { dev_flux[threadId + n_cells * grid_enum::density] = flux.density; @@ -345,52 +306,45 @@ __device__ __host__ void returnFluxes(int const &threadId, int const &o1, #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { - dev_flux[threadId + n_cells * (grid_enum::scalar + i)] = - state.scalarSpecific[i] * flux.density; + dev_flux[threadId + n_cells * (grid_enum::scalar + i)] = state.scalarSpecific[i] * flux.density; } #endif // SCALAR #ifdef DE - dev_flux[threadId + n_cells * grid_enum::GasEnergy] = - state.thermalEnergySpecific * flux.density; + dev_flux[threadId + n_cells * grid_enum::GasEnergy] = state.thermalEnergySpecific * flux.density; #endif // DE } // ===================================================================== // ===================================================================== -__device__ __host__ Real starTotalPressure(mhd::_internal::State const &stateL, - mhd::_internal::State const &stateR, +__device__ __host__ Real starTotalPressure(mhd::_internal::State const &stateL, mhd::_internal::State const &stateR, mhd::_internal::Speeds const &speed) { // M&K 2005 equation 41 return // Numerator (stateR.density * stateL.totalPressure * (speed.R - stateR.velocityX) - stateL.density * stateR.totalPressure * (speed.L - stateL.velocityX) + - stateL.density * stateR.density * (speed.R - stateR.velocityX) * - (speed.L - stateL.velocityX) * + stateL.density * stateR.density * (speed.R - stateR.velocityX) * (speed.L - stateL.velocityX) * (stateR.velocityX - stateL.velocityX)) / // Denominator - (stateR.density * (speed.R - stateR.velocityX) - - stateL.density * (speed.L - stateL.velocityX)); + (stateR.density * (speed.R - stateR.velocityX) - stateL.density * (speed.L - stateL.velocityX)); } // ===================================================================== // ===================================================================== -__device__ __host__ mhd::_internal::StarState computeStarState( - mhd::_internal::State const &state, mhd::_internal::Speeds const &speed, - Real const &speedSide, Real const &magneticX, Real const &totalPressureStar) +__device__ __host__ mhd::_internal::StarState computeStarState(mhd::_internal::State const &state, + mhd::_internal::Speeds const &speed, + Real const &speedSide, Real const &magneticX, + Real const &totalPressureStar) { mhd::_internal::StarState starState; // Compute the densities in the star state // M&K 2005 equation 43 - starState.density = - state.density * (speedSide - state.velocityX) / (speedSide - speed.M); + starState.density = state.density * (speedSide - state.velocityX) / (speedSide - speed.M); // Check for and handle the degenerate case // Explained at the top of page 326 in M&K 2005 - if (fabs(state.density * (speedSide - state.velocityX) * - (speedSide - speed.M) - - (magneticX * magneticX)) < + if (fabs(state.density * (speedSide - state.velocityX) * (speedSide - speed.M) - (magneticX * magneticX)) < totalPressureStar * mhd::_internal::_hlldSmallNumber) { starState.velocityY = state.velocityY; starState.velocityZ = state.velocityZ; @@ -398,9 +352,7 @@ __device__ __host__ mhd::_internal::StarState computeStarState( starState.magneticZ = state.magneticZ; } else { // Denominator for M&K 2005 equations 44-47 - Real const denom = - state.density * (speedSide - state.velocityX) * (speedSide - speed.M) - - (magneticX * magneticX); + Real const denom = state.density * (speedSide - state.velocityX) * (speedSide - speed.M) - (magneticX * magneticX); // Compute the velocity and magnetic field in the star state // M&K 2005 equations 44 & 46 @@ -409,65 +361,56 @@ __device__ __host__ mhd::_internal::StarState computeStarState( starState.velocityZ = state.velocityZ - state.magneticZ * coef; // M&K 2005 equations 45 & 47 - Real tmpPower = (speedSide - state.velocityX); - tmpPower = tmpPower * tmpPower; - coef = (state.density * tmpPower - (magneticX * magneticX)) / denom; + Real tmpPower = (speedSide - state.velocityX); + tmpPower = tmpPower * tmpPower; + coef = (state.density * tmpPower - (magneticX * magneticX)) / denom; starState.magneticY = state.magneticY * coef; starState.magneticZ = state.magneticZ * coef; } // M&K 2005 equation 48 - starState.energy = - (state.energy * (speedSide - state.velocityX) - - state.totalPressure * state.velocityX + totalPressureStar * speed.M + - magneticX * (math_utils::dotProduct(state.velocityX, state.velocityY, - state.velocityZ, magneticX, - state.magneticY, state.magneticZ) - - math_utils::dotProduct( - speed.M, starState.velocityY, starState.velocityZ, - magneticX, starState.magneticY, starState.magneticZ))) / - (speedSide - speed.M); + starState.energy = (state.energy * (speedSide - state.velocityX) - state.totalPressure * state.velocityX + + totalPressureStar * speed.M + + magneticX * (math_utils::dotProduct(state.velocityX, state.velocityY, state.velocityZ, magneticX, + state.magneticY, state.magneticZ) - + math_utils::dotProduct(speed.M, starState.velocityY, starState.velocityZ, magneticX, + starState.magneticY, starState.magneticZ))) / + (speedSide - speed.M); return starState; } // ===================================================================== // ===================================================================== -__device__ __host__ mhd::_internal::Flux starFluxes( - mhd::_internal::StarState const &starState, - mhd::_internal::State const &state, mhd::_internal::Flux const &flux, - mhd::_internal::Speeds const &speed, Real const &speedSide) +__device__ __host__ mhd::_internal::Flux starFluxes(mhd::_internal::StarState const &starState, + mhd::_internal::State const &state, + mhd::_internal::Flux const &flux, + mhd::_internal::Speeds const &speed, Real const &speedSide) { mhd::_internal::Flux starFlux; // Now compute the star state fluxes // M&K 2005 equations 64 - starFlux.density = - flux.density + speedSide * (starState.density - state.density); - starFlux.momentumX = - flux.momentumX + speedSide * (starState.density * speed.M - - state.density * state.velocityX); + starFlux.density = flux.density + speedSide * (starState.density - state.density); + starFlux.momentumX = flux.momentumX + speedSide * (starState.density * speed.M - state.density * state.velocityX); starFlux.momentumY = - flux.momentumY + speedSide * (starState.density * starState.velocityY - - state.density * state.velocityY); + flux.momentumY + speedSide * (starState.density * starState.velocityY - state.density * state.velocityY); starFlux.momentumZ = - flux.momentumZ + speedSide * (starState.density * starState.velocityZ - - state.density * state.velocityZ); - starFlux.energy = flux.energy + speedSide * (starState.energy - state.energy); - starFlux.magneticY = - flux.magneticY + speedSide * (starState.magneticY - state.magneticY); - starFlux.magneticZ = - flux.magneticZ + speedSide * (starState.magneticZ - state.magneticZ); + flux.momentumZ + speedSide * (starState.density * starState.velocityZ - state.density * state.velocityZ); + starFlux.energy = flux.energy + speedSide * (starState.energy - state.energy); + starFlux.magneticY = flux.magneticY + speedSide * (starState.magneticY - state.magneticY); + starFlux.magneticZ = flux.magneticZ + speedSide * (starState.magneticZ - state.magneticZ); return starFlux; } // ===================================================================== // ===================================================================== -__device__ __host__ mhd::_internal::DoubleStarState computeDoubleStarState( - mhd::_internal::StarState const &starStateL, - mhd::_internal::StarState const &starStateR, Real const &magneticX, - Real const &totalPressureStar, mhd::_internal::Speeds const &speed) +__device__ __host__ mhd::_internal::DoubleStarState computeDoubleStarState(mhd::_internal::StarState const &starStateL, + mhd::_internal::StarState const &starStateR, + Real const &magneticX, + Real const &totalPressureStar, + mhd::_internal::Speeds const &speed) { mhd::_internal::DoubleStarState doubleStarState; @@ -502,47 +445,35 @@ __device__ __host__ mhd::_internal::DoubleStarState computeDoubleStarState( // Double Star velocities // M&K 2005 equations 59 & 60 - doubleStarState.velocityY = - inverseDensities * - (sqrtDL * starStateL.velocityY + sqrtDR * starStateR.velocityY + - magXSign * (starStateR.magneticY - starStateL.magneticY)); - doubleStarState.velocityZ = - inverseDensities * - (sqrtDL * starStateL.velocityZ + sqrtDR * starStateR.velocityZ + - magXSign * (starStateR.magneticZ - starStateL.magneticZ)); + doubleStarState.velocityY = inverseDensities * (sqrtDL * starStateL.velocityY + sqrtDR * starStateR.velocityY + + magXSign * (starStateR.magneticY - starStateL.magneticY)); + doubleStarState.velocityZ = inverseDensities * (sqrtDL * starStateL.velocityZ + sqrtDR * starStateR.velocityZ + + magXSign * (starStateR.magneticZ - starStateL.magneticZ)); // Double star magnetic fields // M&K 2005 equations 61 & 62 doubleStarState.magneticY = - inverseDensities * - (sqrtDL * starStateR.magneticY + sqrtDR * starStateL.magneticY + - magXSign * (sqrtDL * sqrtDR) * - (starStateR.velocityY - starStateL.velocityY)); + inverseDensities * (sqrtDL * starStateR.magneticY + sqrtDR * starStateL.magneticY + + magXSign * (sqrtDL * sqrtDR) * (starStateR.velocityY - starStateL.velocityY)); doubleStarState.magneticZ = - inverseDensities * - (sqrtDL * starStateR.magneticZ + sqrtDR * starStateL.magneticZ + - magXSign * (sqrtDL * sqrtDR) * - (starStateR.velocityZ - starStateL.velocityZ)); + inverseDensities * (sqrtDL * starStateR.magneticZ + sqrtDR * starStateL.magneticZ + + magXSign * (sqrtDL * sqrtDR) * (starStateR.velocityZ - starStateL.velocityZ)); // Double star energy - Real velDblStarDotMagDblStar = math_utils::dotProduct( - speed.M, doubleStarState.velocityY, doubleStarState.velocityZ, - magneticX, doubleStarState.magneticY, doubleStarState.magneticZ); + Real velDblStarDotMagDblStar = + math_utils::dotProduct(speed.M, doubleStarState.velocityY, doubleStarState.velocityZ, magneticX, + doubleStarState.magneticY, doubleStarState.magneticZ); // M&K 2005 equation 63 doubleStarState.energyL = - starStateL.energy - - sqrtDL * magXSign * - (math_utils::dotProduct( - speed.M, starStateL.velocityY, starStateL.velocityZ, magneticX, - starStateL.magneticY, starStateL.magneticZ) - - velDblStarDotMagDblStar); + starStateL.energy - sqrtDL * magXSign * + (math_utils::dotProduct(speed.M, starStateL.velocityY, starStateL.velocityZ, magneticX, + starStateL.magneticY, starStateL.magneticZ) - + velDblStarDotMagDblStar); doubleStarState.energyR = - starStateR.energy + - sqrtDR * magXSign * - (math_utils::dotProduct( - speed.M, starStateR.velocityY, starStateR.velocityZ, magneticX, - starStateR.magneticY, starStateR.magneticZ) - - velDblStarDotMagDblStar); + starStateR.energy + sqrtDR * magXSign * + (math_utils::dotProduct(speed.M, starStateR.velocityY, starStateR.velocityZ, magneticX, + starStateR.magneticY, starStateR.magneticZ) - + velDblStarDotMagDblStar); } return doubleStarState; @@ -551,42 +482,31 @@ __device__ __host__ mhd::_internal::DoubleStarState computeDoubleStarState( // ===================================================================== __device__ __host__ mhd::_internal::Flux computeDoubleStarFluxes( - mhd::_internal::DoubleStarState const &doubleStarState, - Real const &doubleStarStateEnergy, - mhd::_internal::StarState const &starState, - mhd::_internal::State const &state, mhd::_internal::Flux const &flux, - mhd::_internal::Speeds const &speed, Real const &speedSide, - Real const &speedSideStar) + mhd::_internal::DoubleStarState const &doubleStarState, Real const &doubleStarStateEnergy, + mhd::_internal::StarState const &starState, mhd::_internal::State const &state, mhd::_internal::Flux const &flux, + mhd::_internal::Speeds const &speed, Real const &speedSide, Real const &speedSideStar) { mhd::_internal::Flux doubleStarFlux; Real const speed_diff = speedSideStar - speedSide; // M&K 2005 equation 65 - doubleStarFlux.density = flux.density - speedSide * state.density - - speed_diff * starState.density + - speedSideStar * starState.density; - - doubleStarFlux.momentumX = flux.momentumX - - speedSide * (state.density * state.velocityX) - - speed_diff * (starState.density * speed.M) + - speedSideStar * (starState.density * speed.M); - doubleStarFlux.momentumY = - flux.momentumY - speedSide * (state.density * state.velocityY) - - speed_diff * (starState.density * starState.velocityY) + - speedSideStar * (starState.density * doubleStarState.velocityY); - doubleStarFlux.momentumZ = - flux.momentumZ - speedSide * (state.density * state.velocityZ) - - speed_diff * (starState.density * starState.velocityZ) + - speedSideStar * (starState.density * doubleStarState.velocityZ); - doubleStarFlux.energy = flux.energy - speedSide * state.energy - - speed_diff * starState.energy + - speedSideStar * doubleStarStateEnergy; - doubleStarFlux.magneticY = flux.magneticY - speedSide * state.magneticY - - speed_diff * starState.magneticY + + doubleStarFlux.density = + flux.density - speedSide * state.density - speed_diff * starState.density + speedSideStar * starState.density; + + doubleStarFlux.momentumX = flux.momentumX - speedSide * (state.density * state.velocityX) - + speed_diff * (starState.density * speed.M) + speedSideStar * (starState.density * speed.M); + doubleStarFlux.momentumY = flux.momentumY - speedSide * (state.density * state.velocityY) - + speed_diff * (starState.density * starState.velocityY) + + speedSideStar * (starState.density * doubleStarState.velocityY); + doubleStarFlux.momentumZ = flux.momentumZ - speedSide * (state.density * state.velocityZ) - + speed_diff * (starState.density * starState.velocityZ) + + speedSideStar * (starState.density * doubleStarState.velocityZ); + doubleStarFlux.energy = + flux.energy - speedSide * state.energy - speed_diff * starState.energy + speedSideStar * doubleStarStateEnergy; + doubleStarFlux.magneticY = flux.magneticY - speedSide * state.magneticY - speed_diff * starState.magneticY + speedSideStar * doubleStarState.magneticY; - doubleStarFlux.magneticZ = flux.magneticZ - speedSide * state.magneticZ - - speed_diff * starState.magneticZ + + doubleStarFlux.magneticZ = flux.magneticZ - speedSide * state.magneticZ - speed_diff * starState.magneticZ + speedSideStar * doubleStarState.magneticZ; return doubleStarFlux; diff --git a/src/riemann_solvers/hlld_cuda.h b/src/riemann_solvers/hlld_cuda.h index 38504abbc..a6247a5cf 100644 --- a/src/riemann_solvers/hlld_cuda.h +++ b/src/riemann_solvers/hlld_cuda.h @@ -38,11 +38,8 @@ namespace mhd * \param[in] dir The direction that the solve is taking place in. 0=X, 1=Y, * 2=Z \param[in] n_fields The total number of fields */ -__global__ void Calculate_HLLD_Fluxes_CUDA(Real *dev_bounds_L, - Real *dev_bounds_R, - Real *dev_magnetic_face, - Real *dev_flux, int nx, int ny, - int nz, int n_ghost, Real gamma, +__global__ void Calculate_HLLD_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_R, Real *dev_magnetic_face, + Real *dev_flux, int nx, int ny, int nz, int n_ghost, Real gamma, int direction, int n_fields); /*! @@ -63,8 +60,7 @@ Real static const _hlldSmallNumber = 1.0e-8; * */ struct State { - Real density, velocityX, velocityY, velocityZ, energy, magneticY, magneticZ, - gasPressure, totalPressure; + Real density, velocityX, velocityY, velocityZ, energy, magneticY, magneticZ, gasPressure, totalPressure; #ifdef SCALAR Real scalarSpecific[grid_enum::nscalars]; #endif // SCALAR @@ -130,34 +126,32 @@ struct Speeds { * \param o3 Direction parameter * \return mhd::_internal::State The loaded state */ -__device__ __host__ mhd::_internal::State loadState( - Real const *interfaceArr, Real const &magneticX, Real const &gamma, - int const &threadId, int const &n_cells, int const &o1, int const &o2, - int const &o3); +__device__ __host__ mhd::_internal::State loadState(Real const *interfaceArr, Real const &magneticX, Real const &gamma, + int const &threadId, int const &n_cells, int const &o1, + int const &o2, int const &o3); /*! * \brief Compute the approximate left and right wave speeds. M&K 2005 equation * 67 */ -__device__ __host__ mhd::_internal::Speeds approximateLRWaveSpeeds( - mhd::_internal::State const &stateL, mhd::_internal::State const &stateR, - Real const &magneticX, Real const &gamma); +__device__ __host__ mhd::_internal::Speeds approximateLRWaveSpeeds(mhd::_internal::State const &stateL, + mhd::_internal::State const &stateR, + Real const &magneticX, Real const &gamma); /*! * \brief Compute the approximate middle wave speed. M&K 2005 equation 38 */ -__device__ __host__ Real approximateMiddleWaveSpeed( - mhd::_internal::State const &stateL, mhd::_internal::State const &stateR, - mhd::_internal::Speeds const &speed); +__device__ __host__ Real approximateMiddleWaveSpeed(mhd::_internal::State const &stateL, + mhd::_internal::State const &stateR, + mhd::_internal::Speeds const &speed); /*! * \brief Compute the approximate left and right wave speeds. M&K 2005 equation * 51 */ -__device__ __host__ Real -approximateStarWaveSpeed(mhd::_internal::StarState const &starState, - mhd::_internal::Speeds const &speed, - Real const &magneticX, Real const &side); +__device__ __host__ Real approximateStarWaveSpeed(mhd::_internal::StarState const &starState, + mhd::_internal::Speeds const &speed, Real const &magneticX, + Real const &side); /*! * \brief Compute the fluxes in the left or right non-star state. M&K 2005 @@ -167,8 +161,7 @@ approximateStarWaveSpeed(mhd::_internal::StarState const &starState, * \param magneticX The X magnetic field * \return mhd::_internal::Flux The flux in the state */ -__device__ __host__ mhd::_internal::Flux nonStarFluxes( - mhd::_internal::State const &state, Real const &magneticX); +__device__ __host__ mhd::_internal::Flux nonStarFluxes(mhd::_internal::State const &state, Real const &magneticX); /*! * \brief Write the given flux values to the dev_flux array @@ -183,10 +176,8 @@ __device__ __host__ mhd::_internal::Flux nonStarFluxes( * \param[in] state The left or right state depending on if this is a return for * one of the left states or one of the right states */ -__device__ __host__ void returnFluxes(int const &threadId, int const &o1, - int const &o2, int const &o3, - int const &n_cells, Real *dev_flux, - mhd::_internal::Flux const &flux, +__device__ __host__ void returnFluxes(int const &threadId, int const &o1, int const &o2, int const &o3, + int const &n_cells, Real *dev_flux, mhd::_internal::Flux const &flux, mhd::_internal::State const &state); /*! @@ -197,8 +188,7 @@ __device__ __host__ void returnFluxes(int const &threadId, int const &o1, * \param speed The wave speeds * \return Real The total pressure in the star state */ -__device__ __host__ Real starTotalPressure(mhd::_internal::State const &stateL, - mhd::_internal::State const &stateR, +__device__ __host__ Real starTotalPressure(mhd::_internal::State const &stateL, mhd::_internal::State const &stateR, mhd::_internal::Speeds const &speed); /*! @@ -210,10 +200,10 @@ __device__ __host__ Real starTotalPressure(mhd::_internal::State const &stateL, * in the x direction \param totalPressureStar The total pressure in the * star state \return mhd::_internal::StarState The computed star state */ -__device__ __host__ mhd::_internal::StarState computeStarState( - mhd::_internal::State const &state, mhd::_internal::Speeds const &speed, - Real const &speedSide, Real const &magneticX, - Real const &totalPressureStar); +__device__ __host__ mhd::_internal::StarState computeStarState(mhd::_internal::State const &state, + mhd::_internal::Speeds const &speed, + Real const &speedSide, Real const &magneticX, + Real const &totalPressureStar); /*! * \brief Compute the flux in the star state. M&K 2005 equation 64 @@ -225,10 +215,10 @@ __device__ __host__ mhd::_internal::StarState computeStarState( * \param speedSide The non-star wave speed on the same side as the star state * \return mhd::_internal::Flux The flux in the star state */ -__device__ __host__ mhd::_internal::Flux starFluxes( - mhd::_internal::StarState const &starState, - mhd::_internal::State const &state, mhd::_internal::Flux const &flux, - mhd::_internal::Speeds const &speed, Real const &speedSide); +__device__ __host__ mhd::_internal::Flux starFluxes(mhd::_internal::StarState const &starState, + mhd::_internal::State const &state, + mhd::_internal::Flux const &flux, + mhd::_internal::Speeds const &speed, Real const &speedSide); /*! * \brief Compute the double star state. M&K 2005 equations 59-63 @@ -240,10 +230,11 @@ __device__ __host__ mhd::_internal::Flux starFluxes( * \param speed The approximate wave speeds * \return mhd::_internal::DoubleStarState The double star state */ -__device__ __host__ mhd::_internal::DoubleStarState computeDoubleStarState( - mhd::_internal::StarState const &starStateL, - mhd::_internal::StarState const &starStateR, Real const &magneticX, - Real const &totalPressureStar, mhd::_internal::Speeds const &speed); +__device__ __host__ mhd::_internal::DoubleStarState computeDoubleStarState(mhd::_internal::StarState const &starStateL, + mhd::_internal::StarState const &starStateR, + Real const &magneticX, + Real const &totalPressureStar, + mhd::_internal::Speeds const &speed); /*! * \brief Compute the double star state fluxes. M&K 2005 equation 65 @@ -258,12 +249,9 @@ __device__ __host__ mhd::_internal::DoubleStarState computeDoubleStarState( * \return __device__ */ __device__ __host__ mhd::_internal::Flux computeDoubleStarFluxes( - mhd::_internal::DoubleStarState const &doubleStarState, - Real const &doubleStarStateEnergy, - mhd::_internal::StarState const &starState, - mhd::_internal::State const &state, mhd::_internal::Flux const &flux, - mhd::_internal::Speeds const &speed, Real const &speedSide, - Real const &speedSideStar); + mhd::_internal::DoubleStarState const &doubleStarState, Real const &doubleStarStateEnergy, + mhd::_internal::StarState const &starState, mhd::_internal::State const &state, mhd::_internal::Flux const &flux, + mhd::_internal::Speeds const &speed, Real const &speedSide, Real const &speedSideStar); } // namespace _internal } // end namespace mhd diff --git a/src/riemann_solvers/hlld_cuda_tests.cu b/src/riemann_solvers/hlld_cuda_tests.cu index 10ceed96e..712756522 100644 --- a/src/riemann_solvers/hlld_cuda_tests.cu +++ b/src/riemann_solvers/hlld_cuda_tests.cu @@ -55,20 +55,16 @@ class tMHDCalculateHLLDFluxesCUDA : public ::testing::Test * X, 1 = plane normal to Y, 2 = plane normal to Z. Defaults to 0. * \return std::vector */ - std::vector computeFluxes(std::vector stateLeft, - std::vector stateRight, - Real const &gamma, int const &direction = 0) + std::vector computeFluxes(std::vector stateLeft, std::vector stateRight, Real const &gamma, + int const &direction = 0) { // Rearrange X, Y, and Z values for the chosen direction - std::rotate(stateLeft.begin() + 1, stateLeft.begin() + 4 - direction, - stateLeft.begin() + 4); - std::rotate(stateRight.begin() + 1, stateRight.begin() + 4 - direction, - stateRight.begin() + 4); + std::rotate(stateLeft.begin() + 1, stateLeft.begin() + 4 - direction, stateLeft.begin() + 4); + std::rotate(stateRight.begin() + 1, stateRight.begin() + 4 - direction, stateRight.begin() + 4); // Create new vectors that store the values in the way that the HLLD // solver expects - EXPECT_DOUBLE_EQ(stateLeft.at(grid_enum::magnetic_x), - stateRight.at(grid_enum::magnetic_x)) + EXPECT_DOUBLE_EQ(stateLeft.at(grid_enum::magnetic_x), stateRight.at(grid_enum::magnetic_x)) << "The left and right magnetic fields are not equal"; std::vector const magneticX{stateLeft.at(grid_enum::magnetic_x)}; stateLeft.erase(stateLeft.begin() + grid_enum::magnetic_x); @@ -100,36 +96,27 @@ class tMHDCalculateHLLDFluxesCUDA : public ::testing::Test Real *devTestFlux; // Allocate device arrays and copy data + CudaSafeCall(cudaMalloc(&devConservedLeft, stateLeft.size() * sizeof(Real))); + CudaSafeCall(cudaMalloc(&devConservedRight, stateRight.size() * sizeof(Real))); + CudaSafeCall(cudaMalloc(&devConservedMagXFace, magneticX.size() * sizeof(Real))); + CudaSafeCall(cudaMalloc(&devTestFlux, testFlux.size() * sizeof(Real))); + CudaSafeCall( - cudaMalloc(&devConservedLeft, stateLeft.size() * sizeof(Real))); + cudaMemcpy(devConservedLeft, stateLeft.data(), stateLeft.size() * sizeof(Real), cudaMemcpyHostToDevice)); CudaSafeCall( - cudaMalloc(&devConservedRight, stateRight.size() * sizeof(Real))); + cudaMemcpy(devConservedRight, stateRight.data(), stateRight.size() * sizeof(Real), cudaMemcpyHostToDevice)); CudaSafeCall( - cudaMalloc(&devConservedMagXFace, magneticX.size() * sizeof(Real))); - CudaSafeCall(cudaMalloc(&devTestFlux, testFlux.size() * sizeof(Real))); - - CudaSafeCall(cudaMemcpy(devConservedLeft, stateLeft.data(), - stateLeft.size() * sizeof(Real), - cudaMemcpyHostToDevice)); - CudaSafeCall(cudaMemcpy(devConservedRight, stateRight.data(), - stateRight.size() * sizeof(Real), - cudaMemcpyHostToDevice)); - CudaSafeCall(cudaMemcpy(devConservedMagXFace, magneticX.data(), - magneticX.size() * sizeof(Real), - cudaMemcpyHostToDevice)); + cudaMemcpy(devConservedMagXFace, magneticX.data(), magneticX.size() * sizeof(Real), cudaMemcpyHostToDevice)); // Run kernel - hipLaunchKernelGGL( - mhd::Calculate_HLLD_Fluxes_CUDA, dimGrid, dimBlock, 0, 0, - devConservedLeft, // the "left" interface - devConservedRight, // the "right" interface - devConservedMagXFace, // the magnetic field at the interface - devTestFlux, nx, ny, nz, nGhost, gamma, direction, nFields); + hipLaunchKernelGGL(mhd::Calculate_HLLD_Fluxes_CUDA, dimGrid, dimBlock, 0, 0, + devConservedLeft, // the "left" interface + devConservedRight, // the "right" interface + devConservedMagXFace, // the magnetic field at the interface + devTestFlux, nx, ny, nz, nGhost, gamma, direction, nFields); CudaCheckError(); - CudaSafeCall(cudaMemcpy(testFlux.data(), devTestFlux, - testFlux.size() * sizeof(Real), - cudaMemcpyDeviceToHost)); + CudaSafeCall(cudaMemcpy(testFlux.data(), devTestFlux, testFlux.size() * sizeof(Real), cudaMemcpyDeviceToHost)); // Make sure to sync with the device so we have the results cudaDeviceSynchronize(); @@ -173,34 +160,26 @@ class tMHDCalculateHLLDFluxesCUDA : public ::testing::Test * \param[in] direction Which plane the interface is. 0 = plane normal to * X, 1 = plane normal to Y, 2 = plane normal to Z. Defaults to 0. */ - void checkResults(std::vector fiducialFlux, - std::vector scalarFlux, Real thermalEnergyFlux, - std::vector const &testFlux, - std::string const &customOutput = "", - int const &direction = 0) + void checkResults(std::vector fiducialFlux, std::vector scalarFlux, Real thermalEnergyFlux, + std::vector const &testFlux, std::string const &customOutput = "", int const &direction = 0) { // Field names - std::vector fieldNames{"Densities", "X Momentum", - "Y Momentum", "Z Momentum", - "Energies", "X Magnetic Field", - "Y Magnetic Field", "Z Magnetic Field"}; + std::vector fieldNames{"Densities", "X Momentum", "Y Momentum", "Z Momentum", + "Energies", "X Magnetic Field", "Y Magnetic Field", "Z Magnetic Field"}; #ifdef DE fieldNames.push_back("Thermal energy (dual energy)"); fiducialFlux.push_back(thermalEnergyFlux); #endif // DE #ifdef SCALAR std::vector scalarNames{"Scalar 1", "Scalar 2", "Scalar 3"}; - fieldNames.insert(fieldNames.begin() + grid_enum::magnetic_start, - scalarNames.begin(), + fieldNames.insert(fieldNames.begin() + grid_enum::magnetic_start, scalarNames.begin(), scalarNames.begin() + grid_enum::nscalars); - fiducialFlux.insert(fiducialFlux.begin() + grid_enum::magnetic_start, - scalarFlux.begin(), + fiducialFlux.insert(fiducialFlux.begin() + grid_enum::magnetic_start, scalarFlux.begin(), scalarFlux.begin() + grid_enum::nscalars); #endif // SCALAR - ASSERT_TRUE((fiducialFlux.size() == testFlux.size()) and - (fiducialFlux.size() == fieldNames.size())) + ASSERT_TRUE((fiducialFlux.size() == testFlux.size()) and (fiducialFlux.size() == fieldNames.size())) << "The fiducial flux, test flux, and field name vectors are not all " "the same length" << std::endl @@ -219,19 +198,16 @@ class tMHDCalculateHLLDFluxesCUDA : public ::testing::Test double const fixedEpsilon = 2.7E-15; int64_t const ulpsEpsilon = 7; - bool areEqual = testingUtilities::nearlyEqualDbl( - fiducialFlux[i], testFlux[i], absoluteDiff, ulpsDiff, fixedEpsilon, - ulpsEpsilon); - EXPECT_TRUE(areEqual) - << std::endl - << customOutput << std::endl - << "There's a difference in " << fieldNames[i] << " Flux" << std::endl - << "The direction is: " << direction << " (0=X, 1=Y, 2=Z)" - << std::endl - << "The fiducial value is: " << fiducialFlux[i] << std::endl - << "The test value is: " << testFlux[i] << std::endl - << "The absolute difference is: " << absoluteDiff << std::endl - << "The ULP difference is: " << ulpsDiff << std::endl; + bool areEqual = testingUtilities::nearlyEqualDbl(fiducialFlux[i], testFlux[i], absoluteDiff, ulpsDiff, + fixedEpsilon, ulpsEpsilon); + EXPECT_TRUE(areEqual) << std::endl + << customOutput << std::endl + << "There's a difference in " << fieldNames[i] << " Flux" << std::endl + << "The direction is: " << direction << " (0=X, 1=Y, 2=Z)" << std::endl + << "The fiducial value is: " << fiducialFlux[i] << std::endl + << "The test value is: " << testFlux[i] << std::endl + << "The absolute difference is: " << absoluteDiff << std::endl + << "The ULP difference is: " << ulpsDiff << std::endl; } } // ===================================================================== @@ -250,37 +226,32 @@ class tMHDCalculateHLLDFluxesCUDA : public ::testing::Test * y-momentum, z-momentum, energy, passive scalars, x-magnetic field, * y-magnetic field, z-magnetic field. */ - std::vector primitive2Conserved( - std::vector const &input, double const &gamma, - std::vector const &primitiveScalars) + std::vector primitive2Conserved(std::vector const &input, double const &gamma, + std::vector const &primitiveScalars) { std::vector output(input.size()); output.at(0) = input.at(0); // Density output.at(1) = input.at(1) * input.at(0); // X Velocity to momentum output.at(2) = input.at(2) * input.at(0); // Y Velocity to momentum output.at(3) = input.at(3) * input.at(0); // Z Velocity to momentum - output.at(4) = mhd::utils::computeEnergy( - input.at(4), input.at(0), input.at(1), input.at(2), input.at(3), - input.at(5), input.at(6), input.at(7), - gamma); // Pressure to Energy - output.at(5) = input.at(5); // X Magnetic Field - output.at(6) = input.at(6); // Y Magnetic Field - output.at(7) = input.at(7); // Z Magnetic Field + output.at(4) = mhd::utils::computeEnergy(input.at(4), input.at(0), input.at(1), input.at(2), input.at(3), + input.at(5), input.at(6), input.at(7), + gamma); // Pressure to Energy + output.at(5) = input.at(5); // X Magnetic Field + output.at(6) = input.at(6); // Y Magnetic Field + output.at(7) = input.at(7); // Z Magnetic Field #ifdef SCALAR std::vector conservedScalar(primitiveScalars.size()); - std::transform(primitiveScalars.begin(), primitiveScalars.end(), - conservedScalar.begin(), + std::transform(primitiveScalars.begin(), primitiveScalars.end(), conservedScalar.begin(), [&](Real const &c) { return c * output.at(0); }); - output.insert(output.begin() + grid_enum::magnetic_start, - conservedScalar.begin(), + output.insert(output.begin() + grid_enum::magnetic_start, conservedScalar.begin(), conservedScalar.begin() + grid_enum::nscalars); #endif // SCALAR #ifdef DE output.push_back(mhd::utils::computeThermalEnergy( - output.at(4), output.at(0), output.at(1), output.at(2), output.at(3), - output.at(grid_enum::magnetic_x), output.at(grid_enum::magnetic_y), - output.at(grid_enum::magnetic_z), gamma)); + output.at(4), output.at(0), output.at(1), output.at(2), output.at(3), output.at(grid_enum::magnetic_x), + output.at(grid_enum::magnetic_y), output.at(grid_enum::magnetic_z), gamma)); #endif // DE return output; } @@ -313,59 +284,44 @@ class tMHDCalculateHLLDFluxesCUDA : public ::testing::Test * the Brio & Wu Shock tube * */ -TEST_F(tMHDCalculateHLLDFluxesCUDA, - BrioAndWuShockTubeCorrectInputExpectCorrectOutput) +TEST_F(tMHDCalculateHLLDFluxesCUDA, BrioAndWuShockTubeCorrectInputExpectCorrectOutput) { // Constant Values Real const gamma = 2.; Real const Vz = 0.0; Real const Bx = 0.75; Real const Bz = 0.0; - std::vector const primitiveScalar{1.1069975296, 2.2286185018, - 3.3155141875}; + std::vector const primitiveScalar{1.1069975296, 2.2286185018, 3.3155141875}; // States std::vector const // | Density | X-Velocity | Y-Velocity | Z-Velocity | // Pressure | X-Magnetic Field | Y-Magnetic Field | // Z-Magnetic Field | Adiabatic Index | Passive // Scalars | - leftICs = primitive2Conserved({1.0, 0.0, 0.0, Vz, 1.0, Bx, 1.0, Bz}, - gamma, primitiveScalar), - leftFastRareLeftSide = primitive2Conserved( - {0.978576, 0.038603, -0.011074, Vz, 0.957621, Bx, 0.970288, Bz}, - gamma, primitiveScalar), - leftFastRareRightSide = primitive2Conserved( - {0.671655, 0.647082, -0.238291, Vz, 0.451115, Bx, 0.578240, Bz}, - gamma, primitiveScalar), - compoundLeftSide = primitive2Conserved( - {0.814306, 0.506792, -0.911794, Vz, 0.706578, Bx, -0.108819, Bz}, - gamma, primitiveScalar), - compoundPeak = primitive2Conserved( - {0.765841, 0.523701, -1.383720, Vz, 0.624742, Bx, -0.400787, Bz}, - gamma, primitiveScalar), - compoundRightSide = primitive2Conserved( - {0.695211, 0.601089, -1.583720, Vz, 0.515237, Bx, -0.537027, Bz}, - gamma, primitiveScalar), - contactLeftSide = primitive2Conserved( - {0.680453, 0.598922, -1.584490, Vz, 0.515856, Bx, -0.533616, Bz}, - gamma, primitiveScalar), - contactRightSide = primitive2Conserved( - {0.231160, 0.599261, -1.584820, Vz, 0.516212, Bx, -0.533327, Bz}, - gamma, primitiveScalar), - slowShockLeftSide = primitive2Conserved( - {0.153125, 0.086170, -0.683303, Vz, 0.191168, Bx, -0.850815, Bz}, - gamma, primitiveScalar), - slowShockRightSide = primitive2Conserved( - {0.117046, -0.238196, -0.165561, Vz, 0.087684, Bx, -0.903407, Bz}, - gamma, primitiveScalar), - rightFastRareLeftSide = primitive2Conserved( - {0.117358, -0.228756, -0.158845, Vz, 0.088148, Bx, -0.908335, Bz}, - gamma, primitiveScalar), - rightFastRareRightSide = primitive2Conserved( - {0.124894, -0.003132, -0.002074, Vz, 0.099830, Bx, -0.999018, Bz}, - gamma, primitiveScalar), - rightICs = primitive2Conserved({0.128, 0.0, 0.0, Vz, 0.1, Bx, -1.0, Bz}, - gamma, primitiveScalar); + leftICs = primitive2Conserved({1.0, 0.0, 0.0, Vz, 1.0, Bx, 1.0, Bz}, gamma, primitiveScalar), + leftFastRareLeftSide = + primitive2Conserved({0.978576, 0.038603, -0.011074, Vz, 0.957621, Bx, 0.970288, Bz}, gamma, primitiveScalar), + leftFastRareRightSide = + primitive2Conserved({0.671655, 0.647082, -0.238291, Vz, 0.451115, Bx, 0.578240, Bz}, gamma, primitiveScalar), + compoundLeftSide = + primitive2Conserved({0.814306, 0.506792, -0.911794, Vz, 0.706578, Bx, -0.108819, Bz}, gamma, primitiveScalar), + compoundPeak = + primitive2Conserved({0.765841, 0.523701, -1.383720, Vz, 0.624742, Bx, -0.400787, Bz}, gamma, primitiveScalar), + compoundRightSide = + primitive2Conserved({0.695211, 0.601089, -1.583720, Vz, 0.515237, Bx, -0.537027, Bz}, gamma, primitiveScalar), + contactLeftSide = + primitive2Conserved({0.680453, 0.598922, -1.584490, Vz, 0.515856, Bx, -0.533616, Bz}, gamma, primitiveScalar), + contactRightSide = + primitive2Conserved({0.231160, 0.599261, -1.584820, Vz, 0.516212, Bx, -0.533327, Bz}, gamma, primitiveScalar), + slowShockLeftSide = + primitive2Conserved({0.153125, 0.086170, -0.683303, Vz, 0.191168, Bx, -0.850815, Bz}, gamma, primitiveScalar), + slowShockRightSide = primitive2Conserved({0.117046, -0.238196, -0.165561, Vz, 0.087684, Bx, -0.903407, Bz}, gamma, + primitiveScalar), + rightFastRareLeftSide = primitive2Conserved({0.117358, -0.228756, -0.158845, Vz, 0.088148, Bx, -0.908335, Bz}, + gamma, primitiveScalar), + rightFastRareRightSide = primitive2Conserved({0.124894, -0.003132, -0.002074, Vz, 0.099830, Bx, -0.999018, Bz}, + gamma, primitiveScalar), + rightICs = primitive2Conserved({0.128, 0.0, 0.0, Vz, 0.1, Bx, -1.0, Bz}, gamma, primitiveScalar); for (size_t direction = 0; direction < 3; direction++) { // Initial Condition Checks @@ -378,11 +334,9 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, // Order of Fluxes is rho, vec(V), E, vec(B) std::vector const fiducialFlux{0, 1.21875, -0.75, 0, 0, 0.0, 0, 0}; std::vector const scalarFlux{0, 0, 0}; - Real thermalEnergyFlux = 0.0; - std::vector const testFluxes = - computeFluxes(leftICs, leftICs, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, - outputString, direction); + Real thermalEnergyFlux = 0.0; + std::vector const testFluxes = computeFluxes(leftICs, leftICs, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } { std::string const outputString{ @@ -391,14 +345,11 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, "HLLD State: Left Double Star State"}; // Compute the fluxes and check for correctness // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{ - 0, 0.31874999999999998, 0.75, 0, 0, 0.0, 0, 0}; + std::vector const fiducialFlux{0, 0.31874999999999998, 0.75, 0, 0, 0.0, 0, 0}; std::vector const scalarFlux{0, 0, 0}; - Real thermalEnergyFlux = 0.0; - std::vector const testFluxes = - computeFluxes(rightICs, rightICs, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, - outputString, direction); + Real thermalEnergyFlux = 0.0; + std::vector const testFluxes = computeFluxes(rightICs, rightICs, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } { std::string const outputString{ @@ -407,18 +358,14 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, "HLLD State: Left Double Star State"}; // Compute the fluxes and check for correctness // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{ - 0.20673357746080057, 0.4661897584603672, - 0.061170028480309613, 0, - 0.064707291981509041, 0.0, - 1.0074980455427278, 0}; - std::vector const scalarFlux{ - 0.22885355953447648, 0.46073027567244362, 0.6854281091039145}; - Real thermalEnergyFlux = 0.20673357746080046; - std::vector const testFluxes = - computeFluxes(leftICs, rightICs, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, - outputString, direction); + std::vector const fiducialFlux{0.20673357746080057, 0.4661897584603672, + 0.061170028480309613, 0, + 0.064707291981509041, 0.0, + 1.0074980455427278, 0}; + std::vector const scalarFlux{0.22885355953447648, 0.46073027567244362, 0.6854281091039145}; + Real thermalEnergyFlux = 0.20673357746080046; + std::vector const testFluxes = computeFluxes(leftICs, rightICs, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } { std::string const outputString{ @@ -427,18 +374,14 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, "HLLD State: Right Double Star State"}; // Compute the fluxes and check for correctness // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{ - -0.20673357746080057, 0.4661897584603672, - 0.061170028480309613, 0, - -0.064707291981509041, 0.0, - -1.0074980455427278, 0}; - std::vector const scalarFlux{ - -0.22885355953447648, -0.46073027567244362, -0.6854281091039145}; - Real thermalEnergyFlux = -0.20673357746080046; - std::vector const testFluxes = - computeFluxes(rightICs, leftICs, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, - outputString, direction); + std::vector const fiducialFlux{-0.20673357746080057, 0.4661897584603672, + 0.061170028480309613, 0, + -0.064707291981509041, 0.0, + -1.0074980455427278, 0}; + std::vector const scalarFlux{-0.22885355953447648, -0.46073027567244362, -0.6854281091039145}; + Real thermalEnergyFlux = -0.20673357746080046; + std::vector const testFluxes = computeFluxes(rightICs, leftICs, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } // Cross wave checks @@ -449,18 +392,14 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, "HLLD State: Left Double Star State"}; // Compute the fluxes and check for correctness // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{ - 0.4253304970883941, 0.47729308161522394, - -0.55321646324583107, 0, - 0.92496835095531071, 0.0, - 0.53128887284876058, 0}; - std::vector const scalarFlux{ - 0.47083980954039228, 0.94789941519098619, 1.4101892974729979}; + std::vector const fiducialFlux{0.4253304970883941, 0.47729308161522394, + -0.55321646324583107, 0, + 0.92496835095531071, 0.0, + 0.53128887284876058, 0}; + std::vector const scalarFlux{0.47083980954039228, 0.94789941519098619, 1.4101892974729979}; Real thermalEnergyFlux = 0.41622256825457099; - std::vector const testFluxes = computeFluxes( - leftFastRareLeftSide, leftFastRareRightSide, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, - outputString, direction); + std::vector const testFluxes = computeFluxes(leftFastRareLeftSide, leftFastRareRightSide, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } { std::string const outputString{ @@ -469,18 +408,14 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, "HLLD State: Left Double Star State"}; // Compute the fluxes and check for correctness // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{ - 0.070492123816403796, 1.2489600267034342, - -0.71031457071286608, 0, - 0.21008080091470105, 0.0, - 0.058615131833681167, 0}; - std::vector const scalarFlux{ - 0.078034606921016325, 0.15710005136841393, 0.23371763662029341}; + std::vector const fiducialFlux{0.070492123816403796, 1.2489600267034342, + -0.71031457071286608, 0, + 0.21008080091470105, 0.0, + 0.058615131833681167, 0}; + std::vector const scalarFlux{0.078034606921016325, 0.15710005136841393, 0.23371763662029341}; Real thermalEnergyFlux = 0.047345816580591255; - std::vector const testFluxes = computeFluxes( - leftFastRareRightSide, leftFastRareLeftSide, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, - outputString, direction); + std::vector const testFluxes = computeFluxes(leftFastRareRightSide, leftFastRareLeftSide, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } { std::string const outputString{ @@ -489,18 +424,14 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, "HLLD State: Left Double Star State"}; // Compute the fluxes and check for correctness // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{ - 0.4470171023231666, 0.60747660800918468, - -0.20506357956052623, 0, - 0.72655525704800772, 0.0, - 0.76278089951123285, 0}; - std::vector const scalarFlux{ - 0.4948468279606959, 0.99623058485843297, 1.482091544807598}; - Real thermalEnergyFlux = 0.38787931087981475; - std::vector const testFluxes = - computeFluxes(compoundLeftSide, compoundRightSide, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, - outputString, direction); + std::vector const fiducialFlux{0.4470171023231666, 0.60747660800918468, + -0.20506357956052623, 0, + 0.72655525704800772, 0.0, + 0.76278089951123285, 0}; + std::vector const scalarFlux{0.4948468279606959, 0.99623058485843297, 1.482091544807598}; + Real thermalEnergyFlux = 0.38787931087981475; + std::vector const testFluxes = computeFluxes(compoundLeftSide, compoundRightSide, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } { std::string const outputString{ @@ -509,18 +440,14 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, "HLLD State: Left Double Star State"}; // Compute the fluxes and check for correctness // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{ - 0.38496850292724116, 0.66092864409611585, - -0.3473204105316457, 0, - 0.89888639514227009, 0.0, - 0.71658566275120927, 0}; - std::vector const scalarFlux{ - 0.42615918171426637, 0.85794792823389721, 1.2763685331959034}; - Real thermalEnergyFlux = 0.28530908823756074; - std::vector const testFluxes = - computeFluxes(compoundRightSide, compoundLeftSide, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, - outputString, direction); + std::vector const fiducialFlux{0.38496850292724116, 0.66092864409611585, + -0.3473204105316457, 0, + 0.89888639514227009, 0.0, + 0.71658566275120927, 0}; + std::vector const scalarFlux{0.42615918171426637, 0.85794792823389721, 1.2763685331959034}; + Real thermalEnergyFlux = 0.28530908823756074; + std::vector const testFluxes = computeFluxes(compoundRightSide, compoundLeftSide, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } { std::string const outputString{ @@ -529,18 +456,14 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, "HLLD State: Left Double Star State"}; // Compute the fluxes and check for correctness // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{ - 0.41864266180405574, 0.63505764056357727, - -0.1991008813536404, 0, - 0.73707474818824525, 0.0, - 0.74058225030218761, 0}; - std::vector const scalarFlux{ - 0.46343639240225803, 0.93299478173931882, 1.388015684704111}; - Real thermalEnergyFlux = 0.36325864563467081; - std::vector const testFluxes = - computeFluxes(compoundLeftSide, compoundPeak, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, - outputString, direction); + std::vector const fiducialFlux{0.41864266180405574, 0.63505764056357727, + -0.1991008813536404, 0, + 0.73707474818824525, 0.0, + 0.74058225030218761, 0}; + std::vector const scalarFlux{0.46343639240225803, 0.93299478173931882, 1.388015684704111}; + Real thermalEnergyFlux = 0.36325864563467081; + std::vector const testFluxes = computeFluxes(compoundLeftSide, compoundPeak, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } { std::string const outputString{ @@ -549,18 +472,14 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, "HLLD State: Left Double Star State"}; // Compute the fluxes and check for correctness // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{ - 0.39520761138156862, 0.6390998385557225, - -0.35132701297727598, 0, - 0.89945171879176522, 0.0, - 0.71026545717401468, 0}; - std::vector const scalarFlux{ - 0.43749384947851333, 0.88076699477714815, 1.3103164425435772}; - Real thermalEnergyFlux = 0.32239432669410983; - std::vector const testFluxes = - computeFluxes(compoundPeak, compoundLeftSide, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, - outputString, direction); + std::vector const fiducialFlux{0.39520761138156862, 0.6390998385557225, + -0.35132701297727598, 0, + 0.89945171879176522, 0.0, + 0.71026545717401468, 0}; + std::vector const scalarFlux{0.43749384947851333, 0.88076699477714815, 1.3103164425435772}; + Real thermalEnergyFlux = 0.32239432669410983; + std::vector const testFluxes = computeFluxes(compoundPeak, compoundLeftSide, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } { std::string const outputString{ @@ -569,18 +488,14 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, "HLLD State: Left Double Star State"}; // Compute the fluxes and check for correctness // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{ - 0.4285899590904928, 0.6079309920345296, - -0.26055320217638239, 0, - 0.75090757444649436, 0.0, - 0.85591904930227747, 0}; - std::vector const scalarFlux{ - 0.47444802592454061, 0.95516351251477749, 1.4209960899845735}; - Real thermalEnergyFlux = 0.34962629086469987; - std::vector const testFluxes = - computeFluxes(compoundPeak, compoundRightSide, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, - outputString, direction); + std::vector const fiducialFlux{0.4285899590904928, 0.6079309920345296, + -0.26055320217638239, 0, + 0.75090757444649436, 0.0, + 0.85591904930227747, 0}; + std::vector const scalarFlux{0.47444802592454061, 0.95516351251477749, 1.4209960899845735}; + Real thermalEnergyFlux = 0.34962629086469987; + std::vector const testFluxes = computeFluxes(compoundPeak, compoundRightSide, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } { std::string const outputString{ @@ -589,18 +504,14 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, "HLLD State: Left Double Star State"}; // Compute the fluxes and check for correctness // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{ - 0.39102247793946454, 0.65467021266207581, - -0.25227691377588229, 0, - 0.76271525822813691, 0.0, - 0.83594460438033491, 0}; - std::vector const scalarFlux{ - 0.43286091709705776, 0.8714399289555731, 1.2964405732397004}; - Real thermalEnergyFlux = 0.28979582956267347; - std::vector const testFluxes = - computeFluxes(compoundRightSide, compoundPeak, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, - outputString, direction); + std::vector const fiducialFlux{0.39102247793946454, 0.65467021266207581, + -0.25227691377588229, 0, + 0.76271525822813691, 0.0, + 0.83594460438033491, 0}; + std::vector const scalarFlux{0.43286091709705776, 0.8714399289555731, 1.2964405732397004}; + Real thermalEnergyFlux = 0.28979582956267347; + std::vector const testFluxes = computeFluxes(compoundRightSide, compoundPeak, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } { std::string const outputString{ @@ -609,18 +520,14 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, "HLLD State: Left Double Star State"}; // Compute the fluxes and check for correctness // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{ - 0.40753761783585118, 0.62106392255463172, - -0.2455554035355339, 0, - 0.73906344777217226, 0.0, - 0.8687394222350926, 0}; - std::vector const scalarFlux{ - 0.45114313616335622, 0.90824587528847567, 1.3511967538747176}; - Real thermalEnergyFlux = 0.30895701155896288; - std::vector const testFluxes = - computeFluxes(contactLeftSide, contactRightSide, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, - outputString, direction); + std::vector const fiducialFlux{0.40753761783585118, 0.62106392255463172, + -0.2455554035355339, 0, + 0.73906344777217226, 0.0, + 0.8687394222350926, 0}; + std::vector const scalarFlux{0.45114313616335622, 0.90824587528847567, 1.3511967538747176}; + Real thermalEnergyFlux = 0.30895701155896288; + std::vector const testFluxes = computeFluxes(contactLeftSide, contactRightSide, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } { std::string const outputString{ @@ -629,18 +536,14 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, "HLLD State: Left Double Star State"}; // Compute the fluxes and check for correctness // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{ - 0.13849588572126192, 0.46025037934770729, - 0.18052412687974539, 0, - 0.35385590617992224, 0.0, - 0.86909622543144227, 0}; - std::vector const scalarFlux{ - 0.15331460335320088, 0.30865449334158279, 0.45918507401922254}; - Real thermalEnergyFlux = 0.30928031735570188; - std::vector const testFluxes = - computeFluxes(contactRightSide, contactLeftSide, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, - outputString, direction); + std::vector const fiducialFlux{0.13849588572126192, 0.46025037934770729, + 0.18052412687974539, 0, + 0.35385590617992224, 0.0, + 0.86909622543144227, 0}; + std::vector const scalarFlux{0.15331460335320088, 0.30865449334158279, 0.45918507401922254}; + Real thermalEnergyFlux = 0.30928031735570188; + std::vector const testFluxes = computeFluxes(contactRightSide, contactLeftSide, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } { std::string const outputString{ @@ -649,19 +552,14 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, "HLLD State: Left Double Star State"}; // Compute the fluxes and check for correctness // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{ - 3.5274134848883865e-05, 0.32304849716274459, - 0.60579784881286636, 0, - -0.32813070621836449, 0.0, - 0.40636483121437972, 0}; - std::vector const scalarFlux{3.9048380136491711e-05, - 7.8612589559210735e-05, - 0.00011695189454326261}; + std::vector const fiducialFlux{3.5274134848883865e-05, 0.32304849716274459, + 0.60579784881286636, 0, + -0.32813070621836449, 0.0, + 0.40636483121437972, 0}; + std::vector const scalarFlux{3.9048380136491711e-05, 7.8612589559210735e-05, 0.00011695189454326261}; Real thermalEnergyFlux = 4.4037784886918126e-05; - std::vector const testFluxes = computeFluxes( - slowShockLeftSide, slowShockRightSide, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, - outputString, direction); + std::vector const testFluxes = computeFluxes(slowShockLeftSide, slowShockRightSide, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } { std::string const outputString{ @@ -670,18 +568,14 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, "HLLD State: Right Double Star State"}; // Compute the fluxes and check for correctness // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{ - -0.016514307834939734, 0.16452009375678914, - 0.71622171077118635, 0, - -0.37262428139914472, 0.0, - 0.37204015363322052, 0}; - std::vector const scalarFlux{ - -0.018281297976332211, -0.036804091985367396, -0.054753421923485097}; + std::vector const fiducialFlux{-0.016514307834939734, 0.16452009375678914, + 0.71622171077118635, 0, + -0.37262428139914472, 0.0, + 0.37204015363322052, 0}; + std::vector const scalarFlux{-0.018281297976332211, -0.036804091985367396, -0.054753421923485097}; Real thermalEnergyFlux = -0.020617189878790236; - std::vector const testFluxes = computeFluxes( - slowShockRightSide, slowShockLeftSide, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, - outputString, direction); + std::vector const testFluxes = computeFluxes(slowShockRightSide, slowShockLeftSide, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } { std::string const outputString{ @@ -690,18 +584,15 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, "HLLD State: Right Double Star State"}; // Compute the fluxes and check for correctness // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{ - -0.026222824218991747, 0.22254903570732654, - 0.68544334213642255, 0, - -0.33339172106895454, 0.0, - 0.32319665359522443, 0}; - std::vector const scalarFlux{ - -0.029028601629558917, -0.058440671223894146, -0.086942145734385745}; - Real thermalEnergyFlux = -0.020960370728633469; - std::vector const testFluxes = computeFluxes( - rightFastRareLeftSide, rightFastRareRightSide, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, - outputString, direction); + std::vector const fiducialFlux{-0.026222824218991747, 0.22254903570732654, + 0.68544334213642255, 0, + -0.33339172106895454, 0.0, + 0.32319665359522443, 0}; + std::vector const scalarFlux{-0.029028601629558917, -0.058440671223894146, -0.086942145734385745}; + Real thermalEnergyFlux = -0.020960370728633469; + std::vector const testFluxes = + computeFluxes(rightFastRareLeftSide, rightFastRareRightSide, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } { std::string const outputString{ @@ -710,19 +601,15 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, "HLLD State: Right Double Star State"}; // Compute the fluxes and check for correctness // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{ - -0.001088867226159973, 0.32035322820305906, - 0.74922357263343131, 0, - -0.0099746892805345766, 0.0, - 0.0082135595470345102, 0}; - std::vector const scalarFlux{-0.0012053733294214947, - -0.0024266696462237609, - -0.0036101547366371614}; - Real thermalEnergyFlux = -0.00081785194236053073; - std::vector const testFluxes = computeFluxes( - rightFastRareRightSide, rightFastRareLeftSide, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, - outputString, direction); + std::vector const fiducialFlux{-0.001088867226159973, 0.32035322820305906, + 0.74922357263343131, 0, + -0.0099746892805345766, 0.0, + 0.0082135595470345102, 0}; + std::vector const scalarFlux{-0.0012053733294214947, -0.0024266696462237609, -0.0036101547366371614}; + Real thermalEnergyFlux = -0.00081785194236053073; + std::vector const testFluxes = + computeFluxes(rightFastRareRightSide, rightFastRareLeftSide, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } } } @@ -734,83 +621,48 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, * the Dai & Woodward Shock tube * */ -TEST_F(tMHDCalculateHLLDFluxesCUDA, - DaiAndWoodwardShockTubeCorrectInputExpectCorrectOutput) +TEST_F(tMHDCalculateHLLDFluxesCUDA, DaiAndWoodwardShockTubeCorrectInputExpectCorrectOutput) { // Constant Values Real const gamma = 5. / 3.; Real const coef = 1. / (std::sqrt(4. * M_PI)); Real const Bx = 4. * coef; - std::vector const primitiveScalar{1.1069975296, 2.2286185018, - 3.3155141875}; + std::vector const primitiveScalar{1.1069975296, 2.2286185018, 3.3155141875}; // States - std::vector< - Real> const // | Density | X-Velocity | Y-Velocity | Z-Velocity | - // Pressure | X-Magnetic Field | Y-Magnetic Field | - // Z-Magnetic Field | Adiabatic Index | Passive Scalars | - leftICs = primitive2Conserved( - {1.08, 0.0, 0.0, 0.0, 1.0, Bx, 3.6 * coef, 2 * coef}, gamma, - primitiveScalar), - leftFastShockLeftSide = - primitive2Conserved({1.09406, 1.176560, 0.021003, 0.506113, 0.970815, - 1.12838, 1.105355, 0.614087}, - gamma, primitiveScalar), - leftFastShockRightSide = - primitive2Conserved({1.40577, 0.693255, 0.210562, 0.611423, 1.494290, - 1.12838, 1.457700, 0.809831}, - gamma, primitiveScalar), - leftRotationLeftSide = - primitive2Conserved({1.40086, 0.687774, 0.215124, 0.609161, 1.485660, - 1.12838, 1.458735, 0.789960}, - gamma, primitiveScalar), - leftRotationRightSide = - primitive2Conserved({1.40119, 0.687504, 0.330268, 0.334140, 1.486570, - 1.12838, 1.588975, 0.475782}, - gamma, primitiveScalar), - leftSlowShockLeftSide = - primitive2Conserved({1.40519, 0.685492, 0.326265, 0.333664, 1.493710, - 1.12838, 1.575785, 0.472390}, - gamma, primitiveScalar), - leftSlowShockRightSide = - primitive2Conserved({1.66488, 0.578545, 0.050746, 0.250260, 1.984720, - 1.12838, 1.344490, 0.402407}, - gamma, primitiveScalar), - contactLeftSide = - primitive2Conserved({1.65220, 0.578296, 0.049683, 0.249962, 1.981250, - 1.12838, 1.346155, 0.402868}, - gamma, primitiveScalar), - contactRightSide = - primitive2Conserved({1.49279, 0.578276, 0.049650, 0.249924, 1.981160, - 1.12838, 1.346180, 0.402897}, - gamma, primitiveScalar), - rightSlowShockLeftSide = - primitive2Conserved({1.48581, 0.573195, 0.035338, 0.245592, 1.956320, - 1.12838, 1.370395, 0.410220}, - gamma, primitiveScalar), - rightSlowShockRightSide = - primitive2Conserved({1.23813, 0.450361, -0.275532, 0.151746, 1.439000, - 1.12838, 1.609775, 0.482762}, - gamma, primitiveScalar), - rightRotationLeftSide = - primitive2Conserved({1.23762, 0.450102, -0.274410, 0.145585, 1.437950, - 1.12838, 1.606945, 0.493879}, - gamma, primitiveScalar), - rightRotationRightSide = - primitive2Conserved({1.23747, 0.449993, -0.180766, -0.090238, - 1.437350, 1.12838, 1.503855, 0.752090}, - gamma, primitiveScalar), - rightFastShockLeftSide = - primitive2Conserved({1.22305, 0.424403, -0.171402, -0.085701, - 1.409660, 1.12838, 1.447730, 0.723864}, - gamma, primitiveScalar), - rightFastShockRightSide = - primitive2Conserved({1.00006, 0.000121, -0.000057, -0.000028, - 1.000100, 1.12838, 1.128435, 0.564217}, - gamma, primitiveScalar), - rightICs = - primitive2Conserved({1.0, 0.0, 0.0, 1.0, 0.2, Bx, 4 * coef, 2 * coef}, - gamma, primitiveScalar); + std::vector const // | Density | X-Velocity | Y-Velocity | Z-Velocity | + // Pressure | X-Magnetic Field | Y-Magnetic Field | + // Z-Magnetic Field | Adiabatic Index | Passive Scalars | + leftICs = primitive2Conserved({1.08, 0.0, 0.0, 0.0, 1.0, Bx, 3.6 * coef, 2 * coef}, gamma, primitiveScalar), + leftFastShockLeftSide = primitive2Conserved( + {1.09406, 1.176560, 0.021003, 0.506113, 0.970815, 1.12838, 1.105355, 0.614087}, gamma, primitiveScalar), + leftFastShockRightSide = primitive2Conserved( + {1.40577, 0.693255, 0.210562, 0.611423, 1.494290, 1.12838, 1.457700, 0.809831}, gamma, primitiveScalar), + leftRotationLeftSide = primitive2Conserved( + {1.40086, 0.687774, 0.215124, 0.609161, 1.485660, 1.12838, 1.458735, 0.789960}, gamma, primitiveScalar), + leftRotationRightSide = primitive2Conserved( + {1.40119, 0.687504, 0.330268, 0.334140, 1.486570, 1.12838, 1.588975, 0.475782}, gamma, primitiveScalar), + leftSlowShockLeftSide = primitive2Conserved( + {1.40519, 0.685492, 0.326265, 0.333664, 1.493710, 1.12838, 1.575785, 0.472390}, gamma, primitiveScalar), + leftSlowShockRightSide = primitive2Conserved( + {1.66488, 0.578545, 0.050746, 0.250260, 1.984720, 1.12838, 1.344490, 0.402407}, gamma, primitiveScalar), + contactLeftSide = primitive2Conserved( + {1.65220, 0.578296, 0.049683, 0.249962, 1.981250, 1.12838, 1.346155, 0.402868}, gamma, primitiveScalar), + contactRightSide = primitive2Conserved( + {1.49279, 0.578276, 0.049650, 0.249924, 1.981160, 1.12838, 1.346180, 0.402897}, gamma, primitiveScalar), + rightSlowShockLeftSide = primitive2Conserved( + {1.48581, 0.573195, 0.035338, 0.245592, 1.956320, 1.12838, 1.370395, 0.410220}, gamma, primitiveScalar), + rightSlowShockRightSide = primitive2Conserved( + {1.23813, 0.450361, -0.275532, 0.151746, 1.439000, 1.12838, 1.609775, 0.482762}, gamma, primitiveScalar), + rightRotationLeftSide = primitive2Conserved( + {1.23762, 0.450102, -0.274410, 0.145585, 1.437950, 1.12838, 1.606945, 0.493879}, gamma, primitiveScalar), + rightRotationRightSide = primitive2Conserved( + {1.23747, 0.449993, -0.180766, -0.090238, 1.437350, 1.12838, 1.503855, 0.752090}, gamma, primitiveScalar), + rightFastShockLeftSide = primitive2Conserved( + {1.22305, 0.424403, -0.171402, -0.085701, 1.409660, 1.12838, 1.447730, 0.723864}, gamma, primitiveScalar), + rightFastShockRightSide = primitive2Conserved( + {1.00006, 0.000121, -0.000057, -0.000028, 1.000100, 1.12838, 1.128435, 0.564217}, gamma, primitiveScalar), + rightICs = primitive2Conserved({1.0, 0.0, 0.0, 1.0, 0.2, Bx, 4 * coef, 2 * coef}, gamma, primitiveScalar); for (size_t direction = 0; direction < 3; direction++) { // Initial Condition Checks @@ -821,20 +673,12 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, "HLLD State: Left Double Star State"}; // Compute the fluxes and check for correctness // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{0, - 1.0381971863420549, - -1.1459155902616465, - -0.63661977236758127, - 0, - 0.0, - 0, - -1.1102230246251565e-16}; + std::vector const fiducialFlux{0, 1.0381971863420549, -1.1459155902616465, -0.63661977236758127, 0, 0.0, + 0, -1.1102230246251565e-16}; std::vector const scalarFlux{0, 0, 0}; - Real thermalEnergyFlux = 0.0; - std::vector const testFluxes = - computeFluxes(leftICs, leftICs, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, - outputString, direction); + Real thermalEnergyFlux = 0.0; + std::vector const testFluxes = computeFluxes(leftICs, leftICs, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } { std::string const outputString{ @@ -843,20 +687,13 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, "HLLD State: Left Double Star State"}; // Compute the fluxes and check for correctness // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{0, - 0.35915494309189522, - -1.2732395447351625, - -0.63661977236758127, - -0.63661977236758172, - 0.0, - 2.2204460492503131e-16, - -1.1283791670955123}; + std::vector const fiducialFlux{ + 0, 0.35915494309189522, -1.2732395447351625, -0.63661977236758127, -0.63661977236758172, + 0.0, 2.2204460492503131e-16, -1.1283791670955123}; std::vector const scalarFlux{0, 0, 0}; - Real thermalEnergyFlux = 0.0; - std::vector const testFluxes = - computeFluxes(rightICs, rightICs, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, - outputString, direction); + Real thermalEnergyFlux = 0.0; + std::vector const testFluxes = computeFluxes(rightICs, rightICs, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } { std::string const outputString{ @@ -865,17 +702,13 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, "HLLD State: Left Double Star State"}; // Compute the fluxes and check for correctness // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{ - 0.17354924587196074, 0.71614983677687327, -1.1940929411768009, - -1.1194725181819352, -0.11432087006939984, 0.0, - 0.056156000248263505, -0.42800560867873094}; - std::vector const scalarFlux{ - 0.19211858644420357, 0.38677506032368902, 0.57540498691841158}; - Real thermalEnergyFlux = 0.24104061926661174; - std::vector const testFluxes = - computeFluxes(leftICs, rightICs, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, - outputString, direction); + std::vector const fiducialFlux{0.17354924587196074, 0.71614983677687327, -1.1940929411768009, + -1.1194725181819352, -0.11432087006939984, 0.0, + 0.056156000248263505, -0.42800560867873094}; + std::vector const scalarFlux{0.19211858644420357, 0.38677506032368902, 0.57540498691841158}; + Real thermalEnergyFlux = 0.24104061926661174; + std::vector const testFluxes = computeFluxes(leftICs, rightICs, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } { std::string const outputString{ @@ -884,17 +717,13 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, "HLLD State: Right Double Star State"}; // Compute the fluxes and check for correctness // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{ - -0.17354924587196074, 0.71614983677687327, -1.1940929411768009, - -0.14549552299758384, -0.47242308031148195, 0.0, - -0.056156000248263505, -0.55262526758377528}; - std::vector const scalarFlux{ - -0.19211858644420357, -0.38677506032368902, -0.57540498691841158}; - Real thermalEnergyFlux = -0.24104061926661174; - std::vector const testFluxes = - computeFluxes(rightICs, leftICs, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, - outputString, direction); + std::vector const fiducialFlux{-0.17354924587196074, 0.71614983677687327, -1.1940929411768009, + -0.14549552299758384, -0.47242308031148195, 0.0, + -0.056156000248263505, -0.55262526758377528}; + std::vector const scalarFlux{-0.19211858644420357, -0.38677506032368902, -0.57540498691841158}; + Real thermalEnergyFlux = -0.24104061926661174; + std::vector const testFluxes = computeFluxes(rightICs, leftICs, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } // Cross wave checks @@ -905,17 +734,14 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, "HLLD State: Left Double Star State"}; // Compute the fluxes and check for correctness // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{ - 0.96813688187727132, 3.0871217875403394, -1.4687093290523414, - -0.33726008721080036, 4.2986213406773457, 0.0, - 0.84684181393860269, -0.087452560407274671}; - std::vector const scalarFlux{1.0717251365527865, 2.157607767226648, - 3.2098715673061045}; - Real thermalEnergyFlux = 1.2886155333980993; - std::vector const testFluxes = computeFluxes( - leftFastShockLeftSide, leftFastShockRightSide, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, - outputString, direction); + std::vector const fiducialFlux{0.96813688187727132, 3.0871217875403394, -1.4687093290523414, + -0.33726008721080036, 4.2986213406773457, 0.0, + 0.84684181393860269, -0.087452560407274671}; + std::vector const scalarFlux{1.0717251365527865, 2.157607767226648, 3.2098715673061045}; + Real thermalEnergyFlux = 1.2886155333980993; + std::vector const testFluxes = + computeFluxes(leftFastShockLeftSide, leftFastShockRightSide, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } { std::string const outputString{ @@ -924,17 +750,14 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, "HLLD State: Left Star State"}; // Compute the fluxes and check for correctness // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{ - 1.3053938862274184, 2.4685129176021858, -1.181892850065283, - -0.011160487372167127, 5.1797404608257249, 0.0, - 1.1889903073770265, 0.10262704114294516}; - std::vector const scalarFlux{1.4450678072086958, 2.9092249669830292, - 4.3280519500627666}; - Real thermalEnergyFlux = 2.081389946702628; - std::vector const testFluxes = computeFluxes( - leftFastShockRightSide, leftFastShockLeftSide, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, - outputString, direction); + std::vector const fiducialFlux{1.3053938862274184, 2.4685129176021858, -1.181892850065283, + -0.011160487372167127, 5.1797404608257249, 0.0, + 1.1889903073770265, 0.10262704114294516}; + std::vector const scalarFlux{1.4450678072086958, 2.9092249669830292, 4.3280519500627666}; + Real thermalEnergyFlux = 2.081389946702628; + std::vector const testFluxes = + computeFluxes(leftFastShockRightSide, leftFastShockLeftSide, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } { std::string const outputString{ @@ -943,17 +766,13 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, "HLLD State: Left Double Star State"}; // Compute the fluxes and check for correctness // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{ - 0.96326128304298586, 2.8879592118317445, -1.4808188010794987, - -0.20403672861184916, 4.014027751838869, 0.0, - 0.7248753989305099, -0.059178137562467162}; - std::vector const scalarFlux{1.0663278606879119, 2.1467419174572049, - 3.1937064501984724}; + std::vector const fiducialFlux{0.96326128304298586, 2.8879592118317445, -1.4808188010794987, + -0.20403672861184916, 4.014027751838869, 0.0, + 0.7248753989305099, -0.059178137562467162}; + std::vector const scalarFlux{1.0663278606879119, 2.1467419174572049, 3.1937064501984724}; Real thermalEnergyFlux = 1.5323573637968553; - std::vector const testFluxes = computeFluxes( - leftRotationLeftSide, leftRotationRightSide, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, - outputString, direction); + std::vector const testFluxes = computeFluxes(leftRotationLeftSide, leftRotationRightSide, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } { std::string const outputString{ @@ -962,17 +781,13 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, "HLLD State: Left Double Star State"}; // Compute the fluxes and check for correctness // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{ - 0.96353754504060063, 2.8875487093397085, -1.4327309336053695, - -0.31541343522923493, 3.9739842521208342, 0.0, - 0.75541746728406312, -0.13479771672887678}; - std::vector const scalarFlux{1.0666336820367937, 2.1473576000564334, - 3.1946224007710313}; + std::vector const fiducialFlux{0.96353754504060063, 2.8875487093397085, -1.4327309336053695, + -0.31541343522923493, 3.9739842521208342, 0.0, + 0.75541746728406312, -0.13479771672887678}; + std::vector const scalarFlux{1.0666336820367937, 2.1473576000564334, 3.1946224007710313}; Real thermalEnergyFlux = 1.5333744977458499; - std::vector const testFluxes = computeFluxes( - leftRotationRightSide, leftRotationLeftSide, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, - outputString, direction); + std::vector const testFluxes = computeFluxes(leftRotationRightSide, leftRotationLeftSide, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } { std::string const outputString{ @@ -981,17 +796,14 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, "HLLD State: Left Double Star State"}; // Compute the fluxes and check for correctness // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{ - 0.88716095730727451, 2.9828594399125663, -1.417062582518549, - -0.21524331343191233, 3.863474778369334, 0.0, - 0.71242370728996041, -0.05229712416644372}; - std::vector const scalarFlux{ - 0.98208498809672407, 1.9771433235295921, 2.9413947405483505}; - Real thermalEnergyFlux = 1.4145715457049737; - std::vector const testFluxes = computeFluxes( - leftSlowShockLeftSide, leftSlowShockRightSide, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, - outputString, direction); + std::vector const fiducialFlux{0.88716095730727451, 2.9828594399125663, -1.417062582518549, + -0.21524331343191233, 3.863474778369334, 0.0, + 0.71242370728996041, -0.05229712416644372}; + std::vector const scalarFlux{0.98208498809672407, 1.9771433235295921, 2.9413947405483505}; + Real thermalEnergyFlux = 1.4145715457049737; + std::vector const testFluxes = + computeFluxes(leftSlowShockLeftSide, leftSlowShockRightSide, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } { std::string const outputString{ @@ -1000,17 +812,14 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, "HLLD State: Left Double Star State"}; // Compute the fluxes and check for correctness // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{ - 1.042385440439527, 2.7732383399777376, -1.5199872074603551, - -0.21019362664841068, 4.1322001036232585, 0.0, - 0.72170937317481543, -0.049474715634396704}; - std::vector const scalarFlux{1.1539181074575644, 2.323079478570472, - 3.4560437166206879}; - Real thermalEnergyFlux = 1.8639570701934713; - std::vector const testFluxes = computeFluxes( - leftSlowShockRightSide, leftSlowShockLeftSide, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, - outputString, direction); + std::vector const fiducialFlux{1.042385440439527, 2.7732383399777376, -1.5199872074603551, + -0.21019362664841068, 4.1322001036232585, 0.0, + 0.72170937317481543, -0.049474715634396704}; + std::vector const scalarFlux{1.1539181074575644, 2.323079478570472, 3.4560437166206879}; + Real thermalEnergyFlux = 1.8639570701934713; + std::vector const testFluxes = + computeFluxes(leftSlowShockRightSide, leftSlowShockLeftSide, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } { std::string const outputString{ @@ -1019,17 +828,13 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, "HLLD State: Left Double Star State"}; // Compute the fluxes and check for correctness // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{ - 0.95545795601418737, 2.8843900822429749, -1.4715039715239722, - -0.21575736014726318, 4.0078718055059257, 0.0, - 0.72241353110189066, -0.049073560388753337}; - std::vector const scalarFlux{1.0576895969443709, 2.1293512784652289, - 3.1678344087247892}; - Real thermalEnergyFlux = 1.7186185770667382; - std::vector const testFluxes = - computeFluxes(contactLeftSide, contactRightSide, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, - outputString, direction); + std::vector const fiducialFlux{0.95545795601418737, 2.8843900822429749, -1.4715039715239722, + -0.21575736014726318, 4.0078718055059257, 0.0, + 0.72241353110189066, -0.049073560388753337}; + std::vector const scalarFlux{1.0576895969443709, 2.1293512784652289, 3.1678344087247892}; + Real thermalEnergyFlux = 1.7186185770667382; + std::vector const testFluxes = computeFluxes(contactLeftSide, contactRightSide, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } { std::string const outputString{ @@ -1038,17 +843,13 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, "HLLD State: Left Double Star State"}; // Compute the fluxes and check for correctness // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{ - 0.86324813554422819, 2.8309913324581251, -1.4761428591480787, - -0.23887765947428419, 3.9892942559102793, 0.0, - 0.72244123046603836, -0.049025527032060034}; - std::vector const scalarFlux{ - 0.95561355347926669, 1.9238507665182214, 2.8621114407298114}; - Real thermalEnergyFlux = 1.7184928987481187; - std::vector const testFluxes = - computeFluxes(contactRightSide, contactLeftSide, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, - outputString, direction); + std::vector const fiducialFlux{0.86324813554422819, 2.8309913324581251, -1.4761428591480787, + -0.23887765947428419, 3.9892942559102793, 0.0, + 0.72244123046603836, -0.049025527032060034}; + std::vector const scalarFlux{0.95561355347926669, 1.9238507665182214, 2.8621114407298114}; + Real thermalEnergyFlux = 1.7184928987481187; + std::vector const testFluxes = computeFluxes(contactRightSide, contactLeftSide, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } { std::string const outputString{ @@ -1057,17 +858,14 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, "HLLD State: Left Double Star State"}; // Compute the fluxes and check for correctness // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{ - 0.81125524370350677, 2.901639500435365, -1.5141545346789429, - -0.262600896007809, 3.8479660419540087, 0.0, - 0.7218977970017596, -0.049091614519593846}; - std::vector const scalarFlux{ - 0.89805755065482806, 1.8079784457999033, 2.6897282701827465}; - Real thermalEnergyFlux = 1.6022319728249694; - std::vector const testFluxes = computeFluxes( - rightSlowShockLeftSide, rightSlowShockRightSide, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, - outputString, direction); + std::vector const fiducialFlux{0.81125524370350677, 2.901639500435365, -1.5141545346789429, + -0.262600896007809, 3.8479660419540087, 0.0, + 0.7218977970017596, -0.049091614519593846}; + std::vector const scalarFlux{0.89805755065482806, 1.8079784457999033, 2.6897282701827465}; + Real thermalEnergyFlux = 1.6022319728249694; + std::vector const testFluxes = + computeFluxes(rightSlowShockLeftSide, rightSlowShockRightSide, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } { std::string const outputString{ @@ -1076,17 +874,14 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, "HLLD State: Left Double Star State"}; // Compute the fluxes and check for correctness // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{ - 0.60157947557836688, 2.3888357198399746, -1.9910500022202977, - -0.45610948442354332, 3.5359430988850069, 0.0, - 1.0670963294022622, 0.05554893654378229}; - std::vector const scalarFlux{0.66594699332331575, - 1.3406911495770899, 1.994545286188885}; - Real thermalEnergyFlux = 1.0487665253534804; - std::vector const testFluxes = computeFluxes( - rightSlowShockRightSide, rightSlowShockLeftSide, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, - outputString, direction); + std::vector const fiducialFlux{0.60157947557836688, 2.3888357198399746, -1.9910500022202977, + -0.45610948442354332, 3.5359430988850069, 0.0, + 1.0670963294022622, 0.05554893654378229}; + std::vector const scalarFlux{0.66594699332331575, 1.3406911495770899, 1.994545286188885}; + Real thermalEnergyFlux = 1.0487665253534804; + std::vector const testFluxes = + computeFluxes(rightSlowShockRightSide, rightSlowShockLeftSide, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } { std::string const outputString{ @@ -1095,17 +890,14 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, "HLLD State: Left Double Star State"}; // Compute the fluxes and check for correctness // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{ - 0.55701691287884714, 2.4652223621237814, -1.9664615862227277, - -0.47490477894092042, 3.3900659850690529, 0.0, - 1.0325648885587542, 0.059165409025635551}; - std::vector const scalarFlux{ - 0.61661634650230224, 1.2413781978573175, 1.8467974773272691}; - Real thermalEnergyFlux = 0.9707694646266285; - std::vector const testFluxes = computeFluxes( - rightRotationLeftSide, rightRotationRightSide, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, - outputString, direction); + std::vector const fiducialFlux{0.55701691287884714, 2.4652223621237814, -1.9664615862227277, + -0.47490477894092042, 3.3900659850690529, 0.0, + 1.0325648885587542, 0.059165409025635551}; + std::vector const scalarFlux{0.61661634650230224, 1.2413781978573175, 1.8467974773272691}; + Real thermalEnergyFlux = 0.9707694646266285; + std::vector const testFluxes = + computeFluxes(rightRotationLeftSide, rightRotationRightSide, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } { std::string const outputString{ @@ -1114,17 +906,14 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, "HLLD State: Left Double Star State"}; // Compute the fluxes and check for correctness // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{ - 0.55689116371132596, 2.4648517303940851, -1.7972202655166787, - -0.90018282739798461, 3.3401033852664566, 0.0, - 0.88105841856465605, 0.43911718823267476}; - std::vector const scalarFlux{ - 0.61647714248450702, 1.2410979509359938, 1.8463805541782863}; - Real thermalEnergyFlux = 0.9702629326292449; - std::vector const testFluxes = computeFluxes( - rightRotationRightSide, rightRotationLeftSide, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, - outputString, direction); + std::vector const fiducialFlux{0.55689116371132596, 2.4648517303940851, -1.7972202655166787, + -0.90018282739798461, 3.3401033852664566, 0.0, + 0.88105841856465605, 0.43911718823267476}; + std::vector const scalarFlux{0.61647714248450702, 1.2410979509359938, 1.8463805541782863}; + Real thermalEnergyFlux = 0.9702629326292449; + std::vector const testFluxes = + computeFluxes(rightRotationRightSide, rightRotationLeftSide, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } { std::string const outputString{ @@ -1133,17 +922,14 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, "HLLD State: Left Double Star State"}; // Compute the fluxes and check for correctness // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{ - 0.48777637414577313, 2.3709438477809708, -1.7282900552525988, - -0.86414423547773778, 2.8885015704245069, 0.0, - 0.77133731061645838, 0.38566794697432505}; - std::vector const scalarFlux{ - 0.53996724117661621, 1.0870674521621893, 1.6172294888076189}; - Real thermalEnergyFlux = 0.84330016382608752; - std::vector const testFluxes = computeFluxes( - rightFastShockLeftSide, rightFastShockRightSide, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, - outputString, direction); + std::vector const fiducialFlux{0.48777637414577313, 2.3709438477809708, -1.7282900552525988, + -0.86414423547773778, 2.8885015704245069, 0.0, + 0.77133731061645838, 0.38566794697432505}; + std::vector const scalarFlux{0.53996724117661621, 1.0870674521621893, 1.6172294888076189}; + Real thermalEnergyFlux = 0.84330016382608752; + std::vector const testFluxes = + computeFluxes(rightFastShockLeftSide, rightFastShockRightSide, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } { std::string const outputString{ @@ -1152,17 +938,14 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, "HLLD State: Left Double Star State"}; // Compute the fluxes and check for correctness // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{ - 0.040639426423817904, 1.0717156491947966, -1.2612066401572222, - -0.63060225433149875, 0.15803727234007203, 0.0, - 0.042555541396817498, 0.021277678888288909}; - std::vector const scalarFlux{ - 0.044987744655527385, 0.090569777630660403, 0.13474059488003065}; - Real thermalEnergyFlux = 0.060961577855018087; - std::vector const testFluxes = computeFluxes( - rightFastShockRightSide, rightFastShockLeftSide, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, - outputString, direction); + std::vector const fiducialFlux{0.040639426423817904, 1.0717156491947966, -1.2612066401572222, + -0.63060225433149875, 0.15803727234007203, 0.0, + 0.042555541396817498, 0.021277678888288909}; + std::vector const scalarFlux{0.044987744655527385, 0.090569777630660403, 0.13474059488003065}; + Real thermalEnergyFlux = 0.060961577855018087; + std::vector const testFluxes = + computeFluxes(rightFastShockRightSide, rightFastShockLeftSide, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } } } @@ -1174,72 +957,55 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, * the Ryu & Jones 4d Shock tube * */ -TEST_F(tMHDCalculateHLLDFluxesCUDA, - RyuAndJones4dShockTubeCorrectInputExpectCorrectOutput) +TEST_F(tMHDCalculateHLLDFluxesCUDA, RyuAndJones4dShockTubeCorrectInputExpectCorrectOutput) { // Constant Values Real const gamma = 5. / 3.; Real const Bx = 0.7; - std::vector const primitiveScalar{1.1069975296, 2.2286185018, - 3.3155141875}; + std::vector const primitiveScalar{1.1069975296, 2.2286185018, 3.3155141875}; // States - std::vector< - Real> const // | Density | X-Velocity | Y-Velocity | Z-Velocity | - // Pressure | X-Magnetic Field | Y-Magnetic Field | - // Z-Magnetic Field | Adiabatic Index | Passive Scalars | - leftICs = primitive2Conserved({1.0, 0.0, 0.0, 0.0, 1.0, Bx, 0.0, 0.0}, - gamma, primitiveScalar), - hydroRareLeftSide = - primitive2Conserved({0.990414, 0.012415, 1.458910e-58, 6.294360e-59, - 0.984076, Bx, 1.252355e-57, 5.366795e-58}, - gamma, primitiveScalar), - hydroRareRightSide = - primitive2Conserved({0.939477, 0.079800, 1.557120e-41, 7.505190e-42, - 0.901182, Bx, 1.823624e-40, 8.712177e-41}, - gamma, primitiveScalar), - switchOnSlowShockLeftSide = - primitive2Conserved({0.939863, 0.079142, 1.415730e-02, 7.134030e-03, - 0.901820, Bx, 2.519650e-02, 1.290082e-02}, - gamma, primitiveScalar), - switchOnSlowShockRightSide = - primitive2Conserved({0.651753, 0.322362, 8.070540e-01, 4.425110e-01, - 0.490103, Bx, 6.598380e-01, 3.618000e-01}, - gamma, primitiveScalar), - contactLeftSide = - primitive2Conserved({0.648553, 0.322525, 8.072970e-01, 4.426950e-01, - 0.489951, Bx, 6.599295e-01, 3.618910e-01}, - gamma, primitiveScalar), - contactRightSide = - primitive2Conserved({0.489933, 0.322518, 8.073090e-01, 4.426960e-01, - 0.489980, Bx, 6.599195e-01, 3.618850e-01}, - gamma, primitiveScalar), - slowShockLeftSide = - primitive2Conserved({0.496478, 0.308418, 8.060830e-01, 4.420150e-01, - 0.489823, Bx, 6.686695e-01, 3.666915e-01}, - gamma, primitiveScalar), - slowShockRightSide = - primitive2Conserved({0.298260, -0.016740, 2.372870e-01, 1.287780e-01, - 0.198864, Bx, 8.662095e-01, 4.757390e-01}, - gamma, primitiveScalar), - rotationLeftSide = - primitive2Conserved({0.298001, -0.017358, 2.364790e-01, 1.278540e-01, - 0.198448, Bx, 8.669425e-01, 4.750845e-01}, - gamma, primitiveScalar), - rotationRightSide = - primitive2Conserved({0.297673, -0.018657, 1.059540e-02, 9.996860e-01, - 0.197421, Bx, 9.891580e-01, 1.024949e-04}, - gamma, primitiveScalar), - fastRareLeftSide = - primitive2Conserved({0.297504, -0.020018, 1.137420e-02, 1.000000e+00, - 0.197234, Bx, 9.883860e-01, -4.981931e-17}, - gamma, primitiveScalar), - fastRareRightSide = - primitive2Conserved({0.299996, -0.000033, 1.855120e-05, 1.000000e+00, - 0.199995, Bx, 9.999865e-01, 1.737190e-16}, - gamma, primitiveScalar), - rightICs = primitive2Conserved({0.3, 0.0, 0.0, 1.0, 0.2, Bx, 1.0, 0.0}, - gamma, primitiveScalar); + std::vector const // | Density | X-Velocity | Y-Velocity | Z-Velocity | + // Pressure | X-Magnetic Field | Y-Magnetic Field | + // Z-Magnetic Field | Adiabatic Index | Passive Scalars | + leftICs = primitive2Conserved({1.0, 0.0, 0.0, 0.0, 1.0, Bx, 0.0, 0.0}, gamma, primitiveScalar), + hydroRareLeftSide = primitive2Conserved( + {0.990414, 0.012415, 1.458910e-58, 6.294360e-59, 0.984076, Bx, 1.252355e-57, 5.366795e-58}, gamma, + primitiveScalar), + hydroRareRightSide = primitive2Conserved( + {0.939477, 0.079800, 1.557120e-41, 7.505190e-42, 0.901182, Bx, 1.823624e-40, 8.712177e-41}, gamma, + primitiveScalar), + switchOnSlowShockLeftSide = primitive2Conserved( + {0.939863, 0.079142, 1.415730e-02, 7.134030e-03, 0.901820, Bx, 2.519650e-02, 1.290082e-02}, gamma, + primitiveScalar), + switchOnSlowShockRightSide = primitive2Conserved( + {0.651753, 0.322362, 8.070540e-01, 4.425110e-01, 0.490103, Bx, 6.598380e-01, 3.618000e-01}, gamma, + primitiveScalar), + contactLeftSide = primitive2Conserved( + {0.648553, 0.322525, 8.072970e-01, 4.426950e-01, 0.489951, Bx, 6.599295e-01, 3.618910e-01}, gamma, + primitiveScalar), + contactRightSide = primitive2Conserved( + {0.489933, 0.322518, 8.073090e-01, 4.426960e-01, 0.489980, Bx, 6.599195e-01, 3.618850e-01}, gamma, + primitiveScalar), + slowShockLeftSide = primitive2Conserved( + {0.496478, 0.308418, 8.060830e-01, 4.420150e-01, 0.489823, Bx, 6.686695e-01, 3.666915e-01}, gamma, + primitiveScalar), + slowShockRightSide = primitive2Conserved( + {0.298260, -0.016740, 2.372870e-01, 1.287780e-01, 0.198864, Bx, 8.662095e-01, 4.757390e-01}, gamma, + primitiveScalar), + rotationLeftSide = primitive2Conserved( + {0.298001, -0.017358, 2.364790e-01, 1.278540e-01, 0.198448, Bx, 8.669425e-01, 4.750845e-01}, gamma, + primitiveScalar), + rotationRightSide = primitive2Conserved( + {0.297673, -0.018657, 1.059540e-02, 9.996860e-01, 0.197421, Bx, 9.891580e-01, 1.024949e-04}, gamma, + primitiveScalar), + fastRareLeftSide = primitive2Conserved( + {0.297504, -0.020018, 1.137420e-02, 1.000000e+00, 0.197234, Bx, 9.883860e-01, -4.981931e-17}, gamma, + primitiveScalar), + fastRareRightSide = primitive2Conserved( + {0.299996, -0.000033, 1.855120e-05, 1.000000e+00, 0.199995, Bx, 9.999865e-01, 1.737190e-16}, gamma, + primitiveScalar), + rightICs = primitive2Conserved({0.3, 0.0, 0.0, 1.0, 0.2, Bx, 1.0, 0.0}, gamma, primitiveScalar); for (size_t direction = 0; direction < 3; direction++) { // Initial Condition Checks @@ -1250,14 +1016,11 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, "HLLD State: Left Double Star State"}; // Compute the fluxes and check for correctness // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{ - 0, 0.75499999999999989, 0, 0, 2.2204460492503131e-16, 0.0, 0, 0}; + std::vector const fiducialFlux{0, 0.75499999999999989, 0, 0, 2.2204460492503131e-16, 0.0, 0, 0}; std::vector const scalarFlux{0, 0, 0}; - Real thermalEnergyFlux = 0.0; - std::vector const testFluxes = - computeFluxes(leftICs, leftICs, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, - outputString, direction); + Real thermalEnergyFlux = 0.0; + std::vector const testFluxes = computeFluxes(leftICs, leftICs, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } { std::string const outputString{ @@ -1266,22 +1029,13 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, "HLLD State: Left Double Star State"}; // Compute the fluxes and check for correctness // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{-5.5511151231257827e-17, - 0.45500000000000013, - -0.69999999999999996, - -5.5511151231257827e-17, - 0, - 0.0, - 0, - -0.69999999999999996}; - std::vector const scalarFlux{-6.1450707278254418e-17, - -1.2371317869019906e-16, - -1.8404800947169341e-16}; - Real thermalEnergyFlux = 0.0; - std::vector const testFluxes = - computeFluxes(rightICs, rightICs, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, - outputString, direction); + std::vector const fiducialFlux{ + -5.5511151231257827e-17, 0.45500000000000013, -0.69999999999999996, -5.5511151231257827e-17, 0, 0.0, 0, + -0.69999999999999996}; + std::vector const scalarFlux{-6.1450707278254418e-17, -1.2371317869019906e-16, -1.8404800947169341e-16}; + Real thermalEnergyFlux = 0.0; + std::vector const testFluxes = computeFluxes(rightICs, rightICs, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } { std::string const outputString{ @@ -1290,17 +1044,13 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, "HLLD State: Left Double Star State"}; // Compute the fluxes and check for correctness // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{ - 0.092428729855986602, 0.53311593977445149, -0.39622049648437296, - -0.21566989083797167, -0.13287876964320211, 0.0, - -0.40407579574102892, -0.21994567048141428}; - std::vector const scalarFlux{ - 0.10231837561464294, 0.20598837745492582, 0.30644876517012837}; - Real thermalEnergyFlux = 0.13864309478397996; - std::vector const testFluxes = - computeFluxes(leftICs, rightICs, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, - outputString, direction); + std::vector const fiducialFlux{0.092428729855986602, 0.53311593977445149, -0.39622049648437296, + -0.21566989083797167, -0.13287876964320211, 0.0, + -0.40407579574102892, -0.21994567048141428}; + std::vector const scalarFlux{0.10231837561464294, 0.20598837745492582, 0.30644876517012837}; + Real thermalEnergyFlux = 0.13864309478397996; + std::vector const testFluxes = computeFluxes(leftICs, rightICs, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } { std::string const outputString{ @@ -1309,17 +1059,13 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, "HLLD State: Right Double Star State"}; // Compute the fluxes and check for correctness // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{ - -0.092428729855986602, 0.53311593977445149, -0.39622049648437296, - 0.21566989083797167, 0.13287876964320211, 0.0, - 0.40407579574102892, -0.21994567048141428}; - std::vector const scalarFlux{ - -0.10231837561464294, -0.20598837745492582, -0.30644876517012837}; - Real thermalEnergyFlux = -0.13864309478397996; - std::vector const testFluxes = - computeFluxes(rightICs, leftICs, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, - outputString, direction); + std::vector const fiducialFlux{-0.092428729855986602, 0.53311593977445149, -0.39622049648437296, + 0.21566989083797167, 0.13287876964320211, 0.0, + 0.40407579574102892, -0.21994567048141428}; + std::vector const scalarFlux{-0.10231837561464294, -0.20598837745492582, -0.30644876517012837}; + Real thermalEnergyFlux = -0.13864309478397996; + std::vector const testFluxes = computeFluxes(rightICs, leftICs, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } // Cross wave checks @@ -1330,18 +1076,13 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, "HLLD State: Left Double Star State"}; // Compute the fluxes and check for correctness // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{ - 0.074035256375659553, 0.66054553664209648, - -6.1597070943493028e-41, -2.9447391900433873e-41, - 0.1776649658235645, 0.0, - -6.3466063324344113e-41, -3.0340891384335242e-41}; - std::vector const scalarFlux{ - 0.081956845911157775, 0.16499634214430131, 0.24546494288869905}; + std::vector const fiducialFlux{0.074035256375659553, 0.66054553664209648, -6.1597070943493028e-41, + -2.9447391900433873e-41, 0.1776649658235645, 0.0, + -6.3466063324344113e-41, -3.0340891384335242e-41}; + std::vector const scalarFlux{0.081956845911157775, 0.16499634214430131, 0.24546494288869905}; Real thermalEnergyFlux = 0.11034221894046368; - std::vector const testFluxes = computeFluxes( - hydroRareLeftSide, hydroRareRightSide, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, - outputString, direction); + std::vector const testFluxes = computeFluxes(hydroRareLeftSide, hydroRareRightSide, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } { std::string const outputString{ @@ -1350,18 +1091,13 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, "HLLD State: Left Double Star State"}; // Compute the fluxes and check for correctness // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{ - 0.013336890338886076, 0.74071279157971992, - -6.1745213352160876e-41, -2.9474651270630147e-41, - 0.033152482405470307, 0.0, - 6.2022392844946449e-41, 2.9606965476795895e-41}; - std::vector const scalarFlux{ - 0.014763904657692993, 0.029722840565719184, 0.044218649135708464}; + std::vector const fiducialFlux{0.013336890338886076, 0.74071279157971992, -6.1745213352160876e-41, + -2.9474651270630147e-41, 0.033152482405470307, 0.0, + 6.2022392844946449e-41, 2.9606965476795895e-41}; + std::vector const scalarFlux{0.014763904657692993, 0.029722840565719184, 0.044218649135708464}; Real thermalEnergyFlux = 0.019189877201961154; - std::vector const testFluxes = computeFluxes( - hydroRareRightSide, hydroRareLeftSide, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, - outputString, direction); + std::vector const testFluxes = computeFluxes(hydroRareRightSide, hydroRareLeftSide, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } { std::string const outputString{ @@ -1370,18 +1106,14 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, "HLLD State: Left Double Star State"}; // Compute the fluxes and check for correctness // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{ - 0.19734622040826083, 0.47855039640569758, -0.3392293209655618, - -0.18588204716255491, 0.10695446263054809, 0.0, - -0.3558357543098733, -0.19525093130352045}; - std::vector const scalarFlux{ - 0.21846177846784187, 0.43980943806215089, 0.65430419361309078}; + std::vector const fiducialFlux{0.19734622040826083, 0.47855039640569758, -0.3392293209655618, + -0.18588204716255491, 0.10695446263054809, 0.0, + -0.3558357543098733, -0.19525093130352045}; + std::vector const scalarFlux{0.21846177846784187, 0.43980943806215089, 0.65430419361309078}; Real thermalEnergyFlux = 0.2840373040888583; std::vector const testFluxes = - computeFluxes(switchOnSlowShockLeftSide, switchOnSlowShockRightSide, - gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, - outputString, direction); + computeFluxes(switchOnSlowShockLeftSide, switchOnSlowShockRightSide, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } { std::string const outputString{ @@ -1390,18 +1122,14 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, "HLLD State: Left Double Star State"}; // Compute the fluxes and check for correctness // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{ - 0.097593254768855386, 0.76483698872352757, -0.02036438492698419, - -0.010747481940703562, 0.25327551496496836, 0.0, - -0.002520109973016129, -0.00088262199017708799}; - std::vector const scalarFlux{ - 0.10803549193474633, 0.21749813322875222, 0.32357182079044206}; + std::vector const fiducialFlux{0.097593254768855386, 0.76483698872352757, -0.02036438492698419, + -0.010747481940703562, 0.25327551496496836, 0.0, + -0.002520109973016129, -0.00088262199017708799}; + std::vector const scalarFlux{0.10803549193474633, 0.21749813322875222, 0.32357182079044206}; Real thermalEnergyFlux = 0.1100817647375162; std::vector const testFluxes = - computeFluxes(switchOnSlowShockRightSide, switchOnSlowShockLeftSide, - gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, - outputString, direction); + computeFluxes(switchOnSlowShockRightSide, switchOnSlowShockLeftSide, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } { std::string const outputString{ @@ -1410,17 +1138,13 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, "HLLD State: Left Double Star State"}; // Compute the fluxes and check for correctness // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{ - 0.2091677440314007, 0.5956612619664029, -0.29309091669513981, - -0.16072556008504282, 0.19220050968424285, 0.0, - -0.35226977371803297, -0.19316940226499904}; - std::vector const scalarFlux{ - 0.23154817591476573, 0.46615510432814616, 0.69349862290347741}; - Real thermalEnergyFlux = 0.23702444986592192; - std::vector const testFluxes = - computeFluxes(contactLeftSide, contactRightSide, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, - outputString, direction); + std::vector const fiducialFlux{0.2091677440314007, 0.5956612619664029, -0.29309091669513981, + -0.16072556008504282, 0.19220050968424285, 0.0, + -0.35226977371803297, -0.19316940226499904}; + std::vector const scalarFlux{0.23154817591476573, 0.46615510432814616, 0.69349862290347741}; + Real thermalEnergyFlux = 0.23702444986592192; + std::vector const testFluxes = computeFluxes(contactLeftSide, contactRightSide, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } { std::string const outputString{ @@ -1429,17 +1153,13 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, "HLLD State: Left Double Star State"}; // Compute the fluxes and check for correctness // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{ - 0.15801775068597168, 0.57916072367837657, -0.33437339604094024, - -0.18336617461176744, 0.16789791355547545, 0.0, - -0.3522739911439669, -0.19317084712861482}; - std::vector const scalarFlux{ - 0.17492525964231936, 0.35216128279157616, 0.52391009427617696}; - Real thermalEnergyFlux = 0.23704936434506069; - std::vector const testFluxes = - computeFluxes(contactRightSide, contactLeftSide, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, - outputString, direction); + std::vector const fiducialFlux{0.15801775068597168, 0.57916072367837657, -0.33437339604094024, + -0.18336617461176744, 0.16789791355547545, 0.0, + -0.3522739911439669, -0.19317084712861482}; + std::vector const scalarFlux{0.17492525964231936, 0.35216128279157616, 0.52391009427617696}; + Real thermalEnergyFlux = 0.23704936434506069; + std::vector const testFluxes = computeFluxes(contactRightSide, contactLeftSide, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } { std::string const outputString{ @@ -1448,17 +1168,13 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, "HLLD State: Left Double Star State"}; // Compute the fluxes and check for correctness // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{ - 0.11744487326715558, 0.66868230621718128, -0.35832022960458892, - -0.19650694834641164, 0.057880816021092185, 0.0, - -0.37198011453582402, -0.20397277844271294}; - std::vector const scalarFlux{ - 0.13001118457092631, 0.26173981750473918, 0.38939014356639379}; + std::vector const fiducialFlux{0.11744487326715558, 0.66868230621718128, -0.35832022960458892, + -0.19650694834641164, 0.057880816021092185, 0.0, + -0.37198011453582402, -0.20397277844271294}; + std::vector const scalarFlux{0.13001118457092631, 0.26173981750473918, 0.38939014356639379}; Real thermalEnergyFlux = 0.1738058891582446; - std::vector const testFluxes = computeFluxes( - slowShockLeftSide, slowShockRightSide, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, - outputString, direction); + std::vector const testFluxes = computeFluxes(slowShockLeftSide, slowShockRightSide, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } { std::string const outputString{ @@ -1467,17 +1183,13 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, "HLLD State: Left Double Star State"}; // Compute the fluxes and check for correctness // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{ - 0.038440990187426027, 0.33776683678923869, -0.62583241538732792, - -0.3437911783906169, -0.13471828103488348, 0.0, - -0.15165427985881363, -0.082233932588833825}; - std::vector const scalarFlux{ - 0.042554081172858457, 0.085670301959209896, 0.12745164834795927}; + std::vector const fiducialFlux{0.038440990187426027, 0.33776683678923869, -0.62583241538732792, + -0.3437911783906169, -0.13471828103488348, 0.0, + -0.15165427985881363, -0.082233932588833825}; + std::vector const scalarFlux{0.042554081172858457, 0.085670301959209896, 0.12745164834795927}; Real thermalEnergyFlux = 0.038445630017261548; - std::vector const testFluxes = computeFluxes( - slowShockRightSide, slowShockLeftSide, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, - outputString, direction); + std::vector const testFluxes = computeFluxes(slowShockRightSide, slowShockLeftSide, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } { std::string const outputString{ @@ -1486,17 +1198,13 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, "HLLD State: Right Double Star State"}; // Compute the fluxes and check for correctness // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{ - -0.0052668366104996478, 0.44242247672452317, -0.60785196341731951, - -0.33352435102145184, -0.21197843894720192, 0.0, - -0.18030635192654354, -0.098381113757603278}; - std::vector const scalarFlux{ - -0.0058303751166299484, -0.011737769516117116, -0.017462271505355991}; - Real thermalEnergyFlux = -0.0052395622905745485; - std::vector const testFluxes = - computeFluxes(rotationLeftSide, rotationRightSide, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, - outputString, direction); + std::vector const fiducialFlux{-0.0052668366104996478, 0.44242247672452317, -0.60785196341731951, + -0.33352435102145184, -0.21197843894720192, 0.0, + -0.18030635192654354, -0.098381113757603278}; + std::vector const scalarFlux{-0.0058303751166299484, -0.011737769516117116, -0.017462271505355991}; + Real thermalEnergyFlux = -0.0052395622905745485; + std::vector const testFluxes = computeFluxes(rotationLeftSide, rotationRightSide, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } { std::string const outputString{ @@ -1505,17 +1213,13 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, "HLLD State: Right Double Star State"}; // Compute the fluxes and check for correctness // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{ - -0.005459628948343731, 0.4415038084184626, -0.69273580053867279, - -0.0051834737482743809, -0.037389286119015486, 0.0, - -0.026148289294373184, -0.69914753968916865}; - std::vector const scalarFlux{ - -0.0060437957583491572, -0.012167430087241717, -0.018101477236719343}; - Real thermalEnergyFlux = -0.0054536013916442853; - std::vector const testFluxes = - computeFluxes(rotationRightSide, rotationLeftSide, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, - outputString, direction); + std::vector const fiducialFlux{-0.005459628948343731, 0.4415038084184626, -0.69273580053867279, + -0.0051834737482743809, -0.037389286119015486, 0.0, + -0.026148289294373184, -0.69914753968916865}; + std::vector const scalarFlux{-0.0060437957583491572, -0.012167430087241717, -0.018101477236719343}; + Real thermalEnergyFlux = -0.0054536013916442853; + std::vector const testFluxes = computeFluxes(rotationRightSide, rotationLeftSide, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } { std::string const outputString{ @@ -1524,17 +1228,13 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, "HLLD State: Right Double Star State"}; // Compute the fluxes and check for correctness // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{ - -0.0059354802028144249, 0.44075681881443612, -0.69194176811725872, - -0.0059354802028144804, -0.040194357552219451, 0.0, - -0.027710302430178135, -0.70000000000000007}; - std::vector const scalarFlux{ - -0.0065705619215052757, -0.013227920997059845, -0.019679168822056604}; - Real thermalEnergyFlux = -0.0059354109546219782; - std::vector const testFluxes = - computeFluxes(fastRareLeftSide, fastRareRightSide, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, - outputString, direction); + std::vector const fiducialFlux{-0.0059354802028144249, 0.44075681881443612, -0.69194176811725872, + -0.0059354802028144804, -0.040194357552219451, 0.0, + -0.027710302430178135, -0.70000000000000007}; + std::vector const scalarFlux{-0.0065705619215052757, -0.013227920997059845, -0.019679168822056604}; + Real thermalEnergyFlux = -0.0059354109546219782; + std::vector const testFluxes = computeFluxes(fastRareLeftSide, fastRareRightSide, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } { std::string const outputString{ @@ -1543,19 +1243,13 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, "HLLD State: Right Double Star State"}; // Compute the fluxes and check for correctness // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{ - -3.0171858819483255e-05, 0.45503057873272706, - -0.69998654276213712, -3.0171858819427744e-05, - -0.00014827469339251387, 0.0, - -8.2898844654399895e-05, -0.69999999999999984}; - std::vector const scalarFlux{-3.340017317660794e-05, - -6.7241562798797897e-05, - -0.00010003522597924373}; - Real thermalEnergyFlux = -3.000421709818028e-05; - std::vector const testFluxes = - computeFluxes(fastRareRightSide, fastRareLeftSide, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, - outputString, direction); + std::vector const fiducialFlux{-3.0171858819483255e-05, 0.45503057873272706, -0.69998654276213712, + -3.0171858819427744e-05, -0.00014827469339251387, 0.0, + -8.2898844654399895e-05, -0.69999999999999984}; + std::vector const scalarFlux{-3.340017317660794e-05, -6.7241562798797897e-05, -0.00010003522597924373}; + Real thermalEnergyFlux = -3.000421709818028e-05; + std::vector const testFluxes = computeFluxes(fastRareRightSide, fastRareLeftSide, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } } } @@ -1567,8 +1261,7 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, * the Einfeldt Strong Rarefaction (EFR) * */ -TEST_F(tMHDCalculateHLLDFluxesCUDA, - EinfeldtStrongRarefactionCorrectInputExpectCorrectOutput) +TEST_F(tMHDCalculateHLLDFluxesCUDA, EinfeldtStrongRarefactionCorrectInputExpectCorrectOutput) { // Constant Values Real const gamma = 5. / 3.; @@ -1578,33 +1271,23 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, Real const Bx = 0.0; Real const Bz = 0.0; - std::vector const primitiveScalar{1.1069975296, 2.2286185018, - 3.3155141875}; + std::vector const primitiveScalar{1.1069975296, 2.2286185018, 3.3155141875}; // States - std::vector< - Real> const // | Density | X-Velocity | Y-Velocity | Z-Velocity | - // Pressure | X-Magnetic Field | Y-Magnetic Field | - // Z-Magnetic Field | Adiabatic Index | Passive Scalars | - leftICs = primitive2Conserved({1.0, -V0, Vy, Vz, 0.45, Bx, 0.5, Bz}, - gamma, primitiveScalar), - leftRarefactionCenter = primitive2Conserved( - {0.368580, -1.180830, Vy, Vz, 0.111253, Bx, 0.183044, Bz}, gamma, - primitiveScalar), - leftVxTurnOver = primitive2Conserved( - {0.058814, -0.125475, Vy, Vz, 0.008819, Bx, 0.029215, Bz}, gamma, - primitiveScalar), - midPoint = primitive2Conserved( - {0.034658, 0.000778, Vy, Vz, 0.006776, Bx, 0.017333, Bz}, gamma, - primitiveScalar), - rightVxTurnOver = primitive2Conserved( - {0.062587, 0.152160, Vy, Vz, 0.009521, Bx, 0.031576, Bz}, gamma, - primitiveScalar), - rightRarefactionCenter = primitive2Conserved( - {0.316485, 1.073560, Vy, Vz, 0.089875, Bx, 0.159366, Bz}, gamma, - primitiveScalar), - rightICs = primitive2Conserved({1.0, V0, Vy, Vz, 0.45, Bx, 0.5, Bz}, - gamma, primitiveScalar); + std::vector const // | Density | X-Velocity | Y-Velocity | Z-Velocity | + // Pressure | X-Magnetic Field | Y-Magnetic Field | + // Z-Magnetic Field | Adiabatic Index | Passive Scalars | + leftICs = primitive2Conserved({1.0, -V0, Vy, Vz, 0.45, Bx, 0.5, Bz}, gamma, primitiveScalar), + leftRarefactionCenter = + primitive2Conserved({0.368580, -1.180830, Vy, Vz, 0.111253, Bx, 0.183044, Bz}, gamma, primitiveScalar), + leftVxTurnOver = + primitive2Conserved({0.058814, -0.125475, Vy, Vz, 0.008819, Bx, 0.029215, Bz}, gamma, primitiveScalar), + midPoint = primitive2Conserved({0.034658, 0.000778, Vy, Vz, 0.006776, Bx, 0.017333, Bz}, gamma, primitiveScalar), + rightVxTurnOver = + primitive2Conserved({0.062587, 0.152160, Vy, Vz, 0.009521, Bx, 0.031576, Bz}, gamma, primitiveScalar), + rightRarefactionCenter = + primitive2Conserved({0.316485, 1.073560, Vy, Vz, 0.089875, Bx, 0.159366, Bz}, gamma, primitiveScalar), + rightICs = primitive2Conserved({1.0, V0, Vy, Vz, 0.45, Bx, 0.5, Bz}, gamma, primitiveScalar); for (size_t direction = 0; direction < 3; direction++) { // Initial Condition Checks @@ -1615,15 +1298,11 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, "HLLD State: Right"}; // Compute the fluxes and check for correctness // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{ - -2, 4.5750000000000002, -0, -0, -6.75, 0.0, -1, -0}; - std::vector const scalarFlux{ - -2.2139950592000002, -4.4572370036000004, -6.6310283749999996}; - Real thermalEnergyFlux = -1.3499999999999996; - std::vector const testFluxes = - computeFluxes(leftICs, leftICs, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, - outputString, direction); + std::vector const fiducialFlux{-2, 4.5750000000000002, -0, -0, -6.75, 0.0, -1, -0}; + std::vector const scalarFlux{-2.2139950592000002, -4.4572370036000004, -6.6310283749999996}; + Real thermalEnergyFlux = -1.3499999999999996; + std::vector const testFluxes = computeFluxes(leftICs, leftICs, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } { std::string const outputString{ @@ -1632,15 +1311,11 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, "HLLD State: Left"}; // Compute the fluxes and check for correctness // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{ - 2, 4.5750000000000002, 0, 0, 6.75, 0.0, 1, 0}; - std::vector const scalarFlux{2.2139950592000002, 4.4572370036000004, - 6.6310283749999996}; - Real thermalEnergyFlux = 1.3499999999999996; - std::vector const testFluxes = - computeFluxes(rightICs, rightICs, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, - outputString, direction); + std::vector const fiducialFlux{2, 4.5750000000000002, 0, 0, 6.75, 0.0, 1, 0}; + std::vector const scalarFlux{2.2139950592000002, 4.4572370036000004, 6.6310283749999996}; + Real thermalEnergyFlux = 1.3499999999999996; + std::vector const testFluxes = computeFluxes(rightICs, rightICs, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } { std::string const outputString{ @@ -1649,14 +1324,11 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, "HLLD State: Left Star"}; // Compute the fluxes and check for correctness // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{ - 0, -1.4249999999999998, -0, -0, 0, 0.0, 0, -0}; + std::vector const fiducialFlux{0, -1.4249999999999998, -0, -0, 0, 0.0, 0, -0}; std::vector const scalarFlux{0, 0, 0}; - Real thermalEnergyFlux = 0.0; - std::vector const testFluxes = - computeFluxes(leftICs, rightICs, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, - outputString, direction); + Real thermalEnergyFlux = 0.0; + std::vector const testFluxes = computeFluxes(leftICs, rightICs, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } { std::string const outputString{ @@ -1665,14 +1337,11 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, "HLLD State: Left Star"}; // Compute the fluxes and check for correctness // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{ - 0, 10.574999999999999, 0, 0, 0, 0.0, 0, 0}; + std::vector const fiducialFlux{0, 10.574999999999999, 0, 0, 0, 0.0, 0, 0}; std::vector const scalarFlux{0, 0, 0}; - Real thermalEnergyFlux = 0.0; - std::vector const testFluxes = - computeFluxes(rightICs, leftICs, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, - outputString, direction); + Real thermalEnergyFlux = 0.0; + std::vector const testFluxes = computeFluxes(rightICs, leftICs, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } // Intermediate state checks @@ -1683,21 +1352,12 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, "HLLD State: Right"}; // Compute the fluxes and check for correctness // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{-0.43523032140000006, - 0.64193857338676208, - -0, - -0, - -0.67142479846795033, - 0.0, - -0.21614384652000002, - -0}; - std::vector const scalarFlux{ - -0.48179889059681413, -0.9699623468164007, -1.4430123054318851}; - Real thermalEnergyFlux = -0.19705631998499995; - std::vector const testFluxes = - computeFluxes(leftICs, leftRarefactionCenter, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, - outputString, direction); + std::vector const fiducialFlux{ + -0.43523032140000006, 0.64193857338676208, -0, -0, -0.67142479846795033, 0.0, -0.21614384652000002, -0}; + std::vector const scalarFlux{-0.48179889059681413, -0.9699623468164007, -1.4430123054318851}; + Real thermalEnergyFlux = -0.19705631998499995; + std::vector const testFluxes = computeFluxes(leftICs, leftRarefactionCenter, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } { std::string const outputString{ @@ -1706,15 +1366,11 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, "HLLD State: Right"}; // Compute the fluxes and check for correctness // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{ - -2, 4.5750000000000002, -0, -0, -6.75, 0.0, -1, -0}; - std::vector const scalarFlux{ - -2.2139950592000002, -4.4572370036000004, -6.6310283749999996}; - Real thermalEnergyFlux = -1.3499999999999996; - std::vector const testFluxes = - computeFluxes(leftRarefactionCenter, leftICs, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, - outputString, direction); + std::vector const fiducialFlux{-2, 4.5750000000000002, -0, -0, -6.75, 0.0, -1, -0}; + std::vector const scalarFlux{-2.2139950592000002, -4.4572370036000004, -6.6310283749999996}; + Real thermalEnergyFlux = -1.3499999999999996; + std::vector const testFluxes = computeFluxes(leftRarefactionCenter, leftICs, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } { std::string const outputString{ @@ -1723,21 +1379,12 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, "HLLD State: Right Star"}; // Compute the fluxes and check for correctness // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{-0.023176056428381629, - -2.0437812714100764e-05, - 0, - 0, - -0.00098843768795337005, - 0.0, - -0.011512369309265979, - 0}; - std::vector const scalarFlux{ - -0.025655837212088663, -0.051650588155052128, -0.076840543898599858}; + std::vector const fiducialFlux{ + -0.023176056428381629, -2.0437812714100764e-05, 0, 0, -0.00098843768795337005, 0.0, -0.011512369309265979, 0}; + std::vector const scalarFlux{-0.025655837212088663, -0.051650588155052128, -0.076840543898599858}; Real thermalEnergyFlux = -0.0052127803322822184; - std::vector const testFluxes = computeFluxes( - leftRarefactionCenter, leftVxTurnOver, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, - outputString, direction); + std::vector const testFluxes = computeFluxes(leftRarefactionCenter, leftVxTurnOver, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } { std::string const outputString{ @@ -1746,21 +1393,12 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, "HLLD State: Right Star"}; // Compute the fluxes and check for correctness // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{-0.43613091609689758, - 0.64135749005731213, - 0, - 0, - -0.67086080671260462, - 0.0, - -0.21659109937066717, - 0}; - std::vector const scalarFlux{ - -0.48279584670145054, -0.9719694288205295, -1.445998239926636}; + std::vector const fiducialFlux{ + -0.43613091609689758, 0.64135749005731213, 0, 0, -0.67086080671260462, 0.0, -0.21659109937066717, 0}; + std::vector const scalarFlux{-0.48279584670145054, -0.9719694288205295, -1.445998239926636}; Real thermalEnergyFlux = -0.19746407621898149; - std::vector const testFluxes = computeFluxes( - leftVxTurnOver, leftRarefactionCenter, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, - outputString, direction); + std::vector const testFluxes = computeFluxes(leftVxTurnOver, leftRarefactionCenter, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } { std::string const outputString{ @@ -1769,22 +1407,12 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, "HLLD State: Right Star"}; // Compute the fluxes and check for correctness // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{-0.0011656375857387598, - 0.0062355370788444902, - 0, - 0, - -0.00055517615333601446, - 0.0, - -0.0005829533231464588, - 0}; - std::vector const scalarFlux{-0.0012903579278217153, - -0.0025977614899708843, - -0.0038646879530001054}; - Real thermalEnergyFlux = -0.00034184143405415065; - std::vector const testFluxes = - computeFluxes(leftVxTurnOver, midPoint, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, - outputString, direction); + std::vector const fiducialFlux{ + -0.0011656375857387598, 0.0062355370788444902, 0, 0, -0.00055517615333601446, 0.0, -0.0005829533231464588, 0}; + std::vector const scalarFlux{-0.0012903579278217153, -0.0025977614899708843, -0.0038646879530001054}; + Real thermalEnergyFlux = -0.00034184143405415065; + std::vector const testFluxes = computeFluxes(leftVxTurnOver, midPoint, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } { std::string const outputString{ @@ -1793,21 +1421,12 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, "HLLD State: Right Star"}; // Compute the fluxes and check for correctness // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{-0.0068097924351817191, - 0.010501781004354172, - 0, - 0, - -0.0027509360975397175, - 0.0, - -0.0033826654536986789, - 0}; - std::vector const scalarFlux{ - -0.0075384234028349319, -0.015176429414463658, -0.022577963432775162}; - Real thermalEnergyFlux = -0.001531664896602873; - std::vector const testFluxes = - computeFluxes(midPoint, leftVxTurnOver, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, - outputString, direction); + std::vector const fiducialFlux{ + -0.0068097924351817191, 0.010501781004354172, 0, 0, -0.0027509360975397175, 0.0, -0.0033826654536986789, 0}; + std::vector const scalarFlux{-0.0075384234028349319, -0.015176429414463658, -0.022577963432775162}; + Real thermalEnergyFlux = -0.001531664896602873; + std::vector const testFluxes = computeFluxes(midPoint, leftVxTurnOver, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } { std::string const outputString{ @@ -1816,21 +1435,12 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, "HLLD State: Left Star"}; // Compute the fluxes and check for correctness // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{0.0013952100758668729, - 0.0061359407125797273, - 0, - 0, - 0.00065984543596031629, - 0.0, - 0.00069776606396793105, - 0}; - std::vector const scalarFlux{ - 0.001544494107257657, 0.0031093909889746947, 0.0046258388010795683}; - Real thermalEnergyFlux = 0.00040916715364737997; - std::vector const testFluxes = - computeFluxes(midPoint, rightVxTurnOver, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, - outputString, direction); + std::vector const fiducialFlux{ + 0.0013952100758668729, 0.0061359407125797273, 0, 0, 0.00065984543596031629, 0.0, 0.00069776606396793105, 0}; + std::vector const scalarFlux{0.001544494107257657, 0.0031093909889746947, 0.0046258388010795683}; + Real thermalEnergyFlux = 0.00040916715364737997; + std::vector const testFluxes = computeFluxes(midPoint, rightVxTurnOver, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } { std::string const outputString{ @@ -1839,21 +1449,12 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, "HLLD State: Left Star"}; // Compute the fluxes and check for correctness // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{0.0090024688079190333, - 0.011769373146023688, - 0, - 0, - 0.003725251767222792, - 0.0, - 0.0045418689996141555, - 0}; - std::vector const scalarFlux{ - 0.0099657107306674268, 0.020063068547205749, 0.029847813055181766}; - Real thermalEnergyFlux = 0.0020542406295284269; - std::vector const testFluxes = - computeFluxes(rightVxTurnOver, midPoint, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, - outputString, direction); + std::vector const fiducialFlux{ + 0.0090024688079190333, 0.011769373146023688, 0, 0, 0.003725251767222792, 0.0, 0.0045418689996141555, 0}; + std::vector const scalarFlux{0.0099657107306674268, 0.020063068547205749, 0.029847813055181766}; + Real thermalEnergyFlux = 0.0020542406295284269; + std::vector const testFluxes = computeFluxes(rightVxTurnOver, midPoint, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } { std::string const outputString{ @@ -1862,21 +1463,12 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, "HLLD State: Left Star"}; // Compute the fluxes and check for correctness // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{0.023310393229073981, - 0.0033086897645311728, - 0, - 0, - 0.0034208520409618887, - 0.0, - 0.011760413130542123, - 0}; - std::vector const scalarFlux{ - 0.025804547718589466, 0.051949973634547723, 0.077285939467198722}; + std::vector const fiducialFlux{ + 0.023310393229073981, 0.0033086897645311728, 0, 0, 0.0034208520409618887, 0.0, 0.011760413130542123, 0}; + std::vector const scalarFlux{0.025804547718589466, 0.051949973634547723, 0.077285939467198722}; Real thermalEnergyFlux = 0.0053191138878843835; - std::vector const testFluxes = computeFluxes( - rightVxTurnOver, rightRarefactionCenter, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, - outputString, direction); + std::vector const testFluxes = computeFluxes(rightVxTurnOver, rightRarefactionCenter, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } { std::string const outputString{ @@ -1885,21 +1477,12 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, "HLLD State: Left Star"}; // Compute the fluxes and check for correctness // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{0.33914253809565298, - 0.46770133685446141, - 0, - 0, - 0.46453338019960133, - 0.0, - 0.17077520175095764, - 0}; - std::vector const scalarFlux{ - 0.37542995185416178, 0.75581933514738364, 1.1244318966408966}; + std::vector const fiducialFlux{ + 0.33914253809565298, 0.46770133685446141, 0, 0, 0.46453338019960133, 0.0, 0.17077520175095764, 0}; + std::vector const scalarFlux{0.37542995185416178, 0.75581933514738364, 1.1244318966408966}; Real thermalEnergyFlux = 0.1444638874418068; - std::vector const testFluxes = computeFluxes( - rightRarefactionCenter, rightVxTurnOver, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, - outputString, direction); + std::vector const testFluxes = computeFluxes(rightRarefactionCenter, rightVxTurnOver, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } { std::string const outputString{ @@ -1908,21 +1491,12 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, "HLLD State: Left"}; // Compute the fluxes and check for correctness // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{0.33976563660000003, - 0.46733255780629601, - 0, - 0, - 0.46427650313257612, - 0.0, - 0.17108896296000001, - 0}; - std::vector const scalarFlux{ - 0.37611972035917141, 0.75720798400261535, 1.1264977885722693}; - Real thermalEnergyFlux = 0.14472930749999999; - std::vector const testFluxes = - computeFluxes(rightRarefactionCenter, rightICs, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, - outputString, direction); + std::vector const fiducialFlux{ + 0.33976563660000003, 0.46733255780629601, 0, 0, 0.46427650313257612, 0.0, 0.17108896296000001, 0}; + std::vector const scalarFlux{0.37611972035917141, 0.75720798400261535, 1.1264977885722693}; + Real thermalEnergyFlux = 0.14472930749999999; + std::vector const testFluxes = computeFluxes(rightRarefactionCenter, rightICs, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } { std::string const outputString{ @@ -1931,15 +1505,11 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, "HLLD State: Left"}; // Compute the fluxes and check for correctness // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{ - 2, 4.5750000000000002, 0, 0, 6.75, 0.0, 1, 0}; - std::vector const scalarFlux{2.2139950592000002, 4.4572370036000004, - 6.6310283749999996}; - Real thermalEnergyFlux = 1.3499999999999996; - std::vector const testFluxes = - computeFluxes(rightICs, rightRarefactionCenter, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, - outputString, direction); + std::vector const fiducialFlux{2, 4.5750000000000002, 0, 0, 6.75, 0.0, 1, 0}; + std::vector const scalarFlux{2.2139950592000002, 4.4572370036000004, 6.6310283749999996}; + Real thermalEnergyFlux = 1.3499999999999996; + std::vector const testFluxes = computeFluxes(rightICs, rightRarefactionCenter, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } } } @@ -1956,20 +1526,14 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, ConstantStatesExpectCorrectFlux) // Constant Values Real const gamma = 5. / 3.; - std::vector const primitiveScalar{1.1069975296, 2.2286185018, - 3.3155141875}; + std::vector const primitiveScalar{1.1069975296, 2.2286185018, 3.3155141875}; // States - std::vector< - Real> const // | Density | X-Velocity | Y-Velocity | Z-Velocity | - // Pressure | X-Magnetic Field | Y-Magnetic Field | - // Z-Magnetic Field | Adiabatic Index | Passive Scalars | - zeroMagneticField = - primitive2Conserved({1e4, 0.0, 0.0, 0.0, 1.380658E-5, 0.0, 0.0, 0.0}, - gamma, primitiveScalar), - onesMagneticField = - primitive2Conserved({1e4, 0.0, 0.0, 0.0, 1.380658E-5, 1.0, 1.0, 1.0}, - gamma, primitiveScalar); + std::vector const // | Density | X-Velocity | Y-Velocity | Z-Velocity | + // Pressure | X-Magnetic Field | Y-Magnetic Field | + // Z-Magnetic Field | Adiabatic Index | Passive Scalars | + zeroMagneticField = primitive2Conserved({1e4, 0.0, 0.0, 0.0, 1.380658E-5, 0.0, 0.0, 0.0}, gamma, primitiveScalar), + onesMagneticField = primitive2Conserved({1e4, 0.0, 0.0, 0.0, 1.380658E-5, 1.0, 1.0, 1.0}, gamma, primitiveScalar); for (size_t direction = 0; direction < 3; direction++) { { @@ -1981,11 +1545,9 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, ConstantStatesExpectCorrectFlux) // Order of Fluxes is rho, vec(V), E, vec(B) std::vector const fiducialFlux{0, 1.380658e-05, 0, 0, 0, 0, 0, 0}; std::vector const scalarFlux{0, 0, 0}; - Real thermalEnergyFlux = 0.; - std::vector const testFluxes = - computeFluxes(zeroMagneticField, zeroMagneticField, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, - outputString, direction); + Real thermalEnergyFlux = 0.; + std::vector const testFluxes = computeFluxes(zeroMagneticField, zeroMagneticField, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } { std::string const outputString{ @@ -1994,22 +1556,13 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, ConstantStatesExpectCorrectFlux) "HLLD State: Left Double Star"}; // Compute the fluxes and check for correctness // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{1.42108547152020037174e-14, - 0.50001380657999994, - -1, - -1, - -1.7347234759768071e-18, - 0.0, - 3.4694469519536142e-18, - 3.4694469519536142e-18}; - std::vector const scalarFlux{1.5731381063233131e-14, - 3.1670573744690958e-14, - 4.7116290424753513e-14}; - Real thermalEnergyFlux = 0.; - std::vector const testFluxes = - computeFluxes(onesMagneticField, onesMagneticField, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, - outputString, direction); + std::vector const fiducialFlux{ + 1.42108547152020037174e-14, 0.50001380657999994, -1, -1, -1.7347234759768071e-18, 0.0, + 3.4694469519536142e-18, 3.4694469519536142e-18}; + std::vector const scalarFlux{1.5731381063233131e-14, 3.1670573744690958e-14, 4.7116290424753513e-14}; + Real thermalEnergyFlux = 0.; + std::vector const testFluxes = computeFluxes(onesMagneticField, onesMagneticField, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } } } @@ -2020,26 +1573,21 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, ConstantStatesExpectCorrectFlux) * \brief Test the HLLD Riemann Solver with the degenerate state * */ -TEST_F(tMHDCalculateHLLDFluxesCUDA, - DegenerateStateCorrectInputExpectCorrectOutput) +TEST_F(tMHDCalculateHLLDFluxesCUDA, DegenerateStateCorrectInputExpectCorrectOutput) { // Constant Values Real const gamma = 5. / 3.; - std::vector const primitiveScalar{1.1069975296, 2.2286185018, - 3.3155141875}; + std::vector const primitiveScalar{1.1069975296, 2.2286185018, 3.3155141875}; // State std::vector const // | Density | X-Velocity | Y-Velocity | Z-Velocity | // Pressure | X-Magnetic Field | Y-Magnetic Field | // Z-Magnetic Field | Adiabatic Index | Passive // Scalars | - state = primitive2Conserved({1.0, 1.0, 1.0, 1.0, 1.0, 3.0E4, 1.0, 1.0}, - gamma, primitiveScalar); + state = primitive2Conserved({1.0, 1.0, 1.0, 1.0, 1.0, 3.0E4, 1.0, 1.0}, gamma, primitiveScalar); - std::vector const fiducialFlux{1, -449999997, -29999, -29999, - -59994, 0.0, -29999, -29999}; - std::vector const scalarFlux{1.1069975296000001, 2.2286185018000002, - 3.3155141874999998}; + std::vector const fiducialFlux{1, -449999997, -29999, -29999, -59994, 0.0, -29999, -29999}; + std::vector const scalarFlux{1.1069975296000001, 2.2286185018000002, 3.3155141874999998}; Real thermalEnergyFlux = 1.5; std::string const outputString{ "Left State: Degenerate state\n" @@ -2053,10 +1601,8 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, // the Athena solver with theses tests gave me -0.00080700946455175148 // though for (size_t direction = 0; direction < 3; direction++) { - std::vector const testFluxes = - computeFluxes(state, state, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, - outputString, direction); + std::vector const testFluxes = computeFluxes(state, state, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } } // ========================================================================= @@ -2090,10 +1636,8 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, AllZeroesExpectAllZeroes) for (size_t direction = 0; direction < 3; direction++) { // Compute the fluxes and check for correctness // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const testFluxes = - computeFluxes(state, state, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, - outputString, direction); + std::vector const testFluxes = computeFluxes(state, state, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } } // ========================================================================= @@ -2113,62 +1657,47 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, UnphysicalValuesExpectAutomaticFix) std::vector // | Density | X-Momentum | Y-Momentum | Z-Momentum | // Energy | X-Magnetic Field | Y-Magnetic Field | // Z-Magnetic Field | Adiabatic Index | Passive Scalars | - negativePressure = {1.0, 1.0, 1.0, 1.0, 1.5, 1.0, 1.0, 1.0}, - negativeEnergy = {1.0, 1.0, 1.0, 1.0, -(5 - gamma), 1.0, 1.0, 1.0}, - negativeDensity = {-1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0}, - negativeDensityEnergyPressure = {-1.0, -1.0, -1.0, -1.0, - -gamma, 1.0, 1.0, 1.0}, - negativeDensityPressure = {-1.0, 1.0, 1.0, 1.0, -1.0, 1.0, 1.0, 1.0}; + negativePressure = {1.0, 1.0, 1.0, 1.0, 1.5, 1.0, 1.0, 1.0}, + negativeEnergy = {1.0, 1.0, 1.0, 1.0, -(5 - gamma), 1.0, 1.0, 1.0}, + negativeDensity = {-1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0}, + negativeDensityEnergyPressure = {-1.0, -1.0, -1.0, -1.0, -gamma, 1.0, 1.0, 1.0}, + negativeDensityPressure = {-1.0, 1.0, 1.0, 1.0, -1.0, 1.0, 1.0, 1.0}; #ifdef SCALAR - std::vector const conservedScalar{1.1069975296, 2.2286185018, - 3.3155141875}; + std::vector const conservedScalar{1.1069975296, 2.2286185018, 3.3155141875}; negativePressure.insert(negativePressure.begin() + 5, conservedScalar.begin(), conservedScalar.begin() + grid_enum::nscalars); negativeEnergy.insert(negativeEnergy.begin() + 5, conservedScalar.begin(), conservedScalar.begin() + grid_enum::nscalars); negativeDensity.insert(negativeDensity.begin() + 5, conservedScalar.begin(), conservedScalar.begin() + grid_enum::nscalars); - negativeDensityEnergyPressure.insert( - negativeDensityEnergyPressure.begin() + 5, conservedScalar.begin(), - conservedScalar.begin() + grid_enum::nscalars); - negativeDensityPressure.insert(negativeDensityPressure.begin() + 5, - conservedScalar.begin(), + negativeDensityEnergyPressure.insert(negativeDensityEnergyPressure.begin() + 5, conservedScalar.begin(), + conservedScalar.begin() + grid_enum::nscalars); + negativeDensityPressure.insert(negativeDensityPressure.begin() + 5, conservedScalar.begin(), conservedScalar.begin() + grid_enum::nscalars); #endif // SCALAR #ifdef DE negativePressure.push_back(mhd::utils::computeThermalEnergy( - negativePressure.at(4), negativePressure.at(0), negativePressure.at(1), - negativePressure.at(2), negativePressure.at(3), - negativePressure.at(grid_enum::magnetic_x), - negativePressure.at(grid_enum::magnetic_y), + negativePressure.at(4), negativePressure.at(0), negativePressure.at(1), negativePressure.at(2), + negativePressure.at(3), negativePressure.at(grid_enum::magnetic_x), negativePressure.at(grid_enum::magnetic_y), negativePressure.at(grid_enum::magnetic_z), gamma)); negativeEnergy.push_back(mhd::utils::computeThermalEnergy( - negativeEnergy.at(4), negativeEnergy.at(0), negativeEnergy.at(1), - negativeEnergy.at(2), negativeEnergy.at(3), - negativeEnergy.at(grid_enum::magnetic_x), - negativeEnergy.at(grid_enum::magnetic_y), + negativeEnergy.at(4), negativeEnergy.at(0), negativeEnergy.at(1), negativeEnergy.at(2), negativeEnergy.at(3), + negativeEnergy.at(grid_enum::magnetic_x), negativeEnergy.at(grid_enum::magnetic_y), negativeEnergy.at(grid_enum::magnetic_z), gamma)); negativeDensity.push_back(mhd::utils::computeThermalEnergy( - negativeDensity.at(4), negativeDensity.at(0), negativeDensity.at(1), - negativeDensity.at(2), negativeDensity.at(3), - negativeDensity.at(grid_enum::magnetic_x), - negativeDensity.at(grid_enum::magnetic_y), + negativeDensity.at(4), negativeDensity.at(0), negativeDensity.at(1), negativeDensity.at(2), negativeDensity.at(3), + negativeDensity.at(grid_enum::magnetic_x), negativeDensity.at(grid_enum::magnetic_y), negativeDensity.at(grid_enum::magnetic_z), gamma)); negativeDensityEnergyPressure.push_back(mhd::utils::computeThermalEnergy( - negativeDensityEnergyPressure.at(4), negativeDensityEnergyPressure.at(0), - negativeDensityEnergyPressure.at(1), negativeDensityEnergyPressure.at(2), - negativeDensityEnergyPressure.at(3), - negativeDensityEnergyPressure.at(grid_enum::magnetic_x), - negativeDensityEnergyPressure.at(grid_enum::magnetic_y), + negativeDensityEnergyPressure.at(4), negativeDensityEnergyPressure.at(0), negativeDensityEnergyPressure.at(1), + negativeDensityEnergyPressure.at(2), negativeDensityEnergyPressure.at(3), + negativeDensityEnergyPressure.at(grid_enum::magnetic_x), negativeDensityEnergyPressure.at(grid_enum::magnetic_y), negativeDensityEnergyPressure.at(grid_enum::magnetic_z), gamma)); negativeDensityPressure.push_back(mhd::utils::computeThermalEnergy( - negativeDensityPressure.at(4), negativeDensityPressure.at(0), - negativeDensityPressure.at(1), negativeDensityPressure.at(2), - negativeDensityPressure.at(3), - negativeDensityPressure.at(grid_enum::magnetic_x), - negativeDensityPressure.at(grid_enum::magnetic_y), - negativeDensityPressure.at(grid_enum::magnetic_z), gamma)); + negativeDensityPressure.at(4), negativeDensityPressure.at(0), negativeDensityPressure.at(1), + negativeDensityPressure.at(2), negativeDensityPressure.at(3), negativeDensityPressure.at(grid_enum::magnetic_x), + negativeDensityPressure.at(grid_enum::magnetic_y), negativeDensityPressure.at(grid_enum::magnetic_z), gamma)); #endif // DE for (size_t direction = 0; direction < 3; direction++) { @@ -2179,15 +1708,11 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, UnphysicalValuesExpectAutomaticFix) "HLLD State: Left Star State"}; // Compute the fluxes and check for correctness // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{ - 1, 1.5, 0, 0, -1.6254793235168146e-16, 0, 0, 0}; - std::vector const scalarFlux{1.1069975296000001, 2.2286185018000002, - 3.3155141874999998}; - Real thermalEnergyFlux = -1.5; - std::vector const testFluxes = - computeFluxes(negativePressure, negativePressure, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, - outputString, direction); + std::vector const fiducialFlux{1, 1.5, 0, 0, -1.6254793235168146e-16, 0, 0, 0}; + std::vector const scalarFlux{1.1069975296000001, 2.2286185018000002, 3.3155141874999998}; + Real thermalEnergyFlux = -1.5; + std::vector const testFluxes = computeFluxes(negativePressure, negativePressure, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } { std::string const outputString{ @@ -2197,13 +1722,10 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, UnphysicalValuesExpectAutomaticFix) // Compute the fluxes and check for correctness // Order of Fluxes is rho, vec(V), E, vec(B) std::vector const fiducialFlux{1, 1.5, 0, 0, -1.5, 0, 0, 0}; - std::vector const scalarFlux{1.1069975296000001, 2.2286185018000002, - 3.3155141874999998}; - Real thermalEnergyFlux = -6.333333333333333; - std::vector const testFluxes = - computeFluxes(negativeEnergy, negativeEnergy, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, - outputString, direction); + std::vector const scalarFlux{1.1069975296000001, 2.2286185018000002, 3.3155141874999998}; + Real thermalEnergyFlux = -6.333333333333333; + std::vector const testFluxes = computeFluxes(negativeEnergy, negativeEnergy, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } { std::string const outputString{ @@ -2212,16 +1734,11 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, UnphysicalValuesExpectAutomaticFix) "HLLD State: Left State"}; // Compute the fluxes and check for correctness // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{1, 1E+20, 1e+20, 1e+20, - -5e+19, 0, 0, 0}; - std::vector const scalarFlux{1.1069975296000002e+20, - 2.2286185018000002e+20, - 3.3155141874999997e+20}; - Real thermalEnergyFlux = -1.5000000000000001e+40; - std::vector const testFluxes = - computeFluxes(negativeDensity, negativeDensity, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, - outputString, direction); + std::vector const fiducialFlux{1, 1E+20, 1e+20, 1e+20, -5e+19, 0, 0, 0}; + std::vector const scalarFlux{1.1069975296000002e+20, 2.2286185018000002e+20, 3.3155141874999997e+20}; + Real thermalEnergyFlux = -1.5000000000000001e+40; + std::vector const testFluxes = computeFluxes(negativeDensity, negativeDensity, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } { std::string const outputString{ @@ -2230,17 +1747,12 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, UnphysicalValuesExpectAutomaticFix) "HLLD State: Right State"}; // Compute the fluxes and check for correctness // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{-1, 1E+20, 1E+20, 1E+20, - 1.5E+20, 0, 0, 0}; - std::vector const scalarFlux{-1.1069975296000002e+20, - -2.2286185018000002e+20, - -3.3155141874999997e+20}; + std::vector const fiducialFlux{-1, 1E+20, 1E+20, 1E+20, 1.5E+20, 0, 0, 0}; + std::vector const scalarFlux{-1.1069975296000002e+20, -2.2286185018000002e+20, -3.3155141874999997e+20}; Real thermalEnergyFlux = 1.5000000000000001e+40; std::vector const testFluxes = - computeFluxes(negativeDensityEnergyPressure, - negativeDensityEnergyPressure, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, - outputString, direction); + computeFluxes(negativeDensityEnergyPressure, negativeDensityEnergyPressure, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } { std::string const outputString{ @@ -2249,16 +1761,12 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, UnphysicalValuesExpectAutomaticFix) "HLLD State: Left State"}; // Compute the fluxes and check for correctness // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{1, 1e+20, 1e+20, 1e+20, - -1.5e+20, 0, 0, 0}; - std::vector const scalarFlux{1.1069975296000002e+20, - 2.2286185018000002e+20, - 3.3155141874999997e+20}; - Real thermalEnergyFlux = -1.5000000000000001e+40; - std::vector const testFluxes = computeFluxes( - negativeDensityPressure, negativeDensityPressure, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, - outputString, direction); + std::vector const fiducialFlux{1, 1e+20, 1e+20, 1e+20, -1.5e+20, 0, 0, 0}; + std::vector const scalarFlux{1.1069975296000002e+20, 2.2286185018000002e+20, 3.3155141874999997e+20}; + Real thermalEnergyFlux = -1.5000000000000001e+40; + std::vector const testFluxes = + computeFluxes(negativeDensityPressure, negativeDensityPressure, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } } } @@ -2286,56 +1794,46 @@ struct testParams { std::vector const magneticX{92.75101068883114, 31.588767769990532}; std::vector stateLVec{ - {21.50306776645775, 1.7906564444824999, 0.33040135813215948, - 1.500111692877206, 65.751208381099417, 12.297499156516622, - 46.224045698787776, 9.9999999999999995e-21, 5445.3204350339083}, - {48.316634031589935, 0.39291118391272883, 0.69876195899931859, - 1.8528943583250035, 38.461354599479826, 63.744719695704063, - 37.703264551707541, 9.9999999999999995e-21, 3241.38784808316}}, - stateRVec{ - {81.121773176226498, 0.10110493143718589, 0.17103629446142521, - 0.41731155351794952, 18.88982523270516, 84.991914178754897, - 34.852095153095384, 9.9999999999999995e-21, 8605.4286125143772}, - {91.029557388536347, 0.93649399297774782, 0.36277769000180521, - 0.095181318599791204, 83.656397841788944, 35.910258841630984, - 24.052685003977757, 9.9999999999999995e-21, 4491.7524579462979}}; + {21.50306776645775, 1.7906564444824999, 0.33040135813215948, 1.500111692877206, 65.751208381099417, + 12.297499156516622, 46.224045698787776, 9.9999999999999995e-21, 5445.3204350339083}, + {48.316634031589935, 0.39291118391272883, 0.69876195899931859, 1.8528943583250035, 38.461354599479826, + 63.744719695704063, 37.703264551707541, 9.9999999999999995e-21, 3241.38784808316}}, + stateRVec{{81.121773176226498, 0.10110493143718589, 0.17103629446142521, 0.41731155351794952, 18.88982523270516, + 84.991914178754897, 34.852095153095384, 9.9999999999999995e-21, 8605.4286125143772}, + {91.029557388536347, 0.93649399297774782, 0.36277769000180521, 0.095181318599791204, 83.656397841788944, + 35.910258841630984, 24.052685003977757, 9.9999999999999995e-21, 4491.7524579462979}}; std::vector const starStateLVec{ - {28.520995251761526, 1.5746306813243216, 1.3948193325212686, - 6.579867455284738, 62.093488291430653, 62.765890944643196}, - {54.721668215064945, 1.4363926014039052, 1.1515754515491903, - 30.450436649083692, 54.279167444036723, 93.267654555096414}}, - starStateRVec{ - {49.090695707386047, 1.0519818825796206, 0.68198273634686157, - 90.44484278669114, 26.835645069149873, 7.4302316959173442}, - {72.680005044606091, 0.61418047569879897, 0.71813570322922715, - 61.33664731346812, 98.974446283273181, 10.696380763901459}}; + {28.520995251761526, 1.5746306813243216, 1.3948193325212686, 6.579867455284738, 62.093488291430653, + 62.765890944643196}, + {54.721668215064945, 1.4363926014039052, 1.1515754515491903, 30.450436649083692, 54.279167444036723, + 93.267654555096414}}, + starStateRVec{{49.090695707386047, 1.0519818825796206, 0.68198273634686157, 90.44484278669114, 26.835645069149873, + 7.4302316959173442}, + {72.680005044606091, 0.61418047569879897, 0.71813570322922715, 61.33664731346812, + 98.974446283273181, 10.696380763901459}}; std::vector totalPressureStar{66.80958736783934, 72.29644038317676}; std::vector const DoubleStarStateVec{ - {0.79104271107837087, 0.97609103551927523, 20.943239839455895, - 83.380243826880701, 45.832024557076693, std::nan("0")}, - {1.390870320696683, 0.52222643241336986, 83.851481048702098, - 80.366712517307832, 55.455301414557297, std::nan("0")}}; + {0.79104271107837087, 0.97609103551927523, 20.943239839455895, 83.380243826880701, 45.832024557076693, + std::nan("0")}, + {1.390870320696683, 0.52222643241336986, 83.851481048702098, 80.366712517307832, 55.455301414557297, + std::nan("0")}}; std::vector const flux{ - {12.939239309626116, 65.054814649176265, 73.676928455867824, - 16.873647595664387, 52.718887319724693, 58.989284454159673, - 29.976925743532302}, - {81.715245865170729, 56.098850697078028, 2.7172469834037871, - 39.701329831928732, 81.63926176158796, 57.043444592213589, - 97.733298271413588}}, - starFlux{{0, 74.90125547448865, 16.989138610622945, 38.541822734846185, - 19.095105176247017, 96.239645266242775, 86.225169282683467}, - {0, 26.812722601652684, 48.349566649914976, 61.228439610525378, - 45.432249733131123, 33.053375365947957, 15.621020824107379}}; + {12.939239309626116, 65.054814649176265, 73.676928455867824, 16.873647595664387, 52.718887319724693, + 58.989284454159673, 29.976925743532302}, + {81.715245865170729, 56.098850697078028, 2.7172469834037871, 39.701329831928732, 81.63926176158796, + 57.043444592213589, 97.733298271413588}}, + starFlux{{0, 74.90125547448865, 16.989138610622945, 38.541822734846185, 19.095105176247017, 96.239645266242775, + 86.225169282683467}, + {0, 26.812722601652684, 48.349566649914976, 61.228439610525378, 45.432249733131123, 33.053375365947957, + 15.621020824107379}}; std::vector const speed{ - {-22.40376497145191, -19.710500632936679, -0.81760587897407833, - 9.6740190040662242, 24.295526347371595}, - {-11.190385012513822, -4.4880642018724357, -0.026643804611559244, - 3.4191202933087519, 12.519790189404299}}; + {-22.40376497145191, -19.710500632936679, -0.81760587897407833, 9.6740190040662242, 24.295526347371595}, + {-11.190385012513822, -4.4880642018724357, -0.026643804611559244, 3.4191202933087519, 12.519790189404299}}; testParams() = default; }; @@ -2350,21 +1848,16 @@ struct testParams { TEST(tMHDHlldInternalApproximateLRWaveSpeeds, CorrectInputExpectCorrectOutput) { testParams const parameters; - std::vector const fiducialSpeedL{-22.40376497145191, - -11.190385012513822}; - std::vector const fiducialSpeedR{24.295526347371595, - 12.519790189404299}; + std::vector const fiducialSpeedL{-22.40376497145191, -11.190385012513822}; + std::vector const fiducialSpeedR{24.295526347371595, 12.519790189404299}; for (size_t i = 0; i < parameters.names.size(); i++) { mhd::_internal::Speeds testSpeed = mhd::_internal::approximateLRWaveSpeeds( - parameters.stateLVec.at(i), parameters.stateRVec.at(i), - parameters.magneticX.at(i), parameters.gamma); + parameters.stateLVec.at(i), parameters.stateRVec.at(i), parameters.magneticX.at(i), parameters.gamma); // Now check results - testingUtilities::checkResults(fiducialSpeedL[i], testSpeed.L, - parameters.names.at(i) + ", SpeedL"); - testingUtilities::checkResults(fiducialSpeedR.at(i), testSpeed.R, - parameters.names.at(i) + ", SpeedR"); + testingUtilities::checkResults(fiducialSpeedL[i], testSpeed.L, parameters.names.at(i) + ", SpeedL"); + testingUtilities::checkResults(fiducialSpeedR.at(i), testSpeed.R, parameters.names.at(i) + ", SpeedR"); } } // ========================================================================= @@ -2374,24 +1867,20 @@ TEST(tMHDHlldInternalApproximateLRWaveSpeeds, CorrectInputExpectCorrectOutput) * \brief Test the mhd::_internal::approximateMiddleWaveSpeed function * */ -TEST(tMHDHlldInternalApproximateMiddleWaveSpeed, - CorrectInputExpectCorrectOutput) +TEST(tMHDHlldInternalApproximateMiddleWaveSpeed, CorrectInputExpectCorrectOutput) { testParams const parameters; - std::vector const fiducialSpeedM{-0.81760587897407833, - -0.026643804611559244}; + std::vector const fiducialSpeedM{-0.81760587897407833, -0.026643804611559244}; mhd::_internal::Speeds testSpeed; for (size_t i = 0; i < parameters.names.size(); i++) { - testSpeed.M = mhd::_internal::approximateMiddleWaveSpeed( - parameters.stateLVec.at(i), parameters.stateRVec.at(i), - parameters.speed.at(i)); + testSpeed.M = mhd::_internal::approximateMiddleWaveSpeed(parameters.stateLVec.at(i), parameters.stateRVec.at(i), + parameters.speed.at(i)); // Now check results - testingUtilities::checkResults(fiducialSpeedM.at(i), testSpeed.M, - parameters.names.at(i) + ", SpeedM"); + testingUtilities::checkResults(fiducialSpeedM.at(i), testSpeed.M, parameters.names.at(i) + ", SpeedM"); } } // ========================================================================= @@ -2404,26 +1893,20 @@ TEST(tMHDHlldInternalApproximateMiddleWaveSpeed, TEST(tMHDHlldInternalApproximateStarWaveSpeed, CorrectInputExpectCorrectOutput) { testParams const parameters; - std::vector const fiducialSpeedStarL{-18.18506608966894, - -4.2968910457518161}; - std::vector const fiducialSpeedStarR{12.420292938368167, - 3.6786718447209252}; + std::vector const fiducialSpeedStarL{-18.18506608966894, -4.2968910457518161}; + std::vector const fiducialSpeedStarR{12.420292938368167, 3.6786718447209252}; mhd::_internal::Speeds testSpeed; for (size_t i = 0; i < parameters.names.size(); i++) { - testSpeed.LStar = mhd::_internal::approximateStarWaveSpeed( - parameters.starStateLVec.at(i), parameters.speed.at(i), - parameters.magneticX.at(i), -1); - testSpeed.RStar = mhd::_internal::approximateStarWaveSpeed( - parameters.starStateRVec.at(i), parameters.speed.at(i), - parameters.magneticX.at(i), 1); + testSpeed.LStar = mhd::_internal::approximateStarWaveSpeed(parameters.starStateLVec.at(i), parameters.speed.at(i), + parameters.magneticX.at(i), -1); + testSpeed.RStar = mhd::_internal::approximateStarWaveSpeed(parameters.starStateRVec.at(i), parameters.speed.at(i), + parameters.magneticX.at(i), 1); // Now check results - testingUtilities::checkResults(fiducialSpeedStarL.at(i), testSpeed.LStar, - parameters.names.at(i) + ", SpeedStarL"); - testingUtilities::checkResults(fiducialSpeedStarR.at(i), testSpeed.RStar, - parameters.names.at(i) + ", SpeedStarR"); + testingUtilities::checkResults(fiducialSpeedStarL.at(i), testSpeed.LStar, parameters.names.at(i) + ", SpeedStarL"); + testingUtilities::checkResults(fiducialSpeedStarR.at(i), testSpeed.RStar, parameters.names.at(i) + ", SpeedStarR"); } } // ========================================================================= @@ -2438,37 +1921,28 @@ TEST(tMHDHlldInternalNonStarFluxes, CorrectInputExpectCorrectOutput) testParams const parameters; std::vector fiducialFlux{ - {38.504606872151484, -3088.4810263278778, -1127.8835013070616, - -4229.5657456907293, -12344.460641662206, -8.6244637840856555, - -56.365490339906408}, - {18.984145880030045, 2250.9966820900618, -2000.3517480656785, - -1155.8240512956793, -2717.2127176227905, 2.9729840344910059, - -43.716615275067923}}; + {38.504606872151484, -3088.4810263278778, -1127.8835013070616, -4229.5657456907293, -12344.460641662206, + -8.6244637840856555, -56.365490339906408}, + {18.984145880030045, 2250.9966820900618, -2000.3517480656785, -1155.8240512956793, -2717.2127176227905, + 2.9729840344910059, -43.716615275067923}}; for (size_t i = 0; i < parameters.names.size(); i++) { - mhd::_internal::Flux testFlux = mhd::_internal::nonStarFluxes( - parameters.stateLVec.at(i), parameters.magneticX.at(i)); + mhd::_internal::Flux testFlux = + mhd::_internal::nonStarFluxes(parameters.stateLVec.at(i), parameters.magneticX.at(i)); // Now check results - testingUtilities::checkResults(fiducialFlux[i].density, testFlux.density, - parameters.names.at(i) + ", DensityFlux"); - testingUtilities::checkResults(fiducialFlux[i].momentumX, - testFlux.momentumX, + testingUtilities::checkResults(fiducialFlux[i].density, testFlux.density, parameters.names.at(i) + ", DensityFlux"); + testingUtilities::checkResults(fiducialFlux[i].momentumX, testFlux.momentumX, parameters.names.at(i) + ", MomentumFluxX"); - testingUtilities::checkResults(fiducialFlux[i].momentumY, - testFlux.momentumY, + testingUtilities::checkResults(fiducialFlux[i].momentumY, testFlux.momentumY, parameters.names.at(i) + ", MomentumFluxY"); - testingUtilities::checkResults(fiducialFlux[i].momentumZ, - testFlux.momentumZ, + testingUtilities::checkResults(fiducialFlux[i].momentumZ, testFlux.momentumZ, parameters.names.at(i) + ", MomentumFluxZ"); - testingUtilities::checkResults(fiducialFlux[i].magneticY, - testFlux.magneticY, + testingUtilities::checkResults(fiducialFlux[i].magneticY, testFlux.magneticY, parameters.names.at(i) + ", MagneticFluxY"); - testingUtilities::checkResults(fiducialFlux[i].magneticZ, - testFlux.magneticZ, + testingUtilities::checkResults(fiducialFlux[i].magneticZ, testFlux.magneticZ, parameters.names.at(i) + ", MagneticFluxZ"); - testingUtilities::checkResults(fiducialFlux[i].energy, testFlux.energy, - parameters.names.at(i) + ", EnergyFlux"); + testingUtilities::checkResults(fiducialFlux[i].energy, testFlux.energy, parameters.names.at(i) + ", EnergyFlux"); } } // ========================================================================= @@ -2479,38 +1953,31 @@ TEST(tMHDHlldInternalNonStarFluxes, CorrectInputExpectCorrectOutput) * non-degenerate case * */ -TEST(tMHDHlldInternalComputeStarState, - CorrectInputNonDegenerateExpectCorrectOutput) +TEST(tMHDHlldInternalComputeStarState, CorrectInputNonDegenerateExpectCorrectOutput) { testParams const parameters; std::vector fiducialStarState{ - {24.101290139122913, 1.4626377138501221, 5.7559806612277464, - 1023.8840191068896, 18.648382121236992, 70.095850905078336}, - {50.132466596958501, 0.85967712862308099, 1.9480712959548112, - 172.06840532772659, 66.595692901872582, 39.389537509454122}}; + {24.101290139122913, 1.4626377138501221, 5.7559806612277464, 1023.8840191068896, 18.648382121236992, + 70.095850905078336}, + {50.132466596958501, 0.85967712862308099, 1.9480712959548112, 172.06840532772659, 66.595692901872582, + 39.389537509454122}}; for (size_t i = 0; i < parameters.names.size(); i++) { - mhd::_internal::StarState testStarState = mhd::_internal::computeStarState( - parameters.stateLVec.at(i), parameters.speed.at(i), - parameters.speed.at(i).L, parameters.magneticX.at(i), - parameters.totalPressureStar.at(i)); + mhd::_internal::StarState testStarState = + mhd::_internal::computeStarState(parameters.stateLVec.at(i), parameters.speed.at(i), parameters.speed.at(i).L, + parameters.magneticX.at(i), parameters.totalPressureStar.at(i)); // Now check results - testingUtilities::checkResults(fiducialStarState.at(i).velocityY, - testStarState.velocityY, + testingUtilities::checkResults(fiducialStarState.at(i).velocityY, testStarState.velocityY, parameters.names.at(i) + ", VelocityStarY"); - testingUtilities::checkResults(fiducialStarState.at(i).velocityZ, - testStarState.velocityZ, + testingUtilities::checkResults(fiducialStarState.at(i).velocityZ, testStarState.velocityZ, parameters.names.at(i) + ", VelocityStarZ"); - testingUtilities::checkResults(fiducialStarState.at(i).energy, - testStarState.energy, + testingUtilities::checkResults(fiducialStarState.at(i).energy, testStarState.energy, parameters.names.at(i) + ", EnergyStar"); - testingUtilities::checkResults(fiducialStarState.at(i).magneticY, - testStarState.magneticY, + testingUtilities::checkResults(fiducialStarState.at(i).magneticY, testStarState.magneticY, parameters.names.at(i) + ", MagneticStarY"); - testingUtilities::checkResults(fiducialStarState.at(i).magneticZ, - testStarState.magneticZ, + testingUtilities::checkResults(fiducialStarState.at(i).magneticZ, testStarState.magneticZ, parameters.names.at(i) + ", MagneticStarZ"); } } @@ -2525,44 +1992,35 @@ TEST(tMHDHlldInternalStarFluxes, CorrectInputNonDegenerateExpectCorrectOutput) testParams const parameters; std::vector fiducialFlux{ - {-45.270724071132321, 1369.1771532285088, -556.91765728768155, - -2368.4452742393819, -21413.063415617493, -83.294404848633206, - -504.8413875424834}, - {61.395380340435793, 283.48596932136809, -101.75517013858293, - -51.343648925162142, -1413.4750762739586, 25.139956754826922, - 78.863254638038882}}; + {-45.270724071132321, 1369.1771532285088, -556.91765728768155, -2368.4452742393819, -21413.063415617493, + -83.294404848633206, -504.8413875424834}, + {61.395380340435793, 283.48596932136809, -101.75517013858293, -51.343648925162142, -1413.4750762739586, + 25.139956754826922, 78.863254638038882}}; for (size_t i = 0; i < parameters.names.size(); i++) { - mhd::_internal::StarState testStarState = mhd::_internal::computeStarState( - parameters.stateLVec.at(i), parameters.speed.at(i), - parameters.speed.at(i).L, parameters.magneticX.at(i), - parameters.totalPressureStar.at(i)); + mhd::_internal::StarState testStarState = + mhd::_internal::computeStarState(parameters.stateLVec.at(i), parameters.speed.at(i), parameters.speed.at(i).L, + parameters.magneticX.at(i), parameters.totalPressureStar.at(i)); - mhd::_internal::Flux testFlux = mhd::_internal::starFluxes( - testStarState, parameters.stateLVec.at(i), parameters.flux.at(i), - parameters.speed.at(i), parameters.speed.at(i).L); + mhd::_internal::Flux testFlux = + mhd::_internal::starFluxes(testStarState, parameters.stateLVec.at(i), parameters.flux.at(i), + parameters.speed.at(i), parameters.speed.at(i).L); // Now check results - testingUtilities::checkResults( - fiducialFlux[i].density, testFlux.density, - parameters.names.at(i) + ", DensityStarFlux"); - testingUtilities::checkResults( - fiducialFlux[i].momentumX, testFlux.momentumX, - parameters.names.at(i) + ", MomentumStarFluxX"); - testingUtilities::checkResults( - fiducialFlux[i].momentumY, testFlux.momentumY, - parameters.names.at(i) + ", MomentumStarFluxY"); - testingUtilities::checkResults( - fiducialFlux[i].momentumZ, testFlux.momentumZ, - parameters.names.at(i) + ", MomentumStarFluxZ"); + testingUtilities::checkResults(fiducialFlux[i].density, testFlux.density, + parameters.names.at(i) + ", DensityStarFlux"); + testingUtilities::checkResults(fiducialFlux[i].momentumX, testFlux.momentumX, + parameters.names.at(i) + ", MomentumStarFluxX"); + testingUtilities::checkResults(fiducialFlux[i].momentumY, testFlux.momentumY, + parameters.names.at(i) + ", MomentumStarFluxY"); + testingUtilities::checkResults(fiducialFlux[i].momentumZ, testFlux.momentumZ, + parameters.names.at(i) + ", MomentumStarFluxZ"); testingUtilities::checkResults(fiducialFlux[i].energy, testFlux.energy, parameters.names.at(i) + ", EnergyStarFlux"); - testingUtilities::checkResults( - fiducialFlux[i].magneticY, testFlux.magneticY, - parameters.names.at(i) + ", MagneticStarFluxY"); - testingUtilities::checkResults( - fiducialFlux[i].magneticZ, testFlux.magneticZ, - parameters.names.at(i) + ", MagneticStarFluxZ"); + testingUtilities::checkResults(fiducialFlux[i].magneticY, testFlux.magneticY, + parameters.names.at(i) + ", MagneticStarFluxY"); + testingUtilities::checkResults(fiducialFlux[i].magneticZ, testFlux.magneticZ, + parameters.names.at(i) + ", MagneticStarFluxZ"); } } @@ -2571,42 +2029,35 @@ TEST(tMHDHlldInternalStarFluxes, CorrectInputNonDegenerateExpectCorrectOutput) * case * */ -TEST(tMHDHlldInternalComputeStarState, - CorrectInputDegenerateExpectCorrectOutput) +TEST(tMHDHlldInternalComputeStarState, CorrectInputDegenerateExpectCorrectOutput) { testParams parameters; std::vector fiducialStarState{ - {24.101290139122913, 1.4626377138501221, 5.7559806612277464, - 4.5171065808847731e+17, 18.648382121236992, 70.095850905078336}, - {50.132466596958501, 0.85967712862308099, 1.9480712959548112, - 172.06840532772659, 66.595692901872582, 39.389537509454122}}; + {24.101290139122913, 1.4626377138501221, 5.7559806612277464, 4.5171065808847731e+17, 18.648382121236992, + 70.095850905078336}, + {50.132466596958501, 0.85967712862308099, 1.9480712959548112, 172.06840532772659, 66.595692901872582, + 39.389537509454122}}; // Used to get us into the degenerate case double const totalPressureStarMultiplier = 1E15; parameters.stateLVec.at(0).totalPressure *= totalPressureStarMultiplier; for (size_t i = 0; i < parameters.names.size(); i++) { - mhd::_internal::StarState testStarState = mhd::_internal::computeStarState( - parameters.stateLVec.at(i), parameters.speed.at(i), - parameters.speed.at(i).L, parameters.magneticX.at(i), - parameters.totalPressureStar.at(i)); + mhd::_internal::StarState testStarState = + mhd::_internal::computeStarState(parameters.stateLVec.at(i), parameters.speed.at(i), parameters.speed.at(i).L, + parameters.magneticX.at(i), parameters.totalPressureStar.at(i)); // Now check results - testingUtilities::checkResults(fiducialStarState.at(i).velocityY, - testStarState.velocityY, + testingUtilities::checkResults(fiducialStarState.at(i).velocityY, testStarState.velocityY, parameters.names.at(i) + ", VelocityStarY"); - testingUtilities::checkResults(fiducialStarState.at(i).velocityZ, - testStarState.velocityZ, + testingUtilities::checkResults(fiducialStarState.at(i).velocityZ, testStarState.velocityZ, parameters.names.at(i) + ", VelocityStarZ"); - testingUtilities::checkResults(fiducialStarState.at(i).energy, - testStarState.energy, + testingUtilities::checkResults(fiducialStarState.at(i).energy, testStarState.energy, parameters.names.at(i) + ", EnergyStar"); - testingUtilities::checkResults(fiducialStarState.at(i).magneticY, - testStarState.magneticY, + testingUtilities::checkResults(fiducialStarState.at(i).magneticY, testStarState.magneticY, parameters.names.at(i) + ", MagneticStarY"); - testingUtilities::checkResults(fiducialStarState.at(i).magneticZ, - testStarState.magneticZ, + testingUtilities::checkResults(fiducialStarState.at(i).magneticZ, testStarState.magneticZ, parameters.names.at(i) + ", MagneticStarZ"); } } @@ -2619,43 +2070,34 @@ TEST(tMHDHlldInternalStarFluxes, CorrectInputDegenerateExpectCorrectOutput) double const totalPressureStarMultiplier = 1E15; std::vector fiducialFlux{ - {-144.2887586578122, 1450.1348804310369, -773.30617492819886, - -151.70644305354989, 1378.3797024673304, -1056.6283526454272, - -340.62268733874163}, - {10.040447333773272, 284.85426012223729, -499.05932057162761, - 336.35271628090368, 171.28451793017882, 162.96661864443826, - -524.05361885198215}}; + {-144.2887586578122, 1450.1348804310369, -773.30617492819886, -151.70644305354989, 1378.3797024673304, + -1056.6283526454272, -340.62268733874163}, + {10.040447333773272, 284.85426012223729, -499.05932057162761, 336.35271628090368, 171.28451793017882, + 162.96661864443826, -524.05361885198215}}; parameters.totalPressureStar.at(0) *= totalPressureStarMultiplier; parameters.totalPressureStar.at(1) *= totalPressureStarMultiplier; for (size_t i = 0; i < parameters.names.size(); i++) { - mhd::_internal::Flux testFlux = mhd::_internal::starFluxes( - parameters.starStateLVec.at(i), parameters.stateLVec.at(i), - parameters.flux.at(i), parameters.speed.at(i), - parameters.speed.at(i).L); + mhd::_internal::Flux testFlux = + mhd::_internal::starFluxes(parameters.starStateLVec.at(i), parameters.stateLVec.at(i), parameters.flux.at(i), + parameters.speed.at(i), parameters.speed.at(i).L); // Now check results - testingUtilities::checkResults( - fiducialFlux[i].density, testFlux.density, - parameters.names.at(i) + ", DensityStarFlux"); - testingUtilities::checkResults( - fiducialFlux[i].momentumX, testFlux.momentumX, - parameters.names.at(i) + ", MomentumStarFluxX"); - testingUtilities::checkResults( - fiducialFlux[i].momentumY, testFlux.momentumY, - parameters.names.at(i) + ", MomentumStarFluxY"); - testingUtilities::checkResults( - fiducialFlux[i].momentumZ, testFlux.momentumZ, - parameters.names.at(i) + ", MomentumStarFluxZ"); + testingUtilities::checkResults(fiducialFlux[i].density, testFlux.density, + parameters.names.at(i) + ", DensityStarFlux"); + testingUtilities::checkResults(fiducialFlux[i].momentumX, testFlux.momentumX, + parameters.names.at(i) + ", MomentumStarFluxX"); + testingUtilities::checkResults(fiducialFlux[i].momentumY, testFlux.momentumY, + parameters.names.at(i) + ", MomentumStarFluxY"); + testingUtilities::checkResults(fiducialFlux[i].momentumZ, testFlux.momentumZ, + parameters.names.at(i) + ", MomentumStarFluxZ"); testingUtilities::checkResults(fiducialFlux[i].energy, testFlux.energy, parameters.names.at(i) + ", EnergyStarFlux"); - testingUtilities::checkResults( - fiducialFlux[i].magneticY, testFlux.magneticY, - parameters.names.at(i) + ", MagneticStarFluxY"); - testingUtilities::checkResults( - fiducialFlux[i].magneticZ, testFlux.magneticZ, - parameters.names.at(i) + ", MagneticStarFluxZ"); + testingUtilities::checkResults(fiducialFlux[i].magneticY, testFlux.magneticY, + parameters.names.at(i) + ", MagneticStarFluxY"); + testingUtilities::checkResults(fiducialFlux[i].magneticZ, testFlux.magneticZ, + parameters.names.at(i) + ", MagneticStarFluxZ"); } } // ========================================================================= @@ -2666,43 +2108,34 @@ TEST(tMHDHlldInternalStarFluxes, CorrectInputDegenerateExpectCorrectOutput) * Non-degenerate state * */ -TEST(tMHDHlldInternalDoubleStarState, - CorrectInputNonDegenerateExpectCorrectOutput) +TEST(tMHDHlldInternalDoubleStarState, CorrectInputNonDegenerateExpectCorrectOutput) { testParams const parameters; std::vector fiducialState{ - {-1.5775383335759607, -3.4914062207842482, 45.259313435283325, - 36.670978215630669, -2048.1953674500523, 1721.0582276783819}, - {3.803188977150934, -4.2662645349592765, 71.787329583230417, - 53.189673238238178, -999.79694164635089, 252.047167522579}}; + {-1.5775383335759607, -3.4914062207842482, 45.259313435283325, 36.670978215630669, -2048.1953674500523, + 1721.0582276783819}, + {3.803188977150934, -4.2662645349592765, 71.787329583230417, 53.189673238238178, -999.79694164635089, + 252.047167522579}}; for (size_t i = 0; i < parameters.names.size(); i++) { - mhd::_internal::DoubleStarState const testState = - mhd::_internal::computeDoubleStarState( - parameters.starStateLVec.at(i), parameters.starStateRVec.at(i), - parameters.magneticX.at(i), parameters.totalPressureStar.at(i), - parameters.speed.at(i)); + mhd::_internal::DoubleStarState const testState = mhd::_internal::computeDoubleStarState( + parameters.starStateLVec.at(i), parameters.starStateRVec.at(i), parameters.magneticX.at(i), + parameters.totalPressureStar.at(i), parameters.speed.at(i)); // Now check results - testingUtilities::checkResults( - fiducialState.at(i).velocityY, testState.velocityY, - parameters.names.at(i) + ", VelocityDoubleStarY"); - testingUtilities::checkResults( - fiducialState.at(i).velocityZ, testState.velocityZ, - parameters.names.at(i) + ", VelocityDoubleStarZ"); - testingUtilities::checkResults( - fiducialState.at(i).magneticY, testState.magneticY, - parameters.names.at(i) + ", MagneticDoubleStarY"); - testingUtilities::checkResults( - fiducialState.at(i).magneticZ, testState.magneticZ, - parameters.names.at(i) + ", MagneticDoubleStarZ"); - testingUtilities::checkResults( - fiducialState.at(i).energyL, testState.energyL, - parameters.names.at(i) + ", EnergyDoubleStarL"); - testingUtilities::checkResults( - fiducialState.at(i).energyR, testState.energyR, - parameters.names.at(i) + ", EnergyDoubleStarR"); + testingUtilities::checkResults(fiducialState.at(i).velocityY, testState.velocityY, + parameters.names.at(i) + ", VelocityDoubleStarY"); + testingUtilities::checkResults(fiducialState.at(i).velocityZ, testState.velocityZ, + parameters.names.at(i) + ", VelocityDoubleStarZ"); + testingUtilities::checkResults(fiducialState.at(i).magneticY, testState.magneticY, + parameters.names.at(i) + ", MagneticDoubleStarY"); + testingUtilities::checkResults(fiducialState.at(i).magneticZ, testState.magneticZ, + parameters.names.at(i) + ", MagneticDoubleStarZ"); + testingUtilities::checkResults(fiducialState.at(i).energyL, testState.energyL, + parameters.names.at(i) + ", EnergyDoubleStarL"); + testingUtilities::checkResults(fiducialState.at(i).energyR, testState.energyR, + parameters.names.at(i) + ", EnergyDoubleStarR"); } } @@ -2716,36 +2149,29 @@ TEST(tMHDHlldInternalDoubleStarState, CorrectInputDegenerateExpectCorrectOutput) testParams const parameters; std::vector fiducialState{ - {1.0519818825796206, 0.68198273634686157, 26.835645069149873, - 7.4302316959173442, -999.79694164635089, 90.44484278669114}, - {0.61418047569879897, 0.71813570322922715, 98.974446283273181, - 10.696380763901459, -999.79694164635089, 61.33664731346812}}; + {1.0519818825796206, 0.68198273634686157, 26.835645069149873, 7.4302316959173442, -999.79694164635089, + 90.44484278669114}, + {0.61418047569879897, 0.71813570322922715, 98.974446283273181, 10.696380763901459, -999.79694164635089, + 61.33664731346812}}; for (size_t i = 0; i < parameters.names.size(); i++) { mhd::_internal::DoubleStarState const testState = - mhd::_internal::computeDoubleStarState( - parameters.starStateLVec.at(i), parameters.starStateRVec.at(i), 0.0, - parameters.totalPressureStar.at(i), parameters.speed.at(i)); + mhd::_internal::computeDoubleStarState(parameters.starStateLVec.at(i), parameters.starStateRVec.at(i), 0.0, + parameters.totalPressureStar.at(i), parameters.speed.at(i)); // Now check results - testingUtilities::checkResults( - fiducialState.at(i).velocityY, testState.velocityY, - parameters.names.at(i) + ", VelocityDoubleStarY"); - testingUtilities::checkResults( - fiducialState.at(i).velocityZ, testState.velocityZ, - parameters.names.at(i) + ", VelocityDoubleStarZ"); - testingUtilities::checkResults( - fiducialState.at(i).magneticY, testState.magneticY, - parameters.names.at(i) + ", MagneticDoubleStarY"); - testingUtilities::checkResults( - fiducialState.at(i).magneticZ, testState.magneticZ, - parameters.names.at(i) + ", MagneticDoubleStarZ"); - testingUtilities::checkResults( - fiducialState.at(i).energyL, testState.energyL, - parameters.names.at(i) + ", EnergyDoubleStarL"); - testingUtilities::checkResults( - fiducialState.at(i).energyR, testState.energyR, - parameters.names.at(i) + ", EnergyDoubleStarR"); + testingUtilities::checkResults(fiducialState.at(i).velocityY, testState.velocityY, + parameters.names.at(i) + ", VelocityDoubleStarY"); + testingUtilities::checkResults(fiducialState.at(i).velocityZ, testState.velocityZ, + parameters.names.at(i) + ", VelocityDoubleStarZ"); + testingUtilities::checkResults(fiducialState.at(i).magneticY, testState.magneticY, + parameters.names.at(i) + ", MagneticDoubleStarY"); + testingUtilities::checkResults(fiducialState.at(i).magneticZ, testState.magneticZ, + parameters.names.at(i) + ", MagneticDoubleStarZ"); + testingUtilities::checkResults(fiducialState.at(i).energyL, testState.energyL, + parameters.names.at(i) + ", EnergyDoubleStarL"); + testingUtilities::checkResults(fiducialState.at(i).energyR, testState.energyR, + parameters.names.at(i) + ", EnergyDoubleStarR"); } } // ========================================================================= @@ -2760,43 +2186,32 @@ TEST(tMHDHlldInternalDoubleStarFluxes, CorrectInputExpectCorrectOutput) testParams const parameters; std::vector const fiducialFlux{ - {-144.2887586578122, 1450.1348804310369, -332.80193639987715, - 83.687152337186944, 604.70003506833029, -245.53635448727721, - -746.94190287166407}, - {10.040447333773216, 284.85426012223729, -487.87930516727664, - 490.91728596722157, 59.061079503595323, 30.244176588794346, - -466.15336272175193}}; + {-144.2887586578122, 1450.1348804310369, -332.80193639987715, 83.687152337186944, 604.70003506833029, + -245.53635448727721, -746.94190287166407}, + {10.040447333773216, 284.85426012223729, -487.87930516727664, 490.91728596722157, 59.061079503595323, + 30.244176588794346, -466.15336272175193}}; for (size_t i = 0; i < parameters.names.size(); i++) { - mhd::_internal::Flux const testFlux = - mhd::_internal::computeDoubleStarFluxes( - parameters.DoubleStarStateVec.at(i), - parameters.DoubleStarStateVec.at(i).energyL, - parameters.starStateLVec.at(i), parameters.stateLVec.at(i), - parameters.flux.at(i), parameters.speed.at(i), - parameters.speed.at(i).L, parameters.speed.at(i).LStar); + mhd::_internal::Flux const testFlux = mhd::_internal::computeDoubleStarFluxes( + parameters.DoubleStarStateVec.at(i), parameters.DoubleStarStateVec.at(i).energyL, + parameters.starStateLVec.at(i), parameters.stateLVec.at(i), parameters.flux.at(i), parameters.speed.at(i), + parameters.speed.at(i).L, parameters.speed.at(i).LStar); // Now check results - testingUtilities::checkResults( - fiducialFlux[i].density, testFlux.density, - parameters.names.at(i) + ", DensityStarFlux"); - testingUtilities::checkResults( - fiducialFlux[i].momentumX, testFlux.momentumX, - parameters.names.at(i) + ", MomentumStarFluxX"); - testingUtilities::checkResults( - fiducialFlux[i].momentumY, testFlux.momentumY, - parameters.names.at(i) + ", MomentumStarFluxY"); - testingUtilities::checkResults( - fiducialFlux[i].momentumZ, testFlux.momentumZ, - parameters.names.at(i) + ", MomentumStarFluxZ"); + testingUtilities::checkResults(fiducialFlux[i].density, testFlux.density, + parameters.names.at(i) + ", DensityStarFlux"); + testingUtilities::checkResults(fiducialFlux[i].momentumX, testFlux.momentumX, + parameters.names.at(i) + ", MomentumStarFluxX"); + testingUtilities::checkResults(fiducialFlux[i].momentumY, testFlux.momentumY, + parameters.names.at(i) + ", MomentumStarFluxY"); + testingUtilities::checkResults(fiducialFlux[i].momentumZ, testFlux.momentumZ, + parameters.names.at(i) + ", MomentumStarFluxZ"); testingUtilities::checkResults(fiducialFlux[i].energy, testFlux.energy, parameters.names.at(i) + ", EnergyStarFlux"); - testingUtilities::checkResults( - fiducialFlux[i].magneticY, testFlux.magneticY, - parameters.names.at(i) + ", MagneticStarFluxY"); - testingUtilities::checkResults( - fiducialFlux[i].magneticZ, testFlux.magneticZ, - parameters.names.at(i) + ", MagneticStarFluxZ"); + testingUtilities::checkResults(fiducialFlux[i].magneticY, testFlux.magneticY, + parameters.names.at(i) + ", MagneticStarFluxY"); + testingUtilities::checkResults(fiducialFlux[i].magneticZ, testFlux.magneticZ, + parameters.names.at(i) + ", MagneticStarFluxZ"); } } // ========================================================================= @@ -2823,10 +2238,8 @@ TEST(tMHDHlldInternalReturnFluxes, CorrectInputExpectCorrectOutput) #endif // DE // Lambda for finding indices and check if they're correct - auto findIndex = [](std::vector const &vec, double const &num, - int const &fidIndex, std::string const &name) { - int index = - std::distance(vec.begin(), std::find(vec.begin(), vec.end(), num)); + auto findIndex = [](std::vector const &vec, double const &num, int const &fidIndex, std::string const &name) { + int index = std::distance(vec.begin(), std::find(vec.begin(), vec.end(), num)); EXPECT_EQ(fidIndex, index) << "Error in " << name << " index" << std::endl; return index; @@ -2858,41 +2271,29 @@ TEST(tMHDHlldInternalReturnFluxes, CorrectInputExpectCorrectOutput) int const fiducialMomentumIndexY = threadId + n_cells * o2; int const fiducialMomentumIndexZ = threadId + n_cells * o3; int const fiducialEnergyIndex = threadId + n_cells * grid_enum::Energy; - int const fiducialMagneticYIndex = - threadId + n_cells * (grid_enum::magnetic_x); - int const fiducialMagneticZIndex = - threadId + n_cells * (grid_enum::magnetic_y); + int const fiducialMagneticYIndex = threadId + n_cells * (grid_enum::magnetic_x); + int const fiducialMagneticZIndex = threadId + n_cells * (grid_enum::magnetic_y); - mhd::_internal::returnFluxes(threadId, o1, o2, o3, n_cells, - testFluxArray.data(), inputFlux, inputState); + mhd::_internal::returnFluxes(threadId, o1, o2, o3, n_cells, testFluxArray.data(), inputFlux, inputState); // Find the indices for the various fields - int densityLoc = findIndex(testFluxArray, inputFlux.density, - fiducialDensityIndex, "density"); - int momentumXLocX = findIndex(testFluxArray, inputFlux.momentumX, - fiducialMomentumIndexX, "momentum X"); - int momentumYLocY = findIndex(testFluxArray, inputFlux.momentumY, - fiducialMomentumIndexY, "momentum Y"); - int momentumZLocZ = findIndex(testFluxArray, inputFlux.momentumZ, - fiducialMomentumIndexZ, "momentum Z"); - int energyLoc = findIndex(testFluxArray, inputFlux.energy, - fiducialEnergyIndex, "energy"); - int magneticYLoc = findIndex(testFluxArray, inputFlux.magneticY, - fiducialMagneticYIndex, "magnetic Y"); - int magneticZLoc = findIndex(testFluxArray, inputFlux.magneticZ, - fiducialMagneticZIndex, "magnetic Z"); + int densityLoc = findIndex(testFluxArray, inputFlux.density, fiducialDensityIndex, "density"); + int momentumXLocX = findIndex(testFluxArray, inputFlux.momentumX, fiducialMomentumIndexX, "momentum X"); + int momentumYLocY = findIndex(testFluxArray, inputFlux.momentumY, fiducialMomentumIndexY, "momentum Y"); + int momentumZLocZ = findIndex(testFluxArray, inputFlux.momentumZ, fiducialMomentumIndexZ, "momentum Z"); + int energyLoc = findIndex(testFluxArray, inputFlux.energy, fiducialEnergyIndex, "energy"); + int magneticYLoc = findIndex(testFluxArray, inputFlux.magneticY, fiducialMagneticYIndex, "magnetic Y"); + int magneticZLoc = findIndex(testFluxArray, inputFlux.magneticZ, fiducialMagneticZIndex, "magnetic Z"); for (size_t i = 0; i < testFluxArray.size(); i++) { // Skip the already checked indices - if ((i != densityLoc) and (i != momentumXLocX) and - (i != momentumYLocY) and (i != momentumZLocZ) and (i != energyLoc) and - (i != magneticYLoc) and (i != magneticZLoc)) { - EXPECT_EQ(dummyValue, testFluxArray.at(i)) - << "Unexpected value at index that _returnFluxes shouldn't be " - "touching" - << std::endl - << "Index = " << i << std::endl - << "Direction = " << direction << std::endl; + if ((i != densityLoc) and (i != momentumXLocX) and (i != momentumYLocY) and (i != momentumZLocZ) and + (i != energyLoc) and (i != magneticYLoc) and (i != magneticZLoc)) { + EXPECT_EQ(dummyValue, testFluxArray.at(i)) << "Unexpected value at index that _returnFluxes shouldn't be " + "touching" + << std::endl + << "Index = " << i << std::endl + << "Direction = " << direction << std::endl; } } } @@ -2908,18 +2309,15 @@ TEST(tMHDHlldInternalStarTotalPressure, CorrectInputExpectCorrectOutput) { testParams const parameters; - std::vector const fiducialPressure{6802.2800807224075, - 3476.1984612875144}; + std::vector const fiducialPressure{6802.2800807224075, 3476.1984612875144}; for (size_t i = 0; i < parameters.names.size(); i++) { - Real const testPressure = mhd::_internal::starTotalPressure( - parameters.stateLVec.at(i), parameters.stateRVec.at(i), - parameters.speed.at(i)); + Real const testPressure = mhd::_internal::starTotalPressure(parameters.stateLVec.at(i), parameters.stateRVec.at(i), + parameters.speed.at(i)); // Now check results - testingUtilities::checkResults( - fiducialPressure.at(i), testPressure, - parameters.names.at(i) + ", total pressure in the star states"); + testingUtilities::checkResults(fiducialPressure.at(i), testPressure, + parameters.names.at(i) + ", total pressure in the star states"); } } // ========================================================================= @@ -2961,29 +2359,20 @@ TEST(tMHDHlldInternalLoadState, CorrectInputExpectCorrectOutput) o3 = 2; } - mhd::_internal::State const testState = mhd::_internal::loadState( - interfaceArray.data(), parameters.magneticX.at(0), parameters.gamma, - threadId, n_cells, o1, o2, o3); + mhd::_internal::State const testState = mhd::_internal::loadState(interfaceArray.data(), parameters.magneticX.at(0), + parameters.gamma, threadId, n_cells, o1, o2, o3); // Now check results - testingUtilities::checkResults(fiducialState.at(direction).density, - testState.density, ", Density"); - testingUtilities::checkResults(fiducialState.at(direction).velocityX, - testState.velocityX, ", velocityX"); - testingUtilities::checkResults(fiducialState.at(direction).velocityY, - testState.velocityY, ", velocityY"); - testingUtilities::checkResults(fiducialState.at(direction).velocityZ, - testState.velocityZ, ", velocityZ"); - testingUtilities::checkResults(fiducialState.at(direction).energy, - testState.energy, ", energy"); - testingUtilities::checkResults(fiducialState.at(direction).magneticY, - testState.magneticY, ", magneticY"); - testingUtilities::checkResults(fiducialState.at(direction).magneticZ, - testState.magneticZ, ", magneticZ"); - testingUtilities::checkResults(fiducialState.at(direction).gasPressure, - testState.gasPressure, ", gasPressure"); - testingUtilities::checkResults(fiducialState.at(direction).totalPressure, - testState.totalPressure, ", totalPressure"); + testingUtilities::checkResults(fiducialState.at(direction).density, testState.density, ", Density"); + testingUtilities::checkResults(fiducialState.at(direction).velocityX, testState.velocityX, ", velocityX"); + testingUtilities::checkResults(fiducialState.at(direction).velocityY, testState.velocityY, ", velocityY"); + testingUtilities::checkResults(fiducialState.at(direction).velocityZ, testState.velocityZ, ", velocityZ"); + testingUtilities::checkResults(fiducialState.at(direction).energy, testState.energy, ", energy"); + testingUtilities::checkResults(fiducialState.at(direction).magneticY, testState.magneticY, ", magneticY"); + testingUtilities::checkResults(fiducialState.at(direction).magneticZ, testState.magneticZ, ", magneticZ"); + testingUtilities::checkResults(fiducialState.at(direction).gasPressure, testState.gasPressure, ", gasPressure"); + testingUtilities::checkResults(fiducialState.at(direction).totalPressure, testState.totalPressure, + ", totalPressure"); } } // ========================================================================= diff --git a/src/riemann_solvers/roe_cuda.cu b/src/riemann_solvers/roe_cuda.cu index 8c92da290..bfbeb8f5a 100644 --- a/src/riemann_solvers/roe_cuda.cu +++ b/src/riemann_solvers/roe_cuda.cu @@ -18,10 +18,8 @@ * *dev_flux, int nx, int ny, int nz, int n_ghost, Real gamma, Real *dev_etah, * int dir, int n_fields) \brief Roe Riemann solver based on the version * described in Stone et al, 2008. */ -__global__ void Calculate_Roe_Fluxes_CUDA(Real *dev_bounds_L, - Real *dev_bounds_R, Real *dev_flux, - int nx, int ny, int nz, int n_ghost, - Real gamma, int dir, int n_fields) +__global__ void Calculate_Roe_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_R, Real *dev_flux, int nx, int ny, + int nz, int n_ghost, Real gamma, int dir, int n_fields) { // get a thread index int blockId = blockIdx.x + blockIdx.y * gridDim.x; @@ -54,8 +52,8 @@ __global__ void Calculate_Roe_Fluxes_CUDA(Real *dev_bounds_L, Real dgel, gel, dger, ger, f_ge_l, f_ge_r, E_kin; #endif #ifdef SCALAR - Real dscalarl[NSCALARS], scalarl[NSCALARS], dscalarr[NSCALARS], - scalarr[NSCALARS], f_scalar_l[NSCALARS], f_scalar_r[NSCALARS]; + Real dscalarl[NSCALARS], scalarl[NSCALARS], dscalarr[NSCALARS], scalarr[NSCALARS], f_scalar_l[NSCALARS], + f_scalar_r[NSCALARS]; #endif int o1, o2, o3; @@ -246,8 +244,7 @@ __global__ void Calculate_Roe_Fluxes_CUDA(Real *dev_bounds_L, Real coeff = 0.0; // left eigenvector [0] * del_q - a0 = del_d * Na * (0.5 * g1 * vsq + vx * a) - - del_mx * Na * (g1 * vx + a) - del_my * Na * g1 * vy - + a0 = del_d * Na * (0.5 * g1 * vsq + vx * a) - del_mx * Na * (g1 * vx + a) - del_my * Na * g1 * vy - del_mz * Na * g1 * vz + del_E * Na * g1; coeff = a0 * fmax(fabs(lambda_m), etah); sum_0 += coeff; @@ -266,8 +263,8 @@ __global__ void Calculate_Roe_Fluxes_CUDA(Real *dev_bounds_L, sum_3 += coeff; sum_4 += coeff * vz; // left eigenvector [3] * del_q - a3 = del_d * (1.0 - Na * g1 * vsq) + del_mx * g1 * vx / asq + - del_my * g1 * vy / asq + del_mz * g1 * vz / asq - del_E * g1 / asq; + a3 = del_d * (1.0 - Na * g1 * vsq) + del_mx * g1 * vx / asq + del_my * g1 * vy / asq + del_mz * g1 * vz / asq - + del_E * g1 / asq; coeff = a3 * fmax(fabs(lambda_0), etah); sum_0 += coeff; sum_1 += coeff * vx; @@ -275,8 +272,7 @@ __global__ void Calculate_Roe_Fluxes_CUDA(Real *dev_bounds_L, sum_3 += coeff * vz; sum_4 += coeff * 0.5 * vsq; // left eigenvector [4] * del_q - a4 = del_d * Na * (0.5 * g1 * vsq - vx * a) - - del_mx * Na * (g1 * vx - a) - del_my * Na * g1 * vy - + a4 = del_d * Na * (0.5 * g1 * vsq - vx * a) - del_mx * Na * (g1 * vx - a) - del_my * Na * g1 * vy - del_mz * Na * g1 * vz + del_E * Na * g1; coeff = a4 * fmax(fabs(lambda_p), etah); sum_0 += coeff; @@ -297,9 +293,7 @@ __global__ void Calculate_Roe_Fluxes_CUDA(Real *dev_bounds_L, if (test0 <= 0.0) { hlle_flag = 1; } - if (test4 - - 0.5 * (test1 * test1 + test2 * test2 + test3 * test3) / test0 < - 0.0) { + if (test4 - 0.5 * (test1 * test1 + test2 * test2 + test3 * test3) / test0 < 0.0) { hlle_flag = 2; } } @@ -314,9 +308,7 @@ __global__ void Calculate_Roe_Fluxes_CUDA(Real *dev_bounds_L, if (test0 <= 0.0) { hlle_flag = 1; } - if (test4 - - 0.5 * (test1 * test1 + test2 * test2 + test3 * test3) / test0 < - 0.0) { + if (test4 - 0.5 * (test1 * test1 + test2 * test2 + test3 * test3) / test0 < 0.0) { hlle_flag = 2; } } @@ -368,25 +360,19 @@ __global__ void Calculate_Roe_Fluxes_CUDA(Real *dev_bounds_L, // compute the HLLE flux at the interface tmp = 0.5 * (bp + bm) / (bp - bm); - dev_flux[tid] = 0.5 * (f_d_l + f_d_r) + (f_d_l - f_d_r) * tmp; - dev_flux[o1 * n_cells + tid] = - 0.5 * (f_mx_l + f_mx_r) + (f_mx_l - f_mx_r) * tmp; - dev_flux[o2 * n_cells + tid] = - 0.5 * (f_my_l + f_my_r) + (f_my_l - f_my_r) * tmp; - dev_flux[o3 * n_cells + tid] = - 0.5 * (f_mz_l + f_mz_r) + (f_mz_l - f_mz_r) * tmp; - dev_flux[4 * n_cells + tid] = - 0.5 * (f_E_l + f_E_r) + (f_E_l - f_E_r) * tmp; + dev_flux[tid] = 0.5 * (f_d_l + f_d_r) + (f_d_l - f_d_r) * tmp; + dev_flux[o1 * n_cells + tid] = 0.5 * (f_mx_l + f_mx_r) + (f_mx_l - f_mx_r) * tmp; + dev_flux[o2 * n_cells + tid] = 0.5 * (f_my_l + f_my_r) + (f_my_l - f_my_r) * tmp; + dev_flux[o3 * n_cells + tid] = 0.5 * (f_mz_l + f_mz_r) + (f_mz_l - f_mz_r) * tmp; + dev_flux[4 * n_cells + tid] = 0.5 * (f_E_l + f_E_r) + (f_E_l - f_E_r) * tmp; #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { dev_flux[(5 + i) * n_cells + tid] = - 0.5 * (f_scalar_l[i] + f_scalar_r[i]) + - (f_scalar_l[i] - f_scalar_r[i]) * tmp; + 0.5 * (f_scalar_l[i] + f_scalar_r[i]) + (f_scalar_l[i] - f_scalar_r[i]) * tmp; } #endif #ifdef DE - dev_flux[(n_fields - 1) * n_cells + tid] = - 0.5 * (f_ge_l + f_ge_r) + (f_ge_l - f_ge_r) * tmp; + dev_flux[(n_fields - 1) * n_cells + tid] = 0.5 * (f_ge_l + f_ge_r) + (f_ge_l - f_ge_r) * tmp; #endif return; } diff --git a/src/riemann_solvers/roe_cuda.h b/src/riemann_solvers/roe_cuda.h index 00df99d71..4ee81022d 100644 --- a/src/riemann_solvers/roe_cuda.h +++ b/src/riemann_solvers/roe_cuda.h @@ -12,10 +12,8 @@ * *dev_flux, int nx, int ny, int nz, int n_ghost, Real gamma, Real *dev_etah, * int dir, int n_fields) \brief Roe Riemann solver based on the version * described in Stone et al, 2008. */ -__global__ void Calculate_Roe_Fluxes_CUDA(Real *dev_bounds_L, - Real *dev_bounds_R, Real *dev_flux, - int nx, int ny, int nz, int n_ghost, - Real gamma, int dir, int n_fields); +__global__ void Calculate_Roe_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_R, Real *dev_flux, int nx, int ny, + int nz, int n_ghost, Real gamma, int dir, int n_fields); #endif // ROE_CUDA_H #endif // CUDA diff --git a/src/system_tests/hydro_system_tests.cpp b/src/system_tests/hydro_system_tests.cpp index 76e1fce7b..0922368a0 100644 --- a/src/system_tests/hydro_system_tests.cpp +++ b/src/system_tests/hydro_system_tests.cpp @@ -30,15 +30,13 @@ * */ /// @{ -class tHYDROSYSTEMSodShockTubeParameterizedMpi - : public ::testing::TestWithParam +class tHYDROSYSTEMSodShockTubeParameterizedMpi : public ::testing::TestWithParam { protected: systemTest::SystemTestRunner sodTest; }; -TEST_P(tHYDROSYSTEMSodShockTubeParameterizedMpi, - CorrectInputExpectCorrectOutput) +TEST_P(tHYDROSYSTEMSodShockTubeParameterizedMpi, CorrectInputExpectCorrectOutput) { #ifdef MHD // Loosen correctness check to account for MHD only having PCM. This is @@ -47,9 +45,7 @@ TEST_P(tHYDROSYSTEMSodShockTubeParameterizedMpi, // Don't test the gas energy fields auto datasetNames = sodTest.getDataSetsToTest(); - datasetNames.erase( - std::remove(datasetNames.begin(), datasetNames.end(), "GasEnergy"), - datasetNames.end()); + datasetNames.erase(std::remove(datasetNames.begin(), datasetNames.end(), "GasEnergy"), datasetNames.end()); // Set the magnetic fiducial datasets to zero size_t const size = std::pow(65, 3); @@ -67,8 +63,7 @@ TEST_P(tHYDROSYSTEMSodShockTubeParameterizedMpi, sodTest.runTest(); } -INSTANTIATE_TEST_SUITE_P(CorrectInputExpectCorrectOutput, - tHYDROSYSTEMSodShockTubeParameterizedMpi, +INSTANTIATE_TEST_SUITE_P(CorrectInputExpectCorrectOutput, tHYDROSYSTEMSodShockTubeParameterizedMpi, ::testing::Values(1, 2, 4)); /// @} // ============================================================================= @@ -96,10 +91,9 @@ TEST(tHYDROtMHDSYSTEMSoundWave3D, CorrectInputExpectCorrectOutput) double real_kx = 2 * PI; // kx of the physical problem - double kx = real_kx * dx; - double speed = 1; // speed of wave is 1 since P = 0.6 and gamma = 1.666667 - double phase = - kx * 0.5 - speed * time * real_kx; // kx*0.5 for half-cell offset + double kx = real_kx * dx; + double speed = 1; // speed of wave is 1 since P = 0.6 and gamma = 1.666667 + double phase = kx * 0.5 - speed * time * real_kx; // kx*0.5 for half-cell offset double tolerance = 1e-7; systemTest::SystemTestRunner testObject(false, false, false); @@ -114,11 +108,10 @@ TEST(tHYDROtMHDSYSTEMSoundWave3D, CorrectInputExpectCorrectOutput) testObject.openHydroTestData(); - ASSERT_NO_FATAL_FAILURE(testingUtilities::analyticSine( - testObject, "density", 1.0, amplitude, kx, 0.0, 0.0, phase, tolerance)); ASSERT_NO_FATAL_FAILURE( - testingUtilities::analyticSine(testObject, "momentum_x", 0.0, amplitude, - kx, 0.0, 0.0, phase, tolerance)); + testingUtilities::analyticSine(testObject, "density", 1.0, amplitude, kx, 0.0, 0.0, phase, tolerance)); + ASSERT_NO_FATAL_FAILURE( + testingUtilities::analyticSine(testObject, "momentum_x", 0.0, amplitude, kx, 0.0, 0.0, phase, tolerance)); // testingUtilities::analyticSine(testObject,"momentum_y",0.0,amplitude,kx,0.0,0.0,0.0,tolerance); // testingUtilities::analyticSine(testObject,"momentum_z",0.0,amplitude,kx,0.0,0.0,0.0,tolerance); } @@ -133,31 +126,25 @@ TEST(tHYDROtMHDSYSTEMSoundWave3D, CorrectInputExpectCorrectOutput) * */ /// @{ -class tHYDROtMHDSYSTEMLinearWavesParameterizedMpi - : public ::testing::TestWithParam +class tHYDROtMHDSYSTEMLinearWavesParameterizedMpi : public ::testing::TestWithParam { public: - tHYDROtMHDSYSTEMLinearWavesParameterizedMpi() - : waveTest(false, true, false, false){}; + tHYDROtMHDSYSTEMLinearWavesParameterizedMpi() : waveTest(false, true, false, false){}; protected: systemTest::SystemTestRunner waveTest; #ifdef PCM - double const allowedL1Error = - 4E-7; // Based on results in Gardiner & Stone 2008 - double const allowedError = 4E-7; + double const allowedL1Error = 4E-7; // Based on results in Gardiner & Stone 2008 + double const allowedError = 4E-7; #else // PCM - double const allowedL1Error = - 1E-7; // Based on results in Gardiner & Stone 2008 - double const allowedError = 1E-7; + double const allowedL1Error = 1E-7; // Based on results in Gardiner & Stone 2008 + double const allowedError = 1E-7; #endif // PCM - void setLaunchParams(double const &waveSpeed, double const &rEigenVec_rho, - double const &rEigenVec_MomentumX, - double const &rEigenVec_MomentumY, - double const &rEigenVec_MomentumZ, - double const &rEigenVec_E, double const &vx = 0.0) + void setLaunchParams(double const &waveSpeed, double const &rEigenVec_rho, double const &rEigenVec_MomentumX, + double const &rEigenVec_MomentumY, double const &rEigenVec_MomentumZ, double const &rEigenVec_E, + double const &vx = 0.0) { // Constant for all tests size_t const N = 32; @@ -169,20 +156,15 @@ class tHYDROtMHDSYSTEMLinearWavesParameterizedMpi waveTest.chollaLaunchParams.append(" nx=" + to_string_exact(2 * N)); waveTest.chollaLaunchParams.append(" ny=" + to_string_exact(N)); waveTest.chollaLaunchParams.append(" nz=" + to_string_exact(N)); - waveTest.chollaLaunchParams.append(" tout=" + - to_string_exact(tOut)); - waveTest.chollaLaunchParams.append(" outstep=" + - to_string_exact(tOut)); + waveTest.chollaLaunchParams.append(" tout=" + to_string_exact(tOut)); + waveTest.chollaLaunchParams.append(" outstep=" + to_string_exact(tOut)); waveTest.chollaLaunchParams.append(" init=Linear_Wave"); waveTest.chollaLaunchParams.append(" xmin=0.0"); waveTest.chollaLaunchParams.append(" ymin=0.0"); waveTest.chollaLaunchParams.append(" zmin=0.0"); - waveTest.chollaLaunchParams.append(" xlen=" + - to_string_exact(2 * domain)); - waveTest.chollaLaunchParams.append(" ylen=" + - to_string_exact(domain)); - waveTest.chollaLaunchParams.append(" zlen=" + - to_string_exact(domain)); + waveTest.chollaLaunchParams.append(" xlen=" + to_string_exact(2 * domain)); + waveTest.chollaLaunchParams.append(" ylen=" + to_string_exact(domain)); + waveTest.chollaLaunchParams.append(" zlen=" + to_string_exact(domain)); waveTest.chollaLaunchParams.append(" xl_bcnd=1"); waveTest.chollaLaunchParams.append(" xu_bcnd=1"); waveTest.chollaLaunchParams.append(" yl_bcnd=1"); @@ -193,24 +175,17 @@ class tHYDROtMHDSYSTEMLinearWavesParameterizedMpi waveTest.chollaLaunchParams.append(" vx=" + to_string_exact(vx)); waveTest.chollaLaunchParams.append(" vy=0"); waveTest.chollaLaunchParams.append(" vz=0"); - waveTest.chollaLaunchParams.append(" P=" + - to_string_exact(1 / gamma)); + waveTest.chollaLaunchParams.append(" P=" + to_string_exact(1 / gamma)); waveTest.chollaLaunchParams.append(" Bx=0"); waveTest.chollaLaunchParams.append(" By=0"); waveTest.chollaLaunchParams.append(" Bz=0"); waveTest.chollaLaunchParams.append(" A='1e-6'"); - waveTest.chollaLaunchParams.append(" gamma=" + - to_string_exact(gamma)); - waveTest.chollaLaunchParams.append(" rEigenVec_rho=" + - to_string_exact(rEigenVec_rho)); - waveTest.chollaLaunchParams.append( - " rEigenVec_MomentumX=" + to_string_exact(rEigenVec_MomentumX)); - waveTest.chollaLaunchParams.append( - " rEigenVec_MomentumY=" + to_string_exact(rEigenVec_MomentumY)); - waveTest.chollaLaunchParams.append( - " rEigenVec_MomentumZ=" + to_string_exact(rEigenVec_MomentumZ)); - waveTest.chollaLaunchParams.append(" rEigenVec_E=" + - to_string_exact(rEigenVec_E)); + waveTest.chollaLaunchParams.append(" gamma=" + to_string_exact(gamma)); + waveTest.chollaLaunchParams.append(" rEigenVec_rho=" + to_string_exact(rEigenVec_rho)); + waveTest.chollaLaunchParams.append(" rEigenVec_MomentumX=" + to_string_exact(rEigenVec_MomentumX)); + waveTest.chollaLaunchParams.append(" rEigenVec_MomentumY=" + to_string_exact(rEigenVec_MomentumY)); + waveTest.chollaLaunchParams.append(" rEigenVec_MomentumZ=" + to_string_exact(rEigenVec_MomentumZ)); + waveTest.chollaLaunchParams.append(" rEigenVec_E=" + to_string_exact(rEigenVec_E)); waveTest.chollaLaunchParams.append(" rEigenVec_Bx=0"); waveTest.chollaLaunchParams.append(" rEigenVec_By=0"); waveTest.chollaLaunchParams.append(" rEigenVec_Bz=0"); @@ -219,8 +194,7 @@ class tHYDROtMHDSYSTEMLinearWavesParameterizedMpi // Sound Waves Moving Left and Right // ================================= -TEST_P(tHYDROtMHDSYSTEMLinearWavesParameterizedMpi, - SoundWaveRightMovingCorrectInputExpectCorrectOutput) +TEST_P(tHYDROtMHDSYSTEMLinearWavesParameterizedMpi, SoundWaveRightMovingCorrectInputExpectCorrectOutput) { // Specific to this test double const waveSpeed = 1.; @@ -233,8 +207,7 @@ TEST_P(tHYDROtMHDSYSTEMLinearWavesParameterizedMpi, double const rEigenVec_E = 1.5; // Set the launch parameters - setLaunchParams(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, - rEigenVec_MomentumY, rEigenVec_MomentumZ, rEigenVec_E); + setLaunchParams(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, rEigenVec_MomentumY, rEigenVec_MomentumZ, rEigenVec_E); // Set the number of MPI ranks waveTest.numMpiRanks = GetParam(); @@ -246,8 +219,7 @@ TEST_P(tHYDROtMHDSYSTEMLinearWavesParameterizedMpi, waveTest.runL1ErrorTest(2 * allowedL1Error, allowedError); } -TEST_P(tHYDROtMHDSYSTEMLinearWavesParameterizedMpi, - SoundWaveLeftMovingCorrectInputExpectCorrectOutput) +TEST_P(tHYDROtMHDSYSTEMLinearWavesParameterizedMpi, SoundWaveLeftMovingCorrectInputExpectCorrectOutput) { // Specific to this test double const waveSpeed = 1.; @@ -260,8 +232,7 @@ TEST_P(tHYDROtMHDSYSTEMLinearWavesParameterizedMpi, double const rEigenVec_E = 1.5; // Set the launch parameters - setLaunchParams(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, - rEigenVec_MomentumY, rEigenVec_MomentumZ, rEigenVec_E); + setLaunchParams(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, rEigenVec_MomentumY, rEigenVec_MomentumZ, rEigenVec_E); // Set the number of MPI ranks waveTest.numMpiRanks = GetParam(); @@ -275,8 +246,7 @@ TEST_P(tHYDROtMHDSYSTEMLinearWavesParameterizedMpi, // Contact Waves Moving Left and Right // =================================== -TEST_P(tHYDROtMHDSYSTEMLinearWavesParameterizedMpi, - HydroContactWaveCorrectInputExpectCorrectOutput) +TEST_P(tHYDROtMHDSYSTEMLinearWavesParameterizedMpi, HydroContactWaveCorrectInputExpectCorrectOutput) { // Specific to this test double const waveSpeed = 1.0; @@ -290,8 +260,7 @@ TEST_P(tHYDROtMHDSYSTEMLinearWavesParameterizedMpi, double const velocityX = waveSpeed; // Set the launch parameters - setLaunchParams(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, - rEigenVec_MomentumY, rEigenVec_MomentumZ, rEigenVec_E, + setLaunchParams(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, rEigenVec_MomentumY, rEigenVec_MomentumZ, rEigenVec_E, velocityX); // Set the number of MPI ranks @@ -304,7 +273,6 @@ TEST_P(tHYDROtMHDSYSTEMLinearWavesParameterizedMpi, waveTest.runL1ErrorTest(allowedL1Error, allowedError); } -INSTANTIATE_TEST_SUITE_P(, tHYDROtMHDSYSTEMLinearWavesParameterizedMpi, - ::testing::Values(1)); +INSTANTIATE_TEST_SUITE_P(, tHYDROtMHDSYSTEMLinearWavesParameterizedMpi, ::testing::Values(1)); /// @} // ============================================================================= \ No newline at end of file diff --git a/src/system_tests/mhd_system_tests.cpp b/src/system_tests/mhd_system_tests.cpp index ac8be92f7..ca1a63514 100644 --- a/src/system_tests/mhd_system_tests.cpp +++ b/src/system_tests/mhd_system_tests.cpp @@ -25,31 +25,27 @@ * */ /// @{ -class tMHDSYSTEMConstantParameterizedMpi - : public ::testing::TestWithParam +class tMHDSYSTEMConstantParameterizedMpi : public ::testing::TestWithParam { protected: systemTest::SystemTestRunner constantTest; }; // Test with all mangetic fields set to zero -TEST_P(tMHDSYSTEMConstantParameterizedMpi, - ZeroMagneticFieldCorrectInputExpectCorrectOutput) +TEST_P(tMHDSYSTEMConstantParameterizedMpi, ZeroMagneticFieldCorrectInputExpectCorrectOutput) { constantTest.numMpiRanks = GetParam(); constantTest.runTest(); } // Test with all mangetic fields set to one -TEST_P(tMHDSYSTEMConstantParameterizedMpi, - MagneticFieldCorrectInputExpectCorrectOutput) +TEST_P(tMHDSYSTEMConstantParameterizedMpi, MagneticFieldCorrectInputExpectCorrectOutput) { constantTest.numMpiRanks = GetParam(); constantTest.runTest(); } -INSTANTIATE_TEST_SUITE_P(, tMHDSYSTEMConstantParameterizedMpi, - ::testing::Values(1, 2, 4)); +INSTANTIATE_TEST_SUITE_P(, tMHDSYSTEMConstantParameterizedMpi, ::testing::Values(1, 2, 4)); /// @} // ============================================================================= @@ -63,34 +59,26 @@ INSTANTIATE_TEST_SUITE_P(, tMHDSYSTEMConstantParameterizedMpi, * */ /// @{ -class tMHDSYSTEMLinearWavesParameterizedAngle - : public ::testing::TestWithParam> +class tMHDSYSTEMLinearWavesParameterizedAngle : public ::testing::TestWithParam> { public: - tMHDSYSTEMLinearWavesParameterizedAngle() - : waveTest(false, true, false, false){}; + tMHDSYSTEMLinearWavesParameterizedAngle() : waveTest(false, true, false, false){}; protected: systemTest::SystemTestRunner waveTest; #ifdef PCM - double const allowedL1Error = - 4E-7; // Based on results in Gardiner & Stone 2008 - double const allowedError = 4E-7; + double const allowedL1Error = 4E-7; // Based on results in Gardiner & Stone 2008 + double const allowedError = 4E-7; #else // PCM - double const allowedL1Error = - 1E-7; // Based on results in Gardiner & Stone 2008 - double const allowedError = 1E-7; + double const allowedL1Error = 1E-7; // Based on results in Gardiner & Stone 2008 + double const allowedError = 1E-7; #endif // PCM - void setLaunchParams(double const &waveSpeed, double const &rEigenVec_rho, - double const &rEigenVec_MomentumX, - double const &rEigenVec_MomentumY, - double const &rEigenVec_MomentumZ, - double const &rEigenVec_E, double const &rEigenVec_Bx, - double const &rEigenVec_By, double const &rEigenVec_Bz, - double const &pitch, double const &yaw, - double const &domain, int const &domain_direction, + void setLaunchParams(double const &waveSpeed, double const &rEigenVec_rho, double const &rEigenVec_MomentumX, + double const &rEigenVec_MomentumY, double const &rEigenVec_MomentumZ, double const &rEigenVec_E, + double const &rEigenVec_Bx, double const &rEigenVec_By, double const &rEigenVec_Bz, + double const &pitch, double const &yaw, double const &domain, int const &domain_direction, double const &vx = 0.0) { // Constant for all tests @@ -134,8 +122,7 @@ class tMHDSYSTEMLinearWavesParameterizedAngle std::swap(rEigenVec_MomentumX_rot, rEigenVec_MomentumZ_rot); break; default: - throw std::invalid_argument( - "Invalid value of domain_direction given to setLaunchParams"); + throw std::invalid_argument("Invalid value of domain_direction given to setLaunchParams"); break; } @@ -143,20 +130,15 @@ class tMHDSYSTEMLinearWavesParameterizedAngle waveTest.chollaLaunchParams.append(" nx=" + to_string_exact(nx)); waveTest.chollaLaunchParams.append(" ny=" + to_string_exact(ny)); waveTest.chollaLaunchParams.append(" nz=" + to_string_exact(nz)); - waveTest.chollaLaunchParams.append(" tout=" + - to_string_exact(tOut)); - waveTest.chollaLaunchParams.append(" outstep=" + - to_string_exact(tOut)); + waveTest.chollaLaunchParams.append(" tout=" + to_string_exact(tOut)); + waveTest.chollaLaunchParams.append(" outstep=" + to_string_exact(tOut)); waveTest.chollaLaunchParams.append(" init=Linear_Wave"); waveTest.chollaLaunchParams.append(" xmin=0.0"); waveTest.chollaLaunchParams.append(" ymin=0.0"); waveTest.chollaLaunchParams.append(" zmin=0.0"); - waveTest.chollaLaunchParams.append(" xlen=" + - to_string_exact(x_len)); - waveTest.chollaLaunchParams.append(" ylen=" + - to_string_exact(y_len)); - waveTest.chollaLaunchParams.append(" zlen=" + - to_string_exact(z_len)); + waveTest.chollaLaunchParams.append(" xlen=" + to_string_exact(x_len)); + waveTest.chollaLaunchParams.append(" ylen=" + to_string_exact(y_len)); + waveTest.chollaLaunchParams.append(" zlen=" + to_string_exact(z_len)); waveTest.chollaLaunchParams.append(" xl_bcnd=1"); waveTest.chollaLaunchParams.append(" xu_bcnd=1"); waveTest.chollaLaunchParams.append(" yl_bcnd=1"); @@ -164,52 +146,31 @@ class tMHDSYSTEMLinearWavesParameterizedAngle waveTest.chollaLaunchParams.append(" zl_bcnd=1"); waveTest.chollaLaunchParams.append(" zu_bcnd=1"); waveTest.chollaLaunchParams.append(" rho=1.0"); - waveTest.chollaLaunchParams.append(" vx=" + - to_string_exact(vx_rot)); - waveTest.chollaLaunchParams.append(" vy=" + - to_string_exact(vy_rot)); - waveTest.chollaLaunchParams.append(" vz=" + - to_string_exact(vz_rot)); - waveTest.chollaLaunchParams.append(" P=" + - to_string_exact(1 / gamma)); - waveTest.chollaLaunchParams.append(" Bx=" + - to_string_exact(Bx_rot)); - waveTest.chollaLaunchParams.append(" By=" + - to_string_exact(By_rot)); - waveTest.chollaLaunchParams.append(" Bz=" + - to_string_exact(Bz_rot)); + waveTest.chollaLaunchParams.append(" vx=" + to_string_exact(vx_rot)); + waveTest.chollaLaunchParams.append(" vy=" + to_string_exact(vy_rot)); + waveTest.chollaLaunchParams.append(" vz=" + to_string_exact(vz_rot)); + waveTest.chollaLaunchParams.append(" P=" + to_string_exact(1 / gamma)); + waveTest.chollaLaunchParams.append(" Bx=" + to_string_exact(Bx_rot)); + waveTest.chollaLaunchParams.append(" By=" + to_string_exact(By_rot)); + waveTest.chollaLaunchParams.append(" Bz=" + to_string_exact(Bz_rot)); waveTest.chollaLaunchParams.append(" A='1e-6'"); - waveTest.chollaLaunchParams.append(" gamma=" + - to_string_exact(gamma)); - waveTest.chollaLaunchParams.append(" rEigenVec_rho=" + - to_string_exact(rEigenVec_rho)); - waveTest.chollaLaunchParams.append( - " rEigenVec_MomentumX=" + - to_string_exact(rEigenVec_MomentumX_rot)); - waveTest.chollaLaunchParams.append( - " rEigenVec_MomentumY=" + - to_string_exact(rEigenVec_MomentumY_rot)); - waveTest.chollaLaunchParams.append( - " rEigenVec_MomentumZ=" + - to_string_exact(rEigenVec_MomentumZ_rot)); - waveTest.chollaLaunchParams.append(" rEigenVec_E=" + - to_string_exact(rEigenVec_E)); - waveTest.chollaLaunchParams.append( - " rEigenVec_Bx=" + to_string_exact(rEigenVec_Bx_rot)); - waveTest.chollaLaunchParams.append( - " rEigenVec_By=" + to_string_exact(rEigenVec_By_rot)); - waveTest.chollaLaunchParams.append( - " rEigenVec_Bz=" + to_string_exact(rEigenVec_Bz_rot)); - waveTest.chollaLaunchParams.append(" pitch=" + - to_string_exact(pitch)); + waveTest.chollaLaunchParams.append(" gamma=" + to_string_exact(gamma)); + waveTest.chollaLaunchParams.append(" rEigenVec_rho=" + to_string_exact(rEigenVec_rho)); + waveTest.chollaLaunchParams.append(" rEigenVec_MomentumX=" + to_string_exact(rEigenVec_MomentumX_rot)); + waveTest.chollaLaunchParams.append(" rEigenVec_MomentumY=" + to_string_exact(rEigenVec_MomentumY_rot)); + waveTest.chollaLaunchParams.append(" rEigenVec_MomentumZ=" + to_string_exact(rEigenVec_MomentumZ_rot)); + waveTest.chollaLaunchParams.append(" rEigenVec_E=" + to_string_exact(rEigenVec_E)); + waveTest.chollaLaunchParams.append(" rEigenVec_Bx=" + to_string_exact(rEigenVec_Bx_rot)); + waveTest.chollaLaunchParams.append(" rEigenVec_By=" + to_string_exact(rEigenVec_By_rot)); + waveTest.chollaLaunchParams.append(" rEigenVec_Bz=" + to_string_exact(rEigenVec_Bz_rot)); + waveTest.chollaLaunchParams.append(" pitch=" + to_string_exact(pitch)); waveTest.chollaLaunchParams.append(" yaw=" + to_string_exact(yaw)); } }; // Fast Magnetosonic Waves Moving Left and Right // ============================================= -TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, - FastMagnetosonicWaveRightMovingCorrectInputExpectCorrectOutput) +TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, FastMagnetosonicWaveRightMovingCorrectInputExpectCorrectOutput) { // Specific to this test double const waveSpeed = 2.; @@ -229,10 +190,8 @@ TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, auto [pitch, yaw, domain, domain_direction] = GetParam(); // Set the launch parameters - setLaunchParams(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, - rEigenVec_MomentumY, rEigenVec_MomentumZ, rEigenVec_E, - rEigenVec_Bx, rEigenVec_By, rEigenVec_Bz, pitch, yaw, domain, - domain_direction); + setLaunchParams(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, rEigenVec_MomentumY, rEigenVec_MomentumZ, rEigenVec_E, + rEigenVec_Bx, rEigenVec_By, rEigenVec_Bz, pitch, yaw, domain, domain_direction); // Set the number of timesteps waveTest.setFiducialNumTimeSteps(numTimeSteps[domain_direction - 1]); @@ -245,8 +204,7 @@ TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, #endif // PCM } -TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, - FastMagnetosonicWaveLeftMovingCorrectInputExpectCorrectOutput) +TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, FastMagnetosonicWaveLeftMovingCorrectInputExpectCorrectOutput) { // Specific to this test double const waveSpeed = 2.; @@ -266,10 +224,8 @@ TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, auto [pitch, yaw, domain, domain_direction] = GetParam(); // Set the launch parameters - setLaunchParams(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, - rEigenVec_MomentumY, rEigenVec_MomentumZ, rEigenVec_E, - rEigenVec_Bx, rEigenVec_By, rEigenVec_Bz, pitch, yaw, domain, - domain_direction); + setLaunchParams(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, rEigenVec_MomentumY, rEigenVec_MomentumZ, rEigenVec_E, + rEigenVec_Bx, rEigenVec_By, rEigenVec_Bz, pitch, yaw, domain, domain_direction); // Set the number of timesteps waveTest.setFiducialNumTimeSteps(numTimeSteps[domain_direction - 1]); @@ -284,8 +240,7 @@ TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, // Slow Magnetosonic Waves Moving Left and Right // ============================================= -TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, - SlowMagnetosonicWaveRightMovingCorrectInputExpectCorrectOutput) +TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, SlowMagnetosonicWaveRightMovingCorrectInputExpectCorrectOutput) { // Specific to this test double const waveSpeed = 0.5; @@ -305,10 +260,8 @@ TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, auto [pitch, yaw, domain, domain_direction] = GetParam(); // Set the launch parameters - setLaunchParams(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, - rEigenVec_MomentumY, rEigenVec_MomentumZ, rEigenVec_E, - rEigenVec_Bx, rEigenVec_By, rEigenVec_Bz, pitch, yaw, domain, - domain_direction); + setLaunchParams(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, rEigenVec_MomentumY, rEigenVec_MomentumZ, rEigenVec_E, + rEigenVec_Bx, rEigenVec_By, rEigenVec_Bz, pitch, yaw, domain, domain_direction); // Set the number of timesteps waveTest.setFiducialNumTimeSteps(numTimeSteps[domain_direction - 1]); @@ -317,8 +270,7 @@ TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, waveTest.runL1ErrorTest(allowedL1Error, allowedError); } -TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, - SlowMagnetosonicWaveLeftMovingCorrectInputExpectCorrectOutput) +TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, SlowMagnetosonicWaveLeftMovingCorrectInputExpectCorrectOutput) { // Specific to this test double const waveSpeed = 0.5; @@ -338,10 +290,8 @@ TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, auto [pitch, yaw, domain, domain_direction] = GetParam(); // Set the launch parameters - setLaunchParams(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, - rEigenVec_MomentumY, rEigenVec_MomentumZ, rEigenVec_E, - rEigenVec_Bx, rEigenVec_By, rEigenVec_Bz, pitch, yaw, domain, - domain_direction); + setLaunchParams(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, rEigenVec_MomentumY, rEigenVec_MomentumZ, rEigenVec_E, + rEigenVec_Bx, rEigenVec_By, rEigenVec_Bz, pitch, yaw, domain, domain_direction); // Set the number of timesteps waveTest.setFiducialNumTimeSteps(numTimeSteps[domain_direction - 1]); @@ -352,8 +302,7 @@ TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, // Alfven Waves Moving Left and Right // ============================================= -TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, - AlfvenWaveRightMovingCorrectInputExpectCorrectOutput) +TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, AlfvenWaveRightMovingCorrectInputExpectCorrectOutput) { // Specific to this test double const waveSpeed = 1.0; @@ -372,10 +321,8 @@ TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, auto [pitch, yaw, domain, domain_direction] = GetParam(); // Set the launch parameters - setLaunchParams(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, - rEigenVec_MomentumY, rEigenVec_MomentumZ, rEigenVec_E, - rEigenVec_Bx, rEigenVec_By, rEigenVec_Bz, pitch, yaw, domain, - domain_direction); + setLaunchParams(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, rEigenVec_MomentumY, rEigenVec_MomentumZ, rEigenVec_E, + rEigenVec_Bx, rEigenVec_By, rEigenVec_Bz, pitch, yaw, domain, domain_direction); // Set the number of timesteps waveTest.setFiducialNumTimeSteps(numTimeSteps[domain_direction - 1]); @@ -384,8 +331,7 @@ TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, waveTest.runL1ErrorTest(allowedL1Error, allowedError); } -TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, - AlfvenWaveLeftMovingCorrectInputExpectCorrectOutput) +TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, AlfvenWaveLeftMovingCorrectInputExpectCorrectOutput) { // Specific to this test double const waveSpeed = 1.0; @@ -404,10 +350,8 @@ TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, auto [pitch, yaw, domain, domain_direction] = GetParam(); // Set the launch parameters - setLaunchParams(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, - rEigenVec_MomentumY, rEigenVec_MomentumZ, rEigenVec_E, - rEigenVec_Bx, rEigenVec_By, rEigenVec_Bz, pitch, yaw, domain, - domain_direction); + setLaunchParams(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, rEigenVec_MomentumY, rEigenVec_MomentumZ, rEigenVec_E, + rEigenVec_Bx, rEigenVec_By, rEigenVec_Bz, pitch, yaw, domain, domain_direction); // Set the number of timesteps waveTest.setFiducialNumTimeSteps(numTimeSteps[domain_direction - 1]); @@ -418,8 +362,7 @@ TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, // Contact Wave Moving Right // =================================== -TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, - MHDContactWaveCorrectInputExpectCorrectOutput) +TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, MHDContactWaveCorrectInputExpectCorrectOutput) { // Specific to this test double const waveSpeed = 1.0; @@ -439,10 +382,8 @@ TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, auto [pitch, yaw, domain, domain_direction] = GetParam(); // Set the launch parameters - setLaunchParams(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, - rEigenVec_MomentumY, rEigenVec_MomentumZ, rEigenVec_E, - rEigenVec_Bx, rEigenVec_By, rEigenVec_Bz, pitch, yaw, domain, - domain_direction, velocityX); + setLaunchParams(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, rEigenVec_MomentumY, rEigenVec_MomentumZ, rEigenVec_E, + rEigenVec_Bx, rEigenVec_By, rEigenVec_Bz, pitch, yaw, domain, domain_direction, velocityX); // Set the number of timesteps waveTest.setFiducialNumTimeSteps(numTimeSteps[domain_direction - 1]); @@ -455,14 +396,13 @@ TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, #endif // PCM } -INSTANTIATE_TEST_SUITE_P( - , tMHDSYSTEMLinearWavesParameterizedAngle, - ::testing::Values(std::make_tuple(0.0 * M_PI, 0.0 * M_PI, 0.5, 1), - std::make_tuple(0.0 * M_PI, 0.5 * M_PI, 0.5, 2), - std::make_tuple(0.5 * M_PI, 0.0 * M_PI, 0.5, 3) - // std::make_tuple(std::asin(2./3.), - // std::asin(2./std::sqrt(5.)), 1.5, 1) - )); +INSTANTIATE_TEST_SUITE_P(, tMHDSYSTEMLinearWavesParameterizedAngle, + ::testing::Values(std::make_tuple(0.0 * M_PI, 0.0 * M_PI, 0.5, 1), + std::make_tuple(0.0 * M_PI, 0.5 * M_PI, 0.5, 2), + std::make_tuple(0.5 * M_PI, 0.0 * M_PI, 0.5, 3) + // std::make_tuple(std::asin(2./3.), + // std::asin(2./std::sqrt(5.)), 1.5, 1) + )); /// @} // ============================================================================= @@ -479,8 +419,7 @@ INSTANTIATE_TEST_SUITE_P( * */ /// @{ -class tMHDSYSTEMSodShockTubeParameterizedMpi - : public ::testing::TestWithParam +class tMHDSYSTEMSodShockTubeParameterizedMpi : public ::testing::TestWithParam { protected: systemTest::SystemTestRunner sodTest; @@ -492,8 +431,7 @@ TEST_P(tMHDSYSTEMSodShockTubeParameterizedMpi, CorrectInputExpectCorrectOutput) sodTest.runTest(); } -INSTANTIATE_TEST_SUITE_P(CorrectInputExpectCorrectOutput, - tMHDSYSTEMSodShockTubeParameterizedMpi, +INSTANTIATE_TEST_SUITE_P(CorrectInputExpectCorrectOutput, tMHDSYSTEMSodShockTubeParameterizedMpi, ::testing::Values(1, 2, 4)); /// @} // ============================================================================= @@ -518,32 +456,25 @@ TEST(tMHDSYSTEMEinfeldtStrongRarefaction, CorrectInputExpectCorrectOutput) * */ /// @{ -class tMHDSYSTEMLinearWavesParameterizedMpi - : public ::testing::TestWithParam +class tMHDSYSTEMLinearWavesParameterizedMpi : public ::testing::TestWithParam { public: - tMHDSYSTEMLinearWavesParameterizedMpi() - : waveTest(false, true, false, false){}; + tMHDSYSTEMLinearWavesParameterizedMpi() : waveTest(false, true, false, false){}; protected: systemTest::SystemTestRunner waveTest; #ifdef PCM - double const allowedL1Error = - 4E-7; // Based on results in Gardiner & Stone 2008 - double const allowedError = 4E-7; + double const allowedL1Error = 4E-7; // Based on results in Gardiner & Stone 2008 + double const allowedError = 4E-7; #else // PCM - double const allowedL1Error = - 1E-7; // Based on results in Gardiner & Stone 2008 - double const allowedError = 1E-7; + double const allowedL1Error = 1E-7; // Based on results in Gardiner & Stone 2008 + double const allowedError = 1E-7; #endif // PCM - void setLaunchParams(double const &waveSpeed, double const &rEigenVec_rho, - double const &rEigenVec_MomentumX, - double const &rEigenVec_MomentumY, - double const &rEigenVec_MomentumZ, - double const &rEigenVec_E, double const &rEigenVec_Bx, - double const &rEigenVec_By, double const &rEigenVec_Bz) + void setLaunchParams(double const &waveSpeed, double const &rEigenVec_rho, double const &rEigenVec_MomentumX, + double const &rEigenVec_MomentumY, double const &rEigenVec_MomentumZ, double const &rEigenVec_E, + double const &rEigenVec_Bx, double const &rEigenVec_By, double const &rEigenVec_Bz) { // Constant for all tests size_t const N = 32; @@ -555,20 +486,15 @@ class tMHDSYSTEMLinearWavesParameterizedMpi waveTest.chollaLaunchParams.append(" nx=" + to_string_exact(2 * N)); waveTest.chollaLaunchParams.append(" ny=" + to_string_exact(N)); waveTest.chollaLaunchParams.append(" nz=" + to_string_exact(N)); - waveTest.chollaLaunchParams.append(" tout=" + - to_string_exact(tOut)); - waveTest.chollaLaunchParams.append(" outstep=" + - to_string_exact(tOut)); + waveTest.chollaLaunchParams.append(" tout=" + to_string_exact(tOut)); + waveTest.chollaLaunchParams.append(" outstep=" + to_string_exact(tOut)); waveTest.chollaLaunchParams.append(" init=Linear_Wave"); waveTest.chollaLaunchParams.append(" xmin=0.0"); waveTest.chollaLaunchParams.append(" ymin=0.0"); waveTest.chollaLaunchParams.append(" zmin=0.0"); - waveTest.chollaLaunchParams.append(" xlen=" + - to_string_exact(2 * domain)); - waveTest.chollaLaunchParams.append(" ylen=" + - to_string_exact(domain)); - waveTest.chollaLaunchParams.append(" zlen=" + - to_string_exact(domain)); + waveTest.chollaLaunchParams.append(" xlen=" + to_string_exact(2 * domain)); + waveTest.chollaLaunchParams.append(" ylen=" + to_string_exact(domain)); + waveTest.chollaLaunchParams.append(" zlen=" + to_string_exact(domain)); waveTest.chollaLaunchParams.append(" xl_bcnd=1"); waveTest.chollaLaunchParams.append(" xu_bcnd=1"); waveTest.chollaLaunchParams.append(" yl_bcnd=1"); @@ -579,37 +505,26 @@ class tMHDSYSTEMLinearWavesParameterizedMpi waveTest.chollaLaunchParams.append(" vx=0"); waveTest.chollaLaunchParams.append(" vy=0"); waveTest.chollaLaunchParams.append(" vz=0"); - waveTest.chollaLaunchParams.append(" P=" + - to_string_exact(1 / gamma)); + waveTest.chollaLaunchParams.append(" P=" + to_string_exact(1 / gamma)); waveTest.chollaLaunchParams.append(" Bx=1"); waveTest.chollaLaunchParams.append(" By=1.5"); waveTest.chollaLaunchParams.append(" Bz=0"); waveTest.chollaLaunchParams.append(" A='1e-6'"); - waveTest.chollaLaunchParams.append(" gamma=" + - to_string_exact(gamma)); - waveTest.chollaLaunchParams.append(" rEigenVec_rho=" + - to_string_exact(rEigenVec_rho)); - waveTest.chollaLaunchParams.append( - " rEigenVec_MomentumX=" + to_string_exact(rEigenVec_MomentumX)); - waveTest.chollaLaunchParams.append( - " rEigenVec_MomentumY=" + to_string_exact(rEigenVec_MomentumY)); - waveTest.chollaLaunchParams.append( - " rEigenVec_MomentumZ=" + to_string_exact(rEigenVec_MomentumZ)); - waveTest.chollaLaunchParams.append(" rEigenVec_E=" + - to_string_exact(rEigenVec_E)); - waveTest.chollaLaunchParams.append(" rEigenVec_Bx=" + - to_string_exact(rEigenVec_Bx)); - waveTest.chollaLaunchParams.append(" rEigenVec_By=" + - to_string_exact(rEigenVec_By)); - waveTest.chollaLaunchParams.append(" rEigenVec_Bz=" + - to_string_exact(rEigenVec_Bz)); + waveTest.chollaLaunchParams.append(" gamma=" + to_string_exact(gamma)); + waveTest.chollaLaunchParams.append(" rEigenVec_rho=" + to_string_exact(rEigenVec_rho)); + waveTest.chollaLaunchParams.append(" rEigenVec_MomentumX=" + to_string_exact(rEigenVec_MomentumX)); + waveTest.chollaLaunchParams.append(" rEigenVec_MomentumY=" + to_string_exact(rEigenVec_MomentumY)); + waveTest.chollaLaunchParams.append(" rEigenVec_MomentumZ=" + to_string_exact(rEigenVec_MomentumZ)); + waveTest.chollaLaunchParams.append(" rEigenVec_E=" + to_string_exact(rEigenVec_E)); + waveTest.chollaLaunchParams.append(" rEigenVec_Bx=" + to_string_exact(rEigenVec_Bx)); + waveTest.chollaLaunchParams.append(" rEigenVec_By=" + to_string_exact(rEigenVec_By)); + waveTest.chollaLaunchParams.append(" rEigenVec_Bz=" + to_string_exact(rEigenVec_Bz)); } }; // Slow Magnetosonic Waves Moving Left and Right // ============================================= -TEST_P(tMHDSYSTEMLinearWavesParameterizedMpi, - SlowMagnetosonicWaveRightMovingCorrectInputExpectCorrectOutput) +TEST_P(tMHDSYSTEMLinearWavesParameterizedMpi, SlowMagnetosonicWaveRightMovingCorrectInputExpectCorrectOutput) { // Specific to this test double const waveSpeed = 0.5; @@ -629,8 +544,7 @@ TEST_P(tMHDSYSTEMLinearWavesParameterizedMpi, waveTest.numMpiRanks = GetParam(); // Set the launch parameters - setLaunchParams(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, - rEigenVec_MomentumY, rEigenVec_MomentumZ, rEigenVec_E, + setLaunchParams(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, rEigenVec_MomentumY, rEigenVec_MomentumZ, rEigenVec_E, rEigenVec_Bx, rEigenVec_By, rEigenVec_Bz); // Set the number of timesteps @@ -640,8 +554,7 @@ TEST_P(tMHDSYSTEMLinearWavesParameterizedMpi, waveTest.runL1ErrorTest(allowedL1Error, allowedError); } -TEST_P(tMHDSYSTEMLinearWavesParameterizedMpi, - SlowMagnetosonicWaveLeftMovingCorrectInputExpectCorrectOutput) +TEST_P(tMHDSYSTEMLinearWavesParameterizedMpi, SlowMagnetosonicWaveLeftMovingCorrectInputExpectCorrectOutput) { // Specific to this test double const waveSpeed = 0.5; @@ -661,8 +574,7 @@ TEST_P(tMHDSYSTEMLinearWavesParameterizedMpi, waveTest.numMpiRanks = GetParam(); // Set the launch parameters - setLaunchParams(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, - rEigenVec_MomentumY, rEigenVec_MomentumZ, rEigenVec_E, + setLaunchParams(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, rEigenVec_MomentumY, rEigenVec_MomentumZ, rEigenVec_E, rEigenVec_Bx, rEigenVec_By, rEigenVec_Bz); // Set the number of timesteps @@ -672,7 +584,6 @@ TEST_P(tMHDSYSTEMLinearWavesParameterizedMpi, waveTest.runL1ErrorTest(allowedL1Error, allowedError); } -INSTANTIATE_TEST_SUITE_P(, tMHDSYSTEMLinearWavesParameterizedMpi, - ::testing::Values(1, 2, 4)); +INSTANTIATE_TEST_SUITE_P(, tMHDSYSTEMLinearWavesParameterizedMpi, ::testing::Values(1, 2, 4)); /// @} // ============================================================================= \ No newline at end of file diff --git a/src/system_tests/system_tester.cpp b/src/system_tests/system_tester.cpp index d6ecd73e7..9f6059d62 100644 --- a/src/system_tests/system_tester.cpp +++ b/src/system_tests/system_tester.cpp @@ -55,16 +55,14 @@ void systemTest::SystemTestRunner::runTest() if (_hydroDataExists) { std::string fileName = "/1.h5." + std::to_string(fileIndex); _checkFileExists(_outputDirectory + fileName); - _testHydroFieldsFileVec[fileIndex].openFile(_outputDirectory + fileName, - H5F_ACC_RDONLY); + _testHydroFieldsFileVec[fileIndex].openFile(_outputDirectory + fileName, H5F_ACC_RDONLY); } // Load the particles data if (_particleDataExists) { std::string fileName = "/1_particles.h5." + std::to_string(fileIndex); _checkFileExists(_outputDirectory + fileName); - _testParticlesFileVec[fileIndex].openFile(_outputDirectory + fileName, - H5F_ACC_RDONLY); + _testParticlesFileVec[fileIndex].openFile(_outputDirectory + fileName, H5F_ACC_RDONLY); } } @@ -73,23 +71,18 @@ void systemTest::SystemTestRunner::runTest() if (_particleDataExists) { _testParticleIDs = _loadTestParticleData("particle_IDs"); - if (_fiducialFileExists) - _fiducialParticleIDs = _loadFiducialParticleData("particle_IDs"); + if (_fiducialFileExists) _fiducialParticleIDs = _loadFiducialParticleData("particle_IDs"); } // Get the list of test dataset names - if (_hydroDataExists) - _testDataSetNames = _findDataSetNames(_testHydroFieldsFileVec[0]); + if (_hydroDataExists) _testDataSetNames = _findDataSetNames(_testHydroFieldsFileVec[0]); if (_particleDataExists) { // Load the data, replace the density value with the new name, then append - std::vector particleNames = - _findDataSetNames(_testParticlesFileVec[0]); - auto iter = - std::find(particleNames.begin(), particleNames.end(), "density"); - *iter = "particle_density"; - - _testDataSetNames.insert(_testDataSetNames.end(), particleNames.begin(), - particleNames.end()); + std::vector particleNames = _findDataSetNames(_testParticlesFileVec[0]); + auto iter = std::find(particleNames.begin(), particleNames.end(), "density"); + *iter = "particle_density"; + + _testDataSetNames.insert(_testDataSetNames.end(), particleNames.begin(), particleNames.end()); } // Start Performing Checks @@ -101,19 +94,15 @@ void systemTest::SystemTestRunner::runTest() // file. Provide a warning if the datasets are not the same size EXPECT_GE(_testDataSetNames.size(), _fiducialDataSetNames.size()) << std::endl - << "Warning: The test data has " << _testDataSetNames.size() - << " datasets and the fiducial data has " << _fiducialDataSetNames.size() - << " datasets" << std::endl + << "Warning: The test data has " << _testDataSetNames.size() << " datasets and the fiducial data has " + << _fiducialDataSetNames.size() << " datasets" << std::endl << std::endl; // Loop over the datasets to be tested for (auto dataSetName : _fiducialDataSetNames) { // check that the test data has the dataset in it - ASSERT_EQ(std::count(_testDataSetNames.begin(), _testDataSetNames.end(), - dataSetName), - 1) - << "The test data does not contain the dataset '" + dataSetName + - "' or contains it more than once."; + ASSERT_EQ(std::count(_testDataSetNames.begin(), _testDataSetNames.end(), dataSetName), 1) + << "The test data does not contain the dataset '" + dataSetName + "' or contains it more than once."; // Get data vectors std::vector testDims(3, 1); @@ -122,10 +111,8 @@ void systemTest::SystemTestRunner::runTest() // This is just a vector of all the different dataset names for // particles to help choose whether to call _loadTestParticleData // or loadTestFieldData - std::vector particleIDs = { - "particle_IDs", "pos_x", "pos_y", "pos_z", "vel_x", "vel_y", "vel_z"}; - if (std::find(particleIDs.begin(), particleIDs.end(), dataSetName) != - particleIDs.end()) { + std::vector particleIDs = {"particle_IDs", "pos_x", "pos_y", "pos_z", "vel_x", "vel_y", "vel_z"}; + if (std::find(particleIDs.begin(), particleIDs.end(), dataSetName) != particleIDs.end()) { // This is a particle data set // Set some basic parameters @@ -148,31 +135,26 @@ void systemTest::SystemTestRunner::runTest() // Check that they're the same length ASSERT_EQ(fiducialData.size(), testData.size()) - << "The fiducial and test '" << dataSetName - << "' datasets are not the same length"; + << "The fiducial and test '" << dataSetName << "' datasets are not the same length"; // Compare values for (size_t i = 0; i < testDims[0]; i++) { for (size_t j = 0; j < testDims[1]; j++) { for (size_t k = 0; k < testDims[2]; k++) { - size_t index = - (i * testDims[1] * testDims[2]) + (j * testDims[2]) + k; + size_t index = (i * testDims[1] * testDims[2]) + (j * testDims[2]) + k; // Check for equality and iff not equal return difference double absoluteDiff; int64_t ulpsDiff; - bool areEqual = testingUtilities::nearlyEqualDbl( - fiducialData.at(index), testData.at(index), absoluteDiff, - ulpsDiff, _fixedEpsilon); - ASSERT_TRUE(areEqual) - << std::endl - << "Difference in " << dataSetName << " dataset at [" << i << "," - << j << "," << k << "]" << std::endl - << "The fiducial value is: " << fiducialData[index] - << std::endl - << "The test value is: " << testData[index] << std::endl - << "The absolute difference is: " << absoluteDiff << std::endl - << "The ULP difference is: " << ulpsDiff << std::endl; + bool areEqual = testingUtilities::nearlyEqualDbl(fiducialData.at(index), testData.at(index), absoluteDiff, + ulpsDiff, _fixedEpsilon); + ASSERT_TRUE(areEqual) << std::endl + << "Difference in " << dataSetName << " dataset at [" << i << "," << j << "," << k + << "]" << std::endl + << "The fiducial value is: " << fiducialData[index] << std::endl + << "The test value is: " << testData[index] << std::endl + << "The absolute difference is: " << absoluteDiff << std::endl + << "The ULP difference is: " << ulpsDiff << std::endl; } } } @@ -181,8 +163,7 @@ void systemTest::SystemTestRunner::runTest() // ============================================================================= // ============================================================================= -void systemTest::SystemTestRunner::runL1ErrorTest( - double const &maxAllowedL1Error, double const &maxAllowedError) +void systemTest::SystemTestRunner::runL1ErrorTest(double const &maxAllowedL1Error, double const &maxAllowedError) { /// Only run if this variable is set to `true`. Generally this and /// globalCompareSystemTestResults should only be used for large MPI / tests @@ -196,13 +177,11 @@ void systemTest::SystemTestRunner::runL1ErrorTest( // Check that there is hydro data and no particle data if (_particleDataExists) { - std::string errMessage = - "Error: SystemTestRunner::runL1ErrorTest does not support particles"; + std::string errMessage = "Error: SystemTestRunner::runL1ErrorTest does not support particles"; throw std::runtime_error(errMessage); } if (not _hydroDataExists) { - std::string errMessage = - "Error: SystemTestRunner::runL1ErrorTest requires hydro data"; + std::string errMessage = "Error: SystemTestRunner::runL1ErrorTest requires hydro data"; throw std::runtime_error(errMessage); } @@ -219,14 +198,12 @@ void systemTest::SystemTestRunner::runL1ErrorTest( // Initial time data std::string fileName = "/0.h5." + std::to_string(fileIndex); _checkFileExists(_outputDirectory + fileName); - initialHydroFieldsFileVec[fileIndex].openFile(_outputDirectory + fileName, - H5F_ACC_RDONLY); + initialHydroFieldsFileVec[fileIndex].openFile(_outputDirectory + fileName, H5F_ACC_RDONLY); // Final time data fileName = "/1.h5." + std::to_string(fileIndex); _checkFileExists(_outputDirectory + fileName); - _testHydroFieldsFileVec[fileIndex].openFile(_outputDirectory + fileName, - H5F_ACC_RDONLY); + _testHydroFieldsFileVec[fileIndex].openFile(_outputDirectory + fileName, H5F_ACC_RDONLY); } // Get the list of test dataset names @@ -242,9 +219,8 @@ void systemTest::SystemTestRunner::runL1ErrorTest( // file. Provide a warning if the datasets are not the same size EXPECT_GE(_testDataSetNames.size(), _fiducialDataSetNames.size()) << std::endl - << "Warning: The test data has " << _testDataSetNames.size() - << " datasets and the fiducial data has " << _fiducialDataSetNames.size() - << " datasets" << std::endl + << "Warning: The test data has " << _testDataSetNames.size() << " datasets and the fiducial data has " + << _fiducialDataSetNames.size() << " datasets" << std::endl << std::endl; // Loop over the datasets to be tested @@ -256,11 +232,8 @@ void systemTest::SystemTestRunner::runL1ErrorTest( } // check that the test data has the dataset in it - ASSERT_EQ(std::count(_testDataSetNames.begin(), _testDataSetNames.end(), - dataSetName), - 1) - << "The test data does not contain the dataset '" + dataSetName + - "' or contains it more than once."; + ASSERT_EQ(std::count(_testDataSetNames.begin(), _testDataSetNames.end(), dataSetName), 1) + << "The test data does not contain the dataset '" + dataSetName + "' or contains it more than once."; // Get data vectors std::vector initialDims(3, 1); @@ -269,16 +242,13 @@ void systemTest::SystemTestRunner::runL1ErrorTest( std::vector finalData; // This is a field data set - initialData = - loadTestFieldData(dataSetName, initialDims, initialHydroFieldsFileVec); + initialData = loadTestFieldData(dataSetName, initialDims, initialHydroFieldsFileVec); // Get fiducial data - finalData = - loadTestFieldData(dataSetName, finalDims, _testHydroFieldsFileVec); + finalData = loadTestFieldData(dataSetName, finalDims, _testHydroFieldsFileVec); // Check that they're the same length ASSERT_EQ(initialData.size(), finalData.size()) - << "The initial and final '" << dataSetName - << "' datasets are not the same length"; + << "The initial and final '" << dataSetName << "' datasets are not the same length"; // Compute the L1 Error. double L1Error = 0; @@ -288,24 +258,20 @@ void systemTest::SystemTestRunner::runL1ErrorTest( maxError = (diff > maxError) ? diff : maxError; } - L1Error *= (1. / static_cast(initialDims[0] * initialDims[1] * - initialDims[2])); + L1Error *= (1. / static_cast(initialDims[0] * initialDims[1] * initialDims[2])); L2Norm += L1Error * L1Error; // Perform the correctness check EXPECT_LT(L1Error, maxAllowedL1Error) - << "the L1 error for the " << dataSetName - << " data has exceeded the allowed value"; + << "the L1 error for the " << dataSetName << " data has exceeded the allowed value"; } // Check the L1 Norm L2Norm = std::sqrt(L2Norm); - EXPECT_LT(L2Norm, maxAllowedL1Error) - << "the norm of the L1 error vector has exceeded the allowed value"; + EXPECT_LT(L2Norm, maxAllowedL1Error) << "the norm of the L1 error vector has exceeded the allowed value"; // Check the Max Error - EXPECT_LT(maxError, maxAllowedError) - << "The maximum error has exceeded the allowed value"; + EXPECT_LT(maxError, maxAllowedError) << "The maximum error has exceeded the allowed value"; } // ============================================================================= @@ -314,22 +280,16 @@ void systemTest::SystemTestRunner::launchCholla() { // Launch Cholla. Note that this dumps all console output to the console // log file as requested by the user. - std::string const chollaRunCommand = - globalMpiLauncher.getString() + " " + std::to_string(numMpiRanks) + " " + - _chollaPath + " " + _chollaSettingsPath + " " + chollaLaunchParams + " " + - "outdir=" + _outputDirectory + "/" + " >> " + _consoleOutputPath + - " 2>&1 "; - auto returnEcho = system( - ("echo Launch Command: " + chollaRunCommand + " >> " + _consoleOutputPath) - .c_str()); + std::string const chollaRunCommand = globalMpiLauncher.getString() + " " + std::to_string(numMpiRanks) + " " + + _chollaPath + " " + _chollaSettingsPath + " " + chollaLaunchParams + " " + + "outdir=" + _outputDirectory + "/" + " >> " + _consoleOutputPath + " 2>&1 "; + auto returnEcho = system(("echo Launch Command: " + chollaRunCommand + " >> " + _consoleOutputPath).c_str()); auto returnLaunch = system((chollaRunCommand).c_str()); - EXPECT_EQ(returnEcho, 0) - << "Warning: Echoing the launch command to the console output file " - << "returned a non-zero exit status code. Launch command is `" - << chollaRunCommand << "`" << std::endl; - EXPECT_EQ(returnLaunch, 0) - << "Warning: Launching Cholla returned a non-zero exit status. Likely " - << "failed to launch. Please see the log files" << std::endl; + EXPECT_EQ(returnEcho, 0) << "Warning: Echoing the launch command to the console output file " + << "returned a non-zero exit status code. Launch command is `" << chollaRunCommand << "`" + << std::endl; + EXPECT_EQ(returnLaunch, 0) << "Warning: Launching Cholla returned a non-zero exit status. Likely " + << "failed to launch. Please see the log files" << std::endl; _safeMove("run_output.log", _outputDirectory); // TODO: instead of commenting out, change to check if exist @@ -344,20 +304,18 @@ void systemTest::SystemTestRunner::openHydroTestData() for (size_t fileIndex = 0; fileIndex < numMpiRanks; fileIndex++) { std::string fileName = "/1.h5." + std::to_string(fileIndex); _checkFileExists(_outputDirectory + fileName); - _testHydroFieldsFileVec[fileIndex].openFile(_outputDirectory + fileName, - H5F_ACC_RDONLY); + _testHydroFieldsFileVec[fileIndex].openFile(_outputDirectory + fileName, H5F_ACC_RDONLY); } } // ============================================================================= // ============================================================================= -void systemTest::SystemTestRunner::setFiducialData( - std::string const &fieldName, std::vector const &dataVec) +void systemTest::SystemTestRunner::setFiducialData(std::string const &fieldName, std::vector const &dataVec) { // First check if there's a fiducial data file if (_fiducialDataSets.count(fieldName) > 0) { - std::string errMessage = "Error: Fiducial dataset for field '" + fieldName + - "' already exists and cannot be overwritten"; + std::string errMessage = + "Error: Fiducial dataset for field '" + fieldName + "' already exists and cannot be overwritten"; throw std::runtime_error(errMessage); } @@ -367,8 +325,8 @@ void systemTest::SystemTestRunner::setFiducialData( // ============================================================================= // ============================================================================= -std::vector systemTest::SystemTestRunner::generateConstantData( - double const &value, size_t const &nx, size_t const &ny, size_t const &nz) +std::vector systemTest::SystemTestRunner::generateConstantData(double const &value, size_t const &nx, + size_t const &ny, size_t const &nz) { size_t const length = nx * ny * nz; std::vector outVec(length); @@ -380,18 +338,17 @@ std::vector systemTest::SystemTestRunner::generateConstantData( // ============================================================================= // ============================================================================= -std::vector systemTest::SystemTestRunner::generateSineData( - double const &offset, double const &litude, double const &kx, - double const &ky, double const &kz, double const &phase, size_t const &nx, - size_t const &ny, size_t const &nz) +std::vector systemTest::SystemTestRunner::generateSineData(double const &offset, double const &litude, + double const &kx, double const &ky, double const &kz, + double const &phase, size_t const &nx, + size_t const &ny, size_t const &nz) { size_t const length = nx * ny * nz; std::vector outVec(length); for (size_t i = 0; i < nx; i++) { for (size_t j = 0; j < ny; j++) { for (size_t k = 0; k < nz; k++) { - double value = - offset + amplitude * std::sin(kx * i + ky * j + kz * k + phase); + double value = offset + amplitude * std::sin(kx * i + ky * j + kz * k + phase); size_t index = (i * ny * nz) + (j * nz) + k; outVec[index] = value; @@ -404,19 +361,16 @@ std::vector systemTest::SystemTestRunner::generateSineData( // ============================================================================= // Constructor -systemTest::SystemTestRunner::SystemTestRunner(bool const &particleData, - bool const &hydroData, - bool const &useFiducialFile, - bool const &useSettingsFile) +systemTest::SystemTestRunner::SystemTestRunner(bool const &particleData, bool const &hydroData, + bool const &useFiducialFile, bool const &useSettingsFile) : _particleDataExists(particleData), _hydroDataExists(hydroData) { // Get the test name, with and underscore instead of a "." since // we're actually generating file names - const ::testing::TestInfo *const test_info = - ::testing::UnitTest::GetInstance()->current_test_info(); + const ::testing::TestInfo *const test_info = ::testing::UnitTest::GetInstance()->current_test_info(); std::stringstream nameStream; std::string suiteName = test_info->test_suite_name(); - suiteName = suiteName.substr(suiteName.find("/") + 1, suiteName.length()); + suiteName = suiteName.substr(suiteName.find("/") + 1, suiteName.length()); nameStream << suiteName << "_" << test_info->name(); std::string fullTestName = nameStream.str(); _fullTestFileName = fullTestName.substr(0, fullTestName.find("/")); @@ -424,25 +378,19 @@ systemTest::SystemTestRunner::SystemTestRunner(bool const &particleData, // Generate the input paths. Strip out everything after a "/" since that // probably indicates a parameterized test. Also, check that the files exist // and load fiducial HDF5 file if required - _chollaPath = ::globalChollaRoot.getString() + "/bin/cholla." + - ::globalChollaBuild.getString() + "." + + _chollaPath = ::globalChollaRoot.getString() + "/bin/cholla." + ::globalChollaBuild.getString() + "." + ::globalChollaMachine.getString(); _checkFileExists(_chollaPath); if (useSettingsFile) { - _chollaSettingsPath = ::globalChollaRoot.getString() + - "/src/system_tests/input_files/" + _fullTestFileName + - ".txt"; + _chollaSettingsPath = + ::globalChollaRoot.getString() + "/src/system_tests/input_files/" + _fullTestFileName + ".txt"; _checkFileExists(_chollaSettingsPath); } else { - _chollaSettingsPath = ::globalChollaRoot.getString() + - "/src/system_tests/input_files/" + - "blank_settings_file.txt"; + _chollaSettingsPath = ::globalChollaRoot.getString() + "/src/system_tests/input_files/" + "blank_settings_file.txt"; _checkFileExists(_chollaSettingsPath); } if (useFiducialFile) { - _fiducialFilePath = ::globalChollaRoot.getString() + - "/cholla-tests-data/system_tests/" + _fullTestFileName + - ".h5"; + _fiducialFilePath = ::globalChollaRoot.getString() + "/cholla-tests-data/system_tests/" + _fullTestFileName + ".h5"; _checkFileExists(_fiducialFilePath); _fiducialFile.openFile(_fiducialFilePath, H5F_ACC_RDONLY); _fiducialDataSetNames = _findDataSetNames(_fiducialFile); @@ -452,17 +400,15 @@ systemTest::SystemTestRunner::SystemTestRunner(bool const &particleData, } // Generate output paths, these files don't exist yet - _outputDirectory = ::globalChollaRoot.getString() + "/bin/" + fullTestName; - _consoleOutputPath = - _outputDirectory + "/" + _fullTestFileName + "_console.log"; + _outputDirectory = ::globalChollaRoot.getString() + "/bin/" + fullTestName; + _consoleOutputPath = _outputDirectory + "/" + _fullTestFileName + "_console.log"; // Create the new directory and check that it exists // TODO: C++17: When we update to C++17 or newer this section should // TODO: use std::filesystem to create the directory and check that // TODO: it exists if (system(("mkdir --parents " + _outputDirectory).c_str()) != 0) { - std::cerr << "Warning: Directory '" + _outputDirectory + - "' either already exists or could not be created." + std::cerr << "Warning: Directory '" + _outputDirectory + "' either already exists or could not be created." << std::endl; } } @@ -498,16 +444,12 @@ void systemTest::SystemTestRunner::_checkFileExists(std::string const &filePath) // ============================================================================= // ============================================================================= -void systemTest::SystemTestRunner::_safeMove( - std::string const &sourcePath, std::string const &destinationDirectory) +void systemTest::SystemTestRunner::_safeMove(std::string const &sourcePath, std::string const &destinationDirectory) { // TODO C++17 std::filesystem does this better _checkFileExists(sourcePath); - if (std::rename(sourcePath.c_str(), - (destinationDirectory + "/" + sourcePath).c_str()) < 0) { - std::string errMessage = "Error: File '" + sourcePath + - "' could not be moved to '" + - destinationDirectory + "`"; + if (std::rename(sourcePath.c_str(), (destinationDirectory + "/" + sourcePath).c_str()) < 0) { + std::string errMessage = "Error: File '" + sourcePath + "' could not be moved to '" + destinationDirectory + "`"; throw std::invalid_argument(errMessage); } } @@ -524,8 +466,7 @@ void systemTest::SystemTestRunner::_checkNumTimeSteps() } else if (_particleDataExists) { tStepAttr = _testParticlesFileVec[0].openAttribute("n_step"); } else { - std::string errMessage = - "Error: Both hydro and particle data are turned off."; + std::string errMessage = "Error: Both hydro and particle data are turned off."; throw std::invalid_argument(errMessage); } @@ -538,15 +479,14 @@ void systemTest::SystemTestRunner::_checkNumTimeSteps() fiducialNSteps = _numFiducialTimeSteps; } - EXPECT_EQ(fiducialNSteps, testNSteps) - << "The number of time steps is not equal"; + EXPECT_EQ(fiducialNSteps, testNSteps) << "The number of time steps is not equal"; }; // ============================================================================= // ============================================================================= -std::vector systemTest::SystemTestRunner::loadTestFieldData( - std::string dataSetName, std::vector &testDims, - std::vector file) +std::vector systemTest::SystemTestRunner::loadTestFieldData(std::string dataSetName, + std::vector &testDims, + std::vector file) { // Switch which fileset we're using if it's a particle dataset if (dataSetName == "particle_density") { @@ -558,9 +498,7 @@ std::vector systemTest::SystemTestRunner::loadTestFieldData( // Get the size of each dimension. First check if the field is a magnetic // field or not to make sure we're retreiving the right dimensions - std::string dimsName = (dataSetName.find("magnetic") != std::string::npos) - ? "magnetic_field_dims" - : "dims"; + std::string dimsName = (dataSetName.find("magnetic") != std::string::npos) ? "magnetic_field_dims" : "dims"; H5::Attribute dimensions = file[0].openAttribute(dimsName.c_str()); dimensions.read(H5::PredType::NATIVE_ULONG, testDims.data()); @@ -593,11 +531,8 @@ std::vector systemTest::SystemTestRunner::loadTestFieldData( // Get dims_local std::vector dimsLocal(3, 1); std::string dimsNameLocal = - (dataSetName.find("magnetic") != std::string::npos) - ? "magnetic_field_dims_local" - : "dims_local"; - H5::Attribute dimsLocalAttr = - file[rank].openAttribute(dimsNameLocal.c_str()); + (dataSetName.find("magnetic") != std::string::npos) ? "magnetic_field_dims_local" : "dims_local"; + H5::Attribute dimsLocalAttr = file[rank].openAttribute(dimsNameLocal.c_str()); dimsLocalAttr.read(H5::PredType::NATIVE_INT, dimsLocal.data()); // Now we add the data to the larger vector @@ -606,8 +541,7 @@ std::vector systemTest::SystemTestRunner::loadTestFieldData( for (size_t j = offset[1]; j < offset[1] + dimsLocal[1]; j++) { for (size_t k = offset[2]; k < offset[2] + dimsLocal[2]; k++) { // Compute the location to put the next element - size_t overallIndex = - (i * testDims[1] * testDims[2]) + (j * testDims[2]) + k; + size_t overallIndex = (i * testDims[1] * testDims[2]) + (j * testDims[2]) + k; // Perform copy testData[overallIndex] = tempArr[localIndex]; @@ -625,8 +559,7 @@ std::vector systemTest::SystemTestRunner::loadTestFieldData( // ============================================================================= // ============================================================================= -std::vector systemTest::SystemTestRunner::_loadTestParticleData( - std::string const &dataSetName) +std::vector systemTest::SystemTestRunner::_loadTestParticleData(std::string const &dataSetName) { // Determine the total number of particles if (_testTotalNumParticles == 0) { @@ -650,8 +583,7 @@ std::vector systemTest::SystemTestRunner::_loadTestParticleData( // Load in the data for (size_t rank = 0; rank < numMpiRanks; rank++) { // Open the dataset - H5::DataSet const testDataSet = - _testParticlesFileVec[rank].openDataSet(dataSetName); + H5::DataSet const testDataSet = _testParticlesFileVec[rank].openDataSet(dataSetName); // Determine dataset size/shape and check that it's correct H5::DataSpace const testDataSpace = testDataSet.getSpace(); @@ -661,8 +593,7 @@ std::vector systemTest::SystemTestRunner::_loadTestParticleData( // Read in data testDataSet.read(tempVector.data(), H5::PredType::NATIVE_DOUBLE); - unsortedTestData.insert(unsortedTestData.end(), tempVector.begin(), - tempVector.end()); + unsortedTestData.insert(unsortedTestData.end(), tempVector.begin(), tempVector.end()); } // Generate the sorting vector if it's not already generated @@ -671,9 +602,7 @@ std::vector systemTest::SystemTestRunner::_loadTestParticleData( tempSortedIndices.resize(_testTotalNumParticles); std::iota(tempSortedIndices.begin(), tempSortedIndices.end(), 0); std::sort(tempSortedIndices.begin(), tempSortedIndices.end(), - [&](size_t A, size_t B) -> bool { - return unsortedTestData[A] < unsortedTestData[B]; - }); + [&](size_t A, size_t B) -> bool { return unsortedTestData[A] < unsortedTestData[B]; }); } std::vector static const sortedIndices = tempSortedIndices; @@ -688,8 +617,7 @@ std::vector systemTest::SystemTestRunner::_loadTestParticleData( // ============================================================================= // ============================================================================= -std::vector systemTest::SystemTestRunner::_loadFiducialFieldData( - std::string const &dataSetName) +std::vector systemTest::SystemTestRunner::_loadFiducialFieldData(std::string const &dataSetName) { if (_fiducialFileExists) { // Open the dataset @@ -716,8 +644,7 @@ std::vector systemTest::SystemTestRunner::_loadFiducialFieldData( // ============================================================================= // ============================================================================= -std::vector systemTest::SystemTestRunner::_loadFiducialParticleData( - std::string const &dataSetName) +std::vector systemTest::SystemTestRunner::_loadFiducialParticleData(std::string const &dataSetName) { if (_fiducialFileExists) { // Determine the total number of particles @@ -747,8 +674,7 @@ std::vector systemTest::SystemTestRunner::_loadFiducialParticleData( size_t localNumParticles = testDataSpace.getSimpleExtentNpoints(); // Read in data - fiducialDataSet.read(unsortedFiducialData.data(), - H5::PredType::NATIVE_DOUBLE); + fiducialDataSet.read(unsortedFiducialData.data(), H5::PredType::NATIVE_DOUBLE); // Generate the sorting vector if it's not already generated std::vector tempSortedIndices; @@ -756,10 +682,7 @@ std::vector systemTest::SystemTestRunner::_loadFiducialParticleData( tempSortedIndices.resize(_fiducialTotalNumParticles); std::iota(tempSortedIndices.begin(), tempSortedIndices.end(), 0); std::sort(tempSortedIndices.begin(), tempSortedIndices.end(), - [&](size_t A, size_t B) -> bool { - return unsortedFiducialData.at(A) < - unsortedFiducialData.at(B); - }); + [&](size_t A, size_t B) -> bool { return unsortedFiducialData.at(A) < unsortedFiducialData.at(B); }); } std::vector const static sortedIndices = tempSortedIndices; @@ -777,8 +700,7 @@ std::vector systemTest::SystemTestRunner::_loadFiducialParticleData( // ============================================================================= // ============================================================================= -std::vector systemTest::SystemTestRunner::_findDataSetNames( - H5::H5File const &inputFile) +std::vector systemTest::SystemTestRunner::_findDataSetNames(H5::H5File const &inputFile) { std::vector outputVector; diff --git a/src/system_tests/system_tester.h b/src/system_tests/system_tester.h index e9eb2a0cb..2003d72fd 100644 --- a/src/system_tests/system_tester.h +++ b/src/system_tests/system_tester.h @@ -80,8 +80,7 @@ class systemTest::SystemTestRunner * \param[in] maxAllowedError The maximum allowed for any value in the test * */ - void runL1ErrorTest(double const &maxAllowedL1Error, - double const &maxAllowedError = 1E-7); + void runL1ErrorTest(double const &maxAllowedL1Error, double const &maxAllowedError = 1E-7); /*! * \brief Launch Cholla as it is set up @@ -135,20 +134,14 @@ class systemTest::SystemTestRunner * \param index The MPI rank of the file you want to return. Defaults to 0 * \return H5::H5File */ - H5::H5File getTestFile(size_t const &i = 0) - { - return _testHydroFieldsFileVec[i]; - }; + H5::H5File getTestFile(size_t const &i = 0) { return _testHydroFieldsFileVec[i]; }; /*! * \brief Get the vector of datasets that will be tested * * \return std::vector */ - std::vector getDataSetsToTest() - { - return _fiducialDataSetNames; - }; + std::vector getDataSetsToTest() { return _fiducialDataSetNames; }; /*! * \brief Set the Fixed Epsilon value @@ -166,10 +159,7 @@ class systemTest::SystemTestRunner * \param[in] dataSetNames A std::vector of std::strings where each entry is * a dataset name. Note that it is case sensitive */ - void setDataSetsToTest(std::vector const &dataSetNames) - { - _fiducialDataSetNames = dataSetNames; - }; + void setDataSetsToTest(std::vector const &dataSetNames) { _fiducialDataSetNames = dataSetNames; }; /*! * \brief Set the Compare Num Time Steps object @@ -177,10 +167,7 @@ class systemTest::SystemTestRunner * \param[in] compare Defaults to `true`. If false then the number of * timesteps is not compared. */ - void setCompareNumTimeSteps(bool const &compare) - { - _compareNumTimeSteps = compare; - }; + void setCompareNumTimeSteps(bool const &compare) { _compareNumTimeSteps = compare; }; /*! * \brief Set or add a fiducial dataset @@ -189,18 +176,14 @@ class systemTest::SystemTestRunner * \param[in] dataArr The std::vector for the data vector to be added as * a data set */ - void setFiducialData(std::string const &fieldName, - std::vector const &dataVec); + void setFiducialData(std::string const &fieldName, std::vector const &dataVec); /*! * \brief Set the Fiducial Num Time Steps object * * \param numTimeSteps The number of time steps in the fiducial data */ - void setFiducialNumTimeSteps(int const &numTimeSteps) - { - _numFiducialTimeSteps = numTimeSteps; - }; + void setFiducialNumTimeSteps(int const &numTimeSteps) { _numFiducialTimeSteps = numTimeSteps; }; /*! * \brief Generate an vector of the specified size populated by the specified @@ -216,9 +199,7 @@ class systemTest::SystemTestRunner * \return std::vector A 1-dimensional std::vector of the required * size containing the data. */ - std::vector generateConstantData(double const &value, - size_t const &nx = 1, - size_t const &ny = 1, + std::vector generateConstantData(double const &value, size_t const &nx = 1, size_t const &ny = 1, size_t const &nz = 1); /*! @@ -231,8 +212,7 @@ class systemTest::SystemTestRunner * \param[in] file (optional) The vector of HDF5 files to load * \return std::vector A vector containing the data */ - std::vector loadTestFieldData(std::string dataSetName, - std::vector &testDims, + std::vector loadTestFieldData(std::string dataSetName, std::vector &testDims, std::vector file = {}); /*! @@ -256,10 +236,9 @@ class systemTest::SystemTestRunner * \return std::vector A 1-dimensional std::vector of the required * size containing the data. */ - std::vector generateSineData( - double const &offset, double const &litude, double const &kx, - double const &ky, double const &kz, double const &phase, - size_t const &nx = 1, size_t const &ny = 1, size_t const &nz = 1); + std::vector generateSineData(double const &offset, double const &litude, double const &kx, + double const &ky, double const &kz, double const &phase, size_t const &nx = 1, + size_t const &ny = 1, size_t const &nz = 1); // Constructor and Destructor /*! @@ -275,9 +254,7 @@ class systemTest::SystemTestRunner * convention. If false then the user MUST provide all the required settings * with the SystemTestRunner::chollaLaunchParams member variable */ - SystemTestRunner(bool const &particleData = false, - bool const &hydroData = true, - bool const &useFiducialFile = true, + SystemTestRunner(bool const &particleData = false, bool const &hydroData = true, bool const &useFiducialFile = true, bool const &useSettingsFile = true); ~SystemTestRunner(); @@ -352,8 +329,7 @@ class systemTest::SystemTestRunner * \param[in] destinationDirectory The path to the director the file should * be moved to */ - void _safeMove(std::string const &sourcePath, - std::string const &destinationDirectory); + void _safeMove(std::string const &sourcePath, std::string const &destinationDirectory); /*! * \brief Checks if the given file exists. Throws an exception if the diff --git a/src/utils/DeviceVector.h b/src/utils/DeviceVector.h index 337052a50..1af7aa3da 100644 --- a/src/utils/DeviceVector.h +++ b/src/utils/DeviceVector.h @@ -144,10 +144,7 @@ class DeviceVector * * \param[in] vecIn The array whose contents are to be copied */ - void cpyHostToDevice(std::vector const &vecIn) - { - cpyHostToDevice(vecIn.data(), vecIn.size()); - } + void cpyHostToDevice(std::vector const &vecIn) { cpyHostToDevice(vecIn.data(), vecIn.size()); } /*! * \brief Copy the array from the device to a host array. Checks if the @@ -164,10 +161,7 @@ class DeviceVector * * \param[out] vecOut The std::vector to copy the device array into */ - void cpyDeviceToHost(std::vector &vecOut) - { - cpyDeviceToHost(vecOut.data(), vecOut.size()); - } + void cpyDeviceToHost(std::vector &vecOut) { cpyDeviceToHost(vecOut.data(), vecOut.size()); } private: /// The size of the device array @@ -254,8 +248,7 @@ template T DeviceVector::operator[](size_t const &index) { T hostValue; - CudaSafeCall(cudaMemcpy(&hostValue, &(_ptr[index]), sizeof(T), - cudaMemcpyDeviceToHost)); + CudaSafeCall(cudaMemcpy(&hostValue, &(_ptr[index]), sizeof(T), cudaMemcpyDeviceToHost)); return hostValue; } // ========================================================================= @@ -293,15 +286,13 @@ template void DeviceVector::cpyHostToDevice(const T *arrIn, size_t const &arrSize) { if (arrSize <= _size) { - CudaSafeCall( - cudaMemcpy(_ptr, arrIn, arrSize * sizeof(T), cudaMemcpyHostToDevice)); + CudaSafeCall(cudaMemcpy(_ptr, arrIn, arrSize * sizeof(T), cudaMemcpyHostToDevice)); } else { throw std::out_of_range( "Warning: Couldn't copy array to device," " device array is too small. Host array" " size=" + - std::to_string(arrSize) + - ", device array size=" + std::to_string(arrSize)); + std::to_string(arrSize) + ", device array size=" + std::to_string(arrSize)); } } // ========================================================================= @@ -311,19 +302,17 @@ template void DeviceVector::cpyDeviceToHost(T *arrOut, size_t const &arrSize) { if (_size <= arrSize) { - CudaSafeCall( - cudaMemcpy(arrOut, _ptr, _size * sizeof(T), cudaMemcpyDeviceToHost)); + CudaSafeCall(cudaMemcpy(arrOut, _ptr, _size * sizeof(T), cudaMemcpyDeviceToHost)); } else { throw std::out_of_range( "Warning: Couldn't copy array to host, " "host array is too small. Host array " "size=" + - std::to_string(arrSize) + - ", device array size=" + std::to_string(arrSize)); + std::to_string(arrSize) + ", device array size=" + std::to_string(arrSize)); } } // ========================================================================= } // end namespace cuda_utilities -// ============================================================================= -// End definition of DeviceVector class -// ============================================================================= \ No newline at end of file + // ============================================================================= + // End definition of DeviceVector class + // ============================================================================= \ No newline at end of file diff --git a/src/utils/DeviceVector_tests.cu b/src/utils/DeviceVector_tests.cu index c873f2106..2759e253a 100644 --- a/src/utils/DeviceVector_tests.cu +++ b/src/utils/DeviceVector_tests.cu @@ -57,8 +57,7 @@ void checkPointerAttributes(cuda_utilities::DeviceVector &devVector) // ============================================================================= // Tests for expected behavior // ============================================================================= -TEST(tALLDeviceVectorConstructor, - CheckConstructorDataAndSizeExpectProperAllocationAndValues) +TEST(tALLDeviceVectorConstructor, CheckConstructorDataAndSizeExpectProperAllocationAndValues) { // Initialize the DeviceVector size_t const vectorSize = 10; @@ -92,7 +91,7 @@ TEST(tALLDeviceVectorDestructor, CheckDestructorExpectProperDeallocation) "1 is cudaMemoryTypeHost, " "2 is cudaMemoryTypeDevice, and" "3 is cudaMemoryTypeManaged"; - std::string deviceMessage = "The pointer should be null which is device -2"; + std::string deviceMessage = "The pointer should be null which is device -2"; std::string const devPtrMessage = "The device pointer is nullptr"; std::string const hostPtrMessage = "The host pointer is not nullptr"; @@ -155,8 +154,7 @@ TEST(tALLDeviceVectorArrayHostToDeviceCopyAndIndexing, } } -TEST(tALLDeviceVectorArrayAssignmentMethod, - AssignSingleValuesExpectCorrectMemoryValues) +TEST(tALLDeviceVectorArrayAssignmentMethod, AssignSingleValuesExpectCorrectMemoryValues) { // Initialize the vectors size_t const vectorSize = 10; @@ -171,8 +169,7 @@ TEST(tALLDeviceVectorArrayAssignmentMethod, EXPECT_EQ(17, devVector.at(4)); } -TEST(tALLDeviceVectorStdVectorDeviceToHostCopy, - CheckHostMemoryValuesExpectCorrectMemoryValues) +TEST(tALLDeviceVectorStdVectorDeviceToHostCopy, CheckHostMemoryValuesExpectCorrectMemoryValues) { // Initialize the vectors size_t const vectorSize = 10; @@ -192,8 +189,7 @@ TEST(tALLDeviceVectorStdVectorDeviceToHostCopy, } } -TEST(tALLDeviceVectorArrayDeviceToHostCopy, - CheckHostMemoryValuesExpectCorrectMemoryValues) +TEST(tALLDeviceVectorArrayDeviceToHostCopy, CheckHostMemoryValuesExpectCorrectMemoryValues) { // Initialize the vectors size_t const vectorSize = 10; @@ -317,8 +313,7 @@ TEST(tALLDeviceVectorAt, OutOfBoundsAccessExpectThrowOutOfRange) EXPECT_THROW(devVector.at(100), std::out_of_range); } -TEST(tALLDeviceVectorStdVectorHostToDeviceCopy, - OutOfBoundsCopyExpectThrowOutOfRange) +TEST(tALLDeviceVectorStdVectorHostToDeviceCopy, OutOfBoundsCopyExpectThrowOutOfRange) { // Initialize the vectors size_t const vectorSize = 10; @@ -330,8 +325,7 @@ TEST(tALLDeviceVectorStdVectorHostToDeviceCopy, EXPECT_THROW(devVector.cpyHostToDevice(stdVec), std::out_of_range); } -TEST(tALLDeviceVectorStdVectorDeviceToHostCopy, - OutOfBoundsCopyExpectThrowOutOfRange) +TEST(tALLDeviceVectorStdVectorDeviceToHostCopy, OutOfBoundsCopyExpectThrowOutOfRange) { // Initialize the vectors size_t const vectorSize = 10; diff --git a/src/utils/cuda_utilities.h b/src/utils/cuda_utilities.h index 0df707a66..9838ae2d8 100644 --- a/src/utils/cuda_utilities.h +++ b/src/utils/cuda_utilities.h @@ -24,9 +24,8 @@ namespace cuda_utilities * \param[out] yid The y index * \param[out] zid The z index */ -inline __host__ __device__ void compute3DIndices(int const &id, int const &nx, - int const &ny, int &xid, - int &yid, int &zid) +inline __host__ __device__ void compute3DIndices(int const &id, int const &nx, int const &ny, int &xid, int &yid, + int &zid) { zid = id / (nx * ny); yid = (id - zid * nx * ny) / nx; @@ -43,18 +42,14 @@ inline __host__ __device__ void compute3DIndices(int const &id, int const &nx, * \param ny The total number of cells in the y direction * \return int The 1D index */ -inline __host__ __device__ int compute1DIndex(int const &xid, int const &yid, - int const &zid, int const &nx, +inline __host__ __device__ int compute1DIndex(int const &xid, int const &yid, int const &zid, int const &nx, int const &ny) { return xid + yid * nx + zid * nx * ny; } -inline __host__ __device__ void Get_Real_Indices(int const &n_ghost, - int const &nx, int const &ny, - int const &nz, int &is, - int &ie, int &js, int &je, - int &ks, int &ke) +inline __host__ __device__ void Get_Real_Indices(int const &n_ghost, int const &nx, int const &ny, int const &nz, + int &is, int &ie, int &js, int &je, int &ks, int &ke) { is = n_ghost; ie = nx - n_ghost; @@ -80,10 +75,7 @@ inline __host__ __device__ void Get_Real_Indices(int const &n_ghost, * \param[in] ptr The pointer to GPU memory * \param[in] N The size of the array in bytes */ -inline void initGpuMemory(Real *ptr, size_t N) -{ - CudaSafeCall(cudaMemset(ptr, 0, N)); -} +inline void initGpuMemory(Real *ptr, size_t N) { CudaSafeCall(cudaMemset(ptr, 0, N)); } // ===================================================================== /*! @@ -113,8 +105,7 @@ struct AutomaticLaunchParams { */ AutomaticLaunchParams(T &kernel, size_t numElements = 0) { - cudaOccupancyMaxPotentialBlockSize(&numBlocks, &threadsPerBlock, kernel, 0, - 0); + cudaOccupancyMaxPotentialBlockSize(&numBlocks, &threadsPerBlock, kernel, 0, 0); if (numElements > 0) { numBlocks = (numElements + threadsPerBlock - 1) / threadsPerBlock; diff --git a/src/utils/cuda_utilities_tests.cpp b/src/utils/cuda_utilities_tests.cpp index a215fc976..08c1004b2 100644 --- a/src/utils/cuda_utilities_tests.cpp +++ b/src/utils/cuda_utilities_tests.cpp @@ -37,8 +37,7 @@ struct TestParams { std::vector nx{100, 2048, 2048, 2048}; std::vector ny{1, 2048, 2048, 2048}; std::vector nz{1, 4096, 4096, 4096}; - std::vector names{"Single-cell 3D PCM/PLMP case", - "Large 3D PCM/PLMP case", "Large PLMC case", + std::vector names{"Single-cell 3D PCM/PLMP case", "Large 3D PCM/PLMP case", "Large PLMC case", "Large PPMP/PPMC case"}; }; } // namespace @@ -46,10 +45,8 @@ struct TestParams { TEST(tHYDROCudaUtilsGetRealIndices, CorrectInputExpectCorrectOutput) { TestParams parameters; - std::vector> fiducial_indices{{2, 98, 0, 1, 0, 1}, - {2, 2046, 2, 2046, 2, 4094}, - {3, 2045, 3, 2045, 3, 4093}, - {4, 2044, 4, 2044, 4, 4092}}; + std::vector> fiducial_indices{ + {2, 98, 0, 1, 0, 1}, {2, 2046, 2, 2046, 2, 4094}, {3, 2045, 3, 2045, 3, 4093}, {4, 2044, 4, 2044, 4, 4092}}; for (size_t i = 0; i < parameters.names.size(); i++) { int is; @@ -58,17 +55,15 @@ TEST(tHYDROCudaUtilsGetRealIndices, CorrectInputExpectCorrectOutput) int je; int ks; int ke; - cuda_utilities::Get_Real_Indices( - parameters.n_ghost.at(i), parameters.nx.at(i), parameters.ny.at(i), - parameters.nz.at(i), is, ie, js, je, ks, ke); + cuda_utilities::Get_Real_Indices(parameters.n_ghost.at(i), parameters.nx.at(i), parameters.ny.at(i), + parameters.nz.at(i), is, ie, js, je, ks, ke); std::vector index_names{"is", "ie", "js", "je", "ks", "ke"}; std::vector test_indices{is, ie, js, je, ks, ke}; for (size_t j = 0; j < test_indices.size(); j++) { - testingUtilities::checkResults( - fiducial_indices[i][j], test_indices[j], - index_names[j] + " " + parameters.names[i]); + testingUtilities::checkResults(fiducial_indices[i][j], test_indices[j], + index_names[j] + " " + parameters.names[i]); } } } diff --git a/src/utils/error_check_cuda.cu b/src/utils/error_check_cuda.cu index b8187a502..4b91efee5 100644 --- a/src/utils/error_check_cuda.cu +++ b/src/utils/error_check_cuda.cu @@ -13,16 +13,14 @@ #include "../utils/error_check_cuda.h" #include "../utils/gpu.hpp" -__global__ void Check_Value_Along_Axis(Real *dev_array, int n_field, int nx, - int ny, int nz, int n_ghost, +__global__ void Check_Value_Along_Axis(Real *dev_array, int n_field, int nx, int ny, int nz, int n_ghost, int *return_value) { int tid_j = blockIdx.x * blockDim.x + threadIdx.x; int tid_k = blockIdx.y * blockDim.y + threadIdx.y; if (blockDim.x != N_Y || blockDim.y != N_Z) { - if (tid_j == 0 && tid_k == 0) - printf("ERROR CHECK: Block Dimension Error \n"); + if (tid_j == 0 && tid_k == 0) printf("ERROR CHECK: Block Dimension Error \n"); return; } @@ -55,17 +53,16 @@ __global__ void Check_Value_Along_Axis(Real *dev_array, int n_field, int nx, if (tid_j == 0 && tid_k == 0) *return_value = error; } -int Check_Field_Along_Axis(Real *dev_array, int n_field, int nx, int ny, int nz, - int n_ghost, dim3 Grid_Error, dim3 Block_Error) +int Check_Field_Along_Axis(Real *dev_array, int n_field, int nx, int ny, int nz, int n_ghost, dim3 Grid_Error, + dim3 Block_Error) { int *error_value_dev; CudaSafeCall(cudaMalloc((void **)&error_value_dev, sizeof(int))); - hipLaunchKernelGGL(Check_Value_Along_Axis, Grid_Error, Block_Error, 0, 0, - dev_conserved, 0, nx, ny, nz, n_ghost, error_value_dev); + hipLaunchKernelGGL(Check_Value_Along_Axis, Grid_Error, Block_Error, 0, 0, dev_conserved, 0, nx, ny, nz, n_ghost, + error_value_dev); int error_value_host; - CudaSafeCall(cudaMemcpy(&error_value_host, error_value_dev, sizeof(int), - cudaMemcpyDeviceToHost)); + CudaSafeCall(cudaMemcpy(&error_value_host, error_value_dev, sizeof(int), cudaMemcpyDeviceToHost)); return error_value_host; } diff --git a/src/utils/error_check_cuda.h b/src/utils/error_check_cuda.h index 42061a6a2..110a1b035 100644 --- a/src/utils/error_check_cuda.h +++ b/src/utils/error_check_cuda.h @@ -11,11 +11,10 @@ #define N_Z 24 #define N_Y 24 -int Check_Field_Along_Axis(Real *dev_array, int n_field, int nx, int ny, int nz, - int n_ghost, dim3 Grid_Error, dim3 Block_Error); +int Check_Field_Along_Axis(Real *dev_array, int n_field, int nx, int ny, int nz, int n_ghost, dim3 Grid_Error, + dim3 Block_Error); -__global__ void Check_Value_Along_Axis(Real *dev_array, int n_field, int nx, - int ny, int nz, int n_ghost, +__global__ void Check_Value_Along_Axis(Real *dev_array, int n_field, int nx, int ny, int nz, int n_ghost, int *return_value); #endif // ERROR_CHECK_CUDA_H diff --git a/src/utils/gpu_arrays_functions.cu b/src/utils/gpu_arrays_functions.cu index e36a26545..4604f58d6 100644 --- a/src/utils/gpu_arrays_functions.cu +++ b/src/utils/gpu_arrays_functions.cu @@ -5,27 +5,22 @@ #include "../utils/gpu.hpp" #include "../utils/gpu_arrays_functions.h" -void Extend_GPU_Array_Real(Real **current_array_d, int current_size, - int new_size, bool print_out) +void Extend_GPU_Array_Real(Real **current_array_d, int current_size, int new_size, bool print_out) { if (new_size <= current_size) return; - if (print_out) - std::cout << " Extending GPU Array, size: " << current_size - << " new_size: " << new_size << std::endl; + if (print_out) std::cout << " Extending GPU Array, size: " << current_size << " new_size: " << new_size << std::endl; size_t global_free, global_total; CudaSafeCall(cudaMemGetInfo(&global_free, &global_total)); cudaDeviceSynchronize(); #ifdef PRINT_GPU_MEMORY - printf("ReAllocating GPU Memory: %d MB free \n", - (int)global_free / 1000000); + printf("ReAllocating GPU Memory: %d MB free \n", (int)global_free / 1000000); #endif if (global_free < new_size * sizeof(Real)) { printf("ERROR: Not enough global device memory \n"); printf(" Available Memory: %d MB \n", (int)(global_free / 1000000)); - printf(" Requested Memory: %d MB \n", - (int)(new_size * sizeof(Real) / 1000000)); + printf(" Requested Memory: %d MB \n", (int)(new_size * sizeof(Real) / 1000000)); // exit(-1); } @@ -39,9 +34,7 @@ void Extend_GPU_Array_Real(Real **current_array_d, int current_size, } // Copy the content of the original array to the new array - CudaSafeCall(cudaMemcpy(new_array_d, *current_array_d, - current_size * sizeof(Real), - cudaMemcpyDeviceToDevice)); + CudaSafeCall(cudaMemcpy(new_array_d, *current_array_d, current_size * sizeof(Real), cudaMemcpyDeviceToDevice)); cudaDeviceSynchronize(); CudaCheckError(); diff --git a/src/utils/gpu_arrays_functions.h b/src/utils/gpu_arrays_functions.h index 848b9af4d..5ed935bce 100644 --- a/src/utils/gpu_arrays_functions.h +++ b/src/utils/gpu_arrays_functions.h @@ -9,13 +9,10 @@ #include "../utils/gpu_arrays_functions.h" template -void Extend_GPU_Array(T **current_array_d, int current_size, int new_size, - bool print_out) +void Extend_GPU_Array(T **current_array_d, int current_size, int new_size, bool print_out) { if (new_size <= current_size) return; - if (print_out) - std::cout << " Extending GPU Array, size: " << current_size - << " new_size: " << new_size << std::endl; + if (print_out) std::cout << " Extending GPU Array, size: " << current_size << " new_size: " << new_size << std::endl; size_t global_free, global_total; CudaSafeCall(cudaMemGetInfo(&global_free, &global_total)); @@ -41,8 +38,7 @@ void Extend_GPU_Array(T **current_array_d, int current_size, int new_size, } // Copy the content of the original array to the new array - CudaSafeCall(cudaMemcpy(new_array_d, *current_array_d, - current_size * sizeof(T), cudaMemcpyDeviceToDevice)); + CudaSafeCall(cudaMemcpy(new_array_d, *current_array_d, current_size * sizeof(T), cudaMemcpyDeviceToDevice)); cudaDeviceSynchronize(); CudaCheckError(); diff --git a/src/utils/hydro_utilities.h b/src/utils/hydro_utilities.h index 7a2549b38..358d7e352 100644 --- a/src/utils/hydro_utilities.h +++ b/src/utils/hydro_utilities.h @@ -29,9 +29,8 @@ namespace hydro_utilities { -inline __host__ __device__ Real -Calc_Pressure_Primitive(Real const &E, Real const &d, Real const &vx, - Real const &vy, Real const &vz, Real const &gamma) +inline __host__ __device__ Real Calc_Pressure_Primitive(Real const &E, Real const &d, Real const &vx, Real const &vy, + Real const &vz, Real const &gamma) { Real P; P = (E - 0.5 * d * (vx * vx + vy * vy + vz * vz)) * (gamma - 1.0); @@ -39,9 +38,8 @@ Calc_Pressure_Primitive(Real const &E, Real const &d, Real const &vx, return P; } -inline __host__ __device__ Real -Calc_Pressure_Conserved(Real const &E, Real const &d, Real const &mx, - Real const &my, Real const &mz, Real const &gamma) +inline __host__ __device__ Real Calc_Pressure_Conserved(Real const &E, Real const &d, Real const &mx, Real const &my, + Real const &mz, Real const &gamma) { Real P = (E - 0.5 * (mx * mx + my * my + mz * mz) / d) * (gamma - 1.); return fmax(P, TINY_NUMBER); @@ -54,26 +52,21 @@ inline __host__ __device__ Real Calc_Temp(Real const &P, Real const &n) } #ifdef DE -inline __host__ __device__ Real Calc_Temp_DE(Real const &d, Real const &ge, - Real const &gamma, Real const &n) +inline __host__ __device__ Real Calc_Temp_DE(Real const &d, Real const &ge, Real const &gamma, Real const &n) { Real T = d * ge * (gamma - 1.0) * PRESSURE_UNIT / (n * KB); return T; } #endif // DE -inline __host__ __device__ Real -Calc_Energy_Primitive(Real const &P, Real const &d, Real const &vx, - Real const &vy, Real const &vz, Real const &gamma) +inline __host__ __device__ Real Calc_Energy_Primitive(Real const &P, Real const &d, Real const &vx, Real const &vy, + Real const &vz, Real const &gamma) { // Compute and return energy - return (fmax(P, TINY_NUMBER) / (gamma - 1.)) + - 0.5 * d * (vx * vx + vy * vy + vz * vz); + return (fmax(P, TINY_NUMBER) / (gamma - 1.)) + 0.5 * d * (vx * vx + vy * vy + vz * vz); } -inline __host__ __device__ Real Get_Pressure_From_DE(Real const &E, - Real const &U_total, - Real const &U_advected, +inline __host__ __device__ Real Get_Pressure_From_DE(Real const &E, Real const &U_total, Real const &U_advected, Real const &gamma) { Real U, P; @@ -98,8 +91,8 @@ inline __host__ __device__ Real Get_Pressure_From_DE(Real const &E, * \param[in] vz The z velocity * \return Real The kinetic energy */ -inline __host__ __device__ Real Calc_Kinetic_Energy_From_Velocity( - Real const &d, Real const &vx, Real const &vy, Real const &vz) +inline __host__ __device__ Real Calc_Kinetic_Energy_From_Velocity(Real const &d, Real const &vx, Real const &vy, + Real const &vz) { return 0.5 * d * (vx * vx + vy * vy * vz * vz); } @@ -113,16 +106,14 @@ inline __host__ __device__ Real Calc_Kinetic_Energy_From_Velocity( * \param[in] mz The z momentum * \return Real The kinetic energy */ -inline __host__ __device__ Real Calc_Kinetic_Energy_From_Momentum( - Real const &d, Real const &mx, Real const &my, Real const &mz) +inline __host__ __device__ Real Calc_Kinetic_Energy_From_Momentum(Real const &d, Real const &mx, Real const &my, + Real const &mz) { return (0.5 / d) * (mx * mx + my * my * mz * mz); } -inline __host__ __device__ Real Calc_Sound_Speed(Real const &E, Real const &d, - Real const &mx, Real const &my, - Real const &mz, - Real const &gamma) +inline __host__ __device__ Real Calc_Sound_Speed(Real const &E, Real const &d, Real const &mx, Real const &my, + Real const &mz, Real const &gamma) { Real P = Calc_Pressure_Conserved(E, d, mx, my, mz, gamma); return sqrt(gamma * P / d); diff --git a/src/utils/hydro_utilities_tests.cpp b/src/utils/hydro_utilities_tests.cpp index 0348a362b..6c8c37cf1 100644 --- a/src/utils/hydro_utilities_tests.cpp +++ b/src/utils/hydro_utilities_tests.cpp @@ -49,57 +49,46 @@ struct TestParams { std::vector n{3.0087201154e-10, 1.3847303413e2, 1.0882403847e100}; std::vector ge{4.890374019e-10, 1.0756968986e2, 3.8740982372e100}; std::vector U_total{2.389074039e-10, 4.890374019e2, 6.8731436293e100}; - std::vector U_advected{1.3847303413e-10, 1.0756968986e2, - 1.0882403847e100}; - std::vector names{"Small number case", "Medium number case", - "Large number case"}; + std::vector U_advected{1.3847303413e-10, 1.0756968986e2, 1.0882403847e100}; + std::vector names{"Small number case", "Medium number case", "Large number case"}; }; } // namespace TEST(tHYDROHydroUtilsCalcPressurePrimitive, CorrectInputExpectCorrectOutput) { TestParams parameters; - std::vector fiducial_Ps{1e-20, 139983415580.5549, - 1.2697896247496674e+301}; + std::vector fiducial_Ps{1e-20, 139983415580.5549, 1.2697896247496674e+301}; for (size_t i = 0; i < parameters.names.size(); i++) { - Real test_Ps = hydro_utilities::Calc_Pressure_Primitive( - parameters.E.at(i), parameters.d.at(i), parameters.vx.at(i), - parameters.vy.at(i), parameters.vz.at(i), parameters.gamma); + Real test_Ps = hydro_utilities::Calc_Pressure_Primitive(parameters.E.at(i), parameters.d.at(i), parameters.vx.at(i), + parameters.vy.at(i), parameters.vz.at(i), parameters.gamma); - testingUtilities::checkResults(fiducial_Ps.at(i), test_Ps, - parameters.names.at(i)); + testingUtilities::checkResults(fiducial_Ps.at(i), test_Ps, parameters.names.at(i)); } } TEST(tHYDROHydroUtilsCalcPressureConserved, CorrectInputExpectCorrectOutput) { TestParams parameters; - std::vector fiducial_Ps{1e-20, 139984604373.87094, - 1.3965808056866668e+301}; + std::vector fiducial_Ps{1e-20, 139984604373.87094, 1.3965808056866668e+301}; for (size_t i = 0; i < parameters.names.size(); i++) { - Real test_Ps = hydro_utilities::Calc_Pressure_Conserved( - parameters.E.at(i), parameters.d.at(i), parameters.mx.at(i), - parameters.my.at(i), parameters.mz.at(i), parameters.gamma); + Real test_Ps = hydro_utilities::Calc_Pressure_Conserved(parameters.E.at(i), parameters.d.at(i), parameters.mx.at(i), + parameters.my.at(i), parameters.mz.at(i), parameters.gamma); - testingUtilities::checkResults(fiducial_Ps.at(i), test_Ps, - parameters.names.at(i)); + testingUtilities::checkResults(fiducial_Ps.at(i), test_Ps, parameters.names.at(i)); } } TEST(tHYDROHydroUtilsCalcTemp, CorrectInputExpectCorrectOutput) { TestParams parameters; - std::vector fiducial_Ts{3465185.0560059389, 29370603.906644326, - 28968949.83344138}; + std::vector fiducial_Ts{3465185.0560059389, 29370603.906644326, 28968949.83344138}; for (size_t i = 0; i < parameters.names.size(); i++) { - Real test_Ts = - hydro_utilities::Calc_Temp(parameters.P.at(i), parameters.n.at(i)); + Real test_Ts = hydro_utilities::Calc_Temp(parameters.P.at(i), parameters.n.at(i)); - testingUtilities::checkResults(fiducial_Ts.at(i), test_Ts, - parameters.names.at(i)); + testingUtilities::checkResults(fiducial_Ts.at(i), test_Ts, parameters.names.at(i)); } } @@ -107,16 +96,13 @@ TEST(tHYDROHydroUtilsCalcTemp, CorrectInputExpectCorrectOutput) TEST(tHYDROHydroUtilsCalcTempDE, CorrectInputExpectCorrectOutput) { TestParams parameters; - std::vector fiducial_Ts{5.123106988008801e-09, 261106139.02514684, - 1.2105231166585662e+107}; + std::vector fiducial_Ts{5.123106988008801e-09, 261106139.02514684, 1.2105231166585662e+107}; for (size_t i = 0; i < parameters.names.size(); i++) { Real test_Ts = - hydro_utilities::Calc_Temp_DE(parameters.d.at(i), parameters.ge.at(i), - parameters.gamma, parameters.n.at(i)); + hydro_utilities::Calc_Temp_DE(parameters.d.at(i), parameters.ge.at(i), parameters.gamma, parameters.n.at(i)); - testingUtilities::checkResults(fiducial_Ts.at(i), test_Ts, - parameters.names.at(i)); + testingUtilities::checkResults(fiducial_Ts.at(i), test_Ts, parameters.names.at(i)); } } #endif // DE @@ -124,49 +110,40 @@ TEST(tHYDROHydroUtilsCalcTempDE, CorrectInputExpectCorrectOutput) TEST(tHYDROHydroUtilsCalcEnergyPrimitive, CorrectInputExpectCorrectOutput) { TestParams parameters; - std::vector fiducial_Es{3.3366124363499997e-10, 1784507.7619407175, - 1.9018677140549926e+300}; + std::vector fiducial_Es{3.3366124363499997e-10, 1784507.7619407175, 1.9018677140549926e+300}; for (size_t i = 0; i < parameters.names.size(); i++) { - Real test_Es = hydro_utilities::Calc_Energy_Primitive( - parameters.P.at(i), parameters.d.at(i), parameters.vx.at(i), - parameters.vy.at(i), parameters.vz.at(i), parameters.gamma); + Real test_Es = hydro_utilities::Calc_Energy_Primitive(parameters.P.at(i), parameters.d.at(i), parameters.vx.at(i), + parameters.vy.at(i), parameters.vz.at(i), parameters.gamma); - testingUtilities::checkResults(fiducial_Es.at(i), test_Es, - parameters.names.at(i)); + testingUtilities::checkResults(fiducial_Es.at(i), test_Es, parameters.names.at(i)); } } TEST(tHYDROHydroUtilsGetPressureFromDE, CorrectInputExpectCorrectOutput) { TestParams parameters; - std::vector fiducial_Ps{1.5927160260000002e-10, 71.713126573333341, - 7.2549358980000001e+99}; + std::vector fiducial_Ps{1.5927160260000002e-10, 71.713126573333341, 7.2549358980000001e+99}; for (size_t i = 0; i < parameters.names.size(); i++) { - Real test_Ps = hydro_utilities::Get_Pressure_From_DE( - parameters.E.at(i), parameters.U_total.at(i), - parameters.U_advected.at(i), parameters.gamma); + Real test_Ps = hydro_utilities::Get_Pressure_From_DE(parameters.E.at(i), parameters.U_total.at(i), + parameters.U_advected.at(i), parameters.gamma); - testingUtilities::checkResults(fiducial_Ps.at(i), test_Ps, - parameters.names.at(i)); + testingUtilities::checkResults(fiducial_Ps.at(i), test_Ps, parameters.names.at(i)); } } TEST(tHYDROtMHDCalcKineticEnergyFromVelocity, CorrectInputExpectCorrectOutput) { TestParams parameters; - std::vector fiducialEnergies{0.0, 6.307524975350106e-145, - 7.3762470327090601e+249}; + std::vector fiducialEnergies{0.0, 6.307524975350106e-145, 7.3762470327090601e+249}; double const coef = 1E-50; for (size_t i = 0; i < parameters.names.size(); i++) { Real testEnergy = hydro_utilities::Calc_Kinetic_Energy_From_Velocity( - coef * parameters.d.at(i), coef * parameters.vx.at(i), - coef * parameters.vy.at(i), coef * parameters.vz.at(i)); + coef * parameters.d.at(i), coef * parameters.vx.at(i), coef * parameters.vy.at(i), coef * parameters.vz.at(i)); - testingUtilities::checkResults(fiducialEnergies.at(i), testEnergy, - parameters.names.at(i)); + testingUtilities::checkResults(fiducialEnergies.at(i), testEnergy, parameters.names.at(i)); } } @@ -178,10 +155,8 @@ TEST(tHYDROtMHDCalcKineticEnergyFromMomentum, CorrectInputExpectCorrectOutput) for (size_t i = 0; i < parameters.names.size(); i++) { Real testEnergy = hydro_utilities::Calc_Kinetic_Energy_From_Momentum( - coef * parameters.d.at(i), coef * parameters.mx.at(i), - coef * parameters.my.at(i), coef * parameters.mz.at(i)); + coef * parameters.d.at(i), coef * parameters.mx.at(i), coef * parameters.my.at(i), coef * parameters.mz.at(i)); - testingUtilities::checkResults(fiducialEnergies.at(i), testEnergy, - parameters.names.at(i)); + testingUtilities::checkResults(fiducialEnergies.at(i), testEnergy, parameters.names.at(i)); } } \ No newline at end of file diff --git a/src/utils/math_utilities.h b/src/utils/math_utilities.h index 9c329b1d7..1480f852c 100644 --- a/src/utils/math_utilities.h +++ b/src/utils/math_utilities.h @@ -39,8 +39,7 @@ namespace math_utils * order . Intended to be captured with structured binding */ template -inline std::tuple rotateCoords(Real const &x_1, Real const &x_2, - Real const &x_3, Real const &pitch, +inline std::tuple rotateCoords(Real const &x_1, Real const &x_2, Real const &x_3, Real const &pitch, Real const &yaw) { // Compute the sines and cosines. Correct for floating point errors if @@ -51,10 +50,8 @@ inline std::tuple rotateCoords(Real const &x_1, Real const &x_2, Real const cos_pitch = (pitch == 0.5 * M_PI) ? 0 : std::cos(pitch); // Perform the rotation - Real const x_1_rot = (x_1 * cos_pitch * cos_yaw) + (x_2 * sin_yaw) + - (x_3 * sin_pitch * cos_yaw); - Real const x_2_rot = (x_1 * cos_pitch * sin_yaw) + (x_2 * cos_yaw) + - (x_3 * sin_pitch * sin_yaw); + Real const x_1_rot = (x_1 * cos_pitch * cos_yaw) + (x_2 * sin_yaw) + (x_3 * sin_pitch * cos_yaw); + Real const x_2_rot = (x_1 * cos_pitch * sin_yaw) + (x_2 * cos_yaw) + (x_3 * sin_pitch * sin_yaw); Real const x_3_rot = (x_1 * sin_pitch) + (x_3 * cos_pitch); if (std::is_same::value) { @@ -78,8 +75,7 @@ inline std::tuple rotateCoords(Real const &x_1, Real const &x_2, * * \return Real The dot product of a and b */ -inline __device__ __host__ Real dotProduct(Real const &a1, Real const &a2, - Real const &a3, Real const &b1, +inline __device__ __host__ Real dotProduct(Real const &a1, Real const &a2, Real const &a3, Real const &b1, Real const &b2, Real const &b3) { return a1 * b1 + ((a2 * b2) + (a3 * b3)); diff --git a/src/utils/math_utilities_tests.cpp b/src/utils/math_utilities_tests.cpp index 37c4596bc..665a5981c 100644 --- a/src/utils/math_utilities_tests.cpp +++ b/src/utils/math_utilities_tests.cpp @@ -29,8 +29,7 @@ TEST(tALLRotateCoords, CorrectInputExpectCorrectOutput) double const x_2_rot_fid = 14.745363873361605; double const x_3_rot_fid = -76.05402749550727; - auto [x_1_rot, x_2_rot, x_3_rot] = - math_utils::rotateCoords(x_1, x_2, x_3, pitch, yaw); + auto [x_1_rot, x_2_rot, x_3_rot] = math_utils::rotateCoords(x_1, x_2, x_3, pitch, yaw); testingUtilities::checkResults<0>(x_1_rot_fid, x_1_rot, "x_1 rotated values"); testingUtilities::checkResults<0>(x_2_rot_fid, x_2_rot, "x_2 rotated values"); @@ -45,19 +44,16 @@ TEST(tALLRotateCoords, CorrectInputExpectCorrectOutput) */ TEST(tALLDotProduct, CorrectInputExpectCorrectOutput) { - std::vector a{21.503067766457753, 48.316634031589935, - 81.12177317622657}, + std::vector a{21.503067766457753, 48.316634031589935, 81.12177317622657}, b{38.504606872151484, 18.984145880030045, 89.52561861038686}; double const fiducialDotProduct = 9007.6941261535867; double testDotProduct; - testDotProduct = math_utils::dotProduct(a.at(0), a.at(1), a.at(2), b.at(0), - b.at(1), b.at(2)); + testDotProduct = math_utils::dotProduct(a.at(0), a.at(1), a.at(2), b.at(0), b.at(1), b.at(2)); // Now check results - testingUtilities::checkResults(fiducialDotProduct, testDotProduct, - "dot product"); + testingUtilities::checkResults(fiducialDotProduct, testDotProduct, "dot product"); } // ========================================================================= \ No newline at end of file diff --git a/src/utils/mhd_utilities.h b/src/utils/mhd_utilities.h index 9f5903299..aeca4aa8b 100644 --- a/src/utils/mhd_utilities.h +++ b/src/utils/mhd_utilities.h @@ -43,23 +43,20 @@ namespace _internal * magnetosonic wave * \return Real The speed of the fast or slow magnetosonic wave */ -inline __host__ __device__ Real _magnetosonicSpeed( - Real const &density, Real const &gasPressure, Real const &magneticX, - Real const &magneticY, Real const &magneticZ, Real const &gamma, - Real const &waveChoice) +inline __host__ __device__ Real _magnetosonicSpeed(Real const &density, Real const &gasPressure, Real const &magneticX, + Real const &magneticY, Real const &magneticZ, Real const &gamma, + Real const &waveChoice) { // Compute the sound speed Real bXSquared = magneticX * magneticX; - Real bSquared = - bXSquared + ((magneticY * magneticY) + (magneticZ * magneticZ)); + Real bSquared = bXSquared + ((magneticY * magneticY) + (magneticZ * magneticZ)); Real term1 = gamma * gasPressure + bSquared; Real term2 = (term1 * term1) - 4. * gamma * gasPressure * bXSquared; term2 = sqrt(term2); - return sqrt((term1 + waveChoice * term2) / - (2.0 * fmax(density, TINY_NUMBER))); + return sqrt((term1 + waveChoice * term2) / (2.0 * fmax(density, TINY_NUMBER))); } // ===================================================================== } // namespace _internal @@ -80,19 +77,15 @@ inline __host__ __device__ Real _magnetosonicSpeed( * \param[in] gamma The adiabatic index * \return Real The energy within a cell */ -inline __host__ __device__ Real computeEnergy( - Real const &pressure, Real const &density, Real const &velocityX, - Real const &velocityY, Real const &velocityZ, Real const &magneticX, - Real const &magneticY, Real const &magneticZ, Real const &gamma) +inline __host__ __device__ Real computeEnergy(Real const &pressure, Real const &density, Real const &velocityX, + Real const &velocityY, Real const &velocityZ, Real const &magneticX, + Real const &magneticY, Real const &magneticZ, Real const &gamma) { // Compute and return energy Real energy = (fmax(pressure, TINY_NUMBER) / (gamma - 1.)) + - 0.5 * density * - (velocityX * velocityX + - ((velocityY * velocityY) + (velocityZ * velocityZ))); + 0.5 * density * (velocityX * velocityX + ((velocityY * velocityY) + (velocityZ * velocityZ))); #ifdef MHD - energy += 0.5 * (magneticX * magneticX + - ((magneticY * magneticY) + (magneticZ * magneticZ))); + energy += 0.5 * (magneticX * magneticX + ((magneticY * magneticY) + (magneticZ * magneticZ))); #endif // MHD return energy; @@ -114,19 +107,14 @@ inline __host__ __device__ Real computeEnergy( * \param[in] gamma The adiabatic index * \return Real The gas pressure in a cell */ -inline __host__ __device__ Real computeGasPressure( - Real const &energy, Real const &density, Real const &momentumX, - Real const &momentumY, Real const &momentumZ, Real const &magneticX, - Real const &magneticY, Real const &magneticZ, Real const &gamma) +inline __host__ __device__ Real computeGasPressure(Real const &energy, Real const &density, Real const &momentumX, + Real const &momentumY, Real const &momentumZ, Real const &magneticX, + Real const &magneticY, Real const &magneticZ, Real const &gamma) { - Real pressure = (gamma - 1.) * - (energy - - 0.5 * - (momentumX * momentumX + - ((momentumY * momentumY) + (momentumZ * momentumZ))) / - density - - 0.5 * (magneticX * magneticX + - ((magneticY * magneticY) + (magneticZ * magneticZ)))); + Real pressure = + (gamma - 1.) * + (energy - 0.5 * (momentumX * momentumX + ((momentumY * momentumY) + (momentumZ * momentumZ))) / density - + 0.5 * (magneticX * magneticX + ((magneticY * magneticY) + (magneticZ * magneticZ)))); return fmax(pressure, TINY_NUMBER); } @@ -140,14 +128,12 @@ inline __host__ __device__ Real computeGasPressure( * \param gamma The adiabatic index * \return Real The gas pressure */ -inline __host__ __device__ Real -computeGasPressure(mhd::_internal::State const &state, Real const &magneticX, - Real const &gamma) +inline __host__ __device__ Real computeGasPressure(mhd::_internal::State const &state, Real const &magneticX, + Real const &gamma) { - return mhd::utils::computeGasPressure( - state.energy, state.density, state.velocityX * state.density, - state.velocityY * state.density, state.velocityZ * state.density, - magneticX, state.magneticY, state.magneticZ, gamma); + return mhd::utils::computeGasPressure(state.energy, state.density, state.velocityX * state.density, + state.velocityY * state.density, state.velocityZ * state.density, magneticX, + state.magneticY, state.magneticZ, gamma); } // ========================================================================= @@ -166,18 +152,15 @@ computeGasPressure(mhd::_internal::State const &state, Real const &magneticX, * \param[in] gamma The adiabatic index * \return Real The thermal energy in a cell */ -inline __host__ __device__ Real computeThermalEnergy( - Real const &energyTot, Real const &density, Real const &momentumX, - Real const &momentumY, Real const &momentumZ, Real const &magneticX, - Real const &magneticY, Real const &magneticZ, Real const &gamma) +inline __host__ __device__ Real computeThermalEnergy(Real const &energyTot, Real const &density, Real const &momentumX, + Real const &momentumY, Real const &momentumZ, + Real const &magneticX, Real const &magneticY, + Real const &magneticZ, Real const &gamma) { return energyTot - - 0.5 * - (momentumX * momentumX + - ((momentumY * momentumY) + (momentumZ * momentumZ))) / + 0.5 * (momentumX * momentumX + ((momentumY * momentumY) + (momentumZ * momentumZ))) / fmax(density, TINY_NUMBER) - - 0.5 * (magneticX * magneticX + - ((magneticY * magneticY) + (magneticZ * magneticZ))); + 0.5 * (magneticX * magneticX + ((magneticY * magneticY) + (magneticZ * magneticZ))); } // ========================================================================= @@ -190,12 +173,10 @@ inline __host__ __device__ Real computeThermalEnergy( * \param[in] magneticZ The magnetic field in the Z-direction * \return Real The magnetic energy */ -inline __host__ __device__ Real computeMagneticEnergy(Real const &magneticX, - Real const &magneticY, +inline __host__ __device__ Real computeMagneticEnergy(Real const &magneticX, Real const &magneticY, Real const &magneticZ) { - return 0.5 * (magneticX * magneticX + - ((magneticY * magneticY) + (magneticZ * magneticZ))); + return 0.5 * (magneticX * magneticX + ((magneticY * magneticY) + (magneticZ * magneticZ))); } // ========================================================================= @@ -210,14 +191,10 @@ inline __host__ __device__ Real computeMagneticEnergy(Real const &magneticX, * \param[in] magneticZ Magnetic field in the z-direction * \return Real The total MHD pressure */ -inline __host__ __device__ Real computeTotalPressure(Real const &gasPressure, - Real const &magneticX, - Real const &magneticY, - Real const &magneticZ) +inline __host__ __device__ Real computeTotalPressure(Real const &gasPressure, Real const &magneticX, + Real const &magneticY, Real const &magneticZ) { - Real pTot = - gasPressure + 0.5 * (magneticX * magneticX + - ((magneticY * magneticY) + (magneticZ * magneticZ))); + Real pTot = gasPressure + 0.5 * (magneticX * magneticX + ((magneticY * magneticY) + (magneticZ * magneticZ))); return fmax(pTot, TINY_NUMBER); } @@ -235,13 +212,11 @@ inline __host__ __device__ Real computeTotalPressure(Real const &gasPressure, * \param gamma The adiabatic index * \return Real The speed of the fast magnetosonic wave */ -inline __host__ __device__ Real fastMagnetosonicSpeed( - Real const &density, Real const &pressure, Real const &magneticX, - Real const &magneticY, Real const &magneticZ, Real const &gamma) +inline __host__ __device__ Real fastMagnetosonicSpeed(Real const &density, Real const &pressure, Real const &magneticX, + Real const &magneticY, Real const &magneticZ, Real const &gamma) { // Compute the sound speed - return mhd::utils::_internal::_magnetosonicSpeed( - density, pressure, magneticX, magneticY, magneticZ, gamma, 1.0); + return mhd::utils::_internal::_magnetosonicSpeed(density, pressure, magneticX, magneticY, magneticZ, gamma, 1.0); } // ========================================================================= @@ -257,13 +232,11 @@ inline __host__ __device__ Real fastMagnetosonicSpeed( * \param gamma The adiabatic index * \return Real The speed of the slow magnetosonic wave */ -inline __host__ __device__ Real slowMagnetosonicSpeed( - Real const &density, Real const &pressure, Real const &magneticX, - Real const &magneticY, Real const &magneticZ, Real const &gamma) +inline __host__ __device__ Real slowMagnetosonicSpeed(Real const &density, Real const &pressure, Real const &magneticX, + Real const &magneticY, Real const &magneticZ, Real const &gamma) { // Compute the sound speed - return mhd::utils::_internal::_magnetosonicSpeed( - density, pressure, magneticX, magneticY, magneticZ, gamma, -1.0); + return mhd::utils::_internal::_magnetosonicSpeed(density, pressure, magneticX, magneticY, magneticZ, gamma, -1.0); } // ========================================================================= @@ -276,8 +249,7 @@ inline __host__ __device__ Real slowMagnetosonicSpeed( * \param[in] density The density in the cell * \return Real The Alfven wave speed */ -inline __host__ __device__ Real alfvenSpeed(Real const &magneticX, - Real const &density) +inline __host__ __device__ Real alfvenSpeed(Real const &magneticX, Real const &density) { // Compute the Alfven wave speed return fabs(magneticX) / sqrt(fmax(density, TINY_NUMBER)); @@ -306,45 +278,32 @@ inline __host__ __device__ Real alfvenSpeed(Real const &magneticX, * fields. Intended to be called with structured binding like `auto [x, y, * z] = mhd::utils::cellCenteredMagneticFields(*args*) */ -inline __host__ __device__ auto cellCenteredMagneticFields( - Real const *dev_conserved, size_t const &id, size_t const &xid, - size_t const &yid, size_t const &zid, size_t const &n_cells, - size_t const &nx, size_t const &ny) +inline __host__ __device__ auto cellCenteredMagneticFields(Real const *dev_conserved, size_t const &id, + size_t const &xid, size_t const &yid, size_t const &zid, + size_t const &n_cells, size_t const &nx, size_t const &ny) { // Ternary operator to check that no values outside of the magnetic field // arrays are loaded. If the cell is on the edge that doesn't have magnetic // fields on both sides then instead set the centered magnetic field to be // equal to the magnetic field of the closest edge. T - Real avgBx = - (xid > 0) - ? - /*if true*/ 0.5 * - (dev_conserved[(grid_enum::magnetic_x)*n_cells + id] + - dev_conserved[(grid_enum::magnetic_x)*n_cells + - cuda_utilities::compute1DIndex(xid - 1, yid, zid, - nx, ny)]) - : - /*if false*/ dev_conserved[(grid_enum::magnetic_x)*n_cells + id]; - Real avgBy = - (yid > 0) - ? - /*if true*/ 0.5 * - (dev_conserved[(grid_enum::magnetic_y)*n_cells + id] + - dev_conserved[(grid_enum::magnetic_y)*n_cells + - cuda_utilities::compute1DIndex(xid, yid - 1, zid, - nx, ny)]) - : - /*if false*/ dev_conserved[(grid_enum::magnetic_y)*n_cells + id]; - Real avgBz = - (zid > 0) - ? - /*if true*/ 0.5 * - (dev_conserved[(grid_enum::magnetic_z)*n_cells + id] + - dev_conserved[(grid_enum::magnetic_z)*n_cells + - cuda_utilities::compute1DIndex(xid, yid, zid - 1, - nx, ny)]) - : - /*if false*/ dev_conserved[(grid_enum::magnetic_z)*n_cells + id]; + Real avgBx = (xid > 0) ? + /*if true*/ 0.5 * (dev_conserved[(grid_enum::magnetic_x)*n_cells + id] + + dev_conserved[(grid_enum::magnetic_x)*n_cells + + cuda_utilities::compute1DIndex(xid - 1, yid, zid, nx, ny)]) + : + /*if false*/ dev_conserved[(grid_enum::magnetic_x)*n_cells + id]; + Real avgBy = (yid > 0) ? + /*if true*/ 0.5 * (dev_conserved[(grid_enum::magnetic_y)*n_cells + id] + + dev_conserved[(grid_enum::magnetic_y)*n_cells + + cuda_utilities::compute1DIndex(xid, yid - 1, zid, nx, ny)]) + : + /*if false*/ dev_conserved[(grid_enum::magnetic_y)*n_cells + id]; + Real avgBz = (zid > 0) ? + /*if true*/ 0.5 * (dev_conserved[(grid_enum::magnetic_z)*n_cells + id] + + dev_conserved[(grid_enum::magnetic_z)*n_cells + + cuda_utilities::compute1DIndex(xid, yid, zid - 1, nx, ny)]) + : + /*if false*/ dev_conserved[(grid_enum::magnetic_z)*n_cells + id]; struct returnStruct { Real x, y, z; diff --git a/src/utils/mhd_utilities_tests.cu b/src/utils/mhd_utilities_tests.cu index 8212f49a3..bd2579ab9 100644 --- a/src/utils/mhd_utilities_tests.cu +++ b/src/utils/mhd_utilities_tests.cu @@ -27,34 +27,20 @@ namespace { struct testParams { double gamma = 5. / 3.; - std::vector density{8.4087201154e-100, 1.6756968986e2, - 5.4882403847e100}; - std::vector velocityX{7.0378624601e-100, 7.0829278656e2, - 1.8800514112e100}; - std::vector velocityY{7.3583469014e-100, 5.9283073464e2, - 5.2725717864e100}; - std::vector velocityZ{1.7182972216e-100, 8.8417748226e2, - 1.5855352639e100}; - std::vector momentumX{8.2340416681e-100, 8.1019429453e2, - 5.5062596954e100}; - std::vector momentumY{4.9924582299e-100, 7.1254780684e2, - 6.5939640992e100}; - std::vector momentumZ{3.6703192739e-100, 7.5676716066e2, - 7.2115881803e100}; - std::vector energy{3.0342082433e-100, 7.6976906577e2, - 1.9487120853e100}; - std::vector pressureGas{2.2244082909e-100, 8.6772951021e2, - 6.7261085663e100}; - std::vector pressureTotal{8.1704748693e-100, 2.6084125198e2, - 1.8242151369e100}; - std::vector magneticX{2.8568843801e-100, 9.2400807786e2, - 2.1621115264e100}; - std::vector magneticY{9.2900880344e-100, 8.0382409757e2, - 6.6499532343e100}; - std::vector magneticZ{9.5795678229e-100, 3.3284839263e2, - 9.2337456649e100}; - std::vector names{"Small number case", "Medium number case", - "Large number case"}; + std::vector density{8.4087201154e-100, 1.6756968986e2, 5.4882403847e100}; + std::vector velocityX{7.0378624601e-100, 7.0829278656e2, 1.8800514112e100}; + std::vector velocityY{7.3583469014e-100, 5.9283073464e2, 5.2725717864e100}; + std::vector velocityZ{1.7182972216e-100, 8.8417748226e2, 1.5855352639e100}; + std::vector momentumX{8.2340416681e-100, 8.1019429453e2, 5.5062596954e100}; + std::vector momentumY{4.9924582299e-100, 7.1254780684e2, 6.5939640992e100}; + std::vector momentumZ{3.6703192739e-100, 7.5676716066e2, 7.2115881803e100}; + std::vector energy{3.0342082433e-100, 7.6976906577e2, 1.9487120853e100}; + std::vector pressureGas{2.2244082909e-100, 8.6772951021e2, 6.7261085663e100}; + std::vector pressureTotal{8.1704748693e-100, 2.6084125198e2, 1.8242151369e100}; + std::vector magneticX{2.8568843801e-100, 9.2400807786e2, 2.1621115264e100}; + std::vector magneticY{9.2900880344e-100, 8.0382409757e2, 6.6499532343e100}; + std::vector magneticZ{9.5795678229e-100, 3.3284839263e2, 9.2337456649e100}; + std::vector names{"Small number case", "Medium number case", "Large number case"}; }; } // namespace // ============================================================================= @@ -70,19 +56,15 @@ struct testParams { TEST(tMHDComputeEnergy, CorrectInputExpectCorrectOutput) { testParams parameters; - std::vector fiducialEnergies{ - 3.3366124363499995e-100, 137786230.15630624, 9.2884430880010847e+301}; + std::vector fiducialEnergies{3.3366124363499995e-100, 137786230.15630624, 9.2884430880010847e+301}; for (size_t i = 0; i < parameters.names.size(); i++) { - Real testEnergy = mhd::utils::computeEnergy( - parameters.pressureGas.at(i), parameters.density.at(i), - parameters.velocityX.at(i), parameters.velocityY.at(i), - parameters.velocityZ.at(i), parameters.magneticX.at(i), - parameters.magneticY.at(i), parameters.magneticZ.at(i), - parameters.gamma); + Real testEnergy = + mhd::utils::computeEnergy(parameters.pressureGas.at(i), parameters.density.at(i), parameters.velocityX.at(i), + parameters.velocityY.at(i), parameters.velocityZ.at(i), parameters.magneticX.at(i), + parameters.magneticY.at(i), parameters.magneticZ.at(i), parameters.gamma); - testingUtilities::checkResults(fiducialEnergies.at(i), testEnergy, - parameters.names.at(i)); + testingUtilities::checkResults(fiducialEnergies.at(i), testEnergy, parameters.names.at(i)); } } @@ -94,19 +76,15 @@ TEST(tMHDComputeEnergy, CorrectInputExpectCorrectOutput) TEST(tMHDComputeEnergy, NegativePressureExpectAutomaticFix) { testParams parameters; - std::vector fiducialEnergies{ - 3.3366124363499995e-100, 137784928.56204093, 9.2884430880010847e+301}; + std::vector fiducialEnergies{3.3366124363499995e-100, 137784928.56204093, 9.2884430880010847e+301}; for (size_t i = 0; i < parameters.names.size(); i++) { - Real testEnergy = mhd::utils::computeEnergy( - -parameters.pressureGas.at(i), parameters.density.at(i), - parameters.velocityX.at(i), parameters.velocityY.at(i), - parameters.velocityZ.at(i), parameters.magneticX.at(i), - parameters.magneticY.at(i), parameters.magneticZ.at(i), - parameters.gamma); + Real testEnergy = + mhd::utils::computeEnergy(-parameters.pressureGas.at(i), parameters.density.at(i), parameters.velocityX.at(i), + parameters.velocityY.at(i), parameters.velocityZ.at(i), parameters.magneticX.at(i), + parameters.magneticY.at(i), parameters.magneticZ.at(i), parameters.gamma); - testingUtilities::checkResults(fiducialEnergies.at(i), testEnergy, - parameters.names.at(i)); + testingUtilities::checkResults(fiducialEnergies.at(i), testEnergy, parameters.names.at(i)); } } // ============================================================================= @@ -125,19 +103,15 @@ TEST(tMHDComputeGasPressure, CorrectInputExpectCorrectOutput) { testParams parameters; std::vector energyMultiplier{3, 1.0E4, 1.0E105}; - std::vector fiducialGasPressures{ - 1.8586864490415075e-100, 4591434.7663756227, 1.29869419465575e+205}; + std::vector fiducialGasPressures{1.8586864490415075e-100, 4591434.7663756227, 1.29869419465575e+205}; for (size_t i = 0; i < parameters.names.size(); i++) { Real testGasPressure = mhd::utils::computeGasPressure( - energyMultiplier.at(i) * parameters.energy.at(i), - parameters.density.at(i), parameters.momentumX.at(i), - parameters.momentumY.at(i), parameters.momentumZ.at(i), - parameters.magneticX.at(i), parameters.magneticY.at(i), + energyMultiplier.at(i) * parameters.energy.at(i), parameters.density.at(i), parameters.momentumX.at(i), + parameters.momentumY.at(i), parameters.momentumZ.at(i), parameters.magneticX.at(i), parameters.magneticY.at(i), parameters.magneticZ.at(i), parameters.gamma); - testingUtilities::checkResults(fiducialGasPressures.at(i), testGasPressure, - parameters.names.at(i)); + testingUtilities::checkResults(fiducialGasPressures.at(i), testGasPressure, parameters.names.at(i)); } } @@ -152,16 +126,13 @@ TEST(tMHDComputeGasPressure, NegativePressureExpectAutomaticFix) for (size_t i = 0; i < parameters.names.size(); i++) { Real testGasPressure = mhd::utils::computeGasPressure( - parameters.energy.at(i), parameters.density.at(i), - parameters.momentumX.at(i), parameters.momentumY.at(i), - parameters.momentumZ.at(i), parameters.magneticX.at(i), - parameters.magneticY.at(i), parameters.magneticZ.at(i), + parameters.energy.at(i), parameters.density.at(i), parameters.momentumX.at(i), parameters.momentumY.at(i), + parameters.momentumZ.at(i), parameters.magneticX.at(i), parameters.magneticY.at(i), parameters.magneticZ.at(i), parameters.gamma); // I'm using the binary equality assertion here since in the case of // negative pressure the function should return exactly TINY_NUMBER - EXPECT_EQ(TINY_NUMBER, testGasPressure) - << "Difference in " << parameters.names.at(i) << std::endl; + EXPECT_EQ(TINY_NUMBER, testGasPressure) << "Difference in " << parameters.names.at(i) << std::endl; } } // ============================================================================= @@ -180,19 +151,15 @@ TEST(tMHDComputeThermalEnergy, CorrectInputExpectCorrectOutput) { testParams parameters; std::vector energyMultiplier{1.0E85, 1.0E4, 1.0E105}; - std::vector fiducialGasPressures{3.0342082433e-15, 6887152.1495634327, - 1.9480412919836246e+205}; + std::vector fiducialGasPressures{3.0342082433e-15, 6887152.1495634327, 1.9480412919836246e+205}; for (size_t i = 0; i < parameters.names.size(); i++) { Real testGasPressure = mhd::utils::computeThermalEnergy( - energyMultiplier.at(i) * parameters.energy.at(i), - parameters.density.at(i), parameters.momentumX.at(i), - parameters.momentumY.at(i), parameters.momentumZ.at(i), - parameters.magneticX.at(i), parameters.magneticY.at(i), + energyMultiplier.at(i) * parameters.energy.at(i), parameters.density.at(i), parameters.momentumX.at(i), + parameters.momentumY.at(i), parameters.momentumZ.at(i), parameters.magneticX.at(i), parameters.magneticY.at(i), parameters.magneticZ.at(i), parameters.gamma); - testingUtilities::checkResults(fiducialGasPressures.at(i), testGasPressure, - parameters.names.at(i)); + testingUtilities::checkResults(fiducialGasPressures.at(i), testGasPressure, parameters.names.at(i)); } } // ============================================================================= @@ -211,16 +178,13 @@ TEST(tMHDcomputeMagneticEnergy, CorrectInputExpectCorrectOutput) { testParams parameters; std::vector energyMultiplier{1.0E85, 1.0E4, 1.0E105}; - std::vector fiducialEnergy{0.0, 805356.08013056568, - 6.7079331637514162e+201}; + std::vector fiducialEnergy{0.0, 805356.08013056568, 6.7079331637514162e+201}; for (size_t i = 0; i < parameters.names.size(); i++) { - Real testMagneticEnergy = mhd::utils::computeMagneticEnergy( - parameters.magneticX.at(i), parameters.magneticY.at(i), - parameters.magneticZ.at(i)); + Real testMagneticEnergy = mhd::utils::computeMagneticEnergy(parameters.magneticX.at(i), parameters.magneticY.at(i), + parameters.magneticZ.at(i)); - testingUtilities::checkResults(fiducialEnergy.at(i), testMagneticEnergy, - parameters.names.at(i)); + testingUtilities::checkResults(fiducialEnergy.at(i), testMagneticEnergy, parameters.names.at(i)); } } // ============================================================================= @@ -238,16 +202,13 @@ TEST(tMHDcomputeMagneticEnergy, CorrectInputExpectCorrectOutput) TEST(tMHDComputeTotalPressure, CorrectInputExpectCorrectOutput) { testParams parameters; - std::vector fiducialTotalPressures{ - 9.9999999999999995e-21, 806223.80964077567, 6.7079331637514151e+201}; + std::vector fiducialTotalPressures{9.9999999999999995e-21, 806223.80964077567, 6.7079331637514151e+201}; for (size_t i = 0; i < parameters.names.size(); i++) { - Real testTotalPressure = mhd::utils::computeTotalPressure( - parameters.pressureGas.at(i), parameters.magneticX.at(i), - parameters.magneticY.at(i), parameters.magneticZ.at(i)); + Real testTotalPressure = mhd::utils::computeTotalPressure(parameters.pressureGas.at(i), parameters.magneticX.at(i), + parameters.magneticY.at(i), parameters.magneticZ.at(i)); - testingUtilities::checkResults(fiducialTotalPressures.at(i), - testTotalPressure, parameters.names.at(i)); + testingUtilities::checkResults(fiducialTotalPressures.at(i), testTotalPressure, parameters.names.at(i)); } } @@ -263,15 +224,13 @@ TEST(tMHDComputeTotalPressure, NegativePressureExpectAutomaticFix) std::vector pressureMultiplier{1.0, -1.0e4, -1.0e105}; for (size_t i = 0; i < parameters.names.size(); i++) { - Real testTotalPressure = mhd::utils::computeTotalPressure( - pressureMultiplier.at(i) * parameters.pressureGas.at(i), - parameters.magneticX.at(i), parameters.magneticY.at(i), - parameters.magneticZ.at(i)); + Real testTotalPressure = mhd::utils::computeTotalPressure(pressureMultiplier.at(i) * parameters.pressureGas.at(i), + parameters.magneticX.at(i), parameters.magneticY.at(i), + parameters.magneticZ.at(i)); // I'm using the binary equality assertion here since in the case of // negative pressure the function should return exactly TINY_NUMBER - EXPECT_EQ(TINY_NUMBER, testTotalPressure) - << "Difference in " << parameters.names.at(i) << std::endl; + EXPECT_EQ(TINY_NUMBER, testTotalPressure) << "Difference in " << parameters.names.at(i) << std::endl; } } // ============================================================================= @@ -290,20 +249,16 @@ TEST(tMHDComputeTotalPressure, NegativePressureExpectAutomaticFix) TEST(tMHDFastMagnetosonicSpeed, CorrectInputExpectCorrectOutput) { testParams parameters; - std::vector fiducialFastMagnetosonicSpeed{ - 1.9254472601190615e-40, 98.062482309387562, 1.5634816865472293e+38}; + std::vector fiducialFastMagnetosonicSpeed{1.9254472601190615e-40, 98.062482309387562, 1.5634816865472293e+38}; std::vector coef{1.0, 1.0, 1.0e-25}; for (size_t i = 0; i < parameters.names.size(); i++) { Real testFastMagnetosonicSpeed = mhd::utils::fastMagnetosonicSpeed( - coef.at(i) * parameters.density.at(i), - coef.at(i) * parameters.pressureGas.at(i), - coef.at(i) * parameters.magneticX.at(i), - coef.at(i) * parameters.magneticY.at(i), + coef.at(i) * parameters.density.at(i), coef.at(i) * parameters.pressureGas.at(i), + coef.at(i) * parameters.magneticX.at(i), coef.at(i) * parameters.magneticY.at(i), coef.at(i) * parameters.magneticZ.at(i), parameters.gamma); - testingUtilities::checkResults(fiducialFastMagnetosonicSpeed.at(i), - testFastMagnetosonicSpeed, + testingUtilities::checkResults(fiducialFastMagnetosonicSpeed.at(i), testFastMagnetosonicSpeed, parameters.names.at(i)); } } @@ -317,20 +272,16 @@ TEST(tMHDFastMagnetosonicSpeed, CorrectInputExpectCorrectOutput) TEST(tMHDFastMagnetosonicSpeed, NegativeDensityExpectAutomaticFix) { testParams parameters; - std::vector fiducialFastMagnetosonicSpeed{ - 1.9254472601190615e-40, 12694062010603.15, 1.1582688085027081e+86}; + std::vector fiducialFastMagnetosonicSpeed{1.9254472601190615e-40, 12694062010603.15, 1.1582688085027081e+86}; std::vector coef{1.0, 1.0, 1.0e-25}; for (size_t i = 0; i < parameters.names.size(); i++) { Real testFastMagnetosonicSpeed = mhd::utils::fastMagnetosonicSpeed( - -coef.at(i) * parameters.density.at(i), - coef.at(i) * parameters.pressureGas.at(i), - coef.at(i) * parameters.magneticX.at(i), - coef.at(i) * parameters.magneticY.at(i), + -coef.at(i) * parameters.density.at(i), coef.at(i) * parameters.pressureGas.at(i), + coef.at(i) * parameters.magneticX.at(i), coef.at(i) * parameters.magneticY.at(i), coef.at(i) * parameters.magneticZ.at(i), parameters.gamma); - testingUtilities::checkResults(fiducialFastMagnetosonicSpeed.at(i), - testFastMagnetosonicSpeed, + testingUtilities::checkResults(fiducialFastMagnetosonicSpeed.at(i), testFastMagnetosonicSpeed, parameters.names.at(i)); } } @@ -350,19 +301,16 @@ TEST(tMHDFastMagnetosonicSpeed, NegativeDensityExpectAutomaticFix) TEST(tMHDSlowMagnetosonicSpeed, CorrectInputExpectCorrectOutput) { testParams parameters; - std::vector fiducialSlowMagnetosonicSpeed{0.0, 2.138424778167535, - 0.26678309355540852}; + std::vector fiducialSlowMagnetosonicSpeed{0.0, 2.138424778167535, 0.26678309355540852}; // Coefficient to make sure the output is well defined and not nan or inf double const coef = 1E-95; for (size_t i = 2; i < parameters.names.size(); i++) { Real testSlowMagnetosonicSpeed = mhd::utils::slowMagnetosonicSpeed( - parameters.density.at(i) * coef, parameters.pressureGas.at(i) * coef, - parameters.magneticX.at(i) * coef, parameters.magneticY.at(i) * coef, - parameters.magneticZ.at(i) * coef, parameters.gamma); + parameters.density.at(i) * coef, parameters.pressureGas.at(i) * coef, parameters.magneticX.at(i) * coef, + parameters.magneticY.at(i) * coef, parameters.magneticZ.at(i) * coef, parameters.gamma); - testingUtilities::checkResults(fiducialSlowMagnetosonicSpeed.at(i), - testSlowMagnetosonicSpeed, + testingUtilities::checkResults(fiducialSlowMagnetosonicSpeed.at(i), testSlowMagnetosonicSpeed, parameters.names.at(i)); } } @@ -376,19 +324,16 @@ TEST(tMHDSlowMagnetosonicSpeed, CorrectInputExpectCorrectOutput) TEST(tMHDSlowMagnetosonicSpeed, NegativeDensityExpectAutomaticFix) { testParams parameters; - std::vector fiducialSlowMagnetosonicSpeed{0.0, 276816332809.37604, - 1976400098318.3574}; + std::vector fiducialSlowMagnetosonicSpeed{0.0, 276816332809.37604, 1976400098318.3574}; // Coefficient to make sure the output is well defined and not nan or inf double const coef = 1E-95; for (size_t i = 2; i < parameters.names.size(); i++) { Real testSlowMagnetosonicSpeed = mhd::utils::slowMagnetosonicSpeed( - -parameters.density.at(i) * coef, parameters.pressureGas.at(i) * coef, - parameters.magneticX.at(i) * coef, parameters.magneticY.at(i) * coef, - parameters.magneticZ.at(i) * coef, parameters.gamma); + -parameters.density.at(i) * coef, parameters.pressureGas.at(i) * coef, parameters.magneticX.at(i) * coef, + parameters.magneticY.at(i) * coef, parameters.magneticZ.at(i) * coef, parameters.gamma); - testingUtilities::checkResults(fiducialSlowMagnetosonicSpeed.at(i), - testSlowMagnetosonicSpeed, + testingUtilities::checkResults(fiducialSlowMagnetosonicSpeed.at(i), testSlowMagnetosonicSpeed, parameters.names.at(i)); } } @@ -407,15 +352,12 @@ TEST(tMHDSlowMagnetosonicSpeed, NegativeDensityExpectAutomaticFix) TEST(tMHDAlfvenSpeed, CorrectInputExpectCorrectOutput) { testParams parameters; - std::vector fiducialAlfvenSpeed{ - 2.8568843800999998e-90, 71.380245120271113, 9.2291462785524423e+49}; + std::vector fiducialAlfvenSpeed{2.8568843800999998e-90, 71.380245120271113, 9.2291462785524423e+49}; for (size_t i = 0; i < parameters.names.size(); i++) { - Real testAlfvenSpeed = mhd::utils::alfvenSpeed(parameters.magneticX.at(i), - parameters.density.at(i)); + Real testAlfvenSpeed = mhd::utils::alfvenSpeed(parameters.magneticX.at(i), parameters.density.at(i)); - testingUtilities::checkResults(fiducialAlfvenSpeed.at(i), testAlfvenSpeed, - parameters.names.at(i)); + testingUtilities::checkResults(fiducialAlfvenSpeed.at(i), testAlfvenSpeed, parameters.names.at(i)); } } @@ -427,15 +369,12 @@ TEST(tMHDAlfvenSpeed, CorrectInputExpectCorrectOutput) TEST(tMHDAlfvenSpeed, NegativeDensityExpectAutomaticFix) { testParams parameters; - std::vector fiducialAlfvenSpeed{2.8568843800999998e-90, 9240080778600, - 2.1621115263999998e+110}; + std::vector fiducialAlfvenSpeed{2.8568843800999998e-90, 9240080778600, 2.1621115263999998e+110}; for (size_t i = 0; i < parameters.names.size(); i++) { - Real testAlfvenSpeed = mhd::utils::alfvenSpeed(parameters.magneticX.at(i), - -parameters.density.at(i)); + Real testAlfvenSpeed = mhd::utils::alfvenSpeed(parameters.magneticX.at(i), -parameters.density.at(i)); - testingUtilities::checkResults(fiducialAlfvenSpeed.at(i), testAlfvenSpeed, - parameters.names.at(i)); + testingUtilities::checkResults(fiducialAlfvenSpeed.at(i), testAlfvenSpeed, parameters.names.at(i)); } } // ============================================================================= @@ -463,21 +402,16 @@ TEST(tMHDCellCenteredMagneticFields, CorrectInputExpectCorrectOutput) std::iota(std::begin(testGrid), std::end(testGrid), 0.); // Fiducial and test variables - double const fiducialAvgBx = 637.5, fiducialAvgBy = 761.5, - fiducialAvgBz = 883.5; + double const fiducialAvgBx = 637.5, fiducialAvgBy = 761.5, fiducialAvgBz = 883.5; // Call the function to test auto [testAvgBx, testAvgBy, testAvgBz] = - mhd::utils::cellCenteredMagneticFields(testGrid.data(), id, xid, yid, zid, - n_cells, nx, ny); + mhd::utils::cellCenteredMagneticFields(testGrid.data(), id, xid, yid, zid, n_cells, nx, ny); // Check the results - testingUtilities::checkResults(fiducialAvgBx, testAvgBx, - "cell centered Bx value"); - testingUtilities::checkResults(fiducialAvgBy, testAvgBy, - "cell centered By value"); - testingUtilities::checkResults(fiducialAvgBz, testAvgBz, - "cell centered Bz value"); + testingUtilities::checkResults(fiducialAvgBx, testAvgBx, "cell centered Bx value"); + testingUtilities::checkResults(fiducialAvgBy, testAvgBy, "cell centered By value"); + testingUtilities::checkResults(fiducialAvgBz, testAvgBz, "cell centered Bz value"); } #endif // MHD // ============================================================================= diff --git a/src/utils/parallel_omp.cpp b/src/utils/parallel_omp.cpp index 3e85efcd0..08e7bbb87 100644 --- a/src/utils/parallel_omp.cpp +++ b/src/utils/parallel_omp.cpp @@ -2,8 +2,8 @@ #include "../utils/parallel_omp.h" -void Get_OMP_Grid_Indxs(int n_grid_cells, int n_omp_procs, int omp_proc_id, - int *omp_gridIndx_start, int *omp_gridIndx_end) +void Get_OMP_Grid_Indxs(int n_grid_cells, int n_omp_procs, int omp_proc_id, int *omp_gridIndx_start, + int *omp_gridIndx_end) { int grid_reminder, n_grid_omp, g_start, g_end; grid_reminder = n_grid_cells % n_omp_procs; @@ -24,8 +24,7 @@ void Get_OMP_Grid_Indxs(int n_grid_cells, int n_omp_procs, int omp_proc_id, } #ifdef PARTICLES -void Get_OMP_Particles_Indxs(part_int_t n_parts_local, int n_omp_procs, - int omp_proc_id, part_int_t *omp_pIndx_start, +void Get_OMP_Particles_Indxs(part_int_t n_parts_local, int n_omp_procs, int omp_proc_id, part_int_t *omp_pIndx_start, part_int_t *omp_pIndx_end) { part_int_t n_parts_omp, parts_reminder, p_start, p_end; diff --git a/src/utils/parallel_omp.h b/src/utils/parallel_omp.h index 836cd91a4..5e8f6cffa 100644 --- a/src/utils/parallel_omp.h +++ b/src/utils/parallel_omp.h @@ -12,12 +12,11 @@ #include "../global/global.h" #include "math.h" -void Get_OMP_Grid_Indxs(int n_grid_cells, int n_omp_procs, int omp_proc_id, - int *omp_gridIndx_start, int *omp_gridIndx_end); +void Get_OMP_Grid_Indxs(int n_grid_cells, int n_omp_procs, int omp_proc_id, int *omp_gridIndx_start, + int *omp_gridIndx_end); #ifdef PARTICLES -void Get_OMP_Particles_Indxs(part_int_t n_parts_local, int n_omp_procs, - int omp_proc_id, part_int_t *omp_pIndx_start, +void Get_OMP_Particles_Indxs(part_int_t n_parts_local, int n_omp_procs, int omp_proc_id, part_int_t *omp_pIndx_start, part_int_t *omp_pIndx_end); #endif diff --git a/src/utils/prng_utilities.h b/src/utils/prng_utilities.h index 9e038ce8f..6f8eebc77 100644 --- a/src/utils/prng_utilities.h +++ b/src/utils/prng_utilities.h @@ -23,14 +23,11 @@ class ChollaPrngGenerator // and MPI rank to strings, concatenated them, then hash the result. // This should give a fairly random seed even if std::random_device // isn't random - std::string hashString = - std::to_string(std::random_device{}()) + std::string hashString = std::to_string(std::random_device{}()) #ifdef MPI_CHOLLA - + std::to_string(static_cast(procID)) + + std::to_string(static_cast(procID)) #endif - + std::to_string(std::chrono::high_resolution_clock::now() - .time_since_epoch() - .count()); + + std::to_string(std::chrono::high_resolution_clock::now().time_since_epoch().count()); std::size_t hashedSeed = std::hash{}(hashString); P->prng_seed = static_cast(hashedSeed); } diff --git a/src/utils/reduction_utilities.cu b/src/utils/reduction_utilities.cu index 0c54f6296..518572cd2 100644 --- a/src/utils/reduction_utilities.cu +++ b/src/utils/reduction_utilities.cu @@ -23,8 +23,7 @@ __global__ void kernelReduceMax(Real* in, Real* out, size_t N) Real maxVal = -DBL_MAX; // Grid stride loop to perform as much of the reduction as possible - for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < N; - i += blockDim.x * gridDim.x) { + for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < N; i += blockDim.x * gridDim.x) { // A transformation could go here // Grid stride reduction diff --git a/src/utils/reduction_utilities_tests.cu b/src/utils/reduction_utilities_tests.cu index e54ccd764..d7fdaf3d3 100644 --- a/src/utils/reduction_utilities_tests.cu +++ b/src/utils/reduction_utilities_tests.cu @@ -29,8 +29,7 @@ TEST(tALLKernelReduceMax, CorrectInputExpectCorrectOutput) { // Launch parameters // ================= - cuda_utilities::AutomaticLaunchParams static const launchParams( - reduction_utilities::kernelReduceMax); + cuda_utilities::AutomaticLaunchParams static const launchParams(reduction_utilities::kernelReduceMax); // Grid Parameters & testing parameters // ==================================== @@ -42,8 +41,7 @@ TEST(tALLKernelReduceMax, CorrectInputExpectCorrectOutput) // Fill grid with random values and assign maximum value std::mt19937 prng(1); - std::uniform_real_distribution doubleRand(-std::abs(maxValue) - 1, - std::abs(maxValue) - 1); + std::uniform_real_distribution doubleRand(-std::abs(maxValue) - 1, std::abs(maxValue) - 1); std::uniform_int_distribution intRand(0, host_grid.size() - 1); for (size_t i = 0; i < host_grid.size(); i++) { host_grid.at(i) = doubleRand(prng); @@ -60,14 +58,12 @@ TEST(tALLKernelReduceMax, CorrectInputExpectCorrectOutput) // Do the reduction // ================ - hipLaunchKernelGGL(reduction_utilities::kernelReduceMax, - launchParams.numBlocks, launchParams.threadsPerBlock, 0, 0, + hipLaunchKernelGGL(reduction_utilities::kernelReduceMax, launchParams.numBlocks, launchParams.threadsPerBlock, 0, 0, dev_grid.data(), dev_max.data(), host_grid.size()); CudaCheckError(); // Perform comparison - testingUtilities::checkResults(maxValue, dev_max.at(0), - "maximum value found"); + testingUtilities::checkResults(maxValue, dev_max.at(0), "maximum value found"); } // ============================================================================= // Tests for divergence max reduction diff --git a/src/utils/testing_utilities.cpp b/src/utils/testing_utilities.cpp index 32263001f..a279288ad 100644 --- a/src/utils/testing_utilities.cpp +++ b/src/utils/testing_utilities.cpp @@ -51,8 +51,7 @@ int64_t ulpsDistanceDbl(double const &a, double const &b) // ========================================================================= // ========================================================================= -bool nearlyEqualDbl(double const &a, double const &b, double &absoluteDiff, - int64_t &ulpsDiff, +bool nearlyEqualDbl(double const &a, double const &b, double &absoluteDiff, int64_t &ulpsDiff, double const &fixedEpsilon, // = 1E-14 by default int64_t const &ulpsEpsilon) // = 4 by default { @@ -75,8 +74,7 @@ bool nearlyEqualDbl(double const &a, double const &b, double &absoluteDiff, } // ========================================================================= -void wrapperEqual(int i, int j, int k, std::string dataSetName, - double test_value, double fid_value, +void wrapperEqual(int i, int j, int k, std::string dataSetName, double test_value, double fid_value, double fixedEpsilon = 5.0E-12) { std::string outString; @@ -89,44 +87,35 @@ void wrapperEqual(int i, int j, int k, std::string dataSetName, outString += k; outString += "]"; - ASSERT_NO_FATAL_FAILURE( - checkResults<1>(fid_value, test_value, outString, fixedEpsilon)); + ASSERT_NO_FATAL_FAILURE(checkResults<1>(fid_value, test_value, outString, fixedEpsilon)); } -void analyticConstant(systemTest::SystemTestRunner testObject, - std::string dataSetName, double value) +void analyticConstant(systemTest::SystemTestRunner testObject, std::string dataSetName, double value) { std::vector testDims(3, 1); - std::vector testData = - testObject.loadTestFieldData(dataSetName, testDims); + std::vector testData = testObject.loadTestFieldData(dataSetName, testDims); for (size_t i = 0; i < testDims[0]; i++) { for (size_t j = 0; j < testDims[1]; j++) { for (size_t k = 0; k < testDims[2]; k++) { size_t index = (i * testDims[1] * testDims[2]) + (j * testDims[2]) + k; - ASSERT_NO_FATAL_FAILURE( - wrapperEqual(i, j, k, dataSetName, testData.at(index), value)); + ASSERT_NO_FATAL_FAILURE(wrapperEqual(i, j, k, dataSetName, testData.at(index), value)); } } } } -void analyticSine(systemTest::SystemTestRunner testObject, - std::string dataSetName, double constant, double amplitude, - double kx, double ky, double kz, double phase, - double tolerance) +void analyticSine(systemTest::SystemTestRunner testObject, std::string dataSetName, double constant, double amplitude, + double kx, double ky, double kz, double phase, double tolerance) { std::vector testDims(3, 1); - std::vector testData = - testObject.loadTestFieldData(dataSetName, testDims); + std::vector testData = testObject.loadTestFieldData(dataSetName, testDims); for (size_t i = 0; i < testDims[0]; i++) { for (size_t j = 0; j < testDims[1]; j++) { for (size_t k = 0; k < testDims[2]; k++) { - double value = - constant + amplitude * std::sin(kx * i + ky * j + kz * k + phase); + double value = constant + amplitude * std::sin(kx * i + ky * j + kz * k + phase); size_t index = (i * testDims[1] * testDims[2]) + (j * testDims[2]) + k; - ASSERT_NO_FATAL_FAILURE(wrapperEqual( - i, j, k, dataSetName, testData.at(index), value, tolerance)); + ASSERT_NO_FATAL_FAILURE(wrapperEqual(i, j, k, dataSetName, testData.at(index), value, tolerance)); } } } diff --git a/src/utils/testing_utilities.h b/src/utils/testing_utilities.h index 0f19d3265..12daf0969 100644 --- a/src/utils/testing_utilities.h +++ b/src/utils/testing_utilities.h @@ -91,21 +91,17 @@ int64_t ulpsDistanceDbl(double const &a, double const &b); * \param[in] ulpsEpsilon The allowed difference of ULPs. Defaults to 4 * \return bool Whether or not the numbers are equal */ -bool nearlyEqualDbl(double const &a, double const &b, double &absoluteDiff, - int64_t &ulpsDiff, double const &fixedEpsilon = 1E-14, - int64_t const &ulpsEpsilon = 4); +bool nearlyEqualDbl(double const &a, double const &b, double &absoluteDiff, int64_t &ulpsDiff, + double const &fixedEpsilon = 1E-14, int64_t const &ulpsEpsilon = 4); // ========================================================================= -void wrapperEqual(int i, int j, int k, std::string dataSetName, - double test_value, double fid_value, double fixedEpsilon); +void wrapperEqual(int i, int j, int k, std::string dataSetName, double test_value, double fid_value, + double fixedEpsilon); -void analyticConstant(systemTest::SystemTestRunner testObject, - std::string dataSetName, double value); +void analyticConstant(systemTest::SystemTestRunner testObject, std::string dataSetName, double value); -void analyticSine(systemTest::SystemTestRunner testObject, - std::string dataSetName, double constant, double amplitude, - double kx, double ky, double kz, double phase, - double tolerance); +void analyticSine(systemTest::SystemTestRunner testObject, std::string dataSetName, double constant, double amplitude, + double kx, double ky, double kz, double phase, double tolerance); // ========================================================================= /*! @@ -124,8 +120,7 @@ void analyticSine(systemTest::SystemTestRunner testObject, * values are ignored and default behaviour is used */ template -void checkResults(double fiducialNumber, double testNumber, - std::string outString, double fixedEpsilon = -999, +void checkResults(double fiducialNumber, double testNumber, std::string outString, double fixedEpsilon = -999, int64_t ulpsEpsilon = -999) { // Check for equality and if not equal return difference @@ -134,22 +129,18 @@ void checkResults(double fiducialNumber, double testNumber, bool areEqual; if ((fixedEpsilon < 0) and (ulpsEpsilon < 0)) { - areEqual = testingUtilities::nearlyEqualDbl(fiducialNumber, testNumber, - absoluteDiff, ulpsDiff); + areEqual = testingUtilities::nearlyEqualDbl(fiducialNumber, testNumber, absoluteDiff, ulpsDiff); } else if ((fixedEpsilon > 0) and (ulpsEpsilon < 0)) { - areEqual = testingUtilities::nearlyEqualDbl( - fiducialNumber, testNumber, absoluteDiff, ulpsDiff, fixedEpsilon); + areEqual = testingUtilities::nearlyEqualDbl(fiducialNumber, testNumber, absoluteDiff, ulpsDiff, fixedEpsilon); } else { - areEqual = testingUtilities::nearlyEqualDbl(fiducialNumber, testNumber, - absoluteDiff, ulpsDiff, - fixedEpsilon, ulpsEpsilon); + areEqual = + testingUtilities::nearlyEqualDbl(fiducialNumber, testNumber, absoluteDiff, ulpsDiff, fixedEpsilon, ulpsEpsilon); } std::stringstream outputMessage; - outputMessage << std::setprecision(std::numeric_limits::max_digits10) - << "Difference in " << outString << std::endl - << "The fiducial value is: " << fiducialNumber + outputMessage << std::setprecision(std::numeric_limits::max_digits10) << "Difference in " << outString << std::endl + << "The fiducial value is: " << fiducialNumber << std::endl << "The test value is: " << testNumber << std::endl << "The absolute difference is: " << absoluteDiff << std::endl << "The ULP difference is: " << ulpsDiff << std::endl; diff --git a/src/utils/timing_functions.cpp b/src/utils/timing_functions.cpp index 050977e1b..ae33ea089 100644 --- a/src/utils/timing_functions.cpp +++ b/src/utils/timing_functions.cpp @@ -63,20 +63,15 @@ void OneTime::RecordTime(Real time) void OneTime::PrintStep() { - chprintf(" Time %-19s min: %9.4f max: %9.4f avg: %9.4f ms\n", name, t_min, - t_max, t_avg); + chprintf(" Time %-19s min: %9.4f max: %9.4f avg: %9.4f ms\n", name, t_min, t_max, t_avg); } void OneTime::PrintAverage() { - if (n_steps > 1) - chprintf(" Time %-19s avg: %9.4f ms\n", name, t_all / (n_steps - 1)); + if (n_steps > 1) chprintf(" Time %-19s avg: %9.4f ms\n", name, t_all / (n_steps - 1)); } -void OneTime::PrintAll() -{ - chprintf(" Time %-19s all: %9.4f ms\n", name, t_all); -} +void OneTime::PrintAll() { chprintf(" Time %-19s all: %9.4f ms\n", name, t_all); } Time::Time(void) {} @@ -144,10 +139,8 @@ void Time::Print_Average_Times(struct parameters P) chprintf("Writing timing values to file: %s \n", file_name.c_str()); - std::string gitHash = - "Git Commit Hash = " + std::string(GIT_HASH) + std::string("\n"); - std::string macroFlags = - "Macro Flags = " + std::string(MACRO_FLAGS) + std::string("\n\n"); + std::string gitHash = "Git Commit Hash = " + std::string(GIT_HASH) + std::string("\n"); + std::string macroFlags = "Macro Flags = " + std::string(MACRO_FLAGS) + std::string("\n\n"); header = "#n_proc nx ny nz n_omp n_steps "; From b779b212b24ed19592ac309eab1c3ccb7ba66212 Mon Sep 17 00:00:00 2001 From: Alwin Date: Thu, 9 Feb 2023 12:32:33 -0800 Subject: [PATCH 207/694] format hpp and cuh as well --- src/chemistry_gpu/rates.cuh | 27 +++----- src/chemistry_gpu/rates_Katz95.cuh | 37 +++------- src/gravity/paris/HenryPeriodic.hpp | 68 +++++++------------ src/gravity/paris/ParisPeriodic.hpp | 3 +- src/gravity/paris/PoissonZero3DBlockedGPU.hpp | 3 +- src/utils/gpu.hpp | 41 ++++------- 6 files changed, 57 insertions(+), 122 deletions(-) diff --git a/src/chemistry_gpu/rates.cuh b/src/chemistry_gpu/rates.cuh index 95886502b..bf09aabc9 100644 --- a/src/chemistry_gpu/rates.cuh +++ b/src/chemistry_gpu/rates.cuh @@ -113,8 +113,7 @@ __host__ __device__ Real comp_rate(Real n_e, Real T, Real zr, Real units); __host__ __device__ Real cool_compton_rate(Real T, Real units); // X-ray compton heating -__host__ __device__ Real xray_heat_rate(Real n_e, Real T, Real Redshift, - Real units); +__host__ __device__ Real xray_heat_rate(Real n_e, Real T, Real Redshift, Real units); // Colisional excitation of neutral hydrogen (HI) and singly ionized helium // (HeII) @@ -138,34 +137,24 @@ Real __device__ Recombination_Rate_HeII_Hui97(Real temp); Real __device__ Recombination_Rate_HeIII_Hui97(Real temp); -Real __device__ Cooling_Rate_Recombination_HII_Hui97(Real n_e, Real n_HII, - Real temp); +Real __device__ Cooling_Rate_Recombination_HII_Hui97(Real n_e, Real n_HII, Real temp); -Real __device__ Cooling_Rate_Recombination_HeII_Hui97(Real n_e, Real n_HII, - Real temp); +Real __device__ Cooling_Rate_Recombination_HeII_Hui97(Real n_e, Real n_HII, Real temp); -Real __device__ Cooling_Rate_Recombination_HeIII_Hui97(Real n_e, Real n_HII, - Real temp); +Real __device__ Cooling_Rate_Recombination_HeIII_Hui97(Real n_e, Real n_HII, Real temp); Real __device__ Recombination_Rate_dielectronic_HeII_Hui97(Real temp); -Real __device__ Cooling_Rate_Recombination_dielectronic_HeII_Hui97(Real n_e, - Real n_HeII, - Real temp); +Real __device__ Cooling_Rate_Recombination_dielectronic_HeII_Hui97(Real n_e, Real n_HeII, Real temp); Real __device__ Collisional_Ionization_Rate_e_HI_Hui97(Real temp); -Real __device__ Cooling_Rate_Collisional_Excitation_e_HI_Hui97(Real n_e, - Real n_HI, - Real temp); +Real __device__ Cooling_Rate_Collisional_Excitation_e_HI_Hui97(Real n_e, Real n_HI, Real temp); -Real __device__ Cooling_Rate_Collisional_Excitation_e_HeII_Hui97(Real n_e, - Real n_HeII, - Real temp); +Real __device__ Cooling_Rate_Collisional_Excitation_e_HeII_Hui97(Real n_e, Real n_HeII, Real temp); // Compton cooling off the CMB -Real __device__ Cooling_Rate_Compton_CMB_MillesOstriker01(Real n_e, Real temp, - Real z); +Real __device__ Cooling_Rate_Compton_CMB_MillesOstriker01(Real n_e, Real temp, Real z); // Real __device__ Cooling_Rate_Compton_CMB_Peebles93( Real n_e, Real temp, Real // current_z, cosmo ); diff --git a/src/chemistry_gpu/rates_Katz95.cuh b/src/chemistry_gpu/rates_Katz95.cuh index 01a88c12b..18c5e54c2 100644 --- a/src/chemistry_gpu/rates_Katz95.cuh +++ b/src/chemistry_gpu/rates_Katz95.cuh @@ -6,26 +6,16 @@ // Colisional excitation of neutral hydrogen (HI) and singly ionized helium // (HeII) -Real __device__ Cooling_Rate_Collisional_Excitation_e_HI_Katz95(Real n_e, - Real n_HI, - Real temp); +Real __device__ Cooling_Rate_Collisional_Excitation_e_HI_Katz95(Real n_e, Real n_HI, Real temp); -Real __device__ Cooling_Rate_Collisional_Excitation_e_HeII_Katz95(Real n_e, - Real n_HeII, - Real temp); +Real __device__ Cooling_Rate_Collisional_Excitation_e_HeII_Katz95(Real n_e, Real n_HeII, Real temp); // Colisional ionization of HI, HeI and HeII -Real __device__ Cooling_Rate_Collisional_Ionization_e_HI_Katz95(Real n_e, - Real n_HI, - Real temp); +Real __device__ Cooling_Rate_Collisional_Ionization_e_HI_Katz95(Real n_e, Real n_HI, Real temp); -Real __device__ Cooling_Rate_Collisional_Ionization_e_HeI_Katz95(Real n_e, - Real n_HeI, - Real temp); +Real __device__ Cooling_Rate_Collisional_Ionization_e_HeI_Katz95(Real n_e, Real n_HeI, Real temp); -Real __device__ Cooling_Rate_Collisional_Ionization_e_HeII_Katz95(Real n_e, - Real n_HeII, - Real temp); +Real __device__ Cooling_Rate_Collisional_Ionization_e_HeII_Katz95(Real n_e, Real n_HeII, Real temp); Real __device__ Collisional_Ionization_Rate_e_HI_Katz95(Real temp); @@ -35,14 +25,11 @@ Real __device__ Collisional_Ionization_Rate_e_HeII_Katz95(Real temp); // Standard Recombination of HII, HeII and HeIII -Real __device__ Cooling_Rate_Recombination_HII_Katz95(Real n_e, Real n_HII, - Real temp); +Real __device__ Cooling_Rate_Recombination_HII_Katz95(Real n_e, Real n_HII, Real temp); -Real __device__ Cooling_Rate_Recombination_HeII_Katz95(Real n_e, Real n_HeII, - Real temp); +Real __device__ Cooling_Rate_Recombination_HeII_Katz95(Real n_e, Real n_HeII, Real temp); -Real __device__ Cooling_Rate_Recombination_HeIII_Katz95(Real n_e, Real n_HeIII, - Real temp); +Real __device__ Cooling_Rate_Recombination_HeIII_Katz95(Real n_e, Real n_HeIII, Real temp); Real __device__ Recombination_Rate_HII_Katz95(Real temp); @@ -51,18 +38,14 @@ Real __device__ Recombination_Rate_HeII_Katz95(Real temp); Real __device__ Recombination_Rate_HeIII_Katz95(Real temp); // Dielectronic recombination of HeII -Real __device__ Cooling_Rate_Recombination_dielectronic_HeII_Katz95(Real n_e, - Real n_HeII, - Real temp); +Real __device__ Cooling_Rate_Recombination_dielectronic_HeII_Katz95(Real n_e, Real n_HeII, Real temp); Real __device__ Recombination_Rate_dielectronic_HeII_Katz95(Real temp); // Free-Free emission (Bremsstrahlung) Real __device__ gaunt_factor(Real log10_T); -Real __device__ Cooling_Rate_Bremsstrahlung_Katz95(Real n_e, Real n_HII, - Real n_HeII, Real n_HeIII, - Real temp); +Real __device__ Cooling_Rate_Bremsstrahlung_Katz95(Real n_e, Real n_HII, Real n_HeII, Real n_HeIII, Real temp); // Compton cooling off the CMB Real __device__ Cooling_Rate_Compton_CMB_Katz95(Real n_e, Real temp, Real z); diff --git a/src/gravity/paris/HenryPeriodic.hpp b/src/gravity/paris/HenryPeriodic.hpp index 82b2307e6..4fa2467b2 100644 --- a/src/gravity/paris/HenryPeriodic.hpp +++ b/src/gravity/paris/HenryPeriodic.hpp @@ -24,8 +24,7 @@ class HenryPeriodic * @param[in] m[3] { Number of MPI tasks in each dimension. } * @param[in] id[3] { Coordinates of this MPI task, starting at `{0,0,0}`. } */ - HenryPeriodic(const int n[3], const double lo[3], const double hi[3], - const int m[3], const int id[3]); + HenryPeriodic(const int n[3], const double lo[3], const double hi[3], const int m[3], const int id[3]); ~HenryPeriodic(); @@ -59,22 +58,21 @@ class HenryPeriodic * the FFT. The function should return the filtered value. } */ template - void filter(const size_t bytes, double *const before, double *const after, - const F f) const; + void filter(const size_t bytes, double *const before, double *const after, const F f) const; private: int idi_, idj_, idk_; //!< MPI coordinates of 3D block int mi_, mj_, mk_; //!< Number of MPI tasks in each dimension of 3D domain - int nh_; //!< Global number of complex values in Z dimension, after R2C - //!< transform - int ni_, nj_, nk_; //!< Global number of real points in each dimension - int mp_, mq_; //!< Number of MPI tasks in X and Y dimensions of Z pencil - int idp_, idq_; //!< X and Y task IDs within Z pencil + int nh_; //!< Global number of complex values in Z dimension, after R2C + //!< transform + int ni_, nj_, nk_; //!< Global number of real points in each dimension + int mp_, mq_; //!< Number of MPI tasks in X and Y dimensions of Z pencil + int idp_, idq_; //!< X and Y task IDs within Z pencil MPI_Comm commI_, commJ_, - commK_; //!< Communicators of fellow tasks in X, Y, and Z pencils + commK_; //!< Communicators of fellow tasks in X, Y, and Z pencils int dh_, di_, dj_, dk_; //!< Max number of local points in each dimension int dhq_, dip_, djp_, - djq_; //!< Max number of local points in dimensions of 2D decompositions + djq_; //!< Max number of local points in dimensions of 2D decompositions size_t bytes_; //!< Max bytes needed for argument arrays cufftHandle c2ci_, c2cj_, c2rk_, r2ck_; //!< Objects for forward and inverse FFTs @@ -86,8 +84,7 @@ class HenryPeriodic #if defined(__HIP__) || defined(__CUDACC__) template -void HenryPeriodic::filter(const size_t bytes, double *const before, - double *const after, const F f) const +void HenryPeriodic::filter(const size_t bytes, double *const before, double *const after, const F f) const { // Make sure arguments have enough space assert(bytes >= bytes_); @@ -117,9 +114,7 @@ void HenryPeriodic::filter(const size_t bytes, double *const before, // Reorder 3D block into sub-pencils gpuFor( - mp, mq, dip, djq, dk, - GPU_LAMBDA(const int p, const int q, const int i, const int j, - const int k) { + mp, mq, dip, djq, dk, GPU_LAMBDA(const int p, const int q, const int i, const int j, const int k) { const int ii = p * dip + i; const int jj = q * djq + j; const int ia = k + dk * (j + djq * (i + dip * (q + mq * p))); @@ -146,8 +141,7 @@ void HenryPeriodic::filter(const size_t bytes, double *const before, const int jLo = idj * dj + idq * djq; const int jHi = std::min({jLo + djq, (idj + 1) * dj, nj}); gpuFor( - iHi - iLo, jHi - jLo, mk, dk, - GPU_LAMBDA(const int i, const int j, const int pq, const int k) { + iHi - iLo, jHi - jLo, mk, dk, GPU_LAMBDA(const int i, const int j, const int pq, const int k) { const int kk = pq * dk + k; if (kk < nk) { const int ia = kk + nk * (j + djq * i); @@ -167,8 +161,7 @@ void HenryPeriodic::filter(const size_t bytes, double *const before, const int jLo = idj_ * dj_ + idq * djq; const int jHi = std::min({jLo + djq, (idj + 1) * dj, nj}); gpuFor( - mjq, iHi - iLo, jHi - jLo, dhq, - GPU_LAMBDA(const int q, const int i, const int j, const int k) { + mjq, iHi - iLo, jHi - jLo, dhq, GPU_LAMBDA(const int q, const int i, const int j, const int k) { const int kk = q * dhq + k; if (kk < nh) { const int ia = k + dhq * (j + djq * (i + dip * q)); @@ -196,9 +189,7 @@ void HenryPeriodic::filter(const size_t bytes, double *const before, const int kLo = idjq * dhq; const int kHi = std::min(kLo + dhq, nh); gpuFor( - kHi - kLo, iHi - iLo, mj, mq, djq, - GPU_LAMBDA(const int k, const int i, const int r, const int q, - const int j) { + kHi - kLo, iHi - iLo, mj, mq, djq, GPU_LAMBDA(const int k, const int i, const int r, const int q, const int j) { const int rdj = r * dj; const int jj = rdj + q * djq + j; if ((jj < nj) && (jj < rdj + dj)) { @@ -219,8 +210,7 @@ void HenryPeriodic::filter(const size_t bytes, double *const before, const int kLo = idjq * dhq; const int kHi = std::min(kLo + dhq, nh); gpuFor( - mip, kHi - kLo, iHi - iLo, djp, - GPU_LAMBDA(const int p, const int k, const int i, const int j) { + mip, kHi - kLo, iHi - iLo, djp, GPU_LAMBDA(const int p, const int k, const int i, const int j) { const int jj = p * djp + j; if (jj < nj) { const int ia = j + djp * (i + dip * (k + dhq * p)); @@ -248,9 +238,7 @@ void HenryPeriodic::filter(const size_t bytes, double *const before, const int kLo = idjq * dhq; const int kHi = std::min(kLo + dhq, nh); gpuFor( - jHi - jLo, kHi - kLo, mi, mp, dip, - GPU_LAMBDA(const int j, const int k, const int r, const int p, - const int i) { + jHi - jLo, kHi - kLo, mi, mp, dip, GPU_LAMBDA(const int j, const int k, const int r, const int p, const int i) { const int rdi = r * di; const int ii = rdi + p * dip + i; if ((ii < ni) && (ii < rdi + di)) { @@ -272,8 +260,7 @@ void HenryPeriodic::filter(const size_t bytes, double *const before, const int kHi = std::min(kLo + dhq, nh); gpuFor( - jHi - jLo, kHi - kLo, ni, - GPU_LAMBDA(const int j0, const int k0, const int i) { + jHi - jLo, kHi - kLo, ni, GPU_LAMBDA(const int j0, const int k0, const int i) { const int j = jLo + j0; const int k = kLo + k0; const int iab = i + ni * (k0 + dhq * j0); @@ -290,9 +277,7 @@ void HenryPeriodic::filter(const size_t bytes, double *const before, const int kLo = idjq * dhq; const int kHi = std::min(kLo + dhq, nh); gpuFor( - mi, mp, jHi - jLo, kHi - kLo, dip, - GPU_LAMBDA(const int r, const int p, const int j, const int k, - const int i) { + mi, mp, jHi - jLo, kHi - kLo, dip, GPU_LAMBDA(const int r, const int p, const int j, const int k, const int i) { const int rdi = r * di; const int ii = rdi + p * dip + i; if ((ii < ni) && (ii < rdi + di)) { @@ -320,8 +305,7 @@ void HenryPeriodic::filter(const size_t bytes, double *const before, const int kLo = idjq * dhq; const int kHi = std::min(kLo + dhq, nh); gpuFor( - kHi - kLo, iHi - iLo, mip, djp, - GPU_LAMBDA(const int k, const int i, const int p, const int j) { + kHi - kLo, iHi - iLo, mip, djp, GPU_LAMBDA(const int k, const int i, const int p, const int j) { const int jj = p * djp + j; if (jj < nj) { const int ia = jj + nj * (i + dip * k); @@ -341,9 +325,7 @@ void HenryPeriodic::filter(const size_t bytes, double *const before, const int kLo = idjq * dhq; const int kHi = std::min(kLo + dhq, nh); gpuFor( - mj, mq, kHi - kLo, iHi - iLo, djq, - GPU_LAMBDA(const int r, const int q, const int k, const int i, - const int j) { + mj, mq, kHi - kLo, iHi - iLo, djq, GPU_LAMBDA(const int r, const int q, const int k, const int i, const int j) { const int rdj = r * dj; const int jj = rdj + q * djq + j; if ((jj < nj) && (jj < rdj + dj)) { @@ -371,8 +353,7 @@ void HenryPeriodic::filter(const size_t bytes, double *const before, const int jLo = idj * dj + idq * djq; const int jHi = std::min({jLo + djq, (idj + 1) * dj, nj}); gpuFor( - iHi - iLo, jHi - jLo, mjq, dhq, - GPU_LAMBDA(const int i, const int j, const int q, const int k) { + iHi - iLo, jHi - jLo, mjq, dhq, GPU_LAMBDA(const int i, const int j, const int q, const int k) { const int kk = q * dhq + k; if (kk < nh) { const int ia = kk + nh * (j + djq * i); @@ -392,8 +373,7 @@ void HenryPeriodic::filter(const size_t bytes, double *const before, const int jLo = idj * dj + idq * djq; const int jHi = std::min({jLo + djq, (idj + 1) * dj, nj}); gpuFor( - mk, iHi - iLo, jHi - jLo, dk, - GPU_LAMBDA(const int pq, const int i, const int j, const int k) { + mk, iHi - iLo, jHi - jLo, dk, GPU_LAMBDA(const int pq, const int i, const int j, const int k) { const int kk = pq * dk + k; if (kk < nk) { const int ia = k + dk * (j + djq * (i + dip * pq)); @@ -419,9 +399,7 @@ void HenryPeriodic::filter(const size_t bytes, double *const before, const int kLo = idk * dk; const int kHi = std::min(kLo + dk, nk); gpuFor( - mp, dip, mq, djq, kHi - kLo, - GPU_LAMBDA(const int p, const int i, const int q, const int j, - const int k) { + mp, dip, mq, djq, kHi - kLo, GPU_LAMBDA(const int p, const int i, const int q, const int j, const int k) { const int ii = p * dip + i; const int jj = q * djq + j; if ((ii < di) && (jj < dj)) { diff --git a/src/gravity/paris/ParisPeriodic.hpp b/src/gravity/paris/ParisPeriodic.hpp index 11c34fe8c..2650e156d 100644 --- a/src/gravity/paris/ParisPeriodic.hpp +++ b/src/gravity/paris/ParisPeriodic.hpp @@ -20,8 +20,7 @@ class ParisPeriodic * @param[in] m[3] { Number of MPI tasks in each dimension. } * @param[in] id[3] { Coordinates of this MPI task, starting at `{0,0,0}`. } */ - ParisPeriodic(const int n[3], const double lo[3], const double hi[3], - const int m[3], const int id[3]); + ParisPeriodic(const int n[3], const double lo[3], const double hi[3], const int m[3], const int id[3]); /** * @return { Number of bytes needed for array arguments for @ref solve. } diff --git a/src/gravity/paris/PoissonZero3DBlockedGPU.hpp b/src/gravity/paris/PoissonZero3DBlockedGPU.hpp index 0ff37015f..0094f5b0d 100644 --- a/src/gravity/paris/PoissonZero3DBlockedGPU.hpp +++ b/src/gravity/paris/PoissonZero3DBlockedGPU.hpp @@ -7,8 +7,7 @@ class PoissonZero3DBlockedGPU { public: - PoissonZero3DBlockedGPU(const int n[3], const double lo[3], - const double hi[3], const int m[3], const int id[3]); + PoissonZero3DBlockedGPU(const int n[3], const double lo[3], const double hi[3], const int m[3], const int id[3]); ~PoissonZero3DBlockedGPU(); long bytes() const { return bytes_; } void solve(long bytes, double *density, double *potential) const; diff --git a/src/utils/gpu.hpp b/src/utils/gpu.hpp index 52acbca24..a248ef9fe 100644 --- a/src/utils/gpu.hpp +++ b/src/utils/gpu.hpp @@ -12,12 +12,10 @@ #include -static void __attribute__((unused)) -check(const hipfftResult err, const char *const file, const int line) +static void __attribute__((unused)) check(const hipfftResult err, const char *const file, const int line) { if (err == HIPFFT_SUCCESS) return; - fprintf(stderr, "HIPFFT ERROR AT LINE %d OF FILE '%s': %d\n", line, file, - err); + fprintf(stderr, "HIPFFT ERROR AT LINE %d OF FILE '%s': %d\n", line, file, err); fflush(stderr); exit(err); } @@ -110,12 +108,11 @@ static constexpr int maxWarpsPerBlock = 1024 / WARPSIZE; #define curand hiprand #define curand_poisson hiprand_poisson -static void __attribute__((unused)) -check(const hipError_t err, const char *const file, const int line) +static void __attribute__((unused)) check(const hipError_t err, const char *const file, const int line) { if (err == hipSuccess) return; - fprintf(stderr, "HIP ERROR AT LINE %d OF FILE '%s': %s %s\n", line, file, - hipGetErrorName(err), hipGetErrorString(err)); + fprintf(stderr, "HIP ERROR AT LINE %d OF FILE '%s': %s %s\n", line, file, hipGetErrorName(err), + hipGetErrorString(err)); fflush(stderr); exit(err); } @@ -141,8 +138,8 @@ static void check(const cufftResult err, const char *const file, const int line) static void check(const cudaError_t err, const char *const file, const int line) { if (err == cudaSuccess) return; - fprintf(stderr, "CUDA ERROR AT LINE %d OF FILE '%s': %s %s\n", line, file, - cudaGetErrorName(err), cudaGetErrorString(err)); + fprintf(stderr, "CUDA ERROR AT LINE %d OF FILE '%s': %s %s\n", line, file, cudaGetErrorName(err), + cudaGetErrorString(err)); fflush(stderr); exit(err); } @@ -161,8 +158,7 @@ static constexpr int maxWarpsPerBlock = 1024 / WARPSIZE; #if defined(__CUDACC__) || defined(__HIPCC__) template -__global__ __launch_bounds__(GPU_MAX_THREADS) void gpuRun0(const int n0, - const F f) +__global__ __launch_bounds__(GPU_MAX_THREADS) void gpuRun0(const int n0, const F f) { const int i0 = blockIdx.x * blockDim.x + threadIdx.x; if (i0 < n0) f(i0); @@ -195,8 +191,7 @@ __global__ __launch_bounds__(GPU_MAX_THREADS) void gpuRun1x1(const F f) } template -__global__ __launch_bounds__(GPU_MAX_THREADS) void gpuRun2x0(const int n1, - const F f) +__global__ __launch_bounds__(GPU_MAX_THREADS) void gpuRun2x0(const int n1, const F f) { const int i0 = blockIdx.y; const int i1 = blockIdx.x * blockDim.x + threadIdx.x; @@ -252,8 +247,7 @@ __global__ __launch_bounds__(GPU_MAX_THREADS) void gpuRun2x1(const F f) } template -__global__ __launch_bounds__(GPU_MAX_THREADS) void gpuRun3x0(const int n2, - const F f) +__global__ __launch_bounds__(GPU_MAX_THREADS) void gpuRun3x0(const int n2, const F f) { const int i0 = blockIdx.z; const int i1 = blockIdx.y; @@ -317,9 +311,7 @@ __global__ __launch_bounds__(GPU_MAX_THREADS) void gpuRun3x1(const F f) } template -__global__ __launch_bounds__(GPU_MAX_THREADS) void gpuRun4x0(const int n23, - const int n3, - const F f) +__global__ __launch_bounds__(GPU_MAX_THREADS) void gpuRun4x0(const int n23, const int n3, const F f) { const int i23 = blockIdx.x * blockDim.x + threadIdx.x; if (i23 < n23) { @@ -381,8 +373,7 @@ __global__ __launch_bounds__(GPU_MAX_THREADS) void gpuRun3x2(const F f) } template -__global__ __launch_bounds__(GPU_MAX_THREADS) void gpuRun4x1(const int n1, - const F f) +__global__ __launch_bounds__(GPU_MAX_THREADS) void gpuRun4x1(const int n1, const F f) { const int i01 = blockIdx.z; const int i0 = i01 / n1; @@ -394,10 +385,7 @@ __global__ __launch_bounds__(GPU_MAX_THREADS) void gpuRun4x1(const int n1, } template -__global__ __launch_bounds__(GPU_MAX_THREADS) void gpuRun5x0(const int n1, - const int n34, - const int n4, - const F f) +__global__ __launch_bounds__(GPU_MAX_THREADS) void gpuRun5x0(const int n1, const int n34, const int n4, const F f) { const int i34 = blockIdx.x * blockDim.x + threadIdx.x; if (i34 < n34) { @@ -412,8 +400,7 @@ __global__ __launch_bounds__(GPU_MAX_THREADS) void gpuRun5x0(const int n1, } template -void gpuFor(const int n0, const int n1, const int n2, const int n3, - const int n4, const F f) +void gpuFor(const int n0, const int n1, const int n2, const int n3, const int n4, const F f) { if ((n0 <= 0) || (n1 <= 0) || (n2 <= 0) || (n3 <= 0) || (n4 <= 0)) return; const long nl01 = long(n0) * long(n1); From f757fd517b571db046a04c1256ada466a9905b17 Mon Sep 17 00:00:00 2001 From: Alwin Date: Thu, 9 Feb 2023 12:50:16 -0800 Subject: [PATCH 208/694] update git blame ignore revs --- .git-blame-ignore-revs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs index 331d6f122..86ae19b2e 100644 --- a/.git-blame-ignore-revs +++ b/.git-blame-ignore-revs @@ -12,3 +12,7 @@ b78d8c96680c9c2d5a5d41656895cb3795e1e204 # Reformat Code with clang-format 729ef8ed307eaa2cf42baa1f5af6c389ad614ac4 + +# Reformat Code with clang-format increasing column width to 120 +b779b212b24ed19592ac309eab1c3ccb7ba66212 +8e5b4619734e0922d815f4d259323c68002af6db \ No newline at end of file From a143f2ddc373ca5b27828f76dca4acdc49f5eff2 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Wed, 8 Feb 2023 10:45:00 -0500 Subject: [PATCH 209/694] Add new grid enums for CT electric field --- src/grid/grid_enum.h | 4 +++ src/mhd/ct_electric_fields.cu | 63 +++++++++++++++++++++++++---------- src/mhd/magnetic_update.cu | 51 +++++++++++++++++++--------- 3 files changed, 85 insertions(+), 33 deletions(-) diff --git a/src/grid/grid_enum.h b/src/grid/grid_enum.h index 6df1d6e26..15e1d604a 100644 --- a/src/grid/grid_enum.h +++ b/src/grid/grid_enum.h @@ -86,6 +86,10 @@ enum : int { magnetic_start = magnetic_x, magnetic_end = magnetic_z, + ct_elec_x = 0, + ct_elec_y = 1, + ct_elec_z = 2, + // Note that the direction of the flux, the suffix _? indicates the direction // of the electric field, not the magnetic flux fluxX_magnetic_z = magnetic_start, diff --git a/src/mhd/ct_electric_fields.cu b/src/mhd/ct_electric_fields.cu index 349779f34..24cc3b15c 100644 --- a/src/mhd/ct_electric_fields.cu +++ b/src/mhd/ct_electric_fields.cu @@ -70,7 +70,9 @@ __global__ void Calculate_CT_Electric_Fields(Real const *fluxX, Real const *flux // ================ // Y-direction slope on the positive Y side. S&G 2009 equation 23 - signUpwind = fluxZ[cuda_utilities::compute1DIndex(xid, yid, zid - 1, nx, ny)]; + signUpwind = + fluxZ[cuda_utilities::compute1DIndex(xid, yid, zid - 1, nx, ny) + + grid_enum::density * n_cells]; if (signUpwind > 0.0) { slope_y_pos = mhd::_internal::_ctSlope(fluxY, dev_conserved, -1, 0, 2, -1, 1, 2, xid, yid, zid, nx, ny, n_cells); } else if (signUpwind < 0.0) { @@ -83,7 +85,9 @@ __global__ void Calculate_CT_Electric_Fields(Real const *fluxX, Real const *flux } // Y-direction slope on the negative Y side. S&G 2009 equation 23 - signUpwind = fluxZ[cuda_utilities::compute1DIndex(xid, yid - 1, zid - 1, nx, ny)]; + signUpwind = + fluxZ[cuda_utilities::compute1DIndex(xid, yid - 1, zid - 1, nx, ny) + + grid_enum::density * n_cells]; if (signUpwind > 0.0) { slope_y_neg = mhd::_internal::_ctSlope(fluxY, dev_conserved, -1, 0, 1, 2, 1, 2, xid, yid, zid, nx, ny, n_cells); } else if (signUpwind < 0.0) { @@ -95,7 +99,9 @@ __global__ void Calculate_CT_Electric_Fields(Real const *fluxX, Real const *flux } // Z-direction slope on the positive Z side. S&G 2009 equation 23 - signUpwind = fluxY[cuda_utilities::compute1DIndex(xid, yid - 1, zid, nx, ny)]; + signUpwind = + fluxY[cuda_utilities::compute1DIndex(xid, yid - 1, zid, nx, ny) + + grid_enum::density * n_cells]; if (signUpwind > 0.0) { slope_z_pos = mhd::_internal::_ctSlope(fluxZ, dev_conserved, 1, 0, 1, -1, 1, 2, xid, yid, zid, nx, ny, n_cells); } else if (signUpwind < 0.0) { @@ -107,7 +113,9 @@ __global__ void Calculate_CT_Electric_Fields(Real const *fluxX, Real const *flux } // Z-direction slope on the negative Z side. S&G 2009 equation 23 - signUpwind = fluxY[cuda_utilities::compute1DIndex(xid, yid - 1, zid - 1, nx, ny)]; + signUpwind = + fluxY[cuda_utilities::compute1DIndex(xid, yid - 1, zid - 1, nx, ny) + + grid_enum::density * n_cells]; if (signUpwind > 0.0) { slope_z_neg = mhd::_internal::_ctSlope(fluxZ, dev_conserved, 1, 0, 1, 2, 1, 2, xid, yid, zid, nx, ny, n_cells); } else if (signUpwind < 0.0) { @@ -133,15 +141,18 @@ __global__ void Calculate_CT_Electric_Fields(Real const *fluxX, Real const *flux // sum and average face centered electric fields and slopes to get the // edge averaged electric field. // S&G 2009 equation 22 - ctElectricFields[threadId + 0 * n_cells] = 0.25 * (+face_y_pos + face_y_neg + face_z_pos + face_z_neg + - slope_y_pos + slope_y_neg + slope_z_pos + slope_z_neg); + ctElectricFields[threadId + grid_enum::ct_elec_x * n_cells] = + 0.25 * (+face_y_pos + face_y_neg + face_z_pos + face_z_neg + + slope_y_pos + slope_y_neg + slope_z_pos + slope_z_neg); // ================ // Y electric field // ================ // X-direction slope on the positive X side. S&G 2009 equation 23 - signUpwind = fluxZ[cuda_utilities::compute1DIndex(xid, yid, zid - 1, nx, ny)]; + signUpwind = + fluxZ[cuda_utilities::compute1DIndex(xid, yid, zid - 1, nx, ny) + + grid_enum::density * n_cells]; if (signUpwind > 0.0) { slope_x_pos = mhd::_internal::_ctSlope(fluxX, dev_conserved, 1, 1, 2, -1, 0, 2, xid, yid, zid, nx, ny, n_cells); } else if (signUpwind < 0.0) { @@ -153,7 +164,9 @@ __global__ void Calculate_CT_Electric_Fields(Real const *fluxX, Real const *flux } // X-direction slope on the negative X side. S&G 2009 equation 23 - signUpwind = fluxZ[cuda_utilities::compute1DIndex(xid - 1, yid, zid - 1, nx, ny)]; + signUpwind = + fluxZ[cuda_utilities::compute1DIndex(xid - 1, yid, zid - 1, nx, ny) + + grid_enum::density * n_cells]; if (signUpwind > 0.0) { slope_x_neg = mhd::_internal::_ctSlope(fluxX, dev_conserved, 1, 1, 0, 2, 0, 2, xid, yid, zid, nx, ny, n_cells); } else if (signUpwind < 0.0) { @@ -165,7 +178,9 @@ __global__ void Calculate_CT_Electric_Fields(Real const *fluxX, Real const *flux } // Z-direction slope on the positive Z side. S&G 2009 equation 23 - signUpwind = fluxX[cuda_utilities::compute1DIndex(xid - 1, yid, zid, nx, ny)]; + signUpwind = + fluxX[cuda_utilities::compute1DIndex(xid - 1, yid, zid, nx, ny) + + grid_enum::density * n_cells]; if (signUpwind > 0.0) { slope_z_pos = mhd::_internal::_ctSlope(fluxZ, dev_conserved, -1, 1, 0, -1, 0, 2, xid, yid, zid, nx, ny, n_cells); } else if (signUpwind < 0.0) { @@ -178,7 +193,9 @@ __global__ void Calculate_CT_Electric_Fields(Real const *fluxX, Real const *flux } // Z-direction slope on the negative Z side. S&G 2009 equation 23 - signUpwind = fluxX[cuda_utilities::compute1DIndex(xid - 1, yid, zid - 1, nx, ny)]; + signUpwind = + fluxX[cuda_utilities::compute1DIndex(xid - 1, yid, zid - 1, nx, ny) + + grid_enum::density * n_cells]; if (signUpwind > 0.0) { slope_z_neg = mhd::_internal::_ctSlope(fluxZ, dev_conserved, -1, 1, 0, 2, 0, 2, xid, yid, zid, nx, ny, n_cells); } else if (signUpwind < 0.0) { @@ -203,15 +220,18 @@ __global__ void Calculate_CT_Electric_Fields(Real const *fluxX, Real const *flux // sum and average face centered electric fields and slopes to get the // edge averaged electric field. // S&G 2009 equation 22 - ctElectricFields[threadId + 1 * n_cells] = 0.25 * (+face_x_pos + face_x_neg + face_z_pos + face_z_neg + - slope_x_pos + slope_x_neg + slope_z_pos + slope_z_neg); + ctElectricFields[threadId + grid_enum::ct_elec_y * n_cells] = + 0.25 * (+face_x_pos + face_x_neg + face_z_pos + face_z_neg + + slope_x_pos + slope_x_neg + slope_z_pos + slope_z_neg); // ================ // Z electric field // ================ // Y-direction slope on the positive Y side. S&G 2009 equation 23 - signUpwind = fluxX[cuda_utilities::compute1DIndex(xid - 1, yid, zid, nx, ny)]; + signUpwind = + fluxX[cuda_utilities::compute1DIndex(xid - 1, yid, zid, nx, ny) + + grid_enum::density * n_cells]; if (signUpwind > 0.0) { slope_y_pos = mhd::_internal::_ctSlope(fluxY, dev_conserved, 1, 2, 0, -1, 0, 1, xid, yid, zid, nx, ny, n_cells); } else if (signUpwind < 0.0) { @@ -223,7 +243,9 @@ __global__ void Calculate_CT_Electric_Fields(Real const *fluxX, Real const *flux } // Y-direction slope on the negative Y side. S&G 2009 equation 23 - signUpwind = fluxX[cuda_utilities::compute1DIndex(xid - 1, yid - 1, zid, nx, ny)]; + signUpwind = + fluxX[cuda_utilities::compute1DIndex(xid - 1, yid - 1, zid, nx, ny) + + grid_enum::density * n_cells]; if (signUpwind > 0.0) { slope_y_neg = mhd::_internal::_ctSlope(fluxY, dev_conserved, 1, 2, 0, 1, 0, 1, xid, yid, zid, nx, ny, n_cells); } else if (signUpwind < 0.0) { @@ -235,7 +257,9 @@ __global__ void Calculate_CT_Electric_Fields(Real const *fluxX, Real const *flux } // X-direction slope on the positive X side. S&G 2009 equation 23 - signUpwind = fluxY[cuda_utilities::compute1DIndex(xid, yid - 1, zid, nx, ny)]; + signUpwind = + fluxY[cuda_utilities::compute1DIndex(xid, yid - 1, zid, nx, ny) + + grid_enum::density * n_cells]; if (signUpwind > 0.0) { slope_x_pos = mhd::_internal::_ctSlope(fluxX, dev_conserved, -1, 2, 1, -1, 0, 1, xid, yid, zid, nx, ny, n_cells); } else if (signUpwind < 0.0) { @@ -248,7 +272,9 @@ __global__ void Calculate_CT_Electric_Fields(Real const *fluxX, Real const *flux } // X-direction slope on the negative X side. S&G 2009 equation 23 - signUpwind = fluxY[cuda_utilities::compute1DIndex(xid - 1, yid - 1, zid, nx, ny)]; + signUpwind = + fluxY[cuda_utilities::compute1DIndex(xid - 1, yid - 1, zid, nx, ny) + + grid_enum::density * n_cells]; if (signUpwind > 0.0) { slope_x_neg = mhd::_internal::_ctSlope(fluxX, dev_conserved, -1, 2, 0, 1, 0, 1, xid, yid, zid, nx, ny, n_cells); } else if (signUpwind < 0.0) { @@ -273,8 +299,9 @@ __global__ void Calculate_CT_Electric_Fields(Real const *fluxX, Real const *flux // sum and average face centered electric fields and slopes to get the // edge averaged electric field. // S&G 2009 equation 22 - ctElectricFields[threadId + 2 * n_cells] = 0.25 * (+face_x_pos + face_x_neg + face_y_pos + face_y_neg + - slope_x_pos + slope_x_neg + slope_y_pos + slope_y_neg); + ctElectricFields[threadId + grid_enum::ct_elec_z * n_cells] = + 0.25 * (+face_x_pos + face_x_neg + face_y_pos + face_y_neg + + slope_x_pos + slope_x_neg + slope_y_pos + slope_y_neg); } } // ========================================================================= diff --git a/src/mhd/magnetic_update.cu b/src/mhd/magnetic_update.cu index 7d1b48086..2541d0956 100644 --- a/src/mhd/magnetic_update.cu +++ b/src/mhd/magnetic_update.cu @@ -39,34 +39,55 @@ __global__ void Update_Magnetic_Field_3D(Real *sourceGrid, Real *destinationGrid // Load the various edge electric fields required. The '1' and '2' // fields are not shared and the '3' fields are shared by two of the // updates - Real electric_x_1 = ctElectricFields[(cuda_utilities::compute1DIndex(xid, yid + 1, zid, nx, ny))]; - Real electric_x_2 = ctElectricFields[(cuda_utilities::compute1DIndex(xid, yid, zid + 1, nx, ny))]; - Real electric_x_3 = ctElectricFields[(cuda_utilities::compute1DIndex(xid, yid + 1, zid + 1, nx, ny))]; - Real electric_y_1 = ctElectricFields[(cuda_utilities::compute1DIndex(xid + 1, yid, zid, nx, ny)) + n_cells]; - Real electric_y_2 = ctElectricFields[(cuda_utilities::compute1DIndex(xid, yid, zid + 1, nx, ny)) + n_cells]; - Real electric_y_3 = ctElectricFields[(cuda_utilities::compute1DIndex(xid + 1, yid, zid + 1, nx, ny)) + n_cells]; - Real electric_z_1 = ctElectricFields[(cuda_utilities::compute1DIndex(xid + 1, yid, zid, nx, ny)) + 2 * n_cells]; - Real electric_z_2 = ctElectricFields[(cuda_utilities::compute1DIndex(xid, yid + 1, zid, nx, ny)) + 2 * n_cells]; - Real electric_z_3 = ctElectricFields[(cuda_utilities::compute1DIndex(xid + 1, yid + 1, zid, nx, ny)) + 2 * n_cells]; + Real electric_x_1 = ctElectricFields[(cuda_utilities::compute1DIndex( + xid, yid + 1, zid, nx, ny)) + + grid_enum::ct_elec_x * n_cells]; + Real electric_x_2 = ctElectricFields[(cuda_utilities::compute1DIndex( + xid, yid, zid + 1, nx, ny)) + + grid_enum::ct_elec_x * n_cells]; + Real electric_x_3 = ctElectricFields[(cuda_utilities::compute1DIndex( + xid, yid + 1, zid + 1, nx, ny)) + + grid_enum::ct_elec_x * n_cells]; + Real electric_y_1 = ctElectricFields[(cuda_utilities::compute1DIndex( + xid + 1, yid, zid, nx, ny)) + + grid_enum::ct_elec_y * n_cells]; + Real electric_y_2 = ctElectricFields[(cuda_utilities::compute1DIndex( + xid, yid, zid + 1, nx, ny)) + + grid_enum::ct_elec_y * n_cells]; + Real electric_y_3 = ctElectricFields[(cuda_utilities::compute1DIndex( + xid + 1, yid, zid + 1, nx, ny)) + + grid_enum::ct_elec_y * n_cells]; + Real electric_z_1 = ctElectricFields[(cuda_utilities::compute1DIndex( + xid + 1, yid, zid, nx, ny)) + + grid_enum::ct_elec_z * n_cells]; + Real electric_z_2 = ctElectricFields[(cuda_utilities::compute1DIndex( + xid, yid + 1, zid, nx, ny)) + + grid_enum::ct_elec_z * n_cells]; + Real electric_z_3 = ctElectricFields[(cuda_utilities::compute1DIndex( + xid + 1, yid + 1, zid, nx, ny)) + + grid_enum::ct_elec_z * n_cells]; // Perform Updates // X field update // S&G 2009 equation 10 - destinationGrid[threadId + (grid_enum::magnetic_x)*n_cells] = - sourceGrid[threadId + (grid_enum::magnetic_x)*n_cells] + dtodz * (electric_y_3 - electric_y_1) + + destinationGrid[threadId + grid_enum::magnetic_x * n_cells] = + sourceGrid[threadId + grid_enum::magnetic_x * n_cells] + + dtodz * (electric_y_3 - electric_y_1) + dtody * (electric_z_1 - electric_z_3); // Y field update // S&G 2009 equation 11 - destinationGrid[threadId + (grid_enum::magnetic_y)*n_cells] = - sourceGrid[threadId + (grid_enum::magnetic_y)*n_cells] + dtodx * (electric_z_3 - electric_z_2) + + destinationGrid[threadId + grid_enum::magnetic_y * n_cells] = + sourceGrid[threadId + grid_enum::magnetic_y * n_cells] + + dtodx * (electric_z_3 - electric_z_2) + dtodz * (electric_x_1 - electric_x_3); // Z field update // S&G 2009 equation 12 - destinationGrid[threadId + (grid_enum::magnetic_z)*n_cells] = - sourceGrid[threadId + (grid_enum::magnetic_z)*n_cells] + dtody * (electric_x_3 - electric_x_2) + + destinationGrid[threadId + grid_enum::magnetic_z * n_cells] = + sourceGrid[threadId + grid_enum::magnetic_z * n_cells] + + dtody * (electric_x_3 - electric_x_2) + dtodx * (electric_y_2 - electric_y_3); } } From 19ee2c6efdaa54c41bd1d658342befdc0d7c4160 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Wed, 8 Feb 2023 13:07:35 -0500 Subject: [PATCH 210/694] Loosen threadguards on CT Electric fields and magnetic update --- src/integrators/VL_3D_cuda.cu | 1 - src/mhd/ct_electric_fields.cu | 2 +- src/mhd/ct_electric_fields_tests.cu | 20 ++++++++++---------- src/mhd/magnetic_update.cu | 3 ++- src/mhd/magnetic_update_tests.cu | 6 +++--- 5 files changed, 16 insertions(+), 16 deletions(-) diff --git a/src/integrators/VL_3D_cuda.cu b/src/integrators/VL_3D_cuda.cu index 52d8124ff..3af257da7 100644 --- a/src/integrators/VL_3D_cuda.cu +++ b/src/integrators/VL_3D_cuda.cu @@ -117,7 +117,6 @@ void VL_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, int #ifdef MHD CudaSafeCall(cudaMalloc((void **)&ctElectricFields, ctArraySize)); - cuda_utilities::initGpuMemory(ctElectricFields, ctArraySize); #endif // MHD #if defined(GRAVITY) diff --git a/src/mhd/ct_electric_fields.cu b/src/mhd/ct_electric_fields.cu index 24cc3b15c..15ed17783 100644 --- a/src/mhd/ct_electric_fields.cu +++ b/src/mhd/ct_electric_fields.cu @@ -28,7 +28,7 @@ __global__ void Calculate_CT_Electric_Fields(Real const *fluxX, Real const *flux // Thread guard to avoid overrun and to skip the first two cells since // those ghost cells can't be reconstructed - if (xid > 1 and yid > 1 and zid > 1 and xid < nx and yid < ny and zid < nz) { + if (xid > 0 and yid > 0 and zid > 0 and xid < nx and yid < ny and zid < nz) { // According to Stone et al. 2008 section 5.3 and the source code of // Athena, the following equation relate the magnetic flux to the // face centered electric fields/EMF. -cross(V,B)x is the negative diff --git a/src/mhd/ct_electric_fields_tests.cu b/src/mhd/ct_electric_fields_tests.cu index 05b610805..a57f8afe2 100644 --- a/src/mhd/ct_electric_fields_tests.cu +++ b/src/mhd/ct_electric_fields_tests.cu @@ -39,7 +39,7 @@ class tMHDCalculateCTElectricFields : public ::testing::Test * */ tMHDCalculateCTElectricFields() - : nx(3), + : nx(2), ny(nx), nz(nx), n_cells(nx * ny * nz), @@ -130,9 +130,9 @@ class tMHDCalculateCTElectricFields : public ::testing::Test TEST_F(tMHDCalculateCTElectricFields, PositiveVelocityExpectCorrectOutput) { // Fiducial values - fiducialData.at(26) = 206.29859653255295; - fiducialData.at(53) = -334.90052254763339; - fiducialData.at(80) = 209.53472440298236; + fiducialData.at(7) = 60.951467108788492; + fiducialData.at(15) = -98.736587665919359; + fiducialData.at(23) = 61.768055665002557; // Launch kernel and check results runTest(); @@ -143,9 +143,9 @@ TEST_F(tMHDCalculateCTElectricFields, PositiveVelocityExpectCorrectOutput) TEST_F(tMHDCalculateCTElectricFields, NegativeVelocityExpectCorrectOutput) { // Fiducial values - fiducialData.at(26) = 203.35149422304994; - fiducialData.at(53) = -330.9860399765279; - fiducialData.at(80) = 208.55149905461991; + fiducialData.at(7) = 59.978246483260179; + fiducialData.at(15) = -97.279949010457187; + fiducialData.at(23) = 61.280813140085613; // Set the density fluxes to be negative to indicate a negative velocity // across the face @@ -164,9 +164,9 @@ TEST_F(tMHDCalculateCTElectricFields, NegativeVelocityExpectCorrectOutput) TEST_F(tMHDCalculateCTElectricFields, ZeroVelocityExpectCorrectOutput) { // Fiducial values - fiducialData.at(26) = 204.82504537780144; - fiducialData.at(53) = -332.94328126208063; - fiducialData.at(80) = 209.04311172880114; + fiducialData.at(7) = 60.464856796024335; + fiducialData.at(15) = -98.008268338188287; + fiducialData.at(23) = 61.524434402544081; // Set the density fluxes to be negative to indicate a negative velocity // across the face diff --git a/src/mhd/magnetic_update.cu b/src/mhd/magnetic_update.cu index 2541d0956..77eae8a2a 100644 --- a/src/mhd/magnetic_update.cu +++ b/src/mhd/magnetic_update.cu @@ -30,7 +30,8 @@ __global__ void Update_Magnetic_Field_3D(Real *sourceGrid, Real *destinationGrid // Thread guard to avoid overrun and to skip ghost cells that cannot be // evolved due to missing electric fields that can't be reconstructed - if (xid < nx - 2 and yid < ny - 2 and zid < nz - 2) { + if (xid > 0 and yid > 0 and zid > 0 and xid < nx - 1 and yid < ny - 1 and + zid < nz - 1) { // Compute the three dt/dx quantities Real const dtodx = dt / dx; Real const dtody = dt / dy; diff --git a/src/mhd/magnetic_update_tests.cu b/src/mhd/magnetic_update_tests.cu index f311271b2..79dc81db7 100644 --- a/src/mhd/magnetic_update_tests.cu +++ b/src/mhd/magnetic_update_tests.cu @@ -122,9 +122,9 @@ class tMHDUpdateMagneticField3D : public ::testing::Test TEST_F(tMHDUpdateMagneticField3D, CorrectInputExpectCorrectOutput) { // Fiducial values - fiducialData.at(135) = 142.68000000000001; - fiducialData.at(162) = 151.75999999999999; - fiducialData.at(189) = 191.56; + fiducialData.at(148) = 155.68000000000001; + fiducialData.at(175) = 164.75999999999999; + fiducialData.at(202) = 204.56; // Launch kernel and check results runTest(); From 3cfaaf6c0e6e51caf5bc1ce4f1d1577681c86b95 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Thu, 9 Feb 2023 11:46:47 -0500 Subject: [PATCH 211/694] Add grid_enum to ct slopes --- src/mhd/ct_electric_fields.h | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/src/mhd/ct_electric_fields.h b/src/mhd/ct_electric_fields.h index 349aa42c8..f1f81c942 100644 --- a/src/mhd/ct_electric_fields.h +++ b/src/mhd/ct_electric_fields.h @@ -97,13 +97,22 @@ inline __host__ __device__ Real _ctSlope(Real const *flux, Real const *dev_conse // variable, B2 and B3 are the next two fields cyclically. i.e. if // B1=Bx then B2=By and B3=Bz, if B1=By then B2=Bz and B3=Bx. The // same rules apply for the momentum - Real const density = dev_conserved[idxCentered]; - Real const Momentum2 = dev_conserved[idxCentered + (modPlus1 + 1) * n_cells]; - Real const Momentum3 = dev_conserved[idxCentered + (modPlus2 + 1) * n_cells]; - Real const B2Centered = 0.5 * (dev_conserved[idxCentered + (modPlus1 + grid_enum::magnetic_start) * n_cells] + - dev_conserved[idxB2Shift + (modPlus1 + grid_enum::magnetic_start) * n_cells]); - Real const B3Centered = 0.5 * (dev_conserved[idxCentered + (modPlus2 + grid_enum::magnetic_start) * n_cells] + - dev_conserved[idxB3Shift + (modPlus2 + grid_enum::magnetic_start) * n_cells]); + Real const density = + dev_conserved[idxCentered + grid_enum::density * n_cells]; + Real const Momentum2 = + dev_conserved[idxCentered + (modPlus1 + grid_enum::momentum_x) * n_cells]; + Real const Momentum3 = + dev_conserved[idxCentered + (modPlus2 + grid_enum::momentum_x) * n_cells]; + Real const B2Centered = + 0.5 * (dev_conserved[idxCentered + + (modPlus1 + grid_enum::magnetic_start) * n_cells] + + dev_conserved[idxB2Shift + + (modPlus1 + grid_enum::magnetic_start) * n_cells]); + Real const B3Centered = + 0.5 * (dev_conserved[idxCentered + + (modPlus2 + grid_enum::magnetic_start) * n_cells] + + dev_conserved[idxB3Shift + + (modPlus2 + grid_enum::magnetic_start) * n_cells]); // Compute the electric field in the center with a cross product Real const electric_centered = (Momentum3 * B2Centered - Momentum2 * B3Centered) / density; From 09ac528dc9d843f42f5959b585b79bc371d20e04 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Fri, 10 Feb 2023 11:29:16 -0500 Subject: [PATCH 212/694] Remove unneeded variables from HLLD solver --- src/integrators/VL_3D_cuda.cu | 39 ++++++++++++++++---------- src/riemann_solvers/hlld_cuda.cu | 20 ++++++------- src/riemann_solvers/hlld_cuda.h | 21 +++++++------- src/riemann_solvers/hlld_cuda_tests.cu | 21 +++++++------- 4 files changed, 55 insertions(+), 46 deletions(-) diff --git a/src/integrators/VL_3D_cuda.cu b/src/integrators/VL_3D_cuda.cu index 3af257da7..d3fc956ce 100644 --- a/src/integrators/VL_3D_cuda.cu +++ b/src/integrators/VL_3D_cuda.cu @@ -176,12 +176,18 @@ void VL_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, int 2, n_fields); #endif // HLL #ifdef HLLD - hipLaunchKernelGGL(mhd::Calculate_HLLD_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, - &(dev_conserved[(grid_enum::magnetic_x)*n_cells]), F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); - hipLaunchKernelGGL(mhd::Calculate_HLLD_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, - &(dev_conserved[(grid_enum::magnetic_y)*n_cells]), F_y, nx, ny, nz, n_ghost, gama, 1, n_fields); - hipLaunchKernelGGL(mhd::Calculate_HLLD_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lz, Q_Rz, - &(dev_conserved[(grid_enum::magnetic_z)*n_cells]), F_z, nx, ny, nz, n_ghost, gama, 2, n_fields); + hipLaunchKernelGGL(mhd::Calculate_HLLD_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, + 0, Q_Lx, Q_Rx, + &(dev_conserved[(grid_enum::magnetic_x)*n_cells]), F_x, + n_cells, gama, 0, n_fields); + hipLaunchKernelGGL(mhd::Calculate_HLLD_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, + 0, Q_Ly, Q_Ry, + &(dev_conserved[(grid_enum::magnetic_y)*n_cells]), F_y, + n_cells, gama, 1, n_fields); + hipLaunchKernelGGL(mhd::Calculate_HLLD_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, + 0, Q_Lz, Q_Rz, + &(dev_conserved[(grid_enum::magnetic_z)*n_cells]), F_z, + n_cells, gama, 2, n_fields); #endif // HLLD CudaCheckError(); @@ -277,15 +283,18 @@ void VL_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, int 2, n_fields); #endif // HLLC #ifdef HLLD - hipLaunchKernelGGL(mhd::Calculate_HLLD_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, - &(dev_conserved_half[(grid_enum::magnetic_x)*n_cells]), F_x, nx, ny, nz, n_ghost, gama, 0, - n_fields); - hipLaunchKernelGGL(mhd::Calculate_HLLD_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, - &(dev_conserved_half[(grid_enum::magnetic_y)*n_cells]), F_y, nx, ny, nz, n_ghost, gama, 1, - n_fields); - hipLaunchKernelGGL(mhd::Calculate_HLLD_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lz, Q_Rz, - &(dev_conserved_half[(grid_enum::magnetic_z)*n_cells]), F_z, nx, ny, nz, n_ghost, gama, 2, - n_fields); + hipLaunchKernelGGL(mhd::Calculate_HLLD_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, + 0, Q_Lx, Q_Rx, + &(dev_conserved_half[(grid_enum::magnetic_x)*n_cells]), + F_x, n_cells, gama, 0, n_fields); + hipLaunchKernelGGL(mhd::Calculate_HLLD_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, + 0, Q_Ly, Q_Ry, + &(dev_conserved_half[(grid_enum::magnetic_y)*n_cells]), + F_y, n_cells, gama, 1, n_fields); + hipLaunchKernelGGL(mhd::Calculate_HLLD_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, + 0, Q_Lz, Q_Rz, + &(dev_conserved_half[(grid_enum::magnetic_z)*n_cells]), + F_z, n_cells, gama, 2, n_fields); #endif // HLLD CudaCheckError(); diff --git a/src/riemann_solvers/hlld_cuda.cu b/src/riemann_solvers/hlld_cuda.cu index 0bc277b73..08ad7b684 100644 --- a/src/riemann_solvers/hlld_cuda.cu +++ b/src/riemann_solvers/hlld_cuda.cu @@ -30,21 +30,16 @@ namespace mhd { // ========================================================================= -__global__ void Calculate_HLLD_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_R, Real *dev_magnetic_face, - Real *dev_flux, int nx, int ny, int nz, int n_ghost, Real gamma, - int direction, int n_fields) +__global__ void Calculate_HLLD_Fluxes_CUDA( + Real const *dev_bounds_L, Real const *dev_bounds_R, + Real const *dev_magnetic_face, Real *dev_flux, int const n_cells, + Real const gamma, int const direction, int const n_fields) { // get a thread index - int blockId = blockIdx.x + blockIdx.y * gridDim.x; - int threadId = threadIdx.x + blockId * blockDim.x; - int xid, yid, zid; - cuda_utilities::compute3DIndices(threadId, nx, ny, xid, yid, zid); + int threadId = threadIdx.x + blockIdx.x * blockDim.x; // Thread guard to avoid overrun - if (xid >= nx or yid >= ny or zid >= nz) return; - - // Number of cells - int n_cells = nx * ny * nz; + if (threadId >= n_cells) return; // Offsets & indices int o1, o2, o3; @@ -296,6 +291,9 @@ __device__ __host__ void returnFluxes(int const &threadId, int const &o1, int co int const &n_cells, Real *dev_flux, mhd::_internal::Flux const &flux, mhd::_internal::State const &state) { + // Note that the direction of the grid_enum::fluxX_magnetic_DIR is the + // direction of the electric field that the magnetic flux is, not the magnetic + // flux dev_flux[threadId + n_cells * grid_enum::density] = flux.density; dev_flux[threadId + n_cells * o1] = flux.momentumX; dev_flux[threadId + n_cells * o2] = flux.momentumY; diff --git a/src/riemann_solvers/hlld_cuda.h b/src/riemann_solvers/hlld_cuda.h index a6247a5cf..8c092005d 100644 --- a/src/riemann_solvers/hlld_cuda.h +++ b/src/riemann_solvers/hlld_cuda.h @@ -25,22 +25,23 @@ namespace mhd * \brief Compute the HLLD fluxes from Miyoshi & Kusano 2005 * * \param[in] dev_bounds_L The interface states on the left side of the - * interface \param[in] dev_bounds_R The interface states on the right side of - * the interface \param[in] dev_magnetic_face A pointer to the begining of the + * interface + * \param[in] dev_bounds_R The interface states on the right side of + * the interface + * \param[in] dev_magnetic_face A pointer to the begining of the * conserved magnetic field array that is stored at the interface. I.e. for the * X-direction solve this would be the begining of the X-direction fields * \param[out] dev_flux The output flux - * \param[in] nx Number of cells in the X-direction - * \param[in] ny Number of cells in the Y-direction - * \param[in] nz Number of cells in the Z-direction + * \param[in] n_cells Total number of cells * \param[in] n_ghost Number of ghost cells on each side - * \param[in] gamma The adiabatic index * \param[in] dir The direction that the solve is taking place in. 0=X, 1=Y, - * 2=Z \param[in] n_fields The total number of fields + * 2=Z + * \param[in] n_fields The total number of fields */ -__global__ void Calculate_HLLD_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_R, Real *dev_magnetic_face, - Real *dev_flux, int nx, int ny, int nz, int n_ghost, Real gamma, - int direction, int n_fields); +__global__ void Calculate_HLLD_Fluxes_CUDA( + Real const *dev_bounds_L, Real const *dev_bounds_R, + Real const *dev_magnetic_face, Real *dev_flux, int const n_cells, + Real const gamma, int const direction, int const n_fields); /*! * \brief Namespace to hold private functions used within the HLLD diff --git a/src/riemann_solvers/hlld_cuda_tests.cu b/src/riemann_solvers/hlld_cuda_tests.cu index 712756522..c35b0ef31 100644 --- a/src/riemann_solvers/hlld_cuda_tests.cu +++ b/src/riemann_solvers/hlld_cuda_tests.cu @@ -71,11 +71,11 @@ class tMHDCalculateHLLDFluxesCUDA : public ::testing::Test stateRight.erase(stateRight.begin() + grid_enum::magnetic_x); // Simulation Paramters - int const nx = 1; // Number of cells in the x-direction - int const ny = 1; // Number of cells in the y-direction - int const nz = 1; // Number of cells in the z-direction - int const nGhost = 0; // Isn't actually used it appears - int nFields = 8; // Total number of conserved fields + int const nx = 1; // Number of cells in the x-direction + int const ny = 1; // Number of cells in the y-direction + int const nz = 1; // Number of cells in the z-direction + int const n_cells = nx * ny * nz; + int nFields = 8; // Total number of conserved fields #ifdef SCALAR nFields += NSCALARS; #endif // SCALAR @@ -109,11 +109,12 @@ class tMHDCalculateHLLDFluxesCUDA : public ::testing::Test cudaMemcpy(devConservedMagXFace, magneticX.data(), magneticX.size() * sizeof(Real), cudaMemcpyHostToDevice)); // Run kernel - hipLaunchKernelGGL(mhd::Calculate_HLLD_Fluxes_CUDA, dimGrid, dimBlock, 0, 0, - devConservedLeft, // the "left" interface - devConservedRight, // the "right" interface - devConservedMagXFace, // the magnetic field at the interface - devTestFlux, nx, ny, nz, nGhost, gamma, direction, nFields); + hipLaunchKernelGGL( + mhd::Calculate_HLLD_Fluxes_CUDA, dimGrid, dimBlock, 0, 0, + devConservedLeft, // the "left" interface + devConservedRight, // the "right" interface + devConservedMagXFace, // the magnetic field at the interface + devTestFlux, n_cells, gamma, direction, nFields); CudaCheckError(); CudaSafeCall(cudaMemcpy(testFlux.data(), devTestFlux, testFlux.size() * sizeof(Real), cudaMemcpyDeviceToHost)); From edf83abb006ef78b98f301752ddfe8ed256f7785 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Mon, 13 Feb 2023 10:45:56 -0500 Subject: [PATCH 213/694] formatting --- src/integrators/VL_3D_cuda.cu | 36 ++++++---------- src/mhd/ct_electric_fields.cu | 60 ++++++++------------------ src/mhd/ct_electric_fields.h | 23 +++------- src/mhd/magnetic_update.cu | 51 ++++++++-------------- src/riemann_solvers/hlld_cuda.cu | 7 ++- src/riemann_solvers/hlld_cuda.h | 7 ++- src/riemann_solvers/hlld_cuda_tests.cu | 11 +++-- 7 files changed, 67 insertions(+), 128 deletions(-) diff --git a/src/integrators/VL_3D_cuda.cu b/src/integrators/VL_3D_cuda.cu index d3fc956ce..2227172bf 100644 --- a/src/integrators/VL_3D_cuda.cu +++ b/src/integrators/VL_3D_cuda.cu @@ -176,18 +176,12 @@ void VL_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, int 2, n_fields); #endif // HLL #ifdef HLLD - hipLaunchKernelGGL(mhd::Calculate_HLLD_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, - 0, Q_Lx, Q_Rx, - &(dev_conserved[(grid_enum::magnetic_x)*n_cells]), F_x, - n_cells, gama, 0, n_fields); - hipLaunchKernelGGL(mhd::Calculate_HLLD_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, - 0, Q_Ly, Q_Ry, - &(dev_conserved[(grid_enum::magnetic_y)*n_cells]), F_y, - n_cells, gama, 1, n_fields); - hipLaunchKernelGGL(mhd::Calculate_HLLD_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, - 0, Q_Lz, Q_Rz, - &(dev_conserved[(grid_enum::magnetic_z)*n_cells]), F_z, - n_cells, gama, 2, n_fields); + hipLaunchKernelGGL(mhd::Calculate_HLLD_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, + &(dev_conserved[(grid_enum::magnetic_x)*n_cells]), F_x, n_cells, gama, 0, n_fields); + hipLaunchKernelGGL(mhd::Calculate_HLLD_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, + &(dev_conserved[(grid_enum::magnetic_y)*n_cells]), F_y, n_cells, gama, 1, n_fields); + hipLaunchKernelGGL(mhd::Calculate_HLLD_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lz, Q_Rz, + &(dev_conserved[(grid_enum::magnetic_z)*n_cells]), F_z, n_cells, gama, 2, n_fields); #endif // HLLD CudaCheckError(); @@ -283,18 +277,12 @@ void VL_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, int 2, n_fields); #endif // HLLC #ifdef HLLD - hipLaunchKernelGGL(mhd::Calculate_HLLD_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, - 0, Q_Lx, Q_Rx, - &(dev_conserved_half[(grid_enum::magnetic_x)*n_cells]), - F_x, n_cells, gama, 0, n_fields); - hipLaunchKernelGGL(mhd::Calculate_HLLD_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, - 0, Q_Ly, Q_Ry, - &(dev_conserved_half[(grid_enum::magnetic_y)*n_cells]), - F_y, n_cells, gama, 1, n_fields); - hipLaunchKernelGGL(mhd::Calculate_HLLD_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, - 0, Q_Lz, Q_Rz, - &(dev_conserved_half[(grid_enum::magnetic_z)*n_cells]), - F_z, n_cells, gama, 2, n_fields); + hipLaunchKernelGGL(mhd::Calculate_HLLD_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, + &(dev_conserved_half[(grid_enum::magnetic_x)*n_cells]), F_x, n_cells, gama, 0, n_fields); + hipLaunchKernelGGL(mhd::Calculate_HLLD_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, + &(dev_conserved_half[(grid_enum::magnetic_y)*n_cells]), F_y, n_cells, gama, 1, n_fields); + hipLaunchKernelGGL(mhd::Calculate_HLLD_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lz, Q_Rz, + &(dev_conserved_half[(grid_enum::magnetic_z)*n_cells]), F_z, n_cells, gama, 2, n_fields); #endif // HLLD CudaCheckError(); diff --git a/src/mhd/ct_electric_fields.cu b/src/mhd/ct_electric_fields.cu index 15ed17783..8e95b8dbd 100644 --- a/src/mhd/ct_electric_fields.cu +++ b/src/mhd/ct_electric_fields.cu @@ -70,9 +70,7 @@ __global__ void Calculate_CT_Electric_Fields(Real const *fluxX, Real const *flux // ================ // Y-direction slope on the positive Y side. S&G 2009 equation 23 - signUpwind = - fluxZ[cuda_utilities::compute1DIndex(xid, yid, zid - 1, nx, ny) + - grid_enum::density * n_cells]; + signUpwind = fluxZ[cuda_utilities::compute1DIndex(xid, yid, zid - 1, nx, ny) + grid_enum::density * n_cells]; if (signUpwind > 0.0) { slope_y_pos = mhd::_internal::_ctSlope(fluxY, dev_conserved, -1, 0, 2, -1, 1, 2, xid, yid, zid, nx, ny, n_cells); } else if (signUpwind < 0.0) { @@ -85,9 +83,7 @@ __global__ void Calculate_CT_Electric_Fields(Real const *fluxX, Real const *flux } // Y-direction slope on the negative Y side. S&G 2009 equation 23 - signUpwind = - fluxZ[cuda_utilities::compute1DIndex(xid, yid - 1, zid - 1, nx, ny) + - grid_enum::density * n_cells]; + signUpwind = fluxZ[cuda_utilities::compute1DIndex(xid, yid - 1, zid - 1, nx, ny) + grid_enum::density * n_cells]; if (signUpwind > 0.0) { slope_y_neg = mhd::_internal::_ctSlope(fluxY, dev_conserved, -1, 0, 1, 2, 1, 2, xid, yid, zid, nx, ny, n_cells); } else if (signUpwind < 0.0) { @@ -99,9 +95,7 @@ __global__ void Calculate_CT_Electric_Fields(Real const *fluxX, Real const *flux } // Z-direction slope on the positive Z side. S&G 2009 equation 23 - signUpwind = - fluxY[cuda_utilities::compute1DIndex(xid, yid - 1, zid, nx, ny) + - grid_enum::density * n_cells]; + signUpwind = fluxY[cuda_utilities::compute1DIndex(xid, yid - 1, zid, nx, ny) + grid_enum::density * n_cells]; if (signUpwind > 0.0) { slope_z_pos = mhd::_internal::_ctSlope(fluxZ, dev_conserved, 1, 0, 1, -1, 1, 2, xid, yid, zid, nx, ny, n_cells); } else if (signUpwind < 0.0) { @@ -113,9 +107,7 @@ __global__ void Calculate_CT_Electric_Fields(Real const *fluxX, Real const *flux } // Z-direction slope on the negative Z side. S&G 2009 equation 23 - signUpwind = - fluxY[cuda_utilities::compute1DIndex(xid, yid - 1, zid - 1, nx, ny) + - grid_enum::density * n_cells]; + signUpwind = fluxY[cuda_utilities::compute1DIndex(xid, yid - 1, zid - 1, nx, ny) + grid_enum::density * n_cells]; if (signUpwind > 0.0) { slope_z_neg = mhd::_internal::_ctSlope(fluxZ, dev_conserved, 1, 0, 1, 2, 1, 2, xid, yid, zid, nx, ny, n_cells); } else if (signUpwind < 0.0) { @@ -142,17 +134,15 @@ __global__ void Calculate_CT_Electric_Fields(Real const *fluxX, Real const *flux // edge averaged electric field. // S&G 2009 equation 22 ctElectricFields[threadId + grid_enum::ct_elec_x * n_cells] = - 0.25 * (+face_y_pos + face_y_neg + face_z_pos + face_z_neg + - slope_y_pos + slope_y_neg + slope_z_pos + slope_z_neg); + 0.25 * + (+face_y_pos + face_y_neg + face_z_pos + face_z_neg + slope_y_pos + slope_y_neg + slope_z_pos + slope_z_neg); // ================ // Y electric field // ================ // X-direction slope on the positive X side. S&G 2009 equation 23 - signUpwind = - fluxZ[cuda_utilities::compute1DIndex(xid, yid, zid - 1, nx, ny) + - grid_enum::density * n_cells]; + signUpwind = fluxZ[cuda_utilities::compute1DIndex(xid, yid, zid - 1, nx, ny) + grid_enum::density * n_cells]; if (signUpwind > 0.0) { slope_x_pos = mhd::_internal::_ctSlope(fluxX, dev_conserved, 1, 1, 2, -1, 0, 2, xid, yid, zid, nx, ny, n_cells); } else if (signUpwind < 0.0) { @@ -164,9 +154,7 @@ __global__ void Calculate_CT_Electric_Fields(Real const *fluxX, Real const *flux } // X-direction slope on the negative X side. S&G 2009 equation 23 - signUpwind = - fluxZ[cuda_utilities::compute1DIndex(xid - 1, yid, zid - 1, nx, ny) + - grid_enum::density * n_cells]; + signUpwind = fluxZ[cuda_utilities::compute1DIndex(xid - 1, yid, zid - 1, nx, ny) + grid_enum::density * n_cells]; if (signUpwind > 0.0) { slope_x_neg = mhd::_internal::_ctSlope(fluxX, dev_conserved, 1, 1, 0, 2, 0, 2, xid, yid, zid, nx, ny, n_cells); } else if (signUpwind < 0.0) { @@ -178,9 +166,7 @@ __global__ void Calculate_CT_Electric_Fields(Real const *fluxX, Real const *flux } // Z-direction slope on the positive Z side. S&G 2009 equation 23 - signUpwind = - fluxX[cuda_utilities::compute1DIndex(xid - 1, yid, zid, nx, ny) + - grid_enum::density * n_cells]; + signUpwind = fluxX[cuda_utilities::compute1DIndex(xid - 1, yid, zid, nx, ny) + grid_enum::density * n_cells]; if (signUpwind > 0.0) { slope_z_pos = mhd::_internal::_ctSlope(fluxZ, dev_conserved, -1, 1, 0, -1, 0, 2, xid, yid, zid, nx, ny, n_cells); } else if (signUpwind < 0.0) { @@ -193,9 +179,7 @@ __global__ void Calculate_CT_Electric_Fields(Real const *fluxX, Real const *flux } // Z-direction slope on the negative Z side. S&G 2009 equation 23 - signUpwind = - fluxX[cuda_utilities::compute1DIndex(xid - 1, yid, zid - 1, nx, ny) + - grid_enum::density * n_cells]; + signUpwind = fluxX[cuda_utilities::compute1DIndex(xid - 1, yid, zid - 1, nx, ny) + grid_enum::density * n_cells]; if (signUpwind > 0.0) { slope_z_neg = mhd::_internal::_ctSlope(fluxZ, dev_conserved, -1, 1, 0, 2, 0, 2, xid, yid, zid, nx, ny, n_cells); } else if (signUpwind < 0.0) { @@ -221,17 +205,15 @@ __global__ void Calculate_CT_Electric_Fields(Real const *fluxX, Real const *flux // edge averaged electric field. // S&G 2009 equation 22 ctElectricFields[threadId + grid_enum::ct_elec_y * n_cells] = - 0.25 * (+face_x_pos + face_x_neg + face_z_pos + face_z_neg + - slope_x_pos + slope_x_neg + slope_z_pos + slope_z_neg); + 0.25 * + (+face_x_pos + face_x_neg + face_z_pos + face_z_neg + slope_x_pos + slope_x_neg + slope_z_pos + slope_z_neg); // ================ // Z electric field // ================ // Y-direction slope on the positive Y side. S&G 2009 equation 23 - signUpwind = - fluxX[cuda_utilities::compute1DIndex(xid - 1, yid, zid, nx, ny) + - grid_enum::density * n_cells]; + signUpwind = fluxX[cuda_utilities::compute1DIndex(xid - 1, yid, zid, nx, ny) + grid_enum::density * n_cells]; if (signUpwind > 0.0) { slope_y_pos = mhd::_internal::_ctSlope(fluxY, dev_conserved, 1, 2, 0, -1, 0, 1, xid, yid, zid, nx, ny, n_cells); } else if (signUpwind < 0.0) { @@ -243,9 +225,7 @@ __global__ void Calculate_CT_Electric_Fields(Real const *fluxX, Real const *flux } // Y-direction slope on the negative Y side. S&G 2009 equation 23 - signUpwind = - fluxX[cuda_utilities::compute1DIndex(xid - 1, yid - 1, zid, nx, ny) + - grid_enum::density * n_cells]; + signUpwind = fluxX[cuda_utilities::compute1DIndex(xid - 1, yid - 1, zid, nx, ny) + grid_enum::density * n_cells]; if (signUpwind > 0.0) { slope_y_neg = mhd::_internal::_ctSlope(fluxY, dev_conserved, 1, 2, 0, 1, 0, 1, xid, yid, zid, nx, ny, n_cells); } else if (signUpwind < 0.0) { @@ -257,9 +237,7 @@ __global__ void Calculate_CT_Electric_Fields(Real const *fluxX, Real const *flux } // X-direction slope on the positive X side. S&G 2009 equation 23 - signUpwind = - fluxY[cuda_utilities::compute1DIndex(xid, yid - 1, zid, nx, ny) + - grid_enum::density * n_cells]; + signUpwind = fluxY[cuda_utilities::compute1DIndex(xid, yid - 1, zid, nx, ny) + grid_enum::density * n_cells]; if (signUpwind > 0.0) { slope_x_pos = mhd::_internal::_ctSlope(fluxX, dev_conserved, -1, 2, 1, -1, 0, 1, xid, yid, zid, nx, ny, n_cells); } else if (signUpwind < 0.0) { @@ -272,9 +250,7 @@ __global__ void Calculate_CT_Electric_Fields(Real const *fluxX, Real const *flux } // X-direction slope on the negative X side. S&G 2009 equation 23 - signUpwind = - fluxY[cuda_utilities::compute1DIndex(xid - 1, yid - 1, zid, nx, ny) + - grid_enum::density * n_cells]; + signUpwind = fluxY[cuda_utilities::compute1DIndex(xid - 1, yid - 1, zid, nx, ny) + grid_enum::density * n_cells]; if (signUpwind > 0.0) { slope_x_neg = mhd::_internal::_ctSlope(fluxX, dev_conserved, -1, 2, 0, 1, 0, 1, xid, yid, zid, nx, ny, n_cells); } else if (signUpwind < 0.0) { @@ -300,8 +276,8 @@ __global__ void Calculate_CT_Electric_Fields(Real const *fluxX, Real const *flux // edge averaged electric field. // S&G 2009 equation 22 ctElectricFields[threadId + grid_enum::ct_elec_z * n_cells] = - 0.25 * (+face_x_pos + face_x_neg + face_y_pos + face_y_neg + - slope_x_pos + slope_x_neg + slope_y_pos + slope_y_neg); + 0.25 * + (+face_x_pos + face_x_neg + face_y_pos + face_y_neg + slope_x_pos + slope_x_neg + slope_y_pos + slope_y_neg); } } // ========================================================================= diff --git a/src/mhd/ct_electric_fields.h b/src/mhd/ct_electric_fields.h index f1f81c942..7fb6c5063 100644 --- a/src/mhd/ct_electric_fields.h +++ b/src/mhd/ct_electric_fields.h @@ -97,22 +97,13 @@ inline __host__ __device__ Real _ctSlope(Real const *flux, Real const *dev_conse // variable, B2 and B3 are the next two fields cyclically. i.e. if // B1=Bx then B2=By and B3=Bz, if B1=By then B2=Bz and B3=Bx. The // same rules apply for the momentum - Real const density = - dev_conserved[idxCentered + grid_enum::density * n_cells]; - Real const Momentum2 = - dev_conserved[idxCentered + (modPlus1 + grid_enum::momentum_x) * n_cells]; - Real const Momentum3 = - dev_conserved[idxCentered + (modPlus2 + grid_enum::momentum_x) * n_cells]; - Real const B2Centered = - 0.5 * (dev_conserved[idxCentered + - (modPlus1 + grid_enum::magnetic_start) * n_cells] + - dev_conserved[idxB2Shift + - (modPlus1 + grid_enum::magnetic_start) * n_cells]); - Real const B3Centered = - 0.5 * (dev_conserved[idxCentered + - (modPlus2 + grid_enum::magnetic_start) * n_cells] + - dev_conserved[idxB3Shift + - (modPlus2 + grid_enum::magnetic_start) * n_cells]); + Real const density = dev_conserved[idxCentered + grid_enum::density * n_cells]; + Real const Momentum2 = dev_conserved[idxCentered + (modPlus1 + grid_enum::momentum_x) * n_cells]; + Real const Momentum3 = dev_conserved[idxCentered + (modPlus2 + grid_enum::momentum_x) * n_cells]; + Real const B2Centered = 0.5 * (dev_conserved[idxCentered + (modPlus1 + grid_enum::magnetic_start) * n_cells] + + dev_conserved[idxB2Shift + (modPlus1 + grid_enum::magnetic_start) * n_cells]); + Real const B3Centered = 0.5 * (dev_conserved[idxCentered + (modPlus2 + grid_enum::magnetic_start) * n_cells] + + dev_conserved[idxB3Shift + (modPlus2 + grid_enum::magnetic_start) * n_cells]); // Compute the electric field in the center with a cross product Real const electric_centered = (Momentum3 * B2Centered - Momentum2 * B3Centered) / density; diff --git a/src/mhd/magnetic_update.cu b/src/mhd/magnetic_update.cu index 77eae8a2a..acfd44982 100644 --- a/src/mhd/magnetic_update.cu +++ b/src/mhd/magnetic_update.cu @@ -30,8 +30,7 @@ __global__ void Update_Magnetic_Field_3D(Real *sourceGrid, Real *destinationGrid // Thread guard to avoid overrun and to skip ghost cells that cannot be // evolved due to missing electric fields that can't be reconstructed - if (xid > 0 and yid > 0 and zid > 0 and xid < nx - 1 and yid < ny - 1 and - zid < nz - 1) { + if (xid > 0 and yid > 0 and zid > 0 and xid < nx - 1 and yid < ny - 1 and zid < nz - 1) { // Compute the three dt/dx quantities Real const dtodx = dt / dx; Real const dtody = dt / dy; @@ -40,32 +39,23 @@ __global__ void Update_Magnetic_Field_3D(Real *sourceGrid, Real *destinationGrid // Load the various edge electric fields required. The '1' and '2' // fields are not shared and the '3' fields are shared by two of the // updates - Real electric_x_1 = ctElectricFields[(cuda_utilities::compute1DIndex( - xid, yid + 1, zid, nx, ny)) + + Real electric_x_1 = + ctElectricFields[(cuda_utilities::compute1DIndex(xid, yid + 1, zid, nx, ny)) + grid_enum::ct_elec_x * n_cells]; + Real electric_x_2 = + ctElectricFields[(cuda_utilities::compute1DIndex(xid, yid, zid + 1, nx, ny)) + grid_enum::ct_elec_x * n_cells]; + Real electric_x_3 = ctElectricFields[(cuda_utilities::compute1DIndex(xid, yid + 1, zid + 1, nx, ny)) + grid_enum::ct_elec_x * n_cells]; - Real electric_x_2 = ctElectricFields[(cuda_utilities::compute1DIndex( - xid, yid, zid + 1, nx, ny)) + - grid_enum::ct_elec_x * n_cells]; - Real electric_x_3 = ctElectricFields[(cuda_utilities::compute1DIndex( - xid, yid + 1, zid + 1, nx, ny)) + - grid_enum::ct_elec_x * n_cells]; - Real electric_y_1 = ctElectricFields[(cuda_utilities::compute1DIndex( - xid + 1, yid, zid, nx, ny)) + - grid_enum::ct_elec_y * n_cells]; - Real electric_y_2 = ctElectricFields[(cuda_utilities::compute1DIndex( - xid, yid, zid + 1, nx, ny)) + + Real electric_y_1 = + ctElectricFields[(cuda_utilities::compute1DIndex(xid + 1, yid, zid, nx, ny)) + grid_enum::ct_elec_y * n_cells]; + Real electric_y_2 = + ctElectricFields[(cuda_utilities::compute1DIndex(xid, yid, zid + 1, nx, ny)) + grid_enum::ct_elec_y * n_cells]; + Real electric_y_3 = ctElectricFields[(cuda_utilities::compute1DIndex(xid + 1, yid, zid + 1, nx, ny)) + grid_enum::ct_elec_y * n_cells]; - Real electric_y_3 = ctElectricFields[(cuda_utilities::compute1DIndex( - xid + 1, yid, zid + 1, nx, ny)) + - grid_enum::ct_elec_y * n_cells]; - Real electric_z_1 = ctElectricFields[(cuda_utilities::compute1DIndex( - xid + 1, yid, zid, nx, ny)) + - grid_enum::ct_elec_z * n_cells]; - Real electric_z_2 = ctElectricFields[(cuda_utilities::compute1DIndex( - xid, yid + 1, zid, nx, ny)) + - grid_enum::ct_elec_z * n_cells]; - Real electric_z_3 = ctElectricFields[(cuda_utilities::compute1DIndex( - xid + 1, yid + 1, zid, nx, ny)) + + Real electric_z_1 = + ctElectricFields[(cuda_utilities::compute1DIndex(xid + 1, yid, zid, nx, ny)) + grid_enum::ct_elec_z * n_cells]; + Real electric_z_2 = + ctElectricFields[(cuda_utilities::compute1DIndex(xid, yid + 1, zid, nx, ny)) + grid_enum::ct_elec_z * n_cells]; + Real electric_z_3 = ctElectricFields[(cuda_utilities::compute1DIndex(xid + 1, yid + 1, zid, nx, ny)) + grid_enum::ct_elec_z * n_cells]; // Perform Updates @@ -73,22 +63,19 @@ __global__ void Update_Magnetic_Field_3D(Real *sourceGrid, Real *destinationGrid // X field update // S&G 2009 equation 10 destinationGrid[threadId + grid_enum::magnetic_x * n_cells] = - sourceGrid[threadId + grid_enum::magnetic_x * n_cells] + - dtodz * (electric_y_3 - electric_y_1) + + sourceGrid[threadId + grid_enum::magnetic_x * n_cells] + dtodz * (electric_y_3 - electric_y_1) + dtody * (electric_z_1 - electric_z_3); // Y field update // S&G 2009 equation 11 destinationGrid[threadId + grid_enum::magnetic_y * n_cells] = - sourceGrid[threadId + grid_enum::magnetic_y * n_cells] + - dtodx * (electric_z_3 - electric_z_2) + + sourceGrid[threadId + grid_enum::magnetic_y * n_cells] + dtodx * (electric_z_3 - electric_z_2) + dtodz * (electric_x_1 - electric_x_3); // Z field update // S&G 2009 equation 12 destinationGrid[threadId + grid_enum::magnetic_z * n_cells] = - sourceGrid[threadId + grid_enum::magnetic_z * n_cells] + - dtody * (electric_x_3 - electric_x_2) + + sourceGrid[threadId + grid_enum::magnetic_z * n_cells] + dtody * (electric_x_3 - electric_x_2) + dtodx * (electric_y_2 - electric_y_3); } } diff --git a/src/riemann_solvers/hlld_cuda.cu b/src/riemann_solvers/hlld_cuda.cu index 08ad7b684..9fb71ab55 100644 --- a/src/riemann_solvers/hlld_cuda.cu +++ b/src/riemann_solvers/hlld_cuda.cu @@ -30,10 +30,9 @@ namespace mhd { // ========================================================================= -__global__ void Calculate_HLLD_Fluxes_CUDA( - Real const *dev_bounds_L, Real const *dev_bounds_R, - Real const *dev_magnetic_face, Real *dev_flux, int const n_cells, - Real const gamma, int const direction, int const n_fields) +__global__ void Calculate_HLLD_Fluxes_CUDA(Real const *dev_bounds_L, Real const *dev_bounds_R, + Real const *dev_magnetic_face, Real *dev_flux, int const n_cells, + Real const gamma, int const direction, int const n_fields) { // get a thread index int threadId = threadIdx.x + blockIdx.x * blockDim.x; diff --git a/src/riemann_solvers/hlld_cuda.h b/src/riemann_solvers/hlld_cuda.h index 8c092005d..b2311071f 100644 --- a/src/riemann_solvers/hlld_cuda.h +++ b/src/riemann_solvers/hlld_cuda.h @@ -38,10 +38,9 @@ namespace mhd * 2=Z * \param[in] n_fields The total number of fields */ -__global__ void Calculate_HLLD_Fluxes_CUDA( - Real const *dev_bounds_L, Real const *dev_bounds_R, - Real const *dev_magnetic_face, Real *dev_flux, int const n_cells, - Real const gamma, int const direction, int const n_fields); +__global__ void Calculate_HLLD_Fluxes_CUDA(Real const *dev_bounds_L, Real const *dev_bounds_R, + Real const *dev_magnetic_face, Real *dev_flux, int const n_cells, + Real const gamma, int const direction, int const n_fields); /*! * \brief Namespace to hold private functions used within the HLLD diff --git a/src/riemann_solvers/hlld_cuda_tests.cu b/src/riemann_solvers/hlld_cuda_tests.cu index c35b0ef31..c1b753a71 100644 --- a/src/riemann_solvers/hlld_cuda_tests.cu +++ b/src/riemann_solvers/hlld_cuda_tests.cu @@ -109,12 +109,11 @@ class tMHDCalculateHLLDFluxesCUDA : public ::testing::Test cudaMemcpy(devConservedMagXFace, magneticX.data(), magneticX.size() * sizeof(Real), cudaMemcpyHostToDevice)); // Run kernel - hipLaunchKernelGGL( - mhd::Calculate_HLLD_Fluxes_CUDA, dimGrid, dimBlock, 0, 0, - devConservedLeft, // the "left" interface - devConservedRight, // the "right" interface - devConservedMagXFace, // the magnetic field at the interface - devTestFlux, n_cells, gamma, direction, nFields); + hipLaunchKernelGGL(mhd::Calculate_HLLD_Fluxes_CUDA, dimGrid, dimBlock, 0, 0, + devConservedLeft, // the "left" interface + devConservedRight, // the "right" interface + devConservedMagXFace, // the magnetic field at the interface + devTestFlux, n_cells, gamma, direction, nFields); CudaCheckError(); CudaSafeCall(cudaMemcpy(testFlux.data(), devTestFlux, testFlux.size() * sizeof(Real), cudaMemcpyDeviceToHost)); From 39824a328e81cf88354139596618fa83a04804e1 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Tue, 14 Feb 2023 12:05:01 -0500 Subject: [PATCH 214/694] Disable diode outflow boundaries for MHD Also, minor tweaks to fix clang-tidy's `readability-qualified-auto` and `clang-analyzer-optin.cplusplus.UninitializedObject` checks. The readablity check was fixed by commenting out some currently used code, it's not deleted since it will be used again when MHD is finished; at which time the actual error will be fixed. The clang-analyzer check was fixed by initializing some member variables in the OneTime class --- src/grid/cuda_boundaries.cu | 7 +++-- src/system_tests/hydro_system_tests.cpp | 40 ++++++++++++------------- src/utils/timing_functions.h | 14 ++++----- 3 files changed, 32 insertions(+), 29 deletions(-) diff --git a/src/grid/cuda_boundaries.cu b/src/grid/cuda_boundaries.cu index 8261c63b5..b3dd62f6c 100644 --- a/src/grid/cuda_boundaries.cu +++ b/src/grid/cuda_boundaries.cu @@ -116,6 +116,8 @@ __global__ void SetGhostCellsKernel(Real *c_head, int nx, int ny, int nz, int n_ if (flags[4] == 2 || flags[5] == 2) { c_head[gidx + 3 * n_cells] *= a[2]; } + +#ifndef MHD // energy and momentum correction for transmission // Diode: only allow outflow if (flags[dir] == 3) { @@ -140,8 +142,9 @@ __global__ void SetGhostCellsKernel(Real *c_head, int nx, int ny, int nz, int n_ c_head[momdex] = 0.0; } } - } // end energy correction for transmissive boundaries - } // end idx>=0 + } // end energy correction for transmissive boundaries +#endif // not MHD + } // end idx>=0 } // end function void SetGhostCells(Real *c_head, int nx, int ny, int nz, int n_fields, int n_cells, int n_ghost, int flags[], int isize, diff --git a/src/system_tests/hydro_system_tests.cpp b/src/system_tests/hydro_system_tests.cpp index 0922368a0..f73d77ed9 100644 --- a/src/system_tests/hydro_system_tests.cpp +++ b/src/system_tests/hydro_system_tests.cpp @@ -38,26 +38,26 @@ class tHYDROSYSTEMSodShockTubeParameterizedMpi : public ::testing::TestWithParam TEST_P(tHYDROSYSTEMSodShockTubeParameterizedMpi, CorrectInputExpectCorrectOutput) { -#ifdef MHD - // Loosen correctness check to account for MHD only having PCM. This is - // about the error between PCM and PPMP in hydro - sodTest.setFixedEpsilon(1E-3); - - // Don't test the gas energy fields - auto datasetNames = sodTest.getDataSetsToTest(); - datasetNames.erase(std::remove(datasetNames.begin(), datasetNames.end(), "GasEnergy"), datasetNames.end()); - - // Set the magnetic fiducial datasets to zero - size_t const size = std::pow(65, 3); - std::vector const magVec(0, size); - - for (auto field : {"magnetic_x", "magnetic_y", "magnetic_z"}) { - sodTest.setFiducialData(field, magVec); - datasetNames.push_back(field); - } - - sodTest.setDataSetsToTest(datasetNames); -#endif // MHD + // #ifdef MHD + // // Loosen correctness check to account for MHD only having PCM. This is + // // about the error between PCM and PPMP in hydro + // sodTest.setFixedEpsilon(1E-3); + + // // Don't test the gas energy fields + // auto datasetNames = sodTest.getDataSetsToTest(); + // datasetNames.erase(std::remove(datasetNames.begin(), datasetNames.end(), "GasEnergy"), datasetNames.end()); + + // // Set the magnetic fiducial datasets to zero + // size_t const size = std::pow(65, 3); + // std::vector const magVec(0, size); + + // for (const auto *field : {"magnetic_x", "magnetic_y", "magnetic_z"}) { + // sodTest.setFiducialData(field, magVec); + // datasetNames.push_back(field); + // } + + // sodTest.setDataSetsToTest(datasetNames); + // #endif // MHD sodTest.numMpiRanks = GetParam(); sodTest.runTest(); diff --git a/src/utils/timing_functions.h b/src/utils/timing_functions.h index 54caa56d0..364736ab1 100644 --- a/src/utils/timing_functions.h +++ b/src/utils/timing_functions.h @@ -13,13 +13,13 @@ class OneTime { public: const char* name; - int n_steps = 0; - Real time_start; - Real t_min; - Real t_max; - Real t_avg; - Real t_all = 0; - bool inactive = true; + int n_steps = 0; + Real time_start = 0; + Real t_min = 0; + Real t_max = 0; + Real t_avg = 0; + Real t_all = 0; + bool inactive = true; OneTime(void) {} OneTime(const char* input_name) { From ab1033738636e7a0d14f673dd7ffeea2cba811ca Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Tue, 14 Feb 2023 14:30:16 -0500 Subject: [PATCH 215/694] Fix D&W shock tube ICs & up size of tubes to 64^3 --- examples/3D/Dai_and_Woodward.txt | 30 +++++++++++++++--------------- examples/3D/Ryu_and_Jones_2a.txt | 6 +++--- examples/3D/Ryu_and_Jones_4d.txt | 6 +++--- 3 files changed, 21 insertions(+), 21 deletions(-) diff --git a/examples/3D/Dai_and_Woodward.txt b/examples/3D/Dai_and_Woodward.txt index 64c5351e6..f05098690 100644 --- a/examples/3D/Dai_and_Woodward.txt +++ b/examples/3D/Dai_and_Woodward.txt @@ -7,11 +7,11 @@ ################################################ # number of grid cells in the x dimension -nx=32 +nx=64 # number of grid cells in the y dimension -ny=32 +ny=64 # number of grid cells in the z dimension -nz=32 +nz=64 # final output time tout=0.2 # time interval for output @@ -43,28 +43,28 @@ outdir=./ # density of left state rho_l=1.08 # velocity of left state -vx_l=0.0 -vy_l=0.0 -vz_l=0.0 +vx_l=1.2 +vy_l=0.01 +vz_l=0.5 # pressure of left state -P_l=1.0 +P_l=0.95 # Magnetic field of the left state -Bx_l=14.17963081 -By_l=12.76166773 -Bz_l=7.0898154 +Bx_l=1.1283791670955126 +By_l=1.0155412503859613 +Bz_l=0.5641895835477563 # density of right state rho_r=1.0 # velocity of right state vx_r=0.0 vy_r=0.0 -vz_r=1.0 +vz_r=0.0 # pressure of right state -P_r=0.2 +P_r=1.0 # Magnetic field of the right state -Bx_r=14.17963081 -By_r=14.17963081 -Bz_r=7.0898154 +Bx_r=1.1283791670955126 +By_r=1.1283791670955126 +Bz_r=0.5641895835477563 # location of initial discontinuity diaph=0.5 diff --git a/examples/3D/Ryu_and_Jones_2a.txt b/examples/3D/Ryu_and_Jones_2a.txt index f886b4de3..62375609f 100644 --- a/examples/3D/Ryu_and_Jones_2a.txt +++ b/examples/3D/Ryu_and_Jones_2a.txt @@ -9,11 +9,11 @@ ################################################ # number of grid cells in the x dimension -nx=32 +nx=64 # number of grid cells in the y dimension -ny=32 +ny=64 # number of grid cells in the z dimension -nz=32 +nz=64 # final output time tout=0.2 # time interval for output diff --git a/examples/3D/Ryu_and_Jones_4d.txt b/examples/3D/Ryu_and_Jones_4d.txt index 68fcbbbb3..6596c2b01 100644 --- a/examples/3D/Ryu_and_Jones_4d.txt +++ b/examples/3D/Ryu_and_Jones_4d.txt @@ -9,11 +9,11 @@ ################################################ # number of grid cells in the x dimension -nx=32 +nx=64 # number of grid cells in the y dimension -ny=32 +ny=64 # number of grid cells in the z dimension -nz=32 +nz=64 # final output time tout=0.16 # time interval for output From a18566e83ee895553f58885954b5cfd9dab6c1fb Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Fri, 17 Feb 2023 16:27:39 -0500 Subject: [PATCH 216/694] Fix formatting bug in README.md --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 610cb1a35..7f9bb18f0 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,6 @@ ![Build & Lint](https://github.com/cholla-hydro/cholla/actions/workflows/build_and_lint.yml/badge.svg) ![Code Formatting](https://github.com/cholla-hydro/cholla/actions/workflows/code_formatting.yml/badge.svg) + CHOLLA ============ A 3D GPU-based hydrodynamics code (Schneider & Robertson, ApJS, 2015). From 5e2f865a2b75510059868feaaef9b8482c6bb8e7 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Tue, 21 Feb 2023 15:05:40 -0500 Subject: [PATCH 217/694] HLLD solver: make state selection more explicit --- src/riemann_solvers/hlld_cuda.cu | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/riemann_solvers/hlld_cuda.cu b/src/riemann_solvers/hlld_cuda.cu index 9fb71ab55..2ad5ac931 100644 --- a/src/riemann_solvers/hlld_cuda.cu +++ b/src/riemann_solvers/hlld_cuda.cu @@ -81,7 +81,7 @@ __global__ void Calculate_HLLD_Fluxes_CUDA(Real const *dev_bounds_L, Real const // If we're in the L state then assign fluxes and return. // In this state the flow is supersonic // M&K 2005 equation 66 - if (speed.L >= 0.0) { + if (speed.L > 0.0) { mhd::_internal::returnFluxes(threadId, o1, o2, o3, n_cells, dev_flux, fluxL, stateL); return; } @@ -91,7 +91,7 @@ __global__ void Calculate_HLLD_Fluxes_CUDA(Real const *dev_bounds_L, Real const // If we're in the R state then assign fluxes and return. // In this state the flow is supersonic // M&K 2005 equation 66 - if (speed.R <= 0.0) { + if (speed.R < 0.0) { mhd::_internal::returnFluxes(threadId, o1, o2, o3, n_cells, dev_flux, fluxR, stateR); return; } @@ -115,7 +115,7 @@ __global__ void Calculate_HLLD_Fluxes_CUDA(Real const *dev_bounds_L, Real const // If we're in the L* state then assign fluxes and return. // In this state the flow is subsonic // M&K 2005 equation 66 - if (speed.LStar >= 0.0) { + if (speed.LStar > 0.0 and speed.L <= 0.0) { fluxL = mhd::_internal::starFluxes(starStateL, stateL, fluxL, speed, speed.L); mhd::_internal::returnFluxes(threadId, o1, o2, o3, n_cells, dev_flux, fluxL, stateL); return; @@ -131,7 +131,7 @@ __global__ void Calculate_HLLD_Fluxes_CUDA(Real const *dev_bounds_L, Real const // If we're in the R* state then assign fluxes and return. // In this state the flow is subsonic // M&K 2005 equation 66 - if (speed.RStar <= 0.0) { + if (speed.RStar <= 0.0 and speed.R >= 0.0) { fluxR = mhd::_internal::starFluxes(starStateR, stateR, fluxR, speed, speed.R); mhd::_internal::returnFluxes(threadId, o1, o2, o3, n_cells, dev_flux, fluxR, stateR); return; @@ -145,7 +145,7 @@ __global__ void Calculate_HLLD_Fluxes_CUDA(Real const *dev_bounds_L, Real const // Compute and return L** fluxes // M&K 2005 equation 66 - if (speed.M >= 0.0) { + if (speed.M > 0.0 and speed.LStar <= 0.0) { fluxL = mhd::_internal::computeDoubleStarFluxes(doubleStarState, doubleStarState.energyL, starStateL, stateL, fluxL, speed, speed.L, speed.LStar); mhd::_internal::returnFluxes(threadId, o1, o2, o3, n_cells, dev_flux, fluxL, stateL); @@ -153,7 +153,7 @@ __global__ void Calculate_HLLD_Fluxes_CUDA(Real const *dev_bounds_L, Real const } // Compute and return R** fluxes // M&K 2005 equation 66 - else { // if (speedStarR >= 0.0) { + if (speed.RStar > 0.0 and speed.M <= 0.0) { fluxR = mhd::_internal::computeDoubleStarFluxes(doubleStarState, doubleStarState.energyR, starStateR, stateR, fluxR, speed, speed.R, speed.RStar); mhd::_internal::returnFluxes(threadId, o1, o2, o3, n_cells, dev_flux, fluxR, stateR); From 9813d35fdc1cdd9af73dc1a4eee7fcaa9fbecc18 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Tue, 21 Feb 2023 15:06:12 -0500 Subject: [PATCH 218/694] Fix typos in MHD shock tube initial conditions There were some typos in the papers I referenced that led to issues in this code. I've fixed them --- examples/3D/Brio_and_Wu.txt | 2 +- examples/3D/Dai_and_Woodward.txt | 10 +++++----- examples/3D/Ryu_and_Jones_1a.txt | 6 +++--- examples/3D/Ryu_and_Jones_2a.txt | 4 ++-- 4 files changed, 11 insertions(+), 11 deletions(-) diff --git a/examples/3D/Brio_and_Wu.txt b/examples/3D/Brio_and_Wu.txt index fba126396..c1a9fe387 100644 --- a/examples/3D/Brio_and_Wu.txt +++ b/examples/3D/Brio_and_Wu.txt @@ -68,5 +68,5 @@ Bz_r=0.0 # location of initial discontinuity diaph=0.5 # value of gamma -gamma=2 +gamma=2.0 diff --git a/examples/3D/Dai_and_Woodward.txt b/examples/3D/Dai_and_Woodward.txt index f05098690..a266cbb66 100644 --- a/examples/3D/Dai_and_Woodward.txt +++ b/examples/3D/Dai_and_Woodward.txt @@ -7,11 +7,11 @@ ################################################ # number of grid cells in the x dimension -nx=64 +nx=256 # number of grid cells in the y dimension -ny=64 +ny=256 # number of grid cells in the z dimension -nz=64 +nz=256 # final output time tout=0.2 # time interval for output @@ -49,7 +49,7 @@ vz_l=0.5 # pressure of left state P_l=0.95 # Magnetic field of the left state -Bx_l=1.1283791670955126 +Bx_l=0.5641895835477563 By_l=1.0155412503859613 Bz_l=0.5641895835477563 @@ -62,7 +62,7 @@ vz_r=0.0 # pressure of right state P_r=1.0 # Magnetic field of the right state -Bx_r=1.1283791670955126 +Bx_r=0.5641895835477563 By_r=1.1283791670955126 Bz_r=0.5641895835477563 diff --git a/examples/3D/Ryu_and_Jones_1a.txt b/examples/3D/Ryu_and_Jones_1a.txt index 168fcdffa..3b5b44400 100644 --- a/examples/3D/Ryu_and_Jones_1a.txt +++ b/examples/3D/Ryu_and_Jones_1a.txt @@ -9,11 +9,11 @@ ################################################ # number of grid cells in the x dimension -nx=32 +nx=256 # number of grid cells in the y dimension -ny=32 +ny=256 # number of grid cells in the z dimension -nz=32 +nz=256 # final output time tout=0.08 # time interval for output diff --git a/examples/3D/Ryu_and_Jones_2a.txt b/examples/3D/Ryu_and_Jones_2a.txt index 62375609f..70a7ef8e9 100644 --- a/examples/3D/Ryu_and_Jones_2a.txt +++ b/examples/3D/Ryu_and_Jones_2a.txt @@ -51,7 +51,7 @@ vz_l=0.5 # pressure of left state P_l=0.95 # Magnetic field of the left state -Bx_l=7.0898154036220635 +Bx_l=0.5641895835477563 By_l=1.0155412503859613 Bz_l=0.5641895835477563 @@ -64,7 +64,7 @@ vz_r=0.0 # pressure of right state P_r=1.0 # Magnetic field of the right state -Bx_r=7.0898154036220635 +Bx_r=0.5641895835477563 By_r=1.1283791670955126 Bz_r=0.5641895835477563 From e1f545c357f96ecb63fd3bea3a4abf1473b2a907 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Thu, 23 Feb 2023 15:21:01 -0500 Subject: [PATCH 219/694] Fix bug in HLLD solver - Fixed a bug in the HLLD solver's double star state conditional when Bx = 0. It should have been comparing to 0.5 * Bx^2 but was just comparing to Bx - Updated tests for this change - Added a test to catch this error in the future - Added `bin/*` to .gitignore --- .gitignore | 1 + cholla-tests-data | 2 +- src/riemann_solvers/hlld_cuda.cu | 2 +- src/riemann_solvers/hlld_cuda_tests.cu | 25 +++++++++++++++++++++++-- 4 files changed, 26 insertions(+), 4 deletions(-) diff --git a/.gitignore b/.gitignore index 72bf7018e..7ce01c9e9 100644 --- a/.gitignore +++ b/.gitignore @@ -2,6 +2,7 @@ ############################## googletest* tidy_results*.txt +bin/* # Compiled source # ################### diff --git a/cholla-tests-data b/cholla-tests-data index 4f3087125..93cfe12c5 160000 --- a/cholla-tests-data +++ b/cholla-tests-data @@ -1 +1 @@ -Subproject commit 4f3087125f6bf3fee07221c29a59b962b4b4c39e +Subproject commit 93cfe12c50a44f7a3e81f9299abb238315c58002 diff --git a/src/riemann_solvers/hlld_cuda.cu b/src/riemann_solvers/hlld_cuda.cu index 2ad5ac931..abf3851de 100644 --- a/src/riemann_solvers/hlld_cuda.cu +++ b/src/riemann_solvers/hlld_cuda.cu @@ -414,7 +414,7 @@ __device__ __host__ mhd::_internal::DoubleStarState computeDoubleStarState(mhd:: // if Bx is zero then just return the star state // Explained at the top of page 328 in M&K 2005. Essentially when // magneticX is 0 this reduces to the HLLC solver - if (magneticX < mhd::_internal::_hlldSmallNumber * totalPressureStar) { + if (0.5 * (magneticX * magneticX) < mhd::_internal::_hlldSmallNumber * totalPressureStar) { if (speed.M >= 0.0) { // We're in the L** state but Bx=0 so return L* state doubleStarState.velocityY = starStateL.velocityY; diff --git a/src/riemann_solvers/hlld_cuda_tests.cu b/src/riemann_solvers/hlld_cuda_tests.cu index c1b753a71..e6ab03fe9 100644 --- a/src/riemann_solvers/hlld_cuda_tests.cu +++ b/src/riemann_solvers/hlld_cuda_tests.cu @@ -367,6 +367,27 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, BrioAndWuShockTubeCorrectInputExpectCorrectO std::vector const testFluxes = computeFluxes(leftICs, rightICs, gamma, direction); checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } + { + std::string const outputString{ + "Left State: Left Brio & Wu state with negative Bx\n" + "Right State: Right Brio & Wu state with negative Bx\n" + "HLLD State: Left Double Star State"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{0.20673357746080057, 0.4661897584603672, + -0.061170028480309613, 0, + 0.064707291981509041, 0.0, + 1.0074980455427278, 0}; + std::vector const scalarFlux{0.22885355953447648, 0.46073027567244362, 0.6854281091039145}; + Real thermalEnergyFlux = 0.20673357746080046; + + std::vector leftICsNegBx = leftICs, rightICsNegBx = rightICs; + leftICsNegBx[5] = -leftICsNegBx[5]; + rightICsNegBx[5] = -rightICsNegBx[5]; + + std::vector const testFluxes = computeFluxes(leftICsNegBx, rightICsNegBx, gamma, direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + } { std::string const outputString{ "Left State: Right Brio & Wu state\n" @@ -1557,8 +1578,8 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, ConstantStatesExpectCorrectFlux) // Compute the fluxes and check for correctness // Order of Fluxes is rho, vec(V), E, vec(B) std::vector const fiducialFlux{ - 1.42108547152020037174e-14, 0.50001380657999994, -1, -1, -1.7347234759768071e-18, 0.0, - 3.4694469519536142e-18, 3.4694469519536142e-18}; + -1.42108547152020037174e-14, 0.50001380657999994, -1, -1, -1.7347234759768071e-18, 0.0, + 3.4694469519536142e-18, 3.4694469519536142e-18}; std::vector const scalarFlux{1.5731381063233131e-14, 3.1670573744690958e-14, 4.7116290424753513e-14}; Real thermalEnergyFlux = 0.; std::vector const testFluxes = computeFluxes(onesMagneticField, onesMagneticField, gamma, direction); From 0014efc485ec3c6cacb4680a7307b6021532c81e Mon Sep 17 00:00:00 2001 From: Alwin Date: Thu, 23 Feb 2023 14:26:15 -0800 Subject: [PATCH 220/694] prototype refactor --- src/io/io.cpp | 910 ++++++----------------------------------------- src/io/io.h | 1 + src/io/io_gpu.cu | 10 +- 3 files changed, 115 insertions(+), 806 deletions(-) diff --git a/src/io/io.cpp b/src/io/io.cpp index cd1af83a7..6fe61c544 100644 --- a/src/io/io.cpp +++ b/src/io/io.cpp @@ -1147,6 +1147,18 @@ herr_t Read_HDF5_Dataset(hid_t file_id, double *dataset_buffer, const char *name status = H5Dclose(dataset_id); return status; } + +herr_t Read_HDF5_Dataset(hid_t file_id, float *dataset_buffer, const char *name) +{ + hid_t dataset_id = H5Dopen(file_id, name, H5P_DEFAULT); + herr_t status = H5Dread(dataset_id, H5T_NATIVE_FLOAT, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer); + status = H5Dclose(dataset_id); + return status; +} + + + + // Helper function which uses the correct HDF5 arguments based on the type of // dataset_buffer to avoid writing garbage herr_t Write_HDF5_Dataset(hid_t file_id, hid_t dataspace_id, double *dataset_buffer, const char *name) @@ -2422,6 +2434,64 @@ void Grid3D::Read_Grid_Binary(FILE *fp) } #ifdef HDF5 + + +void Fill_Grid_From_HDF5_Buffer(int nx, int ny, int nz, int nx_real, int ny_real, int nz_real, int n_ghost, Real* hdf5_buffer, Real* grid_buffer) +{ + // Note: for 1D ny_real and nz_real are not used + // And for 2D nz_real is not used. + // This protects the magnetic case where ny_real/nz_real += 1 + + int i, j, k, id, buf_id; + // 3D case + if (nx > 1 && ny > 1 && nz > 1) { + for (k = 0; k < nz_real; k++) { + for (j = 0; j < ny_real; j++) { + for (i = 0; i < nx_real; i++) { + id = (i + n_ghost) + (j + n_ghost) * nx + (k + n_ghost) * nx * ny; + buf_id = k + j * nz_real + i * nz_real * ny_real; + grid_buffer[id] = hdf5_buffer[buf_id]; + } + } + } + return; + } + + // 2D case + if (nx > 1 && ny > 1 && nz == 1) { + for (j = 0; j < ny_real; j++) { + for (i = 0; i < nx_real; i++) { + id = (i + n_ghost) + (j + n_ghost) * nx; + buf_id = j + i * ny_real; + grid_buffer[id] = hdf5_buffer[buf_id]; + } + } + return; + } + + // 1D case + if (nx > 1 && ny == 1 && nz == 1) { + id = n_ghost; + memcpy(&grid_buffer[id], &hdf5_buffer[0], nx_real * sizeof(Real)); + return; + } +} + +void Read_Grid_HDF5_Field(hid_t file_id, Real* dataset_buffer, Header H, Real* grid_buffer, const char* name) +{ + Read_HDF5_Dataset(file_id, dataset_buffer, name); + Fill_Grid_From_HDF5_Buffer(H.nx, H.ny, H.nz, H.nx_real, H.ny_real, H.nz_real, H.n_ghost, dataset_buffer, grid_buffer); +} + +void Read_Grid_HDF5_Field_Magnetic(hid_t file_id, Real* dataset_buffer, Header H, Real* grid_buffer, const char* name) +{ + // Magnetic has 1 more real cell, 1 fewer n_ghost on one side. + Read_HDF5_Dataset(file_id, dataset_buffer, name); + Fill_Grid_From_HDF5_Buffer(H.nx, H.ny, H.nz, H.nx_real + 1, H.ny_real + 1, H.nz_real + 1, H.n_ghost - 1, dataset_buffer, grid_buffer); +} + + + /*! \fn void Read_Grid_HDF5(hid_t file_id) * \brief Read in grid data from an hdf5 file. */ void Grid3D::Read_Grid_HDF5(hid_t file_id, struct parameters P) @@ -2445,818 +2515,52 @@ void Grid3D::Read_Grid_HDF5(hid_t file_id, struct parameters P) status = H5Aread(attribute_id, H5T_NATIVE_INT, &H.n_step); status = H5Aclose(attribute_id); - // 1D case - if (H.nx > 1 && H.ny == 1 && H.nz == 1) { - // need a dataset buffer to remap fastest index - dataset_buffer = (Real *)malloc(H.nx_real * sizeof(Real)); - - // Open the density dataset - dataset_id = H5Dopen(file_id, "/density", H5P_DEFAULT); - // Read the density array into the dataset buffer // NOTE: NEED TO FIX FOR - // FLOAT REAL!!! - status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer); - // Free the dataset id - status = H5Dclose(dataset_id); - - // Copy the density array to the grid - id = H.n_ghost; - memcpy(&(C.density[id]), &dataset_buffer[0], H.nx_real * sizeof(Real)); - - // Open the x momentum dataset - dataset_id = H5Dopen(file_id, "/momentum_x", H5P_DEFAULT); - // Read the x momentum array into the dataset buffer // NOTE: NEED TO FIX - // FOR FLOAT REAL!!! - status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer); - // Free the dataset id - status = H5Dclose(dataset_id); - - // Copy the x momentum array to the grid - id = H.n_ghost; - memcpy(&(C.momentum_x[id]), &dataset_buffer[0], H.nx_real * sizeof(Real)); - - // Open the y momentum dataset - dataset_id = H5Dopen(file_id, "/momentum_y", H5P_DEFAULT); - // Read the x momentum array into the dataset buffer // NOTE: NEED TO FIX - // FOR FLOAT REAL!!! - status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer); - // Free the dataset id - status = H5Dclose(dataset_id); - - // Copy the y momentum array to the grid - id = H.n_ghost; - memcpy(&(C.momentum_y[id]), &dataset_buffer[0], H.nx_real * sizeof(Real)); - - // Open the z momentum dataset - dataset_id = H5Dopen(file_id, "/momentum_z", H5P_DEFAULT); - // Read the x momentum array into the dataset buffer // NOTE: NEED TO FIX - // FOR FLOAT REAL!!! - status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer); - // Free the dataset id - status = H5Dclose(dataset_id); - - // Copy the z momentum array to the grid - id = H.n_ghost; - memcpy(&(C.momentum_z[id]), &dataset_buffer[0], H.nx_real * sizeof(Real)); - - // Open the Energy dataset - dataset_id = H5Dopen(file_id, "/Energy", H5P_DEFAULT); - // Read the Energy array into the dataset buffer // NOTE: NEED TO FIX FOR - // FLOAT REAL!!! - status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer); - // Free the dataset id - status = H5Dclose(dataset_id); - - // Copy the Energy array to the grid - id = H.n_ghost; - memcpy(&(C.Energy[id]), &dataset_buffer[0], H.nx_real * sizeof(Real)); + #ifdef MHD + dataset_buffer = (Real *)malloc((H.nz_real + 1) * (H.ny_real + 1) * (H.nx_real + 1) * sizeof(Real)); + #else + dataset_buffer = (Real *)malloc((H.nz_real) * (H.ny_real) * (H.nx_real) * sizeof(Real)); + #endif + Read_Grid_HDF5_Field(file_id, dataset_buffer, H, C.density, "/density"); + Read_Grid_HDF5_Field(file_id, dataset_buffer, H, C.momentum_x, "/momentum_x"); + Read_Grid_HDF5_Field(file_id, dataset_buffer, H, C.momentum_y, "/momentum_y"); + Read_Grid_HDF5_Field(file_id, dataset_buffer, H, C.momentum_z, "/momentum_z"); + Read_Grid_HDF5_Field(file_id, dataset_buffer, H, C.Energy, "/Energy"); #ifdef DE - // Open the internal energy dataset - dataset_id = H5Dopen(file_id, "/GasEnergy", H5P_DEFAULT); - // Read the Energy array into the dataset buffer // NOTE: NEED TO FIX FOR - // FLOAT REAL!!! - status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer); - // Free the dataset id - status = H5Dclose(dataset_id); - - // Copy the internal energy array to the grid - id = H.n_ghost; - memcpy(&(C.GasEnergy[id]), &dataset_buffer[0], H.nx_real * sizeof(Real)); - #endif // DE + Read_Grid_HDF5_Field(file_id, dataset_buffer, H, C.GasEnergy, "/GasEnergy"); + #endif #ifdef SCALAR - for (int s = 0; s < NSCALARS; s++) { - // create the name of the dataset - char dataset[100]; - char number[10]; - strcpy(dataset, "/scalar"); - sprintf(number, "%d", s); - strcat(dataset, number); - - // Open the passive scalar dataset - dataset_id = H5Dopen(file_id, dataset, H5P_DEFAULT); - // Read the scalar array into the dataset buffer // NOTE: NEED TO FIX FOR - // FLOAT REAL!!! - status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer); - // Free the dataset id - status = H5Dclose(dataset_id); - - // Copy the scalar array to the grid - id = H.n_ghost; - memcpy(&(C.scalar[id + s * H.n_cells]), &dataset_buffer[0], H.nx_real * sizeof(Real)); - } - #endif // SCALAR + #if !defined(COOLING_GRACKLE) && !defined(CHEMISTRY_GPU) + for (int s = 0; s < NSCALARS; s++) { + // create the name of the dataset + char dataset_name[100]; + char number[10]; + strcpy(dataset_name, "/scalar"); + sprintf(number, "%d", s); + strcat(dataset_name, number); + Read_Grid_HDF5_Field(file_id, dataset_buffer, H, &C.scalar[s * H.n_cells], dataset_name); } - - // 2D case - if (H.nx > 1 && H.ny > 1 && H.nz == 1) { - // need a dataset buffer to remap fastest index - dataset_buffer = (Real *)malloc(H.ny_real * H.nx_real * sizeof(Real)); - - // Open the density dataset - dataset_id = H5Dopen(file_id, "/density", H5P_DEFAULT); - // Read the density array into the dataset buffer // NOTE: NEED TO FIX FOR - // FLOAT REAL!!! - status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer); - // Free the dataset id - status = H5Dclose(dataset_id); - - // Copy the density array to the grid - for (j = 0; j < H.ny_real; j++) { - for (i = 0; i < H.nx_real; i++) { - id = (i + H.n_ghost) + (j + H.n_ghost) * H.nx; - buf_id = j + i * H.ny_real; - C.density[id] = dataset_buffer[buf_id]; - } - } - - // Open the x momentum dataset - dataset_id = H5Dopen(file_id, "/momentum_x", H5P_DEFAULT); - // Read the x momentum array into the dataset buffer // NOTE: NEED TO FIX - // FOR FLOAT REAL!!! - status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer); - // Free the dataset id - status = H5Dclose(dataset_id); - - // Copy the x momentum array to the grid - for (j = 0; j < H.ny_real; j++) { - for (i = 0; i < H.nx_real; i++) { - id = (i + H.n_ghost) + (j + H.n_ghost) * H.nx; - buf_id = j + i * H.ny_real; - C.momentum_x[id] = dataset_buffer[buf_id]; - } - } - - // Open the y momentum dataset - dataset_id = H5Dopen(file_id, "/momentum_y", H5P_DEFAULT); - // Read the y momentum array into the dataset buffer // NOTE: NEED TO FIX - // FOR FLOAT REAL!!! - status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer); - // Free the dataset id - status = H5Dclose(dataset_id); - - // Copy the y momentum array to the grid - for (j = 0; j < H.ny_real; j++) { - for (i = 0; i < H.nx_real; i++) { - id = (i + H.n_ghost) + (j + H.n_ghost) * H.nx; - buf_id = j + i * H.ny_real; - C.momentum_y[id] = dataset_buffer[buf_id]; - } - } - - // Open the z momentum dataset - dataset_id = H5Dopen(file_id, "/momentum_z", H5P_DEFAULT); - // Read the z momentum array into the dataset buffer // NOTE: NEED TO FIX - // FOR FLOAT REAL!!! - status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer); - // Free the dataset id - status = H5Dclose(dataset_id); - - // Copy the z momentum array to the grid - for (j = 0; j < H.ny_real; j++) { - for (i = 0; i < H.nx_real; i++) { - id = (i + H.n_ghost) + (j + H.n_ghost) * H.nx; - buf_id = j + i * H.ny_real; - C.momentum_z[id] = dataset_buffer[buf_id]; - } - } - - // Open the Energy dataset - dataset_id = H5Dopen(file_id, "/Energy", H5P_DEFAULT); - // Read the Energy array into the dataset buffer // NOTE: NEED TO FIX FOR - // FLOAT REAL!!! - status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer); - // Free the dataset id - status = H5Dclose(dataset_id); - - // Copy the Energy array to the grid - for (j = 0; j < H.ny_real; j++) { - for (i = 0; i < H.nx_real; i++) { - id = (i + H.n_ghost) + (j + H.n_ghost) * H.nx; - buf_id = j + i * H.ny_real; - C.Energy[id] = dataset_buffer[buf_id]; - } - } - - #ifdef DE - // Open the internal energy dataset - dataset_id = H5Dopen(file_id, "/GasEnergy", H5P_DEFAULT); - // Read the internal energy array into the dataset buffer // NOTE: NEED TO - // FIX FOR FLOAT REAL!!! - status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer); - // Free the dataset id - status = H5Dclose(dataset_id); - - // Copy the internal energy array to the grid - for (j = 0; j < H.ny_real; j++) { - for (i = 0; i < H.nx_real; i++) { - id = (i + H.n_ghost) + (j + H.n_ghost) * H.nx; - buf_id = j + i * H.ny_real; - C.GasEnergy[id] = dataset_buffer[buf_id]; - } - } - #endif // DE - - #ifdef SCALAR - for (int s = 0; s < NSCALARS; s++) { - // create the name of the dataset - char dataset[100]; - char number[10]; - strcpy(dataset, "/scalar"); - sprintf(number, "%d", s); - strcat(dataset, number); - - // Open the scalar dataset - dataset_id = H5Dopen(file_id, dataset, H5P_DEFAULT); - // Read the scalar array into the dataset buffer // NOTE: NEED TO FIX FOR - // FLOAT REAL!!! - status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer); - // Free the dataset id - status = H5Dclose(dataset_id); - - // Copy the scalar array to the grid - for (j = 0; j < H.ny_real; j++) { - for (i = 0; i < H.nx_real; i++) { - id = (i + H.n_ghost) + (j + H.n_ghost) * H.nx; - buf_id = j + i * H.ny_real; - C.scalar[id + s * H.n_cells] = dataset_buffer[buf_id]; - } - } - } + #else + Read_Grid_HDF5_Field(file_id, dataset_buffer, H, C.HI_density, "/HI_density"); + Read_Grid_HDF5_Field(file_id, dataset_buffer, H, C.HII_density, "/HII_density"); + Read_Grid_HDF5_Field(file_id, dataset_buffer, H, C.HeI_density, "/HeI_density"); + Read_Grid_HDF5_Field(file_id, dataset_buffer, H, C.HeII_density, "/HeII_density"); + Read_Grid_HDF5_Field(file_id, dataset_buffer, H, C.HeIII_density, "/HeIII_density"); + Read_Grid_HDF5_Field(file_id, dataset_buffer, H, C.e_density, "/e_density"); + #ifdef GRACKLE_METALS + Read_Grid_HDF5_Field(file_id, dataset_buffer, H, C.metal_density, "/metal_density"); + #endif // GRACKLE_METALS + #endif // COOLING_GRACKLE , CHEMISTRY_GPU #endif // SCALAR - } - - // 3D case - if (H.nx > 1 && H.ny > 1 && H.nz > 1) { - // Compute Statistic of Initial data - Real mean_l, min_l, max_l; - Real mean_g, min_g, max_g; - - // need a dataset buffer to remap fastest index - dataset_buffer = (Real *)malloc(H.nz_real * H.ny_real * H.nx_real * sizeof(Real)); - - // Open the density dataset - dataset_id = H5Dopen(file_id, "/density", H5P_DEFAULT); - // Read the density array into the dataset buffer // NOTE: NEED TO FIX FOR - // FLOAT REAL!!! - status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer); - // Free the dataset id - status = H5Dclose(dataset_id); - - mean_l = 0; - min_l = 1e65; - max_l = -1; - - // Copy the density array to the grid - for (k = 0; k < H.nz_real; k++) { - for (j = 0; j < H.ny_real; j++) { - for (i = 0; i < H.nx_real; i++) { - id = (i + H.n_ghost) + (j + H.n_ghost) * H.nx + (k + H.n_ghost) * H.nx * H.ny; - buf_id = k + j * H.nz_real + i * H.nz_real * H.ny_real; - C.density[id] = dataset_buffer[buf_id]; - mean_l += C.density[id]; - if (C.density[id] > max_l) max_l = C.density[id]; - if (C.density[id] < min_l) min_l = C.density[id]; - } - } - } - mean_l /= (H.nz_real * H.ny_real * H.nx_real); - - #if MPI_CHOLLA - mean_g = ReduceRealAvg(mean_l); - max_g = ReduceRealMax(max_l); - min_g = ReduceRealMin(min_l); - mean_l = mean_g; - max_l = max_g; - min_l = min_g; - #endif // MPI_CHOLLA - - #if defined(PRINT_INITIAL_STATS) && defined(COSMOLOGY) - chprintf(" Density Mean: %f Min: %f Max: %f [ h^2 Msun kpc^-3] \n", mean_l, min_l, max_l); - #endif // PRINT_INITIAL_STATS and COSMOLOGY - - // Open the x momentum dataset - dataset_id = H5Dopen(file_id, "/momentum_x", H5P_DEFAULT); - // Read the x momentum array into the dataset buffer // NOTE: NEED TO FIX - // FOR FLOAT REAL!!! - status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer); - // Free the dataset id - status = H5Dclose(dataset_id); - - mean_l = 0; - min_l = 1e65; - max_l = -1; - // Copy the x momentum array to the grid - for (k = 0; k < H.nz_real; k++) { - for (j = 0; j < H.ny_real; j++) { - for (i = 0; i < H.nx_real; i++) { - id = (i + H.n_ghost) + (j + H.n_ghost) * H.nx + (k + H.n_ghost) * H.nx * H.ny; - buf_id = k + j * H.nz_real + i * H.nz_real * H.ny_real; - C.momentum_x[id] = dataset_buffer[buf_id]; - mean_l += fabs(C.momentum_x[id]); - if (fabs(C.momentum_x[id]) > max_l) max_l = fabs(C.momentum_x[id]); - if (fabs(C.momentum_x[id]) < min_l) min_l = fabs(C.momentum_x[id]); - } - } - } - mean_l /= (H.nz_real * H.ny_real * H.nx_real); - - #if MPI_CHOLLA - mean_g = ReduceRealAvg(mean_l); - max_g = ReduceRealMax(max_l); - min_g = ReduceRealMin(min_l); - mean_l = mean_g; - max_l = max_g; - min_l = min_g; - #endif // MPI_CHOLLA - - #if defined(PRINT_INITIAL_STATS) && defined(COSMOLOGY) - chprintf( - " abs(Momentum X) Mean: %f Min: %f Max: %f [ h^2 Msun kpc^-3 " - "km s^-1] \n", - mean_l, min_l, max_l); - #endif // PRINT_INITIAL_STATS and COSMOLOGY - - // Open the y momentum dataset - dataset_id = H5Dopen(file_id, "/momentum_y", H5P_DEFAULT); - // Read the y momentum array into the dataset buffer // NOTE: NEED TO FIX - // FOR FLOAT REAL!!! - status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer); - // Free the dataset id - status = H5Dclose(dataset_id); - - mean_l = 0; - min_l = 1e65; - max_l = -1; - // Copy the y momentum array to the grid - for (k = 0; k < H.nz_real; k++) { - for (j = 0; j < H.ny_real; j++) { - for (i = 0; i < H.nx_real; i++) { - id = (i + H.n_ghost) + (j + H.n_ghost) * H.nx + (k + H.n_ghost) * H.nx * H.ny; - buf_id = k + j * H.nz_real + i * H.nz_real * H.ny_real; - C.momentum_y[id] = dataset_buffer[buf_id]; - mean_l += fabs(C.momentum_y[id]); - if (fabs(C.momentum_y[id]) > max_l) max_l = fabs(C.momentum_y[id]); - if (fabs(C.momentum_y[id]) < min_l) min_l = fabs(C.momentum_y[id]); - } - } - } - mean_l /= (H.nz_real * H.ny_real * H.nx_real); - - #if MPI_CHOLLA - mean_g = ReduceRealAvg(mean_l); - max_g = ReduceRealMax(max_l); - min_g = ReduceRealMin(min_l); - mean_l = mean_g; - max_l = max_g; - min_l = min_g; - #endif // MPI_CHOLLA - - #if defined(PRINT_INITIAL_STATS) && defined(COSMOLOGY) - chprintf( - " abs(Momentum Y) Mean: %f Min: %f Max: %f [ h^2 Msun kpc^-3 " - "km s^-1] \n", - mean_l, min_l, max_l); - #endif // PRINT_INITIAL_STATS and COSMOLOGY - - // Open the z momentum dataset - dataset_id = H5Dopen(file_id, "/momentum_z", H5P_DEFAULT); - // Read the z momentum array into the dataset buffer // NOTE: NEED TO FIX - // FOR FLOAT REAL!!! - status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer); - // Free the dataset id - status = H5Dclose(dataset_id); - - mean_l = 0; - min_l = 1e65; - max_l = -1; - // Copy the z momentum array to the grid - for (k = 0; k < H.nz_real; k++) { - for (j = 0; j < H.ny_real; j++) { - for (i = 0; i < H.nx_real; i++) { - id = (i + H.n_ghost) + (j + H.n_ghost) * H.nx + (k + H.n_ghost) * H.nx * H.ny; - buf_id = k + j * H.nz_real + i * H.nz_real * H.ny_real; - C.momentum_z[id] = dataset_buffer[buf_id]; - mean_l += fabs(C.momentum_z[id]); - if (fabs(C.momentum_z[id]) > max_l) max_l = fabs(C.momentum_z[id]); - if (fabs(C.momentum_z[id]) < min_l) min_l = fabs(C.momentum_z[id]); - } - } - } - mean_l /= (H.nz_real * H.ny_real * H.nx_real); - - #if MPI_CHOLLA - mean_g = ReduceRealAvg(mean_l); - max_g = ReduceRealMax(max_l); - min_g = ReduceRealMin(min_l); - mean_l = mean_g; - max_l = max_g; - min_l = min_g; - #endif // MPI_CHOLLA - #if defined(PRINT_INITIAL_STATS) && defined(COSMOLOGY) - chprintf( - " abs(Momentum Z) Mean: %f Min: %f Max: %f [ h^2 Msun kpc^-3 " - "km s^-1] \n", - mean_l, min_l, max_l); - #endif // PRINT_INITIAL_STATS and COSMOLOGY - - // Open the Energy dataset - dataset_id = H5Dopen(file_id, "/Energy", H5P_DEFAULT); - // Read the Energy array into the dataset buffer // NOTE: NEED TO FIX FOR - // FLOAT REAL!!! - status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer); - // Free the dataset id - status = H5Dclose(dataset_id); - - mean_l = 0; - min_l = 1e65; - max_l = -1; - // Copy the Energy array to the grid - for (k = 0; k < H.nz_real; k++) { - for (j = 0; j < H.ny_real; j++) { - for (i = 0; i < H.nx_real; i++) { - id = (i + H.n_ghost) + (j + H.n_ghost) * H.nx + (k + H.n_ghost) * H.nx * H.ny; - buf_id = k + j * H.nz_real + i * H.nz_real * H.ny_real; - C.Energy[id] = dataset_buffer[buf_id]; - mean_l += C.Energy[id]; - if (C.Energy[id] > max_l) max_l = C.Energy[id]; - if (C.Energy[id] < min_l) min_l = C.Energy[id]; - } - } - } - mean_l /= (H.nz_real * H.ny_real * H.nx_real); - - #if MPI_CHOLLA - mean_g = ReduceRealAvg(mean_l); - max_g = ReduceRealMax(max_l); - min_g = ReduceRealMin(min_l); - mean_l = mean_g; - max_l = max_g; - min_l = min_g; - #endif // MPI_CHOLLA - - #if defined(PRINT_INITIAL_STATS) && defined(COSMOLOGY) - chprintf( - " Energy Mean: %f Min: %f Max: %f [ h^2 Msun kpc^-3 km^2 " - "s^-2 ] \n", - mean_l, min_l, max_l); - #endif // PRINT_INITIAL_STATS and COSMOLOGY - - #ifdef DE - // Open the internal Energy dataset - dataset_id = H5Dopen(file_id, "/GasEnergy", H5P_DEFAULT); - // Read the internal Energy array into the dataset buffer // NOTE: NEED TO - // FIX FOR FLOAT REAL!!! - status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer); - // Free the dataset id - status = H5Dclose(dataset_id); - - Real temp, temp_max_l, temp_min_l, temp_mean_l; - Real temp_min_g, temp_max_g, temp_mean_g; - temp_mean_l = 0; - temp_min_l = 1e65; - temp_max_l = -1; - mean_l = 0; - min_l = 1e65; - max_l = -1; - // Copy the internal Energy array to the grid - for (k = 0; k < H.nz_real; k++) { - for (j = 0; j < H.ny_real; j++) { - for (i = 0; i < H.nx_real; i++) { - id = (i + H.n_ghost) + (j + H.n_ghost) * H.nx + (k + H.n_ghost) * H.nx * H.ny; - buf_id = k + j * H.nz_real + i * H.nz_real * H.ny_real; - C.GasEnergy[id] = dataset_buffer[buf_id]; - mean_l += C.GasEnergy[id]; - if (C.GasEnergy[id] > max_l) max_l = C.GasEnergy[id]; - if (C.GasEnergy[id] < min_l) min_l = C.GasEnergy[id]; - temp = C.GasEnergy[id] / C.density[id] * (gama - 1) * MP / KB * 1e10; - temp_mean_l += temp; - // chprintf( "%f\n", temp); - if (temp > temp_max_l) temp_max_l = temp; - if (temp < temp_min_l) temp_min_l = temp; - } - } - } - mean_l /= (H.nz_real * H.ny_real * H.nx_real); - temp_mean_l /= (H.nz_real * H.ny_real * H.nx_real); - - #if MPI_CHOLLA - mean_g = ReduceRealAvg(mean_l); - max_g = ReduceRealMax(max_l); - min_g = ReduceRealMin(min_l); - mean_l = mean_g; - max_l = max_g; - min_l = min_g; - temp_mean_g = ReduceRealAvg(temp_mean_l); - temp_max_g = ReduceRealMax(temp_max_l); - temp_min_g = ReduceRealMin(temp_min_l); - temp_mean_l = temp_mean_g; - temp_max_l = temp_max_g; - temp_min_l = temp_min_g; - #endif // MPI_CHOLLA - - #if defined(PRINT_INITIAL_STATS) && defined(COSMOLOGY) - chprintf( - " GasEnergy Mean: %f Min: %f Max: %f [ h^2 Msun kpc^-3 km^2 " - "s^-2 ] \n", - mean_l, min_l, max_l); - chprintf(" Temperature Mean: %f Min: %f Max: %f [ K ] \n", temp_mean_l, temp_min_l, temp_max_l); - #endif // PRINT_INITIAL_STATS and COSMOLOGY - - #endif // DE - - #ifdef SCALAR - #if !defined(COOLING_GRACKLE) && !defined(CHEMISTRY_GPU) // Dont Load scalars when using grackle or - // CHEMISTRY_GPU - for (int s = 0; s < NSCALARS; s++) { - // create the name of the dataset - char dataset[100]; - char number[10]; - strcpy(dataset, "/scalar"); - sprintf(number, "%d", s); - strcat(dataset, number); - - // Open the scalar dataset - dataset_id = H5Dopen(file_id, dataset, H5P_DEFAULT); - // Read the scalar array into the dataset buffer // NOTE: NEED TO FIX FOR - // FLOAT REAL!!! - status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer); - // Free the dataset id - status = H5Dclose(dataset_id); + #ifdef MHD + Read_Grid_HDF5_Field_Magnetic(file_id, dataset_buffer, H, C.magnetic_x, "/magnetic_x"); + Read_Grid_HDF5_Field_Magnetic(file_id, dataset_buffer, H, C.magnetic_y, "/magnetic_y"); + Read_Grid_HDF5_Field_Magnetic(file_id, dataset_buffer, H, C.magnetic_z, "/magnetic_z"); + #endif - // Copy the scalar array to the grid - for (k = 0; k < H.nz_real; k++) { - for (j = 0; j < H.ny_real; j++) { - for (i = 0; i < H.nx_real; i++) { - id = (i + H.n_ghost) + (j + H.n_ghost) * H.nx + (k + H.n_ghost) * H.nx * H.ny; - buf_id = k + j * H.nz_real + i * H.nz_real * H.ny_real; - C.scalar[id + s * H.n_cells] = dataset_buffer[buf_id]; - } - } - } - } - #else // Load Chemistry when using GRACKLE or CHEMISTRY_GPU - if (P.nfile == 0) { - Real dens; - Real HI_frac = INITIAL_FRACTION_HI; - Real HII_frac = INITIAL_FRACTION_HII; - Real HeI_frac = INITIAL_FRACTION_HEI; - Real HeII_frac = INITIAL_FRACTION_HEII; - Real HeIII_frac = INITIAL_FRACTION_HEIII; - Real e_frac = INITIAL_FRACTION_ELECTRON; - Real metal_frac = INITIAL_FRACTION_METAL; - chprintf(" Initial HI Fraction: %e \n", HI_frac); - chprintf(" Initial HII Fraction: %e \n", HII_frac); - chprintf(" Initial HeI Fraction: %e \n", HeI_frac); - chprintf(" Initial HeII Fraction: %e \n", HeII_frac); - chprintf(" Initial HeIII Fraction: %e \n", HeIII_frac); - chprintf(" Initial elect Fraction: %e \n", e_frac); - #ifdef GRACKLE_METALS - chprintf(" Initial metal Fraction: %e \n", metal_frac); - #endif // GRACKEL_METALS - for (k = 0; k < H.nz_real; k++) { - for (j = 0; j < H.ny_real; j++) { - for (i = 0; i < H.nx_real; i++) { - id = (i + H.n_ghost) + (j + H.n_ghost) * H.nx + (k + H.n_ghost) * H.nx * H.ny; - dens = C.density[id]; - C.HI_density[id] = HI_frac * dens; - C.HII_density[id] = HII_frac * dens; - C.HeI_density[id] = HeI_frac * dens; - C.HeII_density[id] = HeII_frac * dens; - C.HeIII_density[id] = HeIII_frac * dens; - C.e_density[id] = e_frac * dens; - #ifdef GRACKLE_METALS - C.metal_density[id] = metal_frac * dens; - #endif - } - } - } - } else { - dataset_id = H5Dopen(file_id, "/HI_density", H5P_DEFAULT); - status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer); - status = H5Dclose(dataset_id); - for (k = 0; k < H.nz_real; k++) { - for (j = 0; j < H.ny_real; j++) { - for (i = 0; i < H.nx_real; i++) { - id = (i + H.n_ghost) + (j + H.n_ghost) * H.nx + (k + H.n_ghost) * H.nx * H.ny; - buf_id = k + j * H.nz_real + i * H.nz_real * H.ny_real; - C.HI_density[id] = dataset_buffer[buf_id]; - // chprintf("%f \n", C.scalar[0*H.n_cells + id] / C.density[id]); - } - } - } - dataset_id = H5Dopen(file_id, "/HII_density", H5P_DEFAULT); - status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer); - status = H5Dclose(dataset_id); - for (k = 0; k < H.nz_real; k++) { - for (j = 0; j < H.ny_real; j++) { - for (i = 0; i < H.nx_real; i++) { - id = (i + H.n_ghost) + (j + H.n_ghost) * H.nx + (k + H.n_ghost) * H.nx * H.ny; - buf_id = k + j * H.nz_real + i * H.nz_real * H.ny_real; - C.HII_density[id] = dataset_buffer[buf_id]; - } - } - } - dataset_id = H5Dopen(file_id, "/HeI_density", H5P_DEFAULT); - status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer); - status = H5Dclose(dataset_id); - for (k = 0; k < H.nz_real; k++) { - for (j = 0; j < H.ny_real; j++) { - for (i = 0; i < H.nx_real; i++) { - id = (i + H.n_ghost) + (j + H.n_ghost) * H.nx + (k + H.n_ghost) * H.nx * H.ny; - buf_id = k + j * H.nz_real + i * H.nz_real * H.ny_real; - C.HeI_density[id] = dataset_buffer[buf_id]; - } - } - } - dataset_id = H5Dopen(file_id, "/HeII_density", H5P_DEFAULT); - status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer); - status = H5Dclose(dataset_id); - for (k = 0; k < H.nz_real; k++) { - for (j = 0; j < H.ny_real; j++) { - for (i = 0; i < H.nx_real; i++) { - id = (i + H.n_ghost) + (j + H.n_ghost) * H.nx + (k + H.n_ghost) * H.nx * H.ny; - buf_id = k + j * H.nz_real + i * H.nz_real * H.ny_real; - C.HeII_density[id] = dataset_buffer[buf_id]; - } - } - } - dataset_id = H5Dopen(file_id, "/HeIII_density", H5P_DEFAULT); - status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer); - status = H5Dclose(dataset_id); - for (k = 0; k < H.nz_real; k++) { - for (j = 0; j < H.ny_real; j++) { - for (i = 0; i < H.nx_real; i++) { - id = (i + H.n_ghost) + (j + H.n_ghost) * H.nx + (k + H.n_ghost) * H.nx * H.ny; - buf_id = k + j * H.nz_real + i * H.nz_real * H.ny_real; - C.HeIII_density[id] = dataset_buffer[buf_id]; - } - } - } - dataset_id = H5Dopen(file_id, "/e_density", H5P_DEFAULT); - status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer); - status = H5Dclose(dataset_id); - for (k = 0; k < H.nz_real; k++) { - for (j = 0; j < H.ny_real; j++) { - for (i = 0; i < H.nx_real; i++) { - id = (i + H.n_ghost) + (j + H.n_ghost) * H.nx + (k + H.n_ghost) * H.nx * H.ny; - buf_id = k + j * H.nz_real + i * H.nz_real * H.ny_real; - C.e_density[id] = dataset_buffer[buf_id]; - } - } - } - #ifdef GRACKLE_METALS - dataset_id = H5Dopen(file_id, "/metal_density", H5P_DEFAULT); - status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer); - status = H5Dclose(dataset_id); - for (k = 0; k < H.nz_real; k++) { - for (j = 0; j < H.ny_real; j++) { - for (i = 0; i < H.nx_real; i++) { - id = (i + H.n_ghost) + (j + H.n_ghost) * H.nx + (k + H.n_ghost) * H.nx * H.ny; - buf_id = k + j * H.nz_real + i * H.nz_real * H.ny_real; - C.metal_density[id] = dataset_buffer[buf_id]; - } - } - } - #endif // GRACKLE_METALS - } - #endif // COOLING_GRACKLE - #endif // SCALAR - #ifdef MHD - // Start by creating a dataspace and buffer that is large enough for the - // magnetic field since it's one larger than the rest - free(dataset_buffer); - dataset_buffer = (Real *)malloc((H.nz_real + 1) * (H.ny_real + 1) * (H.nx_real + 1) * sizeof(Real)); - - // Open the x magnetic field dataset - dataset_id = H5Dopen(file_id, "/magnetic_x", H5P_DEFAULT); - // Read the x magnetic field array into the dataset buffer // NOTE: NEED TO - // FIX FOR FLOAT REAL!!! - status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer); - // Free the dataset id - status = H5Dclose(dataset_id); - - mean_l = 0; - min_l = 1e65; - max_l = -1; - // Copy the x magnetic field array to the grid - for (k = 0; k < H.nz_real + 1; k++) { - for (j = 0; j < H.ny_real + 1; j++) { - for (i = 0; i < H.nx_real + 1; i++) { - id = (i + H.n_ghost - 1) + (j + H.n_ghost - 1) * H.nx + (k + H.n_ghost - 1) * H.nx * H.ny; - buf_id = k + j * (H.nz_real + 1) + i * (H.nz_real + 1) * (H.ny_real + 1); - C.magnetic_x[id] = dataset_buffer[buf_id]; - mean_l += fabs(C.magnetic_x[id]); - if (fabs(C.magnetic_x[id]) > max_l) max_l = fabs(C.magnetic_x[id]); - if (fabs(C.magnetic_x[id]) < min_l) min_l = fabs(C.magnetic_x[id]); - } - } - } - mean_l /= ((H.nz_real + 1) * (H.ny_real + 1) * (H.nx_real + 1)); - - #if MPI_CHOLLA - mean_g = ReduceRealAvg(mean_l); - max_g = ReduceRealMax(max_l); - min_g = ReduceRealMin(min_l); - mean_l = mean_g; - max_l = max_g; - min_l = min_g; - #endif // MPI_CHOLLA - - #if defined(PRINT_INITIAL_STATS) && defined(COSMOLOGY) - chprintf( - " abs(Magnetic X) Mean: %f Min: %f Max: %f [ Msun^1/2 " - "kpc^-1/2 s^-1] \n", - mean_l, min_l, max_l); - #endif // PRINT_INITIAL_STATS and COSMOLOGY - - // Open the y magnetic field dataset - dataset_id = H5Dopen(file_id, "/magnetic_y", H5P_DEFAULT); - // Read the y magnetic field array into the dataset buffer // NOTE: NEED TO - // FIX FOR FLOAT REAL!!! - status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer); - // Free the dataset id - status = H5Dclose(dataset_id); - - mean_l = 0; - min_l = 1e65; - max_l = -1; - // Copy the y magnetic field array to the grid - for (k = 0; k < H.nz_real + 1; k++) { - for (j = 0; j < H.ny_real + 1; j++) { - for (i = 0; i < H.nx_real + 1; i++) { - id = (i + H.n_ghost - 1) + (j + H.n_ghost - 1) * H.nx + (k + H.n_ghost - 1) * H.nx * H.ny; - buf_id = k + j * (H.nz_real + 1) + i * (H.nz_real + 1) * (H.ny_real + 1); - C.magnetic_y[id] = dataset_buffer[buf_id]; - mean_l += fabs(C.magnetic_y[id]); - if (fabs(C.magnetic_y[id]) > max_l) max_l = fabs(C.magnetic_y[id]); - if (fabs(C.magnetic_y[id]) < min_l) min_l = fabs(C.magnetic_y[id]); - } - } - } - mean_l /= ((H.nz_real + 1) * (H.ny_real + 1) * (H.nx_real + 1)); - - #if MPI_CHOLLA - mean_g = ReduceRealAvg(mean_l); - max_g = ReduceRealMax(max_l); - min_g = ReduceRealMin(min_l); - mean_l = mean_g; - max_l = max_g; - min_l = min_g; - #endif // MPI_CHOLLA - - #if defined(PRINT_INITIAL_STATS) && defined(COSMOLOGY) - chprintf( - " abs(Magnetic Y) Mean: %f Min: %f Max: %f [ Msun^1/2 " - "kpc^-1/2 s^-1] \n", - mean_l, min_l, max_l); - #endif // PRINT_INITIAL_STATS and COSMOLOGY - - // Open the z magnetic field dataset - dataset_id = H5Dopen(file_id, "/magnetic_z", H5P_DEFAULT); - // Read the z magnetic field array into the dataset buffer // NOTE: NEED TO - // FIX FOR FLOAT REAL!!! - status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer); - // Free the dataset id - status = H5Dclose(dataset_id); - - mean_l = 0; - min_l = 1e65; - max_l = -1; - // Copy the z magnetic field array to the grid - for (k = 0; k < H.nz_real + 1; k++) { - for (j = 0; j < H.ny_real + 1; j++) { - for (i = 0; i < H.nx_real + 1; i++) { - id = (i + H.n_ghost - 1) + (j + H.n_ghost - 1) * H.nx + (k + H.n_ghost - 1) * H.nx * H.ny; - buf_id = k + j * (H.nz_real + 1) + i * (H.nz_real + 1) * (H.ny_real + 1); - C.magnetic_z[id] = dataset_buffer[buf_id]; - mean_l += fabs(C.magnetic_z[id]); - if (fabs(C.magnetic_z[id]) > max_l) max_l = fabs(C.magnetic_z[id]); - if (fabs(C.magnetic_z[id]) < min_l) min_l = fabs(C.magnetic_z[id]); - } - } - } - mean_l /= ((H.nz_real + 1) * (H.ny_real + 1) * (H.nx_real + 1)); - - #if MPI_CHOLLA - mean_g = ReduceRealAvg(mean_l); - max_g = ReduceRealMax(max_l); - min_g = ReduceRealMin(min_l); - mean_l = mean_g; - max_l = max_g; - min_l = min_g; - #endif // MPI_CHOLLA - - #if defined(PRINT_INITIAL_STATS) && defined(COSMOLOGY) - chprintf( - " abs(Magnetic Z) Mean: %f Min: %f Max: %f [ Msun^1/2 " - "kpc^-1/2 s^-1] \n", - mean_l, min_l, max_l); - #endif // PRINT_INITIAL_STATS and COSMOLOGY - #endif // MHD - } free(dataset_buffer); } #endif diff --git a/src/io/io.h b/src/io/io.h index fb47246a8..6e52c6cb7 100644 --- a/src/io/io.h +++ b/src/io/io.h @@ -57,6 +57,7 @@ herr_t Write_HDF5_Attribute(hid_t file_id, hid_t dataspace_id, double* attribute herr_t Write_HDF5_Attribute(hid_t file_id, hid_t dataspace_id, int* attribute, const char* name); herr_t Read_HDF5_Dataset(hid_t file_id, double* dataset_buffer, const char* name); +herr_t Read_HDF5_Dataset(hid_t file_id, float* dataset_buffer, const char* name); herr_t Write_HDF5_Dataset(hid_t file_id, hid_t dataspace_id, double* dataset_buffer, const char* name); herr_t Write_HDF5_Dataset(hid_t file_id, hid_t dataspace_id, float* dataset_buffer, const char* name); diff --git a/src/io/io_gpu.cu b/src/io/io_gpu.cu index 62f0473a8..1f3ba65c6 100644 --- a/src/io/io_gpu.cu +++ b/src/io/io_gpu.cu @@ -4,12 +4,16 @@ #include #include "../grid/grid3D.h" - #include "../io/io.h" // To provide io.h with OutputViz3D + #include "../io/io.h" // Note that the HDF5 file and buffer will have size nx_real * ny_real * nz_real -// whereas the conserved variables have size nx,ny,nz Note that magnetic fields +// whereas the conserved variables have size nx,ny,nz. + +// Note that magnetic fields // add +1 to nx_real ny_real nz_real since an extra face needs to be output, but -// also has the same size nx ny nz For the magnetic field case, a different +// also has the same size nx ny nz. + +// For the magnetic field case, a different // nx_real+1 ny_real+1 nz_real+1 n_ghost-1 are provided as inputs. // Copy Real (non-ghost) cells from source to a double destination (for writing From 9d184e729c3a09d99783b5b1a548a0cf01956241 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Thu, 23 Feb 2023 21:52:15 -0500 Subject: [PATCH 221/694] Refactor and add MHD system tests - Minor refactor to how shock tubes and similar tests are done to reduce code duplication - Add MHD system tests for the following 5 shock tubes - Brio & Wu - Dai & Woodward - Ryu & Jones 1a - Ryu & Jones 2a - Ryu & Jones 4d --- cholla-tests-data | 2 +- ...ockTubeCorrectInputExpectCorrectOutput.txt | 72 ++++++++ ...cFieldCorrectInputExpectCorrectOutput.txt} | 0 ...cFieldCorrectInputExpectCorrectOutput.txt} | 0 ...ockTubeCorrectInputExpectCorrectOutput.txt | 73 ++++++++ ...actionCorrectInputExpectCorrectOutput.txt} | 0 ...ockTubeCorrectInputExpectCorrectOutput.txt | 74 ++++++++ ...ockTubeCorrectInputExpectCorrectOutput.txt | 74 ++++++++ ...ockTubeCorrectInputExpectCorrectOutput.txt | 74 ++++++++ ...ckTubeCorrectInputExpectCorrectOutput.txt} | 0 src/system_tests/mhd_system_tests.cpp | 159 +++++++++--------- 11 files changed, 452 insertions(+), 76 deletions(-) create mode 100644 src/system_tests/input_files/tMHDSYSTEMParameterizedMpi_BrioAndWuShockTubeCorrectInputExpectCorrectOutput.txt rename src/system_tests/input_files/{tMHDSYSTEMConstantParameterizedMpi_MagneticFieldCorrectInputExpectCorrectOutput.txt => tMHDSYSTEMParameterizedMpi_ConstantWithMagneticFieldCorrectInputExpectCorrectOutput.txt} (100%) rename src/system_tests/input_files/{tMHDSYSTEMConstantParameterizedMpi_ZeroMagneticFieldCorrectInputExpectCorrectOutput.txt => tMHDSYSTEMParameterizedMpi_ConstantWithZeroMagneticFieldCorrectInputExpectCorrectOutput.txt} (100%) create mode 100644 src/system_tests/input_files/tMHDSYSTEMParameterizedMpi_DaiAndWoodwardShockTubeCorrectInputExpectCorrectOutput.txt rename src/system_tests/input_files/{tMHDSYSTEMEinfeldtStrongRarefaction_CorrectInputExpectCorrectOutput.txt => tMHDSYSTEMParameterizedMpi_EinfeldtStrongRarefactionCorrectInputExpectCorrectOutput.txt} (100%) create mode 100644 src/system_tests/input_files/tMHDSYSTEMParameterizedMpi_RyuAndJones1aShockTubeCorrectInputExpectCorrectOutput.txt create mode 100644 src/system_tests/input_files/tMHDSYSTEMParameterizedMpi_RyuAndJones2aShockTubeCorrectInputExpectCorrectOutput.txt create mode 100644 src/system_tests/input_files/tMHDSYSTEMParameterizedMpi_RyuAndJones4dShockTubeCorrectInputExpectCorrectOutput.txt rename src/system_tests/input_files/{tMHDSYSTEMSodShockTubeParameterizedMpi_CorrectInputExpectCorrectOutput.txt => tMHDSYSTEMParameterizedMpi_SodShockTubeCorrectInputExpectCorrectOutput.txt} (100%) diff --git a/cholla-tests-data b/cholla-tests-data index 93cfe12c5..8c3c4476f 160000 --- a/cholla-tests-data +++ b/cholla-tests-data @@ -1 +1 @@ -Subproject commit 93cfe12c50a44f7a3e81f9299abb238315c58002 +Subproject commit 8c3c4476fdc388554cd4bb1ca036a2762830951f diff --git a/src/system_tests/input_files/tMHDSYSTEMParameterizedMpi_BrioAndWuShockTubeCorrectInputExpectCorrectOutput.txt b/src/system_tests/input_files/tMHDSYSTEMParameterizedMpi_BrioAndWuShockTubeCorrectInputExpectCorrectOutput.txt new file mode 100644 index 000000000..514dd3359 --- /dev/null +++ b/src/system_tests/input_files/tMHDSYSTEMParameterizedMpi_BrioAndWuShockTubeCorrectInputExpectCorrectOutput.txt @@ -0,0 +1,72 @@ +# +# Parameter File for 3D Brio & Wu MHD shock tube +# Citation: Brio & Wu 1988 "An Upwind Differencing Scheme for the Equations of +# Ideal Magnetohydrodynamics" +# + +################################################ +# number of grid cells in the x dimension +nx=64 +# number of grid cells in the y dimension +ny=64 +# number of grid cells in the z dimension +nz=64 +# final output time +tout=0.1 +# time interval for output +outstep=0.1 +# name of initial conditions +init=Riemann + +# domain properties +xmin=0.0 +ymin=0.0 +zmin=0.0 +xlen=1.0 +ylen=1.0 +zlen=1.0 + +# type of boundary conditions +xl_bcnd=3 +xu_bcnd=3 +yl_bcnd=3 +yu_bcnd=3 +zl_bcnd=3 +zu_bcnd=3 + +# path to output directory +outdir=./ + +################################################# +# Parameters for 1D Riemann problems +# density of left state +rho_l=1.0 +# velocity of left state +vx_l=0 +vy_l=0 +vz_l=0 +# pressure of left state +P_l=1.0 +# Magnetic field of the left state +Bx_l=0.75 +By_l=1.0 +Bz_l=0.0 + +# density of right state +rho_r=0.128 +# velocity of right state +vx_r=0 +vy_r=0 +vz_r=0 +# pressure of right state +P_r=0.1 +# Magnetic field of the right state +Bx_r=0.75 +By_r=-1.0 +Bz_r=0.0 + +# location of initial discontinuity +diaph=0.5 +# value of gamma +gamma=2.0 + diff --git a/src/system_tests/input_files/tMHDSYSTEMConstantParameterizedMpi_MagneticFieldCorrectInputExpectCorrectOutput.txt b/src/system_tests/input_files/tMHDSYSTEMParameterizedMpi_ConstantWithMagneticFieldCorrectInputExpectCorrectOutput.txt similarity index 100% rename from src/system_tests/input_files/tMHDSYSTEMConstantParameterizedMpi_MagneticFieldCorrectInputExpectCorrectOutput.txt rename to src/system_tests/input_files/tMHDSYSTEMParameterizedMpi_ConstantWithMagneticFieldCorrectInputExpectCorrectOutput.txt diff --git a/src/system_tests/input_files/tMHDSYSTEMConstantParameterizedMpi_ZeroMagneticFieldCorrectInputExpectCorrectOutput.txt b/src/system_tests/input_files/tMHDSYSTEMParameterizedMpi_ConstantWithZeroMagneticFieldCorrectInputExpectCorrectOutput.txt similarity index 100% rename from src/system_tests/input_files/tMHDSYSTEMConstantParameterizedMpi_ZeroMagneticFieldCorrectInputExpectCorrectOutput.txt rename to src/system_tests/input_files/tMHDSYSTEMParameterizedMpi_ConstantWithZeroMagneticFieldCorrectInputExpectCorrectOutput.txt diff --git a/src/system_tests/input_files/tMHDSYSTEMParameterizedMpi_DaiAndWoodwardShockTubeCorrectInputExpectCorrectOutput.txt b/src/system_tests/input_files/tMHDSYSTEMParameterizedMpi_DaiAndWoodwardShockTubeCorrectInputExpectCorrectOutput.txt new file mode 100644 index 000000000..538984951 --- /dev/null +++ b/src/system_tests/input_files/tMHDSYSTEMParameterizedMpi_DaiAndWoodwardShockTubeCorrectInputExpectCorrectOutput.txt @@ -0,0 +1,73 @@ +# +# Parameter File for 3D Dai & Woodward MHD shock tube +# Citation: Dai & Woodward 1998 "On The Diverrgence-Free Condition and +# Conservation Laws in Numerical Simulations for Supersonic Magnetohydrodynamic +# Flows" +# + +################################################ +# number of grid cells in the x dimension +nx=64 +# number of grid cells in the y dimension +ny=64 +# number of grid cells in the z dimension +nz=64 +# final output time +tout=0.2 +# time interval for output +outstep=0.2 +# name of initial conditions +init=Riemann + +# domain properties +xmin=0.0 +ymin=0.0 +zmin=0.0 +xlen=1.0 +ylen=1.0 +zlen=1.0 + +# type of boundary conditions +xl_bcnd=3 +xu_bcnd=3 +yl_bcnd=3 +yu_bcnd=3 +zl_bcnd=3 +zu_bcnd=3 + +# path to output directory +outdir=./ + +################################################# +# Parameters for 1D Riemann problems +# density of left state +rho_l=1.08 +# velocity of left state +vx_l=1.2 +vy_l=0.01 +vz_l=0.5 +# pressure of left state +P_l=0.95 +# Magnetic field of the left state +Bx_l=0.5641895835477563 +By_l=1.0155412503859613 +Bz_l=0.5641895835477563 + +# density of right state +rho_r=1.0 +# velocity of right state +vx_r=0.0 +vy_r=0.0 +vz_r=0.0 +# pressure of right state +P_r=1.0 +# Magnetic field of the right state +Bx_r=0.5641895835477563 +By_r=1.1283791670955126 +Bz_r=0.5641895835477563 + +# location of initial discontinuity +diaph=0.5 +# value of gamma +gamma=1.6666666666666667 + diff --git a/src/system_tests/input_files/tMHDSYSTEMEinfeldtStrongRarefaction_CorrectInputExpectCorrectOutput.txt b/src/system_tests/input_files/tMHDSYSTEMParameterizedMpi_EinfeldtStrongRarefactionCorrectInputExpectCorrectOutput.txt similarity index 100% rename from src/system_tests/input_files/tMHDSYSTEMEinfeldtStrongRarefaction_CorrectInputExpectCorrectOutput.txt rename to src/system_tests/input_files/tMHDSYSTEMParameterizedMpi_EinfeldtStrongRarefactionCorrectInputExpectCorrectOutput.txt diff --git a/src/system_tests/input_files/tMHDSYSTEMParameterizedMpi_RyuAndJones1aShockTubeCorrectInputExpectCorrectOutput.txt b/src/system_tests/input_files/tMHDSYSTEMParameterizedMpi_RyuAndJones1aShockTubeCorrectInputExpectCorrectOutput.txt new file mode 100644 index 000000000..07440faa3 --- /dev/null +++ b/src/system_tests/input_files/tMHDSYSTEMParameterizedMpi_RyuAndJones1aShockTubeCorrectInputExpectCorrectOutput.txt @@ -0,0 +1,74 @@ +# +# Parameter File for 3D Ryu & Jones MHD shock tube 4d. +# Citation: Ryu & Jones 1995 "Numerical Magnetohydrodynamics in Astrophysics: +# Algorithms and Tests for One-Dimensional Flow" +# +# Note: There are many shock tubes in this paper. This settings file is +# specifically for shock tube 4d +# + +################################################ +# number of grid cells in the x dimension +nx=64 +# number of grid cells in the y dimension +ny=64 +# number of grid cells in the z dimension +nz=64 +# final output time +tout=0.08 +# time interval for output +outstep=0.08 +# name of initial conditions +init=Riemann + +# domain properties +xmin=0.0 +ymin=0.0 +zmin=0.0 +xlen=1.0 +ylen=1.0 +zlen=1.0 + +# type of boundary conditions +xl_bcnd=3 +xu_bcnd=3 +yl_bcnd=3 +yu_bcnd=3 +zl_bcnd=3 +zu_bcnd=3 + +# path to output directory +outdir=./ + +################################################# +# Parameters for 1D Riemann problems +# density of left state +rho_l=1.0 +# velocity of left state +vx_l=10.0 +vy_l=0.0 +vz_l=0.0 +# pressure of left state +P_l=20.0 +# Magnetic field of the left state +Bx_l=1.4104739588693909 +By_l=1.4104739588693909 +Bz_l=0.0 + +# density of right state +rho_r=1.0 +# velocity of right state +vx_r=-10.0 +vy_r=0.0 +vz_r=0.0 +# pressure of right state +P_r=1.0 +# Magnetic field of the right state +Bx_r=1.4104739588693909 +By_r=1.4104739588693909 +Bz_r=0.0 + +# location of initial discontinuity +diaph=0.5 +# value of gamma +gamma=1.6666666666666667 diff --git a/src/system_tests/input_files/tMHDSYSTEMParameterizedMpi_RyuAndJones2aShockTubeCorrectInputExpectCorrectOutput.txt b/src/system_tests/input_files/tMHDSYSTEMParameterizedMpi_RyuAndJones2aShockTubeCorrectInputExpectCorrectOutput.txt new file mode 100644 index 000000000..70a7ef8e9 --- /dev/null +++ b/src/system_tests/input_files/tMHDSYSTEMParameterizedMpi_RyuAndJones2aShockTubeCorrectInputExpectCorrectOutput.txt @@ -0,0 +1,74 @@ +# +# Parameter File for 3D Ryu & Jones MHD shock tube 4d. +# Citation: Ryu & Jones 1995 "Numerical Magnetohydrodynamics in Astrophysics: +# Algorithms and Tests for One-Dimensional Flow" +# +# Note: There are many shock tubes in this paper. This settings file is +# specifically for shock tube 4d +# + +################################################ +# number of grid cells in the x dimension +nx=64 +# number of grid cells in the y dimension +ny=64 +# number of grid cells in the z dimension +nz=64 +# final output time +tout=0.2 +# time interval for output +outstep=0.2 +# name of initial conditions +init=Riemann + +# domain properties +xmin=0.0 +ymin=0.0 +zmin=0.0 +xlen=1.0 +ylen=1.0 +zlen=1.0 + +# type of boundary conditions +xl_bcnd=3 +xu_bcnd=3 +yl_bcnd=3 +yu_bcnd=3 +zl_bcnd=3 +zu_bcnd=3 + +# path to output directory +outdir=./ + +################################################# +# Parameters for 1D Riemann problems +# density of left state +rho_l=1.08 +# velocity of left state +vx_l=1.2 +vy_l=0.01 +vz_l=0.5 +# pressure of left state +P_l=0.95 +# Magnetic field of the left state +Bx_l=0.5641895835477563 +By_l=1.0155412503859613 +Bz_l=0.5641895835477563 + +# density of right state +rho_r=1.0 +# velocity of right state +vx_r=0.0 +vy_r=0.0 +vz_r=0.0 +# pressure of right state +P_r=1.0 +# Magnetic field of the right state +Bx_r=0.5641895835477563 +By_r=1.1283791670955126 +Bz_r=0.5641895835477563 + +# location of initial discontinuity +diaph=0.5 +# value of gamma +gamma=1.6666666666666667 diff --git a/src/system_tests/input_files/tMHDSYSTEMParameterizedMpi_RyuAndJones4dShockTubeCorrectInputExpectCorrectOutput.txt b/src/system_tests/input_files/tMHDSYSTEMParameterizedMpi_RyuAndJones4dShockTubeCorrectInputExpectCorrectOutput.txt new file mode 100644 index 000000000..6596c2b01 --- /dev/null +++ b/src/system_tests/input_files/tMHDSYSTEMParameterizedMpi_RyuAndJones4dShockTubeCorrectInputExpectCorrectOutput.txt @@ -0,0 +1,74 @@ +# +# Parameter File for 3D Ryu & Jones MHD shock tube 4d. +# Citation: Ryu & Jones 1995 "Numerical Magnetohydrodynamics in Astrophysics: +# Algorithms and Tests for One-Dimensional Flow" +# +# Note: There are many shock tubes in this paper. This settings file is +# specifically for shock tube 4d +# + +################################################ +# number of grid cells in the x dimension +nx=64 +# number of grid cells in the y dimension +ny=64 +# number of grid cells in the z dimension +nz=64 +# final output time +tout=0.16 +# time interval for output +outstep=0.16 +# name of initial conditions +init=Riemann + +# domain properties +xmin=0.0 +ymin=0.0 +zmin=0.0 +xlen=1.0 +ylen=1.0 +zlen=1.0 + +# type of boundary conditions +xl_bcnd=3 +xu_bcnd=3 +yl_bcnd=3 +yu_bcnd=3 +zl_bcnd=3 +zu_bcnd=3 + +# path to output directory +outdir=./ + +################################################# +# Parameters for 1D Riemann problems +# density of left state +rho_l=1.0 +# velocity of left state +vx_l=0.0 +vy_l=0.0 +vz_l=0.0 +# pressure of left state +P_l=1.0 +# Magnetic field of the left state +Bx_l=0.7 +By_l=0.0 +Bz_l=0.0 + +# density of right state +rho_r=0.3 +# velocity of right state +vx_r=0.0 +vy_r=0.0 +vz_r=1.0 +# pressure of right state +P_r=0.2 +# Magnetic field of the right state +Bx_r=0.7 +By_r=1.0 +Bz_r=0.0 + +# location of initial discontinuity +diaph=0.5 +# value of gamma +gamma=1.6666666666666667 diff --git a/src/system_tests/input_files/tMHDSYSTEMSodShockTubeParameterizedMpi_CorrectInputExpectCorrectOutput.txt b/src/system_tests/input_files/tMHDSYSTEMParameterizedMpi_SodShockTubeCorrectInputExpectCorrectOutput.txt similarity index 100% rename from src/system_tests/input_files/tMHDSYSTEMSodShockTubeParameterizedMpi_CorrectInputExpectCorrectOutput.txt rename to src/system_tests/input_files/tMHDSYSTEMParameterizedMpi_SodShockTubeCorrectInputExpectCorrectOutput.txt diff --git a/src/system_tests/mhd_system_tests.cpp b/src/system_tests/mhd_system_tests.cpp index ca1a63514..a443c9bcb 100644 --- a/src/system_tests/mhd_system_tests.cpp +++ b/src/system_tests/mhd_system_tests.cpp @@ -15,40 +15,6 @@ #include "../io/io.h" #include "../system_tests/system_tester.h" -// ============================================================================= -// Test Suite: tMHDSYSTEMConstantParameterizedMpi -// ============================================================================= -/*! - * \defgroup tMHDSYSTEMConstantParameterizedMpi - * \brief Test the constant initial conditions as a parameterized test - * with varying numbers of MPI ranks - * - */ -/// @{ -class tMHDSYSTEMConstantParameterizedMpi : public ::testing::TestWithParam -{ - protected: - systemTest::SystemTestRunner constantTest; -}; - -// Test with all mangetic fields set to zero -TEST_P(tMHDSYSTEMConstantParameterizedMpi, ZeroMagneticFieldCorrectInputExpectCorrectOutput) -{ - constantTest.numMpiRanks = GetParam(); - constantTest.runTest(); -} - -// Test with all mangetic fields set to one -TEST_P(tMHDSYSTEMConstantParameterizedMpi, MagneticFieldCorrectInputExpectCorrectOutput) -{ - constantTest.numMpiRanks = GetParam(); - constantTest.runTest(); -} - -INSTANTIATE_TEST_SUITE_P(, tMHDSYSTEMConstantParameterizedMpi, ::testing::Values(1, 2, 4)); -/// @} -// ============================================================================= - // ============================================================================= // Test Suite: tMHDSYSTEMLinearWavesParameterizedAngle // ============================================================================= @@ -406,46 +372,6 @@ INSTANTIATE_TEST_SUITE_P(, tMHDSYSTEMLinearWavesParameterizedAngle, /// @} // ============================================================================= -// ============================================================================= -// Test Suite: tMHDSYSTEMSodShockTube -// TODO: This is temporary. Remove once PPMP is implemented for MHD and replace -// with the hydro sod test -// ============================================================================= -/*! - * \defgroup - * tMHDSYSTEMSodShockTubeParameterizedMpi_CorrectInputExpectCorrectOutput \brief - * Test the Sod Shock tube initial conditions as a parameterized test with - * varying numbers of MPI ranks - * - */ -/// @{ -class tMHDSYSTEMSodShockTubeParameterizedMpi : public ::testing::TestWithParam -{ - protected: - systemTest::SystemTestRunner sodTest; -}; - -TEST_P(tMHDSYSTEMSodShockTubeParameterizedMpi, CorrectInputExpectCorrectOutput) -{ - sodTest.numMpiRanks = GetParam(); - sodTest.runTest(); -} - -INSTANTIATE_TEST_SUITE_P(CorrectInputExpectCorrectOutput, tMHDSYSTEMSodShockTubeParameterizedMpi, - ::testing::Values(1, 2, 4)); -/// @} -// ============================================================================= - -// ============================================================================= -// Test Suite: tMHDSYSTEMEinfeldtStrongRarefaction -// ============================================================================= -TEST(tMHDSYSTEMEinfeldtStrongRarefaction, CorrectInputExpectCorrectOutput) -{ - systemTest::SystemTestRunner rarefactionTest; - rarefactionTest.runTest(); -} -// ============================================================================= - // ============================================================================= // Test Suite: tMHDSYSTEMLinearWavesParameterizedMpi // ============================================================================= @@ -522,6 +448,8 @@ class tMHDSYSTEMLinearWavesParameterizedMpi : public ::testing::TestWithParam +{ + protected: + systemTest::SystemTestRunner test_runner; +}; +INSTANTIATE_TEST_SUITE_P(, tMHDSYSTEMParameterizedMpi, ::testing::Values(1, 2, 4)); + +/// Test constant state with all magnetic fields set to zero +TEST_P(tMHDSYSTEMParameterizedMpi, ConstantWithZeroMagneticFieldCorrectInputExpectCorrectOutput) +{ + test_runner.numMpiRanks = GetParam(); + test_runner.runTest(); +} + +/// Test constant state with all magnetic fields set to one +TEST_P(tMHDSYSTEMParameterizedMpi, ConstantWithMagneticFieldCorrectInputExpectCorrectOutput) +{ + test_runner.numMpiRanks = GetParam(); + test_runner.runTest(); +} + +/// TODO: This is temporary. Remove once PPMP is implemented for MHD and replace +/// TODO: with the hydro sod test +TEST_P(tMHDSYSTEMParameterizedMpi, SodShockTubeCorrectInputExpectCorrectOutput) +{ + test_runner.numMpiRanks = GetParam(); + test_runner.runTest(); +} + +/// Test the MHD Einfeldt Strong Rarefaction (Einfeldt et al. 1991) +TEST_P(tMHDSYSTEMParameterizedMpi, EinfeldtStrongRarefactionCorrectInputExpectCorrectOutput) +{ + test_runner.numMpiRanks = GetParam(); + test_runner.runTest(); +} + +/// Test the Brio & Wu Shock Tube (Brio & Wu 1988) +TEST_P(tMHDSYSTEMParameterizedMpi, BrioAndWuShockTubeCorrectInputExpectCorrectOutput) +{ + test_runner.numMpiRanks = GetParam(); + test_runner.runTest(); +} + +/// Test the Dai & Woodward Shock Tube (Dai & Woodward 1998) +TEST_P(tMHDSYSTEMParameterizedMpi, DaiAndWoodwardShockTubeCorrectInputExpectCorrectOutput) +{ + test_runner.numMpiRanks = GetParam(); + test_runner.runTest(); +} + +/// Test the Ryu & Jones 1a Shock Tube (Ryu & Jones 1995) +TEST_P(tMHDSYSTEMParameterizedMpi, RyuAndJones1aShockTubeCorrectInputExpectCorrectOutput) +{ + test_runner.numMpiRanks = GetParam(); + test_runner.runTest(); +} + +/// Test the Ryu & Jones 2a Shock Tube (Ryu & Jones 1995) +TEST_P(tMHDSYSTEMParameterizedMpi, RyuAndJones2aShockTubeCorrectInputExpectCorrectOutput) +{ + test_runner.numMpiRanks = GetParam(); + test_runner.runTest(); +} + +/// Test the Ryu & Jones 4d Shock Tube (Ryu & Jones 1995) +TEST_P(tMHDSYSTEMParameterizedMpi, RyuAndJones4dShockTubeCorrectInputExpectCorrectOutput) +{ + test_runner.numMpiRanks = GetParam(); + test_runner.runTest(); +} /// @} // ============================================================================= \ No newline at end of file From a12c90e4da0985a6e6d9938398c45c06360d1ab8 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Fri, 24 Feb 2023 13:43:45 -0500 Subject: [PATCH 222/694] Add a function to check that the configuration is correct --- src/main.cpp | 3 ++ src/utils/error_handling.cpp | 79 ++++++++++++++++++++++++++++++++++++ src/utils/error_handling.h | 9 ++++ 3 files changed, 91 insertions(+) diff --git a/src/main.cpp b/src/main.cpp index bcda7a32a..2256f981d 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -83,6 +83,9 @@ int main(int argc, char *argv[]) if (strcmp(P.init, "Read_Grid") == 0) chprintf("Input directory: %s\n", P.indir); chprintf("Output directory: %s\n", P.outdir); + // Check the configuration + Check_Configuration(P); + // Create a Log file to output run-time messages and output the git hash and // macro flags used Create_Log_File(P); diff --git a/src/utils/error_handling.cpp b/src/utils/error_handling.cpp index 8fafb48bd..fc09c3363 100644 --- a/src/utils/error_handling.cpp +++ b/src/utils/error_handling.cpp @@ -1,4 +1,7 @@ #include "../utils/error_handling.h" + +#include + #ifdef MPI_CHOLLA #include void chexit(int code) @@ -21,3 +24,79 @@ void chexit(int code) exit(code); } #endif /*MPI_CHOLLA*/ + +void Check_Configuration(parameters const &P) +{ +// General Checks +// ============== +#ifndef GIT_HASH + #error "GIT_HASH is not defined" +#endif //! GIT_HASH + + // Check that GIT_HASH is the correct length. It needs to be 41 and not 40 since strings are null terminated + static_assert(sizeof(GIT_HASH) == 41); + +#ifndef MACRO_FLAGS + #error "MACRO_FLAGS is not defined" +#endif //! MACRO_FLAGS + + // Check that MACRO_FLAGS has contents + static_assert(sizeof(MACRO_FLAGS) > 1); + + // Must have CUDA +#ifndef CUDA + #error "The CUDA macro is required" +#endif //! CUDA + + // warn if error checking is disabled +#ifndef CUDA_ERROR_CHECK + #warning "CUDA error checking is disabled. Enable it with the CUDA_ERROR_CHECK macro" +#endif //! CUDA_ERROR_CHECK + + // Check that PRECISION is 2 +#ifndef PRECISION + #error "The PRECISION macro is required" +#endif //! PRECISION + static_assert(PRECISION == 2, "PRECISION must be 2. Single precision is not currently supported"); + +// MHD Checks +// ========== +#ifdef MHD + assert(P.nx < 2 or P.ny < 2 or P.nz < 2 and "MHD runs must be 3D"); + + // Must use the correct integrator + #if !defined(VL) || defined(SIMPLE) + #error "MHD only supports the Van Leer integrator" + #endif //! VL or SIMPLE + + // must only use HLLD + #if !defined(HLLD) || defined(EXACT) || defined(ROE) || defined(HLL) || defined(HLLC) + #error "MHD only supports the HLLD Riemann Solver" + #endif //! HLLD or EXACT or ROE or HLL or HLLC + + // May only use certain reconstructions + #if !defined(PCM) || defined(PLMP) || defined(PLMC) || defined(PPMC) || defined(PPMP) + #error "MHD only supports PCM reconstruction" + #endif //! PCM or PLMP or PLMC or PPMC or PPMP + + // must have HDF5 + #ifndef HDF5 + #error "MHD only supports HDF5 output" + #endif //! HDF5 + + // Warn that diode boundaries are disabled + if (P.xl_bcnd == 3 or P.xu_bcnd == 3 or P.yl_bcnd == 3 or P.yu_bcnd == 3 or P.zl_bcnd == 3 or P.zu_bcnd == 3) { + std::cerr << "Warning: The diode on the outflow boundaries is disabled for MHD" << std::endl; + } + + // Error if unsupported boundary condition is used + assert(P.xl_bcnd == 2 or P.xu_bcnd == 2 or P.yl_bcnd == 2 or P.yu_bcnd == 2 or P.zl_bcnd == 2 or + P.zu_bcnd == 2 && "MHD does not support reflective boundary conditions"); + + // AVERAGE_SLOW_CELLS not supported on MHD + #ifdef AVERAGE_SLOW_CELLS + #error "MHD does not support AVERAGE_SLOW_CELLS" + #endif // AVERAGE_SLOW_CELLS + +#endif // MHD +} diff --git a/src/utils/error_handling.h b/src/utils/error_handling.h index 174c083b6..d539f0e50 100644 --- a/src/utils/error_handling.h +++ b/src/utils/error_handling.h @@ -1,5 +1,14 @@ #ifndef ERROR_HANDLING_CHOLLA_H #define ERROR_HANDLING_CHOLLA_H #include + +#include "../global/global.h" void chexit(int code); + +/*! + * \brief Check that the Cholla configuration and parameters don't have any significant errors. Mostly compile time + * checks. + * + */ +void Check_Configuration(parameters const &P); #endif /*ERROR_HANDLING_CHOLLA_H*/ From afae1dfcedb1abec91fe6b4d779e7116e97695ae Mon Sep 17 00:00:00 2001 From: helenarichie Date: Thu, 2 Mar 2023 16:06:07 -0500 Subject: [PATCH 223/694] add google-readability-braces-around-statements to clang tidy checks --- .clang-tidy | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.clang-tidy b/.clang-tidy index 4c0eacf8c..23ec43b87 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -34,6 +34,7 @@ Checks: "*, google-readability-avoid-underscore-in-googletest-name, google-upgrade-googletest-case, + google-readability-braces-around-statements, -bugprone-assignment-in-if-condition, -bugprone-branch-clone, @@ -95,7 +96,6 @@ Checks: "*, -google-build-namespaces, -google-explicit-constructor, -google-global-names-in-headers, - -google-readability-braces-around-statements, -google-readability-casting, -google-readability-namespace-comments, -google-readability-todo, From 05ac4bc4d47625eeebbc5a6686c03896eaa6e3ac Mon Sep 17 00:00:00 2001 From: helenarichie Date: Thu, 2 Mar 2023 17:10:22 -0500 Subject: [PATCH 224/694] add braces to satisfy google-readability-braces-around-statement clang tidy check --- src/global/global.cpp | 242 ++++++++++++++++---------------- src/global/global_cuda.h | 5 +- src/grid/cuda_boundaries.cu | 54 ++++--- src/grid/mpi_boundaries.cpp | 30 ++-- src/hydro/hydro_cuda.cu | 3 +- src/io/io.cpp | 36 +++-- src/model/disk_ICs.cpp | 3 +- src/mpi/cuda_mpi_routines.cu | 6 +- src/mpi/mpi_routines.cpp | 16 ++- src/reconstruction/ppmc_cuda.cu | 30 ++-- 10 files changed, 240 insertions(+), 185 deletions(-) diff --git a/src/global/global.cpp b/src/global/global.cpp index ec28208e5..55ca787f4 100644 --- a/src/global/global.cpp +++ b/src/global/global.cpp @@ -48,10 +48,11 @@ double get_time(void) * \brief Mathematical sign function. Returns sign of x. */ int sgn(Real x) { - if (x < 0) + if (x < 0) { return -1; - else + } else { return 1; + } } #ifndef CUDA @@ -152,15 +153,17 @@ void parse_params(char *param_file, struct parameters *parms, int argc, char **a /* Parse name/value pair from line */ char name[MAXLEN], value[MAXLEN]; s = strtok(buff, "="); - if (s == NULL) + if (s == NULL) { continue; - else + } else { strncpy(name, s, MAXLEN); + } s = strtok(NULL, "="); - if (s == NULL) + if (s == NULL) { continue; - else + } else { strncpy(value, s, MAXLEN); + } trim(value); parse_param(name, value, parms); } @@ -171,15 +174,17 @@ void parse_params(char *param_file, struct parameters *parms, int argc, char **a for (int i = 0; i < argc; ++i) { char name[MAXLEN], value[MAXLEN]; s = strtok(argv[i], "="); - if (s == NULL) + if (s == NULL) { continue; - else + } else { strncpy(name, s, MAXLEN); + } s = strtok(NULL, "="); - if (s == NULL) + if (s == NULL) { continue; - else + } else { strncpy(value, s, MAXLEN); + } parse_param(name, value, parms); chprintf("Override with %s=%s\n", name, value); } @@ -190,244 +195,245 @@ void parse_params(char *param_file, struct parameters *parms, int argc, char **a void parse_param(char *name, char *value, struct parameters *parms) { /* Copy into correct entry in parameters struct */ - if (strcmp(name, "nx") == 0) + if (strcmp(name, "nx") == 0) { parms->nx = atoi(value); - else if (strcmp(name, "ny") == 0) + } else if (strcmp(name, "ny") == 0) { parms->ny = atoi(value); - else if (strcmp(name, "nz") == 0) + } else if (strcmp(name, "nz") == 0) { parms->nz = atoi(value); - else if (strcmp(name, "tout") == 0) + } else if (strcmp(name, "tout") == 0) { parms->tout = atof(value); - else if (strcmp(name, "outstep") == 0) + } else if (strcmp(name, "outstep") == 0) { parms->outstep = atof(value); - else if (strcmp(name, "n_steps_output") == 0) + } else if (strcmp(name, "n_steps_output") == 0) { parms->n_steps_output = atoi(value); - else if (strcmp(name, "gamma") == 0) + } else if (strcmp(name, "gamma") == 0) { parms->gamma = atof(value); - else if (strcmp(name, "init") == 0) + } else if (strcmp(name, "init") == 0) { strncpy(parms->init, value, MAXLEN); - else if (strcmp(name, "nfile") == 0) + } else if (strcmp(name, "nfile") == 0) { parms->nfile = atoi(value); - else if (strcmp(name, "n_hydro") == 0) + } else if (strcmp(name, "n_hydro") == 0) { parms->n_hydro = atoi(value); - else if (strcmp(name, "n_particle") == 0) + } else if (strcmp(name, "n_particle") == 0) { parms->n_particle = atoi(value); - else if (strcmp(name, "n_projection") == 0) + } else if (strcmp(name, "n_projection") == 0) { parms->n_projection = atoi(value); - else if (strcmp(name, "n_rotated_projection") == 0) + } else if (strcmp(name, "n_rotated_projection") == 0) { parms->n_rotated_projection = atoi(value); - else if (strcmp(name, "n_slice") == 0) + } else if (strcmp(name, "n_slice") == 0) { parms->n_slice = atoi(value); - else if (strcmp(name, "n_out_float32") == 0) + } else if (strcmp(name, "n_out_float32") == 0) { parms->n_out_float32 = atoi(value); - else if (strcmp(name, "out_float32_density") == 0) + } else if (strcmp(name, "out_float32_density") == 0) { parms->out_float32_density = atoi(value); - else if (strcmp(name, "out_float32_momentum_x") == 0) + } else if (strcmp(name, "out_float32_momentum_x") == 0) { parms->out_float32_momentum_x = atoi(value); - else if (strcmp(name, "out_float32_momentum_y") == 0) + } else if (strcmp(name, "out_float32_momentum_y") == 0) { parms->out_float32_momentum_y = atoi(value); - else if (strcmp(name, "out_float32_momentum_z") == 0) + } else if (strcmp(name, "out_float32_momentum_z") == 0) { parms->out_float32_momentum_z = atoi(value); - else if (strcmp(name, "out_float32_Energy") == 0) + } else if (strcmp(name, "out_float32_Energy") == 0) { parms->out_float32_Energy = atoi(value); #ifdef DE - else if (strcmp(name, "out_float32_GasEnergy") == 0) + } else if (strcmp(name, "out_float32_GasEnergy") == 0) { parms->out_float32_GasEnergy = atoi(value); #endif // DE #ifdef MHD - else if (strcmp(name, "out_float32_magnetic_x") == 0) + } else if (strcmp(name, "out_float32_magnetic_x") == 0) { parms->out_float32_magnetic_x = atoi(value); - else if (strcmp(name, "out_float32_magnetic_y") == 0) + } else if (strcmp(name, "out_float32_magnetic_y") == 0) { parms->out_float32_magnetic_y = atoi(value); - else if (strcmp(name, "out_float32_magnetic_z") == 0) + } else if (strcmp(name, "out_float32_magnetic_z") == 0) { parms->out_float32_magnetic_z = atoi(value); #endif // MHD - else if (strcmp(name, "xmin") == 0) + } else if (strcmp(name, "xmin") == 0) { parms->xmin = atof(value); - else if (strcmp(name, "ymin") == 0) + } else if (strcmp(name, "ymin") == 0) { parms->ymin = atof(value); - else if (strcmp(name, "zmin") == 0) + } else if (strcmp(name, "zmin") == 0) { parms->zmin = atof(value); - else if (strcmp(name, "xlen") == 0) + } else if (strcmp(name, "xlen") == 0) { parms->xlen = atof(value); - else if (strcmp(name, "ylen") == 0) + } else if (strcmp(name, "ylen") == 0) { parms->ylen = atof(value); - else if (strcmp(name, "zlen") == 0) + } else if (strcmp(name, "zlen") == 0) { parms->zlen = atof(value); - else if (strcmp(name, "xl_bcnd") == 0) + } else if (strcmp(name, "xl_bcnd") == 0) { parms->xl_bcnd = atoi(value); - else if (strcmp(name, "xu_bcnd") == 0) + } else if (strcmp(name, "xu_bcnd") == 0) { parms->xu_bcnd = atoi(value); - else if (strcmp(name, "yl_bcnd") == 0) + } else if (strcmp(name, "yl_bcnd") == 0) { parms->yl_bcnd = atoi(value); - else if (strcmp(name, "yu_bcnd") == 0) + } else if (strcmp(name, "yu_bcnd") == 0) { parms->yu_bcnd = atoi(value); - else if (strcmp(name, "zl_bcnd") == 0) + } else if (strcmp(name, "zl_bcnd") == 0) { parms->zl_bcnd = atoi(value); - else if (strcmp(name, "zu_bcnd") == 0) + } else if (strcmp(name, "zu_bcnd") == 0) { parms->zu_bcnd = atoi(value); - else if (strcmp(name, "custom_bcnd") == 0) + } else if (strcmp(name, "custom_bcnd") == 0) { strncpy(parms->custom_bcnd, value, MAXLEN); - else if (strcmp(name, "outdir") == 0) + } else if (strcmp(name, "outdir") == 0) { strncpy(parms->outdir, value, MAXLEN); - else if (strcmp(name, "indir") == 0) + } else if (strcmp(name, "indir") == 0) { strncpy(parms->indir, value, MAXLEN); - else if (strcmp(name, "rho") == 0) + } else if (strcmp(name, "rho") == 0) { parms->rho = atof(value); - else if (strcmp(name, "vx") == 0) + } else if (strcmp(name, "vx") == 0) { parms->vx = atof(value); - else if (strcmp(name, "vy") == 0) + } else if (strcmp(name, "vy") == 0) { parms->vy = atof(value); - else if (strcmp(name, "vz") == 0) + } else if (strcmp(name, "vz") == 0) { parms->vz = atof(value); - else if (strcmp(name, "P") == 0) + } else if (strcmp(name, "P") == 0) { parms->P = atof(value); - else if (strcmp(name, "Bx") == 0) + } else if (strcmp(name, "Bx") == 0) { parms->Bx = atof(value); - else if (strcmp(name, "By") == 0) + } else if (strcmp(name, "By") == 0) { parms->By = atof(value); - else if (strcmp(name, "Bz") == 0) + } else if (strcmp(name, "Bz") == 0) { parms->Bz = atof(value); - else if (strcmp(name, "A") == 0) + } else if (strcmp(name, "A") == 0) { parms->A = atof(value); - else if (strcmp(name, "rho_l") == 0) + } else if (strcmp(name, "rho_l") == 0) { parms->rho_l = atof(value); - else if (strcmp(name, "vx_l") == 0) + } else if (strcmp(name, "vx_l") == 0) { parms->vx_l = atof(value); - else if (strcmp(name, "vy_l") == 0) + } else if (strcmp(name, "vy_l") == 0) { parms->vy_l = atof(value); - else if (strcmp(name, "vz_l") == 0) + } else if (strcmp(name, "vz_l") == 0) { parms->vz_l = atof(value); - else if (strcmp(name, "P_l") == 0) + } else if (strcmp(name, "P_l") == 0) { parms->P_l = atof(value); - else if (strcmp(name, "Bx_l") == 0) + } else if (strcmp(name, "Bx_l") == 0) { parms->Bx_l = atof(value); - else if (strcmp(name, "By_l") == 0) + } else if (strcmp(name, "By_l") == 0) { parms->By_l = atof(value); - else if (strcmp(name, "Bz_l") == 0) + } else if (strcmp(name, "Bz_l") == 0) { parms->Bz_l = atof(value); - else if (strcmp(name, "rho_r") == 0) + } else if (strcmp(name, "rho_r") == 0) { parms->rho_r = atof(value); - else if (strcmp(name, "vx_r") == 0) + } else if (strcmp(name, "vx_r") == 0) { parms->vx_r = atof(value); - else if (strcmp(name, "vy_r") == 0) + } else if (strcmp(name, "vy_r") == 0) { parms->vy_r = atof(value); - else if (strcmp(name, "vz_r") == 0) + } else if (strcmp(name, "vz_r") == 0) { parms->vz_r = atof(value); - else if (strcmp(name, "P_r") == 0) + } else if (strcmp(name, "P_r") == 0) { parms->P_r = atof(value); - else if (strcmp(name, "Bx_r") == 0) + } else if (strcmp(name, "Bx_r") == 0) { parms->Bx_r = atof(value); - else if (strcmp(name, "By_r") == 0) + } else if (strcmp(name, "By_r") == 0) { parms->By_r = atof(value); - else if (strcmp(name, "Bz_r") == 0) + } else if (strcmp(name, "Bz_r") == 0) { parms->Bz_r = atof(value); - else if (strcmp(name, "diaph") == 0) + } else if (strcmp(name, "diaph") == 0) { parms->diaph = atof(value); - else if (strcmp(name, "rEigenVec_rho") == 0) + } else if (strcmp(name, "rEigenVec_rho") == 0) { parms->rEigenVec_rho = atof(value); - else if (strcmp(name, "rEigenVec_MomentumX") == 0) + } else if (strcmp(name, "rEigenVec_MomentumX") == 0) { parms->rEigenVec_MomentumX = atof(value); - else if (strcmp(name, "rEigenVec_MomentumY") == 0) + } else if (strcmp(name, "rEigenVec_MomentumY") == 0) { parms->rEigenVec_MomentumY = atof(value); - else if (strcmp(name, "rEigenVec_MomentumZ") == 0) + } else if (strcmp(name, "rEigenVec_MomentumZ") == 0) { parms->rEigenVec_MomentumZ = atof(value); - else if (strcmp(name, "rEigenVec_E") == 0) + } else if (strcmp(name, "rEigenVec_E") == 0) { parms->rEigenVec_E = atof(value); - else if (strcmp(name, "rEigenVec_Bx") == 0) + } else if (strcmp(name, "rEigenVec_Bx") == 0) { parms->rEigenVec_Bx = atof(value); - else if (strcmp(name, "rEigenVec_By") == 0) + } else if (strcmp(name, "rEigenVec_By") == 0) { parms->rEigenVec_By = atof(value); - else if (strcmp(name, "rEigenVec_Bz") == 0) + } else if (strcmp(name, "rEigenVec_Bz") == 0) { parms->rEigenVec_Bz = atof(value); - else if (strcmp(name, "pitch") == 0) + } else if (strcmp(name, "pitch") == 0) { parms->pitch = atof(value); - else if (strcmp(name, "yaw") == 0) + } else if (strcmp(name, "yaw") == 0) { parms->yaw = atof(value); #ifdef PARTICLES - else if (strcmp(name, "prng_seed") == 0) + } else if (strcmp(name, "prng_seed") == 0) { parms->prng_seed = atoi(value); #endif // PARTICLES #ifdef SUPERNOVA - else if (strcmp(name, "snr_filename") == 0) + } else if (strcmp(name, "snr_filename") == 0) { strncpy(parms->snr_filename, value, MAXLEN); #endif #ifdef ROTATED_PROJECTION - else if (strcmp(name, "nxr") == 0) + } else if (strcmp(name, "nxr") == 0) { parms->nxr = atoi(value); - else if (strcmp(name, "nzr") == 0) + } else if (strcmp(name, "nzr") == 0) { parms->nzr = atoi(value); - else if (strcmp(name, "delta") == 0) + } else if (strcmp(name, "delta") == 0) { parms->delta = atof(value); - else if (strcmp(name, "theta") == 0) + } else if (strcmp(name, "theta") == 0) { parms->theta = atof(value); - else if (strcmp(name, "phi") == 0) + } else if (strcmp(name, "phi") == 0) { parms->phi = atof(value); - else if (strcmp(name, "Lx") == 0) + } else if (strcmp(name, "Lx") == 0) { parms->Lx = atof(value); - else if (strcmp(name, "Lz") == 0) + } else if (strcmp(name, "Lz") == 0) { parms->Lz = atof(value); - else if (strcmp(name, "n_delta") == 0) + } else if (strcmp(name, "n_delta") == 0) { parms->n_delta = atoi(value); - else if (strcmp(name, "ddelta_dt") == 0) + } else if (strcmp(name, "ddelta_dt") == 0) { parms->ddelta_dt = atof(value); - else if (strcmp(name, "flag_delta") == 0) + } else if (strcmp(name, "flag_delta") == 0) { parms->flag_delta = atoi(value); #endif /*ROTATED_PROJECTION*/ #ifdef COSMOLOGY - else if (strcmp(name, "scale_outputs_file") == 0) + } else if (strcmp(name, "scale_outputs_file") == 0) { strncpy(parms->scale_outputs_file, value, MAXLEN); - else if (strcmp(name, "Init_redshift") == 0) + } else if (strcmp(name, "Init_redshift") == 0) { parms->Init_redshift = atof(value); - else if (strcmp(name, "End_redshift") == 0) + } else if (strcmp(name, "End_redshift") == 0) { parms->End_redshift = atof(value); - else if (strcmp(name, "H0") == 0) + } else if (strcmp(name, "H0") == 0) { parms->H0 = atof(value); - else if (strcmp(name, "Omega_M") == 0) + } else if (strcmp(name, "Omega_M") == 0) { parms->Omega_M = atof(value); - else if (strcmp(name, "Omega_L") == 0) + } else if (strcmp(name, "Omega_L") == 0) { parms->Omega_L = atof(value); - else if (strcmp(name, "Omega_b") == 0) + } else if (strcmp(name, "Omega_b") == 0) { parms->Omega_b = atof(value); #endif // COSMOLOGY #ifdef TILED_INITIAL_CONDITIONS - else if (strcmp(name, "tile_length") == 0) + } else if (strcmp(name, "tile_length") == 0) { parms->tile_length = atof(value); #endif // TILED_INITIAL_CONDITIONS #ifdef SET_MPI_GRID - // Set the MPI Processes grid [n_proc_x, n_proc_y, n_proc_z] - else if (strcmp(name, "n_proc_x") == 0) + // Set the MPI Processes grid [n_proc_x, n_proc_y, n_proc_z] + } else if (strcmp(name, "n_proc_x") == 0) { parms->n_proc_x = atoi(value); - else if (strcmp(name, "n_proc_y") == 0) + } else if (strcmp(name, "n_proc_y") == 0) { parms->n_proc_y = atoi(value); - else if (strcmp(name, "n_proc_z") == 0) + } else if (strcmp(name, "n_proc_z") == 0) { parms->n_proc_z = atoi(value); #endif - else if (strcmp(name, "bc_potential_type") == 0) + } else if (strcmp(name, "bc_potential_type") == 0) { parms->bc_potential_type = atoi(value); #ifdef CHEMISTRY_GPU - else if (strcmp(name, "UVB_rates_file") == 0) + } else if (strcmp(name, "UVB_rates_file") == 0) { strncpy(parms->UVB_rates_file, value, MAXLEN); #endif #ifdef COOLING_GRACKLE - else if (strcmp(name, "UVB_rates_file") == 0) + } else if (strcmp(name, "UVB_rates_file") == 0) { strncpy(parms->UVB_rates_file, value, MAXLEN); #endif #ifdef ANALYSIS - else if (strcmp(name, "analysis_scale_outputs_file") == 0) + } else if (strcmp(name, "analysis_scale_outputs_file") == 0) { strncpy(parms->analysis_scale_outputs_file, value, MAXLEN); - else if (strcmp(name, "analysisdir") == 0) + } else if (strcmp(name, "analysisdir") == 0) { strncpy(parms->analysisdir, value, MAXLEN); - else if (strcmp(name, "lya_skewers_stride") == 0) + } else if (strcmp(name, "lya_skewers_stride") == 0) { parms->lya_skewers_stride = atoi(value); - else if (strcmp(name, "lya_Pk_d_log_k") == 0) + } else if (strcmp(name, "lya_Pk_d_log_k") == 0) { parms->lya_Pk_d_log_k = atof(value); #ifdef OUTPUT_SKEWERS - else if (strcmp(name, "skewersdir") == 0) + } else if (strcmp(name, "skewersdir") == 0) { strncpy(parms->skewersdir, value, MAXLEN); #endif #endif - else if (!is_param_valid(name)) + } else if (!is_param_valid(name)) { chprintf("WARNING: %s/%s: Unknown parameter/value pair!\n", name, value); + } } diff --git a/src/global/global_cuda.h b/src/global/global_cuda.h index c0d71dd22..0e3253af9 100644 --- a/src/global/global_cuda.h +++ b/src/global/global_cuda.h @@ -90,10 +90,11 @@ inline void gpuAssert(cudaError_t code, char *file, int line, bool abort = true) * \brief Mathematical sign function. Returns sign of x. */ __device__ inline int sgn_CUDA(Real x) { - if (x < 0) + if (x < 0) { return -1; - else + } else { return 1; + } } // Define atomic_add if it's not supported diff --git a/src/grid/cuda_boundaries.cu b/src/grid/cuda_boundaries.cu index b3dd62f6c..f92ff4710 100644 --- a/src/grid/cuda_boundaries.cu +++ b/src/grid/cuda_boundaries.cu @@ -390,23 +390,27 @@ __global__ void Noh_Boundary_kernel(Real *c_device, int nx, int ny, int nz, int z_pos = (z_off + zid - n_ghost + 0.5) * dz + zbound; // for 2D calculate polar r - if (nz == 1) r = sqrt(x_pos * x_pos + y_pos * y_pos); - // for 3D calculate spherical r - else + if (nz == 1) { + r = sqrt(x_pos * x_pos + y_pos * y_pos); + // for 3D calculate spherical r + } else { r = sqrt(x_pos * x_pos + y_pos * y_pos + z_pos * z_pos); + } // calculate the velocities vx = -x_pos / r; vy = -y_pos / r; - if (nz > 1) + if (nz > 1) { vz = -z_pos / r; - else + } else { vz = 0; + } // set the conserved quantities - if (nz > 1) + if (nz > 1) { c_device[gid] = d_0 * (1.0 + t / r) * (1.0 + t / r); - else + } else { c_device[gid] = d_0 * (1.0 + t / r); + } c_device[gid + 1 * n_cells] = vx * c_device[gid]; c_device[gid + 2 * n_cells] = vy * c_device[gid]; c_device[gid + 3 * n_cells] = vz * c_device[gid]; @@ -435,23 +439,27 @@ __global__ void Noh_Boundary_kernel(Real *c_device, int nx, int ny, int nz, int z_pos = (z_off + zid - n_ghost + 0.5) * dz + zbound; // for 2D calculate polar r - if (nz == 1) r = sqrt(x_pos * x_pos + y_pos * y_pos); - // for 3D, calculate spherical r - else + if (nz == 1) { + r = sqrt(x_pos * x_pos + y_pos * y_pos); + // for 3D, calculate spherical r + } else { r = sqrt(x_pos * x_pos + y_pos * y_pos + z_pos * z_pos); + } // calculate the velocities vx = -x_pos / r; vy = -y_pos / r; - if (nz > 1) + if (nz > 1) { vz = -z_pos / r; - else + } else { vz = 0; + } // set the conserved quantities - if (nz > 1) + if (nz > 1) { c_device[gid] = d_0 * (1.0 + t / r) * (1.0 + t / r); - else + } else { c_device[gid] = d_0 * (1.0 + t / r); + } c_device[gid + 1 * n_cells] = vx * c_device[gid]; c_device[gid + 2 * n_cells] = vy * c_device[gid]; c_device[gid + 3 * n_cells] = vz * c_device[gid]; @@ -483,23 +491,27 @@ __global__ void Noh_Boundary_kernel(Real *c_device, int nx, int ny, int nz, int z_pos = (z_off + zid - n_ghost + 0.5) * dz + zbound; // for 2D calculate polar r - if (nz == 1) r = sqrt(x_pos * x_pos + y_pos * y_pos); - // for 3D, calculate spherical r - else + if (nz == 1) { + r = sqrt(x_pos * x_pos + y_pos * y_pos); + // for 3D, calculate spherical r + } else { r = sqrt(x_pos * x_pos + y_pos * y_pos + z_pos * z_pos); + } // calculate the velocities vx = -x_pos / r; vy = -y_pos / r; - if (nz > 1) + if (nz > 1) { vz = -z_pos / r; - else + } else { vz = 0; + } // set the conserved quantities - if (nz > 1) + if (nz > 1) { c_device[gid] = d_0 * (1.0 + t / r) * (1.0 + t / r); - else + } else { c_device[gid] = d_0 * (1.0 + t / r); + } c_device[gid + 1 * n_cells] = vx * c_device[gid]; c_device[gid + 2 * n_cells] = vy * c_device[gid]; c_device[gid + 3 * n_cells] = vz * c_device[gid]; diff --git a/src/grid/mpi_boundaries.cpp b/src/grid/mpi_boundaries.cpp index 9f6f6d3de..da05f8447 100644 --- a/src/grid/mpi_boundaries.cpp +++ b/src/grid/mpi_boundaries.cpp @@ -777,22 +777,28 @@ void Grid3D::Wait_and_Unload_MPI_Comm_Buffers(int dir, int *flags) // find out how many recvs we need to wait for if (dir == 0) { - if (flags[0] == 5) // there is communication on this face - wait_max++; // so we'll need to wait for its comm - if (flags[1] == 5) // there is communication on this face - wait_max++; // so we'll need to wait for its comm + if (flags[0] == 5) { // there is communication on this face + wait_max++; // so we'll need to wait for its comm + } + if (flags[1] == 5) { // there is communication on this face + wait_max++; // so we'll need to wait for its comm + } } if (dir == 1) { - if (flags[2] == 5) // there is communication on this face - wait_max++; // so we'll need to wait for its comm - if (flags[3] == 5) // there is communication on this face - wait_max++; // so we'll need to wait for its comm + if (flags[2] == 5) { // there is communication on this face + wait_max++; // so we'll need to wait for its comm + } + if (flags[3] == 5) { // there is communication on this face + wait_max++; // so we'll need to wait for its comm + } } if (dir == 2) { - if (flags[4] == 5) // there is communication on this face - wait_max++; // so we'll need to wait for its comm - if (flags[5] == 5) // there is communication on this face - wait_max++; // so we'll need to wait for its comm + if (flags[4] == 5) { // there is communication on this face + wait_max++; // so we'll need to wait for its comm + } + if (flags[5] == 5) { // there is communication on this face + wait_max++; // so we'll need to wait for its comm + } } // wait for any receives to complete diff --git a/src/hydro/hydro_cuda.cu b/src/hydro/hydro_cuda.cu index cb59cea22..b56315e6a 100644 --- a/src/hydro/hydro_cuda.cu +++ b/src/hydro/hydro_cuda.cu @@ -66,8 +66,9 @@ __global__ void Update_Conserved_Variables_1D(Real *dev_conserved, Real *dev_F, dev_conserved[n_cells + id] += 0.5 * dt * gx * (d + d_n); dev_conserved[4 * n_cells + id] += 0.25 * dt * gx * (d + d_n) * (vx + vx_n); #endif - if (dev_conserved[id] != dev_conserved[id]) + if (dev_conserved[id] != dev_conserved[id]) { printf("%3d Thread crashed in final update. %f\n", id, dev_conserved[id]); + } /* d = dev_conserved[ id]; d_inv = 1.0 / d; diff --git a/src/io/io.cpp b/src/io/io.cpp index cd1af83a7..695775e63 100644 --- a/src/io/io.cpp +++ b/src/io/io.cpp @@ -302,36 +302,45 @@ void OutputFloat32(Grid3D &G, struct parameters P, int nfile) float *device_dataset_buffer = device_dataset_vector.data(); float *dataset_buffer = (float *)malloc(buffer_size * sizeof(float)); - if (P.out_float32_density > 0) + if (P.out_float32_density > 0) { WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, device_dataset_buffer, G.C.d_density, "/density"); - if (P.out_float32_momentum_x > 0) + } + if (P.out_float32_momentum_x > 0) { WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, device_dataset_buffer, G.C.d_momentum_x, "/momentum_x"); - if (P.out_float32_momentum_y > 0) + } + if (P.out_float32_momentum_y > 0) { WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, device_dataset_buffer, G.C.d_momentum_y, "/momentum_y"); - if (P.out_float32_momentum_z > 0) + } + if (P.out_float32_momentum_z > 0) { WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, device_dataset_buffer, G.C.d_momentum_z, "/momentum_z"); - if (P.out_float32_Energy > 0) + } + if (P.out_float32_Energy > 0) { WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, device_dataset_buffer, G.C.d_Energy, "/Energy"); + } #ifdef DE - if (P.out_float32_GasEnergy > 0) + if (P.out_float32_GasEnergy > 0) { WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, device_dataset_buffer, G.C.d_GasEnergy, "/GasEnergy"); + } #endif // DE #ifdef MHD - if (P.out_float32_magnetic_x > 0) + if (P.out_float32_magnetic_x > 0) { WriteHDF5Field3D(H.nx, H.ny, nx_dset + 1, ny_dset + 1, nz_dset + 1, H.n_ghost - 1, file_id, dataset_buffer, device_dataset_buffer, G.C.d_magnetic_x, "/magnetic_x"); - if (P.out_float32_magnetic_y > 0) + } + if (P.out_float32_magnetic_y > 0) { WriteHDF5Field3D(H.nx, H.ny, nx_dset + 1, ny_dset + 1, nz_dset + 1, H.n_ghost - 1, file_id, dataset_buffer, device_dataset_buffer, G.C.d_magnetic_y, "/magnetic_y"); - if (P.out_float32_magnetic_z > 0) + } + if (P.out_float32_magnetic_z > 0) { WriteHDF5Field3D(H.nx, H.ny, nx_dset + 1, ny_dset + 1, nz_dset + 1, H.n_ghost - 1, file_id, dataset_buffer, device_dataset_buffer, G.C.d_magnetic_z, "/magnetic_z"); + } #endif free(dataset_buffer); @@ -1743,8 +1752,9 @@ void Grid3D::Write_Projection_HDF5(hid_t file_id) // Free the dataspace ids status = H5Sclose(dataspace_xz_id); status = H5Sclose(dataspace_xy_id); - } else + } else { printf("Projection write only works for 3D data.\n"); + } free(dataset_buffer_dxy); free(dataset_buffer_dxz); @@ -1901,8 +1911,9 @@ void Grid3D::Write_Rotated_Projection_HDF5(hid_t file_id) free(dataset_buffer_vyxzr); free(dataset_buffer_vzxzr); - } else + } else { chprintf("Rotated projection write only implemented for 3D data.\n"); + } } #endif // HDF5 @@ -2221,8 +2232,9 @@ void Grid3D::Write_Slices_HDF5(hid_t file_id) free(dataset_buffer_scalar); #endif - } else + } else { printf("Slice write only works for 3D data.\n"); + } } #endif // HDF5 diff --git a/src/model/disk_ICs.cpp b/src/model/disk_ICs.cpp index fb2b9fa96..8cfba49ca 100644 --- a/src/model/disk_ICs.cpp +++ b/src/model/disk_ICs.cpp @@ -308,11 +308,12 @@ void hydrostatic_column_isothermal_D3D(Real *rho, Real R, Real *hdp, Real dz, in z_1 = z_hc_D3D(ks, dz, nz, ng) + 0.5 * dz; // cell ceiling D_rho = (phi_total_D3D(R, z_1, hdp) - Phi_0) / (cs * cs); - if (exp(-1 * D_rho) < 0.1) + if (exp(-1 * D_rho) < 0.1) { printf( "WARNING: >0.9 density in single cell R %e D_rho %e z_1 %e Phi(z) %e " "Phi_0 %E cs %e\n", R, D_rho, z_1, phi_total_D3D(R, z_1, hdp), Phi_0, cs); + } // let's find the cell above the disk where the // density falls by exp(-7) < 1.0e-3. diff --git a/src/mpi/cuda_mpi_routines.cu b/src/mpi/cuda_mpi_routines.cu index 8983797fa..987d5fe67 100644 --- a/src/mpi/cuda_mpi_routines.cu +++ b/src/mpi/cuda_mpi_routines.cu @@ -21,16 +21,18 @@ int initialize_cuda_mpi(int myid, int nprocs) // check for errors if (flag_error != cudaSuccess) { - if (flag_error == cudaErrorNoDevice) + if (flag_error == cudaErrorNoDevice) { fprintf(stderr, "cudaGetDeviceCount: Error! for myid = %d and n_device = %d; " "cudaErrorNoDevice\n", myid, n_device); - if (flag_error == cudaErrorInsufficientDriver) + } + if (flag_error == cudaErrorInsufficientDriver) { fprintf(stderr, "cudaGetDeviceCount: Error! for myid = %d and n_device = %d; " "cudaErrorInsufficientDriver\n", myid, n_device); + } fflush(stderr); return 1; } diff --git a/src/mpi/mpi_routines.cpp b/src/mpi/mpi_routines.cpp index e11b5a257..cf35af02a 100644 --- a/src/mpi/mpi_routines.cpp +++ b/src/mpi/mpi_routines.cpp @@ -241,15 +241,17 @@ void DomainDecomposition(struct parameters *P, struct Header *H, int nx_gin, int // set grid dimensions H->nx = nx_local + 2 * H->n_ghost; H->nx_real = nx_local; - if (ny_local == 1) + if (ny_local == 1) { H->ny = 1; - else + } else { H->ny = ny_local + 2 * H->n_ghost; + } H->ny_real = ny_local; - if (nz_local == 1) + if (nz_local == 1) { H->nz = 1; - else + } else { H->nz = nz_local + 2 * H->n_ghost; + } H->nz_real = nz_local; // set total number of cells @@ -324,8 +326,8 @@ void DomainDecompositionBLOCK(struct parameters *P, struct Header *H, int nx_gin // for(k=0;k (d_R - d_L) * (d_R - d_L)) d_L = 3.0 * d_i - 2.0 * d_R; - if (6.0 * (vx_R - vx_L) * (vx_i - 0.5 * (vx_L + vx_R)) > (vx_R - vx_L) * (vx_R - vx_L)) + if (6.0 * (vx_R - vx_L) * (vx_i - 0.5 * (vx_L + vx_R)) > (vx_R - vx_L) * (vx_R - vx_L)) { vx_L = 3.0 * vx_i - 2.0 * vx_R; - if (6.0 * (vy_R - vy_L) * (vy_i - 0.5 * (vy_L + vy_R)) > (vy_R - vy_L) * (vy_R - vy_L)) + } + if (6.0 * (vy_R - vy_L) * (vy_i - 0.5 * (vy_L + vy_R)) > (vy_R - vy_L) * (vy_R - vy_L)) { vy_L = 3.0 * vy_i - 2.0 * vy_R; - if (6.0 * (vz_R - vz_L) * (vz_i - 0.5 * (vz_L + vz_R)) > (vz_R - vz_L) * (vz_R - vz_L)) + } + if (6.0 * (vz_R - vz_L) * (vz_i - 0.5 * (vz_L + vz_R)) > (vz_R - vz_L) * (vz_R - vz_L)) { vz_L = 3.0 * vz_i - 2.0 * vz_R; + } if (6.0 * (p_R - p_L) * (p_i - 0.5 * (p_L + p_R)) > (p_R - p_L) * (p_R - p_L)) p_L = 3.0 * p_i - 2.0 * p_R; if (6.0 * (d_R - d_L) * (d_i - 0.5 * (d_L + d_R)) < -(d_R - d_L) * (d_R - d_L)) d_R = 3.0 * d_i - 2.0 * d_L; - if (6.0 * (vx_R - vx_L) * (vx_i - 0.5 * (vx_L + vx_R)) < -(vx_R - vx_L) * (vx_R - vx_L)) + if (6.0 * (vx_R - vx_L) * (vx_i - 0.5 * (vx_L + vx_R)) < -(vx_R - vx_L) * (vx_R - vx_L)) { vx_R = 3.0 * vx_i - 2.0 * vx_L; - if (6.0 * (vy_R - vy_L) * (vy_i - 0.5 * (vy_L + vy_R)) < -(vy_R - vy_L) * (vy_R - vy_L)) + } + if (6.0 * (vy_R - vy_L) * (vy_i - 0.5 * (vy_L + vy_R)) < -(vy_R - vy_L) * (vy_R - vy_L)) { vy_R = 3.0 * vy_i - 2.0 * vy_L; - if (6.0 * (vz_R - vz_L) * (vz_i - 0.5 * (vz_L + vz_R)) < -(vz_R - vz_L) * (vz_R - vz_L)) + } + if (6.0 * (vz_R - vz_L) * (vz_i - 0.5 * (vz_L + vz_R)) < -(vz_R - vz_L) * (vz_R - vz_L)) { vz_R = 3.0 * vz_i - 2.0 * vz_L; + } if (6.0 * (p_R - p_L) * (p_i - 0.5 * (p_L + p_R)) < -(p_R - p_L) * (p_R - p_L)) p_R = 3.0 * p_i - 2.0 * p_L; d_L = fmax(fmin(d_i, d_imo), d_L); @@ -861,10 +867,12 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou #ifdef DE if ((ge_R - ge_i) * (ge_i - ge_L) <= 0) ge_L = ge_R = ge_i; - if (6.0 * (ge_R - ge_L) * (ge_i - 0.5 * (ge_L + ge_R)) > (ge_R - ge_L) * (ge_R - ge_L)) + if (6.0 * (ge_R - ge_L) * (ge_i - 0.5 * (ge_L + ge_R)) > (ge_R - ge_L) * (ge_R - ge_L)) { ge_L = 3.0 * ge_i - 2.0 * ge_R; - if (6.0 * (ge_R - ge_L) * (ge_i - 0.5 * (ge_L + ge_R)) < -(ge_R - ge_L) * (ge_R - ge_L)) + } + if (6.0 * (ge_R - ge_L) * (ge_i - 0.5 * (ge_L + ge_R)) < -(ge_R - ge_L) * (ge_R - ge_L)) { ge_R = 3.0 * ge_i - 2.0 * ge_L; + } ge_L = fmax(fmin(ge_i, ge_imo), ge_L); ge_L = fmin(fmax(ge_i, ge_imo), ge_L); ge_R = fmax(fmin(ge_i, ge_ipo), ge_R); @@ -875,11 +883,13 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou for (int i = 0; i < NSCALARS; i++) { if ((scalar_R[i] - scalar_i[i]) * (scalar_i[i] - scalar_L[i]) <= 0) scalar_L[i] = scalar_R[i] = scalar_i[i]; if (6.0 * (scalar_R[i] - scalar_L[i]) * (scalar_i[i] - 0.5 * (scalar_L[i] + scalar_R[i])) > - (scalar_R[i] - scalar_L[i]) * (scalar_R[i] - scalar_L[i])) + (scalar_R[i] - scalar_L[i]) * (scalar_R[i] - scalar_L[i])) { scalar_L[i] = 3.0 * scalar_i[i] - 2.0 * scalar_R[i]; + } if (6.0 * (scalar_R[i] - scalar_L[i]) * (scalar_i[i] - 0.5 * (scalar_L[i] + scalar_R[i])) < - -(scalar_R[i] - scalar_L[i]) * (scalar_R[i] - scalar_L[i])) + -(scalar_R[i] - scalar_L[i]) * (scalar_R[i] - scalar_L[i])) { scalar_R[i] = 3.0 * scalar_i[i] - 2.0 * scalar_L[i]; + } scalar_L[i] = fmax(fmin(scalar_i[i], scalar_imo[i]), scalar_L[i]); scalar_L[i] = fmin(fmax(scalar_i[i], scalar_imo[i]), scalar_L[i]); scalar_R[i] = fmax(fmin(scalar_i[i], scalar_ipo[i]), scalar_R[i]); From 93b0897aae8a002bde86705de52fe37cc111ba95 Mon Sep 17 00:00:00 2001 From: helenarichie Date: Fri, 3 Mar 2023 11:10:14 -0500 Subject: [PATCH 225/694] resolve readability-braces-around-statements check for disk build --- src/gravity/paris/PoissonZero3DBlockedGPU.cu | 6 +- src/hydro/hydro_cuda.cu | 15 +++-- src/model/disk_ICs.cpp | 23 +++++--- src/particles/feedback_CIC_gpu.cu | 24 ++++---- src/particles/io_particles.cpp | 3 +- src/particles/particles_3D.cpp | 9 +-- src/particles/particles_boundaries.cpp | 60 ++++++++++++-------- src/particles/particles_boundaries_gpu.cu | 3 +- src/reconstruction/ppmc_cuda.cu | 9 ++- src/riemann_solvers/roe_cuda.cu | 10 ++-- 10 files changed, 98 insertions(+), 64 deletions(-) diff --git a/src/gravity/paris/PoissonZero3DBlockedGPU.cu b/src/gravity/paris/PoissonZero3DBlockedGPU.cu index a741b567e..fc994753a 100644 --- a/src/gravity/paris/PoissonZero3DBlockedGPU.cu +++ b/src/gravity/paris/PoissonZero3DBlockedGPU.cu @@ -158,8 +158,9 @@ void PoissonZero3DBlockedGPU::solve(const long bytes, double *const density, dou mp, mq, dip, djq, dk, GPU_LAMBDA(const int p, const int q, const int i, const int j, const int k) { const int iLo = p * dip; const int jLo = q * djq; - if ((i + iLo < di) && (j + jLo < dj)) + if ((i + iLo < di) && (j + jLo < dj)) { ua[(((p * mq + q) * dip + i) * djq + j) * dk + k] = ub[((i + iLo) * dj + j + jLo) * dk + k]; + } }); #ifndef MPI_GPU CHECK(cudaMemcpy(ha_, ua, bytes_, cudaMemcpyDeviceToHost)); @@ -510,8 +511,9 @@ void PoissonZero3DBlockedGPU::solve(const long bytes, double *const density, dou mp, dip, mq, djq, dk, GPU_LAMBDA(const int p, const int i, const int q, const int j, const int k) { const int iLo = p * dip; const int jLo = q * djq; - if ((iLo + i < di) && (jLo + j < dj)) + if ((iLo + i < di) && (jLo + j < dj)) { ua[((i + iLo) * dj + j + jLo) * dk + k] = ub[(((p * mq + q) * dip + i) * djq + j) * dk + k]; + } }); } diff --git a/src/hydro/hydro_cuda.cu b/src/hydro/hydro_cuda.cu index b56315e6a..c3d48ec86 100644 --- a/src/hydro/hydro_cuda.cu +++ b/src/hydro/hydro_cuda.cu @@ -862,10 +862,11 @@ __global__ void Select_Internal_Energy_1D(Real *dev_conserved, int nx, int n_gho Emax = fmax(dev_conserved[4 * n_cells + imo], E); Emax = fmax(Emax, dev_conserved[4 * n_cells + ipo]); - if (U_total / Emax > eta_2) + if (U_total / Emax > eta_2) { U = U_total; - else + } else { U = U_advected; + } // Optional: Avoid Negative Internal Energies U = fmax(U, (Real)TINY_NUMBER); @@ -919,10 +920,11 @@ __global__ void Select_Internal_Energy_2D(Real *dev_conserved, int nx, int ny, i Emax = fmax(Emax, dev_conserved[4 * n_cells + jmo]); Emax = fmax(Emax, dev_conserved[4 * n_cells + jpo]); - if (U_total / Emax > eta_2) + if (U_total / Emax > eta_2) { U = U_total; - else + } else { U = U_advected; + } // Optional: Avoid Negative Internal Energies U = fmax(U, (Real)TINY_NUMBER); @@ -983,10 +985,11 @@ __global__ void Select_Internal_Energy_3D(Real *dev_conserved, int nx, int ny, i Emax = fmax(Emax, dev_conserved[4 * n_cells + kmo]); Emax = fmax(Emax, dev_conserved[4 * n_cells + kpo]); - if (U_total / Emax > eta_2) + if (U_total / Emax > eta_2) { U = U_total; - else + } else { U = U_advected; + } // Optional: Avoid Negative Internal Energies U = fmax(U, (Real)TINY_NUMBER); diff --git a/src/model/disk_ICs.cpp b/src/model/disk_ICs.cpp index 8cfba49ca..bce2d674d 100644 --- a/src/model/disk_ICs.cpp +++ b/src/model/disk_ICs.cpp @@ -905,14 +905,16 @@ void Grid3D::Disk_3D(parameters p) // pressure gradient along x direction // gradient calc is first order at boundaries - if (i == H.n_ghost) + if (i == H.n_ghost) { idm = i + j * H.nx + k * H.nx * H.ny; - else + } else { idm = (i - 1) + j * H.nx + k * H.nx * H.ny; - if (i == H.nx - H.n_ghost - 1) + } + if (i == H.nx - H.n_ghost - 1) { idp = i + j * H.nx + k * H.nx * H.ny; - else + } else { idp = (i + 1) + j * H.nx + k * H.nx * H.ny; + } Get_Position(i - 1, j, k, &xpm, &ypm, &zpm); Get_Position(i + 1, j, k, &xpp, &ypp, &zpp); Pm = C.Energy[idm] * (gama - 1.0); // only internal energy stored in energy currently @@ -920,14 +922,16 @@ void Grid3D::Disk_3D(parameters p) dPdx = (Pp - Pm) / (xpp - xpm); // pressure gradient along y direction - if (j == H.n_ghost) + if (j == H.n_ghost) { idm = i + j * H.nx + k * H.nx * H.ny; - else + } else { idm = i + (j - 1) * H.nx + k * H.nx * H.ny; - if (j == H.ny - H.n_ghost - 1) + } + if (j == H.ny - H.n_ghost - 1) { idp = i + j * H.nx + k * H.nx * H.ny; - else + } else { idp = i + (j + 1) * H.nx + k * H.nx * H.ny; + } Get_Position(i, j - 1, k, &xpm, &ypm, &zpm); Get_Position(i, j + 1, k, &xpp, &ypp, &zpm); Pm = C.Energy[idm] * (gama - 1.0); // only internal energy stored in energy currently @@ -963,8 +967,9 @@ void Grid3D::Disk_3D(parameters p) // sheepishly check for NaN's! - if ((d < 0) || (P < 0) || (isnan(d)) || (isnan(P)) || (d != d) || (P != P)) + if ((d < 0) || (P < 0) || (isnan(d)) || (isnan(P)) || (d != d) || (P != P)) { printf("d %e P %e i %d j %d k %d id %d\n", d, P, i, j, k, id); + } if ((isnan(vx)) || (isnan(vy)) || (isnan(vz)) || (vx != vx) || (vy != vy) || (vz != vz)) { printf("vx %e vy %e vz %e i %d j %d k %d id %d\n", vx, vy, vz, i, j, k, id); diff --git a/src/particles/feedback_CIC_gpu.cu b/src/particles/feedback_CIC_gpu.cu index 18c5cd185..186973afb 100644 --- a/src/particles/feedback_CIC_gpu.cu +++ b/src/particles/feedback_CIC_gpu.cu @@ -103,9 +103,10 @@ void supernova::initState(struct parameters* P, part_int_t n_local, Real allocat } else if (i == 1) { snr.push_back(pow(10, std::stof(std::string(data))) / 1000); } - if (i > 0) + if (i > 0) { break; // only care about the first 2 items. Once i = 1 can break - // here. + } // here. + data = strtok(nullptr, s99_delim); i++; } @@ -285,9 +286,9 @@ __global__ void Cluster_Feedback_Kernel(part_int_t n_local, part_int_t* id, Real int N = 0; // only calculate this if there will be SN feedback if ((t - age_dev[gtid]) <= time_sn_end) { - if (direction == -1) + if (direction == -1) { N = -prev_N[gtid]; - else { + } else { Real average_num_sn = GetSNRate(t - age_dev[gtid], dev_snr, snr_dt, time_sn_start, time_sn_end) * mass_dev[gtid] * dt; @@ -310,9 +311,9 @@ __global__ void Cluster_Feedback_Kernel(part_int_t n_local, part_int_t* id, Real mass_dev[gtid] -= N * supernova::MASS_PER_SN; feedback_energy = N * supernova::ENERGY_PER_SN / dV; feedback_density = N * supernova::MASS_PER_SN / dV; - if (direction == -1) + if (direction == -1) { n_0 = prev_dens[gtid]; - else { + } else { n_0 = GetAverageNumberDensity_CGS(density, indx_x, indx_y, indx_z, nx_g, ny_g, n_ghost); prev_dens[gtid] = n_0; } @@ -326,17 +327,19 @@ __global__ void Cluster_Feedback_Kernel(part_int_t n_local, part_int_t* id, Real feedback_momentum = direction * supernova::FINAL_MOMENTUM * pow(n_0, -0.17) * pow(fabsf(N), 0.93) / dV; shell_radius = supernova::R_SH * pow(n_0, -0.46) * pow(fabsf(N), 0.29); is_resolved = 3 * max(dx, max(dy, dz)) <= shell_radius; - if (!is_resolved) + if (!is_resolved) { kernel_printf( "UR[%f] at (%d, %d, %d) id=%d, N=%d, shell_rad=%0.4e, " "n_0=%0.4e\n", t, indx_x + n_ghost, indx_y + n_ghost, indx_z + n_ghost, (int)id[gtid], N, shell_radius, n_0); + } s_info[FEED_INFO_N * tid] = 1. * N; - if (is_resolved) + if (is_resolved) { s_info[FEED_INFO_N * tid + 1] = direction * 1.0; - else + } else { s_info[FEED_INFO_N * tid + 2] = direction * 1.0; + } int indx; @@ -423,9 +426,10 @@ __global__ void Cluster_Feedback_Kernel(part_int_t n_local, part_int_t* id, Real density[indx] * DENSITY_UNIT / 0.6 / MP, n_0); } - if (direction > 0) + if (direction > 0) { local_dti = fmax(local_dti, Calc_Timestep(gamma, density, momentum_x, momentum_y, momentum_z, energy, indx, dx, dy, dz)); + } } } } diff --git a/src/particles/io_particles.cpp b/src/particles/io_particles.cpp index 66709bea8..a0fc4f32b 100644 --- a/src/particles/io_particles.cpp +++ b/src/particles/io_particles.cpp @@ -548,8 +548,9 @@ void Grid3D::Write_Particles_Data_HDF5(hid_t file_id) // Print a warning if the number of particles has changed from the initial // number of particles. This will indicate an error on the Particles // transfers. - if (N_particles_total != Particles.n_total_initial) + if (N_particles_total != Particles.n_total_initial) { chprintf(" WARNING: Lost Particles: %d \n", Particles.n_total_initial - N_particles_total); + } // Create the data space for the datasets dims[0] = n_local; diff --git a/src/particles/particles_3D.cpp b/src/particles/particles_3D.cpp index b68a966a8..1012e9ba5 100644 --- a/src/particles/particles_3D.cpp +++ b/src/particles/particles_3D.cpp @@ -194,16 +194,17 @@ void Particles_3D::Initialize(struct parameters *P, Grav3D &Grav, Real xbound, R Initialize_Grid_Values(); // Initialize Particles - if (strcmp(P->init, "Spherical_Overdensity_3D") == 0) + if (strcmp(P->init, "Spherical_Overdensity_3D") == 0) { Initialize_Sphere(P); - else if (strcmp(P->init, "Zeldovich_Pancake") == 0) + } else if (strcmp(P->init, "Zeldovich_Pancake") == 0) { Initialize_Zeldovich_Pancake(P); - else if (strcmp(P->init, "Read_Grid") == 0) + } else if (strcmp(P->init, "Read_Grid") == 0) { Load_Particles_Data(P); #if defined(PARTICLE_AGE) && !defined(SINGLE_PARTICLE_MASS) && defined(PARTICLE_IDS) - else if (strcmp(P->init, "Disk_3D_particles") == 0) + } else if (strcmp(P->init, "Disk_3D_particles") == 0) { Initialize_Disk_Stellar_Clusters(P); #endif + } #ifdef MPI_CHOLLA n_total_initial = ReducePartIntSum(n_local); diff --git a/src/particles/particles_boundaries.cpp b/src/particles/particles_boundaries.cpp index 24692be02..0befb46d8 100644 --- a/src/particles/particles_boundaries.cpp +++ b/src/particles/particles_boundaries.cpp @@ -53,22 +53,28 @@ void Grid3D::Wait_and_Unload_MPI_Comm_Particles_Buffers_BLOCK(int dir, int *flag // find out how many recvs we need to wait for if (dir == 0) { - if (flags[0] == 5) // there is communication on this face - wait_max++; // so we'll need to wait for its comm - if (flags[1] == 5) // there is communication on this face - wait_max++; // so we'll need to wait for its comm + if (flags[0] == 5) { // there is communication on this face + wait_max++; // so we'll need to wait for its comm + } + if (flags[1] == 5) { // there is communication on this face + wait_max++; // so we'll need to wait for its comm + } } if (dir == 1) { - if (flags[2] == 5) // there is communication on this face - wait_max++; // so we'll need to wait for its comm - if (flags[3] == 5) // there is communication on this face - wait_max++; // so we'll need to wait for its comm + if (flags[2] == 5) { // there is communication on this face + wait_max++; // so we'll need to wait for its comm + } + if (flags[3] == 5) { // there is communication on this face + wait_max++; // so we'll need to wait for its comm + } } if (dir == 2) { - if (flags[4] == 5) // there is communication on this face - wait_max++; // so we'll need to wait for its comm - if (flags[5] == 5) // there is communication on this face - wait_max++; // so we'll need to wait for its comm + if (flags[4] == 5) { // there is communication on this face + wait_max++; // so we'll need to wait for its comm + } + if (flags[5] == 5) { // there is communication on this face + wait_max++; // so we'll need to wait for its comm + } } // wait for any receives to complete @@ -111,22 +117,28 @@ void Grid3D::Wait_NTransfer_and_Request_Recv_Particles_Transfer_BLOCK(int dir, i // find out how many recvs we need to wait for if (dir == 0) { - if (flags[0] == 5) // there is communication on this face - wait_max++; // so we'll need to wait for its comm - if (flags[1] == 5) // there is communication on this face - wait_max++; // so we'll need to wait for its comm + if (flags[0] == 5) { // there is communication on this face + wait_max++; // so we'll need to wait for its comm + } + if (flags[1] == 5) { // there is communication on this face + wait_max++; // so we'll need to wait for its comm + } } if (dir == 1) { - if (flags[2] == 5) // there is communication on this face - wait_max++; // so we'll need to wait for its comm - if (flags[3] == 5) // there is communication on this face - wait_max++; // so we'll need to wait for its comm + if (flags[2] == 5) { // there is communication on this face + wait_max++; // so we'll need to wait for its comm + } + if (flags[3] == 5) { // there is communication on this face + wait_max++; // so we'll need to wait for its comm + } } if (dir == 2) { - if (flags[4] == 5) // there is communication on this face - wait_max++; // so we'll need to wait for its comm - if (flags[5] == 5) // there is communication on this face - wait_max++; // so we'll need to wait for its comm + if (flags[4] == 5) { // there is communication on this face + wait_max++; // so we'll need to wait for its comm + } + if (flags[5] == 5) { // there is communication on this face + wait_max++; // so we'll need to wait for its comm + } } int ireq_particles_transfer = 0; diff --git a/src/particles/particles_boundaries_gpu.cu b/src/particles/particles_boundaries_gpu.cu index ba60916e9..e4e254e18 100644 --- a/src/particles/particles_boundaries_gpu.cu +++ b/src/particles/particles_boundaries_gpu.cu @@ -141,8 +141,9 @@ __global__ void Scan_Kernel(part_int_t n_total, bool *transfer_flags_d, int *pre // Write results to device memory if (block_start + 2 * tid_block < n_total) prefix_sum_d[block_start + 2 * tid_block] = data_sh[2 * tid_block]; - if (block_start + 2 * tid_block + 1 < n_total) + if (block_start + 2 * tid_block + 1 < n_total) { prefix_sum_d[block_start + 2 * tid_block + 1] = data_sh[2 * tid_block + 1]; + } // Write the block sum int last_flag_block = (int)transfer_flags_d[block_start + 2 * (blockDim.x - 1) + 1]; diff --git a/src/reconstruction/ppmc_cuda.cu b/src/reconstruction/ppmc_cuda.cu index 697726e93..2058f4b56 100644 --- a/src/reconstruction/ppmc_cuda.cu +++ b/src/reconstruction/ppmc_cuda.cu @@ -414,8 +414,9 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou lim_slope_a = fmin(fabs(del_ge_L), fabs(del_ge_R)); lim_slope_b = fmin(fabs(del_ge_C), fabs(del_ge_G)); del_ge_m_imo = sgn_CUDA(del_ge_C) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); - } else + } else { del_ge_m_imo = 0.0; + } #endif // DE #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { @@ -585,8 +586,9 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou lim_slope_a = fmin(fabs(del_ge_L), fabs(del_ge_R)); lim_slope_b = fmin(fabs(del_ge_C), fabs(del_ge_G)); del_ge_m_i = sgn_CUDA(del_ge_C) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); - } else + } else { del_ge_m_i = 0.0; + } #endif // DE #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { @@ -756,8 +758,9 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou lim_slope_a = fmin(fabs(del_ge_L), fabs(del_ge_R)); lim_slope_b = fmin(fabs(del_ge_C), fabs(del_ge_G)); del_ge_m_ipo = sgn_CUDA(del_ge_C) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); - } else + } else { del_ge_m_ipo = 0.0; + } #endif // DE #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { diff --git a/src/riemann_solvers/roe_cuda.cu b/src/riemann_solvers/roe_cuda.cu index bfbeb8f5a..e14be647e 100644 --- a/src/riemann_solvers/roe_cuda.cu +++ b/src/riemann_solvers/roe_cuda.cu @@ -385,17 +385,19 @@ __global__ void Calculate_Roe_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_R dev_flux[4 * n_cells + tid] = 0.5 * (f_E_l + f_E_r - sum_4); #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { - if (dev_flux[tid] >= 0.0) + if (dev_flux[tid] >= 0.0) { dev_flux[(5 + i) * n_cells + tid] = dev_flux[tid] * scalarl[i]; - else + } else { dev_flux[(5 + i) * n_cells + tid] = dev_flux[tid] * scalarr[i]; + } } #endif #ifdef DE - if (dev_flux[tid] >= 0.0) + if (dev_flux[tid] >= 0.0) { dev_flux[(n_fields - 1) * n_cells + tid] = dev_flux[tid] * gel; - else + } else { dev_flux[(n_fields - 1) * n_cells + tid] = dev_flux[tid] * ger; + } #endif } } From f5bab0264815e46ca359a88ba24c8eceab5a9791 Mon Sep 17 00:00:00 2001 From: helenarichie Date: Fri, 3 Mar 2023 11:17:51 -0500 Subject: [PATCH 226/694] resolve readability-braces-around-statements check for disk build --- src/particles/particles_boundaries.cpp | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/src/particles/particles_boundaries.cpp b/src/particles/particles_boundaries.cpp index 0befb46d8..19aec15a7 100644 --- a/src/particles/particles_boundaries.cpp +++ b/src/particles/particles_boundaries.cpp @@ -117,27 +117,27 @@ void Grid3D::Wait_NTransfer_and_Request_Recv_Particles_Transfer_BLOCK(int dir, i // find out how many recvs we need to wait for if (dir == 0) { - if (flags[0] == 5) { // there is communication on this face - wait_max++; // so we'll need to wait for its comm + if (flags[0] == 5) { // there is communication on this face + wait_max++; // so we'll need to wait for its comm } - if (flags[1] == 5) { // there is communication on this face - wait_max++; // so we'll need to wait for its comm + if (flags[1] == 5) { // there is communication on this face + wait_max++; // so we'll need to wait for its comm } } if (dir == 1) { - if (flags[2] == 5) { // there is communication on this face - wait_max++; // so we'll need to wait for its comm + if (flags[2] == 5) { // there is communication on this face + wait_max++; // so we'll need to wait for its comm } - if (flags[3] == 5) { // there is communication on this face - wait_max++; // so we'll need to wait for its comm + if (flags[3] == 5) { // there is communication on this face + wait_max++; // so we'll need to wait for its comm } } if (dir == 2) { - if (flags[4] == 5) { // there is communication on this face - wait_max++; // so we'll need to wait for its comm + if (flags[4] == 5) { // there is communication on this face + wait_max++; // so we'll need to wait for its comm } - if (flags[5] == 5) { // there is communication on this face - wait_max++; // so we'll need to wait for its comm + if (flags[5] == 5) { // there is communication on this face + wait_max++; // so we'll need to wait for its comm } } From d9948547de8921112474ae49a0393021ea4831b4 Mon Sep 17 00:00:00 2001 From: helenarichie Date: Fri, 3 Mar 2023 11:32:25 -0500 Subject: [PATCH 227/694] resolve readability-braces-around-statements check for cosmology build --- src/cosmology/cosmology_functions.cpp | 5 +++-- src/cosmology/io_cosmology.cpp | 3 ++- src/grid/grid3D.cpp | 5 +++-- src/io/io.cpp | 3 ++- 4 files changed, 10 insertions(+), 6 deletions(-) diff --git a/src/cosmology/cosmology_functions.cpp b/src/cosmology/cosmology_functions.cpp index 517549dd6..23ee5c5d4 100644 --- a/src/cosmology/cosmology_functions.cpp +++ b/src/cosmology/cosmology_functions.cpp @@ -42,10 +42,11 @@ Real Cosmology::Get_Hubble_Parameter(Real a) void Grid3D::Change_Cosmological_Frame_Sytem(bool forward) { - if (forward) + if (forward) { chprintf(" Converting to Cosmological Comoving System\n"); - else + } else { chprintf(" Converting to Cosmological Physical System\n"); + } Change_DM_Frame_System(forward); #ifndef ONLY_PARTICLES diff --git a/src/cosmology/io_cosmology.cpp b/src/cosmology/io_cosmology.cpp index dbaeb983f..862b8be0d 100644 --- a/src/cosmology/io_cosmology.cpp +++ b/src/cosmology/io_cosmology.cpp @@ -62,8 +62,9 @@ void Cosmology::Set_Scale_Outputs(struct parameters *P) next_output = current_a; chprintf(" Next output index: %d \n", next_output_indx); chprintf(" Next output z value: %f \n", 1. / next_output - 1); - } else + } else { Load_Scale_Outputs(P); + } } void Cosmology::Set_Next_Scale_Output() diff --git a/src/grid/grid3D.cpp b/src/grid/grid3D.cpp index c5101c06a..a086485d9 100644 --- a/src/grid/grid3D.cpp +++ b/src/grid/grid3D.cpp @@ -271,10 +271,11 @@ void Grid3D::Initialize(struct parameters *P) #endif #ifdef COSMOLOGY - if (P->scale_outputs_file[0] == '\0') + if (P->scale_outputs_file[0] == '\0') { H.OUTPUT_SCALE_FACOR = false; - else + } else { H.OUTPUT_SCALE_FACOR = true; + } #endif H.Output_Initial = true; diff --git a/src/io/io.cpp b/src/io/io.cpp index 695775e63..1c6304c52 100644 --- a/src/io/io.cpp +++ b/src/io/io.cpp @@ -138,8 +138,9 @@ void WriteData(Grid3D &G, struct parameters P, int nfile) chprintf(" Saved Snapshot: %d z:%f Exiting now\n", nfile, G.Cosmo.current_z); } - } else + } else { chprintf(" Saved Snapshot: %d z:%f\n", nfile, G.Cosmo.current_z); + } G.Change_Cosmological_Frame_Sytem(true); chprintf("\n"); G.H.Output_Now = false; From 0d396f1b6ca4107a4a05947183239a925c32635c Mon Sep 17 00:00:00 2001 From: helenarichie Date: Fri, 3 Mar 2023 11:34:38 -0500 Subject: [PATCH 228/694] add dust to build matrix --- .github/workflows/build_and_lint.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build_and_lint.yml b/.github/workflows/build_and_lint.yml index 4c8e374a9..0ea34c687 100644 --- a/.github/workflows/build_and_lint.yml +++ b/.github/workflows/build_and_lint.yml @@ -25,7 +25,7 @@ jobs: strategy: fail-fast: false matrix: - make-type: [hydro, gravity, disk, particles, cosmology, mhd] + make-type: [hydro, gravity, disk, particles, cosmology, mhd, dust] container: [{name: "CUDA", link: "docker://chollahydro/cholla:cuda_github"}, {name: "HIP",link: "docker://chollahydro/cholla:rocm_github"},] # Setup environment variables From d75e778cb9e915401e55a092648753fe780e92de Mon Sep 17 00:00:00 2001 From: helenarichie Date: Fri, 3 Mar 2023 17:13:51 -0500 Subject: [PATCH 229/694] resolve readability-braces-around-statements for hydro, gravity, and disk builds --- .clang-tidy | 2 - src/analysis/feedback_analysis_gpu.cu | 24 +- src/cooling/cooling_cuda.cu | 8 +- src/global/global.cpp | 16 +- src/global/global_cuda.h | 4 +- src/gravity/grav3D.cpp | 4 +- src/gravity/gravity_boundaries.cpp | 156 +++++++++---- src/gravity/gravity_boundaries_gpu.cu | 184 +++++++++++---- src/gravity/gravity_functions.cpp | 24 +- src/gravity/gravity_functions_gpu.cu | 12 +- src/gravity/paris/HenryPeriodic.cu | 4 +- src/gravity/paris/PoissonZero3DBlockedGPU.cu | 8 +- src/gravity/potential_paris_3D.cu | 16 +- src/gravity/potential_paris_galactic.cu | 16 +- src/grid/boundary_conditions.cpp | 224 ++++++++++++++----- src/grid/cuda_boundaries.cu | 4 +- src/grid/grid3D.cpp | 24 +- src/grid/mpi_boundaries.cpp | 128 ++++++++--- src/hydro/hydro_cuda.cu | 8 +- src/io/io.cpp | 92 ++++++-- src/main.cpp | 16 +- src/main_tests.cpp | 4 +- src/model/disk_ICs.cpp | 40 +++- src/model/disk_galaxy.h | 4 +- src/mpi/MPI_Comm_node.c | 4 +- src/mpi/mpi_routines.cpp | 84 +++++-- src/particles/density_CIC_gpu.cu | 16 +- src/particles/density_boundaries.cpp | 60 +++-- src/particles/density_boundaries_gpu.cu | 108 ++++++--- src/particles/feedback_CIC_gpu.cu | 36 ++- src/particles/gravity_CIC_gpu.cu | 24 +- src/particles/io_particles.cpp | 60 +++-- src/particles/particles_3D.cpp | 24 +- src/particles/particles_3D_gpu.cu | 8 +- src/particles/particles_boundaries.cpp | 42 +++- src/particles/particles_boundaries_gpu.cu | 100 ++++++--- src/particles/particles_dynamics.cpp | 24 +- src/particles/particles_dynamics_gpu.cu | 12 +- src/reconstruction/plmp_cuda.cu | 36 ++- src/reconstruction/ppmc_cuda.cu | 104 ++++++--- src/riemann_solvers/exact_cuda.cu | 16 +- src/system_tests/system_tester.cpp | 32 ++- src/utils/error_check_cuda.cu | 12 +- src/utils/gpu.hpp | 40 +++- src/utils/gpu_arrays_functions.cu | 8 +- src/utils/gpu_arrays_functions.h | 8 +- src/utils/parallel_omp.cpp | 16 +- src/utils/reduction_utilities.h | 20 +- src/utils/testing_utilities.cpp | 20 +- src/utils/timing_functions.cpp | 24 +- 50 files changed, 1468 insertions(+), 492 deletions(-) diff --git a/.clang-tidy b/.clang-tidy index 23ec43b87..3066298bb 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -34,7 +34,6 @@ Checks: "*, google-readability-avoid-underscore-in-googletest-name, google-upgrade-googletest-case, - google-readability-braces-around-statements, -bugprone-assignment-in-if-condition, -bugprone-branch-clone, @@ -143,7 +142,6 @@ Checks: "*, -performance-for-range-copy, -performance-inefficient-vector-operation, -performance-unnecessary-value-param, - -readability-braces-around-statements, -readability-const-return-type, -readability-convert-member-functions-to-static, -readability-delete-null-pointer, diff --git a/src/analysis/feedback_analysis_gpu.cu b/src/analysis/feedback_analysis_gpu.cu index f18b33f59..fadd841f2 100644 --- a/src/analysis/feedback_analysis_gpu.cu +++ b/src/analysis/feedback_analysis_gpu.cu @@ -13,12 +13,24 @@ __device__ void warpReduce(volatile Real *buff, size_t tid) { - if (TPB_ANALYSIS >= 64) buff[tid] += buff[tid + 32]; - if (TPB_ANALYSIS >= 32) buff[tid] += buff[tid + 16]; - if (TPB_ANALYSIS >= 16) buff[tid] += buff[tid + 8]; - if (TPB_ANALYSIS >= 8) buff[tid] += buff[tid + 4]; - if (TPB_ANALYSIS >= 4) buff[tid] += buff[tid + 2]; - if (TPB_ANALYSIS >= 2) buff[tid] += buff[tid + 1]; + if (TPB_ANALYSIS >= 64) { + buff[tid] += buff[tid + 32]; + } + if (TPB_ANALYSIS >= 32) { + buff[tid] += buff[tid + 16]; + } + if (TPB_ANALYSIS >= 16) { + buff[tid] += buff[tid + 8]; + } + if (TPB_ANALYSIS >= 8) { + buff[tid] += buff[tid + 4]; + } + if (TPB_ANALYSIS >= 4) { + buff[tid] += buff[tid + 2]; + } + if (TPB_ANALYSIS >= 2) { + buff[tid] += buff[tid + 1]; + } } void __global__ Reduce_Tubulence_kernel(int nx, int ny, int nz, int n_ghost, Real *density, Real *momentum_x, diff --git a/src/cooling/cooling_cuda.cu b/src/cooling/cooling_cuda.cu index dd076b839..4b09527d0 100644 --- a/src/cooling/cooling_cuda.cu +++ b/src/cooling/cooling_cuda.cu @@ -85,7 +85,9 @@ __global__ void cooling_kernel(Real *dev_conserved, int nx, int ny, int nz, int d = dev_conserved[id]; E = dev_conserved[4 * n_cells + id]; // don't apply cooling if this thread crashed - if (E < 0.0 || E != E) return; + if (E < 0.0 || E != E) { + return; + } // #ifndef DE vx = dev_conserved[1 * n_cells + id] / d; vy = dev_conserved[2 * n_cells + id] / d; @@ -249,7 +251,9 @@ __device__ Real primordial_cool(Real n, Real T) n_hepp = n_hep * (gamma_ehep + gamma_lhep / n_e) / alpha_hepp; n_e = n_hp + n_hep + 2 * n_hepp; diff = fabs(n_e_old - n_e); - if (diff < tol) break; + if (diff < tol) { + break; + } } } else { n_h0 = n_h * alpha_hp / (alpha_hp + gamma_eh0); diff --git a/src/global/global.cpp b/src/global/global.cpp index 55ca787f4..3b029c413 100644 --- a/src/global/global.cpp +++ b/src/global/global.cpp @@ -82,11 +82,15 @@ char *trim(char *s) char *s1 = s, *s2 = &s[strlen(s) - 1]; /* Trim and delimit right side */ - while ((isspace(*s2)) && (s2 >= s1)) s2--; + while ((isspace(*s2)) && (s2 >= s1)) { + s2--; + } *(s2 + 1) = '\0'; /* Trim left side */ - while ((isspace(*s1)) && (s1 < s2)) s1++; + while ((isspace(*s1)) && (s1 < s2)) { + s1++; + } /* Copy finished string */ strcpy(s, s1); @@ -104,7 +108,9 @@ const std::set optionalParams = { int is_param_valid(const char *param_name) { for (auto it = optionalParams.begin(); it != optionalParams.end(); ++it) { - if (strcmp(param_name, *it) == 0) return 1; + if (strcmp(param_name, *it) == 0) { + return 1; + } } return 0; } @@ -148,7 +154,9 @@ void parse_params(char *param_file, struct parameters *parms, int argc, char **a /* Read next line */ while ((s = fgets(buff, sizeof buff, fp)) != NULL) { /* Skip blank lines and comments */ - if (buff[0] == '\n' || buff[0] == '#' || buff[0] == ';') continue; + if (buff[0] == '\n' || buff[0] == '#' || buff[0] == ';') { + continue; + } /* Parse name/value pair from line */ char name[MAXLEN], value[MAXLEN]; diff --git a/src/global/global_cuda.h b/src/global/global_cuda.h index 0e3253af9..296fa31f1 100644 --- a/src/global/global_cuda.h +++ b/src/global/global_cuda.h @@ -82,7 +82,9 @@ inline void gpuAssert(cudaError_t code, char *file, int line, bool abort = true) { if (code != cudaSuccess) { fprintf(stderr, "GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line); - if (abort) exit(code); + if (abort) { + exit(code); + } } } diff --git a/src/gravity/grav3D.cpp b/src/gravity/grav3D.cpp index e5032e0f3..20ca67655 100644 --- a/src/gravity/grav3D.cpp +++ b/src/gravity/grav3D.cpp @@ -155,7 +155,9 @@ void Grav3D::AllocateMemory_CPU(void) void Grav3D::Set_Boundary_Flags(int *flags) { - for (int i = 0; i < 6; i++) boundary_flags[i] = flags[i]; + for (int i = 0; i < 6; i++) { + boundary_flags[i] = flags[i]; + } } void Grav3D::Initialize_values_CPU(void) diff --git a/src/gravity/gravity_boundaries.cpp b/src/gravity/gravity_boundaries.cpp index a4291316a..a49b75185 100644 --- a/src/gravity/gravity_boundaries.cpp +++ b/src/gravity/gravity_boundaries.cpp @@ -14,12 +14,24 @@ void Grid3D::Compute_Potential_Boundaries_Isolated(int dir, struct parameters *P // Set Isolated Boundaries for the ghost cells. int bc_potential_type = P->bc_potential_type; // bc_potential_type = 0 -> Point mass potential GM/r - if (dir == 0) Compute_Potential_Isolated_Boundary(0, 0, bc_potential_type); - if (dir == 1) Compute_Potential_Isolated_Boundary(0, 1, bc_potential_type); - if (dir == 2) Compute_Potential_Isolated_Boundary(1, 0, bc_potential_type); - if (dir == 3) Compute_Potential_Isolated_Boundary(1, 1, bc_potential_type); - if (dir == 4) Compute_Potential_Isolated_Boundary(2, 0, bc_potential_type); - if (dir == 5) Compute_Potential_Isolated_Boundary(2, 1, bc_potential_type); + if (dir == 0) { + Compute_Potential_Isolated_Boundary(0, 0, bc_potential_type); + } + if (dir == 1) { + Compute_Potential_Isolated_Boundary(0, 1, bc_potential_type); + } + if (dir == 2) { + Compute_Potential_Isolated_Boundary(1, 0, bc_potential_type); + } + if (dir == 3) { + Compute_Potential_Isolated_Boundary(1, 1, bc_potential_type); + } + if (dir == 4) { + Compute_Potential_Isolated_Boundary(2, 0, bc_potential_type); + } + if (dir == 5) { + Compute_Potential_Isolated_Boundary(2, 1, bc_potential_type); + } } void Grid3D::Set_Potential_Boundaries_Isolated(int direction, int side, int *flags) @@ -40,24 +52,36 @@ void Grid3D::Set_Potential_Boundaries_Isolated(int direction, int side, int *fla if (direction == 0) { n_i = Grav.ny_local; n_j = Grav.nz_local; - if (side == 0) pot_boundary = Grav.F.pot_boundary_x0; - if (side == 1) pot_boundary = Grav.F.pot_boundary_x1; + if (side == 0) { + pot_boundary = Grav.F.pot_boundary_x0; + } + if (side == 1) { + pot_boundary = Grav.F.pot_boundary_x1; + } } #endif #ifdef GRAV_ISOLATED_BOUNDARY_Y if (direction == 1) { n_i = Grav.nx_local; n_j = Grav.nz_local; - if (side == 0) pot_boundary = Grav.F.pot_boundary_y0; - if (side == 1) pot_boundary = Grav.F.pot_boundary_y1; + if (side == 0) { + pot_boundary = Grav.F.pot_boundary_y0; + } + if (side == 1) { + pot_boundary = Grav.F.pot_boundary_y1; + } } #endif #ifdef GRAV_ISOLATED_BOUNDARY_Z if (direction == 2) { n_i = Grav.nx_local; n_j = Grav.ny_local; - if (side == 0) pot_boundary = Grav.F.pot_boundary_z0; - if (side == 1) pot_boundary = Grav.F.pot_boundary_z1; + if (side == 0) { + pot_boundary = Grav.F.pot_boundary_z0; + } + if (side == 1) { + pot_boundary = Grav.F.pot_boundary_z1; + } } #endif @@ -69,16 +93,28 @@ void Grid3D::Set_Potential_Boundaries_Isolated(int direction, int side, int *fla id_buffer = i + j * n_i + k * n_i * n_j; if (direction == 0) { - if (side == 0) id_grid = (k) + (i + nGHST) * nx_g + (j + nGHST) * nx_g * ny_g; - if (side == 1) id_grid = (k + nx_local + nGHST) + (i + nGHST) * nx_g + (j + nGHST) * nx_g * ny_g; + if (side == 0) { + id_grid = (k) + (i + nGHST) * nx_g + (j + nGHST) * nx_g * ny_g; + } + if (side == 1) { + id_grid = (k + nx_local + nGHST) + (i + nGHST) * nx_g + (j + nGHST) * nx_g * ny_g; + } } if (direction == 1) { - if (side == 0) id_grid = (i + nGHST) + (k)*nx_g + (j + nGHST) * nx_g * ny_g; - if (side == 1) id_grid = (i + nGHST) + (k + ny_local + nGHST) * nx_g + (j + nGHST) * nx_g * ny_g; + if (side == 0) { + id_grid = (i + nGHST) + (k)*nx_g + (j + nGHST) * nx_g * ny_g; + } + if (side == 1) { + id_grid = (i + nGHST) + (k + ny_local + nGHST) * nx_g + (j + nGHST) * nx_g * ny_g; + } } if (direction == 2) { - if (side == 0) id_grid = (i + nGHST) + (j + nGHST) * nx_g + (k)*nx_g * ny_g; - if (side == 1) id_grid = (i + nGHST) + (j + nGHST) * nx_g + (k + nz_local + nGHST) * nx_g * ny_g; + if (side == 0) { + id_grid = (i + nGHST) + (j + nGHST) * nx_g + (k)*nx_g * ny_g; + } + if (side == 1) { + id_grid = (i + nGHST) + (j + nGHST) * nx_g + (k + nz_local + nGHST) * nx_g * ny_g; + } } Grav.F.potential_h[id_grid] = pot_boundary[id_buffer]; @@ -103,8 +139,12 @@ void Grid3D::Compute_Potential_Isolated_Boundary(int direction, int side, int bc domain_l = Grav.xMin; n_i = Grav.ny_local; n_j = Grav.nz_local; - if (side == 0) pot_boundary = Grav.F.pot_boundary_x0; - if (side == 1) pot_boundary = Grav.F.pot_boundary_x1; + if (side == 0) { + pot_boundary = Grav.F.pot_boundary_x0; + } + if (side == 1) { + pot_boundary = Grav.F.pot_boundary_x1; + } } #endif #ifdef GRAV_ISOLATED_BOUNDARY_Y @@ -112,8 +152,12 @@ void Grid3D::Compute_Potential_Isolated_Boundary(int direction, int side, int bc domain_l = Grav.yMin; n_i = Grav.nx_local; n_j = Grav.nz_local; - if (side == 0) pot_boundary = Grav.F.pot_boundary_y0; - if (side == 1) pot_boundary = Grav.F.pot_boundary_y1; + if (side == 0) { + pot_boundary = Grav.F.pot_boundary_y0; + } + if (side == 1) { + pot_boundary = Grav.F.pot_boundary_y1; + } } #endif #ifdef GRAV_ISOLATED_BOUNDARY_Z @@ -121,8 +165,12 @@ void Grid3D::Compute_Potential_Isolated_Boundary(int direction, int side, int bc domain_l = Grav.zMin; n_i = Grav.nx_local; n_j = Grav.ny_local; - if (side == 0) pot_boundary = Grav.F.pot_boundary_z0; - if (side == 1) pot_boundary = Grav.F.pot_boundary_z1; + if (side == 0) { + pot_boundary = Grav.F.pot_boundary_z0; + } + if (side == 1) { + pot_boundary = Grav.F.pot_boundary_z1; + } } #endif @@ -149,7 +197,9 @@ void Grid3D::Compute_Potential_Isolated_Boundary(int direction, int side, int bc if (direction == 0) { // pos_x = Grav.xMin - ( nGHST + k + 0.5 ) * Grav.dx; pos_x = Grav.xMin + (k + 0.5 - nGHST) * Grav.dx; - if (side == 1) pos_x += Lx_local + nGHST * Grav.dx; + if (side == 1) { + pos_x += Lx_local + nGHST * Grav.dx; + } pos_y = Grav.yMin + (i + 0.5) * Grav.dy; pos_z = Grav.zMin + (j + 0.5) * Grav.dz; } @@ -157,7 +207,9 @@ void Grid3D::Compute_Potential_Isolated_Boundary(int direction, int side, int bc if (direction == 1) { // pos_y = Grav.yMin - ( nGHST + k + 0.5 ) * Grav.dy; pos_y = Grav.yMin + (k + 0.5 - nGHST) * Grav.dy; - if (side == 1) pos_y += Ly_local + nGHST * Grav.dy; + if (side == 1) { + pos_y += Ly_local + nGHST * Grav.dy; + } pos_x = Grav.xMin + (i + 0.5) * Grav.dx; pos_z = Grav.zMin + (j + 0.5) * Grav.dz; } @@ -165,7 +217,9 @@ void Grid3D::Compute_Potential_Isolated_Boundary(int direction, int side, int bc if (direction == 2) { // pos_z = Grav.zMin - ( nGHST + k + 0.5 ) * Grav.dz; pos_z = Grav.zMin + (k + 0.5 - nGHST) * Grav.dz; - if (side == 1) pos_z += Lz_local + nGHST * Grav.dz; + if (side == 1) { + pos_z += Lz_local + nGHST * Grav.dz; + } pos_x = Grav.xMin + (i + 0.5) * Grav.dx; pos_y = Grav.yMin + (j + 0.5) * Grav.dy; } @@ -280,8 +334,12 @@ int Grid3D::Load_Gravity_Potential_To_Buffer(int direction, int side, Real *buff for (k = 0; k < nz_g; k++) { for (j = 0; j < ny_g; j++) { for (i = 0; i < nGHST; i++) { - if (side == 0) indx = (i + nGHST) + (j)*nx_g + (k)*nx_g * ny_g; - if (side == 1) indx = (nx_g - 2 * nGHST + i) + (j)*nx_g + (k)*nx_g * ny_g; + if (side == 0) { + indx = (i + nGHST) + (j)*nx_g + (k)*nx_g * ny_g; + } + if (side == 1) { + indx = (nx_g - 2 * nGHST + i) + (j)*nx_g + (k)*nx_g * ny_g; + } indx_buff = (j) + (k)*ny_g + i * ny_g * nz_g; buffer[buffer_start + indx_buff] = Grav.F.potential_h[indx]; } @@ -295,8 +353,12 @@ int Grid3D::Load_Gravity_Potential_To_Buffer(int direction, int side, Real *buff for (k = 0; k < nz_g; k++) { for (j = 0; j < nGHST; j++) { for (i = 0; i < nx_g; i++) { - if (side == 0) indx = (i) + (j + nGHST) * nx_g + (k)*nx_g * ny_g; - if (side == 1) indx = (i) + (ny_g - 2 * nGHST + j) * nx_g + (k)*nx_g * ny_g; + if (side == 0) { + indx = (i) + (j + nGHST) * nx_g + (k)*nx_g * ny_g; + } + if (side == 1) { + indx = (i) + (ny_g - 2 * nGHST + j) * nx_g + (k)*nx_g * ny_g; + } indx_buff = (i) + (k)*nx_g + j * nx_g * nz_g; buffer[buffer_start + indx_buff] = Grav.F.potential_h[indx]; } @@ -310,8 +372,12 @@ int Grid3D::Load_Gravity_Potential_To_Buffer(int direction, int side, Real *buff for (k = 0; k < nGHST; k++) { for (j = 0; j < ny_g; j++) { for (i = 0; i < nx_g; i++) { - if (side == 0) indx = (i) + (j)*nx_g + (k + nGHST) * nx_g * ny_g; - if (side == 1) indx = (i) + (j)*nx_g + (nz_g - 2 * nGHST + k) * nx_g * ny_g; + if (side == 0) { + indx = (i) + (j)*nx_g + (k + nGHST) * nx_g * ny_g; + } + if (side == 1) { + indx = (i) + (j)*nx_g + (nz_g - 2 * nGHST + k) * nx_g * ny_g; + } indx_buff = (i) + (j)*nx_g + k * nx_g * ny_g; buffer[buffer_start + indx_buff] = Grav.F.potential_h[indx]; } @@ -335,8 +401,12 @@ void Grid3D::Unload_Gravity_Potential_from_Buffer(int direction, int side, Real for (k = 0; k < nz_g; k++) { for (j = 0; j < ny_g; j++) { for (i = 0; i < nGHST; i++) { - if (side == 0) indx = (i) + (j)*nx_g + (k)*nx_g * ny_g; - if (side == 1) indx = (nx_g - nGHST + i) + (j)*nx_g + (k)*nx_g * ny_g; + if (side == 0) { + indx = (i) + (j)*nx_g + (k)*nx_g * ny_g; + } + if (side == 1) { + indx = (nx_g - nGHST + i) + (j)*nx_g + (k)*nx_g * ny_g; + } indx_buff = (j) + (k)*ny_g + i * ny_g * nz_g; Grav.F.potential_h[indx] = buffer[buffer_start + indx_buff]; } @@ -349,8 +419,12 @@ void Grid3D::Unload_Gravity_Potential_from_Buffer(int direction, int side, Real for (k = 0; k < nz_g; k++) { for (j = 0; j < nGHST; j++) { for (i = 0; i < nx_g; i++) { - if (side == 0) indx = (i) + (j)*nx_g + (k)*nx_g * ny_g; - if (side == 1) indx = (i) + (ny_g - nGHST + j) * nx_g + (k)*nx_g * ny_g; + if (side == 0) { + indx = (i) + (j)*nx_g + (k)*nx_g * ny_g; + } + if (side == 1) { + indx = (i) + (ny_g - nGHST + j) * nx_g + (k)*nx_g * ny_g; + } indx_buff = (i) + (k)*nx_g + j * nx_g * nz_g; Grav.F.potential_h[indx] = buffer[buffer_start + indx_buff]; } @@ -363,8 +437,12 @@ void Grid3D::Unload_Gravity_Potential_from_Buffer(int direction, int side, Real for (k = 0; k < nGHST; k++) { for (j = 0; j < ny_g; j++) { for (i = 0; i < nx_g; i++) { - if (side == 0) indx = (i) + (j)*nx_g + (k)*nx_g * ny_g; - if (side == 1) indx = (i) + (j)*nx_g + (nz_g - nGHST + k) * nx_g * ny_g; + if (side == 0) { + indx = (i) + (j)*nx_g + (k)*nx_g * ny_g; + } + if (side == 1) { + indx = (i) + (j)*nx_g + (nz_g - nGHST + k) * nx_g * ny_g; + } indx_buff = (i) + (j)*nx_g + k * nx_g * ny_g; Grav.F.potential_h[indx] = buffer[buffer_start + indx_buff]; } diff --git a/src/gravity/gravity_boundaries_gpu.cu b/src/gravity/gravity_boundaries_gpu.cu index b72bb1701..86727edd7 100644 --- a/src/gravity/gravity_boundaries_gpu.cu +++ b/src/gravity/gravity_boundaries_gpu.cu @@ -19,21 +19,35 @@ void __global__ Set_Potential_Boundaries_Isolated_kernel(int direction, int side tid_j = (tid - tid_k * n_i * n_j) / n_i; tid_i = tid - tid_k * n_i * n_j - tid_j * n_i; - if (tid_i < 0 || tid_i >= n_i || tid_j < 0 || tid_j >= n_j || tid_k < 0 || tid_k >= n_ghost) return; + if (tid_i < 0 || tid_i >= n_i || tid_j < 0 || tid_j >= n_j || tid_k < 0 || tid_k >= n_ghost) { + return; + } tid_buffer = tid_i + tid_j * n_i + tid_k * n_i * n_j; if (direction == 0) { - if (side == 0) tid_pot = (tid_k) + (tid_i + n_ghost) * nx + (tid_j + n_ghost) * nx * ny; - if (side == 1) tid_pot = (nx - n_ghost + tid_k) + (tid_i + n_ghost) * nx + (tid_j + n_ghost) * nx * ny; + if (side == 0) { + tid_pot = (tid_k) + (tid_i + n_ghost) * nx + (tid_j + n_ghost) * nx * ny; + } + if (side == 1) { + tid_pot = (nx - n_ghost + tid_k) + (tid_i + n_ghost) * nx + (tid_j + n_ghost) * nx * ny; + } } if (direction == 1) { - if (side == 0) tid_pot = (tid_i + n_ghost) + (tid_k)*nx + (tid_j + n_ghost) * nx * ny; - if (side == 1) tid_pot = (tid_i + n_ghost) + (ny - n_ghost + tid_k) * nx + (tid_j + n_ghost) * nx * ny; + if (side == 0) { + tid_pot = (tid_i + n_ghost) + (tid_k)*nx + (tid_j + n_ghost) * nx * ny; + } + if (side == 1) { + tid_pot = (tid_i + n_ghost) + (ny - n_ghost + tid_k) * nx + (tid_j + n_ghost) * nx * ny; + } } if (direction == 2) { - if (side == 0) tid_pot = (tid_i + n_ghost) + (tid_j + n_ghost) * nx + (tid_k)*nx * ny; - if (side == 1) tid_pot = (tid_i + n_ghost) + (tid_j + n_ghost) * nx + (nz - n_ghost + tid_k) * nx * ny; + if (side == 0) { + tid_pot = (tid_i + n_ghost) + (tid_j + n_ghost) * nx + (tid_k)*nx * ny; + } + if (side == 1) { + tid_pot = (tid_i + n_ghost) + (tid_j + n_ghost) * nx + (nz - n_ghost + tid_k) * nx * ny; + } } potential_d[tid_pot] = pot_boundary_d[tid_buffer]; @@ -53,30 +67,54 @@ void Grid3D::Set_Potential_Boundaries_Isolated_GPU(int direction, int side, int if (direction == 0) { n_i = Grav.ny_local; n_j = Grav.nz_local; - if (side == 0) pot_boundary_h = Grav.F.pot_boundary_x0; - if (side == 1) pot_boundary_h = Grav.F.pot_boundary_x1; - if (side == 0) pot_boundary_d = Grav.F.pot_boundary_x0_d; - if (side == 1) pot_boundary_d = Grav.F.pot_boundary_x1_d; + if (side == 0) { + pot_boundary_h = Grav.F.pot_boundary_x0; + } + if (side == 1) { + pot_boundary_h = Grav.F.pot_boundary_x1; + } + if (side == 0) { + pot_boundary_d = Grav.F.pot_boundary_x0_d; + } + if (side == 1) { + pot_boundary_d = Grav.F.pot_boundary_x1_d; + } } #endif #ifdef GRAV_ISOLATED_BOUNDARY_Y if (direction == 1) { n_i = Grav.nx_local; n_j = Grav.nz_local; - if (side == 0) pot_boundary_h = Grav.F.pot_boundary_y0; - if (side == 1) pot_boundary_h = Grav.F.pot_boundary_y1; - if (side == 0) pot_boundary_d = Grav.F.pot_boundary_y0_d; - if (side == 1) pot_boundary_d = Grav.F.pot_boundary_y1_d; + if (side == 0) { + pot_boundary_h = Grav.F.pot_boundary_y0; + } + if (side == 1) { + pot_boundary_h = Grav.F.pot_boundary_y1; + } + if (side == 0) { + pot_boundary_d = Grav.F.pot_boundary_y0_d; + } + if (side == 1) { + pot_boundary_d = Grav.F.pot_boundary_y1_d; + } } #endif #ifdef GRAV_ISOLATED_BOUNDARY_Z if (direction == 2) { n_i = Grav.nx_local; n_j = Grav.ny_local; - if (side == 0) pot_boundary_h = Grav.F.pot_boundary_z0; - if (side == 1) pot_boundary_h = Grav.F.pot_boundary_z1; - if (side == 0) pot_boundary_d = Grav.F.pot_boundary_z0_d; - if (side == 1) pot_boundary_d = Grav.F.pot_boundary_z1_d; + if (side == 0) { + pot_boundary_h = Grav.F.pot_boundary_z0; + } + if (side == 1) { + pot_boundary_h = Grav.F.pot_boundary_z1; + } + if (side == 0) { + pot_boundary_d = Grav.F.pot_boundary_z0_d; + } + if (side == 1) { + pot_boundary_d = Grav.F.pot_boundary_z1_d; + } } #endif @@ -110,25 +148,51 @@ void __global__ Set_Potential_Boundaries_Periodic_kernel(int direction, int side tid_j = (tid - tid_k * n_i * n_j) / n_i; tid_i = tid - tid_k * n_i * n_j - tid_j * n_i; - if (tid_i < 0 || tid_i >= n_i || tid_j < 0 || tid_j >= n_j || tid_k < 0 || tid_k >= n_ghost) return; + if (tid_i < 0 || tid_i >= n_i || tid_j < 0 || tid_j >= n_j || tid_k < 0 || tid_k >= n_ghost) { + return; + } if (direction == 0) { - if (side == 0) tid_src = (nx - 2 * n_ghost + tid_k) + (tid_i)*nx + (tid_j)*nx * ny; - if (side == 0) tid_dst = (tid_k) + (tid_i)*nx + (tid_j)*nx * ny; - if (side == 1) tid_src = (n_ghost + tid_k) + (tid_i)*nx + (tid_j)*nx * ny; - if (side == 1) tid_dst = (nx - n_ghost + tid_k) + (tid_i)*nx + (tid_j)*nx * ny; + if (side == 0) { + tid_src = (nx - 2 * n_ghost + tid_k) + (tid_i)*nx + (tid_j)*nx * ny; + } + if (side == 0) { + tid_dst = (tid_k) + (tid_i)*nx + (tid_j)*nx * ny; + } + if (side == 1) { + tid_src = (n_ghost + tid_k) + (tid_i)*nx + (tid_j)*nx * ny; + } + if (side == 1) { + tid_dst = (nx - n_ghost + tid_k) + (tid_i)*nx + (tid_j)*nx * ny; + } } if (direction == 1) { - if (side == 0) tid_src = (tid_i) + (ny - 2 * n_ghost + tid_k) * nx + (tid_j)*nx * ny; - if (side == 0) tid_dst = (tid_i) + (tid_k)*nx + (tid_j)*nx * ny; - if (side == 1) tid_src = (tid_i) + (n_ghost + tid_k) * nx + (tid_j)*nx * ny; - if (side == 1) tid_dst = (tid_i) + (ny - n_ghost + tid_k) * nx + (tid_j)*nx * ny; + if (side == 0) { + tid_src = (tid_i) + (ny - 2 * n_ghost + tid_k) * nx + (tid_j)*nx * ny; + } + if (side == 0) { + tid_dst = (tid_i) + (tid_k)*nx + (tid_j)*nx * ny; + } + if (side == 1) { + tid_src = (tid_i) + (n_ghost + tid_k) * nx + (tid_j)*nx * ny; + } + if (side == 1) { + tid_dst = (tid_i) + (ny - n_ghost + tid_k) * nx + (tid_j)*nx * ny; + } } if (direction == 2) { - if (side == 0) tid_src = (tid_i) + (tid_j)*nx + (nz - 2 * n_ghost + tid_k) * nx * ny; - if (side == 0) tid_dst = (tid_i) + (tid_j)*nx + (tid_k)*nx * ny; - if (side == 1) tid_src = (tid_i) + (tid_j)*nx + (n_ghost + tid_k) * nx * ny; - if (side == 1) tid_dst = (tid_i) + (tid_j)*nx + (nz - n_ghost + tid_k) * nx * ny; + if (side == 0) { + tid_src = (tid_i) + (tid_j)*nx + (nz - 2 * n_ghost + tid_k) * nx * ny; + } + if (side == 0) { + tid_dst = (tid_i) + (tid_j)*nx + (tid_k)*nx * ny; + } + if (side == 1) { + tid_src = (tid_i) + (tid_j)*nx + (n_ghost + tid_k) * nx * ny; + } + if (side == 1) { + tid_dst = (tid_i) + (tid_j)*nx + (nz - n_ghost + tid_k) * nx * ny; + } } potential_d[tid_dst] = potential_d[tid_src]; @@ -181,21 +245,35 @@ __global__ void Load_Transfer_Buffer_GPU_kernel(int direction, int side, int siz tid_j = (tid - tid_k * n_i * n_j) / n_i; tid_i = tid - tid_k * n_i * n_j - tid_j * n_i; - if (tid_i < 0 || tid_i >= n_i || tid_j < 0 || tid_j >= n_j || tid_k < 0 || tid_k >= n_ghost_transfer) return; + if (tid_i < 0 || tid_i >= n_i || tid_j < 0 || tid_j >= n_j || tid_k < 0 || tid_k >= n_ghost_transfer) { + return; + } tid_buffer = tid_i + tid_j * n_i + tid_k * n_i * n_j; if (direction == 0) { - if (side == 0) tid_pot = (n_ghost_potential + tid_k) + (tid_i)*nx + (tid_j)*nx * ny; - if (side == 1) tid_pot = (nx - n_ghost_potential - n_ghost_transfer + tid_k) + (tid_i)*nx + (tid_j)*nx * ny; + if (side == 0) { + tid_pot = (n_ghost_potential + tid_k) + (tid_i)*nx + (tid_j)*nx * ny; + } + if (side == 1) { + tid_pot = (nx - n_ghost_potential - n_ghost_transfer + tid_k) + (tid_i)*nx + (tid_j)*nx * ny; + } } if (direction == 1) { - if (side == 0) tid_pot = (tid_i) + (n_ghost_potential + tid_k) * nx + (tid_j)*nx * ny; - if (side == 1) tid_pot = (tid_i) + (ny - n_ghost_potential - n_ghost_transfer + tid_k) * nx + (tid_j)*nx * ny; + if (side == 0) { + tid_pot = (tid_i) + (n_ghost_potential + tid_k) * nx + (tid_j)*nx * ny; + } + if (side == 1) { + tid_pot = (tid_i) + (ny - n_ghost_potential - n_ghost_transfer + tid_k) * nx + (tid_j)*nx * ny; + } } if (direction == 2) { - if (side == 0) tid_pot = (tid_i) + (tid_j)*nx + (n_ghost_potential + tid_k) * nx * ny; - if (side == 1) tid_pot = (tid_i) + (tid_j)*nx + (nz - n_ghost_potential - n_ghost_transfer + tid_k) * nx * ny; + if (side == 0) { + tid_pot = (tid_i) + (tid_j)*nx + (n_ghost_potential + tid_k) * nx * ny; + } + if (side == 1) { + tid_pot = (tid_i) + (tid_j)*nx + (nz - n_ghost_potential - n_ghost_transfer + tid_k) * nx * ny; + } } transfer_buffer_d[tid_buffer] = potential_d[tid_pot]; } @@ -257,21 +335,35 @@ __global__ void Unload_Transfer_Buffer_GPU_kernel(int direction, int side, int s tid_j = (tid - tid_k * n_i * n_j) / n_i; tid_i = tid - tid_k * n_i * n_j - tid_j * n_i; - if (tid_i < 0 || tid_i >= n_i || tid_j < 0 || tid_j >= n_j || tid_k < 0 || tid_k >= n_ghost_transfer) return; + if (tid_i < 0 || tid_i >= n_i || tid_j < 0 || tid_j >= n_j || tid_k < 0 || tid_k >= n_ghost_transfer) { + return; + } tid_buffer = tid_i + tid_j * n_i + tid_k * n_i * n_j; if (direction == 0) { - if (side == 0) tid_pot = (n_ghost_potential - n_ghost_transfer + tid_k) + (tid_i)*nx + (tid_j)*nx * ny; - if (side == 1) tid_pot = (nx - n_ghost_potential + tid_k) + (tid_i)*nx + (tid_j)*nx * ny; + if (side == 0) { + tid_pot = (n_ghost_potential - n_ghost_transfer + tid_k) + (tid_i)*nx + (tid_j)*nx * ny; + } + if (side == 1) { + tid_pot = (nx - n_ghost_potential + tid_k) + (tid_i)*nx + (tid_j)*nx * ny; + } } if (direction == 1) { - if (side == 0) tid_pot = (tid_i) + (n_ghost_potential - n_ghost_transfer + tid_k) * nx + (tid_j)*nx * ny; - if (side == 1) tid_pot = (tid_i) + (ny - n_ghost_potential + tid_k) * nx + (tid_j)*nx * ny; + if (side == 0) { + tid_pot = (tid_i) + (n_ghost_potential - n_ghost_transfer + tid_k) * nx + (tid_j)*nx * ny; + } + if (side == 1) { + tid_pot = (tid_i) + (ny - n_ghost_potential + tid_k) * nx + (tid_j)*nx * ny; + } } if (direction == 2) { - if (side == 0) tid_pot = (tid_i) + (tid_j)*nx + (n_ghost_potential - n_ghost_transfer + tid_k) * nx * ny; - if (side == 1) tid_pot = (tid_i) + (tid_j)*nx + (nz - n_ghost_potential + tid_k) * nx * ny; + if (side == 0) { + tid_pot = (tid_i) + (tid_j)*nx + (n_ghost_potential - n_ghost_transfer + tid_k) * nx * ny; + } + if (side == 1) { + tid_pot = (tid_i) + (tid_j)*nx + (nz - n_ghost_potential + tid_k) * nx * ny; + } } potential_d[tid_pot] = transfer_buffer_d[tid_buffer]; } diff --git a/src/gravity/gravity_functions.cpp b/src/gravity/gravity_functions.cpp index 16dcfd6f9..1f4a08f7f 100644 --- a/src/gravity/gravity_functions.cpp +++ b/src/gravity/gravity_functions.cpp @@ -495,18 +495,30 @@ void Grid3D::Compute_Gravitational_Potential(struct parameters *P) } #ifdef GRAV_ISOLATED_BOUNDARY_X - if (Grav.boundary_flags[0] == 3) Compute_Potential_Boundaries_Isolated(0, P); - if (Grav.boundary_flags[1] == 3) Compute_Potential_Boundaries_Isolated(1, P); + if (Grav.boundary_flags[0] == 3) { + Compute_Potential_Boundaries_Isolated(0, P); + } + if (Grav.boundary_flags[1] == 3) { + Compute_Potential_Boundaries_Isolated(1, P); + } // chprintf("Isolated X\n"); #endif #ifdef GRAV_ISOLATED_BOUNDARY_Y - if (Grav.boundary_flags[2] == 3) Compute_Potential_Boundaries_Isolated(2, P); - if (Grav.boundary_flags[3] == 3) Compute_Potential_Boundaries_Isolated(3, P); + if (Grav.boundary_flags[2] == 3) { + Compute_Potential_Boundaries_Isolated(2, P); + } + if (Grav.boundary_flags[3] == 3) { + Compute_Potential_Boundaries_Isolated(3, P); + } // chprintf("Isolated Y\n"); #endif #ifdef GRAV_ISOLATED_BOUNDARY_Z - if (Grav.boundary_flags[4] == 3) Compute_Potential_Boundaries_Isolated(4, P); - if (Grav.boundary_flags[5] == 3) Compute_Potential_Boundaries_Isolated(5, P); + if (Grav.boundary_flags[4] == 3) { + Compute_Potential_Boundaries_Isolated(4, P); + } + if (Grav.boundary_flags[5] == 3) { + Compute_Potential_Boundaries_Isolated(5, P); + } // chprintf("Isolated Z\n"); #endif diff --git a/src/gravity/gravity_functions_gpu.cu b/src/gravity/gravity_functions_gpu.cu index 93533b46f..236670b49 100644 --- a/src/gravity/gravity_functions_gpu.cu +++ b/src/gravity/gravity_functions_gpu.cu @@ -73,7 +73,9 @@ void __global__ Copy_Hydro_Density_to_Gravity_Kernel(Real *src_density_d, Real * tid_y = blockIdx.y * blockDim.y + threadIdx.y; tid_z = blockIdx.z * blockDim.z + threadIdx.z; - if (tid_x >= nx_local || tid_y >= ny_local || tid_z >= nz_local) return; + if (tid_x >= nx_local || tid_y >= ny_local || tid_z >= nz_local) { + return; + } tid_dens = tid_x + tid_y * nx_local + tid_z * nx_local * ny_local; @@ -141,7 +143,9 @@ void __global__ Add_Analytic_Potential_Kernel(Real *analytic_d, Real *potential_ tid_y = blockIdx.y * blockDim.y + threadIdx.y; tid_z = blockIdx.z * blockDim.z + threadIdx.z; - if (tid_x >= nx_pot || tid_y >= ny_pot || tid_z >= nz_pot) return; + if (tid_x >= nx_pot || tid_y >= ny_pot || tid_z >= nz_pot) { + return; + } tid = tid_x + tid_y * nx_pot + tid_z * nx_pot * ny_pot; @@ -200,7 +204,9 @@ void __global__ Extrapolate_Grav_Potential_Kernel(Real *dst_potential, Real *src tid_y = blockIdx.y * blockDim.y + threadIdx.y; tid_z = blockIdx.z * blockDim.z + threadIdx.z; - if (tid_x >= nx_pot || tid_y >= ny_pot || tid_z >= nz_pot) return; + if (tid_x >= nx_pot || tid_y >= ny_pot || tid_z >= nz_pot) { + return; + } tid_pot = tid_x + tid_y * nx_pot + tid_z * nx_pot * ny_pot; diff --git a/src/gravity/paris/HenryPeriodic.cu b/src/gravity/paris/HenryPeriodic.cu index 8053dfecb..28ece4feb 100644 --- a/src/gravity/paris/HenryPeriodic.cu +++ b/src/gravity/paris/HenryPeriodic.cu @@ -22,7 +22,9 @@ HenryPeriodic::HenryPeriodic(const int n[3], const double lo[3], const double hi { // Pencil sub-decomposition within a 3D block mq_ = int(round(sqrt(mk_))); - while (mk_ % mq_) mq_--; + while (mk_ % mq_) { + mq_--; + } mp_ = mk_ / mq_; assert(mp_ * mq_ == mk_); diff --git a/src/gravity/paris/PoissonZero3DBlockedGPU.cu b/src/gravity/paris/PoissonZero3DBlockedGPU.cu index fc994753a..4b9e74e4c 100644 --- a/src/gravity/paris/PoissonZero3DBlockedGPU.cu +++ b/src/gravity/paris/PoissonZero3DBlockedGPU.cu @@ -39,7 +39,9 @@ PoissonZero3DBlockedGPU::PoissonZero3DBlockedGPU(const int n[3], const double lo nk_(n[2]) { mq_ = int(round(sqrt(mk_))); - while (mk_ % mq_) mq_--; + while (mk_ % mq_) { + mq_--; + } mp_ = mk_ / mq_; assert(mp_ * mq_ == mk_); @@ -113,7 +115,9 @@ void print(const char *const title, const int ni, const int nj, const int nk, co printf("%s:\n", title); for (int i = 0; i < ni; i++) { for (int j = 0; j < nj; j++) { - for (int k = 0; k < nk; k++) printf("%.6f ", v[(i * nj + j) * nk + k]); + for (int k = 0; k < nk; k++) { + printf("%.6f ", v[(i * nj + j) * nk + k]); + } printf(" "); } printf("\n"); diff --git a/src/gravity/potential_paris_3D.cu b/src/gravity/potential_paris_3D.cu index 26de6a619..ab8bdc0b5 100644 --- a/src/gravity/potential_paris_3D.cu +++ b/src/gravity/potential_paris_3D.cu @@ -36,7 +36,9 @@ printDiff(const Real *p, const Real *q, const int ng, const int nx, const int ny chprintf(" Poisson-Solver Diff: L1 %g L2 %g Linf %g\n", sums[2] / sums[0], sqrt(sums[3] / sums[1]), maxs[1] / maxs[0]); fflush(stdout); - if (!plot) return; + if (!plot) { + return; + } printf("###\n"); const int k = nz / 2; @@ -178,15 +180,21 @@ void Potential_Paris_3D::Initialize(const Real lx, const Real ly, const Real lz, void Potential_Paris_3D::Reset() { - if (db_) CHECK(cudaFree(db_)); + if (db_) { + CHECK(cudaFree(db_)); + } db_ = nullptr; - if (da_) CHECK(cudaFree(da_)); + if (da_) { + CHECK(cudaFree(da_)); + } da_ = nullptr; potentialBytes_ = densityBytes_ = minBytes_ = 0; - if (pp_) delete pp_; + if (pp_) { + delete pp_; + } pp_ = nullptr; myLo_[2] = myLo_[1] = myLo_[0] = 0; diff --git a/src/gravity/potential_paris_galactic.cu b/src/gravity/potential_paris_galactic.cu index c6c341aa6..291f2a059 100644 --- a/src/gravity/potential_paris_galactic.cu +++ b/src/gravity/potential_paris_galactic.cu @@ -168,20 +168,28 @@ void Potential_Paris_Galactic::Initialize(const Real lx, const Real ly, const Re void Potential_Paris_Galactic::Reset() { #ifndef GRAVITY_GPU - if (dc_) CHECK(cudaFree(dc_)); + if (dc_) { + CHECK(cudaFree(dc_)); + } dc_ = nullptr; potentialBytes_ = 0; #endif - if (db_) CHECK(cudaFree(db_)); + if (db_) { + CHECK(cudaFree(db_)); + } db_ = nullptr; - if (da_) CHECK(cudaFree(da_)); + if (da_) { + CHECK(cudaFree(da_)); + } da_ = nullptr; densityBytes_ = minBytes_ = 0; - if (pp_) delete pp_; + if (pp_) { + delete pp_; + } pp_ = nullptr; myLo_[2] = myLo_[1] = myLo_[0] = 0; diff --git a/src/grid/boundary_conditions.cpp b/src/grid/boundary_conditions.cpp index a1dfd7132..50c55126d 100644 --- a/src/grid/boundary_conditions.cpp +++ b/src/grid/boundary_conditions.cpp @@ -181,11 +181,15 @@ void Grid3D::Set_Boundaries(int dir, int flags[]) int *iaBoundary, *iaCell; /*if the cell face is an custom boundary, exit */ - if (flags[dir] == 4) return; + if (flags[dir] == 4) { + return; + } #ifdef MPI_CHOLLA /*if the cell face is an mpi boundary, exit */ - if (flags[dir] == 5) return; + if (flags[dir] == 5) { + return; + } #endif /*MPI_CHOLLA*/ #ifdef GRAVITY @@ -193,36 +197,84 @@ void Grid3D::Set_Boundaries(int dir, int flags[]) if (flags[dir] == 1) { // Set Periodic Boundaries for the ghost cells. #ifdef GRAVITY_GPU - if (dir == 0) Set_Potential_Boundaries_Periodic_GPU(0, 0, flags); - if (dir == 1) Set_Potential_Boundaries_Periodic_GPU(0, 1, flags); - if (dir == 2) Set_Potential_Boundaries_Periodic_GPU(1, 0, flags); - if (dir == 3) Set_Potential_Boundaries_Periodic_GPU(1, 1, flags); - if (dir == 4) Set_Potential_Boundaries_Periodic_GPU(2, 0, flags); - if (dir == 5) Set_Potential_Boundaries_Periodic_GPU(2, 1, flags); + if (dir == 0) { + Set_Potential_Boundaries_Periodic_GPU(0, 0, flags); + } + if (dir == 1) { + Set_Potential_Boundaries_Periodic_GPU(0, 1, flags); + } + if (dir == 2) { + Set_Potential_Boundaries_Periodic_GPU(1, 0, flags); + } + if (dir == 3) { + Set_Potential_Boundaries_Periodic_GPU(1, 1, flags); + } + if (dir == 4) { + Set_Potential_Boundaries_Periodic_GPU(2, 0, flags); + } + if (dir == 5) { + Set_Potential_Boundaries_Periodic_GPU(2, 1, flags); + } #else - if (dir == 0) Set_Potential_Boundaries_Periodic(0, 0, flags); - if (dir == 1) Set_Potential_Boundaries_Periodic(0, 1, flags); - if (dir == 2) Set_Potential_Boundaries_Periodic(1, 0, flags); - if (dir == 3) Set_Potential_Boundaries_Periodic(1, 1, flags); - if (dir == 4) Set_Potential_Boundaries_Periodic(2, 0, flags); - if (dir == 5) Set_Potential_Boundaries_Periodic(2, 1, flags); + if (dir == 0) { + Set_Potential_Boundaries_Periodic(0, 0, flags); + } + if (dir == 1) { + Set_Potential_Boundaries_Periodic(0, 1, flags); + } + if (dir == 2) { + Set_Potential_Boundaries_Periodic(1, 0, flags); + } + if (dir == 3) { + Set_Potential_Boundaries_Periodic(1, 1, flags); + } + if (dir == 4) { + Set_Potential_Boundaries_Periodic(2, 0, flags); + } + if (dir == 5) { + Set_Potential_Boundaries_Periodic(2, 1, flags); + } #endif } if (flags[dir] == 3) { #ifdef GRAVITY_GPU - if (dir == 0) Set_Potential_Boundaries_Isolated_GPU(0, 0, flags); - if (dir == 1) Set_Potential_Boundaries_Isolated_GPU(0, 1, flags); - if (dir == 2) Set_Potential_Boundaries_Isolated_GPU(1, 0, flags); - if (dir == 3) Set_Potential_Boundaries_Isolated_GPU(1, 1, flags); - if (dir == 4) Set_Potential_Boundaries_Isolated_GPU(2, 0, flags); - if (dir == 5) Set_Potential_Boundaries_Isolated_GPU(2, 1, flags); + if (dir == 0) { + Set_Potential_Boundaries_Isolated_GPU(0, 0, flags); + } + if (dir == 1) { + Set_Potential_Boundaries_Isolated_GPU(0, 1, flags); + } + if (dir == 2) { + Set_Potential_Boundaries_Isolated_GPU(1, 0, flags); + } + if (dir == 3) { + Set_Potential_Boundaries_Isolated_GPU(1, 1, flags); + } + if (dir == 4) { + Set_Potential_Boundaries_Isolated_GPU(2, 0, flags); + } + if (dir == 5) { + Set_Potential_Boundaries_Isolated_GPU(2, 1, flags); + } #else - if (dir == 0) Set_Potential_Boundaries_Isolated(0, 0, flags); - if (dir == 1) Set_Potential_Boundaries_Isolated(0, 1, flags); - if (dir == 2) Set_Potential_Boundaries_Isolated(1, 0, flags); - if (dir == 3) Set_Potential_Boundaries_Isolated(1, 1, flags); - if (dir == 4) Set_Potential_Boundaries_Isolated(2, 0, flags); - if (dir == 5) Set_Potential_Boundaries_Isolated(2, 1, flags); + if (dir == 0) { + Set_Potential_Boundaries_Isolated(0, 0, flags); + } + if (dir == 1) { + Set_Potential_Boundaries_Isolated(0, 1, flags); + } + if (dir == 2) { + Set_Potential_Boundaries_Isolated(1, 0, flags); + } + if (dir == 3) { + Set_Potential_Boundaries_Isolated(1, 1, flags); + } + if (dir == 4) { + Set_Potential_Boundaries_Isolated(2, 0, flags); + } + if (dir == 5) { + Set_Potential_Boundaries_Isolated(2, 1, flags); + } #endif // GRAVITY_GPU } return; @@ -230,12 +282,24 @@ void Grid3D::Set_Boundaries(int dir, int flags[]) #ifdef SOR if (Grav.Poisson_solver.TRANSFER_POISSON_BOUNDARIES) { if (flags[dir] == 1) { - if (dir == 0) Grav.Poisson_solver.Copy_Poisson_Boundary_Periodic(0, 0); - if (dir == 1) Grav.Poisson_solver.Copy_Poisson_Boundary_Periodic(0, 1); - if (dir == 2) Grav.Poisson_solver.Copy_Poisson_Boundary_Periodic(1, 0); - if (dir == 3) Grav.Poisson_solver.Copy_Poisson_Boundary_Periodic(1, 1); - if (dir == 4) Grav.Poisson_solver.Copy_Poisson_Boundary_Periodic(2, 0); - if (dir == 5) Grav.Poisson_solver.Copy_Poisson_Boundary_Periodic(2, 1); + if (dir == 0) { + Grav.Poisson_solver.Copy_Poisson_Boundary_Periodic(0, 0); + } + if (dir == 1) { + Grav.Poisson_solver.Copy_Poisson_Boundary_Periodic(0, 1); + } + if (dir == 2) { + Grav.Poisson_solver.Copy_Poisson_Boundary_Periodic(1, 0); + } + if (dir == 3) { + Grav.Poisson_solver.Copy_Poisson_Boundary_Periodic(1, 1); + } + if (dir == 4) { + Grav.Poisson_solver.Copy_Poisson_Boundary_Periodic(2, 0); + } + if (dir == 5) { + Grav.Poisson_solver.Copy_Poisson_Boundary_Periodic(2, 1); + } } return; } @@ -247,20 +311,44 @@ void Grid3D::Set_Boundaries(int dir, int flags[]) if (flags[dir] == 1) { // Set Periodic Boundaries for the particles density. #ifdef PARTICLES_GPU - if (dir == 0) Set_Particles_Density_Boundaries_Periodic_GPU(0, 0); - if (dir == 1) Set_Particles_Density_Boundaries_Periodic_GPU(0, 1); - if (dir == 2) Set_Particles_Density_Boundaries_Periodic_GPU(1, 0); - if (dir == 3) Set_Particles_Density_Boundaries_Periodic_GPU(1, 1); - if (dir == 4) Set_Particles_Density_Boundaries_Periodic_GPU(2, 0); - if (dir == 5) Set_Particles_Density_Boundaries_Periodic_GPU(2, 1); + if (dir == 0) { + Set_Particles_Density_Boundaries_Periodic_GPU(0, 0); + } + if (dir == 1) { + Set_Particles_Density_Boundaries_Periodic_GPU(0, 1); + } + if (dir == 2) { + Set_Particles_Density_Boundaries_Periodic_GPU(1, 0); + } + if (dir == 3) { + Set_Particles_Density_Boundaries_Periodic_GPU(1, 1); + } + if (dir == 4) { + Set_Particles_Density_Boundaries_Periodic_GPU(2, 0); + } + if (dir == 5) { + Set_Particles_Density_Boundaries_Periodic_GPU(2, 1); + } #endif #ifdef PARTICLES_CPU - if (dir == 0) Set_Particles_Density_Boundaries_Periodic(0, 0); - if (dir == 1) Set_Particles_Density_Boundaries_Periodic(0, 1); - if (dir == 2) Set_Particles_Density_Boundaries_Periodic(1, 0); - if (dir == 3) Set_Particles_Density_Boundaries_Periodic(1, 1); - if (dir == 4) Set_Particles_Density_Boundaries_Periodic(2, 0); - if (dir == 5) Set_Particles_Density_Boundaries_Periodic(2, 1); + if (dir == 0) { + Set_Particles_Density_Boundaries_Periodic(0, 0); + } + if (dir == 1) { + Set_Particles_Density_Boundaries_Periodic(0, 1); + } + if (dir == 2) { + Set_Particles_Density_Boundaries_Periodic(1, 0); + } + if (dir == 3) { + Set_Particles_Density_Boundaries_Periodic(1, 1); + } + if (dir == 4) { + Set_Particles_Density_Boundaries_Periodic(2, 0); + } + if (dir == 5) { + Set_Particles_Density_Boundaries_Periodic(2, 1); + } #endif } return; @@ -271,21 +359,45 @@ void Grid3D::Set_Boundaries(int dir, int flags[]) if (Particles.TRANSFER_PARTICLES_BOUNDARIES) { if (flags[dir] == 1) { #ifdef PARTICLES_CPU - if (dir == 0) Set_Particles_Boundary(0, 0); - if (dir == 1) Set_Particles_Boundary(0, 1); - if (dir == 2) Set_Particles_Boundary(1, 0); - if (dir == 3) Set_Particles_Boundary(1, 1); - if (dir == 4) Set_Particles_Boundary(2, 0); - if (dir == 5) Set_Particles_Boundary(2, 1); + if (dir == 0) { + Set_Particles_Boundary(0, 0); + } + if (dir == 1) { + Set_Particles_Boundary(0, 1); + } + if (dir == 2) { + Set_Particles_Boundary(1, 0); + } + if (dir == 3) { + Set_Particles_Boundary(1, 1); + } + if (dir == 4) { + Set_Particles_Boundary(2, 0); + } + if (dir == 5) { + Set_Particles_Boundary(2, 1); + } #endif // PARTICLES_CPU #ifdef PARTICLES_GPU - if (dir == 0) Set_Particles_Boundary_GPU(0, 0); - if (dir == 1) Set_Particles_Boundary_GPU(0, 1); - if (dir == 2) Set_Particles_Boundary_GPU(1, 0); - if (dir == 3) Set_Particles_Boundary_GPU(1, 1); - if (dir == 4) Set_Particles_Boundary_GPU(2, 0); - if (dir == 5) Set_Particles_Boundary_GPU(2, 1); + if (dir == 0) { + Set_Particles_Boundary_GPU(0, 0); + } + if (dir == 1) { + Set_Particles_Boundary_GPU(0, 1); + } + if (dir == 2) { + Set_Particles_Boundary_GPU(1, 0); + } + if (dir == 3) { + Set_Particles_Boundary_GPU(1, 1); + } + if (dir == 4) { + Set_Particles_Boundary_GPU(2, 0); + } + if (dir == 5) { + Set_Particles_Boundary_GPU(2, 1); + } #endif // PARTICLES_GPU } else if (flags[dir] == 3) { diff --git a/src/grid/cuda_boundaries.cu b/src/grid/cuda_boundaries.cu index f92ff4710..1e0257380 100644 --- a/src/grid/cuda_boundaries.cu +++ b/src/grid/cuda_boundaries.cu @@ -468,7 +468,9 @@ __global__ void Noh_Boundary_kernel(Real *c_device, int nx, int ny, int nz, int __syncthreads(); // +z boundary last (only if 3D) - if (nz == 1) return; + if (nz == 1) { + return; + } isize = nx; jsize = ny; diff --git a/src/grid/grid3D.cpp b/src/grid/grid3D.cpp index a086485d9..d83c32e0b 100644 --- a/src/grid/grid3D.cpp +++ b/src/grid/grid3D.cpp @@ -616,14 +616,26 @@ void Grid3D::FreeMemory(void) // If memory is single allocated, free the memory at the end of the simulation. #ifdef VL - if (H.nx > 1 && H.ny == 1 && H.nz == 1) Free_Memory_VL_1D(); - if (H.nx > 1 && H.ny > 1 && H.nz == 1) Free_Memory_VL_2D(); - if (H.nx > 1 && H.ny > 1 && H.nz > 1) Free_Memory_VL_3D(); + if (H.nx > 1 && H.ny == 1 && H.nz == 1) { + Free_Memory_VL_1D(); + } + if (H.nx > 1 && H.ny > 1 && H.nz == 1) { + Free_Memory_VL_2D(); + } + if (H.nx > 1 && H.ny > 1 && H.nz > 1) { + Free_Memory_VL_3D(); + } #endif // VL #ifdef SIMPLE - if (H.nx > 1 && H.ny == 1 && H.nz == 1) Free_Memory_Simple_1D(); - if (H.nx > 1 && H.ny > 1 && H.nz == 1) Free_Memory_Simple_2D(); - if (H.nx > 1 && H.ny > 1 && H.nz > 1) Free_Memory_Simple_3D(); + if (H.nx > 1 && H.ny == 1 && H.nz == 1) { + Free_Memory_Simple_1D(); + } + if (H.nx > 1 && H.ny > 1 && H.nz == 1) { + Free_Memory_Simple_2D(); + } + if (H.nx > 1 && H.ny > 1 && H.nz > 1) { + Free_Memory_Simple_3D(); + } #endif // SIMPLE #ifdef GRAVITY diff --git a/src/grid/mpi_boundaries.cpp b/src/grid/mpi_boundaries.cpp index da05f8447..9b858c8cf 100644 --- a/src/grid/mpi_boundaries.cpp +++ b/src/grid/mpi_boundaries.cpp @@ -52,7 +52,9 @@ void Grid3D::Set_Boundaries_MPI_BLOCK(int *flags, struct parameters P) Wait_and_Unload_MPI_Comm_Buffers(0, flags); #ifdef PARTICLES // Unload Particles buffers when transfering Particles - if (Particles.TRANSFER_PARTICLES_BOUNDARIES) Wait_and_Unload_MPI_Comm_Particles_Buffers_BLOCK(0, flags); + if (Particles.TRANSFER_PARTICLES_BOUNDARIES) { + Wait_and_Unload_MPI_Comm_Particles_Buffers_BLOCK(0, flags); + } #endif } } @@ -72,7 +74,9 @@ void Grid3D::Set_Boundaries_MPI_BLOCK(int *flags, struct parameters P) Wait_and_Unload_MPI_Comm_Buffers(1, flags); #ifdef PARTICLES // Unload Particles buffers when transfering Particles - if (Particles.TRANSFER_PARTICLES_BOUNDARIES) Wait_and_Unload_MPI_Comm_Particles_Buffers_BLOCK(1, flags); + if (Particles.TRANSFER_PARTICLES_BOUNDARIES) { + Wait_and_Unload_MPI_Comm_Particles_Buffers_BLOCK(1, flags); + } #endif } } @@ -92,13 +96,17 @@ void Grid3D::Set_Boundaries_MPI_BLOCK(int *flags, struct parameters P) Wait_and_Unload_MPI_Comm_Buffers(2, flags); #ifdef PARTICLES // Unload Particles buffers when transfering Particles - if (Particles.TRANSFER_PARTICLES_BOUNDARIES) Wait_and_Unload_MPI_Comm_Particles_Buffers_BLOCK(2, flags); + if (Particles.TRANSFER_PARTICLES_BOUNDARIES) { + Wait_and_Unload_MPI_Comm_Particles_Buffers_BLOCK(2, flags); + } #endif } } #ifdef PARTICLES - if (Particles.TRANSFER_PARTICLES_BOUNDARIES) Finish_Particles_Transfer(); + if (Particles.TRANSFER_PARTICLES_BOUNDARIES) { + Finish_Particles_Transfer(); + } #endif } @@ -458,7 +466,9 @@ void Grid3D::Load_and_Send_MPI_Comm_Buffers(int dir, int *flags) } // Receive the number of particles transfer for X #ifdef PARTICLES - if (Particles.TRANSFER_PARTICLES_BOUNDARIES) Wait_NTransfer_and_Request_Recv_Particles_Transfer_BLOCK(dir, flags); + if (Particles.TRANSFER_PARTICLES_BOUNDARIES) { + Wait_NTransfer_and_Request_Recv_Particles_Transfer_BLOCK(dir, flags); + } #endif } @@ -608,7 +618,9 @@ void Grid3D::Load_and_Send_MPI_Comm_Buffers(int dir, int *flags) } // Receive the number of particles transfer for Y #ifdef PARTICLES - if (Particles.TRANSFER_PARTICLES_BOUNDARIES) Wait_NTransfer_and_Request_Recv_Particles_Transfer_BLOCK(dir, flags); + if (Particles.TRANSFER_PARTICLES_BOUNDARIES) { + Wait_NTransfer_and_Request_Recv_Particles_Transfer_BLOCK(dir, flags); + } #endif } @@ -757,7 +769,9 @@ void Grid3D::Load_and_Send_MPI_Comm_Buffers(int dir, int *flags) } // Receive the number of particles transfer for Z #ifdef PARTICLES - if (Particles.TRANSFER_PARTICLES_BOUNDARIES) Wait_NTransfer_and_Request_Recv_Particles_Transfer_BLOCK(dir, flags); + if (Particles.TRANSFER_PARTICLES_BOUNDARIES) { + Wait_NTransfer_and_Request_Recv_Particles_Transfer_BLOCK(dir, flags); + } #endif } } @@ -767,7 +781,9 @@ void Grid3D::Wait_and_Unload_MPI_Comm_Buffers(int dir, int *flags) #ifdef PARTICLES // If we are transfering the particles buffers we dont need to unload the main // buffers - if (Particles.TRANSFER_PARTICLES_BOUNDARIES) return; + if (Particles.TRANSFER_PARTICLES_BOUNDARIES) { + return; + } #endif int iwait; @@ -893,12 +909,24 @@ void Grid3D::Unload_MPI_Comm_Buffers(int index) #endif // GRAVITY_GPU - if (index == 0) (this->*Fptr_Unload_Gravity_Potential)(0, 0, l_recv_buffer_x0, 0); - if (index == 1) (this->*Fptr_Unload_Gravity_Potential)(0, 1, l_recv_buffer_x1, 0); - if (index == 2) (this->*Fptr_Unload_Gravity_Potential)(1, 0, l_recv_buffer_y0, 0); - if (index == 3) (this->*Fptr_Unload_Gravity_Potential)(1, 1, l_recv_buffer_y1, 0); - if (index == 4) (this->*Fptr_Unload_Gravity_Potential)(2, 0, l_recv_buffer_z0, 0); - if (index == 5) (this->*Fptr_Unload_Gravity_Potential)(2, 1, l_recv_buffer_z1, 0); + if (index == 0) { + (this->*Fptr_Unload_Gravity_Potential)(0, 0, l_recv_buffer_x0, 0); + } + if (index == 1) { + (this->*Fptr_Unload_Gravity_Potential)(0, 1, l_recv_buffer_x1, 0); + } + if (index == 2) { + (this->*Fptr_Unload_Gravity_Potential)(1, 0, l_recv_buffer_y0, 0); + } + if (index == 3) { + (this->*Fptr_Unload_Gravity_Potential)(1, 1, l_recv_buffer_y1, 0); + } + if (index == 4) { + (this->*Fptr_Unload_Gravity_Potential)(2, 0, l_recv_buffer_z0, 0); + } + if (index == 5) { + (this->*Fptr_Unload_Gravity_Potential)(2, 1, l_recv_buffer_z1, 0); + } } #ifdef SOR @@ -910,12 +938,24 @@ void Grid3D::Unload_MPI_Comm_Buffers(int index) l_recv_buffer_z0 = h_recv_buffer_z0; l_recv_buffer_z1 = h_recv_buffer_z1; - if (index == 0) Unload_Poisson_Boundary_From_Buffer(0, 0, l_recv_buffer_x0); - if (index == 1) Unload_Poisson_Boundary_From_Buffer(0, 1, l_recv_buffer_x1); - if (index == 2) Unload_Poisson_Boundary_From_Buffer(1, 0, l_recv_buffer_y0); - if (index == 3) Unload_Poisson_Boundary_From_Buffer(1, 1, l_recv_buffer_y1); - if (index == 4) Unload_Poisson_Boundary_From_Buffer(2, 0, l_recv_buffer_z0); - if (index == 5) Unload_Poisson_Boundary_From_Buffer(2, 1, l_recv_buffer_z1); + if (index == 0) { + Unload_Poisson_Boundary_From_Buffer(0, 0, l_recv_buffer_x0); + } + if (index == 1) { + Unload_Poisson_Boundary_From_Buffer(0, 1, l_recv_buffer_x1); + } + if (index == 2) { + Unload_Poisson_Boundary_From_Buffer(1, 0, l_recv_buffer_y0); + } + if (index == 3) { + Unload_Poisson_Boundary_From_Buffer(1, 1, l_recv_buffer_y1); + } + if (index == 4) { + Unload_Poisson_Boundary_From_Buffer(2, 0, l_recv_buffer_z0); + } + if (index == 5) { + Unload_Poisson_Boundary_From_Buffer(2, 1, l_recv_buffer_z1); + } } #endif // SOR @@ -940,12 +980,24 @@ void Grid3D::Unload_MPI_Comm_Buffers(int index) #else #ifdef MPI_GPU - if (index == 0) Copy_Particles_Density_Buffer_Device_to_Host(0, 0, d_recv_buffer_x0, h_recv_buffer_x0_particles); - if (index == 1) Copy_Particles_Density_Buffer_Device_to_Host(0, 1, d_recv_buffer_x1, h_recv_buffer_x1_particles); - if (index == 2) Copy_Particles_Density_Buffer_Device_to_Host(1, 0, d_recv_buffer_y0, h_recv_buffer_y0_particles); - if (index == 3) Copy_Particles_Density_Buffer_Device_to_Host(1, 1, d_recv_buffer_y1, h_recv_buffer_y1_particles); - if (index == 4) Copy_Particles_Density_Buffer_Device_to_Host(2, 0, d_recv_buffer_z0, h_recv_buffer_z0_particles); - if (index == 5) Copy_Particles_Density_Buffer_Device_to_Host(2, 1, d_recv_buffer_z1, h_recv_buffer_z1_particles); + if (index == 0) { + Copy_Particles_Density_Buffer_Device_to_Host(0, 0, d_recv_buffer_x0, h_recv_buffer_x0_particles); + } + if (index == 1) { + Copy_Particles_Density_Buffer_Device_to_Host(0, 1, d_recv_buffer_x1, h_recv_buffer_x1_particles); + } + if (index == 2) { + Copy_Particles_Density_Buffer_Device_to_Host(1, 0, d_recv_buffer_y0, h_recv_buffer_y0_particles); + } + if (index == 3) { + Copy_Particles_Density_Buffer_Device_to_Host(1, 1, d_recv_buffer_y1, h_recv_buffer_y1_particles); + } + if (index == 4) { + Copy_Particles_Density_Buffer_Device_to_Host(2, 0, d_recv_buffer_z0, h_recv_buffer_z0_particles); + } + if (index == 5) { + Copy_Particles_Density_Buffer_Device_to_Host(2, 1, d_recv_buffer_z1, h_recv_buffer_z1_particles); + } l_recv_buffer_x0 = h_recv_buffer_x0_particles; l_recv_buffer_x1 = h_recv_buffer_x1_particles; l_recv_buffer_y0 = h_recv_buffer_y0_particles; @@ -965,12 +1017,24 @@ void Grid3D::Unload_MPI_Comm_Buffers(int index) #endif // PARTICLES_GPU - if (index == 0) (this->*Fptr_Unload_Particle_Density)(0, 0, l_recv_buffer_x0); - if (index == 1) (this->*Fptr_Unload_Particle_Density)(0, 1, l_recv_buffer_x1); - if (index == 2) (this->*Fptr_Unload_Particle_Density)(1, 0, l_recv_buffer_y0); - if (index == 3) (this->*Fptr_Unload_Particle_Density)(1, 1, l_recv_buffer_y1); - if (index == 4) (this->*Fptr_Unload_Particle_Density)(2, 0, l_recv_buffer_z0); - if (index == 5) (this->*Fptr_Unload_Particle_Density)(2, 1, l_recv_buffer_z1); + if (index == 0) { + (this->*Fptr_Unload_Particle_Density)(0, 0, l_recv_buffer_x0); + } + if (index == 1) { + (this->*Fptr_Unload_Particle_Density)(0, 1, l_recv_buffer_x1); + } + if (index == 2) { + (this->*Fptr_Unload_Particle_Density)(1, 0, l_recv_buffer_y0); + } + if (index == 3) { + (this->*Fptr_Unload_Particle_Density)(1, 1, l_recv_buffer_y1); + } + if (index == 4) { + (this->*Fptr_Unload_Particle_Density)(2, 0, l_recv_buffer_z0); + } + if (index == 5) { + (this->*Fptr_Unload_Particle_Density)(2, 1, l_recv_buffer_z1); + } } #endif // PARTICLES diff --git a/src/hydro/hydro_cuda.cu b/src/hydro/hydro_cuda.cu index c3d48ec86..dad6f3b66 100644 --- a/src/hydro/hydro_cuda.cu +++ b/src/hydro/hydro_cuda.cu @@ -1112,11 +1112,15 @@ __global__ void Apply_Temperature_Floor(Real *dev_conserved, int nx, int ny, int Ekin = 0.5 * d * (vx * vx + vy * vy + vz * vz); U = (E - Ekin) / d; - if (U < U_floor) dev_conserved[4 * n_cells + id] = Ekin + d * U_floor; + if (U < U_floor) { + dev_conserved[4 * n_cells + id] = Ekin + d * U_floor; + } #ifdef DE U = dev_conserved[(n_fields - 1) * n_cells + id] / d; - if (U < U_floor) dev_conserved[(n_fields - 1) * n_cells + id] = d * U_floor; + if (U < U_floor) { + dev_conserved[(n_fields - 1) * n_cells + id] = d * U_floor; + } #endif } } diff --git a/src/io/io.cpp b/src/io/io.cpp index 1c6304c52..cc2a922b8 100644 --- a/src/io/io.cpp +++ b/src/io/io.cpp @@ -33,7 +33,9 @@ void rotate_point(Real x, Real y, Real z, Real delta, Real phi, Real theta, Real void Create_Log_File(struct parameters P) { #ifdef MPI_CHOLLA - if (procID != 0) return; + if (procID != 0) { + return; + } #endif std::string file_name(LOG_FILE_NAME); @@ -61,7 +63,9 @@ void Create_Log_File(struct parameters P) void Write_Message_To_Log_File(const char *message) { #ifdef MPI_CHOLLA - if (procID != 0) return; + if (procID != 0) { + return; + } #endif std::string file_name(LOG_FILE_NAME); @@ -103,28 +107,40 @@ void WriteData(Grid3D &G, struct parameters P, int nfile) #ifndef ONLY_PARTICLES /*call the data output routine for Hydro data*/ - if (nfile % P.n_hydro == 0) OutputData(G, P, nfile); + if (nfile % P.n_hydro == 0) { + OutputData(G, P, nfile); + } #endif // This function does other checks to make sure it is valid (3D only) #ifdef HDF5 - if (P.n_out_float32 && nfile % P.n_out_float32 == 0) OutputFloat32(G, P, nfile); + if (P.n_out_float32 && nfile % P.n_out_float32 == 0) { + OutputFloat32(G, P, nfile); + } #endif #ifdef PROJECTION - if (nfile % P.n_projection == 0) OutputProjectedData(G, P, nfile); + if (nfile % P.n_projection == 0) { + OutputProjectedData(G, P, nfile); + } #endif /*PROJECTION*/ #ifdef ROTATED_PROJECTION - if (nfile % P.n_rotated_projection == 0) OutputRotatedProjectedData(G, P, nfile); + if (nfile % P.n_rotated_projection == 0) { + OutputRotatedProjectedData(G, P, nfile); + } #endif /*ROTATED_PROJECTION*/ #ifdef SLICES - if (nfile % P.n_slice == 0) OutputSlices(G, P, nfile); + if (nfile % P.n_slice == 0) { + OutputSlices(G, P, nfile); + } #endif /*SLICES*/ #ifdef PARTICLES - if (nfile % P.n_particle == 0) G.WriteData_Particles(P, nfile); + if (nfile % P.n_particle == 0) { + G.WriteData_Particles(P, nfile); + } #endif #ifdef COSMOLOGY @@ -2728,8 +2744,12 @@ void Grid3D::Read_Grid_HDF5(hid_t file_id, struct parameters P) buf_id = k + j * H.nz_real + i * H.nz_real * H.ny_real; C.density[id] = dataset_buffer[buf_id]; mean_l += C.density[id]; - if (C.density[id] > max_l) max_l = C.density[id]; - if (C.density[id] < min_l) min_l = C.density[id]; + if (C.density[id] > max_l) { + max_l = C.density[id]; + } + if (C.density[id] < min_l) { + min_l = C.density[id]; + } } } } @@ -2767,8 +2787,12 @@ void Grid3D::Read_Grid_HDF5(hid_t file_id, struct parameters P) buf_id = k + j * H.nz_real + i * H.nz_real * H.ny_real; C.momentum_x[id] = dataset_buffer[buf_id]; mean_l += fabs(C.momentum_x[id]); - if (fabs(C.momentum_x[id]) > max_l) max_l = fabs(C.momentum_x[id]); - if (fabs(C.momentum_x[id]) < min_l) min_l = fabs(C.momentum_x[id]); + if (fabs(C.momentum_x[id]) > max_l) { + max_l = fabs(C.momentum_x[id]); + } + if (fabs(C.momentum_x[id]) < min_l) { + min_l = fabs(C.momentum_x[id]); + } } } } @@ -2809,8 +2833,12 @@ void Grid3D::Read_Grid_HDF5(hid_t file_id, struct parameters P) buf_id = k + j * H.nz_real + i * H.nz_real * H.ny_real; C.momentum_y[id] = dataset_buffer[buf_id]; mean_l += fabs(C.momentum_y[id]); - if (fabs(C.momentum_y[id]) > max_l) max_l = fabs(C.momentum_y[id]); - if (fabs(C.momentum_y[id]) < min_l) min_l = fabs(C.momentum_y[id]); + if (fabs(C.momentum_y[id]) > max_l) { + max_l = fabs(C.momentum_y[id]); + } + if (fabs(C.momentum_y[id]) < min_l) { + min_l = fabs(C.momentum_y[id]); + } } } } @@ -2851,8 +2879,12 @@ void Grid3D::Read_Grid_HDF5(hid_t file_id, struct parameters P) buf_id = k + j * H.nz_real + i * H.nz_real * H.ny_real; C.momentum_z[id] = dataset_buffer[buf_id]; mean_l += fabs(C.momentum_z[id]); - if (fabs(C.momentum_z[id]) > max_l) max_l = fabs(C.momentum_z[id]); - if (fabs(C.momentum_z[id]) < min_l) min_l = fabs(C.momentum_z[id]); + if (fabs(C.momentum_z[id]) > max_l) { + max_l = fabs(C.momentum_z[id]); + } + if (fabs(C.momentum_z[id]) < min_l) { + min_l = fabs(C.momentum_z[id]); + } } } } @@ -2893,8 +2925,12 @@ void Grid3D::Read_Grid_HDF5(hid_t file_id, struct parameters P) buf_id = k + j * H.nz_real + i * H.nz_real * H.ny_real; C.Energy[id] = dataset_buffer[buf_id]; mean_l += C.Energy[id]; - if (C.Energy[id] > max_l) max_l = C.Energy[id]; - if (C.Energy[id] < min_l) min_l = C.Energy[id]; + if (C.Energy[id] > max_l) { + max_l = C.Energy[id]; + } + if (C.Energy[id] < min_l) { + min_l = C.Energy[id]; + } } } } @@ -2941,13 +2977,21 @@ void Grid3D::Read_Grid_HDF5(hid_t file_id, struct parameters P) buf_id = k + j * H.nz_real + i * H.nz_real * H.ny_real; C.GasEnergy[id] = dataset_buffer[buf_id]; mean_l += C.GasEnergy[id]; - if (C.GasEnergy[id] > max_l) max_l = C.GasEnergy[id]; - if (C.GasEnergy[id] < min_l) min_l = C.GasEnergy[id]; + if (C.GasEnergy[id] > max_l) { + max_l = C.GasEnergy[id]; + } + if (C.GasEnergy[id] < min_l) { + min_l = C.GasEnergy[id]; + } temp = C.GasEnergy[id] / C.density[id] * (gama - 1) * MP / KB * 1e10; temp_mean_l += temp; // chprintf( "%f\n", temp); - if (temp > temp_max_l) temp_max_l = temp; - if (temp < temp_min_l) temp_min_l = temp; + if (temp > temp_max_l) { + temp_max_l = temp; + } + if (temp < temp_min_l) { + temp_min_l = temp; + } } } } @@ -3346,7 +3390,9 @@ void write_debug(Real *Value, const char *fname, int nValues, int iProc) sprintf(fn, "%s_%07d.txt", fname, iProc); FILE *fp = fopen(fn, "w"); - for (int iV = 0; iV < nValues; iV++) fprintf(fp, "%e\n", Value[iV]); + for (int iV = 0; iV < nValues; iV++) { + fprintf(fp, "%e\n", Value[iV]); + } fclose(fp); } diff --git a/src/main.cpp b/src/main.cpp index bcda7a32a..2d1e9e4d7 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -80,7 +80,9 @@ int main(int argc, char *argv[]) "Parameter values: nx = %d, ny = %d, nz = %d, tout = %f, init = %s, " "boundaries = %d %d %d %d %d %d\n", P.nx, P.ny, P.nz, P.tout, P.init, P.xl_bcnd, P.xu_bcnd, P.yl_bcnd, P.yu_bcnd, P.zl_bcnd, P.zu_bcnd); - if (strcmp(P.init, "Read_Grid") == 0) chprintf("Input directory: %s\n", P.indir); + if (strcmp(P.init, "Read_Grid") == 0) { + chprintf("Input directory: %s\n", P.indir); + } chprintf("Output directory: %s\n", P.outdir); // Create a Log file to output run-time messages and output the git hash and @@ -140,7 +142,9 @@ int main(int argc, char *argv[]) #ifdef ANALYSIS G.Initialize_Analysis_Module(&P); - if (G.Analysis.Output_Now) G.Compute_and_Output_Analysis(&P); + if (G.Analysis.Output_Now) { + G.Compute_and_Output_Analysis(&P); + } #endif #if defined(SUPERNOVA) && defined(PARTICLE_AGE) @@ -235,7 +239,9 @@ int main(int argc, char *argv[]) // determine the global timestep G.set_dt(dti); - if (G.H.t + G.H.dt > outtime) G.H.dt = outtime - G.H.t; + if (G.H.t + G.H.dt > outtime) { + G.H.dt = outtime - G.H.t; + } #if defined(SUPERNOVA) && defined(PARTICLE_AGE) supernova::Cluster_Feedback(G, sn_analysis); @@ -305,7 +311,9 @@ int main(int argc, char *argv[]) #endif #ifdef ANALYSIS - if (G.Analysis.Output_Now) G.Compute_and_Output_Analysis(&P); + if (G.Analysis.Output_Now) { + G.Compute_and_Output_Analysis(&P); + } #if defined(SUPERNOVA) && defined(PARTICLE_AGE) sn_analysis.Compute_Gas_Velocity_Dispersion(G); #endif diff --git a/src/main_tests.cpp b/src/main_tests.cpp index 5c3a58be6..1efd9f43e 100644 --- a/src/main_tests.cpp +++ b/src/main_tests.cpp @@ -85,7 +85,9 @@ class InputParser */ InputParser(int &argc, char **argv) { - for (int i = 1; i < argc; ++i) this->_tokens.push_back(std::string(argv[i])); + for (int i = 1; i < argc; ++i) { + this->_tokens.push_back(std::string(argv[i])); + } } ~InputParser() = default; // ===================================================================== diff --git a/src/model/disk_ICs.cpp b/src/model/disk_ICs.cpp index bce2d674d..fcefbf767 100644 --- a/src/model/disk_ICs.cpp +++ b/src/model/disk_ICs.cpp @@ -121,7 +121,9 @@ Real phi_halo_D3D(Real R, Real z, Real *hdp) Real C = GN * M_h / (R_h * log_func(c_vir)); // limit x to non-zero value - if (x < 1.0e-9) x = 1.0e-9; + if (x < 1.0e-9) { + x = 1.0e-9; + } // checked with wolfram alpha return -C * log(1 + x) / x; @@ -320,7 +322,9 @@ void hydrostatic_column_isothermal_D3D(Real *rho, Real R, Real *hdp, Real dz, in for (k = ks; k < nzt; k++) { z_1 = z_hc_D3D(k, dz, nz, ng) + 0.5 * dz; // cell ceiling D_rho = (phi_total_D3D(R, z_1, hdp) - Phi_0) / (cs * cs); - if (D_rho >= 7.0) break; + if (D_rho >= 7.0) { + break; + } } // if(R<1.0) // printf("Cells above disk (k-ks) = %d, z_1 = %e, exp(-D) = %e, R = @@ -351,7 +355,9 @@ void hydrostatic_column_isothermal_D3D(Real *rho, Real R, Real *hdp, Real dz, in // find cell center, bottom, and top z_int_min = z_hc_D3D(k, dz, nz, ng) - 0.5 * dz; z_int_max = z_hc_D3D(k, dz, nz, ng) + 0.5 * dz; - if (z_int_max > z_disk_max) z_int_max = z_disk_max; + if (z_int_max > z_disk_max) { + z_int_max = z_disk_max; + } if (!flag) { dz_int = (z_int_max - z_int_min) / ((Real)(n_int)); phi_int = 0.0; @@ -385,7 +391,9 @@ void hydrostatic_column_isothermal_D3D(Real *rho, Real R, Real *hdp, Real dz, in // check the surface density phi_int = 0.0; - for (k = 0; k < nzt; k++) phi_int += rho[k] * dz; + for (k = 0; k < nzt; k++) { + phi_int += rho[k] * dz; + } // printf("Surface density check R %e Sigma_r %e integral(rho*dz) // %e\n",R,Sigma_r,phi_int); printf("Done with isothermal disk.\n"); @@ -499,7 +507,9 @@ void hydrostatic_column_analytical_D3D(Real *rho, Real R, Real *hdp, Real dz, in A_0 = D_rho - (phi_total_D3D(R, z_0, hdp) - Phi_0) / (cs * cs); A_1 = D_rho - (phi_total_D3D(R, z_1, hdp) - Phi_0) / (cs * cs); z_2 = z_1 - A_1 * (z_1 - z_0) / (A_1 - A_0); - if (fabs(z_2 - z_1) / fabs(z_1) > 10.) z_2 = 10. * z_1; + if (fabs(z_2 - z_1) / fabs(z_1) > 10.) { + z_2 = 10. * z_1; + } // advance limit z_0 = z_1; z_1 = z_2; @@ -509,7 +519,9 @@ void hydrostatic_column_analytical_D3D(Real *rho, Real R, Real *hdp, Real dz, in A_0 = D_rho - (phi_total_D3D(R, z_0, hdp) - Phi_0) / (cs * cs); A_1 = D_rho - (phi_total_D3D(R, z_1, hdp) - Phi_0) / (cs * cs); // make sure we haven't crossed 0 - if (A_1 < 0) z_1 = z_0; + if (A_1 < 0) { + z_1 = z_0; + } } iter_phi++; if (iter_phi > 1000) { @@ -566,7 +578,9 @@ void hydrostatic_column_analytical_D3D(Real *rho, Real R, Real *hdp, Real dz, in // find cell center, bottom, and top z_int_min = z_hc_D3D(k, dz, nz, ng) - 0.5 * dz; z_int_max = z_hc_D3D(k, dz, nz, ng) + 0.5 * dz; - if (z_int_max > z_disk_max) z_int_max = z_disk_max; + if (z_int_max > z_disk_max) { + z_int_max = z_disk_max; + } if (!flag) { dz_int = (z_int_max - z_int_min) / ((Real)(n_int)); phi_int = 0.0; @@ -602,7 +616,9 @@ void hydrostatic_column_analytical_D3D(Real *rho, Real R, Real *hdp, Real dz, in // check the surface density phi_int = 0.0; - for (k = 0; k < nzt; k++) phi_int += rho[k] * dz; + for (k = 0; k < nzt; k++) { + phi_int += rho[k] * dz; + } } Real determine_rho_eos_D3D(Real cs, Real Sigma_0, Real *hdp) @@ -633,7 +649,9 @@ Real determine_rho_eos_D3D(Real cs, Real Sigma_0, Real *hdp) A_1 = 1.0 - (phi_total_D3D(0, z_1, hdp) - Phi_0) / (cs * cs); z_2 = z_1 - A_1 * (z_1 - z_0) / (A_1 - A_0); - if (fabs(z_2 - z_1) / fabs(z_1) > 10.) z_2 = 10. * z_1; + if (fabs(z_2 - z_1) / fabs(z_1) > 10.) { + z_2 = 10. * z_1; + } // advance limit z_0 = z_1; @@ -645,7 +663,9 @@ Real determine_rho_eos_D3D(Real cs, Real Sigma_0, Real *hdp) A_0 = 1.0 - (phi_total_D3D(0, z_0, hdp) - Phi_0) / (cs * cs); A_1 = 1.0 - (phi_total_D3D(0, z_1, hdp) - Phi_0) / (cs * cs); // make sure we haven't crossed 0 - if (A_1 < 0) z_1 = z_0; + if (A_1 < 0) { + z_1 = z_0; + } } iter_phi++; if (iter_phi > 1000) { diff --git a/src/model/disk_galaxy.h b/src/model/disk_galaxy.h index 08e1190be..04249853f 100644 --- a/src/model/disk_galaxy.h +++ b/src/model/disk_galaxy.h @@ -76,7 +76,9 @@ class DiskGalaxy Real C = GN * M_h / (R_h * log_func(c_vir)); // limit x to non-zero value - if (x < 1.0e-9) x = 1.0e-9; + if (x < 1.0e-9) { + x = 1.0e-9; + } return -C * log(1 + x) / x; }; diff --git a/src/mpi/MPI_Comm_node.c b/src/mpi/MPI_Comm_node.c index ae519dec5..80edaf594 100644 --- a/src/mpi/MPI_Comm_node.c +++ b/src/mpi/MPI_Comm_node.c @@ -58,7 +58,9 @@ int djb2_hash(char *str) { int hash = 5381; int c; - while ((c = *str++)) hash = ((hash << 5) + hash) + c; /*hash*33 + c*/ + while ((c = *str++)) { + hash = ((hash << 5) + hash) + c; + } /*hash*33 + c*/ return hash; } #endif /*MPI_CHOLLA*/ diff --git a/src/mpi/mpi_routines.cpp b/src/mpi/mpi_routines.cpp index cf35af02a..2a464c8a4 100644 --- a/src/mpi/mpi_routines.cpp +++ b/src/mpi/mpi_routines.cpp @@ -337,19 +337,31 @@ void DomainDecompositionBLOCK(struct parameters *P, struct Header *H, int nx_gin if (n == procID) { dest[0] = i - 1; - if (dest[0] < 0) dest[0] += nproc_x; + if (dest[0] < 0) { + dest[0] += nproc_x; + } dest[1] = i + 1; - if (dest[1] >= nproc_x) dest[1] -= nproc_x; + if (dest[1] >= nproc_x) { + dest[1] -= nproc_x; + } dest[2] = j - 1; - if (dest[2] < 0) dest[2] += nproc_y; + if (dest[2] < 0) { + dest[2] += nproc_y; + } dest[3] = j + 1; - if (dest[3] >= nproc_y) dest[3] -= nproc_y; + if (dest[3] >= nproc_y) { + dest[3] -= nproc_y; + } dest[4] = k - 1; - if (dest[4] < 0) dest[4] += nproc_z; + if (dest[4] < 0) { + dest[4] += nproc_z; + } dest[5] = k + 1; - if (dest[5] >= nproc_z) dest[5] -= nproc_z; + if (dest[5] >= nproc_z) { + dest[5] -= nproc_z; + } } n++; } @@ -402,7 +414,9 @@ void DomainDecompositionBLOCK(struct parameters *P, struct Header *H, int nx_gin } // find MPI sources - for (i = 0; i < 6; i++) source[i] = dest[i]; + for (i = 0; i < 6; i++) { + source[i] = dest[i]; + } // find MPI destinations dest[0] = tiling[dest[0]][iy[procID]][iz[procID]]; @@ -442,11 +456,15 @@ void DomainDecompositionBLOCK(struct parameters *P, struct Header *H, int nx_gin if (ix[procID] == 0) { P->xu_bcnd = 5; // if the global bcnd is periodic, use MPI bcnds at ends - if (P->xl_bcnd == 1) P->xl_bcnd = 5; + if (P->xl_bcnd == 1) { + P->xl_bcnd = 5; + } } else { P->xl_bcnd = 5; // if the global bcnd is periodic, use MPI bcnds at ends - if (P->xu_bcnd == 1) P->xu_bcnd = 5; + if (P->xu_bcnd == 1) { + P->xu_bcnd = 5; + } } } else { // this is completely an interior cell @@ -465,11 +483,15 @@ void DomainDecompositionBLOCK(struct parameters *P, struct Header *H, int nx_gin if (iy[procID] == 0) { P->yu_bcnd = 5; // if the global bcnd is periodic, use MPI bcnds at ends - if (P->yl_bcnd == 1) P->yl_bcnd = 5; + if (P->yl_bcnd == 1) { + P->yl_bcnd = 5; + } } else { P->yl_bcnd = 5; // if the global bcnd is periodic, use MPI bcnds at ends - if (P->yu_bcnd == 1) P->yu_bcnd = 5; + if (P->yu_bcnd == 1) { + P->yu_bcnd = 5; + } } } else { // this is completely an interior cell @@ -488,11 +510,15 @@ void DomainDecompositionBLOCK(struct parameters *P, struct Header *H, int nx_gin if (iz[procID] == 0) { P->zu_bcnd = 5; // if the global bcnd is periodic, use MPI bcnds at ends - if (P->zl_bcnd == 1) P->zl_bcnd = 5; + if (P->zl_bcnd == 1) { + P->zl_bcnd = 5; + } } else { P->zl_bcnd = 5; // if the global bcnd is periodic, use MPI bcnds at ends - if (P->zu_bcnd == 1) P->zu_bcnd = 5; + if (P->zu_bcnd == 1) { + P->zu_bcnd = 5; + } } } else { // this is completely an interior cell @@ -715,7 +741,9 @@ part_int_t Get_Particles_IDs_Global_MPI_Offset(part_int_t n_local) MPI_Allgather(n_local_send, 1, MPI_PART_INT, n_local_all, 1, MPI_PART_INT, world); global_offset = 0; for (int other_rank = 0; other_rank < nproc; other_rank++) { - if (other_rank < procID) global_offset += n_local_all[other_rank]; + if (other_rank < procID) { + global_offset += n_local_all[other_rank]; + } } // printf("global_offset = %ld \n", global_offset ); free(n_local_send); @@ -751,7 +779,9 @@ void Print_Domain_Properties(struct Header H) void Check_and_Grow_Particles_Buffer(Real **part_buffer, int *current_size_ptr, int new_size) { int current_size = *current_size_ptr; - if (new_size <= current_size) return; + if (new_size <= current_size) { + return; + } new_size = (int)2 * new_size; std::cout << " ####### Growing Particles Transfer Buffer, size: " << current_size << " new_size: " << new_size @@ -774,14 +804,18 @@ int greatest_prime_factor(int n) int ns = n; int np = 2; - if (n == 1 || n == 2) return n; + if (n == 1 || n == 2) { + return n; + } while (true) { while (!(ns % np)) { ns = ns / np; } - if (ns == 1) break; + if (ns == 1) { + break; + } np++; } @@ -841,7 +875,9 @@ void TileBlockDecomposition(void) /*increase ny, nz round-robin*/ while (np_x * np_y * np_z < nproc) { np_y *= 2; - if (np_x * np_y * np_z == nproc) break; + if (np_x * np_y * np_z == nproc) { + break; + } np_z *= 2; } } @@ -852,9 +888,13 @@ void TileBlockDecomposition(void) /*increase nx, ny, nz round-robin*/ while (np_x * np_y * np_z < nproc) { np_x *= 2; - if (np_x * np_y * np_z == nproc) break; + if (np_x * np_y * np_z == nproc) { + break; + } np_y *= 2; - if (np_x * np_y * np_z == nproc) break; + if (np_x * np_y * np_z == nproc) { + break; + } np_z *= 2; } } @@ -908,7 +948,9 @@ int ***three_dimensional_int_array(int n, int l, int m) void deallocate_three_dimensional_int_array(int ***x, int n, int l, int m) { for (int i = 0; i < n; i++) { - for (int j = 0; j < l; j++) delete[] x[i][j]; + for (int j = 0; j < l; j++) { + delete[] x[i][j]; + } delete[] x[i]; } delete x; diff --git a/src/particles/density_CIC_gpu.cu b/src/particles/density_CIC_gpu.cu index e4d8a0e52..86ddd7e36 100644 --- a/src/particles/density_CIC_gpu.cu +++ b/src/particles/density_CIC_gpu.cu @@ -53,7 +53,9 @@ __global__ void Get_Density_CIC_Kernel(part_int_t n_local, Real particle_mass, R int ny, int nz, int n_ghost) { int tid = blockIdx.x * blockDim.x + threadIdx.x; - if (tid >= n_local) return; + if (tid >= n_local) { + return; + } int nx_g, ny_g; nx_g = nx + 2 * n_ghost; @@ -79,9 +81,15 @@ __global__ void Get_Density_CIC_Kernel(part_int_t n_local, Real particle_mass, R bool in_local = true; - if (pos_x < xMin || pos_x >= xMax) in_local = false; - if (pos_y < yMin || pos_y >= yMax) in_local = false; - if (pos_z < zMin || pos_z >= zMax) in_local = false; + if (pos_x < xMin || pos_x >= xMax) { + in_local = false; + } + if (pos_y < yMin || pos_y >= yMax) { + in_local = false; + } + if (pos_z < zMin || pos_z >= zMax) { + in_local = false; + } if (!in_local) { printf( " Density CIC Error: Particle outside local domain [%f %f %f] [%f " diff --git a/src/particles/density_boundaries.cpp b/src/particles/density_boundaries.cpp index 9f3c73f89..15680d6f8 100644 --- a/src/particles/density_boundaries.cpp +++ b/src/particles/density_boundaries.cpp @@ -93,9 +93,15 @@ void Grid3D::Copy_Particles_Density_Buffer_Device_to_Host(int direction, int sid ny_g = Particles.G.ny_local + 2 * nGHST; nz_g = Particles.G.nz_local + 2 * nGHST; - if (direction == 0) buffer_length = nGHST * ny_g * nz_g; - if (direction == 1) buffer_length = nGHST * nx_g * nz_g; - if (direction == 2) buffer_length = nGHST * nx_g * ny_g; + if (direction == 0) { + buffer_length = nGHST * ny_g * nz_g; + } + if (direction == 1) { + buffer_length = nGHST * nx_g * nz_g; + } + if (direction == 2) { + buffer_length = nGHST * nx_g * ny_g; + } cudaMemcpy(buffer_h, buffer_d, buffer_length * sizeof(Real), cudaMemcpyDeviceToHost); } @@ -116,8 +122,12 @@ int Grid3D::Load_Particles_Density_Boundary_to_Buffer(int direction, int side, R for (k = 0; k < nGHST; k++) { for (j = 0; j < ny_g; j++) { for (i = 0; i < nx_g; i++) { - if (side == 0) indx = (i) + (j)*nx_g + (k)*nx_g * ny_g; - if (side == 1) indx = (i) + (j)*nx_g + (nz_g - nGHST + k) * nx_g * ny_g; + if (side == 0) { + indx = (i) + (j)*nx_g + (k)*nx_g * ny_g; + } + if (side == 1) { + indx = (i) + (j)*nx_g + (nz_g - nGHST + k) * nx_g * ny_g; + } indx_buff = i + j * nx_g + k * nx_g * ny_g; buffer[indx_buff] = Particles.G.density[indx]; } @@ -131,8 +141,12 @@ int Grid3D::Load_Particles_Density_Boundary_to_Buffer(int direction, int side, R for (k = 0; k < nz_g; k++) { for (j = 0; j < nGHST; j++) { for (i = 0; i < nx_g; i++) { - if (side == 0) indx = (i) + (j)*nx_g + (k)*nx_g * ny_g; - if (side == 1) indx = (i) + (ny_g - nGHST + j) * nx_g + (k)*nx_g * ny_g; + if (side == 0) { + indx = (i) + (j)*nx_g + (k)*nx_g * ny_g; + } + if (side == 1) { + indx = (i) + (ny_g - nGHST + j) * nx_g + (k)*nx_g * ny_g; + } indx_buff = i + k * nx_g + j * nx_g * nz_g; buffer[indx_buff] = Particles.G.density[indx]; } @@ -146,8 +160,12 @@ int Grid3D::Load_Particles_Density_Boundary_to_Buffer(int direction, int side, R for (k = 0; k < nz_g; k++) { for (j = 0; j < ny_g; j++) { for (i = 0; i < nGHST; i++) { - if (side == 0) indx = (i) + (j)*nx_g + (k)*nx_g * ny_g; - if (side == 1) indx = (nx_g - nGHST + i) + (j)*nx_g + (k)*nx_g * ny_g; + if (side == 0) { + indx = (i) + (j)*nx_g + (k)*nx_g * ny_g; + } + if (side == 1) { + indx = (nx_g - nGHST + i) + (j)*nx_g + (k)*nx_g * ny_g; + } indx_buff = j + k * ny_g + i * ny_g * nz_g; buffer[indx_buff] = Particles.G.density[indx]; } @@ -174,8 +192,12 @@ void Grid3D::Unload_Particles_Density_Boundary_From_Buffer(int direction, int si for (k = 0; k < nGHST; k++) { for (j = 0; j < ny_g; j++) { for (i = 0; i < nx_g; i++) { - if (side == 0) indx = (i) + (j)*nx_g + (k + nGHST) * nx_g * ny_g; - if (side == 1) indx = (i) + (j)*nx_g + (nz_g - 2 * nGHST + k) * nx_g * ny_g; + if (side == 0) { + indx = (i) + (j)*nx_g + (k + nGHST) * nx_g * ny_g; + } + if (side == 1) { + indx = (i) + (j)*nx_g + (nz_g - 2 * nGHST + k) * nx_g * ny_g; + } indx_buff = i + j * nx_g + k * nx_g * ny_g; Particles.G.density[indx] += buffer[indx_buff]; } @@ -188,8 +210,12 @@ void Grid3D::Unload_Particles_Density_Boundary_From_Buffer(int direction, int si for (k = 0; k < nz_g; k++) { for (j = 0; j < nGHST; j++) { for (i = 0; i < nx_g; i++) { - if (side == 0) indx = (i) + (j + nGHST) * nx_g + (k)*nx_g * ny_g; - if (side == 1) indx = (i) + (ny_g - 2 * nGHST + j) * nx_g + (k)*nx_g * ny_g; + if (side == 0) { + indx = (i) + (j + nGHST) * nx_g + (k)*nx_g * ny_g; + } + if (side == 1) { + indx = (i) + (ny_g - 2 * nGHST + j) * nx_g + (k)*nx_g * ny_g; + } indx_buff = i + k * nx_g + j * nx_g * nz_g; Particles.G.density[indx] += buffer[indx_buff]; } @@ -202,8 +228,12 @@ void Grid3D::Unload_Particles_Density_Boundary_From_Buffer(int direction, int si for (k = 0; k < nz_g; k++) { for (j = 0; j < ny_g; j++) { for (i = 0; i < nGHST; i++) { - if (side == 0) indx = (i + nGHST) + (j)*nx_g + (k)*nx_g * ny_g; - if (side == 1) indx = (nx_g - 2 * nGHST + i) + (j)*nx_g + (k)*nx_g * ny_g; + if (side == 0) { + indx = (i + nGHST) + (j)*nx_g + (k)*nx_g * ny_g; + } + if (side == 1) { + indx = (nx_g - 2 * nGHST + i) + (j)*nx_g + (k)*nx_g * ny_g; + } indx_buff = j + k * ny_g + i * ny_g * nz_g; Particles.G.density[indx] += buffer[indx_buff]; } diff --git a/src/particles/density_boundaries_gpu.cu b/src/particles/density_boundaries_gpu.cu index c13a27347..fd5c4ddca 100644 --- a/src/particles/density_boundaries_gpu.cu +++ b/src/particles/density_boundaries_gpu.cu @@ -16,25 +16,51 @@ __global__ void Set_Particles_Density_Boundaries_Periodic_kernel(int direction, tid_j = (tid - tid_k * n_i * n_j) / n_i; tid_i = tid - tid_k * n_i * n_j - tid_j * n_i; - if (tid_i < 0 || tid_i >= n_i || tid_j < 0 || tid_j >= n_j || tid_k < 0 || tid_k >= n_ghost) return; + if (tid_i < 0 || tid_i >= n_i || tid_j < 0 || tid_j >= n_j || tid_k < 0 || tid_k >= n_ghost) { + return; + } if (direction == 0) { - if (side == 0) tid_src = (nx - n_ghost + tid_k) + (tid_i)*nx + (tid_j)*nx * ny; - if (side == 0) tid_dst = (n_ghost + tid_k) + (tid_i)*nx + (tid_j)*nx * ny; - if (side == 1) tid_src = (tid_k) + (tid_i)*nx + (tid_j)*nx * ny; - if (side == 1) tid_dst = (nx - 2 * n_ghost + tid_k) + (tid_i)*nx + (tid_j)*nx * ny; + if (side == 0) { + tid_src = (nx - n_ghost + tid_k) + (tid_i)*nx + (tid_j)*nx * ny; + } + if (side == 0) { + tid_dst = (n_ghost + tid_k) + (tid_i)*nx + (tid_j)*nx * ny; + } + if (side == 1) { + tid_src = (tid_k) + (tid_i)*nx + (tid_j)*nx * ny; + } + if (side == 1) { + tid_dst = (nx - 2 * n_ghost + tid_k) + (tid_i)*nx + (tid_j)*nx * ny; + } } if (direction == 1) { - if (side == 0) tid_src = (tid_i) + (ny - n_ghost + tid_k) * nx + (tid_j)*nx * ny; - if (side == 0) tid_dst = (tid_i) + (n_ghost + tid_k) * nx + (tid_j)*nx * ny; - if (side == 1) tid_src = (tid_i) + (tid_k)*nx + (tid_j)*nx * ny; - if (side == 1) tid_dst = (tid_i) + (ny - 2 * n_ghost + tid_k) * nx + (tid_j)*nx * ny; + if (side == 0) { + tid_src = (tid_i) + (ny - n_ghost + tid_k) * nx + (tid_j)*nx * ny; + } + if (side == 0) { + tid_dst = (tid_i) + (n_ghost + tid_k) * nx + (tid_j)*nx * ny; + } + if (side == 1) { + tid_src = (tid_i) + (tid_k)*nx + (tid_j)*nx * ny; + } + if (side == 1) { + tid_dst = (tid_i) + (ny - 2 * n_ghost + tid_k) * nx + (tid_j)*nx * ny; + } } if (direction == 2) { - if (side == 0) tid_src = (tid_i) + (tid_j)*nx + (nz - n_ghost + tid_k) * nx * ny; - if (side == 0) tid_dst = (tid_i) + (tid_j)*nx + (n_ghost + tid_k) * nx * ny; - if (side == 1) tid_src = (tid_i) + (tid_j)*nx + (tid_k)*nx * ny; - if (side == 1) tid_dst = (tid_i) + (tid_j)*nx + (nz - 2 * n_ghost + tid_k) * nx * ny; + if (side == 0) { + tid_src = (tid_i) + (tid_j)*nx + (nz - n_ghost + tid_k) * nx * ny; + } + if (side == 0) { + tid_dst = (tid_i) + (tid_j)*nx + (n_ghost + tid_k) * nx * ny; + } + if (side == 1) { + tid_src = (tid_i) + (tid_j)*nx + (tid_k)*nx * ny; + } + if (side == 1) { + tid_dst = (tid_i) + (tid_j)*nx + (nz - 2 * n_ghost + tid_k) * nx * ny; + } } density_d[tid_dst] += density_d[tid_src]; @@ -87,21 +113,35 @@ __global__ void Load_Particles_Density_Boundary_to_Buffer_kernel(int direction, tid_j = (tid - tid_k * n_i * n_j) / n_i; tid_i = tid - tid_k * n_i * n_j - tid_j * n_i; - if (tid_i < 0 || tid_i >= n_i || tid_j < 0 || tid_j >= n_j || tid_k < 0 || tid_k >= n_ghost) return; + if (tid_i < 0 || tid_i >= n_i || tid_j < 0 || tid_j >= n_j || tid_k < 0 || tid_k >= n_ghost) { + return; + } tid_buffer = tid_i + tid_j * n_i + tid_k * n_i * n_j; if (direction == 0) { - if (side == 0) tid_dens = (tid_k) + (tid_i)*nx + (tid_j)*nx * ny; - if (side == 1) tid_dens = (nx - n_ghost + tid_k) + (tid_i)*nx + (tid_j)*nx * ny; + if (side == 0) { + tid_dens = (tid_k) + (tid_i)*nx + (tid_j)*nx * ny; + } + if (side == 1) { + tid_dens = (nx - n_ghost + tid_k) + (tid_i)*nx + (tid_j)*nx * ny; + } } if (direction == 1) { - if (side == 0) tid_dens = (tid_i) + (tid_k)*nx + (tid_j)*nx * ny; - if (side == 1) tid_dens = (tid_i) + (ny - n_ghost + tid_k) * nx + (tid_j)*nx * ny; + if (side == 0) { + tid_dens = (tid_i) + (tid_k)*nx + (tid_j)*nx * ny; + } + if (side == 1) { + tid_dens = (tid_i) + (ny - n_ghost + tid_k) * nx + (tid_j)*nx * ny; + } } if (direction == 2) { - if (side == 0) tid_dens = (tid_i) + (tid_j)*nx + (tid_k)*nx * ny; - if (side == 1) tid_dens = (tid_i) + (tid_j)*nx + (nz - n_ghost + tid_k) * nx * ny; + if (side == 0) { + tid_dens = (tid_i) + (tid_j)*nx + (tid_k)*nx * ny; + } + if (side == 1) { + tid_dens = (tid_i) + (tid_j)*nx + (nz - n_ghost + tid_k) * nx * ny; + } } transfer_buffer_d[tid_buffer] = density_d[tid_dens]; } @@ -161,21 +201,35 @@ __global__ void Unload_Particles_Density_Boundary_to_Buffer_kernel(int direction tid_j = (tid - tid_k * n_i * n_j) / n_i; tid_i = tid - tid_k * n_i * n_j - tid_j * n_i; - if (tid_i < 0 || tid_i >= n_i || tid_j < 0 || tid_j >= n_j || tid_k < 0 || tid_k >= n_ghost) return; + if (tid_i < 0 || tid_i >= n_i || tid_j < 0 || tid_j >= n_j || tid_k < 0 || tid_k >= n_ghost) { + return; + } tid_buffer = tid_i + tid_j * n_i + tid_k * n_i * n_j; if (direction == 0) { - if (side == 0) tid_dens = (n_ghost + tid_k) + (tid_i)*nx + (tid_j)*nx * ny; - if (side == 1) tid_dens = (nx - 2 * n_ghost + tid_k) + (tid_i)*nx + (tid_j)*nx * ny; + if (side == 0) { + tid_dens = (n_ghost + tid_k) + (tid_i)*nx + (tid_j)*nx * ny; + } + if (side == 1) { + tid_dens = (nx - 2 * n_ghost + tid_k) + (tid_i)*nx + (tid_j)*nx * ny; + } } if (direction == 1) { - if (side == 0) tid_dens = (tid_i) + (n_ghost + tid_k) * nx + (tid_j)*nx * ny; - if (side == 1) tid_dens = (tid_i) + (ny - 2 * n_ghost + tid_k) * nx + (tid_j)*nx * ny; + if (side == 0) { + tid_dens = (tid_i) + (n_ghost + tid_k) * nx + (tid_j)*nx * ny; + } + if (side == 1) { + tid_dens = (tid_i) + (ny - 2 * n_ghost + tid_k) * nx + (tid_j)*nx * ny; + } } if (direction == 2) { - if (side == 0) tid_dens = (tid_i) + (tid_j)*nx + (n_ghost + tid_k) * nx * ny; - if (side == 1) tid_dens = (tid_i) + (tid_j)*nx + (nz - 2 * n_ghost + tid_k) * nx * ny; + if (side == 0) { + tid_dens = (tid_i) + (tid_j)*nx + (n_ghost + tid_k) * nx * ny; + } + if (side == 1) { + tid_dens = (tid_i) + (tid_j)*nx + (nz - 2 * n_ghost + tid_k) * nx * ny; + } } density_d[tid_dens] += transfer_buffer_d[tid_buffer]; } diff --git a/src/particles/feedback_CIC_gpu.cu b/src/particles/feedback_CIC_gpu.cu index 186973afb..3746d8226 100644 --- a/src/particles/feedback_CIC_gpu.cu +++ b/src/particles/feedback_CIC_gpu.cu @@ -92,7 +92,9 @@ void supernova::initState(struct parameters* P, part_int_t n_local, Real allocat while (snr_in.good()) { std::getline(snr_in, line); - if (line_counter++ < N_HEADER) continue; // skip header processing + if (line_counter++ < N_HEADER) { + continue; + } // skip header processing int i = 0; char* data = strtok(line.data(), s99_delim); @@ -142,8 +144,12 @@ void supernova::initState(struct parameters* P, part_int_t n_local, Real allocat __device__ Real GetSNRate(Real t, Real* dev_snr, Real snr_dt, Real t_start, Real t_end) { - if (t < t_start || t >= t_end) return 0; - if (dev_snr == nullptr) return supernova::DEFAULT_SNR; + if (t < t_start || t >= t_end) { + return 0; + } + if (dev_snr == nullptr) { + return supernova::DEFAULT_SNR; + } int index = (int)((t - t_start) / snr_dt); return dev_snr[index] + (t - index * snr_dt) * (dev_snr[index + 1] - dev_snr[index]) / snr_dt; @@ -204,10 +210,18 @@ __device__ bool Particle_Is_Alone(Real* pos_x_dev, Real* pos_y_dev, Real* pos_z_ Real z0 = pos_z_dev[gtid]; // Brute force loop to see if particle is alone for (int i = 0; i < n_local; i++) { - if (i == gtid) continue; - if (abs(x0 - pos_x_dev[i]) > dx) continue; - if (abs(y0 - pos_y_dev[i]) > dx) continue; - if (abs(z0 - pos_z_dev[i]) > dx) continue; + if (i == gtid) { + continue; + } + if (abs(x0 - pos_x_dev[i]) > dx) { + continue; + } + if (abs(y0 - pos_y_dev[i]) > dx) { + continue; + } + if (abs(z0 - pos_z_dev[i]) > dx) { + continue; + } // If we made it here, something is too close. return false; } @@ -596,7 +610,9 @@ __global__ void Cluster_Feedback_Kernel(part_int_t n_local, part_int_t* id, Real } } } - if (direction > 0) atomicMax(dti, local_dti); + if (direction > 0) { + atomicMax(dti, local_dti); + } } } } @@ -633,7 +649,9 @@ Real supernova::Cluster_Feedback(Grid3D& G, FeedbackAnalysis& analysis) G.Timer.Feedback.Start(); #endif - if (G.H.dt == 0) return 0.0; + if (G.H.dt == 0) { + return 0.0; + } /* if (G.Particles.n_local > supernova::n_states) { diff --git a/src/particles/gravity_CIC_gpu.cu b/src/particles/gravity_CIC_gpu.cu index 6da73f28a..19aee8941 100644 --- a/src/particles/gravity_CIC_gpu.cu +++ b/src/particles/gravity_CIC_gpu.cu @@ -37,7 +37,9 @@ __global__ void Get_Gravity_Field_Particles_Kernel(Real *potential_dev, Real *gr ny_grav = ny + 2 * n_ghost_particles_grid; nz_grav = nz + 2 * n_ghost_particles_grid; - if (tid_x >= nx_grav || tid_y >= ny_grav || tid_z >= nz_grav) return; + if (tid_x >= nx_grav || tid_y >= ny_grav || tid_z >= nz_grav) { + return; + } int tid = tid_x + tid_y * nx_grav + tid_z * nx_grav * ny_grav; int nx_pot, ny_pot; @@ -152,7 +154,9 @@ __global__ void Get_Gravity_CIC_Kernel(part_int_t n_local, Real *gravity_x_dev, { part_int_t tid = blockIdx.x * blockDim.x + threadIdx.x; - if (tid >= n_local) return; + if (tid >= n_local) { + return; + } int nx_g, ny_g; nx_g = nx + 2 * n_ghost; @@ -175,9 +179,15 @@ __global__ void Get_Gravity_CIC_Kernel(part_int_t n_local, Real *gravity_x_dev, bool in_local = true; - if (pos_x < xMin || pos_x >= xMax) in_local = false; - if (pos_y < yMin || pos_y >= yMax) in_local = false; - if (pos_z < zMin || pos_z >= zMax) in_local = false; + if (pos_x < xMin || pos_x >= xMax) { + in_local = false; + } + if (pos_y < yMin || pos_y >= yMax) { + in_local = false; + } + if (pos_z < zMin || pos_z >= zMax) { + in_local = false; + } if (!in_local) { printf(" Gravity CIC Error: Particle outside local domain"); return; @@ -290,7 +300,9 @@ void __global__ Copy_Particles_Density_Kernel(Real *dst_density, Real *src_densi tid_y = blockIdx.y * blockDim.y + threadIdx.y; tid_z = blockIdx.z * blockDim.z + threadIdx.z; - if (tid_x >= nx_local || tid_y >= ny_local || tid_z >= nz_local) return; + if (tid_x >= nx_local || tid_y >= ny_local || tid_z >= nz_local) { + return; + } tid_dens = tid_x + tid_y * nx_local + tid_z * nx_local * ny_local; diff --git a/src/particles/io_particles.cpp b/src/particles/io_particles.cpp index a0fc4f32b..02a7d6c3a 100644 --- a/src/particles/io_particles.cpp +++ b/src/particles/io_particles.cpp @@ -282,9 +282,15 @@ void Particles_3D::Load_Particles_Data_HDF5(hid_t file_id, int nfile, struct par if (pPos_z < G.domainMin_z || pPos_z > G.domainMax_z) { std::cout << " Particle outside global domain " << std::endl; } - if (pPos_x < G.xMin || pPos_x >= G.xMax) in_local = false; - if (pPos_y < G.yMin || pPos_y >= G.yMax) in_local = false; - if (pPos_z < G.zMin || pPos_z >= G.zMax) in_local = false; + if (pPos_x < G.xMin || pPos_x >= G.xMax) { + in_local = false; + } + if (pPos_y < G.yMin || pPos_y >= G.yMax) { + in_local = false; + } + if (pPos_z < G.zMin || pPos_z >= G.zMax) { + in_local = false; + } if (!in_local) { #ifdef PARTICLE_IDS std::cout << " Particle outside Local domain pID: " << pID << std::endl; @@ -302,21 +308,45 @@ void Particles_3D::Load_Particles_Data_HDF5(hid_t file_id, int nfile, struct par // Keep track of the max and min position and velocity to print Initial // Statistics - if (pPos_x > px_max) px_max = pPos_x; - if (pPos_y > py_max) py_max = pPos_y; - if (pPos_z > pz_max) pz_max = pPos_z; + if (pPos_x > px_max) { + px_max = pPos_x; + } + if (pPos_y > py_max) { + py_max = pPos_y; + } + if (pPos_z > pz_max) { + pz_max = pPos_z; + } - if (pPos_x < px_min) px_min = pPos_x; - if (pPos_y < py_min) py_min = pPos_y; - if (pPos_z < pz_min) pz_min = pPos_z; + if (pPos_x < px_min) { + px_min = pPos_x; + } + if (pPos_y < py_min) { + py_min = pPos_y; + } + if (pPos_z < pz_min) { + pz_min = pPos_z; + } - if (pVel_x > vx_max) vx_max = pVel_x; - if (pVel_y > vy_max) vy_max = pVel_y; - if (pVel_z > vz_max) vz_max = pVel_z; + if (pVel_x > vx_max) { + vx_max = pVel_x; + } + if (pVel_y > vy_max) { + vy_max = pVel_y; + } + if (pVel_z > vz_max) { + vz_max = pVel_z; + } - if (pVel_x < vx_min) vx_min = pVel_x; - if (pVel_y < vy_min) vy_min = pVel_y; - if (pVel_z < vz_min) vz_min = pVel_z; + if (pVel_x < vx_min) { + vx_min = pVel_x; + } + if (pVel_y < vy_min) { + vy_min = pVel_y; + } + if (pVel_z < vz_min) { + vz_min = pVel_z; + } #ifdef PARTICLES_CPU // Add the particle data to the particles vectors diff --git a/src/particles/particles_3D.cpp b/src/particles/particles_3D.cpp index 1012e9ba5..f2b56f62c 100644 --- a/src/particles/particles_3D.cpp +++ b/src/particles/particles_3D.cpp @@ -37,7 +37,9 @@ void Grid3D::Initialize_Particles(struct parameters *P) Particles.G.potential_dev = Grav.F.potential_d; #endif - if (strcmp(P->init, "Uniform") == 0) Initialize_Uniform_Particles(); + if (strcmp(P->init, "Uniform") == 0) { + Initialize_Uniform_Particles(); + } #ifdef MPI_CHOLLA MPI_Barrier(world); @@ -535,7 +537,9 @@ void Particles_3D::Initialize_Sphere(struct parameters *P) r = sqrt((pPos_x - center_x) * (pPos_x - center_x) + (pPos_y - center_y) * (pPos_y - center_y) + (pPos_z - center_z) * (pPos_z - center_z)); - if (r > sphereR) continue; + if (r > sphereR) { + continue; + } #ifdef PARTICLES_CPU // Copy the particle data to the particles vectors @@ -692,9 +696,15 @@ void Particles_3D::Initialize_Disk_Stellar_Clusters(struct parameters *P) y = R * sin(phi); z = zDist(generator); - if (x < G.xMin || x >= G.xMax) continue; - if (y < G.yMin || y >= G.yMax) continue; - if (z < G.zMin || z >= G.zMax) continue; + if (x < G.xMin || x >= G.xMax) { + continue; + } + if (y < G.yMin || y >= G.yMax) { + continue; + } + if (z < G.zMin || z >= G.zMax) { + continue; + } ac = fabs(Galaxies::MW.gr_disk_D3D(R, 0) + Galaxies::MW.gr_halo_D3D(R, 0)); vPhi = sqrt(R * ac); @@ -775,7 +785,9 @@ void Particles_3D::Initialize_Disk_Stellar_Clusters(struct parameters *P) Copy_Particles_Array_Real_Host_to_Device(temp_age.data(), age_dev, n_local); #endif // PARTICLES_GPU - if (lost_particles > 0) chprintf(" lost %lu particles\n", lost_particles); + if (lost_particles > 0) { + chprintf(" lost %lu particles\n", lost_particles); + } chprintf( "Stellar Disk Particles Initialized, n_total: %lu, n_local: %lu, " "total_mass: %.3e s.m.\n", diff --git a/src/particles/particles_3D_gpu.cu b/src/particles/particles_3D_gpu.cu index 8a8898718..fc5210f77 100644 --- a/src/particles/particles_3D_gpu.cu +++ b/src/particles/particles_3D_gpu.cu @@ -66,7 +66,9 @@ template void __global__ Copy_Device_to_Device_Kernel(T *src_array_dev, T *dst_array_dev, part_int_t size) { int tid = blockIdx.x * blockDim.x + threadIdx.x; - if (tid < size) dst_array_dev[tid] = src_array_dev[tid]; + if (tid < size) { + dst_array_dev[tid] = src_array_dev[tid]; + } } template @@ -176,7 +178,9 @@ void Particles_3D::Copy_Particles_Array_Int_Device_to_Host(part_int_t *array_dev __global__ void Set_Particles_Array_Real_Kernel(Real value, Real *array_dev, part_int_t size) { int tid = blockIdx.x * blockDim.x + threadIdx.x; - if (tid < size) array_dev[tid] = value; + if (tid < size) { + array_dev[tid] = value; + } } void Particles_3D::Set_Particles_Array_Real(Real value, Real *array_dev, part_int_t size) diff --git a/src/particles/particles_boundaries.cpp b/src/particles/particles_boundaries.cpp index 19aec15a7..689beaccc 100644 --- a/src/particles/particles_boundaries.cpp +++ b/src/particles/particles_boundaries.cpp @@ -90,16 +90,34 @@ void Grid3D::Wait_and_Unload_MPI_Comm_Particles_Buffers_BLOCK(int dir, int *flag void Grid3D::Unload_Particles_From_Buffers_BLOCK(int index, int *flags) { // Make sure not to unload when not transfering particles - if (Particles.TRANSFER_DENSITY_BOUNDARIES) return; - if (H.TRANSFER_HYDRO_BOUNDARIES) return; - if (Grav.TRANSFER_POTENTIAL_BOUNDARIES) return; - - if (index == 0) Unload_Particles_from_Buffer_X0(flags); - if (index == 1) Unload_Particles_from_Buffer_X1(flags); - if (index == 2) Unload_Particles_from_Buffer_Y0(flags); - if (index == 3) Unload_Particles_from_Buffer_Y1(flags); - if (index == 4) Unload_Particles_from_Buffer_Z0(flags); - if (index == 5) Unload_Particles_from_Buffer_Z1(flags); + if (Particles.TRANSFER_DENSITY_BOUNDARIES) { + return; + } + if (H.TRANSFER_HYDRO_BOUNDARIES) { + return; + } + if (Grav.TRANSFER_POTENTIAL_BOUNDARIES) { + return; + } + + if (index == 0) { + Unload_Particles_from_Buffer_X0(flags); + } + if (index == 1) { + Unload_Particles_from_Buffer_X1(flags); + } + if (index == 2) { + Unload_Particles_from_Buffer_Y0(flags); + } + if (index == 3) { + Unload_Particles_from_Buffer_Y1(flags); + } + if (index == 4) { + Unload_Particles_from_Buffer_Z0(flags); + } + if (index == 5) { + Unload_Particles_from_Buffer_Z1(flags); + } } // Wait for the Number of particles that will be transferred, and request the @@ -107,7 +125,9 @@ void Grid3D::Unload_Particles_From_Buffers_BLOCK(int index, int *flags) void Grid3D::Wait_NTransfer_and_Request_Recv_Particles_Transfer_BLOCK(int dir, int *flags) { #ifdef PARTICLES - if (!Particles.TRANSFER_PARTICLES_BOUNDARIES) return; + if (!Particles.TRANSFER_PARTICLES_BOUNDARIES) { + return; + } #endif int iwait; diff --git a/src/particles/particles_boundaries_gpu.cu b/src/particles/particles_boundaries_gpu.cu index e4e254e18..68a77a113 100644 --- a/src/particles/particles_boundaries_gpu.cu +++ b/src/particles/particles_boundaries_gpu.cu @@ -21,17 +21,23 @@ __global__ void Set_Particles_Boundary_Kernel(int side, part_int_t n_local, Real Real d_length) { part_int_t tid = blockIdx.x * blockDim.x + threadIdx.x; - if (tid >= n_local) return; + if (tid >= n_local) { + return; + } Real pos; pos = pos_dev[tid]; if (side == 0) { - if (pos < d_min) pos += d_length; + if (pos < d_min) { + pos += d_length; + } } if (side == 1) { - if (pos >= d_max) pos -= d_length; + if (pos >= d_max) { + pos -= d_length; + } } pos_dev[tid] = pos; @@ -77,14 +83,20 @@ __global__ void Get_Transfer_Flags_Kernel(part_int_t n_total, int side, Real d_m bool *transfer_flags_d) { int tid = threadIdx.x + blockIdx.x * blockDim.x; - if (tid >= n_total) return; + if (tid >= n_total) { + return; + } bool transfer = false; Real pos = pos_d[tid]; - if (side == 0 && pos < d_min) transfer = true; - if (side == 1 && pos >= d_max) transfer = true; + if (side == 0 && pos < d_min) { + transfer = true; + } + if (side == 1 && pos >= d_max) { + transfer = true; + } transfer_flags_d[tid] = transfer; } @@ -122,7 +134,9 @@ __global__ void Scan_Kernel(part_int_t n_total, bool *transfer_flags_d, int *pre } // Clear the last element - if (tid_block == 0) data_sh[n - 1] = 0; + if (tid_block == 0) { + data_sh[n - 1] = 0; + } // Traverse down tree & build scan for (int d = 1; d < n; d *= 2) { @@ -140,14 +154,18 @@ __global__ void Scan_Kernel(part_int_t n_total, bool *transfer_flags_d, int *pre __syncthreads(); // Write results to device memory - if (block_start + 2 * tid_block < n_total) prefix_sum_d[block_start + 2 * tid_block] = data_sh[2 * tid_block]; + if (block_start + 2 * tid_block < n_total) { + prefix_sum_d[block_start + 2 * tid_block] = data_sh[2 * tid_block]; + } if (block_start + 2 * tid_block + 1 < n_total) { prefix_sum_d[block_start + 2 * tid_block + 1] = data_sh[2 * tid_block + 1]; } // Write the block sum int last_flag_block = (int)transfer_flags_d[block_start + 2 * (blockDim.x - 1) + 1]; - if (tid_block == 0) prefix_sum_block_d[blockIdx.x] = data_sh[2 * (blockDim.x - 1) + 1] + last_flag_block; + if (tid_block == 0) { + prefix_sum_block_d[blockIdx.x] = data_sh[2 * (blockDim.x - 1) + 1] + last_flag_block; + } } __global__ void Prefix_Sum_Blocks_Kernel(int n_partial, int *prefix_sum_block_d) @@ -174,7 +192,9 @@ __global__ void Prefix_Sum_Blocks_Kernel(int n_partial, int *prefix_sum_block_d) } __syncthreads(); - if (start_index + tid_block < n_partial) prefix_sum_block_d[start_index + tid_block] = data_sh[tid_block]; + if (start_index + tid_block < n_partial) { + prefix_sum_block_d[start_index + tid_block] = data_sh[tid_block]; + } n += 1; start_index = n * n_threads; } @@ -196,7 +216,9 @@ __global__ void Sum_Blocks_Kernel(part_int_t n_total, int *prefix_sum_d, int *pr } __syncthreads(); - if (tid < n_total) prefix_sum_d[tid] += block_sum_sh[0]; + if (tid < n_total) { + prefix_sum_d[tid] += block_sum_sh[0]; + } } __global__ void Get_N_Transfer_Particles_Kernel(part_int_t n_total, int *n_transfer_d, bool *transfer_flags_d, @@ -212,7 +234,9 @@ __global__ void Get_Transfer_Indices_Kernel(part_int_t n_total, bool *transfer_f { int tid, transfer_index; tid = threadIdx.x + blockIdx.x * blockDim.x; - if (tid >= n_total) return; + if (tid >= n_total) { + return; + } transfer_index = prefix_sum_d[tid]; if (transfer_index < 0 || transfer_index >= n_total) { @@ -220,7 +244,9 @@ __global__ void Get_Transfer_Indices_Kernel(part_int_t n_total, bool *transfer_f return; } - if (transfer_flags_d[tid]) transfer_indices_d[transfer_index] = tid; + if (transfer_flags_d[tid]) { + transfer_indices_d[transfer_index] = tid; + } } __global__ void Select_Indices_to_Replace_Transfered_Kernel(part_int_t n_total, int n_transfer, bool *transfer_flags_d, @@ -228,11 +254,15 @@ __global__ void Select_Indices_to_Replace_Transfered_Kernel(part_int_t n_total, { int tid, tid_inv; tid = threadIdx.x + blockIdx.x * blockDim.x; - if (tid >= n_total) return; + if (tid >= n_total) { + return; + } tid_inv = n_total - tid - 1; bool transfer_flag = transfer_flags_d[tid]; - if (transfer_flag) return; + if (transfer_flag) { + return; + } int prefix_sum_inv, replace_id; @@ -252,14 +282,18 @@ __global__ void Replace_Transfered_Particles_Kernel(int n_transfer, T *field_d, { int tid; tid = threadIdx.x + blockIdx.x * blockDim.x; - if (tid >= n_transfer) return; + if (tid >= n_transfer) { + return; + } int dst_id, src_id; dst_id = transfer_indices_d[tid]; src_id = replace_indices_d[tid]; if (dst_id < src_id) { - if (print_replace) printf("Replacing: %f \n", field_d[dst_id] * 1.0); + if (print_replace) { + printf("Replacing: %f \n", field_d[dst_id] * 1.0); + } field_d[dst_id] = field_d[src_id]; } } @@ -358,7 +392,9 @@ __global__ void Load_Transfered_Particles_to_Buffer_Kernel(int n_transfer, int f { int tid; tid = threadIdx.x + blockIdx.x * blockDim.x; - if (tid >= n_transfer) return; + if (tid >= n_transfer) { + return; + } int src_id, dst_id; Real field_val; @@ -367,8 +403,12 @@ __global__ void Load_Transfered_Particles_to_Buffer_Kernel(int n_transfer, int f field_val = field_d[src_id]; // Set global periodic boundary conditions - if (boundary_type == 1 && field_val < domainMin) field_val += (domainMax - domainMin); - if (boundary_type == 1 && field_val >= domainMax) field_val -= (domainMax - domainMin); + if (boundary_type == 1 && field_val < domainMin) { + field_val += (domainMax - domainMin); + } + if (boundary_type == 1 && field_val >= domainMax) { + field_val -= (domainMax - domainMin); + } send_buffer_d[dst_id] = field_val; } @@ -397,7 +437,9 @@ __global__ void Load_Transfered_Particles_Ints_to_Buffer_Kernel(int n_transfer, { int tid; tid = threadIdx.x + blockIdx.x * blockDim.x; - if (tid >= n_transfer) return; + if (tid >= n_transfer) { + return; + } int src_id, dst_id; part_int_t field_val; @@ -406,8 +448,12 @@ __global__ void Load_Transfered_Particles_Ints_to_Buffer_Kernel(int n_transfer, field_val = field_d[src_id]; // Set global periodic boundary conditions - if (boundary_type == 1 && field_val < domainMin) field_val += (domainMax - domainMin); - if (boundary_type == 1 && field_val >= domainMax) field_val -= (domainMax - domainMin); + if (boundary_type == 1 && field_val < domainMin) { + field_val += (domainMax - domainMin); + } + if (boundary_type == 1 && field_val >= domainMax) { + field_val -= (domainMax - domainMin); + } send_buffer_d[dst_id] = __longlong_as_double(field_val); } @@ -453,7 +499,9 @@ __global__ void Unload_Transfered_Particles_from_Buffer_Kernel(int n_local, int { int tid; tid = threadIdx.x + blockIdx.x * blockDim.x; - if (tid >= n_transfer) return; + if (tid >= n_transfer) { + return; + } int src_id, dst_id; src_id = tid * n_fields_to_transfer + field_id; @@ -483,7 +531,9 @@ __global__ void Unload_Transfered_Particles_Int_from_Buffer_Kernel(int n_local, { int tid; tid = threadIdx.x + blockIdx.x * blockDim.x; - if (tid >= n_transfer) return; + if (tid >= n_transfer) { + return; + } int src_id, dst_id; src_id = tid * n_fields_to_transfer + field_id; diff --git a/src/particles/particles_dynamics.cpp b/src/particles/particles_dynamics.cpp index 97045a3a1..9baba2cc5 100644 --- a/src/particles/particles_dynamics.cpp +++ b/src/particles/particles_dynamics.cpp @@ -65,7 +65,9 @@ Real Grid3D::Calc_Particles_dt_GPU() // set values for GPU kernels int ngrid = (Particles.n_local + TPB_PARTICLES - 1) / TPB_PARTICLES; - if (ngrid > Particles.G.size_blocks_array) chprintf(" Error: particles dt_array too small\n"); + if (ngrid > Particles.G.size_blocks_array) { + chprintf(" Error: particles dt_array too small\n"); + } Real max_dti; max_dti = Particles.Calc_Particles_dt_GPU_function( @@ -155,13 +157,19 @@ void Grid3D::Advance_Particles(int N_step) { CudaCheckError(); #ifdef CPU_TIME - if (N_step == 1) Timer.Advance_Part_1.Start(); - if (N_step == 2) Timer.Advance_Part_2.Start(); + if (N_step == 1) { + Timer.Advance_Part_1.Start(); + } + if (N_step == 2) { + Timer.Advance_Part_2.Start(); + } #endif #ifdef PARTICLES_KDK // Update the velocities by 0.5*delta_t and update the positions by delta_t - if (N_step == 1) Advance_Particles_KDK_Step1(); + if (N_step == 1) { + Advance_Particles_KDK_Step1(); + } #endif if (N_step == 2) { @@ -175,8 +183,12 @@ void Grid3D::Advance_Particles(int N_step) } #ifdef CPU_TIME - if (N_step == 1) Timer.Advance_Part_1.End(); - if (N_step == 2) Timer.Advance_Part_2.End(); + if (N_step == 1) { + Timer.Advance_Part_1.End(); + } + if (N_step == 2) { + Timer.Advance_Part_2.End(); + } #endif CudaCheckError(); } diff --git a/src/particles/particles_dynamics_gpu.cu b/src/particles/particles_dynamics_gpu.cu index f4192ad62..2e3b51a7b 100644 --- a/src/particles/particles_dynamics_gpu.cu +++ b/src/particles/particles_dynamics_gpu.cu @@ -70,7 +70,9 @@ __global__ void Calc_Particles_dti_Kernel(part_int_t n_local, Real dx, Real dy, } // write the result for this block to global memory - if (tid == 0) dti_array[blockIdx.x] = max_dti[0]; + if (tid == 0) { + dti_array[blockIdx.x] = max_dti[0]; + } } Real Particles_3D::Calc_Particles_dt_GPU_function(int ngrid, part_int_t n_particles_local, Real dx, Real dy, Real dz, @@ -112,7 +114,9 @@ __global__ void Advance_Particles_KDK_Step1_Kernel(part_int_t n_local, Real dt, Real *grav_x_dev, Real *grav_y_dev, Real *grav_z_dev) { part_int_t tid = blockIdx.x * blockDim.x + threadIdx.x; - if (tid >= n_local) return; + if (tid >= n_local) { + return; + } // Advance velocities by half a step vel_x_dev[tid] += 0.5 * dt * grav_x_dev[tid]; @@ -130,7 +134,9 @@ __global__ void Advance_Particles_KDK_Step2_Kernel(part_int_t n_local, Real dt, Real *grav_z_dev) { part_int_t tid = blockIdx.x * blockDim.x + threadIdx.x; - if (tid >= n_local) return; + if (tid >= n_local) { + return; + } // Advance velocities by the second half a step vel_x_dev[tid] += 0.5 * dt * grav_x_dev[tid]; diff --git a/src/reconstruction/plmp_cuda.cu b/src/reconstruction/plmp_cuda.cu index 1ca3f510b..f69bbdc4b 100644 --- a/src/reconstruction/plmp_cuda.cu +++ b/src/reconstruction/plmp_cuda.cu @@ -132,9 +132,15 @@ __global__ void PLMP_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou ge_i = dge / d_i; #endif // DE // cell i-1 - if (dir == 0) id = xid - 1 + yid * nx + zid * nx * ny; - if (dir == 1) id = xid + (yid - 1) * nx + zid * nx * ny; - if (dir == 2) id = xid + yid * nx + (zid - 1) * nx * ny; + if (dir == 0) { + id = xid - 1 + yid * nx + zid * nx * ny; + } + if (dir == 1) { + id = xid + (yid - 1) * nx + zid * nx * ny; + } + if (dir == 2) { + id = xid + yid * nx + (zid - 1) * nx * ny; + } d_imo = dev_conserved[id]; vx_imo = dev_conserved[o1 * n_cells + id] / d_imo; vy_imo = dev_conserved[o2 * n_cells + id] / d_imo; @@ -158,9 +164,15 @@ __global__ void PLMP_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou ge_imo = dge / d_imo; #endif // DE // cell i+1 - if (dir == 0) id = xid + 1 + yid * nx + zid * nx * ny; - if (dir == 1) id = xid + (yid + 1) * nx + zid * nx * ny; - if (dir == 2) id = xid + yid * nx + (zid + 1) * nx * ny; + if (dir == 0) { + id = xid + 1 + yid * nx + zid * nx * ny; + } + if (dir == 1) { + id = xid + (yid + 1) * nx + zid * nx * ny; + } + if (dir == 2) { + id = xid + yid * nx + (zid + 1) * nx * ny; + } d_ipo = dev_conserved[id]; vx_ipo = dev_conserved[o1 * n_cells + id] / d_ipo; vy_ipo = dev_conserved[o2 * n_cells + id] / d_ipo; @@ -276,9 +288,15 @@ __global__ void PLMP_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou // Convert the left and right states in the primitive to the conserved // variables send final values back from kernel bounds_R refers to the right // side of the i-1/2 interface - if (dir == 0) id = xid - 1 + yid * nx + zid * nx * ny; - if (dir == 1) id = xid + (yid - 1) * nx + zid * nx * ny; - if (dir == 2) id = xid + yid * nx + (zid - 1) * nx * ny; + if (dir == 0) { + id = xid - 1 + yid * nx + zid * nx * ny; + } + if (dir == 1) { + id = xid + (yid - 1) * nx + zid * nx * ny; + } + if (dir == 2) { + id = xid + yid * nx + (zid - 1) * nx * ny; + } dev_bounds_R[id] = d_L; dev_bounds_R[o1 * n_cells + id] = mx_L; dev_bounds_R[o2 * n_cells + id] = my_L; diff --git a/src/reconstruction/ppmc_cuda.cu b/src/reconstruction/ppmc_cuda.cu index 2058f4b56..c6468cf38 100644 --- a/src/reconstruction/ppmc_cuda.cu +++ b/src/reconstruction/ppmc_cuda.cu @@ -161,9 +161,15 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou } #endif // SCALAR // cell i-1 - if (dir == 0) id = xid - 1 + yid * nx + zid * nx * ny; - if (dir == 1) id = xid + (yid - 1) * nx + zid * nx * ny; - if (dir == 2) id = xid + yid * nx + (zid - 1) * nx * ny; + if (dir == 0) { + id = xid - 1 + yid * nx + zid * nx * ny; + } + if (dir == 1) { + id = xid + (yid - 1) * nx + zid * nx * ny; + } + if (dir == 2) { + id = xid + yid * nx + (zid - 1) * nx * ny; + } d_imo = dev_conserved[id]; vx_imo = dev_conserved[o1 * n_cells + id] / d_imo; vy_imo = dev_conserved[o2 * n_cells + id] / d_imo; @@ -187,9 +193,15 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou } #endif // SCALAR // cell i+1 - if (dir == 0) id = xid + 1 + yid * nx + zid * nx * ny; - if (dir == 1) id = xid + (yid + 1) * nx + zid * nx * ny; - if (dir == 2) id = xid + yid * nx + (zid + 1) * nx * ny; + if (dir == 0) { + id = xid + 1 + yid * nx + zid * nx * ny; + } + if (dir == 1) { + id = xid + (yid + 1) * nx + zid * nx * ny; + } + if (dir == 2) { + id = xid + yid * nx + (zid + 1) * nx * ny; + } d_ipo = dev_conserved[id]; vx_ipo = dev_conserved[o1 * n_cells + id] / d_ipo; vy_ipo = dev_conserved[o2 * n_cells + id] / d_ipo; @@ -213,9 +225,15 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou } #endif // SCALAR // cell i-2 - if (dir == 0) id = xid - 2 + yid * nx + zid * nx * ny; - if (dir == 1) id = xid + (yid - 2) * nx + zid * nx * ny; - if (dir == 2) id = xid + yid * nx + (zid - 2) * nx * ny; + if (dir == 0) { + id = xid - 2 + yid * nx + zid * nx * ny; + } + if (dir == 1) { + id = xid + (yid - 2) * nx + zid * nx * ny; + } + if (dir == 2) { + id = xid + yid * nx + (zid - 2) * nx * ny; + } d_imt = dev_conserved[id]; vx_imt = dev_conserved[o1 * n_cells + id] / d_imt; vy_imt = dev_conserved[o2 * n_cells + id] / d_imt; @@ -239,9 +257,15 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou } #endif // SCALAR // cell i+2 - if (dir == 0) id = xid + 2 + yid * nx + zid * nx * ny; - if (dir == 1) id = xid + (yid + 2) * nx + zid * nx * ny; - if (dir == 2) id = xid + yid * nx + (zid + 2) * nx * ny; + if (dir == 0) { + id = xid + 2 + yid * nx + zid * nx * ny; + } + if (dir == 1) { + id = xid + (yid + 2) * nx + zid * nx * ny; + } + if (dir == 2) { + id = xid + yid * nx + (zid + 2) * nx * ny; + } d_ipt = dev_conserved[id]; vx_ipt = dev_conserved[o1 * n_cells + id] / d_ipt; vy_ipt = dev_conserved[o2 * n_cells + id] / d_ipt; @@ -817,13 +841,25 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou // of cell center lie between neighboring cell-centered values // Stone Eqns 47 - 53 - if ((d_R - d_i) * (d_i - d_L) <= 0) d_L = d_R = d_i; - if ((vx_R - vx_i) * (vx_i - vx_L) <= 0) vx_L = vx_R = vx_i; - if ((vy_R - vy_i) * (vy_i - vy_L) <= 0) vy_L = vy_R = vy_i; - if ((vz_R - vz_i) * (vz_i - vz_L) <= 0) vz_L = vz_R = vz_i; - if ((p_R - p_i) * (p_i - p_L) <= 0) p_L = p_R = p_i; + if ((d_R - d_i) * (d_i - d_L) <= 0) { + d_L = d_R = d_i; + } + if ((vx_R - vx_i) * (vx_i - vx_L) <= 0) { + vx_L = vx_R = vx_i; + } + if ((vy_R - vy_i) * (vy_i - vy_L) <= 0) { + vy_L = vy_R = vy_i; + } + if ((vz_R - vz_i) * (vz_i - vz_L) <= 0) { + vz_L = vz_R = vz_i; + } + if ((p_R - p_i) * (p_i - p_L) <= 0) { + p_L = p_R = p_i; + } - if (6.0 * (d_R - d_L) * (d_i - 0.5 * (d_L + d_R)) > (d_R - d_L) * (d_R - d_L)) d_L = 3.0 * d_i - 2.0 * d_R; + if (6.0 * (d_R - d_L) * (d_i - 0.5 * (d_L + d_R)) > (d_R - d_L) * (d_R - d_L)) { + d_L = 3.0 * d_i - 2.0 * d_R; + } if (6.0 * (vx_R - vx_L) * (vx_i - 0.5 * (vx_L + vx_R)) > (vx_R - vx_L) * (vx_R - vx_L)) { vx_L = 3.0 * vx_i - 2.0 * vx_R; } @@ -833,9 +869,13 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou if (6.0 * (vz_R - vz_L) * (vz_i - 0.5 * (vz_L + vz_R)) > (vz_R - vz_L) * (vz_R - vz_L)) { vz_L = 3.0 * vz_i - 2.0 * vz_R; } - if (6.0 * (p_R - p_L) * (p_i - 0.5 * (p_L + p_R)) > (p_R - p_L) * (p_R - p_L)) p_L = 3.0 * p_i - 2.0 * p_R; + if (6.0 * (p_R - p_L) * (p_i - 0.5 * (p_L + p_R)) > (p_R - p_L) * (p_R - p_L)) { + p_L = 3.0 * p_i - 2.0 * p_R; + } - if (6.0 * (d_R - d_L) * (d_i - 0.5 * (d_L + d_R)) < -(d_R - d_L) * (d_R - d_L)) d_R = 3.0 * d_i - 2.0 * d_L; + if (6.0 * (d_R - d_L) * (d_i - 0.5 * (d_L + d_R)) < -(d_R - d_L) * (d_R - d_L)) { + d_R = 3.0 * d_i - 2.0 * d_L; + } if (6.0 * (vx_R - vx_L) * (vx_i - 0.5 * (vx_L + vx_R)) < -(vx_R - vx_L) * (vx_R - vx_L)) { vx_R = 3.0 * vx_i - 2.0 * vx_L; } @@ -845,7 +885,9 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou if (6.0 * (vz_R - vz_L) * (vz_i - 0.5 * (vz_L + vz_R)) < -(vz_R - vz_L) * (vz_R - vz_L)) { vz_R = 3.0 * vz_i - 2.0 * vz_L; } - if (6.0 * (p_R - p_L) * (p_i - 0.5 * (p_L + p_R)) < -(p_R - p_L) * (p_R - p_L)) p_R = 3.0 * p_i - 2.0 * p_L; + if (6.0 * (p_R - p_L) * (p_i - 0.5 * (p_L + p_R)) < -(p_R - p_L) * (p_R - p_L)) { + p_R = 3.0 * p_i - 2.0 * p_L; + } d_L = fmax(fmin(d_i, d_imo), d_L); d_L = fmin(fmax(d_i, d_imo), d_L); @@ -869,7 +911,9 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou p_R = fmin(fmax(p_i, p_ipo), p_R); #ifdef DE - if ((ge_R - ge_i) * (ge_i - ge_L) <= 0) ge_L = ge_R = ge_i; + if ((ge_R - ge_i) * (ge_i - ge_L) <= 0) { + ge_L = ge_R = ge_i; + } if (6.0 * (ge_R - ge_L) * (ge_i - 0.5 * (ge_L + ge_R)) > (ge_R - ge_L) * (ge_R - ge_L)) { ge_L = 3.0 * ge_i - 2.0 * ge_R; } @@ -884,7 +928,9 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { - if ((scalar_R[i] - scalar_i[i]) * (scalar_i[i] - scalar_L[i]) <= 0) scalar_L[i] = scalar_R[i] = scalar_i[i]; + if ((scalar_R[i] - scalar_i[i]) * (scalar_i[i] - scalar_L[i]) <= 0) { + scalar_L[i] = scalar_R[i] = scalar_i[i]; + } if (6.0 * (scalar_R[i] - scalar_L[i]) * (scalar_i[i] - 0.5 * (scalar_L[i] + scalar_R[i])) > (scalar_R[i] - scalar_L[i]) * (scalar_R[i] - scalar_L[i])) { scalar_L[i] = 3.0 * scalar_i[i] - 2.0 * scalar_R[i]; @@ -1169,9 +1215,15 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou // Step 11 - Send final values back from kernel // bounds_R refers to the right side of the i-1/2 interface - if (dir == 0) id = xid - 1 + yid * nx + zid * nx * ny; - if (dir == 1) id = xid + (yid - 1) * nx + zid * nx * ny; - if (dir == 2) id = xid + yid * nx + (zid - 1) * nx * ny; + if (dir == 0) { + id = xid - 1 + yid * nx + zid * nx * ny; + } + if (dir == 1) { + id = xid + (yid - 1) * nx + zid * nx * ny; + } + if (dir == 2) { + id = xid + yid * nx + (zid - 1) * nx * ny; + } dev_bounds_R[id] = d_L; dev_bounds_R[o1 * n_cells + id] = d_L * vx_L; dev_bounds_R[o2 * n_cells + id] = d_L * vy_L; diff --git a/src/riemann_solvers/exact_cuda.cu b/src/riemann_solvers/exact_cuda.cu index 9e8b2298d..918188441 100644 --- a/src/riemann_solvers/exact_cuda.cu +++ b/src/riemann_solvers/exact_cuda.cu @@ -173,13 +173,17 @@ __device__ Real guessp_CUDA(Real dl, Real vxl, Real pl, Real cl, Real dr, Real v // compute guess pressure from PVRS Riemann solver ppv = 0.5 * (pl + pr) + 0.125 * (vxl - vxr) * (dl + dr) * (cl + cr); - if (ppv < 0.0) ppv = 0.0; + if (ppv < 0.0) { + ppv = 0.0; + } // Two-Shock Riemann solver with PVRS as estimate gl = sqrt((2.0 / ((gamma + 1.0) * dl)) / (((gamma - 1.0) / (gamma + 1.0)) * pl + ppv)); gr = sqrt((2.0 / ((gamma + 1.0) * dr)) / (((gamma - 1.0) / (gamma + 1.0)) * pr + ppv)); pm = (gl * pl + gr * pr - (vxr - vxl)) / (gl + gr); - if (pm < 0.0) pm = TOL; + if (pm < 0.0) { + pm = TOL; + } return pm; } @@ -226,8 +230,12 @@ __device__ void starpv_CUDA(Real *p, Real *v, Real dl, Real vxl, Real pl, Real c *p = pold - (fl + fr + vxr - vxl) / (fld + frd); change = 2.0 * fabs((*p - pold) / (*p + pold)); - if (change <= TOL) break; - if (*p < 0.0) *p = TOL; + if (change <= TOL) { + break; + } + if (*p < 0.0) { + *p = TOL; + } pold = *p; } if (i > nriter) { diff --git a/src/system_tests/system_tester.cpp b/src/system_tests/system_tester.cpp index 9f6059d62..08c1f2acb 100644 --- a/src/system_tests/system_tester.cpp +++ b/src/system_tests/system_tester.cpp @@ -45,7 +45,9 @@ void systemTest::SystemTestRunner::runTest() /// globalRunCholla should only be used for large MPI tests where the user /// wishes to separate the execution of cholla and the comparison of results /// onto different machines/jobs - if (not globalCompareSystemTestResults) return; + if (not globalCompareSystemTestResults) { + return; + } // Make sure we have all the required data files and open the test data file _testHydroFieldsFileVec.resize(numMpiRanks); @@ -71,11 +73,15 @@ void systemTest::SystemTestRunner::runTest() if (_particleDataExists) { _testParticleIDs = _loadTestParticleData("particle_IDs"); - if (_fiducialFileExists) _fiducialParticleIDs = _loadFiducialParticleData("particle_IDs"); + if (_fiducialFileExists) { + _fiducialParticleIDs = _loadFiducialParticleData("particle_IDs"); + } } // Get the list of test dataset names - if (_hydroDataExists) _testDataSetNames = _findDataSetNames(_testHydroFieldsFileVec[0]); + if (_hydroDataExists) { + _testDataSetNames = _findDataSetNames(_testHydroFieldsFileVec[0]); + } if (_particleDataExists) { // Load the data, replace the density value with the new name, then append std::vector particleNames = _findDataSetNames(_testParticlesFileVec[0]); @@ -88,7 +94,9 @@ void systemTest::SystemTestRunner::runTest() // Start Performing Checks // ======================= // Check the number of time steps - if (_compareNumTimeSteps) _checkNumTimeSteps(); + if (_compareNumTimeSteps) { + _checkNumTimeSteps(); + } // Check that the test file has as many, or more, datasets than the fiducial // file. Provide a warning if the datasets are not the same size @@ -189,7 +197,9 @@ void systemTest::SystemTestRunner::runL1ErrorTest(double const &maxAllowedL1Erro /// globalRunCholla should only be used for large MPI tests where the user /// wishes to separate the execution of cholla and the comparison of results /// onto different machines/jobs - if (not globalCompareSystemTestResults) return; + if (not globalCompareSystemTestResults) { + return; + } // Make sure we have all the required data files and open the data files _testHydroFieldsFileVec.resize(numMpiRanks); @@ -213,7 +223,9 @@ void systemTest::SystemTestRunner::runL1ErrorTest(double const &maxAllowedL1Erro // Start Performing Checks // ======================= // Check the number of time steps - if (_compareNumTimeSteps) _checkNumTimeSteps(); + if (_compareNumTimeSteps) { + _checkNumTimeSteps(); + } // Check that the test file has as many, or more, datasets than the fiducial // file. Provide a warning if the datasets are not the same size @@ -420,8 +432,12 @@ systemTest::SystemTestRunner::~SystemTestRunner() { _fiducialFile.close(); for (size_t i = 0; i < _testHydroFieldsFileVec.size(); i++) { - if (_hydroDataExists) _testHydroFieldsFileVec[i].close(); - if (_particleDataExists) _testParticlesFileVec[i].close(); + if (_hydroDataExists) { + _testHydroFieldsFileVec[i].close(); + } + if (_particleDataExists) { + _testParticlesFileVec[i].close(); + } } } // ============================================================================= diff --git a/src/utils/error_check_cuda.cu b/src/utils/error_check_cuda.cu index 4b91efee5..4a0f78cb4 100644 --- a/src/utils/error_check_cuda.cu +++ b/src/utils/error_check_cuda.cu @@ -20,7 +20,9 @@ __global__ void Check_Value_Along_Axis(Real *dev_array, int n_field, int nx, int int tid_k = blockIdx.y * blockDim.y + threadIdx.y; if (blockDim.x != N_Y || blockDim.y != N_Z) { - if (tid_j == 0 && tid_k == 0) printf("ERROR CHECK: Block Dimension Error \n"); + if (tid_j == 0 && tid_k == 0) { + printf("ERROR CHECK: Block Dimension Error \n"); + } return; } @@ -45,12 +47,16 @@ __global__ void Check_Value_Along_Axis(Real *dev_array, int n_field, int nx, int if (tid_j == 0 && tid_k == 0) { for (i = 0; i < N_Y * N_Z - 1; i++) { - if (sh_data[i] == sh_data[i + 1]) error += 1; + if (sh_data[i] == sh_data[i + 1]) { + error += 1; + } } } } - if (tid_j == 0 && tid_k == 0) *return_value = error; + if (tid_j == 0 && tid_k == 0) { + *return_value = error; + } } int Check_Field_Along_Axis(Real *dev_array, int n_field, int nx, int ny, int nz, int n_ghost, dim3 Grid_Error, diff --git a/src/utils/gpu.hpp b/src/utils/gpu.hpp index a248ef9fe..49dffe13d 100644 --- a/src/utils/gpu.hpp +++ b/src/utils/gpu.hpp @@ -127,7 +127,9 @@ static void __attribute__((unused)) check(const hipError_t err, const char *cons static void check(const cufftResult err, const char *const file, const int line) { - if (err == CUFFT_SUCCESS) return; + if (err == CUFFT_SUCCESS) { + return; + } fprintf(stderr, "CUFFT ERROR AT LINE %d OF FILE '%s': %d\n", line, file, err); fflush(stderr); exit(err); @@ -137,7 +139,9 @@ static void check(const cufftResult err, const char *const file, const int line) static void check(const cudaError_t err, const char *const file, const int line) { - if (err == cudaSuccess) return; + if (err == cudaSuccess) { + return; + } fprintf(stderr, "CUDA ERROR AT LINE %d OF FILE '%s': %s %s\n", line, file, cudaGetErrorName(err), cudaGetErrorString(err)); fflush(stderr); @@ -161,13 +165,17 @@ template __global__ __launch_bounds__(GPU_MAX_THREADS) void gpuRun0(const int n0, const F f) { const int i0 = blockIdx.x * blockDim.x + threadIdx.x; - if (i0 < n0) f(i0); + if (i0 < n0) { + f(i0); + } } template void gpuFor(const int n0, const F f) { - if (n0 <= 0) return; + if (n0 <= 0) { + return; + } const int b0 = (n0 + GPU_MAX_THREADS - 1) / GPU_MAX_THREADS; const int t0 = (n0 + b0 - 1) / b0; gpuRun0<<>>(n0, f); @@ -195,13 +203,17 @@ __global__ __launch_bounds__(GPU_MAX_THREADS) void gpuRun2x0(const int n1, const { const int i0 = blockIdx.y; const int i1 = blockIdx.x * blockDim.x + threadIdx.x; - if (i1 < n1) f(i0, i1); + if (i1 < n1) { + f(i0, i1); + } } template void gpuFor(const int n0, const int n1, const F f) { - if ((n0 <= 0) || (n1 <= 0)) return; + if ((n0 <= 0) || (n1 <= 0)) { + return; + } const long nl01 = long(n0) * long(n1); assert(nl01 < long(INT_MAX)); @@ -252,13 +264,17 @@ __global__ __launch_bounds__(GPU_MAX_THREADS) void gpuRun3x0(const int n2, const const int i0 = blockIdx.z; const int i1 = blockIdx.y; const int i2 = blockIdx.x * blockDim.x + threadIdx.x; - if (i2 < n2) f(i0, i1, i2); + if (i2 < n2) { + f(i0, i1, i2); + } } template void gpuFor(const int n0, const int n1, const int n2, const F f) { - if ((n0 <= 0) || (n1 <= 0) || (n2 <= 0)) return; + if ((n0 <= 0) || (n1 <= 0) || (n2 <= 0)) { + return; + } const long nl12 = long(n1) * long(n2); const long nl012 = long(n0) * nl12; assert(nl012 < long(INT_MAX)); @@ -326,7 +342,9 @@ __global__ __launch_bounds__(GPU_MAX_THREADS) void gpuRun4x0(const int n23, cons template void gpuFor(const int n0, const int n1, const int n2, const int n3, const F f) { - if ((n0 <= 0) || (n1 <= 0) || (n2 <= 0) || (n3 <= 0)) return; + if ((n0 <= 0) || (n1 <= 0) || (n2 <= 0) || (n3 <= 0)) { + return; + } const long nl23 = long(n2) * long(n3); const long nl123 = long(n1) * nl23; assert(long(n0) * nl123 < long(INT_MAX)); @@ -402,7 +420,9 @@ __global__ __launch_bounds__(GPU_MAX_THREADS) void gpuRun5x0(const int n1, const template void gpuFor(const int n0, const int n1, const int n2, const int n3, const int n4, const F f) { - if ((n0 <= 0) || (n1 <= 0) || (n2 <= 0) || (n3 <= 0) || (n4 <= 0)) return; + if ((n0 <= 0) || (n1 <= 0) || (n2 <= 0) || (n3 <= 0) || (n4 <= 0)) { + return; + } const long nl01 = long(n0) * long(n1); const long nl34 = long(n3) * long(n4); assert(nl01 * long(n2) * nl34 < long(INT_MAX)); diff --git a/src/utils/gpu_arrays_functions.cu b/src/utils/gpu_arrays_functions.cu index 4604f58d6..696b19a6f 100644 --- a/src/utils/gpu_arrays_functions.cu +++ b/src/utils/gpu_arrays_functions.cu @@ -7,8 +7,12 @@ void Extend_GPU_Array_Real(Real **current_array_d, int current_size, int new_size, bool print_out) { - if (new_size <= current_size) return; - if (print_out) std::cout << " Extending GPU Array, size: " << current_size << " new_size: " << new_size << std::endl; + if (new_size <= current_size) { + return; + } + if (print_out) { + std::cout << " Extending GPU Array, size: " << current_size << " new_size: " << new_size << std::endl; + } size_t global_free, global_total; CudaSafeCall(cudaMemGetInfo(&global_free, &global_total)); diff --git a/src/utils/gpu_arrays_functions.h b/src/utils/gpu_arrays_functions.h index 5ed935bce..e28e86714 100644 --- a/src/utils/gpu_arrays_functions.h +++ b/src/utils/gpu_arrays_functions.h @@ -11,8 +11,12 @@ template void Extend_GPU_Array(T **current_array_d, int current_size, int new_size, bool print_out) { - if (new_size <= current_size) return; - if (print_out) std::cout << " Extending GPU Array, size: " << current_size << " new_size: " << new_size << std::endl; + if (new_size <= current_size) { + return; + } + if (print_out) { + std::cout << " Extending GPU Array, size: " << current_size << " new_size: " << new_size << std::endl; + } size_t global_free, global_total; CudaSafeCall(cudaMemGetInfo(&global_free, &global_total)); diff --git a/src/utils/parallel_omp.cpp b/src/utils/parallel_omp.cpp index 08e7bbb87..1e633ef07 100644 --- a/src/utils/parallel_omp.cpp +++ b/src/utils/parallel_omp.cpp @@ -13,11 +13,15 @@ void Get_OMP_Grid_Indxs(int n_grid_cells, int n_omp_procs, int omp_proc_id, int int counter = 0; while (counter < omp_proc_id) { g_start += n_grid_omp; - if (counter < grid_reminder) g_start += 1; + if (counter < grid_reminder) { + g_start += 1; + } counter += 1; } g_end = g_start + n_grid_omp; - if (omp_proc_id < grid_reminder) g_end += 1; + if (omp_proc_id < grid_reminder) { + g_end += 1; + } *omp_gridIndx_start = g_start; *omp_gridIndx_end = g_end; @@ -36,11 +40,15 @@ void Get_OMP_Particles_Indxs(part_int_t n_parts_local, int n_omp_procs, int omp_ int counter = 0; while (counter < omp_proc_id) { p_start += n_parts_omp; - if (counter < parts_reminder) p_start += 1; + if (counter < parts_reminder) { + p_start += 1; + } counter += 1; } p_end = p_start + n_parts_omp; - if (omp_proc_id < parts_reminder) p_end += 1; + if (omp_proc_id < parts_reminder) { + p_end += 1; + } *omp_pIndx_start = p_start; *omp_pIndx_end = p_end; diff --git a/src/utils/reduction_utilities.h b/src/utils/reduction_utilities.h index 38cb54724..811a9a13b 100644 --- a/src/utils/reduction_utilities.h +++ b/src/utils/reduction_utilities.h @@ -64,14 +64,18 @@ __inline__ __device__ Real blockReduceMax(Real val) val = warpReduceMax(val); // Each warp performs partial reduction - if (lane == 0) shared[warpId] = val; // Write reduced value to shared memory + if (lane == 0) { + shared[warpId] = val; + } // Write reduced value to shared memory __syncthreads(); // Wait for all partial reductions // read from shared memory only if that warp existed val = (threadIdx.x < blockDim.x / warpSize) ? shared[lane] : 0; - if (warpId == 0) val = warpReduceMax(val); // Final reduce within first warp + if (warpId == 0) { + val = warpReduceMax(val); + } // Final reduce within first warp return val; } @@ -135,7 +139,9 @@ inline __device__ long long encode(double val) */ inline __device__ float decode(int val) { - if (val < 0) val = (1 << 31) | ~val; + if (val < 0) { + val = (1 << 31) | ~val; + } return bit_cast(val); } @@ -147,7 +153,9 @@ inline __device__ float decode(int val) */ inline __device__ double decode(long long val) { - if (val < 0) val = (1ULL << 63) | ~val; + if (val < 0) { + val = (1ULL << 63) | ~val; + } return bit_cast(val); } #endif // O_HIP @@ -270,7 +278,9 @@ __inline__ __device__ void gridReduceMax(Real val, Real* out) val = blockReduceMax(val); // Write block level reduced value to the output scalar atomically - if (threadIdx.x == 0) atomicMaxBits(out, val); + if (threadIdx.x == 0) { + atomicMaxBits(out, val); + } } // ===================================================================== diff --git a/src/utils/testing_utilities.cpp b/src/utils/testing_utilities.cpp index a279288ad..363ed750b 100644 --- a/src/utils/testing_utilities.cpp +++ b/src/utils/testing_utilities.cpp @@ -25,26 +25,36 @@ int64_t ulpsDistanceDbl(double const &a, double const &b) { // Save work if the floats are equal. // Also handles +0 == -0 - if (a == b) return 0; + if (a == b) { + return 0; + } const auto maxInt = std::numeric_limits::max(); // If either one is NaN then they are not equal, max distance. - if (std::isnan(a) || std::isnan(b)) return maxInt; + if (std::isnan(a) || std::isnan(b)) { + return maxInt; + } // If one's infinite and they're not equal, max distance. - if (std::isinf(a) || std::isinf(b)) return maxInt; + if (std::isinf(a) || std::isinf(b)) { + return maxInt; + } int64_t ia, ib; std::memcpy(&ia, &a, sizeof(double)); std::memcpy(&ib, &b, sizeof(double)); // Don't compare differently-signed floats. - if ((ia < 0) != (ib < 0)) return maxInt; + if ((ia < 0) != (ib < 0)) { + return maxInt; + } // Return the absolute value of the distance in ULPs. int64_t distance = ia - ib; - if (distance < 0) distance = -distance; + if (distance < 0) { + distance = -distance; + } return distance; } diff --git a/src/utils/timing_functions.cpp b/src/utils/timing_functions.cpp index ae33ea089..155073b8f 100644 --- a/src/utils/timing_functions.cpp +++ b/src/utils/timing_functions.cpp @@ -15,7 +15,9 @@ void OneTime::Start() { - if (inactive) return; + if (inactive) { + return; + } time_start = get_time(); } @@ -28,7 +30,9 @@ void OneTime::Subtract(Real time_to_subtract) void OneTime::End() { - if (inactive) return; + if (inactive) { + return; + } Real time_end = get_time(); Real time = (time_end - time_start) * 1000; @@ -41,7 +45,9 @@ void OneTime::End() t_max = time; t_avg = time; #endif - if (n_steps > 0) t_all += t_max; + if (n_steps > 0) { + t_all += t_max; + } n_steps++; } @@ -57,7 +63,9 @@ void OneTime::RecordTime(Real time) t_max = time; t_avg = time; #endif - if (n_steps > 0) t_all += t_max; + if (n_steps > 0) { + t_all += t_max; + } n_steps++; } @@ -68,7 +76,9 @@ void OneTime::PrintStep() void OneTime::PrintAverage() { - if (n_steps > 1) chprintf(" Time %-19s avg: %9.4f ms\n", name, t_all / (n_steps - 1)); + if (n_steps > 1) { + chprintf(" Time %-19s avg: %9.4f ms\n", name, t_all / (n_steps - 1)); + } } void OneTime::PrintAll() { chprintf(" Time %-19s all: %9.4f ms\n", name, t_all); } @@ -161,7 +171,9 @@ void Time::Print_Average_Times(struct parameters P) } #ifdef MPI_CHOLLA - if (procID != 0) return; + if (procID != 0) { + return; + } #endif std::ofstream out_file; From 4bc67bdd29547a490915c4e32ccc37b892aa9074 Mon Sep 17 00:00:00 2001 From: helenarichie Date: Fri, 3 Mar 2023 17:30:18 -0500 Subject: [PATCH 230/694] resolve readability-braces-around-statements for cosmology build --- src/cosmology/cosmology_functions.cpp | 4 +++- src/cosmology/cosmology_functions_gpu.cu | 4 +++- src/particles/particles_dynamics_gpu.cu | 8 ++++++-- 3 files changed, 12 insertions(+), 4 deletions(-) diff --git a/src/cosmology/cosmology_functions.cpp b/src/cosmology/cosmology_functions.cpp index 23ee5c5d4..3debb1dfb 100644 --- a/src/cosmology/cosmology_functions.cpp +++ b/src/cosmology/cosmology_functions.cpp @@ -13,7 +13,9 @@ void Grid3D::Initialize_Cosmology(struct parameters *P) // Change to comoving Cosmological System Change_Cosmological_Frame_Sytem(true); - if (fabs(Cosmo.current_a - Cosmo.next_output) < 1e-5) H.Output_Now = true; + if (fabs(Cosmo.current_a - Cosmo.next_output) < 1e-5) { + H.Output_Now = true; + } chprintf("Cosmology Successfully Initialized. \n\n"); } diff --git a/src/cosmology/cosmology_functions_gpu.cu b/src/cosmology/cosmology_functions_gpu.cu index e6da6dc66..aac1335c5 100644 --- a/src/cosmology/cosmology_functions_gpu.cu +++ b/src/cosmology/cosmology_functions_gpu.cu @@ -20,7 +20,9 @@ void __global__ Change_GAS_Frame_System_kernel(Real dens_factor, Real momentum_f tid_y = blockIdx.y * blockDim.y + threadIdx.y; tid_z = blockIdx.z * blockDim.z + threadIdx.z; - if (tid_x >= nx || tid_y >= ny || tid_z >= nz) return; + if (tid_x >= nx || tid_y >= ny || tid_z >= nz) { + return; + } tid_grid = tid_x + tid_y * nx + tid_z * nx * ny; diff --git a/src/particles/particles_dynamics_gpu.cu b/src/particles/particles_dynamics_gpu.cu index 2e3b51a7b..d0552abe6 100644 --- a/src/particles/particles_dynamics_gpu.cu +++ b/src/particles/particles_dynamics_gpu.cu @@ -192,7 +192,9 @@ __global__ void Advance_Particles_KDK_Step1_Cosmo_Kernel(part_int_t n_local, Rea Real Omega_M, Real Omega_L, Real Omega_K) { part_int_t tid = blockIdx.x * blockDim.x + threadIdx.x; - if (tid >= n_local) return; + if (tid >= n_local) { + return; + } Real vel_x, vel_y, vel_z; vel_x = vel_x_dev[tid]; @@ -234,7 +236,9 @@ __global__ void Advance_Particles_KDK_Step2_Cosmo_Kernel(part_int_t n_local, Rea Real Omega_M, Real Omega_L, Real Omega_K) { part_int_t tid = blockIdx.x * blockDim.x + threadIdx.x; - if (tid >= n_local) return; + if (tid >= n_local) { + return; + } Real vel_x, vel_y, vel_z; vel_x = vel_x_dev[tid]; From aae57d8fffa48d7d14fb4f29a26c955551545e70 Mon Sep 17 00:00:00 2001 From: helenarichie Date: Fri, 3 Mar 2023 17:45:30 -0500 Subject: [PATCH 231/694] resolve readability-braces-around-statements for dust build --- src/dust/dust_cuda.cu | 2 +- src/reconstruction/ppmc_cuda.cu | 9 ++++++--- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/src/dust/dust_cuda.cu b/src/dust/dust_cuda.cu index 6d158dd72..68f7a4040 100644 --- a/src/dust/dust_cuda.cu +++ b/src/dust/dust_cuda.cu @@ -64,7 +64,7 @@ __global__ void Dust_Kernel(Real *dev_conserved, int nx, int ny, int nz, int n_g n = d_gas * DENSITY_UNIT / (mu * MP); - if (E < 0.0 || E != E) return; + if (E < 0.0 || E != E) {return;} vx = dev_conserved[id + n_cells * grid_enum::momentum_x] / d_gas; vy = dev_conserved[id + n_cells * grid_enum::momentum_y] / d_gas; diff --git a/src/reconstruction/ppmc_cuda.cu b/src/reconstruction/ppmc_cuda.cu index c6468cf38..10c6a788d 100644 --- a/src/reconstruction/ppmc_cuda.cu +++ b/src/reconstruction/ppmc_cuda.cu @@ -448,8 +448,9 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou lim_slope_a = fmin(fabs(del_scalar_L[i]), fabs(del_scalar_R[i])); lim_slope_b = fmin(fabs(del_scalar_C[i]), fabs(del_scalar_G[i])); del_scalar_m_imo[i] = sgn_CUDA(del_scalar_C[i]) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); - } else + } else { del_scalar_m_imo[i] = 0.0; + } } #endif // SCALAR @@ -620,8 +621,9 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou lim_slope_a = fmin(fabs(del_scalar_L[i]), fabs(del_scalar_R[i])); lim_slope_b = fmin(fabs(del_scalar_C[i]), fabs(del_scalar_G[i])); del_scalar_m_i[i] = sgn_CUDA(del_scalar_C[i]) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); - } else + } else { del_scalar_m_i[i] = 0.0; + } } #endif // SCALAR @@ -792,8 +794,9 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou lim_slope_a = fmin(fabs(del_scalar_L[i]), fabs(del_scalar_R[i])); lim_slope_b = fmin(fabs(del_scalar_C[i]), fabs(del_scalar_G[i])); del_scalar_m_ipo[i] = sgn_CUDA(del_scalar_C[i]) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); - } else + } else { del_scalar_m_ipo[i] = 0.0; + } } #endif // SCALAR From 02ba073aefd58a7918b2b0d2128b724b9c176b39 Mon Sep 17 00:00:00 2001 From: helenarichie Date: Fri, 3 Mar 2023 17:45:58 -0500 Subject: [PATCH 232/694] resolve readability-braces-around-statements for dust build --- src/dust/dust_cuda.cu | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/dust/dust_cuda.cu b/src/dust/dust_cuda.cu index 68f7a4040..344d3c9ce 100644 --- a/src/dust/dust_cuda.cu +++ b/src/dust/dust_cuda.cu @@ -64,7 +64,9 @@ __global__ void Dust_Kernel(Real *dev_conserved, int nx, int ny, int nz, int n_g n = d_gas * DENSITY_UNIT / (mu * MP); - if (E < 0.0 || E != E) {return;} + if (E < 0.0 || E != E) { + return; + } vx = dev_conserved[id + n_cells * grid_enum::momentum_x] / d_gas; vy = dev_conserved[id + n_cells * grid_enum::momentum_y] / d_gas; From 79f94827bc9bb8550ba8d378bad3911163bcdbf1 Mon Sep 17 00:00:00 2001 From: helenarichie Date: Fri, 3 Mar 2023 18:12:27 -0500 Subject: [PATCH 233/694] resolve readability-braces-around-statements for mhd build --- src/io/io.cpp | 24 ++++++++++++++++++------ src/riemann_solvers/hlld_cuda.cu | 4 +++- 2 files changed, 21 insertions(+), 7 deletions(-) diff --git a/src/io/io.cpp b/src/io/io.cpp index cc2a922b8..cdc3c62a5 100644 --- a/src/io/io.cpp +++ b/src/io/io.cpp @@ -3206,8 +3206,12 @@ void Grid3D::Read_Grid_HDF5(hid_t file_id, struct parameters P) buf_id = k + j * (H.nz_real + 1) + i * (H.nz_real + 1) * (H.ny_real + 1); C.magnetic_x[id] = dataset_buffer[buf_id]; mean_l += fabs(C.magnetic_x[id]); - if (fabs(C.magnetic_x[id]) > max_l) max_l = fabs(C.magnetic_x[id]); - if (fabs(C.magnetic_x[id]) < min_l) min_l = fabs(C.magnetic_x[id]); + if (fabs(C.magnetic_x[id]) > max_l) { + max_l = fabs(C.magnetic_x[id]); + } + if (fabs(C.magnetic_x[id]) < min_l) { + min_l = fabs(C.magnetic_x[id]); + } } } } @@ -3248,8 +3252,12 @@ void Grid3D::Read_Grid_HDF5(hid_t file_id, struct parameters P) buf_id = k + j * (H.nz_real + 1) + i * (H.nz_real + 1) * (H.ny_real + 1); C.magnetic_y[id] = dataset_buffer[buf_id]; mean_l += fabs(C.magnetic_y[id]); - if (fabs(C.magnetic_y[id]) > max_l) max_l = fabs(C.magnetic_y[id]); - if (fabs(C.magnetic_y[id]) < min_l) min_l = fabs(C.magnetic_y[id]); + if (fabs(C.magnetic_y[id]) > max_l) { + max_l = fabs(C.magnetic_y[id]); + } + if (fabs(C.magnetic_y[id]) < min_l) { + min_l = fabs(C.magnetic_y[id]); + } } } } @@ -3290,8 +3298,12 @@ void Grid3D::Read_Grid_HDF5(hid_t file_id, struct parameters P) buf_id = k + j * (H.nz_real + 1) + i * (H.nz_real + 1) * (H.ny_real + 1); C.magnetic_z[id] = dataset_buffer[buf_id]; mean_l += fabs(C.magnetic_z[id]); - if (fabs(C.magnetic_z[id]) > max_l) max_l = fabs(C.magnetic_z[id]); - if (fabs(C.magnetic_z[id]) < min_l) min_l = fabs(C.magnetic_z[id]); + if (fabs(C.magnetic_z[id]) > max_l) { + max_l = fabs(C.magnetic_z[id]); + } + if (fabs(C.magnetic_z[id]) < min_l) { + min_l = fabs(C.magnetic_z[id]); + } } } } diff --git a/src/riemann_solvers/hlld_cuda.cu b/src/riemann_solvers/hlld_cuda.cu index abf3851de..f05707c58 100644 --- a/src/riemann_solvers/hlld_cuda.cu +++ b/src/riemann_solvers/hlld_cuda.cu @@ -38,7 +38,9 @@ __global__ void Calculate_HLLD_Fluxes_CUDA(Real const *dev_bounds_L, Real const int threadId = threadIdx.x + blockIdx.x * blockDim.x; // Thread guard to avoid overrun - if (threadId >= n_cells) return; + if (threadId >= n_cells) { + return; + } // Offsets & indices int o1, o2, o3; From e9a1559d15796b30305006749de12eeb9b95adea Mon Sep 17 00:00:00 2001 From: helenarichie Date: Fri, 3 Mar 2023 18:13:38 -0500 Subject: [PATCH 234/694] add hicpp-readability-braces-around-statements to clang tidy checks --- .clang-tidy | 1 - 1 file changed, 1 deletion(-) diff --git a/.clang-tidy b/.clang-tidy index 3066298bb..65e459e56 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -100,7 +100,6 @@ Checks: "*, -google-readability-todo, -google-runtime-int, -hicpp-avoid-c-arrays, - -hicpp-braces-around-statements, -hicpp-deprecated-headers, -hicpp-explicit-conversions, -hicpp-member-init, From a94a4d2d0556407b94021a6dbb2e18b9915c8e45 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Fri, 3 Mar 2023 16:03:28 -0500 Subject: [PATCH 235/694] Remove C & and enable bugprone-signed-char-misuse --- .clang-tidy | 1 - src/mpi/MPI_Comm_node.h | 14 -------------- src/mpi/mpi_routines.cpp | 4 ---- 3 files changed, 19 deletions(-) delete mode 100644 src/mpi/MPI_Comm_node.h diff --git a/.clang-tidy b/.clang-tidy index 65e459e56..8af502682 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -43,7 +43,6 @@ Checks: "*, -bugprone-macro-parentheses, -bugprone-narrowing-conversions, -bugprone-reserved-identifier, - -bugprone-signed-char-misuse, -bugprone-string-integer-assignment, -cert-dcl37-c, -cert-dcl50-cpp, diff --git a/src/mpi/MPI_Comm_node.h b/src/mpi/MPI_Comm_node.h deleted file mode 100644 index eaa975aef..000000000 --- a/src/mpi/MPI_Comm_node.h +++ /dev/null @@ -1,14 +0,0 @@ -#ifndef MPI_COMM_NODE -#define MPI_COMM_NODE - -#include - -#ifdef __cplusplus -extern "C" { -#endif //__cplusplus -MPI_Comm MPI_Comm_node(int *pid, int *np); -#ifdef __cplusplus -} -#endif //__cplusplus - -#endif // MPI_COMM_NODE diff --git a/src/mpi/mpi_routines.cpp b/src/mpi/mpi_routines.cpp index 2a464c8a4..f35a72952 100644 --- a/src/mpi/mpi_routines.cpp +++ b/src/mpi/mpi_routines.cpp @@ -8,7 +8,6 @@ #include "../global/global.h" #include "../io/io.h" - #include "../mpi/MPI_Comm_node.h" #include "../mpi/cuda_mpi_routines.h" #include "../utils/error_handling.h" @@ -21,7 +20,6 @@ int procID_node; /*process rank on node*/ int nproc_node; /*number of MPI processes on node*/ MPI_Comm world; /*global communicator*/ -MPI_Comm node; /*global communicator*/ MPI_Datatype MPI_CHREAL; /*set equal to MPI_FLOAT or MPI_DOUBLE*/ @@ -216,8 +214,6 @@ void InitializeChollaMPI(int *pargc, char **pargv[]) } #endif - /*set up node communicator*/ - node = MPI_Comm_node(&procID_node, &nproc_node); // #ifdef ONLY_PARTICLES // chprintf("ONLY_PARTICLES: Initializing without CUDA support.\n"); // #else From 4a92255f98962ac2be1bf7eeeb5111042d096437 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Fri, 3 Mar 2023 16:19:17 -0500 Subject: [PATCH 236/694] Remove all support for C in the build system --- .github/workflows/build_and_lint.yml | 3 --- Makefile | 25 ++++--------------------- builds/make.host.c3po | 3 --- builds/make.host.crc | 3 --- builds/make.host.frontier | 4 ---- builds/make.host.github | 5 ----- builds/make.host.lux | 3 --- builds/make.host.poplar | 3 --- builds/make.host.poplar.aomp | 3 --- builds/make.host.poplar.cce+hip | 3 --- builds/make.host.shamrock | 3 --- builds/make.host.spock | 4 ---- builds/make.host.summit | 3 --- builds/make.host.tornado | 3 --- builds/setup.poplar.aomp.sh | 3 +-- tools/analyze_tidy_checks.py | 6 ++---- 16 files changed, 7 insertions(+), 70 deletions(-) diff --git a/.github/workflows/build_and_lint.yml b/.github/workflows/build_and_lint.yml index 0ea34c687..d0a42529f 100644 --- a/.github/workflows/build_and_lint.yml +++ b/.github/workflows/build_and_lint.yml @@ -88,9 +88,6 @@ jobs: - name: Display tidy_results_cpp.log if: ${{ (matrix.container.name == 'CUDA') && (always()) }} run: cat tidy_results_cpp.log - - name: Display tidy_results_c.log - if: ${{ (matrix.container.name == 'CUDA') && (always()) }} - run: cat tidy_results_c.log - name: Display tidy_results_gpu.log if: ${{ (matrix.container.name == 'CUDA') && (always()) }} run: cat tidy_results_gpu.log diff --git a/Makefile b/Makefile index 782eaf71c..302654aac 100644 --- a/Makefile +++ b/Makefile @@ -17,13 +17,11 @@ DIRS := src src/analysis src/chemistry_gpu src/cooling src/cooling_grackle s SUFFIX ?= .$(TYPE).$(MACHINE) -CFILES := $(foreach DIR,$(DIRS),$(wildcard $(DIR)/*.c)) CPPFILES := $(foreach DIR,$(DIRS),$(wildcard $(DIR)/*.cpp)) GPUFILES := $(foreach DIR,$(DIRS),$(wildcard $(DIR)/*.cu)) # Build a list of all potential object files so cleaning works properly -CLEAN_OBJS := $(subst .c,.o,$(CFILES)) \ - $(subst .cpp,.o,$(CPPFILES)) \ +CLEAN_OBJS := $(subst .cpp,.o,$(CPPFILES)) \ $(subst .cu,.o,$(GPUFILES)) # Check if it should include testing flags @@ -49,30 +47,24 @@ ifeq ($(ADD_TEST_FLAGS), yes) SUFFIX := $(strip $(SUFFIX)).tests LIBS += -L$(GOOGLETEST_ROOT)/lib64 -pthread -lgtest -lhdf5_cpp TEST_FLAGS = -I$(GOOGLETEST_ROOT)/include - CFLAGS += $(TEST_FLAGS) CXXFLAGS += $(TEST_FLAGS) GPUFLAGS += $(TEST_FLAGS) else # This isn't a test build so clear out testing related files - CFILES := $(filter-out src/system_tests/% %_tests.c,$(CFILES)) CPPFILES := $(filter-out src/system_tests/% %_tests.cpp,$(CPPFILES)) CPPFILES := $(filter-out src/utils/testing_utilities.cpp,$(CPPFILES)) GPUFILES := $(filter-out src/system_tests/% %_tests.cu,$(GPUFILES)) endif -OBJS := $(subst .c,.o,$(CFILES)) \ - $(subst .cpp,.o,$(CPPFILES)) \ +OBJS := $(subst .cpp,.o,$(CPPFILES)) \ $(subst .cu,.o,$(GPUFILES)) #-- Set default compilers and flags -CC ?= cc CXX ?= CC -CFLAGS_OPTIMIZE ?= -g -Ofast CXXFLAGS_OPTIMIZE ?= -g -Ofast -std=c++17 GPUFLAGS_OPTIMIZE ?= -g -O3 -std=c++17 -CFLAGS_DEBUG ?= -g -O0 CXXFLAGS_DEBUG ?= -g -O0 -std=c++17 ifdef HIPCONFIG GPUFLAGS_DEBUG ?= -g -O0 -std=c++17 @@ -82,13 +74,11 @@ endif BUILD ?= OPTIMIZE -CFLAGS += $(CFLAGS_$(BUILD)) CXXFLAGS += $(CXXFLAGS_$(BUILD)) GPUFLAGS += $(GPUFLAGS_$(BUILD)) #-- Add flags and libraries as needed -CFLAGS += $(DFLAGS) -Isrc CXXFLAGS += $(DFLAGS) -Isrc GPUFLAGS += $(DFLAGS) -Isrc @@ -170,7 +160,7 @@ ifeq ($(findstring -DCHEMISTRY_GPU,$(DFLAGS)),-DCHEMISTRY_GPU) DFLAGS += -DSCALAR endif -.SUFFIXES: .c .cpp .cu .o +.SUFFIXES: .cpp .cu .o EXEC := bin/cholla$(SUFFIX) @@ -184,17 +174,14 @@ DFLAGS += $(MACRO_FLAGS) LIBS_CLANG_TIDY := $(subst -I/, -isystem /,$(LIBS)) LIBS_CLANG_TIDY += -isystem $(MPI_ROOT)/include CXXFLAGS_CLANG_TIDY := $(subst -I/, -isystem /,$(LDFLAGS)) -CFLAGS_CLANG_TIDY := $(subst -I/, -isystem /,$(CFLAGS)) GPUFLAGS_CLANG_TIDY := $(subst -I/, -isystem /,$(GPUFLAGS)) GPUFLAGS_CLANG_TIDY := $(filter-out -ccbin=mpicxx -fmad=false --expt-extended-lambda,$(GPUFLAGS)) GPUFLAGS_CLANG_TIDY += --cuda-host-only --cuda-path=$(CUDA_ROOT) -isystem /clang/includes CPPFILES_TIDY := $(CPPFILES) -CFILES_TIDY := $(CFILES) GPUFILES_TIDY := $(GPUFILES) ifdef TIDY_FILES CPPFILES_TIDY := $(filter $(TIDY_FILES), $(CPPFILES_TIDY)) - CFILES_TIDY := $(filter $(TIDY_FILES), $(CFILES_TIDY)) GPUFILES_TIDY := $(filter $(TIDY_FILES), $(GPUFILES_TIDY)) endif @@ -202,9 +189,6 @@ $(EXEC): prereq-build $(OBJS) mkdir -p bin/ && $(LD) $(LDFLAGS) $(OBJS) -o $(EXEC) $(LIBS) eval $(EXTRA_COMMANDS) -%.o: %.c - $(CC) $(CFLAGS) -c $< -o $@ - %.o: %.cpp $(CXX) $(CXXFLAGS) -c $< -o $@ @@ -221,9 +205,8 @@ tidy: # - --warnings-as-errors= Upgrade all warnings to error, good for CI clang-tidy --verify-config (time clang-tidy $(CLANG_TIDY_ARGS) $(CPPFILES_TIDY) -- $(DFLAGS) $(CXXFLAGS_CLANG_TIDY) $(LIBS_CLANG_TIDY)) > tidy_results_cpp.log 2>&1 & \ - (time clang-tidy $(CLANG_TIDY_ARGS) $(CFILES_TIDY) -- $(DFLAGS) $(CFLAGS_CLANG_TIDY) $(LIBS_CLANG_TIDY)) > tidy_results_c.log 2>&1 & \ (time clang-tidy $(CLANG_TIDY_ARGS) $(GPUFILES_TIDY) -- $(DFLAGS) $(GPUFLAGS_CLANG_TIDY) $(LIBS_CLANG_TIDY)) > tidy_results_gpu.log 2>&1 & \ - for i in 1 2 3; do wait -n; done + for i in 1 2; do wait -n; done clean: rm -f $(CLEAN_OBJS) diff --git a/builds/make.host.c3po b/builds/make.host.c3po index 02f658896..dc5e3d8eb 100644 --- a/builds/make.host.c3po +++ b/builds/make.host.c3po @@ -1,8 +1,5 @@ #-- Compiler and flags for different build type -CC = mpicc CXX = mpicxx -CFLAGS_DEBUG = -g -O0 -CFLAGS_OPTIMIZE = -g -O2 CXXFLAGS_DEBUG = -g -O0 -std=c++17 ${F_OFFLOAD} CXXFLAGS_OPTIMIZE = -g -Ofast -std=c++17 ${F_OFFLOAD} GPUFLAGS_DEBUG = -g -G -cudart shared -O0 -std=c++17 -ccbin=mpicxx -Xcompiler -rdynamic diff --git a/builds/make.host.crc b/builds/make.host.crc index a4e87553e..e0c20e162 100644 --- a/builds/make.host.crc +++ b/builds/make.host.crc @@ -1,9 +1,6 @@ #-- Compiler and flags for different build type -CC = mpicc CXX = mpicxx -CFLAGS_DEBUG = -g -O0 -CFLAGS_OPTIMIZE = -Ofast CXXFLAGS_DEBUG = -g -O0 -std=c++17 CXXFLAGS_OPTIMIZE = -Ofast -std=c++17 GPUFLAGS_OPTIMIZE = -g -O3 -std=c++17 diff --git a/builds/make.host.frontier b/builds/make.host.frontier index d9efba818..bae874c78 100644 --- a/builds/make.host.frontier +++ b/builds/make.host.frontier @@ -1,13 +1,9 @@ #-- make.host for Frontier at the OLCF with #-- Compiler and flags for different build type -CC = cc CXX = CC #GPUCXX ?= CC -x hip GPUCXX ?= hipcc -CFLAGS_DEBUG = -g -O0 -CFLAGS_OPTIMIZE = -g -O2 - CXXFLAGS_DEBUG = -g -O0 -std=c++17 CXXFLAGS_OPTIMIZE = -g -Ofast -std=c++17 -Wno-unused-result diff --git a/builds/make.host.github b/builds/make.host.github index 0dea956fc..46da09349 100644 --- a/builds/make.host.github +++ b/builds/make.host.github @@ -1,8 +1,5 @@ #-- Compiler and flags for different build type -CC = mpicc CXX = mpicxx -CFLAGS_DEBUG = -g -O0 -CFLAGS_OPTIMIZE = -g -O2 CXXFLAGS_DEBUG = -g -O0 -std=c++17 ${F_OFFLOAD} CXXFLAGS_OPTIMIZE = -Ofast -std=c++17 ${F_OFFLOAD} GPUFLAGS_DEBUG = -g -G -cudart shared -O0 -std=c++17 @@ -27,8 +24,6 @@ GOOGLETEST_ROOT := ${GOOGLETEST_ROOT} ifdef HIPCONFIG MPI_ROOT := ${MPI_ROOT} - CFLAGS_DEBUG += -fPIE - CFLAGS_OPTIMIZE += -fPIE CXXFLAGS_DEBUG += -fPIE CXXFLAGS_OPTIMIZE += -fPIE GPUFLAGS_DEBUG += -fPIE diff --git a/builds/make.host.lux b/builds/make.host.lux index f17986700..edf4e42c0 100644 --- a/builds/make.host.lux +++ b/builds/make.host.lux @@ -1,10 +1,7 @@ #-- make.inc for the Shamrock Server #-- Compiler and flags for different build type -CC = mpicc CXX = mpicxx -CFLAGS_DEBUG = -g -O0 -CFLAGS_OPTIMIZE = -g -O2 CXXFLAGS_DEBUG = -g -O0 -std=c++17 CXXFLAGS_OPTIMIZE = -Ofast -std=c++17 GPUFLAGS = -std=c++17 diff --git a/builds/make.host.poplar b/builds/make.host.poplar index f44851847..f029e09e6 100644 --- a/builds/make.host.poplar +++ b/builds/make.host.poplar @@ -1,10 +1,7 @@ #-- make.inc for Poplar, COE cluster at HPE #-- Compiler and flags for different build type -CC = cc CXX = CC -CFLAGS_DEBUG = -g -O0 ${F_OFFLOAD} -CFLAGS_OPTIMIZE = -Ofast ${F_OFFLOAD} CXXFLAGS_DEBUG = -g -O0 -std=c++17 ${F_OFFLOAD} CXXFLAGS_OPTIMIZE = -Ofast -std=c++17 ${F_OFFLOAD} diff --git a/builds/make.host.poplar.aomp b/builds/make.host.poplar.aomp index cac6b50b1..e87fe68e2 100644 --- a/builds/make.host.poplar.aomp +++ b/builds/make.host.poplar.aomp @@ -1,11 +1,8 @@ #-- make.inc for Poplar, COE cluster at HPE #-- Compiler and flags for different build type -CC = mpicc CXX = mpicxx HIPCONFIG = $(shell hipconfig -C) -CFLAGS_DEBUG = -g -O0 -CFLAGS_OPTIMIZE = -Ofast CXXFLAGS_DEBUG = -g -O0 -std=c++17 CXXFLAGS_OPTIMIZE = -Ofast -std=c++17 diff --git a/builds/make.host.poplar.cce+hip b/builds/make.host.poplar.cce+hip index 94963fb8a..b83268e12 100644 --- a/builds/make.host.poplar.cce+hip +++ b/builds/make.host.poplar.cce+hip @@ -1,11 +1,8 @@ #-- make.inc for Poplar, HPE COE cluster #-- Compiler and flags for different build type -CC = cc CXX = CC HIPCONFIG = $(shell hipconfig -C) -CFLAGS_DEBUG = -g -O0 -CFLAGS_OPTIMIZE = -Ofast CXXFLAGS_DEBUG = -g -O0 -std=c++17 CXXFLAGS_OPTIMIZE = -Ofast -std=c++17 diff --git a/builds/make.host.shamrock b/builds/make.host.shamrock index cc849b051..eec8d48e6 100644 --- a/builds/make.host.shamrock +++ b/builds/make.host.shamrock @@ -1,12 +1,9 @@ #-- make.inc for the Lux Cluster #-- Compiler and flags for different build type -CC = mpicc CXX = mpicxx #CC = gcc #CXX = g++ -CFLAGS_DEBUG = -g -O0 -CFLAGS_OPTIMIZE = -g -O2 CXXFLAGS_DEBUG = -g -O0 -std=c++17 CXXFLAGS_OPTIMIZE = -Ofast -std=c++17 diff --git a/builds/make.host.spock b/builds/make.host.spock index 8cac7c086..9dfc41676 100644 --- a/builds/make.host.spock +++ b/builds/make.host.spock @@ -1,12 +1,8 @@ #-- make.inc for Spock EAS at the OLCF with #-- Compiler and flags for different build type -CC = cc CXX = CC -CFLAGS_DEBUG = -g -O0 -CFLAGS_OPTIMIZE = -g -O2 - CXXFLAGS_DEBUG = -g -O0 -std=c++17 CXXFLAGS_OPTIMIZE = -g -Ofast -std=c++17 diff --git a/builds/make.host.summit b/builds/make.host.summit index ab1feda42..a9f5337f5 100644 --- a/builds/make.host.summit +++ b/builds/make.host.summit @@ -2,10 +2,7 @@ # https://www.olcf.ornl.gov/summit/ #-- Compiler and flags for different build type -CC = mpicc CXX = mpicxx -CFLAGS_DEBUG = -g -O0 -CFLAGS_OPTIMIZE = -g -O2 CXXFLAGS_DEBUG = -g -O0 -std=c++17 ${F_OFFLOAD} CXXFLAGS_OPTIMIZE = -Ofast -std=c++17 ${F_OFFLOAD} GPUFLAGS_DEBUG = -g -O0 -std=c++17 -ccbin=mpicxx -G -cudart shared diff --git a/builds/make.host.tornado b/builds/make.host.tornado index e8cf09a62..cdcf5483f 100644 --- a/builds/make.host.tornado +++ b/builds/make.host.tornado @@ -1,10 +1,7 @@ #-- make.inc for the Lux Cluster #-- Compiler and flags for different build type -CC = gcc CXX = g++ -CFLAGS_DEBUG = -g -O0 -CFLAGS_OPTIMIZE = -g -O2 CXXFLAGS_DEBUG = -g -O0 -std=c++17 CXXFLAGS_OPTIMIZE = -Ofast -std=c++17 diff --git a/builds/setup.poplar.aomp.sh b/builds/setup.poplar.aomp.sh index 7b83e5ab7..d692711fe 100755 --- a/builds/setup.poplar.aomp.sh +++ b/builds/setup.poplar.aomp.sh @@ -1,12 +1,11 @@ #!/bin/bash module purge -module load craype-x86-naples craype-network-infiniband +module load craype-x86-naples craype-network-infiniband module load shared slurm module use /home/users/twhite/share/modulefiles module load ompi/4.0.4-rocm-3.9 hdf5 -export OMPI_CC=$(which clang) export OMPI_CXX=$(which clang) export CHOLLA_MACHINE=poplar.aomp diff --git a/tools/analyze_tidy_checks.py b/tools/analyze_tidy_checks.py index 0ea286920..de5c86313 100755 --- a/tools/analyze_tidy_checks.py +++ b/tools/analyze_tidy_checks.py @@ -6,7 +6,7 @@ failures a check represents. When running, make sure that you have already run clang-tidy with all the - checks you want enabled since this script looks for the 3 tidy_results_*.log + checks you want enabled since this script looks for the 2 tidy_results_*.log files in the root directory of Cholla ================================================================================ """ @@ -35,14 +35,12 @@ def main(): def loadTidyResults(chollaPath): - with open(chollaPath / "tidy_results_c.log", "r") as file: - cData = file.read() with open(chollaPath / "tidy_results_cpp.log", "r") as file: cppData = file.read() with open(chollaPath / "tidy_results_gpu.log", "r") as file: gpuData = file.read() - return cData + cppData + gpuData + return cppData + gpuData def getEnabledChecks(chollaPath): From e67dc1d4d433837c969a873ced5a1c00ae29d3db Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Tue, 7 Mar 2023 14:53:35 -0500 Subject: [PATCH 237/694] Remove .c files again after bad rebase --- src/mpi/MPI_Comm_node.c | 66 ----------------------------------------- 1 file changed, 66 deletions(-) delete mode 100644 src/mpi/MPI_Comm_node.c diff --git a/src/mpi/MPI_Comm_node.c b/src/mpi/MPI_Comm_node.c deleted file mode 100644 index 80edaf594..000000000 --- a/src/mpi/MPI_Comm_node.c +++ /dev/null @@ -1,66 +0,0 @@ -#ifdef MPI_CHOLLA - #include "../mpi/MPI_Comm_node.h" - - #include - #include - #include - -/*! \fn int djb2_hash(char *str) - * \brief Simple hash function by Dan Bernstein */ -int djb2_hash(char *str); - -/*! \fn MPI_Comm MPI_Comm_node(void) - * \brief Returns an MPI_Comm for processes on each node.*/ -MPI_Comm MPI_Comm_node(int *myid_node, int *nproc_node) -{ - int myid; // global rank - int nproc; // global rank - char pname[MPI_MAX_PROCESSOR_NAME]; // node hostname - int pname_length; // length of node hostname - int hash; // hash of node hostname - - MPI_Comm node_comm; // communicator for the procs on each node - - // get the global process rank - MPI_Comm_rank(MPI_COMM_WORLD, &myid); - MPI_Comm_size(MPI_COMM_WORLD, &nproc); - - // if we're the only process, then just return - // the global rank, size, and comm - if (nproc == 1) { - *myid_node = myid; - *nproc_node = nproc; - return MPI_COMM_WORLD; - } - - // get the hostname of the node - MPI_Get_processor_name(pname, &pname_length); - - // hash the name of the node - hash = abs(djb2_hash(pname)); - - // printf("hash %d\n",hash); - - // split the communicator - MPI_Comm_split(MPI_COMM_WORLD, hash, myid, &node_comm); - - // get size and rank - MPI_Comm_rank(node_comm, myid_node); - MPI_Comm_size(node_comm, nproc_node); - - // return the communicator for processors on the node - return node_comm; -} - -/*! \fn int djb2_hash(char *str) - * \brief Simple hash function by Dan Bernstein */ -int djb2_hash(char *str) -{ - int hash = 5381; - int c; - while ((c = *str++)) { - hash = ((hash << 5) + hash) + c; - } /*hash*33 + c*/ - return hash; -} -#endif /*MPI_CHOLLA*/ From 7f49dd9054788068ca122d671bf3de5fdb913563 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Tue, 28 Feb 2023 11:11:50 -0500 Subject: [PATCH 238/694] Add circularly polarized alfven wave test - Added new `polarization` parameter for this test - New `Circularly_Polarized_Alfven_Wave` initial condition type. Note that this test has a very limited set of initial conditions where it is meaningful and so most of them are hard coded and the rest are checked wit assert statements - Lowered the maximum divergence to 1E-9 after discussing with Chris White of the Flatiron institute who's an Athena dev. - Added new system tests for the circularly polarized alfven wave - New parameter file for the circularly polarized alfven wave - Minor refactor of MHD utils to make more consistent use of the `computeMagneticEnergy` function. --- .../3D/circularly_polarized_alfven_wave.txt | 48 ++++++ src/global/global.cpp | 2 + src/global/global.h | 1 + src/grid/grid3D.h | 9 ++ src/grid/initial_conditions.cpp | 142 ++++++++++++++++++ src/mhd/magnetic_divergence.cu | 8 +- src/system_tests/mhd_system_tests.cpp | 122 +++++++++++++++ src/utils/mhd_utilities.h | 43 +++--- 8 files changed, 350 insertions(+), 25 deletions(-) create mode 100644 examples/3D/circularly_polarized_alfven_wave.txt diff --git a/examples/3D/circularly_polarized_alfven_wave.txt b/examples/3D/circularly_polarized_alfven_wave.txt new file mode 100644 index 000000000..193f1ac33 --- /dev/null +++ b/examples/3D/circularly_polarized_alfven_wave.txt @@ -0,0 +1,48 @@ +# +# Parameter File for the circularly polarized Alfven Wave +# See [Gardiner & Stone 2008](https://arxiv.org/abs/0712.2634) pages 4134-4135 +# for details. +# + +################################################ +# number of grid cells in the x dimension +nx=64 +# number of grid cells in the y dimension +ny=32 +# number of grid cells in the z dimension +nz=32 +# final output time +tout=1.0 +# time interval for output +outstep=1.0 +# name of initial conditions +init=Circularly_Polarized_Alfven_Wave +# domain properties +xmin=0.0 +ymin=0.0 +zmin=0.0 +xlen=3.0 +ylen=1.5 +zlen=1.5 +# type of boundary conditions +xl_bcnd=1 +xu_bcnd=1 +yl_bcnd=1 +yu_bcnd=1 +zl_bcnd=1 +zu_bcnd=1 +# path to output directory +outdir=./ + +################################################# +# Parameters for linear wave problems +# Polarization. 1 = right polarized, -1 = left polarized +polarization=1.0 +# velocity in the x direction. 0 for moving wave, -1 for standing wave +vx=0.0 +# pitch angle +pitch=0.72972765622696634 +# yaw angle +yaw=1.1071487177940904 +# value of gamma +gamma=1.666666666666667 diff --git a/src/global/global.cpp b/src/global/global.cpp index 3b029c413..394e30a04 100644 --- a/src/global/global.cpp +++ b/src/global/global.cpp @@ -357,6 +357,8 @@ void parse_param(char *name, char *value, struct parameters *parms) parms->pitch = atof(value); } else if (strcmp(name, "yaw") == 0) { parms->yaw = atof(value); + } else if (strcmp(name, "polarization") == 0) { + parms->polarization = atof(value); #ifdef PARTICLES } else if (strcmp(name, "prng_seed") == 0) { parms->prng_seed = atoi(value); diff --git a/src/global/global.h b/src/global/global.h index 59a19e5c2..c8aa99383 100644 --- a/src/global/global.h +++ b/src/global/global.h @@ -263,6 +263,7 @@ struct parameters { Real rEigenVec_Bz = 0; Real pitch = 0; Real yaw = 0; + Real polarization = 0; #ifdef PARTICLES // The random seed for particle simulations. With the default of 0 then a // machine dependent seed will be generated. diff --git a/src/grid/grid3D.h b/src/grid/grid3D.h index 34c470399..28d143e17 100644 --- a/src/grid/grid3D.h +++ b/src/grid/grid3D.h @@ -681,6 +681,15 @@ class Grid3D void Chemistry_Test(struct parameters P); +#ifdef MHD + /*! + * \brief Initialize the grid with a circularly polarized Alfven wave. Only options are angle and Vx. See [Gardiner & + * Stone 2008](https://arxiv.org/abs/0712.2634) pages 4134-4135 for details. + * + * \param P The parameters. Only uses Vx, pitch, and yaw + */ + void Circularly_Polarized_Alfven_Wave(struct parameters const P); +#endif // MHD #ifdef MPI_CHOLLA void Set_Boundaries_MPI(struct parameters P); void Set_Boundaries_MPI_BLOCK(int *flags, struct parameters P); diff --git a/src/grid/initial_conditions.cpp b/src/grid/initial_conditions.cpp index 6331399e0..0a4a29205 100644 --- a/src/grid/initial_conditions.cpp +++ b/src/grid/initial_conditions.cpp @@ -84,6 +84,10 @@ void Grid3D::Set_Initial_Conditions(parameters P) Zeldovich_Pancake(P); } else if (strcmp(P.init, "Chemistry_Test") == 0) { Chemistry_Test(P); +#ifdef MHD + } else if (strcmp(P.init, "Circularly_Polarized_Alfven_Wave") == 0) { + Circularly_Polarized_Alfven_Wave(P); +#endif // MHD } else { chprintf("ABORT: %s: Unknown initial conditions!\n", P.init); chexit(-1); @@ -1588,3 +1592,141 @@ void Grid3D::Chemistry_Test(struct parameters P) chexit(-1); #endif // COSMOLOGY } + +#ifdef MHD +void Grid3D::Circularly_Polarized_Alfven_Wave(struct parameters const P) +{ + // This test is only meaningful for a limited number of parameter values so I will check them here + assert(P.polarization == 1.0 or + P.polarization == -1.0 and + "The polarization for this test must be 1 (right polarized) or -1 (left polarized)."); + assert(std::abs(P.vx) == 1.0 or + P.vx == 0.0 and "The x velocity for this test must be 0 (traveling wave) or 1 (standing wave)."); + + // Check the domain and angles + auto checkDomain = [](int const &nx, int const &ny, int const &nz, Real const &xlen, Real const &ylen, + Real const &zlen) { + assert(nx == 2 * ny and nx == 2 * nz and "This test requires that the number of cells be of shape 2L x L x L"); + assert(xlen == 2 * ylen and xlen == 2 * zlen and "This test requires that the domain be of shape 2L x L x L"); + }; + if ((P.pitch == 0.0 and P.yaw == 0.0) or (P.pitch == std::asin(2. / 3.) and P.yaw == std::asin(2. / std::sqrt(5.)))) { + checkDomain(P.nx, P.ny, P.nz, P.xlen, P.ylen, P.zlen); + } else if (P.pitch == 0.5 * M_PI and P.yaw == 0.0) { + checkDomain(P.ny, P.nz, P.nx, P.ylen, P.zlen, P.xlen); + } else if (P.pitch == 0.0 and P.yaw == 0.5 * M_PI) { + checkDomain(P.nz, P.nx, P.ny, P.zlen, P.xlen, P.ylen); + } else { + assert(false and "This test does not support these angles"); + } + + // Parameters for tests. + Real const density = 1.0; + Real const pressure = 0.1; + Real const velocity_x = P.vx; + Real const amplitude = 0.1; // the amplitude of the wave + Real const magnetic_x = 1.0; + + // Angles + Real const sin_yaw = std::sin(P.yaw); + Real const cos_yaw = std::cos(P.yaw); + Real const sin_pitch = std::sin(P.pitch); + Real const cos_pitch = std::cos(P.pitch); + + // Compute the wave quantities + Real const wavelength = 1.; + Real const wavenumber = 2.0 * M_PI / wavelength; // the angular wave number k + + // Compute the vector potentials + std::vector vectorPotential(3 * H.n_cells, 0); + auto Compute_Vector_Potential = [&](Real const &x_loc, Real const &y_loc, Real const &z_loc) { + // The "_rot" variables are the rotated version + Real const x_rot = x_loc * cos_pitch * cos_yaw + y_loc * cos_pitch * sin_yaw + z_loc * sin_pitch; + Real const y_rot = -x_loc * sin_yaw + y_loc * cos_yaw; + + Real const a_y = P.polarization * (amplitude / wavenumber) * std::sin(wavenumber * x_rot); + Real const a_z = (amplitude / wavenumber) * std::cos(wavenumber * x_rot) + magnetic_x * y_rot; + + return std::make_pair(a_y, a_z); + }; + + for (int k = 0; k < H.nz; k++) { + for (int j = 0; j < H.ny; j++) { + for (int i = 0; i < H.nx; i++) { + // Get cell index + int const id = cuda_utilities::compute1DIndex(i, j, k, H.nx, H.ny); + + Real x, y, z; + Get_Position(i, j, k, &x, &y, &z); + + auto vectorPot = Compute_Vector_Potential(x, y + H.dy / 2., z + H.dz / 2.); + vectorPotential.at(id + 0 * H.n_cells) = -vectorPot.first * sin_yaw - vectorPot.second * sin_pitch * cos_yaw; + + vectorPot = Compute_Vector_Potential(x + H.dx / 2., y, z + H.dz / 2.); + vectorPotential.at(id + 1 * H.n_cells) = vectorPot.first * cos_yaw - vectorPot.second * sin_pitch * sin_yaw; + + vectorPot = Compute_Vector_Potential(x + H.dx / 2., y + H.dy / 2., z); + vectorPotential.at(id + 2 * H.n_cells) = vectorPot.second * cos_pitch; + } + } + } + + // Compute the magnetic field + for (int k = 1; k < H.nz; k++) { + for (int j = 1; j < H.ny; j++) { + for (int i = 1; i < H.nx; i++) { + // Get cell index. The "xmo" means: X direction Minus One + int const id = cuda_utilities::compute1DIndex(i, j, k, H.nx, H.ny); + int const idxmo = cuda_utilities::compute1DIndex(i - 1, j, k, H.nx, H.ny); + int const idymo = cuda_utilities::compute1DIndex(i, j - 1, k, H.nx, H.ny); + int const idzmo = cuda_utilities::compute1DIndex(i, j, k - 1, H.nx, H.ny); + + C.magnetic_x[id] = (vectorPotential.at(id + 2 * H.n_cells) - vectorPotential.at(idymo + 2 * H.n_cells)) / H.dy - + (vectorPotential.at(id + 1 * H.n_cells) - vectorPotential.at(idzmo + 1 * H.n_cells)) / H.dz; + C.magnetic_y[id] = (vectorPotential.at(id + 0 * H.n_cells) - vectorPotential.at(idzmo + 0 * H.n_cells)) / H.dz - + (vectorPotential.at(id + 2 * H.n_cells) - vectorPotential.at(idxmo + 2 * H.n_cells)) / H.dx; + C.magnetic_z[id] = (vectorPotential.at(id + 1 * H.n_cells) - vectorPotential.at(idxmo + 1 * H.n_cells)) / H.dx - + (vectorPotential.at(id + 0 * H.n_cells) - vectorPotential.at(idymo + 0 * H.n_cells)) / H.dy; + } + } + } + + // set initial values of non-magnetic conserved variables + for (int k = H.n_ghost - 1; k < H.nz - H.n_ghost; k++) { + for (int j = H.n_ghost - 1; j < H.ny - H.n_ghost; j++) { + for (int i = H.n_ghost - 1; i < H.nx - H.n_ghost; i++) { + // get cell index + int const id = cuda_utilities::compute1DIndex(i, j, k, H.nx, H.ny); + + // get cell-centered position + Real x_pos, y_pos, z_pos; + Get_Position(i, j, k, &x_pos, &y_pos, &z_pos); + Real const x_pos_rot = x_pos * cos_pitch * cos_yaw + y_pos * cos_pitch * sin_yaw + z_pos * sin_pitch; + + // Compute the momentum + Real const momentum_x = density * velocity_x; + Real const momentum_y = -P.polarization * density * amplitude * std::sin(wavenumber * x_pos_rot); + Real const momentum_z = -density * amplitude * std::cos(wavenumber * x_pos_rot); + Real const momentum_x_rot = + momentum_x * cos_pitch * cos_yaw - momentum_y * sin_yaw - momentum_z * sin_pitch * cos_yaw; + Real const momentum_y_rot = + momentum_x * cos_pitch * sin_yaw + momentum_y * cos_yaw - momentum_z * sin_pitch * sin_yaw; + Real const momentum_z_rot = momentum_x * sin_pitch + momentum_z * cos_pitch; + + // Compute the Energy + auto const magnetic_centered = + mhd::utils::cellCenteredMagneticFields(C.host, id, i, j, k, H.n_cells, H.nx, H.ny); + Real const energy = mhd::utils::computeEnergy( + pressure, density, momentum_x_rot / density, momentum_y_rot / density, momentum_z_rot / density, + magnetic_centered.x, magnetic_centered.y, magnetic_centered.z, ::gama); + + // Final assignment + C.density[id] = density; + C.momentum_x[id] = momentum_x_rot; + C.momentum_y[id] = momentum_y_rot; + C.momentum_z[id] = momentum_z_rot; + C.Energy[id] = energy; + } + } + } +} +#endif // MHD \ No newline at end of file diff --git a/src/mhd/magnetic_divergence.cu b/src/mhd/magnetic_divergence.cu index 5c154262b..cc639a8a7 100644 --- a/src/mhd/magnetic_divergence.cu +++ b/src/mhd/magnetic_divergence.cu @@ -98,9 +98,11 @@ Real checkMagneticDivergence(Grid3D const &G) max_magnetic_divergence = ReduceRealMax(max_magnetic_divergence); #endif // MPI_CHOLLA - // If the magnetic divergence is greater than the limit then raise a warning - // and exit - Real static const magnetic_divergence_limit = 1.0E-14; + // If the magnetic divergence is greater than the limit then raise a warning and exit. + // This maximum value of divergence was chosen after a discussion with Chris White of the Flatiron institute and an + // Athena dev. He said that in his experience issues start showing up at around 1E-8 divergence so this is set with an + // order of magnitude margin. + Real static const magnetic_divergence_limit = 1.0E-9; if (max_magnetic_divergence > magnetic_divergence_limit) { // Report the error and exit chprintf( diff --git a/src/system_tests/mhd_system_tests.cpp b/src/system_tests/mhd_system_tests.cpp index a443c9bcb..436c6129b 100644 --- a/src/system_tests/mhd_system_tests.cpp +++ b/src/system_tests/mhd_system_tests.cpp @@ -595,4 +595,126 @@ TEST_P(tMHDSYSTEMParameterizedMpi, RyuAndJones4dShockTubeCorrectInputExpectCorre test_runner.runTest(); } /// @} +// ============================================================================= + +// ============================================================================= +// Test Suite: tMHDSYSTEMCircularlyPolarizedAlfvenWaveParameterizedPolarization +// ============================================================================= +/*! + * \defgroup tMHDSYSTEMCircularlyPolarizedAlfvenWaveParameterizedPolarization + * \brief Test the circularly polarized Alfven Wave conditions as a parameterized test with varying polarizations. + * Details in Gardiner & Stone 2008 + * + */ +/// @{ +class tMHDSYSTEMCircularlyPolarizedAlfvenWaveParameterizedPolarization : public ::testing::TestWithParam +{ + public: + tMHDSYSTEMCircularlyPolarizedAlfvenWaveParameterizedPolarization() : cpawTest(false, true, false, false){}; + + protected: + systemTest::SystemTestRunner cpawTest; + + void setLaunchParams(double const &polarization, double const &vx) + { + // Constant for all tests + size_t const N = 32; + double const length = 1.5; + double const gamma = 5. / 3.; + double const tOut = 1.0; + double const pitch = std::asin(2. / 3.); + double const yaw = std::asin(2. / std::sqrt(5.)); + + // Domain settings + double const x_len = 2. * length, y_len = length, z_len = length; + int const nx = 2 * N, ny = N, nz = N; + + // Settings + cpawTest.chollaLaunchParams.append(" nx=" + to_string_exact(nx)); + cpawTest.chollaLaunchParams.append(" ny=" + to_string_exact(ny)); + cpawTest.chollaLaunchParams.append(" nz=" + to_string_exact(nz)); + cpawTest.chollaLaunchParams.append(" tout=" + to_string_exact(tOut)); + cpawTest.chollaLaunchParams.append(" outstep=" + to_string_exact(tOut)); + cpawTest.chollaLaunchParams.append(" init=Circularly_Polarized_Alfven_Wave"); + cpawTest.chollaLaunchParams.append(" xmin=0.0"); + cpawTest.chollaLaunchParams.append(" ymin=0.0"); + cpawTest.chollaLaunchParams.append(" zmin=0.0"); + cpawTest.chollaLaunchParams.append(" xlen=" + to_string_exact(x_len)); + cpawTest.chollaLaunchParams.append(" ylen=" + to_string_exact(y_len)); + cpawTest.chollaLaunchParams.append(" zlen=" + to_string_exact(z_len)); + cpawTest.chollaLaunchParams.append(" xl_bcnd=1"); + cpawTest.chollaLaunchParams.append(" xu_bcnd=1"); + cpawTest.chollaLaunchParams.append(" yl_bcnd=1"); + cpawTest.chollaLaunchParams.append(" yu_bcnd=1"); + cpawTest.chollaLaunchParams.append(" zl_bcnd=1"); + cpawTest.chollaLaunchParams.append(" zu_bcnd=1"); + cpawTest.chollaLaunchParams.append(" polarization=" + to_string_exact(polarization)); + cpawTest.chollaLaunchParams.append(" vx=" + to_string_exact(vx)); + cpawTest.chollaLaunchParams.append(" gamma=" + to_string_exact(gamma)); + cpawTest.chollaLaunchParams.append(" pitch=" + to_string_exact(pitch)); + cpawTest.chollaLaunchParams.append(" yaw=" + to_string_exact(yaw)); + } +}; + +// Moving wave with right and left polarization +// ============================================= +TEST_P(tMHDSYSTEMCircularlyPolarizedAlfvenWaveParameterizedPolarization, MovingWaveCorrectInputExpectCorrectOutput) +{ + // Get the test parameter + double const polarization = GetParam(); + + // Set the wave to be moving + double const vx = 0.0; + +// Set allowed errors +#ifdef PCM + double const allowedL1Error = 0.065; // Based on results in Gardiner & Stone 2008 + double const allowedError = 0.046; +#else // PCM + double const allowedL1Error = 1E-3; // Based on results in Gardiner & Stone 2008 + double const allowedError = 1E-3; +#endif // PCM + + // Set the launch parameters + setLaunchParams(polarization, vx); + + // Set the number of timesteps + cpawTest.setFiducialNumTimeSteps(82); + + // Check Results + cpawTest.runL1ErrorTest(allowedL1Error, allowedError); +} + +// Standing wave with right and left polarization +// ============================================= +TEST_P(tMHDSYSTEMCircularlyPolarizedAlfvenWaveParameterizedPolarization, StandingWaveCorrectInputExpectCorrectOutput) +{ + // Get the test parameter + double const polarization = GetParam(); + + // Set the wave to be standing + double const vx = -polarization; + +// Set allowed errors +#ifdef PCM + double const allowedL1Error = 0.018; // Based on results in Gardiner & Stone 2008 + double const allowedError = 0.017; +#else // PCM + double const allowedL1Error = 0.0; // Based on results in Gardiner & Stone 2008 + double const allowedError = 0.0; +#endif // PCM + + // Set the launch parameters + setLaunchParams(polarization, vx); + + // Set the number of timesteps + cpawTest.setFiducialNumTimeSteps(130); + + // Check Results + cpawTest.runL1ErrorTest(allowedL1Error, allowedError); +} + +INSTANTIATE_TEST_SUITE_P(, tMHDSYSTEMCircularlyPolarizedAlfvenWaveParameterizedPolarization, + ::testing::Values(1.0, -1.0)); +/// @} // ============================================================================= \ No newline at end of file diff --git a/src/utils/mhd_utilities.h b/src/utils/mhd_utilities.h index aeca4aa8b..7d5db459e 100644 --- a/src/utils/mhd_utilities.h +++ b/src/utils/mhd_utilities.h @@ -61,6 +61,22 @@ inline __host__ __device__ Real _magnetosonicSpeed(Real const &density, Real con // ===================================================================== } // namespace _internal +// ========================================================================= +/*! + * \brief Compute the magnetic energy + * + * \param[in] magneticX The magnetic field in the X-direction + * \param[in] magneticY The magnetic field in the Y-direction + * \param[in] magneticZ The magnetic field in the Z-direction + * \return Real The magnetic energy + */ +inline __host__ __device__ Real computeMagneticEnergy(Real const &magneticX, Real const &magneticY, + Real const &magneticZ) +{ + return 0.5 * (magneticX * magneticX + ((magneticY * magneticY) + (magneticZ * magneticZ))); +} +// ========================================================================= + // ========================================================================= /*! * \brief Compute the energy in a cell. If MHD is not defined then simply @@ -85,7 +101,7 @@ inline __host__ __device__ Real computeEnergy(Real const &pressure, Real const & Real energy = (fmax(pressure, TINY_NUMBER) / (gamma - 1.)) + 0.5 * density * (velocityX * velocityX + ((velocityY * velocityY) + (velocityZ * velocityZ))); #ifdef MHD - energy += 0.5 * (magneticX * magneticX + ((magneticY * magneticY) + (magneticZ * magneticZ))); + energy += computeMagneticEnergy(magneticX, magneticY, magneticZ); #endif // MHD return energy; @@ -114,14 +130,13 @@ inline __host__ __device__ Real computeGasPressure(Real const &energy, Real cons Real pressure = (gamma - 1.) * (energy - 0.5 * (momentumX * momentumX + ((momentumY * momentumY) + (momentumZ * momentumZ))) / density - - 0.5 * (magneticX * magneticX + ((magneticY * magneticY) + (magneticZ * magneticZ)))); + computeMagneticEnergy(magneticX, magneticY, magneticZ)); return fmax(pressure, TINY_NUMBER); } /*! - * \brief Specialzation of mhd::utils::computeGasPressure for use in the HLLD - * solver + * \brief Specialization of mhd::utils::computeGasPressure for use in the HLLD solver * * \param state The State to compute the gas pressure of * \param magneticX The X magnetic field @@ -160,23 +175,7 @@ inline __host__ __device__ Real computeThermalEnergy(Real const &energyTot, Real return energyTot - 0.5 * (momentumX * momentumX + ((momentumY * momentumY) + (momentumZ * momentumZ))) / fmax(density, TINY_NUMBER) - - 0.5 * (magneticX * magneticX + ((magneticY * magneticY) + (magneticZ * magneticZ))); -} -// ========================================================================= - -// ========================================================================= -/*! - * \brief Compute the magnetic energy - * - * \param[in] magneticX The magnetic field in the X-direction - * \param[in] magneticY The magnetic field in the Y-direction - * \param[in] magneticZ The magnetic field in the Z-direction - * \return Real The magnetic energy - */ -inline __host__ __device__ Real computeMagneticEnergy(Real const &magneticX, Real const &magneticY, - Real const &magneticZ) -{ - return 0.5 * (magneticX * magneticX + ((magneticY * magneticY) + (magneticZ * magneticZ))); + computeMagneticEnergy(magneticX, magneticY, magneticZ); } // ========================================================================= @@ -194,7 +193,7 @@ inline __host__ __device__ Real computeMagneticEnergy(Real const &magneticX, Rea inline __host__ __device__ Real computeTotalPressure(Real const &gasPressure, Real const &magneticX, Real const &magneticY, Real const &magneticZ) { - Real pTot = gasPressure + 0.5 * (magneticX * magneticX + ((magneticY * magneticY) + (magneticZ * magneticZ))); + Real pTot = gasPressure + computeMagneticEnergy(magneticX, magneticY, magneticZ); return fmax(pTot, TINY_NUMBER); } From ab62c92809bf68b0aeb97d9ff395db71bd3bdbed Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Tue, 7 Mar 2023 12:29:24 -0500 Subject: [PATCH 239/694] Fix bugs in `Check_Configuration` --- src/utils/error_handling.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/utils/error_handling.cpp b/src/utils/error_handling.cpp index fc09c3363..4576e7abe 100644 --- a/src/utils/error_handling.cpp +++ b/src/utils/error_handling.cpp @@ -1,6 +1,7 @@ #include "../utils/error_handling.h" #include +#include #ifdef MPI_CHOLLA #include @@ -62,7 +63,7 @@ void Check_Configuration(parameters const &P) // MHD Checks // ========== #ifdef MHD - assert(P.nx < 2 or P.ny < 2 or P.nz < 2 and "MHD runs must be 3D"); + assert(P.nx > 1 or P.ny > 1 or P.nz > 1 and "MHD runs must be 3D"); // Must use the correct integrator #if !defined(VL) || defined(SIMPLE) @@ -90,8 +91,8 @@ void Check_Configuration(parameters const &P) } // Error if unsupported boundary condition is used - assert(P.xl_bcnd == 2 or P.xu_bcnd == 2 or P.yl_bcnd == 2 or P.yu_bcnd == 2 or P.zl_bcnd == 2 or - P.zu_bcnd == 2 && "MHD does not support reflective boundary conditions"); + assert(P.xl_bcnd != 2 or P.xu_bcnd != 2 or P.yl_bcnd != 2 or P.yu_bcnd != 2 or P.zl_bcnd != 2 or + P.zu_bcnd != 2 && "MHD does not support reflective boundary conditions"); // AVERAGE_SLOW_CELLS not supported on MHD #ifdef AVERAGE_SLOW_CELLS From 3b60b8b0fd833a1b2c384d0ed81bfbade877e118 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Tue, 7 Mar 2023 12:52:04 -0500 Subject: [PATCH 240/694] Loosen limits on some HLLD internal tests This fixes the issue with the tests passing on C-3PO but not H2P. Updating some data and loosening a few limits fixed it. --- src/riemann_solvers/hlld_cuda_tests.cu | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/riemann_solvers/hlld_cuda_tests.cu b/src/riemann_solvers/hlld_cuda_tests.cu index e6ab03fe9..4a9a10270 100644 --- a/src/riemann_solvers/hlld_cuda_tests.cu +++ b/src/riemann_solvers/hlld_cuda_tests.cu @@ -1979,7 +1979,7 @@ TEST(tMHDHlldInternalComputeStarState, CorrectInputNonDegenerateExpectCorrectOut testParams const parameters; std::vector fiducialStarState{ - {24.101290139122913, 1.4626377138501221, 5.7559806612277464, 1023.8840191068896, 18.648382121236992, + {24.101290139122913, 1.4626377138501221, 5.7559806612277464, 1023.8840191068900, 18.648382121236992, 70.095850905078336}, {50.132466596958501, 0.85967712862308099, 1.9480712959548112, 172.06840532772659, 66.595692901872582, 39.389537509454122}}; @@ -2013,9 +2013,9 @@ TEST(tMHDHlldInternalStarFluxes, CorrectInputNonDegenerateExpectCorrectOutput) testParams const parameters; std::vector fiducialFlux{ - {-45.270724071132321, 1369.1771532285088, -556.91765728768155, -2368.4452742393819, -21413.063415617493, - -83.294404848633206, -504.8413875424834}, - {61.395380340435793, 283.48596932136809, -101.75517013858293, -51.343648925162142, -1413.4750762739586, + {-45.270724071132321, 1369.1771532285088, -556.91765728768155, -2368.4452742393819, -21413.063415617500, + -83.294404848633300, -504.84138754248409}, + {61.395380340435793, 283.48596932136809, -101.75517013858293, -51.34364892516212, -1413.4750762739586, 25.139956754826922, 78.863254638038882}}; for (size_t i = 0; i < parameters.names.size(); i++) { @@ -2039,9 +2039,9 @@ TEST(tMHDHlldInternalStarFluxes, CorrectInputNonDegenerateExpectCorrectOutput) testingUtilities::checkResults(fiducialFlux[i].energy, testFlux.energy, parameters.names.at(i) + ", EnergyStarFlux"); testingUtilities::checkResults(fiducialFlux[i].magneticY, testFlux.magneticY, - parameters.names.at(i) + ", MagneticStarFluxY"); + parameters.names.at(i) + ", MagneticStarFluxY", 1.0E-13); testingUtilities::checkResults(fiducialFlux[i].magneticZ, testFlux.magneticZ, - parameters.names.at(i) + ", MagneticStarFluxZ"); + parameters.names.at(i) + ", MagneticStarFluxZ", 7.0E-13); } } @@ -2209,7 +2209,7 @@ TEST(tMHDHlldInternalDoubleStarFluxes, CorrectInputExpectCorrectOutput) std::vector const fiducialFlux{ {-144.2887586578122, 1450.1348804310369, -332.80193639987715, 83.687152337186944, 604.70003506833029, -245.53635448727721, -746.94190287166407}, - {10.040447333773216, 284.85426012223729, -487.87930516727664, 490.91728596722157, 59.061079503595323, + {10.040447333773258, 284.85426012223729, -487.87930516727664, 490.91728596722157, 59.061079503595295, 30.244176588794346, -466.15336272175193}}; for (size_t i = 0; i < parameters.names.size(); i++) { @@ -2220,7 +2220,7 @@ TEST(tMHDHlldInternalDoubleStarFluxes, CorrectInputExpectCorrectOutput) // Now check results testingUtilities::checkResults(fiducialFlux[i].density, testFlux.density, - parameters.names.at(i) + ", DensityStarFlux"); + parameters.names.at(i) + ", DensityStarFlux", 5.0E-14); testingUtilities::checkResults(fiducialFlux[i].momentumX, testFlux.momentumX, parameters.names.at(i) + ", MomentumStarFluxX"); testingUtilities::checkResults(fiducialFlux[i].momentumY, testFlux.momentumY, From 84bc3a602e163011aa0c79eff551372956b46a50 Mon Sep 17 00:00:00 2001 From: alwinm Date: Fri, 10 Mar 2023 07:50:40 -0800 Subject: [PATCH 241/694] Update Doxyfile for recursive search Doxygen output gets a lot more interesting when all of the files are actually included by setting RECURSIVE = YES --- docs/doxygen/Doxyfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/doxygen/Doxyfile b/docs/doxygen/Doxyfile index 9c3acb19f..8a9752f90 100644 --- a/docs/doxygen/Doxyfile +++ b/docs/doxygen/Doxyfile @@ -964,7 +964,7 @@ EXAMPLE_PATTERNS = * # irrespective of the value of the RECURSIVE tag. # The default value is: NO. -EXAMPLE_RECURSIVE = NO +EXAMPLE_RECURSIVE = YES # The IMAGE_PATH tag can be used to specify one or more files or directories # that contain images that are to be included in the documentation (see the From 5421a10de9ad3945d774aa859d8357fd05e21390 Mon Sep 17 00:00:00 2001 From: Alwin Date: Sat, 11 Mar 2023 01:31:06 -0800 Subject: [PATCH 242/694] Add ScopedTimer and Read_Grid_Cat (not tested) --- docs/doxygen/Doxyfile | 2 +- src/grid/grid3D.h | 6 +++- src/grid/initial_conditions.cpp | 2 ++ src/io/io.cpp | 8 ++++- src/io/io.h | 7 ++++ src/main.cpp | 4 +-- src/utils/timing_functions.h | 60 +++++++++++++++++++++++++++++---- 7 files changed, 76 insertions(+), 13 deletions(-) diff --git a/docs/doxygen/Doxyfile b/docs/doxygen/Doxyfile index 9c3acb19f..443ea1720 100644 --- a/docs/doxygen/Doxyfile +++ b/docs/doxygen/Doxyfile @@ -908,7 +908,7 @@ FILE_PATTERNS = *.c \ # be searched for input files as well. # The default value is: NO. -RECURSIVE = NO +RECURSIVE = YES # The EXCLUDE tag can be used to specify files and/or directories that should be # excluded from the INPUT source files. This way you can easily exclude a diff --git a/src/grid/grid3D.h b/src/grid/grid3D.h index 34c470399..59016489b 100644 --- a/src/grid/grid3D.h +++ b/src/grid/grid3D.h @@ -504,9 +504,13 @@ class Grid3D #endif /*! \fn void Read_Grid(struct parameters P) - * \brief Read in grid data from an output file. */ + * \brief Read in grid data from 1-per-process output files. */ void Read_Grid(struct parameters P); + /*! \fn void Read_Grid_Cat(struct parameters P) + * \brief Read in grid data from a single concatenated output file. */ + void Read_Grid_Cat(struct parameters P); + /*! \fn Read_Grid_Binary(FILE *fp) * \brief Read in grid data from a binary file. */ void Read_Grid_Binary(FILE *fp); diff --git a/src/grid/initial_conditions.cpp b/src/grid/initial_conditions.cpp index 6331399e0..d421b1cb3 100644 --- a/src/grid/initial_conditions.cpp +++ b/src/grid/initial_conditions.cpp @@ -78,6 +78,8 @@ void Grid3D::Set_Initial_Conditions(parameters P) // Initialize a uniform hydro grid when only integrating particles Uniform_Grid(); #endif // ONLY_PARTICLES + } else if (strcmp(P.init, "Read_Grid_Cat") == 0) { + Read_Grid_Cat(P); } else if (strcmp(P.init, "Uniform") == 0) { Uniform_Grid(); } else if (strcmp(P.init, "Zeldovich_Pancake") == 0) { diff --git a/src/io/io.cpp b/src/io/io.cpp index 6fe61c544..46d746343 100644 --- a/src/io/io.cpp +++ b/src/io/io.cpp @@ -13,6 +13,7 @@ #endif // HDF5 #include "../grid/grid3D.h" #include "../io/io.h" +#include "../utils/timing_functions.h" // provides ScopedTimer #ifdef MPI_CHOLLA #include "../mpi/mpi_routines.h" #endif // MPI_CHOLLA @@ -2242,6 +2243,7 @@ void Grid3D::Write_Slices_HDF5(hid_t file_id) * \brief Read in grid data from an output file. */ void Grid3D::Read_Grid(struct parameters P) { + //ScopedTimer("Read_Grid"); char filename[100]; char timestep[20]; int nfile = P.nfile; // output step you want to read from @@ -2435,7 +2437,7 @@ void Grid3D::Read_Grid_Binary(FILE *fp) #ifdef HDF5 - +/* \brief After HDF5 reads data into a buffer, remap and write to grid buffer. */ void Fill_Grid_From_HDF5_Buffer(int nx, int ny, int nz, int nx_real, int ny_real, int nz_real, int n_ghost, Real* hdf5_buffer, Real* grid_buffer) { // Note: for 1D ny_real and nz_real are not used @@ -2508,9 +2510,13 @@ void Grid3D::Read_Grid_HDF5(hid_t file_id, struct parameters P) attribute_id = H5Aopen(file_id, "t", H5P_DEFAULT); status = H5Aread(attribute_id, H5T_NATIVE_DOUBLE, &H.t); status = H5Aclose(attribute_id); + /* + // Alwin: I don't think this is needed anymore because dt of the current state of cells is calculated for consistency and output was using previous timestep's H.dt + // This is because dti = Update_Grid, then output, then dt = 1/MPI_Allreduce(dti) in next step attribute_id = H5Aopen(file_id, "dt", H5P_DEFAULT); status = H5Aread(attribute_id, H5T_NATIVE_DOUBLE, &H.dt); status = H5Aclose(attribute_id); + */ attribute_id = H5Aopen(file_id, "n_step", H5P_DEFAULT); status = H5Aread(attribute_id, H5T_NATIVE_INT, &H.n_step); status = H5Aclose(attribute_id); diff --git a/src/io/io.h b/src/io/io.h index 6e52c6cb7..b02159041 100644 --- a/src/io/io.h +++ b/src/io/io.h @@ -53,6 +53,10 @@ void write_debug(Real* Value, const char* fname, int nValues, int iProc); #ifdef HDF5 // From io/io.cpp + + + + herr_t Write_HDF5_Attribute(hid_t file_id, hid_t dataspace_id, double* attribute, const char* name); herr_t Write_HDF5_Attribute(hid_t file_id, hid_t dataspace_id, int* attribute, const char* name); @@ -62,6 +66,9 @@ herr_t Read_HDF5_Dataset(hid_t file_id, float* dataset_buffer, const char* name) herr_t Write_HDF5_Dataset(hid_t file_id, hid_t dataspace_id, double* dataset_buffer, const char* name); herr_t Write_HDF5_Dataset(hid_t file_id, hid_t dataspace_id, float* dataset_buffer, const char* name); +/* \brief After HDF5 reads data into a buffer, remap and write to grid buffer. */ +void Fill_Grid_From_HDF5_Buffer(int nx, int ny, int nz, int nx_real, int ny_real, int nz_real, int n_ghost, Real* hdf5_buffer, Real* grid_buffer); + // From io/io_gpu.cu // Use GPU to pack source -> device_buffer, then copy device_buffer -> buffer, // then write HDF5 field diff --git a/src/main.cpp b/src/main.cpp index bcda7a32a..e272ef659 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -230,9 +230,7 @@ int main(int argc, char *argv[]) #endif // CPU_TIME start_step = get_time(); - // calculate the timestep. Note: this computes the timestep ONLY on the - // first loop, on subsequent time steps it just calls the MPI_Allreduce to - // determine the global timestep + // calculate the timestep by calling MPI_Allreduce G.set_dt(dti); if (G.H.t + G.H.dt > outtime) G.H.dt = outtime - G.H.t; diff --git a/src/utils/timing_functions.h b/src/utils/timing_functions.h index 364736ab1..c6c46b5c8 100644 --- a/src/utils/timing_functions.h +++ b/src/utils/timing_functions.h @@ -1,11 +1,16 @@ -#ifdef CPU_TIME - #ifndef TIMING_FUNCTIONS_H - #define TIMING_FUNCTIONS_H +#pragma once + + +#include - #include +#include "../global/global.h" // Provides get_time +#include "../io/io.h" // Provides chprintf - #include "../global/global.h" +#ifdef MPI_CHOLLA +#include "../mpi/mpi_routines.h" // Provides ReduceRealMin, Max, Avg +#endif +#ifdef CPU_TIME // Each instance of this class represents a single timer, timing a single // section of code. All instances have their own n_steps, time_start, etc. so // that all timers can run independently @@ -66,6 +71,47 @@ class Time void Print_Times(); void Print_Average_Times(struct parameters P); }; - - #endif #endif // CPU_TIME + + +// ScopedTimer does nothing if CPU_TIME is disabled +/* \brief ScopedTimer helps time a scope. Initialize as first variable and C++ guarantees it is destroyed last */ +class ScopedTimer +{ + public: + const char* name; + double time_start = 0; + + /* \brief ScopedTimer Constructor initializes name and time */ + ScopedTimer(const char* input_name) + { + #ifdef CPU_TIME + name = input_name; + time_start = get_time(); + #endif + } + + /* \brief ScopedTimer Destructor computes dt and prints */ + ~ScopedTimer(void) + { + #ifdef CPU_TIME + double time_elapsed_ms = (get_time() - time_start)*1000; + +#ifdef MPI_CHOLLA + double t_min = ReduceRealMin(time_elapsed_ms); + double t_max = ReduceRealMax(time_elapsed_ms); + double t_avg = ReduceRealAvg(time_elapsed_ms); +#else + double t_min = time_elapsed_ms; + double t_max = time_elapsed_ms; + double t_avg = time_elapsed_ms; +#endif + //chprintf("ScopedTimer Min: %9.4f ms Max: %9.4f ms Avg: %9.4f ms %s \n", t_min, t_max, t_avg, name); + #endif + } + +}; + + + + From d28a06bdd6f4b6ce47e00e98cbec55cb28b801ad Mon Sep 17 00:00:00 2001 From: Alwin Date: Sat, 11 Mar 2023 01:35:32 -0800 Subject: [PATCH 243/694] io_parallel to actually add Read_Grid_Cat --- src/io/io_parallel.cpp | 128 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 128 insertions(+) create mode 100644 src/io/io_parallel.cpp diff --git a/src/io/io_parallel.cpp b/src/io/io_parallel.cpp new file mode 100644 index 000000000..dd204cc60 --- /dev/null +++ b/src/io/io_parallel.cpp @@ -0,0 +1,128 @@ +// Routines for using Parallel HDF5 to read/write from single file + +#if defined(HDF5) && defined(MPI_CHOLLA) +#include +#include "../io/io.h" +#include "../mpi/mpi_routines.h" +#include "../grid/grid3D.h" +#include "../utils/timing_functions.h" // provides ScopedTimer + + + +// I think this helper function is finished. It's just meant to interface with HDF5 and open/free handles +// I need to figure out offset and count elsewhere + +// Warning: H5Sselect_hyperslab expects its pointer args to be arrays of same size as the rank of the dataspace file_space_id +void Read_HDF5_Selection_3D(hid_t file_id, hsize_t* offset, hsize_t* count, double* buffer, const char* name) +{ + hid_t dataset_id = H5Dopen(file_id, name, H5P_DEFAULT); + // Select the requested subset of data + hid_t file_space_id = H5Dget_space(dataset_id); + hid_t mem_space_id = H5Screate_simple(3, count, NULL); + + + // Notes on hyperslab call: + + // First NULL is stride, setting to NULL is like setting to 1, contiguous + + // Second NULL is block, setting to NULL sets block size to 1. + + // Count is the number of blocks in each dimension: + + // since our block size is 1, Count is the number of voxels in each dimension + + herr_t status = H5Sselect_hyperslab(file_space_id, H5S_SELECT_SET, offset, NULL, count, NULL); + // Read in the data subset + status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, mem_space_id, file_space_id, H5P_DEFAULT, buffer); + + // Free the ids + status = H5Sclose(mem_space_id); + status = H5Sclose(file_space_id); + status = H5Dclose(dataset_id); +} + + +// Alwin: I'm only writing a 3D version of this because that's what is practical. +// Read from concatenated HDF5 file +void Read_Grid_Cat_HDF5_Field(hid_t file_id, Real* dataset_buffer, Header H, hsize_t* offset, hsize_t* count, + Real* grid_buffer, const char* name) +{ + Read_HDF5_Selection_3D(file_id, offset, count, dataset_buffer, name); + Fill_Grid_From_HDF5_Buffer(H.nx, H.ny, H.nz, H.nx_real, H.ny_real, H.nz_real, H.n_ghost, dataset_buffer, grid_buffer); +} + +void Read_Grid_Cat_HDF5_Field_Magnetic(hid_t file_id, Real* dataset_buffer, Header H, hsize_t* offset, hsize_t* count, + Real* grid_buffer, const char* name) +{ + Read_HDF5_Selection_3D(file_id, offset, count, dataset_buffer, name); + Fill_Grid_From_HDF5_Buffer(H.nx, H.ny, H.nz, H.nx_real + 1, H.ny_real + 1, H.nz_real + 1, H.n_ghost - 1, dataset_buffer, grid_buffer); +} + + +/*! \brief Read in grid data from a single concatenated output file. */ +void Grid3D::Read_Grid_Cat(struct parameters P) +{ + + //ScopedTimer("Read_Grid_Cat"); + herr_t status; + char filename[100]; + + sprintf(filename, "%s%d.h5", P.indir, P.nfile); + + hid_t file_id = H5Fopen(filename, H5F_ACC_RDONLY, H5P_DEFAULT); + + if (file_id < 0) { + printf("Unable to open input file: %s\n", filename); + exit(0); + } + + + // TODO (Alwin) : Need to consider how or whether to read attributes. + + // even if I do not read gamma from file, it is set in initial_conditions.cpp + // if I do not set t or n_step what does it get set to?0 in grid/grid3D.cpp + // This should be okay to start with. + + + // Offsets are global variables from mpi_routines.h + hsize_t offset[3]; + offset[0] = nx_local_start; + offset[1] = ny_local_start; + offset[2] = nz_local_start; + + // This is really dims but I name it count because that's what HDF5 names it + hsize_t count[3]; + count[0] = H.nx_real; + count[1] = H.ny_real; + count[2] = H.nz_real; + + #ifdef MHD + Real* dataset_buffer = (Real *)malloc((H.nz_real + 1) * (H.ny_real + 1) * (H.nx_real + 1) * sizeof(Real)); + #else + Real* dataset_buffer = (Real *)malloc((H.nz_real) * (H.ny_real) * (H.nx_real) * sizeof(Real)); + #endif + + Read_Grid_Cat_HDF5_Field(file_id, dataset_buffer, H, offset, count, C.density, "/density"); + Read_Grid_Cat_HDF5_Field(file_id, dataset_buffer, H, offset, count, C.momentum_x, "/momentum_x"); + Read_Grid_Cat_HDF5_Field(file_id, dataset_buffer, H, offset, count, C.momentum_y, "/momentum_y"); + Read_Grid_Cat_HDF5_Field(file_id, dataset_buffer, H, offset, count, C.momentum_z, "/momentum_z"); + Read_Grid_Cat_HDF5_Field(file_id, dataset_buffer, H, offset, count, C.Energy, "/Energy"); + #ifdef DE + Read_Grid_Cat_HDF5_Field(file_id, dataset_buffer, H, offset, count, C.Energy, "/GasEnergy"); + #endif //DE + + // TODO (Alwin) : add scalar stuff + + #ifdef MHD + Read_Grid_HDF5_Field_Magnetic(file_id, dataset_buffer, H, C.magnetic_x, "/magnetic_x"); + Read_Grid_HDF5_Field_Magnetic(file_id, dataset_buffer, H, C.magnetic_y, "/magnetic_y"); + Read_Grid_HDF5_Field_Magnetic(file_id, dataset_buffer, H, C.magnetic_z, "/magnetic_z"); + #endif + + + free(dataset_buffer); + status = H5Fclose(file_id); +} + + +#endif From a29a8e51fd7e7da6fe50cfeb8578cdf90413d6e3 Mon Sep 17 00:00:00 2001 From: Alwin Date: Sat, 11 Mar 2023 06:05:26 -0500 Subject: [PATCH 244/694] fix bugs --- src/io/io.cpp | 2 +- src/io/io_parallel.cpp | 2 +- src/utils/timing_functions.cpp | 29 ++++++++++++++++++++++- src/utils/timing_functions.h | 43 ++++++++-------------------------- 4 files changed, 40 insertions(+), 36 deletions(-) diff --git a/src/io/io.cpp b/src/io/io.cpp index 46d746343..7f33ea723 100644 --- a/src/io/io.cpp +++ b/src/io/io.cpp @@ -2243,7 +2243,7 @@ void Grid3D::Write_Slices_HDF5(hid_t file_id) * \brief Read in grid data from an output file. */ void Grid3D::Read_Grid(struct parameters P) { - //ScopedTimer("Read_Grid"); + ScopedTimer("Read_Grid"); char filename[100]; char timestep[20]; int nfile = P.nfile; // output step you want to read from diff --git a/src/io/io_parallel.cpp b/src/io/io_parallel.cpp index dd204cc60..c12cd9569 100644 --- a/src/io/io_parallel.cpp +++ b/src/io/io_parallel.cpp @@ -63,7 +63,7 @@ void Read_Grid_Cat_HDF5_Field_Magnetic(hid_t file_id, Real* dataset_buffer, Head void Grid3D::Read_Grid_Cat(struct parameters P) { - //ScopedTimer("Read_Grid_Cat"); + ScopedTimer("Read_Grid_Cat"); herr_t status; char filename[100]; diff --git a/src/utils/timing_functions.cpp b/src/utils/timing_functions.cpp index ae33ea089..0d4502801 100644 --- a/src/utils/timing_functions.cpp +++ b/src/utils/timing_functions.cpp @@ -7,6 +7,7 @@ #include #include + #include "../global/global.h" #include "../io/io.h" #ifdef MPI_CHOLLA @@ -196,4 +197,30 @@ void Time::Print_Average_Times(struct parameters P) chprintf("Saved Timing: %s \n\n", file_name.c_str()); } -#endif +#endif //CPU_TIME + +ScopedTimer::ScopedTimer(const char* input_name) +{ + #ifdef CPU_TIME + name = input_name; + time_start = get_time(); + #endif +} + +ScopedTimer::~ScopedTimer(void) +{ +#ifdef CPU_TIME + double time_elapsed_ms = (get_time() - time_start)*1000; + +#ifdef MPI_CHOLLA + double t_min = ReduceRealMin(time_elapsed_ms); + double t_max = ReduceRealMax(time_elapsed_ms); + double t_avg = ReduceRealAvg(time_elapsed_ms); +#else + double t_min = time_elapsed_ms; + double t_max = time_elapsed_ms; + double t_avg = time_elapsed_ms; +#endif //MPI_CHOLLA + chprintf("ScopedTimer Min: %9.4f ms Max: %9.4f ms Avg: %9.4f ms %s \n", t_min, t_max, t_avg, name); +#endif // CPU_TIME +} diff --git a/src/utils/timing_functions.h b/src/utils/timing_functions.h index c6c46b5c8..4e26c8e9b 100644 --- a/src/utils/timing_functions.h +++ b/src/utils/timing_functions.h @@ -1,16 +1,12 @@ -#pragma once +#ifndef TIMING_FUNCTIONS_H +#define TIMING_FUNCTIONS_H #include -#include "../global/global.h" // Provides get_time -#include "../io/io.h" // Provides chprintf +#include "../global/global.h" // Provides Real, get_time -#ifdef MPI_CHOLLA -#include "../mpi/mpi_routines.h" // Provides ReduceRealMin, Max, Avg -#endif - -#ifdef CPU_TIME +//#ifdef CPU_TIME // Each instance of this class represents a single timer, timing a single // section of code. All instances have their own n_steps, time_start, etc. so // that all timers can run independently @@ -71,7 +67,7 @@ class Time void Print_Times(); void Print_Average_Times(struct parameters P); }; -#endif // CPU_TIME +//#endif // CPU_TIME // ScopedTimer does nothing if CPU_TIME is disabled @@ -83,35 +79,16 @@ class ScopedTimer double time_start = 0; /* \brief ScopedTimer Constructor initializes name and time */ - ScopedTimer(const char* input_name) - { - #ifdef CPU_TIME - name = input_name; - time_start = get_time(); - #endif - } + ScopedTimer(const char* input_name); /* \brief ScopedTimer Destructor computes dt and prints */ - ~ScopedTimer(void) - { - #ifdef CPU_TIME - double time_elapsed_ms = (get_time() - time_start)*1000; - -#ifdef MPI_CHOLLA - double t_min = ReduceRealMin(time_elapsed_ms); - double t_max = ReduceRealMax(time_elapsed_ms); - double t_avg = ReduceRealAvg(time_elapsed_ms); -#else - double t_min = time_elapsed_ms; - double t_max = time_elapsed_ms; - double t_avg = time_elapsed_ms; -#endif - //chprintf("ScopedTimer Min: %9.4f ms Max: %9.4f ms Avg: %9.4f ms %s \n", t_min, t_max, t_avg, name); - #endif - } + ~ScopedTimer(void); + }; + +#endif // TIMING_FUNCTIONS_H From 8ecef7f11d4f45c0dd4ae653a4d606dcb0e6f5c5 Mon Sep 17 00:00:00 2001 From: Alwin Date: Sat, 11 Mar 2023 07:39:25 -0500 Subject: [PATCH 245/694] fix bugs --- src/io/io.cpp | 2 +- src/io/io_parallel.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/io/io.cpp b/src/io/io.cpp index 7f33ea723..2f84ee859 100644 --- a/src/io/io.cpp +++ b/src/io/io.cpp @@ -2243,7 +2243,7 @@ void Grid3D::Write_Slices_HDF5(hid_t file_id) * \brief Read in grid data from an output file. */ void Grid3D::Read_Grid(struct parameters P) { - ScopedTimer("Read_Grid"); + ScopedTimer timer("Read_Grid"); char filename[100]; char timestep[20]; int nfile = P.nfile; // output step you want to read from diff --git a/src/io/io_parallel.cpp b/src/io/io_parallel.cpp index c12cd9569..44f0ce186 100644 --- a/src/io/io_parallel.cpp +++ b/src/io/io_parallel.cpp @@ -63,7 +63,7 @@ void Read_Grid_Cat_HDF5_Field_Magnetic(hid_t file_id, Real* dataset_buffer, Head void Grid3D::Read_Grid_Cat(struct parameters P) { - ScopedTimer("Read_Grid_Cat"); + ScopedTimer timer("Read_Grid_Cat"); herr_t status; char filename[100]; From 206d2cb4efa3c4b10b79e2181ac0576d4c656490 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Tue, 14 Mar 2023 15:54:33 -0400 Subject: [PATCH 246/694] Restore & refactor MPI_Comm_node function This function removed because I thought it didn't do anything. It turns out that it does so I'm restoring it with some refactoring to make what it does clearer --- src/mpi/mpi_routines.cpp | 38 ++++++++++++++++++++++++++++++++++++++ src/mpi/mpi_routines.h | 9 +++++++++ 2 files changed, 47 insertions(+) diff --git a/src/mpi/mpi_routines.cpp b/src/mpi/mpi_routines.cpp index f35a72952..d30b7944b 100644 --- a/src/mpi/mpi_routines.cpp +++ b/src/mpi/mpi_routines.cpp @@ -5,6 +5,7 @@ #include #include + #include #include "../global/global.h" #include "../io/io.h" @@ -214,6 +215,9 @@ void InitializeChollaMPI(int *pargc, char **pargv[]) } #endif + /*set up node communicator*/ + std::tie(procID_node, nproc_node) = MPI_Comm_node(); + // #ifdef ONLY_PARTICLES // chprintf("ONLY_PARTICLES: Initializing without CUDA support.\n"); // #else @@ -978,4 +982,38 @@ void copyHostToDeviceReceiveBuffer(int direction) } } +std::pair MPI_Comm_node() +{ + // get the global process rank + int myid, nproc; + MPI_Comm_rank(MPI_COMM_WORLD, &myid); + MPI_Comm_size(MPI_COMM_WORLD, &nproc); + + // if there is the only one process, then just return the global rank and size + if (nproc == 1) { + return {myid, nproc}; + } + + // get the hostname of the node + std::string pname; // node hostname + pname.resize(MPI_MAX_PROCESSOR_NAME); + int pname_length; // length of node hostname + + MPI_Get_processor_name(pname.data(), &pname_length); + + // hash the name of the node. MPI_Comm_split doesn't like negative numbers and accepts ints not unsigned ints so we + // need to take the absolute value + int const hash = std::abs(static_cast(std::hash{}(pname))); + + // split the communicator + MPI_Comm node_comm; // communicator for the procs on each node + MPI_Comm_split(MPI_COMM_WORLD, hash, myid, &node_comm); + + // get size and rank + MPI_Comm_rank(node_comm, &myid); + MPI_Comm_size(node_comm, &nproc); + + return {myid, nproc}; +} + #endif /*MPI_CHOLLA*/ diff --git a/src/mpi/mpi_routines.h b/src/mpi/mpi_routines.h index f1170a6a9..2d2a644b4 100644 --- a/src/mpi/mpi_routines.h +++ b/src/mpi/mpi_routines.h @@ -4,6 +4,8 @@ #include #include + #include + #include "../global/global.h" #include "../grid/grid3D.h" @@ -199,5 +201,12 @@ void deallocate_three_dimensional_int_array(int ***x, int n, int l, int m); /* Copy MPI receive buffers on Host to their device locations */ void copyHostToDeviceReceiveBuffer(int direction); +/*! + * \brief Split the communicator for each node and return IDs + * + * \return std::pair The rank id and total number of processes + */ +std::pair MPI_Comm_node(); + #endif /*MPI_ROUTINES_H*/ #endif /*MPI_CHOLLA*/ From 445ddb26fefac4ee1071ba5361c305dc625098c8 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Fri, 10 Mar 2023 14:57:07 -0500 Subject: [PATCH 247/694] Fix & enable bugprone-assignment-in-if-condition check --- .clang-tidy | 1 - src/mpi/mpi_routines.cpp | 21 +++++++++++++-------- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/.clang-tidy b/.clang-tidy index 8af502682..8ac860c4b 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -35,7 +35,6 @@ Checks: "*, google-readability-avoid-underscore-in-googletest-name, google-upgrade-googletest-case, - -bugprone-assignment-in-if-condition, -bugprone-branch-clone, -bugprone-easily-swappable-parameters, -bugprone-implicit-widening-of-multiplication-result, diff --git a/src/mpi/mpi_routines.cpp b/src/mpi/mpi_routines.cpp index d30b7944b..2e0faaa16 100644 --- a/src/mpi/mpi_routines.cpp +++ b/src/mpi/mpi_routines.cpp @@ -181,35 +181,40 @@ void InitializeChollaMPI(int *pargc, char **pargv[]) #endif #endif - /*create the MPI_Request arrays for non-blocking sends*/ - if (!(send_request = (MPI_Request *)malloc(2 * sizeof(MPI_Request)))) { + /*create the MPI_Request arrays for non-blocking sends. If the malloc fails then print an error and exit*/ + send_request = (MPI_Request *)malloc(2 * sizeof(MPI_Request)); + if (!send_request) { chprintf("Error allocating send_request.\n"); chexit(-2); } - if (!(recv_request = (MPI_Request *)malloc(2 * sizeof(MPI_Request)))) { + recv_request = (MPI_Request *)malloc(2 * sizeof(MPI_Request)); + if (!recv_request) { chprintf("Error allocating recv_request.\n"); chexit(-2); } #ifdef PARTICLES - if (!(send_request_n_particles = (MPI_Request *)malloc(2 * sizeof(MPI_Request)))) { + send_request_n_particles = (MPI_Request *)malloc(2 * sizeof(MPI_Request)); + if (!send_request_n_particles) { chprintf( "Error allocating send_request for number of particles for " "transfer.\n"); chexit(-2); } - if (!(recv_request_n_particles = (MPI_Request *)malloc(2 * sizeof(MPI_Request)))) { + recv_request_n_particles = (MPI_Request *)malloc(2 * sizeof(MPI_Request)); + if (!recv_request_n_particles) { chprintf( "Error allocating recv_request for number of particles for " "transfer.\n"); chexit(-2); } - - if (!(send_request_particles_transfer = (MPI_Request *)malloc(2 * sizeof(MPI_Request)))) { + send_request_particles_transfer = (MPI_Request *)malloc(2 * sizeof(MPI_Request)); + if (!send_request_particles_transfer) { chprintf("Error allocating send_request for particles transfer.\n"); chexit(-2); } - if (!(recv_request_particles_transfer = (MPI_Request *)malloc(2 * sizeof(MPI_Request)))) { + recv_request_particles_transfer = (MPI_Request *)malloc(2 * sizeof(MPI_Request)); + if (!recv_request_particles_transfer) { chprintf("Error allocating recv_request for particles transfer.\n"); chexit(-2); } From eb648fd2b8661a818ad193061a65f99de295f02c Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Fri, 10 Mar 2023 15:30:30 -0500 Subject: [PATCH 248/694] Fix & enable bugprone-branck-clone check --- .clang-tidy | 1 - src/grid/initial_conditions.cpp | 20 ++++---------------- src/utils/testing_utilities.cpp | 11 ++++------- 3 files changed, 8 insertions(+), 24 deletions(-) diff --git a/.clang-tidy b/.clang-tidy index 8ac860c4b..f9c0c1534 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -35,7 +35,6 @@ Checks: "*, google-readability-avoid-underscore-in-googletest-name, google-upgrade-googletest-case, - -bugprone-branch-clone, -bugprone-easily-swappable-parameters, -bugprone-implicit-widening-of-multiplication-result, -bugprone-integer-division, diff --git a/src/grid/initial_conditions.cpp b/src/grid/initial_conditions.cpp index 0a4a29205..ef6619328 100644 --- a/src/grid/initial_conditions.cpp +++ b/src/grid/initial_conditions.cpp @@ -621,7 +621,7 @@ void Grid3D::KH() Get_Position(i, j, H.n_ghost, &x_pos, &y_pos, &z_pos); // outer quarters of slab - if (y_pos <= 1.0 * H.ydglobal / 4.0) { + if ((y_pos <= 1.0 * H.ydglobal / 4.0) or (y_pos >= 3.0 * H.ydglobal / 4.0)) { C.density[id] = d2; C.momentum_x[id] = v2 * C.density[id]; C.momentum_y[id] = C.density[id] * A * sin(4 * PI * x_pos); @@ -631,20 +631,8 @@ void Grid3D::KH() C.basic_scalar[id] = 0.0; #endif #endif - } else if (y_pos >= 3.0 * H.ydglobal / 4.0) { - C.density[id] = d2; - C.momentum_x[id] = v2 * C.density[id]; - C.momentum_y[id] = C.density[id] * A * sin(4 * PI * x_pos); - C.momentum_z[id] = 0.0; - -#ifdef SCALAR - #ifdef BASIC_SCALAR - C.basic_scalar[id] = 0.0; - #endif -#endif - } - // inner half of slab - else { + // inner half of slab + } else { C.density[id] = d1; C.momentum_x[id] = v1 * C.density[id]; C.momentum_y[id] = C.density[id] * A * sin(4 * PI * x_pos); @@ -1110,7 +1098,7 @@ void Grid3D::Disk_2D() #ifdef DE C.GasEnergy[id] = P / (gama - 1.0); #endif // DE - // printf("%e %e %f %f %f %f %f\n", x_pos, y_pos, d, Sigma, vx, vy, P); + // printf("%e %e %f %f %f %f %f\n", x_pos, y_pos, d, Sigma, vx, vy, P); } } } diff --git a/src/utils/testing_utilities.cpp b/src/utils/testing_utilities.cpp index 363ed750b..8c42b9e1e 100644 --- a/src/utils/testing_utilities.cpp +++ b/src/utils/testing_utilities.cpp @@ -69,15 +69,12 @@ bool nearlyEqualDbl(double const &a, double const &b, double &absoluteDiff, int6 ulpsDiff = ulpsDistanceDbl(a, b); absoluteDiff = std::abs(a - b); - // Perform the ULP check which is for numbers far from zero - if (ulpsDiff <= ulpsEpsilon) { + // Perform the ULP check which is for numbers far from zero and perform the absolute check which is for numbers near + // zero + if (ulpsDiff <= ulpsEpsilon or absoluteDiff <= fixedEpsilon) { return true; } - // Perform the absolute check which is for numbers near zero - else if (absoluteDiff <= fixedEpsilon) { - return true; - } - // if none of the checks have passed indicate test failure + // if the checks don't pass indicate test failure else { return false; } From 8acb6b062688dd10156a2b4c0703309e54219d9a Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Fri, 10 Mar 2023 16:04:36 -0500 Subject: [PATCH 249/694] Fix and enable bugprone-integer-division check --- .clang-tidy | 1 - src/model/disk_ICs.cpp | 4 ++-- src/particles/feedback_CIC_gpu.cu | 2 +- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/.clang-tidy b/.clang-tidy index f9c0c1534..986ca33e2 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -37,7 +37,6 @@ Checks: "*, -bugprone-easily-swappable-parameters, -bugprone-implicit-widening-of-multiplication-result, - -bugprone-integer-division, -bugprone-macro-parentheses, -bugprone-narrowing-conversions, -bugprone-reserved-identifier, diff --git a/src/model/disk_ICs.cpp b/src/model/disk_ICs.cpp index fcefbf767..8e4bede3f 100644 --- a/src/model/disk_ICs.cpp +++ b/src/model/disk_ICs.cpp @@ -170,10 +170,10 @@ Real z_hc_D3D(int k, Real dz, int nz, int ng) // the real domain, and nz + 2*ng spanning the real + ghost domains if (!(nz % 2)) { // even # of cells - return 0.5 * dz + ((Real)(k - ng - nz / 2)) * dz; + return 0.5 * dz + ((Real)(k - ng - (int)(nz / 2))) * dz; } else { // odd # of cells - return ((Real)(k - ng - (nz - 1) / 2)) * dz; + return ((Real)(k - ng - (int)((nz - 1) / 2))) * dz; } } diff --git a/src/particles/feedback_CIC_gpu.cu b/src/particles/feedback_CIC_gpu.cu index 3746d8226..286ae92e3 100644 --- a/src/particles/feedback_CIC_gpu.cu +++ b/src/particles/feedback_CIC_gpu.cu @@ -754,7 +754,7 @@ Real supernova::Cluster_Feedback(Grid3D& G, FeedbackAnalysis& analysis) } Real global_resolved_ratio = 0.0; if (analysis.countResolved > 0 || analysis.countUnresolved > 0) { - global_resolved_ratio = analysis.countResolved / (analysis.countResolved + analysis.countUnresolved); + global_resolved_ratio = (Real)(analysis.countResolved) / (Real)(analysis.countResolved + analysis.countUnresolved); } chprintf("iteration %d: number of SN: %d, ratio of resolved %.3e\n", G.H.n_step, (long)info[supernova::SN], From c0b2a9d2b8f5bff0f78becbeb9eb2cf93fa3d4ff Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Fri, 10 Mar 2023 16:18:46 -0500 Subject: [PATCH 250/694] Fix & enable bugprone-string-integer-assignment check --- .clang-tidy | 1 - src/utils/testing_utilities.cpp | 6 +++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/.clang-tidy b/.clang-tidy index 986ca33e2..d810a3ef2 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -40,7 +40,6 @@ Checks: "*, -bugprone-macro-parentheses, -bugprone-narrowing-conversions, -bugprone-reserved-identifier, - -bugprone-string-integer-assignment, -cert-dcl37-c, -cert-dcl50-cpp, -cert-dcl51-cpp, diff --git a/src/utils/testing_utilities.cpp b/src/utils/testing_utilities.cpp index 8c42b9e1e..79dc8d11b 100644 --- a/src/utils/testing_utilities.cpp +++ b/src/utils/testing_utilities.cpp @@ -87,11 +87,11 @@ void wrapperEqual(int i, int j, int k, std::string dataSetName, double test_valu std::string outString; outString += dataSetName; outString += " dataset at ["; - outString += i; + outString += std::to_string(i); outString += ","; - outString += j; + outString += std::to_string(j); outString += ","; - outString += k; + outString += std::to_string(k); outString += "]"; ASSERT_NO_FATAL_FAILURE(checkResults<1>(fid_value, test_value, outString, fixedEpsilon)); From f2304f8858845412c52a2f89f7fc8925a2df84bd Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Fri, 10 Mar 2023 16:30:07 -0500 Subject: [PATCH 251/694] Fix & enable bugprone-macro-parentheses check --- .clang-tidy | 4 ++-- src/analysis/feedback_analysis.cpp | 2 +- src/analysis/feedback_analysis_gpu.cu | 2 +- src/particles/particles_boundaries_gpu.cu | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.clang-tidy b/.clang-tidy index d810a3ef2..7f889643b 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -37,7 +37,6 @@ Checks: "*, -bugprone-easily-swappable-parameters, -bugprone-implicit-widening-of-multiplication-result, - -bugprone-macro-parentheses, -bugprone-narrowing-conversions, -bugprone-reserved-identifier, -cert-dcl37-c, @@ -155,7 +154,8 @@ Checks: "*, -readability-simplify-boolean-expr, -readability-suspicious-call-argument" WarningsAsErrors: '' -HeaderFilterRegex: '.*' +# More paths can be ignored by modifying this so that it looks like '^((?!/PATH/ONE/|/PATH/TWO/).)*$' +HeaderFilterRegex: '^((?!/ihome/crc/install/power9/googletest/1.11.0/include/|/usr/lib/x86_64-linux-gnu/hdf5/serial/include/).)*$' AnalyzeTemporaryDtors: false FormatStyle: 'file' UseColor: false diff --git a/src/analysis/feedback_analysis.cpp b/src/analysis/feedback_analysis.cpp index 3164abb04..4f870a33c 100644 --- a/src/analysis/feedback_analysis.cpp +++ b/src/analysis/feedback_analysis.cpp @@ -7,7 +7,7 @@ #include "../mpi/mpi_routines.h" #endif -#define VRMS_CUTOFF_DENSITY 0.01 * 0.6 * MP / DENSITY_UNIT +#define VRMS_CUTOFF_DENSITY (0.01 * 0.6 * MP / DENSITY_UNIT) FeedbackAnalysis::FeedbackAnalysis(Grid3D& G) { diff --git a/src/analysis/feedback_analysis_gpu.cu b/src/analysis/feedback_analysis_gpu.cu index fadd841f2..9ef268216 100644 --- a/src/analysis/feedback_analysis_gpu.cu +++ b/src/analysis/feedback_analysis_gpu.cu @@ -8,7 +8,7 @@ #define MU 0.6 // in cgs, this is 0.01 cm^{-3} - #define MIN_DENSITY 0.01 * MP *MU *LENGTH_UNIT *LENGTH_UNIT *LENGTH_UNIT / MASS_UNIT // 148279.7 + #define MIN_DENSITY (0.01 * MP * MU * LENGTH_UNIT * LENGTH_UNIT * LENGTH_UNIT / MASS_UNIT) // 148279.7 #define TPB_ANALYSIS 1024 __device__ void warpReduce(volatile Real *buff, size_t tid) diff --git a/src/particles/particles_boundaries_gpu.cu b/src/particles/particles_boundaries_gpu.cu index 68a77a113..94433f267 100644 --- a/src/particles/particles_boundaries_gpu.cu +++ b/src/particles/particles_boundaries_gpu.cu @@ -15,7 +15,7 @@ #include "particles_3D.h" #include "particles_boundaries_gpu.h" - #define SCAN_SHARED_SIZE 2 * TPB_PARTICLES + #define SCAN_SHARED_SIZE (2 * TPB_PARTICLES) __global__ void Set_Particles_Boundary_Kernel(int side, part_int_t n_local, Real *pos_dev, Real d_min, Real d_max, Real d_length) From 1577fbc76b420e1afcab36e2a2682ba634f997ef Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Sun, 19 Mar 2023 10:57:00 -0400 Subject: [PATCH 252/694] Fix & enable bugprone-reserved-identifier check Also enabled aliases to that check and removed some unused code from clang_format_runner.sh --- .clang-tidy | 3 --- tools/clang-format_runner.sh | 5 ----- 2 files changed, 8 deletions(-) diff --git a/.clang-tidy b/.clang-tidy index 7f889643b..fd371e379 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -38,10 +38,7 @@ Checks: "*, -bugprone-easily-swappable-parameters, -bugprone-implicit-widening-of-multiplication-result, -bugprone-narrowing-conversions, - -bugprone-reserved-identifier, - -cert-dcl37-c, -cert-dcl50-cpp, - -cert-dcl51-cpp, -cert-dcl59-cpp, -cert-env33-c, -cert-err33-c, diff --git a/tools/clang-format_runner.sh b/tools/clang-format_runner.sh index 52e4a2e18..bc89d8050 100755 --- a/tools/clang-format_runner.sh +++ b/tools/clang-format_runner.sh @@ -14,9 +14,4 @@ cholla_root=$(git rev-parse --show-toplevel) # Get a list of all the files to format readarray -t files <<<$(find ${cholla_root} -regex '.*\.\(h\|hpp\|c\|cpp\|cu\|cuh\)$' -print) -for VAR in $LIST -do - echo "$VAR" -done - clang-format -i --verbose "$@" -style="file" "${files[@]}" \ No newline at end of file From e697ff005b3829f64a2cc2e267e3f66ce9ae2790 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Sun, 19 Mar 2023 11:17:12 -0400 Subject: [PATCH 253/694] Permanently disable bugprone-easily-swappable-parameters Checks for contiguous arguments with the same type which we will always have many of and refactoring around this would be time consuming with no real benefit --- .clang-tidy | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.clang-tidy b/.clang-tidy index fd371e379..ec0fb2608 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -27,6 +27,7 @@ Checks: "*, -*osx*, -zircon-*, + -bugprone-easily-swappable-parameters, -modernize-use-trailing-return-type, -readability-avoid-const-params-in-decls, -readability-static-accessed-through-instance, @@ -35,7 +36,6 @@ Checks: "*, google-readability-avoid-underscore-in-googletest-name, google-upgrade-googletest-case, - -bugprone-easily-swappable-parameters, -bugprone-implicit-widening-of-multiplication-result, -bugprone-narrowing-conversions, -cert-dcl50-cpp, From bae5649aa7257c85ce7f0aca4efb165e2e377d88 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Mon, 20 Mar 2023 17:04:01 -0400 Subject: [PATCH 254/694] Add test of MHD restart Also, replace custom definition of Pi with M_PI --- src/io/io_tests.cpp | 40 +++++++++++ src/system_tests/hydro_system_tests.cpp | 6 +- ...df5_RestartSlowWaveExpectCorrectOutput.txt | 72 +++++++++++++++++++ 3 files changed, 113 insertions(+), 5 deletions(-) create mode 100644 src/io/io_tests.cpp create mode 100644 src/system_tests/input_files/tHYDROtMHDReadGridHdf5_RestartSlowWaveExpectCorrectOutput.txt diff --git a/src/io/io_tests.cpp b/src/io/io_tests.cpp new file mode 100644 index 000000000..b4920f4e7 --- /dev/null +++ b/src/io/io_tests.cpp @@ -0,0 +1,40 @@ +/*! + * \file io_tests.cpp + * \author Robert 'Bob' Caddy (rvc@pitt.edu) + * \brief Contains all the system tests for code in io.h and io.cpp + * + */ + +// External Libraries and Headers +#include + +// Local includes +#include "../io/io.h" +#include "../system_tests/system_tester.h" + +// STL includes +#include +#include + +// ============================================================================= +TEST(tHYDROtMHDReadGridHdf5, RestartSlowWaveExpectCorrectOutput) +{ + // Set parameters + int const num_ranks = 4; + + // Generate the data to read from + systemTest::SystemTestRunner initializer(false, true, false); + initializer.numMpiRanks = num_ranks; + initializer.chollaLaunchParams.append(" tout=0.0 outstep=0.0"); + initializer.launchCholla(); + std::string const read_directory = initializer.getOutputDirectory() + "/"; + + // Reload data and run the test + systemTest::SystemTestRunner loadRun(false, true, false); + loadRun.numMpiRanks = num_ranks; + loadRun.chollaLaunchParams.append(" init=Read_Grid nfile=0 indir=" + read_directory); + + loadRun.setFiducialNumTimeSteps(427); + loadRun.runL1ErrorTest(4.2E-7, 5.4E-7); +} +// ============================================================================= \ No newline at end of file diff --git a/src/system_tests/hydro_system_tests.cpp b/src/system_tests/hydro_system_tests.cpp index f73d77ed9..9d9639f65 100644 --- a/src/system_tests/hydro_system_tests.cpp +++ b/src/system_tests/hydro_system_tests.cpp @@ -15,10 +15,6 @@ #include "../system_tests/system_tester.h" #include "../utils/testing_utilities.h" -#ifndef PI - #define PI 3.141592653589793 -#endif - // ============================================================================= // Test Suite: tHYDROtMHDSYSTEMSodShockTube // ============================================================================= @@ -89,7 +85,7 @@ TEST(tHYDROtMHDSYSTEMSoundWave3D, CorrectInputExpectCorrectOutput) double amplitude = 1e-5; double dx = 1. / 64.; - double real_kx = 2 * PI; // kx of the physical problem + double real_kx = 2 * M_PI; // kx of the physical problem double kx = real_kx * dx; double speed = 1; // speed of wave is 1 since P = 0.6 and gamma = 1.666667 diff --git a/src/system_tests/input_files/tHYDROtMHDReadGridHdf5_RestartSlowWaveExpectCorrectOutput.txt b/src/system_tests/input_files/tHYDROtMHDReadGridHdf5_RestartSlowWaveExpectCorrectOutput.txt new file mode 100644 index 000000000..38686bfbd --- /dev/null +++ b/src/system_tests/input_files/tHYDROtMHDReadGridHdf5_RestartSlowWaveExpectCorrectOutput.txt @@ -0,0 +1,72 @@ +# +# Parameter File for MHD slow magnetosonic wave +# See [this blog post](https://robertcaddy.com/posts/Classes-and-bugfixing-6/) +# for details on each wave +# The right eigenvector for this wave is: +# (1/(6*sqrt(5))) * [12, +/-6, +/-8*sqrt(2), +/-4, 0, -4*sqrt(2), -2, 9] +# The terms with two sign options: use the left one for right moving waves and +# the right one for left moving waves +# + +################################################ +# number of grid cells in the x dimension +nx=64 +# number of grid cells in the y dimension +ny=32 +# number of grid cells in the z dimension +nz=32 +# final output time +tout=2.0 +# time interval for output +outstep=2.0 +# name of initial conditions +init=Linear_Wave +# domain properties +xmin=0.0 +ymin=0.0 +zmin=0.0 +xlen=1.0 +ylen=0.5 +zlen=0.5 +# type of boundary conditions +xl_bcnd=1 +xu_bcnd=1 +yl_bcnd=1 +yu_bcnd=1 +zl_bcnd=1 +zu_bcnd=1 +# path to output directory +outdir=./ + +################################################# +# Parameters for linear wave problems +# initial density +rho=1.0 +# velocity in the x direction +vx=0 +# velocity in the y direction +vy=0 +# velocity in the z direction +vz=0 +# initial pressure +P=0.6 +# magnetic field in the x direction +Bx=1 +# magnetic field in the y direction +By=1.5 +# magnetic field in the z direction +Bz=0 +# amplitude of perturbing oscillations +A=1e-6 +# value of gamma +gamma=1.666666666666667 +# The right eigenvectors to set the wave properly +rEigenVec_rho=0.8944271909999159 +rEigenVec_MomentumX=0.4472135954999579 +rEigenVec_MomentumY=0.8944271909999159 +rEigenVec_MomentumZ=0.0 +rEigenVec_Bx=0.0 +rEigenVec_By=-0.4472135954999579 +rEigenVec_Bz=0.0 +rEigenVec_E=0.6708203932499369 + From 1fcb3b2dc9a4ef10a46d78e6594eec95aa1ce6b3 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Tue, 21 Mar 2023 10:42:59 -0400 Subject: [PATCH 255/694] Remove MHD dims from hdf5 output --- src/io/io.cpp | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/src/io/io.cpp b/src/io/io.cpp index cdc3c62a5..6ddbe0f3a 100644 --- a/src/io/io.cpp +++ b/src/io/io.cpp @@ -710,14 +710,6 @@ void Grid3D::Write_Header_HDF5(hid_t file_id) status = Write_HDF5_Attribute(file_id, dataspace_id, int_data, "dims"); - #ifdef MHD - for (size_t i = 0; i < 3; i++) { - int_data[i]++; - } - - status = Write_HDF5_Attribute(file_id, dataspace_id, int_data, "magnetic_field_dims"); - #endif // MHD - #ifdef MPI_CHOLLA int_data[0] = H.nx_real; int_data[1] = H.ny_real; @@ -725,14 +717,6 @@ void Grid3D::Write_Header_HDF5(hid_t file_id) status = Write_HDF5_Attribute(file_id, dataspace_id, int_data, "dims_local"); - #ifdef MHD - int_data[0] = H.nx_real + 1; - int_data[1] = H.ny_real + 1; - int_data[2] = H.nz_real + 1; - - status = Write_HDF5_Attribute(file_id, dataspace_id, int_data, "magnetic_field_dims_local"); - #endif // MHD - int_data[0] = nx_local_start; int_data[1] = ny_local_start; int_data[2] = nz_local_start; From 3a90d70bdbd00bee04bfed225332318b037ee6be Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Tue, 21 Mar 2023 10:59:36 -0400 Subject: [PATCH 256/694] Fix up MHD I/O so that it only outputs correct cells --- src/io/io.cpp | 12 ++++++------ src/io/io.h | 4 ++-- src/io/io_gpu.cu | 43 ++++++++++++++++++++++--------------------- 3 files changed, 30 insertions(+), 29 deletions(-) diff --git a/src/io/io.cpp b/src/io/io.cpp index 6ddbe0f3a..64c757d03 100644 --- a/src/io/io.cpp +++ b/src/io/io.cpp @@ -1630,12 +1630,12 @@ void Grid3D::Write_Grid_HDF5(hid_t file_id) #ifdef MHD if (H.Output_Complete_Data) { // Note: for WriteHDF5Field3D, use the left side n_ghost - WriteHDF5Field3D(H.nx, H.ny, nx_dset + 1, ny_dset + 1, nz_dset + 1, H.n_ghost - 1, file_id, dataset_buffer, - device_dataset_buffer, C.d_magnetic_x, "/magnetic_x"); - WriteHDF5Field3D(H.nx, H.ny, nx_dset + 1, ny_dset + 1, nz_dset + 1, H.n_ghost - 1, file_id, dataset_buffer, - device_dataset_buffer, C.d_magnetic_y, "/magnetic_y"); - WriteHDF5Field3D(H.nx, H.ny, nx_dset + 1, ny_dset + 1, nz_dset + 1, H.n_ghost - 1, file_id, dataset_buffer, - device_dataset_buffer, C.d_magnetic_z, "/magnetic_z"); + WriteHDF5Field3D(H.nx, H.ny, nx_dset + 1, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, + device_dataset_buffer, C.d_magnetic_x, "/magnetic_x", 0); + WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset + 1, nz_dset, H.n_ghost, file_id, dataset_buffer, + device_dataset_buffer, C.d_magnetic_y, "/magnetic_y", 1); + WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset + 1, H.n_ghost, file_id, dataset_buffer, + device_dataset_buffer, C.d_magnetic_z, "/magnetic_z", 2); } #endif // MHD diff --git a/src/io/io.h b/src/io/io.h index fb47246a8..e8de51adc 100644 --- a/src/io/io.h +++ b/src/io/io.h @@ -65,7 +65,7 @@ herr_t Write_HDF5_Dataset(hid_t file_id, hid_t dataspace_id, float* dataset_buff // Use GPU to pack source -> device_buffer, then copy device_buffer -> buffer, // then write HDF5 field void WriteHDF5Field3D(int nx, int ny, int nx_real, int ny_real, int nz_real, int n_ghost, hid_t file_id, float* buffer, - float* device_buffer, Real* source, const char* name); + float* device_buffer, Real* source, const char* name, int mhd_direction = -1); void WriteHDF5Field3D(int nx, int ny, int nx_real, int ny_real, int nz_real, int n_ghost, hid_t file_id, double* buffer, - double* device_buffer, Real* source, const char* name); + double* device_buffer, Real* source, const char* name, int mhd_direction = -1); #endif diff --git a/src/io/io_gpu.cu b/src/io/io_gpu.cu index 62f0473a8..0ec9935e8 100644 --- a/src/io/io_gpu.cu +++ b/src/io/io_gpu.cu @@ -5,6 +5,7 @@ #include "../grid/grid3D.h" #include "../io/io.h" // To provide io.h with OutputViz3D + #include "../utils/cuda_utilities.h" // Note that the HDF5 file and buffer will have size nx_real * ny_real * nz_real // whereas the conserved variables have size nx,ny,nz Note that magnetic fields @@ -15,22 +16,21 @@ // Copy Real (non-ghost) cells from source to a double destination (for writing // HDF5 in double precision) __global__ void CopyReal3D_GPU_Kernel(int nx, int ny, int nx_real, int ny_real, int nz_real, int n_ghost, - double* destination, Real* source) + double* destination, Real* source, int mhd_direction) { - int dest_id, source_id, id, i, j, k; - id = threadIdx.x + blockIdx.x * blockDim.x; + int const id = threadIdx.x + blockIdx.x * blockDim.x; - k = id / (nx_real * ny_real); - j = (id - k * nx_real * ny_real) / nx_real; - i = id - j * nx_real - k * nx_real * ny_real; + int i, j, k; + cuda_utilities::compute3DIndices(id, nx_real, ny_real, i, j, k); if (k >= nz_real) { return; } // This converts into HDF5 indexing that plays well with Python - dest_id = k + j * nz_real + i * ny_real * nz_real; - source_id = (i + n_ghost) + (j + n_ghost) * nx + (k + n_ghost) * nx * ny; + int const dest_id = k + j * nz_real + i * ny_real * nz_real; + int const source_id = (i + n_ghost - int(mhd_direction == 0)) + (j + n_ghost - int(mhd_direction == 1)) * nx + + (k + n_ghost - int(mhd_direction == 2)) * nx * ny; destination[dest_id] = (double)source[source_id]; } @@ -38,22 +38,23 @@ __global__ void CopyReal3D_GPU_Kernel(int nx, int ny, int nx_real, int ny_real, // Copy Real (non-ghost) cells from source to a float destination (for writing // HDF5 in float precision) __global__ void CopyReal3D_GPU_Kernel(int nx, int ny, int nx_real, int ny_real, int nz_real, int n_ghost, - float* destination, Real* source) + float* destination, Real* source, int mhd_direction) { - int dest_id, source_id, id, i, j, k; - id = threadIdx.x + blockIdx.x * blockDim.x; + int const id = threadIdx.x + blockIdx.x * blockDim.x; - k = id / (nx_real * ny_real); - j = (id - k * nx_real * ny_real) / nx_real; - i = id - j * nx_real - k * nx_real * ny_real; + int i, j, k; + cuda_utilities::compute3DIndices(id, nx_real, ny_real, i, j, k); if (k >= nz_real) { return; } - // This converts into HDF5 indexing that plays well with Python - dest_id = k + j * nz_real + i * ny_real * nz_real; - source_id = (i + n_ghost) + (j + n_ghost) * nx + (k + n_ghost) * nx * ny; + // This converts into HDF5 indexing that plays well with Python. + // The `int(mhd_direction == NUM)` sections provide appropriate shifts for writing out the magnetic fields since they + // need an extra cell in the same direction as the field + int const dest_id = k + j * nz_real + i * ny_real * nz_real; + int const source_id = (i + n_ghost - int(mhd_direction == 0)) + (j + n_ghost - int(mhd_direction == 1)) * nx + + (k + n_ghost - int(mhd_direction == 2)) * nx * ny; destination[dest_id] = (float)source[source_id]; } @@ -61,7 +62,7 @@ __global__ void CopyReal3D_GPU_Kernel(int nx, int ny, int nx_real, int ny_real, // When buffer is double, automatically use the double version of everything // using function overloading void WriteHDF5Field3D(int nx, int ny, int nx_real, int ny_real, int nz_real, int n_ghost, hid_t file_id, double* buffer, - double* device_buffer, Real* device_source, const char* name) + double* device_buffer, Real* device_source, const char* name, int mhd_direction) { herr_t status; hsize_t dims[3]; @@ -74,7 +75,7 @@ void WriteHDF5Field3D(int nx, int ny, int nx_real, int ny_real, int nz_real, int dim3 dim1dGrid((nx_real * ny_real * nz_real + TPB - 1) / TPB, 1, 1); dim3 dim1dBlock(TPB, 1, 1); hipLaunchKernelGGL(CopyReal3D_GPU_Kernel, dim1dGrid, dim1dBlock, 0, 0, nx, ny, nx_real, ny_real, nz_real, n_ghost, - device_buffer, device_source); + device_buffer, device_source, mhd_direction); CudaSafeCall(cudaMemcpy(buffer, device_buffer, nx_real * ny_real * nz_real * sizeof(double), cudaMemcpyDeviceToHost)); // Write Buffer to HDF5 @@ -89,7 +90,7 @@ void WriteHDF5Field3D(int nx, int ny, int nx_real, int ny_real, int nz_real, int // When buffer is float, automatically use the float version of everything using // function overloading void WriteHDF5Field3D(int nx, int ny, int nx_real, int ny_real, int nz_real, int n_ghost, hid_t file_id, float* buffer, - float* device_buffer, Real* device_source, const char* name) + float* device_buffer, Real* device_source, const char* name, int mhd_direction) { herr_t status; hsize_t dims[3]; @@ -102,7 +103,7 @@ void WriteHDF5Field3D(int nx, int ny, int nx_real, int ny_real, int nz_real, int dim3 dim1dGrid((nx_real * ny_real * nz_real + TPB - 1) / TPB, 1, 1); dim3 dim1dBlock(TPB, 1, 1); hipLaunchKernelGGL(CopyReal3D_GPU_Kernel, dim1dGrid, dim1dBlock, 0, 0, nx, ny, nx_real, ny_real, nz_real, n_ghost, - device_buffer, device_source); + device_buffer, device_source, mhd_direction); CudaSafeCall(cudaMemcpy(buffer, device_buffer, nx_real * ny_real * nz_real * sizeof(float), cudaMemcpyDeviceToHost)); // Write Buffer to HDF5 From 287108f8daf0ec57d9e68c5be0854fcafa37b943 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Tue, 21 Mar 2023 14:33:18 -0400 Subject: [PATCH 257/694] Enable reading in new MHD file format Read_Grid now works with the new magnetic fields file format and the SystemTester class now does as well. --- src/io/io.cpp | 63 +++++++++++++----------------- src/io/io_tests.cpp | 2 +- src/system_tests/system_tester.cpp | 25 +++++++++--- 3 files changed, 47 insertions(+), 43 deletions(-) diff --git a/src/io/io.cpp b/src/io/io.cpp index 64c757d03..a32c8cff0 100644 --- a/src/io/io.cpp +++ b/src/io/io.cpp @@ -3183,23 +3183,20 @@ void Grid3D::Read_Grid_HDF5(hid_t file_id, struct parameters P) min_l = 1e65; max_l = -1; // Copy the x magnetic field array to the grid - for (k = 0; k < H.nz_real + 1; k++) { - for (j = 0; j < H.ny_real + 1; j++) { + for (k = 0; k < H.nz_real; k++) { + for (j = 0; j < H.ny_real; j++) { for (i = 0; i < H.nx_real + 1; i++) { - id = (i + H.n_ghost - 1) + (j + H.n_ghost - 1) * H.nx + (k + H.n_ghost - 1) * H.nx * H.ny; - buf_id = k + j * (H.nz_real + 1) + i * (H.nz_real + 1) * (H.ny_real + 1); + id = (i + H.n_ghost - 1) + (j + H.n_ghost) * H.nx + (k + H.n_ghost) * H.nx * H.ny; + buf_id = k + j * (H.nz_real) + i * (H.nz_real) * (H.ny_real); C.magnetic_x[id] = dataset_buffer[buf_id]; - mean_l += fabs(C.magnetic_x[id]); - if (fabs(C.magnetic_x[id]) > max_l) { - max_l = fabs(C.magnetic_x[id]); - } - if (fabs(C.magnetic_x[id]) < min_l) { - min_l = fabs(C.magnetic_x[id]); - } + + mean_l += std::abs(C.magnetic_x[id]); + max_l = std::max(max_l, std::abs(C.magnetic_x[id])); + min_l = std::min(min_l, std::abs(C.magnetic_x[id])); } } } - mean_l /= ((H.nz_real + 1) * (H.ny_real + 1) * (H.nx_real + 1)); + mean_l /= ((H.nz_real + 1) * (H.ny_real) * (H.nx_real)); #if MPI_CHOLLA mean_g = ReduceRealAvg(mean_l); @@ -3229,23 +3226,20 @@ void Grid3D::Read_Grid_HDF5(hid_t file_id, struct parameters P) min_l = 1e65; max_l = -1; // Copy the y magnetic field array to the grid - for (k = 0; k < H.nz_real + 1; k++) { + for (k = 0; k < H.nz_real; k++) { for (j = 0; j < H.ny_real + 1; j++) { - for (i = 0; i < H.nx_real + 1; i++) { - id = (i + H.n_ghost - 1) + (j + H.n_ghost - 1) * H.nx + (k + H.n_ghost - 1) * H.nx * H.ny; - buf_id = k + j * (H.nz_real + 1) + i * (H.nz_real + 1) * (H.ny_real + 1); + for (i = 0; i < H.nx_real; i++) { + id = (i + H.n_ghost) + (j + H.n_ghost - 1) * H.nx + (k + H.n_ghost) * H.nx * H.ny; + buf_id = k + j * (H.nz_real) + i * (H.nz_real) * (H.ny_real + 1); C.magnetic_y[id] = dataset_buffer[buf_id]; - mean_l += fabs(C.magnetic_y[id]); - if (fabs(C.magnetic_y[id]) > max_l) { - max_l = fabs(C.magnetic_y[id]); - } - if (fabs(C.magnetic_y[id]) < min_l) { - min_l = fabs(C.magnetic_y[id]); - } + + mean_l += std::abs(C.magnetic_x[id]); + max_l = std::max(max_l, std::abs(C.magnetic_x[id])); + min_l = std::min(min_l, std::abs(C.magnetic_x[id])); } } } - mean_l /= ((H.nz_real + 1) * (H.ny_real + 1) * (H.nx_real + 1)); + mean_l /= ((H.nz_real) * (H.ny_real + 1) * (H.nx_real)); #if MPI_CHOLLA mean_g = ReduceRealAvg(mean_l); @@ -3276,22 +3270,19 @@ void Grid3D::Read_Grid_HDF5(hid_t file_id, struct parameters P) max_l = -1; // Copy the z magnetic field array to the grid for (k = 0; k < H.nz_real + 1; k++) { - for (j = 0; j < H.ny_real + 1; j++) { - for (i = 0; i < H.nx_real + 1; i++) { - id = (i + H.n_ghost - 1) + (j + H.n_ghost - 1) * H.nx + (k + H.n_ghost - 1) * H.nx * H.ny; - buf_id = k + j * (H.nz_real + 1) + i * (H.nz_real + 1) * (H.ny_real + 1); + for (j = 0; j < H.ny_real; j++) { + for (i = 0; i < H.nx_real; i++) { + id = (i + H.n_ghost) + (j + H.n_ghost) * H.nx + (k + H.n_ghost - 1) * H.nx * H.ny; + buf_id = k + j * (H.nz_real + 1) + i * (H.nz_real + 1) * (H.ny_real); C.magnetic_z[id] = dataset_buffer[buf_id]; - mean_l += fabs(C.magnetic_z[id]); - if (fabs(C.magnetic_z[id]) > max_l) { - max_l = fabs(C.magnetic_z[id]); - } - if (fabs(C.magnetic_z[id]) < min_l) { - min_l = fabs(C.magnetic_z[id]); - } + + mean_l += std::abs(C.magnetic_x[id]); + max_l = std::max(max_l, std::abs(C.magnetic_x[id])); + min_l = std::min(min_l, std::abs(C.magnetic_x[id])); } } } - mean_l /= ((H.nz_real + 1) * (H.ny_real + 1) * (H.nx_real + 1)); + mean_l /= ((H.nz_real) * (H.ny_real) * (H.nx_real + 1)); #if MPI_CHOLLA mean_g = ReduceRealAvg(mean_l); diff --git a/src/io/io_tests.cpp b/src/io/io_tests.cpp index b4920f4e7..fcebbe499 100644 --- a/src/io/io_tests.cpp +++ b/src/io/io_tests.cpp @@ -34,7 +34,7 @@ TEST(tHYDROtMHDReadGridHdf5, RestartSlowWaveExpectCorrectOutput) loadRun.numMpiRanks = num_ranks; loadRun.chollaLaunchParams.append(" init=Read_Grid nfile=0 indir=" + read_directory); - loadRun.setFiducialNumTimeSteps(427); + loadRun.setFiducialNumTimeSteps(854); loadRun.runL1ErrorTest(4.2E-7, 5.4E-7); } // ============================================================================= \ No newline at end of file diff --git a/src/system_tests/system_tester.cpp b/src/system_tests/system_tester.cpp index 08c1f2acb..44d14b306 100644 --- a/src/system_tests/system_tester.cpp +++ b/src/system_tests/system_tester.cpp @@ -512,12 +512,19 @@ std::vector systemTest::SystemTestRunner::loadTestFieldData(std::string file = _testHydroFieldsFileVec; } - // Get the size of each dimension. First check if the field is a magnetic + // Get the size of each dimension. Check if the field is a magnetic // field or not to make sure we're retreiving the right dimensions - std::string dimsName = (dataSetName.find("magnetic") != std::string::npos) ? "magnetic_field_dims" : "dims"; - H5::Attribute dimensions = file[0].openAttribute(dimsName.c_str()); + H5::Attribute dimensions = file[0].openAttribute("dims"); dimensions.read(H5::PredType::NATIVE_ULONG, testDims.data()); + if (dataSetName == "magnetic_x") { + testDims.at(0)++; + } else if (dataSetName == "magnetic_y") { + testDims.at(1)++; + } else if (dataSetName == "magnetic_z") { + testDims.at(2)++; + } + // Allocate the vector std::vector testData(testDims[0] * testDims[1] * testDims[2]); @@ -544,11 +551,17 @@ std::vector systemTest::SystemTestRunner::loadTestFieldData(std::string H5::Attribute offsetAttr = file[rank].openAttribute("offset"); offsetAttr.read(H5::PredType::NATIVE_INT, offset.data()); + if (dataSetName == "magnetic_x") { + offset.at(0)--; + } else if (dataSetName == "magnetic_y") { + offset.at(1)--; + } else if (dataSetName == "magnetic_z") { + offset.at(2)--; + } + // Get dims_local std::vector dimsLocal(3, 1); - std::string dimsNameLocal = - (dataSetName.find("magnetic") != std::string::npos) ? "magnetic_field_dims_local" : "dims_local"; - H5::Attribute dimsLocalAttr = file[rank].openAttribute(dimsNameLocal.c_str()); + H5::Attribute dimsLocalAttr = file[rank].openAttribute("dims_local"); dimsLocalAttr.read(H5::PredType::NATIVE_INT, dimsLocal.data()); // Now we add the data to the larger vector From 1cc39d576dca26ddd00e10c1a2ddabb2a985087d Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Tue, 21 Mar 2023 14:54:53 -0400 Subject: [PATCH 258/694] Update cat_dset_3D.py for new MHD format --- python_scripts/cat_dset_3D.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/python_scripts/cat_dset_3D.py b/python_scripts/cat_dset_3D.py index e37525b5b..70e40d7de 100755 --- a/python_scripts/cat_dset_3D.py +++ b/python_scripts/cat_dset_3D.py @@ -76,10 +76,12 @@ except KeyError: print('No Dual energy data present'); try: - [nxl_mag, nyl_mag, nzl_mag] = head['magnetic_field_dims_local'] - fileout['magnetic_x'][xs:xs+nxl_mag,ys:ys+nyl_mag,zs:zs+nzl_mag] = filein['magnetic_x'] - fileout['magnetic_y'][xs:xs+nxl_mag,ys:ys+nyl_mag,zs:zs+nzl_mag] = filein['magnetic_y'] - fileout['magnetic_z'][xs:xs+nxl_mag,ys:ys+nyl_mag,zs:zs+nzl_mag] = filein['magnetic_z'] + xShift = 1 if xs>0 else 0 + yShift = 1 if ys>0 else 0 + zShift = 1 if zs>0 else 0 + fileout['magnetic_x'][xs-xShift:xs+nxl, ys:ys+nyl, zs:zs+nzl] = filein['magnetic_x'] + fileout['magnetic_y'][xs:xs+nxl, ys-yShift:ys+nyl, zs:zs+nzl] = filein['magnetic_y'] + fileout['magnetic_z'][xs:xs+nxl, ys:ys+nyl, zs-zShift:zs+nzl] = filein['magnetic_z'] except KeyError: print('No magnetic field data present'); From 82670cd7258ed5d338c2d8fecfee20b07ab92787 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Tue, 21 Mar 2023 16:13:00 -0400 Subject: [PATCH 259/694] Update test data & fix SystemTester bugs --- cholla-tests-data | 2 +- src/system_tests/system_tester.cpp | 21 +++++++++++---------- 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/cholla-tests-data b/cholla-tests-data index 8c3c4476f..566ec398e 160000 --- a/cholla-tests-data +++ b/cholla-tests-data @@ -1 +1 @@ -Subproject commit 8c3c4476fdc388554cd4bb1ca036a2762830951f +Subproject commit 566ec398ec476514999082c49a3f49d1241fce59 diff --git a/src/system_tests/system_tester.cpp b/src/system_tests/system_tester.cpp index 44d14b306..0c4100bfc 100644 --- a/src/system_tests/system_tester.cpp +++ b/src/system_tests/system_tester.cpp @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -525,8 +526,8 @@ std::vector systemTest::SystemTestRunner::loadTestFieldData(std::string testDims.at(2)++; } - // Allocate the vector - std::vector testData(testDims[0] * testDims[1] * testDims[2]); + // Allocate the vector and initialize to a quiet NaN to make failed writes clearer + std::vector testData(testDims[0] * testDims[1] * testDims[2], std::numeric_limits::quiet_NaN()); for (size_t rank = 0; rank < numMpiRanks; rank++) { // Open the dataset @@ -551,19 +552,19 @@ std::vector systemTest::SystemTestRunner::loadTestFieldData(std::string H5::Attribute offsetAttr = file[rank].openAttribute("offset"); offsetAttr.read(H5::PredType::NATIVE_INT, offset.data()); - if (dataSetName == "magnetic_x") { - offset.at(0)--; - } else if (dataSetName == "magnetic_y") { - offset.at(1)--; - } else if (dataSetName == "magnetic_z") { - offset.at(2)--; - } - // Get dims_local std::vector dimsLocal(3, 1); H5::Attribute dimsLocalAttr = file[rank].openAttribute("dims_local"); dimsLocalAttr.read(H5::PredType::NATIVE_INT, dimsLocal.data()); + if (dataSetName == "magnetic_x") { + dimsLocal.at(0)++; + } else if (dataSetName == "magnetic_y") { + dimsLocal.at(1)++; + } else if (dataSetName == "magnetic_z") { + dimsLocal.at(2)++; + } + // Now we add the data to the larger vector size_t localIndex = 0; for (size_t i = offset[0]; i < offset[0] + dimsLocal[0]; i++) { From 51c2551c9a60075002c436630f24fc28643bada1 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Tue, 21 Mar 2023 16:24:21 -0400 Subject: [PATCH 260/694] Fix bug in cat_dset_3D.py --- python_scripts/cat_dset_3D.py | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/python_scripts/cat_dset_3D.py b/python_scripts/cat_dset_3D.py index 70e40d7de..5ac71a612 100755 --- a/python_scripts/cat_dset_3D.py +++ b/python_scripts/cat_dset_3D.py @@ -51,10 +51,9 @@ except KeyError: print('No Dual energy data present'); try: - [nx_mag, ny_mag, nz_mag] = head['magnetic_field_dims'] - bx = fileout.create_dataset("magnetic_x", (nx_mag, ny_mag, nz_mag), chunks=True, dtype=filein['magnetic_x'].dtype) - by = fileout.create_dataset("magnetic_y", (nx_mag, ny_mag, nz_mag), chunks=True, dtype=filein['magnetic_y'].dtype) - bz = fileout.create_dataset("magnetic_z", (nx_mag, ny_mag, nz_mag), chunks=True, dtype=filein['magnetic_z'].dtype) + bx = fileout.create_dataset("magnetic_x", (nx+1, ny, nz), chunks=True, dtype=filein['magnetic_x'].dtype) + by = fileout.create_dataset("magnetic_y", (nx, ny+1, nz), chunks=True, dtype=filein['magnetic_y'].dtype) + bz = fileout.create_dataset("magnetic_z", (nx, ny, nz+1), chunks=True, dtype=filein['magnetic_z'].dtype) except KeyError: print('No magnetic field data present'); @@ -76,12 +75,9 @@ except KeyError: print('No Dual energy data present'); try: - xShift = 1 if xs>0 else 0 - yShift = 1 if ys>0 else 0 - zShift = 1 if zs>0 else 0 - fileout['magnetic_x'][xs-xShift:xs+nxl, ys:ys+nyl, zs:zs+nzl] = filein['magnetic_x'] - fileout['magnetic_y'][xs:xs+nxl, ys-yShift:ys+nyl, zs:zs+nzl] = filein['magnetic_y'] - fileout['magnetic_z'][xs:xs+nxl, ys:ys+nyl, zs-zShift:zs+nzl] = filein['magnetic_z'] + fileout['magnetic_x'][xs:xs+nxl+1, ys:ys+nyl, zs:zs+nzl] = filein['magnetic_x'] + fileout['magnetic_y'][xs:xs+nxl, ys:ys+nyl+1, zs:zs+nzl] = filein['magnetic_y'] + fileout['magnetic_z'][xs:xs+nxl, ys:ys+nyl, zs:zs+nzl+1] = filein['magnetic_z'] except KeyError: print('No magnetic field data present'); From 311cbfb26b377a3834c7f3c5324e3bfbf5a54f0c Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Tue, 21 Mar 2023 16:30:56 -0400 Subject: [PATCH 261/694] Replace all custom PI macros with built in M_PI --- src/global/global.h | 1 - src/grid/initial_conditions.cpp | 42 +++++++++++------------ src/io/io.cpp | 2 +- src/system_tests/cooling_system_tests.cpp | 4 --- 4 files changed, 22 insertions(+), 27 deletions(-) diff --git a/src/global/global.h b/src/global/global.h index c8aa99383..9ba6ca331 100644 --- a/src/global/global.h +++ b/src/global/global.h @@ -28,7 +28,6 @@ typedef double Real; #define MAXLEN 2048 #define TINY_NUMBER 1.0e-20 -#define PI 3.141592653589793 #define MP 1.672622e-24 // mass of proton, grams #define KB 1.380658e-16 // boltzmann constant, cgs // #define GN 6.67259e-8 // gravitational constant, cgs diff --git a/src/grid/initial_conditions.cpp b/src/grid/initial_conditions.cpp index 0a4a29205..e7fb0f0b5 100644 --- a/src/grid/initial_conditions.cpp +++ b/src/grid/initial_conditions.cpp @@ -275,11 +275,11 @@ void Grid3D::Sound_Wave(Real rho, Real vx, Real vy, Real vz, Real P, Real A) C.momentum_z[id] = rho * vz; C.Energy[id] = P / (gama - 1.0) + 0.5 * rho * (vx * vx + vy * vy + vz * vz); // add small-amplitude perturbations - C.density[id] = C.density[id] + A * sin(2.0 * PI * x_pos); - C.momentum_x[id] = C.momentum_x[id] + A * sin(2.0 * PI * x_pos); - C.momentum_y[id] = C.momentum_y[id] + A * sin(2.0 * PI * x_pos); - C.momentum_z[id] = C.momentum_z[id] + A * sin(2.0 * PI * x_pos); - C.Energy[id] = C.Energy[id] + A * (1.5) * sin(2 * PI * x_pos); + C.density[id] = C.density[id] + A * sin(2.0 * M_PI * x_pos); + C.momentum_x[id] = C.momentum_x[id] + A * sin(2.0 * M_PI * x_pos); + C.momentum_y[id] = C.momentum_y[id] + A * sin(2.0 * M_PI * x_pos); + C.momentum_z[id] = C.momentum_z[id] + A * sin(2.0 * M_PI * x_pos); + C.Energy[id] = C.Energy[id] + A * (1.5) * sin(2 * M_PI * x_pos); #ifdef DE C.GasEnergy[id] = P / (gama - 1.0); #endif // DE @@ -316,7 +316,7 @@ void Grid3D::Linear_Wave(Real rho, Real vx, Real vy, Real vz, Real P, Real A, Re // set constant initial states. Note that mhd::utils::computeEnergy // computes the hydro energy if MHD is turned off - Real sine_wave = std::sin(2.0 * PI * x_pos); + Real sine_wave = std::sin(2.0 * M_PI * x_pos); C.density[id] = rho; C.momentum_x[id] = rho * vx; @@ -331,7 +331,7 @@ void Grid3D::Linear_Wave(Real rho, Real vx, Real vy, Real vz, Real P, Real A, Re C.Energy[id] += A * rEigenVec_E * sine_wave; #ifdef MHD - sine_wave = std::sin(2.0 * PI * (x_pos + stagger)); + sine_wave = std::sin(2.0 * M_PI * (x_pos + stagger)); C.magnetic_x[id] = Bx + A * rEigenVec_Bx * sine_wave; C.magnetic_y[id] = By + A * rEigenVec_By * sine_wave; C.magnetic_z[id] = Bz + A * rEigenVec_Bz * sine_wave; @@ -526,7 +526,7 @@ void Grid3D::Shu_Osher() P = 10.33333; C.Energy[id] = P / (gama - 1.0) + 0.5 * C.density[id] * vx * vx; } else { - C.density[id] = 1.0 + 0.2 * sin(5.0 * PI * x_pos); + C.density[id] = 1.0 + 0.2 * sin(5.0 * M_PI * x_pos); Real vx = 0.0; C.momentum_x[id] = C.density[id] * vx; C.momentum_y[id] = 0.0; @@ -624,7 +624,7 @@ void Grid3D::KH() if (y_pos <= 1.0 * H.ydglobal / 4.0) { C.density[id] = d2; C.momentum_x[id] = v2 * C.density[id]; - C.momentum_y[id] = C.density[id] * A * sin(4 * PI * x_pos); + C.momentum_y[id] = C.density[id] * A * sin(4 * M_PI * x_pos); C.momentum_z[id] = 0.0; #ifdef SCALAR #ifdef BASIC_SCALAR @@ -634,7 +634,7 @@ void Grid3D::KH() } else if (y_pos >= 3.0 * H.ydglobal / 4.0) { C.density[id] = d2; C.momentum_x[id] = v2 * C.density[id]; - C.momentum_y[id] = C.density[id] * A * sin(4 * PI * x_pos); + C.momentum_y[id] = C.density[id] * A * sin(4 * M_PI * x_pos); C.momentum_z[id] = 0.0; #ifdef SCALAR @@ -647,7 +647,7 @@ void Grid3D::KH() else { C.density[id] = d1; C.momentum_x[id] = v1 * C.density[id]; - C.momentum_y[id] = C.density[id] * A * sin(4 * PI * x_pos); + C.momentum_y[id] = C.density[id] * A * sin(4 * M_PI * x_pos); C.momentum_z[id] = 0.0; #ifdef SCALAR @@ -721,7 +721,7 @@ void Grid3D::KH_res_ind() C.momentum_x[id] = v1 * C.density[id] - C.density[id] * (v1 - v2) * exp(-0.5 * pow(y_pos - 0.75 - sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy)); - C.momentum_y[id] = C.density[id] * A * sin(4 * PI * x_pos) * + C.momentum_y[id] = C.density[id] * A * sin(4 * M_PI * x_pos) * exp(-0.5 * pow(y_pos - 0.75 - sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy)); } else { C.density[id] = @@ -729,7 +729,7 @@ void Grid3D::KH_res_ind() C.momentum_x[id] = v1 * C.density[id] - C.density[id] * (v1 - v2) * exp(-0.5 * pow(y_pos - 0.25 + sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy)); - C.momentum_y[id] = C.density[id] * A * sin(4 * PI * x_pos) * + C.momentum_y[id] = C.density[id] * A * sin(4 * M_PI * x_pos) * exp(-0.5 * pow(y_pos - 0.25 + sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy)); } } @@ -741,7 +741,7 @@ void Grid3D::KH_res_ind() C.momentum_x[id] = v2 * C.density[id] + C.density[id] * (v1 - v2) * exp(-0.5 * pow(y_pos - 0.75 + sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy)); - C.momentum_y[id] = C.density[id] * A * sin(4 * PI * x_pos) * + C.momentum_y[id] = C.density[id] * A * sin(4 * M_PI * x_pos) * exp(-0.5 * pow(y_pos - 0.75 + sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy)); } else { C.density[id] = @@ -749,7 +749,7 @@ void Grid3D::KH_res_ind() C.momentum_x[id] = v2 * C.density[id] + C.density[id] * (v1 - v2) * exp(-0.5 * pow(y_pos - 0.25 - sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy)); - C.momentum_y[id] = C.density[id] * A * sin(4 * PI * x_pos) * + C.momentum_y[id] = C.density[id] * A * sin(4 * M_PI * x_pos) * exp(-0.5 * pow(y_pos - 0.25 - sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy)); } } @@ -765,18 +765,18 @@ void Grid3D::KH_res_ind() C.density[id] = d1 - (d1 - d2) * exp(-0.5 * pow(r - 0.25 - sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy)); C.momentum_x[id] = v1 * C.density[id] - C.density[id] * exp(-0.5 * pow(r - 0.25 - sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy)); - C.momentum_y[id] = cos(phi) * C.density[id] * A * sin(4 * PI * x_pos) * + C.momentum_y[id] = cos(phi) * C.density[id] * A * sin(4 * M_PI * x_pos) * exp(-0.5 * pow(r - 0.25 + sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy)); - C.momentum_z[id] = sin(phi) * C.density[id] * A * sin(4 * PI * x_pos) * + C.momentum_z[id] = sin(phi) * C.density[id] * A * sin(4 * M_PI * x_pos) * exp(-0.5 * pow(r - 0.25 + sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy)); } else // outside the cylinder { C.density[id] = d2 + (d1 - d2) * exp(-0.5 * pow(r - 0.25 + sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy)); C.momentum_x[id] = v2 * C.density[id] + C.density[id] * exp(-0.5 * pow(r - 0.25 + sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy)); - C.momentum_y[id] = cos(phi) * C.density[id] * A * sin(4 * PI * x_pos) * + C.momentum_y[id] = cos(phi) * C.density[id] * A * sin(4 * M_PI * x_pos) * (1.0 - exp(-0.5 * pow(r - 0.25 + sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy))); - C.momentum_z[id] = sin(phi) * C.density[id] * A * sin(4 * PI * x_pos) * + C.momentum_z[id] = sin(phi) * C.density[id] * A * sin(4 * M_PI * x_pos) * (1.0 - exp(-0.5 * pow(r - 0.25 + sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy))); } @@ -815,7 +815,7 @@ void Grid3D::Rayleigh_Taylor() Get_Position(i, j, H.n_ghost, &x_pos, &y_pos, &z_pos); // set the y velocities (small perturbation tapering off from center) - vy = 0.01 * cos(6 * PI * x_pos + PI) * exp(-(y_pos - 0.5 * H.ydglobal) * (y_pos - 0.5 * H.ydglobal) / 0.1); + vy = 0.01 * cos(6 * M_PI * x_pos + M_PI) * exp(-(y_pos - 0.5 * H.ydglobal) * (y_pos - 0.5 * H.ydglobal) / 0.1); // vy = 0.0; // lower half of slab @@ -1084,7 +1084,7 @@ void Grid3D::Disk_2D() // Disk surface density [M_sun / kpc^2] // Assume gas surface density is exponential with scale length 2*R_d and // mass 0.25*M_d - Sigma = 0.25 * M_d * exp(-r / (2 * R_d)) / (8 * PI * R_d * R_d); + Sigma = 0.25 * M_d * exp(-r / (2 * R_d)) / (8 * M_PI * R_d * R_d); d = Sigma; // just use sigma for mass density since height is arbitrary n = d * DENSITY_UNIT / MP; // number density, cgs P = n * KB * T_d / PRESSURE_UNIT; // disk pressure, code units diff --git a/src/io/io.cpp b/src/io/io.cpp index a32c8cff0..f1cecaa4f 100644 --- a/src/io/io.cpp +++ b/src/io/io.cpp @@ -482,7 +482,7 @@ void OutputRotatedProjectedData(Grid3D &G, struct parameters P, int nfile) } else if (G.R.flag_delta == 2) { // case 2 -- outputting at a rotating delta // rotation rate given in the parameter file - G.R.delta = fmod(nfile * G.R.ddelta_dt * 2.0 * PI, (2.0 * PI)); + G.R.delta = fmod(nfile * G.R.ddelta_dt * 2.0 * M_PI, (2.0 * M_PI)); // Create a new file file_id = H5Fcreate(filename, H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT); diff --git a/src/system_tests/cooling_system_tests.cpp b/src/system_tests/cooling_system_tests.cpp index 7db321cc9..f3fa90db4 100644 --- a/src/system_tests/cooling_system_tests.cpp +++ b/src/system_tests/cooling_system_tests.cpp @@ -13,10 +13,6 @@ #include "../system_tests/system_tester.h" #include "../utils/testing_utilities.h" -#ifndef PI - #define PI 3.141592653589793 -#endif - #define COOL_RHO 6.9498489284711 TEST(tCOOLINGSYSTEMConstant5, CorrectInputExpectCorrectOutput) From c8a828f5818fff5750bbdf8115b3236fb34c0f85 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Tue, 21 Mar 2023 16:52:23 -0400 Subject: [PATCH 262/694] Remove unused variables in Wind_Boundary_kernel --- src/grid/cuda_boundaries.cu | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/src/grid/cuda_boundaries.cu b/src/grid/cuda_boundaries.cu index 1e0257380..8cf48c0d8 100644 --- a/src/grid/cuda_boundaries.cu +++ b/src/grid/cuda_boundaries.cu @@ -1,6 +1,7 @@ #include "../global/global.h" #include "../global/global_cuda.h" #include "../utils/gpu.hpp" +#include "../utils/cuda_utilities.h" #include "cuda_boundaries.h" __device__ int FindIndex(int ig, int nx, int flag, int face, int n_ghost, Real *a); @@ -324,17 +325,8 @@ __global__ void Wind_Boundary_kernel(Real *c_device, int nx, int ny, int nz, int // calculate ghost cell ID and i,j,k in GPU grid id = threadIdx.x + blockIdx.x * blockDim.x; - int isize, jsize, ksize; - - // -x boundary - isize = n_ghost; - jsize = ny; - ksize = nz; - // not true i,j,k but relative i,j,k in the GPU grid - zid = id / (isize * jsize); - yid = (id - zid * isize * jsize) / isize; - xid = id - zid * isize * jsize - yid * isize; + cuda_utilities::compute3DIndices(id, n_ghost, ny, xid, yid, zid); // map thread id to ghost cell id xid += 0; // -x boundary From af35b4b4ad68891d16c960630ed9fb10bd556af8 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Tue, 21 Mar 2023 16:54:04 -0400 Subject: [PATCH 263/694] Mark some variables as [[maybe_unused]] Due to different ifdefs some variables end up unused in some builds. I've marked the unused variables for the MHD build as such --- src/grid/cuda_boundaries.cu | 2 +- src/integrators/VL_1D_cuda.cu | 8 ++++---- src/integrators/VL_2D_cuda.cu | 6 +++--- src/integrators/simple_1D_cuda.cu | 8 ++++---- src/integrators/simple_2D_cuda.cu | 6 +++--- 5 files changed, 15 insertions(+), 15 deletions(-) diff --git a/src/grid/cuda_boundaries.cu b/src/grid/cuda_boundaries.cu index 8cf48c0d8..f5dbe361d 100644 --- a/src/grid/cuda_boundaries.cu +++ b/src/grid/cuda_boundaries.cu @@ -1,7 +1,7 @@ #include "../global/global.h" #include "../global/global_cuda.h" -#include "../utils/gpu.hpp" #include "../utils/cuda_utilities.h" +#include "../utils/gpu.hpp" #include "cuda_boundaries.h" __device__ int FindIndex(int ig, int nx, int flag, int face, int n_ghost, Real *a); diff --git a/src/integrators/VL_1D_cuda.cu b/src/integrators/VL_1D_cuda.cu index e1764e386..99463f927 100644 --- a/src/integrators/VL_1D_cuda.cu +++ b/src/integrators/VL_1D_cuda.cu @@ -34,10 +34,10 @@ void VL_Algorithm_1D_CUDA(Real *d_conserved, int nx, int x_off, int n_ghost, Rea // Here, *dev_conserved contains the entire // set of conserved variables on the grid - int n_cells = nx; - int ny = 1; - int nz = 1; - int ngrid = (n_cells + TPB - 1) / TPB; + int n_cells = nx; + [[maybe_unused]] int ny = 1; + [[maybe_unused]] int nz = 1; + int ngrid = (n_cells + TPB - 1) / TPB; // set the dimensions of the cuda grid dim3 dimGrid(ngrid, 1, 1); diff --git a/src/integrators/VL_2D_cuda.cu b/src/integrators/VL_2D_cuda.cu index 05bbbec6f..79d410033 100644 --- a/src/integrators/VL_2D_cuda.cu +++ b/src/integrators/VL_2D_cuda.cu @@ -32,9 +32,9 @@ void VL_Algorithm_2D_CUDA(Real *d_conserved, int nx, int ny, int x_off, int y_of // set of conserved variables on the grid // concatenated into a 1-d array - int n_cells = nx * ny; - int nz = 1; - int ngrid = (n_cells + TPB - 1) / TPB; + int n_cells = nx * ny; + [[maybe_unused]] int nz = 1; + int ngrid = (n_cells + TPB - 1) / TPB; // set values for GPU kernels // number of blocks per 1D grid diff --git a/src/integrators/simple_1D_cuda.cu b/src/integrators/simple_1D_cuda.cu index 415edceef..8e622b85c 100644 --- a/src/integrators/simple_1D_cuda.cu +++ b/src/integrators/simple_1D_cuda.cu @@ -29,10 +29,10 @@ void Simple_Algorithm_1D_CUDA(Real *d_conserved, int nx, int x_off, int n_ghost, // Here, *dev_conserved contains the entire // set of conserved variables on the grid - int n_cells = nx; - int ny = 1; - int nz = 1; - int ngrid = (n_cells + TPB - 1) / TPB; + int n_cells = nx; + [[maybe_unused]] int ny = 1; + [[maybe_unused]] int nz = 1; + int ngrid = (n_cells + TPB - 1) / TPB; // set the dimensions of the cuda grid dim3 dimGrid(ngrid, 1, 1); diff --git a/src/integrators/simple_2D_cuda.cu b/src/integrators/simple_2D_cuda.cu index 295c955a8..bf75e97cc 100644 --- a/src/integrators/simple_2D_cuda.cu +++ b/src/integrators/simple_2D_cuda.cu @@ -26,9 +26,9 @@ void Simple_Algorithm_2D_CUDA(Real *d_conserved, int nx, int ny, int x_off, int // Here, *dev_conserved contains the entire // set of conserved variables on the grid // concatenated into a 1-d array - int n_cells = nx * ny; - int nz = 1; - int ngrid = (n_cells + TPB - 1) / TPB; + int n_cells = nx * ny; + [[maybe_unused]] int nz = 1; + int ngrid = (n_cells + TPB - 1) / TPB; // set values for GPU kernels // number of blocks per 1D grid From 1f898464faf4453b990a0f69c0b0ed1dd0178179 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Tue, 21 Mar 2023 17:57:27 -0400 Subject: [PATCH 264/694] Replace char arrays with std::string in I/O --- src/io/io.cpp | 97 +++++++++++++++++++++------------------------------ 1 file changed, 39 insertions(+), 58 deletions(-) diff --git a/src/io/io.cpp b/src/io/io.cpp index cdc3c62a5..a937928cf 100644 --- a/src/io/io.cpp +++ b/src/io/io.cpp @@ -8,6 +8,7 @@ #include #include #include +#include #ifdef HDF5 #include #endif // HDF5 @@ -179,29 +180,26 @@ void WriteData(Grid3D &G, struct parameters P, int nfile) /* Output the grid data to file. */ void OutputData(Grid3D &G, struct parameters P, int nfile) { - char filename[MAXLEN]; - char timestep[20]; - // create the filename - strcpy(filename, P.outdir); - sprintf(timestep, "%d", nfile); - strcat(filename, timestep); + std::string filename(P.outdir); + filename += std::to_string(nfile); + #if defined BINARY - strcat(filename, ".bin"); + filename += ".bin"; #elif defined HDF5 - strcat(filename, ".h5"); + filename += ".h5"; #else strcat(filename, ".txt"); if (G.H.nx * G.H.ny * G.H.nz > 1000) printf("Ascii outputs only recommended for small problems!\n"); #endif #ifdef MPI_CHOLLA - sprintf(filename, "%s.%d", filename, procID); + filename += "." + std::to_string(procID); #endif // open the file for binary writes #if defined BINARY FILE *out; - out = fopen(filename, "w"); + out = fopen(filename.data(), "w"); if (out == NULL) { printf("Error opening output file.\n"); exit(-1); @@ -222,7 +220,7 @@ void OutputData(Grid3D &G, struct parameters P, int nfile) herr_t status; // Create a new file using default properties. - file_id = H5Fcreate(filename, H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT); + file_id = H5Fcreate(filename.data(), H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT); // Write the header (file attributes) G.Write_Header_HDF5(file_id); @@ -273,16 +271,12 @@ void OutputFloat32(Grid3D &G, struct parameters P, int nfile) return; } - char filename[MAXLEN]; - char timestep[20]; - // create the filename - sprintf(timestep, "%d", nfile); - strcpy(filename, P.outdir); - strcat(filename, timestep); - strcat(filename, ".float32.h5"); + std::string filename(P.outdir); + filename += std::to_string(nfile); + filename += ".float32.h5"; #ifdef MPI_CHOLLA - sprintf(filename, "%s.%d", filename, procID); + filename += "." + std::to_string(procID); #endif // create hdf5 file @@ -290,7 +284,7 @@ void OutputFloat32(Grid3D &G, struct parameters P, int nfile) herr_t status; // Create a new file using default properties. - file_id = H5Fcreate(filename, H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT); + file_id = H5Fcreate(filename.data(), H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT); // Write the header (file attributes) G.Write_Header_HDF5(file_id); @@ -375,24 +369,21 @@ void OutputFloat32(Grid3D &G, struct parameters P, int nfile) /* Output a projection of the grid data to file. */ void OutputProjectedData(Grid3D &G, struct parameters P, int nfile) { - char filename[100]; - char timestep[20]; #ifdef HDF5 hid_t file_id; herr_t status; // create the filename - strcpy(filename, P.outdir); - sprintf(timestep, "%d_proj", nfile); - strcat(filename, timestep); - strcat(filename, ".h5"); + std::string filename(P.outdir); + filename += std::to_string(nfile); + filename += "_proj.h5"; #ifdef MPI_CHOLLA - sprintf(filename, "%s.%d", filename, procID); + filename += "." + std::to_string(procID); #endif /*MPI_CHOLLA*/ // Create a new file - file_id = H5Fcreate(filename, H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT); + file_id = H5Fcreate(filename.data(), H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT); // Write header (file attributes) G.Write_Header_HDF5(file_id); @@ -423,20 +414,17 @@ void OutputProjectedData(Grid3D &G, struct parameters P, int nfile) /* Output a rotated projection of the grid data to file. */ void OutputRotatedProjectedData(Grid3D &G, struct parameters P, int nfile) { - char filename[100]; - char timestep[20]; #ifdef HDF5 hid_t file_id; herr_t status; // create the filename - strcpy(filename, P.outdir); - sprintf(timestep, "%d_rot_proj", nfile); - strcat(filename, timestep); - strcat(filename, ".h5"); + std::string filename(P.outdir); + filename += std::to_string(nfile); + filename += "_rot_proj.h5"; #ifdef MPI_CHOLLA - sprintf(filename, "%s.%d", filename, procID); + filename += "." + std::to_string(procID); #endif /*MPI_CHOLLA*/ if (G.R.flag_delta == 1) { @@ -446,7 +434,7 @@ void OutputRotatedProjectedData(Grid3D &G, struct parameters P, int nfile) char fname[200]; for (i_delta = 0; i_delta < G.R.n_delta; i_delta++) { - sprintf(fname, "%s.%d", filename, G.R.i_delta); + filename += "." + std::to_string(G.R.i_delta); chprintf("Outputting rotated projection %s.\n", fname); // determine delta about z by output index @@ -485,7 +473,7 @@ void OutputRotatedProjectedData(Grid3D &G, struct parameters P, int nfile) G.R.delta = fmod(nfile * G.R.ddelta_dt * 2.0 * PI, (2.0 * PI)); // Create a new file - file_id = H5Fcreate(filename, H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT); + file_id = H5Fcreate(filename.data(), H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT); // Write header (file attributes) G.Write_Header_Rotated_HDF5(file_id); @@ -499,7 +487,7 @@ void OutputRotatedProjectedData(Grid3D &G, struct parameters P, int nfile) // case 0 -- just output at the delta given in the parameter file // Create a new file - file_id = H5Fcreate(filename, H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT); + file_id = H5Fcreate(filename.data(), H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT); // Write header (file attributes) G.Write_Header_Rotated_HDF5(file_id); @@ -531,24 +519,21 @@ void OutputRotatedProjectedData(Grid3D &G, struct parameters P, int nfile) /* Output xy, xz, and yz slices of the grid data. */ void OutputSlices(Grid3D &G, struct parameters P, int nfile) { - char filename[100]; - char timestep[20]; #ifdef HDF5 hid_t file_id; herr_t status; // create the filename - strcpy(filename, P.outdir); - sprintf(timestep, "%d_slice", nfile); - strcat(filename, timestep); - strcat(filename, ".h5"); + std::string filename(P.outdir); + filename += std::to_string(nfile); + filename += "_slice.h5"; #ifdef MPI_CHOLLA - sprintf(filename, "%s.%d", filename, procID); + filename += "." + std::to_string(procID); #endif /*MPI_CHOLLA*/ // Create a new file - file_id = H5Fcreate(filename, H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT); + file_id = H5Fcreate(filename.data(), H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT); // Write header (file attributes) G.Write_Header_HDF5(file_id); @@ -2259,21 +2244,17 @@ void Grid3D::Write_Slices_HDF5(hid_t file_id) * \brief Read in grid data from an output file. */ void Grid3D::Read_Grid(struct parameters P) { - char filename[100]; - char timestep[20]; - int nfile = P.nfile; // output step you want to read from - // create the filename to read from // assumes your data is in the outdir specified in the input file // strcpy(filename, P.outdir); // Changed to read initial conditions from indir - strcpy(filename, P.indir); - sprintf(timestep, "%d", nfile); - strcat(filename, timestep); + std::string filename(P.indir); + filename += std::to_string(P.nfile); + #if defined BINARY - strcat(filename, ".bin"); + filename += ".bin"; #elif defined HDF5 - strcat(filename, ".h5"); + filename += ".h5"; #endif // BINARY or HDF5 // for now assumes you will run on the same number of processors #ifdef MPI_CHOLLA @@ -2281,7 +2262,7 @@ void Grid3D::Read_Grid(struct parameters P) sprintf(filename, "%sics_%dMpc_%d.h5", P.indir, (int)P.tile_length / 1000, H.nx_real); // Everyone reads the same file #else // TILED_INITIAL_CONDITIONS is not defined - sprintf(filename, "%s.%d", filename, procID); + filename += "." + std::to_string(procID); #endif // TILED_INITIAL_CONDITIONS #endif // MPI_CHOLLA @@ -2305,9 +2286,9 @@ void Grid3D::Read_Grid(struct parameters P) herr_t status; // open the file - file_id = H5Fopen(filename, H5F_ACC_RDONLY, H5P_DEFAULT); + file_id = H5Fopen(filename.data(), H5F_ACC_RDONLY, H5P_DEFAULT); if (file_id < 0) { - printf("Unable to open input file: %s\n", filename); + std::cout << "Unable to open input file: " << filename << std::endl; exit(0); } From 74df5da966c26959d82fce77c13cab6c870f3f36 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Fri, 24 Mar 2023 15:30:47 -0400 Subject: [PATCH 265/694] Post merge formatting --- src/grid/initial_conditions.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/grid/initial_conditions.cpp b/src/grid/initial_conditions.cpp index e06b4b94f..218a3bd28 100644 --- a/src/grid/initial_conditions.cpp +++ b/src/grid/initial_conditions.cpp @@ -631,7 +631,7 @@ void Grid3D::KH() C.basic_scalar[id] = 0.0; #endif #endif - // inner half of slab + // inner half of slab } else { C.density[id] = d1; C.momentum_x[id] = v1 * C.density[id]; From 700538844a0269f6cac5fff8e55145fce123aba2 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Mon, 27 Mar 2023 10:31:57 -0400 Subject: [PATCH 266/694] Have `make tidy` tell you where the results are --- Makefile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Makefile b/Makefile index 302654aac..e5b7a56e3 100644 --- a/Makefile +++ b/Makefile @@ -204,9 +204,11 @@ tidy: # Flags we might want # - --warnings-as-errors= Upgrade all warnings to error, good for CI clang-tidy --verify-config + @echo -e (time clang-tidy $(CLANG_TIDY_ARGS) $(CPPFILES_TIDY) -- $(DFLAGS) $(CXXFLAGS_CLANG_TIDY) $(LIBS_CLANG_TIDY)) > tidy_results_cpp.log 2>&1 & \ (time clang-tidy $(CLANG_TIDY_ARGS) $(GPUFILES_TIDY) -- $(DFLAGS) $(GPUFLAGS_CLANG_TIDY) $(LIBS_CLANG_TIDY)) > tidy_results_gpu.log 2>&1 & \ for i in 1 2; do wait -n; done + @echo -e "\nResults from clang-tidy are available in the 'tidy_results_cpp.log' and 'tidy_results_gpu.log' files." clean: rm -f $(CLEAN_OBJS) From 037af8d8834d88feb1b9bb2a841366bd130f519f Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Mon, 27 Mar 2023 10:49:17 -0400 Subject: [PATCH 267/694] Replace naive L1 error sum with Kahan sum --- src/system_tests/system_tester.cpp | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/src/system_tests/system_tester.cpp b/src/system_tests/system_tester.cpp index 0c4100bfc..31aed41b8 100644 --- a/src/system_tests/system_tester.cpp +++ b/src/system_tests/system_tester.cpp @@ -264,18 +264,25 @@ void systemTest::SystemTestRunner::runL1ErrorTest(double const &maxAllowedL1Erro << "The initial and final '" << dataSetName << "' datasets are not the same length"; // Compute the L1 Error. - double L1Error = 0; + double L1_error = 0.0; + double fp_sum_error = 0.0; for (size_t i = 0; i < initialData.size(); i++) { double const diff = std::abs(initialData.at(i) - finalData.at(i)); - L1Error += diff; - maxError = (diff > maxError) ? diff : maxError; + + maxError = std::max(maxError, diff); + + // Perform a Kahan sum to maintain precision in the result + double const y = diff - fp_sum_error; + double const t = L1_error + y; + fp_sum_error = (t - L1_error) - y; + L1_error = t; } - L1Error *= (1. / static_cast(initialDims[0] * initialDims[1] * initialDims[2])); - L2Norm += L1Error * L1Error; + L1_error /= static_cast(initialDims[0] * initialDims[1] * initialDims[2]); + L2Norm += L1_error * L1_error; // Perform the correctness check - EXPECT_LT(L1Error, maxAllowedL1Error) + EXPECT_LT(L1_error, maxAllowedL1Error) << "the L1 error for the " << dataSetName << " data has exceeded the allowed value"; } From 81470b066f27fbf5ce5aa27eb9c44d15a0ae3f80 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Mon, 27 Mar 2023 10:49:48 -0400 Subject: [PATCH 268/694] Fix minor bug in hydro restart test The number of timesteps differs between the MHD and hydro versions of the tests but that wasn't acounted for. Now it is. --- src/io/io_tests.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/io/io_tests.cpp b/src/io/io_tests.cpp index fcebbe499..f9c94119b 100644 --- a/src/io/io_tests.cpp +++ b/src/io/io_tests.cpp @@ -34,7 +34,11 @@ TEST(tHYDROtMHDReadGridHdf5, RestartSlowWaveExpectCorrectOutput) loadRun.numMpiRanks = num_ranks; loadRun.chollaLaunchParams.append(" init=Read_Grid nfile=0 indir=" + read_directory); +#ifdef MHD loadRun.setFiducialNumTimeSteps(854); +#else // not MHD + loadRun.setFiducialNumTimeSteps(427); +#endif // MHD loadRun.runL1ErrorTest(4.2E-7, 5.4E-7); } // ============================================================================= \ No newline at end of file From 8d2fc36a08dc15165c5789da96b687ab78fb5f20 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Mon, 27 Mar 2023 10:56:45 -0400 Subject: [PATCH 269/694] Modify Grid3D::Compute_Gas_Temperature for MHD --- src/chemistry_gpu/chemistry_functions.cpp | 5 ++++- src/riemann_solvers/hlld_cuda.cu | 4 +--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/chemistry_gpu/chemistry_functions.cpp b/src/chemistry_gpu/chemistry_functions.cpp index 18999fe2d..227c6eee5 100644 --- a/src/chemistry_gpu/chemistry_functions.cpp +++ b/src/chemistry_gpu/chemistry_functions.cpp @@ -254,7 +254,10 @@ void Grid3D::Compute_Gas_Temperature(Real *temperature, bool convert_cosmo_units #ifdef DE GE = C.GasEnergy[id]; #else - GE = (E - 0.5 * d * (vx * vx + vy * vy + vz * vz)); // TODO: this probably needs to be fixed for MHD + GE = E - hydro_utilities::Calc_Kinetic_Energy_From_Velocity(d, vx, vy, vz); + #ifdef MHD + GE -= mhd::utils::computeMagneticEnergy(C.magnetic_x[id], C.magnetic_y[id], C.magnetic_z[id]); + #endif // MHD #endif dens_HI = C.HI_density[id]; diff --git a/src/riemann_solvers/hlld_cuda.cu b/src/riemann_solvers/hlld_cuda.cu index f05707c58..c962325a7 100644 --- a/src/riemann_solvers/hlld_cuda.cu +++ b/src/riemann_solvers/hlld_cuda.cu @@ -189,9 +189,7 @@ __device__ __host__ mhd::_internal::State loadState(Real const *interfaceArr, Re #endif // SCALAR #ifdef DE state.thermalEnergySpecific = interfaceArr[threadId + n_cells * grid_enum::GasEnergy] / state.density; - #endif // DE} - #ifdef DE // PRESSURE_DE Real energyNonThermal = hydro_utilities::Calc_Kinetic_Energy_From_Velocity(state.density, state.velocityX, state.velocityY, state.velocityZ) + mhd::utils::computeMagneticEnergy(magneticX, state.magneticY, state.magneticZ); @@ -203,7 +201,7 @@ __device__ __host__ mhd::_internal::State loadState(Real const *interfaceArr, Re // Note that this function does the positive pressure check // internally state.gasPressure = mhd::utils::computeGasPressure(state, magneticX, gamma); - #endif // PRESSURE_DE + #endif // DE state.totalPressure = mhd::utils::computeTotalPressure(state.gasPressure, magneticX, state.magneticY, state.magneticZ); From 606ae534426d7f8b3e083513cd67cc2fc1c1fbea Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Mon, 27 Mar 2023 11:31:34 -0400 Subject: [PATCH 270/694] Replace ad hoc file methods with std::filesystem Thet systemTest::SystemTestRunner class used several, somewhat hacky, file manipulation methods since we weren't on C++17 when they were written. Now those have all been replaced with calls from std::filesystem. --- src/system_tests/system_tester.cpp | 93 ++++++++++++++---------------- src/system_tests/system_tester.h | 18 ------ 2 files changed, 42 insertions(+), 69 deletions(-) diff --git a/src/system_tests/system_tester.cpp b/src/system_tests/system_tester.cpp index 31aed41b8..0bc4a8b0b 100644 --- a/src/system_tests/system_tester.cpp +++ b/src/system_tests/system_tester.cpp @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -55,17 +56,15 @@ void systemTest::SystemTestRunner::runTest() _testParticlesFileVec.resize(numMpiRanks); for (size_t fileIndex = 0; fileIndex < numMpiRanks; fileIndex++) { // Load the hydro data - if (_hydroDataExists) { - std::string fileName = "/1.h5." + std::to_string(fileIndex); - _checkFileExists(_outputDirectory + fileName); - _testHydroFieldsFileVec[fileIndex].openFile(_outputDirectory + fileName, H5F_ACC_RDONLY); + std::string filePath = _outputDirectory + "/1.h5." + std::to_string(fileIndex); + if (_hydroDataExists and std::filesystem::exists(filePath)) { + _testHydroFieldsFileVec[fileIndex].openFile(filePath, H5F_ACC_RDONLY); } // Load the particles data - if (_particleDataExists) { - std::string fileName = "/1_particles.h5." + std::to_string(fileIndex); - _checkFileExists(_outputDirectory + fileName); - _testParticlesFileVec[fileIndex].openFile(_outputDirectory + fileName, H5F_ACC_RDONLY); + filePath = _outputDirectory + "/1_particles.h5." + std::to_string(fileIndex); + if (_particleDataExists and std::filesystem::exists(filePath)) { + _testParticlesFileVec[fileIndex].openFile(filePath, H5F_ACC_RDONLY); } } @@ -207,14 +206,16 @@ void systemTest::SystemTestRunner::runL1ErrorTest(double const &maxAllowedL1Erro std::vector initialHydroFieldsFileVec(numMpiRanks); for (size_t fileIndex = 0; fileIndex < numMpiRanks; fileIndex++) { // Initial time data - std::string fileName = "/0.h5." + std::to_string(fileIndex); - _checkFileExists(_outputDirectory + fileName); - initialHydroFieldsFileVec[fileIndex].openFile(_outputDirectory + fileName, H5F_ACC_RDONLY); + std::string filePath = _outputDirectory + "/0.h5." + std::to_string(fileIndex); + if (std::filesystem::exists(filePath)) { + initialHydroFieldsFileVec[fileIndex].openFile(filePath, H5F_ACC_RDONLY); + } // Final time data - fileName = "/1.h5." + std::to_string(fileIndex); - _checkFileExists(_outputDirectory + fileName); - _testHydroFieldsFileVec[fileIndex].openFile(_outputDirectory + fileName, H5F_ACC_RDONLY); + filePath = _outputDirectory + "/1.h5." + std::to_string(fileIndex); + if (std::filesystem::exists(filePath)) { + _testHydroFieldsFileVec[fileIndex].openFile(filePath, H5F_ACC_RDONLY); + } } // Get the list of test dataset names @@ -311,9 +312,13 @@ void systemTest::SystemTestRunner::launchCholla() EXPECT_EQ(returnLaunch, 0) << "Warning: Launching Cholla returned a non-zero exit status. Likely " << "failed to launch. Please see the log files" << std::endl; - _safeMove("run_output.log", _outputDirectory); - // TODO: instead of commenting out, change to check if exist - //_safeMove("run_timing.log", _outputDirectory); + // Move the output files to the correct spots + std::filesystem::rename(::globalChollaRoot.getString() + "/run_output.log", _outputDirectory + "/run_output.log"); + try { + std::filesystem::rename(::globalChollaRoot.getString() + "/run_timing.log", _outputDirectory + "/run_timing.log"); + } catch (const std::filesystem::filesystem_error &error) { + // This file might not exist and isn't required so don't worry if it doesn't exist + } } // ============================================================================= @@ -322,9 +327,10 @@ void systemTest::SystemTestRunner::openHydroTestData() { _testHydroFieldsFileVec.resize(numMpiRanks); for (size_t fileIndex = 0; fileIndex < numMpiRanks; fileIndex++) { - std::string fileName = "/1.h5." + std::to_string(fileIndex); - _checkFileExists(_outputDirectory + fileName); - _testHydroFieldsFileVec[fileIndex].openFile(_outputDirectory + fileName, H5F_ACC_RDONLY); + std::string filePath = _outputDirectory + "/1.h5." + std::to_string(fileIndex); + if (std::filesystem::exists(filePath)) { + _testHydroFieldsFileVec[fileIndex].openFile(filePath, H5F_ACC_RDONLY); + } } } // ============================================================================= @@ -396,22 +402,32 @@ systemTest::SystemTestRunner::SystemTestRunner(bool const &particleData, bool co _fullTestFileName = fullTestName.substr(0, fullTestName.find("/")); // Generate the input paths. Strip out everything after a "/" since that - // probably indicates a parameterized test. Also, check that the files exist - // and load fiducial HDF5 file if required + // probably indicates a parameterized test. _chollaPath = ::globalChollaRoot.getString() + "/bin/cholla." + ::globalChollaBuild.getString() + "." + ::globalChollaMachine.getString(); - _checkFileExists(_chollaPath); + + // Check that Cholla exists and abort if it doesn't + if (not std::filesystem::exists(_chollaPath)) { + throw std::invalid_argument("Error: Cholla executable not found."); + } + + // Check that settings file exist if (useSettingsFile) { _chollaSettingsPath = ::globalChollaRoot.getString() + "/src/system_tests/input_files/" + _fullTestFileName + ".txt"; - _checkFileExists(_chollaSettingsPath); } else { _chollaSettingsPath = ::globalChollaRoot.getString() + "/src/system_tests/input_files/" + "blank_settings_file.txt"; - _checkFileExists(_chollaSettingsPath); } + if (not std::filesystem::exists(_chollaSettingsPath)) { + throw std::invalid_argument("Error: Cholla settings file not found at :" + _chollaSettingsPath); + } + + // Check that the fiducial file exists and load it if it does if (useFiducialFile) { _fiducialFilePath = ::globalChollaRoot.getString() + "/cholla-tests-data/system_tests/" + _fullTestFileName + ".h5"; - _checkFileExists(_fiducialFilePath); + if (not std::filesystem::exists(_fiducialFilePath)) { + throw std::invalid_argument("Error: Cholla settings file not found at :" + _fiducialFilePath); + } _fiducialFile.openFile(_fiducialFilePath, H5F_ACC_RDONLY); _fiducialDataSetNames = _findDataSetNames(_fiducialFile); _fiducialFileExists = true; @@ -454,31 +470,6 @@ systemTest::SystemTestRunner::~SystemTestRunner() // Private Members // ============================================================================= -// ============================================================================= -void systemTest::SystemTestRunner::_checkFileExists(std::string const &filePath) -{ - // TODO C++17 std::filesystem does this better - std::fstream file; - file.open(filePath); - if (not file) { - std::string errMessage = "Error: File '" + filePath + "' not found."; - throw std::invalid_argument(errMessage); - } -} -// ============================================================================= - -// ============================================================================= -void systemTest::SystemTestRunner::_safeMove(std::string const &sourcePath, std::string const &destinationDirectory) -{ - // TODO C++17 std::filesystem does this better - _checkFileExists(sourcePath); - if (std::rename(sourcePath.c_str(), (destinationDirectory + "/" + sourcePath).c_str()) < 0) { - std::string errMessage = "Error: File '" + sourcePath + "' could not be moved to '" + destinationDirectory + "`"; - throw std::invalid_argument(errMessage); - } -} -// ============================================================================= - // ============================================================================= void systemTest::SystemTestRunner::_checkNumTimeSteps() { diff --git a/src/system_tests/system_tester.h b/src/system_tests/system_tester.h index 2003d72fd..1c942a766 100644 --- a/src/system_tests/system_tester.h +++ b/src/system_tests/system_tester.h @@ -321,24 +321,6 @@ class systemTest::SystemTestRunner /// no particle data bool _particleDataExists = false; - /*! - * \brief Move a file. Throws an exception if the file does not exist. - * or if the move was unsuccessful - * - * \param[in] sourcePath The path the the file to be moved - * \param[in] destinationDirectory The path to the director the file should - * be moved to - */ - void _safeMove(std::string const &sourcePath, std::string const &destinationDirectory); - - /*! - * \brief Checks if the given file exists. Throws an exception if the - * file does not exist. - * - * \param[in] filePath The path to the file to check for - */ - void _checkFileExists(std::string const &filePath); - /*! * \brief Using GTest assertions to check if the fiducial and test data have * the same number of time steps From 0374447d243d65238baae0a94297122a7daad505 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Wed, 8 Mar 2023 10:02:18 -0500 Subject: [PATCH 271/694] Automated Testing with Jenkins - Add jenkinsfile - Disable CUDA builds on Build & Lint GHA - Move the `make clobber` in run_tests.sh so it can work with automation better - Disable the tPARTICLESSYSTEMSphericalCollapse_CorrectInputExpectCorrectOutput test - Add more ignore paths to clang-tidy - Add some NOLINTs where needed to avoid erroneous errors with clang-tidy on C-3PO --- .clang-tidy | 2 +- .github/workflows/build_and_lint.yml | 40 ++++--- Jenkinsfile | 121 ++++++++++++++++++++ builds/run_tests.sh | 10 +- src/system_tests/particles_system_tests.cpp | 2 +- src/utils/DeviceVector_tests.cu | 3 + 6 files changed, 156 insertions(+), 22 deletions(-) create mode 100644 Jenkinsfile diff --git a/.clang-tidy b/.clang-tidy index ec0fb2608..86eab99ac 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -152,7 +152,7 @@ Checks: "*, -readability-suspicious-call-argument" WarningsAsErrors: '' # More paths can be ignored by modifying this so that it looks like '^((?!/PATH/ONE/|/PATH/TWO/).)*$' -HeaderFilterRegex: '^((?!/ihome/crc/install/power9/googletest/1.11.0/include/|/usr/lib/x86_64-linux-gnu/hdf5/serial/include/).)*$' +HeaderFilterRegex: '^((?!/ihome/crc/install/power9/googletest/1.11.0/include/|/ihome/crc/install/power9/googletest/1.11.0/include/|/usr/lib/x86_64-linux-gnu/hdf5/serial/include/).)*$' AnalyzeTemporaryDtors: false FormatStyle: 'file' UseColor: false diff --git a/.github/workflows/build_and_lint.yml b/.github/workflows/build_and_lint.yml index d0a42529f..59e5409af 100644 --- a/.github/workflows/build_and_lint.yml +++ b/.github/workflows/build_and_lint.yml @@ -1,5 +1,8 @@ name: Build & Lint +# This runs the HIP Builds. CUDA builds can be reenabled by adding the CUDA +# container to the matrix and uncommenting the CUDA lines + on: pull_request: schedule: @@ -26,7 +29,8 @@ jobs: fail-fast: false matrix: make-type: [hydro, gravity, disk, particles, cosmology, mhd, dust] - container: [{name: "CUDA", link: "docker://chollahydro/cholla:cuda_github"}, {name: "HIP",link: "docker://chollahydro/cholla:rocm_github"},] + # The CUDA container can be added with {name: "CUDA", link: "docker://chollahydro/cholla:cuda_github"} + container: [{name: "HIP",link: "docker://chollahydro/cholla:rocm_github"}] # Setup environment variables env: @@ -49,12 +53,12 @@ jobs: git --version git config --global --add safe.directory /__w/cholla/cholla git config --global --add safe.directory '*' - - name: Show CUDA and gcc version - if: matrix.container.name == 'CUDA' - run: | - cc --version - c++ --version - nvcc -V + # - name: Show CUDA and gcc version + # if: matrix.container.name == 'CUDA' + # run: | + # cc --version + # c++ --version + # nvcc -V - name: Show HIP and hipcc version if: matrix.container.name == 'HIP' run: | @@ -64,6 +68,7 @@ jobs: # Perform Build - name: Cholla setup run: | + make clobber source builds/run_tests.sh setupTests -c gcc echo "CHOLLA_ROOT = ${CHOLLA_ROOT}" @@ -82,12 +87,15 @@ jobs: buildChollaTests # Run Clang-tidy - - name: Run clang-tidy - if: matrix.container.name == 'CUDA' - run: make tidy TYPE=${{ matrix.make-type }} CLANG_TIDY_ARGS="--warnings-as-errors=*" - - name: Display tidy_results_cpp.log - if: ${{ (matrix.container.name == 'CUDA') && (always()) }} - run: cat tidy_results_cpp.log - - name: Display tidy_results_gpu.log - if: ${{ (matrix.container.name == 'CUDA') && (always()) }} - run: cat tidy_results_gpu.log + # - name: Run clang-tidy + # if: matrix.container.name == 'CUDA' + # run: make tidy TYPE=${{ matrix.make-type }} CLANG_TIDY_ARGS="--warnings-as-errors=*" + # - name: Display tidy_results_cpp.log + # if: ${{ (matrix.container.name == 'CUDA') && (always()) }} + # run: cat tidy_results_cpp.log + # - name: Display tidy_results_c.log + # if: ${{ (matrix.container.name == 'CUDA') && (always()) }} + # run: cat tidy_results_c.log + # - name: Display tidy_results_gpu.log + # if: ${{ (matrix.container.name == 'CUDA') && (always()) }} + # run: cat tidy_results_gpu.log diff --git a/Jenkinsfile b/Jenkinsfile new file mode 100644 index 000000000..f974e3da7 --- /dev/null +++ b/Jenkinsfile @@ -0,0 +1,121 @@ +pipeline +{ + agent none + + environment + { + CHOLLA_ROOT = "${env.WORKSPACE}" + CHOLLA_MACHINE = 'crc' + CHOLLA_LAUNCH_COMMAND = 'mpirun -np' + } + + stages + { + stage('BuildAndTest') + { + matrix + { + agent + { + label + { + label 'eschneider-ppc-n4' + customWorkspace "${env.JOB_NAME}/${env.CHOLLA_MAKE_TYPE}" + } + } + + axes + { + axis + { + name 'CHOLLA_MAKE_TYPE' + values 'hydro', 'gravity', 'disk', 'particles', 'cosmology', 'mhd', 'dust' + } + } + + stages + { + stage('Clone Repo Cholla') + { + steps + { + sh ''' + git submodule update --init --recursive + make clobber + ''' + } + } + stage('Build Cholla') + { + steps + { + sh ''' + source builds/run_tests.sh + setupTests -c gcc -t ${CHOLLA_MAKE_TYPE} + + buildCholla OPTIMIZE + ''' + } + } + stage('Build Tests') + { + steps + { + sh ''' + source builds/run_tests.sh + setupTests -c gcc -t ${CHOLLA_MAKE_TYPE} + + buildChollaTests + ''' + } + } + stage('Run Tests') + { + steps + { + sh ''' + source builds/run_tests.sh + setupTests -c gcc -t ${CHOLLA_MAKE_TYPE} + + runTests + ''' + } + } + stage('Run Clang Tidy') + { + steps + { + catchError(buildResult: 'FAILURE', stageResult: 'FAILURE') { + sh ''' + source builds/run_tests.sh + setupTests -c gcc -t ${CHOLLA_MAKE_TYPE} + + module load clang/15.0.2 + make tidy CLANG_TIDY_ARGS="--warnings-as-errors=*" TYPE=${CHOLLA_MAKE_TYPE} + ''' + } + } + } + stage('Show Tidy Results') + { + steps + { + // Print the clang-tidy results with bars of equal + // signs seperating each file + sh ''' + printf '=%.0s' {1..100} + printf "\n" + cat tidy_results_cpp.log + printf '=%.0s' {1..100} + printf "\n" + cat tidy_results_gpu.log + printf '=%.0s' {1..100} + printf "\n" + ''' + } + } + } + } + } + } +} diff --git a/builds/run_tests.sh b/builds/run_tests.sh index a5aac62d8..eb688dc14 100755 --- a/builds/run_tests.sh +++ b/builds/run_tests.sh @@ -54,6 +54,8 @@ setupTests () return 1 fi + builtin cd $CHOLLA_ROOT + # Determine the hostname then use that to pick the right machine name and launch # command if [[ -n ${CHOLLA_MACHINE+x} ]]; then @@ -94,10 +96,6 @@ setupTests () ;; esac - # Clean the cholla directory - builtin cd $CHOLLA_ROOT - make clobber - # Source the setup file source "${CHOLLA_ROOT}/builds/setup.${CHOLLA_MACHINE}${CHOLLA_COMPILER}.sh" } @@ -250,6 +248,10 @@ buildAndRunTests () esac done + # Clean the cholla directory + builtin cd $CHOLLA_ROOT + make clobber + # Now we get to setting up and building setupTests $MAKE_TYPE_ARG $COMPILER_ARG && \ if [[ -n $BUILD_GTEST ]]; then diff --git a/src/system_tests/particles_system_tests.cpp b/src/system_tests/particles_system_tests.cpp index 9c2fbb892..7cbd587cb 100644 --- a/src/system_tests/particles_system_tests.cpp +++ b/src/system_tests/particles_system_tests.cpp @@ -20,7 +20,7 @@ * */ /// @{ -TEST(tPARTICLESSYSTEMSphericalCollapse, CorrectInputExpectCorrectOutput) +TEST(tPARTICLESSYSTEMSphericalCollapse, DISABLED_CorrectInputExpectCorrectOutput) { systemTest::SystemTestRunner collapseTest(true); collapseTest.runTest(); diff --git a/src/utils/DeviceVector_tests.cu b/src/utils/DeviceVector_tests.cu index 2759e253a..3d03218df 100644 --- a/src/utils/DeviceVector_tests.cu +++ b/src/utils/DeviceVector_tests.cu @@ -310,6 +310,7 @@ TEST(tALLDeviceVectorAt, OutOfBoundsAccessExpectThrowOutOfRange) devVector.cpyHostToDevice(stdVec); // Check that the .at() method throws the correct exception + // NOLINTNEXTLINE(cppcoreguidelines-avoid-goto,hicpp-avoid-goto) EXPECT_THROW(devVector.at(100), std::out_of_range); } @@ -322,6 +323,7 @@ TEST(tALLDeviceVectorStdVectorHostToDeviceCopy, OutOfBoundsCopyExpectThrowOutOfR std::iota(stdVec.begin(), stdVec.end(), 0); // Copy the value to the device memory + // NOLINTNEXTLINE(cppcoreguidelines-avoid-goto,hicpp-avoid-goto) EXPECT_THROW(devVector.cpyHostToDevice(stdVec), std::out_of_range); } @@ -334,5 +336,6 @@ TEST(tALLDeviceVectorStdVectorDeviceToHostCopy, OutOfBoundsCopyExpectThrowOutOfR std::iota(stdVec.begin(), stdVec.end(), 0); // Copy the value to the device memory + // NOLINTNEXTLINE(cppcoreguidelines-avoid-goto,hicpp-avoid-goto) EXPECT_THROW(devVector.cpyDeviceToHost(stdVec), std::out_of_range); } From e24b94d456766d2b6b9a509e1f40f26386422843 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Fri, 31 Mar 2023 13:45:55 -0400 Subject: [PATCH 272/694] Fix & Enable -use-emplace checks This enables the modernize-use-emplace check and its alias, hicpp-use-emplace. They check for cases when the programmer is using `push_back` when it would be easier, simplier, and more performant to use `emplace_back`. --- .clang-tidy | 2 -- src/main_tests.cpp | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/.clang-tidy b/.clang-tidy index 86eab99ac..c43b78f48 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -100,7 +100,6 @@ Checks: "*, -hicpp-signed-bitwise, -hicpp-special-member-functions, -hicpp-use-auto, - -hicpp-use-emplace, -hicpp-use-equals-default, -hicpp-use-noexcept, -hicpp-use-nullptr, @@ -120,7 +119,6 @@ Checks: "*, -modernize-redundant-void-arg, -modernize-use-auto, -modernize-use-default-member-init, - -modernize-use-emplace, -modernize-use-equals-default, -modernize-use-nodiscard, -modernize-use-noexcept, diff --git a/src/main_tests.cpp b/src/main_tests.cpp index 1efd9f43e..3be97f3eb 100644 --- a/src/main_tests.cpp +++ b/src/main_tests.cpp @@ -86,7 +86,7 @@ class InputParser InputParser(int &argc, char **argv) { for (int i = 1; i < argc; ++i) { - this->_tokens.push_back(std::string(argv[i])); + this->_tokens.emplace_back(argv[i]); } } ~InputParser() = default; From e666f85b08bc30709116c20abb3fb7a2a566f254 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Fri, 31 Mar 2023 13:51:20 -0400 Subject: [PATCH 273/694] Enable alread fixed cert-str34-c check This check is an alias to bugprone-signed-char-misuse which has already been fixed --- .clang-tidy | 1 - 1 file changed, 1 deletion(-) diff --git a/.clang-tidy b/.clang-tidy index c43b78f48..67ab0e36a 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -46,7 +46,6 @@ Checks: "*, -cert-err58-cpp, -cert-msc32-c, -cert-msc51-cpp, - -cert-str34-c, -clang-analyzer-core.CallAndMessage, -clang-analyzer-core.NullDereference, -clang-analyzer-core.UndefinedBinaryOperatorResult, From af8f6835bfad87b1a6f6998dc29c97edf97e4a08 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Fri, 31 Mar 2023 13:59:20 -0400 Subject: [PATCH 274/694] Fix & enable cert-dcl50-cpp check This check prohibit C-style variadic functions (the ones with ...). We only have one, `chprintf`, and since that's basically an MPI safe wrapper around `printf` I don't think we should change it. Instead I marked it as ok using the NOLINT comment and enabled the check. --- .clang-tidy | 1 - src/io/io.cpp | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/.clang-tidy b/.clang-tidy index 67ab0e36a..1b5496883 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -38,7 +38,6 @@ Checks: "*, -bugprone-implicit-widening-of-multiplication-result, -bugprone-narrowing-conversions, - -cert-dcl50-cpp, -cert-dcl59-cpp, -cert-env33-c, -cert-err33-c, diff --git a/src/io/io.cpp b/src/io/io.cpp index 833c1ad75..69dbff617 100644 --- a/src/io/io.cpp +++ b/src/io/io.cpp @@ -3287,7 +3287,7 @@ void Grid3D::Read_Grid_HDF5(hid_t file_id, struct parameters P) #endif /* MPI-safe printf routine */ -int chprintf(const char *__restrict sdata, ...) +int chprintf(const char *__restrict sdata, ...) // NOLINT(cert-dcl50-cpp) { int code = 0; #ifdef MPI_CHOLLA From b0c1be04efece0c819e5a21c263564d7a9035e1c Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Fri, 31 Mar 2023 14:18:49 -0400 Subject: [PATCH 275/694] Fix & enable clang-analyzer-core.uninitialized.Assign check --- .clang-tidy | 1 - src/mpi/mpi_routines.cpp | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/.clang-tidy b/.clang-tidy index 1b5496883..6f558b538 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -49,7 +49,6 @@ Checks: "*, -clang-analyzer-core.NullDereference, -clang-analyzer-core.UndefinedBinaryOperatorResult, -clang-analyzer-core.uninitialized.ArraySubscript, - -clang-analyzer-core.uninitialized.Assign, -clang-analyzer-core.uninitialized.UndefReturn, -clang-analyzer-deadcode.DeadStores, -clang-analyzer-optin.performance.Padding, diff --git a/src/mpi/mpi_routines.cpp b/src/mpi/mpi_routines.cpp index 2e0faaa16..0250080ea 100644 --- a/src/mpi/mpi_routines.cpp +++ b/src/mpi/mpi_routines.cpp @@ -542,7 +542,7 @@ void DomainDecompositionBLOCK(struct parameters *P, struct Header *H, int nx_gin void Allocate_MPI_DeviceBuffers(struct Header *H) { - int xbsize, ybsize, zbsize; + int xbsize = 0, ybsize = 0, zbsize = 0; if (H->ny == 1 && H->nz == 1) { xbsize = H->n_fields * H->n_ghost; ybsize = 1; From f16e613ace27893b3eae57dfd34fca847081181e Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Fri, 31 Mar 2023 14:24:28 -0400 Subject: [PATCH 276/694] Fix & enable clang-analyzer-valist.Uninitialized This only has one error and it's in `chprintf`. The code appears to be correct as is so I'm marking the offending line as NOLINT for this check. --- .clang-tidy | 1 - src/io/io.cpp | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/.clang-tidy b/.clang-tidy index 6f558b538..de0adc29f 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -53,7 +53,6 @@ Checks: "*, -clang-analyzer-deadcode.DeadStores, -clang-analyzer-optin.performance.Padding, -clang-analyzer-security.insecureAPI.strcpy, - -clang-analyzer-valist.Uninitialized, -clang-diagnostic-format, -clang-diagnostic-macro-redefined, -clang-diagnostic-unknown-cuda-version, diff --git a/src/io/io.cpp b/src/io/io.cpp index 69dbff617..bd64d0e1f 100644 --- a/src/io/io.cpp +++ b/src/io/io.cpp @@ -3297,7 +3297,7 @@ int chprintf(const char *__restrict sdata, ...) // NOLINT(cert-dcl50-cpp) va_list ap; va_start(ap, sdata); - code = vfprintf(stdout, sdata, ap); + code = vfprintf(stdout, sdata, ap); // NOLINT(clang-analyzer-valist.Uninitialized) va_end(ap); fflush(stdout); From 82fdc2804add1c8bfc5222fed291f334b438dee1 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Fri, 31 Mar 2023 14:44:55 -0400 Subject: [PATCH 277/694] Fix non-hydro build for enabled checks --- src/gravity/gravity_boundaries.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gravity/gravity_boundaries.cpp b/src/gravity/gravity_boundaries.cpp index a49b75185..85ee8a142 100644 --- a/src/gravity/gravity_boundaries.cpp +++ b/src/gravity/gravity_boundaries.cpp @@ -187,7 +187,7 @@ void Grid3D::Compute_Potential_Isolated_Boundary(int direction, int side, int bc // for bc_pontential_type = 1 the mod_frac is the fraction // of the disk mass contributed by the simulated particles Real mod_frac = SIMULATED_FRACTION; - Real pot_val; + Real pot_val = 0.0; int i, j, k, id; for (k = 0; k < nGHST; k++) { for (i = 0; i < n_i; i++) { From 0549991c0d523ce02fe584a5df78e907a7a2e70d Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Fri, 31 Mar 2023 14:57:40 -0400 Subject: [PATCH 278/694] Reformat some badly formatted doxygen comments --- src/mhd/magnetic_divergence.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/mhd/magnetic_divergence.h b/src/mhd/magnetic_divergence.h index 605a50dae..3833692c6 100644 --- a/src/mhd/magnetic_divergence.h +++ b/src/mhd/magnetic_divergence.h @@ -32,10 +32,12 @@ namespace mhd * API * * \param[in] dev_conserved The device array of conserved variables - * \param[out] maxDivergence The device scalar to store the reduced divergence - * at \param[in] dx Cell size in the X-direction \param[in] dy Cell size in the - * Y-direction \param[in] dz Cell size in the Z-direction \param[in] nx Number - * of cells in the X-direction \param[in] ny Number of cells in the Y-direction + * \param[out] maxDivergence The device scalar to store the reduced divergence at + * \param[in] dx Cell size in the X-direction + * \param[in] dy Cell size in the Y-direction + * \param[in] dz Cell size in the Z-direction + * \param[in] nx Number of cells in the X-direction + * \param[in] ny Number of cells in the Y-direction * \param[in] nz Number of cells in the Z-direction * \param[in] n_cells Total number of cells */ From 745b3c2d3b0487368feb7257a43985c2023f4c1d Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Fri, 31 Mar 2023 15:17:39 -0400 Subject: [PATCH 279/694] Permanently disable hicpp-multiway-paths-covered check This check checks for missing `else`s in `if` statements and `default` in `switch` statments. We have many cases where there's no need for a fall through case so I've added it to the list of permanently disable checks. --- .clang-tidy | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.clang-tidy b/.clang-tidy index de0adc29f..61b5260bb 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -32,6 +32,7 @@ Checks: "*, -readability-avoid-const-params-in-decls, -readability-static-accessed-through-instance, -misc-unused-parameters, + -hicpp-multiway-paths-covered, google-readability-avoid-underscore-in-googletest-name, google-upgrade-googletest-case, @@ -90,7 +91,6 @@ Checks: "*, -hicpp-deprecated-headers, -hicpp-explicit-conversions, -hicpp-member-init, - -hicpp-multiway-paths-covered, -hicpp-no-array-decay, -hicpp-no-malloc, -hicpp-signed-bitwise, From dcec6fb9555890145cfa0017db8690ee2ad2a82e Mon Sep 17 00:00:00 2001 From: alwinm Date: Fri, 31 Mar 2023 13:09:54 -0700 Subject: [PATCH 280/694] Update Doxyfile --- docs/doxygen/Doxyfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/doxygen/Doxyfile b/docs/doxygen/Doxyfile index 8a9752f90..4fedbe262 100644 --- a/docs/doxygen/Doxyfile +++ b/docs/doxygen/Doxyfile @@ -908,7 +908,7 @@ FILE_PATTERNS = *.c \ # be searched for input files as well. # The default value is: NO. -RECURSIVE = NO +RECURSIVE = YES # The EXCLUDE tag can be used to specify files and/or directories that should be # excluded from the INPUT source files. This way you can easily exclude a From 3302ae06b27fa100b81f5f8c88b76cbb0ff9bd74 Mon Sep 17 00:00:00 2001 From: helenarichie Date: Fri, 31 Mar 2023 16:35:46 -0400 Subject: [PATCH 281/694] update comments --- src/dust/dust_cuda.cu | 39 +++++++++++++++++++++++++++++++++------ 1 file changed, 33 insertions(+), 6 deletions(-) diff --git a/src/dust/dust_cuda.cu b/src/dust/dust_cuda.cu index 344d3c9ce..9a2acc9fd 100644 --- a/src/dust/dust_cuda.cu +++ b/src/dust/dust_cuda.cu @@ -1,12 +1,23 @@ +/*! + * \file dust_cuda.cu + * \author Helena Richie (helenarichie@gmail.com) + * \brief Contains code that updates the dust density scalar field. The dust_kernel function determines the rate of + * change of dust density, which is controlled by the sputtering timescale. The sputtering timescale is from the + * McKinnon et al. (2017) model of dust sputtering, which depends on the cell's gas density and temperature. + */ + #ifdef CUDA #ifdef DUST + // STL includes #include + // External includes #include #include #include + // Local includes #include "../global/global.h" #include "../global/global_cuda.h" #include "../grid/grid3D.h" @@ -28,6 +39,20 @@ void Dust_Update(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n __global__ void Dust_Kernel(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dt, Real gamma) { + /*! + * \brief Compute the change in dust density for a cell and update its value in dev_conserved. + * + * \param[in] dev_conserved The device conserved variable array + * \param[in] nx The number of cells in the x-direction + * \param[in] ny The number of cells in the y-direction + * \param[in] nz The number of cells in the z-direction + * \param[in] n_ghost The number of ghost cells + * \param[in] dt + * \param[in] gamma + + * \return None + */ + // get grid indices int n_cells = nx * ny * nz; int is, ie, js, je, ks, ke; @@ -38,7 +63,6 @@ __global__ void Dust_Kernel(Real *dev_conserved, int nx, int ny, int nz, int n_g int zid = id / (nx * ny); int yid = (id - zid * nx * ny) / nx; int xid = id - zid * nx * ny - yid * nx; - // add a thread id within the block // define physics variables Real d_gas, d_dust; // fluid mass densities @@ -52,7 +76,7 @@ __global__ void Dust_Kernel(Real *dev_conserved, int nx, int ny, int nz, int n_g // define integration variables Real dd_dt; // instantaneous rate of change in dust density - Real dd; // change in dust density at current time-step + Real dd; // change in dust density at current timestep Real dd_max = 0.01; // allowable percentage of dust density increase Real dt_sub; // refined timestep @@ -62,12 +86,14 @@ __global__ void Dust_Kernel(Real *dev_conserved, int nx, int ny, int nz, int n_g d_dust = dev_conserved[id + n_cells * grid_enum::dust_density]; E = dev_conserved[id + n_cells * grid_enum::Energy]; + // convert mass density to number density n = d_gas * DENSITY_UNIT / (mu * MP); if (E < 0.0 || E != E) { return; } + // get conserved quanitites vx = dev_conserved[id + n_cells * grid_enum::momentum_x] / d_gas; vy = dev_conserved[id + n_cells * grid_enum::momentum_y] / d_gas; vz = dev_conserved[id + n_cells * grid_enum::momentum_z] / d_gas; @@ -86,12 +112,13 @@ __global__ void Dust_Kernel(Real *dev_conserved, int nx, int ny, int nz, int n_g T_init = hydro_utilities::Calc_Temp_DE(d_gas, ge, gamma, n); #endif // DE + // if dual energy is turned on use temp from total internal energy T = T_init; - Real tau_sp = calc_tau_sp(n, T) / TIME_UNIT; // kyr, sim units + Real tau_sp = calc_tau_sp(n, T) / TIME_UNIT; // sputtering timescale, kyr (sim units) - dd_dt = calc_dd_dt(d_dust, tau_sp); - dd = dd_dt * dt; + dd_dt = calc_dd_dt(d_dust, tau_sp); // rate of change in dust density at current timestep + dd = dd_dt * dt; // change in dust density at current timestep // ensure that dust density is not changing too rapidly while (dd / d_dust > dd_max) { @@ -123,7 +150,7 @@ __device__ __host__ Real calc_tau_sp(Real n, Real T) Real omega = 2.5; Real A = 0.17e9 * YR_IN_S; // 0.17 Gyr in s - Real tau_sp = A * (a1 / d0) * (pow(T_0 / T, omega) + 1); // s + Real tau_sp = A * (a1 / d0) * (pow(T_0 / T, omega) + 1); // sputtering timescale, s return tau_sp; } From 337c3630ce1e1464cacfa576c291005c21457a85 Mon Sep 17 00:00:00 2001 From: helenarichie Date: Fri, 31 Mar 2023 16:55:28 -0400 Subject: [PATCH 282/694] finish comments in dust_cuda.cu --- src/dust/dust_cuda.cu | 40 ++++++++++++++++++++++++++++------------ 1 file changed, 28 insertions(+), 12 deletions(-) diff --git a/src/dust/dust_cuda.cu b/src/dust/dust_cuda.cu index 9a2acc9fd..3b73a9046 100644 --- a/src/dust/dust_cuda.cu +++ b/src/dust/dust_cuda.cu @@ -29,6 +29,20 @@ void Dust_Update(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dt, Real gamma) { + /*! + * \brief Launch the dust kernel. + * + * \param[in] dev_conserved The device conserved variable array. + * \param[in] nx Number of cells in the x-direction + * \param[in] ny Number of cells in the y-direction + * \param[in] nz Number of cells in the z-direction + * \param[in] n_ghost Number of ghost cells + * \param[in] n_fields + * \param[in] dt Simulation timestep + * \param[in] gamma Specific heat ratio + * + * \return None + */ int n_cells = nx * ny * nz; int ngrid = (n_cells + TPB - 1) / TPB; dim3 dim1dGrid(ngrid, 1, 1); @@ -40,18 +54,20 @@ void Dust_Update(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n __global__ void Dust_Kernel(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dt, Real gamma) { /*! - * \brief Compute the change in dust density for a cell and update its value in dev_conserved. - * - * \param[in] dev_conserved The device conserved variable array - * \param[in] nx The number of cells in the x-direction - * \param[in] ny The number of cells in the y-direction - * \param[in] nz The number of cells in the z-direction - * \param[in] n_ghost The number of ghost cells - * \param[in] dt - * \param[in] gamma - - * \return None - */ + * \brief Compute the change in dust density for a cell and update its value in dev_conserved. + * + * \param[out] dev_conserved The device conserved variable array. The dust field is updated in this function. If dual + * energy is turned on, then the dual energy field is updated, as well. + * \param[in] nx Number of cells in the x-direction + * \param[in] ny Number of cells in the y-direction + * \param[in] nz Number of cells in the z-direction + * \param[in] n_ghost Number of ghost cells + * \param[in] n_fields + * \param[in] dt Simulation timestep + * \param[in] gamma Specific heat ratio + * + * \return None + */ // get grid indices int n_cells = nx * ny * nz; From deb0b1832d0957f641725f5d0d7c1ed3a2848181 Mon Sep 17 00:00:00 2001 From: helenarichie Date: Fri, 31 Mar 2023 17:09:27 -0400 Subject: [PATCH 283/694] add doxygen comments to header and move function doc strings to header --- src/dust/dust_cuda.cu | 30 --------------------- src/dust/dust_cuda.h | 52 ++++++++++++++++++++++++++++++++++++ src/dust/dust_cuda_tests.cpp | 8 +++--- 3 files changed, 55 insertions(+), 35 deletions(-) diff --git a/src/dust/dust_cuda.cu b/src/dust/dust_cuda.cu index 3b73a9046..65a69836c 100644 --- a/src/dust/dust_cuda.cu +++ b/src/dust/dust_cuda.cu @@ -29,20 +29,6 @@ void Dust_Update(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dt, Real gamma) { - /*! - * \brief Launch the dust kernel. - * - * \param[in] dev_conserved The device conserved variable array. - * \param[in] nx Number of cells in the x-direction - * \param[in] ny Number of cells in the y-direction - * \param[in] nz Number of cells in the z-direction - * \param[in] n_ghost Number of ghost cells - * \param[in] n_fields - * \param[in] dt Simulation timestep - * \param[in] gamma Specific heat ratio - * - * \return None - */ int n_cells = nx * ny * nz; int ngrid = (n_cells + TPB - 1) / TPB; dim3 dim1dGrid(ngrid, 1, 1); @@ -53,22 +39,6 @@ void Dust_Update(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n __global__ void Dust_Kernel(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dt, Real gamma) { - /*! - * \brief Compute the change in dust density for a cell and update its value in dev_conserved. - * - * \param[out] dev_conserved The device conserved variable array. The dust field is updated in this function. If dual - * energy is turned on, then the dual energy field is updated, as well. - * \param[in] nx Number of cells in the x-direction - * \param[in] ny Number of cells in the y-direction - * \param[in] nz Number of cells in the z-direction - * \param[in] n_ghost Number of ghost cells - * \param[in] n_fields - * \param[in] dt Simulation timestep - * \param[in] gamma Specific heat ratio - * - * \return None - */ - // get grid indices int n_cells = nx * ny * nz; int is, ie, js, je, ks, ke; diff --git a/src/dust/dust_cuda.h b/src/dust/dust_cuda.h index 0377b645b..df35c6675 100644 --- a/src/dust/dust_cuda.h +++ b/src/dust/dust_cuda.h @@ -1,3 +1,10 @@ +/*! + * \file dust_cuda.h + * \author Helena Richie (helenarichie@pitt.edu) + * \brief Contains the declaration for the kernel that updates the dust density scalar in dev_conserved. + * + */ + #ifdef CUDA #ifdef DUST @@ -9,13 +16,58 @@ #include "../global/global.h" #include "../utils/gpu.hpp" +/*! + * \brief Launch the dust kernel. + * + * \param[in] dev_conserved The device conserved variable array. + * \param[in] nx Number of cells in the x-direction + * \param[in] ny Number of cells in the y-direction + * \param[in] nz Number of cells in the z-direction + * \param[in] n_ghost Number of ghost cells + * \param[in] n_fields + * \param[in] dt Simulation timestep + * \param[in] gamma Specific heat ratio + * + * \return None + */ void Dust_Update(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dt, Real gamma); +/*! + * \brief Compute the change in dust density for a cell and update its value in dev_conserved. + * + * \param[out] dev_conserved The device conserved variable array. The dust field is updated in this function. If dual + * energy is turned on, then the dual energy field is updated, as well. + * \param[in] nx Number of cells in the x-direction + * \param[in] ny Number of cells in the y-direction + * \param[in] nz Number of cells in the z-direction + * \param[in] n_ghost Number of ghost cells + * \param[in] n_fields + * \param[in] dt Simulation timestep + * \param[in] gamma Specific heat ratio + * + * \return None + */ __global__ void Dust_Kernel(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dt, Real gamma); +/*! + * \brief Compute the sputtering timescale based on a cell's density and temperature. + * + * \param[in] n Gas number density in cm^-3 + * \param[in] T Gas temperature in K + * + * \return Real Sputtering timescale in seconds (McKinnon et al. 2017) + */ __device__ __host__ Real calc_tau_sp(Real n, Real T); +/*! + * \brief Compute the rate of change in dust density based on the current dust density and sputtering timescale. + * + * \param[in] d_dust Dust mass density in M_sun/kpc^3 + * \param[in] tau_sp Sputtering timescale in kyr + * + * \return Real Dust density rate of change (McKinnon et al. 2017) + */ __device__ __host__ Real calc_dd_dt(Real d_dust, Real tau_sp); #endif // DUST diff --git a/src/dust/dust_cuda_tests.cpp b/src/dust/dust_cuda_tests.cpp index e3db8dac1..1b9a8b284 100644 --- a/src/dust/dust_cuda_tests.cpp +++ b/src/dust/dust_cuda_tests.cpp @@ -1,13 +1,11 @@ /*! * \file dust_cuda_tests.cpp - * \author Helena Richie (helenarichie@pitt.edu) - * \brief Test dust model functions - * + * \author Helena Richie (helenarichie@gmail.com) + * \brief Tests for dust model functions. */ // STL Includes #include - #include #include @@ -15,7 +13,7 @@ #include // Include GoogleTest and related libraries/headers // Local Includes -#include "../dust/dust_cuda.h" // Include code to test +#include "../dust/dust_cuda.h" #include "../global/global_cuda.h" #include "../utils/gpu.hpp" #include "../utils/testing_utilities.h" From bc438980f78a433fe8da051196da6604db95c5b8 Mon Sep 17 00:00:00 2001 From: helenarichie Date: Fri, 31 Mar 2023 17:10:24 -0400 Subject: [PATCH 284/694] run clang format --- src/dust/dust_cuda.cu | 6 +++--- src/dust/dust_cuda.h | 8 ++++---- src/dust/dust_cuda_tests.cpp | 1 + 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/src/dust/dust_cuda.cu b/src/dust/dust_cuda.cu index 65a69836c..dfd1e4246 100644 --- a/src/dust/dust_cuda.cu +++ b/src/dust/dust_cuda.cu @@ -1,8 +1,8 @@ /*! * \file dust_cuda.cu * \author Helena Richie (helenarichie@gmail.com) - * \brief Contains code that updates the dust density scalar field. The dust_kernel function determines the rate of - * change of dust density, which is controlled by the sputtering timescale. The sputtering timescale is from the + * \brief Contains code that updates the dust density scalar field. The dust_kernel function determines the rate of + * change of dust density, which is controlled by the sputtering timescale. The sputtering timescale is from the * McKinnon et al. (2017) model of dust sputtering, which depends on the cell's gas density and temperature. */ @@ -104,7 +104,7 @@ __global__ void Dust_Kernel(Real *dev_conserved, int nx, int ny, int nz, int n_g Real tau_sp = calc_tau_sp(n, T) / TIME_UNIT; // sputtering timescale, kyr (sim units) dd_dt = calc_dd_dt(d_dust, tau_sp); // rate of change in dust density at current timestep - dd = dd_dt * dt; // change in dust density at current timestep + dd = dd_dt * dt; // change in dust density at current timestep // ensure that dust density is not changing too rapidly while (dd / d_dust > dd_max) { diff --git a/src/dust/dust_cuda.h b/src/dust/dust_cuda.h index df35c6675..aea9befec 100644 --- a/src/dust/dust_cuda.h +++ b/src/dust/dust_cuda.h @@ -27,7 +27,7 @@ * \param[in] n_fields * \param[in] dt Simulation timestep * \param[in] gamma Specific heat ratio - * + * * \return None */ void Dust_Update(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dt, Real gamma); @@ -44,7 +44,7 @@ void Dust_Update(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n * \param[in] n_fields * \param[in] dt Simulation timestep * \param[in] gamma Specific heat ratio - * + * * \return None */ __global__ void Dust_Kernel(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dt, @@ -55,7 +55,7 @@ __global__ void Dust_Kernel(Real *dev_conserved, int nx, int ny, int nz, int n_g * * \param[in] n Gas number density in cm^-3 * \param[in] T Gas temperature in K - * + * * \return Real Sputtering timescale in seconds (McKinnon et al. 2017) */ __device__ __host__ Real calc_tau_sp(Real n, Real T); @@ -65,7 +65,7 @@ __device__ __host__ Real calc_tau_sp(Real n, Real T); * * \param[in] d_dust Dust mass density in M_sun/kpc^3 * \param[in] tau_sp Sputtering timescale in kyr - * + * * \return Real Dust density rate of change (McKinnon et al. 2017) */ __device__ __host__ Real calc_dd_dt(Real d_dust, Real tau_sp); diff --git a/src/dust/dust_cuda_tests.cpp b/src/dust/dust_cuda_tests.cpp index 1b9a8b284..03bd8111f 100644 --- a/src/dust/dust_cuda_tests.cpp +++ b/src/dust/dust_cuda_tests.cpp @@ -6,6 +6,7 @@ // STL Includes #include + #include #include From 954d602c9d47975651ecf326e52e86b1bdd9a7c5 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Mon, 3 Apr 2023 09:31:06 -0400 Subject: [PATCH 285/694] Fix missing headers in chemistry_functions.cpp --- src/chemistry_gpu/chemistry_functions.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/chemistry_gpu/chemistry_functions.cpp b/src/chemistry_gpu/chemistry_functions.cpp index 227c6eee5..181c2a98e 100644 --- a/src/chemistry_gpu/chemistry_functions.cpp +++ b/src/chemistry_gpu/chemistry_functions.cpp @@ -2,6 +2,8 @@ #include "../grid/grid3D.h" #include "../io/io.h" + #include "../utils/hydro_utilities.h" + #include "../utils/mhd_utilities.h" #include "chemistry_gpu.h" #include "rates.cuh" From b5c8c17f100a3c79dcf6d2a1fd1aa05e9b3d5798 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Thu, 16 Mar 2023 10:57:56 -0400 Subject: [PATCH 286/694] Add Advecting Field Loop test & initial conditions - Example parameter file in `examples/3D/advecting_field_loop.txt` - New parameter "R" for the radius of the field loop - New initial conditions type `Advecting_Field_Loop` and an initializing function of the same name - New MHD system test for the advecting field loop - New function for initializing the magnetic field from the vector potential and a test to go with it - Update `Grid3D::AllocateMemory` to use `grid_enums` - Fix a typo in the `.clang-tidy` file --- .clang-tidy | 2 +- examples/3D/advecting_field_loop.txt | 55 ++++++++++++ src/global/global.cpp | 2 + src/global/global.h | 1 + src/grid/grid3D.cpp | 16 ++-- src/grid/grid3D.h | 8 ++ src/grid/initial_conditions.cpp | 87 +++++++++++++++---- ...eldLoopCorrectInputExpectCorrectOutput.txt | 55 ++++++++++++ src/system_tests/mhd_system_tests.cpp | 7 ++ src/utils/mhd_utilities.cu | 24 +++++ src/utils/mhd_utilities.h | 18 +++- src/utils/mhd_utilities_tests.cu | 60 +++++++++++++ 12 files changed, 306 insertions(+), 29 deletions(-) create mode 100644 examples/3D/advecting_field_loop.txt create mode 100644 src/system_tests/input_files/tMHDSYSTEMParameterizedMpi_AdvectingFieldLoopCorrectInputExpectCorrectOutput.txt diff --git a/.clang-tidy b/.clang-tidy index 86eab99ac..67ebc1f96 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -2,7 +2,7 @@ # for those checks, etc. It uses as many of the default values as possible and # runs all checks with some exclusions by default. # -# The full list of clang-format 15 checks and documentation can be found +# The full list of clang-tidy 15 checks and documentation can be found # [here](https://releases.llvm.org/15.0.0/tools/clang/tools/extra/docs/clang-tidy/index.html) # # The "Checks" command should have 5 sections seperated by a newline: diff --git a/examples/3D/advecting_field_loop.txt b/examples/3D/advecting_field_loop.txt new file mode 100644 index 000000000..9819e5f9a --- /dev/null +++ b/examples/3D/advecting_field_loop.txt @@ -0,0 +1,55 @@ +# +# Parameter File for an MHD Advecting Field Loop as defined in +# [Gardiner & Stone 2008](https://ui.adsabs.harvard.edu/abs/2008JCoPh.227.4123G/abstract) +# + +################################################ +# number of grid cells in the x dimension +nx=128 +# number of grid cells in the y dimension +ny=128 +# number of grid cells in the z dimension +nz=256 +# final output time +tout=2.0 +# time interval for output +outstep=2.0 +# name of initial conditions +init=Advecting_Field_Loop +# domain properties +xmin=-0.5 +ymin=-0.5 +zmin=-1.0 +xlen=1.0 +ylen=1.0 +zlen=2.0 +# type of boundary conditions +xl_bcnd=1 +xu_bcnd=1 +yl_bcnd=1 +yu_bcnd=1 +zl_bcnd=1 +zu_bcnd=1 +# path to output directory +outdir=./ + +################################################# +# Parameters for advecting field loop problem +# initial density +rho=1.0 +# velocity in the x direction +vx=1.0 +# velocity in the y direction +vy=1.0 +# velocity in the z direction +vz=2.0 +# initial pressure +P=1.0 +# amplitude of the loop/magnetic field background value +A=0.001 +# Radius of the Loop +R=0.3 + +# value of gamma +gamma=1.666666666666667 + diff --git a/src/global/global.cpp b/src/global/global.cpp index 394e30a04..db3bc162a 100644 --- a/src/global/global.cpp +++ b/src/global/global.cpp @@ -359,6 +359,8 @@ void parse_param(char *name, char *value, struct parameters *parms) parms->yaw = atof(value); } else if (strcmp(name, "polarization") == 0) { parms->polarization = atof(value); + } else if (strcmp(name, "R") == 0) { + parms->R = atof(value); #ifdef PARTICLES } else if (strcmp(name, "prng_seed") == 0) { parms->prng_seed = atoi(value); diff --git a/src/global/global.h b/src/global/global.h index 9ba6ca331..4fa5f9059 100644 --- a/src/global/global.h +++ b/src/global/global.h @@ -263,6 +263,7 @@ struct parameters { Real pitch = 0; Real yaw = 0; Real polarization = 0; + Real R = 0; #ifdef PARTICLES // The random seed for particle simulations. With the default of 0 then a // machine dependent seed will be generated. diff --git a/src/grid/grid3D.cpp b/src/grid/grid3D.cpp index d83c32e0b..dac2795c8 100644 --- a/src/grid/grid3D.cpp +++ b/src/grid/grid3D.cpp @@ -290,11 +290,11 @@ void Grid3D::AllocateMemory(void) CudaSafeCall(cudaHostAlloc((void **)&C.host, H.n_fields * H.n_cells * sizeof(Real), cudaHostAllocDefault)); // point conserved variables to the appropriate locations - C.density = C.host; - C.momentum_x = &(C.host[H.n_cells]); - C.momentum_y = &(C.host[2 * H.n_cells]); - C.momentum_z = &(C.host[3 * H.n_cells]); - C.Energy = &(C.host[4 * H.n_cells]); + C.density = &(C.host[grid_enum::density * H.n_cells]); + C.momentum_x = &(C.host[grid_enum::momentum_x * H.n_cells]); + C.momentum_y = &(C.host[grid_enum::momentum_y * H.n_cells]); + C.momentum_z = &(C.host[grid_enum::momentum_z * H.n_cells]); + C.Energy = &(C.host[grid_enum::Energy * H.n_cells]); #ifdef SCALAR C.scalar = &(C.host[H.n_cells * grid_enum::scalar]); #ifdef BASIC_SCALAR @@ -302,9 +302,9 @@ void Grid3D::AllocateMemory(void) #endif #endif // SCALAR #ifdef MHD - C.magnetic_x = &(C.host[(grid_enum::magnetic_x)*H.n_cells]); - C.magnetic_y = &(C.host[(grid_enum::magnetic_y)*H.n_cells]); - C.magnetic_z = &(C.host[(grid_enum::magnetic_z)*H.n_cells]); + C.magnetic_x = &(C.host[grid_enum::magnetic_x * H.n_cells]); + C.magnetic_y = &(C.host[grid_enum::magnetic_y * H.n_cells]); + C.magnetic_z = &(C.host[grid_enum::magnetic_z * H.n_cells]); #endif // MHD #ifdef DE C.GasEnergy = &(C.host[(H.n_fields - 1) * H.n_cells]); diff --git a/src/grid/grid3D.h b/src/grid/grid3D.h index 28d143e17..aaddacae4 100644 --- a/src/grid/grid3D.h +++ b/src/grid/grid3D.h @@ -689,6 +689,14 @@ class Grid3D * \param P The parameters. Only uses Vx, pitch, and yaw */ void Circularly_Polarized_Alfven_Wave(struct parameters const P); + + /*! + * \brief Initialize the grid with a advecting field loop. See [Gardiner & + * Stone 2008](https://arxiv.org/abs/0712.2634). + * + * \param P The parameters object + */ + void Advecting_Field_Loop(struct parameters const P); #endif // MHD #ifdef MPI_CHOLLA void Set_Boundaries_MPI(struct parameters P); diff --git a/src/grid/initial_conditions.cpp b/src/grid/initial_conditions.cpp index 218a3bd28..64e0b732b 100644 --- a/src/grid/initial_conditions.cpp +++ b/src/grid/initial_conditions.cpp @@ -87,6 +87,8 @@ void Grid3D::Set_Initial_Conditions(parameters P) #ifdef MHD } else if (strcmp(P.init, "Circularly_Polarized_Alfven_Wave") == 0) { Circularly_Polarized_Alfven_Wave(P); + } else if (strcmp(P.init, "Advecting_Field_Loop") == 0) { + Advecting_Field_Loop(P); #endif // MHD } else { chprintf("ABORT: %s: Unknown initial conditions!\n", P.init); @@ -1658,24 +1660,7 @@ void Grid3D::Circularly_Polarized_Alfven_Wave(struct parameters const P) } // Compute the magnetic field - for (int k = 1; k < H.nz; k++) { - for (int j = 1; j < H.ny; j++) { - for (int i = 1; i < H.nx; i++) { - // Get cell index. The "xmo" means: X direction Minus One - int const id = cuda_utilities::compute1DIndex(i, j, k, H.nx, H.ny); - int const idxmo = cuda_utilities::compute1DIndex(i - 1, j, k, H.nx, H.ny); - int const idymo = cuda_utilities::compute1DIndex(i, j - 1, k, H.nx, H.ny); - int const idzmo = cuda_utilities::compute1DIndex(i, j, k - 1, H.nx, H.ny); - - C.magnetic_x[id] = (vectorPotential.at(id + 2 * H.n_cells) - vectorPotential.at(idymo + 2 * H.n_cells)) / H.dy - - (vectorPotential.at(id + 1 * H.n_cells) - vectorPotential.at(idzmo + 1 * H.n_cells)) / H.dz; - C.magnetic_y[id] = (vectorPotential.at(id + 0 * H.n_cells) - vectorPotential.at(idzmo + 0 * H.n_cells)) / H.dz - - (vectorPotential.at(id + 2 * H.n_cells) - vectorPotential.at(idxmo + 2 * H.n_cells)) / H.dx; - C.magnetic_z[id] = (vectorPotential.at(id + 1 * H.n_cells) - vectorPotential.at(idxmo + 1 * H.n_cells)) / H.dx - - (vectorPotential.at(id + 0 * H.n_cells) - vectorPotential.at(idymo + 0 * H.n_cells)) / H.dy; - } - } - } + mhd::utils::Init_Magnetic_Field_With_Vector_Potential(H, C, vectorPotential); // set initial values of non-magnetic conserved variables for (int k = H.n_ghost - 1; k < H.nz - H.n_ghost; k++) { @@ -1716,4 +1701,70 @@ void Grid3D::Circularly_Polarized_Alfven_Wave(struct parameters const P) } } } + +void Grid3D::Advecting_Field_Loop(struct parameters const P) +{ + // This test is only meaningful for a limited number of parameter values so I will check them here + // Check that the domain is centered on zero + assert((P.xmin + P.xlen / 2) == 0 and (P.ymin + P.ylen / 2) == 0 and (P.zmin + P.zlen / 2 == 0) and + "Domain must be centered at zero"); + + // Check that P.R is smaller than the size of the domain + Real const domain_size = std::hypot(P.xlen / 2, P.ylen / 2, P.zlen / 2); + assert(domain_size > P.R and "The size of the domain must be greater than P.R"); + + // Compute the vector potential. Since the vector potential std::vector is initialized to zero I will only assign new + // values when required and ignore the cases where I would be assigning zero + std::vector vectorPotential(3 * H.n_cells, 0); + for (int k = 0; k < H.nz; k++) { + for (int j = 0; j < H.ny; j++) { + for (int i = 0; i < H.nx; i++) { + // Get cell index + int const id = cuda_utilities::compute1DIndex(i, j, k, H.nx, H.ny); + + // Get the cell centered positions + Real x, y, z; + Get_Position(i, j, k, &x, &y, &z); + + // Y vector potential + Real radius = std::hypot(x + H.dx / 2., y, z + H.dz / 2.); + if (radius < P.R) { + vectorPotential.at(id + 1 * H.n_cells) = P.A * (P.R - radius); + } + + // Z vector potential + radius = std::hypot(x + H.dx / 2., y + H.dy / 2., z); + if (radius < P.R) { + vectorPotential.at(id + 2 * H.n_cells) = P.A * (P.R - radius); + } + } + } + } + + // Initialize the magnetic fields + mhd::utils::Init_Magnetic_Field_With_Vector_Potential(H, C, vectorPotential); + + // Initialize the hydro variables + for (int k = H.n_ghost - 1; k < H.nz - H.n_ghost; k++) { + for (int j = H.n_ghost - 1; j < H.ny - H.n_ghost; j++) { + for (int i = H.n_ghost - 1; i < H.nx - H.n_ghost; i++) { + // get cell index + int const id = cuda_utilities::compute1DIndex(i, j, k, H.nx, H.ny); + + // Compute the cell centered magnetic fields + auto const magnetic_centered = + mhd::utils::cellCenteredMagneticFields(C.host, id, i, j, k, H.n_cells, H.nx, H.ny); + + // Assignment + C.density[id] = P.rho; + C.momentum_x[id] = P.rho * P.vx; + C.momentum_y[id] = P.rho * P.vy; + C.momentum_z[id] = P.rho * P.vz; + C.Energy[id] = mhd::utils::computeEnergy(P.P, P.rho, C.momentum_x[id] / P.rho, C.momentum_y[id] / P.rho, + C.momentum_z[id] / P.rho, magnetic_centered.x, magnetic_centered.y, + magnetic_centered.z, ::gama); + } + } + } +} #endif // MHD \ No newline at end of file diff --git a/src/system_tests/input_files/tMHDSYSTEMParameterizedMpi_AdvectingFieldLoopCorrectInputExpectCorrectOutput.txt b/src/system_tests/input_files/tMHDSYSTEMParameterizedMpi_AdvectingFieldLoopCorrectInputExpectCorrectOutput.txt new file mode 100644 index 000000000..a4bd2530a --- /dev/null +++ b/src/system_tests/input_files/tMHDSYSTEMParameterizedMpi_AdvectingFieldLoopCorrectInputExpectCorrectOutput.txt @@ -0,0 +1,55 @@ +# +# Parameter File for an MHD Advecting Field Loop as defined in +# [Gardiner & Stone 2008](https://ui.adsabs.harvard.edu/abs/2008JCoPh.227.4123G/abstract) +# + +################################################ +# number of grid cells in the x dimension +nx=32 +# number of grid cells in the y dimension +ny=32 +# number of grid cells in the z dimension +nz=64 +# final output time +tout=2.0 +# time interval for output +outstep=2.0 +# name of initial conditions +init=Advecting_Field_Loop +# domain properties +xmin=-0.5 +ymin=-0.5 +zmin=-1.0 +xlen=1.0 +ylen=1.0 +zlen=2.0 +# type of boundary conditions +xl_bcnd=1 +xu_bcnd=1 +yl_bcnd=1 +yu_bcnd=1 +zl_bcnd=1 +zu_bcnd=1 +# path to output directory +outdir=./ + +################################################# +# Parameters for linear wave problems +# initial density +rho=1.0 +# velocity in the x direction +vx=1.0 +# velocity in the y direction +vy=1.0 +# velocity in the z direction +vz=2.0 +# initial pressure +P=1.0 +# amplitude of the loop/magnetic field background value +A=0.001 +# Radius of the Loop +R=0.3 + +# value of gamma +gamma=1.666666666666667 + diff --git a/src/system_tests/mhd_system_tests.cpp b/src/system_tests/mhd_system_tests.cpp index 436c6129b..9e5e87b8d 100644 --- a/src/system_tests/mhd_system_tests.cpp +++ b/src/system_tests/mhd_system_tests.cpp @@ -594,6 +594,13 @@ TEST_P(tMHDSYSTEMParameterizedMpi, RyuAndJones4dShockTubeCorrectInputExpectCorre test_runner.numMpiRanks = GetParam(); test_runner.runTest(); } + +/// Test the Advecting Field Loop +TEST_P(tMHDSYSTEMParameterizedMpi, AdvectingFieldLoopCorrectInputExpectCorrectOutput) +{ + test_runner.numMpiRanks = GetParam(); + test_runner.runTest(); +} /// @} // ============================================================================= diff --git a/src/utils/mhd_utilities.cu b/src/utils/mhd_utilities.cu index 5205b6c17..110d8d66b 100644 --- a/src/utils/mhd_utilities.cu +++ b/src/utils/mhd_utilities.cu @@ -18,5 +18,29 @@ namespace mhd::utils { +#ifdef MHD +void Init_Magnetic_Field_With_Vector_Potential(Header const &H, Grid3D::Conserved const &C, + std::vector const &vectorPotential) +{ + // Compute the magnetic field + for (int k = 1; k < H.nz; k++) { + for (int j = 1; j < H.ny; j++) { + for (int i = 1; i < H.nx; i++) { + // Get cell index. The "xmo" means: X direction Minus One + int const id = cuda_utilities::compute1DIndex(i, j, k, H.nx, H.ny); + int const idxmo = cuda_utilities::compute1DIndex(i - 1, j, k, H.nx, H.ny); + int const idymo = cuda_utilities::compute1DIndex(i, j - 1, k, H.nx, H.ny); + int const idzmo = cuda_utilities::compute1DIndex(i, j, k - 1, H.nx, H.ny); + C.magnetic_x[id] = (vectorPotential.at(id + 2 * H.n_cells) - vectorPotential.at(idymo + 2 * H.n_cells)) / H.dy - + (vectorPotential.at(id + 1 * H.n_cells) - vectorPotential.at(idzmo + 1 * H.n_cells)) / H.dz; + C.magnetic_y[id] = (vectorPotential.at(id + 0 * H.n_cells) - vectorPotential.at(idzmo + 0 * H.n_cells)) / H.dz - + (vectorPotential.at(id + 2 * H.n_cells) - vectorPotential.at(idxmo + 2 * H.n_cells)) / H.dx; + C.magnetic_z[id] = (vectorPotential.at(id + 1 * H.n_cells) - vectorPotential.at(idxmo + 1 * H.n_cells)) / H.dx - + (vectorPotential.at(id + 0 * H.n_cells) - vectorPotential.at(idymo + 0 * H.n_cells)) / H.dy; + } + } + } +} +#endif // MHD } // end namespace mhd::utils \ No newline at end of file diff --git a/src/utils/mhd_utilities.h b/src/utils/mhd_utilities.h index 7d5db459e..57bf14549 100644 --- a/src/utils/mhd_utilities.h +++ b/src/utils/mhd_utilities.h @@ -8,12 +8,14 @@ #pragma once // STL Includes +#include // External Includes // Local Includes #include "../global/global.h" #include "../global/global_cuda.h" +#include "../grid/grid3D.h" #include "../riemann_solvers/hlld_cuda.h" #include "../utils/cuda_utilities.h" #include "../utils/gpu.hpp" @@ -284,7 +286,7 @@ inline __host__ __device__ auto cellCenteredMagneticFields(Real const *dev_conse // Ternary operator to check that no values outside of the magnetic field // arrays are loaded. If the cell is on the edge that doesn't have magnetic // fields on both sides then instead set the centered magnetic field to be - // equal to the magnetic field of the closest edge. T + // equal to the magnetic field of the closest edge. Real avgBx = (xid > 0) ? /*if true*/ 0.5 * (dev_conserved[(grid_enum::magnetic_x)*n_cells + id] + dev_conserved[(grid_enum::magnetic_x)*n_cells + @@ -309,6 +311,18 @@ inline __host__ __device__ auto cellCenteredMagneticFields(Real const *dev_conse }; return returnStruct{avgBx, avgBy, avgBz}; } -#endif // MHD // ========================================================================= + +// ========================================================================= +/*! + * \brief Initialize the magnitice field from the vector potential + * + * \param H The Header struct + * \param C The Conserved struct + * \param vectorPotential The vector potential in the same format as the other arrays in Cholla + */ +void Init_Magnetic_Field_With_Vector_Potential(Header const &H, Grid3D::Conserved const &C, + std::vector const &vectorPotential); +// ========================================================================= +#endif // MHD } // end namespace mhd::utils diff --git a/src/utils/mhd_utilities_tests.cu b/src/utils/mhd_utilities_tests.cu index bd2579ab9..7383ef0e3 100644 --- a/src/utils/mhd_utilities_tests.cu +++ b/src/utils/mhd_utilities_tests.cu @@ -18,6 +18,7 @@ // Local Includes #include "../global/global.h" +#include "../grid/grid3D.h" #include "../utils/mhd_utilities.h" #include "../utils/testing_utilities.h" @@ -417,3 +418,62 @@ TEST(tMHDCellCenteredMagneticFields, CorrectInputExpectCorrectOutput) // ============================================================================= // End of tests for the mhd::utils::cellCenteredMagneticFields function // ============================================================================= + +// ============================================================================= +// Tests for the mhd::utils::Init_Magnetic_Field_With_Vector_Potential function +// ============================================================================= +#ifdef MHD +TEST(tMHDInitMagneticFieldWithVectorPotential, CorrectInputExpectCorrectOutput) +{ + // Mock up Header and Conserved structs + Header H; + Grid3D::Conserved C; + + H.nx = 2; + H.ny = 2; + H.nz = 2; + H.n_cells = H.nx * H.ny * H.nz; + H.dx = 0.2; + H.dy = 0.2; + H.dz = 0.2; + + double const default_fiducial = -999; + std::vector conserved_vector(H.n_cells * grid_enum::num_fields, default_fiducial); + C.host = conserved_vector.data(); + C.density = &(C.host[grid_enum::density * H.n_cells]); + C.momentum_x = &(C.host[grid_enum::momentum_x * H.n_cells]); + C.momentum_y = &(C.host[grid_enum::momentum_y * H.n_cells]); + C.momentum_z = &(C.host[grid_enum::momentum_z * H.n_cells]); + C.Energy = &(C.host[grid_enum::Energy * H.n_cells]); + C.magnetic_x = &(C.host[grid_enum::magnetic_x * H.n_cells]); + C.magnetic_y = &(C.host[grid_enum::magnetic_y * H.n_cells]); + C.magnetic_z = &(C.host[grid_enum::magnetic_z * H.n_cells]); + + // Mock up vector potential + std::vector vector_potential(H.n_cells * 3, 0); + std::iota(vector_potential.begin(), vector_potential.end(), 0); + + // Run the function + mhd::utils::Init_Magnetic_Field_With_Vector_Potential(H, C, vector_potential); + + // Check the results + double const bx_fiducial = -10.0; + double const by_fiducial = 15.0; + double const bz_fiducial = -5.0; + + for (size_t i = 0; i < conserved_vector.size(); i++) { + if (i == 47) { + testingUtilities::checkResults(bx_fiducial, conserved_vector.at(i), "value at i = " + std::to_string(i)); + } else if (i == 55) { + testingUtilities::checkResults(by_fiducial, conserved_vector.at(i), "value at i = " + std::to_string(i)); + } else if (i == 63) { + testingUtilities::checkResults(bz_fiducial, conserved_vector.at(i), "value at i = " + std::to_string(i)); + } else { + testingUtilities::checkResults(default_fiducial, conserved_vector.at(i), "value at i = " + std::to_string(i)); + } + } +} +#endif // MHD +// ============================================================================= +// End of tests for the mhd::utils::Init_Magnetic_Field_With_Vector_Potential function +// ============================================================================= From ec1fc6e2220bb0852f8b3a311a00620b0042a79a Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Tue, 28 Mar 2023 14:38:07 -0400 Subject: [PATCH 287/694] Add data for advecting field loop test --- cholla-tests-data | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cholla-tests-data b/cholla-tests-data index 566ec398e..8ae011909 160000 --- a/cholla-tests-data +++ b/cholla-tests-data @@ -1 +1 @@ -Subproject commit 566ec398ec476514999082c49a3f49d1241fce59 +Subproject commit 8ae01190955e861acec21c4d5fb5249ad51c17db From fe1831d2ea2ec5a398778b885c4d3491b471a058 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Tue, 28 Mar 2023 14:38:58 -0400 Subject: [PATCH 288/694] Fix cd bug in run_tests.sh Fixed a bug that caused the buildAndRunTests function to cd to the users home directory then exit. Fixed a minor typo in the SystemTestRunner constructor --- builds/run_tests.sh | 8 +++++++- src/system_tests/system_tester.cpp | 2 +- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/builds/run_tests.sh b/builds/run_tests.sh index eb688dc14..c2337ca05 100755 --- a/builds/run_tests.sh +++ b/builds/run_tests.sh @@ -248,12 +248,18 @@ buildAndRunTests () esac done + # Run setup and check if it worked + setupTests $MAKE_TYPE_ARG $COMPILER_ARG + if [ $? -ne 0 ]; then + echo "setup failed" + exit 1 + fi + # Clean the cholla directory builtin cd $CHOLLA_ROOT make clobber # Now we get to setting up and building - setupTests $MAKE_TYPE_ARG $COMPILER_ARG && \ if [[ -n $BUILD_GTEST ]]; then buildGoogleTest fi diff --git a/src/system_tests/system_tester.cpp b/src/system_tests/system_tester.cpp index 0bc4a8b0b..677581353 100644 --- a/src/system_tests/system_tester.cpp +++ b/src/system_tests/system_tester.cpp @@ -426,7 +426,7 @@ systemTest::SystemTestRunner::SystemTestRunner(bool const &particleData, bool co if (useFiducialFile) { _fiducialFilePath = ::globalChollaRoot.getString() + "/cholla-tests-data/system_tests/" + _fullTestFileName + ".h5"; if (not std::filesystem::exists(_fiducialFilePath)) { - throw std::invalid_argument("Error: Cholla settings file not found at :" + _fiducialFilePath); + throw std::invalid_argument("Error: Cholla fiducial data file not found at :" + _fiducialFilePath); } _fiducialFile.openFile(_fiducialFilePath, H5F_ACC_RDONLY); _fiducialDataSetNames = _findDataSetNames(_fiducialFile); From cf1eaf5ef2b177af809a7db2d731b8a5c6e94c57 Mon Sep 17 00:00:00 2001 From: helenarichie Date: Mon, 3 Apr 2023 15:07:59 -0400 Subject: [PATCH 289/694] make changes requested by PR review --- src/dust/dust_cuda.cu | 4 +--- src/dust/dust_cuda.h | 12 ++++-------- 2 files changed, 5 insertions(+), 11 deletions(-) diff --git a/src/dust/dust_cuda.cu b/src/dust/dust_cuda.cu index dfd1e4246..47065aefd 100644 --- a/src/dust/dust_cuda.cu +++ b/src/dust/dust_cuda.cu @@ -11,8 +11,6 @@ // STL includes #include - - // External includes #include #include #include @@ -25,7 +23,7 @@ #include "../utils/cuda_utilities.h" #include "../utils/gpu.hpp" #include "../utils/hydro_utilities.h" - #include "dust_cuda.h" + #include "../dust/dust_cuda.h" void Dust_Update(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dt, Real gamma) { diff --git a/src/dust/dust_cuda.h b/src/dust/dust_cuda.h index aea9befec..6c0660e67 100644 --- a/src/dust/dust_cuda.h +++ b/src/dust/dust_cuda.h @@ -19,33 +19,29 @@ /*! * \brief Launch the dust kernel. * - * \param[in] dev_conserved The device conserved variable array. + * \param[in,out] dev_conserved The device conserved variable array. * \param[in] nx Number of cells in the x-direction * \param[in] ny Number of cells in the y-direction * \param[in] nz Number of cells in the z-direction * \param[in] n_ghost Number of ghost cells - * \param[in] n_fields + * \param[in] n_fields Number of fields in dev_conserved * \param[in] dt Simulation timestep * \param[in] gamma Specific heat ratio - * - * \return None */ void Dust_Update(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dt, Real gamma); /*! * \brief Compute the change in dust density for a cell and update its value in dev_conserved. * - * \param[out] dev_conserved The device conserved variable array. The dust field is updated in this function. If dual + * \param[in,out] dev_conserved The device conserved variable array. The dust field is updated in this function. If dual * energy is turned on, then the dual energy field is updated, as well. * \param[in] nx Number of cells in the x-direction * \param[in] ny Number of cells in the y-direction * \param[in] nz Number of cells in the z-direction * \param[in] n_ghost Number of ghost cells - * \param[in] n_fields + * \param[in] n_fields Number of fields in dev_conserved * \param[in] dt Simulation timestep * \param[in] gamma Specific heat ratio - * - * \return None */ __global__ void Dust_Kernel(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dt, Real gamma); From ebd806ffc880b78910001717d42ea7d674c693a5 Mon Sep 17 00:00:00 2001 From: helenarichie Date: Mon, 3 Apr 2023 15:15:35 -0400 Subject: [PATCH 290/694] run clang format --- cholla-tests-data | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cholla-tests-data b/cholla-tests-data index 566ec398e..8c3c4476f 160000 --- a/cholla-tests-data +++ b/cholla-tests-data @@ -1 +1 @@ -Subproject commit 566ec398ec476514999082c49a3f49d1241fce59 +Subproject commit 8c3c4476fdc388554cd4bb1ca036a2762830951f From a5557b2fcc02d737f7e15633afdd1e8b2e0daee9 Mon Sep 17 00:00:00 2001 From: helenarichie Date: Mon, 3 Apr 2023 15:20:05 -0400 Subject: [PATCH 291/694] run clang format --- src/dust/dust_cuda.cu | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/dust/dust_cuda.cu b/src/dust/dust_cuda.cu index 47065aefd..9b2994b12 100644 --- a/src/dust/dust_cuda.cu +++ b/src/dust/dust_cuda.cu @@ -11,11 +11,13 @@ // STL includes #include + #include #include #include // Local includes + #include "../dust/dust_cuda.h" #include "../global/global.h" #include "../global/global_cuda.h" #include "../grid/grid3D.h" @@ -23,7 +25,6 @@ #include "../utils/cuda_utilities.h" #include "../utils/gpu.hpp" #include "../utils/hydro_utilities.h" - #include "../dust/dust_cuda.h" void Dust_Update(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dt, Real gamma) { From 975dd6e0a98fd701dd24f9ab39ea8bd34768984c Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Wed, 22 Mar 2023 13:10:16 -0400 Subject: [PATCH 292/694] Add MHD blast wave initial conditions & test - New `MHD_Spherical_Blast` initial conditions - New `P_blast` parameter to indicate the pressure in the over pressure zone of the blast - Add MHD Blast system test Other Add a check to `Check_Configuration` that gamma has a reasonable value --- examples/3D/mhd_blast.txt | 61 ++++++++++++++++++ src/global/global.cpp | 2 + src/global/global.h | 1 + src/grid/grid3D.h | 9 +++ src/grid/initial_conditions.cpp | 63 +++++++++++++++++++ ...astWaveCorrectInputExpectCorrectOutput.txt | 61 ++++++++++++++++++ src/system_tests/mhd_system_tests.cpp | 7 +++ src/utils/error_handling.cpp | 3 + 8 files changed, 207 insertions(+) create mode 100644 examples/3D/mhd_blast.txt create mode 100644 src/system_tests/input_files/tMHDSYSTEMParameterizedMpi_MhdBlastWaveCorrectInputExpectCorrectOutput.txt diff --git a/examples/3D/mhd_blast.txt b/examples/3D/mhd_blast.txt new file mode 100644 index 000000000..c7dbf1bab --- /dev/null +++ b/examples/3D/mhd_blast.txt @@ -0,0 +1,61 @@ +# +# Parameter File for the MHD Blast wavelength +# See [Gardiner & Stone 2008](https://arxiv.org/abs/0712.2634) for details. +# + +################################################ +# number of grid cells in the x dimension +nx=128 +# number of grid cells in the y dimension +ny=128 +# number of grid cells in the z dimension +nz=128 +# final output time +tout=0.02 +# time interval for output +outstep=0.02 +# name of initial conditions +init=MHD_Spherical_Blast +# domain properties +xmin=-0.5 +ymin=-0.5 +zmin=-0.5 +xlen=1.0 +ylen=1.0 +zlen=1.0 +# type of boundary conditions +xl_bcnd=1 +xu_bcnd=1 +yl_bcnd=1 +yu_bcnd=1 +zl_bcnd=1 +zu_bcnd=1 +# path to output directory +outdir=./ + +################################################# +# Parameters for MHD Blast Wave problem + +# initial density +rho=1.0 +# velocity in the x direction +vx=0.0 +# velocity in the y direction +vy=0.0 +# velocity in the z direction +vz=0.0 +# initial pressure outside the blast zone +P=1.0 +# initial pressure inside the blast zone +P_blast=100.0 +# The radius of the blast zone +R=0.125 +# magnetic field in the x direction. Equal to 10/sqrt(2) +Bx=7.0710678118654746 +# magnetic field in the y direction +By=0.0 +# magnetic field in the z direction. Equal to 10/sqrt(2) +Bz=7.0710678118654746 + +# value of gamma +gamma=1.666666666666667 diff --git a/src/global/global.cpp b/src/global/global.cpp index db3bc162a..50e1445b9 100644 --- a/src/global/global.cpp +++ b/src/global/global.cpp @@ -361,6 +361,8 @@ void parse_param(char *name, char *value, struct parameters *parms) parms->polarization = atof(value); } else if (strcmp(name, "R") == 0) { parms->R = atof(value); + } else if (strcmp(name, "P_blast") == 0) { + parms->P_blast = atof(value); #ifdef PARTICLES } else if (strcmp(name, "prng_seed") == 0) { parms->prng_seed = atoi(value); diff --git a/src/global/global.h b/src/global/global.h index 4fa5f9059..4555e5553 100644 --- a/src/global/global.h +++ b/src/global/global.h @@ -264,6 +264,7 @@ struct parameters { Real yaw = 0; Real polarization = 0; Real R = 0; + Real P_blast = 0; #ifdef PARTICLES // The random seed for particle simulations. With the default of 0 then a // machine dependent seed will be generated. diff --git a/src/grid/grid3D.h b/src/grid/grid3D.h index aaddacae4..ae16de86b 100644 --- a/src/grid/grid3D.h +++ b/src/grid/grid3D.h @@ -697,7 +697,16 @@ class Grid3D * \param P The parameters object */ void Advecting_Field_Loop(struct parameters const P); + + /*! + * \brief Initialize the grid with a spherical MHD blast wave. See [Gardiner & + * Stone 2008](https://arxiv.org/abs/0712.2634) for details. + * + * \param P The parameters struct + */ + void MHD_Spherical_Blast(struct parameters const P); #endif // MHD + #ifdef MPI_CHOLLA void Set_Boundaries_MPI(struct parameters P); void Set_Boundaries_MPI_BLOCK(int *flags, struct parameters P); diff --git a/src/grid/initial_conditions.cpp b/src/grid/initial_conditions.cpp index 64e0b732b..cb6a8f018 100644 --- a/src/grid/initial_conditions.cpp +++ b/src/grid/initial_conditions.cpp @@ -89,6 +89,8 @@ void Grid3D::Set_Initial_Conditions(parameters P) Circularly_Polarized_Alfven_Wave(P); } else if (strcmp(P.init, "Advecting_Field_Loop") == 0) { Advecting_Field_Loop(P); + } else if (strcmp(P.init, "MHD_Spherical_Blast") == 0) { + MHD_Spherical_Blast(P); #endif // MHD } else { chprintf("ABORT: %s: Unknown initial conditions!\n", P.init); @@ -1767,4 +1769,65 @@ void Grid3D::Advecting_Field_Loop(struct parameters const P) } } } + +void Grid3D::MHD_Spherical_Blast(struct parameters const P) +{ + // This test is only meaningful for a limited number of parameter values so I will check them here + // Check that the domain is centered on zero + assert((P.xmin + P.xlen / 2) == 0 and (P.ymin + P.ylen / 2) == 0 and (P.zmin + P.zlen / 2 == 0) and + "Domain must be centered at zero"); + + // Check that P.R is smaller than the size of the domain + Real const domain_size = std::hypot(P.xlen / 2, P.ylen / 2, P.zlen / 2); + assert(domain_size > P.R and "The size of the domain must be greater than P.R"); + + // Initialize the magnetic field + for (int k = H.n_ghost - 1; k < H.nz - H.n_ghost; k++) { + for (int j = H.n_ghost - 1; j < H.ny - H.n_ghost; j++) { + for (int i = H.n_ghost - 1; i < H.nx - H.n_ghost; i++) { + // get cell index + int const id = cuda_utilities::compute1DIndex(i, j, k, H.nx, H.ny); + + C.magnetic_x[id] = P.Bx; + C.magnetic_y[id] = P.By; + C.magnetic_z[id] = P.Bz; + } + } + } + + for (int k = H.n_ghost - 1; k < H.nz - H.n_ghost; k++) { + for (int j = H.n_ghost - 1; j < H.ny - H.n_ghost; j++) { + for (int i = H.n_ghost - 1; i < H.nx - H.n_ghost; i++) { + // get cell index + int const id = cuda_utilities::compute1DIndex(i, j, k, H.nx, H.ny); + + // Set the fields that don't depend on pressure + C.density[id] = P.rho; + C.momentum_x[id] = P.rho * P.vx; + C.momentum_y[id] = P.rho * P.vy; + C.momentum_z[id] = P.rho * P.vz; + + // Get the cell centered positions + Real x, y, z; + Get_Position(i, j, k, &x, &y, &z); + + // Compute the magnetic field in this cell + auto const magnetic_centered = + mhd::utils::cellCenteredMagneticFields(C.host, id, i, j, k, H.n_cells, H.nx, H.ny); + + // Set the field(s) that do depend on pressure. That's just energy + Real radius = std::hypot(x, y, z); + if (radius < P.R) { + C.Energy[id] = mhd::utils::computeEnergy( + P.P_blast, C.density[id], C.momentum_x[id] / C.density[id], C.momentum_y[id] / C.density[id], + C.momentum_z[id] / C.density[id], magnetic_centered.x, magnetic_centered.y, magnetic_centered.z, ::gama); + } else { + C.Energy[id] = mhd::utils::computeEnergy( + P.P, C.density[id], C.momentum_x[id] / C.density[id], C.momentum_y[id] / C.density[id], + C.momentum_z[id] / C.density[id], magnetic_centered.x, magnetic_centered.y, magnetic_centered.z, ::gama); + } + } + } + } +} #endif // MHD \ No newline at end of file diff --git a/src/system_tests/input_files/tMHDSYSTEMParameterizedMpi_MhdBlastWaveCorrectInputExpectCorrectOutput.txt b/src/system_tests/input_files/tMHDSYSTEMParameterizedMpi_MhdBlastWaveCorrectInputExpectCorrectOutput.txt new file mode 100644 index 000000000..531b65fcd --- /dev/null +++ b/src/system_tests/input_files/tMHDSYSTEMParameterizedMpi_MhdBlastWaveCorrectInputExpectCorrectOutput.txt @@ -0,0 +1,61 @@ +# +# Parameter File for the MHD Blast wavelength +# See [Gardiner & Stone 2008](https://arxiv.org/abs/0712.2634) for details. +# + +################################################ +# number of grid cells in the x dimension +nx=64 +# number of grid cells in the y dimension +ny=64 +# number of grid cells in the z dimension +nz=64 +# final output time +tout=0.02 +# time interval for output +outstep=0.02 +# name of initial conditions +init=MHD_Spherical_Blast +# domain properties +xmin=-0.5 +ymin=-0.5 +zmin=-0.5 +xlen=1.0 +ylen=1.0 +zlen=1.0 +# type of boundary conditions +xl_bcnd=1 +xu_bcnd=1 +yl_bcnd=1 +yu_bcnd=1 +zl_bcnd=1 +zu_bcnd=1 +# path to output directory +outdir=./ + +################################################# +# Parameters for MHD Blast Wave problem + +# initial density +rho=1.0 +# velocity in the x direction +vx=0.0 +# velocity in the y direction +vy=0.0 +# velocity in the z direction +vz=0.0 +# initial pressure outside the blast zone +P=1.0 +# initial pressure inside the blast zone +P_blast=100.0 +# The radius of the blast zone +R=0.125 +# magnetic field in the x direction. Equal to 10/sqrt(2) +Bx=7.0710678118654746 +# magnetic field in the y direction +By=0.0 +# magnetic field in the z direction. Equal to 10/sqrt(2) +Bz=7.0710678118654746 + +# value of gamma +gamma=1.666666666666667 diff --git a/src/system_tests/mhd_system_tests.cpp b/src/system_tests/mhd_system_tests.cpp index 9e5e87b8d..121a58c16 100644 --- a/src/system_tests/mhd_system_tests.cpp +++ b/src/system_tests/mhd_system_tests.cpp @@ -601,6 +601,13 @@ TEST_P(tMHDSYSTEMParameterizedMpi, AdvectingFieldLoopCorrectInputExpectCorrectOu test_runner.numMpiRanks = GetParam(); test_runner.runTest(); } + +/// Test the MHD Blast Wave +TEST_P(tMHDSYSTEMParameterizedMpi, MhdBlastWaveCorrectInputExpectCorrectOutput) +{ + test_runner.numMpiRanks = GetParam(); + test_runner.runTest(); +} /// @} // ============================================================================= diff --git a/src/utils/error_handling.cpp b/src/utils/error_handling.cpp index 4576e7abe..da2ea80fe 100644 --- a/src/utils/error_handling.cpp +++ b/src/utils/error_handling.cpp @@ -60,6 +60,9 @@ void Check_Configuration(parameters const &P) #endif //! PRECISION static_assert(PRECISION == 2, "PRECISION must be 2. Single precision is not currently supported"); + // Check that gamma, the ratio of specific heats, is greater than 1 + assert(::gama <= 1.0 and "Gamma must be greater than one."); + // MHD Checks // ========== #ifdef MHD From 4fe241ae9918fab4c265c4967e44fc833445aa65 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Mon, 3 Apr 2023 12:06:51 -0400 Subject: [PATCH 293/694] Rename the `R` parameter to `radius` --- examples/3D/advecting_field_loop.txt | 2 +- examples/3D/mhd_blast.txt | 2 +- src/global/global.cpp | 4 ++-- src/global/global.h | 2 +- src/grid/initial_conditions.cpp | 18 +++++++++--------- ...ieldLoopCorrectInputExpectCorrectOutput.txt | 2 +- ...lastWaveCorrectInputExpectCorrectOutput.txt | 2 +- 7 files changed, 16 insertions(+), 16 deletions(-) diff --git a/examples/3D/advecting_field_loop.txt b/examples/3D/advecting_field_loop.txt index 9819e5f9a..eca9c382e 100644 --- a/examples/3D/advecting_field_loop.txt +++ b/examples/3D/advecting_field_loop.txt @@ -48,7 +48,7 @@ P=1.0 # amplitude of the loop/magnetic field background value A=0.001 # Radius of the Loop -R=0.3 +radius=0.3 # value of gamma gamma=1.666666666666667 diff --git a/examples/3D/mhd_blast.txt b/examples/3D/mhd_blast.txt index c7dbf1bab..f60997c7e 100644 --- a/examples/3D/mhd_blast.txt +++ b/examples/3D/mhd_blast.txt @@ -49,7 +49,7 @@ P=1.0 # initial pressure inside the blast zone P_blast=100.0 # The radius of the blast zone -R=0.125 +radius=0.125 # magnetic field in the x direction. Equal to 10/sqrt(2) Bx=7.0710678118654746 # magnetic field in the y direction diff --git a/src/global/global.cpp b/src/global/global.cpp index 50e1445b9..a47f9e78b 100644 --- a/src/global/global.cpp +++ b/src/global/global.cpp @@ -359,8 +359,8 @@ void parse_param(char *name, char *value, struct parameters *parms) parms->yaw = atof(value); } else if (strcmp(name, "polarization") == 0) { parms->polarization = atof(value); - } else if (strcmp(name, "R") == 0) { - parms->R = atof(value); + } else if (strcmp(name, "radius") == 0) { + parms->radius = atof(value); } else if (strcmp(name, "P_blast") == 0) { parms->P_blast = atof(value); #ifdef PARTICLES diff --git a/src/global/global.h b/src/global/global.h index 4555e5553..17e7d7b73 100644 --- a/src/global/global.h +++ b/src/global/global.h @@ -263,7 +263,7 @@ struct parameters { Real pitch = 0; Real yaw = 0; Real polarization = 0; - Real R = 0; + Real radius = 0; Real P_blast = 0; #ifdef PARTICLES // The random seed for particle simulations. With the default of 0 then a diff --git a/src/grid/initial_conditions.cpp b/src/grid/initial_conditions.cpp index cb6a8f018..246c23ec7 100644 --- a/src/grid/initial_conditions.cpp +++ b/src/grid/initial_conditions.cpp @@ -1711,9 +1711,9 @@ void Grid3D::Advecting_Field_Loop(struct parameters const P) assert((P.xmin + P.xlen / 2) == 0 and (P.ymin + P.ylen / 2) == 0 and (P.zmin + P.zlen / 2 == 0) and "Domain must be centered at zero"); - // Check that P.R is smaller than the size of the domain + // Check that P.radius is smaller than the size of the domain Real const domain_size = std::hypot(P.xlen / 2, P.ylen / 2, P.zlen / 2); - assert(domain_size > P.R and "The size of the domain must be greater than P.R"); + assert(domain_size > P.radius and "The size of the domain must be greater than P.radius"); // Compute the vector potential. Since the vector potential std::vector is initialized to zero I will only assign new // values when required and ignore the cases where I would be assigning zero @@ -1730,14 +1730,14 @@ void Grid3D::Advecting_Field_Loop(struct parameters const P) // Y vector potential Real radius = std::hypot(x + H.dx / 2., y, z + H.dz / 2.); - if (radius < P.R) { - vectorPotential.at(id + 1 * H.n_cells) = P.A * (P.R - radius); + if (radius < P.radius) { + vectorPotential.at(id + 1 * H.n_cells) = P.A * (P.radius - radius); } // Z vector potential radius = std::hypot(x + H.dx / 2., y + H.dy / 2., z); - if (radius < P.R) { - vectorPotential.at(id + 2 * H.n_cells) = P.A * (P.R - radius); + if (radius < P.radius) { + vectorPotential.at(id + 2 * H.n_cells) = P.A * (P.radius - radius); } } } @@ -1777,9 +1777,9 @@ void Grid3D::MHD_Spherical_Blast(struct parameters const P) assert((P.xmin + P.xlen / 2) == 0 and (P.ymin + P.ylen / 2) == 0 and (P.zmin + P.zlen / 2 == 0) and "Domain must be centered at zero"); - // Check that P.R is smaller than the size of the domain + // Check that P.radius is smaller than the size of the domain Real const domain_size = std::hypot(P.xlen / 2, P.ylen / 2, P.zlen / 2); - assert(domain_size > P.R and "The size of the domain must be greater than P.R"); + assert(domain_size > P.radius and "The size of the domain must be greater than P.radius"); // Initialize the magnetic field for (int k = H.n_ghost - 1; k < H.nz - H.n_ghost; k++) { @@ -1817,7 +1817,7 @@ void Grid3D::MHD_Spherical_Blast(struct parameters const P) // Set the field(s) that do depend on pressure. That's just energy Real radius = std::hypot(x, y, z); - if (radius < P.R) { + if (radius < P.radius) { C.Energy[id] = mhd::utils::computeEnergy( P.P_blast, C.density[id], C.momentum_x[id] / C.density[id], C.momentum_y[id] / C.density[id], C.momentum_z[id] / C.density[id], magnetic_centered.x, magnetic_centered.y, magnetic_centered.z, ::gama); diff --git a/src/system_tests/input_files/tMHDSYSTEMParameterizedMpi_AdvectingFieldLoopCorrectInputExpectCorrectOutput.txt b/src/system_tests/input_files/tMHDSYSTEMParameterizedMpi_AdvectingFieldLoopCorrectInputExpectCorrectOutput.txt index a4bd2530a..d6a733d3c 100644 --- a/src/system_tests/input_files/tMHDSYSTEMParameterizedMpi_AdvectingFieldLoopCorrectInputExpectCorrectOutput.txt +++ b/src/system_tests/input_files/tMHDSYSTEMParameterizedMpi_AdvectingFieldLoopCorrectInputExpectCorrectOutput.txt @@ -48,7 +48,7 @@ P=1.0 # amplitude of the loop/magnetic field background value A=0.001 # Radius of the Loop -R=0.3 +radius=0.3 # value of gamma gamma=1.666666666666667 diff --git a/src/system_tests/input_files/tMHDSYSTEMParameterizedMpi_MhdBlastWaveCorrectInputExpectCorrectOutput.txt b/src/system_tests/input_files/tMHDSYSTEMParameterizedMpi_MhdBlastWaveCorrectInputExpectCorrectOutput.txt index 531b65fcd..4e70c2993 100644 --- a/src/system_tests/input_files/tMHDSYSTEMParameterizedMpi_MhdBlastWaveCorrectInputExpectCorrectOutput.txt +++ b/src/system_tests/input_files/tMHDSYSTEMParameterizedMpi_MhdBlastWaveCorrectInputExpectCorrectOutput.txt @@ -49,7 +49,7 @@ P=1.0 # initial pressure inside the blast zone P_blast=100.0 # The radius of the blast zone -R=0.125 +radius=0.125 # magnetic field in the x direction. Equal to 10/sqrt(2) Bx=7.0710678118654746 # magnetic field in the y direction From 3233fb8cbc99a4248c7996271cf23720cd193ecd Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Mon, 3 Apr 2023 13:20:30 -0400 Subject: [PATCH 294/694] Add test data for MHD blast wave --- cholla-tests-data | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cholla-tests-data b/cholla-tests-data index 8ae011909..2ada29389 160000 --- a/cholla-tests-data +++ b/cholla-tests-data @@ -1 +1 @@ -Subproject commit 8ae01190955e861acec21c4d5fb5249ad51c17db +Subproject commit 2ada29389c9af90694a8db6d514b6edb9fc50808 From 9be27243d379b39769ef156ff12c07f081cc3ec5 Mon Sep 17 00:00:00 2001 From: helenarichie Date: Tue, 4 Apr 2023 14:23:29 -0400 Subject: [PATCH 295/694] update submodule --- cholla-tests-data | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cholla-tests-data b/cholla-tests-data index 8c3c4476f..2ada29389 160000 --- a/cholla-tests-data +++ b/cholla-tests-data @@ -1 +1 @@ -Subproject commit 8c3c4476fdc388554cd4bb1ca036a2762830951f +Subproject commit 2ada29389c9af90694a8db6d514b6edb9fc50808 From f2a4718490a4632a02e598b1baf5b7166244a622 Mon Sep 17 00:00:00 2001 From: helenarichie Date: Tue, 4 Apr 2023 18:07:12 -0400 Subject: [PATCH 296/694] deprecate cuda macro --- src/dust/dust_cuda.cu | 40 +++++++++++++++++++--------------------- src/dust/dust_cuda.h | 18 ++++++++---------- 2 files changed, 27 insertions(+), 31 deletions(-) diff --git a/src/dust/dust_cuda.cu b/src/dust/dust_cuda.cu index 9b2994b12..c05af264e 100644 --- a/src/dust/dust_cuda.cu +++ b/src/dust/dust_cuda.cu @@ -6,25 +6,24 @@ * McKinnon et al. (2017) model of dust sputtering, which depends on the cell's gas density and temperature. */ -#ifdef CUDA - #ifdef DUST - - // STL includes - #include - - #include - #include - #include - - // Local includes - #include "../dust/dust_cuda.h" - #include "../global/global.h" - #include "../global/global_cuda.h" - #include "../grid/grid3D.h" - #include "../grid/grid_enum.h" - #include "../utils/cuda_utilities.h" - #include "../utils/gpu.hpp" - #include "../utils/hydro_utilities.h" +#ifdef DUST + + // STL includes + #include + + #include + #include + #include + + // Local includes + #include "../dust/dust_cuda.h" + #include "../global/global.h" + #include "../global/global_cuda.h" + #include "../grid/grid3D.h" + #include "../grid/grid_enum.h" + #include "../utils/cuda_utilities.h" + #include "../utils/gpu.hpp" + #include "../utils/hydro_utilities.h" void Dust_Update(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dt, Real gamma) { @@ -143,5 +142,4 @@ __device__ __host__ Real calc_tau_sp(Real n, Real T) // McKinnon et al. (2017) __device__ __host__ Real calc_dd_dt(Real d_dust, Real tau_sp) { return -d_dust / (tau_sp / 3); } - #endif // DUST -#endif // CUDA +#endif // DUST diff --git a/src/dust/dust_cuda.h b/src/dust/dust_cuda.h index 6c0660e67..aab4c7db4 100644 --- a/src/dust/dust_cuda.h +++ b/src/dust/dust_cuda.h @@ -5,16 +5,15 @@ * */ -#ifdef CUDA - #ifdef DUST +#ifdef DUST - #ifndef DUST_CUDA_H - #define DUST_CUDA_H + #ifndef DUST_CUDA_H + #define DUST_CUDA_H - #include + #include - #include "../global/global.h" - #include "../utils/gpu.hpp" + #include "../global/global.h" + #include "../utils/gpu.hpp" /*! * \brief Launch the dust kernel. @@ -66,6 +65,5 @@ __device__ __host__ Real calc_tau_sp(Real n, Real T); */ __device__ __host__ Real calc_dd_dt(Real d_dust, Real tau_sp); - #endif // DUST - #endif // CUDA -#endif // DUST_CUDA_H \ No newline at end of file + #endif // DUST_CUDA_H +#endif // DUST \ No newline at end of file From d85e1f432c4dcb6156f54c285e288aa4bcfa184f Mon Sep 17 00:00:00 2001 From: helenarichie Date: Tue, 4 Apr 2023 18:11:38 -0400 Subject: [PATCH 297/694] run clang format --- cholla-tests-data | 2 +- src/dust/dust_cuda.cu | 16 ++++++++-------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/cholla-tests-data b/cholla-tests-data index 2ada29389..8c3c4476f 160000 --- a/cholla-tests-data +++ b/cholla-tests-data @@ -1 +1 @@ -Subproject commit 2ada29389c9af90694a8db6d514b6edb9fc50808 +Subproject commit 8c3c4476fdc388554cd4bb1ca036a2762830951f diff --git a/src/dust/dust_cuda.cu b/src/dust/dust_cuda.cu index c05af264e..b0969c455 100644 --- a/src/dust/dust_cuda.cu +++ b/src/dust/dust_cuda.cu @@ -54,9 +54,9 @@ __global__ void Dust_Kernel(Real *dev_conserved, int nx, int ny, int nz, int n_g Real mu = 0.6; // mean molecular weight Real T, E, P; // temperature, energy, pressure Real vx, vy, vz; // velocities - #ifdef DE + #ifdef DE Real ge; - #endif // DE + #endif // DE // define integration variables Real dd_dt; // instantaneous rate of change in dust density @@ -81,10 +81,10 @@ __global__ void Dust_Kernel(Real *dev_conserved, int nx, int ny, int nz, int n_g vx = dev_conserved[id + n_cells * grid_enum::momentum_x] / d_gas; vy = dev_conserved[id + n_cells * grid_enum::momentum_y] / d_gas; vz = dev_conserved[id + n_cells * grid_enum::momentum_z] / d_gas; - #ifdef DE + #ifdef DE ge = dev_conserved[id + n_cells * grid_enum::GasEnergy] / d_gas; ge = fmax(ge, (Real)TINY_NUMBER); - #endif // DE + #endif // DE // calculate physical quantities P = hydro_utilities::Calc_Pressure_Primitive(E, d_gas, vx, vy, vz, gamma); @@ -92,9 +92,9 @@ __global__ void Dust_Kernel(Real *dev_conserved, int nx, int ny, int nz, int n_g Real T_init; T_init = hydro_utilities::Calc_Temp(P, n); - #ifdef DE + #ifdef DE T_init = hydro_utilities::Calc_Temp_DE(d_gas, ge, gamma, n); - #endif // DE + #endif // DE // if dual energy is turned on use temp from total internal energy T = T_init; @@ -118,9 +118,9 @@ __global__ void Dust_Kernel(Real *dev_conserved, int nx, int ny, int nz, int n_g dev_conserved[id + n_cells * grid_enum::dust_density] = d_dust; - #ifdef DE + #ifdef DE dev_conserved[id + n_cells * grid_enum::GasEnergy] = d_dust * ge; - #endif + #endif } } From f9ae8f4ba60957aa118799493c2c32647c4e6ed0 Mon Sep 17 00:00:00 2001 From: helenarichie Date: Wed, 5 Apr 2023 13:21:20 -0400 Subject: [PATCH 298/694] update test data --- cholla-tests-data | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cholla-tests-data b/cholla-tests-data index 8c3c4476f..2ada29389 160000 --- a/cholla-tests-data +++ b/cholla-tests-data @@ -1 +1 @@ -Subproject commit 8c3c4476fdc388554cd4bb1ca036a2762830951f +Subproject commit 2ada29389c9af90694a8db6d514b6edb9fc50808 From 6ecd2cf5d89fb0dd82c39387d6ebdfff84205567 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Wed, 22 Mar 2023 10:27:40 -0400 Subject: [PATCH 299/694] Add Orszag-Tang Vortex initial conditions and test --- examples/3D/orszag_tang_vortex.txt | 42 +++++++++++++ src/grid/grid3D.h | 8 +++ src/grid/initial_conditions.cpp | 61 +++++++++++++++++++ ...gVortexCorrectInputExpectCorrectOutput.txt | 42 +++++++++++++ src/system_tests/mhd_system_tests.cpp | 7 +++ 5 files changed, 160 insertions(+) create mode 100644 examples/3D/orszag_tang_vortex.txt create mode 100644 src/system_tests/input_files/tMHDSYSTEMParameterizedMpi_OrszagTangVortexCorrectInputExpectCorrectOutput.txt diff --git a/examples/3D/orszag_tang_vortex.txt b/examples/3D/orszag_tang_vortex.txt new file mode 100644 index 000000000..9d8050073 --- /dev/null +++ b/examples/3D/orszag_tang_vortex.txt @@ -0,0 +1,42 @@ +# +# Parameter File for the Orszag-Tang Vortex +# See [Gardiner & Stone 2008](https://arxiv.org/abs/0712.2634) +# + +################################################ +# number of grid cells in the x dimension +nx=128 +# number of grid cells in the y dimension +ny=128 +# number of grid cells in the z dimension +nz=128 +# final output time +tout=0.5 +# time interval for output +outstep=0.5 +# name of initial conditions +init=Orszag_Tang_Vortex +# domain properties +xmin=0.0 +ymin=0.0 +zmin=0.0 +xlen=1.0 +ylen=1.0 +zlen=1.0 +# type of boundary conditions +xl_bcnd=1 +xu_bcnd=1 +yl_bcnd=1 +yu_bcnd=1 +zl_bcnd=1 +zu_bcnd=1 +# path to output directory +outdir=./ + +################################################# +# Parameters for Orszag-Tang Vortex. This problem is defined for a specific set +# of initial conditions which have been hard coded into the initial conditions +# function. The only thing that needs set here is the adiabatic index + +# value of gamma +gamma=1.666666666666667 diff --git a/src/grid/grid3D.h b/src/grid/grid3D.h index ae16de86b..635c96aa6 100644 --- a/src/grid/grid3D.h +++ b/src/grid/grid3D.h @@ -705,6 +705,14 @@ class Grid3D * \param P The parameters struct */ void MHD_Spherical_Blast(struct parameters const P); + + /*! + * \brief Initialize the grid with the Orszag-Tang Vortex. See [Gardiner & Stone + * 2008](https://arxiv.org/abs/0712.2634) + * + * \param P The parameters. + */ + void Orszag_Tang_Vortex(); #endif // MHD #ifdef MPI_CHOLLA diff --git a/src/grid/initial_conditions.cpp b/src/grid/initial_conditions.cpp index 246c23ec7..1ad70bebe 100644 --- a/src/grid/initial_conditions.cpp +++ b/src/grid/initial_conditions.cpp @@ -91,6 +91,8 @@ void Grid3D::Set_Initial_Conditions(parameters P) Advecting_Field_Loop(P); } else if (strcmp(P.init, "MHD_Spherical_Blast") == 0) { MHD_Spherical_Blast(P); + } else if (strcmp(P.init, "Orszag_Tang_Vortex") == 0) { + Orszag_Tang_Vortex(); #endif // MHD } else { chprintf("ABORT: %s: Unknown initial conditions!\n", P.init); @@ -1830,4 +1832,63 @@ void Grid3D::MHD_Spherical_Blast(struct parameters const P) } } } + +void Grid3D::Orszag_Tang_Vortex() +{ + // This problem requires specific parameters so I will define them here + Real const magnetic_background = 1.0 / std::sqrt(4.0 * M_PI); + Real const density_background = 25.0 / (36.0 * M_PI); + Real const velocity_background = 1.0; + Real const pressure_background = 5.0 / (12.0 * M_PI); + + // Compute the vector potential. Since the vector potential std::vector is initialized to zero I will only assign new + // values when required and ignore the cases where I would be assigning zero + std::vector vectorPotential(3 * H.n_cells, 0); + for (int k = 0; k < H.nz; k++) { + for (int j = 0; j < H.ny; j++) { + for (int i = 0; i < H.nx; i++) { + // Get cell index + int const id = cuda_utilities::compute1DIndex(i, j, k, H.nx, H.ny); + + // Get the cell centered positions + Real x, y, z; + Get_Position(i, j, k, &x, &y, &z); + + // Z vector potential + vectorPotential.at(id + 2 * H.n_cells) = (magnetic_background / 4.0 * M_PI) * std::cos(4.0 * M_PI * x) - + (magnetic_background / 2.0 * M_PI) * std::cos(2.0 * M_PI * y); + } + } + } + + // Initialize the magnetic fields + mhd::utils::Init_Magnetic_Field_With_Vector_Potential(H, C, vectorPotential); + + // Initialize the hydro variables + for (int k = H.n_ghost - 1; k < H.nz - H.n_ghost; k++) { + for (int j = H.n_ghost - 1; j < H.ny - H.n_ghost; j++) { + for (int i = H.n_ghost - 1; i < H.nx - H.n_ghost; i++) { + // get cell index + int const id = cuda_utilities::compute1DIndex(i, j, k, H.nx, H.ny); + + // Get the cell centered positions + Real x, y, z; + Get_Position(i, j, k, &x, &y, &z); + + // Compute the cell centered magnetic fields + auto const magnetic_centered = + mhd::utils::cellCenteredMagneticFields(C.host, id, i, j, k, H.n_cells, H.nx, H.ny); + + // Assignment + C.density[id] = density_background; + C.momentum_x[id] = density_background * velocity_background * std::sin(2.0 * M_PI * y); + C.momentum_y[id] = -density_background * velocity_background * std::sin(2.0 * M_PI * x); + C.momentum_z[id] = 0.0; + C.Energy[id] = mhd::utils::computeEnergy(pressure_background, C.density[id], C.momentum_x[id] / C.density[id], + C.momentum_y[id] / C.density[id], C.momentum_z[id] / C.density[id], + magnetic_centered.x, magnetic_centered.y, magnetic_centered.z, ::gama); + } + } + } +} #endif // MHD \ No newline at end of file diff --git a/src/system_tests/input_files/tMHDSYSTEMParameterizedMpi_OrszagTangVortexCorrectInputExpectCorrectOutput.txt b/src/system_tests/input_files/tMHDSYSTEMParameterizedMpi_OrszagTangVortexCorrectInputExpectCorrectOutput.txt new file mode 100644 index 000000000..332e26eb2 --- /dev/null +++ b/src/system_tests/input_files/tMHDSYSTEMParameterizedMpi_OrszagTangVortexCorrectInputExpectCorrectOutput.txt @@ -0,0 +1,42 @@ +# +# Parameter File for the Orszag-Tang Vortex +# See [Gardiner & Stone 2008](https://arxiv.org/abs/0712.2634) +# + +################################################ +# number of grid cells in the x dimension +nx=64 +# number of grid cells in the y dimension +ny=64 +# number of grid cells in the z dimension +nz=64 +# final output time +tout=0.5 +# time interval for output +outstep=0.5 +# name of initial conditions +init=Orszag_Tang_Vortex +# domain properties +xmin=0.0 +ymin=0.0 +zmin=0.0 +xlen=1.0 +ylen=1.0 +zlen=1.0 +# type of boundary conditions +xl_bcnd=1 +xu_bcnd=1 +yl_bcnd=1 +yu_bcnd=1 +zl_bcnd=1 +zu_bcnd=1 +# path to output directory +outdir=./ + +################################################# +# Parameters for Orszag-Tang Vortex. This problem is defined for a specific set +# of initial conditions which have been hard coded into the initial conditions +# function. The only thing that needs set here is the adiabatic index + +# value of gamma +gamma=1.666666666666667 diff --git a/src/system_tests/mhd_system_tests.cpp b/src/system_tests/mhd_system_tests.cpp index 121a58c16..1dd37a354 100644 --- a/src/system_tests/mhd_system_tests.cpp +++ b/src/system_tests/mhd_system_tests.cpp @@ -608,6 +608,13 @@ TEST_P(tMHDSYSTEMParameterizedMpi, MhdBlastWaveCorrectInputExpectCorrectOutput) test_runner.numMpiRanks = GetParam(); test_runner.runTest(); } + +/// Test the Orszag-Tang Vortex +TEST_P(tMHDSYSTEMParameterizedMpi, OrszagTangVortexCorrectInputExpectCorrectOutput) +{ + test_runner.numMpiRanks = GetParam(); + test_runner.runTest(); +} /// @} // ============================================================================= From 401084f13cac75a612b2ee915721f194af970584 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Tue, 4 Apr 2023 16:31:58 -0400 Subject: [PATCH 300/694] Add test data for Orszag-Tang vortex test --- cholla-tests-data | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cholla-tests-data b/cholla-tests-data index 2ada29389..d6202baad 160000 --- a/cholla-tests-data +++ b/cholla-tests-data @@ -1 +1 @@ -Subproject commit 2ada29389c9af90694a8db6d514b6edb9fc50808 +Subproject commit d6202baadc9eaac6dce5ec4060f1f3fda8abdf1f From 86477ff65d084a53e67effe80af84f14e5b7ed2f Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Tue, 4 Apr 2023 17:38:06 -0400 Subject: [PATCH 301/694] Update Orszag-Tang test to pass This loosens the correctness limit for the Orszag-Tang vortex test. Due to the chaotic and largely qualitative nature of the test, and that MHD is currently PCM only, this loosening accounts for variations between machines --- src/system_tests/mhd_system_tests.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/system_tests/mhd_system_tests.cpp b/src/system_tests/mhd_system_tests.cpp index 1dd37a354..aaeb2f4e7 100644 --- a/src/system_tests/mhd_system_tests.cpp +++ b/src/system_tests/mhd_system_tests.cpp @@ -613,6 +613,7 @@ TEST_P(tMHDSYSTEMParameterizedMpi, MhdBlastWaveCorrectInputExpectCorrectOutput) TEST_P(tMHDSYSTEMParameterizedMpi, OrszagTangVortexCorrectInputExpectCorrectOutput) { test_runner.numMpiRanks = GetParam(); + test_runner.setFixedEpsilon(8.E-4); test_runner.runTest(); } /// @} @@ -738,4 +739,4 @@ TEST_P(tMHDSYSTEMCircularlyPolarizedAlfvenWaveParameterizedPolarization, Standin INSTANTIATE_TEST_SUITE_P(, tMHDSYSTEMCircularlyPolarizedAlfvenWaveParameterizedPolarization, ::testing::Values(1.0, -1.0)); /// @} -// ============================================================================= \ No newline at end of file +// ============================================================================= From d759def34dc8851cbe6a0fa0ad050c22e9b2d00d Mon Sep 17 00:00:00 2001 From: Alwin Date: Mon, 10 Apr 2023 21:12:02 -0400 Subject: [PATCH 302/694] draft 1 hdf5 refactor --- src/io/io.cpp | 550 ++++++++++++++++++----------------------------- src/io/io_gpu.cu | 53 +++++ 2 files changed, 261 insertions(+), 342 deletions(-) diff --git a/src/io/io.cpp b/src/io/io.cpp index 4e407100e..b039bd7f1 100644 --- a/src/io/io.cpp +++ b/src/io/io.cpp @@ -341,18 +341,24 @@ void OutputFloat32(Grid3D &G, struct parameters P, int nfile) } #endif // DE #ifdef MHD + + // TODO : Repair output format if needed and remove these chprintfs when appropriate if (P.out_float32_magnetic_x > 0) { + chprintf("WARNING: MHD float-32 output has a different output format than float-64\n"); WriteHDF5Field3D(H.nx, H.ny, nx_dset + 1, ny_dset + 1, nz_dset + 1, H.n_ghost - 1, file_id, dataset_buffer, device_dataset_buffer, G.C.d_magnetic_x, "/magnetic_x"); } if (P.out_float32_magnetic_y > 0) { + chprintf("WARNING: MHD float-32 output has a different output format than float-64\n"); WriteHDF5Field3D(H.nx, H.ny, nx_dset + 1, ny_dset + 1, nz_dset + 1, H.n_ghost - 1, file_id, dataset_buffer, device_dataset_buffer, G.C.d_magnetic_y, "/magnetic_y"); } if (P.out_float32_magnetic_z > 0) { + chprintf("WARNING: MHD float-32 output has a different output format than float-64\n"); WriteHDF5Field3D(H.nx, H.ny, nx_dset + 1, ny_dset + 1, nz_dset + 1, H.n_ghost - 1, file_id, dataset_buffer, device_dataset_buffer, G.C.d_magnetic_z, "/magnetic_z"); } + #endif free(dataset_buffer); @@ -1153,15 +1159,13 @@ herr_t Read_HDF5_Dataset(hid_t file_id, float *dataset_buffer, const char *name) } - - // Helper function which uses the correct HDF5 arguments based on the type of // dataset_buffer to avoid writing garbage herr_t Write_HDF5_Dataset(hid_t file_id, hid_t dataspace_id, double *dataset_buffer, const char *name) { - // Create a dataset id for density + // Create the dataset id hid_t dataset_id = H5Dcreate(file_id, name, H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); - // Write the density array to file + // Write the array to file herr_t status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer); // Free the dataset id status = H5Dclose(dataset_id); @@ -1170,9 +1174,9 @@ herr_t Write_HDF5_Dataset(hid_t file_id, hid_t dataspace_id, double *dataset_buf herr_t Write_HDF5_Dataset(hid_t file_id, hid_t dataspace_id, float *dataset_buffer, const char *name) { - // Create a dataset id for density + // Create the dataset id hid_t dataset_id = H5Dcreate(file_id, name, H5T_IEEE_F32BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); - // Write the density array to file + // Write the array to file herr_t status = H5Dwrite(dataset_id, H5T_NATIVE_FLOAT, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer); // Free the dataset id status = H5Dclose(dataset_id); @@ -1234,6 +1238,112 @@ void Write_HDF5_Field_2D_CPU(Header H, hid_t file_id, hid_t dataspace_id, float herr_t status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer, name); } + +/* \brief Before HDF5 reads data into a buffer, remap and write grid to HDF5 buffer. */ +void Fill_HDF5_Buffer_From_Grid_CPU(int nx, int ny, int nz, int nx_real, int ny_real, int nz_real, int n_ghost, Real* hdf5_buffer, Real* grid_buffer) +{ + int i, j, k, id, buf_id; + // 3D case + if (nx > 1 && ny > 1 && nz > 1) { + for (k = 0; k < nz_real; k++) { + for (j = 0; j < ny_real; j++) { + for (i = 0; i < nx_real; i++) { + id = (i + n_ghost) + (j + n_ghost) * nx + (k + n_ghost) * nx * ny; + buf_id = k + j * nz_real + i * nz_real * ny_real; + hdf5_buffer[buf_id] = grid_buffer[id]; + } + } + } + return; + } + + // 2D case + if (nx > 1 && ny > 1 && nz == 1) { + for (j = 0; j < ny_real; j++) { + for (i = 0; i < nx_real; i++) { + id = (i + n_ghost) + (j + n_ghost) * nx; + buf_id = j + i * ny_real; + hdf5_buffer[buf_id] = grid_buffer[id]; + } + } + return; + } + + // 1D case + if (nx > 1 && ny == 1 && nz == 1) { + id = n_ghost; + memcpy(&hdf5_buffer[0], &grid_buffer[id], nx_real * sizeof(Real)); + return; + } +} + +/* \brief Before HDF5 reads data into a buffer, remap and write grid to HDF5 buffer. */ +void Fill_HDF5_Buffer_From_Grid_GPU(int nx, int ny, int nz, int nx_real, int ny_real, int nz_real, int n_ghost, Real* hdf5_buffer, Real* device_grid_buffer); +// From src/io/io_gpu + +// Set up dataspace for grid formatted data and write dataset +void Write_HDF5_Dataset_Grid(int nx, int ny, int nz, int nx_real, int ny_real, int nz_real, hid_t file_id, Real* dataset_buffer, const char* name) +{ + // Set up dataspace + + hid_t dataspace_id; + // 1-D Case + if (H.nx > 1 && H.ny == 1 && H.nz == 1) { + int rank = 1; + hsize_t dims[1]; + dims[0] = H.nx_real; + dataspace_id = H5Screate_simple(rank, dims, NULL); + } + // 2-D Case + if (H.nx > 1 && H.ny > 1 && H.nz == 1) { + int rank = 2; + hsize_t dims[2]; + dims[0] = H.nx_real; + dims[1] = H.ny_real; + dataspace_id = H5Screate_simple(rank, dims, NULL); + } + // 3-D Case + if (H.nx > 1 && H.ny > 1 && H.nz > 1) { + int rank = 3; + hsize_t dims[3]; + dims[0] = H.nx_real; + dims[1] = H.ny_real; + dims[2] = H.nz_real; + dataspace_id = H5Screate_simple(rank, dims, NULL); + } + + // Write to HDF5 file + + Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer, name); + + // Close dataspace + herr_t status = H5Sclose(dataspace_id); +} + +// Data moves from host grid_buffer to dataset_buffer to hdf5 file +void Write_Grid_HDF5_Field_CPU(Header H, hid_t file_id, Real* dataset_buffer, Real* grid_buffer, const char* name) +{ + Fill_HDF5_Buffer_From_Grid_CPU(H.nx, H.ny, H.nz, H.nx_real, H.ny_real, H.nz_real, H.n_ghost, dataset_buffer, grid_buffer); + Write_HDF5_Dataset_Grid(H.nx, H.ny, H.nz, H.nx_real, H.ny_real, H.nz_real, file_id, dataset_buffer, name); +} + +// Data moves from device_grid_buffer to device_hdf5_buffer to dataset_buffer to hdf5 file +void Write_Grid_HDF5_Field_GPU(Header H, hid_t file_id, Real* dataset_buffer, Real* device_hdf5_buffer, Real* device_grid_buffer, const char* name) +{ + Fill_HDF5_Buffer_From_Grid_GPU(H.nx, H.ny, H.nz, H.nx_real, H.ny_real, H.nz_real, H.n_ghost, dataset_buffer, device_hdf5_buffer, device_grid_buffer); + Write_HDF5_Dataset_Grid(H.nx, H.ny, H.nz, H.nx_real, H.ny_real, H.nz_real, file_id, dataset_buffer, name); +} + +void Write_Generic_HDF5_Field(int nx, int ny, int nz, int nx_real, int ny_real, int nz_real, int n_ghost, + hid_t file_id, Real* dataset_buffer, Real* source_buffer, const char* name) +{ + Fill_HDF5_Buffer_From_Grid_CPU(nx, ny, nz, nx_real, ny_real, nz_real, n_ghost, dataset_buffer, source_buffer); + Write_HDF5_Dataset_Grid(nx, ny, nz, nx_real, ny_real, nz_real, file_id, dataset_buffer, name); +} + + + + /*! \fn void Write_Grid_HDF5(hid_t file_id) * \brief Write the grid to a file, at the current simulation time. */ void Grid3D::Write_Grid_HDF5(hid_t file_id) @@ -1284,331 +1394,92 @@ void Grid3D::Write_Grid_HDF5(hid_t file_id) cudaMemcpyDeviceToHost)); #endif // GRAVITY_GPU and OUTPUT_POTENTIAL - // 1D case - if (H.nx > 1 && H.ny == 1 && H.nz == 1) { - int nx_dset = H.nx_real; - hsize_t dims[1]; - dataset_buffer = (Real *)malloc(H.nx_real * sizeof(Real)); - - // Create the data space for the datasets - dims[0] = nx_dset; - dataspace_id = H5Screate_simple(1, dims, NULL); - - Write_HDF5_Field_1D_CPU(H, file_id, dataspace_id, dataset_buffer, C.density, "/density"); - Write_HDF5_Field_1D_CPU(H, file_id, dataspace_id, dataset_buffer, C.momentum_x, "/momentum_x"); - Write_HDF5_Field_1D_CPU(H, file_id, dataspace_id, dataset_buffer, C.momentum_y, "/momentum_y"); - Write_HDF5_Field_1D_CPU(H, file_id, dataspace_id, dataset_buffer, C.momentum_z, "/momentum_z"); - Write_HDF5_Field_1D_CPU(H, file_id, dataspace_id, dataset_buffer, C.Energy, "/Energy"); - - #ifdef SCALAR - for (int s = 0; s < NSCALARS; s++) { - // create the name of the dataset - char dataset[100]; - char number[10]; - strcpy(dataset, "/scalar"); - sprintf(number, "%d", s); - strcat(dataset, number); - // Copy the scalar array to the memory buffer - - // TODO: If there is a test case for regression testing NSCALARS > 1 this - // substitution can be attempted. Write_HDF5_Field_1D_CPU(H, file_id, - // dataspace_id, dataset_buffer, &(C.scalar[s*H.n_cells]), dataset); - - id = H.n_ghost; - memcpy(&dataset_buffer[0], &(C.scalar[id + s * H.n_cells]), H.nx_real * sizeof(Real)); - // dataset here is just a name - status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer, dataset); - } - - #endif // SCALAR - - #ifdef DE - Write_HDF5_Field_1D_CPU(H, file_id, dataspace_id, dataset_buffer, C.GasEnergy, "/GasEnergy"); - #endif // DE - - // Free the dataspace id - status = H5Sclose(dataspace_id); - } - - // 2D case - if (H.nx > 1 && H.ny > 1 && H.nz == 1) { - int nx_dset = H.nx_real; - int ny_dset = H.ny_real; - hsize_t dims[2]; - dataset_buffer = (Real *)malloc(H.ny_real * H.nx_real * sizeof(Real)); - - // Create the data space for the datasets - dims[0] = nx_dset; - dims[1] = ny_dset; - dataspace_id = H5Screate_simple(2, dims, NULL); - - Write_HDF5_Field_2D_CPU(H, file_id, dataspace_id, dataset_buffer, C.density, "/density"); - Write_HDF5_Field_2D_CPU(H, file_id, dataspace_id, dataset_buffer, C.momentum_x, "/momentum_x"); - Write_HDF5_Field_2D_CPU(H, file_id, dataspace_id, dataset_buffer, C.momentum_y, "/momentum_y"); - Write_HDF5_Field_2D_CPU(H, file_id, dataspace_id, dataset_buffer, C.momentum_z, "/momentum_z"); - Write_HDF5_Field_2D_CPU(H, file_id, dataspace_id, dataset_buffer, C.Energy, "/Energy"); - - #ifdef SCALAR - for (int s = 0; s < NSCALARS; s++) { - // create the name of the dataset - char dataset[100]; - char number[10]; - strcpy(dataset, "/scalar"); - sprintf(number, "%d", s); - strcat(dataset, number); - - // TODO: If there is a test case for regression testing NSCALARS > 1 this - // substitution can be attempted. Write_HDF5_Field_1D_CPU(H, file_id, - // dataspace_id, dataset_buffer, &(C.scalar[s*H.n_cells]), dataset); - - // Copy the scalar array to the memory buffer - for (j = 0; j < H.ny_real; j++) { - for (i = 0; i < H.nx_real; i++) { - id = (i + H.n_ghost) + (j + H.n_ghost) * H.nx; - buf_id = j + i * H.ny_real; - dataset_buffer[buf_id] = C.scalar[id + s * H.n_cells]; - } - } - - status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer, dataset); - } - #endif // SCALAR - - #ifdef DE - Write_HDF5_Field_2D_CPU(H, file_id, dataspace_id, dataset_buffer, C.GasEnergy, "/GasEnergy"); - #endif // DE - - // Free the dataspace id - status = H5Sclose(dataspace_id); - } - - // 3D case - if (H.nx > 1 && H.ny > 1 && H.nz > 1) { - int nx_dset = H.nx_real; - int ny_dset = H.ny_real; - int nz_dset = H.nz_real; - hsize_t dims[3]; - hsize_t dims_full[3]; - size_t buffer_size; - // Need a larger device buffer for MHD. In the future, if other fields need a - // larger device buffer, choose the maximum of the sizes. If the buffer is too - // large, it does not cause bugs (Oct 6 2022) + // Allocate necessary buffers #ifdef MHD buffer_size = (nx_dset + 1) * (ny_dset + 1) * (nz_dset + 1); #else buffer_size = nx_dset * ny_dset * nz_dset; #endif - // Using static DeviceVector here automatically allocates the buffer the - // first time it is needed It persists until program exit, and then calls - // Free upon destruction - cuda_utilities::DeviceVector static device_dataset_vector{buffer_size}; - double *device_dataset_buffer = device_dataset_vector.data(); - dataset_buffer = (Real *)malloc(buffer_size * sizeof(Real)); - // CudaSafeCall(cudaMalloc(&device_dataset_buffer,nx_dset*ny_dset*nz_dset*sizeof(double))); - - // Create the data space for the datasets (note: WriteHDF5Field3D creates - // its own dataspace, does not use the shared one) - dims[0] = nx_dset; - dims[1] = ny_dset; - dims[2] = nz_dset; - dataspace_id = H5Screate_simple(3, dims, NULL); - WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, device_dataset_buffer, - C.d_density, "/density"); - if (output_momentum || H.Output_Complete_Data) { - WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, device_dataset_buffer, - C.d_momentum_x, "/momentum_x"); - WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, device_dataset_buffer, - C.d_momentum_y, "/momentum_y"); - WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, device_dataset_buffer, - C.d_momentum_z, "/momentum_z"); - } - - if (output_energy || H.Output_Complete_Data) { - WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, device_dataset_buffer, - C.d_Energy, "/Energy"); - } + cuda_utilities::DeviceVector static device_dataset_vector{buffer_size}; + Real *device_dataset_buffer = device_dataset_vector.data(); + dataset_buffer = (Real *)malloc(buffer_size * sizeof(Real)); + + // Start writing fields + + Write_Grid_HDF5_Field(H, file_id, dataset_buffer, C.density, "/density"); + if (output_momentum || H.Output_Complete_Data) { + Write_Grid_HDF5_Field(H, file_id, dataset_buffer, C.momentum_x, "/momentum_x"); + Write_Grid_HDF5_Field(H, file_id, dataset_buffer, C.momentum_y, "/momentum_y"); + Write_Grid_HDF5_Field(H, file_id, dataset_buffer, C.momentum_z, "/momentum_z"); + } + if (output_energy || H.Output_Complete_Data) { + Write_Grid_HDF5_Field(H, file_id, dataset_buffer, C.Energy, "/Energy"); +#ifdef DE + Write_Grid_HDF5_Field(H, file_id, dataset_buffer, C.GasEnergy, "/GasEnergy"); +#endif + } + #ifdef SCALAR - #if !defined(COOLING_GRACKLE) && !defined(CHEMISTRY_GPU) // Dont write scalars when using grackle - for (int s = 0; s < NSCALARS; s++) { - // create the name of the dataset - char dataset[100]; - char number[10]; - strcpy(dataset, "/scalar"); - sprintf(number, "%d", s); - strcat(dataset, number); - // Copy the scalar array to the memory buffer - for (k = 0; k < H.nz_real; k++) { - for (j = 0; j < H.ny_real; j++) { - for (i = 0; i < H.nx_real; i++) { - id = (i + H.n_ghost) + (j + H.n_ghost) * H.nx + (k + H.n_ghost) * H.nx * H.ny; - buf_id = k + j * H.nz_real + i * H.nz_real * H.ny_real; - dataset_buffer[buf_id] = C.scalar[id + s * H.n_cells]; - } - } - } - status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer, dataset); - } - #else // COOLING_GRACKLE or CHEMISTRY_GPU. Write Chemistry when using - // GRACKLE - #ifdef OUTPUT_CHEMISTRY - for (k = 0; k < H.nz_real; k++) { - for (j = 0; j < H.ny_real; j++) { - for (i = 0; i < H.nx_real; i++) { - id = (i + H.n_ghost) + (j + H.n_ghost) * H.nx + (k + H.n_ghost) * H.nx * H.ny; - buf_id = k + j * H.nz_real + i * H.nz_real * H.ny_real; - #ifdef COOLING_GRACKLE - dataset_buffer[buf_id] = Cool.fields.HI_density[id]; - #endif // COOLING_GRACKLE - #ifdef CHEMISTRY_GPU - dataset_buffer[buf_id] = C.HI_density[id]; - #endif // CHEMISTRY_GPU - } - } - } - status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer, "/HI_density"); - - for (k = 0; k < H.nz_real; k++) { - for (j = 0; j < H.ny_real; j++) { - for (i = 0; i < H.nx_real; i++) { - id = (i + H.n_ghost) + (j + H.n_ghost) * H.nx + (k + H.n_ghost) * H.nx * H.ny; - buf_id = k + j * H.nz_real + i * H.nz_real * H.ny_real; - #ifdef COOLING_GRACKLE - dataset_buffer[buf_id] = Cool.fields.HII_density[id]; - #endif // COOLING_GRACKLE - #ifdef CHEMISTRY_GPU - dataset_buffer[buf_id] = C.HII_density[id]; - #endif // CHEMISTRY_GPU - } - } - } - if (output_full_ionization || H.Output_Complete_Data) { - status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer, "/HII_density"); - } - - for (k = 0; k < H.nz_real; k++) { - for (j = 0; j < H.ny_real; j++) { - for (i = 0; i < H.nx_real; i++) { - id = (i + H.n_ghost) + (j + H.n_ghost) * H.nx + (k + H.n_ghost) * H.nx * H.ny; - buf_id = k + j * H.nz_real + i * H.nz_real * H.ny_real; - #ifdef COOLING_GRACKLE - dataset_buffer[buf_id] = Cool.fields.HeI_density[id]; - #endif // COOLING_GRACKLE - #ifdef CHEMISTRY_GPU - dataset_buffer[buf_id] = C.HeI_density[id]; - #endif // CHEMISTRY_GPU - } - } - } - if (output_full_ionization || H.Output_Complete_Data) { - status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer, "/HeI_density"); - } - for (k = 0; k < H.nz_real; k++) { - for (j = 0; j < H.ny_real; j++) { - for (i = 0; i < H.nx_real; i++) { - id = (i + H.n_ghost) + (j + H.n_ghost) * H.nx + (k + H.n_ghost) * H.nx * H.ny; - buf_id = k + j * H.nz_real + i * H.nz_real * H.ny_real; - #ifdef COOLING_GRACKLE - dataset_buffer[buf_id] = Cool.fields.HeII_density[id]; - #endif // COOLING_GRACKLE - #ifdef CHEMISTRY_GPU - dataset_buffer[buf_id] = C.HeII_density[id]; - #endif // CHEMISTRY_GPU - } - } - } - status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer, "/HeII_density"); - - for (k = 0; k < H.nz_real; k++) { - for (j = 0; j < H.ny_real; j++) { - for (i = 0; i < H.nx_real; i++) { - id = (i + H.n_ghost) + (j + H.n_ghost) * H.nx + (k + H.n_ghost) * H.nx * H.ny; - buf_id = k + j * H.nz_real + i * H.nz_real * H.ny_real; - #ifdef COOLING_GRACKLE - dataset_buffer[buf_id] = Cool.fields.HeIII_density[id]; - #endif // COOLING_GRACKLE - #ifdef CHEMISTRY_GPU - dataset_buffer[buf_id] = C.HeIII_density[id]; - #endif // CHEMISTRY_GPU - } - } - } - status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer, "/HeIII_density"); - - for (k = 0; k < H.nz_real; k++) { - for (j = 0; j < H.ny_real; j++) { - for (i = 0; i < H.nx_real; i++) { - id = (i + H.n_ghost) + (j + H.n_ghost) * H.nx + (k + H.n_ghost) * H.nx * H.ny; - buf_id = k + j * H.nz_real + i * H.nz_real * H.ny_real; - #ifdef COOLING_GRACKLE - dataset_buffer[buf_id] = Cool.fields.e_density[id]; - #endif // COOLING_GRACKLE - #ifdef CHEMISTRY_GPU - dataset_buffer[buf_id] = C.e_density[id]; - #endif // CHEMISTRY_GPU - } - } - } - if (output_electrons || H.Output_Complete_Data) { - status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer, "/e_density"); - } - - #ifdef GRACKLE_METALS - for (k = 0; k < H.nz_real; k++) { - for (j = 0; j < H.ny_real; j++) { - for (i = 0; i < H.nx_real; i++) { - id = (i + H.n_ghost) + (j + H.n_ghost) * H.nx + (k + H.n_ghost) * H.nx * H.ny; - buf_id = k + j * H.nz_real + i * H.nz_real * H.ny_real; - dataset_buffer[buf_id] = Cool.fields.metal_density[id]; - } - } - } - if (output_metals || H.Output_Complete_Data) { - status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer, "/metal_density"); - } - #endif // GRACKLE_METALS - - #endif // OUTPUT_CHEMISTRY - - #ifdef OUTPUT_TEMPERATURE - - #ifdef CHEMISTRY_GPU - Compute_Gas_Temperature(Chem.Fields.temperature_h, false); - #endif // CHEMISTRY_GPU + + #ifdef BASIC_SCALAR + Write_Grid_HDF5_Field(H, file_id, dataset_buffer, C.scalar, "/scalar0"); + #endif - // Copy the internal energy array to the memory buffer - for (k = 0; k < H.nz_real; k++) { - for (j = 0; j < H.ny_real; j++) { - for (i = 0; i < H.nx_real; i++) { - id = (i + H.n_ghost) + (j + H.n_ghost) * H.nx + (k + H.n_ghost) * H.nx * H.ny; - buf_id = k + j * H.nz_real + i * H.nz_real * H.ny_real; - #ifdef COOLING_GRACKLE - dataset_buffer[buf_id] = Cool.temperature[id]; - #endif - #ifdef CHEMISTRY_GPU - dataset_buffer[buf_id] = Chem.Fields.temperature_h[id]; - #endif - } - } - } + #ifdef OUTPUT_CHEMISTRY + #ifdef CHEMISTRY_GPU + Write_Grid_HDF5_Field(H, file_id, dataset_buffer, C.HI_density, "/HI_density"); + Write_Grid_HDF5_Field(H, file_id, dataset_buffer, C.HII_density, "/HII_density"); + Write_Grid_HDF5_Field(H, file_id, dataset_buffer, C.HeI_density, "/HeI_density"); + Write_Grid_HDF5_Field(H, file_id, dataset_buffer, C.HeII_density, "/HeII_density"); + Write_Grid_HDF5_Field(H, file_id, dataset_buffer, C.HeIII_density, "/HeIII_density"); + Write_Grid_HDF5_Field(H, file_id, dataset_buffer, C.e_density, "/e_density"); + #elif defined(COOLING_GRACKLE) + Write_Grid_HDF5_Field(H, file_id, dataset_buffer, Cool.fields.HI_density, "/HI_density"); + Write_Grid_HDF5_Field(H, file_id, dataset_buffer, Cool.fields.HII_density, "/HII_density"); + Write_Grid_HDF5_Field(H, file_id, dataset_buffer, Cool.fields.HeI_density, "/HeI_density"); + Write_Grid_HDF5_Field(H, file_id, dataset_buffer, Cool.fields.HeII_density, "/HeII_density"); + Write_Grid_HDF5_Field(H, file_id, dataset_buffer, Cool.fields.HeIII_density, "/HeIII_density"); + if (output_electrons || H.Output_Complete_Data) { + Write_Grid_HDF5_Field(H, file_id, dataset_buffer, Cool.fields.e_density, "/e_density"); + } + #endif +#endif //OUTPUT_CHEMISTRY + + #if defined(COOLING_GRACKLE) || defined(CHEMISTRY_GPU) - status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer, "/temperature"); + #ifdef GRACKLE_METALS + if (output_metals || H.Output_Complete_Data) { + Write_Grid_HDF5_Field(H, file_id, dataset_buffer, Cool.fields.metal_density, "/metal_density"); + } + #endif // GRACKLE_METALS - #endif // OUTPUT_TEMPERATURE + + #ifdef OUTPUT_TEMPERATURE + #ifdef CHEMISTRY_GPU + Compute_Gas_Temperature(Chem.Fields.temperature_h, false); + Write_Grid_HDF5_Field(H, file_id, dataset_buffer, Chem.Fields.temperature_h, "/temperature"); + #elif defined(COOLING_GRACKLE) + Write_Grid_HDF5_Field(H, file_id, dataset_buffer, Cool.temperature, "/temperature"); + #endif + #endif - #endif // COOLING_GRACKLE - #endif // SCALAR + + #endif // COOLING_GRACKLE || CHEMISTRY_GPU + + #endif // SCALAR - #ifdef DE - if (output_energy || H.Output_Complete_Data) { - WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, device_dataset_buffer, - C.d_GasEnergy, "/GasEnergy"); - } - #endif // DE + // 3D case + if (H.nx > 1 && H.ny > 1 && H.nz > 1) { #if defined(GRAVITY) && defined(OUTPUT_POTENTIAL) // Copy the potential array to the memory buffer + Write_Generic_HDF5_Field(Grav.nx_local + 2 * N_GHOST_POTENTIAL, Grav.ny_local + 2 * N_GHOST_POTENTIAL, Grav.nz_local + 2 * N_GHOST_POTENTIAL, + nx, ny, nz, Grav.nx_local, Grav.ny_local, Grav.nz_local, N_GHOST_POTENTIAL, + file_id, dataset_buffer, Grav.F.potential_h, "/grav_potential"); + + /* for (k = 0; k < Grav.nz_local; k++) { for (j = 0; j < Grav.ny_local; j++) { for (i = 0; i < Grav.nx_local; i++) { @@ -1623,26 +1494,25 @@ void Grid3D::Write_Grid_HDF5(hid_t file_id) } } status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer, "/grav_potential"); + */ #endif // GRAVITY and OUTPUT_POTENTIAL #ifdef MHD if (H.Output_Complete_Data) { - // Note: for WriteHDF5Field3D, use the left side n_ghost - WriteHDF5Field3D(H.nx, H.ny, nx_dset + 1, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, + WriteHDF5Field3D(H.nx, H.ny, H.nx_real + 1, H.ny_real, H.nz_real, H.n_ghost, file_id, dataset_buffer, device_dataset_buffer, C.d_magnetic_x, "/magnetic_x", 0); - WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset + 1, nz_dset, H.n_ghost, file_id, dataset_buffer, + WriteHDF5Field3D(H.nx, H.ny, H.nx_real, H.ny_real + 1, H.nz_real, H.n_ghost, file_id, dataset_buffer, device_dataset_buffer, C.d_magnetic_y, "/magnetic_y", 1); - WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset + 1, H.n_ghost, file_id, dataset_buffer, + WriteHDF5Field3D(H.nx, H.ny, H.nx_real, H.ny_real, H.nz_real + 1, H.n_ghost, file_id, dataset_buffer, device_dataset_buffer, C.d_magnetic_z, "/magnetic_z", 2); } #endif // MHD // Free the dataspace id - status = H5Sclose(dataspace_id); - // CudaSafeCall(cudaFree(device_dataset_buffer));// No longer needed because - // devicevector frees when it should + } - free(dataset_buffer); + + free(dataset_buffer); } #endif // HDF5 @@ -2439,7 +2309,7 @@ void Fill_Grid_From_HDF5_Buffer(int nx, int ny, int nz, int nx_real, int ny_real // Note: for 1D ny_real and nz_real are not used // And for 2D nz_real is not used. // This protects the magnetic case where ny_real/nz_real += 1 - + int i, j, k, id, buf_id; // 3D case if (nx > 1 && ny > 1 && nz > 1) { @@ -2454,7 +2324,7 @@ void Fill_Grid_From_HDF5_Buffer(int nx, int ny, int nz, int nx_real, int ny_real } return; } - + // 2D case if (nx > 1 && ny > 1 && nz == 1) { for (j = 0; j < ny_real; j++) { @@ -2466,7 +2336,7 @@ void Fill_Grid_From_HDF5_Buffer(int nx, int ny, int nz, int nx_real, int ny_real } return; } - + // 1D case if (nx > 1 && ny == 1 && nz == 1) { id = n_ghost; @@ -2483,7 +2353,7 @@ void Read_Grid_HDF5_Field(hid_t file_id, Real* dataset_buffer, Header H, Real* g void Read_Grid_HDF5_Field_Magnetic(hid_t file_id, Real* dataset_buffer, Header H, Real* grid_buffer, const char* name) { - // Magnetic has 1 more real cell, 1 fewer n_ghost on one side. + // Magnetic has 1 more real cell, 1 fewer n_ghost on one side. Read_HDF5_Dataset(file_id, dataset_buffer, name); Fill_Grid_From_HDF5_Buffer(H.nx, H.ny, H.nz, H.nx_real + 1, H.ny_real + 1, H.nz_real + 1, H.n_ghost - 1, dataset_buffer, grid_buffer); } @@ -2533,30 +2403,26 @@ void Grid3D::Read_Grid_HDF5(hid_t file_id, struct parameters P) #endif #ifdef SCALAR - #if !defined(COOLING_GRACKLE) && !defined(CHEMISTRY_GPU) - for (int s = 0; s < NSCALARS; s++) { - // create the name of the dataset - char dataset_name[100]; - char number[10]; - strcpy(dataset_name, "/scalar"); - sprintf(number, "%d", s); - strcat(dataset_name, number); - Read_Grid_HDF5_Field(file_id, dataset_buffer, H, &C.scalar[s * H.n_cells], dataset_name); - } - #else + + #ifdef BASIC_SCALAR + Read_Grid_HDF5_Field(file_id, dataset_buffer, H, C.scalar, "/scalar0"); + #endif + + #if defined(COOLING_GRACKLE) || defined(CHEMISTRY_GPU) Read_Grid_HDF5_Field(file_id, dataset_buffer, H, C.HI_density, "/HI_density"); Read_Grid_HDF5_Field(file_id, dataset_buffer, H, C.HII_density, "/HII_density"); - Read_Grid_HDF5_Field(file_id, dataset_buffer, H, C.HeI_density, "/HeI_density"); + Read_Grid_HDF5_Field(file_id, dataset_buffer, H, C.HeI_density, "/HeI_density"); Read_Grid_HDF5_Field(file_id, dataset_buffer, H, C.HeII_density, "/HeII_density"); Read_Grid_HDF5_Field(file_id, dataset_buffer, H, C.HeIII_density, "/HeIII_density"); - Read_Grid_HDF5_Field(file_id, dataset_buffer, H, C.e_density, "/e_density"); + Read_Grid_HDF5_Field(file_id, dataset_buffer, H, C.e_density, "/e_density"); #ifdef GRACKLE_METALS - Read_Grid_HDF5_Field(file_id, dataset_buffer, H, C.metal_density, "/metal_density"); + Read_Grid_HDF5_Field(file_id, dataset_buffer, H, C.metal_density, "/metal_density"); #endif // GRACKLE_METALS #endif // COOLING_GRACKLE , CHEMISTRY_GPU + #endif // SCALAR - // 3D case + // MHD only valid in 3D case if (H.nx > 1 && H.ny > 1 && H.nz > 1) { // Compute Statistic of Initial data Real mean_l, min_l, max_l; diff --git a/src/io/io_gpu.cu b/src/io/io_gpu.cu index c2480af80..613390a94 100644 --- a/src/io/io_gpu.cu +++ b/src/io/io_gpu.cu @@ -17,6 +17,29 @@ // For the magnetic field case, a different // nx_real+1 ny_real+1 nz_real+1 n_ghost-1 are provided as inputs. +// 2D version of CopyReal3D_GPU_Kernel. Note that magnetic fields and float32 output are not enabled in 2-D so this is a simpler kernel +__global__ void CopyReal2D_GPU_Kernel(int nx, int ny, int nx_real, int ny_real, int nz_real, int n_ghost, + Real* destination, Real* source) +{ + int const id = threadIdx.x + blockIdx.x * blockDim.x; + + int i, j, k; + cuda_utilities::compute3DIndices(id, nx_real, ny_real, i, j, k); + // i goes up to nx_real + // j goes up to ny_real + // for 2D, k should be 0 + if (k >= 1) { + return; + } + + // This converts into HDF5 indexing that plays well with Python + int const dest_id = j + i * ny_real; + int const source_id = (i + n_ghost) + (j + n_ghost) * nx; + + destination[dest_id] = source[source_id]; +} + + // Copy Real (non-ghost) cells from source to a double destination (for writing // HDF5 in double precision) __global__ void CopyReal3D_GPU_Kernel(int nx, int ny, int nx_real, int ny_real, int nz_real, int n_ghost, @@ -118,5 +141,35 @@ void WriteHDF5Field3D(int nx, int ny, int nx_real, int ny_real, int nz_real, int printf("File write failed.\n"); } } +void Fill_HDF5_Buffer_From_Grid_GPU(int nx, int ny, int nz, int nx_real, int ny_real, int nz_real, int n_ghost, Real* hdf5_buffer, Real* device_hdf5_buffer, Real* device_grid_buffer){ + int mhd_direction = -1; + + // 3D case + if (nx > 1 && ny > 1 && nz > 1) { + dim3 dim1dGrid((nx_real * ny_real * nz_real + TPB - 1) / TPB, 1, 1); + dim3 dim1dBlock(TPB, 1, 1); + hipLaunchKernelGGL(CopyReal3D_GPU_Kernel, dim1dGrid, dim1dBlock, 0, 0, nx, ny, nx_real, ny_real, nz_real, n_ghost, + device_hdf5_buffer, device_grid_buffer, mhd_direction); + CudaSafeCall(cudaMemcpy(hdf5_buffer, device_hdf5_buffer, nx_real * ny_real * nz_real * sizeof(Real), cudaMemcpyDeviceToHost)); + return; + } + + // 2D case + if (nx > 1 && ny > 1 && nz == 1) { + dim3 dim1dGrid((nx_real * ny_real + TPB - 1) / TPB, 1, 1); + dim3 dim1dBlock(TPB, 1, 1); + hipLaunchKernelGGL(CopyReal2D_GPU_Kernel, dim1dGrid, dim1dBlock, 0, 0, nx, ny, nx_real, ny_real, nz_real, n_ghost, + device_hdf5_buffer, device_grid_buffer); + CudaSafeCall(cudaMemcpy(hdf5_buffer, device_hdf5_buffer, nx_real * ny_real * sizeof(Real), cudaMemcpyDeviceToHost)); + return; + } + + // 1D case + if (nx > 1 && ny == 1 && nz == 1) { + CudaSafeCall(cudaMemcpy(hdf5_buffer, device_grid_buffer + n_ghost, nx_real * sizeof(Real))); + return; + } +} #endif // HDF5 + From 88bce4d2b0d9e0312da24e35cf96ba020ce3fdeb Mon Sep 17 00:00:00 2001 From: Alwin Date: Tue, 11 Apr 2023 02:53:10 -0400 Subject: [PATCH 303/694] fix bugs --- src/io/io.cpp | 71 ++++++++++++++++++---------------- src/io/io_gpu.cu | 2 +- src/io/io_parallel.cpp | 14 ++++++- src/utils/timing_functions.cpp | 4 +- 4 files changed, 51 insertions(+), 40 deletions(-) diff --git a/src/io/io.cpp b/src/io/io.cpp index b039bd7f1..7eac9775c 100644 --- a/src/io/io.cpp +++ b/src/io/io.cpp @@ -1278,7 +1278,7 @@ void Fill_HDF5_Buffer_From_Grid_CPU(int nx, int ny, int nz, int nx_real, int ny_ } /* \brief Before HDF5 reads data into a buffer, remap and write grid to HDF5 buffer. */ -void Fill_HDF5_Buffer_From_Grid_GPU(int nx, int ny, int nz, int nx_real, int ny_real, int nz_real, int n_ghost, Real* hdf5_buffer, Real* device_grid_buffer); +void Fill_HDF5_Buffer_From_Grid_GPU(int nx, int ny, int nz, int nx_real, int ny_real, int nz_real, int n_ghost, Real* hdf5_buffer, Real* device_hdf5_buffer, Real* device_grid_buffer); // From src/io/io_gpu // Set up dataspace for grid formatted data and write dataset @@ -1288,27 +1288,27 @@ void Write_HDF5_Dataset_Grid(int nx, int ny, int nz, int nx_real, int ny_real, i hid_t dataspace_id; // 1-D Case - if (H.nx > 1 && H.ny == 1 && H.nz == 1) { + if (nx > 1 && ny == 1 && nz == 1) { int rank = 1; hsize_t dims[1]; - dims[0] = H.nx_real; + dims[0] = nx_real; dataspace_id = H5Screate_simple(rank, dims, NULL); } // 2-D Case - if (H.nx > 1 && H.ny > 1 && H.nz == 1) { + if (nx > 1 && ny > 1 && nz == 1) { int rank = 2; hsize_t dims[2]; - dims[0] = H.nx_real; - dims[1] = H.ny_real; + dims[0] = nx_real; + dims[1] = ny_real; dataspace_id = H5Screate_simple(rank, dims, NULL); } // 3-D Case - if (H.nx > 1 && H.ny > 1 && H.nz > 1) { + if (nx > 1 && ny > 1 && nz > 1) { int rank = 3; hsize_t dims[3]; - dims[0] = H.nx_real; - dims[1] = H.ny_real; - dims[2] = H.nz_real; + dims[0] = nx_real; + dims[1] = ny_real; + dims[2] = nz_real; dataspace_id = H5Screate_simple(rank, dims, NULL); } @@ -1396,10 +1396,13 @@ void Grid3D::Write_Grid_HDF5(hid_t file_id) // Allocate necessary buffers + int nx_dset = H.nx_real; + int ny_dset = H.ny_real; + int nz_dset = H.nz_real; #ifdef MHD - buffer_size = (nx_dset + 1) * (ny_dset + 1) * (nz_dset + 1); + size_t buffer_size = (nx_dset + 1) * (ny_dset + 1) * (nz_dset + 1); #else - buffer_size = nx_dset * ny_dset * nz_dset; + size_t buffer_size = nx_dset * ny_dset * nz_dset; #endif cuda_utilities::DeviceVector static device_dataset_vector{buffer_size}; Real *device_dataset_buffer = device_dataset_vector.data(); @@ -1408,41 +1411,41 @@ void Grid3D::Write_Grid_HDF5(hid_t file_id) // Start writing fields - Write_Grid_HDF5_Field(H, file_id, dataset_buffer, C.density, "/density"); + Write_Grid_HDF5_Field_CPU(H, file_id, dataset_buffer, C.density, "/density"); if (output_momentum || H.Output_Complete_Data) { - Write_Grid_HDF5_Field(H, file_id, dataset_buffer, C.momentum_x, "/momentum_x"); - Write_Grid_HDF5_Field(H, file_id, dataset_buffer, C.momentum_y, "/momentum_y"); - Write_Grid_HDF5_Field(H, file_id, dataset_buffer, C.momentum_z, "/momentum_z"); + Write_Grid_HDF5_Field_CPU(H, file_id, dataset_buffer, C.momentum_x, "/momentum_x"); + Write_Grid_HDF5_Field_CPU(H, file_id, dataset_buffer, C.momentum_y, "/momentum_y"); + Write_Grid_HDF5_Field_CPU(H, file_id, dataset_buffer, C.momentum_z, "/momentum_z"); } if (output_energy || H.Output_Complete_Data) { - Write_Grid_HDF5_Field(H, file_id, dataset_buffer, C.Energy, "/Energy"); + Write_Grid_HDF5_Field_CPU(H, file_id, dataset_buffer, C.Energy, "/Energy"); #ifdef DE - Write_Grid_HDF5_Field(H, file_id, dataset_buffer, C.GasEnergy, "/GasEnergy"); + Write_Grid_HDF5_Field_CPU(H, file_id, dataset_buffer, C.GasEnergy, "/GasEnergy"); #endif } #ifdef SCALAR #ifdef BASIC_SCALAR - Write_Grid_HDF5_Field(H, file_id, dataset_buffer, C.scalar, "/scalar0"); + Write_Grid_HDF5_Field_CPU(H, file_id, dataset_buffer, C.scalar, "/scalar0"); #endif #ifdef OUTPUT_CHEMISTRY #ifdef CHEMISTRY_GPU - Write_Grid_HDF5_Field(H, file_id, dataset_buffer, C.HI_density, "/HI_density"); - Write_Grid_HDF5_Field(H, file_id, dataset_buffer, C.HII_density, "/HII_density"); - Write_Grid_HDF5_Field(H, file_id, dataset_buffer, C.HeI_density, "/HeI_density"); - Write_Grid_HDF5_Field(H, file_id, dataset_buffer, C.HeII_density, "/HeII_density"); - Write_Grid_HDF5_Field(H, file_id, dataset_buffer, C.HeIII_density, "/HeIII_density"); - Write_Grid_HDF5_Field(H, file_id, dataset_buffer, C.e_density, "/e_density"); + Write_Grid_HDF5_Field_CPU(H, file_id, dataset_buffer, C.HI_density, "/HI_density"); + Write_Grid_HDF5_Field_CPU(H, file_id, dataset_buffer, C.HII_density, "/HII_density"); + Write_Grid_HDF5_Field_CPU(H, file_id, dataset_buffer, C.HeI_density, "/HeI_density"); + Write_Grid_HDF5_Field_CPU(H, file_id, dataset_buffer, C.HeII_density, "/HeII_density"); + Write_Grid_HDF5_Field_CPU(H, file_id, dataset_buffer, C.HeIII_density, "/HeIII_density"); + Write_Grid_HDF5_Field_CPU(H, file_id, dataset_buffer, C.e_density, "/e_density"); #elif defined(COOLING_GRACKLE) - Write_Grid_HDF5_Field(H, file_id, dataset_buffer, Cool.fields.HI_density, "/HI_density"); - Write_Grid_HDF5_Field(H, file_id, dataset_buffer, Cool.fields.HII_density, "/HII_density"); - Write_Grid_HDF5_Field(H, file_id, dataset_buffer, Cool.fields.HeI_density, "/HeI_density"); - Write_Grid_HDF5_Field(H, file_id, dataset_buffer, Cool.fields.HeII_density, "/HeII_density"); - Write_Grid_HDF5_Field(H, file_id, dataset_buffer, Cool.fields.HeIII_density, "/HeIII_density"); + Write_Grid_HDF5_Field_CPU(H, file_id, dataset_buffer, Cool.fields.HI_density, "/HI_density"); + Write_Grid_HDF5_Field_CPU(H, file_id, dataset_buffer, Cool.fields.HII_density, "/HII_density"); + Write_Grid_HDF5_Field_CPU(H, file_id, dataset_buffer, Cool.fields.HeI_density, "/HeI_density"); + Write_Grid_HDF5_Field_CPU(H, file_id, dataset_buffer, Cool.fields.HeII_density, "/HeII_density"); + Write_Grid_HDF5_Field_CPU(H, file_id, dataset_buffer, Cool.fields.HeIII_density, "/HeIII_density"); if (output_electrons || H.Output_Complete_Data) { - Write_Grid_HDF5_Field(H, file_id, dataset_buffer, Cool.fields.e_density, "/e_density"); + Write_Grid_HDF5_Field_CPU(H, file_id, dataset_buffer, Cool.fields.e_density, "/e_density"); } #endif #endif //OUTPUT_CHEMISTRY @@ -1451,7 +1454,7 @@ void Grid3D::Write_Grid_HDF5(hid_t file_id) #ifdef GRACKLE_METALS if (output_metals || H.Output_Complete_Data) { - Write_Grid_HDF5_Field(H, file_id, dataset_buffer, Cool.fields.metal_density, "/metal_density"); + Write_Grid_HDF5_Field_CPU(H, file_id, dataset_buffer, Cool.fields.metal_density, "/metal_density"); } #endif // GRACKLE_METALS @@ -1459,9 +1462,9 @@ void Grid3D::Write_Grid_HDF5(hid_t file_id) #ifdef OUTPUT_TEMPERATURE #ifdef CHEMISTRY_GPU Compute_Gas_Temperature(Chem.Fields.temperature_h, false); - Write_Grid_HDF5_Field(H, file_id, dataset_buffer, Chem.Fields.temperature_h, "/temperature"); + Write_Grid_HDF5_Field_CPU(H, file_id, dataset_buffer, Chem.Fields.temperature_h, "/temperature"); #elif defined(COOLING_GRACKLE) - Write_Grid_HDF5_Field(H, file_id, dataset_buffer, Cool.temperature, "/temperature"); + Write_Grid_HDF5_Field_CPU(H, file_id, dataset_buffer, Cool.temperature, "/temperature"); #endif #endif diff --git a/src/io/io_gpu.cu b/src/io/io_gpu.cu index 613390a94..7b2b9b17c 100644 --- a/src/io/io_gpu.cu +++ b/src/io/io_gpu.cu @@ -166,7 +166,7 @@ void Fill_HDF5_Buffer_From_Grid_GPU(int nx, int ny, int nz, int nx_real, int ny_ // 1D case if (nx > 1 && ny == 1 && nz == 1) { - CudaSafeCall(cudaMemcpy(hdf5_buffer, device_grid_buffer + n_ghost, nx_real * sizeof(Real))); + CudaSafeCall(cudaMemcpy(hdf5_buffer, device_grid_buffer + n_ghost, nx_real * sizeof(Real), cudaMemcpyDeviceToHost)); return; } } diff --git a/src/io/io_parallel.cpp b/src/io/io_parallel.cpp index 44f0ce186..1217eea0a 100644 --- a/src/io/io_parallel.cpp +++ b/src/io/io_parallel.cpp @@ -1,10 +1,11 @@ // Routines for using Parallel HDF5 to read/write from single file +#include "../grid/grid3D.h" +#include "../io/io.h" +#include "../utils/error_handling.h" #if defined(HDF5) && defined(MPI_CHOLLA) #include -#include "../io/io.h" #include "../mpi/mpi_routines.h" -#include "../grid/grid3D.h" #include "../utils/timing_functions.h" // provides ScopedTimer @@ -124,5 +125,14 @@ void Grid3D::Read_Grid_Cat(struct parameters P) status = H5Fclose(file_id); } +#else + +void Grid3D::Read_Grid_Cat(struct parameters P) +{ + chprintf("Warning: Read_Grid_Cat does nothing without MPI_CHOLLA and HDF5\n"); + chexit(-1); + return; + // Does nothing without HDF5 and MPI_CHOLLA +} #endif diff --git a/src/utils/timing_functions.cpp b/src/utils/timing_functions.cpp index 3c8b94344..d28f197c6 100644 --- a/src/utils/timing_functions.cpp +++ b/src/utils/timing_functions.cpp @@ -1,8 +1,6 @@ - +#include "../utils/timing_functions.h" #ifdef CPU_TIME - #include "../utils/timing_functions.h" - #include #include #include From 3b48d8a241d9768590f57e8778beab3c41be7542 Mon Sep 17 00:00:00 2001 From: Alwin Date: Tue, 11 Apr 2023 03:02:50 -0400 Subject: [PATCH 304/694] format --- src/analysis/lya_statistics.cpp | 2 +- src/cooling/cooling_cuda.cu | 4 +- src/dust/dust_cuda.cu | 8 +- src/global/global.h | 28 +-- src/global/global_cuda.h | 2 +- src/gravity/grav3D.cpp | 4 +- src/gravity/gravity_functions.cpp | 14 +- src/gravity/gravity_functions_gpu.cu | 6 +- src/gravity/paris/HenryPeriodic.hpp | 22 +-- src/gravity/paris/ParisPeriodic.hpp | 4 +- src/grid/boundary_conditions.cpp | 4 +- src/grid/grid3D.cpp | 8 +- src/grid/grid3D.h | 2 +- src/grid/grid_enum.h | 4 +- src/grid/initial_conditions.cpp | 24 +-- src/grid/mpi_boundaries.cpp | 4 +- src/integrators/VL_3D_cuda.cu | 2 +- src/io/io.cpp | 251 ++++++++++++------------ src/io/io.h | 6 +- src/io/io_gpu.cu | 20 +- src/io/io_parallel.cpp | 48 ++--- src/main.cpp | 2 +- src/model/disk_ICs.cpp | 28 +-- src/model/disk_galaxy.h | 2 +- src/mpi/mpi_routines.cpp | 16 +- src/mpi/mpi_routines.h | 16 +- src/particles/density_CIC.cpp | 4 +- src/particles/density_boundaries_gpu.cu | 2 +- src/particles/gravity_CIC.cpp | 2 +- src/particles/gravity_CIC_gpu.cu | 2 +- src/particles/io_particles.cpp | 2 +- src/particles/particles_3D.cpp | 8 +- src/particles/particles_3D.h | 4 +- src/particles/particles_boundaries.cpp | 4 +- src/particles/particles_dynamics.cpp | 4 +- src/particles/supernova.h | 4 +- src/reconstruction/plmc_cuda.cu | 2 +- src/reconstruction/plmp_cuda.cu | 4 +- src/reconstruction/ppmc_cuda.cu | 2 +- src/reconstruction/ppmp_cuda.cu | 4 +- src/riemann_solvers/exact_cuda.cu | 10 +- src/riemann_solvers/hlld_cuda.cu | 2 +- src/system_tests/hydro_system_tests.cpp | 4 +- src/system_tests/mhd_system_tests.cpp | 18 +- src/utils/error_handling.cpp | 2 +- src/utils/reduction_utilities.h | 4 +- src/utils/timing_functions.cpp | 18 +- src/utils/timing_functions.h | 26 +-- 48 files changed, 323 insertions(+), 340 deletions(-) diff --git a/src/analysis/lya_statistics.cpp b/src/analysis/lya_statistics.cpp index 3a22149a1..1473365f7 100644 --- a/src/analysis/lya_statistics.cpp +++ b/src/analysis/lya_statistics.cpp @@ -942,7 +942,7 @@ void Grid3D::Compute_Transmitted_Flux_Skewer(int skewer_id, int axis) c = 2.99792458e10; // cm/s kpc = 3.0857e21; // cm kpc3 = kpc * kpc * kpc; - e_charge = 4.8032e-10; // cm^3/2 g^1/2 s^-1 + e_charge = 4.8032e-10; // cm^3/2 g^1/2 s^-1 // Fill the Real cells first for (int los_id = 0; los_id < n_los_total; los_id++) { diff --git a/src/cooling/cooling_cuda.cu b/src/cooling/cooling_cuda.cu index 4b09527d0..c0fe25d34 100644 --- a/src/cooling/cooling_cuda.cu +++ b/src/cooling/cooling_cuda.cu @@ -355,5 +355,5 @@ __device__ Real Cloudy_cool(Real n, Real T, cudaTextureObject_t coolTexObj, cuda } #endif // CLOUDY_COOL - #endif // COOLING_GPU -#endif // CUDA + #endif // COOLING_GPU +#endif // CUDA diff --git a/src/dust/dust_cuda.cu b/src/dust/dust_cuda.cu index 344d3c9ce..f9c71fa2b 100644 --- a/src/dust/dust_cuda.cu +++ b/src/dust/dust_cuda.cu @@ -117,11 +117,11 @@ __global__ void Dust_Kernel(Real *dev_conserved, int nx, int ny, int nz, int n_g __device__ __host__ Real calc_tau_sp(Real n, Real T) { Real YR_IN_S = 3.154e7; - Real a1 = 1; // dust grain size in units of 0.1 micrometers - Real d0 = n / (6e-4); // gas density in units of 10^-27 g/cm^3 - Real T_0 = 2e6; // K + Real a1 = 1; // dust grain size in units of 0.1 micrometers + Real d0 = n / (6e-4); // gas density in units of 10^-27 g/cm^3 + Real T_0 = 2e6; // K Real omega = 2.5; - Real A = 0.17e9 * YR_IN_S; // 0.17 Gyr in s + Real A = 0.17e9 * YR_IN_S; // 0.17 Gyr in s Real tau_sp = A * (a1 / d0) * (pow(T_0 / T, omega) + 1); // s diff --git a/src/global/global.h b/src/global/global.h index 9ba6ca331..1b6e0ace8 100644 --- a/src/global/global.h +++ b/src/global/global.h @@ -31,16 +31,16 @@ typedef double Real; #define MP 1.672622e-24 // mass of proton, grams #define KB 1.380658e-16 // boltzmann constant, cgs // #define GN 6.67259e-8 // gravitational constant, cgs -#define GN 4.49451e-18 // gravitational constant, kpc^3 / M_sun / kyr^2 -#define C_L 0.306594593 // speed of light in kpc/kyr +#define GN 4.49451e-18 // gravitational constant, kpc^3 / M_sun / kyr^2 +#define C_L 0.306594593 // speed of light in kpc/kyr -#define MYR 31.536e12 // Myears in secs -#define KPC 3.086e16 // kpc in km -#define G_COSMO 4.300927161e-06; // gravitational constant, kpc km^2 s^-2 Msun^-1 -#define MSUN_CGS 1.98847e33; // Msun in gr -#define KPC_CGS 3.086e21; // kpc in cm -#define KM_CGS 1e5; // km in cm -#define MH 1.67262171e-24 // Mass of hydrogen [g] +#define MYR 31.536e12 // Myears in secs +#define KPC 3.086e16 // kpc in km +#define G_COSMO 4.300927161e-06; // gravitational constant, kpc km^2 s^-2 Msun^-1 +#define MSUN_CGS 1.98847e33; // Msun in gr +#define KPC_CGS 3.086e21; // kpc in cm +#define KM_CGS 1e5; // km in cm +#define MH 1.67262171e-24 // Mass of hydrogen [g] #define TIME_UNIT 3.15569e10 // 1 kyr in s #define LENGTH_UNIT 3.08567758e21 // 1 kpc in cm @@ -93,8 +93,8 @@ typedef double Real; #ifdef GRAVITY_5_POINTS_GRADIENT #ifdef PARTICLES #define N_GHOST_POTENTIAL \ - 3 // 3 ghost cells are needed for 5 point gradient, ( one is for the - // CIC interpolation of the potential ) + 3 // 3 ghost cells are needed for 5 point gradient, ( one is for the + // CIC interpolation of the potential ) #else #define N_GHOST_POTENTIAL 2 // 2 ghost cells are needed for 5 point gradient #endif // PARTICLES @@ -102,8 +102,8 @@ typedef double Real; #else #ifdef PARTICLES #define N_GHOST_POTENTIAL \ - 2 // 2 ghost cells are needed for 3 point gradient, ( one is for the - // CIC interpolation of the potential ) + 2 // 2 ghost cells are needed for 3 point gradient, ( one is for the + // CIC interpolation of the potential ) #else #define N_GHOST_POTENTIAL 1 // 1 ghost cells are needed for 3 point gradient #endif // PARTICLES @@ -133,7 +133,7 @@ extern int N_DATA_PER_PARTICLE_TRANSFER; #define SLOW_FACTOR 10 #endif // AVERAGE_SLOW_CELLS -#endif // PARTICLES +#endif // PARTICLES #define SIGN(a) (((a) < 0.) ? -1. : 1.) diff --git a/src/global/global_cuda.h b/src/global/global_cuda.h index 296fa31f1..f14b5e893 100644 --- a/src/global/global_cuda.h +++ b/src/global/global_cuda.h @@ -120,4 +120,4 @@ __device__ double atomicAdd(double *address, double val) #endif // GLOBAL_CUDA_H -#endif // CUDA +#endif // CUDA diff --git a/src/gravity/grav3D.cpp b/src/gravity/grav3D.cpp index 20ca67655..722623cc1 100644 --- a/src/gravity/grav3D.cpp +++ b/src/gravity/grav3D.cpp @@ -129,8 +129,8 @@ void Grav3D::AllocateMemory_CPU(void) // density F.potential_h = (Real *)malloc(n_cells_potential * sizeof(Real)); // array for the potential at the n-th timestep F.potential_1_h = - (Real *)malloc(n_cells_potential * sizeof(Real)); // array for the potential at the (n-1)-th timestep - boundary_flags = (int *)malloc(6 * sizeof(int)); // array for the gravity boundary flags + (Real *)malloc(n_cells_potential * sizeof(Real)); // array for the potential at the (n-1)-th timestep + boundary_flags = (int *)malloc(6 * sizeof(int)); // array for the gravity boundary flags #ifdef GRAV_ISOLATED_BOUNDARY_X F.pot_boundary_x0 = (Real *)malloc(N_GHOST_POTENTIAL * ny_local * nz_local * diff --git a/src/gravity/gravity_functions.cpp b/src/gravity/gravity_functions.cpp index 1f4a08f7f..28d4562c7 100644 --- a/src/gravity/gravity_functions.cpp +++ b/src/gravity/gravity_functions.cpp @@ -53,7 +53,7 @@ void Grid3D::set_dt_Gravity() da_min = da_particles; chprintf(" Delta_a_particles: %f \n", da_particles); - #else // NOT ONLY_PARTICLES + #else // NOT ONLY_PARTICLES // Here da_min is the minumum between da_particles and da_hydro Real da_hydro; da_hydro = @@ -607,7 +607,7 @@ void Grid3D::Add_Analytic_Potential() #endif // PARALLEL_OMP #endif // GRAVITY_GPU else } - #endif // GRAVITY_ANALYTIC_COMP + #endif // GRAVITY_ANALYTIC_COMP void Grid3D::Copy_Hydro_Density_to_Gravity_Function(int g_start, int g_end) { @@ -661,7 +661,7 @@ void Grid3D::Copy_Hydro_Density_to_Gravity() } #endif // PARALLEL_OMP - #endif // GRAVITY_GPU + #endif // GRAVITY_GPU } #ifdef GRAVITY_ANALYTIC_COMP @@ -747,10 +747,10 @@ void Grid3D::Extrapolate_Grav_Potential_Function(int g_start, int g_end) for (i = 0; i < nx_pot; i++) { id_pot = i + j * nx_pot + k * nx_pot * ny_pot; id_grid = (i + nGHST) + (j + nGHST) * nx_grid + (k + nGHST) * nx_grid * ny_grid; - pot_now = potential_in[id_pot]; // Potential at the n-th timestep + pot_now = potential_in[id_pot]; // Potential at the n-th timestep if (Grav.INITIAL) { - pot_extrp = pot_now; // The first timestep the extrapolated potential - // is phi_0 + pot_extrp = pot_now; // The first timestep the extrapolated potential + // is phi_0 } else { pot_prev = Grav.F.potential_1_h[id_pot]; // Potential at the (n-1)-th // timestep ( previous step ) @@ -797,7 +797,7 @@ void Grid3D::Extrapolate_Grav_Potential() } #endif // PARALLEL_OMP - #endif // GRAVITY_GPU + #endif // GRAVITY_GPU // After the first timestep the INITIAL flag is set to false, that way the // potential is properly extrapolated afterwards diff --git a/src/gravity/gravity_functions_gpu.cu b/src/gravity/gravity_functions_gpu.cu index 236670b49..2c2ac8aee 100644 --- a/src/gravity/gravity_functions_gpu.cu +++ b/src/gravity/gravity_functions_gpu.cu @@ -217,9 +217,9 @@ void __global__ Extrapolate_Grav_Potential_Kernel(Real *dst_potential, Real *src tid_grid = tid_x + tid_y * nx_grid + tid_z * nx_grid * ny_grid; Real pot_now, pot_prev, pot_extrp; - pot_now = src_potential_0[tid_pot]; // Potential at the n-th timestep + pot_now = src_potential_0[tid_pot]; // Potential at the n-th timestep if (INITIAL) { - pot_extrp = pot_now; // The first timestep the extrapolated potential is phi_0 + pot_extrp = pot_now; // The first timestep the extrapolated potential is phi_0 } else { pot_prev = src_potential_1[tid_pot]; // Potential at the (n-1)-th timestep // ( previous step ) @@ -290,4 +290,4 @@ void Grid3D::Copy_Potential_From_GPU() } #endif // PARTICLES_CPU -#endif // GRAVITY +#endif // GRAVITY diff --git a/src/gravity/paris/HenryPeriodic.hpp b/src/gravity/paris/HenryPeriodic.hpp index 4fa2467b2..d40e574d8 100644 --- a/src/gravity/paris/HenryPeriodic.hpp +++ b/src/gravity/paris/HenryPeriodic.hpp @@ -61,23 +61,23 @@ class HenryPeriodic void filter(const size_t bytes, double *const before, double *const after, const F f) const; private: - int idi_, idj_, idk_; //!< MPI coordinates of 3D block - int mi_, mj_, mk_; //!< Number of MPI tasks in each dimension of 3D domain - int nh_; //!< Global number of complex values in Z dimension, after R2C - //!< transform - int ni_, nj_, nk_; //!< Global number of real points in each dimension - int mp_, mq_; //!< Number of MPI tasks in X and Y dimensions of Z pencil - int idp_, idq_; //!< X and Y task IDs within Z pencil + int idi_, idj_, idk_; //!< MPI coordinates of 3D block + int mi_, mj_, mk_; //!< Number of MPI tasks in each dimension of 3D domain + int nh_; //!< Global number of complex values in Z dimension, after R2C + //!< transform + int ni_, nj_, nk_; //!< Global number of real points in each dimension + int mp_, mq_; //!< Number of MPI tasks in X and Y dimensions of Z pencil + int idp_, idq_; //!< X and Y task IDs within Z pencil MPI_Comm commI_, commJ_, commK_; //!< Communicators of fellow tasks in X, Y, and Z pencils int dh_, di_, dj_, dk_; //!< Max number of local points in each dimension int dhq_, dip_, djp_, - djq_; //!< Max number of local points in dimensions of 2D decompositions - size_t bytes_; //!< Max bytes needed for argument arrays + djq_; //!< Max number of local points in dimensions of 2D decompositions + size_t bytes_; //!< Max bytes needed for argument arrays cufftHandle c2ci_, c2cj_, c2rk_, - r2ck_; //!< Objects for forward and inverse FFTs + r2ck_; //!< Objects for forward and inverse FFTs #ifndef MPI_GPU - double *ha_, *hb_; //!< Host copies for MPI messages + double *ha_, *hb_; //!< Host copies for MPI messages #endif }; diff --git a/src/gravity/paris/ParisPeriodic.hpp b/src/gravity/paris/ParisPeriodic.hpp index 2650e156d..d9a43e457 100644 --- a/src/gravity/paris/ParisPeriodic.hpp +++ b/src/gravity/paris/ParisPeriodic.hpp @@ -44,9 +44,9 @@ class ParisPeriodic void solve(size_t bytes, double *density, double *potential) const; private: - int ni_, nj_; //!< Number of elements in X and Y dimensions + int ni_, nj_; //!< Number of elements in X and Y dimensions #if defined(PARIS_3PT) || defined(PARIS_5PT) - int nk_; //!< Number of elements in Z dimension + int nk_; //!< Number of elements in Z dimension #endif double ddi_, ddj_, ddk_; //!< Frequency-independent terms in Poisson solve HenryPeriodic henry; //!< FFT filter object diff --git a/src/grid/boundary_conditions.cpp b/src/grid/boundary_conditions.cpp index 50c55126d..9c8df7ba1 100644 --- a/src/grid/boundary_conditions.cpp +++ b/src/grid/boundary_conditions.cpp @@ -119,13 +119,13 @@ void Grid3D::Set_Boundary_Conditions(parameters P) Grav.Set_Boundary_Flags(flags); #endif // Gravity -#else /*MPI_CHOLLA*/ +#else /*MPI_CHOLLA*/ /*Set boundaries, including MPI exchanges*/ Set_Boundaries_MPI(P); -#endif /*MPI_CHOLLA*/ +#endif /*MPI_CHOLLA*/ } /*! \fn int Check_Custom_Boundary(int *flags, struct parameters P) diff --git a/src/grid/grid3D.cpp b/src/grid/grid3D.cpp index d83c32e0b..4d3c23a86 100644 --- a/src/grid/grid3D.cpp +++ b/src/grid/grid3D.cpp @@ -93,7 +93,7 @@ void Grid3D::Get_Position(long i, long j, long k, Real *x_pos, Real *y_pos, Real *y_pos = H.ybound + H.dy * (j - H.n_ghost) + 0.5 * H.dy; *z_pos = H.zbound + H.dz * (k - H.n_ghost) + 0.5 * H.dz; -#else /*MPI_CHOLLA*/ +#else /*MPI_CHOLLA*/ /* position relative to local xyz bounds */ /* This approach was replaced because it is less consistent for multiple @@ -179,7 +179,7 @@ void Grid3D::Initialize(struct parameters *P) // set total number of cells H.n_cells = H.nx * H.ny * H.nz; -#else /*MPI_CHOLLA*/ +#else /*MPI_CHOLLA*/ /* perform domain decomposition * and set grid dimensions @@ -388,7 +388,7 @@ void Grid3D::set_dt(Real dti) H.dt = C_cfl / max_dti; -#endif // ONLY_PARTICLES +#endif // ONLY_PARTICLES #ifdef GRAVITY // Set dt for hydro and particles @@ -554,7 +554,7 @@ Real Grid3D::Update_Hydro_Grid() // Subtract the time spent on the Chemical Update #endif // CHEMISTRY_GPU Timer.Hydro.End(); -#endif // CPU_TIME +#endif // CPU_TIME #ifdef COOLING_GRACKLE #ifdef CPU_TIME diff --git a/src/grid/grid3D.h b/src/grid/grid3D.h index 1cdf9faed..ed825d0d4 100644 --- a/src/grid/grid3D.h +++ b/src/grid/grid3D.h @@ -509,7 +509,7 @@ class Grid3D /*! \fn void Read_Grid_Cat(struct parameters P) * \brief Read in grid data from a single concatenated output file. */ - void Read_Grid_Cat(struct parameters P); + void Read_Grid_Cat(struct parameters P); /*! \fn Read_Grid_Binary(FILE *fp) * \brief Read in grid data from a binary file. */ diff --git a/src/grid/grid_enum.h b/src/grid/grid_enum.h index 15e1d604a..cfbce7b70 100644 --- a/src/grid/grid_enum.h +++ b/src/grid/grid_enum.h @@ -53,9 +53,9 @@ enum : int { #ifdef DUST dust_density, - #endif // DUST + #endif // DUST -#endif // SCALAR +#endif // SCALAR finalscalar_plus_1, // needed to calculate NSCALARS finalscalar = finalscalar_plus_1 - 1, // resets enum to finalscalar so fields afterwards are correct diff --git a/src/grid/initial_conditions.cpp b/src/grid/initial_conditions.cpp index 8a2bfae98..847df9ead 100644 --- a/src/grid/initial_conditions.cpp +++ b/src/grid/initial_conditions.cpp @@ -778,7 +778,7 @@ void Grid3D::KH_res_ind() #ifdef DE C.GasEnergy[id] = P / (gama - 1.0); -#endif // DE +#endif // DE } // i loop } // j loop @@ -1388,18 +1388,18 @@ void Grid3D::Zeldovich_Pancake(struct parameters P) Real H0, h, Omega_M, rho_0, G, z_zeldovich, z_init, x_center, T_init, k_x; chprintf("Setting Zeldovich Pancake initial conditions...\n"); - H0 = P.H0; - h = H0 / 100; + H0 = P.H0; + h = H0 / 100; Omega_M = P.Omega_M; chprintf(" h = %f \n", h); chprintf(" Omega_M = %f \n", Omega_M); H0 /= 1000; //[km/s / kpc] - G = G_COSMO; - rho_0 = 3 * H0 * H0 / (8 * M_PI * G) * Omega_M / h / h; + G = G_COSMO; + rho_0 = 3 * H0 * H0 / (8 * M_PI * G) * Omega_M / h / h; z_zeldovich = 1; - z_init = P.Init_redshift; + z_init = P.Init_redshift; chprintf(" rho_0 = %f \n", rho_0); chprintf(" z_init = %f \n", z_init); chprintf(" z_zeldovich = %f \n", z_zeldovich); @@ -1459,20 +1459,20 @@ void Grid3D::Zeldovich_Pancake(struct parameters P) index = (int(x_pos / H.dx) + 0) % 256; // index = ( index + 16 ) % 256; dens = ics_values[0 * nPoints + index]; - vel = ics_values[1 * nPoints + index]; - E = ics_values[2 * nPoints + index]; - U = ics_values[3 * nPoints + index]; + vel = ics_values[1 * nPoints + index]; + E = ics_values[2 * nPoints + index]; + U = ics_values[3 * nPoints + index]; // // // chprintf( "%f \n", vel ); - C.density[id] = dens; + C.density[id] = dens; C.momentum_x[id] = dens * vel; C.momentum_y[id] = 0; C.momentum_z[id] = 0; - C.Energy[id] = E; + C.Energy[id] = E; #ifdef DE - C.GasEnergy[id] = U; + C.GasEnergy[id] = U; #endif } } diff --git a/src/grid/mpi_boundaries.cpp b/src/grid/mpi_boundaries.cpp index 9b858c8cf..1ea369e64 100644 --- a/src/grid/mpi_boundaries.cpp +++ b/src/grid/mpi_boundaries.cpp @@ -959,7 +959,7 @@ void Grid3D::Unload_MPI_Comm_Buffers(int index) } #endif // SOR - #endif // GRAVITY + #endif // GRAVITY #ifdef PARTICLES if (Particles.TRANSFER_DENSITY_BOUNDARIES) { @@ -1015,7 +1015,7 @@ void Grid3D::Unload_MPI_Comm_Buffers(int index) Fptr_Unload_Particle_Density = &Grid3D::Unload_Particles_Density_Boundary_From_Buffer; - #endif // PARTICLES_GPU + #endif // PARTICLES_GPU if (index == 0) { (this->*Fptr_Unload_Particle_Density)(0, 0, l_recv_buffer_x0); diff --git a/src/integrators/VL_3D_cuda.cu b/src/integrators/VL_3D_cuda.cu index 2227172bf..9297f2fa2 100644 --- a/src/integrators/VL_3D_cuda.cu +++ b/src/integrators/VL_3D_cuda.cu @@ -467,7 +467,7 @@ __global__ void Update_Conserved_Variables_3D_half(Real *dev_conserved, Real *de dev_conserved_half[(n_fields - 1) * n_cells + id] *= (density_floor / dens_0); #endif // DE } - #endif // DENSITY_FLOOR + #endif // DENSITY_FLOOR } } diff --git a/src/io/io.cpp b/src/io/io.cpp index 7eac9775c..41ca67b49 100644 --- a/src/io/io.cpp +++ b/src/io/io.cpp @@ -14,7 +14,7 @@ #endif // HDF5 #include "../grid/grid3D.h" #include "../io/io.h" -#include "../utils/timing_functions.h" // provides ScopedTimer +#include "../utils/timing_functions.h" // provides ScopedTimer #ifdef MPI_CHOLLA #include "../mpi/mpi_routines.h" #endif // MPI_CHOLLA @@ -341,7 +341,7 @@ void OutputFloat32(Grid3D &G, struct parameters P, int nfile) } #endif // DE #ifdef MHD - + // TODO : Repair output format if needed and remove these chprintfs when appropriate if (P.out_float32_magnetic_x > 0) { chprintf("WARNING: MHD float-32 output has a different output format than float-64\n"); @@ -349,16 +349,16 @@ void OutputFloat32(Grid3D &G, struct parameters P, int nfile) device_dataset_buffer, G.C.d_magnetic_x, "/magnetic_x"); } if (P.out_float32_magnetic_y > 0) { - chprintf("WARNING: MHD float-32 output has a different output format than float-64\n"); + chprintf("WARNING: MHD float-32 output has a different output format than float-64\n"); WriteHDF5Field3D(H.nx, H.ny, nx_dset + 1, ny_dset + 1, nz_dset + 1, H.n_ghost - 1, file_id, dataset_buffer, device_dataset_buffer, G.C.d_magnetic_y, "/magnetic_y"); } if (P.out_float32_magnetic_z > 0) { - chprintf("WARNING: MHD float-32 output has a different output format than float-64\n"); + chprintf("WARNING: MHD float-32 output has a different output format than float-64\n"); WriteHDF5Field3D(H.nx, H.ny, nx_dset + 1, ny_dset + 1, nz_dset + 1, H.n_ghost - 1, file_id, dataset_buffer, device_dataset_buffer, G.C.d_magnetic_z, "/magnetic_z"); } - + #endif free(dataset_buffer); @@ -941,7 +941,7 @@ void Grid3D::Write_Grid_Text(FILE *fp) fprintf(fp, "\tNan"); #endif // DE fprintf(fp, "\n"); -#endif // MHD +#endif // MHD } // 2D case @@ -976,7 +976,7 @@ void Grid3D::Write_Grid_Text(FILE *fp) fprintf(fp, "\tNan"); #endif // DE fprintf(fp, "\n"); -#endif // MHD +#endif // MHD } #ifdef MHD // Save the last line of magnetic fields @@ -987,7 +987,7 @@ void Grid3D::Write_Grid_Text(FILE *fp) fprintf(fp, "\tNan"); #endif // DE fprintf(fp, "\n"); -#endif // MHD +#endif // MHD } // 3D case @@ -1158,7 +1158,6 @@ herr_t Read_HDF5_Dataset(hid_t file_id, float *dataset_buffer, const char *name) return status; } - // Helper function which uses the correct HDF5 arguments based on the type of // dataset_buffer to avoid writing garbage herr_t Write_HDF5_Dataset(hid_t file_id, hid_t dataspace_id, double *dataset_buffer, const char *name) @@ -1174,7 +1173,7 @@ herr_t Write_HDF5_Dataset(hid_t file_id, hid_t dataspace_id, double *dataset_buf herr_t Write_HDF5_Dataset(hid_t file_id, hid_t dataspace_id, float *dataset_buffer, const char *name) { - // Create the dataset id + // Create the dataset id hid_t dataset_id = H5Dcreate(file_id, name, H5T_IEEE_F32BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); // Write the array to file herr_t status = H5Dwrite(dataset_id, H5T_NATIVE_FLOAT, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer); @@ -1238,20 +1237,20 @@ void Write_HDF5_Field_2D_CPU(Header H, hid_t file_id, hid_t dataspace_id, float herr_t status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer, name); } - /* \brief Before HDF5 reads data into a buffer, remap and write grid to HDF5 buffer. */ -void Fill_HDF5_Buffer_From_Grid_CPU(int nx, int ny, int nz, int nx_real, int ny_real, int nz_real, int n_ghost, Real* hdf5_buffer, Real* grid_buffer) +void Fill_HDF5_Buffer_From_Grid_CPU(int nx, int ny, int nz, int nx_real, int ny_real, int nz_real, int n_ghost, + Real *hdf5_buffer, Real *grid_buffer) { int i, j, k, id, buf_id; // 3D case if (nx > 1 && ny > 1 && nz > 1) { for (k = 0; k < nz_real; k++) { for (j = 0; j < ny_real; j++) { - for (i = 0; i < nx_real; i++) { - id = (i + n_ghost) + (j + n_ghost) * nx + (k + n_ghost) * nx * ny; - buf_id = k + j * nz_real + i * nz_real * ny_real; - hdf5_buffer[buf_id] = grid_buffer[id]; - } + for (i = 0; i < nx_real; i++) { + id = (i + n_ghost) + (j + n_ghost) * nx + (k + n_ghost) * nx * ny; + buf_id = k + j * nz_real + i * nz_real * ny_real; + hdf5_buffer[buf_id] = grid_buffer[id]; + } } } return; @@ -1261,9 +1260,9 @@ void Fill_HDF5_Buffer_From_Grid_CPU(int nx, int ny, int nz, int nx_real, int ny_ if (nx > 1 && ny > 1 && nz == 1) { for (j = 0; j < ny_real; j++) { for (i = 0; i < nx_real; i++) { - id = (i + n_ghost) + (j + n_ghost) * nx; - buf_id = j + i * ny_real; - hdf5_buffer[buf_id] = grid_buffer[id]; + id = (i + n_ghost) + (j + n_ghost) * nx; + buf_id = j + i * ny_real; + hdf5_buffer[buf_id] = grid_buffer[id]; } } return; @@ -1278,37 +1277,39 @@ void Fill_HDF5_Buffer_From_Grid_CPU(int nx, int ny, int nz, int nx_real, int ny_ } /* \brief Before HDF5 reads data into a buffer, remap and write grid to HDF5 buffer. */ -void Fill_HDF5_Buffer_From_Grid_GPU(int nx, int ny, int nz, int nx_real, int ny_real, int nz_real, int n_ghost, Real* hdf5_buffer, Real* device_hdf5_buffer, Real* device_grid_buffer); +void Fill_HDF5_Buffer_From_Grid_GPU(int nx, int ny, int nz, int nx_real, int ny_real, int nz_real, int n_ghost, + Real *hdf5_buffer, Real *device_hdf5_buffer, Real *device_grid_buffer); // From src/io/io_gpu // Set up dataspace for grid formatted data and write dataset -void Write_HDF5_Dataset_Grid(int nx, int ny, int nz, int nx_real, int ny_real, int nz_real, hid_t file_id, Real* dataset_buffer, const char* name) +void Write_HDF5_Dataset_Grid(int nx, int ny, int nz, int nx_real, int ny_real, int nz_real, hid_t file_id, + Real *dataset_buffer, const char *name) { // Set up dataspace - + hid_t dataspace_id; // 1-D Case if (nx > 1 && ny == 1 && nz == 1) { int rank = 1; hsize_t dims[1]; - dims[0] = nx_real; + dims[0] = nx_real; dataspace_id = H5Screate_simple(rank, dims, NULL); } // 2-D Case if (nx > 1 && ny > 1 && nz == 1) { int rank = 2; hsize_t dims[2]; - dims[0] = nx_real; - dims[1] = ny_real; + dims[0] = nx_real; + dims[1] = ny_real; dataspace_id = H5Screate_simple(rank, dims, NULL); } // 3-D Case if (nx > 1 && ny > 1 && nz > 1) { int rank = 3; hsize_t dims[3]; - dims[0] = nx_real; - dims[1] = ny_real; - dims[2] = nz_real; + dims[0] = nx_real; + dims[1] = ny_real; + dims[2] = nz_real; dataspace_id = H5Screate_simple(rank, dims, NULL); } @@ -1317,33 +1318,33 @@ void Write_HDF5_Dataset_Grid(int nx, int ny, int nz, int nx_real, int ny_real, i Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer, name); // Close dataspace - herr_t status = H5Sclose(dataspace_id); + herr_t status = H5Sclose(dataspace_id); } // Data moves from host grid_buffer to dataset_buffer to hdf5 file -void Write_Grid_HDF5_Field_CPU(Header H, hid_t file_id, Real* dataset_buffer, Real* grid_buffer, const char* name) +void Write_Grid_HDF5_Field_CPU(Header H, hid_t file_id, Real *dataset_buffer, Real *grid_buffer, const char *name) { - Fill_HDF5_Buffer_From_Grid_CPU(H.nx, H.ny, H.nz, H.nx_real, H.ny_real, H.nz_real, H.n_ghost, dataset_buffer, grid_buffer); + Fill_HDF5_Buffer_From_Grid_CPU(H.nx, H.ny, H.nz, H.nx_real, H.ny_real, H.nz_real, H.n_ghost, dataset_buffer, + grid_buffer); Write_HDF5_Dataset_Grid(H.nx, H.ny, H.nz, H.nx_real, H.ny_real, H.nz_real, file_id, dataset_buffer, name); } // Data moves from device_grid_buffer to device_hdf5_buffer to dataset_buffer to hdf5 file -void Write_Grid_HDF5_Field_GPU(Header H, hid_t file_id, Real* dataset_buffer, Real* device_hdf5_buffer, Real* device_grid_buffer, const char* name) +void Write_Grid_HDF5_Field_GPU(Header H, hid_t file_id, Real *dataset_buffer, Real *device_hdf5_buffer, + Real *device_grid_buffer, const char *name) { - Fill_HDF5_Buffer_From_Grid_GPU(H.nx, H.ny, H.nz, H.nx_real, H.ny_real, H.nz_real, H.n_ghost, dataset_buffer, device_hdf5_buffer, device_grid_buffer); + Fill_HDF5_Buffer_From_Grid_GPU(H.nx, H.ny, H.nz, H.nx_real, H.ny_real, H.nz_real, H.n_ghost, dataset_buffer, + device_hdf5_buffer, device_grid_buffer); Write_HDF5_Dataset_Grid(H.nx, H.ny, H.nz, H.nx_real, H.ny_real, H.nz_real, file_id, dataset_buffer, name); } -void Write_Generic_HDF5_Field(int nx, int ny, int nz, int nx_real, int ny_real, int nz_real, int n_ghost, - hid_t file_id, Real* dataset_buffer, Real* source_buffer, const char* name) +void Write_Generic_HDF5_Field(int nx, int ny, int nz, int nx_real, int ny_real, int nz_real, int n_ghost, hid_t file_id, + Real *dataset_buffer, Real *source_buffer, const char *name) { Fill_HDF5_Buffer_From_Grid_CPU(nx, ny, nz, nx_real, ny_real, nz_real, n_ghost, dataset_buffer, source_buffer); - Write_HDF5_Dataset_Grid(nx, ny, nz, nx_real, ny_real, nz_real, file_id, dataset_buffer, name); + Write_HDF5_Dataset_Grid(nx, ny, nz, nx_real, ny_real, nz_real, file_id, dataset_buffer, name); } - - - /*! \fn void Write_Grid_HDF5(hid_t file_id) * \brief Write the grid to a file, at the current simulation time. */ void Grid3D::Write_Grid_HDF5(hid_t file_id) @@ -1360,13 +1361,13 @@ void Grid3D::Write_Grid_HDF5(hid_t file_id) #ifdef OUTPUT_ENERGY output_energy = true; #else // not OUTPUT_ENERGY - output_energy = false; + output_energy = false; #endif // OUTPUT_ENERGY #ifdef OUTPUT_MOMENTUM output_momentum = true; #else // not OUTPUT_MOMENTUM - output_momentum = false; + output_momentum = false; #endif // OUTPUT_MOMENTUM #if defined(COOLING_GRACKLE) || defined(CHEMISTRY_GPU) @@ -1387,14 +1388,13 @@ void Grid3D::Write_Grid_HDF5(hid_t file_id) output_full_ionization = false; #endif // OUTPUT_FULL_IONIZATION - #endif // COOLING_GRACKLE or CHEMISTRY_GPU + #endif // COOLING_GRACKLE or CHEMISTRY_GPU #if defined(GRAVITY_GPU) && defined(OUTPUT_POTENTIAL) CudaSafeCall(cudaMemcpy(Grav.F.potential_h, Grav.F.potential_d, Grav.n_cells_potential * sizeof(Real), cudaMemcpyDeviceToHost)); #endif // GRAVITY_GPU and OUTPUT_POTENTIAL - // Allocate necessary buffers int nx_dset = H.nx_real; int ny_dset = H.ny_real; @@ -1404,41 +1404,40 @@ void Grid3D::Write_Grid_HDF5(hid_t file_id) #else size_t buffer_size = nx_dset * ny_dset * nz_dset; #endif - cuda_utilities::DeviceVector static device_dataset_vector{buffer_size}; - Real *device_dataset_buffer = device_dataset_vector.data(); - dataset_buffer = (Real *)malloc(buffer_size * sizeof(Real)); - + cuda_utilities::DeviceVector static device_dataset_vector{buffer_size}; + Real *device_dataset_buffer = device_dataset_vector.data(); + dataset_buffer = (Real *)malloc(buffer_size * sizeof(Real)); + + // Start writing fields - // Start writing fields - Write_Grid_HDF5_Field_CPU(H, file_id, dataset_buffer, C.density, "/density"); - if (output_momentum || H.Output_Complete_Data) { + if (output_momentum || H.Output_Complete_Data) { Write_Grid_HDF5_Field_CPU(H, file_id, dataset_buffer, C.momentum_x, "/momentum_x"); Write_Grid_HDF5_Field_CPU(H, file_id, dataset_buffer, C.momentum_y, "/momentum_y"); Write_Grid_HDF5_Field_CPU(H, file_id, dataset_buffer, C.momentum_z, "/momentum_z"); } - if (output_energy || H.Output_Complete_Data) { + if (output_energy || H.Output_Complete_Data) { Write_Grid_HDF5_Field_CPU(H, file_id, dataset_buffer, C.Energy, "/Energy"); -#ifdef DE + #ifdef DE Write_Grid_HDF5_Field_CPU(H, file_id, dataset_buffer, C.GasEnergy, "/GasEnergy"); -#endif + #endif } - + #ifdef SCALAR - + #ifdef BASIC_SCALAR - Write_Grid_HDF5_Field_CPU(H, file_id, dataset_buffer, C.scalar, "/scalar0"); + Write_Grid_HDF5_Field_CPU(H, file_id, dataset_buffer, C.scalar, "/scalar0"); #endif - #ifdef OUTPUT_CHEMISTRY - #ifdef CHEMISTRY_GPU + #ifdef OUTPUT_CHEMISTRY + #ifdef CHEMISTRY_GPU Write_Grid_HDF5_Field_CPU(H, file_id, dataset_buffer, C.HI_density, "/HI_density"); Write_Grid_HDF5_Field_CPU(H, file_id, dataset_buffer, C.HII_density, "/HII_density"); Write_Grid_HDF5_Field_CPU(H, file_id, dataset_buffer, C.HeI_density, "/HeI_density"); Write_Grid_HDF5_Field_CPU(H, file_id, dataset_buffer, C.HeII_density, "/HeII_density"); Write_Grid_HDF5_Field_CPU(H, file_id, dataset_buffer, C.HeIII_density, "/HeIII_density"); - Write_Grid_HDF5_Field_CPU(H, file_id, dataset_buffer, C.e_density, "/e_density"); - #elif defined(COOLING_GRACKLE) + Write_Grid_HDF5_Field_CPU(H, file_id, dataset_buffer, C.e_density, "/e_density"); + #elif defined(COOLING_GRACKLE) Write_Grid_HDF5_Field_CPU(H, file_id, dataset_buffer, Cool.fields.HI_density, "/HI_density"); Write_Grid_HDF5_Field_CPU(H, file_id, dataset_buffer, Cool.fields.HII_density, "/HII_density"); Write_Grid_HDF5_Field_CPU(H, file_id, dataset_buffer, Cool.fields.HeI_density, "/HeI_density"); @@ -1447,57 +1446,55 @@ void Grid3D::Write_Grid_HDF5(hid_t file_id) if (output_electrons || H.Output_Complete_Data) { Write_Grid_HDF5_Field_CPU(H, file_id, dataset_buffer, Cool.fields.e_density, "/e_density"); } - #endif -#endif //OUTPUT_CHEMISTRY - + #endif + #endif // OUTPUT_CHEMISTRY + #if defined(COOLING_GRACKLE) || defined(CHEMISTRY_GPU) - #ifdef GRACKLE_METALS - if (output_metals || H.Output_Complete_Data) { + #ifdef GRACKLE_METALS + if (output_metals || H.Output_Complete_Data) { Write_Grid_HDF5_Field_CPU(H, file_id, dataset_buffer, Cool.fields.metal_density, "/metal_density"); } - #endif // GRACKLE_METALS - - - #ifdef OUTPUT_TEMPERATURE - #ifdef CHEMISTRY_GPU - Compute_Gas_Temperature(Chem.Fields.temperature_h, false); - Write_Grid_HDF5_Field_CPU(H, file_id, dataset_buffer, Chem.Fields.temperature_h, "/temperature"); - #elif defined(COOLING_GRACKLE) - Write_Grid_HDF5_Field_CPU(H, file_id, dataset_buffer, Cool.temperature, "/temperature"); - #endif - #endif + #endif // GRACKLE_METALS + + #ifdef OUTPUT_TEMPERATURE + #ifdef CHEMISTRY_GPU + Compute_Gas_Temperature(Chem.Fields.temperature_h, false); + Write_Grid_HDF5_Field_CPU(H, file_id, dataset_buffer, Chem.Fields.temperature_h, "/temperature"); + #elif defined(COOLING_GRACKLE) + Write_Grid_HDF5_Field_CPU(H, file_id, dataset_buffer, Cool.temperature, "/temperature"); + #endif + #endif + + #endif // COOLING_GRACKLE || CHEMISTRY_GPU - - #endif // COOLING_GRACKLE || CHEMISTRY_GPU - - #endif // SCALAR + #endif // SCALAR // 3D case if (H.nx > 1 && H.ny > 1 && H.nz > 1) { - #if defined(GRAVITY) && defined(OUTPUT_POTENTIAL) // Copy the potential array to the memory buffer - Write_Generic_HDF5_Field(Grav.nx_local + 2 * N_GHOST_POTENTIAL, Grav.ny_local + 2 * N_GHOST_POTENTIAL, Grav.nz_local + 2 * N_GHOST_POTENTIAL, - nx, ny, nz, Grav.nx_local, Grav.ny_local, Grav.nz_local, N_GHOST_POTENTIAL, - file_id, dataset_buffer, Grav.F.potential_h, "/grav_potential"); - - /* - for (k = 0; k < Grav.nz_local; k++) { - for (j = 0; j < Grav.ny_local; j++) { - for (i = 0; i < Grav.nx_local; i++) { - // id = (i+H.n_ghost) + (j+H.n_ghost)*H.nx + (k+H.n_ghost)*H.nx*H.ny; - // buf_id = k + j*H.nz_real + i*H.nz_real*H.ny_real; - id = (i + N_GHOST_POTENTIAL) + (j + N_GHOST_POTENTIAL) * (Grav.nx_local + 2 * N_GHOST_POTENTIAL) + - (k + N_GHOST_POTENTIAL) * (Grav.nx_local + 2 * N_GHOST_POTENTIAL) * - (Grav.ny_local + 2 * N_GHOST_POTENTIAL); - buf_id = k + j * Grav.nz_local + i * Grav.nz_local * Grav.ny_local; - dataset_buffer[buf_id] = Grav.F.potential_h[id]; + Write_Generic_HDF5_Field(Grav.nx_local + 2 * N_GHOST_POTENTIAL, Grav.ny_local + 2 * N_GHOST_POTENTIAL, + Grav.nz_local + 2 * N_GHOST_POTENTIAL, nx, ny, nz, Grav.nx_local, Grav.ny_local, + Grav.nz_local, N_GHOST_POTENTIAL, file_id, dataset_buffer, Grav.F.potential_h, + "/grav_potential"); + + /* + for (k = 0; k < Grav.nz_local; k++) { + for (j = 0; j < Grav.ny_local; j++) { + for (i = 0; i < Grav.nx_local; i++) { + // id = (i+H.n_ghost) + (j+H.n_ghost)*H.nx + (k+H.n_ghost)*H.nx*H.ny; + // buf_id = k + j*H.nz_real + i*H.nz_real*H.ny_real; + id = (i + N_GHOST_POTENTIAL) + (j + N_GHOST_POTENTIAL) * (Grav.nx_local + 2 * N_GHOST_POTENTIAL) + + (k + N_GHOST_POTENTIAL) * (Grav.nx_local + 2 * N_GHOST_POTENTIAL) * + (Grav.ny_local + 2 * N_GHOST_POTENTIAL); + buf_id = k + j * Grav.nz_local + i * Grav.nz_local * Grav.ny_local; + dataset_buffer[buf_id] = Grav.F.potential_h[id]; + } } } - } - status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer, "/grav_potential"); - */ + status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer, "/grav_potential"); + */ #endif // GRAVITY and OUTPUT_POTENTIAL #ifdef MHD @@ -1512,10 +1509,9 @@ void Grid3D::Write_Grid_HDF5(hid_t file_id) #endif // MHD // Free the dataspace id - } - - free(dataset_buffer); + + free(dataset_buffer); } #endif // HDF5 @@ -2307,7 +2303,8 @@ void Grid3D::Read_Grid_Binary(FILE *fp) #ifdef HDF5 /* \brief After HDF5 reads data into a buffer, remap and write to grid buffer. */ -void Fill_Grid_From_HDF5_Buffer(int nx, int ny, int nz, int nx_real, int ny_real, int nz_real, int n_ghost, Real* hdf5_buffer, Real* grid_buffer) +void Fill_Grid_From_HDF5_Buffer(int nx, int ny, int nz, int nx_real, int ny_real, int nz_real, int n_ghost, + Real *hdf5_buffer, Real *grid_buffer) { // Note: for 1D ny_real and nz_real are not used // And for 2D nz_real is not used. @@ -2318,11 +2315,11 @@ void Fill_Grid_From_HDF5_Buffer(int nx, int ny, int nz, int nx_real, int ny_real if (nx > 1 && ny > 1 && nz > 1) { for (k = 0; k < nz_real; k++) { for (j = 0; j < ny_real; j++) { - for (i = 0; i < nx_real; i++) { - id = (i + n_ghost) + (j + n_ghost) * nx + (k + n_ghost) * nx * ny; - buf_id = k + j * nz_real + i * nz_real * ny_real; - grid_buffer[id] = hdf5_buffer[buf_id]; - } + for (i = 0; i < nx_real; i++) { + id = (i + n_ghost) + (j + n_ghost) * nx + (k + n_ghost) * nx * ny; + buf_id = k + j * nz_real + i * nz_real * ny_real; + grid_buffer[id] = hdf5_buffer[buf_id]; + } } } return; @@ -2332,9 +2329,9 @@ void Fill_Grid_From_HDF5_Buffer(int nx, int ny, int nz, int nx_real, int ny_real if (nx > 1 && ny > 1 && nz == 1) { for (j = 0; j < ny_real; j++) { for (i = 0; i < nx_real; i++) { - id = (i + n_ghost) + (j + n_ghost) * nx; - buf_id = j + i * ny_real; - grid_buffer[id] = hdf5_buffer[buf_id]; + id = (i + n_ghost) + (j + n_ghost) * nx; + buf_id = j + i * ny_real; + grid_buffer[id] = hdf5_buffer[buf_id]; } } return; @@ -2348,21 +2345,20 @@ void Fill_Grid_From_HDF5_Buffer(int nx, int ny, int nz, int nx_real, int ny_real } } -void Read_Grid_HDF5_Field(hid_t file_id, Real* dataset_buffer, Header H, Real* grid_buffer, const char* name) +void Read_Grid_HDF5_Field(hid_t file_id, Real *dataset_buffer, Header H, Real *grid_buffer, const char *name) { Read_HDF5_Dataset(file_id, dataset_buffer, name); Fill_Grid_From_HDF5_Buffer(H.nx, H.ny, H.nz, H.nx_real, H.ny_real, H.nz_real, H.n_ghost, dataset_buffer, grid_buffer); } -void Read_Grid_HDF5_Field_Magnetic(hid_t file_id, Real* dataset_buffer, Header H, Real* grid_buffer, const char* name) +void Read_Grid_HDF5_Field_Magnetic(hid_t file_id, Real *dataset_buffer, Header H, Real *grid_buffer, const char *name) { // Magnetic has 1 more real cell, 1 fewer n_ghost on one side. Read_HDF5_Dataset(file_id, dataset_buffer, name); - Fill_Grid_From_HDF5_Buffer(H.nx, H.ny, H.nz, H.nx_real + 1, H.ny_real + 1, H.nz_real + 1, H.n_ghost - 1, dataset_buffer, grid_buffer); + Fill_Grid_From_HDF5_Buffer(H.nx, H.ny, H.nz, H.nx_real + 1, H.ny_real + 1, H.nz_real + 1, H.n_ghost - 1, + dataset_buffer, grid_buffer); } - - /*! \fn void Read_Grid_HDF5(hid_t file_id) * \brief Read in grid data from an hdf5 file. */ void Grid3D::Read_Grid_HDF5(hid_t file_id, struct parameters P) @@ -2380,7 +2376,8 @@ void Grid3D::Read_Grid_HDF5(hid_t file_id, struct parameters P) status = H5Aread(attribute_id, H5T_NATIVE_DOUBLE, &H.t); status = H5Aclose(attribute_id); /* - // Alwin: I don't think this is needed anymore because dt of the current state of cells is calculated for consistency and output was using previous timestep's H.dt + // Alwin: I don't think this is needed anymore because dt of the current state of cells is calculated for consistency + and output was using previous timestep's H.dt // This is because dti = Update_Grid, then output, then dt = 1/MPI_Allreduce(dti) in next step attribute_id = H5Aopen(file_id, "dt", H5P_DEFAULT); status = H5Aread(attribute_id, H5T_NATIVE_DOUBLE, &H.dt); @@ -2406,11 +2403,11 @@ void Grid3D::Read_Grid_HDF5(hid_t file_id, struct parameters P) #endif #ifdef SCALAR - + #ifdef BASIC_SCALAR - Read_Grid_HDF5_Field(file_id, dataset_buffer, H, C.scalar, "/scalar0"); + Read_Grid_HDF5_Field(file_id, dataset_buffer, H, C.scalar, "/scalar0"); #endif - + #if defined(COOLING_GRACKLE) || defined(CHEMISTRY_GPU) Read_Grid_HDF5_Field(file_id, dataset_buffer, H, C.HI_density, "/HI_density"); Read_Grid_HDF5_Field(file_id, dataset_buffer, H, C.HII_density, "/HII_density"); @@ -2418,14 +2415,14 @@ void Grid3D::Read_Grid_HDF5(hid_t file_id, struct parameters P) Read_Grid_HDF5_Field(file_id, dataset_buffer, H, C.HeII_density, "/HeII_density"); Read_Grid_HDF5_Field(file_id, dataset_buffer, H, C.HeIII_density, "/HeIII_density"); Read_Grid_HDF5_Field(file_id, dataset_buffer, H, C.e_density, "/e_density"); - #ifdef GRACKLE_METALS + #ifdef GRACKLE_METALS Read_Grid_HDF5_Field(file_id, dataset_buffer, H, C.metal_density, "/metal_density"); - #endif // GRACKLE_METALS - #endif // COOLING_GRACKLE , CHEMISTRY_GPU - - #endif // SCALAR + #endif // GRACKLE_METALS + #endif // COOLING_GRACKLE , CHEMISTRY_GPU + + #endif // SCALAR - // MHD only valid in 3D case + // MHD only valid in 3D case if (H.nx > 1 && H.ny > 1 && H.nz > 1) { // Compute Statistic of Initial data Real mean_l, min_l, max_l; diff --git a/src/io/io.h b/src/io/io.h index 2339887b7..a24fe788c 100644 --- a/src/io/io.h +++ b/src/io/io.h @@ -54,9 +54,6 @@ void write_debug(Real* Value, const char* fname, int nValues, int iProc); #ifdef HDF5 // From io/io.cpp - - - herr_t Write_HDF5_Attribute(hid_t file_id, hid_t dataspace_id, double* attribute, const char* name); herr_t Write_HDF5_Attribute(hid_t file_id, hid_t dataspace_id, int* attribute, const char* name); @@ -67,7 +64,8 @@ herr_t Write_HDF5_Dataset(hid_t file_id, hid_t dataspace_id, double* dataset_buf herr_t Write_HDF5_Dataset(hid_t file_id, hid_t dataspace_id, float* dataset_buffer, const char* name); /* \brief After HDF5 reads data into a buffer, remap and write to grid buffer. */ -void Fill_Grid_From_HDF5_Buffer(int nx, int ny, int nz, int nx_real, int ny_real, int nz_real, int n_ghost, Real* hdf5_buffer, Real* grid_buffer); +void Fill_Grid_From_HDF5_Buffer(int nx, int ny, int nz, int nx_real, int ny_real, int nz_real, int n_ghost, + Real* hdf5_buffer, Real* grid_buffer); // From io/io_gpu.cu // Use GPU to pack source -> device_buffer, then copy device_buffer -> buffer, diff --git a/src/io/io_gpu.cu b/src/io/io_gpu.cu index 7b2b9b17c..34da27ff2 100644 --- a/src/io/io_gpu.cu +++ b/src/io/io_gpu.cu @@ -17,7 +17,8 @@ // For the magnetic field case, a different // nx_real+1 ny_real+1 nz_real+1 n_ghost-1 are provided as inputs. -// 2D version of CopyReal3D_GPU_Kernel. Note that magnetic fields and float32 output are not enabled in 2-D so this is a simpler kernel +// 2D version of CopyReal3D_GPU_Kernel. Note that magnetic fields and float32 output are not enabled in 2-D so this is a +// simpler kernel __global__ void CopyReal2D_GPU_Kernel(int nx, int ny, int nx_real, int ny_real, int nz_real, int n_ghost, Real* destination, Real* source) { @@ -39,7 +40,6 @@ __global__ void CopyReal2D_GPU_Kernel(int nx, int ny, int nx_real, int ny_real, destination[dest_id] = source[source_id]; } - // Copy Real (non-ghost) cells from source to a double destination (for writing // HDF5 in double precision) __global__ void CopyReal3D_GPU_Kernel(int nx, int ny, int nx_real, int ny_real, int nz_real, int n_ghost, @@ -141,16 +141,19 @@ void WriteHDF5Field3D(int nx, int ny, int nx_real, int ny_real, int nz_real, int printf("File write failed.\n"); } } -void Fill_HDF5_Buffer_From_Grid_GPU(int nx, int ny, int nz, int nx_real, int ny_real, int nz_real, int n_ghost, Real* hdf5_buffer, Real* device_hdf5_buffer, Real* device_grid_buffer){ +void Fill_HDF5_Buffer_From_Grid_GPU(int nx, int ny, int nz, int nx_real, int ny_real, int nz_real, int n_ghost, + Real* hdf5_buffer, Real* device_hdf5_buffer, Real* device_grid_buffer) +{ int mhd_direction = -1; - + // 3D case if (nx > 1 && ny > 1 && nz > 1) { dim3 dim1dGrid((nx_real * ny_real * nz_real + TPB - 1) / TPB, 1, 1); dim3 dim1dBlock(TPB, 1, 1); hipLaunchKernelGGL(CopyReal3D_GPU_Kernel, dim1dGrid, dim1dBlock, 0, 0, nx, ny, nx_real, ny_real, nz_real, n_ghost, - device_hdf5_buffer, device_grid_buffer, mhd_direction); - CudaSafeCall(cudaMemcpy(hdf5_buffer, device_hdf5_buffer, nx_real * ny_real * nz_real * sizeof(Real), cudaMemcpyDeviceToHost)); + device_hdf5_buffer, device_grid_buffer, mhd_direction); + CudaSafeCall(cudaMemcpy(hdf5_buffer, device_hdf5_buffer, nx_real * ny_real * nz_real * sizeof(Real), + cudaMemcpyDeviceToHost)); return; } @@ -159,8 +162,8 @@ void Fill_HDF5_Buffer_From_Grid_GPU(int nx, int ny, int nz, int nx_real, int ny_ dim3 dim1dGrid((nx_real * ny_real + TPB - 1) / TPB, 1, 1); dim3 dim1dBlock(TPB, 1, 1); hipLaunchKernelGGL(CopyReal2D_GPU_Kernel, dim1dGrid, dim1dBlock, 0, 0, nx, ny, nx_real, ny_real, nz_real, n_ghost, - device_hdf5_buffer, device_grid_buffer); - CudaSafeCall(cudaMemcpy(hdf5_buffer, device_hdf5_buffer, nx_real * ny_real * sizeof(Real), cudaMemcpyDeviceToHost)); + device_hdf5_buffer, device_grid_buffer); + CudaSafeCall(cudaMemcpy(hdf5_buffer, device_hdf5_buffer, nx_real * ny_real * sizeof(Real), cudaMemcpyDeviceToHost)); return; } @@ -172,4 +175,3 @@ void Fill_HDF5_Buffer_From_Grid_GPU(int nx, int ny, int nz, int nx_real, int ny_ } #endif // HDF5 - diff --git a/src/io/io_parallel.cpp b/src/io/io_parallel.cpp index 1217eea0a..3f3117641 100644 --- a/src/io/io_parallel.cpp +++ b/src/io/io_parallel.cpp @@ -4,23 +4,22 @@ #include "../utils/error_handling.h" #if defined(HDF5) && defined(MPI_CHOLLA) -#include -#include "../mpi/mpi_routines.h" -#include "../utils/timing_functions.h" // provides ScopedTimer - + #include + #include "../mpi/mpi_routines.h" + #include "../utils/timing_functions.h" // provides ScopedTimer // I think this helper function is finished. It's just meant to interface with HDF5 and open/free handles // I need to figure out offset and count elsewhere -// Warning: H5Sselect_hyperslab expects its pointer args to be arrays of same size as the rank of the dataspace file_space_id +// Warning: H5Sselect_hyperslab expects its pointer args to be arrays of same size as the rank of the dataspace +// file_space_id void Read_HDF5_Selection_3D(hid_t file_id, hsize_t* offset, hsize_t* count, double* buffer, const char* name) { hid_t dataset_id = H5Dopen(file_id, name, H5P_DEFAULT); // Select the requested subset of data hid_t file_space_id = H5Dget_space(dataset_id); - hid_t mem_space_id = H5Screate_simple(3, count, NULL); - + hid_t mem_space_id = H5Screate_simple(3, count, NULL); // Notes on hyperslab call: @@ -42,28 +41,26 @@ void Read_HDF5_Selection_3D(hid_t file_id, hsize_t* offset, hsize_t* count, doub status = H5Dclose(dataset_id); } - // Alwin: I'm only writing a 3D version of this because that's what is practical. // Read from concatenated HDF5 file void Read_Grid_Cat_HDF5_Field(hid_t file_id, Real* dataset_buffer, Header H, hsize_t* offset, hsize_t* count, - Real* grid_buffer, const char* name) + Real* grid_buffer, const char* name) { Read_HDF5_Selection_3D(file_id, offset, count, dataset_buffer, name); Fill_Grid_From_HDF5_Buffer(H.nx, H.ny, H.nz, H.nx_real, H.ny_real, H.nz_real, H.n_ghost, dataset_buffer, grid_buffer); } void Read_Grid_Cat_HDF5_Field_Magnetic(hid_t file_id, Real* dataset_buffer, Header H, hsize_t* offset, hsize_t* count, - Real* grid_buffer, const char* name) + Real* grid_buffer, const char* name) { Read_HDF5_Selection_3D(file_id, offset, count, dataset_buffer, name); - Fill_Grid_From_HDF5_Buffer(H.nx, H.ny, H.nz, H.nx_real + 1, H.ny_real + 1, H.nz_real + 1, H.n_ghost - 1, dataset_buffer, grid_buffer); + Fill_Grid_From_HDF5_Buffer(H.nx, H.ny, H.nz, H.nx_real + 1, H.ny_real + 1, H.nz_real + 1, H.n_ghost - 1, + dataset_buffer, grid_buffer); } - /*! \brief Read in grid data from a single concatenated output file. */ void Grid3D::Read_Grid_Cat(struct parameters P) { - ScopedTimer timer("Read_Grid_Cat"); herr_t status; char filename[100]; @@ -77,14 +74,12 @@ void Grid3D::Read_Grid_Cat(struct parameters P) exit(0); } - // TODO (Alwin) : Need to consider how or whether to read attributes. // even if I do not read gamma from file, it is set in initial_conditions.cpp // if I do not set t or n_step what does it get set to?0 in grid/grid3D.cpp - // This should be okay to start with. + // This should be okay to start with. - // Offsets are global variables from mpi_routines.h hsize_t offset[3]; offset[0] = nx_local_start; @@ -93,15 +88,15 @@ void Grid3D::Read_Grid_Cat(struct parameters P) // This is really dims but I name it count because that's what HDF5 names it hsize_t count[3]; - count[0] = H.nx_real; - count[1] = H.ny_real; - count[2] = H.nz_real; - + count[0] = H.nx_real; + count[1] = H.ny_real; + count[2] = H.nz_real; + #ifdef MHD - Real* dataset_buffer = (Real *)malloc((H.nz_real + 1) * (H.ny_real + 1) * (H.nx_real + 1) * sizeof(Real)); + Real* dataset_buffer = (Real*)malloc((H.nz_real + 1) * (H.ny_real + 1) * (H.nx_real + 1) * sizeof(Real)); #else - Real* dataset_buffer = (Real *)malloc((H.nz_real) * (H.ny_real) * (H.nx_real) * sizeof(Real)); - #endif + Real* dataset_buffer = (Real*)malloc((H.nz_real) * (H.ny_real) * (H.nx_real) * sizeof(Real)); + #endif Read_Grid_Cat_HDF5_Field(file_id, dataset_buffer, H, offset, count, C.density, "/density"); Read_Grid_Cat_HDF5_Field(file_id, dataset_buffer, H, offset, count, C.momentum_x, "/momentum_x"); @@ -109,17 +104,16 @@ void Grid3D::Read_Grid_Cat(struct parameters P) Read_Grid_Cat_HDF5_Field(file_id, dataset_buffer, H, offset, count, C.momentum_z, "/momentum_z"); Read_Grid_Cat_HDF5_Field(file_id, dataset_buffer, H, offset, count, C.Energy, "/Energy"); #ifdef DE - Read_Grid_Cat_HDF5_Field(file_id, dataset_buffer, H, offset, count, C.Energy, "/GasEnergy"); - #endif //DE + Read_Grid_Cat_HDF5_Field(file_id, dataset_buffer, H, offset, count, C.Energy, "/GasEnergy"); + #endif // DE // TODO (Alwin) : add scalar stuff - + #ifdef MHD Read_Grid_HDF5_Field_Magnetic(file_id, dataset_buffer, H, C.magnetic_x, "/magnetic_x"); Read_Grid_HDF5_Field_Magnetic(file_id, dataset_buffer, H, C.magnetic_y, "/magnetic_y"); Read_Grid_HDF5_Field_Magnetic(file_id, dataset_buffer, H, C.magnetic_z, "/magnetic_z"); #endif - free(dataset_buffer); status = H5Fclose(file_id); diff --git a/src/main.cpp b/src/main.cpp index ae299c024..555eac3b8 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -48,7 +48,7 @@ int main(int argc, char *argv[]) /* Initialize MPI communication */ #ifdef MPI_CHOLLA InitializeChollaMPI(&argc, &argv); -#endif /*MPI_CHOLLA*/ +#endif /*MPI_CHOLLA*/ Real dti = 0; // inverse time step, 1.0 / dt diff --git a/src/model/disk_ICs.cpp b/src/model/disk_ICs.cpp index 8e4bede3f..bad65fe37 100644 --- a/src/model/disk_ICs.cpp +++ b/src/model/disk_ICs.cpp @@ -194,16 +194,16 @@ void hydrostatic_ray_analytical_D3D(Real *rho, Real *r, Real *hdp, Real dr, int { // Routine to determine the hydrostatic density profile // along a ray from the galaxy center - int i; // index along r direction + int i; // index along r direction Real gamma = hdp[13]; // adiabatic index Real rho_eos = hdp[18]; // density where K_EOS is set Real cs = hdp[19]; // sound speed at rho_eos Real r_cool = hdp[20]; // cooling radius - Real Phi_0; // potential at cooling radius + Real Phi_0; // potential at cooling radius - Real D_rho; // ratio of density at mid plane and rho_eos + Real D_rho; // ratio of density at mid plane and rho_eos Real gmo = gamma - 1.0; // gamma-1 @@ -259,10 +259,10 @@ void hydrostatic_column_isothermal_D3D(Real *rho, Real R, Real *hdp, Real dz, in Real cs = hdp[16]; - Real Phi_0; // potential at z=0 + Real Phi_0; // potential at z=0 - Real rho_0; // density at mid plane - Real D_rho; // ratio of density at mid plane and rho_eos + Real rho_0; // density at mid plane + Real D_rho; // ratio of density at mid plane and rho_eos Real z_0, z_1; // heights for iteration Real z_disk_max; @@ -274,8 +274,8 @@ void hydrostatic_column_isothermal_D3D(Real *rho, Real R, Real *hdp, Real dz, in int n_int = 1000; int flag; // flag for integration - int ks; // start of integrals above disk plane - int km; // mirror of k + int ks; // start of integrals above disk plane + int km; // mirror of k if (nz % 2) { ks = ng + (nz - 1) / 2; } else { @@ -440,14 +440,14 @@ void hydrostatic_column_analytical_D3D(Real *rho, Real R, Real *hdp, Real dz, in Real rho_eos = hdp[15]; Real cs = hdp[16]; - Real Phi_0; // potential at z=0 + Real Phi_0; // potential at z=0 - Real D_rho; // ratio of density at mid plane and rho_eos - Real D_new; // new ratio of density at mid plane and rho_eos + Real D_rho; // ratio of density at mid plane and rho_eos + Real D_new; // new ratio of density at mid plane and rho_eos Real z_0, z_1, z_2; // heights for iteration Real z_disk_max; - Real A_0, A_1; // density function to find roots + Real A_0, A_1; // density function to find roots // density integration Real phi_int, A; @@ -760,8 +760,8 @@ void Grid3D::Disk_3D(parameters p) // contraction) r_cool = galaxy.getR_cool(); // cooling radius in kpc (MW) - M_h = M_vir - M_d; // halo mass in M_sun - R_s = R_vir / c_vir; // halo scale length in kpc + M_h = M_vir - M_d; // halo mass in M_sun + R_s = R_vir / c_vir; // halo scale length in kpc // T_d = 5.9406e5; // SET TO MATCH K_EOS SET BY HAND for K_eos = 1.859984e-14 // T_d = 2.0e5; T_d = 1.0e4; // CHANGED FOR ISOTHERMAL diff --git a/src/model/disk_galaxy.h b/src/model/disk_galaxy.h index 04249853f..c62aeb63f 100644 --- a/src/model/disk_galaxy.h +++ b/src/model/disk_galaxy.h @@ -186,6 +186,6 @@ namespace Galaxies // static DiskGalaxy MW(6.5e10, 3.5, (3.5/5.0), 1.0e12, 261, 20, 157.0); static ClusteredDiskGalaxy MW(1e4, 5e5, 6.5e10, 2.7, 0.7, 1.077e12, 261, 18, 157.0); static DiskGalaxy M82(1.0e10, 0.8, 0.15, 5.0e10, 0.8 / 0.015, 10, 100.0); -}; // namespace Galaxies +}; // namespace Galaxies #endif // DISK_GALAXY diff --git a/src/mpi/mpi_routines.cpp b/src/mpi/mpi_routines.cpp index 2e0faaa16..ef26732d8 100644 --- a/src/mpi/mpi_routines.cpp +++ b/src/mpi/mpi_routines.cpp @@ -13,14 +13,14 @@ #include "../utils/error_handling.h" /*Global MPI Variables*/ -int procID; /*process rank*/ -int nproc; /*number of processes in global comm*/ -int root; /*rank of root process*/ +int procID; /*process rank*/ +int nproc; /*number of processes in global comm*/ +int root; /*rank of root process*/ -int procID_node; /*process rank on node*/ -int nproc_node; /*number of MPI processes on node*/ +int procID_node; /*process rank on node*/ +int nproc_node; /*number of MPI processes on node*/ -MPI_Comm world; /*global communicator*/ +MPI_Comm world; /*global communicator*/ MPI_Datatype MPI_CHREAL; /*set equal to MPI_FLOAT or MPI_DOUBLE*/ @@ -573,7 +573,7 @@ void Allocate_MPI_DeviceBuffers(struct Header *H) N_PARTICLES_TRANSFER = n_max * n_max * factor; // Set the number of values that will be transferred for each particle - N_DATA_PER_PARTICLE_TRANSFER = 6; // 3 positions and 3 velocities + N_DATA_PER_PARTICLE_TRANSFER = 6; // 3 positions and 3 velocities #ifndef SINGLE_PARTICLE_MASS N_DATA_PER_PARTICLE_TRANSFER += 1; // one more for the particle mass #endif @@ -1002,7 +1002,7 @@ std::pair MPI_Comm_node() // get the hostname of the node std::string pname; // node hostname pname.resize(MPI_MAX_PROCESSOR_NAME); - int pname_length; // length of node hostname + int pname_length; // length of node hostname MPI_Get_processor_name(pname.data(), &pname_length); diff --git a/src/mpi/mpi_routines.h b/src/mpi/mpi_routines.h index 2d2a644b4..74e2562c9 100644 --- a/src/mpi/mpi_routines.h +++ b/src/mpi/mpi_routines.h @@ -15,14 +15,14 @@ #endif /*FFTW*/ /*Global MPI Variables*/ -extern int procID; /*process rank*/ -extern int nproc; /*number of processes in global comm*/ -extern int root; /*rank of root process*/ -extern int procID_node; /*process rank on node*/ -extern int nproc_node; /*number of MPI processes on node*/ - -extern MPI_Comm world; /*global communicator*/ -extern MPI_Comm node; /*communicator for each node*/ +extern int procID; /*process rank*/ +extern int nproc; /*number of processes in global comm*/ +extern int root; /*rank of root process*/ +extern int procID_node; /*process rank on node*/ +extern int nproc_node; /*number of MPI processes on node*/ + +extern MPI_Comm world; /*global communicator*/ +extern MPI_Comm node; /*communicator for each node*/ extern MPI_Datatype MPI_CHREAL; /*data type describing float precision*/ diff --git a/src/particles/density_CIC.cpp b/src/particles/density_CIC.cpp index 3e28f55de..2391bd6f2 100644 --- a/src/particles/density_CIC.cpp +++ b/src/particles/density_CIC.cpp @@ -90,7 +90,7 @@ void Grid3D::Copy_Particles_Density() } #endif // PARALLEL_OMP - #endif // GRAVITY_GPU + #endif // GRAVITY_GPU } void Grid3D::Copy_Particles_Density_function(int g_start, int g_end) @@ -415,6 +415,6 @@ void Particles_3D::Get_Density_CIC_OMP() } #endif // PARALLEL_OMP - #endif // PARTICLES_CPU + #endif // PARTICLES_CPU #endif diff --git a/src/particles/density_boundaries_gpu.cu b/src/particles/density_boundaries_gpu.cu index fd5c4ddca..b32979158 100644 --- a/src/particles/density_boundaries_gpu.cu +++ b/src/particles/density_boundaries_gpu.cu @@ -276,4 +276,4 @@ void Grid3D::Unload_Particles_Density_Boundary_From_Buffer_GPU(int direction, in #endif // MPI_CHOLLA -#endif // PARTICLES_GPU & GRAVITY_GPU +#endif // PARTICLES_GPU & GRAVITY_GPU diff --git a/src/particles/gravity_CIC.cpp b/src/particles/gravity_CIC.cpp index 13938942e..74d14d6ae 100644 --- a/src/particles/gravity_CIC.cpp +++ b/src/particles/gravity_CIC.cpp @@ -350,4 +350,4 @@ void Grid3D::Get_Gravity_CIC_function(part_int_t p_start, part_int_t p_end) #endif // PARTICLES_CPU -#endif // PARTICLES +#endif // PARTICLES diff --git a/src/particles/gravity_CIC_gpu.cu b/src/particles/gravity_CIC_gpu.cu index 19aee8941..36f2fe7cd 100644 --- a/src/particles/gravity_CIC_gpu.cu +++ b/src/particles/gravity_CIC_gpu.cu @@ -346,4 +346,4 @@ void Grid3D::Copy_Particles_Density_GPU() #endif // GRAVITY_GPU -#endif // PARTICLES +#endif // PARTICLES diff --git a/src/particles/io_particles.cpp b/src/particles/io_particles.cpp index 02a7d6c3a..3064c9d83 100644 --- a/src/particles/io_particles.cpp +++ b/src/particles/io_particles.cpp @@ -46,7 +46,7 @@ void Particles_3D::Load_Particles_Data(struct parameters *P) G.nx_local); // Everyone reads the same file #else sprintf(filename, "%s.%d", filename, procID); - #endif // TILED_INITIAL_CONDITIONS + #endif // TILED_INITIAL_CONDITIONS #endif chprintf(" Loading particles file: %s \n", filename); diff --git a/src/particles/particles_3D.cpp b/src/particles/particles_3D.cpp index f2b56f62c..542cdb852 100644 --- a/src/particles/particles_3D.cpp +++ b/src/particles/particles_3D.cpp @@ -651,11 +651,11 @@ void Particles_3D::Initialize_Disk_Stellar_Clusters(struct parameters *P) std::uniform_real_distribution phiDist(0, 2 * M_PI); // for generating phi std::normal_distribution speedDist(0, - 1); // for generating random speeds. + 1); // for generating random speeds. - Real M_d = Galaxies::MW.getM_d(); // MW disk mass in M_sun (assumed to be all in stars) - Real R_d = Galaxies::MW.getR_d(); // MW stellar disk scale length in kpc - Real Z_d = Galaxies::MW.getZ_d(); // MW stellar height scale length in kpc + Real M_d = Galaxies::MW.getM_d(); // MW disk mass in M_sun (assumed to be all in stars) + Real R_d = Galaxies::MW.getR_d(); // MW stellar disk scale length in kpc + Real Z_d = Galaxies::MW.getZ_d(); // MW stellar height scale length in kpc Real R_max = sqrt(P->xlen * P->xlen + P->ylen * P->ylen) / 2; R_max = P->xlen / 2.0; diff --git a/src/particles/particles_3D.h b/src/particles/particles_3D.h index b52f85bfc..1d3355902 100644 --- a/src/particles/particles_3D.h +++ b/src/particles/particles_3D.h @@ -125,7 +125,7 @@ class Particles_3D int_vector_t out_indxs_vec_z1; #endif // PARTICLES_CPU - #endif // MPI_CHOLLA + #endif // MPI_CHOLLA bool TRANSFER_DENSITY_BOUNDARIES; bool TRANSFER_PARTICLES_BOUNDARIES; @@ -210,7 +210,7 @@ class Particles_3D #endif // MPI_CHOLLA - #endif // PARTICLES_GPU + #endif // PARTICLES_GPU } G; diff --git a/src/particles/particles_boundaries.cpp b/src/particles/particles_boundaries.cpp index 689beaccc..5d9266ce9 100644 --- a/src/particles/particles_boundaries.cpp +++ b/src/particles/particles_boundaries.cpp @@ -1042,5 +1042,5 @@ void Particles_3D::Unload_Particles_from_Buffer_GPU(int direction, int side, Rea #endif // PARTICLES_GPU - #endif // MPI_CHOLLA -#endif // PARTICLES + #endif // MPI_CHOLLA +#endif // PARTICLES diff --git a/src/particles/particles_dynamics.cpp b/src/particles/particles_dynamics.cpp index 9baba2cc5..e6d6add6f 100644 --- a/src/particles/particles_dynamics.cpp +++ b/src/particles/particles_dynamics.cpp @@ -465,6 +465,6 @@ void Grid3D::Advance_Particles_KDK_Cosmo_Step2_function(part_int_t p_start, part #endif // PARTICLES_CPU - #endif // COSMOLOGY + #endif // COSMOLOGY -#endif // PARTICLES +#endif // PARTICLES diff --git a/src/particles/supernova.h b/src/particles/supernova.h index e788ea0b2..f7fdcb76c 100644 --- a/src/particles/supernova.h +++ b/src/particles/supernova.h @@ -18,9 +18,9 @@ const int SN = 0, RESOLVED = 1, NOT_RESOLVED = 2, ENERGY = 3, MOMENTUM = 4, UNRE // supernova rate: 1SN / 100 solar masses per 36 Myr static const Real DEFAULT_SNR = 2.8e-7; static const Real ENERGY_PER_SN = 1e51 / MASS_UNIT * TIME_UNIT * TIME_UNIT / LENGTH_UNIT / LENGTH_UNIT; -static const Real MASS_PER_SN = 10.0; // 10 solarMasses per SN +static const Real MASS_PER_SN = 10.0; // 10 solarMasses per SN static const Real FINAL_MOMENTUM = - 2.8e5 / LENGTH_UNIT * 1e5 * TIME_UNIT; // 2.8e5 M_s km/s * n_0^{-0.17} -> eq.(34) Kim & Ostriker (2015) + 2.8e5 / LENGTH_UNIT * 1e5 * TIME_UNIT; // 2.8e5 M_s km/s * n_0^{-0.17} -> eq.(34) Kim & Ostriker (2015) static const Real MU = 0.6; static const Real R_SH = 0.0302; // 30.2 pc * n_0^{-0.46} -> eq.(31) Kim & Ostriker (2015) static const Real DEFAULT_SN_END = 40000; // default value for when SNe stop (40 Myr) diff --git a/src/reconstruction/plmc_cuda.cu b/src/reconstruction/plmc_cuda.cu index 787449f21..23a930655 100644 --- a/src/reconstruction/plmc_cuda.cu +++ b/src/reconstruction/plmc_cuda.cu @@ -14,7 +14,7 @@ #ifdef DE // PRESSURE_DE #include "../utils/hydro_utilities.h" - #endif // DE + #endif // DE /*! \fn __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, int ny, int nz, int n_ghost, Real dx, Real dt, Real diff --git a/src/reconstruction/plmp_cuda.cu b/src/reconstruction/plmp_cuda.cu index f69bbdc4b..d84673d83 100644 --- a/src/reconstruction/plmp_cuda.cu +++ b/src/reconstruction/plmp_cuda.cu @@ -59,7 +59,7 @@ __global__ void PLMP_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou #ifdef SCALAR Real scalar_i[NSCALARS], scalar_imo[NSCALARS], scalar_ipo[NSCALARS]; Real scalar_L[NSCALARS], scalar_R[NSCALARS], dscalar_L[NSCALARS], dscalar_R[NSCALARS]; - #endif // SCALAR + #endif // SCALAR #ifndef VL // Don't use velocities to reconstruct when using VL Real dtodx = dt / dx; @@ -283,7 +283,7 @@ __global__ void PLMP_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou } #endif // SCALAR - #endif // NO VL + #endif // NO VL // Convert the left and right states in the primitive to the conserved // variables send final values back from kernel bounds_R refers to the right diff --git a/src/reconstruction/ppmc_cuda.cu b/src/reconstruction/ppmc_cuda.cu index 10c6a788d..64d3ae072 100644 --- a/src/reconstruction/ppmc_cuda.cu +++ b/src/reconstruction/ppmc_cuda.cu @@ -1207,7 +1207,7 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou } #endif // SCALAR - #endif // VL, i.e. CTU was used for this section + #endif // VL, i.e. CTU was used for this section // enforce minimum values d_L = fmax(d_L, (Real)TINY_NUMBER); diff --git a/src/reconstruction/ppmp_cuda.cu b/src/reconstruction/ppmp_cuda.cu index 36b74aebf..3e12d3e4b 100644 --- a/src/reconstruction/ppmp_cuda.cu +++ b/src/reconstruction/ppmp_cuda.cu @@ -492,7 +492,7 @@ __global__ void PPMP_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou for (int i = 0; i < NSCALARS; i++) { del_scalar[i] = scalar_R[i] - scalar_L[i]; } - #endif // SCALAR + #endif // SCALAR d_6 = 6.0 * (d_i - 0.5 * (d_L + d_R)); // Fryxell Eqn 30 vx_6 = 6.0 * (vx_i - 0.5 * (vx_L + vx_R)); // Fryxell Eqn 30 @@ -506,7 +506,7 @@ __global__ void PPMP_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou for (int i = 0; i < NSCALARS; i++) { scalar_6[i] = 6.0 * (scalar_i[i] - 0.5 * (scalar_L[i] + scalar_R[i])); // Fryxell Eqn 30 } - #endif // SCALAR + #endif // SCALAR // set speed of characteristics (v-c, v, v+c) using average values of v and // c diff --git a/src/riemann_solvers/exact_cuda.cu b/src/riemann_solvers/exact_cuda.cu index 918188441..868475fbb 100644 --- a/src/riemann_solvers/exact_cuda.cu +++ b/src/riemann_solvers/exact_cuda.cu @@ -48,7 +48,7 @@ __global__ void Calculate_Exact_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds } Real dl, vxl, vyl, vzl, pl, - cl; // density, velocity, pressure, sound speed (left) + cl; // density, velocity, pressure, sound speed (left) Real dr, vxr, vyr, vzr, pr, cr; // density, velocity, pressure, sound speed (right) Real ds, vs, ps, Es; // sample_CUDAd density, velocity, pressure, total @@ -256,9 +256,9 @@ __device__ void sample_CUDA(const Real pm, const Real vm, Real *d, Real *v, Real Real c, sl, sr; - if (vm >= 0) // sampling point lies to the left of the contact discontinuity + if (vm >= 0) // sampling point lies to the left of the contact discontinuity { - if (pm <= pl) // left rarefaction + if (pm <= pl) // left rarefaction { if (vxl - cl >= 0) // sampled point is in left data state { @@ -294,7 +294,7 @@ __device__ void sample_CUDA(const Real pm, const Real vm, Real *d, Real *v, Real *p = pm; } } - } else // sampling point lies to the right of the contact discontinuity + } else // sampling point lies to the right of the contact discontinuity { if (pm > pr) // right shock { @@ -310,7 +310,7 @@ __device__ void sample_CUDA(const Real pm, const Real vm, Real *d, Real *v, Real *v = vm; *p = pm; } - } else // right rarefaction + } else // right rarefaction { if (vxr + cr <= 0) // sampled point is in right data state { diff --git a/src/riemann_solvers/hlld_cuda.cu b/src/riemann_solvers/hlld_cuda.cu index c962325a7..2fcc4c480 100644 --- a/src/riemann_solvers/hlld_cuda.cu +++ b/src/riemann_solvers/hlld_cuda.cu @@ -22,7 +22,7 @@ #ifdef DE // PRESSURE_DE #include "../utils/hydro_utilities.h" -#endif // DE +#endif // DE #ifdef CUDA diff --git a/src/system_tests/hydro_system_tests.cpp b/src/system_tests/hydro_system_tests.cpp index 9d9639f65..725239d29 100644 --- a/src/system_tests/hydro_system_tests.cpp +++ b/src/system_tests/hydro_system_tests.cpp @@ -133,10 +133,10 @@ class tHYDROtMHDSYSTEMLinearWavesParameterizedMpi : public ::testing::TestWithPa #ifdef PCM double const allowedL1Error = 4E-7; // Based on results in Gardiner & Stone 2008 double const allowedError = 4E-7; -#else // PCM +#else // PCM double const allowedL1Error = 1E-7; // Based on results in Gardiner & Stone 2008 double const allowedError = 1E-7; -#endif // PCM +#endif // PCM void setLaunchParams(double const &waveSpeed, double const &rEigenVec_rho, double const &rEigenVec_MomentumX, double const &rEigenVec_MomentumY, double const &rEigenVec_MomentumZ, double const &rEigenVec_E, diff --git a/src/system_tests/mhd_system_tests.cpp b/src/system_tests/mhd_system_tests.cpp index 436c6129b..4c281394c 100644 --- a/src/system_tests/mhd_system_tests.cpp +++ b/src/system_tests/mhd_system_tests.cpp @@ -36,10 +36,10 @@ class tMHDSYSTEMLinearWavesParameterizedAngle : public ::testing::TestWithParam< #ifdef PCM double const allowedL1Error = 4E-7; // Based on results in Gardiner & Stone 2008 double const allowedError = 4E-7; -#else // PCM +#else // PCM double const allowedL1Error = 1E-7; // Based on results in Gardiner & Stone 2008 double const allowedError = 1E-7; -#endif // PCM +#endif // PCM void setLaunchParams(double const &waveSpeed, double const &rEigenVec_rho, double const &rEigenVec_MomentumX, double const &rEigenVec_MomentumY, double const &rEigenVec_MomentumZ, double const &rEigenVec_E, @@ -393,10 +393,10 @@ class tMHDSYSTEMLinearWavesParameterizedMpi : public ::testing::TestWithParam -#include "../global/global.h" // Provides Real, get_time +#include "../global/global.h" // Provides Real, get_time -//#ifdef CPU_TIME -// Each instance of this class represents a single timer, timing a single -// section of code. All instances have their own n_steps, time_start, etc. so -// that all timers can run independently +// #ifdef CPU_TIME +// Each instance of this class represents a single timer, timing a single +// section of code. All instances have their own n_steps, time_start, etc. so +// that all timers can run independently class OneTime { public: @@ -67,8 +66,7 @@ class Time void Print_Times(); void Print_Average_Times(struct parameters P); }; -//#endif // CPU_TIME - +// #endif // CPU_TIME // ScopedTimer does nothing if CPU_TIME is disabled /* \brief ScopedTimer helps time a scope. Initialize as first variable and C++ guarantees it is destroyed last */ @@ -77,18 +75,12 @@ class ScopedTimer public: const char* name; double time_start = 0; - + /* \brief ScopedTimer Constructor initializes name and time */ ScopedTimer(const char* input_name); - + /* \brief ScopedTimer Destructor computes dt and prints */ ~ScopedTimer(void); - - }; - - - - -#endif // TIMING_FUNCTIONS_H +#endif // TIMING_FUNCTIONS_H From 3bb968e49c143ade9c57f15e737a53bed976dac5 Mon Sep 17 00:00:00 2001 From: Alwin Date: Tue, 11 Apr 2023 03:16:29 -0400 Subject: [PATCH 305/694] format with clang 15 instead of 16 --- src/analysis/lya_statistics.cpp | 2 +- src/cooling/cooling_cuda.cu | 4 ++-- src/dust/dust_cuda.cu | 8 +++---- src/global/global.h | 28 ++++++++++++------------- src/global/global_cuda.h | 2 +- src/gravity/grav3D.cpp | 4 ++-- src/gravity/gravity_functions.cpp | 14 ++++++------- src/gravity/gravity_functions_gpu.cu | 6 +++--- src/gravity/paris/HenryPeriodic.hpp | 22 +++++++++---------- src/gravity/paris/ParisPeriodic.hpp | 4 ++-- src/grid/boundary_conditions.cpp | 4 ++-- src/grid/grid3D.cpp | 8 +++---- src/grid/grid_enum.h | 4 ++-- src/grid/initial_conditions.cpp | 24 ++++++++++----------- src/grid/mpi_boundaries.cpp | 4 ++-- src/integrators/VL_3D_cuda.cu | 2 +- src/io/io.cpp | 12 +++++------ src/main.cpp | 2 +- src/model/disk_ICs.cpp | 28 ++++++++++++------------- src/model/disk_galaxy.h | 2 +- src/mpi/mpi_routines.cpp | 16 +++++++------- src/mpi/mpi_routines.h | 16 +++++++------- src/particles/density_CIC.cpp | 4 ++-- src/particles/density_boundaries_gpu.cu | 2 +- src/particles/gravity_CIC.cpp | 2 +- src/particles/gravity_CIC_gpu.cu | 2 +- src/particles/io_particles.cpp | 2 +- src/particles/particles_3D.cpp | 8 +++---- src/particles/particles_3D.h | 4 ++-- src/particles/particles_boundaries.cpp | 4 ++-- src/particles/particles_dynamics.cpp | 4 ++-- src/particles/supernova.h | 4 ++-- src/reconstruction/plmc_cuda.cu | 2 +- src/reconstruction/plmp_cuda.cu | 4 ++-- src/reconstruction/ppmc_cuda.cu | 2 +- src/reconstruction/ppmp_cuda.cu | 4 ++-- src/riemann_solvers/exact_cuda.cu | 10 ++++----- src/riemann_solvers/hlld_cuda.cu | 2 +- src/system_tests/hydro_system_tests.cpp | 4 ++-- src/system_tests/mhd_system_tests.cpp | 18 ++++++++-------- src/utils/error_handling.cpp | 2 +- src/utils/reduction_utilities.h | 4 ++-- src/utils/timing_functions.cpp | 2 +- 43 files changed, 153 insertions(+), 153 deletions(-) diff --git a/src/analysis/lya_statistics.cpp b/src/analysis/lya_statistics.cpp index 1473365f7..3a22149a1 100644 --- a/src/analysis/lya_statistics.cpp +++ b/src/analysis/lya_statistics.cpp @@ -942,7 +942,7 @@ void Grid3D::Compute_Transmitted_Flux_Skewer(int skewer_id, int axis) c = 2.99792458e10; // cm/s kpc = 3.0857e21; // cm kpc3 = kpc * kpc * kpc; - e_charge = 4.8032e-10; // cm^3/2 g^1/2 s^-1 + e_charge = 4.8032e-10; // cm^3/2 g^1/2 s^-1 // Fill the Real cells first for (int los_id = 0; los_id < n_los_total; los_id++) { diff --git a/src/cooling/cooling_cuda.cu b/src/cooling/cooling_cuda.cu index c0fe25d34..4b09527d0 100644 --- a/src/cooling/cooling_cuda.cu +++ b/src/cooling/cooling_cuda.cu @@ -355,5 +355,5 @@ __device__ Real Cloudy_cool(Real n, Real T, cudaTextureObject_t coolTexObj, cuda } #endif // CLOUDY_COOL - #endif // COOLING_GPU -#endif // CUDA + #endif // COOLING_GPU +#endif // CUDA diff --git a/src/dust/dust_cuda.cu b/src/dust/dust_cuda.cu index f9c71fa2b..344d3c9ce 100644 --- a/src/dust/dust_cuda.cu +++ b/src/dust/dust_cuda.cu @@ -117,11 +117,11 @@ __global__ void Dust_Kernel(Real *dev_conserved, int nx, int ny, int nz, int n_g __device__ __host__ Real calc_tau_sp(Real n, Real T) { Real YR_IN_S = 3.154e7; - Real a1 = 1; // dust grain size in units of 0.1 micrometers - Real d0 = n / (6e-4); // gas density in units of 10^-27 g/cm^3 - Real T_0 = 2e6; // K + Real a1 = 1; // dust grain size in units of 0.1 micrometers + Real d0 = n / (6e-4); // gas density in units of 10^-27 g/cm^3 + Real T_0 = 2e6; // K Real omega = 2.5; - Real A = 0.17e9 * YR_IN_S; // 0.17 Gyr in s + Real A = 0.17e9 * YR_IN_S; // 0.17 Gyr in s Real tau_sp = A * (a1 / d0) * (pow(T_0 / T, omega) + 1); // s diff --git a/src/global/global.h b/src/global/global.h index 1b6e0ace8..9ba6ca331 100644 --- a/src/global/global.h +++ b/src/global/global.h @@ -31,16 +31,16 @@ typedef double Real; #define MP 1.672622e-24 // mass of proton, grams #define KB 1.380658e-16 // boltzmann constant, cgs // #define GN 6.67259e-8 // gravitational constant, cgs -#define GN 4.49451e-18 // gravitational constant, kpc^3 / M_sun / kyr^2 -#define C_L 0.306594593 // speed of light in kpc/kyr +#define GN 4.49451e-18 // gravitational constant, kpc^3 / M_sun / kyr^2 +#define C_L 0.306594593 // speed of light in kpc/kyr -#define MYR 31.536e12 // Myears in secs -#define KPC 3.086e16 // kpc in km -#define G_COSMO 4.300927161e-06; // gravitational constant, kpc km^2 s^-2 Msun^-1 -#define MSUN_CGS 1.98847e33; // Msun in gr -#define KPC_CGS 3.086e21; // kpc in cm -#define KM_CGS 1e5; // km in cm -#define MH 1.67262171e-24 // Mass of hydrogen [g] +#define MYR 31.536e12 // Myears in secs +#define KPC 3.086e16 // kpc in km +#define G_COSMO 4.300927161e-06; // gravitational constant, kpc km^2 s^-2 Msun^-1 +#define MSUN_CGS 1.98847e33; // Msun in gr +#define KPC_CGS 3.086e21; // kpc in cm +#define KM_CGS 1e5; // km in cm +#define MH 1.67262171e-24 // Mass of hydrogen [g] #define TIME_UNIT 3.15569e10 // 1 kyr in s #define LENGTH_UNIT 3.08567758e21 // 1 kpc in cm @@ -93,8 +93,8 @@ typedef double Real; #ifdef GRAVITY_5_POINTS_GRADIENT #ifdef PARTICLES #define N_GHOST_POTENTIAL \ - 3 // 3 ghost cells are needed for 5 point gradient, ( one is for the - // CIC interpolation of the potential ) + 3 // 3 ghost cells are needed for 5 point gradient, ( one is for the + // CIC interpolation of the potential ) #else #define N_GHOST_POTENTIAL 2 // 2 ghost cells are needed for 5 point gradient #endif // PARTICLES @@ -102,8 +102,8 @@ typedef double Real; #else #ifdef PARTICLES #define N_GHOST_POTENTIAL \ - 2 // 2 ghost cells are needed for 3 point gradient, ( one is for the - // CIC interpolation of the potential ) + 2 // 2 ghost cells are needed for 3 point gradient, ( one is for the + // CIC interpolation of the potential ) #else #define N_GHOST_POTENTIAL 1 // 1 ghost cells are needed for 3 point gradient #endif // PARTICLES @@ -133,7 +133,7 @@ extern int N_DATA_PER_PARTICLE_TRANSFER; #define SLOW_FACTOR 10 #endif // AVERAGE_SLOW_CELLS -#endif // PARTICLES +#endif // PARTICLES #define SIGN(a) (((a) < 0.) ? -1. : 1.) diff --git a/src/global/global_cuda.h b/src/global/global_cuda.h index f14b5e893..296fa31f1 100644 --- a/src/global/global_cuda.h +++ b/src/global/global_cuda.h @@ -120,4 +120,4 @@ __device__ double atomicAdd(double *address, double val) #endif // GLOBAL_CUDA_H -#endif // CUDA +#endif // CUDA diff --git a/src/gravity/grav3D.cpp b/src/gravity/grav3D.cpp index 722623cc1..20ca67655 100644 --- a/src/gravity/grav3D.cpp +++ b/src/gravity/grav3D.cpp @@ -129,8 +129,8 @@ void Grav3D::AllocateMemory_CPU(void) // density F.potential_h = (Real *)malloc(n_cells_potential * sizeof(Real)); // array for the potential at the n-th timestep F.potential_1_h = - (Real *)malloc(n_cells_potential * sizeof(Real)); // array for the potential at the (n-1)-th timestep - boundary_flags = (int *)malloc(6 * sizeof(int)); // array for the gravity boundary flags + (Real *)malloc(n_cells_potential * sizeof(Real)); // array for the potential at the (n-1)-th timestep + boundary_flags = (int *)malloc(6 * sizeof(int)); // array for the gravity boundary flags #ifdef GRAV_ISOLATED_BOUNDARY_X F.pot_boundary_x0 = (Real *)malloc(N_GHOST_POTENTIAL * ny_local * nz_local * diff --git a/src/gravity/gravity_functions.cpp b/src/gravity/gravity_functions.cpp index 28d4562c7..1f4a08f7f 100644 --- a/src/gravity/gravity_functions.cpp +++ b/src/gravity/gravity_functions.cpp @@ -53,7 +53,7 @@ void Grid3D::set_dt_Gravity() da_min = da_particles; chprintf(" Delta_a_particles: %f \n", da_particles); - #else // NOT ONLY_PARTICLES + #else // NOT ONLY_PARTICLES // Here da_min is the minumum between da_particles and da_hydro Real da_hydro; da_hydro = @@ -607,7 +607,7 @@ void Grid3D::Add_Analytic_Potential() #endif // PARALLEL_OMP #endif // GRAVITY_GPU else } - #endif // GRAVITY_ANALYTIC_COMP + #endif // GRAVITY_ANALYTIC_COMP void Grid3D::Copy_Hydro_Density_to_Gravity_Function(int g_start, int g_end) { @@ -661,7 +661,7 @@ void Grid3D::Copy_Hydro_Density_to_Gravity() } #endif // PARALLEL_OMP - #endif // GRAVITY_GPU + #endif // GRAVITY_GPU } #ifdef GRAVITY_ANALYTIC_COMP @@ -747,10 +747,10 @@ void Grid3D::Extrapolate_Grav_Potential_Function(int g_start, int g_end) for (i = 0; i < nx_pot; i++) { id_pot = i + j * nx_pot + k * nx_pot * ny_pot; id_grid = (i + nGHST) + (j + nGHST) * nx_grid + (k + nGHST) * nx_grid * ny_grid; - pot_now = potential_in[id_pot]; // Potential at the n-th timestep + pot_now = potential_in[id_pot]; // Potential at the n-th timestep if (Grav.INITIAL) { - pot_extrp = pot_now; // The first timestep the extrapolated potential - // is phi_0 + pot_extrp = pot_now; // The first timestep the extrapolated potential + // is phi_0 } else { pot_prev = Grav.F.potential_1_h[id_pot]; // Potential at the (n-1)-th // timestep ( previous step ) @@ -797,7 +797,7 @@ void Grid3D::Extrapolate_Grav_Potential() } #endif // PARALLEL_OMP - #endif // GRAVITY_GPU + #endif // GRAVITY_GPU // After the first timestep the INITIAL flag is set to false, that way the // potential is properly extrapolated afterwards diff --git a/src/gravity/gravity_functions_gpu.cu b/src/gravity/gravity_functions_gpu.cu index 2c2ac8aee..236670b49 100644 --- a/src/gravity/gravity_functions_gpu.cu +++ b/src/gravity/gravity_functions_gpu.cu @@ -217,9 +217,9 @@ void __global__ Extrapolate_Grav_Potential_Kernel(Real *dst_potential, Real *src tid_grid = tid_x + tid_y * nx_grid + tid_z * nx_grid * ny_grid; Real pot_now, pot_prev, pot_extrp; - pot_now = src_potential_0[tid_pot]; // Potential at the n-th timestep + pot_now = src_potential_0[tid_pot]; // Potential at the n-th timestep if (INITIAL) { - pot_extrp = pot_now; // The first timestep the extrapolated potential is phi_0 + pot_extrp = pot_now; // The first timestep the extrapolated potential is phi_0 } else { pot_prev = src_potential_1[tid_pot]; // Potential at the (n-1)-th timestep // ( previous step ) @@ -290,4 +290,4 @@ void Grid3D::Copy_Potential_From_GPU() } #endif // PARTICLES_CPU -#endif // GRAVITY +#endif // GRAVITY diff --git a/src/gravity/paris/HenryPeriodic.hpp b/src/gravity/paris/HenryPeriodic.hpp index d40e574d8..4fa2467b2 100644 --- a/src/gravity/paris/HenryPeriodic.hpp +++ b/src/gravity/paris/HenryPeriodic.hpp @@ -61,23 +61,23 @@ class HenryPeriodic void filter(const size_t bytes, double *const before, double *const after, const F f) const; private: - int idi_, idj_, idk_; //!< MPI coordinates of 3D block - int mi_, mj_, mk_; //!< Number of MPI tasks in each dimension of 3D domain - int nh_; //!< Global number of complex values in Z dimension, after R2C - //!< transform - int ni_, nj_, nk_; //!< Global number of real points in each dimension - int mp_, mq_; //!< Number of MPI tasks in X and Y dimensions of Z pencil - int idp_, idq_; //!< X and Y task IDs within Z pencil + int idi_, idj_, idk_; //!< MPI coordinates of 3D block + int mi_, mj_, mk_; //!< Number of MPI tasks in each dimension of 3D domain + int nh_; //!< Global number of complex values in Z dimension, after R2C + //!< transform + int ni_, nj_, nk_; //!< Global number of real points in each dimension + int mp_, mq_; //!< Number of MPI tasks in X and Y dimensions of Z pencil + int idp_, idq_; //!< X and Y task IDs within Z pencil MPI_Comm commI_, commJ_, commK_; //!< Communicators of fellow tasks in X, Y, and Z pencils int dh_, di_, dj_, dk_; //!< Max number of local points in each dimension int dhq_, dip_, djp_, - djq_; //!< Max number of local points in dimensions of 2D decompositions - size_t bytes_; //!< Max bytes needed for argument arrays + djq_; //!< Max number of local points in dimensions of 2D decompositions + size_t bytes_; //!< Max bytes needed for argument arrays cufftHandle c2ci_, c2cj_, c2rk_, - r2ck_; //!< Objects for forward and inverse FFTs + r2ck_; //!< Objects for forward and inverse FFTs #ifndef MPI_GPU - double *ha_, *hb_; //!< Host copies for MPI messages + double *ha_, *hb_; //!< Host copies for MPI messages #endif }; diff --git a/src/gravity/paris/ParisPeriodic.hpp b/src/gravity/paris/ParisPeriodic.hpp index d9a43e457..2650e156d 100644 --- a/src/gravity/paris/ParisPeriodic.hpp +++ b/src/gravity/paris/ParisPeriodic.hpp @@ -44,9 +44,9 @@ class ParisPeriodic void solve(size_t bytes, double *density, double *potential) const; private: - int ni_, nj_; //!< Number of elements in X and Y dimensions + int ni_, nj_; //!< Number of elements in X and Y dimensions #if defined(PARIS_3PT) || defined(PARIS_5PT) - int nk_; //!< Number of elements in Z dimension + int nk_; //!< Number of elements in Z dimension #endif double ddi_, ddj_, ddk_; //!< Frequency-independent terms in Poisson solve HenryPeriodic henry; //!< FFT filter object diff --git a/src/grid/boundary_conditions.cpp b/src/grid/boundary_conditions.cpp index 9c8df7ba1..50c55126d 100644 --- a/src/grid/boundary_conditions.cpp +++ b/src/grid/boundary_conditions.cpp @@ -119,13 +119,13 @@ void Grid3D::Set_Boundary_Conditions(parameters P) Grav.Set_Boundary_Flags(flags); #endif // Gravity -#else /*MPI_CHOLLA*/ +#else /*MPI_CHOLLA*/ /*Set boundaries, including MPI exchanges*/ Set_Boundaries_MPI(P); -#endif /*MPI_CHOLLA*/ +#endif /*MPI_CHOLLA*/ } /*! \fn int Check_Custom_Boundary(int *flags, struct parameters P) diff --git a/src/grid/grid3D.cpp b/src/grid/grid3D.cpp index 4d3c23a86..d83c32e0b 100644 --- a/src/grid/grid3D.cpp +++ b/src/grid/grid3D.cpp @@ -93,7 +93,7 @@ void Grid3D::Get_Position(long i, long j, long k, Real *x_pos, Real *y_pos, Real *y_pos = H.ybound + H.dy * (j - H.n_ghost) + 0.5 * H.dy; *z_pos = H.zbound + H.dz * (k - H.n_ghost) + 0.5 * H.dz; -#else /*MPI_CHOLLA*/ +#else /*MPI_CHOLLA*/ /* position relative to local xyz bounds */ /* This approach was replaced because it is less consistent for multiple @@ -179,7 +179,7 @@ void Grid3D::Initialize(struct parameters *P) // set total number of cells H.n_cells = H.nx * H.ny * H.nz; -#else /*MPI_CHOLLA*/ +#else /*MPI_CHOLLA*/ /* perform domain decomposition * and set grid dimensions @@ -388,7 +388,7 @@ void Grid3D::set_dt(Real dti) H.dt = C_cfl / max_dti; -#endif // ONLY_PARTICLES +#endif // ONLY_PARTICLES #ifdef GRAVITY // Set dt for hydro and particles @@ -554,7 +554,7 @@ Real Grid3D::Update_Hydro_Grid() // Subtract the time spent on the Chemical Update #endif // CHEMISTRY_GPU Timer.Hydro.End(); -#endif // CPU_TIME +#endif // CPU_TIME #ifdef COOLING_GRACKLE #ifdef CPU_TIME diff --git a/src/grid/grid_enum.h b/src/grid/grid_enum.h index cfbce7b70..15e1d604a 100644 --- a/src/grid/grid_enum.h +++ b/src/grid/grid_enum.h @@ -53,9 +53,9 @@ enum : int { #ifdef DUST dust_density, - #endif // DUST + #endif // DUST -#endif // SCALAR +#endif // SCALAR finalscalar_plus_1, // needed to calculate NSCALARS finalscalar = finalscalar_plus_1 - 1, // resets enum to finalscalar so fields afterwards are correct diff --git a/src/grid/initial_conditions.cpp b/src/grid/initial_conditions.cpp index 847df9ead..8a2bfae98 100644 --- a/src/grid/initial_conditions.cpp +++ b/src/grid/initial_conditions.cpp @@ -778,7 +778,7 @@ void Grid3D::KH_res_ind() #ifdef DE C.GasEnergy[id] = P / (gama - 1.0); -#endif // DE +#endif // DE } // i loop } // j loop @@ -1388,18 +1388,18 @@ void Grid3D::Zeldovich_Pancake(struct parameters P) Real H0, h, Omega_M, rho_0, G, z_zeldovich, z_init, x_center, T_init, k_x; chprintf("Setting Zeldovich Pancake initial conditions...\n"); - H0 = P.H0; - h = H0 / 100; + H0 = P.H0; + h = H0 / 100; Omega_M = P.Omega_M; chprintf(" h = %f \n", h); chprintf(" Omega_M = %f \n", Omega_M); H0 /= 1000; //[km/s / kpc] - G = G_COSMO; - rho_0 = 3 * H0 * H0 / (8 * M_PI * G) * Omega_M / h / h; + G = G_COSMO; + rho_0 = 3 * H0 * H0 / (8 * M_PI * G) * Omega_M / h / h; z_zeldovich = 1; - z_init = P.Init_redshift; + z_init = P.Init_redshift; chprintf(" rho_0 = %f \n", rho_0); chprintf(" z_init = %f \n", z_init); chprintf(" z_zeldovich = %f \n", z_zeldovich); @@ -1459,20 +1459,20 @@ void Grid3D::Zeldovich_Pancake(struct parameters P) index = (int(x_pos / H.dx) + 0) % 256; // index = ( index + 16 ) % 256; dens = ics_values[0 * nPoints + index]; - vel = ics_values[1 * nPoints + index]; - E = ics_values[2 * nPoints + index]; - U = ics_values[3 * nPoints + index]; + vel = ics_values[1 * nPoints + index]; + E = ics_values[2 * nPoints + index]; + U = ics_values[3 * nPoints + index]; // // // chprintf( "%f \n", vel ); - C.density[id] = dens; + C.density[id] = dens; C.momentum_x[id] = dens * vel; C.momentum_y[id] = 0; C.momentum_z[id] = 0; - C.Energy[id] = E; + C.Energy[id] = E; #ifdef DE - C.GasEnergy[id] = U; + C.GasEnergy[id] = U; #endif } } diff --git a/src/grid/mpi_boundaries.cpp b/src/grid/mpi_boundaries.cpp index 1ea369e64..9b858c8cf 100644 --- a/src/grid/mpi_boundaries.cpp +++ b/src/grid/mpi_boundaries.cpp @@ -959,7 +959,7 @@ void Grid3D::Unload_MPI_Comm_Buffers(int index) } #endif // SOR - #endif // GRAVITY + #endif // GRAVITY #ifdef PARTICLES if (Particles.TRANSFER_DENSITY_BOUNDARIES) { @@ -1015,7 +1015,7 @@ void Grid3D::Unload_MPI_Comm_Buffers(int index) Fptr_Unload_Particle_Density = &Grid3D::Unload_Particles_Density_Boundary_From_Buffer; - #endif // PARTICLES_GPU + #endif // PARTICLES_GPU if (index == 0) { (this->*Fptr_Unload_Particle_Density)(0, 0, l_recv_buffer_x0); diff --git a/src/integrators/VL_3D_cuda.cu b/src/integrators/VL_3D_cuda.cu index 9297f2fa2..2227172bf 100644 --- a/src/integrators/VL_3D_cuda.cu +++ b/src/integrators/VL_3D_cuda.cu @@ -467,7 +467,7 @@ __global__ void Update_Conserved_Variables_3D_half(Real *dev_conserved, Real *de dev_conserved_half[(n_fields - 1) * n_cells + id] *= (density_floor / dens_0); #endif // DE } - #endif // DENSITY_FLOOR + #endif // DENSITY_FLOOR } } diff --git a/src/io/io.cpp b/src/io/io.cpp index 41ca67b49..b99cc1804 100644 --- a/src/io/io.cpp +++ b/src/io/io.cpp @@ -941,7 +941,7 @@ void Grid3D::Write_Grid_Text(FILE *fp) fprintf(fp, "\tNan"); #endif // DE fprintf(fp, "\n"); -#endif // MHD +#endif // MHD } // 2D case @@ -976,7 +976,7 @@ void Grid3D::Write_Grid_Text(FILE *fp) fprintf(fp, "\tNan"); #endif // DE fprintf(fp, "\n"); -#endif // MHD +#endif // MHD } #ifdef MHD // Save the last line of magnetic fields @@ -987,7 +987,7 @@ void Grid3D::Write_Grid_Text(FILE *fp) fprintf(fp, "\tNan"); #endif // DE fprintf(fp, "\n"); -#endif // MHD +#endif // MHD } // 3D case @@ -1388,7 +1388,7 @@ void Grid3D::Write_Grid_HDF5(hid_t file_id) output_full_ionization = false; #endif // OUTPUT_FULL_IONIZATION - #endif // COOLING_GRACKLE or CHEMISTRY_GPU + #endif // COOLING_GRACKLE or CHEMISTRY_GPU #if defined(GRAVITY_GPU) && defined(OUTPUT_POTENTIAL) CudaSafeCall(cudaMemcpy(Grav.F.potential_h, Grav.F.potential_d, Grav.n_cells_potential * sizeof(Real), @@ -1468,7 +1468,7 @@ void Grid3D::Write_Grid_HDF5(hid_t file_id) #endif // COOLING_GRACKLE || CHEMISTRY_GPU - #endif // SCALAR + #endif // SCALAR // 3D case if (H.nx > 1 && H.ny > 1 && H.nz > 1) { @@ -2420,7 +2420,7 @@ void Grid3D::Read_Grid_HDF5(hid_t file_id, struct parameters P) #endif // GRACKLE_METALS #endif // COOLING_GRACKLE , CHEMISTRY_GPU - #endif // SCALAR + #endif // SCALAR // MHD only valid in 3D case if (H.nx > 1 && H.ny > 1 && H.nz > 1) { diff --git a/src/main.cpp b/src/main.cpp index 555eac3b8..ae299c024 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -48,7 +48,7 @@ int main(int argc, char *argv[]) /* Initialize MPI communication */ #ifdef MPI_CHOLLA InitializeChollaMPI(&argc, &argv); -#endif /*MPI_CHOLLA*/ +#endif /*MPI_CHOLLA*/ Real dti = 0; // inverse time step, 1.0 / dt diff --git a/src/model/disk_ICs.cpp b/src/model/disk_ICs.cpp index bad65fe37..8e4bede3f 100644 --- a/src/model/disk_ICs.cpp +++ b/src/model/disk_ICs.cpp @@ -194,16 +194,16 @@ void hydrostatic_ray_analytical_D3D(Real *rho, Real *r, Real *hdp, Real dr, int { // Routine to determine the hydrostatic density profile // along a ray from the galaxy center - int i; // index along r direction + int i; // index along r direction Real gamma = hdp[13]; // adiabatic index Real rho_eos = hdp[18]; // density where K_EOS is set Real cs = hdp[19]; // sound speed at rho_eos Real r_cool = hdp[20]; // cooling radius - Real Phi_0; // potential at cooling radius + Real Phi_0; // potential at cooling radius - Real D_rho; // ratio of density at mid plane and rho_eos + Real D_rho; // ratio of density at mid plane and rho_eos Real gmo = gamma - 1.0; // gamma-1 @@ -259,10 +259,10 @@ void hydrostatic_column_isothermal_D3D(Real *rho, Real R, Real *hdp, Real dz, in Real cs = hdp[16]; - Real Phi_0; // potential at z=0 + Real Phi_0; // potential at z=0 - Real rho_0; // density at mid plane - Real D_rho; // ratio of density at mid plane and rho_eos + Real rho_0; // density at mid plane + Real D_rho; // ratio of density at mid plane and rho_eos Real z_0, z_1; // heights for iteration Real z_disk_max; @@ -274,8 +274,8 @@ void hydrostatic_column_isothermal_D3D(Real *rho, Real R, Real *hdp, Real dz, in int n_int = 1000; int flag; // flag for integration - int ks; // start of integrals above disk plane - int km; // mirror of k + int ks; // start of integrals above disk plane + int km; // mirror of k if (nz % 2) { ks = ng + (nz - 1) / 2; } else { @@ -440,14 +440,14 @@ void hydrostatic_column_analytical_D3D(Real *rho, Real R, Real *hdp, Real dz, in Real rho_eos = hdp[15]; Real cs = hdp[16]; - Real Phi_0; // potential at z=0 + Real Phi_0; // potential at z=0 - Real D_rho; // ratio of density at mid plane and rho_eos - Real D_new; // new ratio of density at mid plane and rho_eos + Real D_rho; // ratio of density at mid plane and rho_eos + Real D_new; // new ratio of density at mid plane and rho_eos Real z_0, z_1, z_2; // heights for iteration Real z_disk_max; - Real A_0, A_1; // density function to find roots + Real A_0, A_1; // density function to find roots // density integration Real phi_int, A; @@ -760,8 +760,8 @@ void Grid3D::Disk_3D(parameters p) // contraction) r_cool = galaxy.getR_cool(); // cooling radius in kpc (MW) - M_h = M_vir - M_d; // halo mass in M_sun - R_s = R_vir / c_vir; // halo scale length in kpc + M_h = M_vir - M_d; // halo mass in M_sun + R_s = R_vir / c_vir; // halo scale length in kpc // T_d = 5.9406e5; // SET TO MATCH K_EOS SET BY HAND for K_eos = 1.859984e-14 // T_d = 2.0e5; T_d = 1.0e4; // CHANGED FOR ISOTHERMAL diff --git a/src/model/disk_galaxy.h b/src/model/disk_galaxy.h index c62aeb63f..04249853f 100644 --- a/src/model/disk_galaxy.h +++ b/src/model/disk_galaxy.h @@ -186,6 +186,6 @@ namespace Galaxies // static DiskGalaxy MW(6.5e10, 3.5, (3.5/5.0), 1.0e12, 261, 20, 157.0); static ClusteredDiskGalaxy MW(1e4, 5e5, 6.5e10, 2.7, 0.7, 1.077e12, 261, 18, 157.0); static DiskGalaxy M82(1.0e10, 0.8, 0.15, 5.0e10, 0.8 / 0.015, 10, 100.0); -}; // namespace Galaxies +}; // namespace Galaxies #endif // DISK_GALAXY diff --git a/src/mpi/mpi_routines.cpp b/src/mpi/mpi_routines.cpp index ef26732d8..2e0faaa16 100644 --- a/src/mpi/mpi_routines.cpp +++ b/src/mpi/mpi_routines.cpp @@ -13,14 +13,14 @@ #include "../utils/error_handling.h" /*Global MPI Variables*/ -int procID; /*process rank*/ -int nproc; /*number of processes in global comm*/ -int root; /*rank of root process*/ +int procID; /*process rank*/ +int nproc; /*number of processes in global comm*/ +int root; /*rank of root process*/ -int procID_node; /*process rank on node*/ -int nproc_node; /*number of MPI processes on node*/ +int procID_node; /*process rank on node*/ +int nproc_node; /*number of MPI processes on node*/ -MPI_Comm world; /*global communicator*/ +MPI_Comm world; /*global communicator*/ MPI_Datatype MPI_CHREAL; /*set equal to MPI_FLOAT or MPI_DOUBLE*/ @@ -573,7 +573,7 @@ void Allocate_MPI_DeviceBuffers(struct Header *H) N_PARTICLES_TRANSFER = n_max * n_max * factor; // Set the number of values that will be transferred for each particle - N_DATA_PER_PARTICLE_TRANSFER = 6; // 3 positions and 3 velocities + N_DATA_PER_PARTICLE_TRANSFER = 6; // 3 positions and 3 velocities #ifndef SINGLE_PARTICLE_MASS N_DATA_PER_PARTICLE_TRANSFER += 1; // one more for the particle mass #endif @@ -1002,7 +1002,7 @@ std::pair MPI_Comm_node() // get the hostname of the node std::string pname; // node hostname pname.resize(MPI_MAX_PROCESSOR_NAME); - int pname_length; // length of node hostname + int pname_length; // length of node hostname MPI_Get_processor_name(pname.data(), &pname_length); diff --git a/src/mpi/mpi_routines.h b/src/mpi/mpi_routines.h index 74e2562c9..2d2a644b4 100644 --- a/src/mpi/mpi_routines.h +++ b/src/mpi/mpi_routines.h @@ -15,14 +15,14 @@ #endif /*FFTW*/ /*Global MPI Variables*/ -extern int procID; /*process rank*/ -extern int nproc; /*number of processes in global comm*/ -extern int root; /*rank of root process*/ -extern int procID_node; /*process rank on node*/ -extern int nproc_node; /*number of MPI processes on node*/ - -extern MPI_Comm world; /*global communicator*/ -extern MPI_Comm node; /*communicator for each node*/ +extern int procID; /*process rank*/ +extern int nproc; /*number of processes in global comm*/ +extern int root; /*rank of root process*/ +extern int procID_node; /*process rank on node*/ +extern int nproc_node; /*number of MPI processes on node*/ + +extern MPI_Comm world; /*global communicator*/ +extern MPI_Comm node; /*communicator for each node*/ extern MPI_Datatype MPI_CHREAL; /*data type describing float precision*/ diff --git a/src/particles/density_CIC.cpp b/src/particles/density_CIC.cpp index 2391bd6f2..3e28f55de 100644 --- a/src/particles/density_CIC.cpp +++ b/src/particles/density_CIC.cpp @@ -90,7 +90,7 @@ void Grid3D::Copy_Particles_Density() } #endif // PARALLEL_OMP - #endif // GRAVITY_GPU + #endif // GRAVITY_GPU } void Grid3D::Copy_Particles_Density_function(int g_start, int g_end) @@ -415,6 +415,6 @@ void Particles_3D::Get_Density_CIC_OMP() } #endif // PARALLEL_OMP - #endif // PARTICLES_CPU + #endif // PARTICLES_CPU #endif diff --git a/src/particles/density_boundaries_gpu.cu b/src/particles/density_boundaries_gpu.cu index b32979158..fd5c4ddca 100644 --- a/src/particles/density_boundaries_gpu.cu +++ b/src/particles/density_boundaries_gpu.cu @@ -276,4 +276,4 @@ void Grid3D::Unload_Particles_Density_Boundary_From_Buffer_GPU(int direction, in #endif // MPI_CHOLLA -#endif // PARTICLES_GPU & GRAVITY_GPU +#endif // PARTICLES_GPU & GRAVITY_GPU diff --git a/src/particles/gravity_CIC.cpp b/src/particles/gravity_CIC.cpp index 74d14d6ae..13938942e 100644 --- a/src/particles/gravity_CIC.cpp +++ b/src/particles/gravity_CIC.cpp @@ -350,4 +350,4 @@ void Grid3D::Get_Gravity_CIC_function(part_int_t p_start, part_int_t p_end) #endif // PARTICLES_CPU -#endif // PARTICLES +#endif // PARTICLES diff --git a/src/particles/gravity_CIC_gpu.cu b/src/particles/gravity_CIC_gpu.cu index 36f2fe7cd..19aee8941 100644 --- a/src/particles/gravity_CIC_gpu.cu +++ b/src/particles/gravity_CIC_gpu.cu @@ -346,4 +346,4 @@ void Grid3D::Copy_Particles_Density_GPU() #endif // GRAVITY_GPU -#endif // PARTICLES +#endif // PARTICLES diff --git a/src/particles/io_particles.cpp b/src/particles/io_particles.cpp index 3064c9d83..02a7d6c3a 100644 --- a/src/particles/io_particles.cpp +++ b/src/particles/io_particles.cpp @@ -46,7 +46,7 @@ void Particles_3D::Load_Particles_Data(struct parameters *P) G.nx_local); // Everyone reads the same file #else sprintf(filename, "%s.%d", filename, procID); - #endif // TILED_INITIAL_CONDITIONS + #endif // TILED_INITIAL_CONDITIONS #endif chprintf(" Loading particles file: %s \n", filename); diff --git a/src/particles/particles_3D.cpp b/src/particles/particles_3D.cpp index 542cdb852..f2b56f62c 100644 --- a/src/particles/particles_3D.cpp +++ b/src/particles/particles_3D.cpp @@ -651,11 +651,11 @@ void Particles_3D::Initialize_Disk_Stellar_Clusters(struct parameters *P) std::uniform_real_distribution phiDist(0, 2 * M_PI); // for generating phi std::normal_distribution speedDist(0, - 1); // for generating random speeds. + 1); // for generating random speeds. - Real M_d = Galaxies::MW.getM_d(); // MW disk mass in M_sun (assumed to be all in stars) - Real R_d = Galaxies::MW.getR_d(); // MW stellar disk scale length in kpc - Real Z_d = Galaxies::MW.getZ_d(); // MW stellar height scale length in kpc + Real M_d = Galaxies::MW.getM_d(); // MW disk mass in M_sun (assumed to be all in stars) + Real R_d = Galaxies::MW.getR_d(); // MW stellar disk scale length in kpc + Real Z_d = Galaxies::MW.getZ_d(); // MW stellar height scale length in kpc Real R_max = sqrt(P->xlen * P->xlen + P->ylen * P->ylen) / 2; R_max = P->xlen / 2.0; diff --git a/src/particles/particles_3D.h b/src/particles/particles_3D.h index 1d3355902..b52f85bfc 100644 --- a/src/particles/particles_3D.h +++ b/src/particles/particles_3D.h @@ -125,7 +125,7 @@ class Particles_3D int_vector_t out_indxs_vec_z1; #endif // PARTICLES_CPU - #endif // MPI_CHOLLA + #endif // MPI_CHOLLA bool TRANSFER_DENSITY_BOUNDARIES; bool TRANSFER_PARTICLES_BOUNDARIES; @@ -210,7 +210,7 @@ class Particles_3D #endif // MPI_CHOLLA - #endif // PARTICLES_GPU + #endif // PARTICLES_GPU } G; diff --git a/src/particles/particles_boundaries.cpp b/src/particles/particles_boundaries.cpp index 5d9266ce9..689beaccc 100644 --- a/src/particles/particles_boundaries.cpp +++ b/src/particles/particles_boundaries.cpp @@ -1042,5 +1042,5 @@ void Particles_3D::Unload_Particles_from_Buffer_GPU(int direction, int side, Rea #endif // PARTICLES_GPU - #endif // MPI_CHOLLA -#endif // PARTICLES + #endif // MPI_CHOLLA +#endif // PARTICLES diff --git a/src/particles/particles_dynamics.cpp b/src/particles/particles_dynamics.cpp index e6d6add6f..9baba2cc5 100644 --- a/src/particles/particles_dynamics.cpp +++ b/src/particles/particles_dynamics.cpp @@ -465,6 +465,6 @@ void Grid3D::Advance_Particles_KDK_Cosmo_Step2_function(part_int_t p_start, part #endif // PARTICLES_CPU - #endif // COSMOLOGY + #endif // COSMOLOGY -#endif // PARTICLES +#endif // PARTICLES diff --git a/src/particles/supernova.h b/src/particles/supernova.h index f7fdcb76c..e788ea0b2 100644 --- a/src/particles/supernova.h +++ b/src/particles/supernova.h @@ -18,9 +18,9 @@ const int SN = 0, RESOLVED = 1, NOT_RESOLVED = 2, ENERGY = 3, MOMENTUM = 4, UNRE // supernova rate: 1SN / 100 solar masses per 36 Myr static const Real DEFAULT_SNR = 2.8e-7; static const Real ENERGY_PER_SN = 1e51 / MASS_UNIT * TIME_UNIT * TIME_UNIT / LENGTH_UNIT / LENGTH_UNIT; -static const Real MASS_PER_SN = 10.0; // 10 solarMasses per SN +static const Real MASS_PER_SN = 10.0; // 10 solarMasses per SN static const Real FINAL_MOMENTUM = - 2.8e5 / LENGTH_UNIT * 1e5 * TIME_UNIT; // 2.8e5 M_s km/s * n_0^{-0.17} -> eq.(34) Kim & Ostriker (2015) + 2.8e5 / LENGTH_UNIT * 1e5 * TIME_UNIT; // 2.8e5 M_s km/s * n_0^{-0.17} -> eq.(34) Kim & Ostriker (2015) static const Real MU = 0.6; static const Real R_SH = 0.0302; // 30.2 pc * n_0^{-0.46} -> eq.(31) Kim & Ostriker (2015) static const Real DEFAULT_SN_END = 40000; // default value for when SNe stop (40 Myr) diff --git a/src/reconstruction/plmc_cuda.cu b/src/reconstruction/plmc_cuda.cu index 23a930655..787449f21 100644 --- a/src/reconstruction/plmc_cuda.cu +++ b/src/reconstruction/plmc_cuda.cu @@ -14,7 +14,7 @@ #ifdef DE // PRESSURE_DE #include "../utils/hydro_utilities.h" - #endif // DE + #endif // DE /*! \fn __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, int ny, int nz, int n_ghost, Real dx, Real dt, Real diff --git a/src/reconstruction/plmp_cuda.cu b/src/reconstruction/plmp_cuda.cu index d84673d83..f69bbdc4b 100644 --- a/src/reconstruction/plmp_cuda.cu +++ b/src/reconstruction/plmp_cuda.cu @@ -59,7 +59,7 @@ __global__ void PLMP_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou #ifdef SCALAR Real scalar_i[NSCALARS], scalar_imo[NSCALARS], scalar_ipo[NSCALARS]; Real scalar_L[NSCALARS], scalar_R[NSCALARS], dscalar_L[NSCALARS], dscalar_R[NSCALARS]; - #endif // SCALAR + #endif // SCALAR #ifndef VL // Don't use velocities to reconstruct when using VL Real dtodx = dt / dx; @@ -283,7 +283,7 @@ __global__ void PLMP_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou } #endif // SCALAR - #endif // NO VL + #endif // NO VL // Convert the left and right states in the primitive to the conserved // variables send final values back from kernel bounds_R refers to the right diff --git a/src/reconstruction/ppmc_cuda.cu b/src/reconstruction/ppmc_cuda.cu index 64d3ae072..10c6a788d 100644 --- a/src/reconstruction/ppmc_cuda.cu +++ b/src/reconstruction/ppmc_cuda.cu @@ -1207,7 +1207,7 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou } #endif // SCALAR - #endif // VL, i.e. CTU was used for this section + #endif // VL, i.e. CTU was used for this section // enforce minimum values d_L = fmax(d_L, (Real)TINY_NUMBER); diff --git a/src/reconstruction/ppmp_cuda.cu b/src/reconstruction/ppmp_cuda.cu index 3e12d3e4b..36b74aebf 100644 --- a/src/reconstruction/ppmp_cuda.cu +++ b/src/reconstruction/ppmp_cuda.cu @@ -492,7 +492,7 @@ __global__ void PPMP_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou for (int i = 0; i < NSCALARS; i++) { del_scalar[i] = scalar_R[i] - scalar_L[i]; } - #endif // SCALAR + #endif // SCALAR d_6 = 6.0 * (d_i - 0.5 * (d_L + d_R)); // Fryxell Eqn 30 vx_6 = 6.0 * (vx_i - 0.5 * (vx_L + vx_R)); // Fryxell Eqn 30 @@ -506,7 +506,7 @@ __global__ void PPMP_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou for (int i = 0; i < NSCALARS; i++) { scalar_6[i] = 6.0 * (scalar_i[i] - 0.5 * (scalar_L[i] + scalar_R[i])); // Fryxell Eqn 30 } - #endif // SCALAR + #endif // SCALAR // set speed of characteristics (v-c, v, v+c) using average values of v and // c diff --git a/src/riemann_solvers/exact_cuda.cu b/src/riemann_solvers/exact_cuda.cu index 868475fbb..918188441 100644 --- a/src/riemann_solvers/exact_cuda.cu +++ b/src/riemann_solvers/exact_cuda.cu @@ -48,7 +48,7 @@ __global__ void Calculate_Exact_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds } Real dl, vxl, vyl, vzl, pl, - cl; // density, velocity, pressure, sound speed (left) + cl; // density, velocity, pressure, sound speed (left) Real dr, vxr, vyr, vzr, pr, cr; // density, velocity, pressure, sound speed (right) Real ds, vs, ps, Es; // sample_CUDAd density, velocity, pressure, total @@ -256,9 +256,9 @@ __device__ void sample_CUDA(const Real pm, const Real vm, Real *d, Real *v, Real Real c, sl, sr; - if (vm >= 0) // sampling point lies to the left of the contact discontinuity + if (vm >= 0) // sampling point lies to the left of the contact discontinuity { - if (pm <= pl) // left rarefaction + if (pm <= pl) // left rarefaction { if (vxl - cl >= 0) // sampled point is in left data state { @@ -294,7 +294,7 @@ __device__ void sample_CUDA(const Real pm, const Real vm, Real *d, Real *v, Real *p = pm; } } - } else // sampling point lies to the right of the contact discontinuity + } else // sampling point lies to the right of the contact discontinuity { if (pm > pr) // right shock { @@ -310,7 +310,7 @@ __device__ void sample_CUDA(const Real pm, const Real vm, Real *d, Real *v, Real *v = vm; *p = pm; } - } else // right rarefaction + } else // right rarefaction { if (vxr + cr <= 0) // sampled point is in right data state { diff --git a/src/riemann_solvers/hlld_cuda.cu b/src/riemann_solvers/hlld_cuda.cu index 2fcc4c480..c962325a7 100644 --- a/src/riemann_solvers/hlld_cuda.cu +++ b/src/riemann_solvers/hlld_cuda.cu @@ -22,7 +22,7 @@ #ifdef DE // PRESSURE_DE #include "../utils/hydro_utilities.h" -#endif // DE +#endif // DE #ifdef CUDA diff --git a/src/system_tests/hydro_system_tests.cpp b/src/system_tests/hydro_system_tests.cpp index 725239d29..9d9639f65 100644 --- a/src/system_tests/hydro_system_tests.cpp +++ b/src/system_tests/hydro_system_tests.cpp @@ -133,10 +133,10 @@ class tHYDROtMHDSYSTEMLinearWavesParameterizedMpi : public ::testing::TestWithPa #ifdef PCM double const allowedL1Error = 4E-7; // Based on results in Gardiner & Stone 2008 double const allowedError = 4E-7; -#else // PCM +#else // PCM double const allowedL1Error = 1E-7; // Based on results in Gardiner & Stone 2008 double const allowedError = 1E-7; -#endif // PCM +#endif // PCM void setLaunchParams(double const &waveSpeed, double const &rEigenVec_rho, double const &rEigenVec_MomentumX, double const &rEigenVec_MomentumY, double const &rEigenVec_MomentumZ, double const &rEigenVec_E, diff --git a/src/system_tests/mhd_system_tests.cpp b/src/system_tests/mhd_system_tests.cpp index 4c281394c..436c6129b 100644 --- a/src/system_tests/mhd_system_tests.cpp +++ b/src/system_tests/mhd_system_tests.cpp @@ -36,10 +36,10 @@ class tMHDSYSTEMLinearWavesParameterizedAngle : public ::testing::TestWithParam< #ifdef PCM double const allowedL1Error = 4E-7; // Based on results in Gardiner & Stone 2008 double const allowedError = 4E-7; -#else // PCM +#else // PCM double const allowedL1Error = 1E-7; // Based on results in Gardiner & Stone 2008 double const allowedError = 1E-7; -#endif // PCM +#endif // PCM void setLaunchParams(double const &waveSpeed, double const &rEigenVec_rho, double const &rEigenVec_MomentumX, double const &rEigenVec_MomentumY, double const &rEigenVec_MomentumZ, double const &rEigenVec_E, @@ -393,10 +393,10 @@ class tMHDSYSTEMLinearWavesParameterizedMpi : public ::testing::TestWithParam Date: Tue, 11 Apr 2023 06:11:14 -0400 Subject: [PATCH 306/694] gravity output uses GPU --- src/io/io.cpp | 71 ++++++++++++++---------------------------- src/io/io_parallel.cpp | 25 ++++++++------- 2 files changed, 38 insertions(+), 58 deletions(-) diff --git a/src/io/io.cpp b/src/io/io.cpp index b99cc1804..f7567711c 100644 --- a/src/io/io.cpp +++ b/src/io/io.cpp @@ -342,7 +342,7 @@ void OutputFloat32(Grid3D &G, struct parameters P, int nfile) #endif // DE #ifdef MHD - // TODO : Repair output format if needed and remove these chprintfs when appropriate + // TODO (by Alwin, for anyone) : Repair output format if needed and remove these chprintfs when appropriate if (P.out_float32_magnetic_x > 0) { chprintf("WARNING: MHD float-32 output has a different output format than float-64\n"); WriteHDF5Field3D(H.nx, H.ny, nx_dset + 1, ny_dset + 1, nz_dset + 1, H.n_ghost - 1, file_id, dataset_buffer, @@ -1338,13 +1338,22 @@ void Write_Grid_HDF5_Field_GPU(Header H, hid_t file_id, Real *dataset_buffer, Re Write_HDF5_Dataset_Grid(H.nx, H.ny, H.nz, H.nx_real, H.ny_real, H.nz_real, file_id, dataset_buffer, name); } -void Write_Generic_HDF5_Field(int nx, int ny, int nz, int nx_real, int ny_real, int nz_real, int n_ghost, hid_t file_id, - Real *dataset_buffer, Real *source_buffer, const char *name) +void Write_Generic_HDF5_Field_CPU(int nx, int ny, int nz, int nx_real, int ny_real, int nz_real, int n_ghost, + hid_t file_id, Real *dataset_buffer, Real *source_buffer, const char *name) { Fill_HDF5_Buffer_From_Grid_CPU(nx, ny, nz, nx_real, ny_real, nz_real, n_ghost, dataset_buffer, source_buffer); Write_HDF5_Dataset_Grid(nx, ny, nz, nx_real, ny_real, nz_real, file_id, dataset_buffer, name); } +void Write_Generic_HDF5_Field_GPU(int nx, int ny, int nz, int nx_real, int ny_real, int nz_real, int n_ghost, + hid_t file_id, Real *dataset_buffer, Real *device_hdf5_buffer, Real *source_buffer, + const char *name) +{ + Fill_HDF5_Buffer_From_Grid_GPU(nx, ny, nz, nx_real, ny_real, nz_real, n_ghost, dataset_buffer, device_hdf5_buffer, + source_buffer); + Write_HDF5_Dataset_Grid(nx, ny, nz, nx_real, ny_real, nz_real, file_id, dataset_buffer, name); +} + /*! \fn void Write_Grid_HDF5(hid_t file_id) * \brief Write the grid to a file, at the current simulation time. */ void Grid3D::Write_Grid_HDF5(hid_t file_id) @@ -1390,11 +1399,6 @@ void Grid3D::Write_Grid_HDF5(hid_t file_id) #endif // COOLING_GRACKLE or CHEMISTRY_GPU - #if defined(GRAVITY_GPU) && defined(OUTPUT_POTENTIAL) - CudaSafeCall(cudaMemcpy(Grav.F.potential_h, Grav.F.potential_d, Grav.n_cells_potential * sizeof(Real), - cudaMemcpyDeviceToHost)); - #endif // GRAVITY_GPU and OUTPUT_POTENTIAL - // Allocate necessary buffers int nx_dset = H.nx_real; int ny_dset = H.ny_real; @@ -1410,23 +1414,23 @@ void Grid3D::Write_Grid_HDF5(hid_t file_id) // Start writing fields - Write_Grid_HDF5_Field_CPU(H, file_id, dataset_buffer, C.density, "/density"); + Write_Grid_HDF5_Field_GPU(H, file_id, dataset_buffer, device_dataset_buffer, C.d_density, "/density"); if (output_momentum || H.Output_Complete_Data) { - Write_Grid_HDF5_Field_CPU(H, file_id, dataset_buffer, C.momentum_x, "/momentum_x"); - Write_Grid_HDF5_Field_CPU(H, file_id, dataset_buffer, C.momentum_y, "/momentum_y"); - Write_Grid_HDF5_Field_CPU(H, file_id, dataset_buffer, C.momentum_z, "/momentum_z"); + Write_Grid_HDF5_Field_GPU(H, file_id, dataset_buffer, device_dataset_buffer, C.d_momentum_x, "/momentum_x"); + Write_Grid_HDF5_Field_GPU(H, file_id, dataset_buffer, device_dataset_buffer, C.d_momentum_y, "/momentum_y"); + Write_Grid_HDF5_Field_GPU(H, file_id, dataset_buffer, device_dataset_buffer, C.d_momentum_z, "/momentum_z"); } if (output_energy || H.Output_Complete_Data) { - Write_Grid_HDF5_Field_CPU(H, file_id, dataset_buffer, C.Energy, "/Energy"); + Write_Grid_HDF5_Field_GPU(H, file_id, dataset_buffer, device_dataset_buffer, C.d_Energy, "/Energy"); #ifdef DE - Write_Grid_HDF5_Field_CPU(H, file_id, dataset_buffer, C.GasEnergy, "/GasEnergy"); + Write_Grid_HDF5_Field_GPU(H, file_id, dataset_buffer, device_dataset_buffer, C.d_GasEnergy, "/GasEnergy"); #endif } #ifdef SCALAR #ifdef BASIC_SCALAR - Write_Grid_HDF5_Field_CPU(H, file_id, dataset_buffer, C.scalar, "/scalar0"); + Write_Grid_HDF5_Field_GPU(H, file_id, dataset_buffer, device_dataset_buffer, C.d_basic_scalar, "/scalar0"); #endif #ifdef OUTPUT_CHEMISTRY @@ -1438,6 +1442,7 @@ void Grid3D::Write_Grid_HDF5(hid_t file_id) Write_Grid_HDF5_Field_CPU(H, file_id, dataset_buffer, C.HeIII_density, "/HeIII_density"); Write_Grid_HDF5_Field_CPU(H, file_id, dataset_buffer, C.e_density, "/e_density"); #elif defined(COOLING_GRACKLE) + // Cool fields are CPU (host) only Write_Grid_HDF5_Field_CPU(H, file_id, dataset_buffer, Cool.fields.HI_density, "/HI_density"); Write_Grid_HDF5_Field_CPU(H, file_id, dataset_buffer, Cool.fields.HII_density, "/HII_density"); Write_Grid_HDF5_Field_CPU(H, file_id, dataset_buffer, Cool.fields.HeI_density, "/HeI_density"); @@ -1473,28 +1478,10 @@ void Grid3D::Write_Grid_HDF5(hid_t file_id) // 3D case if (H.nx > 1 && H.ny > 1 && H.nz > 1) { #if defined(GRAVITY) && defined(OUTPUT_POTENTIAL) - // Copy the potential array to the memory buffer - Write_Generic_HDF5_Field(Grav.nx_local + 2 * N_GHOST_POTENTIAL, Grav.ny_local + 2 * N_GHOST_POTENTIAL, - Grav.nz_local + 2 * N_GHOST_POTENTIAL, nx, ny, nz, Grav.nx_local, Grav.ny_local, - Grav.nz_local, N_GHOST_POTENTIAL, file_id, dataset_buffer, Grav.F.potential_h, - "/grav_potential"); - - /* - for (k = 0; k < Grav.nz_local; k++) { - for (j = 0; j < Grav.ny_local; j++) { - for (i = 0; i < Grav.nx_local; i++) { - // id = (i+H.n_ghost) + (j+H.n_ghost)*H.nx + (k+H.n_ghost)*H.nx*H.ny; - // buf_id = k + j*H.nz_real + i*H.nz_real*H.ny_real; - id = (i + N_GHOST_POTENTIAL) + (j + N_GHOST_POTENTIAL) * (Grav.nx_local + 2 * N_GHOST_POTENTIAL) + - (k + N_GHOST_POTENTIAL) * (Grav.nx_local + 2 * N_GHOST_POTENTIAL) * - (Grav.ny_local + 2 * N_GHOST_POTENTIAL); - buf_id = k + j * Grav.nz_local + i * Grav.nz_local * Grav.ny_local; - dataset_buffer[buf_id] = Grav.F.potential_h[id]; - } - } - } - status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer, "/grav_potential"); - */ + Write_Generic_HDF5_Field_GPU(Grav.nx_local + 2 * N_GHOST_POTENTIAL, Grav.ny_local + 2 * N_GHOST_POTENTIAL, + Grav.nz_local + 2 * N_GHOST_POTENTIAL, Grav.nx_local, Grav.ny_local, Grav.nz_local, + N_GHOST_POTENTIAL, file_id, dataset_buffer, device_dataset_buffer, Grav.F.potential_d, + "/grav_potential"); #endif // GRAVITY and OUTPUT_POTENTIAL #ifdef MHD @@ -1507,8 +1494,6 @@ void Grid3D::Write_Grid_HDF5(hid_t file_id) device_dataset_buffer, C.d_magnetic_z, "/magnetic_z", 2); } #endif // MHD - - // Free the dataspace id } free(dataset_buffer); @@ -2375,14 +2360,6 @@ void Grid3D::Read_Grid_HDF5(hid_t file_id, struct parameters P) attribute_id = H5Aopen(file_id, "t", H5P_DEFAULT); status = H5Aread(attribute_id, H5T_NATIVE_DOUBLE, &H.t); status = H5Aclose(attribute_id); - /* - // Alwin: I don't think this is needed anymore because dt of the current state of cells is calculated for consistency - and output was using previous timestep's H.dt - // This is because dti = Update_Grid, then output, then dt = 1/MPI_Allreduce(dti) in next step - attribute_id = H5Aopen(file_id, "dt", H5P_DEFAULT); - status = H5Aread(attribute_id, H5T_NATIVE_DOUBLE, &H.dt); - status = H5Aclose(attribute_id); - */ attribute_id = H5Aopen(file_id, "n_step", H5P_DEFAULT); status = H5Aread(attribute_id, H5T_NATIVE_INT, &H.n_step); status = H5Aclose(attribute_id); diff --git a/src/io/io_parallel.cpp b/src/io/io_parallel.cpp index 3f3117641..736bab5b7 100644 --- a/src/io/io_parallel.cpp +++ b/src/io/io_parallel.cpp @@ -9,9 +9,6 @@ #include "../mpi/mpi_routines.h" #include "../utils/timing_functions.h" // provides ScopedTimer -// I think this helper function is finished. It's just meant to interface with HDF5 and open/free handles -// I need to figure out offset and count elsewhere - // Warning: H5Sselect_hyperslab expects its pointer args to be arrays of same size as the rank of the dataspace // file_space_id void Read_HDF5_Selection_3D(hid_t file_id, hsize_t* offset, hsize_t* count, double* buffer, const char* name) @@ -74,11 +71,12 @@ void Grid3D::Read_Grid_Cat(struct parameters P) exit(0); } - // TODO (Alwin) : Need to consider how or whether to read attributes. - - // even if I do not read gamma from file, it is set in initial_conditions.cpp - // if I do not set t or n_step what does it get set to?0 in grid/grid3D.cpp + // TODO (by Alwin, for anyone) : Need to consider how or whether to read attributes. + // even without read gamma from file, it is set in initial_conditions.cpp + // if I do not set t or n_step it is set to 0 in grid/grid3D.cpp // This should be okay to start with. + // Choosing not to read attributes is because + // Parallel-reading attributes can be slow without collective calls. // Offsets are global variables from mpi_routines.h hsize_t offset[3]; @@ -104,15 +102,20 @@ void Grid3D::Read_Grid_Cat(struct parameters P) Read_Grid_Cat_HDF5_Field(file_id, dataset_buffer, H, offset, count, C.momentum_z, "/momentum_z"); Read_Grid_Cat_HDF5_Field(file_id, dataset_buffer, H, offset, count, C.Energy, "/Energy"); #ifdef DE - Read_Grid_Cat_HDF5_Field(file_id, dataset_buffer, H, offset, count, C.Energy, "/GasEnergy"); + Read_Grid_Cat_HDF5_Field(file_id, dataset_buffer, H, offset, count, C.GasEnergy, "/GasEnergy"); #endif // DE + #ifdef SCALAR + #ifdef BASIC_SCALAR + Read_Grid_Cat_HDF5_Field(file_id, dataset_buffer, H, offset, count, C.basic_scalar, "/scalar0"); + #endif + #endif // TODO (Alwin) : add scalar stuff #ifdef MHD - Read_Grid_HDF5_Field_Magnetic(file_id, dataset_buffer, H, C.magnetic_x, "/magnetic_x"); - Read_Grid_HDF5_Field_Magnetic(file_id, dataset_buffer, H, C.magnetic_y, "/magnetic_y"); - Read_Grid_HDF5_Field_Magnetic(file_id, dataset_buffer, H, C.magnetic_z, "/magnetic_z"); + Read_Grid_Cat_HDF5_Field_Magnetic(file_id, dataset_buffer, H, C.magnetic_x, "/magnetic_x"); + Read_Grid_Cat_HDF5_Field_Magnetic(file_id, dataset_buffer, H, C.magnetic_y, "/magnetic_y"); + Read_Grid_Cat_HDF5_Field_Magnetic(file_id, dataset_buffer, H, C.magnetic_z, "/magnetic_z"); #endif free(dataset_buffer); From 614cb8f6b7e6eb2a5c0302d8d3e50367ba239595 Mon Sep 17 00:00:00 2001 From: Alwin Date: Tue, 11 Apr 2023 06:26:25 -0400 Subject: [PATCH 307/694] minor typo --- src/io/io_parallel.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/io/io_parallel.cpp b/src/io/io_parallel.cpp index 736bab5b7..1063bfbf8 100644 --- a/src/io/io_parallel.cpp +++ b/src/io/io_parallel.cpp @@ -71,7 +71,7 @@ void Grid3D::Read_Grid_Cat(struct parameters P) exit(0); } - // TODO (by Alwin, for anyone) : Need to consider how or whether to read attributes. + // TODO (written by Alwin, for anyone to do) : Need to consider how or whether to read attributes. // even without read gamma from file, it is set in initial_conditions.cpp // if I do not set t or n_step it is set to 0 in grid/grid3D.cpp // This should be okay to start with. @@ -113,9 +113,9 @@ void Grid3D::Read_Grid_Cat(struct parameters P) // TODO (Alwin) : add scalar stuff #ifdef MHD - Read_Grid_Cat_HDF5_Field_Magnetic(file_id, dataset_buffer, H, C.magnetic_x, "/magnetic_x"); - Read_Grid_Cat_HDF5_Field_Magnetic(file_id, dataset_buffer, H, C.magnetic_y, "/magnetic_y"); - Read_Grid_Cat_HDF5_Field_Magnetic(file_id, dataset_buffer, H, C.magnetic_z, "/magnetic_z"); + Read_Grid_Cat_HDF5_Field_Magnetic(file_id, dataset_buffer, H, offset, count, C.magnetic_x, "/magnetic_x"); + Read_Grid_Cat_HDF5_Field_Magnetic(file_id, dataset_buffer, H, offset, count, C.magnetic_y, "/magnetic_y"); + Read_Grid_Cat_HDF5_Field_Magnetic(file_id, dataset_buffer, H, offset, count, C.magnetic_z, "/magnetic_z"); #endif free(dataset_buffer); From d8e27ec8a17932015d6cacdd7975ad95d2c95123 Mon Sep 17 00:00:00 2001 From: Alwin Date: Sat, 15 Apr 2023 19:20:10 -0400 Subject: [PATCH 308/694] TileBlockDecomposition3D --- src/mpi/mpi_routines.cpp | 56 ++++++++++++++-------------------------- 1 file changed, 19 insertions(+), 37 deletions(-) diff --git a/src/mpi/mpi_routines.cpp b/src/mpi/mpi_routines.cpp index 0250080ea..d40704759 100644 --- a/src/mpi/mpi_routines.cpp +++ b/src/mpi/mpi_routines.cpp @@ -827,6 +827,24 @@ int greatest_prime_factor(int n) return np; } +/* + tile MPI processes in a block arrangement for the 3D case + */ +void TileBlockDecomposition3D(int number, int &np_x, int &np_y, int &np_z) { + + int dims[3] = {1,1,1}; + size_t index = 0; + while (number > 1) { + int gpf = greatest_prime_factor(number); + number /= gpf; + dims[index % 3] *= gpf; + index += 1; + } + np_x = dims[0]; + np_y = dims[1]; + np_z = dims[2]; +} + /*tile MPI processes in a block arrangement*/ void TileBlockDecomposition(void) { @@ -867,43 +885,7 @@ void TileBlockDecomposition(void) return; } - /*base decomposition on whether n_gpf==2*/ - if (n_gpf != 2) { - /*we are in 3-d, so split remainder evenly*/ - np_x = n_gpf; - n_gpf = greatest_prime_factor(nproc / n_gpf); - if (n_gpf != 2) { - /*the next greatest prime is odd, so just split*/ - np_y = n_gpf; - np_z = nproc / (np_x * np_y); - } else { - /*increase ny, nz round-robin*/ - while (np_x * np_y * np_z < nproc) { - np_y *= 2; - if (np_x * np_y * np_z == nproc) { - break; - } - np_z *= 2; - } - } - } else { - /*nproc is a power of 2*/ - /*we are in 3-d, so split remainder evenly*/ - - /*increase nx, ny, nz round-robin*/ - while (np_x * np_y * np_z < nproc) { - np_x *= 2; - if (np_x * np_y * np_z == nproc) { - break; - } - np_y *= 2; - if (np_x * np_y * np_z == nproc) { - break; - } - np_z *= 2; - } - } - + TileBlockDecomposition3D(nproc, np_x, np_y, np_z); // reorder x, y, z int n_tmp; From 3e6401eb0e11f5aadb6c0605c75184da1efb8d92 Mon Sep 17 00:00:00 2001 From: Alwin Date: Sat, 15 Apr 2023 19:21:44 -0400 Subject: [PATCH 309/694] Add rocfft_rtc_cache_path to frontier setup --- builds/setup.frontier.cce.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/builds/setup.frontier.cce.sh b/builds/setup.frontier.cce.sh index 4a22344d2..afb251680 100755 --- a/builds/setup.frontier.cce.sh +++ b/builds/setup.frontier.cce.sh @@ -15,3 +15,4 @@ export MPICH_GPU_SUPPORT_ENABLED=1 export LD_LIBRARY_PATH=${CRAY_LD_LIBRARY_PATH}:${LD_LIBRARY_PATH} export CHOLLA_ENVSET=1 +export ROCFFT_RTC_CACHE_PATH=/dev/null From f46342c161f367580db16c39413153fe212ee37f Mon Sep 17 00:00:00 2001 From: Alwin Date: Sat, 15 Apr 2023 19:37:44 -0400 Subject: [PATCH 310/694] format --- src/mpi/mpi_routines.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/mpi/mpi_routines.cpp b/src/mpi/mpi_routines.cpp index d40704759..513d29056 100644 --- a/src/mpi/mpi_routines.cpp +++ b/src/mpi/mpi_routines.cpp @@ -830,9 +830,9 @@ int greatest_prime_factor(int n) /* tile MPI processes in a block arrangement for the 3D case */ -void TileBlockDecomposition3D(int number, int &np_x, int &np_y, int &np_z) { - - int dims[3] = {1,1,1}; +void TileBlockDecomposition3D(int number, int &np_x, int &np_y, int &np_z) +{ + int dims[3] = {1, 1, 1}; size_t index = 0; while (number > 1) { int gpf = greatest_prime_factor(number); From 17a92b4bf648f05faccb6dccc73615e95ea9cd2c Mon Sep 17 00:00:00 2001 From: helenarichie Date: Fri, 21 Apr 2023 09:34:48 -0400 Subject: [PATCH 311/694] start of io dust refactor --- src/io/io.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/io/io.cpp b/src/io/io.cpp index 34c0d3438..1ca3ce1e3 100644 --- a/src/io/io.cpp +++ b/src/io/io.cpp @@ -2397,6 +2397,10 @@ void Grid3D::Read_Grid_HDF5(hid_t file_id, struct parameters P) #endif // GRACKLE_METALS #endif // COOLING_GRACKLE , CHEMISTRY_GPU + #if defined(DUST) + Read_Grid_HDF5_Field(file_id, dataset_buffer, H, C.dust_density, "/dust_density"); + #endif // DUST + #endif // SCALAR // MHD only valid in 3D case From 72d3460a47c60707db3eb17233ae049f163e91be Mon Sep 17 00:00:00 2001 From: helenarichie Date: Mon, 24 Apr 2023 14:44:47 -0400 Subject: [PATCH 312/694] add dust_density to Read_Grid_HDF5 and Write_Grid_HDF5 --- src/io/io.cpp | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/src/io/io.cpp b/src/io/io.cpp index 1ca3ce1e3..45aa1a3d9 100644 --- a/src/io/io.cpp +++ b/src/io/io.cpp @@ -1431,7 +1431,11 @@ void Grid3D::Write_Grid_HDF5(hid_t file_id) #ifdef BASIC_SCALAR Write_Grid_HDF5_Field_GPU(H, file_id, dataset_buffer, device_dataset_buffer, C.d_basic_scalar, "/scalar0"); - #endif + #endif // BASIC_SCALAR + + #ifdef DUST + Write_Grid_HDF5_Field_GPU(H, file_id, dataset_buffer, device_dataset_buffer, &(C.d_density[H.n_cells*grid_enum::dust_density]), "/dust_density"); + #endif // DUST #ifdef OUTPUT_CHEMISTRY #ifdef CHEMISTRY_GPU @@ -2383,7 +2387,11 @@ void Grid3D::Read_Grid_HDF5(hid_t file_id, struct parameters P) #ifdef BASIC_SCALAR Read_Grid_HDF5_Field(file_id, dataset_buffer, H, C.scalar, "/scalar0"); - #endif + #endif // BASIC_SCALAR + + #ifdef DUST + Read_Grid_HDF5_Field(file_id, dataset_buffer, H, &(C.density[H.n_cells*grid_enum::dust_density]), "/dust_density"); + #endif // DUST #if defined(COOLING_GRACKLE) || defined(CHEMISTRY_GPU) Read_Grid_HDF5_Field(file_id, dataset_buffer, H, C.HI_density, "/HI_density"); @@ -2397,10 +2405,6 @@ void Grid3D::Read_Grid_HDF5(hid_t file_id, struct parameters P) #endif // GRACKLE_METALS #endif // COOLING_GRACKLE , CHEMISTRY_GPU - #if defined(DUST) - Read_Grid_HDF5_Field(file_id, dataset_buffer, H, C.dust_density, "/dust_density"); - #endif // DUST - #endif // SCALAR // MHD only valid in 3D case From 428da1dd20f3d319645026e22b8354627a72a768 Mon Sep 17 00:00:00 2001 From: helenarichie Date: Mon, 24 Apr 2023 14:49:47 -0400 Subject: [PATCH 313/694] run clang tidy --- src/io/io.cpp | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/io/io.cpp b/src/io/io.cpp index 45aa1a3d9..2dd993264 100644 --- a/src/io/io.cpp +++ b/src/io/io.cpp @@ -1434,7 +1434,8 @@ void Grid3D::Write_Grid_HDF5(hid_t file_id) #endif // BASIC_SCALAR #ifdef DUST - Write_Grid_HDF5_Field_GPU(H, file_id, dataset_buffer, device_dataset_buffer, &(C.d_density[H.n_cells*grid_enum::dust_density]), "/dust_density"); + Write_Grid_HDF5_Field_GPU(H, file_id, dataset_buffer, device_dataset_buffer, + &(C.d_density[H.n_cells * grid_enum::dust_density]), "/dust_density"); #endif // DUST #ifdef OUTPUT_CHEMISTRY @@ -2387,11 +2388,11 @@ void Grid3D::Read_Grid_HDF5(hid_t file_id, struct parameters P) #ifdef BASIC_SCALAR Read_Grid_HDF5_Field(file_id, dataset_buffer, H, C.scalar, "/scalar0"); - #endif // BASIC_SCALAR + #endif // BASIC_SCALAR #ifdef DUST - Read_Grid_HDF5_Field(file_id, dataset_buffer, H, &(C.density[H.n_cells*grid_enum::dust_density]), "/dust_density"); - #endif // DUST + Read_Grid_HDF5_Field(file_id, dataset_buffer, H, &(C.density[H.n_cells * grid_enum::dust_density]), "/dust_density"); + #endif // DUST #if defined(COOLING_GRACKLE) || defined(CHEMISTRY_GPU) Read_Grid_HDF5_Field(file_id, dataset_buffer, H, C.HI_density, "/HI_density"); From 0fc13d038b7bc598251f66a39c98536c6ac6e7bd Mon Sep 17 00:00:00 2001 From: helenarichie Date: Mon, 24 Apr 2023 14:53:15 -0400 Subject: [PATCH 314/694] change outdated reference to generalized scalar to dust-specific scalar --- src/grid/initial_conditions.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/grid/initial_conditions.cpp b/src/grid/initial_conditions.cpp index 86f478faf..5b7b4581d 100644 --- a/src/grid/initial_conditions.cpp +++ b/src/grid/initial_conditions.cpp @@ -1309,8 +1309,8 @@ void Grid3D::Clouds() C.GasEnergy[id] = p_bg / (gama - 1.0); #endif #ifdef SCALAR - #ifdef BASIC_SCALAR - C.basic_scalar[id] = C.density[id] * 0.0; + #ifdef DUST + C.host[id + H.n_cells * grid_enum::dust_density] = 0.0; #endif #endif // add clouds From 8f5d67d435170b0066b12fa8e21b8aa95849c2a0 Mon Sep 17 00:00:00 2001 From: helenarichie Date: Mon, 24 Apr 2023 16:03:02 -0400 Subject: [PATCH 315/694] add dust_density attribute to Conserved --- src/grid/grid3D.cpp | 6 ++++++ src/grid/grid3D.h | 10 ++++++++-- src/io/io.cpp | 5 ++--- 3 files changed, 16 insertions(+), 5 deletions(-) diff --git a/src/grid/grid3D.cpp b/src/grid/grid3D.cpp index dac2795c8..1d7884a4a 100644 --- a/src/grid/grid3D.cpp +++ b/src/grid/grid3D.cpp @@ -300,6 +300,9 @@ void Grid3D::AllocateMemory(void) #ifdef BASIC_SCALAR C.basic_scalar = &(C.host[H.n_cells * grid_enum::basic_scalar]); #endif + #ifdef DUST + C.dust_density = &(C.host[H.n_cells * grid_enum::dust_density]); + #endif #endif // SCALAR #ifdef MHD C.magnetic_x = &(C.host[grid_enum::magnetic_x * H.n_cells]); @@ -323,6 +326,9 @@ void Grid3D::AllocateMemory(void) #ifdef BASIC_SCALAR C.d_basic_scalar = &(C.device[H.n_cells * grid_enum::basic_scalar]); #endif + #ifdef DUST + C.d_dust_density = &(C.device[H.n_cells * grid_enum::dust_density]); + #endif #endif // SCALAR #ifdef MHD C.d_magnetic_x = &(C.device[(grid_enum::magnetic_x)*H.n_cells]); diff --git a/src/grid/grid3D.h b/src/grid/grid3D.h index 6e9b7e5bc..92b1e277f 100644 --- a/src/grid/grid3D.h +++ b/src/grid/grid3D.h @@ -360,6 +360,12 @@ class Grid3D */ Real *basic_scalar; #endif + #ifdef DUST + /*! \var basic_scalar + * \brief Array containing the values of a basic passive scalar variable. + */ + Real *dust_density; + #endif #endif // SCALAR #ifdef MHD @@ -405,8 +411,8 @@ class Grid3D /*! pointer to conserved variable on device */ Real *device; - Real *d_density, *d_momentum_x, *d_momentum_y, *d_momentum_z, *d_Energy, *d_scalar, *d_basic_scalar, *d_magnetic_x, - *d_magnetic_y, *d_magnetic_z, *d_GasEnergy; + Real *d_density, *d_momentum_x, *d_momentum_y, *d_momentum_z, *d_Energy, *d_scalar, *d_basic_scalar, *d_dust_density, + *d_magnetic_x, *d_magnetic_y, *d_magnetic_z, *d_GasEnergy; /*! pointer to gravitational potential on device */ Real *d_Grav_potential; diff --git a/src/io/io.cpp b/src/io/io.cpp index 2dd993264..1d2233de3 100644 --- a/src/io/io.cpp +++ b/src/io/io.cpp @@ -1434,8 +1434,7 @@ void Grid3D::Write_Grid_HDF5(hid_t file_id) #endif // BASIC_SCALAR #ifdef DUST - Write_Grid_HDF5_Field_GPU(H, file_id, dataset_buffer, device_dataset_buffer, - &(C.d_density[H.n_cells * grid_enum::dust_density]), "/dust_density"); + Write_Grid_HDF5_Field_GPU(H, file_id, dataset_buffer, device_dataset_buffer, C.d_dust_density, "/dust_density"); #endif // DUST #ifdef OUTPUT_CHEMISTRY @@ -2391,7 +2390,7 @@ void Grid3D::Read_Grid_HDF5(hid_t file_id, struct parameters P) #endif // BASIC_SCALAR #ifdef DUST - Read_Grid_HDF5_Field(file_id, dataset_buffer, H, &(C.density[H.n_cells * grid_enum::dust_density]), "/dust_density"); + Read_Grid_HDF5_Field(file_id, dataset_buffer, H, C.dust_density, "/dust_density"); #endif // DUST #if defined(COOLING_GRACKLE) || defined(CHEMISTRY_GPU) From 0316eb32f191cd5af8b5fcf4cb1f8abb3c8cf0ed Mon Sep 17 00:00:00 2001 From: helenarichie Date: Mon, 24 Apr 2023 16:04:47 -0400 Subject: [PATCH 316/694] run clang tidy --- src/grid/grid3D.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/grid/grid3D.h b/src/grid/grid3D.h index 92b1e277f..5882bd1e9 100644 --- a/src/grid/grid3D.h +++ b/src/grid/grid3D.h @@ -411,8 +411,8 @@ class Grid3D /*! pointer to conserved variable on device */ Real *device; - Real *d_density, *d_momentum_x, *d_momentum_y, *d_momentum_z, *d_Energy, *d_scalar, *d_basic_scalar, *d_dust_density, - *d_magnetic_x, *d_magnetic_y, *d_magnetic_z, *d_GasEnergy; + Real *d_density, *d_momentum_x, *d_momentum_y, *d_momentum_z, *d_Energy, *d_scalar, *d_basic_scalar, + *d_dust_density, *d_magnetic_x, *d_magnetic_y, *d_magnetic_z, *d_GasEnergy; /*! pointer to gravitational potential on device */ Real *d_Grav_potential; From 5c1a9e1b6a0bafb43d249406765f7326abd402ab Mon Sep 17 00:00:00 2001 From: helenarichie Date: Mon, 24 Apr 2023 18:56:36 -0400 Subject: [PATCH 317/694] add dust to Write_Projection_HDF5 --- src/io/io.cpp | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/src/io/io.cpp b/src/io/io.cpp index 1d2233de3..ec7072ec7 100644 --- a/src/io/io.cpp +++ b/src/io/io.cpp @@ -1516,6 +1516,10 @@ void Grid3D::Write_Projection_HDF5(hid_t file_id) Real *dataset_buffer_Txy, *dataset_buffer_Txz; herr_t status; Real dxy, dxz, Txy, Txz, n, T; + #ifdef DUST + Real dust_xy, dust_xz; + Real *dataset_buffer_dust_xy, *dataset_buffer_dust_xz; + #endif n = T = 0; Real mu = 0.6; @@ -1530,6 +1534,10 @@ void Grid3D::Write_Projection_HDF5(hid_t file_id) dataset_buffer_dxz = (Real *)malloc(H.nx_real * H.nz_real * sizeof(Real)); dataset_buffer_Txy = (Real *)malloc(H.nx_real * H.ny_real * sizeof(Real)); dataset_buffer_Txz = (Real *)malloc(H.nx_real * H.nz_real * sizeof(Real)); + #ifdef DUST + dataset_buffer_dust_xy = (Real *)malloc(H.nx_real * H.ny_real * sizeof(Real)); + dataset_buffer_dust_xz = (Real *)malloc(H.nx_real * H.nz_real * sizeof(Real)); + #endif // Create the data space for the datasets dims[0] = nx_dset; @@ -1543,11 +1551,17 @@ void Grid3D::Write_Projection_HDF5(hid_t file_id) for (i = 0; i < H.nx_real; i++) { dxy = 0; Txy = 0; + #ifdef DUST + dust_xy = 0; + #endif // for each xy element, sum over the z column for (k = 0; k < H.nz_real; k++) { id = (i + H.n_ghost) + (j + H.n_ghost) * H.nx + (k + H.n_ghost) * H.nx * H.ny; // sum density dxy += C.density[id] * H.dz; + #ifdef DUST + dust_xy += C.dust_density[id] * H.dz; + #endif // calculate number density n = C.density[id] * DENSITY_UNIT / (mu * MP); // calculate temperature @@ -1566,6 +1580,9 @@ void Grid3D::Write_Projection_HDF5(hid_t file_id) buf_id = j + i * H.ny_real; dataset_buffer_dxy[buf_id] = dxy; dataset_buffer_Txy[buf_id] = Txy; + #ifdef DUST + dataset_buffer_dust_xy[buf_id] = dust_xy; + #endif } } @@ -1574,11 +1591,17 @@ void Grid3D::Write_Projection_HDF5(hid_t file_id) for (i = 0; i < H.nx_real; i++) { dxz = 0; Txz = 0; + #ifdef DUST + dust_xz = 0; + #endif // for each xz element, sum over the y column for (j = 0; j < H.ny_real; j++) { id = (i + H.n_ghost) + (j + H.n_ghost) * H.nx + (k + H.n_ghost) * H.nx * H.ny; // sum density dxz += C.density[id] * H.dy; + #ifdef DUST + dust_xz += C.dust_density[id] * H.dy; + #endif // calculate number density n = C.density[id] * DENSITY_UNIT / (mu * MP); // calculate temperature @@ -1597,6 +1620,9 @@ void Grid3D::Write_Projection_HDF5(hid_t file_id) buf_id = k + i * H.nz_real; dataset_buffer_dxz[buf_id] = dxz; dataset_buffer_Txz[buf_id] = Txz; + #ifdef DUST + dataset_buffer_dust_xz[buf_id] = dust_xz; + #endif } } @@ -1605,6 +1631,10 @@ void Grid3D::Write_Projection_HDF5(hid_t file_id) status = Write_HDF5_Dataset(file_id, dataspace_xz_id, dataset_buffer_dxz, "/d_xz"); status = Write_HDF5_Dataset(file_id, dataspace_xy_id, dataset_buffer_Txy, "/T_xy"); status = Write_HDF5_Dataset(file_id, dataspace_xy_id, dataset_buffer_Txz, "/T_xz"); + #ifdef DUST + status = Write_HDF5_Dataset(file_id, dataspace_xy_id, dataset_buffer_dust_xy, "/d_dust_xy"); + status = Write_HDF5_Dataset(file_id, dataspace_xy_id, dataset_buffer_dust_xz, "/d_dust_xz"); + #endif // Free the dataspace ids status = H5Sclose(dataspace_xz_id); From 635f8293363d4e7137e2e055adc14968f1733673 Mon Sep 17 00:00:00 2001 From: helenarichie Date: Mon, 24 Apr 2023 18:57:23 -0400 Subject: [PATCH 318/694] add dust to Write_Projection_HDF5 --- src/io/io.cpp | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/src/io/io.cpp b/src/io/io.cpp index ec7072ec7..e21dba70b 100644 --- a/src/io/io.cpp +++ b/src/io/io.cpp @@ -1518,7 +1518,7 @@ void Grid3D::Write_Projection_HDF5(hid_t file_id) Real dxy, dxz, Txy, Txz, n, T; #ifdef DUST Real dust_xy, dust_xz; - Real *dataset_buffer_dust_xy, *dataset_buffer_dust_xz; + Real *dataset_buffer_dust_xy, *dataset_buffer_dust_xz; #endif n = T = 0; @@ -1534,10 +1534,10 @@ void Grid3D::Write_Projection_HDF5(hid_t file_id) dataset_buffer_dxz = (Real *)malloc(H.nx_real * H.nz_real * sizeof(Real)); dataset_buffer_Txy = (Real *)malloc(H.nx_real * H.ny_real * sizeof(Real)); dataset_buffer_Txz = (Real *)malloc(H.nx_real * H.nz_real * sizeof(Real)); - #ifdef DUST + #ifdef DUST dataset_buffer_dust_xy = (Real *)malloc(H.nx_real * H.ny_real * sizeof(Real)); dataset_buffer_dust_xz = (Real *)malloc(H.nx_real * H.nz_real * sizeof(Real)); - #endif + #endif // Create the data space for the datasets dims[0] = nx_dset; @@ -1551,17 +1551,17 @@ void Grid3D::Write_Projection_HDF5(hid_t file_id) for (i = 0; i < H.nx_real; i++) { dxy = 0; Txy = 0; - #ifdef DUST + #ifdef DUST dust_xy = 0; - #endif + #endif // for each xy element, sum over the z column for (k = 0; k < H.nz_real; k++) { id = (i + H.n_ghost) + (j + H.n_ghost) * H.nx + (k + H.n_ghost) * H.nx * H.ny; // sum density dxy += C.density[id] * H.dz; - #ifdef DUST + #ifdef DUST dust_xy += C.dust_density[id] * H.dz; - #endif + #endif // calculate number density n = C.density[id] * DENSITY_UNIT / (mu * MP); // calculate temperature @@ -1580,9 +1580,9 @@ void Grid3D::Write_Projection_HDF5(hid_t file_id) buf_id = j + i * H.ny_real; dataset_buffer_dxy[buf_id] = dxy; dataset_buffer_Txy[buf_id] = Txy; - #ifdef DUST + #ifdef DUST dataset_buffer_dust_xy[buf_id] = dust_xy; - #endif + #endif } } @@ -1591,17 +1591,17 @@ void Grid3D::Write_Projection_HDF5(hid_t file_id) for (i = 0; i < H.nx_real; i++) { dxz = 0; Txz = 0; - #ifdef DUST + #ifdef DUST dust_xz = 0; - #endif + #endif // for each xz element, sum over the y column for (j = 0; j < H.ny_real; j++) { id = (i + H.n_ghost) + (j + H.n_ghost) * H.nx + (k + H.n_ghost) * H.nx * H.ny; // sum density dxz += C.density[id] * H.dy; - #ifdef DUST + #ifdef DUST dust_xz += C.dust_density[id] * H.dy; - #endif + #endif // calculate number density n = C.density[id] * DENSITY_UNIT / (mu * MP); // calculate temperature @@ -1620,9 +1620,9 @@ void Grid3D::Write_Projection_HDF5(hid_t file_id) buf_id = k + i * H.nz_real; dataset_buffer_dxz[buf_id] = dxz; dataset_buffer_Txz[buf_id] = Txz; - #ifdef DUST + #ifdef DUST dataset_buffer_dust_xz[buf_id] = dust_xz; - #endif + #endif } } @@ -1631,10 +1631,10 @@ void Grid3D::Write_Projection_HDF5(hid_t file_id) status = Write_HDF5_Dataset(file_id, dataspace_xz_id, dataset_buffer_dxz, "/d_xz"); status = Write_HDF5_Dataset(file_id, dataspace_xy_id, dataset_buffer_Txy, "/T_xy"); status = Write_HDF5_Dataset(file_id, dataspace_xy_id, dataset_buffer_Txz, "/T_xz"); - #ifdef DUST + #ifdef DUST status = Write_HDF5_Dataset(file_id, dataspace_xy_id, dataset_buffer_dust_xy, "/d_dust_xy"); status = Write_HDF5_Dataset(file_id, dataspace_xy_id, dataset_buffer_dust_xz, "/d_dust_xz"); - #endif + #endif // Free the dataspace ids status = H5Sclose(dataspace_xz_id); From 783281337a0bde029f1fbd771cf06f1b9d5eb8f0 Mon Sep 17 00:00:00 2001 From: helenarichie Date: Mon, 24 Apr 2023 19:47:30 -0400 Subject: [PATCH 319/694] add dust to Read_Grid_Cat --- src/io/io_parallel.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/io/io_parallel.cpp b/src/io/io_parallel.cpp index 1063bfbf8..774e69723 100644 --- a/src/io/io_parallel.cpp +++ b/src/io/io_parallel.cpp @@ -109,6 +109,9 @@ void Grid3D::Read_Grid_Cat(struct parameters P) #ifdef BASIC_SCALAR Read_Grid_Cat_HDF5_Field(file_id, dataset_buffer, H, offset, count, C.basic_scalar, "/scalar0"); #endif + #ifdef DUST + Read_Grid_Cat_HDF5_Field(file_id, dataset_buffer, H, offset, count, C.dust_density, "/dust_density"); + #endif #endif // TODO (Alwin) : add scalar stuff From 48b4bdaa2d86d0ef4d4a642ff3d8a4f898febaa2 Mon Sep 17 00:00:00 2001 From: helenarichie Date: Mon, 24 Apr 2023 20:36:23 -0400 Subject: [PATCH 320/694] fix comment --- src/grid/grid3D.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/grid/grid3D.h b/src/grid/grid3D.h index 5882bd1e9..bd92e63fd 100644 --- a/src/grid/grid3D.h +++ b/src/grid/grid3D.h @@ -361,8 +361,8 @@ class Grid3D Real *basic_scalar; #endif #ifdef DUST - /*! \var basic_scalar - * \brief Array containing the values of a basic passive scalar variable. + /*! \var dust_density + * \brief Array containing the dust densities. */ Real *dust_density; #endif From 556bbf0f611fcb8b0b42fdc12914a0eb025fe008 Mon Sep 17 00:00:00 2001 From: Alwin Date: Wed, 26 Apr 2023 18:12:31 -0400 Subject: [PATCH 321/694] Improve timers and read_grid_cat restart --- src/grid/grid3D.cpp | 22 ++++++++++++++++++++-- src/io/io.cpp | 12 +++++++----- src/io/io_parallel.cpp | 18 ++++++++++++------ src/main.cpp | 15 ++++++++++++--- src/utils/timing_functions.cpp | 8 +++++++- src/utils/timing_functions.h | 4 +++- 6 files changed, 61 insertions(+), 18 deletions(-) diff --git a/src/grid/grid3D.cpp b/src/grid/grid3D.cpp index dac2795c8..7647abcff 100644 --- a/src/grid/grid3D.cpp +++ b/src/grid/grid3D.cpp @@ -426,6 +426,11 @@ Real Grid3D::Update_Grid(void) U_floor /= Cosmo.v_0_gas * Cosmo.v_0_gas / Cosmo.current_a / Cosmo.current_a; #endif + +#ifdef CPU_TIME + Timer.Hydro_Integrator.Start(); +#endif //CPU_TIME + // Run the hydro integrator on the grid if (H.nx > 1 && H.ny == 1 && H.nz == 1) // 1D { @@ -468,11 +473,24 @@ Real Grid3D::Update_Grid(void) chexit(-1); } + +#ifdef CPU_TIME + Timer.Hydro_Integrator.End(); +#endif //CPU_TIME + + #ifdef CUDA #ifdef COOLING_GPU +#ifdef CPU_TIME + Timer.Cooling_GPU.Start(); +#endif // ==Apply Cooling from cooling/cooling_cuda.h== Cooling_Update(C.device, H.nx, H.ny, H.nz, H.n_ghost, H.n_fields, H.dt, gama); +#ifdef CPU_TIME + Timer.Cooling_GPU.End(); +#endif + #endif // COOLING_GPU #ifdef DUST @@ -558,11 +576,11 @@ Real Grid3D::Update_Hydro_Grid() #ifdef COOLING_GRACKLE #ifdef CPU_TIME - Timer.Cooling.Start(); + Timer.Cooling_Grackle.Start(); #endif // CPU_TIME Do_Cooling_Step_Grackle(); #ifdef CPU_TIME - Timer.Cooling.End(); + Timer.Cooling_Grackle.End(); #endif // CPU_TIME #endif // COOLING_GRACKLE diff --git a/src/io/io.cpp b/src/io/io.cpp index 34c0d3438..05ae36dd7 100644 --- a/src/io/io.cpp +++ b/src/io/io.cpp @@ -190,7 +190,7 @@ void OutputData(Grid3D &G, struct parameters P, int nfile) #elif defined HDF5 filename += ".h5"; #else - strcat(filename, ".txt"); + filename += ".txt"; if (G.H.nx * G.H.ny * G.H.nz > 1000) printf("Ascii outputs only recommended for small problems!\n"); #endif #ifdef MPI_CHOLLA @@ -240,7 +240,7 @@ void OutputData(Grid3D &G, struct parameters P, int nfile) #else // open the file for txt writes FILE *out; - out = fopen(filename, "w"); + out = fopen(filename.data(), "w"); if (out == NULL) { printf("Error opening output file.\n"); exit(-1); @@ -259,6 +259,7 @@ void OutputData(Grid3D &G, struct parameters P, int nfile) void OutputFloat32(Grid3D &G, struct parameters P, int nfile) { +#ifdef HDF5 Header H = G.H; // Do nothing in 1-D and 2-D case if (H.ny_real == 1) { @@ -278,7 +279,7 @@ void OutputFloat32(Grid3D &G, struct parameters P, int nfile) filename += ".float32.h5"; #ifdef MPI_CHOLLA filename += "." + std::to_string(procID); -#endif +#endif // MPI_CHOLLA // create hdf5 file hid_t file_id; /* file identifier */ @@ -305,7 +306,7 @@ void OutputFloat32(Grid3D &G, struct parameters P, int nfile) buffer_size = (nx_dset + 1) * (ny_dset + 1) * (nz_dset + 1); #else buffer_size = nx_dset * ny_dset * nz_dset; -#endif +#endif // MHD // Using static DeviceVector here automatically allocates the buffer the // first time it is needed It persists until program exit, and then calls @@ -359,7 +360,7 @@ void OutputFloat32(Grid3D &G, struct parameters P, int nfile) device_dataset_buffer, G.C.d_magnetic_z, "/magnetic_z"); } -#endif +#endif // MHD free(dataset_buffer); @@ -371,6 +372,7 @@ void OutputFloat32(Grid3D &G, struct parameters P, int nfile) // close the file status = H5Fclose(file_id); +#endif // HDF5 } /* Output a projection of the grid data to file. */ diff --git a/src/io/io_parallel.cpp b/src/io/io_parallel.cpp index 1063bfbf8..924273fbc 100644 --- a/src/io/io_parallel.cpp +++ b/src/io/io_parallel.cpp @@ -71,12 +71,18 @@ void Grid3D::Read_Grid_Cat(struct parameters P) exit(0); } - // TODO (written by Alwin, for anyone to do) : Need to consider how or whether to read attributes. - // even without read gamma from file, it is set in initial_conditions.cpp - // if I do not set t or n_step it is set to 0 in grid/grid3D.cpp - // This should be okay to start with. - // Choosing not to read attributes is because - // Parallel-reading attributes can be slow without collective calls. + // TODO (written by Alwin, for anyone to do) : + // Consider using collective calls if this part is slow at scale + hid_t attribute_id; + attribute_id = H5Aopen(file_id, "t", H5P_DEFAULT); + status = H5Aread(attribute_id, H5T_NATIVE_DOUBLE, &H.t); + status = H5Aclose(attribute_id); + attribute_id = H5Aopen(file_id, "n_step", H5P_DEFAULT); + status = H5Aread(attribute_id, H5T_NATIVE_INT, &H.n_step); + status = H5Aclose(attribute_id); + + + // Offsets are global variables from mpi_routines.h hsize_t offset[3]; diff --git a/src/main.cpp b/src/main.cpp index ae299c024..b33ee8eba 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -80,7 +80,16 @@ int main(int argc, char *argv[]) "Parameter values: nx = %d, ny = %d, nz = %d, tout = %f, init = %s, " "boundaries = %d %d %d %d %d %d\n", P.nx, P.ny, P.nz, P.tout, P.init, P.xl_bcnd, P.xu_bcnd, P.yl_bcnd, P.yu_bcnd, P.zl_bcnd, P.zu_bcnd); + + bool is_restart = false; if (strcmp(P.init, "Read_Grid") == 0) { + is_restart = true; + } + if (strcmp(P.init, "Read_Grid_Cat") == 0) { + is_restart = true; + } + + if (is_restart) { chprintf("Input directory: %s\n", P.indir); } chprintf("Output directory: %s\n", P.outdir); @@ -107,8 +116,8 @@ int main(int argc, char *argv[]) chprintf("Setting initial conditions...\n"); G.Set_Initial_Conditions(P); chprintf("Initial conditions set.\n"); - // set main variables for Read_Grid initial conditions - if (strcmp(P.init, "Read_Grid") == 0) { + // set main variables for Read_Grid and Read_Grid_Cat initial conditions + if (is_restart) { outtime += G.H.t; nfile = P.nfile; } @@ -192,7 +201,7 @@ int main(int argc, char *argv[]) chprintf("Nstep = %d Simulation time = %f\n", G.H.n_step, G.H.t); #ifdef OUTPUT - if (strcmp(P.init, "Read_Grid") != 0 || G.H.Output_Now) { + if (!is_restart || G.H.Output_Now) { // write the initial conditions to file chprintf("Writing initial conditions to file...\n"); WriteData(G, P, nfile); diff --git a/src/utils/timing_functions.cpp b/src/utils/timing_functions.cpp index 05c0546be..133971b68 100644 --- a/src/utils/timing_functions.cpp +++ b/src/utils/timing_functions.cpp @@ -14,6 +14,7 @@ void OneTime::Start() { + cudaDeviceSynchronize(); if (inactive) { return; } @@ -29,6 +30,7 @@ void OneTime::Subtract(Real time_to_subtract) void OneTime::End() { + cudaDeviceSynchronize(); if (inactive) { return; } @@ -96,6 +98,7 @@ void Time::Initialize() #ifdef PARTICLES &(Calc_dt = OneTime("Calc_dt")), #endif + &(Hydro_Integrator = OneTime("Hydro_Integrator")), &(Hydro = OneTime("Hydro")), &(Boundaries = OneTime("Boundaries")), #ifdef GRAVITY @@ -109,8 +112,11 @@ void Time::Initialize() &(Advance_Part_1 = OneTime("Advance_Part_1")), &(Advance_Part_2 = OneTime("Advance_Part_2")), #endif + #ifdef COOLING_GPU + &(Cooling_GPU = OneTime("Cooling_GPU")), + #endif #ifdef COOLING_GRACKLE - &(Cooling = OneTime("Cooling")), + &(Cooling_Grackle = OneTime("Cooling_Grackle")), #endif #ifdef CHEMISTRY_GPU &(Chemistry = OneTime("Chemistry")), diff --git a/src/utils/timing_functions.h b/src/utils/timing_functions.h index 2438c1595..09e209f8b 100644 --- a/src/utils/timing_functions.h +++ b/src/utils/timing_functions.h @@ -45,6 +45,7 @@ class Time OneTime Total; OneTime Calc_dt; + OneTime Hydro_Integrator; OneTime Hydro; OneTime Boundaries; OneTime Grav_Potential; @@ -54,7 +55,8 @@ class Time OneTime Part_Dens_Transf; OneTime Advance_Part_1; OneTime Advance_Part_2; - OneTime Cooling; + OneTime Cooling_GPU; + OneTime Cooling_Grackle; OneTime Chemistry; OneTime Feedback; OneTime FeedbackAnalysis; From a4c30e7317dd8b1ac48b5a16e4968790d25bdb23 Mon Sep 17 00:00:00 2001 From: Alwin Date: Wed, 26 Apr 2023 18:18:27 -0400 Subject: [PATCH 322/694] format --- builds/make.type.hydro | 4 ++-- src/grid/grid3D.cpp | 15 ++++++--------- src/io/io.cpp | 24 ++++++++++++------------ src/io/io_parallel.cpp | 5 +---- 4 files changed, 21 insertions(+), 27 deletions(-) diff --git a/builds/make.type.hydro b/builds/make.type.hydro index f34d78172..e08a319ae 100644 --- a/builds/make.type.hydro +++ b/builds/make.type.hydro @@ -21,9 +21,9 @@ DFLAGS += -DTEMPERATURE_FLOOR #DFLAGS += -DCOOLING_GPU # Measure the Timing of the different stages -#DFLAGS += -DCPU_TIME +DFLAGS += -DCPU_TIME # Select output format # Can also add -DSLICES and -DPROJECTIONS -OUTPUT ?= -DOUTPUT -DHDF5 +# OUTPUT ?= -DOUTPUT -DHDF5 DFLAGS += $(OUTPUT) diff --git a/src/grid/grid3D.cpp b/src/grid/grid3D.cpp index 7647abcff..33e04159d 100644 --- a/src/grid/grid3D.cpp +++ b/src/grid/grid3D.cpp @@ -426,10 +426,9 @@ Real Grid3D::Update_Grid(void) U_floor /= Cosmo.v_0_gas * Cosmo.v_0_gas / Cosmo.current_a / Cosmo.current_a; #endif - #ifdef CPU_TIME Timer.Hydro_Integrator.Start(); -#endif //CPU_TIME +#endif // CPU_TIME // Run the hydro integrator on the grid if (H.nx > 1 && H.ny == 1 && H.nz == 1) // 1D @@ -473,23 +472,21 @@ Real Grid3D::Update_Grid(void) chexit(-1); } - #ifdef CPU_TIME Timer.Hydro_Integrator.End(); -#endif //CPU_TIME - +#endif // CPU_TIME #ifdef CUDA #ifdef COOLING_GPU -#ifdef CPU_TIME + #ifdef CPU_TIME Timer.Cooling_GPU.Start(); -#endif + #endif // ==Apply Cooling from cooling/cooling_cuda.h== Cooling_Update(C.device, H.nx, H.ny, H.nz, H.n_ghost, H.n_fields, H.dt, gama); -#ifdef CPU_TIME + #ifdef CPU_TIME Timer.Cooling_GPU.End(); -#endif + #endif #endif // COOLING_GPU diff --git a/src/io/io.cpp b/src/io/io.cpp index 05ae36dd7..e5e02cf74 100644 --- a/src/io/io.cpp +++ b/src/io/io.cpp @@ -277,9 +277,9 @@ void OutputFloat32(Grid3D &G, struct parameters P, int nfile) std::string filename(P.outdir); filename += std::to_string(nfile); filename += ".float32.h5"; -#ifdef MPI_CHOLLA + #ifdef MPI_CHOLLA filename += "." + std::to_string(procID); -#endif // MPI_CHOLLA + #endif // MPI_CHOLLA // create hdf5 file hid_t file_id; /* file identifier */ @@ -302,11 +302,11 @@ void OutputFloat32(Grid3D &G, struct parameters P, int nfile) // Need a larger device buffer for MHD. In the future, if other fields need // a larger device buffer, choose the maximum of the sizes. If the buffer is // too large, it does not cause bugs (Oct 6 2022) -#ifdef MHD + #ifdef MHD buffer_size = (nx_dset + 1) * (ny_dset + 1) * (nz_dset + 1); -#else + #else buffer_size = nx_dset * ny_dset * nz_dset; -#endif // MHD + #endif // MHD // Using static DeviceVector here automatically allocates the buffer the // first time it is needed It persists until program exit, and then calls @@ -335,13 +335,13 @@ void OutputFloat32(Grid3D &G, struct parameters P, int nfile) WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, device_dataset_buffer, G.C.d_Energy, "/Energy"); } -#ifdef DE + #ifdef DE if (P.out_float32_GasEnergy > 0) { WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, device_dataset_buffer, G.C.d_GasEnergy, "/GasEnergy"); } -#endif // DE -#ifdef MHD + #endif // DE + #ifdef MHD // TODO (by Alwin, for anyone) : Repair output format if needed and remove these chprintfs when appropriate if (P.out_float32_magnetic_x > 0) { @@ -360,7 +360,7 @@ void OutputFloat32(Grid3D &G, struct parameters P, int nfile) device_dataset_buffer, G.C.d_magnetic_z, "/magnetic_z"); } -#endif // MHD + #endif // MHD free(dataset_buffer); @@ -372,7 +372,7 @@ void OutputFloat32(Grid3D &G, struct parameters P, int nfile) // close the file status = H5Fclose(file_id); -#endif // HDF5 +#endif // HDF5 } /* Output a projection of the grid data to file. */ @@ -1372,13 +1372,13 @@ void Grid3D::Write_Grid_HDF5(hid_t file_id) #ifdef OUTPUT_ENERGY output_energy = true; #else // not OUTPUT_ENERGY - output_energy = false; + output_energy = false; #endif // OUTPUT_ENERGY #ifdef OUTPUT_MOMENTUM output_momentum = true; #else // not OUTPUT_MOMENTUM - output_momentum = false; + output_momentum = false; #endif // OUTPUT_MOMENTUM #if defined(COOLING_GRACKLE) || defined(CHEMISTRY_GPU) diff --git a/src/io/io_parallel.cpp b/src/io/io_parallel.cpp index 924273fbc..ca32091e4 100644 --- a/src/io/io_parallel.cpp +++ b/src/io/io_parallel.cpp @@ -71,7 +71,7 @@ void Grid3D::Read_Grid_Cat(struct parameters P) exit(0); } - // TODO (written by Alwin, for anyone to do) : + // TODO (written by Alwin, for anyone to do) : // Consider using collective calls if this part is slow at scale hid_t attribute_id; attribute_id = H5Aopen(file_id, "t", H5P_DEFAULT); @@ -81,9 +81,6 @@ void Grid3D::Read_Grid_Cat(struct parameters P) status = H5Aread(attribute_id, H5T_NATIVE_INT, &H.n_step); status = H5Aclose(attribute_id); - - - // Offsets are global variables from mpi_routines.h hsize_t offset[3]; offset[0] = nx_local_start; From 701186a2a7094426de288c319a2851de0c3f91e5 Mon Sep 17 00:00:00 2001 From: Alwin Date: Wed, 26 Apr 2023 18:22:16 -0400 Subject: [PATCH 323/694] undo accidental commit of make.type.hydro edits --- builds/make.type.hydro | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/builds/make.type.hydro b/builds/make.type.hydro index e08a319ae..f34d78172 100644 --- a/builds/make.type.hydro +++ b/builds/make.type.hydro @@ -21,9 +21,9 @@ DFLAGS += -DTEMPERATURE_FLOOR #DFLAGS += -DCOOLING_GPU # Measure the Timing of the different stages -DFLAGS += -DCPU_TIME +#DFLAGS += -DCPU_TIME # Select output format # Can also add -DSLICES and -DPROJECTIONS -# OUTPUT ?= -DOUTPUT -DHDF5 +OUTPUT ?= -DOUTPUT -DHDF5 DFLAGS += $(OUTPUT) From 7f118c61a8a450cebe9828f7e763faff248275aa Mon Sep 17 00:00:00 2001 From: Alwin Date: Fri, 28 Apr 2023 16:06:38 -0400 Subject: [PATCH 324/694] update ngrid and TPB to avoid 0 block cuda errors for particles --- src/grid/grid3D.cpp | 4 ++-- src/mhd/ct_electric_fields_tests.cu | 2 +- src/mhd/magnetic_update_tests.cu | 2 +- src/particles/density_CIC_gpu.cu | 2 +- src/particles/feedback_CIC_gpu.cu | 2 +- src/particles/gravity_CIC_gpu.cu | 2 +- src/particles/particles_3D_gpu.cu | 4 ++-- src/particles/particles_dynamics.cpp | 2 +- src/particles/particles_dynamics_gpu.cu | 10 +++++----- 9 files changed, 15 insertions(+), 15 deletions(-) diff --git a/src/grid/grid3D.cpp b/src/grid/grid3D.cpp index 33e04159d..4750ef881 100644 --- a/src/grid/grid3D.cpp +++ b/src/grid/grid3D.cpp @@ -482,11 +482,11 @@ Real Grid3D::Update_Grid(void) #ifdef CPU_TIME Timer.Cooling_GPU.Start(); #endif - // ==Apply Cooling from cooling/cooling_cuda.h== - Cooling_Update(C.device, H.nx, H.ny, H.nz, H.n_ghost, H.n_fields, H.dt, gama); #ifdef CPU_TIME Timer.Cooling_GPU.End(); #endif + // ==Apply Cooling from cooling/cooling_cuda.h== + Cooling_Update(C.device, H.nx, H.ny, H.nz, H.n_ghost, H.n_fields, H.dt, gama); #endif // COOLING_GPU diff --git a/src/mhd/ct_electric_fields_tests.cu b/src/mhd/ct_electric_fields_tests.cu index a57f8afe2..e009fe948 100644 --- a/src/mhd/ct_electric_fields_tests.cu +++ b/src/mhd/ct_electric_fields_tests.cu @@ -49,7 +49,7 @@ class tMHDCalculateCTElectricFields : public ::testing::Test grid(n_cells * (grid_enum::num_fields)), testCTElectricFields(n_cells * 3, -999.), fiducialData(n_cells * 3, -999.), - dimGrid((n_cells + TPB - 1), 1, 1), + dimGrid((n_cells + TPB - 1) / TPB, 1, 1), dimBlock(TPB, 1, 1) { // Allocate device arrays diff --git a/src/mhd/magnetic_update_tests.cu b/src/mhd/magnetic_update_tests.cu index 79dc81db7..402d9c737 100644 --- a/src/mhd/magnetic_update_tests.cu +++ b/src/mhd/magnetic_update_tests.cu @@ -46,7 +46,7 @@ class tMHDUpdateMagneticField3D : public ::testing::Test destinationGrid(n_cells * (grid_enum::num_fields), -999.), ctElectricFields(n_cells * 3), fiducialData(n_cells * (grid_enum::num_fields), -999.), - dimGrid((n_cells + TPB - 1), 1, 1), + dimGrid((n_cells + TPB - 1) / TPB, 1, 1), dimBlock(TPB, 1, 1) { // Allocate device arrays diff --git a/src/particles/density_CIC_gpu.cu b/src/particles/density_CIC_gpu.cu index 86ddd7e36..c33544046 100644 --- a/src/particles/density_CIC_gpu.cu +++ b/src/particles/density_CIC_gpu.cu @@ -155,7 +155,7 @@ void Particles_3D::Get_Density_CIC_GPU_function(part_int_t n_local, Real particl Real *pos_y_dev, Real *pos_z_dev, Real *mass_dev) { // set values for GPU kernels - int ngrid = (n_local + TPB_PARTICLES - 1) / TPB_PARTICLES; + int ngrid = (n_local - 1) / TPB_PARTICLES + 1; // number of blocks per 1D grid dim3 dim1dGrid(ngrid, 1, 1); // number of threads per 1D block diff --git a/src/particles/feedback_CIC_gpu.cu b/src/particles/feedback_CIC_gpu.cu index 286ae92e3..09281e49f 100644 --- a/src/particles/feedback_CIC_gpu.cu +++ b/src/particles/feedback_CIC_gpu.cu @@ -133,7 +133,7 @@ void supernova::initState(struct parameters* P, part_int_t n_local, Real allocat n_states = n_local * allocation_factor; cudaMalloc((void**)&randStates, n_states * sizeof(feedback_prng_t)); - int ngrid = (n_states + TPB_FEEDBACK - 1) / TPB_FEEDBACK; + int ngrid = (n_states - 1) / TPB_FEEDBACK + 1; dim3 grid(ngrid); dim3 block(TPB_FEEDBACK); diff --git a/src/particles/gravity_CIC_gpu.cu b/src/particles/gravity_CIC_gpu.cu index 19aee8941..e66eb928e 100644 --- a/src/particles/gravity_CIC_gpu.cu +++ b/src/particles/gravity_CIC_gpu.cu @@ -273,7 +273,7 @@ void Particles_3D::Get_Gravity_CIC_GPU_function(part_int_t n_local, int nx_local Real *gravity_z_dev) { // set values for GPU kernels - int ngrid = (n_local + TPB_PARTICLES - 1) / TPB_PARTICLES; + int ngrid = (n_local - 1) / TPB_PARTICLES + 1; // number of blocks per 1D grid dim3 dim1dGrid(ngrid, 1, 1); // number of threads per 1D block diff --git a/src/particles/particles_3D_gpu.cu b/src/particles/particles_3D_gpu.cu index fc5210f77..0a4915d5c 100644 --- a/src/particles/particles_3D_gpu.cu +++ b/src/particles/particles_3D_gpu.cu @@ -74,7 +74,7 @@ void __global__ Copy_Device_to_Device_Kernel(T *src_array_dev, T *dst_array_dev, template void Copy_Device_to_Device(T *src_array_dev, T *dst_array_dev, part_int_t size) { - int ngrid = (size + TPB_PARTICLES - 1) / TPB_PARTICLES; + int ngrid = (size - 1) / TPB_PARTICLES + 1; dim3 dim1dGrid(ngrid, 1, 1); dim3 dim1dBlock(TPB_PARTICLES, 1, 1); hipLaunchKernelGGL(Copy_Device_to_Device_Kernel, dim1dGrid, dim1dBlock, 0, 0, src_array_dev, dst_array_dev, size); @@ -186,7 +186,7 @@ __global__ void Set_Particles_Array_Real_Kernel(Real value, Real *array_dev, par void Particles_3D::Set_Particles_Array_Real(Real value, Real *array_dev, part_int_t size) { // set values for GPU kernels - int ngrid = (size + TPB_PARTICLES - 1) / TPB_PARTICLES; + int ngrid = (size - 1) / TPB_PARTICLES + 1; // number of blocks per 1D grid dim3 dim1dGrid(ngrid, 1, 1); // number of threads per 1D block diff --git a/src/particles/particles_dynamics.cpp b/src/particles/particles_dynamics.cpp index 9baba2cc5..977fd936c 100644 --- a/src/particles/particles_dynamics.cpp +++ b/src/particles/particles_dynamics.cpp @@ -63,7 +63,7 @@ Real Grid3D::Calc_Particles_dt() Real Grid3D::Calc_Particles_dt_GPU() { // set values for GPU kernels - int ngrid = (Particles.n_local + TPB_PARTICLES - 1) / TPB_PARTICLES; + int ngrid = (Particles.n_local - 1) / TPB_PARTICLES + 1; if (ngrid > Particles.G.size_blocks_array) { chprintf(" Error: particles dt_array too small\n"); diff --git a/src/particles/particles_dynamics_gpu.cu b/src/particles/particles_dynamics_gpu.cu index d0552abe6..665be8ff3 100644 --- a/src/particles/particles_dynamics_gpu.cu +++ b/src/particles/particles_dynamics_gpu.cu @@ -80,7 +80,7 @@ Real Particles_3D::Calc_Particles_dt_GPU_function(int ngrid, part_int_t n_partic Real *dti_array_dev) { // // set values for GPU kernels - // int ngrid = (Particles.n_local + TPB_PARTICLES - 1) / TPB_PARTICLES; + // int ngrid = (Particles.n_local - 1) / TPB_PARTICLES + 1; // number of blocks per 1D grid dim3 dim1dGrid(ngrid, 1, 1); // number of threads per 1D block @@ -150,7 +150,7 @@ void Particles_3D::Advance_Particles_KDK_Step1_GPU_function(part_int_t n_local, Real *grav_y_dev, Real *grav_z_dev) { // set values for GPU kernels - int ngrid = (n_local + TPB_PARTICLES - 1) / TPB_PARTICLES; + int ngrid = (n_local - 1) / TPB_PARTICLES + 1; // number of blocks per 1D grid dim3 dim1dGrid(ngrid, 1, 1); // number of threads per 1D block @@ -169,7 +169,7 @@ void Particles_3D::Advance_Particles_KDK_Step2_GPU_function(part_int_t n_local, Real *grav_y_dev, Real *grav_z_dev) { // set values for GPU kernels - int ngrid = (n_local + TPB_PARTICLES - 1) / TPB_PARTICLES; + int ngrid = (n_local - 1) / TPB_PARTICLES + 1; // number of blocks per 1D grid dim3 dim1dGrid(ngrid, 1, 1); // number of threads per 1D block @@ -265,7 +265,7 @@ void Particles_3D::Advance_Particles_KDK_Step1_Cosmo_GPU_function(part_int_t n_l Real Omega_K) { // set values for GPU kernels - int ngrid = (n_local + TPB_PARTICLES - 1) / TPB_PARTICLES; + int ngrid = (n_local - 1) / TPB_PARTICLES + 1; // number of blocks per 1D grid dim3 dim1dGrid(ngrid, 1, 1); // number of threads per 1D block @@ -288,7 +288,7 @@ void Particles_3D::Advance_Particles_KDK_Step2_Cosmo_GPU_function(part_int_t n_l Real Omega_K) { // set values for GPU kernels - int ngrid = (n_local + TPB_PARTICLES - 1) / TPB_PARTICLES; + int ngrid = (n_local - 1) / TPB_PARTICLES + 1; // number of blocks per 1D grid dim3 dim1dGrid(ngrid, 1, 1); // number of threads per 1D block From 5b3b4a7c1878c4b5a4de61ec2bb4b60364accc94 Mon Sep 17 00:00:00 2001 From: Alwin Date: Fri, 28 Apr 2023 16:12:13 -0400 Subject: [PATCH 325/694] put back cooling after accidental git commit -am --- src/grid/grid3D.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/grid/grid3D.cpp b/src/grid/grid3D.cpp index 4750ef881..33e04159d 100644 --- a/src/grid/grid3D.cpp +++ b/src/grid/grid3D.cpp @@ -482,11 +482,11 @@ Real Grid3D::Update_Grid(void) #ifdef CPU_TIME Timer.Cooling_GPU.Start(); #endif + // ==Apply Cooling from cooling/cooling_cuda.h== + Cooling_Update(C.device, H.nx, H.ny, H.nz, H.n_ghost, H.n_fields, H.dt, gama); #ifdef CPU_TIME Timer.Cooling_GPU.End(); #endif - // ==Apply Cooling from cooling/cooling_cuda.h== - Cooling_Update(C.device, H.nx, H.ny, H.nz, H.n_ghost, H.n_fields, H.dt, gama); #endif // COOLING_GPU From 3ecb87dce89e7714ea0c9ce4fc05373bec0b2d59 Mon Sep 17 00:00:00 2001 From: helenarichie Date: Fri, 28 Apr 2023 16:39:37 -0400 Subject: [PATCH 326/694] add function to apply a floor to any conserved variable and apply a dust density floor --- src/hydro/hydro_cuda.cu | 26 ++++++++++++++++++++++++++ src/hydro/hydro_cuda.h | 5 +++++ src/integrators/VL_3D_cuda.cu | 6 ++++++ src/integrators/simple_3D_cuda.cu | 6 ++++++ 4 files changed, 43 insertions(+) diff --git a/src/hydro/hydro_cuda.cu b/src/hydro/hydro_cuda.cu index dad6f3b66..1cafca97f 100644 --- a/src/hydro/hydro_cuda.cu +++ b/src/hydro/hydro_cuda.cu @@ -1170,4 +1170,30 @@ __device__ void Average_Cell_All_Fields(int i, int j, int k, int nx, int ny, int #endif // DE } + #ifdef CONSERVED_FLOOR +__global__ void Apply_Conserved_Floor(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int field_num, + Real conserved_floor) +{ + int id, xid, yid, zid, n_cells; + Real field_0; + n_cells = nx * ny * nz; + + // get a global thread ID + id = threadIdx.x + blockIdx.x * blockDim.x; + zid = id / (nx * ny); + yid = (id - zid * nx * ny) / nx; + xid = id - zid * nx * ny - yid * nx; + + // threads corresponding to real cells do the calculation + if (xid > n_ghost - 1 && xid < nx - n_ghost && yid > n_ghost - 1 && yid < ny - n_ghost && zid > n_ghost - 1 && + zid < nz - n_ghost) { + field_0 = dev_conserved[id + n_cells * field_num]; + + if (field_0 < conserved_floor) { + dev_conserved[id + n_cells * field_num] = conserved_floor; + } + } +} + #endif // CONSERVED_FLOOR + #endif // CUDA diff --git a/src/hydro/hydro_cuda.h b/src/hydro/hydro_cuda.h index a5c4ab713..bbd8ba5db 100644 --- a/src/hydro/hydro_cuda.h +++ b/src/hydro/hydro_cuda.h @@ -89,6 +89,11 @@ __global__ void Apply_Temperature_Floor(Real *dev_conserved, int nx, int ny, int Real U_floor); #endif + #ifdef CONSERVED_FLOOR +__global__ void Apply_Conserved_Floor(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int field_num, + Real conserved_floor); + #endif + __global__ void Partial_Update_Advected_Internal_Energy_1D(Real *dev_conserved, Real *Q_Lx, Real *Q_Rx, int nx, int n_ghost, Real dx, Real dt, Real gamma, int n_fields); diff --git a/src/integrators/VL_3D_cuda.cu b/src/integrators/VL_3D_cuda.cu index 2227172bf..cf5c5d4b8 100644 --- a/src/integrators/VL_3D_cuda.cu +++ b/src/integrators/VL_3D_cuda.cu @@ -328,6 +328,12 @@ void VL_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, int CudaCheckError(); #endif // TEMPERATURE_FLOOR + #ifdef DUST_FLOOR + hipLaunchKernelGGL(Apply_Conserved_Floor, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, nz, n_ghost, + grid_enum::dust_density, 1e-5); + CudaCheckError(); + #endif // DUST_FLOOR + return; } diff --git a/src/integrators/simple_3D_cuda.cu b/src/integrators/simple_3D_cuda.cu index 01c9c6ac1..694eddac3 100644 --- a/src/integrators/simple_3D_cuda.cu +++ b/src/integrators/simple_3D_cuda.cu @@ -188,6 +188,12 @@ void Simple_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, CudaCheckError(); #endif // TEMPERATURE_FLOOR + #ifdef DUST_FLOOR + hipLaunchKernelGGL(Apply_Conserved_Floor, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, nz, n_ghost, + grid_enum::dust_density, 1e-5); + CudaCheckError(); + #endif // DUST_FLOOR + return; } From 3dd1d300856e013ebe12d6d84070b995c872922f Mon Sep 17 00:00:00 2001 From: helenarichie Date: Fri, 28 Apr 2023 16:46:39 -0400 Subject: [PATCH 327/694] remove unnecessary macro --- src/hydro/hydro_cuda.cu | 2 -- src/hydro/hydro_cuda.h | 2 -- src/integrators/VL_3D_cuda.cu | 4 ++-- src/integrators/simple_3D_cuda.cu | 4 ++-- 4 files changed, 4 insertions(+), 8 deletions(-) diff --git a/src/hydro/hydro_cuda.cu b/src/hydro/hydro_cuda.cu index 1cafca97f..f53836fb7 100644 --- a/src/hydro/hydro_cuda.cu +++ b/src/hydro/hydro_cuda.cu @@ -1170,7 +1170,6 @@ __device__ void Average_Cell_All_Fields(int i, int j, int k, int nx, int ny, int #endif // DE } - #ifdef CONSERVED_FLOOR __global__ void Apply_Conserved_Floor(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int field_num, Real conserved_floor) { @@ -1194,6 +1193,5 @@ __global__ void Apply_Conserved_Floor(Real *dev_conserved, int nx, int ny, int n } } } - #endif // CONSERVED_FLOOR #endif // CUDA diff --git a/src/hydro/hydro_cuda.h b/src/hydro/hydro_cuda.h index bbd8ba5db..287224c57 100644 --- a/src/hydro/hydro_cuda.h +++ b/src/hydro/hydro_cuda.h @@ -89,10 +89,8 @@ __global__ void Apply_Temperature_Floor(Real *dev_conserved, int nx, int ny, int Real U_floor); #endif - #ifdef CONSERVED_FLOOR __global__ void Apply_Conserved_Floor(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int field_num, Real conserved_floor); - #endif __global__ void Partial_Update_Advected_Internal_Energy_1D(Real *dev_conserved, Real *Q_Lx, Real *Q_Rx, int nx, int n_ghost, Real dx, Real dt, Real gamma, int n_fields); diff --git a/src/integrators/VL_3D_cuda.cu b/src/integrators/VL_3D_cuda.cu index cf5c5d4b8..3fed3a9bf 100644 --- a/src/integrators/VL_3D_cuda.cu +++ b/src/integrators/VL_3D_cuda.cu @@ -328,11 +328,11 @@ void VL_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, int CudaCheckError(); #endif // TEMPERATURE_FLOOR - #ifdef DUST_FLOOR + #ifdef DUST hipLaunchKernelGGL(Apply_Conserved_Floor, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, nz, n_ghost, grid_enum::dust_density, 1e-5); CudaCheckError(); - #endif // DUST_FLOOR + #endif // DUST return; } diff --git a/src/integrators/simple_3D_cuda.cu b/src/integrators/simple_3D_cuda.cu index 694eddac3..4a24bb362 100644 --- a/src/integrators/simple_3D_cuda.cu +++ b/src/integrators/simple_3D_cuda.cu @@ -188,11 +188,11 @@ void Simple_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, CudaCheckError(); #endif // TEMPERATURE_FLOOR - #ifdef DUST_FLOOR + #ifdef DUST hipLaunchKernelGGL(Apply_Conserved_Floor, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, nz, n_ghost, grid_enum::dust_density, 1e-5); CudaCheckError(); - #endif // DUST_FLOOR + #endif // DUST return; } From fb8fca81a167044faf26efc3120bab23386332b1 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Wed, 3 May 2023 16:06:14 -0400 Subject: [PATCH 328/694] Bugfix for HLLD double star state test --- src/riemann_solvers/hlld_cuda.h | 4 ++-- src/riemann_solvers/hlld_cuda_tests.cu | 6 ++---- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/src/riemann_solvers/hlld_cuda.h b/src/riemann_solvers/hlld_cuda.h index b2311071f..7c0a36f40 100644 --- a/src/riemann_solvers/hlld_cuda.h +++ b/src/riemann_solvers/hlld_cuda.h @@ -93,8 +93,8 @@ struct DoubleStarState { // pressureDoubleStar = pressureStar // Shared values Real velocityY, velocityZ, magneticY, magneticZ; - // Different values - Real energyL, energyR; + // Different values. Initializing these since one or the other can be uninitializing leading to bad tests + Real energyL = 0.0, energyR = 0.0; }; /*! diff --git a/src/riemann_solvers/hlld_cuda_tests.cu b/src/riemann_solvers/hlld_cuda_tests.cu index 4a9a10270..70c87e311 100644 --- a/src/riemann_solvers/hlld_cuda_tests.cu +++ b/src/riemann_solvers/hlld_cuda_tests.cu @@ -2170,10 +2170,8 @@ TEST(tMHDHlldInternalDoubleStarState, CorrectInputDegenerateExpectCorrectOutput) testParams const parameters; std::vector fiducialState{ - {1.0519818825796206, 0.68198273634686157, 26.835645069149873, 7.4302316959173442, -999.79694164635089, - 90.44484278669114}, - {0.61418047569879897, 0.71813570322922715, 98.974446283273181, 10.696380763901459, -999.79694164635089, - 61.33664731346812}}; + {1.0519818825796206, 0.68198273634686157, 26.835645069149873, 7.4302316959173442, 0.0, 90.44484278669114}, + {0.61418047569879897, 0.71813570322922715, 98.974446283273181, 10.696380763901459, 0.0, 61.33664731346812}}; for (size_t i = 0; i < parameters.names.size(); i++) { mhd::_internal::DoubleStarState const testState = From 08610638b4319fecc021594d0f3640d63c3f6ec5 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Thu, 4 May 2023 16:23:42 -0400 Subject: [PATCH 329/694] Add L2 Norm option for data based system tests The MHD Blast wave test was failing on Frontier. The failures were on the 1% level and the results appeared correct so I've added a testing option that is less sensitive to a small number of cells with small errors, namely the same method that current L2 Norm based tests use --- src/system_tests/mhd_system_tests.cpp | 4 +- src/system_tests/system_tester.cpp | 60 ++++++++++++++++++++------- src/system_tests/system_tester.h | 3 +- 3 files changed, 51 insertions(+), 16 deletions(-) diff --git a/src/system_tests/mhd_system_tests.cpp b/src/system_tests/mhd_system_tests.cpp index aaeb2f4e7..3cb6f563e 100644 --- a/src/system_tests/mhd_system_tests.cpp +++ b/src/system_tests/mhd_system_tests.cpp @@ -606,7 +606,9 @@ TEST_P(tMHDSYSTEMParameterizedMpi, AdvectingFieldLoopCorrectInputExpectCorrectOu TEST_P(tMHDSYSTEMParameterizedMpi, MhdBlastWaveCorrectInputExpectCorrectOutput) { test_runner.numMpiRanks = GetParam(); - test_runner.runTest(); + + // Only do the L2 Norm test. The regular cell-to-cell comparison is brittle for this test across systems + test_runner.runTest(true, 2.2E-4, 0.35); } /// Test the Orszag-Tang Vortex diff --git a/src/system_tests/system_tester.cpp b/src/system_tests/system_tester.cpp index 677581353..0df225677 100644 --- a/src/system_tests/system_tester.cpp +++ b/src/system_tests/system_tester.cpp @@ -31,7 +31,8 @@ // ============================================================================= // ============================================================================= -void systemTest::SystemTestRunner::runTest() +void systemTest::SystemTestRunner::runTest(bool const &compute_L2_norm_only, double const &maxAllowedL1Error, + double const &maxAllowedError) { /// Only run if this variable is set to `true`. Generally this and /// globalCompareSystemTestResults should only be used for large MPI / tests @@ -106,6 +107,9 @@ void systemTest::SystemTestRunner::runTest() << _fiducialDataSetNames.size() << " datasets" << std::endl << std::endl; + // Compute the L1 Error. + double L2Norm = 0; + double maxError = 0; // Loop over the datasets to be tested for (auto dataSetName : _fiducialDataSetNames) { // check that the test data has the dataset in it @@ -146,26 +150,54 @@ void systemTest::SystemTestRunner::runTest() << "The fiducial and test '" << dataSetName << "' datasets are not the same length"; // Compare values + double L1_error = 0.0; + double fp_sum_error = 0.0; for (size_t i = 0; i < testDims[0]; i++) { for (size_t j = 0; j < testDims[1]; j++) { for (size_t k = 0; k < testDims[2]; k++) { size_t index = (i * testDims[1] * testDims[2]) + (j * testDims[2]) + k; - // Check for equality and iff not equal return difference - double absoluteDiff; - int64_t ulpsDiff; - bool areEqual = testingUtilities::nearlyEqualDbl(fiducialData.at(index), testData.at(index), absoluteDiff, - ulpsDiff, _fixedEpsilon); - ASSERT_TRUE(areEqual) << std::endl - << "Difference in " << dataSetName << " dataset at [" << i << "," << j << "," << k - << "]" << std::endl - << "The fiducial value is: " << fiducialData[index] << std::endl - << "The test value is: " << testData[index] << std::endl - << "The absolute difference is: " << absoluteDiff << std::endl - << "The ULP difference is: " << ulpsDiff << std::endl; + if (compute_L2_norm_only) { + double const diff = std::abs(fiducialData.at(index) - testData.at(index)); + + maxError = std::max(maxError, diff); + + // Perform a Kahan sum to maintain precision in the result + double const y = diff - fp_sum_error; + double const t = L1_error + y; + fp_sum_error = (t - L1_error) - y; + L1_error = t; + } else { + // Check for equality and iff not equal return difference + double absoluteDiff; + int64_t ulpsDiff; + bool areEqual = testingUtilities::nearlyEqualDbl(fiducialData.at(index), testData.at(index), absoluteDiff, + ulpsDiff, _fixedEpsilon); + ASSERT_TRUE(areEqual) << std::endl + << "Difference in " << dataSetName << " dataset at [" << i << "," << j << "," << k + << "]" << std::endl + << "The fiducial value is: " << fiducialData[index] << std::endl + << "The test value is: " << testData[index] << std::endl + << "The absolute difference is: " << absoluteDiff << std::endl + << "The ULP difference is: " << ulpsDiff << std::endl; + } } } } + + if (compute_L2_norm_only) { + L1_error /= static_cast(testDims[0] * testDims[1] * testDims[2]); + L2Norm += L1_error * L1_error; + } + } + + if (compute_L2_norm_only) { + // Check the L2 Norm + L2Norm = std::sqrt(L2Norm); + EXPECT_LT(L2Norm, maxAllowedL1Error) << "the norm of the L1 error vector has exceeded the allowed value"; + + // Check the Max Error + EXPECT_LT(maxError, maxAllowedError) << "The maximum error has exceeded the allowed value"; } } // ============================================================================= @@ -287,7 +319,7 @@ void systemTest::SystemTestRunner::runL1ErrorTest(double const &maxAllowedL1Erro << "the L1 error for the " << dataSetName << " data has exceeded the allowed value"; } - // Check the L1 Norm + // Check the L2 Norm L2Norm = std::sqrt(L2Norm); EXPECT_LT(L2Norm, maxAllowedL1Error) << "the norm of the L1 error vector has exceeded the allowed value"; diff --git a/src/system_tests/system_tester.h b/src/system_tests/system_tester.h index 1c942a766..5690a3020 100644 --- a/src/system_tests/system_tester.h +++ b/src/system_tests/system_tester.h @@ -70,7 +70,8 @@ class systemTest::SystemTestRunner * \brief Run the system test that has been set up * */ - void runTest(); + void runTest(bool const &compute_L2_norm_only = false, double const &maxAllowedL1Error = 0.0, + double const &maxAllowedError = 0.0); /*! * \brief Compute the L1 error for each field compared to the initial From 4b48ee39a1382055d797482d93bde985f470e8c2 Mon Sep 17 00:00:00 2001 From: alwinm Date: Mon, 15 May 2023 14:31:12 -0400 Subject: [PATCH 330/694] Fix T_xz bug --- src/io/io.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/io/io.cpp b/src/io/io.cpp index e5e02cf74..f3c3cbf23 100644 --- a/src/io/io.cpp +++ b/src/io/io.cpp @@ -1602,7 +1602,7 @@ void Grid3D::Write_Projection_HDF5(hid_t file_id) status = Write_HDF5_Dataset(file_id, dataspace_xy_id, dataset_buffer_dxy, "/d_xy"); status = Write_HDF5_Dataset(file_id, dataspace_xz_id, dataset_buffer_dxz, "/d_xz"); status = Write_HDF5_Dataset(file_id, dataspace_xy_id, dataset_buffer_Txy, "/T_xy"); - status = Write_HDF5_Dataset(file_id, dataspace_xy_id, dataset_buffer_Txz, "/T_xz"); + status = Write_HDF5_Dataset(file_id, dataspace_xz_id, dataset_buffer_Txz, "/T_xz"); // Free the dataspace ids status = H5Sclose(dataspace_xz_id); From eb9b2af7358c348eaa1163d916c60382b2cf55a6 Mon Sep 17 00:00:00 2001 From: Alwin Date: Mon, 15 May 2023 15:06:10 -0400 Subject: [PATCH 331/694] add cat.py to python_scripts --- python_scripts/cat.py | 406 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 406 insertions(+) create mode 100755 python_scripts/cat.py diff --git a/python_scripts/cat.py b/python_scripts/cat.py new file mode 100755 index 000000000..dc840c570 --- /dev/null +++ b/python_scripts/cat.py @@ -0,0 +1,406 @@ +# Utils for concat cholla output + +import h5py +import numpy as np +import os + +verbose = True + +def parse(argv): + # Determine prefix + if 'h5' in argv: + preprefix = argv.split('.h5')[0] + prefix = preprefix +'.h5' + + else: + prefix = './{}.h5'.format(argv) + + # Check existing + firstfile = prefix+'.0' + if not os.path.isfile(firstfile): + print(firstfile,' is missing') + exit() + + # Set dirnames + dnamein = os.path.dirname(firstfile)+'/' + dnameout = os.path.dirname(firstfile) + '/' + return dnamein,dnameout + +def hydro(n,dnamein,dnameout,double=True): + """ + n: integer, output number of file + dnamein: string, directory name of input files, should include '/' at end or leave blank for current directory + dnameout: string, directory name of output files, should include '/' at end or leave blank for current directory + double: optional bool, double precision (float64) if True, single precision (float32) if False + + Reads files of form dnamein{n}.h5.{rank}, looping over rank, outputting to file dnameout{n}.h5. + """ + + fileout = h5py.File(dnameout+str(n)+'.h5', 'a') + + i = -1 + # loops over all files + while True: + i += 1 + + fileinname = dnamein+str(n)+'.h5.'+str(i) + + if not os.path.isfile(fileinname): + break + print('Load:',fileinname,flush=True) + + # open the input file for reading + filein = h5py.File(fileinname,'r') + + # read in the header data from the input file + head = filein.attrs + + # if it's the first input file, write the header attributes + # and create the datasets in the output file + if (i == 0): + nx = head['dims'][0] + ny = head['dims'][1] + nz = head['dims'][2] + nxl = head['dims_local'][0] + nyl = head['dims_local'][1] + nzl = head['dims_local'][2] + fileout.attrs['dims'] = [nx, ny, nz] + fileout.attrs['gamma'] = [head['gamma'][0]] + fileout.attrs['t'] = [head['t'][0]] + fileout.attrs['dt'] = [head['dt'][0]] + fileout.attrs['n_step'] = [head['n_step'][0]] + + units = ['time_unit', 'mass_unit', 'length_unit', 'energy_unit', 'velocity_unit', 'densit\ +y_unit'] + for unit in units: + fileout.attrs[unit] = [head[unit][0]] + keys = list(filein.keys()) + #['density','momentum_x','momentum_y','momentum_z','Energy','GasEnergy','scalar0'] + + for key in keys: + if key not in fileout: + # WARNING: If you don't set dataset dtype it will default to 32-bit, but CHOLLA likes to be 64-bit + if double: + dtype = filein[key].dtype + else: + dtype = None + if nz > 1: + fileout.create_dataset(key, (nx, ny, nz), chunks=(nxl,nyl,nzl), dtype=dtype) + elif ny > 1: + fileout.create_dataset(key, (nx, ny), chunks=(nxl,nyl), dtype=dtype) + elif nx > 1: + fileout.create_dataset(key, (nx,), chunks=(nxl,), dtype=dtype) + #fileout.create_dataset(key, (nx, ny, nz)) + + # write data from individual processor file to + # correct location in concatenated file + nxl = head['dims_local'][0] + nyl = head['dims_local'][1] + nzl = head['dims_local'][2] + xs = head['offset'][0] + ys = head['offset'][1] + zs = head['offset'][2] + for key in keys: + if key in filein: + if nz > 1: + fileout[key][xs:xs+nxl,ys:ys+nyl,zs:zs+nzl] = filein[key] + elif ny > 1: + fileout[key][xs:xs+nxl,ys:ys+nyl] = filein[key] + elif nx > 1: + fileout[key][xs:xs+nxl] = filein[key] + filein.close() + + # end loop over all files + fileout.close() + + +def projection(n,dnamein,dnameout): + """ + n: integer, output number of file + dnamein: string, directory name of input files, should include '/' at end or leave blank for current directory + dnameout: string, directory name of output files, should include '/' at end or leave blank for current directory + double: optional bool, double precision (float64) if True, single precision (float32) if False + + Reads files of form dnamein{n}.h5.{rank}, looping over rank, outputting to file dnameout{n}.h5. + """ + + # open the output file for writing + fileout = h5py.File(dnameout+str(n)+'_proj.h5', 'w') + i = -1 + while True: + i += 1 + + fileinname = dnamein+str(n)+'_proj.h5.'+str(i) + + if not os.path.isfile(fileinname): + break + + if verbose: + print(fileinname) + # open the input file for reading + filein = h5py.File(fileinname,'r') + # read in the header data from the input file + head = filein.attrs + + # if it's the first input file, write the header attributes + # and create the datasets in the output file + if (i == 0): + nx = head['dims'][0] + ny = head['dims'][1] + nz = head['dims'][2] + fileout.attrs['dims'] = [nx, ny, nz] + fileout.attrs['gamma'] = [head['gamma'][0]] + fileout.attrs['t'] = [head['t'][0]] + fileout.attrs['dt'] = [head['dt'][0]] + fileout.attrs['n_step'] = [head['n_step'][0]] + + dxy = np.zeros((nx,ny)) + dxz = np.zeros((nx,nz)) + Txy = np.zeros((nx,ny)) + Txz = np.zeros((nx,nz)) + + # write data from individual processor file to + # correct location in concatenated file + nxl = head['dims_local'][0] + nyl = head['dims_local'][1] + nzl = head['dims_local'][2] + xs = head['offset'][0] + ys = head['offset'][1] + zs = head['offset'][2] + + dxy[xs:xs+nxl,ys:ys+nyl] += filein['d_xy'] + dxz[xs:xs+nxl,zs:zs+nzl] += filein['d_xz'] + Txy[xs:xs+nxl,ys:ys+nyl] += filein['T_xy'] + Txz[xs:xs+nxl,zs:zs+nzl] += filein['T_xz'] + + filein.close() + + # write out the new datasets + fileout.create_dataset('d_xy', data=dxy) + fileout.create_dataset('d_xz', data=dxz) + fileout.create_dataset('T_xy', data=Txy) + fileout.create_dataset('T_xz', data=Txz) + + fileout.close() + return + +def slice(n,dnamein,dnameout): + """ + n: integer, output number of file + dnamein: string, directory name of input files, should include '/' at end or leave blank for current directory + dnameout: string, directory name of output files, should include '/' at end or leave blank for current directory + double: optional bool, double precision (float64) if True, single precision (float32) if False + + Reads files of form dnamein{n}_slice.h5.{rank}, looping over rank, outputting to file dnameout{n}_slice.h5. + """ + + # open the output file for writing + fileout = h5py.File(dnameout+str(n)+'_slice.h5', 'w') + + i = -1 + while True: + # loop over files for a given output time + i += 1 + + fileinname = dnamein+str(n)+'_slice.h5.'+str(i) + if not os.path.isfile(fileinname): + break + + if verbose: + print(fileinname) + # open the input file for reading + filein = h5py.File(fileinname,'r') + # read in the header data from the input file + head = filein.attrs + + # Detect DE + DE = 'GE_xy' in filein + SCALAR = 'scalar_xy' in filein + + # if it's the first input file, write the header attributes + # and create the datasets in the output file + if (i == 0): + gamma = head['gamma'] + t = head['t'] + dt = head['dt'] + n_step = head['n_step'] + nx = head['dims'][0] + ny = head['dims'][1] + nz = head['dims'][2] + fileout.attrs['gamma'] = gamma + fileout.attrs['t'] = t + fileout.attrs['dt'] = dt + fileout.attrs['n_step'] = n_step + fileout.attrs['dims'] = [nx, ny, nz] + + d_xy = np.zeros((nx,ny)) + d_xz = np.zeros((nx,nz)) + d_yz = np.zeros((ny,nz)) + mx_xy = np.zeros((nx,ny)) + mx_xz = np.zeros((nx,nz)) + mx_yz = np.zeros((ny,nz)) + my_xy = np.zeros((nx,ny)) + my_xz = np.zeros((nx,nz)) + my_yz = np.zeros((ny,nz)) + mz_xy = np.zeros((nx,ny)) + mz_xz = np.zeros((nx,nz)) + mz_yz = np.zeros((ny,nz)) + E_xy = np.zeros((nx,ny)) + E_xz = np.zeros((nx,nz)) + E_yz = np.zeros((ny,nz)) + if DE: + GE_xy = np.zeros((nx,ny)) + GE_xz = np.zeros((nx,nz)) + GE_yz = np.zeros((ny,nz)) + if SCALAR: + scalar_xy = np.zeros((nx,ny)) + scalar_xz = np.zeros((nx,nz)) + scalar_yz = np.zeros((ny,nz)) + + # write data from individual processor file to + # correct location in concatenated file + nxl = head['dims_local'][0] + nyl = head['dims_local'][1] + nzl = head['dims_local'][2] + xs = head['offset'][0] + ys = head['offset'][1] + zs = head['offset'][2] + + d_xy[xs:xs+nxl,ys:ys+nyl] += filein['d_xy'] + d_xz[xs:xs+nxl,zs:zs+nzl] += filein['d_xz'] + d_yz[ys:ys+nyl,zs:zs+nzl] += filein['d_yz'] + mx_xy[xs:xs+nxl,ys:ys+nyl] += filein['mx_xy'] + mx_xz[xs:xs+nxl,zs:zs+nzl] += filein['mx_xz'] + mx_yz[ys:ys+nyl,zs:zs+nzl] += filein['mx_yz'] + my_xy[xs:xs+nxl,ys:ys+nyl] += filein['my_xy'] + my_xz[xs:xs+nxl,zs:zs+nzl] += filein['my_xz'] + my_yz[ys:ys+nyl,zs:zs+nzl] += filein['my_yz'] + mz_xy[xs:xs+nxl,ys:ys+nyl] += filein['mz_xy'] + mz_xz[xs:xs+nxl,zs:zs+nzl] += filein['mz_xz'] + mz_yz[ys:ys+nyl,zs:zs+nzl] += filein['mz_yz'] + E_xy[xs:xs+nxl,ys:ys+nyl] += filein['E_xy'] + E_xz[xs:xs+nxl,zs:zs+nzl] += filein['E_xz'] + E_yz[ys:ys+nyl,zs:zs+nzl] += filein['E_yz'] + if DE: + GE_xy[xs:xs+nxl,ys:ys+nyl] += filein['GE_xy'] + GE_xz[xs:xs+nxl,zs:zs+nzl] += filein['GE_xz'] + GE_yz[ys:ys+nyl,zs:zs+nzl] += filein['GE_yz'] + if SCALAR: + scalar_xy[xs:xs+nxl,ys:ys+nyl] += filein['scalar_xy'] + scalar_xz[xs:xs+nxl,zs:zs+nzl] += filein['scalar_xz'] + scalar_yz[ys:ys+nyl,zs:zs+nzl] += filein['scalar_yz'] + + filein.close() + + # wrte out the new datasets + fileout.create_dataset('d_xy', data=d_xy) + fileout.create_dataset('d_xz', data=d_xz) + fileout.create_dataset('d_yz', data=d_yz) + fileout.create_dataset('mx_xy', data=mx_xy) + fileout.create_dataset('mx_xz', data=mx_xz) + fileout.create_dataset('mx_yz', data=mx_yz) + fileout.create_dataset('my_xy', data=my_xy) + fileout.create_dataset('my_xz', data=my_xz) + fileout.create_dataset('my_yz', data=my_yz) + fileout.create_dataset('mz_xy', data=mz_xy) + fileout.create_dataset('mz_xz', data=mz_xz) + fileout.create_dataset('mz_yz', data=mz_yz) + fileout.create_dataset('E_xy', data=E_xy) + fileout.create_dataset('E_xz', data=E_xz) + fileout.create_dataset('E_yz', data=E_yz) + if DE: + fileout.create_dataset('GE_xy', data=GE_xy) + fileout.create_dataset('GE_xz', data=GE_xz) + fileout.create_dataset('GE_yz', data=GE_yz) + if SCALAR: + fileout.create_dataset('scalar_xy', data=scalar_xy) + fileout.create_dataset('scalar_xz', data=scalar_xz) + fileout.create_dataset('scalar_yz', data=scalar_yz) + + fileout.close() + return + +def rot_proj(n,dnamein,dnameout): + """ + n: integer, output number of file + dnamein: string, directory name of input files, should include '/' at end or leave blank for current directory + dnameout: string, directory name of output files, should include '/' at end or leave blank for current directory + double: optional bool, double precision (float64) if True, single precision (float32) if False + + Reads files of form dnamein{n}_rot_proj.h5.{rank}, looping over rank, outputting to file dnameout{n}_rot_proj.h5. + """ + + fileout = h5py.File(dnameout+str(n)+'_rot_proj.h5', 'w') + i = -1 + + while True: + # loop over files for a given output time + i += 1 + fileinname = dnamein+str(n)+'_rot_proj.h5.'+str(i) + if not os.path.isfile(fileinname): + break + + if verbose: + print(fileinname) + + filein = h5py.File(dnamein+fileinname,'r') + head = filein.attrs + # if it's the first input file, write the header attributes + # and create the arrays to hold the output data + if (i == 0): + + nxr = int(head['nxr']) + nzr = int(head['nzr']) + Lx = head['Lx'] + Lz = head['Lz'] + delta = head['delta'] + theta = head['theta'] + phi = head['phi'] + gamma = head['gamma'] + t = head['t'] + dt = head['dt'] + n_step = head['n_step'] + fileout.attrs['nxr'] = nxr + fileout.attrs['nzr'] = nzr + fileout.attrs['Lx'] = Lx + fileout.attrs['Lz'] = Lz + fileout.attrs['delta'] = delta + fileout.attrs['theta'] = theta + fileout.attrs['phi'] = phi + fileout.attrs['gamma'] = gamma + fileout.attrs['t'] = t + fileout.attrs['dt'] = dt + fileout.attrs['n_step'] = n_step + + d_xzr = np.zeros((nxr, nzr)) + vx_xzr = np.zeros((nxr, nzr)) + vy_xzr = np.zeros((nxr, nzr)) + vz_xzr = np.zeros((nxr, nzr)) + T_xzr = np.zeros((nxr, nzr)) + + # end first input file + + # write data from individual processor file to + # correct location in concatenated file + nx_min = int(head['nx_min']) + nx_max = int(head['nx_max']) + nz_min = int(head['nz_min']) + nz_max = int(head['nz_max']) + + d_xzr[nx_min:nx_max,nz_min:nz_max] += filein['d_xzr'][:] + vx_xzr[nx_min:nx_max,nz_min:nz_max] += filein['vx_xzr'][:] + vy_xzr[nx_min:nx_max,nz_min:nz_max] += filein['vy_xzr'][:] + vz_xzr[nx_min:nx_max,nz_min:nz_max] += filein['vz_xzr'][:] + T_xzr[nx_min:nx_max,nz_min:nz_max] += filein['T_xzr'][:] + + filein.close() + # end while loop + + # write out the new datasets + fileout.create_dataset("d_xzr", data=d_xzr) + fileout.create_dataset("vx_xzr", data=vx_xzr) + fileout.create_dataset("vy_xzr", data=vy_xzr) + fileout.create_dataset("vz_xzr", data=vz_xzr) + fileout.create_dataset("T_xzr", data=T_xzr) + + fileout.close() From c6df0e18f923a49b6846d0a8618df3f60f11a31a Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Mon, 27 Mar 2023 12:03:34 -0400 Subject: [PATCH 332/694] Allow MHD to compile with PLMC --- builds/make.type.mhd | 2 +- src/utils/error_handling.cpp | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/builds/make.type.mhd b/builds/make.type.mhd index 486ba2547..e0a817b3d 100644 --- a/builds/make.type.mhd +++ b/builds/make.type.mhd @@ -11,7 +11,7 @@ DFLAGS += -DMPI_CHOLLA DFLAGS += -DPRECISION=2 # Currently MHD only supports PCM reconstruction. Higher order reconstruction # methods will be added later -DFLAGS += -DPCM +DFLAGS += -DPLMC DFLAGS += -DHLLD DFLAGS += -DMHD diff --git a/src/utils/error_handling.cpp b/src/utils/error_handling.cpp index da2ea80fe..a9b217d1a 100644 --- a/src/utils/error_handling.cpp +++ b/src/utils/error_handling.cpp @@ -79,9 +79,9 @@ void Check_Configuration(parameters const &P) #endif //! HLLD or EXACT or ROE or HLL or HLLC // May only use certain reconstructions - #if !defined(PCM) || defined(PLMP) || defined(PLMC) || defined(PPMC) || defined(PPMP) - #error "MHD only supports PCM reconstruction" - #endif //! PCM or PLMP or PLMC or PPMC or PPMP + #if (!defined(PCM) == !defined(PLMC)) || defined(PLMP) || defined(PPMC) || defined(PPMP) + #error "MHD only supports PCM and PLMC reconstruction" + #endif //! PCM or PLMP or PPMC or PPMP // must have HDF5 #ifndef HDF5 From 764607d1479acf6374a7ed06c3df60da9e11b742 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Mon, 27 Mar 2023 14:05:47 -0400 Subject: [PATCH 333/694] Replace `if (dir)` blocks with switch statements IMO switch statements are clearer are more concise. Plus clang-tidy was bugging me about if statments using braces --- src/reconstruction/plmc_cuda.cu | 123 +++++++++++------- src/reconstruction/ppmc_cuda.cu | 169 ++++++++++++++----------- src/riemann_solvers/hlld_cuda.cu | 30 +++-- src/riemann_solvers/hlld_cuda_tests.cu | 60 +++++---- 4 files changed, 217 insertions(+), 165 deletions(-) diff --git a/src/reconstruction/plmc_cuda.cu b/src/reconstruction/plmc_cuda.cu index 787449f21..c0d803608 100644 --- a/src/reconstruction/plmc_cuda.cu +++ b/src/reconstruction/plmc_cuda.cu @@ -26,20 +26,22 @@ __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou { int n_cells = nx * ny * nz; int o1, o2, o3; - if (dir == 0) { - o1 = 1; - o2 = 2; - o3 = 3; - } - if (dir == 1) { - o1 = 2; - o2 = 3; - o3 = 1; - } - if (dir == 2) { - o1 = 3; - o2 = 1; - o3 = 2; + switch (dir) { + case 0: + o1 = 1; + o2 = 2; + o3 = 3; + break; + case 1: + o1 = 2; + o2 = 3; + o3 = 1; + break; + case 2: + o1 = 3; + o2 = 1; + o3 = 2; + break; } // declare primitive variables for each stencil @@ -100,29 +102,31 @@ __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou int xid = tid - zid * nx * ny - yid * nx; int xs, xe, ys, ye, zs, ze; - if (dir == 0) { - xs = 1; - xe = nx - 2; - ys = 0; - ye = ny; - zs = 0; - ze = nz; - } - if (dir == 1) { - xs = 0; - xe = nx; - ys = 1; - ye = ny - 2; - zs = 0; - ze = nz; - } - if (dir == 2) { - xs = 0; - xe = nx; - ys = 0; - ye = ny; - zs = 1; - ze = nz - 2; + switch (dir) { + case 0: + xs = 1; + xe = nx - 2; + ys = 0; + ye = ny; + zs = 0; + ze = nz; + break; + case 1: + xs = 0; + xe = nx; + ys = 1; + ye = ny - 2; + zs = 0; + ze = nz; + break; + case 2: + xs = 0; + xe = nx; + ys = 0; + ye = ny; + zs = 1; + ze = nz - 2; + break; } if (xid >= xs && xid < xe && yid >= ys && yid < ye && zid >= zs && zid < ze) { @@ -151,9 +155,17 @@ __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou ge_i = dge / d_i; #endif // DE // cell i-1 - if (dir == 0) id = xid - 1 + yid * nx + zid * nx * ny; - if (dir == 1) id = xid + (yid - 1) * nx + zid * nx * ny; - if (dir == 2) id = xid + yid * nx + (zid - 1) * nx * ny; + switch (dir) { + case 0: + id = xid - 1 + yid * nx + zid * nx * ny; + break; + case 1: + id = xid + (yid - 1) * nx + zid * nx * ny; + break; + case 2: + id = xid + yid * nx + (zid - 1) * nx * ny; + break; + } d_imo = dev_conserved[id]; vx_imo = dev_conserved[o1 * n_cells + id] / d_imo; vy_imo = dev_conserved[o2 * n_cells + id] / d_imo; @@ -177,9 +189,19 @@ __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou ge_imo = dge / d_imo; #endif // DE // cell i+1 - if (dir == 0) id = xid + 1 + yid * nx + zid * nx * ny; - if (dir == 1) id = xid + (yid + 1) * nx + zid * nx * ny; - if (dir == 2) id = xid + yid * nx + (zid + 1) * nx * ny; + + switch (dir) { + case 0: + id = xid + 1 + yid * nx + zid * nx * ny; + break; + case 1: + id = xid + (yid + 1) * nx + zid * nx * ny; + break; + case 2: + id = xid + yid * nx + (zid + 1) * nx * ny; + break; + } + d_ipo = dev_conserved[id]; vx_ipo = dev_conserved[o1 * n_cells + id] / d_ipo; vy_ipo = dev_conserved[o2 * n_cells + id] / d_ipo; @@ -622,9 +644,18 @@ __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou // Convert the left and right states in the primitive to the conserved // variables send final values back from kernel bounds_R refers to the right // side of the i-1/2 interface - if (dir == 0) id = xid - 1 + yid * nx + zid * nx * ny; - if (dir == 1) id = xid + (yid - 1) * nx + zid * nx * ny; - if (dir == 2) id = xid + yid * nx + (zid - 1) * nx * ny; + switch (dir) { + case 0: + id = xid - 1 + yid * nx + zid * nx * ny; + break; + case 1: + id = xid + (yid - 1) * nx + zid * nx * ny; + break; + case 2: + id = xid + yid * nx + (zid - 1) * nx * ny; + break; + } + dev_bounds_R[id] = d_R_imh; dev_bounds_R[o1 * n_cells + id] = d_R_imh * vx_R_imh; dev_bounds_R[o2 * n_cells + id] = d_R_imh * vy_R_imh; diff --git a/src/reconstruction/ppmc_cuda.cu b/src/reconstruction/ppmc_cuda.cu index 10c6a788d..b69c07eb1 100644 --- a/src/reconstruction/ppmc_cuda.cu +++ b/src/reconstruction/ppmc_cuda.cu @@ -26,20 +26,22 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou { int n_cells = nx * ny * nz; int o1, o2, o3; - if (dir == 0) { - o1 = 1; - o2 = 2; - o3 = 3; - } - if (dir == 1) { - o1 = 2; - o2 = 3; - o3 = 1; - } - if (dir == 2) { - o1 = 3; - o2 = 1; - o3 = 2; + switch (dir) { + case 0: + o1 = 1; + o2 = 2; + o3 = 3; + break; + case 1: + o1 = 2; + o2 = 3; + o3 = 1; + break; + case 2: + o1 = 3; + o2 = 1; + o3 = 2; + break; } // declare primitive variables for each stencil @@ -110,29 +112,31 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou int xid = tid - zid * nx * ny - yid * nx; int xs, xe, ys, ye, zs, ze; - if (dir == 0) { - xs = 2; - xe = nx - 3; - ys = 0; - ye = ny; - zs = 0; - ze = nz; - } - if (dir == 1) { - xs = 0; - xe = nx; - ys = 2; - ye = ny - 3; - zs = 0; - ze = nz; - } - if (dir == 2) { - xs = 0; - xe = nx; - ys = 0; - ye = ny; - zs = 2; - ze = nz - 3; + switch (dir) { + case 0: + xs = 2; + xe = nx - 3; + ys = 0; + ye = ny; + zs = 0; + ze = nz; + break; + case 1: + xs = 0; + xe = nx; + ys = 2; + ye = ny - 3; + zs = 0; + ze = nz; + break; + case 2: + xs = 0; + xe = nx; + ys = 0; + ye = ny; + zs = 2; + ze = nz - 3; + break; } if (xid >= xs && xid < xe && yid >= ys && yid < ye && zid >= zs && zid < ze) { @@ -161,15 +165,18 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou } #endif // SCALAR // cell i-1 - if (dir == 0) { - id = xid - 1 + yid * nx + zid * nx * ny; - } - if (dir == 1) { - id = xid + (yid - 1) * nx + zid * nx * ny; - } - if (dir == 2) { - id = xid + yid * nx + (zid - 1) * nx * ny; + switch (dir) { + case 0: + id = xid - 1 + yid * nx + zid * nx * ny; + break; + case 1: + id = xid + (yid - 1) * nx + zid * nx * ny; + break; + case 2: + id = xid + yid * nx + (zid - 1) * nx * ny; + break; } + d_imo = dev_conserved[id]; vx_imo = dev_conserved[o1 * n_cells + id] / d_imo; vy_imo = dev_conserved[o2 * n_cells + id] / d_imo; @@ -193,14 +200,16 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou } #endif // SCALAR // cell i+1 - if (dir == 0) { - id = xid + 1 + yid * nx + zid * nx * ny; - } - if (dir == 1) { - id = xid + (yid + 1) * nx + zid * nx * ny; - } - if (dir == 2) { - id = xid + yid * nx + (zid + 1) * nx * ny; + switch (dir) { + case 0: + id = xid + 1 + yid * nx + zid * nx * ny; + break; + case 1: + id = xid + (yid + 1) * nx + zid * nx * ny; + break; + case 2: + id = xid + yid * nx + (zid + 1) * nx * ny; + break; } d_ipo = dev_conserved[id]; vx_ipo = dev_conserved[o1 * n_cells + id] / d_ipo; @@ -225,14 +234,16 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou } #endif // SCALAR // cell i-2 - if (dir == 0) { - id = xid - 2 + yid * nx + zid * nx * ny; - } - if (dir == 1) { - id = xid + (yid - 2) * nx + zid * nx * ny; - } - if (dir == 2) { - id = xid + yid * nx + (zid - 2) * nx * ny; + switch (dir) { + case 0: + id = xid - 2 + yid * nx + zid * nx * ny; + break; + case 1: + id = xid + (yid - 2) * nx + zid * nx * ny; + break; + case 2: + id = xid + yid * nx + (zid - 2) * nx * ny; + break; } d_imt = dev_conserved[id]; vx_imt = dev_conserved[o1 * n_cells + id] / d_imt; @@ -257,14 +268,16 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou } #endif // SCALAR // cell i+2 - if (dir == 0) { - id = xid + 2 + yid * nx + zid * nx * ny; - } - if (dir == 1) { - id = xid + (yid + 2) * nx + zid * nx * ny; - } - if (dir == 2) { - id = xid + yid * nx + (zid + 2) * nx * ny; + switch (dir) { + case 0: + id = xid + 2 + yid * nx + zid * nx * ny; + break; + case 1: + id = xid + (yid + 2) * nx + zid * nx * ny; + break; + case 2: + id = xid + yid * nx + (zid + 2) * nx * ny; + break; } d_ipt = dev_conserved[id]; vx_ipt = dev_conserved[o1 * n_cells + id] / d_ipt; @@ -1218,14 +1231,16 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou // Step 11 - Send final values back from kernel // bounds_R refers to the right side of the i-1/2 interface - if (dir == 0) { - id = xid - 1 + yid * nx + zid * nx * ny; - } - if (dir == 1) { - id = xid + (yid - 1) * nx + zid * nx * ny; - } - if (dir == 2) { - id = xid + yid * nx + (zid - 1) * nx * ny; + switch (dir) { + case 0: + id = xid - 1 + yid * nx + zid * nx * ny; + break; + case 1: + id = xid + (yid - 1) * nx + zid * nx * ny; + break; + case 2: + id = xid + yid * nx + (zid - 1) * nx * ny; + break; } dev_bounds_R[id] = d_L; dev_bounds_R[o1 * n_cells + id] = d_L * vx_L; diff --git a/src/riemann_solvers/hlld_cuda.cu b/src/riemann_solvers/hlld_cuda.cu index c962325a7..3ab26f371 100644 --- a/src/riemann_solvers/hlld_cuda.cu +++ b/src/riemann_solvers/hlld_cuda.cu @@ -44,20 +44,22 @@ __global__ void Calculate_HLLD_Fluxes_CUDA(Real const *dev_bounds_L, Real const // Offsets & indices int o1, o2, o3; - if (direction == 0) { - o1 = grid_enum::momentum_x; - o2 = grid_enum::momentum_y; - o3 = grid_enum::momentum_z; - } - if (direction == 1) { - o1 = grid_enum::momentum_y; - o2 = grid_enum::momentum_z; - o3 = grid_enum::momentum_x; - } - if (direction == 2) { - o1 = grid_enum::momentum_z; - o2 = grid_enum::momentum_x; - o3 = grid_enum::momentum_y; + switch (direction) { + case 0: + o1 = grid_enum::momentum_x; + o2 = grid_enum::momentum_y; + o3 = grid_enum::momentum_z; + break; + case 1: + o1 = grid_enum::momentum_y; + o2 = grid_enum::momentum_z; + o3 = grid_enum::momentum_x; + break; + case 2: + o1 = grid_enum::momentum_z; + o2 = grid_enum::momentum_x; + o3 = grid_enum::momentum_y; + break; } // ============================ diff --git a/src/riemann_solvers/hlld_cuda_tests.cu b/src/riemann_solvers/hlld_cuda_tests.cu index 70c87e311..6fd27f99d 100644 --- a/src/riemann_solvers/hlld_cuda_tests.cu +++ b/src/riemann_solvers/hlld_cuda_tests.cu @@ -2266,20 +2266,22 @@ TEST(tMHDHlldInternalReturnFluxes, CorrectInputExpectCorrectOutput) for (size_t direction = 0; direction < 1; direction++) { int o1, o2, o3; - if (direction == 0) { - o1 = 1; - o2 = 2; - o3 = 3; - } - if (direction == 1) { - o1 = 2; - o2 = 3; - o3 = 1; - } - if (direction == 2) { - o1 = 3; - o2 = 1; - o3 = 2; + switch (direction) { + case 0: + o1 = 1; + o2 = 2; + o3 = 3; + break; + case 1: + o1 = 2; + o2 = 3; + o3 = 1; + break; + case 2: + o1 = 3; + o2 = 1; + o3 = 2; + break; } std::vector testFluxArray(nFields * n_cells, dummyValue); @@ -2362,20 +2364,22 @@ TEST(tMHDHlldInternalLoadState, CorrectInputExpectCorrectOutput) for (size_t direction = 0; direction < 3; direction++) { int o1, o2, o3; - if (direction == 0) { - o1 = 1; - o2 = 2; - o3 = 3; - } - if (direction == 1) { - o1 = 2; - o2 = 3; - o3 = 1; - } - if (direction == 2) { - o1 = 3; - o2 = 1; - o3 = 2; + switch (direction) { + case 0: + o1 = 1; + o2 = 2; + o3 = 3; + break; + case 1: + o1 = 2; + o2 = 3; + o3 = 1; + break; + case 2: + o1 = 3; + o2 = 1; + o3 = 2; + break; } mhd::_internal::State const testState = mhd::_internal::loadState(interfaceArray.data(), parameters.magneticX.at(0), From 09de919136205152039d5b9404cab195f68d38f6 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Mon, 27 Mar 2023 14:10:05 -0400 Subject: [PATCH 334/694] Remove CUDA macro usage in PLMC files --- src/reconstruction/plmc_cuda.cu | 200 ++++++++++++++++---------------- src/reconstruction/plmc_cuda.h | 15 ++- 2 files changed, 106 insertions(+), 109 deletions(-) diff --git a/src/reconstruction/plmc_cuda.cu b/src/reconstruction/plmc_cuda.cu index c0d803608..7bcad1ced 100644 --- a/src/reconstruction/plmc_cuda.cu +++ b/src/reconstruction/plmc_cuda.cu @@ -2,19 +2,18 @@ * \brief Definitions of the piecewise linear reconstruction functions with limiting applied in the characteristic variables, as described in Stone et al., 2008. */ -#ifdef CUDA - #ifdef PLMC +#ifdef PLMC - #include + #include - #include "../global/global.h" - #include "../global/global_cuda.h" - #include "../reconstruction/plmc_cuda.h" - #include "../utils/gpu.hpp" + #include "../global/global.h" + #include "../global/global_cuda.h" + #include "../reconstruction/plmc_cuda.h" + #include "../utils/gpu.hpp" - #ifdef DE // PRESSURE_DE - #include "../utils/hydro_utilities.h" - #endif // DE + #ifdef DE // PRESSURE_DE + #include "../utils/hydro_utilities.h" + #endif // DE /*! \fn __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, int ny, int nz, int n_ghost, Real dx, Real dt, Real @@ -66,32 +65,32 @@ __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou Real d_L_iph, vx_L_iph, vy_L_iph, vz_L_iph, p_L_iph; Real d_R_imh, vx_R_imh, vy_R_imh, vz_R_imh, p_R_imh; Real C; - #ifndef VL + #ifndef VL Real dtodx = dt / dx; Real lambda_m, lambda_0, lambda_p; Real qx; Real lamdiff; Real sum_0, sum_1, sum_2, sum_3, sum_4; - #endif // not VL - #ifdef DE + #endif // not VL + #ifdef DE Real ge_i, ge_imo, ge_ipo; Real del_ge_L, del_ge_R, del_ge_C, del_ge_G; Real del_ge_m_i; Real ge_L_iph, ge_R_imh; Real E, E_kin, dge; - #ifndef VL + #ifndef VL Real sum_ge; - #endif // CTU - #endif // DE - #ifdef SCALAR + #endif // CTU + #endif // DE + #ifdef SCALAR Real scalar_i[NSCALARS], scalar_imo[NSCALARS], scalar_ipo[NSCALARS]; Real del_scalar_L[NSCALARS], del_scalar_R[NSCALARS], del_scalar_C[NSCALARS], del_scalar_G[NSCALARS]; Real del_scalar_m_i[NSCALARS]; Real scalar_L_iph[NSCALARS], scalar_R_imh[NSCALARS]; - #ifndef VL + #ifndef VL Real sum_scalar[NSCALARS]; - #endif // CTU - #endif // SCALAR + #endif // CTU + #endif // SCALAR // get a thread ID int blockId = blockIdx.x + blockIdx.y * gridDim.x; @@ -137,23 +136,23 @@ __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou vx_i = dev_conserved[o1 * n_cells + id] / d_i; vy_i = dev_conserved[o2 * n_cells + id] / d_i; vz_i = dev_conserved[o3 * n_cells + id] / d_i; - #ifdef DE // PRESSURE_DE + #ifdef DE // PRESSURE_DE E = dev_conserved[4 * n_cells + id]; E_kin = 0.5 * d_i * (vx_i * vx_i + vy_i * vy_i + vz_i * vz_i); dge = dev_conserved[(n_fields - 1) * n_cells + id]; p_i = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, dge, gamma); - #else // not DE + #else // not DE p_i = (dev_conserved[4 * n_cells + id] - 0.5 * d_i * (vx_i * vx_i + vy_i * vy_i + vz_i * vz_i)) * (gamma - 1.0); - #endif // PRESSURE_DE + #endif // PRESSURE_DE p_i = fmax(p_i, (Real)TINY_NUMBER); - #ifdef SCALAR + #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { scalar_i[i] = dev_conserved[(5 + i) * n_cells + id] / d_i; } - #endif // SCALAR - #ifdef DE + #endif // SCALAR + #ifdef DE ge_i = dge / d_i; - #endif // DE + #endif // DE // cell i-1 switch (dir) { case 0: @@ -170,24 +169,24 @@ __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou vx_imo = dev_conserved[o1 * n_cells + id] / d_imo; vy_imo = dev_conserved[o2 * n_cells + id] / d_imo; vz_imo = dev_conserved[o3 * n_cells + id] / d_imo; - #ifdef DE // PRESSURE_DE + #ifdef DE // PRESSURE_DE E = dev_conserved[4 * n_cells + id]; E_kin = 0.5 * d_imo * (vx_imo * vx_imo + vy_imo * vy_imo + vz_imo * vz_imo); dge = dev_conserved[(n_fields - 1) * n_cells + id]; p_imo = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, dge, gamma); - #else // not DE + #else // not DE p_imo = (dev_conserved[4 * n_cells + id] - 0.5 * d_imo * (vx_imo * vx_imo + vy_imo * vy_imo + vz_imo * vz_imo)) * (gamma - 1.0); - #endif // PRESSURE_DE + #endif // PRESSURE_DE p_imo = fmax(p_imo, (Real)TINY_NUMBER); - #ifdef SCALAR + #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { scalar_imo[i] = dev_conserved[(5 + i) * n_cells + id] / d_imo; } - #endif // SCALAR - #ifdef DE + #endif // SCALAR + #ifdef DE ge_imo = dge / d_imo; - #endif // DE + #endif // DE // cell i+1 switch (dir) { @@ -206,35 +205,35 @@ __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou vx_ipo = dev_conserved[o1 * n_cells + id] / d_ipo; vy_ipo = dev_conserved[o2 * n_cells + id] / d_ipo; vz_ipo = dev_conserved[o3 * n_cells + id] / d_ipo; - #ifdef DE // PRESSURE_DE + #ifdef DE // PRESSURE_DE E = dev_conserved[4 * n_cells + id]; E_kin = 0.5 * d_ipo * (vx_ipo * vx_ipo + vy_ipo * vy_ipo + vz_ipo * vz_ipo); dge = dev_conserved[(n_fields - 1) * n_cells + id]; p_ipo = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, dge, gamma); - #else // not DE + #else // not DE p_ipo = (dev_conserved[4 * n_cells + id] - 0.5 * d_ipo * (vx_ipo * vx_ipo + vy_ipo * vy_ipo + vz_ipo * vz_ipo)) * (gamma - 1.0); - #endif // PRESSURE_DE + #endif // PRESSURE_DE p_ipo = fmax(p_ipo, (Real)TINY_NUMBER); - #ifdef SCALAR + #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { scalar_ipo[i] = dev_conserved[(5 + i) * n_cells + id] / d_ipo; } - #endif // SCALAR - #ifdef DE + #endif // SCALAR + #ifdef DE ge_ipo = dge / d_ipo; - #endif // DE + #endif // DE // calculate the adiabatic sound speed in cell i a_i = sqrt(gamma * p_i / d_i); - // Compute the eigenvalues of the linearized equations in the - // primitive variables using the cell-centered primitive variables - #ifndef VL + // Compute the eigenvalues of the linearized equations in the + // primitive variables using the cell-centered primitive variables + #ifndef VL lambda_m = vx_i - a_i; lambda_0 = vx_i; lambda_p = vx_i + a_i; - #endif // VL + #endif // VL // Compute the left, right, centered, and van Leer differences of the // primitive variables Note that here L and R refer to locations relative to @@ -288,7 +287,7 @@ __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou del_p_G = 0.0; } - #ifdef DE + #ifdef DE del_ge_L = ge_i - ge_imo; del_ge_R = ge_ipo - ge_i; del_ge_C = 0.5 * (ge_ipo - ge_imo); @@ -297,8 +296,8 @@ __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou } else { del_ge_G = 0.0; } - #endif // DE - #ifdef SCALAR + #endif // DE + #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { del_scalar_L[i] = scalar_i[i] - scalar_imo[i]; del_scalar_R[i] = scalar_ipo[i] - scalar_i[i]; @@ -309,7 +308,7 @@ __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou del_scalar_G[i] = 0.0; } } - #endif // SCALAR + #endif // SCALAR // Project the left, right, centered and van Leer differences onto the // characteristic variables Stone Eqn 37 (del_a are differences in @@ -369,15 +368,15 @@ __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou lim_slope_b = fmin(fabs(del_a_4_C), fabs(del_a_4_G)); del_a_4_m = sgn_CUDA(del_a_4_C) * fmin(2.0 * lim_slope_a, lim_slope_b); } - #ifdef DE + #ifdef DE del_ge_m_i = 0.0; if (del_ge_L * del_ge_R > 0.0) { lim_slope_a = fmin(fabs(del_ge_L), fabs(del_ge_R)); lim_slope_b = fmin(fabs(del_ge_C), fabs(del_ge_G)); del_ge_m_i = sgn_CUDA(del_ge_C) * fmin(2.0 * lim_slope_a, lim_slope_b); } - #endif // DE - #ifdef SCALAR + #endif // DE + #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { del_scalar_m_i[i] = 0.0; if (del_scalar_L[i] * del_scalar_R[i] > 0.0) { @@ -386,7 +385,7 @@ __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou del_scalar_m_i[i] = sgn_CUDA(del_scalar_C[i]) * fmin(2.0 * lim_slope_a, lim_slope_b); } } - #endif // SCALAR + #endif // SCALAR // Project the monotonized difference in the characteristic variables back // onto the primitive variables Stone Eqn 39 @@ -411,16 +410,16 @@ __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou vz_L_iph = vz_i + 0.5 * del_vz_m_i; p_L_iph = p_i + 0.5 * del_p_m_i; - #ifdef DE + #ifdef DE ge_R_imh = ge_i - 0.5 * del_ge_m_i; ge_L_iph = ge_i + 0.5 * del_ge_m_i; - #endif // DE - #ifdef SCALAR + #endif // DE + #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { scalar_R_imh[i] = scalar_i[i] - 0.5 * del_scalar_m_i[i]; scalar_L_iph[i] = scalar_i[i] + 0.5 * del_scalar_m_i[i]; } - #endif // SCALAR + #endif // SCALAR C = d_R_imh + d_L_iph; d_R_imh = fmax(fmin(d_i, d_imo), d_R_imh); @@ -468,7 +467,7 @@ __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou del_vz_m_i = vz_L_iph - vz_R_imh; del_p_m_i = p_L_iph - p_R_imh; - #ifdef DE + #ifdef DE C = ge_R_imh + ge_L_iph; ge_R_imh = fmax(fmin(ge_i, ge_imo), ge_R_imh); ge_R_imh = fmin(fmax(ge_i, ge_imo), ge_R_imh); @@ -477,9 +476,9 @@ __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou ge_L_iph = fmin(fmax(ge_i, ge_ipo), ge_L_iph); ge_R_imh = C - ge_L_iph; del_ge_m_i = ge_L_iph - ge_R_imh; - #endif // DE + #endif // DE - #ifdef SCALAR + #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { C = scalar_R_imh[i] + scalar_L_iph[i]; scalar_R_imh[i] = fmax(fmin(scalar_i[i], scalar_imo[i]), scalar_R_imh[i]); @@ -490,9 +489,9 @@ __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou scalar_R_imh[i] = C - scalar_L_iph[i]; del_scalar_m_i[i] = scalar_L_iph[i] - scalar_R_imh[i]; } - #endif // SCALAR + #endif // SCALAR - #ifndef VL + #ifndef VL // Integrate linear interpolation function over domain of dependence // defined by max(min) eigenvalue qx = -0.5 * fmin(lambda_m, 0.0) * dtodx; @@ -509,31 +508,31 @@ __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou vz_L_iph = vz_L_iph - qx * del_vz_m_i; p_L_iph = p_L_iph - qx * del_p_m_i; - #ifdef DE + #ifdef DE ge_R_imh = ge_R_imh + qx * del_ge_m_i; ge_L_iph = ge_L_iph - qx * del_ge_m_i; - #endif // DE + #endif // DE - #ifdef SCALAR + #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { scalar_R_imh[i] = scalar_R_imh[i] + qx * del_scalar_m_i[i]; scalar_L_iph[i] = scalar_L_iph[i] - qx * del_scalar_m_i[i]; } - #endif // SCALAR + #endif // SCALAR // Perform the characteristic tracing // Stone Eqns 42 & 43 // left-hand interface value, i+1/2 sum_0 = sum_1 = sum_2 = sum_3 = sum_4 = 0; - #ifdef DE + #ifdef DE sum_ge = 0; - #endif // DE - #ifdef SCALAR + #endif // DE + #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { sum_scalar[i] = 0.0; } - #endif // SCALAR + #endif // SCALAR if (lambda_m >= 0) { lamdiff = lambda_p - lambda_m; @@ -547,14 +546,14 @@ __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou sum_0 += lamdiff * (del_d_m_i - del_p_m_i / (a_i * a_i)); sum_2 += lamdiff * del_vy_m_i; sum_3 += lamdiff * del_vz_m_i; - #ifdef DE + #ifdef DE sum_ge += lamdiff * del_ge_m_i; - #endif // DE - #ifdef SCALAR + #endif // DE + #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { sum_scalar[i] += lamdiff * del_scalar_m_i[i]; } - #endif // SCALAR + #endif // SCALAR } if (lambda_p >= 0) { lamdiff = lambda_p - lambda_p; @@ -570,25 +569,25 @@ __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou vy_L_iph += 0.5 * dtodx * sum_2; vz_L_iph += 0.5 * dtodx * sum_3; p_L_iph += 0.5 * dtodx * sum_4; - #ifdef DE + #ifdef DE ge_L_iph += 0.5 * dtodx * sum_ge; - #endif // DE - #ifdef SCALAR + #endif // DE + #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { scalar_L_iph[i] += 0.5 * dtodx * sum_scalar[i]; } - #endif // SCALAR + #endif // SCALAR // right-hand interface value, i-1/2 sum_0 = sum_1 = sum_2 = sum_3 = sum_4 = 0; - #ifdef DE + #ifdef DE sum_ge = 0; - #endif // DE - #ifdef SCALAR + #endif // DE + #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { sum_scalar[i] = 0; } - #endif // SCALAR + #endif // SCALAR if (lambda_m <= 0) { lamdiff = lambda_m - lambda_m; @@ -602,14 +601,14 @@ __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou sum_0 += lamdiff * (del_d_m_i - del_p_m_i / (a_i * a_i)); sum_2 += lamdiff * del_vy_m_i; sum_3 += lamdiff * del_vz_m_i; - #ifdef DE + #ifdef DE sum_ge += lamdiff * del_ge_m_i; - #endif // DE - #ifdef SCALAR + #endif // DE + #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { sum_scalar[i] += lamdiff * del_scalar_m_i[i]; } - #endif // SCALAR + #endif // SCALAR } if (lambda_p <= 0) { lamdiff = lambda_m - lambda_p; @@ -625,15 +624,15 @@ __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou vy_R_imh += 0.5 * dtodx * sum_2; vz_R_imh += 0.5 * dtodx * sum_3; p_R_imh += 0.5 * dtodx * sum_4; - #ifdef DE + #ifdef DE ge_R_imh += 0.5 * dtodx * sum_ge; - #endif // DE - #ifdef SCALAR + #endif // DE + #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { scalar_R_imh[i] += 0.5 * dtodx * sum_scalar[i]; } - #endif // SCALAR - #endif // CTU + #endif // SCALAR + #endif // CTU // apply minimum constraints d_R_imh = fmax(d_R_imh, (Real)TINY_NUMBER); @@ -662,14 +661,14 @@ __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou dev_bounds_R[o3 * n_cells + id] = d_R_imh * vz_R_imh; dev_bounds_R[4 * n_cells + id] = (p_R_imh / (gamma - 1.0)) + 0.5 * d_R_imh * (vx_R_imh * vx_R_imh + vy_R_imh * vy_R_imh + vz_R_imh * vz_R_imh); - #ifdef SCALAR + #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { dev_bounds_R[(5 + i) * n_cells + id] = d_R_imh * scalar_R_imh[i]; } - #endif // SCALAR - #ifdef DE + #endif // SCALAR + #ifdef DE dev_bounds_R[(n_fields - 1) * n_cells + id] = d_R_imh * ge_R_imh; - #endif // DE + #endif // DE // bounds_L refers to the left side of the i+1/2 interface id = xid + yid * nx + zid * nx * ny; dev_bounds_L[id] = d_L_iph; @@ -678,16 +677,15 @@ __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou dev_bounds_L[o3 * n_cells + id] = d_L_iph * vz_L_iph; dev_bounds_L[4 * n_cells + id] = (p_L_iph / (gamma - 1.0)) + 0.5 * d_L_iph * (vx_L_iph * vx_L_iph + vy_L_iph * vy_L_iph + vz_L_iph * vz_L_iph); - #ifdef SCALAR + #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { dev_bounds_L[(5 + i) * n_cells + id] = d_L_iph * scalar_L_iph[i]; } - #endif // SCALAR - #ifdef DE + #endif // SCALAR + #ifdef DE dev_bounds_L[(n_fields - 1) * n_cells + id] = d_L_iph * ge_L_iph; - #endif // DE + #endif // DE } } - #endif // PLMC -#endif // CUDA +#endif // PLMC diff --git a/src/reconstruction/plmc_cuda.h b/src/reconstruction/plmc_cuda.h index 36c707354..948a839dd 100644 --- a/src/reconstruction/plmc_cuda.h +++ b/src/reconstruction/plmc_cuda.h @@ -2,13 +2,12 @@ * \brief Declarations of the cuda plm kernels, characteristic reconstruction * version. */ -#ifdef CUDA - #ifdef PLMC +#ifndef PLMC_CUDA_H +#define PLMC_CUDA_H - #ifndef PLMC_CUDA_H - #define PLMC_CUDA_H +#include "../global/global.h" - #include "../global/global.h" +#ifdef PLMC /*! \fn __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, int ny, int nz, int n_ghost, Real dx, Real dt, Real @@ -18,6 +17,6 @@ __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, int ny, int nz, int n_ghost, Real dx, Real dt, Real gamma, int dir, int n_fields); - #endif // PLMC_CUDA_H - #endif // PLMC -#endif // CUDA +#endif // PLMC + +#endif // PLMC_CUDA_H From 457a021935dd270d859bda05876499495d045936 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Mon, 27 Mar 2023 14:24:06 -0400 Subject: [PATCH 335/694] Remove unused n_ghost arg from PLMC_cuda --- src/integrators/VL_1D_cuda.cu | 4 ++-- src/integrators/VL_2D_cuda.cu | 8 ++++---- src/integrators/VL_3D_cuda.cu | 12 ++++++------ src/integrators/simple_1D_cuda.cu | 4 ++-- src/integrators/simple_2D_cuda.cu | 8 ++++---- src/integrators/simple_3D_cuda.cu | 12 ++++++------ src/reconstruction/plmc_cuda.cu | 6 +++--- src/reconstruction/plmc_cuda.h | 4 ++-- 8 files changed, 29 insertions(+), 29 deletions(-) diff --git a/src/integrators/VL_1D_cuda.cu b/src/integrators/VL_1D_cuda.cu index 99463f927..baa4f81cb 100644 --- a/src/integrators/VL_1D_cuda.cu +++ b/src/integrators/VL_1D_cuda.cu @@ -93,8 +93,8 @@ void VL_Algorithm_1D_CUDA(Real *d_conserved, int nx, int x_off, int n_ghost, Rea n_fields); #endif #ifdef PLMC - hipLaunchKernelGGL(PLMC_cuda, dimGrid, dimBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, - gama, 0, n_fields); + hipLaunchKernelGGL(PLMC_cuda, dimGrid, dimBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, dx, dt, gama, 0, + n_fields); #endif #ifdef PLMP hipLaunchKernelGGL(PLMP_cuda, dimGrid, dimBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, diff --git a/src/integrators/VL_2D_cuda.cu b/src/integrators/VL_2D_cuda.cu index 79d410033..0ba1fc98a 100644 --- a/src/integrators/VL_2D_cuda.cu +++ b/src/integrators/VL_2D_cuda.cu @@ -103,10 +103,10 @@ void VL_Algorithm_2D_CUDA(Real *d_conserved, int nx, int ny, int x_off, int y_of dt, gama, 1, n_fields); #endif #ifdef PLMC - hipLaunchKernelGGL(PLMC_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, - dt, gama, 0, n_fields); - hipLaunchKernelGGL(PLMC_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, - dt, gama, 1, n_fields); + hipLaunchKernelGGL(PLMC_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, dx, dt, gama, + 0, n_fields); + hipLaunchKernelGGL(PLMC_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Ly, Q_Ry, nx, ny, nz, dy, dt, gama, + 1, n_fields); #endif #ifdef PPMP hipLaunchKernelGGL(PPMP_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, diff --git a/src/integrators/VL_3D_cuda.cu b/src/integrators/VL_3D_cuda.cu index 2227172bf..0a5aa2a13 100644 --- a/src/integrators/VL_3D_cuda.cu +++ b/src/integrators/VL_3D_cuda.cu @@ -218,12 +218,12 @@ void VL_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, int dt, gama, 2, n_fields); #endif // PLMP #ifdef PLMC - hipLaunchKernelGGL(PLMC_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, - dt, gama, 0, n_fields); - hipLaunchKernelGGL(PLMC_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, - dt, gama, 1, n_fields); - hipLaunchKernelGGL(PLMC_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Lz, Q_Rz, nx, ny, nz, n_ghost, dz, - dt, gama, 2, n_fields); + hipLaunchKernelGGL(PLMC_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, dx, dt, gama, + 0, n_fields); + hipLaunchKernelGGL(PLMC_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Ly, Q_Ry, nx, ny, nz, dy, dt, gama, + 1, n_fields); + hipLaunchKernelGGL(PLMC_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Lz, Q_Rz, nx, ny, nz, dz, dt, gama, + 2, n_fields); #endif // PLMC #ifdef PPMP hipLaunchKernelGGL(PPMP_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, diff --git a/src/integrators/simple_1D_cuda.cu b/src/integrators/simple_1D_cuda.cu index 8e622b85c..c4be22acd 100644 --- a/src/integrators/simple_1D_cuda.cu +++ b/src/integrators/simple_1D_cuda.cu @@ -66,8 +66,8 @@ void Simple_Algorithm_1D_CUDA(Real *d_conserved, int nx, int x_off, int n_ghost, CudaCheckError(); #endif #ifdef PLMC - hipLaunchKernelGGL(PLMC_cuda, dimGrid, dimBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, - 0, n_fields); + hipLaunchKernelGGL(PLMC_cuda, dimGrid, dimBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, dx, dt, gama, 0, + n_fields); CudaCheckError(); #endif #ifdef PPMP diff --git a/src/integrators/simple_2D_cuda.cu b/src/integrators/simple_2D_cuda.cu index bf75e97cc..2e53d6c12 100644 --- a/src/integrators/simple_2D_cuda.cu +++ b/src/integrators/simple_2D_cuda.cu @@ -67,10 +67,10 @@ void Simple_Algorithm_2D_CUDA(Real *d_conserved, int nx, int ny, int x_off, int gama, 1, n_fields); #endif #ifdef PLMC - hipLaunchKernelGGL(PLMC_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, - gama, 0, n_fields); - hipLaunchKernelGGL(PLMC_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, dt, - gama, 1, n_fields); + hipLaunchKernelGGL(PLMC_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, dx, dt, gama, 0, + n_fields); + hipLaunchKernelGGL(PLMC_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Ly, Q_Ry, nx, ny, nz, dy, dt, gama, 1, + n_fields); #endif #ifdef PPMP hipLaunchKernelGGL(PPMP_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, diff --git a/src/integrators/simple_3D_cuda.cu b/src/integrators/simple_3D_cuda.cu index 01c9c6ac1..c52ecf0d2 100644 --- a/src/integrators/simple_3D_cuda.cu +++ b/src/integrators/simple_3D_cuda.cu @@ -100,12 +100,12 @@ void Simple_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, gama, 2, n_fields); #endif // PLMP #ifdef PLMC - hipLaunchKernelGGL(PLMC_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, - gama, 0, n_fields); - hipLaunchKernelGGL(PLMC_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, dt, - gama, 1, n_fields); - hipLaunchKernelGGL(PLMC_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lz, Q_Rz, nx, ny, nz, n_ghost, dz, dt, - gama, 2, n_fields); + hipLaunchKernelGGL(PLMC_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, dx, dt, gama, 0, + n_fields); + hipLaunchKernelGGL(PLMC_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Ly, Q_Ry, nx, ny, nz, dy, dt, gama, 1, + n_fields); + hipLaunchKernelGGL(PLMC_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lz, Q_Rz, nx, ny, nz, dz, dt, gama, 2, + n_fields); #endif #ifdef PPMP hipLaunchKernelGGL(PPMP_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, diff --git a/src/reconstruction/plmc_cuda.cu b/src/reconstruction/plmc_cuda.cu index 7bcad1ced..a5d0836f2 100644 --- a/src/reconstruction/plmc_cuda.cu +++ b/src/reconstruction/plmc_cuda.cu @@ -16,12 +16,12 @@ #endif // DE /*! \fn __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real - *dev_bounds_R, int nx, int ny, int nz, int n_ghost, Real dx, Real dt, Real + *dev_bounds_R, int nx, int ny, int nz, Real dx, Real dt, Real gamma, int dir) * \brief When passed a stencil of conserved variables, returns the left and right boundary values for the interface calculated using plm. */ -__global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, int ny, int nz, - int n_ghost, Real dx, Real dt, Real gamma, int dir, int n_fields) +__global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, int ny, int nz, Real dx, + Real dt, Real gamma, int dir, int n_fields) { int n_cells = nx * ny * nz; int o1, o2, o3; diff --git a/src/reconstruction/plmc_cuda.h b/src/reconstruction/plmc_cuda.h index 948a839dd..aad7a028d 100644 --- a/src/reconstruction/plmc_cuda.h +++ b/src/reconstruction/plmc_cuda.h @@ -14,8 +14,8 @@ gamma, int dir) * \brief When passed a stencil of conserved variables, returns the left and right boundary values for the interface calculated using plm. */ -__global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, int ny, int nz, - int n_ghost, Real dx, Real dt, Real gamma, int dir, int n_fields); +__global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, int ny, int nz, Real dx, + Real dt, Real gamma, int dir, int n_fields); #endif // PLMC From b814adcaf25581d796cda69463b274818615cd36 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Mon, 27 Mar 2023 15:49:49 -0400 Subject: [PATCH 336/694] Add PLMC test, remove extra #ifdef PLMC --- src/reconstruction/plmc_cuda.cu | 197 +++++++++++++------------- src/reconstruction/plmc_cuda.h | 4 - src/reconstruction/plmc_cuda_tests.cu | 121 ++++++++++++++++ 3 files changed, 218 insertions(+), 104 deletions(-) create mode 100644 src/reconstruction/plmc_cuda_tests.cu diff --git a/src/reconstruction/plmc_cuda.cu b/src/reconstruction/plmc_cuda.cu index a5d0836f2..177fdec3f 100644 --- a/src/reconstruction/plmc_cuda.cu +++ b/src/reconstruction/plmc_cuda.cu @@ -2,18 +2,17 @@ * \brief Definitions of the piecewise linear reconstruction functions with limiting applied in the characteristic variables, as described in Stone et al., 2008. */ -#ifdef PLMC - #include +#include - #include "../global/global.h" - #include "../global/global_cuda.h" - #include "../reconstruction/plmc_cuda.h" - #include "../utils/gpu.hpp" +#include "../global/global.h" +#include "../global/global_cuda.h" +#include "../reconstruction/plmc_cuda.h" +#include "../utils/gpu.hpp" - #ifdef DE // PRESSURE_DE - #include "../utils/hydro_utilities.h" - #endif // DE +#ifdef DE // PRESSURE_DE + #include "../utils/hydro_utilities.h" +#endif // DE /*! \fn __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, int ny, int nz, Real dx, Real dt, Real @@ -65,32 +64,32 @@ __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou Real d_L_iph, vx_L_iph, vy_L_iph, vz_L_iph, p_L_iph; Real d_R_imh, vx_R_imh, vy_R_imh, vz_R_imh, p_R_imh; Real C; - #ifndef VL +#ifndef VL Real dtodx = dt / dx; Real lambda_m, lambda_0, lambda_p; Real qx; Real lamdiff; Real sum_0, sum_1, sum_2, sum_3, sum_4; - #endif // not VL - #ifdef DE +#endif // not VL +#ifdef DE Real ge_i, ge_imo, ge_ipo; Real del_ge_L, del_ge_R, del_ge_C, del_ge_G; Real del_ge_m_i; Real ge_L_iph, ge_R_imh; Real E, E_kin, dge; - #ifndef VL + #ifndef VL Real sum_ge; - #endif // CTU - #endif // DE - #ifdef SCALAR + #endif // CTU +#endif // DE +#ifdef SCALAR Real scalar_i[NSCALARS], scalar_imo[NSCALARS], scalar_ipo[NSCALARS]; Real del_scalar_L[NSCALARS], del_scalar_R[NSCALARS], del_scalar_C[NSCALARS], del_scalar_G[NSCALARS]; Real del_scalar_m_i[NSCALARS]; Real scalar_L_iph[NSCALARS], scalar_R_imh[NSCALARS]; - #ifndef VL + #ifndef VL Real sum_scalar[NSCALARS]; - #endif // CTU - #endif // SCALAR + #endif // CTU +#endif // SCALAR // get a thread ID int blockId = blockIdx.x + blockIdx.y * gridDim.x; @@ -136,23 +135,23 @@ __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou vx_i = dev_conserved[o1 * n_cells + id] / d_i; vy_i = dev_conserved[o2 * n_cells + id] / d_i; vz_i = dev_conserved[o3 * n_cells + id] / d_i; - #ifdef DE // PRESSURE_DE +#ifdef DE // PRESSURE_DE E = dev_conserved[4 * n_cells + id]; E_kin = 0.5 * d_i * (vx_i * vx_i + vy_i * vy_i + vz_i * vz_i); dge = dev_conserved[(n_fields - 1) * n_cells + id]; p_i = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, dge, gamma); - #else // not DE +#else // not DE p_i = (dev_conserved[4 * n_cells + id] - 0.5 * d_i * (vx_i * vx_i + vy_i * vy_i + vz_i * vz_i)) * (gamma - 1.0); - #endif // PRESSURE_DE +#endif // PRESSURE_DE p_i = fmax(p_i, (Real)TINY_NUMBER); - #ifdef SCALAR +#ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { scalar_i[i] = dev_conserved[(5 + i) * n_cells + id] / d_i; } - #endif // SCALAR - #ifdef DE +#endif // SCALAR +#ifdef DE ge_i = dge / d_i; - #endif // DE +#endif // DE // cell i-1 switch (dir) { case 0: @@ -169,24 +168,24 @@ __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou vx_imo = dev_conserved[o1 * n_cells + id] / d_imo; vy_imo = dev_conserved[o2 * n_cells + id] / d_imo; vz_imo = dev_conserved[o3 * n_cells + id] / d_imo; - #ifdef DE // PRESSURE_DE +#ifdef DE // PRESSURE_DE E = dev_conserved[4 * n_cells + id]; E_kin = 0.5 * d_imo * (vx_imo * vx_imo + vy_imo * vy_imo + vz_imo * vz_imo); dge = dev_conserved[(n_fields - 1) * n_cells + id]; p_imo = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, dge, gamma); - #else // not DE +#else // not DE p_imo = (dev_conserved[4 * n_cells + id] - 0.5 * d_imo * (vx_imo * vx_imo + vy_imo * vy_imo + vz_imo * vz_imo)) * (gamma - 1.0); - #endif // PRESSURE_DE +#endif // PRESSURE_DE p_imo = fmax(p_imo, (Real)TINY_NUMBER); - #ifdef SCALAR +#ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { scalar_imo[i] = dev_conserved[(5 + i) * n_cells + id] / d_imo; } - #endif // SCALAR - #ifdef DE +#endif // SCALAR +#ifdef DE ge_imo = dge / d_imo; - #endif // DE +#endif // DE // cell i+1 switch (dir) { @@ -205,35 +204,35 @@ __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou vx_ipo = dev_conserved[o1 * n_cells + id] / d_ipo; vy_ipo = dev_conserved[o2 * n_cells + id] / d_ipo; vz_ipo = dev_conserved[o3 * n_cells + id] / d_ipo; - #ifdef DE // PRESSURE_DE +#ifdef DE // PRESSURE_DE E = dev_conserved[4 * n_cells + id]; E_kin = 0.5 * d_ipo * (vx_ipo * vx_ipo + vy_ipo * vy_ipo + vz_ipo * vz_ipo); dge = dev_conserved[(n_fields - 1) * n_cells + id]; p_ipo = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, dge, gamma); - #else // not DE +#else // not DE p_ipo = (dev_conserved[4 * n_cells + id] - 0.5 * d_ipo * (vx_ipo * vx_ipo + vy_ipo * vy_ipo + vz_ipo * vz_ipo)) * (gamma - 1.0); - #endif // PRESSURE_DE +#endif // PRESSURE_DE p_ipo = fmax(p_ipo, (Real)TINY_NUMBER); - #ifdef SCALAR +#ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { scalar_ipo[i] = dev_conserved[(5 + i) * n_cells + id] / d_ipo; } - #endif // SCALAR - #ifdef DE +#endif // SCALAR +#ifdef DE ge_ipo = dge / d_ipo; - #endif // DE +#endif // DE // calculate the adiabatic sound speed in cell i a_i = sqrt(gamma * p_i / d_i); - // Compute the eigenvalues of the linearized equations in the - // primitive variables using the cell-centered primitive variables - #ifndef VL +// Compute the eigenvalues of the linearized equations in the +// primitive variables using the cell-centered primitive variables +#ifndef VL lambda_m = vx_i - a_i; lambda_0 = vx_i; lambda_p = vx_i + a_i; - #endif // VL +#endif // VL // Compute the left, right, centered, and van Leer differences of the // primitive variables Note that here L and R refer to locations relative to @@ -287,7 +286,7 @@ __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou del_p_G = 0.0; } - #ifdef DE +#ifdef DE del_ge_L = ge_i - ge_imo; del_ge_R = ge_ipo - ge_i; del_ge_C = 0.5 * (ge_ipo - ge_imo); @@ -296,8 +295,8 @@ __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou } else { del_ge_G = 0.0; } - #endif // DE - #ifdef SCALAR +#endif // DE +#ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { del_scalar_L[i] = scalar_i[i] - scalar_imo[i]; del_scalar_R[i] = scalar_ipo[i] - scalar_i[i]; @@ -308,7 +307,7 @@ __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou del_scalar_G[i] = 0.0; } } - #endif // SCALAR +#endif // SCALAR // Project the left, right, centered and van Leer differences onto the // characteristic variables Stone Eqn 37 (del_a are differences in @@ -368,15 +367,15 @@ __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou lim_slope_b = fmin(fabs(del_a_4_C), fabs(del_a_4_G)); del_a_4_m = sgn_CUDA(del_a_4_C) * fmin(2.0 * lim_slope_a, lim_slope_b); } - #ifdef DE +#ifdef DE del_ge_m_i = 0.0; if (del_ge_L * del_ge_R > 0.0) { lim_slope_a = fmin(fabs(del_ge_L), fabs(del_ge_R)); lim_slope_b = fmin(fabs(del_ge_C), fabs(del_ge_G)); del_ge_m_i = sgn_CUDA(del_ge_C) * fmin(2.0 * lim_slope_a, lim_slope_b); } - #endif // DE - #ifdef SCALAR +#endif // DE +#ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { del_scalar_m_i[i] = 0.0; if (del_scalar_L[i] * del_scalar_R[i] > 0.0) { @@ -385,7 +384,7 @@ __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou del_scalar_m_i[i] = sgn_CUDA(del_scalar_C[i]) * fmin(2.0 * lim_slope_a, lim_slope_b); } } - #endif // SCALAR +#endif // SCALAR // Project the monotonized difference in the characteristic variables back // onto the primitive variables Stone Eqn 39 @@ -410,16 +409,16 @@ __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou vz_L_iph = vz_i + 0.5 * del_vz_m_i; p_L_iph = p_i + 0.5 * del_p_m_i; - #ifdef DE +#ifdef DE ge_R_imh = ge_i - 0.5 * del_ge_m_i; ge_L_iph = ge_i + 0.5 * del_ge_m_i; - #endif // DE - #ifdef SCALAR +#endif // DE +#ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { scalar_R_imh[i] = scalar_i[i] - 0.5 * del_scalar_m_i[i]; scalar_L_iph[i] = scalar_i[i] + 0.5 * del_scalar_m_i[i]; } - #endif // SCALAR +#endif // SCALAR C = d_R_imh + d_L_iph; d_R_imh = fmax(fmin(d_i, d_imo), d_R_imh); @@ -467,7 +466,7 @@ __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou del_vz_m_i = vz_L_iph - vz_R_imh; del_p_m_i = p_L_iph - p_R_imh; - #ifdef DE +#ifdef DE C = ge_R_imh + ge_L_iph; ge_R_imh = fmax(fmin(ge_i, ge_imo), ge_R_imh); ge_R_imh = fmin(fmax(ge_i, ge_imo), ge_R_imh); @@ -476,9 +475,9 @@ __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou ge_L_iph = fmin(fmax(ge_i, ge_ipo), ge_L_iph); ge_R_imh = C - ge_L_iph; del_ge_m_i = ge_L_iph - ge_R_imh; - #endif // DE +#endif // DE - #ifdef SCALAR +#ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { C = scalar_R_imh[i] + scalar_L_iph[i]; scalar_R_imh[i] = fmax(fmin(scalar_i[i], scalar_imo[i]), scalar_R_imh[i]); @@ -489,9 +488,9 @@ __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou scalar_R_imh[i] = C - scalar_L_iph[i]; del_scalar_m_i[i] = scalar_L_iph[i] - scalar_R_imh[i]; } - #endif // SCALAR +#endif // SCALAR - #ifndef VL +#ifndef VL // Integrate linear interpolation function over domain of dependence // defined by max(min) eigenvalue qx = -0.5 * fmin(lambda_m, 0.0) * dtodx; @@ -508,31 +507,31 @@ __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou vz_L_iph = vz_L_iph - qx * del_vz_m_i; p_L_iph = p_L_iph - qx * del_p_m_i; - #ifdef DE + #ifdef DE ge_R_imh = ge_R_imh + qx * del_ge_m_i; ge_L_iph = ge_L_iph - qx * del_ge_m_i; - #endif // DE + #endif // DE - #ifdef SCALAR + #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { scalar_R_imh[i] = scalar_R_imh[i] + qx * del_scalar_m_i[i]; scalar_L_iph[i] = scalar_L_iph[i] - qx * del_scalar_m_i[i]; } - #endif // SCALAR + #endif // SCALAR // Perform the characteristic tracing // Stone Eqns 42 & 43 // left-hand interface value, i+1/2 sum_0 = sum_1 = sum_2 = sum_3 = sum_4 = 0; - #ifdef DE + #ifdef DE sum_ge = 0; - #endif // DE - #ifdef SCALAR + #endif // DE + #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { sum_scalar[i] = 0.0; } - #endif // SCALAR + #endif // SCALAR if (lambda_m >= 0) { lamdiff = lambda_p - lambda_m; @@ -546,14 +545,14 @@ __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou sum_0 += lamdiff * (del_d_m_i - del_p_m_i / (a_i * a_i)); sum_2 += lamdiff * del_vy_m_i; sum_3 += lamdiff * del_vz_m_i; - #ifdef DE + #ifdef DE sum_ge += lamdiff * del_ge_m_i; - #endif // DE - #ifdef SCALAR + #endif // DE + #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { sum_scalar[i] += lamdiff * del_scalar_m_i[i]; } - #endif // SCALAR + #endif // SCALAR } if (lambda_p >= 0) { lamdiff = lambda_p - lambda_p; @@ -569,25 +568,25 @@ __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou vy_L_iph += 0.5 * dtodx * sum_2; vz_L_iph += 0.5 * dtodx * sum_3; p_L_iph += 0.5 * dtodx * sum_4; - #ifdef DE + #ifdef DE ge_L_iph += 0.5 * dtodx * sum_ge; - #endif // DE - #ifdef SCALAR + #endif // DE + #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { scalar_L_iph[i] += 0.5 * dtodx * sum_scalar[i]; } - #endif // SCALAR + #endif // SCALAR // right-hand interface value, i-1/2 sum_0 = sum_1 = sum_2 = sum_3 = sum_4 = 0; - #ifdef DE + #ifdef DE sum_ge = 0; - #endif // DE - #ifdef SCALAR + #endif // DE + #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { sum_scalar[i] = 0; } - #endif // SCALAR + #endif // SCALAR if (lambda_m <= 0) { lamdiff = lambda_m - lambda_m; @@ -601,14 +600,14 @@ __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou sum_0 += lamdiff * (del_d_m_i - del_p_m_i / (a_i * a_i)); sum_2 += lamdiff * del_vy_m_i; sum_3 += lamdiff * del_vz_m_i; - #ifdef DE + #ifdef DE sum_ge += lamdiff * del_ge_m_i; - #endif // DE - #ifdef SCALAR + #endif // DE + #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { sum_scalar[i] += lamdiff * del_scalar_m_i[i]; } - #endif // SCALAR + #endif // SCALAR } if (lambda_p <= 0) { lamdiff = lambda_m - lambda_p; @@ -624,15 +623,15 @@ __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou vy_R_imh += 0.5 * dtodx * sum_2; vz_R_imh += 0.5 * dtodx * sum_3; p_R_imh += 0.5 * dtodx * sum_4; - #ifdef DE + #ifdef DE ge_R_imh += 0.5 * dtodx * sum_ge; - #endif // DE - #ifdef SCALAR + #endif // DE + #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { scalar_R_imh[i] += 0.5 * dtodx * sum_scalar[i]; } - #endif // SCALAR - #endif // CTU + #endif // SCALAR +#endif // CTU // apply minimum constraints d_R_imh = fmax(d_R_imh, (Real)TINY_NUMBER); @@ -661,14 +660,14 @@ __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou dev_bounds_R[o3 * n_cells + id] = d_R_imh * vz_R_imh; dev_bounds_R[4 * n_cells + id] = (p_R_imh / (gamma - 1.0)) + 0.5 * d_R_imh * (vx_R_imh * vx_R_imh + vy_R_imh * vy_R_imh + vz_R_imh * vz_R_imh); - #ifdef SCALAR +#ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { dev_bounds_R[(5 + i) * n_cells + id] = d_R_imh * scalar_R_imh[i]; } - #endif // SCALAR - #ifdef DE +#endif // SCALAR +#ifdef DE dev_bounds_R[(n_fields - 1) * n_cells + id] = d_R_imh * ge_R_imh; - #endif // DE +#endif // DE // bounds_L refers to the left side of the i+1/2 interface id = xid + yid * nx + zid * nx * ny; dev_bounds_L[id] = d_L_iph; @@ -677,15 +676,13 @@ __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou dev_bounds_L[o3 * n_cells + id] = d_L_iph * vz_L_iph; dev_bounds_L[4 * n_cells + id] = (p_L_iph / (gamma - 1.0)) + 0.5 * d_L_iph * (vx_L_iph * vx_L_iph + vy_L_iph * vy_L_iph + vz_L_iph * vz_L_iph); - #ifdef SCALAR +#ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { dev_bounds_L[(5 + i) * n_cells + id] = d_L_iph * scalar_L_iph[i]; } - #endif // SCALAR - #ifdef DE +#endif // SCALAR +#ifdef DE dev_bounds_L[(n_fields - 1) * n_cells + id] = d_L_iph * ge_L_iph; - #endif // DE +#endif // DE } } - -#endif // PLMC diff --git a/src/reconstruction/plmc_cuda.h b/src/reconstruction/plmc_cuda.h index aad7a028d..505250044 100644 --- a/src/reconstruction/plmc_cuda.h +++ b/src/reconstruction/plmc_cuda.h @@ -7,8 +7,6 @@ #include "../global/global.h" -#ifdef PLMC - /*! \fn __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, int ny, int nz, int n_ghost, Real dx, Real dt, Real gamma, int dir) @@ -17,6 +15,4 @@ __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, int ny, int nz, Real dx, Real dt, Real gamma, int dir, int n_fields); -#endif // PLMC - #endif // PLMC_CUDA_H diff --git a/src/reconstruction/plmc_cuda_tests.cu b/src/reconstruction/plmc_cuda_tests.cu new file mode 100644 index 000000000..dfc690b87 --- /dev/null +++ b/src/reconstruction/plmc_cuda_tests.cu @@ -0,0 +1,121 @@ +/*! + * \file plmc_cuda_tests.cu + * \brief Tests for the contents of plmc_cuda.h and plmc_cuda.cu + * + */ + +// STL Includes +#include +#include +#include + +// External Includes +#include // Include GoogleTest and related libraries/headers + +// Local Includes +#include + +#include "../global/global.h" +#include "../io/io.h" +#include "../reconstruction/plmc_cuda.h" +#include "../utils/DeviceVector.h" +#include "../utils/testing_utilities.h" + +TEST(tHYDROPlmcReconstructor, CorrectInputExpectCorrectOutput) +{ + // Set up PRNG to use + std::mt19937_64 prng(42); + std::uniform_real_distribution doubleRand(0.1, 5); + + // Mock up needed information + size_t const nx = 4; + size_t const ny = 1; + size_t const nz = 1; + size_t const n_fields = 5; + double const dx = doubleRand(prng); + double const dt = doubleRand(prng); + double const gamma = 5.0 / 3.0; + + // Setup host grid. Fill host grid with random values and randomly assign maximum value + std::vector host_grid(nx * ny * nz * n_fields); + for (size_t i = 0; i < host_grid.size(); i++) { + host_grid.at(i) = doubleRand(prng); + } + + // Allocating and copying to device + cuda_utilities::DeviceVector dev_grid(host_grid.size()); + dev_grid.cpyHostToDevice(host_grid); + + // Fiducial Data + std::unordered_map fiducial_interface_left = {{1, 0.76773614979894189}, + {5, 1.927149727335306}, + {9, 2.666157385974266}, + {13, 4.7339225843521469}, + {17, 21.643063389483491}}; + std::unordered_map fiducial_interface_right = {{0, 0.76773614979894189}, + {4, 1.927149727335306}, + {8, 2.666157385974266}, + {12, 4.7339225843521469}, + {16, 21.643063389483491}}; + + // Loop over different directions + for (size_t direction = 0; direction < 3; direction++) { + // Assign the shape + size_t nx_rot, ny_rot, nz_rot; + switch (direction) { + case 0: + nx_rot = nx; + ny_rot = ny; + nz_rot = nz; + break; + case 1: + nx_rot = ny; + ny_rot = nz; + nz_rot = nx; + break; + case 2: + nx_rot = nz; + ny_rot = nx; + nz_rot = ny; + break; + } + + // Allocate device buffers + cuda_utilities::DeviceVector dev_interface_left(host_grid.size()); + cuda_utilities::DeviceVector dev_interface_right(host_grid.size()); + + // Launch kernel + hipLaunchKernelGGL(PLMC_cuda, dev_grid.size(), 1, 0, 0, dev_grid.data(), dev_interface_left.data(), + dev_interface_right.data(), nx_rot, ny_rot, nz_rot, dx, dt, gamma, direction, n_fields); + CudaCheckError(); + CHECK(cudaDeviceSynchronize()); + + // Perform Comparison + for (size_t i = 0; i < host_grid.size(); i++) { + double absolute_diff; + int64_t ulps_diff; + + // Check the left interface + double test_val = dev_interface_left.at(i); + double fiducial_val = (test_val == 0.0) ? 0.0 : fiducial_interface_left[i]; + + EXPECT_TRUE(testingUtilities::nearlyEqualDbl(fiducial_val, test_val, absolute_diff, ulps_diff)) + << "Error in left interface" << std::endl + << "The fiducial value is: " << fiducial_val << std::endl + << "The test value is: " << test_val << std::endl + << "The absolute difference is: " << absolute_diff << std::endl + << "The ULP difference is: " << ulps_diff << std::endl; + + // Check the left interface + test_val = dev_interface_right.at(i); + fiducial_val = (test_val == 0.0) ? 0.0 : fiducial_interface_right[i]; + + EXPECT_TRUE(testingUtilities::nearlyEqualDbl(fiducial_val, test_val, absolute_diff, ulps_diff)) + << "Error in rigt interface" << std::endl + << "The fiducial value is: " << fiducial_val << std::endl + << "The test value is: " << test_val << std::endl + << "The absolute difference is: " << absolute_diff << std::endl + << "The ULP difference is: " << ulps_diff << std::endl; + } + } +} From 94a91c9b87d12e703dbc31bee77ace1f4459e27f Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Tue, 28 Mar 2023 13:45:15 -0400 Subject: [PATCH 337/694] Add some comments --- src/reconstruction/plmc_cuda.cu | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/reconstruction/plmc_cuda.cu b/src/reconstruction/plmc_cuda.cu index 177fdec3f..4bad5db1a 100644 --- a/src/reconstruction/plmc_cuda.cu +++ b/src/reconstruction/plmc_cuda.cu @@ -58,7 +58,7 @@ __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou Real del_a_0_R, del_a_1_R, del_a_2_R, del_a_3_R, del_a_4_R; Real del_a_0_C, del_a_1_C, del_a_2_C, del_a_3_C, del_a_4_C; Real del_a_0_G, del_a_1_G, del_a_2_G, del_a_3_G, del_a_4_G; - Real del_a_0_m, del_a_1_m, del_a_2_m, del_a_3_m, del_a_4_m; + Real del_a_0_m, del_a_1_m, del_a_2_m, del_a_3_m, del_a_4_m; // _m means monotized slope Real lim_slope_a, lim_slope_b; Real del_d_m_i, del_vx_m_i, del_vy_m_i, del_vz_m_i, del_p_m_i; Real d_L_iph, vx_L_iph, vy_L_iph, vz_L_iph, p_L_iph; @@ -340,7 +340,7 @@ __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou // Apply monotonicity constraints to the differences in the characteristic // variables - del_a_0_m = del_a_1_m = del_a_2_m = del_a_3_m = del_a_4_m = 0.0; + del_a_0_m = del_a_1_m = del_a_2_m = del_a_3_m = del_a_4_m = 0.0; // This should be in the declaration if (del_a_0_L * del_a_0_R > 0.0) { lim_slope_a = fmin(fabs(del_a_0_L), fabs(del_a_0_R)); @@ -420,6 +420,7 @@ __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou } #endif // SCALAR + // try removing this on shock tubes C = d_R_imh + d_L_iph; d_R_imh = fmax(fmin(d_i, d_imo), d_R_imh); d_R_imh = fmin(fmax(d_i, d_imo), d_R_imh); From 993d56a3c370e798184da5860ad3daea08615df4 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Wed, 29 Mar 2023 11:19:43 -0400 Subject: [PATCH 338/694] Consolidate all pressure and energy functions The MHD and hydro pressure and energy functions have been consolidated into the hydro functions with #ifdefs to set them to hydro or MHD mode. --- src/grid/initial_conditions.cpp | 17 ++-- src/hydro/hydro_cuda.cu | 7 +- src/integrators/VL_3D_cuda.cu | 4 +- src/riemann_solvers/hlld_cuda.cu | 2 +- src/riemann_solvers/hlld_cuda.h | 15 +++ src/riemann_solvers/hlld_cuda_tests.cu | 13 +-- src/utils/hydro_utilities.h | 55 +++++++++-- src/utils/hydro_utilities_tests.cpp | 132 ++++++++++++++++++++++--- src/utils/mhd_utilities.h | 75 -------------- src/utils/mhd_utilities_tests.cu | 94 ------------------ 10 files changed, 200 insertions(+), 214 deletions(-) diff --git a/src/grid/initial_conditions.cpp b/src/grid/initial_conditions.cpp index 5b7b4581d..b20dcd996 100644 --- a/src/grid/initial_conditions.cpp +++ b/src/grid/initial_conditions.cpp @@ -19,6 +19,7 @@ #include "../io/io.h" #include "../mpi/mpi_routines.h" #include "../utils/error_handling.h" +#include "../utils/hydro_utilities.h" #include "../utils/math_utilities.h" #include "../utils/mhd_utilities.h" @@ -322,15 +323,15 @@ void Grid3D::Linear_Wave(Real rho, Real vx, Real vy, Real vz, Real P, Real A, Re Real x_pos, y_pos, z_pos; Get_Position(i_rot, j_rot, k_rot, &x_pos, &y_pos, &z_pos); - // set constant initial states. Note that mhd::utils::computeEnergy - // computes the hydro energy if MHD is turned off + // set constant initial states. Note that hydro_utilities::Calc_Energy_Primitive computes the correct MHD or + // hydro energy Real sine_wave = std::sin(2.0 * M_PI * x_pos); C.density[id] = rho; C.momentum_x[id] = rho * vx; C.momentum_y[id] = rho * vy; C.momentum_z[id] = rho * vz; - C.Energy[id] = mhd::utils::computeEnergy(P, rho, vx, vy, vz, Bx, By, Bz, gama); + C.Energy[id] = hydro_utilities::Calc_Energy_Primitive(P, rho, vx, vy, vz, gama, Bx, By, Bz); // add small-amplitude perturbations C.density[id] += A * rEigenVec_rho * sine_wave; C.momentum_x[id] += A * rEigenVec_MomentumX * sine_wave; @@ -480,7 +481,7 @@ void Grid3D::Riemann(Real rho_l, Real vx_l, Real vy_l, Real vz_l, Real P_l, Real C.momentum_x[id] = rho_l * vx_l; C.momentum_y[id] = rho_l * vy_l; C.momentum_z[id] = rho_l * vz_l; - C.Energy[id] = mhd::utils::computeEnergy(P_l, rho_l, vx_l, vy_l, vz_l, Bx_l, By_l, Bz_l, gama); + C.Energy[id] = hydro_utilities::Calc_Energy_Primitive(P_l, rho_l, vx_l, vy_l, vz_l, gama, Bx_l, By_l, Bz_l); #ifdef SCALAR #ifdef BASIC_SCALAR C.basic_scalar[id] = 1.0 * rho_l; @@ -494,7 +495,7 @@ void Grid3D::Riemann(Real rho_l, Real vx_l, Real vy_l, Real vz_l, Real P_l, Real C.momentum_x[id] = rho_r * vx_r; C.momentum_y[id] = rho_r * vy_r; C.momentum_z[id] = rho_r * vz_r; - C.Energy[id] = mhd::utils::computeEnergy(P_r, rho_r, vx_r, vy_r, vz_r, Bx_r, By_r, Bz_r, gama); + C.Energy[id] = hydro_utilities::Calc_Energy_Primitive(P_r, rho_r, vx_r, vy_r, vz_r, gama, Bx_r, By_r, Bz_r); #ifdef SCALAR #ifdef BASIC_SCALAR C.basic_scalar[id] = 0.0 * rho_r; @@ -1693,9 +1694,9 @@ void Grid3D::Circularly_Polarized_Alfven_Wave(struct parameters const P) // Compute the Energy auto const magnetic_centered = mhd::utils::cellCenteredMagneticFields(C.host, id, i, j, k, H.n_cells, H.nx, H.ny); - Real const energy = mhd::utils::computeEnergy( - pressure, density, momentum_x_rot / density, momentum_y_rot / density, momentum_z_rot / density, - magnetic_centered.x, magnetic_centered.y, magnetic_centered.z, ::gama); + Real const energy = hydro_utilities::Calc_Energy_Conserved(pressure, density, momentum_x_rot, momentum_y_rot, + momentum_z_rot, ::gama, magnetic_centered.x, + magnetic_centered.y, magnetic_centered.z); // Final assignment C.density[id] = density; diff --git a/src/hydro/hydro_cuda.cu b/src/hydro/hydro_cuda.cu index dad6f3b66..2daa65888 100644 --- a/src/hydro/hydro_cuda.cu +++ b/src/hydro/hydro_cuda.cu @@ -421,7 +421,7 @@ __device__ __host__ Real mhdInverseCrossingTime(Real const &E, Real const &d, Re Real const &gamma) { // Compute the gas pressure and fast magnetosonic speed - Real gasP = mhd::utils::computeGasPressure(E, d, vx * d, vy * d, vz * d, avgBx, avgBy, avgBz, gamma); + Real gasP = hydro_utilities::Calc_Pressure_Conserved(E, d, vx * d, vy * d, vz * d, gamma, avgBx, avgBy, avgBz); Real cf = mhd::utils::fastMagnetosonicSpeed(d, gasP, avgBx, avgBy, avgBz, gamma); // Find maximum inverse crossing time in the cell (i.e. minimum crossing time) @@ -787,8 +787,9 @@ __global__ void Partial_Update_Advected_Internal_Energy_3D(Real *dev_conserved, E_kin = hydro_utilities::Calc_Kinetic_Energy_From_Velocity(d, vx, vy, vz); #ifdef MHD // Add the magnetic energy - auto [centeredBx, centeredBy, centeredBz] = mhd::utils::cellCenteredMagneticFields( - dev_conserved, id, xid, yid, zid, n_cells, nx, ny) E_kin += mhd::utils::computeMagneticEnergy(magX, magY, magZ); + auto [centeredBx, centeredBy, centeredBz] = + mhd::utils::cellCenteredMagneticFields(dev_conserved, id, xid, yid, zid, n_cells, nx, ny); + E_kin += mhd::utils::computeMagneticEnergy(magX, magY, magZ); #endif // MHD P = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, GE, gamma); P = fmax(P, (Real)TINY_NUMBER); diff --git a/src/integrators/VL_3D_cuda.cu b/src/integrators/VL_3D_cuda.cu index 0a5aa2a13..3fca47d42 100644 --- a/src/integrators/VL_3D_cuda.cu +++ b/src/integrators/VL_3D_cuda.cu @@ -395,8 +395,8 @@ __global__ void Update_Conserved_Variables_3D_half(Real *dev_conserved, Real *de #ifdef MHD // Add the magnetic energy auto const [centeredBx, centeredBy, centeredBz] = - mhd::utils::cellCenteredMagneticFields(dev_conserved, id, xid, yid, zid, n_cells, nx, ny) E_kin += - mhd::utils::computeMagneticEnergy(centeredBx, centeredBy, centeredBz); + mhd::utils::cellCenteredMagneticFields(dev_conserved, id, xid, yid, zid, n_cells, nx, ny); + E_kin += mhd::utils::computeMagneticEnergy(centeredBx, centeredBy, centeredBz); #endif // MHD P = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, GE, gamma); P = fmax(P, (Real)TINY_NUMBER); diff --git a/src/riemann_solvers/hlld_cuda.cu b/src/riemann_solvers/hlld_cuda.cu index 3ab26f371..2f7cb0d58 100644 --- a/src/riemann_solvers/hlld_cuda.cu +++ b/src/riemann_solvers/hlld_cuda.cu @@ -202,7 +202,7 @@ __device__ __host__ mhd::_internal::State loadState(Real const *interfaceArr, Re #else // Note that this function does the positive pressure check // internally - state.gasPressure = mhd::utils::computeGasPressure(state, magneticX, gamma); + state.gasPressure = mhd::_internal::Calc_Pressure_Primitive(state, magneticX, gamma); #endif // DE state.totalPressure = diff --git a/src/riemann_solvers/hlld_cuda.h b/src/riemann_solvers/hlld_cuda.h index 7c0a36f40..c7363ed65 100644 --- a/src/riemann_solvers/hlld_cuda.h +++ b/src/riemann_solvers/hlld_cuda.h @@ -13,6 +13,7 @@ // Local Includes #include "../global/global.h" +#include "../utils/hydro_utilities.h" #ifdef CUDA /*! @@ -253,6 +254,20 @@ __device__ __host__ mhd::_internal::Flux computeDoubleStarFluxes( mhd::_internal::StarState const &starState, mhd::_internal::State const &state, mhd::_internal::Flux const &flux, mhd::_internal::Speeds const &speed, Real const &speedSide, Real const &speedSideStar); +/*! + * \brief Specialization of mhd::utils::computeGasPressure for use in the HLLD solver + * + * \param state The State to compute the gas pressure of + * \param magneticX The X magnetic field + * \param gamma The adiabatic index + * \return Real The gas pressure + */ +inline __host__ __device__ Real Calc_Pressure_Primitive(mhd::_internal::State const &state, Real const &magneticX, + Real const &gamma) +{ + return hydro_utilities::Calc_Pressure_Primitive(state.energy, state.density, state.velocityX, state.velocityY, + state.velocityZ, gamma, magneticX, state.magneticY, state.magneticZ); +} } // namespace _internal } // end namespace mhd #endif // CUDA diff --git a/src/riemann_solvers/hlld_cuda_tests.cu b/src/riemann_solvers/hlld_cuda_tests.cu index 6fd27f99d..07706f543 100644 --- a/src/riemann_solvers/hlld_cuda_tests.cu +++ b/src/riemann_solvers/hlld_cuda_tests.cu @@ -19,6 +19,7 @@ #include "../grid/grid_enum.h" #include "../riemann_solvers/hlld_cuda.h" // Include code to test #include "../utils/gpu.hpp" +#include "../utils/hydro_utilities.h" #include "../utils/mhd_utilities.h" #include "../utils/testing_utilities.h" @@ -234,12 +235,12 @@ class tMHDCalculateHLLDFluxesCUDA : public ::testing::Test output.at(1) = input.at(1) * input.at(0); // X Velocity to momentum output.at(2) = input.at(2) * input.at(0); // Y Velocity to momentum output.at(3) = input.at(3) * input.at(0); // Z Velocity to momentum - output.at(4) = mhd::utils::computeEnergy(input.at(4), input.at(0), input.at(1), input.at(2), input.at(3), - input.at(5), input.at(6), input.at(7), - gamma); // Pressure to Energy - output.at(5) = input.at(5); // X Magnetic Field - output.at(6) = input.at(6); // Y Magnetic Field - output.at(7) = input.at(7); // Z Magnetic Field + output.at(4) = + hydro_utilities::Calc_Energy_Primitive(input.at(4), input.at(0), input.at(1), input.at(2), input.at(3), gamma, + input.at(5), input.at(6), input.at(7)); // Pressure to Energy + output.at(5) = input.at(5); // X Magnetic Field + output.at(6) = input.at(6); // Y Magnetic Field + output.at(7) = input.at(7); // Z Magnetic Field #ifdef SCALAR std::vector conservedScalar(primitiveScalars.size()); diff --git a/src/utils/hydro_utilities.h b/src/utils/hydro_utilities.h index 358d7e352..8fbf4c017 100644 --- a/src/utils/hydro_utilities.h +++ b/src/utils/hydro_utilities.h @@ -14,6 +14,8 @@ #include "../global/global.h" #include "../global/global_cuda.h" #include "../utils/gpu.hpp" +#include "../utils/math_utilities.h" +#include "../utils/mhd_utilities.h" /*! * INDEX OF VARIABLES @@ -30,19 +32,29 @@ namespace hydro_utilities { inline __host__ __device__ Real Calc_Pressure_Primitive(Real const &E, Real const &d, Real const &vx, Real const &vy, - Real const &vz, Real const &gamma) + Real const &vz, Real const &gamma, Real const &magnetic_x = 0.0, + Real const &magnetic_y = 0.0, Real const &magnetic_z = 0.0) { - Real P; - P = (E - 0.5 * d * (vx * vx + vy * vy + vz * vz)) * (gamma - 1.0); - P = fmax(P, TINY_NUMBER); - return P; + Real pressure = (E - 0.5 * d * (vx * vx + ((vy * vy) + (vz * vz)))); + +#ifdef MHD + pressure -= mhd::utils::computeMagneticEnergy(magnetic_x, magnetic_y, magnetic_z); +#endif // MHD + + return fmax((gamma - 1.) * pressure, TINY_NUMBER); } inline __host__ __device__ Real Calc_Pressure_Conserved(Real const &E, Real const &d, Real const &mx, Real const &my, - Real const &mz, Real const &gamma) + Real const &mz, Real const &gamma, Real const &magnetic_x = 0.0, + Real const &magnetic_y = 0.0, Real const &magnetic_z = 0.0) { - Real P = (E - 0.5 * (mx * mx + my * my + mz * mz) / d) * (gamma - 1.); - return fmax(P, TINY_NUMBER); + Real pressure = (E - 0.5 * (mx * mx + my * my + mz * mz) / d); + +#ifdef MHD + pressure -= mhd::utils::computeMagneticEnergy(magnetic_x, magnetic_y, magnetic_z); +#endif // MHD + + return fmax((gamma - 1.) * pressure, TINY_NUMBER); } inline __host__ __device__ Real Calc_Temp(Real const &P, Real const &n) @@ -60,10 +72,33 @@ inline __host__ __device__ Real Calc_Temp_DE(Real const &d, Real const &ge, Real #endif // DE inline __host__ __device__ Real Calc_Energy_Primitive(Real const &P, Real const &d, Real const &vx, Real const &vy, - Real const &vz, Real const &gamma) + Real const &vz, Real const &gamma, Real const &magnetic_x = 0.0, + Real const &magnetic_y = 0.0, Real const &magnetic_z = 0.0) { // Compute and return energy - return (fmax(P, TINY_NUMBER) / (gamma - 1.)) + 0.5 * d * (vx * vx + vy * vy + vz * vz); + Real energy = (fmax(P, TINY_NUMBER) / (gamma - 1.)) + 0.5 * d * (vx * vx + vy * vy + vz * vz); + +#ifdef MHD + energy += mhd::utils::computeMagneticEnergy(magnetic_x, magnetic_y, magnetic_z); +#endif // MHD + + return energy; +} + +inline __host__ __device__ Real Calc_Energy_Conserved(Real const &P, Real const &d, Real const &momentum_x, + Real const &momentum_y, Real const &momentum_z, Real const &gamma, + Real const &magnetic_x = 0.0, Real const &magnetic_y = 0.0, + Real const &magnetic_z = 0.0) +{ + // Compute and return energy + Real energy = (fmax(P, TINY_NUMBER) / (gamma - 1.)) + + (0.5 / d) * (momentum_x * momentum_x + momentum_y * momentum_y + momentum_z * momentum_z); + +#ifdef MHD + energy += mhd::utils::computeMagneticEnergy(magnetic_x, magnetic_y, magnetic_z); +#endif // MHD + + return energy; } inline __host__ __device__ Real Get_Pressure_From_DE(Real const &E, Real const &U_total, Real const &U_advected, diff --git a/src/utils/hydro_utilities_tests.cpp b/src/utils/hydro_utilities_tests.cpp index 6c8c37cf1..7bab43b69 100644 --- a/src/utils/hydro_utilities_tests.cpp +++ b/src/utils/hydro_utilities_tests.cpp @@ -50,33 +50,78 @@ struct TestParams { std::vector ge{4.890374019e-10, 1.0756968986e2, 3.8740982372e100}; std::vector U_total{2.389074039e-10, 4.890374019e2, 6.8731436293e100}; std::vector U_advected{1.3847303413e-10, 1.0756968986e2, 1.0882403847e100}; + std::vector pressureTotal{8.1704748693e-100, 2.6084125198e2, 1.8242151369e100}; + std::vector magnetic_x{2.8568843801e-100, 9.2400807786e2, 2.1621115264e100}; + std::vector magnetic_y{9.2900880344e-100, 8.0382409757e2, 6.6499532343e100}; + std::vector magnetic_z{9.5795678229e-100, 3.3284839263e2, 9.2337456649e100}; std::vector names{"Small number case", "Medium number case", "Large number case"}; }; } // namespace -TEST(tHYDROHydroUtilsCalcPressurePrimitive, CorrectInputExpectCorrectOutput) +TEST(tHYDROtMHDHydroUtilsCalcPressurePrimitive, CorrectInputExpectCorrectOutput) { TestParams parameters; - std::vector fiducial_Ps{1e-20, 139983415580.5549, 1.2697896247496674e+301}; +#ifdef MHD + std::vector fiducial_pressure{0, 139982878676.5015, 1.2697896247496674e+301}; +#else // not MHD + std::vector fiducial_pressure{1e-20, 139983415580.5549, 1.2697896247496674e+301}; +#endif // MHD for (size_t i = 0; i < parameters.names.size(); i++) { - Real test_Ps = hydro_utilities::Calc_Pressure_Primitive(parameters.E.at(i), parameters.d.at(i), parameters.vx.at(i), - parameters.vy.at(i), parameters.vz.at(i), parameters.gamma); + Real test_Ps = hydro_utilities::Calc_Pressure_Primitive( + parameters.E.at(i), parameters.d.at(i), parameters.vx.at(i), parameters.vy.at(i), parameters.vz.at(i), + parameters.gamma, parameters.magnetic_x.at(i), parameters.magnetic_y.at(i), parameters.magnetic_z.at(i)); - testingUtilities::checkResults(fiducial_Ps.at(i), test_Ps, parameters.names.at(i)); + testingUtilities::checkResults(fiducial_pressure.at(i), test_Ps, parameters.names.at(i)); } } -TEST(tHYDROHydroUtilsCalcPressureConserved, CorrectInputExpectCorrectOutput) +TEST(tHYDROtMHDHydroUtilsCalcPressureConserved, CorrectInputExpectCorrectOutput) { TestParams parameters; - std::vector fiducial_Ps{1e-20, 139984604373.87094, 1.3965808056866668e+301}; +#ifdef MHD + std::vector fiducial_pressure{0, 139984067469.81754, 1.3965808056866668e+301}; +#else // not MHD + std::vector fiducial_pressure{1e-20, 139984604373.87094, 1.3965808056866668e+301}; +#endif // MHD for (size_t i = 0; i < parameters.names.size(); i++) { - Real test_Ps = hydro_utilities::Calc_Pressure_Conserved(parameters.E.at(i), parameters.d.at(i), parameters.mx.at(i), - parameters.my.at(i), parameters.mz.at(i), parameters.gamma); + Real test_pressure = hydro_utilities::Calc_Pressure_Conserved( + parameters.E.at(i), parameters.d.at(i), parameters.mx.at(i), parameters.my.at(i), parameters.mz.at(i), + parameters.gamma, parameters.magnetic_x.at(i), parameters.magnetic_y.at(i), parameters.magnetic_z.at(i)); - testingUtilities::checkResults(fiducial_Ps.at(i), test_Ps, parameters.names.at(i)); + testingUtilities::checkResults(fiducial_pressure.at(i), test_pressure, parameters.names.at(i)); + } +} + +TEST(tHYDROtMHDHydroUtilsCalcPressurePrimitive, NegativePressureExpectAutomaticFix) +{ + TestParams parameters; + + for (size_t i = 0; i < parameters.names.size(); i++) { + Real test_pressure = hydro_utilities::Calc_Pressure_Primitive( + parameters.E.at(i), parameters.d.at(i), 1E4 * parameters.vx.at(i), parameters.vy.at(i), parameters.vz.at(i), + parameters.gamma, parameters.magnetic_x.at(i), parameters.magnetic_y.at(i), parameters.magnetic_z.at(i)); + + // I'm using the binary equality assertion here since in the case of + // negative pressure the function should return exactly TINY_NUMBER + EXPECT_EQ(TINY_NUMBER, test_pressure) << "Difference in " << parameters.names.at(i) << std::endl; + } +} + +TEST(tHYDROtMHDHydroUtilsCalcPressureConserved, NegativePressureExpectAutomaticFix) +{ + TestParams parameters; + + for (size_t i = 0; i < parameters.names.size() - 1; i++) { + Real test_pressure = hydro_utilities::Calc_Pressure_Conserved( + 1E-10 * parameters.E.at(i), parameters.d.at(i), 1E4 * parameters.mx.at(i), 1E4 * parameters.my.at(i), + 1E4 * parameters.mz.at(i), parameters.gamma, parameters.magnetic_x.at(i), parameters.magnetic_y.at(i), + parameters.magnetic_z.at(i)); + + // I'm using the binary equality assertion here since in the case of + // negative pressure the function should return exactly TINY_NUMBER + EXPECT_EQ(TINY_NUMBER, test_pressure) << "Difference in " << parameters.names.at(i) << std::endl; } } @@ -107,16 +152,73 @@ TEST(tHYDROHydroUtilsCalcTempDE, CorrectInputExpectCorrectOutput) } #endif // DE -TEST(tHYDROHydroUtilsCalcEnergyPrimitive, CorrectInputExpectCorrectOutput) +TEST(tHYDROtMHDHydroUtilsCalcEnergyPrimitive, CorrectInputExpectCorrectOutput) { TestParams parameters; - std::vector fiducial_Es{3.3366124363499997e-10, 1784507.7619407175, 1.9018677140549926e+300}; +#ifdef MHD + std::vector fiducial_energy{3.3366124363499997e-10, 2589863.8420712831, 1.9018677140549926e+300}; +#else // not MHD + std::vector fiducial_energy{3.3366124363499997e-10, 1784507.7619407175, 1.9018677140549926e+300}; +#endif // MHD for (size_t i = 0; i < parameters.names.size(); i++) { - Real test_Es = hydro_utilities::Calc_Energy_Primitive(parameters.P.at(i), parameters.d.at(i), parameters.vx.at(i), - parameters.vy.at(i), parameters.vz.at(i), parameters.gamma); + Real test_Es = hydro_utilities::Calc_Energy_Primitive( + parameters.P.at(i), parameters.d.at(i), parameters.vx.at(i), parameters.vy.at(i), parameters.vz.at(i), + parameters.gamma, parameters.magnetic_x.at(i), parameters.magnetic_y.at(i), parameters.magnetic_z.at(i)); + + testingUtilities::checkResults(fiducial_energy.at(i), test_Es, parameters.names.at(i)); + } +} + +TEST(tHYDROtMHDHydroUtilsCalcEnergyConserved, CorrectInputExpectCorrectOutput) +{ + TestParams parameters; +#ifdef MHD + std::vector fiducial_energy{3.3366124363499997e-10, 806673.86799851817, 6.7079331637514162e+201}; +#else // not MHD + std::vector fiducial_energy{3.3366124363499997e-10, 1317.7878679524658, 1.0389584427972784e+101}; +#endif // MHD + + for (size_t i = 0; i < parameters.names.size(); i++) { + Real test_Es = hydro_utilities::Calc_Energy_Conserved( + parameters.P.at(i), parameters.d.at(i), parameters.mx.at(i), parameters.my.at(i), parameters.mz.at(i), + parameters.gamma, parameters.magnetic_x.at(i), parameters.magnetic_y.at(i), parameters.magnetic_z.at(i)); + + testingUtilities::checkResults(fiducial_energy.at(i), test_Es, parameters.names.at(i)); + } +} + +TEST(tHYDROtMHDHydroUtilsCalcEnergyPrimitive, NegativePressureExpectAutomaticFix) +{ + TestParams parameters; +#ifdef MHD + std::vector fiducial_energy{1.4999999999999998e-20, 2588562.2478059679, 1.9018677140549926e+300}; +#else // not MHD + std::vector fiducial_energy{0, 1783206.1676754025, 1.9018677140549926e+300}; +#endif // MHD + for (size_t i = 0; i < parameters.names.size(); i++) { + Real test_Es = hydro_utilities::Calc_Energy_Primitive( + -parameters.P.at(i), parameters.d.at(i), parameters.vx.at(i), parameters.vy.at(i), parameters.vz.at(i), + parameters.gamma, parameters.magnetic_x.at(i), parameters.magnetic_y.at(i), parameters.magnetic_z.at(i)); + + testingUtilities::checkResults(fiducial_energy.at(i), test_Es, parameters.names.at(i)); + } +} + +TEST(tHYDROtMHDHydroUtilsCalcEnergyConserved, NegativePressureExpectAutomaticFix) +{ + TestParams parameters; +#ifdef MHD + std::vector fiducial_energy{0, 805372.27373320318, 6.7079331637514162e+201}; +#else // not MHD + std::vector fiducial_energy{0, 16.193602637465997, 3.0042157852278494e+99}; +#endif // MHD + for (size_t i = 0; i < parameters.names.size(); i++) { + Real test_Es = hydro_utilities::Calc_Energy_Conserved( + -parameters.P.at(i), parameters.d.at(i), parameters.mx.at(i), parameters.my.at(i), parameters.mz.at(i), + parameters.gamma, parameters.magnetic_x.at(i), parameters.magnetic_y.at(i), parameters.magnetic_z.at(i)); - testingUtilities::checkResults(fiducial_Es.at(i), test_Es, parameters.names.at(i)); + testingUtilities::checkResults(fiducial_energy.at(i), test_Es, parameters.names.at(i)); } } diff --git a/src/utils/mhd_utilities.h b/src/utils/mhd_utilities.h index 57bf14549..8ced8b3ba 100644 --- a/src/utils/mhd_utilities.h +++ b/src/utils/mhd_utilities.h @@ -79,81 +79,6 @@ inline __host__ __device__ Real computeMagneticEnergy(Real const &magneticX, Rea } // ========================================================================= -// ========================================================================= -/*! - * \brief Compute the energy in a cell. If MHD is not defined then simply - * return the hydro only energy - * - * \param[in] pressure The gas pressure - * \param[in] density The density - * \param[in] velocityX Velocity in the x-direction - * \param[in] velocityY Velocity in the y-direction - * \param[in] velocityZ Velocity in the z-direction - * \param[in] magneticX Magnetic field in the x-direction - * \param[in] magneticY Magnetic field in the y-direction - * \param[in] magneticZ Magnetic field in the z-direction - * \param[in] gamma The adiabatic index - * \return Real The energy within a cell - */ -inline __host__ __device__ Real computeEnergy(Real const &pressure, Real const &density, Real const &velocityX, - Real const &velocityY, Real const &velocityZ, Real const &magneticX, - Real const &magneticY, Real const &magneticZ, Real const &gamma) -{ - // Compute and return energy - Real energy = (fmax(pressure, TINY_NUMBER) / (gamma - 1.)) + - 0.5 * density * (velocityX * velocityX + ((velocityY * velocityY) + (velocityZ * velocityZ))); -#ifdef MHD - energy += computeMagneticEnergy(magneticX, magneticY, magneticZ); -#endif // MHD - - return energy; -} -// ========================================================================= - -// ========================================================================= -/*! - * \brief Compute the MHD gas pressure in a cell - * - * \param[in] energy The energy - * \param[in] density The density - * \param[in] momentumX Momentum in the x-direction - * \param[in] momentumY Momentum in the y-direction - * \param[in] momentumZ Momentum in the z-direction - * \param[in] magneticX Magnetic field in the x-direction - * \param[in] magneticY Magnetic field in the y-direction - * \param[in] magneticZ Magnetic field in the z-direction - * \param[in] gamma The adiabatic index - * \return Real The gas pressure in a cell - */ -inline __host__ __device__ Real computeGasPressure(Real const &energy, Real const &density, Real const &momentumX, - Real const &momentumY, Real const &momentumZ, Real const &magneticX, - Real const &magneticY, Real const &magneticZ, Real const &gamma) -{ - Real pressure = - (gamma - 1.) * - (energy - 0.5 * (momentumX * momentumX + ((momentumY * momentumY) + (momentumZ * momentumZ))) / density - - computeMagneticEnergy(magneticX, magneticY, magneticZ)); - - return fmax(pressure, TINY_NUMBER); -} - -/*! - * \brief Specialization of mhd::utils::computeGasPressure for use in the HLLD solver - * - * \param state The State to compute the gas pressure of - * \param magneticX The X magnetic field - * \param gamma The adiabatic index - * \return Real The gas pressure - */ -inline __host__ __device__ Real computeGasPressure(mhd::_internal::State const &state, Real const &magneticX, - Real const &gamma) -{ - return mhd::utils::computeGasPressure(state.energy, state.density, state.velocityX * state.density, - state.velocityY * state.density, state.velocityZ * state.density, magneticX, - state.magneticY, state.magneticZ, gamma); -} -// ========================================================================= - // ========================================================================= /*! * \brief Compute the MHD thermal energy in a cell diff --git a/src/utils/mhd_utilities_tests.cu b/src/utils/mhd_utilities_tests.cu index 7383ef0e3..980259d28 100644 --- a/src/utils/mhd_utilities_tests.cu +++ b/src/utils/mhd_utilities_tests.cu @@ -46,100 +46,6 @@ struct testParams { } // namespace // ============================================================================= -// ============================================================================= -// Tests for the mhd::utils::computeEnergy function -// ============================================================================= -/*! - * \brief Test the mhd::utils::computeEnergy function with the standard set of - * parameters - * - */ -TEST(tMHDComputeEnergy, CorrectInputExpectCorrectOutput) -{ - testParams parameters; - std::vector fiducialEnergies{3.3366124363499995e-100, 137786230.15630624, 9.2884430880010847e+301}; - - for (size_t i = 0; i < parameters.names.size(); i++) { - Real testEnergy = - mhd::utils::computeEnergy(parameters.pressureGas.at(i), parameters.density.at(i), parameters.velocityX.at(i), - parameters.velocityY.at(i), parameters.velocityZ.at(i), parameters.magneticX.at(i), - parameters.magneticY.at(i), parameters.magneticZ.at(i), parameters.gamma); - - testingUtilities::checkResults(fiducialEnergies.at(i), testEnergy, parameters.names.at(i)); - } -} - -/*! - * \brief Test the mhd::utils::computeEnergy function with a the standard set of - * parameters except pressure is now negative - * - */ -TEST(tMHDComputeEnergy, NegativePressureExpectAutomaticFix) -{ - testParams parameters; - std::vector fiducialEnergies{3.3366124363499995e-100, 137784928.56204093, 9.2884430880010847e+301}; - - for (size_t i = 0; i < parameters.names.size(); i++) { - Real testEnergy = - mhd::utils::computeEnergy(-parameters.pressureGas.at(i), parameters.density.at(i), parameters.velocityX.at(i), - parameters.velocityY.at(i), parameters.velocityZ.at(i), parameters.magneticX.at(i), - parameters.magneticY.at(i), parameters.magneticZ.at(i), parameters.gamma); - - testingUtilities::checkResults(fiducialEnergies.at(i), testEnergy, parameters.names.at(i)); - } -} -// ============================================================================= -// End of tests for the mhd::utils::computeEnergy function -// ============================================================================= - -// ============================================================================= -// Tests for the mhd::utils::computeGasPressure function -// ============================================================================= -/*! - * \brief Test the mhd::utils::computeGasPressure function with the standard set - * of parameters. Energy has been increased to avoid negative pressures - * - */ -TEST(tMHDComputeGasPressure, CorrectInputExpectCorrectOutput) -{ - testParams parameters; - std::vector energyMultiplier{3, 1.0E4, 1.0E105}; - std::vector fiducialGasPressures{1.8586864490415075e-100, 4591434.7663756227, 1.29869419465575e+205}; - - for (size_t i = 0; i < parameters.names.size(); i++) { - Real testGasPressure = mhd::utils::computeGasPressure( - energyMultiplier.at(i) * parameters.energy.at(i), parameters.density.at(i), parameters.momentumX.at(i), - parameters.momentumY.at(i), parameters.momentumZ.at(i), parameters.magneticX.at(i), parameters.magneticY.at(i), - parameters.magneticZ.at(i), parameters.gamma); - - testingUtilities::checkResults(fiducialGasPressures.at(i), testGasPressure, parameters.names.at(i)); - } -} - -/*! - * \brief Test the mhd::utils::computeGasPressure function with a the standard - * set of parameters which produce negative pressures - * - */ -TEST(tMHDComputeGasPressure, NegativePressureExpectAutomaticFix) -{ - testParams parameters; - - for (size_t i = 0; i < parameters.names.size(); i++) { - Real testGasPressure = mhd::utils::computeGasPressure( - parameters.energy.at(i), parameters.density.at(i), parameters.momentumX.at(i), parameters.momentumY.at(i), - parameters.momentumZ.at(i), parameters.magneticX.at(i), parameters.magneticY.at(i), parameters.magneticZ.at(i), - parameters.gamma); - - // I'm using the binary equality assertion here since in the case of - // negative pressure the function should return exactly TINY_NUMBER - EXPECT_EQ(TINY_NUMBER, testGasPressure) << "Difference in " << parameters.names.at(i) << std::endl; - } -} -// ============================================================================= -// End of tests for the mhd::utils::computeGasPressure function -// ============================================================================= - // ============================================================================= // Tests for the mhd::utils::computeThermalEnergy function // ============================================================================= From 6880986ad9550231fbbce6013855a70805f227bc Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Wed, 29 Mar 2023 11:40:22 -0400 Subject: [PATCH 339/694] Add PLMC load data function --- src/reconstruction/plmc_cuda.cu | 99 +++++++++++++++++++++++++++++---- src/reconstruction/plmc_cuda.h | 25 +++++++++ src/utils/hydro_utilities.h | 3 +- 3 files changed, 116 insertions(+), 11 deletions(-) diff --git a/src/reconstruction/plmc_cuda.cu b/src/reconstruction/plmc_cuda.cu index 4bad5db1a..c6fc76403 100644 --- a/src/reconstruction/plmc_cuda.cu +++ b/src/reconstruction/plmc_cuda.cu @@ -8,7 +8,10 @@ #include "../global/global.h" #include "../global/global_cuda.h" #include "../reconstruction/plmc_cuda.h" +#include "../utils/cuda_utilities.h" #include "../utils/gpu.hpp" +#include "../utils/hydro_utilities.h" +#include "../utils/mhd_utilities.h" #ifdef DE // PRESSURE_DE #include "../utils/hydro_utilities.h" @@ -22,23 +25,26 @@ __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, int ny, int nz, Real dx, Real dt, Real gamma, int dir, int n_fields) { - int n_cells = nx * ny * nz; + // Compute the total number of cells + int const n_cells = nx * ny * nz; + + // Set the field indices for the various directions int o1, o2, o3; switch (dir) { case 0: - o1 = 1; - o2 = 2; - o3 = 3; + o1 = grid_enum::momentum_x; + o2 = grid_enum::momentum_y; + o3 = grid_enum::momentum_z; break; case 1: - o1 = 2; - o2 = 3; - o3 = 1; + o1 = grid_enum::momentum_y; + o2 = grid_enum::momentum_z; + o3 = grid_enum::momentum_x; break; case 2: - o1 = 3; - o2 = 1; - o3 = 2; + o1 = grid_enum::momentum_z; + o2 = grid_enum::momentum_x; + o3 = grid_enum::momentum_y; break; } @@ -687,3 +693,76 @@ __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou #endif // DE } } + +// ============================================================================= +plmc_utils::PlmcPrimitive __device__ __host__ plmc_utils::Load_Data( + Real const *dev_conserved, size_t const &xid, size_t const &yid, size_t const &zid, size_t const &nx, + size_t const &ny, size_t const &n_cells, size_t const &o1, size_t const &o2, size_t const &o3, Real const &gamma) +{ + // Compute index + size_t const id = cuda_utilities::compute1DIndex(xid, yid, zid, nx, ny); + + // Declare the variable we will return + PlmcPrimitive loaded_data; + + // Load hydro variables except pressure + loaded_data.density = dev_conserved[grid_enum::density * n_cells + id]; + loaded_data.velocity_x = dev_conserved[o1 * n_cells + id] / loaded_data.density; + loaded_data.velocity_y = dev_conserved[o2 * n_cells + id] / loaded_data.density; + loaded_data.velocity_z = dev_conserved[o3 * n_cells + id] / loaded_data.density; + + // Load MHD variables. Note that I only need the centered values for the transverse fields except for the initial + // computation of the primitive variables +#ifdef MHD + auto magnetic_centered = mhd::utils::cellCenteredMagneticFields(dev_conserved, id, xid, yid, zid, n_cells, nx, ny); + switch (o1) { + case grid_enum::momentum_x: + loaded_data.magnetic_y = magnetic_centered.y; + loaded_data.magnetic_z = magnetic_centered.z; + break; + case grid_enum::momentum_y: + loaded_data.magnetic_y = magnetic_centered.z; + loaded_data.magnetic_z = magnetic_centered.x; + break; + case grid_enum::momentum_z: + loaded_data.magnetic_y = magnetic_centered.x; + loaded_data.magnetic_z = magnetic_centered.y; + break; + } +#endif // MHD + +// Load pressure accounting for duel energy if enabled +#ifdef DE // DE + Real const E = dev_conserved[grid_enum::Energy * n_cells + id]; + Real const gas_energy = dev_conserved[grid_enum::GasEnergy * n_cells + id]; + + Real E_non_thermal = hydro_utilities::Calc_Kinetic_Energy_From_Velocity( + loaded_data.density, loaded_data.velocity_x, loaded_data.velocity_y, loaded_data.velocity_z); + + #ifdef MHD + E_non_thermal += mhd::utils::computeMagneticEnergy(magnetic_centered.x, magnetic_centered.y, magnetic_centered.z); + #endif // MHD + + loaded_data.pressure = hydro_utilities::Get_Pressure_From_DE(E, E - E_non_thermal, gas_energy, gamma); + loaded_data.gas_energy = gas_energy / loaded_data.density; +#else // not DE + #ifdef MHD + loaded_data.pressure = + hydro_utilities::Calc_Pressure_Primitive(dev_conserved[grid_enum::Energy * n_cells + id], loaded_data.density, + loaded_data.velocity_x, loaded_data.velocity_y, loaded_data.velocity_z, + gamma, magnetic_centered.x, magnetic_centered.y, magnetic_centered.z); + #else // not MHD + loaded_data.pressure = hydro_utilities::Calc_Pressure_Primitive( + dev_conserved[grid_enum::Energy * n_cells + id], loaded_data.density, loaded_data.velocity_x, + loaded_data.velocity_y, loaded_data.velocity_z, gamma); + #endif // MHD +#endif // DE + +#ifdef SCALAR + for (size_t i = 0; i < grid_enum::nscalars; i++) { + loaded_data.scalar[i] = dev_conserved[(grid_enum::scalar + i) * n_cells + id] / loaded_data.density; + } +#endif // SCALAR + + return loaded_data; +} \ No newline at end of file diff --git a/src/reconstruction/plmc_cuda.h b/src/reconstruction/plmc_cuda.h index 505250044..986d05049 100644 --- a/src/reconstruction/plmc_cuda.h +++ b/src/reconstruction/plmc_cuda.h @@ -6,6 +6,7 @@ #define PLMC_CUDA_H #include "../global/global.h" +#include "../grid/grid_enum.h" /*! \fn __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, int ny, int nz, int n_ghost, Real dx, Real dt, Real @@ -15,4 +16,28 @@ __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, int ny, int nz, Real dx, Real dt, Real gamma, int dir, int n_fields); +namespace plmc_utils +{ +struct PlmcPrimitive { + // Hydro variables + Real density, velocity_x, velocity_y, velocity_z, pressure; + +#ifdef MHD + Real magnetic_y, magnetic_z; +#endif // MHD + +#ifdef DE + Real gas_energy; +#endif // DE + +#ifdef SCALAR + Real scalar[grid_enum::nscalars]; +#endif // SCALAR +}; + +PlmcPrimitive __device__ __host__ Load_Data(Real const *dev_conserved, size_t const &xid, size_t const &yid, + size_t const &zid, size_t const &nx, size_t const &ny, + size_t const &n_cells, size_t const &o1, size_t const &o2, size_t const &o3, + Real const &gamma); +} // namespace plmc_utils #endif // PLMC_CUDA_H diff --git a/src/utils/hydro_utilities.h b/src/utils/hydro_utilities.h index 8fbf4c017..fb1621d09 100644 --- a/src/utils/hydro_utilities.h +++ b/src/utils/hydro_utilities.h @@ -114,7 +114,8 @@ inline __host__ __device__ Real Get_Pressure_From_DE(Real const &E, Real const & U = U_advected; } P = U * (gamma - 1.0); - return P; + return fmax(P, (Real)TINY_NUMBER); + ; } /*! From cd567665d221c45ef5bf5df4b1b4e9d37094aebc Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Wed, 29 Mar 2023 14:18:21 -0400 Subject: [PATCH 340/694] PLMC update threadguard and tests --- src/reconstruction/plmc_cuda.cu | 930 +++++++++++++------------- src/reconstruction/plmc_cuda_tests.cu | 59 +- 2 files changed, 477 insertions(+), 512 deletions(-) diff --git a/src/reconstruction/plmc_cuda.cu b/src/reconstruction/plmc_cuda.cu index c6fc76403..fa9ba46a2 100644 --- a/src/reconstruction/plmc_cuda.cu +++ b/src/reconstruction/plmc_cuda.cu @@ -105,593 +105,567 @@ __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou int yid = (tid - zid * nx * ny) / nx; int xid = tid - zid * nx * ny - yid * nx; - int xs, xe, ys, ye, zs, ze; - switch (dir) { - case 0: - xs = 1; - xe = nx - 2; - ys = 0; - ye = ny; - zs = 0; - ze = nz; - break; - case 1: - xs = 0; - xe = nx; - ys = 1; - ye = ny - 2; - zs = 0; - ze = nz; - break; - case 2: - xs = 0; - xe = nx; - ys = 0; - ye = ny; - zs = 1; - ze = nz - 2; - break; + // Thread guard to prevent overrun + if (xid < 1 or xid >= nx - 2 or yid < 1 or yid >= ny - 2 or zid < 1 or zid >= nz - 2) { + return; } - - if (xid >= xs && xid < xe && yid >= ys && yid < ye && zid >= zs && zid < ze) { - // load the 3-cell stencil into registers - // cell i - id = xid + yid * nx + zid * nx * ny; - d_i = dev_conserved[id]; - vx_i = dev_conserved[o1 * n_cells + id] / d_i; - vy_i = dev_conserved[o2 * n_cells + id] / d_i; - vz_i = dev_conserved[o3 * n_cells + id] / d_i; + // load the 3-cell stencil into registers + // cell i + id = xid + yid * nx + zid * nx * ny; + d_i = dev_conserved[id]; + vx_i = dev_conserved[o1 * n_cells + id] / d_i; + vy_i = dev_conserved[o2 * n_cells + id] / d_i; + vz_i = dev_conserved[o3 * n_cells + id] / d_i; #ifdef DE // PRESSURE_DE - E = dev_conserved[4 * n_cells + id]; - E_kin = 0.5 * d_i * (vx_i * vx_i + vy_i * vy_i + vz_i * vz_i); - dge = dev_conserved[(n_fields - 1) * n_cells + id]; - p_i = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, dge, gamma); + E = dev_conserved[4 * n_cells + id]; + E_kin = 0.5 * d_i * (vx_i * vx_i + vy_i * vy_i + vz_i * vz_i); + dge = dev_conserved[(n_fields - 1) * n_cells + id]; + p_i = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, dge, gamma); #else // not DE - p_i = (dev_conserved[4 * n_cells + id] - 0.5 * d_i * (vx_i * vx_i + vy_i * vy_i + vz_i * vz_i)) * (gamma - 1.0); + p_i = (dev_conserved[4 * n_cells + id] - 0.5 * d_i * (vx_i * vx_i + vy_i * vy_i + vz_i * vz_i)) * (gamma - 1.0); #endif // PRESSURE_DE - p_i = fmax(p_i, (Real)TINY_NUMBER); + p_i = fmax(p_i, (Real)TINY_NUMBER); #ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - scalar_i[i] = dev_conserved[(5 + i) * n_cells + id] / d_i; - } + for (int i = 0; i < NSCALARS; i++) { + scalar_i[i] = dev_conserved[(5 + i) * n_cells + id] / d_i; + } #endif // SCALAR #ifdef DE - ge_i = dge / d_i; + ge_i = dge / d_i; #endif // DE - // cell i-1 - switch (dir) { - case 0: - id = xid - 1 + yid * nx + zid * nx * ny; - break; - case 1: - id = xid + (yid - 1) * nx + zid * nx * ny; - break; - case 2: - id = xid + yid * nx + (zid - 1) * nx * ny; - break; - } - d_imo = dev_conserved[id]; - vx_imo = dev_conserved[o1 * n_cells + id] / d_imo; - vy_imo = dev_conserved[o2 * n_cells + id] / d_imo; - vz_imo = dev_conserved[o3 * n_cells + id] / d_imo; + // cell i-1 + switch (dir) { + case 0: + id = xid - 1 + yid * nx + zid * nx * ny; + break; + case 1: + id = xid + (yid - 1) * nx + zid * nx * ny; + break; + case 2: + id = xid + yid * nx + (zid - 1) * nx * ny; + break; + } + d_imo = dev_conserved[id]; + vx_imo = dev_conserved[o1 * n_cells + id] / d_imo; + vy_imo = dev_conserved[o2 * n_cells + id] / d_imo; + vz_imo = dev_conserved[o3 * n_cells + id] / d_imo; #ifdef DE // PRESSURE_DE - E = dev_conserved[4 * n_cells + id]; - E_kin = 0.5 * d_imo * (vx_imo * vx_imo + vy_imo * vy_imo + vz_imo * vz_imo); - dge = dev_conserved[(n_fields - 1) * n_cells + id]; - p_imo = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, dge, gamma); + E = dev_conserved[4 * n_cells + id]; + E_kin = 0.5 * d_imo * (vx_imo * vx_imo + vy_imo * vy_imo + vz_imo * vz_imo); + dge = dev_conserved[(n_fields - 1) * n_cells + id]; + p_imo = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, dge, gamma); #else // not DE - p_imo = (dev_conserved[4 * n_cells + id] - 0.5 * d_imo * (vx_imo * vx_imo + vy_imo * vy_imo + vz_imo * vz_imo)) * - (gamma - 1.0); + p_imo = (dev_conserved[4 * n_cells + id] - 0.5 * d_imo * (vx_imo * vx_imo + vy_imo * vy_imo + vz_imo * vz_imo)) * + (gamma - 1.0); #endif // PRESSURE_DE - p_imo = fmax(p_imo, (Real)TINY_NUMBER); + p_imo = fmax(p_imo, (Real)TINY_NUMBER); #ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - scalar_imo[i] = dev_conserved[(5 + i) * n_cells + id] / d_imo; - } + for (int i = 0; i < NSCALARS; i++) { + scalar_imo[i] = dev_conserved[(5 + i) * n_cells + id] / d_imo; + } #endif // SCALAR #ifdef DE - ge_imo = dge / d_imo; + ge_imo = dge / d_imo; #endif // DE - // cell i+1 - - switch (dir) { - case 0: - id = xid + 1 + yid * nx + zid * nx * ny; - break; - case 1: - id = xid + (yid + 1) * nx + zid * nx * ny; - break; - case 2: - id = xid + yid * nx + (zid + 1) * nx * ny; - break; - } + // cell i+1 - d_ipo = dev_conserved[id]; - vx_ipo = dev_conserved[o1 * n_cells + id] / d_ipo; - vy_ipo = dev_conserved[o2 * n_cells + id] / d_ipo; - vz_ipo = dev_conserved[o3 * n_cells + id] / d_ipo; + switch (dir) { + case 0: + id = xid + 1 + yid * nx + zid * nx * ny; + break; + case 1: + id = xid + (yid + 1) * nx + zid * nx * ny; + break; + case 2: + id = xid + yid * nx + (zid + 1) * nx * ny; + break; + } + + d_ipo = dev_conserved[id]; + vx_ipo = dev_conserved[o1 * n_cells + id] / d_ipo; + vy_ipo = dev_conserved[o2 * n_cells + id] / d_ipo; + vz_ipo = dev_conserved[o3 * n_cells + id] / d_ipo; #ifdef DE // PRESSURE_DE - E = dev_conserved[4 * n_cells + id]; - E_kin = 0.5 * d_ipo * (vx_ipo * vx_ipo + vy_ipo * vy_ipo + vz_ipo * vz_ipo); - dge = dev_conserved[(n_fields - 1) * n_cells + id]; - p_ipo = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, dge, gamma); + E = dev_conserved[4 * n_cells + id]; + E_kin = 0.5 * d_ipo * (vx_ipo * vx_ipo + vy_ipo * vy_ipo + vz_ipo * vz_ipo); + dge = dev_conserved[(n_fields - 1) * n_cells + id]; + p_ipo = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, dge, gamma); #else // not DE - p_ipo = (dev_conserved[4 * n_cells + id] - 0.5 * d_ipo * (vx_ipo * vx_ipo + vy_ipo * vy_ipo + vz_ipo * vz_ipo)) * - (gamma - 1.0); + p_ipo = (dev_conserved[4 * n_cells + id] - 0.5 * d_ipo * (vx_ipo * vx_ipo + vy_ipo * vy_ipo + vz_ipo * vz_ipo)) * + (gamma - 1.0); #endif // PRESSURE_DE - p_ipo = fmax(p_ipo, (Real)TINY_NUMBER); + p_ipo = fmax(p_ipo, (Real)TINY_NUMBER); #ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - scalar_ipo[i] = dev_conserved[(5 + i) * n_cells + id] / d_ipo; - } + for (int i = 0; i < NSCALARS; i++) { + scalar_ipo[i] = dev_conserved[(5 + i) * n_cells + id] / d_ipo; + } #endif // SCALAR #ifdef DE - ge_ipo = dge / d_ipo; + ge_ipo = dge / d_ipo; #endif // DE - // calculate the adiabatic sound speed in cell i - a_i = sqrt(gamma * p_i / d_i); + // calculate the adiabatic sound speed in cell i + a_i = sqrt(gamma * p_i / d_i); // Compute the eigenvalues of the linearized equations in the // primitive variables using the cell-centered primitive variables #ifndef VL - lambda_m = vx_i - a_i; - lambda_0 = vx_i; - lambda_p = vx_i + a_i; + lambda_m = vx_i - a_i; + lambda_0 = vx_i; + lambda_p = vx_i + a_i; #endif // VL - // Compute the left, right, centered, and van Leer differences of the - // primitive variables Note that here L and R refer to locations relative to - // the cell center - - // left - del_d_L = d_i - d_imo; - del_vx_L = vx_i - vx_imo; - del_vy_L = vy_i - vy_imo; - del_vz_L = vz_i - vz_imo; - del_p_L = p_i - p_imo; - - // right - del_d_R = d_ipo - d_i; - del_vx_R = vx_ipo - vx_i; - del_vy_R = vy_ipo - vy_i; - del_vz_R = vz_ipo - vz_i; - del_p_R = p_ipo - p_i; - - // centered - del_d_C = 0.5 * (d_ipo - d_imo); - del_vx_C = 0.5 * (vx_ipo - vx_imo); - del_vy_C = 0.5 * (vy_ipo - vy_imo); - del_vz_C = 0.5 * (vz_ipo - vz_imo); - del_p_C = 0.5 * (p_ipo - p_imo); - - // Van Leer - if (del_d_L * del_d_R > 0.0) { - del_d_G = 2.0 * del_d_L * del_d_R / (del_d_L + del_d_R); - } else { - del_d_G = 0.0; - } - if (del_vx_L * del_vx_R > 0.0) { - del_vx_G = 2.0 * del_vx_L * del_vx_R / (del_vx_L + del_vx_R); - } else { - del_vx_G = 0.0; - } - if (del_vy_L * del_vy_R > 0.0) { - del_vy_G = 2.0 * del_vy_L * del_vy_R / (del_vy_L + del_vy_R); - } else { - del_vy_G = 0.0; - } - if (del_vz_L * del_vz_R > 0.0) { - del_vz_G = 2.0 * del_vz_L * del_vz_R / (del_vz_L + del_vz_R); - } else { - del_vz_G = 0.0; - } - if (del_p_L * del_p_R > 0.0) { - del_p_G = 2.0 * del_p_L * del_p_R / (del_p_L + del_p_R); - } else { - del_p_G = 0.0; - } + // Compute the left, right, centered, and van Leer differences of the + // primitive variables Note that here L and R refer to locations relative to + // the cell center + + // left + del_d_L = d_i - d_imo; + del_vx_L = vx_i - vx_imo; + del_vy_L = vy_i - vy_imo; + del_vz_L = vz_i - vz_imo; + del_p_L = p_i - p_imo; + + // right + del_d_R = d_ipo - d_i; + del_vx_R = vx_ipo - vx_i; + del_vy_R = vy_ipo - vy_i; + del_vz_R = vz_ipo - vz_i; + del_p_R = p_ipo - p_i; + + // centered + del_d_C = 0.5 * (d_ipo - d_imo); + del_vx_C = 0.5 * (vx_ipo - vx_imo); + del_vy_C = 0.5 * (vy_ipo - vy_imo); + del_vz_C = 0.5 * (vz_ipo - vz_imo); + del_p_C = 0.5 * (p_ipo - p_imo); + + // Van Leer + if (del_d_L * del_d_R > 0.0) { + del_d_G = 2.0 * del_d_L * del_d_R / (del_d_L + del_d_R); + } else { + del_d_G = 0.0; + } + if (del_vx_L * del_vx_R > 0.0) { + del_vx_G = 2.0 * del_vx_L * del_vx_R / (del_vx_L + del_vx_R); + } else { + del_vx_G = 0.0; + } + if (del_vy_L * del_vy_R > 0.0) { + del_vy_G = 2.0 * del_vy_L * del_vy_R / (del_vy_L + del_vy_R); + } else { + del_vy_G = 0.0; + } + if (del_vz_L * del_vz_R > 0.0) { + del_vz_G = 2.0 * del_vz_L * del_vz_R / (del_vz_L + del_vz_R); + } else { + del_vz_G = 0.0; + } + if (del_p_L * del_p_R > 0.0) { + del_p_G = 2.0 * del_p_L * del_p_R / (del_p_L + del_p_R); + } else { + del_p_G = 0.0; + } #ifdef DE - del_ge_L = ge_i - ge_imo; - del_ge_R = ge_ipo - ge_i; - del_ge_C = 0.5 * (ge_ipo - ge_imo); - if (del_ge_L * del_ge_R > 0.0) { - del_ge_G = 2.0 * del_ge_L * del_ge_R / (del_ge_L + del_ge_R); - } else { - del_ge_G = 0.0; - } + del_ge_L = ge_i - ge_imo; + del_ge_R = ge_ipo - ge_i; + del_ge_C = 0.5 * (ge_ipo - ge_imo); + if (del_ge_L * del_ge_R > 0.0) { + del_ge_G = 2.0 * del_ge_L * del_ge_R / (del_ge_L + del_ge_R); + } else { + del_ge_G = 0.0; + } #endif // DE #ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - del_scalar_L[i] = scalar_i[i] - scalar_imo[i]; - del_scalar_R[i] = scalar_ipo[i] - scalar_i[i]; - del_scalar_C[i] = 0.5 * (scalar_ipo[i] - scalar_imo[i]); - if (del_scalar_L[i] * del_scalar_R[i] > 0.0) { - del_scalar_G[i] = 2.0 * del_scalar_L[i] * del_scalar_R[i] / (del_scalar_L[i] + del_scalar_R[i]); - } else { - del_scalar_G[i] = 0.0; - } + for (int i = 0; i < NSCALARS; i++) { + del_scalar_L[i] = scalar_i[i] - scalar_imo[i]; + del_scalar_R[i] = scalar_ipo[i] - scalar_i[i]; + del_scalar_C[i] = 0.5 * (scalar_ipo[i] - scalar_imo[i]); + if (del_scalar_L[i] * del_scalar_R[i] > 0.0) { + del_scalar_G[i] = 2.0 * del_scalar_L[i] * del_scalar_R[i] / (del_scalar_L[i] + del_scalar_R[i]); + } else { + del_scalar_G[i] = 0.0; } + } #endif // SCALAR - // Project the left, right, centered and van Leer differences onto the - // characteristic variables Stone Eqn 37 (del_a are differences in - // characteristic variables, see Stone for notation) Use the eigenvectors - // given in Stone 2008, Appendix A - del_a_0_L = -d_i * del_vx_L / (2 * a_i) + del_p_L / (2 * a_i * a_i); - del_a_1_L = del_d_L - del_p_L / (a_i * a_i); - del_a_2_L = del_vy_L; - del_a_3_L = del_vz_L; - del_a_4_L = d_i * del_vx_L / (2 * a_i) + del_p_L / (2 * a_i * a_i); - - del_a_0_R = -d_i * del_vx_R / (2 * a_i) + del_p_R / (2 * a_i * a_i); - del_a_1_R = del_d_R - del_p_R / (a_i * a_i); - del_a_2_R = del_vy_R; - del_a_3_R = del_vz_R; - del_a_4_R = d_i * del_vx_R / (2 * a_i) + del_p_R / (2 * a_i * a_i); - - del_a_0_C = -d_i * del_vx_C / (2 * a_i) + del_p_C / (2 * a_i * a_i); - del_a_1_C = del_d_C - del_p_C / (a_i * a_i); - del_a_2_C = del_vy_C; - del_a_3_C = del_vz_C; - del_a_4_C = d_i * del_vx_C / (2 * a_i) + del_p_C / (2 * a_i * a_i); - - del_a_0_G = -d_i * del_vx_G / (2 * a_i) + del_p_G / (2 * a_i * a_i); - del_a_1_G = del_d_G - del_p_G / (a_i * a_i); - del_a_2_G = del_vy_G; - del_a_3_G = del_vz_G; - del_a_4_G = d_i * del_vx_G / (2 * a_i) + del_p_G / (2 * a_i * a_i); - - // Apply monotonicity constraints to the differences in the characteristic - // variables - - del_a_0_m = del_a_1_m = del_a_2_m = del_a_3_m = del_a_4_m = 0.0; // This should be in the declaration - - if (del_a_0_L * del_a_0_R > 0.0) { - lim_slope_a = fmin(fabs(del_a_0_L), fabs(del_a_0_R)); - lim_slope_b = fmin(fabs(del_a_0_C), fabs(del_a_0_G)); - del_a_0_m = sgn_CUDA(del_a_0_C) * fmin(2.0 * lim_slope_a, lim_slope_b); - } - if (del_a_1_L * del_a_1_R > 0.0) { - lim_slope_a = fmin(fabs(del_a_1_L), fabs(del_a_1_R)); - lim_slope_b = fmin(fabs(del_a_1_C), fabs(del_a_1_G)); - del_a_1_m = sgn_CUDA(del_a_1_C) * fmin(2.0 * lim_slope_a, lim_slope_b); - } - if (del_a_2_L * del_a_2_R > 0.0) { - lim_slope_a = fmin(fabs(del_a_2_L), fabs(del_a_2_R)); - lim_slope_b = fmin(fabs(del_a_2_C), fabs(del_a_2_G)); - del_a_2_m = sgn_CUDA(del_a_2_C) * fmin(2.0 * lim_slope_a, lim_slope_b); - } - if (del_a_3_L * del_a_3_R > 0.0) { - lim_slope_a = fmin(fabs(del_a_3_L), fabs(del_a_3_R)); - lim_slope_b = fmin(fabs(del_a_3_C), fabs(del_a_3_G)); - del_a_3_m = sgn_CUDA(del_a_3_C) * fmin(2.0 * lim_slope_a, lim_slope_b); - } - if (del_a_4_L * del_a_4_R > 0.0) { - lim_slope_a = fmin(fabs(del_a_4_L), fabs(del_a_4_R)); - lim_slope_b = fmin(fabs(del_a_4_C), fabs(del_a_4_G)); - del_a_4_m = sgn_CUDA(del_a_4_C) * fmin(2.0 * lim_slope_a, lim_slope_b); - } + // Project the left, right, centered and van Leer differences onto the + // characteristic variables Stone Eqn 37 (del_a are differences in + // characteristic variables, see Stone for notation) Use the eigenvectors + // given in Stone 2008, Appendix A + del_a_0_L = -d_i * del_vx_L / (2 * a_i) + del_p_L / (2 * a_i * a_i); + del_a_1_L = del_d_L - del_p_L / (a_i * a_i); + del_a_2_L = del_vy_L; + del_a_3_L = del_vz_L; + del_a_4_L = d_i * del_vx_L / (2 * a_i) + del_p_L / (2 * a_i * a_i); + + del_a_0_R = -d_i * del_vx_R / (2 * a_i) + del_p_R / (2 * a_i * a_i); + del_a_1_R = del_d_R - del_p_R / (a_i * a_i); + del_a_2_R = del_vy_R; + del_a_3_R = del_vz_R; + del_a_4_R = d_i * del_vx_R / (2 * a_i) + del_p_R / (2 * a_i * a_i); + + del_a_0_C = -d_i * del_vx_C / (2 * a_i) + del_p_C / (2 * a_i * a_i); + del_a_1_C = del_d_C - del_p_C / (a_i * a_i); + del_a_2_C = del_vy_C; + del_a_3_C = del_vz_C; + del_a_4_C = d_i * del_vx_C / (2 * a_i) + del_p_C / (2 * a_i * a_i); + + del_a_0_G = -d_i * del_vx_G / (2 * a_i) + del_p_G / (2 * a_i * a_i); + del_a_1_G = del_d_G - del_p_G / (a_i * a_i); + del_a_2_G = del_vy_G; + del_a_3_G = del_vz_G; + del_a_4_G = d_i * del_vx_G / (2 * a_i) + del_p_G / (2 * a_i * a_i); + + // Apply monotonicity constraints to the differences in the characteristic + // variables + + del_a_0_m = del_a_1_m = del_a_2_m = del_a_3_m = del_a_4_m = 0.0; // This should be in the declaration + + if (del_a_0_L * del_a_0_R > 0.0) { + lim_slope_a = fmin(fabs(del_a_0_L), fabs(del_a_0_R)); + lim_slope_b = fmin(fabs(del_a_0_C), fabs(del_a_0_G)); + del_a_0_m = sgn_CUDA(del_a_0_C) * fmin(2.0 * lim_slope_a, lim_slope_b); + } + if (del_a_1_L * del_a_1_R > 0.0) { + lim_slope_a = fmin(fabs(del_a_1_L), fabs(del_a_1_R)); + lim_slope_b = fmin(fabs(del_a_1_C), fabs(del_a_1_G)); + del_a_1_m = sgn_CUDA(del_a_1_C) * fmin(2.0 * lim_slope_a, lim_slope_b); + } + if (del_a_2_L * del_a_2_R > 0.0) { + lim_slope_a = fmin(fabs(del_a_2_L), fabs(del_a_2_R)); + lim_slope_b = fmin(fabs(del_a_2_C), fabs(del_a_2_G)); + del_a_2_m = sgn_CUDA(del_a_2_C) * fmin(2.0 * lim_slope_a, lim_slope_b); + } + if (del_a_3_L * del_a_3_R > 0.0) { + lim_slope_a = fmin(fabs(del_a_3_L), fabs(del_a_3_R)); + lim_slope_b = fmin(fabs(del_a_3_C), fabs(del_a_3_G)); + del_a_3_m = sgn_CUDA(del_a_3_C) * fmin(2.0 * lim_slope_a, lim_slope_b); + } + if (del_a_4_L * del_a_4_R > 0.0) { + lim_slope_a = fmin(fabs(del_a_4_L), fabs(del_a_4_R)); + lim_slope_b = fmin(fabs(del_a_4_C), fabs(del_a_4_G)); + del_a_4_m = sgn_CUDA(del_a_4_C) * fmin(2.0 * lim_slope_a, lim_slope_b); + } #ifdef DE - del_ge_m_i = 0.0; - if (del_ge_L * del_ge_R > 0.0) { - lim_slope_a = fmin(fabs(del_ge_L), fabs(del_ge_R)); - lim_slope_b = fmin(fabs(del_ge_C), fabs(del_ge_G)); - del_ge_m_i = sgn_CUDA(del_ge_C) * fmin(2.0 * lim_slope_a, lim_slope_b); - } + del_ge_m_i = 0.0; + if (del_ge_L * del_ge_R > 0.0) { + lim_slope_a = fmin(fabs(del_ge_L), fabs(del_ge_R)); + lim_slope_b = fmin(fabs(del_ge_C), fabs(del_ge_G)); + del_ge_m_i = sgn_CUDA(del_ge_C) * fmin(2.0 * lim_slope_a, lim_slope_b); + } #endif // DE #ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - del_scalar_m_i[i] = 0.0; - if (del_scalar_L[i] * del_scalar_R[i] > 0.0) { - lim_slope_a = fmin(fabs(del_scalar_L[i]), fabs(del_scalar_R[i])); - lim_slope_b = fmin(fabs(del_scalar_C[i]), fabs(del_scalar_G[i])); - del_scalar_m_i[i] = sgn_CUDA(del_scalar_C[i]) * fmin(2.0 * lim_slope_a, lim_slope_b); - } + for (int i = 0; i < NSCALARS; i++) { + del_scalar_m_i[i] = 0.0; + if (del_scalar_L[i] * del_scalar_R[i] > 0.0) { + lim_slope_a = fmin(fabs(del_scalar_L[i]), fabs(del_scalar_R[i])); + lim_slope_b = fmin(fabs(del_scalar_C[i]), fabs(del_scalar_G[i])); + del_scalar_m_i[i] = sgn_CUDA(del_scalar_C[i]) * fmin(2.0 * lim_slope_a, lim_slope_b); } + } #endif // SCALAR - // Project the monotonized difference in the characteristic variables back - // onto the primitive variables Stone Eqn 39 - del_d_m_i = del_a_0_m + del_a_1_m + del_a_4_m; - del_vx_m_i = -a_i * del_a_0_m / d_i + a_i * del_a_4_m / d_i; - del_vy_m_i = del_a_2_m; - del_vz_m_i = del_a_3_m; - del_p_m_i = a_i * a_i * del_a_0_m + a_i * a_i * del_a_4_m; - - // Compute the left and right interface values using the monotonized - // difference in the primitive variables - - d_R_imh = d_i - 0.5 * del_d_m_i; - vx_R_imh = vx_i - 0.5 * del_vx_m_i; - vy_R_imh = vy_i - 0.5 * del_vy_m_i; - vz_R_imh = vz_i - 0.5 * del_vz_m_i; - p_R_imh = p_i - 0.5 * del_p_m_i; - - d_L_iph = d_i + 0.5 * del_d_m_i; - vx_L_iph = vx_i + 0.5 * del_vx_m_i; - vy_L_iph = vy_i + 0.5 * del_vy_m_i; - vz_L_iph = vz_i + 0.5 * del_vz_m_i; - p_L_iph = p_i + 0.5 * del_p_m_i; + // Project the monotonized difference in the characteristic variables back + // onto the primitive variables Stone Eqn 39 + del_d_m_i = del_a_0_m + del_a_1_m + del_a_4_m; + del_vx_m_i = -a_i * del_a_0_m / d_i + a_i * del_a_4_m / d_i; + del_vy_m_i = del_a_2_m; + del_vz_m_i = del_a_3_m; + del_p_m_i = a_i * a_i * del_a_0_m + a_i * a_i * del_a_4_m; + + // Compute the left and right interface values using the monotonized + // difference in the primitive variables + + d_R_imh = d_i - 0.5 * del_d_m_i; + vx_R_imh = vx_i - 0.5 * del_vx_m_i; + vy_R_imh = vy_i - 0.5 * del_vy_m_i; + vz_R_imh = vz_i - 0.5 * del_vz_m_i; + p_R_imh = p_i - 0.5 * del_p_m_i; + + d_L_iph = d_i + 0.5 * del_d_m_i; + vx_L_iph = vx_i + 0.5 * del_vx_m_i; + vy_L_iph = vy_i + 0.5 * del_vy_m_i; + vz_L_iph = vz_i + 0.5 * del_vz_m_i; + p_L_iph = p_i + 0.5 * del_p_m_i; #ifdef DE - ge_R_imh = ge_i - 0.5 * del_ge_m_i; - ge_L_iph = ge_i + 0.5 * del_ge_m_i; + ge_R_imh = ge_i - 0.5 * del_ge_m_i; + ge_L_iph = ge_i + 0.5 * del_ge_m_i; #endif // DE #ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - scalar_R_imh[i] = scalar_i[i] - 0.5 * del_scalar_m_i[i]; - scalar_L_iph[i] = scalar_i[i] + 0.5 * del_scalar_m_i[i]; - } + for (int i = 0; i < NSCALARS; i++) { + scalar_R_imh[i] = scalar_i[i] - 0.5 * del_scalar_m_i[i]; + scalar_L_iph[i] = scalar_i[i] + 0.5 * del_scalar_m_i[i]; + } #endif // SCALAR - // try removing this on shock tubes - C = d_R_imh + d_L_iph; - d_R_imh = fmax(fmin(d_i, d_imo), d_R_imh); - d_R_imh = fmin(fmax(d_i, d_imo), d_R_imh); - d_L_iph = C - d_R_imh; - d_L_iph = fmax(fmin(d_i, d_ipo), d_L_iph); - d_L_iph = fmin(fmax(d_i, d_ipo), d_L_iph); - d_R_imh = C - d_L_iph; - - C = vx_R_imh + vx_L_iph; - vx_R_imh = fmax(fmin(vx_i, vx_imo), vx_R_imh); - vx_R_imh = fmin(fmax(vx_i, vx_imo), vx_R_imh); - vx_L_iph = C - vx_R_imh; - vx_L_iph = fmax(fmin(vx_i, vx_ipo), vx_L_iph); - vx_L_iph = fmin(fmax(vx_i, vx_ipo), vx_L_iph); - vx_R_imh = C - vx_L_iph; - - C = vy_R_imh + vy_L_iph; - vy_R_imh = fmax(fmin(vy_i, vy_imo), vy_R_imh); - vy_R_imh = fmin(fmax(vy_i, vy_imo), vy_R_imh); - vy_L_iph = C - vy_R_imh; - vy_L_iph = fmax(fmin(vy_i, vy_ipo), vy_L_iph); - vy_L_iph = fmin(fmax(vy_i, vy_ipo), vy_L_iph); - vy_R_imh = C - vy_L_iph; - - C = vz_R_imh + vz_L_iph; - vz_R_imh = fmax(fmin(vz_i, vz_imo), vz_R_imh); - vz_R_imh = fmin(fmax(vz_i, vz_imo), vz_R_imh); - vz_L_iph = C - vz_R_imh; - vz_L_iph = fmax(fmin(vz_i, vz_ipo), vz_L_iph); - vz_L_iph = fmin(fmax(vz_i, vz_ipo), vz_L_iph); - vz_R_imh = C - vz_L_iph; - - C = p_R_imh + p_L_iph; - p_R_imh = fmax(fmin(p_i, p_imo), p_R_imh); - p_R_imh = fmin(fmax(p_i, p_imo), p_R_imh); - p_L_iph = C - p_R_imh; - p_L_iph = fmax(fmin(p_i, p_ipo), p_L_iph); - p_L_iph = fmin(fmax(p_i, p_ipo), p_L_iph); - p_R_imh = C - p_L_iph; - - del_d_m_i = d_L_iph - d_R_imh; - del_vx_m_i = vx_L_iph - vx_R_imh; - del_vy_m_i = vy_L_iph - vy_R_imh; - del_vz_m_i = vz_L_iph - vz_R_imh; - del_p_m_i = p_L_iph - p_R_imh; + // try removing this on shock tubes + C = d_R_imh + d_L_iph; + d_R_imh = fmax(fmin(d_i, d_imo), d_R_imh); + d_R_imh = fmin(fmax(d_i, d_imo), d_R_imh); + d_L_iph = C - d_R_imh; + d_L_iph = fmax(fmin(d_i, d_ipo), d_L_iph); + d_L_iph = fmin(fmax(d_i, d_ipo), d_L_iph); + d_R_imh = C - d_L_iph; + + C = vx_R_imh + vx_L_iph; + vx_R_imh = fmax(fmin(vx_i, vx_imo), vx_R_imh); + vx_R_imh = fmin(fmax(vx_i, vx_imo), vx_R_imh); + vx_L_iph = C - vx_R_imh; + vx_L_iph = fmax(fmin(vx_i, vx_ipo), vx_L_iph); + vx_L_iph = fmin(fmax(vx_i, vx_ipo), vx_L_iph); + vx_R_imh = C - vx_L_iph; + + C = vy_R_imh + vy_L_iph; + vy_R_imh = fmax(fmin(vy_i, vy_imo), vy_R_imh); + vy_R_imh = fmin(fmax(vy_i, vy_imo), vy_R_imh); + vy_L_iph = C - vy_R_imh; + vy_L_iph = fmax(fmin(vy_i, vy_ipo), vy_L_iph); + vy_L_iph = fmin(fmax(vy_i, vy_ipo), vy_L_iph); + vy_R_imh = C - vy_L_iph; + + C = vz_R_imh + vz_L_iph; + vz_R_imh = fmax(fmin(vz_i, vz_imo), vz_R_imh); + vz_R_imh = fmin(fmax(vz_i, vz_imo), vz_R_imh); + vz_L_iph = C - vz_R_imh; + vz_L_iph = fmax(fmin(vz_i, vz_ipo), vz_L_iph); + vz_L_iph = fmin(fmax(vz_i, vz_ipo), vz_L_iph); + vz_R_imh = C - vz_L_iph; + + C = p_R_imh + p_L_iph; + p_R_imh = fmax(fmin(p_i, p_imo), p_R_imh); + p_R_imh = fmin(fmax(p_i, p_imo), p_R_imh); + p_L_iph = C - p_R_imh; + p_L_iph = fmax(fmin(p_i, p_ipo), p_L_iph); + p_L_iph = fmin(fmax(p_i, p_ipo), p_L_iph); + p_R_imh = C - p_L_iph; + + del_d_m_i = d_L_iph - d_R_imh; + del_vx_m_i = vx_L_iph - vx_R_imh; + del_vy_m_i = vy_L_iph - vy_R_imh; + del_vz_m_i = vz_L_iph - vz_R_imh; + del_p_m_i = p_L_iph - p_R_imh; #ifdef DE - C = ge_R_imh + ge_L_iph; - ge_R_imh = fmax(fmin(ge_i, ge_imo), ge_R_imh); - ge_R_imh = fmin(fmax(ge_i, ge_imo), ge_R_imh); - ge_L_iph = C - ge_R_imh; - ge_L_iph = fmax(fmin(ge_i, ge_ipo), ge_L_iph); - ge_L_iph = fmin(fmax(ge_i, ge_ipo), ge_L_iph); - ge_R_imh = C - ge_L_iph; - del_ge_m_i = ge_L_iph - ge_R_imh; + C = ge_R_imh + ge_L_iph; + ge_R_imh = fmax(fmin(ge_i, ge_imo), ge_R_imh); + ge_R_imh = fmin(fmax(ge_i, ge_imo), ge_R_imh); + ge_L_iph = C - ge_R_imh; + ge_L_iph = fmax(fmin(ge_i, ge_ipo), ge_L_iph); + ge_L_iph = fmin(fmax(ge_i, ge_ipo), ge_L_iph); + ge_R_imh = C - ge_L_iph; + del_ge_m_i = ge_L_iph - ge_R_imh; #endif // DE #ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - C = scalar_R_imh[i] + scalar_L_iph[i]; - scalar_R_imh[i] = fmax(fmin(scalar_i[i], scalar_imo[i]), scalar_R_imh[i]); - scalar_R_imh[i] = fmin(fmax(scalar_i[i], scalar_imo[i]), scalar_R_imh[i]); - scalar_L_iph[i] = C - scalar_R_imh[i]; - scalar_L_iph[i] = fmax(fmin(scalar_i[i], scalar_ipo[i]), scalar_L_iph[i]); - scalar_L_iph[i] = fmin(fmax(scalar_i[i], scalar_ipo[i]), scalar_L_iph[i]); - scalar_R_imh[i] = C - scalar_L_iph[i]; - del_scalar_m_i[i] = scalar_L_iph[i] - scalar_R_imh[i]; - } + for (int i = 0; i < NSCALARS; i++) { + C = scalar_R_imh[i] + scalar_L_iph[i]; + scalar_R_imh[i] = fmax(fmin(scalar_i[i], scalar_imo[i]), scalar_R_imh[i]); + scalar_R_imh[i] = fmin(fmax(scalar_i[i], scalar_imo[i]), scalar_R_imh[i]); + scalar_L_iph[i] = C - scalar_R_imh[i]; + scalar_L_iph[i] = fmax(fmin(scalar_i[i], scalar_ipo[i]), scalar_L_iph[i]); + scalar_L_iph[i] = fmin(fmax(scalar_i[i], scalar_ipo[i]), scalar_L_iph[i]); + scalar_R_imh[i] = C - scalar_L_iph[i]; + del_scalar_m_i[i] = scalar_L_iph[i] - scalar_R_imh[i]; + } #endif // SCALAR #ifndef VL - // Integrate linear interpolation function over domain of dependence - // defined by max(min) eigenvalue - qx = -0.5 * fmin(lambda_m, 0.0) * dtodx; - d_R_imh = d_R_imh + qx * del_d_m_i; - vx_R_imh = vx_R_imh + qx * del_vx_m_i; - vy_R_imh = vy_R_imh + qx * del_vy_m_i; - vz_R_imh = vz_R_imh + qx * del_vz_m_i; - p_R_imh = p_R_imh + qx * del_p_m_i; - - qx = 0.5 * fmax(lambda_p, 0.0) * dtodx; - d_L_iph = d_L_iph - qx * del_d_m_i; - vx_L_iph = vx_L_iph - qx * del_vx_m_i; - vy_L_iph = vy_L_iph - qx * del_vy_m_i; - vz_L_iph = vz_L_iph - qx * del_vz_m_i; - p_L_iph = p_L_iph - qx * del_p_m_i; + // Integrate linear interpolation function over domain of dependence + // defined by max(min) eigenvalue + qx = -0.5 * fmin(lambda_m, 0.0) * dtodx; + d_R_imh = d_R_imh + qx * del_d_m_i; + vx_R_imh = vx_R_imh + qx * del_vx_m_i; + vy_R_imh = vy_R_imh + qx * del_vy_m_i; + vz_R_imh = vz_R_imh + qx * del_vz_m_i; + p_R_imh = p_R_imh + qx * del_p_m_i; + + qx = 0.5 * fmax(lambda_p, 0.0) * dtodx; + d_L_iph = d_L_iph - qx * del_d_m_i; + vx_L_iph = vx_L_iph - qx * del_vx_m_i; + vy_L_iph = vy_L_iph - qx * del_vy_m_i; + vz_L_iph = vz_L_iph - qx * del_vz_m_i; + p_L_iph = p_L_iph - qx * del_p_m_i; #ifdef DE - ge_R_imh = ge_R_imh + qx * del_ge_m_i; - ge_L_iph = ge_L_iph - qx * del_ge_m_i; + ge_R_imh = ge_R_imh + qx * del_ge_m_i; + ge_L_iph = ge_L_iph - qx * del_ge_m_i; #endif // DE #ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - scalar_R_imh[i] = scalar_R_imh[i] + qx * del_scalar_m_i[i]; - scalar_L_iph[i] = scalar_L_iph[i] - qx * del_scalar_m_i[i]; - } + for (int i = 0; i < NSCALARS; i++) { + scalar_R_imh[i] = scalar_R_imh[i] + qx * del_scalar_m_i[i]; + scalar_L_iph[i] = scalar_L_iph[i] - qx * del_scalar_m_i[i]; + } #endif // SCALAR - // Perform the characteristic tracing - // Stone Eqns 42 & 43 + // Perform the characteristic tracing + // Stone Eqns 42 & 43 - // left-hand interface value, i+1/2 - sum_0 = sum_1 = sum_2 = sum_3 = sum_4 = 0; + // left-hand interface value, i+1/2 + sum_0 = sum_1 = sum_2 = sum_3 = sum_4 = 0; #ifdef DE - sum_ge = 0; + sum_ge = 0; #endif // DE #ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - sum_scalar[i] = 0.0; - } + for (int i = 0; i < NSCALARS; i++) { + sum_scalar[i] = 0.0; + } #endif // SCALAR - if (lambda_m >= 0) { - lamdiff = lambda_p - lambda_m; + if (lambda_m >= 0) { + lamdiff = lambda_p - lambda_m; - sum_0 += lamdiff * (-d_i * del_vx_m_i / (2 * a_i) + del_p_m_i / (2 * a_i * a_i)); - sum_1 += lamdiff * (del_vx_m_i / 2.0 - del_p_m_i / (2 * a_i * d_i)); - sum_4 += lamdiff * (-d_i * del_vx_m_i * a_i / 2.0 + del_p_m_i / 2.0); - } - if (lambda_0 >= 0) { - lamdiff = lambda_p - lambda_0; + sum_0 += lamdiff * (-d_i * del_vx_m_i / (2 * a_i) + del_p_m_i / (2 * a_i * a_i)); + sum_1 += lamdiff * (del_vx_m_i / 2.0 - del_p_m_i / (2 * a_i * d_i)); + sum_4 += lamdiff * (-d_i * del_vx_m_i * a_i / 2.0 + del_p_m_i / 2.0); + } + if (lambda_0 >= 0) { + lamdiff = lambda_p - lambda_0; - sum_0 += lamdiff * (del_d_m_i - del_p_m_i / (a_i * a_i)); - sum_2 += lamdiff * del_vy_m_i; - sum_3 += lamdiff * del_vz_m_i; + sum_0 += lamdiff * (del_d_m_i - del_p_m_i / (a_i * a_i)); + sum_2 += lamdiff * del_vy_m_i; + sum_3 += lamdiff * del_vz_m_i; #ifdef DE - sum_ge += lamdiff * del_ge_m_i; + sum_ge += lamdiff * del_ge_m_i; #endif // DE #ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - sum_scalar[i] += lamdiff * del_scalar_m_i[i]; - } - #endif // SCALAR + for (int i = 0; i < NSCALARS; i++) { + sum_scalar[i] += lamdiff * del_scalar_m_i[i]; } - if (lambda_p >= 0) { - lamdiff = lambda_p - lambda_p; + #endif // SCALAR + } + if (lambda_p >= 0) { + lamdiff = lambda_p - lambda_p; - sum_0 += lamdiff * (d_i * del_vx_m_i / (2 * a_i) + del_p_m_i / (2 * a_i * a_i)); - sum_1 += lamdiff * (del_vx_m_i / 2.0 + del_p_m_i / (2 * a_i * d_i)); - sum_4 += lamdiff * (d_i * del_vx_m_i * a_i / 2.0 + del_p_m_i / 2.0); - } + sum_0 += lamdiff * (d_i * del_vx_m_i / (2 * a_i) + del_p_m_i / (2 * a_i * a_i)); + sum_1 += lamdiff * (del_vx_m_i / 2.0 + del_p_m_i / (2 * a_i * d_i)); + sum_4 += lamdiff * (d_i * del_vx_m_i * a_i / 2.0 + del_p_m_i / 2.0); + } - // add the corrections to the initial guesses for the interface values - d_L_iph += 0.5 * dtodx * sum_0; - vx_L_iph += 0.5 * dtodx * sum_1; - vy_L_iph += 0.5 * dtodx * sum_2; - vz_L_iph += 0.5 * dtodx * sum_3; - p_L_iph += 0.5 * dtodx * sum_4; + // add the corrections to the initial guesses for the interface values + d_L_iph += 0.5 * dtodx * sum_0; + vx_L_iph += 0.5 * dtodx * sum_1; + vy_L_iph += 0.5 * dtodx * sum_2; + vz_L_iph += 0.5 * dtodx * sum_3; + p_L_iph += 0.5 * dtodx * sum_4; #ifdef DE - ge_L_iph += 0.5 * dtodx * sum_ge; + ge_L_iph += 0.5 * dtodx * sum_ge; #endif // DE #ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - scalar_L_iph[i] += 0.5 * dtodx * sum_scalar[i]; - } + for (int i = 0; i < NSCALARS; i++) { + scalar_L_iph[i] += 0.5 * dtodx * sum_scalar[i]; + } #endif // SCALAR - // right-hand interface value, i-1/2 - sum_0 = sum_1 = sum_2 = sum_3 = sum_4 = 0; + // right-hand interface value, i-1/2 + sum_0 = sum_1 = sum_2 = sum_3 = sum_4 = 0; #ifdef DE - sum_ge = 0; + sum_ge = 0; #endif // DE #ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - sum_scalar[i] = 0; - } + for (int i = 0; i < NSCALARS; i++) { + sum_scalar[i] = 0; + } #endif // SCALAR - if (lambda_m <= 0) { - lamdiff = lambda_m - lambda_m; + if (lambda_m <= 0) { + lamdiff = lambda_m - lambda_m; - sum_0 += lamdiff * (-d_i * del_vx_m_i / (2 * a_i) + del_p_m_i / (2 * a_i * a_i)); - sum_1 += lamdiff * (del_vx_m_i / 2.0 - del_p_m_i / (2 * a_i * d_i)); - sum_4 += lamdiff * (-d_i * del_vx_m_i * a_i / 2.0 + del_p_m_i / 2.0); - } - if (lambda_0 <= 0) { - lamdiff = lambda_m - lambda_0; + sum_0 += lamdiff * (-d_i * del_vx_m_i / (2 * a_i) + del_p_m_i / (2 * a_i * a_i)); + sum_1 += lamdiff * (del_vx_m_i / 2.0 - del_p_m_i / (2 * a_i * d_i)); + sum_4 += lamdiff * (-d_i * del_vx_m_i * a_i / 2.0 + del_p_m_i / 2.0); + } + if (lambda_0 <= 0) { + lamdiff = lambda_m - lambda_0; - sum_0 += lamdiff * (del_d_m_i - del_p_m_i / (a_i * a_i)); - sum_2 += lamdiff * del_vy_m_i; - sum_3 += lamdiff * del_vz_m_i; + sum_0 += lamdiff * (del_d_m_i - del_p_m_i / (a_i * a_i)); + sum_2 += lamdiff * del_vy_m_i; + sum_3 += lamdiff * del_vz_m_i; #ifdef DE - sum_ge += lamdiff * del_ge_m_i; + sum_ge += lamdiff * del_ge_m_i; #endif // DE #ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - sum_scalar[i] += lamdiff * del_scalar_m_i[i]; - } - #endif // SCALAR + for (int i = 0; i < NSCALARS; i++) { + sum_scalar[i] += lamdiff * del_scalar_m_i[i]; } - if (lambda_p <= 0) { - lamdiff = lambda_m - lambda_p; + #endif // SCALAR + } + if (lambda_p <= 0) { + lamdiff = lambda_m - lambda_p; - sum_0 += lamdiff * (d_i * del_vx_m_i / (2 * a_i) + del_p_m_i / (2 * a_i * a_i)); - sum_1 += lamdiff * (del_vx_m_i / 2.0 + del_p_m_i / (2 * a_i * d_i)); - sum_4 += lamdiff * (d_i * del_vx_m_i * a_i / 2.0 + del_p_m_i / 2.0); - } + sum_0 += lamdiff * (d_i * del_vx_m_i / (2 * a_i) + del_p_m_i / (2 * a_i * a_i)); + sum_1 += lamdiff * (del_vx_m_i / 2.0 + del_p_m_i / (2 * a_i * d_i)); + sum_4 += lamdiff * (d_i * del_vx_m_i * a_i / 2.0 + del_p_m_i / 2.0); + } - // add the corrections - d_R_imh += 0.5 * dtodx * sum_0; - vx_R_imh += 0.5 * dtodx * sum_1; - vy_R_imh += 0.5 * dtodx * sum_2; - vz_R_imh += 0.5 * dtodx * sum_3; - p_R_imh += 0.5 * dtodx * sum_4; + // add the corrections + d_R_imh += 0.5 * dtodx * sum_0; + vx_R_imh += 0.5 * dtodx * sum_1; + vy_R_imh += 0.5 * dtodx * sum_2; + vz_R_imh += 0.5 * dtodx * sum_3; + p_R_imh += 0.5 * dtodx * sum_4; #ifdef DE - ge_R_imh += 0.5 * dtodx * sum_ge; + ge_R_imh += 0.5 * dtodx * sum_ge; #endif // DE #ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - scalar_R_imh[i] += 0.5 * dtodx * sum_scalar[i]; - } + for (int i = 0; i < NSCALARS; i++) { + scalar_R_imh[i] += 0.5 * dtodx * sum_scalar[i]; + } #endif // SCALAR #endif // CTU - // apply minimum constraints - d_R_imh = fmax(d_R_imh, (Real)TINY_NUMBER); - d_L_iph = fmax(d_L_iph, (Real)TINY_NUMBER); - p_R_imh = fmax(p_R_imh, (Real)TINY_NUMBER); - p_L_iph = fmax(p_L_iph, (Real)TINY_NUMBER); - - // Convert the left and right states in the primitive to the conserved - // variables send final values back from kernel bounds_R refers to the right - // side of the i-1/2 interface - switch (dir) { - case 0: - id = xid - 1 + yid * nx + zid * nx * ny; - break; - case 1: - id = xid + (yid - 1) * nx + zid * nx * ny; - break; - case 2: - id = xid + yid * nx + (zid - 1) * nx * ny; - break; - } + // apply minimum constraints + d_R_imh = fmax(d_R_imh, (Real)TINY_NUMBER); + d_L_iph = fmax(d_L_iph, (Real)TINY_NUMBER); + p_R_imh = fmax(p_R_imh, (Real)TINY_NUMBER); + p_L_iph = fmax(p_L_iph, (Real)TINY_NUMBER); - dev_bounds_R[id] = d_R_imh; - dev_bounds_R[o1 * n_cells + id] = d_R_imh * vx_R_imh; - dev_bounds_R[o2 * n_cells + id] = d_R_imh * vy_R_imh; - dev_bounds_R[o3 * n_cells + id] = d_R_imh * vz_R_imh; - dev_bounds_R[4 * n_cells + id] = - (p_R_imh / (gamma - 1.0)) + 0.5 * d_R_imh * (vx_R_imh * vx_R_imh + vy_R_imh * vy_R_imh + vz_R_imh * vz_R_imh); + // Convert the left and right states in the primitive to the conserved + // variables send final values back from kernel bounds_R refers to the right + // side of the i-1/2 interface + switch (dir) { + case 0: + id = xid - 1 + yid * nx + zid * nx * ny; + break; + case 1: + id = xid + (yid - 1) * nx + zid * nx * ny; + break; + case 2: + id = xid + yid * nx + (zid - 1) * nx * ny; + break; + } + + dev_bounds_R[id] = d_R_imh; + dev_bounds_R[o1 * n_cells + id] = d_R_imh * vx_R_imh; + dev_bounds_R[o2 * n_cells + id] = d_R_imh * vy_R_imh; + dev_bounds_R[o3 * n_cells + id] = d_R_imh * vz_R_imh; + dev_bounds_R[4 * n_cells + id] = + (p_R_imh / (gamma - 1.0)) + 0.5 * d_R_imh * (vx_R_imh * vx_R_imh + vy_R_imh * vy_R_imh + vz_R_imh * vz_R_imh); #ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - dev_bounds_R[(5 + i) * n_cells + id] = d_R_imh * scalar_R_imh[i]; - } + for (int i = 0; i < NSCALARS; i++) { + dev_bounds_R[(5 + i) * n_cells + id] = d_R_imh * scalar_R_imh[i]; + } #endif // SCALAR #ifdef DE - dev_bounds_R[(n_fields - 1) * n_cells + id] = d_R_imh * ge_R_imh; + dev_bounds_R[(n_fields - 1) * n_cells + id] = d_R_imh * ge_R_imh; #endif // DE - // bounds_L refers to the left side of the i+1/2 interface - id = xid + yid * nx + zid * nx * ny; - dev_bounds_L[id] = d_L_iph; - dev_bounds_L[o1 * n_cells + id] = d_L_iph * vx_L_iph; - dev_bounds_L[o2 * n_cells + id] = d_L_iph * vy_L_iph; - dev_bounds_L[o3 * n_cells + id] = d_L_iph * vz_L_iph; - dev_bounds_L[4 * n_cells + id] = - (p_L_iph / (gamma - 1.0)) + 0.5 * d_L_iph * (vx_L_iph * vx_L_iph + vy_L_iph * vy_L_iph + vz_L_iph * vz_L_iph); + // bounds_L refers to the left side of the i+1/2 interface + id = xid + yid * nx + zid * nx * ny; + dev_bounds_L[id] = d_L_iph; + dev_bounds_L[o1 * n_cells + id] = d_L_iph * vx_L_iph; + dev_bounds_L[o2 * n_cells + id] = d_L_iph * vy_L_iph; + dev_bounds_L[o3 * n_cells + id] = d_L_iph * vz_L_iph; + dev_bounds_L[4 * n_cells + id] = + (p_L_iph / (gamma - 1.0)) + 0.5 * d_L_iph * (vx_L_iph * vx_L_iph + vy_L_iph * vy_L_iph + vz_L_iph * vz_L_iph); #ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - dev_bounds_L[(5 + i) * n_cells + id] = d_L_iph * scalar_L_iph[i]; - } + for (int i = 0; i < NSCALARS; i++) { + dev_bounds_L[(5 + i) * n_cells + id] = d_L_iph * scalar_L_iph[i]; + } #endif // SCALAR #ifdef DE - dev_bounds_L[(n_fields - 1) * n_cells + id] = d_L_iph * ge_L_iph; + dev_bounds_L[(n_fields - 1) * n_cells + id] = d_L_iph * ge_L_iph; #endif // DE - } } // ============================================================================= diff --git a/src/reconstruction/plmc_cuda_tests.cu b/src/reconstruction/plmc_cuda_tests.cu index dfc690b87..2aec8002a 100644 --- a/src/reconstruction/plmc_cuda_tests.cu +++ b/src/reconstruction/plmc_cuda_tests.cu @@ -28,9 +28,9 @@ TEST(tHYDROPlmcReconstructor, CorrectInputExpectCorrectOutput) std::uniform_real_distribution doubleRand(0.1, 5); // Mock up needed information - size_t const nx = 4; - size_t const ny = 1; - size_t const nz = 1; + size_t const nx = 5; + size_t const ny = 4; + size_t const nz = 4; size_t const n_fields = 5; double const dx = doubleRand(prng); double const dt = doubleRand(prng); @@ -47,19 +47,17 @@ TEST(tHYDROPlmcReconstructor, CorrectInputExpectCorrectOutput) dev_grid.cpyHostToDevice(host_grid); // Fiducial Data - std::unordered_map fiducial_interface_left = {{1, 0.76773614979894189}, - {5, 1.927149727335306}, - {9, 2.666157385974266}, - {13, 4.7339225843521469}, - {17, 21.643063389483491}}; - std::unordered_map fiducial_interface_right = {{0, 0.76773614979894189}, - {4, 1.927149727335306}, - {8, 2.666157385974266}, - {12, 4.7339225843521469}, - {16, 21.643063389483491}}; + std::unordered_map fiducial_interface_left = { + {26, 2.1584359129984056}, {27, 0.70033864721549188}, {106, 2.2476363309467553}, {107, 3.0633780053857027}, + {186, 2.2245934101106259}, {187, 2.1015872413794123}, {266, 2.1263341057778309}, {267, 3.9675148506537838}, + {346, 3.3640057502842691}, {347, 21.091316282933843}}; + std::unordered_map fiducial_interface_right = { + {25, 3.8877922383184833}, {26, 0.70033864721549188}, {105, 1.5947787943675635}, {106, 3.0633780053857027}, + {185, 4.0069556576401011}, {186, 2.1015872413794123}, {265, 1.7883678016935785}, {266, 3.9675148506537838}, + {345, 2.8032969746372527}, {346, 21.091316282933843}}; // Loop over different directions - for (size_t direction = 0; direction < 3; direction++) { + for (size_t direction = 0; direction < 1; direction++) { // Assign the shape size_t nx_rot, ny_rot, nz_rot; switch (direction) { @@ -92,30 +90,23 @@ TEST(tHYDROPlmcReconstructor, CorrectInputExpectCorrectOutput) // Perform Comparison for (size_t i = 0; i < host_grid.size(); i++) { - double absolute_diff; - int64_t ulps_diff; - // Check the left interface - double test_val = dev_interface_left.at(i); - double fiducial_val = (test_val == 0.0) ? 0.0 : fiducial_interface_left[i]; + double test_val = dev_interface_left.at(i); + double fiducial_val = + (fiducial_interface_left.find(i) == fiducial_interface_left.end()) ? 0.0 : fiducial_interface_left[i]; - EXPECT_TRUE(testingUtilities::nearlyEqualDbl(fiducial_val, test_val, absolute_diff, ulps_diff)) - << "Error in left interface" << std::endl - << "The fiducial value is: " << fiducial_val << std::endl - << "The test value is: " << test_val << std::endl - << "The absolute difference is: " << absolute_diff << std::endl - << "The ULP difference is: " << ulps_diff << std::endl; + testingUtilities::checkResults( + fiducial_val, test_val, + "left interface at i=" + std::to_string(i) + ", in direction " + std::to_string(direction)); // Check the left interface - test_val = dev_interface_right.at(i); - fiducial_val = (test_val == 0.0) ? 0.0 : fiducial_interface_right[i]; - - EXPECT_TRUE(testingUtilities::nearlyEqualDbl(fiducial_val, test_val, absolute_diff, ulps_diff)) - << "Error in rigt interface" << std::endl - << "The fiducial value is: " << fiducial_val << std::endl - << "The test value is: " << test_val << std::endl - << "The absolute difference is: " << absolute_diff << std::endl - << "The ULP difference is: " << ulps_diff << std::endl; + test_val = dev_interface_right.at(i); + fiducial_val = + (fiducial_interface_right.find(i) == fiducial_interface_right.end()) ? 0.0 : fiducial_interface_right[i]; + + testingUtilities::checkResults( + fiducial_val, test_val, + "right interface at i=" + std::to_string(i) + ", in direction " + std::to_string(direction)); } } } From 31cc3fc2cfd67f6a4c84e7a198cb2ac7fd0935d0 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Wed, 29 Mar 2023 14:22:56 -0400 Subject: [PATCH 341/694] Update id calculations --- src/reconstruction/plmc_cuda.cu | 20 +++++++++----------- src/riemann_solvers/hlld_cuda.cu | 4 ++-- 2 files changed, 11 insertions(+), 13 deletions(-) diff --git a/src/reconstruction/plmc_cuda.cu b/src/reconstruction/plmc_cuda.cu index fa9ba46a2..dafa35d1f 100644 --- a/src/reconstruction/plmc_cuda.cu +++ b/src/reconstruction/plmc_cuda.cu @@ -98,24 +98,22 @@ __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou #endif // SCALAR // get a thread ID - int blockId = blockIdx.x + blockIdx.y * gridDim.x; - int tid = threadIdx.x + blockId * blockDim.x; - int id; - int zid = tid / (nx * ny); - int yid = (tid - zid * nx * ny) / nx; - int xid = tid - zid * nx * ny - yid * nx; + int const tid = threadIdx.x + blockIdx.x * blockDim.x; + int xid, yid, zid; + cuda_utilities::compute3DIndices(tid, nx, ny, xid, yid, zid); // Thread guard to prevent overrun if (xid < 1 or xid >= nx - 2 or yid < 1 or yid >= ny - 2 or zid < 1 or zid >= nz - 2) { return; } + // load the 3-cell stencil into registers // cell i - id = xid + yid * nx + zid * nx * ny; - d_i = dev_conserved[id]; - vx_i = dev_conserved[o1 * n_cells + id] / d_i; - vy_i = dev_conserved[o2 * n_cells + id] / d_i; - vz_i = dev_conserved[o3 * n_cells + id] / d_i; + int id = xid + yid * nx + zid * nx * ny; + d_i = dev_conserved[id]; + vx_i = dev_conserved[o1 * n_cells + id] / d_i; + vy_i = dev_conserved[o2 * n_cells + id] / d_i; + vz_i = dev_conserved[o3 * n_cells + id] / d_i; #ifdef DE // PRESSURE_DE E = dev_conserved[4 * n_cells + id]; E_kin = 0.5 * d_i * (vx_i * vx_i + vy_i * vy_i + vz_i * vz_i); diff --git a/src/riemann_solvers/hlld_cuda.cu b/src/riemann_solvers/hlld_cuda.cu index 2f7cb0d58..c0708811a 100644 --- a/src/riemann_solvers/hlld_cuda.cu +++ b/src/riemann_solvers/hlld_cuda.cu @@ -35,9 +35,9 @@ __global__ void Calculate_HLLD_Fluxes_CUDA(Real const *dev_bounds_L, Real const Real const gamma, int const direction, int const n_fields) { // get a thread index - int threadId = threadIdx.x + blockIdx.x * blockDim.x; + int const threadId = threadIdx.x + blockIdx.x * blockDim.x; - // Thread guard to avoid overrun + // Thread guard to avoid overrunx if (threadId >= n_cells) { return; } From fce8e9038359e9c131568dd38d2711b23617889e Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Wed, 29 Mar 2023 14:49:25 -0400 Subject: [PATCH 342/694] PLMC: Replace data loads with function --- src/reconstruction/plmc_cuda.cu | 250 +++++++++++--------------------- 1 file changed, 81 insertions(+), 169 deletions(-) diff --git a/src/reconstruction/plmc_cuda.cu b/src/reconstruction/plmc_cuda.cu index dafa35d1f..02bdab3d0 100644 --- a/src/reconstruction/plmc_cuda.cu +++ b/src/reconstruction/plmc_cuda.cu @@ -48,12 +48,6 @@ __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou break; } - // declare primitive variables for each stencil - // these will be placed into registers for each thread - Real d_i, vx_i, vy_i, vz_i, p_i; - Real d_imo, vx_imo, vy_imo, vz_imo, p_imo; - Real d_ipo, vx_ipo, vy_ipo, vz_ipo, p_ipo; - // declare other variables to be used Real a_i; Real del_d_L, del_vx_L, del_vy_L, del_vz_L, del_p_L; @@ -98,119 +92,36 @@ __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou #endif // SCALAR // get a thread ID - int const tid = threadIdx.x + blockIdx.x * blockDim.x; + int const thread_id = threadIdx.x + blockIdx.x * blockDim.x; int xid, yid, zid; - cuda_utilities::compute3DIndices(tid, nx, ny, xid, yid, zid); + cuda_utilities::compute3DIndices(thread_id, nx, ny, xid, yid, zid); // Thread guard to prevent overrun if (xid < 1 or xid >= nx - 2 or yid < 1 or yid >= ny - 2 or zid < 1 or zid >= nz - 2) { return; } - // load the 3-cell stencil into registers // cell i - int id = xid + yid * nx + zid * nx * ny; - d_i = dev_conserved[id]; - vx_i = dev_conserved[o1 * n_cells + id] / d_i; - vy_i = dev_conserved[o2 * n_cells + id] / d_i; - vz_i = dev_conserved[o3 * n_cells + id] / d_i; -#ifdef DE // PRESSURE_DE - E = dev_conserved[4 * n_cells + id]; - E_kin = 0.5 * d_i * (vx_i * vx_i + vy_i * vy_i + vz_i * vz_i); - dge = dev_conserved[(n_fields - 1) * n_cells + id]; - p_i = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, dge, gamma); -#else // not DE - p_i = (dev_conserved[4 * n_cells + id] - 0.5 * d_i * (vx_i * vx_i + vy_i * vy_i + vz_i * vz_i)) * (gamma - 1.0); -#endif // PRESSURE_DE - p_i = fmax(p_i, (Real)TINY_NUMBER); -#ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - scalar_i[i] = dev_conserved[(5 + i) * n_cells + id] / d_i; - } -#endif // SCALAR -#ifdef DE - ge_i = dge / d_i; -#endif // DE - // cell i-1 - switch (dir) { - case 0: - id = xid - 1 + yid * nx + zid * nx * ny; - break; - case 1: - id = xid + (yid - 1) * nx + zid * nx * ny; - break; - case 2: - id = xid + yid * nx + (zid - 1) * nx * ny; - break; - } - d_imo = dev_conserved[id]; - vx_imo = dev_conserved[o1 * n_cells + id] / d_imo; - vy_imo = dev_conserved[o2 * n_cells + id] / d_imo; - vz_imo = dev_conserved[o3 * n_cells + id] / d_imo; -#ifdef DE // PRESSURE_DE - E = dev_conserved[4 * n_cells + id]; - E_kin = 0.5 * d_imo * (vx_imo * vx_imo + vy_imo * vy_imo + vz_imo * vz_imo); - dge = dev_conserved[(n_fields - 1) * n_cells + id]; - p_imo = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, dge, gamma); -#else // not DE - p_imo = (dev_conserved[4 * n_cells + id] - 0.5 * d_imo * (vx_imo * vx_imo + vy_imo * vy_imo + vz_imo * vz_imo)) * - (gamma - 1.0); -#endif // PRESSURE_DE - p_imo = fmax(p_imo, (Real)TINY_NUMBER); -#ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - scalar_imo[i] = dev_conserved[(5 + i) * n_cells + id] / d_imo; - } -#endif // SCALAR -#ifdef DE - ge_imo = dge / d_imo; -#endif // DE - // cell i+1 + plmc_utils::PlmcPrimitive const cell_i = + plmc_utils::Load_Data(dev_conserved, xid, yid, zid, nx, ny, n_cells, o1, o2, o3, gamma); - switch (dir) { - case 0: - id = xid + 1 + yid * nx + zid * nx * ny; - break; - case 1: - id = xid + (yid + 1) * nx + zid * nx * ny; - break; - case 2: - id = xid + yid * nx + (zid + 1) * nx * ny; - break; - } + // cell i-1. The equality checks check the direction and subtract one from the direction + plmc_utils::PlmcPrimitive const cell_imo = plmc_utils::Load_Data( + dev_conserved, xid - int(dir == 0), yid - int(dir == 1), zid - int(dir == 2), nx, ny, n_cells, o1, o2, o3, gamma); - d_ipo = dev_conserved[id]; - vx_ipo = dev_conserved[o1 * n_cells + id] / d_ipo; - vy_ipo = dev_conserved[o2 * n_cells + id] / d_ipo; - vz_ipo = dev_conserved[o3 * n_cells + id] / d_ipo; -#ifdef DE // PRESSURE_DE - E = dev_conserved[4 * n_cells + id]; - E_kin = 0.5 * d_ipo * (vx_ipo * vx_ipo + vy_ipo * vy_ipo + vz_ipo * vz_ipo); - dge = dev_conserved[(n_fields - 1) * n_cells + id]; - p_ipo = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, dge, gamma); -#else // not DE - p_ipo = (dev_conserved[4 * n_cells + id] - 0.5 * d_ipo * (vx_ipo * vx_ipo + vy_ipo * vy_ipo + vz_ipo * vz_ipo)) * - (gamma - 1.0); -#endif // PRESSURE_DE - p_ipo = fmax(p_ipo, (Real)TINY_NUMBER); -#ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - scalar_ipo[i] = dev_conserved[(5 + i) * n_cells + id] / d_ipo; - } -#endif // SCALAR -#ifdef DE - ge_ipo = dge / d_ipo; -#endif // DE + // cell i+1. The equality checks check the direction and add one to the direction + plmc_utils::PlmcPrimitive const cell_ipo = plmc_utils::Load_Data( + dev_conserved, xid + int(dir == 0), yid + int(dir == 1), zid + int(dir == 2), nx, ny, n_cells, o1, o2, o3, gamma); // calculate the adiabatic sound speed in cell i - a_i = sqrt(gamma * p_i / d_i); + a_i = sqrt(gamma * cell_i.pressure / cell_i.density); // Compute the eigenvalues of the linearized equations in the // primitive variables using the cell-centered primitive variables #ifndef VL - lambda_m = vx_i - a_i; - lambda_0 = vx_i; - lambda_p = vx_i + a_i; + lambda_m = cell_i.velocity_x - a_i; + lambda_0 = cell_i.velocity_x; + lambda_p = cell_i.velocity_x + a_i; #endif // VL // Compute the left, right, centered, and van Leer differences of the @@ -218,25 +129,25 @@ __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou // the cell center // left - del_d_L = d_i - d_imo; - del_vx_L = vx_i - vx_imo; - del_vy_L = vy_i - vy_imo; - del_vz_L = vz_i - vz_imo; - del_p_L = p_i - p_imo; + del_d_L = cell_i.density - cell_imo.density; + del_vx_L = cell_i.velocity_x - cell_imo.velocity_x; + del_vy_L = cell_i.velocity_y - cell_imo.velocity_y; + del_vz_L = cell_i.velocity_z - cell_imo.velocity_z; + del_p_L = cell_i.pressure - cell_imo.pressure; // right - del_d_R = d_ipo - d_i; - del_vx_R = vx_ipo - vx_i; - del_vy_R = vy_ipo - vy_i; - del_vz_R = vz_ipo - vz_i; - del_p_R = p_ipo - p_i; + del_d_R = cell_ipo.density - cell_i.density; + del_vx_R = cell_ipo.velocity_x - cell_i.velocity_x; + del_vy_R = cell_ipo.velocity_y - cell_i.velocity_y; + del_vz_R = cell_ipo.velocity_z - cell_i.velocity_z; + del_p_R = cell_ipo.pressure - cell_i.pressure; // centered - del_d_C = 0.5 * (d_ipo - d_imo); - del_vx_C = 0.5 * (vx_ipo - vx_imo); - del_vy_C = 0.5 * (vy_ipo - vy_imo); - del_vz_C = 0.5 * (vz_ipo - vz_imo); - del_p_C = 0.5 * (p_ipo - p_imo); + del_d_C = 0.5 * (cell_ipo.density - cell_imo.density); + del_vx_C = 0.5 * (cell_ipo.velocity_x - cell_imo.velocity_x); + del_vy_C = 0.5 * (cell_ipo.velocity_y - cell_imo.velocity_y); + del_vz_C = 0.5 * (cell_ipo.velocity_z - cell_imo.velocity_z); + del_p_C = 0.5 * (cell_ipo.pressure - cell_imo.pressure); // Van Leer if (del_d_L * del_d_R > 0.0) { @@ -292,29 +203,29 @@ __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou // characteristic variables Stone Eqn 37 (del_a are differences in // characteristic variables, see Stone for notation) Use the eigenvectors // given in Stone 2008, Appendix A - del_a_0_L = -d_i * del_vx_L / (2 * a_i) + del_p_L / (2 * a_i * a_i); + del_a_0_L = -cell_i.density * del_vx_L / (2 * a_i) + del_p_L / (2 * a_i * a_i); del_a_1_L = del_d_L - del_p_L / (a_i * a_i); del_a_2_L = del_vy_L; del_a_3_L = del_vz_L; - del_a_4_L = d_i * del_vx_L / (2 * a_i) + del_p_L / (2 * a_i * a_i); + del_a_4_L = cell_i.density * del_vx_L / (2 * a_i) + del_p_L / (2 * a_i * a_i); - del_a_0_R = -d_i * del_vx_R / (2 * a_i) + del_p_R / (2 * a_i * a_i); + del_a_0_R = -cell_i.density * del_vx_R / (2 * a_i) + del_p_R / (2 * a_i * a_i); del_a_1_R = del_d_R - del_p_R / (a_i * a_i); del_a_2_R = del_vy_R; del_a_3_R = del_vz_R; - del_a_4_R = d_i * del_vx_R / (2 * a_i) + del_p_R / (2 * a_i * a_i); + del_a_4_R = cell_i.density * del_vx_R / (2 * a_i) + del_p_R / (2 * a_i * a_i); - del_a_0_C = -d_i * del_vx_C / (2 * a_i) + del_p_C / (2 * a_i * a_i); + del_a_0_C = -cell_i.density * del_vx_C / (2 * a_i) + del_p_C / (2 * a_i * a_i); del_a_1_C = del_d_C - del_p_C / (a_i * a_i); del_a_2_C = del_vy_C; del_a_3_C = del_vz_C; - del_a_4_C = d_i * del_vx_C / (2 * a_i) + del_p_C / (2 * a_i * a_i); + del_a_4_C = cell_i.density * del_vx_C / (2 * a_i) + del_p_C / (2 * a_i * a_i); - del_a_0_G = -d_i * del_vx_G / (2 * a_i) + del_p_G / (2 * a_i * a_i); + del_a_0_G = -cell_i.density * del_vx_G / (2 * a_i) + del_p_G / (2 * a_i * a_i); del_a_1_G = del_d_G - del_p_G / (a_i * a_i); del_a_2_G = del_vy_G; del_a_3_G = del_vz_G; - del_a_4_G = d_i * del_vx_G / (2 * a_i) + del_p_G / (2 * a_i * a_i); + del_a_4_G = cell_i.density * del_vx_G / (2 * a_i) + del_p_G / (2 * a_i * a_i); // Apply monotonicity constraints to the differences in the characteristic // variables @@ -368,7 +279,7 @@ __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou // Project the monotonized difference in the characteristic variables back // onto the primitive variables Stone Eqn 39 del_d_m_i = del_a_0_m + del_a_1_m + del_a_4_m; - del_vx_m_i = -a_i * del_a_0_m / d_i + a_i * del_a_4_m / d_i; + del_vx_m_i = -a_i * del_a_0_m / cell_i.density + a_i * del_a_4_m / cell_i.density; del_vy_m_i = del_a_2_m; del_vz_m_i = del_a_3_m; del_p_m_i = a_i * a_i * del_a_0_m + a_i * a_i * del_a_4_m; @@ -376,17 +287,17 @@ __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou // Compute the left and right interface values using the monotonized // difference in the primitive variables - d_R_imh = d_i - 0.5 * del_d_m_i; - vx_R_imh = vx_i - 0.5 * del_vx_m_i; - vy_R_imh = vy_i - 0.5 * del_vy_m_i; - vz_R_imh = vz_i - 0.5 * del_vz_m_i; - p_R_imh = p_i - 0.5 * del_p_m_i; + d_R_imh = cell_i.density - 0.5 * del_d_m_i; + vx_R_imh = cell_i.velocity_x - 0.5 * del_vx_m_i; + vy_R_imh = cell_i.velocity_y - 0.5 * del_vy_m_i; + vz_R_imh = cell_i.velocity_z - 0.5 * del_vz_m_i; + p_R_imh = cell_i.pressure - 0.5 * del_p_m_i; - d_L_iph = d_i + 0.5 * del_d_m_i; - vx_L_iph = vx_i + 0.5 * del_vx_m_i; - vy_L_iph = vy_i + 0.5 * del_vy_m_i; - vz_L_iph = vz_i + 0.5 * del_vz_m_i; - p_L_iph = p_i + 0.5 * del_p_m_i; + d_L_iph = cell_i.density + 0.5 * del_d_m_i; + vx_L_iph = cell_i.velocity_x + 0.5 * del_vx_m_i; + vy_L_iph = cell_i.velocity_y + 0.5 * del_vy_m_i; + vz_L_iph = cell_i.velocity_z + 0.5 * del_vz_m_i; + p_L_iph = cell_i.pressure + 0.5 * del_p_m_i; #ifdef DE ge_R_imh = ge_i - 0.5 * del_ge_m_i; @@ -401,43 +312,43 @@ __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou // try removing this on shock tubes C = d_R_imh + d_L_iph; - d_R_imh = fmax(fmin(d_i, d_imo), d_R_imh); - d_R_imh = fmin(fmax(d_i, d_imo), d_R_imh); + d_R_imh = fmax(fmin(cell_i.density, cell_imo.density), d_R_imh); + d_R_imh = fmin(fmax(cell_i.density, cell_imo.density), d_R_imh); d_L_iph = C - d_R_imh; - d_L_iph = fmax(fmin(d_i, d_ipo), d_L_iph); - d_L_iph = fmin(fmax(d_i, d_ipo), d_L_iph); + d_L_iph = fmax(fmin(cell_i.density, cell_ipo.density), d_L_iph); + d_L_iph = fmin(fmax(cell_i.density, cell_ipo.density), d_L_iph); d_R_imh = C - d_L_iph; C = vx_R_imh + vx_L_iph; - vx_R_imh = fmax(fmin(vx_i, vx_imo), vx_R_imh); - vx_R_imh = fmin(fmax(vx_i, vx_imo), vx_R_imh); + vx_R_imh = fmax(fmin(cell_i.velocity_x, cell_imo.velocity_x), vx_R_imh); + vx_R_imh = fmin(fmax(cell_i.velocity_x, cell_imo.velocity_x), vx_R_imh); vx_L_iph = C - vx_R_imh; - vx_L_iph = fmax(fmin(vx_i, vx_ipo), vx_L_iph); - vx_L_iph = fmin(fmax(vx_i, vx_ipo), vx_L_iph); + vx_L_iph = fmax(fmin(cell_i.velocity_x, cell_ipo.velocity_x), vx_L_iph); + vx_L_iph = fmin(fmax(cell_i.velocity_x, cell_ipo.velocity_x), vx_L_iph); vx_R_imh = C - vx_L_iph; C = vy_R_imh + vy_L_iph; - vy_R_imh = fmax(fmin(vy_i, vy_imo), vy_R_imh); - vy_R_imh = fmin(fmax(vy_i, vy_imo), vy_R_imh); + vy_R_imh = fmax(fmin(cell_i.velocity_y, cell_imo.velocity_y), vy_R_imh); + vy_R_imh = fmin(fmax(cell_i.velocity_y, cell_imo.velocity_y), vy_R_imh); vy_L_iph = C - vy_R_imh; - vy_L_iph = fmax(fmin(vy_i, vy_ipo), vy_L_iph); - vy_L_iph = fmin(fmax(vy_i, vy_ipo), vy_L_iph); + vy_L_iph = fmax(fmin(cell_i.velocity_y, cell_ipo.velocity_y), vy_L_iph); + vy_L_iph = fmin(fmax(cell_i.velocity_y, cell_ipo.velocity_y), vy_L_iph); vy_R_imh = C - vy_L_iph; C = vz_R_imh + vz_L_iph; - vz_R_imh = fmax(fmin(vz_i, vz_imo), vz_R_imh); - vz_R_imh = fmin(fmax(vz_i, vz_imo), vz_R_imh); + vz_R_imh = fmax(fmin(cell_i.velocity_z, cell_imo.velocity_z), vz_R_imh); + vz_R_imh = fmin(fmax(cell_i.velocity_z, cell_imo.velocity_z), vz_R_imh); vz_L_iph = C - vz_R_imh; - vz_L_iph = fmax(fmin(vz_i, vz_ipo), vz_L_iph); - vz_L_iph = fmin(fmax(vz_i, vz_ipo), vz_L_iph); + vz_L_iph = fmax(fmin(cell_i.velocity_z, cell_ipo.velocity_z), vz_L_iph); + vz_L_iph = fmin(fmax(cell_i.velocity_z, cell_ipo.velocity_z), vz_L_iph); vz_R_imh = C - vz_L_iph; C = p_R_imh + p_L_iph; - p_R_imh = fmax(fmin(p_i, p_imo), p_R_imh); - p_R_imh = fmin(fmax(p_i, p_imo), p_R_imh); + p_R_imh = fmax(fmin(cell_i.pressure, cell_imo.pressure), p_R_imh); + p_R_imh = fmin(fmax(cell_i.pressure, cell_imo.pressure), p_R_imh); p_L_iph = C - p_R_imh; - p_L_iph = fmax(fmin(p_i, p_ipo), p_L_iph); - p_L_iph = fmin(fmax(p_i, p_ipo), p_L_iph); + p_L_iph = fmax(fmin(cell_i.pressure, cell_ipo.pressure), p_L_iph); + p_L_iph = fmin(fmax(cell_i.pressure, cell_ipo.pressure), p_L_iph); p_R_imh = C - p_L_iph; del_d_m_i = d_L_iph - d_R_imh; @@ -515,9 +426,9 @@ __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou if (lambda_m >= 0) { lamdiff = lambda_p - lambda_m; - sum_0 += lamdiff * (-d_i * del_vx_m_i / (2 * a_i) + del_p_m_i / (2 * a_i * a_i)); - sum_1 += lamdiff * (del_vx_m_i / 2.0 - del_p_m_i / (2 * a_i * d_i)); - sum_4 += lamdiff * (-d_i * del_vx_m_i * a_i / 2.0 + del_p_m_i / 2.0); + sum_0 += lamdiff * (-cell_i.density * del_vx_m_i / (2 * a_i) + del_p_m_i / (2 * a_i * a_i)); + sum_1 += lamdiff * (del_vx_m_i / 2.0 - del_p_m_i / (2 * a_i * cell_i.density)); + sum_4 += lamdiff * (-cell_i.density * del_vx_m_i * a_i / 2.0 + del_p_m_i / 2.0); } if (lambda_0 >= 0) { lamdiff = lambda_p - lambda_0; @@ -537,9 +448,9 @@ __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou if (lambda_p >= 0) { lamdiff = lambda_p - lambda_p; - sum_0 += lamdiff * (d_i * del_vx_m_i / (2 * a_i) + del_p_m_i / (2 * a_i * a_i)); - sum_1 += lamdiff * (del_vx_m_i / 2.0 + del_p_m_i / (2 * a_i * d_i)); - sum_4 += lamdiff * (d_i * del_vx_m_i * a_i / 2.0 + del_p_m_i / 2.0); + sum_0 += lamdiff * (cell_i.density * del_vx_m_i / (2 * a_i) + del_p_m_i / (2 * a_i * a_i)); + sum_1 += lamdiff * (del_vx_m_i / 2.0 + del_p_m_i / (2 * a_i * cell_i.density)); + sum_4 += lamdiff * (cell_i.density * del_vx_m_i * a_i / 2.0 + del_p_m_i / 2.0); } // add the corrections to the initial guesses for the interface values @@ -570,9 +481,9 @@ __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou if (lambda_m <= 0) { lamdiff = lambda_m - lambda_m; - sum_0 += lamdiff * (-d_i * del_vx_m_i / (2 * a_i) + del_p_m_i / (2 * a_i * a_i)); - sum_1 += lamdiff * (del_vx_m_i / 2.0 - del_p_m_i / (2 * a_i * d_i)); - sum_4 += lamdiff * (-d_i * del_vx_m_i * a_i / 2.0 + del_p_m_i / 2.0); + sum_0 += lamdiff * (-cell_i.density * del_vx_m_i / (2 * a_i) + del_p_m_i / (2 * a_i * a_i)); + sum_1 += lamdiff * (del_vx_m_i / 2.0 - del_p_m_i / (2 * a_i * cell_i.density)); + sum_4 += lamdiff * (-cell_i.density * del_vx_m_i * a_i / 2.0 + del_p_m_i / 2.0); } if (lambda_0 <= 0) { lamdiff = lambda_m - lambda_0; @@ -592,9 +503,9 @@ __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou if (lambda_p <= 0) { lamdiff = lambda_m - lambda_p; - sum_0 += lamdiff * (d_i * del_vx_m_i / (2 * a_i) + del_p_m_i / (2 * a_i * a_i)); - sum_1 += lamdiff * (del_vx_m_i / 2.0 + del_p_m_i / (2 * a_i * d_i)); - sum_4 += lamdiff * (d_i * del_vx_m_i * a_i / 2.0 + del_p_m_i / 2.0); + sum_0 += lamdiff * (cell_i.density * del_vx_m_i / (2 * a_i) + del_p_m_i / (2 * a_i * a_i)); + sum_1 += lamdiff * (del_vx_m_i / 2.0 + del_p_m_i / (2 * a_i * cell_i.density)); + sum_4 += lamdiff * (cell_i.density * del_vx_m_i * a_i / 2.0 + del_p_m_i / 2.0); } // add the corrections @@ -622,6 +533,7 @@ __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou // Convert the left and right states in the primitive to the conserved // variables send final values back from kernel bounds_R refers to the right // side of the i-1/2 interface + int id; switch (dir) { case 0: id = xid - 1 + yid * nx + zid * nx * ny; From 0ffbc9c4c7f93e808c8905bf166bfbd04060d0f7 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Wed, 29 Mar 2023 14:56:41 -0400 Subject: [PATCH 343/694] rename sound speed --- src/reconstruction/plmc_cuda.cu | 64 ++++++++++++++++----------------- src/utils/hydro_utilities.h | 24 +++++++++++++ 2 files changed, 56 insertions(+), 32 deletions(-) diff --git a/src/reconstruction/plmc_cuda.cu b/src/reconstruction/plmc_cuda.cu index 02bdab3d0..631069f87 100644 --- a/src/reconstruction/plmc_cuda.cu +++ b/src/reconstruction/plmc_cuda.cu @@ -49,7 +49,6 @@ __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou } // declare other variables to be used - Real a_i; Real del_d_L, del_vx_L, del_vy_L, del_vz_L, del_p_L; Real del_d_R, del_vx_R, del_vy_R, del_vz_R, del_p_R; Real del_d_C, del_vx_C, del_vy_C, del_vz_C, del_p_C; @@ -114,14 +113,15 @@ __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou dev_conserved, xid + int(dir == 0), yid + int(dir == 1), zid + int(dir == 2), nx, ny, n_cells, o1, o2, o3, gamma); // calculate the adiabatic sound speed in cell i - a_i = sqrt(gamma * cell_i.pressure / cell_i.density); + Real const sound_speed = hydro_utilities::Calc_Sound_Speed(cell_i.pressure, cell_i.density, gamma); + Real const sound_speed_squared = sound_speed * sound_speed; // Compute the eigenvalues of the linearized equations in the // primitive variables using the cell-centered primitive variables #ifndef VL - lambda_m = cell_i.velocity_x - a_i; + lambda_m = cell_i.velocity_x - sound_speed; lambda_0 = cell_i.velocity_x; - lambda_p = cell_i.velocity_x + a_i; + lambda_p = cell_i.velocity_x + sound_speed; #endif // VL // Compute the left, right, centered, and van Leer differences of the @@ -203,29 +203,29 @@ __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou // characteristic variables Stone Eqn 37 (del_a are differences in // characteristic variables, see Stone for notation) Use the eigenvectors // given in Stone 2008, Appendix A - del_a_0_L = -cell_i.density * del_vx_L / (2 * a_i) + del_p_L / (2 * a_i * a_i); - del_a_1_L = del_d_L - del_p_L / (a_i * a_i); + del_a_0_L = -cell_i.density * del_vx_L / (2 * sound_speed) + del_p_L / (2 * sound_speed * sound_speed); + del_a_1_L = del_d_L - del_p_L / (sound_speed_squared); del_a_2_L = del_vy_L; del_a_3_L = del_vz_L; - del_a_4_L = cell_i.density * del_vx_L / (2 * a_i) + del_p_L / (2 * a_i * a_i); + del_a_4_L = cell_i.density * del_vx_L / (2 * sound_speed) + del_p_L / (2 * sound_speed_squared); - del_a_0_R = -cell_i.density * del_vx_R / (2 * a_i) + del_p_R / (2 * a_i * a_i); - del_a_1_R = del_d_R - del_p_R / (a_i * a_i); + del_a_0_R = -cell_i.density * del_vx_R / (2 * sound_speed) + del_p_R / (2 * sound_speed_squared); + del_a_1_R = del_d_R - del_p_R / (sound_speed_squared); del_a_2_R = del_vy_R; del_a_3_R = del_vz_R; - del_a_4_R = cell_i.density * del_vx_R / (2 * a_i) + del_p_R / (2 * a_i * a_i); + del_a_4_R = cell_i.density * del_vx_R / (2 * sound_speed) + del_p_R / (2 * sound_speed_squared); - del_a_0_C = -cell_i.density * del_vx_C / (2 * a_i) + del_p_C / (2 * a_i * a_i); - del_a_1_C = del_d_C - del_p_C / (a_i * a_i); + del_a_0_C = -cell_i.density * del_vx_C / (2 * sound_speed) + del_p_C / (2 * sound_speed_squared); + del_a_1_C = del_d_C - del_p_C / (sound_speed_squared); del_a_2_C = del_vy_C; del_a_3_C = del_vz_C; - del_a_4_C = cell_i.density * del_vx_C / (2 * a_i) + del_p_C / (2 * a_i * a_i); + del_a_4_C = cell_i.density * del_vx_C / (2 * sound_speed) + del_p_C / (2 * sound_speed_squared); - del_a_0_G = -cell_i.density * del_vx_G / (2 * a_i) + del_p_G / (2 * a_i * a_i); - del_a_1_G = del_d_G - del_p_G / (a_i * a_i); + del_a_0_G = -cell_i.density * del_vx_G / (2 * sound_speed) + del_p_G / (2 * sound_speed_squared); + del_a_1_G = del_d_G - del_p_G / (sound_speed_squared); del_a_2_G = del_vy_G; del_a_3_G = del_vz_G; - del_a_4_G = cell_i.density * del_vx_G / (2 * a_i) + del_p_G / (2 * a_i * a_i); + del_a_4_G = cell_i.density * del_vx_G / (2 * sound_speed) + del_p_G / (2 * sound_speed_squared); // Apply monotonicity constraints to the differences in the characteristic // variables @@ -279,10 +279,10 @@ __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou // Project the monotonized difference in the characteristic variables back // onto the primitive variables Stone Eqn 39 del_d_m_i = del_a_0_m + del_a_1_m + del_a_4_m; - del_vx_m_i = -a_i * del_a_0_m / cell_i.density + a_i * del_a_4_m / cell_i.density; + del_vx_m_i = -sound_speed * del_a_0_m / cell_i.density + sound_speed * del_a_4_m / cell_i.density; del_vy_m_i = del_a_2_m; del_vz_m_i = del_a_3_m; - del_p_m_i = a_i * a_i * del_a_0_m + a_i * a_i * del_a_4_m; + del_p_m_i = sound_speed_squared * del_a_0_m + sound_speed_squared * del_a_4_m; // Compute the left and right interface values using the monotonized // difference in the primitive variables @@ -426,14 +426,14 @@ __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou if (lambda_m >= 0) { lamdiff = lambda_p - lambda_m; - sum_0 += lamdiff * (-cell_i.density * del_vx_m_i / (2 * a_i) + del_p_m_i / (2 * a_i * a_i)); - sum_1 += lamdiff * (del_vx_m_i / 2.0 - del_p_m_i / (2 * a_i * cell_i.density)); - sum_4 += lamdiff * (-cell_i.density * del_vx_m_i * a_i / 2.0 + del_p_m_i / 2.0); + sum_0 += lamdiff * (-cell_i.density * del_vx_m_i / (2 * sound_speed) + del_p_m_i / (2 * sound_speed_squared)); + sum_1 += lamdiff * (del_vx_m_i / 2.0 - del_p_m_i / (2 * sound_speed * cell_i.density)); + sum_4 += lamdiff * (-cell_i.density * del_vx_m_i * sound_speed / 2.0 + del_p_m_i / 2.0); } if (lambda_0 >= 0) { lamdiff = lambda_p - lambda_0; - sum_0 += lamdiff * (del_d_m_i - del_p_m_i / (a_i * a_i)); + sum_0 += lamdiff * (del_d_m_i - del_p_m_i / (sound_speed_squared)); sum_2 += lamdiff * del_vy_m_i; sum_3 += lamdiff * del_vz_m_i; #ifdef DE @@ -448,9 +448,9 @@ __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou if (lambda_p >= 0) { lamdiff = lambda_p - lambda_p; - sum_0 += lamdiff * (cell_i.density * del_vx_m_i / (2 * a_i) + del_p_m_i / (2 * a_i * a_i)); - sum_1 += lamdiff * (del_vx_m_i / 2.0 + del_p_m_i / (2 * a_i * cell_i.density)); - sum_4 += lamdiff * (cell_i.density * del_vx_m_i * a_i / 2.0 + del_p_m_i / 2.0); + sum_0 += lamdiff * (cell_i.density * del_vx_m_i / (2 * sound_speed) + del_p_m_i / (2 * sound_speed_squared)); + sum_1 += lamdiff * (del_vx_m_i / 2.0 + del_p_m_i / (2 * sound_speed * cell_i.density)); + sum_4 += lamdiff * (cell_i.density * del_vx_m_i * sound_speed / 2.0 + del_p_m_i / 2.0); } // add the corrections to the initial guesses for the interface values @@ -481,14 +481,14 @@ __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou if (lambda_m <= 0) { lamdiff = lambda_m - lambda_m; - sum_0 += lamdiff * (-cell_i.density * del_vx_m_i / (2 * a_i) + del_p_m_i / (2 * a_i * a_i)); - sum_1 += lamdiff * (del_vx_m_i / 2.0 - del_p_m_i / (2 * a_i * cell_i.density)); - sum_4 += lamdiff * (-cell_i.density * del_vx_m_i * a_i / 2.0 + del_p_m_i / 2.0); + sum_0 += lamdiff * (-cell_i.density * del_vx_m_i / (2 * sound_speed) + del_p_m_i / (2 * sound_speed_squared)); + sum_1 += lamdiff * (del_vx_m_i / 2.0 - del_p_m_i / (2 * sound_speed * cell_i.density)); + sum_4 += lamdiff * (-cell_i.density * del_vx_m_i * sound_speed / 2.0 + del_p_m_i / 2.0); } if (lambda_0 <= 0) { lamdiff = lambda_m - lambda_0; - sum_0 += lamdiff * (del_d_m_i - del_p_m_i / (a_i * a_i)); + sum_0 += lamdiff * (del_d_m_i - del_p_m_i / (sound_speed_squared)); sum_2 += lamdiff * del_vy_m_i; sum_3 += lamdiff * del_vz_m_i; #ifdef DE @@ -503,9 +503,9 @@ __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou if (lambda_p <= 0) { lamdiff = lambda_m - lambda_p; - sum_0 += lamdiff * (cell_i.density * del_vx_m_i / (2 * a_i) + del_p_m_i / (2 * a_i * a_i)); - sum_1 += lamdiff * (del_vx_m_i / 2.0 + del_p_m_i / (2 * a_i * cell_i.density)); - sum_4 += lamdiff * (cell_i.density * del_vx_m_i * a_i / 2.0 + del_p_m_i / 2.0); + sum_0 += lamdiff * (cell_i.density * del_vx_m_i / (2 * sound_speed) + del_p_m_i / (2 * sound_speed_squared)); + sum_1 += lamdiff * (del_vx_m_i / 2.0 + del_p_m_i / (2 * sound_speed * cell_i.density)); + sum_4 += lamdiff * (cell_i.density * del_vx_m_i * sound_speed / 2.0 + del_p_m_i / 2.0); } // add the corrections diff --git a/src/utils/hydro_utilities.h b/src/utils/hydro_utilities.h index fb1621d09..c0f783e1c 100644 --- a/src/utils/hydro_utilities.h +++ b/src/utils/hydro_utilities.h @@ -148,6 +148,17 @@ inline __host__ __device__ Real Calc_Kinetic_Energy_From_Momentum(Real const &d, return (0.5 / d) * (mx * mx + my * my * mz * mz); } +/*! + * \brief Compute the sound speed in the cell from conserved variables + * + * \param E Energy + * \param d densidy + * \param mx x momentum + * \param my y momentum + * \param mz z momentum + * \param gamma adiabatic index + * \return Real The sound speed + */ inline __host__ __device__ Real Calc_Sound_Speed(Real const &E, Real const &d, Real const &mx, Real const &my, Real const &mz, Real const &gamma) { @@ -155,4 +166,17 @@ inline __host__ __device__ Real Calc_Sound_Speed(Real const &E, Real const &d, R return sqrt(gamma * P / d); } +/*! + * \brief Compute the sound in the cell from primitive variables + * + * \param P + * \param d + * \param gamma + * \return __host__ + */ +inline __host__ __device__ Real Calc_Sound_Speed(Real const &P, Real const &d, Real const &gamma) +{ + return sqrt(gamma * P / d); +} + } // namespace hydro_utilities From 0a4968b2973d13f7fbad1646db104f73fdae67e3 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Wed, 29 Mar 2023 15:50:54 -0400 Subject: [PATCH 344/694] PLMC: replace slopes with functions --- src/reconstruction/plmc_cuda.cu | 251 ++++++++++++++++---------------- src/reconstruction/plmc_cuda.h | 40 +++++ 2 files changed, 168 insertions(+), 123 deletions(-) diff --git a/src/reconstruction/plmc_cuda.cu b/src/reconstruction/plmc_cuda.cu index 631069f87..5fc6a5219 100644 --- a/src/reconstruction/plmc_cuda.cu +++ b/src/reconstruction/plmc_cuda.cu @@ -49,10 +49,6 @@ __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou } // declare other variables to be used - Real del_d_L, del_vx_L, del_vy_L, del_vz_L, del_p_L; - Real del_d_R, del_vx_R, del_vy_R, del_vz_R, del_p_R; - Real del_d_C, del_vx_C, del_vy_C, del_vz_C, del_p_C; - Real del_d_G, del_vx_G, del_vy_G, del_vz_G, del_p_G; Real del_a_0_L, del_a_1_L, del_a_2_L, del_a_3_L, del_a_4_L; Real del_a_0_R, del_a_1_R, del_a_2_R, del_a_3_R, del_a_4_R; Real del_a_0_C, del_a_1_C, del_a_2_C, del_a_3_C, del_a_4_C; @@ -65,14 +61,11 @@ __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou Real C; #ifndef VL Real dtodx = dt / dx; - Real lambda_m, lambda_0, lambda_p; Real qx; Real lamdiff; Real sum_0, sum_1, sum_2, sum_3, sum_4; #endif // not VL #ifdef DE - Real ge_i, ge_imo, ge_ipo; - Real del_ge_L, del_ge_R, del_ge_C, del_ge_G; Real del_ge_m_i; Real ge_L_iph, ge_R_imh; Real E, E_kin, dge; @@ -81,8 +74,6 @@ __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou #endif // CTU #endif // DE #ifdef SCALAR - Real scalar_i[NSCALARS], scalar_imo[NSCALARS], scalar_ipo[NSCALARS]; - Real del_scalar_L[NSCALARS], del_scalar_R[NSCALARS], del_scalar_C[NSCALARS], del_scalar_G[NSCALARS]; Real del_scalar_m_i[NSCALARS]; Real scalar_L_iph[NSCALARS], scalar_R_imh[NSCALARS]; #ifndef VL @@ -119,9 +110,9 @@ __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou // Compute the eigenvalues of the linearized equations in the // primitive variables using the cell-centered primitive variables #ifndef VL - lambda_m = cell_i.velocity_x - sound_speed; - lambda_0 = cell_i.velocity_x; - lambda_p = cell_i.velocity_x + sound_speed; + Real const lambda_m = cell_i.velocity_x - sound_speed; + Real const lambda_0 = cell_i.velocity_x; + Real const lambda_p = cell_i.velocity_x + sound_speed; #endif // VL // Compute the left, right, centered, and van Leer differences of the @@ -129,103 +120,44 @@ __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou // the cell center // left - del_d_L = cell_i.density - cell_imo.density; - del_vx_L = cell_i.velocity_x - cell_imo.velocity_x; - del_vy_L = cell_i.velocity_y - cell_imo.velocity_y; - del_vz_L = cell_i.velocity_z - cell_imo.velocity_z; - del_p_L = cell_i.pressure - cell_imo.pressure; + plmc_utils::PlmcPrimitive const del_L = plmc_utils::Compute_Slope(cell_i, cell_imo); // right - del_d_R = cell_ipo.density - cell_i.density; - del_vx_R = cell_ipo.velocity_x - cell_i.velocity_x; - del_vy_R = cell_ipo.velocity_y - cell_i.velocity_y; - del_vz_R = cell_ipo.velocity_z - cell_i.velocity_z; - del_p_R = cell_ipo.pressure - cell_i.pressure; + plmc_utils::PlmcPrimitive const del_R = plmc_utils::Compute_Slope(cell_ipo, cell_i); // centered - del_d_C = 0.5 * (cell_ipo.density - cell_imo.density); - del_vx_C = 0.5 * (cell_ipo.velocity_x - cell_imo.velocity_x); - del_vy_C = 0.5 * (cell_ipo.velocity_y - cell_imo.velocity_y); - del_vz_C = 0.5 * (cell_ipo.velocity_z - cell_imo.velocity_z); - del_p_C = 0.5 * (cell_ipo.pressure - cell_imo.pressure); + plmc_utils::PlmcPrimitive const del_C = plmc_utils::Compute_Slope(cell_ipo, cell_imo, 0.5); // Van Leer - if (del_d_L * del_d_R > 0.0) { - del_d_G = 2.0 * del_d_L * del_d_R / (del_d_L + del_d_R); - } else { - del_d_G = 0.0; - } - if (del_vx_L * del_vx_R > 0.0) { - del_vx_G = 2.0 * del_vx_L * del_vx_R / (del_vx_L + del_vx_R); - } else { - del_vx_G = 0.0; - } - if (del_vy_L * del_vy_R > 0.0) { - del_vy_G = 2.0 * del_vy_L * del_vy_R / (del_vy_L + del_vy_R); - } else { - del_vy_G = 0.0; - } - if (del_vz_L * del_vz_R > 0.0) { - del_vz_G = 2.0 * del_vz_L * del_vz_R / (del_vz_L + del_vz_R); - } else { - del_vz_G = 0.0; - } - if (del_p_L * del_p_R > 0.0) { - del_p_G = 2.0 * del_p_L * del_p_R / (del_p_L + del_p_R); - } else { - del_p_G = 0.0; - } - -#ifdef DE - del_ge_L = ge_i - ge_imo; - del_ge_R = ge_ipo - ge_i; - del_ge_C = 0.5 * (ge_ipo - ge_imo); - if (del_ge_L * del_ge_R > 0.0) { - del_ge_G = 2.0 * del_ge_L * del_ge_R / (del_ge_L + del_ge_R); - } else { - del_ge_G = 0.0; - } -#endif // DE -#ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - del_scalar_L[i] = scalar_i[i] - scalar_imo[i]; - del_scalar_R[i] = scalar_ipo[i] - scalar_i[i]; - del_scalar_C[i] = 0.5 * (scalar_ipo[i] - scalar_imo[i]); - if (del_scalar_L[i] * del_scalar_R[i] > 0.0) { - del_scalar_G[i] = 2.0 * del_scalar_L[i] * del_scalar_R[i] / (del_scalar_L[i] + del_scalar_R[i]); - } else { - del_scalar_G[i] = 0.0; - } - } -#endif // SCALAR + plmc_utils::PlmcPrimitive const del_G = plmc_utils::Van_Leer_Slope(del_L, del_R); // Project the left, right, centered and van Leer differences onto the // characteristic variables Stone Eqn 37 (del_a are differences in // characteristic variables, see Stone for notation) Use the eigenvectors // given in Stone 2008, Appendix A - del_a_0_L = -cell_i.density * del_vx_L / (2 * sound_speed) + del_p_L / (2 * sound_speed * sound_speed); - del_a_1_L = del_d_L - del_p_L / (sound_speed_squared); - del_a_2_L = del_vy_L; - del_a_3_L = del_vz_L; - del_a_4_L = cell_i.density * del_vx_L / (2 * sound_speed) + del_p_L / (2 * sound_speed_squared); - - del_a_0_R = -cell_i.density * del_vx_R / (2 * sound_speed) + del_p_R / (2 * sound_speed_squared); - del_a_1_R = del_d_R - del_p_R / (sound_speed_squared); - del_a_2_R = del_vy_R; - del_a_3_R = del_vz_R; - del_a_4_R = cell_i.density * del_vx_R / (2 * sound_speed) + del_p_R / (2 * sound_speed_squared); - - del_a_0_C = -cell_i.density * del_vx_C / (2 * sound_speed) + del_p_C / (2 * sound_speed_squared); - del_a_1_C = del_d_C - del_p_C / (sound_speed_squared); - del_a_2_C = del_vy_C; - del_a_3_C = del_vz_C; - del_a_4_C = cell_i.density * del_vx_C / (2 * sound_speed) + del_p_C / (2 * sound_speed_squared); - - del_a_0_G = -cell_i.density * del_vx_G / (2 * sound_speed) + del_p_G / (2 * sound_speed_squared); - del_a_1_G = del_d_G - del_p_G / (sound_speed_squared); - del_a_2_G = del_vy_G; - del_a_3_G = del_vz_G; - del_a_4_G = cell_i.density * del_vx_G / (2 * sound_speed) + del_p_G / (2 * sound_speed_squared); + del_a_0_L = -cell_i.density * del_L.velocity_x / (2 * sound_speed) + del_L.pressure / (2 * sound_speed * sound_speed); + del_a_1_L = del_L.density - del_L.pressure / (sound_speed_squared); + del_a_2_L = del_L.velocity_y; + del_a_3_L = del_L.velocity_z; + del_a_4_L = cell_i.density * del_L.velocity_x / (2 * sound_speed) + del_L.pressure / (2 * sound_speed_squared); + + del_a_0_R = -cell_i.density * del_R.velocity_x / (2 * sound_speed) + del_R.pressure / (2 * sound_speed_squared); + del_a_1_R = del_R.density - del_R.pressure / (sound_speed_squared); + del_a_2_R = del_R.velocity_y; + del_a_3_R = del_R.velocity_z; + del_a_4_R = cell_i.density * del_R.velocity_x / (2 * sound_speed) + del_R.pressure / (2 * sound_speed_squared); + + del_a_0_C = -cell_i.density * del_C.velocity_x / (2 * sound_speed) + del_C.pressure / (2 * sound_speed_squared); + del_a_1_C = del_C.density - del_C.pressure / (sound_speed_squared); + del_a_2_C = del_C.velocity_y; + del_a_3_C = del_C.velocity_z; + del_a_4_C = cell_i.density * del_C.velocity_x / (2 * sound_speed) + del_C.pressure / (2 * sound_speed_squared); + + del_a_0_G = -cell_i.density * del_G.velocity_x / (2 * sound_speed) + del_G.pressure / (2 * sound_speed_squared); + del_a_1_G = del_G.density - del_G.pressure / (sound_speed_squared); + del_a_2_G = del_G.velocity_y; + del_a_3_G = del_G.velocity_z; + del_a_4_G = cell_i.density * del_G.velocity_x / (2 * sound_speed) + del_G.pressure / (2 * sound_speed_squared); // Apply monotonicity constraints to the differences in the characteristic // variables @@ -259,19 +191,19 @@ __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou } #ifdef DE del_ge_m_i = 0.0; - if (del_ge_L * del_ge_R > 0.0) { - lim_slope_a = fmin(fabs(del_ge_L), fabs(del_ge_R)); - lim_slope_b = fmin(fabs(del_ge_C), fabs(del_ge_G)); - del_ge_m_i = sgn_CUDA(del_ge_C) * fmin(2.0 * lim_slope_a, lim_slope_b); + if (del_L.gas_energy * del_R.gas_energy > 0.0) { + lim_slope_a = fmin(fabs(del_L.gas_energy), fabs(del_R.gas_energy)); + lim_slope_b = fmin(fabs(del_C.gas_energy), fabs(del_G.gas_energy)); + del_ge_m_i = sgn_CUDA(del_C.gas_energy) * fmin(2.0 * lim_slope_a, lim_slope_b); } #endif // DE #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { del_scalar_m_i[i] = 0.0; - if (del_scalar_L[i] * del_scalar_R[i] > 0.0) { - lim_slope_a = fmin(fabs(del_scalar_L[i]), fabs(del_scalar_R[i])); - lim_slope_b = fmin(fabs(del_scalar_C[i]), fabs(del_scalar_G[i])); - del_scalar_m_i[i] = sgn_CUDA(del_scalar_C[i]) * fmin(2.0 * lim_slope_a, lim_slope_b); + if (del_L.scalar[i] * del_R.scalar[i] > 0.0) { + lim_slope_a = fmin(fabs(del_L.scalar[i]), fabs(del_R.scalar[i])); + lim_slope_b = fmin(fabs(del_C.scalar[i]), fabs(del_G.scalar[i])); + del_scalar_m_i[i] = sgn_CUDA(del_C.scalar[i]) * fmin(2.0 * lim_slope_a, lim_slope_b); } } #endif // SCALAR @@ -300,13 +232,13 @@ __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou p_L_iph = cell_i.pressure + 0.5 * del_p_m_i; #ifdef DE - ge_R_imh = ge_i - 0.5 * del_ge_m_i; - ge_L_iph = ge_i + 0.5 * del_ge_m_i; + ge_R_imh = cell_i.gas_energy - 0.5 * del_ge_m_i; + ge_L_iph = cell_i.gas_energy + 0.5 * del_ge_m_i; #endif // DE #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { - scalar_R_imh[i] = scalar_i[i] - 0.5 * del_scalar_m_i[i]; - scalar_L_iph[i] = scalar_i[i] + 0.5 * del_scalar_m_i[i]; + scalar_R_imh[i] = cell_i.scalar[i] - 0.5 * del_scalar_m_i[i]; + scalar_L_iph[i] = cell_i.scalar[i] + 0.5 * del_scalar_m_i[i]; } #endif // SCALAR @@ -359,11 +291,11 @@ __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou #ifdef DE C = ge_R_imh + ge_L_iph; - ge_R_imh = fmax(fmin(ge_i, ge_imo), ge_R_imh); - ge_R_imh = fmin(fmax(ge_i, ge_imo), ge_R_imh); + ge_R_imh = fmax(fmin(cell_i.gas_energy, cell_imo.gas_energy), ge_R_imh); + ge_R_imh = fmin(fmax(cell_i.gas_energy, cell_imo.gas_energy), ge_R_imh); ge_L_iph = C - ge_R_imh; - ge_L_iph = fmax(fmin(ge_i, ge_ipo), ge_L_iph); - ge_L_iph = fmin(fmax(ge_i, ge_ipo), ge_L_iph); + ge_L_iph = fmax(fmin(cell_i.gas_energy, cell_ipo.gas_energy), ge_L_iph); + ge_L_iph = fmin(fmax(cell_i.gas_energy, cell_ipo.gas_energy), ge_L_iph); ge_R_imh = C - ge_L_iph; del_ge_m_i = ge_L_iph - ge_R_imh; #endif // DE @@ -371,11 +303,11 @@ __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { C = scalar_R_imh[i] + scalar_L_iph[i]; - scalar_R_imh[i] = fmax(fmin(scalar_i[i], scalar_imo[i]), scalar_R_imh[i]); - scalar_R_imh[i] = fmin(fmax(scalar_i[i], scalar_imo[i]), scalar_R_imh[i]); + scalar_R_imh[i] = fmax(fmin(cell_i.scalar[i], cell_imo.scalar[i]), scalar_R_imh[i]); + scalar_R_imh[i] = fmin(fmax(cell_i.scalar[i], cell_imo.scalar[i]), scalar_R_imh[i]); scalar_L_iph[i] = C - scalar_R_imh[i]; - scalar_L_iph[i] = fmax(fmin(scalar_i[i], scalar_ipo[i]), scalar_L_iph[i]); - scalar_L_iph[i] = fmin(fmax(scalar_i[i], scalar_ipo[i]), scalar_L_iph[i]); + scalar_L_iph[i] = fmax(fmin(cell_i.scalar[i], cell_ipo.scalar[i]), scalar_L_iph[i]); + scalar_L_iph[i] = fmin(fmax(cell_i.scalar[i], cell_ipo.scalar[i]), scalar_L_iph[i]); scalar_R_imh[i] = C - scalar_L_iph[i]; del_scalar_m_i[i] = scalar_L_iph[i] - scalar_R_imh[i]; } @@ -578,10 +510,13 @@ __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou #endif // DE } -// ============================================================================= -plmc_utils::PlmcPrimitive __device__ __host__ plmc_utils::Load_Data( - Real const *dev_conserved, size_t const &xid, size_t const &yid, size_t const &zid, size_t const &nx, - size_t const &ny, size_t const &n_cells, size_t const &o1, size_t const &o2, size_t const &o3, Real const &gamma) +namespace plmc_utils +{ +// ===================================================================================================================== +PlmcPrimitive __device__ __host__ Load_Data(Real const *dev_conserved, size_t const &xid, size_t const &yid, + size_t const &zid, size_t const &nx, size_t const &ny, + size_t const &n_cells, size_t const &o1, size_t const &o2, size_t const &o3, + Real const &gamma) { // Compute index size_t const id = cuda_utilities::compute1DIndex(xid, yid, zid, nx, ny); @@ -649,4 +584,74 @@ plmc_utils::PlmcPrimitive __device__ __host__ plmc_utils::Load_Data( #endif // SCALAR return loaded_data; -} \ No newline at end of file +} +// ===================================================================================================================== + +// ===================================================================================================================== +PlmcPrimitive __device__ __host__ Compute_Slope(PlmcPrimitive const &left, PlmcPrimitive const &right, Real const &coef) +{ + PlmcPrimitive slopes; + + slopes.density = coef * (left.density - right.density); + slopes.velocity_x = coef * (left.velocity_x - right.velocity_x); + slopes.velocity_y = coef * (left.velocity_y - right.velocity_y); + slopes.velocity_z = coef * (left.velocity_z - right.velocity_z); + slopes.pressure = coef * (left.pressure - right.pressure); + +#ifdef MHD + slopes.magnetic_y = coef * (left.magnetic_y - right.magnetic_y); + slopes.magnetic_z = coef * (left.magnetic_z - right.magnetic_z); +#endif // MHD + +#ifdef DE + slopes.gas_energy = coef * (left.gas_energy - right.gas_energy); +#endif // DE + +#ifdef SCALAR + for (size_t i = 0; i < grid_enum::nscalars; i++) { + slopes.scalar[i] = coef * (left.scalar[i] - right.scalar[i]); + } +#endif // SCALAR + + return slopes; +} +// ===================================================================================================================== + +// ===================================================================================================================== +PlmcPrimitive __device__ __host__ Van_Leer_Slope(PlmcPrimitive const &left_slope, PlmcPrimitive const &right_slope) +{ + PlmcPrimitive vl_slopes; + + auto Calc_Vl_Slope = [](Real const &left, Real const &right) -> Real { + if (left * right > 0.0) { + return 2.0 * left * right / (left + right); + } else { + return 0.0; + } + }; + + vl_slopes.density = Calc_Vl_Slope(left_slope.density, right_slope.density); + vl_slopes.velocity_x = Calc_Vl_Slope(left_slope.velocity_x, right_slope.velocity_x); + vl_slopes.velocity_y = Calc_Vl_Slope(left_slope.velocity_y, right_slope.velocity_y); + vl_slopes.velocity_z = Calc_Vl_Slope(left_slope.velocity_z, right_slope.velocity_z); + vl_slopes.pressure = Calc_Vl_Slope(left_slope.pressure, right_slope.pressure); + +#ifdef MHD + vl_slopes.magnetic_y = Calc_Vl_Slope(left_slope.magnetic_y, right_slope.magnetic_y); + vl_slopes.magnetic_z = Calc_Vl_Slope(left_slope.magnetic_z, right_slope.magnetic_z); +#endif // MHD + +#ifdef DE + vl_slopes.gas_energy = Calc_Vl_Slope(left_slope.gas_energy, right_slope.gas_energy); +#endif // DE + +#ifdef SCALAR + for (size_t i = 0; i < grid_enum::nscalars; i++) { + vl_slopes.scalar[i] = Calc_Vl_Slope(left_slope.scalar[i], right_slope.scalar[i]); + } +#endif // SCALAR + + return vl_slopes; +} +// ===================================================================================================================== +} // namespace plmc_utils diff --git a/src/reconstruction/plmc_cuda.h b/src/reconstruction/plmc_cuda.h index 986d05049..32d155ff0 100644 --- a/src/reconstruction/plmc_cuda.h +++ b/src/reconstruction/plmc_cuda.h @@ -18,6 +18,10 @@ __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou namespace plmc_utils { +/*! + * \brief A struct for the primitive variables + * + */ struct PlmcPrimitive { // Hydro variables Real density, velocity_x, velocity_y, velocity_z, pressure; @@ -35,9 +39,45 @@ struct PlmcPrimitive { #endif // SCALAR }; +/*! + * \brief Load the data for PLMC reconstruction + * + * \param dev_conserved The conserved array + * \param xid The xid of the cell to load data from + * \param yid The yid of the cell to load data from + * \param zid The zid of the cell to load data from + * \param nx Size in the X direction + * \param ny Size in the Y direction + * \param n_cells The total number of cells + * \param o1 Directional parameter + * \param o2 Directional parameter + * \param o3 Directional parameter + * \param gamma The adiabatic index + * \return PlmcPrimitive The loaded cell data + */ PlmcPrimitive __device__ __host__ Load_Data(Real const *dev_conserved, size_t const &xid, size_t const &yid, size_t const &zid, size_t const &nx, size_t const &ny, size_t const &n_cells, size_t const &o1, size_t const &o2, size_t const &o3, Real const &gamma); + +/*! + * \brief Compute a simple slope. Equation is `coef * (left - right)`. + * + * \param left The data on the positive side of the slope + * \param right The data on the negative side of the slope + * \param coef The coefficient to multiply the slope by. Defaults to zero + * \return PlmcPrimitive The slopes + */ +PlmcPrimitive __device__ __host__ Compute_Slope(PlmcPrimitive const &left, PlmcPrimitive const &right, + Real const &coef = 1.0); + +/*! + * \brief Compute the Van Lear slope from the left and right slopes + * + * \param left_slope The left slope + * \param right_slope The right slope + * \return PlmcPrimitive The Van Leer slope + */ +PlmcPrimitive __device__ __host__ Van_Leer_Slope(PlmcPrimitive const &left_slope, PlmcPrimitive const &right_slope); } // namespace plmc_utils #endif // PLMC_CUDA_H From 6875cf0f4e46535bf18a0062cfa86e4f3d9b5ff0 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Wed, 29 Mar 2023 15:59:43 -0400 Subject: [PATCH 345/694] PLMC: consolodate all the #ifndef VL stuff into one place --- src/reconstruction/plmc_cuda.cu | 52 +++++++++++++-------------------- 1 file changed, 21 insertions(+), 31 deletions(-) diff --git a/src/reconstruction/plmc_cuda.cu b/src/reconstruction/plmc_cuda.cu index 5fc6a5219..b5026dedb 100644 --- a/src/reconstruction/plmc_cuda.cu +++ b/src/reconstruction/plmc_cuda.cu @@ -59,27 +59,15 @@ __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou Real d_L_iph, vx_L_iph, vy_L_iph, vz_L_iph, p_L_iph; Real d_R_imh, vx_R_imh, vy_R_imh, vz_R_imh, p_R_imh; Real C; -#ifndef VL - Real dtodx = dt / dx; - Real qx; - Real lamdiff; - Real sum_0, sum_1, sum_2, sum_3, sum_4; -#endif // not VL #ifdef DE Real del_ge_m_i; Real ge_L_iph, ge_R_imh; Real E, E_kin, dge; - #ifndef VL - Real sum_ge; - #endif // CTU -#endif // DE +#endif // DE #ifdef SCALAR Real del_scalar_m_i[NSCALARS]; Real scalar_L_iph[NSCALARS], scalar_R_imh[NSCALARS]; - #ifndef VL - Real sum_scalar[NSCALARS]; - #endif // CTU -#endif // SCALAR +#endif // SCALAR // get a thread ID int const thread_id = threadIdx.x + blockIdx.x * blockDim.x; @@ -107,14 +95,6 @@ __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou Real const sound_speed = hydro_utilities::Calc_Sound_Speed(cell_i.pressure, cell_i.density, gamma); Real const sound_speed_squared = sound_speed * sound_speed; -// Compute the eigenvalues of the linearized equations in the -// primitive variables using the cell-centered primitive variables -#ifndef VL - Real const lambda_m = cell_i.velocity_x - sound_speed; - Real const lambda_0 = cell_i.velocity_x; - Real const lambda_p = cell_i.velocity_x + sound_speed; -#endif // VL - // Compute the left, right, centered, and van Leer differences of the // primitive variables Note that here L and R refer to locations relative to // the cell center @@ -314,9 +294,18 @@ __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou #endif // SCALAR #ifndef VL + + Real const dtodx = dt / dx; + + // Compute the eigenvalues of the linearized equations in the + // primitive variables using the cell-centered primitive variables + Real const lambda_m = cell_i.velocity_x - sound_speed; + Real const lambda_0 = cell_i.velocity_x; + Real const lambda_p = cell_i.velocity_x + sound_speed; + // Integrate linear interpolation function over domain of dependence // defined by max(min) eigenvalue - qx = -0.5 * fmin(lambda_m, 0.0) * dtodx; + Real qx = -0.5 * fmin(lambda_m, 0.0) * dtodx; d_R_imh = d_R_imh + qx * del_d_m_i; vx_R_imh = vx_R_imh + qx * del_vx_m_i; vy_R_imh = vy_R_imh + qx * del_vy_m_i; @@ -346,24 +335,25 @@ __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou // Stone Eqns 42 & 43 // left-hand interface value, i+1/2 - sum_0 = sum_1 = sum_2 = sum_3 = sum_4 = 0; + Real sum_0 = 0.0, sum_1 = 0.0, sum_2 = 0.0, sum_3 = 0.0, sum_4 = 0.0; #ifdef DE - sum_ge = 0; + Real sum_ge = 0; #endif // DE #ifdef SCALAR + Real sum_scalar[NSCALARS]; for (int i = 0; i < NSCALARS; i++) { sum_scalar[i] = 0.0; } #endif // SCALAR if (lambda_m >= 0) { - lamdiff = lambda_p - lambda_m; + Real lamdiff = lambda_p - lambda_m; sum_0 += lamdiff * (-cell_i.density * del_vx_m_i / (2 * sound_speed) + del_p_m_i / (2 * sound_speed_squared)); sum_1 += lamdiff * (del_vx_m_i / 2.0 - del_p_m_i / (2 * sound_speed * cell_i.density)); sum_4 += lamdiff * (-cell_i.density * del_vx_m_i * sound_speed / 2.0 + del_p_m_i / 2.0); } if (lambda_0 >= 0) { - lamdiff = lambda_p - lambda_0; + Real lamdiff = lambda_p - lambda_0; sum_0 += lamdiff * (del_d_m_i - del_p_m_i / (sound_speed_squared)); sum_2 += lamdiff * del_vy_m_i; @@ -378,7 +368,7 @@ __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou #endif // SCALAR } if (lambda_p >= 0) { - lamdiff = lambda_p - lambda_p; + Real lamdiff = lambda_p - lambda_p; sum_0 += lamdiff * (cell_i.density * del_vx_m_i / (2 * sound_speed) + del_p_m_i / (2 * sound_speed_squared)); sum_1 += lamdiff * (del_vx_m_i / 2.0 + del_p_m_i / (2 * sound_speed * cell_i.density)); @@ -411,14 +401,14 @@ __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou } #endif // SCALAR if (lambda_m <= 0) { - lamdiff = lambda_m - lambda_m; + Real lamdiff = lambda_m - lambda_m; sum_0 += lamdiff * (-cell_i.density * del_vx_m_i / (2 * sound_speed) + del_p_m_i / (2 * sound_speed_squared)); sum_1 += lamdiff * (del_vx_m_i / 2.0 - del_p_m_i / (2 * sound_speed * cell_i.density)); sum_4 += lamdiff * (-cell_i.density * del_vx_m_i * sound_speed / 2.0 + del_p_m_i / 2.0); } if (lambda_0 <= 0) { - lamdiff = lambda_m - lambda_0; + Real lamdiff = lambda_m - lambda_0; sum_0 += lamdiff * (del_d_m_i - del_p_m_i / (sound_speed_squared)); sum_2 += lamdiff * del_vy_m_i; @@ -433,7 +423,7 @@ __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou #endif // SCALAR } if (lambda_p <= 0) { - lamdiff = lambda_m - lambda_p; + Real lamdiff = lambda_m - lambda_p; sum_0 += lamdiff * (cell_i.density * del_vx_m_i / (2 * sound_speed) + del_p_m_i / (2 * sound_speed_squared)); sum_1 += lamdiff * (del_vx_m_i / 2.0 + del_p_m_i / (2 * sound_speed * cell_i.density)); From ed71652b7c441db9eb645233e9b6133634ea6103 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Wed, 29 Mar 2023 16:33:35 -0400 Subject: [PATCH 346/694] Remove all declarations at the begining and replace with structs Move thread guard to the beginning --- src/reconstruction/plmc_cuda.cu | 462 ++++++++++++++++---------------- src/reconstruction/plmc_cuda.h | 13 + 2 files changed, 241 insertions(+), 234 deletions(-) diff --git a/src/reconstruction/plmc_cuda.cu b/src/reconstruction/plmc_cuda.cu index b5026dedb..b8d22e550 100644 --- a/src/reconstruction/plmc_cuda.cu +++ b/src/reconstruction/plmc_cuda.cu @@ -25,6 +25,16 @@ __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, int ny, int nz, Real dx, Real dt, Real gamma, int dir, int n_fields) { + // get a thread ID + int const thread_id = threadIdx.x + blockIdx.x * blockDim.x; + int xid, yid, zid; + cuda_utilities::compute3DIndices(thread_id, nx, ny, xid, yid, zid); + + // Thread guard to prevent overrun + if (xid < 1 or xid >= nx - 2 or yid < 1 or yid >= ny - 2 or zid < 1 or zid >= nz - 2) { + return; + } + // Compute the total number of cells int const n_cells = nx * ny * nz; @@ -48,36 +58,6 @@ __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou break; } - // declare other variables to be used - Real del_a_0_L, del_a_1_L, del_a_2_L, del_a_3_L, del_a_4_L; - Real del_a_0_R, del_a_1_R, del_a_2_R, del_a_3_R, del_a_4_R; - Real del_a_0_C, del_a_1_C, del_a_2_C, del_a_3_C, del_a_4_C; - Real del_a_0_G, del_a_1_G, del_a_2_G, del_a_3_G, del_a_4_G; - Real del_a_0_m, del_a_1_m, del_a_2_m, del_a_3_m, del_a_4_m; // _m means monotized slope - Real lim_slope_a, lim_slope_b; - Real del_d_m_i, del_vx_m_i, del_vy_m_i, del_vz_m_i, del_p_m_i; - Real d_L_iph, vx_L_iph, vy_L_iph, vz_L_iph, p_L_iph; - Real d_R_imh, vx_R_imh, vy_R_imh, vz_R_imh, p_R_imh; - Real C; -#ifdef DE - Real del_ge_m_i; - Real ge_L_iph, ge_R_imh; - Real E, E_kin, dge; -#endif // DE -#ifdef SCALAR - Real del_scalar_m_i[NSCALARS]; - Real scalar_L_iph[NSCALARS], scalar_R_imh[NSCALARS]; -#endif // SCALAR - - // get a thread ID - int const thread_id = threadIdx.x + blockIdx.x * blockDim.x; - int xid, yid, zid; - cuda_utilities::compute3DIndices(thread_id, nx, ny, xid, yid, zid); - - // Thread guard to prevent overrun - if (xid < 1 or xid >= nx - 2 or yid < 1 or yid >= ny - 2 or zid < 1 or zid >= nz - 2) { - return; - } // load the 3-cell stencil into registers // cell i plmc_utils::PlmcPrimitive const cell_i = @@ -115,181 +95,185 @@ __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou // characteristic variables Stone Eqn 37 (del_a are differences in // characteristic variables, see Stone for notation) Use the eigenvectors // given in Stone 2008, Appendix A - del_a_0_L = -cell_i.density * del_L.velocity_x / (2 * sound_speed) + del_L.pressure / (2 * sound_speed * sound_speed); - del_a_1_L = del_L.density - del_L.pressure / (sound_speed_squared); - del_a_2_L = del_L.velocity_y; - del_a_3_L = del_L.velocity_z; - del_a_4_L = cell_i.density * del_L.velocity_x / (2 * sound_speed) + del_L.pressure / (2 * sound_speed_squared); - - del_a_0_R = -cell_i.density * del_R.velocity_x / (2 * sound_speed) + del_R.pressure / (2 * sound_speed_squared); - del_a_1_R = del_R.density - del_R.pressure / (sound_speed_squared); - del_a_2_R = del_R.velocity_y; - del_a_3_R = del_R.velocity_z; - del_a_4_R = cell_i.density * del_R.velocity_x / (2 * sound_speed) + del_R.pressure / (2 * sound_speed_squared); - - del_a_0_C = -cell_i.density * del_C.velocity_x / (2 * sound_speed) + del_C.pressure / (2 * sound_speed_squared); - del_a_1_C = del_C.density - del_C.pressure / (sound_speed_squared); - del_a_2_C = del_C.velocity_y; - del_a_3_C = del_C.velocity_z; - del_a_4_C = cell_i.density * del_C.velocity_x / (2 * sound_speed) + del_C.pressure / (2 * sound_speed_squared); - - del_a_0_G = -cell_i.density * del_G.velocity_x / (2 * sound_speed) + del_G.pressure / (2 * sound_speed_squared); - del_a_1_G = del_G.density - del_G.pressure / (sound_speed_squared); - del_a_2_G = del_G.velocity_y; - del_a_3_G = del_G.velocity_z; - del_a_4_G = cell_i.density * del_G.velocity_x / (2 * sound_speed) + del_G.pressure / (2 * sound_speed_squared); + plmc_utils::PlmcCharacteristic del_a_L, del_a_R, del_a_C, del_a_G, del_a_m; + del_a_L.a0 = + -cell_i.density * del_L.velocity_x / (2 * sound_speed) + del_L.pressure / (2 * sound_speed * sound_speed); + del_a_L.a1 = del_L.density - del_L.pressure / (sound_speed_squared); + del_a_L.a2 = del_L.velocity_y; + del_a_G.a3 = del_L.velocity_z; + del_a_L.a4 = cell_i.density * del_L.velocity_x / (2 * sound_speed) + del_L.pressure / (2 * sound_speed_squared); + + del_a_R.a0 = -cell_i.density * del_R.velocity_x / (2 * sound_speed) + del_R.pressure / (2 * sound_speed_squared); + del_a_R.a1 = del_R.density - del_R.pressure / (sound_speed_squared); + del_a_R.a2 = del_R.velocity_y; + del_a_R.a3 = del_R.velocity_z; + del_a_R.a4 = cell_i.density * del_R.velocity_x / (2 * sound_speed) + del_R.pressure / (2 * sound_speed_squared); + + del_a_C.a0 = -cell_i.density * del_C.velocity_x / (2 * sound_speed) + del_C.pressure / (2 * sound_speed_squared); + del_a_C.a1 = del_C.density - del_C.pressure / (sound_speed_squared); + del_a_C.a2 = del_C.velocity_y; + del_a_C.a3 = del_C.velocity_z; + del_a_C.a4 = cell_i.density * del_C.velocity_x / (2 * sound_speed) + del_C.pressure / (2 * sound_speed_squared); + + del_a_G.a0 = -cell_i.density * del_G.velocity_x / (2 * sound_speed) + del_G.pressure / (2 * sound_speed_squared); + del_a_G.a1 = del_G.density - del_G.pressure / (sound_speed_squared); + del_a_G.a2 = del_G.velocity_y; + del_a_G.a3 = del_G.velocity_z; + del_a_G.a4 = cell_i.density * del_G.velocity_x / (2 * sound_speed) + del_G.pressure / (2 * sound_speed_squared); // Apply monotonicity constraints to the differences in the characteristic // variables - del_a_0_m = del_a_1_m = del_a_2_m = del_a_3_m = del_a_4_m = 0.0; // This should be in the declaration - - if (del_a_0_L * del_a_0_R > 0.0) { - lim_slope_a = fmin(fabs(del_a_0_L), fabs(del_a_0_R)); - lim_slope_b = fmin(fabs(del_a_0_C), fabs(del_a_0_G)); - del_a_0_m = sgn_CUDA(del_a_0_C) * fmin(2.0 * lim_slope_a, lim_slope_b); + del_a_m.a0 = del_a_m.a1 = del_a_m.a2 = del_a_m.a3 = del_a_m.a4 = 0.0; // This should be in the declaration + plmc_utils::PlmcPrimitive del_m_i; + if (del_a_L.a0 * del_a_R.a0 > 0.0) { + Real const lim_slope_a = fmin(fabs(del_a_L.a0), fabs(del_a_R.a0)); + Real const lim_slope_b = fmin(fabs(del_a_C.a0), fabs(del_a_G.a0)); + del_a_m.a0 = sgn_CUDA(del_a_C.a0) * fmin(2.0 * lim_slope_a, lim_slope_b); } - if (del_a_1_L * del_a_1_R > 0.0) { - lim_slope_a = fmin(fabs(del_a_1_L), fabs(del_a_1_R)); - lim_slope_b = fmin(fabs(del_a_1_C), fabs(del_a_1_G)); - del_a_1_m = sgn_CUDA(del_a_1_C) * fmin(2.0 * lim_slope_a, lim_slope_b); + if (del_a_L.a1 * del_a_R.a1 > 0.0) { + Real const lim_slope_a = fmin(fabs(del_a_L.a1), fabs(del_a_R.a1)); + Real const lim_slope_b = fmin(fabs(del_a_C.a1), fabs(del_a_G.a1)); + del_a_m.a1 = sgn_CUDA(del_a_C.a1) * fmin(2.0 * lim_slope_a, lim_slope_b); } - if (del_a_2_L * del_a_2_R > 0.0) { - lim_slope_a = fmin(fabs(del_a_2_L), fabs(del_a_2_R)); - lim_slope_b = fmin(fabs(del_a_2_C), fabs(del_a_2_G)); - del_a_2_m = sgn_CUDA(del_a_2_C) * fmin(2.0 * lim_slope_a, lim_slope_b); + if (del_a_L.a2 * del_a_R.a2 > 0.0) { + Real const lim_slope_a = fmin(fabs(del_a_L.a2), fabs(del_a_R.a2)); + Real const lim_slope_b = fmin(fabs(del_a_C.a2), fabs(del_a_G.a2)); + del_a_m.a2 = sgn_CUDA(del_a_C.a2) * fmin(2.0 * lim_slope_a, lim_slope_b); } - if (del_a_3_L * del_a_3_R > 0.0) { - lim_slope_a = fmin(fabs(del_a_3_L), fabs(del_a_3_R)); - lim_slope_b = fmin(fabs(del_a_3_C), fabs(del_a_3_G)); - del_a_3_m = sgn_CUDA(del_a_3_C) * fmin(2.0 * lim_slope_a, lim_slope_b); + if (del_a_G.a3 * del_a_R.a3 > 0.0) { + Real const lim_slope_a = fmin(fabs(del_a_G.a3), fabs(del_a_R.a3)); + Real const lim_slope_b = fmin(fabs(del_a_C.a3), fabs(del_a_G.a3)); + del_a_m.a3 = sgn_CUDA(del_a_C.a3) * fmin(2.0 * lim_slope_a, lim_slope_b); } - if (del_a_4_L * del_a_4_R > 0.0) { - lim_slope_a = fmin(fabs(del_a_4_L), fabs(del_a_4_R)); - lim_slope_b = fmin(fabs(del_a_4_C), fabs(del_a_4_G)); - del_a_4_m = sgn_CUDA(del_a_4_C) * fmin(2.0 * lim_slope_a, lim_slope_b); + if (del_a_L.a4 * del_a_R.a4 > 0.0) { + Real const lim_slope_a = fmin(fabs(del_a_L.a4), fabs(del_a_R.a4)); + Real const lim_slope_b = fmin(fabs(del_a_C.a4), fabs(del_a_G.a4)); + del_a_m.a4 = sgn_CUDA(del_a_C.a4) * fmin(2.0 * lim_slope_a, lim_slope_b); } #ifdef DE - del_ge_m_i = 0.0; + del_m_i.gas_energy = 0.0; if (del_L.gas_energy * del_R.gas_energy > 0.0) { - lim_slope_a = fmin(fabs(del_L.gas_energy), fabs(del_R.gas_energy)); - lim_slope_b = fmin(fabs(del_C.gas_energy), fabs(del_G.gas_energy)); - del_ge_m_i = sgn_CUDA(del_C.gas_energy) * fmin(2.0 * lim_slope_a, lim_slope_b); + Real const lim_slope_a = fmin(fabs(del_L.gas_energy), fabs(del_R.gas_energy)); + Real const lim_slope_b = fmin(fabs(del_C.gas_energy), fabs(del_G.gas_energy)); + del_m_i.gas_energy = sgn_CUDA(del_C.gas_energy) * fmin(2.0 * lim_slope_a, lim_slope_b); } #endif // DE #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { - del_scalar_m_i[i] = 0.0; + del_m_i.scalar[i] = 0.0; if (del_L.scalar[i] * del_R.scalar[i] > 0.0) { - lim_slope_a = fmin(fabs(del_L.scalar[i]), fabs(del_R.scalar[i])); - lim_slope_b = fmin(fabs(del_C.scalar[i]), fabs(del_G.scalar[i])); - del_scalar_m_i[i] = sgn_CUDA(del_C.scalar[i]) * fmin(2.0 * lim_slope_a, lim_slope_b); + Real const lim_slope_a = fmin(fabs(del_L.scalar[i]), fabs(del_R.scalar[i])); + Real const lim_slope_b = fmin(fabs(del_C.scalar[i]), fabs(del_G.scalar[i])); + del_m_i.scalar[i] = sgn_CUDA(del_C.scalar[i]) * fmin(2.0 * lim_slope_a, lim_slope_b); } } #endif // SCALAR // Project the monotonized difference in the characteristic variables back // onto the primitive variables Stone Eqn 39 - del_d_m_i = del_a_0_m + del_a_1_m + del_a_4_m; - del_vx_m_i = -sound_speed * del_a_0_m / cell_i.density + sound_speed * del_a_4_m / cell_i.density; - del_vy_m_i = del_a_2_m; - del_vz_m_i = del_a_3_m; - del_p_m_i = sound_speed_squared * del_a_0_m + sound_speed_squared * del_a_4_m; + del_m_i.density = del_a_m.a0 + del_a_m.a1 + del_a_m.a4; + del_m_i.velocity_x = -sound_speed * del_a_m.a0 / cell_i.density + sound_speed * del_a_m.a4 / cell_i.density; + del_m_i.velocity_y = del_a_m.a2; + del_m_i.velocity_z = del_a_m.a3; + del_m_i.pressure = sound_speed_squared * del_a_m.a0 + sound_speed_squared * del_a_m.a4; // Compute the left and right interface values using the monotonized // difference in the primitive variables - d_R_imh = cell_i.density - 0.5 * del_d_m_i; - vx_R_imh = cell_i.velocity_x - 0.5 * del_vx_m_i; - vy_R_imh = cell_i.velocity_y - 0.5 * del_vy_m_i; - vz_R_imh = cell_i.velocity_z - 0.5 * del_vz_m_i; - p_R_imh = cell_i.pressure - 0.5 * del_p_m_i; + plmc_utils::PlmcPrimitive interface_L_iph, interface_R_imh; + + interface_R_imh.density = cell_i.density - 0.5 * del_m_i.density; + interface_R_imh.velocity_x = cell_i.velocity_x - 0.5 * del_m_i.velocity_x; + interface_R_imh.velocity_y = cell_i.velocity_y - 0.5 * del_m_i.velocity_y; + interface_R_imh.velocity_z = cell_i.velocity_z - 0.5 * del_m_i.velocity_z; + interface_R_imh.pressure = cell_i.pressure - 0.5 * del_m_i.pressure; - d_L_iph = cell_i.density + 0.5 * del_d_m_i; - vx_L_iph = cell_i.velocity_x + 0.5 * del_vx_m_i; - vy_L_iph = cell_i.velocity_y + 0.5 * del_vy_m_i; - vz_L_iph = cell_i.velocity_z + 0.5 * del_vz_m_i; - p_L_iph = cell_i.pressure + 0.5 * del_p_m_i; + interface_L_iph.density = cell_i.density + 0.5 * del_m_i.density; + interface_L_iph.velocity_x = cell_i.velocity_x + 0.5 * del_m_i.velocity_x; + interface_L_iph.velocity_y = cell_i.velocity_y + 0.5 * del_m_i.velocity_y; + interface_L_iph.velocity_z = cell_i.velocity_z + 0.5 * del_m_i.velocity_z; + interface_L_iph.pressure = cell_i.pressure + 0.5 * del_m_i.pressure; #ifdef DE - ge_R_imh = cell_i.gas_energy - 0.5 * del_ge_m_i; - ge_L_iph = cell_i.gas_energy + 0.5 * del_ge_m_i; + interface_R_imh.gas_energy = cell_i.gas_energy - 0.5 * del_m_i.gas_energy; + interface_L_iph.gas_energy = cell_i.gas_energy + 0.5 * del_m_i.gas_energy; #endif // DE #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { - scalar_R_imh[i] = cell_i.scalar[i] - 0.5 * del_scalar_m_i[i]; - scalar_L_iph[i] = cell_i.scalar[i] + 0.5 * del_scalar_m_i[i]; + interface_R_imh.scalar[i] = cell_i.scalar[i] - 0.5 * del_m_i.scalar[i]; + interface_L_iph.scalar[i] = cell_i.scalar[i] + 0.5 * del_m_i.scalar[i]; } #endif // SCALAR // try removing this on shock tubes - C = d_R_imh + d_L_iph; - d_R_imh = fmax(fmin(cell_i.density, cell_imo.density), d_R_imh); - d_R_imh = fmin(fmax(cell_i.density, cell_imo.density), d_R_imh); - d_L_iph = C - d_R_imh; - d_L_iph = fmax(fmin(cell_i.density, cell_ipo.density), d_L_iph); - d_L_iph = fmin(fmax(cell_i.density, cell_ipo.density), d_L_iph); - d_R_imh = C - d_L_iph; - - C = vx_R_imh + vx_L_iph; - vx_R_imh = fmax(fmin(cell_i.velocity_x, cell_imo.velocity_x), vx_R_imh); - vx_R_imh = fmin(fmax(cell_i.velocity_x, cell_imo.velocity_x), vx_R_imh); - vx_L_iph = C - vx_R_imh; - vx_L_iph = fmax(fmin(cell_i.velocity_x, cell_ipo.velocity_x), vx_L_iph); - vx_L_iph = fmin(fmax(cell_i.velocity_x, cell_ipo.velocity_x), vx_L_iph); - vx_R_imh = C - vx_L_iph; - - C = vy_R_imh + vy_L_iph; - vy_R_imh = fmax(fmin(cell_i.velocity_y, cell_imo.velocity_y), vy_R_imh); - vy_R_imh = fmin(fmax(cell_i.velocity_y, cell_imo.velocity_y), vy_R_imh); - vy_L_iph = C - vy_R_imh; - vy_L_iph = fmax(fmin(cell_i.velocity_y, cell_ipo.velocity_y), vy_L_iph); - vy_L_iph = fmin(fmax(cell_i.velocity_y, cell_ipo.velocity_y), vy_L_iph); - vy_R_imh = C - vy_L_iph; - - C = vz_R_imh + vz_L_iph; - vz_R_imh = fmax(fmin(cell_i.velocity_z, cell_imo.velocity_z), vz_R_imh); - vz_R_imh = fmin(fmax(cell_i.velocity_z, cell_imo.velocity_z), vz_R_imh); - vz_L_iph = C - vz_R_imh; - vz_L_iph = fmax(fmin(cell_i.velocity_z, cell_ipo.velocity_z), vz_L_iph); - vz_L_iph = fmin(fmax(cell_i.velocity_z, cell_ipo.velocity_z), vz_L_iph); - vz_R_imh = C - vz_L_iph; - - C = p_R_imh + p_L_iph; - p_R_imh = fmax(fmin(cell_i.pressure, cell_imo.pressure), p_R_imh); - p_R_imh = fmin(fmax(cell_i.pressure, cell_imo.pressure), p_R_imh); - p_L_iph = C - p_R_imh; - p_L_iph = fmax(fmin(cell_i.pressure, cell_ipo.pressure), p_L_iph); - p_L_iph = fmin(fmax(cell_i.pressure, cell_ipo.pressure), p_L_iph); - p_R_imh = C - p_L_iph; - - del_d_m_i = d_L_iph - d_R_imh; - del_vx_m_i = vx_L_iph - vx_R_imh; - del_vy_m_i = vy_L_iph - vy_R_imh; - del_vz_m_i = vz_L_iph - vz_R_imh; - del_p_m_i = p_L_iph - p_R_imh; + Real C = interface_R_imh.density + interface_L_iph.density; + interface_R_imh.density = fmax(fmin(cell_i.density, cell_imo.density), interface_R_imh.density); + interface_R_imh.density = fmin(fmax(cell_i.density, cell_imo.density), interface_R_imh.density); + interface_L_iph.density = C - interface_R_imh.density; + interface_L_iph.density = fmax(fmin(cell_i.density, cell_ipo.density), interface_L_iph.density); + interface_L_iph.density = fmin(fmax(cell_i.density, cell_ipo.density), interface_L_iph.density); + interface_R_imh.density = C - interface_L_iph.density; + + C = interface_R_imh.velocity_x + interface_L_iph.velocity_x; + interface_R_imh.velocity_x = fmax(fmin(cell_i.velocity_x, cell_imo.velocity_x), interface_R_imh.velocity_x); + interface_R_imh.velocity_x = fmin(fmax(cell_i.velocity_x, cell_imo.velocity_x), interface_R_imh.velocity_x); + interface_L_iph.velocity_x = C - interface_R_imh.velocity_x; + interface_L_iph.velocity_x = fmax(fmin(cell_i.velocity_x, cell_ipo.velocity_x), interface_L_iph.velocity_x); + interface_L_iph.velocity_x = fmin(fmax(cell_i.velocity_x, cell_ipo.velocity_x), interface_L_iph.velocity_x); + interface_R_imh.velocity_x = C - interface_L_iph.velocity_x; + + C = interface_R_imh.velocity_y + interface_L_iph.velocity_y; + interface_R_imh.velocity_y = fmax(fmin(cell_i.velocity_y, cell_imo.velocity_y), interface_R_imh.velocity_y); + interface_R_imh.velocity_y = fmin(fmax(cell_i.velocity_y, cell_imo.velocity_y), interface_R_imh.velocity_y); + interface_L_iph.velocity_y = C - interface_R_imh.velocity_y; + interface_L_iph.velocity_y = fmax(fmin(cell_i.velocity_y, cell_ipo.velocity_y), interface_L_iph.velocity_y); + interface_L_iph.velocity_y = fmin(fmax(cell_i.velocity_y, cell_ipo.velocity_y), interface_L_iph.velocity_y); + interface_R_imh.velocity_y = C - interface_L_iph.velocity_y; + + C = interface_R_imh.velocity_z + interface_L_iph.velocity_z; + interface_R_imh.velocity_z = fmax(fmin(cell_i.velocity_z, cell_imo.velocity_z), interface_R_imh.velocity_z); + interface_R_imh.velocity_z = fmin(fmax(cell_i.velocity_z, cell_imo.velocity_z), interface_R_imh.velocity_z); + interface_L_iph.velocity_z = C - interface_R_imh.velocity_z; + interface_L_iph.velocity_z = fmax(fmin(cell_i.velocity_z, cell_ipo.velocity_z), interface_L_iph.velocity_z); + interface_L_iph.velocity_z = fmin(fmax(cell_i.velocity_z, cell_ipo.velocity_z), interface_L_iph.velocity_z); + interface_R_imh.velocity_z = C - interface_L_iph.velocity_z; + + C = interface_R_imh.pressure + interface_L_iph.pressure; + interface_R_imh.pressure = fmax(fmin(cell_i.pressure, cell_imo.pressure), interface_R_imh.pressure); + interface_R_imh.pressure = fmin(fmax(cell_i.pressure, cell_imo.pressure), interface_R_imh.pressure); + interface_L_iph.pressure = C - interface_R_imh.pressure; + interface_L_iph.pressure = fmax(fmin(cell_i.pressure, cell_ipo.pressure), interface_L_iph.pressure); + interface_L_iph.pressure = fmin(fmax(cell_i.pressure, cell_ipo.pressure), interface_L_iph.pressure); + interface_R_imh.pressure = C - interface_L_iph.pressure; + + del_m_i.density = interface_L_iph.density - interface_R_imh.density; + del_m_i.velocity_x = interface_L_iph.velocity_x - interface_R_imh.velocity_x; + del_m_i.velocity_y = interface_L_iph.velocity_y - interface_R_imh.velocity_y; + del_m_i.velocity_z = interface_L_iph.velocity_z - interface_R_imh.velocity_z; + del_m_i.pressure = interface_L_iph.pressure - interface_R_imh.pressure; #ifdef DE - C = ge_R_imh + ge_L_iph; - ge_R_imh = fmax(fmin(cell_i.gas_energy, cell_imo.gas_energy), ge_R_imh); - ge_R_imh = fmin(fmax(cell_i.gas_energy, cell_imo.gas_energy), ge_R_imh); - ge_L_iph = C - ge_R_imh; - ge_L_iph = fmax(fmin(cell_i.gas_energy, cell_ipo.gas_energy), ge_L_iph); - ge_L_iph = fmin(fmax(cell_i.gas_energy, cell_ipo.gas_energy), ge_L_iph); - ge_R_imh = C - ge_L_iph; - del_ge_m_i = ge_L_iph - ge_R_imh; + C = interface_R_imh.gas_energy + interface_L_iph.gas_energy; + interface_R_imh.gas_energy = fmax(fmin(cell_i.gas_energy, cell_imo.gas_energy), interface_R_imh.gas_energy); + interface_R_imh.gas_energy = fmin(fmax(cell_i.gas_energy, cell_imo.gas_energy), interface_R_imh.gas_energy); + interface_L_iph.gas_energy = C - interface_R_imh.gas_energy; + interface_L_iph.gas_energy = fmax(fmin(cell_i.gas_energy, cell_ipo.gas_energy), interface_L_iph.gas_energy); + interface_L_iph.gas_energy = fmin(fmax(cell_i.gas_energy, cell_ipo.gas_energy), interface_L_iph.gas_energy); + interface_R_imh.gas_energy = C - interface_L_iph.gas_energy; + del_m_i.gas_energy = interface_L_iph.gas_energy - interface_R_imh.gas_energy; #endif // DE #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { - C = scalar_R_imh[i] + scalar_L_iph[i]; - scalar_R_imh[i] = fmax(fmin(cell_i.scalar[i], cell_imo.scalar[i]), scalar_R_imh[i]); - scalar_R_imh[i] = fmin(fmax(cell_i.scalar[i], cell_imo.scalar[i]), scalar_R_imh[i]); - scalar_L_iph[i] = C - scalar_R_imh[i]; - scalar_L_iph[i] = fmax(fmin(cell_i.scalar[i], cell_ipo.scalar[i]), scalar_L_iph[i]); - scalar_L_iph[i] = fmin(fmax(cell_i.scalar[i], cell_ipo.scalar[i]), scalar_L_iph[i]); - scalar_R_imh[i] = C - scalar_L_iph[i]; - del_scalar_m_i[i] = scalar_L_iph[i] - scalar_R_imh[i]; + C = interface_R_imh.scalar[i] + interface_L_iph.scalar[i]; + interface_R_imh.scalar[i] = fmax(fmin(cell_i.scalar[i], cell_imo.scalar[i]), interface_R_imh.scalar[i]); + interface_R_imh.scalar[i] = fmin(fmax(cell_i.scalar[i], cell_imo.scalar[i]), interface_R_imh.scalar[i]); + interface_L_iph.scalar[i] = C - interface_R_imh.scalar[i]; + interface_L_iph.scalar[i] = fmax(fmin(cell_i.scalar[i], cell_ipo.scalar[i]), interface_L_iph.scalar[i]); + interface_L_iph.scalar[i] = fmin(fmax(cell_i.scalar[i], cell_ipo.scalar[i]), interface_L_iph.scalar[i]); + interface_R_imh.scalar[i] = C - interface_L_iph.scalar[i]; + del_m_i.scalar[i] = interface_L_iph.scalar[i] - interface_R_imh.scalar[i]; } #endif // SCALAR @@ -305,29 +289,29 @@ __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou // Integrate linear interpolation function over domain of dependence // defined by max(min) eigenvalue - Real qx = -0.5 * fmin(lambda_m, 0.0) * dtodx; - d_R_imh = d_R_imh + qx * del_d_m_i; - vx_R_imh = vx_R_imh + qx * del_vx_m_i; - vy_R_imh = vy_R_imh + qx * del_vy_m_i; - vz_R_imh = vz_R_imh + qx * del_vz_m_i; - p_R_imh = p_R_imh + qx * del_p_m_i; - - qx = 0.5 * fmax(lambda_p, 0.0) * dtodx; - d_L_iph = d_L_iph - qx * del_d_m_i; - vx_L_iph = vx_L_iph - qx * del_vx_m_i; - vy_L_iph = vy_L_iph - qx * del_vy_m_i; - vz_L_iph = vz_L_iph - qx * del_vz_m_i; - p_L_iph = p_L_iph - qx * del_p_m_i; + Real qx = -0.5 * fmin(lambda_m, 0.0) * dtodx; + interface_R_imh.density = interface_R_imh.density + qx * del_m_i.density; + interface_R_imh.velocity_x = interface_R_imh.velocity_x + qx * del_m_i.velocity_x; + interface_R_imh.velocity_y = interface_R_imh.velocity_y + qx * del_m_i.velocity_y; + interface_R_imh.velocity_z = interface_R_imh.velocity_z + qx * del_m_i.velocity_z; + interface_R_imh.pressure = interface_R_imh.pressure + qx * del_m_i.pressure; + + qx = 0.5 * fmax(lambda_p, 0.0) * dtodx; + interface_L_iph.density = interface_L_iph.density - qx * del_m_i.density; + interface_L_iph.velocity_x = interface_L_iph.velocity_x - qx * del_m_i.velocity_x; + interface_L_iph.velocity_y = interface_L_iph.velocity_y - qx * del_m_i.velocity_y; + interface_L_iph.velocity_z = interface_L_iph.velocity_z - qx * del_m_i.velocity_z; + interface_L_iph.pressure = interface_L_iph.pressure - qx * del_m_i.pressure; #ifdef DE - ge_R_imh = ge_R_imh + qx * del_ge_m_i; - ge_L_iph = ge_L_iph - qx * del_ge_m_i; + interface_R_imh.gas_energy = interface_R_imh.gas_energy + qx * del_m_i.gas_energy; + interface_L_iph.gas_energy = interface_L_iph.gas_energy - qx * del_m_i.gas_energy; #endif // DE #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { - scalar_R_imh[i] = scalar_R_imh[i] + qx * del_scalar_m_i[i]; - scalar_L_iph[i] = scalar_L_iph[i] - qx * del_scalar_m_i[i]; + interface_R_imh.scalar[i] = interface_R_imh.scalar[i] + qx * del_m_i.scalar[i]; + interface_L_iph.scalar[i] = interface_L_iph.scalar[i] - qx * del_m_i.scalar[i]; } #endif // SCALAR @@ -348,45 +332,47 @@ __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou if (lambda_m >= 0) { Real lamdiff = lambda_p - lambda_m; - sum_0 += lamdiff * (-cell_i.density * del_vx_m_i / (2 * sound_speed) + del_p_m_i / (2 * sound_speed_squared)); - sum_1 += lamdiff * (del_vx_m_i / 2.0 - del_p_m_i / (2 * sound_speed * cell_i.density)); - sum_4 += lamdiff * (-cell_i.density * del_vx_m_i * sound_speed / 2.0 + del_p_m_i / 2.0); + sum_0 += lamdiff * + (-cell_i.density * del_m_i.velocity_x / (2 * sound_speed) + del_m_i.pressure / (2 * sound_speed_squared)); + sum_1 += lamdiff * (del_m_i.velocity_x / 2.0 - del_m_i.pressure / (2 * sound_speed * cell_i.density)); + sum_4 += lamdiff * (-cell_i.density * del_m_i.velocity_x * sound_speed / 2.0 + del_m_i.pressure / 2.0); } if (lambda_0 >= 0) { Real lamdiff = lambda_p - lambda_0; - sum_0 += lamdiff * (del_d_m_i - del_p_m_i / (sound_speed_squared)); - sum_2 += lamdiff * del_vy_m_i; - sum_3 += lamdiff * del_vz_m_i; + sum_0 += lamdiff * (del_m_i.density - del_m_i.pressure / (sound_speed_squared)); + sum_2 += lamdiff * del_m_i.velocity_y; + sum_3 += lamdiff * del_m_i.velocity_z; #ifdef DE - sum_ge += lamdiff * del_ge_m_i; + sum_ge += lamdiff * del_m_i.gas_energy; #endif // DE #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { - sum_scalar[i] += lamdiff * del_scalar_m_i[i]; + sum_scalar[i] += lamdiff * del_m_i.scalar[i]; } #endif // SCALAR } if (lambda_p >= 0) { Real lamdiff = lambda_p - lambda_p; - sum_0 += lamdiff * (cell_i.density * del_vx_m_i / (2 * sound_speed) + del_p_m_i / (2 * sound_speed_squared)); - sum_1 += lamdiff * (del_vx_m_i / 2.0 + del_p_m_i / (2 * sound_speed * cell_i.density)); - sum_4 += lamdiff * (cell_i.density * del_vx_m_i * sound_speed / 2.0 + del_p_m_i / 2.0); + sum_0 += lamdiff * + (cell_i.density * del_m_i.velocity_x / (2 * sound_speed) + del_m_i.pressure / (2 * sound_speed_squared)); + sum_1 += lamdiff * (del_m_i.velocity_x / 2.0 + del_m_i.pressure / (2 * sound_speed * cell_i.density)); + sum_4 += lamdiff * (cell_i.density * del_m_i.velocity_x * sound_speed / 2.0 + del_m_i.pressure / 2.0); } // add the corrections to the initial guesses for the interface values - d_L_iph += 0.5 * dtodx * sum_0; - vx_L_iph += 0.5 * dtodx * sum_1; - vy_L_iph += 0.5 * dtodx * sum_2; - vz_L_iph += 0.5 * dtodx * sum_3; - p_L_iph += 0.5 * dtodx * sum_4; + interface_L_iph.density += 0.5 * dtodx * sum_0; + interface_L_iph.velocity_x += 0.5 * dtodx * sum_1; + interface_L_iph.velocity_y += 0.5 * dtodx * sum_2; + interface_L_iph.velocity_z += 0.5 * dtodx * sum_3; + interface_L_iph.pressure += 0.5 * dtodx * sum_4; #ifdef DE - ge_L_iph += 0.5 * dtodx * sum_ge; + interface_L_iph.gas_energy += 0.5 * dtodx * sum_ge; #endif // DE #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { - scalar_L_iph[i] += 0.5 * dtodx * sum_scalar[i]; + interface_L_iph.scalar[i] += 0.5 * dtodx * sum_scalar[i]; } #endif // SCALAR @@ -403,54 +389,56 @@ __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou if (lambda_m <= 0) { Real lamdiff = lambda_m - lambda_m; - sum_0 += lamdiff * (-cell_i.density * del_vx_m_i / (2 * sound_speed) + del_p_m_i / (2 * sound_speed_squared)); - sum_1 += lamdiff * (del_vx_m_i / 2.0 - del_p_m_i / (2 * sound_speed * cell_i.density)); - sum_4 += lamdiff * (-cell_i.density * del_vx_m_i * sound_speed / 2.0 + del_p_m_i / 2.0); + sum_0 += lamdiff * + (-cell_i.density * del_m_i.velocity_x / (2 * sound_speed) + del_m_i.pressure / (2 * sound_speed_squared)); + sum_1 += lamdiff * (del_m_i.velocity_x / 2.0 - del_m_i.pressure / (2 * sound_speed * cell_i.density)); + sum_4 += lamdiff * (-cell_i.density * del_m_i.velocity_x * sound_speed / 2.0 + del_m_i.pressure / 2.0); } if (lambda_0 <= 0) { Real lamdiff = lambda_m - lambda_0; - sum_0 += lamdiff * (del_d_m_i - del_p_m_i / (sound_speed_squared)); - sum_2 += lamdiff * del_vy_m_i; - sum_3 += lamdiff * del_vz_m_i; + sum_0 += lamdiff * (del_m_i.density - del_m_i.pressure / (sound_speed_squared)); + sum_2 += lamdiff * del_m_i.velocity_y; + sum_3 += lamdiff * del_m_i.velocity_z; #ifdef DE - sum_ge += lamdiff * del_ge_m_i; + sum_ge += lamdiff * del_m_i.gas_energy; #endif // DE #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { - sum_scalar[i] += lamdiff * del_scalar_m_i[i]; + sum_scalar[i] += lamdiff * del_m_i.scalar[i]; } #endif // SCALAR } if (lambda_p <= 0) { Real lamdiff = lambda_m - lambda_p; - sum_0 += lamdiff * (cell_i.density * del_vx_m_i / (2 * sound_speed) + del_p_m_i / (2 * sound_speed_squared)); - sum_1 += lamdiff * (del_vx_m_i / 2.0 + del_p_m_i / (2 * sound_speed * cell_i.density)); - sum_4 += lamdiff * (cell_i.density * del_vx_m_i * sound_speed / 2.0 + del_p_m_i / 2.0); + sum_0 += lamdiff * + (cell_i.density * del_m_i.velocity_x / (2 * sound_speed) + del_m_i.pressure / (2 * sound_speed_squared)); + sum_1 += lamdiff * (del_m_i.velocity_x / 2.0 + del_m_i.pressure / (2 * sound_speed * cell_i.density)); + sum_4 += lamdiff * (cell_i.density * del_m_i.velocity_x * sound_speed / 2.0 + del_m_i.pressure / 2.0); } // add the corrections - d_R_imh += 0.5 * dtodx * sum_0; - vx_R_imh += 0.5 * dtodx * sum_1; - vy_R_imh += 0.5 * dtodx * sum_2; - vz_R_imh += 0.5 * dtodx * sum_3; - p_R_imh += 0.5 * dtodx * sum_4; + interface_R_imh.density += 0.5 * dtodx * sum_0; + interface_R_imh.velocity_x += 0.5 * dtodx * sum_1; + interface_R_imh.velocity_y += 0.5 * dtodx * sum_2; + interface_R_imh.velocity_z += 0.5 * dtodx * sum_3; + interface_R_imh.pressure += 0.5 * dtodx * sum_4; #ifdef DE - ge_R_imh += 0.5 * dtodx * sum_ge; + interface_R_imh.gas_energy += 0.5 * dtodx * sum_ge; #endif // DE #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { - scalar_R_imh[i] += 0.5 * dtodx * sum_scalar[i]; + interface_R_imh.scalar[i] += 0.5 * dtodx * sum_scalar[i]; } #endif // SCALAR #endif // CTU // apply minimum constraints - d_R_imh = fmax(d_R_imh, (Real)TINY_NUMBER); - d_L_iph = fmax(d_L_iph, (Real)TINY_NUMBER); - p_R_imh = fmax(p_R_imh, (Real)TINY_NUMBER); - p_L_iph = fmax(p_L_iph, (Real)TINY_NUMBER); + interface_R_imh.density = fmax(interface_R_imh.density, (Real)TINY_NUMBER); + interface_L_iph.density = fmax(interface_L_iph.density, (Real)TINY_NUMBER); + interface_R_imh.pressure = fmax(interface_R_imh.pressure, (Real)TINY_NUMBER); + interface_L_iph.pressure = fmax(interface_L_iph.pressure, (Real)TINY_NUMBER); // Convert the left and right states in the primitive to the conserved // variables send final values back from kernel bounds_R refers to the right @@ -468,35 +456,41 @@ __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou break; } - dev_bounds_R[id] = d_R_imh; - dev_bounds_R[o1 * n_cells + id] = d_R_imh * vx_R_imh; - dev_bounds_R[o2 * n_cells + id] = d_R_imh * vy_R_imh; - dev_bounds_R[o3 * n_cells + id] = d_R_imh * vz_R_imh; + dev_bounds_R[id] = interface_R_imh.density; + dev_bounds_R[o1 * n_cells + id] = interface_R_imh.density * interface_R_imh.velocity_x; + dev_bounds_R[o2 * n_cells + id] = interface_R_imh.density * interface_R_imh.velocity_y; + dev_bounds_R[o3 * n_cells + id] = interface_R_imh.density * interface_R_imh.velocity_z; dev_bounds_R[4 * n_cells + id] = - (p_R_imh / (gamma - 1.0)) + 0.5 * d_R_imh * (vx_R_imh * vx_R_imh + vy_R_imh * vy_R_imh + vz_R_imh * vz_R_imh); + (interface_R_imh.pressure / (gamma - 1.0)) + 0.5 * interface_R_imh.density * + (interface_R_imh.velocity_x * interface_R_imh.velocity_x + + interface_R_imh.velocity_y * interface_R_imh.velocity_y + + interface_R_imh.velocity_z * interface_R_imh.velocity_z); #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { - dev_bounds_R[(5 + i) * n_cells + id] = d_R_imh * scalar_R_imh[i]; + dev_bounds_R[(5 + i) * n_cells + id] = interface_R_imh.density * interface_R_imh.scalar[i]; } #endif // SCALAR #ifdef DE - dev_bounds_R[(n_fields - 1) * n_cells + id] = d_R_imh * ge_R_imh; + dev_bounds_R[(n_fields - 1) * n_cells + id] = interface_R_imh.density * interface_R_imh.gas_energy; #endif // DE // bounds_L refers to the left side of the i+1/2 interface id = xid + yid * nx + zid * nx * ny; - dev_bounds_L[id] = d_L_iph; - dev_bounds_L[o1 * n_cells + id] = d_L_iph * vx_L_iph; - dev_bounds_L[o2 * n_cells + id] = d_L_iph * vy_L_iph; - dev_bounds_L[o3 * n_cells + id] = d_L_iph * vz_L_iph; + dev_bounds_L[id] = interface_L_iph.density; + dev_bounds_L[o1 * n_cells + id] = interface_L_iph.density * interface_L_iph.velocity_x; + dev_bounds_L[o2 * n_cells + id] = interface_L_iph.density * interface_L_iph.velocity_y; + dev_bounds_L[o3 * n_cells + id] = interface_L_iph.density * interface_L_iph.velocity_z; dev_bounds_L[4 * n_cells + id] = - (p_L_iph / (gamma - 1.0)) + 0.5 * d_L_iph * (vx_L_iph * vx_L_iph + vy_L_iph * vy_L_iph + vz_L_iph * vz_L_iph); + (interface_L_iph.pressure / (gamma - 1.0)) + 0.5 * interface_L_iph.density * + (interface_L_iph.velocity_x * interface_L_iph.velocity_x + + interface_L_iph.velocity_y * interface_L_iph.velocity_y + + interface_L_iph.velocity_z * interface_L_iph.velocity_z); #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { - dev_bounds_L[(5 + i) * n_cells + id] = d_L_iph * scalar_L_iph[i]; + dev_bounds_L[(5 + i) * n_cells + id] = interface_L_iph.density * interface_L_iph.scalar[i]; } #endif // SCALAR #ifdef DE - dev_bounds_L[(n_fields - 1) * n_cells + id] = d_L_iph * ge_L_iph; + dev_bounds_L[(n_fields - 1) * n_cells + id] = interface_L_iph.density * interface_L_iph.gas_energy; #endif // DE } diff --git a/src/reconstruction/plmc_cuda.h b/src/reconstruction/plmc_cuda.h index 32d155ff0..53f64b9f2 100644 --- a/src/reconstruction/plmc_cuda.h +++ b/src/reconstruction/plmc_cuda.h @@ -39,6 +39,19 @@ struct PlmcPrimitive { #endif // SCALAR }; +/*! + * \brief A struct for the characteristic variables + * + */ +struct PlmcCharacteristic { + // Hydro variables + Real a0, a1, a2, a3, a4; + +#ifdef MHD + Real a5, a6; +#endif // MHD +}; + /*! * \brief Load the data for PLMC reconstruction * From 4a018153db3e5276ab57c4f4a14fac3e57323f69 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Wed, 29 Mar 2023 16:37:36 -0400 Subject: [PATCH 347/694] add sound_speed_squared to a spot I missed --- src/reconstruction/plmc_cuda.cu | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/reconstruction/plmc_cuda.cu b/src/reconstruction/plmc_cuda.cu index b8d22e550..2ae31db42 100644 --- a/src/reconstruction/plmc_cuda.cu +++ b/src/reconstruction/plmc_cuda.cu @@ -96,8 +96,7 @@ __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou // characteristic variables, see Stone for notation) Use the eigenvectors // given in Stone 2008, Appendix A plmc_utils::PlmcCharacteristic del_a_L, del_a_R, del_a_C, del_a_G, del_a_m; - del_a_L.a0 = - -cell_i.density * del_L.velocity_x / (2 * sound_speed) + del_L.pressure / (2 * sound_speed * sound_speed); + del_a_L.a0 = -cell_i.density * del_L.velocity_x / (2 * sound_speed) + del_L.pressure / (2 * sound_speed_squared); del_a_L.a1 = del_L.density - del_L.pressure / (sound_speed_squared); del_a_L.a2 = del_L.velocity_y; del_a_G.a3 = del_L.velocity_z; From b69b72da98f731090e87c116db9bd6614c5e69c9 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Thu, 30 Mar 2023 09:47:30 -0400 Subject: [PATCH 348/694] Replace projection into characteristics with function --- src/reconstruction/plmc_cuda.cu | 57 ++++++++++++++++++--------------- src/reconstruction/plmc_cuda.h | 15 +++++++++ 2 files changed, 47 insertions(+), 25 deletions(-) diff --git a/src/reconstruction/plmc_cuda.cu b/src/reconstruction/plmc_cuda.cu index 2ae31db42..4eb467fbb 100644 --- a/src/reconstruction/plmc_cuda.cu +++ b/src/reconstruction/plmc_cuda.cu @@ -95,34 +95,21 @@ __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou // characteristic variables Stone Eqn 37 (del_a are differences in // characteristic variables, see Stone for notation) Use the eigenvectors // given in Stone 2008, Appendix A - plmc_utils::PlmcCharacteristic del_a_L, del_a_R, del_a_C, del_a_G, del_a_m; - del_a_L.a0 = -cell_i.density * del_L.velocity_x / (2 * sound_speed) + del_L.pressure / (2 * sound_speed_squared); - del_a_L.a1 = del_L.density - del_L.pressure / (sound_speed_squared); - del_a_L.a2 = del_L.velocity_y; - del_a_G.a3 = del_L.velocity_z; - del_a_L.a4 = cell_i.density * del_L.velocity_x / (2 * sound_speed) + del_L.pressure / (2 * sound_speed_squared); - - del_a_R.a0 = -cell_i.density * del_R.velocity_x / (2 * sound_speed) + del_R.pressure / (2 * sound_speed_squared); - del_a_R.a1 = del_R.density - del_R.pressure / (sound_speed_squared); - del_a_R.a2 = del_R.velocity_y; - del_a_R.a3 = del_R.velocity_z; - del_a_R.a4 = cell_i.density * del_R.velocity_x / (2 * sound_speed) + del_R.pressure / (2 * sound_speed_squared); - - del_a_C.a0 = -cell_i.density * del_C.velocity_x / (2 * sound_speed) + del_C.pressure / (2 * sound_speed_squared); - del_a_C.a1 = del_C.density - del_C.pressure / (sound_speed_squared); - del_a_C.a2 = del_C.velocity_y; - del_a_C.a3 = del_C.velocity_z; - del_a_C.a4 = cell_i.density * del_C.velocity_x / (2 * sound_speed) + del_C.pressure / (2 * sound_speed_squared); - - del_a_G.a0 = -cell_i.density * del_G.velocity_x / (2 * sound_speed) + del_G.pressure / (2 * sound_speed_squared); - del_a_G.a1 = del_G.density - del_G.pressure / (sound_speed_squared); - del_a_G.a2 = del_G.velocity_y; - del_a_G.a3 = del_G.velocity_z; - del_a_G.a4 = cell_i.density * del_G.velocity_x / (2 * sound_speed) + del_G.pressure / (2 * sound_speed_squared); + plmc_utils::PlmcCharacteristic const del_a_L = + plmc_utils::Primitive_To_Characteristic(cell_i, del_L, sound_speed, sound_speed_squared); + + plmc_utils::PlmcCharacteristic const del_a_R = + plmc_utils::Primitive_To_Characteristic(cell_i, del_R, sound_speed, sound_speed_squared); + + plmc_utils::PlmcCharacteristic const del_a_C = + plmc_utils::Primitive_To_Characteristic(cell_i, del_C, sound_speed, sound_speed_squared); + + plmc_utils::PlmcCharacteristic const del_a_G = + plmc_utils::Primitive_To_Characteristic(cell_i, del_G, sound_speed, sound_speed_squared); // Apply monotonicity constraints to the differences in the characteristic // variables - + plmc_utils::PlmcCharacteristic del_a_m; del_a_m.a0 = del_a_m.a1 = del_a_m.a2 = del_a_m.a3 = del_a_m.a4 = 0.0; // This should be in the declaration plmc_utils::PlmcPrimitive del_m_i; if (del_a_L.a0 * del_a_R.a0 > 0.0) { @@ -637,4 +624,24 @@ PlmcPrimitive __device__ __host__ Van_Leer_Slope(PlmcPrimitive const &left_slope return vl_slopes; } // ===================================================================================================================== + +// ===================================================================================================================== +PlmcCharacteristic __device__ __host__ Primitive_To_Characteristic(PlmcPrimitive const &primitive, + PlmcPrimitive const &primitive_slope, + Real const &sound_speed, + Real const &sound_speed_squared) +{ + PlmcCharacteristic output; + + output.a0 = -primitive.density * primitive_slope.velocity_x / (2 * sound_speed) + + primitive_slope.pressure / (2 * sound_speed_squared); + output.a1 = primitive_slope.density - primitive_slope.pressure / (sound_speed_squared); + output.a2 = primitive_slope.velocity_y; + output.a3 = primitive_slope.velocity_z; + output.a4 = primitive.density * primitive_slope.velocity_x / (2 * sound_speed) + + primitive_slope.pressure / (2 * sound_speed_squared); + + return output; +} +// ===================================================================================================================== } // namespace plmc_utils diff --git a/src/reconstruction/plmc_cuda.h b/src/reconstruction/plmc_cuda.h index 53f64b9f2..03b720d8a 100644 --- a/src/reconstruction/plmc_cuda.h +++ b/src/reconstruction/plmc_cuda.h @@ -92,5 +92,20 @@ PlmcPrimitive __device__ __host__ Compute_Slope(PlmcPrimitive const &left, PlmcP * \return PlmcPrimitive The Van Leer slope */ PlmcPrimitive __device__ __host__ Van_Leer_Slope(PlmcPrimitive const &left_slope, PlmcPrimitive const &right_slope); + +/*! + * \brief Project from the primitive variables slopes to the characteristic variables slopes. Stone Eqn 37. Use the + * eigenvectors given in Stone 2008, Appendix A + * + * \param primitive The primitive variables + * \param primitive_slope The primitive variables slopes + * \param sound_speed The speed of sound + * \param sound_speed_squared The speed of sound squared + * \return PlmcCharacteristic + */ +PlmcCharacteristic __device__ __host__ Primitive_To_Characteristic(PlmcPrimitive const &primitive, + PlmcPrimitive const &primitive_slope, + Real const &sound_speed, + Real const &sound_speed_squared); } // namespace plmc_utils #endif // PLMC_CUDA_H From afdb3e1500711f7568e4a9bd8166e915b338b78e Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Thu, 30 Mar 2023 10:41:57 -0400 Subject: [PATCH 349/694] PLMC: monotize & characteristic to primitive functions --- src/reconstruction/plmc_cuda.cu | 122 +++++++++++++++++--------------- src/reconstruction/plmc_cuda.h | 77 ++++++++++++++------ 2 files changed, 123 insertions(+), 76 deletions(-) diff --git a/src/reconstruction/plmc_cuda.cu b/src/reconstruction/plmc_cuda.cu index 4eb467fbb..6a9d556b6 100644 --- a/src/reconstruction/plmc_cuda.cu +++ b/src/reconstruction/plmc_cuda.cu @@ -107,62 +107,10 @@ __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou plmc_utils::PlmcCharacteristic const del_a_G = plmc_utils::Primitive_To_Characteristic(cell_i, del_G, sound_speed, sound_speed_squared); - // Apply monotonicity constraints to the differences in the characteristic - // variables - plmc_utils::PlmcCharacteristic del_a_m; - del_a_m.a0 = del_a_m.a1 = del_a_m.a2 = del_a_m.a3 = del_a_m.a4 = 0.0; // This should be in the declaration - plmc_utils::PlmcPrimitive del_m_i; - if (del_a_L.a0 * del_a_R.a0 > 0.0) { - Real const lim_slope_a = fmin(fabs(del_a_L.a0), fabs(del_a_R.a0)); - Real const lim_slope_b = fmin(fabs(del_a_C.a0), fabs(del_a_G.a0)); - del_a_m.a0 = sgn_CUDA(del_a_C.a0) * fmin(2.0 * lim_slope_a, lim_slope_b); - } - if (del_a_L.a1 * del_a_R.a1 > 0.0) { - Real const lim_slope_a = fmin(fabs(del_a_L.a1), fabs(del_a_R.a1)); - Real const lim_slope_b = fmin(fabs(del_a_C.a1), fabs(del_a_G.a1)); - del_a_m.a1 = sgn_CUDA(del_a_C.a1) * fmin(2.0 * lim_slope_a, lim_slope_b); - } - if (del_a_L.a2 * del_a_R.a2 > 0.0) { - Real const lim_slope_a = fmin(fabs(del_a_L.a2), fabs(del_a_R.a2)); - Real const lim_slope_b = fmin(fabs(del_a_C.a2), fabs(del_a_G.a2)); - del_a_m.a2 = sgn_CUDA(del_a_C.a2) * fmin(2.0 * lim_slope_a, lim_slope_b); - } - if (del_a_G.a3 * del_a_R.a3 > 0.0) { - Real const lim_slope_a = fmin(fabs(del_a_G.a3), fabs(del_a_R.a3)); - Real const lim_slope_b = fmin(fabs(del_a_C.a3), fabs(del_a_G.a3)); - del_a_m.a3 = sgn_CUDA(del_a_C.a3) * fmin(2.0 * lim_slope_a, lim_slope_b); - } - if (del_a_L.a4 * del_a_R.a4 > 0.0) { - Real const lim_slope_a = fmin(fabs(del_a_L.a4), fabs(del_a_R.a4)); - Real const lim_slope_b = fmin(fabs(del_a_C.a4), fabs(del_a_G.a4)); - del_a_m.a4 = sgn_CUDA(del_a_C.a4) * fmin(2.0 * lim_slope_a, lim_slope_b); - } -#ifdef DE - del_m_i.gas_energy = 0.0; - if (del_L.gas_energy * del_R.gas_energy > 0.0) { - Real const lim_slope_a = fmin(fabs(del_L.gas_energy), fabs(del_R.gas_energy)); - Real const lim_slope_b = fmin(fabs(del_C.gas_energy), fabs(del_G.gas_energy)); - del_m_i.gas_energy = sgn_CUDA(del_C.gas_energy) * fmin(2.0 * lim_slope_a, lim_slope_b); - } -#endif // DE -#ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - del_m_i.scalar[i] = 0.0; - if (del_L.scalar[i] * del_R.scalar[i] > 0.0) { - Real const lim_slope_a = fmin(fabs(del_L.scalar[i]), fabs(del_R.scalar[i])); - Real const lim_slope_b = fmin(fabs(del_C.scalar[i]), fabs(del_G.scalar[i])); - del_m_i.scalar[i] = sgn_CUDA(del_C.scalar[i]) * fmin(2.0 * lim_slope_a, lim_slope_b); - } - } -#endif // SCALAR - - // Project the monotonized difference in the characteristic variables back - // onto the primitive variables Stone Eqn 39 - del_m_i.density = del_a_m.a0 + del_a_m.a1 + del_a_m.a4; - del_m_i.velocity_x = -sound_speed * del_a_m.a0 / cell_i.density + sound_speed * del_a_m.a4 / cell_i.density; - del_m_i.velocity_y = del_a_m.a2; - del_m_i.velocity_z = del_a_m.a3; - del_m_i.pressure = sound_speed_squared * del_a_m.a0 + sound_speed_squared * del_a_m.a4; + // Apply monotonicity constraints to the differences in the characteristic variables and project the monotonized + // difference in the characteristic variables back onto the primitive variables Stone Eqn 39 + plmc_utils::PlmcPrimitive del_m_i = plmc_utils::Monotize_Characteristic_Return_Primitive( + cell_i, del_L, del_R, del_C, del_G, del_a_L, del_a_R, del_a_C, del_a_G, sound_speed, sound_speed_squared); // Compute the left and right interface values using the monotonized // difference in the primitive variables @@ -644,4 +592,66 @@ PlmcCharacteristic __device__ __host__ Primitive_To_Characteristic(PlmcPrimitive return output; } // ===================================================================================================================== + +// ===================================================================================================================== +void __device__ __host__ Characteristic_To_Primitive(PlmcPrimitive const &primitive, + PlmcCharacteristic const &characteristic_slope, + Real const &sound_speed, Real const &sound_speed_squared, + PlmcPrimitive &output) +{ + output.density = characteristic_slope.a0 + characteristic_slope.a1 + characteristic_slope.a4; + output.velocity_x = -sound_speed * characteristic_slope.a0 / primitive.density + + sound_speed * characteristic_slope.a4 / primitive.density; + output.velocity_y = characteristic_slope.a2; + output.velocity_z = characteristic_slope.a3; + output.pressure = sound_speed_squared * characteristic_slope.a0 + sound_speed_squared * characteristic_slope.a4; +} +// ===================================================================================================================== + +// ===================================================================================================================== +PlmcPrimitive __device__ __host__ Monotize_Characteristic_Return_Primitive( + PlmcPrimitive const &primitive, PlmcPrimitive const &del_L, PlmcPrimitive const &del_R, PlmcPrimitive const &del_C, + PlmcPrimitive const &del_G, PlmcCharacteristic const &del_a_L, PlmcCharacteristic const &del_a_R, + PlmcCharacteristic const &del_a_C, PlmcCharacteristic const &del_a_G, Real const &sound_speed, + Real const &sound_speed_squared) +{ + // The function that will actually do the monotization + auto Monotize = [](Real const &left, Real const &right, Real const ¢ered, Real const &van_leer) -> Real { + if (left * right > 0.0) { + Real const lim_slope_a = 2.0 * fmin(fabs(left), fabs(right)); + Real const lim_slope_b = fmin(fabs(centered), fabs(van_leer)); + return copysign(fmin(lim_slope_a, lim_slope_b), centered); + } else { + return 0.0; + } + }; + + // the monotized difference in the characteristic variables + PlmcCharacteristic del_a_m; + // The monotized difference in the characteristic variables projected into the primitive variables + PlmcPrimitive output; + + // Monotize the slopes + del_a_m.a0 = Monotize(del_a_L.a0, del_a_R.a0, del_a_C.a0, del_a_G.a0); + del_a_m.a1 = Monotize(del_a_L.a1, del_a_R.a1, del_a_C.a1, del_a_G.a1); + del_a_m.a2 = Monotize(del_a_L.a2, del_a_R.a2, del_a_C.a2, del_a_G.a2); + del_a_m.a3 = Monotize(del_a_L.a3, del_a_R.a3, del_a_C.a3, del_a_G.a3); + del_a_m.a4 = Monotize(del_a_L.a4, del_a_R.a4, del_a_C.a4, del_a_G.a4); + +#ifdef DE + output.gas_energy = Monotize(del_L.gas_energy, del_R.gas_energy, del_C.gas_energy, del_G.gas_energy); +#endif // DE +#ifdef SCALAR + for (int i = 0; i < NSCALARS; i++) { + output.scalar[i] = Monotize(del_L.scalar[i], del_R.scalar[i], del_C.scalar[i], del_G.scalar[i]); + } +#endif // SCALAR + + // Project into the primitive variables. Note the return by reference to preserve the values in the gas_energy and + // scalars + Characteristic_To_Primitive(primitive, del_a_m, sound_speed, sound_speed_squared, output); + + return output; +} +// ===================================================================================================================== } // namespace plmc_utils diff --git a/src/reconstruction/plmc_cuda.h b/src/reconstruction/plmc_cuda.h index 03b720d8a..5f1f22e9f 100644 --- a/src/reconstruction/plmc_cuda.h +++ b/src/reconstruction/plmc_cuda.h @@ -55,17 +55,17 @@ struct PlmcCharacteristic { /*! * \brief Load the data for PLMC reconstruction * - * \param dev_conserved The conserved array - * \param xid The xid of the cell to load data from - * \param yid The yid of the cell to load data from - * \param zid The zid of the cell to load data from - * \param nx Size in the X direction - * \param ny Size in the Y direction - * \param n_cells The total number of cells - * \param o1 Directional parameter - * \param o2 Directional parameter - * \param o3 Directional parameter - * \param gamma The adiabatic index + * \param[in] dev_conserved The conserved array + * \param[in] xid The xid of the cell to load data from + * \param[in] yid The yid of the cell to load data from + * \param[in] zid The zid of the cell to load data from + * \param[in] nx Size in the X direction + * \param[in] ny Size in the Y direction + * \param[in] n_cells The total number of cells + * \param[in] o1 Directional parameter + * \param[in] o2 Directional parameter + * \param[in] o3 Directional parameter + * \param[in] gamma The adiabatic index * \return PlmcPrimitive The loaded cell data */ PlmcPrimitive __device__ __host__ Load_Data(Real const *dev_conserved, size_t const &xid, size_t const &yid, @@ -76,9 +76,9 @@ PlmcPrimitive __device__ __host__ Load_Data(Real const *dev_conserved, size_t co /*! * \brief Compute a simple slope. Equation is `coef * (left - right)`. * - * \param left The data on the positive side of the slope - * \param right The data on the negative side of the slope - * \param coef The coefficient to multiply the slope by. Defaults to zero + * \param[in] left The data on the positive side of the slope + * \param[in] right The data on the negative side of the slope + * \param[in] coef The coefficient to multiply the slope by. Defaults to zero * \return PlmcPrimitive The slopes */ PlmcPrimitive __device__ __host__ Compute_Slope(PlmcPrimitive const &left, PlmcPrimitive const &right, @@ -87,8 +87,8 @@ PlmcPrimitive __device__ __host__ Compute_Slope(PlmcPrimitive const &left, PlmcP /*! * \brief Compute the Van Lear slope from the left and right slopes * - * \param left_slope The left slope - * \param right_slope The right slope + * \param[in] left_slope The left slope + * \param[in] right_slope The right slope * \return PlmcPrimitive The Van Leer slope */ PlmcPrimitive __device__ __host__ Van_Leer_Slope(PlmcPrimitive const &left_slope, PlmcPrimitive const &right_slope); @@ -97,15 +97,52 @@ PlmcPrimitive __device__ __host__ Van_Leer_Slope(PlmcPrimitive const &left_slope * \brief Project from the primitive variables slopes to the characteristic variables slopes. Stone Eqn 37. Use the * eigenvectors given in Stone 2008, Appendix A * - * \param primitive The primitive variables - * \param primitive_slope The primitive variables slopes - * \param sound_speed The speed of sound - * \param sound_speed_squared The speed of sound squared + * \param[in] primitive The primitive variables + * \param[in] primitive_slope The primitive variables slopes + * \param[in] sound_speed The speed of sound + * \param[in] sound_speed_squared The speed of sound squared * \return PlmcCharacteristic */ PlmcCharacteristic __device__ __host__ Primitive_To_Characteristic(PlmcPrimitive const &primitive, PlmcPrimitive const &primitive_slope, Real const &sound_speed, Real const &sound_speed_squared); + +/*! + * \brief Project from the characteristic variables slopes to the primitive variables slopes. Stone Eqn 39. Use the + * eigenvectors given in Stone 2008, Appendix A + * + * \param[in] primitive The primitive variables + * \param[in] characteristic_slope The characteristic slopes + * \param[in] sound_speed The sound speed + * \param[in] sound_speed_squared The sound speed squared + * \param[out] output The primitive slopes + */ +void __device__ __host__ Characteristic_To_Primitive(PlmcPrimitive const &primitive, + PlmcCharacteristic const &characteristic_slope, + Real const &sound_speed, Real const &sound_speed_squared, + PlmcPrimitive &output); + +/*! + * \brief Monotize the characteristic slopes and project back into the primitive slopes + * + * \param[in] primitive The primitive variables + * \param[in] del_L The left primitive slopes + * \param[in] del_R The right primitive slopes + * \param[in] del_C The centered primitive slopes + * \param[in] del_G The Van Leer primitive slopes + * \param[in] del_a_L The left characteristic slopes + * \param[in] del_a_R The right characteristic slopes + * \param[in] del_a_C The centered characteristic slopes + * \param[in] del_a_G The Van Leer characteristic slopes + * \param[in] sound_speed The sound speed + * \param[in] sound_speed_squared The sound speed squared + * \return PlmcPrimitive The monotized primitive slopes + */ +PlmcPrimitive __device__ __host__ Monotize_Characteristic_Return_Primitive( + PlmcPrimitive const &primitive, PlmcPrimitive const &del_L, PlmcPrimitive const &del_R, PlmcPrimitive const &del_C, + PlmcPrimitive const &del_G, PlmcCharacteristic const &del_a_L, PlmcCharacteristic const &del_a_R, + PlmcCharacteristic const &del_a_C, PlmcCharacteristic const &del_a_G, Real const &sound_speed, + Real const &sound_speed_squared); } // namespace plmc_utils #endif // PLMC_CUDA_H From 460d7df8e784b6a79354893cc0ff4676e7099525 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Thu, 30 Mar 2023 10:51:33 -0400 Subject: [PATCH 350/694] PLMC: add a function to compute the interfaces --- src/reconstruction/plmc_cuda.cu | 55 +++++++++++++++++---------------- src/reconstruction/plmc_cuda.h | 11 +++++++ 2 files changed, 39 insertions(+), 27 deletions(-) diff --git a/src/reconstruction/plmc_cuda.cu b/src/reconstruction/plmc_cuda.cu index 6a9d556b6..e98d9ca9d 100644 --- a/src/reconstruction/plmc_cuda.cu +++ b/src/reconstruction/plmc_cuda.cu @@ -112,33 +112,9 @@ __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou plmc_utils::PlmcPrimitive del_m_i = plmc_utils::Monotize_Characteristic_Return_Primitive( cell_i, del_L, del_R, del_C, del_G, del_a_L, del_a_R, del_a_C, del_a_G, sound_speed, sound_speed_squared); - // Compute the left and right interface values using the monotonized - // difference in the primitive variables - - plmc_utils::PlmcPrimitive interface_L_iph, interface_R_imh; - - interface_R_imh.density = cell_i.density - 0.5 * del_m_i.density; - interface_R_imh.velocity_x = cell_i.velocity_x - 0.5 * del_m_i.velocity_x; - interface_R_imh.velocity_y = cell_i.velocity_y - 0.5 * del_m_i.velocity_y; - interface_R_imh.velocity_z = cell_i.velocity_z - 0.5 * del_m_i.velocity_z; - interface_R_imh.pressure = cell_i.pressure - 0.5 * del_m_i.pressure; - - interface_L_iph.density = cell_i.density + 0.5 * del_m_i.density; - interface_L_iph.velocity_x = cell_i.velocity_x + 0.5 * del_m_i.velocity_x; - interface_L_iph.velocity_y = cell_i.velocity_y + 0.5 * del_m_i.velocity_y; - interface_L_iph.velocity_z = cell_i.velocity_z + 0.5 * del_m_i.velocity_z; - interface_L_iph.pressure = cell_i.pressure + 0.5 * del_m_i.pressure; - -#ifdef DE - interface_R_imh.gas_energy = cell_i.gas_energy - 0.5 * del_m_i.gas_energy; - interface_L_iph.gas_energy = cell_i.gas_energy + 0.5 * del_m_i.gas_energy; -#endif // DE -#ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - interface_R_imh.scalar[i] = cell_i.scalar[i] - 0.5 * del_m_i.scalar[i]; - interface_L_iph.scalar[i] = cell_i.scalar[i] + 0.5 * del_m_i.scalar[i]; - } -#endif // SCALAR + // Compute the left and right interface values using the monotonized difference in the primitive variables + plmc_utils::PlmcPrimitive interface_L_iph = plmc_utils::Calc_Interface(cell_i, del_m_i, 1.0); + plmc_utils::PlmcPrimitive interface_R_imh = plmc_utils::Calc_Interface(cell_i, del_m_i, -1.0); // try removing this on shock tubes Real C = interface_R_imh.density + interface_L_iph.density; @@ -654,4 +630,29 @@ PlmcPrimitive __device__ __host__ Monotize_Characteristic_Return_Primitive( return output; } // ===================================================================================================================== + +// ===================================================================================================================== +PlmcPrimitive __device__ __host__ Calc_Interface(PlmcPrimitive const &primitive, PlmcPrimitive const &slopes, + Real const &sign) +{ + plmc_utils::PlmcPrimitive output; + + output.density = primitive.density + sign * 0.5 * slopes.density; + output.velocity_x = primitive.velocity_x + sign * 0.5 * slopes.velocity_x; + output.velocity_y = primitive.velocity_y + sign * 0.5 * slopes.velocity_y; + output.velocity_z = primitive.velocity_z + sign * 0.5 * slopes.velocity_z; + output.pressure = primitive.pressure + sign * 0.5 * slopes.pressure; + +#ifdef DE + output.gas_energy = primitive.gas_energy + sign * 0.5 * slopes.gas_energy; +#endif // DE +#ifdef SCALAR + for (int i = 0; i < NSCALARS; i++) { + output.scalar[i] = primitive.scalar[i] + sign * 0.5 * slopes.scalar[i]; + } +#endif // SCALAR + + return output; +} +// ===================================================================================================================== } // namespace plmc_utils diff --git a/src/reconstruction/plmc_cuda.h b/src/reconstruction/plmc_cuda.h index 5f1f22e9f..816d1421f 100644 --- a/src/reconstruction/plmc_cuda.h +++ b/src/reconstruction/plmc_cuda.h @@ -144,5 +144,16 @@ PlmcPrimitive __device__ __host__ Monotize_Characteristic_Return_Primitive( PlmcPrimitive const &del_G, PlmcCharacteristic const &del_a_L, PlmcCharacteristic const &del_a_R, PlmcCharacteristic const &del_a_C, PlmcCharacteristic const &del_a_G, Real const &sound_speed, Real const &sound_speed_squared); + +/*! + * \brief Compute the interface state from the slope and cell centered state. + * + * \param primitive The cell centered state + * \param slopes The slopes + * \param sign Whether to add or subtract the slope. +1 to add it and -1 to subtract it + * \return plmc_utils::PlmcPrimitive The interface state + */ +PlmcPrimitive __device__ __host__ Calc_Interface(PlmcPrimitive const &primitive, PlmcPrimitive const &slopes, + Real const &sign); } // namespace plmc_utils #endif // PLMC_CUDA_H From 76d1a4baa24288aaa51fd0506da8efd05678f9dc Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Thu, 30 Mar 2023 11:17:45 -0400 Subject: [PATCH 351/694] PLMC: function for primitive monotizing --- src/reconstruction/plmc_cuda.cu | 125 ++++++++++++++------------------ src/reconstruction/plmc_cuda.h | 20 ++++- 2 files changed, 72 insertions(+), 73 deletions(-) diff --git a/src/reconstruction/plmc_cuda.cu b/src/reconstruction/plmc_cuda.cu index e98d9ca9d..7e8b05531 100644 --- a/src/reconstruction/plmc_cuda.cu +++ b/src/reconstruction/plmc_cuda.cu @@ -116,76 +116,9 @@ __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou plmc_utils::PlmcPrimitive interface_L_iph = plmc_utils::Calc_Interface(cell_i, del_m_i, 1.0); plmc_utils::PlmcPrimitive interface_R_imh = plmc_utils::Calc_Interface(cell_i, del_m_i, -1.0); - // try removing this on shock tubes - Real C = interface_R_imh.density + interface_L_iph.density; - interface_R_imh.density = fmax(fmin(cell_i.density, cell_imo.density), interface_R_imh.density); - interface_R_imh.density = fmin(fmax(cell_i.density, cell_imo.density), interface_R_imh.density); - interface_L_iph.density = C - interface_R_imh.density; - interface_L_iph.density = fmax(fmin(cell_i.density, cell_ipo.density), interface_L_iph.density); - interface_L_iph.density = fmin(fmax(cell_i.density, cell_ipo.density), interface_L_iph.density); - interface_R_imh.density = C - interface_L_iph.density; - - C = interface_R_imh.velocity_x + interface_L_iph.velocity_x; - interface_R_imh.velocity_x = fmax(fmin(cell_i.velocity_x, cell_imo.velocity_x), interface_R_imh.velocity_x); - interface_R_imh.velocity_x = fmin(fmax(cell_i.velocity_x, cell_imo.velocity_x), interface_R_imh.velocity_x); - interface_L_iph.velocity_x = C - interface_R_imh.velocity_x; - interface_L_iph.velocity_x = fmax(fmin(cell_i.velocity_x, cell_ipo.velocity_x), interface_L_iph.velocity_x); - interface_L_iph.velocity_x = fmin(fmax(cell_i.velocity_x, cell_ipo.velocity_x), interface_L_iph.velocity_x); - interface_R_imh.velocity_x = C - interface_L_iph.velocity_x; - - C = interface_R_imh.velocity_y + interface_L_iph.velocity_y; - interface_R_imh.velocity_y = fmax(fmin(cell_i.velocity_y, cell_imo.velocity_y), interface_R_imh.velocity_y); - interface_R_imh.velocity_y = fmin(fmax(cell_i.velocity_y, cell_imo.velocity_y), interface_R_imh.velocity_y); - interface_L_iph.velocity_y = C - interface_R_imh.velocity_y; - interface_L_iph.velocity_y = fmax(fmin(cell_i.velocity_y, cell_ipo.velocity_y), interface_L_iph.velocity_y); - interface_L_iph.velocity_y = fmin(fmax(cell_i.velocity_y, cell_ipo.velocity_y), interface_L_iph.velocity_y); - interface_R_imh.velocity_y = C - interface_L_iph.velocity_y; - - C = interface_R_imh.velocity_z + interface_L_iph.velocity_z; - interface_R_imh.velocity_z = fmax(fmin(cell_i.velocity_z, cell_imo.velocity_z), interface_R_imh.velocity_z); - interface_R_imh.velocity_z = fmin(fmax(cell_i.velocity_z, cell_imo.velocity_z), interface_R_imh.velocity_z); - interface_L_iph.velocity_z = C - interface_R_imh.velocity_z; - interface_L_iph.velocity_z = fmax(fmin(cell_i.velocity_z, cell_ipo.velocity_z), interface_L_iph.velocity_z); - interface_L_iph.velocity_z = fmin(fmax(cell_i.velocity_z, cell_ipo.velocity_z), interface_L_iph.velocity_z); - interface_R_imh.velocity_z = C - interface_L_iph.velocity_z; - - C = interface_R_imh.pressure + interface_L_iph.pressure; - interface_R_imh.pressure = fmax(fmin(cell_i.pressure, cell_imo.pressure), interface_R_imh.pressure); - interface_R_imh.pressure = fmin(fmax(cell_i.pressure, cell_imo.pressure), interface_R_imh.pressure); - interface_L_iph.pressure = C - interface_R_imh.pressure; - interface_L_iph.pressure = fmax(fmin(cell_i.pressure, cell_ipo.pressure), interface_L_iph.pressure); - interface_L_iph.pressure = fmin(fmax(cell_i.pressure, cell_ipo.pressure), interface_L_iph.pressure); - interface_R_imh.pressure = C - interface_L_iph.pressure; - - del_m_i.density = interface_L_iph.density - interface_R_imh.density; - del_m_i.velocity_x = interface_L_iph.velocity_x - interface_R_imh.velocity_x; - del_m_i.velocity_y = interface_L_iph.velocity_y - interface_R_imh.velocity_y; - del_m_i.velocity_z = interface_L_iph.velocity_z - interface_R_imh.velocity_z; - del_m_i.pressure = interface_L_iph.pressure - interface_R_imh.pressure; - -#ifdef DE - C = interface_R_imh.gas_energy + interface_L_iph.gas_energy; - interface_R_imh.gas_energy = fmax(fmin(cell_i.gas_energy, cell_imo.gas_energy), interface_R_imh.gas_energy); - interface_R_imh.gas_energy = fmin(fmax(cell_i.gas_energy, cell_imo.gas_energy), interface_R_imh.gas_energy); - interface_L_iph.gas_energy = C - interface_R_imh.gas_energy; - interface_L_iph.gas_energy = fmax(fmin(cell_i.gas_energy, cell_ipo.gas_energy), interface_L_iph.gas_energy); - interface_L_iph.gas_energy = fmin(fmax(cell_i.gas_energy, cell_ipo.gas_energy), interface_L_iph.gas_energy); - interface_R_imh.gas_energy = C - interface_L_iph.gas_energy; - del_m_i.gas_energy = interface_L_iph.gas_energy - interface_R_imh.gas_energy; -#endif // DE - -#ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - C = interface_R_imh.scalar[i] + interface_L_iph.scalar[i]; - interface_R_imh.scalar[i] = fmax(fmin(cell_i.scalar[i], cell_imo.scalar[i]), interface_R_imh.scalar[i]); - interface_R_imh.scalar[i] = fmin(fmax(cell_i.scalar[i], cell_imo.scalar[i]), interface_R_imh.scalar[i]); - interface_L_iph.scalar[i] = C - interface_R_imh.scalar[i]; - interface_L_iph.scalar[i] = fmax(fmin(cell_i.scalar[i], cell_ipo.scalar[i]), interface_L_iph.scalar[i]); - interface_L_iph.scalar[i] = fmin(fmax(cell_i.scalar[i], cell_ipo.scalar[i]), interface_L_iph.scalar[i]); - interface_R_imh.scalar[i] = C - interface_L_iph.scalar[i]; - del_m_i.scalar[i] = interface_L_iph.scalar[i] - interface_R_imh.scalar[i]; - } -#endif // SCALAR + // Monotize the primitive variables, note the return by reference. Try removing this as it may not be necessary. A + // good test for that would be shock tubes + plmc_utils::Monotize_Primitive(cell_i, cell_imo, cell_ipo, interface_L_iph, interface_R_imh, del_m_i); #ifndef VL @@ -655,4 +588,56 @@ PlmcPrimitive __device__ __host__ Calc_Interface(PlmcPrimitive const &primitive, return output; } // ===================================================================================================================== + +// ===================================================================================================================== +void __device__ __host__ Monotize_Primitive(PlmcPrimitive const &cell_i, PlmcPrimitive const &cell_imo, + PlmcPrimitive const &cell_ipo, PlmcPrimitive &interface_L_iph, + PlmcPrimitive &interface_R_imh, PlmcPrimitive &del_m_i) +{ + // The function that will actually do the monotization. Note that it return the interfaces by reference + auto Monotize = [](Real const &val_i, Real const &val_imo, Real const &val_ipo, Real &interface_L, + Real &interface_R) { + Real const C = interface_R + interface_L; + + interface_R = fmax(fmin(val_i, val_imo), interface_R); + interface_R = fmin(fmax(val_i, val_imo), interface_R); + interface_L = C - interface_R; + + interface_L = fmax(fmin(val_i, val_ipo), interface_L); + interface_L = fmin(fmax(val_i, val_ipo), interface_L); + interface_R = C - interface_L; + }; + + // Monotize + Monotize(cell_i.density, cell_imo.density, cell_ipo.density, interface_L_iph.density, interface_R_imh.density); + Monotize(cell_i.velocity_x, cell_imo.velocity_x, cell_ipo.velocity_x, interface_L_iph.velocity_x, + interface_R_imh.velocity_x); + Monotize(cell_i.velocity_y, cell_imo.velocity_y, cell_ipo.velocity_y, interface_L_iph.velocity_y, + interface_R_imh.velocity_y); + Monotize(cell_i.velocity_z, cell_imo.velocity_z, cell_ipo.velocity_z, interface_L_iph.velocity_z, + interface_R_imh.velocity_z); + Monotize(cell_i.pressure, cell_imo.pressure, cell_ipo.pressure, interface_L_iph.pressure, interface_R_imh.pressure); + + // Compute the new slopes + del_m_i.density = interface_L_iph.density - interface_R_imh.density; + del_m_i.velocity_x = interface_L_iph.velocity_x - interface_R_imh.velocity_x; + del_m_i.velocity_y = interface_L_iph.velocity_y - interface_R_imh.velocity_y; + del_m_i.velocity_z = interface_L_iph.velocity_z - interface_R_imh.velocity_z; + del_m_i.pressure = interface_L_iph.pressure - interface_R_imh.pressure; + +#ifdef DE + Monotize(cell_i.gas_energy, cell_imo.gas_energy, cell_ipo.gas_energy, interface_L_iph.gas_energy, + interface_R_imh.gas_energy); + del_m_i.gas_energy = interface_L_iph.gas_energy - interface_R_imh.gas_energy; +#endif // DE + +#ifdef SCALAR + for (int i = 0; i < NSCALARS; i++) { + Monotize(cell_i.scalar[i], cell_imo.scalar[i], cell_ipo.scalar[i], interface_L_iph.scalar[i], + interface_R_imh.scalar[i]); + del_m_i.scalar[i] = interface_L_iph.scalar[i] - interface_R_imh.scalar[i]; + } +#endif // SCALAR +} +// ===================================================================================================================== } // namespace plmc_utils diff --git a/src/reconstruction/plmc_cuda.h b/src/reconstruction/plmc_cuda.h index 816d1421f..c73ea575c 100644 --- a/src/reconstruction/plmc_cuda.h +++ b/src/reconstruction/plmc_cuda.h @@ -148,12 +148,26 @@ PlmcPrimitive __device__ __host__ Monotize_Characteristic_Return_Primitive( /*! * \brief Compute the interface state from the slope and cell centered state. * - * \param primitive The cell centered state - * \param slopes The slopes - * \param sign Whether to add or subtract the slope. +1 to add it and -1 to subtract it + * \param[in] primitive The cell centered state + * \param[in] slopes The slopes + * \param[in] sign Whether to add or subtract the slope. +1 to add it and -1 to subtract it * \return plmc_utils::PlmcPrimitive The interface state */ PlmcPrimitive __device__ __host__ Calc_Interface(PlmcPrimitive const &primitive, PlmcPrimitive const &slopes, Real const &sign); + +/*! + * \brief Monotize the interface states and primitive slopes. + * + * \param[in] cell_i The state in the i cell + * \param[in] cell_imo The state in the i-1 cell + * \param[in] cell_ipo The state in the i+1 cell + * \param[in,out] interface_L_iph The left interface state at i+1/2 + * \param[in,out] interface_R_imh The right interface state at i-1/2 + * \param[in,out] del_m_i The primitive slopes in the cell + */ +void __device__ __host__ Monotize_Primitive(PlmcPrimitive const &cell_i, PlmcPrimitive const &cell_imo, + PlmcPrimitive const &cell_ipo, PlmcPrimitive &interface_L_iph, + PlmcPrimitive &interface_R_imh, PlmcPrimitive &del_m_i); } // namespace plmc_utils #endif // PLMC_CUDA_H From 67e29dbab2eab9016745931bdc6ce205863ce1a2 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Thu, 30 Mar 2023 13:09:13 -0400 Subject: [PATCH 352/694] PLMC: Update test for all 3 directions --- src/reconstruction/plmc_cuda_tests.cu | 71 +++++++++++++++++++++------ 1 file changed, 57 insertions(+), 14 deletions(-) diff --git a/src/reconstruction/plmc_cuda_tests.cu b/src/reconstruction/plmc_cuda_tests.cu index 2aec8002a..e96586623 100644 --- a/src/reconstruction/plmc_cuda_tests.cu +++ b/src/reconstruction/plmc_cuda_tests.cu @@ -47,17 +47,57 @@ TEST(tHYDROPlmcReconstructor, CorrectInputExpectCorrectOutput) dev_grid.cpyHostToDevice(host_grid); // Fiducial Data - std::unordered_map fiducial_interface_left = { - {26, 2.1584359129984056}, {27, 0.70033864721549188}, {106, 2.2476363309467553}, {107, 3.0633780053857027}, - {186, 2.2245934101106259}, {187, 2.1015872413794123}, {266, 2.1263341057778309}, {267, 3.9675148506537838}, - {346, 3.3640057502842691}, {347, 21.091316282933843}}; - std::unordered_map fiducial_interface_right = { - {25, 3.8877922383184833}, {26, 0.70033864721549188}, {105, 1.5947787943675635}, {106, 3.0633780053857027}, - {185, 4.0069556576401011}, {186, 2.1015872413794123}, {265, 1.7883678016935785}, {266, 3.9675148506537838}, - {345, 2.8032969746372527}, {346, 21.091316282933843}}; + std::vector> fiducial_interface_left = { + {{26, 2.1584359129984056}, + {27, 0.70033864721549188}, + {106, 2.2476363309467553}, + {107, 3.0633780053857027}, + {186, 2.2245934101106259}, + {187, 2.1015872413794123}, + {266, 2.1263341057778309}, + {267, 3.9675148506537838}, + {346, 3.3640057502842691}, + {347, 21.091316282933843}}, + {{21, 0.72430827309279655}, {26, 2.1584359129984056}, {27, 0.70033864721549188}, {37, 0.19457128219588618}, + {101, 5.4739527659741896}, {106, 2.2476363309467553}, {107, 3.0633780053857027}, {117, 4.4286255636679313}, + {181, 0.12703829036056602}, {186, 2.2245934101106259}, {187, 2.1015872413794123}, {197, 2.2851440769830953}, + {261, 1.5337035731959561}, {266, 2.1263341057778309}, {267, 3.9675148506537838}, {277, 2.697375839048191}, + {341, 22.319601655044117}, {346, 3.3640057502842691}, {347, 21.091316282933843}, {357, 82.515887983144168}}, + {{21, 0.72430827309279655}, {25, 2.2863650183226212}, {26, 2.1584359129984056}, {27, 0.70033864721549188}, + {29, 1.686415421301841}, {37, 0.19457128219588618}, {101, 5.4739527659741896}, {105, 0.72340346106443465}, + {106, 2.2476363309467553}, {107, 3.0633780053857027}, {109, 5.4713687086831388}, {117, 4.4286255636679313}, + {181, 0.12703829036056602}, {185, 3.929100145230096}, {186, 2.2245934101106259}, {187, 2.1015872413794123}, + {189, 4.9166140516911483}, {197, 2.2851440769830953}, {261, 1.5337035731959561}, {265, 0.95177493689267167}, + {266, 2.1263341057778309}, {267, 3.9675148506537838}, {269, 0.46056494878491938}, {277, 2.697375839048191}, + {341, 22.319601655044117}, {345, 3.6886096301452787}, {346, 3.3640057502842691}, {347, 21.091316282933843}, + {349, 16.105488797582133}, {357, 82.515887983144168}}}; + std::vector> fiducial_interface_right = { + {{25, 3.8877922383184833}, + {26, 0.70033864721549188}, + {105, 1.5947787943675635}, + {106, 3.0633780053857027}, + {185, 4.0069556576401011}, + {186, 2.1015872413794123}, + {265, 1.7883678016935785}, + {266, 3.9675148506537838}, + {345, 2.8032969746372527}, + {346, 21.091316282933843}}, + {{17, 0.43265217076853835}, {25, 3.8877922383184833}, {26, 0.70033864721549188}, {33, 0.19457128219588618}, + {97, 3.2697645945288754}, {105, 1.5947787943675635}, {106, 3.0633780053857027}, {113, 4.4286255636679313}, + {177, 0.07588397666718491}, {185, 4.0069556576401011}, {186, 2.1015872413794123}, {193, 2.2851440769830953}, + {257, 0.91612950577699748}, {265, 1.7883678016935785}, {266, 3.9675148506537838}, {273, 2.697375839048191}, + {337, 13.332201861384396}, {345, 2.8032969746372527}, {346, 21.091316282933843}, {353, 82.515887983144168}}, + {{5, 2.2863650183226212}, {9, 1.686415421301841}, {17, 0.43265217076853835}, {25, 3.8877922383184833}, + {26, 0.70033864721549188}, {33, 0.19457128219588618}, {85, 0.72340346106443465}, {89, 1.7792505446336098}, + {97, 3.2697645945288754}, {105, 1.5947787943675635}, {106, 3.0633780053857027}, {113, 4.4286255636679313}, + {165, 5.3997753452111859}, {169, 1.4379190463124139}, {177, 0.07588397666718491}, {185, 4.0069556576401011}, + {186, 2.1015872413794123}, {193, 2.2851440769830953}, {245, 0.95177493689267167}, {249, 0.46056494878491938}, + {257, 0.91612950577699748}, {265, 1.7883678016935785}, {266, 3.9675148506537838}, {273, 2.697375839048191}, + {325, 6.6889498465051407}, {329, 1.6145084086614281}, {337, 13.332201861384396}, {345, 2.8032969746372527}, + {346, 21.091316282933843}, {353, 82.515887983144168}}}; // Loop over different directions - for (size_t direction = 0; direction < 1; direction++) { + for (size_t direction = 0; direction < 3; direction++) { // Assign the shape size_t nx_rot, ny_rot, nz_rot; switch (direction) { @@ -93,16 +133,19 @@ TEST(tHYDROPlmcReconstructor, CorrectInputExpectCorrectOutput) // Check the left interface double test_val = dev_interface_left.at(i); double fiducial_val = - (fiducial_interface_left.find(i) == fiducial_interface_left.end()) ? 0.0 : fiducial_interface_left[i]; + (fiducial_interface_left.at(direction).find(i) == fiducial_interface_left.at(direction).end()) + ? 0.0 + : fiducial_interface_left.at(direction)[i]; testingUtilities::checkResults( fiducial_val, test_val, "left interface at i=" + std::to_string(i) + ", in direction " + std::to_string(direction)); - // Check the left interface - test_val = dev_interface_right.at(i); - fiducial_val = - (fiducial_interface_right.find(i) == fiducial_interface_right.end()) ? 0.0 : fiducial_interface_right[i]; + // Check the right interface + test_val = dev_interface_right.at(i); + fiducial_val = (fiducial_interface_right.at(direction).find(i) == fiducial_interface_right.at(direction).end()) + ? 0.0 + : fiducial_interface_right.at(direction)[i]; testingUtilities::checkResults( fiducial_val, test_val, From f5a0ecfc445b87482d0768f1b69b65384056f651 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Thu, 30 Mar 2023 13:24:41 -0400 Subject: [PATCH 353/694] PLMC: Add a function to write out the data --- src/reconstruction/plmc_cuda.cu | 111 +++++++++++++++++--------------- src/reconstruction/plmc_cuda.h | 17 +++++ 2 files changed, 77 insertions(+), 51 deletions(-) diff --git a/src/reconstruction/plmc_cuda.cu b/src/reconstruction/plmc_cuda.cu index 7e8b05531..4f7586ce8 100644 --- a/src/reconstruction/plmc_cuda.cu +++ b/src/reconstruction/plmc_cuda.cu @@ -283,58 +283,13 @@ __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou interface_R_imh.pressure = fmax(interface_R_imh.pressure, (Real)TINY_NUMBER); interface_L_iph.pressure = fmax(interface_L_iph.pressure, (Real)TINY_NUMBER); - // Convert the left and right states in the primitive to the conserved - // variables send final values back from kernel bounds_R refers to the right - // side of the i-1/2 interface - int id; - switch (dir) { - case 0: - id = xid - 1 + yid * nx + zid * nx * ny; - break; - case 1: - id = xid + (yid - 1) * nx + zid * nx * ny; - break; - case 2: - id = xid + yid * nx + (zid - 1) * nx * ny; - break; - } + // Convert the left and right states in the primitive to the conserved variables send final values back from kernel + // bounds_R refers to the right side of the i-1/2 interface + size_t id = cuda_utilities::compute1DIndex(xid, yid, zid, nx, ny); + plmc_utils::Write_Data(interface_L_iph, dev_bounds_L, dev_conserved, id, n_cells, o1, o2, o3, gamma); - dev_bounds_R[id] = interface_R_imh.density; - dev_bounds_R[o1 * n_cells + id] = interface_R_imh.density * interface_R_imh.velocity_x; - dev_bounds_R[o2 * n_cells + id] = interface_R_imh.density * interface_R_imh.velocity_y; - dev_bounds_R[o3 * n_cells + id] = interface_R_imh.density * interface_R_imh.velocity_z; - dev_bounds_R[4 * n_cells + id] = - (interface_R_imh.pressure / (gamma - 1.0)) + 0.5 * interface_R_imh.density * - (interface_R_imh.velocity_x * interface_R_imh.velocity_x + - interface_R_imh.velocity_y * interface_R_imh.velocity_y + - interface_R_imh.velocity_z * interface_R_imh.velocity_z); -#ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - dev_bounds_R[(5 + i) * n_cells + id] = interface_R_imh.density * interface_R_imh.scalar[i]; - } -#endif // SCALAR -#ifdef DE - dev_bounds_R[(n_fields - 1) * n_cells + id] = interface_R_imh.density * interface_R_imh.gas_energy; -#endif // DE - // bounds_L refers to the left side of the i+1/2 interface - id = xid + yid * nx + zid * nx * ny; - dev_bounds_L[id] = interface_L_iph.density; - dev_bounds_L[o1 * n_cells + id] = interface_L_iph.density * interface_L_iph.velocity_x; - dev_bounds_L[o2 * n_cells + id] = interface_L_iph.density * interface_L_iph.velocity_y; - dev_bounds_L[o3 * n_cells + id] = interface_L_iph.density * interface_L_iph.velocity_z; - dev_bounds_L[4 * n_cells + id] = - (interface_L_iph.pressure / (gamma - 1.0)) + 0.5 * interface_L_iph.density * - (interface_L_iph.velocity_x * interface_L_iph.velocity_x + - interface_L_iph.velocity_y * interface_L_iph.velocity_y + - interface_L_iph.velocity_z * interface_L_iph.velocity_z); -#ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - dev_bounds_L[(5 + i) * n_cells + id] = interface_L_iph.density * interface_L_iph.scalar[i]; - } -#endif // SCALAR -#ifdef DE - dev_bounds_L[(n_fields - 1) * n_cells + id] = interface_L_iph.density * interface_L_iph.gas_energy; -#endif // DE + id = cuda_utilities::compute1DIndex(xid - int(dir == 0), yid - int(dir == 1), zid - int(dir == 2), nx, ny); + plmc_utils::Write_Data(interface_R_imh, dev_bounds_R, dev_conserved, id, n_cells, o1, o2, o3, gamma); } namespace plmc_utils @@ -640,4 +595,58 @@ void __device__ __host__ Monotize_Primitive(PlmcPrimitive const &cell_i, PlmcPri #endif // SCALAR } // ===================================================================================================================== + +// ===================================================================================================================== +void __device__ __host__ Write_Data(PlmcPrimitive const &interface_state, Real *dev_interface, + Real const *dev_conserved, size_t const &id, size_t const &n_cells, + size_t const &o1, size_t const &o2, size_t const &o3, Real const &gamma) +{ + // Write out density and momentum + dev_interface[grid_enum::density * n_cells + id] = interface_state.density; + dev_interface[o1 * n_cells + id] = interface_state.density * interface_state.velocity_x; + dev_interface[o2 * n_cells + id] = interface_state.density * interface_state.velocity_y; + dev_interface[o3 * n_cells + id] = interface_state.density * interface_state.velocity_z; + +#ifdef MHD + // Write the Y and Z interface states and load the X magnetic face needed to compute the energy + Real magnetic_x; + switch (o1) { + case grid_enum::momentum_x: + dev_interface[grid_enum::Q_x_magnetic_y * n_cells + id] = interface_state.magnetic_y; + dev_interface[grid_enum::Q_x_magnetic_z * n_cells + id] = interface_state.magnetic_z; + magnetic_x = dev_conserved[grid_enum::magnetic_x * n_cells + id]; + break; + case grid_enum::momentum_y: + dev_interface[grid_enum::Q_y_magnetic_z * n_cells + id] = interface_state.magnetic_y; + dev_interface[grid_enum::Q_y_magnetic_x * n_cells + id] = interface_state.magnetic_z; + magnetic_x = dev_conserved[grid_enum::magnetic_y * n_cells + id]; + break; + case grid_enum::momentum_z: + dev_interface[grid_enum::Q_z_magnetic_x * n_cells + id] = interface_state.magnetic_y; + dev_interface[grid_enum::Q_z_magnetic_y * n_cells + id] = interface_state.magnetic_z; + magnetic_x = dev_conserved[grid_enum::magnetic_z * n_cells + id]; + break; + } + + // Compute the MHD energy + dev_interface[grid_enum::Energy * n_cells + id] = hydro_utilities::Calc_Energy_Primitive( + interface_state.pressure, interface_state.density, interface_state.velocity_x, interface_state.velocity_y, + interface_state.velocity_z, gamma, magnetic_x, interface_state.magnetic_y, interface_state.magnetic_z); +#else // not MHD + // Compute the hydro energy + dev_interface[grid_enum::Energy * n_cells + id] = hydro_utilities::Calc_Energy_Primitive( + interface_state.pressure, interface_state.density, interface_state.velocity_x, interface_state.velocity_y, + interface_state.velocity_z, gamma); +#endif // MHD + +#ifdef DE + dev_interface[grid_enum::GasEnergy * n_cells + id] = interface_state.density * interface_state.gas_energy; +#endif // DE +#ifdef SCALAR + for (int i = 0; i < NSCALARS; i++) { + dev_interface[(grid_enum::scalar + i) * n_cells + id] = interface_state.density * interface_state.scalar[i]; + } +#endif // SCALAR +} +// ===================================================================================================================== } // namespace plmc_utils diff --git a/src/reconstruction/plmc_cuda.h b/src/reconstruction/plmc_cuda.h index c73ea575c..8abef9087 100644 --- a/src/reconstruction/plmc_cuda.h +++ b/src/reconstruction/plmc_cuda.h @@ -169,5 +169,22 @@ PlmcPrimitive __device__ __host__ Calc_Interface(PlmcPrimitive const &primitive, void __device__ __host__ Monotize_Primitive(PlmcPrimitive const &cell_i, PlmcPrimitive const &cell_imo, PlmcPrimitive const &cell_ipo, PlmcPrimitive &interface_L_iph, PlmcPrimitive &interface_R_imh, PlmcPrimitive &del_m_i); + +/*! + * \brief Write the interface data to the appropriate arrays + * + * \param[in] interface_state The interface state to write + * \param[out] dev_interface The interface array + * \param[in] dev_conserved The conserved variables + * \param[in] id The cell id to write to + * \param[in] n_cells The total number of cells + * \param[in] o1 Directional parameter + * \param[in] o2 Directional parameter + * \param[in] o3 Directional parameter + * \param[in] gamma The adiabatic index + */ +void __device__ __host__ Write_Data(PlmcPrimitive const &interface_state, Real *dev_interface, + Real const *dev_conserved, size_t const &id, size_t const &n_cells, + size_t const &o1, size_t const &o2, size_t const &o3, Real const &gamma); } // namespace plmc_utils #endif // PLMC_CUDA_H From dae7da5f7792ffaad22b667cbadaf1b83eff4c75 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Thu, 30 Mar 2023 13:52:57 -0400 Subject: [PATCH 354/694] PLMC: Add MHD too all but projecting functions --- src/reconstruction/plmc_cuda.cu | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/src/reconstruction/plmc_cuda.cu b/src/reconstruction/plmc_cuda.cu index 4f7586ce8..c24a55433 100644 --- a/src/reconstruction/plmc_cuda.cu +++ b/src/reconstruction/plmc_cuda.cu @@ -502,6 +502,11 @@ PlmcPrimitive __device__ __host__ Monotize_Characteristic_Return_Primitive( del_a_m.a3 = Monotize(del_a_L.a3, del_a_R.a3, del_a_C.a3, del_a_G.a3); del_a_m.a4 = Monotize(del_a_L.a4, del_a_R.a4, del_a_C.a4, del_a_G.a4); +#ifdef MHD + del_a_m.a5 = Monotize(del_a_L.a5, del_a_R.a5, del_a_C.a5, del_a_G.a5); + del_a_m.a6 = Monotize(del_a_L.a6, del_a_R.a6, del_a_C.a6, del_a_G.a6); +#endif // MHD + #ifdef DE output.gas_energy = Monotize(del_L.gas_energy, del_R.gas_energy, del_C.gas_energy, del_G.gas_energy); #endif // DE @@ -531,6 +536,11 @@ PlmcPrimitive __device__ __host__ Calc_Interface(PlmcPrimitive const &primitive, output.velocity_z = primitive.velocity_z + sign * 0.5 * slopes.velocity_z; output.pressure = primitive.pressure + sign * 0.5 * slopes.pressure; +#ifdef MHD + output.magnetic_y = primitive.magnetic_y + sign * 0.5 * slopes.magnetic_y; + output.magnetic_z = primitive.magnetic_z + sign * 0.5 * slopes.magnetic_z; +#endif // MHD + #ifdef DE output.gas_energy = primitive.gas_energy + sign * 0.5 * slopes.gas_energy; #endif // DE @@ -580,6 +590,16 @@ void __device__ __host__ Monotize_Primitive(PlmcPrimitive const &cell_i, PlmcPri del_m_i.velocity_z = interface_L_iph.velocity_z - interface_R_imh.velocity_z; del_m_i.pressure = interface_L_iph.pressure - interface_R_imh.pressure; +#ifdef MHD + Monotize(cell_i.magnetic_y, cell_imo.magnetic_y, cell_ipo.magnetic_y, interface_L_iph.magnetic_y, + interface_R_imh.magnetic_y); + Monotize(cell_i.magnetic_z, cell_imo.magnetic_z, cell_ipo.velocity_z, interface_L_iph.velocity_z, + interface_R_imh.magnetic_z); + + del_m_i.magnetic_y = interface_L_iph.magnetic_y - interface_R_imh.magnetic_y; + del_m_i.magnetic_z = interface_L_iph.magnetic_z - interface_R_imh.magnetic_z; +#endif // MHD + #ifdef DE Monotize(cell_i.gas_energy, cell_imo.gas_energy, cell_ipo.gas_energy, interface_L_iph.gas_energy, interface_R_imh.gas_energy); From 1b78bf3dabd7f3be70ab134f1d07406bf6f3e733 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Thu, 30 Mar 2023 14:07:03 -0400 Subject: [PATCH 355/694] PLMC: slightly rearrange projection calculations --- src/reconstruction/plmc_cuda.cu | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/src/reconstruction/plmc_cuda.cu b/src/reconstruction/plmc_cuda.cu index c24a55433..034854da0 100644 --- a/src/reconstruction/plmc_cuda.cu +++ b/src/reconstruction/plmc_cuda.cu @@ -445,13 +445,13 @@ PlmcCharacteristic __device__ __host__ Primitive_To_Characteristic(PlmcPrimitive { PlmcCharacteristic output; - output.a0 = -primitive.density * primitive_slope.velocity_x / (2 * sound_speed) + - primitive_slope.pressure / (2 * sound_speed_squared); + output.a0 = -primitive.density * primitive_slope.velocity_x / (2.0 * sound_speed) + + primitive_slope.pressure / (2.0 * sound_speed_squared); output.a1 = primitive_slope.density - primitive_slope.pressure / (sound_speed_squared); output.a2 = primitive_slope.velocity_y; output.a3 = primitive_slope.velocity_z; - output.a4 = primitive.density * primitive_slope.velocity_x / (2 * sound_speed) + - primitive_slope.pressure / (2 * sound_speed_squared); + output.a4 = primitive.density * primitive_slope.velocity_x / (2.0 * sound_speed) + + primitive_slope.pressure / (2.0 * sound_speed_squared); return output; } @@ -464,11 +464,10 @@ void __device__ __host__ Characteristic_To_Primitive(PlmcPrimitive const &primit PlmcPrimitive &output) { output.density = characteristic_slope.a0 + characteristic_slope.a1 + characteristic_slope.a4; - output.velocity_x = -sound_speed * characteristic_slope.a0 / primitive.density + - sound_speed * characteristic_slope.a4 / primitive.density; + output.velocity_x = sound_speed / primitive.density * (characteristic_slope.a4 - characteristic_slope.a0); output.velocity_y = characteristic_slope.a2; output.velocity_z = characteristic_slope.a3; - output.pressure = sound_speed_squared * characteristic_slope.a0 + sound_speed_squared * characteristic_slope.a4; + output.pressure = sound_speed_squared * (characteristic_slope.a0 + characteristic_slope.a4); } // ===================================================================================================================== From 92ee34ba6f98e0d0d29d3089783a31c5cb8ee85d Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Thu, 30 Mar 2023 16:28:27 -0400 Subject: [PATCH 356/694] PLMC: mhd support for primitive to characteristic projection --- src/reconstruction/plmc_cuda.cu | 106 ++++++++++++++++++++++++++++---- src/reconstruction/plmc_cuda.h | 11 ++-- 2 files changed, 100 insertions(+), 17 deletions(-) diff --git a/src/reconstruction/plmc_cuda.cu b/src/reconstruction/plmc_cuda.cu index 034854da0..8ce20d782 100644 --- a/src/reconstruction/plmc_cuda.cu +++ b/src/reconstruction/plmc_cuda.cu @@ -96,16 +96,16 @@ __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou // characteristic variables, see Stone for notation) Use the eigenvectors // given in Stone 2008, Appendix A plmc_utils::PlmcCharacteristic const del_a_L = - plmc_utils::Primitive_To_Characteristic(cell_i, del_L, sound_speed, sound_speed_squared); + plmc_utils::Primitive_To_Characteristic(cell_i, del_L, sound_speed, sound_speed_squared, gamma); plmc_utils::PlmcCharacteristic const del_a_R = - plmc_utils::Primitive_To_Characteristic(cell_i, del_R, sound_speed, sound_speed_squared); + plmc_utils::Primitive_To_Characteristic(cell_i, del_R, sound_speed, sound_speed_squared, gamma); plmc_utils::PlmcCharacteristic const del_a_C = - plmc_utils::Primitive_To_Characteristic(cell_i, del_C, sound_speed, sound_speed_squared); + plmc_utils::Primitive_To_Characteristic(cell_i, del_C, sound_speed, sound_speed_squared, gamma); plmc_utils::PlmcCharacteristic const del_a_G = - plmc_utils::Primitive_To_Characteristic(cell_i, del_G, sound_speed, sound_speed_squared); + plmc_utils::Primitive_To_Characteristic(cell_i, del_G, sound_speed, sound_speed_squared, gamma); // Apply monotonicity constraints to the differences in the characteristic variables and project the monotonized // difference in the characteristic variables back onto the primitive variables Stone Eqn 39 @@ -318,14 +318,17 @@ PlmcPrimitive __device__ __host__ Load_Data(Real const *dev_conserved, size_t co auto magnetic_centered = mhd::utils::cellCenteredMagneticFields(dev_conserved, id, xid, yid, zid, n_cells, nx, ny); switch (o1) { case grid_enum::momentum_x: + loaded_data.magnetic_x = magnetic_centered.x; loaded_data.magnetic_y = magnetic_centered.y; loaded_data.magnetic_z = magnetic_centered.z; break; case grid_enum::momentum_y: + loaded_data.magnetic_x = magnetic_centered.y; loaded_data.magnetic_y = magnetic_centered.z; loaded_data.magnetic_z = magnetic_centered.x; break; case grid_enum::momentum_z: + loaded_data.magnetic_x = magnetic_centered.z; loaded_data.magnetic_y = magnetic_centered.x; loaded_data.magnetic_z = magnetic_centered.y; break; @@ -348,10 +351,10 @@ PlmcPrimitive __device__ __host__ Load_Data(Real const *dev_conserved, size_t co loaded_data.gas_energy = gas_energy / loaded_data.density; #else // not DE #ifdef MHD - loaded_data.pressure = - hydro_utilities::Calc_Pressure_Primitive(dev_conserved[grid_enum::Energy * n_cells + id], loaded_data.density, - loaded_data.velocity_x, loaded_data.velocity_y, loaded_data.velocity_z, - gamma, magnetic_centered.x, magnetic_centered.y, magnetic_centered.z); + loaded_data.pressure = hydro_utilities::Calc_Pressure_Primitive( + dev_conserved[grid_enum::Energy * n_cells + id], loaded_data.density, loaded_data.velocity_x, + loaded_data.velocity_y, loaded_data.velocity_z, gamma, loaded_data.magnetic_x, loaded_data.magnetic_y, + loaded_data.magnetic_z); #else // not MHD loaded_data.pressure = hydro_utilities::Calc_Pressure_Primitive( dev_conserved[grid_enum::Energy * n_cells + id], loaded_data.density, loaded_data.velocity_x, @@ -438,13 +441,91 @@ PlmcPrimitive __device__ __host__ Van_Leer_Slope(PlmcPrimitive const &left_slope // ===================================================================================================================== // ===================================================================================================================== -PlmcCharacteristic __device__ __host__ Primitive_To_Characteristic(PlmcPrimitive const &primitive, - PlmcPrimitive const &primitive_slope, - Real const &sound_speed, - Real const &sound_speed_squared) +PlmcCharacteristic __device__ Primitive_To_Characteristic(PlmcPrimitive const &primitive, + PlmcPrimitive const &primitive_slope, Real const &sound_speed, + Real const &sound_speed_squared, Real const &gamma) { PlmcCharacteristic output; +#ifdef MHD + // This is taken from Stone et al. 2008, appendix A. Equation numbers will be quoted as relevant + + // First, compute some basic quantities we will need later + Real const inverse_sqrt_density = rsqrt(primitive.density); + + // Compute wave speeds and their squares + Real const magnetosonic_speed_fast = mhd::utils::fastMagnetosonicSpeed( + primitive.density, primitive.pressure, primitive.magnetic_x, primitive.magnetic_y, primitive.magnetic_z, gamma); + Real const magnetosonic_speed_slow = mhd::utils::slowMagnetosonicSpeed( + primitive.density, primitive.pressure, primitive.magnetic_x, primitive.magnetic_y, primitive.magnetic_z, gamma); + + Real const magnetosonic_speed_fast_squared = magnetosonic_speed_fast * magnetosonic_speed_fast; + Real const magnetosonic_speed_slow_squared = magnetosonic_speed_slow * magnetosonic_speed_slow; + + // Compute Alphas (equation A16) + Real alpha_fast, alpha_slow; + if (Real const denom = (magnetosonic_speed_fast_squared - magnetosonic_speed_slow_squared), + numerator_2 = (magnetosonic_speed_fast_squared - sound_speed_squared); + denom <= 0.0 or numerator_2 <= 0.0) { + alpha_fast = 1.0; + alpha_slow = 0.0; + } else if (Real const numerator_1 = (sound_speed_squared - magnetosonic_speed_slow_squared); numerator_1 <= 0.0) { + alpha_fast = 0.0; + alpha_slow = 1.0; + } else { + alpha_fast = sqrt(numerator_1 / denom); + alpha_slow = sqrt(numerator_2 / denom); + } + + // Compute Betas (equation A17) + Real const beta_denom = rhypot(primitive.magnetic_y, primitive.magnetic_z); + Real const beta_y = (beta_denom == 0) ? 0.0 : primitive.magnetic_y * beta_denom; + Real const beta_z = (beta_denom == 0) ? 0.0 : primitive.magnetic_z * beta_denom; + + // Compute Q(s) (equation A14) + Real const n_fs = 0.5 / sound_speed_squared; // equation A19 + Real const sign = copysign(1.0, primitive.magnetic_x); + ; + Real const q_fast = sign * n_fs * alpha_fast * magnetosonic_speed_fast; + Real const q_slow = sign * n_fs * alpha_slow * magnetosonic_speed_slow; + + // Compute A(s) (equation A15) + Real const a_prime_fast = 0.5 * alpha_fast / (sound_speed * sqrt(primitive.density)); + Real const a_prime_slow = 0.5 * alpha_slow / (sound_speed * sqrt(primitive.density)); + + // Multiply the slopes by the left eigenvector matrix given in equation 18 + output.a0 = + n_fs * alpha_fast * + (primitive_slope.pressure / primitive.density - magnetosonic_speed_fast * primitive_slope.velocity_x) + + q_slow * (beta_y * primitive_slope.velocity_y + beta_z * primitive_slope.velocity_z) + + a_prime_slow * (beta_y * primitive_slope.magnetic_y + beta_z * primitive_slope.magnetic_z); + + output.a1 = 0.5 * (beta_y * (primitive_slope.magnetic_z * sign * inverse_sqrt_density + primitive_slope.velocity_z) - + beta_z * (primitive_slope.magnetic_y * sign * inverse_sqrt_density + primitive_slope.velocity_y)); + + output.a2 = + n_fs * alpha_slow * + (primitive_slope.pressure / primitive.density - magnetosonic_speed_slow * primitive_slope.velocity_x) - + q_fast * (beta_y * primitive_slope.velocity_y + beta_z * primitive_slope.velocity_z) - + a_prime_fast * (beta_y * primitive_slope.magnetic_y + beta_z * primitive_slope.magnetic_z); + + output.a3 = primitive_slope.density - primitive_slope.pressure / sound_speed_squared; + + output.a4 = + n_fs * alpha_slow * + (primitive_slope.pressure / primitive.density + magnetosonic_speed_slow * primitive_slope.velocity_x) + + q_fast * (beta_y * primitive_slope.velocity_y + beta_z * primitive_slope.velocity_z) - + a_prime_fast * (beta_y * primitive_slope.magnetic_y + beta_z * primitive_slope.magnetic_z); + output.a5 = 0.5 * (beta_y * (primitive_slope.magnetic_z * sign * inverse_sqrt_density - primitive_slope.velocity_z) - + beta_z * (primitive_slope.magnetic_y * sign * inverse_sqrt_density - primitive_slope.velocity_y)); + + output.a6 = + n_fs * alpha_fast * + (primitive_slope.pressure / primitive.density + magnetosonic_speed_fast * primitive_slope.velocity_x) - + q_slow * (beta_y * primitive_slope.velocity_y + beta_z * primitive_slope.velocity_z) + + a_prime_slow * (beta_y * primitive_slope.magnetic_y + beta_z * primitive_slope.magnetic_z); + +#else // not MHD output.a0 = -primitive.density * primitive_slope.velocity_x / (2.0 * sound_speed) + primitive_slope.pressure / (2.0 * sound_speed_squared); output.a1 = primitive_slope.density - primitive_slope.pressure / (sound_speed_squared); @@ -452,6 +533,7 @@ PlmcCharacteristic __device__ __host__ Primitive_To_Characteristic(PlmcPrimitive output.a3 = primitive_slope.velocity_z; output.a4 = primitive.density * primitive_slope.velocity_x / (2.0 * sound_speed) + primitive_slope.pressure / (2.0 * sound_speed_squared); +#endif // MHD return output; } diff --git a/src/reconstruction/plmc_cuda.h b/src/reconstruction/plmc_cuda.h index 8abef9087..194c84d54 100644 --- a/src/reconstruction/plmc_cuda.h +++ b/src/reconstruction/plmc_cuda.h @@ -27,7 +27,8 @@ struct PlmcPrimitive { Real density, velocity_x, velocity_y, velocity_z, pressure; #ifdef MHD - Real magnetic_y, magnetic_z; + // These are all cell centered values + Real magnetic_x, magnetic_y, magnetic_z; #endif // MHD #ifdef DE @@ -101,12 +102,12 @@ PlmcPrimitive __device__ __host__ Van_Leer_Slope(PlmcPrimitive const &left_slope * \param[in] primitive_slope The primitive variables slopes * \param[in] sound_speed The speed of sound * \param[in] sound_speed_squared The speed of sound squared + * \param[in] gamma The adiabatic index * \return PlmcCharacteristic */ -PlmcCharacteristic __device__ __host__ Primitive_To_Characteristic(PlmcPrimitive const &primitive, - PlmcPrimitive const &primitive_slope, - Real const &sound_speed, - Real const &sound_speed_squared); +PlmcCharacteristic __device__ Primitive_To_Characteristic(PlmcPrimitive const &primitive, + PlmcPrimitive const &primitive_slope, Real const &sound_speed, + Real const &sound_speed_squared, Real const &gamma); /*! * \brief Project from the characteristic variables slopes to the primitive variables slopes. Stone Eqn 39. Use the From 8f76e34127e757f955e9a31f354e0b05402f3299 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Fri, 31 Mar 2023 09:40:13 -0400 Subject: [PATCH 357/694] PLMC: MHD characteristic to primitive conversion --- src/reconstruction/plmc_cuda.cu | 165 ++++++++++++++++++++++---------- src/reconstruction/plmc_cuda.h | 25 ++--- 2 files changed, 127 insertions(+), 63 deletions(-) diff --git a/src/reconstruction/plmc_cuda.cu b/src/reconstruction/plmc_cuda.cu index 8ce20d782..df679e0b0 100644 --- a/src/reconstruction/plmc_cuda.cu +++ b/src/reconstruction/plmc_cuda.cu @@ -109,16 +109,16 @@ __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou // Apply monotonicity constraints to the differences in the characteristic variables and project the monotonized // difference in the characteristic variables back onto the primitive variables Stone Eqn 39 - plmc_utils::PlmcPrimitive del_m_i = plmc_utils::Monotize_Characteristic_Return_Primitive( - cell_i, del_L, del_R, del_C, del_G, del_a_L, del_a_R, del_a_C, del_a_G, sound_speed, sound_speed_squared); + plmc_utils::PlmcPrimitive del_m_i = plmc_utils::Monotonize_Characteristic_Return_Primitive( + cell_i, del_L, del_R, del_C, del_G, del_a_L, del_a_R, del_a_C, del_a_G, sound_speed, sound_speed_squared, gamma); // Compute the left and right interface values using the monotonized difference in the primitive variables plmc_utils::PlmcPrimitive interface_L_iph = plmc_utils::Calc_Interface(cell_i, del_m_i, 1.0); plmc_utils::PlmcPrimitive interface_R_imh = plmc_utils::Calc_Interface(cell_i, del_m_i, -1.0); - // Monotize the primitive variables, note the return by reference. Try removing this as it may not be necessary. A + // Monotonize the primitive variables, note the return by reference. Try removing this as it may not be necessary. A // good test for that would be shock tubes - plmc_utils::Monotize_Primitive(cell_i, cell_imo, cell_ipo, interface_L_iph, interface_R_imh, del_m_i); + plmc_utils::Monotonize_Primitive(cell_i, cell_imo, cell_ipo, interface_L_iph, interface_R_imh, del_m_i); #ifndef VL @@ -483,9 +483,8 @@ PlmcCharacteristic __device__ Primitive_To_Characteristic(PlmcPrimitive const &p Real const beta_z = (beta_denom == 0) ? 0.0 : primitive.magnetic_z * beta_denom; // Compute Q(s) (equation A14) - Real const n_fs = 0.5 / sound_speed_squared; // equation A19 - Real const sign = copysign(1.0, primitive.magnetic_x); - ; + Real const n_fs = 0.5 / sound_speed_squared; // equation A19 + Real const sign = copysign(1.0, primitive.magnetic_x); Real const q_fast = sign * n_fs * alpha_fast * magnetosonic_speed_fast; Real const q_slow = sign * n_fs * alpha_slow * magnetosonic_speed_slow; @@ -540,28 +539,92 @@ PlmcCharacteristic __device__ Primitive_To_Characteristic(PlmcPrimitive const &p // ===================================================================================================================== // ===================================================================================================================== -void __device__ __host__ Characteristic_To_Primitive(PlmcPrimitive const &primitive, - PlmcCharacteristic const &characteristic_slope, - Real const &sound_speed, Real const &sound_speed_squared, - PlmcPrimitive &output) +void __device__ Characteristic_To_Primitive(PlmcPrimitive const &primitive, + PlmcCharacteristic const &characteristic_slope, Real const &sound_speed, + Real const &sound_speed_squared, Real const &gamma, PlmcPrimitive &output) { +#ifdef MHD + // This is taken from Stone et al. 2008, appendix A. Equation numbers will be quoted as relevant + + // Compute wave speeds and their squares + Real const magnetosonic_speed_fast = mhd::utils::fastMagnetosonicSpeed( + primitive.density, primitive.pressure, primitive.magnetic_x, primitive.magnetic_y, primitive.magnetic_z, gamma); + Real const magnetosonic_speed_slow = mhd::utils::slowMagnetosonicSpeed( + primitive.density, primitive.pressure, primitive.magnetic_x, primitive.magnetic_y, primitive.magnetic_z, gamma); + + Real const magnetosonic_speed_fast_squared = magnetosonic_speed_fast * magnetosonic_speed_fast; + Real const magnetosonic_speed_slow_squared = magnetosonic_speed_slow * magnetosonic_speed_slow; + + // Compute Alphas (equation A16) + Real alpha_fast, alpha_slow; + if (Real const denom = (magnetosonic_speed_fast_squared - magnetosonic_speed_slow_squared), + numerator_2 = (magnetosonic_speed_fast_squared - sound_speed_squared); + denom <= 0.0 or numerator_2 <= 0.0) { + alpha_fast = 1.0; + alpha_slow = 0.0; + } else if (Real const numerator_1 = (sound_speed_squared - magnetosonic_speed_slow_squared); numerator_1 <= 0.0) { + alpha_fast = 0.0; + alpha_slow = 1.0; + } else { + alpha_fast = sqrt(numerator_1 / denom); + alpha_slow = sqrt(numerator_2 / denom); + } + + // Compute Betas (equation A17) + Real const beta_denom = rhypot(primitive.magnetic_y, primitive.magnetic_z); + Real const beta_y = (beta_denom == 0) ? 0.0 : primitive.magnetic_y * beta_denom; + Real const beta_z = (beta_denom == 0) ? 0.0 : primitive.magnetic_z * beta_denom; + + // Compute Q(s) (equation A14) + Real const sign = copysign(1.0, primitive.magnetic_x); + Real const q_fast = sign * alpha_fast * magnetosonic_speed_fast; + Real const q_slow = sign * alpha_slow * magnetosonic_speed_slow; + + // Compute A(s) (equation A15) + Real const a_prime_fast = alpha_fast * sound_speed * sqrt(primitive.density); + Real const a_prime_slow = alpha_slow * sound_speed * sqrt(primitive.density); + + // Multiply the slopes by the right eigenvector matrix given in equation 12 + output.density = primitive.density * (alpha_fast * (characteristic_slope.a0 + characteristic_slope.a6) + + alpha_slow * (characteristic_slope.a2 + characteristic_slope.a4)) + + characteristic_slope.a3; + output.velocity_x = magnetosonic_speed_fast * alpha_fast * (characteristic_slope.a6 - characteristic_slope.a0) + + magnetosonic_speed_slow * alpha_slow * (characteristic_slope.a4 - characteristic_slope.a2); + output.velocity_y = beta_y * (q_slow * (characteristic_slope.a0 - characteristic_slope.a6) + + q_fast * (characteristic_slope.a4 - characteristic_slope.a2)) + + beta_z * (characteristic_slope.a5 - characteristic_slope.a1); + output.velocity_z = beta_z * (q_slow * (characteristic_slope.a0 - characteristic_slope.a6) + + q_fast * (characteristic_slope.a4 - characteristic_slope.a2)) + + beta_y * (characteristic_slope.a1 - characteristic_slope.a5); + output.pressure = primitive.density * sound_speed_squared * + (alpha_fast * (characteristic_slope.a0 + characteristic_slope.a6) + + alpha_slow * (characteristic_slope.a2 + characteristic_slope.a4)); + output.magnetic_y = beta_y * (a_prime_slow * (characteristic_slope.a0 + characteristic_slope.a6) - + a_prime_fast * (characteristic_slope.a2 + characteristic_slope.a4)) - + beta_z * sign * sqrt(primitive.density) * (characteristic_slope.a5 + characteristic_slope.a1); + output.magnetic_z = beta_z * (a_prime_slow * (characteristic_slope.a0 + characteristic_slope.a6) - + a_prime_fast * (characteristic_slope.a2 + characteristic_slope.a4)) + + beta_y * sign * sqrt(primitive.density) * (characteristic_slope.a5 + characteristic_slope.a1); + +#else // not MHD output.density = characteristic_slope.a0 + characteristic_slope.a1 + characteristic_slope.a4; output.velocity_x = sound_speed / primitive.density * (characteristic_slope.a4 - characteristic_slope.a0); output.velocity_y = characteristic_slope.a2; output.velocity_z = characteristic_slope.a3; output.pressure = sound_speed_squared * (characteristic_slope.a0 + characteristic_slope.a4); +#endif // MHD } // ===================================================================================================================== // ===================================================================================================================== -PlmcPrimitive __device__ __host__ Monotize_Characteristic_Return_Primitive( +PlmcPrimitive __device__ Monotonize_Characteristic_Return_Primitive( PlmcPrimitive const &primitive, PlmcPrimitive const &del_L, PlmcPrimitive const &del_R, PlmcPrimitive const &del_C, PlmcPrimitive const &del_G, PlmcCharacteristic const &del_a_L, PlmcCharacteristic const &del_a_R, PlmcCharacteristic const &del_a_C, PlmcCharacteristic const &del_a_G, Real const &sound_speed, - Real const &sound_speed_squared) + Real const &sound_speed_squared, Real const &gamma) { - // The function that will actually do the monotization - auto Monotize = [](Real const &left, Real const &right, Real const ¢ered, Real const &van_leer) -> Real { + // The function that will actually do the monotozation + auto Monotonize = [](Real const &left, Real const &right, Real const ¢ered, Real const &van_leer) -> Real { if (left * right > 0.0) { Real const lim_slope_a = 2.0 * fmin(fabs(left), fabs(right)); Real const lim_slope_b = fmin(fabs(centered), fabs(van_leer)); @@ -571,35 +634,35 @@ PlmcPrimitive __device__ __host__ Monotize_Characteristic_Return_Primitive( } }; - // the monotized difference in the characteristic variables + // the monotonized difference in the characteristic variables PlmcCharacteristic del_a_m; - // The monotized difference in the characteristic variables projected into the primitive variables + // The monotonized difference in the characteristic variables projected into the primitive variables PlmcPrimitive output; - // Monotize the slopes - del_a_m.a0 = Monotize(del_a_L.a0, del_a_R.a0, del_a_C.a0, del_a_G.a0); - del_a_m.a1 = Monotize(del_a_L.a1, del_a_R.a1, del_a_C.a1, del_a_G.a1); - del_a_m.a2 = Monotize(del_a_L.a2, del_a_R.a2, del_a_C.a2, del_a_G.a2); - del_a_m.a3 = Monotize(del_a_L.a3, del_a_R.a3, del_a_C.a3, del_a_G.a3); - del_a_m.a4 = Monotize(del_a_L.a4, del_a_R.a4, del_a_C.a4, del_a_G.a4); + // Monotonize the slopes + del_a_m.a0 = Monotonize(del_a_L.a0, del_a_R.a0, del_a_C.a0, del_a_G.a0); + del_a_m.a1 = Monotonize(del_a_L.a1, del_a_R.a1, del_a_C.a1, del_a_G.a1); + del_a_m.a2 = Monotonize(del_a_L.a2, del_a_R.a2, del_a_C.a2, del_a_G.a2); + del_a_m.a3 = Monotonize(del_a_L.a3, del_a_R.a3, del_a_C.a3, del_a_G.a3); + del_a_m.a4 = Monotonize(del_a_L.a4, del_a_R.a4, del_a_C.a4, del_a_G.a4); #ifdef MHD - del_a_m.a5 = Monotize(del_a_L.a5, del_a_R.a5, del_a_C.a5, del_a_G.a5); - del_a_m.a6 = Monotize(del_a_L.a6, del_a_R.a6, del_a_C.a6, del_a_G.a6); + del_a_m.a5 = Monotonize(del_a_L.a5, del_a_R.a5, del_a_C.a5, del_a_G.a5); + del_a_m.a6 = Monotonize(del_a_L.a6, del_a_R.a6, del_a_C.a6, del_a_G.a6); #endif // MHD #ifdef DE - output.gas_energy = Monotize(del_L.gas_energy, del_R.gas_energy, del_C.gas_energy, del_G.gas_energy); + output.gas_energy = Monotonize(del_L.gas_energy, del_R.gas_energy, del_C.gas_energy, del_G.gas_energy); #endif // DE #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { - output.scalar[i] = Monotize(del_L.scalar[i], del_R.scalar[i], del_C.scalar[i], del_G.scalar[i]); + output.scalar[i] = Monotonize(del_L.scalar[i], del_R.scalar[i], del_C.scalar[i], del_G.scalar[i]); } #endif // SCALAR // Project into the primitive variables. Note the return by reference to preserve the values in the gas_energy and // scalars - Characteristic_To_Primitive(primitive, del_a_m, sound_speed, sound_speed_squared, output); + Characteristic_To_Primitive(primitive, del_a_m, sound_speed, sound_speed_squared, gamma, output); return output; } @@ -636,13 +699,13 @@ PlmcPrimitive __device__ __host__ Calc_Interface(PlmcPrimitive const &primitive, // ===================================================================================================================== // ===================================================================================================================== -void __device__ __host__ Monotize_Primitive(PlmcPrimitive const &cell_i, PlmcPrimitive const &cell_imo, - PlmcPrimitive const &cell_ipo, PlmcPrimitive &interface_L_iph, - PlmcPrimitive &interface_R_imh, PlmcPrimitive &del_m_i) +void __device__ __host__ Monotonize_Primitive(PlmcPrimitive const &cell_i, PlmcPrimitive const &cell_imo, + PlmcPrimitive const &cell_ipo, PlmcPrimitive &interface_L_iph, + PlmcPrimitive &interface_R_imh, PlmcPrimitive &del_m_i) { - // The function that will actually do the monotization. Note that it return the interfaces by reference - auto Monotize = [](Real const &val_i, Real const &val_imo, Real const &val_ipo, Real &interface_L, - Real &interface_R) { + // The function that will actually do the monotozation. Note that it return the interfaces by reference + auto Monotonize = [](Real const &val_i, Real const &val_imo, Real const &val_ipo, Real &interface_L, + Real &interface_R) { Real const C = interface_R + interface_L; interface_R = fmax(fmin(val_i, val_imo), interface_R); @@ -654,15 +717,15 @@ void __device__ __host__ Monotize_Primitive(PlmcPrimitive const &cell_i, PlmcPri interface_R = C - interface_L; }; - // Monotize - Monotize(cell_i.density, cell_imo.density, cell_ipo.density, interface_L_iph.density, interface_R_imh.density); - Monotize(cell_i.velocity_x, cell_imo.velocity_x, cell_ipo.velocity_x, interface_L_iph.velocity_x, - interface_R_imh.velocity_x); - Monotize(cell_i.velocity_y, cell_imo.velocity_y, cell_ipo.velocity_y, interface_L_iph.velocity_y, - interface_R_imh.velocity_y); - Monotize(cell_i.velocity_z, cell_imo.velocity_z, cell_ipo.velocity_z, interface_L_iph.velocity_z, - interface_R_imh.velocity_z); - Monotize(cell_i.pressure, cell_imo.pressure, cell_ipo.pressure, interface_L_iph.pressure, interface_R_imh.pressure); + // Monotonize + Monotonize(cell_i.density, cell_imo.density, cell_ipo.density, interface_L_iph.density, interface_R_imh.density); + Monotonize(cell_i.velocity_x, cell_imo.velocity_x, cell_ipo.velocity_x, interface_L_iph.velocity_x, + interface_R_imh.velocity_x); + Monotonize(cell_i.velocity_y, cell_imo.velocity_y, cell_ipo.velocity_y, interface_L_iph.velocity_y, + interface_R_imh.velocity_y); + Monotonize(cell_i.velocity_z, cell_imo.velocity_z, cell_ipo.velocity_z, interface_L_iph.velocity_z, + interface_R_imh.velocity_z); + Monotonize(cell_i.pressure, cell_imo.pressure, cell_ipo.pressure, interface_L_iph.pressure, interface_R_imh.pressure); // Compute the new slopes del_m_i.density = interface_L_iph.density - interface_R_imh.density; @@ -672,25 +735,25 @@ void __device__ __host__ Monotize_Primitive(PlmcPrimitive const &cell_i, PlmcPri del_m_i.pressure = interface_L_iph.pressure - interface_R_imh.pressure; #ifdef MHD - Monotize(cell_i.magnetic_y, cell_imo.magnetic_y, cell_ipo.magnetic_y, interface_L_iph.magnetic_y, - interface_R_imh.magnetic_y); - Monotize(cell_i.magnetic_z, cell_imo.magnetic_z, cell_ipo.velocity_z, interface_L_iph.velocity_z, - interface_R_imh.magnetic_z); + Monotonize(cell_i.magnetic_y, cell_imo.magnetic_y, cell_ipo.magnetic_y, interface_L_iph.magnetic_y, + interface_R_imh.magnetic_y); + Monotonize(cell_i.magnetic_z, cell_imo.magnetic_z, cell_ipo.velocity_z, interface_L_iph.velocity_z, + interface_R_imh.magnetic_z); del_m_i.magnetic_y = interface_L_iph.magnetic_y - interface_R_imh.magnetic_y; del_m_i.magnetic_z = interface_L_iph.magnetic_z - interface_R_imh.magnetic_z; #endif // MHD #ifdef DE - Monotize(cell_i.gas_energy, cell_imo.gas_energy, cell_ipo.gas_energy, interface_L_iph.gas_energy, - interface_R_imh.gas_energy); + Monotonize(cell_i.gas_energy, cell_imo.gas_energy, cell_ipo.gas_energy, interface_L_iph.gas_energy, + interface_R_imh.gas_energy); del_m_i.gas_energy = interface_L_iph.gas_energy - interface_R_imh.gas_energy; #endif // DE #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { - Monotize(cell_i.scalar[i], cell_imo.scalar[i], cell_ipo.scalar[i], interface_L_iph.scalar[i], - interface_R_imh.scalar[i]); + Monotonize(cell_i.scalar[i], cell_imo.scalar[i], cell_ipo.scalar[i], interface_L_iph.scalar[i], + interface_R_imh.scalar[i]); del_m_i.scalar[i] = interface_L_iph.scalar[i] - interface_R_imh.scalar[i]; } #endif // SCALAR diff --git a/src/reconstruction/plmc_cuda.h b/src/reconstruction/plmc_cuda.h index 194c84d54..503c02fac 100644 --- a/src/reconstruction/plmc_cuda.h +++ b/src/reconstruction/plmc_cuda.h @@ -117,15 +117,15 @@ PlmcCharacteristic __device__ Primitive_To_Characteristic(PlmcPrimitive const &p * \param[in] characteristic_slope The characteristic slopes * \param[in] sound_speed The sound speed * \param[in] sound_speed_squared The sound speed squared + * \param[in] gamma The adiabatic index * \param[out] output The primitive slopes */ -void __device__ __host__ Characteristic_To_Primitive(PlmcPrimitive const &primitive, - PlmcCharacteristic const &characteristic_slope, - Real const &sound_speed, Real const &sound_speed_squared, - PlmcPrimitive &output); +void __device__ Characteristic_To_Primitive(PlmcPrimitive const &primitive, + PlmcCharacteristic const &characteristic_slope, Real const &sound_speed, + Real const &sound_speed_squared, Real const &gamma, PlmcPrimitive &output); /*! - * \brief Monotize the characteristic slopes and project back into the primitive slopes + * \brief Monotonize the characteristic slopes and project back into the primitive slopes * * \param[in] primitive The primitive variables * \param[in] del_L The left primitive slopes @@ -138,13 +138,14 @@ void __device__ __host__ Characteristic_To_Primitive(PlmcPrimitive const &primit * \param[in] del_a_G The Van Leer characteristic slopes * \param[in] sound_speed The sound speed * \param[in] sound_speed_squared The sound speed squared - * \return PlmcPrimitive The monotized primitive slopes + * \param[in] gamma The adiabatic index + * \return PlmcPrimitive The Monotonized primitive slopes */ -PlmcPrimitive __device__ __host__ Monotize_Characteristic_Return_Primitive( +PlmcPrimitive __device__ Monotonize_Characteristic_Return_Primitive( PlmcPrimitive const &primitive, PlmcPrimitive const &del_L, PlmcPrimitive const &del_R, PlmcPrimitive const &del_C, PlmcPrimitive const &del_G, PlmcCharacteristic const &del_a_L, PlmcCharacteristic const &del_a_R, PlmcCharacteristic const &del_a_C, PlmcCharacteristic const &del_a_G, Real const &sound_speed, - Real const &sound_speed_squared); + Real const &sound_speed_squared, Real const &gamma); /*! * \brief Compute the interface state from the slope and cell centered state. @@ -158,7 +159,7 @@ PlmcPrimitive __device__ __host__ Calc_Interface(PlmcPrimitive const &primitive, Real const &sign); /*! - * \brief Monotize the interface states and primitive slopes. + * \brief Monotonize the interface states and primitive slopes. * * \param[in] cell_i The state in the i cell * \param[in] cell_imo The state in the i-1 cell @@ -167,9 +168,9 @@ PlmcPrimitive __device__ __host__ Calc_Interface(PlmcPrimitive const &primitive, * \param[in,out] interface_R_imh The right interface state at i-1/2 * \param[in,out] del_m_i The primitive slopes in the cell */ -void __device__ __host__ Monotize_Primitive(PlmcPrimitive const &cell_i, PlmcPrimitive const &cell_imo, - PlmcPrimitive const &cell_ipo, PlmcPrimitive &interface_L_iph, - PlmcPrimitive &interface_R_imh, PlmcPrimitive &del_m_i); +void __device__ __host__ Monotonize_Primitive(PlmcPrimitive const &cell_i, PlmcPrimitive const &cell_imo, + PlmcPrimitive const &cell_ipo, PlmcPrimitive &interface_L_iph, + PlmcPrimitive &interface_R_imh, PlmcPrimitive &del_m_i); /*! * \brief Write the interface data to the appropriate arrays From eee9f0e07a659abce034bc078005c620510cd482 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Fri, 31 Mar 2023 13:26:10 -0400 Subject: [PATCH 358/694] Add an MHD PLMC test --- src/reconstruction/plmc_cuda_tests.cu | 114 ++++++++++++++++++++++++++ 1 file changed, 114 insertions(+) diff --git a/src/reconstruction/plmc_cuda_tests.cu b/src/reconstruction/plmc_cuda_tests.cu index e96586623..3efb97417 100644 --- a/src/reconstruction/plmc_cuda_tests.cu +++ b/src/reconstruction/plmc_cuda_tests.cu @@ -153,3 +153,117 @@ TEST(tHYDROPlmcReconstructor, CorrectInputExpectCorrectOutput) } } } + +TEST(tMHDPlmcReconstructor, CorrectInputExpectCorrectOutput) +{ + // Set up PRNG to use + std::mt19937_64 prng(42); + std::uniform_real_distribution doubleRand(0.1, 5); + + // Mock up needed information + size_t const nx = 4, ny = nx, nz = nx; + size_t const n_fields = 8; + size_t const n_cells_grid = nx * ny * nz * n_fields; + size_t const n_cells_interface = nx * ny * nz * (n_fields - 1); + double const dx = doubleRand(prng); + double const dt = doubleRand(prng); + double const gamma = 5.0 / 3.0; + + // Setup host grid. Fill host grid with random values and randomly assign maximum value + std::vector host_grid(n_cells_grid); + for (size_t i = 0; i < host_grid.size(); i++) { + host_grid.at(i) = doubleRand(prng); + } + + // Allocating and copying to device + cuda_utilities::DeviceVector dev_grid(host_grid.size()); + dev_grid.cpyHostToDevice(host_grid); + + // Fiducial Data + std::vector> fiducial_interface_left = {{{21, 0.59023012197434721}, + {85, 3.0043379408547275}, + {149, 2.6320759184913625}, + {213, 0.94878676231467451}, + {277, 18.551193003661723}, + {341, 1.8587936590169301}, + {405, 2.1583975283044725}}, + {{21, 0.73640639402573249}, + {85, 1.2543813093357532}, + {149, 2.194558499445812}, + {213, 1.1837630990406585}, + {277, 11.028931161539937}, + {341, 2.1583975283044725}, + {405, 1.6994195863331925}}, + {{21, 0.25340904981266843}, + {85, 2.0441984720128734}, + {149, 2.0072227310539077}, + {213, 0.45377591914009824}, + {277, 24.026326855982607}, + {341, 1.7033818819502551}, + {405, 1.8141353672443383}}}; + std::vector> fiducial_interface_right = { + {{20, 0.59023012197434721}, + {84, 3.0043379408547275}, + {148, 2.6320759184913625}, + {212, 0.9487867623146744}, + {276, 22.111134849009044}, + {340, 1.8587936590169301}, + {404, 2.1583975283044725}}, + {{17, 0.44405384992296193}, + {20, 0.59023012197434721}, + {81, 2.5027813113931279}, + {84, 3.0043379408547275}, + {145, 2.6371119205792346}, + {148, 2.6320759184913625}, + {209, 0.71381042558869023}, + {212, 0.9487867623146744}, + {273, 29.633443857492487}, + {276, 22.111134849009044}, + {337, 2.1583975283044725}, + {340, 1.8587936590169301}, + {401, 4.5479767726660523}, + {404, 2.1583975283044725}}, + {{5, 0.92705119413602599}, {17, 0.44405384992296193}, {20, 0.59023012197434721}, {69, 1.959259898225878}, + {81, 2.5027813113931279}, {84, 3.0043379408547275}, {133, 0.96653490574340462}, {145, 2.6371119205792346}, + {148, 2.6320759184913625}, {197, 1.3203867992383289}, {209, 0.71381042558869023}, {212, 0.9487867623146744}, + {261, 7.6371723945376502}, {273, 29.633443857492487}, {276, 22.111134849009044}, {325, 1.7033818819502551}, + {337, 2.1583975283044725}, {340, 1.8587936590169301}, {389, 1.8587936590169303}, {401, 4.5479767726660523}, + {404, 2.1583975283044725}}}; + + // Loop over different directions + for (size_t direction = 0; direction < 3; direction++) { + // Allocate device buffers + cuda_utilities::DeviceVector dev_interface_left(n_cells_interface); + cuda_utilities::DeviceVector dev_interface_right(n_cells_interface); + + // Launch kernel + hipLaunchKernelGGL(PLMC_cuda, dev_grid.size(), 1, 0, 0, dev_grid.data(), dev_interface_left.data(), + dev_interface_right.data(), nx, ny, nz, dx, dt, gamma, direction, n_fields); + CudaCheckError(); + CHECK(cudaDeviceSynchronize()); + + // Perform Comparison + for (size_t i = 0; i < dev_interface_right.size(); i++) { + // Check the left interface + double test_val = dev_interface_left.at(i); + double fiducial_val = + (fiducial_interface_left.at(direction).find(i) == fiducial_interface_left.at(direction).end()) + ? 0.0 + : fiducial_interface_left.at(direction)[i]; + + testingUtilities::checkResults( + fiducial_val, test_val, + "left interface at i=" + std::to_string(i) + ", in direction " + std::to_string(direction)); + + // Check the right interface + test_val = dev_interface_right.at(i); + fiducial_val = (fiducial_interface_right.at(direction).find(i) == fiducial_interface_right.at(direction).end()) + ? 0.0 + : fiducial_interface_right.at(direction)[i]; + + testingUtilities::checkResults( + fiducial_val, test_val, + "right interface at i=" + std::to_string(i) + ", in direction " + std::to_string(direction)); + } + } +} \ No newline at end of file From 5847541bfce26ebaef09f1c9ff7308f2afae5800 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Tue, 4 Apr 2023 12:57:07 -0400 Subject: [PATCH 359/694] PLMC: use lambda in slope calculations --- src/reconstruction/plmc_cuda.cu | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/src/reconstruction/plmc_cuda.cu b/src/reconstruction/plmc_cuda.cu index df679e0b0..c4faba462 100644 --- a/src/reconstruction/plmc_cuda.cu +++ b/src/reconstruction/plmc_cuda.cu @@ -674,23 +674,25 @@ PlmcPrimitive __device__ __host__ Calc_Interface(PlmcPrimitive const &primitive, { plmc_utils::PlmcPrimitive output; - output.density = primitive.density + sign * 0.5 * slopes.density; - output.velocity_x = primitive.velocity_x + sign * 0.5 * slopes.velocity_x; - output.velocity_y = primitive.velocity_y + sign * 0.5 * slopes.velocity_y; - output.velocity_z = primitive.velocity_z + sign * 0.5 * slopes.velocity_z; - output.pressure = primitive.pressure + sign * 0.5 * slopes.pressure; + auto interface = [&sign](Real const &state, Real const &slope) -> Real { return state + sign * 0.5 * slope; }; + + output.density = interface(primitive.density, slopes.density); + output.velocity_x = interface(primitive.velocity_x, slopes.velocity_x); + output.velocity_y = interface(primitive.velocity_y, slopes.velocity_y); + output.velocity_z = interface(primitive.velocity_z, slopes.velocity_z); + output.pressure = interface(primitive.pressure, slopes.pressure); #ifdef MHD - output.magnetic_y = primitive.magnetic_y + sign * 0.5 * slopes.magnetic_y; - output.magnetic_z = primitive.magnetic_z + sign * 0.5 * slopes.magnetic_z; + output.magnetic_y = interface(primitive.magnetic_y, slopes.magnetic_y); + output.magnetic_z = interface(primitive.magnetic_z, slopes.magnetic_z); #endif // MHD #ifdef DE - output.gas_energy = primitive.gas_energy + sign * 0.5 * slopes.gas_energy; + output.gas_energy = interface(primitive.gas_energy, slopes.gas_energy); #endif // DE #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { - output.scalar[i] = primitive.scalar[i] + sign * 0.5 * slopes.scalar[i]; + output.scalar[i] = interface(primitive.scalar[i], slopes.scalar[i]); } #endif // SCALAR From 82d0866556a3735e8c08ee34d60f8691eeeae98b Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Wed, 5 Apr 2023 09:37:16 -0400 Subject: [PATCH 360/694] PLMC: Move some functions into .h for easy testing Move primitive and characteristic projections into the header file and add tests for them. Currently the characteristic to primitive is failing when compared to Athena equivalent. --- src/reconstruction/plmc_cuda.cu | 178 ------------------------- src/reconstruction/plmc_cuda.h | 180 +++++++++++++++++++++++++- src/reconstruction/plmc_cuda_tests.cu | 82 ++++++++++++ 3 files changed, 256 insertions(+), 184 deletions(-) diff --git a/src/reconstruction/plmc_cuda.cu b/src/reconstruction/plmc_cuda.cu index c4faba462..7e3ddff63 100644 --- a/src/reconstruction/plmc_cuda.cu +++ b/src/reconstruction/plmc_cuda.cu @@ -10,8 +10,6 @@ #include "../reconstruction/plmc_cuda.h" #include "../utils/cuda_utilities.h" #include "../utils/gpu.hpp" -#include "../utils/hydro_utilities.h" -#include "../utils/mhd_utilities.h" #ifdef DE // PRESSURE_DE #include "../utils/hydro_utilities.h" @@ -440,182 +438,6 @@ PlmcPrimitive __device__ __host__ Van_Leer_Slope(PlmcPrimitive const &left_slope } // ===================================================================================================================== -// ===================================================================================================================== -PlmcCharacteristic __device__ Primitive_To_Characteristic(PlmcPrimitive const &primitive, - PlmcPrimitive const &primitive_slope, Real const &sound_speed, - Real const &sound_speed_squared, Real const &gamma) -{ - PlmcCharacteristic output; - -#ifdef MHD - // This is taken from Stone et al. 2008, appendix A. Equation numbers will be quoted as relevant - - // First, compute some basic quantities we will need later - Real const inverse_sqrt_density = rsqrt(primitive.density); - - // Compute wave speeds and their squares - Real const magnetosonic_speed_fast = mhd::utils::fastMagnetosonicSpeed( - primitive.density, primitive.pressure, primitive.magnetic_x, primitive.magnetic_y, primitive.magnetic_z, gamma); - Real const magnetosonic_speed_slow = mhd::utils::slowMagnetosonicSpeed( - primitive.density, primitive.pressure, primitive.magnetic_x, primitive.magnetic_y, primitive.magnetic_z, gamma); - - Real const magnetosonic_speed_fast_squared = magnetosonic_speed_fast * magnetosonic_speed_fast; - Real const magnetosonic_speed_slow_squared = magnetosonic_speed_slow * magnetosonic_speed_slow; - - // Compute Alphas (equation A16) - Real alpha_fast, alpha_slow; - if (Real const denom = (magnetosonic_speed_fast_squared - magnetosonic_speed_slow_squared), - numerator_2 = (magnetosonic_speed_fast_squared - sound_speed_squared); - denom <= 0.0 or numerator_2 <= 0.0) { - alpha_fast = 1.0; - alpha_slow = 0.0; - } else if (Real const numerator_1 = (sound_speed_squared - magnetosonic_speed_slow_squared); numerator_1 <= 0.0) { - alpha_fast = 0.0; - alpha_slow = 1.0; - } else { - alpha_fast = sqrt(numerator_1 / denom); - alpha_slow = sqrt(numerator_2 / denom); - } - - // Compute Betas (equation A17) - Real const beta_denom = rhypot(primitive.magnetic_y, primitive.magnetic_z); - Real const beta_y = (beta_denom == 0) ? 0.0 : primitive.magnetic_y * beta_denom; - Real const beta_z = (beta_denom == 0) ? 0.0 : primitive.magnetic_z * beta_denom; - - // Compute Q(s) (equation A14) - Real const n_fs = 0.5 / sound_speed_squared; // equation A19 - Real const sign = copysign(1.0, primitive.magnetic_x); - Real const q_fast = sign * n_fs * alpha_fast * magnetosonic_speed_fast; - Real const q_slow = sign * n_fs * alpha_slow * magnetosonic_speed_slow; - - // Compute A(s) (equation A15) - Real const a_prime_fast = 0.5 * alpha_fast / (sound_speed * sqrt(primitive.density)); - Real const a_prime_slow = 0.5 * alpha_slow / (sound_speed * sqrt(primitive.density)); - - // Multiply the slopes by the left eigenvector matrix given in equation 18 - output.a0 = - n_fs * alpha_fast * - (primitive_slope.pressure / primitive.density - magnetosonic_speed_fast * primitive_slope.velocity_x) + - q_slow * (beta_y * primitive_slope.velocity_y + beta_z * primitive_slope.velocity_z) + - a_prime_slow * (beta_y * primitive_slope.magnetic_y + beta_z * primitive_slope.magnetic_z); - - output.a1 = 0.5 * (beta_y * (primitive_slope.magnetic_z * sign * inverse_sqrt_density + primitive_slope.velocity_z) - - beta_z * (primitive_slope.magnetic_y * sign * inverse_sqrt_density + primitive_slope.velocity_y)); - - output.a2 = - n_fs * alpha_slow * - (primitive_slope.pressure / primitive.density - magnetosonic_speed_slow * primitive_slope.velocity_x) - - q_fast * (beta_y * primitive_slope.velocity_y + beta_z * primitive_slope.velocity_z) - - a_prime_fast * (beta_y * primitive_slope.magnetic_y + beta_z * primitive_slope.magnetic_z); - - output.a3 = primitive_slope.density - primitive_slope.pressure / sound_speed_squared; - - output.a4 = - n_fs * alpha_slow * - (primitive_slope.pressure / primitive.density + magnetosonic_speed_slow * primitive_slope.velocity_x) + - q_fast * (beta_y * primitive_slope.velocity_y + beta_z * primitive_slope.velocity_z) - - a_prime_fast * (beta_y * primitive_slope.magnetic_y + beta_z * primitive_slope.magnetic_z); - output.a5 = 0.5 * (beta_y * (primitive_slope.magnetic_z * sign * inverse_sqrt_density - primitive_slope.velocity_z) - - beta_z * (primitive_slope.magnetic_y * sign * inverse_sqrt_density - primitive_slope.velocity_y)); - - output.a6 = - n_fs * alpha_fast * - (primitive_slope.pressure / primitive.density + magnetosonic_speed_fast * primitive_slope.velocity_x) - - q_slow * (beta_y * primitive_slope.velocity_y + beta_z * primitive_slope.velocity_z) + - a_prime_slow * (beta_y * primitive_slope.magnetic_y + beta_z * primitive_slope.magnetic_z); - -#else // not MHD - output.a0 = -primitive.density * primitive_slope.velocity_x / (2.0 * sound_speed) + - primitive_slope.pressure / (2.0 * sound_speed_squared); - output.a1 = primitive_slope.density - primitive_slope.pressure / (sound_speed_squared); - output.a2 = primitive_slope.velocity_y; - output.a3 = primitive_slope.velocity_z; - output.a4 = primitive.density * primitive_slope.velocity_x / (2.0 * sound_speed) + - primitive_slope.pressure / (2.0 * sound_speed_squared); -#endif // MHD - - return output; -} -// ===================================================================================================================== - -// ===================================================================================================================== -void __device__ Characteristic_To_Primitive(PlmcPrimitive const &primitive, - PlmcCharacteristic const &characteristic_slope, Real const &sound_speed, - Real const &sound_speed_squared, Real const &gamma, PlmcPrimitive &output) -{ -#ifdef MHD - // This is taken from Stone et al. 2008, appendix A. Equation numbers will be quoted as relevant - - // Compute wave speeds and their squares - Real const magnetosonic_speed_fast = mhd::utils::fastMagnetosonicSpeed( - primitive.density, primitive.pressure, primitive.magnetic_x, primitive.magnetic_y, primitive.magnetic_z, gamma); - Real const magnetosonic_speed_slow = mhd::utils::slowMagnetosonicSpeed( - primitive.density, primitive.pressure, primitive.magnetic_x, primitive.magnetic_y, primitive.magnetic_z, gamma); - - Real const magnetosonic_speed_fast_squared = magnetosonic_speed_fast * magnetosonic_speed_fast; - Real const magnetosonic_speed_slow_squared = magnetosonic_speed_slow * magnetosonic_speed_slow; - - // Compute Alphas (equation A16) - Real alpha_fast, alpha_slow; - if (Real const denom = (magnetosonic_speed_fast_squared - magnetosonic_speed_slow_squared), - numerator_2 = (magnetosonic_speed_fast_squared - sound_speed_squared); - denom <= 0.0 or numerator_2 <= 0.0) { - alpha_fast = 1.0; - alpha_slow = 0.0; - } else if (Real const numerator_1 = (sound_speed_squared - magnetosonic_speed_slow_squared); numerator_1 <= 0.0) { - alpha_fast = 0.0; - alpha_slow = 1.0; - } else { - alpha_fast = sqrt(numerator_1 / denom); - alpha_slow = sqrt(numerator_2 / denom); - } - - // Compute Betas (equation A17) - Real const beta_denom = rhypot(primitive.magnetic_y, primitive.magnetic_z); - Real const beta_y = (beta_denom == 0) ? 0.0 : primitive.magnetic_y * beta_denom; - Real const beta_z = (beta_denom == 0) ? 0.0 : primitive.magnetic_z * beta_denom; - - // Compute Q(s) (equation A14) - Real const sign = copysign(1.0, primitive.magnetic_x); - Real const q_fast = sign * alpha_fast * magnetosonic_speed_fast; - Real const q_slow = sign * alpha_slow * magnetosonic_speed_slow; - - // Compute A(s) (equation A15) - Real const a_prime_fast = alpha_fast * sound_speed * sqrt(primitive.density); - Real const a_prime_slow = alpha_slow * sound_speed * sqrt(primitive.density); - - // Multiply the slopes by the right eigenvector matrix given in equation 12 - output.density = primitive.density * (alpha_fast * (characteristic_slope.a0 + characteristic_slope.a6) + - alpha_slow * (characteristic_slope.a2 + characteristic_slope.a4)) + - characteristic_slope.a3; - output.velocity_x = magnetosonic_speed_fast * alpha_fast * (characteristic_slope.a6 - characteristic_slope.a0) + - magnetosonic_speed_slow * alpha_slow * (characteristic_slope.a4 - characteristic_slope.a2); - output.velocity_y = beta_y * (q_slow * (characteristic_slope.a0 - characteristic_slope.a6) + - q_fast * (characteristic_slope.a4 - characteristic_slope.a2)) + - beta_z * (characteristic_slope.a5 - characteristic_slope.a1); - output.velocity_z = beta_z * (q_slow * (characteristic_slope.a0 - characteristic_slope.a6) + - q_fast * (characteristic_slope.a4 - characteristic_slope.a2)) + - beta_y * (characteristic_slope.a1 - characteristic_slope.a5); - output.pressure = primitive.density * sound_speed_squared * - (alpha_fast * (characteristic_slope.a0 + characteristic_slope.a6) + - alpha_slow * (characteristic_slope.a2 + characteristic_slope.a4)); - output.magnetic_y = beta_y * (a_prime_slow * (characteristic_slope.a0 + characteristic_slope.a6) - - a_prime_fast * (characteristic_slope.a2 + characteristic_slope.a4)) - - beta_z * sign * sqrt(primitive.density) * (characteristic_slope.a5 + characteristic_slope.a1); - output.magnetic_z = beta_z * (a_prime_slow * (characteristic_slope.a0 + characteristic_slope.a6) - - a_prime_fast * (characteristic_slope.a2 + characteristic_slope.a4)) + - beta_y * sign * sqrt(primitive.density) * (characteristic_slope.a5 + characteristic_slope.a1); - -#else // not MHD - output.density = characteristic_slope.a0 + characteristic_slope.a1 + characteristic_slope.a4; - output.velocity_x = sound_speed / primitive.density * (characteristic_slope.a4 - characteristic_slope.a0); - output.velocity_y = characteristic_slope.a2; - output.velocity_z = characteristic_slope.a3; - output.pressure = sound_speed_squared * (characteristic_slope.a0 + characteristic_slope.a4); -#endif // MHD -} -// ===================================================================================================================== - // ===================================================================================================================== PlmcPrimitive __device__ Monotonize_Characteristic_Return_Primitive( PlmcPrimitive const &primitive, PlmcPrimitive const &del_L, PlmcPrimitive const &del_R, PlmcPrimitive const &del_C, diff --git a/src/reconstruction/plmc_cuda.h b/src/reconstruction/plmc_cuda.h index 503c02fac..3ebf7b61a 100644 --- a/src/reconstruction/plmc_cuda.h +++ b/src/reconstruction/plmc_cuda.h @@ -7,6 +7,8 @@ #include "../global/global.h" #include "../grid/grid_enum.h" +#include "../utils/hydro_utilities.h" +#include "../utils/mhd_utilities.h" /*! \fn __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, int ny, int nz, int n_ghost, Real dx, Real dt, Real @@ -105,9 +107,102 @@ PlmcPrimitive __device__ __host__ Van_Leer_Slope(PlmcPrimitive const &left_slope * \param[in] gamma The adiabatic index * \return PlmcCharacteristic */ -PlmcCharacteristic __device__ Primitive_To_Characteristic(PlmcPrimitive const &primitive, - PlmcPrimitive const &primitive_slope, Real const &sound_speed, - Real const &sound_speed_squared, Real const &gamma); +PlmcCharacteristic __device__ __inline__ Primitive_To_Characteristic(PlmcPrimitive const &primitive, + PlmcPrimitive const &primitive_slope, + Real const &sound_speed, + Real const &sound_speed_squared, Real const &gamma) +{ + PlmcCharacteristic output; + +#ifdef MHD + // This is taken from Stone et al. 2008, appendix A. Equation numbers will be quoted as relevant + + // First, compute some basic quantities we will need later + Real const inverse_sqrt_density = rsqrt(primitive.density); + + // Compute wave speeds and their squares + Real const magnetosonic_speed_fast = mhd::utils::fastMagnetosonicSpeed( + primitive.density, primitive.pressure, primitive.magnetic_x, primitive.magnetic_y, primitive.magnetic_z, gamma); + Real const magnetosonic_speed_slow = mhd::utils::slowMagnetosonicSpeed( + primitive.density, primitive.pressure, primitive.magnetic_x, primitive.magnetic_y, primitive.magnetic_z, gamma); + + Real const magnetosonic_speed_fast_squared = magnetosonic_speed_fast * magnetosonic_speed_fast; + Real const magnetosonic_speed_slow_squared = magnetosonic_speed_slow * magnetosonic_speed_slow; + + // Compute Alphas (equation A16) + Real alpha_fast, alpha_slow; + if (Real const denom = (magnetosonic_speed_fast_squared - magnetosonic_speed_slow_squared), + numerator_2 = (magnetosonic_speed_fast_squared - sound_speed_squared); + denom <= 0.0 or numerator_2 <= 0.0) { + alpha_fast = 1.0; + alpha_slow = 0.0; + } else if (Real const numerator_1 = (sound_speed_squared - magnetosonic_speed_slow_squared); numerator_1 <= 0.0) { + alpha_fast = 0.0; + alpha_slow = 1.0; + } else { + alpha_fast = sqrt(numerator_1 / denom); + alpha_slow = sqrt(numerator_2 / denom); + } + + // Compute Betas (equation A17) + Real const beta_denom = rhypot(primitive.magnetic_y, primitive.magnetic_z); + Real const beta_y = (beta_denom == 0) ? 0.0 : primitive.magnetic_y * beta_denom; + Real const beta_z = (beta_denom == 0) ? 0.0 : primitive.magnetic_z * beta_denom; + + // Compute Q(s) (equation A14) + Real const n_fs = 0.5 / sound_speed_squared; // equation A19 + Real const sign = copysign(1.0, primitive.magnetic_x); + Real const q_fast = sign * n_fs * alpha_fast * magnetosonic_speed_fast; + Real const q_slow = sign * n_fs * alpha_slow * magnetosonic_speed_slow; + + // Compute A(s) (equation A15) + Real const a_prime_fast = 0.5 * alpha_fast / (sound_speed * sqrt(primitive.density)); + Real const a_prime_slow = 0.5 * alpha_slow / (sound_speed * sqrt(primitive.density)); + + // Multiply the slopes by the left eigenvector matrix given in equation 18 + output.a0 = + n_fs * alpha_fast * + (primitive_slope.pressure / primitive.density - magnetosonic_speed_fast * primitive_slope.velocity_x) + + q_slow * (beta_y * primitive_slope.velocity_y + beta_z * primitive_slope.velocity_z) + + a_prime_slow * (beta_y * primitive_slope.magnetic_y + beta_z * primitive_slope.magnetic_z); + + output.a1 = 0.5 * (beta_y * (primitive_slope.magnetic_z * sign * inverse_sqrt_density + primitive_slope.velocity_z) - + beta_z * (primitive_slope.magnetic_y * sign * inverse_sqrt_density + primitive_slope.velocity_y)); + + output.a2 = + n_fs * alpha_slow * + (primitive_slope.pressure / primitive.density - magnetosonic_speed_slow * primitive_slope.velocity_x) - + q_fast * (beta_y * primitive_slope.velocity_y + beta_z * primitive_slope.velocity_z) - + a_prime_fast * (beta_y * primitive_slope.magnetic_y + beta_z * primitive_slope.magnetic_z); + + output.a3 = primitive_slope.density - primitive_slope.pressure / sound_speed_squared; + + output.a4 = + n_fs * alpha_slow * + (primitive_slope.pressure / primitive.density + magnetosonic_speed_slow * primitive_slope.velocity_x) + + q_fast * (beta_y * primitive_slope.velocity_y + beta_z * primitive_slope.velocity_z) - + a_prime_fast * (beta_y * primitive_slope.magnetic_y + beta_z * primitive_slope.magnetic_z); + output.a5 = 0.5 * (beta_y * (primitive_slope.magnetic_z * sign * inverse_sqrt_density - primitive_slope.velocity_z) - + beta_z * (primitive_slope.magnetic_y * sign * inverse_sqrt_density - primitive_slope.velocity_y)); + + output.a6 = + n_fs * alpha_fast * + (primitive_slope.pressure / primitive.density + magnetosonic_speed_fast * primitive_slope.velocity_x) - + q_slow * (beta_y * primitive_slope.velocity_y + beta_z * primitive_slope.velocity_z) + + a_prime_slow * (beta_y * primitive_slope.magnetic_y + beta_z * primitive_slope.magnetic_z); + +#else // not MHD + output.a0 = -primitive.density * primitive_slope.velocity_x / (2.0 * sound_speed) + + primitive_slope.pressure / (2.0 * sound_speed_squared); + output.a1 = primitive_slope.density - primitive_slope.pressure / (sound_speed_squared); + output.a2 = primitive_slope.velocity_y; + output.a3 = primitive_slope.velocity_z; + output.a4 = primitive.density * primitive_slope.velocity_x / (2.0 * sound_speed) + + primitive_slope.pressure / (2.0 * sound_speed_squared); +#endif // MHD + + return output; +} /*! * \brief Project from the characteristic variables slopes to the primitive variables slopes. Stone Eqn 39. Use the @@ -120,9 +215,82 @@ PlmcCharacteristic __device__ Primitive_To_Characteristic(PlmcPrimitive const &p * \param[in] gamma The adiabatic index * \param[out] output The primitive slopes */ -void __device__ Characteristic_To_Primitive(PlmcPrimitive const &primitive, - PlmcCharacteristic const &characteristic_slope, Real const &sound_speed, - Real const &sound_speed_squared, Real const &gamma, PlmcPrimitive &output); +void __device__ __inline__ Characteristic_To_Primitive(PlmcPrimitive const &primitive, + PlmcCharacteristic const &characteristic_slope, + Real const &sound_speed, Real const &sound_speed_squared, + Real const &gamma, PlmcPrimitive &output) +{ +#ifdef MHD + // This is taken from Stone et al. 2008, appendix A. Equation numbers will be quoted as relevant + + // Compute wave speeds and their squares + Real const magnetosonic_speed_fast = mhd::utils::fastMagnetosonicSpeed( + primitive.density, primitive.pressure, primitive.magnetic_x, primitive.magnetic_y, primitive.magnetic_z, gamma); + Real const magnetosonic_speed_slow = mhd::utils::slowMagnetosonicSpeed( + primitive.density, primitive.pressure, primitive.magnetic_x, primitive.magnetic_y, primitive.magnetic_z, gamma); + + Real const magnetosonic_speed_fast_squared = magnetosonic_speed_fast * magnetosonic_speed_fast; + Real const magnetosonic_speed_slow_squared = magnetosonic_speed_slow * magnetosonic_speed_slow; + + // Compute Alphas (equation A16) + Real alpha_fast, alpha_slow; + if (Real const denom = (magnetosonic_speed_fast_squared - magnetosonic_speed_slow_squared), + numerator_2 = (magnetosonic_speed_fast_squared - sound_speed_squared); + denom <= 0.0 or numerator_2 <= 0.0) { + alpha_fast = 1.0; + alpha_slow = 0.0; + } else if (Real const numerator_1 = (sound_speed_squared - magnetosonic_speed_slow_squared); numerator_1 <= 0.0) { + alpha_fast = 0.0; + alpha_slow = 1.0; + } else { + alpha_fast = sqrt(numerator_1 / denom); + alpha_slow = sqrt(numerator_2 / denom); + } + + // Compute Betas (equation A17) + Real const beta_denom = rhypot(primitive.magnetic_y, primitive.magnetic_z); + Real const beta_y = (beta_denom == 0) ? 0.0 : primitive.magnetic_y * beta_denom; + Real const beta_z = (beta_denom == 0) ? 0.0 : primitive.magnetic_z * beta_denom; + + // Compute Q(s) (equation A14) + Real const sign = copysign(1.0, primitive.magnetic_x); + Real const q_fast = sign * alpha_fast * magnetosonic_speed_fast; + Real const q_slow = sign * alpha_slow * magnetosonic_speed_slow; + + // Compute A(s) (equation A15) + Real const a_prime_fast = alpha_fast * sound_speed * sqrt(primitive.density); + Real const a_prime_slow = alpha_slow * sound_speed * sqrt(primitive.density); + + // Multiply the slopes by the right eigenvector matrix given in equation 12 + output.density = primitive.density * (alpha_fast * (characteristic_slope.a0 + characteristic_slope.a6) + + alpha_slow * (characteristic_slope.a2 + characteristic_slope.a4)) + + characteristic_slope.a3; + output.velocity_x = magnetosonic_speed_fast * alpha_fast * (characteristic_slope.a6 - characteristic_slope.a0) + + magnetosonic_speed_slow * alpha_slow * (characteristic_slope.a4 - characteristic_slope.a2); + output.velocity_y = beta_y * (q_slow * (characteristic_slope.a0 - characteristic_slope.a6) + + q_fast * (characteristic_slope.a4 - characteristic_slope.a2)) + + beta_z * (characteristic_slope.a5 - characteristic_slope.a1); + output.velocity_z = beta_z * (q_slow * (characteristic_slope.a0 - characteristic_slope.a6) + + q_fast * (characteristic_slope.a4 - characteristic_slope.a2)) + + beta_y * (characteristic_slope.a1 - characteristic_slope.a5); + output.pressure = primitive.density * sound_speed_squared * + (alpha_fast * (characteristic_slope.a0 + characteristic_slope.a6) + + alpha_slow * (characteristic_slope.a2 + characteristic_slope.a4)); + output.magnetic_y = beta_y * (a_prime_slow * (characteristic_slope.a0 + characteristic_slope.a6) - + a_prime_fast * (characteristic_slope.a2 + characteristic_slope.a4)) - + beta_z * sign * sqrt(primitive.density) * (characteristic_slope.a5 + characteristic_slope.a1); + output.magnetic_z = beta_z * (a_prime_slow * (characteristic_slope.a0 + characteristic_slope.a6) - + a_prime_fast * (characteristic_slope.a2 + characteristic_slope.a4)) + + beta_y * sign * sqrt(primitive.density) * (characteristic_slope.a5 + characteristic_slope.a1); + +#else // not MHD + output.density = characteristic_slope.a0 + characteristic_slope.a1 + characteristic_slope.a4; + output.velocity_x = sound_speed / primitive.density * (characteristic_slope.a4 - characteristic_slope.a0); + output.velocity_y = characteristic_slope.a2; + output.velocity_z = characteristic_slope.a3; + output.pressure = sound_speed_squared * (characteristic_slope.a0 + characteristic_slope.a4); +#endif // MHD +} /*! * \brief Monotonize the characteristic slopes and project back into the primitive slopes diff --git a/src/reconstruction/plmc_cuda_tests.cu b/src/reconstruction/plmc_cuda_tests.cu index 3efb97417..0b04a0235 100644 --- a/src/reconstruction/plmc_cuda_tests.cu +++ b/src/reconstruction/plmc_cuda_tests.cu @@ -19,6 +19,7 @@ #include "../io/io.h" #include "../reconstruction/plmc_cuda.h" #include "../utils/DeviceVector.h" +#include "../utils/hydro_utilities.h" #include "../utils/testing_utilities.h" TEST(tHYDROPlmcReconstructor, CorrectInputExpectCorrectOutput) @@ -266,4 +267,85 @@ TEST(tMHDPlmcReconstructor, CorrectInputExpectCorrectOutput) "right interface at i=" + std::to_string(i) + ", in direction " + std::to_string(direction)); } } +} + +namespace +{ +__global__ void test_prim_2_char(plmc_utils::PlmcPrimitive const primitive, + plmc_utils::PlmcPrimitive const primitive_slope, Real const gamma, + Real const sound_speed, Real const sound_speed_squared, + plmc_utils::PlmcCharacteristic *characteristic_slope) +{ + *characteristic_slope = + plmc_utils::Primitive_To_Characteristic(primitive, primitive_slope, sound_speed, sound_speed_squared, gamma); +} + +__global__ void test_char_2_prim(plmc_utils::PlmcPrimitive const primitive, + plmc_utils::PlmcCharacteristic const characteristic_slope, Real const gamma, + Real const sound_speed, Real const sound_speed_squared, + plmc_utils::PlmcPrimitive *primitive_slope) +{ + plmc_utils::Characteristic_To_Primitive(primitive, characteristic_slope, sound_speed, sound_speed_squared, gamma, + *primitive_slope); +} +} // namespace + +TEST(tMHDPlmcPrimitive2Characteristic, CorrectInputExpectCorrectOutput) +{ + // Test parameters + Real const &gamma = 5. / 3.; + plmc_utils::PlmcPrimitive const primitive{1, 2, 3, 4, 5, 6, 7, 8}; + plmc_utils::PlmcPrimitive const primitive_slope{9, 10, 11, 12, 13, 14, 15, 16}; + Real const sound_speed = hydro_utilities::Calc_Sound_Speed(primitive.pressure, primitive.density, gamma); + Real const sound_speed_squared = sound_speed * sound_speed; + + // Run test + cuda_utilities::DeviceVector dev_results(1); + hipLaunchKernelGGL(test_prim_2_char, 1, 1, 0, 0, primitive, primitive_slope, gamma, sound_speed, sound_speed_squared, + dev_results.data()); + CudaCheckError(); + cudaDeviceSynchronize(); + plmc_utils::PlmcCharacteristic const host_results = dev_results.at(0); + + // Check results + plmc_utils::PlmcCharacteristic const fiducial_results{ + 3.67609032478613384e+00, -5.64432521030159506e-01, -3.31429408151064075e+00, 7.44000000000000039e+00, + 3.29052143725318791e+00, -1.88144173676719539e-01, 4.07536568422372625e+00}; + testingUtilities::checkResults(fiducial_results.a0, host_results.a0, "a0"); + testingUtilities::checkResults(fiducial_results.a1, host_results.a1, "a1"); + testingUtilities::checkResults(fiducial_results.a2, host_results.a2, "a2"); + testingUtilities::checkResults(fiducial_results.a3, host_results.a3, "a3"); + testingUtilities::checkResults(fiducial_results.a4, host_results.a4, "a4"); + testingUtilities::checkResults(fiducial_results.a5, host_results.a5, "a5"); + testingUtilities::checkResults(fiducial_results.a6, host_results.a6, "a6"); +} +TEST(tMHDPlmcCharacteristic2Primitive, CorrectInputExpectCorrectOutput) +{ + // Test parameters + Real const &gamma = 5. / 3.; + plmc_utils::PlmcPrimitive const primitive{1, 2, 3, 4, 5, 6, 7, 8}; + plmc_utils::PlmcCharacteristic const characteristic_slope{17, 18, 19, 20, 21, 22, 23}; + Real const sound_speed = hydro_utilities::Calc_Sound_Speed(primitive.pressure, primitive.density, gamma); + Real const sound_speed_squared = sound_speed * sound_speed; + + // Run test + cuda_utilities::DeviceVector dev_results(1); + hipLaunchKernelGGL(test_char_2_prim, 1, 1, 0, 0, primitive, characteristic_slope, gamma, sound_speed, + sound_speed_squared, dev_results.data()); + CudaCheckError(); + cudaDeviceSynchronize(); + plmc_utils::PlmcPrimitive const host_results = dev_results.at(0); + + // Check results + plmc_utils::PlmcPrimitive const fiducial_results{6.63368382259080249e+01, 1.74361246693441956e+01, + -5.55049640164519076e-01, -6.70871148175067944e+00, + 3.86140318549233655e+02, -999, + 3.15793270038508922e+01, 9.68343497914561624e+01}; + testingUtilities::checkResults(fiducial_results.density, host_results.density, "density"); + testingUtilities::checkResults(fiducial_results.velocity_x, host_results.velocity_x, "velocity_x"); + testingUtilities::checkResults(fiducial_results.velocity_y, host_results.velocity_y, "velocity_y"); + testingUtilities::checkResults(fiducial_results.velocity_z, host_results.velocity_z, "velocity_z"); + testingUtilities::checkResults(fiducial_results.pressure, host_results.pressure, "pressure"); + testingUtilities::checkResults(fiducial_results.magnetic_y, host_results.magnetic_y, "magnetic_y"); + testingUtilities::checkResults(fiducial_results.magnetic_z, host_results.magnetic_z, "magnetic_z"); } \ No newline at end of file From d608bc86e0c2fca578e93508e3a08c4eb3b01a56 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Wed, 5 Apr 2023 11:05:58 -0400 Subject: [PATCH 361/694] Update PLMC char2prim test The fiducial data was wrong --- src/reconstruction/plmc_cuda_tests.cu | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/src/reconstruction/plmc_cuda_tests.cu b/src/reconstruction/plmc_cuda_tests.cu index 0b04a0235..6018e84cf 100644 --- a/src/reconstruction/plmc_cuda_tests.cu +++ b/src/reconstruction/plmc_cuda_tests.cu @@ -337,14 +337,13 @@ TEST(tMHDPlmcCharacteristic2Primitive, CorrectInputExpectCorrectOutput) plmc_utils::PlmcPrimitive const host_results = dev_results.at(0); // Check results - plmc_utils::PlmcPrimitive const fiducial_results{6.63368382259080249e+01, 1.74361246693441956e+01, - -5.55049640164519076e-01, -6.70871148175067944e+00, - 3.86140318549233655e+02, -999, - 3.15793270038508922e+01, 9.68343497914561624e+01}; + plmc_utils::PlmcPrimitive const fiducial_results{ + 6.73268997307368267e+01, 1.79977606552837130e+01, 9.89872908629502835e-01, -4.94308571170036792e+00, + 3.94390831089473579e+02, -9.99000000000000000e+02, 2.88004228079705342e+01, 9.36584592818786064e+01}; testingUtilities::checkResults(fiducial_results.density, host_results.density, "density"); testingUtilities::checkResults(fiducial_results.velocity_x, host_results.velocity_x, "velocity_x"); - testingUtilities::checkResults(fiducial_results.velocity_y, host_results.velocity_y, "velocity_y"); - testingUtilities::checkResults(fiducial_results.velocity_z, host_results.velocity_z, "velocity_z"); + testingUtilities::checkResults(fiducial_results.velocity_y, host_results.velocity_y, "velocity_y", 1.34E-14); + testingUtilities::checkResults(fiducial_results.velocity_z, host_results.velocity_z, "velocity_z", 1.6E-14); testingUtilities::checkResults(fiducial_results.pressure, host_results.pressure, "pressure"); testingUtilities::checkResults(fiducial_results.magnetic_y, host_results.magnetic_y, "magnetic_y"); testingUtilities::checkResults(fiducial_results.magnetic_z, host_results.magnetic_z, "magnetic_z"); From 72e1d3b084b054783cb02032e6611c906ef42bc4 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Wed, 5 Apr 2023 16:17:11 -0400 Subject: [PATCH 362/694] PLMC: Remove monotonization in primitive variables It didn't do anything so it's been removed --- src/reconstruction/plmc_cuda.cu | 66 --------------------------- src/reconstruction/plmc_cuda.h | 14 ------ src/reconstruction/plmc_cuda_tests.cu | 40 ++++++++-------- 3 files changed, 21 insertions(+), 99 deletions(-) diff --git a/src/reconstruction/plmc_cuda.cu b/src/reconstruction/plmc_cuda.cu index 7e3ddff63..3388f4318 100644 --- a/src/reconstruction/plmc_cuda.cu +++ b/src/reconstruction/plmc_cuda.cu @@ -114,10 +114,6 @@ __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou plmc_utils::PlmcPrimitive interface_L_iph = plmc_utils::Calc_Interface(cell_i, del_m_i, 1.0); plmc_utils::PlmcPrimitive interface_R_imh = plmc_utils::Calc_Interface(cell_i, del_m_i, -1.0); - // Monotonize the primitive variables, note the return by reference. Try removing this as it may not be necessary. A - // good test for that would be shock tubes - plmc_utils::Monotonize_Primitive(cell_i, cell_imo, cell_ipo, interface_L_iph, interface_R_imh, del_m_i); - #ifndef VL Real const dtodx = dt / dx; @@ -522,68 +518,6 @@ PlmcPrimitive __device__ __host__ Calc_Interface(PlmcPrimitive const &primitive, } // ===================================================================================================================== -// ===================================================================================================================== -void __device__ __host__ Monotonize_Primitive(PlmcPrimitive const &cell_i, PlmcPrimitive const &cell_imo, - PlmcPrimitive const &cell_ipo, PlmcPrimitive &interface_L_iph, - PlmcPrimitive &interface_R_imh, PlmcPrimitive &del_m_i) -{ - // The function that will actually do the monotozation. Note that it return the interfaces by reference - auto Monotonize = [](Real const &val_i, Real const &val_imo, Real const &val_ipo, Real &interface_L, - Real &interface_R) { - Real const C = interface_R + interface_L; - - interface_R = fmax(fmin(val_i, val_imo), interface_R); - interface_R = fmin(fmax(val_i, val_imo), interface_R); - interface_L = C - interface_R; - - interface_L = fmax(fmin(val_i, val_ipo), interface_L); - interface_L = fmin(fmax(val_i, val_ipo), interface_L); - interface_R = C - interface_L; - }; - - // Monotonize - Monotonize(cell_i.density, cell_imo.density, cell_ipo.density, interface_L_iph.density, interface_R_imh.density); - Monotonize(cell_i.velocity_x, cell_imo.velocity_x, cell_ipo.velocity_x, interface_L_iph.velocity_x, - interface_R_imh.velocity_x); - Monotonize(cell_i.velocity_y, cell_imo.velocity_y, cell_ipo.velocity_y, interface_L_iph.velocity_y, - interface_R_imh.velocity_y); - Monotonize(cell_i.velocity_z, cell_imo.velocity_z, cell_ipo.velocity_z, interface_L_iph.velocity_z, - interface_R_imh.velocity_z); - Monotonize(cell_i.pressure, cell_imo.pressure, cell_ipo.pressure, interface_L_iph.pressure, interface_R_imh.pressure); - - // Compute the new slopes - del_m_i.density = interface_L_iph.density - interface_R_imh.density; - del_m_i.velocity_x = interface_L_iph.velocity_x - interface_R_imh.velocity_x; - del_m_i.velocity_y = interface_L_iph.velocity_y - interface_R_imh.velocity_y; - del_m_i.velocity_z = interface_L_iph.velocity_z - interface_R_imh.velocity_z; - del_m_i.pressure = interface_L_iph.pressure - interface_R_imh.pressure; - -#ifdef MHD - Monotonize(cell_i.magnetic_y, cell_imo.magnetic_y, cell_ipo.magnetic_y, interface_L_iph.magnetic_y, - interface_R_imh.magnetic_y); - Monotonize(cell_i.magnetic_z, cell_imo.magnetic_z, cell_ipo.velocity_z, interface_L_iph.velocity_z, - interface_R_imh.magnetic_z); - - del_m_i.magnetic_y = interface_L_iph.magnetic_y - interface_R_imh.magnetic_y; - del_m_i.magnetic_z = interface_L_iph.magnetic_z - interface_R_imh.magnetic_z; -#endif // MHD - -#ifdef DE - Monotonize(cell_i.gas_energy, cell_imo.gas_energy, cell_ipo.gas_energy, interface_L_iph.gas_energy, - interface_R_imh.gas_energy); - del_m_i.gas_energy = interface_L_iph.gas_energy - interface_R_imh.gas_energy; -#endif // DE - -#ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - Monotonize(cell_i.scalar[i], cell_imo.scalar[i], cell_ipo.scalar[i], interface_L_iph.scalar[i], - interface_R_imh.scalar[i]); - del_m_i.scalar[i] = interface_L_iph.scalar[i] - interface_R_imh.scalar[i]; - } -#endif // SCALAR -} -// ===================================================================================================================== - // ===================================================================================================================== void __device__ __host__ Write_Data(PlmcPrimitive const &interface_state, Real *dev_interface, Real const *dev_conserved, size_t const &id, size_t const &n_cells, diff --git a/src/reconstruction/plmc_cuda.h b/src/reconstruction/plmc_cuda.h index 3ebf7b61a..4e21924de 100644 --- a/src/reconstruction/plmc_cuda.h +++ b/src/reconstruction/plmc_cuda.h @@ -326,20 +326,6 @@ PlmcPrimitive __device__ Monotonize_Characteristic_Return_Primitive( PlmcPrimitive __device__ __host__ Calc_Interface(PlmcPrimitive const &primitive, PlmcPrimitive const &slopes, Real const &sign); -/*! - * \brief Monotonize the interface states and primitive slopes. - * - * \param[in] cell_i The state in the i cell - * \param[in] cell_imo The state in the i-1 cell - * \param[in] cell_ipo The state in the i+1 cell - * \param[in,out] interface_L_iph The left interface state at i+1/2 - * \param[in,out] interface_R_imh The right interface state at i-1/2 - * \param[in,out] del_m_i The primitive slopes in the cell - */ -void __device__ __host__ Monotonize_Primitive(PlmcPrimitive const &cell_i, PlmcPrimitive const &cell_imo, - PlmcPrimitive const &cell_ipo, PlmcPrimitive &interface_L_iph, - PlmcPrimitive &interface_R_imh, PlmcPrimitive &del_m_i); - /*! * \brief Write the interface data to the appropriate arrays * diff --git a/src/reconstruction/plmc_cuda_tests.cu b/src/reconstruction/plmc_cuda_tests.cu index 6018e84cf..0c53f0224 100644 --- a/src/reconstruction/plmc_cuda_tests.cu +++ b/src/reconstruction/plmc_cuda_tests.cu @@ -184,23 +184,23 @@ TEST(tMHDPlmcReconstructor, CorrectInputExpectCorrectOutput) std::vector> fiducial_interface_left = {{{21, 0.59023012197434721}, {85, 3.0043379408547275}, {149, 2.6320759184913625}, - {213, 0.94878676231467451}, + {213, 0.9487867623146744}, {277, 18.551193003661723}, {341, 1.8587936590169301}, {405, 2.1583975283044725}}, {{21, 0.73640639402573249}, - {85, 1.2543813093357532}, - {149, 2.194558499445812}, - {213, 1.1837630990406585}, - {277, 11.028931161539937}, - {341, 2.1583975283044725}, + {85, 3.3462413154443715}, + {149, 2.1945584994458125}, + {213, 0.67418839414138987}, + {277, 16.909618487528142}, + {341, 2.1533768050263267}, {405, 1.6994195863331925}}, {{21, 0.25340904981266843}, {85, 2.0441984720128734}, - {149, 2.0072227310539077}, + {149, 1.9959059157695584}, {213, 0.45377591914009824}, - {277, 24.026326855982607}, - {341, 1.7033818819502551}, + {277, 23.677832869261188}, + {341, 1.5437923271692418}, {405, 1.8141353672443383}}}; std::vector> fiducial_interface_right = { {{20, 0.59023012197434721}, @@ -216,19 +216,19 @@ TEST(tMHDPlmcReconstructor, CorrectInputExpectCorrectOutput) {84, 3.0043379408547275}, {145, 2.6371119205792346}, {148, 2.6320759184913625}, - {209, 0.71381042558869023}, + {209, 1.0210845222961809}, {212, 0.9487867623146744}, - {273, 29.633443857492487}, + {273, 21.360010722689488}, {276, 22.111134849009044}, - {337, 2.1583975283044725}, + {337, 2.1634182515826184}, {340, 1.8587936590169301}, - {401, 4.5479767726660523}, + {401, 1.7073441775673177}, {404, 2.1583975283044725}}, - {{5, 0.92705119413602599}, {17, 0.44405384992296193}, {20, 0.59023012197434721}, {69, 1.959259898225878}, - {81, 2.5027813113931279}, {84, 3.0043379408547275}, {133, 0.96653490574340462}, {145, 2.6371119205792346}, - {148, 2.6320759184913625}, {197, 1.3203867992383289}, {209, 0.71381042558869023}, {212, 0.9487867623146744}, - {261, 7.6371723945376502}, {273, 29.633443857492487}, {276, 22.111134849009044}, {325, 1.7033818819502551}, - {337, 2.1583975283044725}, {340, 1.8587936590169301}, {389, 1.8587936590169303}, {401, 4.5479767726660523}, + {{5, 0.92705119413602599}, {17, 0.44405384992296193}, {20, 0.59023012197434721}, {69, 1.9592598982258778}, + {81, 2.5027813113931279}, {84, 3.0043379408547275}, {133, 0.96653490574340428}, {145, 2.6371119205792346}, + {148, 2.6320759184913625}, {197, 1.3203867992383289}, {209, 1.0210845222961809}, {212, 0.9487867623146744}, + {261, 8.0057564947791793}, {273, 21.360010722689488}, {276, 22.111134849009044}, {325, 1.8629714367312684}, + {337, 2.1634182515826184}, {340, 1.8587936590169301}, {389, 1.9034519507895218}, {401, 1.7073441775673177}, {404, 2.1583975283044725}}}; // Loop over different directions @@ -290,6 +290,7 @@ __global__ void test_char_2_prim(plmc_utils::PlmcPrimitive const primitive, } } // namespace +#ifdef MHD TEST(tMHDPlmcPrimitive2Characteristic, CorrectInputExpectCorrectOutput) { // Test parameters @@ -347,4 +348,5 @@ TEST(tMHDPlmcCharacteristic2Primitive, CorrectInputExpectCorrectOutput) testingUtilities::checkResults(fiducial_results.pressure, host_results.pressure, "pressure"); testingUtilities::checkResults(fiducial_results.magnetic_y, host_results.magnetic_y, "magnetic_y"); testingUtilities::checkResults(fiducial_results.magnetic_z, host_results.magnetic_z, "magnetic_z"); -} \ No newline at end of file +} +#endif // MHD From c9c087992428438bbf8d490ec7cc697b24265db7 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Fri, 7 Apr 2023 10:35:45 -0400 Subject: [PATCH 363/694] Fix undetected inf bug If By and Bz were 0 then the rhypot function in the characeristic/primitive projection would return that I wasn't checking for. I'm checking for it now and it appears to fix the issue with the Ryu & Jones 4d shock tube --- src/reconstruction/plmc_cuda.h | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/reconstruction/plmc_cuda.h b/src/reconstruction/plmc_cuda.h index 4e21924de..bc2d3f3f8 100644 --- a/src/reconstruction/plmc_cuda.h +++ b/src/reconstruction/plmc_cuda.h @@ -144,10 +144,11 @@ PlmcCharacteristic __device__ __inline__ Primitive_To_Characteristic(PlmcPrimiti alpha_slow = sqrt(numerator_2 / denom); } - // Compute Betas (equation A17) + // Compute Betas (equation A17). Note that rhypot can return an inf if By and Bz are both zero, the isfinite check + // handles that case Real const beta_denom = rhypot(primitive.magnetic_y, primitive.magnetic_z); - Real const beta_y = (beta_denom == 0) ? 0.0 : primitive.magnetic_y * beta_denom; - Real const beta_z = (beta_denom == 0) ? 0.0 : primitive.magnetic_z * beta_denom; + Real const beta_y = (isfinite(beta_denom)) ? primitive.magnetic_y * beta_denom : 0.0; + Real const beta_z = (isfinite(beta_denom)) ? primitive.magnetic_z * beta_denom : 0.0; // Compute Q(s) (equation A14) Real const n_fs = 0.5 / sound_speed_squared; // equation A19 @@ -247,10 +248,11 @@ void __device__ __inline__ Characteristic_To_Primitive(PlmcPrimitive const &prim alpha_slow = sqrt(numerator_2 / denom); } - // Compute Betas (equation A17) + // Compute Betas (equation A17). Note that rhypot can return an inf if By and Bz are both zero, the isfinite check + // handles that case Real const beta_denom = rhypot(primitive.magnetic_y, primitive.magnetic_z); - Real const beta_y = (beta_denom == 0) ? 0.0 : primitive.magnetic_y * beta_denom; - Real const beta_z = (beta_denom == 0) ? 0.0 : primitive.magnetic_z * beta_denom; + Real const beta_y = (isfinite(beta_denom)) ? primitive.magnetic_y * beta_denom : 0.0; + Real const beta_z = (isfinite(beta_denom)) ? primitive.magnetic_z * beta_denom : 0.0; // Compute Q(s) (equation A14) Real const sign = copysign(1.0, primitive.magnetic_x); From 0bf829f6145d7e83921b4303cc61ad5b725331ce Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Fri, 7 Apr 2023 16:46:31 -0400 Subject: [PATCH 364/694] Fix ghost cell bug. MHD needs one extra ghost cell --- src/grid/grid3D.cpp | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/src/grid/grid3D.cpp b/src/grid/grid3D.cpp index 74eaf0505..f0a9fd064 100644 --- a/src/grid/grid3D.cpp +++ b/src/grid/grid3D.cpp @@ -72,14 +72,8 @@ Grid3D::Grid3D(void) #endif #ifdef MHD - // Set the number of ghost cells high enough for MHD - if (H.n_ghost < 3) { - chprintf( - "Insufficient number of ghost cells for MHD. H.n_ghost was %i, setting " - "to 3.\n", - H.n_ghost); - H.n_ghost = 3; - } + // Set the number of ghost cells high enough for MHD. MHD needs one extra for the left most face + H.n_ghost++; #endif // MHD } From 3b03480bdf35c5fad69f2c2c2db0789aa8702154 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Mon, 10 Apr 2023 09:40:40 -0400 Subject: [PATCH 365/694] Post rebase fixes --- src/grid/initial_conditions.cpp | 24 ++++++++++++------------ src/hydro/hydro_cuda.cu | 6 +++--- src/reconstruction/plmc_cuda_tests.cu | 2 +- src/riemann_solvers/hlld_cuda.cu | 2 +- src/utils/mhd_utilities.h | 1 - 5 files changed, 17 insertions(+), 18 deletions(-) diff --git a/src/grid/initial_conditions.cpp b/src/grid/initial_conditions.cpp index b20dcd996..679325f3a 100644 --- a/src/grid/initial_conditions.cpp +++ b/src/grid/initial_conditions.cpp @@ -1767,9 +1767,9 @@ void Grid3D::Advecting_Field_Loop(struct parameters const P) C.momentum_x[id] = P.rho * P.vx; C.momentum_y[id] = P.rho * P.vy; C.momentum_z[id] = P.rho * P.vz; - C.Energy[id] = mhd::utils::computeEnergy(P.P, P.rho, C.momentum_x[id] / P.rho, C.momentum_y[id] / P.rho, - C.momentum_z[id] / P.rho, magnetic_centered.x, magnetic_centered.y, - magnetic_centered.z, ::gama); + C.Energy[id] = hydro_utilities::Calc_Energy_Conserved(P.P, P.rho, C.momentum_x[id], C.momentum_y[id], + C.momentum_z[id], ::gama, magnetic_centered.x, + magnetic_centered.y, magnetic_centered.z); } } } @@ -1823,13 +1823,13 @@ void Grid3D::MHD_Spherical_Blast(struct parameters const P) // Set the field(s) that do depend on pressure. That's just energy Real radius = std::hypot(x, y, z); if (radius < P.radius) { - C.Energy[id] = mhd::utils::computeEnergy( - P.P_blast, C.density[id], C.momentum_x[id] / C.density[id], C.momentum_y[id] / C.density[id], - C.momentum_z[id] / C.density[id], magnetic_centered.x, magnetic_centered.y, magnetic_centered.z, ::gama); + C.Energy[id] = hydro_utilities::Calc_Energy_Conserved(P.P_blast, C.density[id], C.momentum_x[id], + C.momentum_y[id], C.momentum_z[id], ::gama, magnetic_centered.x, + magnetic_centered.y, magnetic_centered.z); } else { - C.Energy[id] = mhd::utils::computeEnergy( - P.P, C.density[id], C.momentum_x[id] / C.density[id], C.momentum_y[id] / C.density[id], - C.momentum_z[id] / C.density[id], magnetic_centered.x, magnetic_centered.y, magnetic_centered.z, ::gama); + C.Energy[id] = hydro_utilities::Calc_Energy_Conserved(P.P, C.density[id], C.momentum_x[id], C.momentum_y[id], + C.momentum_z[id], ::gama, magnetic_centered.x, + magnetic_centered.y, magnetic_centered.z); } } } @@ -1887,9 +1887,9 @@ void Grid3D::Orszag_Tang_Vortex() C.momentum_x[id] = density_background * velocity_background * std::sin(2.0 * M_PI * y); C.momentum_y[id] = -density_background * velocity_background * std::sin(2.0 * M_PI * x); C.momentum_z[id] = 0.0; - C.Energy[id] = mhd::utils::computeEnergy(pressure_background, C.density[id], C.momentum_x[id] / C.density[id], - C.momentum_y[id] / C.density[id], C.momentum_z[id] / C.density[id], - magnetic_centered.x, magnetic_centered.y, magnetic_centered.z, ::gama); + C.Energy[id] = hydro_utilities::Calc_Energy_Conserved(pressure_background, C.density[id], C.momentum_x[id], + C.momentum_y[id], C.momentum_z[id], ::gama, magnetic_centered.x, + magnetic_centered.y, magnetic_centered.z); } } } diff --git a/src/hydro/hydro_cuda.cu b/src/hydro/hydro_cuda.cu index 2daa65888..d9b4629ed 100644 --- a/src/hydro/hydro_cuda.cu +++ b/src/hydro/hydro_cuda.cu @@ -421,7 +421,7 @@ __device__ __host__ Real mhdInverseCrossingTime(Real const &E, Real const &d, Re Real const &gamma) { // Compute the gas pressure and fast magnetosonic speed - Real gasP = hydro_utilities::Calc_Pressure_Conserved(E, d, vx * d, vy * d, vz * d, gamma, avgBx, avgBy, avgBz); + Real gasP = hydro_utilities::Calc_Pressure_Primitive(E, d, vx, vy, vz, gamma, avgBx, avgBy, avgBz); Real cf = mhd::utils::fastMagnetosonicSpeed(d, gasP, avgBx, avgBy, avgBz, gamma); // Find maximum inverse crossing time in the cell (i.e. minimum crossing time) @@ -787,9 +787,9 @@ __global__ void Partial_Update_Advected_Internal_Energy_3D(Real *dev_conserved, E_kin = hydro_utilities::Calc_Kinetic_Energy_From_Velocity(d, vx, vy, vz); #ifdef MHD // Add the magnetic energy - auto [centeredBx, centeredBy, centeredBz] = + auto magnetic_centered = mhd::utils::cellCenteredMagneticFields(dev_conserved, id, xid, yid, zid, n_cells, nx, ny); - E_kin += mhd::utils::computeMagneticEnergy(magX, magY, magZ); + E_kin += mhd::utils::computeMagneticEnergy(magnetic_centered.x, magnetic_centered.y, magnetic_centered.z); #endif // MHD P = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, GE, gamma); P = fmax(P, (Real)TINY_NUMBER); diff --git a/src/reconstruction/plmc_cuda_tests.cu b/src/reconstruction/plmc_cuda_tests.cu index 0c53f0224..b2a2d8ece 100644 --- a/src/reconstruction/plmc_cuda_tests.cu +++ b/src/reconstruction/plmc_cuda_tests.cu @@ -269,6 +269,7 @@ TEST(tMHDPlmcReconstructor, CorrectInputExpectCorrectOutput) } } +#ifdef MHD namespace { __global__ void test_prim_2_char(plmc_utils::PlmcPrimitive const primitive, @@ -290,7 +291,6 @@ __global__ void test_char_2_prim(plmc_utils::PlmcPrimitive const primitive, } } // namespace -#ifdef MHD TEST(tMHDPlmcPrimitive2Characteristic, CorrectInputExpectCorrectOutput) { // Test parameters diff --git a/src/riemann_solvers/hlld_cuda.cu b/src/riemann_solvers/hlld_cuda.cu index c0708811a..b7c1fdff0 100644 --- a/src/riemann_solvers/hlld_cuda.cu +++ b/src/riemann_solvers/hlld_cuda.cu @@ -37,7 +37,7 @@ __global__ void Calculate_HLLD_Fluxes_CUDA(Real const *dev_bounds_L, Real const // get a thread index int const threadId = threadIdx.x + blockIdx.x * blockDim.x; - // Thread guard to avoid overrunx + // Thread guard to avoid overrun if (threadId >= n_cells) { return; } diff --git a/src/utils/mhd_utilities.h b/src/utils/mhd_utilities.h index 8ced8b3ba..1a0c91674 100644 --- a/src/utils/mhd_utilities.h +++ b/src/utils/mhd_utilities.h @@ -16,7 +16,6 @@ #include "../global/global.h" #include "../global/global_cuda.h" #include "../grid/grid3D.h" -#include "../riemann_solvers/hlld_cuda.h" #include "../utils/cuda_utilities.h" #include "../utils/gpu.hpp" From 3bb12e9afde55022c84d59a8133c6f9fd93d123b Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Mon, 10 Apr 2023 14:34:34 -0400 Subject: [PATCH 366/694] Update MHD blast wave test data --- cholla-tests-data | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cholla-tests-data b/cholla-tests-data index d6202baad..7d1aaa235 160000 --- a/cholla-tests-data +++ b/cholla-tests-data @@ -1 +1 @@ -Subproject commit d6202baadc9eaac6dce5ec4060f1f3fda8abdf1f +Subproject commit 7d1aaa235911f9293ab51967ff8ea4c389429684 From f17239b4b2b1ca8877b37eb58af24fc31a9f9d3f Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Mon, 10 Apr 2023 14:35:35 -0400 Subject: [PATCH 367/694] Formatting --- src/grid/initial_conditions.cpp | 12 ++++++------ src/hydro/hydro_cuda.cu | 3 +-- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/src/grid/initial_conditions.cpp b/src/grid/initial_conditions.cpp index 679325f3a..97b9368e8 100644 --- a/src/grid/initial_conditions.cpp +++ b/src/grid/initial_conditions.cpp @@ -1823,9 +1823,9 @@ void Grid3D::MHD_Spherical_Blast(struct parameters const P) // Set the field(s) that do depend on pressure. That's just energy Real radius = std::hypot(x, y, z); if (radius < P.radius) { - C.Energy[id] = hydro_utilities::Calc_Energy_Conserved(P.P_blast, C.density[id], C.momentum_x[id], - C.momentum_y[id], C.momentum_z[id], ::gama, magnetic_centered.x, - magnetic_centered.y, magnetic_centered.z); + C.Energy[id] = hydro_utilities::Calc_Energy_Conserved( + P.P_blast, C.density[id], C.momentum_x[id], C.momentum_y[id], C.momentum_z[id], ::gama, + magnetic_centered.x, magnetic_centered.y, magnetic_centered.z); } else { C.Energy[id] = hydro_utilities::Calc_Energy_Conserved(P.P, C.density[id], C.momentum_x[id], C.momentum_y[id], C.momentum_z[id], ::gama, magnetic_centered.x, @@ -1887,9 +1887,9 @@ void Grid3D::Orszag_Tang_Vortex() C.momentum_x[id] = density_background * velocity_background * std::sin(2.0 * M_PI * y); C.momentum_y[id] = -density_background * velocity_background * std::sin(2.0 * M_PI * x); C.momentum_z[id] = 0.0; - C.Energy[id] = hydro_utilities::Calc_Energy_Conserved(pressure_background, C.density[id], C.momentum_x[id], - C.momentum_y[id], C.momentum_z[id], ::gama, magnetic_centered.x, - magnetic_centered.y, magnetic_centered.z); + C.Energy[id] = hydro_utilities::Calc_Energy_Conserved( + pressure_background, C.density[id], C.momentum_x[id], C.momentum_y[id], C.momentum_z[id], ::gama, + magnetic_centered.x, magnetic_centered.y, magnetic_centered.z); } } } diff --git a/src/hydro/hydro_cuda.cu b/src/hydro/hydro_cuda.cu index d9b4629ed..06090d476 100644 --- a/src/hydro/hydro_cuda.cu +++ b/src/hydro/hydro_cuda.cu @@ -787,8 +787,7 @@ __global__ void Partial_Update_Advected_Internal_Energy_3D(Real *dev_conserved, E_kin = hydro_utilities::Calc_Kinetic_Energy_From_Velocity(d, vx, vy, vz); #ifdef MHD // Add the magnetic energy - auto magnetic_centered = - mhd::utils::cellCenteredMagneticFields(dev_conserved, id, xid, yid, zid, n_cells, nx, ny); + auto magnetic_centered = mhd::utils::cellCenteredMagneticFields(dev_conserved, id, xid, yid, zid, n_cells, nx, ny); E_kin += mhd::utils::computeMagneticEnergy(magnetic_centered.x, magnetic_centered.y, magnetic_centered.z); #endif // MHD P = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, GE, gamma); From 24e978730ec63bb080b8cbb33839cd5767eaa8da Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Mon, 10 Apr 2023 14:39:03 -0400 Subject: [PATCH 368/694] Remove Ryu & Jones 2a shock tube It's the same as Dai & Woodward --- cholla-tests-data | 2 +- examples/3D/Ryu_and_Jones_2a.txt | 74 ------------------- ...ockTubeCorrectInputExpectCorrectOutput.txt | 4 +- ...ockTubeCorrectInputExpectCorrectOutput.txt | 74 ------------------- src/system_tests/mhd_system_tests.cpp | 7 -- 5 files changed, 3 insertions(+), 158 deletions(-) delete mode 100644 examples/3D/Ryu_and_Jones_2a.txt delete mode 100644 src/system_tests/input_files/tMHDSYSTEMParameterizedMpi_RyuAndJones2aShockTubeCorrectInputExpectCorrectOutput.txt diff --git a/cholla-tests-data b/cholla-tests-data index 7d1aaa235..ad50e1fc5 160000 --- a/cholla-tests-data +++ b/cholla-tests-data @@ -1 +1 @@ -Subproject commit 7d1aaa235911f9293ab51967ff8ea4c389429684 +Subproject commit ad50e1fc5ed18c9162d1c069449bc0259c37591a diff --git a/examples/3D/Ryu_and_Jones_2a.txt b/examples/3D/Ryu_and_Jones_2a.txt deleted file mode 100644 index 70a7ef8e9..000000000 --- a/examples/3D/Ryu_and_Jones_2a.txt +++ /dev/null @@ -1,74 +0,0 @@ -# -# Parameter File for 3D Ryu & Jones MHD shock tube 4d. -# Citation: Ryu & Jones 1995 "Numerical Magnetohydrodynamics in Astrophysics: -# Algorithms and Tests for One-Dimensional Flow" -# -# Note: There are many shock tubes in this paper. This settings file is -# specifically for shock tube 4d -# - -################################################ -# number of grid cells in the x dimension -nx=64 -# number of grid cells in the y dimension -ny=64 -# number of grid cells in the z dimension -nz=64 -# final output time -tout=0.2 -# time interval for output -outstep=0.2 -# name of initial conditions -init=Riemann - -# domain properties -xmin=0.0 -ymin=0.0 -zmin=0.0 -xlen=1.0 -ylen=1.0 -zlen=1.0 - -# type of boundary conditions -xl_bcnd=3 -xu_bcnd=3 -yl_bcnd=3 -yu_bcnd=3 -zl_bcnd=3 -zu_bcnd=3 - -# path to output directory -outdir=./ - -################################################# -# Parameters for 1D Riemann problems -# density of left state -rho_l=1.08 -# velocity of left state -vx_l=1.2 -vy_l=0.01 -vz_l=0.5 -# pressure of left state -P_l=0.95 -# Magnetic field of the left state -Bx_l=0.5641895835477563 -By_l=1.0155412503859613 -Bz_l=0.5641895835477563 - -# density of right state -rho_r=1.0 -# velocity of right state -vx_r=0.0 -vy_r=0.0 -vz_r=0.0 -# pressure of right state -P_r=1.0 -# Magnetic field of the right state -Bx_r=0.5641895835477563 -By_r=1.1283791670955126 -Bz_r=0.5641895835477563 - -# location of initial discontinuity -diaph=0.5 -# value of gamma -gamma=1.6666666666666667 diff --git a/src/system_tests/input_files/tMHDSYSTEMParameterizedMpi_RyuAndJones1aShockTubeCorrectInputExpectCorrectOutput.txt b/src/system_tests/input_files/tMHDSYSTEMParameterizedMpi_RyuAndJones1aShockTubeCorrectInputExpectCorrectOutput.txt index 07440faa3..a03aef938 100644 --- a/src/system_tests/input_files/tMHDSYSTEMParameterizedMpi_RyuAndJones1aShockTubeCorrectInputExpectCorrectOutput.txt +++ b/src/system_tests/input_files/tMHDSYSTEMParameterizedMpi_RyuAndJones1aShockTubeCorrectInputExpectCorrectOutput.txt @@ -1,10 +1,10 @@ # -# Parameter File for 3D Ryu & Jones MHD shock tube 4d. +# Parameter File for 3D Ryu & Jones MHD shock tube 1a. # Citation: Ryu & Jones 1995 "Numerical Magnetohydrodynamics in Astrophysics: # Algorithms and Tests for One-Dimensional Flow" # # Note: There are many shock tubes in this paper. This settings file is -# specifically for shock tube 4d +# specifically for shock tube 1a # ################################################ diff --git a/src/system_tests/input_files/tMHDSYSTEMParameterizedMpi_RyuAndJones2aShockTubeCorrectInputExpectCorrectOutput.txt b/src/system_tests/input_files/tMHDSYSTEMParameterizedMpi_RyuAndJones2aShockTubeCorrectInputExpectCorrectOutput.txt deleted file mode 100644 index 70a7ef8e9..000000000 --- a/src/system_tests/input_files/tMHDSYSTEMParameterizedMpi_RyuAndJones2aShockTubeCorrectInputExpectCorrectOutput.txt +++ /dev/null @@ -1,74 +0,0 @@ -# -# Parameter File for 3D Ryu & Jones MHD shock tube 4d. -# Citation: Ryu & Jones 1995 "Numerical Magnetohydrodynamics in Astrophysics: -# Algorithms and Tests for One-Dimensional Flow" -# -# Note: There are many shock tubes in this paper. This settings file is -# specifically for shock tube 4d -# - -################################################ -# number of grid cells in the x dimension -nx=64 -# number of grid cells in the y dimension -ny=64 -# number of grid cells in the z dimension -nz=64 -# final output time -tout=0.2 -# time interval for output -outstep=0.2 -# name of initial conditions -init=Riemann - -# domain properties -xmin=0.0 -ymin=0.0 -zmin=0.0 -xlen=1.0 -ylen=1.0 -zlen=1.0 - -# type of boundary conditions -xl_bcnd=3 -xu_bcnd=3 -yl_bcnd=3 -yu_bcnd=3 -zl_bcnd=3 -zu_bcnd=3 - -# path to output directory -outdir=./ - -################################################# -# Parameters for 1D Riemann problems -# density of left state -rho_l=1.08 -# velocity of left state -vx_l=1.2 -vy_l=0.01 -vz_l=0.5 -# pressure of left state -P_l=0.95 -# Magnetic field of the left state -Bx_l=0.5641895835477563 -By_l=1.0155412503859613 -Bz_l=0.5641895835477563 - -# density of right state -rho_r=1.0 -# velocity of right state -vx_r=0.0 -vy_r=0.0 -vz_r=0.0 -# pressure of right state -P_r=1.0 -# Magnetic field of the right state -Bx_r=0.5641895835477563 -By_r=1.1283791670955126 -Bz_r=0.5641895835477563 - -# location of initial discontinuity -diaph=0.5 -# value of gamma -gamma=1.6666666666666667 diff --git a/src/system_tests/mhd_system_tests.cpp b/src/system_tests/mhd_system_tests.cpp index 3cb6f563e..9d81cdfb4 100644 --- a/src/system_tests/mhd_system_tests.cpp +++ b/src/system_tests/mhd_system_tests.cpp @@ -581,13 +581,6 @@ TEST_P(tMHDSYSTEMParameterizedMpi, RyuAndJones1aShockTubeCorrectInputExpectCorre test_runner.runTest(); } -/// Test the Ryu & Jones 2a Shock Tube (Ryu & Jones 1995) -TEST_P(tMHDSYSTEMParameterizedMpi, RyuAndJones2aShockTubeCorrectInputExpectCorrectOutput) -{ - test_runner.numMpiRanks = GetParam(); - test_runner.runTest(); -} - /// Test the Ryu & Jones 4d Shock Tube (Ryu & Jones 1995) TEST_P(tMHDSYSTEMParameterizedMpi, RyuAndJones4dShockTubeCorrectInputExpectCorrectOutput) { From e76cb6ab9abaccf272737ec92109de5b034a02a8 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Wed, 12 Apr 2023 14:23:29 -0400 Subject: [PATCH 369/694] Update MHD blast test to version in VL+CT paper See [Gardiner & Stone 2008](https://arxiv.org/abs/0712.2634) for details. Note that I believe they have a typo, the pressure in the blast region should be 10 not 100, that is consistent with the Athena++ code. --- examples/3D/mhd_blast.txt | 36 +++++++++---------- ...astWaveCorrectInputExpectCorrectOutput.txt | 36 +++++++++---------- 2 files changed, 36 insertions(+), 36 deletions(-) diff --git a/examples/3D/mhd_blast.txt b/examples/3D/mhd_blast.txt index f60997c7e..e796c763e 100644 --- a/examples/3D/mhd_blast.txt +++ b/examples/3D/mhd_blast.txt @@ -1,27 +1,27 @@ # # Parameter File for the MHD Blast wavelength -# See [Gardiner & Stone 2008](https://arxiv.org/abs/0712.2634) for details. +# See [Stone & Gardiner 2009](https://ui.adsabs.harvard.edu/abs/2009NewA...14..139S/abstract) for details. # ################################################ # number of grid cells in the x dimension -nx=128 +nx=200 # number of grid cells in the y dimension -ny=128 +ny=300 # number of grid cells in the z dimension -nz=128 +nz=200 # final output time -tout=0.02 +tout=0.2 # time interval for output -outstep=0.02 +outstep=0.005 # name of initial conditions init=MHD_Spherical_Blast # domain properties xmin=-0.5 -ymin=-0.5 +ymin=-0.75 zmin=-0.5 xlen=1.0 -ylen=1.0 +ylen=1.5 zlen=1.0 # type of boundary conditions xl_bcnd=1 @@ -45,17 +45,17 @@ vy=0.0 # velocity in the z direction vz=0.0 # initial pressure outside the blast zone -P=1.0 -# initial pressure inside the blast zone -P_blast=100.0 +P=0.1 +# initial pressure inside the blast zone. Note that the paper says this should be 100, that is a typo +P_blast=10.0 # The radius of the blast zone -radius=0.125 -# magnetic field in the x direction. Equal to 10/sqrt(2) -Bx=7.0710678118654746 -# magnetic field in the y direction -By=0.0 -# magnetic field in the z direction. Equal to 10/sqrt(2) -Bz=7.0710678118654746 +radius=0.1 +# magnetic field in the x direction. Equal to 1/sqrt(2) +Bx=0.70710678118654746 +# magnetic field in the y direction. Equal to 1/sqrt(2) +By=0.70710678118654746 +# magnetic field in the z direction +Bz=0.0 # value of gamma gamma=1.666666666666667 diff --git a/src/system_tests/input_files/tMHDSYSTEMParameterizedMpi_MhdBlastWaveCorrectInputExpectCorrectOutput.txt b/src/system_tests/input_files/tMHDSYSTEMParameterizedMpi_MhdBlastWaveCorrectInputExpectCorrectOutput.txt index 4e70c2993..77ef94b72 100644 --- a/src/system_tests/input_files/tMHDSYSTEMParameterizedMpi_MhdBlastWaveCorrectInputExpectCorrectOutput.txt +++ b/src/system_tests/input_files/tMHDSYSTEMParameterizedMpi_MhdBlastWaveCorrectInputExpectCorrectOutput.txt @@ -1,27 +1,27 @@ # # Parameter File for the MHD Blast wavelength -# See [Gardiner & Stone 2008](https://arxiv.org/abs/0712.2634) for details. +# See [Stone & Gardiner 2009](https://ui.adsabs.harvard.edu/abs/2009NewA...14..139S/abstract) for details. # ################################################ # number of grid cells in the x dimension -nx=64 +nx=50 # number of grid cells in the y dimension -ny=64 +ny=100 # number of grid cells in the z dimension -nz=64 +nz=50 # final output time -tout=0.02 +tout=0.2 # time interval for output -outstep=0.02 +outstep=0.2 # name of initial conditions init=MHD_Spherical_Blast # domain properties xmin=-0.5 -ymin=-0.5 +ymin=-0.75 zmin=-0.5 xlen=1.0 -ylen=1.0 +ylen=1.5 zlen=1.0 # type of boundary conditions xl_bcnd=1 @@ -45,17 +45,17 @@ vy=0.0 # velocity in the z direction vz=0.0 # initial pressure outside the blast zone -P=1.0 -# initial pressure inside the blast zone -P_blast=100.0 +P=0.1 +# initial pressure inside the blast zone. Note that the paper says this should be 100, that is a typo +P_blast=10.0 # The radius of the blast zone -radius=0.125 -# magnetic field in the x direction. Equal to 10/sqrt(2) -Bx=7.0710678118654746 -# magnetic field in the y direction -By=0.0 -# magnetic field in the z direction. Equal to 10/sqrt(2) -Bz=7.0710678118654746 +radius=0.1 +# magnetic field in the x direction. Equal to 1/sqrt(2) +Bx=0.70710678118654746 +# magnetic field in the y direction. Equal to 1/sqrt(2) +By=0.70710678118654746 +# magnetic field in the z direction +Bz=0.0 # value of gamma gamma=1.666666666666667 From aafe1b9560e0eb64d47bd77475e213628c8d0b5e Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Wed, 12 Apr 2023 14:25:14 -0400 Subject: [PATCH 370/694] Refactor MHD blast wave ICs for clarity --- src/grid/initial_conditions.cpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/grid/initial_conditions.cpp b/src/grid/initial_conditions.cpp index 97b9368e8..5f6b482b4 100644 --- a/src/grid/initial_conditions.cpp +++ b/src/grid/initial_conditions.cpp @@ -1821,16 +1821,16 @@ void Grid3D::MHD_Spherical_Blast(struct parameters const P) mhd::utils::cellCenteredMagneticFields(C.host, id, i, j, k, H.n_cells, H.nx, H.ny); // Set the field(s) that do depend on pressure. That's just energy - Real radius = std::hypot(x, y, z); + Real const radius = std::hypot(x, y, z); + Real pressure; if (radius < P.radius) { - C.Energy[id] = hydro_utilities::Calc_Energy_Conserved( - P.P_blast, C.density[id], C.momentum_x[id], C.momentum_y[id], C.momentum_z[id], ::gama, - magnetic_centered.x, magnetic_centered.y, magnetic_centered.z); + pressure = P.P_blast; } else { - C.Energy[id] = hydro_utilities::Calc_Energy_Conserved(P.P, C.density[id], C.momentum_x[id], C.momentum_y[id], - C.momentum_z[id], ::gama, magnetic_centered.x, - magnetic_centered.y, magnetic_centered.z); + pressure = P.P; } + C.Energy[id] = hydro_utilities::Calc_Energy_Conserved( + pressure, C.density[id], C.momentum_x[id], C.momentum_y[id], C.momentum_z[id], ::gama, magnetic_centered.x, + magnetic_centered.y, magnetic_centered.z); } } } From aa9f0b2dca515a650c6fb2671942f6b4a69ab77f Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Thu, 13 Apr 2023 14:54:24 -0400 Subject: [PATCH 371/694] Fix bug in Orszag-Tang vortex initial conditions --- src/grid/initial_conditions.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/grid/initial_conditions.cpp b/src/grid/initial_conditions.cpp index 5f6b482b4..987a73f00 100644 --- a/src/grid/initial_conditions.cpp +++ b/src/grid/initial_conditions.cpp @@ -1858,8 +1858,8 @@ void Grid3D::Orszag_Tang_Vortex() Get_Position(i, j, k, &x, &y, &z); // Z vector potential - vectorPotential.at(id + 2 * H.n_cells) = (magnetic_background / 4.0 * M_PI) * std::cos(4.0 * M_PI * x) - - (magnetic_background / 2.0 * M_PI) * std::cos(2.0 * M_PI * y); + vectorPotential.at(id + 2 * H.n_cells) = + magnetic_background / (4.0 * M_PI) * (std::cos(4.0 * M_PI * x) - 2.0 * std::cos(2.0 * M_PI * y)); } } } From 443415606aeb404685dba730d148be2e0879882c Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Thu, 13 Apr 2023 14:54:49 -0400 Subject: [PATCH 372/694] Update all MHD system tests for PLMC --- cholla-tests-data | 2 +- src/system_tests/hydro_system_tests.cpp | 16 ++- src/system_tests/mhd_system_tests.cpp | 142 +++++++++++++++--------- 3 files changed, 104 insertions(+), 56 deletions(-) diff --git a/cholla-tests-data b/cholla-tests-data index ad50e1fc5..d59317178 160000 --- a/cholla-tests-data +++ b/cholla-tests-data @@ -1 +1 @@ -Subproject commit ad50e1fc5ed18c9162d1c069449bc0259c37591a +Subproject commit d593171787379092a8d2189cf4be3e98fb35300f diff --git a/src/system_tests/hydro_system_tests.cpp b/src/system_tests/hydro_system_tests.cpp index 9d9639f65..dd92fbddc 100644 --- a/src/system_tests/hydro_system_tests.cpp +++ b/src/system_tests/hydro_system_tests.cpp @@ -97,8 +97,15 @@ TEST(tHYDROtMHDSYSTEMSoundWave3D, CorrectInputExpectCorrectOutput) #ifdef MHD // Loosen correctness check to account for MHD only having PCM. This is // about the error between PCM and PPMP in hydro - tolerance = 1E-6; -#endif // MHD + // Check Results. Values based on results in Gardiner & Stone 2008 + #ifdef PCM + tolerance = 1e-6; + #elif defined(PLMC) + tolerance = 1.0E-7; + #elif defined(PPMC) + tolerance = 0.0; + #endif // PCM +#endif // MHD testObject.launchCholla(); @@ -133,7 +140,10 @@ class tHYDROtMHDSYSTEMLinearWavesParameterizedMpi : public ::testing::TestWithPa #ifdef PCM double const allowedL1Error = 4E-7; // Based on results in Gardiner & Stone 2008 double const allowedError = 4E-7; -#else // PCM +#elif defined(PLMC) + double const allowedL1Error = 1E-7; // Based on results in Gardiner & Stone 2008 + double const allowedError = 1E-7; +#elif defined(PPMC) double const allowedL1Error = 1E-7; // Based on results in Gardiner & Stone 2008 double const allowedError = 1E-7; #endif // PCM diff --git a/src/system_tests/mhd_system_tests.cpp b/src/system_tests/mhd_system_tests.cpp index 9d81cdfb4..b90d07cd0 100644 --- a/src/system_tests/mhd_system_tests.cpp +++ b/src/system_tests/mhd_system_tests.cpp @@ -33,14 +33,6 @@ class tMHDSYSTEMLinearWavesParameterizedAngle : public ::testing::TestWithParam< protected: systemTest::SystemTestRunner waveTest; -#ifdef PCM - double const allowedL1Error = 4E-7; // Based on results in Gardiner & Stone 2008 - double const allowedError = 4E-7; -#else // PCM - double const allowedL1Error = 1E-7; // Based on results in Gardiner & Stone 2008 - double const allowedError = 1E-7; -#endif // PCM - void setLaunchParams(double const &waveSpeed, double const &rEigenVec_rho, double const &rEigenVec_MomentumX, double const &rEigenVec_MomentumY, double const &rEigenVec_MomentumZ, double const &rEigenVec_E, double const &rEigenVec_Bx, double const &rEigenVec_By, double const &rEigenVec_Bz, @@ -162,11 +154,13 @@ TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, FastMagnetosonicWaveRightMovingC // Set the number of timesteps waveTest.setFiducialNumTimeSteps(numTimeSteps[domain_direction - 1]); -// Check Results +// Check Results. Values based on results in Gardiner & Stone 2008 #ifdef PCM waveTest.runL1ErrorTest(4.2E-7, 5.4E-7); -#else // PCM - waveTest.runL1ErrorTest(allowedL1Error, allowedError); +#elif defined(PLMC) + waveTest.runL1ErrorTest(6.5E-8, 6.5E-8); +#elif defined(PPMC) + waveTest.runL1ErrorTest(0.0, 0.0); #endif // PCM } @@ -196,11 +190,13 @@ TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, FastMagnetosonicWaveLeftMovingCo // Set the number of timesteps waveTest.setFiducialNumTimeSteps(numTimeSteps[domain_direction - 1]); -// Check Results +// Check Results. Values based on results in Gardiner & Stone 2008 #ifdef PCM waveTest.runL1ErrorTest(4.2E-7, 5.4E-7); -#else // PCM - waveTest.runL1ErrorTest(allowedL1Error, allowedError); +#elif defined(PLMC) + waveTest.runL1ErrorTest(6.5E-8, 6.5E-8); +#elif defined(PPMC) + waveTest.runL1ErrorTest(0.0, 0.0); #endif // PCM } @@ -232,8 +228,14 @@ TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, SlowMagnetosonicWaveRightMovingC // Set the number of timesteps waveTest.setFiducialNumTimeSteps(numTimeSteps[domain_direction - 1]); - // Check Results - waveTest.runL1ErrorTest(allowedL1Error, allowedError); + // Check Results. Values based on results in Gardiner & Stone 2008 +#ifdef PCM + waveTest.runL1ErrorTest(4.E-7, 4.E-7); +#elif defined(PLMC) + waveTest.runL1ErrorTest(2.0E-8, 2.7E-8); +#elif defined(PPMC) + waveTest.runL1ErrorTest(0.0, 0.0); +#endif // PCM } TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, SlowMagnetosonicWaveLeftMovingCorrectInputExpectCorrectOutput) @@ -262,8 +264,14 @@ TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, SlowMagnetosonicWaveLeftMovingCo // Set the number of timesteps waveTest.setFiducialNumTimeSteps(numTimeSteps[domain_direction - 1]); - // Check Results - waveTest.runL1ErrorTest(allowedL1Error, allowedError); + // Check Results. Values based on results in Gardiner & Stone 2008 +#ifdef PCM + waveTest.runL1ErrorTest(4.E-7, 4.E-7); +#elif defined(PLMC) + waveTest.runL1ErrorTest(2.0E-8, 2.7E-8); +#elif defined(PPMC) + waveTest.runL1ErrorTest(0.0, 0.0); +#endif // PCM } // Alfven Waves Moving Left and Right @@ -293,8 +301,14 @@ TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, AlfvenWaveRightMovingCorrectInpu // Set the number of timesteps waveTest.setFiducialNumTimeSteps(numTimeSteps[domain_direction - 1]); - // Check Results - waveTest.runL1ErrorTest(allowedL1Error, allowedError); + // Check Results. Values based on results in Gardiner & Stone 2008 +#ifdef PCM + waveTest.runL1ErrorTest(4.E-7, 4.E-7); +#elif defined(PLMC) + waveTest.runL1ErrorTest(3.0E-8, 3.0E-8); +#elif defined(PPMC) + waveTest.runL1ErrorTest(0.0, 0.0); +#endif // PCM } TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, AlfvenWaveLeftMovingCorrectInputExpectCorrectOutput) @@ -322,8 +336,14 @@ TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, AlfvenWaveLeftMovingCorrectInput // Set the number of timesteps waveTest.setFiducialNumTimeSteps(numTimeSteps[domain_direction - 1]); - // Check Results - waveTest.runL1ErrorTest(allowedL1Error, allowedError); + // Check Results. Values based on results in Gardiner & Stone 2008 +#ifdef PCM + waveTest.runL1ErrorTest(4.E-7, 4.E-7); +#elif defined(PLMC) + waveTest.runL1ErrorTest(3.0E-8, 3.0E-8); +#elif defined(PPMC) + waveTest.runL1ErrorTest(0.0, 0.0); +#endif // PCM } // Contact Wave Moving Right @@ -355,10 +375,13 @@ TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, MHDContactWaveCorrectInputExpect waveTest.setFiducialNumTimeSteps(numTimeSteps[domain_direction - 1]); // Check Results +// Check Results. Values based on results in Gardiner & Stone 2008 #ifdef PCM - waveTest.runL1ErrorTest(1.35 * allowedL1Error, 1.35 * allowedError); -#else // PCM - waveTest.runL1ErrorTest(allowedL1Error, allowedError); + waveTest.runL1ErrorTest(5.4E-7, 5.4E-7); +#elif defined(PLMC) + waveTest.runL1ErrorTest(3.0E-8, 3.0E-8); +#elif defined(PPMC) + waveTest.runL1ErrorTest(0.0, 0.0); #endif // PCM } @@ -390,14 +413,6 @@ class tMHDSYSTEMLinearWavesParameterizedMpi : public ::testing::TestWithParam Date: Thu, 13 Apr 2023 15:22:20 -0400 Subject: [PATCH 373/694] Move all PLMC utils to reconstruction namespace --- src/reconstruction/plmc_cuda.cu | 330 +----------- src/reconstruction/plmc_cuda.h | 327 ------------ src/reconstruction/plmc_cuda_tests.cu | 82 --- src/reconstruction/reconstruction.h | 615 ++++++++++++++++++++++ src/reconstruction/reconstruction_tests.h | 102 ++++ 5 files changed, 739 insertions(+), 717 deletions(-) create mode 100644 src/reconstruction/reconstruction.h create mode 100644 src/reconstruction/reconstruction_tests.h diff --git a/src/reconstruction/plmc_cuda.cu b/src/reconstruction/plmc_cuda.cu index 3388f4318..d0e200803 100644 --- a/src/reconstruction/plmc_cuda.cu +++ b/src/reconstruction/plmc_cuda.cu @@ -8,6 +8,7 @@ #include "../global/global.h" #include "../global/global_cuda.h" #include "../reconstruction/plmc_cuda.h" +#include "../reconstruction/reconstruction.h" #include "../utils/cuda_utilities.h" #include "../utils/gpu.hpp" @@ -58,15 +59,15 @@ __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou // load the 3-cell stencil into registers // cell i - plmc_utils::PlmcPrimitive const cell_i = - plmc_utils::Load_Data(dev_conserved, xid, yid, zid, nx, ny, n_cells, o1, o2, o3, gamma); + reconstruction::Primitive const cell_i = + reconstruction::Load_Data(dev_conserved, xid, yid, zid, nx, ny, n_cells, o1, o2, o3, gamma); // cell i-1. The equality checks check the direction and subtract one from the direction - plmc_utils::PlmcPrimitive const cell_imo = plmc_utils::Load_Data( + reconstruction::Primitive const cell_imo = reconstruction::Load_Data( dev_conserved, xid - int(dir == 0), yid - int(dir == 1), zid - int(dir == 2), nx, ny, n_cells, o1, o2, o3, gamma); // cell i+1. The equality checks check the direction and add one to the direction - plmc_utils::PlmcPrimitive const cell_ipo = plmc_utils::Load_Data( + reconstruction::Primitive const cell_ipo = reconstruction::Load_Data( dev_conserved, xid + int(dir == 0), yid + int(dir == 1), zid + int(dir == 2), nx, ny, n_cells, o1, o2, o3, gamma); // calculate the adiabatic sound speed in cell i @@ -78,41 +79,41 @@ __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou // the cell center // left - plmc_utils::PlmcPrimitive const del_L = plmc_utils::Compute_Slope(cell_i, cell_imo); + reconstruction::Primitive const del_L = reconstruction::Compute_Slope(cell_i, cell_imo); // right - plmc_utils::PlmcPrimitive const del_R = plmc_utils::Compute_Slope(cell_ipo, cell_i); + reconstruction::Primitive const del_R = reconstruction::Compute_Slope(cell_ipo, cell_i); // centered - plmc_utils::PlmcPrimitive const del_C = plmc_utils::Compute_Slope(cell_ipo, cell_imo, 0.5); + reconstruction::Primitive const del_C = reconstruction::Compute_Slope(cell_ipo, cell_imo, 0.5); // Van Leer - plmc_utils::PlmcPrimitive const del_G = plmc_utils::Van_Leer_Slope(del_L, del_R); + reconstruction::Primitive const del_G = reconstruction::Van_Leer_Slope(del_L, del_R); // Project the left, right, centered and van Leer differences onto the // characteristic variables Stone Eqn 37 (del_a are differences in // characteristic variables, see Stone for notation) Use the eigenvectors // given in Stone 2008, Appendix A - plmc_utils::PlmcCharacteristic const del_a_L = - plmc_utils::Primitive_To_Characteristic(cell_i, del_L, sound_speed, sound_speed_squared, gamma); + reconstruction::Characteristic const del_a_L = + reconstruction::Primitive_To_Characteristic(cell_i, del_L, sound_speed, sound_speed_squared, gamma); - plmc_utils::PlmcCharacteristic const del_a_R = - plmc_utils::Primitive_To_Characteristic(cell_i, del_R, sound_speed, sound_speed_squared, gamma); + reconstruction::Characteristic const del_a_R = + reconstruction::Primitive_To_Characteristic(cell_i, del_R, sound_speed, sound_speed_squared, gamma); - plmc_utils::PlmcCharacteristic const del_a_C = - plmc_utils::Primitive_To_Characteristic(cell_i, del_C, sound_speed, sound_speed_squared, gamma); + reconstruction::Characteristic const del_a_C = + reconstruction::Primitive_To_Characteristic(cell_i, del_C, sound_speed, sound_speed_squared, gamma); - plmc_utils::PlmcCharacteristic const del_a_G = - plmc_utils::Primitive_To_Characteristic(cell_i, del_G, sound_speed, sound_speed_squared, gamma); + reconstruction::Characteristic const del_a_G = + reconstruction::Primitive_To_Characteristic(cell_i, del_G, sound_speed, sound_speed_squared, gamma); // Apply monotonicity constraints to the differences in the characteristic variables and project the monotonized // difference in the characteristic variables back onto the primitive variables Stone Eqn 39 - plmc_utils::PlmcPrimitive del_m_i = plmc_utils::Monotonize_Characteristic_Return_Primitive( + reconstruction::Primitive del_m_i = reconstruction::Monotonize_Characteristic_Return_Primitive( cell_i, del_L, del_R, del_C, del_G, del_a_L, del_a_R, del_a_C, del_a_G, sound_speed, sound_speed_squared, gamma); // Compute the left and right interface values using the monotonized difference in the primitive variables - plmc_utils::PlmcPrimitive interface_L_iph = plmc_utils::Calc_Interface(cell_i, del_m_i, 1.0); - plmc_utils::PlmcPrimitive interface_R_imh = plmc_utils::Calc_Interface(cell_i, del_m_i, -1.0); + reconstruction::Primitive interface_L_iph = reconstruction::Calc_Interface(cell_i, del_m_i, 1.0); + reconstruction::Primitive interface_R_imh = reconstruction::Calc_Interface(cell_i, del_m_i, -1.0); #ifndef VL @@ -280,295 +281,8 @@ __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou // Convert the left and right states in the primitive to the conserved variables send final values back from kernel // bounds_R refers to the right side of the i-1/2 interface size_t id = cuda_utilities::compute1DIndex(xid, yid, zid, nx, ny); - plmc_utils::Write_Data(interface_L_iph, dev_bounds_L, dev_conserved, id, n_cells, o1, o2, o3, gamma); + reconstruction::Write_Data(interface_L_iph, dev_bounds_L, dev_conserved, id, n_cells, o1, o2, o3, gamma); id = cuda_utilities::compute1DIndex(xid - int(dir == 0), yid - int(dir == 1), zid - int(dir == 2), nx, ny); - plmc_utils::Write_Data(interface_R_imh, dev_bounds_R, dev_conserved, id, n_cells, o1, o2, o3, gamma); + reconstruction::Write_Data(interface_R_imh, dev_bounds_R, dev_conserved, id, n_cells, o1, o2, o3, gamma); } - -namespace plmc_utils -{ -// ===================================================================================================================== -PlmcPrimitive __device__ __host__ Load_Data(Real const *dev_conserved, size_t const &xid, size_t const &yid, - size_t const &zid, size_t const &nx, size_t const &ny, - size_t const &n_cells, size_t const &o1, size_t const &o2, size_t const &o3, - Real const &gamma) -{ - // Compute index - size_t const id = cuda_utilities::compute1DIndex(xid, yid, zid, nx, ny); - - // Declare the variable we will return - PlmcPrimitive loaded_data; - - // Load hydro variables except pressure - loaded_data.density = dev_conserved[grid_enum::density * n_cells + id]; - loaded_data.velocity_x = dev_conserved[o1 * n_cells + id] / loaded_data.density; - loaded_data.velocity_y = dev_conserved[o2 * n_cells + id] / loaded_data.density; - loaded_data.velocity_z = dev_conserved[o3 * n_cells + id] / loaded_data.density; - - // Load MHD variables. Note that I only need the centered values for the transverse fields except for the initial - // computation of the primitive variables -#ifdef MHD - auto magnetic_centered = mhd::utils::cellCenteredMagneticFields(dev_conserved, id, xid, yid, zid, n_cells, nx, ny); - switch (o1) { - case grid_enum::momentum_x: - loaded_data.magnetic_x = magnetic_centered.x; - loaded_data.magnetic_y = magnetic_centered.y; - loaded_data.magnetic_z = magnetic_centered.z; - break; - case grid_enum::momentum_y: - loaded_data.magnetic_x = magnetic_centered.y; - loaded_data.magnetic_y = magnetic_centered.z; - loaded_data.magnetic_z = magnetic_centered.x; - break; - case grid_enum::momentum_z: - loaded_data.magnetic_x = magnetic_centered.z; - loaded_data.magnetic_y = magnetic_centered.x; - loaded_data.magnetic_z = magnetic_centered.y; - break; - } -#endif // MHD - -// Load pressure accounting for duel energy if enabled -#ifdef DE // DE - Real const E = dev_conserved[grid_enum::Energy * n_cells + id]; - Real const gas_energy = dev_conserved[grid_enum::GasEnergy * n_cells + id]; - - Real E_non_thermal = hydro_utilities::Calc_Kinetic_Energy_From_Velocity( - loaded_data.density, loaded_data.velocity_x, loaded_data.velocity_y, loaded_data.velocity_z); - - #ifdef MHD - E_non_thermal += mhd::utils::computeMagneticEnergy(magnetic_centered.x, magnetic_centered.y, magnetic_centered.z); - #endif // MHD - - loaded_data.pressure = hydro_utilities::Get_Pressure_From_DE(E, E - E_non_thermal, gas_energy, gamma); - loaded_data.gas_energy = gas_energy / loaded_data.density; -#else // not DE - #ifdef MHD - loaded_data.pressure = hydro_utilities::Calc_Pressure_Primitive( - dev_conserved[grid_enum::Energy * n_cells + id], loaded_data.density, loaded_data.velocity_x, - loaded_data.velocity_y, loaded_data.velocity_z, gamma, loaded_data.magnetic_x, loaded_data.magnetic_y, - loaded_data.magnetic_z); - #else // not MHD - loaded_data.pressure = hydro_utilities::Calc_Pressure_Primitive( - dev_conserved[grid_enum::Energy * n_cells + id], loaded_data.density, loaded_data.velocity_x, - loaded_data.velocity_y, loaded_data.velocity_z, gamma); - #endif // MHD -#endif // DE - -#ifdef SCALAR - for (size_t i = 0; i < grid_enum::nscalars; i++) { - loaded_data.scalar[i] = dev_conserved[(grid_enum::scalar + i) * n_cells + id] / loaded_data.density; - } -#endif // SCALAR - - return loaded_data; -} -// ===================================================================================================================== - -// ===================================================================================================================== -PlmcPrimitive __device__ __host__ Compute_Slope(PlmcPrimitive const &left, PlmcPrimitive const &right, Real const &coef) -{ - PlmcPrimitive slopes; - - slopes.density = coef * (left.density - right.density); - slopes.velocity_x = coef * (left.velocity_x - right.velocity_x); - slopes.velocity_y = coef * (left.velocity_y - right.velocity_y); - slopes.velocity_z = coef * (left.velocity_z - right.velocity_z); - slopes.pressure = coef * (left.pressure - right.pressure); - -#ifdef MHD - slopes.magnetic_y = coef * (left.magnetic_y - right.magnetic_y); - slopes.magnetic_z = coef * (left.magnetic_z - right.magnetic_z); -#endif // MHD - -#ifdef DE - slopes.gas_energy = coef * (left.gas_energy - right.gas_energy); -#endif // DE - -#ifdef SCALAR - for (size_t i = 0; i < grid_enum::nscalars; i++) { - slopes.scalar[i] = coef * (left.scalar[i] - right.scalar[i]); - } -#endif // SCALAR - - return slopes; -} -// ===================================================================================================================== - -// ===================================================================================================================== -PlmcPrimitive __device__ __host__ Van_Leer_Slope(PlmcPrimitive const &left_slope, PlmcPrimitive const &right_slope) -{ - PlmcPrimitive vl_slopes; - - auto Calc_Vl_Slope = [](Real const &left, Real const &right) -> Real { - if (left * right > 0.0) { - return 2.0 * left * right / (left + right); - } else { - return 0.0; - } - }; - - vl_slopes.density = Calc_Vl_Slope(left_slope.density, right_slope.density); - vl_slopes.velocity_x = Calc_Vl_Slope(left_slope.velocity_x, right_slope.velocity_x); - vl_slopes.velocity_y = Calc_Vl_Slope(left_slope.velocity_y, right_slope.velocity_y); - vl_slopes.velocity_z = Calc_Vl_Slope(left_slope.velocity_z, right_slope.velocity_z); - vl_slopes.pressure = Calc_Vl_Slope(left_slope.pressure, right_slope.pressure); - -#ifdef MHD - vl_slopes.magnetic_y = Calc_Vl_Slope(left_slope.magnetic_y, right_slope.magnetic_y); - vl_slopes.magnetic_z = Calc_Vl_Slope(left_slope.magnetic_z, right_slope.magnetic_z); -#endif // MHD - -#ifdef DE - vl_slopes.gas_energy = Calc_Vl_Slope(left_slope.gas_energy, right_slope.gas_energy); -#endif // DE - -#ifdef SCALAR - for (size_t i = 0; i < grid_enum::nscalars; i++) { - vl_slopes.scalar[i] = Calc_Vl_Slope(left_slope.scalar[i], right_slope.scalar[i]); - } -#endif // SCALAR - - return vl_slopes; -} -// ===================================================================================================================== - -// ===================================================================================================================== -PlmcPrimitive __device__ Monotonize_Characteristic_Return_Primitive( - PlmcPrimitive const &primitive, PlmcPrimitive const &del_L, PlmcPrimitive const &del_R, PlmcPrimitive const &del_C, - PlmcPrimitive const &del_G, PlmcCharacteristic const &del_a_L, PlmcCharacteristic const &del_a_R, - PlmcCharacteristic const &del_a_C, PlmcCharacteristic const &del_a_G, Real const &sound_speed, - Real const &sound_speed_squared, Real const &gamma) -{ - // The function that will actually do the monotozation - auto Monotonize = [](Real const &left, Real const &right, Real const ¢ered, Real const &van_leer) -> Real { - if (left * right > 0.0) { - Real const lim_slope_a = 2.0 * fmin(fabs(left), fabs(right)); - Real const lim_slope_b = fmin(fabs(centered), fabs(van_leer)); - return copysign(fmin(lim_slope_a, lim_slope_b), centered); - } else { - return 0.0; - } - }; - - // the monotonized difference in the characteristic variables - PlmcCharacteristic del_a_m; - // The monotonized difference in the characteristic variables projected into the primitive variables - PlmcPrimitive output; - - // Monotonize the slopes - del_a_m.a0 = Monotonize(del_a_L.a0, del_a_R.a0, del_a_C.a0, del_a_G.a0); - del_a_m.a1 = Monotonize(del_a_L.a1, del_a_R.a1, del_a_C.a1, del_a_G.a1); - del_a_m.a2 = Monotonize(del_a_L.a2, del_a_R.a2, del_a_C.a2, del_a_G.a2); - del_a_m.a3 = Monotonize(del_a_L.a3, del_a_R.a3, del_a_C.a3, del_a_G.a3); - del_a_m.a4 = Monotonize(del_a_L.a4, del_a_R.a4, del_a_C.a4, del_a_G.a4); - -#ifdef MHD - del_a_m.a5 = Monotonize(del_a_L.a5, del_a_R.a5, del_a_C.a5, del_a_G.a5); - del_a_m.a6 = Monotonize(del_a_L.a6, del_a_R.a6, del_a_C.a6, del_a_G.a6); -#endif // MHD - -#ifdef DE - output.gas_energy = Monotonize(del_L.gas_energy, del_R.gas_energy, del_C.gas_energy, del_G.gas_energy); -#endif // DE -#ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - output.scalar[i] = Monotonize(del_L.scalar[i], del_R.scalar[i], del_C.scalar[i], del_G.scalar[i]); - } -#endif // SCALAR - - // Project into the primitive variables. Note the return by reference to preserve the values in the gas_energy and - // scalars - Characteristic_To_Primitive(primitive, del_a_m, sound_speed, sound_speed_squared, gamma, output); - - return output; -} -// ===================================================================================================================== - -// ===================================================================================================================== -PlmcPrimitive __device__ __host__ Calc_Interface(PlmcPrimitive const &primitive, PlmcPrimitive const &slopes, - Real const &sign) -{ - plmc_utils::PlmcPrimitive output; - - auto interface = [&sign](Real const &state, Real const &slope) -> Real { return state + sign * 0.5 * slope; }; - - output.density = interface(primitive.density, slopes.density); - output.velocity_x = interface(primitive.velocity_x, slopes.velocity_x); - output.velocity_y = interface(primitive.velocity_y, slopes.velocity_y); - output.velocity_z = interface(primitive.velocity_z, slopes.velocity_z); - output.pressure = interface(primitive.pressure, slopes.pressure); - -#ifdef MHD - output.magnetic_y = interface(primitive.magnetic_y, slopes.magnetic_y); - output.magnetic_z = interface(primitive.magnetic_z, slopes.magnetic_z); -#endif // MHD - -#ifdef DE - output.gas_energy = interface(primitive.gas_energy, slopes.gas_energy); -#endif // DE -#ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - output.scalar[i] = interface(primitive.scalar[i], slopes.scalar[i]); - } -#endif // SCALAR - - return output; -} -// ===================================================================================================================== - -// ===================================================================================================================== -void __device__ __host__ Write_Data(PlmcPrimitive const &interface_state, Real *dev_interface, - Real const *dev_conserved, size_t const &id, size_t const &n_cells, - size_t const &o1, size_t const &o2, size_t const &o3, Real const &gamma) -{ - // Write out density and momentum - dev_interface[grid_enum::density * n_cells + id] = interface_state.density; - dev_interface[o1 * n_cells + id] = interface_state.density * interface_state.velocity_x; - dev_interface[o2 * n_cells + id] = interface_state.density * interface_state.velocity_y; - dev_interface[o3 * n_cells + id] = interface_state.density * interface_state.velocity_z; - -#ifdef MHD - // Write the Y and Z interface states and load the X magnetic face needed to compute the energy - Real magnetic_x; - switch (o1) { - case grid_enum::momentum_x: - dev_interface[grid_enum::Q_x_magnetic_y * n_cells + id] = interface_state.magnetic_y; - dev_interface[grid_enum::Q_x_magnetic_z * n_cells + id] = interface_state.magnetic_z; - magnetic_x = dev_conserved[grid_enum::magnetic_x * n_cells + id]; - break; - case grid_enum::momentum_y: - dev_interface[grid_enum::Q_y_magnetic_z * n_cells + id] = interface_state.magnetic_y; - dev_interface[grid_enum::Q_y_magnetic_x * n_cells + id] = interface_state.magnetic_z; - magnetic_x = dev_conserved[grid_enum::magnetic_y * n_cells + id]; - break; - case grid_enum::momentum_z: - dev_interface[grid_enum::Q_z_magnetic_x * n_cells + id] = interface_state.magnetic_y; - dev_interface[grid_enum::Q_z_magnetic_y * n_cells + id] = interface_state.magnetic_z; - magnetic_x = dev_conserved[grid_enum::magnetic_z * n_cells + id]; - break; - } - - // Compute the MHD energy - dev_interface[grid_enum::Energy * n_cells + id] = hydro_utilities::Calc_Energy_Primitive( - interface_state.pressure, interface_state.density, interface_state.velocity_x, interface_state.velocity_y, - interface_state.velocity_z, gamma, magnetic_x, interface_state.magnetic_y, interface_state.magnetic_z); -#else // not MHD - // Compute the hydro energy - dev_interface[grid_enum::Energy * n_cells + id] = hydro_utilities::Calc_Energy_Primitive( - interface_state.pressure, interface_state.density, interface_state.velocity_x, interface_state.velocity_y, - interface_state.velocity_z, gamma); -#endif // MHD - -#ifdef DE - dev_interface[grid_enum::GasEnergy * n_cells + id] = interface_state.density * interface_state.gas_energy; -#endif // DE -#ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - dev_interface[(grid_enum::scalar + i) * n_cells + id] = interface_state.density * interface_state.scalar[i]; - } -#endif // SCALAR -} -// ===================================================================================================================== -} // namespace plmc_utils diff --git a/src/reconstruction/plmc_cuda.h b/src/reconstruction/plmc_cuda.h index bc2d3f3f8..4a1ca322b 100644 --- a/src/reconstruction/plmc_cuda.h +++ b/src/reconstruction/plmc_cuda.h @@ -18,331 +18,4 @@ __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, int ny, int nz, Real dx, Real dt, Real gamma, int dir, int n_fields); -namespace plmc_utils -{ -/*! - * \brief A struct for the primitive variables - * - */ -struct PlmcPrimitive { - // Hydro variables - Real density, velocity_x, velocity_y, velocity_z, pressure; - -#ifdef MHD - // These are all cell centered values - Real magnetic_x, magnetic_y, magnetic_z; -#endif // MHD - -#ifdef DE - Real gas_energy; -#endif // DE - -#ifdef SCALAR - Real scalar[grid_enum::nscalars]; -#endif // SCALAR -}; - -/*! - * \brief A struct for the characteristic variables - * - */ -struct PlmcCharacteristic { - // Hydro variables - Real a0, a1, a2, a3, a4; - -#ifdef MHD - Real a5, a6; -#endif // MHD -}; - -/*! - * \brief Load the data for PLMC reconstruction - * - * \param[in] dev_conserved The conserved array - * \param[in] xid The xid of the cell to load data from - * \param[in] yid The yid of the cell to load data from - * \param[in] zid The zid of the cell to load data from - * \param[in] nx Size in the X direction - * \param[in] ny Size in the Y direction - * \param[in] n_cells The total number of cells - * \param[in] o1 Directional parameter - * \param[in] o2 Directional parameter - * \param[in] o3 Directional parameter - * \param[in] gamma The adiabatic index - * \return PlmcPrimitive The loaded cell data - */ -PlmcPrimitive __device__ __host__ Load_Data(Real const *dev_conserved, size_t const &xid, size_t const &yid, - size_t const &zid, size_t const &nx, size_t const &ny, - size_t const &n_cells, size_t const &o1, size_t const &o2, size_t const &o3, - Real const &gamma); - -/*! - * \brief Compute a simple slope. Equation is `coef * (left - right)`. - * - * \param[in] left The data on the positive side of the slope - * \param[in] right The data on the negative side of the slope - * \param[in] coef The coefficient to multiply the slope by. Defaults to zero - * \return PlmcPrimitive The slopes - */ -PlmcPrimitive __device__ __host__ Compute_Slope(PlmcPrimitive const &left, PlmcPrimitive const &right, - Real const &coef = 1.0); - -/*! - * \brief Compute the Van Lear slope from the left and right slopes - * - * \param[in] left_slope The left slope - * \param[in] right_slope The right slope - * \return PlmcPrimitive The Van Leer slope - */ -PlmcPrimitive __device__ __host__ Van_Leer_Slope(PlmcPrimitive const &left_slope, PlmcPrimitive const &right_slope); - -/*! - * \brief Project from the primitive variables slopes to the characteristic variables slopes. Stone Eqn 37. Use the - * eigenvectors given in Stone 2008, Appendix A - * - * \param[in] primitive The primitive variables - * \param[in] primitive_slope The primitive variables slopes - * \param[in] sound_speed The speed of sound - * \param[in] sound_speed_squared The speed of sound squared - * \param[in] gamma The adiabatic index - * \return PlmcCharacteristic - */ -PlmcCharacteristic __device__ __inline__ Primitive_To_Characteristic(PlmcPrimitive const &primitive, - PlmcPrimitive const &primitive_slope, - Real const &sound_speed, - Real const &sound_speed_squared, Real const &gamma) -{ - PlmcCharacteristic output; - -#ifdef MHD - // This is taken from Stone et al. 2008, appendix A. Equation numbers will be quoted as relevant - - // First, compute some basic quantities we will need later - Real const inverse_sqrt_density = rsqrt(primitive.density); - - // Compute wave speeds and their squares - Real const magnetosonic_speed_fast = mhd::utils::fastMagnetosonicSpeed( - primitive.density, primitive.pressure, primitive.magnetic_x, primitive.magnetic_y, primitive.magnetic_z, gamma); - Real const magnetosonic_speed_slow = mhd::utils::slowMagnetosonicSpeed( - primitive.density, primitive.pressure, primitive.magnetic_x, primitive.magnetic_y, primitive.magnetic_z, gamma); - - Real const magnetosonic_speed_fast_squared = magnetosonic_speed_fast * magnetosonic_speed_fast; - Real const magnetosonic_speed_slow_squared = magnetosonic_speed_slow * magnetosonic_speed_slow; - - // Compute Alphas (equation A16) - Real alpha_fast, alpha_slow; - if (Real const denom = (magnetosonic_speed_fast_squared - magnetosonic_speed_slow_squared), - numerator_2 = (magnetosonic_speed_fast_squared - sound_speed_squared); - denom <= 0.0 or numerator_2 <= 0.0) { - alpha_fast = 1.0; - alpha_slow = 0.0; - } else if (Real const numerator_1 = (sound_speed_squared - magnetosonic_speed_slow_squared); numerator_1 <= 0.0) { - alpha_fast = 0.0; - alpha_slow = 1.0; - } else { - alpha_fast = sqrt(numerator_1 / denom); - alpha_slow = sqrt(numerator_2 / denom); - } - - // Compute Betas (equation A17). Note that rhypot can return an inf if By and Bz are both zero, the isfinite check - // handles that case - Real const beta_denom = rhypot(primitive.magnetic_y, primitive.magnetic_z); - Real const beta_y = (isfinite(beta_denom)) ? primitive.magnetic_y * beta_denom : 0.0; - Real const beta_z = (isfinite(beta_denom)) ? primitive.magnetic_z * beta_denom : 0.0; - - // Compute Q(s) (equation A14) - Real const n_fs = 0.5 / sound_speed_squared; // equation A19 - Real const sign = copysign(1.0, primitive.magnetic_x); - Real const q_fast = sign * n_fs * alpha_fast * magnetosonic_speed_fast; - Real const q_slow = sign * n_fs * alpha_slow * magnetosonic_speed_slow; - - // Compute A(s) (equation A15) - Real const a_prime_fast = 0.5 * alpha_fast / (sound_speed * sqrt(primitive.density)); - Real const a_prime_slow = 0.5 * alpha_slow / (sound_speed * sqrt(primitive.density)); - - // Multiply the slopes by the left eigenvector matrix given in equation 18 - output.a0 = - n_fs * alpha_fast * - (primitive_slope.pressure / primitive.density - magnetosonic_speed_fast * primitive_slope.velocity_x) + - q_slow * (beta_y * primitive_slope.velocity_y + beta_z * primitive_slope.velocity_z) + - a_prime_slow * (beta_y * primitive_slope.magnetic_y + beta_z * primitive_slope.magnetic_z); - - output.a1 = 0.5 * (beta_y * (primitive_slope.magnetic_z * sign * inverse_sqrt_density + primitive_slope.velocity_z) - - beta_z * (primitive_slope.magnetic_y * sign * inverse_sqrt_density + primitive_slope.velocity_y)); - - output.a2 = - n_fs * alpha_slow * - (primitive_slope.pressure / primitive.density - magnetosonic_speed_slow * primitive_slope.velocity_x) - - q_fast * (beta_y * primitive_slope.velocity_y + beta_z * primitive_slope.velocity_z) - - a_prime_fast * (beta_y * primitive_slope.magnetic_y + beta_z * primitive_slope.magnetic_z); - - output.a3 = primitive_slope.density - primitive_slope.pressure / sound_speed_squared; - - output.a4 = - n_fs * alpha_slow * - (primitive_slope.pressure / primitive.density + magnetosonic_speed_slow * primitive_slope.velocity_x) + - q_fast * (beta_y * primitive_slope.velocity_y + beta_z * primitive_slope.velocity_z) - - a_prime_fast * (beta_y * primitive_slope.magnetic_y + beta_z * primitive_slope.magnetic_z); - output.a5 = 0.5 * (beta_y * (primitive_slope.magnetic_z * sign * inverse_sqrt_density - primitive_slope.velocity_z) - - beta_z * (primitive_slope.magnetic_y * sign * inverse_sqrt_density - primitive_slope.velocity_y)); - - output.a6 = - n_fs * alpha_fast * - (primitive_slope.pressure / primitive.density + magnetosonic_speed_fast * primitive_slope.velocity_x) - - q_slow * (beta_y * primitive_slope.velocity_y + beta_z * primitive_slope.velocity_z) + - a_prime_slow * (beta_y * primitive_slope.magnetic_y + beta_z * primitive_slope.magnetic_z); - -#else // not MHD - output.a0 = -primitive.density * primitive_slope.velocity_x / (2.0 * sound_speed) + - primitive_slope.pressure / (2.0 * sound_speed_squared); - output.a1 = primitive_slope.density - primitive_slope.pressure / (sound_speed_squared); - output.a2 = primitive_slope.velocity_y; - output.a3 = primitive_slope.velocity_z; - output.a4 = primitive.density * primitive_slope.velocity_x / (2.0 * sound_speed) + - primitive_slope.pressure / (2.0 * sound_speed_squared); -#endif // MHD - - return output; -} - -/*! - * \brief Project from the characteristic variables slopes to the primitive variables slopes. Stone Eqn 39. Use the - * eigenvectors given in Stone 2008, Appendix A - * - * \param[in] primitive The primitive variables - * \param[in] characteristic_slope The characteristic slopes - * \param[in] sound_speed The sound speed - * \param[in] sound_speed_squared The sound speed squared - * \param[in] gamma The adiabatic index - * \param[out] output The primitive slopes - */ -void __device__ __inline__ Characteristic_To_Primitive(PlmcPrimitive const &primitive, - PlmcCharacteristic const &characteristic_slope, - Real const &sound_speed, Real const &sound_speed_squared, - Real const &gamma, PlmcPrimitive &output) -{ -#ifdef MHD - // This is taken from Stone et al. 2008, appendix A. Equation numbers will be quoted as relevant - - // Compute wave speeds and their squares - Real const magnetosonic_speed_fast = mhd::utils::fastMagnetosonicSpeed( - primitive.density, primitive.pressure, primitive.magnetic_x, primitive.magnetic_y, primitive.magnetic_z, gamma); - Real const magnetosonic_speed_slow = mhd::utils::slowMagnetosonicSpeed( - primitive.density, primitive.pressure, primitive.magnetic_x, primitive.magnetic_y, primitive.magnetic_z, gamma); - - Real const magnetosonic_speed_fast_squared = magnetosonic_speed_fast * magnetosonic_speed_fast; - Real const magnetosonic_speed_slow_squared = magnetosonic_speed_slow * magnetosonic_speed_slow; - - // Compute Alphas (equation A16) - Real alpha_fast, alpha_slow; - if (Real const denom = (magnetosonic_speed_fast_squared - magnetosonic_speed_slow_squared), - numerator_2 = (magnetosonic_speed_fast_squared - sound_speed_squared); - denom <= 0.0 or numerator_2 <= 0.0) { - alpha_fast = 1.0; - alpha_slow = 0.0; - } else if (Real const numerator_1 = (sound_speed_squared - magnetosonic_speed_slow_squared); numerator_1 <= 0.0) { - alpha_fast = 0.0; - alpha_slow = 1.0; - } else { - alpha_fast = sqrt(numerator_1 / denom); - alpha_slow = sqrt(numerator_2 / denom); - } - - // Compute Betas (equation A17). Note that rhypot can return an inf if By and Bz are both zero, the isfinite check - // handles that case - Real const beta_denom = rhypot(primitive.magnetic_y, primitive.magnetic_z); - Real const beta_y = (isfinite(beta_denom)) ? primitive.magnetic_y * beta_denom : 0.0; - Real const beta_z = (isfinite(beta_denom)) ? primitive.magnetic_z * beta_denom : 0.0; - - // Compute Q(s) (equation A14) - Real const sign = copysign(1.0, primitive.magnetic_x); - Real const q_fast = sign * alpha_fast * magnetosonic_speed_fast; - Real const q_slow = sign * alpha_slow * magnetosonic_speed_slow; - - // Compute A(s) (equation A15) - Real const a_prime_fast = alpha_fast * sound_speed * sqrt(primitive.density); - Real const a_prime_slow = alpha_slow * sound_speed * sqrt(primitive.density); - - // Multiply the slopes by the right eigenvector matrix given in equation 12 - output.density = primitive.density * (alpha_fast * (characteristic_slope.a0 + characteristic_slope.a6) + - alpha_slow * (characteristic_slope.a2 + characteristic_slope.a4)) + - characteristic_slope.a3; - output.velocity_x = magnetosonic_speed_fast * alpha_fast * (characteristic_slope.a6 - characteristic_slope.a0) + - magnetosonic_speed_slow * alpha_slow * (characteristic_slope.a4 - characteristic_slope.a2); - output.velocity_y = beta_y * (q_slow * (characteristic_slope.a0 - characteristic_slope.a6) + - q_fast * (characteristic_slope.a4 - characteristic_slope.a2)) + - beta_z * (characteristic_slope.a5 - characteristic_slope.a1); - output.velocity_z = beta_z * (q_slow * (characteristic_slope.a0 - characteristic_slope.a6) + - q_fast * (characteristic_slope.a4 - characteristic_slope.a2)) + - beta_y * (characteristic_slope.a1 - characteristic_slope.a5); - output.pressure = primitive.density * sound_speed_squared * - (alpha_fast * (characteristic_slope.a0 + characteristic_slope.a6) + - alpha_slow * (characteristic_slope.a2 + characteristic_slope.a4)); - output.magnetic_y = beta_y * (a_prime_slow * (characteristic_slope.a0 + characteristic_slope.a6) - - a_prime_fast * (characteristic_slope.a2 + characteristic_slope.a4)) - - beta_z * sign * sqrt(primitive.density) * (characteristic_slope.a5 + characteristic_slope.a1); - output.magnetic_z = beta_z * (a_prime_slow * (characteristic_slope.a0 + characteristic_slope.a6) - - a_prime_fast * (characteristic_slope.a2 + characteristic_slope.a4)) + - beta_y * sign * sqrt(primitive.density) * (characteristic_slope.a5 + characteristic_slope.a1); - -#else // not MHD - output.density = characteristic_slope.a0 + characteristic_slope.a1 + characteristic_slope.a4; - output.velocity_x = sound_speed / primitive.density * (characteristic_slope.a4 - characteristic_slope.a0); - output.velocity_y = characteristic_slope.a2; - output.velocity_z = characteristic_slope.a3; - output.pressure = sound_speed_squared * (characteristic_slope.a0 + characteristic_slope.a4); -#endif // MHD -} - -/*! - * \brief Monotonize the characteristic slopes and project back into the primitive slopes - * - * \param[in] primitive The primitive variables - * \param[in] del_L The left primitive slopes - * \param[in] del_R The right primitive slopes - * \param[in] del_C The centered primitive slopes - * \param[in] del_G The Van Leer primitive slopes - * \param[in] del_a_L The left characteristic slopes - * \param[in] del_a_R The right characteristic slopes - * \param[in] del_a_C The centered characteristic slopes - * \param[in] del_a_G The Van Leer characteristic slopes - * \param[in] sound_speed The sound speed - * \param[in] sound_speed_squared The sound speed squared - * \param[in] gamma The adiabatic index - * \return PlmcPrimitive The Monotonized primitive slopes - */ -PlmcPrimitive __device__ Monotonize_Characteristic_Return_Primitive( - PlmcPrimitive const &primitive, PlmcPrimitive const &del_L, PlmcPrimitive const &del_R, PlmcPrimitive const &del_C, - PlmcPrimitive const &del_G, PlmcCharacteristic const &del_a_L, PlmcCharacteristic const &del_a_R, - PlmcCharacteristic const &del_a_C, PlmcCharacteristic const &del_a_G, Real const &sound_speed, - Real const &sound_speed_squared, Real const &gamma); - -/*! - * \brief Compute the interface state from the slope and cell centered state. - * - * \param[in] primitive The cell centered state - * \param[in] slopes The slopes - * \param[in] sign Whether to add or subtract the slope. +1 to add it and -1 to subtract it - * \return plmc_utils::PlmcPrimitive The interface state - */ -PlmcPrimitive __device__ __host__ Calc_Interface(PlmcPrimitive const &primitive, PlmcPrimitive const &slopes, - Real const &sign); - -/*! - * \brief Write the interface data to the appropriate arrays - * - * \param[in] interface_state The interface state to write - * \param[out] dev_interface The interface array - * \param[in] dev_conserved The conserved variables - * \param[in] id The cell id to write to - * \param[in] n_cells The total number of cells - * \param[in] o1 Directional parameter - * \param[in] o2 Directional parameter - * \param[in] o3 Directional parameter - * \param[in] gamma The adiabatic index - */ -void __device__ __host__ Write_Data(PlmcPrimitive const &interface_state, Real *dev_interface, - Real const *dev_conserved, size_t const &id, size_t const &n_cells, - size_t const &o1, size_t const &o2, size_t const &o3, Real const &gamma); -} // namespace plmc_utils #endif // PLMC_CUDA_H diff --git a/src/reconstruction/plmc_cuda_tests.cu b/src/reconstruction/plmc_cuda_tests.cu index b2a2d8ece..272bca85d 100644 --- a/src/reconstruction/plmc_cuda_tests.cu +++ b/src/reconstruction/plmc_cuda_tests.cu @@ -268,85 +268,3 @@ TEST(tMHDPlmcReconstructor, CorrectInputExpectCorrectOutput) } } } - -#ifdef MHD -namespace -{ -__global__ void test_prim_2_char(plmc_utils::PlmcPrimitive const primitive, - plmc_utils::PlmcPrimitive const primitive_slope, Real const gamma, - Real const sound_speed, Real const sound_speed_squared, - plmc_utils::PlmcCharacteristic *characteristic_slope) -{ - *characteristic_slope = - plmc_utils::Primitive_To_Characteristic(primitive, primitive_slope, sound_speed, sound_speed_squared, gamma); -} - -__global__ void test_char_2_prim(plmc_utils::PlmcPrimitive const primitive, - plmc_utils::PlmcCharacteristic const characteristic_slope, Real const gamma, - Real const sound_speed, Real const sound_speed_squared, - plmc_utils::PlmcPrimitive *primitive_slope) -{ - plmc_utils::Characteristic_To_Primitive(primitive, characteristic_slope, sound_speed, sound_speed_squared, gamma, - *primitive_slope); -} -} // namespace - -TEST(tMHDPlmcPrimitive2Characteristic, CorrectInputExpectCorrectOutput) -{ - // Test parameters - Real const &gamma = 5. / 3.; - plmc_utils::PlmcPrimitive const primitive{1, 2, 3, 4, 5, 6, 7, 8}; - plmc_utils::PlmcPrimitive const primitive_slope{9, 10, 11, 12, 13, 14, 15, 16}; - Real const sound_speed = hydro_utilities::Calc_Sound_Speed(primitive.pressure, primitive.density, gamma); - Real const sound_speed_squared = sound_speed * sound_speed; - - // Run test - cuda_utilities::DeviceVector dev_results(1); - hipLaunchKernelGGL(test_prim_2_char, 1, 1, 0, 0, primitive, primitive_slope, gamma, sound_speed, sound_speed_squared, - dev_results.data()); - CudaCheckError(); - cudaDeviceSynchronize(); - plmc_utils::PlmcCharacteristic const host_results = dev_results.at(0); - - // Check results - plmc_utils::PlmcCharacteristic const fiducial_results{ - 3.67609032478613384e+00, -5.64432521030159506e-01, -3.31429408151064075e+00, 7.44000000000000039e+00, - 3.29052143725318791e+00, -1.88144173676719539e-01, 4.07536568422372625e+00}; - testingUtilities::checkResults(fiducial_results.a0, host_results.a0, "a0"); - testingUtilities::checkResults(fiducial_results.a1, host_results.a1, "a1"); - testingUtilities::checkResults(fiducial_results.a2, host_results.a2, "a2"); - testingUtilities::checkResults(fiducial_results.a3, host_results.a3, "a3"); - testingUtilities::checkResults(fiducial_results.a4, host_results.a4, "a4"); - testingUtilities::checkResults(fiducial_results.a5, host_results.a5, "a5"); - testingUtilities::checkResults(fiducial_results.a6, host_results.a6, "a6"); -} -TEST(tMHDPlmcCharacteristic2Primitive, CorrectInputExpectCorrectOutput) -{ - // Test parameters - Real const &gamma = 5. / 3.; - plmc_utils::PlmcPrimitive const primitive{1, 2, 3, 4, 5, 6, 7, 8}; - plmc_utils::PlmcCharacteristic const characteristic_slope{17, 18, 19, 20, 21, 22, 23}; - Real const sound_speed = hydro_utilities::Calc_Sound_Speed(primitive.pressure, primitive.density, gamma); - Real const sound_speed_squared = sound_speed * sound_speed; - - // Run test - cuda_utilities::DeviceVector dev_results(1); - hipLaunchKernelGGL(test_char_2_prim, 1, 1, 0, 0, primitive, characteristic_slope, gamma, sound_speed, - sound_speed_squared, dev_results.data()); - CudaCheckError(); - cudaDeviceSynchronize(); - plmc_utils::PlmcPrimitive const host_results = dev_results.at(0); - - // Check results - plmc_utils::PlmcPrimitive const fiducial_results{ - 6.73268997307368267e+01, 1.79977606552837130e+01, 9.89872908629502835e-01, -4.94308571170036792e+00, - 3.94390831089473579e+02, -9.99000000000000000e+02, 2.88004228079705342e+01, 9.36584592818786064e+01}; - testingUtilities::checkResults(fiducial_results.density, host_results.density, "density"); - testingUtilities::checkResults(fiducial_results.velocity_x, host_results.velocity_x, "velocity_x"); - testingUtilities::checkResults(fiducial_results.velocity_y, host_results.velocity_y, "velocity_y", 1.34E-14); - testingUtilities::checkResults(fiducial_results.velocity_z, host_results.velocity_z, "velocity_z", 1.6E-14); - testingUtilities::checkResults(fiducial_results.pressure, host_results.pressure, "pressure"); - testingUtilities::checkResults(fiducial_results.magnetic_y, host_results.magnetic_y, "magnetic_y"); - testingUtilities::checkResults(fiducial_results.magnetic_z, host_results.magnetic_z, "magnetic_z"); -} -#endif // MHD diff --git a/src/reconstruction/reconstruction.h b/src/reconstruction/reconstruction.h new file mode 100644 index 000000000..68b559cb8 --- /dev/null +++ b/src/reconstruction/reconstruction.h @@ -0,0 +1,615 @@ +/*! + * \file reconstruction.h + * \author Robert 'Bob' Caddy (rvc@pitt.edu) + * \brief Contain the various structs and device functions needed for interface reconstruction + * + */ + +#pragma once + +// External Includes + +// Local Includes +#include "../global/global.h" +#include "../global/global_cuda.h" +#include "../utils/cuda_utilities.h" +#include "../utils/gpu.hpp" +#include "../utils/hydro_utilities.h" +#include "../utils/mhd_utilities.h" + +/*! + * \brief Namespace to contain various utilities for the interface reconstruction kernels + * + */ +namespace reconstruction +{ +// ===================================================================================================================== +/*! + * \brief A struct for the primitive variables + * + */ +struct Primitive { + // Hydro variables + Real density, velocity_x, velocity_y, velocity_z, pressure; + +#ifdef MHD + // These are all cell centered values + Real magnetic_x, magnetic_y, magnetic_z; +#endif // MHD + +#ifdef DE + Real gas_energy; +#endif // DE + +#ifdef SCALAR + Real scalar[grid_enum::nscalars]; +#endif // SCALAR +}; +// ===================================================================================================================== + +// ===================================================================================================================== +/*! + * \brief A struct for the characteristic variables + * + */ +struct Characteristic { + // Hydro variables + Real a0, a1, a2, a3, a4; + +#ifdef MHD + Real a5, a6; +#endif // MHD +}; +// ===================================================================================================================== + +// ===================================================================================================================== +/*! + * \brief Load the data for reconstruction + * + * \param[in] dev_conserved The conserved array + * \param[in] xid The xid of the cell to load data from + * \param[in] yid The yid of the cell to load data from + * \param[in] zid The zid of the cell to load data from + * \param[in] nx Size in the X direction + * \param[in] ny Size in the Y direction + * \param[in] n_cells The total number of cells + * \param[in] o1 Directional parameter + * \param[in] o2 Directional parameter + * \param[in] o3 Directional parameter + * \param[in] gamma The adiabatic index + * \return Primitive The loaded cell data + */ +Primitive __device__ __host__ Load_Data(Real const *dev_conserved, size_t const &xid, size_t const &yid, + size_t const &zid, size_t const &nx, size_t const &ny, size_t const &n_cells, + size_t const &o1, size_t const &o2, size_t const &o3, Real const &gamma) +{ // Compute index + size_t const id = cuda_utilities::compute1DIndex(xid, yid, zid, nx, ny); + + // Declare the variable we will return + Primitive loaded_data; + + // Load hydro variables except pressure + loaded_data.density = dev_conserved[grid_enum::density * n_cells + id]; + loaded_data.velocity_x = dev_conserved[o1 * n_cells + id] / loaded_data.density; + loaded_data.velocity_y = dev_conserved[o2 * n_cells + id] / loaded_data.density; + loaded_data.velocity_z = dev_conserved[o3 * n_cells + id] / loaded_data.density; + + // Load MHD variables. Note that I only need the centered values for the transverse fields except for the initial + // computation of the primitive variables +#ifdef MHD + auto magnetic_centered = mhd::utils::cellCenteredMagneticFields(dev_conserved, id, xid, yid, zid, n_cells, nx, ny); + switch (o1) { + case grid_enum::momentum_x: + loaded_data.magnetic_x = magnetic_centered.x; + loaded_data.magnetic_y = magnetic_centered.y; + loaded_data.magnetic_z = magnetic_centered.z; + break; + case grid_enum::momentum_y: + loaded_data.magnetic_x = magnetic_centered.y; + loaded_data.magnetic_y = magnetic_centered.z; + loaded_data.magnetic_z = magnetic_centered.x; + break; + case grid_enum::momentum_z: + loaded_data.magnetic_x = magnetic_centered.z; + loaded_data.magnetic_y = magnetic_centered.x; + loaded_data.magnetic_z = magnetic_centered.y; + break; + } +#endif // MHD + +// Load pressure accounting for duel energy if enabled +#ifdef DE // DE + Real const E = dev_conserved[grid_enum::Energy * n_cells + id]; + Real const gas_energy = dev_conserved[grid_enum::GasEnergy * n_cells + id]; + + Real E_non_thermal = hydro_utilities::Calc_Kinetic_Energy_From_Velocity( + loaded_data.density, loaded_data.velocity_x, loaded_data.velocity_y, loaded_data.velocity_z); + + #ifdef MHD + E_non_thermal += mhd::utils::computeMagneticEnergy(magnetic_centered.x, magnetic_centered.y, magnetic_centered.z); + #endif // MHD + + loaded_data.pressure = hydro_utilities::Get_Pressure_From_DE(E, E - E_non_thermal, gas_energy, gamma); + loaded_data.gas_energy = gas_energy / loaded_data.density; +#else // not DE + #ifdef MHD + loaded_data.pressure = hydro_utilities::Calc_Pressure_Primitive( + dev_conserved[grid_enum::Energy * n_cells + id], loaded_data.density, loaded_data.velocity_x, + loaded_data.velocity_y, loaded_data.velocity_z, gamma, loaded_data.magnetic_x, loaded_data.magnetic_y, + loaded_data.magnetic_z); + #else // not MHD + loaded_data.pressure = hydro_utilities::Calc_Pressure_Primitive( + dev_conserved[grid_enum::Energy * n_cells + id], loaded_data.density, loaded_data.velocity_x, + loaded_data.velocity_y, loaded_data.velocity_z, gamma); + #endif // MHD +#endif // DE + +#ifdef SCALAR + for (size_t i = 0; i < grid_enum::nscalars; i++) { + loaded_data.scalar[i] = dev_conserved[(grid_enum::scalar + i) * n_cells + id] / loaded_data.density; + } +#endif // SCALAR + + return loaded_data; +} +// ===================================================================================================================== + +// ===================================================================================================================== +/*! + * \brief Compute a simple slope. Equation is `coef * (left - right)`. + * + * \param[in] left The data on the positive side of the slope + * \param[in] right The data on the negative side of the slope + * \param[in] coef The coefficient to multiply the slope by. Defaults to zero + * \return Primitive The slopes + */ +Primitive __device__ __host__ Compute_Slope(Primitive const &left, Primitive const &right, Real const &coef = 1.0) +{ + Primitive slopes; + + slopes.density = coef * (left.density - right.density); + slopes.velocity_x = coef * (left.velocity_x - right.velocity_x); + slopes.velocity_y = coef * (left.velocity_y - right.velocity_y); + slopes.velocity_z = coef * (left.velocity_z - right.velocity_z); + slopes.pressure = coef * (left.pressure - right.pressure); + +#ifdef MHD + slopes.magnetic_y = coef * (left.magnetic_y - right.magnetic_y); + slopes.magnetic_z = coef * (left.magnetic_z - right.magnetic_z); +#endif // MHD + +#ifdef DE + slopes.gas_energy = coef * (left.gas_energy - right.gas_energy); +#endif // DE + +#ifdef SCALAR + for (size_t i = 0; i < grid_enum::nscalars; i++) { + slopes.scalar[i] = coef * (left.scalar[i] - right.scalar[i]); + } +#endif // SCALAR + + return slopes; +} +// ===================================================================================================================== + +// ===================================================================================================================== +/*! + * \brief Compute the Van Lear slope from the left and right slopes + * + * \param[in] left_slope The left slope + * \param[in] right_slope The right slope + * \return Primitive The Van Leer slope + */ +Primitive __device__ __host__ Van_Leer_Slope(Primitive const &left_slope, Primitive const &right_slope) +{ + Primitive vl_slopes; + + auto Calc_Vl_Slope = [](Real const &left, Real const &right) -> Real { + if (left * right > 0.0) { + return 2.0 * left * right / (left + right); + } else { + return 0.0; + } + }; + + vl_slopes.density = Calc_Vl_Slope(left_slope.density, right_slope.density); + vl_slopes.velocity_x = Calc_Vl_Slope(left_slope.velocity_x, right_slope.velocity_x); + vl_slopes.velocity_y = Calc_Vl_Slope(left_slope.velocity_y, right_slope.velocity_y); + vl_slopes.velocity_z = Calc_Vl_Slope(left_slope.velocity_z, right_slope.velocity_z); + vl_slopes.pressure = Calc_Vl_Slope(left_slope.pressure, right_slope.pressure); + +#ifdef MHD + vl_slopes.magnetic_y = Calc_Vl_Slope(left_slope.magnetic_y, right_slope.magnetic_y); + vl_slopes.magnetic_z = Calc_Vl_Slope(left_slope.magnetic_z, right_slope.magnetic_z); +#endif // MHD + +#ifdef DE + vl_slopes.gas_energy = Calc_Vl_Slope(left_slope.gas_energy, right_slope.gas_energy); +#endif // DE + +#ifdef SCALAR + for (size_t i = 0; i < grid_enum::nscalars; i++) { + vl_slopes.scalar[i] = Calc_Vl_Slope(left_slope.scalar[i], right_slope.scalar[i]); + } +#endif // SCALAR + + return vl_slopes; +} +// ===================================================================================================================== + +// ===================================================================================================================== +/*! + * \brief Project from the primitive variables slopes to the characteristic variables slopes. Stone Eqn 37. Use the + * eigenvectors given in Stone 2008, Appendix A + * + * \param[in] primitive The primitive variables + * \param[in] primitive_slope The primitive variables slopes + * \param[in] sound_speed The speed of sound + * \param[in] sound_speed_squared The speed of sound squared + * \param[in] gamma The adiabatic index + * \return Characteristic + */ +Characteristic __device__ __inline__ Primitive_To_Characteristic(Primitive const &primitive, + Primitive const &primitive_slope, + Real const &sound_speed, + Real const &sound_speed_squared, Real const &gamma) +{ + Characteristic output; + +#ifdef MHD + // This is taken from Stone et al. 2008, appendix A. Equation numbers will be quoted as relevant + + // First, compute some basic quantities we will need later + Real const inverse_sqrt_density = rsqrt(primitive.density); + + // Compute wave speeds and their squares + Real const magnetosonic_speed_fast = mhd::utils::fastMagnetosonicSpeed( + primitive.density, primitive.pressure, primitive.magnetic_x, primitive.magnetic_y, primitive.magnetic_z, gamma); + Real const magnetosonic_speed_slow = mhd::utils::slowMagnetosonicSpeed( + primitive.density, primitive.pressure, primitive.magnetic_x, primitive.magnetic_y, primitive.magnetic_z, gamma); + + Real const magnetosonic_speed_fast_squared = magnetosonic_speed_fast * magnetosonic_speed_fast; + Real const magnetosonic_speed_slow_squared = magnetosonic_speed_slow * magnetosonic_speed_slow; + + // Compute Alphas (equation A16) + Real alpha_fast, alpha_slow; + if (Real const denom = (magnetosonic_speed_fast_squared - magnetosonic_speed_slow_squared), + numerator_2 = (magnetosonic_speed_fast_squared - sound_speed_squared); + denom <= 0.0 or numerator_2 <= 0.0) { + alpha_fast = 1.0; + alpha_slow = 0.0; + } else if (Real const numerator_1 = (sound_speed_squared - magnetosonic_speed_slow_squared); numerator_1 <= 0.0) { + alpha_fast = 0.0; + alpha_slow = 1.0; + } else { + alpha_fast = sqrt(numerator_1 / denom); + alpha_slow = sqrt(numerator_2 / denom); + } + + // Compute Betas (equation A17). Note that rhypot can return an inf if By and Bz are both zero, the isfinite check + // handles that case + Real const beta_denom = rhypot(primitive.magnetic_y, primitive.magnetic_z); + Real const beta_y = (isfinite(beta_denom)) ? primitive.magnetic_y * beta_denom : 0.0; + Real const beta_z = (isfinite(beta_denom)) ? primitive.magnetic_z * beta_denom : 0.0; + + // Compute Q(s) (equation A14) + Real const n_fs = 0.5 / sound_speed_squared; // equation A19 + Real const sign = copysign(1.0, primitive.magnetic_x); + Real const q_fast = sign * n_fs * alpha_fast * magnetosonic_speed_fast; + Real const q_slow = sign * n_fs * alpha_slow * magnetosonic_speed_slow; + + // Compute A(s) (equation A15) + Real const a_prime_fast = 0.5 * alpha_fast / (sound_speed * sqrt(primitive.density)); + Real const a_prime_slow = 0.5 * alpha_slow / (sound_speed * sqrt(primitive.density)); + + // Multiply the slopes by the left eigenvector matrix given in equation 18 + output.a0 = + n_fs * alpha_fast * + (primitive_slope.pressure / primitive.density - magnetosonic_speed_fast * primitive_slope.velocity_x) + + q_slow * (beta_y * primitive_slope.velocity_y + beta_z * primitive_slope.velocity_z) + + a_prime_slow * (beta_y * primitive_slope.magnetic_y + beta_z * primitive_slope.magnetic_z); + + output.a1 = 0.5 * (beta_y * (primitive_slope.magnetic_z * sign * inverse_sqrt_density + primitive_slope.velocity_z) - + beta_z * (primitive_slope.magnetic_y * sign * inverse_sqrt_density + primitive_slope.velocity_y)); + + output.a2 = + n_fs * alpha_slow * + (primitive_slope.pressure / primitive.density - magnetosonic_speed_slow * primitive_slope.velocity_x) - + q_fast * (beta_y * primitive_slope.velocity_y + beta_z * primitive_slope.velocity_z) - + a_prime_fast * (beta_y * primitive_slope.magnetic_y + beta_z * primitive_slope.magnetic_z); + + output.a3 = primitive_slope.density - primitive_slope.pressure / sound_speed_squared; + + output.a4 = + n_fs * alpha_slow * + (primitive_slope.pressure / primitive.density + magnetosonic_speed_slow * primitive_slope.velocity_x) + + q_fast * (beta_y * primitive_slope.velocity_y + beta_z * primitive_slope.velocity_z) - + a_prime_fast * (beta_y * primitive_slope.magnetic_y + beta_z * primitive_slope.magnetic_z); + output.a5 = 0.5 * (beta_y * (primitive_slope.magnetic_z * sign * inverse_sqrt_density - primitive_slope.velocity_z) - + beta_z * (primitive_slope.magnetic_y * sign * inverse_sqrt_density - primitive_slope.velocity_y)); + + output.a6 = + n_fs * alpha_fast * + (primitive_slope.pressure / primitive.density + magnetosonic_speed_fast * primitive_slope.velocity_x) - + q_slow * (beta_y * primitive_slope.velocity_y + beta_z * primitive_slope.velocity_z) + + a_prime_slow * (beta_y * primitive_slope.magnetic_y + beta_z * primitive_slope.magnetic_z); + +#else // not MHD + output.a0 = -primitive.density * primitive_slope.velocity_x / (2.0 * sound_speed) + + primitive_slope.pressure / (2.0 * sound_speed_squared); + output.a1 = primitive_slope.density - primitive_slope.pressure / (sound_speed_squared); + output.a2 = primitive_slope.velocity_y; + output.a3 = primitive_slope.velocity_z; + output.a4 = primitive.density * primitive_slope.velocity_x / (2.0 * sound_speed) + + primitive_slope.pressure / (2.0 * sound_speed_squared); +#endif // MHD + + return output; +} +// ===================================================================================================================== + +// ===================================================================================================================== +/*! + * \brief Project from the characteristic variables slopes to the primitive variables slopes. Stone Eqn 39. Use the + * eigenvectors given in Stone 2008, Appendix A + * + * \param[in] primitive The primitive variables + * \param[in] characteristic_slope The characteristic slopes + * \param[in] sound_speed The sound speed + * \param[in] sound_speed_squared The sound speed squared + * \param[in] gamma The adiabatic index + * \param[out] output The primitive slopes + */ +void __device__ __inline__ Characteristic_To_Primitive(Primitive const &primitive, + Characteristic const &characteristic_slope, + Real const &sound_speed, Real const &sound_speed_squared, + Real const &gamma, Primitive &output) +{ +#ifdef MHD + // This is taken from Stone et al. 2008, appendix A. Equation numbers will be quoted as relevant + + // Compute wave speeds and their squares + Real const magnetosonic_speed_fast = mhd::utils::fastMagnetosonicSpeed( + primitive.density, primitive.pressure, primitive.magnetic_x, primitive.magnetic_y, primitive.magnetic_z, gamma); + Real const magnetosonic_speed_slow = mhd::utils::slowMagnetosonicSpeed( + primitive.density, primitive.pressure, primitive.magnetic_x, primitive.magnetic_y, primitive.magnetic_z, gamma); + + Real const magnetosonic_speed_fast_squared = magnetosonic_speed_fast * magnetosonic_speed_fast; + Real const magnetosonic_speed_slow_squared = magnetosonic_speed_slow * magnetosonic_speed_slow; + + // Compute Alphas (equation A16) + Real alpha_fast, alpha_slow; + if (Real const denom = (magnetosonic_speed_fast_squared - magnetosonic_speed_slow_squared), + numerator_2 = (magnetosonic_speed_fast_squared - sound_speed_squared); + denom <= 0.0 or numerator_2 <= 0.0) { + alpha_fast = 1.0; + alpha_slow = 0.0; + } else if (Real const numerator_1 = (sound_speed_squared - magnetosonic_speed_slow_squared); numerator_1 <= 0.0) { + alpha_fast = 0.0; + alpha_slow = 1.0; + } else { + alpha_fast = sqrt(numerator_1 / denom); + alpha_slow = sqrt(numerator_2 / denom); + } + + // Compute Betas (equation A17). Note that rhypot can return an inf if By and Bz are both zero, the isfinite check + // handles that case + Real const beta_denom = rhypot(primitive.magnetic_y, primitive.magnetic_z); + Real const beta_y = (isfinite(beta_denom)) ? primitive.magnetic_y * beta_denom : 0.0; + Real const beta_z = (isfinite(beta_denom)) ? primitive.magnetic_z * beta_denom : 0.0; + + // Compute Q(s) (equation A14) + Real const sign = copysign(1.0, primitive.magnetic_x); + Real const q_fast = sign * alpha_fast * magnetosonic_speed_fast; + Real const q_slow = sign * alpha_slow * magnetosonic_speed_slow; + + // Compute A(s) (equation A15) + Real const a_prime_fast = alpha_fast * sound_speed * sqrt(primitive.density); + Real const a_prime_slow = alpha_slow * sound_speed * sqrt(primitive.density); + + // Multiply the slopes by the right eigenvector matrix given in equation 12 + output.density = primitive.density * (alpha_fast * (characteristic_slope.a0 + characteristic_slope.a6) + + alpha_slow * (characteristic_slope.a2 + characteristic_slope.a4)) + + characteristic_slope.a3; + output.velocity_x = magnetosonic_speed_fast * alpha_fast * (characteristic_slope.a6 - characteristic_slope.a0) + + magnetosonic_speed_slow * alpha_slow * (characteristic_slope.a4 - characteristic_slope.a2); + output.velocity_y = beta_y * (q_slow * (characteristic_slope.a0 - characteristic_slope.a6) + + q_fast * (characteristic_slope.a4 - characteristic_slope.a2)) + + beta_z * (characteristic_slope.a5 - characteristic_slope.a1); + output.velocity_z = beta_z * (q_slow * (characteristic_slope.a0 - characteristic_slope.a6) + + q_fast * (characteristic_slope.a4 - characteristic_slope.a2)) + + beta_y * (characteristic_slope.a1 - characteristic_slope.a5); + output.pressure = primitive.density * sound_speed_squared * + (alpha_fast * (characteristic_slope.a0 + characteristic_slope.a6) + + alpha_slow * (characteristic_slope.a2 + characteristic_slope.a4)); + output.magnetic_y = beta_y * (a_prime_slow * (characteristic_slope.a0 + characteristic_slope.a6) - + a_prime_fast * (characteristic_slope.a2 + characteristic_slope.a4)) - + beta_z * sign * sqrt(primitive.density) * (characteristic_slope.a5 + characteristic_slope.a1); + output.magnetic_z = beta_z * (a_prime_slow * (characteristic_slope.a0 + characteristic_slope.a6) - + a_prime_fast * (characteristic_slope.a2 + characteristic_slope.a4)) + + beta_y * sign * sqrt(primitive.density) * (characteristic_slope.a5 + characteristic_slope.a1); + +#else // not MHD + output.density = characteristic_slope.a0 + characteristic_slope.a1 + characteristic_slope.a4; + output.velocity_x = sound_speed / primitive.density * (characteristic_slope.a4 - characteristic_slope.a0); + output.velocity_y = characteristic_slope.a2; + output.velocity_z = characteristic_slope.a3; + output.pressure = sound_speed_squared * (characteristic_slope.a0 + characteristic_slope.a4); +#endif // MHD +} +// ===================================================================================================================== + +// ===================================================================================================================== +/*! + * \brief Monotonize the characteristic slopes and project back into the primitive slopes + * + * \param[in] primitive The primitive variables + * \param[in] del_L The left primitive slopes + * \param[in] del_R The right primitive slopes + * \param[in] del_C The centered primitive slopes + * \param[in] del_G The Van Leer primitive slopes + * \param[in] del_a_L The left characteristic slopes + * \param[in] del_a_R The right characteristic slopes + * \param[in] del_a_C The centered characteristic slopes + * \param[in] del_a_G The Van Leer characteristic slopes + * \param[in] sound_speed The sound speed + * \param[in] sound_speed_squared The sound speed squared + * \param[in] gamma The adiabatic index + * \return Primitive The Monotonized primitive slopes + */ +Primitive __device__ Monotonize_Characteristic_Return_Primitive( + Primitive const &primitive, Primitive const &del_L, Primitive const &del_R, Primitive const &del_C, + Primitive const &del_G, Characteristic const &del_a_L, Characteristic const &del_a_R, Characteristic const &del_a_C, + Characteristic const &del_a_G, Real const &sound_speed, Real const &sound_speed_squared, Real const &gamma) +{ + // The function that will actually do the monotozation + auto Monotonize = [](Real const &left, Real const &right, Real const ¢ered, Real const &van_leer) -> Real { + if (left * right > 0.0) { + Real const lim_slope_a = 2.0 * fmin(fabs(left), fabs(right)); + Real const lim_slope_b = fmin(fabs(centered), fabs(van_leer)); + return copysign(fmin(lim_slope_a, lim_slope_b), centered); + } else { + return 0.0; + } + }; + + // the monotonized difference in the characteristic variables + Characteristic del_a_m; + // The monotonized difference in the characteristic variables projected into the primitive variables + Primitive output; + + // Monotonize the slopes + del_a_m.a0 = Monotonize(del_a_L.a0, del_a_R.a0, del_a_C.a0, del_a_G.a0); + del_a_m.a1 = Monotonize(del_a_L.a1, del_a_R.a1, del_a_C.a1, del_a_G.a1); + del_a_m.a2 = Monotonize(del_a_L.a2, del_a_R.a2, del_a_C.a2, del_a_G.a2); + del_a_m.a3 = Monotonize(del_a_L.a3, del_a_R.a3, del_a_C.a3, del_a_G.a3); + del_a_m.a4 = Monotonize(del_a_L.a4, del_a_R.a4, del_a_C.a4, del_a_G.a4); + +#ifdef MHD + del_a_m.a5 = Monotonize(del_a_L.a5, del_a_R.a5, del_a_C.a5, del_a_G.a5); + del_a_m.a6 = Monotonize(del_a_L.a6, del_a_R.a6, del_a_C.a6, del_a_G.a6); +#endif // MHD + +#ifdef DE + output.gas_energy = Monotonize(del_L.gas_energy, del_R.gas_energy, del_C.gas_energy, del_G.gas_energy); +#endif // DE +#ifdef SCALAR + for (int i = 0; i < NSCALARS; i++) { + output.scalar[i] = Monotonize(del_L.scalar[i], del_R.scalar[i], del_C.scalar[i], del_G.scalar[i]); + } +#endif // SCALAR + + // Project into the primitive variables. Note the return by reference to preserve the values in the gas_energy and + // scalars + Characteristic_To_Primitive(primitive, del_a_m, sound_speed, sound_speed_squared, gamma, output); + + return output; +} +// ===================================================================================================================== + +// ===================================================================================================================== +/*! + * \brief Compute the interface state from the slope and cell centered state. + * + * \param[in] primitive The cell centered state + * \param[in] slopes The slopes + * \param[in] sign Whether to add or subtract the slope. +1 to add it and -1 to subtract it + * \return Primitive The interface state + */ +Primitive __device__ __host__ Calc_Interface(Primitive const &primitive, Primitive const &slopes, Real const &sign) +{ + Primitive output; + + auto interface = [&sign](Real const &state, Real const &slope) -> Real { return state + sign * 0.5 * slope; }; + + output.density = interface(primitive.density, slopes.density); + output.velocity_x = interface(primitive.velocity_x, slopes.velocity_x); + output.velocity_y = interface(primitive.velocity_y, slopes.velocity_y); + output.velocity_z = interface(primitive.velocity_z, slopes.velocity_z); + output.pressure = interface(primitive.pressure, slopes.pressure); + +#ifdef MHD + output.magnetic_y = interface(primitive.magnetic_y, slopes.magnetic_y); + output.magnetic_z = interface(primitive.magnetic_z, slopes.magnetic_z); +#endif // MHD + +#ifdef DE + output.gas_energy = interface(primitive.gas_energy, slopes.gas_energy); +#endif // DE +#ifdef SCALAR + for (int i = 0; i < NSCALARS; i++) { + output.scalar[i] = interface(primitive.scalar[i], slopes.scalar[i]); + } +#endif // SCALAR + + return output; +} +// ===================================================================================================================== + +// ===================================================================================================================== +/*! + * \brief Write the interface data to the appropriate arrays + * + * \param[in] interface_state The interface state to write + * \param[out] dev_interface The interface array + * \param[in] dev_conserved The conserved variables + * \param[in] id The cell id to write to + * \param[in] n_cells The total number of cells + * \param[in] o1 Directional parameter + * \param[in] o2 Directional parameter + * \param[in] o3 Directional parameter + * \param[in] gamma The adiabatic index + */ +void __device__ __host__ Write_Data(Primitive const &interface_state, Real *dev_interface, Real const *dev_conserved, + size_t const &id, size_t const &n_cells, size_t const &o1, size_t const &o2, + size_t const &o3, Real const &gamma) +{ + // Write out density and momentum + dev_interface[grid_enum::density * n_cells + id] = interface_state.density; + dev_interface[o1 * n_cells + id] = interface_state.density * interface_state.velocity_x; + dev_interface[o2 * n_cells + id] = interface_state.density * interface_state.velocity_y; + dev_interface[o3 * n_cells + id] = interface_state.density * interface_state.velocity_z; + +#ifdef MHD + // Write the Y and Z interface states and load the X magnetic face needed to compute the energy + Real magnetic_x; + switch (o1) { + case grid_enum::momentum_x: + dev_interface[grid_enum::Q_x_magnetic_y * n_cells + id] = interface_state.magnetic_y; + dev_interface[grid_enum::Q_x_magnetic_z * n_cells + id] = interface_state.magnetic_z; + magnetic_x = dev_conserved[grid_enum::magnetic_x * n_cells + id]; + break; + case grid_enum::momentum_y: + dev_interface[grid_enum::Q_y_magnetic_z * n_cells + id] = interface_state.magnetic_y; + dev_interface[grid_enum::Q_y_magnetic_x * n_cells + id] = interface_state.magnetic_z; + magnetic_x = dev_conserved[grid_enum::magnetic_y * n_cells + id]; + break; + case grid_enum::momentum_z: + dev_interface[grid_enum::Q_z_magnetic_x * n_cells + id] = interface_state.magnetic_y; + dev_interface[grid_enum::Q_z_magnetic_y * n_cells + id] = interface_state.magnetic_z; + magnetic_x = dev_conserved[grid_enum::magnetic_z * n_cells + id]; + break; + } + + // Compute the MHD energy + dev_interface[grid_enum::Energy * n_cells + id] = hydro_utilities::Calc_Energy_Primitive( + interface_state.pressure, interface_state.density, interface_state.velocity_x, interface_state.velocity_y, + interface_state.velocity_z, gamma, magnetic_x, interface_state.magnetic_y, interface_state.magnetic_z); +#else // not MHD + // Compute the hydro energy + dev_interface[grid_enum::Energy * n_cells + id] = hydro_utilities::Calc_Energy_Primitive( + interface_state.pressure, interface_state.density, interface_state.velocity_x, interface_state.velocity_y, + interface_state.velocity_z, gamma); +#endif // MHD + +#ifdef DE + dev_interface[grid_enum::GasEnergy * n_cells + id] = interface_state.density * interface_state.gas_energy; +#endif // DE +#ifdef SCALAR + for (int i = 0; i < NSCALARS; i++) { + dev_interface[(grid_enum::scalar + i) * n_cells + id] = interface_state.density * interface_state.scalar[i]; + } +#endif // SCALAR +} +// ===================================================================================================================== +} // namespace reconstruction diff --git a/src/reconstruction/reconstruction_tests.h b/src/reconstruction/reconstruction_tests.h new file mode 100644 index 000000000..305f58f7b --- /dev/null +++ b/src/reconstruction/reconstruction_tests.h @@ -0,0 +1,102 @@ +/*! + * \file reconstruction_tests.cu + * \brief Tests for the contents of reconstruction.h + * + */ + +// STL Includes +#include + +// External Includes +#include // Include GoogleTest and related libraries/headers + +// Local Includes +#include "../global/global.h" +#include "../global/global_cuda.h" +#include "../reconstruction/reconstruction.h" +#include "../utils/DeviceVector.h" +#include "../utils/gpu.hpp" +#include "../utils/testing_utilities.h" + +#ifdef MHD +namespace +{ +__global__ void test_prim_2_char(reconstruction::Primitive const primitive, + reconstruction::Primitive const primitive_slope, Real const gamma, + Real const sound_speed, Real const sound_speed_squared, + reconstruction::Characteristic *characteristic_slope) +{ + *characteristic_slope = + reconstruction::Primitive_To_Characteristic(primitive, primitive_slope, sound_speed, sound_speed_squared, gamma); +} + +__global__ void test_char_2_prim(reconstruction::Primitive const primitive, + reconstruction::Characteristic const characteristic_slope, Real const gamma, + Real const sound_speed, Real const sound_speed_squared, + reconstruction::Primitive *primitive_slope) +{ + reconstruction::Characteristic_To_Primitive(primitive, characteristic_slope, sound_speed, sound_speed_squared, gamma, + *primitive_slope); +} +} // namespace + +TEST(tMHDReconstructionPrimitive2Characteristic, CorrectInputExpectCorrectOutput) +{ + // Test parameters + Real const &gamma = 5. / 3.; + reconstruction::Primitive const primitive{1, 2, 3, 4, 5, 6, 7, 8}; + reconstruction::Primitive const primitive_slope{9, 10, 11, 12, 13, 14, 15, 16}; + Real const sound_speed = hydro_utilities::Calc_Sound_Speed(primitive.pressure, primitive.density, gamma); + Real const sound_speed_squared = sound_speed * sound_speed; + + // Run test + cuda_utilities::DeviceVector dev_results(1); + hipLaunchKernelGGL(test_prim_2_char, 1, 1, 0, 0, primitive, primitive_slope, gamma, sound_speed, sound_speed_squared, + dev_results.data()); + CudaCheckError(); + cudaDeviceSynchronize(); + reconstruction::Characteristic const host_results = dev_results.at(0); + + // Check results + reconstruction::Characteristic const fiducial_results{ + 3.67609032478613384e+00, -5.64432521030159506e-01, -3.31429408151064075e+00, 7.44000000000000039e+00, + 3.29052143725318791e+00, -1.88144173676719539e-01, 4.07536568422372625e+00}; + testingUtilities::checkResults(fiducial_results.a0, host_results.a0, "a0"); + testingUtilities::checkResults(fiducial_results.a1, host_results.a1, "a1"); + testingUtilities::checkResults(fiducial_results.a2, host_results.a2, "a2"); + testingUtilities::checkResults(fiducial_results.a3, host_results.a3, "a3"); + testingUtilities::checkResults(fiducial_results.a4, host_results.a4, "a4"); + testingUtilities::checkResults(fiducial_results.a5, host_results.a5, "a5"); + testingUtilities::checkResults(fiducial_results.a6, host_results.a6, "a6"); +} + +TEST(tMHDReconstructionCharacteristic2Primitive, CorrectInputExpectCorrectOutput) +{ + // Test parameters + Real const &gamma = 5. / 3.; + reconstruction::Primitive const primitive{1, 2, 3, 4, 5, 6, 7, 8}; + reconstruction::Characteristic const characteristic_slope{17, 18, 19, 20, 21, 22, 23}; + Real const sound_speed = hydro_utilities::Calc_Sound_Speed(primitive.pressure, primitive.density, gamma); + Real const sound_speed_squared = sound_speed * sound_speed; + + // Run test + cuda_utilities::DeviceVector dev_results(1); + hipLaunchKernelGGL(test_char_2_prim, 1, 1, 0, 0, primitive, characteristic_slope, gamma, sound_speed, + sound_speed_squared, dev_results.data()); + CudaCheckError(); + cudaDeviceSynchronize(); + reconstruction::Primitive const host_results = dev_results.at(0); + + // Check results + reconstruction::Primitive const fiducial_results{ + 6.73268997307368267e+01, 1.79977606552837130e+01, 9.89872908629502835e-01, -4.94308571170036792e+00, + 3.94390831089473579e+02, -9.99000000000000000e+02, 2.88004228079705342e+01, 9.36584592818786064e+01}; + testingUtilities::checkResults(fiducial_results.density, host_results.density, "density"); + testingUtilities::checkResults(fiducial_results.velocity_x, host_results.velocity_x, "velocity_x"); + testingUtilities::checkResults(fiducial_results.velocity_y, host_results.velocity_y, "velocity_y", 1.34E-14); + testingUtilities::checkResults(fiducial_results.velocity_z, host_results.velocity_z, "velocity_z", 1.6E-14); + testingUtilities::checkResults(fiducial_results.pressure, host_results.pressure, "pressure"); + testingUtilities::checkResults(fiducial_results.magnetic_y, host_results.magnetic_y, "magnetic_y"); + testingUtilities::checkResults(fiducial_results.magnetic_z, host_results.magnetic_z, "magnetic_z"); +} +#endif // MHD \ No newline at end of file From 28ee8d0e8dd628d99babbef8f9840163abdb7c00 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Thu, 13 Apr 2023 17:11:46 -0400 Subject: [PATCH 374/694] Add more tests for reconstruction functions --- src/reconstruction/reconstruction.h | 25 +- src/reconstruction/reconstruction_tests.cu | 361 +++++++++++++++++++++ src/reconstruction/reconstruction_tests.h | 102 ------ 3 files changed, 375 insertions(+), 113 deletions(-) create mode 100644 src/reconstruction/reconstruction_tests.cu delete mode 100644 src/reconstruction/reconstruction_tests.h diff --git a/src/reconstruction/reconstruction.h b/src/reconstruction/reconstruction.h index 68b559cb8..bdfd9b46a 100644 --- a/src/reconstruction/reconstruction.h +++ b/src/reconstruction/reconstruction.h @@ -79,9 +79,10 @@ struct Characteristic { * \param[in] gamma The adiabatic index * \return Primitive The loaded cell data */ -Primitive __device__ __host__ Load_Data(Real const *dev_conserved, size_t const &xid, size_t const &yid, - size_t const &zid, size_t const &nx, size_t const &ny, size_t const &n_cells, - size_t const &o1, size_t const &o2, size_t const &o3, Real const &gamma) +Primitive __device__ __host__ __inline__ Load_Data(Real const *dev_conserved, size_t const &xid, size_t const &yid, + size_t const &zid, size_t const &nx, size_t const &ny, + size_t const &n_cells, size_t const &o1, size_t const &o2, + size_t const &o3, Real const &gamma) { // Compute index size_t const id = cuda_utilities::compute1DIndex(xid, yid, zid, nx, ny); @@ -160,10 +161,11 @@ Primitive __device__ __host__ Load_Data(Real const *dev_conserved, size_t const * * \param[in] left The data on the positive side of the slope * \param[in] right The data on the negative side of the slope - * \param[in] coef The coefficient to multiply the slope by. Defaults to zero + * \param[in] coef The coefficient to multiply the slope by. Defaults to 1.0 * \return Primitive The slopes */ -Primitive __device__ __host__ Compute_Slope(Primitive const &left, Primitive const &right, Real const &coef = 1.0) +Primitive __device__ __host__ __inline__ Compute_Slope(Primitive const &left, Primitive const &right, + Real const &coef = 1.0) { Primitive slopes; @@ -200,7 +202,7 @@ Primitive __device__ __host__ Compute_Slope(Primitive const &left, Primitive con * \param[in] right_slope The right slope * \return Primitive The Van Leer slope */ -Primitive __device__ __host__ Van_Leer_Slope(Primitive const &left_slope, Primitive const &right_slope) +Primitive __device__ __host__ __inline__ Van_Leer_Slope(Primitive const &left_slope, Primitive const &right_slope) { Primitive vl_slopes; @@ -457,7 +459,7 @@ void __device__ __inline__ Characteristic_To_Primitive(Primitive const &primitiv * \param[in] gamma The adiabatic index * \return Primitive The Monotonized primitive slopes */ -Primitive __device__ Monotonize_Characteristic_Return_Primitive( +Primitive __device__ __inline__ Monotonize_Characteristic_Return_Primitive( Primitive const &primitive, Primitive const &del_L, Primitive const &del_R, Primitive const &del_C, Primitive const &del_G, Characteristic const &del_a_L, Characteristic const &del_a_R, Characteristic const &del_a_C, Characteristic const &del_a_G, Real const &sound_speed, Real const &sound_speed_squared, Real const &gamma) @@ -516,7 +518,8 @@ Primitive __device__ Monotonize_Characteristic_Return_Primitive( * \param[in] sign Whether to add or subtract the slope. +1 to add it and -1 to subtract it * \return Primitive The interface state */ -Primitive __device__ __host__ Calc_Interface(Primitive const &primitive, Primitive const &slopes, Real const &sign) +Primitive __device__ __host__ __inline__ Calc_Interface(Primitive const &primitive, Primitive const &slopes, + Real const &sign) { Primitive output; @@ -560,9 +563,9 @@ Primitive __device__ __host__ Calc_Interface(Primitive const &primitive, Primiti * \param[in] o3 Directional parameter * \param[in] gamma The adiabatic index */ -void __device__ __host__ Write_Data(Primitive const &interface_state, Real *dev_interface, Real const *dev_conserved, - size_t const &id, size_t const &n_cells, size_t const &o1, size_t const &o2, - size_t const &o3, Real const &gamma) +void __device__ __host__ __inline__ Write_Data(Primitive const &interface_state, Real *dev_interface, + Real const *dev_conserved, size_t const &id, size_t const &n_cells, + size_t const &o1, size_t const &o2, size_t const &o3, Real const &gamma) { // Write out density and momentum dev_interface[grid_enum::density * n_cells + id] = interface_state.density; diff --git a/src/reconstruction/reconstruction_tests.cu b/src/reconstruction/reconstruction_tests.cu new file mode 100644 index 000000000..d786bff5e --- /dev/null +++ b/src/reconstruction/reconstruction_tests.cu @@ -0,0 +1,361 @@ +/*! + * \file reconstruction_tests.cu + * \brief Tests for the contents of reconstruction.h + * + */ + +// STL Includes +#include +#include +#include + +// External Includes +#include // Include GoogleTest and related libraries/headers + +// Local Includes +#include "../global/global.h" +#include "../global/global_cuda.h" +#include "../reconstruction/reconstruction.h" +#include "../utils/DeviceVector.h" +#include "../utils/gpu.hpp" +#include "../utils/testing_utilities.h" + +#ifdef MHD +__global__ void test_prim_2_char(reconstruction::Primitive const primitive, + reconstruction::Primitive const primitive_slope, Real const gamma, + Real const sound_speed, Real const sound_speed_squared, + reconstruction::Characteristic *characteristic_slope) +{ + *characteristic_slope = + reconstruction::Primitive_To_Characteristic(primitive, primitive_slope, sound_speed, sound_speed_squared, gamma); +} + +__global__ void test_char_2_prim(reconstruction::Primitive const primitive, + reconstruction::Characteristic const characteristic_slope, Real const gamma, + Real const sound_speed, Real const sound_speed_squared, + reconstruction::Primitive *primitive_slope) +{ + reconstruction::Characteristic_To_Primitive(primitive, characteristic_slope, sound_speed, sound_speed_squared, gamma, + *primitive_slope); +} + +TEST(tMHDReconstructionPrimitive2Characteristic, CorrectInputExpectCorrectOutput) +{ + // Test parameters + Real const &gamma = 5. / 3.; + reconstruction::Primitive const primitive{1, 2, 3, 4, 5, 6, 7, 8}; + reconstruction::Primitive const primitive_slope{9, 10, 11, 12, 13, 14, 15, 16}; + Real const sound_speed = hydro_utilities::Calc_Sound_Speed(primitive.pressure, primitive.density, gamma); + Real const sound_speed_squared = sound_speed * sound_speed; + + // Run test + cuda_utilities::DeviceVector dev_results(1); + hipLaunchKernelGGL(test_prim_2_char, 1, 1, 0, 0, primitive, primitive_slope, gamma, sound_speed, sound_speed_squared, + dev_results.data()); + CudaCheckError(); + cudaDeviceSynchronize(); + reconstruction::Characteristic const host_results = dev_results.at(0); + + // Check results + reconstruction::Characteristic const fiducial_results{ + 3.67609032478613384e+00, -5.64432521030159506e-01, -3.31429408151064075e+00, 7.44000000000000039e+00, + 3.29052143725318791e+00, -1.88144173676719539e-01, 4.07536568422372625e+00}; + testingUtilities::checkResults(fiducial_results.a0, host_results.a0, "a0"); + testingUtilities::checkResults(fiducial_results.a1, host_results.a1, "a1"); + testingUtilities::checkResults(fiducial_results.a2, host_results.a2, "a2"); + testingUtilities::checkResults(fiducial_results.a3, host_results.a3, "a3"); + testingUtilities::checkResults(fiducial_results.a4, host_results.a4, "a4"); + testingUtilities::checkResults(fiducial_results.a5, host_results.a5, "a5"); + testingUtilities::checkResults(fiducial_results.a6, host_results.a6, "a6"); +} + +TEST(tMHDReconstructionCharacteristic2Primitive, CorrectInputExpectCorrectOutput) +{ + // Test parameters + Real const &gamma = 5. / 3.; + reconstruction::Primitive const primitive{1, 2, 3, 4, 5, 6, 7, 8}; + reconstruction::Characteristic const characteristic_slope{17, 18, 19, 20, 21, 22, 23}; + Real const sound_speed = hydro_utilities::Calc_Sound_Speed(primitive.pressure, primitive.density, gamma); + Real const sound_speed_squared = sound_speed * sound_speed; + + // Run test + cuda_utilities::DeviceVector dev_results(1); + hipLaunchKernelGGL(test_char_2_prim, 1, 1, 0, 0, primitive, characteristic_slope, gamma, sound_speed, + sound_speed_squared, dev_results.data()); + CudaCheckError(); + cudaDeviceSynchronize(); + reconstruction::Primitive const host_results = dev_results.at(0); + + // Check results + reconstruction::Primitive const fiducial_results{ + 6.73268997307368267e+01, 1.79977606552837130e+01, 9.89872908629502835e-01, -4.94308571170036792e+00, + 3.94390831089473579e+02, -9.99000000000000000e+02, 2.88004228079705342e+01, 9.36584592818786064e+01}; + testingUtilities::checkResults(fiducial_results.density, host_results.density, "density"); + testingUtilities::checkResults(fiducial_results.velocity_x, host_results.velocity_x, "velocity_x"); + testingUtilities::checkResults(fiducial_results.velocity_y, host_results.velocity_y, "velocity_y", 1.34E-14); + testingUtilities::checkResults(fiducial_results.velocity_z, host_results.velocity_z, "velocity_z", 1.6E-14); + testingUtilities::checkResults(fiducial_results.pressure, host_results.pressure, "pressure"); + testingUtilities::checkResults(fiducial_results.magnetic_y, host_results.magnetic_y, "magnetic_y"); + testingUtilities::checkResults(fiducial_results.magnetic_z, host_results.magnetic_z, "magnetic_z"); +} +#endif // MHD + +TEST(tALLReconstructionLoadData, CorrectInputExpectCorrectOutput) +{ + // Set up test and mock up grid + size_t const nx = 3, ny = 3, nz = 3; + size_t const n_cells = nx * ny * nz; + size_t const xid = 1, yid = 1, zid = 1; + size_t const o1 = grid_enum::momentum_x, o2 = grid_enum::momentum_y, o3 = grid_enum::momentum_z; + Real const gamma = 5. / 3.; + + std::vector conserved(n_cells * grid_enum::num_fields); + std::iota(conserved.begin(), conserved.end(), 0.0); + + // Up the energy part of the grid to avoid negative pressure + for (size_t i = grid_enum::Energy * n_cells; i < (grid_enum::Energy + 1) * n_cells; i++) { + conserved.at(i) *= 5.0E2; + } + + // Get test data + auto const test_data = reconstruction::Load_Data(conserved.data(), xid, yid, zid, nx, ny, n_cells, o1, o2, o3, gamma); + +// Check results +#ifdef MHD + reconstruction::Primitive const fiducial_data{ + 13, 3.0769230769230771, 5.1538461538461542, 7.2307692307692308, 9662.3910256410272, 147.5, 173.5, 197.5}; + testingUtilities::checkResults(fiducial_data.density, test_data.density, "density"); + testingUtilities::checkResults(fiducial_data.velocity_x, test_data.velocity_x, "velocity_x"); + testingUtilities::checkResults(fiducial_data.velocity_y, test_data.velocity_y, "velocity_y"); + testingUtilities::checkResults(fiducial_data.velocity_z, test_data.velocity_z, "velocity_z"); + testingUtilities::checkResults(fiducial_data.pressure, test_data.pressure, "pressure"); + testingUtilities::checkResults(fiducial_data.magnetic_x, test_data.magnetic_x, "magnetic_x"); + testingUtilities::checkResults(fiducial_data.magnetic_y, test_data.magnetic_y, "magnetic_y"); + testingUtilities::checkResults(fiducial_data.magnetic_z, test_data.magnetic_z, "magnetic_z"); +#else // MHD + reconstruction::Primitive const fiducial_data{13, 3.0769230769230771, 5.1538461538461542, 7.2307692307692308, + 39950.641025641031}; + testingUtilities::checkResults(fiducial_data.density, test_data.density, "density"); + testingUtilities::checkResults(fiducial_data.velocity_x, test_data.velocity_x, "velocity_x"); + testingUtilities::checkResults(fiducial_data.velocity_y, test_data.velocity_y, "velocity_y"); + testingUtilities::checkResults(fiducial_data.velocity_z, test_data.velocity_z, "velocity_z"); + testingUtilities::checkResults(fiducial_data.pressure, test_data.pressure, "pressure"); +#endif // MHD +} + +TEST(tALLReconstructionComputeSlope, CorrectInputExpectCorrectOutput) +{ +// Setup input data +#ifdef MHD + reconstruction::Primitive left{1, 2, 3, 4, 5, 6, 7, 8}; + reconstruction::Primitive right{6, 7, 8, 9, 10, 11, 12, 13}; +#else // MHD + reconstruction::Primitive left{1, 2, 3, 4, 5}; + reconstruction::Primitive right{6, 7, 8, 9, 10}; +#endif // MHD + Real const coef = 0.5; + + // Get test data + auto test_data = reconstruction::Compute_Slope(left, right, coef); + + // Check results +#ifdef MHD + Real const fiducial_data = -2.5; + testingUtilities::checkResults(fiducial_data, test_data.density, "density"); + testingUtilities::checkResults(fiducial_data, test_data.velocity_x, "velocity_x"); + testingUtilities::checkResults(fiducial_data, test_data.velocity_y, "velocity_y"); + testingUtilities::checkResults(fiducial_data, test_data.velocity_z, "velocity_z"); + testingUtilities::checkResults(fiducial_data, test_data.pressure, "pressure"); + testingUtilities::checkResults(fiducial_data, test_data.magnetic_y, "magnetic_y"); + testingUtilities::checkResults(fiducial_data, test_data.magnetic_z, "magnetic_z"); +#else // MHD + Real const fiducial_data = -2.5; + testingUtilities::checkResults(fiducial_data, test_data.density, "density"); + testingUtilities::checkResults(fiducial_data, test_data.velocity_x, "velocity_x"); + testingUtilities::checkResults(fiducial_data, test_data.velocity_y, "velocity_y"); + testingUtilities::checkResults(fiducial_data, test_data.velocity_z, "velocity_z"); + testingUtilities::checkResults(fiducial_data, test_data.pressure, "pressure"); +#endif // MHD +} + +TEST(tALLReconstructionVanLeerSlope, CorrectInputExpectCorrectOutput) +{ +// Setup input data +#ifdef MHD + reconstruction::Primitive left{1, 2, 3, 4, 5, 6, 7, 8}; + reconstruction::Primitive right{6, 7, 8, 9, 10, 11, 12, 13}; +#else // MHD + reconstruction::Primitive left{1, 2, 3, 4, 5}; + reconstruction::Primitive right{6, 7, 8, 9, 10}; +#endif // MHD + + // Get test data + auto test_data = reconstruction::Van_Leer_Slope(left, right); + + // Check results +#ifdef MHD + reconstruction::Primitive const fiducial_data{1.7142857142857142, 3.1111111111111112, 4.3636363636363633, + 5.5384615384615383, 6.666666666666667, 0, + 8.8421052631578956, 9.9047619047619051}; + testingUtilities::checkResults(fiducial_data.density, test_data.density, "density"); + testingUtilities::checkResults(fiducial_data.velocity_x, test_data.velocity_x, "velocity_x"); + testingUtilities::checkResults(fiducial_data.velocity_y, test_data.velocity_y, "velocity_y"); + testingUtilities::checkResults(fiducial_data.velocity_z, test_data.velocity_z, "velocity_z"); + testingUtilities::checkResults(fiducial_data.pressure, test_data.pressure, "pressure"); + testingUtilities::checkResults(fiducial_data.magnetic_y, test_data.magnetic_y, "magnetic_y"); + testingUtilities::checkResults(fiducial_data.magnetic_z, test_data.magnetic_z, "magnetic_z"); +#else // MHD + reconstruction::Primitive const fiducial_data{1.7142857142857142, 3.1111111111111112, 4.3636363636363633, + 5.5384615384615383, 6.666666666666667}; + testingUtilities::checkResults(fiducial_data.density, test_data.density, "density"); + testingUtilities::checkResults(fiducial_data.velocity_x, test_data.velocity_x, "velocity_x"); + testingUtilities::checkResults(fiducial_data.velocity_y, test_data.velocity_y, "velocity_y"); + testingUtilities::checkResults(fiducial_data.velocity_z, test_data.velocity_z, "velocity_z"); + testingUtilities::checkResults(fiducial_data.pressure, test_data.pressure, "pressure"); +#endif // MHD +} + +__global__ void test_monotize_characteristic_return_primitive( + reconstruction::Primitive const primitive, reconstruction::Primitive const del_L, + reconstruction::Primitive const del_R, reconstruction::Primitive const del_C, reconstruction::Primitive const del_G, + reconstruction::Characteristic const del_a_L, reconstruction::Characteristic const del_a_R, + reconstruction::Characteristic const del_a_C, reconstruction::Characteristic const del_a_G, Real const sound_speed, + Real const sound_speed_squared, Real const gamma, reconstruction::Primitive *monotonized_slope) +{ + *monotonized_slope = reconstruction::Monotonize_Characteristic_Return_Primitive( + primitive, del_L, del_R, del_C, del_G, del_a_L, del_a_R, del_a_C, del_a_G, sound_speed, sound_speed_squared, + gamma); +} + +TEST(tALLReconstructionMonotonizeCharacteristicReturnPrimitive, CorrectInputExpectCorrectOutput) +{ +#ifdef MHD + reconstruction::Primitive const primitive{1, 2, 3, 4, 5, 6, 7, 8}; + reconstruction::Primitive const del_L{9, 10, 11, 12, 13, 14, 15, 16}; + reconstruction::Primitive const del_R{17, 18, 19, 20, 21, 22, 23, 24}; + reconstruction::Primitive const del_C{25, 26, 27, 28, 29, 30, 31, 32}; + reconstruction::Primitive const del_G{33, 34, 35, 36, 37, 38, 39, 40}; + reconstruction::Characteristic const del_a_L{41, 42, 43, 44, 45, 46, 47}; + reconstruction::Characteristic const del_a_R{48, 49, 50, 51, 52, 53, 54}; + reconstruction::Characteristic const del_a_C{55, 56, 57, 58, 59, 60, 61}; + reconstruction::Characteristic const del_a_G{62, 64, 65, 66, 67, 68, 69}; +#else // MHD + reconstruction::Primitive const primitive{1, 2, 3, 4, 5}; + reconstruction::Primitive const del_L{9, 10, 11, 12, 13}; + reconstruction::Primitive const del_R{17, 18, 19, 20, 21}; + reconstruction::Primitive const del_C{25, 26, 27, 28, 29}; + reconstruction::Primitive const del_G{33, 34, 35, 36, 37}; + reconstruction::Characteristic const del_a_L{41, 42, 43, 44, 45}; + reconstruction::Characteristic const del_a_R{48, 49, 50, 51, 52}; + reconstruction::Characteristic const del_a_C{55, 56, 57, 58, 59}; + reconstruction::Characteristic const del_a_G{62, 64, 65, 66, 67}; +#endif // MHD + Real const sound_speed = 17.0, sound_speed_squared = sound_speed * sound_speed; + Real const gamma = 5. / 3.; + + // Get test data + cuda_utilities::DeviceVector dev_results(1); + hipLaunchKernelGGL(test_monotize_characteristic_return_primitive, 1, 1, 0, 0, primitive, del_L, del_R, del_C, del_G, + del_a_L, del_a_R, del_a_C, del_a_G, sound_speed, sound_speed_squared, gamma, dev_results.data()); + CudaCheckError(); + cudaDeviceSynchronize(); + reconstruction::Primitive const host_results = dev_results.at(0); + + // Check results +#ifdef MHD + reconstruction::Primitive const fiducial_data{174, 74.796411763317991, 19.428234044886157, 16.129327015450095, 33524, + 0, -1385.8699833027156, -1407.694707449215}; + testingUtilities::checkResults(fiducial_data.density, host_results.density, "density"); + testingUtilities::checkResults(fiducial_data.velocity_x, host_results.velocity_x, "velocity_x"); + testingUtilities::checkResults(fiducial_data.velocity_y, host_results.velocity_y, "velocity_y"); + testingUtilities::checkResults(fiducial_data.velocity_z, host_results.velocity_z, "velocity_z"); + testingUtilities::checkResults(fiducial_data.pressure, host_results.pressure, "pressure"); + testingUtilities::checkResults(fiducial_data.magnetic_y, host_results.magnetic_y, "magnetic_y"); + testingUtilities::checkResults(fiducial_data.magnetic_z, host_results.magnetic_z, "magnetic_z"); +#else // MHD + reconstruction::Primitive const fiducial_data{170, 68, 57, 58, 32946}; + testingUtilities::checkResults(fiducial_data.density, host_results.density, "density"); + testingUtilities::checkResults(fiducial_data.velocity_x, host_results.velocity_x, "velocity_x"); + testingUtilities::checkResults(fiducial_data.velocity_y, host_results.velocity_y, "velocity_y"); + testingUtilities::checkResults(fiducial_data.velocity_z, host_results.velocity_z, "velocity_z"); + testingUtilities::checkResults(fiducial_data.pressure, host_results.pressure, "pressure"); +#endif // MHD +} + +TEST(tALLReconstructionCalcInterface, CorrectInputExpectCorrectOutput) +{ + // Setup input data +#ifdef MHD + reconstruction::Primitive left{1, 2, 3, 4, 5, 6, 7, 8}; + reconstruction::Primitive right{6, 7, 8, 9, 10, 11, 12, 13}; +#else // MHD + reconstruction::Primitive left{1, 2, 3, 4, 5}; + reconstruction::Primitive right{6, 7, 8, 9, 10}; +#endif // MHD + Real const coef = 0.5; + + // Get test data + auto test_data = reconstruction::Calc_Interface(left, right, coef); + + // Check results +#ifdef MHD + reconstruction::Primitive const fiducial_data{2.5, 3.75, 5, 6.25, 7.5, 0, 10, 11.25}; + testingUtilities::checkResults(fiducial_data.density, test_data.density, "density"); + testingUtilities::checkResults(fiducial_data.velocity_x, test_data.velocity_x, "velocity_x"); + testingUtilities::checkResults(fiducial_data.velocity_y, test_data.velocity_y, "velocity_y"); + testingUtilities::checkResults(fiducial_data.velocity_z, test_data.velocity_z, "velocity_z"); + testingUtilities::checkResults(fiducial_data.pressure, test_data.pressure, "pressure"); + testingUtilities::checkResults(fiducial_data.magnetic_y, test_data.magnetic_y, "magnetic_y"); + testingUtilities::checkResults(fiducial_data.magnetic_z, test_data.magnetic_z, "magnetic_z"); +#else // MHD + reconstruction::Primitive const fiducial_data{2.5, 3.75, 5, 6.25, 7.5}; + testingUtilities::checkResults(fiducial_data.density, test_data.density, "density"); + testingUtilities::checkResults(fiducial_data.velocity_x, test_data.velocity_x, "velocity_x"); + testingUtilities::checkResults(fiducial_data.velocity_y, test_data.velocity_y, "velocity_y"); + testingUtilities::checkResults(fiducial_data.velocity_z, test_data.velocity_z, "velocity_z"); + testingUtilities::checkResults(fiducial_data.pressure, test_data.pressure, "pressure"); +#endif // MHD +} + +TEST(tALLReconstructionWriteData, CorrectInputExpectCorrectOutput) +{ + // Set up test and mock up grid +#ifdef MHD + reconstruction::Primitive interface { + 1, 2, 3, 4, 5, 6, 7, 8 + }; +#else // MHD + reconstruction::Primitive interface { + 6, 7, 8, 9, 10 + }; +#endif // MHD + size_t const nx = 3, ny = 3, nz = 3; + size_t const n_cells = nx * ny * nz; + size_t const xid = 1, yid = 1, zid = 1; + size_t const id = cuda_utilities::compute1DIndex(xid, yid, zid, nx, ny); + size_t const o1 = grid_enum::momentum_x, o2 = grid_enum::momentum_y, o3 = grid_enum::momentum_z; + Real const gamma = 5. / 3.; + + std::vector conserved(n_cells * grid_enum::num_fields); + std::vector interface_arr(n_cells * grid_enum::num_fields); + + // Get test data + reconstruction::Write_Data(interface, interface_arr.data(), conserved.data(), id, n_cells, o1, o2, o3, gamma); + +// Fiducial Data +#ifdef MHD + std::unordered_map fiducial_interface = {{13, 1}, {40, 2}, {67, 3}, {94, 4}, + {121, 78.5}, {148, 7}, {175, 8}}; +#else // MHD + std::unordered_map fiducial_interface = {{13, 6}, {40, 42}, {67, 48}, {94, 54}, {121, 597}}; +#endif // MHD + + // Perform Comparison + for (size_t i = 0; i < interface_arr.size(); i++) { + // Check the interface + double test_val = interface_arr.at(i); + double fiducial_val = (fiducial_interface.find(i) == fiducial_interface.end()) ? 0.0 : fiducial_interface[i]; + + testingUtilities::checkResults(fiducial_val, test_val, "Interface at i=" + std::to_string(i)); + } +} diff --git a/src/reconstruction/reconstruction_tests.h b/src/reconstruction/reconstruction_tests.h deleted file mode 100644 index 305f58f7b..000000000 --- a/src/reconstruction/reconstruction_tests.h +++ /dev/null @@ -1,102 +0,0 @@ -/*! - * \file reconstruction_tests.cu - * \brief Tests for the contents of reconstruction.h - * - */ - -// STL Includes -#include - -// External Includes -#include // Include GoogleTest and related libraries/headers - -// Local Includes -#include "../global/global.h" -#include "../global/global_cuda.h" -#include "../reconstruction/reconstruction.h" -#include "../utils/DeviceVector.h" -#include "../utils/gpu.hpp" -#include "../utils/testing_utilities.h" - -#ifdef MHD -namespace -{ -__global__ void test_prim_2_char(reconstruction::Primitive const primitive, - reconstruction::Primitive const primitive_slope, Real const gamma, - Real const sound_speed, Real const sound_speed_squared, - reconstruction::Characteristic *characteristic_slope) -{ - *characteristic_slope = - reconstruction::Primitive_To_Characteristic(primitive, primitive_slope, sound_speed, sound_speed_squared, gamma); -} - -__global__ void test_char_2_prim(reconstruction::Primitive const primitive, - reconstruction::Characteristic const characteristic_slope, Real const gamma, - Real const sound_speed, Real const sound_speed_squared, - reconstruction::Primitive *primitive_slope) -{ - reconstruction::Characteristic_To_Primitive(primitive, characteristic_slope, sound_speed, sound_speed_squared, gamma, - *primitive_slope); -} -} // namespace - -TEST(tMHDReconstructionPrimitive2Characteristic, CorrectInputExpectCorrectOutput) -{ - // Test parameters - Real const &gamma = 5. / 3.; - reconstruction::Primitive const primitive{1, 2, 3, 4, 5, 6, 7, 8}; - reconstruction::Primitive const primitive_slope{9, 10, 11, 12, 13, 14, 15, 16}; - Real const sound_speed = hydro_utilities::Calc_Sound_Speed(primitive.pressure, primitive.density, gamma); - Real const sound_speed_squared = sound_speed * sound_speed; - - // Run test - cuda_utilities::DeviceVector dev_results(1); - hipLaunchKernelGGL(test_prim_2_char, 1, 1, 0, 0, primitive, primitive_slope, gamma, sound_speed, sound_speed_squared, - dev_results.data()); - CudaCheckError(); - cudaDeviceSynchronize(); - reconstruction::Characteristic const host_results = dev_results.at(0); - - // Check results - reconstruction::Characteristic const fiducial_results{ - 3.67609032478613384e+00, -5.64432521030159506e-01, -3.31429408151064075e+00, 7.44000000000000039e+00, - 3.29052143725318791e+00, -1.88144173676719539e-01, 4.07536568422372625e+00}; - testingUtilities::checkResults(fiducial_results.a0, host_results.a0, "a0"); - testingUtilities::checkResults(fiducial_results.a1, host_results.a1, "a1"); - testingUtilities::checkResults(fiducial_results.a2, host_results.a2, "a2"); - testingUtilities::checkResults(fiducial_results.a3, host_results.a3, "a3"); - testingUtilities::checkResults(fiducial_results.a4, host_results.a4, "a4"); - testingUtilities::checkResults(fiducial_results.a5, host_results.a5, "a5"); - testingUtilities::checkResults(fiducial_results.a6, host_results.a6, "a6"); -} - -TEST(tMHDReconstructionCharacteristic2Primitive, CorrectInputExpectCorrectOutput) -{ - // Test parameters - Real const &gamma = 5. / 3.; - reconstruction::Primitive const primitive{1, 2, 3, 4, 5, 6, 7, 8}; - reconstruction::Characteristic const characteristic_slope{17, 18, 19, 20, 21, 22, 23}; - Real const sound_speed = hydro_utilities::Calc_Sound_Speed(primitive.pressure, primitive.density, gamma); - Real const sound_speed_squared = sound_speed * sound_speed; - - // Run test - cuda_utilities::DeviceVector dev_results(1); - hipLaunchKernelGGL(test_char_2_prim, 1, 1, 0, 0, primitive, characteristic_slope, gamma, sound_speed, - sound_speed_squared, dev_results.data()); - CudaCheckError(); - cudaDeviceSynchronize(); - reconstruction::Primitive const host_results = dev_results.at(0); - - // Check results - reconstruction::Primitive const fiducial_results{ - 6.73268997307368267e+01, 1.79977606552837130e+01, 9.89872908629502835e-01, -4.94308571170036792e+00, - 3.94390831089473579e+02, -9.99000000000000000e+02, 2.88004228079705342e+01, 9.36584592818786064e+01}; - testingUtilities::checkResults(fiducial_results.density, host_results.density, "density"); - testingUtilities::checkResults(fiducial_results.velocity_x, host_results.velocity_x, "velocity_x"); - testingUtilities::checkResults(fiducial_results.velocity_y, host_results.velocity_y, "velocity_y", 1.34E-14); - testingUtilities::checkResults(fiducial_results.velocity_z, host_results.velocity_z, "velocity_z", 1.6E-14); - testingUtilities::checkResults(fiducial_results.pressure, host_results.pressure, "pressure"); - testingUtilities::checkResults(fiducial_results.magnetic_y, host_results.magnetic_y, "magnetic_y"); - testingUtilities::checkResults(fiducial_results.magnetic_z, host_results.magnetic_z, "magnetic_z"); -} -#endif // MHD \ No newline at end of file From c77090c21ac3b7454387e122443ee752d4b10b1e Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Thu, 13 Apr 2023 17:28:42 -0400 Subject: [PATCH 375/694] Slightly loosen fixed error on field loop test This slight increase allows the test to pass on both H2P and C-3PO --- src/system_tests/mhd_system_tests.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/system_tests/mhd_system_tests.cpp b/src/system_tests/mhd_system_tests.cpp index b90d07cd0..c64579f3d 100644 --- a/src/system_tests/mhd_system_tests.cpp +++ b/src/system_tests/mhd_system_tests.cpp @@ -619,6 +619,9 @@ TEST_P(tMHDSYSTEMParameterizedMpi, RyuAndJones4dShockTubeCorrectInputExpectCorre TEST_P(tMHDSYSTEMParameterizedMpi, AdvectingFieldLoopCorrectInputExpectCorrectOutput) { test_runner.numMpiRanks = GetParam(); +#ifdef PLMC + test_runner.setFixedEpsilon(8.568e-10); +#endif // PLMC test_runner.runTest(); } From 13cd946f29a22afc69204ec33d59f8ab4f270f08 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Thu, 13 Apr 2023 18:06:50 -0400 Subject: [PATCH 376/694] Add dual energy support to tALLReconstructionLoadData test --- src/reconstruction/reconstruction_tests.cu | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/reconstruction/reconstruction_tests.cu b/src/reconstruction/reconstruction_tests.cu index d786bff5e..d75cbfd3c 100644 --- a/src/reconstruction/reconstruction_tests.cu +++ b/src/reconstruction/reconstruction_tests.cu @@ -132,15 +132,18 @@ TEST(tALLReconstructionLoadData, CorrectInputExpectCorrectOutput) testingUtilities::checkResults(fiducial_data.magnetic_x, test_data.magnetic_x, "magnetic_x"); testingUtilities::checkResults(fiducial_data.magnetic_y, test_data.magnetic_y, "magnetic_y"); testingUtilities::checkResults(fiducial_data.magnetic_z, test_data.magnetic_z, "magnetic_z"); -#else // MHD - reconstruction::Primitive const fiducial_data{13, 3.0769230769230771, 5.1538461538461542, 7.2307692307692308, - 39950.641025641031}; +#else // MHD + reconstruction::Primitive fiducial_data{13, 3.0769230769230771, 5.1538461538461542, 7.2307692307692308, + 39950.641025641031}; + #ifdef DE + fiducial_data.pressure = 34274.282506448195; + #endif // DE testingUtilities::checkResults(fiducial_data.density, test_data.density, "density"); testingUtilities::checkResults(fiducial_data.velocity_x, test_data.velocity_x, "velocity_x"); testingUtilities::checkResults(fiducial_data.velocity_y, test_data.velocity_y, "velocity_y"); testingUtilities::checkResults(fiducial_data.velocity_z, test_data.velocity_z, "velocity_z"); testingUtilities::checkResults(fiducial_data.pressure, test_data.pressure, "pressure"); -#endif // MHD +#endif // MHD } TEST(tALLReconstructionComputeSlope, CorrectInputExpectCorrectOutput) From 9b27e21511db2bacbc5081ae496533cf708192c2 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Mon, 17 Apr 2023 16:41:21 -0400 Subject: [PATCH 377/694] Clarify a comment --- src/grid/initial_conditions.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/grid/initial_conditions.cpp b/src/grid/initial_conditions.cpp index 987a73f00..9e6f58fe5 100644 --- a/src/grid/initial_conditions.cpp +++ b/src/grid/initial_conditions.cpp @@ -323,8 +323,8 @@ void Grid3D::Linear_Wave(Real rho, Real vx, Real vy, Real vz, Real P, Real A, Re Real x_pos, y_pos, z_pos; Get_Position(i_rot, j_rot, k_rot, &x_pos, &y_pos, &z_pos); - // set constant initial states. Note that hydro_utilities::Calc_Energy_Primitive computes the correct MHD or - // hydro energy + // set constant initial states. Note that hydro_utilities::Calc_Energy_Primitive computes the MHD energy if the + // MHD flag is turned on and the hydro energy if it isn't Real sine_wave = std::sin(2.0 * M_PI * x_pos); C.density[id] = rho; From bcf9b17922e56d4dfadd594200df623be38b68d5 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Tue, 2 May 2023 11:07:19 -0400 Subject: [PATCH 378/694] Update time for MHD, disable cuda error check for MHD --- builds/make.type.mhd | 4 ++-- src/grid/grid3D.cpp | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/builds/make.type.mhd b/builds/make.type.mhd index e0a817b3d..5780eb981 100644 --- a/builds/make.type.mhd +++ b/builds/make.type.mhd @@ -34,7 +34,7 @@ DFLAGS += -DTEMPERATURE_FLOOR # Apply the cooling in the GPU from precomputed tables # DFLAGS += -DCOOLING_GPU -#Measure the Timing of the different stages +# Measure the Timing of the different stages DFLAGS += -DCPU_TIME DFLAGS += $(OUTPUT) @@ -49,7 +49,7 @@ DFLAGS += $(MPI_GPU) # used on scientific runs # Do CUDA error checking -DFLAGS += -DCUDA_ERROR_CHECK +# DFLAGS += -DCUDA_ERROR_CHECK # Limit the number of steps to evolve. # DFLAGS += -DN_STEPS_LIMIT=1000 diff --git a/src/grid/grid3D.cpp b/src/grid/grid3D.cpp index f0a9fd064..d1b0c96cf 100644 --- a/src/grid/grid3D.cpp +++ b/src/grid/grid3D.cpp @@ -553,7 +553,7 @@ Real Grid3D::Update_Hydro_Grid() Real dti; #ifdef CPU_TIME - Timer.Hydro.Start(); + Timer.Hydro_MHD.Start(); #endif // CPU_TIME #ifdef GRAVITY @@ -565,10 +565,10 @@ Real Grid3D::Update_Hydro_Grid() #ifdef CPU_TIME #ifdef CHEMISTRY_GPU - Timer.Hydro.Subtract(Chem.H.runtime_chemistry_step); + Timer.Hydro_MHD.Subtract(Chem.H.runtime_chemistry_step); // Subtract the time spent on the Chemical Update #endif // CHEMISTRY_GPU - Timer.Hydro.End(); + Timer.Hydro_MHD.End(); #endif // CPU_TIME #ifdef COOLING_GRACKLE From 15e714e9a3d8aaab466f463b336d3f196d65132b Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Wed, 3 May 2023 10:34:03 -0400 Subject: [PATCH 379/694] Add wave length option to Linear_Waves ICs Also, refactor the linear wave initial conditions to accept the parameters struct instead of a bunch of Reals --- src/global/global.cpp | 2 ++ src/global/global.h | 1 + src/grid/grid3D.h | 26 ++------------------- src/grid/initial_conditions.cpp | 41 +++++++++++++++------------------ 4 files changed, 23 insertions(+), 47 deletions(-) diff --git a/src/global/global.cpp b/src/global/global.cpp index a47f9e78b..12f891f20 100644 --- a/src/global/global.cpp +++ b/src/global/global.cpp @@ -363,6 +363,8 @@ void parse_param(char *name, char *value, struct parameters *parms) parms->radius = atof(value); } else if (strcmp(name, "P_blast") == 0) { parms->P_blast = atof(value); + } else if (strcmp(name, "wave_length") == 0) { + parms->wave_length = atof(value); #ifdef PARTICLES } else if (strcmp(name, "prng_seed") == 0) { parms->prng_seed = atoi(value); diff --git a/src/global/global.h b/src/global/global.h index 17e7d7b73..b037c931d 100644 --- a/src/global/global.h +++ b/src/global/global.h @@ -265,6 +265,7 @@ struct parameters { Real polarization = 0; Real radius = 0; Real P_blast = 0; + Real wave_length = 1.0; #ifdef PARTICLES // The random seed for particle simulations. With the default of 0 then a // machine dependent seed will be generated. diff --git a/src/grid/grid3D.h b/src/grid/grid3D.h index bd92e63fd..253fd5074 100644 --- a/src/grid/grid3D.h +++ b/src/grid/grid3D.h @@ -546,31 +546,9 @@ class Grid3D /*! * \brief Initialize the grid with a simple linear wave. * - * \param[in] rho The background density - * \param[in] vx The background velocity in the X-direction - * \param[in] vy The background velocity in the Y-direction - * \param[in] vz The background velocity in the Z-direction - * \param[in] P The background pressure - * \param[in] A The amplitude of the wave - * \param[in] Bx The background magnetic field in the X-direction - * \param[in] By The background magnetic field in the Y-direction - * \param[in] Bz The background magnetic field in the Z-direction - * \param[in] rEigenVec_rho The right eigenvector component for the density - * \param[in] rEigenVec_MomentumX The right eigenvector component for the - * velocity in the X-direction \param[in] rEigenVec_MomentumY The right - * eigenvector component for the velocity in the Y-direction \param[in] - * rEigenVec_MomentumZ The right eigenvector component for the velocity in the - * Z-direction \param[in] rEigenVec_E The right eigenvector component for the - * energy \param[in] rEigenVec_Bx The right eigenvector component for the - * magnetic field in the X-direction \param[in] rEigenVec_By The right - * eigenvector component for the magnetic field in the Y-direction \param[in] - * rEigenVec_Bz The right eigenvector component for the magnetic field in the - * Z-direction \param[in] pitch The pitch angle of the linear wave \param[in] - * yaw The yaw angle of the linear wave + * \param[in] P the parameters struct. */ - void Linear_Wave(Real rho, Real vx, Real vy, Real vz, Real P, Real A, Real Bx, Real By, Real Bz, Real rEigenVec_rho, - Real rEigenVec_MomentumX, Real rEigenVec_MomentumY, Real rEigenVec_MomentumZ, Real rEigenVec_E, - Real rEigenVec_Bx, Real rEigenVec_By, Real rEigenVec_Bz, Real pitch, Real yaw); + void Linear_Wave(parameters const &P); /*! \fn void Square_Wave(Real rho, Real vx, Real vy, Real vz, Real P, Real A) * \brief Square wave density perturbation with amplitude A*rho in pressure diff --git a/src/grid/initial_conditions.cpp b/src/grid/initial_conditions.cpp index 9e6f58fe5..5b7e563f0 100644 --- a/src/grid/initial_conditions.cpp +++ b/src/grid/initial_conditions.cpp @@ -36,9 +36,7 @@ void Grid3D::Set_Initial_Conditions(parameters P) } else if (strcmp(P.init, "Sound_Wave") == 0) { Sound_Wave(P.rho, P.vx, P.vy, P.vz, P.P, P.A); } else if (strcmp(P.init, "Linear_Wave") == 0) { - Linear_Wave(P.rho, P.vx, P.vy, P.vz, P.P, P.A, P.Bx, P.By, P.Bz, P.rEigenVec_rho, P.rEigenVec_MomentumX, - P.rEigenVec_MomentumY, P.rEigenVec_MomentumZ, P.rEigenVec_E, P.rEigenVec_Bx, P.rEigenVec_By, - P.rEigenVec_Bz, P.pitch, P.yaw); + Linear_Wave(P); } else if (strcmp(P.init, "Square_Wave") == 0) { Square_Wave(P.rho, P.vx, P.vy, P.vz, P.P, P.A); } else if (strcmp(P.init, "Riemann") == 0) { @@ -302,19 +300,16 @@ void Grid3D::Sound_Wave(Real rho, Real vx, Real vy, Real vz, Real P, Real A) /*! \fn void Linear_Wave(Real rho, Real vx, Real vy, Real vz, Real P, Real A) * \brief Sine wave perturbation. */ -void Grid3D::Linear_Wave(Real rho, Real vx, Real vy, Real vz, Real P, Real A, Real Bx, Real By, Real Bz, - Real rEigenVec_rho, Real rEigenVec_MomentumX, Real rEigenVec_MomentumY, - Real rEigenVec_MomentumZ, Real rEigenVec_E, Real rEigenVec_Bx, Real rEigenVec_By, - Real rEigenVec_Bz, Real pitch, Real yaw) +void Grid3D::Linear_Wave(parameters const &P) { - auto [stagger, junk1, junk2] = math_utils::rotateCoords(H.dx / 2, H.dy / 2, H.dz / 2, pitch, yaw); + auto [stagger, junk1, junk2] = math_utils::rotateCoords(H.dx / 2, H.dy / 2, H.dz / 2, P.pitch, P.yaw); // set initial values of conserved variables for (int k = H.n_ghost; k < H.nz - H.n_ghost; k++) { for (int j = H.n_ghost; j < H.ny - H.n_ghost; j++) { for (int i = H.n_ghost; i < H.nx - H.n_ghost; i++) { // Rotate the indices - auto [i_rot, j_rot, k_rot] = math_utils::rotateCoords(i, j, k, pitch, yaw); + auto [i_rot, j_rot, k_rot] = math_utils::rotateCoords(i, j, k, P.pitch, P.yaw); // get cell index int id = i + j * H.nx + k * H.nx * H.ny; @@ -325,25 +320,25 @@ void Grid3D::Linear_Wave(Real rho, Real vx, Real vy, Real vz, Real P, Real A, Re // set constant initial states. Note that hydro_utilities::Calc_Energy_Primitive computes the MHD energy if the // MHD flag is turned on and the hydro energy if it isn't - Real sine_wave = std::sin(2.0 * M_PI * x_pos); + Real sine_wave = std::sin(2.0 * M_PI * x_pos / P.wave_length); - C.density[id] = rho; - C.momentum_x[id] = rho * vx; - C.momentum_y[id] = rho * vy; - C.momentum_z[id] = rho * vz; - C.Energy[id] = hydro_utilities::Calc_Energy_Primitive(P, rho, vx, vy, vz, gama, Bx, By, Bz); + C.density[id] = P.rho; + C.momentum_x[id] = P.rho * P.vx; + C.momentum_y[id] = P.rho * P.vy; + C.momentum_z[id] = P.rho * P.vz; + C.Energy[id] = hydro_utilities::Calc_Energy_Primitive(P.P, P.rho, P.vx, P.vy, P.vz, gama, P.Bx, P.By, P.Bz); // add small-amplitude perturbations - C.density[id] += A * rEigenVec_rho * sine_wave; - C.momentum_x[id] += A * rEigenVec_MomentumX * sine_wave; - C.momentum_y[id] += A * rEigenVec_MomentumY * sine_wave; - C.momentum_z[id] += A * rEigenVec_MomentumZ * sine_wave; - C.Energy[id] += A * rEigenVec_E * sine_wave; + C.density[id] += P.A * P.rEigenVec_rho * sine_wave; + C.momentum_x[id] += P.A * P.rEigenVec_MomentumX * sine_wave; + C.momentum_y[id] += P.A * P.rEigenVec_MomentumY * sine_wave; + C.momentum_z[id] += P.A * P.rEigenVec_MomentumZ * sine_wave; + C.Energy[id] += P.A * P.rEigenVec_E * sine_wave; #ifdef MHD sine_wave = std::sin(2.0 * M_PI * (x_pos + stagger)); - C.magnetic_x[id] = Bx + A * rEigenVec_Bx * sine_wave; - C.magnetic_y[id] = By + A * rEigenVec_By * sine_wave; - C.magnetic_z[id] = Bz + A * rEigenVec_Bz * sine_wave; + C.magnetic_x[id] = P.Bx + P.A * P.rEigenVec_Bx * sine_wave; + C.magnetic_y[id] = P.By + P.A * P.rEigenVec_By * sine_wave; + C.magnetic_z[id] = P.Bz + P.A * P.rEigenVec_Bz * sine_wave; #endif // MHD } } From fa887e1064da529f760aad4916d598b3441a4567 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Tue, 16 May 2023 15:01:05 -0400 Subject: [PATCH 380/694] Fix bad rebase --- src/grid/grid3D.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/grid/grid3D.cpp b/src/grid/grid3D.cpp index d1b0c96cf..f0a9fd064 100644 --- a/src/grid/grid3D.cpp +++ b/src/grid/grid3D.cpp @@ -553,7 +553,7 @@ Real Grid3D::Update_Hydro_Grid() Real dti; #ifdef CPU_TIME - Timer.Hydro_MHD.Start(); + Timer.Hydro.Start(); #endif // CPU_TIME #ifdef GRAVITY @@ -565,10 +565,10 @@ Real Grid3D::Update_Hydro_Grid() #ifdef CPU_TIME #ifdef CHEMISTRY_GPU - Timer.Hydro_MHD.Subtract(Chem.H.runtime_chemistry_step); + Timer.Hydro.Subtract(Chem.H.runtime_chemistry_step); // Subtract the time spent on the Chemical Update #endif // CHEMISTRY_GPU - Timer.Hydro_MHD.End(); + Timer.Hydro.End(); #endif // CPU_TIME #ifdef COOLING_GRACKLE From eb755f457bed59a34ab51cce25542214042e591f Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Tue, 20 Jun 2023 14:38:19 -0400 Subject: [PATCH 381/694] Remove innacurate comment --- builds/make.type.mhd | 2 -- 1 file changed, 2 deletions(-) diff --git a/builds/make.type.mhd b/builds/make.type.mhd index 5780eb981..953d49238 100644 --- a/builds/make.type.mhd +++ b/builds/make.type.mhd @@ -9,8 +9,6 @@ MPI_GPU ?= DFLAGS += -DCUDA DFLAGS += -DMPI_CHOLLA DFLAGS += -DPRECISION=2 -# Currently MHD only supports PCM reconstruction. Higher order reconstruction -# methods will be added later DFLAGS += -DPLMC DFLAGS += -DHLLD DFLAGS += -DMHD From 531efcbd2bbbd6b18f4b5a05f3e29f938f09a9ae Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Tue, 20 Jun 2023 15:27:40 -0400 Subject: [PATCH 382/694] Make all IC functions use parameter struct as arg Now all the initial condition functions just take the parameters struct as the argument instead of a bunch of Reals. --- src/grid/grid3D.h | 43 +++++--- src/grid/initial_conditions.cpp | 153 +++++++++++++------------- src/system_tests/mhd_system_tests.cpp | 3 + 3 files changed, 104 insertions(+), 95 deletions(-) diff --git a/src/grid/grid3D.h b/src/grid/grid3D.h index 253fd5074..e679415d9 100644 --- a/src/grid/grid3D.h +++ b/src/grid/grid3D.h @@ -535,13 +535,19 @@ class Grid3D * \brief Free the memory for the density array. */ void FreeMemory(void); - /*! \fn void Constant(Real rho, Real vx, Real vy, Real vz, Real P) - * \brief Constant gas properties. */ - void Constant(Real rho, Real vx, Real vy, Real vz, Real P, Real Bx, Real By, Real Bz); + /*! + * \brief Constant gas properties. + * + * \param[in] P the parameters struct. + */ + void Constant(parameters const &P); - /*! \fn void Sound_Wave(Real rho, Real vx, Real vy, Real vz, Real P, Real A) - * \brief Sine wave perturbation. */ - void Sound_Wave(Real rho, Real vx, Real vy, Real vz, Real P, Real A); + /*! + * \brief Sine wave perturbation. + * + * \param[in] P the parameters struct. + */ + void Sound_Wave(parameters const &P); /*! * \brief Initialize the grid with a simple linear wave. @@ -550,17 +556,20 @@ class Grid3D */ void Linear_Wave(parameters const &P); - /*! \fn void Square_Wave(Real rho, Real vx, Real vy, Real vz, Real P, Real A) - * \brief Square wave density perturbation with amplitude A*rho in pressure - * equilibrium. */ - void Square_Wave(Real rho, Real vx, Real vy, Real vz, Real P, Real A); - - /*! \fn void Riemann(Real rho_l, Real vx_l, Real vy_l, Real vz_l, Real P_l, - Real Bx_l, Real By_l, Real Bz_l, Real rho_r, Real vx_r, Real vy_r, Real vz_r, - Real P_r, Real Bx_r, Real By_r, Real Bz_r, Real diaph) - * \brief Initialize the grid with a Riemann problem. */ - void Riemann(Real rho_l, Real vx_l, Real vy_l, Real vz_l, Real P_l, Real Bx_l, Real By_l, Real Bz_l, Real rho_r, - Real vx_r, Real vy_r, Real vz_r, Real P_r, Real Bx_r, Real By_r, Real Bz_r, Real diaph); + /*! + * \brief Square wave density perturbation with amplitude A*rho in pressure + * equilibrium. + * + * \param[in] P the parameters struct. + */ + void Square_Wave(parameters const &P); + + /*! + * \brief Initialize the grid with a Riemann problem. + * + * \param[in] P the parameters struct. + */ + void Riemann(parameters const &P); /*! \fn void Shu_Osher() * \brief Initialize the grid with the Shu-Osher shock tube problem. See diff --git a/src/grid/initial_conditions.cpp b/src/grid/initial_conditions.cpp index 5b7e563f0..0acf08639 100644 --- a/src/grid/initial_conditions.cpp +++ b/src/grid/initial_conditions.cpp @@ -32,16 +32,15 @@ void Grid3D::Set_Initial_Conditions(parameters P) Set_Gammas(P.gamma); if (strcmp(P.init, "Constant") == 0) { - Constant(P.rho, P.vx, P.vy, P.vz, P.P, P.Bx, P.By, P.Bz); + Constant(P); } else if (strcmp(P.init, "Sound_Wave") == 0) { - Sound_Wave(P.rho, P.vx, P.vy, P.vz, P.P, P.A); + Sound_Wave(P); } else if (strcmp(P.init, "Linear_Wave") == 0) { Linear_Wave(P); } else if (strcmp(P.init, "Square_Wave") == 0) { - Square_Wave(P.rho, P.vx, P.vy, P.vz, P.P, P.A); + Square_Wave(P); } else if (strcmp(P.init, "Riemann") == 0) { - Riemann(P.rho_l, P.vx_l, P.vy_l, P.vz_l, P.P_l, P.Bx_l, P.By_l, P.Bz_l, P.rho_r, P.vx_r, P.vy_r, P.vz_r, P.P_r, - P.Bx_r, P.By_r, P.Bz_r, P.diaph); + Riemann(P); } else if (strcmp(P.init, "Shu_Osher") == 0) { Shu_Osher(); } else if (strcmp(P.init, "Blast_1D") == 0) { @@ -178,7 +177,7 @@ void Grid3D::Set_Domain_Properties(struct parameters P) /*! \fn void Constant(Real rho, Real vx, Real vy, Real vz, Real P, Real Bx, Real * By, Real Bz) \brief Constant gas properties. */ -void Grid3D::Constant(Real rho, Real vx, Real vy, Real vz, Real P, Real Bx, Real By, Real Bz) +void Grid3D::Constant(parameters const &P) { int i, j, k, id; int istart, jstart, kstart, iend, jend, kend; @@ -213,26 +212,26 @@ void Grid3D::Constant(Real rho, Real vx, Real vy, Real vz, Real P, Real Bx, Real // Set the magnetic field including the rightmost ghost cell on the // left side which is really the left face of the first grid cell #ifdef MHD - C.magnetic_x[id] = Bx; - C.magnetic_y[id] = By; - C.magnetic_z[id] = Bz; + C.magnetic_x[id] = P.Bx; + C.magnetic_y[id] = P.By; + C.magnetic_z[id] = P.Bz; #endif // MHD // Exclude the rightmost ghost cell on the "left" side if ((k >= kstart) and (j >= jstart) and (i >= istart)) { // set constant initial states - C.density[id] = rho; - C.momentum_x[id] = rho * vx; - C.momentum_y[id] = rho * vy; - C.momentum_z[id] = rho * vz; - C.Energy[id] = P / (gama - 1.0) + 0.5 * rho * (vx * vx + vy * vy + vz * vz); + C.density[id] = P.rho; + C.momentum_x[id] = P.rho * P.vx; + C.momentum_y[id] = P.rho * P.vy; + C.momentum_z[id] = P.rho * P.vz; + C.Energy[id] = P.P / (gama - 1.0) + 0.5 * P.rho * (P.vx * P.vx + P.vy * P.vy + P.vz * P.vz); #ifdef DE - C.GasEnergy[id] = P / (gama - 1.0); + C.GasEnergy[id] = P.P / (gama - 1.0); #endif // DE } if (i == istart && j == jstart && k == kstart) { - n = rho * DENSITY_UNIT / (mu * MP); - T = P * PRESSURE_UNIT / (n * KB); + n = P.rho * DENSITY_UNIT / (mu * MP); + T = P.P * PRESSURE_UNIT / (n * KB); printf("Initial n = %e, T = %e\n", n, T); } } @@ -242,7 +241,7 @@ void Grid3D::Constant(Real rho, Real vx, Real vy, Real vz, Real P, Real Bx, Real /*! \fn void Sound_Wave(Real rho, Real vx, Real vy, Real vz, Real P, Real A) * \brief Sine wave perturbation. */ -void Grid3D::Sound_Wave(Real rho, Real vx, Real vy, Real vz, Real P, Real A) +void Grid3D::Sound_Wave(parameters const &P) { int i, j, k, id; int istart, jstart, kstart, iend, jend, kend; @@ -276,22 +275,22 @@ void Grid3D::Sound_Wave(Real rho, Real vx, Real vy, Real vz, Real P, Real A) Get_Position(i, j, k, &x_pos, &y_pos, &z_pos); // set constant initial states - C.density[id] = rho; - C.momentum_x[id] = rho * vx; - C.momentum_y[id] = rho * vy; - C.momentum_z[id] = rho * vz; - C.Energy[id] = P / (gama - 1.0) + 0.5 * rho * (vx * vx + vy * vy + vz * vz); + C.density[id] = P.rho; + C.momentum_x[id] = P.rho * P.vx; + C.momentum_y[id] = P.rho * P.vy; + C.momentum_z[id] = P.rho * P.vz; + C.Energy[id] = P.P / (gama - 1.0) + 0.5 * P.rho * (P.vx * P.vx + P.vy * P.vy + P.vz * P.vz); // add small-amplitude perturbations - C.density[id] = C.density[id] + A * sin(2.0 * M_PI * x_pos); - C.momentum_x[id] = C.momentum_x[id] + A * sin(2.0 * M_PI * x_pos); - C.momentum_y[id] = C.momentum_y[id] + A * sin(2.0 * M_PI * x_pos); - C.momentum_z[id] = C.momentum_z[id] + A * sin(2.0 * M_PI * x_pos); - C.Energy[id] = C.Energy[id] + A * (1.5) * sin(2 * M_PI * x_pos); + C.density[id] = C.density[id] + P.A * sin(2.0 * M_PI * x_pos); + C.momentum_x[id] = C.momentum_x[id] + P.A * sin(2.0 * M_PI * x_pos); + C.momentum_y[id] = C.momentum_y[id] + P.A * sin(2.0 * M_PI * x_pos); + C.momentum_z[id] = C.momentum_z[id] + P.A * sin(2.0 * M_PI * x_pos); + C.Energy[id] = C.Energy[id] + P.A * (1.5) * sin(2 * M_PI * x_pos); #ifdef DE - C.GasEnergy[id] = P / (gama - 1.0); + C.GasEnergy[id] = P.P / (gama - 1.0); #endif // DE #ifdef DE - C.GasEnergy[id] = P / (gama - 1.0); + C.GasEnergy[id] = P.P / (gama - 1.0); #endif // DE } } @@ -348,7 +347,7 @@ void Grid3D::Linear_Wave(parameters const &P) /*! \fn void Square_Wave(Real rho, Real vx, Real vy, Real vz, Real P, Real A) * \brief Square wave density perturbation with amplitude A*rho in pressure * equilibrium. */ -void Grid3D::Square_Wave(Real rho, Real vx, Real vy, Real vz, Real P, Real A) +void Grid3D::Square_Wave(parameters const &P) { int i, j, k, id; int istart, jstart, kstart, iend, jend, kend; @@ -381,15 +380,15 @@ void Grid3D::Square_Wave(Real rho, Real vx, Real vy, Real vz, Real P, Real A) // get cell-centered position Get_Position(i, j, k, &x_pos, &y_pos, &z_pos); - C.density[id] = rho; + C.density[id] = P.rho; // C.momentum_x[id] = 0.0; - C.momentum_x[id] = rho * vx; - C.momentum_y[id] = rho * vy; - C.momentum_z[id] = rho * vz; + C.momentum_x[id] = P.rho * P.vx; + C.momentum_y[id] = P.rho * P.vy; + C.momentum_z[id] = P.rho * P.vz; // C.momentum_z[id] = rho_l * v_l; - C.Energy[id] = P / (gama - 1.0) + 0.5 * rho * (vx * vx + vy * vy + vz * vz); + C.Energy[id] = P.P / (gama - 1.0) + 0.5 * P.rho * (P.vx * P.vx + P.vy * P.vy + P.vz * P.vz); #ifdef DE - C.GasEnergy[id] = P / (gama - 1.0); + C.GasEnergy[id] = P.P / (gama - 1.0); #endif #ifdef SCALAR #ifdef BASIC_SCALAR @@ -397,13 +396,13 @@ void Grid3D::Square_Wave(Real rho, Real vx, Real vy, Real vz, Real P, Real A) #endif #endif if (x_pos > 0.25 * H.xdglobal && x_pos < 0.75 * H.xdglobal) { - C.density[id] = rho * A; - C.momentum_x[id] = rho * A * vx; - C.momentum_y[id] = rho * A * vy; - C.momentum_z[id] = rho * A * vz; - C.Energy[id] = P / (gama - 1.0) + 0.5 * rho * A * (vx * vx + vy * vy + vz * vz); + C.density[id] = P.rho * P.A; + C.momentum_x[id] = P.rho * P.A * P.vx; + C.momentum_y[id] = P.rho * P.A * P.vy; + C.momentum_z[id] = P.rho * P.A * P.vz; + C.Energy[id] = P.P / (gama - 1.0) + 0.5 * P.rho * P.A * (P.vx * P.vx + P.vy * P.vy + P.vz * P.vz); #ifdef DE - C.GasEnergy[id] = P / (gama - 1.0); + C.GasEnergy[id] = P.P / (gama - 1.0); #endif #ifdef SCALAR #ifdef BASIC_SCALAR @@ -420,16 +419,11 @@ void Grid3D::Square_Wave(Real rho, Real vx, Real vy, Real vz, Real P, Real A) Bx_l, Real By_l, Real Bz_l, Real rho_r, Real vx_r, Real vy_r, Real vz_r, Real P_r, Real Bx_r, Real By_r, Real Bz_r, Real diaph) * \brief Initialize the grid with a Riemann problem. */ -void Grid3D::Riemann(Real rho_l, Real vx_l, Real vy_l, Real vz_l, Real P_l, Real Bx_l, Real By_l, Real Bz_l, Real rho_r, - Real vx_r, Real vy_r, Real vz_r, Real P_r, Real Bx_r, Real By_r, Real Bz_r, Real diaph) +void Grid3D::Riemann(parameters const &P) { - int i, j, k, id; - int istart, jstart, kstart, iend, jend, kend; - Real x_pos, y_pos, z_pos; - Real v, P, cs; - - istart = H.n_ghost; - iend = H.nx - H.n_ghost; + size_t const istart = H.n_ghost; + size_t const iend = H.nx - H.n_ghost; + size_t jstart, kstart, jend, kend; if (H.ny > 1) { jstart = H.n_ghost; jend = H.ny - H.n_ghost; @@ -446,58 +440,61 @@ void Grid3D::Riemann(Real rho_l, Real vx_l, Real vy_l, Real vz_l, Real P_l, Real } // set initial values of conserved variables - for (k = kstart - 1; k < kend; k++) { - for (j = jstart - 1; j < jend; j++) { - for (i = istart - 1; i < iend; i++) { + for (size_t k = kstart - 1; k < kend; k++) { + for (size_t j = jstart - 1; j < jend; j++) { + for (size_t i = istart - 1; i < iend; i++) { // get cell index - id = i + j * H.nx + k * H.nx * H.ny; + size_t const id = i + j * H.nx + k * H.nx * H.ny; // get cell-centered position + Real x_pos, y_pos, z_pos; Get_Position(i, j, k, &x_pos, &y_pos, &z_pos); #ifdef MHD // Set the magnetic field including the rightmost ghost cell on the // left side which is really the left face of the first grid cell - if (x_pos < diaph) { - C.magnetic_x[id] = Bx_l; - C.magnetic_y[id] = By_l; - C.magnetic_z[id] = Bz_l; + if (x_pos < P.diaph) { + C.magnetic_x[id] = P.Bx_l; + C.magnetic_y[id] = P.By_l; + C.magnetic_z[id] = P.Bz_l; } else { - C.magnetic_x[id] = Bx_r; - C.magnetic_y[id] = By_r; - C.magnetic_z[id] = Bz_r; + C.magnetic_x[id] = P.Bx_r; + C.magnetic_y[id] = P.By_r; + C.magnetic_z[id] = P.Bz_r; } #endif // MHD // Exclude the rightmost ghost cell on the "left" side if ((k >= kstart) and (j >= jstart) and (i >= istart)) { - if (x_pos < diaph) { - C.density[id] = rho_l; - C.momentum_x[id] = rho_l * vx_l; - C.momentum_y[id] = rho_l * vy_l; - C.momentum_z[id] = rho_l * vz_l; - C.Energy[id] = hydro_utilities::Calc_Energy_Primitive(P_l, rho_l, vx_l, vy_l, vz_l, gama, Bx_l, By_l, Bz_l); + if (x_pos < P.diaph) { + C.density[id] = P.rho_l; + C.momentum_x[id] = P.rho_l * P.vx_l; + C.momentum_y[id] = P.rho_l * P.vy_l; + C.momentum_z[id] = P.rho_l * P.vz_l; + C.Energy[id] = hydro_utilities::Calc_Energy_Primitive(P.P_l, P.rho_l, P.vx_l, P.vy_l, P.vz_l, gama, P.Bx_l, + P.By_l, P.Bz_l); #ifdef SCALAR #ifdef BASIC_SCALAR - C.basic_scalar[id] = 1.0 * rho_l; + C.basic_scalar[id] = 1.0 * P.rho_l; #endif #endif // SCALAR #ifdef DE - C.GasEnergy[id] = P_l / (gama - 1.0); + C.GasEnergy[id] = P.P_l / (gama - 1.0); #endif // DE } else { - C.density[id] = rho_r; - C.momentum_x[id] = rho_r * vx_r; - C.momentum_y[id] = rho_r * vy_r; - C.momentum_z[id] = rho_r * vz_r; - C.Energy[id] = hydro_utilities::Calc_Energy_Primitive(P_r, rho_r, vx_r, vy_r, vz_r, gama, Bx_r, By_r, Bz_r); + C.density[id] = P.rho_r; + C.momentum_x[id] = P.rho_r * P.vx_r; + C.momentum_y[id] = P.rho_r * P.vy_r; + C.momentum_z[id] = P.rho_r * P.vz_r; + C.Energy[id] = hydro_utilities::Calc_Energy_Primitive(P.P_r, P.rho_r, P.vx_r, P.vy_r, P.vz_r, gama, P.Bx_r, + P.By_r, P.Bz_r); #ifdef SCALAR #ifdef BASIC_SCALAR - C.basic_scalar[id] = 0.0 * rho_r; + C.basic_scalar[id] = 0.0 * P.rho_r; #endif #endif // SCALAR #ifdef DE - C.GasEnergy[id] = P_r / (gama - 1.0); + C.GasEnergy[id] = P.P_r / (gama - 1.0); #endif // DE } } diff --git a/src/system_tests/mhd_system_tests.cpp b/src/system_tests/mhd_system_tests.cpp index c64579f3d..622f0aa69 100644 --- a/src/system_tests/mhd_system_tests.cpp +++ b/src/system_tests/mhd_system_tests.cpp @@ -612,6 +612,9 @@ TEST_P(tMHDSYSTEMParameterizedMpi, RyuAndJones1aShockTubeCorrectInputExpectCorre TEST_P(tMHDSYSTEMParameterizedMpi, RyuAndJones4dShockTubeCorrectInputExpectCorrectOutput) { test_runner.numMpiRanks = GetParam(); + // This test is particularly sensitive to minor changes in the initial conditions, the kind of changes that are + // expected from compiler to compiler. As such the limits have been loosened slightly. + test_runner.setFixedEpsilon(7.3E-12); test_runner.runTest(); } From 72d94ce43605c5af6af80226e558fb2cb48e8f1f Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Tue, 20 Jun 2023 11:39:13 -0400 Subject: [PATCH 383/694] Refactor cat_dset_3D.py to provide an interface The cat_dset_3D.py script required manual changes to use it. This made it impossible to use in an automated way and tricky to use manually. To address this it needed both a CLI interface and a python interface for calling from within another python script. Both have been added by moving all the concatenation code into the `concat_3d` function which handles the python interface and the new `main` function handles the CLI interface then calls `concat_3d`. Also adds __pycache__ to gitignore. --- .gitignore | 6 +- python_scripts/cat_dset_3D.py | 209 +++++++++++++++++++++------------- 2 files changed, 133 insertions(+), 82 deletions(-) diff --git a/.gitignore b/.gitignore index 7ce01c9e9..1f1fa3018 100644 --- a/.gitignore +++ b/.gitignore @@ -16,6 +16,7 @@ bin/* *.a a.out *.dSYM +__pycache__ # Makefiles # ############# @@ -39,11 +40,8 @@ data out.* o.* run - - disk.* - # Logs and databases # ###################### *.err @@ -67,7 +65,6 @@ disk.* # OS generated files # ###################### .DS_Store - .remote-sync.json .remote-sync_macos.json ._* @@ -85,4 +82,3 @@ Thumbs.db ############################# docs/doxygen/build docs/sphinx/build - diff --git a/python_scripts/cat_dset_3D.py b/python_scripts/cat_dset_3D.py index 5ac71a612..4cff6dc9a 100755 --- a/python_scripts/cat_dset_3D.py +++ b/python_scripts/cat_dset_3D.py @@ -1,86 +1,141 @@ #!/usr/bin/env python3 -# Example file for concatenating 3D hdf5 datasets +""" +Python script for concatenating 3D hdf5 datasets. Includes a CLI for concatenating Cholla HDF5 datasets and can be +imported into other scripts where the `concat_3d` function can be used to concatenate the datasets. + +Generally the easiest way to import this script is to add the `python_scripts` directory to your python path in your +script like this: +``` +import sys +sys.path.append('/PATH/TO/CHOLLA/python_scripts') +import cat_dset_3D +``` +""" import h5py import numpy as np +import argparse +import pathlib + +def main(): + """This function handles the CLI argument parsing and is only intended to be used when this script is invoked from the + command line. If you're importing this file then use the `concat_3d` function directly. + """ + # Argument handling + cli = argparse.ArgumentParser() + # Required Arguments + cli.add_argument('-s', '--start_num', type=int, required=True, help='The first output step to concatenate') + cli.add_argument('-e', '--end_num', type=int, required=True, help='The last output step to concatenate') + cli.add_argument('-n', '--num_processes', type=int, required=True, help='The number of processes that were used') + # Optional Arguments + cli.add_argument('-i', '--input_dir', type=pathlib.Path, default=pathlib.Path.cwd(), help='The input directory.') + cli.add_argument('-o', '--output_dir', type=pathlib.Path, default=pathlib.Path.cwd(), help='The output directory.') + args = cli.parse_args() + + # Perform the concatenation + concat_3d(start_num=args.start_num, + end_num=args.end_num, + num_processes=args.num_processes, + input_dir=args.input_dir, + output_dir=args.output_dir) + + +# ====================================================================================================================== +def concat_3d(start_num: int, + end_num: int, + num_processes: int, + input_dir: pathlib.Path = pathlib.Path.cwd(), + output_dir: pathlib.Path = pathlib.Path.cwd()): + """Concatenate 3D HDF5 Cholla datasets. i.e. take the single files generated per process and concatenate them into a + single, large file. All outputs from start_num to end_num will be concatenated. + + Args: + start_num (int): The first output step to concatenate + end_num (int): The last output step to concatenate + num_processes (int): The number of processes that were used + input_dir (pathlib.Path, optional): The input directory. Defaults to pathlib.Path.cwd(). + output_dir (pathlib.Path, optional): The output directory. Defaults to pathlib.Path.cwd(). + """ + + # Error checking + assert start_num >= 0, 'start_num must be greater than or equal to 0' + assert end_num >= 0, 'end_num must be greater than or equal to 0' + assert start_num <= end_num, 'end_num should be greater than or equal to start_num' + assert num_processes > 1, 'num_processes must be greater than 1' + + # loop over outputs + for n in range(start_num, end_num+1): -ns = 0 -ne = 0 -n_proc = 16 # number of processors that did the calculations -istart = 0*n_proc -iend = 1*n_proc -dnamein = './hdf5/raw/' -dnameout = './hdf5/' - -# loop over outputs -for n in range(ns, ne+1): - - # loop over files for a given output - for i in range(istart, iend): - - # open the output file for writing (don't overwrite if exists) - fileout = h5py.File(dnameout+str(n)+'.h5', 'a') - # open the input file for reading - filein = h5py.File(dnamein+str(n)+'.h5.'+str(i), 'r') - # read in the header data from the input file - head = filein.attrs - - # if it's the first input file, write the header attributes - # and create the datasets in the output file - if (i == 0): - nx = head['dims'][0] - ny = head['dims'][1] - nz = head['dims'][2] - fileout.attrs['dims'] = [nx, ny, nz] - fileout.attrs['gamma'] = [head['gamma'][0]] - fileout.attrs['t'] = [head['t'][0]] - fileout.attrs['dt'] = [head['dt'][0]] - fileout.attrs['n_step'] = [head['n_step'][0]] - - units = ['time_unit', 'mass_unit', 'length_unit', 'energy_unit', 'velocity_unit', 'density_unit'] - for unit in units: - fileout.attrs[unit] = [head[unit][0]] - - d = fileout.create_dataset("density", (nx, ny, nz), chunks=True, dtype=filein['density'].dtype) - mx = fileout.create_dataset("momentum_x", (nx, ny, nz), chunks=True, dtype=filein['momentum_x'].dtype) - my = fileout.create_dataset("momentum_y", (nx, ny, nz), chunks=True, dtype=filein['momentum_y'].dtype) - mz = fileout.create_dataset("momentum_z", (nx, ny, nz), chunks=True, dtype=filein['momentum_z'].dtype) - E = fileout.create_dataset("Energy", (nx, ny, nz), chunks=True, dtype=filein['Energy'].dtype) + # loop over files for a given output + for i in range(0, num_processes): + + # open the output file for writing (don't overwrite if exists) + fileout = h5py.File(output_dir / f'{n}.h5', 'a') + # open the input file for reading + filein = h5py.File(input_dir / f'{n}.h5.{i}', 'r') + # read in the header data from the input file + head = filein.attrs + + # if it's the first input file, write the header attributes + # and create the datasets in the output file + if (i == 0): + nx = head['dims'][0] + ny = head['dims'][1] + nz = head['dims'][2] + fileout.attrs['dims'] = [nx, ny, nz] + fileout.attrs['gamma'] = [head['gamma'][0]] + fileout.attrs['t'] = [head['t'][0]] + fileout.attrs['dt'] = [head['dt'][0]] + fileout.attrs['n_step'] = [head['n_step'][0]] + + units = ['time_unit', 'mass_unit', 'length_unit', 'energy_unit', 'velocity_unit', 'density_unit'] + for unit in units: + fileout.attrs[unit] = [head[unit][0]] + + d = fileout.create_dataset("density", (nx, ny, nz), chunks=True, dtype=filein['density'].dtype) + mx = fileout.create_dataset("momentum_x", (nx, ny, nz), chunks=True, dtype=filein['momentum_x'].dtype) + my = fileout.create_dataset("momentum_y", (nx, ny, nz), chunks=True, dtype=filein['momentum_y'].dtype) + mz = fileout.create_dataset("momentum_z", (nx, ny, nz), chunks=True, dtype=filein['momentum_z'].dtype) + E = fileout.create_dataset("Energy", (nx, ny, nz), chunks=True, dtype=filein['Energy'].dtype) + try: + GE = fileout.create_dataset("GasEnergy", (nx, ny, nz), chunks=True, dtype=filein['GasEnergy'].dtype) + except KeyError: + print('No Dual energy data present'); + try: + bx = fileout.create_dataset("magnetic_x", (nx+1, ny, nz), chunks=True, dtype=filein['magnetic_x'].dtype) + by = fileout.create_dataset("magnetic_y", (nx, ny+1, nz), chunks=True, dtype=filein['magnetic_y'].dtype) + bz = fileout.create_dataset("magnetic_z", (nx, ny, nz+1), chunks=True, dtype=filein['magnetic_z'].dtype) + except KeyError: + print('No magnetic field data present'); + + # write data from individual processor file to + # correct location in concatenated file + nxl = head['dims_local'][0] + nyl = head['dims_local'][1] + nzl = head['dims_local'][2] + xs = head['offset'][0] + ys = head['offset'][1] + zs = head['offset'][2] + fileout['density'][xs:xs+nxl,ys:ys+nyl,zs:zs+nzl] = filein['density'] + fileout['momentum_x'][xs:xs+nxl,ys:ys+nyl,zs:zs+nzl] = filein['momentum_x'] + fileout['momentum_y'][xs:xs+nxl,ys:ys+nyl,zs:zs+nzl] = filein['momentum_y'] + fileout['momentum_z'][xs:xs+nxl,ys:ys+nyl,zs:zs+nzl] = filein['momentum_z'] + fileout['Energy'][xs:xs+nxl,ys:ys+nyl,zs:zs+nzl] = filein['Energy'] try: - GE = fileout.create_dataset("GasEnergy", (nx, ny, nz), chunks=True, dtype=filein['GasEnergy'].dtype) + fileout['GasEnergy'][xs:xs+nxl,ys:ys+nyl,zs:zs+nzl] = filein['GasEnergy'] except KeyError: - print('No Dual energy data present'); + print('No Dual energy data present'); try: - bx = fileout.create_dataset("magnetic_x", (nx+1, ny, nz), chunks=True, dtype=filein['magnetic_x'].dtype) - by = fileout.create_dataset("magnetic_y", (nx, ny+1, nz), chunks=True, dtype=filein['magnetic_y'].dtype) - bz = fileout.create_dataset("magnetic_z", (nx, ny, nz+1), chunks=True, dtype=filein['magnetic_z'].dtype) + fileout['magnetic_x'][xs:xs+nxl+1, ys:ys+nyl, zs:zs+nzl] = filein['magnetic_x'] + fileout['magnetic_y'][xs:xs+nxl, ys:ys+nyl+1, zs:zs+nzl] = filein['magnetic_y'] + fileout['magnetic_z'][xs:xs+nxl, ys:ys+nyl, zs:zs+nzl+1] = filein['magnetic_z'] except KeyError: - print('No magnetic field data present'); - - # write data from individual processor file to - # correct location in concatenated file - nxl = head['dims_local'][0] - nyl = head['dims_local'][1] - nzl = head['dims_local'][2] - xs = head['offset'][0] - ys = head['offset'][1] - zs = head['offset'][2] - fileout['density'][xs:xs+nxl,ys:ys+nyl,zs:zs+nzl] = filein['density'] - fileout['momentum_x'][xs:xs+nxl,ys:ys+nyl,zs:zs+nzl] = filein['momentum_x'] - fileout['momentum_y'][xs:xs+nxl,ys:ys+nyl,zs:zs+nzl] = filein['momentum_y'] - fileout['momentum_z'][xs:xs+nxl,ys:ys+nyl,zs:zs+nzl] = filein['momentum_z'] - fileout['Energy'][xs:xs+nxl,ys:ys+nyl,zs:zs+nzl] = filein['Energy'] - try: - fileout['GasEnergy'][xs:xs+nxl,ys:ys+nyl,zs:zs+nzl] = filein['GasEnergy'] - except KeyError: - print('No Dual energy data present'); - try: - fileout['magnetic_x'][xs:xs+nxl+1, ys:ys+nyl, zs:zs+nzl] = filein['magnetic_x'] - fileout['magnetic_y'][xs:xs+nxl, ys:ys+nyl+1, zs:zs+nzl] = filein['magnetic_y'] - fileout['magnetic_z'][xs:xs+nxl, ys:ys+nyl, zs:zs+nzl+1] = filein['magnetic_z'] - except KeyError: - print('No magnetic field data present'); - - filein.close() - - fileout.close() + print('No magnetic field data present'); + + filein.close() + + fileout.close() +# ====================================================================================================================== + +if __name__ == '__main__': + main() From 99f325a42e56bbc5ab14dbfb65ae65d416705f8b Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Fri, 28 Apr 2023 15:14:26 -0400 Subject: [PATCH 384/694] Append make type to clang-tidy log files --- Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index e5b7a56e3..128ef39b7 100644 --- a/Makefile +++ b/Makefile @@ -205,8 +205,8 @@ tidy: # - --warnings-as-errors= Upgrade all warnings to error, good for CI clang-tidy --verify-config @echo -e - (time clang-tidy $(CLANG_TIDY_ARGS) $(CPPFILES_TIDY) -- $(DFLAGS) $(CXXFLAGS_CLANG_TIDY) $(LIBS_CLANG_TIDY)) > tidy_results_cpp.log 2>&1 & \ - (time clang-tidy $(CLANG_TIDY_ARGS) $(GPUFILES_TIDY) -- $(DFLAGS) $(GPUFLAGS_CLANG_TIDY) $(LIBS_CLANG_TIDY)) > tidy_results_gpu.log 2>&1 & \ + (time clang-tidy $(CLANG_TIDY_ARGS) $(CPPFILES_TIDY) -- $(DFLAGS) $(CXXFLAGS_CLANG_TIDY) $(LIBS_CLANG_TIDY)) > tidy_results_cpp_$(TYPE).log 2>&1 & \ + (time clang-tidy $(CLANG_TIDY_ARGS) $(GPUFILES_TIDY) -- $(DFLAGS) $(GPUFLAGS_CLANG_TIDY) $(LIBS_CLANG_TIDY)) > tidy_results_gpu_$(TYPE).log 2>&1 & \ for i in 1 2; do wait -n; done @echo -e "\nResults from clang-tidy are available in the 'tidy_results_cpp.log' and 'tidy_results_gpu.log' files." From 34a0ee372f958839e24dc438185ea27fb7e148cd Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Fri, 28 Apr 2023 15:15:19 -0400 Subject: [PATCH 385/694] Fix & Enable modernize-loop-convert tidy check --- .clang-tidy | 1 - src/global/global.cpp | 5 +++-- src/gravity/gravity_functions.cpp | 4 ++-- src/mhd/magnetic_divergence_tests.cu | 4 ++-- src/utils/reduction_utilities_tests.cu | 4 ++-- 5 files changed, 9 insertions(+), 9 deletions(-) diff --git a/.clang-tidy b/.clang-tidy index 49a9f8458..d14abfba2 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -110,7 +110,6 @@ Checks: "*, -misc-non-private-member-variables-in-classes, -modernize-avoid-c-arrays, -modernize-deprecated-headers, - -modernize-loop-convert, -modernize-macro-to-enum, -modernize-redundant-void-arg, -modernize-use-auto, diff --git a/src/global/global.cpp b/src/global/global.cpp index 12f891f20..da5d2105f 100644 --- a/src/global/global.cpp +++ b/src/global/global.cpp @@ -107,8 +107,9 @@ const std::set optionalParams = { * "warnings" in output. */ int is_param_valid(const char *param_name) { - for (auto it = optionalParams.begin(); it != optionalParams.end(); ++it) { - if (strcmp(param_name, *it) == 0) { + // for (auto optionalParam = optionalParams.begin(); optionalParam != optionalParams.end(); ++optionalParam) { + for (auto optionalParam : optionalParams) { + if (strcmp(param_name, optionalParam) == 0) { return 1; } } diff --git a/src/gravity/gravity_functions.cpp b/src/gravity/gravity_functions.cpp index 1f4a08f7f..70eb749c9 100644 --- a/src/gravity/gravity_functions.cpp +++ b/src/gravity/gravity_functions.cpp @@ -200,8 +200,8 @@ Real Grav3D::Get_Average_Density() Get_OMP_Grid_Indxs(nz_local, n_omp_procs, omp_id, &g_start, &g_end); dens_sum_all[omp_id] = Get_Average_Density_function(g_start, g_end); } - for (int i = 0; i < N_OMP_THREADS; i++) { - dens_sum += dens_sum_all[i]; + for (Real dens_sum_all_element : dens_sum_all) { + dens_sum += dens_sum_all_element; } #endif diff --git a/src/mhd/magnetic_divergence_tests.cu b/src/mhd/magnetic_divergence_tests.cu index 9393c2498..c1c44a9a7 100644 --- a/src/mhd/magnetic_divergence_tests.cu +++ b/src/mhd/magnetic_divergence_tests.cu @@ -46,8 +46,8 @@ TEST(tMHDGrid3DcheckMagneticDivergence, CorrectInputExpectCorrectOutput) std::vector host_grid(G.H.n_cells * G.H.n_fields); std::mt19937 prng(1); std::uniform_real_distribution doubleRand(1, 5); - for (size_t i = 0; i < host_grid.size(); i++) { - host_grid.at(i) = doubleRand(prng) / 1E15; + for (double& host_data : host_grid) { + host_data = doubleRand(prng) / 1E15; } // Allocating and copying to device diff --git a/src/utils/reduction_utilities_tests.cu b/src/utils/reduction_utilities_tests.cu index d7fdaf3d3..e689e2a5f 100644 --- a/src/utils/reduction_utilities_tests.cu +++ b/src/utils/reduction_utilities_tests.cu @@ -43,8 +43,8 @@ TEST(tALLKernelReduceMax, CorrectInputExpectCorrectOutput) std::mt19937 prng(1); std::uniform_real_distribution doubleRand(-std::abs(maxValue) - 1, std::abs(maxValue) - 1); std::uniform_int_distribution intRand(0, host_grid.size() - 1); - for (size_t i = 0; i < host_grid.size(); i++) { - host_grid.at(i) = doubleRand(prng); + for (Real& host_data : host_grid) { + host_data = doubleRand(prng); } host_grid.at(intRand(prng)) = maxValue; From 54f87ab7ecd03a08be41ed60ce403218cdd9bd72 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Fri, 28 Apr 2023 15:36:38 -0400 Subject: [PATCH 386/694] Fix & enable modernize-use-default-member-init check --- .clang-tidy | 1 - src/mhd/ct_electric_fields_tests.cu | 7 ++----- src/mhd/magnetic_update_tests.cu | 13 +++---------- 3 files changed, 5 insertions(+), 16 deletions(-) diff --git a/.clang-tidy b/.clang-tidy index d14abfba2..20f6646d4 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -113,7 +113,6 @@ Checks: "*, -modernize-macro-to-enum, -modernize-redundant-void-arg, -modernize-use-auto, - -modernize-use-default-member-init, -modernize-use-equals-default, -modernize-use-nodiscard, -modernize-use-noexcept, diff --git a/src/mhd/ct_electric_fields_tests.cu b/src/mhd/ct_electric_fields_tests.cu index e009fe948..afbaada66 100644 --- a/src/mhd/ct_electric_fields_tests.cu +++ b/src/mhd/ct_electric_fields_tests.cu @@ -39,10 +39,7 @@ class tMHDCalculateCTElectricFields : public ::testing::Test * */ tMHDCalculateCTElectricFields() - : nx(2), - ny(nx), - nz(nx), - n_cells(nx * ny * nz), + : n_cells(nx * ny * nz), fluxX(n_cells * (grid_enum::num_flux_fields)), fluxY(n_cells * (grid_enum::num_flux_fields)), fluxZ(n_cells * (grid_enum::num_flux_fields)), @@ -71,7 +68,7 @@ class tMHDCalculateCTElectricFields : public ::testing::Test protected: // Initialize the test grid and other state variables - size_t const nx, ny, nz; + size_t const nx = 2, ny = nx, nz = nx; size_t const n_cells; // Launch Parameters diff --git a/src/mhd/magnetic_update_tests.cu b/src/mhd/magnetic_update_tests.cu index 402d9c737..9b78a8f5d 100644 --- a/src/mhd/magnetic_update_tests.cu +++ b/src/mhd/magnetic_update_tests.cu @@ -34,14 +34,7 @@ class tMHDUpdateMagneticField3D : public ::testing::Test * */ tMHDUpdateMagneticField3D() - : nx(3), - ny(nx), - nz(nx), - n_cells(nx * ny * nz), - dt(3.2), - dx(2.5), - dy(2.5), - dz(2.5), + : n_cells(nx * ny * nz), sourceGrid(n_cells * (grid_enum::num_fields)), destinationGrid(n_cells * (grid_enum::num_fields), -999.), ctElectricFields(n_cells * 3), @@ -64,9 +57,9 @@ class tMHDUpdateMagneticField3D : public ::testing::Test protected: // Initialize the test grid and other state variables - size_t const nx, ny, nz; + size_t const nx = 3, ny = nx, nz = nx; size_t const n_cells; - Real const dt, dx, dy, dz; + Real const dt = 3.2, dx = 2.5, dy = dx, dz = dx; // Launch Parameters dim3 const dimGrid; // How many blocks in the grid From 8269b69fe7e3dd5f66280e42acfce86366ebc46e Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Fri, 28 Apr 2023 15:48:12 -0400 Subject: [PATCH 387/694] Fix & Enable performance-faster-string-find check --- .clang-tidy | 1 - src/system_tests/system_tester.cpp | 4 ++-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/.clang-tidy b/.clang-tidy index 20f6646d4..078add31b 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -120,7 +120,6 @@ Checks: "*, -modernize-use-override, -modernize-use-using, -openmp-use-default-none, - -performance-faster-string-find, -performance-for-range-copy, -performance-inefficient-vector-operation, -performance-unnecessary-value-param, diff --git a/src/system_tests/system_tester.cpp b/src/system_tests/system_tester.cpp index 0df225677..9f93d1557 100644 --- a/src/system_tests/system_tester.cpp +++ b/src/system_tests/system_tester.cpp @@ -428,10 +428,10 @@ systemTest::SystemTestRunner::SystemTestRunner(bool const &particleData, bool co const ::testing::TestInfo *const test_info = ::testing::UnitTest::GetInstance()->current_test_info(); std::stringstream nameStream; std::string suiteName = test_info->test_suite_name(); - suiteName = suiteName.substr(suiteName.find("/") + 1, suiteName.length()); + suiteName = suiteName.substr(suiteName.find('/') + 1, suiteName.length()); nameStream << suiteName << "_" << test_info->name(); std::string fullTestName = nameStream.str(); - _fullTestFileName = fullTestName.substr(0, fullTestName.find("/")); + _fullTestFileName = fullTestName.substr(0, fullTestName.find('/')); // Generate the input paths. Strip out everything after a "/" since that // probably indicates a parameterized test. From 3507e35ed736f80390b1bb9f5ffec4c0dbb7ee88 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Fri, 28 Apr 2023 15:59:29 -0400 Subject: [PATCH 388/694] Fix & Enable performance-for-range-copy check --- .clang-tidy | 1 - src/system_tests/system_tester.cpp | 6 +++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/.clang-tidy b/.clang-tidy index 078add31b..0d0cd59f7 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -120,7 +120,6 @@ Checks: "*, -modernize-use-override, -modernize-use-using, -openmp-use-default-none, - -performance-for-range-copy, -performance-inefficient-vector-operation, -performance-unnecessary-value-param, -readability-const-return-type, diff --git a/src/system_tests/system_tester.cpp b/src/system_tests/system_tester.cpp index 9f93d1557..6141a471d 100644 --- a/src/system_tests/system_tester.cpp +++ b/src/system_tests/system_tester.cpp @@ -111,7 +111,7 @@ void systemTest::SystemTestRunner::runTest(bool const &compute_L2_norm_only, dou double L2Norm = 0; double maxError = 0; // Loop over the datasets to be tested - for (auto dataSetName : _fiducialDataSetNames) { + for (auto const &dataSetName : _fiducialDataSetNames) { // check that the test data has the dataset in it ASSERT_EQ(std::count(_testDataSetNames.begin(), _testDataSetNames.end(), dataSetName), 1) << "The test data does not contain the dataset '" + dataSetName + "' or contains it more than once."; @@ -272,7 +272,7 @@ void systemTest::SystemTestRunner::runL1ErrorTest(double const &maxAllowedL1Erro // Loop over the datasets to be tested double L2Norm = 0; double maxError = 0; - for (auto dataSetName : _fiducialDataSetNames) { + for (auto const &dataSetName : _fiducialDataSetNames) { if (dataSetName == "GasEnergy") { continue; } @@ -623,7 +623,7 @@ std::vector systemTest::SystemTestRunner::_loadTestParticleData(std::str { // Determine the total number of particles if (_testTotalNumParticles == 0) { - for (auto file : _testParticlesFileVec) { + for (auto const &file : _testParticlesFileVec) { // Open the dataset H5::DataSet const dataSet = file.openDataSet(dataSetName); From e0bf3d46b55dce1c92b7530ce59eb3f07bae0b0e Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Fri, 28 Apr 2023 16:24:53 -0400 Subject: [PATCH 389/694] Fix & enable use-auto checks. Remove alias to device vector Fixed and enable modernize-use-auto and its aliad hicpp-use-auto. Removed unsafe alias to a DeviceVector pointer in io.cpp --- .clang-tidy | 2 - src/gravity/paris/PoissonZero3DBlockedGPU.cu | 6 +- src/io/io.cpp | 58 ++++++++++---------- src/particles/feedback_CIC_gpu.cu | 4 +- src/particles/particles_3D.cpp | 2 +- 5 files changed, 34 insertions(+), 38 deletions(-) diff --git a/.clang-tidy b/.clang-tidy index 0d0cd59f7..022378c88 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -95,7 +95,6 @@ Checks: "*, -hicpp-no-malloc, -hicpp-signed-bitwise, -hicpp-special-member-functions, - -hicpp-use-auto, -hicpp-use-equals-default, -hicpp-use-noexcept, -hicpp-use-nullptr, @@ -112,7 +111,6 @@ Checks: "*, -modernize-deprecated-headers, -modernize-macro-to-enum, -modernize-redundant-void-arg, - -modernize-use-auto, -modernize-use-equals-default, -modernize-use-nodiscard, -modernize-use-noexcept, diff --git a/src/gravity/paris/PoissonZero3DBlockedGPU.cu b/src/gravity/paris/PoissonZero3DBlockedGPU.cu index 4b9e74e4c..5ad31406e 100644 --- a/src/gravity/paris/PoissonZero3DBlockedGPU.cu +++ b/src/gravity/paris/PoissonZero3DBlockedGPU.cu @@ -131,9 +131,9 @@ void PoissonZero3DBlockedGPU::solve(const long bytes, double *const density, dou assert(density); assert(potential); - double *const ua = potential; - double *const ub = density; - cufftDoubleComplex *const uc = reinterpret_cast(ub); + double *const ua = potential; + double *const ub = density; + auto *const uc = reinterpret_cast(ub); const double ddi = ddi_; const double ddj = ddj_; diff --git a/src/io/io.cpp b/src/io/io.cpp index 37cb24893..cc02a37c6 100644 --- a/src/io/io.cpp +++ b/src/io/io.cpp @@ -312,33 +312,32 @@ void OutputFloat32(Grid3D &G, struct parameters P, int nfile) // first time it is needed It persists until program exit, and then calls // Free upon destruction cuda_utilities::DeviceVector static device_dataset_vector{buffer_size}; - float *device_dataset_buffer = device_dataset_vector.data(); - float *dataset_buffer = (float *)malloc(buffer_size * sizeof(float)); + auto *dataset_buffer = (float *)malloc(buffer_size * sizeof(float)); if (P.out_float32_density > 0) { - WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, device_dataset_buffer, - G.C.d_density, "/density"); + WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, + device_dataset_vector.data(), G.C.d_density, "/density"); } if (P.out_float32_momentum_x > 0) { - WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, device_dataset_buffer, - G.C.d_momentum_x, "/momentum_x"); + WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, + device_dataset_vector.data(), G.C.d_momentum_x, "/momentum_x"); } if (P.out_float32_momentum_y > 0) { - WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, device_dataset_buffer, - G.C.d_momentum_y, "/momentum_y"); + WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, + device_dataset_vector.data(), G.C.d_momentum_y, "/momentum_y"); } if (P.out_float32_momentum_z > 0) { - WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, device_dataset_buffer, - G.C.d_momentum_z, "/momentum_z"); + WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, + device_dataset_vector.data(), G.C.d_momentum_z, "/momentum_z"); } if (P.out_float32_Energy > 0) { - WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, device_dataset_buffer, - G.C.d_Energy, "/Energy"); + WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, + device_dataset_vector.data(), G.C.d_Energy, "/Energy"); } #ifdef DE if (P.out_float32_GasEnergy > 0) { - WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, device_dataset_buffer, - G.C.d_GasEnergy, "/GasEnergy"); + WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, + device_dataset_vector.data(), G.C.d_GasEnergy, "/GasEnergy"); } #endif // DE #ifdef MHD @@ -347,17 +346,17 @@ void OutputFloat32(Grid3D &G, struct parameters P, int nfile) if (P.out_float32_magnetic_x > 0) { chprintf("WARNING: MHD float-32 output has a different output format than float-64\n"); WriteHDF5Field3D(H.nx, H.ny, nx_dset + 1, ny_dset + 1, nz_dset + 1, H.n_ghost - 1, file_id, dataset_buffer, - device_dataset_buffer, G.C.d_magnetic_x, "/magnetic_x"); + device_dataset_vector.data(), G.C.d_magnetic_x, "/magnetic_x"); } if (P.out_float32_magnetic_y > 0) { chprintf("WARNING: MHD float-32 output has a different output format than float-64\n"); WriteHDF5Field3D(H.nx, H.ny, nx_dset + 1, ny_dset + 1, nz_dset + 1, H.n_ghost - 1, file_id, dataset_buffer, - device_dataset_buffer, G.C.d_magnetic_y, "/magnetic_y"); + device_dataset_vector.data(), G.C.d_magnetic_y, "/magnetic_y"); } if (P.out_float32_magnetic_z > 0) { chprintf("WARNING: MHD float-32 output has a different output format than float-64\n"); WriteHDF5Field3D(H.nx, H.ny, nx_dset + 1, ny_dset + 1, nz_dset + 1, H.n_ghost - 1, file_id, dataset_buffer, - device_dataset_buffer, G.C.d_magnetic_z, "/magnetic_z"); + device_dataset_vector.data(), G.C.d_magnetic_z, "/magnetic_z"); } #endif // MHD @@ -1411,21 +1410,20 @@ void Grid3D::Write_Grid_HDF5(hid_t file_id) size_t buffer_size = nx_dset * ny_dset * nz_dset; #endif cuda_utilities::DeviceVector static device_dataset_vector{buffer_size}; - Real *device_dataset_buffer = device_dataset_vector.data(); - dataset_buffer = (Real *)malloc(buffer_size * sizeof(Real)); + dataset_buffer = (Real *)malloc(buffer_size * sizeof(Real)); // Start writing fields - Write_Grid_HDF5_Field_GPU(H, file_id, dataset_buffer, device_dataset_buffer, C.d_density, "/density"); + Write_Grid_HDF5_Field_GPU(H, file_id, dataset_buffer, device_dataset_vector.data(), C.d_density, "/density"); if (output_momentum || H.Output_Complete_Data) { - Write_Grid_HDF5_Field_GPU(H, file_id, dataset_buffer, device_dataset_buffer, C.d_momentum_x, "/momentum_x"); - Write_Grid_HDF5_Field_GPU(H, file_id, dataset_buffer, device_dataset_buffer, C.d_momentum_y, "/momentum_y"); - Write_Grid_HDF5_Field_GPU(H, file_id, dataset_buffer, device_dataset_buffer, C.d_momentum_z, "/momentum_z"); + Write_Grid_HDF5_Field_GPU(H, file_id, dataset_buffer, device_dataset_vector.data(), C.d_momentum_x, "/momentum_x"); + Write_Grid_HDF5_Field_GPU(H, file_id, dataset_buffer, device_dataset_vector.data(), C.d_momentum_y, "/momentum_y"); + Write_Grid_HDF5_Field_GPU(H, file_id, dataset_buffer, device_dataset_vector.data(), C.d_momentum_z, "/momentum_z"); } if (output_energy || H.Output_Complete_Data) { - Write_Grid_HDF5_Field_GPU(H, file_id, dataset_buffer, device_dataset_buffer, C.d_Energy, "/Energy"); + Write_Grid_HDF5_Field_GPU(H, file_id, dataset_buffer, device_dataset_vector.data(), C.d_Energy, "/Energy"); #ifdef DE - Write_Grid_HDF5_Field_GPU(H, file_id, dataset_buffer, device_dataset_buffer, C.d_GasEnergy, "/GasEnergy"); + Write_Grid_HDF5_Field_GPU(H, file_id, dataset_buffer, device_dataset_vector.data(), C.d_GasEnergy, "/GasEnergy"); #endif } @@ -1486,18 +1484,18 @@ void Grid3D::Write_Grid_HDF5(hid_t file_id) #if defined(GRAVITY) && defined(OUTPUT_POTENTIAL) Write_Generic_HDF5_Field_GPU(Grav.nx_local + 2 * N_GHOST_POTENTIAL, Grav.ny_local + 2 * N_GHOST_POTENTIAL, Grav.nz_local + 2 * N_GHOST_POTENTIAL, Grav.nx_local, Grav.ny_local, Grav.nz_local, - N_GHOST_POTENTIAL, file_id, dataset_buffer, device_dataset_buffer, Grav.F.potential_d, - "/grav_potential"); + N_GHOST_POTENTIAL, file_id, dataset_buffer, device_dataset_vector.data(), + Grav.F.potential_d, "/grav_potential"); #endif // GRAVITY and OUTPUT_POTENTIAL #ifdef MHD if (H.Output_Complete_Data) { WriteHDF5Field3D(H.nx, H.ny, H.nx_real + 1, H.ny_real, H.nz_real, H.n_ghost, file_id, dataset_buffer, - device_dataset_buffer, C.d_magnetic_x, "/magnetic_x", 0); + device_dataset_vector.data(), C.d_magnetic_x, "/magnetic_x", 0); WriteHDF5Field3D(H.nx, H.ny, H.nx_real, H.ny_real + 1, H.nz_real, H.n_ghost, file_id, dataset_buffer, - device_dataset_buffer, C.d_magnetic_y, "/magnetic_y", 1); + device_dataset_vector.data(), C.d_magnetic_y, "/magnetic_y", 1); WriteHDF5Field3D(H.nx, H.ny, H.nx_real, H.ny_real, H.nz_real + 1, H.n_ghost, file_id, dataset_buffer, - device_dataset_buffer, C.d_magnetic_z, "/magnetic_z", 2); + device_dataset_vector.data(), C.d_magnetic_z, "/magnetic_z", 2); } #endif // MHD } diff --git a/src/particles/feedback_CIC_gpu.cu b/src/particles/feedback_CIC_gpu.cu index 09281e49f..75bf1f5e8 100644 --- a/src/particles/feedback_CIC_gpu.cu +++ b/src/particles/feedback_CIC_gpu.cu @@ -38,8 +38,8 @@ int snr_n; #ifndef O_HIP __device__ double atomicMax(double* address, double val) { - unsigned long long int* address_as_ull = (unsigned long long int*)address; - unsigned long long int old = *address_as_ull, assumed; + auto* address_as_ull = (unsigned long long int*)address; + unsigned long long int old = *address_as_ull, assumed; do { assumed = old; old = atomicCAS(address_as_ull, assumed, __double_as_longlong(fmax(val, __longlong_as_double(assumed)))); diff --git a/src/particles/particles_3D.cpp b/src/particles/particles_3D.cpp index f2b56f62c..e8ac74dbe 100644 --- a/src/particles/particles_3D.cpp +++ b/src/particles/particles_3D.cpp @@ -516,7 +516,7 @@ void Particles_3D::Initialize_Sphere(struct parameters *P) Real *temp_mass = (Real *)malloc(particles_array_size * sizeof(Real)); #endif #ifdef PARTICLE_IDS - part_int_t *temp_id = (part_int_t *)malloc(particles_array_size * sizeof(part_int_t)); + auto *temp_id = (part_int_t *)malloc(particles_array_size * sizeof(part_int_t)); #endif chprintf(" Allocated GPU memory for particle data\n"); From 48ca2967e3847faa555e9b9939a4929b8a27b354 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Fri, 28 Apr 2023 16:31:45 -0400 Subject: [PATCH 390/694] Enable performance-inefficient-vector-operation check --- .clang-tidy | 1 - 1 file changed, 1 deletion(-) diff --git a/.clang-tidy b/.clang-tidy index 022378c88..5a970ef93 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -118,7 +118,6 @@ Checks: "*, -modernize-use-override, -modernize-use-using, -openmp-use-default-none, - -performance-inefficient-vector-operation, -performance-unnecessary-value-param, -readability-const-return-type, -readability-convert-member-functions-to-static, From 80084afd57559426355afeaf638d8f027fea1ae0 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Fri, 28 Apr 2023 16:49:56 -0400 Subject: [PATCH 391/694] Fix & Enable performance-unnecessary-value-param check --- .clang-tidy | 1 - src/riemann_solvers/hlld_cuda_tests.cu | 2 +- src/utils/testing_utilities.cpp | 8 ++++---- src/utils/testing_utilities.h | 10 +++++----- 4 files changed, 10 insertions(+), 11 deletions(-) diff --git a/.clang-tidy b/.clang-tidy index 5a970ef93..1d3e2c422 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -118,7 +118,6 @@ Checks: "*, -modernize-use-override, -modernize-use-using, -openmp-use-default-none, - -performance-unnecessary-value-param, -readability-const-return-type, -readability-convert-member-functions-to-static, -readability-delete-null-pointer, diff --git a/src/riemann_solvers/hlld_cuda_tests.cu b/src/riemann_solvers/hlld_cuda_tests.cu index 07706f543..c39e091d1 100644 --- a/src/riemann_solvers/hlld_cuda_tests.cu +++ b/src/riemann_solvers/hlld_cuda_tests.cu @@ -161,7 +161,7 @@ class tMHDCalculateHLLDFluxesCUDA : public ::testing::Test * \param[in] direction Which plane the interface is. 0 = plane normal to * X, 1 = plane normal to Y, 2 = plane normal to Z. Defaults to 0. */ - void checkResults(std::vector fiducialFlux, std::vector scalarFlux, Real thermalEnergyFlux, + void checkResults(std::vector fiducialFlux, std::vector const &scalarFlux, Real thermalEnergyFlux, std::vector const &testFlux, std::string const &customOutput = "", int const &direction = 0) { // Field names diff --git a/src/utils/testing_utilities.cpp b/src/utils/testing_utilities.cpp index 79dc8d11b..774570aee 100644 --- a/src/utils/testing_utilities.cpp +++ b/src/utils/testing_utilities.cpp @@ -81,7 +81,7 @@ bool nearlyEqualDbl(double const &a, double const &b, double &absoluteDiff, int6 } // ========================================================================= -void wrapperEqual(int i, int j, int k, std::string dataSetName, double test_value, double fid_value, +void wrapperEqual(int i, int j, int k, std::string const &dataSetName, double test_value, double fid_value, double fixedEpsilon = 5.0E-12) { std::string outString; @@ -97,7 +97,7 @@ void wrapperEqual(int i, int j, int k, std::string dataSetName, double test_valu ASSERT_NO_FATAL_FAILURE(checkResults<1>(fid_value, test_value, outString, fixedEpsilon)); } -void analyticConstant(systemTest::SystemTestRunner testObject, std::string dataSetName, double value) +void analyticConstant(systemTest::SystemTestRunner testObject, std::string const &dataSetName, double value) { std::vector testDims(3, 1); std::vector testData = testObject.loadTestFieldData(dataSetName, testDims); @@ -112,8 +112,8 @@ void analyticConstant(systemTest::SystemTestRunner testObject, std::string dataS } } -void analyticSine(systemTest::SystemTestRunner testObject, std::string dataSetName, double constant, double amplitude, - double kx, double ky, double kz, double phase, double tolerance) +void analyticSine(systemTest::SystemTestRunner testObject, std::string const &dataSetName, double constant, + double amplitude, double kx, double ky, double kz, double phase, double tolerance) { std::vector testDims(3, 1); std::vector testData = testObject.loadTestFieldData(dataSetName, testDims); diff --git a/src/utils/testing_utilities.h b/src/utils/testing_utilities.h index 12daf0969..7057e01e9 100644 --- a/src/utils/testing_utilities.h +++ b/src/utils/testing_utilities.h @@ -95,13 +95,13 @@ bool nearlyEqualDbl(double const &a, double const &b, double &absoluteDiff, int6 double const &fixedEpsilon = 1E-14, int64_t const &ulpsEpsilon = 4); // ========================================================================= -void wrapperEqual(int i, int j, int k, std::string dataSetName, double test_value, double fid_value, +void wrapperEqual(int i, int j, int k, std::string const &dataSetName, double test_value, double fid_value, double fixedEpsilon); -void analyticConstant(systemTest::SystemTestRunner testObject, std::string dataSetName, double value); +void analyticConstant(systemTest::SystemTestRunner testObject, std::string const &dataSetName, double value); -void analyticSine(systemTest::SystemTestRunner testObject, std::string dataSetName, double constant, double amplitude, - double kx, double ky, double kz, double phase, double tolerance); +void analyticSine(systemTest::SystemTestRunner testObject, std::string const &dataSetName, double constant, + double amplitude, double kx, double ky, double kz, double phase, double tolerance); // ========================================================================= /*! @@ -120,7 +120,7 @@ void analyticSine(systemTest::SystemTestRunner testObject, std::string dataSetNa * values are ignored and default behaviour is used */ template -void checkResults(double fiducialNumber, double testNumber, std::string outString, double fixedEpsilon = -999, +void checkResults(double fiducialNumber, double testNumber, std::string const &outString, double fixedEpsilon = -999, int64_t ulpsEpsilon = -999) { // Check for equality and if not equal return difference From f04168d5a60d50c6c1dd3d2b44fcbc2a7b3ac647 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Fri, 28 Apr 2023 17:11:07 -0400 Subject: [PATCH 392/694] Update jenkinsfile for new clang-tidy log names --- Jenkinsfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index f974e3da7..d699732cb 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -105,10 +105,10 @@ pipeline sh ''' printf '=%.0s' {1..100} printf "\n" - cat tidy_results_cpp.log + cat tidy_results_cpp_${CHOLLA_MAKE_TYPE}.log printf '=%.0s' {1..100} printf "\n" - cat tidy_results_gpu.log + cat tidy_results_gpu_${CHOLLA_MAKE_TYPE}.log printf '=%.0s' {1..100} printf "\n" ''' From 2b038c8a770b71bcaba2de46f4fb5571bc5d2207 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Mon, 15 May 2023 13:31:28 -0400 Subject: [PATCH 393/694] Add make type info to message that prints after clang-tidy runs --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 128ef39b7..868f2c5bc 100644 --- a/Makefile +++ b/Makefile @@ -208,7 +208,7 @@ tidy: (time clang-tidy $(CLANG_TIDY_ARGS) $(CPPFILES_TIDY) -- $(DFLAGS) $(CXXFLAGS_CLANG_TIDY) $(LIBS_CLANG_TIDY)) > tidy_results_cpp_$(TYPE).log 2>&1 & \ (time clang-tidy $(CLANG_TIDY_ARGS) $(GPUFILES_TIDY) -- $(DFLAGS) $(GPUFLAGS_CLANG_TIDY) $(LIBS_CLANG_TIDY)) > tidy_results_gpu_$(TYPE).log 2>&1 & \ for i in 1 2; do wait -n; done - @echo -e "\nResults from clang-tidy are available in the 'tidy_results_cpp.log' and 'tidy_results_gpu.log' files." + @echo -e "\nResults from clang-tidy are available in the 'tidy_results_cpp_$(TYPE).log' and 'tidy_results_gpu_$(TYPE).log' files." clean: rm -f $(CLEAN_OBJS) From 8e3e84796698510aaa20ad1afe4eeb148c7132dd Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Mon, 15 May 2023 13:44:52 -0400 Subject: [PATCH 394/694] Enable readability-const-return-type clang-tidy check --- .clang-tidy | 1 - 1 file changed, 1 deletion(-) diff --git a/.clang-tidy b/.clang-tidy index 1d3e2c422..db92f281a 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -118,7 +118,6 @@ Checks: "*, -modernize-use-override, -modernize-use-using, -openmp-use-default-none, - -readability-const-return-type, -readability-convert-member-functions-to-static, -readability-delete-null-pointer, -readability-duplicate-include, From ab2e98fac5f9179f8ed290bfc4984f40fe595f52 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Mon, 15 May 2023 14:22:32 -0400 Subject: [PATCH 395/694] Fix & Enable readability-simplify-boolean-expr check --- .clang-tidy | 1 - src/grid/boundary_conditions.cpp | 2 +- src/grid/grid3D.cpp | 6 +----- src/main_tests.cpp | 13 ++----------- src/utils/testing_utilities.cpp | 8 +------- 5 files changed, 5 insertions(+), 25 deletions(-) diff --git a/.clang-tidy b/.clang-tidy index db92f281a..aab9275bb 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -133,7 +133,6 @@ Checks: "*, -readability-non-const-parameter, -readability-redundant-control-flow, -readability-redundant-preprocessor, - -readability-simplify-boolean-expr, -readability-suspicious-call-argument" WarningsAsErrors: '' # More paths can be ignored by modifying this so that it looks like '^((?!/PATH/ONE/|/PATH/TWO/).)*$' diff --git a/src/grid/boundary_conditions.cpp b/src/grid/boundary_conditions.cpp index 50c55126d..06e7196af 100644 --- a/src/grid/boundary_conditions.cpp +++ b/src/grid/boundary_conditions.cpp @@ -151,7 +151,7 @@ int Grid3D::Check_Custom_Boundary(int *flags, struct parameters P) } for (int i = 0; i < 6; i++) { - if (!((flags[i] >= 0) && (flags[i] <= 5))) { + if (flags[i] < 1 or flags[i] > 5) { chprintf( "Invalid boundary conditions. Must select between 1 (periodic), 2 " "(reflective), 3 (transmissive), 4 (custom), 5 (mpi).\n"); diff --git a/src/grid/grid3D.cpp b/src/grid/grid3D.cpp index f0a9fd064..9010da354 100644 --- a/src/grid/grid3D.cpp +++ b/src/grid/grid3D.cpp @@ -265,11 +265,7 @@ void Grid3D::Initialize(struct parameters *P) #endif #ifdef COSMOLOGY - if (P->scale_outputs_file[0] == '\0') { - H.OUTPUT_SCALE_FACOR = false; - } else { - H.OUTPUT_SCALE_FACOR = true; - } + H.OUTPUT_SCALE_FACOR = not P->scale_outputs_file[0] == '\0'; #endif H.Output_Initial = true; diff --git a/src/main_tests.cpp b/src/main_tests.cpp index 3be97f3eb..ee58fbd06 100644 --- a/src/main_tests.cpp +++ b/src/main_tests.cpp @@ -128,17 +128,8 @@ int main(int argc, char **argv) globalMpiLauncher.init("mpirun -np"); } - if (input.cmdOptionExists("--runCholla=false")) { - globalRunCholla = false; - } else { - globalRunCholla = true; - } - - if (input.cmdOptionExists("--compareSystemTestResults=false")) { - globalCompareSystemTestResults = false; - } else { - globalCompareSystemTestResults = true; - } + globalRunCholla = not input.cmdOptionExists("--runCholla=false"); + globalCompareSystemTestResults = not input.cmdOptionExists("--compareSystemTestResults=false"); // Run test and return result return RUN_ALL_TESTS(); diff --git a/src/utils/testing_utilities.cpp b/src/utils/testing_utilities.cpp index 774570aee..02aaadd68 100644 --- a/src/utils/testing_utilities.cpp +++ b/src/utils/testing_utilities.cpp @@ -71,13 +71,7 @@ bool nearlyEqualDbl(double const &a, double const &b, double &absoluteDiff, int6 // Perform the ULP check which is for numbers far from zero and perform the absolute check which is for numbers near // zero - if (ulpsDiff <= ulpsEpsilon or absoluteDiff <= fixedEpsilon) { - return true; - } - // if the checks don't pass indicate test failure - else { - return false; - } + return ulpsDiff <= ulpsEpsilon or absoluteDiff <= fixedEpsilon; } // ========================================================================= From f9c2a1c93d59eabe16a5b855718c27d2e43fa532 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Mon, 15 May 2023 14:51:49 -0400 Subject: [PATCH 396/694] Enable google-build-namespaces check Also, it's alias cert-dcl59-cpp --- .clang-tidy | 2 -- 1 file changed, 2 deletions(-) diff --git a/.clang-tidy b/.clang-tidy index aab9275bb..6029949b9 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -39,7 +39,6 @@ Checks: "*, -bugprone-implicit-widening-of-multiplication-result, -bugprone-narrowing-conversions, - -cert-dcl59-cpp, -cert-env33-c, -cert-err33-c, -cert-err34-c, @@ -80,7 +79,6 @@ Checks: "*, -cppcoreguidelines-pro-type-vararg, -cppcoreguidelines-special-member-functions, -cppcoreguidelines-virtual-class-destructor, - -google-build-namespaces, -google-explicit-constructor, -google-global-names-in-headers, -google-readability-casting, From d59c61cd83e33494499d88638ceb5f362fabb6f4 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Mon, 15 May 2023 15:13:01 -0400 Subject: [PATCH 397/694] Fix & Enabled cert-err58-cpp --- .clang-tidy | 1 - src/global/global.cpp | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/.clang-tidy b/.clang-tidy index 6029949b9..454c41277 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -42,7 +42,6 @@ Checks: "*, -cert-env33-c, -cert-err33-c, -cert-err34-c, - -cert-err58-cpp, -cert-msc32-c, -cert-msc51-cpp, -clang-analyzer-core.CallAndMessage, diff --git a/src/global/global.cpp b/src/global/global.cpp index da5d2105f..d5100a4ac 100644 --- a/src/global/global.cpp +++ b/src/global/global.cpp @@ -97,6 +97,7 @@ char *trim(char *s) return s; } +// NOLINTNEXTLINE(cert-err58-cpp) const std::set optionalParams = { "flag_delta", "ddelta_dt", "n_delta", "Lz", "Lx", "phi", "theta", "delta", "nzr", "nxr", "H0", "Omega_M", "Omega_L", "Init_redshift", From 3a58490d1b8edd86a08939a2de10570d96503b21 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Tue, 16 May 2023 15:02:46 -0400 Subject: [PATCH 398/694] Fix rebase error --- src/io/io.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/io/io.cpp b/src/io/io.cpp index cc02a37c6..96accc6b0 100644 --- a/src/io/io.cpp +++ b/src/io/io.cpp @@ -1430,11 +1430,11 @@ void Grid3D::Write_Grid_HDF5(hid_t file_id) #ifdef SCALAR #ifdef BASIC_SCALAR - Write_Grid_HDF5_Field_GPU(H, file_id, dataset_buffer, device_dataset_buffer, C.d_basic_scalar, "/scalar0"); + Write_Grid_HDF5_Field_GPU(H, file_id, dataset_buffer, device_dataset_vector.data(), C.d_basic_scalar, "/scalar0"); #endif // BASIC_SCALAR #ifdef DUST - Write_Grid_HDF5_Field_GPU(H, file_id, dataset_buffer, device_dataset_buffer, C.d_dust_density, "/dust_density"); + Write_Grid_HDF5_Field_GPU(H, file_id, dataset_buffer, device_dataset_vector.data(), C.d_dust_density, "/dust_density"); #endif // DUST #ifdef OUTPUT_CHEMISTRY From 5f73ad4f979e6c4e7a64414235ffc679cd40a559 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Tue, 16 May 2023 15:05:09 -0400 Subject: [PATCH 399/694] Permanently disable cert-err58-cpp check This check provides minimal benefit to us and flags every single gtest TEST macro as failing so I've disabled it. --- .clang-tidy | 1 + 1 file changed, 1 insertion(+) diff --git a/.clang-tidy b/.clang-tidy index 454c41277..eccdfe06e 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -33,6 +33,7 @@ Checks: "*, -readability-static-accessed-through-instance, -misc-unused-parameters, -hicpp-multiway-paths-covered, + -cert-err58-cpp, google-readability-avoid-underscore-in-googletest-name, google-upgrade-googletest-case, From 266e51d55e32c8aa90b0c03365c1c76ba400f066 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Tue, 16 May 2023 15:07:28 -0400 Subject: [PATCH 400/694] Formatting --- src/io/io.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/io/io.cpp b/src/io/io.cpp index 96accc6b0..3335afda7 100644 --- a/src/io/io.cpp +++ b/src/io/io.cpp @@ -1434,7 +1434,8 @@ void Grid3D::Write_Grid_HDF5(hid_t file_id) #endif // BASIC_SCALAR #ifdef DUST - Write_Grid_HDF5_Field_GPU(H, file_id, dataset_buffer, device_dataset_vector.data(), C.d_dust_density, "/dust_density"); + Write_Grid_HDF5_Field_GPU(H, file_id, dataset_buffer, device_dataset_vector.data(), C.d_dust_density, + "/dust_density"); #endif // DUST #ifdef OUTPUT_CHEMISTRY From 11b855f2291ac28bcd230ee9da6342187abf3c12 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Wed, 21 Jun 2023 10:57:26 -0400 Subject: [PATCH 401/694] Fix clang-tidy issues raised in PLMC PR --- src/global/global.cpp | 2 +- src/reconstruction/plmc_cuda_tests.cu | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/global/global.cpp b/src/global/global.cpp index d5100a4ac..a4c697d3c 100644 --- a/src/global/global.cpp +++ b/src/global/global.cpp @@ -109,7 +109,7 @@ const std::set optionalParams = { int is_param_valid(const char *param_name) { // for (auto optionalParam = optionalParams.begin(); optionalParam != optionalParams.end(); ++optionalParam) { - for (auto optionalParam : optionalParams) { + for (const auto *optionalParam : optionalParams) { if (strcmp(param_name, optionalParam) == 0) { return 1; } diff --git a/src/reconstruction/plmc_cuda_tests.cu b/src/reconstruction/plmc_cuda_tests.cu index 272bca85d..88e9c3b34 100644 --- a/src/reconstruction/plmc_cuda_tests.cu +++ b/src/reconstruction/plmc_cuda_tests.cu @@ -39,8 +39,8 @@ TEST(tHYDROPlmcReconstructor, CorrectInputExpectCorrectOutput) // Setup host grid. Fill host grid with random values and randomly assign maximum value std::vector host_grid(nx * ny * nz * n_fields); - for (size_t i = 0; i < host_grid.size(); i++) { - host_grid.at(i) = doubleRand(prng); + for (Real &val : host_grid) { + val = doubleRand(prng); } // Allocating and copying to device @@ -172,8 +172,8 @@ TEST(tMHDPlmcReconstructor, CorrectInputExpectCorrectOutput) // Setup host grid. Fill host grid with random values and randomly assign maximum value std::vector host_grid(n_cells_grid); - for (size_t i = 0; i < host_grid.size(); i++) { - host_grid.at(i) = doubleRand(prng); + for (Real &val : host_grid) { + val = doubleRand(prng); } // Allocating and copying to device From b9efa48906b0ac3aa538c57f9a8c82450f0498fa Mon Sep 17 00:00:00 2001 From: helenarichie Date: Mon, 15 May 2023 15:34:11 -0400 Subject: [PATCH 402/694] conform dust naming to Cholla's standard naming conventions --- src/dust/dust_cuda.cu | 93 ++++++++++++++++++------------------ src/dust/dust_cuda.h | 10 ++-- src/dust/dust_cuda_tests.cpp | 22 ++++----- 3 files changed, 62 insertions(+), 63 deletions(-) diff --git a/src/dust/dust_cuda.cu b/src/dust/dust_cuda.cu index b0969c455..8966d076f 100644 --- a/src/dust/dust_cuda.cu +++ b/src/dust/dust_cuda.cu @@ -44,18 +44,18 @@ __global__ void Dust_Kernel(Real *dev_conserved, int nx, int ny, int nz, int n_g // get a global thread ID int blockId = blockIdx.x + blockIdx.y * gridDim.x; int id = threadIdx.x + blockId * blockDim.x; - int zid = id / (nx * ny); - int yid = (id - zid * nx * ny) / nx; - int xid = id - zid * nx * ny - yid * nx; + int id_z = id / (nx * ny); + int id_y = (id - id_z * nx * ny) / nx; + int id_x = id - id_z * nx * ny - id_y * nx; // define physics variables - Real d_gas, d_dust; // fluid mass densities - Real n; // gas number density - Real mu = 0.6; // mean molecular weight - Real T, E, P; // temperature, energy, pressure - Real vx, vy, vz; // velocities + Real density_gas, density_dust; // fluid mass densities + Real number_density; // gas number density + Real mu = 0.6; // mean molecular weight + Real temperature, energy, pressure; // temperature, energy, pressure + Real velocity_x, velocity_y, velocity_z; // velocities #ifdef DE - Real ge; + Real energy_gas; #endif // DE // define integration variables @@ -64,82 +64,83 @@ __global__ void Dust_Kernel(Real *dev_conserved, int nx, int ny, int nz, int n_g Real dd_max = 0.01; // allowable percentage of dust density increase Real dt_sub; // refined timestep - if (xid >= is && xid < ie && yid >= js && yid < je && zid >= ks && zid < ke) { + if (id_x >= is && id_x < ie && id_y >= js && id_y < je && id_z >= ks && id_z < ke) { // get conserved quanitites - d_gas = dev_conserved[id + n_cells * grid_enum::density]; - d_dust = dev_conserved[id + n_cells * grid_enum::dust_density]; - E = dev_conserved[id + n_cells * grid_enum::Energy]; + density_gas = dev_conserved[id + n_cells * grid_enum::density]; + density_dust = dev_conserved[id + n_cells * grid_enum::dust_density]; + energy = dev_conserved[id + n_cells * grid_enum::Energy]; // convert mass density to number density - n = d_gas * DENSITY_UNIT / (mu * MP); + number_density = density_gas * DENSITY_UNIT / (mu * MP); - if (E < 0.0 || E != E) { + if (energy < 0.0 || energy != energy) { return; } // get conserved quanitites - vx = dev_conserved[id + n_cells * grid_enum::momentum_x] / d_gas; - vy = dev_conserved[id + n_cells * grid_enum::momentum_y] / d_gas; - vz = dev_conserved[id + n_cells * grid_enum::momentum_z] / d_gas; + velocity_x = dev_conserved[id + n_cells * grid_enum::momentum_x] / density_gas; + velocity_y = dev_conserved[id + n_cells * grid_enum::momentum_y] / density_gas; + velocity_z = dev_conserved[id + n_cells * grid_enum::momentum_z] / density_gas; #ifdef DE - ge = dev_conserved[id + n_cells * grid_enum::GasEnergy] / d_gas; - ge = fmax(ge, (Real)TINY_NUMBER); + energy_gas = dev_conserved[id + n_cells * grid_enum::GasEnergy] / density_gas; + energy_gas = fmax(ge, (Real)TINY_NUMBER); #endif // DE // calculate physical quantities - P = hydro_utilities::Calc_Pressure_Primitive(E, d_gas, vx, vy, vz, gamma); + pressure = hydro_utilities::Calc_Pressure_Primitive(energy, density_gas, velocity_x, velocity_y, velocity_z, gamma); - Real T_init; - T_init = hydro_utilities::Calc_Temp(P, n); + Real temperature_init; + temperature_init = hydro_utilities::Calc_Temp(pressure, number_density); #ifdef DE - T_init = hydro_utilities::Calc_Temp_DE(d_gas, ge, gamma, n); + temperature_init = hydro_utilities::Calc_Temp_DE(density_gas, energy_gas, gamma, number_density); #endif // DE // if dual energy is turned on use temp from total internal energy - T = T_init; + temperature = temperature_init; - Real tau_sp = calc_tau_sp(n, T) / TIME_UNIT; // sputtering timescale, kyr (sim units) + Real tau_sp = Calc_Sputtering_Timescale(number_density, temperature) / TIME_UNIT; // sputtering timescale, kyr (sim units) - dd_dt = calc_dd_dt(d_dust, tau_sp); // rate of change in dust density at current timestep - dd = dd_dt * dt; // change in dust density at current timestep + dd_dt = Calc_dd_dt(density_dust, tau_sp); // rate of change in dust density at current timestep + dd = dd_dt * dt; // change in dust density at current timestep // ensure that dust density is not changing too rapidly - while (dd / d_dust > dd_max) { - dt_sub = dd_max * d_dust / dd_dt; - d_dust += dt_sub * dd_dt; + while (dd / density_dust > dd_max) { + dt_sub = dd_max * density_dust / dd_dt; + density_dust += dt_sub * dd_dt; dt -= dt_sub; - dd_dt = calc_dd_dt(d_dust, tau_sp); + dd_dt = Calc_dd_dt(density_dust, tau_sp); dd = dt * dd_dt; } // update dust density - d_dust += dd; + density_dust += dd; - dev_conserved[id + n_cells * grid_enum::dust_density] = d_dust; + dev_conserved[id + n_cells * grid_enum::dust_density] = density_dust; #ifdef DE - dev_conserved[id + n_cells * grid_enum::GasEnergy] = d_dust * ge; + dev_conserved[id + n_cells * grid_enum::GasEnergy] = density_dust * energy_gas; #endif } } -// McKinnon et al. (2017) -__device__ __host__ Real calc_tau_sp(Real n, Real T) +// McKinnon et al. (2017) sputtering timescale +__device__ __host__ Real Calc_Sputtering_Timescale(Real number_density, Real temperature) { - Real YR_IN_S = 3.154e7; - Real a1 = 1; // dust grain size in units of 0.1 micrometers - Real d0 = n / (6e-4); // gas density in units of 10^-27 g/cm^3 - Real T_0 = 2e6; // K - Real omega = 2.5; - Real A = 0.17e9 * YR_IN_S; // 0.17 Gyr in s + Real grain_radius = 1; // dust grain size in units of 0.1 micrometers + Real temperature_0 = 2e6; // temp above which the sputtering rate is ~constant in K + Real omega = 2.5; // controls the low-temperature scaling of the sputtering rate + Real A = 5.3618e15; // 0.17 Gyr in s - Real tau_sp = A * (a1 / d0) * (pow(T_0 / T, omega) + 1); // sputtering timescale, s + number_density /= (6e-4); // gas number density in units of 10^-27 g/cm^3 + + // sputtering timescale, s + Real tau_sp = A * (grain_radius / number_density) * (pow(temperature_0 / temperature, omega) + 1); return tau_sp; } -// McKinnon et al. (2017) -__device__ __host__ Real calc_dd_dt(Real d_dust, Real tau_sp) { return -d_dust / (tau_sp / 3); } +// McKinnon et al. (2017) sputtering model +__device__ __host__ Real Calc_dd_dt(Real density_dust, Real tau_sp) { return -density_dust / (tau_sp / 3); } #endif // DUST diff --git a/src/dust/dust_cuda.h b/src/dust/dust_cuda.h index aab4c7db4..fb72007ac 100644 --- a/src/dust/dust_cuda.h +++ b/src/dust/dust_cuda.h @@ -48,22 +48,22 @@ __global__ void Dust_Kernel(Real *dev_conserved, int nx, int ny, int nz, int n_g /*! * \brief Compute the sputtering timescale based on a cell's density and temperature. * - * \param[in] n Gas number density in cm^-3 - * \param[in] T Gas temperature in K + * \param[in] number_density Gas number density in cm^-3 + * \param[in] temperature Gas temperature in K * * \return Real Sputtering timescale in seconds (McKinnon et al. 2017) */ -__device__ __host__ Real calc_tau_sp(Real n, Real T); +__device__ __host__ Real Calc_Sputtering_Timescale(Real number_density, Real temperature); /*! * \brief Compute the rate of change in dust density based on the current dust density and sputtering timescale. * - * \param[in] d_dust Dust mass density in M_sun/kpc^3 + * \param[in] density_dust Dust mass density in M_sun/kpc^3 * \param[in] tau_sp Sputtering timescale in kyr * * \return Real Dust density rate of change (McKinnon et al. 2017) */ -__device__ __host__ Real calc_dd_dt(Real d_dust, Real tau_sp); +__device__ __host__ Real Calc_dd_dt(Real density_dust, Real tau_sp); #endif // DUST_CUDA_H #endif // DUST \ No newline at end of file diff --git a/src/dust/dust_cuda_tests.cpp b/src/dust/dust_cuda_tests.cpp index 03bd8111f..8790c1f4a 100644 --- a/src/dust/dust_cuda_tests.cpp +++ b/src/dust/dust_cuda_tests.cpp @@ -25,13 +25,12 @@ TEST(tDUSTTestSputteringTimescale, CorrectInputExpectCorrectOutput) // test suite name, test name { // Parameters - Real YR_IN_S = 3.154e7; - Real const k_test_n = 1; - Real const k_test_T = pow(10, 5.0); + Real YR_IN_S = 3.154e7; + Real const k_test_number_density = 1; + Real const k_test_temperature = pow(10, 5.0); + Real const k_fiducial_num = 182565146.96398282; - Real const k_fiducial_num = 182565146.96398282; - - Real test_num = calc_tau_sp(k_test_n, k_test_T) / YR_IN_S; // yr + Real test_num = Calc_Sputtering_Timescale(k_test_number_density, k_test_temperature) / YR_IN_S; // yr double abs_diff; int64_t ulps_diff; @@ -50,13 +49,12 @@ TEST(tDUSTTestSputteringGrowthRate, CorrectInputExpectCorrectOutput) // test suite name, test name { // Parameters - Real YR_IN_S = 3.154e7; - Real const k_test_tau_sp = 0.17e6; // kyr - Real const k_test_d_dust = 1e-26 / DENSITY_UNIT; // sim units - - Real const k_fiducial_num = -2.6073835738056728; + Real YR_IN_S = 3.154e7; + Real const k_test_tau_sp = 0.17e6; // kyr + Real const k_test_density_dust = 1e-26 / DENSITY_UNIT; // sim units + Real const k_fiducial_num = -2.6073835738056728; - Real test_num = calc_dd_dt(k_test_d_dust, k_test_tau_sp); + Real test_num = Calc_dd_dt(k_test_density_dust, k_test_tau_sp); double abs_diff; int64_t ulps_diff; From 9019ec1d36219b3974d9e8864379d3ea7695d6a8 Mon Sep 17 00:00:00 2001 From: helenarichie Date: Mon, 15 May 2023 15:36:05 -0400 Subject: [PATCH 403/694] run clang tidy --- src/dust/dust_cuda.cu | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/dust/dust_cuda.cu b/src/dust/dust_cuda.cu index 8966d076f..bbecf1935 100644 --- a/src/dust/dust_cuda.cu +++ b/src/dust/dust_cuda.cu @@ -99,7 +99,8 @@ __global__ void Dust_Kernel(Real *dev_conserved, int nx, int ny, int nz, int n_g // if dual energy is turned on use temp from total internal energy temperature = temperature_init; - Real tau_sp = Calc_Sputtering_Timescale(number_density, temperature) / TIME_UNIT; // sputtering timescale, kyr (sim units) + Real tau_sp = + Calc_Sputtering_Timescale(number_density, temperature) / TIME_UNIT; // sputtering timescale, kyr (sim units) dd_dt = Calc_dd_dt(density_dust, tau_sp); // rate of change in dust density at current timestep dd = dd_dt * dt; // change in dust density at current timestep From 4ceefe6a32cd5ac6f93b1e196e2772471fe5363e Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Fri, 14 Apr 2023 11:37:43 -0400 Subject: [PATCH 404/694] Remove extraneous CUDA & PPMC ifdefs --- src/reconstruction/ppmc_cuda.cu | 285 ++++++++++++++++---------------- src/reconstruction/ppmc_cuda.h | 12 +- 2 files changed, 144 insertions(+), 153 deletions(-) diff --git a/src/reconstruction/ppmc_cuda.cu b/src/reconstruction/ppmc_cuda.cu index b69c07eb1..504896b56 100644 --- a/src/reconstruction/ppmc_cuda.cu +++ b/src/reconstruction/ppmc_cuda.cu @@ -1,20 +1,18 @@ /*! \file ppmc_cuda.cu * \brief Functions definitions for the ppm kernels, using characteristic tracing. Written following Stone et al. 2008. */ -#ifdef CUDA - #ifdef PPMC - #include +#include - #include "../global/global.h" - #include "../global/global_cuda.h" - #include "../reconstruction/ppmc_cuda.h" - #include "../utils/gpu.hpp" - #include "../utils/hydro_utilities.h" +#include "../global/global.h" +#include "../global/global_cuda.h" +#include "../reconstruction/ppmc_cuda.h" +#include "../utils/gpu.hpp" +#include "../utils/hydro_utilities.h" - #ifdef DE // PRESSURE_DE - #include "../utils/hydro_utilities.h" - #endif +#ifdef DE // PRESSURE_DE + #include "../utils/hydro_utilities.h" +#endif /*! \fn void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, int ny, int nz, int n_ghost, Real dx, Real dt, Real @@ -70,8 +68,8 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou Real d_L, vx_L, vy_L, vz_L, p_L; Real d_R, vx_R, vy_R, vz_R, p_R; - // #ifdef CTU - #ifndef VL +// #ifdef CTU +#ifndef VL Real dtodx = dt / dx; Real d_6, vx_6, vy_6, vz_6, p_6; Real lambda_m, lambda_0, lambda_p; @@ -79,29 +77,29 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou Real A, B, C, D; Real chi_1, chi_2, chi_3, chi_4, chi_5; Real sum_1, sum_2, sum_3, sum_4, sum_5; - #endif // VL +#endif // VL - #ifdef DE +#ifdef DE Real ge_i, ge_imo, ge_ipo, ge_imt, ge_ipt; Real del_ge_L, del_ge_R, del_ge_C, del_ge_G; Real del_ge_m_imo, del_ge_m_i, del_ge_m_ipo; Real ge_L, ge_R; Real E_kin, E, dge; - // #ifdef CTU - #ifndef VL + // #ifdef CTU + #ifndef VL Real chi_ge, sum_ge, ge_6; - #endif // VL - #endif // DE - #ifdef SCALAR + #endif // VL +#endif // DE +#ifdef SCALAR Real scalar_i[NSCALARS], scalar_imo[NSCALARS], scalar_ipo[NSCALARS], scalar_imt[NSCALARS], scalar_ipt[NSCALARS]; Real del_scalar_L[NSCALARS], del_scalar_R[NSCALARS], del_scalar_C[NSCALARS], del_scalar_G[NSCALARS]; Real del_scalar_m_imo[NSCALARS], del_scalar_m_i[NSCALARS], del_scalar_m_ipo[NSCALARS]; Real scalar_L[NSCALARS], scalar_R[NSCALARS]; - // #ifdef CTU - #ifndef VL + // #ifdef CTU + #ifndef VL Real chi_scalar[NSCALARS], sum_scalar[NSCALARS], scalar_6[NSCALARS]; - #endif // VL - #endif // SCALAR + #endif // VL +#endif // SCALAR // get a thread ID int blockId = blockIdx.x + blockIdx.y * gridDim.x; @@ -147,23 +145,23 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou vx_i = dev_conserved[o1 * n_cells + id] / d_i; vy_i = dev_conserved[o2 * n_cells + id] / d_i; vz_i = dev_conserved[o3 * n_cells + id] / d_i; - #ifdef DE // PRESSURE_DE +#ifdef DE // PRESSURE_DE E = dev_conserved[4 * n_cells + id]; E_kin = 0.5 * d_i * (vx_i * vx_i + vy_i * vy_i + vz_i * vz_i); dge = dev_conserved[(n_fields - 1) * n_cells + id]; p_i = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, dge, gamma); - #else // not DE +#else // not DE p_i = (dev_conserved[4 * n_cells + id] - 0.5 * d_i * (vx_i * vx_i + vy_i * vy_i + vz_i * vz_i)) * (gamma - 1.0); - #endif // PRESSURE_DE +#endif // PRESSURE_DE p_i = fmax(p_i, (Real)TINY_NUMBER); - #ifdef DE +#ifdef DE ge_i = dge / d_i; - #endif // DE - #ifdef SCALAR +#endif // DE +#ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { scalar_i[i] = dev_conserved[(5 + i) * n_cells + id] / d_i; } - #endif // SCALAR +#endif // SCALAR // cell i-1 switch (dir) { case 0: @@ -181,24 +179,24 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou vx_imo = dev_conserved[o1 * n_cells + id] / d_imo; vy_imo = dev_conserved[o2 * n_cells + id] / d_imo; vz_imo = dev_conserved[o3 * n_cells + id] / d_imo; - #ifdef DE // PRESSURE_DE +#ifdef DE // PRESSURE_DE E = dev_conserved[4 * n_cells + id]; E_kin = 0.5 * d_imo * (vx_imo * vx_imo + vy_imo * vy_imo + vz_imo * vz_imo); dge = dev_conserved[(n_fields - 1) * n_cells + id]; p_imo = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, dge, gamma); - #else // not DE +#else // not DE p_imo = (dev_conserved[4 * n_cells + id] - 0.5 * d_imo * (vx_imo * vx_imo + vy_imo * vy_imo + vz_imo * vz_imo)) * (gamma - 1.0); - #endif // PRESSURE_DE +#endif // PRESSURE_DE p_imo = fmax(p_imo, (Real)TINY_NUMBER); - #ifdef DE +#ifdef DE ge_imo = dge / d_imo; - #endif // DE - #ifdef SCALAR +#endif // DE +#ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { scalar_imo[i] = dev_conserved[(5 + i) * n_cells + id] / d_imo; } - #endif // SCALAR +#endif // SCALAR // cell i+1 switch (dir) { case 0: @@ -215,24 +213,24 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou vx_ipo = dev_conserved[o1 * n_cells + id] / d_ipo; vy_ipo = dev_conserved[o2 * n_cells + id] / d_ipo; vz_ipo = dev_conserved[o3 * n_cells + id] / d_ipo; - #ifdef DE // PRESSURE_DE +#ifdef DE // PRESSURE_DE E = dev_conserved[4 * n_cells + id]; E_kin = 0.5 * d_ipo * (vx_ipo * vx_ipo + vy_ipo * vy_ipo + vz_ipo * vz_ipo); dge = dev_conserved[(n_fields - 1) * n_cells + id]; p_ipo = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, dge, gamma); - #else // not DE +#else // not DE p_ipo = (dev_conserved[4 * n_cells + id] - 0.5 * d_ipo * (vx_ipo * vx_ipo + vy_ipo * vy_ipo + vz_ipo * vz_ipo)) * (gamma - 1.0); - #endif // PRESSURE_DE +#endif // PRESSURE_DE p_ipo = fmax(p_ipo, (Real)TINY_NUMBER); - #ifdef DE +#ifdef DE ge_ipo = dge / d_ipo; - #endif // DE - #ifdef SCALAR +#endif // DE +#ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { scalar_ipo[i] = dev_conserved[(5 + i) * n_cells + id] / d_ipo; } - #endif // SCALAR +#endif // SCALAR // cell i-2 switch (dir) { case 0: @@ -249,24 +247,24 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou vx_imt = dev_conserved[o1 * n_cells + id] / d_imt; vy_imt = dev_conserved[o2 * n_cells + id] / d_imt; vz_imt = dev_conserved[o3 * n_cells + id] / d_imt; - #ifdef DE // PRESSURE_DE +#ifdef DE // PRESSURE_DE E = dev_conserved[4 * n_cells + id]; E_kin = 0.5 * d_imt * (vx_imt * vx_imt + vy_imt * vy_imt + vz_imt * vz_imt); dge = dev_conserved[(n_fields - 1) * n_cells + id]; p_imt = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, dge, gamma); - #else // not DE +#else // not DE p_imt = (dev_conserved[4 * n_cells + id] - 0.5 * d_imt * (vx_imt * vx_imt + vy_imt * vy_imt + vz_imt * vz_imt)) * (gamma - 1.0); - #endif // PRESSURE_DE +#endif // PRESSURE_DE p_imt = fmax(p_imt, (Real)TINY_NUMBER); - #ifdef DE +#ifdef DE ge_imt = dge / d_imt; - #endif // DE - #ifdef SCALAR +#endif // DE +#ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { scalar_imt[i] = dev_conserved[(5 + i) * n_cells + id] / d_imt; } - #endif // SCALAR +#endif // SCALAR // cell i+2 switch (dir) { case 0: @@ -283,24 +281,24 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou vx_ipt = dev_conserved[o1 * n_cells + id] / d_ipt; vy_ipt = dev_conserved[o2 * n_cells + id] / d_ipt; vz_ipt = dev_conserved[o3 * n_cells + id] / d_ipt; - #ifdef DE // PRESSURE_DE +#ifdef DE // PRESSURE_DE E = dev_conserved[4 * n_cells + id]; E_kin = 0.5 * d_ipt * (vx_ipt * vx_ipt + vy_ipt * vy_ipt + vz_ipt * vz_ipt); dge = dev_conserved[(n_fields - 1) * n_cells + id]; p_ipt = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, dge, gamma); - #else // not DE +#else // not DE p_ipt = (dev_conserved[4 * n_cells + id] - 0.5 * d_ipt * (vx_ipt * vx_ipt + vy_ipt * vy_ipt + vz_ipt * vz_ipt)) * (gamma - 1.0); - #endif // PRESSURE_DE +#endif // PRESSURE_DE p_ipt = fmax(p_ipt, (Real)TINY_NUMBER); - #ifdef DE +#ifdef DE ge_ipt = dge / d_ipt; - #endif // DE - #ifdef SCALAR +#endif // DE +#ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { scalar_ipt[i] = dev_conserved[(5 + i) * n_cells + id] / d_ipt; } - #endif // SCALAR +#endif // SCALAR // printf("%d %d %d %f %f %f %f %f\n", xid, yid, zid, d_i, vx_i, vy_i, vz_i, // p_i); @@ -362,7 +360,7 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou del_p_G = 0.0; } - #ifdef DE +#ifdef DE del_ge_L = ge_imo - ge_imt; del_ge_R = ge_i - ge_imo; del_ge_C = 0.5 * (ge_i - ge_imt); @@ -371,8 +369,8 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou } else { del_ge_G = 0.0; } - #endif // DE - #ifdef SCALAR +#endif // DE +#ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { del_scalar_L[i] = scalar_imo[i] - scalar_imt[i]; del_scalar_R[i] = scalar_i[i] - scalar_imo[i]; @@ -383,7 +381,7 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou del_scalar_G[i] = 0.0; } } - #endif // SCALAR +#endif // SCALAR // Step 3 - Project the left, right, centered and van Leer differences onto // the characteristic variables @@ -446,7 +444,7 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou lim_slope_b = fmin(fabs(del_a_4_C), fabs(del_a_4_G)); del_a_4_m = sgn_CUDA(del_a_4_C) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); } - #ifdef DE +#ifdef DE if (del_ge_L * del_ge_R > 0.0) { lim_slope_a = fmin(fabs(del_ge_L), fabs(del_ge_R)); lim_slope_b = fmin(fabs(del_ge_C), fabs(del_ge_G)); @@ -454,8 +452,8 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou } else { del_ge_m_imo = 0.0; } - #endif // DE - #ifdef SCALAR +#endif // DE +#ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { if (del_scalar_L[i] * del_scalar_R[i] > 0.0) { lim_slope_a = fmin(fabs(del_scalar_L[i]), fabs(del_scalar_R[i])); @@ -465,7 +463,7 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou del_scalar_m_imo[i] = 0.0; } } - #endif // SCALAR +#endif // SCALAR // Step 5 - Project the monotonized difference in the characteristic // variables back onto the @@ -534,7 +532,7 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou del_p_G = 0.0; } - #ifdef DE +#ifdef DE del_ge_L = ge_i - ge_imo; del_ge_R = ge_ipo - ge_i; del_ge_C = 0.5 * (ge_ipo - ge_imo); @@ -543,9 +541,9 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou } else { del_ge_G = 0.0; } - #endif // DE +#endif // DE - #ifdef SCALAR +#ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { del_scalar_L[i] = scalar_i[i] - scalar_imo[i]; del_scalar_R[i] = scalar_ipo[i] - scalar_i[i]; @@ -556,7 +554,7 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou del_scalar_G[i] = 0.0; } } - #endif // SCALAR +#endif // SCALAR // Step 3 - Project the left, right, centered, and van Leer differences onto // the characteristic variables @@ -619,7 +617,7 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou lim_slope_b = fmin(fabs(del_a_4_C), fabs(del_a_4_G)); del_a_4_m = sgn_CUDA(del_a_4_C) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); } - #ifdef DE +#ifdef DE if (del_ge_L * del_ge_R > 0.0) { lim_slope_a = fmin(fabs(del_ge_L), fabs(del_ge_R)); lim_slope_b = fmin(fabs(del_ge_C), fabs(del_ge_G)); @@ -627,8 +625,8 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou } else { del_ge_m_i = 0.0; } - #endif // DE - #ifdef SCALAR +#endif // DE +#ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { if (del_scalar_L[i] * del_scalar_R[i] > 0.0) { lim_slope_a = fmin(fabs(del_scalar_L[i]), fabs(del_scalar_R[i])); @@ -638,7 +636,7 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou del_scalar_m_i[i] = 0.0; } } - #endif // SCALAR +#endif // SCALAR // Step 5 - Project the monotonized difference in the characteristic // variables back onto the @@ -707,7 +705,7 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou del_p_G = 0.0; } - #ifdef DE +#ifdef DE del_ge_L = ge_ipo - ge_i; del_ge_R = ge_ipt - ge_ipo; del_ge_C = 0.5 * (ge_ipt - ge_i); @@ -716,9 +714,9 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou } else { del_ge_G = 0.0; } - #endif // DE +#endif // DE - #ifdef SCALAR +#ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { del_scalar_L[i] = scalar_ipo[i] - scalar_i[i]; del_scalar_R[i] = scalar_ipt[i] - scalar_ipo[i]; @@ -729,7 +727,7 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou del_scalar_G[i] = 0.0; } } - #endif // SCALAR +#endif // SCALAR // Step 3 - Project the left, right, centered, and van Leer differences onto // the characteristic variables @@ -792,7 +790,7 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou lim_slope_b = fmin(fabs(del_a_4_C), fabs(del_a_4_G)); del_a_4_m = sgn_CUDA(del_a_4_C) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); } - #ifdef DE +#ifdef DE if (del_ge_L * del_ge_R > 0.0) { lim_slope_a = fmin(fabs(del_ge_L), fabs(del_ge_R)); lim_slope_b = fmin(fabs(del_ge_C), fabs(del_ge_G)); @@ -800,8 +798,8 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou } else { del_ge_m_ipo = 0.0; } - #endif // DE - #ifdef SCALAR +#endif // DE +#ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { if (del_scalar_L[i] * del_scalar_R[i] > 0.0) { lim_slope_a = fmin(fabs(del_scalar_L[i]), fabs(del_scalar_R[i])); @@ -811,7 +809,7 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou del_scalar_m_ipo[i] = 0.0; } } - #endif // SCALAR +#endif // SCALAR // Step 5 - Project the monotonized difference in the characteristic // variables back onto the @@ -841,16 +839,16 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou vz_R = 0.5 * (vz_ipo + vz_i) - (del_vz_m_ipo - del_vz_m_i) / 6.0; p_R = 0.5 * (p_ipo + p_i) - (del_p_m_ipo - del_p_m_i) / 6.0; - #ifdef DE +#ifdef DE ge_L = 0.5 * (ge_i + ge_imo) - (del_ge_m_i - del_ge_m_imo) / 6.0; ge_R = 0.5 * (ge_ipo + ge_i) - (del_ge_m_ipo - del_ge_m_i) / 6.0; - #endif // DE - #ifdef SCALAR +#endif // DE +#ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { scalar_L[i] = 0.5 * (scalar_i[i] + scalar_imo[i]) - (del_scalar_m_i[i] - del_scalar_m_imo[i]) / 6.0; scalar_R[i] = 0.5 * (scalar_ipo[i] + scalar_i[i]) - (del_scalar_m_ipo[i] - del_scalar_m_i[i]) / 6.0; } - #endif // SCALAR +#endif // SCALAR // Step 7 - Apply further monotonicity constraints to ensure the values on // the left and right side @@ -926,7 +924,7 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou p_R = fmax(fmin(p_i, p_ipo), p_R); p_R = fmin(fmax(p_i, p_ipo), p_R); - #ifdef DE +#ifdef DE if ((ge_R - ge_i) * (ge_i - ge_L) <= 0) { ge_L = ge_R = ge_i; } @@ -940,9 +938,9 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou ge_L = fmin(fmax(ge_i, ge_imo), ge_L); ge_R = fmax(fmin(ge_i, ge_ipo), ge_R); ge_R = fmin(fmax(ge_i, ge_ipo), ge_R); - #endif // DE +#endif // DE - #ifdef SCALAR +#ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { if ((scalar_R[i] - scalar_i[i]) * (scalar_i[i] - scalar_L[i]) <= 0) { scalar_L[i] = scalar_R[i] = scalar_i[i]; @@ -960,10 +958,10 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou scalar_R[i] = fmax(fmin(scalar_i[i], scalar_ipo[i]), scalar_R[i]); scalar_R[i] = fmin(fmax(scalar_i[i], scalar_ipo[i]), scalar_R[i]); } - #endif // SCALAR +#endif // SCALAR - // #ifdef CTU - #ifndef VL +// #ifdef CTU +#ifndef VL // Step 8 - Compute the coefficients for the monotonized parabolic // interpolation function @@ -981,17 +979,17 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou vz_6 = 6.0 * (vz_i - 0.5 * (vz_L + vz_R)); p_6 = 6.0 * (p_i - 0.5 * (p_L + p_R)); - #ifdef DE + #ifdef DE del_ge_m_i = ge_R - ge_L; ge_6 = 6.0 * (ge_i - 0.5 * (ge_L + ge_R)); - #endif // DE + #endif // DE - #ifdef SCALAR + #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { del_scalar_m_i[i] = scalar_R[i] - scalar_L[i]; scalar_6[i] = 6.0 * (scalar_i[i] - 0.5 * (scalar_L[i] + scalar_R[i])); } - #endif // SCALAR + #endif // SCALAR // Compute the eigenvalues of the linearized equations in the // primitive variables using the cell-centered primitive variables @@ -1026,19 +1024,19 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou vz_L = vz_L - lambda_min * (0.5 * dtodx) * (del_vz_m_i + (1.0 + (2.0 / 3.0) * lambda_min * dtodx) * vz_6); p_L = p_L - lambda_min * (0.5 * dtodx) * (del_p_m_i + (1.0 + (2.0 / 3.0) * lambda_min * dtodx) * p_6); - #ifdef DE + #ifdef DE ge_R = ge_R - lambda_max * (0.5 * dtodx) * (del_ge_m_i - (1.0 - (2.0 / 3.0) * lambda_max * dtodx) * ge_6); ge_L = ge_L - lambda_min * (0.5 * dtodx) * (del_ge_m_i + (1.0 + (2.0 / 3.0) * lambda_min * dtodx) * ge_6); - #endif // DE + #endif // DE - #ifdef SCALAR + #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { scalar_R[i] = scalar_R[i] - lambda_max * (0.5 * dtodx) * (del_scalar_m_i[i] - (1.0 - (2.0 / 3.0) * lambda_max * dtodx) * scalar_6[i]); scalar_L[i] = scalar_L[i] - lambda_min * (0.5 * dtodx) * (del_scalar_m_i[i] + (1.0 + (2.0 / 3.0) * lambda_min * dtodx) * scalar_6[i]); } - #endif // SCALAR + #endif // SCALAR // Step 10 - Perform the characteristic tracing // Stone Eqns 57 - 60 @@ -1049,14 +1047,14 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou sum_3 = 0; sum_4 = 0; sum_5 = 0; - #ifdef DE + #ifdef DE sum_ge = 0; - #endif // DE - #ifdef SCALAR + #endif // DE + #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { sum_scalar[i] = 0; } - #endif // SCALAR + #endif // SCALAR if (lambda_m >= 0) { A = (0.5 * dtodx) * (lambda_p - lambda_m); @@ -1081,26 +1079,26 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou chi_3 = A * (del_vy_m_i - vy_6) + B * vy_6; chi_4 = A * (del_vz_m_i - vz_6) + B * vz_6; chi_5 = A * (del_p_m_i - p_6) + B * p_6; - #ifdef DE + #ifdef DE chi_ge = A * (del_ge_m_i - ge_6) + B * ge_6; - #endif // DE - #ifdef SCALAR + #endif // DE + #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { chi_scalar[i] = A * (del_scalar_m_i[i] - scalar_6[i]) + B * scalar_6[i]; } - #endif // SCALAR + #endif // SCALAR sum_1 += chi_1 - chi_5 / (a * a); sum_3 += chi_3; sum_4 += chi_4; - #ifdef DE + #ifdef DE sum_ge += chi_ge; - #endif // DE - #ifdef SCALAR + #endif // DE + #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { sum_scalar[i] += chi_scalar[i]; } - #endif // SCALAR + #endif // SCALAR } if (lambda_p >= 0) { A = (0.5 * dtodx) * (lambda_p - lambda_p); @@ -1123,14 +1121,14 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou vy_R += sum_3; vz_R += sum_4; p_R += sum_5; - #ifdef DE + #ifdef DE ge_R += sum_ge; - #endif // DE - #ifdef SCALAR + #endif // DE + #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { scalar_R[i] += sum_scalar[i]; } - #endif // SCALAR + #endif // SCALAR // right-hand interface value, i-1/2 sum_1 = 0; @@ -1138,14 +1136,14 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou sum_3 = 0; sum_4 = 0; sum_5 = 0; - #ifdef DE + #ifdef DE sum_ge = 0; - #endif // DE - #ifdef SCALAR + #endif // DE + #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { sum_scalar[i] = 0; } - #endif // SCALAR + #endif // SCALAR if (lambda_m <= 0) { C = (0.5 * dtodx) * (lambda_m - lambda_m); D = (1.0 / 3.0) * (dtodx) * (dtodx) * (lambda_m * lambda_m - lambda_m * lambda_m); @@ -1169,26 +1167,26 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou chi_3 = C * (del_vy_m_i + vy_6) + D * vy_6; chi_4 = C * (del_vz_m_i + vz_6) + D * vz_6; chi_5 = C * (del_p_m_i + p_6) + D * p_6; - #ifdef DE + #ifdef DE chi_ge = C * (del_ge_m_i + ge_6) + D * ge_6; - #endif // DE - #ifdef SCALAR + #endif // DE + #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { chi_scalar[i] = C * (del_scalar_m_i[i] + scalar_6[i]) + D * scalar_6[i]; } - #endif // SCALAR + #endif // SCALAR sum_1 += chi_1 - chi_5 / (a * a); sum_3 += chi_3; sum_4 += chi_4; - #ifdef DE + #ifdef DE sum_ge += chi_ge; - #endif // DE - #ifdef SCALAR + #endif // DE + #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { sum_scalar[i] += chi_scalar[i]; } - #endif // SCALAR + #endif // SCALAR } if (lambda_p <= 0) { C = (0.5 * dtodx) * (lambda_m - lambda_p); @@ -1211,16 +1209,16 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou vy_L += sum_3; vz_L += sum_4; p_L += sum_5; - #ifdef DE + #ifdef DE ge_L += sum_ge; - #endif // DE - #ifdef SCALAR + #endif // DE + #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { scalar_L[i] += sum_scalar[i]; } - #endif // SCALAR + #endif // SCALAR - #endif // VL, i.e. CTU was used for this section +#endif // VL, i.e. CTU was used for this section // enforce minimum values d_L = fmax(d_L, (Real)TINY_NUMBER); @@ -1247,14 +1245,14 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou dev_bounds_R[o2 * n_cells + id] = d_L * vy_L; dev_bounds_R[o3 * n_cells + id] = d_L * vz_L; dev_bounds_R[4 * n_cells + id] = p_L / (gamma - 1.0) + 0.5 * d_L * (vx_L * vx_L + vy_L * vy_L + vz_L * vz_L); - #ifdef SCALAR +#ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { dev_bounds_R[(5 + i) * n_cells + id] = d_L * scalar_L[i]; } - #endif // SCALAR - #ifdef DE +#endif // SCALAR +#ifdef DE dev_bounds_R[(n_fields - 1) * n_cells + id] = d_L * ge_L; - #endif // DE +#endif // DE // bounds_L refers to the left side of the i+1/2 interface id = xid + yid * nx + zid * nx * ny; dev_bounds_L[id] = d_R; @@ -1262,16 +1260,13 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou dev_bounds_L[o2 * n_cells + id] = d_R * vy_R; dev_bounds_L[o3 * n_cells + id] = d_R * vz_R; dev_bounds_L[4 * n_cells + id] = p_R / (gamma - 1.0) + 0.5 * d_R * (vx_R * vx_R + vy_R * vy_R + vz_R * vz_R); - #ifdef SCALAR +#ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { dev_bounds_L[(5 + i) * n_cells + id] = d_R * scalar_R[i]; } - #endif // SCALAR - #ifdef DE +#endif // SCALAR +#ifdef DE dev_bounds_L[(n_fields - 1) * n_cells + id] = d_R * ge_R; - #endif // DE +#endif // DE } } - - #endif // PPMC -#endif // CUDA diff --git a/src/reconstruction/ppmc_cuda.h b/src/reconstruction/ppmc_cuda.h index fc584ffb7..1717eb90f 100644 --- a/src/reconstruction/ppmc_cuda.h +++ b/src/reconstruction/ppmc_cuda.h @@ -1,13 +1,11 @@ /*! \file ppmc_cuda.h * \brief Declarations of the cuda ppm kernels, characteristic reconstruction * version. */ -#ifdef CUDA - #ifdef PPMC - #ifndef PPMC_CUDA_H - #define PPMC_CUDA_H +#ifndef PPMC_CUDA_H +#define PPMC_CUDA_H - #include "../global/global.h" +#include "../global/global.h" /*! \fn void PPMC(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, int ny, int nz, int n_ghost, Real dx, Real dt, Real gamma, int dir) @@ -16,6 +14,4 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, int ny, int nz, int n_ghost, Real dx, Real dt, Real gamma, int dir, int n_fields); - #endif // PPMC_CUDA_H - #endif // PPMC -#endif // CUDA +#endif // PPMC_CUDA_H From 059769f1febc936c10bc6c89052e2d74fa370fdd Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Fri, 14 Apr 2023 11:44:26 -0400 Subject: [PATCH 405/694] PPMC: Remove n_ghost & n_fields arguments --- src/integrators/VL_1D_cuda.cu | 3 +-- src/integrators/VL_2D_cuda.cu | 8 ++++---- src/integrators/VL_3D_cuda.cu | 12 ++++++------ src/integrators/simple_1D_cuda.cu | 3 +-- src/integrators/simple_2D_cuda.cu | 6 ++---- src/integrators/simple_3D_cuda.cu | 9 +++------ src/reconstruction/ppmc_cuda.cu | 22 ++++++++++------------ src/reconstruction/ppmc_cuda.h | 4 ++-- 8 files changed, 29 insertions(+), 38 deletions(-) diff --git a/src/integrators/VL_1D_cuda.cu b/src/integrators/VL_1D_cuda.cu index baa4f81cb..b4116b735 100644 --- a/src/integrators/VL_1D_cuda.cu +++ b/src/integrators/VL_1D_cuda.cu @@ -105,8 +105,7 @@ void VL_Algorithm_1D_CUDA(Real *d_conserved, int nx, int x_off, int n_ghost, Rea gama, 0, n_fields); #endif #ifdef PPMC - hipLaunchKernelGGL(PPMC_cuda, dimGrid, dimBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, - gama, 0, n_fields); + hipLaunchKernelGGL(PPMC_cuda, dimGrid, dimBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, dx, dt, gama, 0); #endif CudaCheckError(); diff --git a/src/integrators/VL_2D_cuda.cu b/src/integrators/VL_2D_cuda.cu index 0ba1fc98a..1c96196d4 100644 --- a/src/integrators/VL_2D_cuda.cu +++ b/src/integrators/VL_2D_cuda.cu @@ -115,10 +115,10 @@ void VL_Algorithm_2D_CUDA(Real *d_conserved, int nx, int ny, int x_off, int y_of dt, gama, 1, n_fields); #endif // PPMP #ifdef PPMC - hipLaunchKernelGGL(PPMC_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, - dt, gama, 0, n_fields); - hipLaunchKernelGGL(PPMC_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, - dt, gama, 1, n_fields); + hipLaunchKernelGGL(PPMC_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, dx, dt, gama, + 0); + hipLaunchKernelGGL(PPMC_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Ly, Q_Ry, nx, ny, nz, dy, dt, gama, + 1); #endif // PPMC CudaCheckError(); diff --git a/src/integrators/VL_3D_cuda.cu b/src/integrators/VL_3D_cuda.cu index 3fca47d42..52b40dfb3 100644 --- a/src/integrators/VL_3D_cuda.cu +++ b/src/integrators/VL_3D_cuda.cu @@ -234,12 +234,12 @@ void VL_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, int dt, gama, 2, n_fields); #endif // PPMP #ifdef PPMC - hipLaunchKernelGGL(PPMC_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, - dt, gama, 0, n_fields); - hipLaunchKernelGGL(PPMC_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, - dt, gama, 1, n_fields); - hipLaunchKernelGGL(PPMC_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Lz, Q_Rz, nx, ny, nz, n_ghost, dz, - dt, gama, 2, n_fields); + hipLaunchKernelGGL(PPMC_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, dx, dt, gama, + 0); + hipLaunchKernelGGL(PPMC_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Ly, Q_Ry, nx, ny, nz, dy, dt, gama, + 1); + hipLaunchKernelGGL(PPMC_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Lz, Q_Rz, nx, ny, nz, dz, dt, gama, + 2); #endif // PPMC CudaCheckError(); diff --git a/src/integrators/simple_1D_cuda.cu b/src/integrators/simple_1D_cuda.cu index c4be22acd..5a3d2b143 100644 --- a/src/integrators/simple_1D_cuda.cu +++ b/src/integrators/simple_1D_cuda.cu @@ -76,8 +76,7 @@ void Simple_Algorithm_1D_CUDA(Real *d_conserved, int nx, int x_off, int n_ghost, CudaCheckError(); #endif #ifdef PPMC - hipLaunchKernelGGL(PPMC_cuda, dimGrid, dimBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, - 0, n_fields); + hipLaunchKernelGGL(PPMC_cuda, dimGrid, dimBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, dx, dt, gama, 0); CudaCheckError(); #endif diff --git a/src/integrators/simple_2D_cuda.cu b/src/integrators/simple_2D_cuda.cu index 2e53d6c12..9529b307b 100644 --- a/src/integrators/simple_2D_cuda.cu +++ b/src/integrators/simple_2D_cuda.cu @@ -79,10 +79,8 @@ void Simple_Algorithm_2D_CUDA(Real *d_conserved, int nx, int ny, int x_off, int gama, 1, n_fields); #endif #ifdef PPMC - hipLaunchKernelGGL(PPMC_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, - gama, 0, n_fields); - hipLaunchKernelGGL(PPMC_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, dt, - gama, 1, n_fields); + hipLaunchKernelGGL(PPMC_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, dx, dt, gama, 0); + hipLaunchKernelGGL(PPMC_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Ly, Q_Ry, nx, ny, nz, dy, dt, gama, 1); #endif CudaCheckError(); diff --git a/src/integrators/simple_3D_cuda.cu b/src/integrators/simple_3D_cuda.cu index c52ecf0d2..9e9156e07 100644 --- a/src/integrators/simple_3D_cuda.cu +++ b/src/integrators/simple_3D_cuda.cu @@ -116,12 +116,9 @@ void Simple_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, gama, 2, n_fields); #endif // PPMP #ifdef PPMC - hipLaunchKernelGGL(PPMC_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, - gama, 0, n_fields); - hipLaunchKernelGGL(PPMC_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, dt, - gama, 1, n_fields); - hipLaunchKernelGGL(PPMC_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lz, Q_Rz, nx, ny, nz, n_ghost, dz, dt, - gama, 2, n_fields); + hipLaunchKernelGGL(PPMC_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, dx, dt, gama, 0); + hipLaunchKernelGGL(PPMC_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Ly, Q_Ry, nx, ny, nz, dy, dt, gama, 1); + hipLaunchKernelGGL(PPMC_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lz, Q_Rz, nx, ny, nz, dz, dt, gama, 2); CudaCheckError(); #endif // PPMC diff --git a/src/reconstruction/ppmc_cuda.cu b/src/reconstruction/ppmc_cuda.cu index 504896b56..3de5c47b1 100644 --- a/src/reconstruction/ppmc_cuda.cu +++ b/src/reconstruction/ppmc_cuda.cu @@ -14,13 +14,11 @@ #include "../utils/hydro_utilities.h" #endif -/*! \fn void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real - *dev_bounds_R, int nx, int ny, int nz, int n_ghost, Real dx, Real dt, Real - gamma, int dir, int n_fields) +/*! * \brief When passed a stencil of conserved variables, returns the left and right boundary values for the interface calculated using ppm. */ -__global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, int ny, int nz, - int n_ghost, Real dx, Real dt, Real gamma, int dir, int n_fields) +__global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, int ny, int nz, Real dx, + Real dt, Real gamma, int dir) { int n_cells = nx * ny * nz; int o1, o2, o3; @@ -148,7 +146,7 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou #ifdef DE // PRESSURE_DE E = dev_conserved[4 * n_cells + id]; E_kin = 0.5 * d_i * (vx_i * vx_i + vy_i * vy_i + vz_i * vz_i); - dge = dev_conserved[(n_fields - 1) * n_cells + id]; + dge = dev_conserved[grid_enum::GasEnergy * n_cells + id]; p_i = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, dge, gamma); #else // not DE p_i = (dev_conserved[4 * n_cells + id] - 0.5 * d_i * (vx_i * vx_i + vy_i * vy_i + vz_i * vz_i)) * (gamma - 1.0); @@ -182,7 +180,7 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou #ifdef DE // PRESSURE_DE E = dev_conserved[4 * n_cells + id]; E_kin = 0.5 * d_imo * (vx_imo * vx_imo + vy_imo * vy_imo + vz_imo * vz_imo); - dge = dev_conserved[(n_fields - 1) * n_cells + id]; + dge = dev_conserved[grid_enum::GasEnergy * n_cells + id]; p_imo = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, dge, gamma); #else // not DE p_imo = (dev_conserved[4 * n_cells + id] - 0.5 * d_imo * (vx_imo * vx_imo + vy_imo * vy_imo + vz_imo * vz_imo)) * @@ -216,7 +214,7 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou #ifdef DE // PRESSURE_DE E = dev_conserved[4 * n_cells + id]; E_kin = 0.5 * d_ipo * (vx_ipo * vx_ipo + vy_ipo * vy_ipo + vz_ipo * vz_ipo); - dge = dev_conserved[(n_fields - 1) * n_cells + id]; + dge = dev_conserved[grid_enum::GasEnergy * n_cells + id]; p_ipo = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, dge, gamma); #else // not DE p_ipo = (dev_conserved[4 * n_cells + id] - 0.5 * d_ipo * (vx_ipo * vx_ipo + vy_ipo * vy_ipo + vz_ipo * vz_ipo)) * @@ -250,7 +248,7 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou #ifdef DE // PRESSURE_DE E = dev_conserved[4 * n_cells + id]; E_kin = 0.5 * d_imt * (vx_imt * vx_imt + vy_imt * vy_imt + vz_imt * vz_imt); - dge = dev_conserved[(n_fields - 1) * n_cells + id]; + dge = dev_conserved[grid_enum::GasEnergy * n_cells + id]; p_imt = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, dge, gamma); #else // not DE p_imt = (dev_conserved[4 * n_cells + id] - 0.5 * d_imt * (vx_imt * vx_imt + vy_imt * vy_imt + vz_imt * vz_imt)) * @@ -284,7 +282,7 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou #ifdef DE // PRESSURE_DE E = dev_conserved[4 * n_cells + id]; E_kin = 0.5 * d_ipt * (vx_ipt * vx_ipt + vy_ipt * vy_ipt + vz_ipt * vz_ipt); - dge = dev_conserved[(n_fields - 1) * n_cells + id]; + dge = dev_conserved[grid_enum::GasEnergy * n_cells + id]; p_ipt = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, dge, gamma); #else // not DE p_ipt = (dev_conserved[4 * n_cells + id] - 0.5 * d_ipt * (vx_ipt * vx_ipt + vy_ipt * vy_ipt + vz_ipt * vz_ipt)) * @@ -1251,7 +1249,7 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou } #endif // SCALAR #ifdef DE - dev_bounds_R[(n_fields - 1) * n_cells + id] = d_L * ge_L; + dev_bounds_R[grid_enum::GasEnergy * n_cells + id] = d_L * ge_L; #endif // DE // bounds_L refers to the left side of the i+1/2 interface id = xid + yid * nx + zid * nx * ny; @@ -1266,7 +1264,7 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou } #endif // SCALAR #ifdef DE - dev_bounds_L[(n_fields - 1) * n_cells + id] = d_R * ge_R; + dev_bounds_L[grid_enum::GasEnergy * n_cells + id] = d_R * ge_R; #endif // DE } } diff --git a/src/reconstruction/ppmc_cuda.h b/src/reconstruction/ppmc_cuda.h index 1717eb90f..f70d4a801 100644 --- a/src/reconstruction/ppmc_cuda.h +++ b/src/reconstruction/ppmc_cuda.h @@ -11,7 +11,7 @@ int nx, int ny, int nz, int n_ghost, Real dx, Real dt, Real gamma, int dir) * \brief When passed a stencil of conserved variables, returns the left and right boundary values for the interface calculated using ppm. */ -__global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, int ny, int nz, - int n_ghost, Real dx, Real dt, Real gamma, int dir, int n_fields); +__global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, int ny, int nz, Real dx, + Real dt, Real gamma, int dir); #endif // PPMC_CUDA_H From 2225789fc9eb1e83dfd7f0b68d7390f4126a22c4 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Fri, 14 Apr 2023 13:55:14 -0400 Subject: [PATCH 406/694] Add a test for hydro PPMC --- src/reconstruction/plmc_cuda_tests.cu | 1 + src/reconstruction/ppmc_cuda_tests.cu | 116 ++++++++++++++++++++++++++ 2 files changed, 117 insertions(+) create mode 100644 src/reconstruction/ppmc_cuda_tests.cu diff --git a/src/reconstruction/plmc_cuda_tests.cu b/src/reconstruction/plmc_cuda_tests.cu index 88e9c3b34..e6b115044 100644 --- a/src/reconstruction/plmc_cuda_tests.cu +++ b/src/reconstruction/plmc_cuda_tests.cu @@ -7,6 +7,7 @@ // STL Includes #include #include +#include #include // External Includes diff --git a/src/reconstruction/ppmc_cuda_tests.cu b/src/reconstruction/ppmc_cuda_tests.cu new file mode 100644 index 000000000..01730ed2a --- /dev/null +++ b/src/reconstruction/ppmc_cuda_tests.cu @@ -0,0 +1,116 @@ +/*! + * \file ppmc_cuda_tests.cu + * \brief Tests for the contents of ppmc_cuda.h and ppmc_cuda.cu + * + */ + +// STL Includes +#include +#include +#include + +// External Includes +#include // Include GoogleTest and related libraries/headers + +// Local Includes +#include + +#include "../global/global.h" +#include "../io/io.h" +#include "../reconstruction/ppmc_cuda.h" +#include "../utils/DeviceVector.h" +#include "../utils/hydro_utilities.h" +#include "../utils/testing_utilities.h" + +TEST(tHYDROPpmcReconstructor, CorrectInputExpectCorrectOutput) +{ + // Set up PRNG to use + std::mt19937_64 prng(42); + std::uniform_real_distribution doubleRand(0.1, 5); + + // Mock up needed information + size_t const nx = 6; + size_t const ny = 1; + size_t const nz = 1; + size_t const n_fields = 5; + double const dx = doubleRand(prng); + double const dt = doubleRand(prng); + double const gamma = 5.0 / 3.0; + + // Setup host grid. Fill host grid with random values and randomly assign maximum value + std::vector host_grid(nx * ny * nz * n_fields); + for (size_t i = 0; i < host_grid.size(); i++) { + host_grid.at(i) = doubleRand(prng); + } + + // Allocating and copying to device + cuda_utilities::DeviceVector dev_grid(host_grid.size()); + dev_grid.cpyHostToDevice(host_grid); + + // Fiducial Data + std::unordered_map fiducial_interface_left = {{2, 4.5260179354990537}, + {8, 0.16067557854687248}, + {14, 3.7907707014364083}, + {20, 2.1837489694378442}, + {26, 3.8877922383184833}}; + std::unordered_map fiducial_interface_right = {{1, 4.5260179354990537}, + {7, 0.16067557854687248}, + {13, 3.7907707014364083}, + {19, 2.1837489694378442}, + {25, 3.8877922383184833}}; + + // Loop over different directions + for (size_t direction = 0; direction < 3; direction++) { + // Assign the shape + size_t nx_rot, ny_rot, nz_rot; + switch (direction) { + case 0: + nx_rot = nx; + ny_rot = ny; + nz_rot = nz; + break; + case 1: + nx_rot = ny; + ny_rot = nz; + nz_rot = nx; + break; + case 2: + nx_rot = nz; + ny_rot = nx; + nz_rot = ny; + break; + } + + // Allocate device buffers + cuda_utilities::DeviceVector dev_interface_left(host_grid.size()); + cuda_utilities::DeviceVector dev_interface_right(host_grid.size()); + + // Launch kernel + hipLaunchKernelGGL(PPMC_cuda, dev_grid.size(), 1, 0, 0, dev_grid.data(), dev_interface_left.data(), + dev_interface_right.data(), nx_rot, ny_rot, nz_rot, dx, dt, gamma, direction); + CudaCheckError(); + CHECK(cudaDeviceSynchronize()); + + // Perform Comparison + for (size_t i = 0; i < host_grid.size(); i++) { + // Check the left interface + double test_val = dev_interface_left.at(i); + double fiducial_val = + (fiducial_interface_left.find(i) == fiducial_interface_left.end()) ? 0.0 : fiducial_interface_left[i]; + + testingUtilities::checkResults( + fiducial_val, test_val, + "left interface at i=" + std::to_string(i) + ", in direction " + std::to_string(direction)); + + // Check the right interface + test_val = dev_interface_right.at(i); + fiducial_val = + (fiducial_interface_right.find(i) == fiducial_interface_right.end()) ? 0.0 : fiducial_interface_right[i]; + // if (test_val != 0) + // std::cout << "{" << i << ", " << to_string_exact(test_val) << "}," << std::endl; + testingUtilities::checkResults( + fiducial_val, test_val, + "right interface at i=" + std::to_string(i) + ", in direction " + std::to_string(direction)); + } + } +} From 783641ab37da48c5a495699f044d3c5edfd6792d Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Fri, 14 Apr 2023 14:31:19 -0400 Subject: [PATCH 407/694] PPMC: Move and update thread guard --- src/reconstruction/ppmc_cuda.cu | 1962 ++++++++++++------------- src/reconstruction/ppmc_cuda_tests.cu | 101 +- 2 files changed, 1027 insertions(+), 1036 deletions(-) diff --git a/src/reconstruction/ppmc_cuda.cu b/src/reconstruction/ppmc_cuda.cu index 3de5c47b1..26829c4b6 100644 --- a/src/reconstruction/ppmc_cuda.cu +++ b/src/reconstruction/ppmc_cuda.cu @@ -20,6 +20,16 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, int ny, int nz, Real dx, Real dt, Real gamma, int dir) { + // get a thread ID + int const thread_id = threadIdx.x + blockIdx.x * blockDim.x; + int xid, yid, zid; + cuda_utilities::compute3DIndices(thread_id, nx, ny, xid, yid, zid); + + // Thread guard to prevent overrun + if (xid < 2 or xid >= nx - 3 or yid < 2 or yid >= ny - 3 or zid < 2 or zid >= nz - 3) { + return; + } + int n_cells = nx * ny * nz; int o1, o2, o3; switch (dir) { @@ -99,1172 +109,1134 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou #endif // VL #endif // SCALAR - // get a thread ID - int blockId = blockIdx.x + blockIdx.y * gridDim.x; - int tid = threadIdx.x + blockId * blockDim.x; - int id; - int zid = tid / (nx * ny); - int yid = (tid - zid * nx * ny) / nx; - int xid = tid - zid * nx * ny - yid * nx; - - int xs, xe, ys, ye, zs, ze; + // load the 5-cell stencil into registers + // cell i + int id = xid + yid * nx + zid * nx * ny; + d_i = dev_conserved[id]; + vx_i = dev_conserved[o1 * n_cells + id] / d_i; + vy_i = dev_conserved[o2 * n_cells + id] / d_i; + vz_i = dev_conserved[o3 * n_cells + id] / d_i; +#ifdef DE // PRESSURE_DE + E = dev_conserved[4 * n_cells + id]; + E_kin = 0.5 * d_i * (vx_i * vx_i + vy_i * vy_i + vz_i * vz_i); + dge = dev_conserved[grid_enum::GasEnergy * n_cells + id]; + p_i = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, dge, gamma); +#else // not DE + p_i = (dev_conserved[4 * n_cells + id] - 0.5 * d_i * (vx_i * vx_i + vy_i * vy_i + vz_i * vz_i)) * (gamma - 1.0); +#endif // PRESSURE_DE + p_i = fmax(p_i, (Real)TINY_NUMBER); +#ifdef DE + ge_i = dge / d_i; +#endif // DE +#ifdef SCALAR + for (int i = 0; i < NSCALARS; i++) { + scalar_i[i] = dev_conserved[(5 + i) * n_cells + id] / d_i; + } +#endif // SCALAR + // cell i-1 switch (dir) { case 0: - xs = 2; - xe = nx - 3; - ys = 0; - ye = ny; - zs = 0; - ze = nz; + id = xid - 1 + yid * nx + zid * nx * ny; break; case 1: - xs = 0; - xe = nx; - ys = 2; - ye = ny - 3; - zs = 0; - ze = nz; + id = xid + (yid - 1) * nx + zid * nx * ny; break; case 2: - xs = 0; - xe = nx; - ys = 0; - ye = ny; - zs = 2; - ze = nz - 3; + id = xid + yid * nx + (zid - 1) * nx * ny; break; } - if (xid >= xs && xid < xe && yid >= ys && yid < ye && zid >= zs && zid < ze) { - // load the 5-cell stencil into registers - // cell i - id = xid + yid * nx + zid * nx * ny; - d_i = dev_conserved[id]; - vx_i = dev_conserved[o1 * n_cells + id] / d_i; - vy_i = dev_conserved[o2 * n_cells + id] / d_i; - vz_i = dev_conserved[o3 * n_cells + id] / d_i; -#ifdef DE // PRESSURE_DE - E = dev_conserved[4 * n_cells + id]; - E_kin = 0.5 * d_i * (vx_i * vx_i + vy_i * vy_i + vz_i * vz_i); - dge = dev_conserved[grid_enum::GasEnergy * n_cells + id]; - p_i = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, dge, gamma); -#else // not DE - p_i = (dev_conserved[4 * n_cells + id] - 0.5 * d_i * (vx_i * vx_i + vy_i * vy_i + vz_i * vz_i)) * (gamma - 1.0); -#endif // PRESSURE_DE - p_i = fmax(p_i, (Real)TINY_NUMBER); -#ifdef DE - ge_i = dge / d_i; -#endif // DE -#ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - scalar_i[i] = dev_conserved[(5 + i) * n_cells + id] / d_i; - } -#endif // SCALAR - // cell i-1 - switch (dir) { - case 0: - id = xid - 1 + yid * nx + zid * nx * ny; - break; - case 1: - id = xid + (yid - 1) * nx + zid * nx * ny; - break; - case 2: - id = xid + yid * nx + (zid - 1) * nx * ny; - break; - } - - d_imo = dev_conserved[id]; - vx_imo = dev_conserved[o1 * n_cells + id] / d_imo; - vy_imo = dev_conserved[o2 * n_cells + id] / d_imo; - vz_imo = dev_conserved[o3 * n_cells + id] / d_imo; + d_imo = dev_conserved[id]; + vx_imo = dev_conserved[o1 * n_cells + id] / d_imo; + vy_imo = dev_conserved[o2 * n_cells + id] / d_imo; + vz_imo = dev_conserved[o3 * n_cells + id] / d_imo; #ifdef DE // PRESSURE_DE - E = dev_conserved[4 * n_cells + id]; - E_kin = 0.5 * d_imo * (vx_imo * vx_imo + vy_imo * vy_imo + vz_imo * vz_imo); - dge = dev_conserved[grid_enum::GasEnergy * n_cells + id]; - p_imo = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, dge, gamma); + E = dev_conserved[4 * n_cells + id]; + E_kin = 0.5 * d_imo * (vx_imo * vx_imo + vy_imo * vy_imo + vz_imo * vz_imo); + dge = dev_conserved[grid_enum::GasEnergy * n_cells + id]; + p_imo = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, dge, gamma); #else // not DE - p_imo = (dev_conserved[4 * n_cells + id] - 0.5 * d_imo * (vx_imo * vx_imo + vy_imo * vy_imo + vz_imo * vz_imo)) * - (gamma - 1.0); + p_imo = (dev_conserved[4 * n_cells + id] - 0.5 * d_imo * (vx_imo * vx_imo + vy_imo * vy_imo + vz_imo * vz_imo)) * + (gamma - 1.0); #endif // PRESSURE_DE - p_imo = fmax(p_imo, (Real)TINY_NUMBER); + p_imo = fmax(p_imo, (Real)TINY_NUMBER); #ifdef DE - ge_imo = dge / d_imo; + ge_imo = dge / d_imo; #endif // DE #ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - scalar_imo[i] = dev_conserved[(5 + i) * n_cells + id] / d_imo; - } + for (int i = 0; i < NSCALARS; i++) { + scalar_imo[i] = dev_conserved[(5 + i) * n_cells + id] / d_imo; + } #endif // SCALAR - // cell i+1 - switch (dir) { - case 0: - id = xid + 1 + yid * nx + zid * nx * ny; - break; - case 1: - id = xid + (yid + 1) * nx + zid * nx * ny; - break; - case 2: - id = xid + yid * nx + (zid + 1) * nx * ny; - break; - } - d_ipo = dev_conserved[id]; - vx_ipo = dev_conserved[o1 * n_cells + id] / d_ipo; - vy_ipo = dev_conserved[o2 * n_cells + id] / d_ipo; - vz_ipo = dev_conserved[o3 * n_cells + id] / d_ipo; + // cell i+1 + switch (dir) { + case 0: + id = xid + 1 + yid * nx + zid * nx * ny; + break; + case 1: + id = xid + (yid + 1) * nx + zid * nx * ny; + break; + case 2: + id = xid + yid * nx + (zid + 1) * nx * ny; + break; + } + d_ipo = dev_conserved[id]; + vx_ipo = dev_conserved[o1 * n_cells + id] / d_ipo; + vy_ipo = dev_conserved[o2 * n_cells + id] / d_ipo; + vz_ipo = dev_conserved[o3 * n_cells + id] / d_ipo; #ifdef DE // PRESSURE_DE - E = dev_conserved[4 * n_cells + id]; - E_kin = 0.5 * d_ipo * (vx_ipo * vx_ipo + vy_ipo * vy_ipo + vz_ipo * vz_ipo); - dge = dev_conserved[grid_enum::GasEnergy * n_cells + id]; - p_ipo = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, dge, gamma); + E = dev_conserved[4 * n_cells + id]; + E_kin = 0.5 * d_ipo * (vx_ipo * vx_ipo + vy_ipo * vy_ipo + vz_ipo * vz_ipo); + dge = dev_conserved[grid_enum::GasEnergy * n_cells + id]; + p_ipo = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, dge, gamma); #else // not DE - p_ipo = (dev_conserved[4 * n_cells + id] - 0.5 * d_ipo * (vx_ipo * vx_ipo + vy_ipo * vy_ipo + vz_ipo * vz_ipo)) * - (gamma - 1.0); + p_ipo = (dev_conserved[4 * n_cells + id] - 0.5 * d_ipo * (vx_ipo * vx_ipo + vy_ipo * vy_ipo + vz_ipo * vz_ipo)) * + (gamma - 1.0); #endif // PRESSURE_DE - p_ipo = fmax(p_ipo, (Real)TINY_NUMBER); + p_ipo = fmax(p_ipo, (Real)TINY_NUMBER); #ifdef DE - ge_ipo = dge / d_ipo; + ge_ipo = dge / d_ipo; #endif // DE #ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - scalar_ipo[i] = dev_conserved[(5 + i) * n_cells + id] / d_ipo; - } + for (int i = 0; i < NSCALARS; i++) { + scalar_ipo[i] = dev_conserved[(5 + i) * n_cells + id] / d_ipo; + } #endif // SCALAR - // cell i-2 - switch (dir) { - case 0: - id = xid - 2 + yid * nx + zid * nx * ny; - break; - case 1: - id = xid + (yid - 2) * nx + zid * nx * ny; - break; - case 2: - id = xid + yid * nx + (zid - 2) * nx * ny; - break; - } - d_imt = dev_conserved[id]; - vx_imt = dev_conserved[o1 * n_cells + id] / d_imt; - vy_imt = dev_conserved[o2 * n_cells + id] / d_imt; - vz_imt = dev_conserved[o3 * n_cells + id] / d_imt; + // cell i-2 + switch (dir) { + case 0: + id = xid - 2 + yid * nx + zid * nx * ny; + break; + case 1: + id = xid + (yid - 2) * nx + zid * nx * ny; + break; + case 2: + id = xid + yid * nx + (zid - 2) * nx * ny; + break; + } + d_imt = dev_conserved[id]; + vx_imt = dev_conserved[o1 * n_cells + id] / d_imt; + vy_imt = dev_conserved[o2 * n_cells + id] / d_imt; + vz_imt = dev_conserved[o3 * n_cells + id] / d_imt; #ifdef DE // PRESSURE_DE - E = dev_conserved[4 * n_cells + id]; - E_kin = 0.5 * d_imt * (vx_imt * vx_imt + vy_imt * vy_imt + vz_imt * vz_imt); - dge = dev_conserved[grid_enum::GasEnergy * n_cells + id]; - p_imt = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, dge, gamma); + E = dev_conserved[4 * n_cells + id]; + E_kin = 0.5 * d_imt * (vx_imt * vx_imt + vy_imt * vy_imt + vz_imt * vz_imt); + dge = dev_conserved[grid_enum::GasEnergy * n_cells + id]; + p_imt = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, dge, gamma); #else // not DE - p_imt = (dev_conserved[4 * n_cells + id] - 0.5 * d_imt * (vx_imt * vx_imt + vy_imt * vy_imt + vz_imt * vz_imt)) * - (gamma - 1.0); + p_imt = (dev_conserved[4 * n_cells + id] - 0.5 * d_imt * (vx_imt * vx_imt + vy_imt * vy_imt + vz_imt * vz_imt)) * + (gamma - 1.0); #endif // PRESSURE_DE - p_imt = fmax(p_imt, (Real)TINY_NUMBER); + p_imt = fmax(p_imt, (Real)TINY_NUMBER); #ifdef DE - ge_imt = dge / d_imt; + ge_imt = dge / d_imt; #endif // DE #ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - scalar_imt[i] = dev_conserved[(5 + i) * n_cells + id] / d_imt; - } + for (int i = 0; i < NSCALARS; i++) { + scalar_imt[i] = dev_conserved[(5 + i) * n_cells + id] / d_imt; + } #endif // SCALAR - // cell i+2 - switch (dir) { - case 0: - id = xid + 2 + yid * nx + zid * nx * ny; - break; - case 1: - id = xid + (yid + 2) * nx + zid * nx * ny; - break; - case 2: - id = xid + yid * nx + (zid + 2) * nx * ny; - break; - } - d_ipt = dev_conserved[id]; - vx_ipt = dev_conserved[o1 * n_cells + id] / d_ipt; - vy_ipt = dev_conserved[o2 * n_cells + id] / d_ipt; - vz_ipt = dev_conserved[o3 * n_cells + id] / d_ipt; + // cell i+2 + switch (dir) { + case 0: + id = xid + 2 + yid * nx + zid * nx * ny; + break; + case 1: + id = xid + (yid + 2) * nx + zid * nx * ny; + break; + case 2: + id = xid + yid * nx + (zid + 2) * nx * ny; + break; + } + d_ipt = dev_conserved[id]; + vx_ipt = dev_conserved[o1 * n_cells + id] / d_ipt; + vy_ipt = dev_conserved[o2 * n_cells + id] / d_ipt; + vz_ipt = dev_conserved[o3 * n_cells + id] / d_ipt; #ifdef DE // PRESSURE_DE - E = dev_conserved[4 * n_cells + id]; - E_kin = 0.5 * d_ipt * (vx_ipt * vx_ipt + vy_ipt * vy_ipt + vz_ipt * vz_ipt); - dge = dev_conserved[grid_enum::GasEnergy * n_cells + id]; - p_ipt = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, dge, gamma); + E = dev_conserved[4 * n_cells + id]; + E_kin = 0.5 * d_ipt * (vx_ipt * vx_ipt + vy_ipt * vy_ipt + vz_ipt * vz_ipt); + dge = dev_conserved[grid_enum::GasEnergy * n_cells + id]; + p_ipt = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, dge, gamma); #else // not DE - p_ipt = (dev_conserved[4 * n_cells + id] - 0.5 * d_ipt * (vx_ipt * vx_ipt + vy_ipt * vy_ipt + vz_ipt * vz_ipt)) * - (gamma - 1.0); + p_ipt = (dev_conserved[4 * n_cells + id] - 0.5 * d_ipt * (vx_ipt * vx_ipt + vy_ipt * vy_ipt + vz_ipt * vz_ipt)) * + (gamma - 1.0); #endif // PRESSURE_DE - p_ipt = fmax(p_ipt, (Real)TINY_NUMBER); + p_ipt = fmax(p_ipt, (Real)TINY_NUMBER); #ifdef DE - ge_ipt = dge / d_ipt; + ge_ipt = dge / d_ipt; #endif // DE #ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - scalar_ipt[i] = dev_conserved[(5 + i) * n_cells + id] / d_ipt; - } + for (int i = 0; i < NSCALARS; i++) { + scalar_ipt[i] = dev_conserved[(5 + i) * n_cells + id] / d_ipt; + } #endif // SCALAR - // printf("%d %d %d %f %f %f %f %f\n", xid, yid, zid, d_i, vx_i, vy_i, vz_i, - // p_i); - - // Steps 2 - 5 are repeated for cell i-1, i, and i+1 - // Step 2 - Compute the left, right, centered, and van Leer differences of - // the primitive variables - // Note that here L and R refer to locations relative to the cell - // center Stone Eqn 36 - - // calculate the adiabatic sound speed in cell imo - a = sqrt(gamma * p_imo / d_imo); - - // left - del_d_L = d_imo - d_imt; - del_vx_L = vx_imo - vx_imt; - del_vy_L = vy_imo - vy_imt; - del_vz_L = vz_imo - vz_imt; - del_p_L = p_imo - p_imt; - - // right - del_d_R = d_i - d_imo; - del_vx_R = vx_i - vx_imo; - del_vy_R = vy_i - vy_imo; - del_vz_R = vz_i - vz_imo; - del_p_R = p_i - p_imo; - - // centered - del_d_C = 0.5 * (d_i - d_imt); - del_vx_C = 0.5 * (vx_i - vx_imt); - del_vy_C = 0.5 * (vy_i - vy_imt); - del_vz_C = 0.5 * (vz_i - vz_imt); - del_p_C = 0.5 * (p_i - p_imt); - - // Van Leer - if (del_d_L * del_d_R > 0.0) { - del_d_G = 2.0 * del_d_L * del_d_R / (del_d_L + del_d_R); - } else { - del_d_G = 0.0; - } - if (del_vx_L * del_vx_R > 0.0) { - del_vx_G = 2.0 * del_vx_L * del_vx_R / (del_vx_L + del_vx_R); - } else { - del_vx_G = 0.0; - } - if (del_vy_L * del_vy_R > 0.0) { - del_vy_G = 2.0 * del_vy_L * del_vy_R / (del_vy_L + del_vy_R); - } else { - del_vy_G = 0.0; - } - if (del_vz_L * del_vz_R > 0.0) { - del_vz_G = 2.0 * del_vz_L * del_vz_R / (del_vz_L + del_vz_R); - } else { - del_vz_G = 0.0; - } - if (del_p_L * del_p_R > 0.0) { - del_p_G = 2.0 * del_p_L * del_p_R / (del_p_L + del_p_R); - } else { - del_p_G = 0.0; - } + // printf("%d %d %d %f %f %f %f %f\n", xid, yid, zid, d_i, vx_i, vy_i, vz_i, + // p_i); + + // Steps 2 - 5 are repeated for cell i-1, i, and i+1 + // Step 2 - Compute the left, right, centered, and van Leer differences of + // the primitive variables + // Note that here L and R refer to locations relative to the cell + // center Stone Eqn 36 + + // calculate the adiabatic sound speed in cell imo + a = sqrt(gamma * p_imo / d_imo); + + // left + del_d_L = d_imo - d_imt; + del_vx_L = vx_imo - vx_imt; + del_vy_L = vy_imo - vy_imt; + del_vz_L = vz_imo - vz_imt; + del_p_L = p_imo - p_imt; + + // right + del_d_R = d_i - d_imo; + del_vx_R = vx_i - vx_imo; + del_vy_R = vy_i - vy_imo; + del_vz_R = vz_i - vz_imo; + del_p_R = p_i - p_imo; + + // centered + del_d_C = 0.5 * (d_i - d_imt); + del_vx_C = 0.5 * (vx_i - vx_imt); + del_vy_C = 0.5 * (vy_i - vy_imt); + del_vz_C = 0.5 * (vz_i - vz_imt); + del_p_C = 0.5 * (p_i - p_imt); + + // Van Leer + if (del_d_L * del_d_R > 0.0) { + del_d_G = 2.0 * del_d_L * del_d_R / (del_d_L + del_d_R); + } else { + del_d_G = 0.0; + } + if (del_vx_L * del_vx_R > 0.0) { + del_vx_G = 2.0 * del_vx_L * del_vx_R / (del_vx_L + del_vx_R); + } else { + del_vx_G = 0.0; + } + if (del_vy_L * del_vy_R > 0.0) { + del_vy_G = 2.0 * del_vy_L * del_vy_R / (del_vy_L + del_vy_R); + } else { + del_vy_G = 0.0; + } + if (del_vz_L * del_vz_R > 0.0) { + del_vz_G = 2.0 * del_vz_L * del_vz_R / (del_vz_L + del_vz_R); + } else { + del_vz_G = 0.0; + } + if (del_p_L * del_p_R > 0.0) { + del_p_G = 2.0 * del_p_L * del_p_R / (del_p_L + del_p_R); + } else { + del_p_G = 0.0; + } #ifdef DE - del_ge_L = ge_imo - ge_imt; - del_ge_R = ge_i - ge_imo; - del_ge_C = 0.5 * (ge_i - ge_imt); - if (del_ge_L * del_ge_R > 0.0) { - del_ge_G = 2.0 * del_ge_L * del_ge_R / (del_ge_L + del_ge_R); - } else { - del_ge_G = 0.0; - } + del_ge_L = ge_imo - ge_imt; + del_ge_R = ge_i - ge_imo; + del_ge_C = 0.5 * (ge_i - ge_imt); + if (del_ge_L * del_ge_R > 0.0) { + del_ge_G = 2.0 * del_ge_L * del_ge_R / (del_ge_L + del_ge_R); + } else { + del_ge_G = 0.0; + } #endif // DE #ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - del_scalar_L[i] = scalar_imo[i] - scalar_imt[i]; - del_scalar_R[i] = scalar_i[i] - scalar_imo[i]; - del_scalar_C[i] = 0.5 * (scalar_i[i] - scalar_imt[i]); - if (del_scalar_L[i] * del_scalar_R[i] > 0.0) { - del_scalar_G[i] = 2.0 * del_scalar_L[i] * del_scalar_R[i] / (del_scalar_L[i] + del_scalar_R[i]); - } else { - del_scalar_G[i] = 0.0; - } + for (int i = 0; i < NSCALARS; i++) { + del_scalar_L[i] = scalar_imo[i] - scalar_imt[i]; + del_scalar_R[i] = scalar_i[i] - scalar_imo[i]; + del_scalar_C[i] = 0.5 * (scalar_i[i] - scalar_imt[i]); + if (del_scalar_L[i] * del_scalar_R[i] > 0.0) { + del_scalar_G[i] = 2.0 * del_scalar_L[i] * del_scalar_R[i] / (del_scalar_L[i] + del_scalar_R[i]); + } else { + del_scalar_G[i] = 0.0; } + } #endif // SCALAR - // Step 3 - Project the left, right, centered and van Leer differences onto - // the characteristic variables - // Stone Eqn 37 (del_a are differences in characteristic variables, - // see Stone for notation) Use the eigenvectors given in Stone - // 2008, Appendix A - - del_a_0_L = -0.5 * d_imo * del_vx_L / a + 0.5 * del_p_L / (a * a); - del_a_1_L = del_d_L - del_p_L / (a * a); - del_a_2_L = del_vy_L; - del_a_3_L = del_vz_L; - del_a_4_L = 0.5 * d_imo * del_vx_L / a + 0.5 * del_p_L / (a * a); - - del_a_0_R = -0.5 * d_imo * del_vx_R / a + 0.5 * del_p_R / (a * a); - del_a_1_R = del_d_R - del_p_R / (a * a); - del_a_2_R = del_vy_R; - del_a_3_R = del_vz_R; - del_a_4_R = 0.5 * d_imo * del_vx_R / a + 0.5 * del_p_R / (a * a); - - del_a_0_C = -0.5 * d_imo * del_vx_C / a + 0.5 * del_p_C / (a * a); - del_a_1_C = del_d_C - del_p_C / (a * a); - del_a_2_C = del_vy_C; - del_a_3_C = del_vz_C; - del_a_4_C = 0.5 * d_imo * del_vx_C / a + 0.5 * del_p_C / (a * a); - - del_a_0_G = -0.5 * d_imo * del_vx_G / a + 0.5 * del_p_G / (a * a); - del_a_1_G = del_d_G - del_p_G / (a * a); - del_a_2_G = del_vy_G; - del_a_3_G = del_vz_G; - del_a_4_G = 0.5 * d_imo * del_vx_G / a + 0.5 * del_p_G / (a * a); - - // Step 4 - Apply monotonicity constraints to the differences in the - // characteristic variables - // Stone Eqn 38 - - del_a_0_m = del_a_1_m = del_a_2_m = del_a_3_m = del_a_4_m = 0.0; - - if (del_a_0_L * del_a_0_R > 0.0) { - lim_slope_a = fmin(fabs(del_a_0_L), fabs(del_a_0_R)); - lim_slope_b = fmin(fabs(del_a_0_C), fabs(del_a_0_G)); - del_a_0_m = sgn_CUDA(del_a_0_C) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); - } - if (del_a_1_L * del_a_1_R > 0.0) { - lim_slope_a = fmin(fabs(del_a_1_L), fabs(del_a_1_R)); - lim_slope_b = fmin(fabs(del_a_1_C), fabs(del_a_1_G)); - del_a_1_m = sgn_CUDA(del_a_1_C) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); - } - if (del_a_2_L * del_a_2_R > 0.0) { - lim_slope_a = fmin(fabs(del_a_2_L), fabs(del_a_2_R)); - lim_slope_b = fmin(fabs(del_a_2_C), fabs(del_a_2_G)); - del_a_2_m = sgn_CUDA(del_a_2_C) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); - } - if (del_a_3_L * del_a_3_R > 0.0) { - lim_slope_a = fmin(fabs(del_a_3_L), fabs(del_a_3_R)); - lim_slope_b = fmin(fabs(del_a_3_C), fabs(del_a_3_G)); - del_a_3_m = sgn_CUDA(del_a_3_C) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); - } - if (del_a_4_L * del_a_4_R > 0.0) { - lim_slope_a = fmin(fabs(del_a_4_L), fabs(del_a_4_R)); - lim_slope_b = fmin(fabs(del_a_4_C), fabs(del_a_4_G)); - del_a_4_m = sgn_CUDA(del_a_4_C) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); - } + // Step 3 - Project the left, right, centered and van Leer differences onto + // the characteristic variables + // Stone Eqn 37 (del_a are differences in characteristic variables, + // see Stone for notation) Use the eigenvectors given in Stone + // 2008, Appendix A + + del_a_0_L = -0.5 * d_imo * del_vx_L / a + 0.5 * del_p_L / (a * a); + del_a_1_L = del_d_L - del_p_L / (a * a); + del_a_2_L = del_vy_L; + del_a_3_L = del_vz_L; + del_a_4_L = 0.5 * d_imo * del_vx_L / a + 0.5 * del_p_L / (a * a); + + del_a_0_R = -0.5 * d_imo * del_vx_R / a + 0.5 * del_p_R / (a * a); + del_a_1_R = del_d_R - del_p_R / (a * a); + del_a_2_R = del_vy_R; + del_a_3_R = del_vz_R; + del_a_4_R = 0.5 * d_imo * del_vx_R / a + 0.5 * del_p_R / (a * a); + + del_a_0_C = -0.5 * d_imo * del_vx_C / a + 0.5 * del_p_C / (a * a); + del_a_1_C = del_d_C - del_p_C / (a * a); + del_a_2_C = del_vy_C; + del_a_3_C = del_vz_C; + del_a_4_C = 0.5 * d_imo * del_vx_C / a + 0.5 * del_p_C / (a * a); + + del_a_0_G = -0.5 * d_imo * del_vx_G / a + 0.5 * del_p_G / (a * a); + del_a_1_G = del_d_G - del_p_G / (a * a); + del_a_2_G = del_vy_G; + del_a_3_G = del_vz_G; + del_a_4_G = 0.5 * d_imo * del_vx_G / a + 0.5 * del_p_G / (a * a); + + // Step 4 - Apply monotonicity constraints to the differences in the + // characteristic variables + // Stone Eqn 38 + + del_a_0_m = del_a_1_m = del_a_2_m = del_a_3_m = del_a_4_m = 0.0; + + if (del_a_0_L * del_a_0_R > 0.0) { + lim_slope_a = fmin(fabs(del_a_0_L), fabs(del_a_0_R)); + lim_slope_b = fmin(fabs(del_a_0_C), fabs(del_a_0_G)); + del_a_0_m = sgn_CUDA(del_a_0_C) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); + } + if (del_a_1_L * del_a_1_R > 0.0) { + lim_slope_a = fmin(fabs(del_a_1_L), fabs(del_a_1_R)); + lim_slope_b = fmin(fabs(del_a_1_C), fabs(del_a_1_G)); + del_a_1_m = sgn_CUDA(del_a_1_C) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); + } + if (del_a_2_L * del_a_2_R > 0.0) { + lim_slope_a = fmin(fabs(del_a_2_L), fabs(del_a_2_R)); + lim_slope_b = fmin(fabs(del_a_2_C), fabs(del_a_2_G)); + del_a_2_m = sgn_CUDA(del_a_2_C) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); + } + if (del_a_3_L * del_a_3_R > 0.0) { + lim_slope_a = fmin(fabs(del_a_3_L), fabs(del_a_3_R)); + lim_slope_b = fmin(fabs(del_a_3_C), fabs(del_a_3_G)); + del_a_3_m = sgn_CUDA(del_a_3_C) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); + } + if (del_a_4_L * del_a_4_R > 0.0) { + lim_slope_a = fmin(fabs(del_a_4_L), fabs(del_a_4_R)); + lim_slope_b = fmin(fabs(del_a_4_C), fabs(del_a_4_G)); + del_a_4_m = sgn_CUDA(del_a_4_C) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); + } #ifdef DE - if (del_ge_L * del_ge_R > 0.0) { - lim_slope_a = fmin(fabs(del_ge_L), fabs(del_ge_R)); - lim_slope_b = fmin(fabs(del_ge_C), fabs(del_ge_G)); - del_ge_m_imo = sgn_CUDA(del_ge_C) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); - } else { - del_ge_m_imo = 0.0; - } + if (del_ge_L * del_ge_R > 0.0) { + lim_slope_a = fmin(fabs(del_ge_L), fabs(del_ge_R)); + lim_slope_b = fmin(fabs(del_ge_C), fabs(del_ge_G)); + del_ge_m_imo = sgn_CUDA(del_ge_C) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); + } else { + del_ge_m_imo = 0.0; + } #endif // DE #ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - if (del_scalar_L[i] * del_scalar_R[i] > 0.0) { - lim_slope_a = fmin(fabs(del_scalar_L[i]), fabs(del_scalar_R[i])); - lim_slope_b = fmin(fabs(del_scalar_C[i]), fabs(del_scalar_G[i])); - del_scalar_m_imo[i] = sgn_CUDA(del_scalar_C[i]) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); - } else { - del_scalar_m_imo[i] = 0.0; - } + for (int i = 0; i < NSCALARS; i++) { + if (del_scalar_L[i] * del_scalar_R[i] > 0.0) { + lim_slope_a = fmin(fabs(del_scalar_L[i]), fabs(del_scalar_R[i])); + lim_slope_b = fmin(fabs(del_scalar_C[i]), fabs(del_scalar_G[i])); + del_scalar_m_imo[i] = sgn_CUDA(del_scalar_C[i]) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); + } else { + del_scalar_m_imo[i] = 0.0; } + } #endif // SCALAR - // Step 5 - Project the monotonized difference in the characteristic - // variables back onto the - // primitive variables - // Stone Eqn 39 - - del_d_m_imo = del_a_0_m + del_a_1_m + del_a_4_m; - del_vx_m_imo = -a * del_a_0_m / d_imo + a * del_a_4_m / d_imo; - del_vy_m_imo = del_a_2_m; - del_vz_m_imo = del_a_3_m; - del_p_m_imo = a * a * del_a_0_m + a * a * del_a_4_m; - - // Step 2 - Compute the left, right, centered, and van Leer differences of - // the primitive variables - // Note that here L and R refer to locations relative to the cell - // center Stone Eqn 36 - - // calculate the adiabatic sound speed in cell i - a = sqrt(gamma * p_i / d_i); - - // left - del_d_L = d_i - d_imo; - del_vx_L = vx_i - vx_imo; - del_vy_L = vy_i - vy_imo; - del_vz_L = vz_i - vz_imo; - del_p_L = p_i - p_imo; - - // right - del_d_R = d_ipo - d_i; - del_vx_R = vx_ipo - vx_i; - del_vy_R = vy_ipo - vy_i; - del_vz_R = vz_ipo - vz_i; - del_p_R = p_ipo - p_i; - - // centered - del_d_C = 0.5 * (d_ipo - d_imo); - del_vx_C = 0.5 * (vx_ipo - vx_imo); - del_vy_C = 0.5 * (vy_ipo - vy_imo); - del_vz_C = 0.5 * (vz_ipo - vz_imo); - del_p_C = 0.5 * (p_ipo - p_imo); - - // van Leer - if (del_d_L * del_d_R > 0.0) { - del_d_G = 2.0 * del_d_L * del_d_R / (del_d_L + del_d_R); - } else { - del_d_G = 0.0; - } - if (del_vx_L * del_vx_R > 0.0) { - del_vx_G = 2.0 * del_vx_L * del_vx_R / (del_vx_L + del_vx_R); - } else { - del_vx_G = 0.0; - } - if (del_vy_L * del_vy_R > 0.0) { - del_vy_G = 2.0 * del_vy_L * del_vy_R / (del_vy_L + del_vy_R); - } else { - del_vy_G = 0.0; - } - if (del_vz_L * del_vz_R > 0.0) { - del_vz_G = 2.0 * del_vz_L * del_vz_R / (del_vz_L + del_vz_R); - } else { - del_vz_G = 0.0; - } - if (del_p_L * del_p_R > 0.0) { - del_p_G = 2.0 * del_p_L * del_p_R / (del_p_L + del_p_R); - } else { - del_p_G = 0.0; - } + // Step 5 - Project the monotonized difference in the characteristic + // variables back onto the + // primitive variables + // Stone Eqn 39 + + del_d_m_imo = del_a_0_m + del_a_1_m + del_a_4_m; + del_vx_m_imo = -a * del_a_0_m / d_imo + a * del_a_4_m / d_imo; + del_vy_m_imo = del_a_2_m; + del_vz_m_imo = del_a_3_m; + del_p_m_imo = a * a * del_a_0_m + a * a * del_a_4_m; + + // Step 2 - Compute the left, right, centered, and van Leer differences of + // the primitive variables + // Note that here L and R refer to locations relative to the cell + // center Stone Eqn 36 + + // calculate the adiabatic sound speed in cell i + a = sqrt(gamma * p_i / d_i); + + // left + del_d_L = d_i - d_imo; + del_vx_L = vx_i - vx_imo; + del_vy_L = vy_i - vy_imo; + del_vz_L = vz_i - vz_imo; + del_p_L = p_i - p_imo; + + // right + del_d_R = d_ipo - d_i; + del_vx_R = vx_ipo - vx_i; + del_vy_R = vy_ipo - vy_i; + del_vz_R = vz_ipo - vz_i; + del_p_R = p_ipo - p_i; + + // centered + del_d_C = 0.5 * (d_ipo - d_imo); + del_vx_C = 0.5 * (vx_ipo - vx_imo); + del_vy_C = 0.5 * (vy_ipo - vy_imo); + del_vz_C = 0.5 * (vz_ipo - vz_imo); + del_p_C = 0.5 * (p_ipo - p_imo); + + // van Leer + if (del_d_L * del_d_R > 0.0) { + del_d_G = 2.0 * del_d_L * del_d_R / (del_d_L + del_d_R); + } else { + del_d_G = 0.0; + } + if (del_vx_L * del_vx_R > 0.0) { + del_vx_G = 2.0 * del_vx_L * del_vx_R / (del_vx_L + del_vx_R); + } else { + del_vx_G = 0.0; + } + if (del_vy_L * del_vy_R > 0.0) { + del_vy_G = 2.0 * del_vy_L * del_vy_R / (del_vy_L + del_vy_R); + } else { + del_vy_G = 0.0; + } + if (del_vz_L * del_vz_R > 0.0) { + del_vz_G = 2.0 * del_vz_L * del_vz_R / (del_vz_L + del_vz_R); + } else { + del_vz_G = 0.0; + } + if (del_p_L * del_p_R > 0.0) { + del_p_G = 2.0 * del_p_L * del_p_R / (del_p_L + del_p_R); + } else { + del_p_G = 0.0; + } #ifdef DE - del_ge_L = ge_i - ge_imo; - del_ge_R = ge_ipo - ge_i; - del_ge_C = 0.5 * (ge_ipo - ge_imo); - if (del_ge_L * del_ge_R > 0.0) { - del_ge_G = 2.0 * del_ge_L * del_ge_R / (del_ge_L + del_ge_R); - } else { - del_ge_G = 0.0; - } + del_ge_L = ge_i - ge_imo; + del_ge_R = ge_ipo - ge_i; + del_ge_C = 0.5 * (ge_ipo - ge_imo); + if (del_ge_L * del_ge_R > 0.0) { + del_ge_G = 2.0 * del_ge_L * del_ge_R / (del_ge_L + del_ge_R); + } else { + del_ge_G = 0.0; + } #endif // DE #ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - del_scalar_L[i] = scalar_i[i] - scalar_imo[i]; - del_scalar_R[i] = scalar_ipo[i] - scalar_i[i]; - del_scalar_C[i] = 0.5 * (scalar_ipo[i] - scalar_imo[i]); - if (del_scalar_L[i] * del_scalar_R[i] > 0.0) { - del_scalar_G[i] = 2.0 * del_scalar_L[i] * del_scalar_R[i] / (del_scalar_L[i] + del_scalar_R[i]); - } else { - del_scalar_G[i] = 0.0; - } + for (int i = 0; i < NSCALARS; i++) { + del_scalar_L[i] = scalar_i[i] - scalar_imo[i]; + del_scalar_R[i] = scalar_ipo[i] - scalar_i[i]; + del_scalar_C[i] = 0.5 * (scalar_ipo[i] - scalar_imo[i]); + if (del_scalar_L[i] * del_scalar_R[i] > 0.0) { + del_scalar_G[i] = 2.0 * del_scalar_L[i] * del_scalar_R[i] / (del_scalar_L[i] + del_scalar_R[i]); + } else { + del_scalar_G[i] = 0.0; } + } #endif // SCALAR - // Step 3 - Project the left, right, centered, and van Leer differences onto - // the characteristic variables - // Stone Eqn 37 (del_a are differences in characteristic variables, - // see Stone for notation) Use the eigenvectors given in Stone - // 2008, Appendix A - - del_a_0_L = -0.5 * d_i * del_vx_L / a + 0.5 * del_p_L / (a * a); - del_a_1_L = del_d_L - del_p_L / (a * a); - del_a_2_L = del_vy_L; - del_a_3_L = del_vz_L; - del_a_4_L = 0.5 * d_i * del_vx_L / a + 0.5 * del_p_L / (a * a); - - del_a_0_R = -0.5 * d_i * del_vx_R / a + 0.5 * del_p_R / (a * a); - del_a_1_R = del_d_R - del_p_R / (a * a); - del_a_2_R = del_vy_R; - del_a_3_R = del_vz_R; - del_a_4_R = 0.5 * d_i * del_vx_R / a + 0.5 * del_p_R / (a * a); - - del_a_0_C = -0.5 * d_i * del_vx_C / a + 0.5 * del_p_C / (a * a); - del_a_1_C = del_d_C - del_p_C / (a * a); - del_a_2_C = del_vy_C; - del_a_3_C = del_vz_C; - del_a_4_C = 0.5 * d_i * del_vx_C / a + 0.5 * del_p_C / (a * a); - - del_a_0_G = -0.5 * d_i * del_vx_G / a + 0.5 * del_p_G / (a * a); - del_a_1_G = del_d_G - del_p_G / (a * a); - del_a_2_G = del_vy_G; - del_a_3_G = del_vz_G; - del_a_4_G = 0.5 * d_i * del_vx_G / a + 0.5 * del_p_G / (a * a); - - // Step 4 - Apply monotonicity constraints to the differences in the - // characteristic variables - // Stone Eqn 38 - - del_a_0_m = del_a_1_m = del_a_2_m = del_a_3_m = del_a_4_m = 0.0; - - if (del_a_0_L * del_a_0_R > 0.0) { - lim_slope_a = fmin(fabs(del_a_0_L), fabs(del_a_0_R)); - lim_slope_b = fmin(fabs(del_a_0_C), fabs(del_a_0_G)); - del_a_0_m = sgn_CUDA(del_a_0_C) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); - } - if (del_a_1_L * del_a_1_R > 0.0) { - lim_slope_a = fmin(fabs(del_a_1_L), fabs(del_a_1_R)); - lim_slope_b = fmin(fabs(del_a_1_C), fabs(del_a_1_G)); - del_a_1_m = sgn_CUDA(del_a_1_C) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); - } - if (del_a_2_L * del_a_2_R > 0.0) { - lim_slope_a = fmin(fabs(del_a_2_L), fabs(del_a_2_R)); - lim_slope_b = fmin(fabs(del_a_2_C), fabs(del_a_2_G)); - del_a_2_m = sgn_CUDA(del_a_2_C) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); - } - if (del_a_3_L * del_a_3_R > 0.0) { - lim_slope_a = fmin(fabs(del_a_3_L), fabs(del_a_3_R)); - lim_slope_b = fmin(fabs(del_a_3_C), fabs(del_a_3_G)); - del_a_3_m = sgn_CUDA(del_a_3_C) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); - } - if (del_a_4_L * del_a_4_R > 0.0) { - lim_slope_a = fmin(fabs(del_a_4_L), fabs(del_a_4_R)); - lim_slope_b = fmin(fabs(del_a_4_C), fabs(del_a_4_G)); - del_a_4_m = sgn_CUDA(del_a_4_C) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); - } + // Step 3 - Project the left, right, centered, and van Leer differences onto + // the characteristic variables + // Stone Eqn 37 (del_a are differences in characteristic variables, + // see Stone for notation) Use the eigenvectors given in Stone + // 2008, Appendix A + + del_a_0_L = -0.5 * d_i * del_vx_L / a + 0.5 * del_p_L / (a * a); + del_a_1_L = del_d_L - del_p_L / (a * a); + del_a_2_L = del_vy_L; + del_a_3_L = del_vz_L; + del_a_4_L = 0.5 * d_i * del_vx_L / a + 0.5 * del_p_L / (a * a); + + del_a_0_R = -0.5 * d_i * del_vx_R / a + 0.5 * del_p_R / (a * a); + del_a_1_R = del_d_R - del_p_R / (a * a); + del_a_2_R = del_vy_R; + del_a_3_R = del_vz_R; + del_a_4_R = 0.5 * d_i * del_vx_R / a + 0.5 * del_p_R / (a * a); + + del_a_0_C = -0.5 * d_i * del_vx_C / a + 0.5 * del_p_C / (a * a); + del_a_1_C = del_d_C - del_p_C / (a * a); + del_a_2_C = del_vy_C; + del_a_3_C = del_vz_C; + del_a_4_C = 0.5 * d_i * del_vx_C / a + 0.5 * del_p_C / (a * a); + + del_a_0_G = -0.5 * d_i * del_vx_G / a + 0.5 * del_p_G / (a * a); + del_a_1_G = del_d_G - del_p_G / (a * a); + del_a_2_G = del_vy_G; + del_a_3_G = del_vz_G; + del_a_4_G = 0.5 * d_i * del_vx_G / a + 0.5 * del_p_G / (a * a); + + // Step 4 - Apply monotonicity constraints to the differences in the + // characteristic variables + // Stone Eqn 38 + + del_a_0_m = del_a_1_m = del_a_2_m = del_a_3_m = del_a_4_m = 0.0; + + if (del_a_0_L * del_a_0_R > 0.0) { + lim_slope_a = fmin(fabs(del_a_0_L), fabs(del_a_0_R)); + lim_slope_b = fmin(fabs(del_a_0_C), fabs(del_a_0_G)); + del_a_0_m = sgn_CUDA(del_a_0_C) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); + } + if (del_a_1_L * del_a_1_R > 0.0) { + lim_slope_a = fmin(fabs(del_a_1_L), fabs(del_a_1_R)); + lim_slope_b = fmin(fabs(del_a_1_C), fabs(del_a_1_G)); + del_a_1_m = sgn_CUDA(del_a_1_C) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); + } + if (del_a_2_L * del_a_2_R > 0.0) { + lim_slope_a = fmin(fabs(del_a_2_L), fabs(del_a_2_R)); + lim_slope_b = fmin(fabs(del_a_2_C), fabs(del_a_2_G)); + del_a_2_m = sgn_CUDA(del_a_2_C) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); + } + if (del_a_3_L * del_a_3_R > 0.0) { + lim_slope_a = fmin(fabs(del_a_3_L), fabs(del_a_3_R)); + lim_slope_b = fmin(fabs(del_a_3_C), fabs(del_a_3_G)); + del_a_3_m = sgn_CUDA(del_a_3_C) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); + } + if (del_a_4_L * del_a_4_R > 0.0) { + lim_slope_a = fmin(fabs(del_a_4_L), fabs(del_a_4_R)); + lim_slope_b = fmin(fabs(del_a_4_C), fabs(del_a_4_G)); + del_a_4_m = sgn_CUDA(del_a_4_C) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); + } #ifdef DE - if (del_ge_L * del_ge_R > 0.0) { - lim_slope_a = fmin(fabs(del_ge_L), fabs(del_ge_R)); - lim_slope_b = fmin(fabs(del_ge_C), fabs(del_ge_G)); - del_ge_m_i = sgn_CUDA(del_ge_C) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); - } else { - del_ge_m_i = 0.0; - } + if (del_ge_L * del_ge_R > 0.0) { + lim_slope_a = fmin(fabs(del_ge_L), fabs(del_ge_R)); + lim_slope_b = fmin(fabs(del_ge_C), fabs(del_ge_G)); + del_ge_m_i = sgn_CUDA(del_ge_C) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); + } else { + del_ge_m_i = 0.0; + } #endif // DE #ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - if (del_scalar_L[i] * del_scalar_R[i] > 0.0) { - lim_slope_a = fmin(fabs(del_scalar_L[i]), fabs(del_scalar_R[i])); - lim_slope_b = fmin(fabs(del_scalar_C[i]), fabs(del_scalar_G[i])); - del_scalar_m_i[i] = sgn_CUDA(del_scalar_C[i]) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); - } else { - del_scalar_m_i[i] = 0.0; - } + for (int i = 0; i < NSCALARS; i++) { + if (del_scalar_L[i] * del_scalar_R[i] > 0.0) { + lim_slope_a = fmin(fabs(del_scalar_L[i]), fabs(del_scalar_R[i])); + lim_slope_b = fmin(fabs(del_scalar_C[i]), fabs(del_scalar_G[i])); + del_scalar_m_i[i] = sgn_CUDA(del_scalar_C[i]) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); + } else { + del_scalar_m_i[i] = 0.0; } + } #endif // SCALAR - // Step 5 - Project the monotonized difference in the characteristic - // variables back onto the - // primitive variables - // Stone Eqn 39 - - del_d_m_i = del_a_0_m + del_a_1_m + del_a_4_m; - del_vx_m_i = -a * del_a_0_m / d_i + a * del_a_4_m / d_i; - del_vy_m_i = del_a_2_m; - del_vz_m_i = del_a_3_m; - del_p_m_i = a * a * del_a_0_m + a * a * del_a_4_m; - - // Step 2 - Compute the left, right, centered, and van Leer differences of - // the primitive variables - // Note that here L and R refer to locations relative to the cell - // center Stone Eqn 36 - - // calculate the adiabatic sound speed in cell ipo - a = sqrt(gamma * p_ipo / d_ipo); - - // left - del_d_L = d_ipo - d_i; - del_vx_L = vx_ipo - vx_i; - del_vy_L = vy_ipo - vy_i; - del_vz_L = vz_ipo - vz_i; - del_p_L = p_ipo - p_i; - - // right - del_d_R = d_ipt - d_ipo; - del_vx_R = vx_ipt - vx_ipo; - del_vy_R = vy_ipt - vy_ipo; - del_vz_R = vz_ipt - vz_ipo; - del_p_R = p_ipt - p_ipo; - - // centered - del_d_C = 0.5 * (d_ipt - d_i); - del_vx_C = 0.5 * (vx_ipt - vx_i); - del_vy_C = 0.5 * (vy_ipt - vy_i); - del_vz_C = 0.5 * (vz_ipt - vz_i); - del_p_C = 0.5 * (p_ipt - p_i); - - // van Leer - if (del_d_L * del_d_R > 0.0) { - del_d_G = 2.0 * del_d_L * del_d_R / (del_d_L + del_d_R); - } else { - del_d_G = 0.0; - } - if (del_vx_L * del_vx_R > 0.0) { - del_vx_G = 2.0 * del_vx_L * del_vx_R / (del_vx_L + del_vx_R); - } else { - del_vx_G = 0.0; - } - if (del_vy_L * del_vy_R > 0.0) { - del_vy_G = 2.0 * del_vy_L * del_vy_R / (del_vy_L + del_vy_R); - } else { - del_vy_G = 0.0; - } - if (del_vz_L * del_vz_R > 0.0) { - del_vz_G = 2.0 * del_vz_L * del_vz_R / (del_vz_L + del_vz_R); - } else { - del_vz_G = 0.0; - } - if (del_p_L * del_p_R > 0.0) { - del_p_G = 2.0 * del_p_L * del_p_R / (del_p_L + del_p_R); - } else { - del_p_G = 0.0; - } + // Step 5 - Project the monotonized difference in the characteristic + // variables back onto the + // primitive variables + // Stone Eqn 39 + + del_d_m_i = del_a_0_m + del_a_1_m + del_a_4_m; + del_vx_m_i = -a * del_a_0_m / d_i + a * del_a_4_m / d_i; + del_vy_m_i = del_a_2_m; + del_vz_m_i = del_a_3_m; + del_p_m_i = a * a * del_a_0_m + a * a * del_a_4_m; + + // Step 2 - Compute the left, right, centered, and van Leer differences of + // the primitive variables + // Note that here L and R refer to locations relative to the cell + // center Stone Eqn 36 + + // calculate the adiabatic sound speed in cell ipo + a = sqrt(gamma * p_ipo / d_ipo); + + // left + del_d_L = d_ipo - d_i; + del_vx_L = vx_ipo - vx_i; + del_vy_L = vy_ipo - vy_i; + del_vz_L = vz_ipo - vz_i; + del_p_L = p_ipo - p_i; + + // right + del_d_R = d_ipt - d_ipo; + del_vx_R = vx_ipt - vx_ipo; + del_vy_R = vy_ipt - vy_ipo; + del_vz_R = vz_ipt - vz_ipo; + del_p_R = p_ipt - p_ipo; + + // centered + del_d_C = 0.5 * (d_ipt - d_i); + del_vx_C = 0.5 * (vx_ipt - vx_i); + del_vy_C = 0.5 * (vy_ipt - vy_i); + del_vz_C = 0.5 * (vz_ipt - vz_i); + del_p_C = 0.5 * (p_ipt - p_i); + + // van Leer + if (del_d_L * del_d_R > 0.0) { + del_d_G = 2.0 * del_d_L * del_d_R / (del_d_L + del_d_R); + } else { + del_d_G = 0.0; + } + if (del_vx_L * del_vx_R > 0.0) { + del_vx_G = 2.0 * del_vx_L * del_vx_R / (del_vx_L + del_vx_R); + } else { + del_vx_G = 0.0; + } + if (del_vy_L * del_vy_R > 0.0) { + del_vy_G = 2.0 * del_vy_L * del_vy_R / (del_vy_L + del_vy_R); + } else { + del_vy_G = 0.0; + } + if (del_vz_L * del_vz_R > 0.0) { + del_vz_G = 2.0 * del_vz_L * del_vz_R / (del_vz_L + del_vz_R); + } else { + del_vz_G = 0.0; + } + if (del_p_L * del_p_R > 0.0) { + del_p_G = 2.0 * del_p_L * del_p_R / (del_p_L + del_p_R); + } else { + del_p_G = 0.0; + } #ifdef DE - del_ge_L = ge_ipo - ge_i; - del_ge_R = ge_ipt - ge_ipo; - del_ge_C = 0.5 * (ge_ipt - ge_i); - if (del_ge_L * del_ge_R > 0.0) { - del_ge_G = 2.0 * del_ge_L * del_ge_R / (del_ge_L + del_ge_R); - } else { - del_ge_G = 0.0; - } + del_ge_L = ge_ipo - ge_i; + del_ge_R = ge_ipt - ge_ipo; + del_ge_C = 0.5 * (ge_ipt - ge_i); + if (del_ge_L * del_ge_R > 0.0) { + del_ge_G = 2.0 * del_ge_L * del_ge_R / (del_ge_L + del_ge_R); + } else { + del_ge_G = 0.0; + } #endif // DE #ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - del_scalar_L[i] = scalar_ipo[i] - scalar_i[i]; - del_scalar_R[i] = scalar_ipt[i] - scalar_ipo[i]; - del_scalar_C[i] = 0.5 * (scalar_ipt[i] - scalar_i[i]); - if (del_scalar_L[i] * del_scalar_R[i] > 0.0) { - del_scalar_G[i] = 2.0 * del_scalar_L[i] * del_scalar_R[i] / (del_scalar_L[i] + del_scalar_R[i]); - } else { - del_scalar_G[i] = 0.0; - } + for (int i = 0; i < NSCALARS; i++) { + del_scalar_L[i] = scalar_ipo[i] - scalar_i[i]; + del_scalar_R[i] = scalar_ipt[i] - scalar_ipo[i]; + del_scalar_C[i] = 0.5 * (scalar_ipt[i] - scalar_i[i]); + if (del_scalar_L[i] * del_scalar_R[i] > 0.0) { + del_scalar_G[i] = 2.0 * del_scalar_L[i] * del_scalar_R[i] / (del_scalar_L[i] + del_scalar_R[i]); + } else { + del_scalar_G[i] = 0.0; } + } #endif // SCALAR - // Step 3 - Project the left, right, centered, and van Leer differences onto - // the characteristic variables - // Stone Eqn 37 (del_a are differences in characteristic variables, - // see Stone for notation) Use the eigenvectors given in Stone - // 2008, Appendix A - - del_a_0_L = -0.5 * d_ipo * del_vx_L / a + 0.5 * del_p_L / (a * a); - del_a_1_L = del_d_L - del_p_L / (a * a); - del_a_2_L = del_vy_L; - del_a_3_L = del_vz_L; - del_a_4_L = 0.5 * d_ipo * del_vx_L / a + 0.5 * del_p_L / (a * a); - - del_a_0_R = -0.5 * d_ipo * del_vx_R / a + 0.5 * del_p_R / (a * a); - del_a_1_R = del_d_R - del_p_R / (a * a); - del_a_2_R = del_vy_R; - del_a_3_R = del_vz_R; - del_a_4_R = 0.5 * d_ipo * del_vx_R / a + 0.5 * del_p_R / (a * a); - - del_a_0_C = -0.5 * d_ipo * del_vx_C / a + 0.5 * del_p_C / (a * a); - del_a_1_C = del_d_C - del_p_C / (a * a); - del_a_2_C = del_vy_C; - del_a_3_C = del_vz_C; - del_a_4_C = 0.5 * d_ipo * del_vx_C / a + 0.5 * del_p_C / (a * a); - - del_a_0_G = -0.5 * d_ipo * del_vx_G / a + 0.5 * del_p_G / (a * a); - del_a_1_G = del_d_G - del_p_G / (a * a); - del_a_2_G = del_vy_G; - del_a_3_G = del_vz_G; - del_a_4_G = 0.5 * d_ipo * del_vx_G / a + 0.5 * del_p_G / (a * a); - - // Step 4 - Apply monotonicity constraints to the differences in the - // characteristic variables - // Stone Eqn 38 - - del_a_0_m = del_a_1_m = del_a_2_m = del_a_3_m = del_a_4_m = 0.0; - - if (del_a_0_L * del_a_0_R > 0.0) { - lim_slope_a = fmin(fabs(del_a_0_L), fabs(del_a_0_R)); - lim_slope_b = fmin(fabs(del_a_0_C), fabs(del_a_0_G)); - del_a_0_m = sgn_CUDA(del_a_0_C) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); - } - if (del_a_1_L * del_a_1_R > 0.0) { - lim_slope_a = fmin(fabs(del_a_1_L), fabs(del_a_1_R)); - lim_slope_b = fmin(fabs(del_a_1_C), fabs(del_a_1_G)); - del_a_1_m = sgn_CUDA(del_a_1_C) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); - } - if (del_a_2_L * del_a_2_R > 0.0) { - lim_slope_a = fmin(fabs(del_a_2_L), fabs(del_a_2_R)); - lim_slope_b = fmin(fabs(del_a_2_C), fabs(del_a_2_G)); - del_a_2_m = sgn_CUDA(del_a_2_C) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); - } - if (del_a_3_L * del_a_3_R > 0.0) { - lim_slope_a = fmin(fabs(del_a_3_L), fabs(del_a_3_R)); - lim_slope_b = fmin(fabs(del_a_3_C), fabs(del_a_3_G)); - del_a_3_m = sgn_CUDA(del_a_3_C) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); - } - if (del_a_4_L * del_a_4_R > 0.0) { - lim_slope_a = fmin(fabs(del_a_4_L), fabs(del_a_4_R)); - lim_slope_b = fmin(fabs(del_a_4_C), fabs(del_a_4_G)); - del_a_4_m = sgn_CUDA(del_a_4_C) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); - } + // Step 3 - Project the left, right, centered, and van Leer differences onto + // the characteristic variables + // Stone Eqn 37 (del_a are differences in characteristic variables, + // see Stone for notation) Use the eigenvectors given in Stone + // 2008, Appendix A + + del_a_0_L = -0.5 * d_ipo * del_vx_L / a + 0.5 * del_p_L / (a * a); + del_a_1_L = del_d_L - del_p_L / (a * a); + del_a_2_L = del_vy_L; + del_a_3_L = del_vz_L; + del_a_4_L = 0.5 * d_ipo * del_vx_L / a + 0.5 * del_p_L / (a * a); + + del_a_0_R = -0.5 * d_ipo * del_vx_R / a + 0.5 * del_p_R / (a * a); + del_a_1_R = del_d_R - del_p_R / (a * a); + del_a_2_R = del_vy_R; + del_a_3_R = del_vz_R; + del_a_4_R = 0.5 * d_ipo * del_vx_R / a + 0.5 * del_p_R / (a * a); + + del_a_0_C = -0.5 * d_ipo * del_vx_C / a + 0.5 * del_p_C / (a * a); + del_a_1_C = del_d_C - del_p_C / (a * a); + del_a_2_C = del_vy_C; + del_a_3_C = del_vz_C; + del_a_4_C = 0.5 * d_ipo * del_vx_C / a + 0.5 * del_p_C / (a * a); + + del_a_0_G = -0.5 * d_ipo * del_vx_G / a + 0.5 * del_p_G / (a * a); + del_a_1_G = del_d_G - del_p_G / (a * a); + del_a_2_G = del_vy_G; + del_a_3_G = del_vz_G; + del_a_4_G = 0.5 * d_ipo * del_vx_G / a + 0.5 * del_p_G / (a * a); + + // Step 4 - Apply monotonicity constraints to the differences in the + // characteristic variables + // Stone Eqn 38 + + del_a_0_m = del_a_1_m = del_a_2_m = del_a_3_m = del_a_4_m = 0.0; + + if (del_a_0_L * del_a_0_R > 0.0) { + lim_slope_a = fmin(fabs(del_a_0_L), fabs(del_a_0_R)); + lim_slope_b = fmin(fabs(del_a_0_C), fabs(del_a_0_G)); + del_a_0_m = sgn_CUDA(del_a_0_C) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); + } + if (del_a_1_L * del_a_1_R > 0.0) { + lim_slope_a = fmin(fabs(del_a_1_L), fabs(del_a_1_R)); + lim_slope_b = fmin(fabs(del_a_1_C), fabs(del_a_1_G)); + del_a_1_m = sgn_CUDA(del_a_1_C) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); + } + if (del_a_2_L * del_a_2_R > 0.0) { + lim_slope_a = fmin(fabs(del_a_2_L), fabs(del_a_2_R)); + lim_slope_b = fmin(fabs(del_a_2_C), fabs(del_a_2_G)); + del_a_2_m = sgn_CUDA(del_a_2_C) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); + } + if (del_a_3_L * del_a_3_R > 0.0) { + lim_slope_a = fmin(fabs(del_a_3_L), fabs(del_a_3_R)); + lim_slope_b = fmin(fabs(del_a_3_C), fabs(del_a_3_G)); + del_a_3_m = sgn_CUDA(del_a_3_C) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); + } + if (del_a_4_L * del_a_4_R > 0.0) { + lim_slope_a = fmin(fabs(del_a_4_L), fabs(del_a_4_R)); + lim_slope_b = fmin(fabs(del_a_4_C), fabs(del_a_4_G)); + del_a_4_m = sgn_CUDA(del_a_4_C) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); + } #ifdef DE - if (del_ge_L * del_ge_R > 0.0) { - lim_slope_a = fmin(fabs(del_ge_L), fabs(del_ge_R)); - lim_slope_b = fmin(fabs(del_ge_C), fabs(del_ge_G)); - del_ge_m_ipo = sgn_CUDA(del_ge_C) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); - } else { - del_ge_m_ipo = 0.0; - } + if (del_ge_L * del_ge_R > 0.0) { + lim_slope_a = fmin(fabs(del_ge_L), fabs(del_ge_R)); + lim_slope_b = fmin(fabs(del_ge_C), fabs(del_ge_G)); + del_ge_m_ipo = sgn_CUDA(del_ge_C) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); + } else { + del_ge_m_ipo = 0.0; + } #endif // DE #ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - if (del_scalar_L[i] * del_scalar_R[i] > 0.0) { - lim_slope_a = fmin(fabs(del_scalar_L[i]), fabs(del_scalar_R[i])); - lim_slope_b = fmin(fabs(del_scalar_C[i]), fabs(del_scalar_G[i])); - del_scalar_m_ipo[i] = sgn_CUDA(del_scalar_C[i]) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); - } else { - del_scalar_m_ipo[i] = 0.0; - } + for (int i = 0; i < NSCALARS; i++) { + if (del_scalar_L[i] * del_scalar_R[i] > 0.0) { + lim_slope_a = fmin(fabs(del_scalar_L[i]), fabs(del_scalar_R[i])); + lim_slope_b = fmin(fabs(del_scalar_C[i]), fabs(del_scalar_G[i])); + del_scalar_m_ipo[i] = sgn_CUDA(del_scalar_C[i]) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); + } else { + del_scalar_m_ipo[i] = 0.0; } + } #endif // SCALAR - // Step 5 - Project the monotonized difference in the characteristic - // variables back onto the - // primitive variables - // Stone Eqn 39 - - del_d_m_ipo = del_a_0_m + del_a_1_m + del_a_4_m; - del_vx_m_ipo = -a * del_a_0_m / d_ipo + a * del_a_4_m / d_ipo; - del_vy_m_ipo = del_a_2_m; - del_vz_m_ipo = del_a_3_m; - del_p_m_ipo = a * a * del_a_0_m + a * a * del_a_4_m; - - // Step 6 - Use parabolic interpolation to compute values at the left and - // right of each cell center - // Here, the subscripts L and R refer to the left and right side of - // the ith cell center Stone Eqn 46 - - d_L = 0.5 * (d_i + d_imo) - (del_d_m_i - del_d_m_imo) / 6.0; - vx_L = 0.5 * (vx_i + vx_imo) - (del_vx_m_i - del_vx_m_imo) / 6.0; - vy_L = 0.5 * (vy_i + vy_imo) - (del_vy_m_i - del_vy_m_imo) / 6.0; - vz_L = 0.5 * (vz_i + vz_imo) - (del_vz_m_i - del_vz_m_imo) / 6.0; - p_L = 0.5 * (p_i + p_imo) - (del_p_m_i - del_p_m_imo) / 6.0; - - d_R = 0.5 * (d_ipo + d_i) - (del_d_m_ipo - del_d_m_i) / 6.0; - vx_R = 0.5 * (vx_ipo + vx_i) - (del_vx_m_ipo - del_vx_m_i) / 6.0; - vy_R = 0.5 * (vy_ipo + vy_i) - (del_vy_m_ipo - del_vy_m_i) / 6.0; - vz_R = 0.5 * (vz_ipo + vz_i) - (del_vz_m_ipo - del_vz_m_i) / 6.0; - p_R = 0.5 * (p_ipo + p_i) - (del_p_m_ipo - del_p_m_i) / 6.0; + // Step 5 - Project the monotonized difference in the characteristic + // variables back onto the + // primitive variables + // Stone Eqn 39 + + del_d_m_ipo = del_a_0_m + del_a_1_m + del_a_4_m; + del_vx_m_ipo = -a * del_a_0_m / d_ipo + a * del_a_4_m / d_ipo; + del_vy_m_ipo = del_a_2_m; + del_vz_m_ipo = del_a_3_m; + del_p_m_ipo = a * a * del_a_0_m + a * a * del_a_4_m; + + // Step 6 - Use parabolic interpolation to compute values at the left and + // right of each cell center + // Here, the subscripts L and R refer to the left and right side of + // the ith cell center Stone Eqn 46 + + d_L = 0.5 * (d_i + d_imo) - (del_d_m_i - del_d_m_imo) / 6.0; + vx_L = 0.5 * (vx_i + vx_imo) - (del_vx_m_i - del_vx_m_imo) / 6.0; + vy_L = 0.5 * (vy_i + vy_imo) - (del_vy_m_i - del_vy_m_imo) / 6.0; + vz_L = 0.5 * (vz_i + vz_imo) - (del_vz_m_i - del_vz_m_imo) / 6.0; + p_L = 0.5 * (p_i + p_imo) - (del_p_m_i - del_p_m_imo) / 6.0; + + d_R = 0.5 * (d_ipo + d_i) - (del_d_m_ipo - del_d_m_i) / 6.0; + vx_R = 0.5 * (vx_ipo + vx_i) - (del_vx_m_ipo - del_vx_m_i) / 6.0; + vy_R = 0.5 * (vy_ipo + vy_i) - (del_vy_m_ipo - del_vy_m_i) / 6.0; + vz_R = 0.5 * (vz_ipo + vz_i) - (del_vz_m_ipo - del_vz_m_i) / 6.0; + p_R = 0.5 * (p_ipo + p_i) - (del_p_m_ipo - del_p_m_i) / 6.0; #ifdef DE - ge_L = 0.5 * (ge_i + ge_imo) - (del_ge_m_i - del_ge_m_imo) / 6.0; - ge_R = 0.5 * (ge_ipo + ge_i) - (del_ge_m_ipo - del_ge_m_i) / 6.0; + ge_L = 0.5 * (ge_i + ge_imo) - (del_ge_m_i - del_ge_m_imo) / 6.0; + ge_R = 0.5 * (ge_ipo + ge_i) - (del_ge_m_ipo - del_ge_m_i) / 6.0; #endif // DE #ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - scalar_L[i] = 0.5 * (scalar_i[i] + scalar_imo[i]) - (del_scalar_m_i[i] - del_scalar_m_imo[i]) / 6.0; - scalar_R[i] = 0.5 * (scalar_ipo[i] + scalar_i[i]) - (del_scalar_m_ipo[i] - del_scalar_m_i[i]) / 6.0; - } + for (int i = 0; i < NSCALARS; i++) { + scalar_L[i] = 0.5 * (scalar_i[i] + scalar_imo[i]) - (del_scalar_m_i[i] - del_scalar_m_imo[i]) / 6.0; + scalar_R[i] = 0.5 * (scalar_ipo[i] + scalar_i[i]) - (del_scalar_m_ipo[i] - del_scalar_m_i[i]) / 6.0; + } #endif // SCALAR - // Step 7 - Apply further monotonicity constraints to ensure the values on - // the left and right side - // of cell center lie between neighboring cell-centered values - // Stone Eqns 47 - 53 + // Step 7 - Apply further monotonicity constraints to ensure the values on + // the left and right side + // of cell center lie between neighboring cell-centered values + // Stone Eqns 47 - 53 - if ((d_R - d_i) * (d_i - d_L) <= 0) { - d_L = d_R = d_i; - } - if ((vx_R - vx_i) * (vx_i - vx_L) <= 0) { - vx_L = vx_R = vx_i; - } - if ((vy_R - vy_i) * (vy_i - vy_L) <= 0) { - vy_L = vy_R = vy_i; - } - if ((vz_R - vz_i) * (vz_i - vz_L) <= 0) { - vz_L = vz_R = vz_i; - } - if ((p_R - p_i) * (p_i - p_L) <= 0) { - p_L = p_R = p_i; - } + if ((d_R - d_i) * (d_i - d_L) <= 0) { + d_L = d_R = d_i; + } + if ((vx_R - vx_i) * (vx_i - vx_L) <= 0) { + vx_L = vx_R = vx_i; + } + if ((vy_R - vy_i) * (vy_i - vy_L) <= 0) { + vy_L = vy_R = vy_i; + } + if ((vz_R - vz_i) * (vz_i - vz_L) <= 0) { + vz_L = vz_R = vz_i; + } + if ((p_R - p_i) * (p_i - p_L) <= 0) { + p_L = p_R = p_i; + } - if (6.0 * (d_R - d_L) * (d_i - 0.5 * (d_L + d_R)) > (d_R - d_L) * (d_R - d_L)) { - d_L = 3.0 * d_i - 2.0 * d_R; - } - if (6.0 * (vx_R - vx_L) * (vx_i - 0.5 * (vx_L + vx_R)) > (vx_R - vx_L) * (vx_R - vx_L)) { - vx_L = 3.0 * vx_i - 2.0 * vx_R; - } - if (6.0 * (vy_R - vy_L) * (vy_i - 0.5 * (vy_L + vy_R)) > (vy_R - vy_L) * (vy_R - vy_L)) { - vy_L = 3.0 * vy_i - 2.0 * vy_R; - } - if (6.0 * (vz_R - vz_L) * (vz_i - 0.5 * (vz_L + vz_R)) > (vz_R - vz_L) * (vz_R - vz_L)) { - vz_L = 3.0 * vz_i - 2.0 * vz_R; - } - if (6.0 * (p_R - p_L) * (p_i - 0.5 * (p_L + p_R)) > (p_R - p_L) * (p_R - p_L)) { - p_L = 3.0 * p_i - 2.0 * p_R; - } + if (6.0 * (d_R - d_L) * (d_i - 0.5 * (d_L + d_R)) > (d_R - d_L) * (d_R - d_L)) { + d_L = 3.0 * d_i - 2.0 * d_R; + } + if (6.0 * (vx_R - vx_L) * (vx_i - 0.5 * (vx_L + vx_R)) > (vx_R - vx_L) * (vx_R - vx_L)) { + vx_L = 3.0 * vx_i - 2.0 * vx_R; + } + if (6.0 * (vy_R - vy_L) * (vy_i - 0.5 * (vy_L + vy_R)) > (vy_R - vy_L) * (vy_R - vy_L)) { + vy_L = 3.0 * vy_i - 2.0 * vy_R; + } + if (6.0 * (vz_R - vz_L) * (vz_i - 0.5 * (vz_L + vz_R)) > (vz_R - vz_L) * (vz_R - vz_L)) { + vz_L = 3.0 * vz_i - 2.0 * vz_R; + } + if (6.0 * (p_R - p_L) * (p_i - 0.5 * (p_L + p_R)) > (p_R - p_L) * (p_R - p_L)) { + p_L = 3.0 * p_i - 2.0 * p_R; + } - if (6.0 * (d_R - d_L) * (d_i - 0.5 * (d_L + d_R)) < -(d_R - d_L) * (d_R - d_L)) { - d_R = 3.0 * d_i - 2.0 * d_L; - } - if (6.0 * (vx_R - vx_L) * (vx_i - 0.5 * (vx_L + vx_R)) < -(vx_R - vx_L) * (vx_R - vx_L)) { - vx_R = 3.0 * vx_i - 2.0 * vx_L; - } - if (6.0 * (vy_R - vy_L) * (vy_i - 0.5 * (vy_L + vy_R)) < -(vy_R - vy_L) * (vy_R - vy_L)) { - vy_R = 3.0 * vy_i - 2.0 * vy_L; - } - if (6.0 * (vz_R - vz_L) * (vz_i - 0.5 * (vz_L + vz_R)) < -(vz_R - vz_L) * (vz_R - vz_L)) { - vz_R = 3.0 * vz_i - 2.0 * vz_L; - } - if (6.0 * (p_R - p_L) * (p_i - 0.5 * (p_L + p_R)) < -(p_R - p_L) * (p_R - p_L)) { - p_R = 3.0 * p_i - 2.0 * p_L; - } + if (6.0 * (d_R - d_L) * (d_i - 0.5 * (d_L + d_R)) < -(d_R - d_L) * (d_R - d_L)) { + d_R = 3.0 * d_i - 2.0 * d_L; + } + if (6.0 * (vx_R - vx_L) * (vx_i - 0.5 * (vx_L + vx_R)) < -(vx_R - vx_L) * (vx_R - vx_L)) { + vx_R = 3.0 * vx_i - 2.0 * vx_L; + } + if (6.0 * (vy_R - vy_L) * (vy_i - 0.5 * (vy_L + vy_R)) < -(vy_R - vy_L) * (vy_R - vy_L)) { + vy_R = 3.0 * vy_i - 2.0 * vy_L; + } + if (6.0 * (vz_R - vz_L) * (vz_i - 0.5 * (vz_L + vz_R)) < -(vz_R - vz_L) * (vz_R - vz_L)) { + vz_R = 3.0 * vz_i - 2.0 * vz_L; + } + if (6.0 * (p_R - p_L) * (p_i - 0.5 * (p_L + p_R)) < -(p_R - p_L) * (p_R - p_L)) { + p_R = 3.0 * p_i - 2.0 * p_L; + } - d_L = fmax(fmin(d_i, d_imo), d_L); - d_L = fmin(fmax(d_i, d_imo), d_L); - d_R = fmax(fmin(d_i, d_ipo), d_R); - d_R = fmin(fmax(d_i, d_ipo), d_R); - vx_L = fmax(fmin(vx_i, vx_imo), vx_L); - vx_L = fmin(fmax(vx_i, vx_imo), vx_L); - vx_R = fmax(fmin(vx_i, vx_ipo), vx_R); - vx_R = fmin(fmax(vx_i, vx_ipo), vx_R); - vy_L = fmax(fmin(vy_i, vy_imo), vy_L); - vy_L = fmin(fmax(vy_i, vy_imo), vy_L); - vy_R = fmax(fmin(vy_i, vy_ipo), vy_R); - vy_R = fmin(fmax(vy_i, vy_ipo), vy_R); - vz_L = fmax(fmin(vz_i, vz_imo), vz_L); - vz_L = fmin(fmax(vz_i, vz_imo), vz_L); - vz_R = fmax(fmin(vz_i, vz_ipo), vz_R); - vz_R = fmin(fmax(vz_i, vz_ipo), vz_R); - p_L = fmax(fmin(p_i, p_imo), p_L); - p_L = fmin(fmax(p_i, p_imo), p_L); - p_R = fmax(fmin(p_i, p_ipo), p_R); - p_R = fmin(fmax(p_i, p_ipo), p_R); + d_L = fmax(fmin(d_i, d_imo), d_L); + d_L = fmin(fmax(d_i, d_imo), d_L); + d_R = fmax(fmin(d_i, d_ipo), d_R); + d_R = fmin(fmax(d_i, d_ipo), d_R); + vx_L = fmax(fmin(vx_i, vx_imo), vx_L); + vx_L = fmin(fmax(vx_i, vx_imo), vx_L); + vx_R = fmax(fmin(vx_i, vx_ipo), vx_R); + vx_R = fmin(fmax(vx_i, vx_ipo), vx_R); + vy_L = fmax(fmin(vy_i, vy_imo), vy_L); + vy_L = fmin(fmax(vy_i, vy_imo), vy_L); + vy_R = fmax(fmin(vy_i, vy_ipo), vy_R); + vy_R = fmin(fmax(vy_i, vy_ipo), vy_R); + vz_L = fmax(fmin(vz_i, vz_imo), vz_L); + vz_L = fmin(fmax(vz_i, vz_imo), vz_L); + vz_R = fmax(fmin(vz_i, vz_ipo), vz_R); + vz_R = fmin(fmax(vz_i, vz_ipo), vz_R); + p_L = fmax(fmin(p_i, p_imo), p_L); + p_L = fmin(fmax(p_i, p_imo), p_L); + p_R = fmax(fmin(p_i, p_ipo), p_R); + p_R = fmin(fmax(p_i, p_ipo), p_R); #ifdef DE - if ((ge_R - ge_i) * (ge_i - ge_L) <= 0) { - ge_L = ge_R = ge_i; - } - if (6.0 * (ge_R - ge_L) * (ge_i - 0.5 * (ge_L + ge_R)) > (ge_R - ge_L) * (ge_R - ge_L)) { - ge_L = 3.0 * ge_i - 2.0 * ge_R; - } - if (6.0 * (ge_R - ge_L) * (ge_i - 0.5 * (ge_L + ge_R)) < -(ge_R - ge_L) * (ge_R - ge_L)) { - ge_R = 3.0 * ge_i - 2.0 * ge_L; - } - ge_L = fmax(fmin(ge_i, ge_imo), ge_L); - ge_L = fmin(fmax(ge_i, ge_imo), ge_L); - ge_R = fmax(fmin(ge_i, ge_ipo), ge_R); - ge_R = fmin(fmax(ge_i, ge_ipo), ge_R); + if ((ge_R - ge_i) * (ge_i - ge_L) <= 0) { + ge_L = ge_R = ge_i; + } + if (6.0 * (ge_R - ge_L) * (ge_i - 0.5 * (ge_L + ge_R)) > (ge_R - ge_L) * (ge_R - ge_L)) { + ge_L = 3.0 * ge_i - 2.0 * ge_R; + } + if (6.0 * (ge_R - ge_L) * (ge_i - 0.5 * (ge_L + ge_R)) < -(ge_R - ge_L) * (ge_R - ge_L)) { + ge_R = 3.0 * ge_i - 2.0 * ge_L; + } + ge_L = fmax(fmin(ge_i, ge_imo), ge_L); + ge_L = fmin(fmax(ge_i, ge_imo), ge_L); + ge_R = fmax(fmin(ge_i, ge_ipo), ge_R); + ge_R = fmin(fmax(ge_i, ge_ipo), ge_R); #endif // DE #ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - if ((scalar_R[i] - scalar_i[i]) * (scalar_i[i] - scalar_L[i]) <= 0) { - scalar_L[i] = scalar_R[i] = scalar_i[i]; - } - if (6.0 * (scalar_R[i] - scalar_L[i]) * (scalar_i[i] - 0.5 * (scalar_L[i] + scalar_R[i])) > - (scalar_R[i] - scalar_L[i]) * (scalar_R[i] - scalar_L[i])) { - scalar_L[i] = 3.0 * scalar_i[i] - 2.0 * scalar_R[i]; - } - if (6.0 * (scalar_R[i] - scalar_L[i]) * (scalar_i[i] - 0.5 * (scalar_L[i] + scalar_R[i])) < - -(scalar_R[i] - scalar_L[i]) * (scalar_R[i] - scalar_L[i])) { - scalar_R[i] = 3.0 * scalar_i[i] - 2.0 * scalar_L[i]; - } - scalar_L[i] = fmax(fmin(scalar_i[i], scalar_imo[i]), scalar_L[i]); - scalar_L[i] = fmin(fmax(scalar_i[i], scalar_imo[i]), scalar_L[i]); - scalar_R[i] = fmax(fmin(scalar_i[i], scalar_ipo[i]), scalar_R[i]); - scalar_R[i] = fmin(fmax(scalar_i[i], scalar_ipo[i]), scalar_R[i]); - } + for (int i = 0; i < NSCALARS; i++) { + if ((scalar_R[i] - scalar_i[i]) * (scalar_i[i] - scalar_L[i]) <= 0) { + scalar_L[i] = scalar_R[i] = scalar_i[i]; + } + if (6.0 * (scalar_R[i] - scalar_L[i]) * (scalar_i[i] - 0.5 * (scalar_L[i] + scalar_R[i])) > + (scalar_R[i] - scalar_L[i]) * (scalar_R[i] - scalar_L[i])) { + scalar_L[i] = 3.0 * scalar_i[i] - 2.0 * scalar_R[i]; + } + if (6.0 * (scalar_R[i] - scalar_L[i]) * (scalar_i[i] - 0.5 * (scalar_L[i] + scalar_R[i])) < + -(scalar_R[i] - scalar_L[i]) * (scalar_R[i] - scalar_L[i])) { + scalar_R[i] = 3.0 * scalar_i[i] - 2.0 * scalar_L[i]; + } + scalar_L[i] = fmax(fmin(scalar_i[i], scalar_imo[i]), scalar_L[i]); + scalar_L[i] = fmin(fmax(scalar_i[i], scalar_imo[i]), scalar_L[i]); + scalar_R[i] = fmax(fmin(scalar_i[i], scalar_ipo[i]), scalar_R[i]); + scalar_R[i] = fmin(fmax(scalar_i[i], scalar_ipo[i]), scalar_R[i]); + } #endif // SCALAR // #ifdef CTU #ifndef VL - // Step 8 - Compute the coefficients for the monotonized parabolic - // interpolation function - // Stone Eqn 54 + // Step 8 - Compute the coefficients for the monotonized parabolic + // interpolation function + // Stone Eqn 54 - del_d_m_i = d_R - d_L; - del_vx_m_i = vx_R - vx_L; - del_vy_m_i = vy_R - vy_L; - del_vz_m_i = vz_R - vz_L; - del_p_m_i = p_R - p_L; + del_d_m_i = d_R - d_L; + del_vx_m_i = vx_R - vx_L; + del_vy_m_i = vy_R - vy_L; + del_vz_m_i = vz_R - vz_L; + del_p_m_i = p_R - p_L; - d_6 = 6.0 * (d_i - 0.5 * (d_L + d_R)); - vx_6 = 6.0 * (vx_i - 0.5 * (vx_L + vx_R)); - vy_6 = 6.0 * (vy_i - 0.5 * (vy_L + vy_R)); - vz_6 = 6.0 * (vz_i - 0.5 * (vz_L + vz_R)); - p_6 = 6.0 * (p_i - 0.5 * (p_L + p_R)); + d_6 = 6.0 * (d_i - 0.5 * (d_L + d_R)); + vx_6 = 6.0 * (vx_i - 0.5 * (vx_L + vx_R)); + vy_6 = 6.0 * (vy_i - 0.5 * (vy_L + vy_R)); + vz_6 = 6.0 * (vz_i - 0.5 * (vz_L + vz_R)); + p_6 = 6.0 * (p_i - 0.5 * (p_L + p_R)); #ifdef DE - del_ge_m_i = ge_R - ge_L; - ge_6 = 6.0 * (ge_i - 0.5 * (ge_L + ge_R)); + del_ge_m_i = ge_R - ge_L; + ge_6 = 6.0 * (ge_i - 0.5 * (ge_L + ge_R)); #endif // DE #ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - del_scalar_m_i[i] = scalar_R[i] - scalar_L[i]; - scalar_6[i] = 6.0 * (scalar_i[i] - 0.5 * (scalar_L[i] + scalar_R[i])); - } + for (int i = 0; i < NSCALARS; i++) { + del_scalar_m_i[i] = scalar_R[i] - scalar_L[i]; + scalar_6[i] = 6.0 * (scalar_i[i] - 0.5 * (scalar_L[i] + scalar_R[i])); + } #endif // SCALAR - // Compute the eigenvalues of the linearized equations in the - // primitive variables using the cell-centered primitive variables + // Compute the eigenvalues of the linearized equations in the + // primitive variables using the cell-centered primitive variables - // recalculate the adiabatic sound speed in cell i - a = sqrt(gamma * p_i / d_i); + // recalculate the adiabatic sound speed in cell i + a = sqrt(gamma * p_i / d_i); - lambda_m = vx_i - a; - lambda_0 = vx_i; - lambda_p = vx_i + a; + lambda_m = vx_i - a; + lambda_0 = vx_i; + lambda_p = vx_i + a; - // Step 9 - Compute the left and right interface values using monotonized - // parabolic interpolation - // Stone Eqns 55 & 56 + // Step 9 - Compute the left and right interface values using monotonized + // parabolic interpolation + // Stone Eqns 55 & 56 - // largest eigenvalue - lambda_max = fmax(lambda_p, (Real)0); - // smallest eigenvalue - lambda_min = fmin(lambda_m, (Real)0); + // largest eigenvalue + lambda_max = fmax(lambda_p, (Real)0); + // smallest eigenvalue + lambda_min = fmin(lambda_m, (Real)0); - // left interface value, i+1/2 - d_R = d_R - lambda_max * (0.5 * dtodx) * (del_d_m_i - (1.0 - (2.0 / 3.0) * lambda_max * dtodx) * d_6); - vx_R = vx_R - lambda_max * (0.5 * dtodx) * (del_vx_m_i - (1.0 - (2.0 / 3.0) * lambda_max * dtodx) * vx_6); - vy_R = vy_R - lambda_max * (0.5 * dtodx) * (del_vy_m_i - (1.0 - (2.0 / 3.0) * lambda_max * dtodx) * vy_6); - vz_R = vz_R - lambda_max * (0.5 * dtodx) * (del_vz_m_i - (1.0 - (2.0 / 3.0) * lambda_max * dtodx) * vz_6); - p_R = p_R - lambda_max * (0.5 * dtodx) * (del_p_m_i - (1.0 - (2.0 / 3.0) * lambda_max * dtodx) * p_6); + // left interface value, i+1/2 + d_R = d_R - lambda_max * (0.5 * dtodx) * (del_d_m_i - (1.0 - (2.0 / 3.0) * lambda_max * dtodx) * d_6); + vx_R = vx_R - lambda_max * (0.5 * dtodx) * (del_vx_m_i - (1.0 - (2.0 / 3.0) * lambda_max * dtodx) * vx_6); + vy_R = vy_R - lambda_max * (0.5 * dtodx) * (del_vy_m_i - (1.0 - (2.0 / 3.0) * lambda_max * dtodx) * vy_6); + vz_R = vz_R - lambda_max * (0.5 * dtodx) * (del_vz_m_i - (1.0 - (2.0 / 3.0) * lambda_max * dtodx) * vz_6); + p_R = p_R - lambda_max * (0.5 * dtodx) * (del_p_m_i - (1.0 - (2.0 / 3.0) * lambda_max * dtodx) * p_6); - // right interface value, i-1/2 - d_L = d_L - lambda_min * (0.5 * dtodx) * (del_d_m_i + (1.0 + (2.0 / 3.0) * lambda_min * dtodx) * d_6); - vx_L = vx_L - lambda_min * (0.5 * dtodx) * (del_vx_m_i + (1.0 + (2.0 / 3.0) * lambda_min * dtodx) * vx_6); - vy_L = vy_L - lambda_min * (0.5 * dtodx) * (del_vy_m_i + (1.0 + (2.0 / 3.0) * lambda_min * dtodx) * vy_6); - vz_L = vz_L - lambda_min * (0.5 * dtodx) * (del_vz_m_i + (1.0 + (2.0 / 3.0) * lambda_min * dtodx) * vz_6); - p_L = p_L - lambda_min * (0.5 * dtodx) * (del_p_m_i + (1.0 + (2.0 / 3.0) * lambda_min * dtodx) * p_6); + // right interface value, i-1/2 + d_L = d_L - lambda_min * (0.5 * dtodx) * (del_d_m_i + (1.0 + (2.0 / 3.0) * lambda_min * dtodx) * d_6); + vx_L = vx_L - lambda_min * (0.5 * dtodx) * (del_vx_m_i + (1.0 + (2.0 / 3.0) * lambda_min * dtodx) * vx_6); + vy_L = vy_L - lambda_min * (0.5 * dtodx) * (del_vy_m_i + (1.0 + (2.0 / 3.0) * lambda_min * dtodx) * vy_6); + vz_L = vz_L - lambda_min * (0.5 * dtodx) * (del_vz_m_i + (1.0 + (2.0 / 3.0) * lambda_min * dtodx) * vz_6); + p_L = p_L - lambda_min * (0.5 * dtodx) * (del_p_m_i + (1.0 + (2.0 / 3.0) * lambda_min * dtodx) * p_6); #ifdef DE - ge_R = ge_R - lambda_max * (0.5 * dtodx) * (del_ge_m_i - (1.0 - (2.0 / 3.0) * lambda_max * dtodx) * ge_6); - ge_L = ge_L - lambda_min * (0.5 * dtodx) * (del_ge_m_i + (1.0 + (2.0 / 3.0) * lambda_min * dtodx) * ge_6); + ge_R = ge_R - lambda_max * (0.5 * dtodx) * (del_ge_m_i - (1.0 - (2.0 / 3.0) * lambda_max * dtodx) * ge_6); + ge_L = ge_L - lambda_min * (0.5 * dtodx) * (del_ge_m_i + (1.0 + (2.0 / 3.0) * lambda_min * dtodx) * ge_6); #endif // DE #ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - scalar_R[i] = scalar_R[i] - lambda_max * (0.5 * dtodx) * - (del_scalar_m_i[i] - (1.0 - (2.0 / 3.0) * lambda_max * dtodx) * scalar_6[i]); - scalar_L[i] = scalar_L[i] - lambda_min * (0.5 * dtodx) * - (del_scalar_m_i[i] + (1.0 + (2.0 / 3.0) * lambda_min * dtodx) * scalar_6[i]); - } + for (int i = 0; i < NSCALARS; i++) { + scalar_R[i] = scalar_R[i] - lambda_max * (0.5 * dtodx) * + (del_scalar_m_i[i] - (1.0 - (2.0 / 3.0) * lambda_max * dtodx) * scalar_6[i]); + scalar_L[i] = scalar_L[i] - lambda_min * (0.5 * dtodx) * + (del_scalar_m_i[i] + (1.0 + (2.0 / 3.0) * lambda_min * dtodx) * scalar_6[i]); + } #endif // SCALAR - // Step 10 - Perform the characteristic tracing - // Stone Eqns 57 - 60 + // Step 10 - Perform the characteristic tracing + // Stone Eqns 57 - 60 - // left-hand interface value, i+1/2 - sum_1 = 0; - sum_2 = 0; - sum_3 = 0; - sum_4 = 0; - sum_5 = 0; + // left-hand interface value, i+1/2 + sum_1 = 0; + sum_2 = 0; + sum_3 = 0; + sum_4 = 0; + sum_5 = 0; #ifdef DE - sum_ge = 0; + sum_ge = 0; #endif // DE #ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - sum_scalar[i] = 0; - } + for (int i = 0; i < NSCALARS; i++) { + sum_scalar[i] = 0; + } #endif // SCALAR - if (lambda_m >= 0) { - A = (0.5 * dtodx) * (lambda_p - lambda_m); - B = (1.0 / 3.0) * (dtodx) * (dtodx) * (lambda_p * lambda_p - lambda_m * lambda_m); + if (lambda_m >= 0) { + A = (0.5 * dtodx) * (lambda_p - lambda_m); + B = (1.0 / 3.0) * (dtodx) * (dtodx) * (lambda_p * lambda_p - lambda_m * lambda_m); - chi_1 = A * (del_d_m_i - d_6) + B * d_6; - chi_2 = A * (del_vx_m_i - vx_6) + B * vx_6; - chi_3 = A * (del_vy_m_i - vy_6) + B * vy_6; - chi_4 = A * (del_vz_m_i - vz_6) + B * vz_6; - chi_5 = A * (del_p_m_i - p_6) + B * p_6; + chi_1 = A * (del_d_m_i - d_6) + B * d_6; + chi_2 = A * (del_vx_m_i - vx_6) + B * vx_6; + chi_3 = A * (del_vy_m_i - vy_6) + B * vy_6; + chi_4 = A * (del_vz_m_i - vz_6) + B * vz_6; + chi_5 = A * (del_p_m_i - p_6) + B * p_6; - sum_1 += -0.5 * (d_i * chi_2 / a - chi_5 / (a * a)); - sum_2 += 0.5 * (chi_2 - chi_5 / (a * d_i)); - sum_5 += -0.5 * (d_i * chi_2 * a - chi_5); - } - if (lambda_0 >= 0) { - A = (0.5 * dtodx) * (lambda_p - lambda_0); - B = (1.0 / 3.0) * (dtodx) * (dtodx) * (lambda_p * lambda_p - lambda_0 * lambda_0); - - chi_1 = A * (del_d_m_i - d_6) + B * d_6; - chi_2 = A * (del_vx_m_i - vx_6) + B * vx_6; - chi_3 = A * (del_vy_m_i - vy_6) + B * vy_6; - chi_4 = A * (del_vz_m_i - vz_6) + B * vz_6; - chi_5 = A * (del_p_m_i - p_6) + B * p_6; + sum_1 += -0.5 * (d_i * chi_2 / a - chi_5 / (a * a)); + sum_2 += 0.5 * (chi_2 - chi_5 / (a * d_i)); + sum_5 += -0.5 * (d_i * chi_2 * a - chi_5); + } + if (lambda_0 >= 0) { + A = (0.5 * dtodx) * (lambda_p - lambda_0); + B = (1.0 / 3.0) * (dtodx) * (dtodx) * (lambda_p * lambda_p - lambda_0 * lambda_0); + + chi_1 = A * (del_d_m_i - d_6) + B * d_6; + chi_2 = A * (del_vx_m_i - vx_6) + B * vx_6; + chi_3 = A * (del_vy_m_i - vy_6) + B * vy_6; + chi_4 = A * (del_vz_m_i - vz_6) + B * vz_6; + chi_5 = A * (del_p_m_i - p_6) + B * p_6; #ifdef DE - chi_ge = A * (del_ge_m_i - ge_6) + B * ge_6; + chi_ge = A * (del_ge_m_i - ge_6) + B * ge_6; #endif // DE #ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - chi_scalar[i] = A * (del_scalar_m_i[i] - scalar_6[i]) + B * scalar_6[i]; - } + for (int i = 0; i < NSCALARS; i++) { + chi_scalar[i] = A * (del_scalar_m_i[i] - scalar_6[i]) + B * scalar_6[i]; + } #endif // SCALAR - sum_1 += chi_1 - chi_5 / (a * a); - sum_3 += chi_3; - sum_4 += chi_4; + sum_1 += chi_1 - chi_5 / (a * a); + sum_3 += chi_3; + sum_4 += chi_4; #ifdef DE - sum_ge += chi_ge; + sum_ge += chi_ge; #endif // DE #ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - sum_scalar[i] += chi_scalar[i]; - } - #endif // SCALAR - } - if (lambda_p >= 0) { - A = (0.5 * dtodx) * (lambda_p - lambda_p); - B = (1.0 / 3.0) * (dtodx) * (dtodx) * (lambda_p * lambda_p - lambda_p * lambda_p); - - chi_1 = A * (del_d_m_i - d_6) + B * d_6; - chi_2 = A * (del_vx_m_i - vx_6) + B * vx_6; - chi_3 = A * (del_vy_m_i - vy_6) + B * vy_6; - chi_4 = A * (del_vz_m_i - vz_6) + B * vz_6; - chi_5 = A * (del_p_m_i - p_6) + B * p_6; - - sum_1 += 0.5 * (d_i * chi_2 / a + chi_5 / (a * a)); - sum_2 += 0.5 * (chi_2 + chi_5 / (a * d_i)); - sum_5 += 0.5 * (d_i * chi_2 * a + chi_5); + for (int i = 0; i < NSCALARS; i++) { + sum_scalar[i] += chi_scalar[i]; } + #endif // SCALAR + } + if (lambda_p >= 0) { + A = (0.5 * dtodx) * (lambda_p - lambda_p); + B = (1.0 / 3.0) * (dtodx) * (dtodx) * (lambda_p * lambda_p - lambda_p * lambda_p); + + chi_1 = A * (del_d_m_i - d_6) + B * d_6; + chi_2 = A * (del_vx_m_i - vx_6) + B * vx_6; + chi_3 = A * (del_vy_m_i - vy_6) + B * vy_6; + chi_4 = A * (del_vz_m_i - vz_6) + B * vz_6; + chi_5 = A * (del_p_m_i - p_6) + B * p_6; + + sum_1 += 0.5 * (d_i * chi_2 / a + chi_5 / (a * a)); + sum_2 += 0.5 * (chi_2 + chi_5 / (a * d_i)); + sum_5 += 0.5 * (d_i * chi_2 * a + chi_5); + } - // add the corrections to the initial guesses for the interface values - d_R += sum_1; - vx_R += sum_2; - vy_R += sum_3; - vz_R += sum_4; - p_R += sum_5; + // add the corrections to the initial guesses for the interface values + d_R += sum_1; + vx_R += sum_2; + vy_R += sum_3; + vz_R += sum_4; + p_R += sum_5; #ifdef DE - ge_R += sum_ge; + ge_R += sum_ge; #endif // DE #ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - scalar_R[i] += sum_scalar[i]; - } + for (int i = 0; i < NSCALARS; i++) { + scalar_R[i] += sum_scalar[i]; + } #endif // SCALAR - // right-hand interface value, i-1/2 - sum_1 = 0; - sum_2 = 0; - sum_3 = 0; - sum_4 = 0; - sum_5 = 0; + // right-hand interface value, i-1/2 + sum_1 = 0; + sum_2 = 0; + sum_3 = 0; + sum_4 = 0; + sum_5 = 0; #ifdef DE - sum_ge = 0; + sum_ge = 0; #endif // DE #ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - sum_scalar[i] = 0; - } + for (int i = 0; i < NSCALARS; i++) { + sum_scalar[i] = 0; + } #endif // SCALAR - if (lambda_m <= 0) { - C = (0.5 * dtodx) * (lambda_m - lambda_m); - D = (1.0 / 3.0) * (dtodx) * (dtodx) * (lambda_m * lambda_m - lambda_m * lambda_m); - - chi_1 = C * (del_d_m_i + d_6) + D * d_6; - chi_2 = C * (del_vx_m_i + vx_6) + D * vx_6; - chi_3 = C * (del_vy_m_i + vy_6) + D * vy_6; - chi_4 = C * (del_vz_m_i + vz_6) + D * vz_6; - chi_5 = C * (del_p_m_i + p_6) + D * p_6; - - sum_1 += -0.5 * (d_i * chi_2 / a - chi_5 / (a * a)); - sum_2 += 0.5 * (chi_2 - chi_5 / (a * d_i)); - sum_5 += -0.5 * (d_i * chi_2 * a - chi_5); - } - if (lambda_0 <= 0) { - C = (0.5 * dtodx) * (lambda_m - lambda_0); - D = (1.0 / 3.0) * (dtodx) * (dtodx) * (lambda_m * lambda_m - lambda_0 * lambda_0); - - chi_1 = C * (del_d_m_i + d_6) + D * d_6; - chi_2 = C * (del_vx_m_i + vx_6) + D * vx_6; - chi_3 = C * (del_vy_m_i + vy_6) + D * vy_6; - chi_4 = C * (del_vz_m_i + vz_6) + D * vz_6; - chi_5 = C * (del_p_m_i + p_6) + D * p_6; + if (lambda_m <= 0) { + C = (0.5 * dtodx) * (lambda_m - lambda_m); + D = (1.0 / 3.0) * (dtodx) * (dtodx) * (lambda_m * lambda_m - lambda_m * lambda_m); + + chi_1 = C * (del_d_m_i + d_6) + D * d_6; + chi_2 = C * (del_vx_m_i + vx_6) + D * vx_6; + chi_3 = C * (del_vy_m_i + vy_6) + D * vy_6; + chi_4 = C * (del_vz_m_i + vz_6) + D * vz_6; + chi_5 = C * (del_p_m_i + p_6) + D * p_6; + + sum_1 += -0.5 * (d_i * chi_2 / a - chi_5 / (a * a)); + sum_2 += 0.5 * (chi_2 - chi_5 / (a * d_i)); + sum_5 += -0.5 * (d_i * chi_2 * a - chi_5); + } + if (lambda_0 <= 0) { + C = (0.5 * dtodx) * (lambda_m - lambda_0); + D = (1.0 / 3.0) * (dtodx) * (dtodx) * (lambda_m * lambda_m - lambda_0 * lambda_0); + + chi_1 = C * (del_d_m_i + d_6) + D * d_6; + chi_2 = C * (del_vx_m_i + vx_6) + D * vx_6; + chi_3 = C * (del_vy_m_i + vy_6) + D * vy_6; + chi_4 = C * (del_vz_m_i + vz_6) + D * vz_6; + chi_5 = C * (del_p_m_i + p_6) + D * p_6; #ifdef DE - chi_ge = C * (del_ge_m_i + ge_6) + D * ge_6; + chi_ge = C * (del_ge_m_i + ge_6) + D * ge_6; #endif // DE #ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - chi_scalar[i] = C * (del_scalar_m_i[i] + scalar_6[i]) + D * scalar_6[i]; - } + for (int i = 0; i < NSCALARS; i++) { + chi_scalar[i] = C * (del_scalar_m_i[i] + scalar_6[i]) + D * scalar_6[i]; + } #endif // SCALAR - sum_1 += chi_1 - chi_5 / (a * a); - sum_3 += chi_3; - sum_4 += chi_4; + sum_1 += chi_1 - chi_5 / (a * a); + sum_3 += chi_3; + sum_4 += chi_4; #ifdef DE - sum_ge += chi_ge; + sum_ge += chi_ge; #endif // DE #ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - sum_scalar[i] += chi_scalar[i]; - } - #endif // SCALAR - } - if (lambda_p <= 0) { - C = (0.5 * dtodx) * (lambda_m - lambda_p); - D = (1.0 / 3.0) * (dtodx) * (dtodx) * (lambda_m * lambda_m - lambda_p * lambda_p); - - chi_1 = C * (del_d_m_i + d_6) + D * d_6; - chi_2 = C * (del_vx_m_i + vx_6) + D * vx_6; - chi_3 = C * (del_vy_m_i + vy_6) + D * vy_6; - chi_4 = C * (del_vz_m_i + vz_6) + D * vz_6; - chi_5 = C * (del_p_m_i + p_6) + D * p_6; - - sum_1 += 0.5 * (d_i * chi_2 / a + chi_5 / (a * a)); - sum_2 += 0.5 * (chi_2 + chi_5 / (a * d_i)); - sum_5 += 0.5 * (d_i * chi_2 * a + chi_5); + for (int i = 0; i < NSCALARS; i++) { + sum_scalar[i] += chi_scalar[i]; } + #endif // SCALAR + } + if (lambda_p <= 0) { + C = (0.5 * dtodx) * (lambda_m - lambda_p); + D = (1.0 / 3.0) * (dtodx) * (dtodx) * (lambda_m * lambda_m - lambda_p * lambda_p); + + chi_1 = C * (del_d_m_i + d_6) + D * d_6; + chi_2 = C * (del_vx_m_i + vx_6) + D * vx_6; + chi_3 = C * (del_vy_m_i + vy_6) + D * vy_6; + chi_4 = C * (del_vz_m_i + vz_6) + D * vz_6; + chi_5 = C * (del_p_m_i + p_6) + D * p_6; + + sum_1 += 0.5 * (d_i * chi_2 / a + chi_5 / (a * a)); + sum_2 += 0.5 * (chi_2 + chi_5 / (a * d_i)); + sum_5 += 0.5 * (d_i * chi_2 * a + chi_5); + } - // add the corrections - d_L += sum_1; - vx_L += sum_2; - vy_L += sum_3; - vz_L += sum_4; - p_L += sum_5; + // add the corrections + d_L += sum_1; + vx_L += sum_2; + vy_L += sum_3; + vz_L += sum_4; + p_L += sum_5; #ifdef DE - ge_L += sum_ge; + ge_L += sum_ge; #endif // DE #ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - scalar_L[i] += sum_scalar[i]; - } + for (int i = 0; i < NSCALARS; i++) { + scalar_L[i] += sum_scalar[i]; + } #endif // SCALAR #endif // VL, i.e. CTU was used for this section - // enforce minimum values - d_L = fmax(d_L, (Real)TINY_NUMBER); - d_R = fmax(d_R, (Real)TINY_NUMBER); - p_L = fmax(p_L, (Real)TINY_NUMBER); - p_R = fmax(p_R, (Real)TINY_NUMBER); - - // Step 11 - Send final values back from kernel - - // bounds_R refers to the right side of the i-1/2 interface - switch (dir) { - case 0: - id = xid - 1 + yid * nx + zid * nx * ny; - break; - case 1: - id = xid + (yid - 1) * nx + zid * nx * ny; - break; - case 2: - id = xid + yid * nx + (zid - 1) * nx * ny; - break; - } - dev_bounds_R[id] = d_L; - dev_bounds_R[o1 * n_cells + id] = d_L * vx_L; - dev_bounds_R[o2 * n_cells + id] = d_L * vy_L; - dev_bounds_R[o3 * n_cells + id] = d_L * vz_L; - dev_bounds_R[4 * n_cells + id] = p_L / (gamma - 1.0) + 0.5 * d_L * (vx_L * vx_L + vy_L * vy_L + vz_L * vz_L); + // enforce minimum values + d_L = fmax(d_L, (Real)TINY_NUMBER); + d_R = fmax(d_R, (Real)TINY_NUMBER); + p_L = fmax(p_L, (Real)TINY_NUMBER); + p_R = fmax(p_R, (Real)TINY_NUMBER); + + // Step 11 - Send final values back from kernel + + // bounds_R refers to the right side of the i-1/2 interface + switch (dir) { + case 0: + id = xid - 1 + yid * nx + zid * nx * ny; + break; + case 1: + id = xid + (yid - 1) * nx + zid * nx * ny; + break; + case 2: + id = xid + yid * nx + (zid - 1) * nx * ny; + break; + } + dev_bounds_R[id] = d_L; + dev_bounds_R[o1 * n_cells + id] = d_L * vx_L; + dev_bounds_R[o2 * n_cells + id] = d_L * vy_L; + dev_bounds_R[o3 * n_cells + id] = d_L * vz_L; + dev_bounds_R[4 * n_cells + id] = p_L / (gamma - 1.0) + 0.5 * d_L * (vx_L * vx_L + vy_L * vy_L + vz_L * vz_L); #ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - dev_bounds_R[(5 + i) * n_cells + id] = d_L * scalar_L[i]; - } + for (int i = 0; i < NSCALARS; i++) { + dev_bounds_R[(5 + i) * n_cells + id] = d_L * scalar_L[i]; + } #endif // SCALAR #ifdef DE - dev_bounds_R[grid_enum::GasEnergy * n_cells + id] = d_L * ge_L; + dev_bounds_R[grid_enum::GasEnergy * n_cells + id] = d_L * ge_L; #endif // DE - // bounds_L refers to the left side of the i+1/2 interface - id = xid + yid * nx + zid * nx * ny; - dev_bounds_L[id] = d_R; - dev_bounds_L[o1 * n_cells + id] = d_R * vx_R; - dev_bounds_L[o2 * n_cells + id] = d_R * vy_R; - dev_bounds_L[o3 * n_cells + id] = d_R * vz_R; - dev_bounds_L[4 * n_cells + id] = p_R / (gamma - 1.0) + 0.5 * d_R * (vx_R * vx_R + vy_R * vy_R + vz_R * vz_R); + // bounds_L refers to the left side of the i+1/2 interface + id = xid + yid * nx + zid * nx * ny; + dev_bounds_L[id] = d_R; + dev_bounds_L[o1 * n_cells + id] = d_R * vx_R; + dev_bounds_L[o2 * n_cells + id] = d_R * vy_R; + dev_bounds_L[o3 * n_cells + id] = d_R * vz_R; + dev_bounds_L[4 * n_cells + id] = p_R / (gamma - 1.0) + 0.5 * d_R * (vx_R * vx_R + vy_R * vy_R + vz_R * vz_R); #ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - dev_bounds_L[(5 + i) * n_cells + id] = d_R * scalar_R[i]; - } + for (int i = 0; i < NSCALARS; i++) { + dev_bounds_L[(5 + i) * n_cells + id] = d_R * scalar_R[i]; + } #endif // SCALAR #ifdef DE - dev_bounds_L[grid_enum::GasEnergy * n_cells + id] = d_R * ge_R; + dev_bounds_L[grid_enum::GasEnergy * n_cells + id] = d_R * ge_R; #endif // DE - } } diff --git a/src/reconstruction/ppmc_cuda_tests.cu b/src/reconstruction/ppmc_cuda_tests.cu index 01730ed2a..03bb1828c 100644 --- a/src/reconstruction/ppmc_cuda_tests.cu +++ b/src/reconstruction/ppmc_cuda_tests.cu @@ -5,16 +5,16 @@ */ // STL Includes +#include #include #include #include +#include // External Includes #include // Include GoogleTest and related libraries/headers // Local Includes -#include - #include "../global/global.h" #include "../io/io.h" #include "../reconstruction/ppmc_cuda.h" @@ -30,8 +30,8 @@ TEST(tHYDROPpmcReconstructor, CorrectInputExpectCorrectOutput) // Mock up needed information size_t const nx = 6; - size_t const ny = 1; - size_t const nz = 1; + size_t const ny = 6; + size_t const nz = 6; size_t const n_fields = 5; double const dx = doubleRand(prng); double const dt = doubleRand(prng); @@ -48,46 +48,63 @@ TEST(tHYDROPpmcReconstructor, CorrectInputExpectCorrectOutput) dev_grid.cpyHostToDevice(host_grid); // Fiducial Data - std::unordered_map fiducial_interface_left = {{2, 4.5260179354990537}, - {8, 0.16067557854687248}, - {14, 3.7907707014364083}, - {20, 2.1837489694378442}, - {26, 3.8877922383184833}}; - std::unordered_map fiducial_interface_right = {{1, 4.5260179354990537}, - {7, 0.16067557854687248}, - {13, 3.7907707014364083}, - {19, 2.1837489694378442}, - {25, 3.8877922383184833}}; + std::vector> fiducial_interface_left = {{{86, 2.6558981128823214}, + {302, 0.84399195916314151}, + {518, 2.2002498722761787}, + {734, 1.764334292986655}, + {950, 3.3600925565746804}, + {86, 2.4950488327292639}}, + {{86, 2.4950488327292639}, + {302, 0.79287723513518138}, + {518, 1.7614576990062414}, + {734, 1.8238574169157304}, + {950, 3.14294317122161}}, + {{86, 2.6558981128823214}, + {302, 0.84399195916314151}, + {518, 2.0109603398129137}, + {734, 1.764334292986655}, + {950, 3.2100231679403066}}}; + + std::vector> fiducial_interface_right = {{{85, 2.6558981128823214}, + {301, 0.84399195916314151}, + {517, 1.8381070277226794}, + {733, 1.764334292986655}, + {949, 3.0847691079841209}}, + {{80, 3.1281603739188069}, + {85, 2.6558981128823214}, + {296, 0.99406757727427164}, + {301, 0.84399195916314151}, + {512, 1.8732124042412865}, + {517, 1.8381070277226794}, + {728, 1.6489758692176784}, + {733, 1.764334292986655}, + {944, 2.8820015278590443}, + {949, 3.0847691079841209}}, + {{50, 2.6558981128823214}, + {80, 3.1281603739188069}, + {85, 2.6558981128823214}, + {266, 0.84399195916314151}, + {296, 0.99406757727427164}, + {301, 0.84399195916314151}, + {482, 2.0109603398129137}, + {512, 1.8732124042412865}, + {517, 1.8381070277226794}, + {698, 1.764334292986655}, + {728, 1.6489758692176784}, + {733, 1.764334292986655}, + {914, 3.2100231679403066}, + {944, 2.8820015278590443}, + {949, 3.0847691079841209}}}; // Loop over different directions for (size_t direction = 0; direction < 3; direction++) { - // Assign the shape - size_t nx_rot, ny_rot, nz_rot; - switch (direction) { - case 0: - nx_rot = nx; - ny_rot = ny; - nz_rot = nz; - break; - case 1: - nx_rot = ny; - ny_rot = nz; - nz_rot = nx; - break; - case 2: - nx_rot = nz; - ny_rot = nx; - nz_rot = ny; - break; - } - // Allocate device buffers cuda_utilities::DeviceVector dev_interface_left(host_grid.size()); cuda_utilities::DeviceVector dev_interface_right(host_grid.size()); // Launch kernel hipLaunchKernelGGL(PPMC_cuda, dev_grid.size(), 1, 0, 0, dev_grid.data(), dev_interface_left.data(), - dev_interface_right.data(), nx_rot, ny_rot, nz_rot, dx, dt, gamma, direction); + dev_interface_right.data(), nx, ny, nz, dx, dt, gamma, direction); CudaCheckError(); CHECK(cudaDeviceSynchronize()); @@ -96,18 +113,20 @@ TEST(tHYDROPpmcReconstructor, CorrectInputExpectCorrectOutput) // Check the left interface double test_val = dev_interface_left.at(i); double fiducial_val = - (fiducial_interface_left.find(i) == fiducial_interface_left.end()) ? 0.0 : fiducial_interface_left[i]; + (fiducial_interface_left.at(direction).find(i) == fiducial_interface_left.at(direction).end()) + ? 0.0 + : fiducial_interface_left.at(direction)[i]; testingUtilities::checkResults( fiducial_val, test_val, "left interface at i=" + std::to_string(i) + ", in direction " + std::to_string(direction)); // Check the right interface - test_val = dev_interface_right.at(i); - fiducial_val = - (fiducial_interface_right.find(i) == fiducial_interface_right.end()) ? 0.0 : fiducial_interface_right[i]; - // if (test_val != 0) - // std::cout << "{" << i << ", " << to_string_exact(test_val) << "}," << std::endl; + test_val = dev_interface_right.at(i); + fiducial_val = (fiducial_interface_right.at(direction).find(i) == fiducial_interface_right.at(direction).end()) + ? 0.0 + : fiducial_interface_right.at(direction)[i]; + testingUtilities::checkResults( fiducial_val, test_val, "right interface at i=" + std::to_string(i) + ", in direction " + std::to_string(direction)); From 0e5936317720a777a6559bc8d357908d582eb716 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Fri, 14 Apr 2023 14:33:24 -0400 Subject: [PATCH 408/694] PPMC: Update n_cells and o1, o2, o3 calculations --- src/reconstruction/ppmc_cuda.cu | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/src/reconstruction/ppmc_cuda.cu b/src/reconstruction/ppmc_cuda.cu index 26829c4b6..a3215d7a3 100644 --- a/src/reconstruction/ppmc_cuda.cu +++ b/src/reconstruction/ppmc_cuda.cu @@ -30,23 +30,26 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou return; } - int n_cells = nx * ny * nz; + // Compute the total number of cells + int const n_cells = nx * ny * nz; + + // Set the field indices for the various directions int o1, o2, o3; switch (dir) { case 0: - o1 = 1; - o2 = 2; - o3 = 3; + o1 = grid_enum::momentum_x; + o2 = grid_enum::momentum_y; + o3 = grid_enum::momentum_z; break; case 1: - o1 = 2; - o2 = 3; - o3 = 1; + o1 = grid_enum::momentum_y; + o2 = grid_enum::momentum_z; + o3 = grid_enum::momentum_x; break; case 2: - o1 = 3; - o2 = 1; - o3 = 2; + o1 = grid_enum::momentum_z; + o2 = grid_enum::momentum_x; + o3 = grid_enum::momentum_y; break; } From afe7584f31e3e0c2f7f0618fe8febed8626dadd0 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Mon, 17 Apr 2023 09:48:20 -0400 Subject: [PATCH 409/694] PPMC: Replace loaded data with structs --- src/reconstruction/ppmc_cuda.cu | 559 +++++++++++++++++--------------- 1 file changed, 290 insertions(+), 269 deletions(-) diff --git a/src/reconstruction/ppmc_cuda.cu b/src/reconstruction/ppmc_cuda.cu index a3215d7a3..999fab99c 100644 --- a/src/reconstruction/ppmc_cuda.cu +++ b/src/reconstruction/ppmc_cuda.cu @@ -7,6 +7,7 @@ #include "../global/global.h" #include "../global/global_cuda.h" #include "../reconstruction/ppmc_cuda.h" +#include "../reconstruction/reconstruction.h" #include "../utils/gpu.hpp" #include "../utils/hydro_utilities.h" @@ -55,11 +56,7 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou // declare primitive variables for each stencil // these will be placed into registers for each thread - Real d_i, vx_i, vy_i, vz_i, p_i; - Real d_imo, vx_imo, vy_imo, vz_imo, p_imo; - Real d_ipo, vx_ipo, vy_ipo, vz_ipo, p_ipo; - Real d_imt, vx_imt, vy_imt, vz_imt, p_imt; - Real d_ipt, vx_ipt, vy_ipt, vz_ipt, p_ipt; + reconstruction::Primitive cell_i, cell_im1, cell_im2, cell_ip1, cell_ip2; // declare other variables to be used Real a; @@ -91,7 +88,6 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou #endif // VL #ifdef DE - Real ge_i, ge_imo, ge_ipo, ge_imt, ge_ipt; Real del_ge_L, del_ge_R, del_ge_C, del_ge_G; Real del_ge_m_imo, del_ge_m_i, del_ge_m_ipo; Real ge_L, ge_R; @@ -102,7 +98,6 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou #endif // VL #endif // DE #ifdef SCALAR - Real scalar_i[NSCALARS], scalar_imo[NSCALARS], scalar_ipo[NSCALARS], scalar_imt[NSCALARS], scalar_ipt[NSCALARS]; Real del_scalar_L[NSCALARS], del_scalar_R[NSCALARS], del_scalar_C[NSCALARS], del_scalar_G[NSCALARS]; Real del_scalar_m_imo[NSCALARS], del_scalar_m_i[NSCALARS], del_scalar_m_ipo[NSCALARS]; Real scalar_L[NSCALARS], scalar_R[NSCALARS]; @@ -114,26 +109,32 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou // load the 5-cell stencil into registers // cell i - int id = xid + yid * nx + zid * nx * ny; - d_i = dev_conserved[id]; - vx_i = dev_conserved[o1 * n_cells + id] / d_i; - vy_i = dev_conserved[o2 * n_cells + id] / d_i; - vz_i = dev_conserved[o3 * n_cells + id] / d_i; + int id = xid + yid * nx + zid * nx * ny; + cell_i.density = dev_conserved[id]; + cell_i.velocity_x = dev_conserved[o1 * n_cells + id] / cell_i.density; + cell_i.velocity_y = dev_conserved[o2 * n_cells + id] / cell_i.density; + cell_i.velocity_z = dev_conserved[o3 * n_cells + id] / cell_i.density; #ifdef DE // PRESSURE_DE E = dev_conserved[4 * n_cells + id]; - E_kin = 0.5 * d_i * (vx_i * vx_i + vy_i * vy_i + vz_i * vz_i); - dge = dev_conserved[grid_enum::GasEnergy * n_cells + id]; - p_i = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, dge, gamma); + E_kin = 0.5 * cell_i.density * + (cell_i.velocity_x * cell_i.velocity_x + cell_i.velocity_y * cell_i.velocity_y + + cell_i.velocity_z * cell_i.velocity_z); + dge = dev_conserved[grid_enum::GasEnergy * n_cells + id]; + cell_i.pressure = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, dge, gamma); #else // not DE - p_i = (dev_conserved[4 * n_cells + id] - 0.5 * d_i * (vx_i * vx_i + vy_i * vy_i + vz_i * vz_i)) * (gamma - 1.0); + cell_i.pressure = (dev_conserved[4 * n_cells + id] - + 0.5 * cell_i.density * + (cell_i.velocity_x * cell_i.velocity_x + cell_i.velocity_y * cell_i.velocity_y + + cell_i.velocity_z * cell_i.velocity_z)) * + (gamma - 1.0); #endif // PRESSURE_DE - p_i = fmax(p_i, (Real)TINY_NUMBER); + cell_i.pressure = fmax(cell_i.pressure, (Real)TINY_NUMBER); #ifdef DE - ge_i = dge / d_i; + cell_i.gas_energy = dge / cell_i.density; #endif // DE #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { - scalar_i[i] = dev_conserved[(5 + i) * n_cells + id] / d_i; + cell_i.scalar[i] = dev_conserved[(5 + i) * n_cells + id] / cell_i.density; } #endif // SCALAR // cell i-1 @@ -149,26 +150,31 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou break; } - d_imo = dev_conserved[id]; - vx_imo = dev_conserved[o1 * n_cells + id] / d_imo; - vy_imo = dev_conserved[o2 * n_cells + id] / d_imo; - vz_imo = dev_conserved[o3 * n_cells + id] / d_imo; + cell_im1.density = dev_conserved[id]; + cell_im1.velocity_x = dev_conserved[o1 * n_cells + id] / cell_im1.density; + cell_im1.velocity_y = dev_conserved[o2 * n_cells + id] / cell_im1.density; + cell_im1.velocity_z = dev_conserved[o3 * n_cells + id] / cell_im1.density; #ifdef DE // PRESSURE_DE E = dev_conserved[4 * n_cells + id]; - E_kin = 0.5 * d_imo * (vx_imo * vx_imo + vy_imo * vy_imo + vz_imo * vz_imo); - dge = dev_conserved[grid_enum::GasEnergy * n_cells + id]; - p_imo = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, dge, gamma); + E_kin = 0.5 * cell_im1.density * + (cell_im1.velocity_x * cell_im1.velocity_x + cell_im1.velocity_y * cell_im1.velocity_y + + cell_im1.velocity_z * cell_im1.velocity_z); + dge = dev_conserved[grid_enum::GasEnergy * n_cells + id]; + cell_im1.pressure = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, dge, gamma); #else // not DE - p_imo = (dev_conserved[4 * n_cells + id] - 0.5 * d_imo * (vx_imo * vx_imo + vy_imo * vy_imo + vz_imo * vz_imo)) * - (gamma - 1.0); + cell_im1.pressure = (dev_conserved[4 * n_cells + id] - + 0.5 * cell_im1.density * + (cell_im1.velocity_x * cell_im1.velocity_x + cell_im1.velocity_y * cell_im1.velocity_y + + cell_im1.velocity_z * cell_im1.velocity_z)) * + (gamma - 1.0); #endif // PRESSURE_DE - p_imo = fmax(p_imo, (Real)TINY_NUMBER); + cell_im1.pressure = fmax(cell_im1.pressure, (Real)TINY_NUMBER); #ifdef DE - ge_imo = dge / d_imo; + cell_im1.gas_energy = dge / cell_im1.density; #endif // DE #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { - scalar_imo[i] = dev_conserved[(5 + i) * n_cells + id] / d_imo; + cell_im1.scalar[i] = dev_conserved[(5 + i) * n_cells + id] / cell_im1.density; } #endif // SCALAR // cell i+1 @@ -183,26 +189,31 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou id = xid + yid * nx + (zid + 1) * nx * ny; break; } - d_ipo = dev_conserved[id]; - vx_ipo = dev_conserved[o1 * n_cells + id] / d_ipo; - vy_ipo = dev_conserved[o2 * n_cells + id] / d_ipo; - vz_ipo = dev_conserved[o3 * n_cells + id] / d_ipo; + cell_ip1.density = dev_conserved[id]; + cell_ip1.velocity_x = dev_conserved[o1 * n_cells + id] / cell_ip1.density; + cell_ip1.velocity_y = dev_conserved[o2 * n_cells + id] / cell_ip1.density; + cell_ip1.velocity_z = dev_conserved[o3 * n_cells + id] / cell_ip1.density; #ifdef DE // PRESSURE_DE E = dev_conserved[4 * n_cells + id]; - E_kin = 0.5 * d_ipo * (vx_ipo * vx_ipo + vy_ipo * vy_ipo + vz_ipo * vz_ipo); - dge = dev_conserved[grid_enum::GasEnergy * n_cells + id]; - p_ipo = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, dge, gamma); + E_kin = 0.5 * cell_ip1.density * + (cell_ip1.velocity_x * cell_ip1.velocity_x + cell_ip1.velocity_y * cell_ip1.velocity_y + + cell_ip1.velocity_z * cell_ip1.velocity_z); + dge = dev_conserved[grid_enum::GasEnergy * n_cells + id]; + cell_ip1.pressure = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, dge, gamma); #else // not DE - p_ipo = (dev_conserved[4 * n_cells + id] - 0.5 * d_ipo * (vx_ipo * vx_ipo + vy_ipo * vy_ipo + vz_ipo * vz_ipo)) * - (gamma - 1.0); + cell_ip1.pressure = (dev_conserved[4 * n_cells + id] - + 0.5 * cell_ip1.density * + (cell_ip1.velocity_x * cell_ip1.velocity_x + cell_ip1.velocity_y * cell_ip1.velocity_y + + cell_ip1.velocity_z * cell_ip1.velocity_z)) * + (gamma - 1.0); #endif // PRESSURE_DE - p_ipo = fmax(p_ipo, (Real)TINY_NUMBER); + cell_ip1.pressure = fmax(cell_ip1.pressure, (Real)TINY_NUMBER); #ifdef DE - ge_ipo = dge / d_ipo; + cell_ip1.gas_energy = dge / cell_ip1.density; #endif // DE #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { - scalar_ipo[i] = dev_conserved[(5 + i) * n_cells + id] / d_ipo; + cell_ip1.scalar[i] = dev_conserved[(5 + i) * n_cells + id] / cell_ip1.density; } #endif // SCALAR // cell i-2 @@ -217,26 +228,31 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou id = xid + yid * nx + (zid - 2) * nx * ny; break; } - d_imt = dev_conserved[id]; - vx_imt = dev_conserved[o1 * n_cells + id] / d_imt; - vy_imt = dev_conserved[o2 * n_cells + id] / d_imt; - vz_imt = dev_conserved[o3 * n_cells + id] / d_imt; + cell_im2.density = dev_conserved[id]; + cell_im2.velocity_x = dev_conserved[o1 * n_cells + id] / cell_im2.density; + cell_im2.velocity_y = dev_conserved[o2 * n_cells + id] / cell_im2.density; + cell_im2.velocity_z = dev_conserved[o3 * n_cells + id] / cell_im2.density; #ifdef DE // PRESSURE_DE E = dev_conserved[4 * n_cells + id]; - E_kin = 0.5 * d_imt * (vx_imt * vx_imt + vy_imt * vy_imt + vz_imt * vz_imt); - dge = dev_conserved[grid_enum::GasEnergy * n_cells + id]; - p_imt = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, dge, gamma); + E_kin = 0.5 * cell_im2.density * + (cell_im2.velocity_x * cell_im2.velocity_x + cell_im2.velocity_y * cell_im2.velocity_y + + cell_im2.velocity_z * cell_im2.velocity_z); + dge = dev_conserved[grid_enum::GasEnergy * n_cells + id]; + cell_im2.pressure = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, dge, gamma); #else // not DE - p_imt = (dev_conserved[4 * n_cells + id] - 0.5 * d_imt * (vx_imt * vx_imt + vy_imt * vy_imt + vz_imt * vz_imt)) * - (gamma - 1.0); + cell_im2.pressure = (dev_conserved[4 * n_cells + id] - + 0.5 * cell_im2.density * + (cell_im2.velocity_x * cell_im2.velocity_x + cell_im2.velocity_y * cell_im2.velocity_y + + cell_im2.velocity_z * cell_im2.velocity_z)) * + (gamma - 1.0); #endif // PRESSURE_DE - p_imt = fmax(p_imt, (Real)TINY_NUMBER); + cell_im2.pressure = fmax(cell_im2.pressure, (Real)TINY_NUMBER); #ifdef DE - ge_imt = dge / d_imt; + cell_im2.gas_energy = dge / cell_im2.density; #endif // DE #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { - scalar_imt[i] = dev_conserved[(5 + i) * n_cells + id] / d_imt; + cell_im2.scalar[i] = dev_conserved[(5 + i) * n_cells + id] / cell_im2.density; } #endif // SCALAR // cell i+2 @@ -251,31 +267,36 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou id = xid + yid * nx + (zid + 2) * nx * ny; break; } - d_ipt = dev_conserved[id]; - vx_ipt = dev_conserved[o1 * n_cells + id] / d_ipt; - vy_ipt = dev_conserved[o2 * n_cells + id] / d_ipt; - vz_ipt = dev_conserved[o3 * n_cells + id] / d_ipt; + cell_ip2.density = dev_conserved[id]; + cell_ip2.velocity_x = dev_conserved[o1 * n_cells + id] / cell_ip2.density; + cell_ip2.velocity_y = dev_conserved[o2 * n_cells + id] / cell_ip2.density; + cell_ip2.velocity_z = dev_conserved[o3 * n_cells + id] / cell_ip2.density; #ifdef DE // PRESSURE_DE E = dev_conserved[4 * n_cells + id]; - E_kin = 0.5 * d_ipt * (vx_ipt * vx_ipt + vy_ipt * vy_ipt + vz_ipt * vz_ipt); - dge = dev_conserved[grid_enum::GasEnergy * n_cells + id]; - p_ipt = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, dge, gamma); + E_kin = 0.5 * cell_ip2.density * + (cell_ip2.velocity_x * cell_ip2.velocity_x + cell_ip2.velocity_y * cell_ip2.velocity_y + + cell_ip2.velocity_z * cell_ip2.velocity_z); + dge = dev_conserved[grid_enum::GasEnergy * n_cells + id]; + cell_ip2.pressure = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, dge, gamma); #else // not DE - p_ipt = (dev_conserved[4 * n_cells + id] - 0.5 * d_ipt * (vx_ipt * vx_ipt + vy_ipt * vy_ipt + vz_ipt * vz_ipt)) * - (gamma - 1.0); + cell_ip2.pressure = (dev_conserved[4 * n_cells + id] - + 0.5 * cell_ip2.density * + (cell_ip2.velocity_x * cell_ip2.velocity_x + cell_ip2.velocity_y * cell_ip2.velocity_y + + cell_ip2.velocity_z * cell_ip2.velocity_z)) * + (gamma - 1.0); #endif // PRESSURE_DE - p_ipt = fmax(p_ipt, (Real)TINY_NUMBER); + cell_ip2.pressure = fmax(cell_ip2.pressure, (Real)TINY_NUMBER); #ifdef DE - ge_ipt = dge / d_ipt; + cell_ip2.gas_energy = dge / cell_ip2.density; #endif // DE #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { - scalar_ipt[i] = dev_conserved[(5 + i) * n_cells + id] / d_ipt; + cell_ip2.scalar[i] = dev_conserved[(5 + i) * n_cells + id] / cell_ip2.density; } #endif // SCALAR - // printf("%d %d %d %f %f %f %f %f\n", xid, yid, zid, d_i, vx_i, vy_i, vz_i, - // p_i); + // printf("%d %d %d %f %f %f %f %f\n", xid, yid, zid, cell_i.density, cell_i.velocity_x, cell_i.velocity_y, + // cell_i.velocity_z, cell_i.pressure); // Steps 2 - 5 are repeated for cell i-1, i, and i+1 // Step 2 - Compute the left, right, centered, and van Leer differences of @@ -284,28 +305,28 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou // center Stone Eqn 36 // calculate the adiabatic sound speed in cell imo - a = sqrt(gamma * p_imo / d_imo); + a = sqrt(gamma * cell_im1.pressure / cell_im1.density); // left - del_d_L = d_imo - d_imt; - del_vx_L = vx_imo - vx_imt; - del_vy_L = vy_imo - vy_imt; - del_vz_L = vz_imo - vz_imt; - del_p_L = p_imo - p_imt; + del_d_L = cell_im1.density - cell_im2.density; + del_vx_L = cell_im1.velocity_x - cell_im2.velocity_x; + del_vy_L = cell_im1.velocity_y - cell_im2.velocity_y; + del_vz_L = cell_im1.velocity_z - cell_im2.velocity_z; + del_p_L = cell_im1.pressure - cell_im2.pressure; // right - del_d_R = d_i - d_imo; - del_vx_R = vx_i - vx_imo; - del_vy_R = vy_i - vy_imo; - del_vz_R = vz_i - vz_imo; - del_p_R = p_i - p_imo; + del_d_R = cell_i.density - cell_im1.density; + del_vx_R = cell_i.velocity_x - cell_im1.velocity_x; + del_vy_R = cell_i.velocity_y - cell_im1.velocity_y; + del_vz_R = cell_i.velocity_z - cell_im1.velocity_z; + del_p_R = cell_i.pressure - cell_im1.pressure; // centered - del_d_C = 0.5 * (d_i - d_imt); - del_vx_C = 0.5 * (vx_i - vx_imt); - del_vy_C = 0.5 * (vy_i - vy_imt); - del_vz_C = 0.5 * (vz_i - vz_imt); - del_p_C = 0.5 * (p_i - p_imt); + del_d_C = 0.5 * (cell_i.density - cell_im2.density); + del_vx_C = 0.5 * (cell_i.velocity_x - cell_im2.velocity_x); + del_vy_C = 0.5 * (cell_i.velocity_y - cell_im2.velocity_y); + del_vz_C = 0.5 * (cell_i.velocity_z - cell_im2.velocity_z); + del_p_C = 0.5 * (cell_i.pressure - cell_im2.pressure); // Van Leer if (del_d_L * del_d_R > 0.0) { @@ -335,9 +356,9 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou } #ifdef DE - del_ge_L = ge_imo - ge_imt; - del_ge_R = ge_i - ge_imo; - del_ge_C = 0.5 * (ge_i - ge_imt); + del_ge_L = cell_im1.gas_energy - cell_im2.gas_energy; + del_ge_R = cell_i.gas_energy - cell_im1.gas_energy; + del_ge_C = 0.5 * (cell_i.gas_energy - cell_im2.gas_energy); if (del_ge_L * del_ge_R > 0.0) { del_ge_G = 2.0 * del_ge_L * del_ge_R / (del_ge_L + del_ge_R); } else { @@ -346,9 +367,9 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou #endif // DE #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { - del_scalar_L[i] = scalar_imo[i] - scalar_imt[i]; - del_scalar_R[i] = scalar_i[i] - scalar_imo[i]; - del_scalar_C[i] = 0.5 * (scalar_i[i] - scalar_imt[i]); + del_scalar_L[i] = cell_im1.scalar[i] - cell_im2.scalar[i]; + del_scalar_R[i] = cell_i.scalar[i] - cell_im1.scalar[i]; + del_scalar_C[i] = 0.5 * (cell_i.scalar[i] - cell_im2.scalar[i]); if (del_scalar_L[i] * del_scalar_R[i] > 0.0) { del_scalar_G[i] = 2.0 * del_scalar_L[i] * del_scalar_R[i] / (del_scalar_L[i] + del_scalar_R[i]); } else { @@ -363,29 +384,29 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou // see Stone for notation) Use the eigenvectors given in Stone // 2008, Appendix A - del_a_0_L = -0.5 * d_imo * del_vx_L / a + 0.5 * del_p_L / (a * a); + del_a_0_L = -0.5 * cell_im1.density * del_vx_L / a + 0.5 * del_p_L / (a * a); del_a_1_L = del_d_L - del_p_L / (a * a); del_a_2_L = del_vy_L; del_a_3_L = del_vz_L; - del_a_4_L = 0.5 * d_imo * del_vx_L / a + 0.5 * del_p_L / (a * a); + del_a_4_L = 0.5 * cell_im1.density * del_vx_L / a + 0.5 * del_p_L / (a * a); - del_a_0_R = -0.5 * d_imo * del_vx_R / a + 0.5 * del_p_R / (a * a); + del_a_0_R = -0.5 * cell_im1.density * del_vx_R / a + 0.5 * del_p_R / (a * a); del_a_1_R = del_d_R - del_p_R / (a * a); del_a_2_R = del_vy_R; del_a_3_R = del_vz_R; - del_a_4_R = 0.5 * d_imo * del_vx_R / a + 0.5 * del_p_R / (a * a); + del_a_4_R = 0.5 * cell_im1.density * del_vx_R / a + 0.5 * del_p_R / (a * a); - del_a_0_C = -0.5 * d_imo * del_vx_C / a + 0.5 * del_p_C / (a * a); + del_a_0_C = -0.5 * cell_im1.density * del_vx_C / a + 0.5 * del_p_C / (a * a); del_a_1_C = del_d_C - del_p_C / (a * a); del_a_2_C = del_vy_C; del_a_3_C = del_vz_C; - del_a_4_C = 0.5 * d_imo * del_vx_C / a + 0.5 * del_p_C / (a * a); + del_a_4_C = 0.5 * cell_im1.density * del_vx_C / a + 0.5 * del_p_C / (a * a); - del_a_0_G = -0.5 * d_imo * del_vx_G / a + 0.5 * del_p_G / (a * a); + del_a_0_G = -0.5 * cell_im1.density * del_vx_G / a + 0.5 * del_p_G / (a * a); del_a_1_G = del_d_G - del_p_G / (a * a); del_a_2_G = del_vy_G; del_a_3_G = del_vz_G; - del_a_4_G = 0.5 * d_imo * del_vx_G / a + 0.5 * del_p_G / (a * a); + del_a_4_G = 0.5 * cell_im1.density * del_vx_G / a + 0.5 * del_p_G / (a * a); // Step 4 - Apply monotonicity constraints to the differences in the // characteristic variables @@ -445,7 +466,7 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou // Stone Eqn 39 del_d_m_imo = del_a_0_m + del_a_1_m + del_a_4_m; - del_vx_m_imo = -a * del_a_0_m / d_imo + a * del_a_4_m / d_imo; + del_vx_m_imo = -a * del_a_0_m / cell_im1.density + a * del_a_4_m / cell_im1.density; del_vy_m_imo = del_a_2_m; del_vz_m_imo = del_a_3_m; del_p_m_imo = a * a * del_a_0_m + a * a * del_a_4_m; @@ -456,28 +477,28 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou // center Stone Eqn 36 // calculate the adiabatic sound speed in cell i - a = sqrt(gamma * p_i / d_i); + a = sqrt(gamma * cell_i.pressure / cell_i.density); // left - del_d_L = d_i - d_imo; - del_vx_L = vx_i - vx_imo; - del_vy_L = vy_i - vy_imo; - del_vz_L = vz_i - vz_imo; - del_p_L = p_i - p_imo; + del_d_L = cell_i.density - cell_im1.density; + del_vx_L = cell_i.velocity_x - cell_im1.velocity_x; + del_vy_L = cell_i.velocity_y - cell_im1.velocity_y; + del_vz_L = cell_i.velocity_z - cell_im1.velocity_z; + del_p_L = cell_i.pressure - cell_im1.pressure; // right - del_d_R = d_ipo - d_i; - del_vx_R = vx_ipo - vx_i; - del_vy_R = vy_ipo - vy_i; - del_vz_R = vz_ipo - vz_i; - del_p_R = p_ipo - p_i; + del_d_R = cell_ip1.density - cell_i.density; + del_vx_R = cell_ip1.velocity_x - cell_i.velocity_x; + del_vy_R = cell_ip1.velocity_y - cell_i.velocity_y; + del_vz_R = cell_ip1.velocity_z - cell_i.velocity_z; + del_p_R = cell_ip1.pressure - cell_i.pressure; // centered - del_d_C = 0.5 * (d_ipo - d_imo); - del_vx_C = 0.5 * (vx_ipo - vx_imo); - del_vy_C = 0.5 * (vy_ipo - vy_imo); - del_vz_C = 0.5 * (vz_ipo - vz_imo); - del_p_C = 0.5 * (p_ipo - p_imo); + del_d_C = 0.5 * (cell_ip1.density - cell_im1.density); + del_vx_C = 0.5 * (cell_ip1.velocity_x - cell_im1.velocity_x); + del_vy_C = 0.5 * (cell_ip1.velocity_y - cell_im1.velocity_y); + del_vz_C = 0.5 * (cell_ip1.velocity_z - cell_im1.velocity_z); + del_p_C = 0.5 * (cell_ip1.pressure - cell_im1.pressure); // van Leer if (del_d_L * del_d_R > 0.0) { @@ -507,9 +528,9 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou } #ifdef DE - del_ge_L = ge_i - ge_imo; - del_ge_R = ge_ipo - ge_i; - del_ge_C = 0.5 * (ge_ipo - ge_imo); + del_ge_L = cell_i.gas_energy - cell_im1.gas_energy; + del_ge_R = cell_ip1.gas_energy - cell_i.gas_energy; + del_ge_C = 0.5 * (cell_ip1.gas_energy - cell_im1.gas_energy); if (del_ge_L * del_ge_R > 0.0) { del_ge_G = 2.0 * del_ge_L * del_ge_R / (del_ge_L + del_ge_R); } else { @@ -519,9 +540,9 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { - del_scalar_L[i] = scalar_i[i] - scalar_imo[i]; - del_scalar_R[i] = scalar_ipo[i] - scalar_i[i]; - del_scalar_C[i] = 0.5 * (scalar_ipo[i] - scalar_imo[i]); + del_scalar_L[i] = cell_i.scalar[i] - cell_im1.scalar[i]; + del_scalar_R[i] = cell_ip1.scalar[i] - cell_i.scalar[i]; + del_scalar_C[i] = 0.5 * (cell_ip1.scalar[i] - cell_im1.scalar[i]); if (del_scalar_L[i] * del_scalar_R[i] > 0.0) { del_scalar_G[i] = 2.0 * del_scalar_L[i] * del_scalar_R[i] / (del_scalar_L[i] + del_scalar_R[i]); } else { @@ -536,29 +557,29 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou // see Stone for notation) Use the eigenvectors given in Stone // 2008, Appendix A - del_a_0_L = -0.5 * d_i * del_vx_L / a + 0.5 * del_p_L / (a * a); + del_a_0_L = -0.5 * cell_i.density * del_vx_L / a + 0.5 * del_p_L / (a * a); del_a_1_L = del_d_L - del_p_L / (a * a); del_a_2_L = del_vy_L; del_a_3_L = del_vz_L; - del_a_4_L = 0.5 * d_i * del_vx_L / a + 0.5 * del_p_L / (a * a); + del_a_4_L = 0.5 * cell_i.density * del_vx_L / a + 0.5 * del_p_L / (a * a); - del_a_0_R = -0.5 * d_i * del_vx_R / a + 0.5 * del_p_R / (a * a); + del_a_0_R = -0.5 * cell_i.density * del_vx_R / a + 0.5 * del_p_R / (a * a); del_a_1_R = del_d_R - del_p_R / (a * a); del_a_2_R = del_vy_R; del_a_3_R = del_vz_R; - del_a_4_R = 0.5 * d_i * del_vx_R / a + 0.5 * del_p_R / (a * a); + del_a_4_R = 0.5 * cell_i.density * del_vx_R / a + 0.5 * del_p_R / (a * a); - del_a_0_C = -0.5 * d_i * del_vx_C / a + 0.5 * del_p_C / (a * a); + del_a_0_C = -0.5 * cell_i.density * del_vx_C / a + 0.5 * del_p_C / (a * a); del_a_1_C = del_d_C - del_p_C / (a * a); del_a_2_C = del_vy_C; del_a_3_C = del_vz_C; - del_a_4_C = 0.5 * d_i * del_vx_C / a + 0.5 * del_p_C / (a * a); + del_a_4_C = 0.5 * cell_i.density * del_vx_C / a + 0.5 * del_p_C / (a * a); - del_a_0_G = -0.5 * d_i * del_vx_G / a + 0.5 * del_p_G / (a * a); + del_a_0_G = -0.5 * cell_i.density * del_vx_G / a + 0.5 * del_p_G / (a * a); del_a_1_G = del_d_G - del_p_G / (a * a); del_a_2_G = del_vy_G; del_a_3_G = del_vz_G; - del_a_4_G = 0.5 * d_i * del_vx_G / a + 0.5 * del_p_G / (a * a); + del_a_4_G = 0.5 * cell_i.density * del_vx_G / a + 0.5 * del_p_G / (a * a); // Step 4 - Apply monotonicity constraints to the differences in the // characteristic variables @@ -618,7 +639,7 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou // Stone Eqn 39 del_d_m_i = del_a_0_m + del_a_1_m + del_a_4_m; - del_vx_m_i = -a * del_a_0_m / d_i + a * del_a_4_m / d_i; + del_vx_m_i = -a * del_a_0_m / cell_i.density + a * del_a_4_m / cell_i.density; del_vy_m_i = del_a_2_m; del_vz_m_i = del_a_3_m; del_p_m_i = a * a * del_a_0_m + a * a * del_a_4_m; @@ -629,28 +650,28 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou // center Stone Eqn 36 // calculate the adiabatic sound speed in cell ipo - a = sqrt(gamma * p_ipo / d_ipo); + a = sqrt(gamma * cell_ip1.pressure / cell_ip1.density); // left - del_d_L = d_ipo - d_i; - del_vx_L = vx_ipo - vx_i; - del_vy_L = vy_ipo - vy_i; - del_vz_L = vz_ipo - vz_i; - del_p_L = p_ipo - p_i; + del_d_L = cell_ip1.density - cell_i.density; + del_vx_L = cell_ip1.velocity_x - cell_i.velocity_x; + del_vy_L = cell_ip1.velocity_y - cell_i.velocity_y; + del_vz_L = cell_ip1.velocity_z - cell_i.velocity_z; + del_p_L = cell_ip1.pressure - cell_i.pressure; // right - del_d_R = d_ipt - d_ipo; - del_vx_R = vx_ipt - vx_ipo; - del_vy_R = vy_ipt - vy_ipo; - del_vz_R = vz_ipt - vz_ipo; - del_p_R = p_ipt - p_ipo; + del_d_R = cell_ip2.density - cell_ip1.density; + del_vx_R = cell_ip2.velocity_x - cell_ip1.velocity_x; + del_vy_R = cell_ip2.velocity_y - cell_ip1.velocity_y; + del_vz_R = cell_ip2.velocity_z - cell_ip1.velocity_z; + del_p_R = cell_ip2.pressure - cell_ip1.pressure; // centered - del_d_C = 0.5 * (d_ipt - d_i); - del_vx_C = 0.5 * (vx_ipt - vx_i); - del_vy_C = 0.5 * (vy_ipt - vy_i); - del_vz_C = 0.5 * (vz_ipt - vz_i); - del_p_C = 0.5 * (p_ipt - p_i); + del_d_C = 0.5 * (cell_ip2.density - cell_i.density); + del_vx_C = 0.5 * (cell_ip2.velocity_x - cell_i.velocity_x); + del_vy_C = 0.5 * (cell_ip2.velocity_y - cell_i.velocity_y); + del_vz_C = 0.5 * (cell_ip2.velocity_z - cell_i.velocity_z); + del_p_C = 0.5 * (cell_ip2.pressure - cell_i.pressure); // van Leer if (del_d_L * del_d_R > 0.0) { @@ -680,9 +701,9 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou } #ifdef DE - del_ge_L = ge_ipo - ge_i; - del_ge_R = ge_ipt - ge_ipo; - del_ge_C = 0.5 * (ge_ipt - ge_i); + del_ge_L = cell_ip1.gas_energy - cell_i.gas_energy; + del_ge_R = cell_ip2.gas_energy - cell_ip1.gas_energy; + del_ge_C = 0.5 * (cell_ip2.gas_energy - cell_i.gas_energy); if (del_ge_L * del_ge_R > 0.0) { del_ge_G = 2.0 * del_ge_L * del_ge_R / (del_ge_L + del_ge_R); } else { @@ -692,9 +713,9 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { - del_scalar_L[i] = scalar_ipo[i] - scalar_i[i]; - del_scalar_R[i] = scalar_ipt[i] - scalar_ipo[i]; - del_scalar_C[i] = 0.5 * (scalar_ipt[i] - scalar_i[i]); + del_scalar_L[i] = cell_ip1.scalar[i] - cell_i.scalar[i]; + del_scalar_R[i] = cell_ip2.scalar[i] - cell_ip1.scalar[i]; + del_scalar_C[i] = 0.5 * (cell_ip2.scalar[i] - cell_i.scalar[i]); if (del_scalar_L[i] * del_scalar_R[i] > 0.0) { del_scalar_G[i] = 2.0 * del_scalar_L[i] * del_scalar_R[i] / (del_scalar_L[i] + del_scalar_R[i]); } else { @@ -709,29 +730,29 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou // see Stone for notation) Use the eigenvectors given in Stone // 2008, Appendix A - del_a_0_L = -0.5 * d_ipo * del_vx_L / a + 0.5 * del_p_L / (a * a); + del_a_0_L = -0.5 * cell_ip1.density * del_vx_L / a + 0.5 * del_p_L / (a * a); del_a_1_L = del_d_L - del_p_L / (a * a); del_a_2_L = del_vy_L; del_a_3_L = del_vz_L; - del_a_4_L = 0.5 * d_ipo * del_vx_L / a + 0.5 * del_p_L / (a * a); + del_a_4_L = 0.5 * cell_ip1.density * del_vx_L / a + 0.5 * del_p_L / (a * a); - del_a_0_R = -0.5 * d_ipo * del_vx_R / a + 0.5 * del_p_R / (a * a); + del_a_0_R = -0.5 * cell_ip1.density * del_vx_R / a + 0.5 * del_p_R / (a * a); del_a_1_R = del_d_R - del_p_R / (a * a); del_a_2_R = del_vy_R; del_a_3_R = del_vz_R; - del_a_4_R = 0.5 * d_ipo * del_vx_R / a + 0.5 * del_p_R / (a * a); + del_a_4_R = 0.5 * cell_ip1.density * del_vx_R / a + 0.5 * del_p_R / (a * a); - del_a_0_C = -0.5 * d_ipo * del_vx_C / a + 0.5 * del_p_C / (a * a); + del_a_0_C = -0.5 * cell_ip1.density * del_vx_C / a + 0.5 * del_p_C / (a * a); del_a_1_C = del_d_C - del_p_C / (a * a); del_a_2_C = del_vy_C; del_a_3_C = del_vz_C; - del_a_4_C = 0.5 * d_ipo * del_vx_C / a + 0.5 * del_p_C / (a * a); + del_a_4_C = 0.5 * cell_ip1.density * del_vx_C / a + 0.5 * del_p_C / (a * a); - del_a_0_G = -0.5 * d_ipo * del_vx_G / a + 0.5 * del_p_G / (a * a); + del_a_0_G = -0.5 * cell_ip1.density * del_vx_G / a + 0.5 * del_p_G / (a * a); del_a_1_G = del_d_G - del_p_G / (a * a); del_a_2_G = del_vy_G; del_a_3_G = del_vz_G; - del_a_4_G = 0.5 * d_ipo * del_vx_G / a + 0.5 * del_p_G / (a * a); + del_a_4_G = 0.5 * cell_ip1.density * del_vx_G / a + 0.5 * del_p_G / (a * a); // Step 4 - Apply monotonicity constraints to the differences in the // characteristic variables @@ -791,7 +812,7 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou // Stone Eqn 39 del_d_m_ipo = del_a_0_m + del_a_1_m + del_a_4_m; - del_vx_m_ipo = -a * del_a_0_m / d_ipo + a * del_a_4_m / d_ipo; + del_vx_m_ipo = -a * del_a_0_m / cell_ip1.density + a * del_a_4_m / cell_ip1.density; del_vy_m_ipo = del_a_2_m; del_vz_m_ipo = del_a_3_m; del_p_m_ipo = a * a * del_a_0_m + a * a * del_a_4_m; @@ -801,26 +822,26 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou // Here, the subscripts L and R refer to the left and right side of // the ith cell center Stone Eqn 46 - d_L = 0.5 * (d_i + d_imo) - (del_d_m_i - del_d_m_imo) / 6.0; - vx_L = 0.5 * (vx_i + vx_imo) - (del_vx_m_i - del_vx_m_imo) / 6.0; - vy_L = 0.5 * (vy_i + vy_imo) - (del_vy_m_i - del_vy_m_imo) / 6.0; - vz_L = 0.5 * (vz_i + vz_imo) - (del_vz_m_i - del_vz_m_imo) / 6.0; - p_L = 0.5 * (p_i + p_imo) - (del_p_m_i - del_p_m_imo) / 6.0; + d_L = 0.5 * (cell_i.density + cell_im1.density) - (del_d_m_i - del_d_m_imo) / 6.0; + vx_L = 0.5 * (cell_i.velocity_x + cell_im1.velocity_x) - (del_vx_m_i - del_vx_m_imo) / 6.0; + vy_L = 0.5 * (cell_i.velocity_y + cell_im1.velocity_y) - (del_vy_m_i - del_vy_m_imo) / 6.0; + vz_L = 0.5 * (cell_i.velocity_z + cell_im1.velocity_z) - (del_vz_m_i - del_vz_m_imo) / 6.0; + p_L = 0.5 * (cell_i.pressure + cell_im1.pressure) - (del_p_m_i - del_p_m_imo) / 6.0; - d_R = 0.5 * (d_ipo + d_i) - (del_d_m_ipo - del_d_m_i) / 6.0; - vx_R = 0.5 * (vx_ipo + vx_i) - (del_vx_m_ipo - del_vx_m_i) / 6.0; - vy_R = 0.5 * (vy_ipo + vy_i) - (del_vy_m_ipo - del_vy_m_i) / 6.0; - vz_R = 0.5 * (vz_ipo + vz_i) - (del_vz_m_ipo - del_vz_m_i) / 6.0; - p_R = 0.5 * (p_ipo + p_i) - (del_p_m_ipo - del_p_m_i) / 6.0; + d_R = 0.5 * (cell_ip1.density + cell_i.density) - (del_d_m_ipo - del_d_m_i) / 6.0; + vx_R = 0.5 * (cell_ip1.velocity_x + cell_i.velocity_x) - (del_vx_m_ipo - del_vx_m_i) / 6.0; + vy_R = 0.5 * (cell_ip1.velocity_y + cell_i.velocity_y) - (del_vy_m_ipo - del_vy_m_i) / 6.0; + vz_R = 0.5 * (cell_ip1.velocity_z + cell_i.velocity_z) - (del_vz_m_ipo - del_vz_m_i) / 6.0; + p_R = 0.5 * (cell_ip1.pressure + cell_i.pressure) - (del_p_m_ipo - del_p_m_i) / 6.0; #ifdef DE - ge_L = 0.5 * (ge_i + ge_imo) - (del_ge_m_i - del_ge_m_imo) / 6.0; - ge_R = 0.5 * (ge_ipo + ge_i) - (del_ge_m_ipo - del_ge_m_i) / 6.0; + ge_L = 0.5 * (cell_i.gas_energy + cell_im1.gas_energy) - (del_ge_m_i - del_ge_m_imo) / 6.0; + ge_R = 0.5 * (cell_ip1.gas_energy + cell_i.gas_energy) - (del_ge_m_ipo - del_ge_m_i) / 6.0; #endif // DE #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { - scalar_L[i] = 0.5 * (scalar_i[i] + scalar_imo[i]) - (del_scalar_m_i[i] - del_scalar_m_imo[i]) / 6.0; - scalar_R[i] = 0.5 * (scalar_ipo[i] + scalar_i[i]) - (del_scalar_m_ipo[i] - del_scalar_m_i[i]) / 6.0; + scalar_L[i] = 0.5 * (cell_i.scalar[i] + cell_im1.scalar[i]) - (del_scalar_m_i[i] - del_scalar_m_imo[i]) / 6.0; + scalar_R[i] = 0.5 * (cell_ip1.scalar[i] + cell_i.scalar[i]) - (del_scalar_m_ipo[i] - del_scalar_m_i[i]) / 6.0; } #endif // SCALAR @@ -829,108 +850,108 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou // of cell center lie between neighboring cell-centered values // Stone Eqns 47 - 53 - if ((d_R - d_i) * (d_i - d_L) <= 0) { - d_L = d_R = d_i; + if ((d_R - cell_i.density) * (cell_i.density - d_L) <= 0) { + d_L = d_R = cell_i.density; } - if ((vx_R - vx_i) * (vx_i - vx_L) <= 0) { - vx_L = vx_R = vx_i; + if ((vx_R - cell_i.velocity_x) * (cell_i.velocity_x - vx_L) <= 0) { + vx_L = vx_R = cell_i.velocity_x; } - if ((vy_R - vy_i) * (vy_i - vy_L) <= 0) { - vy_L = vy_R = vy_i; + if ((vy_R - cell_i.velocity_y) * (cell_i.velocity_y - vy_L) <= 0) { + vy_L = vy_R = cell_i.velocity_y; } - if ((vz_R - vz_i) * (vz_i - vz_L) <= 0) { - vz_L = vz_R = vz_i; + if ((vz_R - cell_i.velocity_z) * (cell_i.velocity_z - vz_L) <= 0) { + vz_L = vz_R = cell_i.velocity_z; } - if ((p_R - p_i) * (p_i - p_L) <= 0) { - p_L = p_R = p_i; + if ((p_R - cell_i.pressure) * (cell_i.pressure - p_L) <= 0) { + p_L = p_R = cell_i.pressure; } - if (6.0 * (d_R - d_L) * (d_i - 0.5 * (d_L + d_R)) > (d_R - d_L) * (d_R - d_L)) { - d_L = 3.0 * d_i - 2.0 * d_R; + if (6.0 * (d_R - d_L) * (cell_i.density - 0.5 * (d_L + d_R)) > (d_R - d_L) * (d_R - d_L)) { + d_L = 3.0 * cell_i.density - 2.0 * d_R; } - if (6.0 * (vx_R - vx_L) * (vx_i - 0.5 * (vx_L + vx_R)) > (vx_R - vx_L) * (vx_R - vx_L)) { - vx_L = 3.0 * vx_i - 2.0 * vx_R; + if (6.0 * (vx_R - vx_L) * (cell_i.velocity_x - 0.5 * (vx_L + vx_R)) > (vx_R - vx_L) * (vx_R - vx_L)) { + vx_L = 3.0 * cell_i.velocity_x - 2.0 * vx_R; } - if (6.0 * (vy_R - vy_L) * (vy_i - 0.5 * (vy_L + vy_R)) > (vy_R - vy_L) * (vy_R - vy_L)) { - vy_L = 3.0 * vy_i - 2.0 * vy_R; + if (6.0 * (vy_R - vy_L) * (cell_i.velocity_y - 0.5 * (vy_L + vy_R)) > (vy_R - vy_L) * (vy_R - vy_L)) { + vy_L = 3.0 * cell_i.velocity_y - 2.0 * vy_R; } - if (6.0 * (vz_R - vz_L) * (vz_i - 0.5 * (vz_L + vz_R)) > (vz_R - vz_L) * (vz_R - vz_L)) { - vz_L = 3.0 * vz_i - 2.0 * vz_R; + if (6.0 * (vz_R - vz_L) * (cell_i.velocity_z - 0.5 * (vz_L + vz_R)) > (vz_R - vz_L) * (vz_R - vz_L)) { + vz_L = 3.0 * cell_i.velocity_z - 2.0 * vz_R; } - if (6.0 * (p_R - p_L) * (p_i - 0.5 * (p_L + p_R)) > (p_R - p_L) * (p_R - p_L)) { - p_L = 3.0 * p_i - 2.0 * p_R; + if (6.0 * (p_R - p_L) * (cell_i.pressure - 0.5 * (p_L + p_R)) > (p_R - p_L) * (p_R - p_L)) { + p_L = 3.0 * cell_i.pressure - 2.0 * p_R; } - if (6.0 * (d_R - d_L) * (d_i - 0.5 * (d_L + d_R)) < -(d_R - d_L) * (d_R - d_L)) { - d_R = 3.0 * d_i - 2.0 * d_L; + if (6.0 * (d_R - d_L) * (cell_i.density - 0.5 * (d_L + d_R)) < -(d_R - d_L) * (d_R - d_L)) { + d_R = 3.0 * cell_i.density - 2.0 * d_L; } - if (6.0 * (vx_R - vx_L) * (vx_i - 0.5 * (vx_L + vx_R)) < -(vx_R - vx_L) * (vx_R - vx_L)) { - vx_R = 3.0 * vx_i - 2.0 * vx_L; + if (6.0 * (vx_R - vx_L) * (cell_i.velocity_x - 0.5 * (vx_L + vx_R)) < -(vx_R - vx_L) * (vx_R - vx_L)) { + vx_R = 3.0 * cell_i.velocity_x - 2.0 * vx_L; } - if (6.0 * (vy_R - vy_L) * (vy_i - 0.5 * (vy_L + vy_R)) < -(vy_R - vy_L) * (vy_R - vy_L)) { - vy_R = 3.0 * vy_i - 2.0 * vy_L; + if (6.0 * (vy_R - vy_L) * (cell_i.velocity_y - 0.5 * (vy_L + vy_R)) < -(vy_R - vy_L) * (vy_R - vy_L)) { + vy_R = 3.0 * cell_i.velocity_y - 2.0 * vy_L; } - if (6.0 * (vz_R - vz_L) * (vz_i - 0.5 * (vz_L + vz_R)) < -(vz_R - vz_L) * (vz_R - vz_L)) { - vz_R = 3.0 * vz_i - 2.0 * vz_L; + if (6.0 * (vz_R - vz_L) * (cell_i.velocity_z - 0.5 * (vz_L + vz_R)) < -(vz_R - vz_L) * (vz_R - vz_L)) { + vz_R = 3.0 * cell_i.velocity_z - 2.0 * vz_L; } - if (6.0 * (p_R - p_L) * (p_i - 0.5 * (p_L + p_R)) < -(p_R - p_L) * (p_R - p_L)) { - p_R = 3.0 * p_i - 2.0 * p_L; + if (6.0 * (p_R - p_L) * (cell_i.pressure - 0.5 * (p_L + p_R)) < -(p_R - p_L) * (p_R - p_L)) { + p_R = 3.0 * cell_i.pressure - 2.0 * p_L; } - d_L = fmax(fmin(d_i, d_imo), d_L); - d_L = fmin(fmax(d_i, d_imo), d_L); - d_R = fmax(fmin(d_i, d_ipo), d_R); - d_R = fmin(fmax(d_i, d_ipo), d_R); - vx_L = fmax(fmin(vx_i, vx_imo), vx_L); - vx_L = fmin(fmax(vx_i, vx_imo), vx_L); - vx_R = fmax(fmin(vx_i, vx_ipo), vx_R); - vx_R = fmin(fmax(vx_i, vx_ipo), vx_R); - vy_L = fmax(fmin(vy_i, vy_imo), vy_L); - vy_L = fmin(fmax(vy_i, vy_imo), vy_L); - vy_R = fmax(fmin(vy_i, vy_ipo), vy_R); - vy_R = fmin(fmax(vy_i, vy_ipo), vy_R); - vz_L = fmax(fmin(vz_i, vz_imo), vz_L); - vz_L = fmin(fmax(vz_i, vz_imo), vz_L); - vz_R = fmax(fmin(vz_i, vz_ipo), vz_R); - vz_R = fmin(fmax(vz_i, vz_ipo), vz_R); - p_L = fmax(fmin(p_i, p_imo), p_L); - p_L = fmin(fmax(p_i, p_imo), p_L); - p_R = fmax(fmin(p_i, p_ipo), p_R); - p_R = fmin(fmax(p_i, p_ipo), p_R); + d_L = fmax(fmin(cell_i.density, cell_im1.density), d_L); + d_L = fmin(fmax(cell_i.density, cell_im1.density), d_L); + d_R = fmax(fmin(cell_i.density, cell_ip1.density), d_R); + d_R = fmin(fmax(cell_i.density, cell_ip1.density), d_R); + vx_L = fmax(fmin(cell_i.velocity_x, cell_im1.velocity_x), vx_L); + vx_L = fmin(fmax(cell_i.velocity_x, cell_im1.velocity_x), vx_L); + vx_R = fmax(fmin(cell_i.velocity_x, cell_ip1.velocity_x), vx_R); + vx_R = fmin(fmax(cell_i.velocity_x, cell_ip1.velocity_x), vx_R); + vy_L = fmax(fmin(cell_i.velocity_y, cell_im1.velocity_y), vy_L); + vy_L = fmin(fmax(cell_i.velocity_y, cell_im1.velocity_y), vy_L); + vy_R = fmax(fmin(cell_i.velocity_y, cell_ip1.velocity_y), vy_R); + vy_R = fmin(fmax(cell_i.velocity_y, cell_ip1.velocity_y), vy_R); + vz_L = fmax(fmin(cell_i.velocity_z, cell_im1.velocity_z), vz_L); + vz_L = fmin(fmax(cell_i.velocity_z, cell_im1.velocity_z), vz_L); + vz_R = fmax(fmin(cell_i.velocity_z, cell_ip1.velocity_z), vz_R); + vz_R = fmin(fmax(cell_i.velocity_z, cell_ip1.velocity_z), vz_R); + p_L = fmax(fmin(cell_i.pressure, cell_im1.pressure), p_L); + p_L = fmin(fmax(cell_i.pressure, cell_im1.pressure), p_L); + p_R = fmax(fmin(cell_i.pressure, cell_ip1.pressure), p_R); + p_R = fmin(fmax(cell_i.pressure, cell_ip1.pressure), p_R); #ifdef DE - if ((ge_R - ge_i) * (ge_i - ge_L) <= 0) { - ge_L = ge_R = ge_i; + if ((ge_R - cell_i.gas_energy) * (cell_i.gas_energy - ge_L) <= 0) { + ge_L = ge_R = cell_i.gas_energy; } - if (6.0 * (ge_R - ge_L) * (ge_i - 0.5 * (ge_L + ge_R)) > (ge_R - ge_L) * (ge_R - ge_L)) { - ge_L = 3.0 * ge_i - 2.0 * ge_R; + if (6.0 * (ge_R - ge_L) * (cell_i.gas_energy - 0.5 * (ge_L + ge_R)) > (ge_R - ge_L) * (ge_R - ge_L)) { + ge_L = 3.0 * cell_i.gas_energy - 2.0 * ge_R; } - if (6.0 * (ge_R - ge_L) * (ge_i - 0.5 * (ge_L + ge_R)) < -(ge_R - ge_L) * (ge_R - ge_L)) { - ge_R = 3.0 * ge_i - 2.0 * ge_L; + if (6.0 * (ge_R - ge_L) * (cell_i.gas_energy - 0.5 * (ge_L + ge_R)) < -(ge_R - ge_L) * (ge_R - ge_L)) { + ge_R = 3.0 * cell_i.gas_energy - 2.0 * ge_L; } - ge_L = fmax(fmin(ge_i, ge_imo), ge_L); - ge_L = fmin(fmax(ge_i, ge_imo), ge_L); - ge_R = fmax(fmin(ge_i, ge_ipo), ge_R); - ge_R = fmin(fmax(ge_i, ge_ipo), ge_R); + ge_L = fmax(fmin(cell_i.gas_energy, cell_im1.gas_energy), ge_L); + ge_L = fmin(fmax(cell_i.gas_energy, cell_im1.gas_energy), ge_L); + ge_R = fmax(fmin(cell_i.gas_energy, cell_ip1.gas_energy), ge_R); + ge_R = fmin(fmax(cell_i.gas_energy, cell_ip1.gas_energy), ge_R); #endif // DE #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { - if ((scalar_R[i] - scalar_i[i]) * (scalar_i[i] - scalar_L[i]) <= 0) { - scalar_L[i] = scalar_R[i] = scalar_i[i]; + if ((scalar_R[i] - cell_i.scalar[i]) * (cell_i.scalar[i] - scalar_L[i]) <= 0) { + scalar_L[i] = scalar_R[i] = cell_i.scalar[i]; } - if (6.0 * (scalar_R[i] - scalar_L[i]) * (scalar_i[i] - 0.5 * (scalar_L[i] + scalar_R[i])) > + if (6.0 * (scalar_R[i] - scalar_L[i]) * (cell_i.scalar[i] - 0.5 * (scalar_L[i] + scalar_R[i])) > (scalar_R[i] - scalar_L[i]) * (scalar_R[i] - scalar_L[i])) { - scalar_L[i] = 3.0 * scalar_i[i] - 2.0 * scalar_R[i]; + scalar_L[i] = 3.0 * cell_i.scalar[i] - 2.0 * scalar_R[i]; } - if (6.0 * (scalar_R[i] - scalar_L[i]) * (scalar_i[i] - 0.5 * (scalar_L[i] + scalar_R[i])) < + if (6.0 * (scalar_R[i] - scalar_L[i]) * (cell_i.scalar[i] - 0.5 * (scalar_L[i] + scalar_R[i])) < -(scalar_R[i] - scalar_L[i]) * (scalar_R[i] - scalar_L[i])) { - scalar_R[i] = 3.0 * scalar_i[i] - 2.0 * scalar_L[i]; + scalar_R[i] = 3.0 * cell_i.scalar[i] - 2.0 * scalar_L[i]; } - scalar_L[i] = fmax(fmin(scalar_i[i], scalar_imo[i]), scalar_L[i]); - scalar_L[i] = fmin(fmax(scalar_i[i], scalar_imo[i]), scalar_L[i]); - scalar_R[i] = fmax(fmin(scalar_i[i], scalar_ipo[i]), scalar_R[i]); - scalar_R[i] = fmin(fmax(scalar_i[i], scalar_ipo[i]), scalar_R[i]); + scalar_L[i] = fmax(fmin(cell_i.scalar[i], cell_im1.scalar[i]), scalar_L[i]); + scalar_L[i] = fmin(fmax(cell_i.scalar[i], cell_im1.scalar[i]), scalar_L[i]); + scalar_R[i] = fmax(fmin(cell_i.scalar[i], cell_ip1.scalar[i]), scalar_R[i]); + scalar_R[i] = fmin(fmax(cell_i.scalar[i], cell_ip1.scalar[i]), scalar_R[i]); } #endif // SCALAR @@ -947,21 +968,21 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou del_vz_m_i = vz_R - vz_L; del_p_m_i = p_R - p_L; - d_6 = 6.0 * (d_i - 0.5 * (d_L + d_R)); - vx_6 = 6.0 * (vx_i - 0.5 * (vx_L + vx_R)); - vy_6 = 6.0 * (vy_i - 0.5 * (vy_L + vy_R)); - vz_6 = 6.0 * (vz_i - 0.5 * (vz_L + vz_R)); - p_6 = 6.0 * (p_i - 0.5 * (p_L + p_R)); + d_6 = 6.0 * (cell_i.density - 0.5 * (d_L + d_R)); + vx_6 = 6.0 * (cell_i.velocity_x - 0.5 * (vx_L + vx_R)); + vy_6 = 6.0 * (cell_i.velocity_y - 0.5 * (vy_L + vy_R)); + vz_6 = 6.0 * (cell_i.velocity_z - 0.5 * (vz_L + vz_R)); + p_6 = 6.0 * (cell_i.pressure - 0.5 * (p_L + p_R)); #ifdef DE del_ge_m_i = ge_R - ge_L; - ge_6 = 6.0 * (ge_i - 0.5 * (ge_L + ge_R)); + ge_6 = 6.0 * (cell_i.gas_energy - 0.5 * (ge_L + ge_R)); #endif // DE #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { del_scalar_m_i[i] = scalar_R[i] - scalar_L[i]; - scalar_6[i] = 6.0 * (scalar_i[i] - 0.5 * (scalar_L[i] + scalar_R[i])); + scalar_6[i] = 6.0 * (cell_i.scalar[i] - 0.5 * (scalar_L[i] + scalar_R[i])); } #endif // SCALAR @@ -969,11 +990,11 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou // primitive variables using the cell-centered primitive variables // recalculate the adiabatic sound speed in cell i - a = sqrt(gamma * p_i / d_i); + a = sqrt(gamma * cell_i.pressure / cell_i.density); - lambda_m = vx_i - a; - lambda_0 = vx_i; - lambda_p = vx_i + a; + lambda_m = cell_i.velocity_x - a; + lambda_0 = cell_i.velocity_x; + lambda_p = cell_i.velocity_x + a; // Step 9 - Compute the left and right interface values using monotonized // parabolic interpolation @@ -1040,9 +1061,9 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou chi_4 = A * (del_vz_m_i - vz_6) + B * vz_6; chi_5 = A * (del_p_m_i - p_6) + B * p_6; - sum_1 += -0.5 * (d_i * chi_2 / a - chi_5 / (a * a)); - sum_2 += 0.5 * (chi_2 - chi_5 / (a * d_i)); - sum_5 += -0.5 * (d_i * chi_2 * a - chi_5); + sum_1 += -0.5 * (cell_i.density * chi_2 / a - chi_5 / (a * a)); + sum_2 += 0.5 * (chi_2 - chi_5 / (a * cell_i.density)); + sum_5 += -0.5 * (cell_i.density * chi_2 * a - chi_5); } if (lambda_0 >= 0) { A = (0.5 * dtodx) * (lambda_p - lambda_0); @@ -1084,9 +1105,9 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou chi_4 = A * (del_vz_m_i - vz_6) + B * vz_6; chi_5 = A * (del_p_m_i - p_6) + B * p_6; - sum_1 += 0.5 * (d_i * chi_2 / a + chi_5 / (a * a)); - sum_2 += 0.5 * (chi_2 + chi_5 / (a * d_i)); - sum_5 += 0.5 * (d_i * chi_2 * a + chi_5); + sum_1 += 0.5 * (cell_i.density * chi_2 / a + chi_5 / (a * a)); + sum_2 += 0.5 * (chi_2 + chi_5 / (a * cell_i.density)); + sum_5 += 0.5 * (cell_i.density * chi_2 * a + chi_5); } // add the corrections to the initial guesses for the interface values @@ -1128,9 +1149,9 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou chi_4 = C * (del_vz_m_i + vz_6) + D * vz_6; chi_5 = C * (del_p_m_i + p_6) + D * p_6; - sum_1 += -0.5 * (d_i * chi_2 / a - chi_5 / (a * a)); - sum_2 += 0.5 * (chi_2 - chi_5 / (a * d_i)); - sum_5 += -0.5 * (d_i * chi_2 * a - chi_5); + sum_1 += -0.5 * (cell_i.density * chi_2 / a - chi_5 / (a * a)); + sum_2 += 0.5 * (chi_2 - chi_5 / (a * cell_i.density)); + sum_5 += -0.5 * (cell_i.density * chi_2 * a - chi_5); } if (lambda_0 <= 0) { C = (0.5 * dtodx) * (lambda_m - lambda_0); @@ -1172,9 +1193,9 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou chi_4 = C * (del_vz_m_i + vz_6) + D * vz_6; chi_5 = C * (del_p_m_i + p_6) + D * p_6; - sum_1 += 0.5 * (d_i * chi_2 / a + chi_5 / (a * a)); - sum_2 += 0.5 * (chi_2 + chi_5 / (a * d_i)); - sum_5 += 0.5 * (d_i * chi_2 * a + chi_5); + sum_1 += 0.5 * (cell_i.density * chi_2 / a + chi_5 / (a * a)); + sum_2 += 0.5 * (chi_2 + chi_5 / (a * cell_i.density)); + sum_5 += 0.5 * (cell_i.density * chi_2 * a + chi_5); } // add the corrections From 35a5d7d6ab5902cf78f81f6e773f3a73b058bd21 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Mon, 17 Apr 2023 10:08:40 -0400 Subject: [PATCH 410/694] PPMC: Replace `a` with `sound_speed` and function --- src/reconstruction/ppmc_cuda.cu | 136 ++++++++++++++++---------------- 1 file changed, 66 insertions(+), 70 deletions(-) diff --git a/src/reconstruction/ppmc_cuda.cu b/src/reconstruction/ppmc_cuda.cu index 999fab99c..0965fe6c8 100644 --- a/src/reconstruction/ppmc_cuda.cu +++ b/src/reconstruction/ppmc_cuda.cu @@ -54,12 +54,10 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou break; } - // declare primitive variables for each stencil - // these will be placed into registers for each thread + // declare primitive variables for each stencil these will be placed into registers for each thread reconstruction::Primitive cell_i, cell_im1, cell_im2, cell_ip1, cell_ip2; // declare other variables to be used - Real a; Real del_d_L, del_vx_L, del_vy_L, del_vz_L, del_p_L; Real del_d_R, del_vx_R, del_vy_R, del_vz_R, del_p_R; Real del_d_C, del_vx_C, del_vy_C, del_vz_C, del_p_C; @@ -76,7 +74,6 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou Real d_L, vx_L, vy_L, vz_L, p_L; Real d_R, vx_R, vy_R, vz_R, p_R; -// #ifdef CTU #ifndef VL Real dtodx = dt / dx; Real d_6, vx_6, vy_6, vz_6, p_6; @@ -92,7 +89,7 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou Real del_ge_m_imo, del_ge_m_i, del_ge_m_ipo; Real ge_L, ge_R; Real E_kin, E, dge; - // #ifdef CTU + #ifndef VL Real chi_ge, sum_ge, ge_6; #endif // VL @@ -101,7 +98,7 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou Real del_scalar_L[NSCALARS], del_scalar_R[NSCALARS], del_scalar_C[NSCALARS], del_scalar_G[NSCALARS]; Real del_scalar_m_imo[NSCALARS], del_scalar_m_i[NSCALARS], del_scalar_m_ipo[NSCALARS]; Real scalar_L[NSCALARS], scalar_R[NSCALARS]; - // #ifdef CTU + #ifndef VL Real chi_scalar[NSCALARS], sum_scalar[NSCALARS], scalar_6[NSCALARS]; #endif // VL @@ -305,7 +302,7 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou // center Stone Eqn 36 // calculate the adiabatic sound speed in cell imo - a = sqrt(gamma * cell_im1.pressure / cell_im1.density); + Real sound_speed = hydro_utilities::Calc_Sound_Speed(cell_im1.pressure, cell_im1.density, gamma); // left del_d_L = cell_im1.density - cell_im2.density; @@ -384,29 +381,29 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou // see Stone for notation) Use the eigenvectors given in Stone // 2008, Appendix A - del_a_0_L = -0.5 * cell_im1.density * del_vx_L / a + 0.5 * del_p_L / (a * a); - del_a_1_L = del_d_L - del_p_L / (a * a); + del_a_0_L = -0.5 * cell_im1.density * del_vx_L / sound_speed + 0.5 * del_p_L / (sound_speed * sound_speed); + del_a_1_L = del_d_L - del_p_L / (sound_speed * sound_speed); del_a_2_L = del_vy_L; del_a_3_L = del_vz_L; - del_a_4_L = 0.5 * cell_im1.density * del_vx_L / a + 0.5 * del_p_L / (a * a); + del_a_4_L = 0.5 * cell_im1.density * del_vx_L / sound_speed + 0.5 * del_p_L / (sound_speed * sound_speed); - del_a_0_R = -0.5 * cell_im1.density * del_vx_R / a + 0.5 * del_p_R / (a * a); - del_a_1_R = del_d_R - del_p_R / (a * a); + del_a_0_R = -0.5 * cell_im1.density * del_vx_R / sound_speed + 0.5 * del_p_R / (sound_speed * sound_speed); + del_a_1_R = del_d_R - del_p_R / (sound_speed * sound_speed); del_a_2_R = del_vy_R; del_a_3_R = del_vz_R; - del_a_4_R = 0.5 * cell_im1.density * del_vx_R / a + 0.5 * del_p_R / (a * a); + del_a_4_R = 0.5 * cell_im1.density * del_vx_R / sound_speed + 0.5 * del_p_R / (sound_speed * sound_speed); - del_a_0_C = -0.5 * cell_im1.density * del_vx_C / a + 0.5 * del_p_C / (a * a); - del_a_1_C = del_d_C - del_p_C / (a * a); + del_a_0_C = -0.5 * cell_im1.density * del_vx_C / sound_speed + 0.5 * del_p_C / (sound_speed * sound_speed); + del_a_1_C = del_d_C - del_p_C / (sound_speed * sound_speed); del_a_2_C = del_vy_C; del_a_3_C = del_vz_C; - del_a_4_C = 0.5 * cell_im1.density * del_vx_C / a + 0.5 * del_p_C / (a * a); + del_a_4_C = 0.5 * cell_im1.density * del_vx_C / sound_speed + 0.5 * del_p_C / (sound_speed * sound_speed); - del_a_0_G = -0.5 * cell_im1.density * del_vx_G / a + 0.5 * del_p_G / (a * a); - del_a_1_G = del_d_G - del_p_G / (a * a); + del_a_0_G = -0.5 * cell_im1.density * del_vx_G / sound_speed + 0.5 * del_p_G / (sound_speed * sound_speed); + del_a_1_G = del_d_G - del_p_G / (sound_speed * sound_speed); del_a_2_G = del_vy_G; del_a_3_G = del_vz_G; - del_a_4_G = 0.5 * cell_im1.density * del_vx_G / a + 0.5 * del_p_G / (a * a); + del_a_4_G = 0.5 * cell_im1.density * del_vx_G / sound_speed + 0.5 * del_p_G / (sound_speed * sound_speed); // Step 4 - Apply monotonicity constraints to the differences in the // characteristic variables @@ -466,10 +463,10 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou // Stone Eqn 39 del_d_m_imo = del_a_0_m + del_a_1_m + del_a_4_m; - del_vx_m_imo = -a * del_a_0_m / cell_im1.density + a * del_a_4_m / cell_im1.density; + del_vx_m_imo = -sound_speed * del_a_0_m / cell_im1.density + sound_speed * del_a_4_m / cell_im1.density; del_vy_m_imo = del_a_2_m; del_vz_m_imo = del_a_3_m; - del_p_m_imo = a * a * del_a_0_m + a * a * del_a_4_m; + del_p_m_imo = sound_speed * sound_speed * del_a_0_m + sound_speed * sound_speed * del_a_4_m; // Step 2 - Compute the left, right, centered, and van Leer differences of // the primitive variables @@ -477,7 +474,7 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou // center Stone Eqn 36 // calculate the adiabatic sound speed in cell i - a = sqrt(gamma * cell_i.pressure / cell_i.density); + sound_speed = hydro_utilities::Calc_Sound_Speed(cell_i.pressure, cell_i.density, gamma); // left del_d_L = cell_i.density - cell_im1.density; @@ -557,29 +554,29 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou // see Stone for notation) Use the eigenvectors given in Stone // 2008, Appendix A - del_a_0_L = -0.5 * cell_i.density * del_vx_L / a + 0.5 * del_p_L / (a * a); - del_a_1_L = del_d_L - del_p_L / (a * a); + del_a_0_L = -0.5 * cell_i.density * del_vx_L / sound_speed + 0.5 * del_p_L / (sound_speed * sound_speed); + del_a_1_L = del_d_L - del_p_L / (sound_speed * sound_speed); del_a_2_L = del_vy_L; del_a_3_L = del_vz_L; - del_a_4_L = 0.5 * cell_i.density * del_vx_L / a + 0.5 * del_p_L / (a * a); + del_a_4_L = 0.5 * cell_i.density * del_vx_L / sound_speed + 0.5 * del_p_L / (sound_speed * sound_speed); - del_a_0_R = -0.5 * cell_i.density * del_vx_R / a + 0.5 * del_p_R / (a * a); - del_a_1_R = del_d_R - del_p_R / (a * a); + del_a_0_R = -0.5 * cell_i.density * del_vx_R / sound_speed + 0.5 * del_p_R / (sound_speed * sound_speed); + del_a_1_R = del_d_R - del_p_R / (sound_speed * sound_speed); del_a_2_R = del_vy_R; del_a_3_R = del_vz_R; - del_a_4_R = 0.5 * cell_i.density * del_vx_R / a + 0.5 * del_p_R / (a * a); + del_a_4_R = 0.5 * cell_i.density * del_vx_R / sound_speed + 0.5 * del_p_R / (sound_speed * sound_speed); - del_a_0_C = -0.5 * cell_i.density * del_vx_C / a + 0.5 * del_p_C / (a * a); - del_a_1_C = del_d_C - del_p_C / (a * a); + del_a_0_C = -0.5 * cell_i.density * del_vx_C / sound_speed + 0.5 * del_p_C / (sound_speed * sound_speed); + del_a_1_C = del_d_C - del_p_C / (sound_speed * sound_speed); del_a_2_C = del_vy_C; del_a_3_C = del_vz_C; - del_a_4_C = 0.5 * cell_i.density * del_vx_C / a + 0.5 * del_p_C / (a * a); + del_a_4_C = 0.5 * cell_i.density * del_vx_C / sound_speed + 0.5 * del_p_C / (sound_speed * sound_speed); - del_a_0_G = -0.5 * cell_i.density * del_vx_G / a + 0.5 * del_p_G / (a * a); - del_a_1_G = del_d_G - del_p_G / (a * a); + del_a_0_G = -0.5 * cell_i.density * del_vx_G / sound_speed + 0.5 * del_p_G / (sound_speed * sound_speed); + del_a_1_G = del_d_G - del_p_G / (sound_speed * sound_speed); del_a_2_G = del_vy_G; del_a_3_G = del_vz_G; - del_a_4_G = 0.5 * cell_i.density * del_vx_G / a + 0.5 * del_p_G / (a * a); + del_a_4_G = 0.5 * cell_i.density * del_vx_G / sound_speed + 0.5 * del_p_G / (sound_speed * sound_speed); // Step 4 - Apply monotonicity constraints to the differences in the // characteristic variables @@ -639,10 +636,10 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou // Stone Eqn 39 del_d_m_i = del_a_0_m + del_a_1_m + del_a_4_m; - del_vx_m_i = -a * del_a_0_m / cell_i.density + a * del_a_4_m / cell_i.density; + del_vx_m_i = -sound_speed * del_a_0_m / cell_i.density + sound_speed * del_a_4_m / cell_i.density; del_vy_m_i = del_a_2_m; del_vz_m_i = del_a_3_m; - del_p_m_i = a * a * del_a_0_m + a * a * del_a_4_m; + del_p_m_i = sound_speed * sound_speed * del_a_0_m + sound_speed * sound_speed * del_a_4_m; // Step 2 - Compute the left, right, centered, and van Leer differences of // the primitive variables @@ -650,7 +647,7 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou // center Stone Eqn 36 // calculate the adiabatic sound speed in cell ipo - a = sqrt(gamma * cell_ip1.pressure / cell_ip1.density); + sound_speed = hydro_utilities::Calc_Sound_Speed(cell_ip1.pressure, cell_ip1.density, gamma); // left del_d_L = cell_ip1.density - cell_i.density; @@ -730,29 +727,29 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou // see Stone for notation) Use the eigenvectors given in Stone // 2008, Appendix A - del_a_0_L = -0.5 * cell_ip1.density * del_vx_L / a + 0.5 * del_p_L / (a * a); - del_a_1_L = del_d_L - del_p_L / (a * a); + del_a_0_L = -0.5 * cell_ip1.density * del_vx_L / sound_speed + 0.5 * del_p_L / (sound_speed * sound_speed); + del_a_1_L = del_d_L - del_p_L / (sound_speed * sound_speed); del_a_2_L = del_vy_L; del_a_3_L = del_vz_L; - del_a_4_L = 0.5 * cell_ip1.density * del_vx_L / a + 0.5 * del_p_L / (a * a); + del_a_4_L = 0.5 * cell_ip1.density * del_vx_L / sound_speed + 0.5 * del_p_L / (sound_speed * sound_speed); - del_a_0_R = -0.5 * cell_ip1.density * del_vx_R / a + 0.5 * del_p_R / (a * a); - del_a_1_R = del_d_R - del_p_R / (a * a); + del_a_0_R = -0.5 * cell_ip1.density * del_vx_R / sound_speed + 0.5 * del_p_R / (sound_speed * sound_speed); + del_a_1_R = del_d_R - del_p_R / (sound_speed * sound_speed); del_a_2_R = del_vy_R; del_a_3_R = del_vz_R; - del_a_4_R = 0.5 * cell_ip1.density * del_vx_R / a + 0.5 * del_p_R / (a * a); + del_a_4_R = 0.5 * cell_ip1.density * del_vx_R / sound_speed + 0.5 * del_p_R / (sound_speed * sound_speed); - del_a_0_C = -0.5 * cell_ip1.density * del_vx_C / a + 0.5 * del_p_C / (a * a); - del_a_1_C = del_d_C - del_p_C / (a * a); + del_a_0_C = -0.5 * cell_ip1.density * del_vx_C / sound_speed + 0.5 * del_p_C / (sound_speed * sound_speed); + del_a_1_C = del_d_C - del_p_C / (sound_speed * sound_speed); del_a_2_C = del_vy_C; del_a_3_C = del_vz_C; - del_a_4_C = 0.5 * cell_ip1.density * del_vx_C / a + 0.5 * del_p_C / (a * a); + del_a_4_C = 0.5 * cell_ip1.density * del_vx_C / sound_speed + 0.5 * del_p_C / (sound_speed * sound_speed); - del_a_0_G = -0.5 * cell_ip1.density * del_vx_G / a + 0.5 * del_p_G / (a * a); - del_a_1_G = del_d_G - del_p_G / (a * a); + del_a_0_G = -0.5 * cell_ip1.density * del_vx_G / sound_speed + 0.5 * del_p_G / (sound_speed * sound_speed); + del_a_1_G = del_d_G - del_p_G / (sound_speed * sound_speed); del_a_2_G = del_vy_G; del_a_3_G = del_vz_G; - del_a_4_G = 0.5 * cell_ip1.density * del_vx_G / a + 0.5 * del_p_G / (a * a); + del_a_4_G = 0.5 * cell_ip1.density * del_vx_G / sound_speed + 0.5 * del_p_G / (sound_speed * sound_speed); // Step 4 - Apply monotonicity constraints to the differences in the // characteristic variables @@ -812,10 +809,10 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou // Stone Eqn 39 del_d_m_ipo = del_a_0_m + del_a_1_m + del_a_4_m; - del_vx_m_ipo = -a * del_a_0_m / cell_ip1.density + a * del_a_4_m / cell_ip1.density; + del_vx_m_ipo = -sound_speed * del_a_0_m / cell_ip1.density + sound_speed * del_a_4_m / cell_ip1.density; del_vy_m_ipo = del_a_2_m; del_vz_m_ipo = del_a_3_m; - del_p_m_ipo = a * a * del_a_0_m + a * a * del_a_4_m; + del_p_m_ipo = sound_speed * sound_speed * del_a_0_m + sound_speed * sound_speed * del_a_4_m; // Step 6 - Use parabolic interpolation to compute values at the left and // right of each cell center @@ -955,7 +952,6 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou } #endif // SCALAR -// #ifdef CTU #ifndef VL // Step 8 - Compute the coefficients for the monotonized parabolic @@ -990,11 +986,11 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou // primitive variables using the cell-centered primitive variables // recalculate the adiabatic sound speed in cell i - a = sqrt(gamma * cell_i.pressure / cell_i.density); + sound_speed = hydro_utilities::Calc_Sound_Speed(cell_i.pressure, cell_i.density, gamma); - lambda_m = cell_i.velocity_x - a; + lambda_m = cell_i.velocity_x - sound_speed; lambda_0 = cell_i.velocity_x; - lambda_p = cell_i.velocity_x + a; + lambda_p = cell_i.velocity_x + sound_speed; // Step 9 - Compute the left and right interface values using monotonized // parabolic interpolation @@ -1061,9 +1057,9 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou chi_4 = A * (del_vz_m_i - vz_6) + B * vz_6; chi_5 = A * (del_p_m_i - p_6) + B * p_6; - sum_1 += -0.5 * (cell_i.density * chi_2 / a - chi_5 / (a * a)); - sum_2 += 0.5 * (chi_2 - chi_5 / (a * cell_i.density)); - sum_5 += -0.5 * (cell_i.density * chi_2 * a - chi_5); + sum_1 += -0.5 * (cell_i.density * chi_2 / sound_speed - chi_5 / (sound_speed * sound_speed)); + sum_2 += 0.5 * (chi_2 - chi_5 / (sound_speed * cell_i.density)); + sum_5 += -0.5 * (cell_i.density * chi_2 * sound_speed - chi_5); } if (lambda_0 >= 0) { A = (0.5 * dtodx) * (lambda_p - lambda_0); @@ -1083,7 +1079,7 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou } #endif // SCALAR - sum_1 += chi_1 - chi_5 / (a * a); + sum_1 += chi_1 - chi_5 / (sound_speed * sound_speed); sum_3 += chi_3; sum_4 += chi_4; #ifdef DE @@ -1105,9 +1101,9 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou chi_4 = A * (del_vz_m_i - vz_6) + B * vz_6; chi_5 = A * (del_p_m_i - p_6) + B * p_6; - sum_1 += 0.5 * (cell_i.density * chi_2 / a + chi_5 / (a * a)); - sum_2 += 0.5 * (chi_2 + chi_5 / (a * cell_i.density)); - sum_5 += 0.5 * (cell_i.density * chi_2 * a + chi_5); + sum_1 += 0.5 * (cell_i.density * chi_2 / sound_speed + chi_5 / (sound_speed * sound_speed)); + sum_2 += 0.5 * (chi_2 + chi_5 / (sound_speed * cell_i.density)); + sum_5 += 0.5 * (cell_i.density * chi_2 * sound_speed + chi_5); } // add the corrections to the initial guesses for the interface values @@ -1149,9 +1145,9 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou chi_4 = C * (del_vz_m_i + vz_6) + D * vz_6; chi_5 = C * (del_p_m_i + p_6) + D * p_6; - sum_1 += -0.5 * (cell_i.density * chi_2 / a - chi_5 / (a * a)); - sum_2 += 0.5 * (chi_2 - chi_5 / (a * cell_i.density)); - sum_5 += -0.5 * (cell_i.density * chi_2 * a - chi_5); + sum_1 += -0.5 * (cell_i.density * chi_2 / sound_speed - chi_5 / (sound_speed * sound_speed)); + sum_2 += 0.5 * (chi_2 - chi_5 / (sound_speed * cell_i.density)); + sum_5 += -0.5 * (cell_i.density * chi_2 * sound_speed - chi_5); } if (lambda_0 <= 0) { C = (0.5 * dtodx) * (lambda_m - lambda_0); @@ -1171,7 +1167,7 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou } #endif // SCALAR - sum_1 += chi_1 - chi_5 / (a * a); + sum_1 += chi_1 - chi_5 / (sound_speed * sound_speed); sum_3 += chi_3; sum_4 += chi_4; #ifdef DE @@ -1193,9 +1189,9 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou chi_4 = C * (del_vz_m_i + vz_6) + D * vz_6; chi_5 = C * (del_p_m_i + p_6) + D * p_6; - sum_1 += 0.5 * (cell_i.density * chi_2 / a + chi_5 / (a * a)); - sum_2 += 0.5 * (chi_2 + chi_5 / (a * cell_i.density)); - sum_5 += 0.5 * (cell_i.density * chi_2 * a + chi_5); + sum_1 += 0.5 * (cell_i.density * chi_2 / sound_speed + chi_5 / (sound_speed * sound_speed)); + sum_2 += 0.5 * (chi_2 + chi_5 / (sound_speed * cell_i.density)); + sum_5 += 0.5 * (cell_i.density * chi_2 * sound_speed + chi_5); } // add the corrections @@ -1213,7 +1209,7 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou } #endif // SCALAR -#endif // VL, i.e. CTU was used for this section +#endif // not VL, i.e. CTU or SIMPLE was used for this section // enforce minimum values d_L = fmax(d_L, (Real)TINY_NUMBER); From b6e8ccac9d57baa1d55ce4c47a8eec54925f7ea3 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Mon, 17 Apr 2023 10:18:20 -0400 Subject: [PATCH 411/694] PPMC: Replace primitive slope vars with structs --- src/reconstruction/ppmc_cuda.cu | 469 +++++++++++++++++--------------- 1 file changed, 244 insertions(+), 225 deletions(-) diff --git a/src/reconstruction/ppmc_cuda.cu b/src/reconstruction/ppmc_cuda.cu index 0965fe6c8..752e68302 100644 --- a/src/reconstruction/ppmc_cuda.cu +++ b/src/reconstruction/ppmc_cuda.cu @@ -58,10 +58,7 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou reconstruction::Primitive cell_i, cell_im1, cell_im2, cell_ip1, cell_ip2; // declare other variables to be used - Real del_d_L, del_vx_L, del_vy_L, del_vz_L, del_p_L; - Real del_d_R, del_vx_R, del_vy_R, del_vz_R, del_p_R; - Real del_d_C, del_vx_C, del_vy_C, del_vz_C, del_p_C; - Real del_d_G, del_vx_G, del_vy_G, del_vz_G, del_p_G; + reconstruction::Primitive del_L, del_R, del_C, del_G; // Slopes Real del_a_0_L, del_a_1_L, del_a_2_L, del_a_3_L, del_a_4_L; Real del_a_0_R, del_a_1_R, del_a_2_R, del_a_3_R, del_a_4_R; Real del_a_0_C, del_a_1_C, del_a_2_C, del_a_3_C, del_a_4_C; @@ -85,7 +82,6 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou #endif // VL #ifdef DE - Real del_ge_L, del_ge_R, del_ge_C, del_ge_G; Real del_ge_m_imo, del_ge_m_i, del_ge_m_ipo; Real ge_L, ge_R; Real E_kin, E, dge; @@ -95,7 +91,6 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou #endif // VL #endif // DE #ifdef SCALAR - Real del_scalar_L[NSCALARS], del_scalar_R[NSCALARS], del_scalar_C[NSCALARS], del_scalar_G[NSCALARS]; Real del_scalar_m_imo[NSCALARS], del_scalar_m_i[NSCALARS], del_scalar_m_ipo[NSCALARS]; Real scalar_L[NSCALARS], scalar_R[NSCALARS]; @@ -305,72 +300,72 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou Real sound_speed = hydro_utilities::Calc_Sound_Speed(cell_im1.pressure, cell_im1.density, gamma); // left - del_d_L = cell_im1.density - cell_im2.density; - del_vx_L = cell_im1.velocity_x - cell_im2.velocity_x; - del_vy_L = cell_im1.velocity_y - cell_im2.velocity_y; - del_vz_L = cell_im1.velocity_z - cell_im2.velocity_z; - del_p_L = cell_im1.pressure - cell_im2.pressure; + del_L.density = cell_im1.density - cell_im2.density; + del_L.velocity_x = cell_im1.velocity_x - cell_im2.velocity_x; + del_L.velocity_y = cell_im1.velocity_y - cell_im2.velocity_y; + del_L.velocity_z = cell_im1.velocity_z - cell_im2.velocity_z; + del_L.pressure = cell_im1.pressure - cell_im2.pressure; // right - del_d_R = cell_i.density - cell_im1.density; - del_vx_R = cell_i.velocity_x - cell_im1.velocity_x; - del_vy_R = cell_i.velocity_y - cell_im1.velocity_y; - del_vz_R = cell_i.velocity_z - cell_im1.velocity_z; - del_p_R = cell_i.pressure - cell_im1.pressure; + del_R.density = cell_i.density - cell_im1.density; + del_R.velocity_x = cell_i.velocity_x - cell_im1.velocity_x; + del_R.velocity_y = cell_i.velocity_y - cell_im1.velocity_y; + del_R.velocity_z = cell_i.velocity_z - cell_im1.velocity_z; + del_R.pressure = cell_i.pressure - cell_im1.pressure; // centered - del_d_C = 0.5 * (cell_i.density - cell_im2.density); - del_vx_C = 0.5 * (cell_i.velocity_x - cell_im2.velocity_x); - del_vy_C = 0.5 * (cell_i.velocity_y - cell_im2.velocity_y); - del_vz_C = 0.5 * (cell_i.velocity_z - cell_im2.velocity_z); - del_p_C = 0.5 * (cell_i.pressure - cell_im2.pressure); + del_C.density = 0.5 * (cell_i.density - cell_im2.density); + del_C.velocity_x = 0.5 * (cell_i.velocity_x - cell_im2.velocity_x); + del_C.velocity_y = 0.5 * (cell_i.velocity_y - cell_im2.velocity_y); + del_C.velocity_z = 0.5 * (cell_i.velocity_z - cell_im2.velocity_z); + del_C.pressure = 0.5 * (cell_i.pressure - cell_im2.pressure); // Van Leer - if (del_d_L * del_d_R > 0.0) { - del_d_G = 2.0 * del_d_L * del_d_R / (del_d_L + del_d_R); + if (del_L.density * del_R.density > 0.0) { + del_G.density = 2.0 * del_L.density * del_R.density / (del_L.density + del_R.density); } else { - del_d_G = 0.0; + del_G.density = 0.0; } - if (del_vx_L * del_vx_R > 0.0) { - del_vx_G = 2.0 * del_vx_L * del_vx_R / (del_vx_L + del_vx_R); + if (del_L.velocity_x * del_R.velocity_x > 0.0) { + del_G.velocity_x = 2.0 * del_L.velocity_x * del_R.velocity_x / (del_L.velocity_x + del_R.velocity_x); } else { - del_vx_G = 0.0; + del_G.velocity_x = 0.0; } - if (del_vy_L * del_vy_R > 0.0) { - del_vy_G = 2.0 * del_vy_L * del_vy_R / (del_vy_L + del_vy_R); + if (del_L.velocity_y * del_R.velocity_y > 0.0) { + del_G.velocity_y = 2.0 * del_L.velocity_y * del_R.velocity_y / (del_L.velocity_y + del_R.velocity_y); } else { - del_vy_G = 0.0; + del_G.velocity_y = 0.0; } - if (del_vz_L * del_vz_R > 0.0) { - del_vz_G = 2.0 * del_vz_L * del_vz_R / (del_vz_L + del_vz_R); + if (del_L.velocity_z * del_R.velocity_z > 0.0) { + del_G.velocity_z = 2.0 * del_L.velocity_z * del_R.velocity_z / (del_L.velocity_z + del_R.velocity_z); } else { - del_vz_G = 0.0; + del_G.velocity_z = 0.0; } - if (del_p_L * del_p_R > 0.0) { - del_p_G = 2.0 * del_p_L * del_p_R / (del_p_L + del_p_R); + if (del_L.pressure * del_R.pressure > 0.0) { + del_G.pressure = 2.0 * del_L.pressure * del_R.pressure / (del_L.pressure + del_R.pressure); } else { - del_p_G = 0.0; + del_G.pressure = 0.0; } #ifdef DE - del_ge_L = cell_im1.gas_energy - cell_im2.gas_energy; - del_ge_R = cell_i.gas_energy - cell_im1.gas_energy; - del_ge_C = 0.5 * (cell_i.gas_energy - cell_im2.gas_energy); - if (del_ge_L * del_ge_R > 0.0) { - del_ge_G = 2.0 * del_ge_L * del_ge_R / (del_ge_L + del_ge_R); + del_L.gas_energy = cell_im1.gas_energy - cell_im2.gas_energy; + del_R.gas_energy = cell_i.gas_energy - cell_im1.gas_energy; + del_C.gas_energy = 0.5 * (cell_i.gas_energy - cell_im2.gas_energy); + if (del_L.gas_energy * del_R.gas_energy > 0.0) { + del_G.gas_energy = 2.0 * del_L.gas_energy * del_R.gas_energy / (del_L.gas_energy + del_R.gas_energy); } else { - del_ge_G = 0.0; + del_G.gas_energy = 0.0; } #endif // DE #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { - del_scalar_L[i] = cell_im1.scalar[i] - cell_im2.scalar[i]; - del_scalar_R[i] = cell_i.scalar[i] - cell_im1.scalar[i]; - del_scalar_C[i] = 0.5 * (cell_i.scalar[i] - cell_im2.scalar[i]); - if (del_scalar_L[i] * del_scalar_R[i] > 0.0) { - del_scalar_G[i] = 2.0 * del_scalar_L[i] * del_scalar_R[i] / (del_scalar_L[i] + del_scalar_R[i]); + del_L.scalar[i] = cell_im1.scalar[i] - cell_im2.scalar[i]; + del_R.scalar[i] = cell_i.scalar[i] - cell_im1.scalar[i]; + del_C.scalar[i] = 0.5 * (cell_i.scalar[i] - cell_im2.scalar[i]); + if (del_L.scalar[i] * del_R.scalar[i] > 0.0) { + del_G.scalar[i] = 2.0 * del_L.scalar[i] * del_R.scalar[i] / (del_L.scalar[i] + del_R.scalar[i]); } else { - del_scalar_G[i] = 0.0; + del_G.scalar[i] = 0.0; } } #endif // SCALAR @@ -381,29 +376,37 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou // see Stone for notation) Use the eigenvectors given in Stone // 2008, Appendix A - del_a_0_L = -0.5 * cell_im1.density * del_vx_L / sound_speed + 0.5 * del_p_L / (sound_speed * sound_speed); - del_a_1_L = del_d_L - del_p_L / (sound_speed * sound_speed); - del_a_2_L = del_vy_L; - del_a_3_L = del_vz_L; - del_a_4_L = 0.5 * cell_im1.density * del_vx_L / sound_speed + 0.5 * del_p_L / (sound_speed * sound_speed); - - del_a_0_R = -0.5 * cell_im1.density * del_vx_R / sound_speed + 0.5 * del_p_R / (sound_speed * sound_speed); - del_a_1_R = del_d_R - del_p_R / (sound_speed * sound_speed); - del_a_2_R = del_vy_R; - del_a_3_R = del_vz_R; - del_a_4_R = 0.5 * cell_im1.density * del_vx_R / sound_speed + 0.5 * del_p_R / (sound_speed * sound_speed); - - del_a_0_C = -0.5 * cell_im1.density * del_vx_C / sound_speed + 0.5 * del_p_C / (sound_speed * sound_speed); - del_a_1_C = del_d_C - del_p_C / (sound_speed * sound_speed); - del_a_2_C = del_vy_C; - del_a_3_C = del_vz_C; - del_a_4_C = 0.5 * cell_im1.density * del_vx_C / sound_speed + 0.5 * del_p_C / (sound_speed * sound_speed); - - del_a_0_G = -0.5 * cell_im1.density * del_vx_G / sound_speed + 0.5 * del_p_G / (sound_speed * sound_speed); - del_a_1_G = del_d_G - del_p_G / (sound_speed * sound_speed); - del_a_2_G = del_vy_G; - del_a_3_G = del_vz_G; - del_a_4_G = 0.5 * cell_im1.density * del_vx_G / sound_speed + 0.5 * del_p_G / (sound_speed * sound_speed); + del_a_0_L = + -0.5 * cell_im1.density * del_L.velocity_x / sound_speed + 0.5 * del_L.pressure / (sound_speed * sound_speed); + del_a_1_L = del_L.density - del_L.pressure / (sound_speed * sound_speed); + del_a_2_L = del_L.velocity_y; + del_a_3_L = del_L.velocity_z; + del_a_4_L = + 0.5 * cell_im1.density * del_L.velocity_x / sound_speed + 0.5 * del_L.pressure / (sound_speed * sound_speed); + + del_a_0_R = + -0.5 * cell_im1.density * del_R.velocity_x / sound_speed + 0.5 * del_R.pressure / (sound_speed * sound_speed); + del_a_1_R = del_R.density - del_R.pressure / (sound_speed * sound_speed); + del_a_2_R = del_R.velocity_y; + del_a_3_R = del_R.velocity_z; + del_a_4_R = + 0.5 * cell_im1.density * del_R.velocity_x / sound_speed + 0.5 * del_R.pressure / (sound_speed * sound_speed); + + del_a_0_C = + -0.5 * cell_im1.density * del_C.velocity_x / sound_speed + 0.5 * del_C.pressure / (sound_speed * sound_speed); + del_a_1_C = del_C.density - del_C.pressure / (sound_speed * sound_speed); + del_a_2_C = del_C.velocity_y; + del_a_3_C = del_C.velocity_z; + del_a_4_C = + 0.5 * cell_im1.density * del_C.velocity_x / sound_speed + 0.5 * del_C.pressure / (sound_speed * sound_speed); + + del_a_0_G = + -0.5 * cell_im1.density * del_G.velocity_x / sound_speed + 0.5 * del_G.pressure / (sound_speed * sound_speed); + del_a_1_G = del_G.density - del_G.pressure / (sound_speed * sound_speed); + del_a_2_G = del_G.velocity_y; + del_a_3_G = del_G.velocity_z; + del_a_4_G = + 0.5 * cell_im1.density * del_G.velocity_x / sound_speed + 0.5 * del_G.pressure / (sound_speed * sound_speed); // Step 4 - Apply monotonicity constraints to the differences in the // characteristic variables @@ -437,20 +440,20 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou del_a_4_m = sgn_CUDA(del_a_4_C) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); } #ifdef DE - if (del_ge_L * del_ge_R > 0.0) { - lim_slope_a = fmin(fabs(del_ge_L), fabs(del_ge_R)); - lim_slope_b = fmin(fabs(del_ge_C), fabs(del_ge_G)); - del_ge_m_imo = sgn_CUDA(del_ge_C) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); + if (del_L.gas_energy * del_R.gas_energy > 0.0) { + lim_slope_a = fmin(fabs(del_L.gas_energy), fabs(del_R.gas_energy)); + lim_slope_b = fmin(fabs(del_C.gas_energy), fabs(del_G.gas_energy)); + del_ge_m_imo = sgn_CUDA(del_C.gas_energy) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); } else { del_ge_m_imo = 0.0; } #endif // DE #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { - if (del_scalar_L[i] * del_scalar_R[i] > 0.0) { - lim_slope_a = fmin(fabs(del_scalar_L[i]), fabs(del_scalar_R[i])); - lim_slope_b = fmin(fabs(del_scalar_C[i]), fabs(del_scalar_G[i])); - del_scalar_m_imo[i] = sgn_CUDA(del_scalar_C[i]) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); + if (del_L.scalar[i] * del_R.scalar[i] > 0.0) { + lim_slope_a = fmin(fabs(del_L.scalar[i]), fabs(del_R.scalar[i])); + lim_slope_b = fmin(fabs(del_C.scalar[i]), fabs(del_G.scalar[i])); + del_scalar_m_imo[i] = sgn_CUDA(del_C.scalar[i]) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); } else { del_scalar_m_imo[i] = 0.0; } @@ -477,73 +480,73 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou sound_speed = hydro_utilities::Calc_Sound_Speed(cell_i.pressure, cell_i.density, gamma); // left - del_d_L = cell_i.density - cell_im1.density; - del_vx_L = cell_i.velocity_x - cell_im1.velocity_x; - del_vy_L = cell_i.velocity_y - cell_im1.velocity_y; - del_vz_L = cell_i.velocity_z - cell_im1.velocity_z; - del_p_L = cell_i.pressure - cell_im1.pressure; + del_L.density = cell_i.density - cell_im1.density; + del_L.velocity_x = cell_i.velocity_x - cell_im1.velocity_x; + del_L.velocity_y = cell_i.velocity_y - cell_im1.velocity_y; + del_L.velocity_z = cell_i.velocity_z - cell_im1.velocity_z; + del_L.pressure = cell_i.pressure - cell_im1.pressure; // right - del_d_R = cell_ip1.density - cell_i.density; - del_vx_R = cell_ip1.velocity_x - cell_i.velocity_x; - del_vy_R = cell_ip1.velocity_y - cell_i.velocity_y; - del_vz_R = cell_ip1.velocity_z - cell_i.velocity_z; - del_p_R = cell_ip1.pressure - cell_i.pressure; + del_R.density = cell_ip1.density - cell_i.density; + del_R.velocity_x = cell_ip1.velocity_x - cell_i.velocity_x; + del_R.velocity_y = cell_ip1.velocity_y - cell_i.velocity_y; + del_R.velocity_z = cell_ip1.velocity_z - cell_i.velocity_z; + del_R.pressure = cell_ip1.pressure - cell_i.pressure; // centered - del_d_C = 0.5 * (cell_ip1.density - cell_im1.density); - del_vx_C = 0.5 * (cell_ip1.velocity_x - cell_im1.velocity_x); - del_vy_C = 0.5 * (cell_ip1.velocity_y - cell_im1.velocity_y); - del_vz_C = 0.5 * (cell_ip1.velocity_z - cell_im1.velocity_z); - del_p_C = 0.5 * (cell_ip1.pressure - cell_im1.pressure); + del_C.density = 0.5 * (cell_ip1.density - cell_im1.density); + del_C.velocity_x = 0.5 * (cell_ip1.velocity_x - cell_im1.velocity_x); + del_C.velocity_y = 0.5 * (cell_ip1.velocity_y - cell_im1.velocity_y); + del_C.velocity_z = 0.5 * (cell_ip1.velocity_z - cell_im1.velocity_z); + del_C.pressure = 0.5 * (cell_ip1.pressure - cell_im1.pressure); // van Leer - if (del_d_L * del_d_R > 0.0) { - del_d_G = 2.0 * del_d_L * del_d_R / (del_d_L + del_d_R); + if (del_L.density * del_R.density > 0.0) { + del_G.density = 2.0 * del_L.density * del_R.density / (del_L.density + del_R.density); } else { - del_d_G = 0.0; + del_G.density = 0.0; } - if (del_vx_L * del_vx_R > 0.0) { - del_vx_G = 2.0 * del_vx_L * del_vx_R / (del_vx_L + del_vx_R); + if (del_L.velocity_x * del_R.velocity_x > 0.0) { + del_G.velocity_x = 2.0 * del_L.velocity_x * del_R.velocity_x / (del_L.velocity_x + del_R.velocity_x); } else { - del_vx_G = 0.0; + del_G.velocity_x = 0.0; } - if (del_vy_L * del_vy_R > 0.0) { - del_vy_G = 2.0 * del_vy_L * del_vy_R / (del_vy_L + del_vy_R); + if (del_L.velocity_y * del_R.velocity_y > 0.0) { + del_G.velocity_y = 2.0 * del_L.velocity_y * del_R.velocity_y / (del_L.velocity_y + del_R.velocity_y); } else { - del_vy_G = 0.0; + del_G.velocity_y = 0.0; } - if (del_vz_L * del_vz_R > 0.0) { - del_vz_G = 2.0 * del_vz_L * del_vz_R / (del_vz_L + del_vz_R); + if (del_L.velocity_z * del_R.velocity_z > 0.0) { + del_G.velocity_z = 2.0 * del_L.velocity_z * del_R.velocity_z / (del_L.velocity_z + del_R.velocity_z); } else { - del_vz_G = 0.0; + del_G.velocity_z = 0.0; } - if (del_p_L * del_p_R > 0.0) { - del_p_G = 2.0 * del_p_L * del_p_R / (del_p_L + del_p_R); + if (del_L.pressure * del_R.pressure > 0.0) { + del_G.pressure = 2.0 * del_L.pressure * del_R.pressure / (del_L.pressure + del_R.pressure); } else { - del_p_G = 0.0; + del_G.pressure = 0.0; } #ifdef DE - del_ge_L = cell_i.gas_energy - cell_im1.gas_energy; - del_ge_R = cell_ip1.gas_energy - cell_i.gas_energy; - del_ge_C = 0.5 * (cell_ip1.gas_energy - cell_im1.gas_energy); - if (del_ge_L * del_ge_R > 0.0) { - del_ge_G = 2.0 * del_ge_L * del_ge_R / (del_ge_L + del_ge_R); + del_L.gas_energy = cell_i.gas_energy - cell_im1.gas_energy; + del_R.gas_energy = cell_ip1.gas_energy - cell_i.gas_energy; + del_C.gas_energy = 0.5 * (cell_ip1.gas_energy - cell_im1.gas_energy); + if (del_L.gas_energy * del_R.gas_energy > 0.0) { + del_G.gas_energy = 2.0 * del_L.gas_energy * del_R.gas_energy / (del_L.gas_energy + del_R.gas_energy); } else { - del_ge_G = 0.0; + del_G.gas_energy = 0.0; } #endif // DE #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { - del_scalar_L[i] = cell_i.scalar[i] - cell_im1.scalar[i]; - del_scalar_R[i] = cell_ip1.scalar[i] - cell_i.scalar[i]; - del_scalar_C[i] = 0.5 * (cell_ip1.scalar[i] - cell_im1.scalar[i]); - if (del_scalar_L[i] * del_scalar_R[i] > 0.0) { - del_scalar_G[i] = 2.0 * del_scalar_L[i] * del_scalar_R[i] / (del_scalar_L[i] + del_scalar_R[i]); + del_L.scalar[i] = cell_i.scalar[i] - cell_im1.scalar[i]; + del_R.scalar[i] = cell_ip1.scalar[i] - cell_i.scalar[i]; + del_C.scalar[i] = 0.5 * (cell_ip1.scalar[i] - cell_im1.scalar[i]); + if (del_L.scalar[i] * del_R.scalar[i] > 0.0) { + del_G.scalar[i] = 2.0 * del_L.scalar[i] * del_R.scalar[i] / (del_L.scalar[i] + del_R.scalar[i]); } else { - del_scalar_G[i] = 0.0; + del_G.scalar[i] = 0.0; } } #endif // SCALAR @@ -554,29 +557,37 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou // see Stone for notation) Use the eigenvectors given in Stone // 2008, Appendix A - del_a_0_L = -0.5 * cell_i.density * del_vx_L / sound_speed + 0.5 * del_p_L / (sound_speed * sound_speed); - del_a_1_L = del_d_L - del_p_L / (sound_speed * sound_speed); - del_a_2_L = del_vy_L; - del_a_3_L = del_vz_L; - del_a_4_L = 0.5 * cell_i.density * del_vx_L / sound_speed + 0.5 * del_p_L / (sound_speed * sound_speed); - - del_a_0_R = -0.5 * cell_i.density * del_vx_R / sound_speed + 0.5 * del_p_R / (sound_speed * sound_speed); - del_a_1_R = del_d_R - del_p_R / (sound_speed * sound_speed); - del_a_2_R = del_vy_R; - del_a_3_R = del_vz_R; - del_a_4_R = 0.5 * cell_i.density * del_vx_R / sound_speed + 0.5 * del_p_R / (sound_speed * sound_speed); - - del_a_0_C = -0.5 * cell_i.density * del_vx_C / sound_speed + 0.5 * del_p_C / (sound_speed * sound_speed); - del_a_1_C = del_d_C - del_p_C / (sound_speed * sound_speed); - del_a_2_C = del_vy_C; - del_a_3_C = del_vz_C; - del_a_4_C = 0.5 * cell_i.density * del_vx_C / sound_speed + 0.5 * del_p_C / (sound_speed * sound_speed); - - del_a_0_G = -0.5 * cell_i.density * del_vx_G / sound_speed + 0.5 * del_p_G / (sound_speed * sound_speed); - del_a_1_G = del_d_G - del_p_G / (sound_speed * sound_speed); - del_a_2_G = del_vy_G; - del_a_3_G = del_vz_G; - del_a_4_G = 0.5 * cell_i.density * del_vx_G / sound_speed + 0.5 * del_p_G / (sound_speed * sound_speed); + del_a_0_L = + -0.5 * cell_i.density * del_L.velocity_x / sound_speed + 0.5 * del_L.pressure / (sound_speed * sound_speed); + del_a_1_L = del_L.density - del_L.pressure / (sound_speed * sound_speed); + del_a_2_L = del_L.velocity_y; + del_a_3_L = del_L.velocity_z; + del_a_4_L = + 0.5 * cell_i.density * del_L.velocity_x / sound_speed + 0.5 * del_L.pressure / (sound_speed * sound_speed); + + del_a_0_R = + -0.5 * cell_i.density * del_R.velocity_x / sound_speed + 0.5 * del_R.pressure / (sound_speed * sound_speed); + del_a_1_R = del_R.density - del_R.pressure / (sound_speed * sound_speed); + del_a_2_R = del_R.velocity_y; + del_a_3_R = del_R.velocity_z; + del_a_4_R = + 0.5 * cell_i.density * del_R.velocity_x / sound_speed + 0.5 * del_R.pressure / (sound_speed * sound_speed); + + del_a_0_C = + -0.5 * cell_i.density * del_C.velocity_x / sound_speed + 0.5 * del_C.pressure / (sound_speed * sound_speed); + del_a_1_C = del_C.density - del_C.pressure / (sound_speed * sound_speed); + del_a_2_C = del_C.velocity_y; + del_a_3_C = del_C.velocity_z; + del_a_4_C = + 0.5 * cell_i.density * del_C.velocity_x / sound_speed + 0.5 * del_C.pressure / (sound_speed * sound_speed); + + del_a_0_G = + -0.5 * cell_i.density * del_G.velocity_x / sound_speed + 0.5 * del_G.pressure / (sound_speed * sound_speed); + del_a_1_G = del_G.density - del_G.pressure / (sound_speed * sound_speed); + del_a_2_G = del_G.velocity_y; + del_a_3_G = del_G.velocity_z; + del_a_4_G = + 0.5 * cell_i.density * del_G.velocity_x / sound_speed + 0.5 * del_G.pressure / (sound_speed * sound_speed); // Step 4 - Apply monotonicity constraints to the differences in the // characteristic variables @@ -610,20 +621,20 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou del_a_4_m = sgn_CUDA(del_a_4_C) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); } #ifdef DE - if (del_ge_L * del_ge_R > 0.0) { - lim_slope_a = fmin(fabs(del_ge_L), fabs(del_ge_R)); - lim_slope_b = fmin(fabs(del_ge_C), fabs(del_ge_G)); - del_ge_m_i = sgn_CUDA(del_ge_C) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); + if (del_L.gas_energy * del_R.gas_energy > 0.0) { + lim_slope_a = fmin(fabs(del_L.gas_energy), fabs(del_R.gas_energy)); + lim_slope_b = fmin(fabs(del_C.gas_energy), fabs(del_G.gas_energy)); + del_ge_m_i = sgn_CUDA(del_C.gas_energy) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); } else { del_ge_m_i = 0.0; } #endif // DE #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { - if (del_scalar_L[i] * del_scalar_R[i] > 0.0) { - lim_slope_a = fmin(fabs(del_scalar_L[i]), fabs(del_scalar_R[i])); - lim_slope_b = fmin(fabs(del_scalar_C[i]), fabs(del_scalar_G[i])); - del_scalar_m_i[i] = sgn_CUDA(del_scalar_C[i]) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); + if (del_L.scalar[i] * del_R.scalar[i] > 0.0) { + lim_slope_a = fmin(fabs(del_L.scalar[i]), fabs(del_R.scalar[i])); + lim_slope_b = fmin(fabs(del_C.scalar[i]), fabs(del_G.scalar[i])); + del_scalar_m_i[i] = sgn_CUDA(del_C.scalar[i]) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); } else { del_scalar_m_i[i] = 0.0; } @@ -650,73 +661,73 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou sound_speed = hydro_utilities::Calc_Sound_Speed(cell_ip1.pressure, cell_ip1.density, gamma); // left - del_d_L = cell_ip1.density - cell_i.density; - del_vx_L = cell_ip1.velocity_x - cell_i.velocity_x; - del_vy_L = cell_ip1.velocity_y - cell_i.velocity_y; - del_vz_L = cell_ip1.velocity_z - cell_i.velocity_z; - del_p_L = cell_ip1.pressure - cell_i.pressure; + del_L.density = cell_ip1.density - cell_i.density; + del_L.velocity_x = cell_ip1.velocity_x - cell_i.velocity_x; + del_L.velocity_y = cell_ip1.velocity_y - cell_i.velocity_y; + del_L.velocity_z = cell_ip1.velocity_z - cell_i.velocity_z; + del_L.pressure = cell_ip1.pressure - cell_i.pressure; // right - del_d_R = cell_ip2.density - cell_ip1.density; - del_vx_R = cell_ip2.velocity_x - cell_ip1.velocity_x; - del_vy_R = cell_ip2.velocity_y - cell_ip1.velocity_y; - del_vz_R = cell_ip2.velocity_z - cell_ip1.velocity_z; - del_p_R = cell_ip2.pressure - cell_ip1.pressure; + del_R.density = cell_ip2.density - cell_ip1.density; + del_R.velocity_x = cell_ip2.velocity_x - cell_ip1.velocity_x; + del_R.velocity_y = cell_ip2.velocity_y - cell_ip1.velocity_y; + del_R.velocity_z = cell_ip2.velocity_z - cell_ip1.velocity_z; + del_R.pressure = cell_ip2.pressure - cell_ip1.pressure; // centered - del_d_C = 0.5 * (cell_ip2.density - cell_i.density); - del_vx_C = 0.5 * (cell_ip2.velocity_x - cell_i.velocity_x); - del_vy_C = 0.5 * (cell_ip2.velocity_y - cell_i.velocity_y); - del_vz_C = 0.5 * (cell_ip2.velocity_z - cell_i.velocity_z); - del_p_C = 0.5 * (cell_ip2.pressure - cell_i.pressure); + del_C.density = 0.5 * (cell_ip2.density - cell_i.density); + del_C.velocity_x = 0.5 * (cell_ip2.velocity_x - cell_i.velocity_x); + del_C.velocity_y = 0.5 * (cell_ip2.velocity_y - cell_i.velocity_y); + del_C.velocity_z = 0.5 * (cell_ip2.velocity_z - cell_i.velocity_z); + del_C.pressure = 0.5 * (cell_ip2.pressure - cell_i.pressure); // van Leer - if (del_d_L * del_d_R > 0.0) { - del_d_G = 2.0 * del_d_L * del_d_R / (del_d_L + del_d_R); + if (del_L.density * del_R.density > 0.0) { + del_G.density = 2.0 * del_L.density * del_R.density / (del_L.density + del_R.density); } else { - del_d_G = 0.0; + del_G.density = 0.0; } - if (del_vx_L * del_vx_R > 0.0) { - del_vx_G = 2.0 * del_vx_L * del_vx_R / (del_vx_L + del_vx_R); + if (del_L.velocity_x * del_R.velocity_x > 0.0) { + del_G.velocity_x = 2.0 * del_L.velocity_x * del_R.velocity_x / (del_L.velocity_x + del_R.velocity_x); } else { - del_vx_G = 0.0; + del_G.velocity_x = 0.0; } - if (del_vy_L * del_vy_R > 0.0) { - del_vy_G = 2.0 * del_vy_L * del_vy_R / (del_vy_L + del_vy_R); + if (del_L.velocity_y * del_R.velocity_y > 0.0) { + del_G.velocity_y = 2.0 * del_L.velocity_y * del_R.velocity_y / (del_L.velocity_y + del_R.velocity_y); } else { - del_vy_G = 0.0; + del_G.velocity_y = 0.0; } - if (del_vz_L * del_vz_R > 0.0) { - del_vz_G = 2.0 * del_vz_L * del_vz_R / (del_vz_L + del_vz_R); + if (del_L.velocity_z * del_R.velocity_z > 0.0) { + del_G.velocity_z = 2.0 * del_L.velocity_z * del_R.velocity_z / (del_L.velocity_z + del_R.velocity_z); } else { - del_vz_G = 0.0; + del_G.velocity_z = 0.0; } - if (del_p_L * del_p_R > 0.0) { - del_p_G = 2.0 * del_p_L * del_p_R / (del_p_L + del_p_R); + if (del_L.pressure * del_R.pressure > 0.0) { + del_G.pressure = 2.0 * del_L.pressure * del_R.pressure / (del_L.pressure + del_R.pressure); } else { - del_p_G = 0.0; + del_G.pressure = 0.0; } #ifdef DE - del_ge_L = cell_ip1.gas_energy - cell_i.gas_energy; - del_ge_R = cell_ip2.gas_energy - cell_ip1.gas_energy; - del_ge_C = 0.5 * (cell_ip2.gas_energy - cell_i.gas_energy); - if (del_ge_L * del_ge_R > 0.0) { - del_ge_G = 2.0 * del_ge_L * del_ge_R / (del_ge_L + del_ge_R); + del_L.gas_energy = cell_ip1.gas_energy - cell_i.gas_energy; + del_R.gas_energy = cell_ip2.gas_energy - cell_ip1.gas_energy; + del_C.gas_energy = 0.5 * (cell_ip2.gas_energy - cell_i.gas_energy); + if (del_L.gas_energy * del_R.gas_energy > 0.0) { + del_G.gas_energy = 2.0 * del_L.gas_energy * del_R.gas_energy / (del_L.gas_energy + del_R.gas_energy); } else { - del_ge_G = 0.0; + del_G.gas_energy = 0.0; } #endif // DE #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { - del_scalar_L[i] = cell_ip1.scalar[i] - cell_i.scalar[i]; - del_scalar_R[i] = cell_ip2.scalar[i] - cell_ip1.scalar[i]; - del_scalar_C[i] = 0.5 * (cell_ip2.scalar[i] - cell_i.scalar[i]); - if (del_scalar_L[i] * del_scalar_R[i] > 0.0) { - del_scalar_G[i] = 2.0 * del_scalar_L[i] * del_scalar_R[i] / (del_scalar_L[i] + del_scalar_R[i]); + del_L.scalar[i] = cell_ip1.scalar[i] - cell_i.scalar[i]; + del_R.scalar[i] = cell_ip2.scalar[i] - cell_ip1.scalar[i]; + del_C.scalar[i] = 0.5 * (cell_ip2.scalar[i] - cell_i.scalar[i]); + if (del_L.scalar[i] * del_R.scalar[i] > 0.0) { + del_G.scalar[i] = 2.0 * del_L.scalar[i] * del_R.scalar[i] / (del_L.scalar[i] + del_R.scalar[i]); } else { - del_scalar_G[i] = 0.0; + del_G.scalar[i] = 0.0; } } #endif // SCALAR @@ -727,29 +738,37 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou // see Stone for notation) Use the eigenvectors given in Stone // 2008, Appendix A - del_a_0_L = -0.5 * cell_ip1.density * del_vx_L / sound_speed + 0.5 * del_p_L / (sound_speed * sound_speed); - del_a_1_L = del_d_L - del_p_L / (sound_speed * sound_speed); - del_a_2_L = del_vy_L; - del_a_3_L = del_vz_L; - del_a_4_L = 0.5 * cell_ip1.density * del_vx_L / sound_speed + 0.5 * del_p_L / (sound_speed * sound_speed); - - del_a_0_R = -0.5 * cell_ip1.density * del_vx_R / sound_speed + 0.5 * del_p_R / (sound_speed * sound_speed); - del_a_1_R = del_d_R - del_p_R / (sound_speed * sound_speed); - del_a_2_R = del_vy_R; - del_a_3_R = del_vz_R; - del_a_4_R = 0.5 * cell_ip1.density * del_vx_R / sound_speed + 0.5 * del_p_R / (sound_speed * sound_speed); - - del_a_0_C = -0.5 * cell_ip1.density * del_vx_C / sound_speed + 0.5 * del_p_C / (sound_speed * sound_speed); - del_a_1_C = del_d_C - del_p_C / (sound_speed * sound_speed); - del_a_2_C = del_vy_C; - del_a_3_C = del_vz_C; - del_a_4_C = 0.5 * cell_ip1.density * del_vx_C / sound_speed + 0.5 * del_p_C / (sound_speed * sound_speed); - - del_a_0_G = -0.5 * cell_ip1.density * del_vx_G / sound_speed + 0.5 * del_p_G / (sound_speed * sound_speed); - del_a_1_G = del_d_G - del_p_G / (sound_speed * sound_speed); - del_a_2_G = del_vy_G; - del_a_3_G = del_vz_G; - del_a_4_G = 0.5 * cell_ip1.density * del_vx_G / sound_speed + 0.5 * del_p_G / (sound_speed * sound_speed); + del_a_0_L = + -0.5 * cell_ip1.density * del_L.velocity_x / sound_speed + 0.5 * del_L.pressure / (sound_speed * sound_speed); + del_a_1_L = del_L.density - del_L.pressure / (sound_speed * sound_speed); + del_a_2_L = del_L.velocity_y; + del_a_3_L = del_L.velocity_z; + del_a_4_L = + 0.5 * cell_ip1.density * del_L.velocity_x / sound_speed + 0.5 * del_L.pressure / (sound_speed * sound_speed); + + del_a_0_R = + -0.5 * cell_ip1.density * del_R.velocity_x / sound_speed + 0.5 * del_R.pressure / (sound_speed * sound_speed); + del_a_1_R = del_R.density - del_R.pressure / (sound_speed * sound_speed); + del_a_2_R = del_R.velocity_y; + del_a_3_R = del_R.velocity_z; + del_a_4_R = + 0.5 * cell_ip1.density * del_R.velocity_x / sound_speed + 0.5 * del_R.pressure / (sound_speed * sound_speed); + + del_a_0_C = + -0.5 * cell_ip1.density * del_C.velocity_x / sound_speed + 0.5 * del_C.pressure / (sound_speed * sound_speed); + del_a_1_C = del_C.density - del_C.pressure / (sound_speed * sound_speed); + del_a_2_C = del_C.velocity_y; + del_a_3_C = del_C.velocity_z; + del_a_4_C = + 0.5 * cell_ip1.density * del_C.velocity_x / sound_speed + 0.5 * del_C.pressure / (sound_speed * sound_speed); + + del_a_0_G = + -0.5 * cell_ip1.density * del_G.velocity_x / sound_speed + 0.5 * del_G.pressure / (sound_speed * sound_speed); + del_a_1_G = del_G.density - del_G.pressure / (sound_speed * sound_speed); + del_a_2_G = del_G.velocity_y; + del_a_3_G = del_G.velocity_z; + del_a_4_G = + 0.5 * cell_ip1.density * del_G.velocity_x / sound_speed + 0.5 * del_G.pressure / (sound_speed * sound_speed); // Step 4 - Apply monotonicity constraints to the differences in the // characteristic variables @@ -783,20 +802,20 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou del_a_4_m = sgn_CUDA(del_a_4_C) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); } #ifdef DE - if (del_ge_L * del_ge_R > 0.0) { - lim_slope_a = fmin(fabs(del_ge_L), fabs(del_ge_R)); - lim_slope_b = fmin(fabs(del_ge_C), fabs(del_ge_G)); - del_ge_m_ipo = sgn_CUDA(del_ge_C) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); + if (del_L.gas_energy * del_R.gas_energy > 0.0) { + lim_slope_a = fmin(fabs(del_L.gas_energy), fabs(del_R.gas_energy)); + lim_slope_b = fmin(fabs(del_C.gas_energy), fabs(del_G.gas_energy)); + del_ge_m_ipo = sgn_CUDA(del_C.gas_energy) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); } else { del_ge_m_ipo = 0.0; } #endif // DE #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { - if (del_scalar_L[i] * del_scalar_R[i] > 0.0) { - lim_slope_a = fmin(fabs(del_scalar_L[i]), fabs(del_scalar_R[i])); - lim_slope_b = fmin(fabs(del_scalar_C[i]), fabs(del_scalar_G[i])); - del_scalar_m_ipo[i] = sgn_CUDA(del_scalar_C[i]) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); + if (del_L.scalar[i] * del_R.scalar[i] > 0.0) { + lim_slope_a = fmin(fabs(del_L.scalar[i]), fabs(del_R.scalar[i])); + lim_slope_b = fmin(fabs(del_C.scalar[i]), fabs(del_G.scalar[i])); + del_scalar_m_ipo[i] = sgn_CUDA(del_C.scalar[i]) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); } else { del_scalar_m_ipo[i] = 0.0; } From d4589b9b93b0bdb207414789e8f86b26eefd4ac3 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Mon, 17 Apr 2023 10:36:08 -0400 Subject: [PATCH 412/694] PPMC: Replace slopes & interfaces with structs --- src/reconstruction/ppmc_cuda.cu | 695 +++++++++++++++++--------------- 1 file changed, 378 insertions(+), 317 deletions(-) diff --git a/src/reconstruction/ppmc_cuda.cu b/src/reconstruction/ppmc_cuda.cu index 752e68302..cb276e69c 100644 --- a/src/reconstruction/ppmc_cuda.cu +++ b/src/reconstruction/ppmc_cuda.cu @@ -58,18 +58,13 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou reconstruction::Primitive cell_i, cell_im1, cell_im2, cell_ip1, cell_ip2; // declare other variables to be used - reconstruction::Primitive del_L, del_R, del_C, del_G; // Slopes - Real del_a_0_L, del_a_1_L, del_a_2_L, del_a_3_L, del_a_4_L; - Real del_a_0_R, del_a_1_R, del_a_2_R, del_a_3_R, del_a_4_R; - Real del_a_0_C, del_a_1_C, del_a_2_C, del_a_3_C, del_a_4_C; - Real del_a_0_G, del_a_1_G, del_a_2_G, del_a_3_G, del_a_4_G; - Real del_a_0_m, del_a_1_m, del_a_2_m, del_a_3_m, del_a_4_m; + reconstruction::Primitive del_L, del_R, del_C, del_G; // primitive slopes + reconstruction::Characteristic del_a_L, del_a_R, del_a_C, del_a_G, del_a_m; // characteristic slopes Real lim_slope_a, lim_slope_b; Real del_d_m_imo, del_vx_m_imo, del_vy_m_imo, del_vz_m_imo, del_p_m_imo; Real del_d_m_i, del_vx_m_i, del_vy_m_i, del_vz_m_i, del_p_m_i; Real del_d_m_ipo, del_vx_m_ipo, del_vy_m_ipo, del_vz_m_ipo, del_p_m_ipo; - Real d_L, vx_L, vy_L, vz_L, p_L; - Real d_R, vx_R, vy_R, vz_R, p_R; + reconstruction::Primitive interface_R_imh, interface_L_iph; // Interface states #ifndef VL Real dtodx = dt / dx; @@ -83,7 +78,6 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou #ifdef DE Real del_ge_m_imo, del_ge_m_i, del_ge_m_ipo; - Real ge_L, ge_R; Real E_kin, E, dge; #ifndef VL @@ -92,7 +86,6 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou #endif // DE #ifdef SCALAR Real del_scalar_m_imo[NSCALARS], del_scalar_m_i[NSCALARS], del_scalar_m_ipo[NSCALARS]; - Real scalar_L[NSCALARS], scalar_R[NSCALARS]; #ifndef VL Real chi_scalar[NSCALARS], sum_scalar[NSCALARS], scalar_6[NSCALARS]; @@ -376,68 +369,68 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou // see Stone for notation) Use the eigenvectors given in Stone // 2008, Appendix A - del_a_0_L = + del_a_L.a0 = -0.5 * cell_im1.density * del_L.velocity_x / sound_speed + 0.5 * del_L.pressure / (sound_speed * sound_speed); - del_a_1_L = del_L.density - del_L.pressure / (sound_speed * sound_speed); - del_a_2_L = del_L.velocity_y; - del_a_3_L = del_L.velocity_z; - del_a_4_L = + del_a_L.a1 = del_L.density - del_L.pressure / (sound_speed * sound_speed); + del_a_L.a2 = del_L.velocity_y; + del_a_L.a3 = del_L.velocity_z; + del_a_L.a4 = 0.5 * cell_im1.density * del_L.velocity_x / sound_speed + 0.5 * del_L.pressure / (sound_speed * sound_speed); - del_a_0_R = + del_a_R.a0 = -0.5 * cell_im1.density * del_R.velocity_x / sound_speed + 0.5 * del_R.pressure / (sound_speed * sound_speed); - del_a_1_R = del_R.density - del_R.pressure / (sound_speed * sound_speed); - del_a_2_R = del_R.velocity_y; - del_a_3_R = del_R.velocity_z; - del_a_4_R = + del_a_R.a1 = del_R.density - del_R.pressure / (sound_speed * sound_speed); + del_a_R.a2 = del_R.velocity_y; + del_a_R.a3 = del_R.velocity_z; + del_a_R.a4 = 0.5 * cell_im1.density * del_R.velocity_x / sound_speed + 0.5 * del_R.pressure / (sound_speed * sound_speed); - del_a_0_C = + del_a_C.a0 = -0.5 * cell_im1.density * del_C.velocity_x / sound_speed + 0.5 * del_C.pressure / (sound_speed * sound_speed); - del_a_1_C = del_C.density - del_C.pressure / (sound_speed * sound_speed); - del_a_2_C = del_C.velocity_y; - del_a_3_C = del_C.velocity_z; - del_a_4_C = + del_a_C.a1 = del_C.density - del_C.pressure / (sound_speed * sound_speed); + del_a_C.a2 = del_C.velocity_y; + del_a_C.a3 = del_C.velocity_z; + del_a_C.a4 = 0.5 * cell_im1.density * del_C.velocity_x / sound_speed + 0.5 * del_C.pressure / (sound_speed * sound_speed); - del_a_0_G = + del_a_G.a0 = -0.5 * cell_im1.density * del_G.velocity_x / sound_speed + 0.5 * del_G.pressure / (sound_speed * sound_speed); - del_a_1_G = del_G.density - del_G.pressure / (sound_speed * sound_speed); - del_a_2_G = del_G.velocity_y; - del_a_3_G = del_G.velocity_z; - del_a_4_G = + del_a_G.a1 = del_G.density - del_G.pressure / (sound_speed * sound_speed); + del_a_G.a2 = del_G.velocity_y; + del_a_G.a3 = del_G.velocity_z; + del_a_G.a4 = 0.5 * cell_im1.density * del_G.velocity_x / sound_speed + 0.5 * del_G.pressure / (sound_speed * sound_speed); // Step 4 - Apply monotonicity constraints to the differences in the // characteristic variables // Stone Eqn 38 - del_a_0_m = del_a_1_m = del_a_2_m = del_a_3_m = del_a_4_m = 0.0; + del_a_m.a0 = del_a_m.a1 = del_a_m.a2 = del_a_m.a3 = del_a_m.a4 = 0.0; - if (del_a_0_L * del_a_0_R > 0.0) { - lim_slope_a = fmin(fabs(del_a_0_L), fabs(del_a_0_R)); - lim_slope_b = fmin(fabs(del_a_0_C), fabs(del_a_0_G)); - del_a_0_m = sgn_CUDA(del_a_0_C) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); + if (del_a_L.a0 * del_a_R.a0 > 0.0) { + lim_slope_a = fmin(fabs(del_a_L.a0), fabs(del_a_R.a0)); + lim_slope_b = fmin(fabs(del_a_C.a0), fabs(del_a_G.a0)); + del_a_m.a0 = sgn_CUDA(del_a_C.a0) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); } - if (del_a_1_L * del_a_1_R > 0.0) { - lim_slope_a = fmin(fabs(del_a_1_L), fabs(del_a_1_R)); - lim_slope_b = fmin(fabs(del_a_1_C), fabs(del_a_1_G)); - del_a_1_m = sgn_CUDA(del_a_1_C) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); + if (del_a_L.a1 * del_a_R.a1 > 0.0) { + lim_slope_a = fmin(fabs(del_a_L.a1), fabs(del_a_R.a1)); + lim_slope_b = fmin(fabs(del_a_C.a1), fabs(del_a_G.a1)); + del_a_m.a1 = sgn_CUDA(del_a_C.a1) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); } - if (del_a_2_L * del_a_2_R > 0.0) { - lim_slope_a = fmin(fabs(del_a_2_L), fabs(del_a_2_R)); - lim_slope_b = fmin(fabs(del_a_2_C), fabs(del_a_2_G)); - del_a_2_m = sgn_CUDA(del_a_2_C) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); + if (del_a_L.a2 * del_a_R.a2 > 0.0) { + lim_slope_a = fmin(fabs(del_a_L.a2), fabs(del_a_R.a2)); + lim_slope_b = fmin(fabs(del_a_C.a2), fabs(del_a_G.a2)); + del_a_m.a2 = sgn_CUDA(del_a_C.a2) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); } - if (del_a_3_L * del_a_3_R > 0.0) { - lim_slope_a = fmin(fabs(del_a_3_L), fabs(del_a_3_R)); - lim_slope_b = fmin(fabs(del_a_3_C), fabs(del_a_3_G)); - del_a_3_m = sgn_CUDA(del_a_3_C) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); + if (del_a_L.a3 * del_a_R.a3 > 0.0) { + lim_slope_a = fmin(fabs(del_a_L.a3), fabs(del_a_R.a3)); + lim_slope_b = fmin(fabs(del_a_C.a3), fabs(del_a_G.a3)); + del_a_m.a3 = sgn_CUDA(del_a_C.a3) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); } - if (del_a_4_L * del_a_4_R > 0.0) { - lim_slope_a = fmin(fabs(del_a_4_L), fabs(del_a_4_R)); - lim_slope_b = fmin(fabs(del_a_4_C), fabs(del_a_4_G)); - del_a_4_m = sgn_CUDA(del_a_4_C) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); + if (del_a_L.a4 * del_a_R.a4 > 0.0) { + lim_slope_a = fmin(fabs(del_a_L.a4), fabs(del_a_R.a4)); + lim_slope_b = fmin(fabs(del_a_C.a4), fabs(del_a_G.a4)); + del_a_m.a4 = sgn_CUDA(del_a_C.a4) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); } #ifdef DE if (del_L.gas_energy * del_R.gas_energy > 0.0) { @@ -465,11 +458,11 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou // primitive variables // Stone Eqn 39 - del_d_m_imo = del_a_0_m + del_a_1_m + del_a_4_m; - del_vx_m_imo = -sound_speed * del_a_0_m / cell_im1.density + sound_speed * del_a_4_m / cell_im1.density; - del_vy_m_imo = del_a_2_m; - del_vz_m_imo = del_a_3_m; - del_p_m_imo = sound_speed * sound_speed * del_a_0_m + sound_speed * sound_speed * del_a_4_m; + del_d_m_imo = del_a_m.a0 + del_a_m.a1 + del_a_m.a4; + del_vx_m_imo = -sound_speed * del_a_m.a0 / cell_im1.density + sound_speed * del_a_m.a4 / cell_im1.density; + del_vy_m_imo = del_a_m.a2; + del_vz_m_imo = del_a_m.a3; + del_p_m_imo = sound_speed * sound_speed * del_a_m.a0 + sound_speed * sound_speed * del_a_m.a4; // Step 2 - Compute the left, right, centered, and van Leer differences of // the primitive variables @@ -557,68 +550,68 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou // see Stone for notation) Use the eigenvectors given in Stone // 2008, Appendix A - del_a_0_L = + del_a_L.a0 = -0.5 * cell_i.density * del_L.velocity_x / sound_speed + 0.5 * del_L.pressure / (sound_speed * sound_speed); - del_a_1_L = del_L.density - del_L.pressure / (sound_speed * sound_speed); - del_a_2_L = del_L.velocity_y; - del_a_3_L = del_L.velocity_z; - del_a_4_L = + del_a_L.a1 = del_L.density - del_L.pressure / (sound_speed * sound_speed); + del_a_L.a2 = del_L.velocity_y; + del_a_L.a3 = del_L.velocity_z; + del_a_L.a4 = 0.5 * cell_i.density * del_L.velocity_x / sound_speed + 0.5 * del_L.pressure / (sound_speed * sound_speed); - del_a_0_R = + del_a_R.a0 = -0.5 * cell_i.density * del_R.velocity_x / sound_speed + 0.5 * del_R.pressure / (sound_speed * sound_speed); - del_a_1_R = del_R.density - del_R.pressure / (sound_speed * sound_speed); - del_a_2_R = del_R.velocity_y; - del_a_3_R = del_R.velocity_z; - del_a_4_R = + del_a_R.a1 = del_R.density - del_R.pressure / (sound_speed * sound_speed); + del_a_R.a2 = del_R.velocity_y; + del_a_R.a3 = del_R.velocity_z; + del_a_R.a4 = 0.5 * cell_i.density * del_R.velocity_x / sound_speed + 0.5 * del_R.pressure / (sound_speed * sound_speed); - del_a_0_C = + del_a_C.a0 = -0.5 * cell_i.density * del_C.velocity_x / sound_speed + 0.5 * del_C.pressure / (sound_speed * sound_speed); - del_a_1_C = del_C.density - del_C.pressure / (sound_speed * sound_speed); - del_a_2_C = del_C.velocity_y; - del_a_3_C = del_C.velocity_z; - del_a_4_C = + del_a_C.a1 = del_C.density - del_C.pressure / (sound_speed * sound_speed); + del_a_C.a2 = del_C.velocity_y; + del_a_C.a3 = del_C.velocity_z; + del_a_C.a4 = 0.5 * cell_i.density * del_C.velocity_x / sound_speed + 0.5 * del_C.pressure / (sound_speed * sound_speed); - del_a_0_G = + del_a_G.a0 = -0.5 * cell_i.density * del_G.velocity_x / sound_speed + 0.5 * del_G.pressure / (sound_speed * sound_speed); - del_a_1_G = del_G.density - del_G.pressure / (sound_speed * sound_speed); - del_a_2_G = del_G.velocity_y; - del_a_3_G = del_G.velocity_z; - del_a_4_G = + del_a_G.a1 = del_G.density - del_G.pressure / (sound_speed * sound_speed); + del_a_G.a2 = del_G.velocity_y; + del_a_G.a3 = del_G.velocity_z; + del_a_G.a4 = 0.5 * cell_i.density * del_G.velocity_x / sound_speed + 0.5 * del_G.pressure / (sound_speed * sound_speed); // Step 4 - Apply monotonicity constraints to the differences in the // characteristic variables // Stone Eqn 38 - del_a_0_m = del_a_1_m = del_a_2_m = del_a_3_m = del_a_4_m = 0.0; + del_a_m.a0 = del_a_m.a1 = del_a_m.a2 = del_a_m.a3 = del_a_m.a4 = 0.0; - if (del_a_0_L * del_a_0_R > 0.0) { - lim_slope_a = fmin(fabs(del_a_0_L), fabs(del_a_0_R)); - lim_slope_b = fmin(fabs(del_a_0_C), fabs(del_a_0_G)); - del_a_0_m = sgn_CUDA(del_a_0_C) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); + if (del_a_L.a0 * del_a_R.a0 > 0.0) { + lim_slope_a = fmin(fabs(del_a_L.a0), fabs(del_a_R.a0)); + lim_slope_b = fmin(fabs(del_a_C.a0), fabs(del_a_G.a0)); + del_a_m.a0 = sgn_CUDA(del_a_C.a0) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); } - if (del_a_1_L * del_a_1_R > 0.0) { - lim_slope_a = fmin(fabs(del_a_1_L), fabs(del_a_1_R)); - lim_slope_b = fmin(fabs(del_a_1_C), fabs(del_a_1_G)); - del_a_1_m = sgn_CUDA(del_a_1_C) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); + if (del_a_L.a1 * del_a_R.a1 > 0.0) { + lim_slope_a = fmin(fabs(del_a_L.a1), fabs(del_a_R.a1)); + lim_slope_b = fmin(fabs(del_a_C.a1), fabs(del_a_G.a1)); + del_a_m.a1 = sgn_CUDA(del_a_C.a1) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); } - if (del_a_2_L * del_a_2_R > 0.0) { - lim_slope_a = fmin(fabs(del_a_2_L), fabs(del_a_2_R)); - lim_slope_b = fmin(fabs(del_a_2_C), fabs(del_a_2_G)); - del_a_2_m = sgn_CUDA(del_a_2_C) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); + if (del_a_L.a2 * del_a_R.a2 > 0.0) { + lim_slope_a = fmin(fabs(del_a_L.a2), fabs(del_a_R.a2)); + lim_slope_b = fmin(fabs(del_a_C.a2), fabs(del_a_G.a2)); + del_a_m.a2 = sgn_CUDA(del_a_C.a2) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); } - if (del_a_3_L * del_a_3_R > 0.0) { - lim_slope_a = fmin(fabs(del_a_3_L), fabs(del_a_3_R)); - lim_slope_b = fmin(fabs(del_a_3_C), fabs(del_a_3_G)); - del_a_3_m = sgn_CUDA(del_a_3_C) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); + if (del_a_L.a3 * del_a_R.a3 > 0.0) { + lim_slope_a = fmin(fabs(del_a_L.a3), fabs(del_a_R.a3)); + lim_slope_b = fmin(fabs(del_a_C.a3), fabs(del_a_G.a3)); + del_a_m.a3 = sgn_CUDA(del_a_C.a3) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); } - if (del_a_4_L * del_a_4_R > 0.0) { - lim_slope_a = fmin(fabs(del_a_4_L), fabs(del_a_4_R)); - lim_slope_b = fmin(fabs(del_a_4_C), fabs(del_a_4_G)); - del_a_4_m = sgn_CUDA(del_a_4_C) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); + if (del_a_L.a4 * del_a_R.a4 > 0.0) { + lim_slope_a = fmin(fabs(del_a_L.a4), fabs(del_a_R.a4)); + lim_slope_b = fmin(fabs(del_a_C.a4), fabs(del_a_G.a4)); + del_a_m.a4 = sgn_CUDA(del_a_C.a4) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); } #ifdef DE if (del_L.gas_energy * del_R.gas_energy > 0.0) { @@ -646,11 +639,11 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou // primitive variables // Stone Eqn 39 - del_d_m_i = del_a_0_m + del_a_1_m + del_a_4_m; - del_vx_m_i = -sound_speed * del_a_0_m / cell_i.density + sound_speed * del_a_4_m / cell_i.density; - del_vy_m_i = del_a_2_m; - del_vz_m_i = del_a_3_m; - del_p_m_i = sound_speed * sound_speed * del_a_0_m + sound_speed * sound_speed * del_a_4_m; + del_d_m_i = del_a_m.a0 + del_a_m.a1 + del_a_m.a4; + del_vx_m_i = -sound_speed * del_a_m.a0 / cell_i.density + sound_speed * del_a_m.a4 / cell_i.density; + del_vy_m_i = del_a_m.a2; + del_vz_m_i = del_a_m.a3; + del_p_m_i = sound_speed * sound_speed * del_a_m.a0 + sound_speed * sound_speed * del_a_m.a4; // Step 2 - Compute the left, right, centered, and van Leer differences of // the primitive variables @@ -738,68 +731,68 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou // see Stone for notation) Use the eigenvectors given in Stone // 2008, Appendix A - del_a_0_L = + del_a_L.a0 = -0.5 * cell_ip1.density * del_L.velocity_x / sound_speed + 0.5 * del_L.pressure / (sound_speed * sound_speed); - del_a_1_L = del_L.density - del_L.pressure / (sound_speed * sound_speed); - del_a_2_L = del_L.velocity_y; - del_a_3_L = del_L.velocity_z; - del_a_4_L = + del_a_L.a1 = del_L.density - del_L.pressure / (sound_speed * sound_speed); + del_a_L.a2 = del_L.velocity_y; + del_a_L.a3 = del_L.velocity_z; + del_a_L.a4 = 0.5 * cell_ip1.density * del_L.velocity_x / sound_speed + 0.5 * del_L.pressure / (sound_speed * sound_speed); - del_a_0_R = + del_a_R.a0 = -0.5 * cell_ip1.density * del_R.velocity_x / sound_speed + 0.5 * del_R.pressure / (sound_speed * sound_speed); - del_a_1_R = del_R.density - del_R.pressure / (sound_speed * sound_speed); - del_a_2_R = del_R.velocity_y; - del_a_3_R = del_R.velocity_z; - del_a_4_R = + del_a_R.a1 = del_R.density - del_R.pressure / (sound_speed * sound_speed); + del_a_R.a2 = del_R.velocity_y; + del_a_R.a3 = del_R.velocity_z; + del_a_R.a4 = 0.5 * cell_ip1.density * del_R.velocity_x / sound_speed + 0.5 * del_R.pressure / (sound_speed * sound_speed); - del_a_0_C = + del_a_C.a0 = -0.5 * cell_ip1.density * del_C.velocity_x / sound_speed + 0.5 * del_C.pressure / (sound_speed * sound_speed); - del_a_1_C = del_C.density - del_C.pressure / (sound_speed * sound_speed); - del_a_2_C = del_C.velocity_y; - del_a_3_C = del_C.velocity_z; - del_a_4_C = + del_a_C.a1 = del_C.density - del_C.pressure / (sound_speed * sound_speed); + del_a_C.a2 = del_C.velocity_y; + del_a_C.a3 = del_C.velocity_z; + del_a_C.a4 = 0.5 * cell_ip1.density * del_C.velocity_x / sound_speed + 0.5 * del_C.pressure / (sound_speed * sound_speed); - del_a_0_G = + del_a_G.a0 = -0.5 * cell_ip1.density * del_G.velocity_x / sound_speed + 0.5 * del_G.pressure / (sound_speed * sound_speed); - del_a_1_G = del_G.density - del_G.pressure / (sound_speed * sound_speed); - del_a_2_G = del_G.velocity_y; - del_a_3_G = del_G.velocity_z; - del_a_4_G = + del_a_G.a1 = del_G.density - del_G.pressure / (sound_speed * sound_speed); + del_a_G.a2 = del_G.velocity_y; + del_a_G.a3 = del_G.velocity_z; + del_a_G.a4 = 0.5 * cell_ip1.density * del_G.velocity_x / sound_speed + 0.5 * del_G.pressure / (sound_speed * sound_speed); // Step 4 - Apply monotonicity constraints to the differences in the // characteristic variables // Stone Eqn 38 - del_a_0_m = del_a_1_m = del_a_2_m = del_a_3_m = del_a_4_m = 0.0; + del_a_m.a0 = del_a_m.a1 = del_a_m.a2 = del_a_m.a3 = del_a_m.a4 = 0.0; - if (del_a_0_L * del_a_0_R > 0.0) { - lim_slope_a = fmin(fabs(del_a_0_L), fabs(del_a_0_R)); - lim_slope_b = fmin(fabs(del_a_0_C), fabs(del_a_0_G)); - del_a_0_m = sgn_CUDA(del_a_0_C) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); + if (del_a_L.a0 * del_a_R.a0 > 0.0) { + lim_slope_a = fmin(fabs(del_a_L.a0), fabs(del_a_R.a0)); + lim_slope_b = fmin(fabs(del_a_C.a0), fabs(del_a_G.a0)); + del_a_m.a0 = sgn_CUDA(del_a_C.a0) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); } - if (del_a_1_L * del_a_1_R > 0.0) { - lim_slope_a = fmin(fabs(del_a_1_L), fabs(del_a_1_R)); - lim_slope_b = fmin(fabs(del_a_1_C), fabs(del_a_1_G)); - del_a_1_m = sgn_CUDA(del_a_1_C) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); + if (del_a_L.a1 * del_a_R.a1 > 0.0) { + lim_slope_a = fmin(fabs(del_a_L.a1), fabs(del_a_R.a1)); + lim_slope_b = fmin(fabs(del_a_C.a1), fabs(del_a_G.a1)); + del_a_m.a1 = sgn_CUDA(del_a_C.a1) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); } - if (del_a_2_L * del_a_2_R > 0.0) { - lim_slope_a = fmin(fabs(del_a_2_L), fabs(del_a_2_R)); - lim_slope_b = fmin(fabs(del_a_2_C), fabs(del_a_2_G)); - del_a_2_m = sgn_CUDA(del_a_2_C) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); + if (del_a_L.a2 * del_a_R.a2 > 0.0) { + lim_slope_a = fmin(fabs(del_a_L.a2), fabs(del_a_R.a2)); + lim_slope_b = fmin(fabs(del_a_C.a2), fabs(del_a_G.a2)); + del_a_m.a2 = sgn_CUDA(del_a_C.a2) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); } - if (del_a_3_L * del_a_3_R > 0.0) { - lim_slope_a = fmin(fabs(del_a_3_L), fabs(del_a_3_R)); - lim_slope_b = fmin(fabs(del_a_3_C), fabs(del_a_3_G)); - del_a_3_m = sgn_CUDA(del_a_3_C) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); + if (del_a_L.a3 * del_a_R.a3 > 0.0) { + lim_slope_a = fmin(fabs(del_a_L.a3), fabs(del_a_R.a3)); + lim_slope_b = fmin(fabs(del_a_C.a3), fabs(del_a_G.a3)); + del_a_m.a3 = sgn_CUDA(del_a_C.a3) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); } - if (del_a_4_L * del_a_4_R > 0.0) { - lim_slope_a = fmin(fabs(del_a_4_L), fabs(del_a_4_R)); - lim_slope_b = fmin(fabs(del_a_4_C), fabs(del_a_4_G)); - del_a_4_m = sgn_CUDA(del_a_4_C) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); + if (del_a_L.a4 * del_a_R.a4 > 0.0) { + lim_slope_a = fmin(fabs(del_a_L.a4), fabs(del_a_R.a4)); + lim_slope_b = fmin(fabs(del_a_C.a4), fabs(del_a_G.a4)); + del_a_m.a4 = sgn_CUDA(del_a_C.a4) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); } #ifdef DE if (del_L.gas_energy * del_R.gas_energy > 0.0) { @@ -827,37 +820,39 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou // primitive variables // Stone Eqn 39 - del_d_m_ipo = del_a_0_m + del_a_1_m + del_a_4_m; - del_vx_m_ipo = -sound_speed * del_a_0_m / cell_ip1.density + sound_speed * del_a_4_m / cell_ip1.density; - del_vy_m_ipo = del_a_2_m; - del_vz_m_ipo = del_a_3_m; - del_p_m_ipo = sound_speed * sound_speed * del_a_0_m + sound_speed * sound_speed * del_a_4_m; + del_d_m_ipo = del_a_m.a0 + del_a_m.a1 + del_a_m.a4; + del_vx_m_ipo = -sound_speed * del_a_m.a0 / cell_ip1.density + sound_speed * del_a_m.a4 / cell_ip1.density; + del_vy_m_ipo = del_a_m.a2; + del_vz_m_ipo = del_a_m.a3; + del_p_m_ipo = sound_speed * sound_speed * del_a_m.a0 + sound_speed * sound_speed * del_a_m.a4; // Step 6 - Use parabolic interpolation to compute values at the left and // right of each cell center // Here, the subscripts L and R refer to the left and right side of // the ith cell center Stone Eqn 46 - d_L = 0.5 * (cell_i.density + cell_im1.density) - (del_d_m_i - del_d_m_imo) / 6.0; - vx_L = 0.5 * (cell_i.velocity_x + cell_im1.velocity_x) - (del_vx_m_i - del_vx_m_imo) / 6.0; - vy_L = 0.5 * (cell_i.velocity_y + cell_im1.velocity_y) - (del_vy_m_i - del_vy_m_imo) / 6.0; - vz_L = 0.5 * (cell_i.velocity_z + cell_im1.velocity_z) - (del_vz_m_i - del_vz_m_imo) / 6.0; - p_L = 0.5 * (cell_i.pressure + cell_im1.pressure) - (del_p_m_i - del_p_m_imo) / 6.0; + interface_R_imh.density = 0.5 * (cell_i.density + cell_im1.density) - (del_d_m_i - del_d_m_imo) / 6.0; + interface_R_imh.velocity_x = 0.5 * (cell_i.velocity_x + cell_im1.velocity_x) - (del_vx_m_i - del_vx_m_imo) / 6.0; + interface_R_imh.velocity_y = 0.5 * (cell_i.velocity_y + cell_im1.velocity_y) - (del_vy_m_i - del_vy_m_imo) / 6.0; + interface_R_imh.velocity_z = 0.5 * (cell_i.velocity_z + cell_im1.velocity_z) - (del_vz_m_i - del_vz_m_imo) / 6.0; + interface_R_imh.pressure = 0.5 * (cell_i.pressure + cell_im1.pressure) - (del_p_m_i - del_p_m_imo) / 6.0; - d_R = 0.5 * (cell_ip1.density + cell_i.density) - (del_d_m_ipo - del_d_m_i) / 6.0; - vx_R = 0.5 * (cell_ip1.velocity_x + cell_i.velocity_x) - (del_vx_m_ipo - del_vx_m_i) / 6.0; - vy_R = 0.5 * (cell_ip1.velocity_y + cell_i.velocity_y) - (del_vy_m_ipo - del_vy_m_i) / 6.0; - vz_R = 0.5 * (cell_ip1.velocity_z + cell_i.velocity_z) - (del_vz_m_ipo - del_vz_m_i) / 6.0; - p_R = 0.5 * (cell_ip1.pressure + cell_i.pressure) - (del_p_m_ipo - del_p_m_i) / 6.0; + interface_L_iph.density = 0.5 * (cell_ip1.density + cell_i.density) - (del_d_m_ipo - del_d_m_i) / 6.0; + interface_L_iph.velocity_x = 0.5 * (cell_ip1.velocity_x + cell_i.velocity_x) - (del_vx_m_ipo - del_vx_m_i) / 6.0; + interface_L_iph.velocity_y = 0.5 * (cell_ip1.velocity_y + cell_i.velocity_y) - (del_vy_m_ipo - del_vy_m_i) / 6.0; + interface_L_iph.velocity_z = 0.5 * (cell_ip1.velocity_z + cell_i.velocity_z) - (del_vz_m_ipo - del_vz_m_i) / 6.0; + interface_L_iph.pressure = 0.5 * (cell_ip1.pressure + cell_i.pressure) - (del_p_m_ipo - del_p_m_i) / 6.0; #ifdef DE - ge_L = 0.5 * (cell_i.gas_energy + cell_im1.gas_energy) - (del_ge_m_i - del_ge_m_imo) / 6.0; - ge_R = 0.5 * (cell_ip1.gas_energy + cell_i.gas_energy) - (del_ge_m_ipo - del_ge_m_i) / 6.0; + interface_R_imh.gas_energy = 0.5 * (cell_i.gas_energy + cell_im1.gas_energy) - (del_ge_m_i - del_ge_m_imo) / 6.0; + interface_L_iph.gas_energy = 0.5 * (cell_ip1.gas_energy + cell_i.gas_energy) - (del_ge_m_ipo - del_ge_m_i) / 6.0; #endif // DE #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { - scalar_L[i] = 0.5 * (cell_i.scalar[i] + cell_im1.scalar[i]) - (del_scalar_m_i[i] - del_scalar_m_imo[i]) / 6.0; - scalar_R[i] = 0.5 * (cell_ip1.scalar[i] + cell_i.scalar[i]) - (del_scalar_m_ipo[i] - del_scalar_m_i[i]) / 6.0; + interface_R_imh.scalar[i] = + 0.5 * (cell_i.scalar[i] + cell_im1.scalar[i]) - (del_scalar_m_i[i] - del_scalar_m_imo[i]) / 6.0; + interface_L_iph.scalar[i] = + 0.5 * (cell_ip1.scalar[i] + cell_i.scalar[i]) - (del_scalar_m_ipo[i] - del_scalar_m_i[i]) / 6.0; } #endif // SCALAR @@ -866,108 +861,144 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou // of cell center lie between neighboring cell-centered values // Stone Eqns 47 - 53 - if ((d_R - cell_i.density) * (cell_i.density - d_L) <= 0) { - d_L = d_R = cell_i.density; - } - if ((vx_R - cell_i.velocity_x) * (cell_i.velocity_x - vx_L) <= 0) { - vx_L = vx_R = cell_i.velocity_x; - } - if ((vy_R - cell_i.velocity_y) * (cell_i.velocity_y - vy_L) <= 0) { - vy_L = vy_R = cell_i.velocity_y; - } - if ((vz_R - cell_i.velocity_z) * (cell_i.velocity_z - vz_L) <= 0) { - vz_L = vz_R = cell_i.velocity_z; - } - if ((p_R - cell_i.pressure) * (cell_i.pressure - p_L) <= 0) { - p_L = p_R = cell_i.pressure; - } - - if (6.0 * (d_R - d_L) * (cell_i.density - 0.5 * (d_L + d_R)) > (d_R - d_L) * (d_R - d_L)) { - d_L = 3.0 * cell_i.density - 2.0 * d_R; - } - if (6.0 * (vx_R - vx_L) * (cell_i.velocity_x - 0.5 * (vx_L + vx_R)) > (vx_R - vx_L) * (vx_R - vx_L)) { - vx_L = 3.0 * cell_i.velocity_x - 2.0 * vx_R; - } - if (6.0 * (vy_R - vy_L) * (cell_i.velocity_y - 0.5 * (vy_L + vy_R)) > (vy_R - vy_L) * (vy_R - vy_L)) { - vy_L = 3.0 * cell_i.velocity_y - 2.0 * vy_R; - } - if (6.0 * (vz_R - vz_L) * (cell_i.velocity_z - 0.5 * (vz_L + vz_R)) > (vz_R - vz_L) * (vz_R - vz_L)) { - vz_L = 3.0 * cell_i.velocity_z - 2.0 * vz_R; - } - if (6.0 * (p_R - p_L) * (cell_i.pressure - 0.5 * (p_L + p_R)) > (p_R - p_L) * (p_R - p_L)) { - p_L = 3.0 * cell_i.pressure - 2.0 * p_R; - } - - if (6.0 * (d_R - d_L) * (cell_i.density - 0.5 * (d_L + d_R)) < -(d_R - d_L) * (d_R - d_L)) { - d_R = 3.0 * cell_i.density - 2.0 * d_L; - } - if (6.0 * (vx_R - vx_L) * (cell_i.velocity_x - 0.5 * (vx_L + vx_R)) < -(vx_R - vx_L) * (vx_R - vx_L)) { - vx_R = 3.0 * cell_i.velocity_x - 2.0 * vx_L; - } - if (6.0 * (vy_R - vy_L) * (cell_i.velocity_y - 0.5 * (vy_L + vy_R)) < -(vy_R - vy_L) * (vy_R - vy_L)) { - vy_R = 3.0 * cell_i.velocity_y - 2.0 * vy_L; - } - if (6.0 * (vz_R - vz_L) * (cell_i.velocity_z - 0.5 * (vz_L + vz_R)) < -(vz_R - vz_L) * (vz_R - vz_L)) { - vz_R = 3.0 * cell_i.velocity_z - 2.0 * vz_L; - } - if (6.0 * (p_R - p_L) * (cell_i.pressure - 0.5 * (p_L + p_R)) < -(p_R - p_L) * (p_R - p_L)) { - p_R = 3.0 * cell_i.pressure - 2.0 * p_L; - } - - d_L = fmax(fmin(cell_i.density, cell_im1.density), d_L); - d_L = fmin(fmax(cell_i.density, cell_im1.density), d_L); - d_R = fmax(fmin(cell_i.density, cell_ip1.density), d_R); - d_R = fmin(fmax(cell_i.density, cell_ip1.density), d_R); - vx_L = fmax(fmin(cell_i.velocity_x, cell_im1.velocity_x), vx_L); - vx_L = fmin(fmax(cell_i.velocity_x, cell_im1.velocity_x), vx_L); - vx_R = fmax(fmin(cell_i.velocity_x, cell_ip1.velocity_x), vx_R); - vx_R = fmin(fmax(cell_i.velocity_x, cell_ip1.velocity_x), vx_R); - vy_L = fmax(fmin(cell_i.velocity_y, cell_im1.velocity_y), vy_L); - vy_L = fmin(fmax(cell_i.velocity_y, cell_im1.velocity_y), vy_L); - vy_R = fmax(fmin(cell_i.velocity_y, cell_ip1.velocity_y), vy_R); - vy_R = fmin(fmax(cell_i.velocity_y, cell_ip1.velocity_y), vy_R); - vz_L = fmax(fmin(cell_i.velocity_z, cell_im1.velocity_z), vz_L); - vz_L = fmin(fmax(cell_i.velocity_z, cell_im1.velocity_z), vz_L); - vz_R = fmax(fmin(cell_i.velocity_z, cell_ip1.velocity_z), vz_R); - vz_R = fmin(fmax(cell_i.velocity_z, cell_ip1.velocity_z), vz_R); - p_L = fmax(fmin(cell_i.pressure, cell_im1.pressure), p_L); - p_L = fmin(fmax(cell_i.pressure, cell_im1.pressure), p_L); - p_R = fmax(fmin(cell_i.pressure, cell_ip1.pressure), p_R); - p_R = fmin(fmax(cell_i.pressure, cell_ip1.pressure), p_R); + if ((interface_L_iph.density - cell_i.density) * (cell_i.density - interface_R_imh.density) <= 0) { + interface_R_imh.density = interface_L_iph.density = cell_i.density; + } + if ((interface_L_iph.velocity_x - cell_i.velocity_x) * (cell_i.velocity_x - interface_R_imh.velocity_x) <= 0) { + interface_R_imh.velocity_x = interface_L_iph.velocity_x = cell_i.velocity_x; + } + if ((interface_L_iph.velocity_y - cell_i.velocity_y) * (cell_i.velocity_y - interface_R_imh.velocity_y) <= 0) { + interface_R_imh.velocity_y = interface_L_iph.velocity_y = cell_i.velocity_y; + } + if ((interface_L_iph.velocity_z - cell_i.velocity_z) * (cell_i.velocity_z - interface_R_imh.velocity_z) <= 0) { + interface_R_imh.velocity_z = interface_L_iph.velocity_z = cell_i.velocity_z; + } + if ((interface_L_iph.pressure - cell_i.pressure) * (cell_i.pressure - interface_R_imh.pressure) <= 0) { + interface_R_imh.pressure = interface_L_iph.pressure = cell_i.pressure; + } + + if (6.0 * (interface_L_iph.density - interface_R_imh.density) * + (cell_i.density - 0.5 * (interface_R_imh.density + interface_L_iph.density)) > + (interface_L_iph.density - interface_R_imh.density) * (interface_L_iph.density - interface_R_imh.density)) { + interface_R_imh.density = 3.0 * cell_i.density - 2.0 * interface_L_iph.density; + } + if (6.0 * (interface_L_iph.velocity_x - interface_R_imh.velocity_x) * + (cell_i.velocity_x - 0.5 * (interface_R_imh.velocity_x + interface_L_iph.velocity_x)) > + (interface_L_iph.velocity_x - interface_R_imh.velocity_x) * + (interface_L_iph.velocity_x - interface_R_imh.velocity_x)) { + interface_R_imh.velocity_x = 3.0 * cell_i.velocity_x - 2.0 * interface_L_iph.velocity_x; + } + if (6.0 * (interface_L_iph.velocity_y - interface_R_imh.velocity_y) * + (cell_i.velocity_y - 0.5 * (interface_R_imh.velocity_y + interface_L_iph.velocity_y)) > + (interface_L_iph.velocity_y - interface_R_imh.velocity_y) * + (interface_L_iph.velocity_y - interface_R_imh.velocity_y)) { + interface_R_imh.velocity_y = 3.0 * cell_i.velocity_y - 2.0 * interface_L_iph.velocity_y; + } + if (6.0 * (interface_L_iph.velocity_z - interface_R_imh.velocity_z) * + (cell_i.velocity_z - 0.5 * (interface_R_imh.velocity_z + interface_L_iph.velocity_z)) > + (interface_L_iph.velocity_z - interface_R_imh.velocity_z) * + (interface_L_iph.velocity_z - interface_R_imh.velocity_z)) { + interface_R_imh.velocity_z = 3.0 * cell_i.velocity_z - 2.0 * interface_L_iph.velocity_z; + } + if (6.0 * (interface_L_iph.pressure - interface_R_imh.pressure) * + (cell_i.pressure - 0.5 * (interface_R_imh.pressure + interface_L_iph.pressure)) > + (interface_L_iph.pressure - interface_R_imh.pressure) * (interface_L_iph.pressure - interface_R_imh.pressure)) { + interface_R_imh.pressure = 3.0 * cell_i.pressure - 2.0 * interface_L_iph.pressure; + } + + if (6.0 * (interface_L_iph.density - interface_R_imh.density) * + (cell_i.density - 0.5 * (interface_R_imh.density + interface_L_iph.density)) < + -(interface_L_iph.density - interface_R_imh.density) * (interface_L_iph.density - interface_R_imh.density)) { + interface_L_iph.density = 3.0 * cell_i.density - 2.0 * interface_R_imh.density; + } + if (6.0 * (interface_L_iph.velocity_x - interface_R_imh.velocity_x) * + (cell_i.velocity_x - 0.5 * (interface_R_imh.velocity_x + interface_L_iph.velocity_x)) < + -(interface_L_iph.velocity_x - interface_R_imh.velocity_x) * + (interface_L_iph.velocity_x - interface_R_imh.velocity_x)) { + interface_L_iph.velocity_x = 3.0 * cell_i.velocity_x - 2.0 * interface_R_imh.velocity_x; + } + if (6.0 * (interface_L_iph.velocity_y - interface_R_imh.velocity_y) * + (cell_i.velocity_y - 0.5 * (interface_R_imh.velocity_y + interface_L_iph.velocity_y)) < + -(interface_L_iph.velocity_y - interface_R_imh.velocity_y) * + (interface_L_iph.velocity_y - interface_R_imh.velocity_y)) { + interface_L_iph.velocity_y = 3.0 * cell_i.velocity_y - 2.0 * interface_R_imh.velocity_y; + } + if (6.0 * (interface_L_iph.velocity_z - interface_R_imh.velocity_z) * + (cell_i.velocity_z - 0.5 * (interface_R_imh.velocity_z + interface_L_iph.velocity_z)) < + -(interface_L_iph.velocity_z - interface_R_imh.velocity_z) * + (interface_L_iph.velocity_z - interface_R_imh.velocity_z)) { + interface_L_iph.velocity_z = 3.0 * cell_i.velocity_z - 2.0 * interface_R_imh.velocity_z; + } + if (6.0 * (interface_L_iph.pressure - interface_R_imh.pressure) * + (cell_i.pressure - 0.5 * (interface_R_imh.pressure + interface_L_iph.pressure)) < + -(interface_L_iph.pressure - interface_R_imh.pressure) * (interface_L_iph.pressure - interface_R_imh.pressure)) { + interface_L_iph.pressure = 3.0 * cell_i.pressure - 2.0 * interface_R_imh.pressure; + } + + interface_R_imh.density = fmax(fmin(cell_i.density, cell_im1.density), interface_R_imh.density); + interface_R_imh.density = fmin(fmax(cell_i.density, cell_im1.density), interface_R_imh.density); + interface_L_iph.density = fmax(fmin(cell_i.density, cell_ip1.density), interface_L_iph.density); + interface_L_iph.density = fmin(fmax(cell_i.density, cell_ip1.density), interface_L_iph.density); + interface_R_imh.velocity_x = fmax(fmin(cell_i.velocity_x, cell_im1.velocity_x), interface_R_imh.velocity_x); + interface_R_imh.velocity_x = fmin(fmax(cell_i.velocity_x, cell_im1.velocity_x), interface_R_imh.velocity_x); + interface_L_iph.velocity_x = fmax(fmin(cell_i.velocity_x, cell_ip1.velocity_x), interface_L_iph.velocity_x); + interface_L_iph.velocity_x = fmin(fmax(cell_i.velocity_x, cell_ip1.velocity_x), interface_L_iph.velocity_x); + interface_R_imh.velocity_y = fmax(fmin(cell_i.velocity_y, cell_im1.velocity_y), interface_R_imh.velocity_y); + interface_R_imh.velocity_y = fmin(fmax(cell_i.velocity_y, cell_im1.velocity_y), interface_R_imh.velocity_y); + interface_L_iph.velocity_y = fmax(fmin(cell_i.velocity_y, cell_ip1.velocity_y), interface_L_iph.velocity_y); + interface_L_iph.velocity_y = fmin(fmax(cell_i.velocity_y, cell_ip1.velocity_y), interface_L_iph.velocity_y); + interface_R_imh.velocity_z = fmax(fmin(cell_i.velocity_z, cell_im1.velocity_z), interface_R_imh.velocity_z); + interface_R_imh.velocity_z = fmin(fmax(cell_i.velocity_z, cell_im1.velocity_z), interface_R_imh.velocity_z); + interface_L_iph.velocity_z = fmax(fmin(cell_i.velocity_z, cell_ip1.velocity_z), interface_L_iph.velocity_z); + interface_L_iph.velocity_z = fmin(fmax(cell_i.velocity_z, cell_ip1.velocity_z), interface_L_iph.velocity_z); + interface_R_imh.pressure = fmax(fmin(cell_i.pressure, cell_im1.pressure), interface_R_imh.pressure); + interface_R_imh.pressure = fmin(fmax(cell_i.pressure, cell_im1.pressure), interface_R_imh.pressure); + interface_L_iph.pressure = fmax(fmin(cell_i.pressure, cell_ip1.pressure), interface_L_iph.pressure); + interface_L_iph.pressure = fmin(fmax(cell_i.pressure, cell_ip1.pressure), interface_L_iph.pressure); #ifdef DE - if ((ge_R - cell_i.gas_energy) * (cell_i.gas_energy - ge_L) <= 0) { - ge_L = ge_R = cell_i.gas_energy; - } - if (6.0 * (ge_R - ge_L) * (cell_i.gas_energy - 0.5 * (ge_L + ge_R)) > (ge_R - ge_L) * (ge_R - ge_L)) { - ge_L = 3.0 * cell_i.gas_energy - 2.0 * ge_R; - } - if (6.0 * (ge_R - ge_L) * (cell_i.gas_energy - 0.5 * (ge_L + ge_R)) < -(ge_R - ge_L) * (ge_R - ge_L)) { - ge_R = 3.0 * cell_i.gas_energy - 2.0 * ge_L; - } - ge_L = fmax(fmin(cell_i.gas_energy, cell_im1.gas_energy), ge_L); - ge_L = fmin(fmax(cell_i.gas_energy, cell_im1.gas_energy), ge_L); - ge_R = fmax(fmin(cell_i.gas_energy, cell_ip1.gas_energy), ge_R); - ge_R = fmin(fmax(cell_i.gas_energy, cell_ip1.gas_energy), ge_R); + if ((interface_L_iph.gas_energy - cell_i.gas_energy) * (cell_i.gas_energy - interface_R_imh.gas_energy) <= 0) { + interface_R_imh.gas_energy = interface_L_iph.gas_energy = cell_i.gas_energy; + } + if (6.0 * (interface_L_iph.gas_energy - interface_R_imh.gas_energy) * + (cell_i.gas_energy - 0.5 * (interface_R_imh.gas_energy + interface_L_iph.gas_energy)) > + (interface_L_iph.gas_energy - interface_R_imh.gas_energy) * + (interface_L_iph.gas_energy - interface_R_imh.gas_energy)) { + interface_R_imh.gas_energy = 3.0 * cell_i.gas_energy - 2.0 * interface_L_iph.gas_energy; + } + if (6.0 * (interface_L_iph.gas_energy - interface_R_imh.gas_energy) * + (cell_i.gas_energy - 0.5 * (interface_R_imh.gas_energy + interface_L_iph.gas_energy)) < + -(interface_L_iph.gas_energy - interface_R_imh.gas_energy) * + (interface_L_iph.gas_energy - interface_R_imh.gas_energy)) { + interface_L_iph.gas_energy = 3.0 * cell_i.gas_energy - 2.0 * interface_R_imh.gas_energy; + } + interface_R_imh.gas_energy = fmax(fmin(cell_i.gas_energy, cell_im1.gas_energy), interface_R_imh.gas_energy); + interface_R_imh.gas_energy = fmin(fmax(cell_i.gas_energy, cell_im1.gas_energy), interface_R_imh.gas_energy); + interface_L_iph.gas_energy = fmax(fmin(cell_i.gas_energy, cell_ip1.gas_energy), interface_L_iph.gas_energy); + interface_L_iph.gas_energy = fmin(fmax(cell_i.gas_energy, cell_ip1.gas_energy), interface_L_iph.gas_energy); #endif // DE #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { - if ((scalar_R[i] - cell_i.scalar[i]) * (cell_i.scalar[i] - scalar_L[i]) <= 0) { - scalar_L[i] = scalar_R[i] = cell_i.scalar[i]; + if ((interface_L_iph.scalar[i] - cell_i.scalar[i]) * (cell_i.scalar[i] - interface_R_imh.scalar[i]) <= 0) { + interface_R_imh.scalar[i] = interface_L_iph.scalar[i] = cell_i.scalar[i]; } - if (6.0 * (scalar_R[i] - scalar_L[i]) * (cell_i.scalar[i] - 0.5 * (scalar_L[i] + scalar_R[i])) > - (scalar_R[i] - scalar_L[i]) * (scalar_R[i] - scalar_L[i])) { - scalar_L[i] = 3.0 * cell_i.scalar[i] - 2.0 * scalar_R[i]; + if (6.0 * (interface_L_iph.scalar[i] - interface_R_imh.scalar[i]) * + (cell_i.scalar[i] - 0.5 * (interface_R_imh.scalar[i] + interface_L_iph.scalar[i])) > + (interface_L_iph.scalar[i] - interface_R_imh.scalar[i]) * + (interface_L_iph.scalar[i] - interface_R_imh.scalar[i])) { + interface_R_imh.scalar[i] = 3.0 * cell_i.scalar[i] - 2.0 * interface_L_iph.scalar[i]; } - if (6.0 * (scalar_R[i] - scalar_L[i]) * (cell_i.scalar[i] - 0.5 * (scalar_L[i] + scalar_R[i])) < - -(scalar_R[i] - scalar_L[i]) * (scalar_R[i] - scalar_L[i])) { - scalar_R[i] = 3.0 * cell_i.scalar[i] - 2.0 * scalar_L[i]; + if (6.0 * (interface_L_iph.scalar[i] - interface_R_imh.scalar[i]) * + (cell_i.scalar[i] - 0.5 * (interface_R_imh.scalar[i] + interface_L_iph.scalar[i])) < + -(interface_L_iph.scalar[i] - interface_R_imh.scalar[i]) * + (interface_L_iph.scalar[i] - interface_R_imh.scalar[i])) { + interface_L_iph.scalar[i] = 3.0 * cell_i.scalar[i] - 2.0 * interface_R_imh.scalar[i]; } - scalar_L[i] = fmax(fmin(cell_i.scalar[i], cell_im1.scalar[i]), scalar_L[i]); - scalar_L[i] = fmin(fmax(cell_i.scalar[i], cell_im1.scalar[i]), scalar_L[i]); - scalar_R[i] = fmax(fmin(cell_i.scalar[i], cell_ip1.scalar[i]), scalar_R[i]); - scalar_R[i] = fmin(fmax(cell_i.scalar[i], cell_ip1.scalar[i]), scalar_R[i]); + interface_R_imh.scalar[i] = fmax(fmin(cell_i.scalar[i], cell_im1.scalar[i]), interface_R_imh.scalar[i]); + interface_R_imh.scalar[i] = fmin(fmax(cell_i.scalar[i], cell_im1.scalar[i]), interface_R_imh.scalar[i]); + interface_L_iph.scalar[i] = fmax(fmin(cell_i.scalar[i], cell_ip1.scalar[i]), interface_L_iph.scalar[i]); + interface_L_iph.scalar[i] = fmin(fmax(cell_i.scalar[i], cell_ip1.scalar[i]), interface_L_iph.scalar[i]); } #endif // SCALAR @@ -977,27 +1008,27 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou // interpolation function // Stone Eqn 54 - del_d_m_i = d_R - d_L; - del_vx_m_i = vx_R - vx_L; - del_vy_m_i = vy_R - vy_L; - del_vz_m_i = vz_R - vz_L; - del_p_m_i = p_R - p_L; + del_d_m_i = interface_L_iph.density - interface_R_imh.density; + del_vx_m_i = interface_L_iph.velocity_x - interface_R_imh.velocity_x; + del_vy_m_i = interface_L_iph.velocity_y - interface_R_imh.velocity_y; + del_vz_m_i = interface_L_iph.velocity_z - interface_R_imh.velocity_z; + del_p_m_i = interface_L_iph.pressure - interface_R_imh.pressure; - d_6 = 6.0 * (cell_i.density - 0.5 * (d_L + d_R)); - vx_6 = 6.0 * (cell_i.velocity_x - 0.5 * (vx_L + vx_R)); - vy_6 = 6.0 * (cell_i.velocity_y - 0.5 * (vy_L + vy_R)); - vz_6 = 6.0 * (cell_i.velocity_z - 0.5 * (vz_L + vz_R)); - p_6 = 6.0 * (cell_i.pressure - 0.5 * (p_L + p_R)); + d_6 = 6.0 * (cell_i.density - 0.5 * (interface_R_imh.density + interface_L_iph.density)); + vx_6 = 6.0 * (cell_i.velocity_x - 0.5 * (interface_R_imh.velocity_x + interface_L_iph.velocity_x)); + vy_6 = 6.0 * (cell_i.velocity_y - 0.5 * (interface_R_imh.velocity_y + interface_L_iph.velocity_y)); + vz_6 = 6.0 * (cell_i.velocity_z - 0.5 * (interface_R_imh.velocity_z + interface_L_iph.velocity_z)); + p_6 = 6.0 * (cell_i.pressure - 0.5 * (interface_R_imh.pressure + interface_L_iph.pressure)); #ifdef DE - del_ge_m_i = ge_R - ge_L; - ge_6 = 6.0 * (cell_i.gas_energy - 0.5 * (ge_L + ge_R)); + del_ge_m_i = interface_L_iph.gas_energy - interface_R_imh.gas_energy; + ge_6 = 6.0 * (cell_i.gas_energy - 0.5 * (interface_R_imh.gas_energy + interface_L_iph.gas_energy)); #endif // DE #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { - del_scalar_m_i[i] = scalar_R[i] - scalar_L[i]; - scalar_6[i] = 6.0 * (cell_i.scalar[i] - 0.5 * (scalar_L[i] + scalar_R[i])); + del_scalar_m_i[i] = interface_L_iph.scalar[i] - interface_R_imh.scalar[i]; + scalar_6[i] = 6.0 * (cell_i.scalar[i] - 0.5 * (interface_R_imh.scalar[i] + interface_L_iph.scalar[i])); } #endif // SCALAR @@ -1021,30 +1052,52 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou lambda_min = fmin(lambda_m, (Real)0); // left interface value, i+1/2 - d_R = d_R - lambda_max * (0.5 * dtodx) * (del_d_m_i - (1.0 - (2.0 / 3.0) * lambda_max * dtodx) * d_6); - vx_R = vx_R - lambda_max * (0.5 * dtodx) * (del_vx_m_i - (1.0 - (2.0 / 3.0) * lambda_max * dtodx) * vx_6); - vy_R = vy_R - lambda_max * (0.5 * dtodx) * (del_vy_m_i - (1.0 - (2.0 / 3.0) * lambda_max * dtodx) * vy_6); - vz_R = vz_R - lambda_max * (0.5 * dtodx) * (del_vz_m_i - (1.0 - (2.0 / 3.0) * lambda_max * dtodx) * vz_6); - p_R = p_R - lambda_max * (0.5 * dtodx) * (del_p_m_i - (1.0 - (2.0 / 3.0) * lambda_max * dtodx) * p_6); + interface_L_iph.density = interface_L_iph.density - + lambda_max * (0.5 * dtodx) * (del_d_m_i - (1.0 - (2.0 / 3.0) * lambda_max * dtodx) * d_6); + interface_L_iph.velocity_x = + interface_L_iph.velocity_x - + lambda_max * (0.5 * dtodx) * (del_vx_m_i - (1.0 - (2.0 / 3.0) * lambda_max * dtodx) * vx_6); + interface_L_iph.velocity_y = + interface_L_iph.velocity_y - + lambda_max * (0.5 * dtodx) * (del_vy_m_i - (1.0 - (2.0 / 3.0) * lambda_max * dtodx) * vy_6); + interface_L_iph.velocity_z = + interface_L_iph.velocity_z - + lambda_max * (0.5 * dtodx) * (del_vz_m_i - (1.0 - (2.0 / 3.0) * lambda_max * dtodx) * vz_6); + interface_L_iph.pressure = interface_L_iph.pressure - + lambda_max * (0.5 * dtodx) * (del_p_m_i - (1.0 - (2.0 / 3.0) * lambda_max * dtodx) * p_6); // right interface value, i-1/2 - d_L = d_L - lambda_min * (0.5 * dtodx) * (del_d_m_i + (1.0 + (2.0 / 3.0) * lambda_min * dtodx) * d_6); - vx_L = vx_L - lambda_min * (0.5 * dtodx) * (del_vx_m_i + (1.0 + (2.0 / 3.0) * lambda_min * dtodx) * vx_6); - vy_L = vy_L - lambda_min * (0.5 * dtodx) * (del_vy_m_i + (1.0 + (2.0 / 3.0) * lambda_min * dtodx) * vy_6); - vz_L = vz_L - lambda_min * (0.5 * dtodx) * (del_vz_m_i + (1.0 + (2.0 / 3.0) * lambda_min * dtodx) * vz_6); - p_L = p_L - lambda_min * (0.5 * dtodx) * (del_p_m_i + (1.0 + (2.0 / 3.0) * lambda_min * dtodx) * p_6); + interface_R_imh.density = interface_R_imh.density - + lambda_min * (0.5 * dtodx) * (del_d_m_i + (1.0 + (2.0 / 3.0) * lambda_min * dtodx) * d_6); + interface_R_imh.velocity_x = + interface_R_imh.velocity_x - + lambda_min * (0.5 * dtodx) * (del_vx_m_i + (1.0 + (2.0 / 3.0) * lambda_min * dtodx) * vx_6); + interface_R_imh.velocity_y = + interface_R_imh.velocity_y - + lambda_min * (0.5 * dtodx) * (del_vy_m_i + (1.0 + (2.0 / 3.0) * lambda_min * dtodx) * vy_6); + interface_R_imh.velocity_z = + interface_R_imh.velocity_z - + lambda_min * (0.5 * dtodx) * (del_vz_m_i + (1.0 + (2.0 / 3.0) * lambda_min * dtodx) * vz_6); + interface_R_imh.pressure = interface_R_imh.pressure - + lambda_min * (0.5 * dtodx) * (del_p_m_i + (1.0 + (2.0 / 3.0) * lambda_min * dtodx) * p_6); #ifdef DE - ge_R = ge_R - lambda_max * (0.5 * dtodx) * (del_ge_m_i - (1.0 - (2.0 / 3.0) * lambda_max * dtodx) * ge_6); - ge_L = ge_L - lambda_min * (0.5 * dtodx) * (del_ge_m_i + (1.0 + (2.0 / 3.0) * lambda_min * dtodx) * ge_6); + interface_L_iph.gas_energy = + interface_L_iph.gas_energy - + lambda_max * (0.5 * dtodx) * (del_ge_m_i - (1.0 - (2.0 / 3.0) * lambda_max * dtodx) * ge_6); + interface_R_imh.gas_energy = + interface_R_imh.gas_energy - + lambda_min * (0.5 * dtodx) * (del_ge_m_i + (1.0 + (2.0 / 3.0) * lambda_min * dtodx) * ge_6); #endif // DE #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { - scalar_R[i] = scalar_R[i] - lambda_max * (0.5 * dtodx) * - (del_scalar_m_i[i] - (1.0 - (2.0 / 3.0) * lambda_max * dtodx) * scalar_6[i]); - scalar_L[i] = scalar_L[i] - lambda_min * (0.5 * dtodx) * - (del_scalar_m_i[i] + (1.0 + (2.0 / 3.0) * lambda_min * dtodx) * scalar_6[i]); + interface_L_iph.scalar[i] = + interface_L_iph.scalar[i] - + lambda_max * (0.5 * dtodx) * (del_scalar_m_i[i] - (1.0 - (2.0 / 3.0) * lambda_max * dtodx) * scalar_6[i]); + interface_R_imh.scalar[i] = + interface_R_imh.scalar[i] - + lambda_min * (0.5 * dtodx) * (del_scalar_m_i[i] + (1.0 + (2.0 / 3.0) * lambda_min * dtodx) * scalar_6[i]); } #endif // SCALAR @@ -1126,17 +1179,17 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou } // add the corrections to the initial guesses for the interface values - d_R += sum_1; - vx_R += sum_2; - vy_R += sum_3; - vz_R += sum_4; - p_R += sum_5; + interface_L_iph.density += sum_1; + interface_L_iph.velocity_x += sum_2; + interface_L_iph.velocity_y += sum_3; + interface_L_iph.velocity_z += sum_4; + interface_L_iph.pressure += sum_5; #ifdef DE - ge_R += sum_ge; + interface_L_iph.gas_energy += sum_ge; #endif // DE #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { - scalar_R[i] += sum_scalar[i]; + interface_L_iph.scalar[i] += sum_scalar[i]; } #endif // SCALAR @@ -1214,27 +1267,27 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou } // add the corrections - d_L += sum_1; - vx_L += sum_2; - vy_L += sum_3; - vz_L += sum_4; - p_L += sum_5; + interface_R_imh.density += sum_1; + interface_R_imh.velocity_x += sum_2; + interface_R_imh.velocity_y += sum_3; + interface_R_imh.velocity_z += sum_4; + interface_R_imh.pressure += sum_5; #ifdef DE - ge_L += sum_ge; + interface_R_imh.gas_energy += sum_ge; #endif // DE #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { - scalar_L[i] += sum_scalar[i]; + interface_R_imh.scalar[i] += sum_scalar[i]; } #endif // SCALAR #endif // not VL, i.e. CTU or SIMPLE was used for this section // enforce minimum values - d_L = fmax(d_L, (Real)TINY_NUMBER); - d_R = fmax(d_R, (Real)TINY_NUMBER); - p_L = fmax(p_L, (Real)TINY_NUMBER); - p_R = fmax(p_R, (Real)TINY_NUMBER); + interface_R_imh.density = fmax(interface_R_imh.density, (Real)TINY_NUMBER); + interface_L_iph.density = fmax(interface_L_iph.density, (Real)TINY_NUMBER); + interface_R_imh.pressure = fmax(interface_R_imh.pressure, (Real)TINY_NUMBER); + interface_L_iph.pressure = fmax(interface_L_iph.pressure, (Real)TINY_NUMBER); // Step 11 - Send final values back from kernel @@ -1250,32 +1303,40 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou id = xid + yid * nx + (zid - 1) * nx * ny; break; } - dev_bounds_R[id] = d_L; - dev_bounds_R[o1 * n_cells + id] = d_L * vx_L; - dev_bounds_R[o2 * n_cells + id] = d_L * vy_L; - dev_bounds_R[o3 * n_cells + id] = d_L * vz_L; - dev_bounds_R[4 * n_cells + id] = p_L / (gamma - 1.0) + 0.5 * d_L * (vx_L * vx_L + vy_L * vy_L + vz_L * vz_L); + dev_bounds_R[id] = interface_R_imh.density; + dev_bounds_R[o1 * n_cells + id] = interface_R_imh.density * interface_R_imh.velocity_x; + dev_bounds_R[o2 * n_cells + id] = interface_R_imh.density * interface_R_imh.velocity_y; + dev_bounds_R[o3 * n_cells + id] = interface_R_imh.density * interface_R_imh.velocity_z; + dev_bounds_R[4 * n_cells + id] = + interface_R_imh.pressure / (gamma - 1.0) + 0.5 * interface_R_imh.density * + (interface_R_imh.velocity_x * interface_R_imh.velocity_x + + interface_R_imh.velocity_y * interface_R_imh.velocity_y + + interface_R_imh.velocity_z * interface_R_imh.velocity_z); #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { - dev_bounds_R[(5 + i) * n_cells + id] = d_L * scalar_L[i]; + dev_bounds_R[(5 + i) * n_cells + id] = interface_R_imh.density * interface_R_imh.scalar[i]; } #endif // SCALAR #ifdef DE - dev_bounds_R[grid_enum::GasEnergy * n_cells + id] = d_L * ge_L; + dev_bounds_R[grid_enum::GasEnergy * n_cells + id] = interface_R_imh.density * interface_R_imh.gas_energy; #endif // DE // bounds_L refers to the left side of the i+1/2 interface id = xid + yid * nx + zid * nx * ny; - dev_bounds_L[id] = d_R; - dev_bounds_L[o1 * n_cells + id] = d_R * vx_R; - dev_bounds_L[o2 * n_cells + id] = d_R * vy_R; - dev_bounds_L[o3 * n_cells + id] = d_R * vz_R; - dev_bounds_L[4 * n_cells + id] = p_R / (gamma - 1.0) + 0.5 * d_R * (vx_R * vx_R + vy_R * vy_R + vz_R * vz_R); + dev_bounds_L[id] = interface_L_iph.density; + dev_bounds_L[o1 * n_cells + id] = interface_L_iph.density * interface_L_iph.velocity_x; + dev_bounds_L[o2 * n_cells + id] = interface_L_iph.density * interface_L_iph.velocity_y; + dev_bounds_L[o3 * n_cells + id] = interface_L_iph.density * interface_L_iph.velocity_z; + dev_bounds_L[4 * n_cells + id] = + interface_L_iph.pressure / (gamma - 1.0) + 0.5 * interface_L_iph.density * + (interface_L_iph.velocity_x * interface_L_iph.velocity_x + + interface_L_iph.velocity_y * interface_L_iph.velocity_y + + interface_L_iph.velocity_z * interface_L_iph.velocity_z); #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { - dev_bounds_L[(5 + i) * n_cells + id] = d_R * scalar_R[i]; + dev_bounds_L[(5 + i) * n_cells + id] = interface_L_iph.density * interface_L_iph.scalar[i]; } #endif // SCALAR #ifdef DE - dev_bounds_L[grid_enum::GasEnergy * n_cells + id] = d_R * ge_R; + dev_bounds_L[grid_enum::GasEnergy * n_cells + id] = interface_L_iph.density * interface_L_iph.gas_energy; #endif // DE } From cda763f17954e544d0d59c559a626383753d213d Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Mon, 17 Apr 2023 10:54:51 -0400 Subject: [PATCH 413/694] PPMC: Move declaration to when variable is used --- src/reconstruction/ppmc_cuda.cu | 194 ++++++++++++++------------------ 1 file changed, 87 insertions(+), 107 deletions(-) diff --git a/src/reconstruction/ppmc_cuda.cu b/src/reconstruction/ppmc_cuda.cu index cb276e69c..06677172b 100644 --- a/src/reconstruction/ppmc_cuda.cu +++ b/src/reconstruction/ppmc_cuda.cu @@ -60,37 +60,18 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou // declare other variables to be used reconstruction::Primitive del_L, del_R, del_C, del_G; // primitive slopes reconstruction::Characteristic del_a_L, del_a_R, del_a_C, del_a_G, del_a_m; // characteristic slopes - Real lim_slope_a, lim_slope_b; Real del_d_m_imo, del_vx_m_imo, del_vy_m_imo, del_vz_m_imo, del_p_m_imo; Real del_d_m_i, del_vx_m_i, del_vy_m_i, del_vz_m_i, del_p_m_i; Real del_d_m_ipo, del_vx_m_ipo, del_vy_m_ipo, del_vz_m_ipo, del_p_m_ipo; reconstruction::Primitive interface_R_imh, interface_L_iph; // Interface states -#ifndef VL - Real dtodx = dt / dx; - Real d_6, vx_6, vy_6, vz_6, p_6; - Real lambda_m, lambda_0, lambda_p; - Real lambda_max, lambda_min; - Real A, B, C, D; - Real chi_1, chi_2, chi_3, chi_4, chi_5; - Real sum_1, sum_2, sum_3, sum_4, sum_5; -#endif // VL - #ifdef DE Real del_ge_m_imo, del_ge_m_i, del_ge_m_ipo; - Real E_kin, E, dge; +#endif // DE - #ifndef VL - Real chi_ge, sum_ge, ge_6; - #endif // VL -#endif // DE #ifdef SCALAR Real del_scalar_m_imo[NSCALARS], del_scalar_m_i[NSCALARS], del_scalar_m_ipo[NSCALARS]; - - #ifndef VL - Real chi_scalar[NSCALARS], sum_scalar[NSCALARS], scalar_6[NSCALARS]; - #endif // VL -#endif // SCALAR +#endif // SCALAR // load the 5-cell stencil into registers // cell i @@ -100,12 +81,12 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou cell_i.velocity_y = dev_conserved[o2 * n_cells + id] / cell_i.density; cell_i.velocity_z = dev_conserved[o3 * n_cells + id] / cell_i.density; #ifdef DE // PRESSURE_DE - E = dev_conserved[4 * n_cells + id]; - E_kin = 0.5 * cell_i.density * - (cell_i.velocity_x * cell_i.velocity_x + cell_i.velocity_y * cell_i.velocity_y + - cell_i.velocity_z * cell_i.velocity_z); - dge = dev_conserved[grid_enum::GasEnergy * n_cells + id]; - cell_i.pressure = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, dge, gamma); + Real E = dev_conserved[4 * n_cells + id]; + Real E_kin = 0.5 * cell_i.density * + (cell_i.velocity_x * cell_i.velocity_x + cell_i.velocity_y * cell_i.velocity_y + + cell_i.velocity_z * cell_i.velocity_z); + Real gas_energy = dev_conserved[grid_enum::GasEnergy * n_cells + id]; + cell_i.pressure = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, gas_energy, gamma); #else // not DE cell_i.pressure = (dev_conserved[4 * n_cells + id] - 0.5 * cell_i.density * @@ -115,7 +96,7 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou #endif // PRESSURE_DE cell_i.pressure = fmax(cell_i.pressure, (Real)TINY_NUMBER); #ifdef DE - cell_i.gas_energy = dge / cell_i.density; + cell_i.gas_energy = gas_energy / cell_i.density; #endif // DE #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { @@ -144,8 +125,8 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou E_kin = 0.5 * cell_im1.density * (cell_im1.velocity_x * cell_im1.velocity_x + cell_im1.velocity_y * cell_im1.velocity_y + cell_im1.velocity_z * cell_im1.velocity_z); - dge = dev_conserved[grid_enum::GasEnergy * n_cells + id]; - cell_im1.pressure = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, dge, gamma); + gas_energy = dev_conserved[grid_enum::GasEnergy * n_cells + id]; + cell_im1.pressure = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, gas_energy, gamma); #else // not DE cell_im1.pressure = (dev_conserved[4 * n_cells + id] - 0.5 * cell_im1.density * @@ -155,7 +136,7 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou #endif // PRESSURE_DE cell_im1.pressure = fmax(cell_im1.pressure, (Real)TINY_NUMBER); #ifdef DE - cell_im1.gas_energy = dge / cell_im1.density; + cell_im1.gas_energy = gas_energy / cell_im1.density; #endif // DE #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { @@ -183,8 +164,8 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou E_kin = 0.5 * cell_ip1.density * (cell_ip1.velocity_x * cell_ip1.velocity_x + cell_ip1.velocity_y * cell_ip1.velocity_y + cell_ip1.velocity_z * cell_ip1.velocity_z); - dge = dev_conserved[grid_enum::GasEnergy * n_cells + id]; - cell_ip1.pressure = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, dge, gamma); + gas_energy = dev_conserved[grid_enum::GasEnergy * n_cells + id]; + cell_ip1.pressure = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, gas_energy, gamma); #else // not DE cell_ip1.pressure = (dev_conserved[4 * n_cells + id] - 0.5 * cell_ip1.density * @@ -194,7 +175,7 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou #endif // PRESSURE_DE cell_ip1.pressure = fmax(cell_ip1.pressure, (Real)TINY_NUMBER); #ifdef DE - cell_ip1.gas_energy = dge / cell_ip1.density; + cell_ip1.gas_energy = gas_energy / cell_ip1.density; #endif // DE #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { @@ -222,8 +203,8 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou E_kin = 0.5 * cell_im2.density * (cell_im2.velocity_x * cell_im2.velocity_x + cell_im2.velocity_y * cell_im2.velocity_y + cell_im2.velocity_z * cell_im2.velocity_z); - dge = dev_conserved[grid_enum::GasEnergy * n_cells + id]; - cell_im2.pressure = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, dge, gamma); + gas_energy = dev_conserved[grid_enum::GasEnergy * n_cells + id]; + cell_im2.pressure = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, gas_energy, gamma); #else // not DE cell_im2.pressure = (dev_conserved[4 * n_cells + id] - 0.5 * cell_im2.density * @@ -233,7 +214,7 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou #endif // PRESSURE_DE cell_im2.pressure = fmax(cell_im2.pressure, (Real)TINY_NUMBER); #ifdef DE - cell_im2.gas_energy = dge / cell_im2.density; + cell_im2.gas_energy = gas_energy / cell_im2.density; #endif // DE #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { @@ -261,8 +242,8 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou E_kin = 0.5 * cell_ip2.density * (cell_ip2.velocity_x * cell_ip2.velocity_x + cell_ip2.velocity_y * cell_ip2.velocity_y + cell_ip2.velocity_z * cell_ip2.velocity_z); - dge = dev_conserved[grid_enum::GasEnergy * n_cells + id]; - cell_ip2.pressure = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, dge, gamma); + gas_energy = dev_conserved[grid_enum::GasEnergy * n_cells + id]; + cell_ip2.pressure = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, gas_energy, gamma); #else // not DE cell_ip2.pressure = (dev_conserved[4 * n_cells + id] - 0.5 * cell_ip2.density * @@ -272,7 +253,7 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou #endif // PRESSURE_DE cell_ip2.pressure = fmax(cell_ip2.pressure, (Real)TINY_NUMBER); #ifdef DE - cell_ip2.gas_energy = dge / cell_ip2.density; + cell_ip2.gas_energy = gas_energy / cell_ip2.density; #endif // DE #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { @@ -406,7 +387,7 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou // Stone Eqn 38 del_a_m.a0 = del_a_m.a1 = del_a_m.a2 = del_a_m.a3 = del_a_m.a4 = 0.0; - + Real lim_slope_a, lim_slope_b; if (del_a_L.a0 * del_a_R.a0 > 0.0) { lim_slope_a = fmin(fabs(del_a_L.a0), fabs(del_a_R.a0)); lim_slope_b = fmin(fabs(del_a_C.a0), fabs(del_a_G.a0)); @@ -1003,7 +984,6 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou #endif // SCALAR #ifndef VL - // Step 8 - Compute the coefficients for the monotonized parabolic // interpolation function // Stone Eqn 54 @@ -1014,19 +994,20 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou del_vz_m_i = interface_L_iph.velocity_z - interface_R_imh.velocity_z; del_p_m_i = interface_L_iph.pressure - interface_R_imh.pressure; - d_6 = 6.0 * (cell_i.density - 0.5 * (interface_R_imh.density + interface_L_iph.density)); - vx_6 = 6.0 * (cell_i.velocity_x - 0.5 * (interface_R_imh.velocity_x + interface_L_iph.velocity_x)); - vy_6 = 6.0 * (cell_i.velocity_y - 0.5 * (interface_R_imh.velocity_y + interface_L_iph.velocity_y)); - vz_6 = 6.0 * (cell_i.velocity_z - 0.5 * (interface_R_imh.velocity_z + interface_L_iph.velocity_z)); - p_6 = 6.0 * (cell_i.pressure - 0.5 * (interface_R_imh.pressure + interface_L_iph.pressure)); + Real const d_6 = 6.0 * (cell_i.density - 0.5 * (interface_R_imh.density + interface_L_iph.density)); + Real const vx_6 = 6.0 * (cell_i.velocity_x - 0.5 * (interface_R_imh.velocity_x + interface_L_iph.velocity_x)); + Real const vy_6 = 6.0 * (cell_i.velocity_y - 0.5 * (interface_R_imh.velocity_y + interface_L_iph.velocity_y)); + Real const vz_6 = 6.0 * (cell_i.velocity_z - 0.5 * (interface_R_imh.velocity_z + interface_L_iph.velocity_z)); + Real const p_6 = 6.0 * (cell_i.pressure - 0.5 * (interface_R_imh.pressure + interface_L_iph.pressure)); #ifdef DE - del_ge_m_i = interface_L_iph.gas_energy - interface_R_imh.gas_energy; - ge_6 = 6.0 * (cell_i.gas_energy - 0.5 * (interface_R_imh.gas_energy + interface_L_iph.gas_energy)); + del_ge_m_i = interface_L_iph.gas_energy - interface_R_imh.gas_energy; + Real const ge_6 = 6.0 * (cell_i.gas_energy - 0.5 * (interface_R_imh.gas_energy + interface_L_iph.gas_energy)); #endif // DE #ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { + Real scalar_6[NSCALARS] : for (int i = 0; i < NSCALARS; i++) + { del_scalar_m_i[i] = interface_L_iph.scalar[i] - interface_R_imh.scalar[i]; scalar_6[i] = 6.0 * (cell_i.scalar[i] - 0.5 * (interface_R_imh.scalar[i] + interface_L_iph.scalar[i])); } @@ -1038,20 +1019,21 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou // recalculate the adiabatic sound speed in cell i sound_speed = hydro_utilities::Calc_Sound_Speed(cell_i.pressure, cell_i.density, gamma); - lambda_m = cell_i.velocity_x - sound_speed; - lambda_0 = cell_i.velocity_x; - lambda_p = cell_i.velocity_x + sound_speed; + Real const lambda_m = cell_i.velocity_x - sound_speed; + Real const lambda_0 = cell_i.velocity_x; + Real const lambda_p = cell_i.velocity_x + sound_speed; // Step 9 - Compute the left and right interface values using monotonized // parabolic interpolation // Stone Eqns 55 & 56 // largest eigenvalue - lambda_max = fmax(lambda_p, (Real)0); + Real const lambda_max = fmax(lambda_p, (Real)0); // smallest eigenvalue - lambda_min = fmin(lambda_m, (Real)0); + Real const lambda_min = fmin(lambda_m, (Real)0); // left interface value, i+1/2 + Real const dtodx = dt / dx; interface_L_iph.density = interface_L_iph.density - lambda_max * (0.5 * dtodx) * (del_d_m_i - (1.0 - (2.0 / 3.0) * lambda_max * dtodx) * d_6); interface_L_iph.velocity_x = @@ -1105,47 +1087,45 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou // Stone Eqns 57 - 60 // left-hand interface value, i+1/2 - sum_1 = 0; - sum_2 = 0; - sum_3 = 0; - sum_4 = 0; - sum_5 = 0; + Real sum_1 = 0, sum_2 = 0, sum_3 = 0, sum_4 = 0, sum_5 = 0; #ifdef DE - sum_ge = 0; + Real sum_ge = 0; #endif // DE #ifdef SCALAR + Real sum_scalar[NSCALARS]; for (int i = 0; i < NSCALARS; i++) { sum_scalar[i] = 0; } #endif // SCALAR if (lambda_m >= 0) { - A = (0.5 * dtodx) * (lambda_p - lambda_m); - B = (1.0 / 3.0) * (dtodx) * (dtodx) * (lambda_p * lambda_p - lambda_m * lambda_m); + Real const A = (0.5 * dtodx) * (lambda_p - lambda_m); + Real const B = (1.0 / 3.0) * (dtodx) * (dtodx) * (lambda_p * lambda_p - lambda_m * lambda_m); - chi_1 = A * (del_d_m_i - d_6) + B * d_6; - chi_2 = A * (del_vx_m_i - vx_6) + B * vx_6; - chi_3 = A * (del_vy_m_i - vy_6) + B * vy_6; - chi_4 = A * (del_vz_m_i - vz_6) + B * vz_6; - chi_5 = A * (del_p_m_i - p_6) + B * p_6; + Real const chi_1 = A * (del_d_m_i - d_6) + B * d_6; + Real const chi_2 = A * (del_vx_m_i - vx_6) + B * vx_6; + Real const chi_3 = A * (del_vy_m_i - vy_6) + B * vy_6; + Real const chi_4 = A * (del_vz_m_i - vz_6) + B * vz_6; + Real const chi_5 = A * (del_p_m_i - p_6) + B * p_6; sum_1 += -0.5 * (cell_i.density * chi_2 / sound_speed - chi_5 / (sound_speed * sound_speed)); sum_2 += 0.5 * (chi_2 - chi_5 / (sound_speed * cell_i.density)); sum_5 += -0.5 * (cell_i.density * chi_2 * sound_speed - chi_5); } if (lambda_0 >= 0) { - A = (0.5 * dtodx) * (lambda_p - lambda_0); - B = (1.0 / 3.0) * (dtodx) * (dtodx) * (lambda_p * lambda_p - lambda_0 * lambda_0); - - chi_1 = A * (del_d_m_i - d_6) + B * d_6; - chi_2 = A * (del_vx_m_i - vx_6) + B * vx_6; - chi_3 = A * (del_vy_m_i - vy_6) + B * vy_6; - chi_4 = A * (del_vz_m_i - vz_6) + B * vz_6; - chi_5 = A * (del_p_m_i - p_6) + B * p_6; + Real const A = (0.5 * dtodx) * (lambda_p - lambda_0); + Real const B = (1.0 / 3.0) * (dtodx) * (dtodx) * (lambda_p * lambda_p - lambda_0 * lambda_0); + + Real const chi_1 = A * (del_d_m_i - d_6) + B * d_6; + Real const chi_2 = A * (del_vx_m_i - vx_6) + B * vx_6; + Real const chi_3 = A * (del_vy_m_i - vy_6) + B * vy_6; + Real const chi_4 = A * (del_vz_m_i - vz_6) + B * vz_6; + Real const chi_5 = A * (del_p_m_i - p_6) + B * p_6; #ifdef DE - chi_ge = A * (del_ge_m_i - ge_6) + B * ge_6; + Real chi_ge = A * (del_ge_m_i - ge_6) + B * ge_6; #endif // DE #ifdef SCALAR + Real chi_scalar[NSCALARS]; for (int i = 0; i < NSCALARS; i++) { chi_scalar[i] = A * (del_scalar_m_i[i] - scalar_6[i]) + B * scalar_6[i]; } @@ -1164,14 +1144,14 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou #endif // SCALAR } if (lambda_p >= 0) { - A = (0.5 * dtodx) * (lambda_p - lambda_p); - B = (1.0 / 3.0) * (dtodx) * (dtodx) * (lambda_p * lambda_p - lambda_p * lambda_p); + Real const A = (0.5 * dtodx) * (lambda_p - lambda_p); + Real const B = (1.0 / 3.0) * (dtodx) * (dtodx) * (lambda_p * lambda_p - lambda_p * lambda_p); - chi_1 = A * (del_d_m_i - d_6) + B * d_6; - chi_2 = A * (del_vx_m_i - vx_6) + B * vx_6; - chi_3 = A * (del_vy_m_i - vy_6) + B * vy_6; - chi_4 = A * (del_vz_m_i - vz_6) + B * vz_6; - chi_5 = A * (del_p_m_i - p_6) + B * p_6; + Real const chi_1 = A * (del_d_m_i - d_6) + B * d_6; + Real const chi_2 = A * (del_vx_m_i - vx_6) + B * vx_6; + Real const chi_3 = A * (del_vy_m_i - vy_6) + B * vy_6; + Real const chi_4 = A * (del_vz_m_i - vz_6) + B * vz_6; + Real const chi_5 = A * (del_p_m_i - p_6) + B * p_6; sum_1 += 0.5 * (cell_i.density * chi_2 / sound_speed + chi_5 / (sound_speed * sound_speed)); sum_2 += 0.5 * (chi_2 + chi_5 / (sound_speed * cell_i.density)); @@ -1208,28 +1188,28 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou } #endif // SCALAR if (lambda_m <= 0) { - C = (0.5 * dtodx) * (lambda_m - lambda_m); - D = (1.0 / 3.0) * (dtodx) * (dtodx) * (lambda_m * lambda_m - lambda_m * lambda_m); + Real const C = (0.5 * dtodx) * (lambda_m - lambda_m); + Real const D = (1.0 / 3.0) * (dtodx) * (dtodx) * (lambda_m * lambda_m - lambda_m * lambda_m); - chi_1 = C * (del_d_m_i + d_6) + D * d_6; - chi_2 = C * (del_vx_m_i + vx_6) + D * vx_6; - chi_3 = C * (del_vy_m_i + vy_6) + D * vy_6; - chi_4 = C * (del_vz_m_i + vz_6) + D * vz_6; - chi_5 = C * (del_p_m_i + p_6) + D * p_6; + Real const chi_1 = C * (del_d_m_i + d_6) + D * d_6; + Real const chi_2 = C * (del_vx_m_i + vx_6) + D * vx_6; + Real const chi_3 = C * (del_vy_m_i + vy_6) + D * vy_6; + Real const chi_4 = C * (del_vz_m_i + vz_6) + D * vz_6; + Real const chi_5 = C * (del_p_m_i + p_6) + D * p_6; sum_1 += -0.5 * (cell_i.density * chi_2 / sound_speed - chi_5 / (sound_speed * sound_speed)); sum_2 += 0.5 * (chi_2 - chi_5 / (sound_speed * cell_i.density)); sum_5 += -0.5 * (cell_i.density * chi_2 * sound_speed - chi_5); } if (lambda_0 <= 0) { - C = (0.5 * dtodx) * (lambda_m - lambda_0); - D = (1.0 / 3.0) * (dtodx) * (dtodx) * (lambda_m * lambda_m - lambda_0 * lambda_0); - - chi_1 = C * (del_d_m_i + d_6) + D * d_6; - chi_2 = C * (del_vx_m_i + vx_6) + D * vx_6; - chi_3 = C * (del_vy_m_i + vy_6) + D * vy_6; - chi_4 = C * (del_vz_m_i + vz_6) + D * vz_6; - chi_5 = C * (del_p_m_i + p_6) + D * p_6; + Real const C = (0.5 * dtodx) * (lambda_m - lambda_0); + Real const D = (1.0 / 3.0) * (dtodx) * (dtodx) * (lambda_m * lambda_m - lambda_0 * lambda_0); + + Real const chi_1 = C * (del_d_m_i + d_6) + D * d_6; + Real const chi_2 = C * (del_vx_m_i + vx_6) + D * vx_6; + Real const chi_3 = C * (del_vy_m_i + vy_6) + D * vy_6; + Real const chi_4 = C * (del_vz_m_i + vz_6) + D * vz_6; + Real const chi_5 = C * (del_p_m_i + p_6) + D * p_6; #ifdef DE chi_ge = C * (del_ge_m_i + ge_6) + D * ge_6; #endif // DE @@ -1252,14 +1232,14 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou #endif // SCALAR } if (lambda_p <= 0) { - C = (0.5 * dtodx) * (lambda_m - lambda_p); - D = (1.0 / 3.0) * (dtodx) * (dtodx) * (lambda_m * lambda_m - lambda_p * lambda_p); - - chi_1 = C * (del_d_m_i + d_6) + D * d_6; - chi_2 = C * (del_vx_m_i + vx_6) + D * vx_6; - chi_3 = C * (del_vy_m_i + vy_6) + D * vy_6; - chi_4 = C * (del_vz_m_i + vz_6) + D * vz_6; - chi_5 = C * (del_p_m_i + p_6) + D * p_6; + Real const C = (0.5 * dtodx) * (lambda_m - lambda_p); + Real const D = (1.0 / 3.0) * (dtodx) * (dtodx) * (lambda_m * lambda_m - lambda_p * lambda_p); + + Real const chi_1 = C * (del_d_m_i + d_6) + D * d_6; + Real const chi_2 = C * (del_vx_m_i + vx_6) + D * vx_6; + Real const chi_3 = C * (del_vy_m_i + vy_6) + D * vy_6; + Real const chi_4 = C * (del_vz_m_i + vz_6) + D * vz_6; + Real const chi_5 = C * (del_p_m_i + p_6) + D * p_6; sum_1 += 0.5 * (cell_i.density * chi_2 / sound_speed + chi_5 / (sound_speed * sound_speed)); sum_2 += 0.5 * (chi_2 + chi_5 / (sound_speed * cell_i.density)); From 50e210d00be21fadcf9bb23c3ccc270805ae93c9 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Mon, 17 Apr 2023 11:04:36 -0400 Subject: [PATCH 414/694] PPMC: Replace monotonized primitive slopes with structs --- src/reconstruction/ppmc_cuda.cu | 246 ++++++++++++++++---------------- 1 file changed, 125 insertions(+), 121 deletions(-) diff --git a/src/reconstruction/ppmc_cuda.cu b/src/reconstruction/ppmc_cuda.cu index 06677172b..d91d830a5 100644 --- a/src/reconstruction/ppmc_cuda.cu +++ b/src/reconstruction/ppmc_cuda.cu @@ -60,18 +60,8 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou // declare other variables to be used reconstruction::Primitive del_L, del_R, del_C, del_G; // primitive slopes reconstruction::Characteristic del_a_L, del_a_R, del_a_C, del_a_G, del_a_m; // characteristic slopes - Real del_d_m_imo, del_vx_m_imo, del_vy_m_imo, del_vz_m_imo, del_p_m_imo; - Real del_d_m_i, del_vx_m_i, del_vy_m_i, del_vz_m_i, del_p_m_i; - Real del_d_m_ipo, del_vx_m_ipo, del_vy_m_ipo, del_vz_m_ipo, del_p_m_ipo; - reconstruction::Primitive interface_R_imh, interface_L_iph; // Interface states - -#ifdef DE - Real del_ge_m_imo, del_ge_m_i, del_ge_m_ipo; -#endif // DE - -#ifdef SCALAR - Real del_scalar_m_imo[NSCALARS], del_scalar_m_i[NSCALARS], del_scalar_m_ipo[NSCALARS]; -#endif // SCALAR + reconstruction::Primitive del_m_im1, del_m_i, del_m_ip1; // Monotonized primitive slopes + reconstruction::Primitive interface_R_imh, interface_L_iph; // Interface states // load the 5-cell stencil into registers // cell i @@ -415,11 +405,11 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou } #ifdef DE if (del_L.gas_energy * del_R.gas_energy > 0.0) { - lim_slope_a = fmin(fabs(del_L.gas_energy), fabs(del_R.gas_energy)); - lim_slope_b = fmin(fabs(del_C.gas_energy), fabs(del_G.gas_energy)); - del_ge_m_imo = sgn_CUDA(del_C.gas_energy) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); + lim_slope_a = fmin(fabs(del_L.gas_energy), fabs(del_R.gas_energy)); + lim_slope_b = fmin(fabs(del_C.gas_energy), fabs(del_G.gas_energy)); + del_m_im1.gas_energy = sgn_CUDA(del_C.gas_energy) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); } else { - del_ge_m_imo = 0.0; + del_m_im1.gas_energy = 0.0; } #endif // DE #ifdef SCALAR @@ -427,9 +417,9 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou if (del_L.scalar[i] * del_R.scalar[i] > 0.0) { lim_slope_a = fmin(fabs(del_L.scalar[i]), fabs(del_R.scalar[i])); lim_slope_b = fmin(fabs(del_C.scalar[i]), fabs(del_G.scalar[i])); - del_scalar_m_imo[i] = sgn_CUDA(del_C.scalar[i]) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); + del_m_im1.scalar[i] = sgn_CUDA(del_C.scalar[i]) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); } else { - del_scalar_m_imo[i] = 0.0; + del_m_im1.scalar[i] = 0.0; } } #endif // SCALAR @@ -439,11 +429,11 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou // primitive variables // Stone Eqn 39 - del_d_m_imo = del_a_m.a0 + del_a_m.a1 + del_a_m.a4; - del_vx_m_imo = -sound_speed * del_a_m.a0 / cell_im1.density + sound_speed * del_a_m.a4 / cell_im1.density; - del_vy_m_imo = del_a_m.a2; - del_vz_m_imo = del_a_m.a3; - del_p_m_imo = sound_speed * sound_speed * del_a_m.a0 + sound_speed * sound_speed * del_a_m.a4; + del_m_im1.density = del_a_m.a0 + del_a_m.a1 + del_a_m.a4; + del_m_im1.velocity_x = -sound_speed * del_a_m.a0 / cell_im1.density + sound_speed * del_a_m.a4 / cell_im1.density; + del_m_im1.velocity_y = del_a_m.a2; + del_m_im1.velocity_z = del_a_m.a3; + del_m_im1.pressure = sound_speed * sound_speed * del_a_m.a0 + sound_speed * sound_speed * del_a_m.a4; // Step 2 - Compute the left, right, centered, and van Leer differences of // the primitive variables @@ -596,11 +586,11 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou } #ifdef DE if (del_L.gas_energy * del_R.gas_energy > 0.0) { - lim_slope_a = fmin(fabs(del_L.gas_energy), fabs(del_R.gas_energy)); - lim_slope_b = fmin(fabs(del_C.gas_energy), fabs(del_G.gas_energy)); - del_ge_m_i = sgn_CUDA(del_C.gas_energy) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); + lim_slope_a = fmin(fabs(del_L.gas_energy), fabs(del_R.gas_energy)); + lim_slope_b = fmin(fabs(del_C.gas_energy), fabs(del_G.gas_energy)); + del_m_i.gas_energy = sgn_CUDA(del_C.gas_energy) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); } else { - del_ge_m_i = 0.0; + del_m_i.gas_energy = 0.0; } #endif // DE #ifdef SCALAR @@ -608,9 +598,9 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou if (del_L.scalar[i] * del_R.scalar[i] > 0.0) { lim_slope_a = fmin(fabs(del_L.scalar[i]), fabs(del_R.scalar[i])); lim_slope_b = fmin(fabs(del_C.scalar[i]), fabs(del_G.scalar[i])); - del_scalar_m_i[i] = sgn_CUDA(del_C.scalar[i]) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); + del_m_i.scalar[i] = sgn_CUDA(del_C.scalar[i]) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); } else { - del_scalar_m_i[i] = 0.0; + del_m_i.scalar[i] = 0.0; } } #endif // SCALAR @@ -620,11 +610,11 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou // primitive variables // Stone Eqn 39 - del_d_m_i = del_a_m.a0 + del_a_m.a1 + del_a_m.a4; - del_vx_m_i = -sound_speed * del_a_m.a0 / cell_i.density + sound_speed * del_a_m.a4 / cell_i.density; - del_vy_m_i = del_a_m.a2; - del_vz_m_i = del_a_m.a3; - del_p_m_i = sound_speed * sound_speed * del_a_m.a0 + sound_speed * sound_speed * del_a_m.a4; + del_m_i.density = del_a_m.a0 + del_a_m.a1 + del_a_m.a4; + del_m_i.velocity_x = -sound_speed * del_a_m.a0 / cell_i.density + sound_speed * del_a_m.a4 / cell_i.density; + del_m_i.velocity_y = del_a_m.a2; + del_m_i.velocity_z = del_a_m.a3; + del_m_i.pressure = sound_speed * sound_speed * del_a_m.a0 + sound_speed * sound_speed * del_a_m.a4; // Step 2 - Compute the left, right, centered, and van Leer differences of // the primitive variables @@ -777,11 +767,11 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou } #ifdef DE if (del_L.gas_energy * del_R.gas_energy > 0.0) { - lim_slope_a = fmin(fabs(del_L.gas_energy), fabs(del_R.gas_energy)); - lim_slope_b = fmin(fabs(del_C.gas_energy), fabs(del_G.gas_energy)); - del_ge_m_ipo = sgn_CUDA(del_C.gas_energy) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); + lim_slope_a = fmin(fabs(del_L.gas_energy), fabs(del_R.gas_energy)); + lim_slope_b = fmin(fabs(del_C.gas_energy), fabs(del_G.gas_energy)); + del_m_ip1.gas_energy = sgn_CUDA(del_C.gas_energy) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); } else { - del_ge_m_ipo = 0.0; + del_m_ip1.gas_energy = 0.0; } #endif // DE #ifdef SCALAR @@ -789,9 +779,9 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou if (del_L.scalar[i] * del_R.scalar[i] > 0.0) { lim_slope_a = fmin(fabs(del_L.scalar[i]), fabs(del_R.scalar[i])); lim_slope_b = fmin(fabs(del_C.scalar[i]), fabs(del_G.scalar[i])); - del_scalar_m_ipo[i] = sgn_CUDA(del_C.scalar[i]) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); + del_m_ip1.scalar[i] = sgn_CUDA(del_C.scalar[i]) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); } else { - del_scalar_m_ipo[i] = 0.0; + del_m_ip1.scalar[i] = 0.0; } } #endif // SCALAR @@ -801,39 +791,49 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou // primitive variables // Stone Eqn 39 - del_d_m_ipo = del_a_m.a0 + del_a_m.a1 + del_a_m.a4; - del_vx_m_ipo = -sound_speed * del_a_m.a0 / cell_ip1.density + sound_speed * del_a_m.a4 / cell_ip1.density; - del_vy_m_ipo = del_a_m.a2; - del_vz_m_ipo = del_a_m.a3; - del_p_m_ipo = sound_speed * sound_speed * del_a_m.a0 + sound_speed * sound_speed * del_a_m.a4; + del_m_ip1.density = del_a_m.a0 + del_a_m.a1 + del_a_m.a4; + del_m_ip1.velocity_x = -sound_speed * del_a_m.a0 / cell_ip1.density + sound_speed * del_a_m.a4 / cell_ip1.density; + del_m_ip1.velocity_y = del_a_m.a2; + del_m_ip1.velocity_z = del_a_m.a3; + del_m_ip1.pressure = sound_speed * sound_speed * del_a_m.a0 + sound_speed * sound_speed * del_a_m.a4; // Step 6 - Use parabolic interpolation to compute values at the left and // right of each cell center // Here, the subscripts L and R refer to the left and right side of // the ith cell center Stone Eqn 46 - interface_R_imh.density = 0.5 * (cell_i.density + cell_im1.density) - (del_d_m_i - del_d_m_imo) / 6.0; - interface_R_imh.velocity_x = 0.5 * (cell_i.velocity_x + cell_im1.velocity_x) - (del_vx_m_i - del_vx_m_imo) / 6.0; - interface_R_imh.velocity_y = 0.5 * (cell_i.velocity_y + cell_im1.velocity_y) - (del_vy_m_i - del_vy_m_imo) / 6.0; - interface_R_imh.velocity_z = 0.5 * (cell_i.velocity_z + cell_im1.velocity_z) - (del_vz_m_i - del_vz_m_imo) / 6.0; - interface_R_imh.pressure = 0.5 * (cell_i.pressure + cell_im1.pressure) - (del_p_m_i - del_p_m_imo) / 6.0; + interface_R_imh.density = 0.5 * (cell_i.density + cell_im1.density) - (del_m_i.density - del_m_im1.density) / 6.0; + interface_R_imh.velocity_x = + 0.5 * (cell_i.velocity_x + cell_im1.velocity_x) - (del_m_i.velocity_x - del_m_im1.velocity_x) / 6.0; + interface_R_imh.velocity_y = + 0.5 * (cell_i.velocity_y + cell_im1.velocity_y) - (del_m_i.velocity_y - del_m_im1.velocity_y) / 6.0; + interface_R_imh.velocity_z = + 0.5 * (cell_i.velocity_z + cell_im1.velocity_z) - (del_m_i.velocity_z - del_m_im1.velocity_z) / 6.0; + interface_R_imh.pressure = + 0.5 * (cell_i.pressure + cell_im1.pressure) - (del_m_i.pressure - del_m_im1.pressure) / 6.0; - interface_L_iph.density = 0.5 * (cell_ip1.density + cell_i.density) - (del_d_m_ipo - del_d_m_i) / 6.0; - interface_L_iph.velocity_x = 0.5 * (cell_ip1.velocity_x + cell_i.velocity_x) - (del_vx_m_ipo - del_vx_m_i) / 6.0; - interface_L_iph.velocity_y = 0.5 * (cell_ip1.velocity_y + cell_i.velocity_y) - (del_vy_m_ipo - del_vy_m_i) / 6.0; - interface_L_iph.velocity_z = 0.5 * (cell_ip1.velocity_z + cell_i.velocity_z) - (del_vz_m_ipo - del_vz_m_i) / 6.0; - interface_L_iph.pressure = 0.5 * (cell_ip1.pressure + cell_i.pressure) - (del_p_m_ipo - del_p_m_i) / 6.0; + interface_L_iph.density = 0.5 * (cell_ip1.density + cell_i.density) - (del_m_ip1.density - del_m_i.density) / 6.0; + interface_L_iph.velocity_x = + 0.5 * (cell_ip1.velocity_x + cell_i.velocity_x) - (del_m_ip1.velocity_x - del_m_i.velocity_x) / 6.0; + interface_L_iph.velocity_y = + 0.5 * (cell_ip1.velocity_y + cell_i.velocity_y) - (del_m_ip1.velocity_y - del_m_i.velocity_y) / 6.0; + interface_L_iph.velocity_z = + 0.5 * (cell_ip1.velocity_z + cell_i.velocity_z) - (del_m_ip1.velocity_z - del_m_i.velocity_z) / 6.0; + interface_L_iph.pressure = + 0.5 * (cell_ip1.pressure + cell_i.pressure) - (del_m_ip1.pressure - del_m_i.pressure) / 6.0; #ifdef DE - interface_R_imh.gas_energy = 0.5 * (cell_i.gas_energy + cell_im1.gas_energy) - (del_ge_m_i - del_ge_m_imo) / 6.0; - interface_L_iph.gas_energy = 0.5 * (cell_ip1.gas_energy + cell_i.gas_energy) - (del_ge_m_ipo - del_ge_m_i) / 6.0; + interface_R_imh.gas_energy = + 0.5 * (cell_i.gas_energy + cell_im1.gas_energy) - (del_m_i.gas_energy - del_m_im1.gas_energy) / 6.0; + interface_L_iph.gas_energy = + 0.5 * (cell_ip1.gas_energy + cell_i.gas_energy) - (del_m_ip1.gas_energy - del_m_i.gas_energy) / 6.0; #endif // DE #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { interface_R_imh.scalar[i] = - 0.5 * (cell_i.scalar[i] + cell_im1.scalar[i]) - (del_scalar_m_i[i] - del_scalar_m_imo[i]) / 6.0; + 0.5 * (cell_i.scalar[i] + cell_im1.scalar[i]) - (del_m_i.scalar[i] - del_m_im1.scalar[i]) / 6.0; interface_L_iph.scalar[i] = - 0.5 * (cell_ip1.scalar[i] + cell_i.scalar[i]) - (del_scalar_m_ipo[i] - del_scalar_m_i[i]) / 6.0; + 0.5 * (cell_ip1.scalar[i] + cell_i.scalar[i]) - (del_m_ip1.scalar[i] - del_m_i.scalar[i]) / 6.0; } #endif // SCALAR @@ -988,11 +988,11 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou // interpolation function // Stone Eqn 54 - del_d_m_i = interface_L_iph.density - interface_R_imh.density; - del_vx_m_i = interface_L_iph.velocity_x - interface_R_imh.velocity_x; - del_vy_m_i = interface_L_iph.velocity_y - interface_R_imh.velocity_y; - del_vz_m_i = interface_L_iph.velocity_z - interface_R_imh.velocity_z; - del_p_m_i = interface_L_iph.pressure - interface_R_imh.pressure; + del_m_i.density = interface_L_iph.density - interface_R_imh.density; + del_m_i.velocity_x = interface_L_iph.velocity_x - interface_R_imh.velocity_x; + del_m_i.velocity_y = interface_L_iph.velocity_y - interface_R_imh.velocity_y; + del_m_i.velocity_z = interface_L_iph.velocity_z - interface_R_imh.velocity_z; + del_m_i.pressure = interface_L_iph.pressure - interface_R_imh.pressure; Real const d_6 = 6.0 * (cell_i.density - 0.5 * (interface_R_imh.density + interface_L_iph.density)); Real const vx_6 = 6.0 * (cell_i.velocity_x - 0.5 * (interface_R_imh.velocity_x + interface_L_iph.velocity_x)); @@ -1001,14 +1001,14 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou Real const p_6 = 6.0 * (cell_i.pressure - 0.5 * (interface_R_imh.pressure + interface_L_iph.pressure)); #ifdef DE - del_ge_m_i = interface_L_iph.gas_energy - interface_R_imh.gas_energy; - Real const ge_6 = 6.0 * (cell_i.gas_energy - 0.5 * (interface_R_imh.gas_energy + interface_L_iph.gas_energy)); + del_m_i.gas_energy = interface_L_iph.gas_energy - interface_R_imh.gas_energy; + Real const ge_6 = 6.0 * (cell_i.gas_energy - 0.5 * (interface_R_imh.gas_energy + interface_L_iph.gas_energy)); #endif // DE #ifdef SCALAR Real scalar_6[NSCALARS] : for (int i = 0; i < NSCALARS; i++) { - del_scalar_m_i[i] = interface_L_iph.scalar[i] - interface_R_imh.scalar[i]; + del_m_i.scalar[i] = interface_L_iph.scalar[i] - interface_R_imh.scalar[i]; scalar_6[i] = 6.0 * (cell_i.scalar[i] - 0.5 * (interface_R_imh.scalar[i] + interface_L_iph.scalar[i])); } #endif // SCALAR @@ -1033,53 +1033,57 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou Real const lambda_min = fmin(lambda_m, (Real)0); // left interface value, i+1/2 - Real const dtodx = dt / dx; - interface_L_iph.density = interface_L_iph.density - - lambda_max * (0.5 * dtodx) * (del_d_m_i - (1.0 - (2.0 / 3.0) * lambda_max * dtodx) * d_6); + Real const dtodx = dt / dx; + interface_L_iph.density = + interface_L_iph.density - + lambda_max * (0.5 * dtodx) * (del_m_i.density - (1.0 - (2.0 / 3.0) * lambda_max * dtodx) * d_6); interface_L_iph.velocity_x = interface_L_iph.velocity_x - - lambda_max * (0.5 * dtodx) * (del_vx_m_i - (1.0 - (2.0 / 3.0) * lambda_max * dtodx) * vx_6); + lambda_max * (0.5 * dtodx) * (del_m_i.velocity_x - (1.0 - (2.0 / 3.0) * lambda_max * dtodx) * vx_6); interface_L_iph.velocity_y = interface_L_iph.velocity_y - - lambda_max * (0.5 * dtodx) * (del_vy_m_i - (1.0 - (2.0 / 3.0) * lambda_max * dtodx) * vy_6); + lambda_max * (0.5 * dtodx) * (del_m_i.velocity_y - (1.0 - (2.0 / 3.0) * lambda_max * dtodx) * vy_6); interface_L_iph.velocity_z = interface_L_iph.velocity_z - - lambda_max * (0.5 * dtodx) * (del_vz_m_i - (1.0 - (2.0 / 3.0) * lambda_max * dtodx) * vz_6); - interface_L_iph.pressure = interface_L_iph.pressure - - lambda_max * (0.5 * dtodx) * (del_p_m_i - (1.0 - (2.0 / 3.0) * lambda_max * dtodx) * p_6); + lambda_max * (0.5 * dtodx) * (del_m_i.velocity_z - (1.0 - (2.0 / 3.0) * lambda_max * dtodx) * vz_6); + interface_L_iph.pressure = + interface_L_iph.pressure - + lambda_max * (0.5 * dtodx) * (del_m_i.pressure - (1.0 - (2.0 / 3.0) * lambda_max * dtodx) * p_6); // right interface value, i-1/2 - interface_R_imh.density = interface_R_imh.density - - lambda_min * (0.5 * dtodx) * (del_d_m_i + (1.0 + (2.0 / 3.0) * lambda_min * dtodx) * d_6); + interface_R_imh.density = + interface_R_imh.density - + lambda_min * (0.5 * dtodx) * (del_m_i.density + (1.0 + (2.0 / 3.0) * lambda_min * dtodx) * d_6); interface_R_imh.velocity_x = interface_R_imh.velocity_x - - lambda_min * (0.5 * dtodx) * (del_vx_m_i + (1.0 + (2.0 / 3.0) * lambda_min * dtodx) * vx_6); + lambda_min * (0.5 * dtodx) * (del_m_i.velocity_x + (1.0 + (2.0 / 3.0) * lambda_min * dtodx) * vx_6); interface_R_imh.velocity_y = interface_R_imh.velocity_y - - lambda_min * (0.5 * dtodx) * (del_vy_m_i + (1.0 + (2.0 / 3.0) * lambda_min * dtodx) * vy_6); + lambda_min * (0.5 * dtodx) * (del_m_i.velocity_y + (1.0 + (2.0 / 3.0) * lambda_min * dtodx) * vy_6); interface_R_imh.velocity_z = interface_R_imh.velocity_z - - lambda_min * (0.5 * dtodx) * (del_vz_m_i + (1.0 + (2.0 / 3.0) * lambda_min * dtodx) * vz_6); - interface_R_imh.pressure = interface_R_imh.pressure - - lambda_min * (0.5 * dtodx) * (del_p_m_i + (1.0 + (2.0 / 3.0) * lambda_min * dtodx) * p_6); + lambda_min * (0.5 * dtodx) * (del_m_i.velocity_z + (1.0 + (2.0 / 3.0) * lambda_min * dtodx) * vz_6); + interface_R_imh.pressure = + interface_R_imh.pressure - + lambda_min * (0.5 * dtodx) * (del_m_i.pressure + (1.0 + (2.0 / 3.0) * lambda_min * dtodx) * p_6); #ifdef DE interface_L_iph.gas_energy = interface_L_iph.gas_energy - - lambda_max * (0.5 * dtodx) * (del_ge_m_i - (1.0 - (2.0 / 3.0) * lambda_max * dtodx) * ge_6); + lambda_max * (0.5 * dtodx) * (del_m_i.gas_energy - (1.0 - (2.0 / 3.0) * lambda_max * dtodx) * ge_6); interface_R_imh.gas_energy = interface_R_imh.gas_energy - - lambda_min * (0.5 * dtodx) * (del_ge_m_i + (1.0 + (2.0 / 3.0) * lambda_min * dtodx) * ge_6); + lambda_min * (0.5 * dtodx) * (del_m_i.gas_energy + (1.0 + (2.0 / 3.0) * lambda_min * dtodx) * ge_6); #endif // DE #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { interface_L_iph.scalar[i] = interface_L_iph.scalar[i] - - lambda_max * (0.5 * dtodx) * (del_scalar_m_i[i] - (1.0 - (2.0 / 3.0) * lambda_max * dtodx) * scalar_6[i]); + lambda_max * (0.5 * dtodx) * (del_m_i.scalar[i] - (1.0 - (2.0 / 3.0) * lambda_max * dtodx) * scalar_6[i]); interface_R_imh.scalar[i] = interface_R_imh.scalar[i] - - lambda_min * (0.5 * dtodx) * (del_scalar_m_i[i] + (1.0 + (2.0 / 3.0) * lambda_min * dtodx) * scalar_6[i]); + lambda_min * (0.5 * dtodx) * (del_m_i.scalar[i] + (1.0 + (2.0 / 3.0) * lambda_min * dtodx) * scalar_6[i]); } #endif // SCALAR @@ -1102,11 +1106,11 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou Real const A = (0.5 * dtodx) * (lambda_p - lambda_m); Real const B = (1.0 / 3.0) * (dtodx) * (dtodx) * (lambda_p * lambda_p - lambda_m * lambda_m); - Real const chi_1 = A * (del_d_m_i - d_6) + B * d_6; - Real const chi_2 = A * (del_vx_m_i - vx_6) + B * vx_6; - Real const chi_3 = A * (del_vy_m_i - vy_6) + B * vy_6; - Real const chi_4 = A * (del_vz_m_i - vz_6) + B * vz_6; - Real const chi_5 = A * (del_p_m_i - p_6) + B * p_6; + Real const chi_1 = A * (del_m_i.density - d_6) + B * d_6; + Real const chi_2 = A * (del_m_i.velocity_x - vx_6) + B * vx_6; + Real const chi_3 = A * (del_m_i.velocity_y - vy_6) + B * vy_6; + Real const chi_4 = A * (del_m_i.velocity_z - vz_6) + B * vz_6; + Real const chi_5 = A * (del_m_i.pressure - p_6) + B * p_6; sum_1 += -0.5 * (cell_i.density * chi_2 / sound_speed - chi_5 / (sound_speed * sound_speed)); sum_2 += 0.5 * (chi_2 - chi_5 / (sound_speed * cell_i.density)); @@ -1116,18 +1120,18 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou Real const A = (0.5 * dtodx) * (lambda_p - lambda_0); Real const B = (1.0 / 3.0) * (dtodx) * (dtodx) * (lambda_p * lambda_p - lambda_0 * lambda_0); - Real const chi_1 = A * (del_d_m_i - d_6) + B * d_6; - Real const chi_2 = A * (del_vx_m_i - vx_6) + B * vx_6; - Real const chi_3 = A * (del_vy_m_i - vy_6) + B * vy_6; - Real const chi_4 = A * (del_vz_m_i - vz_6) + B * vz_6; - Real const chi_5 = A * (del_p_m_i - p_6) + B * p_6; + Real const chi_1 = A * (del_m_i.density - d_6) + B * d_6; + Real const chi_2 = A * (del_m_i.velocity_x - vx_6) + B * vx_6; + Real const chi_3 = A * (del_m_i.velocity_y - vy_6) + B * vy_6; + Real const chi_4 = A * (del_m_i.velocity_z - vz_6) + B * vz_6; + Real const chi_5 = A * (del_m_i.pressure - p_6) + B * p_6; #ifdef DE - Real chi_ge = A * (del_ge_m_i - ge_6) + B * ge_6; + Real chi_ge = A * (del_m_i.gas_energy - ge_6) + B * ge_6; #endif // DE #ifdef SCALAR Real chi_scalar[NSCALARS]; for (int i = 0; i < NSCALARS; i++) { - chi_scalar[i] = A * (del_scalar_m_i[i] - scalar_6[i]) + B * scalar_6[i]; + chi_scalar[i] = A * (del_m_i.scalar[i] - scalar_6[i]) + B * scalar_6[i]; } #endif // SCALAR @@ -1147,11 +1151,11 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou Real const A = (0.5 * dtodx) * (lambda_p - lambda_p); Real const B = (1.0 / 3.0) * (dtodx) * (dtodx) * (lambda_p * lambda_p - lambda_p * lambda_p); - Real const chi_1 = A * (del_d_m_i - d_6) + B * d_6; - Real const chi_2 = A * (del_vx_m_i - vx_6) + B * vx_6; - Real const chi_3 = A * (del_vy_m_i - vy_6) + B * vy_6; - Real const chi_4 = A * (del_vz_m_i - vz_6) + B * vz_6; - Real const chi_5 = A * (del_p_m_i - p_6) + B * p_6; + Real const chi_1 = A * (del_m_i.density - d_6) + B * d_6; + Real const chi_2 = A * (del_m_i.velocity_x - vx_6) + B * vx_6; + Real const chi_3 = A * (del_m_i.velocity_y - vy_6) + B * vy_6; + Real const chi_4 = A * (del_m_i.velocity_z - vz_6) + B * vz_6; + Real const chi_5 = A * (del_m_i.pressure - p_6) + B * p_6; sum_1 += 0.5 * (cell_i.density * chi_2 / sound_speed + chi_5 / (sound_speed * sound_speed)); sum_2 += 0.5 * (chi_2 + chi_5 / (sound_speed * cell_i.density)); @@ -1191,11 +1195,11 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou Real const C = (0.5 * dtodx) * (lambda_m - lambda_m); Real const D = (1.0 / 3.0) * (dtodx) * (dtodx) * (lambda_m * lambda_m - lambda_m * lambda_m); - Real const chi_1 = C * (del_d_m_i + d_6) + D * d_6; - Real const chi_2 = C * (del_vx_m_i + vx_6) + D * vx_6; - Real const chi_3 = C * (del_vy_m_i + vy_6) + D * vy_6; - Real const chi_4 = C * (del_vz_m_i + vz_6) + D * vz_6; - Real const chi_5 = C * (del_p_m_i + p_6) + D * p_6; + Real const chi_1 = C * (del_m_i.density + d_6) + D * d_6; + Real const chi_2 = C * (del_m_i.velocity_x + vx_6) + D * vx_6; + Real const chi_3 = C * (del_m_i.velocity_y + vy_6) + D * vy_6; + Real const chi_4 = C * (del_m_i.velocity_z + vz_6) + D * vz_6; + Real const chi_5 = C * (del_m_i.pressure + p_6) + D * p_6; sum_1 += -0.5 * (cell_i.density * chi_2 / sound_speed - chi_5 / (sound_speed * sound_speed)); sum_2 += 0.5 * (chi_2 - chi_5 / (sound_speed * cell_i.density)); @@ -1205,17 +1209,17 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou Real const C = (0.5 * dtodx) * (lambda_m - lambda_0); Real const D = (1.0 / 3.0) * (dtodx) * (dtodx) * (lambda_m * lambda_m - lambda_0 * lambda_0); - Real const chi_1 = C * (del_d_m_i + d_6) + D * d_6; - Real const chi_2 = C * (del_vx_m_i + vx_6) + D * vx_6; - Real const chi_3 = C * (del_vy_m_i + vy_6) + D * vy_6; - Real const chi_4 = C * (del_vz_m_i + vz_6) + D * vz_6; - Real const chi_5 = C * (del_p_m_i + p_6) + D * p_6; + Real const chi_1 = C * (del_m_i.density + d_6) + D * d_6; + Real const chi_2 = C * (del_m_i.velocity_x + vx_6) + D * vx_6; + Real const chi_3 = C * (del_m_i.velocity_y + vy_6) + D * vy_6; + Real const chi_4 = C * (del_m_i.velocity_z + vz_6) + D * vz_6; + Real const chi_5 = C * (del_m_i.pressure + p_6) + D * p_6; #ifdef DE - chi_ge = C * (del_ge_m_i + ge_6) + D * ge_6; + chi_ge = C * (del_m_i.gas_energy + ge_6) + D * ge_6; #endif // DE #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { - chi_scalar[i] = C * (del_scalar_m_i[i] + scalar_6[i]) + D * scalar_6[i]; + chi_scalar[i] = C * (del_m_i.scalar[i] + scalar_6[i]) + D * scalar_6[i]; } #endif // SCALAR @@ -1235,11 +1239,11 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou Real const C = (0.5 * dtodx) * (lambda_m - lambda_p); Real const D = (1.0 / 3.0) * (dtodx) * (dtodx) * (lambda_m * lambda_m - lambda_p * lambda_p); - Real const chi_1 = C * (del_d_m_i + d_6) + D * d_6; - Real const chi_2 = C * (del_vx_m_i + vx_6) + D * vx_6; - Real const chi_3 = C * (del_vy_m_i + vy_6) + D * vy_6; - Real const chi_4 = C * (del_vz_m_i + vz_6) + D * vz_6; - Real const chi_5 = C * (del_p_m_i + p_6) + D * p_6; + Real const chi_1 = C * (del_m_i.density + d_6) + D * d_6; + Real const chi_2 = C * (del_m_i.velocity_x + vx_6) + D * vx_6; + Real const chi_3 = C * (del_m_i.velocity_y + vy_6) + D * vy_6; + Real const chi_4 = C * (del_m_i.velocity_z + vz_6) + D * vz_6; + Real const chi_5 = C * (del_m_i.pressure + p_6) + D * p_6; sum_1 += 0.5 * (cell_i.density * chi_2 / sound_speed + chi_5 / (sound_speed * sound_speed)); sum_2 += 0.5 * (chi_2 + chi_5 / (sound_speed * cell_i.density)); @@ -1319,4 +1323,4 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou #ifdef DE dev_bounds_L[grid_enum::GasEnergy * n_cells + id] = interface_L_iph.density * interface_L_iph.gas_energy; #endif // DE -} +} \ No newline at end of file From ff515b7e241e61bcc7d46a7c6e45c2453bfa32c7 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Mon, 17 Apr 2023 11:11:36 -0400 Subject: [PATCH 415/694] PPMC: Replace writes with functions --- src/reconstruction/ppmc_cuda.cu | 53 ++++----------------------------- 1 file changed, 6 insertions(+), 47 deletions(-) diff --git a/src/reconstruction/ppmc_cuda.cu b/src/reconstruction/ppmc_cuda.cu index d91d830a5..ceec14837 100644 --- a/src/reconstruction/ppmc_cuda.cu +++ b/src/reconstruction/ppmc_cuda.cu @@ -1275,52 +1275,11 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou // Step 11 - Send final values back from kernel + // Convert the left and right states in the primitive to the conserved variables send final values back from kernel // bounds_R refers to the right side of the i-1/2 interface - switch (dir) { - case 0: - id = xid - 1 + yid * nx + zid * nx * ny; - break; - case 1: - id = xid + (yid - 1) * nx + zid * nx * ny; - break; - case 2: - id = xid + yid * nx + (zid - 1) * nx * ny; - break; - } - dev_bounds_R[id] = interface_R_imh.density; - dev_bounds_R[o1 * n_cells + id] = interface_R_imh.density * interface_R_imh.velocity_x; - dev_bounds_R[o2 * n_cells + id] = interface_R_imh.density * interface_R_imh.velocity_y; - dev_bounds_R[o3 * n_cells + id] = interface_R_imh.density * interface_R_imh.velocity_z; - dev_bounds_R[4 * n_cells + id] = - interface_R_imh.pressure / (gamma - 1.0) + 0.5 * interface_R_imh.density * - (interface_R_imh.velocity_x * interface_R_imh.velocity_x + - interface_R_imh.velocity_y * interface_R_imh.velocity_y + - interface_R_imh.velocity_z * interface_R_imh.velocity_z); -#ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - dev_bounds_R[(5 + i) * n_cells + id] = interface_R_imh.density * interface_R_imh.scalar[i]; - } -#endif // SCALAR -#ifdef DE - dev_bounds_R[grid_enum::GasEnergy * n_cells + id] = interface_R_imh.density * interface_R_imh.gas_energy; -#endif // DE - // bounds_L refers to the left side of the i+1/2 interface - id = xid + yid * nx + zid * nx * ny; - dev_bounds_L[id] = interface_L_iph.density; - dev_bounds_L[o1 * n_cells + id] = interface_L_iph.density * interface_L_iph.velocity_x; - dev_bounds_L[o2 * n_cells + id] = interface_L_iph.density * interface_L_iph.velocity_y; - dev_bounds_L[o3 * n_cells + id] = interface_L_iph.density * interface_L_iph.velocity_z; - dev_bounds_L[4 * n_cells + id] = - interface_L_iph.pressure / (gamma - 1.0) + 0.5 * interface_L_iph.density * - (interface_L_iph.velocity_x * interface_L_iph.velocity_x + - interface_L_iph.velocity_y * interface_L_iph.velocity_y + - interface_L_iph.velocity_z * interface_L_iph.velocity_z); -#ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - dev_bounds_L[(5 + i) * n_cells + id] = interface_L_iph.density * interface_L_iph.scalar[i]; - } -#endif // SCALAR -#ifdef DE - dev_bounds_L[grid_enum::GasEnergy * n_cells + id] = interface_L_iph.density * interface_L_iph.gas_energy; -#endif // DE + id = cuda_utilities::compute1DIndex(xid, yid, zid, nx, ny); + reconstruction::Write_Data(interface_L_iph, dev_bounds_L, dev_conserved, id, n_cells, o1, o2, o3, gamma); + + id = cuda_utilities::compute1DIndex(xid - int(dir == 0), yid - int(dir == 1), zid - int(dir == 2), nx, ny); + reconstruction::Write_Data(interface_R_imh, dev_bounds_R, dev_conserved, id, n_cells, o1, o2, o3, gamma); } \ No newline at end of file From c52e8e4bc7e18e59f894478dd6d5c824fca4491c Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Mon, 17 Apr 2023 11:20:25 -0400 Subject: [PATCH 416/694] PPMC: Replace loads with functions --- src/reconstruction/plmc_cuda.cu | 4 +- src/reconstruction/ppmc_cuda.cu | 209 +++----------------------------- 2 files changed, 21 insertions(+), 192 deletions(-) diff --git a/src/reconstruction/plmc_cuda.cu b/src/reconstruction/plmc_cuda.cu index d0e200803..1276de34a 100644 --- a/src/reconstruction/plmc_cuda.cu +++ b/src/reconstruction/plmc_cuda.cu @@ -62,11 +62,11 @@ __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou reconstruction::Primitive const cell_i = reconstruction::Load_Data(dev_conserved, xid, yid, zid, nx, ny, n_cells, o1, o2, o3, gamma); - // cell i-1. The equality checks check the direction and subtract one from the direction + // cell i-1. The equality checks check the direction and subtracts one from the direction reconstruction::Primitive const cell_imo = reconstruction::Load_Data( dev_conserved, xid - int(dir == 0), yid - int(dir == 1), zid - int(dir == 2), nx, ny, n_cells, o1, o2, o3, gamma); - // cell i+1. The equality checks check the direction and add one to the direction + // cell i+1. The equality checks check the direction and adds one to the direction reconstruction::Primitive const cell_ipo = reconstruction::Load_Data( dev_conserved, xid + int(dir == 0), yid + int(dir == 1), zid + int(dir == 2), nx, ny, n_cells, o1, o2, o3, gamma); diff --git a/src/reconstruction/ppmc_cuda.cu b/src/reconstruction/ppmc_cuda.cu index ceec14837..9d9a6fd79 100644 --- a/src/reconstruction/ppmc_cuda.cu +++ b/src/reconstruction/ppmc_cuda.cu @@ -54,9 +54,6 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou break; } - // declare primitive variables for each stencil these will be placed into registers for each thread - reconstruction::Primitive cell_i, cell_im1, cell_im2, cell_ip1, cell_ip2; - // declare other variables to be used reconstruction::Primitive del_L, del_R, del_C, del_G; // primitive slopes reconstruction::Characteristic del_a_L, del_a_R, del_a_C, del_a_G, del_a_m; // characteristic slopes @@ -65,194 +62,26 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou // load the 5-cell stencil into registers // cell i - int id = xid + yid * nx + zid * nx * ny; - cell_i.density = dev_conserved[id]; - cell_i.velocity_x = dev_conserved[o1 * n_cells + id] / cell_i.density; - cell_i.velocity_y = dev_conserved[o2 * n_cells + id] / cell_i.density; - cell_i.velocity_z = dev_conserved[o3 * n_cells + id] / cell_i.density; -#ifdef DE // PRESSURE_DE - Real E = dev_conserved[4 * n_cells + id]; - Real E_kin = 0.5 * cell_i.density * - (cell_i.velocity_x * cell_i.velocity_x + cell_i.velocity_y * cell_i.velocity_y + - cell_i.velocity_z * cell_i.velocity_z); - Real gas_energy = dev_conserved[grid_enum::GasEnergy * n_cells + id]; - cell_i.pressure = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, gas_energy, gamma); -#else // not DE - cell_i.pressure = (dev_conserved[4 * n_cells + id] - - 0.5 * cell_i.density * - (cell_i.velocity_x * cell_i.velocity_x + cell_i.velocity_y * cell_i.velocity_y + - cell_i.velocity_z * cell_i.velocity_z)) * - (gamma - 1.0); -#endif // PRESSURE_DE - cell_i.pressure = fmax(cell_i.pressure, (Real)TINY_NUMBER); -#ifdef DE - cell_i.gas_energy = gas_energy / cell_i.density; -#endif // DE -#ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - cell_i.scalar[i] = dev_conserved[(5 + i) * n_cells + id] / cell_i.density; - } -#endif // SCALAR - // cell i-1 - switch (dir) { - case 0: - id = xid - 1 + yid * nx + zid * nx * ny; - break; - case 1: - id = xid + (yid - 1) * nx + zid * nx * ny; - break; - case 2: - id = xid + yid * nx + (zid - 1) * nx * ny; - break; - } + reconstruction::Primitive const cell_i = + reconstruction::Load_Data(dev_conserved, xid, yid, zid, nx, ny, n_cells, o1, o2, o3, gamma); - cell_im1.density = dev_conserved[id]; - cell_im1.velocity_x = dev_conserved[o1 * n_cells + id] / cell_im1.density; - cell_im1.velocity_y = dev_conserved[o2 * n_cells + id] / cell_im1.density; - cell_im1.velocity_z = dev_conserved[o3 * n_cells + id] / cell_im1.density; -#ifdef DE // PRESSURE_DE - E = dev_conserved[4 * n_cells + id]; - E_kin = 0.5 * cell_im1.density * - (cell_im1.velocity_x * cell_im1.velocity_x + cell_im1.velocity_y * cell_im1.velocity_y + - cell_im1.velocity_z * cell_im1.velocity_z); - gas_energy = dev_conserved[grid_enum::GasEnergy * n_cells + id]; - cell_im1.pressure = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, gas_energy, gamma); -#else // not DE - cell_im1.pressure = (dev_conserved[4 * n_cells + id] - - 0.5 * cell_im1.density * - (cell_im1.velocity_x * cell_im1.velocity_x + cell_im1.velocity_y * cell_im1.velocity_y + - cell_im1.velocity_z * cell_im1.velocity_z)) * - (gamma - 1.0); -#endif // PRESSURE_DE - cell_im1.pressure = fmax(cell_im1.pressure, (Real)TINY_NUMBER); -#ifdef DE - cell_im1.gas_energy = gas_energy / cell_im1.density; -#endif // DE -#ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - cell_im1.scalar[i] = dev_conserved[(5 + i) * n_cells + id] / cell_im1.density; - } -#endif // SCALAR - // cell i+1 - switch (dir) { - case 0: - id = xid + 1 + yid * nx + zid * nx * ny; - break; - case 1: - id = xid + (yid + 1) * nx + zid * nx * ny; - break; - case 2: - id = xid + yid * nx + (zid + 1) * nx * ny; - break; - } - cell_ip1.density = dev_conserved[id]; - cell_ip1.velocity_x = dev_conserved[o1 * n_cells + id] / cell_ip1.density; - cell_ip1.velocity_y = dev_conserved[o2 * n_cells + id] / cell_ip1.density; - cell_ip1.velocity_z = dev_conserved[o3 * n_cells + id] / cell_ip1.density; -#ifdef DE // PRESSURE_DE - E = dev_conserved[4 * n_cells + id]; - E_kin = 0.5 * cell_ip1.density * - (cell_ip1.velocity_x * cell_ip1.velocity_x + cell_ip1.velocity_y * cell_ip1.velocity_y + - cell_ip1.velocity_z * cell_ip1.velocity_z); - gas_energy = dev_conserved[grid_enum::GasEnergy * n_cells + id]; - cell_ip1.pressure = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, gas_energy, gamma); -#else // not DE - cell_ip1.pressure = (dev_conserved[4 * n_cells + id] - - 0.5 * cell_ip1.density * - (cell_ip1.velocity_x * cell_ip1.velocity_x + cell_ip1.velocity_y * cell_ip1.velocity_y + - cell_ip1.velocity_z * cell_ip1.velocity_z)) * - (gamma - 1.0); -#endif // PRESSURE_DE - cell_ip1.pressure = fmax(cell_ip1.pressure, (Real)TINY_NUMBER); -#ifdef DE - cell_ip1.gas_energy = gas_energy / cell_ip1.density; -#endif // DE -#ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - cell_ip1.scalar[i] = dev_conserved[(5 + i) * n_cells + id] / cell_ip1.density; - } -#endif // SCALAR - // cell i-2 - switch (dir) { - case 0: - id = xid - 2 + yid * nx + zid * nx * ny; - break; - case 1: - id = xid + (yid - 2) * nx + zid * nx * ny; - break; - case 2: - id = xid + yid * nx + (zid - 2) * nx * ny; - break; - } - cell_im2.density = dev_conserved[id]; - cell_im2.velocity_x = dev_conserved[o1 * n_cells + id] / cell_im2.density; - cell_im2.velocity_y = dev_conserved[o2 * n_cells + id] / cell_im2.density; - cell_im2.velocity_z = dev_conserved[o3 * n_cells + id] / cell_im2.density; -#ifdef DE // PRESSURE_DE - E = dev_conserved[4 * n_cells + id]; - E_kin = 0.5 * cell_im2.density * - (cell_im2.velocity_x * cell_im2.velocity_x + cell_im2.velocity_y * cell_im2.velocity_y + - cell_im2.velocity_z * cell_im2.velocity_z); - gas_energy = dev_conserved[grid_enum::GasEnergy * n_cells + id]; - cell_im2.pressure = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, gas_energy, gamma); -#else // not DE - cell_im2.pressure = (dev_conserved[4 * n_cells + id] - - 0.5 * cell_im2.density * - (cell_im2.velocity_x * cell_im2.velocity_x + cell_im2.velocity_y * cell_im2.velocity_y + - cell_im2.velocity_z * cell_im2.velocity_z)) * - (gamma - 1.0); -#endif // PRESSURE_DE - cell_im2.pressure = fmax(cell_im2.pressure, (Real)TINY_NUMBER); -#ifdef DE - cell_im2.gas_energy = gas_energy / cell_im2.density; -#endif // DE -#ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - cell_im2.scalar[i] = dev_conserved[(5 + i) * n_cells + id] / cell_im2.density; - } -#endif // SCALAR - // cell i+2 - switch (dir) { - case 0: - id = xid + 2 + yid * nx + zid * nx * ny; - break; - case 1: - id = xid + (yid + 2) * nx + zid * nx * ny; - break; - case 2: - id = xid + yid * nx + (zid + 2) * nx * ny; - break; - } - cell_ip2.density = dev_conserved[id]; - cell_ip2.velocity_x = dev_conserved[o1 * n_cells + id] / cell_ip2.density; - cell_ip2.velocity_y = dev_conserved[o2 * n_cells + id] / cell_ip2.density; - cell_ip2.velocity_z = dev_conserved[o3 * n_cells + id] / cell_ip2.density; -#ifdef DE // PRESSURE_DE - E = dev_conserved[4 * n_cells + id]; - E_kin = 0.5 * cell_ip2.density * - (cell_ip2.velocity_x * cell_ip2.velocity_x + cell_ip2.velocity_y * cell_ip2.velocity_y + - cell_ip2.velocity_z * cell_ip2.velocity_z); - gas_energy = dev_conserved[grid_enum::GasEnergy * n_cells + id]; - cell_ip2.pressure = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, gas_energy, gamma); -#else // not DE - cell_ip2.pressure = (dev_conserved[4 * n_cells + id] - - 0.5 * cell_ip2.density * - (cell_ip2.velocity_x * cell_ip2.velocity_x + cell_ip2.velocity_y * cell_ip2.velocity_y + - cell_ip2.velocity_z * cell_ip2.velocity_z)) * - (gamma - 1.0); -#endif // PRESSURE_DE - cell_ip2.pressure = fmax(cell_ip2.pressure, (Real)TINY_NUMBER); -#ifdef DE - cell_ip2.gas_energy = gas_energy / cell_ip2.density; -#endif // DE -#ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - cell_ip2.scalar[i] = dev_conserved[(5 + i) * n_cells + id] / cell_ip2.density; - } -#endif // SCALAR + // cell i-1. The equality checks check the direction and subtracts one from the direction + reconstruction::Primitive const cell_im1 = reconstruction::Load_Data( + dev_conserved, xid - int(dir == 0), yid - int(dir == 1), zid - int(dir == 2), nx, ny, n_cells, o1, o2, o3, gamma); + + // cell i+1. The equality checks check the direction and adds one to the direction + reconstruction::Primitive const cell_ip1 = reconstruction::Load_Data( + dev_conserved, xid + int(dir == 0), yid + int(dir == 1), zid + int(dir == 2), nx, ny, n_cells, o1, o2, o3, gamma); + + // cell i-2. The equality checks check the direction and subtracts one from the direction + reconstruction::Primitive const cell_im2 = + reconstruction::Load_Data(dev_conserved, xid - 2 * int(dir == 0), yid - 2 * int(dir == 1), + zid - 2 * int(dir == 2), nx, ny, n_cells, o1, o2, o3, gamma); - // printf("%d %d %d %f %f %f %f %f\n", xid, yid, zid, cell_i.density, cell_i.velocity_x, cell_i.velocity_y, - // cell_i.velocity_z, cell_i.pressure); + // cell i+2. The equality checks check the direction and adds one to the direction + reconstruction::Primitive const cell_ip2 = + reconstruction::Load_Data(dev_conserved, xid + 2 * int(dir == 0), yid + 2 * int(dir == 1), + zid + 2 * int(dir == 2), nx, ny, n_cells, o1, o2, o3, gamma); // Steps 2 - 5 are repeated for cell i-1, i, and i+1 // Step 2 - Compute the left, right, centered, and van Leer differences of @@ -1277,7 +1106,7 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou // Convert the left and right states in the primitive to the conserved variables send final values back from kernel // bounds_R refers to the right side of the i-1/2 interface - id = cuda_utilities::compute1DIndex(xid, yid, zid, nx, ny); + size_t id = cuda_utilities::compute1DIndex(xid, yid, zid, nx, ny); reconstruction::Write_Data(interface_L_iph, dev_bounds_L, dev_conserved, id, n_cells, o1, o2, o3, gamma); id = cuda_utilities::compute1DIndex(xid - int(dir == 0), yid - int(dir == 1), zid - int(dir == 2), nx, ny); From 58f09b447298a7d21264fca57b7fd7995d8e1bcd Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Mon, 17 Apr 2023 11:42:06 -0400 Subject: [PATCH 417/694] PPMC: Replace primitive slopes with functions --- src/reconstruction/ppmc_cuda.cu | 245 +++++--------------------------- 1 file changed, 38 insertions(+), 207 deletions(-) diff --git a/src/reconstruction/ppmc_cuda.cu b/src/reconstruction/ppmc_cuda.cu index 9d9a6fd79..d93bc9f9a 100644 --- a/src/reconstruction/ppmc_cuda.cu +++ b/src/reconstruction/ppmc_cuda.cu @@ -55,7 +55,6 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou } // declare other variables to be used - reconstruction::Primitive del_L, del_R, del_C, del_G; // primitive slopes reconstruction::Characteristic del_a_L, del_a_R, del_a_C, del_a_G, del_a_m; // characteristic slopes reconstruction::Primitive del_m_im1, del_m_i, del_m_ip1; // Monotonized primitive slopes reconstruction::Primitive interface_R_imh, interface_L_iph; // Interface states @@ -66,102 +65,50 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou reconstruction::Load_Data(dev_conserved, xid, yid, zid, nx, ny, n_cells, o1, o2, o3, gamma); // cell i-1. The equality checks check the direction and subtracts one from the direction + // im1 stands for "i minus 1" reconstruction::Primitive const cell_im1 = reconstruction::Load_Data( dev_conserved, xid - int(dir == 0), yid - int(dir == 1), zid - int(dir == 2), nx, ny, n_cells, o1, o2, o3, gamma); // cell i+1. The equality checks check the direction and adds one to the direction + // ip1 stands for "i plus 1" reconstruction::Primitive const cell_ip1 = reconstruction::Load_Data( dev_conserved, xid + int(dir == 0), yid + int(dir == 1), zid + int(dir == 2), nx, ny, n_cells, o1, o2, o3, gamma); // cell i-2. The equality checks check the direction and subtracts one from the direction + // im2 stands for "i minus 2" reconstruction::Primitive const cell_im2 = reconstruction::Load_Data(dev_conserved, xid - 2 * int(dir == 0), yid - 2 * int(dir == 1), zid - 2 * int(dir == 2), nx, ny, n_cells, o1, o2, o3, gamma); // cell i+2. The equality checks check the direction and adds one to the direction + // ip2 stands for "i plus 2" reconstruction::Primitive const cell_ip2 = reconstruction::Load_Data(dev_conserved, xid + 2 * int(dir == 0), yid + 2 * int(dir == 1), zid + 2 * int(dir == 2), nx, ny, n_cells, o1, o2, o3, gamma); // Steps 2 - 5 are repeated for cell i-1, i, and i+1 - // Step 2 - Compute the left, right, centered, and van Leer differences of - // the primitive variables - // Note that here L and R refer to locations relative to the cell - // center Stone Eqn 36 - // calculate the adiabatic sound speed in cell imo + // =============== + // Cell i-1 slopes + // =============== + + // calculate the adiabatic sound speed in cell im1 Real sound_speed = hydro_utilities::Calc_Sound_Speed(cell_im1.pressure, cell_im1.density, gamma); + // Step 2 - Compute the left, right, centered, and van Leer differences of the primitive variables. Note that here L + // and R refer to locations relative to the cell center Stone Eqn 36 + // left - del_L.density = cell_im1.density - cell_im2.density; - del_L.velocity_x = cell_im1.velocity_x - cell_im2.velocity_x; - del_L.velocity_y = cell_im1.velocity_y - cell_im2.velocity_y; - del_L.velocity_z = cell_im1.velocity_z - cell_im2.velocity_z; - del_L.pressure = cell_im1.pressure - cell_im2.pressure; + reconstruction::Primitive del_L = reconstruction::Compute_Slope(cell_im1, cell_im2); // right - del_R.density = cell_i.density - cell_im1.density; - del_R.velocity_x = cell_i.velocity_x - cell_im1.velocity_x; - del_R.velocity_y = cell_i.velocity_y - cell_im1.velocity_y; - del_R.velocity_z = cell_i.velocity_z - cell_im1.velocity_z; - del_R.pressure = cell_i.pressure - cell_im1.pressure; + reconstruction::Primitive del_R = reconstruction::Compute_Slope(cell_i, cell_im1); // centered - del_C.density = 0.5 * (cell_i.density - cell_im2.density); - del_C.velocity_x = 0.5 * (cell_i.velocity_x - cell_im2.velocity_x); - del_C.velocity_y = 0.5 * (cell_i.velocity_y - cell_im2.velocity_y); - del_C.velocity_z = 0.5 * (cell_i.velocity_z - cell_im2.velocity_z); - del_C.pressure = 0.5 * (cell_i.pressure - cell_im2.pressure); + reconstruction::Primitive del_C = reconstruction::Compute_Slope(cell_i, cell_im2, 0.5); // Van Leer - if (del_L.density * del_R.density > 0.0) { - del_G.density = 2.0 * del_L.density * del_R.density / (del_L.density + del_R.density); - } else { - del_G.density = 0.0; - } - if (del_L.velocity_x * del_R.velocity_x > 0.0) { - del_G.velocity_x = 2.0 * del_L.velocity_x * del_R.velocity_x / (del_L.velocity_x + del_R.velocity_x); - } else { - del_G.velocity_x = 0.0; - } - if (del_L.velocity_y * del_R.velocity_y > 0.0) { - del_G.velocity_y = 2.0 * del_L.velocity_y * del_R.velocity_y / (del_L.velocity_y + del_R.velocity_y); - } else { - del_G.velocity_y = 0.0; - } - if (del_L.velocity_z * del_R.velocity_z > 0.0) { - del_G.velocity_z = 2.0 * del_L.velocity_z * del_R.velocity_z / (del_L.velocity_z + del_R.velocity_z); - } else { - del_G.velocity_z = 0.0; - } - if (del_L.pressure * del_R.pressure > 0.0) { - del_G.pressure = 2.0 * del_L.pressure * del_R.pressure / (del_L.pressure + del_R.pressure); - } else { - del_G.pressure = 0.0; - } - -#ifdef DE - del_L.gas_energy = cell_im1.gas_energy - cell_im2.gas_energy; - del_R.gas_energy = cell_i.gas_energy - cell_im1.gas_energy; - del_C.gas_energy = 0.5 * (cell_i.gas_energy - cell_im2.gas_energy); - if (del_L.gas_energy * del_R.gas_energy > 0.0) { - del_G.gas_energy = 2.0 * del_L.gas_energy * del_R.gas_energy / (del_L.gas_energy + del_R.gas_energy); - } else { - del_G.gas_energy = 0.0; - } -#endif // DE -#ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - del_L.scalar[i] = cell_im1.scalar[i] - cell_im2.scalar[i]; - del_R.scalar[i] = cell_i.scalar[i] - cell_im1.scalar[i]; - del_C.scalar[i] = 0.5 * (cell_i.scalar[i] - cell_im2.scalar[i]); - if (del_L.scalar[i] * del_R.scalar[i] > 0.0) { - del_G.scalar[i] = 2.0 * del_L.scalar[i] * del_R.scalar[i] / (del_L.scalar[i] + del_R.scalar[i]); - } else { - del_G.scalar[i] = 0.0; - } - } -#endif // SCALAR + reconstruction::Primitive del_G = reconstruction::Van_Leer_Slope(del_L, del_R); // Step 3 - Project the left, right, centered and van Leer differences onto // the characteristic variables @@ -264,85 +211,27 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou del_m_im1.velocity_z = del_a_m.a3; del_m_im1.pressure = sound_speed * sound_speed * del_a_m.a0 + sound_speed * sound_speed * del_a_m.a4; - // Step 2 - Compute the left, right, centered, and van Leer differences of - // the primitive variables - // Note that here L and R refer to locations relative to the cell - // center Stone Eqn 36 + // ============= + // Cell i slopes + // ============= // calculate the adiabatic sound speed in cell i sound_speed = hydro_utilities::Calc_Sound_Speed(cell_i.pressure, cell_i.density, gamma); + // Step 2 - Compute the left, right, centered, and van Leer differences of the primitive variables. Note that here L + // and R refer to locations relative to the cell center Stone Eqn 36 + // left - del_L.density = cell_i.density - cell_im1.density; - del_L.velocity_x = cell_i.velocity_x - cell_im1.velocity_x; - del_L.velocity_y = cell_i.velocity_y - cell_im1.velocity_y; - del_L.velocity_z = cell_i.velocity_z - cell_im1.velocity_z; - del_L.pressure = cell_i.pressure - cell_im1.pressure; + del_L = reconstruction::Compute_Slope(cell_i, cell_im1); // right - del_R.density = cell_ip1.density - cell_i.density; - del_R.velocity_x = cell_ip1.velocity_x - cell_i.velocity_x; - del_R.velocity_y = cell_ip1.velocity_y - cell_i.velocity_y; - del_R.velocity_z = cell_ip1.velocity_z - cell_i.velocity_z; - del_R.pressure = cell_ip1.pressure - cell_i.pressure; + del_R = reconstruction::Compute_Slope(cell_ip1, cell_i); // centered - del_C.density = 0.5 * (cell_ip1.density - cell_im1.density); - del_C.velocity_x = 0.5 * (cell_ip1.velocity_x - cell_im1.velocity_x); - del_C.velocity_y = 0.5 * (cell_ip1.velocity_y - cell_im1.velocity_y); - del_C.velocity_z = 0.5 * (cell_ip1.velocity_z - cell_im1.velocity_z); - del_C.pressure = 0.5 * (cell_ip1.pressure - cell_im1.pressure); - - // van Leer - if (del_L.density * del_R.density > 0.0) { - del_G.density = 2.0 * del_L.density * del_R.density / (del_L.density + del_R.density); - } else { - del_G.density = 0.0; - } - if (del_L.velocity_x * del_R.velocity_x > 0.0) { - del_G.velocity_x = 2.0 * del_L.velocity_x * del_R.velocity_x / (del_L.velocity_x + del_R.velocity_x); - } else { - del_G.velocity_x = 0.0; - } - if (del_L.velocity_y * del_R.velocity_y > 0.0) { - del_G.velocity_y = 2.0 * del_L.velocity_y * del_R.velocity_y / (del_L.velocity_y + del_R.velocity_y); - } else { - del_G.velocity_y = 0.0; - } - if (del_L.velocity_z * del_R.velocity_z > 0.0) { - del_G.velocity_z = 2.0 * del_L.velocity_z * del_R.velocity_z / (del_L.velocity_z + del_R.velocity_z); - } else { - del_G.velocity_z = 0.0; - } - if (del_L.pressure * del_R.pressure > 0.0) { - del_G.pressure = 2.0 * del_L.pressure * del_R.pressure / (del_L.pressure + del_R.pressure); - } else { - del_G.pressure = 0.0; - } + del_C = reconstruction::Compute_Slope(cell_ip1, cell_im1, 0.5); -#ifdef DE - del_L.gas_energy = cell_i.gas_energy - cell_im1.gas_energy; - del_R.gas_energy = cell_ip1.gas_energy - cell_i.gas_energy; - del_C.gas_energy = 0.5 * (cell_ip1.gas_energy - cell_im1.gas_energy); - if (del_L.gas_energy * del_R.gas_energy > 0.0) { - del_G.gas_energy = 2.0 * del_L.gas_energy * del_R.gas_energy / (del_L.gas_energy + del_R.gas_energy); - } else { - del_G.gas_energy = 0.0; - } -#endif // DE - -#ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - del_L.scalar[i] = cell_i.scalar[i] - cell_im1.scalar[i]; - del_R.scalar[i] = cell_ip1.scalar[i] - cell_i.scalar[i]; - del_C.scalar[i] = 0.5 * (cell_ip1.scalar[i] - cell_im1.scalar[i]); - if (del_L.scalar[i] * del_R.scalar[i] > 0.0) { - del_G.scalar[i] = 2.0 * del_L.scalar[i] * del_R.scalar[i] / (del_L.scalar[i] + del_R.scalar[i]); - } else { - del_G.scalar[i] = 0.0; - } - } -#endif // SCALAR + // Van Leer + del_G = reconstruction::Van_Leer_Slope(del_L, del_R); // Step 3 - Project the left, right, centered, and van Leer differences onto // the characteristic variables @@ -445,85 +334,27 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou del_m_i.velocity_z = del_a_m.a3; del_m_i.pressure = sound_speed * sound_speed * del_a_m.a0 + sound_speed * sound_speed * del_a_m.a4; - // Step 2 - Compute the left, right, centered, and van Leer differences of - // the primitive variables - // Note that here L and R refer to locations relative to the cell - // center Stone Eqn 36 + // =============== + // Cell i+1 slopes + // =============== // calculate the adiabatic sound speed in cell ipo sound_speed = hydro_utilities::Calc_Sound_Speed(cell_ip1.pressure, cell_ip1.density, gamma); + // Step 2 - Compute the left, right, centered, and van Leer differences of the primitive variables. Note that here L + // and R refer to locations relative to the cell center Stone Eqn 36 + // left - del_L.density = cell_ip1.density - cell_i.density; - del_L.velocity_x = cell_ip1.velocity_x - cell_i.velocity_x; - del_L.velocity_y = cell_ip1.velocity_y - cell_i.velocity_y; - del_L.velocity_z = cell_ip1.velocity_z - cell_i.velocity_z; - del_L.pressure = cell_ip1.pressure - cell_i.pressure; + del_L = reconstruction::Compute_Slope(cell_ip1, cell_i); // right - del_R.density = cell_ip2.density - cell_ip1.density; - del_R.velocity_x = cell_ip2.velocity_x - cell_ip1.velocity_x; - del_R.velocity_y = cell_ip2.velocity_y - cell_ip1.velocity_y; - del_R.velocity_z = cell_ip2.velocity_z - cell_ip1.velocity_z; - del_R.pressure = cell_ip2.pressure - cell_ip1.pressure; + del_R = reconstruction::Compute_Slope(cell_ip2, cell_ip1); // centered - del_C.density = 0.5 * (cell_ip2.density - cell_i.density); - del_C.velocity_x = 0.5 * (cell_ip2.velocity_x - cell_i.velocity_x); - del_C.velocity_y = 0.5 * (cell_ip2.velocity_y - cell_i.velocity_y); - del_C.velocity_z = 0.5 * (cell_ip2.velocity_z - cell_i.velocity_z); - del_C.pressure = 0.5 * (cell_ip2.pressure - cell_i.pressure); - - // van Leer - if (del_L.density * del_R.density > 0.0) { - del_G.density = 2.0 * del_L.density * del_R.density / (del_L.density + del_R.density); - } else { - del_G.density = 0.0; - } - if (del_L.velocity_x * del_R.velocity_x > 0.0) { - del_G.velocity_x = 2.0 * del_L.velocity_x * del_R.velocity_x / (del_L.velocity_x + del_R.velocity_x); - } else { - del_G.velocity_x = 0.0; - } - if (del_L.velocity_y * del_R.velocity_y > 0.0) { - del_G.velocity_y = 2.0 * del_L.velocity_y * del_R.velocity_y / (del_L.velocity_y + del_R.velocity_y); - } else { - del_G.velocity_y = 0.0; - } - if (del_L.velocity_z * del_R.velocity_z > 0.0) { - del_G.velocity_z = 2.0 * del_L.velocity_z * del_R.velocity_z / (del_L.velocity_z + del_R.velocity_z); - } else { - del_G.velocity_z = 0.0; - } - if (del_L.pressure * del_R.pressure > 0.0) { - del_G.pressure = 2.0 * del_L.pressure * del_R.pressure / (del_L.pressure + del_R.pressure); - } else { - del_G.pressure = 0.0; - } - -#ifdef DE - del_L.gas_energy = cell_ip1.gas_energy - cell_i.gas_energy; - del_R.gas_energy = cell_ip2.gas_energy - cell_ip1.gas_energy; - del_C.gas_energy = 0.5 * (cell_ip2.gas_energy - cell_i.gas_energy); - if (del_L.gas_energy * del_R.gas_energy > 0.0) { - del_G.gas_energy = 2.0 * del_L.gas_energy * del_R.gas_energy / (del_L.gas_energy + del_R.gas_energy); - } else { - del_G.gas_energy = 0.0; - } -#endif // DE + del_C = reconstruction::Compute_Slope(cell_ip2, cell_i, 0.5); -#ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - del_L.scalar[i] = cell_ip1.scalar[i] - cell_i.scalar[i]; - del_R.scalar[i] = cell_ip2.scalar[i] - cell_ip1.scalar[i]; - del_C.scalar[i] = 0.5 * (cell_ip2.scalar[i] - cell_i.scalar[i]); - if (del_L.scalar[i] * del_R.scalar[i] > 0.0) { - del_G.scalar[i] = 2.0 * del_L.scalar[i] * del_R.scalar[i] / (del_L.scalar[i] + del_R.scalar[i]); - } else { - del_G.scalar[i] = 0.0; - } - } -#endif // SCALAR + // Van Leer + del_G = reconstruction::Van_Leer_Slope(del_L, del_R); // Step 3 - Project the left, right, centered, and van Leer differences onto // the characteristic variables From 5efb275eb429694027ec6db72ce8e49217b2dc79 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Mon, 17 Apr 2023 13:32:48 -0400 Subject: [PATCH 418/694] PPMC: Replace characteristic projection with functions --- src/reconstruction/ppmc_cuda.cu | 154 +++++++++----------------------- 1 file changed, 40 insertions(+), 114 deletions(-) diff --git a/src/reconstruction/ppmc_cuda.cu b/src/reconstruction/ppmc_cuda.cu index d93bc9f9a..fbca095e7 100644 --- a/src/reconstruction/ppmc_cuda.cu +++ b/src/reconstruction/ppmc_cuda.cu @@ -55,9 +55,9 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou } // declare other variables to be used - reconstruction::Characteristic del_a_L, del_a_R, del_a_C, del_a_G, del_a_m; // characteristic slopes - reconstruction::Primitive del_m_im1, del_m_i, del_m_ip1; // Monotonized primitive slopes - reconstruction::Primitive interface_R_imh, interface_L_iph; // Interface states + reconstruction::Characteristic del_a_m; // characteristic slopes + reconstruction::Primitive del_m_im1, del_m_i, del_m_ip1; // Monotonized primitive slopes + reconstruction::Primitive interface_R_imh, interface_L_iph; // Interface states // load the 5-cell stencil into registers // cell i @@ -110,43 +110,21 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou // Van Leer reconstruction::Primitive del_G = reconstruction::Van_Leer_Slope(del_L, del_R); - // Step 3 - Project the left, right, centered and van Leer differences onto - // the characteristic variables - // Stone Eqn 37 (del_a are differences in characteristic variables, - // see Stone for notation) Use the eigenvectors given in Stone - // 2008, Appendix A - - del_a_L.a0 = - -0.5 * cell_im1.density * del_L.velocity_x / sound_speed + 0.5 * del_L.pressure / (sound_speed * sound_speed); - del_a_L.a1 = del_L.density - del_L.pressure / (sound_speed * sound_speed); - del_a_L.a2 = del_L.velocity_y; - del_a_L.a3 = del_L.velocity_z; - del_a_L.a4 = - 0.5 * cell_im1.density * del_L.velocity_x / sound_speed + 0.5 * del_L.pressure / (sound_speed * sound_speed); - - del_a_R.a0 = - -0.5 * cell_im1.density * del_R.velocity_x / sound_speed + 0.5 * del_R.pressure / (sound_speed * sound_speed); - del_a_R.a1 = del_R.density - del_R.pressure / (sound_speed * sound_speed); - del_a_R.a2 = del_R.velocity_y; - del_a_R.a3 = del_R.velocity_z; - del_a_R.a4 = - 0.5 * cell_im1.density * del_R.velocity_x / sound_speed + 0.5 * del_R.pressure / (sound_speed * sound_speed); - - del_a_C.a0 = - -0.5 * cell_im1.density * del_C.velocity_x / sound_speed + 0.5 * del_C.pressure / (sound_speed * sound_speed); - del_a_C.a1 = del_C.density - del_C.pressure / (sound_speed * sound_speed); - del_a_C.a2 = del_C.velocity_y; - del_a_C.a3 = del_C.velocity_z; - del_a_C.a4 = - 0.5 * cell_im1.density * del_C.velocity_x / sound_speed + 0.5 * del_C.pressure / (sound_speed * sound_speed); - - del_a_G.a0 = - -0.5 * cell_im1.density * del_G.velocity_x / sound_speed + 0.5 * del_G.pressure / (sound_speed * sound_speed); - del_a_G.a1 = del_G.density - del_G.pressure / (sound_speed * sound_speed); - del_a_G.a2 = del_G.velocity_y; - del_a_G.a3 = del_G.velocity_z; - del_a_G.a4 = - 0.5 * cell_im1.density * del_G.velocity_x / sound_speed + 0.5 * del_G.pressure / (sound_speed * sound_speed); + // Project the left, right, centered and van Leer differences onto the + // characteristic variables Stone Eqn 37 (del_a are differences in + // characteristic variables, see Stone for notation) Use the eigenvectors + // given in Stone 2008, Appendix A + reconstruction::Characteristic del_a_L = + reconstruction::Primitive_To_Characteristic(cell_im1, del_L, sound_speed, sound_speed * sound_speed, gamma); + + reconstruction::Characteristic del_a_R = + reconstruction::Primitive_To_Characteristic(cell_im1, del_R, sound_speed, sound_speed * sound_speed, gamma); + + reconstruction::Characteristic del_a_C = + reconstruction::Primitive_To_Characteristic(cell_im1, del_C, sound_speed, sound_speed * sound_speed, gamma); + + reconstruction::Characteristic del_a_G = + reconstruction::Primitive_To_Characteristic(cell_im1, del_G, sound_speed, sound_speed * sound_speed, gamma); // Step 4 - Apply monotonicity constraints to the differences in the // characteristic variables @@ -233,43 +211,17 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou // Van Leer del_G = reconstruction::Van_Leer_Slope(del_L, del_R); - // Step 3 - Project the left, right, centered, and van Leer differences onto - // the characteristic variables - // Stone Eqn 37 (del_a are differences in characteristic variables, - // see Stone for notation) Use the eigenvectors given in Stone - // 2008, Appendix A - - del_a_L.a0 = - -0.5 * cell_i.density * del_L.velocity_x / sound_speed + 0.5 * del_L.pressure / (sound_speed * sound_speed); - del_a_L.a1 = del_L.density - del_L.pressure / (sound_speed * sound_speed); - del_a_L.a2 = del_L.velocity_y; - del_a_L.a3 = del_L.velocity_z; - del_a_L.a4 = - 0.5 * cell_i.density * del_L.velocity_x / sound_speed + 0.5 * del_L.pressure / (sound_speed * sound_speed); - - del_a_R.a0 = - -0.5 * cell_i.density * del_R.velocity_x / sound_speed + 0.5 * del_R.pressure / (sound_speed * sound_speed); - del_a_R.a1 = del_R.density - del_R.pressure / (sound_speed * sound_speed); - del_a_R.a2 = del_R.velocity_y; - del_a_R.a3 = del_R.velocity_z; - del_a_R.a4 = - 0.5 * cell_i.density * del_R.velocity_x / sound_speed + 0.5 * del_R.pressure / (sound_speed * sound_speed); - - del_a_C.a0 = - -0.5 * cell_i.density * del_C.velocity_x / sound_speed + 0.5 * del_C.pressure / (sound_speed * sound_speed); - del_a_C.a1 = del_C.density - del_C.pressure / (sound_speed * sound_speed); - del_a_C.a2 = del_C.velocity_y; - del_a_C.a3 = del_C.velocity_z; - del_a_C.a4 = - 0.5 * cell_i.density * del_C.velocity_x / sound_speed + 0.5 * del_C.pressure / (sound_speed * sound_speed); - - del_a_G.a0 = - -0.5 * cell_i.density * del_G.velocity_x / sound_speed + 0.5 * del_G.pressure / (sound_speed * sound_speed); - del_a_G.a1 = del_G.density - del_G.pressure / (sound_speed * sound_speed); - del_a_G.a2 = del_G.velocity_y; - del_a_G.a3 = del_G.velocity_z; - del_a_G.a4 = - 0.5 * cell_i.density * del_G.velocity_x / sound_speed + 0.5 * del_G.pressure / (sound_speed * sound_speed); + // Project the left, right, centered and van Leer differences onto the + // characteristic variables Stone Eqn 37 (del_a are differences in + // characteristic variables, see Stone for notation) Use the eigenvectors + // given in Stone 2008, Appendix A + del_a_L = reconstruction::Primitive_To_Characteristic(cell_i, del_L, sound_speed, sound_speed * sound_speed, gamma); + + del_a_R = reconstruction::Primitive_To_Characteristic(cell_i, del_R, sound_speed, sound_speed * sound_speed, gamma); + + del_a_C = reconstruction::Primitive_To_Characteristic(cell_i, del_C, sound_speed, sound_speed * sound_speed, gamma); + + del_a_G = reconstruction::Primitive_To_Characteristic(cell_i, del_G, sound_speed, sound_speed * sound_speed, gamma); // Step 4 - Apply monotonicity constraints to the differences in the // characteristic variables @@ -356,43 +308,17 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou // Van Leer del_G = reconstruction::Van_Leer_Slope(del_L, del_R); - // Step 3 - Project the left, right, centered, and van Leer differences onto - // the characteristic variables - // Stone Eqn 37 (del_a are differences in characteristic variables, - // see Stone for notation) Use the eigenvectors given in Stone - // 2008, Appendix A - - del_a_L.a0 = - -0.5 * cell_ip1.density * del_L.velocity_x / sound_speed + 0.5 * del_L.pressure / (sound_speed * sound_speed); - del_a_L.a1 = del_L.density - del_L.pressure / (sound_speed * sound_speed); - del_a_L.a2 = del_L.velocity_y; - del_a_L.a3 = del_L.velocity_z; - del_a_L.a4 = - 0.5 * cell_ip1.density * del_L.velocity_x / sound_speed + 0.5 * del_L.pressure / (sound_speed * sound_speed); - - del_a_R.a0 = - -0.5 * cell_ip1.density * del_R.velocity_x / sound_speed + 0.5 * del_R.pressure / (sound_speed * sound_speed); - del_a_R.a1 = del_R.density - del_R.pressure / (sound_speed * sound_speed); - del_a_R.a2 = del_R.velocity_y; - del_a_R.a3 = del_R.velocity_z; - del_a_R.a4 = - 0.5 * cell_ip1.density * del_R.velocity_x / sound_speed + 0.5 * del_R.pressure / (sound_speed * sound_speed); - - del_a_C.a0 = - -0.5 * cell_ip1.density * del_C.velocity_x / sound_speed + 0.5 * del_C.pressure / (sound_speed * sound_speed); - del_a_C.a1 = del_C.density - del_C.pressure / (sound_speed * sound_speed); - del_a_C.a2 = del_C.velocity_y; - del_a_C.a3 = del_C.velocity_z; - del_a_C.a4 = - 0.5 * cell_ip1.density * del_C.velocity_x / sound_speed + 0.5 * del_C.pressure / (sound_speed * sound_speed); - - del_a_G.a0 = - -0.5 * cell_ip1.density * del_G.velocity_x / sound_speed + 0.5 * del_G.pressure / (sound_speed * sound_speed); - del_a_G.a1 = del_G.density - del_G.pressure / (sound_speed * sound_speed); - del_a_G.a2 = del_G.velocity_y; - del_a_G.a3 = del_G.velocity_z; - del_a_G.a4 = - 0.5 * cell_ip1.density * del_G.velocity_x / sound_speed + 0.5 * del_G.pressure / (sound_speed * sound_speed); + // Project the left, right, centered and van Leer differences onto the + // characteristic variables Stone Eqn 37 (del_a are differences in + // characteristic variables, see Stone for notation) Use the eigenvectors + // given in Stone 2008, Appendix A + del_a_L = reconstruction::Primitive_To_Characteristic(cell_ip1, del_L, sound_speed, sound_speed * sound_speed, gamma); + + del_a_R = reconstruction::Primitive_To_Characteristic(cell_ip1, del_R, sound_speed, sound_speed * sound_speed, gamma); + + del_a_C = reconstruction::Primitive_To_Characteristic(cell_ip1, del_C, sound_speed, sound_speed * sound_speed, gamma); + + del_a_G = reconstruction::Primitive_To_Characteristic(cell_ip1, del_G, sound_speed, sound_speed * sound_speed, gamma); // Step 4 - Apply monotonicity constraints to the differences in the // characteristic variables From 8adf32c7a2eb3646b59e95cb191ec5526c782500 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Mon, 17 Apr 2023 13:51:11 -0400 Subject: [PATCH 419/694] PPMC: Replace monotonization with functions --- src/reconstruction/ppmc_cuda.cu | 212 ++++---------------------------- 1 file changed, 21 insertions(+), 191 deletions(-) diff --git a/src/reconstruction/ppmc_cuda.cu b/src/reconstruction/ppmc_cuda.cu index fbca095e7..a92ad5088 100644 --- a/src/reconstruction/ppmc_cuda.cu +++ b/src/reconstruction/ppmc_cuda.cu @@ -55,8 +55,6 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou } // declare other variables to be used - reconstruction::Characteristic del_a_m; // characteristic slopes - reconstruction::Primitive del_m_im1, del_m_i, del_m_ip1; // Monotonized primitive slopes reconstruction::Primitive interface_R_imh, interface_L_iph; // Interface states // load the 5-cell stencil into registers @@ -110,7 +108,7 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou // Van Leer reconstruction::Primitive del_G = reconstruction::Van_Leer_Slope(del_L, del_R); - // Project the left, right, centered and van Leer differences onto the + // Step 3 - Project the left, right, centered and van Leer differences onto the // characteristic variables Stone Eqn 37 (del_a are differences in // characteristic variables, see Stone for notation) Use the eigenvectors // given in Stone 2008, Appendix A @@ -126,68 +124,12 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou reconstruction::Characteristic del_a_G = reconstruction::Primitive_To_Characteristic(cell_im1, del_G, sound_speed, sound_speed * sound_speed, gamma); - // Step 4 - Apply monotonicity constraints to the differences in the - // characteristic variables - // Stone Eqn 38 - - del_a_m.a0 = del_a_m.a1 = del_a_m.a2 = del_a_m.a3 = del_a_m.a4 = 0.0; - Real lim_slope_a, lim_slope_b; - if (del_a_L.a0 * del_a_R.a0 > 0.0) { - lim_slope_a = fmin(fabs(del_a_L.a0), fabs(del_a_R.a0)); - lim_slope_b = fmin(fabs(del_a_C.a0), fabs(del_a_G.a0)); - del_a_m.a0 = sgn_CUDA(del_a_C.a0) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); - } - if (del_a_L.a1 * del_a_R.a1 > 0.0) { - lim_slope_a = fmin(fabs(del_a_L.a1), fabs(del_a_R.a1)); - lim_slope_b = fmin(fabs(del_a_C.a1), fabs(del_a_G.a1)); - del_a_m.a1 = sgn_CUDA(del_a_C.a1) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); - } - if (del_a_L.a2 * del_a_R.a2 > 0.0) { - lim_slope_a = fmin(fabs(del_a_L.a2), fabs(del_a_R.a2)); - lim_slope_b = fmin(fabs(del_a_C.a2), fabs(del_a_G.a2)); - del_a_m.a2 = sgn_CUDA(del_a_C.a2) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); - } - if (del_a_L.a3 * del_a_R.a3 > 0.0) { - lim_slope_a = fmin(fabs(del_a_L.a3), fabs(del_a_R.a3)); - lim_slope_b = fmin(fabs(del_a_C.a3), fabs(del_a_G.a3)); - del_a_m.a3 = sgn_CUDA(del_a_C.a3) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); - } - if (del_a_L.a4 * del_a_R.a4 > 0.0) { - lim_slope_a = fmin(fabs(del_a_L.a4), fabs(del_a_R.a4)); - lim_slope_b = fmin(fabs(del_a_C.a4), fabs(del_a_G.a4)); - del_a_m.a4 = sgn_CUDA(del_a_C.a4) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); - } -#ifdef DE - if (del_L.gas_energy * del_R.gas_energy > 0.0) { - lim_slope_a = fmin(fabs(del_L.gas_energy), fabs(del_R.gas_energy)); - lim_slope_b = fmin(fabs(del_C.gas_energy), fabs(del_G.gas_energy)); - del_m_im1.gas_energy = sgn_CUDA(del_C.gas_energy) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); - } else { - del_m_im1.gas_energy = 0.0; - } -#endif // DE -#ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - if (del_L.scalar[i] * del_R.scalar[i] > 0.0) { - lim_slope_a = fmin(fabs(del_L.scalar[i]), fabs(del_R.scalar[i])); - lim_slope_b = fmin(fabs(del_C.scalar[i]), fabs(del_G.scalar[i])); - del_m_im1.scalar[i] = sgn_CUDA(del_C.scalar[i]) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); - } else { - del_m_im1.scalar[i] = 0.0; - } - } -#endif // SCALAR - - // Step 5 - Project the monotonized difference in the characteristic - // variables back onto the - // primitive variables - // Stone Eqn 39 - - del_m_im1.density = del_a_m.a0 + del_a_m.a1 + del_a_m.a4; - del_m_im1.velocity_x = -sound_speed * del_a_m.a0 / cell_im1.density + sound_speed * del_a_m.a4 / cell_im1.density; - del_m_im1.velocity_y = del_a_m.a2; - del_m_im1.velocity_z = del_a_m.a3; - del_m_im1.pressure = sound_speed * sound_speed * del_a_m.a0 + sound_speed * sound_speed * del_a_m.a4; + // Step 4 - Apply monotonicity constraints to the differences in the characteristic variables + // Step 5 - and project the monotonized difference in the characteristic variables back onto the primitive variables + // Stone Eqn 39 + reconstruction::Primitive const del_m_im1 = reconstruction::Monotonize_Characteristic_Return_Primitive( + cell_im1, del_L, del_R, del_C, del_G, del_a_L, del_a_R, del_a_C, del_a_G, sound_speed, sound_speed * sound_speed, + gamma); // ============= // Cell i slopes @@ -211,7 +153,7 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou // Van Leer del_G = reconstruction::Van_Leer_Slope(del_L, del_R); - // Project the left, right, centered and van Leer differences onto the + // Step 3 - Project the left, right, centered and van Leer differences onto the // characteristic variables Stone Eqn 37 (del_a are differences in // characteristic variables, see Stone for notation) Use the eigenvectors // given in Stone 2008, Appendix A @@ -223,68 +165,12 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou del_a_G = reconstruction::Primitive_To_Characteristic(cell_i, del_G, sound_speed, sound_speed * sound_speed, gamma); - // Step 4 - Apply monotonicity constraints to the differences in the - // characteristic variables - // Stone Eqn 38 - - del_a_m.a0 = del_a_m.a1 = del_a_m.a2 = del_a_m.a3 = del_a_m.a4 = 0.0; - - if (del_a_L.a0 * del_a_R.a0 > 0.0) { - lim_slope_a = fmin(fabs(del_a_L.a0), fabs(del_a_R.a0)); - lim_slope_b = fmin(fabs(del_a_C.a0), fabs(del_a_G.a0)); - del_a_m.a0 = sgn_CUDA(del_a_C.a0) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); - } - if (del_a_L.a1 * del_a_R.a1 > 0.0) { - lim_slope_a = fmin(fabs(del_a_L.a1), fabs(del_a_R.a1)); - lim_slope_b = fmin(fabs(del_a_C.a1), fabs(del_a_G.a1)); - del_a_m.a1 = sgn_CUDA(del_a_C.a1) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); - } - if (del_a_L.a2 * del_a_R.a2 > 0.0) { - lim_slope_a = fmin(fabs(del_a_L.a2), fabs(del_a_R.a2)); - lim_slope_b = fmin(fabs(del_a_C.a2), fabs(del_a_G.a2)); - del_a_m.a2 = sgn_CUDA(del_a_C.a2) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); - } - if (del_a_L.a3 * del_a_R.a3 > 0.0) { - lim_slope_a = fmin(fabs(del_a_L.a3), fabs(del_a_R.a3)); - lim_slope_b = fmin(fabs(del_a_C.a3), fabs(del_a_G.a3)); - del_a_m.a3 = sgn_CUDA(del_a_C.a3) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); - } - if (del_a_L.a4 * del_a_R.a4 > 0.0) { - lim_slope_a = fmin(fabs(del_a_L.a4), fabs(del_a_R.a4)); - lim_slope_b = fmin(fabs(del_a_C.a4), fabs(del_a_G.a4)); - del_a_m.a4 = sgn_CUDA(del_a_C.a4) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); - } -#ifdef DE - if (del_L.gas_energy * del_R.gas_energy > 0.0) { - lim_slope_a = fmin(fabs(del_L.gas_energy), fabs(del_R.gas_energy)); - lim_slope_b = fmin(fabs(del_C.gas_energy), fabs(del_G.gas_energy)); - del_m_i.gas_energy = sgn_CUDA(del_C.gas_energy) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); - } else { - del_m_i.gas_energy = 0.0; - } -#endif // DE -#ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - if (del_L.scalar[i] * del_R.scalar[i] > 0.0) { - lim_slope_a = fmin(fabs(del_L.scalar[i]), fabs(del_R.scalar[i])); - lim_slope_b = fmin(fabs(del_C.scalar[i]), fabs(del_G.scalar[i])); - del_m_i.scalar[i] = sgn_CUDA(del_C.scalar[i]) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); - } else { - del_m_i.scalar[i] = 0.0; - } - } -#endif // SCALAR - - // Step 5 - Project the monotonized difference in the characteristic - // variables back onto the - // primitive variables - // Stone Eqn 39 - - del_m_i.density = del_a_m.a0 + del_a_m.a1 + del_a_m.a4; - del_m_i.velocity_x = -sound_speed * del_a_m.a0 / cell_i.density + sound_speed * del_a_m.a4 / cell_i.density; - del_m_i.velocity_y = del_a_m.a2; - del_m_i.velocity_z = del_a_m.a3; - del_m_i.pressure = sound_speed * sound_speed * del_a_m.a0 + sound_speed * sound_speed * del_a_m.a4; + // Step 4 - Apply monotonicity constraints to the differences in the characteristic variables + // Step 5 - and project the monotonized difference in the characteristic variables back onto the primitive variables + // Stone Eqn 39 + reconstruction::Primitive del_m_i = reconstruction::Monotonize_Characteristic_Return_Primitive( + cell_i, del_L, del_R, del_C, del_G, del_a_L, del_a_R, del_a_C, del_a_G, sound_speed, sound_speed * sound_speed, + gamma); // =============== // Cell i+1 slopes @@ -308,7 +194,7 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou // Van Leer del_G = reconstruction::Van_Leer_Slope(del_L, del_R); - // Project the left, right, centered and van Leer differences onto the + // Step 3 - Project the left, right, centered and van Leer differences onto the // characteristic variables Stone Eqn 37 (del_a are differences in // characteristic variables, see Stone for notation) Use the eigenvectors // given in Stone 2008, Appendix A @@ -320,68 +206,12 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou del_a_G = reconstruction::Primitive_To_Characteristic(cell_ip1, del_G, sound_speed, sound_speed * sound_speed, gamma); - // Step 4 - Apply monotonicity constraints to the differences in the - // characteristic variables - // Stone Eqn 38 - - del_a_m.a0 = del_a_m.a1 = del_a_m.a2 = del_a_m.a3 = del_a_m.a4 = 0.0; - - if (del_a_L.a0 * del_a_R.a0 > 0.0) { - lim_slope_a = fmin(fabs(del_a_L.a0), fabs(del_a_R.a0)); - lim_slope_b = fmin(fabs(del_a_C.a0), fabs(del_a_G.a0)); - del_a_m.a0 = sgn_CUDA(del_a_C.a0) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); - } - if (del_a_L.a1 * del_a_R.a1 > 0.0) { - lim_slope_a = fmin(fabs(del_a_L.a1), fabs(del_a_R.a1)); - lim_slope_b = fmin(fabs(del_a_C.a1), fabs(del_a_G.a1)); - del_a_m.a1 = sgn_CUDA(del_a_C.a1) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); - } - if (del_a_L.a2 * del_a_R.a2 > 0.0) { - lim_slope_a = fmin(fabs(del_a_L.a2), fabs(del_a_R.a2)); - lim_slope_b = fmin(fabs(del_a_C.a2), fabs(del_a_G.a2)); - del_a_m.a2 = sgn_CUDA(del_a_C.a2) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); - } - if (del_a_L.a3 * del_a_R.a3 > 0.0) { - lim_slope_a = fmin(fabs(del_a_L.a3), fabs(del_a_R.a3)); - lim_slope_b = fmin(fabs(del_a_C.a3), fabs(del_a_G.a3)); - del_a_m.a3 = sgn_CUDA(del_a_C.a3) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); - } - if (del_a_L.a4 * del_a_R.a4 > 0.0) { - lim_slope_a = fmin(fabs(del_a_L.a4), fabs(del_a_R.a4)); - lim_slope_b = fmin(fabs(del_a_C.a4), fabs(del_a_G.a4)); - del_a_m.a4 = sgn_CUDA(del_a_C.a4) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); - } -#ifdef DE - if (del_L.gas_energy * del_R.gas_energy > 0.0) { - lim_slope_a = fmin(fabs(del_L.gas_energy), fabs(del_R.gas_energy)); - lim_slope_b = fmin(fabs(del_C.gas_energy), fabs(del_G.gas_energy)); - del_m_ip1.gas_energy = sgn_CUDA(del_C.gas_energy) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); - } else { - del_m_ip1.gas_energy = 0.0; - } -#endif // DE -#ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - if (del_L.scalar[i] * del_R.scalar[i] > 0.0) { - lim_slope_a = fmin(fabs(del_L.scalar[i]), fabs(del_R.scalar[i])); - lim_slope_b = fmin(fabs(del_C.scalar[i]), fabs(del_G.scalar[i])); - del_m_ip1.scalar[i] = sgn_CUDA(del_C.scalar[i]) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); - } else { - del_m_ip1.scalar[i] = 0.0; - } - } -#endif // SCALAR - - // Step 5 - Project the monotonized difference in the characteristic - // variables back onto the - // primitive variables - // Stone Eqn 39 - - del_m_ip1.density = del_a_m.a0 + del_a_m.a1 + del_a_m.a4; - del_m_ip1.velocity_x = -sound_speed * del_a_m.a0 / cell_ip1.density + sound_speed * del_a_m.a4 / cell_ip1.density; - del_m_ip1.velocity_y = del_a_m.a2; - del_m_ip1.velocity_z = del_a_m.a3; - del_m_ip1.pressure = sound_speed * sound_speed * del_a_m.a0 + sound_speed * sound_speed * del_a_m.a4; + // Step 4 - Apply monotonicity constraints to the differences in the characteristic variables + // Step 5 - and project the monotonized difference in the characteristic variables back onto the primitive variables + // Stone Eqn 39 + reconstruction::Primitive const del_m_ip1 = reconstruction::Monotonize_Characteristic_Return_Primitive( + cell_ip1, del_L, del_R, del_C, del_G, del_a_L, del_a_R, del_a_C, del_a_G, sound_speed, sound_speed * sound_speed, + gamma); // Step 6 - Use parabolic interpolation to compute values at the left and // right of each cell center From 32d78e54a15b1c14eccd8b44f936c60973297341 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Mon, 17 Apr 2023 14:02:02 -0400 Subject: [PATCH 420/694] Rename reconstruction::Calc_Interface Renamed to reconstruction::Calc_Interface_Linear to differentiate from the parabolic version --- src/reconstruction/plmc_cuda.cu | 4 ++-- src/reconstruction/ppmc_cuda.cu | 12 ++++-------- src/reconstruction/reconstruction.h | 4 ++-- src/reconstruction/reconstruction_tests.cu | 2 +- 4 files changed, 9 insertions(+), 13 deletions(-) diff --git a/src/reconstruction/plmc_cuda.cu b/src/reconstruction/plmc_cuda.cu index 1276de34a..b1a60be86 100644 --- a/src/reconstruction/plmc_cuda.cu +++ b/src/reconstruction/plmc_cuda.cu @@ -112,8 +112,8 @@ __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou cell_i, del_L, del_R, del_C, del_G, del_a_L, del_a_R, del_a_C, del_a_G, sound_speed, sound_speed_squared, gamma); // Compute the left and right interface values using the monotonized difference in the primitive variables - reconstruction::Primitive interface_L_iph = reconstruction::Calc_Interface(cell_i, del_m_i, 1.0); - reconstruction::Primitive interface_R_imh = reconstruction::Calc_Interface(cell_i, del_m_i, -1.0); + reconstruction::Primitive interface_L_iph = reconstruction::Calc_Interface_Linear(cell_i, del_m_i, 1.0); + reconstruction::Primitive interface_R_imh = reconstruction::Calc_Interface_Linear(cell_i, del_m_i, -1.0); #ifndef VL diff --git a/src/reconstruction/ppmc_cuda.cu b/src/reconstruction/ppmc_cuda.cu index a92ad5088..ab0e9077c 100644 --- a/src/reconstruction/ppmc_cuda.cu +++ b/src/reconstruction/ppmc_cuda.cu @@ -213,10 +213,8 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou cell_ip1, del_L, del_R, del_C, del_G, del_a_L, del_a_R, del_a_C, del_a_G, sound_speed, sound_speed * sound_speed, gamma); - // Step 6 - Use parabolic interpolation to compute values at the left and - // right of each cell center - // Here, the subscripts L and R refer to the left and right side of - // the ith cell center Stone Eqn 46 + // Step 6 - Use parabolic interpolation to compute values at the left and right of each cell center Here, the + // subscripts L and R refer to the left and right side of the ith cell center Stone Eqn 46 interface_R_imh.density = 0.5 * (cell_i.density + cell_im1.density) - (del_m_i.density - del_m_im1.density) / 6.0; interface_R_imh.velocity_x = @@ -253,10 +251,8 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou } #endif // SCALAR - // Step 7 - Apply further monotonicity constraints to ensure the values on - // the left and right side - // of cell center lie between neighboring cell-centered values - // Stone Eqns 47 - 53 + // Step 7 - Apply further monotonicity constraints to ensure the values on the left and right side of cell center lie + // between neighboring cell-centered values Stone Eqns 47 - 53 if ((interface_L_iph.density - cell_i.density) * (cell_i.density - interface_R_imh.density) <= 0) { interface_R_imh.density = interface_L_iph.density = cell_i.density; diff --git a/src/reconstruction/reconstruction.h b/src/reconstruction/reconstruction.h index bdfd9b46a..6ce061fff 100644 --- a/src/reconstruction/reconstruction.h +++ b/src/reconstruction/reconstruction.h @@ -518,8 +518,8 @@ Primitive __device__ __inline__ Monotonize_Characteristic_Return_Primitive( * \param[in] sign Whether to add or subtract the slope. +1 to add it and -1 to subtract it * \return Primitive The interface state */ -Primitive __device__ __host__ __inline__ Calc_Interface(Primitive const &primitive, Primitive const &slopes, - Real const &sign) +Primitive __device__ __host__ __inline__ Calc_Interface_Linear(Primitive const &primitive, Primitive const &slopes, + Real const &sign) { Primitive output; diff --git a/src/reconstruction/reconstruction_tests.cu b/src/reconstruction/reconstruction_tests.cu index d75cbfd3c..f61755ec2 100644 --- a/src/reconstruction/reconstruction_tests.cu +++ b/src/reconstruction/reconstruction_tests.cu @@ -298,7 +298,7 @@ TEST(tALLReconstructionCalcInterface, CorrectInputExpectCorrectOutput) Real const coef = 0.5; // Get test data - auto test_data = reconstruction::Calc_Interface(left, right, coef); + auto test_data = reconstruction::Calc_Interface_Linear(left, right, coef); // Check results #ifdef MHD From 1e4b143fade1d5e514c838d6b8d735ca1b584dbf Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Mon, 17 Apr 2023 14:50:04 -0400 Subject: [PATCH 421/694] PPMC: Add a function for parabolic interpolation New `reconstruction::Calc_interface_Parabolic` function and test. Function used in the PPMC reconstructor --- src/reconstruction/ppmc_cuda.cu | 41 ++---------------- src/reconstruction/reconstruction.h | 50 +++++++++++++++++++++- src/reconstruction/reconstruction_tests.cu | 43 ++++++++++++++++++- 3 files changed, 95 insertions(+), 39 deletions(-) diff --git a/src/reconstruction/ppmc_cuda.cu b/src/reconstruction/ppmc_cuda.cu index ab0e9077c..0c96240b5 100644 --- a/src/reconstruction/ppmc_cuda.cu +++ b/src/reconstruction/ppmc_cuda.cu @@ -54,9 +54,6 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou break; } - // declare other variables to be used - reconstruction::Primitive interface_R_imh, interface_L_iph; // Interface states - // load the 5-cell stencil into registers // cell i reconstruction::Primitive const cell_i = @@ -215,41 +212,11 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou // Step 6 - Use parabolic interpolation to compute values at the left and right of each cell center Here, the // subscripts L and R refer to the left and right side of the ith cell center Stone Eqn 46 + reconstruction::Primitive interface_L_iph = + reconstruction::Calc_Interface_Parabolic(cell_ip1, cell_i, del_m_ip1, del_m_i); - interface_R_imh.density = 0.5 * (cell_i.density + cell_im1.density) - (del_m_i.density - del_m_im1.density) / 6.0; - interface_R_imh.velocity_x = - 0.5 * (cell_i.velocity_x + cell_im1.velocity_x) - (del_m_i.velocity_x - del_m_im1.velocity_x) / 6.0; - interface_R_imh.velocity_y = - 0.5 * (cell_i.velocity_y + cell_im1.velocity_y) - (del_m_i.velocity_y - del_m_im1.velocity_y) / 6.0; - interface_R_imh.velocity_z = - 0.5 * (cell_i.velocity_z + cell_im1.velocity_z) - (del_m_i.velocity_z - del_m_im1.velocity_z) / 6.0; - interface_R_imh.pressure = - 0.5 * (cell_i.pressure + cell_im1.pressure) - (del_m_i.pressure - del_m_im1.pressure) / 6.0; - - interface_L_iph.density = 0.5 * (cell_ip1.density + cell_i.density) - (del_m_ip1.density - del_m_i.density) / 6.0; - interface_L_iph.velocity_x = - 0.5 * (cell_ip1.velocity_x + cell_i.velocity_x) - (del_m_ip1.velocity_x - del_m_i.velocity_x) / 6.0; - interface_L_iph.velocity_y = - 0.5 * (cell_ip1.velocity_y + cell_i.velocity_y) - (del_m_ip1.velocity_y - del_m_i.velocity_y) / 6.0; - interface_L_iph.velocity_z = - 0.5 * (cell_ip1.velocity_z + cell_i.velocity_z) - (del_m_ip1.velocity_z - del_m_i.velocity_z) / 6.0; - interface_L_iph.pressure = - 0.5 * (cell_ip1.pressure + cell_i.pressure) - (del_m_ip1.pressure - del_m_i.pressure) / 6.0; - -#ifdef DE - interface_R_imh.gas_energy = - 0.5 * (cell_i.gas_energy + cell_im1.gas_energy) - (del_m_i.gas_energy - del_m_im1.gas_energy) / 6.0; - interface_L_iph.gas_energy = - 0.5 * (cell_ip1.gas_energy + cell_i.gas_energy) - (del_m_ip1.gas_energy - del_m_i.gas_energy) / 6.0; -#endif // DE -#ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - interface_R_imh.scalar[i] = - 0.5 * (cell_i.scalar[i] + cell_im1.scalar[i]) - (del_m_i.scalar[i] - del_m_im1.scalar[i]) / 6.0; - interface_L_iph.scalar[i] = - 0.5 * (cell_ip1.scalar[i] + cell_i.scalar[i]) - (del_m_ip1.scalar[i] - del_m_i.scalar[i]) / 6.0; - } -#endif // SCALAR + reconstruction::Primitive interface_R_imh = + reconstruction::Calc_Interface_Parabolic(cell_i, cell_im1, del_m_i, del_m_im1); // Step 7 - Apply further monotonicity constraints to ensure the values on the left and right side of cell center lie // between neighboring cell-centered values Stone Eqns 47 - 53 diff --git a/src/reconstruction/reconstruction.h b/src/reconstruction/reconstruction.h index 6ce061fff..02170eb69 100644 --- a/src/reconstruction/reconstruction.h +++ b/src/reconstruction/reconstruction.h @@ -511,7 +511,7 @@ Primitive __device__ __inline__ Monotonize_Characteristic_Return_Primitive( // ===================================================================================================================== /*! - * \brief Compute the interface state from the slope and cell centered state. + * \brief Compute the interface state from the slope and cell centered state using linear interpolation * * \param[in] primitive The cell centered state * \param[in] slopes The slopes @@ -549,6 +549,54 @@ Primitive __device__ __host__ __inline__ Calc_Interface_Linear(Primitive const & } // ===================================================================================================================== +// ===================================================================================================================== +/*! + * \brief Compute the interface state from the slope and cell centered state using parabolic interpolation + * + * \param[in] cell_i The state in cell i + * \param[in] cell_im1 The state in cell i-1 + * \param[in] slopes_i The slopes in cell i + * \param[in] slopes_im1 The slopes in cell i-1 + * \return Primitive The interface state + */ +Primitive __device__ __host__ __inline__ Calc_Interface_Parabolic(Primitive const &cell_i, Primitive const &cell_im1, + Primitive const &slopes_i, + Primitive const &slopes_im1) +{ + Primitive output; + + auto interface = [](Real const &state_i, Real const &state_im1, Real const &slope_i, Real const &slope_im1) -> Real { + return 0.5 * (state_i + state_im1) - (slope_i - slope_im1) / 6.0; + }; + + output.density = interface(cell_i.density, cell_im1.density, slopes_i.density, slopes_im1.density); + output.velocity_x = interface(cell_i.velocity_x, cell_im1.velocity_x, slopes_i.velocity_x, slopes_im1.velocity_x); + output.velocity_y = interface(cell_i.velocity_y, cell_im1.velocity_y, slopes_i.velocity_y, slopes_im1.velocity_y); + output.velocity_z = interface(cell_i.velocity_z, cell_im1.velocity_z, slopes_i.velocity_z, slopes_im1.velocity_z); + output.pressure = interface(cell_i.pressure, cell_im1.pressure, slopes_i.pressure, slopes_im1.pressure); + +#ifdef MHD + output.magnetic_y = interface(cell_i.magnetic_y, cell_im1.magnetic_y, slopes_i.magnetic_y, slopes_im1.magnetic_y); + ; + output.magnetic_z = interface(cell_i.magnetic_z, cell_im1.magnetic_z, slopes_i.magnetic_z, slopes_im1.magnetic_z); + ; +#endif // MHD + +#ifdef DE + output.gas_energy = interface(cell_i.gas_energy, cell_im1.gas_energy, slopes_i.gas_energy, slopes_im1.gas_energy); + ; +#endif // DE +#ifdef SCALAR + for (int i = 0; i < NSCALARS; i++) { + output.scalar[i] = interface(cell_i.scalar[i], cell_im1.scalar[i], slopes_i.scalar[i], slopes_im1.scalar[i]); + ; + } +#endif // SCALAR + + return output; +} +// ===================================================================================================================== + // ===================================================================================================================== /*! * \brief Write the interface data to the appropriate arrays diff --git a/src/reconstruction/reconstruction_tests.cu b/src/reconstruction/reconstruction_tests.cu index f61755ec2..854eefdfa 100644 --- a/src/reconstruction/reconstruction_tests.cu +++ b/src/reconstruction/reconstruction_tests.cu @@ -285,7 +285,7 @@ TEST(tALLReconstructionMonotonizeCharacteristicReturnPrimitive, CorrectInputExpe #endif // MHD } -TEST(tALLReconstructionCalcInterface, CorrectInputExpectCorrectOutput) +TEST(tALLReconstructionCalcInterfaceLinear, CorrectInputExpectCorrectOutput) { // Setup input data #ifdef MHD @@ -320,6 +320,47 @@ TEST(tALLReconstructionCalcInterface, CorrectInputExpectCorrectOutput) #endif // MHD } +TEST(tALLReconstructionCalcInterfaceParabolic, CorrectInputExpectCorrectOutput) +{ + // Setup input data +#ifdef MHD + reconstruction::Primitive cell_i{1, 2, 3, 4, 5, 6, 7, 8}; + reconstruction::Primitive cell_im1{6, 7, 8, 9, 10, 11, 12, 13}; + reconstruction::Primitive slopes_i{14, 15, 16, 17, 18, 19, 20, 21}; + reconstruction::Primitive slopes_im1{22, 23, 24, 25, 26, 27, 28, 29}; +#else // MHD + reconstruction::Primitive cell_i{1, 2, 3, 4, 5}; + reconstruction::Primitive cell_im1{6, 7, 8, 9, 10}; + reconstruction::Primitive slopes_i{14, 15, 16, 17, 18}; + reconstruction::Primitive slopes_im1{22, 23, 24, 25, 26}; +#endif // MHD + + // Get test data + auto test_data = reconstruction::Calc_Interface_Parabolic(cell_i, cell_im1, slopes_i, slopes_im1); + + // Check results +#ifdef MHD + reconstruction::Primitive const fiducial_data{4.833333333333333, 5.833333333333333, 6.833333333333333, + 7.833333333333333, 8.8333333333333339, 0.0, + 10.833333333333334, 11.833333333333334}; + testingUtilities::checkResults(fiducial_data.density, test_data.density, "density"); + testingUtilities::checkResults(fiducial_data.velocity_x, test_data.velocity_x, "velocity_x"); + testingUtilities::checkResults(fiducial_data.velocity_y, test_data.velocity_y, "velocity_y"); + testingUtilities::checkResults(fiducial_data.velocity_z, test_data.velocity_z, "velocity_z"); + testingUtilities::checkResults(fiducial_data.pressure, test_data.pressure, "pressure"); + testingUtilities::checkResults(fiducial_data.magnetic_y, test_data.magnetic_y, "magnetic_y"); + testingUtilities::checkResults(fiducial_data.magnetic_z, test_data.magnetic_z, "magnetic_z"); +#else // MHD + reconstruction::Primitive const fiducial_data{4.833333333333333, 5.833333333333333, 6.833333333333333, + 7.833333333333333, 8.8333333333333339}; + testingUtilities::checkResults(fiducial_data.density, test_data.density, "density"); + testingUtilities::checkResults(fiducial_data.velocity_x, test_data.velocity_x, "velocity_x"); + testingUtilities::checkResults(fiducial_data.velocity_y, test_data.velocity_y, "velocity_y"); + testingUtilities::checkResults(fiducial_data.velocity_z, test_data.velocity_z, "velocity_z"); + testingUtilities::checkResults(fiducial_data.pressure, test_data.pressure, "pressure"); +#endif // MHD +} + TEST(tALLReconstructionWriteData, CorrectInputExpectCorrectOutput) { // Set up test and mock up grid From f0095c7f166a8b392e95d1ac59ec2abed3c76470 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Mon, 17 Apr 2023 16:30:40 -0400 Subject: [PATCH 422/694] PPMC: Replace interface monotonization with a function --- src/reconstruction/ppmc_cuda.cu | 141 +-------------------- src/reconstruction/reconstruction.h | 73 +++++++++++ src/reconstruction/reconstruction_tests.cu | 63 +++++++++ 3 files changed, 137 insertions(+), 140 deletions(-) diff --git a/src/reconstruction/ppmc_cuda.cu b/src/reconstruction/ppmc_cuda.cu index 0c96240b5..7e53571d6 100644 --- a/src/reconstruction/ppmc_cuda.cu +++ b/src/reconstruction/ppmc_cuda.cu @@ -221,146 +221,7 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou // Step 7 - Apply further monotonicity constraints to ensure the values on the left and right side of cell center lie // between neighboring cell-centered values Stone Eqns 47 - 53 - if ((interface_L_iph.density - cell_i.density) * (cell_i.density - interface_R_imh.density) <= 0) { - interface_R_imh.density = interface_L_iph.density = cell_i.density; - } - if ((interface_L_iph.velocity_x - cell_i.velocity_x) * (cell_i.velocity_x - interface_R_imh.velocity_x) <= 0) { - interface_R_imh.velocity_x = interface_L_iph.velocity_x = cell_i.velocity_x; - } - if ((interface_L_iph.velocity_y - cell_i.velocity_y) * (cell_i.velocity_y - interface_R_imh.velocity_y) <= 0) { - interface_R_imh.velocity_y = interface_L_iph.velocity_y = cell_i.velocity_y; - } - if ((interface_L_iph.velocity_z - cell_i.velocity_z) * (cell_i.velocity_z - interface_R_imh.velocity_z) <= 0) { - interface_R_imh.velocity_z = interface_L_iph.velocity_z = cell_i.velocity_z; - } - if ((interface_L_iph.pressure - cell_i.pressure) * (cell_i.pressure - interface_R_imh.pressure) <= 0) { - interface_R_imh.pressure = interface_L_iph.pressure = cell_i.pressure; - } - - if (6.0 * (interface_L_iph.density - interface_R_imh.density) * - (cell_i.density - 0.5 * (interface_R_imh.density + interface_L_iph.density)) > - (interface_L_iph.density - interface_R_imh.density) * (interface_L_iph.density - interface_R_imh.density)) { - interface_R_imh.density = 3.0 * cell_i.density - 2.0 * interface_L_iph.density; - } - if (6.0 * (interface_L_iph.velocity_x - interface_R_imh.velocity_x) * - (cell_i.velocity_x - 0.5 * (interface_R_imh.velocity_x + interface_L_iph.velocity_x)) > - (interface_L_iph.velocity_x - interface_R_imh.velocity_x) * - (interface_L_iph.velocity_x - interface_R_imh.velocity_x)) { - interface_R_imh.velocity_x = 3.0 * cell_i.velocity_x - 2.0 * interface_L_iph.velocity_x; - } - if (6.0 * (interface_L_iph.velocity_y - interface_R_imh.velocity_y) * - (cell_i.velocity_y - 0.5 * (interface_R_imh.velocity_y + interface_L_iph.velocity_y)) > - (interface_L_iph.velocity_y - interface_R_imh.velocity_y) * - (interface_L_iph.velocity_y - interface_R_imh.velocity_y)) { - interface_R_imh.velocity_y = 3.0 * cell_i.velocity_y - 2.0 * interface_L_iph.velocity_y; - } - if (6.0 * (interface_L_iph.velocity_z - interface_R_imh.velocity_z) * - (cell_i.velocity_z - 0.5 * (interface_R_imh.velocity_z + interface_L_iph.velocity_z)) > - (interface_L_iph.velocity_z - interface_R_imh.velocity_z) * - (interface_L_iph.velocity_z - interface_R_imh.velocity_z)) { - interface_R_imh.velocity_z = 3.0 * cell_i.velocity_z - 2.0 * interface_L_iph.velocity_z; - } - if (6.0 * (interface_L_iph.pressure - interface_R_imh.pressure) * - (cell_i.pressure - 0.5 * (interface_R_imh.pressure + interface_L_iph.pressure)) > - (interface_L_iph.pressure - interface_R_imh.pressure) * (interface_L_iph.pressure - interface_R_imh.pressure)) { - interface_R_imh.pressure = 3.0 * cell_i.pressure - 2.0 * interface_L_iph.pressure; - } - - if (6.0 * (interface_L_iph.density - interface_R_imh.density) * - (cell_i.density - 0.5 * (interface_R_imh.density + interface_L_iph.density)) < - -(interface_L_iph.density - interface_R_imh.density) * (interface_L_iph.density - interface_R_imh.density)) { - interface_L_iph.density = 3.0 * cell_i.density - 2.0 * interface_R_imh.density; - } - if (6.0 * (interface_L_iph.velocity_x - interface_R_imh.velocity_x) * - (cell_i.velocity_x - 0.5 * (interface_R_imh.velocity_x + interface_L_iph.velocity_x)) < - -(interface_L_iph.velocity_x - interface_R_imh.velocity_x) * - (interface_L_iph.velocity_x - interface_R_imh.velocity_x)) { - interface_L_iph.velocity_x = 3.0 * cell_i.velocity_x - 2.0 * interface_R_imh.velocity_x; - } - if (6.0 * (interface_L_iph.velocity_y - interface_R_imh.velocity_y) * - (cell_i.velocity_y - 0.5 * (interface_R_imh.velocity_y + interface_L_iph.velocity_y)) < - -(interface_L_iph.velocity_y - interface_R_imh.velocity_y) * - (interface_L_iph.velocity_y - interface_R_imh.velocity_y)) { - interface_L_iph.velocity_y = 3.0 * cell_i.velocity_y - 2.0 * interface_R_imh.velocity_y; - } - if (6.0 * (interface_L_iph.velocity_z - interface_R_imh.velocity_z) * - (cell_i.velocity_z - 0.5 * (interface_R_imh.velocity_z + interface_L_iph.velocity_z)) < - -(interface_L_iph.velocity_z - interface_R_imh.velocity_z) * - (interface_L_iph.velocity_z - interface_R_imh.velocity_z)) { - interface_L_iph.velocity_z = 3.0 * cell_i.velocity_z - 2.0 * interface_R_imh.velocity_z; - } - if (6.0 * (interface_L_iph.pressure - interface_R_imh.pressure) * - (cell_i.pressure - 0.5 * (interface_R_imh.pressure + interface_L_iph.pressure)) < - -(interface_L_iph.pressure - interface_R_imh.pressure) * (interface_L_iph.pressure - interface_R_imh.pressure)) { - interface_L_iph.pressure = 3.0 * cell_i.pressure - 2.0 * interface_R_imh.pressure; - } - - interface_R_imh.density = fmax(fmin(cell_i.density, cell_im1.density), interface_R_imh.density); - interface_R_imh.density = fmin(fmax(cell_i.density, cell_im1.density), interface_R_imh.density); - interface_L_iph.density = fmax(fmin(cell_i.density, cell_ip1.density), interface_L_iph.density); - interface_L_iph.density = fmin(fmax(cell_i.density, cell_ip1.density), interface_L_iph.density); - interface_R_imh.velocity_x = fmax(fmin(cell_i.velocity_x, cell_im1.velocity_x), interface_R_imh.velocity_x); - interface_R_imh.velocity_x = fmin(fmax(cell_i.velocity_x, cell_im1.velocity_x), interface_R_imh.velocity_x); - interface_L_iph.velocity_x = fmax(fmin(cell_i.velocity_x, cell_ip1.velocity_x), interface_L_iph.velocity_x); - interface_L_iph.velocity_x = fmin(fmax(cell_i.velocity_x, cell_ip1.velocity_x), interface_L_iph.velocity_x); - interface_R_imh.velocity_y = fmax(fmin(cell_i.velocity_y, cell_im1.velocity_y), interface_R_imh.velocity_y); - interface_R_imh.velocity_y = fmin(fmax(cell_i.velocity_y, cell_im1.velocity_y), interface_R_imh.velocity_y); - interface_L_iph.velocity_y = fmax(fmin(cell_i.velocity_y, cell_ip1.velocity_y), interface_L_iph.velocity_y); - interface_L_iph.velocity_y = fmin(fmax(cell_i.velocity_y, cell_ip1.velocity_y), interface_L_iph.velocity_y); - interface_R_imh.velocity_z = fmax(fmin(cell_i.velocity_z, cell_im1.velocity_z), interface_R_imh.velocity_z); - interface_R_imh.velocity_z = fmin(fmax(cell_i.velocity_z, cell_im1.velocity_z), interface_R_imh.velocity_z); - interface_L_iph.velocity_z = fmax(fmin(cell_i.velocity_z, cell_ip1.velocity_z), interface_L_iph.velocity_z); - interface_L_iph.velocity_z = fmin(fmax(cell_i.velocity_z, cell_ip1.velocity_z), interface_L_iph.velocity_z); - interface_R_imh.pressure = fmax(fmin(cell_i.pressure, cell_im1.pressure), interface_R_imh.pressure); - interface_R_imh.pressure = fmin(fmax(cell_i.pressure, cell_im1.pressure), interface_R_imh.pressure); - interface_L_iph.pressure = fmax(fmin(cell_i.pressure, cell_ip1.pressure), interface_L_iph.pressure); - interface_L_iph.pressure = fmin(fmax(cell_i.pressure, cell_ip1.pressure), interface_L_iph.pressure); - -#ifdef DE - if ((interface_L_iph.gas_energy - cell_i.gas_energy) * (cell_i.gas_energy - interface_R_imh.gas_energy) <= 0) { - interface_R_imh.gas_energy = interface_L_iph.gas_energy = cell_i.gas_energy; - } - if (6.0 * (interface_L_iph.gas_energy - interface_R_imh.gas_energy) * - (cell_i.gas_energy - 0.5 * (interface_R_imh.gas_energy + interface_L_iph.gas_energy)) > - (interface_L_iph.gas_energy - interface_R_imh.gas_energy) * - (interface_L_iph.gas_energy - interface_R_imh.gas_energy)) { - interface_R_imh.gas_energy = 3.0 * cell_i.gas_energy - 2.0 * interface_L_iph.gas_energy; - } - if (6.0 * (interface_L_iph.gas_energy - interface_R_imh.gas_energy) * - (cell_i.gas_energy - 0.5 * (interface_R_imh.gas_energy + interface_L_iph.gas_energy)) < - -(interface_L_iph.gas_energy - interface_R_imh.gas_energy) * - (interface_L_iph.gas_energy - interface_R_imh.gas_energy)) { - interface_L_iph.gas_energy = 3.0 * cell_i.gas_energy - 2.0 * interface_R_imh.gas_energy; - } - interface_R_imh.gas_energy = fmax(fmin(cell_i.gas_energy, cell_im1.gas_energy), interface_R_imh.gas_energy); - interface_R_imh.gas_energy = fmin(fmax(cell_i.gas_energy, cell_im1.gas_energy), interface_R_imh.gas_energy); - interface_L_iph.gas_energy = fmax(fmin(cell_i.gas_energy, cell_ip1.gas_energy), interface_L_iph.gas_energy); - interface_L_iph.gas_energy = fmin(fmax(cell_i.gas_energy, cell_ip1.gas_energy), interface_L_iph.gas_energy); -#endif // DE - -#ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - if ((interface_L_iph.scalar[i] - cell_i.scalar[i]) * (cell_i.scalar[i] - interface_R_imh.scalar[i]) <= 0) { - interface_R_imh.scalar[i] = interface_L_iph.scalar[i] = cell_i.scalar[i]; - } - if (6.0 * (interface_L_iph.scalar[i] - interface_R_imh.scalar[i]) * - (cell_i.scalar[i] - 0.5 * (interface_R_imh.scalar[i] + interface_L_iph.scalar[i])) > - (interface_L_iph.scalar[i] - interface_R_imh.scalar[i]) * - (interface_L_iph.scalar[i] - interface_R_imh.scalar[i])) { - interface_R_imh.scalar[i] = 3.0 * cell_i.scalar[i] - 2.0 * interface_L_iph.scalar[i]; - } - if (6.0 * (interface_L_iph.scalar[i] - interface_R_imh.scalar[i]) * - (cell_i.scalar[i] - 0.5 * (interface_R_imh.scalar[i] + interface_L_iph.scalar[i])) < - -(interface_L_iph.scalar[i] - interface_R_imh.scalar[i]) * - (interface_L_iph.scalar[i] - interface_R_imh.scalar[i])) { - interface_L_iph.scalar[i] = 3.0 * cell_i.scalar[i] - 2.0 * interface_R_imh.scalar[i]; - } - interface_R_imh.scalar[i] = fmax(fmin(cell_i.scalar[i], cell_im1.scalar[i]), interface_R_imh.scalar[i]); - interface_R_imh.scalar[i] = fmin(fmax(cell_i.scalar[i], cell_im1.scalar[i]), interface_R_imh.scalar[i]); - interface_L_iph.scalar[i] = fmax(fmin(cell_i.scalar[i], cell_ip1.scalar[i]), interface_L_iph.scalar[i]); - interface_L_iph.scalar[i] = fmin(fmax(cell_i.scalar[i], cell_ip1.scalar[i]), interface_L_iph.scalar[i]); - } -#endif // SCALAR + reconstruction::Monotize_Parabolic_Interface(cell_i, cell_im1, cell_ip1, interface_L_iph, interface_R_imh); #ifndef VL // Step 8 - Compute the coefficients for the monotonized parabolic diff --git a/src/reconstruction/reconstruction.h b/src/reconstruction/reconstruction.h index 02170eb69..01d35aeae 100644 --- a/src/reconstruction/reconstruction.h +++ b/src/reconstruction/reconstruction.h @@ -509,6 +509,79 @@ Primitive __device__ __inline__ Monotonize_Characteristic_Return_Primitive( } // ===================================================================================================================== +// ===================================================================================================================== +/*! + * \brief Monotonize the parabolic interface states + * + * \param[in] cell_i The state in cell i + * \param[in] cell_im1 The state in cell i-1 + * \param[in] cell_ip1 The state in cell i+1 + * \param[in,out] interface_L_iph The left interface state at i+1/2 + * \param[in,out] interface_R_imh The right interface state at i-1/2 + * \return Primitive + */ +void __device__ __host__ __inline__ Monotize_Parabolic_Interface(Primitive const &cell_i, Primitive const &cell_im1, + Primitive const &cell_ip1, Primitive &interface_L_iph, + Primitive &interface_R_imh) +{ + // The function that will actually do the monotozation. Note the return by refernce of the interface state + auto Monotonize = [](Real const &state_i, Real const &state_im1, Real const &state_ip1, Real &interface_L, + Real &interface_R) { + // First monotonicity constraint. Equations 47-49 in Stone et al. 2008 + if ((interface_L - state_i) * (state_i - interface_R) <= 0.0) { + interface_L = state_i; + interface_R = state_i; + } + + // Second monotonicity constraint. Equations 50 & 51 in Stone et al. 2008 + Real const term_1 = 6.0 * (interface_L - interface_R) * (state_i - 0.5 * (interface_R + interface_L)); + Real const term_2 = pow(interface_L - interface_R, 2.0); + if (term_1 > term_2) { + interface_R = 3.0 * state_i - 2.0 * interface_L; + } + + // Third monotonicity constraint. Equations 52 & 53 in Stone et al. 2008 + if (term_1 < -term_2) { + interface_L = 3.0 * state_i - 2.0 * interface_R; + } + + // Final monotocity constraint + interface_R = fmax(fmin(state_i, state_im1), interface_R); + interface_R = fmin(fmax(state_i, state_im1), interface_R); + interface_L = fmax(fmin(state_i, state_ip1), interface_L); + interface_L = fmin(fmax(state_i, state_ip1), interface_L); + }; + + // Monotonize each interface state + Monotonize(cell_i.density, cell_im1.density, cell_ip1.density, interface_L_iph.density, interface_R_imh.density); + Monotonize(cell_i.velocity_x, cell_im1.velocity_x, cell_ip1.velocity_x, interface_L_iph.velocity_x, + interface_R_imh.velocity_x); + Monotonize(cell_i.velocity_y, cell_im1.velocity_y, cell_ip1.velocity_y, interface_L_iph.velocity_y, + interface_R_imh.velocity_y); + Monotonize(cell_i.velocity_z, cell_im1.velocity_z, cell_ip1.velocity_z, interface_L_iph.velocity_z, + interface_R_imh.velocity_z); + Monotonize(cell_i.pressure, cell_im1.pressure, cell_ip1.pressure, interface_L_iph.pressure, interface_R_imh.pressure); + +#ifdef MHD + Monotonize(cell_i.magnetic_y, cell_im1.magnetic_y, cell_ip1.magnetic_y, interface_L_iph.magnetic_y, + interface_R_imh.magnetic_y); + Monotonize(cell_i.magnetic_z, cell_im1.magnetic_z, cell_ip1.magnetic_z, interface_L_iph.magnetic_z, + interface_R_imh.magnetic_z); +#endif // MHD + +#ifdef DE + Monotonize(cell_i.gas_energy, cell_im1.gas_energy, cell_ip1.gas_energy, interface_L_iph.gas_energy, + interface_R_imh.gas_energy); +#endif // DE +#ifdef SCALAR + for (int i = 0; i < NSCALARS; i++) { + Monotonize(cell_i.scalar[i], cell_im1.scalar[i], cell_ip1.scalar[i], interface_L_iph.scalar[i], + interface_R_imh.scalar[i]); + } +#endif // SCALAR +} +// ===================================================================================================================== + // ===================================================================================================================== /*! * \brief Compute the interface state from the slope and cell centered state using linear interpolation diff --git a/src/reconstruction/reconstruction_tests.cu b/src/reconstruction/reconstruction_tests.cu index 854eefdfa..3f3b72691 100644 --- a/src/reconstruction/reconstruction_tests.cu +++ b/src/reconstruction/reconstruction_tests.cu @@ -285,6 +285,69 @@ TEST(tALLReconstructionMonotonizeCharacteristicReturnPrimitive, CorrectInputExpe #endif // MHD } +TEST(tALLReconstructionMonotizeParabolicInterface, CorrectInputExpectCorrectOutput) +{ +// Input Data +#ifdef MHD + reconstruction::Primitive const cell_i{1.4708046701, 9.5021020181, 3.7123503442, 4.6476103466, + 3.7096802847, 8.9692274397, 9.3416846121, 2.7707989229}; + reconstruction::Primitive const cell_im1{3.9547588941, 3.1552319951, 3.0209247624, 9.5841013261, + 2.2945188332, 8.2028929443, 1.6941969156, 8.9424967039}; + reconstruction::Primitive const cell_ip1{5.1973323534, 6.9132613767, 1.8397298636, 5.341960387, + 9.093498542, 3.6911762486, 7.3777130085, 3.6711825219}; + reconstruction::Primitive interface_L_iph{6.7787324804, 9.5389820358, 9.8522754567, 7.8305142852, + 2.450533435, 9.4782390708, 5.6820584385, 4.7115587023}; + reconstruction::Primitive interface_R_imh{4.8015193892, 5.9124263972, 8.7513040382, 8.3659359773, + 1.339777121, 4.5589857979, 1.4398647311, 8.8727778983}; +#else // not MHD + reconstruction::Primitive const cell_i{1.4708046701, 9.5021020181, 3.7123503442, 4.6476103466, 3.7096802847}; + reconstruction::Primitive const cell_im1{3.9547588941, 3.1552319951, 3.0209247624, 9.5841013261, 2.2945188332}; + reconstruction::Primitive const cell_ip1{5.1973323534, 6.9132613767, 1.8397298636, 5.341960387, 9.093498542}; + reconstruction::Primitive interface_L_iph{6.7787324804, 9.5389820358, 9.8522754567, 7.8305142852, 2.450533435}; + reconstruction::Primitive interface_R_imh{4.8015193892, 5.9124263972, 8.7513040382, 8.3659359773, 1.339777121}; +#endif // MHD + + // Get test data + reconstruction::Monotize_Parabolic_Interface(cell_i, cell_im1, cell_ip1, interface_L_iph, interface_R_imh); + +// Check results +#ifdef MHD + reconstruction::Primitive const fiducial_interface_L{1.4708046700999999, 9.5021020181000004, 3.7123503441999999, + 4.6476103465999996, 3.7096802847000001, 0 < 9.3416846120999999, + 2.7707989229000001}; + reconstruction::Primitive const fiducial_interface_R{1.4708046700999999, 9.428341982700001, 3.7123503441999999, + 4.6476103465999996, 3.7096802847000001, 0 < 9.3416846120999999, + 2.7707989229000001}; + testingUtilities::checkResults(fiducial_interface_L.density, interface_L_iph.density, "density"); + testingUtilities::checkResults(fiducial_interface_L.velocity_x, interface_L_iph.velocity_x, "velocity_x"); + testingUtilities::checkResults(fiducial_interface_L.velocity_y, interface_L_iph.velocity_y, "velocity_y"); + testingUtilities::checkResults(fiducial_interface_L.velocity_z, interface_L_iph.velocity_z, "velocity_z"); + testingUtilities::checkResults(fiducial_interface_L.pressure, interface_L_iph.pressure, "pressure"); + + testingUtilities::checkResults(fiducial_interface_R.density, interface_R_imh.density, "density"); + testingUtilities::checkResults(fiducial_interface_R.velocity_x, interface_R_imh.velocity_x, "velocity_x"); + testingUtilities::checkResults(fiducial_interface_R.velocity_y, interface_R_imh.velocity_y, "velocity_y"); + testingUtilities::checkResults(fiducial_interface_R.velocity_z, interface_R_imh.velocity_z, "velocity_z"); + testingUtilities::checkResults(fiducial_interface_R.pressure, interface_R_imh.pressure, "pressure"); +#else // MHD + reconstruction::Primitive const fiducial_interface_L{1.4708046700999999, 9.5021020181000004, 3.7123503441999999, + 4.6476103465999996, 3.7096802847000001}; + reconstruction::Primitive const fiducial_interface_R{1.4708046700999999, 9.428341982700001, 3.7123503441999999, + 4.6476103465999996, 3.7096802847000001}; + testingUtilities::checkResults(fiducial_interface_L.density, interface_L_iph.density, "density"); + testingUtilities::checkResults(fiducial_interface_L.velocity_x, interface_L_iph.velocity_x, "velocity_x"); + testingUtilities::checkResults(fiducial_interface_L.velocity_y, interface_L_iph.velocity_y, "velocity_y"); + testingUtilities::checkResults(fiducial_interface_L.velocity_z, interface_L_iph.velocity_z, "velocity_z"); + testingUtilities::checkResults(fiducial_interface_L.pressure, interface_L_iph.pressure, "pressure"); + + testingUtilities::checkResults(fiducial_interface_R.density, interface_R_imh.density, "density"); + testingUtilities::checkResults(fiducial_interface_R.velocity_x, interface_R_imh.velocity_x, "velocity_x"); + testingUtilities::checkResults(fiducial_interface_R.velocity_y, interface_R_imh.velocity_y, "velocity_y"); + testingUtilities::checkResults(fiducial_interface_R.velocity_z, interface_R_imh.velocity_z, "velocity_z"); + testingUtilities::checkResults(fiducial_interface_R.pressure, interface_R_imh.pressure, "pressure"); +#endif // MHD +} + TEST(tALLReconstructionCalcInterfaceLinear, CorrectInputExpectCorrectOutput) { // Setup input data From 312de0690eee54145ab8dc333f778fe3b43a90fe Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Mon, 17 Apr 2023 16:59:21 -0400 Subject: [PATCH 423/694] Add MHD PPMC test --- src/reconstruction/ppmc_cuda_tests.cu | 115 ++++++++++++++++++++++++++ 1 file changed, 115 insertions(+) diff --git a/src/reconstruction/ppmc_cuda_tests.cu b/src/reconstruction/ppmc_cuda_tests.cu index 03bb1828c..2b5e3c4b7 100644 --- a/src/reconstruction/ppmc_cuda_tests.cu +++ b/src/reconstruction/ppmc_cuda_tests.cu @@ -133,3 +133,118 @@ TEST(tHYDROPpmcReconstructor, CorrectInputExpectCorrectOutput) } } } + +TEST(tMHDPpmcReconstructor, CorrectInputExpectCorrectOutput) +{ + // Set up PRNG to use + std::mt19937_64 prng(42); + std::uniform_real_distribution doubleRand(0.1, 5); + + // Mock up needed information + size_t const nx = 6; + size_t const ny = 6; + size_t const nz = 6; + size_t const n_fields = 8; + double const dx = doubleRand(prng); + double const dt = doubleRand(prng); + double const gamma = 5.0 / 3.0; + + // Setup host grid. Fill host grid with random values and randomly assign maximum value + std::vector host_grid(nx * ny * nz * n_fields); + for (size_t i = 0; i < host_grid.size(); i++) { + host_grid.at(i) = doubleRand(prng); + } + + // Allocating and copying to device + cuda_utilities::DeviceVector dev_grid(host_grid.size()); + dev_grid.cpyHostToDevice(host_grid); + + // Fiducial Data + std::vector> fiducial_interface_left = {{{86, 2.6558981128823214}, + {302, 0.84399195916314151}, + {518, 2.0109603398129137}, + {734, 1.764334292986655}, + {950, 6.1966752435374648}, + {1166, 1.1612148377210372}, + {1382, 2.4816715896801607}}, + {{86, 2.2167886449096095}, + {302, 0.70445164383109971}, + {518, 2.2081812807712167}, + {734, 1.9337956878738418}, + {950, 9.1565812482351436}, + {1166, 2.8331021062933308}, + {1382, 1.562787356714062}}, + {{86, 2.6558981128823214}, + {302, 0.84399195916314151}, + {518, 2.0109603398129137}, + {734, 1.764334292986655}, + {950, 11.923133284483747}, + {1166, 1.562787356714062}, + {1382, 1.1612148377210372}}}; + + std::vector> fiducial_interface_right = { + {{85, 2.6558981128823214}, + {301, 0.84399195916314151}, + {517, 2.0109603398129137}, + {733, 1.764334292986655}, + {949, 8.6490192698558381}, + {1165, 1.1612148377210372}, + {1381, 3.1565068702572638}}, + {{80, 3.3165345946674432}, + {85, 2.6558981128823214}, + {296, 1.0539291837321079}, + {301, 0.84399195916314151}, + {512, 1.9599277242665043}, + {517, 2.0109603398129137}, + {728, 1.8582259623199069}, + {733, 1.764334292986655}, + {944, 6.5776143533545097}, + {949, 8.6490192698558381}, + {1160, 2.8331021062933308}, + {1165, 1.1612148377210372}, + {1376, 1.562787356714062}, + {1381, 3.1565068702572638}}, + {{50, 2.6558981128823214}, {80, 3.3165345946674432}, {85, 2.6558981128823214}, {266, 0.84399195916314151}, + {296, 1.0539291837321079}, {301, 0.84399195916314151}, {482, 2.0109603398129137}, {512, 1.9599277242665043}, + {517, 2.0109603398129137}, {698, 1.764334292986655}, {728, 1.8582259623199069}, {733, 1.764334292986655}, + {914, 4.5501389454964283}, {944, 6.5776143533545097}, {949, 8.6490192698558381}, {1130, 1.562787356714062}, + {1160, 2.8331021062933308}, {1165, 1.1612148377210372}, {1346, 1.1612148377210372}, {1376, 1.562787356714062}, + {1381, 3.1565068702572638}}}; + + // Loop over different directions + for (size_t direction = 0; direction < 3; direction++) { + // Allocate device buffers + cuda_utilities::DeviceVector dev_interface_left(host_grid.size()); + cuda_utilities::DeviceVector dev_interface_right(host_grid.size()); + + // Launch kernel + hipLaunchKernelGGL(PPMC_cuda, dev_grid.size(), 1, 0, 0, dev_grid.data(), dev_interface_left.data(), + dev_interface_right.data(), nx, ny, nz, dx, dt, gamma, direction); + CudaCheckError(); + CHECK(cudaDeviceSynchronize()); + + // Perform Comparison + for (size_t i = 0; i < host_grid.size(); i++) { + // Check the left interface + double test_val = dev_interface_left.at(i); + double fiducial_val = + (fiducial_interface_left.at(direction).find(i) == fiducial_interface_left.at(direction).end()) + ? 0.0 + : fiducial_interface_left.at(direction)[i]; + + testingUtilities::checkResults( + fiducial_val, test_val, + "left interface at i=" + std::to_string(i) + ", in direction " + std::to_string(direction)); + + // Check the right interface + test_val = dev_interface_right.at(i); + fiducial_val = (fiducial_interface_right.at(direction).find(i) == fiducial_interface_right.at(direction).end()) + ? 0.0 + : fiducial_interface_right.at(direction)[i]; + + testingUtilities::checkResults( + fiducial_val, test_val, + "right interface at i=" + std::to_string(i) + ", in direction " + std::to_string(direction)); + } + } +} From a133e33c019f3dc9e84257c45fb1d77fbd66cb28 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Tue, 18 Apr 2023 10:53:18 -0400 Subject: [PATCH 424/694] Properly init memory in PPMC and PLMC tests --- src/reconstruction/plmc_cuda_tests.cu | 169 ++++++++++++++------------ src/reconstruction/ppmc_cuda_tests.cu | 78 +++++------- 2 files changed, 117 insertions(+), 130 deletions(-) diff --git a/src/reconstruction/plmc_cuda_tests.cu b/src/reconstruction/plmc_cuda_tests.cu index e6b115044..3616d2d0a 100644 --- a/src/reconstruction/plmc_cuda_tests.cu +++ b/src/reconstruction/plmc_cuda_tests.cu @@ -49,54 +49,66 @@ TEST(tHYDROPlmcReconstructor, CorrectInputExpectCorrectOutput) dev_grid.cpyHostToDevice(host_grid); // Fiducial Data - std::vector> fiducial_interface_left = { - {{26, 2.1584359129984056}, - {27, 0.70033864721549188}, - {106, 2.2476363309467553}, - {107, 3.0633780053857027}, - {186, 2.2245934101106259}, - {187, 2.1015872413794123}, - {266, 2.1263341057778309}, - {267, 3.9675148506537838}, - {346, 3.3640057502842691}, - {347, 21.091316282933843}}, - {{21, 0.72430827309279655}, {26, 2.1584359129984056}, {27, 0.70033864721549188}, {37, 0.19457128219588618}, - {101, 5.4739527659741896}, {106, 2.2476363309467553}, {107, 3.0633780053857027}, {117, 4.4286255636679313}, - {181, 0.12703829036056602}, {186, 2.2245934101106259}, {187, 2.1015872413794123}, {197, 2.2851440769830953}, - {261, 1.5337035731959561}, {266, 2.1263341057778309}, {267, 3.9675148506537838}, {277, 2.697375839048191}, - {341, 22.319601655044117}, {346, 3.3640057502842691}, {347, 21.091316282933843}, {357, 82.515887983144168}}, - {{21, 0.72430827309279655}, {25, 2.2863650183226212}, {26, 2.1584359129984056}, {27, 0.70033864721549188}, - {29, 1.686415421301841}, {37, 0.19457128219588618}, {101, 5.4739527659741896}, {105, 0.72340346106443465}, - {106, 2.2476363309467553}, {107, 3.0633780053857027}, {109, 5.4713687086831388}, {117, 4.4286255636679313}, - {181, 0.12703829036056602}, {185, 3.929100145230096}, {186, 2.2245934101106259}, {187, 2.1015872413794123}, - {189, 4.9166140516911483}, {197, 2.2851440769830953}, {261, 1.5337035731959561}, {265, 0.95177493689267167}, - {266, 2.1263341057778309}, {267, 3.9675148506537838}, {269, 0.46056494878491938}, {277, 2.697375839048191}, - {341, 22.319601655044117}, {345, 3.6886096301452787}, {346, 3.3640057502842691}, {347, 21.091316282933843}, - {349, 16.105488797582133}, {357, 82.515887983144168}}}; - std::vector> fiducial_interface_right = { - {{25, 3.8877922383184833}, - {26, 0.70033864721549188}, - {105, 1.5947787943675635}, - {106, 3.0633780053857027}, - {185, 4.0069556576401011}, - {186, 2.1015872413794123}, - {265, 1.7883678016935785}, - {266, 3.9675148506537838}, - {345, 2.8032969746372527}, - {346, 21.091316282933843}}, - {{17, 0.43265217076853835}, {25, 3.8877922383184833}, {26, 0.70033864721549188}, {33, 0.19457128219588618}, - {97, 3.2697645945288754}, {105, 1.5947787943675635}, {106, 3.0633780053857027}, {113, 4.4286255636679313}, - {177, 0.07588397666718491}, {185, 4.0069556576401011}, {186, 2.1015872413794123}, {193, 2.2851440769830953}, - {257, 0.91612950577699748}, {265, 1.7883678016935785}, {266, 3.9675148506537838}, {273, 2.697375839048191}, - {337, 13.332201861384396}, {345, 2.8032969746372527}, {346, 21.091316282933843}, {353, 82.515887983144168}}, - {{5, 2.2863650183226212}, {9, 1.686415421301841}, {17, 0.43265217076853835}, {25, 3.8877922383184833}, - {26, 0.70033864721549188}, {33, 0.19457128219588618}, {85, 0.72340346106443465}, {89, 1.7792505446336098}, - {97, 3.2697645945288754}, {105, 1.5947787943675635}, {106, 3.0633780053857027}, {113, 4.4286255636679313}, - {165, 5.3997753452111859}, {169, 1.4379190463124139}, {177, 0.07588397666718491}, {185, 4.0069556576401011}, - {186, 2.1015872413794123}, {193, 2.2851440769830953}, {245, 0.95177493689267167}, {249, 0.46056494878491938}, - {257, 0.91612950577699748}, {265, 1.7883678016935785}, {266, 3.9675148506537838}, {273, 2.697375839048191}, - {325, 6.6889498465051407}, {329, 1.6145084086614281}, {337, 13.332201861384396}, {345, 2.8032969746372527}, - {346, 21.091316282933843}, {353, 82.515887983144168}}}; + std::vector> fiducial_interface_left = {{{26, 2.1584359129984056}, + {27, 0.70033864721549188}, + {106, 2.2476363309467553}, + {107, 3.0633780053857027}, + {186, 2.2245934101106259}, + {187, 2.1015872413794123}, + {266, 2.1263341057778309}, + {267, 3.9675148506537838}, + {346, 3.3640057502842691}, + {347, 21.091316282933843}}, + {{21, 0.72430827309279655}, + {37, 0.19457128219588618}, + {101, 5.4739527659741896}, + {117, 4.4286255636679313}, + {181, 0.12703829036056602}, + {197, 2.2851440769830953}, + {261, 1.5337035731959561}, + {277, 2.697375839048191}, + {341, 22.319601655044117}, + {357, 82.515887983144168}}, + {{25, 2.2863650183226212}, + {29, 1.686415421301841}, + {105, 0.72340346106443465}, + {109, 5.4713687086831388}, + {185, 3.929100145230096}, + {189, 4.9166140516911483}, + {265, 0.95177493689267167}, + {269, 0.46056494878491938}, + {345, 3.6886096301452787}, + {349, 16.105488797582133}}}; + std::vector> fiducial_interface_right = {{{25, 3.8877922383184833}, + {26, 0.70033864721549188}, + {105, 1.5947787943675635}, + {106, 3.0633780053857027}, + {185, 4.0069556576401011}, + {186, 2.1015872413794123}, + {265, 1.7883678016935785}, + {266, 3.9675148506537838}, + {345, 2.8032969746372527}, + {346, 21.091316282933843}}, + {{17, 0.43265217076853835}, + {33, 0.19457128219588618}, + {97, 3.2697645945288754}, + {113, 4.4286255636679313}, + {177, 0.07588397666718491}, + {193, 2.2851440769830953}, + {257, 0.91612950577699748}, + {273, 2.697375839048191}, + {337, 13.332201861384396}, + {353, 82.515887983144168}}, + {{5, 2.2863650183226212}, + {9, 1.686415421301841}, + {85, 0.72340346106443465}, + {89, 1.7792505446336098}, + {165, 5.3997753452111859}, + {169, 1.4379190463124139}, + {245, 0.95177493689267167}, + {249, 0.46056494878491938}, + {325, 6.6889498465051407}, + {329, 1.6145084086614281}}}; // Loop over different directions for (size_t direction = 0; direction < 3; direction++) { @@ -121,8 +133,8 @@ TEST(tHYDROPlmcReconstructor, CorrectInputExpectCorrectOutput) } // Allocate device buffers - cuda_utilities::DeviceVector dev_interface_left(host_grid.size()); - cuda_utilities::DeviceVector dev_interface_right(host_grid.size()); + cuda_utilities::DeviceVector dev_interface_left(host_grid.size(), true); + cuda_utilities::DeviceVector dev_interface_right(host_grid.size(), true); // Launch kernel hipLaunchKernelGGL(PLMC_cuda, dev_grid.size(), 1, 0, 0, dev_grid.data(), dev_interface_left.data(), @@ -203,40 +215,37 @@ TEST(tMHDPlmcReconstructor, CorrectInputExpectCorrectOutput) {277, 23.677832869261188}, {341, 1.5437923271692418}, {405, 1.8141353672443383}}}; - std::vector> fiducial_interface_right = { - {{20, 0.59023012197434721}, - {84, 3.0043379408547275}, - {148, 2.6320759184913625}, - {212, 0.9487867623146744}, - {276, 22.111134849009044}, - {340, 1.8587936590169301}, - {404, 2.1583975283044725}}, - {{17, 0.44405384992296193}, - {20, 0.59023012197434721}, - {81, 2.5027813113931279}, - {84, 3.0043379408547275}, - {145, 2.6371119205792346}, - {148, 2.6320759184913625}, - {209, 1.0210845222961809}, - {212, 0.9487867623146744}, - {273, 21.360010722689488}, - {276, 22.111134849009044}, - {337, 2.1634182515826184}, - {340, 1.8587936590169301}, - {401, 1.7073441775673177}, - {404, 2.1583975283044725}}, - {{5, 0.92705119413602599}, {17, 0.44405384992296193}, {20, 0.59023012197434721}, {69, 1.9592598982258778}, - {81, 2.5027813113931279}, {84, 3.0043379408547275}, {133, 0.96653490574340428}, {145, 2.6371119205792346}, - {148, 2.6320759184913625}, {197, 1.3203867992383289}, {209, 1.0210845222961809}, {212, 0.9487867623146744}, - {261, 8.0057564947791793}, {273, 21.360010722689488}, {276, 22.111134849009044}, {325, 1.8629714367312684}, - {337, 2.1634182515826184}, {340, 1.8587936590169301}, {389, 1.9034519507895218}, {401, 1.7073441775673177}, - {404, 2.1583975283044725}}}; + std::vector> fiducial_interface_right = {{{20, 0.59023012197434721}, + {84, 3.0043379408547275}, + {148, 2.6320759184913625}, + {212, 0.9487867623146744}, + {276, 22.111134849009044}, + {340, 1.8587936590169301}, + {404, 2.1583975283044725}}, + { + {17, 0.44405384992296193}, + {81, 2.5027813113931279}, + {145, 2.6371119205792346}, + {209, 1.0210845222961809}, + {273, 21.360010722689488}, + {337, 2.1634182515826184}, + {401, 1.7073441775673177}, + }, + { + {5, 0.92705119413602599}, + {69, 1.9592598982258778}, + {133, 0.96653490574340428}, + {197, 1.3203867992383289}, + {261, 8.0057564947791793}, + {325, 1.8629714367312684}, + {389, 1.9034519507895218}, + }}; // Loop over different directions for (size_t direction = 0; direction < 3; direction++) { // Allocate device buffers - cuda_utilities::DeviceVector dev_interface_left(n_cells_interface); - cuda_utilities::DeviceVector dev_interface_right(n_cells_interface); + cuda_utilities::DeviceVector dev_interface_left(n_cells_interface, true); + cuda_utilities::DeviceVector dev_interface_right(n_cells_interface, true); // Launch kernel hipLaunchKernelGGL(PLMC_cuda, dev_grid.size(), 1, 0, 0, dev_grid.data(), dev_interface_left.data(), diff --git a/src/reconstruction/ppmc_cuda_tests.cu b/src/reconstruction/ppmc_cuda_tests.cu index 2b5e3c4b7..8f4508da8 100644 --- a/src/reconstruction/ppmc_cuda_tests.cu +++ b/src/reconstruction/ppmc_cuda_tests.cu @@ -71,36 +71,21 @@ TEST(tHYDROPpmcReconstructor, CorrectInputExpectCorrectOutput) {733, 1.764334292986655}, {949, 3.0847691079841209}}, {{80, 3.1281603739188069}, - {85, 2.6558981128823214}, {296, 0.99406757727427164}, - {301, 0.84399195916314151}, {512, 1.8732124042412865}, - {517, 1.8381070277226794}, {728, 1.6489758692176784}, - {733, 1.764334292986655}, - {944, 2.8820015278590443}, - {949, 3.0847691079841209}}, + {944, 2.8820015278590443}}, {{50, 2.6558981128823214}, - {80, 3.1281603739188069}, - {85, 2.6558981128823214}, {266, 0.84399195916314151}, - {296, 0.99406757727427164}, - {301, 0.84399195916314151}, {482, 2.0109603398129137}, - {512, 1.8732124042412865}, - {517, 1.8381070277226794}, {698, 1.764334292986655}, - {728, 1.6489758692176784}, - {733, 1.764334292986655}, - {914, 3.2100231679403066}, - {944, 2.8820015278590443}, - {949, 3.0847691079841209}}}; + {914, 3.2100231679403066}}}; // Loop over different directions for (size_t direction = 0; direction < 3; direction++) { // Allocate device buffers - cuda_utilities::DeviceVector dev_interface_left(host_grid.size()); - cuda_utilities::DeviceVector dev_interface_right(host_grid.size()); + cuda_utilities::DeviceVector dev_interface_left(host_grid.size(), true); + cuda_utilities::DeviceVector dev_interface_right(host_grid.size(), true); // Launch kernel hipLaunchKernelGGL(PPMC_cuda, dev_grid.size(), 1, 0, 0, dev_grid.data(), dev_interface_left.data(), @@ -182,40 +167,33 @@ TEST(tMHDPpmcReconstructor, CorrectInputExpectCorrectOutput) {1166, 1.562787356714062}, {1382, 1.1612148377210372}}}; - std::vector> fiducial_interface_right = { - {{85, 2.6558981128823214}, - {301, 0.84399195916314151}, - {517, 2.0109603398129137}, - {733, 1.764334292986655}, - {949, 8.6490192698558381}, - {1165, 1.1612148377210372}, - {1381, 3.1565068702572638}}, - {{80, 3.3165345946674432}, - {85, 2.6558981128823214}, - {296, 1.0539291837321079}, - {301, 0.84399195916314151}, - {512, 1.9599277242665043}, - {517, 2.0109603398129137}, - {728, 1.8582259623199069}, - {733, 1.764334292986655}, - {944, 6.5776143533545097}, - {949, 8.6490192698558381}, - {1160, 2.8331021062933308}, - {1165, 1.1612148377210372}, - {1376, 1.562787356714062}, - {1381, 3.1565068702572638}}, - {{50, 2.6558981128823214}, {80, 3.3165345946674432}, {85, 2.6558981128823214}, {266, 0.84399195916314151}, - {296, 1.0539291837321079}, {301, 0.84399195916314151}, {482, 2.0109603398129137}, {512, 1.9599277242665043}, - {517, 2.0109603398129137}, {698, 1.764334292986655}, {728, 1.8582259623199069}, {733, 1.764334292986655}, - {914, 4.5501389454964283}, {944, 6.5776143533545097}, {949, 8.6490192698558381}, {1130, 1.562787356714062}, - {1160, 2.8331021062933308}, {1165, 1.1612148377210372}, {1346, 1.1612148377210372}, {1376, 1.562787356714062}, - {1381, 3.1565068702572638}}}; + std::vector> fiducial_interface_right = {{{85, 2.6558981128823214}, + {301, 0.84399195916314151}, + {517, 2.0109603398129137}, + {733, 1.764334292986655}, + {949, 8.6490192698558381}, + {1165, 1.1612148377210372}, + {1381, 3.1565068702572638}}, + {{80, 3.3165345946674432}, + {296, 1.0539291837321079}, + {512, 1.9599277242665043}, + {728, 1.8582259623199069}, + {944, 6.5776143533545097}, + {1160, 2.8331021062933308}, + {1376, 1.562787356714062}}, + {{50, 2.6558981128823214}, + {266, 0.84399195916314151}, + {482, 2.0109603398129137}, + {698, 1.764334292986655}, + {914, 4.5501389454964283}, + {1130, 1.562787356714062}, + {1346, 1.1612148377210372}}}; // Loop over different directions for (size_t direction = 0; direction < 3; direction++) { // Allocate device buffers - cuda_utilities::DeviceVector dev_interface_left(host_grid.size()); - cuda_utilities::DeviceVector dev_interface_right(host_grid.size()); + cuda_utilities::DeviceVector dev_interface_left(nx * ny * nz * (n_fields - 1), true); + cuda_utilities::DeviceVector dev_interface_right(nx * ny * nz * (n_fields - 1), true); // Launch kernel hipLaunchKernelGGL(PPMC_cuda, dev_grid.size(), 1, 0, 0, dev_grid.data(), dev_interface_left.data(), @@ -224,7 +202,7 @@ TEST(tMHDPpmcReconstructor, CorrectInputExpectCorrectOutput) CHECK(cudaDeviceSynchronize()); // Perform Comparison - for (size_t i = 0; i < host_grid.size(); i++) { + for (size_t i = 0; i < dev_interface_left.size(); i++) { // Check the left interface double test_val = dev_interface_left.at(i); double fiducial_val = From 6b70513030c8159eaa590e973358541cdd0c0319 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Tue, 18 Apr 2023 11:52:39 -0400 Subject: [PATCH 425/694] Enable PPMC on MHD builds --- builds/make.type.mhd | 2 +- src/utils/error_handling.cpp | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/builds/make.type.mhd b/builds/make.type.mhd index 953d49238..2c6cbf68d 100644 --- a/builds/make.type.mhd +++ b/builds/make.type.mhd @@ -9,7 +9,7 @@ MPI_GPU ?= DFLAGS += -DCUDA DFLAGS += -DMPI_CHOLLA DFLAGS += -DPRECISION=2 -DFLAGS += -DPLMC +DFLAGS += -DPPMC DFLAGS += -DHLLD DFLAGS += -DMHD diff --git a/src/utils/error_handling.cpp b/src/utils/error_handling.cpp index a9b217d1a..5f198f7f2 100644 --- a/src/utils/error_handling.cpp +++ b/src/utils/error_handling.cpp @@ -79,9 +79,9 @@ void Check_Configuration(parameters const &P) #endif //! HLLD or EXACT or ROE or HLL or HLLC // May only use certain reconstructions - #if (!defined(PCM) == !defined(PLMC)) || defined(PLMP) || defined(PPMC) || defined(PPMP) - #error "MHD only supports PCM and PLMC reconstruction" - #endif //! PCM or PLMP or PPMC or PPMP + #if ((defined(PCM) + defined(PLMC) + defined(PPMC)) != 1) || defined(PLMP) || defined(PPMP) + #error "MHD only supports PCM, PLMC, and PPMC reconstruction" + #endif // Reconstruction check // must have HDF5 #ifndef HDF5 From 7628d6f08d867139be0624566a06dcb3fadb77a2 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Wed, 19 Apr 2023 10:29:42 -0400 Subject: [PATCH 426/694] Debug: remove extra : --- src/reconstruction/reconstruction.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/reconstruction/reconstruction.h b/src/reconstruction/reconstruction.h index 01d35aeae..4addeafcd 100644 --- a/src/reconstruction/reconstruction.h +++ b/src/reconstruction/reconstruction.h @@ -650,19 +650,15 @@ Primitive __device__ __host__ __inline__ Calc_Interface_Parabolic(Primitive cons #ifdef MHD output.magnetic_y = interface(cell_i.magnetic_y, cell_im1.magnetic_y, slopes_i.magnetic_y, slopes_im1.magnetic_y); - ; output.magnetic_z = interface(cell_i.magnetic_z, cell_im1.magnetic_z, slopes_i.magnetic_z, slopes_im1.magnetic_z); - ; #endif // MHD #ifdef DE output.gas_energy = interface(cell_i.gas_energy, cell_im1.gas_energy, slopes_i.gas_energy, slopes_im1.gas_energy); - ; #endif // DE #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { output.scalar[i] = interface(cell_i.scalar[i], cell_im1.scalar[i], slopes_i.scalar[i], slopes_im1.scalar[i]); - ; } #endif // SCALAR From 29aef32a4cf966afd6360f19bcaf3a440adc4cb0 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Fri, 21 Apr 2023 10:06:14 -0400 Subject: [PATCH 427/694] Tighten limits on PPMC hydro linear wave test --- src/system_tests/hydro_system_tests.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/system_tests/hydro_system_tests.cpp b/src/system_tests/hydro_system_tests.cpp index dd92fbddc..a448e45b3 100644 --- a/src/system_tests/hydro_system_tests.cpp +++ b/src/system_tests/hydro_system_tests.cpp @@ -144,8 +144,8 @@ class tHYDROtMHDSYSTEMLinearWavesParameterizedMpi : public ::testing::TestWithPa double const allowedL1Error = 1E-7; // Based on results in Gardiner & Stone 2008 double const allowedError = 1E-7; #elif defined(PPMC) - double const allowedL1Error = 1E-7; // Based on results in Gardiner & Stone 2008 - double const allowedError = 1E-7; + double const allowedL1Error = 2.7E-8; // Based on results in Gardiner & Stone 2008 + double const allowedError = 2.7E-8; #endif // PCM void setLaunchParams(double const &waveSpeed, double const &rEigenVec_rho, double const &rEigenVec_MomentumX, From e889a0b4255be852bc2f8e07ab8e1589d090ddde Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Fri, 21 Apr 2023 10:07:03 -0400 Subject: [PATCH 428/694] Fix spelling error and make if statement into else if --- src/reconstruction/ppmc_cuda.cu | 3 +-- src/reconstruction/reconstruction.h | 17 +++++++++-------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/reconstruction/ppmc_cuda.cu b/src/reconstruction/ppmc_cuda.cu index 7e53571d6..0cb2cfe96 100644 --- a/src/reconstruction/ppmc_cuda.cu +++ b/src/reconstruction/ppmc_cuda.cu @@ -220,8 +220,7 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou // Step 7 - Apply further monotonicity constraints to ensure the values on the left and right side of cell center lie // between neighboring cell-centered values Stone Eqns 47 - 53 - - reconstruction::Monotize_Parabolic_Interface(cell_i, cell_im1, cell_ip1, interface_L_iph, interface_R_imh); + reconstruction::Monotonize_Parabolic_Interface(cell_i, cell_im1, cell_ip1, interface_L_iph, interface_R_imh); #ifndef VL // Step 8 - Compute the coefficients for the monotonized parabolic diff --git a/src/reconstruction/reconstruction.h b/src/reconstruction/reconstruction.h index 4addeafcd..9bcc157eb 100644 --- a/src/reconstruction/reconstruction.h +++ b/src/reconstruction/reconstruction.h @@ -520,28 +520,29 @@ Primitive __device__ __inline__ Monotonize_Characteristic_Return_Primitive( * \param[in,out] interface_R_imh The right interface state at i-1/2 * \return Primitive */ -void __device__ __host__ __inline__ Monotize_Parabolic_Interface(Primitive const &cell_i, Primitive const &cell_im1, - Primitive const &cell_ip1, Primitive &interface_L_iph, - Primitive &interface_R_imh) +void __device__ __host__ __inline__ Monotonize_Parabolic_Interface(Primitive const &cell_i, Primitive const &cell_im1, + Primitive const &cell_ip1, + Primitive &interface_L_iph, + Primitive &interface_R_imh) { // The function that will actually do the monotozation. Note the return by refernce of the interface state auto Monotonize = [](Real const &state_i, Real const &state_im1, Real const &state_ip1, Real &interface_L, Real &interface_R) { + // Some terms we need for the comparisons + Real const term_1 = 6.0 * (interface_L - interface_R) * (state_i - 0.5 * (interface_R + interface_L)); + Real const term_2 = pow(interface_L - interface_R, 2.0); + // First monotonicity constraint. Equations 47-49 in Stone et al. 2008 if ((interface_L - state_i) * (state_i - interface_R) <= 0.0) { interface_L = state_i; interface_R = state_i; } - // Second monotonicity constraint. Equations 50 & 51 in Stone et al. 2008 - Real const term_1 = 6.0 * (interface_L - interface_R) * (state_i - 0.5 * (interface_R + interface_L)); - Real const term_2 = pow(interface_L - interface_R, 2.0); if (term_1 > term_2) { interface_R = 3.0 * state_i - 2.0 * interface_L; } - // Third monotonicity constraint. Equations 52 & 53 in Stone et al. 2008 - if (term_1 < -term_2) { + else if (term_1 < -term_2) { interface_L = 3.0 * state_i - 2.0 * interface_R; } From 487aff5dedee22877be25b3a625b82f7ba4d66bc Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Fri, 21 Apr 2023 11:23:04 -0400 Subject: [PATCH 429/694] Tighten thread guard on PPMC --- src/reconstruction/ppmc_cuda.cu | 3 +- src/reconstruction/ppmc_cuda_tests.cu | 161 ++++++++++----------- src/reconstruction/reconstruction_tests.cu | 2 +- 3 files changed, 83 insertions(+), 83 deletions(-) diff --git a/src/reconstruction/ppmc_cuda.cu b/src/reconstruction/ppmc_cuda.cu index 0cb2cfe96..a1ffbde3f 100644 --- a/src/reconstruction/ppmc_cuda.cu +++ b/src/reconstruction/ppmc_cuda.cu @@ -27,7 +27,8 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou cuda_utilities::compute3DIndices(thread_id, nx, ny, xid, yid, zid); // Thread guard to prevent overrun - if (xid < 2 or xid >= nx - 3 or yid < 2 or yid >= ny - 3 or zid < 2 or zid >= nz - 3) { + if (size_t const min = 3, max = 3; + xid < min or xid >= nx - max or yid < min or yid >= ny - max or zid < min or zid >= nz - max) { return; } diff --git a/src/reconstruction/ppmc_cuda_tests.cu b/src/reconstruction/ppmc_cuda_tests.cu index 8f4508da8..573900aae 100644 --- a/src/reconstruction/ppmc_cuda_tests.cu +++ b/src/reconstruction/ppmc_cuda_tests.cu @@ -29,9 +29,9 @@ TEST(tHYDROPpmcReconstructor, CorrectInputExpectCorrectOutput) std::uniform_real_distribution doubleRand(0.1, 5); // Mock up needed information - size_t const nx = 6; - size_t const ny = 6; - size_t const nz = 6; + size_t const nx = 7; + size_t const ny = 7; + size_t const nz = 7; size_t const n_fields = 5; double const dx = doubleRand(prng); double const dt = doubleRand(prng); @@ -48,38 +48,37 @@ TEST(tHYDROPpmcReconstructor, CorrectInputExpectCorrectOutput) dev_grid.cpyHostToDevice(host_grid); // Fiducial Data - std::vector> fiducial_interface_left = {{{86, 2.6558981128823214}, - {302, 0.84399195916314151}, - {518, 2.2002498722761787}, - {734, 1.764334292986655}, - {950, 3.3600925565746804}, - {86, 2.4950488327292639}}, - {{86, 2.4950488327292639}, - {302, 0.79287723513518138}, - {518, 1.7614576990062414}, - {734, 1.8238574169157304}, - {950, 3.14294317122161}}, - {{86, 2.6558981128823214}, - {302, 0.84399195916314151}, - {518, 2.0109603398129137}, - {734, 1.764334292986655}, - {950, 3.2100231679403066}}}; - - std::vector> fiducial_interface_right = {{{85, 2.6558981128823214}, - {301, 0.84399195916314151}, - {517, 1.8381070277226794}, - {733, 1.764334292986655}, - {949, 3.0847691079841209}}, - {{80, 3.1281603739188069}, - {296, 0.99406757727427164}, - {512, 1.8732124042412865}, - {728, 1.6489758692176784}, - {944, 2.8820015278590443}}, - {{50, 2.6558981128823214}, - {266, 0.84399195916314151}, - {482, 2.0109603398129137}, - {698, 1.764334292986655}, - {914, 3.2100231679403066}}}; + std::vector> fiducial_interface_left = {{{171, 1.7598055553475744}, + {514, 3.3921082637175894}, + {857, 3.5866056366266772}, + {1200, 3.4794572581328902}, + {1543, 10.363861270296034}}, + {{171, 1.6206985712721598}, + {514, 3.123972986618837}, + {857, 3.30309596610488}, + {1200, 3.204417323222251}, + {1543, 9.544631281899882}}, + {{171, 1.6206985712721595}, + {514, 5.0316428671215876}, + {857, 2.3915465711497186}, + {1200, 3.2044173232222506}, + {1543, 12.74302824034023}}}; + + std::vector> fiducial_interface_right = {{{170, 1.7857012385420896}, + {513, 3.4420234152477129}, + {856, 3.6393828329638049}, + {1199, 3.5306577572855762}, + {1542, 10.516366339570284}}, + {{164, 1.6206985712721595}, + {507, 3.1239729866188366}, + {850, 3.3030959661048795}, + {1193, 3.2044173232222506}, + {1536, 9.5446312818998802}}, + {{122, 1.6206985712721595}, + {465, 5.4375307473677061}, + {808, 2.2442413290889327}, + {1151, 3.2044173232222506}, + {1494, 13.843305272338561}}}; // Loop over different directions for (size_t direction = 0; direction < 3; direction++) { @@ -126,9 +125,9 @@ TEST(tMHDPpmcReconstructor, CorrectInputExpectCorrectOutput) std::uniform_real_distribution doubleRand(0.1, 5); // Mock up needed information - size_t const nx = 6; - size_t const ny = 6; - size_t const nz = 6; + size_t const nx = 7; + size_t const ny = 7; + size_t const nz = 7; size_t const n_fields = 8; double const dx = doubleRand(prng); double const dt = doubleRand(prng); @@ -145,49 +144,49 @@ TEST(tMHDPpmcReconstructor, CorrectInputExpectCorrectOutput) dev_grid.cpyHostToDevice(host_grid); // Fiducial Data - std::vector> fiducial_interface_left = {{{86, 2.6558981128823214}, - {302, 0.84399195916314151}, - {518, 2.0109603398129137}, - {734, 1.764334292986655}, - {950, 6.1966752435374648}, - {1166, 1.1612148377210372}, - {1382, 2.4816715896801607}}, - {{86, 2.2167886449096095}, - {302, 0.70445164383109971}, - {518, 2.2081812807712167}, - {734, 1.9337956878738418}, - {950, 9.1565812482351436}, - {1166, 2.8331021062933308}, - {1382, 1.562787356714062}}, - {{86, 2.6558981128823214}, - {302, 0.84399195916314151}, - {518, 2.0109603398129137}, - {734, 1.764334292986655}, - {950, 11.923133284483747}, - {1166, 1.562787356714062}, - {1382, 1.1612148377210372}}}; - - std::vector> fiducial_interface_right = {{{85, 2.6558981128823214}, - {301, 0.84399195916314151}, - {517, 2.0109603398129137}, - {733, 1.764334292986655}, - {949, 8.6490192698558381}, - {1165, 1.1612148377210372}, - {1381, 3.1565068702572638}}, - {{80, 3.3165345946674432}, - {296, 1.0539291837321079}, - {512, 1.9599277242665043}, - {728, 1.8582259623199069}, - {944, 6.5776143533545097}, - {1160, 2.8331021062933308}, - {1376, 1.562787356714062}}, - {{50, 2.6558981128823214}, - {266, 0.84399195916314151}, - {482, 2.0109603398129137}, - {698, 1.764334292986655}, - {914, 4.5501389454964283}, - {1130, 1.562787356714062}, - {1346, 1.1612148377210372}}}; + std::vector> fiducial_interface_left = {{{171, 1.534770576865724}, + {514, 2.9583427219427034}, + {857, 3.1279687606328648}, + {1200, 3.0345219714853804}, + {1543, 23.015998619464185}, + {1886, 2.1906071705977261}, + {2229, 3.1997462690190144}}, + {{171, 1.6206985712721598}, + {514, 3.123972986618837}, + {857, 3.30309596610488}, + {1200, 3.204417323222251}, + {1543, 26.732346761532895}, + {1886, 4.0436839628613175}, + {2229, 4.1622274705137627}}, + {{171, 1.6206985712721595}, + {514, 1.7752459698084133}, + {857, 3.9720060989313879}, + {1200, 3.2044173232222506}, + {1543, 21.984278941312677}, + {1886, 4.1622274705137627}, + {2229, 2.1042141607876181}}}; + + std::vector> fiducial_interface_right = {{{170, 1.7925545600850308}, + {513, 3.4552335159711038}, + {856, 3.6533503770489086}, + {1199, 3.5442080266959914}, + {1542, 29.263332026690119}, + {1885, 2.1906071705977261}, + {2228, 3.1997462690190144}}, + {{164, 1.6206985712721595}, + {507, 3.1239729866188366}, + {850, 3.3030959661048795}, + {1193, 3.2044173232222506}, + {1536, 26.803126363556764}, + {1879, 2.1514229421449058}, + {2222, 4.1622274705137627}}, + {{122, 1.6206985712721595}, + {465, 5.4175246353495679}, + {808, 2.4067132198954435}, + {1151, 3.2044173232222506}, + {1494, 35.794674014212731}, + {1837, 4.1622274705137627}, + {2180, 2.7068276720054212}}}; // Loop over different directions for (size_t direction = 0; direction < 3; direction++) { diff --git a/src/reconstruction/reconstruction_tests.cu b/src/reconstruction/reconstruction_tests.cu index 3f3b72691..9b4d8cb48 100644 --- a/src/reconstruction/reconstruction_tests.cu +++ b/src/reconstruction/reconstruction_tests.cu @@ -308,7 +308,7 @@ TEST(tALLReconstructionMonotizeParabolicInterface, CorrectInputExpectCorrectOutp #endif // MHD // Get test data - reconstruction::Monotize_Parabolic_Interface(cell_i, cell_im1, cell_ip1, interface_L_iph, interface_R_imh); + reconstruction::Monotonize_Parabolic_Interface(cell_i, cell_im1, cell_ip1, interface_L_iph, interface_R_imh); // Check results #ifdef MHD From 6dd08cedba802d78db48e80954dba2dbc4b75e37 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Tue, 25 Apr 2023 14:31:49 -0400 Subject: [PATCH 430/694] Add else to match Athena --- src/reconstruction/reconstruction.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/reconstruction/reconstruction.h b/src/reconstruction/reconstruction.h index 9bcc157eb..c7b276578 100644 --- a/src/reconstruction/reconstruction.h +++ b/src/reconstruction/reconstruction.h @@ -538,7 +538,7 @@ void __device__ __host__ __inline__ Monotonize_Parabolic_Interface(Primitive con interface_R = state_i; } // Second monotonicity constraint. Equations 50 & 51 in Stone et al. 2008 - if (term_1 > term_2) { + else if (term_1 > term_2) { interface_R = 3.0 * state_i - 2.0 * interface_L; } // Third monotonicity constraint. Equations 52 & 53 in Stone et al. 2008 From 4c0e7c4aafa179e07037f0b4d9ed1e5a6f7476ea Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Tue, 25 Apr 2023 14:53:01 -0400 Subject: [PATCH 431/694] Add check that only one integrator is enabled --- src/utils/error_handling.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/utils/error_handling.cpp b/src/utils/error_handling.cpp index 5f198f7f2..fd2a59ad7 100644 --- a/src/utils/error_handling.cpp +++ b/src/utils/error_handling.cpp @@ -49,6 +49,11 @@ void Check_Configuration(parameters const &P) #error "The CUDA macro is required" #endif //! CUDA +// Can only have one integrator enabled +#if ((defined(VL) + defined(CTU) + defined(SIMPLE)) != 1) + #error "Only one integrator can be enabled at a time." +#endif // Only one integrator check + // warn if error checking is disabled #ifndef CUDA_ERROR_CHECK #warning "CUDA error checking is disabled. Enable it with the CUDA_ERROR_CHECK macro" From 22031886a1a42a33bbde60e5f14fd746a361a3f3 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Thu, 27 Apr 2023 11:17:44 -0400 Subject: [PATCH 432/694] Clarify a comment --- src/reconstruction/reconstruction.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/reconstruction/reconstruction.h b/src/reconstruction/reconstruction.h index c7b276578..d30fa5dea 100644 --- a/src/reconstruction/reconstruction.h +++ b/src/reconstruction/reconstruction.h @@ -546,7 +546,7 @@ void __device__ __host__ __inline__ Monotonize_Parabolic_Interface(Primitive con interface_L = 3.0 * state_i - 2.0 * interface_R; } - // Final monotocity constraint + // Bound the interface to lie between adjacent cell centered values interface_R = fmax(fmin(state_i, state_im1), interface_R); interface_R = fmin(fmax(state_i, state_im1), interface_R); interface_L = fmax(fmin(state_i, state_ip1), interface_L); From d1911451649e8f118d7d071f54bf135af075bf24 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Fri, 28 Apr 2023 14:26:20 -0400 Subject: [PATCH 433/694] Add MHD PPMC Reconstruction Kernel - Add new PPMC kernel that works with MHD. The old PPMC was not stable for the Brio & Wu shock tube. The new one, based on Athena++ is and it's faster. - Renamed PPMC kernels so it's clear which is for CTU and which is for VL - Fix amplitude of MHD contact wave so that it's consistent with the other MHD waves - update all system tests for MHD PPMC - Change outstep in MHD blast wave example to match tout - Fix bug in Primitive_To_Characteristic and Characteristic_To_Primitive. Beta_y should default to 1 not 0. See [this PR](https://github.com/PrincetonUniversity/athena/pull/497) for more info - Remove MHD sod shock tube test & update hydro sod test to work for MHD - Add extra check to SystemTestRunner to avoid erroneous error when adding custom data fields --- cholla-tests-data | 2 +- examples/3D/mhd_blast.txt | 2 +- examples/3D/mhd_contact_wave.txt | 2 +- src/integrators/VL_1D_cuda.cu | 2 +- src/integrators/VL_2D_cuda.cu | 6 +- src/integrators/VL_3D_cuda.cu | 9 +- src/integrators/simple_1D_cuda.cu | 2 +- src/integrators/simple_2D_cuda.cu | 4 +- src/integrators/simple_3D_cuda.cu | 6 +- src/reconstruction/ppmc_cuda.cu | 249 ++++++++++++++---- src/reconstruction/ppmc_cuda.h | 39 ++- src/reconstruction/ppmc_cuda_tests.cu | 135 ++++++---- src/reconstruction/reconstruction.h | 151 ++++++++++- src/reconstruction/reconstruction_tests.cu | 105 +++++--- src/system_tests/hydro_system_tests.cpp | 50 ++-- ...edMpi_CorrectInputExpectCorrectOutput.txt} | 1 - ...ockTubeCorrectInputExpectCorrectOutput.txt | 57 ---- src/system_tests/mhd_system_tests.cpp | 38 ++- src/system_tests/system_tester.cpp | 2 +- 19 files changed, 590 insertions(+), 272 deletions(-) rename src/system_tests/input_files/{tHYDROSYSTEMSodShockTubeParameterizedMpi_CorrectInputExpectCorrectOutput.txt => tHYDROtMHDSYSTEMSodShockTubeParameterizedMpi_CorrectInputExpectCorrectOutput.txt} (99%) delete mode 100644 src/system_tests/input_files/tMHDSYSTEMParameterizedMpi_SodShockTubeCorrectInputExpectCorrectOutput.txt diff --git a/cholla-tests-data b/cholla-tests-data index d59317178..c2cc6d173 160000 --- a/cholla-tests-data +++ b/cholla-tests-data @@ -1 +1 @@ -Subproject commit d593171787379092a8d2189cf4be3e98fb35300f +Subproject commit c2cc6d173bf2d04a0dae6a45cb71624f56b22bb8 diff --git a/examples/3D/mhd_blast.txt b/examples/3D/mhd_blast.txt index e796c763e..5d078f674 100644 --- a/examples/3D/mhd_blast.txt +++ b/examples/3D/mhd_blast.txt @@ -13,7 +13,7 @@ nz=200 # final output time tout=0.2 # time interval for output -outstep=0.005 +outstep=0.2 # name of initial conditions init=MHD_Spherical_Blast # domain properties diff --git a/examples/3D/mhd_contact_wave.txt b/examples/3D/mhd_contact_wave.txt index 9250bba5a..0ff7e7989 100644 --- a/examples/3D/mhd_contact_wave.txt +++ b/examples/3D/mhd_contact_wave.txt @@ -57,7 +57,7 @@ By=1.5 # magnetic field in the z direction Bz=0 # amplitude of perturbing oscillations -A=1e-1 +A=1e-6 # value of gamma gamma=1.666666666666667 # The right eigenvectors to set the wave properly diff --git a/src/integrators/VL_1D_cuda.cu b/src/integrators/VL_1D_cuda.cu index b4116b735..57c1a9c65 100644 --- a/src/integrators/VL_1D_cuda.cu +++ b/src/integrators/VL_1D_cuda.cu @@ -105,7 +105,7 @@ void VL_Algorithm_1D_CUDA(Real *d_conserved, int nx, int x_off, int n_ghost, Rea gama, 0, n_fields); #endif #ifdef PPMC - hipLaunchKernelGGL(PPMC_cuda, dimGrid, dimBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, dx, dt, gama, 0); + hipLaunchKernelGGL(PPMC_VL, dimGrid, dimBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, gama, 0); #endif CudaCheckError(); diff --git a/src/integrators/VL_2D_cuda.cu b/src/integrators/VL_2D_cuda.cu index 1c96196d4..27677f61d 100644 --- a/src/integrators/VL_2D_cuda.cu +++ b/src/integrators/VL_2D_cuda.cu @@ -115,10 +115,8 @@ void VL_Algorithm_2D_CUDA(Real *d_conserved, int nx, int ny, int x_off, int y_of dt, gama, 1, n_fields); #endif // PPMP #ifdef PPMC - hipLaunchKernelGGL(PPMC_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, dx, dt, gama, - 0); - hipLaunchKernelGGL(PPMC_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Ly, Q_Ry, nx, ny, nz, dy, dt, gama, - 1); + hipLaunchKernelGGL(PPMC_VL, dim2dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, gama, 0); + hipLaunchKernelGGL(PPMC_VL, dim2dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Ly, Q_Ry, nx, ny, nz, gama, 1); #endif // PPMC CudaCheckError(); diff --git a/src/integrators/VL_3D_cuda.cu b/src/integrators/VL_3D_cuda.cu index 52b40dfb3..cd99f472f 100644 --- a/src/integrators/VL_3D_cuda.cu +++ b/src/integrators/VL_3D_cuda.cu @@ -234,12 +234,9 @@ void VL_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, int dt, gama, 2, n_fields); #endif // PPMP #ifdef PPMC - hipLaunchKernelGGL(PPMC_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, dx, dt, gama, - 0); - hipLaunchKernelGGL(PPMC_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Ly, Q_Ry, nx, ny, nz, dy, dt, gama, - 1); - hipLaunchKernelGGL(PPMC_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Lz, Q_Rz, nx, ny, nz, dz, dt, gama, - 2); + hipLaunchKernelGGL(PPMC_VL, dim1dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, gama, 0); + hipLaunchKernelGGL(PPMC_VL, dim1dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Ly, Q_Ry, nx, ny, nz, gama, 1); + hipLaunchKernelGGL(PPMC_VL, dim1dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Lz, Q_Rz, nx, ny, nz, gama, 2); #endif // PPMC CudaCheckError(); diff --git a/src/integrators/simple_1D_cuda.cu b/src/integrators/simple_1D_cuda.cu index 5a3d2b143..3be5ba40a 100644 --- a/src/integrators/simple_1D_cuda.cu +++ b/src/integrators/simple_1D_cuda.cu @@ -76,7 +76,7 @@ void Simple_Algorithm_1D_CUDA(Real *d_conserved, int nx, int x_off, int n_ghost, CudaCheckError(); #endif #ifdef PPMC - hipLaunchKernelGGL(PPMC_cuda, dimGrid, dimBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, dx, dt, gama, 0); + hipLaunchKernelGGL(PPMC_CTU, dimGrid, dimBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, dx, dt, gama, 0); CudaCheckError(); #endif diff --git a/src/integrators/simple_2D_cuda.cu b/src/integrators/simple_2D_cuda.cu index 9529b307b..b9d11b180 100644 --- a/src/integrators/simple_2D_cuda.cu +++ b/src/integrators/simple_2D_cuda.cu @@ -79,8 +79,8 @@ void Simple_Algorithm_2D_CUDA(Real *d_conserved, int nx, int ny, int x_off, int gama, 1, n_fields); #endif #ifdef PPMC - hipLaunchKernelGGL(PPMC_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, dx, dt, gama, 0); - hipLaunchKernelGGL(PPMC_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Ly, Q_Ry, nx, ny, nz, dy, dt, gama, 1); + hipLaunchKernelGGL(PPMC_CTU, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, dx, dt, gama, 0); + hipLaunchKernelGGL(PPMC_CTU, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Ly, Q_Ry, nx, ny, nz, dy, dt, gama, 1); #endif CudaCheckError(); diff --git a/src/integrators/simple_3D_cuda.cu b/src/integrators/simple_3D_cuda.cu index 9e9156e07..32994eeff 100644 --- a/src/integrators/simple_3D_cuda.cu +++ b/src/integrators/simple_3D_cuda.cu @@ -116,9 +116,9 @@ void Simple_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, gama, 2, n_fields); #endif // PPMP #ifdef PPMC - hipLaunchKernelGGL(PPMC_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, dx, dt, gama, 0); - hipLaunchKernelGGL(PPMC_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Ly, Q_Ry, nx, ny, nz, dy, dt, gama, 1); - hipLaunchKernelGGL(PPMC_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lz, Q_Rz, nx, ny, nz, dz, dt, gama, 2); + hipLaunchKernelGGL(PPMC_CTU, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, dx, dt, gama, 0); + hipLaunchKernelGGL(PPMC_CTU, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Ly, Q_Ry, nx, ny, nz, dy, dt, gama, 1); + hipLaunchKernelGGL(PPMC_CTU, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lz, Q_Rz, nx, ny, nz, dz, dt, gama, 2); CudaCheckError(); #endif // PPMC diff --git a/src/reconstruction/ppmc_cuda.cu b/src/reconstruction/ppmc_cuda.cu index a1ffbde3f..c0d922a2f 100644 --- a/src/reconstruction/ppmc_cuda.cu +++ b/src/reconstruction/ppmc_cuda.cu @@ -15,11 +15,12 @@ #include "../utils/hydro_utilities.h" #endif +// ===================================================================================================================== /*! * \brief When passed a stencil of conserved variables, returns the left and right boundary values for the interface calculated using ppm. */ -__global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, int ny, int nz, Real dx, - Real dt, Real gamma, int dir) +__global__ void PPMC_CTU(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, int ny, int nz, Real dx, + Real dt, Real gamma, int dir) { // get a thread ID int const thread_id = threadIdx.x + blockIdx.x * blockDim.x; @@ -223,7 +224,7 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou // between neighboring cell-centered values Stone Eqns 47 - 53 reconstruction::Monotonize_Parabolic_Interface(cell_i, cell_im1, cell_ip1, interface_L_iph, interface_R_imh); -#ifndef VL + // This is the beginning of the characteristic tracing // Step 8 - Compute the coefficients for the monotonized parabolic // interpolation function // Stone Eqn 54 @@ -240,18 +241,18 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou Real const vz_6 = 6.0 * (cell_i.velocity_z - 0.5 * (interface_R_imh.velocity_z + interface_L_iph.velocity_z)); Real const p_6 = 6.0 * (cell_i.pressure - 0.5 * (interface_R_imh.pressure + interface_L_iph.pressure)); - #ifdef DE +#ifdef DE del_m_i.gas_energy = interface_L_iph.gas_energy - interface_R_imh.gas_energy; Real const ge_6 = 6.0 * (cell_i.gas_energy - 0.5 * (interface_R_imh.gas_energy + interface_L_iph.gas_energy)); - #endif // DE +#endif // DE - #ifdef SCALAR - Real scalar_6[NSCALARS] : for (int i = 0; i < NSCALARS; i++) - { +#ifdef SCALAR + Real scalar_6[NSCALARS]; + for (int i = 0; i < NSCALARS; i++) { del_m_i.scalar[i] = interface_L_iph.scalar[i] - interface_R_imh.scalar[i]; scalar_6[i] = 6.0 * (cell_i.scalar[i] - 0.5 * (interface_R_imh.scalar[i] + interface_L_iph.scalar[i])); } - #endif // SCALAR +#endif // SCALAR // Compute the eigenvalues of the linearized equations in the // primitive variables using the cell-centered primitive variables @@ -307,16 +308,16 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou interface_R_imh.pressure - lambda_min * (0.5 * dtodx) * (del_m_i.pressure + (1.0 + (2.0 / 3.0) * lambda_min * dtodx) * p_6); - #ifdef DE +#ifdef DE interface_L_iph.gas_energy = interface_L_iph.gas_energy - lambda_max * (0.5 * dtodx) * (del_m_i.gas_energy - (1.0 - (2.0 / 3.0) * lambda_max * dtodx) * ge_6); interface_R_imh.gas_energy = interface_R_imh.gas_energy - lambda_min * (0.5 * dtodx) * (del_m_i.gas_energy + (1.0 + (2.0 / 3.0) * lambda_min * dtodx) * ge_6); - #endif // DE +#endif // DE - #ifdef SCALAR +#ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { interface_L_iph.scalar[i] = interface_L_iph.scalar[i] - @@ -325,22 +326,22 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou interface_R_imh.scalar[i] - lambda_min * (0.5 * dtodx) * (del_m_i.scalar[i] + (1.0 + (2.0 / 3.0) * lambda_min * dtodx) * scalar_6[i]); } - #endif // SCALAR +#endif // SCALAR // Step 10 - Perform the characteristic tracing // Stone Eqns 57 - 60 // left-hand interface value, i+1/2 Real sum_1 = 0, sum_2 = 0, sum_3 = 0, sum_4 = 0, sum_5 = 0; - #ifdef DE +#ifdef DE Real sum_ge = 0; - #endif // DE - #ifdef SCALAR +#endif // DE +#ifdef SCALAR Real sum_scalar[NSCALARS]; for (int i = 0; i < NSCALARS; i++) { sum_scalar[i] = 0; } - #endif // SCALAR +#endif // SCALAR if (lambda_m >= 0) { Real const A = (0.5 * dtodx) * (lambda_p - lambda_m); @@ -365,27 +366,27 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou Real const chi_3 = A * (del_m_i.velocity_y - vy_6) + B * vy_6; Real const chi_4 = A * (del_m_i.velocity_z - vz_6) + B * vz_6; Real const chi_5 = A * (del_m_i.pressure - p_6) + B * p_6; - #ifdef DE +#ifdef DE Real chi_ge = A * (del_m_i.gas_energy - ge_6) + B * ge_6; - #endif // DE - #ifdef SCALAR +#endif // DE +#ifdef SCALAR Real chi_scalar[NSCALARS]; for (int i = 0; i < NSCALARS; i++) { chi_scalar[i] = A * (del_m_i.scalar[i] - scalar_6[i]) + B * scalar_6[i]; } - #endif // SCALAR +#endif // SCALAR sum_1 += chi_1 - chi_5 / (sound_speed * sound_speed); sum_3 += chi_3; sum_4 += chi_4; - #ifdef DE +#ifdef DE sum_ge += chi_ge; - #endif // DE - #ifdef SCALAR +#endif // DE +#ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { sum_scalar[i] += chi_scalar[i]; } - #endif // SCALAR +#endif // SCALAR } if (lambda_p >= 0) { Real const A = (0.5 * dtodx) * (lambda_p - lambda_p); @@ -408,14 +409,14 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou interface_L_iph.velocity_y += sum_3; interface_L_iph.velocity_z += sum_4; interface_L_iph.pressure += sum_5; - #ifdef DE +#ifdef DE interface_L_iph.gas_energy += sum_ge; - #endif // DE - #ifdef SCALAR +#endif // DE +#ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { interface_L_iph.scalar[i] += sum_scalar[i]; } - #endif // SCALAR +#endif // SCALAR // right-hand interface value, i-1/2 sum_1 = 0; @@ -423,14 +424,14 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou sum_3 = 0; sum_4 = 0; sum_5 = 0; - #ifdef DE +#ifdef DE sum_ge = 0; - #endif // DE - #ifdef SCALAR +#endif // DE +#ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { sum_scalar[i] = 0; } - #endif // SCALAR +#endif // SCALAR if (lambda_m <= 0) { Real const C = (0.5 * dtodx) * (lambda_m - lambda_m); Real const D = (1.0 / 3.0) * (dtodx) * (dtodx) * (lambda_m * lambda_m - lambda_m * lambda_m); @@ -454,26 +455,26 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou Real const chi_3 = C * (del_m_i.velocity_y + vy_6) + D * vy_6; Real const chi_4 = C * (del_m_i.velocity_z + vz_6) + D * vz_6; Real const chi_5 = C * (del_m_i.pressure + p_6) + D * p_6; - #ifdef DE +#ifdef DE chi_ge = C * (del_m_i.gas_energy + ge_6) + D * ge_6; - #endif // DE - #ifdef SCALAR +#endif // DE +#ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { chi_scalar[i] = C * (del_m_i.scalar[i] + scalar_6[i]) + D * scalar_6[i]; } - #endif // SCALAR +#endif // SCALAR sum_1 += chi_1 - chi_5 / (sound_speed * sound_speed); sum_3 += chi_3; sum_4 += chi_4; - #ifdef DE +#ifdef DE sum_ge += chi_ge; - #endif // DE - #ifdef SCALAR +#endif // DE +#ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { sum_scalar[i] += chi_scalar[i]; } - #endif // SCALAR +#endif // SCALAR } if (lambda_p <= 0) { Real const C = (0.5 * dtodx) * (lambda_m - lambda_p); @@ -496,16 +497,169 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou interface_R_imh.velocity_y += sum_3; interface_R_imh.velocity_z += sum_4; interface_R_imh.pressure += sum_5; - #ifdef DE +#ifdef DE interface_R_imh.gas_energy += sum_ge; - #endif // DE - #ifdef SCALAR +#endif // DE +#ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { interface_R_imh.scalar[i] += sum_scalar[i]; } - #endif // SCALAR +#endif // SCALAR + + // This is the end of the characteristic tracing + + // enforce minimum values + interface_R_imh.density = fmax(interface_R_imh.density, (Real)TINY_NUMBER); + interface_L_iph.density = fmax(interface_L_iph.density, (Real)TINY_NUMBER); + interface_R_imh.pressure = fmax(interface_R_imh.pressure, (Real)TINY_NUMBER); + interface_L_iph.pressure = fmax(interface_L_iph.pressure, (Real)TINY_NUMBER); + + // Step 11 - Send final values back from kernel + + // Convert the left and right states in the primitive to the conserved variables send final values back from kernel + // bounds_R refers to the right side of the i-1/2 interface + size_t id = cuda_utilities::compute1DIndex(xid, yid, zid, nx, ny); + reconstruction::Write_Data(interface_L_iph, dev_bounds_L, dev_conserved, id, n_cells, o1, o2, o3, gamma); + + id = cuda_utilities::compute1DIndex(xid - int(dir == 0), yid - int(dir == 1), zid - int(dir == 2), nx, ny); + reconstruction::Write_Data(interface_R_imh, dev_bounds_R, dev_conserved, id, n_cells, o1, o2, o3, gamma); +} +// ===================================================================================================================== + +// ===================================================================================================================== +__global__ void PPMC_VL(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, int ny, int nz, Real gamma, + int dir) +{ + // get a thread ID + int const thread_id = threadIdx.x + blockIdx.x * blockDim.x; + int xid, yid, zid; + cuda_utilities::compute3DIndices(thread_id, nx, ny, xid, yid, zid); + + // Thread guard to prevent overrun + if (size_t const min = 3, max = 3; + xid < min or xid >= nx - max or yid < min or yid >= ny - max or zid < min or zid >= nz - max) { + return; + } + + // Compute the total number of cells + int const n_cells = nx * ny * nz; + + // Set the field indices for the various directions + int o1, o2, o3; + switch (dir) { + case 0: + o1 = grid_enum::momentum_x; + o2 = grid_enum::momentum_y; + o3 = grid_enum::momentum_z; + break; + case 1: + o1 = grid_enum::momentum_y; + o2 = grid_enum::momentum_z; + o3 = grid_enum::momentum_x; + break; + case 2: + o1 = grid_enum::momentum_z; + o2 = grid_enum::momentum_x; + o3 = grid_enum::momentum_y; + break; + } + + // load the 5-cell stencil into registers + // cell i + reconstruction::Primitive const cell_i = + reconstruction::Load_Data(dev_conserved, xid, yid, zid, nx, ny, n_cells, o1, o2, o3, gamma); + + // cell i-1. The equality checks check the direction and subtracts one from the direction + // im1 stands for "i minus 1" + reconstruction::Primitive const cell_im1 = reconstruction::Load_Data( + dev_conserved, xid - int(dir == 0), yid - int(dir == 1), zid - int(dir == 2), nx, ny, n_cells, o1, o2, o3, gamma); + + // cell i+1. The equality checks check the direction and adds one to the direction + // ip1 stands for "i plus 1" + reconstruction::Primitive const cell_ip1 = reconstruction::Load_Data( + dev_conserved, xid + int(dir == 0), yid + int(dir == 1), zid + int(dir == 2), nx, ny, n_cells, o1, o2, o3, gamma); + + // cell i-2. The equality checks check the direction and subtracts one from the direction + // im2 stands for "i minus 2" + reconstruction::Primitive const cell_im2 = + reconstruction::Load_Data(dev_conserved, xid - 2 * int(dir == 0), yid - 2 * int(dir == 1), + zid - 2 * int(dir == 2), nx, ny, n_cells, o1, o2, o3, gamma); + + // cell i+2. The equality checks check the direction and adds one to the direction + // ip2 stands for "i plus 2" + reconstruction::Primitive const cell_ip2 = + reconstruction::Load_Data(dev_conserved, xid + 2 * int(dir == 0), yid + 2 * int(dir == 1), + zid + 2 * int(dir == 2), nx, ny, n_cells, o1, o2, o3, gamma); + + // Convert to the characteristic variables + Real const sound_speed = hydro_utilities::Calc_Sound_Speed(cell_i.pressure, cell_i.density, gamma); + Real const sound_speed_squared = sound_speed * sound_speed; + + // Cell i + reconstruction::Characteristic const cell_i_characteristic = + reconstruction::Primitive_To_Characteristic(cell_i, cell_i, sound_speed, sound_speed_squared, gamma); + + // Cell i-1 + reconstruction::Characteristic const cell_im1_characteristic = + reconstruction::Primitive_To_Characteristic(cell_i, cell_im1, sound_speed, sound_speed_squared, gamma); + + // Cell i-2 + reconstruction::Characteristic const cell_im2_characteristic = + reconstruction::Primitive_To_Characteristic(cell_i, cell_im2, sound_speed, sound_speed_squared, gamma); + + // Cell i+1 + reconstruction::Characteristic const cell_ip1_characteristic = + reconstruction::Primitive_To_Characteristic(cell_i, cell_ip1, sound_speed, sound_speed_squared, gamma); + + // Cell i+2 + reconstruction::Characteristic const cell_ip2_characteristic = + reconstruction::Primitive_To_Characteristic(cell_i, cell_ip2, sound_speed, sound_speed_squared, gamma); + + // Compute the interface states for each field + reconstruction::Characteristic interface_R_imh_characteristic, interface_L_iph_characteristic; + reconstruction::Primitive interface_L_iph, interface_R_imh; + + reconstruction::PPM_Single_Variable(cell_im2_characteristic.a0, cell_im1_characteristic.a0, cell_i_characteristic.a0, + cell_ip1_characteristic.a0, cell_ip2_characteristic.a0, + interface_L_iph_characteristic.a0, interface_R_imh_characteristic.a0); + reconstruction::PPM_Single_Variable(cell_im2_characteristic.a1, cell_im1_characteristic.a1, cell_i_characteristic.a1, + cell_ip1_characteristic.a1, cell_ip2_characteristic.a1, + interface_L_iph_characteristic.a1, interface_R_imh_characteristic.a1); + reconstruction::PPM_Single_Variable(cell_im2_characteristic.a2, cell_im1_characteristic.a2, cell_i_characteristic.a2, + cell_ip1_characteristic.a2, cell_ip2_characteristic.a2, + interface_L_iph_characteristic.a2, interface_R_imh_characteristic.a2); + reconstruction::PPM_Single_Variable(cell_im2_characteristic.a3, cell_im1_characteristic.a3, cell_i_characteristic.a3, + cell_ip1_characteristic.a3, cell_ip2_characteristic.a3, + interface_L_iph_characteristic.a3, interface_R_imh_characteristic.a3); + reconstruction::PPM_Single_Variable(cell_im2_characteristic.a4, cell_im1_characteristic.a4, cell_i_characteristic.a4, + cell_ip1_characteristic.a4, cell_ip2_characteristic.a4, + interface_L_iph_characteristic.a4, interface_R_imh_characteristic.a4); + +#ifdef MHD + reconstruction::PPM_Single_Variable(cell_im2_characteristic.a5, cell_im1_characteristic.a5, cell_i_characteristic.a5, + cell_ip1_characteristic.a5, cell_ip2_characteristic.a5, + interface_L_iph_characteristic.a5, interface_R_imh_characteristic.a5); + reconstruction::PPM_Single_Variable(cell_im2_characteristic.a6, cell_im1_characteristic.a6, cell_i_characteristic.a6, + cell_ip1_characteristic.a6, cell_ip2_characteristic.a6, + interface_L_iph_characteristic.a6, interface_R_imh_characteristic.a6); +#endif // MHD + +#ifdef DE + reconstruction::PPM_Single_Variable(cell_im2.gas_energy, cell_im1.gas_energy, cell_i.gas_energy, cell_ip1.gas_energy, + cell_ip2.gas_energy, interface_L_iph.gas_energy, interface_R_imh.gas_energy); +#endif // DE +#ifdef SCALAR + for (int i = 0; i < NSCALARS; i++) { + reconstruction::PPM_Single_Variable(cell_im2.scalar[i], cell_im1.scalar[i], cell_i.scalar[i], cell_ip1.scalar[i], + cell_ip2.scalar[i], interface_L_iph.scalar[i], interface_R_imh.scalar[i]); + } +#endif // SCALAR -#endif // not VL, i.e. CTU or SIMPLE was used for this section + // Convert back to primitive variables + reconstruction::Characteristic_To_Primitive(cell_i, interface_L_iph_characteristic, sound_speed, sound_speed_squared, + gamma, interface_L_iph); + reconstruction::Characteristic_To_Primitive(cell_i, interface_R_imh_characteristic, sound_speed, sound_speed_squared, + gamma, interface_R_imh); // enforce minimum values interface_R_imh.density = fmax(interface_R_imh.density, (Real)TINY_NUMBER); @@ -522,4 +676,5 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou id = cuda_utilities::compute1DIndex(xid - int(dir == 0), yid - int(dir == 1), zid - int(dir == 2), nx, ny); reconstruction::Write_Data(interface_R_imh, dev_bounds_R, dev_conserved, id, n_cells, o1, o2, o3, gamma); -} \ No newline at end of file +} +// ===================================================================================================================== diff --git a/src/reconstruction/ppmc_cuda.h b/src/reconstruction/ppmc_cuda.h index f70d4a801..1eb146750 100644 --- a/src/reconstruction/ppmc_cuda.h +++ b/src/reconstruction/ppmc_cuda.h @@ -7,11 +7,38 @@ #include "../global/global.h" -/*! \fn void PPMC(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, - int nx, int ny, int nz, int n_ghost, Real dx, Real dt, Real gamma, int dir) - * \brief When passed a stencil of conserved variables, returns the left and - right boundary values for the interface calculated using ppm. */ -__global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, int ny, int nz, Real dx, - Real dt, Real gamma, int dir); +/*! + * \brief Computes the left and right interface states using PPM with limiting in the characteristic variables and + * characteristic tracing. Used for the CTU and SIMPLE integrators + * + * \param[in] dev_conserved The conserved variable array + * \param[out] dev_bounds_L The array of left interfaces + * \param[out] dev_bounds_R The array of right interfaces + * \param[in] nx The number of cells in the X-direction + * \param[in] ny The number of cells in the Y-direction + * \param[in] nz The number of cells in the Z-direction + * \param[in] dx The length of the cells in the `dir` direction + * \param[in] dt The time step + * \param[in] gamma The adiabatic index + * \param[in] dir The direction to reconstruct. 0=X, 1=Y, 2=Z + */ +__global__ void PPMC_CTU(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, int ny, int nz, Real dx, + Real dt, Real gamma, int dir); + +/*! + * \brief Computes the left and right interface states using PPM with limiting in the characteristic variables. Used for + * the VL (Van Leer) integrators + * + * \param[in] dev_conserved The conserved variable array + * \param[out] dev_bounds_L The array of left interfaces + * \param[out] dev_bounds_R The array of right interfaces + * \param[in] nx The number of cells in the X-direction + * \param[in] ny The number of cells in the Y-direction + * \param[in] nz The number of cells in the Z-direction + * \param[in] gamma The adiabatic index + * \param[in] dir The direction to reconstruct. 0=X, 1=Y, 2=Z + */ +__global__ void PPMC_VL(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, int ny, int nz, Real gamma, + int dir); #endif // PPMC_CUDA_H diff --git a/src/reconstruction/ppmc_cuda_tests.cu b/src/reconstruction/ppmc_cuda_tests.cu index 573900aae..3352f22f6 100644 --- a/src/reconstruction/ppmc_cuda_tests.cu +++ b/src/reconstruction/ppmc_cuda_tests.cu @@ -22,7 +22,7 @@ #include "../utils/hydro_utilities.h" #include "../utils/testing_utilities.h" -TEST(tHYDROPpmcReconstructor, CorrectInputExpectCorrectOutput) +TEST(tHYDROPpmcCTUReconstructor, CorrectInputExpectCorrectOutput) { // Set up PRNG to use std::mt19937_64 prng(42); @@ -87,7 +87,7 @@ TEST(tHYDROPpmcReconstructor, CorrectInputExpectCorrectOutput) cuda_utilities::DeviceVector dev_interface_right(host_grid.size(), true); // Launch kernel - hipLaunchKernelGGL(PPMC_cuda, dev_grid.size(), 1, 0, 0, dev_grid.data(), dev_interface_left.data(), + hipLaunchKernelGGL(PPMC_CTU, dev_grid.size(), 1, 0, 0, dev_grid.data(), dev_interface_left.data(), dev_interface_right.data(), nx, ny, nz, dx, dt, gamma, direction); CudaCheckError(); CHECK(cudaDeviceSynchronize()); @@ -118,20 +118,22 @@ TEST(tHYDROPpmcReconstructor, CorrectInputExpectCorrectOutput) } } -TEST(tMHDPpmcReconstructor, CorrectInputExpectCorrectOutput) +TEST(tALLPpmcVLReconstructor, CorrectInputExpectCorrectOutput) { // Set up PRNG to use std::mt19937_64 prng(42); std::uniform_real_distribution doubleRand(0.1, 5); // Mock up needed information - size_t const nx = 7; - size_t const ny = 7; - size_t const nz = 7; + size_t const nx = 7; + size_t const ny = 7; + size_t const nz = 7; + double const gamma = 5.0 / 3.0; +#ifdef MHD size_t const n_fields = 8; - double const dx = doubleRand(prng); - double const dt = doubleRand(prng); - double const gamma = 5.0 / 3.0; +#else // not MHD + size_t const n_fields = 5; +#endif // MHD // Setup host grid. Fill host grid with random values and randomly assign maximum value std::vector host_grid(nx * ny * nz * n_fields); @@ -143,50 +145,73 @@ TEST(tMHDPpmcReconstructor, CorrectInputExpectCorrectOutput) cuda_utilities::DeviceVector dev_grid(host_grid.size()); dev_grid.cpyHostToDevice(host_grid); - // Fiducial Data - std::vector> fiducial_interface_left = {{{171, 1.534770576865724}, - {514, 2.9583427219427034}, - {857, 3.1279687606328648}, - {1200, 3.0345219714853804}, - {1543, 23.015998619464185}, - {1886, 2.1906071705977261}, - {2229, 3.1997462690190144}}, - {{171, 1.6206985712721598}, - {514, 3.123972986618837}, - {857, 3.30309596610488}, - {1200, 3.204417323222251}, - {1543, 26.732346761532895}, - {1886, 4.0436839628613175}, - {2229, 4.1622274705137627}}, - {{171, 1.6206985712721595}, - {514, 1.7752459698084133}, - {857, 3.9720060989313879}, - {1200, 3.2044173232222506}, - {1543, 21.984278941312677}, - {1886, 4.1622274705137627}, - {2229, 2.1042141607876181}}}; - - std::vector> fiducial_interface_right = {{{170, 1.7925545600850308}, - {513, 3.4552335159711038}, - {856, 3.6533503770489086}, - {1199, 3.5442080266959914}, - {1542, 29.263332026690119}, - {1885, 2.1906071705977261}, - {2228, 3.1997462690190144}}, - {{164, 1.6206985712721595}, - {507, 3.1239729866188366}, - {850, 3.3030959661048795}, - {1193, 3.2044173232222506}, - {1536, 26.803126363556764}, - {1879, 2.1514229421449058}, - {2222, 4.1622274705137627}}, - {{122, 1.6206985712721595}, - {465, 5.4175246353495679}, - {808, 2.4067132198954435}, - {1151, 3.2044173232222506}, - {1494, 35.794674014212731}, - {1837, 4.1622274705137627}, - {2180, 2.7068276720054212}}}; +// Fiducial Data +#ifdef MHD + std::vector> fiducial_interface_left = {{{171, 1.5556846217288991}, + {514, 1.7422005905354798}, + {857, 3.6289199464135558}, + {1200, 2.1487031353407438}, + {1543, 22.988345461909127}, + {1886, 3.1027541330860546}, + {2229, 3.2554981416903335}}, + {{171, 1.7167767631895592}, + {514, 1.8447385381907686}, + {857, 2.9211469103910663}, + {1200, 2.626030390823102}, + {1543, 28.84165870179233}, + {1886, 3.8209152940021962}, + {2229, 2.7248523895714203}}, + {{171, 1.421933695280897}, + {514, 1.2318388818745061}, + {857, 2.8667822907691818}, + {1200, 2.1256773710028964}, + {1543, 15.684026541123352}, + {1886, 2.3642698195433232}, + {2229, 2.9207483994866617}}}; + + std::vector> fiducial_interface_right = {{{170, 1.4838721492695441}, + {513, 1.3797509020377114}, + {856, 3.223172223924883}, + {1199, 2.2593969253004111}, + {1542, 15.634488002075017}, + {1885, 2.7494588681249819}, + {2228, 3.2540533219925698}}, + {{164, 1.4075989434297753}, + {507, 1.34947711631431}, + {850, 3.605198021293794}, + {1193, 1.9244827470895529}, + {1536, 13.52285212927548}, + {1879, 2.9568307038177966}, + {2222, 2.1086380065800636}}, + {{122, 1.9532382085816002}, + {465, 2.6860067041011249}, + {808, 5.1657781029381917}, + {1151, 2.7811084475444732}, + {1494, 24.999993264381686}, + {1837, 2.3090650532529238}, + {2180, 2.8525500781893642}}}; +#else // not MHD + std::vector> fiducial_interface_left = { + {{171, 1.5239648818969727}, {514, 1.658831367400063}, {857, 3.3918153400617137}, {1200, 2.4096936604224304}}, + {{171, 1.5239639282226562}, {514, 1.6246850138898132}, {857, 3.391813217514656}, {1200, 2.3220060950058032}}, + {{171, 1.7062816619873047}, {514, 1.3300289077249516}, {857, 3.5599794228554593}, {1200, 2.5175993972231074}}}; + + std::vector> fiducial_interface_right = {{{135, 6.5824208227997447}, + {170, 1.5239620208740234}, + {513, 1.5386557138925041}, + {856, 3.3918089724205411}, + {1199, 1.9263881802230425}}, + {{135, 6.4095055796015963}, + {164, 1.5239639282226562}, + {507, 1.5544994569400168}, + {850, 3.391813217514656}, + {1193, 2.1017627061702138}}, + {{122, 1.3893871307373047}, + {135, 6.0894802934332555}, + {465, 2.1518846449159135}, + {808, 3.4792525252435533}, + {1151, 2.0500250813102903}}}; +#endif // MHD // Loop over different directions for (size_t direction = 0; direction < 3; direction++) { @@ -195,8 +220,8 @@ TEST(tMHDPpmcReconstructor, CorrectInputExpectCorrectOutput) cuda_utilities::DeviceVector dev_interface_right(nx * ny * nz * (n_fields - 1), true); // Launch kernel - hipLaunchKernelGGL(PPMC_cuda, dev_grid.size(), 1, 0, 0, dev_grid.data(), dev_interface_left.data(), - dev_interface_right.data(), nx, ny, nz, dx, dt, gamma, direction); + hipLaunchKernelGGL(PPMC_VL, dev_grid.size(), 1, 0, 0, dev_grid.data(), dev_interface_left.data(), + dev_interface_right.data(), nx, ny, nz, gamma, direction); CudaCheckError(); CHECK(cudaDeviceSynchronize()); diff --git a/src/reconstruction/reconstruction.h b/src/reconstruction/reconstruction.h index d30fa5dea..204ee8eff 100644 --- a/src/reconstruction/reconstruction.h +++ b/src/reconstruction/reconstruction.h @@ -291,7 +291,7 @@ Characteristic __device__ __inline__ Primitive_To_Characteristic(Primitive const // Compute Betas (equation A17). Note that rhypot can return an inf if By and Bz are both zero, the isfinite check // handles that case Real const beta_denom = rhypot(primitive.magnetic_y, primitive.magnetic_z); - Real const beta_y = (isfinite(beta_denom)) ? primitive.magnetic_y * beta_denom : 0.0; + Real const beta_y = (isfinite(beta_denom)) ? primitive.magnetic_y * beta_denom : 1.0; Real const beta_z = (isfinite(beta_denom)) ? primitive.magnetic_z * beta_denom : 0.0; // Compute Q(s) (equation A14) @@ -397,7 +397,7 @@ void __device__ __inline__ Characteristic_To_Primitive(Primitive const &primitiv // Compute Betas (equation A17). Note that rhypot can return an inf if By and Bz are both zero, the isfinite check // handles that case Real const beta_denom = rhypot(primitive.magnetic_y, primitive.magnetic_z); - Real const beta_y = (isfinite(beta_denom)) ? primitive.magnetic_y * beta_denom : 0.0; + Real const beta_y = (isfinite(beta_denom)) ? primitive.magnetic_y * beta_denom : 1.0; Real const beta_z = (isfinite(beta_denom)) ? primitive.magnetic_z * beta_denom : 0.0; // Compute Q(s) (equation A14) @@ -625,7 +625,8 @@ Primitive __device__ __host__ __inline__ Calc_Interface_Linear(Primitive const & // ===================================================================================================================== /*! - * \brief Compute the interface state from the slope and cell centered state using parabolic interpolation + * \brief Compute the interface state for the CTU version fo the reconstructor from the slope and cell centered state + * using parabolic interpolation * * \param[in] cell_i The state in cell i * \param[in] cell_im1 The state in cell i-1 @@ -667,6 +668,150 @@ Primitive __device__ __host__ __inline__ Calc_Interface_Parabolic(Primitive cons } // ===================================================================================================================== +// ===================================================================================================================== +/*! + * \brief Compute the PPM interface state for a given field/stencil. + * + * \details This method is heavily based on the implementation in Athena++. See the following papers for details + * - K. Felker & J. Stone, "A fourth-order accurate finite volume method for ideal MHD via upwind constrained + * transport", JCP, 375, (2018) + * - P. Colella & P. Woodward, "The Piecewise Parabolic Method (PPM) for Gas-Dynamical Simulations", JCP, 54, 174 + * (1984) + * - P. Colella & M. Sekora, "A limiter for PPM that preserves accuracy at smooth extrema", JCP, 227, 7069 (2008) + * - P. McCorquodale & P. Colella, "A high-order finite-volume method for conservation laws on locally refined grids", + * CAMCoS, 6, 1 (2011) + * - P. Colella, M.R. Dorr, J. Hittinger, D. Martin, "High-order, finite-volume methods in mapped coordinates", JCP, + * 230, 2952 (2011) + * + * \param[in] cell_im2 The value of the field/stencil at i-2 + * \param[in] cell_im1 The value of the field/stencil at i-1 + * \param[in] cell_i The value of the field/stencil at i + * \param[in] cell_ip1 The value of the field/stencil at i+1 + * \param[in] cell_ip2 The value of the field/stencil at i+2 + * \param[out] interface_L_iph The left interface at the i+1/2 face + * \param[out] interface_R_imh The right interface at the i-1/2 face + */ +void __device__ __host__ __inline__ PPM_Single_Variable(Real const &cell_im2, Real const &cell_im1, Real const &cell_i, + Real const &cell_ip1, Real const &cell_ip2, + Real &interface_L_iph, Real &interface_R_imh) +{ + // This method is heavily based on the implementation in Athena++. See the following papers for details + // - K. Felker & J. Stone, "A fourth-order accurate finite volume method for ideal MHD via upwind constrained + // transport", JCP, 375, (2018) + // - P. Colella & P. Woodward, "The Piecewise Parabolic Method (PPM) for Gas-Dynamical Simulations", JCP, 54, 174 + // (1984) + // - P. Colella & M. Sekora, "A limiter for PPM that preserves accuracy at smooth extrema", JCP, 227, 7069 (2008) + // - P. McCorquodale & P. Colella, "A high-order finite-volume method for conservation laws on locally refined + // grids", CAMCoS, 6, 1 (2011) + // - P. Colella, M.R. Dorr, J. Hittinger, D. Martin, "High-order, finite-volume methods in mapped coordinates", JCP, + // 230, 2952 (2011) + + // Let's start by setting up some things that we'll need later + + // Colella & Sekora 2008 constant used in second derivative limiter + Real const C2 = 1.25; + + // This lambda function is used for limiting the interfaces + auto limit_interface = [&C2](Real const &cell_i, Real const &cell_im1, Real const &interface, Real const &slope_2nd_i, + Real const &slope_2nd_im1) -> Real { + // Colella et al. 2011 eq. 85b. + // 85a is slope_2nd_im1 and 85c is slope_2nd_i + Real slope_2nd_centered = 3.0 * (cell_im1 + cell_i - 2.0 * interface); + + Real limited_slope = 0.0; + if (SIGN(slope_2nd_centered) == SIGN(slope_2nd_im1) and SIGN(slope_2nd_centered) == SIGN(slope_2nd_i)) { + limited_slope = + SIGN(slope_2nd_centered) * min(C2 * abs(slope_2nd_im1), min(C2 * abs(slope_2nd_i), abs(slope_2nd_centered))); + } + + // Collela et al. 2011 eq. 84a & 84b + Real const diff_left = interface - cell_im1; + Real const diff_right = cell_i - interface; + if (diff_left * diff_right < 0.0) { + // Local extrema detected at the interface + return 0.5 * (cell_im1 + cell_i) - limited_slope / 6.0; + } else { + return interface; + } + }; + + // Now that the setup is done we can start computing the interface states + + // Compute average slopes + Real const slope_left = (cell_i - cell_im1); + Real const slope_right = (cell_ip1 - cell_i); + Real const slope_avg_im1 = 0.5 * slope_left + 0.5 * (cell_im1 - cell_im2); + Real const slope_avg_i = 0.5 * slope_right + 0.5 * slope_left; + Real const slope_avg_ip1 = 0.5 * (cell_ip2 - cell_ip1) + 0.5 * slope_right; + + // Approximate interface average at i-1/2 and i+1/2 using PPM + // P. Colella & P. Woodward 1984 eq. 1.6 + interface_R_imh = 0.5 * (cell_im1 + cell_i) + (slope_avg_im1 - slope_avg_i) / 6.0; + interface_L_iph = 0.5 * (cell_i + cell_ip1) + (slope_avg_i - slope_avg_ip1) / 6.0; + + // Limit interpolated interface states (Colella et al. 2011 section 4.3.1) + + // Approximate second derivative at interfaces for smooth extrema preservation + // Colella et al. 2011 eq 85a + Real const slope_2nd_im1 = cell_im2 + cell_i - 2.0 * cell_im1; + Real const slope_2nd_i = cell_im1 + cell_ip1 - 2.0 * cell_i; + Real const slope_2nd_ip1 = cell_i + cell_ip2 - 2.0 * cell_ip1; + + interface_R_imh = limit_interface(cell_i, cell_im1, interface_R_imh, slope_2nd_i, slope_2nd_im1); + interface_L_iph = limit_interface(cell_ip1, cell_i, interface_L_iph, slope_2nd_ip1, slope_2nd_i); + + // Compute cell-centered difference stencils (McCorquodale & Colella 2011 section 2.4.1) + + // Apply Colella & Sekora limiters to parabolic interpolant + Real slope_2nd_face = 6.0 * (interface_R_imh + interface_L_iph - 2.0 * cell_i); + + Real slope_2nd_limited = 0.0; + if (SIGN(slope_2nd_im1) == SIGN(slope_2nd_i) and SIGN(slope_2nd_im1) == SIGN(slope_2nd_ip1) and + SIGN(slope_2nd_im1) == SIGN(slope_2nd_face)) { + // Extrema is smooth + // Colella & Sekora eq. 22 + slope_2nd_limited = SIGN(slope_2nd_face) * min(min(C2 * abs(slope_2nd_im1), C2 * abs(slope_2nd_i)), + min(C2 * abs(slope_2nd_ip1), abs(slope_2nd_face))); + } + + // Check if 2nd derivative is close to roundoff error + Real cell_max = max(abs(cell_im2), abs(cell_im1)); + cell_max = max(cell_max, abs(cell_i)); + cell_max = max(cell_max, abs(cell_ip1)); + cell_max = max(cell_max, abs(cell_ip2)); + + // If this condition is true then the limiter is not sensitive to roundoff and we use the limited ratio + // McCorquodale & Colella 2011 eq. 27 + Real const rho = (abs(slope_2nd_face) > (1.0e-12) * cell_max) ? slope_2nd_limited / slope_2nd_face : 0.0; + + // Colella & Sekora eq. 25 + Real slope_face_left = cell_i - interface_R_imh; + Real slope_face_right = interface_L_iph - cell_i; + + // Check for local extrema + if ((slope_face_left * slope_face_right) <= 0.0 or ((cell_ip1 - cell_i) * (cell_i - cell_im1)) <= 0.0) { + // Extrema detected + // Check if relative change in limited 2nd deriv is > roundoff + if (rho <= (1.0 - (1.0e-12))) { + // Limit smooth extrema + // Colella & Sekora eq. 23 + interface_R_imh = cell_i - rho * slope_face_left; + interface_L_iph = cell_i + rho * slope_face_right; + } + } else { + // No extrema detected + // Overshoot i-1/2,R / i,(-) state + if (abs(slope_face_left) >= 2.0 * abs(slope_face_right)) { + interface_R_imh = cell_i - 2.0 * slope_face_right; + } + // Overshoot i+1/2,L / i,(+) state + if (abs(slope_face_right) >= 2.0 * abs(slope_face_left)) { + interface_L_iph = cell_i + 2.0 * slope_face_left; + } + } +} +// ===================================================================================================================== + // ===================================================================================================================== /*! * \brief Write the interface data to the appropriate arrays diff --git a/src/reconstruction/reconstruction_tests.cu b/src/reconstruction/reconstruction_tests.cu index 9b4d8cb48..d6ef4afe9 100644 --- a/src/reconstruction/reconstruction_tests.cu +++ b/src/reconstruction/reconstruction_tests.cu @@ -285,51 +285,20 @@ TEST(tALLReconstructionMonotonizeCharacteristicReturnPrimitive, CorrectInputExpe #endif // MHD } -TEST(tALLReconstructionMonotizeParabolicInterface, CorrectInputExpectCorrectOutput) +TEST(tHYDROReconstructionMonotizeParabolicInterface, CorrectInputExpectCorrectOutput) { -// Input Data -#ifdef MHD - reconstruction::Primitive const cell_i{1.4708046701, 9.5021020181, 3.7123503442, 4.6476103466, - 3.7096802847, 8.9692274397, 9.3416846121, 2.7707989229}; - reconstruction::Primitive const cell_im1{3.9547588941, 3.1552319951, 3.0209247624, 9.5841013261, - 2.2945188332, 8.2028929443, 1.6941969156, 8.9424967039}; - reconstruction::Primitive const cell_ip1{5.1973323534, 6.9132613767, 1.8397298636, 5.341960387, - 9.093498542, 3.6911762486, 7.3777130085, 3.6711825219}; - reconstruction::Primitive interface_L_iph{6.7787324804, 9.5389820358, 9.8522754567, 7.8305142852, - 2.450533435, 9.4782390708, 5.6820584385, 4.7115587023}; - reconstruction::Primitive interface_R_imh{4.8015193892, 5.9124263972, 8.7513040382, 8.3659359773, - 1.339777121, 4.5589857979, 1.4398647311, 8.8727778983}; -#else // not MHD + // Input Data + reconstruction::Primitive const cell_i{1.4708046701, 9.5021020181, 3.7123503442, 4.6476103466, 3.7096802847}; reconstruction::Primitive const cell_im1{3.9547588941, 3.1552319951, 3.0209247624, 9.5841013261, 2.2945188332}; reconstruction::Primitive const cell_ip1{5.1973323534, 6.9132613767, 1.8397298636, 5.341960387, 9.093498542}; reconstruction::Primitive interface_L_iph{6.7787324804, 9.5389820358, 9.8522754567, 7.8305142852, 2.450533435}; reconstruction::Primitive interface_R_imh{4.8015193892, 5.9124263972, 8.7513040382, 8.3659359773, 1.339777121}; -#endif // MHD // Get test data reconstruction::Monotonize_Parabolic_Interface(cell_i, cell_im1, cell_ip1, interface_L_iph, interface_R_imh); -// Check results -#ifdef MHD - reconstruction::Primitive const fiducial_interface_L{1.4708046700999999, 9.5021020181000004, 3.7123503441999999, - 4.6476103465999996, 3.7096802847000001, 0 < 9.3416846120999999, - 2.7707989229000001}; - reconstruction::Primitive const fiducial_interface_R{1.4708046700999999, 9.428341982700001, 3.7123503441999999, - 4.6476103465999996, 3.7096802847000001, 0 < 9.3416846120999999, - 2.7707989229000001}; - testingUtilities::checkResults(fiducial_interface_L.density, interface_L_iph.density, "density"); - testingUtilities::checkResults(fiducial_interface_L.velocity_x, interface_L_iph.velocity_x, "velocity_x"); - testingUtilities::checkResults(fiducial_interface_L.velocity_y, interface_L_iph.velocity_y, "velocity_y"); - testingUtilities::checkResults(fiducial_interface_L.velocity_z, interface_L_iph.velocity_z, "velocity_z"); - testingUtilities::checkResults(fiducial_interface_L.pressure, interface_L_iph.pressure, "pressure"); - - testingUtilities::checkResults(fiducial_interface_R.density, interface_R_imh.density, "density"); - testingUtilities::checkResults(fiducial_interface_R.velocity_x, interface_R_imh.velocity_x, "velocity_x"); - testingUtilities::checkResults(fiducial_interface_R.velocity_y, interface_R_imh.velocity_y, "velocity_y"); - testingUtilities::checkResults(fiducial_interface_R.velocity_z, interface_R_imh.velocity_z, "velocity_z"); - testingUtilities::checkResults(fiducial_interface_R.pressure, interface_R_imh.pressure, "pressure"); -#else // MHD + // Check results reconstruction::Primitive const fiducial_interface_L{1.4708046700999999, 9.5021020181000004, 3.7123503441999999, 4.6476103465999996, 3.7096802847000001}; reconstruction::Primitive const fiducial_interface_R{1.4708046700999999, 9.428341982700001, 3.7123503441999999, @@ -345,7 +314,6 @@ TEST(tALLReconstructionMonotizeParabolicInterface, CorrectInputExpectCorrectOutp testingUtilities::checkResults(fiducial_interface_R.velocity_y, interface_R_imh.velocity_y, "velocity_y"); testingUtilities::checkResults(fiducial_interface_R.velocity_z, interface_R_imh.velocity_z, "velocity_z"); testingUtilities::checkResults(fiducial_interface_R.pressure, interface_R_imh.pressure, "pressure"); -#endif // MHD } TEST(tALLReconstructionCalcInterfaceLinear, CorrectInputExpectCorrectOutput) @@ -424,6 +392,71 @@ TEST(tALLReconstructionCalcInterfaceParabolic, CorrectInputExpectCorrectOutput) #endif // MHD } +TEST(tALLReconstructionPPMSingleVariable, CorrectInputExpectCorrectOutput) +{ + // Set up PRNG to use + std::mt19937_64 prng(42); + std::uniform_real_distribution doubleRand(-100, 100); + + // Set up testing parameters + size_t const n_tests = 100; + std::vector fiducial_left_interface{ + 50.429040149605328, 4.4043935241855703, 37.054257344499717, 23.707343328192593, -14.949021655598202, + -10.760611497035882, 8.367260859616664, 8.5357943668839624, 7.38606168778702, -23.210826670297152, + -85.15197822983292, 18.98804944849401, 64.754272117396766, 4.5584678980835918, 45.81912726561103, + 58.769584663215738, 47.626531326553447, 23.370742401854159, 47.06767164062336, -53.975231802858218, + -81.51278133300454, -74.554960772880221, 75.572387546643355, 61.339053128914685, -41.370881014041672, + -41.817524439980467, 58.391560533135817, -85.991024651293131, -36.626332669233776, 30.421304081280084, + 20.637382412674093, 58.342347077360131, -79.757902483702381, 98.151410701129635, -9.4994975790183389, + -87.49117921577357, -39.384192078363533, 79.849643090061676, 93.096197902468759, -64.374502025066192, + 82.037247010307937, -20.951323678824952, 46.92743159953308, -75.449850543801574, -54.603894223278004, + -59.419110050353098, -22.253989777496159, 86.943333900988137, -83.887344220269938, 73.270857190511975, + 84.784625452008811, -27.929776508530765, -9.6992610428405612, -65.233676045197072, -88.498474065470134, + 47.637114710282589, -69.50911815749248, -69.848254012650372, -7.4520009269431711, 90.887158278825865, + -79.086012597191512, -45.713537271527976, 80.237684918029572, -60.666381661910016, 68.727158732184449, + 24.53669768915492, -67.195147776790975, 72.610434112023597, 54.910597945673814, -19.862686571231023, + 32.244024128018054, -95.648868731550635, -34.761757909478987, -86.334093878928797, -16.580223524066724, + 39.48244113577249, 64.203567686297504, 0.77846541072490538, 59.620571575902432, 41.0983082454959, + -2.6491435658297036, -23.149979553301478, -54.098849622102691, -45.577469823900444, 33.284499908516068, + -39.186662569988762, 76.266375356625161, -51.650172854435624, -68.894636301310584, 98.410134045837452, + 30.9954824410611, 78.440749922366507, 51.390453104722326, 70.625792807373429, 43.749856317813453, + -81.399433434996496, 88.385686355761862, 78.242223440453444, 27.539590130937498, -6.9781781598207147}; + std::vector fiducial_right_interface{ + 50.429040149605328, -40.625142952817804, 37.054257344499717, -55.796322960572695, -14.949021655598202, + -10.760611497035882, 71.107183338735751, -29.453314279116661, 7.38606168778702, -23.210826670297152, + -85.15197822983292, 18.98804944849401, 64.754272117396766, 4.5584678980835918, 45.81912726561103, + 58.769584663215738, 47.626531326553447, 9.3792919223901166, 47.06767164062336, -53.975231802858218, + -81.51278133300454, -74.554960772880221, 96.420244795844823, 37.498528618937456, -41.370881014041672, + -41.817524439980467, 58.391560533135817, -85.991024651293131, -12.674113472365306, 30.421304081280084, + 43.700175645941769, 58.342347077360131, -31.574197692184548, 98.151410701129635, -9.4994975790183389, + -87.49117921577357, -94.449608348937488, 79.849643090061676, 93.096197902468759, -64.374502025066192, + 82.037247010307937, -60.629868182203786, -41.343090531127039, -75.449850543801574, -82.52313028208863, + 19.871484181185011, -22.253989777496159, 86.943333900988137, -83.887344220269938, 73.270857190511975, + 84.784625452008811, -27.929776508530765, -9.6992610428405612, -65.233676045197072, -88.498474065470134, + 47.637114710282589, -69.50911815749248, -69.848254012650372, -7.4520009269431711, 90.887158278825865, + -50.671539065300863, 13.424189957034621, 80.237684918029572, 32.454734198410179, 66.84741286999801, + 24.53669768915492, -67.195147776790975, 72.277527112459907, -46.094192444366435, -99.915875366345205, + 32.244024128018054, -95.648868731550635, 17.922876720365402, -86.334093878928797, -16.580223524066724, + 39.48244113577249, 64.203567686297504, 23.62791013796798, 59.620571575902432, 41.0983082454959, + -30.533954819557593, -23.149979553301478, -54.098849622102691, -45.577469823900444, 33.284499908516068, + -39.186662569988762, 76.266375356625161, -51.650172854435624, -68.894636301310584, 98.410134045837452, + -49.167117951549066, 78.440749922366507, 51.390453104722326, 3.1993391287610393, 43.749856317813453, + -81.399433434996496, 88.385686355761862, 78.242223440453444, 27.539590130937498, -6.9781781598207147}; + + // Run n_tests iterations of the loop choosing random numbers to put into the interface state computation and checking + // the results + for (size_t i = 0; i < n_tests; i++) { + // Run the function + double test_left_interface, test_right_interface; + reconstruction::PPM_Single_Variable(doubleRand(prng), doubleRand(prng), doubleRand(prng), doubleRand(prng), + doubleRand(prng), test_left_interface, test_right_interface); + + // Compare results + testingUtilities::checkResults(fiducial_left_interface.at(i), test_left_interface, "left i+1/2 interface"); + testingUtilities::checkResults(fiducial_right_interface.at(i), test_right_interface, "right i-1/2 interface"); + } +} + TEST(tALLReconstructionWriteData, CorrectInputExpectCorrectOutput) { // Set up test and mock up grid diff --git a/src/system_tests/hydro_system_tests.cpp b/src/system_tests/hydro_system_tests.cpp index a448e45b3..292935813 100644 --- a/src/system_tests/hydro_system_tests.cpp +++ b/src/system_tests/hydro_system_tests.cpp @@ -26,40 +26,44 @@ * */ /// @{ -class tHYDROSYSTEMSodShockTubeParameterizedMpi : public ::testing::TestWithParam +class tHYDROtMHDSYSTEMSodShockTubeParameterizedMpi : public ::testing::TestWithParam { protected: systemTest::SystemTestRunner sodTest; }; -TEST_P(tHYDROSYSTEMSodShockTubeParameterizedMpi, CorrectInputExpectCorrectOutput) +TEST_P(tHYDROtMHDSYSTEMSodShockTubeParameterizedMpi, CorrectInputExpectCorrectOutput) { - // #ifdef MHD - // // Loosen correctness check to account for MHD only having PCM. This is - // // about the error between PCM and PPMP in hydro - // sodTest.setFixedEpsilon(1E-3); +#ifdef MHD + sodTest.setFixedEpsilon(1.0E-4); + + // Don't test the gas energy fields + auto datasetNames = sodTest.getDataSetsToTest(); + datasetNames.erase(std::remove(datasetNames.begin(), datasetNames.end(), "GasEnergy"), datasetNames.end()); - // // Don't test the gas energy fields - // auto datasetNames = sodTest.getDataSetsToTest(); - // datasetNames.erase(std::remove(datasetNames.begin(), datasetNames.end(), "GasEnergy"), datasetNames.end()); + // Set the magnetic fiducial datasets to zero + size_t const size = 64 * 64 * 65; + std::vector const magVec(size, 0); - // // Set the magnetic fiducial datasets to zero - // size_t const size = std::pow(65, 3); - // std::vector const magVec(0, size); + for (const auto *field : {"magnetic_x", "magnetic_y", "magnetic_z"}) { + sodTest.setFiducialData(field, magVec); + datasetNames.emplace_back(field); + } - // for (const auto *field : {"magnetic_x", "magnetic_y", "magnetic_z"}) { - // sodTest.setFiducialData(field, magVec); - // datasetNames.push_back(field); - // } + sodTest.setDataSetsToTest(datasetNames); - // sodTest.setDataSetsToTest(datasetNames); - // #endif // MHD + double const maxAllowedL1Error = 7.0E-3; + double const maxAllowedError = 4.6E-2; +#else + double const maxAllowedL1Error = 9.4E-5; + double const maxAllowedError = 6.4E-4; +#endif // MHD sodTest.numMpiRanks = GetParam(); - sodTest.runTest(); + sodTest.runTest(true, maxAllowedL1Error, maxAllowedError); } -INSTANTIATE_TEST_SUITE_P(CorrectInputExpectCorrectOutput, tHYDROSYSTEMSodShockTubeParameterizedMpi, +INSTANTIATE_TEST_SUITE_P(CorrectInputExpectCorrectOutput, tHYDROtMHDSYSTEMSodShockTubeParameterizedMpi, ::testing::Values(1, 2, 4)); /// @} // ============================================================================= @@ -103,7 +107,7 @@ TEST(tHYDROtMHDSYSTEMSoundWave3D, CorrectInputExpectCorrectOutput) #elif defined(PLMC) tolerance = 1.0E-7; #elif defined(PPMC) - tolerance = 0.0; + tolerance = 1.9E-9; #endif // PCM #endif // MHD @@ -141,8 +145,8 @@ class tHYDROtMHDSYSTEMLinearWavesParameterizedMpi : public ::testing::TestWithPa double const allowedL1Error = 4E-7; // Based on results in Gardiner & Stone 2008 double const allowedError = 4E-7; #elif defined(PLMC) - double const allowedL1Error = 1E-7; // Based on results in Gardiner & Stone 2008 - double const allowedError = 1E-7; + double const allowedL1Error = 1E-7; // Based on results in Gardiner & Stone 2008 + double const allowedError = 1E-7; #elif defined(PPMC) double const allowedL1Error = 2.7E-8; // Based on results in Gardiner & Stone 2008 double const allowedError = 2.7E-8; diff --git a/src/system_tests/input_files/tHYDROSYSTEMSodShockTubeParameterizedMpi_CorrectInputExpectCorrectOutput.txt b/src/system_tests/input_files/tHYDROtMHDSYSTEMSodShockTubeParameterizedMpi_CorrectInputExpectCorrectOutput.txt similarity index 99% rename from src/system_tests/input_files/tHYDROSYSTEMSodShockTubeParameterizedMpi_CorrectInputExpectCorrectOutput.txt rename to src/system_tests/input_files/tHYDROtMHDSYSTEMSodShockTubeParameterizedMpi_CorrectInputExpectCorrectOutput.txt index 5e7687d07..efcd912fd 100644 --- a/src/system_tests/input_files/tHYDROSYSTEMSodShockTubeParameterizedMpi_CorrectInputExpectCorrectOutput.txt +++ b/src/system_tests/input_files/tHYDROtMHDSYSTEMSodShockTubeParameterizedMpi_CorrectInputExpectCorrectOutput.txt @@ -54,4 +54,3 @@ P_r=0.1 diaph=0.5 # value of gamma gamma=1.4 - diff --git a/src/system_tests/input_files/tMHDSYSTEMParameterizedMpi_SodShockTubeCorrectInputExpectCorrectOutput.txt b/src/system_tests/input_files/tMHDSYSTEMParameterizedMpi_SodShockTubeCorrectInputExpectCorrectOutput.txt deleted file mode 100644 index 7246e311f..000000000 --- a/src/system_tests/input_files/tMHDSYSTEMParameterizedMpi_SodShockTubeCorrectInputExpectCorrectOutput.txt +++ /dev/null @@ -1,57 +0,0 @@ -# -# Parameter File for 3D Sod Shock tube -# - -################################################ -# number of grid cells in the x dimension -nx=64 -# number of grid cells in the y dimension -ny=64 -# number of grid cells in the z dimension -nz=64 -# final output time -tout=0.2 -# time interval for output -outstep=0.2 -# name of initial conditions -init=Riemann -# domain properties -xmin=0.0 -ymin=0.0 -zmin=0.0 -xlen=1.0 -ylen=1.0 -zlen=1.0 -# type of boundary conditions -xl_bcnd=3 -xu_bcnd=3 -yl_bcnd=3 -yu_bcnd=3 -zl_bcnd=3 -zu_bcnd=3 -# path to output directory -outdir=./ - -################################################# -# Parameters for 1D Riemann problems -# density of left state -rho_l=1.0 -# velocity of left state -vx_l=0.0 -vy_l=0.0 -vz_l=0.0 -# pressure of left state -P_l=1.0 -# density of right state -rho_r=0.1 -# velocity of right state -vx_r=0.0 -vy_r=0.0 -vz_r=0.0 -# pressure of right state -P_r=0.1 -# location of initial discontinuity -diaph=0.5 -# value of gamma -gamma=1.4 - diff --git a/src/system_tests/mhd_system_tests.cpp b/src/system_tests/mhd_system_tests.cpp index 622f0aa69..4b093eff0 100644 --- a/src/system_tests/mhd_system_tests.cpp +++ b/src/system_tests/mhd_system_tests.cpp @@ -160,7 +160,7 @@ TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, FastMagnetosonicWaveRightMovingC #elif defined(PLMC) waveTest.runL1ErrorTest(6.5E-8, 6.5E-8); #elif defined(PPMC) - waveTest.runL1ErrorTest(0.0, 0.0); + waveTest.runL1ErrorTest(6.11E-8, 5.5E-8); #endif // PCM } @@ -196,7 +196,7 @@ TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, FastMagnetosonicWaveLeftMovingCo #elif defined(PLMC) waveTest.runL1ErrorTest(6.5E-8, 6.5E-8); #elif defined(PPMC) - waveTest.runL1ErrorTest(0.0, 0.0); + waveTest.runL1ErrorTest(6.1E-8, 5.5E-8); #endif // PCM } @@ -234,7 +234,7 @@ TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, SlowMagnetosonicWaveRightMovingC #elif defined(PLMC) waveTest.runL1ErrorTest(2.0E-8, 2.7E-8); #elif defined(PPMC) - waveTest.runL1ErrorTest(0.0, 0.0); + waveTest.runL1ErrorTest(1.45E-9, 1.3E-9); #endif // PCM } @@ -270,7 +270,7 @@ TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, SlowMagnetosonicWaveLeftMovingCo #elif defined(PLMC) waveTest.runL1ErrorTest(2.0E-8, 2.7E-8); #elif defined(PPMC) - waveTest.runL1ErrorTest(0.0, 0.0); + waveTest.runL1ErrorTest(1.45E-9, 1.3E-9); #endif // PCM } @@ -307,7 +307,7 @@ TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, AlfvenWaveRightMovingCorrectInpu #elif defined(PLMC) waveTest.runL1ErrorTest(3.0E-8, 3.0E-8); #elif defined(PPMC) - waveTest.runL1ErrorTest(0.0, 0.0); + waveTest.runL1ErrorTest(1.95e-09, 2.16e-09); #endif // PCM } @@ -342,7 +342,7 @@ TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, AlfvenWaveLeftMovingCorrectInput #elif defined(PLMC) waveTest.runL1ErrorTest(3.0E-8, 3.0E-8); #elif defined(PPMC) - waveTest.runL1ErrorTest(0.0, 0.0); + waveTest.runL1ErrorTest(1.95e-09, 2.16e-09); #endif // PCM } @@ -381,7 +381,7 @@ TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, MHDContactWaveCorrectInputExpect #elif defined(PLMC) waveTest.runL1ErrorTest(3.0E-8, 3.0E-8); #elif defined(PPMC) - waveTest.runL1ErrorTest(0.0, 0.0); + waveTest.runL1ErrorTest(1.41e-09, 1.5E-09); #endif // PCM } @@ -499,7 +499,7 @@ TEST_P(tMHDSYSTEMLinearWavesParameterizedMpi, SlowMagnetosonicWaveRightMovingCor #elif defined(PLMC) waveTest.runL1ErrorTest(2.0E-8, 2.7E-8); #elif defined(PPMC) - waveTest.runL1ErrorTest(0.0, 0.0); + waveTest.runL1ErrorTest(1.4E-9, 1.3E-9); #endif // PCM } @@ -535,7 +535,7 @@ TEST_P(tMHDSYSTEMLinearWavesParameterizedMpi, SlowMagnetosonicWaveLeftMovingCorr #elif defined(PLMC) waveTest.runL1ErrorTest(2.0E-8, 2.7E-8); #elif defined(PPMC) - waveTest.runL1ErrorTest(0.0, 0.0); + waveTest.runL1ErrorTest(1.4E-9, 1.3E-9); #endif // PCM } @@ -572,14 +572,6 @@ TEST_P(tMHDSYSTEMParameterizedMpi, ConstantWithMagneticFieldCorrectInputExpectCo test_runner.runTest(); } -/// TODO: This is temporary. Remove once PPMP is implemented for MHD and replace -/// TODO: with the hydro sod test -TEST_P(tMHDSYSTEMParameterizedMpi, SodShockTubeCorrectInputExpectCorrectOutput) -{ - test_runner.numMpiRanks = GetParam(); - test_runner.runTest(); -} - /// Test the MHD Einfeldt Strong Rarefaction (Einfeldt et al. 1991) TEST_P(tMHDSYSTEMParameterizedMpi, EinfeldtStrongRarefactionCorrectInputExpectCorrectOutput) { @@ -728,9 +720,9 @@ TEST_P(tMHDSYSTEMCircularlyPolarizedAlfvenWaveParameterizedPolarization, MovingW double const allowedError = 5.0E-3; #elif defined(PPMC) // Set the number of timesteps - cpawTest.setFiducialNumTimeSteps(0); - double const allowedL1Error = 0.0; // Based on results in Gardiner & Stone 2008 - double const allowedError = 0.0; + cpawTest.setFiducialNumTimeSteps(84); + double const allowedL1Error = 4.0E-3; // Based on results in Gardiner & Stone 2008 + double const allowedError = 3.0E-3; #endif // PCM // Set the launch parameters @@ -763,9 +755,9 @@ TEST_P(tMHDSYSTEMCircularlyPolarizedAlfvenWaveParameterizedPolarization, Standin double const allowedError = 2.0E-3; #elif defined(PPMC) // Set the number of timesteps - cpawTest.setFiducialNumTimeSteps(0); - double const allowedL1Error = 0.0; // Based on results in Gardiner & Stone 2008 - double const allowedError = 0.0; + cpawTest.setFiducialNumTimeSteps(130); + double const allowedL1Error = 1.3E-3; // Based on results in Gardiner & Stone 2008 + double const allowedError = 1.3E-3; #endif // PCM // Set the launch parameters diff --git a/src/system_tests/system_tester.cpp b/src/system_tests/system_tester.cpp index 6141a471d..a8302992d 100644 --- a/src/system_tests/system_tester.cpp +++ b/src/system_tests/system_tester.cpp @@ -679,7 +679,7 @@ std::vector systemTest::SystemTestRunner::_loadTestParticleData(std::str // ============================================================================= std::vector systemTest::SystemTestRunner::_loadFiducialFieldData(std::string const &dataSetName) { - if (_fiducialFileExists) { + if (_fiducialFileExists and (_fiducialDataSets.find(dataSetName) == _fiducialDataSets.end())) { // Open the dataset H5::DataSet const fiducialDataSet = _fiducialFile.openDataSet(dataSetName); From b28c9c903f652230d9c582c58a683857040e1f45 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Wed, 17 May 2023 17:24:59 -0400 Subject: [PATCH 434/694] Fix incorrectly placed allocations in PPMC_CTU --- src/reconstruction/ppmc_cuda.cu | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/reconstruction/ppmc_cuda.cu b/src/reconstruction/ppmc_cuda.cu index c0d922a2f..aa797a283 100644 --- a/src/reconstruction/ppmc_cuda.cu +++ b/src/reconstruction/ppmc_cuda.cu @@ -335,8 +335,10 @@ __global__ void PPMC_CTU(Real *dev_conserved, Real *dev_bounds_L, Real *dev_boun Real sum_1 = 0, sum_2 = 0, sum_3 = 0, sum_4 = 0, sum_5 = 0; #ifdef DE Real sum_ge = 0; + Real chi_ge = 0; #endif // DE #ifdef SCALAR + Real chi_scalar[NSCALARS]; Real sum_scalar[NSCALARS]; for (int i = 0; i < NSCALARS; i++) { sum_scalar[i] = 0; @@ -367,10 +369,9 @@ __global__ void PPMC_CTU(Real *dev_conserved, Real *dev_bounds_L, Real *dev_boun Real const chi_4 = A * (del_m_i.velocity_z - vz_6) + B * vz_6; Real const chi_5 = A * (del_m_i.pressure - p_6) + B * p_6; #ifdef DE - Real chi_ge = A * (del_m_i.gas_energy - ge_6) + B * ge_6; + chi_ge = A * (del_m_i.gas_energy - ge_6) + B * ge_6; #endif // DE #ifdef SCALAR - Real chi_scalar[NSCALARS]; for (int i = 0; i < NSCALARS; i++) { chi_scalar[i] = A * (del_m_i.scalar[i] - scalar_6[i]) + B * scalar_6[i]; } From f0907f0f22cb6965c906eb6d20a47dd2287bf020 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Thu, 25 May 2023 15:32:26 -0400 Subject: [PATCH 435/694] Fix linear wave initial conditions The magnetic field wasn't being set properly --- src/grid/initial_conditions.cpp | 133 +++++++++++++++++++++----- src/system_tests/mhd_system_tests.cpp | 8 -- src/utils/mhd_utilities.cu | 14 +-- 3 files changed, 117 insertions(+), 38 deletions(-) diff --git a/src/grid/initial_conditions.cpp b/src/grid/initial_conditions.cpp index 0acf08639..3fa0a7380 100644 --- a/src/grid/initial_conditions.cpp +++ b/src/grid/initial_conditions.cpp @@ -301,43 +301,130 @@ void Grid3D::Sound_Wave(parameters const &P) * \brief Sine wave perturbation. */ void Grid3D::Linear_Wave(parameters const &P) { - auto [stagger, junk1, junk2] = math_utils::rotateCoords(H.dx / 2, H.dy / 2, H.dz / 2, P.pitch, P.yaw); + // Compute any test parameters needed + // ================================== + // Angles + Real const sin_yaw = std::sin(P.yaw); + Real const cos_yaw = std::cos(P.yaw); + Real const sin_pitch = std::sin(P.pitch); + Real const cos_pitch = std::cos(P.pitch); + + Real const wavenumber = 2.0 * M_PI / P.wave_length; // the angular wave number k + +#ifdef MHD + // TODO: This method of setting the magnetic fields via the vector potential should work but instead leads to small + // TODO: errors in the magnetic field that tend to amplify over time until the solution diverges. I don't know why + // TODO: that is the case and can't figure out the reason. Without this we can't run linear waves at an angle to the + // TODO: grid. + // // Compute the vector potential + // // ============================ + // std::vector vectorPotential(3 * H.n_cells, 0); + + // // lambda function for computing the vector potential + // auto Compute_Vector_Potential = [&](Real const &x_loc, Real const &y_loc, Real const &z_loc) { + // // The "_rot" variables are the rotated version + // Real const x_rot = x_loc * cos_pitch * cos_yaw + y_loc * cos_pitch * sin_yaw + z_loc * sin_pitch; + // Real const y_rot = -x_loc * sin_yaw + y_loc * cos_yaw; + + // Real const a_y = P.Bz * x_rot - (P.A * P.rEigenVec_Bz / wavenumber) * std::cos(wavenumber * x_rot); + // Real const a_z = -P.By * x_rot + (P.A * P.rEigenVec_By / wavenumber) * std::cos(wavenumber * x_rot) + P.Bx * + // y_rot; + + // return std::make_pair(a_y, a_z); + // }; + + // for (size_t k = 0; k < H.nz; k++) { + // for (size_t j = 0; j < H.ny; j++) { + // for (size_t i = 0; i < H.nx; i++) { + // // Get cell index + // size_t const id = cuda_utilities::compute1DIndex(i, j, k, H.nx, H.ny); + + // Real x, y, z; + // Get_Position(i, j, k, &x, &y, &z); + + // auto vectorPot = Compute_Vector_Potential(x, y + H.dy / 2., z + H.dz / 2.); + // vectorPotential.at(id + 0 * H.n_cells) = -vectorPot.first * sin_yaw - vectorPot.second * sin_pitch * cos_yaw; + + // vectorPot = Compute_Vector_Potential(x + H.dx / 2., y, z + H.dz / 2.); + // vectorPotential.at(id + 1 * H.n_cells) = vectorPot.first * cos_yaw - vectorPot.second * sin_pitch * sin_yaw; + + // vectorPot = Compute_Vector_Potential(x + H.dx / 2., y + H.dy / 2., z); + // vectorPotential.at(id + 2 * H.n_cells) = vectorPot.second * cos_pitch; + // } + // } + // } + + // // Compute the magnetic field from the vector potential + // // ==================================================== + // mhd::utils::Init_Magnetic_Field_With_Vector_Potential(H, C, vectorPotential); + + Real shift = H.dx; + size_t dir = 0; + if (sin_yaw == 1.0) { + shift = H.dy; + dir = 1; + } else if (sin_pitch == 1.0) { + shift = H.dz; + dir = 2; + } // set initial values of conserved variables for (int k = H.n_ghost; k < H.nz - H.n_ghost; k++) { for (int j = H.n_ghost; j < H.ny - H.n_ghost; j++) { for (int i = H.n_ghost; i < H.nx - H.n_ghost; i++) { - // Rotate the indices - auto [i_rot, j_rot, k_rot] = math_utils::rotateCoords(i, j, k, P.pitch, P.yaw); + // get cell index + size_t const id = cuda_utilities::compute1DIndex(i, j, k, H.nx, H.ny); + + // get cell-centered position + Real x_pos, y_pos, z_pos; + Get_Position(i, j, k, &x_pos, &y_pos, &z_pos); + Real const x_pos_rot = cos_pitch * (x_pos * cos_yaw + y_pos * sin_yaw) + z_pos * sin_pitch; + + Real const sine_x = std::sin(x_pos_rot * wavenumber); + Real bx = P.Bx + P.A * P.rEigenVec_Bx * sine_x; + Real by = P.By + P.A * P.rEigenVec_By * sine_x; + Real bz = P.Bz + P.A * P.rEigenVec_Bz * sine_x; + + C.magnetic_x[id] = bx * cos_pitch * cos_yaw - by * sin_yaw - bz * sin_pitch * cos_yaw; + C.magnetic_y[id] = bx * cos_pitch * sin_yaw + by * cos_yaw - bz * sin_pitch * sin_yaw; + C.magnetic_z[id] = bx * sin_pitch + bz * cos_pitch; + } + } + } +#endif // MHD + + // Compute the hydro variables + // =========================== + for (size_t k = H.n_ghost - 1; k < H.nz - H.n_ghost; k++) { + for (size_t j = H.n_ghost - 1; j < H.ny - H.n_ghost; j++) { + for (size_t i = H.n_ghost - 1; i < H.nx - H.n_ghost; i++) { // get cell index - int id = i + j * H.nx + k * H.nx * H.ny; + size_t const id = cuda_utilities::compute1DIndex(i, j, k, H.nx, H.ny); // get cell-centered position Real x_pos, y_pos, z_pos; - Get_Position(i_rot, j_rot, k_rot, &x_pos, &y_pos, &z_pos); + Get_Position(i, j, k, &x_pos, &y_pos, &z_pos); + Real const x_pos_rot = cos_pitch * (x_pos * cos_yaw + y_pos * sin_yaw) + z_pos * sin_pitch; - // set constant initial states. Note that hydro_utilities::Calc_Energy_Primitive computes the MHD energy if the - // MHD flag is turned on and the hydro energy if it isn't - Real sine_wave = std::sin(2.0 * M_PI * x_pos / P.wave_length); + Real const sine_x = std::sin(x_pos_rot * wavenumber); - C.density[id] = P.rho; - C.momentum_x[id] = P.rho * P.vx; - C.momentum_y[id] = P.rho * P.vy; - C.momentum_z[id] = P.rho * P.vz; - C.Energy[id] = hydro_utilities::Calc_Energy_Primitive(P.P, P.rho, P.vx, P.vy, P.vz, gama, P.Bx, P.By, P.Bz); - // add small-amplitude perturbations - C.density[id] += P.A * P.rEigenVec_rho * sine_wave; - C.momentum_x[id] += P.A * P.rEigenVec_MomentumX * sine_wave; - C.momentum_y[id] += P.A * P.rEigenVec_MomentumY * sine_wave; - C.momentum_z[id] += P.A * P.rEigenVec_MomentumZ * sine_wave; - C.Energy[id] += P.A * P.rEigenVec_E * sine_wave; + // Density + C.density[id] = P.rho + P.A * P.rEigenVec_rho * sine_x; + + // Momenta + Real mx = P.rho * P.vx + P.A * P.rEigenVec_MomentumX * sine_x; + Real my = P.A * P.rEigenVec_MomentumY * sine_x; + Real mz = P.A * P.rEigenVec_MomentumZ * sine_x; + + C.momentum_x[id] = mx * cos_pitch * cos_yaw - my * sin_yaw - mz * sin_pitch * cos_yaw; + C.momentum_y[id] = mx * cos_pitch * sin_yaw + my * cos_yaw - mz * sin_pitch * sin_yaw; + C.momentum_z[id] = mx * sin_pitch + mz * cos_pitch; + // Energy + C.Energy[id] = P.P / (P.gamma - 1.0) + 0.5 * P.rho * P.vx * P.vx + P.A * sine_x * P.rEigenVec_E; #ifdef MHD - sine_wave = std::sin(2.0 * M_PI * (x_pos + stagger)); - C.magnetic_x[id] = P.Bx + P.A * P.rEigenVec_Bx * sine_wave; - C.magnetic_y[id] = P.By + P.A * P.rEigenVec_By * sine_wave; - C.magnetic_z[id] = P.Bz + P.A * P.rEigenVec_Bz * sine_wave; + C.Energy[id] += 0.5 * (P.Bx * P.Bx + P.By * P.By + P.Bz * P.Bz); #endif // MHD } } diff --git a/src/system_tests/mhd_system_tests.cpp b/src/system_tests/mhd_system_tests.cpp index 4b093eff0..f7d7ad9a2 100644 --- a/src/system_tests/mhd_system_tests.cpp +++ b/src/system_tests/mhd_system_tests.cpp @@ -66,18 +66,10 @@ class tMHDSYSTEMLinearWavesParameterizedAngle : public ::testing::TestWithParam< case 2: // swap X and Y y_len *= 2; ny *= 2; - std::swap(vx_rot, vy_rot); - std::swap(Bx_rot, By_rot); - std::swap(rEigenVec_Bx_rot, rEigenVec_By_rot); - std::swap(rEigenVec_MomentumX_rot, rEigenVec_MomentumY_rot); break; case 3: // swap X and Z z_len *= 2; nz *= 2; - std::swap(vx_rot, vz_rot); - std::swap(Bx_rot, Bz_rot); - std::swap(rEigenVec_Bx_rot, rEigenVec_Bz_rot); - std::swap(rEigenVec_MomentumX_rot, rEigenVec_MomentumZ_rot); break; default: throw std::invalid_argument("Invalid value of domain_direction given to setLaunchParams"); diff --git a/src/utils/mhd_utilities.cu b/src/utils/mhd_utilities.cu index 110d8d66b..bceb4abc1 100644 --- a/src/utils/mhd_utilities.cu +++ b/src/utils/mhd_utilities.cu @@ -23,14 +23,14 @@ void Init_Magnetic_Field_With_Vector_Potential(Header const &H, Grid3D::Conserve std::vector const &vectorPotential) { // Compute the magnetic field - for (int k = 1; k < H.nz; k++) { - for (int j = 1; j < H.ny; j++) { - for (int i = 1; i < H.nx; i++) { + for (size_t k = 1; k < H.nz; k++) { + for (size_t j = 1; j < H.ny; j++) { + for (size_t i = 1; i < H.nx; i++) { // Get cell index. The "xmo" means: X direction Minus One - int const id = cuda_utilities::compute1DIndex(i, j, k, H.nx, H.ny); - int const idxmo = cuda_utilities::compute1DIndex(i - 1, j, k, H.nx, H.ny); - int const idymo = cuda_utilities::compute1DIndex(i, j - 1, k, H.nx, H.ny); - int const idzmo = cuda_utilities::compute1DIndex(i, j, k - 1, H.nx, H.ny); + size_t const id = cuda_utilities::compute1DIndex(i, j, k, H.nx, H.ny); + size_t const idxmo = cuda_utilities::compute1DIndex(i - 1, j, k, H.nx, H.ny); + size_t const idymo = cuda_utilities::compute1DIndex(i, j - 1, k, H.nx, H.ny); + size_t const idzmo = cuda_utilities::compute1DIndex(i, j, k - 1, H.nx, H.ny); C.magnetic_x[id] = (vectorPotential.at(id + 2 * H.n_cells) - vectorPotential.at(idymo + 2 * H.n_cells)) / H.dy - (vectorPotential.at(id + 1 * H.n_cells) - vectorPotential.at(idzmo + 1 * H.n_cells)) / H.dz; From fa2fdf8d2d60daf9c3093ed033b5643c42799e44 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Thu, 22 Jun 2023 11:20:37 -0400 Subject: [PATCH 436/694] Fix PPM_Single_Variable test On different compilers the test was giving different results. It seems like that was due to a reordering of the call to the PRNG. Now we generate all the random values serially at the beginning to avoid that issue. --- src/reconstruction/reconstruction_tests.cu | 59 +++++++++++++--------- 1 file changed, 34 insertions(+), 25 deletions(-) diff --git a/src/reconstruction/reconstruction_tests.cu b/src/reconstruction/reconstruction_tests.cu index d6ef4afe9..777c799fe 100644 --- a/src/reconstruction/reconstruction_tests.cu +++ b/src/reconstruction/reconstruction_tests.cu @@ -15,6 +15,7 @@ // Local Includes #include "../global/global.h" #include "../global/global_cuda.h" +#include "../io/io.h" #include "../reconstruction/reconstruction.h" #include "../utils/DeviceVector.h" #include "../utils/gpu.hpp" @@ -400,28 +401,12 @@ TEST(tALLReconstructionPPMSingleVariable, CorrectInputExpectCorrectOutput) // Set up testing parameters size_t const n_tests = 100; + std::vector input_data(n_tests * 5); + for (double &val : input_data) { + val = doubleRand(prng); + } + std::vector fiducial_left_interface{ - 50.429040149605328, 4.4043935241855703, 37.054257344499717, 23.707343328192593, -14.949021655598202, - -10.760611497035882, 8.367260859616664, 8.5357943668839624, 7.38606168778702, -23.210826670297152, - -85.15197822983292, 18.98804944849401, 64.754272117396766, 4.5584678980835918, 45.81912726561103, - 58.769584663215738, 47.626531326553447, 23.370742401854159, 47.06767164062336, -53.975231802858218, - -81.51278133300454, -74.554960772880221, 75.572387546643355, 61.339053128914685, -41.370881014041672, - -41.817524439980467, 58.391560533135817, -85.991024651293131, -36.626332669233776, 30.421304081280084, - 20.637382412674093, 58.342347077360131, -79.757902483702381, 98.151410701129635, -9.4994975790183389, - -87.49117921577357, -39.384192078363533, 79.849643090061676, 93.096197902468759, -64.374502025066192, - 82.037247010307937, -20.951323678824952, 46.92743159953308, -75.449850543801574, -54.603894223278004, - -59.419110050353098, -22.253989777496159, 86.943333900988137, -83.887344220269938, 73.270857190511975, - 84.784625452008811, -27.929776508530765, -9.6992610428405612, -65.233676045197072, -88.498474065470134, - 47.637114710282589, -69.50911815749248, -69.848254012650372, -7.4520009269431711, 90.887158278825865, - -79.086012597191512, -45.713537271527976, 80.237684918029572, -60.666381661910016, 68.727158732184449, - 24.53669768915492, -67.195147776790975, 72.610434112023597, 54.910597945673814, -19.862686571231023, - 32.244024128018054, -95.648868731550635, -34.761757909478987, -86.334093878928797, -16.580223524066724, - 39.48244113577249, 64.203567686297504, 0.77846541072490538, 59.620571575902432, 41.0983082454959, - -2.6491435658297036, -23.149979553301478, -54.098849622102691, -45.577469823900444, 33.284499908516068, - -39.186662569988762, 76.266375356625161, -51.650172854435624, -68.894636301310584, 98.410134045837452, - 30.9954824410611, 78.440749922366507, 51.390453104722326, 70.625792807373429, 43.749856317813453, - -81.399433434996496, 88.385686355761862, 78.242223440453444, 27.539590130937498, -6.9781781598207147}; - std::vector fiducial_right_interface{ 50.429040149605328, -40.625142952817804, 37.054257344499717, -55.796322960572695, -14.949021655598202, -10.760611497035882, 71.107183338735751, -29.453314279116661, 7.38606168778702, -23.210826670297152, -85.15197822983292, 18.98804944849401, 64.754272117396766, 4.5584678980835918, 45.81912726561103, @@ -434,22 +419,46 @@ TEST(tALLReconstructionPPMSingleVariable, CorrectInputExpectCorrectOutput) 19.871484181185011, -22.253989777496159, 86.943333900988137, -83.887344220269938, 73.270857190511975, 84.784625452008811, -27.929776508530765, -9.6992610428405612, -65.233676045197072, -88.498474065470134, 47.637114710282589, -69.50911815749248, -69.848254012650372, -7.4520009269431711, 90.887158278825865, - -50.671539065300863, 13.424189957034621, 80.237684918029572, 32.454734198410179, 66.84741286999801, + -50.671539065300863, 13.424189957034622, 80.237684918029572, 32.454734198410179, 66.84741286999801, 24.53669768915492, -67.195147776790975, 72.277527112459907, -46.094192444366435, -99.915875366345205, 32.244024128018054, -95.648868731550635, 17.922876720365402, -86.334093878928797, -16.580223524066724, 39.48244113577249, 64.203567686297504, 23.62791013796798, 59.620571575902432, 41.0983082454959, -30.533954819557593, -23.149979553301478, -54.098849622102691, -45.577469823900444, 33.284499908516068, -39.186662569988762, 76.266375356625161, -51.650172854435624, -68.894636301310584, 98.410134045837452, -49.167117951549066, 78.440749922366507, 51.390453104722326, 3.1993391287610393, 43.749856317813453, - -81.399433434996496, 88.385686355761862, 78.242223440453444, 27.539590130937498, -6.9781781598207147}; + -81.399433434996496, 88.385686355761862, 78.242223440453444, 27.539590130937498, -6.9781781598207147, + }; + std::vector fiducial_right_interface{ + 50.429040149605328, 4.4043935241855703, 37.054257344499717, 23.707343328192596, -14.949021655598202, + -10.760611497035882, 8.367260859616664, 8.5357943668839624, 7.38606168778702, -23.210826670297152, + -85.15197822983292, 18.98804944849401, 64.754272117396766, 4.5584678980835918, 45.81912726561103, + 58.769584663215738, 47.626531326553447, 23.370742401854159, 47.06767164062336, -53.975231802858218, + -81.51278133300454, -74.554960772880221, 75.572387546643355, 61.339053128914685, -41.370881014041672, + -41.817524439980467, 58.391560533135817, -85.991024651293131, -36.626332669233776, 30.421304081280084, + 20.637382412674096, 58.342347077360131, -79.757902483702381, 98.151410701129635, -9.4994975790183389, + -87.49117921577357, -39.384192078363533, 79.849643090061676, 93.096197902468759, -64.374502025066192, + 82.037247010307937, -20.951323678824952, 46.927431599533087, -75.449850543801574, -54.603894223278004, + -59.419110050353098, -22.253989777496159, 86.943333900988137, -83.887344220269938, 73.270857190511975, + 84.784625452008811, -27.929776508530765, -9.6992610428405612, -65.233676045197072, -88.498474065470134, + 47.637114710282589, -69.50911815749248, -69.848254012650372, -7.4520009269431711, 90.887158278825865, + -79.086012597191512, -45.713537271527976, 80.237684918029572, -60.666381661910016, 68.727158732184449, + 24.53669768915492, -67.195147776790975, 72.610434112023597, 54.910597945673814, -19.862686571231023, + 32.244024128018054, -95.648868731550635, -34.761757909478987, -86.334093878928797, -16.580223524066724, + 39.48244113577249, 64.203567686297504, 0.77846541072490538, 59.620571575902432, 41.0983082454959, + -2.6491435658297036, -23.149979553301478, -54.098849622102691, -45.577469823900444, 33.284499908516068, + -39.186662569988762, 76.266375356625161, -51.650172854435624, -68.894636301310584, 98.410134045837452, + 30.9954824410611, 78.440749922366507, 51.390453104722326, 70.625792807373429, 43.749856317813453, + -81.399433434996496, 88.385686355761862, 78.242223440453444, 27.539590130937498, -6.9781781598207147, + }; // Run n_tests iterations of the loop choosing random numbers to put into the interface state computation and checking // the results for (size_t i = 0; i < n_tests; i++) { // Run the function double test_left_interface, test_right_interface; - reconstruction::PPM_Single_Variable(doubleRand(prng), doubleRand(prng), doubleRand(prng), doubleRand(prng), - doubleRand(prng), test_left_interface, test_right_interface); + size_t const idx = 5 * i; + reconstruction::PPM_Single_Variable(input_data[idx], input_data[idx + 1], input_data[idx + 2], input_data[idx + 3], + input_data[idx + 4], test_left_interface, test_right_interface); // Compare results testingUtilities::checkResults(fiducial_left_interface.at(i), test_left_interface, "left i+1/2 interface"); From 38507615bd59be9951491f7639b83360ec5a3b6f Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Thu, 22 Jun 2023 11:45:48 -0400 Subject: [PATCH 437/694] Disable PPMC VL test for DE builds The DE parts are well tested elsewhere. No need for the extra complexity in this test. --- src/reconstruction/ppmc_cuda_tests.cu | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/reconstruction/ppmc_cuda_tests.cu b/src/reconstruction/ppmc_cuda_tests.cu index 3352f22f6..e1974130c 100644 --- a/src/reconstruction/ppmc_cuda_tests.cu +++ b/src/reconstruction/ppmc_cuda_tests.cu @@ -120,6 +120,13 @@ TEST(tHYDROPpmcCTUReconstructor, CorrectInputExpectCorrectOutput) TEST(tALLPpmcVLReconstructor, CorrectInputExpectCorrectOutput) { +#ifdef DE + /// This test doesn't support Dual Energy. It wouldn't be that hard to add support for DE but the DE parts of the + /// reconstructor (loading and PPM_Single_Variable) are well tested elsewhere so there's no need to add the extra + /// complexity here. + GTEST_SKIP(); +#endif // DE + // Set up PRNG to use std::mt19937_64 prng(42); std::uniform_real_distribution doubleRand(0.1, 5); From 303bcc62037844131ca57519d42e95cb3edce1b9 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Thu, 22 Jun 2023 14:07:27 -0400 Subject: [PATCH 438/694] Convert Advection Field Loop test to L1 test The cell-to-cell comparison was brittle across systems --- src/system_tests/mhd_system_tests.cpp | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/system_tests/mhd_system_tests.cpp b/src/system_tests/mhd_system_tests.cpp index f7d7ad9a2..90ae75a86 100644 --- a/src/system_tests/mhd_system_tests.cpp +++ b/src/system_tests/mhd_system_tests.cpp @@ -606,10 +606,9 @@ TEST_P(tMHDSYSTEMParameterizedMpi, RyuAndJones4dShockTubeCorrectInputExpectCorre TEST_P(tMHDSYSTEMParameterizedMpi, AdvectingFieldLoopCorrectInputExpectCorrectOutput) { test_runner.numMpiRanks = GetParam(); -#ifdef PLMC - test_runner.setFixedEpsilon(8.568e-10); -#endif // PLMC - test_runner.runTest(); + + // Only do the L2 Norm test. The regular cell-to-cell comparison is brittle for this test across systems + test_runner.runTest(true, 3.9E-8, 1.6E-6); } /// Test the MHD Blast Wave From 378ed0c0c5ce34be337185d83ff99dbcc936171d Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Thu, 22 Jun 2023 14:31:47 -0400 Subject: [PATCH 439/694] Fix the clang-tidy errors For some reason clang-tidy can find the host side fmin function but not the host side min function in some cases. So I changed it to fmin. --- src/reconstruction/ppmc_cuda_tests.cu | 8 ++++---- src/reconstruction/reconstruction.h | 16 ++++++++-------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/src/reconstruction/ppmc_cuda_tests.cu b/src/reconstruction/ppmc_cuda_tests.cu index e1974130c..7dd9b49e3 100644 --- a/src/reconstruction/ppmc_cuda_tests.cu +++ b/src/reconstruction/ppmc_cuda_tests.cu @@ -39,8 +39,8 @@ TEST(tHYDROPpmcCTUReconstructor, CorrectInputExpectCorrectOutput) // Setup host grid. Fill host grid with random values and randomly assign maximum value std::vector host_grid(nx * ny * nz * n_fields); - for (size_t i = 0; i < host_grid.size(); i++) { - host_grid.at(i) = doubleRand(prng); + for (double &val : host_grid) { + val = doubleRand(prng); } // Allocating and copying to device @@ -144,8 +144,8 @@ TEST(tALLPpmcVLReconstructor, CorrectInputExpectCorrectOutput) // Setup host grid. Fill host grid with random values and randomly assign maximum value std::vector host_grid(nx * ny * nz * n_fields); - for (size_t i = 0; i < host_grid.size(); i++) { - host_grid.at(i) = doubleRand(prng); + for (double &val : host_grid) { + val = doubleRand(prng); } // Allocating and copying to device diff --git a/src/reconstruction/reconstruction.h b/src/reconstruction/reconstruction.h index 204ee8eff..70e43d792 100644 --- a/src/reconstruction/reconstruction.h +++ b/src/reconstruction/reconstruction.h @@ -720,8 +720,8 @@ void __device__ __host__ __inline__ PPM_Single_Variable(Real const &cell_im2, Re Real limited_slope = 0.0; if (SIGN(slope_2nd_centered) == SIGN(slope_2nd_im1) and SIGN(slope_2nd_centered) == SIGN(slope_2nd_i)) { - limited_slope = - SIGN(slope_2nd_centered) * min(C2 * abs(slope_2nd_im1), min(C2 * abs(slope_2nd_i), abs(slope_2nd_centered))); + limited_slope = SIGN(slope_2nd_centered) * + fmin(C2 * abs(slope_2nd_im1), fmin(C2 * abs(slope_2nd_i), abs(slope_2nd_centered))); } // Collela et al. 2011 eq. 84a & 84b @@ -770,15 +770,15 @@ void __device__ __host__ __inline__ PPM_Single_Variable(Real const &cell_im2, Re SIGN(slope_2nd_im1) == SIGN(slope_2nd_face)) { // Extrema is smooth // Colella & Sekora eq. 22 - slope_2nd_limited = SIGN(slope_2nd_face) * min(min(C2 * abs(slope_2nd_im1), C2 * abs(slope_2nd_i)), - min(C2 * abs(slope_2nd_ip1), abs(slope_2nd_face))); + slope_2nd_limited = SIGN(slope_2nd_face) * fmin(fmin(C2 * abs(slope_2nd_im1), C2 * abs(slope_2nd_i)), + fmin(C2 * abs(slope_2nd_ip1), abs(slope_2nd_face))); } // Check if 2nd derivative is close to roundoff error - Real cell_max = max(abs(cell_im2), abs(cell_im1)); - cell_max = max(cell_max, abs(cell_i)); - cell_max = max(cell_max, abs(cell_ip1)); - cell_max = max(cell_max, abs(cell_ip2)); + Real cell_max = fmax(abs(cell_im2), abs(cell_im1)); + cell_max = fmax(cell_max, abs(cell_i)); + cell_max = fmax(cell_max, abs(cell_ip1)); + cell_max = fmax(cell_max, abs(cell_ip2)); // If this condition is true then the limiter is not sensitive to roundoff and we use the limited ratio // McCorquodale & Colella 2011 eq. 27 From 54c00d608d0674c2f031a978a8f26579c089b751 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Thu, 22 Jun 2023 15:20:09 -0400 Subject: [PATCH 440/694] Fix memory leak & more clang tidy fixes --- src/io/io.cpp | 4 ++++ src/reconstruction/ppmc_cuda.cu | 8 ++++---- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/src/io/io.cpp b/src/io/io.cpp index 3335afda7..420982309 100644 --- a/src/io/io.cpp +++ b/src/io/io.cpp @@ -1648,6 +1648,10 @@ void Grid3D::Write_Projection_HDF5(hid_t file_id) free(dataset_buffer_dxz); free(dataset_buffer_Txy); free(dataset_buffer_Txz); + #ifdef DUST + free(dataset_buffer_dust_xy); + free(dataset_buffer_dust_xz); + #endif // DUST } #endif // HDF5 diff --git a/src/reconstruction/ppmc_cuda.cu b/src/reconstruction/ppmc_cuda.cu index aa797a283..52ea8d986 100644 --- a/src/reconstruction/ppmc_cuda.cu +++ b/src/reconstruction/ppmc_cuda.cu @@ -340,8 +340,8 @@ __global__ void PPMC_CTU(Real *dev_conserved, Real *dev_bounds_L, Real *dev_boun #ifdef SCALAR Real chi_scalar[NSCALARS]; Real sum_scalar[NSCALARS]; - for (int i = 0; i < NSCALARS; i++) { - sum_scalar[i] = 0; + for (Real &val : sum_scalar) { + val = 0; } #endif // SCALAR @@ -429,8 +429,8 @@ __global__ void PPMC_CTU(Real *dev_conserved, Real *dev_bounds_L, Real *dev_boun sum_ge = 0; #endif // DE #ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - sum_scalar[i] = 0; + for (Real &val : sum_scalar) { + val = 0; } #endif // SCALAR if (lambda_m <= 0) { From d5dd26e4018fe63031da0e8e2a141b43036d2996 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Fri, 7 Jul 2023 16:51:50 -0400 Subject: [PATCH 441/694] Fix grammer in comments --- src/reconstruction/plmc_cuda.cu | 4 ++-- src/reconstruction/ppmc_cuda.cu | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/reconstruction/plmc_cuda.cu b/src/reconstruction/plmc_cuda.cu index b1a60be86..67972083f 100644 --- a/src/reconstruction/plmc_cuda.cu +++ b/src/reconstruction/plmc_cuda.cu @@ -62,11 +62,11 @@ __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou reconstruction::Primitive const cell_i = reconstruction::Load_Data(dev_conserved, xid, yid, zid, nx, ny, n_cells, o1, o2, o3, gamma); - // cell i-1. The equality checks check the direction and subtracts one from the direction + // cell i-1. The equality checks the direction and will subtract one from the correct direction reconstruction::Primitive const cell_imo = reconstruction::Load_Data( dev_conserved, xid - int(dir == 0), yid - int(dir == 1), zid - int(dir == 2), nx, ny, n_cells, o1, o2, o3, gamma); - // cell i+1. The equality checks check the direction and adds one to the direction + // cell i+1. The equality checks the direction and add one to the correct direction reconstruction::Primitive const cell_ipo = reconstruction::Load_Data( dev_conserved, xid + int(dir == 0), yid + int(dir == 1), zid + int(dir == 2), nx, ny, n_cells, o1, o2, o3, gamma); diff --git a/src/reconstruction/ppmc_cuda.cu b/src/reconstruction/ppmc_cuda.cu index 52ea8d986..3f5b7d5cd 100644 --- a/src/reconstruction/ppmc_cuda.cu +++ b/src/reconstruction/ppmc_cuda.cu @@ -570,23 +570,23 @@ __global__ void PPMC_VL(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bound reconstruction::Primitive const cell_i = reconstruction::Load_Data(dev_conserved, xid, yid, zid, nx, ny, n_cells, o1, o2, o3, gamma); - // cell i-1. The equality checks check the direction and subtracts one from the direction + // cell i-1. The equality checks the direction and will subtract one from the correct direction // im1 stands for "i minus 1" reconstruction::Primitive const cell_im1 = reconstruction::Load_Data( dev_conserved, xid - int(dir == 0), yid - int(dir == 1), zid - int(dir == 2), nx, ny, n_cells, o1, o2, o3, gamma); - // cell i+1. The equality checks check the direction and adds one to the direction + // cell i+1. The equality checks the direction and add one to the correct direction // ip1 stands for "i plus 1" reconstruction::Primitive const cell_ip1 = reconstruction::Load_Data( dev_conserved, xid + int(dir == 0), yid + int(dir == 1), zid + int(dir == 2), nx, ny, n_cells, o1, o2, o3, gamma); - // cell i-2. The equality checks check the direction and subtracts one from the direction + // cell i-2. The equality checks the direction and will subtract two from the correct direction // im2 stands for "i minus 2" reconstruction::Primitive const cell_im2 = reconstruction::Load_Data(dev_conserved, xid - 2 * int(dir == 0), yid - 2 * int(dir == 1), zid - 2 * int(dir == 2), nx, ny, n_cells, o1, o2, o3, gamma); - // cell i+2. The equality checks check the direction and adds one to the direction + // cell i+2. The equality checks the direction and add two to the correct direction // ip2 stands for "i plus 2" reconstruction::Primitive const cell_ip2 = reconstruction::Load_Data(dev_conserved, xid + 2 * int(dir == 0), yid + 2 * int(dir == 1), From 5b5c0e1df6abdbef752da224e47105f02dcf0b46 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Mon, 10 Jul 2023 10:52:19 -0400 Subject: [PATCH 442/694] Clarify some comments --- src/reconstruction/ppmc_cuda.cu | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/reconstruction/ppmc_cuda.cu b/src/reconstruction/ppmc_cuda.cu index 3f5b7d5cd..08f643cda 100644 --- a/src/reconstruction/ppmc_cuda.cu +++ b/src/reconstruction/ppmc_cuda.cu @@ -27,7 +27,7 @@ __global__ void PPMC_CTU(Real *dev_conserved, Real *dev_bounds_L, Real *dev_boun int xid, yid, zid; cuda_utilities::compute3DIndices(thread_id, nx, ny, xid, yid, zid); - // Thread guard to prevent overrun + // Ensure that we are only operating on cells that will be used if (size_t const min = 3, max = 3; xid < min or xid >= nx - max or yid < min or yid >= ny - max or zid < min or zid >= nz - max) { return; @@ -536,7 +536,7 @@ __global__ void PPMC_VL(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bound int xid, yid, zid; cuda_utilities::compute3DIndices(thread_id, nx, ny, xid, yid, zid); - // Thread guard to prevent overrun + // Ensure that we are only operating on cells that will be used if (size_t const min = 3, max = 3; xid < min or xid >= nx - max or yid < min or yid >= ny - max or zid < min or zid >= nz - max) { return; From ef1598b1e6935386a985357277c763642f84df54 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Mon, 10 Jul 2023 11:08:23 -0400 Subject: [PATCH 443/694] Reorder arguments to `reconstruction::Compute_Slope` This was done to make it consistent with the other functions in the `reconstruction` namespace --- src/reconstruction/plmc_cuda.cu | 6 +++--- src/reconstruction/ppmc_cuda.cu | 18 ++++++++-------- src/reconstruction/reconstruction.h | 24 +++++++++++----------- src/reconstruction/reconstruction_tests.cu | 8 ++++---- 4 files changed, 28 insertions(+), 28 deletions(-) diff --git a/src/reconstruction/plmc_cuda.cu b/src/reconstruction/plmc_cuda.cu index 67972083f..fd7d2e0ae 100644 --- a/src/reconstruction/plmc_cuda.cu +++ b/src/reconstruction/plmc_cuda.cu @@ -79,13 +79,13 @@ __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou // the cell center // left - reconstruction::Primitive const del_L = reconstruction::Compute_Slope(cell_i, cell_imo); + reconstruction::Primitive const del_L = reconstruction::Compute_Slope(cell_imo, cell_i); // right - reconstruction::Primitive const del_R = reconstruction::Compute_Slope(cell_ipo, cell_i); + reconstruction::Primitive const del_R = reconstruction::Compute_Slope(cell_i, cell_ipo); // centered - reconstruction::Primitive const del_C = reconstruction::Compute_Slope(cell_ipo, cell_imo, 0.5); + reconstruction::Primitive const del_C = reconstruction::Compute_Slope(cell_imo, cell_ipo, 0.5); // Van Leer reconstruction::Primitive const del_G = reconstruction::Van_Leer_Slope(del_L, del_R); diff --git a/src/reconstruction/ppmc_cuda.cu b/src/reconstruction/ppmc_cuda.cu index 08f643cda..9acf2b936 100644 --- a/src/reconstruction/ppmc_cuda.cu +++ b/src/reconstruction/ppmc_cuda.cu @@ -96,13 +96,13 @@ __global__ void PPMC_CTU(Real *dev_conserved, Real *dev_bounds_L, Real *dev_boun // and R refer to locations relative to the cell center Stone Eqn 36 // left - reconstruction::Primitive del_L = reconstruction::Compute_Slope(cell_im1, cell_im2); + reconstruction::Primitive del_L = reconstruction::Compute_Slope(cell_im2, cell_im1); // right - reconstruction::Primitive del_R = reconstruction::Compute_Slope(cell_i, cell_im1); + reconstruction::Primitive del_R = reconstruction::Compute_Slope(cell_im1, cell_i); // centered - reconstruction::Primitive del_C = reconstruction::Compute_Slope(cell_i, cell_im2, 0.5); + reconstruction::Primitive del_C = reconstruction::Compute_Slope(cell_im2, cell_i, 0.5); // Van Leer reconstruction::Primitive del_G = reconstruction::Van_Leer_Slope(del_L, del_R); @@ -141,13 +141,13 @@ __global__ void PPMC_CTU(Real *dev_conserved, Real *dev_bounds_L, Real *dev_boun // and R refer to locations relative to the cell center Stone Eqn 36 // left - del_L = reconstruction::Compute_Slope(cell_i, cell_im1); + del_L = reconstruction::Compute_Slope(cell_im1, cell_i); // right - del_R = reconstruction::Compute_Slope(cell_ip1, cell_i); + del_R = reconstruction::Compute_Slope(cell_i, cell_ip1); // centered - del_C = reconstruction::Compute_Slope(cell_ip1, cell_im1, 0.5); + del_C = reconstruction::Compute_Slope(cell_im1, cell_ip1, 0.5); // Van Leer del_G = reconstruction::Van_Leer_Slope(del_L, del_R); @@ -182,13 +182,13 @@ __global__ void PPMC_CTU(Real *dev_conserved, Real *dev_bounds_L, Real *dev_boun // and R refer to locations relative to the cell center Stone Eqn 36 // left - del_L = reconstruction::Compute_Slope(cell_ip1, cell_i); + del_L = reconstruction::Compute_Slope(cell_i, cell_ip1); // right - del_R = reconstruction::Compute_Slope(cell_ip2, cell_ip1); + del_R = reconstruction::Compute_Slope(cell_ip1, cell_ip2); // centered - del_C = reconstruction::Compute_Slope(cell_ip2, cell_i, 0.5); + del_C = reconstruction::Compute_Slope(cell_i, cell_ip2, 0.5); // Van Leer del_G = reconstruction::Van_Leer_Slope(del_L, del_R); diff --git a/src/reconstruction/reconstruction.h b/src/reconstruction/reconstruction.h index 70e43d792..3a77780e6 100644 --- a/src/reconstruction/reconstruction.h +++ b/src/reconstruction/reconstruction.h @@ -157,10 +157,10 @@ Primitive __device__ __host__ __inline__ Load_Data(Real const *dev_conserved, si // ===================================================================================================================== /*! - * \brief Compute a simple slope. Equation is `coef * (left - right)`. + * \brief Compute a simple slope. Equation is `coef * (right - left)`. * - * \param[in] left The data on the positive side of the slope - * \param[in] right The data on the negative side of the slope + * \param[in] left The data with the lower index (on the "left" side) + * \param[in] right The data with the higher index (on the "right" side) * \param[in] coef The coefficient to multiply the slope by. Defaults to 1.0 * \return Primitive The slopes */ @@ -169,24 +169,24 @@ Primitive __device__ __host__ __inline__ Compute_Slope(Primitive const &left, Pr { Primitive slopes; - slopes.density = coef * (left.density - right.density); - slopes.velocity_x = coef * (left.velocity_x - right.velocity_x); - slopes.velocity_y = coef * (left.velocity_y - right.velocity_y); - slopes.velocity_z = coef * (left.velocity_z - right.velocity_z); - slopes.pressure = coef * (left.pressure - right.pressure); + slopes.density = coef * (right.density - left.density); + slopes.velocity_x = coef * (right.velocity_x - left.velocity_x); + slopes.velocity_y = coef * (right.velocity_y - left.velocity_y); + slopes.velocity_z = coef * (right.velocity_z - left.velocity_z); + slopes.pressure = coef * (right.pressure - left.pressure); #ifdef MHD - slopes.magnetic_y = coef * (left.magnetic_y - right.magnetic_y); - slopes.magnetic_z = coef * (left.magnetic_z - right.magnetic_z); + slopes.magnetic_y = coef * (right.magnetic_y - left.magnetic_y); + slopes.magnetic_z = coef * (right.magnetic_z - left.magnetic_z); #endif // MHD #ifdef DE - slopes.gas_energy = coef * (left.gas_energy - right.gas_energy); + slopes.gas_energy = coef * (right.gas_energy - left.gas_energy); #endif // DE #ifdef SCALAR for (size_t i = 0; i < grid_enum::nscalars; i++) { - slopes.scalar[i] = coef * (left.scalar[i] - right.scalar[i]); + slopes.scalar[i] = coef * (right.scalar[i] - left.scalar[i]); } #endif // SCALAR diff --git a/src/reconstruction/reconstruction_tests.cu b/src/reconstruction/reconstruction_tests.cu index 777c799fe..dc7100524 100644 --- a/src/reconstruction/reconstruction_tests.cu +++ b/src/reconstruction/reconstruction_tests.cu @@ -151,11 +151,11 @@ TEST(tALLReconstructionComputeSlope, CorrectInputExpectCorrectOutput) { // Setup input data #ifdef MHD - reconstruction::Primitive left{1, 2, 3, 4, 5, 6, 7, 8}; - reconstruction::Primitive right{6, 7, 8, 9, 10, 11, 12, 13}; + reconstruction::Primitive left{6, 7, 8, 9, 10, 11, 12, 13}; + reconstruction::Primitive right{1, 2, 3, 4, 5, 6, 7, 8}; #else // MHD - reconstruction::Primitive left{1, 2, 3, 4, 5}; - reconstruction::Primitive right{6, 7, 8, 9, 10}; + reconstruction::Primitive left{6, 7, 8, 9, 10}; + reconstruction::Primitive right{1, 2, 3, 4, 5}; #endif // MHD Real const coef = 0.5; From 3b192d3f6329764beab332983fcea790e20d91a0 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Mon, 10 Jul 2023 11:25:51 -0400 Subject: [PATCH 444/694] PPMC comments to indicate which PPM method is used --- src/reconstruction/ppmc_cuda.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/reconstruction/ppmc_cuda.h b/src/reconstruction/ppmc_cuda.h index 1eb146750..f3eba5b09 100644 --- a/src/reconstruction/ppmc_cuda.h +++ b/src/reconstruction/ppmc_cuda.h @@ -9,7 +9,8 @@ /*! * \brief Computes the left and right interface states using PPM with limiting in the characteristic variables and - * characteristic tracing. Used for the CTU and SIMPLE integrators + * characteristic tracing. Used for the CTU and SIMPLE integrators. This uses the PPM method described in + * Stone et al. 2008 "Athena: A New Code for Astrophysical MHD" * * \param[in] dev_conserved The conserved variable array * \param[out] dev_bounds_L The array of left interfaces @@ -27,7 +28,11 @@ __global__ void PPMC_CTU(Real *dev_conserved, Real *dev_bounds_L, Real *dev_boun /*! * \brief Computes the left and right interface states using PPM with limiting in the characteristic variables. Used for - * the VL (Van Leer) integrators + * the VL (Van Leer) integrators. This uses the PPM method described in + * Felker & Stone 2018 "A fourth-order accurate finite volume method for ideal MHD via upwind constrained transport". + * We found that this newer method and limiters was some stable, less oscilattory, and faster than the method described + * in Stone et al. 2008 which is used in PPMC_CTU. The difference is most prounced in the Brio & Wu shock tube where the + * PPM oscillations are much smaller using this method. * * \param[in] dev_conserved The conserved variable array * \param[out] dev_bounds_L The array of left interfaces From 156c028cae963b0d4ee15dab32c36d90998febb4 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Mon, 10 Jul 2023 16:35:31 -0400 Subject: [PATCH 445/694] Add citations to PPMC_VL description --- src/reconstruction/ppmc_cuda.h | 12 ++++++++---- src/reconstruction/reconstruction.h | 23 ++++++----------------- 2 files changed, 14 insertions(+), 21 deletions(-) diff --git a/src/reconstruction/ppmc_cuda.h b/src/reconstruction/ppmc_cuda.h index f3eba5b09..033f2505b 100644 --- a/src/reconstruction/ppmc_cuda.h +++ b/src/reconstruction/ppmc_cuda.h @@ -10,7 +10,9 @@ /*! * \brief Computes the left and right interface states using PPM with limiting in the characteristic variables and * characteristic tracing. Used for the CTU and SIMPLE integrators. This uses the PPM method described in - * Stone et al. 2008 "Athena: A New Code for Astrophysical MHD" + * Stone et al. 2008 "Athena: A New Code for Astrophysical MHD". Fundementally this method relies on a Van Leer limiter + * in the characteristic variables to monotonize the slopes followed by limiting the interface states using the limiter + * from Colella & Woodward 1984. * * \param[in] dev_conserved The conserved variable array * \param[out] dev_bounds_L The array of left interfaces @@ -30,9 +32,11 @@ __global__ void PPMC_CTU(Real *dev_conserved, Real *dev_bounds_L, Real *dev_boun * \brief Computes the left and right interface states using PPM with limiting in the characteristic variables. Used for * the VL (Van Leer) integrators. This uses the PPM method described in * Felker & Stone 2018 "A fourth-order accurate finite volume method for ideal MHD via upwind constrained transport". - * We found that this newer method and limiters was some stable, less oscilattory, and faster than the method described - * in Stone et al. 2008 which is used in PPMC_CTU. The difference is most prounced in the Brio & Wu shock tube where the - * PPM oscillations are much smaller using this method. + * This method computes the 3rd order interface then applies a mixture of monoticity constraints from from Colella & + * Sekora 2008, McCorquodale & Colella 2011, and Colella et al. 2011; for details see the + * `reconstruction::PPM_Single_Variable` function. We found that this newer method and limiters was more stable, less + * oscillatory, and faster than the method described in Stone et al. 2008 which is used in PPMC_CTU. The difference is + * most pronounced in the Brio & Wu shock tube where the PPM oscillations are much smaller using this method. * * \param[in] dev_conserved The conserved variable array * \param[out] dev_bounds_L The array of left interfaces diff --git a/src/reconstruction/reconstruction.h b/src/reconstruction/reconstruction.h index 3a77780e6..d2cca5189 100644 --- a/src/reconstruction/reconstruction.h +++ b/src/reconstruction/reconstruction.h @@ -673,14 +673,14 @@ Primitive __device__ __host__ __inline__ Calc_Interface_Parabolic(Primitive cons * \brief Compute the PPM interface state for a given field/stencil. * * \details This method is heavily based on the implementation in Athena++. See the following papers for details - * - K. Felker & J. Stone, "A fourth-order accurate finite volume method for ideal MHD via upwind constrained + * - K. Felker & J. Stone, "A fourth-order accurate finite volume method for ideal MHD via upwind constrained * transport", JCP, 375, (2018) - * - P. Colella & P. Woodward, "The Piecewise Parabolic Method (PPM) for Gas-Dynamical Simulations", JCP, 54, 174 + * - P. Colella & P. Woodward, "The Piecewise Parabolic Method (PPM) for Gas-Dynamical Simulations", JCP, 54, 174 * (1984) - * - P. Colella & M. Sekora, "A limiter for PPM that preserves accuracy at smooth extrema", JCP, 227, 7069 (2008) - * - P. McCorquodale & P. Colella, "A high-order finite-volume method for conservation laws on locally refined grids", - * CAMCoS, 6, 1 (2011) - * - P. Colella, M.R. Dorr, J. Hittinger, D. Martin, "High-order, finite-volume methods in mapped coordinates", JCP, + * - P. Colella & M. Sekora, "A limiter for PPM that preserves accuracy at smooth extrema", JCP, 227, 7069 (2008) + * - P. McCorquodale & P. Colella, "A high-order finite-volume method for conservation laws on locally refined + * grids", CAMCoS, 6, 1 (2011) + * - P. Colella, M.R. Dorr, J. Hittinger, D. Martin, "High-order, finite-volume methods in mapped coordinates", JCP, * 230, 2952 (2011) * * \param[in] cell_im2 The value of the field/stencil at i-2 @@ -695,17 +695,6 @@ void __device__ __host__ __inline__ PPM_Single_Variable(Real const &cell_im2, Re Real const &cell_ip1, Real const &cell_ip2, Real &interface_L_iph, Real &interface_R_imh) { - // This method is heavily based on the implementation in Athena++. See the following papers for details - // - K. Felker & J. Stone, "A fourth-order accurate finite volume method for ideal MHD via upwind constrained - // transport", JCP, 375, (2018) - // - P. Colella & P. Woodward, "The Piecewise Parabolic Method (PPM) for Gas-Dynamical Simulations", JCP, 54, 174 - // (1984) - // - P. Colella & M. Sekora, "A limiter for PPM that preserves accuracy at smooth extrema", JCP, 227, 7069 (2008) - // - P. McCorquodale & P. Colella, "A high-order finite-volume method for conservation laws on locally refined - // grids", CAMCoS, 6, 1 (2011) - // - P. Colella, M.R. Dorr, J. Hittinger, D. Martin, "High-order, finite-volume methods in mapped coordinates", JCP, - // 230, 2952 (2011) - // Let's start by setting up some things that we'll need later // Colella & Sekora 2008 constant used in second derivative limiter From deef210e2d2215dd4ec20be334bbe6ec6313cb90 Mon Sep 17 00:00:00 2001 From: helenarichie Date: Mon, 10 Jul 2023 18:12:06 -0400 Subject: [PATCH 446/694] change function names to conform to Cholla naming standards and turn on readability-identifier-naming check --- .clang-tidy | 21 +- run_check.sh | 11 + src/analysis/feedback_analysis_gpu.cu | 6 +- src/global/global.cpp | 36 +- src/global/global.h | 10 +- src/gravity/paris/ParisPeriodic.cu | 20 +- src/gravity/paris/PoissonZero3DBlockedGPU.cu | 26 +- src/gravity/potential_paris_3D.cu | 2 +- src/hydro/hydro_cuda_tests.cu | 4 +- src/io/io.cpp | 78 +-- src/io/io.h | 18 +- src/io/io_gpu.cu | 4 +- src/main.cpp | 18 +- src/main_tests.cpp | 20 +- src/mhd/ct_electric_fields_tests.cu | 10 +- src/mhd/magnetic_divergence_tests.cu | 2 +- src/mhd/magnetic_update_tests.cu | 6 +- src/model/disk_ICs.cpp | 132 ++--- src/particles/feedback_CIC_gpu.cu | 18 +- src/reconstruction/plmc_cuda_tests.cu | 8 +- src/reconstruction/reconstruction_tests.cu | 164 +++--- src/riemann_solvers/hllc_cuda_tests.cu | 8 +- src/riemann_solvers/hlld_cuda_tests.cu | 554 +++++++++---------- src/system_tests/hydro_system_tests.cpp | 12 +- src/system_tests/mhd_system_tests.cpp | 30 +- src/utils/DeviceVector_tests.cu | 10 +- src/utils/cuda_utilities_tests.cpp | 2 +- src/utils/hydro_utilities_tests.cpp | 22 +- src/utils/math_utilities_tests.cpp | 8 +- src/utils/mhd_utilities_tests.cu | 32 +- src/utils/reduction_utilities_tests.cu | 2 +- src/utils/testing_utilities.cpp | 2 +- src/utils/testing_utilities.h | 4 +- src/utils/timing_functions.cpp | 8 +- 34 files changed, 659 insertions(+), 649 deletions(-) create mode 100644 run_check.sh diff --git a/.clang-tidy b/.clang-tidy index eccdfe06e..af3796ff8 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -122,7 +122,6 @@ Checks: "*, -readability-else-after-return, -readability-function-cognitive-complexity, -readability-identifier-length, - -readability-identifier-naming, -readability-implicit-bool-conversion, -readability-inconsistent-declaration-parameter-name, -readability-isolate-declaration, @@ -149,23 +148,23 @@ CheckOptions: # - aNy_CasE # # Entries that are commented out probably aren't needed but it should be verified - readability-identifier-naming.VariableCase: 'lower_case' + # readability-identifier-naming.VariableCase: 'lower_case' readability-identifier-naming.FunctionCase: 'Camel_Snake_Case' readability-identifier-naming.NamespaceCase: 'lower_case' - readability-identifier-naming.MacroDefinitionCase: 'UPPER_CASE' - readability-identifier-naming.TypedefCase: 'CamelCase' - readability-identifier-naming.TypeAliasCase: 'CamelCase' + # readability-identifier-naming.MacroDefinitionCase: 'UPPER_CASE' + # readability-identifier-naming.TypedefCase: 'CamelCase' + # readability-identifier-naming.TypeAliasCase: 'CamelCase' readability-identifier-naming.EnumCase: 'CamelCase' - readability-identifier-naming.ConstantCase: 'lower_case' + # readability-identifier-naming.ConstantCase: 'lower_case' - readability-identifier-naming.ConstantPrefix: 'k_' - readability-identifier-naming.GlobalVariablePrefix: 'g_' + # readability-identifier-naming.ConstantPrefix: 'k_' + # readability-identifier-naming.GlobalVariablePrefix: 'g_' - readability-identifier-naming.ClassCase: 'CamelCase' + # readability-identifier-naming.ClassCase: 'CamelCase' # readability-identifier-naming.MemberCase: 'lower_case' # readability-identifier-naming.MethodCase: 'CamelCase' - readability-identifier-naming.PrivateMemberSuffix: '_' - readability-identifier-naming.PrivateMethodSuffix: '_' + # readability-identifier-naming.PrivateMemberSuffix: '_' + # readability-identifier-naming.PrivateMethodSuffix: '_' # readability-identifier-naming.StructCase: 'CamelCase' ... diff --git a/run_check.sh b/run_check.sh new file mode 100644 index 000000000..01168bf12 --- /dev/null +++ b/run_check.sh @@ -0,0 +1,11 @@ +cd /ix/eschneider/helena/code/cholla + +make tidy TYPE=hydro & +make tidy TYPE=gravity & +make tidy TYPE=disk & +make tidy TYPE=particles & +make tidy TYPE=cosmology & +make tidy TYPE=mhd & +make tidy TYPE=dust & + +wait diff --git a/src/analysis/feedback_analysis_gpu.cu b/src/analysis/feedback_analysis_gpu.cu index 9ef268216..778ee921c 100644 --- a/src/analysis/feedback_analysis_gpu.cu +++ b/src/analysis/feedback_analysis_gpu.cu @@ -11,7 +11,7 @@ #define MIN_DENSITY (0.01 * MP * MU * LENGTH_UNIT * LENGTH_UNIT * LENGTH_UNIT / MASS_UNIT) // 148279.7 #define TPB_ANALYSIS 1024 -__device__ void warpReduce(volatile Real *buff, size_t tid) +__device__ void Warp_Reduce(volatile Real *buff, size_t tid) { if (TPB_ANALYSIS >= 64) { buff[tid] += buff[tid + 32]; @@ -125,8 +125,8 @@ void __global__ Reduce_Tubulence_kernel_2(Real *input_m, Real *input_v, Real *ou } if (tid < 32) { - warpReduce(s_mass, tid); - warpReduce(s_vel, tid); + Warp_Reduce(s_mass, tid); + Warp_Reduce(s_vel, tid); } __syncthreads(); diff --git a/src/global/global.cpp b/src/global/global.cpp index a4c697d3c..ac0f20222 100644 --- a/src/global/global.cpp +++ b/src/global/global.cpp @@ -35,18 +35,18 @@ void Set_Gammas(Real gamma_in) gama = gamma_in; } -/*! \fn double get_time(void) +/*! \fn double Get_Time(void) * \brief Returns the current clock time. */ -double get_time(void) +double Get_Time(void) { struct timeval timer; gettimeofday(&timer, NULL); return timer.tv_sec + 1.0e-6 * timer.tv_usec; } -/*! \fn int sgn +/*! \fn int Sgn * \brief Mathematical sign function. Returns sign of x. */ -int sgn(Real x) +int Sgn(Real x) { if (x < 0) { return -1; @@ -58,7 +58,7 @@ int sgn(Real x) #ifndef CUDA /*! \fn Real calc_eta(Real cW[], Real gamma) * \brief Calculate the eta value for the H correction. */ -Real calc_eta(Real cW[], Real gamma) +Real Calc_Eta(Real cW[], Real gamma) { Real pl, pr, al, ar; @@ -74,9 +74,9 @@ Real calc_eta(Real cW[], Real gamma) } #endif // NO CUDA -/*! \fn char trim(char *s) +/*! \fn char Trim(char *s) * \brief Gets rid of trailing and leading whitespace. */ -char *trim(char *s) +char *Trim(char *s) { /* Initialize start, end pointers */ char *s1 = s, *s2 = &s[strlen(s) - 1]; @@ -103,10 +103,10 @@ const std::set optionalParams = { "delta", "nzr", "nxr", "H0", "Omega_M", "Omega_L", "Init_redshift", "End_redshift", "tile_length", "n_proc_x", "n_proc_y", "n_proc_z"}; -/*! \fn int is_param_valid(char *name); +/*! \fn int Is_Param_Valid(char *name); * \brief Verifies that a param is valid (even if not needed). Avoids * "warnings" in output. */ -int is_param_valid(const char *param_name) +int Is_Param_Valid(const char *param_name) { // for (auto optionalParam = optionalParams.begin(); optionalParam != optionalParams.end(); ++optionalParam) { for (const auto *optionalParam : optionalParams) { @@ -117,11 +117,11 @@ int is_param_valid(const char *param_name) return 0; } -void parse_param(char *name, char *value, struct parameters *parms); +void Parse_Param(char *name, char *value, struct parameters *parms); -/*! \fn void parse_params(char *param_file, struct parameters * parms); +/*! \fn void Parse_Params(char *param_file, struct parameters * parms); * \brief Reads the parameters in the given file into a structure. */ -void parse_params(char *param_file, struct parameters *parms, int argc, char **argv) +void Parse_Params(char *param_file, struct parameters *parms, int argc, char **argv) { int buf; char *s, buff[256]; @@ -174,8 +174,8 @@ void parse_params(char *param_file, struct parameters *parms, int argc, char **a } else { strncpy(value, s, MAXLEN); } - trim(value); - parse_param(name, value, parms); + Trim(value); + Parse_Param(name, value, parms); } /* Close file */ fclose(fp); @@ -195,14 +195,14 @@ void parse_params(char *param_file, struct parameters *parms, int argc, char **a } else { strncpy(value, s, MAXLEN); } - parse_param(name, value, parms); + Parse_Param(name, value, parms); chprintf("Override with %s=%s\n", name, value); } } -/*! \fn void parse_param(char *name,char *value, struct parameters *parms); +/*! \fn void Parse_Param(char *name,char *value, struct parameters *parms); * \brief Parses and sets a single param based on name and value. */ -void parse_param(char *name, char *value, struct parameters *parms) +void Parse_Param(char *name, char *value, struct parameters *parms) { /* Copy into correct entry in parameters struct */ if (strcmp(name, "nx") == 0) { @@ -451,7 +451,7 @@ void parse_param(char *name, char *value, struct parameters *parms) strncpy(parms->skewersdir, value, MAXLEN); #endif #endif - } else if (!is_param_valid(name)) { + } else if (!Is_Param_Valid(name)) { chprintf("WARNING: %s/%s: Unknown parameter/value pair!\n", name, value); } } diff --git a/src/global/global.h b/src/global/global.h index b037c931d..66fb78829 100644 --- a/src/global/global.h +++ b/src/global/global.h @@ -162,16 +162,16 @@ extern void Set_Gammas(Real gamma_in); /*! \fn double get_time(void) * \brief Returns the current clock time. */ -extern double get_time(void); +extern double Get_Time(void); /*! \fn int sgn * \brief Mathematical sign function. Returns sign of x. */ -extern int sgn(Real x); +extern int Sgn(Real x); #ifndef CUDA /*! \fn Real calc_eta(Real cW[], Real gamma) * \brief Calculate the eta value for the H correction. */ -extern Real calc_eta(Real cW[], Real gamma); +extern Real Calc_Eta(Real cW[], Real gamma); #endif struct parameters { @@ -326,11 +326,11 @@ struct parameters { /*! \fn void parse_params(char *param_file, struct parameters * parms); * \brief Reads the parameters in the given file into a structure. */ -extern void parse_params(char *param_file, struct parameters *parms, int argc, char **argv); +extern void Parse_Params(char *param_file, struct parameters *parms, int argc, char **argv); /*! \fn int is_param_valid(char *name); * \brief Verifies that a param is valid (even if not needed). Avoids * "warnings" in output. */ -extern int is_param_valid(const char *name); +extern int Is_Param_Valid(const char *name); #endif // GLOBAL_H diff --git a/src/gravity/paris/ParisPeriodic.cu b/src/gravity/paris/ParisPeriodic.cu index 0686626c6..0b2e5ef5a 100644 --- a/src/gravity/paris/ParisPeriodic.cu +++ b/src/gravity/paris/ParisPeriodic.cu @@ -4,7 +4,7 @@ #include "ParisPeriodic.hpp" -__host__ __device__ static inline double sqr(const double x) { return x * x; } +__host__ __device__ static inline double Sqr(const double x) { return x * x; } ParisPeriodic::ParisPeriodic(const int n[3], const double lo[3], const double hi[3], const int m[3], const int id[3]) : ni_(n[0]), @@ -16,9 +16,9 @@ ParisPeriodic::ParisPeriodic(const int n[3], const double lo[3], const double hi ddk_(2.0 * double(n[2] - 1) / (hi[2] - lo[2])), #elif defined PARIS_5PT nk_(n[2]), - ddi_(sqr(double(n[0] - 1) / (hi[0] - lo[0])) / 6.0), - ddj_(sqr(double(n[1] - 1) / (hi[1] - lo[1])) / 6.0), - ddk_(sqr(double(n[2] - 1) / (hi[2] - lo[2])) / 6.0), + ddi_(Sqr(double(n[0] - 1) / (hi[0] - lo[0])) / 6.0), + ddj_(Sqr(double(n[1] - 1) / (hi[1] - lo[1])) / 6.0), + ddk_(Sqr(double(n[2] - 1) / (hi[2] - lo[2])) / 6.0), #else ddi_{2.0 * M_PI * double(n[0] - 1) / (double(n[0]) * (hi[0] - lo[0]))}, ddj_{2.0 * M_PI * double(n[1] - 1) / (double(n[1]) * (hi[1] - lo[1]))}, @@ -52,9 +52,9 @@ void ParisPeriodic::solve(const size_t bytes, double *const density, double *con [=] __device__(const int i, const int j, const int k, const cufftDoubleComplex b) { if (i || j || k) { #ifdef PARIS_3PT - const double i2 = sqr(sin(double(min(i, ni - i)) * si) * ddi); - const double j2 = sqr(sin(double(min(j, nj - j)) * sj) * ddj); - const double k2 = sqr(sin(double(k) * sk) * ddk); + const double i2 = Sqr(sin(double(min(i, ni - i)) * si) * ddi); + const double j2 = Sqr(sin(double(min(j, nj - j)) * sj) * ddj); + const double k2 = Sqr(sin(double(k) * sk) * ddk); #elif defined PARIS_5PT const double ci = cos(double(min(i, ni - i)) * si); const double cj = cos(double(min(j, nj - j)) * sj); @@ -63,9 +63,9 @@ void ParisPeriodic::solve(const size_t bytes, double *const density, double *con const double j2 = ddj * (2.0 * cj * cj - 16.0 * cj + 14.0); const double k2 = ddk * (2.0 * ck * ck - 16.0 * ck + 14.0); #else - const double i2 = sqr(double(min(i, ni - i)) * ddi); - const double j2 = sqr(double(min(j, nj - j)) * ddj); - const double k2 = sqr(double(k) * ddk); + const double i2 = Sqr(double(min(i, ni - i)) * ddi); + const double j2 = Sqr(double(min(j, nj - j)) * ddj); + const double k2 = Sqr(double(k) * ddk); #endif const double d = -1.0 / (i2 + j2 + k2); return cufftDoubleComplex{d * b.x, d * b.y}; diff --git a/src/gravity/paris/PoissonZero3DBlockedGPU.cu b/src/gravity/paris/PoissonZero3DBlockedGPU.cu index 5ad31406e..e924ca30e 100644 --- a/src/gravity/paris/PoissonZero3DBlockedGPU.cu +++ b/src/gravity/paris/PoissonZero3DBlockedGPU.cu @@ -10,7 +10,7 @@ static constexpr double sqrt2 = 0.4142135623730950488016887242096980785696718753769480731766797379; -static inline __host__ __device__ double sqr(const double x) { return x * x; } +static inline __host__ __device__ double Sqr(const double x) { return x * x; } PoissonZero3DBlockedGPU::PoissonZero3DBlockedGPU(const int n[3], const double lo[3], const double hi[3], const int m[3], const int id[3]) @@ -20,9 +20,9 @@ PoissonZero3DBlockedGPU::PoissonZero3DBlockedGPU(const int n[3], const double lo ddj_(2.0 * double(n[1] - 1) / (hi[1] - lo[1])), ddk_(2.0 * double(n[2] - 1) / (hi[2] - lo[2])), #elif defined PARIS_GALACTIC_5PT - ddi_(sqr(double(n[0] - 1) / (hi[0] - lo[0])) / 6.0), - ddj_(sqr(double(n[1] - 1) / (hi[1] - lo[1])) / 6.0), - ddk_(sqr(double(n[2] - 1) / (hi[2] - lo[2])) / 6.0), + ddi_(Sqr(double(n[0] - 1) / (hi[0] - lo[0])) / 6.0), + ddj_(Sqr(double(n[1] - 1) / (hi[1] - lo[1])) / 6.0), + ddk_(Sqr(double(n[2] - 1) / (hi[2] - lo[2])) / 6.0), #else ddi_{M_PI * double(n[0] - 1) / (double(n[0]) * (hi[0] - lo[0]))}, ddj_{M_PI * double(n[1] - 1) / (double(n[1]) * (hi[1] - lo[1]))}, @@ -38,7 +38,7 @@ PoissonZero3DBlockedGPU::PoissonZero3DBlockedGPU(const int n[3], const double lo nj_(n[1]), nk_(n[2]) { - mq_ = int(round(sqrt(mk_))); + mq_ = int(round(Sqrt(mk_))); while (mk_ % mq_) { mq_--; } @@ -302,7 +302,7 @@ void PoissonZero3DBlockedGPU::solve(const long bytes, double *const density, dou const double si = M_PI / double(ni + ni); const double sj = M_PI / double(nj + nj); const double sk = M_PI / double(nk + nk); - const double iin = sqr(sin(double(ni) * si) * ddi); + const double iin = Sqr(sin(double(ni) * si) * ddi); #elif defined PARIS_GALACTIC_5PT const double si = M_PI / double(ni); const double sj = M_PI / double(nj); @@ -310,7 +310,7 @@ void PoissonZero3DBlockedGPU::solve(const long bytes, double *const density, dou const double cin = cos(double(ni) * si); const double iin = ddi * (2.0 * cin * cin - 16.0 * cin + 14.0); #else - const double iin = sqr(double(ni) * ddi); + const double iin = Sqr(double(ni) * ddi); #endif const int jLo = (idi * mp + idp) * djp; const int kLo = (idj * mq + idq) * dkq; @@ -319,7 +319,7 @@ void PoissonZero3DBlockedGPU::solve(const long bytes, double *const density, dou const int kj = (k * djp + j) * ni; const int kj2 = (k * djp + j) * ni2; #ifdef PARIS_GALACTIC_3PT - const double jjkk = sqr(sin(double(jLo + j + 1) * sj) * ddj) + sqr(sin(double(kLo + k + 1) * sk) * ddk); + const double jjkk = Sqr(sin(double(jLo + j + 1) * sj) * ddj) + Sqr(sin(double(kLo + k + 1) * sk) * ddk); #elif defined PARIS_GALACTIC_5PT const double cj = cos(double(jLo + j + 1) * sj); const double jj = ddj * (2.0 * cj * cj - 16.0 * cj + 14.0); @@ -328,18 +328,18 @@ void PoissonZero3DBlockedGPU::solve(const long bytes, double *const density, dou const double jjkk = jj + kk; #else const double jjkk = - sqr(double(jLo + j + 1) * ddj) + sqr(double(kLo + k + 1) * ddk); + Sqr(double(jLo + j + 1) * ddj) + Sqr(double(kLo + k + 1) * ddk); #endif if (i == 0) { ua[kj] = -2.0 * ub[kj2] / (iin + jjkk); } else { #ifdef PARIS_GALACTIC_3PT - const double ii = sqr(sin(double(i) * si) * ddi); + const double ii = Sqr(sin(double(i) * si) * ddi); #elif defined PARIS_GALACTIC_5PT const double ci = cos(double(i) * si); const double ii = ddi * (2.0 * ci * ci - 16.0 * ci + 14.0); #else - const double ii = sqr(double(i) * ddi); + const double ii = Sqr(double(i) * ddi); #endif if (i + i == ni) { ua[kj + ni / 2] = -2.0 * ub[kj2 + ni] / (ii + jjkk); @@ -349,12 +349,12 @@ void PoissonZero3DBlockedGPU::solve(const long bytes, double *const density, dou double wa, wb; sincospi(double(i) / double(ni + ni), &wb, &wa); #ifdef PARIS_GALACTIC_3PT - const double nii = sqr(sin(double(ni - i) * si) * ddi); + const double nii = Sqr(sin(double(ni - i) * si) * ddi); #elif defined PARIS_GALACTIC_5PT const double cni = cos(double(ni - i) * si); const double nii = ddi * (2.0 * cni * cni - 16.0 * cni + 14.0); #else - const double nii = sqr(double(ni - i) * ddi); + const double nii = Sqr(double(ni - i) * ddi); #endif const double aai = -(wa * ai + wb * bi) / (nii + jjkk); const double bbi = (wa * bi - wb * ai) / (ii + jjkk); diff --git a/src/gravity/potential_paris_3D.cu b/src/gravity/potential_paris_3D.cu index ab8bdc0b5..51d967a9d 100644 --- a/src/gravity/potential_paris_3D.cu +++ b/src/gravity/potential_paris_3D.cu @@ -9,7 +9,7 @@ #include "../utils/gpu.hpp" static void __attribute__((unused)) -printDiff(const Real *p, const Real *q, const int ng, const int nx, const int ny, const int nz, const bool plot = false) +Print_Diff(const Real *p, const Real *q, const int ng, const int nx, const int ny, const int nz, const bool plot = false) { Real dMax = 0, dSum = 0, dSum2 = 0; Real qMax = 0, qSum = 0, qSum2 = 0; diff --git a/src/hydro/hydro_cuda_tests.cu b/src/hydro/hydro_cuda_tests.cu index 482564462..48d11b0c3 100644 --- a/src/hydro/hydro_cuda_tests.cu +++ b/src/hydro/hydro_cuda_tests.cu @@ -106,7 +106,7 @@ TEST(tHYDROHydroInverseCrossingTime, CorrectInputExpectCorrectOutput) velocityZ, cellSizeX, cellSizeY, cellSizeZ, gamma); // Check results - testingUtilities::checkResults(fiducialInverseCrossingTime, testInverseCrossingTime, "inverse crossing time"); + testingUtilities::Check_Results(fiducialInverseCrossingTime, testInverseCrossingTime, "inverse crossing time"); } // ============================================================================= // End of tests for the hydroInverseCrossingTime function @@ -140,7 +140,7 @@ TEST(tMHDMhdInverseCrossingTime, CorrectInputExpectCorrectOutput) magneticZ, cellSizeX, cellSizeY, cellSizeZ, gamma); // Check results - testingUtilities::checkResults(fiducialInverseCrossingTime, testInverseCrossingTime, "inverse crossing time"); + testingUtilities::Check_Results(fiducialInverseCrossingTime, testInverseCrossingTime, "inverse crossing time"); } // ============================================================================= // End of tests for the mhdInverseCrossingTime function diff --git a/src/io/io.cpp b/src/io/io.cpp index 3335afda7..690f8d3fc 100644 --- a/src/io/io.cpp +++ b/src/io/io.cpp @@ -30,7 +30,7 @@ /* function used to rotate points about an axis in 3D for the rotated projection * output routine */ -void rotate_point(Real x, Real y, Real z, Real delta, Real phi, Real theta, Real *xp, Real *yp, Real *zp); +void Rotate_Point(Real x, Real y, Real z, Real delta, Real phi, Real theta, Real *xp, Real *yp, Real *zp); void Create_Log_File(struct parameters P) { @@ -78,7 +78,7 @@ void Write_Message_To_Log_File(const char *message) } /* Write Cholla Output Data */ -void WriteData(Grid3D &G, struct parameters P, int nfile) +void Write_Data(Grid3D &G, struct parameters P, int nfile) { cudaMemcpy(G.C.density, G.C.device, G.H.n_fields * G.H.n_cells * sizeof(Real), cudaMemcpyDeviceToHost); @@ -110,32 +110,32 @@ void WriteData(Grid3D &G, struct parameters P, int nfile) #ifndef ONLY_PARTICLES /*call the data output routine for Hydro data*/ if (nfile % P.n_hydro == 0) { - OutputData(G, P, nfile); + Output_Data(G, P, nfile); } #endif // This function does other checks to make sure it is valid (3D only) #ifdef HDF5 if (P.n_out_float32 && nfile % P.n_out_float32 == 0) { - OutputFloat32(G, P, nfile); + Output_Float32(G, P, nfile); } #endif #ifdef PROJECTION if (nfile % P.n_projection == 0) { - OutputProjectedData(G, P, nfile); + Output_Projected_Data(G, P, nfile); } #endif /*PROJECTION*/ #ifdef ROTATED_PROJECTION if (nfile % P.n_rotated_projection == 0) { - OutputRotatedProjectedData(G, P, nfile); + Output_Rotated_Projected_Data(G, P, nfile); } #endif /*ROTATED_PROJECTION*/ #ifdef SLICES if (nfile % P.n_slice == 0) { - OutputSlices(G, P, nfile); + Output_Slices(G, P, nfile); } #endif /*SLICES*/ @@ -179,7 +179,7 @@ void WriteData(Grid3D &G, struct parameters P, int nfile) } /* Output the grid data to file. */ -void OutputData(Grid3D &G, struct parameters P, int nfile) +void Output_Data(Grid3D &G, struct parameters P, int nfile) { // create the filename std::string filename(P.outdir); @@ -257,7 +257,7 @@ void OutputData(Grid3D &G, struct parameters P, int nfile) #endif } -void OutputFloat32(Grid3D &G, struct parameters P, int nfile) +void Output_Float32(Grid3D &G, struct parameters P, int nfile) { #ifdef HDF5 Header H = G.H; @@ -315,28 +315,28 @@ void OutputFloat32(Grid3D &G, struct parameters P, int nfile) auto *dataset_buffer = (float *)malloc(buffer_size * sizeof(float)); if (P.out_float32_density > 0) { - WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, + Write_HDF5_Field_3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, device_dataset_vector.data(), G.C.d_density, "/density"); } if (P.out_float32_momentum_x > 0) { - WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, + Write_HDF5_Field_3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, device_dataset_vector.data(), G.C.d_momentum_x, "/momentum_x"); } if (P.out_float32_momentum_y > 0) { - WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, + Write_HDF5_Field_3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, device_dataset_vector.data(), G.C.d_momentum_y, "/momentum_y"); } if (P.out_float32_momentum_z > 0) { - WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, + Write_HDF5_Field_3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, device_dataset_vector.data(), G.C.d_momentum_z, "/momentum_z"); } if (P.out_float32_Energy > 0) { - WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, + Write_HDF5_Field_3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, device_dataset_vector.data(), G.C.d_Energy, "/Energy"); } #ifdef DE if (P.out_float32_GasEnergy > 0) { - WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, + Write_HDF5_Field_3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, device_dataset_vector.data(), G.C.d_GasEnergy, "/GasEnergy"); } #endif // DE @@ -345,17 +345,17 @@ void OutputFloat32(Grid3D &G, struct parameters P, int nfile) // TODO (by Alwin, for anyone) : Repair output format if needed and remove these chprintfs when appropriate if (P.out_float32_magnetic_x > 0) { chprintf("WARNING: MHD float-32 output has a different output format than float-64\n"); - WriteHDF5Field3D(H.nx, H.ny, nx_dset + 1, ny_dset + 1, nz_dset + 1, H.n_ghost - 1, file_id, dataset_buffer, + Write_HDF5_Field_3D(H.nx, H.ny, nx_dset + 1, ny_dset + 1, nz_dset + 1, H.n_ghost - 1, file_id, dataset_buffer, device_dataset_vector.data(), G.C.d_magnetic_x, "/magnetic_x"); } if (P.out_float32_magnetic_y > 0) { chprintf("WARNING: MHD float-32 output has a different output format than float-64\n"); - WriteHDF5Field3D(H.nx, H.ny, nx_dset + 1, ny_dset + 1, nz_dset + 1, H.n_ghost - 1, file_id, dataset_buffer, + Write_HDF5_Field_3D(H.nx, H.ny, nx_dset + 1, ny_dset + 1, nz_dset + 1, H.n_ghost - 1, file_id, dataset_buffer, device_dataset_vector.data(), G.C.d_magnetic_y, "/magnetic_y"); } if (P.out_float32_magnetic_z > 0) { chprintf("WARNING: MHD float-32 output has a different output format than float-64\n"); - WriteHDF5Field3D(H.nx, H.ny, nx_dset + 1, ny_dset + 1, nz_dset + 1, H.n_ghost - 1, file_id, dataset_buffer, + Write_HDF5_Field_3D(H.nx, H.ny, nx_dset + 1, ny_dset + 1, nz_dset + 1, H.n_ghost - 1, file_id, dataset_buffer, device_dataset_vector.data(), G.C.d_magnetic_z, "/magnetic_z"); } @@ -375,7 +375,7 @@ void OutputFloat32(Grid3D &G, struct parameters P, int nfile) } /* Output a projection of the grid data to file. */ -void OutputProjectedData(Grid3D &G, struct parameters P, int nfile) +void Output_Projected_Data(Grid3D &G, struct parameters P, int nfile) { #ifdef HDF5 hid_t file_id; @@ -404,23 +404,23 @@ void OutputProjectedData(Grid3D &G, struct parameters P, int nfile) #ifdef MPI_CHOLLA if (status < 0) { - printf("OutputProjectedData: File write failed. ProcID: %d\n", procID); + printf("Output_Projected_Data: File write failed. ProcID: %d\n", procID); chexit(-1); } #else if (status < 0) { - printf("OutputProjectedData: File write failed.\n"); + printf("Output_Projected_Data: File write failed.\n"); exit(-1); } #endif #else - printf("OutputProjected Data only defined for hdf5 writes.\n"); + printf("Output_Projected_Data only defined for hdf5 writes.\n"); #endif // HDF5 } /* Output a rotated projection of the grid data to file. */ -void OutputRotatedProjectedData(Grid3D &G, struct parameters P, int nfile) +void Output_Rotated_Projected_Data(Grid3D &G, struct parameters P, int nfile) { #ifdef HDF5 hid_t file_id; @@ -461,12 +461,12 @@ void OutputRotatedProjectedData(Grid3D &G, struct parameters P, int nfile) status = H5Fclose(file_id); #ifdef MPI_CHOLLA if (status < 0) { - printf("OutputRotatedProjectedData: File write failed. ProcID: %d\n", procID); + printf("Output_Rotated_Projected_Data: File write failed. ProcID: %d\n", procID); chexit(-1); } #else if (status < 0) { - printf("OutputRotatedProjectedData: File write failed.\n"); + printf("Output_Rotated_Projected_Data: File write failed.\n"); exit(-1); } #endif @@ -509,23 +509,23 @@ void OutputRotatedProjectedData(Grid3D &G, struct parameters P, int nfile) #ifdef MPI_CHOLLA if (status < 0) { - printf("OutputRotatedProjectedData: File write failed. ProcID: %d\n", procID); + printf("Output_Rotated_Projected_Data: File write failed. ProcID: %d\n", procID); chexit(-1); } #else if (status < 0) { - printf("OutputRotatedProjectedData: File write failed.\n"); + printf("Output_Rotated_Projected_Data: File write failed.\n"); exit(-1); } #endif #else - printf("OutputRotatedProjectedData only defined for HDF5 writes.\n"); + printf("Output_Rotated_Projected_Data only defined for HDF5 writes.\n"); #endif } /* Output xy, xz, and yz slices of the grid data. */ -void OutputSlices(Grid3D &G, struct parameters P, int nfile) +void Output_Slices(Grid3D &G, struct parameters P, int nfile) { #ifdef HDF5 hid_t file_id; @@ -554,17 +554,17 @@ void OutputSlices(Grid3D &G, struct parameters P, int nfile) #ifdef MPI_CHOLLA if (status < 0) { - printf("OutputSlices: File write failed. ProcID: %d\n", procID); + printf("Output_Slices: File write failed. ProcID: %d\n", procID); chexit(-1); } #else // MPI_CHOLLA is not defined if (status < 0) { - printf("OutputSlices: File write failed.\n"); + printf("Output_Slices: File write failed.\n"); exit(-1); } #endif // MPI_CHOLLA #else // HDF5 is not defined - printf("OutputSlices only defined for hdf5 writes.\n"); + printf("Output_Slices only defined for hdf5 writes.\n"); #endif // HDF5 } @@ -773,7 +773,7 @@ void Grid3D::Write_Header_Rotated_HDF5(hid_t file_id) Get_Position(H.n_ghost + i * (H.nx - 2 * H.n_ghost), H.n_ghost + j * (H.ny - 2 * H.n_ghost), H.n_ghost + k * (H.nz - 2 * H.n_ghost), &x, &y, &z); // rotate cell position - rotate_point(x, y, z, R.delta, R.phi, R.theta, &xp, &yp, &zp); + Rotate_Point(x, y, z, R.delta, R.phi, R.theta, &xp, &yp, &zp); // find projected location // assumes box centered at [0,0,0] alpha = (R.nx * (xp + 0.5 * R.Lx) / R.Lx); @@ -1491,11 +1491,11 @@ void Grid3D::Write_Grid_HDF5(hid_t file_id) #ifdef MHD if (H.Output_Complete_Data) { - WriteHDF5Field3D(H.nx, H.ny, H.nx_real + 1, H.ny_real, H.nz_real, H.n_ghost, file_id, dataset_buffer, + Write_HDF5_Field_3D(H.nx, H.ny, H.nx_real + 1, H.ny_real, H.nz_real, H.n_ghost, file_id, dataset_buffer, device_dataset_vector.data(), C.d_magnetic_x, "/magnetic_x", 0); - WriteHDF5Field3D(H.nx, H.ny, H.nx_real, H.ny_real + 1, H.nz_real, H.n_ghost, file_id, dataset_buffer, + Write_HDF5_Field_3D(H.nx, H.ny, H.nx_real, H.ny_real + 1, H.nz_real, H.n_ghost, file_id, dataset_buffer, device_dataset_vector.data(), C.d_magnetic_y, "/magnetic_y", 1); - WriteHDF5Field3D(H.nx, H.ny, H.nx_real, H.ny_real, H.nz_real + 1, H.n_ghost, file_id, dataset_buffer, + Write_HDF5_Field_3D(H.nx, H.ny, H.nx_real, H.ny_real, H.nz_real + 1, H.n_ghost, file_id, dataset_buffer, device_dataset_vector.data(), C.d_magnetic_z, "/magnetic_z", 2); } #endif // MHD @@ -1736,7 +1736,7 @@ void Grid3D::Write_Rotated_Projection_HDF5(hid_t file_id) z += eps * H.dz * (drand48() - 0.5); // rotate cell positions - rotate_point(x, y, z, R.delta, R.phi, R.theta, &xp, &yp, &zp); + Rotate_Point(x, y, z, R.delta, R.phi, R.theta, &xp, &yp, &zp); // find projected locations // assumes box centered at [0,0,0] @@ -2601,7 +2601,7 @@ int chprintf(const char *__restrict sdata, ...) // NOLINT(cert-dcl50-cpp) return code; } -void rotate_point(Real x, Real y, Real z, Real delta, Real phi, Real theta, Real *xp, Real *yp, Real *zp) +void Rotate_Point(Real x, Real y, Real z, Real delta, Real phi, Real theta, Real *xp, Real *yp, Real *zp) { Real cd, sd, cp, sp, ct, st; // sines and cosines Real a00, a01, a02; // rotation matrix elements @@ -2643,7 +2643,7 @@ void rotate_point(Real x, Real y, Real z, Real delta, Real phi, Real theta, Real *zp = a20 * x + a21 * y + a22 * z; } -void write_debug(Real *Value, const char *fname, int nValues, int iProc) +void Write_Debug(Real *Value, const char *fname, int nValues, int iProc) { char fn[1024]; int ret; diff --git a/src/io/io.h b/src/io/io.h index a24fe788c..26569d146 100644 --- a/src/io/io.h +++ b/src/io/io.h @@ -8,22 +8,22 @@ #include "../grid/grid3D.h" /* Write the data */ -void WriteData(Grid3D& G, struct parameters P, int nfile); +void Write_Data(Grid3D& G, struct parameters P, int nfile); /* Output the grid data to file. */ -void OutputData(Grid3D& G, struct parameters P, int nfile); +void Output_Data(Grid3D& G, struct parameters P, int nfile); /* Output the grid data to file as 32-bit floats. */ -void OutputFloat32(Grid3D& G, struct parameters P, int nfile); +void Output_Float32(Grid3D& G, struct parameters P, int nfile); /* Output a projection of the grid data to file. */ -void OutputProjectedData(Grid3D& G, struct parameters P, int nfile); +void Output_Projected_Data(Grid3D& G, struct parameters P, int nfile); /* Output a rotated projection of the grid data to file. */ -void OutputRotatedProjectedData(Grid3D& G, struct parameters P, int nfile); +void Output_Rotated_Projected_Data(Grid3D& G, struct parameters P, int nfile); /* Output xy, xz, and yz slices of the grid data to file. */ -void OutputSlices(Grid3D& G, struct parameters P, int nfile); +void Output_Slices(Grid3D& G, struct parameters P, int nfile); /* MPI-safe printf routine */ int chprintf(const char* __restrict sdata, ...); @@ -49,7 +49,7 @@ void Create_Log_File(struct parameters P); void Write_Message_To_Log_File(const char* message); -void write_debug(Real* Value, const char* fname, int nValues, int iProc); +void Write_Debug(Real* Value, const char* fname, int nValues, int iProc); #ifdef HDF5 // From io/io.cpp @@ -70,8 +70,8 @@ void Fill_Grid_From_HDF5_Buffer(int nx, int ny, int nz, int nx_real, int ny_real // From io/io_gpu.cu // Use GPU to pack source -> device_buffer, then copy device_buffer -> buffer, // then write HDF5 field -void WriteHDF5Field3D(int nx, int ny, int nx_real, int ny_real, int nz_real, int n_ghost, hid_t file_id, float* buffer, +void Write_HDF5_Field_3D(int nx, int ny, int nx_real, int ny_real, int nz_real, int n_ghost, hid_t file_id, float* buffer, float* device_buffer, Real* source, const char* name, int mhd_direction = -1); -void WriteHDF5Field3D(int nx, int ny, int nx_real, int ny_real, int nz_real, int n_ghost, hid_t file_id, double* buffer, +void Write_HDF5_Field_3D(int nx, int ny, int nx_real, int ny_real, int nz_real, int n_ghost, hid_t file_id, double* buffer, double* device_buffer, Real* source, const char* name, int mhd_direction = -1); #endif diff --git a/src/io/io_gpu.cu b/src/io/io_gpu.cu index 34da27ff2..9fa0b20e6 100644 --- a/src/io/io_gpu.cu +++ b/src/io/io_gpu.cu @@ -88,7 +88,7 @@ __global__ void CopyReal3D_GPU_Kernel(int nx, int ny, int nx_real, int ny_real, // When buffer is double, automatically use the double version of everything // using function overloading -void WriteHDF5Field3D(int nx, int ny, int nx_real, int ny_real, int nz_real, int n_ghost, hid_t file_id, double* buffer, +void Write_HDF5_Field_3D(int nx, int ny, int nx_real, int ny_real, int nz_real, int n_ghost, hid_t file_id, double* buffer, double* device_buffer, Real* device_source, const char* name, int mhd_direction) { herr_t status; @@ -116,7 +116,7 @@ void WriteHDF5Field3D(int nx, int ny, int nx_real, int ny_real, int nz_real, int // When buffer is float, automatically use the float version of everything using // function overloading -void WriteHDF5Field3D(int nx, int ny, int nx_real, int ny_real, int nz_real, int n_ghost, hid_t file_id, float* buffer, +void Write_HDF5_Field_3D(int nx, int ny, int nx_real, int ny_real, int nz_real, int n_ghost, hid_t file_id, float* buffer, float* device_buffer, Real* device_source, const char* name, int mhd_direction) { herr_t status; diff --git a/src/main.cpp b/src/main.cpp index b33ee8eba..03b8ecaec 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -43,7 +43,7 @@ int main(int argc, char *argv[]) #endif // CPU_TIME // start the total time - start_total = get_time(); + start_total = Get_Time(); /* Initialize MPI communication */ #ifdef MPI_CHOLLA @@ -72,7 +72,7 @@ int main(int argc, char *argv[]) Grid3D G; // read in the parameters - parse_params(param_file, &P, argc, argv); + Parse_Params(param_file, &P, argc, argv); // and output to screen chprintf("Git Commit Hash = %s\n", GIT_HASH); chprintf("Macro Flags = %s\n", MACRO_FLAGS); @@ -204,7 +204,7 @@ int main(int argc, char *argv[]) if (!is_restart || G.H.Output_Now) { // write the initial conditions to file chprintf("Writing initial conditions to file...\n"); - WriteData(G, P, nfile); + Write_Data(G, P, nfile); } // add one to the output file count nfile++; @@ -219,7 +219,7 @@ int main(int argc, char *argv[]) outtime += P.outstep; #ifdef CPU_TIME - stop_init = get_time(); + stop_init = Get_Time(); init = stop_init - start_total; #ifdef MPI_CHOLLA init_min = ReduceRealMin(init); @@ -244,7 +244,7 @@ int main(int argc, char *argv[]) #ifdef CPU_TIME G.Timer.Total.Start(); #endif // CPU_TIME - start_step = get_time(); + start_step = Get_Time(); // calculate the timestep by calling MPI_Allreduce G.set_dt(dti); @@ -305,8 +305,8 @@ int main(int argc, char *argv[]) #endif // get the time to compute the total timestep - stop_step = get_time(); - stop_total = get_time(); + stop_step = Get_Time(); + stop_total = Get_Time(); G.H.t_wall = stop_total - start_total; #ifdef MPI_CHOLLA G.H.t_wall = ReduceRealMax(G.H.t_wall); @@ -335,7 +335,7 @@ int main(int argc, char *argv[]) if (G.H.t == outtime || G.H.Output_Now) { #ifdef OUTPUT /*output the grid data*/ - WriteData(G, P, nfile); + Write_Data(G, P, nfile); // add one to the output file count nfile++; #endif // OUTPUT @@ -350,7 +350,7 @@ int main(int argc, char *argv[]) #ifdef N_STEPS_LIMIT // Exit the loop when reached the limit number of steps (optional) if (G.H.n_step == N_STEPS_LIMIT) { - WriteData(G, P, nfile); + Write_Data(G, P, nfile); break; } #endif diff --git a/src/main_tests.cpp b/src/main_tests.cpp index ee58fbd06..1b51f0aea 100644 --- a/src/main_tests.cpp +++ b/src/main_tests.cpp @@ -41,10 +41,10 @@ class InputParser * \param option The string option to look for * \return const std::string& The option the follows a given flag */ - const std::string &getCmdOption(const std::string &option) const + const std::string &Get_Cmd_Option(const std::string &option) const { // First check that the option exists - if (not cmdOptionExists(option)) { + if (not Cmd_Option_Exists(option)) { std::string errMessage = "Error: argument '" + option + "' not found. "; throw std::invalid_argument(errMessage); } @@ -69,7 +69,7 @@ class InputParser * \return true The option flag exists in argv * \return false The option flage does not exist in argv */ - bool cmdOptionExists(const std::string &option) const + bool Cmd_Option_Exists(const std::string &option) const { return std::find(this->_tokens.begin(), this->_tokens.end(), option) != this->_tokens.end(); } @@ -119,17 +119,17 @@ int main(int argc, char **argv) // Initialize global variables InputParser input(argc, argv); - globalChollaRoot.init(input.getCmdOption("--cholla-root")); - globalChollaBuild.init(input.getCmdOption("--build-type")); - globalChollaMachine.init(input.getCmdOption("--machine")); - if (input.cmdOptionExists("--mpi-launcher")) { - globalMpiLauncher.init(input.getCmdOption("--mpi-launcher")); + globalChollaRoot.init(input.Get_Cmd_Option("--cholla-root")); + globalChollaBuild.init(input.Get_Cmd_Option("--build-type")); + globalChollaMachine.init(input.Get_Cmd_Option("--machine")); + if (input.Cmd_Option_Exists("--mpi-launcher")) { + globalMpiLauncher.init(input.Get_Cmd_Option("--mpi-launcher")); } else { globalMpiLauncher.init("mpirun -np"); } - globalRunCholla = not input.cmdOptionExists("--runCholla=false"); - globalCompareSystemTestResults = not input.cmdOptionExists("--compareSystemTestResults=false"); + globalRunCholla = not input.Cmd_Option_Exists("--runCholla=false"); + globalCompareSystemTestResults = not input.Cmd_Option_Exists("--compareSystemTestResults=false"); // Run test and return result return RUN_ALL_TESTS(); diff --git a/src/mhd/ct_electric_fields_tests.cu b/src/mhd/ct_electric_fields_tests.cu index afbaada66..2cbbac2e8 100644 --- a/src/mhd/ct_electric_fields_tests.cu +++ b/src/mhd/ct_electric_fields_tests.cu @@ -91,7 +91,7 @@ class tMHDCalculateCTElectricFields : public ::testing::Test * \brief Launch the kernel and check results * */ - void runTest() + void Run_Test() { // Copy values to GPU CudaSafeCall(cudaMemcpy(dev_fluxX, fluxX.data(), fluxX.size() * sizeof(Real), cudaMemcpyHostToDevice)); @@ -115,7 +115,7 @@ class tMHDCalculateCTElectricFields : public ::testing::Test for (size_t i = 0; i < fiducialData.size(); i++) { int xid, yid, zid; cuda_utilities::compute3DIndices(i, nx, ny, xid, yid, zid); - testingUtilities::checkResults(fiducialData.at(i), testCTElectricFields.at(i), + testingUtilities::Check_Results(fiducialData.at(i), testCTElectricFields.at(i), "value at i = " + std::to_string(i) + ", xid = " + std::to_string(xid) + ", yid = " + std::to_string(yid) + ", zid = " + std::to_string(zid)); } @@ -132,7 +132,7 @@ TEST_F(tMHDCalculateCTElectricFields, PositiveVelocityExpectCorrectOutput) fiducialData.at(23) = 61.768055665002557; // Launch kernel and check results - runTest(); + Run_Test(); } // ============================================================================= @@ -153,7 +153,7 @@ TEST_F(tMHDCalculateCTElectricFields, NegativeVelocityExpectCorrectOutput) } // Launch kernel and check results - runTest(); + Run_Test(); } // ============================================================================= @@ -174,7 +174,7 @@ TEST_F(tMHDCalculateCTElectricFields, ZeroVelocityExpectCorrectOutput) } // Launch kernel and check results - runTest(); + Run_Test(); } // ============================================================================= #endif // MHD diff --git a/src/mhd/magnetic_divergence_tests.cu b/src/mhd/magnetic_divergence_tests.cu index c1c44a9a7..a898ae650 100644 --- a/src/mhd/magnetic_divergence_tests.cu +++ b/src/mhd/magnetic_divergence_tests.cu @@ -61,7 +61,7 @@ TEST(tMHDGrid3DcheckMagneticDivergence, CorrectInputExpectCorrectOutput) MPI_Finalize(); // Perform Comparison Real const fiducialDivergence = 3.6318132783263106 / 1E15; - testingUtilities::checkResults(fiducialDivergence, max_magnetic_divergence, "maximum divergence"); + testingUtilities::Check_Results(fiducialDivergence, max_magnetic_divergence, "maximum divergence"); } // ============================================================================= // End of tests for the magnetic field divergence functions diff --git a/src/mhd/magnetic_update_tests.cu b/src/mhd/magnetic_update_tests.cu index 9b78a8f5d..5ac59060b 100644 --- a/src/mhd/magnetic_update_tests.cu +++ b/src/mhd/magnetic_update_tests.cu @@ -79,7 +79,7 @@ class tMHDUpdateMagneticField3D : public ::testing::Test * \brief Launch the kernel and check results * */ - void runTest() + void Run_Test() { // Copy values to GPU CudaSafeCall( @@ -103,7 +103,7 @@ class tMHDUpdateMagneticField3D : public ::testing::Test for (size_t i = 0; i < fiducialData.size(); i++) { int xid, yid, zid; cuda_utilities::compute3DIndices(i, nx, ny, xid, yid, zid); - testingUtilities::checkResults(fiducialData.at(i), destinationGrid.at(i), + testingUtilities::Check_Results(fiducialData.at(i), destinationGrid.at(i), "value at i = " + std::to_string(i) + ", xid = " + std::to_string(xid) + ", yid = " + std::to_string(yid) + ", zid = " + std::to_string(zid)); } @@ -120,7 +120,7 @@ TEST_F(tMHDUpdateMagneticField3D, CorrectInputExpectCorrectOutput) fiducialData.at(202) = 204.56; // Launch kernel and check results - runTest(); + Run_Test(); } // ============================================================================= #endif // MHD diff --git a/src/model/disk_ICs.cpp b/src/model/disk_ICs.cpp index 8e4bede3f..ab59e2805 100644 --- a/src/model/disk_ICs.cpp +++ b/src/model/disk_ICs.cpp @@ -21,10 +21,10 @@ // #define DISK_ICS // function with logarithms used in NFW definitions -Real log_func(Real y) { return log(1 + y) - y / (1 + y); } +Real Log_Func(Real y) { return log(1 + y) - y / (1 + y); } // vertical acceleration in NFW halo -Real gz_halo_D3D(Real R, Real z, Real *hdp) +Real Gz_Halo_D3D(Real R, Real z, Real *hdp) { Real M_h = hdp[2]; // halo mass Real R_h = hdp[5]; // halo scale length @@ -33,16 +33,16 @@ Real gz_halo_D3D(Real R, Real z, Real *hdp) Real x = r / R_h; Real z_comp = z / r; - Real A = log_func(x); + Real A = Log_Func(x); Real B = 1.0 / (r * r); - Real C = GN * M_h / log_func(c_vir); + Real C = GN * M_h / Log_Func(c_vir); // checked with wolfram alpha return -C * A * B * z_comp; } // radial acceleration in NFW halo -Real gr_halo_D3D(Real R, Real z, Real *hdp) +Real Gr_Halo_D3D(Real R, Real z, Real *hdp) { Real M_h = hdp[2]; // halo mass Real R_h = hdp[5]; // halo scale length @@ -51,16 +51,16 @@ Real gr_halo_D3D(Real R, Real z, Real *hdp) Real x = r / R_h; Real r_comp = R / r; - Real A = log_func(x); + Real A = Log_Func(x); Real B = 1.0 / (r * r); - Real C = GN * M_h / log_func(c_vir); + Real C = GN * M_h / Log_Func(c_vir); // checked with wolfram alpha return -C * A * B * r_comp; } // disk radial surface density profile -Real Sigma_disk_D3D(Real r, Real *hdp) +Real Sigma_Disk_D3D(Real r, Real *hdp) { // return the exponential surface density Real Sigma_0 = hdp[9]; @@ -80,7 +80,7 @@ Real Sigma_disk_D3D(Real r, Real *hdp) } // vertical acceleration in miyamoto nagai -Real gz_disk_D3D(Real R, Real z, Real *hdp) +Real Gz_Disk_D3D(Real R, Real z, Real *hdp) { Real M_d = hdp[1]; // disk mass Real R_d = hdp[6]; // MN disk length @@ -96,7 +96,7 @@ Real gz_disk_D3D(Real R, Real z, Real *hdp) } // radial acceleration in miyamoto nagai -Real gr_disk_D3D(Real R, Real z, Real *hdp) +Real Gr_Disk_D3D(Real R, Real z, Real *hdp) { Real M_d = hdp[1]; // disk mass Real R_d = hdp[6]; // MN disk length @@ -110,7 +110,7 @@ Real gr_disk_D3D(Real R, Real z, Real *hdp) } // NFW halo potential -Real phi_halo_D3D(Real R, Real z, Real *hdp) +Real Phi_Halo_D3D(Real R, Real z, Real *hdp) { Real M_h = hdp[2]; // halo mass Real R_h = hdp[5]; // halo scale length @@ -118,7 +118,7 @@ Real phi_halo_D3D(Real R, Real z, Real *hdp) Real r = sqrt(R * R + z * z); // spherical radius Real x = r / R_h; - Real C = GN * M_h / (R_h * log_func(c_vir)); + Real C = GN * M_h / (R_h * Log_Func(c_vir)); // limit x to non-zero value if (x < 1.0e-9) { @@ -130,7 +130,7 @@ Real phi_halo_D3D(Real R, Real z, Real *hdp) } // Miyamoto-Nagai potential -Real phi_disk_D3D(Real R, Real z, Real *hdp) +Real Phi_Disk_D3D(Real R, Real z, Real *hdp) { Real M_d = hdp[1]; // disk mass Real R_d = hdp[6]; // MN disk length @@ -144,17 +144,17 @@ Real phi_disk_D3D(Real R, Real z, Real *hdp) } // total potential -Real phi_total_D3D(Real R, Real z, Real *hdp) +Real Phi_Total_D3D(Real R, Real z, Real *hdp) { - Real Phi_A = phi_halo_D3D(R, z, hdp); - Real Phi_B = phi_disk_D3D(R, z, hdp); + Real Phi_A = Phi_Halo_D3D(R, z, hdp); + Real Phi_B = Phi_Disk_D3D(R, z, hdp); return Phi_A + Phi_B; } -Real phi_hot_halo_D3D(Real r, Real *hdp) +Real Phi_Hot_Halo_D3D(Real r, Real *hdp) { - Real Phi_A = phi_halo_D3D(0, r, hdp); - Real Phi_B = phi_disk_D3D(0, r, hdp); + Real Phi_A = Phi_Halo_D3D(0, r, hdp); + Real Phi_B = Phi_Disk_D3D(0, r, hdp); // return Phi_A; return Phi_A + Phi_B; } @@ -162,7 +162,7 @@ Real phi_hot_halo_D3D(Real r, Real *hdp) // returns the cell-centered vertical // location of the cell with index k // k is indexed at 0 at the lowest ghost cell -Real z_hc_D3D(int k, Real dz, int nz, int ng) +Real Z_Hc_D3D(int k, Real dz, int nz, int ng) { // checked that this works, such that the // if dz = L_z/nz for the real domain, then the z positions @@ -179,7 +179,7 @@ Real z_hc_D3D(int k, Real dz, int nz, int ng) // returns the cell-centered radial // location of the cell with index i -Real r_hc_D3D(int i, Real dr) +Real R_Hc_D3D(int i, Real dr) { // the zeroth cell is centered at 0.5*dr return 0.5 * dr + ((Real)i) * dr; @@ -190,7 +190,7 @@ Real r_hc_D3D(int i, Real dr) * \brief Calculate the density at spherical radius r due to a hydrostatic halo. Uses an analytic expression normalized by the value of the potential at the cooling radius. */ -void hydrostatic_ray_analytical_D3D(Real *rho, Real *r, Real *hdp, Real dr, int nr) +void Hydrostatic_Ray_Analytical_D3D(Real *rho, Real *r, Real *hdp, Real dr, int nr) { // Routine to determine the hydrostatic density profile // along a ray from the galaxy center @@ -208,7 +208,7 @@ void hydrostatic_ray_analytical_D3D(Real *rho, Real *r, Real *hdp, Real dr, int Real gmo = gamma - 1.0; // gamma-1 // compute the potential at the cooling radius - Phi_0 = phi_hot_halo_D3D(r_cool, hdp); + Phi_0 = Phi_Hot_Halo_D3D(r_cool, hdp); // We are normalizing to the central density // so D_rho == 1 @@ -216,8 +216,8 @@ void hydrostatic_ray_analytical_D3D(Real *rho, Real *r, Real *hdp, Real dr, int // store densities for (i = 0; i < nr; i++) { - r[i] = r_hc_D3D(i, dr); - rho[i] = rho_eos * pow(D_rho - gmo * (phi_hot_halo_D3D(r[i], hdp) - Phi_0) / (cs * cs), 1. / gmo); + r[i] = R_Hc_D3D(i, dr); + rho[i] = rho_eos * pow(D_rho - gmo * (Phi_Hot_Halo_D3D(r[i], hdp) - Phi_0) / (cs * cs), 1. / gmo); } } @@ -227,7 +227,7 @@ void hydrostatic_ray_analytical_D3D(Real *rho, Real *r, Real *hdp, Real dr, int assuming an isothermal gas. Uses an iterative to scheme to determine the density at (R, z=0) relative to (R=0,z=0), then sets the densities according to an analytic expression. */ -void hydrostatic_column_isothermal_D3D(Real *rho, Real R, Real *hdp, Real dz, int nz, int ng) +void Hydrostatic_Column_Isothermal_D3D(Real *rho, Real R, Real *hdp, Real dz, int nz, int ng) { // x is cell center in x direction // y is cell center in y direction @@ -284,13 +284,13 @@ void hydrostatic_column_isothermal_D3D(Real *rho, Real R, Real *hdp, Real dz, in // get the disk surface density // have verified that at this point, Sigma_r is correct - Sigma_r = Sigma_disk_D3D(R, hdp); + Sigma_r = Sigma_Disk_D3D(R, hdp); // set the z-column size, including ghost cells nzt = nz + 2 * ng; // compute the mid plane potential - Phi_0 = phi_total_D3D(R, 0, hdp); + Phi_0 = Phi_Total_D3D(R, 0, hdp); /* For an isothermal gas, we have @@ -307,21 +307,21 @@ void hydrostatic_column_isothermal_D3D(Real *rho, Real R, Real *hdp, Real dz, in // perform a simple check about the fraction of density within // a single cell - z_1 = z_hc_D3D(ks, dz, nz, ng) + 0.5 * dz; // cell ceiling - D_rho = (phi_total_D3D(R, z_1, hdp) - Phi_0) / (cs * cs); + z_1 = Z_Hc_D3D(ks, dz, nz, ng) + 0.5 * dz; // cell ceiling + D_rho = (Phi_Total_D3D(R, z_1, hdp) - Phi_0) / (cs * cs); if (exp(-1 * D_rho) < 0.1) { printf( "WARNING: >0.9 density in single cell R %e D_rho %e z_1 %e Phi(z) %e " "Phi_0 %E cs %e\n", - R, D_rho, z_1, phi_total_D3D(R, z_1, hdp), Phi_0, cs); + R, D_rho, z_1, Phi_Total_D3D(R, z_1, hdp), Phi_0, cs); } // let's find the cell above the disk where the // density falls by exp(-7) < 1.0e-3. for (k = ks; k < nzt; k++) { - z_1 = z_hc_D3D(k, dz, nz, ng) + 0.5 * dz; // cell ceiling - D_rho = (phi_total_D3D(R, z_1, hdp) - Phi_0) / (cs * cs); + z_1 = Z_Hc_D3D(k, dz, nz, ng) + 0.5 * dz; // cell ceiling + D_rho = (Phi_Total_D3D(R, z_1, hdp) - Phi_0) / (cs * cs); if (D_rho >= 7.0) { break; } @@ -340,7 +340,7 @@ void hydrostatic_column_isothermal_D3D(Real *rho, Real R, Real *hdp, Real dz, in phi_int = 0.0; for (k = 0; k < n_int; k++) { z_0 = 0.5 * dz_int + dz_int * ((Real)k); - Delta_phi = (phi_total_D3D(R, z_0, hdp) - Phi_0) / (cs * cs); + Delta_phi = (Phi_Total_D3D(R, z_0, hdp) - Phi_0) / (cs * cs); phi_int += exp(-1 * Delta_phi) * dz_int; } @@ -353,8 +353,8 @@ void hydrostatic_column_isothermal_D3D(Real *rho, Real R, Real *hdp, Real dz, in n_int = 10; // integrate over a 1/10 cell for (k = ks; k < nzt; k++) { // find cell center, bottom, and top - z_int_min = z_hc_D3D(k, dz, nz, ng) - 0.5 * dz; - z_int_max = z_hc_D3D(k, dz, nz, ng) + 0.5 * dz; + z_int_min = Z_Hc_D3D(k, dz, nz, ng) - 0.5 * dz; + z_int_max = Z_Hc_D3D(k, dz, nz, ng) + 0.5 * dz; if (z_int_max > z_disk_max) { z_int_max = z_disk_max; } @@ -363,7 +363,7 @@ void hydrostatic_column_isothermal_D3D(Real *rho, Real R, Real *hdp, Real dz, in phi_int = 0.0; for (i = 0; i < n_int; i++) { z_0 = 0.5 * dz_int + dz_int * ((Real)i) + z_int_min; - Delta_phi = (phi_total_D3D(R, z_0, hdp) - Phi_0) / (cs * cs); + Delta_phi = (Phi_Total_D3D(R, z_0, hdp) - Phi_0) / (cs * cs); phi_int += rho_0 * exp(-1 * Delta_phi) * dz_int; } @@ -404,7 +404,7 @@ void hydrostatic_column_isothermal_D3D(Real *rho, Real R, Real *hdp, Real dz, in * \brief Calculate the 1D density distribution in a hydrostatic column. Uses an iterative to scheme to determine the density at (R, z=0) relative to (R=0,z=0), then sets the densities according to an analytic expression. */ -void hydrostatic_column_analytical_D3D(Real *rho, Real R, Real *hdp, Real dz, int nz, int ng) +void Hydrostatic_Column_Analytical_D3D(Real *rho, Real R, Real *hdp, Real dz, int nz, int ng) { // x is cell center in x direction // y is cell center in y direction @@ -472,13 +472,13 @@ void hydrostatic_column_analytical_D3D(Real *rho, Real R, Real *hdp, Real dz, in // get the disk surface density // have verified that at this point, Sigma_r is correct - Sigma_r = Sigma_disk_D3D(R, hdp); + Sigma_r = Sigma_Disk_D3D(R, hdp); // set the z-column size, including ghost cells nzt = nz + 2 * ng; // compute the mid plane potential - Phi_0 = phi_total_D3D(R, 0, hdp); + Phi_0 = Phi_Total_D3D(R, 0, hdp); // pick a fiducial guess for density ratio D_rho = pow(Sigma_r / Sigma_0, gamma - 1.); @@ -504,8 +504,8 @@ void hydrostatic_column_analytical_D3D(Real *rho, Real R, Real *hdp, Real dz, in z_0 = 1.0e-3; z_1 = 1.0e-2; while (!flag_phi) { - A_0 = D_rho - (phi_total_D3D(R, z_0, hdp) - Phi_0) / (cs * cs); - A_1 = D_rho - (phi_total_D3D(R, z_1, hdp) - Phi_0) / (cs * cs); + A_0 = D_rho - (Phi_Total_D3D(R, z_0, hdp) - Phi_0) / (cs * cs); + A_1 = D_rho - (Phi_Total_D3D(R, z_1, hdp) - Phi_0) / (cs * cs); z_2 = z_1 - A_1 * (z_1 - z_0) / (A_1 - A_0); if (fabs(z_2 - z_1) / fabs(z_1) > 10.) { z_2 = 10. * z_1; @@ -516,8 +516,8 @@ void hydrostatic_column_analytical_D3D(Real *rho, Real R, Real *hdp, Real dz, in if (fabs(z_1 - z_0) < tol) { flag_phi = 1; - A_0 = D_rho - (phi_total_D3D(R, z_0, hdp) - Phi_0) / (cs * cs); - A_1 = D_rho - (phi_total_D3D(R, z_1, hdp) - Phi_0) / (cs * cs); + A_0 = D_rho - (Phi_Total_D3D(R, z_0, hdp) - Phi_0) / (cs * cs); + A_1 = D_rho - (Phi_Total_D3D(R, z_1, hdp) - Phi_0) / (cs * cs); // make sure we haven't crossed 0 if (A_1 < 0) { z_1 = z_0; @@ -528,14 +528,14 @@ void hydrostatic_column_analytical_D3D(Real *rho, Real R, Real *hdp, Real dz, in printf("Something wrong in determining central density...\n"); printf("iter_phi = %d\n", iter_phi); printf("z_0 %e z_1 %e z_2 %e A_0 %e A_1 %e phi_0 %e phi_1 %e\n", z_0, z_1, z_2, A_0, A_1, - phi_total_D3D(R, z_0, hdp), phi_total_D3D(R, z_1, hdp)); + Phi_Total_D3D(R, z_0, hdp), Phi_Total_D3D(R, z_1, hdp)); #ifdef MPI_CHOLLA MPI_Finalize(); #endif exit(0); } } - A_1 = D_rho - (phi_total_D3D(R, z_1, hdp) - Phi_0) / (cs * cs); + A_1 = D_rho - (Phi_Total_D3D(R, z_1, hdp) - Phi_0) / (cs * cs); z_disk_max = z_1; // Compute surface density @@ -545,7 +545,7 @@ void hydrostatic_column_analytical_D3D(Real *rho, Real R, Real *hdp, Real dz, in phi_int = 0.0; for (k = 0; k < n_int; k++) { z_0 = 0.5 * dz_int + dz_int * ((Real)k); - Delta_phi = (phi_total_D3D(R, z_0, hdp) - Phi_0) / (cs * cs); + Delta_phi = (Phi_Total_D3D(R, z_0, hdp) - Phi_0) / (cs * cs); A = D_rho - Delta_phi; phi_int += rho_eos * pow((gamma - 1) * A, 1. / (gamma - 1.)) * dz_int; } @@ -576,8 +576,8 @@ void hydrostatic_column_analytical_D3D(Real *rho, Real R, Real *hdp, Real dz, in n_int = 10; // integrate over a 1/10 cell for (k = ks; k < nzt; k++) { // find cell center, bottom, and top - z_int_min = z_hc_D3D(k, dz, nz, ng) - 0.5 * dz; - z_int_max = z_hc_D3D(k, dz, nz, ng) + 0.5 * dz; + z_int_min = Z_Hc_D3D(k, dz, nz, ng) - 0.5 * dz; + z_int_max = Z_Hc_D3D(k, dz, nz, ng) + 0.5 * dz; if (z_int_max > z_disk_max) { z_int_max = z_disk_max; } @@ -586,7 +586,7 @@ void hydrostatic_column_analytical_D3D(Real *rho, Real R, Real *hdp, Real dz, in phi_int = 0.0; for (i = 0; i < n_int; i++) { z_0 = 0.5 * dz_int + dz_int * ((Real)i) + z_int_min; - Delta_phi = (phi_total_D3D(R, z_0, hdp) - Phi_0) / (cs * cs); + Delta_phi = (Phi_Total_D3D(R, z_0, hdp) - Phi_0) / (cs * cs); A = D_rho - Delta_phi; phi_int += rho_eos * pow((gamma - 1) * A, 1. / (gamma - 1.)) * dz_int; } @@ -611,7 +611,7 @@ void hydrostatic_column_analytical_D3D(Real *rho, Real R, Real *hdp, Real dz, in km = ng + nz / 2 - (k - ks) - 1; } rho[km] = rho[k]; - Delta_phi = (phi_total_D3D(R, z_hc_D3D(k, dz, nz, ng), hdp) - Phi_0) / (cs * cs); + Delta_phi = (Phi_Total_D3D(R, Z_Hc_D3D(k, dz, nz, ng), hdp) - Phi_0) / (cs * cs); } // check the surface density @@ -621,13 +621,13 @@ void hydrostatic_column_analytical_D3D(Real *rho, Real R, Real *hdp, Real dz, in } } -Real determine_rho_eos_D3D(Real cs, Real Sigma_0, Real *hdp) +Real Determine_Rho_EOS_D3D(Real cs, Real Sigma_0, Real *hdp) { // OK, we need to set rho_eos based on the central surface density. // and the central potential int k; Real z_pos, rho_eos; - Real Phi_0 = phi_total_D3D(0, 0, hdp); + Real Phi_0 = Phi_Total_D3D(0, 0, hdp); Real gamma = hdp[13]; // Real gamma = 1.001; // CHANGED FOR ISOTHERMAL Real Delta_phi; @@ -645,8 +645,8 @@ Real determine_rho_eos_D3D(Real cs, Real Sigma_0, Real *hdp) z_0 = 1.0e-3; z_1 = 1.0e-2; while (!flag_phi) { - A_0 = 1.0 - (phi_total_D3D(0, z_0, hdp) - Phi_0) / (cs * cs); - A_1 = 1.0 - (phi_total_D3D(0, z_1, hdp) - Phi_0) / (cs * cs); + A_0 = 1.0 - (Phi_Total_D3D(0, z_0, hdp) - Phi_0) / (cs * cs); + A_1 = 1.0 - (Phi_Total_D3D(0, z_1, hdp) - Phi_0) / (cs * cs); z_2 = z_1 - A_1 * (z_1 - z_0) / (A_1 - A_0); if (fabs(z_2 - z_1) / fabs(z_1) > 10.) { @@ -660,8 +660,8 @@ Real determine_rho_eos_D3D(Real cs, Real Sigma_0, Real *hdp) // printf("z_0 %e z_1 %e\n",z_0,z_1); if (fabs(z_1 - z_0) < tol) { flag_phi = 1; - A_0 = 1.0 - (phi_total_D3D(0, z_0, hdp) - Phi_0) / (cs * cs); - A_1 = 1.0 - (phi_total_D3D(0, z_1, hdp) - Phi_0) / (cs * cs); + A_0 = 1.0 - (Phi_Total_D3D(0, z_0, hdp) - Phi_0) / (cs * cs); + A_1 = 1.0 - (Phi_Total_D3D(0, z_1, hdp) - Phi_0) / (cs * cs); // make sure we haven't crossed 0 if (A_1 < 0) { z_1 = z_0; @@ -672,7 +672,7 @@ Real determine_rho_eos_D3D(Real cs, Real Sigma_0, Real *hdp) printf("Something wrong in determining central density...\n"); printf("iter_phi = %d\n", iter_phi); printf("z_0 %e z_1 %e z_2 %e A_0 %e A_1 %e phi_0 %e phi_1 %e\n", z_0, z_1, z_2, A_0, A_1, - phi_total_D3D(0, z_0, hdp), phi_total_D3D(0, z_1, hdp)); + Phi_Total_D3D(0, z_0, hdp), Phi_Total_D3D(0, z_1, hdp)); #ifdef MPI_CHOLLA MPI_Finalize(); #endif @@ -690,7 +690,7 @@ Real determine_rho_eos_D3D(Real cs, Real Sigma_0, Real *hdp) // now integrate the density profile for (k = 0; k < n_int; k++) { z_pos = 0.5 * dz_int + dz_int * ((Real)k); - Delta_phi = phi_total_D3D(0, z_pos, hdp) - Phi_0; + Delta_phi = Phi_Total_D3D(0, z_pos, hdp) - Phi_0; A = 1.0 - Delta_phi / (cs * cs); phi_int += pow((gamma - 1) * A, 1. / (gamma - 1.)) * dz_int; } @@ -713,7 +713,7 @@ Real determine_rho_eos_D3D(Real cs, Real Sigma_0, Real *hdp) return rho_eos; } -Real halo_density_D3D(Real r, Real *r_halo, Real *rho_halo, Real dr, int nr) +Real Halo_Density_D3D(Real r, Real *r_halo, Real *rho_halo, Real dr, int nr) { // interpolate the halo density profile int i; @@ -798,7 +798,7 @@ void Grid3D::Disk_3D(parameters p) // determine rho_eos by setting central density of disk // based on central temperature - rho_eos = determine_rho_eos_D3D(cs, Sigma_0, hdp); + rho_eos = Determine_Rho_EOS_D3D(cs, Sigma_0, hdp); // set EOS parameters // K_eos = cs*cs*pow(rho_eos,1.0-p.gamma)/p.gamma; //P = K\rho^gamma @@ -834,7 +834,7 @@ void Grid3D::Disk_3D(parameters p) // Produce a look up table for a hydrostatic hot halo ////////////////////////////////////////////// ////////////////////////////////////////////// - hydrostatic_ray_analytical_D3D(rho_halo, r_halo, hdp, dr, nr); + Hydrostatic_Ray_Analytical_D3D(rho_halo, r_halo, hdp, dr, nr); chprintf("Hot halo lookup table generated...\n"); ////////////////////////////////////////////// @@ -859,7 +859,7 @@ void Grid3D::Disk_3D(parameters p) // Compute the hydrostatic density profile in this z column // owing to the disk // hydrostatic_column_analytical_D3D(rho, r, hdp, dz, nz, H.n_ghost); - hydrostatic_column_isothermal_D3D(rho, r, hdp, dz, nz, + Hydrostatic_Column_Isothermal_D3D(rho, r, hdp, dz, nz, H.n_ghost); // CHANGED_FOR_ISOTHERMAL // store densities @@ -919,9 +919,9 @@ void Grid3D::Disk_3D(parameters p) phi = atan2(y_pos, x_pos); // azimuthal angle (in x-y plane) // radial acceleration from disk - a_d = fabs(gr_disk_D3D(r, z_pos, hdp)); + a_d = fabs(Gr_Disk_D3D(r, z_pos, hdp)); // radial acceleration from halo - a_h = fabs(gr_halo_D3D(r, z_pos, hdp)); + a_h = fabs(Gr_Halo_D3D(r, z_pos, hdp)); // pressure gradient along x direction // gradient calc is first order at boundaries @@ -1025,7 +1025,7 @@ void Grid3D::Disk_3D(parameters p) r = sqrt(x_pos * x_pos + y_pos * y_pos + z_pos * z_pos); // interpolate the density at this position - d = halo_density_D3D(r, r_halo, rho_halo, dr, nr); + d = Halo_Density_D3D(r, r_halo, rho_halo, dr, nr); // set pressure adiabatically P = K_eos_h * pow(d, p.gamma); diff --git a/src/particles/feedback_CIC_gpu.cu b/src/particles/feedback_CIC_gpu.cu index 75bf1f5e8..f464e167e 100644 --- a/src/particles/feedback_CIC_gpu.cu +++ b/src/particles/feedback_CIC_gpu.cu @@ -36,7 +36,7 @@ int snr_n; } // namespace supernova #ifndef O_HIP -__device__ double atomicMax(double* address, double val) +__device__ double Atomic_Max(double* address, double val) { auto* address_as_ull = (unsigned long long int*)address; unsigned long long int old = *address_as_ull, assumed; @@ -48,7 +48,7 @@ __device__ double atomicMax(double* address, double val) } #endif // O_HIP -__global__ void initState_kernel(unsigned int seed, feedback_prng_t* states) +__global__ void Init_State_Kernel(unsigned int seed, feedback_prng_t* states) { int id = blockIdx.x * blockDim.x + threadIdx.x; curand_init(seed, id, 0, &states[id]); @@ -137,7 +137,7 @@ void supernova::initState(struct parameters* P, part_int_t n_local, Real allocat dim3 grid(ngrid); dim3 block(TPB_FEEDBACK); - hipLaunchKernelGGL(initState_kernel, grid, block, 0, 0, P->prng_seed, randStates); + hipLaunchKernelGGL(Init_State_Kernel, grid, block, 0, 0, P->prng_seed, randStates); CHECK(cudaDeviceSynchronize()); chprintf("supernova::initState end: n_states=%ld, ngrid=%d, threads=%d\n", n_states, ngrid, TPB_FEEDBACK); } @@ -175,9 +175,9 @@ __device__ Real Calc_Timestep(Real gamma, Real* density, Real* momentum_x, Real* should be dx*1/2. In the above the 1/2 factor is normalize over 2 cells/direction. */ -__device__ Real frac(int i, Real dx) { return (-0.5 * i * i - 0.5 * i + 1 + i * dx) * 0.5; } +__device__ Real Frac(int i, Real dx) { return (-0.5 * i * i - 0.5 * i + 1 + i * dx) * 0.5; } -__device__ Real d_fr(int i, Real dx) +__device__ Real D_Fr(int i, Real dx) { return (dx > 0.5) * i * (1 - 2 * dx) + ((i + 1) * dx + 0.5 * (i - 1)) - 3 * (i - 1) * (i + 1) * (0.5 - dx); } @@ -475,9 +475,9 @@ __global__ void Cluster_Feedback_Kernel(part_int_t n_local, part_int_t* id, Real // index in array of conserved quantities indx = (indx_x + i) + (indx_y + j) * nx_g + (indx_z + k) * nx_g * ny_g; - x_frac = d_fr(i, delta_x) * frac(j, delta_y) * frac(k, delta_z); - y_frac = frac(i, delta_x) * d_fr(j, delta_y) * frac(k, delta_z); - z_frac = frac(i, delta_x) * frac(j, delta_y) * d_fr(k, delta_z); + x_frac = D_Fr(i, delta_x) * Frac(j, delta_y) * Frac(k, delta_z); + y_frac = Frac(i, delta_x) * D_Fr(j, delta_y) * Frac(k, delta_z); + z_frac = Frac(i, delta_x) * Frac(j, delta_y) * D_Fr(k, delta_z); px = x_frac * feedback_momentum; py = y_frac * feedback_momentum; @@ -611,7 +611,7 @@ __global__ void Cluster_Feedback_Kernel(part_int_t n_local, part_int_t* id, Real } } if (direction > 0) { - atomicMax(dti, local_dti); + Atomic_Max(dti, local_dti); } } } diff --git a/src/reconstruction/plmc_cuda_tests.cu b/src/reconstruction/plmc_cuda_tests.cu index 88e9c3b34..34d8ab96b 100644 --- a/src/reconstruction/plmc_cuda_tests.cu +++ b/src/reconstruction/plmc_cuda_tests.cu @@ -138,7 +138,7 @@ TEST(tHYDROPlmcReconstructor, CorrectInputExpectCorrectOutput) ? 0.0 : fiducial_interface_left.at(direction)[i]; - testingUtilities::checkResults( + testingUtilities::Check_Results( fiducial_val, test_val, "left interface at i=" + std::to_string(i) + ", in direction " + std::to_string(direction)); @@ -148,7 +148,7 @@ TEST(tHYDROPlmcReconstructor, CorrectInputExpectCorrectOutput) ? 0.0 : fiducial_interface_right.at(direction)[i]; - testingUtilities::checkResults( + testingUtilities::Check_Results( fiducial_val, test_val, "right interface at i=" + std::to_string(i) + ", in direction " + std::to_string(direction)); } @@ -252,7 +252,7 @@ TEST(tMHDPlmcReconstructor, CorrectInputExpectCorrectOutput) ? 0.0 : fiducial_interface_left.at(direction)[i]; - testingUtilities::checkResults( + testingUtilities::Check_Results( fiducial_val, test_val, "left interface at i=" + std::to_string(i) + ", in direction " + std::to_string(direction)); @@ -262,7 +262,7 @@ TEST(tMHDPlmcReconstructor, CorrectInputExpectCorrectOutput) ? 0.0 : fiducial_interface_right.at(direction)[i]; - testingUtilities::checkResults( + testingUtilities::Check_Results( fiducial_val, test_val, "right interface at i=" + std::to_string(i) + ", in direction " + std::to_string(direction)); } diff --git a/src/reconstruction/reconstruction_tests.cu b/src/reconstruction/reconstruction_tests.cu index d75cbfd3c..adec39695 100644 --- a/src/reconstruction/reconstruction_tests.cu +++ b/src/reconstruction/reconstruction_tests.cu @@ -21,7 +21,7 @@ #include "../utils/testing_utilities.h" #ifdef MHD -__global__ void test_prim_2_char(reconstruction::Primitive const primitive, +__global__ void Test_Prim_2_Char(reconstruction::Primitive const primitive, reconstruction::Primitive const primitive_slope, Real const gamma, Real const sound_speed, Real const sound_speed_squared, reconstruction::Characteristic *characteristic_slope) @@ -30,7 +30,7 @@ __global__ void test_prim_2_char(reconstruction::Primitive const primitive, reconstruction::Primitive_To_Characteristic(primitive, primitive_slope, sound_speed, sound_speed_squared, gamma); } -__global__ void test_char_2_prim(reconstruction::Primitive const primitive, +__global__ void Test_Char_2_Prim(reconstruction::Primitive const primitive, reconstruction::Characteristic const characteristic_slope, Real const gamma, Real const sound_speed, Real const sound_speed_squared, reconstruction::Primitive *primitive_slope) @@ -50,7 +50,7 @@ TEST(tMHDReconstructionPrimitive2Characteristic, CorrectInputExpectCorrectOutput // Run test cuda_utilities::DeviceVector dev_results(1); - hipLaunchKernelGGL(test_prim_2_char, 1, 1, 0, 0, primitive, primitive_slope, gamma, sound_speed, sound_speed_squared, + hipLaunchKernelGGL(Test_Prim_2_Char, 1, 1, 0, 0, primitive, primitive_slope, gamma, sound_speed, sound_speed_squared, dev_results.data()); CudaCheckError(); cudaDeviceSynchronize(); @@ -60,13 +60,13 @@ TEST(tMHDReconstructionPrimitive2Characteristic, CorrectInputExpectCorrectOutput reconstruction::Characteristic const fiducial_results{ 3.67609032478613384e+00, -5.64432521030159506e-01, -3.31429408151064075e+00, 7.44000000000000039e+00, 3.29052143725318791e+00, -1.88144173676719539e-01, 4.07536568422372625e+00}; - testingUtilities::checkResults(fiducial_results.a0, host_results.a0, "a0"); - testingUtilities::checkResults(fiducial_results.a1, host_results.a1, "a1"); - testingUtilities::checkResults(fiducial_results.a2, host_results.a2, "a2"); - testingUtilities::checkResults(fiducial_results.a3, host_results.a3, "a3"); - testingUtilities::checkResults(fiducial_results.a4, host_results.a4, "a4"); - testingUtilities::checkResults(fiducial_results.a5, host_results.a5, "a5"); - testingUtilities::checkResults(fiducial_results.a6, host_results.a6, "a6"); + testingUtilities::Check_Results(fiducial_results.a0, host_results.a0, "a0"); + testingUtilities::Check_Results(fiducial_results.a1, host_results.a1, "a1"); + testingUtilities::Check_Results(fiducial_results.a2, host_results.a2, "a2"); + testingUtilities::Check_Results(fiducial_results.a3, host_results.a3, "a3"); + testingUtilities::Check_Results(fiducial_results.a4, host_results.a4, "a4"); + testingUtilities::Check_Results(fiducial_results.a5, host_results.a5, "a5"); + testingUtilities::Check_Results(fiducial_results.a6, host_results.a6, "a6"); } TEST(tMHDReconstructionCharacteristic2Primitive, CorrectInputExpectCorrectOutput) @@ -80,7 +80,7 @@ TEST(tMHDReconstructionCharacteristic2Primitive, CorrectInputExpectCorrectOutput // Run test cuda_utilities::DeviceVector dev_results(1); - hipLaunchKernelGGL(test_char_2_prim, 1, 1, 0, 0, primitive, characteristic_slope, gamma, sound_speed, + hipLaunchKernelGGL(Test_Char_2_Prim, 1, 1, 0, 0, primitive, characteristic_slope, gamma, sound_speed, sound_speed_squared, dev_results.data()); CudaCheckError(); cudaDeviceSynchronize(); @@ -90,13 +90,13 @@ TEST(tMHDReconstructionCharacteristic2Primitive, CorrectInputExpectCorrectOutput reconstruction::Primitive const fiducial_results{ 6.73268997307368267e+01, 1.79977606552837130e+01, 9.89872908629502835e-01, -4.94308571170036792e+00, 3.94390831089473579e+02, -9.99000000000000000e+02, 2.88004228079705342e+01, 9.36584592818786064e+01}; - testingUtilities::checkResults(fiducial_results.density, host_results.density, "density"); - testingUtilities::checkResults(fiducial_results.velocity_x, host_results.velocity_x, "velocity_x"); - testingUtilities::checkResults(fiducial_results.velocity_y, host_results.velocity_y, "velocity_y", 1.34E-14); - testingUtilities::checkResults(fiducial_results.velocity_z, host_results.velocity_z, "velocity_z", 1.6E-14); - testingUtilities::checkResults(fiducial_results.pressure, host_results.pressure, "pressure"); - testingUtilities::checkResults(fiducial_results.magnetic_y, host_results.magnetic_y, "magnetic_y"); - testingUtilities::checkResults(fiducial_results.magnetic_z, host_results.magnetic_z, "magnetic_z"); + testingUtilities::Check_Results(fiducial_results.density, host_results.density, "density"); + testingUtilities::Check_Results(fiducial_results.velocity_x, host_results.velocity_x, "velocity_x"); + testingUtilities::Check_Results(fiducial_results.velocity_y, host_results.velocity_y, "velocity_y", 1.34E-14); + testingUtilities::Check_Results(fiducial_results.velocity_z, host_results.velocity_z, "velocity_z", 1.6E-14); + testingUtilities::Check_Results(fiducial_results.pressure, host_results.pressure, "pressure"); + testingUtilities::Check_Results(fiducial_results.magnetic_y, host_results.magnetic_y, "magnetic_y"); + testingUtilities::Check_Results(fiducial_results.magnetic_z, host_results.magnetic_z, "magnetic_z"); } #endif // MHD @@ -124,25 +124,25 @@ TEST(tALLReconstructionLoadData, CorrectInputExpectCorrectOutput) #ifdef MHD reconstruction::Primitive const fiducial_data{ 13, 3.0769230769230771, 5.1538461538461542, 7.2307692307692308, 9662.3910256410272, 147.5, 173.5, 197.5}; - testingUtilities::checkResults(fiducial_data.density, test_data.density, "density"); - testingUtilities::checkResults(fiducial_data.velocity_x, test_data.velocity_x, "velocity_x"); - testingUtilities::checkResults(fiducial_data.velocity_y, test_data.velocity_y, "velocity_y"); - testingUtilities::checkResults(fiducial_data.velocity_z, test_data.velocity_z, "velocity_z"); - testingUtilities::checkResults(fiducial_data.pressure, test_data.pressure, "pressure"); - testingUtilities::checkResults(fiducial_data.magnetic_x, test_data.magnetic_x, "magnetic_x"); - testingUtilities::checkResults(fiducial_data.magnetic_y, test_data.magnetic_y, "magnetic_y"); - testingUtilities::checkResults(fiducial_data.magnetic_z, test_data.magnetic_z, "magnetic_z"); + testingUtilities::Check_Results(fiducial_data.density, test_data.density, "density"); + testingUtilities::Check_Results(fiducial_data.velocity_x, test_data.velocity_x, "velocity_x"); + testingUtilities::Check_Results(fiducial_data.velocity_y, test_data.velocity_y, "velocity_y"); + testingUtilities::Check_Results(fiducial_data.velocity_z, test_data.velocity_z, "velocity_z"); + testingUtilities::Check_Results(fiducial_data.pressure, test_data.pressure, "pressure"); + testingUtilities::Check_Results(fiducial_data.magnetic_x, test_data.magnetic_x, "magnetic_x"); + testingUtilities::Check_Results(fiducial_data.magnetic_y, test_data.magnetic_y, "magnetic_y"); + testingUtilities::Check_Results(fiducial_data.magnetic_z, test_data.magnetic_z, "magnetic_z"); #else // MHD reconstruction::Primitive fiducial_data{13, 3.0769230769230771, 5.1538461538461542, 7.2307692307692308, 39950.641025641031}; #ifdef DE fiducial_data.pressure = 34274.282506448195; #endif // DE - testingUtilities::checkResults(fiducial_data.density, test_data.density, "density"); - testingUtilities::checkResults(fiducial_data.velocity_x, test_data.velocity_x, "velocity_x"); - testingUtilities::checkResults(fiducial_data.velocity_y, test_data.velocity_y, "velocity_y"); - testingUtilities::checkResults(fiducial_data.velocity_z, test_data.velocity_z, "velocity_z"); - testingUtilities::checkResults(fiducial_data.pressure, test_data.pressure, "pressure"); + testingUtilities::Check_Results(fiducial_data.density, test_data.density, "density"); + testingUtilities::Check_Results(fiducial_data.velocity_x, test_data.velocity_x, "velocity_x"); + testingUtilities::Check_Results(fiducial_data.velocity_y, test_data.velocity_y, "velocity_y"); + testingUtilities::Check_Results(fiducial_data.velocity_z, test_data.velocity_z, "velocity_z"); + testingUtilities::Check_Results(fiducial_data.pressure, test_data.pressure, "pressure"); #endif // MHD } @@ -164,20 +164,20 @@ TEST(tALLReconstructionComputeSlope, CorrectInputExpectCorrectOutput) // Check results #ifdef MHD Real const fiducial_data = -2.5; - testingUtilities::checkResults(fiducial_data, test_data.density, "density"); - testingUtilities::checkResults(fiducial_data, test_data.velocity_x, "velocity_x"); - testingUtilities::checkResults(fiducial_data, test_data.velocity_y, "velocity_y"); - testingUtilities::checkResults(fiducial_data, test_data.velocity_z, "velocity_z"); - testingUtilities::checkResults(fiducial_data, test_data.pressure, "pressure"); - testingUtilities::checkResults(fiducial_data, test_data.magnetic_y, "magnetic_y"); - testingUtilities::checkResults(fiducial_data, test_data.magnetic_z, "magnetic_z"); + testingUtilities::Check_Results(fiducial_data, test_data.density, "density"); + testingUtilities::Check_Results(fiducial_data, test_data.velocity_x, "velocity_x"); + testingUtilities::Check_Results(fiducial_data, test_data.velocity_y, "velocity_y"); + testingUtilities::Check_Results(fiducial_data, test_data.velocity_z, "velocity_z"); + testingUtilities::Check_Results(fiducial_data, test_data.pressure, "pressure"); + testingUtilities::Check_Results(fiducial_data, test_data.magnetic_y, "magnetic_y"); + testingUtilities::Check_Results(fiducial_data, test_data.magnetic_z, "magnetic_z"); #else // MHD Real const fiducial_data = -2.5; - testingUtilities::checkResults(fiducial_data, test_data.density, "density"); - testingUtilities::checkResults(fiducial_data, test_data.velocity_x, "velocity_x"); - testingUtilities::checkResults(fiducial_data, test_data.velocity_y, "velocity_y"); - testingUtilities::checkResults(fiducial_data, test_data.velocity_z, "velocity_z"); - testingUtilities::checkResults(fiducial_data, test_data.pressure, "pressure"); + testingUtilities::Check_Results(fiducial_data, test_data.density, "density"); + testingUtilities::Check_Results(fiducial_data, test_data.velocity_x, "velocity_x"); + testingUtilities::Check_Results(fiducial_data, test_data.velocity_y, "velocity_y"); + testingUtilities::Check_Results(fiducial_data, test_data.velocity_z, "velocity_z"); + testingUtilities::Check_Results(fiducial_data, test_data.pressure, "pressure"); #endif // MHD } @@ -200,25 +200,25 @@ TEST(tALLReconstructionVanLeerSlope, CorrectInputExpectCorrectOutput) reconstruction::Primitive const fiducial_data{1.7142857142857142, 3.1111111111111112, 4.3636363636363633, 5.5384615384615383, 6.666666666666667, 0, 8.8421052631578956, 9.9047619047619051}; - testingUtilities::checkResults(fiducial_data.density, test_data.density, "density"); - testingUtilities::checkResults(fiducial_data.velocity_x, test_data.velocity_x, "velocity_x"); - testingUtilities::checkResults(fiducial_data.velocity_y, test_data.velocity_y, "velocity_y"); - testingUtilities::checkResults(fiducial_data.velocity_z, test_data.velocity_z, "velocity_z"); - testingUtilities::checkResults(fiducial_data.pressure, test_data.pressure, "pressure"); - testingUtilities::checkResults(fiducial_data.magnetic_y, test_data.magnetic_y, "magnetic_y"); - testingUtilities::checkResults(fiducial_data.magnetic_z, test_data.magnetic_z, "magnetic_z"); + testingUtilities::Check_Results(fiducial_data.density, test_data.density, "density"); + testingUtilities::Check_Results(fiducial_data.velocity_x, test_data.velocity_x, "velocity_x"); + testingUtilities::Check_Results(fiducial_data.velocity_y, test_data.velocity_y, "velocity_y"); + testingUtilities::Check_Results(fiducial_data.velocity_z, test_data.velocity_z, "velocity_z"); + testingUtilities::Check_Results(fiducial_data.pressure, test_data.pressure, "pressure"); + testingUtilities::Check_Results(fiducial_data.magnetic_y, test_data.magnetic_y, "magnetic_y"); + testingUtilities::Check_Results(fiducial_data.magnetic_z, test_data.magnetic_z, "magnetic_z"); #else // MHD reconstruction::Primitive const fiducial_data{1.7142857142857142, 3.1111111111111112, 4.3636363636363633, 5.5384615384615383, 6.666666666666667}; - testingUtilities::checkResults(fiducial_data.density, test_data.density, "density"); - testingUtilities::checkResults(fiducial_data.velocity_x, test_data.velocity_x, "velocity_x"); - testingUtilities::checkResults(fiducial_data.velocity_y, test_data.velocity_y, "velocity_y"); - testingUtilities::checkResults(fiducial_data.velocity_z, test_data.velocity_z, "velocity_z"); - testingUtilities::checkResults(fiducial_data.pressure, test_data.pressure, "pressure"); + testingUtilities::Check_Results(fiducial_data.density, test_data.density, "density"); + testingUtilities::Check_Results(fiducial_data.velocity_x, test_data.velocity_x, "velocity_x"); + testingUtilities::Check_Results(fiducial_data.velocity_y, test_data.velocity_y, "velocity_y"); + testingUtilities::Check_Results(fiducial_data.velocity_z, test_data.velocity_z, "velocity_z"); + testingUtilities::Check_Results(fiducial_data.pressure, test_data.pressure, "pressure"); #endif // MHD } -__global__ void test_monotize_characteristic_return_primitive( +__global__ void Test_Monotize_Characteristic_Return_Primitive( reconstruction::Primitive const primitive, reconstruction::Primitive const del_L, reconstruction::Primitive const del_R, reconstruction::Primitive const del_C, reconstruction::Primitive const del_G, reconstruction::Characteristic const del_a_L, reconstruction::Characteristic const del_a_R, @@ -258,7 +258,7 @@ TEST(tALLReconstructionMonotonizeCharacteristicReturnPrimitive, CorrectInputExpe // Get test data cuda_utilities::DeviceVector dev_results(1); - hipLaunchKernelGGL(test_monotize_characteristic_return_primitive, 1, 1, 0, 0, primitive, del_L, del_R, del_C, del_G, + hipLaunchKernelGGL(Test_Monotize_Characteristic_Return_Primitive, 1, 1, 0, 0, primitive, del_L, del_R, del_C, del_G, del_a_L, del_a_R, del_a_C, del_a_G, sound_speed, sound_speed_squared, gamma, dev_results.data()); CudaCheckError(); cudaDeviceSynchronize(); @@ -268,20 +268,20 @@ TEST(tALLReconstructionMonotonizeCharacteristicReturnPrimitive, CorrectInputExpe #ifdef MHD reconstruction::Primitive const fiducial_data{174, 74.796411763317991, 19.428234044886157, 16.129327015450095, 33524, 0, -1385.8699833027156, -1407.694707449215}; - testingUtilities::checkResults(fiducial_data.density, host_results.density, "density"); - testingUtilities::checkResults(fiducial_data.velocity_x, host_results.velocity_x, "velocity_x"); - testingUtilities::checkResults(fiducial_data.velocity_y, host_results.velocity_y, "velocity_y"); - testingUtilities::checkResults(fiducial_data.velocity_z, host_results.velocity_z, "velocity_z"); - testingUtilities::checkResults(fiducial_data.pressure, host_results.pressure, "pressure"); - testingUtilities::checkResults(fiducial_data.magnetic_y, host_results.magnetic_y, "magnetic_y"); - testingUtilities::checkResults(fiducial_data.magnetic_z, host_results.magnetic_z, "magnetic_z"); + testingUtilities::Check_Results(fiducial_data.density, host_results.density, "density"); + testingUtilities::Check_Results(fiducial_data.velocity_x, host_results.velocity_x, "velocity_x"); + testingUtilities::Check_Results(fiducial_data.velocity_y, host_results.velocity_y, "velocity_y"); + testingUtilities::Check_Results(fiducial_data.velocity_z, host_results.velocity_z, "velocity_z"); + testingUtilities::Check_Results(fiducial_data.pressure, host_results.pressure, "pressure"); + testingUtilities::Check_Results(fiducial_data.magnetic_y, host_results.magnetic_y, "magnetic_y"); + testingUtilities::Check_Results(fiducial_data.magnetic_z, host_results.magnetic_z, "magnetic_z"); #else // MHD reconstruction::Primitive const fiducial_data{170, 68, 57, 58, 32946}; - testingUtilities::checkResults(fiducial_data.density, host_results.density, "density"); - testingUtilities::checkResults(fiducial_data.velocity_x, host_results.velocity_x, "velocity_x"); - testingUtilities::checkResults(fiducial_data.velocity_y, host_results.velocity_y, "velocity_y"); - testingUtilities::checkResults(fiducial_data.velocity_z, host_results.velocity_z, "velocity_z"); - testingUtilities::checkResults(fiducial_data.pressure, host_results.pressure, "pressure"); + testingUtilities::Check_Results(fiducial_data.density, host_results.density, "density"); + testingUtilities::Check_Results(fiducial_data.velocity_x, host_results.velocity_x, "velocity_x"); + testingUtilities::Check_Results(fiducial_data.velocity_y, host_results.velocity_y, "velocity_y"); + testingUtilities::Check_Results(fiducial_data.velocity_z, host_results.velocity_z, "velocity_z"); + testingUtilities::Check_Results(fiducial_data.pressure, host_results.pressure, "pressure"); #endif // MHD } @@ -303,20 +303,20 @@ TEST(tALLReconstructionCalcInterface, CorrectInputExpectCorrectOutput) // Check results #ifdef MHD reconstruction::Primitive const fiducial_data{2.5, 3.75, 5, 6.25, 7.5, 0, 10, 11.25}; - testingUtilities::checkResults(fiducial_data.density, test_data.density, "density"); - testingUtilities::checkResults(fiducial_data.velocity_x, test_data.velocity_x, "velocity_x"); - testingUtilities::checkResults(fiducial_data.velocity_y, test_data.velocity_y, "velocity_y"); - testingUtilities::checkResults(fiducial_data.velocity_z, test_data.velocity_z, "velocity_z"); - testingUtilities::checkResults(fiducial_data.pressure, test_data.pressure, "pressure"); - testingUtilities::checkResults(fiducial_data.magnetic_y, test_data.magnetic_y, "magnetic_y"); - testingUtilities::checkResults(fiducial_data.magnetic_z, test_data.magnetic_z, "magnetic_z"); + testingUtilities::Check_Results(fiducial_data.density, test_data.density, "density"); + testingUtilities::Check_Results(fiducial_data.velocity_x, test_data.velocity_x, "velocity_x"); + testingUtilities::Check_Results(fiducial_data.velocity_y, test_data.velocity_y, "velocity_y"); + testingUtilities::Check_Results(fiducial_data.velocity_z, test_data.velocity_z, "velocity_z"); + testingUtilities::Check_Results(fiducial_data.pressure, test_data.pressure, "pressure"); + testingUtilities::Check_Results(fiducial_data.magnetic_y, test_data.magnetic_y, "magnetic_y"); + testingUtilities::Check_Results(fiducial_data.magnetic_z, test_data.magnetic_z, "magnetic_z"); #else // MHD reconstruction::Primitive const fiducial_data{2.5, 3.75, 5, 6.25, 7.5}; - testingUtilities::checkResults(fiducial_data.density, test_data.density, "density"); - testingUtilities::checkResults(fiducial_data.velocity_x, test_data.velocity_x, "velocity_x"); - testingUtilities::checkResults(fiducial_data.velocity_y, test_data.velocity_y, "velocity_y"); - testingUtilities::checkResults(fiducial_data.velocity_z, test_data.velocity_z, "velocity_z"); - testingUtilities::checkResults(fiducial_data.pressure, test_data.pressure, "pressure"); + testingUtilities::Check_Results(fiducial_data.density, test_data.density, "density"); + testingUtilities::Check_Results(fiducial_data.velocity_x, test_data.velocity_x, "velocity_x"); + testingUtilities::Check_Results(fiducial_data.velocity_y, test_data.velocity_y, "velocity_y"); + testingUtilities::Check_Results(fiducial_data.velocity_z, test_data.velocity_z, "velocity_z"); + testingUtilities::Check_Results(fiducial_data.pressure, test_data.pressure, "pressure"); #endif // MHD } @@ -359,6 +359,6 @@ TEST(tALLReconstructionWriteData, CorrectInputExpectCorrectOutput) double test_val = interface_arr.at(i); double fiducial_val = (fiducial_interface.find(i) == fiducial_interface.end()) ? 0.0 : fiducial_interface[i]; - testingUtilities::checkResults(fiducial_val, test_val, "Interface at i=" + std::to_string(i)); + testingUtilities::Check_Results(fiducial_val, test_val, "Interface at i=" + std::to_string(i)); } } diff --git a/src/riemann_solvers/hllc_cuda_tests.cu b/src/riemann_solvers/hllc_cuda_tests.cu index c3efe9d96..e8d000f6e 100644 --- a/src/riemann_solvers/hllc_cuda_tests.cu +++ b/src/riemann_solvers/hllc_cuda_tests.cu @@ -44,7 +44,7 @@ class tHYDROCalculateHLLCFluxesCUDA : public ::testing::Test * \param[in] gamma The adiabatic index * \return std::vector */ - std::vector computeFluxes(std::vector const &stateLeft, std::vector const &stateRight, + std::vector Compute_Fluxes(std::vector const &stateLeft, std::vector const &stateRight, Real const &gamma) { // Simulation Paramters @@ -105,7 +105,7 @@ class tHYDROCalculateHLLCFluxesCUDA : public ::testing::Test * print. It will print after the default GTest output but before the * values that failed are printed */ - void checkResults(std::vector const &fiducialFlux, std::vector const &testFlux, + void Check_Results(std::vector const &fiducialFlux, std::vector const &testFlux, std::string const &customOutput = "") { // Field names @@ -168,12 +168,12 @@ TEST_F(tHYDROCalculateHLLCFluxesCUDA, // Test suite name std::vector const fiducialFluxes{0, 1, 0, 0, 0}; // Compute the fluxes - std::vector const testFluxes = computeFluxes(state, // Left state + std::vector const testFluxes = Compute_Fluxes(state, // Left state state, // Right state gamma); // Adiabatic Index // Check for correctness - checkResults(fiducialFluxes, testFluxes); + Check_Results(fiducialFluxes, testFluxes); } // ========================================================================= diff --git a/src/riemann_solvers/hlld_cuda_tests.cu b/src/riemann_solvers/hlld_cuda_tests.cu index c39e091d1..b67e18227 100644 --- a/src/riemann_solvers/hlld_cuda_tests.cu +++ b/src/riemann_solvers/hlld_cuda_tests.cu @@ -56,7 +56,7 @@ class tMHDCalculateHLLDFluxesCUDA : public ::testing::Test * X, 1 = plane normal to Y, 2 = plane normal to Z. Defaults to 0. * \return std::vector */ - std::vector computeFluxes(std::vector stateLeft, std::vector stateRight, Real const &gamma, + std::vector Compute_Fluxes(std::vector stateLeft, std::vector stateRight, Real const &gamma, int const &direction = 0) { // Rearrange X, Y, and Z values for the chosen direction @@ -161,7 +161,7 @@ class tMHDCalculateHLLDFluxesCUDA : public ::testing::Test * \param[in] direction Which plane the interface is. 0 = plane normal to * X, 1 = plane normal to Y, 2 = plane normal to Z. Defaults to 0. */ - void checkResults(std::vector fiducialFlux, std::vector const &scalarFlux, Real thermalEnergyFlux, + void Check_Results(std::vector fiducialFlux, std::vector const &scalarFlux, Real thermalEnergyFlux, std::vector const &testFlux, std::string const &customOutput = "", int const &direction = 0) { // Field names @@ -227,7 +227,7 @@ class tMHDCalculateHLLDFluxesCUDA : public ::testing::Test * y-momentum, z-momentum, energy, passive scalars, x-magnetic field, * y-magnetic field, z-magnetic field. */ - std::vector primitive2Conserved(std::vector const &input, double const &gamma, + std::vector Primitive_2_Conserved(std::vector const &input, double const &gamma, std::vector const &primitiveScalars) { std::vector output(input.size()); @@ -299,30 +299,30 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, BrioAndWuShockTubeCorrectInputExpectCorrectO // Pressure | X-Magnetic Field | Y-Magnetic Field | // Z-Magnetic Field | Adiabatic Index | Passive // Scalars | - leftICs = primitive2Conserved({1.0, 0.0, 0.0, Vz, 1.0, Bx, 1.0, Bz}, gamma, primitiveScalar), + leftICs = Primitive_2_Conserved({1.0, 0.0, 0.0, Vz, 1.0, Bx, 1.0, Bz}, gamma, primitiveScalar), leftFastRareLeftSide = - primitive2Conserved({0.978576, 0.038603, -0.011074, Vz, 0.957621, Bx, 0.970288, Bz}, gamma, primitiveScalar), + Primitive_2_Conserved({0.978576, 0.038603, -0.011074, Vz, 0.957621, Bx, 0.970288, Bz}, gamma, primitiveScalar), leftFastRareRightSide = - primitive2Conserved({0.671655, 0.647082, -0.238291, Vz, 0.451115, Bx, 0.578240, Bz}, gamma, primitiveScalar), + Primitive_2_Conserved({0.671655, 0.647082, -0.238291, Vz, 0.451115, Bx, 0.578240, Bz}, gamma, primitiveScalar), compoundLeftSide = - primitive2Conserved({0.814306, 0.506792, -0.911794, Vz, 0.706578, Bx, -0.108819, Bz}, gamma, primitiveScalar), + Primitive_2_Conserved({0.814306, 0.506792, -0.911794, Vz, 0.706578, Bx, -0.108819, Bz}, gamma, primitiveScalar), compoundPeak = - primitive2Conserved({0.765841, 0.523701, -1.383720, Vz, 0.624742, Bx, -0.400787, Bz}, gamma, primitiveScalar), + Primitive_2_Conserved({0.765841, 0.523701, -1.383720, Vz, 0.624742, Bx, -0.400787, Bz}, gamma, primitiveScalar), compoundRightSide = - primitive2Conserved({0.695211, 0.601089, -1.583720, Vz, 0.515237, Bx, -0.537027, Bz}, gamma, primitiveScalar), + Primitive_2_Conserved({0.695211, 0.601089, -1.583720, Vz, 0.515237, Bx, -0.537027, Bz}, gamma, primitiveScalar), contactLeftSide = - primitive2Conserved({0.680453, 0.598922, -1.584490, Vz, 0.515856, Bx, -0.533616, Bz}, gamma, primitiveScalar), + Primitive_2_Conserved({0.680453, 0.598922, -1.584490, Vz, 0.515856, Bx, -0.533616, Bz}, gamma, primitiveScalar), contactRightSide = - primitive2Conserved({0.231160, 0.599261, -1.584820, Vz, 0.516212, Bx, -0.533327, Bz}, gamma, primitiveScalar), + Primitive_2_Conserved({0.231160, 0.599261, -1.584820, Vz, 0.516212, Bx, -0.533327, Bz}, gamma, primitiveScalar), slowShockLeftSide = - primitive2Conserved({0.153125, 0.086170, -0.683303, Vz, 0.191168, Bx, -0.850815, Bz}, gamma, primitiveScalar), - slowShockRightSide = primitive2Conserved({0.117046, -0.238196, -0.165561, Vz, 0.087684, Bx, -0.903407, Bz}, gamma, + Primitive_2_Conserved({0.153125, 0.086170, -0.683303, Vz, 0.191168, Bx, -0.850815, Bz}, gamma, primitiveScalar), + slowShockRightSide = Primitive_2_Conserved({0.117046, -0.238196, -0.165561, Vz, 0.087684, Bx, -0.903407, Bz}, gamma, primitiveScalar), - rightFastRareLeftSide = primitive2Conserved({0.117358, -0.228756, -0.158845, Vz, 0.088148, Bx, -0.908335, Bz}, + rightFastRareLeftSide = Primitive_2_Conserved({0.117358, -0.228756, -0.158845, Vz, 0.088148, Bx, -0.908335, Bz}, gamma, primitiveScalar), - rightFastRareRightSide = primitive2Conserved({0.124894, -0.003132, -0.002074, Vz, 0.099830, Bx, -0.999018, Bz}, + rightFastRareRightSide = Primitive_2_Conserved({0.124894, -0.003132, -0.002074, Vz, 0.099830, Bx, -0.999018, Bz}, gamma, primitiveScalar), - rightICs = primitive2Conserved({0.128, 0.0, 0.0, Vz, 0.1, Bx, -1.0, Bz}, gamma, primitiveScalar); + rightICs = Primitive_2_Conserved({0.128, 0.0, 0.0, Vz, 0.1, Bx, -1.0, Bz}, gamma, primitiveScalar); for (size_t direction = 0; direction < 3; direction++) { // Initial Condition Checks @@ -336,8 +336,8 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, BrioAndWuShockTubeCorrectInputExpectCorrectO std::vector const fiducialFlux{0, 1.21875, -0.75, 0, 0, 0.0, 0, 0}; std::vector const scalarFlux{0, 0, 0}; Real thermalEnergyFlux = 0.0; - std::vector const testFluxes = computeFluxes(leftICs, leftICs, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + std::vector const testFluxes = Compute_Fluxes(leftICs, leftICs, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } { std::string const outputString{ @@ -349,8 +349,8 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, BrioAndWuShockTubeCorrectInputExpectCorrectO std::vector const fiducialFlux{0, 0.31874999999999998, 0.75, 0, 0, 0.0, 0, 0}; std::vector const scalarFlux{0, 0, 0}; Real thermalEnergyFlux = 0.0; - std::vector const testFluxes = computeFluxes(rightICs, rightICs, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + std::vector const testFluxes = Compute_Fluxes(rightICs, rightICs, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } { std::string const outputString{ @@ -365,8 +365,8 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, BrioAndWuShockTubeCorrectInputExpectCorrectO 1.0074980455427278, 0}; std::vector const scalarFlux{0.22885355953447648, 0.46073027567244362, 0.6854281091039145}; Real thermalEnergyFlux = 0.20673357746080046; - std::vector const testFluxes = computeFluxes(leftICs, rightICs, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + std::vector const testFluxes = Compute_Fluxes(leftICs, rightICs, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } { std::string const outputString{ @@ -386,8 +386,8 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, BrioAndWuShockTubeCorrectInputExpectCorrectO leftICsNegBx[5] = -leftICsNegBx[5]; rightICsNegBx[5] = -rightICsNegBx[5]; - std::vector const testFluxes = computeFluxes(leftICsNegBx, rightICsNegBx, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + std::vector const testFluxes = Compute_Fluxes(leftICsNegBx, rightICsNegBx, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } { std::string const outputString{ @@ -402,8 +402,8 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, BrioAndWuShockTubeCorrectInputExpectCorrectO -1.0074980455427278, 0}; std::vector const scalarFlux{-0.22885355953447648, -0.46073027567244362, -0.6854281091039145}; Real thermalEnergyFlux = -0.20673357746080046; - std::vector const testFluxes = computeFluxes(rightICs, leftICs, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + std::vector const testFluxes = Compute_Fluxes(rightICs, leftICs, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } // Cross wave checks @@ -420,8 +420,8 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, BrioAndWuShockTubeCorrectInputExpectCorrectO 0.53128887284876058, 0}; std::vector const scalarFlux{0.47083980954039228, 0.94789941519098619, 1.4101892974729979}; Real thermalEnergyFlux = 0.41622256825457099; - std::vector const testFluxes = computeFluxes(leftFastRareLeftSide, leftFastRareRightSide, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + std::vector const testFluxes = Compute_Fluxes(leftFastRareLeftSide, leftFastRareRightSide, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } { std::string const outputString{ @@ -436,8 +436,8 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, BrioAndWuShockTubeCorrectInputExpectCorrectO 0.058615131833681167, 0}; std::vector const scalarFlux{0.078034606921016325, 0.15710005136841393, 0.23371763662029341}; Real thermalEnergyFlux = 0.047345816580591255; - std::vector const testFluxes = computeFluxes(leftFastRareRightSide, leftFastRareLeftSide, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + std::vector const testFluxes = Compute_Fluxes(leftFastRareRightSide, leftFastRareLeftSide, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } { std::string const outputString{ @@ -452,8 +452,8 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, BrioAndWuShockTubeCorrectInputExpectCorrectO 0.76278089951123285, 0}; std::vector const scalarFlux{0.4948468279606959, 0.99623058485843297, 1.482091544807598}; Real thermalEnergyFlux = 0.38787931087981475; - std::vector const testFluxes = computeFluxes(compoundLeftSide, compoundRightSide, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + std::vector const testFluxes = Compute_Fluxes(compoundLeftSide, compoundRightSide, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } { std::string const outputString{ @@ -468,8 +468,8 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, BrioAndWuShockTubeCorrectInputExpectCorrectO 0.71658566275120927, 0}; std::vector const scalarFlux{0.42615918171426637, 0.85794792823389721, 1.2763685331959034}; Real thermalEnergyFlux = 0.28530908823756074; - std::vector const testFluxes = computeFluxes(compoundRightSide, compoundLeftSide, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + std::vector const testFluxes = Compute_Fluxes(compoundRightSide, compoundLeftSide, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } { std::string const outputString{ @@ -484,8 +484,8 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, BrioAndWuShockTubeCorrectInputExpectCorrectO 0.74058225030218761, 0}; std::vector const scalarFlux{0.46343639240225803, 0.93299478173931882, 1.388015684704111}; Real thermalEnergyFlux = 0.36325864563467081; - std::vector const testFluxes = computeFluxes(compoundLeftSide, compoundPeak, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + std::vector const testFluxes = Compute_Fluxes(compoundLeftSide, compoundPeak, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } { std::string const outputString{ @@ -500,8 +500,8 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, BrioAndWuShockTubeCorrectInputExpectCorrectO 0.71026545717401468, 0}; std::vector const scalarFlux{0.43749384947851333, 0.88076699477714815, 1.3103164425435772}; Real thermalEnergyFlux = 0.32239432669410983; - std::vector const testFluxes = computeFluxes(compoundPeak, compoundLeftSide, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + std::vector const testFluxes = Compute_Fluxes(compoundPeak, compoundLeftSide, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } { std::string const outputString{ @@ -516,8 +516,8 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, BrioAndWuShockTubeCorrectInputExpectCorrectO 0.85591904930227747, 0}; std::vector const scalarFlux{0.47444802592454061, 0.95516351251477749, 1.4209960899845735}; Real thermalEnergyFlux = 0.34962629086469987; - std::vector const testFluxes = computeFluxes(compoundPeak, compoundRightSide, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + std::vector const testFluxes = Compute_Fluxes(compoundPeak, compoundRightSide, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } { std::string const outputString{ @@ -532,8 +532,8 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, BrioAndWuShockTubeCorrectInputExpectCorrectO 0.83594460438033491, 0}; std::vector const scalarFlux{0.43286091709705776, 0.8714399289555731, 1.2964405732397004}; Real thermalEnergyFlux = 0.28979582956267347; - std::vector const testFluxes = computeFluxes(compoundRightSide, compoundPeak, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + std::vector const testFluxes = Compute_Fluxes(compoundRightSide, compoundPeak, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } { std::string const outputString{ @@ -548,8 +548,8 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, BrioAndWuShockTubeCorrectInputExpectCorrectO 0.8687394222350926, 0}; std::vector const scalarFlux{0.45114313616335622, 0.90824587528847567, 1.3511967538747176}; Real thermalEnergyFlux = 0.30895701155896288; - std::vector const testFluxes = computeFluxes(contactLeftSide, contactRightSide, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + std::vector const testFluxes = Compute_Fluxes(contactLeftSide, contactRightSide, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } { std::string const outputString{ @@ -564,8 +564,8 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, BrioAndWuShockTubeCorrectInputExpectCorrectO 0.86909622543144227, 0}; std::vector const scalarFlux{0.15331460335320088, 0.30865449334158279, 0.45918507401922254}; Real thermalEnergyFlux = 0.30928031735570188; - std::vector const testFluxes = computeFluxes(contactRightSide, contactLeftSide, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + std::vector const testFluxes = Compute_Fluxes(contactRightSide, contactLeftSide, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } { std::string const outputString{ @@ -580,8 +580,8 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, BrioAndWuShockTubeCorrectInputExpectCorrectO 0.40636483121437972, 0}; std::vector const scalarFlux{3.9048380136491711e-05, 7.8612589559210735e-05, 0.00011695189454326261}; Real thermalEnergyFlux = 4.4037784886918126e-05; - std::vector const testFluxes = computeFluxes(slowShockLeftSide, slowShockRightSide, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + std::vector const testFluxes = Compute_Fluxes(slowShockLeftSide, slowShockRightSide, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } { std::string const outputString{ @@ -596,8 +596,8 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, BrioAndWuShockTubeCorrectInputExpectCorrectO 0.37204015363322052, 0}; std::vector const scalarFlux{-0.018281297976332211, -0.036804091985367396, -0.054753421923485097}; Real thermalEnergyFlux = -0.020617189878790236; - std::vector const testFluxes = computeFluxes(slowShockRightSide, slowShockLeftSide, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + std::vector const testFluxes = Compute_Fluxes(slowShockRightSide, slowShockLeftSide, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } { std::string const outputString{ @@ -613,8 +613,8 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, BrioAndWuShockTubeCorrectInputExpectCorrectO std::vector const scalarFlux{-0.029028601629558917, -0.058440671223894146, -0.086942145734385745}; Real thermalEnergyFlux = -0.020960370728633469; std::vector const testFluxes = - computeFluxes(rightFastRareLeftSide, rightFastRareRightSide, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + Compute_Fluxes(rightFastRareLeftSide, rightFastRareRightSide, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } { std::string const outputString{ @@ -630,8 +630,8 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, BrioAndWuShockTubeCorrectInputExpectCorrectO std::vector const scalarFlux{-0.0012053733294214947, -0.0024266696462237609, -0.0036101547366371614}; Real thermalEnergyFlux = -0.00081785194236053073; std::vector const testFluxes = - computeFluxes(rightFastRareRightSide, rightFastRareLeftSide, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + Compute_Fluxes(rightFastRareRightSide, rightFastRareLeftSide, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } } } @@ -655,36 +655,36 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, DaiAndWoodwardShockTubeCorrectInputExpectCor std::vector const // | Density | X-Velocity | Y-Velocity | Z-Velocity | // Pressure | X-Magnetic Field | Y-Magnetic Field | // Z-Magnetic Field | Adiabatic Index | Passive Scalars | - leftICs = primitive2Conserved({1.08, 0.0, 0.0, 0.0, 1.0, Bx, 3.6 * coef, 2 * coef}, gamma, primitiveScalar), - leftFastShockLeftSide = primitive2Conserved( + leftICs = Primitive_2_Conserved({1.08, 0.0, 0.0, 0.0, 1.0, Bx, 3.6 * coef, 2 * coef}, gamma, primitiveScalar), + leftFastShockLeftSide = Primitive_2_Conserved( {1.09406, 1.176560, 0.021003, 0.506113, 0.970815, 1.12838, 1.105355, 0.614087}, gamma, primitiveScalar), - leftFastShockRightSide = primitive2Conserved( + leftFastShockRightSide = Primitive_2_Conserved( {1.40577, 0.693255, 0.210562, 0.611423, 1.494290, 1.12838, 1.457700, 0.809831}, gamma, primitiveScalar), - leftRotationLeftSide = primitive2Conserved( + leftRotationLeftSide = Primitive_2_Conserved( {1.40086, 0.687774, 0.215124, 0.609161, 1.485660, 1.12838, 1.458735, 0.789960}, gamma, primitiveScalar), - leftRotationRightSide = primitive2Conserved( + leftRotationRightSide = Primitive_2_Conserved( {1.40119, 0.687504, 0.330268, 0.334140, 1.486570, 1.12838, 1.588975, 0.475782}, gamma, primitiveScalar), - leftSlowShockLeftSide = primitive2Conserved( + leftSlowShockLeftSide = Primitive_2_Conserved( {1.40519, 0.685492, 0.326265, 0.333664, 1.493710, 1.12838, 1.575785, 0.472390}, gamma, primitiveScalar), - leftSlowShockRightSide = primitive2Conserved( + leftSlowShockRightSide = Primitive_2_Conserved( {1.66488, 0.578545, 0.050746, 0.250260, 1.984720, 1.12838, 1.344490, 0.402407}, gamma, primitiveScalar), - contactLeftSide = primitive2Conserved( + contactLeftSide = Primitive_2_Conserved( {1.65220, 0.578296, 0.049683, 0.249962, 1.981250, 1.12838, 1.346155, 0.402868}, gamma, primitiveScalar), - contactRightSide = primitive2Conserved( + contactRightSide = Primitive_2_Conserved( {1.49279, 0.578276, 0.049650, 0.249924, 1.981160, 1.12838, 1.346180, 0.402897}, gamma, primitiveScalar), - rightSlowShockLeftSide = primitive2Conserved( + rightSlowShockLeftSide = Primitive_2_Conserved( {1.48581, 0.573195, 0.035338, 0.245592, 1.956320, 1.12838, 1.370395, 0.410220}, gamma, primitiveScalar), - rightSlowShockRightSide = primitive2Conserved( + rightSlowShockRightSide = Primitive_2_Conserved( {1.23813, 0.450361, -0.275532, 0.151746, 1.439000, 1.12838, 1.609775, 0.482762}, gamma, primitiveScalar), - rightRotationLeftSide = primitive2Conserved( + rightRotationLeftSide = Primitive_2_Conserved( {1.23762, 0.450102, -0.274410, 0.145585, 1.437950, 1.12838, 1.606945, 0.493879}, gamma, primitiveScalar), - rightRotationRightSide = primitive2Conserved( + rightRotationRightSide = Primitive_2_Conserved( {1.23747, 0.449993, -0.180766, -0.090238, 1.437350, 1.12838, 1.503855, 0.752090}, gamma, primitiveScalar), - rightFastShockLeftSide = primitive2Conserved( + rightFastShockLeftSide = Primitive_2_Conserved( {1.22305, 0.424403, -0.171402, -0.085701, 1.409660, 1.12838, 1.447730, 0.723864}, gamma, primitiveScalar), - rightFastShockRightSide = primitive2Conserved( + rightFastShockRightSide = Primitive_2_Conserved( {1.00006, 0.000121, -0.000057, -0.000028, 1.000100, 1.12838, 1.128435, 0.564217}, gamma, primitiveScalar), - rightICs = primitive2Conserved({1.0, 0.0, 0.0, 1.0, 0.2, Bx, 4 * coef, 2 * coef}, gamma, primitiveScalar); + rightICs = Primitive_2_Conserved({1.0, 0.0, 0.0, 1.0, 0.2, Bx, 4 * coef, 2 * coef}, gamma, primitiveScalar); for (size_t direction = 0; direction < 3; direction++) { // Initial Condition Checks @@ -699,8 +699,8 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, DaiAndWoodwardShockTubeCorrectInputExpectCor 0, -1.1102230246251565e-16}; std::vector const scalarFlux{0, 0, 0}; Real thermalEnergyFlux = 0.0; - std::vector const testFluxes = computeFluxes(leftICs, leftICs, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + std::vector const testFluxes = Compute_Fluxes(leftICs, leftICs, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } { std::string const outputString{ @@ -714,8 +714,8 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, DaiAndWoodwardShockTubeCorrectInputExpectCor 0.0, 2.2204460492503131e-16, -1.1283791670955123}; std::vector const scalarFlux{0, 0, 0}; Real thermalEnergyFlux = 0.0; - std::vector const testFluxes = computeFluxes(rightICs, rightICs, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + std::vector const testFluxes = Compute_Fluxes(rightICs, rightICs, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } { std::string const outputString{ @@ -729,8 +729,8 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, DaiAndWoodwardShockTubeCorrectInputExpectCor 0.056156000248263505, -0.42800560867873094}; std::vector const scalarFlux{0.19211858644420357, 0.38677506032368902, 0.57540498691841158}; Real thermalEnergyFlux = 0.24104061926661174; - std::vector const testFluxes = computeFluxes(leftICs, rightICs, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + std::vector const testFluxes = Compute_Fluxes(leftICs, rightICs, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } { std::string const outputString{ @@ -744,8 +744,8 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, DaiAndWoodwardShockTubeCorrectInputExpectCor -0.056156000248263505, -0.55262526758377528}; std::vector const scalarFlux{-0.19211858644420357, -0.38677506032368902, -0.57540498691841158}; Real thermalEnergyFlux = -0.24104061926661174; - std::vector const testFluxes = computeFluxes(rightICs, leftICs, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + std::vector const testFluxes = Compute_Fluxes(rightICs, leftICs, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } // Cross wave checks @@ -762,8 +762,8 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, DaiAndWoodwardShockTubeCorrectInputExpectCor std::vector const scalarFlux{1.0717251365527865, 2.157607767226648, 3.2098715673061045}; Real thermalEnergyFlux = 1.2886155333980993; std::vector const testFluxes = - computeFluxes(leftFastShockLeftSide, leftFastShockRightSide, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + Compute_Fluxes(leftFastShockLeftSide, leftFastShockRightSide, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } { std::string const outputString{ @@ -778,8 +778,8 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, DaiAndWoodwardShockTubeCorrectInputExpectCor std::vector const scalarFlux{1.4450678072086958, 2.9092249669830292, 4.3280519500627666}; Real thermalEnergyFlux = 2.081389946702628; std::vector const testFluxes = - computeFluxes(leftFastShockRightSide, leftFastShockLeftSide, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + Compute_Fluxes(leftFastShockRightSide, leftFastShockLeftSide, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } { std::string const outputString{ @@ -793,8 +793,8 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, DaiAndWoodwardShockTubeCorrectInputExpectCor 0.7248753989305099, -0.059178137562467162}; std::vector const scalarFlux{1.0663278606879119, 2.1467419174572049, 3.1937064501984724}; Real thermalEnergyFlux = 1.5323573637968553; - std::vector const testFluxes = computeFluxes(leftRotationLeftSide, leftRotationRightSide, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + std::vector const testFluxes = Compute_Fluxes(leftRotationLeftSide, leftRotationRightSide, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } { std::string const outputString{ @@ -808,8 +808,8 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, DaiAndWoodwardShockTubeCorrectInputExpectCor 0.75541746728406312, -0.13479771672887678}; std::vector const scalarFlux{1.0666336820367937, 2.1473576000564334, 3.1946224007710313}; Real thermalEnergyFlux = 1.5333744977458499; - std::vector const testFluxes = computeFluxes(leftRotationRightSide, leftRotationLeftSide, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + std::vector const testFluxes = Compute_Fluxes(leftRotationRightSide, leftRotationLeftSide, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } { std::string const outputString{ @@ -824,8 +824,8 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, DaiAndWoodwardShockTubeCorrectInputExpectCor std::vector const scalarFlux{0.98208498809672407, 1.9771433235295921, 2.9413947405483505}; Real thermalEnergyFlux = 1.4145715457049737; std::vector const testFluxes = - computeFluxes(leftSlowShockLeftSide, leftSlowShockRightSide, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + Compute_Fluxes(leftSlowShockLeftSide, leftSlowShockRightSide, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } { std::string const outputString{ @@ -840,8 +840,8 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, DaiAndWoodwardShockTubeCorrectInputExpectCor std::vector const scalarFlux{1.1539181074575644, 2.323079478570472, 3.4560437166206879}; Real thermalEnergyFlux = 1.8639570701934713; std::vector const testFluxes = - computeFluxes(leftSlowShockRightSide, leftSlowShockLeftSide, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + Compute_Fluxes(leftSlowShockRightSide, leftSlowShockLeftSide, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } { std::string const outputString{ @@ -855,8 +855,8 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, DaiAndWoodwardShockTubeCorrectInputExpectCor 0.72241353110189066, -0.049073560388753337}; std::vector const scalarFlux{1.0576895969443709, 2.1293512784652289, 3.1678344087247892}; Real thermalEnergyFlux = 1.7186185770667382; - std::vector const testFluxes = computeFluxes(contactLeftSide, contactRightSide, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + std::vector const testFluxes = Compute_Fluxes(contactLeftSide, contactRightSide, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } { std::string const outputString{ @@ -870,8 +870,8 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, DaiAndWoodwardShockTubeCorrectInputExpectCor 0.72244123046603836, -0.049025527032060034}; std::vector const scalarFlux{0.95561355347926669, 1.9238507665182214, 2.8621114407298114}; Real thermalEnergyFlux = 1.7184928987481187; - std::vector const testFluxes = computeFluxes(contactRightSide, contactLeftSide, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + std::vector const testFluxes = Compute_Fluxes(contactRightSide, contactLeftSide, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } { std::string const outputString{ @@ -886,8 +886,8 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, DaiAndWoodwardShockTubeCorrectInputExpectCor std::vector const scalarFlux{0.89805755065482806, 1.8079784457999033, 2.6897282701827465}; Real thermalEnergyFlux = 1.6022319728249694; std::vector const testFluxes = - computeFluxes(rightSlowShockLeftSide, rightSlowShockRightSide, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + Compute_Fluxes(rightSlowShockLeftSide, rightSlowShockRightSide, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } { std::string const outputString{ @@ -902,8 +902,8 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, DaiAndWoodwardShockTubeCorrectInputExpectCor std::vector const scalarFlux{0.66594699332331575, 1.3406911495770899, 1.994545286188885}; Real thermalEnergyFlux = 1.0487665253534804; std::vector const testFluxes = - computeFluxes(rightSlowShockRightSide, rightSlowShockLeftSide, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + Compute_Fluxes(rightSlowShockRightSide, rightSlowShockLeftSide, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } { std::string const outputString{ @@ -918,8 +918,8 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, DaiAndWoodwardShockTubeCorrectInputExpectCor std::vector const scalarFlux{0.61661634650230224, 1.2413781978573175, 1.8467974773272691}; Real thermalEnergyFlux = 0.9707694646266285; std::vector const testFluxes = - computeFluxes(rightRotationLeftSide, rightRotationRightSide, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + Compute_Fluxes(rightRotationLeftSide, rightRotationRightSide, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } { std::string const outputString{ @@ -934,8 +934,8 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, DaiAndWoodwardShockTubeCorrectInputExpectCor std::vector const scalarFlux{0.61647714248450702, 1.2410979509359938, 1.8463805541782863}; Real thermalEnergyFlux = 0.9702629326292449; std::vector const testFluxes = - computeFluxes(rightRotationRightSide, rightRotationLeftSide, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + Compute_Fluxes(rightRotationRightSide, rightRotationLeftSide, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } { std::string const outputString{ @@ -950,8 +950,8 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, DaiAndWoodwardShockTubeCorrectInputExpectCor std::vector const scalarFlux{0.53996724117661621, 1.0870674521621893, 1.6172294888076189}; Real thermalEnergyFlux = 0.84330016382608752; std::vector const testFluxes = - computeFluxes(rightFastShockLeftSide, rightFastShockRightSide, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + Compute_Fluxes(rightFastShockLeftSide, rightFastShockRightSide, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } { std::string const outputString{ @@ -966,8 +966,8 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, DaiAndWoodwardShockTubeCorrectInputExpectCor std::vector const scalarFlux{0.044987744655527385, 0.090569777630660403, 0.13474059488003065}; Real thermalEnergyFlux = 0.060961577855018087; std::vector const testFluxes = - computeFluxes(rightFastShockRightSide, rightFastShockLeftSide, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + Compute_Fluxes(rightFastShockRightSide, rightFastShockLeftSide, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } } } @@ -990,44 +990,44 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, RyuAndJones4dShockTubeCorrectInputExpectCorr std::vector const // | Density | X-Velocity | Y-Velocity | Z-Velocity | // Pressure | X-Magnetic Field | Y-Magnetic Field | // Z-Magnetic Field | Adiabatic Index | Passive Scalars | - leftICs = primitive2Conserved({1.0, 0.0, 0.0, 0.0, 1.0, Bx, 0.0, 0.0}, gamma, primitiveScalar), - hydroRareLeftSide = primitive2Conserved( + leftICs = Primitive_2_Conserved({1.0, 0.0, 0.0, 0.0, 1.0, Bx, 0.0, 0.0}, gamma, primitiveScalar), + hydroRareLeftSide = Primitive_2_Conserved( {0.990414, 0.012415, 1.458910e-58, 6.294360e-59, 0.984076, Bx, 1.252355e-57, 5.366795e-58}, gamma, primitiveScalar), - hydroRareRightSide = primitive2Conserved( + hydroRareRightSide = Primitive_2_Conserved( {0.939477, 0.079800, 1.557120e-41, 7.505190e-42, 0.901182, Bx, 1.823624e-40, 8.712177e-41}, gamma, primitiveScalar), - switchOnSlowShockLeftSide = primitive2Conserved( + switchOnSlowShockLeftSide = Primitive_2_Conserved( {0.939863, 0.079142, 1.415730e-02, 7.134030e-03, 0.901820, Bx, 2.519650e-02, 1.290082e-02}, gamma, primitiveScalar), - switchOnSlowShockRightSide = primitive2Conserved( + switchOnSlowShockRightSide = Primitive_2_Conserved( {0.651753, 0.322362, 8.070540e-01, 4.425110e-01, 0.490103, Bx, 6.598380e-01, 3.618000e-01}, gamma, primitiveScalar), - contactLeftSide = primitive2Conserved( + contactLeftSide = Primitive_2_Conserved( {0.648553, 0.322525, 8.072970e-01, 4.426950e-01, 0.489951, Bx, 6.599295e-01, 3.618910e-01}, gamma, primitiveScalar), - contactRightSide = primitive2Conserved( + contactRightSide = Primitive_2_Conserved( {0.489933, 0.322518, 8.073090e-01, 4.426960e-01, 0.489980, Bx, 6.599195e-01, 3.618850e-01}, gamma, primitiveScalar), - slowShockLeftSide = primitive2Conserved( + slowShockLeftSide = Primitive_2_Conserved( {0.496478, 0.308418, 8.060830e-01, 4.420150e-01, 0.489823, Bx, 6.686695e-01, 3.666915e-01}, gamma, primitiveScalar), - slowShockRightSide = primitive2Conserved( + slowShockRightSide = Primitive_2_Conserved( {0.298260, -0.016740, 2.372870e-01, 1.287780e-01, 0.198864, Bx, 8.662095e-01, 4.757390e-01}, gamma, primitiveScalar), - rotationLeftSide = primitive2Conserved( + rotationLeftSide = Primitive_2_Conserved( {0.298001, -0.017358, 2.364790e-01, 1.278540e-01, 0.198448, Bx, 8.669425e-01, 4.750845e-01}, gamma, primitiveScalar), - rotationRightSide = primitive2Conserved( + rotationRightSide = Primitive_2_Conserved( {0.297673, -0.018657, 1.059540e-02, 9.996860e-01, 0.197421, Bx, 9.891580e-01, 1.024949e-04}, gamma, primitiveScalar), - fastRareLeftSide = primitive2Conserved( + fastRareLeftSide = Primitive_2_Conserved( {0.297504, -0.020018, 1.137420e-02, 1.000000e+00, 0.197234, Bx, 9.883860e-01, -4.981931e-17}, gamma, primitiveScalar), - fastRareRightSide = primitive2Conserved( + fastRareRightSide = Primitive_2_Conserved( {0.299996, -0.000033, 1.855120e-05, 1.000000e+00, 0.199995, Bx, 9.999865e-01, 1.737190e-16}, gamma, primitiveScalar), - rightICs = primitive2Conserved({0.3, 0.0, 0.0, 1.0, 0.2, Bx, 1.0, 0.0}, gamma, primitiveScalar); + rightICs = Primitive_2_Conserved({0.3, 0.0, 0.0, 1.0, 0.2, Bx, 1.0, 0.0}, gamma, primitiveScalar); for (size_t direction = 0; direction < 3; direction++) { // Initial Condition Checks @@ -1041,8 +1041,8 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, RyuAndJones4dShockTubeCorrectInputExpectCorr std::vector const fiducialFlux{0, 0.75499999999999989, 0, 0, 2.2204460492503131e-16, 0.0, 0, 0}; std::vector const scalarFlux{0, 0, 0}; Real thermalEnergyFlux = 0.0; - std::vector const testFluxes = computeFluxes(leftICs, leftICs, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + std::vector const testFluxes = Compute_Fluxes(leftICs, leftICs, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } { std::string const outputString{ @@ -1056,8 +1056,8 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, RyuAndJones4dShockTubeCorrectInputExpectCorr -0.69999999999999996}; std::vector const scalarFlux{-6.1450707278254418e-17, -1.2371317869019906e-16, -1.8404800947169341e-16}; Real thermalEnergyFlux = 0.0; - std::vector const testFluxes = computeFluxes(rightICs, rightICs, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + std::vector const testFluxes = Compute_Fluxes(rightICs, rightICs, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } { std::string const outputString{ @@ -1071,8 +1071,8 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, RyuAndJones4dShockTubeCorrectInputExpectCorr -0.40407579574102892, -0.21994567048141428}; std::vector const scalarFlux{0.10231837561464294, 0.20598837745492582, 0.30644876517012837}; Real thermalEnergyFlux = 0.13864309478397996; - std::vector const testFluxes = computeFluxes(leftICs, rightICs, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + std::vector const testFluxes = Compute_Fluxes(leftICs, rightICs, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } { std::string const outputString{ @@ -1086,8 +1086,8 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, RyuAndJones4dShockTubeCorrectInputExpectCorr 0.40407579574102892, -0.21994567048141428}; std::vector const scalarFlux{-0.10231837561464294, -0.20598837745492582, -0.30644876517012837}; Real thermalEnergyFlux = -0.13864309478397996; - std::vector const testFluxes = computeFluxes(rightICs, leftICs, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + std::vector const testFluxes = Compute_Fluxes(rightICs, leftICs, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } // Cross wave checks @@ -1103,8 +1103,8 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, RyuAndJones4dShockTubeCorrectInputExpectCorr -6.3466063324344113e-41, -3.0340891384335242e-41}; std::vector const scalarFlux{0.081956845911157775, 0.16499634214430131, 0.24546494288869905}; Real thermalEnergyFlux = 0.11034221894046368; - std::vector const testFluxes = computeFluxes(hydroRareLeftSide, hydroRareRightSide, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + std::vector const testFluxes = Compute_Fluxes(hydroRareLeftSide, hydroRareRightSide, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } { std::string const outputString{ @@ -1118,8 +1118,8 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, RyuAndJones4dShockTubeCorrectInputExpectCorr 6.2022392844946449e-41, 2.9606965476795895e-41}; std::vector const scalarFlux{0.014763904657692993, 0.029722840565719184, 0.044218649135708464}; Real thermalEnergyFlux = 0.019189877201961154; - std::vector const testFluxes = computeFluxes(hydroRareRightSide, hydroRareLeftSide, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + std::vector const testFluxes = Compute_Fluxes(hydroRareRightSide, hydroRareLeftSide, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } { std::string const outputString{ @@ -1134,8 +1134,8 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, RyuAndJones4dShockTubeCorrectInputExpectCorr std::vector const scalarFlux{0.21846177846784187, 0.43980943806215089, 0.65430419361309078}; Real thermalEnergyFlux = 0.2840373040888583; std::vector const testFluxes = - computeFluxes(switchOnSlowShockLeftSide, switchOnSlowShockRightSide, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + Compute_Fluxes(switchOnSlowShockLeftSide, switchOnSlowShockRightSide, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } { std::string const outputString{ @@ -1150,8 +1150,8 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, RyuAndJones4dShockTubeCorrectInputExpectCorr std::vector const scalarFlux{0.10803549193474633, 0.21749813322875222, 0.32357182079044206}; Real thermalEnergyFlux = 0.1100817647375162; std::vector const testFluxes = - computeFluxes(switchOnSlowShockRightSide, switchOnSlowShockLeftSide, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + Compute_Fluxes(switchOnSlowShockRightSide, switchOnSlowShockLeftSide, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } { std::string const outputString{ @@ -1165,8 +1165,8 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, RyuAndJones4dShockTubeCorrectInputExpectCorr -0.35226977371803297, -0.19316940226499904}; std::vector const scalarFlux{0.23154817591476573, 0.46615510432814616, 0.69349862290347741}; Real thermalEnergyFlux = 0.23702444986592192; - std::vector const testFluxes = computeFluxes(contactLeftSide, contactRightSide, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + std::vector const testFluxes = Compute_Fluxes(contactLeftSide, contactRightSide, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } { std::string const outputString{ @@ -1180,8 +1180,8 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, RyuAndJones4dShockTubeCorrectInputExpectCorr -0.3522739911439669, -0.19317084712861482}; std::vector const scalarFlux{0.17492525964231936, 0.35216128279157616, 0.52391009427617696}; Real thermalEnergyFlux = 0.23704936434506069; - std::vector const testFluxes = computeFluxes(contactRightSide, contactLeftSide, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + std::vector const testFluxes = Compute_Fluxes(contactRightSide, contactLeftSide, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } { std::string const outputString{ @@ -1195,8 +1195,8 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, RyuAndJones4dShockTubeCorrectInputExpectCorr -0.37198011453582402, -0.20397277844271294}; std::vector const scalarFlux{0.13001118457092631, 0.26173981750473918, 0.38939014356639379}; Real thermalEnergyFlux = 0.1738058891582446; - std::vector const testFluxes = computeFluxes(slowShockLeftSide, slowShockRightSide, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + std::vector const testFluxes = Compute_Fluxes(slowShockLeftSide, slowShockRightSide, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } { std::string const outputString{ @@ -1210,8 +1210,8 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, RyuAndJones4dShockTubeCorrectInputExpectCorr -0.15165427985881363, -0.082233932588833825}; std::vector const scalarFlux{0.042554081172858457, 0.085670301959209896, 0.12745164834795927}; Real thermalEnergyFlux = 0.038445630017261548; - std::vector const testFluxes = computeFluxes(slowShockRightSide, slowShockLeftSide, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + std::vector const testFluxes = Compute_Fluxes(slowShockRightSide, slowShockLeftSide, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } { std::string const outputString{ @@ -1225,8 +1225,8 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, RyuAndJones4dShockTubeCorrectInputExpectCorr -0.18030635192654354, -0.098381113757603278}; std::vector const scalarFlux{-0.0058303751166299484, -0.011737769516117116, -0.017462271505355991}; Real thermalEnergyFlux = -0.0052395622905745485; - std::vector const testFluxes = computeFluxes(rotationLeftSide, rotationRightSide, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + std::vector const testFluxes = Compute_Fluxes(rotationLeftSide, rotationRightSide, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } { std::string const outputString{ @@ -1240,8 +1240,8 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, RyuAndJones4dShockTubeCorrectInputExpectCorr -0.026148289294373184, -0.69914753968916865}; std::vector const scalarFlux{-0.0060437957583491572, -0.012167430087241717, -0.018101477236719343}; Real thermalEnergyFlux = -0.0054536013916442853; - std::vector const testFluxes = computeFluxes(rotationRightSide, rotationLeftSide, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + std::vector const testFluxes = Compute_Fluxes(rotationRightSide, rotationLeftSide, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } { std::string const outputString{ @@ -1255,8 +1255,8 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, RyuAndJones4dShockTubeCorrectInputExpectCorr -0.027710302430178135, -0.70000000000000007}; std::vector const scalarFlux{-0.0065705619215052757, -0.013227920997059845, -0.019679168822056604}; Real thermalEnergyFlux = -0.0059354109546219782; - std::vector const testFluxes = computeFluxes(fastRareLeftSide, fastRareRightSide, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + std::vector const testFluxes = Compute_Fluxes(fastRareLeftSide, fastRareRightSide, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } { std::string const outputString{ @@ -1270,8 +1270,8 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, RyuAndJones4dShockTubeCorrectInputExpectCorr -8.2898844654399895e-05, -0.69999999999999984}; std::vector const scalarFlux{-3.340017317660794e-05, -6.7241562798797897e-05, -0.00010003522597924373}; Real thermalEnergyFlux = -3.000421709818028e-05; - std::vector const testFluxes = computeFluxes(fastRareRightSide, fastRareLeftSide, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + std::vector const testFluxes = Compute_Fluxes(fastRareRightSide, fastRareLeftSide, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } } } @@ -1299,17 +1299,17 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, EinfeldtStrongRarefactionCorrectInputExpectC std::vector const // | Density | X-Velocity | Y-Velocity | Z-Velocity | // Pressure | X-Magnetic Field | Y-Magnetic Field | // Z-Magnetic Field | Adiabatic Index | Passive Scalars | - leftICs = primitive2Conserved({1.0, -V0, Vy, Vz, 0.45, Bx, 0.5, Bz}, gamma, primitiveScalar), + leftICs = Primitive_2_Conserved({1.0, -V0, Vy, Vz, 0.45, Bx, 0.5, Bz}, gamma, primitiveScalar), leftRarefactionCenter = - primitive2Conserved({0.368580, -1.180830, Vy, Vz, 0.111253, Bx, 0.183044, Bz}, gamma, primitiveScalar), + Primitive_2_Conserved({0.368580, -1.180830, Vy, Vz, 0.111253, Bx, 0.183044, Bz}, gamma, primitiveScalar), leftVxTurnOver = - primitive2Conserved({0.058814, -0.125475, Vy, Vz, 0.008819, Bx, 0.029215, Bz}, gamma, primitiveScalar), - midPoint = primitive2Conserved({0.034658, 0.000778, Vy, Vz, 0.006776, Bx, 0.017333, Bz}, gamma, primitiveScalar), + Primitive_2_Conserved({0.058814, -0.125475, Vy, Vz, 0.008819, Bx, 0.029215, Bz}, gamma, primitiveScalar), + midPoint = Primitive_2_Conserved({0.034658, 0.000778, Vy, Vz, 0.006776, Bx, 0.017333, Bz}, gamma, primitiveScalar), rightVxTurnOver = - primitive2Conserved({0.062587, 0.152160, Vy, Vz, 0.009521, Bx, 0.031576, Bz}, gamma, primitiveScalar), + Primitive_2_Conserved({0.062587, 0.152160, Vy, Vz, 0.009521, Bx, 0.031576, Bz}, gamma, primitiveScalar), rightRarefactionCenter = - primitive2Conserved({0.316485, 1.073560, Vy, Vz, 0.089875, Bx, 0.159366, Bz}, gamma, primitiveScalar), - rightICs = primitive2Conserved({1.0, V0, Vy, Vz, 0.45, Bx, 0.5, Bz}, gamma, primitiveScalar); + Primitive_2_Conserved({0.316485, 1.073560, Vy, Vz, 0.089875, Bx, 0.159366, Bz}, gamma, primitiveScalar), + rightICs = Primitive_2_Conserved({1.0, V0, Vy, Vz, 0.45, Bx, 0.5, Bz}, gamma, primitiveScalar); for (size_t direction = 0; direction < 3; direction++) { // Initial Condition Checks @@ -1323,8 +1323,8 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, EinfeldtStrongRarefactionCorrectInputExpectC std::vector const fiducialFlux{-2, 4.5750000000000002, -0, -0, -6.75, 0.0, -1, -0}; std::vector const scalarFlux{-2.2139950592000002, -4.4572370036000004, -6.6310283749999996}; Real thermalEnergyFlux = -1.3499999999999996; - std::vector const testFluxes = computeFluxes(leftICs, leftICs, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + std::vector const testFluxes = Compute_Fluxes(leftICs, leftICs, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } { std::string const outputString{ @@ -1336,8 +1336,8 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, EinfeldtStrongRarefactionCorrectInputExpectC std::vector const fiducialFlux{2, 4.5750000000000002, 0, 0, 6.75, 0.0, 1, 0}; std::vector const scalarFlux{2.2139950592000002, 4.4572370036000004, 6.6310283749999996}; Real thermalEnergyFlux = 1.3499999999999996; - std::vector const testFluxes = computeFluxes(rightICs, rightICs, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + std::vector const testFluxes = Compute_Fluxes(rightICs, rightICs, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } { std::string const outputString{ @@ -1349,8 +1349,8 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, EinfeldtStrongRarefactionCorrectInputExpectC std::vector const fiducialFlux{0, -1.4249999999999998, -0, -0, 0, 0.0, 0, -0}; std::vector const scalarFlux{0, 0, 0}; Real thermalEnergyFlux = 0.0; - std::vector const testFluxes = computeFluxes(leftICs, rightICs, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + std::vector const testFluxes = Compute_Fluxes(leftICs, rightICs, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } { std::string const outputString{ @@ -1362,8 +1362,8 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, EinfeldtStrongRarefactionCorrectInputExpectC std::vector const fiducialFlux{0, 10.574999999999999, 0, 0, 0, 0.0, 0, 0}; std::vector const scalarFlux{0, 0, 0}; Real thermalEnergyFlux = 0.0; - std::vector const testFluxes = computeFluxes(rightICs, leftICs, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + std::vector const testFluxes = Compute_Fluxes(rightICs, leftICs, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } // Intermediate state checks @@ -1378,8 +1378,8 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, EinfeldtStrongRarefactionCorrectInputExpectC -0.43523032140000006, 0.64193857338676208, -0, -0, -0.67142479846795033, 0.0, -0.21614384652000002, -0}; std::vector const scalarFlux{-0.48179889059681413, -0.9699623468164007, -1.4430123054318851}; Real thermalEnergyFlux = -0.19705631998499995; - std::vector const testFluxes = computeFluxes(leftICs, leftRarefactionCenter, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + std::vector const testFluxes = Compute_Fluxes(leftICs, leftRarefactionCenter, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } { std::string const outputString{ @@ -1391,8 +1391,8 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, EinfeldtStrongRarefactionCorrectInputExpectC std::vector const fiducialFlux{-2, 4.5750000000000002, -0, -0, -6.75, 0.0, -1, -0}; std::vector const scalarFlux{-2.2139950592000002, -4.4572370036000004, -6.6310283749999996}; Real thermalEnergyFlux = -1.3499999999999996; - std::vector const testFluxes = computeFluxes(leftRarefactionCenter, leftICs, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + std::vector const testFluxes = Compute_Fluxes(leftRarefactionCenter, leftICs, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } { std::string const outputString{ @@ -1405,8 +1405,8 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, EinfeldtStrongRarefactionCorrectInputExpectC -0.023176056428381629, -2.0437812714100764e-05, 0, 0, -0.00098843768795337005, 0.0, -0.011512369309265979, 0}; std::vector const scalarFlux{-0.025655837212088663, -0.051650588155052128, -0.076840543898599858}; Real thermalEnergyFlux = -0.0052127803322822184; - std::vector const testFluxes = computeFluxes(leftRarefactionCenter, leftVxTurnOver, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + std::vector const testFluxes = Compute_Fluxes(leftRarefactionCenter, leftVxTurnOver, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } { std::string const outputString{ @@ -1419,8 +1419,8 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, EinfeldtStrongRarefactionCorrectInputExpectC -0.43613091609689758, 0.64135749005731213, 0, 0, -0.67086080671260462, 0.0, -0.21659109937066717, 0}; std::vector const scalarFlux{-0.48279584670145054, -0.9719694288205295, -1.445998239926636}; Real thermalEnergyFlux = -0.19746407621898149; - std::vector const testFluxes = computeFluxes(leftVxTurnOver, leftRarefactionCenter, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + std::vector const testFluxes = Compute_Fluxes(leftVxTurnOver, leftRarefactionCenter, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } { std::string const outputString{ @@ -1433,8 +1433,8 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, EinfeldtStrongRarefactionCorrectInputExpectC -0.0011656375857387598, 0.0062355370788444902, 0, 0, -0.00055517615333601446, 0.0, -0.0005829533231464588, 0}; std::vector const scalarFlux{-0.0012903579278217153, -0.0025977614899708843, -0.0038646879530001054}; Real thermalEnergyFlux = -0.00034184143405415065; - std::vector const testFluxes = computeFluxes(leftVxTurnOver, midPoint, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + std::vector const testFluxes = Compute_Fluxes(leftVxTurnOver, midPoint, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } { std::string const outputString{ @@ -1447,8 +1447,8 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, EinfeldtStrongRarefactionCorrectInputExpectC -0.0068097924351817191, 0.010501781004354172, 0, 0, -0.0027509360975397175, 0.0, -0.0033826654536986789, 0}; std::vector const scalarFlux{-0.0075384234028349319, -0.015176429414463658, -0.022577963432775162}; Real thermalEnergyFlux = -0.001531664896602873; - std::vector const testFluxes = computeFluxes(midPoint, leftVxTurnOver, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + std::vector const testFluxes = Compute_Fluxes(midPoint, leftVxTurnOver, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } { std::string const outputString{ @@ -1461,8 +1461,8 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, EinfeldtStrongRarefactionCorrectInputExpectC 0.0013952100758668729, 0.0061359407125797273, 0, 0, 0.00065984543596031629, 0.0, 0.00069776606396793105, 0}; std::vector const scalarFlux{0.001544494107257657, 0.0031093909889746947, 0.0046258388010795683}; Real thermalEnergyFlux = 0.00040916715364737997; - std::vector const testFluxes = computeFluxes(midPoint, rightVxTurnOver, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + std::vector const testFluxes = Compute_Fluxes(midPoint, rightVxTurnOver, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } { std::string const outputString{ @@ -1475,8 +1475,8 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, EinfeldtStrongRarefactionCorrectInputExpectC 0.0090024688079190333, 0.011769373146023688, 0, 0, 0.003725251767222792, 0.0, 0.0045418689996141555, 0}; std::vector const scalarFlux{0.0099657107306674268, 0.020063068547205749, 0.029847813055181766}; Real thermalEnergyFlux = 0.0020542406295284269; - std::vector const testFluxes = computeFluxes(rightVxTurnOver, midPoint, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + std::vector const testFluxes = Compute_Fluxes(rightVxTurnOver, midPoint, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } { std::string const outputString{ @@ -1489,8 +1489,8 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, EinfeldtStrongRarefactionCorrectInputExpectC 0.023310393229073981, 0.0033086897645311728, 0, 0, 0.0034208520409618887, 0.0, 0.011760413130542123, 0}; std::vector const scalarFlux{0.025804547718589466, 0.051949973634547723, 0.077285939467198722}; Real thermalEnergyFlux = 0.0053191138878843835; - std::vector const testFluxes = computeFluxes(rightVxTurnOver, rightRarefactionCenter, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + std::vector const testFluxes = Compute_Fluxes(rightVxTurnOver, rightRarefactionCenter, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } { std::string const outputString{ @@ -1503,8 +1503,8 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, EinfeldtStrongRarefactionCorrectInputExpectC 0.33914253809565298, 0.46770133685446141, 0, 0, 0.46453338019960133, 0.0, 0.17077520175095764, 0}; std::vector const scalarFlux{0.37542995185416178, 0.75581933514738364, 1.1244318966408966}; Real thermalEnergyFlux = 0.1444638874418068; - std::vector const testFluxes = computeFluxes(rightRarefactionCenter, rightVxTurnOver, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + std::vector const testFluxes = Compute_Fluxes(rightRarefactionCenter, rightVxTurnOver, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } { std::string const outputString{ @@ -1517,8 +1517,8 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, EinfeldtStrongRarefactionCorrectInputExpectC 0.33976563660000003, 0.46733255780629601, 0, 0, 0.46427650313257612, 0.0, 0.17108896296000001, 0}; std::vector const scalarFlux{0.37611972035917141, 0.75720798400261535, 1.1264977885722693}; Real thermalEnergyFlux = 0.14472930749999999; - std::vector const testFluxes = computeFluxes(rightRarefactionCenter, rightICs, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + std::vector const testFluxes = Compute_Fluxes(rightRarefactionCenter, rightICs, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } { std::string const outputString{ @@ -1530,8 +1530,8 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, EinfeldtStrongRarefactionCorrectInputExpectC std::vector const fiducialFlux{2, 4.5750000000000002, 0, 0, 6.75, 0.0, 1, 0}; std::vector const scalarFlux{2.2139950592000002, 4.4572370036000004, 6.6310283749999996}; Real thermalEnergyFlux = 1.3499999999999996; - std::vector const testFluxes = computeFluxes(rightICs, rightRarefactionCenter, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + std::vector const testFluxes = Compute_Fluxes(rightICs, rightRarefactionCenter, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } } } @@ -1554,8 +1554,8 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, ConstantStatesExpectCorrectFlux) std::vector const // | Density | X-Velocity | Y-Velocity | Z-Velocity | // Pressure | X-Magnetic Field | Y-Magnetic Field | // Z-Magnetic Field | Adiabatic Index | Passive Scalars | - zeroMagneticField = primitive2Conserved({1e4, 0.0, 0.0, 0.0, 1.380658E-5, 0.0, 0.0, 0.0}, gamma, primitiveScalar), - onesMagneticField = primitive2Conserved({1e4, 0.0, 0.0, 0.0, 1.380658E-5, 1.0, 1.0, 1.0}, gamma, primitiveScalar); + zeroMagneticField = Primitive_2_Conserved({1e4, 0.0, 0.0, 0.0, 1.380658E-5, 0.0, 0.0, 0.0}, gamma, primitiveScalar), + onesMagneticField = Primitive_2_Conserved({1e4, 0.0, 0.0, 0.0, 1.380658E-5, 1.0, 1.0, 1.0}, gamma, primitiveScalar); for (size_t direction = 0; direction < 3; direction++) { { @@ -1568,8 +1568,8 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, ConstantStatesExpectCorrectFlux) std::vector const fiducialFlux{0, 1.380658e-05, 0, 0, 0, 0, 0, 0}; std::vector const scalarFlux{0, 0, 0}; Real thermalEnergyFlux = 0.; - std::vector const testFluxes = computeFluxes(zeroMagneticField, zeroMagneticField, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + std::vector const testFluxes = Compute_Fluxes(zeroMagneticField, zeroMagneticField, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } { std::string const outputString{ @@ -1583,8 +1583,8 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, ConstantStatesExpectCorrectFlux) 3.4694469519536142e-18, 3.4694469519536142e-18}; std::vector const scalarFlux{1.5731381063233131e-14, 3.1670573744690958e-14, 4.7116290424753513e-14}; Real thermalEnergyFlux = 0.; - std::vector const testFluxes = computeFluxes(onesMagneticField, onesMagneticField, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + std::vector const testFluxes = Compute_Fluxes(onesMagneticField, onesMagneticField, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } } } @@ -1606,7 +1606,7 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, DegenerateStateCorrectInputExpectCorrectOutp // Pressure | X-Magnetic Field | Y-Magnetic Field | // Z-Magnetic Field | Adiabatic Index | Passive // Scalars | - state = primitive2Conserved({1.0, 1.0, 1.0, 1.0, 1.0, 3.0E4, 1.0, 1.0}, gamma, primitiveScalar); + state = Primitive_2_Conserved({1.0, 1.0, 1.0, 1.0, 1.0, 3.0E4, 1.0, 1.0}, gamma, primitiveScalar); std::vector const fiducialFlux{1, -449999997, -29999, -29999, -59994, 0.0, -29999, -29999}; std::vector const scalarFlux{1.1069975296000001, 2.2286185018000002, 3.3155141874999998}; @@ -1623,8 +1623,8 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, DegenerateStateCorrectInputExpectCorrectOutp // the Athena solver with theses tests gave me -0.00080700946455175148 // though for (size_t direction = 0; direction < 3; direction++) { - std::vector const testFluxes = computeFluxes(state, state, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + std::vector const testFluxes = Compute_Fluxes(state, state, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } } // ========================================================================= @@ -1658,8 +1658,8 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, AllZeroesExpectAllZeroes) for (size_t direction = 0; direction < 3; direction++) { // Compute the fluxes and check for correctness // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const testFluxes = computeFluxes(state, state, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + std::vector const testFluxes = Compute_Fluxes(state, state, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } } // ========================================================================= @@ -1733,8 +1733,8 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, UnphysicalValuesExpectAutomaticFix) std::vector const fiducialFlux{1, 1.5, 0, 0, -1.6254793235168146e-16, 0, 0, 0}; std::vector const scalarFlux{1.1069975296000001, 2.2286185018000002, 3.3155141874999998}; Real thermalEnergyFlux = -1.5; - std::vector const testFluxes = computeFluxes(negativePressure, negativePressure, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + std::vector const testFluxes = Compute_Fluxes(negativePressure, negativePressure, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } { std::string const outputString{ @@ -1746,8 +1746,8 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, UnphysicalValuesExpectAutomaticFix) std::vector const fiducialFlux{1, 1.5, 0, 0, -1.5, 0, 0, 0}; std::vector const scalarFlux{1.1069975296000001, 2.2286185018000002, 3.3155141874999998}; Real thermalEnergyFlux = -6.333333333333333; - std::vector const testFluxes = computeFluxes(negativeEnergy, negativeEnergy, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + std::vector const testFluxes = Compute_Fluxes(negativeEnergy, negativeEnergy, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } { std::string const outputString{ @@ -1759,8 +1759,8 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, UnphysicalValuesExpectAutomaticFix) std::vector const fiducialFlux{1, 1E+20, 1e+20, 1e+20, -5e+19, 0, 0, 0}; std::vector const scalarFlux{1.1069975296000002e+20, 2.2286185018000002e+20, 3.3155141874999997e+20}; Real thermalEnergyFlux = -1.5000000000000001e+40; - std::vector const testFluxes = computeFluxes(negativeDensity, negativeDensity, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + std::vector const testFluxes = Compute_Fluxes(negativeDensity, negativeDensity, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } { std::string const outputString{ @@ -1773,8 +1773,8 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, UnphysicalValuesExpectAutomaticFix) std::vector const scalarFlux{-1.1069975296000002e+20, -2.2286185018000002e+20, -3.3155141874999997e+20}; Real thermalEnergyFlux = 1.5000000000000001e+40; std::vector const testFluxes = - computeFluxes(negativeDensityEnergyPressure, negativeDensityEnergyPressure, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + Compute_Fluxes(negativeDensityEnergyPressure, negativeDensityEnergyPressure, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } { std::string const outputString{ @@ -1787,8 +1787,8 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, UnphysicalValuesExpectAutomaticFix) std::vector const scalarFlux{1.1069975296000002e+20, 2.2286185018000002e+20, 3.3155141874999997e+20}; Real thermalEnergyFlux = -1.5000000000000001e+40; std::vector const testFluxes = - computeFluxes(negativeDensityPressure, negativeDensityPressure, gamma, direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + Compute_Fluxes(negativeDensityPressure, negativeDensityPressure, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } } } @@ -1878,8 +1878,8 @@ TEST(tMHDHlldInternalApproximateLRWaveSpeeds, CorrectInputExpectCorrectOutput) parameters.stateLVec.at(i), parameters.stateRVec.at(i), parameters.magneticX.at(i), parameters.gamma); // Now check results - testingUtilities::checkResults(fiducialSpeedL[i], testSpeed.L, parameters.names.at(i) + ", SpeedL"); - testingUtilities::checkResults(fiducialSpeedR.at(i), testSpeed.R, parameters.names.at(i) + ", SpeedR"); + testingUtilities::Check_Results(fiducialSpeedL[i], testSpeed.L, parameters.names.at(i) + ", SpeedL"); + testingUtilities::Check_Results(fiducialSpeedR.at(i), testSpeed.R, parameters.names.at(i) + ", SpeedR"); } } // ========================================================================= @@ -1902,7 +1902,7 @@ TEST(tMHDHlldInternalApproximateMiddleWaveSpeed, CorrectInputExpectCorrectOutput parameters.speed.at(i)); // Now check results - testingUtilities::checkResults(fiducialSpeedM.at(i), testSpeed.M, parameters.names.at(i) + ", SpeedM"); + testingUtilities::Check_Results(fiducialSpeedM.at(i), testSpeed.M, parameters.names.at(i) + ", SpeedM"); } } // ========================================================================= @@ -1927,8 +1927,8 @@ TEST(tMHDHlldInternalApproximateStarWaveSpeed, CorrectInputExpectCorrectOutput) parameters.magneticX.at(i), 1); // Now check results - testingUtilities::checkResults(fiducialSpeedStarL.at(i), testSpeed.LStar, parameters.names.at(i) + ", SpeedStarL"); - testingUtilities::checkResults(fiducialSpeedStarR.at(i), testSpeed.RStar, parameters.names.at(i) + ", SpeedStarR"); + testingUtilities::Check_Results(fiducialSpeedStarL.at(i), testSpeed.LStar, parameters.names.at(i) + ", SpeedStarL"); + testingUtilities::Check_Results(fiducialSpeedStarR.at(i), testSpeed.RStar, parameters.names.at(i) + ", SpeedStarR"); } } // ========================================================================= @@ -1953,18 +1953,18 @@ TEST(tMHDHlldInternalNonStarFluxes, CorrectInputExpectCorrectOutput) mhd::_internal::nonStarFluxes(parameters.stateLVec.at(i), parameters.magneticX.at(i)); // Now check results - testingUtilities::checkResults(fiducialFlux[i].density, testFlux.density, parameters.names.at(i) + ", DensityFlux"); - testingUtilities::checkResults(fiducialFlux[i].momentumX, testFlux.momentumX, + testingUtilities::Check_Results(fiducialFlux[i].density, testFlux.density, parameters.names.at(i) + ", DensityFlux"); + testingUtilities::Check_Results(fiducialFlux[i].momentumX, testFlux.momentumX, parameters.names.at(i) + ", MomentumFluxX"); - testingUtilities::checkResults(fiducialFlux[i].momentumY, testFlux.momentumY, + testingUtilities::Check_Results(fiducialFlux[i].momentumY, testFlux.momentumY, parameters.names.at(i) + ", MomentumFluxY"); - testingUtilities::checkResults(fiducialFlux[i].momentumZ, testFlux.momentumZ, + testingUtilities::Check_Results(fiducialFlux[i].momentumZ, testFlux.momentumZ, parameters.names.at(i) + ", MomentumFluxZ"); - testingUtilities::checkResults(fiducialFlux[i].magneticY, testFlux.magneticY, + testingUtilities::Check_Results(fiducialFlux[i].magneticY, testFlux.magneticY, parameters.names.at(i) + ", MagneticFluxY"); - testingUtilities::checkResults(fiducialFlux[i].magneticZ, testFlux.magneticZ, + testingUtilities::Check_Results(fiducialFlux[i].magneticZ, testFlux.magneticZ, parameters.names.at(i) + ", MagneticFluxZ"); - testingUtilities::checkResults(fiducialFlux[i].energy, testFlux.energy, parameters.names.at(i) + ", EnergyFlux"); + testingUtilities::Check_Results(fiducialFlux[i].energy, testFlux.energy, parameters.names.at(i) + ", EnergyFlux"); } } // ========================================================================= @@ -1991,15 +1991,15 @@ TEST(tMHDHlldInternalComputeStarState, CorrectInputNonDegenerateExpectCorrectOut parameters.magneticX.at(i), parameters.totalPressureStar.at(i)); // Now check results - testingUtilities::checkResults(fiducialStarState.at(i).velocityY, testStarState.velocityY, + testingUtilities::Check_Results(fiducialStarState.at(i).velocityY, testStarState.velocityY, parameters.names.at(i) + ", VelocityStarY"); - testingUtilities::checkResults(fiducialStarState.at(i).velocityZ, testStarState.velocityZ, + testingUtilities::Check_Results(fiducialStarState.at(i).velocityZ, testStarState.velocityZ, parameters.names.at(i) + ", VelocityStarZ"); - testingUtilities::checkResults(fiducialStarState.at(i).energy, testStarState.energy, + testingUtilities::Check_Results(fiducialStarState.at(i).energy, testStarState.energy, parameters.names.at(i) + ", EnergyStar"); - testingUtilities::checkResults(fiducialStarState.at(i).magneticY, testStarState.magneticY, + testingUtilities::Check_Results(fiducialStarState.at(i).magneticY, testStarState.magneticY, parameters.names.at(i) + ", MagneticStarY"); - testingUtilities::checkResults(fiducialStarState.at(i).magneticZ, testStarState.magneticZ, + testingUtilities::Check_Results(fiducialStarState.at(i).magneticZ, testStarState.magneticZ, parameters.names.at(i) + ", MagneticStarZ"); } } @@ -2029,19 +2029,19 @@ TEST(tMHDHlldInternalStarFluxes, CorrectInputNonDegenerateExpectCorrectOutput) parameters.speed.at(i), parameters.speed.at(i).L); // Now check results - testingUtilities::checkResults(fiducialFlux[i].density, testFlux.density, + testingUtilities::Check_Results(fiducialFlux[i].density, testFlux.density, parameters.names.at(i) + ", DensityStarFlux"); - testingUtilities::checkResults(fiducialFlux[i].momentumX, testFlux.momentumX, + testingUtilities::Check_Results(fiducialFlux[i].momentumX, testFlux.momentumX, parameters.names.at(i) + ", MomentumStarFluxX"); - testingUtilities::checkResults(fiducialFlux[i].momentumY, testFlux.momentumY, + testingUtilities::Check_Results(fiducialFlux[i].momentumY, testFlux.momentumY, parameters.names.at(i) + ", MomentumStarFluxY"); - testingUtilities::checkResults(fiducialFlux[i].momentumZ, testFlux.momentumZ, + testingUtilities::Check_Results(fiducialFlux[i].momentumZ, testFlux.momentumZ, parameters.names.at(i) + ", MomentumStarFluxZ"); - testingUtilities::checkResults(fiducialFlux[i].energy, testFlux.energy, + testingUtilities::Check_Results(fiducialFlux[i].energy, testFlux.energy, parameters.names.at(i) + ", EnergyStarFlux"); - testingUtilities::checkResults(fiducialFlux[i].magneticY, testFlux.magneticY, + testingUtilities::Check_Results(fiducialFlux[i].magneticY, testFlux.magneticY, parameters.names.at(i) + ", MagneticStarFluxY", 1.0E-13); - testingUtilities::checkResults(fiducialFlux[i].magneticZ, testFlux.magneticZ, + testingUtilities::Check_Results(fiducialFlux[i].magneticZ, testFlux.magneticZ, parameters.names.at(i) + ", MagneticStarFluxZ", 7.0E-13); } } @@ -2071,15 +2071,15 @@ TEST(tMHDHlldInternalComputeStarState, CorrectInputDegenerateExpectCorrectOutput parameters.magneticX.at(i), parameters.totalPressureStar.at(i)); // Now check results - testingUtilities::checkResults(fiducialStarState.at(i).velocityY, testStarState.velocityY, + testingUtilities::Check_Results(fiducialStarState.at(i).velocityY, testStarState.velocityY, parameters.names.at(i) + ", VelocityStarY"); - testingUtilities::checkResults(fiducialStarState.at(i).velocityZ, testStarState.velocityZ, + testingUtilities::Check_Results(fiducialStarState.at(i).velocityZ, testStarState.velocityZ, parameters.names.at(i) + ", VelocityStarZ"); - testingUtilities::checkResults(fiducialStarState.at(i).energy, testStarState.energy, + testingUtilities::Check_Results(fiducialStarState.at(i).energy, testStarState.energy, parameters.names.at(i) + ", EnergyStar"); - testingUtilities::checkResults(fiducialStarState.at(i).magneticY, testStarState.magneticY, + testingUtilities::Check_Results(fiducialStarState.at(i).magneticY, testStarState.magneticY, parameters.names.at(i) + ", MagneticStarY"); - testingUtilities::checkResults(fiducialStarState.at(i).magneticZ, testStarState.magneticZ, + testingUtilities::Check_Results(fiducialStarState.at(i).magneticZ, testStarState.magneticZ, parameters.names.at(i) + ", MagneticStarZ"); } } @@ -2106,19 +2106,19 @@ TEST(tMHDHlldInternalStarFluxes, CorrectInputDegenerateExpectCorrectOutput) parameters.speed.at(i), parameters.speed.at(i).L); // Now check results - testingUtilities::checkResults(fiducialFlux[i].density, testFlux.density, + testingUtilities::Check_Results(fiducialFlux[i].density, testFlux.density, parameters.names.at(i) + ", DensityStarFlux"); - testingUtilities::checkResults(fiducialFlux[i].momentumX, testFlux.momentumX, + testingUtilities::Check_Results(fiducialFlux[i].momentumX, testFlux.momentumX, parameters.names.at(i) + ", MomentumStarFluxX"); - testingUtilities::checkResults(fiducialFlux[i].momentumY, testFlux.momentumY, + testingUtilities::Check_Results(fiducialFlux[i].momentumY, testFlux.momentumY, parameters.names.at(i) + ", MomentumStarFluxY"); - testingUtilities::checkResults(fiducialFlux[i].momentumZ, testFlux.momentumZ, + testingUtilities::Check_Results(fiducialFlux[i].momentumZ, testFlux.momentumZ, parameters.names.at(i) + ", MomentumStarFluxZ"); - testingUtilities::checkResults(fiducialFlux[i].energy, testFlux.energy, + testingUtilities::Check_Results(fiducialFlux[i].energy, testFlux.energy, parameters.names.at(i) + ", EnergyStarFlux"); - testingUtilities::checkResults(fiducialFlux[i].magneticY, testFlux.magneticY, + testingUtilities::Check_Results(fiducialFlux[i].magneticY, testFlux.magneticY, parameters.names.at(i) + ", MagneticStarFluxY"); - testingUtilities::checkResults(fiducialFlux[i].magneticZ, testFlux.magneticZ, + testingUtilities::Check_Results(fiducialFlux[i].magneticZ, testFlux.magneticZ, parameters.names.at(i) + ", MagneticStarFluxZ"); } } @@ -2146,17 +2146,17 @@ TEST(tMHDHlldInternalDoubleStarState, CorrectInputNonDegenerateExpectCorrectOutp parameters.totalPressureStar.at(i), parameters.speed.at(i)); // Now check results - testingUtilities::checkResults(fiducialState.at(i).velocityY, testState.velocityY, + testingUtilities::Check_Results(fiducialState.at(i).velocityY, testState.velocityY, parameters.names.at(i) + ", VelocityDoubleStarY"); - testingUtilities::checkResults(fiducialState.at(i).velocityZ, testState.velocityZ, + testingUtilities::Check_Results(fiducialState.at(i).velocityZ, testState.velocityZ, parameters.names.at(i) + ", VelocityDoubleStarZ"); - testingUtilities::checkResults(fiducialState.at(i).magneticY, testState.magneticY, + testingUtilities::Check_Results(fiducialState.at(i).magneticY, testState.magneticY, parameters.names.at(i) + ", MagneticDoubleStarY"); - testingUtilities::checkResults(fiducialState.at(i).magneticZ, testState.magneticZ, + testingUtilities::Check_Results(fiducialState.at(i).magneticZ, testState.magneticZ, parameters.names.at(i) + ", MagneticDoubleStarZ"); - testingUtilities::checkResults(fiducialState.at(i).energyL, testState.energyL, + testingUtilities::Check_Results(fiducialState.at(i).energyL, testState.energyL, parameters.names.at(i) + ", EnergyDoubleStarL"); - testingUtilities::checkResults(fiducialState.at(i).energyR, testState.energyR, + testingUtilities::Check_Results(fiducialState.at(i).energyR, testState.energyR, parameters.names.at(i) + ", EnergyDoubleStarR"); } } @@ -2180,17 +2180,17 @@ TEST(tMHDHlldInternalDoubleStarState, CorrectInputDegenerateExpectCorrectOutput) parameters.totalPressureStar.at(i), parameters.speed.at(i)); // Now check results - testingUtilities::checkResults(fiducialState.at(i).velocityY, testState.velocityY, + testingUtilities::Check_Results(fiducialState.at(i).velocityY, testState.velocityY, parameters.names.at(i) + ", VelocityDoubleStarY"); - testingUtilities::checkResults(fiducialState.at(i).velocityZ, testState.velocityZ, + testingUtilities::Check_Results(fiducialState.at(i).velocityZ, testState.velocityZ, parameters.names.at(i) + ", VelocityDoubleStarZ"); - testingUtilities::checkResults(fiducialState.at(i).magneticY, testState.magneticY, + testingUtilities::Check_Results(fiducialState.at(i).magneticY, testState.magneticY, parameters.names.at(i) + ", MagneticDoubleStarY"); - testingUtilities::checkResults(fiducialState.at(i).magneticZ, testState.magneticZ, + testingUtilities::Check_Results(fiducialState.at(i).magneticZ, testState.magneticZ, parameters.names.at(i) + ", MagneticDoubleStarZ"); - testingUtilities::checkResults(fiducialState.at(i).energyL, testState.energyL, + testingUtilities::Check_Results(fiducialState.at(i).energyL, testState.energyL, parameters.names.at(i) + ", EnergyDoubleStarL"); - testingUtilities::checkResults(fiducialState.at(i).energyR, testState.energyR, + testingUtilities::Check_Results(fiducialState.at(i).energyR, testState.energyR, parameters.names.at(i) + ", EnergyDoubleStarR"); } } @@ -2218,19 +2218,19 @@ TEST(tMHDHlldInternalDoubleStarFluxes, CorrectInputExpectCorrectOutput) parameters.speed.at(i).L, parameters.speed.at(i).LStar); // Now check results - testingUtilities::checkResults(fiducialFlux[i].density, testFlux.density, + testingUtilities::Check_Results(fiducialFlux[i].density, testFlux.density, parameters.names.at(i) + ", DensityStarFlux", 5.0E-14); - testingUtilities::checkResults(fiducialFlux[i].momentumX, testFlux.momentumX, + testingUtilities::Check_Results(fiducialFlux[i].momentumX, testFlux.momentumX, parameters.names.at(i) + ", MomentumStarFluxX"); - testingUtilities::checkResults(fiducialFlux[i].momentumY, testFlux.momentumY, + testingUtilities::Check_Results(fiducialFlux[i].momentumY, testFlux.momentumY, parameters.names.at(i) + ", MomentumStarFluxY"); - testingUtilities::checkResults(fiducialFlux[i].momentumZ, testFlux.momentumZ, + testingUtilities::Check_Results(fiducialFlux[i].momentumZ, testFlux.momentumZ, parameters.names.at(i) + ", MomentumStarFluxZ"); - testingUtilities::checkResults(fiducialFlux[i].energy, testFlux.energy, + testingUtilities::Check_Results(fiducialFlux[i].energy, testFlux.energy, parameters.names.at(i) + ", EnergyStarFlux"); - testingUtilities::checkResults(fiducialFlux[i].magneticY, testFlux.magneticY, + testingUtilities::Check_Results(fiducialFlux[i].magneticY, testFlux.magneticY, parameters.names.at(i) + ", MagneticStarFluxY"); - testingUtilities::checkResults(fiducialFlux[i].magneticZ, testFlux.magneticZ, + testingUtilities::Check_Results(fiducialFlux[i].magneticZ, testFlux.magneticZ, parameters.names.at(i) + ", MagneticStarFluxZ"); } } @@ -2338,7 +2338,7 @@ TEST(tMHDHlldInternalStarTotalPressure, CorrectInputExpectCorrectOutput) parameters.speed.at(i)); // Now check results - testingUtilities::checkResults(fiducialPressure.at(i), testPressure, + testingUtilities::Check_Results(fiducialPressure.at(i), testPressure, parameters.names.at(i) + ", total pressure in the star states"); } } @@ -2387,15 +2387,15 @@ TEST(tMHDHlldInternalLoadState, CorrectInputExpectCorrectOutput) parameters.gamma, threadId, n_cells, o1, o2, o3); // Now check results - testingUtilities::checkResults(fiducialState.at(direction).density, testState.density, ", Density"); - testingUtilities::checkResults(fiducialState.at(direction).velocityX, testState.velocityX, ", velocityX"); - testingUtilities::checkResults(fiducialState.at(direction).velocityY, testState.velocityY, ", velocityY"); - testingUtilities::checkResults(fiducialState.at(direction).velocityZ, testState.velocityZ, ", velocityZ"); - testingUtilities::checkResults(fiducialState.at(direction).energy, testState.energy, ", energy"); - testingUtilities::checkResults(fiducialState.at(direction).magneticY, testState.magneticY, ", magneticY"); - testingUtilities::checkResults(fiducialState.at(direction).magneticZ, testState.magneticZ, ", magneticZ"); - testingUtilities::checkResults(fiducialState.at(direction).gasPressure, testState.gasPressure, ", gasPressure"); - testingUtilities::checkResults(fiducialState.at(direction).totalPressure, testState.totalPressure, + testingUtilities::Check_Results(fiducialState.at(direction).density, testState.density, ", Density"); + testingUtilities::Check_Results(fiducialState.at(direction).velocityX, testState.velocityX, ", velocityX"); + testingUtilities::Check_Results(fiducialState.at(direction).velocityY, testState.velocityY, ", velocityY"); + testingUtilities::Check_Results(fiducialState.at(direction).velocityZ, testState.velocityZ, ", velocityZ"); + testingUtilities::Check_Results(fiducialState.at(direction).energy, testState.energy, ", energy"); + testingUtilities::Check_Results(fiducialState.at(direction).magneticY, testState.magneticY, ", magneticY"); + testingUtilities::Check_Results(fiducialState.at(direction).magneticZ, testState.magneticZ, ", magneticZ"); + testingUtilities::Check_Results(fiducialState.at(direction).gasPressure, testState.gasPressure, ", gasPressure"); + testingUtilities::Check_Results(fiducialState.at(direction).totalPressure, testState.totalPressure, ", totalPressure"); } } diff --git a/src/system_tests/hydro_system_tests.cpp b/src/system_tests/hydro_system_tests.cpp index dd92fbddc..be59ffba6 100644 --- a/src/system_tests/hydro_system_tests.cpp +++ b/src/system_tests/hydro_system_tests.cpp @@ -148,9 +148,9 @@ class tHYDROtMHDSYSTEMLinearWavesParameterizedMpi : public ::testing::TestWithPa double const allowedError = 1E-7; #endif // PCM - void setLaunchParams(double const &waveSpeed, double const &rEigenVec_rho, double const &rEigenVec_MomentumX, - double const &rEigenVec_MomentumY, double const &rEigenVec_MomentumZ, double const &rEigenVec_E, - double const &vx = 0.0) + void Set_Launch_Params(double const &waveSpeed, double const &rEigenVec_rho, double const &rEigenVec_MomentumX, + double const &rEigenVec_MomentumY, double const &rEigenVec_MomentumZ, double const &rEigenVec_E, + double const &vx = 0.0) { // Constant for all tests size_t const N = 32; @@ -213,7 +213,7 @@ TEST_P(tHYDROtMHDSYSTEMLinearWavesParameterizedMpi, SoundWaveRightMovingCorrectI double const rEigenVec_E = 1.5; // Set the launch parameters - setLaunchParams(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, rEigenVec_MomentumY, rEigenVec_MomentumZ, rEigenVec_E); + Set_Launch_Params(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, rEigenVec_MomentumY, rEigenVec_MomentumZ, rEigenVec_E); // Set the number of MPI ranks waveTest.numMpiRanks = GetParam(); @@ -238,7 +238,7 @@ TEST_P(tHYDROtMHDSYSTEMLinearWavesParameterizedMpi, SoundWaveLeftMovingCorrectIn double const rEigenVec_E = 1.5; // Set the launch parameters - setLaunchParams(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, rEigenVec_MomentumY, rEigenVec_MomentumZ, rEigenVec_E); + Set_Launch_Params(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, rEigenVec_MomentumY, rEigenVec_MomentumZ, rEigenVec_E); // Set the number of MPI ranks waveTest.numMpiRanks = GetParam(); @@ -266,7 +266,7 @@ TEST_P(tHYDROtMHDSYSTEMLinearWavesParameterizedMpi, HydroContactWaveCorrectInput double const velocityX = waveSpeed; // Set the launch parameters - setLaunchParams(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, rEigenVec_MomentumY, rEigenVec_MomentumZ, rEigenVec_E, + Set_Launch_Params(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, rEigenVec_MomentumY, rEigenVec_MomentumZ, rEigenVec_E, velocityX); // Set the number of MPI ranks diff --git a/src/system_tests/mhd_system_tests.cpp b/src/system_tests/mhd_system_tests.cpp index 622f0aa69..50235d71b 100644 --- a/src/system_tests/mhd_system_tests.cpp +++ b/src/system_tests/mhd_system_tests.cpp @@ -33,7 +33,7 @@ class tMHDSYSTEMLinearWavesParameterizedAngle : public ::testing::TestWithParam< protected: systemTest::SystemTestRunner waveTest; - void setLaunchParams(double const &waveSpeed, double const &rEigenVec_rho, double const &rEigenVec_MomentumX, + void Set_Launch_Params(double const &waveSpeed, double const &rEigenVec_rho, double const &rEigenVec_MomentumX, double const &rEigenVec_MomentumY, double const &rEigenVec_MomentumZ, double const &rEigenVec_E, double const &rEigenVec_Bx, double const &rEigenVec_By, double const &rEigenVec_Bz, double const &pitch, double const &yaw, double const &domain, int const &domain_direction, @@ -80,7 +80,7 @@ class tMHDSYSTEMLinearWavesParameterizedAngle : public ::testing::TestWithParam< std::swap(rEigenVec_MomentumX_rot, rEigenVec_MomentumZ_rot); break; default: - throw std::invalid_argument("Invalid value of domain_direction given to setLaunchParams"); + throw std::invalid_argument("Invalid value of domain_direction given to Set_Launch_Params"); break; } @@ -148,7 +148,7 @@ TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, FastMagnetosonicWaveRightMovingC auto [pitch, yaw, domain, domain_direction] = GetParam(); // Set the launch parameters - setLaunchParams(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, rEigenVec_MomentumY, rEigenVec_MomentumZ, rEigenVec_E, + Set_Launch_Params(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, rEigenVec_MomentumY, rEigenVec_MomentumZ, rEigenVec_E, rEigenVec_Bx, rEigenVec_By, rEigenVec_Bz, pitch, yaw, domain, domain_direction); // Set the number of timesteps @@ -184,7 +184,7 @@ TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, FastMagnetosonicWaveLeftMovingCo auto [pitch, yaw, domain, domain_direction] = GetParam(); // Set the launch parameters - setLaunchParams(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, rEigenVec_MomentumY, rEigenVec_MomentumZ, rEigenVec_E, + Set_Launch_Params(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, rEigenVec_MomentumY, rEigenVec_MomentumZ, rEigenVec_E, rEigenVec_Bx, rEigenVec_By, rEigenVec_Bz, pitch, yaw, domain, domain_direction); // Set the number of timesteps @@ -222,7 +222,7 @@ TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, SlowMagnetosonicWaveRightMovingC auto [pitch, yaw, domain, domain_direction] = GetParam(); // Set the launch parameters - setLaunchParams(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, rEigenVec_MomentumY, rEigenVec_MomentumZ, rEigenVec_E, + Set_Launch_Params(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, rEigenVec_MomentumY, rEigenVec_MomentumZ, rEigenVec_E, rEigenVec_Bx, rEigenVec_By, rEigenVec_Bz, pitch, yaw, domain, domain_direction); // Set the number of timesteps @@ -258,7 +258,7 @@ TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, SlowMagnetosonicWaveLeftMovingCo auto [pitch, yaw, domain, domain_direction] = GetParam(); // Set the launch parameters - setLaunchParams(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, rEigenVec_MomentumY, rEigenVec_MomentumZ, rEigenVec_E, + Set_Launch_Params(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, rEigenVec_MomentumY, rEigenVec_MomentumZ, rEigenVec_E, rEigenVec_Bx, rEigenVec_By, rEigenVec_Bz, pitch, yaw, domain, domain_direction); // Set the number of timesteps @@ -295,7 +295,7 @@ TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, AlfvenWaveRightMovingCorrectInpu auto [pitch, yaw, domain, domain_direction] = GetParam(); // Set the launch parameters - setLaunchParams(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, rEigenVec_MomentumY, rEigenVec_MomentumZ, rEigenVec_E, + Set_Launch_Params(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, rEigenVec_MomentumY, rEigenVec_MomentumZ, rEigenVec_E, rEigenVec_Bx, rEigenVec_By, rEigenVec_Bz, pitch, yaw, domain, domain_direction); // Set the number of timesteps @@ -330,7 +330,7 @@ TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, AlfvenWaveLeftMovingCorrectInput auto [pitch, yaw, domain, domain_direction] = GetParam(); // Set the launch parameters - setLaunchParams(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, rEigenVec_MomentumY, rEigenVec_MomentumZ, rEigenVec_E, + Set_Launch_Params(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, rEigenVec_MomentumY, rEigenVec_MomentumZ, rEigenVec_E, rEigenVec_Bx, rEigenVec_By, rEigenVec_Bz, pitch, yaw, domain, domain_direction); // Set the number of timesteps @@ -368,7 +368,7 @@ TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, MHDContactWaveCorrectInputExpect auto [pitch, yaw, domain, domain_direction] = GetParam(); // Set the launch parameters - setLaunchParams(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, rEigenVec_MomentumY, rEigenVec_MomentumZ, rEigenVec_E, + Set_Launch_Params(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, rEigenVec_MomentumY, rEigenVec_MomentumZ, rEigenVec_E, rEigenVec_Bx, rEigenVec_By, rEigenVec_Bz, pitch, yaw, domain, domain_direction, velocityX); // Set the number of timesteps @@ -413,7 +413,7 @@ class tMHDSYSTEMLinearWavesParameterizedMpi : public ::testing::TestWithParam -void checkPointerAttributes(cuda_utilities::DeviceVector &devVector) +void Check_Pointer_Attributes(cuda_utilities::DeviceVector &devVector) { // Get the pointer information cudaPointerAttributes ptrAttributes; @@ -67,7 +67,7 @@ TEST(tALLDeviceVectorConstructor, CheckConstructorDataAndSizeExpectProperAllocat EXPECT_EQ(vectorSize, devVector.size()); // Check the pointer information - checkPointerAttributes(devVector); + Check_Pointer_Attributes(devVector); } TEST(tALLDeviceVectorDestructor, CheckDestructorExpectProperDeallocation) @@ -229,7 +229,7 @@ TEST(tALLDeviceVectorReset, SetNewSizeExpectCorrectSize) EXPECT_EQ(newSize, devVector.size()); // Check the pointer - checkPointerAttributes(devVector); + Check_Pointer_Attributes(devVector); // Copy the new values into device memory devVector.cpyHostToDevice(newVec); @@ -259,7 +259,7 @@ TEST(tALLDeviceVectorResize, SetLargerSizeExpectCorrectSize) EXPECT_EQ(newSize, devVector.size()); // Check the pointer - checkPointerAttributes(devVector); + Check_Pointer_Attributes(devVector); // Check the values for (size_t i = 0; i < originalSize; i++) { @@ -287,7 +287,7 @@ TEST(tALLDeviceVectorResize, SetSmallerSizeExpectCorrectSize) EXPECT_EQ(newSize, devVector.size()); // Check the pointer - checkPointerAttributes(devVector); + Check_Pointer_Attributes(devVector); // Check the values for (size_t i = 0; i < newSize; i++) { diff --git a/src/utils/cuda_utilities_tests.cpp b/src/utils/cuda_utilities_tests.cpp index 08c1004b2..d86af03ae 100644 --- a/src/utils/cuda_utilities_tests.cpp +++ b/src/utils/cuda_utilities_tests.cpp @@ -62,7 +62,7 @@ TEST(tHYDROCudaUtilsGetRealIndices, CorrectInputExpectCorrectOutput) std::vector test_indices{is, ie, js, je, ks, ke}; for (size_t j = 0; j < test_indices.size(); j++) { - testingUtilities::checkResults(fiducial_indices[i][j], test_indices[j], + testingUtilities::Check_Results(fiducial_indices[i][j], test_indices[j], index_names[j] + " " + parameters.names[i]); } } diff --git a/src/utils/hydro_utilities_tests.cpp b/src/utils/hydro_utilities_tests.cpp index 7bab43b69..000fb23dc 100644 --- a/src/utils/hydro_utilities_tests.cpp +++ b/src/utils/hydro_utilities_tests.cpp @@ -72,7 +72,7 @@ TEST(tHYDROtMHDHydroUtilsCalcPressurePrimitive, CorrectInputExpectCorrectOutput) parameters.E.at(i), parameters.d.at(i), parameters.vx.at(i), parameters.vy.at(i), parameters.vz.at(i), parameters.gamma, parameters.magnetic_x.at(i), parameters.magnetic_y.at(i), parameters.magnetic_z.at(i)); - testingUtilities::checkResults(fiducial_pressure.at(i), test_Ps, parameters.names.at(i)); + testingUtilities::Check_Results(fiducial_pressure.at(i), test_Ps, parameters.names.at(i)); } } @@ -90,7 +90,7 @@ TEST(tHYDROtMHDHydroUtilsCalcPressureConserved, CorrectInputExpectCorrectOutput) parameters.E.at(i), parameters.d.at(i), parameters.mx.at(i), parameters.my.at(i), parameters.mz.at(i), parameters.gamma, parameters.magnetic_x.at(i), parameters.magnetic_y.at(i), parameters.magnetic_z.at(i)); - testingUtilities::checkResults(fiducial_pressure.at(i), test_pressure, parameters.names.at(i)); + testingUtilities::Check_Results(fiducial_pressure.at(i), test_pressure, parameters.names.at(i)); } } @@ -133,7 +133,7 @@ TEST(tHYDROHydroUtilsCalcTemp, CorrectInputExpectCorrectOutput) for (size_t i = 0; i < parameters.names.size(); i++) { Real test_Ts = hydro_utilities::Calc_Temp(parameters.P.at(i), parameters.n.at(i)); - testingUtilities::checkResults(fiducial_Ts.at(i), test_Ts, parameters.names.at(i)); + testingUtilities::Check_Results(fiducial_Ts.at(i), test_Ts, parameters.names.at(i)); } } @@ -147,7 +147,7 @@ TEST(tHYDROHydroUtilsCalcTempDE, CorrectInputExpectCorrectOutput) Real test_Ts = hydro_utilities::Calc_Temp_DE(parameters.d.at(i), parameters.ge.at(i), parameters.gamma, parameters.n.at(i)); - testingUtilities::checkResults(fiducial_Ts.at(i), test_Ts, parameters.names.at(i)); + testingUtilities::Check_Results(fiducial_Ts.at(i), test_Ts, parameters.names.at(i)); } } #endif // DE @@ -166,7 +166,7 @@ TEST(tHYDROtMHDHydroUtilsCalcEnergyPrimitive, CorrectInputExpectCorrectOutput) parameters.P.at(i), parameters.d.at(i), parameters.vx.at(i), parameters.vy.at(i), parameters.vz.at(i), parameters.gamma, parameters.magnetic_x.at(i), parameters.magnetic_y.at(i), parameters.magnetic_z.at(i)); - testingUtilities::checkResults(fiducial_energy.at(i), test_Es, parameters.names.at(i)); + testingUtilities::Check_Results(fiducial_energy.at(i), test_Es, parameters.names.at(i)); } } @@ -184,7 +184,7 @@ TEST(tHYDROtMHDHydroUtilsCalcEnergyConserved, CorrectInputExpectCorrectOutput) parameters.P.at(i), parameters.d.at(i), parameters.mx.at(i), parameters.my.at(i), parameters.mz.at(i), parameters.gamma, parameters.magnetic_x.at(i), parameters.magnetic_y.at(i), parameters.magnetic_z.at(i)); - testingUtilities::checkResults(fiducial_energy.at(i), test_Es, parameters.names.at(i)); + testingUtilities::Check_Results(fiducial_energy.at(i), test_Es, parameters.names.at(i)); } } @@ -201,7 +201,7 @@ TEST(tHYDROtMHDHydroUtilsCalcEnergyPrimitive, NegativePressureExpectAutomaticFix -parameters.P.at(i), parameters.d.at(i), parameters.vx.at(i), parameters.vy.at(i), parameters.vz.at(i), parameters.gamma, parameters.magnetic_x.at(i), parameters.magnetic_y.at(i), parameters.magnetic_z.at(i)); - testingUtilities::checkResults(fiducial_energy.at(i), test_Es, parameters.names.at(i)); + testingUtilities::Check_Results(fiducial_energy.at(i), test_Es, parameters.names.at(i)); } } @@ -218,7 +218,7 @@ TEST(tHYDROtMHDHydroUtilsCalcEnergyConserved, NegativePressureExpectAutomaticFix -parameters.P.at(i), parameters.d.at(i), parameters.mx.at(i), parameters.my.at(i), parameters.mz.at(i), parameters.gamma, parameters.magnetic_x.at(i), parameters.magnetic_y.at(i), parameters.magnetic_z.at(i)); - testingUtilities::checkResults(fiducial_energy.at(i), test_Es, parameters.names.at(i)); + testingUtilities::Check_Results(fiducial_energy.at(i), test_Es, parameters.names.at(i)); } } @@ -231,7 +231,7 @@ TEST(tHYDROHydroUtilsGetPressureFromDE, CorrectInputExpectCorrectOutput) Real test_Ps = hydro_utilities::Get_Pressure_From_DE(parameters.E.at(i), parameters.U_total.at(i), parameters.U_advected.at(i), parameters.gamma); - testingUtilities::checkResults(fiducial_Ps.at(i), test_Ps, parameters.names.at(i)); + testingUtilities::Check_Results(fiducial_Ps.at(i), test_Ps, parameters.names.at(i)); } } @@ -245,7 +245,7 @@ TEST(tHYDROtMHDCalcKineticEnergyFromVelocity, CorrectInputExpectCorrectOutput) Real testEnergy = hydro_utilities::Calc_Kinetic_Energy_From_Velocity( coef * parameters.d.at(i), coef * parameters.vx.at(i), coef * parameters.vy.at(i), coef * parameters.vz.at(i)); - testingUtilities::checkResults(fiducialEnergies.at(i), testEnergy, parameters.names.at(i)); + testingUtilities::Check_Results(fiducialEnergies.at(i), testEnergy, parameters.names.at(i)); } } @@ -259,6 +259,6 @@ TEST(tHYDROtMHDCalcKineticEnergyFromMomentum, CorrectInputExpectCorrectOutput) Real testEnergy = hydro_utilities::Calc_Kinetic_Energy_From_Momentum( coef * parameters.d.at(i), coef * parameters.mx.at(i), coef * parameters.my.at(i), coef * parameters.mz.at(i)); - testingUtilities::checkResults(fiducialEnergies.at(i), testEnergy, parameters.names.at(i)); + testingUtilities::Check_Results(fiducialEnergies.at(i), testEnergy, parameters.names.at(i)); } } \ No newline at end of file diff --git a/src/utils/math_utilities_tests.cpp b/src/utils/math_utilities_tests.cpp index 665a5981c..889cb7546 100644 --- a/src/utils/math_utilities_tests.cpp +++ b/src/utils/math_utilities_tests.cpp @@ -31,9 +31,9 @@ TEST(tALLRotateCoords, CorrectInputExpectCorrectOutput) auto [x_1_rot, x_2_rot, x_3_rot] = math_utils::rotateCoords(x_1, x_2, x_3, pitch, yaw); - testingUtilities::checkResults<0>(x_1_rot_fid, x_1_rot, "x_1 rotated values"); - testingUtilities::checkResults<0>(x_2_rot_fid, x_2_rot, "x_2 rotated values"); - testingUtilities::checkResults<0>(x_3_rot_fid, x_3_rot, "x_3 rotated values"); + testingUtilities::Check_Results<0>(x_1_rot_fid, x_1_rot, "x_1 rotated values"); + testingUtilities::Check_Results<0>(x_2_rot_fid, x_2_rot, "x_2 rotated values"); + testingUtilities::Check_Results<0>(x_3_rot_fid, x_3_rot, "x_3 rotated values"); } // ============================================================================= @@ -54,6 +54,6 @@ TEST(tALLDotProduct, CorrectInputExpectCorrectOutput) testDotProduct = math_utils::dotProduct(a.at(0), a.at(1), a.at(2), b.at(0), b.at(1), b.at(2)); // Now check results - testingUtilities::checkResults(fiducialDotProduct, testDotProduct, "dot product"); + testingUtilities::Check_Results(fiducialDotProduct, testDotProduct, "dot product"); } // ========================================================================= \ No newline at end of file diff --git a/src/utils/mhd_utilities_tests.cu b/src/utils/mhd_utilities_tests.cu index 980259d28..044e74c29 100644 --- a/src/utils/mhd_utilities_tests.cu +++ b/src/utils/mhd_utilities_tests.cu @@ -66,7 +66,7 @@ TEST(tMHDComputeThermalEnergy, CorrectInputExpectCorrectOutput) parameters.momentumY.at(i), parameters.momentumZ.at(i), parameters.magneticX.at(i), parameters.magneticY.at(i), parameters.magneticZ.at(i), parameters.gamma); - testingUtilities::checkResults(fiducialGasPressures.at(i), testGasPressure, parameters.names.at(i)); + testingUtilities::Check_Results(fiducialGasPressures.at(i), testGasPressure, parameters.names.at(i)); } } // ============================================================================= @@ -91,7 +91,7 @@ TEST(tMHDcomputeMagneticEnergy, CorrectInputExpectCorrectOutput) Real testMagneticEnergy = mhd::utils::computeMagneticEnergy(parameters.magneticX.at(i), parameters.magneticY.at(i), parameters.magneticZ.at(i)); - testingUtilities::checkResults(fiducialEnergy.at(i), testMagneticEnergy, parameters.names.at(i)); + testingUtilities::Check_Results(fiducialEnergy.at(i), testMagneticEnergy, parameters.names.at(i)); } } // ============================================================================= @@ -115,7 +115,7 @@ TEST(tMHDComputeTotalPressure, CorrectInputExpectCorrectOutput) Real testTotalPressure = mhd::utils::computeTotalPressure(parameters.pressureGas.at(i), parameters.magneticX.at(i), parameters.magneticY.at(i), parameters.magneticZ.at(i)); - testingUtilities::checkResults(fiducialTotalPressures.at(i), testTotalPressure, parameters.names.at(i)); + testingUtilities::Check_Results(fiducialTotalPressures.at(i), testTotalPressure, parameters.names.at(i)); } } @@ -165,7 +165,7 @@ TEST(tMHDFastMagnetosonicSpeed, CorrectInputExpectCorrectOutput) coef.at(i) * parameters.magneticX.at(i), coef.at(i) * parameters.magneticY.at(i), coef.at(i) * parameters.magneticZ.at(i), parameters.gamma); - testingUtilities::checkResults(fiducialFastMagnetosonicSpeed.at(i), testFastMagnetosonicSpeed, + testingUtilities::Check_Results(fiducialFastMagnetosonicSpeed.at(i), testFastMagnetosonicSpeed, parameters.names.at(i)); } } @@ -188,7 +188,7 @@ TEST(tMHDFastMagnetosonicSpeed, NegativeDensityExpectAutomaticFix) coef.at(i) * parameters.magneticX.at(i), coef.at(i) * parameters.magneticY.at(i), coef.at(i) * parameters.magneticZ.at(i), parameters.gamma); - testingUtilities::checkResults(fiducialFastMagnetosonicSpeed.at(i), testFastMagnetosonicSpeed, + testingUtilities::Check_Results(fiducialFastMagnetosonicSpeed.at(i), testFastMagnetosonicSpeed, parameters.names.at(i)); } } @@ -217,7 +217,7 @@ TEST(tMHDSlowMagnetosonicSpeed, CorrectInputExpectCorrectOutput) parameters.density.at(i) * coef, parameters.pressureGas.at(i) * coef, parameters.magneticX.at(i) * coef, parameters.magneticY.at(i) * coef, parameters.magneticZ.at(i) * coef, parameters.gamma); - testingUtilities::checkResults(fiducialSlowMagnetosonicSpeed.at(i), testSlowMagnetosonicSpeed, + testingUtilities::Check_Results(fiducialSlowMagnetosonicSpeed.at(i), testSlowMagnetosonicSpeed, parameters.names.at(i)); } } @@ -240,7 +240,7 @@ TEST(tMHDSlowMagnetosonicSpeed, NegativeDensityExpectAutomaticFix) -parameters.density.at(i) * coef, parameters.pressureGas.at(i) * coef, parameters.magneticX.at(i) * coef, parameters.magneticY.at(i) * coef, parameters.magneticZ.at(i) * coef, parameters.gamma); - testingUtilities::checkResults(fiducialSlowMagnetosonicSpeed.at(i), testSlowMagnetosonicSpeed, + testingUtilities::Check_Results(fiducialSlowMagnetosonicSpeed.at(i), testSlowMagnetosonicSpeed, parameters.names.at(i)); } } @@ -264,7 +264,7 @@ TEST(tMHDAlfvenSpeed, CorrectInputExpectCorrectOutput) for (size_t i = 0; i < parameters.names.size(); i++) { Real testAlfvenSpeed = mhd::utils::alfvenSpeed(parameters.magneticX.at(i), parameters.density.at(i)); - testingUtilities::checkResults(fiducialAlfvenSpeed.at(i), testAlfvenSpeed, parameters.names.at(i)); + testingUtilities::Check_Results(fiducialAlfvenSpeed.at(i), testAlfvenSpeed, parameters.names.at(i)); } } @@ -281,7 +281,7 @@ TEST(tMHDAlfvenSpeed, NegativeDensityExpectAutomaticFix) for (size_t i = 0; i < parameters.names.size(); i++) { Real testAlfvenSpeed = mhd::utils::alfvenSpeed(parameters.magneticX.at(i), -parameters.density.at(i)); - testingUtilities::checkResults(fiducialAlfvenSpeed.at(i), testAlfvenSpeed, parameters.names.at(i)); + testingUtilities::Check_Results(fiducialAlfvenSpeed.at(i), testAlfvenSpeed, parameters.names.at(i)); } } // ============================================================================= @@ -316,9 +316,9 @@ TEST(tMHDCellCenteredMagneticFields, CorrectInputExpectCorrectOutput) mhd::utils::cellCenteredMagneticFields(testGrid.data(), id, xid, yid, zid, n_cells, nx, ny); // Check the results - testingUtilities::checkResults(fiducialAvgBx, testAvgBx, "cell centered Bx value"); - testingUtilities::checkResults(fiducialAvgBy, testAvgBy, "cell centered By value"); - testingUtilities::checkResults(fiducialAvgBz, testAvgBz, "cell centered Bz value"); + testingUtilities::Check_Results(fiducialAvgBx, testAvgBx, "cell centered Bx value"); + testingUtilities::Check_Results(fiducialAvgBy, testAvgBy, "cell centered By value"); + testingUtilities::Check_Results(fiducialAvgBz, testAvgBz, "cell centered Bz value"); } #endif // MHD // ============================================================================= @@ -369,13 +369,13 @@ TEST(tMHDInitMagneticFieldWithVectorPotential, CorrectInputExpectCorrectOutput) for (size_t i = 0; i < conserved_vector.size(); i++) { if (i == 47) { - testingUtilities::checkResults(bx_fiducial, conserved_vector.at(i), "value at i = " + std::to_string(i)); + testingUtilities::Check_Results(bx_fiducial, conserved_vector.at(i), "value at i = " + std::to_string(i)); } else if (i == 55) { - testingUtilities::checkResults(by_fiducial, conserved_vector.at(i), "value at i = " + std::to_string(i)); + testingUtilities::Check_Results(by_fiducial, conserved_vector.at(i), "value at i = " + std::to_string(i)); } else if (i == 63) { - testingUtilities::checkResults(bz_fiducial, conserved_vector.at(i), "value at i = " + std::to_string(i)); + testingUtilities::Check_Results(bz_fiducial, conserved_vector.at(i), "value at i = " + std::to_string(i)); } else { - testingUtilities::checkResults(default_fiducial, conserved_vector.at(i), "value at i = " + std::to_string(i)); + testingUtilities::Check_Results(default_fiducial, conserved_vector.at(i), "value at i = " + std::to_string(i)); } } } diff --git a/src/utils/reduction_utilities_tests.cu b/src/utils/reduction_utilities_tests.cu index e689e2a5f..a1226a910 100644 --- a/src/utils/reduction_utilities_tests.cu +++ b/src/utils/reduction_utilities_tests.cu @@ -63,7 +63,7 @@ TEST(tALLKernelReduceMax, CorrectInputExpectCorrectOutput) CudaCheckError(); // Perform comparison - testingUtilities::checkResults(maxValue, dev_max.at(0), "maximum value found"); + testingUtilities::Check_Results(maxValue, dev_max.at(0), "maximum value found"); } // ============================================================================= // Tests for divergence max reduction diff --git a/src/utils/testing_utilities.cpp b/src/utils/testing_utilities.cpp index 02aaadd68..b08892824 100644 --- a/src/utils/testing_utilities.cpp +++ b/src/utils/testing_utilities.cpp @@ -88,7 +88,7 @@ void wrapperEqual(int i, int j, int k, std::string const &dataSetName, double te outString += std::to_string(k); outString += "]"; - ASSERT_NO_FATAL_FAILURE(checkResults<1>(fid_value, test_value, outString, fixedEpsilon)); + ASSERT_NO_FATAL_FAILURE(Check_Results<1>(fid_value, test_value, outString, fixedEpsilon)); } void analyticConstant(systemTest::SystemTestRunner testObject, std::string const &dataSetName, double value) diff --git a/src/utils/testing_utilities.h b/src/utils/testing_utilities.h index 7057e01e9..55f6b6f1c 100644 --- a/src/utils/testing_utilities.h +++ b/src/utils/testing_utilities.h @@ -120,7 +120,7 @@ void analyticSine(systemTest::SystemTestRunner testObject, std::string const &da * values are ignored and default behaviour is used */ template -void checkResults(double fiducialNumber, double testNumber, std::string const &outString, double fixedEpsilon = -999, +void Check_Results(double fiducialNumber, double testNumber, std::string const &outString, double fixedEpsilon = -999, int64_t ulpsEpsilon = -999) { // Check for equality and if not equal return difference @@ -152,7 +152,7 @@ void checkResults(double fiducialNumber, double testNumber, std::string const &o } else { throw std::runtime_error( "Incorrect template argument passed to " - "checkResults. Options are 0 and 1 but " + + "Check_Results. Options are 0 and 1 but " + std::to_string(checkType) + " was passed"); } } diff --git a/src/utils/timing_functions.cpp b/src/utils/timing_functions.cpp index 133971b68..7d1dc8b5e 100644 --- a/src/utils/timing_functions.cpp +++ b/src/utils/timing_functions.cpp @@ -18,7 +18,7 @@ void OneTime::Start() if (inactive) { return; } - time_start = get_time(); + time_start = Get_Time(); } void OneTime::Subtract(Real time_to_subtract) @@ -34,7 +34,7 @@ void OneTime::End() if (inactive) { return; } - Real time_end = get_time(); + Real time_end = Get_Time(); Real time = (time_end - time_start) * 1000; #ifdef MPI_CHOLLA @@ -219,14 +219,14 @@ ScopedTimer::ScopedTimer(const char* input_name) { #ifdef CPU_TIME name = input_name; - time_start = get_time(); + time_start = Get_Time(); #endif } ScopedTimer::~ScopedTimer(void) { #ifdef CPU_TIME - double time_elapsed_ms = (get_time() - time_start) * 1000; + double time_elapsed_ms = (Get_Time() - time_start) * 1000; #ifdef MPI_CHOLLA double t_min = ReduceRealMin(time_elapsed_ms); From 4715bec382b1ad0ea4e13f8cce2bdc5910b6c01d Mon Sep 17 00:00:00 2001 From: helenarichie Date: Mon, 10 Jul 2023 18:14:41 -0400 Subject: [PATCH 447/694] change function names to conform to Cholla naming standards and turn on readability-identifier-naming check --- run_check.sh | 11 -- src/gravity/potential_paris_3D.cu | 4 +- src/io/io.cpp | 24 ++-- src/io/io.h | 8 +- src/io/io_gpu.cu | 9 +- src/mhd/ct_electric_fields_tests.cu | 4 +- src/mhd/magnetic_update_tests.cu | 4 +- src/riemann_solvers/hllc_cuda_tests.cu | 8 +- src/riemann_solvers/hlld_cuda_tests.cu | 180 +++++++++++++----------- src/system_tests/hydro_system_tests.cpp | 14 +- src/system_tests/mhd_system_tests.cpp | 50 +++---- src/utils/cuda_utilities_tests.cpp | 2 +- src/utils/mhd_utilities_tests.cu | 8 +- src/utils/testing_utilities.h | 2 +- 14 files changed, 165 insertions(+), 163 deletions(-) delete mode 100644 run_check.sh diff --git a/run_check.sh b/run_check.sh deleted file mode 100644 index 01168bf12..000000000 --- a/run_check.sh +++ /dev/null @@ -1,11 +0,0 @@ -cd /ix/eschneider/helena/code/cholla - -make tidy TYPE=hydro & -make tidy TYPE=gravity & -make tidy TYPE=disk & -make tidy TYPE=particles & -make tidy TYPE=cosmology & -make tidy TYPE=mhd & -make tidy TYPE=dust & - -wait diff --git a/src/gravity/potential_paris_3D.cu b/src/gravity/potential_paris_3D.cu index 51d967a9d..011906d14 100644 --- a/src/gravity/potential_paris_3D.cu +++ b/src/gravity/potential_paris_3D.cu @@ -8,8 +8,8 @@ #include "../io/io.h" #include "../utils/gpu.hpp" -static void __attribute__((unused)) -Print_Diff(const Real *p, const Real *q, const int ng, const int nx, const int ny, const int nz, const bool plot = false) +static void __attribute__((unused)) Print_Diff(const Real *p, const Real *q, const int ng, const int nx, const int ny, + const int nz, const bool plot = false) { Real dMax = 0, dSum = 0, dSum2 = 0; Real qMax = 0, qSum = 0, qSum2 = 0; diff --git a/src/io/io.cpp b/src/io/io.cpp index 690f8d3fc..2f01928e1 100644 --- a/src/io/io.cpp +++ b/src/io/io.cpp @@ -316,28 +316,28 @@ void Output_Float32(Grid3D &G, struct parameters P, int nfile) if (P.out_float32_density > 0) { Write_HDF5_Field_3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, - device_dataset_vector.data(), G.C.d_density, "/density"); + device_dataset_vector.data(), G.C.d_density, "/density"); } if (P.out_float32_momentum_x > 0) { Write_HDF5_Field_3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, - device_dataset_vector.data(), G.C.d_momentum_x, "/momentum_x"); + device_dataset_vector.data(), G.C.d_momentum_x, "/momentum_x"); } if (P.out_float32_momentum_y > 0) { Write_HDF5_Field_3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, - device_dataset_vector.data(), G.C.d_momentum_y, "/momentum_y"); + device_dataset_vector.data(), G.C.d_momentum_y, "/momentum_y"); } if (P.out_float32_momentum_z > 0) { Write_HDF5_Field_3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, - device_dataset_vector.data(), G.C.d_momentum_z, "/momentum_z"); + device_dataset_vector.data(), G.C.d_momentum_z, "/momentum_z"); } if (P.out_float32_Energy > 0) { Write_HDF5_Field_3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, - device_dataset_vector.data(), G.C.d_Energy, "/Energy"); + device_dataset_vector.data(), G.C.d_Energy, "/Energy"); } #ifdef DE if (P.out_float32_GasEnergy > 0) { Write_HDF5_Field_3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, - device_dataset_vector.data(), G.C.d_GasEnergy, "/GasEnergy"); + device_dataset_vector.data(), G.C.d_GasEnergy, "/GasEnergy"); } #endif // DE #ifdef MHD @@ -346,17 +346,17 @@ void Output_Float32(Grid3D &G, struct parameters P, int nfile) if (P.out_float32_magnetic_x > 0) { chprintf("WARNING: MHD float-32 output has a different output format than float-64\n"); Write_HDF5_Field_3D(H.nx, H.ny, nx_dset + 1, ny_dset + 1, nz_dset + 1, H.n_ghost - 1, file_id, dataset_buffer, - device_dataset_vector.data(), G.C.d_magnetic_x, "/magnetic_x"); + device_dataset_vector.data(), G.C.d_magnetic_x, "/magnetic_x"); } if (P.out_float32_magnetic_y > 0) { chprintf("WARNING: MHD float-32 output has a different output format than float-64\n"); Write_HDF5_Field_3D(H.nx, H.ny, nx_dset + 1, ny_dset + 1, nz_dset + 1, H.n_ghost - 1, file_id, dataset_buffer, - device_dataset_vector.data(), G.C.d_magnetic_y, "/magnetic_y"); + device_dataset_vector.data(), G.C.d_magnetic_y, "/magnetic_y"); } if (P.out_float32_magnetic_z > 0) { chprintf("WARNING: MHD float-32 output has a different output format than float-64\n"); Write_HDF5_Field_3D(H.nx, H.ny, nx_dset + 1, ny_dset + 1, nz_dset + 1, H.n_ghost - 1, file_id, dataset_buffer, - device_dataset_vector.data(), G.C.d_magnetic_z, "/magnetic_z"); + device_dataset_vector.data(), G.C.d_magnetic_z, "/magnetic_z"); } #endif // MHD @@ -1492,11 +1492,11 @@ void Grid3D::Write_Grid_HDF5(hid_t file_id) #ifdef MHD if (H.Output_Complete_Data) { Write_HDF5_Field_3D(H.nx, H.ny, H.nx_real + 1, H.ny_real, H.nz_real, H.n_ghost, file_id, dataset_buffer, - device_dataset_vector.data(), C.d_magnetic_x, "/magnetic_x", 0); + device_dataset_vector.data(), C.d_magnetic_x, "/magnetic_x", 0); Write_HDF5_Field_3D(H.nx, H.ny, H.nx_real, H.ny_real + 1, H.nz_real, H.n_ghost, file_id, dataset_buffer, - device_dataset_vector.data(), C.d_magnetic_y, "/magnetic_y", 1); + device_dataset_vector.data(), C.d_magnetic_y, "/magnetic_y", 1); Write_HDF5_Field_3D(H.nx, H.ny, H.nx_real, H.ny_real, H.nz_real + 1, H.n_ghost, file_id, dataset_buffer, - device_dataset_vector.data(), C.d_magnetic_z, "/magnetic_z", 2); + device_dataset_vector.data(), C.d_magnetic_z, "/magnetic_z", 2); } #endif // MHD } diff --git a/src/io/io.h b/src/io/io.h index 26569d146..1f14f6fb8 100644 --- a/src/io/io.h +++ b/src/io/io.h @@ -70,8 +70,8 @@ void Fill_Grid_From_HDF5_Buffer(int nx, int ny, int nz, int nx_real, int ny_real // From io/io_gpu.cu // Use GPU to pack source -> device_buffer, then copy device_buffer -> buffer, // then write HDF5 field -void Write_HDF5_Field_3D(int nx, int ny, int nx_real, int ny_real, int nz_real, int n_ghost, hid_t file_id, float* buffer, - float* device_buffer, Real* source, const char* name, int mhd_direction = -1); -void Write_HDF5_Field_3D(int nx, int ny, int nx_real, int ny_real, int nz_real, int n_ghost, hid_t file_id, double* buffer, - double* device_buffer, Real* source, const char* name, int mhd_direction = -1); +void Write_HDF5_Field_3D(int nx, int ny, int nx_real, int ny_real, int nz_real, int n_ghost, hid_t file_id, + float* buffer, float* device_buffer, Real* source, const char* name, int mhd_direction = -1); +void Write_HDF5_Field_3D(int nx, int ny, int nx_real, int ny_real, int nz_real, int n_ghost, hid_t file_id, + double* buffer, double* device_buffer, Real* source, const char* name, int mhd_direction = -1); #endif diff --git a/src/io/io_gpu.cu b/src/io/io_gpu.cu index 9fa0b20e6..495b0bd19 100644 --- a/src/io/io_gpu.cu +++ b/src/io/io_gpu.cu @@ -88,8 +88,9 @@ __global__ void CopyReal3D_GPU_Kernel(int nx, int ny, int nx_real, int ny_real, // When buffer is double, automatically use the double version of everything // using function overloading -void Write_HDF5_Field_3D(int nx, int ny, int nx_real, int ny_real, int nz_real, int n_ghost, hid_t file_id, double* buffer, - double* device_buffer, Real* device_source, const char* name, int mhd_direction) +void Write_HDF5_Field_3D(int nx, int ny, int nx_real, int ny_real, int nz_real, int n_ghost, hid_t file_id, + double* buffer, double* device_buffer, Real* device_source, const char* name, + int mhd_direction) { herr_t status; hsize_t dims[3]; @@ -116,8 +117,8 @@ void Write_HDF5_Field_3D(int nx, int ny, int nx_real, int ny_real, int nz_real, // When buffer is float, automatically use the float version of everything using // function overloading -void Write_HDF5_Field_3D(int nx, int ny, int nx_real, int ny_real, int nz_real, int n_ghost, hid_t file_id, float* buffer, - float* device_buffer, Real* device_source, const char* name, int mhd_direction) +void Write_HDF5_Field_3D(int nx, int ny, int nx_real, int ny_real, int nz_real, int n_ghost, hid_t file_id, + float* buffer, float* device_buffer, Real* device_source, const char* name, int mhd_direction) { herr_t status; hsize_t dims[3]; diff --git a/src/mhd/ct_electric_fields_tests.cu b/src/mhd/ct_electric_fields_tests.cu index 2cbbac2e8..98d46e8da 100644 --- a/src/mhd/ct_electric_fields_tests.cu +++ b/src/mhd/ct_electric_fields_tests.cu @@ -116,8 +116,8 @@ class tMHDCalculateCTElectricFields : public ::testing::Test int xid, yid, zid; cuda_utilities::compute3DIndices(i, nx, ny, xid, yid, zid); testingUtilities::Check_Results(fiducialData.at(i), testCTElectricFields.at(i), - "value at i = " + std::to_string(i) + ", xid = " + std::to_string(xid) + - ", yid = " + std::to_string(yid) + ", zid = " + std::to_string(zid)); + "value at i = " + std::to_string(i) + ", xid = " + std::to_string(xid) + + ", yid = " + std::to_string(yid) + ", zid = " + std::to_string(zid)); } } }; diff --git a/src/mhd/magnetic_update_tests.cu b/src/mhd/magnetic_update_tests.cu index 5ac59060b..b71381134 100644 --- a/src/mhd/magnetic_update_tests.cu +++ b/src/mhd/magnetic_update_tests.cu @@ -104,8 +104,8 @@ class tMHDUpdateMagneticField3D : public ::testing::Test int xid, yid, zid; cuda_utilities::compute3DIndices(i, nx, ny, xid, yid, zid); testingUtilities::Check_Results(fiducialData.at(i), destinationGrid.at(i), - "value at i = " + std::to_string(i) + ", xid = " + std::to_string(xid) + - ", yid = " + std::to_string(yid) + ", zid = " + std::to_string(zid)); + "value at i = " + std::to_string(i) + ", xid = " + std::to_string(xid) + + ", yid = " + std::to_string(yid) + ", zid = " + std::to_string(zid)); } } }; diff --git a/src/riemann_solvers/hllc_cuda_tests.cu b/src/riemann_solvers/hllc_cuda_tests.cu index e8d000f6e..39616772b 100644 --- a/src/riemann_solvers/hllc_cuda_tests.cu +++ b/src/riemann_solvers/hllc_cuda_tests.cu @@ -45,7 +45,7 @@ class tHYDROCalculateHLLCFluxesCUDA : public ::testing::Test * \return std::vector */ std::vector Compute_Fluxes(std::vector const &stateLeft, std::vector const &stateRight, - Real const &gamma) + Real const &gamma) { // Simulation Paramters int const nx = 1; // Number of cells in the x-direction? @@ -106,7 +106,7 @@ class tHYDROCalculateHLLCFluxesCUDA : public ::testing::Test * values that failed are printed */ void Check_Results(std::vector const &fiducialFlux, std::vector const &testFlux, - std::string const &customOutput = "") + std::string const &customOutput = "") { // Field names std::vector const fieldNames{"Densities", "X Momentum", "Y Momentum", "Z Momentum", "Energies"}; @@ -169,8 +169,8 @@ TEST_F(tHYDROCalculateHLLCFluxesCUDA, // Test suite name // Compute the fluxes std::vector const testFluxes = Compute_Fluxes(state, // Left state - state, // Right state - gamma); // Adiabatic Index + state, // Right state + gamma); // Adiabatic Index // Check for correctness Check_Results(fiducialFluxes, testFluxes); diff --git a/src/riemann_solvers/hlld_cuda_tests.cu b/src/riemann_solvers/hlld_cuda_tests.cu index b67e18227..500ef3538 100644 --- a/src/riemann_solvers/hlld_cuda_tests.cu +++ b/src/riemann_solvers/hlld_cuda_tests.cu @@ -57,7 +57,7 @@ class tMHDCalculateHLLDFluxesCUDA : public ::testing::Test * \return std::vector */ std::vector Compute_Fluxes(std::vector stateLeft, std::vector stateRight, Real const &gamma, - int const &direction = 0) + int const &direction = 0) { // Rearrange X, Y, and Z values for the chosen direction std::rotate(stateLeft.begin() + 1, stateLeft.begin() + 4 - direction, stateLeft.begin() + 4); @@ -162,7 +162,7 @@ class tMHDCalculateHLLDFluxesCUDA : public ::testing::Test * X, 1 = plane normal to Y, 2 = plane normal to Z. Defaults to 0. */ void Check_Results(std::vector fiducialFlux, std::vector const &scalarFlux, Real thermalEnergyFlux, - std::vector const &testFlux, std::string const &customOutput = "", int const &direction = 0) + std::vector const &testFlux, std::string const &customOutput = "", int const &direction = 0) { // Field names std::vector fieldNames{"Densities", "X Momentum", "Y Momentum", "Z Momentum", @@ -228,7 +228,7 @@ class tMHDCalculateHLLDFluxesCUDA : public ::testing::Test * y-magnetic field, z-magnetic field. */ std::vector Primitive_2_Conserved(std::vector const &input, double const &gamma, - std::vector const &primitiveScalars) + std::vector const &primitiveScalars) { std::vector output(input.size()); output.at(0) = input.at(0); // Density @@ -299,29 +299,29 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, BrioAndWuShockTubeCorrectInputExpectCorrectO // Pressure | X-Magnetic Field | Y-Magnetic Field | // Z-Magnetic Field | Adiabatic Index | Passive // Scalars | - leftICs = Primitive_2_Conserved({1.0, 0.0, 0.0, Vz, 1.0, Bx, 1.0, Bz}, gamma, primitiveScalar), - leftFastRareLeftSide = - Primitive_2_Conserved({0.978576, 0.038603, -0.011074, Vz, 0.957621, Bx, 0.970288, Bz}, gamma, primitiveScalar), - leftFastRareRightSide = - Primitive_2_Conserved({0.671655, 0.647082, -0.238291, Vz, 0.451115, Bx, 0.578240, Bz}, gamma, primitiveScalar), - compoundLeftSide = - Primitive_2_Conserved({0.814306, 0.506792, -0.911794, Vz, 0.706578, Bx, -0.108819, Bz}, gamma, primitiveScalar), - compoundPeak = - Primitive_2_Conserved({0.765841, 0.523701, -1.383720, Vz, 0.624742, Bx, -0.400787, Bz}, gamma, primitiveScalar), - compoundRightSide = - Primitive_2_Conserved({0.695211, 0.601089, -1.583720, Vz, 0.515237, Bx, -0.537027, Bz}, gamma, primitiveScalar), - contactLeftSide = - Primitive_2_Conserved({0.680453, 0.598922, -1.584490, Vz, 0.515856, Bx, -0.533616, Bz}, gamma, primitiveScalar), - contactRightSide = - Primitive_2_Conserved({0.231160, 0.599261, -1.584820, Vz, 0.516212, Bx, -0.533327, Bz}, gamma, primitiveScalar), - slowShockLeftSide = - Primitive_2_Conserved({0.153125, 0.086170, -0.683303, Vz, 0.191168, Bx, -0.850815, Bz}, gamma, primitiveScalar), - slowShockRightSide = Primitive_2_Conserved({0.117046, -0.238196, -0.165561, Vz, 0.087684, Bx, -0.903407, Bz}, gamma, - primitiveScalar), + leftICs = Primitive_2_Conserved({1.0, 0.0, 0.0, Vz, 1.0, Bx, 1.0, Bz}, gamma, primitiveScalar), + leftFastRareLeftSide = Primitive_2_Conserved({0.978576, 0.038603, -0.011074, Vz, 0.957621, Bx, 0.970288, Bz}, + gamma, primitiveScalar), + leftFastRareRightSide = Primitive_2_Conserved({0.671655, 0.647082, -0.238291, Vz, 0.451115, Bx, 0.578240, Bz}, + gamma, primitiveScalar), + compoundLeftSide = Primitive_2_Conserved({0.814306, 0.506792, -0.911794, Vz, 0.706578, Bx, -0.108819, Bz}, gamma, + primitiveScalar), + compoundPeak = Primitive_2_Conserved({0.765841, 0.523701, -1.383720, Vz, 0.624742, Bx, -0.400787, Bz}, gamma, + primitiveScalar), + compoundRightSide = Primitive_2_Conserved({0.695211, 0.601089, -1.583720, Vz, 0.515237, Bx, -0.537027, Bz}, gamma, + primitiveScalar), + contactLeftSide = Primitive_2_Conserved({0.680453, 0.598922, -1.584490, Vz, 0.515856, Bx, -0.533616, Bz}, gamma, + primitiveScalar), + contactRightSide = Primitive_2_Conserved({0.231160, 0.599261, -1.584820, Vz, 0.516212, Bx, -0.533327, Bz}, gamma, + primitiveScalar), + slowShockLeftSide = Primitive_2_Conserved({0.153125, 0.086170, -0.683303, Vz, 0.191168, Bx, -0.850815, Bz}, gamma, + primitiveScalar), + slowShockRightSide = Primitive_2_Conserved({0.117046, -0.238196, -0.165561, Vz, 0.087684, Bx, -0.903407, Bz}, + gamma, primitiveScalar), rightFastRareLeftSide = Primitive_2_Conserved({0.117358, -0.228756, -0.158845, Vz, 0.088148, Bx, -0.908335, Bz}, - gamma, primitiveScalar), + gamma, primitiveScalar), rightFastRareRightSide = Primitive_2_Conserved({0.124894, -0.003132, -0.002074, Vz, 0.099830, Bx, -0.999018, Bz}, - gamma, primitiveScalar), + gamma, primitiveScalar), rightICs = Primitive_2_Conserved({0.128, 0.0, 0.0, Vz, 0.1, Bx, -1.0, Bz}, gamma, primitiveScalar); for (size_t direction = 0; direction < 3; direction++) { @@ -419,8 +419,9 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, BrioAndWuShockTubeCorrectInputExpectCorrectO 0.92496835095531071, 0.0, 0.53128887284876058, 0}; std::vector const scalarFlux{0.47083980954039228, 0.94789941519098619, 1.4101892974729979}; - Real thermalEnergyFlux = 0.41622256825457099; - std::vector const testFluxes = Compute_Fluxes(leftFastRareLeftSide, leftFastRareRightSide, gamma, direction); + Real thermalEnergyFlux = 0.41622256825457099; + std::vector const testFluxes = + Compute_Fluxes(leftFastRareLeftSide, leftFastRareRightSide, gamma, direction); Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } { @@ -435,8 +436,9 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, BrioAndWuShockTubeCorrectInputExpectCorrectO 0.21008080091470105, 0.0, 0.058615131833681167, 0}; std::vector const scalarFlux{0.078034606921016325, 0.15710005136841393, 0.23371763662029341}; - Real thermalEnergyFlux = 0.047345816580591255; - std::vector const testFluxes = Compute_Fluxes(leftFastRareRightSide, leftFastRareLeftSide, gamma, direction); + Real thermalEnergyFlux = 0.047345816580591255; + std::vector const testFluxes = + Compute_Fluxes(leftFastRareRightSide, leftFastRareLeftSide, gamma, direction); Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } { @@ -792,8 +794,9 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, DaiAndWoodwardShockTubeCorrectInputExpectCor -0.20403672861184916, 4.014027751838869, 0.0, 0.7248753989305099, -0.059178137562467162}; std::vector const scalarFlux{1.0663278606879119, 2.1467419174572049, 3.1937064501984724}; - Real thermalEnergyFlux = 1.5323573637968553; - std::vector const testFluxes = Compute_Fluxes(leftRotationLeftSide, leftRotationRightSide, gamma, direction); + Real thermalEnergyFlux = 1.5323573637968553; + std::vector const testFluxes = + Compute_Fluxes(leftRotationLeftSide, leftRotationRightSide, gamma, direction); Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } { @@ -807,8 +810,9 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, DaiAndWoodwardShockTubeCorrectInputExpectCor -0.31541343522923493, 3.9739842521208342, 0.0, 0.75541746728406312, -0.13479771672887678}; std::vector const scalarFlux{1.0666336820367937, 2.1473576000564334, 3.1946224007710313}; - Real thermalEnergyFlux = 1.5333744977458499; - std::vector const testFluxes = Compute_Fluxes(leftRotationRightSide, leftRotationLeftSide, gamma, direction); + Real thermalEnergyFlux = 1.5333744977458499; + std::vector const testFluxes = + Compute_Fluxes(leftRotationRightSide, leftRotationLeftSide, gamma, direction); Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); } { @@ -1304,7 +1308,8 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, EinfeldtStrongRarefactionCorrectInputExpectC Primitive_2_Conserved({0.368580, -1.180830, Vy, Vz, 0.111253, Bx, 0.183044, Bz}, gamma, primitiveScalar), leftVxTurnOver = Primitive_2_Conserved({0.058814, -0.125475, Vy, Vz, 0.008819, Bx, 0.029215, Bz}, gamma, primitiveScalar), - midPoint = Primitive_2_Conserved({0.034658, 0.000778, Vy, Vz, 0.006776, Bx, 0.017333, Bz}, gamma, primitiveScalar), + midPoint = + Primitive_2_Conserved({0.034658, 0.000778, Vy, Vz, 0.006776, Bx, 0.017333, Bz}, gamma, primitiveScalar), rightVxTurnOver = Primitive_2_Conserved({0.062587, 0.152160, Vy, Vz, 0.009521, Bx, 0.031576, Bz}, gamma, primitiveScalar), rightRarefactionCenter = @@ -1554,8 +1559,10 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, ConstantStatesExpectCorrectFlux) std::vector const // | Density | X-Velocity | Y-Velocity | Z-Velocity | // Pressure | X-Magnetic Field | Y-Magnetic Field | // Z-Magnetic Field | Adiabatic Index | Passive Scalars | - zeroMagneticField = Primitive_2_Conserved({1e4, 0.0, 0.0, 0.0, 1.380658E-5, 0.0, 0.0, 0.0}, gamma, primitiveScalar), - onesMagneticField = Primitive_2_Conserved({1e4, 0.0, 0.0, 0.0, 1.380658E-5, 1.0, 1.0, 1.0}, gamma, primitiveScalar); + zeroMagneticField = + Primitive_2_Conserved({1e4, 0.0, 0.0, 0.0, 1.380658E-5, 0.0, 0.0, 0.0}, gamma, primitiveScalar), + onesMagneticField = + Primitive_2_Conserved({1e4, 0.0, 0.0, 0.0, 1.380658E-5, 1.0, 1.0, 1.0}, gamma, primitiveScalar); for (size_t direction = 0; direction < 3; direction++) { { @@ -1953,17 +1960,18 @@ TEST(tMHDHlldInternalNonStarFluxes, CorrectInputExpectCorrectOutput) mhd::_internal::nonStarFluxes(parameters.stateLVec.at(i), parameters.magneticX.at(i)); // Now check results - testingUtilities::Check_Results(fiducialFlux[i].density, testFlux.density, parameters.names.at(i) + ", DensityFlux"); + testingUtilities::Check_Results(fiducialFlux[i].density, testFlux.density, + parameters.names.at(i) + ", DensityFlux"); testingUtilities::Check_Results(fiducialFlux[i].momentumX, testFlux.momentumX, - parameters.names.at(i) + ", MomentumFluxX"); + parameters.names.at(i) + ", MomentumFluxX"); testingUtilities::Check_Results(fiducialFlux[i].momentumY, testFlux.momentumY, - parameters.names.at(i) + ", MomentumFluxY"); + parameters.names.at(i) + ", MomentumFluxY"); testingUtilities::Check_Results(fiducialFlux[i].momentumZ, testFlux.momentumZ, - parameters.names.at(i) + ", MomentumFluxZ"); + parameters.names.at(i) + ", MomentumFluxZ"); testingUtilities::Check_Results(fiducialFlux[i].magneticY, testFlux.magneticY, - parameters.names.at(i) + ", MagneticFluxY"); + parameters.names.at(i) + ", MagneticFluxY"); testingUtilities::Check_Results(fiducialFlux[i].magneticZ, testFlux.magneticZ, - parameters.names.at(i) + ", MagneticFluxZ"); + parameters.names.at(i) + ", MagneticFluxZ"); testingUtilities::Check_Results(fiducialFlux[i].energy, testFlux.energy, parameters.names.at(i) + ", EnergyFlux"); } } @@ -1992,15 +2000,15 @@ TEST(tMHDHlldInternalComputeStarState, CorrectInputNonDegenerateExpectCorrectOut // Now check results testingUtilities::Check_Results(fiducialStarState.at(i).velocityY, testStarState.velocityY, - parameters.names.at(i) + ", VelocityStarY"); + parameters.names.at(i) + ", VelocityStarY"); testingUtilities::Check_Results(fiducialStarState.at(i).velocityZ, testStarState.velocityZ, - parameters.names.at(i) + ", VelocityStarZ"); + parameters.names.at(i) + ", VelocityStarZ"); testingUtilities::Check_Results(fiducialStarState.at(i).energy, testStarState.energy, - parameters.names.at(i) + ", EnergyStar"); + parameters.names.at(i) + ", EnergyStar"); testingUtilities::Check_Results(fiducialStarState.at(i).magneticY, testStarState.magneticY, - parameters.names.at(i) + ", MagneticStarY"); + parameters.names.at(i) + ", MagneticStarY"); testingUtilities::Check_Results(fiducialStarState.at(i).magneticZ, testStarState.magneticZ, - parameters.names.at(i) + ", MagneticStarZ"); + parameters.names.at(i) + ", MagneticStarZ"); } } @@ -2030,19 +2038,19 @@ TEST(tMHDHlldInternalStarFluxes, CorrectInputNonDegenerateExpectCorrectOutput) // Now check results testingUtilities::Check_Results(fiducialFlux[i].density, testFlux.density, - parameters.names.at(i) + ", DensityStarFlux"); + parameters.names.at(i) + ", DensityStarFlux"); testingUtilities::Check_Results(fiducialFlux[i].momentumX, testFlux.momentumX, - parameters.names.at(i) + ", MomentumStarFluxX"); + parameters.names.at(i) + ", MomentumStarFluxX"); testingUtilities::Check_Results(fiducialFlux[i].momentumY, testFlux.momentumY, - parameters.names.at(i) + ", MomentumStarFluxY"); + parameters.names.at(i) + ", MomentumStarFluxY"); testingUtilities::Check_Results(fiducialFlux[i].momentumZ, testFlux.momentumZ, - parameters.names.at(i) + ", MomentumStarFluxZ"); + parameters.names.at(i) + ", MomentumStarFluxZ"); testingUtilities::Check_Results(fiducialFlux[i].energy, testFlux.energy, - parameters.names.at(i) + ", EnergyStarFlux"); + parameters.names.at(i) + ", EnergyStarFlux"); testingUtilities::Check_Results(fiducialFlux[i].magneticY, testFlux.magneticY, - parameters.names.at(i) + ", MagneticStarFluxY", 1.0E-13); + parameters.names.at(i) + ", MagneticStarFluxY", 1.0E-13); testingUtilities::Check_Results(fiducialFlux[i].magneticZ, testFlux.magneticZ, - parameters.names.at(i) + ", MagneticStarFluxZ", 7.0E-13); + parameters.names.at(i) + ", MagneticStarFluxZ", 7.0E-13); } } @@ -2072,15 +2080,15 @@ TEST(tMHDHlldInternalComputeStarState, CorrectInputDegenerateExpectCorrectOutput // Now check results testingUtilities::Check_Results(fiducialStarState.at(i).velocityY, testStarState.velocityY, - parameters.names.at(i) + ", VelocityStarY"); + parameters.names.at(i) + ", VelocityStarY"); testingUtilities::Check_Results(fiducialStarState.at(i).velocityZ, testStarState.velocityZ, - parameters.names.at(i) + ", VelocityStarZ"); + parameters.names.at(i) + ", VelocityStarZ"); testingUtilities::Check_Results(fiducialStarState.at(i).energy, testStarState.energy, - parameters.names.at(i) + ", EnergyStar"); + parameters.names.at(i) + ", EnergyStar"); testingUtilities::Check_Results(fiducialStarState.at(i).magneticY, testStarState.magneticY, - parameters.names.at(i) + ", MagneticStarY"); + parameters.names.at(i) + ", MagneticStarY"); testingUtilities::Check_Results(fiducialStarState.at(i).magneticZ, testStarState.magneticZ, - parameters.names.at(i) + ", MagneticStarZ"); + parameters.names.at(i) + ", MagneticStarZ"); } } @@ -2107,19 +2115,19 @@ TEST(tMHDHlldInternalStarFluxes, CorrectInputDegenerateExpectCorrectOutput) // Now check results testingUtilities::Check_Results(fiducialFlux[i].density, testFlux.density, - parameters.names.at(i) + ", DensityStarFlux"); + parameters.names.at(i) + ", DensityStarFlux"); testingUtilities::Check_Results(fiducialFlux[i].momentumX, testFlux.momentumX, - parameters.names.at(i) + ", MomentumStarFluxX"); + parameters.names.at(i) + ", MomentumStarFluxX"); testingUtilities::Check_Results(fiducialFlux[i].momentumY, testFlux.momentumY, - parameters.names.at(i) + ", MomentumStarFluxY"); + parameters.names.at(i) + ", MomentumStarFluxY"); testingUtilities::Check_Results(fiducialFlux[i].momentumZ, testFlux.momentumZ, - parameters.names.at(i) + ", MomentumStarFluxZ"); + parameters.names.at(i) + ", MomentumStarFluxZ"); testingUtilities::Check_Results(fiducialFlux[i].energy, testFlux.energy, - parameters.names.at(i) + ", EnergyStarFlux"); + parameters.names.at(i) + ", EnergyStarFlux"); testingUtilities::Check_Results(fiducialFlux[i].magneticY, testFlux.magneticY, - parameters.names.at(i) + ", MagneticStarFluxY"); + parameters.names.at(i) + ", MagneticStarFluxY"); testingUtilities::Check_Results(fiducialFlux[i].magneticZ, testFlux.magneticZ, - parameters.names.at(i) + ", MagneticStarFluxZ"); + parameters.names.at(i) + ", MagneticStarFluxZ"); } } // ========================================================================= @@ -2147,17 +2155,17 @@ TEST(tMHDHlldInternalDoubleStarState, CorrectInputNonDegenerateExpectCorrectOutp // Now check results testingUtilities::Check_Results(fiducialState.at(i).velocityY, testState.velocityY, - parameters.names.at(i) + ", VelocityDoubleStarY"); + parameters.names.at(i) + ", VelocityDoubleStarY"); testingUtilities::Check_Results(fiducialState.at(i).velocityZ, testState.velocityZ, - parameters.names.at(i) + ", VelocityDoubleStarZ"); + parameters.names.at(i) + ", VelocityDoubleStarZ"); testingUtilities::Check_Results(fiducialState.at(i).magneticY, testState.magneticY, - parameters.names.at(i) + ", MagneticDoubleStarY"); + parameters.names.at(i) + ", MagneticDoubleStarY"); testingUtilities::Check_Results(fiducialState.at(i).magneticZ, testState.magneticZ, - parameters.names.at(i) + ", MagneticDoubleStarZ"); + parameters.names.at(i) + ", MagneticDoubleStarZ"); testingUtilities::Check_Results(fiducialState.at(i).energyL, testState.energyL, - parameters.names.at(i) + ", EnergyDoubleStarL"); + parameters.names.at(i) + ", EnergyDoubleStarL"); testingUtilities::Check_Results(fiducialState.at(i).energyR, testState.energyR, - parameters.names.at(i) + ", EnergyDoubleStarR"); + parameters.names.at(i) + ", EnergyDoubleStarR"); } } @@ -2181,17 +2189,17 @@ TEST(tMHDHlldInternalDoubleStarState, CorrectInputDegenerateExpectCorrectOutput) // Now check results testingUtilities::Check_Results(fiducialState.at(i).velocityY, testState.velocityY, - parameters.names.at(i) + ", VelocityDoubleStarY"); + parameters.names.at(i) + ", VelocityDoubleStarY"); testingUtilities::Check_Results(fiducialState.at(i).velocityZ, testState.velocityZ, - parameters.names.at(i) + ", VelocityDoubleStarZ"); + parameters.names.at(i) + ", VelocityDoubleStarZ"); testingUtilities::Check_Results(fiducialState.at(i).magneticY, testState.magneticY, - parameters.names.at(i) + ", MagneticDoubleStarY"); + parameters.names.at(i) + ", MagneticDoubleStarY"); testingUtilities::Check_Results(fiducialState.at(i).magneticZ, testState.magneticZ, - parameters.names.at(i) + ", MagneticDoubleStarZ"); + parameters.names.at(i) + ", MagneticDoubleStarZ"); testingUtilities::Check_Results(fiducialState.at(i).energyL, testState.energyL, - parameters.names.at(i) + ", EnergyDoubleStarL"); + parameters.names.at(i) + ", EnergyDoubleStarL"); testingUtilities::Check_Results(fiducialState.at(i).energyR, testState.energyR, - parameters.names.at(i) + ", EnergyDoubleStarR"); + parameters.names.at(i) + ", EnergyDoubleStarR"); } } // ========================================================================= @@ -2219,19 +2227,19 @@ TEST(tMHDHlldInternalDoubleStarFluxes, CorrectInputExpectCorrectOutput) // Now check results testingUtilities::Check_Results(fiducialFlux[i].density, testFlux.density, - parameters.names.at(i) + ", DensityStarFlux", 5.0E-14); + parameters.names.at(i) + ", DensityStarFlux", 5.0E-14); testingUtilities::Check_Results(fiducialFlux[i].momentumX, testFlux.momentumX, - parameters.names.at(i) + ", MomentumStarFluxX"); + parameters.names.at(i) + ", MomentumStarFluxX"); testingUtilities::Check_Results(fiducialFlux[i].momentumY, testFlux.momentumY, - parameters.names.at(i) + ", MomentumStarFluxY"); + parameters.names.at(i) + ", MomentumStarFluxY"); testingUtilities::Check_Results(fiducialFlux[i].momentumZ, testFlux.momentumZ, - parameters.names.at(i) + ", MomentumStarFluxZ"); + parameters.names.at(i) + ", MomentumStarFluxZ"); testingUtilities::Check_Results(fiducialFlux[i].energy, testFlux.energy, - parameters.names.at(i) + ", EnergyStarFlux"); + parameters.names.at(i) + ", EnergyStarFlux"); testingUtilities::Check_Results(fiducialFlux[i].magneticY, testFlux.magneticY, - parameters.names.at(i) + ", MagneticStarFluxY"); + parameters.names.at(i) + ", MagneticStarFluxY"); testingUtilities::Check_Results(fiducialFlux[i].magneticZ, testFlux.magneticZ, - parameters.names.at(i) + ", MagneticStarFluxZ"); + parameters.names.at(i) + ", MagneticStarFluxZ"); } } // ========================================================================= @@ -2339,7 +2347,7 @@ TEST(tMHDHlldInternalStarTotalPressure, CorrectInputExpectCorrectOutput) // Now check results testingUtilities::Check_Results(fiducialPressure.at(i), testPressure, - parameters.names.at(i) + ", total pressure in the star states"); + parameters.names.at(i) + ", total pressure in the star states"); } } // ========================================================================= @@ -2396,7 +2404,7 @@ TEST(tMHDHlldInternalLoadState, CorrectInputExpectCorrectOutput) testingUtilities::Check_Results(fiducialState.at(direction).magneticZ, testState.magneticZ, ", magneticZ"); testingUtilities::Check_Results(fiducialState.at(direction).gasPressure, testState.gasPressure, ", gasPressure"); testingUtilities::Check_Results(fiducialState.at(direction).totalPressure, testState.totalPressure, - ", totalPressure"); + ", totalPressure"); } } // ========================================================================= diff --git a/src/system_tests/hydro_system_tests.cpp b/src/system_tests/hydro_system_tests.cpp index be59ffba6..5366a2f9a 100644 --- a/src/system_tests/hydro_system_tests.cpp +++ b/src/system_tests/hydro_system_tests.cpp @@ -149,8 +149,8 @@ class tHYDROtMHDSYSTEMLinearWavesParameterizedMpi : public ::testing::TestWithPa #endif // PCM void Set_Launch_Params(double const &waveSpeed, double const &rEigenVec_rho, double const &rEigenVec_MomentumX, - double const &rEigenVec_MomentumY, double const &rEigenVec_MomentumZ, double const &rEigenVec_E, - double const &vx = 0.0) + double const &rEigenVec_MomentumY, double const &rEigenVec_MomentumZ, + double const &rEigenVec_E, double const &vx = 0.0) { // Constant for all tests size_t const N = 32; @@ -213,7 +213,8 @@ TEST_P(tHYDROtMHDSYSTEMLinearWavesParameterizedMpi, SoundWaveRightMovingCorrectI double const rEigenVec_E = 1.5; // Set the launch parameters - Set_Launch_Params(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, rEigenVec_MomentumY, rEigenVec_MomentumZ, rEigenVec_E); + Set_Launch_Params(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, rEigenVec_MomentumY, rEigenVec_MomentumZ, + rEigenVec_E); // Set the number of MPI ranks waveTest.numMpiRanks = GetParam(); @@ -238,7 +239,8 @@ TEST_P(tHYDROtMHDSYSTEMLinearWavesParameterizedMpi, SoundWaveLeftMovingCorrectIn double const rEigenVec_E = 1.5; // Set the launch parameters - Set_Launch_Params(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, rEigenVec_MomentumY, rEigenVec_MomentumZ, rEigenVec_E); + Set_Launch_Params(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, rEigenVec_MomentumY, rEigenVec_MomentumZ, + rEigenVec_E); // Set the number of MPI ranks waveTest.numMpiRanks = GetParam(); @@ -266,8 +268,8 @@ TEST_P(tHYDROtMHDSYSTEMLinearWavesParameterizedMpi, HydroContactWaveCorrectInput double const velocityX = waveSpeed; // Set the launch parameters - Set_Launch_Params(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, rEigenVec_MomentumY, rEigenVec_MomentumZ, rEigenVec_E, - velocityX); + Set_Launch_Params(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, rEigenVec_MomentumY, rEigenVec_MomentumZ, + rEigenVec_E, velocityX); // Set the number of MPI ranks waveTest.numMpiRanks = GetParam(); diff --git a/src/system_tests/mhd_system_tests.cpp b/src/system_tests/mhd_system_tests.cpp index 50235d71b..570b74c16 100644 --- a/src/system_tests/mhd_system_tests.cpp +++ b/src/system_tests/mhd_system_tests.cpp @@ -34,10 +34,10 @@ class tMHDSYSTEMLinearWavesParameterizedAngle : public ::testing::TestWithParam< systemTest::SystemTestRunner waveTest; void Set_Launch_Params(double const &waveSpeed, double const &rEigenVec_rho, double const &rEigenVec_MomentumX, - double const &rEigenVec_MomentumY, double const &rEigenVec_MomentumZ, double const &rEigenVec_E, - double const &rEigenVec_Bx, double const &rEigenVec_By, double const &rEigenVec_Bz, - double const &pitch, double const &yaw, double const &domain, int const &domain_direction, - double const &vx = 0.0) + double const &rEigenVec_MomentumY, double const &rEigenVec_MomentumZ, + double const &rEigenVec_E, double const &rEigenVec_Bx, double const &rEigenVec_By, + double const &rEigenVec_Bz, double const &pitch, double const &yaw, double const &domain, + int const &domain_direction, double const &vx = 0.0) { // Constant for all tests size_t const N = 32; @@ -148,8 +148,8 @@ TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, FastMagnetosonicWaveRightMovingC auto [pitch, yaw, domain, domain_direction] = GetParam(); // Set the launch parameters - Set_Launch_Params(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, rEigenVec_MomentumY, rEigenVec_MomentumZ, rEigenVec_E, - rEigenVec_Bx, rEigenVec_By, rEigenVec_Bz, pitch, yaw, domain, domain_direction); + Set_Launch_Params(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, rEigenVec_MomentumY, rEigenVec_MomentumZ, + rEigenVec_E, rEigenVec_Bx, rEigenVec_By, rEigenVec_Bz, pitch, yaw, domain, domain_direction); // Set the number of timesteps waveTest.setFiducialNumTimeSteps(numTimeSteps[domain_direction - 1]); @@ -184,8 +184,8 @@ TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, FastMagnetosonicWaveLeftMovingCo auto [pitch, yaw, domain, domain_direction] = GetParam(); // Set the launch parameters - Set_Launch_Params(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, rEigenVec_MomentumY, rEigenVec_MomentumZ, rEigenVec_E, - rEigenVec_Bx, rEigenVec_By, rEigenVec_Bz, pitch, yaw, domain, domain_direction); + Set_Launch_Params(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, rEigenVec_MomentumY, rEigenVec_MomentumZ, + rEigenVec_E, rEigenVec_Bx, rEigenVec_By, rEigenVec_Bz, pitch, yaw, domain, domain_direction); // Set the number of timesteps waveTest.setFiducialNumTimeSteps(numTimeSteps[domain_direction - 1]); @@ -222,8 +222,8 @@ TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, SlowMagnetosonicWaveRightMovingC auto [pitch, yaw, domain, domain_direction] = GetParam(); // Set the launch parameters - Set_Launch_Params(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, rEigenVec_MomentumY, rEigenVec_MomentumZ, rEigenVec_E, - rEigenVec_Bx, rEigenVec_By, rEigenVec_Bz, pitch, yaw, domain, domain_direction); + Set_Launch_Params(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, rEigenVec_MomentumY, rEigenVec_MomentumZ, + rEigenVec_E, rEigenVec_Bx, rEigenVec_By, rEigenVec_Bz, pitch, yaw, domain, domain_direction); // Set the number of timesteps waveTest.setFiducialNumTimeSteps(numTimeSteps[domain_direction - 1]); @@ -258,8 +258,8 @@ TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, SlowMagnetosonicWaveLeftMovingCo auto [pitch, yaw, domain, domain_direction] = GetParam(); // Set the launch parameters - Set_Launch_Params(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, rEigenVec_MomentumY, rEigenVec_MomentumZ, rEigenVec_E, - rEigenVec_Bx, rEigenVec_By, rEigenVec_Bz, pitch, yaw, domain, domain_direction); + Set_Launch_Params(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, rEigenVec_MomentumY, rEigenVec_MomentumZ, + rEigenVec_E, rEigenVec_Bx, rEigenVec_By, rEigenVec_Bz, pitch, yaw, domain, domain_direction); // Set the number of timesteps waveTest.setFiducialNumTimeSteps(numTimeSteps[domain_direction - 1]); @@ -295,8 +295,8 @@ TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, AlfvenWaveRightMovingCorrectInpu auto [pitch, yaw, domain, domain_direction] = GetParam(); // Set the launch parameters - Set_Launch_Params(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, rEigenVec_MomentumY, rEigenVec_MomentumZ, rEigenVec_E, - rEigenVec_Bx, rEigenVec_By, rEigenVec_Bz, pitch, yaw, domain, domain_direction); + Set_Launch_Params(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, rEigenVec_MomentumY, rEigenVec_MomentumZ, + rEigenVec_E, rEigenVec_Bx, rEigenVec_By, rEigenVec_Bz, pitch, yaw, domain, domain_direction); // Set the number of timesteps waveTest.setFiducialNumTimeSteps(numTimeSteps[domain_direction - 1]); @@ -330,8 +330,8 @@ TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, AlfvenWaveLeftMovingCorrectInput auto [pitch, yaw, domain, domain_direction] = GetParam(); // Set the launch parameters - Set_Launch_Params(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, rEigenVec_MomentumY, rEigenVec_MomentumZ, rEigenVec_E, - rEigenVec_Bx, rEigenVec_By, rEigenVec_Bz, pitch, yaw, domain, domain_direction); + Set_Launch_Params(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, rEigenVec_MomentumY, rEigenVec_MomentumZ, + rEigenVec_E, rEigenVec_Bx, rEigenVec_By, rEigenVec_Bz, pitch, yaw, domain, domain_direction); // Set the number of timesteps waveTest.setFiducialNumTimeSteps(numTimeSteps[domain_direction - 1]); @@ -368,8 +368,9 @@ TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, MHDContactWaveCorrectInputExpect auto [pitch, yaw, domain, domain_direction] = GetParam(); // Set the launch parameters - Set_Launch_Params(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, rEigenVec_MomentumY, rEigenVec_MomentumZ, rEigenVec_E, - rEigenVec_Bx, rEigenVec_By, rEigenVec_Bz, pitch, yaw, domain, domain_direction, velocityX); + Set_Launch_Params(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, rEigenVec_MomentumY, rEigenVec_MomentumZ, + rEigenVec_E, rEigenVec_Bx, rEigenVec_By, rEigenVec_Bz, pitch, yaw, domain, domain_direction, + velocityX); // Set the number of timesteps waveTest.setFiducialNumTimeSteps(numTimeSteps[domain_direction - 1]); @@ -414,8 +415,9 @@ class tMHDSYSTEMLinearWavesParameterizedMpi : public ::testing::TestWithParam void Check_Results(double fiducialNumber, double testNumber, std::string const &outString, double fixedEpsilon = -999, - int64_t ulpsEpsilon = -999) + int64_t ulpsEpsilon = -999) { // Check for equality and if not equal return difference double absoluteDiff; From 750c69c821b684b5b1d34a6e4460467227e9c38b Mon Sep 17 00:00:00 2001 From: helenarichie Date: Tue, 11 Jul 2023 08:36:44 -0400 Subject: [PATCH 448/694] fix function name --- src/gravity/paris/PoissonZero3DBlockedGPU.cu | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/gravity/paris/PoissonZero3DBlockedGPU.cu b/src/gravity/paris/PoissonZero3DBlockedGPU.cu index e924ca30e..08ac30df8 100644 --- a/src/gravity/paris/PoissonZero3DBlockedGPU.cu +++ b/src/gravity/paris/PoissonZero3DBlockedGPU.cu @@ -38,7 +38,7 @@ PoissonZero3DBlockedGPU::PoissonZero3DBlockedGPU(const int n[3], const double lo nj_(n[1]), nk_(n[2]) { - mq_ = int(round(Sqrt(mk_))); + mq_ = int(round(Sqr(mk_))); while (mk_ % mq_) { mq_--; } @@ -349,7 +349,7 @@ void PoissonZero3DBlockedGPU::solve(const long bytes, double *const density, dou double wa, wb; sincospi(double(i) / double(ni + ni), &wb, &wa); #ifdef PARIS_GALACTIC_3PT - const double nii = Sqr(sin(double(ni - i) * si) * ddi); + const double nii = t(sin(double(ni - i) * si) * ddi); #elif defined PARIS_GALACTIC_5PT const double cni = cos(double(ni - i) * si); const double nii = ddi * (2.0 * cni * cni - 16.0 * cni + 14.0); From 146589509a5250e3280c3212889c52a31ed513c1 Mon Sep 17 00:00:00 2001 From: helenarichie Date: Tue, 11 Jul 2023 10:44:26 -0400 Subject: [PATCH 449/694] remove accidental renaming of hip function --- src/particles/feedback_CIC_gpu.cu | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/particles/feedback_CIC_gpu.cu b/src/particles/feedback_CIC_gpu.cu index f464e167e..a7ce87866 100644 --- a/src/particles/feedback_CIC_gpu.cu +++ b/src/particles/feedback_CIC_gpu.cu @@ -36,7 +36,7 @@ int snr_n; } // namespace supernova #ifndef O_HIP -__device__ double Atomic_Max(double* address, double val) +__device__ double atomicMax(double* address, double val) { auto* address_as_ull = (unsigned long long int*)address; unsigned long long int old = *address_as_ull, assumed; @@ -611,7 +611,7 @@ __global__ void Cluster_Feedback_Kernel(part_int_t n_local, part_int_t* id, Real } } if (direction > 0) { - Atomic_Max(dti, local_dti); + atomicMax(dti, local_dti); } } } From 17528eb446cdb799803a550ff29eb1a7b06f9f75 Mon Sep 17 00:00:00 2001 From: helenarichie Date: Tue, 11 Jul 2023 15:51:42 -0400 Subject: [PATCH 450/694] changes for disk build to pass naming clang-tidy check --- .clang-tidy | 3 +++ src/gravity/paris/PoissonZero3DBlockedGPU.cu | 2 +- src/particles/feedback_CIC_gpu.cu | 1 + 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/.clang-tidy b/.clang-tidy index af3796ff8..f3798efb6 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -38,6 +38,9 @@ Checks: "*, google-readability-avoid-underscore-in-googletest-name, google-upgrade-googletest-case, + -*, + readability-identifier-naming, + -bugprone-implicit-widening-of-multiplication-result, -bugprone-narrowing-conversions, -cert-env33-c, diff --git a/src/gravity/paris/PoissonZero3DBlockedGPU.cu b/src/gravity/paris/PoissonZero3DBlockedGPU.cu index 08ac30df8..7d94c8ca3 100644 --- a/src/gravity/paris/PoissonZero3DBlockedGPU.cu +++ b/src/gravity/paris/PoissonZero3DBlockedGPU.cu @@ -110,7 +110,7 @@ PoissonZero3DBlockedGPU::~PoissonZero3DBlockedGPU() MPI_Comm_free(&commK_); } -void print(const char *const title, const int ni, const int nj, const int nk, const double *const v) +void Print(const char *const title, const int ni, const int nj, const int nk, const double *const v) { printf("%s:\n", title); for (int i = 0; i < ni; i++) { diff --git a/src/particles/feedback_CIC_gpu.cu b/src/particles/feedback_CIC_gpu.cu index a7ce87866..0e535c91f 100644 --- a/src/particles/feedback_CIC_gpu.cu +++ b/src/particles/feedback_CIC_gpu.cu @@ -36,6 +36,7 @@ int snr_n; } // namespace supernova #ifndef O_HIP +// NOLINTNEXTLINE(readability-identifier-naming) __device__ double atomicMax(double* address, double val) { auto* address_as_ull = (unsigned long long int*)address; From bb93d1dbf80f3bac594d85e6cafaeb4f94174d05 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Thu, 13 Jul 2023 13:59:22 -0400 Subject: [PATCH 451/694] Fixed OTV initial conditions The Orszag-Tang Vortex had a sign error in the initial conditions. This fixes that error and updates the test data. --- cholla-tests-data | 2 +- src/grid/initial_conditions.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cholla-tests-data b/cholla-tests-data index c2cc6d173..321416680 160000 --- a/cholla-tests-data +++ b/cholla-tests-data @@ -1 +1 @@ -Subproject commit c2cc6d173bf2d04a0dae6a45cb71624f56b22bb8 +Subproject commit 321416680f95d97b5d4ccc6f0b83a8b9ecafdaf0 diff --git a/src/grid/initial_conditions.cpp b/src/grid/initial_conditions.cpp index 3fa0a7380..eebfbb21a 100644 --- a/src/grid/initial_conditions.cpp +++ b/src/grid/initial_conditions.cpp @@ -1938,7 +1938,7 @@ void Grid3D::Orszag_Tang_Vortex() // Z vector potential vectorPotential.at(id + 2 * H.n_cells) = - magnetic_background / (4.0 * M_PI) * (std::cos(4.0 * M_PI * x) - 2.0 * std::cos(2.0 * M_PI * y)); + magnetic_background / (4.0 * M_PI) * (std::cos(4.0 * M_PI * x) + 2.0 * std::cos(2.0 * M_PI * y)); } } } From fb804d8eab3ef1b48ad5f9a840b07c4bd55ddf09 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Wed, 28 Jun 2023 17:49:26 -0400 Subject: [PATCH 452/694] Refactor to only compute eigenvectors once --- src/reconstruction/plmc_cuda.cu | 19 +- src/reconstruction/plmc_cuda_tests.cu | 506 ++++++++++----------- src/reconstruction/ppmc_cuda.cu | 96 ++-- src/reconstruction/reconstruction.h | 269 +++++------ src/reconstruction/reconstruction_tests.cu | 129 ++++-- 5 files changed, 566 insertions(+), 453 deletions(-) diff --git a/src/reconstruction/plmc_cuda.cu b/src/reconstruction/plmc_cuda.cu index fd7d2e0ae..e45bbf771 100644 --- a/src/reconstruction/plmc_cuda.cu +++ b/src/reconstruction/plmc_cuda.cu @@ -74,6 +74,14 @@ __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou Real const sound_speed = hydro_utilities::Calc_Sound_Speed(cell_i.pressure, cell_i.density, gamma); Real const sound_speed_squared = sound_speed * sound_speed; +// Compute the eigenvectors +#ifdef MHD + reconstruction::eigenVecs const eigenvectors = + reconstruction::Compute_Eigenvectors(cell_i, sound_speed, sound_speed_squared, gamma); +#else + reconstruction::eigenVecs eigenvectors; +#endif // MHD + // Compute the left, right, centered, and van Leer differences of the // primitive variables Note that here L and R refer to locations relative to // the cell center @@ -95,21 +103,22 @@ __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou // characteristic variables, see Stone for notation) Use the eigenvectors // given in Stone 2008, Appendix A reconstruction::Characteristic const del_a_L = - reconstruction::Primitive_To_Characteristic(cell_i, del_L, sound_speed, sound_speed_squared, gamma); + reconstruction::Primitive_To_Characteristic(cell_i, del_L, eigenvectors, sound_speed, sound_speed_squared, gamma); reconstruction::Characteristic const del_a_R = - reconstruction::Primitive_To_Characteristic(cell_i, del_R, sound_speed, sound_speed_squared, gamma); + reconstruction::Primitive_To_Characteristic(cell_i, del_R, eigenvectors, sound_speed, sound_speed_squared, gamma); reconstruction::Characteristic const del_a_C = - reconstruction::Primitive_To_Characteristic(cell_i, del_C, sound_speed, sound_speed_squared, gamma); + reconstruction::Primitive_To_Characteristic(cell_i, del_C, eigenvectors, sound_speed, sound_speed_squared, gamma); reconstruction::Characteristic const del_a_G = - reconstruction::Primitive_To_Characteristic(cell_i, del_G, sound_speed, sound_speed_squared, gamma); + reconstruction::Primitive_To_Characteristic(cell_i, del_G, eigenvectors, sound_speed, sound_speed_squared, gamma); // Apply monotonicity constraints to the differences in the characteristic variables and project the monotonized // difference in the characteristic variables back onto the primitive variables Stone Eqn 39 reconstruction::Primitive del_m_i = reconstruction::Monotonize_Characteristic_Return_Primitive( - cell_i, del_L, del_R, del_C, del_G, del_a_L, del_a_R, del_a_C, del_a_G, sound_speed, sound_speed_squared, gamma); + cell_i, del_L, del_R, del_C, del_G, del_a_L, del_a_R, del_a_C, del_a_G, eigenvectors, sound_speed, + sound_speed_squared, gamma); // Compute the left and right interface values using the monotonized difference in the primitive variables reconstruction::Primitive interface_L_iph = reconstruction::Calc_Interface_Linear(cell_i, del_m_i, 1.0); diff --git a/src/reconstruction/plmc_cuda_tests.cu b/src/reconstruction/plmc_cuda_tests.cu index 3616d2d0a..11f859967 100644 --- a/src/reconstruction/plmc_cuda_tests.cu +++ b/src/reconstruction/plmc_cuda_tests.cu @@ -1,280 +1,280 @@ -/*! - * \file plmc_cuda_tests.cu - * \brief Tests for the contents of plmc_cuda.h and plmc_cuda.cu - * - */ +// /*! +// * \file plmc_cuda_tests.cu +// * \brief Tests for the contents of plmc_cuda.h and plmc_cuda.cu +// * +// */ -// STL Includes -#include -#include -#include -#include +// // STL Includes +// #include +// #include +// #include +// #include -// External Includes -#include // Include GoogleTest and related libraries/headers +// // External Includes +// #include // Include GoogleTest and related libraries/headers -// Local Includes -#include +// // Local Includes +// #include -#include "../global/global.h" -#include "../io/io.h" -#include "../reconstruction/plmc_cuda.h" -#include "../utils/DeviceVector.h" -#include "../utils/hydro_utilities.h" -#include "../utils/testing_utilities.h" +// #include "../global/global.h" +// #include "../io/io.h" +// #include "../reconstruction/plmc_cuda.h" +// #include "../utils/DeviceVector.h" +// #include "../utils/hydro_utilities.h" +// #include "../utils/testing_utilities.h" -TEST(tHYDROPlmcReconstructor, CorrectInputExpectCorrectOutput) -{ - // Set up PRNG to use - std::mt19937_64 prng(42); - std::uniform_real_distribution doubleRand(0.1, 5); +// TEST(tHYDROPlmcReconstructor, CorrectInputExpectCorrectOutput) +// { +// // Set up PRNG to use +// std::mt19937_64 prng(42); +// std::uniform_real_distribution doubleRand(0.1, 5); - // Mock up needed information - size_t const nx = 5; - size_t const ny = 4; - size_t const nz = 4; - size_t const n_fields = 5; - double const dx = doubleRand(prng); - double const dt = doubleRand(prng); - double const gamma = 5.0 / 3.0; +// // Mock up needed information +// size_t const nx = 5; +// size_t const ny = 4; +// size_t const nz = 4; +// size_t const n_fields = 5; +// double const dx = doubleRand(prng); +// double const dt = doubleRand(prng); +// double const gamma = 5.0 / 3.0; - // Setup host grid. Fill host grid with random values and randomly assign maximum value - std::vector host_grid(nx * ny * nz * n_fields); - for (Real &val : host_grid) { - val = doubleRand(prng); - } +// // Setup host grid. Fill host grid with random values and randomly assign maximum value +// std::vector host_grid(nx * ny * nz * n_fields); +// for (Real &val : host_grid) { +// val = doubleRand(prng); +// } - // Allocating and copying to device - cuda_utilities::DeviceVector dev_grid(host_grid.size()); - dev_grid.cpyHostToDevice(host_grid); +// // Allocating and copying to device +// cuda_utilities::DeviceVector dev_grid(host_grid.size()); +// dev_grid.cpyHostToDevice(host_grid); - // Fiducial Data - std::vector> fiducial_interface_left = {{{26, 2.1584359129984056}, - {27, 0.70033864721549188}, - {106, 2.2476363309467553}, - {107, 3.0633780053857027}, - {186, 2.2245934101106259}, - {187, 2.1015872413794123}, - {266, 2.1263341057778309}, - {267, 3.9675148506537838}, - {346, 3.3640057502842691}, - {347, 21.091316282933843}}, - {{21, 0.72430827309279655}, - {37, 0.19457128219588618}, - {101, 5.4739527659741896}, - {117, 4.4286255636679313}, - {181, 0.12703829036056602}, - {197, 2.2851440769830953}, - {261, 1.5337035731959561}, - {277, 2.697375839048191}, - {341, 22.319601655044117}, - {357, 82.515887983144168}}, - {{25, 2.2863650183226212}, - {29, 1.686415421301841}, - {105, 0.72340346106443465}, - {109, 5.4713687086831388}, - {185, 3.929100145230096}, - {189, 4.9166140516911483}, - {265, 0.95177493689267167}, - {269, 0.46056494878491938}, - {345, 3.6886096301452787}, - {349, 16.105488797582133}}}; - std::vector> fiducial_interface_right = {{{25, 3.8877922383184833}, - {26, 0.70033864721549188}, - {105, 1.5947787943675635}, - {106, 3.0633780053857027}, - {185, 4.0069556576401011}, - {186, 2.1015872413794123}, - {265, 1.7883678016935785}, - {266, 3.9675148506537838}, - {345, 2.8032969746372527}, - {346, 21.091316282933843}}, - {{17, 0.43265217076853835}, - {33, 0.19457128219588618}, - {97, 3.2697645945288754}, - {113, 4.4286255636679313}, - {177, 0.07588397666718491}, - {193, 2.2851440769830953}, - {257, 0.91612950577699748}, - {273, 2.697375839048191}, - {337, 13.332201861384396}, - {353, 82.515887983144168}}, - {{5, 2.2863650183226212}, - {9, 1.686415421301841}, - {85, 0.72340346106443465}, - {89, 1.7792505446336098}, - {165, 5.3997753452111859}, - {169, 1.4379190463124139}, - {245, 0.95177493689267167}, - {249, 0.46056494878491938}, - {325, 6.6889498465051407}, - {329, 1.6145084086614281}}}; +// // Fiducial Data +// std::vector> fiducial_interface_left = {{{26, 2.1584359129984056}, +// {27, 0.70033864721549188}, +// {106, 2.2476363309467553}, +// {107, 3.0633780053857027}, +// {186, 2.2245934101106259}, +// {187, 2.1015872413794123}, +// {266, 2.1263341057778309}, +// {267, 3.9675148506537838}, +// {346, 3.3640057502842691}, +// {347, 21.091316282933843}}, +// {{21, 0.72430827309279655}, +// {37, 0.19457128219588618}, +// {101, 5.4739527659741896}, +// {117, 4.4286255636679313}, +// {181, 0.12703829036056602}, +// {197, 2.2851440769830953}, +// {261, 1.5337035731959561}, +// {277, 2.697375839048191}, +// {341, 22.319601655044117}, +// {357, 82.515887983144168}}, +// {{25, 2.2863650183226212}, +// {29, 1.686415421301841}, +// {105, 0.72340346106443465}, +// {109, 5.4713687086831388}, +// {185, 3.929100145230096}, +// {189, 4.9166140516911483}, +// {265, 0.95177493689267167}, +// {269, 0.46056494878491938}, +// {345, 3.6886096301452787}, +// {349, 16.105488797582133}}}; +// std::vector> fiducial_interface_right = {{{25, 3.8877922383184833}, +// {26, 0.70033864721549188}, +// {105, 1.5947787943675635}, +// {106, 3.0633780053857027}, +// {185, 4.0069556576401011}, +// {186, 2.1015872413794123}, +// {265, 1.7883678016935785}, +// {266, 3.9675148506537838}, +// {345, 2.8032969746372527}, +// {346, 21.091316282933843}}, +// {{17, 0.43265217076853835}, +// {33, 0.19457128219588618}, +// {97, 3.2697645945288754}, +// {113, 4.4286255636679313}, +// {177, 0.07588397666718491}, +// {193, 2.2851440769830953}, +// {257, 0.91612950577699748}, +// {273, 2.697375839048191}, +// {337, 13.332201861384396}, +// {353, 82.515887983144168}}, +// {{5, 2.2863650183226212}, +// {9, 1.686415421301841}, +// {85, 0.72340346106443465}, +// {89, 1.7792505446336098}, +// {165, 5.3997753452111859}, +// {169, 1.4379190463124139}, +// {245, 0.95177493689267167}, +// {249, 0.46056494878491938}, +// {325, 6.6889498465051407}, +// {329, 1.6145084086614281}}}; - // Loop over different directions - for (size_t direction = 0; direction < 3; direction++) { - // Assign the shape - size_t nx_rot, ny_rot, nz_rot; - switch (direction) { - case 0: - nx_rot = nx; - ny_rot = ny; - nz_rot = nz; - break; - case 1: - nx_rot = ny; - ny_rot = nz; - nz_rot = nx; - break; - case 2: - nx_rot = nz; - ny_rot = nx; - nz_rot = ny; - break; - } +// // Loop over different directions +// for (size_t direction = 0; direction < 3; direction++) { +// // Assign the shape +// size_t nx_rot, ny_rot, nz_rot; +// switch (direction) { +// case 0: +// nx_rot = nx; +// ny_rot = ny; +// nz_rot = nz; +// break; +// case 1: +// nx_rot = ny; +// ny_rot = nz; +// nz_rot = nx; +// break; +// case 2: +// nx_rot = nz; +// ny_rot = nx; +// nz_rot = ny; +// break; +// } - // Allocate device buffers - cuda_utilities::DeviceVector dev_interface_left(host_grid.size(), true); - cuda_utilities::DeviceVector dev_interface_right(host_grid.size(), true); +// // Allocate device buffers +// cuda_utilities::DeviceVector dev_interface_left(host_grid.size(), true); +// cuda_utilities::DeviceVector dev_interface_right(host_grid.size(), true); - // Launch kernel - hipLaunchKernelGGL(PLMC_cuda, dev_grid.size(), 1, 0, 0, dev_grid.data(), dev_interface_left.data(), - dev_interface_right.data(), nx_rot, ny_rot, nz_rot, dx, dt, gamma, direction, n_fields); - CudaCheckError(); - CHECK(cudaDeviceSynchronize()); +// // Launch kernel +// hipLaunchKernelGGL(PLMC_cuda, dev_grid.size(), 1, 0, 0, dev_grid.data(), dev_interface_left.data(), +// dev_interface_right.data(), nx_rot, ny_rot, nz_rot, dx, dt, gamma, direction, n_fields); +// CudaCheckError(); +// CHECK(cudaDeviceSynchronize()); - // Perform Comparison - for (size_t i = 0; i < host_grid.size(); i++) { - // Check the left interface - double test_val = dev_interface_left.at(i); - double fiducial_val = - (fiducial_interface_left.at(direction).find(i) == fiducial_interface_left.at(direction).end()) - ? 0.0 - : fiducial_interface_left.at(direction)[i]; +// // Perform Comparison +// for (size_t i = 0; i < host_grid.size(); i++) { +// // Check the left interface +// double test_val = dev_interface_left.at(i); +// double fiducial_val = +// (fiducial_interface_left.at(direction).find(i) == fiducial_interface_left.at(direction).end()) +// ? 0.0 +// : fiducial_interface_left.at(direction)[i]; - testingUtilities::checkResults( - fiducial_val, test_val, - "left interface at i=" + std::to_string(i) + ", in direction " + std::to_string(direction)); +// testingUtilities::checkResults( +// fiducial_val, test_val, +// "left interface at i=" + std::to_string(i) + ", in direction " + std::to_string(direction)); - // Check the right interface - test_val = dev_interface_right.at(i); - fiducial_val = (fiducial_interface_right.at(direction).find(i) == fiducial_interface_right.at(direction).end()) - ? 0.0 - : fiducial_interface_right.at(direction)[i]; +// // Check the right interface +// test_val = dev_interface_right.at(i); +// fiducial_val = (fiducial_interface_right.at(direction).find(i) == fiducial_interface_right.at(direction).end()) +// ? 0.0 +// : fiducial_interface_right.at(direction)[i]; - testingUtilities::checkResults( - fiducial_val, test_val, - "right interface at i=" + std::to_string(i) + ", in direction " + std::to_string(direction)); - } - } -} +// testingUtilities::checkResults( +// fiducial_val, test_val, +// "right interface at i=" + std::to_string(i) + ", in direction " + std::to_string(direction)); +// } +// } +// } -TEST(tMHDPlmcReconstructor, CorrectInputExpectCorrectOutput) -{ - // Set up PRNG to use - std::mt19937_64 prng(42); - std::uniform_real_distribution doubleRand(0.1, 5); +// TEST(tMHDPlmcReconstructor, CorrectInputExpectCorrectOutput) +// { +// // Set up PRNG to use +// std::mt19937_64 prng(42); +// std::uniform_real_distribution doubleRand(0.1, 5); - // Mock up needed information - size_t const nx = 4, ny = nx, nz = nx; - size_t const n_fields = 8; - size_t const n_cells_grid = nx * ny * nz * n_fields; - size_t const n_cells_interface = nx * ny * nz * (n_fields - 1); - double const dx = doubleRand(prng); - double const dt = doubleRand(prng); - double const gamma = 5.0 / 3.0; +// // Mock up needed information +// size_t const nx = 4, ny = nx, nz = nx; +// size_t const n_fields = 8; +// size_t const n_cells_grid = nx * ny * nz * n_fields; +// size_t const n_cells_interface = nx * ny * nz * (n_fields - 1); +// double const dx = doubleRand(prng); +// double const dt = doubleRand(prng); +// double const gamma = 5.0 / 3.0; - // Setup host grid. Fill host grid with random values and randomly assign maximum value - std::vector host_grid(n_cells_grid); - for (Real &val : host_grid) { - val = doubleRand(prng); - } +// // Setup host grid. Fill host grid with random values and randomly assign maximum value +// std::vector host_grid(n_cells_grid); +// for (Real &val : host_grid) { +// val = doubleRand(prng); +// } - // Allocating and copying to device - cuda_utilities::DeviceVector dev_grid(host_grid.size()); - dev_grid.cpyHostToDevice(host_grid); +// // Allocating and copying to device +// cuda_utilities::DeviceVector dev_grid(host_grid.size()); +// dev_grid.cpyHostToDevice(host_grid); - // Fiducial Data - std::vector> fiducial_interface_left = {{{21, 0.59023012197434721}, - {85, 3.0043379408547275}, - {149, 2.6320759184913625}, - {213, 0.9487867623146744}, - {277, 18.551193003661723}, - {341, 1.8587936590169301}, - {405, 2.1583975283044725}}, - {{21, 0.73640639402573249}, - {85, 3.3462413154443715}, - {149, 2.1945584994458125}, - {213, 0.67418839414138987}, - {277, 16.909618487528142}, - {341, 2.1533768050263267}, - {405, 1.6994195863331925}}, - {{21, 0.25340904981266843}, - {85, 2.0441984720128734}, - {149, 1.9959059157695584}, - {213, 0.45377591914009824}, - {277, 23.677832869261188}, - {341, 1.5437923271692418}, - {405, 1.8141353672443383}}}; - std::vector> fiducial_interface_right = {{{20, 0.59023012197434721}, - {84, 3.0043379408547275}, - {148, 2.6320759184913625}, - {212, 0.9487867623146744}, - {276, 22.111134849009044}, - {340, 1.8587936590169301}, - {404, 2.1583975283044725}}, - { - {17, 0.44405384992296193}, - {81, 2.5027813113931279}, - {145, 2.6371119205792346}, - {209, 1.0210845222961809}, - {273, 21.360010722689488}, - {337, 2.1634182515826184}, - {401, 1.7073441775673177}, - }, - { - {5, 0.92705119413602599}, - {69, 1.9592598982258778}, - {133, 0.96653490574340428}, - {197, 1.3203867992383289}, - {261, 8.0057564947791793}, - {325, 1.8629714367312684}, - {389, 1.9034519507895218}, - }}; +// // Fiducial Data +// std::vector> fiducial_interface_left = {{{21, 0.59023012197434721}, +// {85, 3.0043379408547275}, +// {149, 2.6320759184913625}, +// {213, 0.9487867623146744}, +// {277, 18.551193003661723}, +// {341, 1.8587936590169301}, +// {405, 2.1583975283044725}}, +// {{21, 0.73640639402573249}, +// {85, 3.3462413154443715}, +// {149, 2.1945584994458125}, +// {213, 0.67418839414138987}, +// {277, 16.909618487528142}, +// {341, 2.1533768050263267}, +// {405, 1.6994195863331925}}, +// {{21, 0.25340904981266843}, +// {85, 2.0441984720128734}, +// {149, 1.9959059157695584}, +// {213, 0.45377591914009824}, +// {277, 23.677832869261188}, +// {341, 1.5437923271692418}, +// {405, 1.8141353672443383}}}; +// std::vector> fiducial_interface_right = {{{20, 0.59023012197434721}, +// {84, 3.0043379408547275}, +// {148, 2.6320759184913625}, +// {212, 0.9487867623146744}, +// {276, 22.111134849009044}, +// {340, 1.8587936590169301}, +// {404, 2.1583975283044725}}, +// { +// {17, 0.44405384992296193}, +// {81, 2.5027813113931279}, +// {145, 2.6371119205792346}, +// {209, 1.0210845222961809}, +// {273, 21.360010722689488}, +// {337, 2.1634182515826184}, +// {401, 1.7073441775673177}, +// }, +// { +// {5, 0.92705119413602599}, +// {69, 1.9592598982258778}, +// {133, 0.96653490574340428}, +// {197, 1.3203867992383289}, +// {261, 8.0057564947791793}, +// {325, 1.8629714367312684}, +// {389, 1.9034519507895218}, +// }}; - // Loop over different directions - for (size_t direction = 0; direction < 3; direction++) { - // Allocate device buffers - cuda_utilities::DeviceVector dev_interface_left(n_cells_interface, true); - cuda_utilities::DeviceVector dev_interface_right(n_cells_interface, true); +// // Loop over different directions +// for (size_t direction = 0; direction < 3; direction++) { +// // Allocate device buffers +// cuda_utilities::DeviceVector dev_interface_left(n_cells_interface, true); +// cuda_utilities::DeviceVector dev_interface_right(n_cells_interface, true); - // Launch kernel - hipLaunchKernelGGL(PLMC_cuda, dev_grid.size(), 1, 0, 0, dev_grid.data(), dev_interface_left.data(), - dev_interface_right.data(), nx, ny, nz, dx, dt, gamma, direction, n_fields); - CudaCheckError(); - CHECK(cudaDeviceSynchronize()); +// // Launch kernel +// hipLaunchKernelGGL(PLMC_cuda, dev_grid.size(), 1, 0, 0, dev_grid.data(), dev_interface_left.data(), +// dev_interface_right.data(), nx, ny, nz, dx, dt, gamma, direction, n_fields); +// CudaCheckError(); +// CHECK(cudaDeviceSynchronize()); - // Perform Comparison - for (size_t i = 0; i < dev_interface_right.size(); i++) { - // Check the left interface - double test_val = dev_interface_left.at(i); - double fiducial_val = - (fiducial_interface_left.at(direction).find(i) == fiducial_interface_left.at(direction).end()) - ? 0.0 - : fiducial_interface_left.at(direction)[i]; +// // Perform Comparison +// for (size_t i = 0; i < dev_interface_right.size(); i++) { +// // Check the left interface +// double test_val = dev_interface_left.at(i); +// double fiducial_val = +// (fiducial_interface_left.at(direction).find(i) == fiducial_interface_left.at(direction).end()) +// ? 0.0 +// : fiducial_interface_left.at(direction)[i]; - testingUtilities::checkResults( - fiducial_val, test_val, - "left interface at i=" + std::to_string(i) + ", in direction " + std::to_string(direction)); +// testingUtilities::checkResults( +// fiducial_val, test_val, +// "left interface at i=" + std::to_string(i) + ", in direction " + std::to_string(direction)); - // Check the right interface - test_val = dev_interface_right.at(i); - fiducial_val = (fiducial_interface_right.at(direction).find(i) == fiducial_interface_right.at(direction).end()) - ? 0.0 - : fiducial_interface_right.at(direction)[i]; +// // Check the right interface +// test_val = dev_interface_right.at(i); +// fiducial_val = (fiducial_interface_right.at(direction).find(i) == fiducial_interface_right.at(direction).end()) +// ? 0.0 +// : fiducial_interface_right.at(direction)[i]; - testingUtilities::checkResults( - fiducial_val, test_val, - "right interface at i=" + std::to_string(i) + ", in direction " + std::to_string(direction)); - } - } -} +// testingUtilities::checkResults( +// fiducial_val, test_val, +// "right interface at i=" + std::to_string(i) + ", in direction " + std::to_string(direction)); +// } +// } +// } diff --git a/src/reconstruction/ppmc_cuda.cu b/src/reconstruction/ppmc_cuda.cu index 9acf2b936..1dec4bbc2 100644 --- a/src/reconstruction/ppmc_cuda.cu +++ b/src/reconstruction/ppmc_cuda.cu @@ -91,6 +91,9 @@ __global__ void PPMC_CTU(Real *dev_conserved, Real *dev_bounds_L, Real *dev_boun // calculate the adiabatic sound speed in cell im1 Real sound_speed = hydro_utilities::Calc_Sound_Speed(cell_im1.pressure, cell_im1.density, gamma); + // this isn't actually used and the compiler should optimize it away but since this is the only reconstruction + // function that won't use it it was easier to add it here as an unused variable + reconstruction::eigenVecs eigenvector; // Step 2 - Compute the left, right, centered, and van Leer differences of the primitive variables. Note that here L // and R refer to locations relative to the cell center Stone Eqn 36 @@ -111,24 +114,24 @@ __global__ void PPMC_CTU(Real *dev_conserved, Real *dev_bounds_L, Real *dev_boun // characteristic variables Stone Eqn 37 (del_a are differences in // characteristic variables, see Stone for notation) Use the eigenvectors // given in Stone 2008, Appendix A - reconstruction::Characteristic del_a_L = - reconstruction::Primitive_To_Characteristic(cell_im1, del_L, sound_speed, sound_speed * sound_speed, gamma); + reconstruction::Characteristic del_a_L = reconstruction::Primitive_To_Characteristic( + cell_im1, del_L, eigenvector, sound_speed, sound_speed * sound_speed, gamma); - reconstruction::Characteristic del_a_R = - reconstruction::Primitive_To_Characteristic(cell_im1, del_R, sound_speed, sound_speed * sound_speed, gamma); + reconstruction::Characteristic del_a_R = reconstruction::Primitive_To_Characteristic( + cell_im1, del_R, eigenvector, sound_speed, sound_speed * sound_speed, gamma); - reconstruction::Characteristic del_a_C = - reconstruction::Primitive_To_Characteristic(cell_im1, del_C, sound_speed, sound_speed * sound_speed, gamma); + reconstruction::Characteristic del_a_C = reconstruction::Primitive_To_Characteristic( + cell_im1, del_C, eigenvector, sound_speed, sound_speed * sound_speed, gamma); - reconstruction::Characteristic del_a_G = - reconstruction::Primitive_To_Characteristic(cell_im1, del_G, sound_speed, sound_speed * sound_speed, gamma); + reconstruction::Characteristic del_a_G = reconstruction::Primitive_To_Characteristic( + cell_im1, del_G, eigenvector, sound_speed, sound_speed * sound_speed, gamma); // Step 4 - Apply monotonicity constraints to the differences in the characteristic variables // Step 5 - and project the monotonized difference in the characteristic variables back onto the primitive variables // Stone Eqn 39 reconstruction::Primitive const del_m_im1 = reconstruction::Monotonize_Characteristic_Return_Primitive( - cell_im1, del_L, del_R, del_C, del_G, del_a_L, del_a_R, del_a_C, del_a_G, sound_speed, sound_speed * sound_speed, - gamma); + cell_im1, del_L, del_R, del_C, del_G, del_a_L, del_a_R, del_a_C, del_a_G, eigenvector, sound_speed, + sound_speed * sound_speed, gamma); // ============= // Cell i slopes @@ -156,20 +159,24 @@ __global__ void PPMC_CTU(Real *dev_conserved, Real *dev_bounds_L, Real *dev_boun // characteristic variables Stone Eqn 37 (del_a are differences in // characteristic variables, see Stone for notation) Use the eigenvectors // given in Stone 2008, Appendix A - del_a_L = reconstruction::Primitive_To_Characteristic(cell_i, del_L, sound_speed, sound_speed * sound_speed, gamma); + del_a_L = reconstruction::Primitive_To_Characteristic(cell_i, del_L, eigenvector, sound_speed, + sound_speed * sound_speed, gamma); - del_a_R = reconstruction::Primitive_To_Characteristic(cell_i, del_R, sound_speed, sound_speed * sound_speed, gamma); + del_a_R = reconstruction::Primitive_To_Characteristic(cell_i, del_R, eigenvector, sound_speed, + sound_speed * sound_speed, gamma); - del_a_C = reconstruction::Primitive_To_Characteristic(cell_i, del_C, sound_speed, sound_speed * sound_speed, gamma); + del_a_C = reconstruction::Primitive_To_Characteristic(cell_i, del_C, eigenvector, sound_speed, + sound_speed * sound_speed, gamma); - del_a_G = reconstruction::Primitive_To_Characteristic(cell_i, del_G, sound_speed, sound_speed * sound_speed, gamma); + del_a_G = reconstruction::Primitive_To_Characteristic(cell_i, del_G, eigenvector, sound_speed, + sound_speed * sound_speed, gamma); // Step 4 - Apply monotonicity constraints to the differences in the characteristic variables // Step 5 - and project the monotonized difference in the characteristic variables back onto the primitive variables // Stone Eqn 39 reconstruction::Primitive del_m_i = reconstruction::Monotonize_Characteristic_Return_Primitive( - cell_i, del_L, del_R, del_C, del_G, del_a_L, del_a_R, del_a_C, del_a_G, sound_speed, sound_speed * sound_speed, - gamma); + cell_i, del_L, del_R, del_C, del_G, del_a_L, del_a_R, del_a_C, del_a_G, eigenvector, sound_speed, + sound_speed * sound_speed, gamma); // =============== // Cell i+1 slopes @@ -197,20 +204,24 @@ __global__ void PPMC_CTU(Real *dev_conserved, Real *dev_bounds_L, Real *dev_boun // characteristic variables Stone Eqn 37 (del_a are differences in // characteristic variables, see Stone for notation) Use the eigenvectors // given in Stone 2008, Appendix A - del_a_L = reconstruction::Primitive_To_Characteristic(cell_ip1, del_L, sound_speed, sound_speed * sound_speed, gamma); + del_a_L = reconstruction::Primitive_To_Characteristic(cell_ip1, del_L, eigenvector, sound_speed, + sound_speed * sound_speed, gamma); - del_a_R = reconstruction::Primitive_To_Characteristic(cell_ip1, del_R, sound_speed, sound_speed * sound_speed, gamma); + del_a_R = reconstruction::Primitive_To_Characteristic(cell_ip1, del_R, eigenvector, sound_speed, + sound_speed * sound_speed, gamma); - del_a_C = reconstruction::Primitive_To_Characteristic(cell_ip1, del_C, sound_speed, sound_speed * sound_speed, gamma); + del_a_C = reconstruction::Primitive_To_Characteristic(cell_ip1, del_C, eigenvector, sound_speed, + sound_speed * sound_speed, gamma); - del_a_G = reconstruction::Primitive_To_Characteristic(cell_ip1, del_G, sound_speed, sound_speed * sound_speed, gamma); + del_a_G = reconstruction::Primitive_To_Characteristic(cell_ip1, del_G, eigenvector, sound_speed, + sound_speed * sound_speed, gamma); // Step 4 - Apply monotonicity constraints to the differences in the characteristic variables // Step 5 - and project the monotonized difference in the characteristic variables back onto the primitive variables // Stone Eqn 39 reconstruction::Primitive const del_m_ip1 = reconstruction::Monotonize_Characteristic_Return_Primitive( - cell_ip1, del_L, del_R, del_C, del_G, del_a_L, del_a_R, del_a_C, del_a_G, sound_speed, sound_speed * sound_speed, - gamma); + cell_ip1, del_L, del_R, del_C, del_G, del_a_L, del_a_R, del_a_C, del_a_G, eigenvector, sound_speed, + sound_speed * sound_speed, gamma); // Step 6 - Use parabolic interpolation to compute values at the left and right of each cell center Here, the // subscripts L and R refer to the left and right side of the ith cell center Stone Eqn 46 @@ -596,29 +607,35 @@ __global__ void PPMC_VL(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bound Real const sound_speed = hydro_utilities::Calc_Sound_Speed(cell_i.pressure, cell_i.density, gamma); Real const sound_speed_squared = sound_speed * sound_speed; +#ifdef MHD + reconstruction::eigenVecs eigenvectors = + reconstruction::Compute_Eigenvectors(cell_i, sound_speed, sound_speed_squared, gamma); +#else + reconstruction::eigenVecs eigenvectors; +#endif // MHD + // Cell i - reconstruction::Characteristic const cell_i_characteristic = - reconstruction::Primitive_To_Characteristic(cell_i, cell_i, sound_speed, sound_speed_squared, gamma); + reconstruction::Characteristic const cell_i_characteristic = reconstruction::Primitive_To_Characteristic( + cell_i, cell_i, eigenvectors, sound_speed, sound_speed_squared, gamma); // Cell i-1 - reconstruction::Characteristic const cell_im1_characteristic = - reconstruction::Primitive_To_Characteristic(cell_i, cell_im1, sound_speed, sound_speed_squared, gamma); + reconstruction::Characteristic const cell_im1_characteristic = reconstruction::Primitive_To_Characteristic( + cell_i, cell_im1, eigenvectors, sound_speed, sound_speed_squared, gamma); // Cell i-2 - reconstruction::Characteristic const cell_im2_characteristic = - reconstruction::Primitive_To_Characteristic(cell_i, cell_im2, sound_speed, sound_speed_squared, gamma); + reconstruction::Characteristic const cell_im2_characteristic = reconstruction::Primitive_To_Characteristic( + cell_i, cell_im2, eigenvectors, sound_speed, sound_speed_squared, gamma); // Cell i+1 - reconstruction::Characteristic const cell_ip1_characteristic = - reconstruction::Primitive_To_Characteristic(cell_i, cell_ip1, sound_speed, sound_speed_squared, gamma); + reconstruction::Characteristic const cell_ip1_characteristic = reconstruction::Primitive_To_Characteristic( + cell_i, cell_ip1, eigenvectors, sound_speed, sound_speed_squared, gamma); // Cell i+2 - reconstruction::Characteristic const cell_ip2_characteristic = - reconstruction::Primitive_To_Characteristic(cell_i, cell_ip2, sound_speed, sound_speed_squared, gamma); + reconstruction::Characteristic const cell_ip2_characteristic = reconstruction::Primitive_To_Characteristic( + cell_i, cell_ip2, eigenvectors, sound_speed, sound_speed_squared, gamma); // Compute the interface states for each field reconstruction::Characteristic interface_R_imh_characteristic, interface_L_iph_characteristic; - reconstruction::Primitive interface_L_iph, interface_R_imh; reconstruction::PPM_Single_Variable(cell_im2_characteristic.a0, cell_im1_characteristic.a0, cell_i_characteristic.a0, cell_ip1_characteristic.a0, cell_ip2_characteristic.a0, @@ -645,6 +662,13 @@ __global__ void PPMC_VL(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bound interface_L_iph_characteristic.a6, interface_R_imh_characteristic.a6); #endif // MHD + // Convert back to primitive variables + reconstruction::Primitive interface_L_iph = reconstruction::Characteristic_To_Primitive( + cell_i, interface_L_iph_characteristic, eigenvectors, sound_speed, sound_speed_squared, gamma); + reconstruction::Primitive interface_R_imh = reconstruction::Characteristic_To_Primitive( + cell_i, interface_R_imh_characteristic, eigenvectors, sound_speed, sound_speed_squared, gamma); + + // Compute the interfaces for the variables that don't have characteristics #ifdef DE reconstruction::PPM_Single_Variable(cell_im2.gas_energy, cell_im1.gas_energy, cell_i.gas_energy, cell_ip1.gas_energy, cell_ip2.gas_energy, interface_L_iph.gas_energy, interface_R_imh.gas_energy); @@ -656,12 +680,6 @@ __global__ void PPMC_VL(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bound } #endif // SCALAR - // Convert back to primitive variables - reconstruction::Characteristic_To_Primitive(cell_i, interface_L_iph_characteristic, sound_speed, sound_speed_squared, - gamma, interface_L_iph); - reconstruction::Characteristic_To_Primitive(cell_i, interface_R_imh_characteristic, sound_speed, sound_speed_squared, - gamma, interface_R_imh); - // enforce minimum values interface_R_imh.density = fmax(interface_R_imh.density, (Real)TINY_NUMBER); interface_L_iph.density = fmax(interface_L_iph.density, (Real)TINY_NUMBER); diff --git a/src/reconstruction/reconstruction.h b/src/reconstruction/reconstruction.h index d2cca5189..794b3e1b0 100644 --- a/src/reconstruction/reconstruction.h +++ b/src/reconstruction/reconstruction.h @@ -47,6 +47,22 @@ struct Primitive { }; // ===================================================================================================================== +// ===================================================================================================================== +struct eigenVecs { + Real magnetosonic_speed_fast, magnetosonic_speed_slow, magnetosonic_speed_fast_squared, + magnetosonic_speed_slow_squared; + Real alpha_fast, alpha_slow; + Real beta_y, beta_z; + Real n_fs, sign; + /// The non-primed values are used in the conversion from characteristic to primitive variables + Real q_fast, q_slow; + Real a_fast, a_slow; + /// The primed values are used in the conversion from primitive to characteristic variables + Real q_prime_fast, q_prime_slow; + Real a_prime_fast, a_prime_slow; +}; +// ===================================================================================================================== + // ===================================================================================================================== /*! * \brief A struct for the characteristic variables @@ -241,100 +257,127 @@ Primitive __device__ __host__ __inline__ Van_Leer_Slope(Primitive const &left_sl // ===================================================================================================================== /*! - * \brief Project from the primitive variables slopes to the characteristic variables slopes. Stone Eqn 37. Use the - * eigenvectors given in Stone 2008, Appendix A + * \brief Compute the eigenvectors in the given cell * - * \param[in] primitive The primitive variables - * \param[in] primitive_slope The primitive variables slopes - * \param[in] sound_speed The speed of sound - * \param[in] sound_speed_squared The speed of sound squared + * \param[in] primitive The primitive variables in a particular cell + * \param[in] sound_speed The sound speed + * \param[in] sound_speed_squared The sound speed squared * \param[in] gamma The adiabatic index - * \return Characteristic + * \return eigenVecs */ -Characteristic __device__ __inline__ Primitive_To_Characteristic(Primitive const &primitive, - Primitive const &primitive_slope, - Real const &sound_speed, - Real const &sound_speed_squared, Real const &gamma) -{ - Characteristic output; - #ifdef MHD +eigenVecs __device__ __inline__ Compute_Eigenvectors(Primitive const &primitive, Real const &sound_speed, + Real const &sound_speed_squared, Real const &gamma) +{ + eigenVecs output; // This is taken from Stone et al. 2008, appendix A. Equation numbers will be quoted as relevant - // First, compute some basic quantities we will need later - Real const inverse_sqrt_density = rsqrt(primitive.density); - // Compute wave speeds and their squares - Real const magnetosonic_speed_fast = mhd::utils::fastMagnetosonicSpeed( + output.magnetosonic_speed_fast = mhd::utils::fastMagnetosonicSpeed( primitive.density, primitive.pressure, primitive.magnetic_x, primitive.magnetic_y, primitive.magnetic_z, gamma); - Real const magnetosonic_speed_slow = mhd::utils::slowMagnetosonicSpeed( + output.magnetosonic_speed_slow = mhd::utils::slowMagnetosonicSpeed( primitive.density, primitive.pressure, primitive.magnetic_x, primitive.magnetic_y, primitive.magnetic_z, gamma); - Real const magnetosonic_speed_fast_squared = magnetosonic_speed_fast * magnetosonic_speed_fast; - Real const magnetosonic_speed_slow_squared = magnetosonic_speed_slow * magnetosonic_speed_slow; + output.magnetosonic_speed_fast_squared = output.magnetosonic_speed_fast * output.magnetosonic_speed_fast; + output.magnetosonic_speed_slow_squared = output.magnetosonic_speed_slow * output.magnetosonic_speed_slow; // Compute Alphas (equation A16) - Real alpha_fast, alpha_slow; - if (Real const denom = (magnetosonic_speed_fast_squared - magnetosonic_speed_slow_squared), - numerator_2 = (magnetosonic_speed_fast_squared - sound_speed_squared); + if (Real const denom = (output.magnetosonic_speed_fast_squared - output.magnetosonic_speed_slow_squared), + numerator_2 = (output.magnetosonic_speed_fast_squared - sound_speed_squared); denom <= 0.0 or numerator_2 <= 0.0) { - alpha_fast = 1.0; - alpha_slow = 0.0; - } else if (Real const numerator_1 = (sound_speed_squared - magnetosonic_speed_slow_squared); numerator_1 <= 0.0) { - alpha_fast = 0.0; - alpha_slow = 1.0; + output.alpha_fast = 1.0; + output.alpha_slow = 0.0; + } else if (Real const numerator_1 = (sound_speed_squared - output.magnetosonic_speed_slow_squared); + numerator_1 <= 0.0) { + output.alpha_fast = 0.0; + output.alpha_slow = 1.0; } else { - alpha_fast = sqrt(numerator_1 / denom); - alpha_slow = sqrt(numerator_2 / denom); + output.alpha_fast = sqrt(numerator_1 / denom); + output.alpha_slow = sqrt(numerator_2 / denom); } // Compute Betas (equation A17). Note that rhypot can return an inf if By and Bz are both zero, the isfinite check // handles that case Real const beta_denom = rhypot(primitive.magnetic_y, primitive.magnetic_z); - Real const beta_y = (isfinite(beta_denom)) ? primitive.magnetic_y * beta_denom : 1.0; - Real const beta_z = (isfinite(beta_denom)) ? primitive.magnetic_z * beta_denom : 0.0; + output.beta_y = (isfinite(beta_denom)) ? primitive.magnetic_y * beta_denom : 1.0; + output.beta_z = (isfinite(beta_denom)) ? primitive.magnetic_z * beta_denom : 0.0; // Compute Q(s) (equation A14) - Real const n_fs = 0.5 / sound_speed_squared; // equation A19 - Real const sign = copysign(1.0, primitive.magnetic_x); - Real const q_fast = sign * n_fs * alpha_fast * magnetosonic_speed_fast; - Real const q_slow = sign * n_fs * alpha_slow * magnetosonic_speed_slow; + output.sign = copysign(1.0, primitive.magnetic_x); + output.n_fs = 0.5 / sound_speed_squared; // equation A19 + output.q_prime_fast = output.sign * output.n_fs * output.alpha_fast * output.magnetosonic_speed_fast; + output.q_prime_slow = output.sign * output.n_fs * output.alpha_slow * output.magnetosonic_speed_slow; + output.q_fast = output.sign * output.alpha_fast * output.magnetosonic_speed_fast; + output.q_slow = output.sign * output.alpha_slow * output.magnetosonic_speed_slow; // Compute A(s) (equation A15) - Real const a_prime_fast = 0.5 * alpha_fast / (sound_speed * sqrt(primitive.density)); - Real const a_prime_slow = 0.5 * alpha_slow / (sound_speed * sqrt(primitive.density)); + output.a_fast = output.alpha_fast * sound_speed * sqrt(primitive.density); + output.a_slow = output.alpha_slow * sound_speed * sqrt(primitive.density); + output.a_prime_fast = 0.5 * output.alpha_fast / (sound_speed * sqrt(primitive.density)); + output.a_prime_slow = 0.5 * output.alpha_slow / (sound_speed * sqrt(primitive.density)); + + return output; +} +#endif // MHD +// ===================================================================================================================== + +// ===================================================================================================================== +/*! + * \brief Project from the primitive variables slopes to the characteristic variables slopes. Stone Eqn 37. Use the + * eigenvectors given in Stone 2008, Appendix A + * + * \param[in] primitive The primitive variables + * \param[in] primitive_slope The primitive variables slopes + * \param[in] eigenVecs The eigenvectors + * \param[in] sound_speed The speed of sound + * \param[in] sound_speed_squared The speed of sound squared + * \param[in] gamma The adiabatic index + * \return Characteristic + */ +Characteristic __device__ __inline__ Primitive_To_Characteristic(Primitive const &primitive, + Primitive const &primitive_slope, + eigenVecs const &eigen, Real const &sound_speed, + Real const &sound_speed_squared, Real const &gamma) +{ + Characteristic output; +#ifdef MHD // Multiply the slopes by the left eigenvector matrix given in equation 18 + Real const inverse_sqrt_density = rsqrt(primitive.density); output.a0 = - n_fs * alpha_fast * - (primitive_slope.pressure / primitive.density - magnetosonic_speed_fast * primitive_slope.velocity_x) + - q_slow * (beta_y * primitive_slope.velocity_y + beta_z * primitive_slope.velocity_z) + - a_prime_slow * (beta_y * primitive_slope.magnetic_y + beta_z * primitive_slope.magnetic_z); + eigen.n_fs * eigen.alpha_fast * + (primitive_slope.pressure / primitive.density - eigen.magnetosonic_speed_fast * primitive_slope.velocity_x) + + eigen.q_prime_slow * (eigen.beta_y * primitive_slope.velocity_y + eigen.beta_z * primitive_slope.velocity_z) + + eigen.a_prime_slow * (eigen.beta_y * primitive_slope.magnetic_y + eigen.beta_z * primitive_slope.magnetic_z); - output.a1 = 0.5 * (beta_y * (primitive_slope.magnetic_z * sign * inverse_sqrt_density + primitive_slope.velocity_z) - - beta_z * (primitive_slope.magnetic_y * sign * inverse_sqrt_density + primitive_slope.velocity_y)); + output.a1 = + 0.5 * + (eigen.beta_y * (primitive_slope.magnetic_z * eigen.sign * inverse_sqrt_density + primitive_slope.velocity_z) - + eigen.beta_z * (primitive_slope.magnetic_y * eigen.sign * inverse_sqrt_density + primitive_slope.velocity_y)); output.a2 = - n_fs * alpha_slow * - (primitive_slope.pressure / primitive.density - magnetosonic_speed_slow * primitive_slope.velocity_x) - - q_fast * (beta_y * primitive_slope.velocity_y + beta_z * primitive_slope.velocity_z) - - a_prime_fast * (beta_y * primitive_slope.magnetic_y + beta_z * primitive_slope.magnetic_z); + eigen.n_fs * eigen.alpha_slow * + (primitive_slope.pressure / primitive.density - eigen.magnetosonic_speed_slow * primitive_slope.velocity_x) - + eigen.q_prime_fast * (eigen.beta_y * primitive_slope.velocity_y + eigen.beta_z * primitive_slope.velocity_z) - + eigen.a_prime_fast * (eigen.beta_y * primitive_slope.magnetic_y + eigen.beta_z * primitive_slope.magnetic_z); output.a3 = primitive_slope.density - primitive_slope.pressure / sound_speed_squared; output.a4 = - n_fs * alpha_slow * - (primitive_slope.pressure / primitive.density + magnetosonic_speed_slow * primitive_slope.velocity_x) + - q_fast * (beta_y * primitive_slope.velocity_y + beta_z * primitive_slope.velocity_z) - - a_prime_fast * (beta_y * primitive_slope.magnetic_y + beta_z * primitive_slope.magnetic_z); - output.a5 = 0.5 * (beta_y * (primitive_slope.magnetic_z * sign * inverse_sqrt_density - primitive_slope.velocity_z) - - beta_z * (primitive_slope.magnetic_y * sign * inverse_sqrt_density - primitive_slope.velocity_y)); + eigen.n_fs * eigen.alpha_slow * + (primitive_slope.pressure / primitive.density + eigen.magnetosonic_speed_slow * primitive_slope.velocity_x) + + eigen.q_prime_fast * (eigen.beta_y * primitive_slope.velocity_y + eigen.beta_z * primitive_slope.velocity_z) - + eigen.a_prime_fast * (eigen.beta_y * primitive_slope.magnetic_y + eigen.beta_z * primitive_slope.magnetic_z); + output.a5 = + 0.5 * + (eigen.beta_y * (primitive_slope.magnetic_z * eigen.sign * inverse_sqrt_density - primitive_slope.velocity_z) - + eigen.beta_z * (primitive_slope.magnetic_y * eigen.sign * inverse_sqrt_density - primitive_slope.velocity_y)); output.a6 = - n_fs * alpha_fast * - (primitive_slope.pressure / primitive.density + magnetosonic_speed_fast * primitive_slope.velocity_x) - - q_slow * (beta_y * primitive_slope.velocity_y + beta_z * primitive_slope.velocity_z) + - a_prime_slow * (beta_y * primitive_slope.magnetic_y + beta_z * primitive_slope.magnetic_z); + eigen.n_fs * eigen.alpha_fast * + (primitive_slope.pressure / primitive.density + eigen.magnetosonic_speed_fast * primitive_slope.velocity_x) - + eigen.q_prime_slow * (eigen.beta_y * primitive_slope.velocity_y + eigen.beta_z * primitive_slope.velocity_z) + + eigen.a_prime_slow * (eigen.beta_y * primitive_slope.magnetic_y + eigen.beta_z * primitive_slope.magnetic_z); #else // not MHD output.a0 = -primitive.density * primitive_slope.velocity_x / (2.0 * sound_speed) + @@ -357,79 +400,43 @@ Characteristic __device__ __inline__ Primitive_To_Characteristic(Primitive const * * \param[in] primitive The primitive variables * \param[in] characteristic_slope The characteristic slopes + * \param[in] eigen The eigenvectors * \param[in] sound_speed The sound speed * \param[in] sound_speed_squared The sound speed squared * \param[in] gamma The adiabatic index - * \param[out] output The primitive slopes + * \return Primitive The state in primitive variables */ -void __device__ __inline__ Characteristic_To_Primitive(Primitive const &primitive, - Characteristic const &characteristic_slope, - Real const &sound_speed, Real const &sound_speed_squared, - Real const &gamma, Primitive &output) +Primitive __device__ __host__ __inline__ Characteristic_To_Primitive(Primitive const &primitive, + Characteristic const &characteristic_slope, + eigenVecs const &eigen, Real const &sound_speed, + Real const &sound_speed_squared, Real const &gamma) { + Primitive output; #ifdef MHD - // This is taken from Stone et al. 2008, appendix A. Equation numbers will be quoted as relevant - - // Compute wave speeds and their squares - Real const magnetosonic_speed_fast = mhd::utils::fastMagnetosonicSpeed( - primitive.density, primitive.pressure, primitive.magnetic_x, primitive.magnetic_y, primitive.magnetic_z, gamma); - Real const magnetosonic_speed_slow = mhd::utils::slowMagnetosonicSpeed( - primitive.density, primitive.pressure, primitive.magnetic_x, primitive.magnetic_y, primitive.magnetic_z, gamma); - - Real const magnetosonic_speed_fast_squared = magnetosonic_speed_fast * magnetosonic_speed_fast; - Real const magnetosonic_speed_slow_squared = magnetosonic_speed_slow * magnetosonic_speed_slow; - - // Compute Alphas (equation A16) - Real alpha_fast, alpha_slow; - if (Real const denom = (magnetosonic_speed_fast_squared - magnetosonic_speed_slow_squared), - numerator_2 = (magnetosonic_speed_fast_squared - sound_speed_squared); - denom <= 0.0 or numerator_2 <= 0.0) { - alpha_fast = 1.0; - alpha_slow = 0.0; - } else if (Real const numerator_1 = (sound_speed_squared - magnetosonic_speed_slow_squared); numerator_1 <= 0.0) { - alpha_fast = 0.0; - alpha_slow = 1.0; - } else { - alpha_fast = sqrt(numerator_1 / denom); - alpha_slow = sqrt(numerator_2 / denom); - } - - // Compute Betas (equation A17). Note that rhypot can return an inf if By and Bz are both zero, the isfinite check - // handles that case - Real const beta_denom = rhypot(primitive.magnetic_y, primitive.magnetic_z); - Real const beta_y = (isfinite(beta_denom)) ? primitive.magnetic_y * beta_denom : 1.0; - Real const beta_z = (isfinite(beta_denom)) ? primitive.magnetic_z * beta_denom : 0.0; - - // Compute Q(s) (equation A14) - Real const sign = copysign(1.0, primitive.magnetic_x); - Real const q_fast = sign * alpha_fast * magnetosonic_speed_fast; - Real const q_slow = sign * alpha_slow * magnetosonic_speed_slow; - - // Compute A(s) (equation A15) - Real const a_prime_fast = alpha_fast * sound_speed * sqrt(primitive.density); - Real const a_prime_slow = alpha_slow * sound_speed * sqrt(primitive.density); - // Multiply the slopes by the right eigenvector matrix given in equation 12 - output.density = primitive.density * (alpha_fast * (characteristic_slope.a0 + characteristic_slope.a6) + - alpha_slow * (characteristic_slope.a2 + characteristic_slope.a4)) + + output.density = primitive.density * (eigen.alpha_fast * (characteristic_slope.a0 + characteristic_slope.a6) + + eigen.alpha_slow * (characteristic_slope.a2 + characteristic_slope.a4)) + characteristic_slope.a3; - output.velocity_x = magnetosonic_speed_fast * alpha_fast * (characteristic_slope.a6 - characteristic_slope.a0) + - magnetosonic_speed_slow * alpha_slow * (characteristic_slope.a4 - characteristic_slope.a2); - output.velocity_y = beta_y * (q_slow * (characteristic_slope.a0 - characteristic_slope.a6) + - q_fast * (characteristic_slope.a4 - characteristic_slope.a2)) + - beta_z * (characteristic_slope.a5 - characteristic_slope.a1); - output.velocity_z = beta_z * (q_slow * (characteristic_slope.a0 - characteristic_slope.a6) + - q_fast * (characteristic_slope.a4 - characteristic_slope.a2)) + - beta_y * (characteristic_slope.a1 - characteristic_slope.a5); + output.velocity_x = + eigen.magnetosonic_speed_fast * eigen.alpha_fast * (characteristic_slope.a6 - characteristic_slope.a0) + + eigen.magnetosonic_speed_slow * eigen.alpha_slow * (characteristic_slope.a4 - characteristic_slope.a2); + output.velocity_y = eigen.beta_y * (eigen.q_slow * (characteristic_slope.a0 - characteristic_slope.a6) + + eigen.q_fast * (characteristic_slope.a4 - characteristic_slope.a2)) + + eigen.beta_z * (characteristic_slope.a5 - characteristic_slope.a1); + output.velocity_z = eigen.beta_z * (eigen.q_slow * (characteristic_slope.a0 - characteristic_slope.a6) + + eigen.q_fast * (characteristic_slope.a4 - characteristic_slope.a2)) + + eigen.beta_y * (characteristic_slope.a1 - characteristic_slope.a5); output.pressure = primitive.density * sound_speed_squared * - (alpha_fast * (characteristic_slope.a0 + characteristic_slope.a6) + - alpha_slow * (characteristic_slope.a2 + characteristic_slope.a4)); - output.magnetic_y = beta_y * (a_prime_slow * (characteristic_slope.a0 + characteristic_slope.a6) - - a_prime_fast * (characteristic_slope.a2 + characteristic_slope.a4)) - - beta_z * sign * sqrt(primitive.density) * (characteristic_slope.a5 + characteristic_slope.a1); - output.magnetic_z = beta_z * (a_prime_slow * (characteristic_slope.a0 + characteristic_slope.a6) - - a_prime_fast * (characteristic_slope.a2 + characteristic_slope.a4)) + - beta_y * sign * sqrt(primitive.density) * (characteristic_slope.a5 + characteristic_slope.a1); + (eigen.alpha_fast * (characteristic_slope.a0 + characteristic_slope.a6) + + eigen.alpha_slow * (characteristic_slope.a2 + characteristic_slope.a4)); + output.magnetic_y = + eigen.beta_y * (eigen.a_slow * (characteristic_slope.a0 + characteristic_slope.a6) - + eigen.a_fast * (characteristic_slope.a2 + characteristic_slope.a4)) - + eigen.beta_z * eigen.sign * sqrt(primitive.density) * (characteristic_slope.a5 + characteristic_slope.a1); + output.magnetic_z = + eigen.beta_z * (eigen.a_slow * (characteristic_slope.a0 + characteristic_slope.a6) - + eigen.a_fast * (characteristic_slope.a2 + characteristic_slope.a4)) + + eigen.beta_y * eigen.sign * sqrt(primitive.density) * (characteristic_slope.a5 + characteristic_slope.a1); #else // not MHD output.density = characteristic_slope.a0 + characteristic_slope.a1 + characteristic_slope.a4; @@ -438,6 +445,8 @@ void __device__ __inline__ Characteristic_To_Primitive(Primitive const &primitiv output.velocity_z = characteristic_slope.a3; output.pressure = sound_speed_squared * (characteristic_slope.a0 + characteristic_slope.a4); #endif // MHD + + return output; } // ===================================================================================================================== @@ -462,7 +471,8 @@ void __device__ __inline__ Characteristic_To_Primitive(Primitive const &primitiv Primitive __device__ __inline__ Monotonize_Characteristic_Return_Primitive( Primitive const &primitive, Primitive const &del_L, Primitive const &del_R, Primitive const &del_C, Primitive const &del_G, Characteristic const &del_a_L, Characteristic const &del_a_R, Characteristic const &del_a_C, - Characteristic const &del_a_G, Real const &sound_speed, Real const &sound_speed_squared, Real const &gamma) + Characteristic const &del_a_G, eigenVecs const &eigenvectors, Real const &sound_speed, + Real const &sound_speed_squared, Real const &gamma) { // The function that will actually do the monotozation auto Monotonize = [](Real const &left, Real const &right, Real const ¢ered, Real const &van_leer) -> Real { @@ -477,8 +487,6 @@ Primitive __device__ __inline__ Monotonize_Characteristic_Return_Primitive( // the monotonized difference in the characteristic variables Characteristic del_a_m; - // The monotonized difference in the characteristic variables projected into the primitive variables - Primitive output; // Monotonize the slopes del_a_m.a0 = Monotonize(del_a_L.a0, del_a_R.a0, del_a_C.a0, del_a_G.a0); @@ -492,6 +500,11 @@ Primitive __device__ __inline__ Monotonize_Characteristic_Return_Primitive( del_a_m.a6 = Monotonize(del_a_L.a6, del_a_R.a6, del_a_C.a6, del_a_G.a6); #endif // MHD + // Project into the primitive variables. Note the return by reference to preserve the values in the gas_energy and + // scalars + Primitive output = + Characteristic_To_Primitive(primitive, del_a_m, eigenvectors, sound_speed, sound_speed_squared, gamma); + #ifdef DE output.gas_energy = Monotonize(del_L.gas_energy, del_R.gas_energy, del_C.gas_energy, del_G.gas_energy); #endif // DE @@ -501,10 +514,6 @@ Primitive __device__ __inline__ Monotonize_Characteristic_Return_Primitive( } #endif // SCALAR - // Project into the primitive variables. Note the return by reference to preserve the values in the gas_energy and - // scalars - Characteristic_To_Primitive(primitive, del_a_m, sound_speed, sound_speed_squared, gamma, output); - return output; } // ===================================================================================================================== diff --git a/src/reconstruction/reconstruction_tests.cu b/src/reconstruction/reconstruction_tests.cu index dc7100524..e649f023b 100644 --- a/src/reconstruction/reconstruction_tests.cu +++ b/src/reconstruction/reconstruction_tests.cu @@ -23,21 +23,28 @@ #ifdef MHD __global__ void test_prim_2_char(reconstruction::Primitive const primitive, - reconstruction::Primitive const primitive_slope, Real const gamma, - Real const sound_speed, Real const sound_speed_squared, - reconstruction::Characteristic *characteristic_slope) + reconstruction::Primitive const primitive_slope, + reconstruction::eigenVecs const eigenvectors, Real const gamma, Real const sound_speed, + Real const sound_speed_squared, reconstruction::Characteristic *characteristic_slope) { - *characteristic_slope = - reconstruction::Primitive_To_Characteristic(primitive, primitive_slope, sound_speed, sound_speed_squared, gamma); + *characteristic_slope = reconstruction::Primitive_To_Characteristic(primitive, primitive_slope, eigenvectors, + sound_speed, sound_speed_squared, gamma); } __global__ void test_char_2_prim(reconstruction::Primitive const primitive, - reconstruction::Characteristic const characteristic_slope, Real const gamma, - Real const sound_speed, Real const sound_speed_squared, - reconstruction::Primitive *primitive_slope) + reconstruction::Characteristic const characteristic_slope, + reconstruction::eigenVecs const eigenvectors, Real const gamma, Real const sound_speed, + Real const sound_speed_squared, reconstruction::Primitive *primitive_slope) { - reconstruction::Characteristic_To_Primitive(primitive, characteristic_slope, sound_speed, sound_speed_squared, gamma, - *primitive_slope); + *primitive_slope = reconstruction::Characteristic_To_Primitive(primitive, characteristic_slope, eigenvectors, + sound_speed, sound_speed_squared, gamma); +} + +__global__ void test_compute_eigenvectors(reconstruction::Primitive const primitive, Real const sound_speed, + Real const sound_speed_squared, Real const gamma, + reconstruction::eigenVecs *eigenvectors) +{ + *eigenvectors = reconstruction::Compute_Eigenvectors(primitive, sound_speed, sound_speed_squared, gamma); } TEST(tMHDReconstructionPrimitive2Characteristic, CorrectInputExpectCorrectOutput) @@ -46,21 +53,22 @@ TEST(tMHDReconstructionPrimitive2Characteristic, CorrectInputExpectCorrectOutput Real const &gamma = 5. / 3.; reconstruction::Primitive const primitive{1, 2, 3, 4, 5, 6, 7, 8}; reconstruction::Primitive const primitive_slope{9, 10, 11, 12, 13, 14, 15, 16}; + reconstruction::eigenVecs const eigenvectors{ + 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, + }; Real const sound_speed = hydro_utilities::Calc_Sound_Speed(primitive.pressure, primitive.density, gamma); Real const sound_speed_squared = sound_speed * sound_speed; // Run test cuda_utilities::DeviceVector dev_results(1); - hipLaunchKernelGGL(test_prim_2_char, 1, 1, 0, 0, primitive, primitive_slope, gamma, sound_speed, sound_speed_squared, - dev_results.data()); + hipLaunchKernelGGL(test_prim_2_char, 1, 1, 0, 0, primitive, primitive_slope, eigenvectors, gamma, sound_speed, + sound_speed_squared, dev_results.data()); CudaCheckError(); cudaDeviceSynchronize(); reconstruction::Characteristic const host_results = dev_results.at(0); // Check results - reconstruction::Characteristic const fiducial_results{ - 3.67609032478613384e+00, -5.64432521030159506e-01, -3.31429408151064075e+00, 7.44000000000000039e+00, - 3.29052143725318791e+00, -1.88144173676719539e-01, 4.07536568422372625e+00}; + reconstruction::Characteristic const fiducial_results{-40327, 110, -132678, 7.4400000000000004, 98864, 98, 103549}; testingUtilities::checkResults(fiducial_results.a0, host_results.a0, "a0"); testingUtilities::checkResults(fiducial_results.a1, host_results.a1, "a1"); testingUtilities::checkResults(fiducial_results.a2, host_results.a2, "a2"); @@ -76,21 +84,22 @@ TEST(tMHDReconstructionCharacteristic2Primitive, CorrectInputExpectCorrectOutput Real const &gamma = 5. / 3.; reconstruction::Primitive const primitive{1, 2, 3, 4, 5, 6, 7, 8}; reconstruction::Characteristic const characteristic_slope{17, 18, 19, 20, 21, 22, 23}; + reconstruction::eigenVecs const eigenvectors{ + 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, + }; Real const sound_speed = hydro_utilities::Calc_Sound_Speed(primitive.pressure, primitive.density, gamma); Real const sound_speed_squared = sound_speed * sound_speed; // Run test cuda_utilities::DeviceVector dev_results(1); - hipLaunchKernelGGL(test_char_2_prim, 1, 1, 0, 0, primitive, characteristic_slope, gamma, sound_speed, + hipLaunchKernelGGL(test_char_2_prim, 1, 1, 0, 0, primitive, characteristic_slope, eigenvectors, gamma, sound_speed, sound_speed_squared, dev_results.data()); CudaCheckError(); cudaDeviceSynchronize(); reconstruction::Primitive const host_results = dev_results.at(0); // Check results - reconstruction::Primitive const fiducial_results{ - 6.73268997307368267e+01, 1.79977606552837130e+01, 9.89872908629502835e-01, -4.94308571170036792e+00, - 3.94390831089473579e+02, -9.99000000000000000e+02, 2.88004228079705342e+01, 9.36584592818786064e+01}; + reconstruction::Primitive const fiducial_results{1740, 2934, -2526, -2828, 14333.333333333338, 0.0, -24040, 24880}; testingUtilities::checkResults(fiducial_results.density, host_results.density, "density"); testingUtilities::checkResults(fiducial_results.velocity_x, host_results.velocity_x, "velocity_x"); testingUtilities::checkResults(fiducial_results.velocity_y, host_results.velocity_y, "velocity_y", 1.34E-14); @@ -99,6 +108,70 @@ TEST(tMHDReconstructionCharacteristic2Primitive, CorrectInputExpectCorrectOutput testingUtilities::checkResults(fiducial_results.magnetic_y, host_results.magnetic_y, "magnetic_y"); testingUtilities::checkResults(fiducial_results.magnetic_z, host_results.magnetic_z, "magnetic_z"); } + +TEST(tMHDReconstructionComputeEigenvectors, CorrectInputExpectCorrectOutput) +{ + // Test parameters + Real const &gamma = 5. / 3.; + reconstruction::Primitive const primitive{1, 2, 3, 4, 5, 6, 7, 8}; + reconstruction::Characteristic const characteristic_slope{17, 18, 19, 20, 21, 22, 23}; + Real const sound_speed = hydro_utilities::Calc_Sound_Speed(primitive.pressure, primitive.density, gamma); + Real const sound_speed_squared = sound_speed * sound_speed; + + // Run test + cuda_utilities::DeviceVector dev_results(1); + hipLaunchKernelGGL(test_compute_eigenvectors, 1, 1, 0, 0, primitive, sound_speed, sound_speed_squared, gamma, + dev_results.data()); + CudaCheckError(); + cudaDeviceSynchronize(); + reconstruction::eigenVecs const host_results = dev_results.at(0); + // std::cout << to_string_exact(host_results.magnetosonic_speed_fast) << ","; + // std::cout << to_string_exact(host_results.magnetosonic_speed_slow) << ","; + // std::cout << to_string_exact(host_results.magnetosonic_speed_fast_squared) << ","; + // std::cout << to_string_exact(host_results.magnetosonic_speed_slow_squared) << ","; + // std::cout << to_string_exact(host_results.alpha_fast) << ","; + // std::cout << to_string_exact(host_results.alpha_slow) << ","; + // std::cout << to_string_exact(host_results.beta_y) << ","; + // std::cout << to_string_exact(host_results.beta_z) << ","; + // std::cout << to_string_exact(host_results.n_fs) << ","; + // std::cout << to_string_exact(host_results.sign) << ","; + // std::cout << to_string_exact(host_results.q_fast) << ","; + // std::cout << to_string_exact(host_results.q_slow) << ","; + // std::cout << to_string_exact(host_results.a_fast) << ","; + // std::cout << to_string_exact(host_results.a_slow) << ","; + // std::cout << to_string_exact(host_results.q_prime_fast) << ","; + // std::cout << to_string_exact(host_results.q_prime_slow) << ","; + // std::cout << to_string_exact(host_results.a_prime_fast) << ","; + // std::cout << to_string_exact(host_results.a_prime_slow) << "," << std::endl; + // Check results + reconstruction::eigenVecs const fiducial_results{ + 12.466068627219666, 1.3894122191714398, 155.40286701855041, 1.9304663147829049, 0.20425471836256681, + 0.97891777490585408, 0.65850460786851805, 0.75257669470687782, 0.059999999999999984, 1, + 2.546253336541183, 1.3601203180183106, 0.58963258314939582, 2.825892204282022, 0.15277520019247093, + 0.081607219081098623, 0.03537795498896374, 0.1695535322569213}; + testingUtilities::checkResults(fiducial_results.magnetosonic_speed_fast, host_results.magnetosonic_speed_fast, + "magnetosonic_speed_fast"); + testingUtilities::checkResults(fiducial_results.magnetosonic_speed_slow, host_results.magnetosonic_speed_slow, + "magnetosonic_speed_slow"); + testingUtilities::checkResults(fiducial_results.magnetosonic_speed_fast_squared, + host_results.magnetosonic_speed_fast_squared, "magnetosonic_speed_fast_squared"); + testingUtilities::checkResults(fiducial_results.magnetosonic_speed_slow_squared, + host_results.magnetosonic_speed_slow_squared, "magnetosonic_speed_slow_squared"); + testingUtilities::checkResults(fiducial_results.alpha_fast, host_results.alpha_fast, "alpha_fast"); + testingUtilities::checkResults(fiducial_results.alpha_slow, host_results.alpha_slow, "alpha_slow"); + testingUtilities::checkResults(fiducial_results.beta_y, host_results.beta_y, "beta_y"); + testingUtilities::checkResults(fiducial_results.beta_z, host_results.beta_z, "beta_z"); + testingUtilities::checkResults(fiducial_results.n_fs, host_results.n_fs, "n_fs"); + testingUtilities::checkResults(fiducial_results.sign, host_results.sign, "sign"); + testingUtilities::checkResults(fiducial_results.q_fast, host_results.q_fast, "q_fast"); + testingUtilities::checkResults(fiducial_results.q_slow, host_results.q_slow, "q_slow"); + testingUtilities::checkResults(fiducial_results.a_fast, host_results.a_fast, "a_fast"); + testingUtilities::checkResults(fiducial_results.a_slow, host_results.a_slow, "a_slow"); + testingUtilities::checkResults(fiducial_results.q_prime_fast, host_results.q_prime_fast, "q_prime_fast"); + testingUtilities::checkResults(fiducial_results.q_prime_slow, host_results.q_prime_slow, "q_prime_slow"); + testingUtilities::checkResults(fiducial_results.a_prime_fast, host_results.a_prime_fast, "a_prime_fast"); + testingUtilities::checkResults(fiducial_results.a_prime_slow, host_results.a_prime_slow, "a_prime_slow"); +} #endif // MHD TEST(tALLReconstructionLoadData, CorrectInputExpectCorrectOutput) @@ -223,12 +296,13 @@ __global__ void test_monotize_characteristic_return_primitive( reconstruction::Primitive const primitive, reconstruction::Primitive const del_L, reconstruction::Primitive const del_R, reconstruction::Primitive const del_C, reconstruction::Primitive const del_G, reconstruction::Characteristic const del_a_L, reconstruction::Characteristic const del_a_R, - reconstruction::Characteristic const del_a_C, reconstruction::Characteristic const del_a_G, Real const sound_speed, - Real const sound_speed_squared, Real const gamma, reconstruction::Primitive *monotonized_slope) + reconstruction::Characteristic const del_a_C, reconstruction::Characteristic const del_a_G, + reconstruction::eigenVecs const eigenvectors, Real const sound_speed, Real const sound_speed_squared, + Real const gamma, reconstruction::Primitive *monotonized_slope) { *monotonized_slope = reconstruction::Monotonize_Characteristic_Return_Primitive( - primitive, del_L, del_R, del_C, del_G, del_a_L, del_a_R, del_a_C, del_a_G, sound_speed, sound_speed_squared, - gamma); + primitive, del_L, del_R, del_C, del_G, del_a_L, del_a_R, del_a_C, del_a_G, eigenvectors, sound_speed, + sound_speed_squared, gamma); } TEST(tALLReconstructionMonotonizeCharacteristicReturnPrimitive, CorrectInputExpectCorrectOutput) @@ -256,19 +330,22 @@ TEST(tALLReconstructionMonotonizeCharacteristicReturnPrimitive, CorrectInputExpe #endif // MHD Real const sound_speed = 17.0, sound_speed_squared = sound_speed * sound_speed; Real const gamma = 5. / 3.; + reconstruction::eigenVecs const eigenvectors{ + 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, + }; // Get test data cuda_utilities::DeviceVector dev_results(1); hipLaunchKernelGGL(test_monotize_characteristic_return_primitive, 1, 1, 0, 0, primitive, del_L, del_R, del_C, del_G, - del_a_L, del_a_R, del_a_C, del_a_G, sound_speed, sound_speed_squared, gamma, dev_results.data()); + del_a_L, del_a_R, del_a_C, del_a_G, eigenvectors, sound_speed, sound_speed_squared, gamma, + dev_results.data()); CudaCheckError(); cudaDeviceSynchronize(); reconstruction::Primitive const host_results = dev_results.at(0); // Check results #ifdef MHD - reconstruction::Primitive const fiducial_data{174, 74.796411763317991, 19.428234044886157, 16.129327015450095, 33524, - 0, -1385.8699833027156, -1407.694707449215}; + reconstruction::Primitive const fiducial_data{5046, 2934, -2526, -2828, 1441532, 0.0, -69716, 72152}; testingUtilities::checkResults(fiducial_data.density, host_results.density, "density"); testingUtilities::checkResults(fiducial_data.velocity_x, host_results.velocity_x, "velocity_x"); testingUtilities::checkResults(fiducial_data.velocity_y, host_results.velocity_y, "velocity_y"); From c6cb44293f4c75a3e6494c45a9752d58add19b18 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Thu, 29 Jun 2023 14:52:51 -0400 Subject: [PATCH 453/694] Add launch bounds to PLMC and PPMC_VL kernels This improved performance on AMD systems quite a lot due to being able to allocate additional registers --- src/reconstruction/plmc_cuda.cu | 4 ++-- src/reconstruction/plmc_cuda.h | 4 ++-- src/reconstruction/ppmc_cuda.cu | 4 ++-- src/reconstruction/ppmc_cuda.h | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/reconstruction/plmc_cuda.cu b/src/reconstruction/plmc_cuda.cu index e45bbf771..46fcbfd89 100644 --- a/src/reconstruction/plmc_cuda.cu +++ b/src/reconstruction/plmc_cuda.cu @@ -21,8 +21,8 @@ gamma, int dir) * \brief When passed a stencil of conserved variables, returns the left and right boundary values for the interface calculated using plm. */ -__global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, int ny, int nz, Real dx, - Real dt, Real gamma, int dir, int n_fields) +__global__ __launch_bounds__(TPB) void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, + int ny, int nz, Real dx, Real dt, Real gamma, int dir, int n_fields) { // get a thread ID int const thread_id = threadIdx.x + blockIdx.x * blockDim.x; diff --git a/src/reconstruction/plmc_cuda.h b/src/reconstruction/plmc_cuda.h index 4a1ca322b..c2d25df84 100644 --- a/src/reconstruction/plmc_cuda.h +++ b/src/reconstruction/plmc_cuda.h @@ -15,7 +15,7 @@ gamma, int dir) * \brief When passed a stencil of conserved variables, returns the left and right boundary values for the interface calculated using plm. */ -__global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, int ny, int nz, Real dx, - Real dt, Real gamma, int dir, int n_fields); +__global__ __launch_bounds__(TPB) void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, + int ny, int nz, Real dx, Real dt, Real gamma, int dir, int n_fields); #endif // PLMC_CUDA_H diff --git a/src/reconstruction/ppmc_cuda.cu b/src/reconstruction/ppmc_cuda.cu index 1dec4bbc2..af40af921 100644 --- a/src/reconstruction/ppmc_cuda.cu +++ b/src/reconstruction/ppmc_cuda.cu @@ -539,8 +539,8 @@ __global__ void PPMC_CTU(Real *dev_conserved, Real *dev_bounds_L, Real *dev_boun // ===================================================================================================================== // ===================================================================================================================== -__global__ void PPMC_VL(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, int ny, int nz, Real gamma, - int dir) +__global__ __launch_bounds__(TPB) void PPMC_VL(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, + int ny, int nz, Real gamma, int dir) { // get a thread ID int const thread_id = threadIdx.x + blockIdx.x * blockDim.x; diff --git a/src/reconstruction/ppmc_cuda.h b/src/reconstruction/ppmc_cuda.h index 033f2505b..916853874 100644 --- a/src/reconstruction/ppmc_cuda.h +++ b/src/reconstruction/ppmc_cuda.h @@ -47,7 +47,7 @@ __global__ void PPMC_CTU(Real *dev_conserved, Real *dev_bounds_L, Real *dev_boun * \param[in] gamma The adiabatic index * \param[in] dir The direction to reconstruct. 0=X, 1=Y, 2=Z */ -__global__ void PPMC_VL(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, int ny, int nz, Real gamma, - int dir); +__global__ __launch_bounds__(TPB) void PPMC_VL(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, + int ny, int nz, Real gamma, int dir); #endif // PPMC_CUDA_H From a43e632d82de434130bec9a73009be030333aabc Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Mon, 10 Jul 2023 16:17:49 -0400 Subject: [PATCH 454/694] Fix naming of eigenVecs struct to EigenVecs --- src/reconstruction/plmc_cuda.cu | 4 ++-- src/reconstruction/ppmc_cuda.cu | 6 +++--- src/reconstruction/reconstruction.h | 16 ++++++++-------- src/reconstruction/reconstruction_tests.cu | 20 ++++++++++---------- 4 files changed, 23 insertions(+), 23 deletions(-) diff --git a/src/reconstruction/plmc_cuda.cu b/src/reconstruction/plmc_cuda.cu index 46fcbfd89..8db428b82 100644 --- a/src/reconstruction/plmc_cuda.cu +++ b/src/reconstruction/plmc_cuda.cu @@ -76,10 +76,10 @@ __global__ __launch_bounds__(TPB) void PLMC_cuda(Real *dev_conserved, Real *dev_ // Compute the eigenvectors #ifdef MHD - reconstruction::eigenVecs const eigenvectors = + reconstruction::EigenVecs const eigenvectors = reconstruction::Compute_Eigenvectors(cell_i, sound_speed, sound_speed_squared, gamma); #else - reconstruction::eigenVecs eigenvectors; + reconstruction::EigenVecs eigenvectors; #endif // MHD // Compute the left, right, centered, and van Leer differences of the diff --git a/src/reconstruction/ppmc_cuda.cu b/src/reconstruction/ppmc_cuda.cu index af40af921..d2e9589e5 100644 --- a/src/reconstruction/ppmc_cuda.cu +++ b/src/reconstruction/ppmc_cuda.cu @@ -93,7 +93,7 @@ __global__ void PPMC_CTU(Real *dev_conserved, Real *dev_bounds_L, Real *dev_boun Real sound_speed = hydro_utilities::Calc_Sound_Speed(cell_im1.pressure, cell_im1.density, gamma); // this isn't actually used and the compiler should optimize it away but since this is the only reconstruction // function that won't use it it was easier to add it here as an unused variable - reconstruction::eigenVecs eigenvector; + reconstruction::EigenVecs eigenvector; // Step 2 - Compute the left, right, centered, and van Leer differences of the primitive variables. Note that here L // and R refer to locations relative to the cell center Stone Eqn 36 @@ -608,10 +608,10 @@ __global__ __launch_bounds__(TPB) void PPMC_VL(Real *dev_conserved, Real *dev_bo Real const sound_speed_squared = sound_speed * sound_speed; #ifdef MHD - reconstruction::eigenVecs eigenvectors = + reconstruction::EigenVecs eigenvectors = reconstruction::Compute_Eigenvectors(cell_i, sound_speed, sound_speed_squared, gamma); #else - reconstruction::eigenVecs eigenvectors; + reconstruction::EigenVecs eigenvectors; #endif // MHD // Cell i diff --git a/src/reconstruction/reconstruction.h b/src/reconstruction/reconstruction.h index 794b3e1b0..3ed89a8b6 100644 --- a/src/reconstruction/reconstruction.h +++ b/src/reconstruction/reconstruction.h @@ -48,7 +48,7 @@ struct Primitive { // ===================================================================================================================== // ===================================================================================================================== -struct eigenVecs { +struct EigenVecs { Real magnetosonic_speed_fast, magnetosonic_speed_slow, magnetosonic_speed_fast_squared, magnetosonic_speed_slow_squared; Real alpha_fast, alpha_slow; @@ -263,13 +263,13 @@ Primitive __device__ __host__ __inline__ Van_Leer_Slope(Primitive const &left_sl * \param[in] sound_speed The sound speed * \param[in] sound_speed_squared The sound speed squared * \param[in] gamma The adiabatic index - * \return eigenVecs + * \return EigenVecs */ #ifdef MHD -eigenVecs __device__ __inline__ Compute_Eigenvectors(Primitive const &primitive, Real const &sound_speed, +EigenVecs __device__ __inline__ Compute_Eigenvectors(Primitive const &primitive, Real const &sound_speed, Real const &sound_speed_squared, Real const &gamma) { - eigenVecs output; + EigenVecs output; // This is taken from Stone et al. 2008, appendix A. Equation numbers will be quoted as relevant // Compute wave speeds and their squares @@ -328,7 +328,7 @@ eigenVecs __device__ __inline__ Compute_Eigenvectors(Primitive const &primitive, * * \param[in] primitive The primitive variables * \param[in] primitive_slope The primitive variables slopes - * \param[in] eigenVecs The eigenvectors + * \param[in] EigenVecs The eigenvectors * \param[in] sound_speed The speed of sound * \param[in] sound_speed_squared The speed of sound squared * \param[in] gamma The adiabatic index @@ -336,7 +336,7 @@ eigenVecs __device__ __inline__ Compute_Eigenvectors(Primitive const &primitive, */ Characteristic __device__ __inline__ Primitive_To_Characteristic(Primitive const &primitive, Primitive const &primitive_slope, - eigenVecs const &eigen, Real const &sound_speed, + EigenVecs const &eigen, Real const &sound_speed, Real const &sound_speed_squared, Real const &gamma) { Characteristic output; @@ -408,7 +408,7 @@ Characteristic __device__ __inline__ Primitive_To_Characteristic(Primitive const */ Primitive __device__ __host__ __inline__ Characteristic_To_Primitive(Primitive const &primitive, Characteristic const &characteristic_slope, - eigenVecs const &eigen, Real const &sound_speed, + EigenVecs const &eigen, Real const &sound_speed, Real const &sound_speed_squared, Real const &gamma) { Primitive output; @@ -471,7 +471,7 @@ Primitive __device__ __host__ __inline__ Characteristic_To_Primitive(Primitive c Primitive __device__ __inline__ Monotonize_Characteristic_Return_Primitive( Primitive const &primitive, Primitive const &del_L, Primitive const &del_R, Primitive const &del_C, Primitive const &del_G, Characteristic const &del_a_L, Characteristic const &del_a_R, Characteristic const &del_a_C, - Characteristic const &del_a_G, eigenVecs const &eigenvectors, Real const &sound_speed, + Characteristic const &del_a_G, EigenVecs const &eigenvectors, Real const &sound_speed, Real const &sound_speed_squared, Real const &gamma) { // The function that will actually do the monotozation diff --git a/src/reconstruction/reconstruction_tests.cu b/src/reconstruction/reconstruction_tests.cu index e649f023b..62e615b39 100644 --- a/src/reconstruction/reconstruction_tests.cu +++ b/src/reconstruction/reconstruction_tests.cu @@ -24,7 +24,7 @@ #ifdef MHD __global__ void test_prim_2_char(reconstruction::Primitive const primitive, reconstruction::Primitive const primitive_slope, - reconstruction::eigenVecs const eigenvectors, Real const gamma, Real const sound_speed, + reconstruction::EigenVecs const eigenvectors, Real const gamma, Real const sound_speed, Real const sound_speed_squared, reconstruction::Characteristic *characteristic_slope) { *characteristic_slope = reconstruction::Primitive_To_Characteristic(primitive, primitive_slope, eigenvectors, @@ -33,7 +33,7 @@ __global__ void test_prim_2_char(reconstruction::Primitive const primitive, __global__ void test_char_2_prim(reconstruction::Primitive const primitive, reconstruction::Characteristic const characteristic_slope, - reconstruction::eigenVecs const eigenvectors, Real const gamma, Real const sound_speed, + reconstruction::EigenVecs const eigenvectors, Real const gamma, Real const sound_speed, Real const sound_speed_squared, reconstruction::Primitive *primitive_slope) { *primitive_slope = reconstruction::Characteristic_To_Primitive(primitive, characteristic_slope, eigenvectors, @@ -42,7 +42,7 @@ __global__ void test_char_2_prim(reconstruction::Primitive const primitive, __global__ void test_compute_eigenvectors(reconstruction::Primitive const primitive, Real const sound_speed, Real const sound_speed_squared, Real const gamma, - reconstruction::eigenVecs *eigenvectors) + reconstruction::EigenVecs *eigenvectors) { *eigenvectors = reconstruction::Compute_Eigenvectors(primitive, sound_speed, sound_speed_squared, gamma); } @@ -53,7 +53,7 @@ TEST(tMHDReconstructionPrimitive2Characteristic, CorrectInputExpectCorrectOutput Real const &gamma = 5. / 3.; reconstruction::Primitive const primitive{1, 2, 3, 4, 5, 6, 7, 8}; reconstruction::Primitive const primitive_slope{9, 10, 11, 12, 13, 14, 15, 16}; - reconstruction::eigenVecs const eigenvectors{ + reconstruction::EigenVecs const eigenvectors{ 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, }; Real const sound_speed = hydro_utilities::Calc_Sound_Speed(primitive.pressure, primitive.density, gamma); @@ -84,7 +84,7 @@ TEST(tMHDReconstructionCharacteristic2Primitive, CorrectInputExpectCorrectOutput Real const &gamma = 5. / 3.; reconstruction::Primitive const primitive{1, 2, 3, 4, 5, 6, 7, 8}; reconstruction::Characteristic const characteristic_slope{17, 18, 19, 20, 21, 22, 23}; - reconstruction::eigenVecs const eigenvectors{ + reconstruction::EigenVecs const eigenvectors{ 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, }; Real const sound_speed = hydro_utilities::Calc_Sound_Speed(primitive.pressure, primitive.density, gamma); @@ -119,12 +119,12 @@ TEST(tMHDReconstructionComputeEigenvectors, CorrectInputExpectCorrectOutput) Real const sound_speed_squared = sound_speed * sound_speed; // Run test - cuda_utilities::DeviceVector dev_results(1); + cuda_utilities::DeviceVector dev_results(1); hipLaunchKernelGGL(test_compute_eigenvectors, 1, 1, 0, 0, primitive, sound_speed, sound_speed_squared, gamma, dev_results.data()); CudaCheckError(); cudaDeviceSynchronize(); - reconstruction::eigenVecs const host_results = dev_results.at(0); + reconstruction::EigenVecs const host_results = dev_results.at(0); // std::cout << to_string_exact(host_results.magnetosonic_speed_fast) << ","; // std::cout << to_string_exact(host_results.magnetosonic_speed_slow) << ","; // std::cout << to_string_exact(host_results.magnetosonic_speed_fast_squared) << ","; @@ -144,7 +144,7 @@ TEST(tMHDReconstructionComputeEigenvectors, CorrectInputExpectCorrectOutput) // std::cout << to_string_exact(host_results.a_prime_fast) << ","; // std::cout << to_string_exact(host_results.a_prime_slow) << "," << std::endl; // Check results - reconstruction::eigenVecs const fiducial_results{ + reconstruction::EigenVecs const fiducial_results{ 12.466068627219666, 1.3894122191714398, 155.40286701855041, 1.9304663147829049, 0.20425471836256681, 0.97891777490585408, 0.65850460786851805, 0.75257669470687782, 0.059999999999999984, 1, 2.546253336541183, 1.3601203180183106, 0.58963258314939582, 2.825892204282022, 0.15277520019247093, @@ -297,7 +297,7 @@ __global__ void test_monotize_characteristic_return_primitive( reconstruction::Primitive const del_R, reconstruction::Primitive const del_C, reconstruction::Primitive const del_G, reconstruction::Characteristic const del_a_L, reconstruction::Characteristic const del_a_R, reconstruction::Characteristic const del_a_C, reconstruction::Characteristic const del_a_G, - reconstruction::eigenVecs const eigenvectors, Real const sound_speed, Real const sound_speed_squared, + reconstruction::EigenVecs const eigenvectors, Real const sound_speed, Real const sound_speed_squared, Real const gamma, reconstruction::Primitive *monotonized_slope) { *monotonized_slope = reconstruction::Monotonize_Characteristic_Return_Primitive( @@ -330,7 +330,7 @@ TEST(tALLReconstructionMonotonizeCharacteristicReturnPrimitive, CorrectInputExpe #endif // MHD Real const sound_speed = 17.0, sound_speed_squared = sound_speed * sound_speed; Real const gamma = 5. / 3.; - reconstruction::eigenVecs const eigenvectors{ + reconstruction::EigenVecs const eigenvectors{ 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, }; From 254a7958f8eccc28b21070cea13df35d0ad78c0c Mon Sep 17 00:00:00 2001 From: helenarichie Date: Mon, 17 Jul 2023 14:32:30 -0400 Subject: [PATCH 455/694] update formatting from PRs 300 and 311 --- clang-tidy-runner.sh | 23 ++++++++++ src/reconstruction/ppmc_cuda_tests.cu | 8 ++-- src/reconstruction/reconstruction_tests.cu | 50 +++++++++++----------- 3 files changed, 52 insertions(+), 29 deletions(-) create mode 100644 clang-tidy-runner.sh diff --git a/clang-tidy-runner.sh b/clang-tidy-runner.sh new file mode 100644 index 000000000..b8f0e4888 --- /dev/null +++ b/clang-tidy-runner.sh @@ -0,0 +1,23 @@ +#!/usr/bin/env bash + +# Description: +# Run clang-tidy on all build types in parallel. Note that this spawns 2x the +# number of build types threads since each type has a thread for the CPU code +# and a thread for the GPU code + +# If ctrl-c is sent trap it and kill all clang-tidy processes +trap "kill -- -$$" EXIT + +# cd into the Cholla directory. Default to ${HOME}/Code/cholla +cholla_path=${1:-${HOME}/Code/cholla} +cd ${cholla_path} + +# Run all clang-tidy build types in parallel +builds=( hydro gravity disk particles cosmology mhd dust) +for build in "${builds[@]}" +do + make tidy TYPE=$build & +done + +# Wait for clang-tidy to finish +wait diff --git a/src/reconstruction/ppmc_cuda_tests.cu b/src/reconstruction/ppmc_cuda_tests.cu index 7dd9b49e3..1a56bbcbc 100644 --- a/src/reconstruction/ppmc_cuda_tests.cu +++ b/src/reconstruction/ppmc_cuda_tests.cu @@ -101,7 +101,7 @@ TEST(tHYDROPpmcCTUReconstructor, CorrectInputExpectCorrectOutput) ? 0.0 : fiducial_interface_left.at(direction)[i]; - testingUtilities::checkResults( + testingUtilities::Check_Results( fiducial_val, test_val, "left interface at i=" + std::to_string(i) + ", in direction " + std::to_string(direction)); @@ -111,7 +111,7 @@ TEST(tHYDROPpmcCTUReconstructor, CorrectInputExpectCorrectOutput) ? 0.0 : fiducial_interface_right.at(direction)[i]; - testingUtilities::checkResults( + testingUtilities::Check_Results( fiducial_val, test_val, "right interface at i=" + std::to_string(i) + ", in direction " + std::to_string(direction)); } @@ -241,7 +241,7 @@ TEST(tALLPpmcVLReconstructor, CorrectInputExpectCorrectOutput) ? 0.0 : fiducial_interface_left.at(direction)[i]; - testingUtilities::checkResults( + testingUtilities::Check_Results( fiducial_val, test_val, "left interface at i=" + std::to_string(i) + ", in direction " + std::to_string(direction)); @@ -251,7 +251,7 @@ TEST(tALLPpmcVLReconstructor, CorrectInputExpectCorrectOutput) ? 0.0 : fiducial_interface_right.at(direction)[i]; - testingUtilities::checkResults( + testingUtilities::Check_Results( fiducial_val, test_val, "right interface at i=" + std::to_string(i) + ", in direction " + std::to_string(direction)); } diff --git a/src/reconstruction/reconstruction_tests.cu b/src/reconstruction/reconstruction_tests.cu index 6084a0b78..3c4ba9b04 100644 --- a/src/reconstruction/reconstruction_tests.cu +++ b/src/reconstruction/reconstruction_tests.cu @@ -304,17 +304,17 @@ TEST(tHYDROReconstructionMonotizeParabolicInterface, CorrectInputExpectCorrectOu 4.6476103465999996, 3.7096802847000001}; reconstruction::Primitive const fiducial_interface_R{1.4708046700999999, 9.428341982700001, 3.7123503441999999, 4.6476103465999996, 3.7096802847000001}; - testingUtilities::checkResults(fiducial_interface_L.density, interface_L_iph.density, "density"); - testingUtilities::checkResults(fiducial_interface_L.velocity_x, interface_L_iph.velocity_x, "velocity_x"); - testingUtilities::checkResults(fiducial_interface_L.velocity_y, interface_L_iph.velocity_y, "velocity_y"); - testingUtilities::checkResults(fiducial_interface_L.velocity_z, interface_L_iph.velocity_z, "velocity_z"); - testingUtilities::checkResults(fiducial_interface_L.pressure, interface_L_iph.pressure, "pressure"); - - testingUtilities::checkResults(fiducial_interface_R.density, interface_R_imh.density, "density"); - testingUtilities::checkResults(fiducial_interface_R.velocity_x, interface_R_imh.velocity_x, "velocity_x"); - testingUtilities::checkResults(fiducial_interface_R.velocity_y, interface_R_imh.velocity_y, "velocity_y"); - testingUtilities::checkResults(fiducial_interface_R.velocity_z, interface_R_imh.velocity_z, "velocity_z"); - testingUtilities::checkResults(fiducial_interface_R.pressure, interface_R_imh.pressure, "pressure"); + testingUtilities::Check_Results(fiducial_interface_L.density, interface_L_iph.density, "density"); + testingUtilities::Check_Results(fiducial_interface_L.velocity_x, interface_L_iph.velocity_x, "velocity_x"); + testingUtilities::Check_Results(fiducial_interface_L.velocity_y, interface_L_iph.velocity_y, "velocity_y"); + testingUtilities::Check_Results(fiducial_interface_L.velocity_z, interface_L_iph.velocity_z, "velocity_z"); + testingUtilities::Check_Results(fiducial_interface_L.pressure, interface_L_iph.pressure, "pressure"); + + testingUtilities::Check_Results(fiducial_interface_R.density, interface_R_imh.density, "density"); + testingUtilities::Check_Results(fiducial_interface_R.velocity_x, interface_R_imh.velocity_x, "velocity_x"); + testingUtilities::Check_Results(fiducial_interface_R.velocity_y, interface_R_imh.velocity_y, "velocity_y"); + testingUtilities::Check_Results(fiducial_interface_R.velocity_z, interface_R_imh.velocity_z, "velocity_z"); + testingUtilities::Check_Results(fiducial_interface_R.pressure, interface_R_imh.pressure, "pressure"); } TEST(tALLReconstructionCalcInterfaceLinear, CorrectInputExpectCorrectOutput) @@ -375,21 +375,21 @@ TEST(tALLReconstructionCalcInterfaceParabolic, CorrectInputExpectCorrectOutput) reconstruction::Primitive const fiducial_data{4.833333333333333, 5.833333333333333, 6.833333333333333, 7.833333333333333, 8.8333333333333339, 0.0, 10.833333333333334, 11.833333333333334}; - testingUtilities::checkResults(fiducial_data.density, test_data.density, "density"); - testingUtilities::checkResults(fiducial_data.velocity_x, test_data.velocity_x, "velocity_x"); - testingUtilities::checkResults(fiducial_data.velocity_y, test_data.velocity_y, "velocity_y"); - testingUtilities::checkResults(fiducial_data.velocity_z, test_data.velocity_z, "velocity_z"); - testingUtilities::checkResults(fiducial_data.pressure, test_data.pressure, "pressure"); - testingUtilities::checkResults(fiducial_data.magnetic_y, test_data.magnetic_y, "magnetic_y"); - testingUtilities::checkResults(fiducial_data.magnetic_z, test_data.magnetic_z, "magnetic_z"); + testingUtilities::Check_Results(fiducial_data.density, test_data.density, "density"); + testingUtilities::Check_Results(fiducial_data.velocity_x, test_data.velocity_x, "velocity_x"); + testingUtilities::Check_Results(fiducial_data.velocity_y, test_data.velocity_y, "velocity_y"); + testingUtilities::Check_Results(fiducial_data.velocity_z, test_data.velocity_z, "velocity_z"); + testingUtilities::Check_Results(fiducial_data.pressure, test_data.pressure, "pressure"); + testingUtilities::Check_Results(fiducial_data.magnetic_y, test_data.magnetic_y, "magnetic_y"); + testingUtilities::Check_Results(fiducial_data.magnetic_z, test_data.magnetic_z, "magnetic_z"); #else // MHD reconstruction::Primitive const fiducial_data{4.833333333333333, 5.833333333333333, 6.833333333333333, 7.833333333333333, 8.8333333333333339}; - testingUtilities::checkResults(fiducial_data.density, test_data.density, "density"); - testingUtilities::checkResults(fiducial_data.velocity_x, test_data.velocity_x, "velocity_x"); - testingUtilities::checkResults(fiducial_data.velocity_y, test_data.velocity_y, "velocity_y"); - testingUtilities::checkResults(fiducial_data.velocity_z, test_data.velocity_z, "velocity_z"); - testingUtilities::checkResults(fiducial_data.pressure, test_data.pressure, "pressure"); + testingUtilities::Check_Results(fiducial_data.density, test_data.density, "density"); + testingUtilities::Check_Results(fiducial_data.velocity_x, test_data.velocity_x, "velocity_x"); + testingUtilities::Check_Results(fiducial_data.velocity_y, test_data.velocity_y, "velocity_y"); + testingUtilities::Check_Results(fiducial_data.velocity_z, test_data.velocity_z, "velocity_z"); + testingUtilities::Check_Results(fiducial_data.pressure, test_data.pressure, "pressure"); #endif // MHD } @@ -461,8 +461,8 @@ TEST(tALLReconstructionPPMSingleVariable, CorrectInputExpectCorrectOutput) input_data[idx + 4], test_left_interface, test_right_interface); // Compare results - testingUtilities::checkResults(fiducial_left_interface.at(i), test_left_interface, "left i+1/2 interface"); - testingUtilities::checkResults(fiducial_right_interface.at(i), test_right_interface, "right i-1/2 interface"); + testingUtilities::Check_Results(fiducial_left_interface.at(i), test_left_interface, "left i+1/2 interface"); + testingUtilities::Check_Results(fiducial_right_interface.at(i), test_right_interface, "right i-1/2 interface"); } } From c1681630e2720a91f74c73ca970f5e96439ac87f Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Thu, 22 Jun 2023 17:05:01 -0400 Subject: [PATCH 456/694] Add linear wave convergence tests This adds a quick check of the linear wave convergence rate for all 4 MHD waves --- src/system_tests/mhd_system_tests.cpp | 148 +++++++++++++++++++++++++- src/system_tests/system_tester.cpp | 16 +-- src/system_tests/system_tester.h | 10 ++ 3 files changed, 164 insertions(+), 10 deletions(-) diff --git a/src/system_tests/mhd_system_tests.cpp b/src/system_tests/mhd_system_tests.cpp index 90ae75a86..4238fd545 100644 --- a/src/system_tests/mhd_system_tests.cpp +++ b/src/system_tests/mhd_system_tests.cpp @@ -14,6 +14,7 @@ // Local includes #include "../io/io.h" #include "../system_tests/system_tester.h" +#include "../utils/testing_utilities.h" // ============================================================================= // Test Suite: tMHDSYSTEMLinearWavesParameterizedAngle @@ -32,15 +33,15 @@ class tMHDSYSTEMLinearWavesParameterizedAngle : public ::testing::TestWithParam< protected: systemTest::SystemTestRunner waveTest; + inline static std::unordered_map n32_l2norms; void setLaunchParams(double const &waveSpeed, double const &rEigenVec_rho, double const &rEigenVec_MomentumX, double const &rEigenVec_MomentumY, double const &rEigenVec_MomentumZ, double const &rEigenVec_E, double const &rEigenVec_Bx, double const &rEigenVec_By, double const &rEigenVec_Bz, double const &pitch, double const &yaw, double const &domain, int const &domain_direction, - double const &vx = 0.0) + double const &vx = 0.0, size_t const &N = 32) { // Constant for all tests - size_t const N = 32; double const gamma = 5. / 3.; double const tOut = 2 * domain / waveSpeed; @@ -154,6 +155,8 @@ TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, FastMagnetosonicWaveRightMovingC #elif defined(PPMC) waveTest.runL1ErrorTest(6.11E-8, 5.5E-8); #endif // PCM + + n32_l2norms["fast_" + std::to_string(domain_direction)] = waveTest.getL2Norm(); } TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, FastMagnetosonicWaveLeftMovingCorrectInputExpectCorrectOutput) @@ -228,6 +231,8 @@ TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, SlowMagnetosonicWaveRightMovingC #elif defined(PPMC) waveTest.runL1ErrorTest(1.45E-9, 1.3E-9); #endif // PCM + + n32_l2norms["slow_" + std::to_string(domain_direction)] = waveTest.getL2Norm(); } TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, SlowMagnetosonicWaveLeftMovingCorrectInputExpectCorrectOutput) @@ -301,6 +306,8 @@ TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, AlfvenWaveRightMovingCorrectInpu #elif defined(PPMC) waveTest.runL1ErrorTest(1.95e-09, 2.16e-09); #endif // PCM + + n32_l2norms["alfven_" + std::to_string(domain_direction)] = waveTest.getL2Norm(); } TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, AlfvenWaveLeftMovingCorrectInputExpectCorrectOutput) @@ -375,6 +382,143 @@ TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, MHDContactWaveCorrectInputExpect #elif defined(PPMC) waveTest.runL1ErrorTest(1.41e-09, 1.5E-09); #endif // PCM + + n32_l2norms["contact_" + std::to_string(domain_direction)] = waveTest.getL2Norm(); +} + +TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, FastMagnetosonicWaveExpectSecondOrderConvergence) +{ + // Get the test parameters + auto [pitch, yaw, domain, domain_direction] = GetParam(); + + // Specific to this test + double const waveSpeed = 2.; + std::vector const numTimeSteps = {107, 102, 110}; + + double const prefix = 1. / (2 * std::sqrt(5)); + double const rEigenVec_rho = prefix * 2; + double const rEigenVec_MomentumX = prefix * 4; + double const rEigenVec_MomentumY = prefix * -2; + double const rEigenVec_MomentumZ = prefix * 0; + double const rEigenVec_Bx = prefix * 0; + double const rEigenVec_By = prefix * 4; + double const rEigenVec_Bz = prefix * 0; + double const rEigenVec_E = prefix * 9; + + // Set the launch parameters + setLaunchParams(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, rEigenVec_MomentumY, rEigenVec_MomentumZ, rEigenVec_E, + rEigenVec_Bx, rEigenVec_By, rEigenVec_Bz, pitch, yaw, domain, domain_direction, 0.0, 16); + + // Set the number of timesteps + waveTest.setFiducialNumTimeSteps(numTimeSteps[domain_direction - 1]); + + // Run the wave + waveTest.runL1ErrorTest(3.0E-8, 4.0E-8); + + // Check the scaling + testingUtilities::checkResults(4.0, waveTest.getL2Norm() / n32_l2norms["fast_" + std::to_string(domain_direction)], + "", 0.07); +} + +TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, SlowMagnetosonicWaveExpectSecondOrderConvergence) +{ + // Get the test parameters + auto [pitch, yaw, domain, domain_direction] = GetParam(); + + // Specific to this test + double const waveSpeed = 0.5; + std::vector const numTimeSteps = {854, 813, 880}; + + double const prefix = 1. / (2 * std::sqrt(5)); + double const rEigenVec_rho = prefix * 4; + double const rEigenVec_MomentumX = prefix * 2; + double const rEigenVec_MomentumY = prefix * 4; + double const rEigenVec_MomentumZ = prefix * 0; + double const rEigenVec_Bx = prefix * 0; + double const rEigenVec_By = prefix * -2; + double const rEigenVec_Bz = prefix * 0; + double const rEigenVec_E = prefix * 3; + + // Set the launch parameters + setLaunchParams(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, rEigenVec_MomentumY, rEigenVec_MomentumZ, rEigenVec_E, + rEigenVec_Bx, rEigenVec_By, rEigenVec_Bz, pitch, yaw, domain, domain_direction, 0.0, 16); + + // Set the number of timesteps + waveTest.setFiducialNumTimeSteps(numTimeSteps[domain_direction - 1]); + + // Run the wave + waveTest.runL1ErrorTest(3.0E-8, 4.0E-8); + + // Check the scaling + testingUtilities::checkResults(4.0, waveTest.getL2Norm() / n32_l2norms["slow_" + std::to_string(domain_direction)], + "", 0.07); +} + +TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, AlfvenWaveExpectSecondOrderConvergence) +{ + // Get the test parameters + auto [pitch, yaw, domain, domain_direction] = GetParam(); + + // Specific to this test + double const waveSpeed = 1.0; + std::vector const numTimeSteps = {427, 407, 440}; + + double const rEigenVec_rho = 0; + double const rEigenVec_MomentumX = 0; + double const rEigenVec_MomentumY = 0; + double const rEigenVec_MomentumZ = -1; + double const rEigenVec_Bx = 0; + double const rEigenVec_By = 0; + double const rEigenVec_Bz = 1; + double const rEigenVec_E = 0; + + // Set the launch parameters + setLaunchParams(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, rEigenVec_MomentumY, rEigenVec_MomentumZ, rEigenVec_E, + rEigenVec_Bx, rEigenVec_By, rEigenVec_Bz, pitch, yaw, domain, domain_direction, 0.0, 16); + + // Set the number of timesteps + waveTest.setFiducialNumTimeSteps(numTimeSteps[domain_direction - 1]); + + // Run the wave + waveTest.runL1ErrorTest(3.0E-8, 4.0E-8); + + // Check the scaling + testingUtilities::checkResults(4.0, waveTest.getL2Norm() / n32_l2norms["alven_" + std::to_string(domain_direction)], + "", 0.07); +} + +TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, MHDContactWaveExpectSecondOrderConvergence) +{ + // Get the test parameters + auto [pitch, yaw, domain, domain_direction] = GetParam(); + + // Specific to this test + double const waveSpeed = 1.0; + std::vector const numTimeSteps = {641, 620, 654}; + + double const rEigenVec_rho = 1; + double const rEigenVec_MomentumX = 1; + double const rEigenVec_MomentumY = 0; + double const rEigenVec_MomentumZ = 0; + double const rEigenVec_Bx = 0; + double const rEigenVec_By = 0; + double const rEigenVec_Bz = 0; + double const rEigenVec_E = 0.5; + double const velocityX = waveSpeed; + + // Set the launch parameters + setLaunchParams(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, rEigenVec_MomentumY, rEigenVec_MomentumZ, rEigenVec_E, + rEigenVec_Bx, rEigenVec_By, rEigenVec_Bz, pitch, yaw, domain, domain_direction, velocityX, 16); + + // Set the number of timesteps + waveTest.setFiducialNumTimeSteps(numTimeSteps[domain_direction - 1]); + + // Run the wave + waveTest.runL1ErrorTest(3.0E-8, 4.0E-8); + + // Check the scaling + testingUtilities::checkResults(4.0, waveTest.getL2Norm() / n32_l2norms["contact_" + std::to_string(domain_direction)], + "", 0.07); } INSTANTIATE_TEST_SUITE_P(, tMHDSYSTEMLinearWavesParameterizedAngle, diff --git a/src/system_tests/system_tester.cpp b/src/system_tests/system_tester.cpp index a8302992d..7a07c73af 100644 --- a/src/system_tests/system_tester.cpp +++ b/src/system_tests/system_tester.cpp @@ -108,7 +108,7 @@ void systemTest::SystemTestRunner::runTest(bool const &compute_L2_norm_only, dou << std::endl; // Compute the L1 Error. - double L2Norm = 0; + _L2Norm = 0; double maxError = 0; // Loop over the datasets to be tested for (auto const &dataSetName : _fiducialDataSetNames) { @@ -187,14 +187,14 @@ void systemTest::SystemTestRunner::runTest(bool const &compute_L2_norm_only, dou if (compute_L2_norm_only) { L1_error /= static_cast(testDims[0] * testDims[1] * testDims[2]); - L2Norm += L1_error * L1_error; + _L2Norm += L1_error * L1_error; } } if (compute_L2_norm_only) { // Check the L2 Norm - L2Norm = std::sqrt(L2Norm); - EXPECT_LT(L2Norm, maxAllowedL1Error) << "the norm of the L1 error vector has exceeded the allowed value"; + _L2Norm = std::sqrt(_L2Norm); + EXPECT_LT(_L2Norm, maxAllowedL1Error) << "the norm of the L1 error vector has exceeded the allowed value"; // Check the Max Error EXPECT_LT(maxError, maxAllowedError) << "The maximum error has exceeded the allowed value"; @@ -270,7 +270,7 @@ void systemTest::SystemTestRunner::runL1ErrorTest(double const &maxAllowedL1Erro << std::endl; // Loop over the datasets to be tested - double L2Norm = 0; + _L2Norm = 0; double maxError = 0; for (auto const &dataSetName : _fiducialDataSetNames) { if (dataSetName == "GasEnergy") { @@ -312,7 +312,7 @@ void systemTest::SystemTestRunner::runL1ErrorTest(double const &maxAllowedL1Erro } L1_error /= static_cast(initialDims[0] * initialDims[1] * initialDims[2]); - L2Norm += L1_error * L1_error; + _L2Norm += L1_error * L1_error; // Perform the correctness check EXPECT_LT(L1_error, maxAllowedL1Error) @@ -320,8 +320,8 @@ void systemTest::SystemTestRunner::runL1ErrorTest(double const &maxAllowedL1Erro } // Check the L2 Norm - L2Norm = std::sqrt(L2Norm); - EXPECT_LT(L2Norm, maxAllowedL1Error) << "the norm of the L1 error vector has exceeded the allowed value"; + _L2Norm = std::sqrt(_L2Norm); + EXPECT_LT(_L2Norm, maxAllowedL1Error) << "the norm of the L1 error vector has exceeded the allowed value"; // Check the Max Error EXPECT_LT(maxError, maxAllowedError) << "The maximum error has exceeded the allowed value"; diff --git a/src/system_tests/system_tester.h b/src/system_tests/system_tester.h index 5690a3020..20e430a3a 100644 --- a/src/system_tests/system_tester.h +++ b/src/system_tests/system_tester.h @@ -106,6 +106,13 @@ class systemTest::SystemTestRunner */ std::string getChollaSettingsFilePath() { return _chollaSettingsPath; }; + /*! + * \brief Get the L2Norm + * + * \return double The L2Norm of the last run test + */ + double getL2Norm() { return _L2Norm; }; + /*! * \brief Get the Output Directory object * @@ -304,6 +311,9 @@ class systemTest::SystemTestRunner /// appear to differ from NVIDIA/GCC/XL by roughly 1E-12 double _fixedEpsilon = 5.0E-12; + /// The L2 norm of the error vector + double _L2Norm; + /// Flag to indicate if a fiducial HDF5 data file is being used or a /// programmatically generated H5File object. `true` = use a file, `false` = /// use generated H5File object From c47319bd7a1b4ddd4bd2dafef6aab25cfeba8b8e Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Wed, 5 Jul 2023 15:24:20 -0400 Subject: [PATCH 457/694] Rename L2 Norm variables in MHD system tests for clarity --- src/system_tests/mhd_system_tests.cpp | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/src/system_tests/mhd_system_tests.cpp b/src/system_tests/mhd_system_tests.cpp index 4238fd545..f8329069b 100644 --- a/src/system_tests/mhd_system_tests.cpp +++ b/src/system_tests/mhd_system_tests.cpp @@ -33,7 +33,7 @@ class tMHDSYSTEMLinearWavesParameterizedAngle : public ::testing::TestWithParam< protected: systemTest::SystemTestRunner waveTest; - inline static std::unordered_map n32_l2norms; + inline static std::unordered_map high_res_l2norms; void setLaunchParams(double const &waveSpeed, double const &rEigenVec_rho, double const &rEigenVec_MomentumX, double const &rEigenVec_MomentumY, double const &rEigenVec_MomentumZ, double const &rEigenVec_E, @@ -156,7 +156,7 @@ TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, FastMagnetosonicWaveRightMovingC waveTest.runL1ErrorTest(6.11E-8, 5.5E-8); #endif // PCM - n32_l2norms["fast_" + std::to_string(domain_direction)] = waveTest.getL2Norm(); + high_res_l2norms["fast_" + std::to_string(domain_direction)] = waveTest.getL2Norm(); } TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, FastMagnetosonicWaveLeftMovingCorrectInputExpectCorrectOutput) @@ -232,7 +232,7 @@ TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, SlowMagnetosonicWaveRightMovingC waveTest.runL1ErrorTest(1.45E-9, 1.3E-9); #endif // PCM - n32_l2norms["slow_" + std::to_string(domain_direction)] = waveTest.getL2Norm(); + high_res_l2norms["slow_" + std::to_string(domain_direction)] = waveTest.getL2Norm(); } TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, SlowMagnetosonicWaveLeftMovingCorrectInputExpectCorrectOutput) @@ -307,7 +307,7 @@ TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, AlfvenWaveRightMovingCorrectInpu waveTest.runL1ErrorTest(1.95e-09, 2.16e-09); #endif // PCM - n32_l2norms["alfven_" + std::to_string(domain_direction)] = waveTest.getL2Norm(); + high_res_l2norms["alfven_" + std::to_string(domain_direction)] = waveTest.getL2Norm(); } TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, AlfvenWaveLeftMovingCorrectInputExpectCorrectOutput) @@ -383,7 +383,7 @@ TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, MHDContactWaveCorrectInputExpect waveTest.runL1ErrorTest(1.41e-09, 1.5E-09); #endif // PCM - n32_l2norms["contact_" + std::to_string(domain_direction)] = waveTest.getL2Norm(); + high_res_l2norms["contact_" + std::to_string(domain_direction)] = waveTest.getL2Norm(); } TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, FastMagnetosonicWaveExpectSecondOrderConvergence) @@ -416,8 +416,9 @@ TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, FastMagnetosonicWaveExpectSecond waveTest.runL1ErrorTest(3.0E-8, 4.0E-8); // Check the scaling - testingUtilities::checkResults(4.0, waveTest.getL2Norm() / n32_l2norms["fast_" + std::to_string(domain_direction)], - "", 0.07); + double const low_res_l2norm = waveTest.getL2Norm(); + testingUtilities::checkResults(4.0, low_res_l2norm / high_res_l2norms["fast_" + std::to_string(domain_direction)], "", + 0.07); } TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, SlowMagnetosonicWaveExpectSecondOrderConvergence) @@ -450,8 +451,9 @@ TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, SlowMagnetosonicWaveExpectSecond waveTest.runL1ErrorTest(3.0E-8, 4.0E-8); // Check the scaling - testingUtilities::checkResults(4.0, waveTest.getL2Norm() / n32_l2norms["slow_" + std::to_string(domain_direction)], - "", 0.07); + double const low_res_l2norm = waveTest.getL2Norm(); + testingUtilities::checkResults(4.0, low_res_l2norm / high_res_l2norms["slow_" + std::to_string(domain_direction)], "", + 0.07); } TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, AlfvenWaveExpectSecondOrderConvergence) @@ -483,7 +485,8 @@ TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, AlfvenWaveExpectSecondOrderConve waveTest.runL1ErrorTest(3.0E-8, 4.0E-8); // Check the scaling - testingUtilities::checkResults(4.0, waveTest.getL2Norm() / n32_l2norms["alven_" + std::to_string(domain_direction)], + double const low_res_l2norm = waveTest.getL2Norm(); + testingUtilities::checkResults(4.0, low_res_l2norm / high_res_l2norms["alven_" + std::to_string(domain_direction)], "", 0.07); } @@ -517,7 +520,8 @@ TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, MHDContactWaveExpectSecondOrderC waveTest.runL1ErrorTest(3.0E-8, 4.0E-8); // Check the scaling - testingUtilities::checkResults(4.0, waveTest.getL2Norm() / n32_l2norms["contact_" + std::to_string(domain_direction)], + double const low_res_l2norm = waveTest.getL2Norm(); + testingUtilities::checkResults(4.0, low_res_l2norm / high_res_l2norms["contact_" + std::to_string(domain_direction)], "", 0.07); } From 639ddc4119caea2d13124a1ee2498fb3623210fd Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Wed, 12 Jul 2023 16:00:45 -0400 Subject: [PATCH 458/694] Update linear wave convergence test for PPMC --- src/system_tests/mhd_system_tests.cpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/system_tests/mhd_system_tests.cpp b/src/system_tests/mhd_system_tests.cpp index f8329069b..30e1d81cf 100644 --- a/src/system_tests/mhd_system_tests.cpp +++ b/src/system_tests/mhd_system_tests.cpp @@ -418,7 +418,7 @@ TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, FastMagnetosonicWaveExpectSecond // Check the scaling double const low_res_l2norm = waveTest.getL2Norm(); testingUtilities::checkResults(4.0, low_res_l2norm / high_res_l2norms["fast_" + std::to_string(domain_direction)], "", - 0.07); + 0.17); } TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, SlowMagnetosonicWaveExpectSecondOrderConvergence) @@ -428,7 +428,7 @@ TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, SlowMagnetosonicWaveExpectSecond // Specific to this test double const waveSpeed = 0.5; - std::vector const numTimeSteps = {854, 813, 880}; + std::vector const numTimeSteps = {427, 407, 440}; double const prefix = 1. / (2 * std::sqrt(5)); double const rEigenVec_rho = prefix * 4; @@ -453,7 +453,7 @@ TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, SlowMagnetosonicWaveExpectSecond // Check the scaling double const low_res_l2norm = waveTest.getL2Norm(); testingUtilities::checkResults(4.0, low_res_l2norm / high_res_l2norms["slow_" + std::to_string(domain_direction)], "", - 0.07); + 0.17); } TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, AlfvenWaveExpectSecondOrderConvergence) @@ -463,7 +463,7 @@ TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, AlfvenWaveExpectSecondOrderConve // Specific to this test double const waveSpeed = 1.0; - std::vector const numTimeSteps = {427, 407, 440}; + std::vector const numTimeSteps = {214, 204, 220}; double const rEigenVec_rho = 0; double const rEigenVec_MomentumX = 0; @@ -486,8 +486,8 @@ TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, AlfvenWaveExpectSecondOrderConve // Check the scaling double const low_res_l2norm = waveTest.getL2Norm(); - testingUtilities::checkResults(4.0, low_res_l2norm / high_res_l2norms["alven_" + std::to_string(domain_direction)], - "", 0.07); + testingUtilities::checkResults(4.0, low_res_l2norm / high_res_l2norms["alfven_" + std::to_string(domain_direction)], + "", 0.17); } TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, MHDContactWaveExpectSecondOrderConvergence) @@ -497,7 +497,7 @@ TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, MHDContactWaveExpectSecondOrderC // Specific to this test double const waveSpeed = 1.0; - std::vector const numTimeSteps = {641, 620, 654}; + std::vector const numTimeSteps = {321, 310, 327}; double const rEigenVec_rho = 1; double const rEigenVec_MomentumX = 1; @@ -522,7 +522,7 @@ TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, MHDContactWaveExpectSecondOrderC // Check the scaling double const low_res_l2norm = waveTest.getL2Norm(); testingUtilities::checkResults(4.0, low_res_l2norm / high_res_l2norms["contact_" + std::to_string(domain_direction)], - "", 0.07); + "", 0.17); } INSTANTIATE_TEST_SUITE_P(, tMHDSYSTEMLinearWavesParameterizedAngle, From e5568afebbea0651ad70dc161b5d3ac3348326ea Mon Sep 17 00:00:00 2001 From: Alwin Date: Fri, 21 Jul 2023 23:45:09 -0400 Subject: [PATCH 459/694] reconstruction diff --- src/reconstruction/ppmc_cuda.cu | 36 +++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/src/reconstruction/ppmc_cuda.cu b/src/reconstruction/ppmc_cuda.cu index d2e9589e5..b2253da36 100644 --- a/src/reconstruction/ppmc_cuda.cu +++ b/src/reconstruction/ppmc_cuda.cu @@ -27,11 +27,43 @@ __global__ void PPMC_CTU(Real *dev_conserved, Real *dev_bounds_L, Real *dev_boun int xid, yid, zid; cuda_utilities::compute3DIndices(thread_id, nx, ny, xid, yid, zid); + + int xs, xe, ys, ye, zs, ze; + switch (dir) { + case 0: + xs = 2; + xe = nx - 3; + ys = 0; + ye = ny; + zs = 0; + ze = nz; + break; + case 1: + xs = 0; + xe = nx; + ys = 2; + ye = ny - 3; + zs = 0; + ze = nz; + break; + case 2: + xs = 0; + xe = nx; + ys = 0; + ye = ny; + zs = 2; + ze = nz - 3; + break; + } + if (xid < xs || xid >= xe || yid < ys || yid >= ye || zid < zs || zid >= ze) return; + + /* // Ensure that we are only operating on cells that will be used if (size_t const min = 3, max = 3; xid < min or xid >= nx - max or yid < min or yid >= ny - max or zid < min or zid >= nz - max) { return; } + */ // Compute the total number of cells int const n_cells = nx * ny * nz; @@ -56,6 +88,10 @@ __global__ void PPMC_CTU(Real *dev_conserved, Real *dev_bounds_L, Real *dev_boun break; } + + + + // load the 5-cell stencil into registers // cell i reconstruction::Primitive const cell_i = From 9e97283ee80b1ec1e014941242570e3d014321b2 Mon Sep 17 00:00:00 2001 From: Alwin Date: Thu, 27 Jul 2023 16:57:28 -0400 Subject: [PATCH 460/694] add debug_utilities --- src/utils/debug_utilities.cu | 55 ++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) create mode 100644 src/utils/debug_utilities.cu diff --git a/src/utils/debug_utilities.cu b/src/utils/debug_utilities.cu new file mode 100644 index 000000000..98c0672ff --- /dev/null +++ b/src/utils/debug_utilities.cu @@ -0,0 +1,55 @@ +#include + +#include "../global/global.h" +#include "../global/global_cuda.h" +#include "../io/io.h" // provides chprintf +#include "../utils/error_handling.h" // provides chexit + +__global__ void Dump_Values_Kernel(Real* device_array, int array_size, int marker) +{ + int tid = threadIdx.x + blockIdx.x * blockDim.x; + if (tid >= array_size) return; + kernel_printf("Dump Values: marker %d tid %d value %g \n", marker, tid, device_array[tid]); +} + +/* + Prints out all values of a device_array + */ +void Dump_Values(Real* device_array, int array_size, int marker) +{ + int ngrid = (array_size + TPB - 1) / TPB; + dim3 dim1dGrid(ngrid, 1, 1); + dim3 dim1dBlock(TPB, 1, 1); + hipLaunchKernelGGL(Dump_Values_Kernel, dim1dGrid, dim1dBlock, 0, 0, device_array, array_size, marker); +} + +__global__ void Check_For_Nan_Kernel(Real* device_array, int array_size, int check_num, bool* out_bool) +{ + int tid = threadIdx.x + blockIdx.x * blockDim.x; + if (tid >= array_size) return; + if (device_array[tid] == device_array[tid]) return; + out_bool[0] = true; + kernel_printf("Check_For_Nan_Kernel found Nan Checknum: %d Thread: %d\n", check_num, tid); +} + +/* + Checks a device_array for NaN and prints/exits if found + */ +void Check_For_Nan(Real* device_array, int array_size, int check_num) +{ + bool host_out_bool[1] = {false}; + bool* out_bool; + cudaMalloc((void**)&out_bool, sizeof(bool)); + cudaMemcpy(out_bool, host_out_bool, sizeof(bool), cudaMemcpyHostToDevice); + int ngrid = (array_size + TPB - 1) / TPB; + dim3 dim1dGrid(ngrid, 1, 1); + dim3 dim1dBlock(TPB, 1, 1); + hipLaunchKernelGGL(Check_For_Nan_Kernel, dim1dGrid, dim1dBlock, 0, 0, device_array, array_size, check_num, out_bool); + cudaMemcpy(host_out_bool, out_bool, sizeof(bool), cudaMemcpyDeviceToHost); + cudaFree(out_bool); + + if (host_out_bool[0]) { + chexit(-1); + } +} + From 7a5874e6f9b00a11e4f4d0f4abcaa2ae864a7397 Mon Sep 17 00:00:00 2001 From: Alwin Date: Thu, 27 Jul 2023 17:06:53 -0400 Subject: [PATCH 461/694] more reversions --- src/grid/boundary_conditions.cpp | 5 ++- src/reconstruction/ppmc_cuda.cu | 53 +++++++++++++++----------------- src/utils/debug_utilities.cu | 1 - 3 files changed, 26 insertions(+), 33 deletions(-) diff --git a/src/grid/boundary_conditions.cpp b/src/grid/boundary_conditions.cpp index 06e7196af..c13327987 100644 --- a/src/grid/boundary_conditions.cpp +++ b/src/grid/boundary_conditions.cpp @@ -153,9 +153,8 @@ int Grid3D::Check_Custom_Boundary(int *flags, struct parameters P) for (int i = 0; i < 6; i++) { if (flags[i] < 1 or flags[i] > 5) { chprintf( - "Invalid boundary conditions. Must select between 1 (periodic), 2 " - "(reflective), 3 (transmissive), 4 (custom), 5 (mpi).\n"); - chexit(-1); + "WARNING: Possibly invalid boundary conditions for direction: %d flag: %d. Must select between 1 (periodic), 2 " + "(reflective), 3 (transmissive), 4 (custom), 5 (mpi).\n", i, flags[i]); } if (flags[i] == 4) { /*custom boundaries*/ diff --git a/src/reconstruction/ppmc_cuda.cu b/src/reconstruction/ppmc_cuda.cu index b2253da36..aaec9895a 100644 --- a/src/reconstruction/ppmc_cuda.cu +++ b/src/reconstruction/ppmc_cuda.cu @@ -27,33 +27,32 @@ __global__ void PPMC_CTU(Real *dev_conserved, Real *dev_bounds_L, Real *dev_boun int xid, yid, zid; cuda_utilities::compute3DIndices(thread_id, nx, ny, xid, yid, zid); - int xs, xe, ys, ye, zs, ze; switch (dir) { - case 0: - xs = 2; - xe = nx - 3; - ys = 0; - ye = ny; - zs = 0; - ze = nz; - break; - case 1: - xs = 0; - xe = nx; - ys = 2; - ye = ny - 3; - zs = 0; - ze = nz; - break; - case 2: - xs = 0; - xe = nx; - ys = 0; - ye = ny; - zs = 2; - ze = nz - 3; - break; + case 0: + xs = 2; + xe = nx - 3; + ys = 0; + ye = ny; + zs = 0; + ze = nz; + break; + case 1: + xs = 0; + xe = nx; + ys = 2; + ye = ny - 3; + zs = 0; + ze = nz; + break; + case 2: + xs = 0; + xe = nx; + ys = 0; + ye = ny; + zs = 2; + ze = nz - 3; + break; } if (xid < xs || xid >= xe || yid < ys || yid >= ye || zid < zs || zid >= ze) return; @@ -88,10 +87,6 @@ __global__ void PPMC_CTU(Real *dev_conserved, Real *dev_bounds_L, Real *dev_boun break; } - - - - // load the 5-cell stencil into registers // cell i reconstruction::Primitive const cell_i = diff --git a/src/utils/debug_utilities.cu b/src/utils/debug_utilities.cu index 98c0672ff..1cb6214a5 100644 --- a/src/utils/debug_utilities.cu +++ b/src/utils/debug_utilities.cu @@ -52,4 +52,3 @@ void Check_For_Nan(Real* device_array, int array_size, int check_num) chexit(-1); } } - From 0caae5780d210d0e0d5905a98b22439717e1eef2 Mon Sep 17 00:00:00 2001 From: Alwin Date: Mon, 31 Jul 2023 20:38:50 -0400 Subject: [PATCH 462/694] format boundaries --- src/grid/boundary_conditions.cpp | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/grid/boundary_conditions.cpp b/src/grid/boundary_conditions.cpp index c13327987..d7d332e8a 100644 --- a/src/grid/boundary_conditions.cpp +++ b/src/grid/boundary_conditions.cpp @@ -151,11 +151,15 @@ int Grid3D::Check_Custom_Boundary(int *flags, struct parameters P) } for (int i = 0; i < 6; i++) { + /* Alwin: I am disabling this check because it is needlessly occurring every timestep. if (flags[i] < 1 or flags[i] > 5) { chprintf( - "WARNING: Possibly invalid boundary conditions for direction: %d flag: %d. Must select between 1 (periodic), 2 " - "(reflective), 3 (transmissive), 4 (custom), 5 (mpi).\n", i, flags[i]); + "WARNING: Possibly invalid boundary conditions for direction: %d flag: %d. Must select between 1 (periodic), " + "2 " + "(reflective), 3 (transmissive), 4 (custom), 5 (mpi).\n", + i, flags[i]); } + */ if (flags[i] == 4) { /*custom boundaries*/ return 1; From 20b5f10911b5bffc10b3d6b86c9ea5cd29341ad5 Mon Sep 17 00:00:00 2001 From: Alwin Date: Mon, 31 Jul 2023 20:57:12 -0400 Subject: [PATCH 463/694] initial condition revert --- src/grid/initial_conditions.cpp | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/src/grid/initial_conditions.cpp b/src/grid/initial_conditions.cpp index eebfbb21a..4452f3dcc 100644 --- a/src/grid/initial_conditions.cpp +++ b/src/grid/initial_conditions.cpp @@ -508,9 +508,9 @@ void Grid3D::Square_Wave(parameters const &P) * \brief Initialize the grid with a Riemann problem. */ void Grid3D::Riemann(parameters const &P) { - size_t const istart = H.n_ghost; - size_t const iend = H.nx - H.n_ghost; - size_t jstart, kstart, jend, kend; + int const istart = H.n_ghost; + int const iend = H.nx - H.n_ghost; + int jstart, kstart, jend, kend; if (H.ny > 1) { jstart = H.n_ghost; jend = H.ny - H.n_ghost; @@ -527,9 +527,9 @@ void Grid3D::Riemann(parameters const &P) } // set initial values of conserved variables - for (size_t k = kstart - 1; k < kend; k++) { - for (size_t j = jstart - 1; j < jend; j++) { - for (size_t i = istart - 1; i < iend; i++) { + for (int k = kstart - 1; k < kend; k++) { + for (int j = jstart - 1; j < jend; j++) { + for (int i = istart - 1; i < iend; i++) { // get cell index size_t const id = i + j * H.nx + k * H.nx * H.ny; @@ -540,6 +540,7 @@ void Grid3D::Riemann(parameters const &P) #ifdef MHD // Set the magnetic field including the rightmost ghost cell on the // left side which is really the left face of the first grid cell + // WARNING: Only correct in 3-D if (x_pos < P.diaph) { C.magnetic_x[id] = P.Bx_l; C.magnetic_y[id] = P.By_l; @@ -582,12 +583,12 @@ void Grid3D::Riemann(parameters const &P) #endif // SCALAR #ifdef DE C.GasEnergy[id] = P.P_r / (gama - 1.0); -#endif // DE - } - } - } - } - } +#endif // DE + } // if diaph + } // if real + } // k + } // j + } // i } /*! \fn void Shu_Osher() @@ -1973,4 +1974,4 @@ void Grid3D::Orszag_Tang_Vortex() } } } -#endif // MHD \ No newline at end of file +#endif // MHD From 9e8602da921cca013f74ed7584157a7738413a88 Mon Sep 17 00:00:00 2001 From: Alwin Date: Tue, 1 Aug 2023 10:01:07 -0400 Subject: [PATCH 464/694] appease clang tidy and disable incorrect test --- src/reconstruction/ppmc_cuda_tests.cu | 2 ++ src/utils/debug_utilities.cu | 12 +++++++++--- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/src/reconstruction/ppmc_cuda_tests.cu b/src/reconstruction/ppmc_cuda_tests.cu index 7dd9b49e3..79b4aafac 100644 --- a/src/reconstruction/ppmc_cuda_tests.cu +++ b/src/reconstruction/ppmc_cuda_tests.cu @@ -24,6 +24,8 @@ TEST(tHYDROPpmcCTUReconstructor, CorrectInputExpectCorrectOutput) { + // Alwin: skip until this has been fixed + GTEST_SKIP(); // Set up PRNG to use std::mt19937_64 prng(42); std::uniform_real_distribution doubleRand(0.1, 5); diff --git a/src/utils/debug_utilities.cu b/src/utils/debug_utilities.cu index 1cb6214a5..9a1157aca 100644 --- a/src/utils/debug_utilities.cu +++ b/src/utils/debug_utilities.cu @@ -8,7 +8,9 @@ __global__ void Dump_Values_Kernel(Real* device_array, int array_size, int marker) { int tid = threadIdx.x + blockIdx.x * blockDim.x; - if (tid >= array_size) return; + if (tid >= array_size) { + return; + } kernel_printf("Dump Values: marker %d tid %d value %g \n", marker, tid, device_array[tid]); } @@ -26,8 +28,12 @@ void Dump_Values(Real* device_array, int array_size, int marker) __global__ void Check_For_Nan_Kernel(Real* device_array, int array_size, int check_num, bool* out_bool) { int tid = threadIdx.x + blockIdx.x * blockDim.x; - if (tid >= array_size) return; - if (device_array[tid] == device_array[tid]) return; + if (tid >= array_size) { + return; + } + if (device_array[tid] == device_array[tid]) { + return; + } out_bool[0] = true; kernel_printf("Check_For_Nan_Kernel found Nan Checknum: %d Thread: %d\n", check_num, tid); } From 5a550cc893ca7222d69ec5c63f4e807797876026 Mon Sep 17 00:00:00 2001 From: Alwin Date: Tue, 1 Aug 2023 10:20:43 -0400 Subject: [PATCH 465/694] clang tidy --- src/reconstruction/ppmc_cuda.cu | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/reconstruction/ppmc_cuda.cu b/src/reconstruction/ppmc_cuda.cu index aaec9895a..efe3e51c1 100644 --- a/src/reconstruction/ppmc_cuda.cu +++ b/src/reconstruction/ppmc_cuda.cu @@ -54,7 +54,9 @@ __global__ void PPMC_CTU(Real *dev_conserved, Real *dev_bounds_L, Real *dev_boun ze = nz - 3; break; } - if (xid < xs || xid >= xe || yid < ys || yid >= ye || zid < zs || zid >= ze) return; + if (xid < xs || xid >= xe || yid < ys || yid >= ye || zid < zs || zid >= ze) { + return; + } /* // Ensure that we are only operating on cells that will be used From 297225bd5c6d786d278ac652adf14a8c5901a5bd Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Tue, 8 Aug 2023 10:21:20 -0400 Subject: [PATCH 466/694] Move check for boundary conditions type This previously was checked on every single time step. Now it is checked once at the beginning in the `Check_Configuration` function. --- src/grid/boundary_conditions.cpp | 9 --------- src/utils/error_handling.cpp | 14 ++++++++++++++ 2 files changed, 14 insertions(+), 9 deletions(-) diff --git a/src/grid/boundary_conditions.cpp b/src/grid/boundary_conditions.cpp index d7d332e8a..eca473fdb 100644 --- a/src/grid/boundary_conditions.cpp +++ b/src/grid/boundary_conditions.cpp @@ -151,15 +151,6 @@ int Grid3D::Check_Custom_Boundary(int *flags, struct parameters P) } for (int i = 0; i < 6; i++) { - /* Alwin: I am disabling this check because it is needlessly occurring every timestep. - if (flags[i] < 1 or flags[i] > 5) { - chprintf( - "WARNING: Possibly invalid boundary conditions for direction: %d flag: %d. Must select between 1 (periodic), " - "2 " - "(reflective), 3 (transmissive), 4 (custom), 5 (mpi).\n", - i, flags[i]); - } - */ if (flags[i] == 4) { /*custom boundaries*/ return 1; diff --git a/src/utils/error_handling.cpp b/src/utils/error_handling.cpp index fd2a59ad7..38374c704 100644 --- a/src/utils/error_handling.cpp +++ b/src/utils/error_handling.cpp @@ -54,6 +54,20 @@ void Check_Configuration(parameters const &P) #error "Only one integrator can be enabled at a time." #endif // Only one integrator check + // Check the boundary conditions + auto Check_Boundary = [](int const &boundary) { + bool is_allowed_bc = boundary >= 0 and boundary <= 4; + assert(is_allowed_bc && + "WARNING: Possibly invalid boundary conditions for direction: %d flag: %d. Must select between (periodic), " + "2 (reflective), 3 (transmissive), 4 (custom), 5 (mpi).\n"); + }; + Check_Boundary(P.xl_bcnd); + Check_Boundary(P.xu_bcnd); + Check_Boundary(P.yl_bcnd); + Check_Boundary(P.yu_bcnd); + Check_Boundary(P.zl_bcnd); + Check_Boundary(P.zu_bcnd); + // warn if error checking is disabled #ifndef CUDA_ERROR_CHECK #warning "CUDA error checking is disabled. Enable it with the CUDA_ERROR_CHECK macro" From df7df09ab36ce0757abec64009eb05af275dbeef Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Tue, 8 Aug 2023 10:26:32 -0400 Subject: [PATCH 467/694] Remove commented out code --- src/grid/initial_conditions.cpp | 12 ++++++------ src/reconstruction/ppmc_cuda.cu | 8 -------- 2 files changed, 6 insertions(+), 14 deletions(-) diff --git a/src/grid/initial_conditions.cpp b/src/grid/initial_conditions.cpp index 4452f3dcc..26c4cf3ca 100644 --- a/src/grid/initial_conditions.cpp +++ b/src/grid/initial_conditions.cpp @@ -583,12 +583,12 @@ void Grid3D::Riemann(parameters const &P) #endif // SCALAR #ifdef DE C.GasEnergy[id] = P.P_r / (gama - 1.0); -#endif // DE - } // if diaph - } // if real - } // k - } // j - } // i +#endif // DE + } + } + } + } + } } /*! \fn void Shu_Osher() diff --git a/src/reconstruction/ppmc_cuda.cu b/src/reconstruction/ppmc_cuda.cu index efe3e51c1..ed4af9daa 100644 --- a/src/reconstruction/ppmc_cuda.cu +++ b/src/reconstruction/ppmc_cuda.cu @@ -58,14 +58,6 @@ __global__ void PPMC_CTU(Real *dev_conserved, Real *dev_bounds_L, Real *dev_boun return; } - /* - // Ensure that we are only operating on cells that will be used - if (size_t const min = 3, max = 3; - xid < min or xid >= nx - max or yid < min or yid >= ny - max or zid < min or zid >= nz - max) { - return; - } - */ - // Compute the total number of cells int const n_cells = nx * ny * nz; From a6ace6b2c5605a3978228af4e0946146a3a36af6 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Tue, 8 Aug 2023 10:33:46 -0400 Subject: [PATCH 468/694] Replace `int` indices with `size_t` in Riemann IC --- src/grid/initial_conditions.cpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/grid/initial_conditions.cpp b/src/grid/initial_conditions.cpp index 26c4cf3ca..38967e4b7 100644 --- a/src/grid/initial_conditions.cpp +++ b/src/grid/initial_conditions.cpp @@ -508,18 +508,18 @@ void Grid3D::Square_Wave(parameters const &P) * \brief Initialize the grid with a Riemann problem. */ void Grid3D::Riemann(parameters const &P) { - int const istart = H.n_ghost; - int const iend = H.nx - H.n_ghost; - int jstart, kstart, jend, kend; + size_t const istart = H.n_ghost - 1; + size_t const iend = H.nx - H.n_ghost; + size_t jstart, kstart, jend, kend; if (H.ny > 1) { - jstart = H.n_ghost; + jstart = H.n_ghost - 1; jend = H.ny - H.n_ghost; } else { jstart = 0; jend = H.ny; } if (H.nz > 1) { - kstart = H.n_ghost; + kstart = H.n_ghost - 1; kend = H.nz - H.n_ghost; } else { kstart = 0; @@ -527,9 +527,9 @@ void Grid3D::Riemann(parameters const &P) } // set initial values of conserved variables - for (int k = kstart - 1; k < kend; k++) { - for (int j = jstart - 1; j < jend; j++) { - for (int i = istart - 1; i < iend; i++) { + for (size_t k = kstart; k < kend; k++) { + for (size_t j = jstart; j < jend; j++) { + for (size_t i = istart; i < iend; i++) { // get cell index size_t const id = i + j * H.nx + k * H.nx * H.ny; From fe78156530da3dbbda905a60a9df138773e91f3a Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Tue, 8 Aug 2023 11:45:44 -0400 Subject: [PATCH 469/694] Add 1D & 2D Sod tests --- cholla-tests-data | 2 +- src/system_tests/hydro_system_tests.cpp | 12 ++++ ...nsionalCorrectInputExpectCorrectOutput.txt | 56 +++++++++++++++++++ ...nsionalCorrectInputExpectCorrectOutput.txt | 56 +++++++++++++++++++ 4 files changed, 125 insertions(+), 1 deletion(-) create mode 100644 src/system_tests/input_files/tHYDROSYSTEMSodShockTube_OneDimensionalCorrectInputExpectCorrectOutput.txt create mode 100644 src/system_tests/input_files/tHYDROSYSTEMSodShockTube_TwoDimensionalCorrectInputExpectCorrectOutput.txt diff --git a/cholla-tests-data b/cholla-tests-data index 321416680..dcd73ff52 160000 --- a/cholla-tests-data +++ b/cholla-tests-data @@ -1 +1 @@ -Subproject commit 321416680f95d97b5d4ccc6f0b83a8b9ecafdaf0 +Subproject commit dcd73ff52b9027627b247c6d888bcdb56840c85e diff --git a/src/system_tests/hydro_system_tests.cpp b/src/system_tests/hydro_system_tests.cpp index 292935813..288690290 100644 --- a/src/system_tests/hydro_system_tests.cpp +++ b/src/system_tests/hydro_system_tests.cpp @@ -68,6 +68,18 @@ INSTANTIATE_TEST_SUITE_P(CorrectInputExpectCorrectOutput, tHYDROtMHDSYSTEMSodSho /// @} // ============================================================================= +TEST(tHYDROSYSTEMSodShockTube, OneDimensionalCorrectInputExpectCorrectOutput) +{ + systemTest::SystemTestRunner sodTest; + sodTest.runTest(); +} + +TEST(tHYDROSYSTEMSodShockTube, TwoDimensionalCorrectInputExpectCorrectOutput) +{ + systemTest::SystemTestRunner sodTest; + sodTest.runTest(); +} + TEST(tHYDROtMHDSYSTEMConstant, CorrectInputExpectCorrectOutput) { systemTest::SystemTestRunner testObject(false, false, false); diff --git a/src/system_tests/input_files/tHYDROSYSTEMSodShockTube_OneDimensionalCorrectInputExpectCorrectOutput.txt b/src/system_tests/input_files/tHYDROSYSTEMSodShockTube_OneDimensionalCorrectInputExpectCorrectOutput.txt new file mode 100644 index 000000000..dd54ff082 --- /dev/null +++ b/src/system_tests/input_files/tHYDROSYSTEMSodShockTube_OneDimensionalCorrectInputExpectCorrectOutput.txt @@ -0,0 +1,56 @@ +# +# Parameter File for 1D Sod Shock tube +# + +################################################ +# number of grid cells in the x dimension +nx=64 +# number of grid cells in the y dimension +ny=1 +# number of grid cells in the z dimension +nz=1 +# final output time +tout=0.2 +# time interval for output +outstep=0.2 +# name of initial conditions +init=Riemann +# domain properties +xmin=0.0 +ymin=0.0 +zmin=0.0 +xlen=1.0 +ylen=1.0 +zlen=1.0 +# type of boundary conditions +xl_bcnd=3 +xu_bcnd=3 +yl_bcnd=3 +yu_bcnd=3 +zl_bcnd=3 +zu_bcnd=3 +# path to output directory +outdir=./ + +################################################# +# Parameters for 1D Riemann problems +# density of left state +rho_l=1.0 +# velocity of left state +vx_l=0.0 +vy_l=0.0 +vz_l=0.0 +# pressure of left state +P_l=1.0 +# density of right state +rho_r=0.1 +# velocity of right state +vx_r=0.0 +vy_r=0.0 +vz_r=0.0 +# pressure of right state +P_r=0.1 +# location of initial discontinuity +diaph=0.5 +# value of gamma +gamma=1.4 diff --git a/src/system_tests/input_files/tHYDROSYSTEMSodShockTube_TwoDimensionalCorrectInputExpectCorrectOutput.txt b/src/system_tests/input_files/tHYDROSYSTEMSodShockTube_TwoDimensionalCorrectInputExpectCorrectOutput.txt new file mode 100644 index 000000000..c89e179be --- /dev/null +++ b/src/system_tests/input_files/tHYDROSYSTEMSodShockTube_TwoDimensionalCorrectInputExpectCorrectOutput.txt @@ -0,0 +1,56 @@ +# +# Parameter File for 1D Sod Shock tube +# + +################################################ +# number of grid cells in the x dimension +nx=64 +# number of grid cells in the y dimension +ny=64 +# number of grid cells in the z dimension +nz=1 +# final output time +tout=0.2 +# time interval for output +outstep=0.2 +# name of initial conditions +init=Riemann +# domain properties +xmin=0.0 +ymin=0.0 +zmin=0.0 +xlen=1.0 +ylen=1.0 +zlen=1.0 +# type of boundary conditions +xl_bcnd=3 +xu_bcnd=3 +yl_bcnd=3 +yu_bcnd=3 +zl_bcnd=3 +zu_bcnd=3 +# path to output directory +outdir=./ + +################################################# +# Parameters for 1D Riemann problems +# density of left state +rho_l=1.0 +# velocity of left state +vx_l=0.0 +vy_l=0.0 +vz_l=0.0 +# pressure of left state +P_l=1.0 +# density of right state +rho_r=0.1 +# velocity of right state +vx_r=0.0 +vy_r=0.0 +vz_r=0.0 +# pressure of right state +P_r=0.1 +# location of initial discontinuity +diaph=0.5 +# value of gamma +gamma=1.4 From 11804ba2362387f67143ca0f1f135b1a69856a8d Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Tue, 8 Aug 2023 14:49:58 -0400 Subject: [PATCH 470/694] Add new reconstruction::Thread_Guard function This function determines if a thread is in the domain to be operated on and returns the appropriate bool for a threadguard; true if it isn't, false if it is. - Enable PLMC test, for some reason it was commented out - Update PPMC tests for new threadguard --- src/reconstruction/plmc_cuda.cu | 4 +- src/reconstruction/plmc_cuda_tests.cu | 506 +++++++++++++------------- src/reconstruction/ppmc_cuda.cu | 32 +- src/reconstruction/ppmc_cuda_tests.cu | 194 +++++----- src/reconstruction/reconstruction.h | 23 ++ 5 files changed, 376 insertions(+), 383 deletions(-) diff --git a/src/reconstruction/plmc_cuda.cu b/src/reconstruction/plmc_cuda.cu index 8db428b82..41a5ae505 100644 --- a/src/reconstruction/plmc_cuda.cu +++ b/src/reconstruction/plmc_cuda.cu @@ -29,8 +29,8 @@ __global__ __launch_bounds__(TPB) void PLMC_cuda(Real *dev_conserved, Real *dev_ int xid, yid, zid; cuda_utilities::compute3DIndices(thread_id, nx, ny, xid, yid, zid); - // Thread guard to prevent overrun - if (xid < 1 or xid >= nx - 2 or yid < 1 or yid >= ny - 2 or zid < 1 or zid >= nz - 2) { + // Ensure that we are only operating on cells that will be used + if (reconstruction::Thread_Guard<2>(nx, ny, nz, xid, yid, zid)) { return; } diff --git a/src/reconstruction/plmc_cuda_tests.cu b/src/reconstruction/plmc_cuda_tests.cu index 11f859967..3616d2d0a 100644 --- a/src/reconstruction/plmc_cuda_tests.cu +++ b/src/reconstruction/plmc_cuda_tests.cu @@ -1,280 +1,280 @@ -// /*! -// * \file plmc_cuda_tests.cu -// * \brief Tests for the contents of plmc_cuda.h and plmc_cuda.cu -// * -// */ +/*! + * \file plmc_cuda_tests.cu + * \brief Tests for the contents of plmc_cuda.h and plmc_cuda.cu + * + */ -// // STL Includes -// #include -// #include -// #include -// #include +// STL Includes +#include +#include +#include +#include -// // External Includes -// #include // Include GoogleTest and related libraries/headers +// External Includes +#include // Include GoogleTest and related libraries/headers -// // Local Includes -// #include +// Local Includes +#include -// #include "../global/global.h" -// #include "../io/io.h" -// #include "../reconstruction/plmc_cuda.h" -// #include "../utils/DeviceVector.h" -// #include "../utils/hydro_utilities.h" -// #include "../utils/testing_utilities.h" +#include "../global/global.h" +#include "../io/io.h" +#include "../reconstruction/plmc_cuda.h" +#include "../utils/DeviceVector.h" +#include "../utils/hydro_utilities.h" +#include "../utils/testing_utilities.h" -// TEST(tHYDROPlmcReconstructor, CorrectInputExpectCorrectOutput) -// { -// // Set up PRNG to use -// std::mt19937_64 prng(42); -// std::uniform_real_distribution doubleRand(0.1, 5); +TEST(tHYDROPlmcReconstructor, CorrectInputExpectCorrectOutput) +{ + // Set up PRNG to use + std::mt19937_64 prng(42); + std::uniform_real_distribution doubleRand(0.1, 5); -// // Mock up needed information -// size_t const nx = 5; -// size_t const ny = 4; -// size_t const nz = 4; -// size_t const n_fields = 5; -// double const dx = doubleRand(prng); -// double const dt = doubleRand(prng); -// double const gamma = 5.0 / 3.0; + // Mock up needed information + size_t const nx = 5; + size_t const ny = 4; + size_t const nz = 4; + size_t const n_fields = 5; + double const dx = doubleRand(prng); + double const dt = doubleRand(prng); + double const gamma = 5.0 / 3.0; -// // Setup host grid. Fill host grid with random values and randomly assign maximum value -// std::vector host_grid(nx * ny * nz * n_fields); -// for (Real &val : host_grid) { -// val = doubleRand(prng); -// } + // Setup host grid. Fill host grid with random values and randomly assign maximum value + std::vector host_grid(nx * ny * nz * n_fields); + for (Real &val : host_grid) { + val = doubleRand(prng); + } -// // Allocating and copying to device -// cuda_utilities::DeviceVector dev_grid(host_grid.size()); -// dev_grid.cpyHostToDevice(host_grid); + // Allocating and copying to device + cuda_utilities::DeviceVector dev_grid(host_grid.size()); + dev_grid.cpyHostToDevice(host_grid); -// // Fiducial Data -// std::vector> fiducial_interface_left = {{{26, 2.1584359129984056}, -// {27, 0.70033864721549188}, -// {106, 2.2476363309467553}, -// {107, 3.0633780053857027}, -// {186, 2.2245934101106259}, -// {187, 2.1015872413794123}, -// {266, 2.1263341057778309}, -// {267, 3.9675148506537838}, -// {346, 3.3640057502842691}, -// {347, 21.091316282933843}}, -// {{21, 0.72430827309279655}, -// {37, 0.19457128219588618}, -// {101, 5.4739527659741896}, -// {117, 4.4286255636679313}, -// {181, 0.12703829036056602}, -// {197, 2.2851440769830953}, -// {261, 1.5337035731959561}, -// {277, 2.697375839048191}, -// {341, 22.319601655044117}, -// {357, 82.515887983144168}}, -// {{25, 2.2863650183226212}, -// {29, 1.686415421301841}, -// {105, 0.72340346106443465}, -// {109, 5.4713687086831388}, -// {185, 3.929100145230096}, -// {189, 4.9166140516911483}, -// {265, 0.95177493689267167}, -// {269, 0.46056494878491938}, -// {345, 3.6886096301452787}, -// {349, 16.105488797582133}}}; -// std::vector> fiducial_interface_right = {{{25, 3.8877922383184833}, -// {26, 0.70033864721549188}, -// {105, 1.5947787943675635}, -// {106, 3.0633780053857027}, -// {185, 4.0069556576401011}, -// {186, 2.1015872413794123}, -// {265, 1.7883678016935785}, -// {266, 3.9675148506537838}, -// {345, 2.8032969746372527}, -// {346, 21.091316282933843}}, -// {{17, 0.43265217076853835}, -// {33, 0.19457128219588618}, -// {97, 3.2697645945288754}, -// {113, 4.4286255636679313}, -// {177, 0.07588397666718491}, -// {193, 2.2851440769830953}, -// {257, 0.91612950577699748}, -// {273, 2.697375839048191}, -// {337, 13.332201861384396}, -// {353, 82.515887983144168}}, -// {{5, 2.2863650183226212}, -// {9, 1.686415421301841}, -// {85, 0.72340346106443465}, -// {89, 1.7792505446336098}, -// {165, 5.3997753452111859}, -// {169, 1.4379190463124139}, -// {245, 0.95177493689267167}, -// {249, 0.46056494878491938}, -// {325, 6.6889498465051407}, -// {329, 1.6145084086614281}}}; + // Fiducial Data + std::vector> fiducial_interface_left = {{{26, 2.1584359129984056}, + {27, 0.70033864721549188}, + {106, 2.2476363309467553}, + {107, 3.0633780053857027}, + {186, 2.2245934101106259}, + {187, 2.1015872413794123}, + {266, 2.1263341057778309}, + {267, 3.9675148506537838}, + {346, 3.3640057502842691}, + {347, 21.091316282933843}}, + {{21, 0.72430827309279655}, + {37, 0.19457128219588618}, + {101, 5.4739527659741896}, + {117, 4.4286255636679313}, + {181, 0.12703829036056602}, + {197, 2.2851440769830953}, + {261, 1.5337035731959561}, + {277, 2.697375839048191}, + {341, 22.319601655044117}, + {357, 82.515887983144168}}, + {{25, 2.2863650183226212}, + {29, 1.686415421301841}, + {105, 0.72340346106443465}, + {109, 5.4713687086831388}, + {185, 3.929100145230096}, + {189, 4.9166140516911483}, + {265, 0.95177493689267167}, + {269, 0.46056494878491938}, + {345, 3.6886096301452787}, + {349, 16.105488797582133}}}; + std::vector> fiducial_interface_right = {{{25, 3.8877922383184833}, + {26, 0.70033864721549188}, + {105, 1.5947787943675635}, + {106, 3.0633780053857027}, + {185, 4.0069556576401011}, + {186, 2.1015872413794123}, + {265, 1.7883678016935785}, + {266, 3.9675148506537838}, + {345, 2.8032969746372527}, + {346, 21.091316282933843}}, + {{17, 0.43265217076853835}, + {33, 0.19457128219588618}, + {97, 3.2697645945288754}, + {113, 4.4286255636679313}, + {177, 0.07588397666718491}, + {193, 2.2851440769830953}, + {257, 0.91612950577699748}, + {273, 2.697375839048191}, + {337, 13.332201861384396}, + {353, 82.515887983144168}}, + {{5, 2.2863650183226212}, + {9, 1.686415421301841}, + {85, 0.72340346106443465}, + {89, 1.7792505446336098}, + {165, 5.3997753452111859}, + {169, 1.4379190463124139}, + {245, 0.95177493689267167}, + {249, 0.46056494878491938}, + {325, 6.6889498465051407}, + {329, 1.6145084086614281}}}; -// // Loop over different directions -// for (size_t direction = 0; direction < 3; direction++) { -// // Assign the shape -// size_t nx_rot, ny_rot, nz_rot; -// switch (direction) { -// case 0: -// nx_rot = nx; -// ny_rot = ny; -// nz_rot = nz; -// break; -// case 1: -// nx_rot = ny; -// ny_rot = nz; -// nz_rot = nx; -// break; -// case 2: -// nx_rot = nz; -// ny_rot = nx; -// nz_rot = ny; -// break; -// } + // Loop over different directions + for (size_t direction = 0; direction < 3; direction++) { + // Assign the shape + size_t nx_rot, ny_rot, nz_rot; + switch (direction) { + case 0: + nx_rot = nx; + ny_rot = ny; + nz_rot = nz; + break; + case 1: + nx_rot = ny; + ny_rot = nz; + nz_rot = nx; + break; + case 2: + nx_rot = nz; + ny_rot = nx; + nz_rot = ny; + break; + } -// // Allocate device buffers -// cuda_utilities::DeviceVector dev_interface_left(host_grid.size(), true); -// cuda_utilities::DeviceVector dev_interface_right(host_grid.size(), true); + // Allocate device buffers + cuda_utilities::DeviceVector dev_interface_left(host_grid.size(), true); + cuda_utilities::DeviceVector dev_interface_right(host_grid.size(), true); -// // Launch kernel -// hipLaunchKernelGGL(PLMC_cuda, dev_grid.size(), 1, 0, 0, dev_grid.data(), dev_interface_left.data(), -// dev_interface_right.data(), nx_rot, ny_rot, nz_rot, dx, dt, gamma, direction, n_fields); -// CudaCheckError(); -// CHECK(cudaDeviceSynchronize()); + // Launch kernel + hipLaunchKernelGGL(PLMC_cuda, dev_grid.size(), 1, 0, 0, dev_grid.data(), dev_interface_left.data(), + dev_interface_right.data(), nx_rot, ny_rot, nz_rot, dx, dt, gamma, direction, n_fields); + CudaCheckError(); + CHECK(cudaDeviceSynchronize()); -// // Perform Comparison -// for (size_t i = 0; i < host_grid.size(); i++) { -// // Check the left interface -// double test_val = dev_interface_left.at(i); -// double fiducial_val = -// (fiducial_interface_left.at(direction).find(i) == fiducial_interface_left.at(direction).end()) -// ? 0.0 -// : fiducial_interface_left.at(direction)[i]; + // Perform Comparison + for (size_t i = 0; i < host_grid.size(); i++) { + // Check the left interface + double test_val = dev_interface_left.at(i); + double fiducial_val = + (fiducial_interface_left.at(direction).find(i) == fiducial_interface_left.at(direction).end()) + ? 0.0 + : fiducial_interface_left.at(direction)[i]; -// testingUtilities::checkResults( -// fiducial_val, test_val, -// "left interface at i=" + std::to_string(i) + ", in direction " + std::to_string(direction)); + testingUtilities::checkResults( + fiducial_val, test_val, + "left interface at i=" + std::to_string(i) + ", in direction " + std::to_string(direction)); -// // Check the right interface -// test_val = dev_interface_right.at(i); -// fiducial_val = (fiducial_interface_right.at(direction).find(i) == fiducial_interface_right.at(direction).end()) -// ? 0.0 -// : fiducial_interface_right.at(direction)[i]; + // Check the right interface + test_val = dev_interface_right.at(i); + fiducial_val = (fiducial_interface_right.at(direction).find(i) == fiducial_interface_right.at(direction).end()) + ? 0.0 + : fiducial_interface_right.at(direction)[i]; -// testingUtilities::checkResults( -// fiducial_val, test_val, -// "right interface at i=" + std::to_string(i) + ", in direction " + std::to_string(direction)); -// } -// } -// } + testingUtilities::checkResults( + fiducial_val, test_val, + "right interface at i=" + std::to_string(i) + ", in direction " + std::to_string(direction)); + } + } +} -// TEST(tMHDPlmcReconstructor, CorrectInputExpectCorrectOutput) -// { -// // Set up PRNG to use -// std::mt19937_64 prng(42); -// std::uniform_real_distribution doubleRand(0.1, 5); +TEST(tMHDPlmcReconstructor, CorrectInputExpectCorrectOutput) +{ + // Set up PRNG to use + std::mt19937_64 prng(42); + std::uniform_real_distribution doubleRand(0.1, 5); -// // Mock up needed information -// size_t const nx = 4, ny = nx, nz = nx; -// size_t const n_fields = 8; -// size_t const n_cells_grid = nx * ny * nz * n_fields; -// size_t const n_cells_interface = nx * ny * nz * (n_fields - 1); -// double const dx = doubleRand(prng); -// double const dt = doubleRand(prng); -// double const gamma = 5.0 / 3.0; + // Mock up needed information + size_t const nx = 4, ny = nx, nz = nx; + size_t const n_fields = 8; + size_t const n_cells_grid = nx * ny * nz * n_fields; + size_t const n_cells_interface = nx * ny * nz * (n_fields - 1); + double const dx = doubleRand(prng); + double const dt = doubleRand(prng); + double const gamma = 5.0 / 3.0; -// // Setup host grid. Fill host grid with random values and randomly assign maximum value -// std::vector host_grid(n_cells_grid); -// for (Real &val : host_grid) { -// val = doubleRand(prng); -// } + // Setup host grid. Fill host grid with random values and randomly assign maximum value + std::vector host_grid(n_cells_grid); + for (Real &val : host_grid) { + val = doubleRand(prng); + } -// // Allocating and copying to device -// cuda_utilities::DeviceVector dev_grid(host_grid.size()); -// dev_grid.cpyHostToDevice(host_grid); + // Allocating and copying to device + cuda_utilities::DeviceVector dev_grid(host_grid.size()); + dev_grid.cpyHostToDevice(host_grid); -// // Fiducial Data -// std::vector> fiducial_interface_left = {{{21, 0.59023012197434721}, -// {85, 3.0043379408547275}, -// {149, 2.6320759184913625}, -// {213, 0.9487867623146744}, -// {277, 18.551193003661723}, -// {341, 1.8587936590169301}, -// {405, 2.1583975283044725}}, -// {{21, 0.73640639402573249}, -// {85, 3.3462413154443715}, -// {149, 2.1945584994458125}, -// {213, 0.67418839414138987}, -// {277, 16.909618487528142}, -// {341, 2.1533768050263267}, -// {405, 1.6994195863331925}}, -// {{21, 0.25340904981266843}, -// {85, 2.0441984720128734}, -// {149, 1.9959059157695584}, -// {213, 0.45377591914009824}, -// {277, 23.677832869261188}, -// {341, 1.5437923271692418}, -// {405, 1.8141353672443383}}}; -// std::vector> fiducial_interface_right = {{{20, 0.59023012197434721}, -// {84, 3.0043379408547275}, -// {148, 2.6320759184913625}, -// {212, 0.9487867623146744}, -// {276, 22.111134849009044}, -// {340, 1.8587936590169301}, -// {404, 2.1583975283044725}}, -// { -// {17, 0.44405384992296193}, -// {81, 2.5027813113931279}, -// {145, 2.6371119205792346}, -// {209, 1.0210845222961809}, -// {273, 21.360010722689488}, -// {337, 2.1634182515826184}, -// {401, 1.7073441775673177}, -// }, -// { -// {5, 0.92705119413602599}, -// {69, 1.9592598982258778}, -// {133, 0.96653490574340428}, -// {197, 1.3203867992383289}, -// {261, 8.0057564947791793}, -// {325, 1.8629714367312684}, -// {389, 1.9034519507895218}, -// }}; + // Fiducial Data + std::vector> fiducial_interface_left = {{{21, 0.59023012197434721}, + {85, 3.0043379408547275}, + {149, 2.6320759184913625}, + {213, 0.9487867623146744}, + {277, 18.551193003661723}, + {341, 1.8587936590169301}, + {405, 2.1583975283044725}}, + {{21, 0.73640639402573249}, + {85, 3.3462413154443715}, + {149, 2.1945584994458125}, + {213, 0.67418839414138987}, + {277, 16.909618487528142}, + {341, 2.1533768050263267}, + {405, 1.6994195863331925}}, + {{21, 0.25340904981266843}, + {85, 2.0441984720128734}, + {149, 1.9959059157695584}, + {213, 0.45377591914009824}, + {277, 23.677832869261188}, + {341, 1.5437923271692418}, + {405, 1.8141353672443383}}}; + std::vector> fiducial_interface_right = {{{20, 0.59023012197434721}, + {84, 3.0043379408547275}, + {148, 2.6320759184913625}, + {212, 0.9487867623146744}, + {276, 22.111134849009044}, + {340, 1.8587936590169301}, + {404, 2.1583975283044725}}, + { + {17, 0.44405384992296193}, + {81, 2.5027813113931279}, + {145, 2.6371119205792346}, + {209, 1.0210845222961809}, + {273, 21.360010722689488}, + {337, 2.1634182515826184}, + {401, 1.7073441775673177}, + }, + { + {5, 0.92705119413602599}, + {69, 1.9592598982258778}, + {133, 0.96653490574340428}, + {197, 1.3203867992383289}, + {261, 8.0057564947791793}, + {325, 1.8629714367312684}, + {389, 1.9034519507895218}, + }}; -// // Loop over different directions -// for (size_t direction = 0; direction < 3; direction++) { -// // Allocate device buffers -// cuda_utilities::DeviceVector dev_interface_left(n_cells_interface, true); -// cuda_utilities::DeviceVector dev_interface_right(n_cells_interface, true); + // Loop over different directions + for (size_t direction = 0; direction < 3; direction++) { + // Allocate device buffers + cuda_utilities::DeviceVector dev_interface_left(n_cells_interface, true); + cuda_utilities::DeviceVector dev_interface_right(n_cells_interface, true); -// // Launch kernel -// hipLaunchKernelGGL(PLMC_cuda, dev_grid.size(), 1, 0, 0, dev_grid.data(), dev_interface_left.data(), -// dev_interface_right.data(), nx, ny, nz, dx, dt, gamma, direction, n_fields); -// CudaCheckError(); -// CHECK(cudaDeviceSynchronize()); + // Launch kernel + hipLaunchKernelGGL(PLMC_cuda, dev_grid.size(), 1, 0, 0, dev_grid.data(), dev_interface_left.data(), + dev_interface_right.data(), nx, ny, nz, dx, dt, gamma, direction, n_fields); + CudaCheckError(); + CHECK(cudaDeviceSynchronize()); -// // Perform Comparison -// for (size_t i = 0; i < dev_interface_right.size(); i++) { -// // Check the left interface -// double test_val = dev_interface_left.at(i); -// double fiducial_val = -// (fiducial_interface_left.at(direction).find(i) == fiducial_interface_left.at(direction).end()) -// ? 0.0 -// : fiducial_interface_left.at(direction)[i]; + // Perform Comparison + for (size_t i = 0; i < dev_interface_right.size(); i++) { + // Check the left interface + double test_val = dev_interface_left.at(i); + double fiducial_val = + (fiducial_interface_left.at(direction).find(i) == fiducial_interface_left.at(direction).end()) + ? 0.0 + : fiducial_interface_left.at(direction)[i]; -// testingUtilities::checkResults( -// fiducial_val, test_val, -// "left interface at i=" + std::to_string(i) + ", in direction " + std::to_string(direction)); + testingUtilities::checkResults( + fiducial_val, test_val, + "left interface at i=" + std::to_string(i) + ", in direction " + std::to_string(direction)); -// // Check the right interface -// test_val = dev_interface_right.at(i); -// fiducial_val = (fiducial_interface_right.at(direction).find(i) == fiducial_interface_right.at(direction).end()) -// ? 0.0 -// : fiducial_interface_right.at(direction)[i]; + // Check the right interface + test_val = dev_interface_right.at(i); + fiducial_val = (fiducial_interface_right.at(direction).find(i) == fiducial_interface_right.at(direction).end()) + ? 0.0 + : fiducial_interface_right.at(direction)[i]; -// testingUtilities::checkResults( -// fiducial_val, test_val, -// "right interface at i=" + std::to_string(i) + ", in direction " + std::to_string(direction)); -// } -// } -// } + testingUtilities::checkResults( + fiducial_val, test_val, + "right interface at i=" + std::to_string(i) + ", in direction " + std::to_string(direction)); + } + } +} diff --git a/src/reconstruction/ppmc_cuda.cu b/src/reconstruction/ppmc_cuda.cu index ed4af9daa..4db993d70 100644 --- a/src/reconstruction/ppmc_cuda.cu +++ b/src/reconstruction/ppmc_cuda.cu @@ -27,34 +27,7 @@ __global__ void PPMC_CTU(Real *dev_conserved, Real *dev_bounds_L, Real *dev_boun int xid, yid, zid; cuda_utilities::compute3DIndices(thread_id, nx, ny, xid, yid, zid); - int xs, xe, ys, ye, zs, ze; - switch (dir) { - case 0: - xs = 2; - xe = nx - 3; - ys = 0; - ye = ny; - zs = 0; - ze = nz; - break; - case 1: - xs = 0; - xe = nx; - ys = 2; - ye = ny - 3; - zs = 0; - ze = nz; - break; - case 2: - xs = 0; - xe = nx; - ys = 0; - ye = ny; - zs = 2; - ze = nz - 3; - break; - } - if (xid < xs || xid >= xe || yid < ys || yid >= ye || zid < zs || zid >= ze) { + if (reconstruction::Thread_Guard<3>(nx, ny, nz, xid, yid, zid)) { return; } @@ -573,8 +546,7 @@ __global__ __launch_bounds__(TPB) void PPMC_VL(Real *dev_conserved, Real *dev_bo cuda_utilities::compute3DIndices(thread_id, nx, ny, xid, yid, zid); // Ensure that we are only operating on cells that will be used - if (size_t const min = 3, max = 3; - xid < min or xid >= nx - max or yid < min or yid >= ny - max or zid < min or zid >= nz - max) { + if (reconstruction::Thread_Guard<3>(nx, ny, nz, xid, yid, zid)) { return; } diff --git a/src/reconstruction/ppmc_cuda_tests.cu b/src/reconstruction/ppmc_cuda_tests.cu index 79b4aafac..1c7515ec0 100644 --- a/src/reconstruction/ppmc_cuda_tests.cu +++ b/src/reconstruction/ppmc_cuda_tests.cu @@ -24,16 +24,14 @@ TEST(tHYDROPpmcCTUReconstructor, CorrectInputExpectCorrectOutput) { - // Alwin: skip until this has been fixed - GTEST_SKIP(); // Set up PRNG to use std::mt19937_64 prng(42); std::uniform_real_distribution doubleRand(0.1, 5); // Mock up needed information - size_t const nx = 7; - size_t const ny = 7; - size_t const nz = 7; + size_t const nx = 6; + size_t const ny = 6; + size_t const nz = 6; size_t const n_fields = 5; double const dx = doubleRand(prng); double const dt = doubleRand(prng); @@ -50,37 +48,37 @@ TEST(tHYDROPpmcCTUReconstructor, CorrectInputExpectCorrectOutput) dev_grid.cpyHostToDevice(host_grid); // Fiducial Data - std::vector> fiducial_interface_left = {{{171, 1.7598055553475744}, - {514, 3.3921082637175894}, - {857, 3.5866056366266772}, - {1200, 3.4794572581328902}, - {1543, 10.363861270296034}}, - {{171, 1.6206985712721598}, - {514, 3.123972986618837}, - {857, 3.30309596610488}, - {1200, 3.204417323222251}, - {1543, 9.544631281899882}}, - {{171, 1.6206985712721595}, - {514, 5.0316428671215876}, - {857, 2.3915465711497186}, - {1200, 3.2044173232222506}, - {1543, 12.74302824034023}}}; + std::vector> fiducial_interface_left = {{{86, 2.6558981128823214}, + {302, 0.84399195916314151}, + {518, 2.2002498722761787}, + {734, 1.764334292986655}, + {950, 3.3600925565746804}}, + {{86, 2.4950488327292639}, + {302, 0.79287723513518138}, + {518, 1.7614576990062414}, + {734, 1.8238574169157304}, + {950, 3.14294317122161}}, + {{86, 2.6558981128823214}, + {302, 0.84399195916314151}, + {518, 2.0109603398129137}, + {734, 1.764334292986655}, + {950, 3.2100231679403066}}}; - std::vector> fiducial_interface_right = {{{170, 1.7857012385420896}, - {513, 3.4420234152477129}, - {856, 3.6393828329638049}, - {1199, 3.5306577572855762}, - {1542, 10.516366339570284}}, - {{164, 1.6206985712721595}, - {507, 3.1239729866188366}, - {850, 3.3030959661048795}, - {1193, 3.2044173232222506}, - {1536, 9.5446312818998802}}, - {{122, 1.6206985712721595}, - {465, 5.4375307473677061}, - {808, 2.2442413290889327}, - {1151, 3.2044173232222506}, - {1494, 13.843305272338561}}}; + std::vector> fiducial_interface_right = {{{85, 2.6558981128823214}, + {301, 0.84399195916314151}, + {517, 1.8381070277226794}, + {733, 1.764334292986655}, + {949, 3.0847691079841209}}, + {{80, 3.1281603739188069}, + {296, 0.99406757727427164}, + {512, 1.8732124042412865}, + {728, 1.6489758692176784}, + {944, 2.8820015278590443}}, + {{50, 2.6558981128823214}, + {266, 0.84399195916314151}, + {482, 2.0109603398129137}, + {698, 1.764334292986655}, + {914, 3.2100231679403066}}}; // Loop over different directions for (size_t direction = 0; direction < 3; direction++) { @@ -134,9 +132,9 @@ TEST(tALLPpmcVLReconstructor, CorrectInputExpectCorrectOutput) std::uniform_real_distribution doubleRand(0.1, 5); // Mock up needed information - size_t const nx = 7; - size_t const ny = 7; - size_t const nz = 7; + size_t const nx = 6; + size_t const ny = 6; + size_t const nz = 6; double const gamma = 5.0 / 3.0; #ifdef MHD size_t const n_fields = 8; @@ -156,70 +154,70 @@ TEST(tALLPpmcVLReconstructor, CorrectInputExpectCorrectOutput) // Fiducial Data #ifdef MHD - std::vector> fiducial_interface_left = {{{171, 1.5556846217288991}, - {514, 1.7422005905354798}, - {857, 3.6289199464135558}, - {1200, 2.1487031353407438}, - {1543, 22.988345461909127}, - {1886, 3.1027541330860546}, - {2229, 3.2554981416903335}}, - {{171, 1.7167767631895592}, - {514, 1.8447385381907686}, - {857, 2.9211469103910663}, - {1200, 2.626030390823102}, - {1543, 28.84165870179233}, - {1886, 3.8209152940021962}, - {2229, 2.7248523895714203}}, - {{171, 1.421933695280897}, - {514, 1.2318388818745061}, - {857, 2.8667822907691818}, - {1200, 2.1256773710028964}, - {1543, 15.684026541123352}, - {1886, 2.3642698195433232}, - {2229, 2.9207483994866617}}}; + std::vector> fiducial_interface_left = {{{86, 3.6926886385390683}, + {302, 2.3022467009220993}, + {518, 2.3207781368125389}, + {734, 2.6544338753333747}, + {950, 11.430630157120799}, + {1166, 0.6428577630032507}, + {1382, 4.1406925096276597}}, + {{86, 3.811691682348938}, + {302, 1.4827993897794758}, + {518, 2.3955690789476871}, + {734, 4.06241130448349}, + {950, 10.552876853630949}, + {1166, 3.5147238706385471}, + {1382, 1.2344879085821312}}, + {{86, 3.1608655959160155}, + {302, 1.5377824007725194}, + {518, 0.41798730655927896}, + {734, 2.2721408530383784}, + {950, 5.6329522765789646}, + {1166, 0.84450832590555991}, + {1382, 1.4279317910797107}}}; - std::vector> fiducial_interface_right = {{{170, 1.4838721492695441}, - {513, 1.3797509020377114}, - {856, 3.223172223924883}, - {1199, 2.2593969253004111}, - {1542, 15.634488002075017}, - {1885, 2.7494588681249819}, - {2228, 3.2540533219925698}}, - {{164, 1.4075989434297753}, - {507, 1.34947711631431}, - {850, 3.605198021293794}, - {1193, 1.9244827470895529}, - {1536, 13.52285212927548}, - {1879, 2.9568307038177966}, - {2222, 2.1086380065800636}}, - {{122, 1.9532382085816002}, - {465, 2.6860067041011249}, - {808, 5.1657781029381917}, - {1151, 2.7811084475444732}, - {1494, 24.999993264381686}, - {1837, 2.3090650532529238}, - {2180, 2.8525500781893642}}}; + std::vector> fiducial_interface_right = {{{85, 2.8949509658187838}, + {301, 0.25766140043685887}, + {517, 1.8194165731976308}, + {733, 2.0809921071868756}, + {949, 8.1315538869542046}, + {1165, 0.49708185787322312}, + {1381, 3.2017395511439881}}, + {{80, 2.8600082827930269}, + {296, 0.37343415089084014}, + {512, 1.7974558224423689}, + {728, 0.94369445956099784}, + {944, 7.7011501503138504}, + {1160, 3.5147238706385471}, + {1376, 1.2344879085821312}}, + {{50, 3.1608655959160155}, + {266, 0.32035830490636008}, + {482, 3.1721881746709815}, + {698, 2.2721408530383784}, + {914, 14.017699282483312}, + {1130, 1.5292690020097823}, + {1346, -0.12121484974901264}}}; #else // not MHD std::vector> fiducial_interface_left = { - {{171, 1.5239648818969727}, {514, 1.658831367400063}, {857, 3.3918153400617137}, {1200, 2.4096936604224304}}, - {{171, 1.5239639282226562}, {514, 1.6246850138898132}, {857, 3.391813217514656}, {1200, 2.3220060950058032}}, - {{171, 1.7062816619873047}, {514, 1.3300289077249516}, {857, 3.5599794228554593}, {1200, 2.5175993972231074}}}; + {{86, 4.155160222900312}, {302, 1.1624633361407897}, {518, 1.6379195998743412}, {734, 2.9868746414179093}}, + {{86, 4.1795874335665655}, {302, 2.1094239978455054}, {518, 2.6811988240843849}, {734, 4.2540957888954054}}, + {{86, 2.1772852940944429}, {302, 0.58167501916840214}, {518, 1.3683785996473696}, {734, 0.40276763592716164}}}; - std::vector> fiducial_interface_right = {{{135, 6.5824208227997447}, - {170, 1.5239620208740234}, - {513, 1.5386557138925041}, - {856, 3.3918089724205411}, - {1199, 1.9263881802230425}}, - {{135, 6.4095055796015963}, - {164, 1.5239639282226562}, - {507, 1.5544994569400168}, - {850, 3.391813217514656}, - {1193, 2.1017627061702138}}, - {{122, 1.3893871307373047}, - {135, 6.0894802934332555}, - {465, 2.1518846449159135}, - {808, 3.4792525252435533}, - {1151, 2.0500250813102903}}}; + std::vector> fiducial_interface_right = {{{54, 3.8655260187947502}, + {85, 2.6637168309565289}, + {301, 0.69483650107094164}, + {517, 2.7558388224532218}, + {733, 1.9147729154830744}}, + {{54, 5.7556871317935459}, + {80, 2.6515032256234021}, + {296, 0.39344537106429511}, + {512, 1.6491544916805785}, + {728, 0.85830485311660487}}, + {{50, 2.8254070932730269}, + {54, 2.1884721760267873}, + {266, 0.75482470285166003}, + {482, 1.7757096932649317}, + {698, 3.6101832818706452}}}; #endif // MHD // Loop over different directions diff --git a/src/reconstruction/reconstruction.h b/src/reconstruction/reconstruction.h index 3ed89a8b6..17a2f013e 100644 --- a/src/reconstruction/reconstruction.h +++ b/src/reconstruction/reconstruction.h @@ -78,6 +78,29 @@ struct Characteristic { }; // ===================================================================================================================== +// ===================================================================================================================== +template +bool __device__ __host__ __inline__ Thread_Guard(int const &nx, int const &ny, int const &nz, int const &xid, + int const &yid, int const &zid) +{ + // x check + bool out_of_bounds_thread = xid < order - 1 or xid >= nx - order; + + // y check + if (ny > 1) { + out_of_bounds_thread = yid < order - 1 or yid >= ny - order or out_of_bounds_thread; + } + + // z check + if (nz > 1) { + out_of_bounds_thread = zid < order - 1 or zid >= nz - order or out_of_bounds_thread; + } + out_of_bounds_thread = zid >= nz or out_of_bounds_thread; + + return out_of_bounds_thread; +} +// ===================================================================================================================== + // ===================================================================================================================== /*! * \brief Load the data for reconstruction From 69d43a68228f60e551011f377e719058f4de8840 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Tue, 8 Aug 2023 15:08:12 -0400 Subject: [PATCH 471/694] Add test for reconstruction::Thread_Guard --- src/reconstruction/reconstruction_tests.cu | 29 ++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/src/reconstruction/reconstruction_tests.cu b/src/reconstruction/reconstruction_tests.cu index 62e615b39..5f8000bf8 100644 --- a/src/reconstruction/reconstruction_tests.cu +++ b/src/reconstruction/reconstruction_tests.cu @@ -18,6 +18,7 @@ #include "../io/io.h" #include "../reconstruction/reconstruction.h" #include "../utils/DeviceVector.h" +#include "../utils/cuda_utilities.h" #include "../utils/gpu.hpp" #include "../utils/testing_utilities.h" @@ -174,6 +175,34 @@ TEST(tMHDReconstructionComputeEigenvectors, CorrectInputExpectCorrectOutput) } #endif // MHD +TEST(tALLReconstructionThreadGuard, CorrectInputExpectCorrectOutput) +{ + // Test parameters + int const order = 3; + int const nx = 6; + int const ny = 6; + int const nz = 6; + + // fiducial data + std::vector fiducial_vals(nx * ny * nz, 1); + fiducial_vals.at(86) = 0; + + // loop through all values of the indices and check them + for (int xid = 0; xid < nx; xid++) { + for (int yid = 0; yid < ny; yid++) { + for (int zid = 0; zid < nz; zid++) { + // Get the test value + bool test_val = reconstruction::Thread_Guard(nx, ny, nz, xid, yid, zid); + + // Compare + int id = cuda_utilities::compute1DIndex(xid, yid, zid, nx, ny); + ASSERT_EQ(test_val, fiducial_vals.at(id)) + << "Test value not equal to fiducial value at id = " << id << std::endl; + } + } + } +} + TEST(tALLReconstructionLoadData, CorrectInputExpectCorrectOutput) { // Set up test and mock up grid From 890310ddfe75e929c2e777c544098bb2c1ffb901 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Tue, 8 Aug 2023 15:59:53 -0400 Subject: [PATCH 472/694] Add documentation for reconstruction::Thread_Guard --- src/reconstruction/reconstruction.h | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/src/reconstruction/reconstruction.h b/src/reconstruction/reconstruction.h index 17a2f013e..07aae21a6 100644 --- a/src/reconstruction/reconstruction.h +++ b/src/reconstruction/reconstruction.h @@ -79,23 +79,41 @@ struct Characteristic { // ===================================================================================================================== // ===================================================================================================================== +/*! + * \brief Determine if a thread is within the allowed range + * + * \tparam order The order of the reconstruction. 2 for PLM, 3 for PPM + * \param nx The number of cells in the X-direction + * \param ny The number of cells in the Y-direction + * \param nz The number of cells in the Z-direction + * \param xid The X thread index + * \param yid The Y thread index + * \param zid The Z thread index + * \return true The thread is NOT in the allowed range + * \return false The thread is in the allowed range + */ template bool __device__ __host__ __inline__ Thread_Guard(int const &nx, int const &ny, int const &nz, int const &xid, int const &yid, int const &zid) { - // x check + // These checks all make sure that the xid is such that the thread won't try to load any memory that is out of bounds + + // X check bool out_of_bounds_thread = xid < order - 1 or xid >= nx - order; - // y check + // Y check, only used for 2D and 3D if (ny > 1) { out_of_bounds_thread = yid < order - 1 or yid >= ny - order or out_of_bounds_thread; } - // z check + // z check, only used for 3D if (nz > 1) { out_of_bounds_thread = zid < order - 1 or zid >= nz - order or out_of_bounds_thread; } - out_of_bounds_thread = zid >= nz or out_of_bounds_thread; + // This is needed in the case that nz == 1 to avoid overrun + else { + out_of_bounds_thread = zid >= nz or out_of_bounds_thread; + } return out_of_bounds_thread; } From 595959f2baa67365e7f6a90dab9e450b0de06050 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Wed, 9 Aug 2023 10:12:23 -0400 Subject: [PATCH 473/694] Update boundary condition check message --- src/utils/error_handling.cpp | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/src/utils/error_handling.cpp b/src/utils/error_handling.cpp index 38374c704..37e527ca8 100644 --- a/src/utils/error_handling.cpp +++ b/src/utils/error_handling.cpp @@ -2,6 +2,7 @@ #include #include +#include #ifdef MPI_CHOLLA #include @@ -55,18 +56,20 @@ void Check_Configuration(parameters const &P) #endif // Only one integrator check // Check the boundary conditions - auto Check_Boundary = [](int const &boundary) { + auto Check_Boundary = [](int const &boundary, std::string const &direction) { bool is_allowed_bc = boundary >= 0 and boundary <= 4; - assert(is_allowed_bc && - "WARNING: Possibly invalid boundary conditions for direction: %d flag: %d. Must select between (periodic), " - "2 (reflective), 3 (transmissive), 4 (custom), 5 (mpi).\n"); + std::string const error_message = + "WARNING: Possibly invalid boundary conditions for direction: " + direction + + " flag: " + std::to_string(boundary) + + ". Must select between 0 (no boundary), 1 (periodic), 2 (reflective), 3 (transmissive), 4 (custom), 5 (mpi)."; + assert(is_allowed_bc && error_message.c_str()); }; - Check_Boundary(P.xl_bcnd); - Check_Boundary(P.xu_bcnd); - Check_Boundary(P.yl_bcnd); - Check_Boundary(P.yu_bcnd); - Check_Boundary(P.zl_bcnd); - Check_Boundary(P.zu_bcnd); + Check_Boundary(P.xl_bcnd, "xl_bcnd"); + Check_Boundary(P.xu_bcnd, "xu_bcnd"); + Check_Boundary(P.yl_bcnd, "yl_bcnd"); + Check_Boundary(P.yu_bcnd, "yu_bcnd"); + Check_Boundary(P.zl_bcnd, "zl_bcnd"); + Check_Boundary(P.zu_bcnd, "zu_bcnd"); // warn if error checking is disabled #ifndef CUDA_ERROR_CHECK From 30a6aa3222b5cfd8de8f6926b3a6579ed8dbc543 Mon Sep 17 00:00:00 2001 From: ezlimen Date: Wed, 9 Aug 2023 12:56:23 -0700 Subject: [PATCH 474/694] changed version of cuda to 11.2 and also added devtoolset to be loaded in --- builds/make.host.lux | 2 +- builds/setup.lux.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/builds/make.host.lux b/builds/make.host.lux index edf4e42c0..6ce455fcb 100644 --- a/builds/make.host.lux +++ b/builds/make.host.lux @@ -9,7 +9,7 @@ GPUFLAGS = -std=c++17 OMP_NUM_THREADS = 10 #-- Library -CUDA_ROOT = /cm/shared/apps/cuda10.2/toolkit/current +CUDA_ROOT = /cm/shared/apps/cuda11.2/toolkit/current HDF5_ROOT = /cm/shared/apps/hdf5/1.10.6 FFTW_ROOT = /home/brvillas/code/fftw-3.3.8 PFFT_ROOT = /data/groups/comp-astro/bruno/code_mpi_local/pfft diff --git a/builds/setup.lux.sh b/builds/setup.lux.sh index 6d6d408f3..ab3606d66 100755 --- a/builds/setup.lux.sh +++ b/builds/setup.lux.sh @@ -1,6 +1,6 @@ #!/bin/bash -module load hdf5/1.10.6 cuda10.2/10.2 openmpi/4.0.1 +module load hdf5/1.10.6 cuda11.2 openmpi/4.0.1 devtoolset-9 export MACHINE=lux export CHOLLA_ENVSET=1 From 7abfc95a05128c6b001f7d77b377afa1139b51f5 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Mon, 17 Jul 2023 16:50:48 -0400 Subject: [PATCH 475/694] Add the ability to generate test coverage reports The run_tests.sh script now has the ability to generate a coverage report for the CPU code. It can be generated by passing the -l flag to the buildAndRunTests function then the coverage report will be available in the generated HTML website in bin/html_coverage_report_${CHOLLA_MAKE_TYPE} --- .gitignore | 2 ++ Makefile | 6 +++++ builds/run_tests.sh | 64 +++++++++++++++++++++++++++++++++++++++++---- 3 files changed, 67 insertions(+), 5 deletions(-) diff --git a/.gitignore b/.gitignore index 1f1fa3018..864a8ab2c 100644 --- a/.gitignore +++ b/.gitignore @@ -41,6 +41,8 @@ out.* o.* run disk.* +*.gcno +*.gcda # Logs and databases # ###################### diff --git a/Makefile b/Makefile index 868f2c5bc..10feb281c 100644 --- a/Makefile +++ b/Makefile @@ -56,6 +56,10 @@ else GPUFILES := $(filter-out src/system_tests/% %_tests.cu,$(GPUFILES)) endif +ifeq ($(COVERAGE), true) + CXXFLAGS += --coverage +endif + OBJS := $(subst .cpp,.o,$(CPPFILES)) \ $(subst .cu,.o,$(GPUFILES)) @@ -214,6 +218,8 @@ clean: rm -f $(CLEAN_OBJS) rm -rf googletest -find bin/ -type f -executable -name "cholla.*.$(MACHINE)*" -exec rm -f '{}' \; + -find src/ -type f -name "*.gcno" -delete + -find src/ -type f -name "*.gcda" -delete clobber: clean -find bin/ -type f -executable -name "cholla*" -exec rm -f '{}' \; diff --git a/builds/run_tests.sh b/builds/run_tests.sh index c2337ca05..0fc1ed629 100755 --- a/builds/run_tests.sh +++ b/builds/run_tests.sh @@ -108,7 +108,7 @@ buildCholla () { echo -e "\nBuilding Cholla...\n" builtin cd $CHOLLA_ROOT - make --jobs=$(nproc) TYPE=${CHOLLA_MAKE_TYPE} BUILD=${1} + make --jobs=$(nproc) TYPE=${CHOLLA_MAKE_TYPE} BUILD=${1} COVERAGE=${2} } # ============================================================================== @@ -119,7 +119,7 @@ buildChollaTests () { echo builtin cd $CHOLLA_ROOT - make --jobs=$(nproc) TYPE=${CHOLLA_MAKE_TYPE} TEST=true + make --jobs=$(nproc) TYPE=${CHOLLA_MAKE_TYPE} TEST=true COVERAGE=${1} } # ============================================================================== @@ -203,6 +203,51 @@ runTests () } # ============================================================================== +# ============================================================================== +# This function generates a coverage report after the tests have been run. +# The final report is a website in bin/html_coverage_report_${CHOLLA_MAKE_TYPE} +chollaCoverage () +{ + # Setup the names of files that we will use + local base_file="bin/coverage_base_${CHOLLA_MAKE_TYPE}.info" + local test_file="bin/coverage_test_${CHOLLA_MAKE_TYPE}.info" + local combined_file="bin/coverage_combined_${CHOLLA_MAKE_TYPE}.info" + + # Generate the initial report with no coverage info. This is needed so that + # lcov knows about all the files, not just the ones that are tested + lcov --capture --initial --directory ${CHOLLA_ROOT}/src --output-file ${base_file} + + # Now we get the actual coverage information + lcov --capture --directory ${CHOLLA_ROOT}/src --output-file ${test_file} + + # Then combine the the two coverage files so we know what changed, i.e. which + # lines were actually covered + lcov --add-tracefile ${base_file} --add-tracefile ${test_file} --output-file ${combined_file} + + # Extract data from only the files within CHOLLA_ROOT. This should exclude any + # system or external libraries + lcov --extract ${combined_file} "${CHOLLA_ROOT}/*" --output-file ${combined_file} + + # exclude_patterns=('*-tests.cpp') # Remove traces of the tests themselves + # # --remove TRACEFILE PATTERN = remove all things associated with PATTERN in TRACEFILE + # lcov --remove ${combined_file} "${exclude_patterns[@]}" --output-file ${combined_file} + + # List the contents + lcov --list ${combined_file} + + # Generate HTML report + genhtml ${combined_file} --output-directory bin/html_coverage_report_${CHOLLA_MAKE_TYPE} + + # Combine all tracefiles together. Define the different make types then add + # the appropriate prefixes and suffices. + # build_types=(cosmology disk dust gravity hydro mhd particles) + # build_types=("${build_types[@]/#/--add-trace bin/coverage_combined_}") + # build_types=("${build_types[@]/%/.info}") + # eval "build_types=(${build_types[@]})" + # lcov "${build_types[@]}" --output-file bin/full_coverage_report.info +} +# ============================================================================== + # ============================================================================== # Call all the functions required for setting up, building, and running tests # @@ -213,6 +258,7 @@ runTests () # \param[in] -g (optional) If set then download and build a local version of # GoogleTest to use instead of the machine default # \param[in] -d (optional) Build Cholla in debug mode +# \param[in] -l (optional) Generate coverage reports when building and running Cholla buildAndRunTests () { # Unset BUILD_GTEST so that subsequent runs aren't tied to what previous runs @@ -220,10 +266,11 @@ buildAndRunTests () unset BUILD_GTEST BUILD_MODE='OPTIMIZE' + CODE_COVERAGE='false' # Check arguments local OPTIND - while getopts "t:c:g:d" opt; do + while getopts "t:c:g:d:l" opt; do case $opt in t) # Set the make type MAKE_TYPE_ARG="-t ${OPTARG}" @@ -237,6 +284,9 @@ buildAndRunTests () d) # Build the debug version of Cholla? BUILD_MODE='DEBUG' ;; + l) # Generate Code Coverage? + CODE_COVERAGE='true' + ;; \?) echo "Invalid option: -${OPTARG}" >&2 return 1 @@ -263,8 +313,12 @@ buildAndRunTests () if [[ -n $BUILD_GTEST ]]; then buildGoogleTest fi - buildCholla $BUILD_MODE && \ - buildChollaTests && \ + buildCholla $BUILD_MODE $CODE_COVERAGE && \ + buildChollaTests $CODE_COVERAGE && \ runTests + + if [ $CODE_COVERAGE = "true" ]; then + chollaCoverage + fi } # ============================================================================== From 2a6eb5fab84679659701f57061dd1a52c837fb56 Mon Sep 17 00:00:00 2001 From: helenarichie Date: Tue, 15 Aug 2023 10:24:55 -0400 Subject: [PATCH 476/694] save progress --- builds/make.type.dust | 1 + cholla-tests-data | 2 +- src/hydro/hydro_cuda.cu | 48 +++++++++++++++++++++++++------ src/hydro/hydro_cuda.h | 8 +++--- src/integrators/VL_3D_cuda.cu | 39 +++++++------------------ src/integrators/simple_3D_cuda.cu | 6 ++-- 6 files changed, 59 insertions(+), 45 deletions(-) diff --git a/builds/make.type.dust b/builds/make.type.dust index 0be259763..6570f7d8a 100644 --- a/builds/make.type.dust +++ b/builds/make.type.dust @@ -16,6 +16,7 @@ DFLAGS += -DHLLC # DFLAGS += -DDE DFLAGS += -DAVERAGE_SLOW_CELLS DFLAGS += -DTEMPERATURE_FLOOR +DFLAGS += -DSCALAR_FLOOR ifeq ($(findstring cosmology,$(TYPE)),cosmology) DFLAGS += -DSIMPLE diff --git a/cholla-tests-data b/cholla-tests-data index dcd73ff52..321416680 160000 --- a/cholla-tests-data +++ b/cholla-tests-data @@ -1 +1 @@ -Subproject commit dcd73ff52b9027627b247c6d888bcdb56840c85e +Subproject commit 321416680f95d97b5d4ccc6f0b83a8b9ecafdaf0 diff --git a/src/hydro/hydro_cuda.cu b/src/hydro/hydro_cuda.cu index 1f4b91fd3..0a3ed342f 100644 --- a/src/hydro/hydro_cuda.cu +++ b/src/hydro/hydro_cuda.cu @@ -276,7 +276,7 @@ __global__ void Update_Conserved_Variables_3D(Real *dev_conserved, Real *Q_Lx, R // issues #endif - #ifdef DENSITY_FLOOR + #ifdef DENSITY_FLOOR if (dev_conserved[id] < density_floor) { if (dev_conserved[id] > 0) { dens_0 = dev_conserved[id]; @@ -1086,7 +1086,6 @@ __global__ void Sync_Energies_3D(Real *dev_conserved, int nx, int ny, int nz, in #endif // DE - #ifdef TEMPERATURE_FLOOR __global__ void Apply_Temperature_Floor(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real U_floor) { @@ -1124,8 +1123,39 @@ __global__ void Apply_Temperature_Floor(Real *dev_conserved, int nx, int ny, int #endif } } - #endif // TEMPERATURE_FLOOR +__global__ void Apply_Density_Floor(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, Real density_floor) +{ + int id, xid, yid, zid, n_cells; + Real density_init; // variable to store the value of the scalar before a floor is applied + n_cells = nx * ny * nz; + + // get a global thread ID + id = threadIdx.x + blockIdx.x * blockDim.x; + zid = id / (nx * ny); + yid = (id - zid * nx * ny) / nx; + xid = id - zid * nx * ny - yid * nx; + + // threads corresponding to real cells do the calculation + if (xid > n_ghost - 1 && xid < nx - n_ghost && yid > n_ghost - 1 && yid < ny - n_ghost && zid > n_ghost - 1 && + zid < nz - n_ghost) { + + density_init = dev_conserved[id + n_cells * grid_enum::density]; + + if (density_init < density_floor) { + printf("###Thread density change %f -> %f \n", density_init, density_floor); + dev_conserved[id] = density_floor; + // Scale the conserved values to the new density + dev_conserved[id + n_cells * grid_enum::momentum_x] *= (density_floor / density_init); + dev_conserved[id + n_cells * grid_enum::momentum_y] *= (density_floor / density_init); + dev_conserved[id + n_cells * grid_enum::momentum_z] *= (density_floor / density_init); + dev_conserved[id + n_cells * grid_enum::Energy] *= (density_floor / density_init); + #ifdef DE + dev_conserved[id + n_cells * grid_enum::GasEnergy] *= (density_floor / density_init); + #endif // DE + } + } +} __device__ Real Average_Cell_Single_Field(int field_indx, int i, int j, int k, int nx, int ny, int nz, int ncells, Real *conserved) { @@ -1170,11 +1200,11 @@ __device__ void Average_Cell_All_Fields(int i, int j, int k, int nx, int ny, int #endif // DE } -__global__ void Apply_Conserved_Floor(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int field_num, - Real conserved_floor) +__global__ void Apply_Scalar_Floor(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int field_num, + Real scalar_floor) { int id, xid, yid, zid, n_cells; - Real field_0; + Real scalar; // variable to store the value of the scalar before a floor is applied n_cells = nx * ny * nz; // get a global thread ID @@ -1186,10 +1216,10 @@ __global__ void Apply_Conserved_Floor(Real *dev_conserved, int nx, int ny, int n // threads corresponding to real cells do the calculation if (xid > n_ghost - 1 && xid < nx - n_ghost && yid > n_ghost - 1 && yid < ny - n_ghost && zid > n_ghost - 1 && zid < nz - n_ghost) { - field_0 = dev_conserved[id + n_cells * field_num]; + scalar = dev_conserved[id + n_cells * field_num]; - if (field_0 < conserved_floor) { - dev_conserved[id + n_cells * field_num] = conserved_floor; + if (scalar < scalar_floor) { + dev_conserved[id + n_cells * field_num] = scalar_floor; } } } diff --git a/src/hydro/hydro_cuda.h b/src/hydro/hydro_cuda.h index 287224c57..1f7d9a473 100644 --- a/src/hydro/hydro_cuda.h +++ b/src/hydro/hydro_cuda.h @@ -84,13 +84,13 @@ __global__ void Average_Slow_Cells_3D(Real *dev_conserved, int nx, int ny, int n Real dy, Real dz, Real gamma, Real max_dti_slow); #endif - #ifdef TEMPERATURE_FLOOR __global__ void Apply_Temperature_Floor(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real U_floor); - #endif -__global__ void Apply_Conserved_Floor(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int field_num, - Real conserved_floor); +__global__ void Apply_Density_Floor(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, Real density_floor); + +__global__ void Apply_Scalar_Floor(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int field_num, + Real scalar_floor); __global__ void Partial_Update_Advected_Internal_Energy_1D(Real *dev_conserved, Real *Q_Lx, Real *Q_Rx, int nx, int n_ghost, Real dx, Real dt, Real gamma, int n_fields); diff --git a/src/integrators/VL_3D_cuda.cu b/src/integrators/VL_3D_cuda.cu index 6ccc814c2..dd088d8ef 100644 --- a/src/integrators/VL_3D_cuda.cu +++ b/src/integrators/VL_3D_cuda.cu @@ -32,8 +32,7 @@ __global__ void Update_Conserved_Variables_3D_half(Real *dev_conserved, Real *dev_conserved_half, Real *dev_F_x, Real *dev_F_y, Real *dev_F_z, int nx, int ny, int nz, int n_ghost, - Real dx, Real dy, Real dz, Real dt, Real gamma, int n_fields, - Real density_floor); + Real dx, Real dy, Real dz, Real dt, Real gamma, int n_fields); void VL_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, int ny, int nz, int x_off, int y_off, int z_off, int n_ghost, Real dx, Real dy, Real dz, Real xbound, Real ybound, Real zbound, @@ -194,8 +193,13 @@ void VL_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, int // Step 3: Update the conserved variables half a timestep hipLaunchKernelGGL(Update_Conserved_Variables_3D_half, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, dev_conserved_half, - F_x, F_y, F_z, nx, ny, nz, n_ghost, dx, dy, dz, 0.5 * dt, gama, n_fields, density_floor); + F_x, F_y, F_z, nx, ny, nz, n_ghost, dx, dy, dz, 0.5 * dt, gama, n_fields); CudaCheckError(); + + #ifdef DENSITY_FLOOR + hipLaunchKernelGGL(Apply_Density_Floor, dim1dGrid, dim1dBlock, 0, 0, dev_conserved_half, nx, ny, nz, n_ghost, density_floor); + #endif // DENSITY_FLOOR + #ifdef MHD // Update the magnetic fields hipLaunchKernelGGL(mhd::Update_Magnetic_Field_3D, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, dev_conserved_half, @@ -325,11 +329,11 @@ void VL_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, int CudaCheckError(); #endif // TEMPERATURE_FLOOR - #ifdef DUST - hipLaunchKernelGGL(Apply_Conserved_Floor, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, nz, n_ghost, + #ifdef SCALAR_FLOOR + hipLaunchKernelGGL(Apply_Scalar_Floor, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, nz, n_ghost, grid_enum::dust_density, 1e-5); CudaCheckError(); - #endif // DUST + #endif // SCALAR_FLOOR return; } @@ -353,8 +357,7 @@ void Free_Memory_VL_3D() __global__ void Update_Conserved_Variables_3D_half(Real *dev_conserved, Real *dev_conserved_half, Real *dev_F_x, Real *dev_F_y, Real *dev_F_z, int nx, int ny, int nz, int n_ghost, - Real dx, Real dy, Real dz, Real dt, Real gamma, int n_fields, - Real density_floor) + Real dx, Real dy, Real dz, Real dt, Real gamma, int n_fields) { Real dtodx = dt / dx; Real dtody = dt / dy; @@ -378,10 +381,6 @@ __global__ void Update_Conserved_Variables_3D_half(Real *dev_conserved, Real *de int ipo, jpo, kpo; #endif // DE - #ifdef DENSITY_FLOOR - Real dens_0; - #endif // DENSITY_FLOOR - // threads corresponding to all cells except outer ring of ghost cells do the // calculation if (xid > 0 && xid < nx - 1 && yid > 0 && yid < ny - 1 && zid > 0 && zid < nz - 1) { @@ -455,22 +454,6 @@ __global__ void Update_Conserved_Variables_3D_half(Real *dev_conserved, Real *de dtodz * (dev_F_z[(n_fields - 1) * n_cells + kmo] - dev_F_z[(n_fields - 1) * n_cells + id]) + 0.5 * P * (dtodx * (vx_imo - vx_ipo) + dtody * (vy_jmo - vy_jpo) + dtodz * (vz_kmo - vz_kpo)); #endif // DE - - #ifdef DENSITY_FLOOR - if (dev_conserved_half[id] < density_floor) { - dens_0 = dev_conserved_half[id]; - printf("###Thread density change %f -> %f \n", dens_0, density_floor); - dev_conserved_half[id] = density_floor; - // Scale the conserved values to the new density - dev_conserved_half[1 * n_cells + id] *= (density_floor / dens_0); - dev_conserved_half[2 * n_cells + id] *= (density_floor / dens_0); - dev_conserved_half[3 * n_cells + id] *= (density_floor / dens_0); - dev_conserved_half[4 * n_cells + id] *= (density_floor / dens_0); - #ifdef DE - dev_conserved_half[(n_fields - 1) * n_cells + id] *= (density_floor / dens_0); - #endif // DE - } - #endif // DENSITY_FLOOR } } diff --git a/src/integrators/simple_3D_cuda.cu b/src/integrators/simple_3D_cuda.cu index 3c8dc13e9..dfc015f25 100644 --- a/src/integrators/simple_3D_cuda.cu +++ b/src/integrators/simple_3D_cuda.cu @@ -185,11 +185,11 @@ void Simple_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, CudaCheckError(); #endif // TEMPERATURE_FLOOR - #ifdef DUST - hipLaunchKernelGGL(Apply_Conserved_Floor, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, nz, n_ghost, + #ifdef SCALAR_FLOOR + hipLaunchKernelGGL(Apply_Scalar_Floor, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, nz, n_ghost, grid_enum::dust_density, 1e-5); CudaCheckError(); - #endif // DUST + #endif // SCALAR_FLOOR return; } From 758ac381667526e008e921116053940f6f3bcf77 Mon Sep 17 00:00:00 2001 From: helenarichie Date: Tue, 15 Aug 2023 10:56:08 -0400 Subject: [PATCH 477/694] run clang format --- cholla-tests-data | 2 +- src/system_tests/mhd_system_tests.cpp | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/cholla-tests-data b/cholla-tests-data index dcd73ff52..321416680 160000 --- a/cholla-tests-data +++ b/cholla-tests-data @@ -1 +1 @@ -Subproject commit dcd73ff52b9027627b247c6d888bcdb56840c85e +Subproject commit 321416680f95d97b5d4ccc6f0b83a8b9ecafdaf0 diff --git a/src/system_tests/mhd_system_tests.cpp b/src/system_tests/mhd_system_tests.cpp index 214a05541..94274dfc0 100644 --- a/src/system_tests/mhd_system_tests.cpp +++ b/src/system_tests/mhd_system_tests.cpp @@ -36,10 +36,10 @@ class tMHDSYSTEMLinearWavesParameterizedAngle : public ::testing::TestWithParam< inline static std::unordered_map high_res_l2norms; void Set_Launch_Params(double const &waveSpeed, double const &rEigenVec_rho, double const &rEigenVec_MomentumX, - double const &rEigenVec_MomentumY, double const &rEigenVec_MomentumZ, double const &rEigenVec_E, - double const &rEigenVec_Bx, double const &rEigenVec_By, double const &rEigenVec_Bz, - double const &pitch, double const &yaw, double const &domain, int const &domain_direction, - double const &vx = 0.0, size_t const &N = 32) + double const &rEigenVec_MomentumY, double const &rEigenVec_MomentumZ, + double const &rEigenVec_E, double const &rEigenVec_Bx, double const &rEigenVec_By, + double const &rEigenVec_Bz, double const &pitch, double const &yaw, double const &domain, + int const &domain_direction, double const &vx = 0.0, size_t const &N = 32) { // Constant for all tests double const gamma = 5. / 3.; From 2c0b6e2cb3d45af1557b6d2ffb4a45703e6d8ea3 Mon Sep 17 00:00:00 2001 From: helenarichie Date: Tue, 15 Aug 2023 11:45:44 -0400 Subject: [PATCH 478/694] implement naming convention to recent commits --- src/reconstruction/reconstruction_tests.cu | 48 +++++++++++----------- src/system_tests/mhd_system_tests.cpp | 32 +++++++-------- 2 files changed, 40 insertions(+), 40 deletions(-) diff --git a/src/reconstruction/reconstruction_tests.cu b/src/reconstruction/reconstruction_tests.cu index f69655c41..35ea2297f 100644 --- a/src/reconstruction/reconstruction_tests.cu +++ b/src/reconstruction/reconstruction_tests.cu @@ -41,7 +41,7 @@ __global__ void Test_Char_2_Prim(reconstruction::Primitive const primitive, sound_speed, sound_speed_squared, gamma); } -__global__ void test_compute_eigenvectors(reconstruction::Primitive const primitive, Real const sound_speed, +__global__ void Test_Compute_Eigenvectors(reconstruction::Primitive const primitive, Real const sound_speed, Real const sound_speed_squared, Real const gamma, reconstruction::EigenVecs *eigenvectors) { @@ -121,7 +121,7 @@ TEST(tMHDReconstructionComputeEigenvectors, CorrectInputExpectCorrectOutput) // Run test cuda_utilities::DeviceVector dev_results(1); - hipLaunchKernelGGL(test_compute_eigenvectors, 1, 1, 0, 0, primitive, sound_speed, sound_speed_squared, gamma, + hipLaunchKernelGGL(Test_Compute_Eigenvectors, 1, 1, 0, 0, primitive, sound_speed, sound_speed_squared, gamma, dev_results.data()); CudaCheckError(); cudaDeviceSynchronize(); @@ -150,28 +150,28 @@ TEST(tMHDReconstructionComputeEigenvectors, CorrectInputExpectCorrectOutput) 0.97891777490585408, 0.65850460786851805, 0.75257669470687782, 0.059999999999999984, 1, 2.546253336541183, 1.3601203180183106, 0.58963258314939582, 2.825892204282022, 0.15277520019247093, 0.081607219081098623, 0.03537795498896374, 0.1695535322569213}; - testingUtilities::checkResults(fiducial_results.magnetosonic_speed_fast, host_results.magnetosonic_speed_fast, - "magnetosonic_speed_fast"); - testingUtilities::checkResults(fiducial_results.magnetosonic_speed_slow, host_results.magnetosonic_speed_slow, - "magnetosonic_speed_slow"); - testingUtilities::checkResults(fiducial_results.magnetosonic_speed_fast_squared, - host_results.magnetosonic_speed_fast_squared, "magnetosonic_speed_fast_squared"); - testingUtilities::checkResults(fiducial_results.magnetosonic_speed_slow_squared, - host_results.magnetosonic_speed_slow_squared, "magnetosonic_speed_slow_squared"); - testingUtilities::checkResults(fiducial_results.alpha_fast, host_results.alpha_fast, "alpha_fast"); - testingUtilities::checkResults(fiducial_results.alpha_slow, host_results.alpha_slow, "alpha_slow"); - testingUtilities::checkResults(fiducial_results.beta_y, host_results.beta_y, "beta_y"); - testingUtilities::checkResults(fiducial_results.beta_z, host_results.beta_z, "beta_z"); - testingUtilities::checkResults(fiducial_results.n_fs, host_results.n_fs, "n_fs"); - testingUtilities::checkResults(fiducial_results.sign, host_results.sign, "sign"); - testingUtilities::checkResults(fiducial_results.q_fast, host_results.q_fast, "q_fast"); - testingUtilities::checkResults(fiducial_results.q_slow, host_results.q_slow, "q_slow"); - testingUtilities::checkResults(fiducial_results.a_fast, host_results.a_fast, "a_fast"); - testingUtilities::checkResults(fiducial_results.a_slow, host_results.a_slow, "a_slow"); - testingUtilities::checkResults(fiducial_results.q_prime_fast, host_results.q_prime_fast, "q_prime_fast"); - testingUtilities::checkResults(fiducial_results.q_prime_slow, host_results.q_prime_slow, "q_prime_slow"); - testingUtilities::checkResults(fiducial_results.a_prime_fast, host_results.a_prime_fast, "a_prime_fast"); - testingUtilities::checkResults(fiducial_results.a_prime_slow, host_results.a_prime_slow, "a_prime_slow"); + testingUtilities::Check_Results(fiducial_results.magnetosonic_speed_fast, host_results.magnetosonic_speed_fast, + "magnetosonic_speed_fast"); + testingUtilities::Check_Results(fiducial_results.magnetosonic_speed_slow, host_results.magnetosonic_speed_slow, + "magnetosonic_speed_slow"); + testingUtilities::Check_Results(fiducial_results.magnetosonic_speed_fast_squared, + host_results.magnetosonic_speed_fast_squared, "magnetosonic_speed_fast_squared"); + testingUtilities::Check_Results(fiducial_results.magnetosonic_speed_slow_squared, + host_results.magnetosonic_speed_slow_squared, "magnetosonic_speed_slow_squared"); + testingUtilities::Check_Results(fiducial_results.alpha_fast, host_results.alpha_fast, "alpha_fast"); + testingUtilities::Check_Results(fiducial_results.alpha_slow, host_results.alpha_slow, "alpha_slow"); + testingUtilities::Check_Results(fiducial_results.beta_y, host_results.beta_y, "beta_y"); + testingUtilities::Check_Results(fiducial_results.beta_z, host_results.beta_z, "beta_z"); + testingUtilities::Check_Results(fiducial_results.n_fs, host_results.n_fs, "n_fs"); + testingUtilities::Check_Results(fiducial_results.sign, host_results.sign, "sign"); + testingUtilities::Check_Results(fiducial_results.q_fast, host_results.q_fast, "q_fast"); + testingUtilities::Check_Results(fiducial_results.q_slow, host_results.q_slow, "q_slow"); + testingUtilities::Check_Results(fiducial_results.a_fast, host_results.a_fast, "a_fast"); + testingUtilities::Check_Results(fiducial_results.a_slow, host_results.a_slow, "a_slow"); + testingUtilities::Check_Results(fiducial_results.q_prime_fast, host_results.q_prime_fast, "q_prime_fast"); + testingUtilities::Check_Results(fiducial_results.q_prime_slow, host_results.q_prime_slow, "q_prime_slow"); + testingUtilities::Check_Results(fiducial_results.a_prime_fast, host_results.a_prime_fast, "a_prime_fast"); + testingUtilities::Check_Results(fiducial_results.a_prime_slow, host_results.a_prime_slow, "a_prime_slow"); } #endif // MHD diff --git a/src/system_tests/mhd_system_tests.cpp b/src/system_tests/mhd_system_tests.cpp index 94274dfc0..a9aced3fd 100644 --- a/src/system_tests/mhd_system_tests.cpp +++ b/src/system_tests/mhd_system_tests.cpp @@ -407,8 +407,8 @@ TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, FastMagnetosonicWaveExpectSecond double const rEigenVec_E = prefix * 9; // Set the launch parameters - setLaunchParams(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, rEigenVec_MomentumY, rEigenVec_MomentumZ, rEigenVec_E, - rEigenVec_Bx, rEigenVec_By, rEigenVec_Bz, pitch, yaw, domain, domain_direction, 0.0, 16); + Set_Launch_Params(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, rEigenVec_MomentumY, rEigenVec_MomentumZ, rEigenVec_E, + rEigenVec_Bx, rEigenVec_By, rEigenVec_Bz, pitch, yaw, domain, domain_direction, 0.0, 16); // Set the number of timesteps waveTest.setFiducialNumTimeSteps(numTimeSteps[domain_direction - 1]); @@ -418,8 +418,8 @@ TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, FastMagnetosonicWaveExpectSecond // Check the scaling double const low_res_l2norm = waveTest.getL2Norm(); - testingUtilities::checkResults(4.0, low_res_l2norm / high_res_l2norms["fast_" + std::to_string(domain_direction)], "", - 0.17); + testingUtilities::Check_Results(4.0, low_res_l2norm / high_res_l2norms["fast_" + std::to_string(domain_direction)], "", + 0.17); } TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, SlowMagnetosonicWaveExpectSecondOrderConvergence) @@ -442,8 +442,8 @@ TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, SlowMagnetosonicWaveExpectSecond double const rEigenVec_E = prefix * 3; // Set the launch parameters - setLaunchParams(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, rEigenVec_MomentumY, rEigenVec_MomentumZ, rEigenVec_E, - rEigenVec_Bx, rEigenVec_By, rEigenVec_Bz, pitch, yaw, domain, domain_direction, 0.0, 16); + Set_Launch_Params(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, rEigenVec_MomentumY, rEigenVec_MomentumZ, rEigenVec_E, + rEigenVec_Bx, rEigenVec_By, rEigenVec_Bz, pitch, yaw, domain, domain_direction, 0.0, 16); // Set the number of timesteps waveTest.setFiducialNumTimeSteps(numTimeSteps[domain_direction - 1]); @@ -453,8 +453,8 @@ TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, SlowMagnetosonicWaveExpectSecond // Check the scaling double const low_res_l2norm = waveTest.getL2Norm(); - testingUtilities::checkResults(4.0, low_res_l2norm / high_res_l2norms["slow_" + std::to_string(domain_direction)], "", - 0.17); + testingUtilities::Check_Results(4.0, low_res_l2norm / high_res_l2norms["slow_" + std::to_string(domain_direction)], "", + 0.17); } TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, AlfvenWaveExpectSecondOrderConvergence) @@ -476,8 +476,8 @@ TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, AlfvenWaveExpectSecondOrderConve double const rEigenVec_E = 0; // Set the launch parameters - setLaunchParams(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, rEigenVec_MomentumY, rEigenVec_MomentumZ, rEigenVec_E, - rEigenVec_Bx, rEigenVec_By, rEigenVec_Bz, pitch, yaw, domain, domain_direction, 0.0, 16); + Set_Launch_Params(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, rEigenVec_MomentumY, rEigenVec_MomentumZ, rEigenVec_E, + rEigenVec_Bx, rEigenVec_By, rEigenVec_Bz, pitch, yaw, domain, domain_direction, 0.0, 16); // Set the number of timesteps waveTest.setFiducialNumTimeSteps(numTimeSteps[domain_direction - 1]); @@ -487,8 +487,8 @@ TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, AlfvenWaveExpectSecondOrderConve // Check the scaling double const low_res_l2norm = waveTest.getL2Norm(); - testingUtilities::checkResults(4.0, low_res_l2norm / high_res_l2norms["alfven_" + std::to_string(domain_direction)], - "", 0.17); + testingUtilities::Check_Results(4.0, low_res_l2norm / high_res_l2norms["alfven_" + std::to_string(domain_direction)], + "", 0.17); } TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, MHDContactWaveExpectSecondOrderConvergence) @@ -511,8 +511,8 @@ TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, MHDContactWaveExpectSecondOrderC double const velocityX = waveSpeed; // Set the launch parameters - setLaunchParams(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, rEigenVec_MomentumY, rEigenVec_MomentumZ, rEigenVec_E, - rEigenVec_Bx, rEigenVec_By, rEigenVec_Bz, pitch, yaw, domain, domain_direction, velocityX, 16); + Set_Launch_Params(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, rEigenVec_MomentumY, rEigenVec_MomentumZ, rEigenVec_E, + rEigenVec_Bx, rEigenVec_By, rEigenVec_Bz, pitch, yaw, domain, domain_direction, velocityX, 16); // Set the number of timesteps waveTest.setFiducialNumTimeSteps(numTimeSteps[domain_direction - 1]); @@ -522,8 +522,8 @@ TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, MHDContactWaveExpectSecondOrderC // Check the scaling double const low_res_l2norm = waveTest.getL2Norm(); - testingUtilities::checkResults(4.0, low_res_l2norm / high_res_l2norms["contact_" + std::to_string(domain_direction)], - "", 0.17); + testingUtilities::Check_Results(4.0, low_res_l2norm / high_res_l2norms["contact_" + std::to_string(domain_direction)], + "", 0.17); } INSTANTIATE_TEST_SUITE_P(, tMHDSYSTEMLinearWavesParameterizedAngle, From a28041480f179404bc86abe624981bd5ea890dd9 Mon Sep 17 00:00:00 2001 From: helenarichie Date: Tue, 15 Aug 2023 11:46:57 -0400 Subject: [PATCH 479/694] run clang format --- src/system_tests/mhd_system_tests.cpp | 28 +++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/src/system_tests/mhd_system_tests.cpp b/src/system_tests/mhd_system_tests.cpp index a9aced3fd..f05bd9bbb 100644 --- a/src/system_tests/mhd_system_tests.cpp +++ b/src/system_tests/mhd_system_tests.cpp @@ -407,8 +407,9 @@ TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, FastMagnetosonicWaveExpectSecond double const rEigenVec_E = prefix * 9; // Set the launch parameters - Set_Launch_Params(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, rEigenVec_MomentumY, rEigenVec_MomentumZ, rEigenVec_E, - rEigenVec_Bx, rEigenVec_By, rEigenVec_Bz, pitch, yaw, domain, domain_direction, 0.0, 16); + Set_Launch_Params(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, rEigenVec_MomentumY, rEigenVec_MomentumZ, + rEigenVec_E, rEigenVec_Bx, rEigenVec_By, rEigenVec_Bz, pitch, yaw, domain, domain_direction, 0.0, + 16); // Set the number of timesteps waveTest.setFiducialNumTimeSteps(numTimeSteps[domain_direction - 1]); @@ -418,8 +419,8 @@ TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, FastMagnetosonicWaveExpectSecond // Check the scaling double const low_res_l2norm = waveTest.getL2Norm(); - testingUtilities::Check_Results(4.0, low_res_l2norm / high_res_l2norms["fast_" + std::to_string(domain_direction)], "", - 0.17); + testingUtilities::Check_Results(4.0, low_res_l2norm / high_res_l2norms["fast_" + std::to_string(domain_direction)], + "", 0.17); } TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, SlowMagnetosonicWaveExpectSecondOrderConvergence) @@ -442,8 +443,9 @@ TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, SlowMagnetosonicWaveExpectSecond double const rEigenVec_E = prefix * 3; // Set the launch parameters - Set_Launch_Params(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, rEigenVec_MomentumY, rEigenVec_MomentumZ, rEigenVec_E, - rEigenVec_Bx, rEigenVec_By, rEigenVec_Bz, pitch, yaw, domain, domain_direction, 0.0, 16); + Set_Launch_Params(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, rEigenVec_MomentumY, rEigenVec_MomentumZ, + rEigenVec_E, rEigenVec_Bx, rEigenVec_By, rEigenVec_Bz, pitch, yaw, domain, domain_direction, 0.0, + 16); // Set the number of timesteps waveTest.setFiducialNumTimeSteps(numTimeSteps[domain_direction - 1]); @@ -453,8 +455,8 @@ TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, SlowMagnetosonicWaveExpectSecond // Check the scaling double const low_res_l2norm = waveTest.getL2Norm(); - testingUtilities::Check_Results(4.0, low_res_l2norm / high_res_l2norms["slow_" + std::to_string(domain_direction)], "", - 0.17); + testingUtilities::Check_Results(4.0, low_res_l2norm / high_res_l2norms["slow_" + std::to_string(domain_direction)], + "", 0.17); } TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, AlfvenWaveExpectSecondOrderConvergence) @@ -476,8 +478,9 @@ TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, AlfvenWaveExpectSecondOrderConve double const rEigenVec_E = 0; // Set the launch parameters - Set_Launch_Params(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, rEigenVec_MomentumY, rEigenVec_MomentumZ, rEigenVec_E, - rEigenVec_Bx, rEigenVec_By, rEigenVec_Bz, pitch, yaw, domain, domain_direction, 0.0, 16); + Set_Launch_Params(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, rEigenVec_MomentumY, rEigenVec_MomentumZ, + rEigenVec_E, rEigenVec_Bx, rEigenVec_By, rEigenVec_Bz, pitch, yaw, domain, domain_direction, 0.0, + 16); // Set the number of timesteps waveTest.setFiducialNumTimeSteps(numTimeSteps[domain_direction - 1]); @@ -511,8 +514,9 @@ TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, MHDContactWaveExpectSecondOrderC double const velocityX = waveSpeed; // Set the launch parameters - Set_Launch_Params(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, rEigenVec_MomentumY, rEigenVec_MomentumZ, rEigenVec_E, - rEigenVec_Bx, rEigenVec_By, rEigenVec_Bz, pitch, yaw, domain, domain_direction, velocityX, 16); + Set_Launch_Params(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, rEigenVec_MomentumY, rEigenVec_MomentumZ, + rEigenVec_E, rEigenVec_Bx, rEigenVec_By, rEigenVec_Bz, pitch, yaw, domain, domain_direction, + velocityX, 16); // Set the number of timesteps waveTest.setFiducialNumTimeSteps(numTimeSteps[domain_direction - 1]); From 59df1a59fc05018600e4a06763e13e8aa9e69f5f Mon Sep 17 00:00:00 2001 From: helenarichie Date: Tue, 15 Aug 2023 12:11:36 -0400 Subject: [PATCH 480/694] fix test data --- cholla-tests-data | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cholla-tests-data b/cholla-tests-data index 321416680..dcd73ff52 160000 --- a/cholla-tests-data +++ b/cholla-tests-data @@ -1 +1 @@ -Subproject commit 321416680f95d97b5d4ccc6f0b83a8b9ecafdaf0 +Subproject commit dcd73ff52b9027627b247c6d888bcdb56840c85e From 3034713d8c658cd4fcdaf598bb8e1fddb5489b15 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Fri, 18 Aug 2023 12:20:48 -0400 Subject: [PATCH 481/694] Fix MHD 3D check There were 'or' statements when it should have been 'and'. --- src/utils/error_handling.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/utils/error_handling.cpp b/src/utils/error_handling.cpp index 37e527ca8..79f78b434 100644 --- a/src/utils/error_handling.cpp +++ b/src/utils/error_handling.cpp @@ -88,7 +88,7 @@ void Check_Configuration(parameters const &P) // MHD Checks // ========== #ifdef MHD - assert(P.nx > 1 or P.ny > 1 or P.nz > 1 and "MHD runs must be 3D"); + assert(P.nx > 1 and P.ny > 1 and P.nz > 1 and "MHD runs must be 3D"); // Must use the correct integrator #if !defined(VL) || defined(SIMPLE) From acb9250471386b132f45a4d49283400885ab862e Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Fri, 18 Aug 2023 12:24:01 -0400 Subject: [PATCH 482/694] Add check for CTU integrator to MHD config check --- src/utils/error_handling.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/utils/error_handling.cpp b/src/utils/error_handling.cpp index 79f78b434..7c5a0cb72 100644 --- a/src/utils/error_handling.cpp +++ b/src/utils/error_handling.cpp @@ -91,7 +91,7 @@ void Check_Configuration(parameters const &P) assert(P.nx > 1 and P.ny > 1 and P.nz > 1 and "MHD runs must be 3D"); // Must use the correct integrator - #if !defined(VL) || defined(SIMPLE) + #if !defined(VL) || defined(SIMPLE) || defined(CTU) #error "MHD only supports the Van Leer integrator" #endif //! VL or SIMPLE From d7200c89d869b5a0e50007b68cddd9efb9bbc3c8 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Mon, 14 Aug 2023 16:27:44 -0400 Subject: [PATCH 483/694] Add a test for the _ctSlope function --- src/mhd/ct_electric_fields_tests.cu | 94 +++++++++++++++++++++++++++++ 1 file changed, 94 insertions(+) diff --git a/src/mhd/ct_electric_fields_tests.cu b/src/mhd/ct_electric_fields_tests.cu index afbaada66..0e2adf624 100644 --- a/src/mhd/ct_electric_fields_tests.cu +++ b/src/mhd/ct_electric_fields_tests.cu @@ -17,6 +17,7 @@ // Local Includes #include "../global/global.h" +#include "../io/io.h" #include "../mhd/ct_electric_fields.h" #include "../utils/testing_utilities.h" @@ -177,4 +178,97 @@ TEST_F(tMHDCalculateCTElectricFields, ZeroVelocityExpectCorrectOutput) runTest(); } // ============================================================================= + +// ============================================================================= +TEST(tMHDCTSlope, CorrectInputExpectCorrectOutput) +{ + // Set up the basic parameters + size_t const nx = 5; + size_t const ny = nx; + size_t const nz = nx; + int const xid = nx / 2; + int const yid = ny / 2; + int const zid = nz / 2; + size_t const n_cells = nx * ny * nz; + + // Set up the grid + std::vector flux(grid_enum::num_fields * n_cells), conserved(grid_enum::num_fields * n_cells); + + std::mt19937 prng(1); + std::uniform_real_distribution doubleRand(-5, 5); + + for (double& conserved_data : conserved) { + conserved_data = doubleRand(prng); + } + for (double& flux_data : flux) { + flux_data = doubleRand(prng); + } + + // Fiducial data + std::vector fiducial_data = { + -6.8725060451062561, -77.056763568617669, 1.4564238051915397, 5.4541656143291437, -0.83503550003671911, + -78.091781647940934, -2.6187125848387525, -5.6934594000939542, -16.243259069749971, -59.321631150095314, + 0.99291378610068892, 4.4004574252725384, -1.6902722376320516, -63.074645759822637, -4.5776373499662899, + -19.476095152639683, -2.0173881091784471, -74.484407919605786, -7.8184484634991724, -0.23206265131850434, + 0.41622472388590037, -74.479121547383727, -6.9903417764222358, -1.832282425083853}; + + // Get test data. Only test the options that will be used + std::vector test_data; + test_data.emplace_back( + mhd::_internal::_ctSlope(flux.data(), conserved.data(), -1, 0, 2, -1, 1, 2, xid, yid, zid, nx, ny, n_cells)); + test_data.emplace_back( + mhd::_internal::_ctSlope(flux.data(), conserved.data(), -1, 0, -1, -1, 1, -1, xid, yid, zid, nx, ny, n_cells)); + test_data.emplace_back( + mhd::_internal::_ctSlope(flux.data(), conserved.data(), -1, 0, 1, 2, 1, 2, xid, yid, zid, nx, ny, n_cells)); + test_data.emplace_back( + mhd::_internal::_ctSlope(flux.data(), conserved.data(), -1, 0, 1, -1, 1, -1, xid, yid, zid, nx, ny, n_cells)); + test_data.emplace_back( + mhd::_internal::_ctSlope(flux.data(), conserved.data(), 1, 0, 1, -1, 1, 2, xid, yid, zid, nx, ny, n_cells)); + test_data.emplace_back( + mhd::_internal::_ctSlope(flux.data(), conserved.data(), 1, 0, -1, -1, 2, -1, xid, yid, zid, nx, ny, n_cells)); + test_data.emplace_back( + mhd::_internal::_ctSlope(flux.data(), conserved.data(), 1, 0, 1, 2, 1, 2, xid, yid, zid, nx, ny, n_cells)); + test_data.emplace_back( + mhd::_internal::_ctSlope(flux.data(), conserved.data(), 1, 0, 2, -1, -1, 2, xid, yid, zid, nx, ny, n_cells)); + test_data.emplace_back( + mhd::_internal::_ctSlope(flux.data(), conserved.data(), 1, 1, 2, -1, 0, 2, xid, yid, zid, nx, ny, n_cells)); + test_data.emplace_back( + mhd::_internal::_ctSlope(flux.data(), conserved.data(), 1, 1, -1, -1, 0, -1, xid, yid, zid, nx, ny, n_cells)); + test_data.emplace_back( + mhd::_internal::_ctSlope(flux.data(), conserved.data(), 1, 1, 0, 2, 0, 2, xid, yid, zid, nx, ny, n_cells)); + test_data.emplace_back( + mhd::_internal::_ctSlope(flux.data(), conserved.data(), 1, 1, 0, -1, 0, -1, xid, yid, zid, nx, ny, n_cells)); + test_data.emplace_back( + mhd::_internal::_ctSlope(flux.data(), conserved.data(), -1, 1, 0, -1, 0, 2, xid, yid, zid, nx, ny, n_cells)); + test_data.emplace_back( + mhd::_internal::_ctSlope(flux.data(), conserved.data(), -1, 1, -1, -1, 2, -1, xid, yid, zid, nx, ny, n_cells)); + test_data.emplace_back( + mhd::_internal::_ctSlope(flux.data(), conserved.data(), -1, 1, 0, 2, 0, 2, xid, yid, zid, nx, ny, n_cells)); + test_data.emplace_back( + mhd::_internal::_ctSlope(flux.data(), conserved.data(), -1, 1, 2, -1, 2, -1, xid, yid, zid, nx, ny, n_cells)); + test_data.emplace_back( + mhd::_internal::_ctSlope(flux.data(), conserved.data(), 1, 2, 0, -1, 0, 1, xid, yid, zid, nx, ny, n_cells)); + test_data.emplace_back( + mhd::_internal::_ctSlope(flux.data(), conserved.data(), 1, 2, -1, -1, 1, -1, xid, yid, zid, nx, ny, n_cells)); + test_data.emplace_back( + mhd::_internal::_ctSlope(flux.data(), conserved.data(), 1, 2, 0, 1, 0, 1, xid, yid, zid, nx, ny, n_cells)); + test_data.emplace_back( + mhd::_internal::_ctSlope(flux.data(), conserved.data(), 1, 2, 1, -1, 1, -1, xid, yid, zid, nx, ny, n_cells)); + test_data.emplace_back( + mhd::_internal::_ctSlope(flux.data(), conserved.data(), -1, 2, 1, -1, 0, 1, xid, yid, zid, nx, ny, n_cells)); + test_data.emplace_back( + mhd::_internal::_ctSlope(flux.data(), conserved.data(), -1, 2, -1, -1, 0, -1, xid, yid, zid, nx, ny, n_cells)); + test_data.emplace_back( + mhd::_internal::_ctSlope(flux.data(), conserved.data(), -1, 2, 0, 1, 0, 1, xid, yid, zid, nx, ny, n_cells)); + test_data.emplace_back( + mhd::_internal::_ctSlope(flux.data(), conserved.data(), -1, 2, 0, -1, 0, -1, xid, yid, zid, nx, ny, n_cells)); + + // Check the results + ASSERT_EQ(test_data.size(), fiducial_data.size()); + + for (size_t i = 0; i < test_data.size(); i++) { + testingUtilities::checkResults(fiducial_data.at(i), test_data.at(i), ""); + } +} +// ============================================================================= #endif // MHD From e8cd68211a48dd14e64b20b1da6b3e5ef9b8785b Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Thu, 24 Aug 2023 13:34:26 -0400 Subject: [PATCH 484/694] Allow output to be turned off for MHD builds --- src/utils/error_handling.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/utils/error_handling.cpp b/src/utils/error_handling.cpp index 37e527ca8..7f7e5f042 100644 --- a/src/utils/error_handling.cpp +++ b/src/utils/error_handling.cpp @@ -106,7 +106,7 @@ void Check_Configuration(parameters const &P) #endif // Reconstruction check // must have HDF5 - #ifndef HDF5 + #if defined(OUTPUT) and (not defined(HDF5)) #error "MHD only supports HDF5 output" #endif //! HDF5 From da283ee824ae5bdd62bfc6a586dc698eb699c6a0 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Thu, 24 Aug 2023 14:23:49 -0400 Subject: [PATCH 485/694] clang-tidy: single line if statements don't need braces --- .clang-tidy | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.clang-tidy b/.clang-tidy index eccdfe06e..5082415d5 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -37,7 +37,8 @@ Checks: "*, google-readability-avoid-underscore-in-googletest-name, google-upgrade-googletest-case, - +-*, +readability-braces-around-statements, -bugprone-implicit-widening-of-multiplication-result, -bugprone-narrowing-conversions, -cert-env33-c, @@ -139,6 +140,7 @@ AnalyzeTemporaryDtors: false FormatStyle: 'file' UseColor: false CheckOptions: + readability-braces-around-statements.ShortStatementLines: 1 # readability-identifier-naming allowed casing types # - lower_case # - UPPER_CASE From 0f9c8b0345a4b0f700458b3a1fd58e16a4c7a597 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Thu, 24 Aug 2023 14:24:56 -0400 Subject: [PATCH 486/694] Add a message about how the timers work to run_timing.log Refactor of Time::Print_Average_Times to ease future additions to the header --- src/utils/timing_functions.cpp | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/src/utils/timing_functions.cpp b/src/utils/timing_functions.cpp index 133971b68..6c9a4eedd 100644 --- a/src/utils/timing_functions.cpp +++ b/src/utils/timing_functions.cpp @@ -150,14 +150,15 @@ void Time::Print_Average_Times(struct parameters P) } std::string file_name("run_timing.log"); - std::string header; chprintf("Writing timing values to file: %s \n", file_name.c_str()); - std::string gitHash = "Git Commit Hash = " + std::string(GIT_HASH) + std::string("\n"); - std::string macroFlags = "Macro Flags = " + std::string(MACRO_FLAGS) + std::string("\n\n"); + std::string header = "Git Commit Hash = " + std::string(GIT_HASH) + std::string("\n"); + header += "Macro Flags = " + std::string(MACRO_FLAGS) + std::string("\n"); + header += "Note that the timers all skip the first time step since it always takes longer." + std::string("\n") + + "To find the average time divide the time shown by n_steps-1" + std::string("\n"); - header = "#n_proc nx ny nz n_omp n_steps "; + header += std::string("\n") + "#n_proc nx ny nz n_omp n_steps "; for (OneTime* x : onetimes) { header += x->name; @@ -186,8 +187,6 @@ void Time::Print_Average_Times(struct parameters P) // Output timing values out_file.open(file_name.c_str(), std::ios::app); if (!file_exists) { - out_file << gitHash; - out_file << macroFlags; out_file << header; } #ifdef MPI_CHOLLA From b8b682a80dd30732dc2d18fba1eaa4d778ed7594 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Fri, 25 Aug 2023 14:47:05 -0400 Subject: [PATCH 487/694] Add Reduce_size_t_Max function to find the max of a size_t --- src/mpi/mpi_routines.cpp | 23 +++++++++++++++++++++++ src/mpi/mpi_routines.h | 8 ++++++++ 2 files changed, 31 insertions(+) diff --git a/src/mpi/mpi_routines.cpp b/src/mpi/mpi_routines.cpp index 513d29056..4efdfae7e 100644 --- a/src/mpi/mpi_routines.cpp +++ b/src/mpi/mpi_routines.cpp @@ -716,6 +716,29 @@ Real ReduceRealAvg(Real x) return y; } +size_t Reduce_size_t_Max(size_t in) +{ + // Get the right MPI type + #if SIZE_MAX == UCHAR_MAX + #define my_MPI_SIZE_T MPI_UNSIGNED_CHAR + #elif SIZE_MAX == USHRT_MAX + #define my_MPI_SIZE_T MPI_UNSIGNED_SHORT + #elif SIZE_MAX == UINT_MAX + #define my_MPI_SIZE_T MPI_UNSIGNED + #elif SIZE_MAX == ULONG_MAX + #define my_MPI_SIZE_T MPI_UNSIGNED_LONG + #elif SIZE_MAX == ULLONG_MAX + #define my_MPI_SIZE_T MPI_UNSIGNED_LONG_LONG + #else + #error "Error: Type of size_t not supported by Reduce_size_t_Max" + #endif + + // Perform the reduction + size_t out; + MPI_Allreduce(&in, &out, 1, my_MPI_SIZE_T, MPI_MAX, world); + return out; +} + #ifdef PARTICLES /* MPI reduction wrapper for sum(part_int)*/ Real ReducePartIntSum(part_int_t x) diff --git a/src/mpi/mpi_routines.h b/src/mpi/mpi_routines.h index 2d2a644b4..3c4b85403 100644 --- a/src/mpi/mpi_routines.h +++ b/src/mpi/mpi_routines.h @@ -166,6 +166,14 @@ Real ReduceRealMin(Real x); /* MPI reduction wrapper for avg(Real)*/ Real ReduceRealAvg(Real x); +/*! + * \brief MPI reduction wrapper to find the maximum of a size_t variable + * + * \param in The rank-local value to be reduced + * \return size_t The global reduced value + */ +size_t Reduce_size_t_Max(size_t in); + #ifdef PARTICLES /* MPI reduction wrapper for sum(part_int)*/ Real ReducePartIntSum(part_int_t x); From 80b3d196792133d69a188c6f15840c9978d46f2d Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Fri, 25 Aug 2023 14:48:15 -0400 Subject: [PATCH 488/694] Add a function to find the GPU memory used Added a call to that function to the end of the main loop --- src/main.cpp | 3 +++ src/utils/cuda_utilities.cpp | 31 +++++++++++++++++++++++++++++++ src/utils/cuda_utilities.h | 13 ++++++++++++- src/utils/gpu.hpp | 1 + 4 files changed, 47 insertions(+), 1 deletion(-) diff --git a/src/main.cpp b/src/main.cpp index b33ee8eba..7c1869f7e 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -14,7 +14,9 @@ #include "global/global.h" #include "grid/grid3D.h" #include "io/io.h" +#include "utils/cuda_utilities.h" #include "utils/error_handling.h" + #ifdef SUPERNOVA #include "particles/supernova.h" #ifdef ANALYSIS @@ -297,6 +299,7 @@ int main(int argc, char *argv[]) #endif #ifdef CPU_TIME + cuda_utilities::Print_GPU_Memory_Usage(); G.Timer.Total.End(); #endif // CPU_TIME diff --git a/src/utils/cuda_utilities.cpp b/src/utils/cuda_utilities.cpp index f1d04ac94..0e915d93d 100644 --- a/src/utils/cuda_utilities.cpp +++ b/src/utils/cuda_utilities.cpp @@ -1,6 +1,37 @@ +/*! + * \file cuda_utilities.cpp + * \brief Implementation file for cuda_utilities.h + * + */ #include "../utils/cuda_utilities.h" +#include +#include + +#include "../io/io.h" +#include "../mpi/mpi_routines.h" + namespace cuda_utilities { +void Print_GPU_Memory_Usage(std::string const &additional_text) +{ + // Get the memory usage + size_t gpu_free_memory, gpu_total_memory; + CudaSafeCall(cudaMemGetInfo(&gpu_free_memory, &gpu_total_memory)); + + // Assuming that all GPUs in the system have the same amount of memory + size_t const gpu_used_memory = Reduce_size_t_Max(gpu_total_memory - gpu_free_memory); + + Real const percent_used = 100.0 * (static_cast(gpu_used_memory) / static_cast(gpu_total_memory)); + + // Prep the message to print + std::stringstream output_message_stream; + output_message_stream << std::fixed << std::setprecision(2); + output_message_stream << "Percentage of GPU memory used: " << percent_used << "%. GPU memory used " + << std::to_string(gpu_used_memory) << ", GPU total memory " << std::to_string(gpu_total_memory) + << additional_text << std::endl; + std::string output_message = output_message_stream.str(); + chprintf(output_message.c_str()); +} } // end namespace cuda_utilities diff --git a/src/utils/cuda_utilities.h b/src/utils/cuda_utilities.h index 9838ae2d8..5a2e01673 100644 --- a/src/utils/cuda_utilities.h +++ b/src/utils/cuda_utilities.h @@ -7,6 +7,8 @@ #pragma once +#include + // Local Includes #include "../global/global.h" #include "../global/global_cuda.h" @@ -121,4 +123,13 @@ struct AutomaticLaunchParams { int numBlocks; }; // ===================================================================== -} // end namespace cuda_utilities + +// ===================================================================== +/*! + * \brief Print the current GPU memory usage to standard out + * + * \param additional_text Any additional text to be appended to the end of the message + */ +void Print_GPU_Memory_Usage(std::string const &additional_text = ""); +// ===================================================================== +} // end namespace cuda_utilities \ No newline at end of file diff --git a/src/utils/gpu.hpp b/src/utils/gpu.hpp index 49dffe13d..ec4554cf4 100644 --- a/src/utils/gpu.hpp +++ b/src/utils/gpu.hpp @@ -66,6 +66,7 @@ static constexpr int maxWarpsPerBlock = 1024 / WARPSIZE; #define cudaPointerAttributes hipPointerAttribute_t #define cudaPointerGetAttributes hipPointerGetAttributes #define cudaOccupancyMaxPotentialBlockSize hipOccupancyMaxPotentialBlockSize + #define cudaMemGetInfo hipMemGetInfo // Texture definitions #define cudaArray hipArray From 265db2ece5d9812bd5c7a4089b89e25d174f80ad Mon Sep 17 00:00:00 2001 From: evazlimen <109487593+evazlimen@users.noreply.github.com> Date: Fri, 1 Sep 2023 15:01:11 -0700 Subject: [PATCH 489/694] fix parameter files so they can be run without modification -add y and z boundaries = 0 for 1D tests -add z boundaries = 0 for 2D tests -change name of initial conditions to Riemann on noh_1D.txt - change the domain to [-1,1] on Shu_Osher.txt - clarify that the Ryu_and_Jones_1a.txt param file is for test 1a and NOT 4d - change the output directory to the current directory for various 2 and 3D tests: 2D sound wave, 3D uniform, 3d spherical overpressure, 3d spherical collapse --- examples/1D/123.txt | 4 ++++ examples/1D/Creasey_shock.txt | 4 ++++ examples/1D/Shu_Osher.txt | 8 ++++++-- examples/1D/noh_1D.txt | 6 +++++- examples/1D/sound_wave.txt | 4 ++++ examples/1D/square_wave.txt | 4 ++++ examples/1D/stationary.txt | 4 ++++ examples/1D/strong_shock.txt | 4 ++++ examples/1D/test_3.txt | 4 ++++ examples/1D/trac_pen.txt | 4 ++++ examples/1D/two_shocks.txt | 4 ++++ examples/2D/Noh_2D.txt | 2 ++ examples/2D/sod.txt | 2 ++ examples/2D/sound_wave.txt | 3 ++- examples/3D/Ryu_and_Jones_1a.txt | 4 ++-- examples/3D/Spherical_Collapse.txt | 4 +--- examples/3D/Spherical_Overpressure.txt | 3 +-- examples/3D/Uniform.txt | 2 +- 18 files changed, 58 insertions(+), 12 deletions(-) diff --git a/examples/1D/123.txt b/examples/1D/123.txt index 79a3b23a3..3f693baa6 100644 --- a/examples/1D/123.txt +++ b/examples/1D/123.txt @@ -26,6 +26,10 @@ zlen=1.0 # type of boundary conditions xl_bcnd=3 xu_bcnd=3 +yl_bcnd=0 +yu_bcnd=0 +zl_bcnd=0 +zu_bcnd=0 # path to output directory outdir=./ diff --git a/examples/1D/Creasey_shock.txt b/examples/1D/Creasey_shock.txt index f7d98d7dc..59821a945 100644 --- a/examples/1D/Creasey_shock.txt +++ b/examples/1D/Creasey_shock.txt @@ -26,6 +26,10 @@ zlen=3.08567758e18 # type of boundary conditions xl_bcnd=3 xu_bcnd=3 +yl_bcnd=0 +yu_bcnd=0 +zl_bcnd=0 +zu_bcnd=0 # path to output directory outdir=./ diff --git a/examples/1D/Shu_Osher.txt b/examples/1D/Shu_Osher.txt index 5d78eba7d..42d8a7ccb 100644 --- a/examples/1D/Shu_Osher.txt +++ b/examples/1D/Shu_Osher.txt @@ -19,15 +19,19 @@ gamma=1.4 # name of initial conditions init=Shu_Osher # domain properties -xmin=0.0 +xmin=-1.0 ymin=0.0 zmin=0.0 -xlen=1.0 +xlen=2.0 ylen=1.0 zlen=1.0 # type of boundary conditions xl_bcnd=3 xu_bcnd=3 +yl_bcnd=0 +yu_bcnd=0 +zl_bcnd=0 +zu_bcnd=0 # path to output directory outdir=./ diff --git a/examples/1D/noh_1D.txt b/examples/1D/noh_1D.txt index 3e9552295..d350c2479 100644 --- a/examples/1D/noh_1D.txt +++ b/examples/1D/noh_1D.txt @@ -14,7 +14,7 @@ tout=1.0 # time interval for output outstep=1.0 # name of initial conditions -init=Riemann_1D +init=Riemann # domain properties xmin=0.0 ymin=0.0 @@ -25,6 +25,10 @@ zlen=1.0 # type of boundary conditions xl_bcnd=3 xu_bcnd=3 +yl_bcnd=0 +yu_bcnd=0 +zl_bcnd=0 +zu_bcnd=0 # path to output directory outdir=./ diff --git a/examples/1D/sound_wave.txt b/examples/1D/sound_wave.txt index 97b7c92b1..13c6f8d05 100644 --- a/examples/1D/sound_wave.txt +++ b/examples/1D/sound_wave.txt @@ -25,6 +25,10 @@ zlen=1.0 # type of boundary conditions xl_bcnd=1 xu_bcnd=1 +yl_bcnd=0 +yu_bcnd=0 +zl_bcnd=0 +zu_bcnd=0 # path to output directory outdir=./ diff --git a/examples/1D/square_wave.txt b/examples/1D/square_wave.txt index d33805c15..d22282a66 100644 --- a/examples/1D/square_wave.txt +++ b/examples/1D/square_wave.txt @@ -26,6 +26,10 @@ zlen=1.0 # type of boundary conditions xl_bcnd=1 xu_bcnd=1 +yl_bcnd=0 +yu_bcnd=0 +zl_bcnd=0 +zu_bcnd=0 # path to output directory outdir=./ diff --git a/examples/1D/stationary.txt b/examples/1D/stationary.txt index 28941e868..746592847 100644 --- a/examples/1D/stationary.txt +++ b/examples/1D/stationary.txt @@ -27,6 +27,10 @@ zlen=1.0 # type of boundary conditions xl_bcnd=3 xu_bcnd=3 +yl_bcnd=0 +yu_bcnd=0 +zl_bcnd=0 +zu_bcnd=0 # path to output directory outdir=./ diff --git a/examples/1D/strong_shock.txt b/examples/1D/strong_shock.txt index 1726cf316..ff99eab23 100644 --- a/examples/1D/strong_shock.txt +++ b/examples/1D/strong_shock.txt @@ -25,6 +25,10 @@ zlen=1.0 # type of boundary conditions xl_bcnd=3 xu_bcnd=3 +yl_bcnd=0 +yu_bcnd=0 +zl_bcnd=0 +zu_bcnd=0 # path to output directory outdir=./ diff --git a/examples/1D/test_3.txt b/examples/1D/test_3.txt index 60997270c..3eff8abcc 100644 --- a/examples/1D/test_3.txt +++ b/examples/1D/test_3.txt @@ -26,6 +26,10 @@ zlen=1.0 # type of boundary conditions xl_bcnd=3 xu_bcnd=3 +yl_bcnd=0 +yu_bcnd=0 +zl_bcnd=0 +zu_bcnd=0 # path to output directory outdir=./ diff --git a/examples/1D/trac_pen.txt b/examples/1D/trac_pen.txt index 3c0081e5a..a24bf7167 100644 --- a/examples/1D/trac_pen.txt +++ b/examples/1D/trac_pen.txt @@ -26,6 +26,10 @@ zlen=1.0 # type of boundary conditions xl_bcnd=1 xu_bcnd=1 +yl_bcnd=0 +yu_bcnd=0 +zl_bcnd=0 +zu_bcnd=0 # path to output directory outdir=./ diff --git a/examples/1D/two_shocks.txt b/examples/1D/two_shocks.txt index a998bae46..c1ac4616a 100644 --- a/examples/1D/two_shocks.txt +++ b/examples/1D/two_shocks.txt @@ -26,6 +26,10 @@ zlen=1.0 # type of boundary conditions xl_bcnd=3 xu_bcnd=3 +yl_bcnd=0 +yu_bcnd=0 +zl_bcnd=0 +zu_bcnd=0 # path to output directory outdir=./ diff --git a/examples/2D/Noh_2D.txt b/examples/2D/Noh_2D.txt index 0e43af07d..5223983d5 100644 --- a/examples/2D/Noh_2D.txt +++ b/examples/2D/Noh_2D.txt @@ -29,6 +29,8 @@ xl_bcnd=2 xu_bcnd=4 yl_bcnd=2 yu_bcnd=4 +zl_bcnd=0 +zu_bcnd=0 custom_bcnd=noh # path to output directory outdir=./ diff --git a/examples/2D/sod.txt b/examples/2D/sod.txt index 27df1f256..1f60eab77 100644 --- a/examples/2D/sod.txt +++ b/examples/2D/sod.txt @@ -27,6 +27,8 @@ xl_bcnd=3 xu_bcnd=3 yl_bcnd=3 yu_bcnd=3 +zl_bcnd=0 +zu_bcnd=0 # path to output directory outdir=./ diff --git a/examples/2D/sound_wave.txt b/examples/2D/sound_wave.txt index 58608bac2..109eb8050 100644 --- a/examples/2D/sound_wave.txt +++ b/examples/2D/sound_wave.txt @@ -27,8 +27,9 @@ xl_bcnd=1 xu_bcnd=1 yl_bcnd=1 yu_bcnd=1 +zl_bcnd=0 +zu_bcnd=0 # path to output directory -#outdir=outputs/ outdir=./ ################################################# diff --git a/examples/3D/Ryu_and_Jones_1a.txt b/examples/3D/Ryu_and_Jones_1a.txt index 3b5b44400..c0c73cced 100644 --- a/examples/3D/Ryu_and_Jones_1a.txt +++ b/examples/3D/Ryu_and_Jones_1a.txt @@ -1,10 +1,10 @@ # -# Parameter File for 3D Ryu & Jones MHD shock tube 4d. +# Parameter File for 3D Ryu & Jones MHD shock tube 1a. # Citation: Ryu & Jones 1995 "Numerical Magnetohydrodynamics in Astrophysics: # Algorithms and Tests for One-Dimensional Flow" # # Note: There are many shock tubes in this paper. This settings file is -# specifically for shock tube 4d +# specifically for shock tube 1a # ################################################ diff --git a/examples/3D/Spherical_Collapse.txt b/examples/3D/Spherical_Collapse.txt index 8fad21920..739661216 100644 --- a/examples/3D/Spherical_Collapse.txt +++ b/examples/3D/Spherical_Collapse.txt @@ -32,6 +32,4 @@ yu_bcnd=1 zl_bcnd=1 zu_bcnd=1 # path to output directory -outdir=/data/groups/comp-astro/bruno/cosmo_sims/sphere_collapse/output_files/ -#outdir=/raid/bruno/data/cosmo_sims/cholla_pm/sphere_collapse/ -#outdir=/gpfs/alpine/scratch/bvilasen/ast149/sphere_collapse/output_files/ +outdir=./ diff --git a/examples/3D/Spherical_Overpressure.txt b/examples/3D/Spherical_Overpressure.txt index 7fec56a3b..0e77c4452 100644 --- a/examples/3D/Spherical_Overpressure.txt +++ b/examples/3D/Spherical_Overpressure.txt @@ -32,5 +32,4 @@ yu_bcnd=1 zl_bcnd=1 zu_bcnd=1 # path to output directory -#outdir=/gpfs/alpine/scratch/bvilasen/ast149/sphere_explosion/output_files/ -outdir=/raid/bruno/data/cosmo_sims/cholla_pm/sphere_explosion/ +outdir=./ diff --git a/examples/3D/Uniform.txt b/examples/3D/Uniform.txt index 84fd900f6..e08e76dba 100644 --- a/examples/3D/Uniform.txt +++ b/examples/3D/Uniform.txt @@ -32,4 +32,4 @@ yu_bcnd=1 zl_bcnd=1 zu_bcnd=1 # path to output directory -outdir=/raid/bruno/data/cosmo_sims/cholla_pm/uniform/ +outdir=./ From 1f2b35b9a7c09116913c0d4a7b5c430cc61d979a Mon Sep 17 00:00:00 2001 From: evazlimen <109487593+evazlimen@users.noreply.github.com> Date: Fri, 1 Sep 2023 15:07:05 -0700 Subject: [PATCH 490/694] reset to what cholla/dev has --- builds/make.host.lux | 2 +- builds/setup.lux.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/builds/make.host.lux b/builds/make.host.lux index 6ce455fcb..edf4e42c0 100644 --- a/builds/make.host.lux +++ b/builds/make.host.lux @@ -9,7 +9,7 @@ GPUFLAGS = -std=c++17 OMP_NUM_THREADS = 10 #-- Library -CUDA_ROOT = /cm/shared/apps/cuda11.2/toolkit/current +CUDA_ROOT = /cm/shared/apps/cuda10.2/toolkit/current HDF5_ROOT = /cm/shared/apps/hdf5/1.10.6 FFTW_ROOT = /home/brvillas/code/fftw-3.3.8 PFFT_ROOT = /data/groups/comp-astro/bruno/code_mpi_local/pfft diff --git a/builds/setup.lux.sh b/builds/setup.lux.sh index ab3606d66..6d6d408f3 100755 --- a/builds/setup.lux.sh +++ b/builds/setup.lux.sh @@ -1,6 +1,6 @@ #!/bin/bash -module load hdf5/1.10.6 cuda11.2 openmpi/4.0.1 devtoolset-9 +module load hdf5/1.10.6 cuda10.2/10.2 openmpi/4.0.1 export MACHINE=lux export CHOLLA_ENVSET=1 From b4413727b9a1f7aad1d65ca89e5ff3266b577637 Mon Sep 17 00:00:00 2001 From: Matthew Abruzzo Date: Thu, 7 Sep 2023 12:33:29 -0400 Subject: [PATCH 491/694] Ensures that any directories specifed as part of ``outdir`` exist. This is accomplished through the new ``Ensure_Outdir_Exists`` function. Along the way, I introduced a helper function ``Is_Root_Proc()``, local to ``io.cpp`` so that we can reduce some of the ``ifdef`` statements. --- src/io/io.cpp | 68 +++++++++++++++++++++++++++++++++++++++++---------- src/io/io.h | 6 +++++ src/main.cpp | 1 + 3 files changed, 62 insertions(+), 13 deletions(-) diff --git a/src/io/io.cpp b/src/io/io.cpp index 420982309..b487608eb 100644 --- a/src/io/io.cpp +++ b/src/io/io.cpp @@ -6,9 +6,11 @@ #include #include +#include #include #include #include +#include #ifdef HDF5 #include #endif // HDF5 @@ -32,13 +34,22 @@ * output routine */ void rotate_point(Real x, Real y, Real z, Real delta, Real phi, Real theta, Real *xp, Real *yp, Real *zp); -void Create_Log_File(struct parameters P) +/* local function that designates whether we are using a root-process. It gives + * gives a sensible result regardless of whether we are using MPI */ +static inline bool Is_Root_Proc() { #ifdef MPI_CHOLLA - if (procID != 0) { + return procID == root; +#else + return true; +#endif +} + +void Create_Log_File(struct parameters P) +{ + if (!Is_Root_Proc()) { return; } -#endif std::string file_name(LOG_FILE_NAME); chprintf("\nCreating Log File: %s \n\n", file_name.c_str()); @@ -64,11 +75,9 @@ void Create_Log_File(struct parameters P) void Write_Message_To_Log_File(const char *message) { -#ifdef MPI_CHOLLA - if (procID != 0) { + if (!!Is_Root_Proc()) { return; } -#endif std::string file_name(LOG_FILE_NAME); std::ofstream out_file; @@ -2587,20 +2596,14 @@ void Grid3D::Read_Grid_HDF5(hid_t file_id, struct parameters P) int chprintf(const char *__restrict sdata, ...) // NOLINT(cert-dcl50-cpp) { int code = 0; -#ifdef MPI_CHOLLA /*limit printf to root process only*/ - if (procID == root) { -#endif /*MPI_CHOLLA*/ - + if (!Is_Root_Proc()) { va_list ap; va_start(ap, sdata); code = vfprintf(stdout, sdata, ap); // NOLINT(clang-analyzer-valist.Uninitialized) va_end(ap); fflush(stdout); - -#ifdef MPI_CHOLLA } -#endif /*MPI_CHOLLA*/ return code; } @@ -2661,3 +2664,42 @@ void write_debug(Real *Value, const char *fname, int nValues, int iProc) fclose(fp); } + +void Ensure_Outdir_Exists(std::string outdir) +{ + if (outdir == "") { + return; + } else if (Is_Root_Proc()) { + // if the last character of outdir is not a '/', then the substring of + // characters after the final '/' (or entire string if there isn't any '/') + // is treated as a file-prefix + // + // this is accomplished here: + std::filesystem::path without_file_prefix = std::filesystem::path(outdir).parent_path(); + + if (!without_file_prefix.empty()) { + // try to create all directories specified within outdir (does nothing if + // the directories already exist) + std::error_code err_code; + std::filesystem::create_directories(without_file_prefix, err_code); + + // confirm that an error-code wasn't set & that the path actually refers + // to a directory (it's unclear from docs whether err-code is set in that + // case) + if (err_code || !std::filesystem::is_directory(without_file_prefix)) { + chprintf( + "something went wrong while trying to create the path to the " + "output-dir: %s\n", + outdir.c_str()); + chexit(1); + } + } + } + + // this barrier ensures we won't ever encounter a scenario when 1 process + // tries to write a file to a non-existent directory before the root process + // has a chance to create it +#ifdef MPI_CHOLLA + MPI_Barrier(world); +#endif +} diff --git a/src/io/io.h b/src/io/io.h index a24fe788c..bf547616e 100644 --- a/src/io/io.h +++ b/src/io/io.h @@ -51,6 +51,12 @@ void Write_Message_To_Log_File(const char* message); void write_debug(Real* Value, const char* fname, int nValues, int iProc); +/* Checks whether the directories referred to within outdir exist. Creates them + * if they don't. It gracefully handles cases where outdir contains a prefix + * for the output files. + */ +void Ensure_Outdir_Exists(std::string outdir); + #ifdef HDF5 // From io/io.cpp diff --git a/src/main.cpp b/src/main.cpp index b33ee8eba..39c201ce2 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -93,6 +93,7 @@ int main(int argc, char *argv[]) chprintf("Input directory: %s\n", P.indir); } chprintf("Output directory: %s\n", P.outdir); + Ensure_Outdir_Exists(P.outdir); // Check the configuration Check_Configuration(P); From 44f8dd0aec72197cfb524d68206b2155d4db7ab9 Mon Sep 17 00:00:00 2001 From: Matthew Abruzzo Date: Thu, 7 Sep 2023 18:31:50 -0400 Subject: [PATCH 492/694] address PR review comments from @alwinm & @bcaddy - removed an unnecessary include-directive - fixed a double-negative boolean negation operation (I previously wrote `!!` when it should have just been `!`) - replaced `!` operator with `not` keyword and `||` operator with `or` keyword --- src/io/io.cpp | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/io/io.cpp b/src/io/io.cpp index b487608eb..119d52203 100644 --- a/src/io/io.cpp +++ b/src/io/io.cpp @@ -10,7 +10,6 @@ #include #include #include -#include #ifdef HDF5 #include #endif // HDF5 @@ -47,7 +46,7 @@ static inline bool Is_Root_Proc() void Create_Log_File(struct parameters P) { - if (!Is_Root_Proc()) { + if (not Is_Root_Proc()) { return; } @@ -75,7 +74,7 @@ void Create_Log_File(struct parameters P) void Write_Message_To_Log_File(const char *message) { - if (!!Is_Root_Proc()) { + if (not Is_Root_Proc()) { return; } @@ -2597,7 +2596,7 @@ int chprintf(const char *__restrict sdata, ...) // NOLINT(cert-dcl50-cpp) { int code = 0; /*limit printf to root process only*/ - if (!Is_Root_Proc()) { + if (not Is_Root_Proc()) { va_list ap; va_start(ap, sdata); code = vfprintf(stdout, sdata, ap); // NOLINT(clang-analyzer-valist.Uninitialized) @@ -2686,7 +2685,7 @@ void Ensure_Outdir_Exists(std::string outdir) // confirm that an error-code wasn't set & that the path actually refers // to a directory (it's unclear from docs whether err-code is set in that // case) - if (err_code || !std::filesystem::is_directory(without_file_prefix)) { + if (err_code or not std::filesystem::is_directory(without_file_prefix)) { chprintf( "something went wrong while trying to create the path to the " "output-dir: %s\n", From 4ea78a30ad11a5fafeb0ec0d5f7161256c2e1e40 Mon Sep 17 00:00:00 2001 From: ezlimen Date: Fri, 8 Sep 2023 11:34:47 -0700 Subject: [PATCH 493/694] fix builds --- builds/make.host.lux | 2 +- builds/setup.lux.sh | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/builds/make.host.lux b/builds/make.host.lux index edf4e42c0..6ce455fcb 100644 --- a/builds/make.host.lux +++ b/builds/make.host.lux @@ -9,7 +9,7 @@ GPUFLAGS = -std=c++17 OMP_NUM_THREADS = 10 #-- Library -CUDA_ROOT = /cm/shared/apps/cuda10.2/toolkit/current +CUDA_ROOT = /cm/shared/apps/cuda11.2/toolkit/current HDF5_ROOT = /cm/shared/apps/hdf5/1.10.6 FFTW_ROOT = /home/brvillas/code/fftw-3.3.8 PFFT_ROOT = /data/groups/comp-astro/bruno/code_mpi_local/pfft diff --git a/builds/setup.lux.sh b/builds/setup.lux.sh index 6d6d408f3..3ef07c50c 100755 --- a/builds/setup.lux.sh +++ b/builds/setup.lux.sh @@ -1,6 +1,7 @@ #!/bin/bash -module load hdf5/1.10.6 cuda10.2/10.2 openmpi/4.0.1 +###module load hdf5/1.10.6 cuda10.2/10.2 openmpi/4.0.1 +module load hdf5/1.10.6 cuda11.2 openmpi/4.0.1 devtoolset-9 export MACHINE=lux export CHOLLA_ENVSET=1 From 888a557dc1d069dc393ae00e235d430851600315 Mon Sep 17 00:00:00 2001 From: ezlimen Date: Fri, 8 Sep 2023 14:03:16 -0700 Subject: [PATCH 494/694] runs on lux --- builds/make.host.lux | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/builds/make.host.lux b/builds/make.host.lux index edf4e42c0..6ce455fcb 100644 --- a/builds/make.host.lux +++ b/builds/make.host.lux @@ -9,7 +9,7 @@ GPUFLAGS = -std=c++17 OMP_NUM_THREADS = 10 #-- Library -CUDA_ROOT = /cm/shared/apps/cuda10.2/toolkit/current +CUDA_ROOT = /cm/shared/apps/cuda11.2/toolkit/current HDF5_ROOT = /cm/shared/apps/hdf5/1.10.6 FFTW_ROOT = /home/brvillas/code/fftw-3.3.8 PFFT_ROOT = /data/groups/comp-astro/bruno/code_mpi_local/pfft From 1a3ae6667bdcf8650228523cf1c3528c470855c3 Mon Sep 17 00:00:00 2001 From: ezlimen Date: Sat, 9 Sep 2023 16:15:03 -0700 Subject: [PATCH 495/694] change output time of 3D KH_res_ind and make 2D kh_res_ind ICs set --- examples/3D/KH_res_ind_3D.txt | 2 +- src/grid/initial_conditions.cpp | 126 ++++++++++++++++---------------- 2 files changed, 66 insertions(+), 62 deletions(-) diff --git a/examples/3D/KH_res_ind_3D.txt b/examples/3D/KH_res_ind_3D.txt index ab846867a..2ebe6cda0 100644 --- a/examples/3D/KH_res_ind_3D.txt +++ b/examples/3D/KH_res_ind_3D.txt @@ -10,7 +10,7 @@ ny=128 # number of grid cells in the z dimension nz=128 # final output time -tout=5.0 +tout=3.0 # time interval for output outstep=0.01 # value of gamma diff --git a/src/grid/initial_conditions.cpp b/src/grid/initial_conditions.cpp index 38967e4b7..eba91f463 100644 --- a/src/grid/initial_conditions.cpp +++ b/src/grid/initial_conditions.cpp @@ -772,8 +772,8 @@ void Grid3D::KH_res_ind() d1 = 100.0; // inner density d2 = 1.0; // outer density - v1 = 10.5; // inner velocity - v2 = 9.5; // outer velocity + v1 = 0.5; // inner velocity + v2 = -0.5; // outer velocity P = 2.5; // pressure dy = 0.05; // width of ramp function (see Robertson 2009) A = 0.1; // amplitude of the perturbation @@ -788,73 +788,77 @@ void Grid3D::KH_res_ind() id = i + j * H.nx + k * H.nx * H.ny; // get the centered x and y positions Get_Position(i, j, k, &x_pos, &y_pos, &z_pos); - - // inner fluid - if (fabs(y_pos - 0.5) < 0.25) { - if (y_pos > 0.5) { - C.density[id] = + // 2D initial conditions: + if (H.nz == 1) { + // inner fluid + if (fabs(y_pos - 0.5) < 0.25) { + if (y_pos > 0.5) { + C.density[id] = d1 - (d1 - d2) * exp(-0.5 * pow(y_pos - 0.75 - sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy)); - C.momentum_x[id] = + C.momentum_x[id] = v1 * C.density[id] - C.density[id] * (v1 - v2) * - exp(-0.5 * pow(y_pos - 0.75 - sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy)); - C.momentum_y[id] = C.density[id] * A * sin(4 * M_PI * x_pos) * - exp(-0.5 * pow(y_pos - 0.75 - sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy)); - } else { - C.density[id] = + exp(-0.5 * pow(y_pos - 0.75 - sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy)); + C.momentum_y[id] = C.density[id] * A * sin(4 * M_PI * x_pos) * + exp(-0.5 * pow(y_pos - 0.75 - sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy)); + } else { + C.density[id] = d1 - (d1 - d2) * exp(-0.5 * pow(y_pos - 0.25 + sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy)); - C.momentum_x[id] = + C.momentum_x[id] = v1 * C.density[id] - C.density[id] * (v1 - v2) * - exp(-0.5 * pow(y_pos - 0.25 + sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy)); - C.momentum_y[id] = C.density[id] * A * sin(4 * M_PI * x_pos) * - exp(-0.5 * pow(y_pos - 0.25 + sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy)); - } - } - // outer fluid - else { - if (y_pos > 0.5) { - C.density[id] = + exp(-0.5 * pow(y_pos - 0.25 + sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy)); + C.momentum_y[id] = C.density[id] * A * sin(4 * M_PI * x_pos) * + exp(-0.5 * pow(y_pos - 0.25 + sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy)); + } + } + // outer fluid + else { + if (y_pos > 0.5) { + C.density[id] = d2 + (d1 - d2) * exp(-0.5 * pow(y_pos - 0.75 + sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy)); - C.momentum_x[id] = + C.momentum_x[id] = v2 * C.density[id] + C.density[id] * (v1 - v2) * - exp(-0.5 * pow(y_pos - 0.75 + sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy)); - C.momentum_y[id] = C.density[id] * A * sin(4 * M_PI * x_pos) * - exp(-0.5 * pow(y_pos - 0.75 + sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy)); - } else { - C.density[id] = + exp(-0.5 * pow(y_pos - 0.75 + sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy)); + C.momentum_y[id] = C.density[id] * A * sin(4 * M_PI * x_pos) * + exp(-0.5 * pow(y_pos - 0.75 + sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy)); + } else { + C.density[id] = d2 + (d1 - d2) * exp(-0.5 * pow(y_pos - 0.25 - sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy)); - C.momentum_x[id] = + C.momentum_x[id] = v2 * C.density[id] + C.density[id] * (v1 - v2) * - exp(-0.5 * pow(y_pos - 0.25 - sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy)); - C.momentum_y[id] = C.density[id] * A * sin(4 * M_PI * x_pos) * - exp(-0.5 * pow(y_pos - 0.25 - sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy)); - } - } - // C.momentum_y[id] = C.density[id] * A*sin(4*PI*x_pos); - C.momentum_z[id] = 0.0; - - // cylindrical version (3D only) - r = sqrt((z_pos - zc) * (z_pos - zc) + (y_pos - yc) * (y_pos - yc)); // center the cylinder at yc, zc - phi = atan2((z_pos - zc), (y_pos - yc)); - - if (r < 0.25) // inside the cylinder - { - C.density[id] = d1 - (d1 - d2) * exp(-0.5 * pow(r - 0.25 - sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy)); - C.momentum_x[id] = v1 * C.density[id] - - C.density[id] * exp(-0.5 * pow(r - 0.25 - sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy)); - C.momentum_y[id] = cos(phi) * C.density[id] * A * sin(4 * M_PI * x_pos) * - exp(-0.5 * pow(r - 0.25 + sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy)); - C.momentum_z[id] = sin(phi) * C.density[id] * A * sin(4 * M_PI * x_pos) * - exp(-0.5 * pow(r - 0.25 + sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy)); - } else // outside the cylinder - { - C.density[id] = d2 + (d1 - d2) * exp(-0.5 * pow(r - 0.25 + sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy)); - C.momentum_x[id] = v2 * C.density[id] + - C.density[id] * exp(-0.5 * pow(r - 0.25 + sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy)); - C.momentum_y[id] = cos(phi) * C.density[id] * A * sin(4 * M_PI * x_pos) * - (1.0 - exp(-0.5 * pow(r - 0.25 + sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy))); - C.momentum_z[id] = sin(phi) * C.density[id] * A * sin(4 * M_PI * x_pos) * - (1.0 - exp(-0.5 * pow(r - 0.25 + sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy))); - } + exp(-0.5 * pow(y_pos - 0.25 - sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy)); + C.momentum_y[id] = C.density[id] * A * sin(4 * M_PI * x_pos) * + exp(-0.5 * pow(y_pos - 0.25 - sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy)); + } + } + // C.momentum_y[id] = C.density[id] * A*sin(4*PI*x_pos); + C.momentum_z[id] = 0.0; + + //3D initial conditions: + } else { + // cylindrical version (3D only) + r = sqrt((z_pos - zc) * (z_pos - zc) + (y_pos - yc) * (y_pos - yc)); // center the cylinder at yc, zc + phi = atan2((z_pos - zc), (y_pos - yc)); + + if (r < 0.25) // inside the cylinder + { + C.density[id] = d1 - (d1 - d2) * exp(-0.5 * pow(r - 0.25 - sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy)); + C.momentum_x[id] = v1 * C.density[id] - + C.density[id] * exp(-0.5 * pow(r - 0.25 - sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy)); + C.momentum_y[id] = cos(phi) * C.density[id] * A * sin(4 * M_PI * x_pos) * + exp(-0.5 * pow(r - 0.25 + sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy)); + C.momentum_z[id] = sin(phi) * C.density[id] * A * sin(4 * M_PI * x_pos) * + exp(-0.5 * pow(r - 0.25 + sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy)); + } else // outside the cylinder + { + C.density[id] = d2 + (d1 - d2) * exp(-0.5 * pow(r - 0.25 + sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy)); + C.momentum_x[id] = v2 * C.density[id] + + C.density[id] * exp(-0.5 * pow(r - 0.25 + sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy)); + C.momentum_y[id] = cos(phi) * C.density[id] * A * sin(4 * M_PI * x_pos) * + (1.0 - exp(-0.5 * pow(r - 0.25 + sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy))); + C.momentum_z[id] = sin(phi) * C.density[id] * A * sin(4 * M_PI * x_pos) * + (1.0 - exp(-0.5 * pow(r - 0.25 + sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy))); + } + } // No matter what we do with the density and momentum, set the Energy // and GasEnergy appropriately From d92ee47bdd98f20c3f3e71e765144d0248d19510 Mon Sep 17 00:00:00 2001 From: ezlimen Date: Mon, 11 Sep 2023 09:29:26 -0700 Subject: [PATCH 496/694] Revert "fix builds" This reverts commit 4ea78a30ad11a5fafeb0ec0d5f7161256c2e1e40. --- builds/make.host.lux | 2 +- builds/setup.lux.sh | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/builds/make.host.lux b/builds/make.host.lux index 6ce455fcb..edf4e42c0 100644 --- a/builds/make.host.lux +++ b/builds/make.host.lux @@ -9,7 +9,7 @@ GPUFLAGS = -std=c++17 OMP_NUM_THREADS = 10 #-- Library -CUDA_ROOT = /cm/shared/apps/cuda11.2/toolkit/current +CUDA_ROOT = /cm/shared/apps/cuda10.2/toolkit/current HDF5_ROOT = /cm/shared/apps/hdf5/1.10.6 FFTW_ROOT = /home/brvillas/code/fftw-3.3.8 PFFT_ROOT = /data/groups/comp-astro/bruno/code_mpi_local/pfft diff --git a/builds/setup.lux.sh b/builds/setup.lux.sh index 3ef07c50c..6d6d408f3 100755 --- a/builds/setup.lux.sh +++ b/builds/setup.lux.sh @@ -1,7 +1,6 @@ #!/bin/bash -###module load hdf5/1.10.6 cuda10.2/10.2 openmpi/4.0.1 -module load hdf5/1.10.6 cuda11.2 openmpi/4.0.1 devtoolset-9 +module load hdf5/1.10.6 cuda10.2/10.2 openmpi/4.0.1 export MACHINE=lux export CHOLLA_ENVSET=1 From 4063d268319ce6607a7a64f35e634d5310d310ba Mon Sep 17 00:00:00 2001 From: evazlimen <109487593+evazlimen@users.noreply.github.com> Date: Mon, 11 Sep 2023 15:36:47 -0700 Subject: [PATCH 497/694] clang-format --- src/grid/initial_conditions.cpp | 148 ++++++++++++++++---------------- 1 file changed, 75 insertions(+), 73 deletions(-) diff --git a/src/grid/initial_conditions.cpp b/src/grid/initial_conditions.cpp index eba91f463..fded9236b 100644 --- a/src/grid/initial_conditions.cpp +++ b/src/grid/initial_conditions.cpp @@ -772,8 +772,8 @@ void Grid3D::KH_res_ind() d1 = 100.0; // inner density d2 = 1.0; // outer density - v1 = 0.5; // inner velocity - v2 = -0.5; // outer velocity + v1 = 0.5; // inner velocity + v2 = -0.5; // outer velocity P = 2.5; // pressure dy = 0.05; // width of ramp function (see Robertson 2009) A = 0.1; // amplitude of the perturbation @@ -788,77 +788,79 @@ void Grid3D::KH_res_ind() id = i + j * H.nx + k * H.nx * H.ny; // get the centered x and y positions Get_Position(i, j, k, &x_pos, &y_pos, &z_pos); - // 2D initial conditions: - if (H.nz == 1) { - // inner fluid - if (fabs(y_pos - 0.5) < 0.25) { - if (y_pos > 0.5) { - C.density[id] = - d1 - (d1 - d2) * exp(-0.5 * pow(y_pos - 0.75 - sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy)); - C.momentum_x[id] = - v1 * C.density[id] - C.density[id] * (v1 - v2) * - exp(-0.5 * pow(y_pos - 0.75 - sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy)); - C.momentum_y[id] = C.density[id] * A * sin(4 * M_PI * x_pos) * - exp(-0.5 * pow(y_pos - 0.75 - sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy)); - } else { - C.density[id] = - d1 - (d1 - d2) * exp(-0.5 * pow(y_pos - 0.25 + sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy)); - C.momentum_x[id] = - v1 * C.density[id] - C.density[id] * (v1 - v2) * - exp(-0.5 * pow(y_pos - 0.25 + sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy)); - C.momentum_y[id] = C.density[id] * A * sin(4 * M_PI * x_pos) * - exp(-0.5 * pow(y_pos - 0.25 + sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy)); - } - } - // outer fluid - else { - if (y_pos > 0.5) { - C.density[id] = - d2 + (d1 - d2) * exp(-0.5 * pow(y_pos - 0.75 + sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy)); - C.momentum_x[id] = - v2 * C.density[id] + C.density[id] * (v1 - v2) * - exp(-0.5 * pow(y_pos - 0.75 + sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy)); - C.momentum_y[id] = C.density[id] * A * sin(4 * M_PI * x_pos) * - exp(-0.5 * pow(y_pos - 0.75 + sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy)); - } else { - C.density[id] = - d2 + (d1 - d2) * exp(-0.5 * pow(y_pos - 0.25 - sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy)); - C.momentum_x[id] = - v2 * C.density[id] + C.density[id] * (v1 - v2) * - exp(-0.5 * pow(y_pos - 0.25 - sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy)); - C.momentum_y[id] = C.density[id] * A * sin(4 * M_PI * x_pos) * - exp(-0.5 * pow(y_pos - 0.25 - sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy)); - } - } - // C.momentum_y[id] = C.density[id] * A*sin(4*PI*x_pos); - C.momentum_z[id] = 0.0; - - //3D initial conditions: - } else { - // cylindrical version (3D only) - r = sqrt((z_pos - zc) * (z_pos - zc) + (y_pos - yc) * (y_pos - yc)); // center the cylinder at yc, zc - phi = atan2((z_pos - zc), (y_pos - yc)); - - if (r < 0.25) // inside the cylinder - { - C.density[id] = d1 - (d1 - d2) * exp(-0.5 * pow(r - 0.25 - sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy)); - C.momentum_x[id] = v1 * C.density[id] - - C.density[id] * exp(-0.5 * pow(r - 0.25 - sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy)); - C.momentum_y[id] = cos(phi) * C.density[id] * A * sin(4 * M_PI * x_pos) * - exp(-0.5 * pow(r - 0.25 + sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy)); - C.momentum_z[id] = sin(phi) * C.density[id] * A * sin(4 * M_PI * x_pos) * - exp(-0.5 * pow(r - 0.25 + sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy)); - } else // outside the cylinder - { - C.density[id] = d2 + (d1 - d2) * exp(-0.5 * pow(r - 0.25 + sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy)); - C.momentum_x[id] = v2 * C.density[id] + - C.density[id] * exp(-0.5 * pow(r - 0.25 + sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy)); - C.momentum_y[id] = cos(phi) * C.density[id] * A * sin(4 * M_PI * x_pos) * - (1.0 - exp(-0.5 * pow(r - 0.25 + sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy))); - C.momentum_z[id] = sin(phi) * C.density[id] * A * sin(4 * M_PI * x_pos) * - (1.0 - exp(-0.5 * pow(r - 0.25 + sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy))); - } - } + // 2D initial conditions: + if (H.nz == 1) { + // inner fluid + if (fabs(y_pos - 0.5) < 0.25) { + if (y_pos > 0.5) { + C.density[id] = + d1 - (d1 - d2) * exp(-0.5 * pow(y_pos - 0.75 - sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy)); + C.momentum_x[id] = v1 * C.density[id] - + C.density[id] * (v1 - v2) * + exp(-0.5 * pow(y_pos - 0.75 - sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy)); + C.momentum_y[id] = C.density[id] * A * sin(4 * M_PI * x_pos) * + exp(-0.5 * pow(y_pos - 0.75 - sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy)); + } else { + C.density[id] = + d1 - (d1 - d2) * exp(-0.5 * pow(y_pos - 0.25 + sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy)); + C.momentum_x[id] = v1 * C.density[id] - + C.density[id] * (v1 - v2) * + exp(-0.5 * pow(y_pos - 0.25 + sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy)); + C.momentum_y[id] = C.density[id] * A * sin(4 * M_PI * x_pos) * + exp(-0.5 * pow(y_pos - 0.25 + sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy)); + } + } + // outer fluid + else { + if (y_pos > 0.5) { + C.density[id] = + d2 + (d1 - d2) * exp(-0.5 * pow(y_pos - 0.75 + sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy)); + C.momentum_x[id] = v2 * C.density[id] + + C.density[id] * (v1 - v2) * + exp(-0.5 * pow(y_pos - 0.75 + sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy)); + C.momentum_y[id] = C.density[id] * A * sin(4 * M_PI * x_pos) * + exp(-0.5 * pow(y_pos - 0.75 + sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy)); + } else { + C.density[id] = + d2 + (d1 - d2) * exp(-0.5 * pow(y_pos - 0.25 - sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy)); + C.momentum_x[id] = v2 * C.density[id] + + C.density[id] * (v1 - v2) * + exp(-0.5 * pow(y_pos - 0.25 - sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy)); + C.momentum_y[id] = C.density[id] * A * sin(4 * M_PI * x_pos) * + exp(-0.5 * pow(y_pos - 0.25 - sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy)); + } + } + // C.momentum_y[id] = C.density[id] * A*sin(4*PI*x_pos); + C.momentum_z[id] = 0.0; + + // 3D initial conditions: + } else { + // cylindrical version (3D only) + r = sqrt((z_pos - zc) * (z_pos - zc) + (y_pos - yc) * (y_pos - yc)); // center the cylinder at yc, zc + phi = atan2((z_pos - zc), (y_pos - yc)); + + if (r < 0.25) // inside the cylinder + { + C.density[id] = d1 - (d1 - d2) * exp(-0.5 * pow(r - 0.25 - sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy)); + C.momentum_x[id] = + v1 * C.density[id] - + C.density[id] * exp(-0.5 * pow(r - 0.25 - sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy)); + C.momentum_y[id] = cos(phi) * C.density[id] * A * sin(4 * M_PI * x_pos) * + exp(-0.5 * pow(r - 0.25 + sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy)); + C.momentum_z[id] = sin(phi) * C.density[id] * A * sin(4 * M_PI * x_pos) * + exp(-0.5 * pow(r - 0.25 + sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy)); + } else // outside the cylinder + { + C.density[id] = d2 + (d1 - d2) * exp(-0.5 * pow(r - 0.25 + sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy)); + C.momentum_x[id] = + v2 * C.density[id] + + C.density[id] * exp(-0.5 * pow(r - 0.25 + sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy)); + C.momentum_y[id] = cos(phi) * C.density[id] * A * sin(4 * M_PI * x_pos) * + (1.0 - exp(-0.5 * pow(r - 0.25 + sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy))); + C.momentum_z[id] = sin(phi) * C.density[id] * A * sin(4 * M_PI * x_pos) * + (1.0 - exp(-0.5 * pow(r - 0.25 + sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy))); + } + } // No matter what we do with the density and momentum, set the Energy // and GasEnergy appropriately From 5da2501113b5a932201ff30c4b14d56e7d39530f Mon Sep 17 00:00:00 2001 From: ezlimen Date: Mon, 11 Sep 2023 16:44:53 -0700 Subject: [PATCH 498/694] make it possible for noh initial conditions to be set --- src/grid/boundary_conditions.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/grid/boundary_conditions.cpp b/src/grid/boundary_conditions.cpp index eca473fdb..51d010069 100644 --- a/src/grid/boundary_conditions.cpp +++ b/src/grid/boundary_conditions.cpp @@ -512,7 +512,7 @@ void Grid3D::Custom_Boundary(char bcnd[MAXLEN]) // from grid/cuda_boundaries.cu Noh_Boundary(); } - if (strcmp(bcnd, "wind") == 0) { + else if (strcmp(bcnd, "wind") == 0) { // from grid/cuda_boundaries.cu Wind_Boundary(); } else { From f1551909766fcdc0eefc8fd4963bfa8048093e2f Mon Sep 17 00:00:00 2001 From: evazlimen <109487593+evazlimen@users.noreply.github.com> Date: Mon, 11 Sep 2023 16:57:40 -0700 Subject: [PATCH 499/694] clang format --- src/grid/boundary_conditions.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/grid/boundary_conditions.cpp b/src/grid/boundary_conditions.cpp index 51d010069..cb5876f32 100644 --- a/src/grid/boundary_conditions.cpp +++ b/src/grid/boundary_conditions.cpp @@ -511,8 +511,7 @@ void Grid3D::Custom_Boundary(char bcnd[MAXLEN]) if (strcmp(bcnd, "noh") == 0) { // from grid/cuda_boundaries.cu Noh_Boundary(); - } - else if (strcmp(bcnd, "wind") == 0) { + } else if (strcmp(bcnd, "wind") == 0) { // from grid/cuda_boundaries.cu Wind_Boundary(); } else { From b31f1d1c5f0f77fd79665dc949be923b120642fc Mon Sep 17 00:00:00 2001 From: evazlimen <109487593+evazlimen@users.noreply.github.com> Date: Mon, 11 Sep 2023 17:01:08 -0700 Subject: [PATCH 500/694] reset to default --- builds/make.host.lux | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/builds/make.host.lux b/builds/make.host.lux index 6ce455fcb..edf4e42c0 100644 --- a/builds/make.host.lux +++ b/builds/make.host.lux @@ -9,7 +9,7 @@ GPUFLAGS = -std=c++17 OMP_NUM_THREADS = 10 #-- Library -CUDA_ROOT = /cm/shared/apps/cuda11.2/toolkit/current +CUDA_ROOT = /cm/shared/apps/cuda10.2/toolkit/current HDF5_ROOT = /cm/shared/apps/hdf5/1.10.6 FFTW_ROOT = /home/brvillas/code/fftw-3.3.8 PFFT_ROOT = /data/groups/comp-astro/bruno/code_mpi_local/pfft From 41a09c1d7ef57d2477514bf840d281eb81aa70b1 Mon Sep 17 00:00:00 2001 From: ezlimen Date: Tue, 12 Sep 2023 20:01:36 -0700 Subject: [PATCH 501/694] BROKEN switch statement for static grav --- src/global/global.h | 1 + src/gravity/static_grav.h | 79 ++++++++++++++++++++++----------------- 2 files changed, 46 insertions(+), 34 deletions(-) diff --git a/src/global/global.h b/src/global/global.h index b037c931d..a9c218033 100644 --- a/src/global/global.h +++ b/src/global/global.h @@ -183,6 +183,7 @@ struct parameters { int n_steps_output; Real gamma; char init[MAXLEN]; + int custom_grav; int nfile; int n_hydro; int n_particle; diff --git a/src/gravity/static_grav.h b/src/gravity/static_grav.h index 9a4a30f6a..b4a292196 100644 --- a/src/gravity/static_grav.h +++ b/src/gravity/static_grav.h @@ -52,7 +52,7 @@ inline __device__ void calc_g_1D(int xid, int x_off, int n_ghost, Real dx, Real return; } -inline __device__ void calc_g_2D(int xid, int yid, int x_off, int y_off, int n_ghost, Real dx, Real dy, Real xbound, +inline __device__ void calc_g_2D(int xid, int yid, int x_off, int y_off, int n_ghost, int custom_grav, Real dx, Real dy, Real xbound, Real ybound, Real *gx, Real *gy) { Real x_pos, y_pos, r, phi; @@ -60,51 +60,62 @@ inline __device__ void calc_g_2D(int xid, int yid, int x_off, int y_off, int n_g // positions on the grid x_pos = (x_off + xid - n_ghost + 0.5) * dx + xbound; y_pos = (y_off + yid - n_ghost + 0.5) * dy + ybound; + printf("%d\n", custom_grav); + switch(custom_grav){ + case 1: //Gresho + // for Gresho, also need r & phi + printf("%d\n", custom_grav); + r = sqrt(x_pos * x_pos + y_pos * y_pos); + phi = atan2(y_pos, x_pos); - // for Gresho, also need r & phi - r = sqrt(x_pos * x_pos + y_pos * y_pos); - phi = atan2(y_pos, x_pos); - - /* // set acceleration to balance v_phi in Gresho problem if (r < 0.2) { - *gx = -cos(phi)*25.0*r; - *gy = -sin(phi)*25.0*r; + *gx = -cos(phi)*25.0*r; + *gy = -sin(phi)*25.0*r; } else if (r >= 0.2 && r < 0.4) { - *gx = -cos(phi)*(4.0 - 20.0*r + 25.0*r*r)/r; - *gy = -sin(phi)*(4.0 - 20.0*r + 25.0*r*r)/r; + *gx = -cos(phi)*(4.0 - 20.0*r + 25.0*r*r)/r; + *gy = -sin(phi)*(4.0 - 20.0*r + 25.0*r*r)/r; } else { - *gx = 0.0; - *gy = 0.0; + *gx = 0.0; + *gy = 0.0; } - */ - /* - // set gravitational acceleration for Keplarian potential + break; + case 2: //Keplerian potential + // set gravitational acceleration for Keplerian potential Real M; M = 1*Msun; *gx = -cos(phi)*GN*M/(r*r); *gy = -sin(phi)*GN*M/(r*r); - */ - // set gravitational acceleration for Kuzmin disk + NFW halo - Real a_d, a_h, a, M_vir, M_d, R_vir, R_d, R_s, M_h, c_vir, x; - M_vir = 1.0e12; // viral mass of MW in M_sun - M_d = 6.5e10; // mass of disk in M_sun (assume all gas) - M_h = M_vir - M_d; // halo mass in M_sun - R_vir = 261; // viral radius in kpc - c_vir = 20; // halo concentration - R_s = R_vir / c_vir; // halo scale length in kpc - R_d = 3.5; // disk scale length in kpc - - // calculate acceleration - x = r / R_s; - a_d = GN * M_d * r * pow(r * r + R_d * R_d, -1.5); - a_h = GN * M_h * (log(1 + x) - x / (1 + x)) / ((log(1 + c_vir) - c_vir / (1 + c_vir)) * r * r); - a = a_d + a_h; - - *gx = -cos(phi) * a; - *gy = -sin(phi) * a; + break; + case 3: //Kuzmin disk + NFW halo + // set gravitational acceleration for Kuzmin disk + NFW halo + Real a_d, a_h, a, M_vir, M_d, R_vir, R_d, R_s, M_h, c_vir, x; + M_vir = 1.0e12; // viral mass of MW in M_sun + M_d = 6.5e10; // mass of disk in M_sun (assume all gas) + M_h = M_vir - M_d; // halo mass in M_sun + R_vir = 261; // viral radius in kpc + c_vir = 20; // halo concentration + R_s = R_vir / c_vir; // halo scale length in kpc + R_d = 3.5; // disk scale length in kpc + + // calculate acceleration + x = r / R_s; + a_d = GN * M_d * r * pow(r * r + R_d * R_d, -1.5); + a_h = GN * M_h * (log(1 + x) - x / (1 + x)) / ((log(1 + c_vir) - c_vir / (1 + c_vir)) * r * r); + a = a_d + a_h; + + *gx = -cos(phi) * a; + *gy = -sin(phi) * a; + break; + case 4: //Rayleigh-taylor instability: + *gx = 0; + *gy = -1; + break; + default: + printf("ABORT: %d -> Unknown custom static gravity field.\n", custom_grav); + exit(0); return; } From cca7d90853826cb46275293a728729d0ee3ccb94 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Wed, 13 Sep 2023 14:35:01 -0400 Subject: [PATCH 502/694] Fix clang-tidy config error that disabled almost all checks --- .clang-tidy | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.clang-tidy b/.clang-tidy index 5082415d5..bd40fd46c 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -37,8 +37,7 @@ Checks: "*, google-readability-avoid-underscore-in-googletest-name, google-upgrade-googletest-case, --*, -readability-braces-around-statements, + -bugprone-implicit-widening-of-multiplication-result, -bugprone-narrowing-conversions, -cert-env33-c, From 483b711b4e94b1fd58ca8e4055e0196fa4245d6b Mon Sep 17 00:00:00 2001 From: Matthew Abruzzo Date: Wed, 13 Sep 2023 14:52:36 -0400 Subject: [PATCH 503/694] minor bugfix related to chprintf --- src/io/io.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/io/io.cpp b/src/io/io.cpp index 119d52203..32fb8804d 100644 --- a/src/io/io.cpp +++ b/src/io/io.cpp @@ -2596,7 +2596,7 @@ int chprintf(const char *__restrict sdata, ...) // NOLINT(cert-dcl50-cpp) { int code = 0; /*limit printf to root process only*/ - if (not Is_Root_Proc()) { + if (Is_Root_Proc()) { va_list ap; va_start(ap, sdata); code = vfprintf(stdout, sdata, ap); // NOLINT(clang-analyzer-valist.Uninitialized) From 7bb532d9f12d643799af9922cc5cb11823c46551 Mon Sep 17 00:00:00 2001 From: ezlimen Date: Wed, 13 Sep 2023 13:03:15 -0700 Subject: [PATCH 504/694] still broken but trying to get it to read in custom_grav as a parameter --- src/global/global.cpp | 2 + src/global/global.h | 4 +- src/gravity/static_grav.h | 61 ++++++++++++++++++++++--------- src/grid/grid3D.cpp | 8 +++- src/grid/grid3D.h | 3 ++ src/hydro/hydro_cuda.cu | 22 ++++++++++- src/hydro/hydro_cuda.h | 2 +- src/integrators/simple_2D_cuda.cu | 4 +- src/integrators/simple_2D_cuda.h | 2 +- src/main.cpp | 9 +++-- 10 files changed, 86 insertions(+), 31 deletions(-) diff --git a/src/global/global.cpp b/src/global/global.cpp index a4c697d3c..b80ab3357 100644 --- a/src/global/global.cpp +++ b/src/global/global.cpp @@ -211,6 +211,8 @@ void parse_param(char *name, char *value, struct parameters *parms) parms->ny = atoi(value); } else if (strcmp(name, "nz") == 0) { parms->nz = atoi(value); + //} else if (strcmp(name, "custom_grav") == 0) { + //parms->custom_grav = atoi(value); } else if (strcmp(name, "tout") == 0) { parms->tout = atof(value); } else if (strcmp(name, "outstep") == 0) { diff --git a/src/global/global.h b/src/global/global.h index a9c218033..a3f87a516 100644 --- a/src/global/global.h +++ b/src/global/global.h @@ -183,7 +183,6 @@ struct parameters { int n_steps_output; Real gamma; char init[MAXLEN]; - int custom_grav; int nfile; int n_hydro; int n_particle; @@ -199,6 +198,9 @@ struct parameters { #ifdef DE int out_float32_GasEnergy = 0; #endif +#ifdef STATIC_GRAV + int custom_grav; +#endif #ifdef MHD int out_float32_magnetic_x = 0; int out_float32_magnetic_y = 0; diff --git a/src/gravity/static_grav.h b/src/gravity/static_grav.h index b4a292196..e6ae21676 100644 --- a/src/gravity/static_grav.h +++ b/src/gravity/static_grav.h @@ -52,7 +52,7 @@ inline __device__ void calc_g_1D(int xid, int x_off, int n_ghost, Real dx, Real return; } -inline __device__ void calc_g_2D(int xid, int yid, int x_off, int y_off, int n_ghost, int custom_grav, Real dx, Real dy, Real xbound, +inline __device__ void calc_g_gresho_2D(int xid, int yid, int x_off, int y_off, int n_ghost, Real dx, Real dy, Real xbound, Real ybound, Real *gx, Real *gy) { Real x_pos, y_pos, r, phi; @@ -60,11 +60,8 @@ inline __device__ void calc_g_2D(int xid, int yid, int x_off, int y_off, int n_g // positions on the grid x_pos = (x_off + xid - n_ghost + 0.5) * dx + xbound; y_pos = (y_off + yid - n_ghost + 0.5) * dy + ybound; - printf("%d\n", custom_grav); - switch(custom_grav){ - case 1: //Gresho // for Gresho, also need r & phi - printf("%d\n", custom_grav); + printf("Gresho\n"); r = sqrt(x_pos * x_pos + y_pos * y_pos); phi = atan2(y_pos, x_pos); @@ -81,15 +78,38 @@ inline __device__ void calc_g_2D(int xid, int yid, int x_off, int y_off, int n_g *gx = 0.0; *gy = 0.0; } - break; - case 2: //Keplerian potential - // set gravitational acceleration for Keplerian potential + return; +} + +inline __device__ void calc_g_keplerian_2D(int xid, int yid, int x_off, int y_off, int n_ghost, Real dx, Real dy, Real xbound, + Real ybound, Real *gx, Real *gy) +{ + Real x_pos, y_pos, r, phi; + // use the subgrid offset and global boundaries to calculate absolute + // positions on the grid + x_pos = (x_off + xid - n_ghost + 0.5) * dx + xbound; + y_pos = (y_off + yid - n_ghost + 0.5) * dy + ybound; + r = sqrt(x_pos * x_pos + y_pos * y_pos); + phi = atan2(y_pos, x_pos); + // set gravitational acceleration for Keplerian potential Real M; - M = 1*Msun; + M = 1*MSUN_CGS; *gx = -cos(phi)*GN*M/(r*r); *gy = -sin(phi)*GN*M/(r*r); - break; - case 3: //Kuzmin disk + NFW halo + return; +} + +inline __device__ void calc_g_kuzmin_2D(int xid, int yid, int x_off, int y_off, int n_ghost, Real dx, Real dy, Real xbound, + Real ybound, Real *gx, Real *gy) +{ + Real x_pos, y_pos, r, phi; + // use the subgrid offset and global boundaries to calculate absolute + // positions on the grid + x_pos = (x_off + xid - n_ghost + 0.5) * dx + xbound; + y_pos = (y_off + yid - n_ghost + 0.5) * dy + ybound; + printf("kuzmin\n"); + r = sqrt(x_pos * x_pos + y_pos * y_pos); + phi = atan2(y_pos, x_pos); // set gravitational acceleration for Kuzmin disk + NFW halo Real a_d, a_h, a, M_vir, M_d, R_vir, R_d, R_s, M_h, c_vir, x; M_vir = 1.0e12; // viral mass of MW in M_sun @@ -108,15 +128,20 @@ inline __device__ void calc_g_2D(int xid, int yid, int x_off, int y_off, int n_g *gx = -cos(phi) * a; *gy = -sin(phi) * a; - break; - case 4: //Rayleigh-taylor instability: + return; +} + +inline __device__ void calc_g_rayleigh_taylor_2D(int xid, int yid, int x_off, int y_off, int n_ghost, Real dx, Real dy, Real xbound, + Real ybound, Real *gx, Real *gy) +{ + Real x_pos, y_pos; + // use the subgrid offset and global boundaries to calculate absolute + // positions on the grid + x_pos = (x_off + xid - n_ghost + 0.5) * dx + xbound; + y_pos = (y_off + yid - n_ghost + 0.5) * dy + ybound; + printf("rayleigh taylor\n"); *gx = 0; *gy = -1; - break; - default: - printf("ABORT: %d -> Unknown custom static gravity field.\n", custom_grav); - exit(0); - return; } diff --git a/src/grid/grid3D.cpp b/src/grid/grid3D.cpp index 9010da354..6e51d52ee 100644 --- a/src/grid/grid3D.cpp +++ b/src/grid/grid3D.cpp @@ -146,7 +146,11 @@ void Grid3D::Initialize(struct parameters *P) int nx_in = P->nx; int ny_in = P->ny; int nz_in = P->nz; - +#ifdef STATIC_GRAV + H.custom_grav = P->custom_grav; + printf("P->custom_grav is %d\n", P->custom_grav); + printf("H.custom_grav is %d\n", H.custom_grav); +#endif // Set the CFL coefficient (a global variable) C_cfl = 0.3; @@ -446,7 +450,7 @@ Real Grid3D::Update_Grid(void) #endif // VL #ifdef SIMPLE Simple_Algorithm_2D_CUDA(C.device, H.nx, H.ny, x_off, y_off, H.n_ghost, H.dx, H.dy, H.xbound, H.ybound, H.dt, - H.n_fields); + H.n_fields, H.custom_grav); #endif // SIMPLE #endif // CUDA } else if (H.nx > 1 && H.ny > 1 && H.nz > 1) // 3D diff --git a/src/grid/grid3D.h b/src/grid/grid3D.h index e679415d9..ba6a85eae 100644 --- a/src/grid/grid3D.h +++ b/src/grid/grid3D.h @@ -209,6 +209,9 @@ struct Header { /*! \var dt * \brief Length of the current timestep */ Real dt; +#ifdef STATIC_GRAV + int custom_grav; +#endif #ifdef AVERAGE_SLOW_CELLS Real min_dt_slow; diff --git a/src/hydro/hydro_cuda.cu b/src/hydro/hydro_cuda.cu index 06090d476..d367b4b09 100644 --- a/src/hydro/hydro_cuda.cu +++ b/src/hydro/hydro_cuda.cu @@ -84,7 +84,7 @@ __global__ void Update_Conserved_Variables_1D(Real *dev_conserved, Real *dev_F, __global__ void Update_Conserved_Variables_2D(Real *dev_conserved, Real *dev_F_x, Real *dev_F_y, int nx, int ny, int x_off, int y_off, int n_ghost, Real dx, Real dy, Real xbound, - Real ybound, Real dt, Real gamma, int n_fields) + Real ybound, Real dt, Real gamma, int n_fields, int custom_grav) { int id, xid, yid, n_cells; int imo, jmo; @@ -140,8 +140,26 @@ __global__ void Update_Conserved_Variables_2D(Real *dev_conserved, Real *dev_F_x dtody * (dev_F_y[(n_fields - 1) * n_cells + jmo] - dev_F_y[(n_fields - 1) * n_cells + id]); #endif #ifdef STATIC_GRAV + printf("%d\n", custom_grav); // calculate the gravitational acceleration as a function of x & y position - calc_g_2D(xid, yid, x_off, y_off, n_ghost, dx, dy, xbound, ybound, &gx, &gy); + switch(custom_grav) { + case 1: //gresho + calc_g_gresho_2D(xid, yid, x_off, y_off, n_ghost, dx, dy, xbound, ybound, &gx, &gy); + break; + case 2: //rayleigh taylor instability + calc_g_rayleigh_taylor_2D(xid, yid, x_off, y_off, n_ghost, dx, dy, xbound, ybound, &gx, &gy); + break; +case 3: //keplerian disk + calc_g_keplerian_2D(xid, yid, x_off, y_off, n_ghost, dx, dy, xbound, ybound, &gx, &gy); +break; +case 4: //Kuzmin/NFW halo + calc_g_kuzmin_2D(xid, yid, x_off, y_off, n_ghost, dx, dy, xbound, ybound, &gx, &gy); +break; +default: + //printf("%d -> Unknown custom static gravity field. Options are \'1\' (Gresho), \'2\' (Rayleigh-Taylor), \'3\' (keplerian disk), \'4\' (Kuzmin disk with NFW halo). \n", custom_grav); + //printf("No gravity field will be set\n"); +printf("%d\t%d\t%d\t%d\n", custom_grav, custom_grav, custom_grav, custom_grav); +} // add gravitational source terms, time averaged from n to n+1 d_n = dev_conserved[id]; d_inv_n = 1.0 / d_n; diff --git a/src/hydro/hydro_cuda.h b/src/hydro/hydro_cuda.h index a5c4ab713..0e9425091 100644 --- a/src/hydro/hydro_cuda.h +++ b/src/hydro/hydro_cuda.h @@ -13,7 +13,7 @@ __global__ void Update_Conserved_Variables_1D(Real *dev_conserved, Real *dev_F, __global__ void Update_Conserved_Variables_2D(Real *dev_conserved, Real *dev_F_x, Real *dev_F_y, int nx, int ny, int x_off, int y_off, int n_ghost, Real dx, Real dy, Real xbound, - Real ybound, Real dt, Real gamma, int n_fields); + Real ybound, Real dt, Real gamma, int n_fields, int custom_grav); __global__ void Update_Conserved_Variables_3D(Real *dev_conserved, Real *Q_Lx, Real *Q_Rx, Real *Q_Ly, Real *Q_Ry, Real *Q_Lz, Real *Q_Rz, Real *dev_F_x, Real *dev_F_y, Real *dev_F_z, diff --git a/src/integrators/simple_2D_cuda.cu b/src/integrators/simple_2D_cuda.cu index b9d11b180..c1b2cc149 100644 --- a/src/integrators/simple_2D_cuda.cu +++ b/src/integrators/simple_2D_cuda.cu @@ -21,7 +21,7 @@ #include "../utils/gpu.hpp" void Simple_Algorithm_2D_CUDA(Real *d_conserved, int nx, int ny, int x_off, int y_off, int n_ghost, Real dx, Real dy, - Real xbound, Real ybound, Real dt, int n_fields) + Real xbound, Real ybound, Real dt, int n_fields, int custom_grav) { // Here, *dev_conserved contains the entire // set of conserved variables on the grid @@ -115,7 +115,7 @@ void Simple_Algorithm_2D_CUDA(Real *d_conserved, int nx, int ny, int x_off, int // Step 3: Update the conserved variable array hipLaunchKernelGGL(Update_Conserved_Variables_2D, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, F_x, F_y, nx, ny, x_off, - y_off, n_ghost, dx, dy, xbound, ybound, dt, gama, n_fields); + y_off, n_ghost, dx, dy, xbound, ybound, dt, gama, n_fields, custom_grav); CudaCheckError(); // Synchronize the total and internal energy diff --git a/src/integrators/simple_2D_cuda.h b/src/integrators/simple_2D_cuda.h index 297800b10..5439828a5 100644 --- a/src/integrators/simple_2D_cuda.h +++ b/src/integrators/simple_2D_cuda.h @@ -9,7 +9,7 @@ #include "../global/global.h" void Simple_Algorithm_2D_CUDA(Real *d_conserved, int nx, int ny, int x_off, int y_off, int n_ghost, Real dx, Real dy, - Real xbound, Real ybound, Real dt, int n_fields); + Real xbound, Real ybound, Real dt, int n_fields, int custom_grav); void Free_Memory_Simple_2D(); diff --git a/src/main.cpp b/src/main.cpp index 9c54a77e9..5a63d7a7a 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -79,9 +79,9 @@ int main(int argc, char *argv[]) chprintf("Git Commit Hash = %s\n", GIT_HASH); chprintf("Macro Flags = %s\n", MACRO_FLAGS); chprintf( - "Parameter values: nx = %d, ny = %d, nz = %d, tout = %f, init = %s, " + "Parameter values: nx = %d, ny = %d, nz = %d, tout = %f, init = %s, custom_Grav = %d, " "boundaries = %d %d %d %d %d %d\n", - P.nx, P.ny, P.nz, P.tout, P.init, P.xl_bcnd, P.xu_bcnd, P.yl_bcnd, P.yu_bcnd, P.zl_bcnd, P.zu_bcnd); + P.nx, P.ny, P.nz, P.tout, P.init, P.custom_grav, P.xl_bcnd, P.xu_bcnd, P.yl_bcnd, P.yu_bcnd, P.zl_bcnd, P.zu_bcnd); bool is_restart = false; if (strcmp(P.init, "Read_Grid") == 0) { @@ -107,14 +107,15 @@ int main(int argc, char *argv[]) Write_Message_To_Log_File(message.c_str()); message = "Macro Flags = " + std::string(MACRO_FLAGS); Write_Message_To_Log_File(message.c_str()); - + //message = "custom gravity is = " + std::string(P.custom_grav); + //Write_Message_To_Log_File(message.c_str()); // initialize the grid G.Initialize(&P); chprintf("Local number of grid cells: %d %d %d %d\n", G.H.nx_real, G.H.ny_real, G.H.nz_real, G.H.n_cells); message = "Initializing Simulation"; Write_Message_To_Log_File(message.c_str()); - + // Set initial conditions chprintf("Setting initial conditions...\n"); G.Set_Initial_Conditions(P); From 8dad104be51d5b043092fcf57e948abd1923bfb0 Mon Sep 17 00:00:00 2001 From: ezlimen Date: Wed, 13 Sep 2023 13:20:11 -0700 Subject: [PATCH 505/694] compiles and runs, not correct though --- src/global/global.cpp | 4 ++-- src/hydro/hydro_cuda.cu | 1 - src/main.cpp | 12 ++++++++---- 3 files changed, 10 insertions(+), 7 deletions(-) diff --git a/src/global/global.cpp b/src/global/global.cpp index b80ab3357..7bfcf7062 100644 --- a/src/global/global.cpp +++ b/src/global/global.cpp @@ -211,8 +211,8 @@ void parse_param(char *name, char *value, struct parameters *parms) parms->ny = atoi(value); } else if (strcmp(name, "nz") == 0) { parms->nz = atoi(value); - //} else if (strcmp(name, "custom_grav") == 0) { - //parms->custom_grav = atoi(value); + } else if (strcmp(name, "custom_grav") == 0) { + parms->custom_grav = atoi(value); } else if (strcmp(name, "tout") == 0) { parms->tout = atof(value); } else if (strcmp(name, "outstep") == 0) { diff --git a/src/hydro/hydro_cuda.cu b/src/hydro/hydro_cuda.cu index d367b4b09..fe8335185 100644 --- a/src/hydro/hydro_cuda.cu +++ b/src/hydro/hydro_cuda.cu @@ -140,7 +140,6 @@ __global__ void Update_Conserved_Variables_2D(Real *dev_conserved, Real *dev_F_x dtody * (dev_F_y[(n_fields - 1) * n_cells + jmo] - dev_F_y[(n_fields - 1) * n_cells + id]); #endif #ifdef STATIC_GRAV - printf("%d\n", custom_grav); // calculate the gravitational acceleration as a function of x & y position switch(custom_grav) { case 1: //gresho diff --git a/src/main.cpp b/src/main.cpp index 5a63d7a7a..6886ac9c9 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -79,9 +79,9 @@ int main(int argc, char *argv[]) chprintf("Git Commit Hash = %s\n", GIT_HASH); chprintf("Macro Flags = %s\n", MACRO_FLAGS); chprintf( - "Parameter values: nx = %d, ny = %d, nz = %d, tout = %f, init = %s, custom_Grav = %d, " + "Parameter values: nx = %d, ny = %d, nz = %d, tout = %f, init = %s, " "boundaries = %d %d %d %d %d %d\n", - P.nx, P.ny, P.nz, P.tout, P.init, P.custom_grav, P.xl_bcnd, P.xu_bcnd, P.yl_bcnd, P.yu_bcnd, P.zl_bcnd, P.zu_bcnd); + P.nx, P.ny, P.nz, P.tout, P.init, P.xl_bcnd, P.xu_bcnd, P.yl_bcnd, P.yu_bcnd, P.zl_bcnd, P.zu_bcnd); bool is_restart = false; if (strcmp(P.init, "Read_Grid") == 0) { @@ -107,8 +107,6 @@ int main(int argc, char *argv[]) Write_Message_To_Log_File(message.c_str()); message = "Macro Flags = " + std::string(MACRO_FLAGS); Write_Message_To_Log_File(message.c_str()); - //message = "custom gravity is = " + std::string(P.custom_grav); - //Write_Message_To_Log_File(message.c_str()); // initialize the grid G.Initialize(&P); chprintf("Local number of grid cells: %d %d %d %d\n", G.H.nx_real, G.H.ny_real, G.H.nz_real, G.H.n_cells); @@ -130,6 +128,12 @@ int main(int argc, char *argv[]) chprintf("\nUsing Dual Energy Formalism:\n eta_1: %0.3f eta_2: %0.4f\n", DE_ETA_1, DE_ETA_2); message = " eta_1: " + std::to_string(DE_ETA_1) + " eta_2: " + std::to_string(DE_ETA_2); Write_Message_To_Log_File(message.c_str()); +#endif + +#ifdef STATIC_GRAV + chprintf("\nUsing Static Gravity:\n Custom gravity field selected: %d.\n", P.custom_grav); + message = " Custom gravity field: " + std::to_string(P.custom_grav); + Write_Message_To_Log_File(message.c_str()); #endif #ifdef CPU_TIME From 46c2494718c786bc641abb3d163f0e66b3b3edc6 Mon Sep 17 00:00:00 2001 From: ezlimen Date: Wed, 13 Sep 2023 13:44:37 -0700 Subject: [PATCH 506/694] compiling and running with error message if you put in an invalid custom gravity. it exits in that case but i dont think its thread safe. --- src/global/global.cpp | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/global/global.cpp b/src/global/global.cpp index 7bfcf7062..6c65dc68e 100644 --- a/src/global/global.cpp +++ b/src/global/global.cpp @@ -211,8 +211,15 @@ void parse_param(char *name, char *value, struct parameters *parms) parms->ny = atoi(value); } else if (strcmp(name, "nz") == 0) { parms->nz = atoi(value); - } else if (strcmp(name, "custom_grav") == 0) { - parms->custom_grav = atoi(value); +#ifdef STATIC_GRAV + } else if (strcmp(name, "custom_grav") == 0) { + if (atoi(value) < 1 || atoi(value) > 4){ + printf("ABORT: %d -> Unknown custom static gravity field.\n", atoi(value)); + printf("Must select between \'1\' (Gresho), \'2\' (Rayleigh-Taylor), \'3\' (Keplerian disk), and \'4\' (Kuzmin disk/NFW halo).\n"); + exit(0);} + else{ + parms->custom_grav = atoi(value);} +#endif } else if (strcmp(name, "tout") == 0) { parms->tout = atof(value); } else if (strcmp(name, "outstep") == 0) { From e2affb4398116f670408f64a990f26fcdc20aaba Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Wed, 13 Sep 2023 14:44:16 -0400 Subject: [PATCH 507/694] Add MHD support to slice outputs --- src/io/io.cpp | 120 ++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 111 insertions(+), 9 deletions(-) diff --git a/src/io/io.cpp b/src/io/io.cpp index 32fb8804d..c9817cc6d 100644 --- a/src/io/io.cpp +++ b/src/io/io.cpp @@ -15,6 +15,7 @@ #endif // HDF5 #include "../grid/grid3D.h" #include "../io/io.h" +#include "../utils/cuda_utilities.h" #include "../utils/timing_functions.h" // provides ScopedTimer #ifdef MPI_CHOLLA #include "../mpi/mpi_routines.h" @@ -1865,6 +1866,11 @@ void Grid3D::Write_Slices_HDF5(hid_t file_id) dataset_buffer_my = (Real *)malloc(H.nx_real * H.ny_real * sizeof(Real)); dataset_buffer_mz = (Real *)malloc(H.nx_real * H.ny_real * sizeof(Real)); dataset_buffer_E = (Real *)malloc(H.nx_real * H.ny_real * sizeof(Real)); + #ifdef MHD + std::vector dataset_buffer_magnetic_x(H.nx_real * H.ny_real); + std::vector dataset_buffer_magnetic_y(H.nx_real * H.ny_real); + std::vector dataset_buffer_magnetic_z(H.nx_real * H.ny_real); + #endif // MHD #ifdef DE dataset_buffer_GE = (Real *)malloc(H.nx_real * H.ny_real * sizeof(Real)); #endif @@ -1875,19 +1881,38 @@ void Grid3D::Write_Slices_HDF5(hid_t file_id) // Copy the xy slices to the memory buffers for (j = 0; j < H.ny_real; j++) { for (i = 0; i < H.nx_real; i++) { - id = (i + H.n_ghost) + (j + H.n_ghost) * H.nx + zslice * H.nx * H.ny; + id = cuda_utilities::compute1DIndex(i + H.n_ghost, j + H.n_ghost, zslice, H.nx, H.ny); buf_id = j + i * H.ny_real; + #ifdef MHD + int id_xm1 = cuda_utilities::compute1DIndex(i + H.n_ghost - 1, j + H.n_ghost, zslice, H.nx, H.ny); + int id_ym1 = cuda_utilities::compute1DIndex(i + H.n_ghost, j + H.n_ghost - 1, zslice, H.nx, H.ny); + int id_zm1 = cuda_utilities::compute1DIndex(i + H.n_ghost, j + H.n_ghost, zslice - 1, H.nx, H.ny); + #endif // MHD #ifdef MPI_CHOLLA // When there are multiple processes, check whether this slice is in // your domain if (zslice >= nz_local_start && zslice < nz_local_start + nz_local) { - id = (i + H.n_ghost) + (j + H.n_ghost) * H.nx + (zslice - nz_local_start + H.n_ghost) * H.nx * H.ny; - #endif // MPI_CHOLLA + id = cuda_utilities::compute1DIndex(i + H.n_ghost, j + H.n_ghost, zslice - nz_local_start + H.n_ghost, H.nx, + H.ny); + #ifdef MHD + int id_xm1 = cuda_utilities::compute1DIndex(i + H.n_ghost - 1, j + H.n_ghost, + zslice - nz_local_start + H.n_ghost, H.nx, H.ny); + int id_ym1 = cuda_utilities::compute1DIndex(i + H.n_ghost, j + H.n_ghost - 1, + zslice - nz_local_start + H.n_ghost, H.nx, H.ny); + int id_zm1 = cuda_utilities::compute1DIndex(i + H.n_ghost, j + H.n_ghost, + zslice - nz_local_start + H.n_ghost - 1, H.nx, H.ny); + #endif // MHD + #endif // MPI_CHOLLA dataset_buffer_d[buf_id] = C.density[id]; dataset_buffer_mx[buf_id] = C.momentum_x[id]; dataset_buffer_my[buf_id] = C.momentum_y[id]; dataset_buffer_mz[buf_id] = C.momentum_z[id]; dataset_buffer_E[buf_id] = C.Energy[id]; + #ifdef MHD + dataset_buffer_magnetic_x[buf_id] = 0.5 * (C.magnetic_x[id] + C.magnetic_x[id_xm1]); + dataset_buffer_magnetic_y[buf_id] = 0.5 * (C.magnetic_y[id] + C.magnetic_y[id_ym1]); + dataset_buffer_magnetic_z[buf_id] = 0.5 * (C.magnetic_z[id] + C.magnetic_z[id_zm1]); + #endif // MHD #ifdef DE dataset_buffer_GE[buf_id] = C.GasEnergy[id]; #endif @@ -1905,6 +1930,11 @@ void Grid3D::Write_Slices_HDF5(hid_t file_id) dataset_buffer_my[buf_id] = 0; dataset_buffer_mz[buf_id] = 0; dataset_buffer_E[buf_id] = 0; + #ifdef MHD + dataset_buffer_magnetic_x[buf_id] = 0; + dataset_buffer_magnetic_y[buf_id] = 0; + dataset_buffer_magnetic_z[buf_id] = 0; + #endif // MHD #ifdef DE dataset_buffer_GE[buf_id] = 0; #endif @@ -1924,6 +1954,11 @@ void Grid3D::Write_Slices_HDF5(hid_t file_id) status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_my, "/my_xy"); status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_mz, "/mz_xy"); status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_E, "/E_xy"); + #ifdef MHD + status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_magnetic_x.data(), "/magnetic_x_xy"); + status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_magnetic_y.data(), "/magnetic_y_xy"); + status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_magnetic_z.data(), "/magnetic_z_xy"); + #endif // MHD #ifdef DE status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_GE, "/GE_xy"); #endif @@ -1957,6 +1992,11 @@ void Grid3D::Write_Slices_HDF5(hid_t file_id) dataset_buffer_my = (Real *)malloc(H.nx_real * H.nz_real * sizeof(Real)); dataset_buffer_mz = (Real *)malloc(H.nx_real * H.nz_real * sizeof(Real)); dataset_buffer_E = (Real *)malloc(H.nx_real * H.nz_real * sizeof(Real)); + #ifdef MHD + dataset_buffer_magnetic_x.resize(H.nx_real * H.nz_real); + dataset_buffer_magnetic_y.resize(H.nx_real * H.nz_real); + dataset_buffer_magnetic_z.resize(H.nx_real * H.nz_real); + #endif // MHD #ifdef DE dataset_buffer_GE = (Real *)malloc(H.nx_real * H.nz_real * sizeof(Real)); #endif @@ -1967,19 +2007,38 @@ void Grid3D::Write_Slices_HDF5(hid_t file_id) // Copy the xz slices to the memory buffers for (k = 0; k < H.nz_real; k++) { for (i = 0; i < H.nx_real; i++) { - id = (i + H.n_ghost) + yslice * H.nx + (k + H.n_ghost) * H.nx * H.ny; + id = cuda_utilities::compute1DIndex(i + H.n_ghost, yslice, k + H.n_ghost, H.nx, H.ny); buf_id = k + i * H.nz_real; + #ifdef MHD + int id_xm1 = cuda_utilities::compute1DIndex(i + H.n_ghost - 1, yslice, k + H.n_ghost, H.nx, H.ny); + int id_ym1 = cuda_utilities::compute1DIndex(i + H.n_ghost, yslice - 1, k + H.n_ghost, H.nx, H.ny); + int id_zm1 = cuda_utilities::compute1DIndex(i + H.n_ghost, yslice, k + H.n_ghost - 1, H.nx, H.ny); + #endif // MHD #ifdef MPI_CHOLLA // When there are multiple processes, check whether this slice is in // your domain if (yslice >= ny_local_start && yslice < ny_local_start + ny_local) { - id = (i + H.n_ghost) + (yslice - ny_local_start + H.n_ghost) * H.nx + (k + H.n_ghost) * H.nx * H.ny; - #endif // MPI_CHOLLA + id = cuda_utilities::compute1DIndex(i + H.n_ghost, yslice - ny_local_start + H.n_ghost, k + H.n_ghost, H.nx, + H.ny); + #ifdef MHD + int id_xm1 = cuda_utilities::compute1DIndex(i + H.n_ghost - 1, yslice - ny_local_start + H.n_ghost, + k + H.n_ghost, H.nx, H.ny); + int id_ym1 = cuda_utilities::compute1DIndex(i + H.n_ghost, yslice - ny_local_start + H.n_ghost - 1, + k + H.n_ghost, H.nx, H.ny); + int id_zm1 = cuda_utilities::compute1DIndex(i + H.n_ghost, yslice - ny_local_start + H.n_ghost, + k + H.n_ghost - 1, H.nx, H.ny); + #endif // MHD + #endif // MPI_CHOLLA dataset_buffer_d[buf_id] = C.density[id]; dataset_buffer_mx[buf_id] = C.momentum_x[id]; dataset_buffer_my[buf_id] = C.momentum_y[id]; dataset_buffer_mz[buf_id] = C.momentum_z[id]; dataset_buffer_E[buf_id] = C.Energy[id]; + #ifdef MHD + dataset_buffer_magnetic_x[buf_id] = 0.5 * (C.magnetic_x[id] + C.magnetic_x[id_xm1]); + dataset_buffer_magnetic_y[buf_id] = 0.5 * (C.magnetic_y[id] + C.magnetic_y[id_ym1]); + dataset_buffer_magnetic_z[buf_id] = 0.5 * (C.magnetic_z[id] + C.magnetic_z[id_zm1]); + #endif // MHD #ifdef DE dataset_buffer_GE[buf_id] = C.GasEnergy[id]; #endif @@ -1997,6 +2056,11 @@ void Grid3D::Write_Slices_HDF5(hid_t file_id) dataset_buffer_my[buf_id] = 0; dataset_buffer_mz[buf_id] = 0; dataset_buffer_E[buf_id] = 0; + #ifdef MHD + dataset_buffer_magnetic_x[buf_id] = 0; + dataset_buffer_magnetic_y[buf_id] = 0; + dataset_buffer_magnetic_z[buf_id] = 0; + #endif // MHD #ifdef DE dataset_buffer_GE[buf_id] = 0; #endif @@ -2016,6 +2080,11 @@ void Grid3D::Write_Slices_HDF5(hid_t file_id) status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_my, "/my_xz"); status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_mz, "/mz_xz"); status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_E, "/E_xz"); + #ifdef MHD + status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_magnetic_x.data(), "/magnetic_x_xz"); + status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_magnetic_y.data(), "/magnetic_y_xz"); + status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_magnetic_z.data(), "/magnetic_z_xz"); + #endif // MHD #ifdef DE status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_GE, "/GE_xz"); #endif @@ -2050,6 +2119,11 @@ void Grid3D::Write_Slices_HDF5(hid_t file_id) dataset_buffer_my = (Real *)malloc(H.ny_real * H.nz_real * sizeof(Real)); dataset_buffer_mz = (Real *)malloc(H.ny_real * H.nz_real * sizeof(Real)); dataset_buffer_E = (Real *)malloc(H.ny_real * H.nz_real * sizeof(Real)); + #ifdef MHD + dataset_buffer_magnetic_x.resize(H.ny_real * H.nz_real); + dataset_buffer_magnetic_y.resize(H.ny_real * H.nz_real); + dataset_buffer_magnetic_z.resize(H.ny_real * H.nz_real); + #endif // MHD #ifdef DE dataset_buffer_GE = (Real *)malloc(H.ny_real * H.nz_real * sizeof(Real)); #endif @@ -2060,19 +2134,37 @@ void Grid3D::Write_Slices_HDF5(hid_t file_id) // Copy the yz slices to the memory buffers for (k = 0; k < H.nz_real; k++) { for (j = 0; j < H.ny_real; j++) { - id = xslice + (j + H.n_ghost) * H.nx + (k + H.n_ghost) * H.nx * H.ny; + id = cuda_utilities::compute1DIndex(xslice, j + H.n_ghost, k + H.n_ghost, H.nx, H.ny); buf_id = k + j * H.nz_real; + #ifdef MHD + int id_xm1 = cuda_utilities::compute1DIndex(xslice - 1, j + H.n_ghost, k + H.n_ghost, H.nx, H.ny); + int id_ym1 = cuda_utilities::compute1DIndex(xslice, j + H.n_ghost - 1, k + H.n_ghost, H.nx, H.ny); + int id_zm1 = cuda_utilities::compute1DIndex(xslice, j + H.n_ghost, k + H.n_ghost - 1, H.nx, H.ny); + #endif // MHD #ifdef MPI_CHOLLA // When there are multiple processes, check whether this slice is in // your domain if (xslice >= nx_local_start && xslice < nx_local_start + nx_local) { - id = (xslice - nx_local_start) + (j + H.n_ghost) * H.nx + (k + H.n_ghost) * H.nx * H.ny; - #endif // MPI_CHOLLA + id = cuda_utilities::compute1DIndex(xslice - nx_local_start, j + H.n_ghost, k + H.n_ghost, H.nx, H.ny); + #ifdef MHD + int id_xm1 = + cuda_utilities::compute1DIndex(xslice - nx_local_start - 1, j + H.n_ghost, k + H.n_ghost, H.nx, H.ny); + int id_ym1 = + cuda_utilities::compute1DIndex(xslice - nx_local_start, j + H.n_ghost - 1, k + H.n_ghost, H.nx, H.ny); + int id_zm1 = + cuda_utilities::compute1DIndex(xslice - nx_local_start, j + H.n_ghost, k + H.n_ghost - 1, H.nx, H.ny); + #endif // MHD + #endif // MPI_CHOLLA dataset_buffer_d[buf_id] = C.density[id]; dataset_buffer_mx[buf_id] = C.momentum_x[id]; dataset_buffer_my[buf_id] = C.momentum_y[id]; dataset_buffer_mz[buf_id] = C.momentum_z[id]; dataset_buffer_E[buf_id] = C.Energy[id]; + #ifdef MHD + dataset_buffer_magnetic_x[buf_id] = 0.5 * (C.magnetic_x[id] + C.magnetic_x[id_xm1]); + dataset_buffer_magnetic_y[buf_id] = 0.5 * (C.magnetic_y[id] + C.magnetic_y[id_ym1]); + dataset_buffer_magnetic_z[buf_id] = 0.5 * (C.magnetic_z[id] + C.magnetic_z[id_zm1]); + #endif // MHD #ifdef DE dataset_buffer_GE[buf_id] = C.GasEnergy[id]; #endif @@ -2090,6 +2182,11 @@ void Grid3D::Write_Slices_HDF5(hid_t file_id) dataset_buffer_my[buf_id] = 0; dataset_buffer_mz[buf_id] = 0; dataset_buffer_E[buf_id] = 0; + #ifdef MHD + dataset_buffer_magnetic_x[buf_id] = 0; + dataset_buffer_magnetic_y[buf_id] = 0; + dataset_buffer_magnetic_z[buf_id] = 0; + #endif // MHD #ifdef DE dataset_buffer_GE[buf_id] = 0; #endif @@ -2109,6 +2206,11 @@ void Grid3D::Write_Slices_HDF5(hid_t file_id) status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_my, "/my_yz"); status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_mz, "/mz_yz"); status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_E, "/E_yz"); + #ifdef MHD + status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_magnetic_x.data(), "/magnetic_x_yz"); + status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_magnetic_y.data(), "/magnetic_y_yz"); + status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_magnetic_z.data(), "/magnetic_z_yz"); + #endif // MHD #ifdef DE status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_GE, "/GE_yz"); #endif From 9ab72fe7ae04f7e0fe4413f442191d473c58d004 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Wed, 13 Sep 2023 15:47:50 -0400 Subject: [PATCH 508/694] Move default paremeters into declaration Some member variables of the `parameters` struct were set to default values in an unrelated function. Moved the default initialization to the declaration of the struct. --- src/global/global.cpp | 16 ---------------- src/global/global.h | 23 ++++++++++++----------- 2 files changed, 12 insertions(+), 27 deletions(-) diff --git a/src/global/global.cpp b/src/global/global.cpp index a4c697d3c..2cdb508d1 100644 --- a/src/global/global.cpp +++ b/src/global/global.cpp @@ -131,22 +131,6 @@ void parse_params(char *param_file, struct parameters *parms, int argc, char **a exit(1); return; } - // set default hydro file output parameter - parms->n_hydro = 1; - parms->n_particle = 1; - parms->n_slice = 1; - parms->n_projection = 1; - parms->n_rotated_projection = 1; - -#ifdef ROTATED_PROJECTION - // initialize rotation parameters to zero - parms->delta = 0; - parms->theta = 0; - parms->phi = 0; - parms->n_delta = 0; - parms->ddelta_dt = 0; - parms->flag_delta = 0; -#endif /*ROTATED_PROJECTION*/ #ifdef COSMOLOGY // Initialize file name as an empty string diff --git a/src/global/global.h b/src/global/global.h index b037c931d..75fee01fb 100644 --- a/src/global/global.h +++ b/src/global/global.h @@ -184,11 +184,11 @@ struct parameters { Real gamma; char init[MAXLEN]; int nfile; - int n_hydro; - int n_particle; - int n_projection; - int n_rotated_projection; - int n_slice; + int n_hydro = 1; + int n_particle = 1; + int n_projection = 1; + int n_rotated_projection = 1; + int n_slice = 1; int n_out_float32 = 0; int out_float32_density = 0; int out_float32_momentum_x = 0; @@ -275,16 +275,17 @@ struct parameters { char snr_filename[MAXLEN]; #endif #ifdef ROTATED_PROJECTION + // initialize rotation parameters to zero int nxr; int nzr; - Real delta; - Real theta; - Real phi; + Real delta = 0; + Real theta = 0; + Real phi = 0; Real Lx; Real Lz; - int n_delta; - Real ddelta_dt; - int flag_delta; + int n_delta = 0; + Real ddelta_dt = 0; + int flag_delta = 0; #endif /*ROTATED_PROJECTION*/ #ifdef COSMOLOGY Real H0; From 920db6a087bc3e88a825a8ee0d1e05768ab5414d Mon Sep 17 00:00:00 2001 From: ezlimen Date: Thu, 14 Sep 2023 12:33:28 -0700 Subject: [PATCH 509/694] rayleigh taylor static gravity works! gresho ics/stat grav params are wrong --- src/gravity/static_grav.h | 2 +- src/grid/initial_conditions.cpp | 2 +- src/hydro/hydro_cuda.cu | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/gravity/static_grav.h b/src/gravity/static_grav.h index e6ae21676..c389c2f0f 100644 --- a/src/gravity/static_grav.h +++ b/src/gravity/static_grav.h @@ -139,7 +139,7 @@ inline __device__ void calc_g_rayleigh_taylor_2D(int xid, int yid, int x_off, in // positions on the grid x_pos = (x_off + xid - n_ghost + 0.5) * dx + xbound; y_pos = (y_off + yid - n_ghost + 0.5) * dy + ybound; - printf("rayleigh taylor\n"); + // printf("shhhhh rayleigh taylor\n"); *gx = 0; *gy = -1; return; diff --git a/src/grid/initial_conditions.cpp b/src/grid/initial_conditions.cpp index 38967e4b7..333d2548a 100644 --- a/src/grid/initial_conditions.cpp +++ b/src/grid/initial_conditions.cpp @@ -894,7 +894,7 @@ void Grid3D::Rayleigh_Taylor() vy = 0.01 * cos(6 * M_PI * x_pos + M_PI) * exp(-(y_pos - 0.5 * H.ydglobal) * (y_pos - 0.5 * H.ydglobal) / 0.1); // vy = 0.0; - // lower half of slab + // lower half of slab if (y_pos <= 0.5 * H.ydglobal) { P_0 = 1.0 / gama - dl * g * 0.5; P = P_0 + dl * g * y_pos; diff --git a/src/hydro/hydro_cuda.cu b/src/hydro/hydro_cuda.cu index fe8335185..c53ae86f1 100644 --- a/src/hydro/hydro_cuda.cu +++ b/src/hydro/hydro_cuda.cu @@ -156,8 +156,8 @@ case 4: //Kuzmin/NFW halo break; default: //printf("%d -> Unknown custom static gravity field. Options are \'1\' (Gresho), \'2\' (Rayleigh-Taylor), \'3\' (keplerian disk), \'4\' (Kuzmin disk with NFW halo). \n", custom_grav); - //printf("No gravity field will be set\n"); -printf("%d\t%d\t%d\t%d\n", custom_grav, custom_grav, custom_grav, custom_grav); +printf("No gravity field will be set\n"); +//printf("%d\t%d\t%d\t%d\n", custom_grav, custom_grav, custom_grav, custom_grav); } // add gravitational source terms, time averaged from n to n+1 d_n = dev_conserved[id]; From 65618be0860042bc23ae16eff8a3b426e370d745 Mon Sep 17 00:00:00 2001 From: Matthew Abruzzo Date: Wed, 13 Sep 2023 12:53:06 -0400 Subject: [PATCH 510/694] Introduce new ERROR and ASSERT macros (to simplify the process of reporting an error and exiting) --- src/utils/error_handling.cpp | 65 +++++++++++++++++++++++++++++++++--- src/utils/error_handling.h | 49 +++++++++++++++++++++++++-- 2 files changed, 107 insertions(+), 7 deletions(-) diff --git a/src/utils/error_handling.cpp b/src/utils/error_handling.cpp index 040c1885b..dc491ebdf 100644 --- a/src/utils/error_handling.cpp +++ b/src/utils/error_handling.cpp @@ -1,12 +1,15 @@ #include "../utils/error_handling.h" #include +#include +#include #include #include +#include #ifdef MPI_CHOLLA - #include -void chexit(int code) + #include "../mpi/mpi_routines.h" +[[noreturn]] void chexit(int code) { if (code == 0) { /*exit normally*/ @@ -20,14 +23,14 @@ void chexit(int code) } } #else /*MPI_CHOLLA*/ -void chexit(int code) +[[noreturn]] void chexit(int code) { /*exit using code*/ exit(code); } #endif /*MPI_CHOLLA*/ -void Check_Configuration(parameters const &P) +void Check_Configuration(parameters const& P) { // General Checks // ============== @@ -56,7 +59,7 @@ void Check_Configuration(parameters const &P) #endif // Only one integrator check // Check the boundary conditions - auto Check_Boundary = [](int const &boundary, std::string const &direction) { + auto Check_Boundary = [](int const& boundary, std::string const& direction) { bool is_allowed_bc = boundary >= 0 and boundary <= 4; std::string const error_message = "WARNING: Possibly invalid boundary conditions for direction: " + direction + @@ -126,3 +129,55 @@ void Check_Configuration(parameters const &P) #endif // MHD } + +// NOLINTNEXTLINE(cert-dcl50-cpp) +[[noreturn]] void Abort_With_Err_(const char* func_name, const char* file_name, int line_num, const char* msg, ...) +{ + // considerations when using MPI: + // - all processes must execute this function to catch errors that happen on + // just one process + // - to handle cases where all processes encounter the same error, we + // pre-buffer the error message (so that the output remains legible) + + // since we are aborting, it's OK that this isn't the most optimized + + // prepare some info for the error message header + const char* santized_func_name = (func_name == nullptr) ? "{unspecified}" : func_name; + + std::string proc_info = +#ifdef MPI_CHOLLA + std::to_string(procID) + " / " + std::to_string(nproc) + " (using MPI)"; +#else + "0 / 1 (NOT using MPI)" +#endif + + // prepare the formatted message + std::vector msg_buf; + if (msg == nullptr) { + msg_buf = std::vector(80); + std::snprintf(msg_buf.data(), msg_buf.size(), "{nullptr encountered instead of error message}"); + } else { + std::va_list args, args_copy; + va_start(args, msg); + va_copy(args_copy, args); + + std::size_t msg_len = std::vsnprintf(nullptr, 0, msg, args) + 1; + va_end(args); + + msg_buf = std::vector(msg_len); + std::vsnprintf(msg_buf.data(), msg_len, msg, args); + va_end(args_copy); + } + + // now write the error and exit + std::fprintf(stderr, + "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\n" + "Error occurred in %s on line %d\n" + "Function: %s\n" + "Rank: %s\n" + "Message: %s\n" + "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\n", + file_name, line_num, santized_func_name, proc_info.data(), msg_buf.data()); + std::fflush(stderr); // may be unnecessary for stderr + chexit(1); +} \ No newline at end of file diff --git a/src/utils/error_handling.h b/src/utils/error_handling.h index d539f0e50..7fe450cba 100644 --- a/src/utils/error_handling.h +++ b/src/utils/error_handling.h @@ -3,12 +3,57 @@ #include #include "../global/global.h" -void chexit(int code); +[[noreturn]] void chexit(int code); /*! * \brief Check that the Cholla configuration and parameters don't have any significant errors. Mostly compile time * checks. * */ -void Check_Configuration(parameters const &P); +void Check_Configuration(parameters const& P); + +/*! + * \brief helper function that prints an error message & aborts the program (in + * an MPI-safe way). Commonly invoked through a macro. + * + */ +[[noreturn]] void Abort_With_Err_(const char* func_name, const char* file_name, int line_num, const char* msg, ...); + +/*! + * \brief print an error-message (with printf formatting) & abort the program. + * + * This macro should be treated as a function with the signature: + * [[noreturn]] void ERROR(const char* func_name, const char* msg, ...); + * + * - The 1st arg is the name of the function where it's called + * - The 2nd arg is printf-style format argument specifying the error message + * - The remaining args arguments are used to format error message + * + * \note + * the ``msg`` string is part of the variadic args so that there is always + * at least 1 variadic argument (even in cases when ``msg`` doesn't format + * any arguments). There is no way around this until C++ 20. + */ +#define ERROR(func_name, ...) Abort_With_Err_(func_name, __FILE__, __LINE__, __VA_ARGS__) + +/*! + * \brief if the condition is false, print an error-message (with printf + * formatting) & abort the program. + * + * This macro should be treated as a function with the signature: + * [[noreturn]] void ASSERT(bool cond, const char* func_name, const char* msg, ...); + * + * - The 1st arg is a boolean condition. When true, this does noth + * - The 2nd arg is the name of the function where it's called + * - The 3rd arg is printf-style format argument specifying the error message + * - The remaining args arguments are used to format error message + * + * \note + * the behavior is independent of the ``NDEBUG`` macro + */ +#define ASSERT(cond, func_name, ...) \ + if (not(cond)) { \ + Abort_With_Err_(func_name, __FILE__, __LINE__, __VA_ARGS__); \ + } + #endif /*ERROR_HANDLING_CHOLLA_H*/ From f8db7c890331a917238710be82ddc63b4d95b71e Mon Sep 17 00:00:00 2001 From: Matthew Abruzzo Date: Thu, 14 Sep 2023 17:58:35 -0400 Subject: [PATCH 511/694] start using the ERROR and ASSERT macros in a few places (partially to confirm that they actually work) - moved the check on the value of ``gama``. Previously there was a logical bug in the check and the check was performed before the variable was initialized. --- src/global/global.cpp | 4 +++- src/io/io.cpp | 9 ++++----- src/utils/error_handling.cpp | 12 ++++-------- 3 files changed, 11 insertions(+), 14 deletions(-) diff --git a/src/global/global.cpp b/src/global/global.cpp index 2cdb508d1..ee1776922 100644 --- a/src/global/global.cpp +++ b/src/global/global.cpp @@ -12,7 +12,8 @@ #include -#include "../io/io.h" //defines chprintf +#include "../io/io.h" //defines chprintf +#include "../utils/error_handling.h" // defines ASSERT /* Global variables */ Real gama; // Ratio of specific heats @@ -33,6 +34,7 @@ void Set_Gammas(Real gamma_in) { // set gamma gama = gamma_in; + ASSERT(gama > 1.0, "Set_Gammas", "Gamma must be greater than one."); } /*! \fn double get_time(void) diff --git a/src/io/io.cpp b/src/io/io.cpp index c9817cc6d..9edf6c4b4 100644 --- a/src/io/io.cpp +++ b/src/io/io.cpp @@ -2788,11 +2788,10 @@ void Ensure_Outdir_Exists(std::string outdir) // to a directory (it's unclear from docs whether err-code is set in that // case) if (err_code or not std::filesystem::is_directory(without_file_prefix)) { - chprintf( - "something went wrong while trying to create the path to the " - "output-dir: %s\n", - outdir.c_str()); - chexit(1); + ERROR("Ensure_Outdir_Exists", + "something went wrong while trying to create the path to the " + "output-dir: %s", + outdir.c_str()); } } } diff --git a/src/utils/error_handling.cpp b/src/utils/error_handling.cpp index dc491ebdf..c64ee362b 100644 --- a/src/utils/error_handling.cpp +++ b/src/utils/error_handling.cpp @@ -61,11 +61,10 @@ void Check_Configuration(parameters const& P) // Check the boundary conditions auto Check_Boundary = [](int const& boundary, std::string const& direction) { bool is_allowed_bc = boundary >= 0 and boundary <= 4; - std::string const error_message = - "WARNING: Possibly invalid boundary conditions for direction: " + direction + - " flag: " + std::to_string(boundary) + - ". Must select between 0 (no boundary), 1 (periodic), 2 (reflective), 3 (transmissive), 4 (custom), 5 (mpi)."; - assert(is_allowed_bc && error_message.c_str()); + ASSERT(is_allowed_bc, "Check_Configuration", + "WARNING: Possibly invalid boundary conditions for direction: %s flag: %d. " + "Must select between 0 (no boundary), 1 (periodic), 2 (reflective), 3 (transmissive), 4 (custom), 5 (mpi).", + direction.c_str(), boundary); }; Check_Boundary(P.xl_bcnd, "xl_bcnd"); Check_Boundary(P.xu_bcnd, "xu_bcnd"); @@ -85,9 +84,6 @@ void Check_Configuration(parameters const& P) #endif //! PRECISION static_assert(PRECISION == 2, "PRECISION must be 2. Single precision is not currently supported"); - // Check that gamma, the ratio of specific heats, is greater than 1 - assert(::gama <= 1.0 and "Gamma must be greater than one."); - // MHD Checks // ========== #ifdef MHD From 0b25556f741c43a1c4b59886c3dc3c979612d0fd Mon Sep 17 00:00:00 2001 From: Matthew Abruzzo Date: Fri, 15 Sep 2023 10:50:49 -0400 Subject: [PATCH 512/694] improve the ergonomics of ERROR and ASSERT --- src/global/global.cpp | 2 +- src/io/io.cpp | 8 ++++---- src/utils/error_handling.cpp | 2 +- src/utils/error_handling.h | 32 ++++++++++++++++++++++---------- 4 files changed, 28 insertions(+), 16 deletions(-) diff --git a/src/global/global.cpp b/src/global/global.cpp index ee1776922..65b35a83f 100644 --- a/src/global/global.cpp +++ b/src/global/global.cpp @@ -34,7 +34,7 @@ void Set_Gammas(Real gamma_in) { // set gamma gama = gamma_in; - ASSERT(gama > 1.0, "Set_Gammas", "Gamma must be greater than one."); + ASSERT(gama > 1.0, "Gamma must be greater than one."); } /*! \fn double get_time(void) diff --git a/src/io/io.cpp b/src/io/io.cpp index 9edf6c4b4..88e460e3b 100644 --- a/src/io/io.cpp +++ b/src/io/io.cpp @@ -2788,10 +2788,10 @@ void Ensure_Outdir_Exists(std::string outdir) // to a directory (it's unclear from docs whether err-code is set in that // case) if (err_code or not std::filesystem::is_directory(without_file_prefix)) { - ERROR("Ensure_Outdir_Exists", - "something went wrong while trying to create the path to the " - "output-dir: %s", - outdir.c_str()); + ERROR( + "something went wrong while trying to create the path to the " + "output-dir: %s", + outdir.c_str()); } } } diff --git a/src/utils/error_handling.cpp b/src/utils/error_handling.cpp index c64ee362b..04439c6c7 100644 --- a/src/utils/error_handling.cpp +++ b/src/utils/error_handling.cpp @@ -61,7 +61,7 @@ void Check_Configuration(parameters const& P) // Check the boundary conditions auto Check_Boundary = [](int const& boundary, std::string const& direction) { bool is_allowed_bc = boundary >= 0 and boundary <= 4; - ASSERT(is_allowed_bc, "Check_Configuration", + ASSERT(is_allowed_bc, "WARNING: Possibly invalid boundary conditions for direction: %s flag: %d. " "Must select between 0 (no boundary), 1 (periodic), 2 (reflective), 3 (transmissive), 4 (custom), 5 (mpi).", direction.c_str(), boundary); diff --git a/src/utils/error_handling.h b/src/utils/error_handling.h index 7fe450cba..2406b4f49 100644 --- a/src/utils/error_handling.h +++ b/src/utils/error_handling.h @@ -19,14 +19,27 @@ void Check_Configuration(parameters const& P); */ [[noreturn]] void Abort_With_Err_(const char* func_name, const char* file_name, int line_num, const char* msg, ...); +/* __CHOLLA_PRETTY_FUNC__ is a magic constant like __LINE__ or __FILE__ that + * provides the name of the current function. + * - The C++11 standard requires that __func__ is provided on all platforms, but + * that only provides limited information (just the name of the function). + * - Where available, we prefer to use compiler-specific features that provide + * more information about the function (like the scope of the function & the + * the function signature). + */ +#ifdef __GNUG__ + #define __CHOLLA_PRETTY_FUNC__ __PRETTY_FUNCTION__ +#else + #define __CHOLLA_PRETTY_FUNC__ __func__ +#endif + /*! * \brief print an error-message (with printf formatting) & abort the program. * * This macro should be treated as a function with the signature: - * [[noreturn]] void ERROR(const char* func_name, const char* msg, ...); + * [[noreturn]] void ERROR(const char* msg, ...); * - * - The 1st arg is the name of the function where it's called - * - The 2nd arg is printf-style format argument specifying the error message + * - The 1st arg is printf-style format argument specifying the error message * - The remaining args arguments are used to format error message * * \note @@ -34,26 +47,25 @@ void Check_Configuration(parameters const& P); * at least 1 variadic argument (even in cases when ``msg`` doesn't format * any arguments). There is no way around this until C++ 20. */ -#define ERROR(func_name, ...) Abort_With_Err_(func_name, __FILE__, __LINE__, __VA_ARGS__) +#define ERROR(...) Abort_With_Err_(__CHOLLA_PRETTY_FUNC__, __FILE__, __LINE__, __VA_ARGS__) /*! * \brief if the condition is false, print an error-message (with printf * formatting) & abort the program. * * This macro should be treated as a function with the signature: - * [[noreturn]] void ASSERT(bool cond, const char* func_name, const char* msg, ...); + * [[noreturn]] void ASSERT(bool cond, const char* msg, ...); * * - The 1st arg is a boolean condition. When true, this does noth - * - The 2nd arg is the name of the function where it's called - * - The 3rd arg is printf-style format argument specifying the error message + * - The 2nd arg is printf-style format argument specifying the error message * - The remaining args arguments are used to format error message * * \note * the behavior is independent of the ``NDEBUG`` macro */ -#define ASSERT(cond, func_name, ...) \ - if (not(cond)) { \ - Abort_With_Err_(func_name, __FILE__, __LINE__, __VA_ARGS__); \ +#define ASSERT(cond, ...) \ + if (not(cond)) { \ + Abort_With_Err_(__CHOLLA_PRETTY_FUNC__, __FILE__, __LINE__, __VA_ARGS__); \ } #endif /*ERROR_HANDLING_CHOLLA_H*/ From 5b92aac5773ada450b21403345d16f126acdf4e0 Mon Sep 17 00:00:00 2001 From: ezlimen Date: Sat, 16 Sep 2023 12:24:25 -0700 Subject: [PATCH 513/694] runs and compiles --- src/global/global.cpp | 8 +++++++- src/grid/grid3D.cpp | 6 +++++- src/grid/grid3D.h | 4 ++-- 3 files changed, 14 insertions(+), 4 deletions(-) diff --git a/src/global/global.cpp b/src/global/global.cpp index 6c65dc68e..726fa36a6 100644 --- a/src/global/global.cpp +++ b/src/global/global.cpp @@ -148,6 +148,11 @@ void parse_params(char *param_file, struct parameters *parms, int argc, char **a parms->flag_delta = 0; #endif /*ROTATED_PROJECTION*/ +#ifdef STATIC_GRAV + //initialize custom gravity flag to zero + parms->custom_grav = 0; +#endif + #ifdef COSMOLOGY // Initialize file name as an empty string parms->scale_outputs_file[0] = '\0'; @@ -218,7 +223,8 @@ void parse_param(char *name, char *value, struct parameters *parms) printf("Must select between \'1\' (Gresho), \'2\' (Rayleigh-Taylor), \'3\' (Keplerian disk), and \'4\' (Kuzmin disk/NFW halo).\n"); exit(0);} else{ - parms->custom_grav = atoi(value);} + parms->custom_grav = atoi(value); + printf("moving to next step..... custom_grav= %d\n", atoi(value));} #endif } else if (strcmp(name, "tout") == 0) { parms->tout = atof(value); diff --git a/src/grid/grid3D.cpp b/src/grid/grid3D.cpp index 6e51d52ee..4609150e2 100644 --- a/src/grid/grid3D.cpp +++ b/src/grid/grid3D.cpp @@ -147,9 +147,13 @@ void Grid3D::Initialize(struct parameters *P) int ny_in = P->ny; int nz_in = P->nz; #ifdef STATIC_GRAV + if (P->custom_grav !=0){ H.custom_grav = P->custom_grav; printf("P->custom_grav is %d\n", P->custom_grav); - printf("H.custom_grav is %d\n", H.custom_grav); + printf("H.custom_grav is %d\n", H.custom_grav);} + else{ + printf("Abort! No custom gravity field was set.\n"); + exit(0);} #endif // Set the CFL coefficient (a global variable) C_cfl = 0.3; diff --git a/src/grid/grid3D.h b/src/grid/grid3D.h index ba6a85eae..c66431dc0 100644 --- a/src/grid/grid3D.h +++ b/src/grid/grid3D.h @@ -209,9 +209,9 @@ struct Header { /*! \var dt * \brief Length of the current timestep */ Real dt; -#ifdef STATIC_GRAV + int custom_grav; -#endif + #ifdef AVERAGE_SLOW_CELLS Real min_dt_slow; From bccf811ac2232e8b5fdc33745c4e11be9af9f87c Mon Sep 17 00:00:00 2001 From: ezlimen Date: Sun, 17 Sep 2023 11:50:57 -0700 Subject: [PATCH 514/694] remove print statements --- src/gravity/static_grav.h | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/src/gravity/static_grav.h b/src/gravity/static_grav.h index c389c2f0f..eb2eed6a9 100644 --- a/src/gravity/static_grav.h +++ b/src/gravity/static_grav.h @@ -61,18 +61,17 @@ inline __device__ void calc_g_gresho_2D(int xid, int yid, int x_off, int y_off, x_pos = (x_off + xid - n_ghost + 0.5) * dx + xbound; y_pos = (y_off + yid - n_ghost + 0.5) * dy + ybound; // for Gresho, also need r & phi - printf("Gresho\n"); r = sqrt(x_pos * x_pos + y_pos * y_pos); phi = atan2(y_pos, x_pos); // set acceleration to balance v_phi in Gresho problem if (r < 0.2) { *gx = -cos(phi)*25.0*r; - *gy = -sin(phi)*25.0*r; + *gy = sin(phi)*25.0*r; } else if (r >= 0.2 && r < 0.4) { *gx = -cos(phi)*(4.0 - 20.0*r + 25.0*r*r)/r; - *gy = -sin(phi)*(4.0 - 20.0*r + 25.0*r*r)/r; + *gy = sin(phi)*(4.0 - 20.0*r + 25.0*r*r)/r; } else { *gx = 0.0; @@ -107,7 +106,6 @@ inline __device__ void calc_g_kuzmin_2D(int xid, int yid, int x_off, int y_off, // positions on the grid x_pos = (x_off + xid - n_ghost + 0.5) * dx + xbound; y_pos = (y_off + yid - n_ghost + 0.5) * dy + ybound; - printf("kuzmin\n"); r = sqrt(x_pos * x_pos + y_pos * y_pos); phi = atan2(y_pos, x_pos); // set gravitational acceleration for Kuzmin disk + NFW halo @@ -139,7 +137,6 @@ inline __device__ void calc_g_rayleigh_taylor_2D(int xid, int yid, int x_off, in // positions on the grid x_pos = (x_off + xid - n_ghost + 0.5) * dx + xbound; y_pos = (y_off + yid - n_ghost + 0.5) * dy + ybound; - // printf("shhhhh rayleigh taylor\n"); *gx = 0; *gy = -1; return; From 4b2e9ecd421fa5af101d51abd055cd417b8171c2 Mon Sep 17 00:00:00 2001 From: ezlimen Date: Mon, 18 Sep 2023 12:28:58 -0700 Subject: [PATCH 515/694] reset to defaults --- src/gravity/static_grav.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/gravity/static_grav.h b/src/gravity/static_grav.h index eb2eed6a9..bb4744fa8 100644 --- a/src/gravity/static_grav.h +++ b/src/gravity/static_grav.h @@ -67,11 +67,11 @@ inline __device__ void calc_g_gresho_2D(int xid, int yid, int x_off, int y_off, // set acceleration to balance v_phi in Gresho problem if (r < 0.2) { *gx = -cos(phi)*25.0*r; - *gy = sin(phi)*25.0*r; + *gy = -sin(phi)*25.0*r; } else if (r >= 0.2 && r < 0.4) { *gx = -cos(phi)*(4.0 - 20.0*r + 25.0*r*r)/r; - *gy = sin(phi)*(4.0 - 20.0*r + 25.0*r*r)/r; + *gy = -sin(phi)*(4.0 - 20.0*r + 25.0*r*r)/r; } else { *gx = 0.0; From ea0f44321eac96e7850b9a7e4ca0b963f97d65ff Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Mon, 18 Sep 2023 15:54:00 -0400 Subject: [PATCH 516/694] Add script to run clang-tidy on all builds tools/clang-tidy_runner.sh will run clang-tidy on all builds in parallel. Other scripts got an updated method of determining the cholla directory that works if invoked from outside the directory. --- tools/cholla-nv-compute-sanitizer.sh | 2 +- tools/clang-format_runner.sh | 3 ++- tools/clang-tidy_runner.sh | 23 +++++++++++++++++++++++ 3 files changed, 26 insertions(+), 2 deletions(-) create mode 100755 tools/clang-tidy_runner.sh diff --git a/tools/cholla-nv-compute-sanitizer.sh b/tools/cholla-nv-compute-sanitizer.sh index 73afddfc9..ece87b3e0 100755 --- a/tools/cholla-nv-compute-sanitizer.sh +++ b/tools/cholla-nv-compute-sanitizer.sh @@ -83,7 +83,7 @@ if [ -z "$tool" ]; then fi # Get Paths -cholla_root=$(git rev-parse --show-toplevel) +cholla_root="$(dirname "$(dirname "$(readlink -fm "$0")")")" cholla_exe=$(find "${cholla_root}" -name cholla.*) cholla_parameter_file="${cholla_root}/examples/3D/sod.txt" COMPUTE_SANITIZER=$(which compute-sanitizer) diff --git a/tools/clang-format_runner.sh b/tools/clang-format_runner.sh index bc89d8050..ece80ec67 100755 --- a/tools/clang-format_runner.sh +++ b/tools/clang-format_runner.sh @@ -9,7 +9,8 @@ # - GNU Find, the default macos version won't work # Get the location of Cholla -cholla_root=$(git rev-parse --show-toplevel) +cholla_root="$(dirname "$(dirname "$(readlink -fm "$0")")")" +cd $cholla_root # Get a list of all the files to format readarray -t files <<<$(find ${cholla_root} -regex '.*\.\(h\|hpp\|c\|cpp\|cu\|cuh\)$' -print) diff --git a/tools/clang-tidy_runner.sh b/tools/clang-tidy_runner.sh new file mode 100755 index 000000000..0e8930af8 --- /dev/null +++ b/tools/clang-tidy_runner.sh @@ -0,0 +1,23 @@ +#!/usr/bin/env bash + +# Description: +# Run clang-tidy on all build types in parallel. Note that this spawns 2x the +# number of build types threads since each type has a thread for the CPU code +# and a thread for the GPU code + +# If ctrl-c is sent trap it and kill all clang-tidy processes +trap "kill -- -$$" EXIT + +# cd into the Cholla directory. Default to ${HOME}/Code/cholla +cholla_root="$(dirname "$(dirname "$(readlink -fm "$0")")")" +cd $cholla_root + +# Run all clang-tidy build types in parallel +builds=( hydro gravity disk particles cosmology mhd dust) +for build in "${builds[@]}" +do + make tidy TYPE=$build & +done + +# Wait for clang-tidy to finish +wait From 000cfbdfd93a4f15e8af70ecd9eb13944565998c Mon Sep 17 00:00:00 2001 From: Matthew Abruzzo Date: Tue, 19 Sep 2023 11:15:01 -0400 Subject: [PATCH 517/694] addressed PR comments --- src/utils/error_handling.cpp | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/src/utils/error_handling.cpp b/src/utils/error_handling.cpp index 04439c6c7..4488a7fa4 100644 --- a/src/utils/error_handling.cpp +++ b/src/utils/error_handling.cpp @@ -5,7 +5,6 @@ #include #include #include -#include #ifdef MPI_CHOLLA #include "../mpi/mpi_routines.h" @@ -140,28 +139,35 @@ void Check_Configuration(parameters const& P) // prepare some info for the error message header const char* santized_func_name = (func_name == nullptr) ? "{unspecified}" : func_name; - std::string proc_info = #ifdef MPI_CHOLLA - std::to_string(procID) + " / " + std::to_string(nproc) + " (using MPI)"; + std::string proc_info = std::to_string(procID) + " / " + std::to_string(nproc) + " (using MPI)"; #else - "0 / 1 (NOT using MPI)" + std::string proc_info = "0 / 1 (NOT using MPI)"; #endif // prepare the formatted message - std::vector msg_buf; + std::string msg_buf; if (msg == nullptr) { - msg_buf = std::vector(80); - std::snprintf(msg_buf.data(), msg_buf.size(), "{nullptr encountered instead of error message}"); + msg_buf = "{nullptr encountered instead of error message}"; } else { std::va_list args, args_copy; va_start(args, msg); va_copy(args_copy, args); - std::size_t msg_len = std::vsnprintf(nullptr, 0, msg, args) + 1; + std::size_t bufsize_without_terminator = std::vsnprintf(nullptr, 0, msg, args); va_end(args); - msg_buf = std::vector(msg_len); - std::vsnprintf(msg_buf.data(), msg_len, msg, args); + // NOTE: starting in C++17 it's possible to mutate msg_buf by mutating msg_buf.data() + + // we initialize a msg_buf with size == bufsize_without_terminator (filled with ' ' chars) + // - msg_buf.data() returns a ptr with msg_buf.size() + 1 characters. We are allowed to + // mutate any of the first msg_buf.size() characters. The entry at + // msg_buf.data()[msg_buf.size()] is initially '\0' (& it MUST remain equal to '\0') + // - the 2nd argument of std::vsnprintf is the size of the output buffer. We NEED to + // include the terminator character in this argument, otherwise the formatted message + // will be truncated + msg_buf = std::string(bufsize_without_terminator, ' '); + std::vsnprintf(msg_buf.data(), bufsize_without_terminator + 1, msg, args_copy); va_end(args_copy); } From 3ebd636bccfa3898a1fd02ee42c8ff0f097f374e Mon Sep 17 00:00:00 2001 From: Matthew Abruzzo Date: Tue, 19 Sep 2023 11:38:44 -0400 Subject: [PATCH 518/694] Rename the ``ASSERT`` and ``ERROR`` to ``CHOLLA_ASSERT`` and ``CHOLLA_ERROR`` --- src/global/global.cpp | 2 +- src/io/io.cpp | 2 +- src/utils/error_handling.cpp | 9 +++++---- src/utils/error_handling.h | 8 ++++---- 4 files changed, 11 insertions(+), 10 deletions(-) diff --git a/src/global/global.cpp b/src/global/global.cpp index 65b35a83f..2aa5792fe 100644 --- a/src/global/global.cpp +++ b/src/global/global.cpp @@ -34,7 +34,7 @@ void Set_Gammas(Real gamma_in) { // set gamma gama = gamma_in; - ASSERT(gama > 1.0, "Gamma must be greater than one."); + CHOLLA_ASSERT(gama > 1.0, "Gamma must be greater than one."); } /*! \fn double get_time(void) diff --git a/src/io/io.cpp b/src/io/io.cpp index 88e460e3b..09ffd0d17 100644 --- a/src/io/io.cpp +++ b/src/io/io.cpp @@ -2788,7 +2788,7 @@ void Ensure_Outdir_Exists(std::string outdir) // to a directory (it's unclear from docs whether err-code is set in that // case) if (err_code or not std::filesystem::is_directory(without_file_prefix)) { - ERROR( + CHOLLA_ERROR( "something went wrong while trying to create the path to the " "output-dir: %s", outdir.c_str()); diff --git a/src/utils/error_handling.cpp b/src/utils/error_handling.cpp index 4488a7fa4..5a7bad073 100644 --- a/src/utils/error_handling.cpp +++ b/src/utils/error_handling.cpp @@ -60,10 +60,11 @@ void Check_Configuration(parameters const& P) // Check the boundary conditions auto Check_Boundary = [](int const& boundary, std::string const& direction) { bool is_allowed_bc = boundary >= 0 and boundary <= 4; - ASSERT(is_allowed_bc, - "WARNING: Possibly invalid boundary conditions for direction: %s flag: %d. " - "Must select between 0 (no boundary), 1 (periodic), 2 (reflective), 3 (transmissive), 4 (custom), 5 (mpi).", - direction.c_str(), boundary); + CHOLLA_ASSERT(is_allowed_bc, + "WARNING: Possibly invalid boundary conditions for direction: %s flag: %d. Must " + "select between 0 (no boundary), 1 (periodic), 2 (reflective), 3 (transmissive), " + "4 (custom), 5 (mpi).", + direction.c_str(), boundary); }; Check_Boundary(P.xl_bcnd, "xl_bcnd"); Check_Boundary(P.xu_bcnd, "xu_bcnd"); diff --git a/src/utils/error_handling.h b/src/utils/error_handling.h index 2406b4f49..4db749881 100644 --- a/src/utils/error_handling.h +++ b/src/utils/error_handling.h @@ -37,7 +37,7 @@ void Check_Configuration(parameters const& P); * \brief print an error-message (with printf formatting) & abort the program. * * This macro should be treated as a function with the signature: - * [[noreturn]] void ERROR(const char* msg, ...); + * [[noreturn]] void CHOLLA_ERROR(const char* msg, ...); * * - The 1st arg is printf-style format argument specifying the error message * - The remaining args arguments are used to format error message @@ -47,14 +47,14 @@ void Check_Configuration(parameters const& P); * at least 1 variadic argument (even in cases when ``msg`` doesn't format * any arguments). There is no way around this until C++ 20. */ -#define ERROR(...) Abort_With_Err_(__CHOLLA_PRETTY_FUNC__, __FILE__, __LINE__, __VA_ARGS__) +#define CHOLLA_ERROR(...) Abort_With_Err_(__CHOLLA_PRETTY_FUNC__, __FILE__, __LINE__, __VA_ARGS__) /*! * \brief if the condition is false, print an error-message (with printf * formatting) & abort the program. * * This macro should be treated as a function with the signature: - * [[noreturn]] void ASSERT(bool cond, const char* msg, ...); + * [[noreturn]] void CHOLLA_ASSERT(bool cond, const char* msg, ...); * * - The 1st arg is a boolean condition. When true, this does noth * - The 2nd arg is printf-style format argument specifying the error message @@ -63,7 +63,7 @@ void Check_Configuration(parameters const& P); * \note * the behavior is independent of the ``NDEBUG`` macro */ -#define ASSERT(cond, ...) \ +#define CHOLLA_ASSERT(cond, ...) \ if (not(cond)) { \ Abort_With_Err_(__CHOLLA_PRETTY_FUNC__, __FILE__, __LINE__, __VA_ARGS__); \ } From d4035e7343f55eb611d7ba147d06d8e61772781a Mon Sep 17 00:00:00 2001 From: ezlimen Date: Tue, 19 Sep 2023 09:23:38 -0700 Subject: [PATCH 519/694] 2 and 3d static gravity functions given flag capabilities --- src/global/global.cpp | 14 ++--- src/gravity/static_grav.h | 88 ++++++++++++++----------------- src/grid/grid3D.cpp | 13 +++-- src/hydro/hydro_cuda.cu | 27 ++-------- src/hydro/hydro_cuda.h | 4 +- src/integrators/simple_1D_cuda.cu | 4 +- src/integrators/simple_1D_cuda.h | 2 +- src/integrators/simple_3D_cuda.cu | 4 +- src/integrators/simple_3D_cuda.h | 2 +- src/main.cpp | 6 --- 10 files changed, 65 insertions(+), 99 deletions(-) diff --git a/src/global/global.cpp b/src/global/global.cpp index 726fa36a6..0af0783c7 100644 --- a/src/global/global.cpp +++ b/src/global/global.cpp @@ -218,14 +218,14 @@ void parse_param(char *name, char *value, struct parameters *parms) parms->nz = atoi(value); #ifdef STATIC_GRAV } else if (strcmp(name, "custom_grav") == 0) { - if (atoi(value) < 1 || atoi(value) > 4){ - printf("ABORT: %d -> Unknown custom static gravity field.\n", atoi(value)); - printf("Must select between \'1\' (Gresho), \'2\' (Rayleigh-Taylor), \'3\' (Keplerian disk), and \'4\' (Kuzmin disk/NFW halo).\n"); - exit(0);} - else{ + //if (atoi(value) == 0){ + // printf("WARNING:%d -> Unknown custom static gravity field.\n", atoi(value)); + // printf("Must select between \'1\' (Gresho), \'2\' (Rayleigh-Taylor), \'3\' (Keplerian disk), and \'4\' (Kuzmin disk/NFW halo).\n"); + // } + //else{ parms->custom_grav = atoi(value); - printf("moving to next step..... custom_grav= %d\n", atoi(value));} -#endif + printf("moving to next step..... custom_grav= %d\n", atoi(value));//} + #endif } else if (strcmp(name, "tout") == 0) { parms->tout = atof(value); } else if (strcmp(name, "outstep") == 0) { diff --git a/src/gravity/static_grav.h b/src/gravity/static_grav.h index bb4744fa8..552ca1116 100644 --- a/src/gravity/static_grav.h +++ b/src/gravity/static_grav.h @@ -14,11 +14,12 @@ // Work around lack of pow(Real,int) in Hip Clang for Rocm 3.5 static inline __device__ Real pow2(const Real x) { return x * x; } -inline __device__ void calc_g_1D(int xid, int x_off, int n_ghost, Real dx, Real xbound, Real *gx) +inline __device__ void calc_g_1D(int xid, int x_off, int n_ghost, int custom_grav, Real dx, Real xbound, Real *gx) { Real x_pos, r_disk, r_halo; x_pos = (x_off + xid - n_ghost + 0.5) * dx + xbound; - + switch(custom_grav){ + case 1: // for disk components, calculate polar r // r_disk = 0.220970869121; // r_disk = 6.85009694274; @@ -48,11 +49,14 @@ inline __device__ void calc_g_1D(int xid, int x_off, int n_ghost, Real dx, Real // total acceleration is the sum of the halo + disk components *gx = (x_pos / r_halo) * a_halo + a_disk_z; - + break; + default: + *gx = 0; + } return; } -inline __device__ void calc_g_gresho_2D(int xid, int yid, int x_off, int y_off, int n_ghost, Real dx, Real dy, Real xbound, +inline __device__ void calc_g_2D(int xid, int yid, int x_off, int y_off, int n_ghost, int custom_grav, Real dx, Real dy, Real xbound, Real ybound, Real *gx, Real *gy) { Real x_pos, y_pos, r, phi; @@ -63,7 +67,9 @@ inline __device__ void calc_g_gresho_2D(int xid, int yid, int x_off, int y_off, // for Gresho, also need r & phi r = sqrt(x_pos * x_pos + y_pos * y_pos); phi = atan2(y_pos, x_pos); - + switch(custom_grav){ + case 1: + // printf("gresho\n"); // set acceleration to balance v_phi in Gresho problem if (r < 0.2) { *gx = -cos(phi)*25.0*r; @@ -77,38 +83,22 @@ inline __device__ void calc_g_gresho_2D(int xid, int yid, int x_off, int y_off, *gx = 0.0; *gy = 0.0; } - return; -} - -inline __device__ void calc_g_keplerian_2D(int xid, int yid, int x_off, int y_off, int n_ghost, Real dx, Real dy, Real xbound, - Real ybound, Real *gx, Real *gy) -{ - Real x_pos, y_pos, r, phi; - // use the subgrid offset and global boundaries to calculate absolute - // positions on the grid - x_pos = (x_off + xid - n_ghost + 0.5) * dx + xbound; - y_pos = (y_off + yid - n_ghost + 0.5) * dy + ybound; - r = sqrt(x_pos * x_pos + y_pos * y_pos); - phi = atan2(y_pos, x_pos); - // set gravitational acceleration for Keplerian potential - Real M; + break; + case 2: + //printf("rayleigh talor\n"); + *gx = 0; + *gy = -1; + break; + case 3: + //printf("keplerian\n"); + Real M; M = 1*MSUN_CGS; *gx = -cos(phi)*GN*M/(r*r); *gy = -sin(phi)*GN*M/(r*r); - return; -} - -inline __device__ void calc_g_kuzmin_2D(int xid, int yid, int x_off, int y_off, int n_ghost, Real dx, Real dy, Real xbound, - Real ybound, Real *gx, Real *gy) -{ - Real x_pos, y_pos, r, phi; - // use the subgrid offset and global boundaries to calculate absolute - // positions on the grid - x_pos = (x_off + xid - n_ghost + 0.5) * dx + xbound; - y_pos = (y_off + yid - n_ghost + 0.5) * dy + ybound; - r = sqrt(x_pos * x_pos + y_pos * y_pos); - phi = atan2(y_pos, x_pos); - // set gravitational acceleration for Kuzmin disk + NFW halo + break; + case 4: + //printf("disk\n"); +// set gravitational acceleration for Kuzmin disk + NFW halo Real a_d, a_h, a, M_vir, M_d, R_vir, R_d, R_s, M_h, c_vir, x; M_vir = 1.0e12; // viral mass of MW in M_sun M_d = 6.5e10; // mass of disk in M_sun (assume all gas) @@ -126,23 +116,17 @@ inline __device__ void calc_g_kuzmin_2D(int xid, int yid, int x_off, int y_off, *gx = -cos(phi) * a; *gy = -sin(phi) * a; - return; -} - -inline __device__ void calc_g_rayleigh_taylor_2D(int xid, int yid, int x_off, int y_off, int n_ghost, Real dx, Real dy, Real xbound, - Real ybound, Real *gx, Real *gy) -{ - Real x_pos, y_pos; - // use the subgrid offset and global boundaries to calculate absolute - // positions on the grid - x_pos = (x_off + xid - n_ghost + 0.5) * dx + xbound; - y_pos = (y_off + yid - n_ghost + 0.5) * dy + ybound; + break; + default: + //printf("default\n"); *gx = 0; - *gy = -1; + *gy = 0; + } + return; } -inline __device__ void calc_g_3D(int xid, int yid, int zid, int x_off, int y_off, int z_off, int n_ghost, Real dx, +inline __device__ void calc_g_3D(int xid, int yid, int zid, int x_off, int y_off, int z_off, int n_ghost, int custom_grav, Real dx, Real dy, Real dz, Real xbound, Real ybound, Real zbound, Real *gx, Real *gy, Real *gz) { Real x_pos, y_pos, z_pos, r_disk, r_halo; @@ -156,7 +140,8 @@ inline __device__ void calc_g_3D(int xid, int yid, int zid, int x_off, int y_off r_disk = sqrt(x_pos * x_pos + y_pos * y_pos); // for halo, calculate spherical r r_halo = sqrt(x_pos * x_pos + y_pos * y_pos + z_pos * z_pos); - + switch(custom_grav){ + case 1: // set properties of halo and disk (these must match initial conditions) Real a_disk_r, a_disk_z, a_halo, a_halo_r, a_halo_z; Real M_vir, M_d, R_vir, R_d, z_d, R_h, M_h, c_vir, phi_0_h, x; @@ -193,7 +178,12 @@ inline __device__ void calc_g_3D(int xid, int yid, int zid, int x_off, int y_off *gx = (x_pos / r_disk) * (a_disk_r + a_halo_r); *gy = (y_pos / r_disk) * (a_disk_r + a_halo_r); *gz = a_disk_z + a_halo_z; - + break; + default: + *gx = 0; + *gy = 0; + *gz = 0; + } return; } diff --git a/src/grid/grid3D.cpp b/src/grid/grid3D.cpp index 4609150e2..c165cf7ce 100644 --- a/src/grid/grid3D.cpp +++ b/src/grid/grid3D.cpp @@ -147,13 +147,12 @@ void Grid3D::Initialize(struct parameters *P) int ny_in = P->ny; int nz_in = P->nz; #ifdef STATIC_GRAV - if (P->custom_grav !=0){ + H.custom_grav = P->custom_grav; printf("P->custom_grav is %d\n", P->custom_grav); - printf("H.custom_grav is %d\n", H.custom_grav);} - else{ - printf("Abort! No custom gravity field was set.\n"); - exit(0);} + printf("H.custom_grav is %d\n", H.custom_grav); +if (H.custom_grav == 0){ + printf("WARNING: No custom gravity field was set.\n");} #endif // Set the CFL coefficient (a global variable) C_cfl = 0.3; @@ -442,7 +441,7 @@ Real Grid3D::Update_Grid(void) VL_Algorithm_1D_CUDA(C.device, H.nx, x_off, H.n_ghost, H.dx, H.xbound, H.dt, H.n_fields); #endif // VL #ifdef SIMPLE - Simple_Algorithm_1D_CUDA(C.device, H.nx, x_off, H.n_ghost, H.dx, H.xbound, H.dt, H.n_fields); + Simple_Algorithm_1D_CUDA(C.device, H.nx, x_off, H.n_ghost, H.dx, H.xbound, H.dt, H.n_fields, H.custom_grav); #endif // SIMPLE #endif // CUDA } else if (H.nx > 1 && H.ny > 1 && H.nz == 1) // 2D @@ -467,7 +466,7 @@ Real Grid3D::Update_Grid(void) #endif // VL #ifdef SIMPLE Simple_Algorithm_3D_CUDA(C.device, C.d_Grav_potential, H.nx, H.ny, H.nz, x_off, y_off, z_off, H.n_ghost, H.dx, H.dy, - H.dz, H.xbound, H.ybound, H.zbound, H.dt, H.n_fields, density_floor, U_floor, + H.dz, H.xbound, H.ybound, H.zbound, H.dt, H.n_fields, H.custom_grav, density_floor, U_floor, C.Grav_potential); #endif // SIMPLE #endif diff --git a/src/hydro/hydro_cuda.cu b/src/hydro/hydro_cuda.cu index c53ae86f1..01b79c090 100644 --- a/src/hydro/hydro_cuda.cu +++ b/src/hydro/hydro_cuda.cu @@ -19,7 +19,7 @@ #include "../utils/reduction_utilities.h" __global__ void Update_Conserved_Variables_1D(Real *dev_conserved, Real *dev_F, int n_cells, int x_off, int n_ghost, - Real dx, Real xbound, Real dt, Real gamma, int n_fields) + Real dx, Real xbound, Real dt, Real gamma, int n_fields, int custom_grav) { int id; #ifdef STATIC_GRAV @@ -59,7 +59,7 @@ __global__ void Update_Conserved_Variables_1D(Real *dev_conserved, Real *dev_F, #endif #ifdef STATIC_GRAV // add gravitational source terms, time averaged from n to // n+1 - calc_g_1D(id, x_off, n_ghost, dx, xbound, &gx); + calc_g_1D(id, x_off, n_ghost, custom_grav, dx, xbound, &gx); d_n = dev_conserved[id]; d_inv_n = 1.0 / d_n; vx_n = dev_conserved[1 * n_cells + id] * d_inv_n; @@ -141,24 +141,7 @@ __global__ void Update_Conserved_Variables_2D(Real *dev_conserved, Real *dev_F_x #endif #ifdef STATIC_GRAV // calculate the gravitational acceleration as a function of x & y position - switch(custom_grav) { - case 1: //gresho - calc_g_gresho_2D(xid, yid, x_off, y_off, n_ghost, dx, dy, xbound, ybound, &gx, &gy); - break; - case 2: //rayleigh taylor instability - calc_g_rayleigh_taylor_2D(xid, yid, x_off, y_off, n_ghost, dx, dy, xbound, ybound, &gx, &gy); - break; -case 3: //keplerian disk - calc_g_keplerian_2D(xid, yid, x_off, y_off, n_ghost, dx, dy, xbound, ybound, &gx, &gy); -break; -case 4: //Kuzmin/NFW halo - calc_g_kuzmin_2D(xid, yid, x_off, y_off, n_ghost, dx, dy, xbound, ybound, &gx, &gy); -break; -default: - //printf("%d -> Unknown custom static gravity field. Options are \'1\' (Gresho), \'2\' (Rayleigh-Taylor), \'3\' (keplerian disk), \'4\' (Kuzmin disk with NFW halo). \n", custom_grav); -printf("No gravity field will be set\n"); -//printf("%d\t%d\t%d\t%d\n", custom_grav, custom_grav, custom_grav, custom_grav); -} + calc_g_2D(xid, yid, x_off, y_off, n_ghost, custom_grav, dx, dy, xbound, ybound, &gx, &gy); // add gravitational source terms, time averaged from n to n+1 d_n = dev_conserved[id]; d_inv_n = 1.0 / d_n; @@ -191,7 +174,7 @@ __global__ void Update_Conserved_Variables_3D(Real *dev_conserved, Real *Q_Lx, R Real *Q_Lz, Real *Q_Rz, Real *dev_F_x, Real *dev_F_y, Real *dev_F_z, int nx, int ny, int nz, int x_off, int y_off, int z_off, int n_ghost, Real dx, Real dy, Real dz, Real xbound, Real ybound, Real zbound, Real dt, - Real gamma, int n_fields, Real density_floor, Real *dev_potential) + Real gamma, int n_fields, int custom_grav, Real density_floor, Real *dev_potential) { int id, xid, yid, zid, n_cells; int imo, jmo, kmo; @@ -316,7 +299,7 @@ __global__ void Update_Conserved_Variables_3D(Real *dev_conserved, Real *Q_Lx, R #endif // DENSITY_FLOOR #ifdef STATIC_GRAV - calc_g_3D(xid, yid, zid, x_off, y_off, z_off, n_ghost, dx, dy, dz, xbound, ybound, zbound, &gx, &gy, &gz); + calc_g_3D(xid, yid, zid, x_off, y_off, z_off, n_ghost, custom_grav, dx, dy, dz, xbound, ybound, zbound, &gx, &gy, &gz); d_n = dev_conserved[id]; d_inv_n = 1.0 / d_n; vx_n = dev_conserved[1 * n_cells + id] * d_inv_n; diff --git a/src/hydro/hydro_cuda.h b/src/hydro/hydro_cuda.h index 0e9425091..371cda8d9 100644 --- a/src/hydro/hydro_cuda.h +++ b/src/hydro/hydro_cuda.h @@ -9,7 +9,7 @@ #include "../utils/mhd_utilities.h" __global__ void Update_Conserved_Variables_1D(Real *dev_conserved, Real *dev_F, int n_cells, int x_off, int n_ghost, - Real dx, Real xbound, Real dt, Real gamma, int n_fields); + Real dx, Real xbound, Real dt, Real gamma, int n_fields, int custom_grav); __global__ void Update_Conserved_Variables_2D(Real *dev_conserved, Real *dev_F_x, Real *dev_F_y, int nx, int ny, int x_off, int y_off, int n_ghost, Real dx, Real dy, Real xbound, @@ -19,7 +19,7 @@ __global__ void Update_Conserved_Variables_3D(Real *dev_conserved, Real *Q_Lx, R Real *Q_Lz, Real *Q_Rz, Real *dev_F_x, Real *dev_F_y, Real *dev_F_z, int nx, int ny, int nz, int x_off, int y_off, int z_off, int n_ghost, Real dx, Real dy, Real dz, Real xbound, Real ybound, Real zbound, Real dt, - Real gamma, int n_fields, Real density_floor, Real *dev_potential); + Real gamma, int n_fields, int custom_grav, Real density_floor, Real *dev_potential); /*! * \brief Determine the maximum inverse crossing time in a specific cell diff --git a/src/integrators/simple_1D_cuda.cu b/src/integrators/simple_1D_cuda.cu index 3be5ba40a..067735fcd 100644 --- a/src/integrators/simple_1D_cuda.cu +++ b/src/integrators/simple_1D_cuda.cu @@ -24,7 +24,7 @@ #include "../utils/gpu.hpp" void Simple_Algorithm_1D_CUDA(Real *d_conserved, int nx, int x_off, int n_ghost, Real dx, Real xbound, Real dt, - int n_fields) + int n_fields, int custom_grav) { // Here, *dev_conserved contains the entire // set of conserved variables on the grid @@ -105,7 +105,7 @@ void Simple_Algorithm_1D_CUDA(Real *d_conserved, int nx, int x_off, int n_ghost, // Step 3: Update the conserved variable array hipLaunchKernelGGL(Update_Conserved_Variables_1D, dimGrid, dimBlock, 0, 0, dev_conserved, F_x, n_cells, x_off, - n_ghost, dx, xbound, dt, gama, n_fields); + n_ghost, dx, xbound, dt, gama, n_fields, custom_grav); CudaCheckError(); // Synchronize the total and internal energy, if using dual-energy formalism diff --git a/src/integrators/simple_1D_cuda.h b/src/integrators/simple_1D_cuda.h index 69c38cae7..43dcc4fc8 100644 --- a/src/integrators/simple_1D_cuda.h +++ b/src/integrators/simple_1D_cuda.h @@ -9,7 +9,7 @@ #include "../global/global.h" void Simple_Algorithm_1D_CUDA(Real *d_conserved, int nx, int x_off, int n_ghost, Real dx, Real xbound, Real dt, - int n_fields); + int n_fields, int custom_grav); void Free_Memory_Simple_1D(); diff --git a/src/integrators/simple_3D_cuda.cu b/src/integrators/simple_3D_cuda.cu index 32994eeff..e2df9387c 100644 --- a/src/integrators/simple_3D_cuda.cu +++ b/src/integrators/simple_3D_cuda.cu @@ -26,7 +26,7 @@ void Simple_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, int ny, int nz, int x_off, int y_off, int z_off, int n_ghost, Real dx, Real dy, Real dz, Real xbound, Real ybound, Real zbound, - Real dt, int n_fields, Real density_floor, Real U_floor, Real *host_grav_potential) + Real dt, int n_fields, int custom_grav, Real density_floor, Real U_floor, Real *host_grav_potential) { // Here, *dev_conserved contains the entire // set of conserved variables on the grid @@ -169,7 +169,7 @@ void Simple_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, // Step 3: Update the conserved variable array hipLaunchKernelGGL(Update_Conserved_Variables_3D, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, Q_Ly, Q_Ry, Q_Lz, Q_Rz, F_x, F_y, F_z, nx, ny, nz, x_off, y_off, z_off, n_ghost, dx, dy, dz, xbound, ybound, - zbound, dt, gama, n_fields, density_floor, dev_grav_potential); + zbound, dt, gama, n_fields, custom_grav, density_floor, dev_grav_potential); CudaCheckError(); #ifdef DE diff --git a/src/integrators/simple_3D_cuda.h b/src/integrators/simple_3D_cuda.h index dc83e044c..60776aedf 100644 --- a/src/integrators/simple_3D_cuda.h +++ b/src/integrators/simple_3D_cuda.h @@ -11,7 +11,7 @@ void Simple_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, int ny, int nz, int x_off, int y_off, int z_off, int n_ghost, Real dx, Real dy, Real dz, Real xbound, Real ybound, Real zbound, - Real dt, int n_fields, Real density_floor, Real U_floor, Real *host_grav_potential); + Real dt, int n_fields, int custom_grav, Real density_floor, Real U_floor, Real *host_grav_potential); void Free_Memory_Simple_3D(); diff --git a/src/main.cpp b/src/main.cpp index 6886ac9c9..ec7c5c0ca 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -130,12 +130,6 @@ int main(int argc, char *argv[]) Write_Message_To_Log_File(message.c_str()); #endif -#ifdef STATIC_GRAV - chprintf("\nUsing Static Gravity:\n Custom gravity field selected: %d.\n", P.custom_grav); - message = " Custom gravity field: " + std::to_string(P.custom_grav); - Write_Message_To_Log_File(message.c_str()); -#endif - #ifdef CPU_TIME G.Timer.Initialize(); #endif From 141201b6ae3f8621add80ce5d8a4b11d0af7908a Mon Sep 17 00:00:00 2001 From: ezlimen Date: Tue, 19 Sep 2023 15:33:27 -0700 Subject: [PATCH 520/694] trying to get outputs for gresho. rayleigh taylor works --- builds/make.type.static_grav | 2 +- examples/2D/Gresho.txt | 2 ++ examples/2D/Rayleigh_Taylor.txt | 2 ++ examples/2D/disk.txt | 2 ++ src/global/global.cpp | 8 +------- src/global/global.h | 2 +- src/grid/grid3D.cpp | 9 +++++---- src/grid/grid3D.h | 7 ++++--- src/grid/initial_conditions.cpp | 2 +- src/main.cpp | 7 ++++--- 10 files changed, 23 insertions(+), 20 deletions(-) diff --git a/builds/make.type.static_grav b/builds/make.type.static_grav index ffa15c4ee..cf621afa8 100644 --- a/builds/make.type.static_grav +++ b/builds/make.type.static_grav @@ -29,4 +29,4 @@ DFLAGS += -DSTATIC_GRAV # Can also add -DSLICES and -DPROJECTIONS OUTPUT ?= -DOUTPUT -DHDF5 DFLAGS += $(OUTPUT) - +DN_OUTPUT_COMPLETE=1 diff --git a/examples/2D/Gresho.txt b/examples/2D/Gresho.txt index cc645431d..6595c5695 100644 --- a/examples/2D/Gresho.txt +++ b/examples/2D/Gresho.txt @@ -17,6 +17,8 @@ outstep=0.05 gamma=1.4 # name of initial conditions init=Gresho +# static gravity flag +custom_grav=1 # domain properties xmin=-0.5 ymin=-0.5 diff --git a/examples/2D/Rayleigh_Taylor.txt b/examples/2D/Rayleigh_Taylor.txt index 3cf87dbea..919e654e1 100644 --- a/examples/2D/Rayleigh_Taylor.txt +++ b/examples/2D/Rayleigh_Taylor.txt @@ -17,6 +17,8 @@ outstep=0.05 gamma=1.4 # name of initial conditions init=Rayleigh_Taylor +#static gravity flag +custom_grav=2 # domain properties xmin=0.0 ymin=0.0 diff --git a/examples/2D/disk.txt b/examples/2D/disk.txt index 3dd0ce821..86397f6d9 100644 --- a/examples/2D/disk.txt +++ b/examples/2D/disk.txt @@ -17,6 +17,8 @@ outstep=2185.9 gamma=1.001 # name of initial conditions init=Disk_2D +# static gravity flag +custom_grav=3 # domain properties xmin=-20 ymin=-20 diff --git a/src/global/global.cpp b/src/global/global.cpp index 0af0783c7..6bfa6d564 100644 --- a/src/global/global.cpp +++ b/src/global/global.cpp @@ -218,13 +218,7 @@ void parse_param(char *name, char *value, struct parameters *parms) parms->nz = atoi(value); #ifdef STATIC_GRAV } else if (strcmp(name, "custom_grav") == 0) { - //if (atoi(value) == 0){ - // printf("WARNING:%d -> Unknown custom static gravity field.\n", atoi(value)); - // printf("Must select between \'1\' (Gresho), \'2\' (Rayleigh-Taylor), \'3\' (Keplerian disk), and \'4\' (Kuzmin disk/NFW halo).\n"); - // } - //else{ - parms->custom_grav = atoi(value); - printf("moving to next step..... custom_grav= %d\n", atoi(value));//} + parms->custom_grav = atoi(value); #endif } else if (strcmp(name, "tout") == 0) { parms->tout = atof(value); diff --git a/src/global/global.h b/src/global/global.h index a3f87a516..1cab0d8d8 100644 --- a/src/global/global.h +++ b/src/global/global.h @@ -199,7 +199,7 @@ struct parameters { int out_float32_GasEnergy = 0; #endif #ifdef STATIC_GRAV - int custom_grav; + int custom_grav; //flag to set specific static gravity field #endif #ifdef MHD int out_float32_magnetic_x = 0; diff --git a/src/grid/grid3D.cpp b/src/grid/grid3D.cpp index c165cf7ce..f6259c48a 100644 --- a/src/grid/grid3D.cpp +++ b/src/grid/grid3D.cpp @@ -146,14 +146,15 @@ void Grid3D::Initialize(struct parameters *P) int nx_in = P->nx; int ny_in = P->ny; int nz_in = P->nz; + #ifdef STATIC_GRAV - - H.custom_grav = P->custom_grav; - printf("P->custom_grav is %d\n", P->custom_grav); + H.custom_grav = P->custom_grav; //Initialize the custom static gravity flag printf("H.custom_grav is %d\n", H.custom_grav); if (H.custom_grav == 0){ - printf("WARNING: No custom gravity field was set.\n");} + printf("WARNING: No custom gravity field given. Gravity field will be set to zero.\n"); +} #endif + // Set the CFL coefficient (a global variable) C_cfl = 0.3; diff --git a/src/grid/grid3D.h b/src/grid/grid3D.h index c66431dc0..055646b2e 100644 --- a/src/grid/grid3D.h +++ b/src/grid/grid3D.h @@ -210,9 +210,6 @@ struct Header { * \brief Length of the current timestep */ Real dt; - int custom_grav; - - #ifdef AVERAGE_SLOW_CELLS Real min_dt_slow; #endif @@ -229,6 +226,10 @@ struct Header { * \brief Number of fields (conserved variables, scalars, etc.) */ int n_fields; + /*! \var custom_grav + * \brief Flag to set specific static gravity field */ + int custom_grav; + // Values for lower limit for density and temperature Real density_floor; Real temperature_floor; diff --git a/src/grid/initial_conditions.cpp b/src/grid/initial_conditions.cpp index 333d2548a..38967e4b7 100644 --- a/src/grid/initial_conditions.cpp +++ b/src/grid/initial_conditions.cpp @@ -894,7 +894,7 @@ void Grid3D::Rayleigh_Taylor() vy = 0.01 * cos(6 * M_PI * x_pos + M_PI) * exp(-(y_pos - 0.5 * H.ydglobal) * (y_pos - 0.5 * H.ydglobal) / 0.1); // vy = 0.0; - // lower half of slab + // lower half of slab if (y_pos <= 0.5 * H.ydglobal) { P_0 = 1.0 / gama - dl * g * 0.5; P = P_0 + dl * g * y_pos; diff --git a/src/main.cpp b/src/main.cpp index ec7c5c0ca..f17f4231d 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -79,7 +79,7 @@ int main(int argc, char *argv[]) chprintf("Git Commit Hash = %s\n", GIT_HASH); chprintf("Macro Flags = %s\n", MACRO_FLAGS); chprintf( - "Parameter values: nx = %d, ny = %d, nz = %d, tout = %f, init = %s, " + "Parameter values: nx = %d, ny = %d, nz = %d, tout = %f, init = %s, " "boundaries = %d %d %d %d %d %d\n", P.nx, P.ny, P.nz, P.tout, P.init, P.xl_bcnd, P.xu_bcnd, P.yl_bcnd, P.yu_bcnd, P.zl_bcnd, P.zu_bcnd); @@ -107,13 +107,14 @@ int main(int argc, char *argv[]) Write_Message_To_Log_File(message.c_str()); message = "Macro Flags = " + std::string(MACRO_FLAGS); Write_Message_To_Log_File(message.c_str()); + // initialize the grid G.Initialize(&P); chprintf("Local number of grid cells: %d %d %d %d\n", G.H.nx_real, G.H.ny_real, G.H.nz_real, G.H.n_cells); message = "Initializing Simulation"; Write_Message_To_Log_File(message.c_str()); - + // Set initial conditions chprintf("Setting initial conditions...\n"); G.Set_Initial_Conditions(P); @@ -128,7 +129,7 @@ int main(int argc, char *argv[]) chprintf("\nUsing Dual Energy Formalism:\n eta_1: %0.3f eta_2: %0.4f\n", DE_ETA_1, DE_ETA_2); message = " eta_1: " + std::to_string(DE_ETA_1) + " eta_2: " + std::to_string(DE_ETA_2); Write_Message_To_Log_File(message.c_str()); -#endif +#endif #ifdef CPU_TIME G.Timer.Initialize(); From 1bf8b5c2128e0b8886421573b8d4d0fc636f19de Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Wed, 20 Sep 2023 11:43:34 -0400 Subject: [PATCH 521/694] Fix bug where output would be triggered incorrectly If a maximum number of timesteps was set with N_STEPS_LIMIT then the final time step would still output even if the OUTPUT macro was not provided. Added ifdef statement to fix this. --- src/main.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/main.cpp b/src/main.cpp index 9c54a77e9..f13548c7a 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -354,7 +354,9 @@ int main(int argc, char *argv[]) #ifdef N_STEPS_LIMIT // Exit the loop when reached the limit number of steps (optional) if (G.H.n_step == N_STEPS_LIMIT) { + #ifdef OUTPUT WriteData(G, P, nfile); + #endif // OUTPUT break; } #endif From c6cdd016d6cf5736a3993de1b3d161cf1b0e1134 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Wed, 20 Sep 2023 11:45:41 -0400 Subject: [PATCH 522/694] Add ability for Cholla to print IDs of slow ranks Added a section to OneTime::end() that checks if the rank took more than 10% longer than average. If it did then it prints the node ID and GPU PCIe Bus ID to std::cerr --- src/grid/grid3D.cpp | 2 +- src/utils/gpu.hpp | 1 + src/utils/timing_functions.cpp | 28 +++++++++++++++++++++++++++- src/utils/timing_functions.h | 2 +- 4 files changed, 30 insertions(+), 3 deletions(-) diff --git a/src/grid/grid3D.cpp b/src/grid/grid3D.cpp index 9010da354..d8655d051 100644 --- a/src/grid/grid3D.cpp +++ b/src/grid/grid3D.cpp @@ -469,7 +469,7 @@ Real Grid3D::Update_Grid(void) } #ifdef CPU_TIME - Timer.Hydro_Integrator.End(); + Timer.Hydro_Integrator.End(true); #endif // CPU_TIME #ifdef CUDA diff --git a/src/utils/gpu.hpp b/src/utils/gpu.hpp index ec4554cf4..da45f2549 100644 --- a/src/utils/gpu.hpp +++ b/src/utils/gpu.hpp @@ -67,6 +67,7 @@ static constexpr int maxWarpsPerBlock = 1024 / WARPSIZE; #define cudaPointerGetAttributes hipPointerGetAttributes #define cudaOccupancyMaxPotentialBlockSize hipOccupancyMaxPotentialBlockSize #define cudaMemGetInfo hipMemGetInfo + #define cudaDeviceGetPCIBusId hipDeviceGetPCIBusId // Texture definitions #define cudaArray hipArray diff --git a/src/utils/timing_functions.cpp b/src/utils/timing_functions.cpp index 6c9a4eedd..b582e8052 100644 --- a/src/utils/timing_functions.cpp +++ b/src/utils/timing_functions.cpp @@ -1,11 +1,13 @@ #include "../utils/timing_functions.h" #ifdef CPU_TIME + #include #include #include #include #include "../global/global.h" + #include "../global/global_cuda.h" #include "../io/io.h" #ifdef MPI_CHOLLA @@ -28,7 +30,7 @@ void OneTime::Subtract(Real time_to_subtract) time_start += time_to_subtract; } -void OneTime::End() +void OneTime::End(bool const print_high_values) { cudaDeviceSynchronize(); if (inactive) { @@ -50,6 +52,30 @@ void OneTime::End() t_all += t_max; } n_steps++; + + #ifdef MPI_CHOLLA + // Print out information if the process is unusually slow + if (time >= 1.1 * t_avg and print_high_values) { + // Get node ID + std::string node_id(MPI_MAX_PROCESSOR_NAME, ' '); + int length; + MPI_Get_processor_name(node_id.data(), &length); + node_id.resize(length); + + // Get GPU ID + std::string gpu_id(MPI_MAX_PROCESSOR_NAME, ' '); + int device; + CudaSafeCall(cudaGetDevice(&device)); + CudaSafeCall(cudaDeviceGetPCIBusId(gpu_id.data(), gpu_id.size(), device)); + gpu_id.erase( + std::find_if(gpu_id.rbegin(), gpu_id.rend(), [](unsigned char ch) { return !std::isspace(ch); }).base(), + gpu_id.end()); + + std::cerr << "WARNING: Rank took longer than expected to execute." << std::endl + << " Node ID: " << node_id << std::endl + << " GPU PCI Bus ID: " << gpu_id << std::endl; + } + #endif // MPI_CHOLLA } void OneTime::RecordTime(Real time) diff --git a/src/utils/timing_functions.h b/src/utils/timing_functions.h index 09e209f8b..d2a0f066f 100644 --- a/src/utils/timing_functions.h +++ b/src/utils/timing_functions.h @@ -28,7 +28,7 @@ class OneTime } void Start(); void Subtract(Real time_to_subtract); - void End(); + void End(bool const print_high_values = false); void PrintStep(); void PrintAverage(); void PrintAll(); From 5c22e143c996ae80cd5901c1e483e90a117a2f40 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Thu, 21 Sep 2023 12:01:58 -0400 Subject: [PATCH 523/694] Update long time notification to provide more usefull output --- src/utils/timing_functions.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/utils/timing_functions.cpp b/src/utils/timing_functions.cpp index b582e8052..044b0856f 100644 --- a/src/utils/timing_functions.cpp +++ b/src/utils/timing_functions.cpp @@ -55,7 +55,7 @@ void OneTime::End(bool const print_high_values) #ifdef MPI_CHOLLA // Print out information if the process is unusually slow - if (time >= 1.1 * t_avg and print_high_values) { + if ((time >= 1.1 * t_avg) and (n_steps > 0) and print_high_values) { // Get node ID std::string node_id(MPI_MAX_PROCESSOR_NAME, ' '); int length; @@ -72,6 +72,8 @@ void OneTime::End(bool const print_high_values) gpu_id.end()); std::cerr << "WARNING: Rank took longer than expected to execute." << std::endl + << " Node Time: " << time << std::endl + << " Avg Time: " << t_avg << std::endl << " Node ID: " << node_id << std::endl << " GPU PCI Bus ID: " << gpu_id << std::endl; } From 11ae4813c2b98072a81136b211dc40f4b916e6a1 Mon Sep 17 00:00:00 2001 From: evazlimen <109487593+evazlimen@users.noreply.github.com> Date: Thu, 21 Sep 2023 10:19:57 -0700 Subject: [PATCH 524/694] clang format --- src/global/global.cpp | 4 +- src/global/global.h | 2 +- src/gravity/static_grav.h | 217 +++++++++++++++--------------- src/grid/grid3D.cpp | 10 +- src/hydro/hydro_cuda.cu | 6 +- src/hydro/hydro_cuda.h | 3 +- src/integrators/simple_3D_cuda.cu | 3 +- src/integrators/simple_3D_cuda.h | 3 +- src/main.cpp | 2 +- 9 files changed, 127 insertions(+), 123 deletions(-) diff --git a/src/global/global.cpp b/src/global/global.cpp index 6bfa6d564..ee81f0060 100644 --- a/src/global/global.cpp +++ b/src/global/global.cpp @@ -149,7 +149,7 @@ void parse_params(char *param_file, struct parameters *parms, int argc, char **a #endif /*ROTATED_PROJECTION*/ #ifdef STATIC_GRAV - //initialize custom gravity flag to zero + // initialize custom gravity flag to zero parms->custom_grav = 0; #endif @@ -219,7 +219,7 @@ void parse_param(char *name, char *value, struct parameters *parms) #ifdef STATIC_GRAV } else if (strcmp(name, "custom_grav") == 0) { parms->custom_grav = atoi(value); - #endif +#endif } else if (strcmp(name, "tout") == 0) { parms->tout = atof(value); } else if (strcmp(name, "outstep") == 0) { diff --git a/src/global/global.h b/src/global/global.h index 1cab0d8d8..2fd16dd6b 100644 --- a/src/global/global.h +++ b/src/global/global.h @@ -199,7 +199,7 @@ struct parameters { int out_float32_GasEnergy = 0; #endif #ifdef STATIC_GRAV - int custom_grav; //flag to set specific static gravity field + int custom_grav; // flag to set specific static gravity field #endif #ifdef MHD int out_float32_magnetic_x = 0; diff --git a/src/gravity/static_grav.h b/src/gravity/static_grav.h index 552ca1116..f0bfda9e1 100644 --- a/src/gravity/static_grav.h +++ b/src/gravity/static_grav.h @@ -18,87 +18,85 @@ inline __device__ void calc_g_1D(int xid, int x_off, int n_ghost, int custom_gra { Real x_pos, r_disk, r_halo; x_pos = (x_off + xid - n_ghost + 0.5) * dx + xbound; - switch(custom_grav){ - case 1: - // for disk components, calculate polar r - // r_disk = 0.220970869121; - // r_disk = 6.85009694274; - r_disk = 13.9211647546; - // r_disk = 20.9922325665; - // for halo, calculate spherical r - r_halo = sqrt(x_pos * x_pos + r_disk * r_disk); - - // set properties of halo and disk (these must match initial conditions) - Real a_disk_z, a_halo, M_vir, M_d, R_vir, R_d, z_d, R_h, M_h, c_vir, phi_0_h, x; - M_vir = 1.0e12; // viral mass of MW in M_sun - M_d = 6.5e10; // mass of disk in M_sun - M_h = M_vir - M_d; // halo mass in M_sun - R_vir = 261; // viral radius in kpc - c_vir = 20.0; // halo concentration - R_h = R_vir / c_vir; // halo scale length in kpc - R_d = 3.5; // disk scale length in kpc - z_d = 3.5 / 5.0; // disk scale height in kpc - phi_0_h = GN * M_h / (log(1.0 + c_vir) - c_vir / (1.0 + c_vir)); - x = r_halo / R_h; - - // calculate acceleration due to NFW halo & Miyamoto-Nagai disk - a_halo = -phi_0_h * (log(1 + x) - x / (1 + x)) / (r_halo * r_halo); - a_disk_z = - -GN * M_d * x_pos * (R_d + sqrt(x_pos * x_pos + z_d * z_d)) / - (pow(r_disk * r_disk + pow2(R_d + sqrt(x_pos * x_pos + z_d * z_d)), 1.5) * sqrt(x_pos * x_pos + z_d * z_d)); - - // total acceleration is the sum of the halo + disk components - *gx = (x_pos / r_halo) * a_halo + a_disk_z; - break; - default: - *gx = 0; + switch (custom_grav) { + case 1: + // for disk components, calculate polar r + // r_disk = 0.220970869121; + // r_disk = 6.85009694274; + r_disk = 13.9211647546; + // r_disk = 20.9922325665; + // for halo, calculate spherical r + r_halo = sqrt(x_pos * x_pos + r_disk * r_disk); + + // set properties of halo and disk (these must match initial conditions) + Real a_disk_z, a_halo, M_vir, M_d, R_vir, R_d, z_d, R_h, M_h, c_vir, phi_0_h, x; + M_vir = 1.0e12; // viral mass of MW in M_sun + M_d = 6.5e10; // mass of disk in M_sun + M_h = M_vir - M_d; // halo mass in M_sun + R_vir = 261; // viral radius in kpc + c_vir = 20.0; // halo concentration + R_h = R_vir / c_vir; // halo scale length in kpc + R_d = 3.5; // disk scale length in kpc + z_d = 3.5 / 5.0; // disk scale height in kpc + phi_0_h = GN * M_h / (log(1.0 + c_vir) - c_vir / (1.0 + c_vir)); + x = r_halo / R_h; + + // calculate acceleration due to NFW halo & Miyamoto-Nagai disk + a_halo = -phi_0_h * (log(1 + x) - x / (1 + x)) / (r_halo * r_halo); + a_disk_z = + -GN * M_d * x_pos * (R_d + sqrt(x_pos * x_pos + z_d * z_d)) / + (pow(r_disk * r_disk + pow2(R_d + sqrt(x_pos * x_pos + z_d * z_d)), 1.5) * sqrt(x_pos * x_pos + z_d * z_d)); + + // total acceleration is the sum of the halo + disk components + *gx = (x_pos / r_halo) * a_halo + a_disk_z; + break; + default: + *gx = 0; } return; } -inline __device__ void calc_g_2D(int xid, int yid, int x_off, int y_off, int n_ghost, int custom_grav, Real dx, Real dy, Real xbound, - Real ybound, Real *gx, Real *gy) +inline __device__ void calc_g_2D(int xid, int yid, int x_off, int y_off, int n_ghost, int custom_grav, Real dx, Real dy, + Real xbound, Real ybound, Real *gx, Real *gy) { Real x_pos, y_pos, r, phi; // use the subgrid offset and global boundaries to calculate absolute // positions on the grid x_pos = (x_off + xid - n_ghost + 0.5) * dx + xbound; y_pos = (y_off + yid - n_ghost + 0.5) * dy + ybound; - // for Gresho, also need r & phi - r = sqrt(x_pos * x_pos + y_pos * y_pos); - phi = atan2(y_pos, x_pos); - switch(custom_grav){ + // for Gresho, also need r & phi + r = sqrt(x_pos * x_pos + y_pos * y_pos); + phi = atan2(y_pos, x_pos); + switch (custom_grav) { case 1: // printf("gresho\n"); - // set acceleration to balance v_phi in Gresho problem - if (r < 0.2) { - *gx = -cos(phi)*25.0*r; - *gy = -sin(phi)*25.0*r; - } - else if (r >= 0.2 && r < 0.4) { - *gx = -cos(phi)*(4.0 - 20.0*r + 25.0*r*r)/r; - *gy = -sin(phi)*(4.0 - 20.0*r + 25.0*r*r)/r; - } - else { - *gx = 0.0; - *gy = 0.0; - } - break; + // set acceleration to balance v_phi in Gresho problem + if (r < 0.2) { + *gx = -cos(phi) * 25.0 * r; + *gy = -sin(phi) * 25.0 * r; + } else if (r >= 0.2 && r < 0.4) { + *gx = -cos(phi) * (4.0 - 20.0 * r + 25.0 * r * r) / r; + *gy = -sin(phi) * (4.0 - 20.0 * r + 25.0 * r * r) / r; + } else { + *gx = 0.0; + *gy = 0.0; + } + break; case 2: - //printf("rayleigh talor\n"); + // printf("rayleigh talor\n"); *gx = 0; *gy = -1; break; case 3: - //printf("keplerian\n"); + // printf("keplerian\n"); Real M; - M = 1*MSUN_CGS; - *gx = -cos(phi)*GN*M/(r*r); - *gy = -sin(phi)*GN*M/(r*r); - break; + M = 1 * MSUN_CGS; + *gx = -cos(phi) * GN * M / (r * r); + *gy = -sin(phi) * GN * M / (r * r); + break; case 4: - //printf("disk\n"); -// set gravitational acceleration for Kuzmin disk + NFW halo + // printf("disk\n"); + // set gravitational acceleration for Kuzmin disk + NFW halo Real a_d, a_h, a, M_vir, M_d, R_vir, R_d, R_s, M_h, c_vir, x; M_vir = 1.0e12; // viral mass of MW in M_sun M_d = 6.5e10; // mass of disk in M_sun (assume all gas) @@ -118,16 +116,17 @@ inline __device__ void calc_g_2D(int xid, int yid, int x_off, int y_off, int n_g *gy = -sin(phi) * a; break; default: - //printf("default\n"); + // printf("default\n"); *gx = 0; *gy = 0; - } + } return; } -inline __device__ void calc_g_3D(int xid, int yid, int zid, int x_off, int y_off, int z_off, int n_ghost, int custom_grav, Real dx, - Real dy, Real dz, Real xbound, Real ybound, Real zbound, Real *gx, Real *gy, Real *gz) +inline __device__ void calc_g_3D(int xid, int yid, int zid, int x_off, int y_off, int z_off, int n_ghost, + int custom_grav, Real dx, Real dy, Real dz, Real xbound, Real ybound, Real zbound, + Real *gx, Real *gy, Real *gz) { Real x_pos, y_pos, z_pos, r_disk, r_halo; // use the subgrid offset and global boundaries to calculate absolute @@ -140,49 +139,49 @@ inline __device__ void calc_g_3D(int xid, int yid, int zid, int x_off, int y_off r_disk = sqrt(x_pos * x_pos + y_pos * y_pos); // for halo, calculate spherical r r_halo = sqrt(x_pos * x_pos + y_pos * y_pos + z_pos * z_pos); - switch(custom_grav){ - case 1: - // set properties of halo and disk (these must match initial conditions) - Real a_disk_r, a_disk_z, a_halo, a_halo_r, a_halo_z; - Real M_vir, M_d, R_vir, R_d, z_d, R_h, M_h, c_vir, phi_0_h, x; - // MW model - M_vir = 1.0e12; // viral mass of in M_sun - M_d = 6.5e10; // viral mass of in M_sun - R_d = 3.5; // disk scale length in kpc - z_d = 3.5 / 5.0; // disk scale height in kpc - R_vir = 261.; // virial radius in kpc - c_vir = 20.0; // halo concentration - // M82 model - // M_vir = 5.0e10; // viral mass of in M_sun - // M_d = 1.0e10; // mass of disk in M_sun - // R_d = 0.8; // disk scale length in kpc - // z_d = 0.15; // disk scale height in kpc - // R_vir = R_d/0.015; // viral radius in kpc - // c_vir = 10.0; // halo concentration - - M_h = M_vir - M_d; // halo mass in M_sun - R_h = R_vir / c_vir; // halo scale length in kpc - phi_0_h = GN * M_h / (log(1.0 + c_vir) - c_vir / (1.0 + c_vir)); - x = r_halo / R_h; - - // calculate acceleration due to NFW halo & Miyamoto-Nagai disk - a_halo = -phi_0_h * (log(1 + x) - x / (1 + x)) / (r_halo * r_halo); - a_halo_r = a_halo * (r_disk / r_halo); - a_halo_z = a_halo * (z_pos / r_halo); - a_disk_r = -GN * M_d * r_disk * pow(r_disk * r_disk + pow2(R_d + sqrt(z_pos * z_pos + z_d * z_d)), -1.5); - a_disk_z = - -GN * M_d * z_pos * (R_d + sqrt(z_pos * z_pos + z_d * z_d)) / - (pow(r_disk * r_disk + pow2(R_d + sqrt(z_pos * z_pos + z_d * z_d)), 1.5) * sqrt(z_pos * z_pos + z_d * z_d)); - - // total acceleration is the sum of the halo + disk components - *gx = (x_pos / r_disk) * (a_disk_r + a_halo_r); - *gy = (y_pos / r_disk) * (a_disk_r + a_halo_r); - *gz = a_disk_z + a_halo_z; - break; - default: - *gx = 0; - *gy = 0; - *gz = 0; + switch (custom_grav) { + case 1: + // set properties of halo and disk (these must match initial conditions) + Real a_disk_r, a_disk_z, a_halo, a_halo_r, a_halo_z; + Real M_vir, M_d, R_vir, R_d, z_d, R_h, M_h, c_vir, phi_0_h, x; + // MW model + M_vir = 1.0e12; // viral mass of in M_sun + M_d = 6.5e10; // viral mass of in M_sun + R_d = 3.5; // disk scale length in kpc + z_d = 3.5 / 5.0; // disk scale height in kpc + R_vir = 261.; // virial radius in kpc + c_vir = 20.0; // halo concentration + // M82 model + // M_vir = 5.0e10; // viral mass of in M_sun + // M_d = 1.0e10; // mass of disk in M_sun + // R_d = 0.8; // disk scale length in kpc + // z_d = 0.15; // disk scale height in kpc + // R_vir = R_d/0.015; // viral radius in kpc + // c_vir = 10.0; // halo concentration + + M_h = M_vir - M_d; // halo mass in M_sun + R_h = R_vir / c_vir; // halo scale length in kpc + phi_0_h = GN * M_h / (log(1.0 + c_vir) - c_vir / (1.0 + c_vir)); + x = r_halo / R_h; + + // calculate acceleration due to NFW halo & Miyamoto-Nagai disk + a_halo = -phi_0_h * (log(1 + x) - x / (1 + x)) / (r_halo * r_halo); + a_halo_r = a_halo * (r_disk / r_halo); + a_halo_z = a_halo * (z_pos / r_halo); + a_disk_r = -GN * M_d * r_disk * pow(r_disk * r_disk + pow2(R_d + sqrt(z_pos * z_pos + z_d * z_d)), -1.5); + a_disk_z = + -GN * M_d * z_pos * (R_d + sqrt(z_pos * z_pos + z_d * z_d)) / + (pow(r_disk * r_disk + pow2(R_d + sqrt(z_pos * z_pos + z_d * z_d)), 1.5) * sqrt(z_pos * z_pos + z_d * z_d)); + + // total acceleration is the sum of the halo + disk components + *gx = (x_pos / r_disk) * (a_disk_r + a_halo_r); + *gy = (y_pos / r_disk) * (a_disk_r + a_halo_r); + *gz = a_disk_z + a_halo_z; + break; + default: + *gx = 0; + *gy = 0; + *gz = 0; } return; } diff --git a/src/grid/grid3D.cpp b/src/grid/grid3D.cpp index f6259c48a..8eb528379 100644 --- a/src/grid/grid3D.cpp +++ b/src/grid/grid3D.cpp @@ -148,11 +148,11 @@ void Grid3D::Initialize(struct parameters *P) int nz_in = P->nz; #ifdef STATIC_GRAV - H.custom_grav = P->custom_grav; //Initialize the custom static gravity flag + H.custom_grav = P->custom_grav; // Initialize the custom static gravity flag printf("H.custom_grav is %d\n", H.custom_grav); -if (H.custom_grav == 0){ + if (H.custom_grav == 0) { printf("WARNING: No custom gravity field given. Gravity field will be set to zero.\n"); -} + } #endif // Set the CFL coefficient (a global variable) @@ -467,8 +467,8 @@ Real Grid3D::Update_Grid(void) #endif // VL #ifdef SIMPLE Simple_Algorithm_3D_CUDA(C.device, C.d_Grav_potential, H.nx, H.ny, H.nz, x_off, y_off, z_off, H.n_ghost, H.dx, H.dy, - H.dz, H.xbound, H.ybound, H.zbound, H.dt, H.n_fields, H.custom_grav, density_floor, U_floor, - C.Grav_potential); + H.dz, H.xbound, H.ybound, H.zbound, H.dt, H.n_fields, H.custom_grav, density_floor, + U_floor, C.Grav_potential); #endif // SIMPLE #endif } else { diff --git a/src/hydro/hydro_cuda.cu b/src/hydro/hydro_cuda.cu index 01b79c090..c74654c0e 100644 --- a/src/hydro/hydro_cuda.cu +++ b/src/hydro/hydro_cuda.cu @@ -174,7 +174,8 @@ __global__ void Update_Conserved_Variables_3D(Real *dev_conserved, Real *Q_Lx, R Real *Q_Lz, Real *Q_Rz, Real *dev_F_x, Real *dev_F_y, Real *dev_F_z, int nx, int ny, int nz, int x_off, int y_off, int z_off, int n_ghost, Real dx, Real dy, Real dz, Real xbound, Real ybound, Real zbound, Real dt, - Real gamma, int n_fields, int custom_grav, Real density_floor, Real *dev_potential) + Real gamma, int n_fields, int custom_grav, Real density_floor, + Real *dev_potential) { int id, xid, yid, zid, n_cells; int imo, jmo, kmo; @@ -299,7 +300,8 @@ __global__ void Update_Conserved_Variables_3D(Real *dev_conserved, Real *Q_Lx, R #endif // DENSITY_FLOOR #ifdef STATIC_GRAV - calc_g_3D(xid, yid, zid, x_off, y_off, z_off, n_ghost, custom_grav, dx, dy, dz, xbound, ybound, zbound, &gx, &gy, &gz); + calc_g_3D(xid, yid, zid, x_off, y_off, z_off, n_ghost, custom_grav, dx, dy, dz, xbound, ybound, zbound, &gx, &gy, + &gz); d_n = dev_conserved[id]; d_inv_n = 1.0 / d_n; vx_n = dev_conserved[1 * n_cells + id] * d_inv_n; diff --git a/src/hydro/hydro_cuda.h b/src/hydro/hydro_cuda.h index 371cda8d9..016e3a84f 100644 --- a/src/hydro/hydro_cuda.h +++ b/src/hydro/hydro_cuda.h @@ -19,7 +19,8 @@ __global__ void Update_Conserved_Variables_3D(Real *dev_conserved, Real *Q_Lx, R Real *Q_Lz, Real *Q_Rz, Real *dev_F_x, Real *dev_F_y, Real *dev_F_z, int nx, int ny, int nz, int x_off, int y_off, int z_off, int n_ghost, Real dx, Real dy, Real dz, Real xbound, Real ybound, Real zbound, Real dt, - Real gamma, int n_fields, int custom_grav, Real density_floor, Real *dev_potential); + Real gamma, int n_fields, int custom_grav, Real density_floor, + Real *dev_potential); /*! * \brief Determine the maximum inverse crossing time in a specific cell diff --git a/src/integrators/simple_3D_cuda.cu b/src/integrators/simple_3D_cuda.cu index e2df9387c..17a37c6c1 100644 --- a/src/integrators/simple_3D_cuda.cu +++ b/src/integrators/simple_3D_cuda.cu @@ -26,7 +26,8 @@ void Simple_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, int ny, int nz, int x_off, int y_off, int z_off, int n_ghost, Real dx, Real dy, Real dz, Real xbound, Real ybound, Real zbound, - Real dt, int n_fields, int custom_grav, Real density_floor, Real U_floor, Real *host_grav_potential) + Real dt, int n_fields, int custom_grav, Real density_floor, Real U_floor, + Real *host_grav_potential) { // Here, *dev_conserved contains the entire // set of conserved variables on the grid diff --git a/src/integrators/simple_3D_cuda.h b/src/integrators/simple_3D_cuda.h index 60776aedf..585c553ba 100644 --- a/src/integrators/simple_3D_cuda.h +++ b/src/integrators/simple_3D_cuda.h @@ -11,7 +11,8 @@ void Simple_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, int ny, int nz, int x_off, int y_off, int z_off, int n_ghost, Real dx, Real dy, Real dz, Real xbound, Real ybound, Real zbound, - Real dt, int n_fields, int custom_grav, Real density_floor, Real U_floor, Real *host_grav_potential); + Real dt, int n_fields, int custom_grav, Real density_floor, Real U_floor, + Real *host_grav_potential); void Free_Memory_Simple_3D(); diff --git a/src/main.cpp b/src/main.cpp index f17f4231d..9c54a77e9 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -107,7 +107,7 @@ int main(int argc, char *argv[]) Write_Message_To_Log_File(message.c_str()); message = "Macro Flags = " + std::string(MACRO_FLAGS); Write_Message_To_Log_File(message.c_str()); - + // initialize the grid G.Initialize(&P); chprintf("Local number of grid cells: %d %d %d %d\n", G.H.nx_real, G.H.ny_real, G.H.nz_real, G.H.n_cells); From 229438df32369e9569cd50956dd039eb6c35ad9f Mon Sep 17 00:00:00 2001 From: helenarichie Date: Fri, 22 Sep 2023 13:44:40 -0400 Subject: [PATCH 525/694] fix naming --- src/mhd/ct_electric_fields_tests.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mhd/ct_electric_fields_tests.cu b/src/mhd/ct_electric_fields_tests.cu index 61e7e5002..4c94b57fc 100644 --- a/src/mhd/ct_electric_fields_tests.cu +++ b/src/mhd/ct_electric_fields_tests.cu @@ -267,7 +267,7 @@ TEST(tMHDCTSlope, CorrectInputExpectCorrectOutput) ASSERT_EQ(test_data.size(), fiducial_data.size()); for (size_t i = 0; i < test_data.size(); i++) { - testingUtilities::checkResults(fiducial_data.at(i), test_data.at(i), ""); + testingUtilities::Check_Results(fiducial_data.at(i), test_data.at(i), ""); } } // ============================================================================= From 145f3ef491cef7cac031ef1b77ee1a58dced65df Mon Sep 17 00:00:00 2001 From: Evan Schneider Date: Fri, 22 Sep 2023 14:02:58 -0400 Subject: [PATCH 526/694] hacky fix to get the cooling test build to compile --- src/system_tests/hydro_system_tests.cpp | 6 ++++++ src/system_tests/mhd_system_tests.cpp | 16 ++++++++++++++-- 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/src/system_tests/hydro_system_tests.cpp b/src/system_tests/hydro_system_tests.cpp index 288690290..b36a9837f 100644 --- a/src/system_tests/hydro_system_tests.cpp +++ b/src/system_tests/hydro_system_tests.cpp @@ -159,9 +159,15 @@ class tHYDROtMHDSYSTEMLinearWavesParameterizedMpi : public ::testing::TestWithPa #elif defined(PLMC) double const allowedL1Error = 1E-7; // Based on results in Gardiner & Stone 2008 double const allowedError = 1E-7; +#elif defined(PLMP) + double const allowedL1Error = 1E-7; // Based on results in Gardiner & Stone 2008 + double const allowedError = 1E-7; #elif defined(PPMC) double const allowedL1Error = 2.7E-8; // Based on results in Gardiner & Stone 2008 double const allowedError = 2.7E-8; +#elif defined(PPMP) + double const allowedL1Error = 2.7E-8; // Based on results in Gardiner & Stone 2008 + double const allowedError = 2.7E-8; #endif // PCM void setLaunchParams(double const &waveSpeed, double const &rEigenVec_rho, double const &rEigenVec_MomentumX, diff --git a/src/system_tests/mhd_system_tests.cpp b/src/system_tests/mhd_system_tests.cpp index 30e1d81cf..20558b5bf 100644 --- a/src/system_tests/mhd_system_tests.cpp +++ b/src/system_tests/mhd_system_tests.cpp @@ -862,7 +862,13 @@ TEST_P(tMHDSYSTEMCircularlyPolarizedAlfvenWaveParameterizedPolarization, MovingW cpawTest.setFiducialNumTimeSteps(84); double const allowedL1Error = 4.0E-3; // Based on results in Gardiner & Stone 2008 double const allowedError = 3.0E-3; -#endif // PCM +#elif defined(PLMP) + double const allowedL1Error = 5.0E-3; // Based on results in Gardiner & Stone 2008 + double const allowedError = 5.0E-3; +#elif defined(PPMP) + double const allowedL1Error = 4.0E-3; // Based on results in Gardiner & Stone 2008 + double const allowedError = 3.0E-3; +#endif // Set the launch parameters setLaunchParams(polarization, vx); @@ -897,7 +903,13 @@ TEST_P(tMHDSYSTEMCircularlyPolarizedAlfvenWaveParameterizedPolarization, Standin cpawTest.setFiducialNumTimeSteps(130); double const allowedL1Error = 1.3E-3; // Based on results in Gardiner & Stone 2008 double const allowedError = 1.3E-3; -#endif // PCM +#elif defined(PLMP) + double const allowedL1Error = 2.0E-3; // Based on results in Gardiner & Stone 2008 + double const allowedError = 2.0E-3; +#elif defined(PPMP) + double const allowedL1Error = 1.3E-3; // Based on results in Gardiner & Stone 2008 + double const allowedError = 1.3E-3; +#endif // Set the launch parameters setLaunchParams(polarization, vx); From d8613eec2f10dcb4e066729c19b7f690878bbb57 Mon Sep 17 00:00:00 2001 From: Evan Schneider Date: Fri, 22 Sep 2023 14:07:35 -0400 Subject: [PATCH 527/694] removed functions to address Issues 239 and 240 --- src/global/global.cpp | 18 ------------------ src/global/global.h | 18 ------------------ 2 files changed, 36 deletions(-) diff --git a/src/global/global.cpp b/src/global/global.cpp index 2aa5792fe..5f6aa26da 100644 --- a/src/global/global.cpp +++ b/src/global/global.cpp @@ -57,24 +57,6 @@ int sgn(Real x) } } -#ifndef CUDA -/*! \fn Real calc_eta(Real cW[], Real gamma) - * \brief Calculate the eta value for the H correction. */ -Real calc_eta(Real cW[], Real gamma) -{ - Real pl, pr, al, ar; - - pl = (cW[8] - 0.5 * (cW[2] * cW[2] + cW[4] * cW[4] + cW[6] * cW[6]) / cW[0]) * (gamma - 1.0); - pl = fmax(pl, TINY_NUMBER); - pr = (cW[9] - 0.5 * (cW[3] * cW[3] + cW[5] * cW[5] + cW[7] * cW[7]) / cW[1]) * (gamma - 1.0); - pr = fmax(pr, TINY_NUMBER); - - al = sqrt(gamma * pl / cW[0]); - ar = sqrt(gamma * pr / cW[1]); - - return 0.5 * fabs((cW[3] / cW[1] + ar) - (cW[2] / cW[0] - al)); -} -#endif // NO CUDA /*! \fn char trim(char *s) * \brief Gets rid of trailing and leading whitespace. */ diff --git a/src/global/global.h b/src/global/global.h index 75fee01fb..ed13190d7 100644 --- a/src/global/global.h +++ b/src/global/global.h @@ -6,11 +6,6 @@ #include "../grid/grid_enum.h" // defines NSCALARS -#ifdef COOLING_CPU - #include - #include -#endif - #ifdef PARTICLES #include #endif // PARTICLES @@ -143,14 +138,6 @@ extern Real C_cfl; // CFL number (0 - 0.5) extern Real t_comm; extern Real t_other; -#ifdef COOLING_CPU -extern gsl_interp_accel *acc; -extern gsl_interp_accel *xacc; -extern gsl_interp_accel *yacc; -extern gsl_spline *highT_C_spline; -extern gsl_spline2d *lowT_C_spline; -extern gsl_spline2d *lowT_H_spline; -#endif #ifdef COOLING_GPU extern float *cooling_table; extern float *heating_table; @@ -168,11 +155,6 @@ extern double get_time(void); * \brief Mathematical sign function. Returns sign of x. */ extern int sgn(Real x); -#ifndef CUDA -/*! \fn Real calc_eta(Real cW[], Real gamma) - * \brief Calculate the eta value for the H correction. */ -extern Real calc_eta(Real cW[], Real gamma); -#endif struct parameters { int nx; From 3d221035e06aef1d6e0f33b31e39af6577b003ba Mon Sep 17 00:00:00 2001 From: Matthew Abruzzo Date: Fri, 22 Sep 2023 14:27:33 -0400 Subject: [PATCH 528/694] Resolve #278 - added ``"cholla"`` attribute to HDF5 header This should help yt with identifying cholla snapshots. --- src/io/io.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/io/io.cpp b/src/io/io.cpp index 09ffd0d17..3b35de7f2 100644 --- a/src/io/io.cpp +++ b/src/io/io.cpp @@ -660,6 +660,12 @@ void Grid3D::Write_Header_HDF5(hid_t file_id) status = H5Awrite(attribute_id, stringType, ¯oFlags); H5Aclose(attribute_id); + // attribute to help yt differentiate cholla outputs from outputs produced by other codes + attribute_id = H5Acreate(file_id, "cholla", stringType, dataspace_id, H5P_DEFAULT, H5P_DEFAULT); + const char *dummyStr = ""; // this doesn't really matter right now + status = H5Awrite(attribute_id, stringType, &dummyStr); + H5Aclose(attribute_id); + // Numeric Attributes status = Write_HDF5_Attribute(file_id, dataspace_id, &H.t, "t"); status = Write_HDF5_Attribute(file_id, dataspace_id, &H.dt, "dt"); From ed4e8f6db215471670cca8b1d6fdc967c7cbcf0b Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Mon, 3 Jul 2023 15:07:52 -0400 Subject: [PATCH 529/694] Enable hicpp-signed-bitwise clang-tidy check This check only failed in a few spots and it was with string literals operating with an unsigned int type so I set the IgnorePositiveIntegerLiterals option to true. --- .clang-tidy | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.clang-tidy b/.clang-tidy index bd40fd46c..a16bc4aa4 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -91,7 +91,6 @@ Checks: "*, -hicpp-member-init, -hicpp-no-array-decay, -hicpp-no-malloc, - -hicpp-signed-bitwise, -hicpp-special-member-functions, -hicpp-use-equals-default, -hicpp-use-noexcept, @@ -169,4 +168,6 @@ CheckOptions: readability-identifier-naming.PrivateMethodSuffix: '_' # readability-identifier-naming.StructCase: 'CamelCase' + + hicpp-signed-bitwise.IgnorePositiveIntegerLiterals: 'true' ... From d8ea428d65b8cf21cff8dea1895815e14c2fb9af Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Mon, 3 Jul 2023 15:46:50 -0400 Subject: [PATCH 530/694] Fix & Enable misc-confusable-identifiers check --- .clang-tidy | 1 - src/riemann_solvers/roe_cuda.cu | 9 +++------ 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/.clang-tidy b/.clang-tidy index a16bc4aa4..b8adbf722 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -101,7 +101,6 @@ Checks: "*, -llvm-header-guard, -llvm-include-order, -llvm-namespace-comment, - -misc-confusable-identifiers, -misc-const-correctness, -misc-non-private-member-variables-in-classes, -modernize-avoid-c-arrays, diff --git a/src/riemann_solvers/roe_cuda.cu b/src/riemann_solvers/roe_cuda.cu index e14be647e..332dcf3be 100644 --- a/src/riemann_solvers/roe_cuda.cu +++ b/src/riemann_solvers/roe_cuda.cu @@ -316,18 +316,15 @@ __global__ void Calculate_Roe_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_R // if pressure or density is negative, and we have not already returned // the supersonic fluxes, return the HLLE fluxes if (hlle_flag != 0) { - Real cfl, cfr, al, ar, bm, bp, tmp; + Real cfl, cfr, bm, bp, tmp; // compute max and fmin wave speeds cfl = sqrt(gamma * pl / dl); // sound speed in left state cfr = sqrt(gamma * pr / dr); // sound speed in right state // take max/fmin of Roe eigenvalues and left and right sound speeds - al = fmin(lambda_m, vxl - cfl); - ar = fmax(lambda_p, vxr + cfr); - - bm = fmin(al, (Real)0.0); - bp = fmax(ar, (Real)0.0); + bm = fmin(fmin(lambda_m, vxl - cfl), (Real)0.0); + bp = fmax(fmax(lambda_p, vxr + cfr), (Real)0.0); // compute left and right fluxes f_d_l = mxl - bm * dl; From 8c9c990ffc07cd1e5c27ebdfb687006dd30bd7d7 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Mon, 10 Jul 2023 14:56:57 -0400 Subject: [PATCH 531/694] Enable namespace name case checking --- .clang-tidy | 34 ++- src/dust/dust_cuda_tests.cpp | 4 +- src/hydro/hydro_cuda_tests.cu | 6 +- src/io/io_tests.cpp | 4 +- src/main_tests.cpp | 8 +- src/mhd/ct_electric_fields_tests.cu | 6 +- src/mhd/magnetic_divergence_tests.cu | 2 +- src/mhd/magnetic_update_tests.cu | 6 +- src/reconstruction/plmc_cuda_tests.cu | 8 +- src/reconstruction/reconstruction_tests.cu | 170 +++++++------- src/riemann_solvers/hllc_cuda_tests.cu | 2 +- src/riemann_solvers/hlld_cuda_tests.cu | 235 ++++++++++---------- src/system_tests/cooling_system_tests.cpp | 36 +-- src/system_tests/gravity_system_tests.cpp | 2 +- src/system_tests/hydro_system_tests.cpp | 26 +-- src/system_tests/mhd_system_tests.cpp | 9 +- src/system_tests/particles_system_tests.cpp | 2 +- src/system_tests/system_tester.cpp | 51 ++--- src/system_tests/system_tester.h | 10 +- src/utils/cuda_utilities_tests.cpp | 4 +- src/utils/hydro_utilities_tests.cpp | 22 +- src/utils/math_utilities_tests.cpp | 8 +- src/utils/mhd_utilities_tests.cu | 40 ++-- src/utils/reduction_utilities_tests.cu | 2 +- src/utils/testing_utilities.cpp | 8 +- src/utils/testing_utilities.h | 24 +- 26 files changed, 364 insertions(+), 365 deletions(-) diff --git a/.clang-tidy b/.clang-tidy index b8adbf722..67221c3a7 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -120,7 +120,6 @@ Checks: "*, -readability-else-after-return, -readability-function-cognitive-complexity, -readability-identifier-length, - -readability-identifier-naming, -readability-implicit-bool-conversion, -readability-inconsistent-declaration-parameter-name, -readability-isolate-declaration, @@ -146,27 +145,26 @@ CheckOptions: # - camel_Snake_Back # - Camel_Snake_Case # - aNy_CasE - # - # Entries that are commented out probably aren't needed but it should be verified - readability-identifier-naming.VariableCase: 'lower_case' - readability-identifier-naming.FunctionCase: 'Camel_Snake_Case' + + # readability-identifier-naming.VariableCase: 'lower_case' + # readability-identifier-naming.FunctionCase: 'Camel_Snake_Case' readability-identifier-naming.NamespaceCase: 'lower_case' - readability-identifier-naming.MacroDefinitionCase: 'UPPER_CASE' - readability-identifier-naming.TypedefCase: 'CamelCase' - readability-identifier-naming.TypeAliasCase: 'CamelCase' - readability-identifier-naming.EnumCase: 'CamelCase' - readability-identifier-naming.ConstantCase: 'lower_case' + # readability-identifier-naming.MacroDefinitionCase: 'UPPER_CASE' + # readability-identifier-naming.TypedefCase: 'CamelCase' + # readability-identifier-naming.TypeAliasCase: 'CamelCase' + # readability-identifier-naming.EnumCase: 'CamelCase' + # readability-identifier-naming.ConstantCase: 'lower_case' - readability-identifier-naming.ConstantPrefix: 'k_' - readability-identifier-naming.GlobalVariablePrefix: 'g_' + # readability-identifier-naming.ConstantPrefix: 'k_' + # readability-identifier-naming.GlobalVariablePrefix: 'g_' - readability-identifier-naming.ClassCase: 'CamelCase' - # readability-identifier-naming.MemberCase: 'lower_case' - # readability-identifier-naming.MethodCase: 'CamelCase' - readability-identifier-naming.PrivateMemberSuffix: '_' - readability-identifier-naming.PrivateMethodSuffix: '_' + # readability-identifier-naming.ClassCase: 'CamelCase' + # readability-identifier-naming.MemberCase: 'lower_case' # This entry might not be needed + # readability-identifier-naming.MethodCase: 'CamelCase' # This entry might not be needed + # readability-identifier-naming.PrivateMemberSuffix: '_' + # readability-identifier-naming.PrivateMethodSuffix: '_' - # readability-identifier-naming.StructCase: 'CamelCase' + # readability-identifier-naming.StructCase: 'CamelCase' # This entry might not be needed hicpp-signed-bitwise.IgnorePositiveIntegerLiterals: 'true' ... diff --git a/src/dust/dust_cuda_tests.cpp b/src/dust/dust_cuda_tests.cpp index 8790c1f4a..a1357037a 100644 --- a/src/dust/dust_cuda_tests.cpp +++ b/src/dust/dust_cuda_tests.cpp @@ -37,7 +37,7 @@ TEST(tDUSTTestSputteringTimescale, bool is_true; - is_true = testingUtilities::nearlyEqualDbl(k_fiducial_num, test_num, abs_diff, ulps_diff); + is_true = testing_utilities::nearlyEqualDbl(k_fiducial_num, test_num, abs_diff, ulps_diff); EXPECT_TRUE(is_true) << "The fiducial value is: " << k_fiducial_num << std::endl << "The test value is: " << test_num << std::endl @@ -61,7 +61,7 @@ TEST(tDUSTTestSputteringGrowthRate, bool is_true; - is_true = testingUtilities::nearlyEqualDbl(k_fiducial_num, test_num, abs_diff, ulps_diff); + is_true = testing_utilities::nearlyEqualDbl(k_fiducial_num, test_num, abs_diff, ulps_diff); EXPECT_TRUE(is_true) << "The fiducial value is: " << k_fiducial_num << std::endl << "The test value is: " << test_num << std::endl diff --git a/src/hydro/hydro_cuda_tests.cu b/src/hydro/hydro_cuda_tests.cu index 482564462..cdedf629b 100644 --- a/src/hydro/hydro_cuda_tests.cu +++ b/src/hydro/hydro_cuda_tests.cu @@ -72,7 +72,7 @@ TEST(tHYDROCalcDt3D, CorrectInputExpectCorrectOutput) double absoluteDiff; int64_t ulpsDiff; bool areEqual; - areEqual = testingUtilities::nearlyEqualDbl(fiducialDt, testData, absoluteDiff, ulpsDiff); + areEqual = testing_utilities::nearlyEqualDbl(fiducialDt, testData, absoluteDiff, ulpsDiff); EXPECT_TRUE(areEqual) << "The fiducial value is: " << fiducialDt << std::endl << "The test value is: " << testData << std::endl << "The absolute difference is: " << absoluteDiff << std::endl @@ -106,7 +106,7 @@ TEST(tHYDROHydroInverseCrossingTime, CorrectInputExpectCorrectOutput) velocityZ, cellSizeX, cellSizeY, cellSizeZ, gamma); // Check results - testingUtilities::checkResults(fiducialInverseCrossingTime, testInverseCrossingTime, "inverse crossing time"); + testing_utilities::checkResults(fiducialInverseCrossingTime, testInverseCrossingTime, "inverse crossing time"); } // ============================================================================= // End of tests for the hydroInverseCrossingTime function @@ -140,7 +140,7 @@ TEST(tMHDMhdInverseCrossingTime, CorrectInputExpectCorrectOutput) magneticZ, cellSizeX, cellSizeY, cellSizeZ, gamma); // Check results - testingUtilities::checkResults(fiducialInverseCrossingTime, testInverseCrossingTime, "inverse crossing time"); + testing_utilities::checkResults(fiducialInverseCrossingTime, testInverseCrossingTime, "inverse crossing time"); } // ============================================================================= // End of tests for the mhdInverseCrossingTime function diff --git a/src/io/io_tests.cpp b/src/io/io_tests.cpp index f9c94119b..87fdf8a09 100644 --- a/src/io/io_tests.cpp +++ b/src/io/io_tests.cpp @@ -23,14 +23,14 @@ TEST(tHYDROtMHDReadGridHdf5, RestartSlowWaveExpectCorrectOutput) int const num_ranks = 4; // Generate the data to read from - systemTest::SystemTestRunner initializer(false, true, false); + system_test::SystemTestRunner initializer(false, true, false); initializer.numMpiRanks = num_ranks; initializer.chollaLaunchParams.append(" tout=0.0 outstep=0.0"); initializer.launchCholla(); std::string const read_directory = initializer.getOutputDirectory() + "/"; // Reload data and run the test - systemTest::SystemTestRunner loadRun(false, true, false); + system_test::SystemTestRunner loadRun(false, true, false); loadRun.numMpiRanks = num_ranks; loadRun.chollaLaunchParams.append(" init=Read_Grid nfile=0 indir=" + read_directory); diff --git a/src/main_tests.cpp b/src/main_tests.cpp index ee58fbd06..43627f4a4 100644 --- a/src/main_tests.cpp +++ b/src/main_tests.cpp @@ -18,10 +18,10 @@ #include "utils/testing_utilities.h" /// This is the global variable to store the path to the root of Cholla -testingUtilities::GlobalString globalChollaRoot; -testingUtilities::GlobalString globalChollaBuild; -testingUtilities::GlobalString globalChollaMachine; -testingUtilities::GlobalString globalMpiLauncher; +testing_utilities::GlobalString globalChollaRoot; +testing_utilities::GlobalString globalChollaBuild; +testing_utilities::GlobalString globalChollaMachine; +testing_utilities::GlobalString globalMpiLauncher; bool globalRunCholla; bool globalCompareSystemTestResults; diff --git a/src/mhd/ct_electric_fields_tests.cu b/src/mhd/ct_electric_fields_tests.cu index 0e2adf624..e9b3ce0d9 100644 --- a/src/mhd/ct_electric_fields_tests.cu +++ b/src/mhd/ct_electric_fields_tests.cu @@ -116,9 +116,9 @@ class tMHDCalculateCTElectricFields : public ::testing::Test for (size_t i = 0; i < fiducialData.size(); i++) { int xid, yid, zid; cuda_utilities::compute3DIndices(i, nx, ny, xid, yid, zid); - testingUtilities::checkResults(fiducialData.at(i), testCTElectricFields.at(i), - "value at i = " + std::to_string(i) + ", xid = " + std::to_string(xid) + - ", yid = " + std::to_string(yid) + ", zid = " + std::to_string(zid)); + testing_utilities::checkResults(fiducialData.at(i), testCTElectricFields.at(i), + "value at i = " + std::to_string(i) + ", xid = " + std::to_string(xid) + + ", yid = " + std::to_string(yid) + ", zid = " + std::to_string(zid)); } } }; diff --git a/src/mhd/magnetic_divergence_tests.cu b/src/mhd/magnetic_divergence_tests.cu index c1c44a9a7..a2acf850c 100644 --- a/src/mhd/magnetic_divergence_tests.cu +++ b/src/mhd/magnetic_divergence_tests.cu @@ -61,7 +61,7 @@ TEST(tMHDGrid3DcheckMagneticDivergence, CorrectInputExpectCorrectOutput) MPI_Finalize(); // Perform Comparison Real const fiducialDivergence = 3.6318132783263106 / 1E15; - testingUtilities::checkResults(fiducialDivergence, max_magnetic_divergence, "maximum divergence"); + testing_utilities::checkResults(fiducialDivergence, max_magnetic_divergence, "maximum divergence"); } // ============================================================================= // End of tests for the magnetic field divergence functions diff --git a/src/mhd/magnetic_update_tests.cu b/src/mhd/magnetic_update_tests.cu index 9b78a8f5d..3719191e5 100644 --- a/src/mhd/magnetic_update_tests.cu +++ b/src/mhd/magnetic_update_tests.cu @@ -103,9 +103,9 @@ class tMHDUpdateMagneticField3D : public ::testing::Test for (size_t i = 0; i < fiducialData.size(); i++) { int xid, yid, zid; cuda_utilities::compute3DIndices(i, nx, ny, xid, yid, zid); - testingUtilities::checkResults(fiducialData.at(i), destinationGrid.at(i), - "value at i = " + std::to_string(i) + ", xid = " + std::to_string(xid) + - ", yid = " + std::to_string(yid) + ", zid = " + std::to_string(zid)); + testing_utilities::checkResults(fiducialData.at(i), destinationGrid.at(i), + "value at i = " + std::to_string(i) + ", xid = " + std::to_string(xid) + + ", yid = " + std::to_string(yid) + ", zid = " + std::to_string(zid)); } } }; diff --git a/src/reconstruction/plmc_cuda_tests.cu b/src/reconstruction/plmc_cuda_tests.cu index 3616d2d0a..cc70d3487 100644 --- a/src/reconstruction/plmc_cuda_tests.cu +++ b/src/reconstruction/plmc_cuda_tests.cu @@ -151,7 +151,7 @@ TEST(tHYDROPlmcReconstructor, CorrectInputExpectCorrectOutput) ? 0.0 : fiducial_interface_left.at(direction)[i]; - testingUtilities::checkResults( + testing_utilities::checkResults( fiducial_val, test_val, "left interface at i=" + std::to_string(i) + ", in direction " + std::to_string(direction)); @@ -161,7 +161,7 @@ TEST(tHYDROPlmcReconstructor, CorrectInputExpectCorrectOutput) ? 0.0 : fiducial_interface_right.at(direction)[i]; - testingUtilities::checkResults( + testing_utilities::checkResults( fiducial_val, test_val, "right interface at i=" + std::to_string(i) + ", in direction " + std::to_string(direction)); } @@ -262,7 +262,7 @@ TEST(tMHDPlmcReconstructor, CorrectInputExpectCorrectOutput) ? 0.0 : fiducial_interface_left.at(direction)[i]; - testingUtilities::checkResults( + testing_utilities::checkResults( fiducial_val, test_val, "left interface at i=" + std::to_string(i) + ", in direction " + std::to_string(direction)); @@ -272,7 +272,7 @@ TEST(tMHDPlmcReconstructor, CorrectInputExpectCorrectOutput) ? 0.0 : fiducial_interface_right.at(direction)[i]; - testingUtilities::checkResults( + testing_utilities::checkResults( fiducial_val, test_val, "right interface at i=" + std::to_string(i) + ", in direction " + std::to_string(direction)); } diff --git a/src/reconstruction/reconstruction_tests.cu b/src/reconstruction/reconstruction_tests.cu index 5f8000bf8..bc8984265 100644 --- a/src/reconstruction/reconstruction_tests.cu +++ b/src/reconstruction/reconstruction_tests.cu @@ -70,13 +70,13 @@ TEST(tMHDReconstructionPrimitive2Characteristic, CorrectInputExpectCorrectOutput // Check results reconstruction::Characteristic const fiducial_results{-40327, 110, -132678, 7.4400000000000004, 98864, 98, 103549}; - testingUtilities::checkResults(fiducial_results.a0, host_results.a0, "a0"); - testingUtilities::checkResults(fiducial_results.a1, host_results.a1, "a1"); - testingUtilities::checkResults(fiducial_results.a2, host_results.a2, "a2"); - testingUtilities::checkResults(fiducial_results.a3, host_results.a3, "a3"); - testingUtilities::checkResults(fiducial_results.a4, host_results.a4, "a4"); - testingUtilities::checkResults(fiducial_results.a5, host_results.a5, "a5"); - testingUtilities::checkResults(fiducial_results.a6, host_results.a6, "a6"); + testing_utilities::checkResults(fiducial_results.a0, host_results.a0, "a0"); + testing_utilities::checkResults(fiducial_results.a1, host_results.a1, "a1"); + testing_utilities::checkResults(fiducial_results.a2, host_results.a2, "a2"); + testing_utilities::checkResults(fiducial_results.a3, host_results.a3, "a3"); + testing_utilities::checkResults(fiducial_results.a4, host_results.a4, "a4"); + testing_utilities::checkResults(fiducial_results.a5, host_results.a5, "a5"); + testing_utilities::checkResults(fiducial_results.a6, host_results.a6, "a6"); } TEST(tMHDReconstructionCharacteristic2Primitive, CorrectInputExpectCorrectOutput) @@ -101,13 +101,13 @@ TEST(tMHDReconstructionCharacteristic2Primitive, CorrectInputExpectCorrectOutput // Check results reconstruction::Primitive const fiducial_results{1740, 2934, -2526, -2828, 14333.333333333338, 0.0, -24040, 24880}; - testingUtilities::checkResults(fiducial_results.density, host_results.density, "density"); - testingUtilities::checkResults(fiducial_results.velocity_x, host_results.velocity_x, "velocity_x"); - testingUtilities::checkResults(fiducial_results.velocity_y, host_results.velocity_y, "velocity_y", 1.34E-14); - testingUtilities::checkResults(fiducial_results.velocity_z, host_results.velocity_z, "velocity_z", 1.6E-14); - testingUtilities::checkResults(fiducial_results.pressure, host_results.pressure, "pressure"); - testingUtilities::checkResults(fiducial_results.magnetic_y, host_results.magnetic_y, "magnetic_y"); - testingUtilities::checkResults(fiducial_results.magnetic_z, host_results.magnetic_z, "magnetic_z"); + testing_utilities::checkResults(fiducial_results.density, host_results.density, "density"); + testing_utilities::checkResults(fiducial_results.velocity_x, host_results.velocity_x, "velocity_x"); + testing_utilities::checkResults(fiducial_results.velocity_y, host_results.velocity_y, "velocity_y", 1.34E-14); + testing_utilities::checkResults(fiducial_results.velocity_z, host_results.velocity_z, "velocity_z", 1.6E-14); + testing_utilities::checkResults(fiducial_results.pressure, host_results.pressure, "pressure"); + testing_utilities::checkResults(fiducial_results.magnetic_y, host_results.magnetic_y, "magnetic_y"); + testing_utilities::checkResults(fiducial_results.magnetic_z, host_results.magnetic_z, "magnetic_z"); } TEST(tMHDReconstructionComputeEigenvectors, CorrectInputExpectCorrectOutput) @@ -227,25 +227,25 @@ TEST(tALLReconstructionLoadData, CorrectInputExpectCorrectOutput) #ifdef MHD reconstruction::Primitive const fiducial_data{ 13, 3.0769230769230771, 5.1538461538461542, 7.2307692307692308, 9662.3910256410272, 147.5, 173.5, 197.5}; - testingUtilities::checkResults(fiducial_data.density, test_data.density, "density"); - testingUtilities::checkResults(fiducial_data.velocity_x, test_data.velocity_x, "velocity_x"); - testingUtilities::checkResults(fiducial_data.velocity_y, test_data.velocity_y, "velocity_y"); - testingUtilities::checkResults(fiducial_data.velocity_z, test_data.velocity_z, "velocity_z"); - testingUtilities::checkResults(fiducial_data.pressure, test_data.pressure, "pressure"); - testingUtilities::checkResults(fiducial_data.magnetic_x, test_data.magnetic_x, "magnetic_x"); - testingUtilities::checkResults(fiducial_data.magnetic_y, test_data.magnetic_y, "magnetic_y"); - testingUtilities::checkResults(fiducial_data.magnetic_z, test_data.magnetic_z, "magnetic_z"); + testing_utilities::checkResults(fiducial_data.density, test_data.density, "density"); + testing_utilities::checkResults(fiducial_data.velocity_x, test_data.velocity_x, "velocity_x"); + testing_utilities::checkResults(fiducial_data.velocity_y, test_data.velocity_y, "velocity_y"); + testing_utilities::checkResults(fiducial_data.velocity_z, test_data.velocity_z, "velocity_z"); + testing_utilities::checkResults(fiducial_data.pressure, test_data.pressure, "pressure"); + testing_utilities::checkResults(fiducial_data.magnetic_x, test_data.magnetic_x, "magnetic_x"); + testing_utilities::checkResults(fiducial_data.magnetic_y, test_data.magnetic_y, "magnetic_y"); + testing_utilities::checkResults(fiducial_data.magnetic_z, test_data.magnetic_z, "magnetic_z"); #else // MHD reconstruction::Primitive fiducial_data{13, 3.0769230769230771, 5.1538461538461542, 7.2307692307692308, 39950.641025641031}; #ifdef DE fiducial_data.pressure = 34274.282506448195; #endif // DE - testingUtilities::checkResults(fiducial_data.density, test_data.density, "density"); - testingUtilities::checkResults(fiducial_data.velocity_x, test_data.velocity_x, "velocity_x"); - testingUtilities::checkResults(fiducial_data.velocity_y, test_data.velocity_y, "velocity_y"); - testingUtilities::checkResults(fiducial_data.velocity_z, test_data.velocity_z, "velocity_z"); - testingUtilities::checkResults(fiducial_data.pressure, test_data.pressure, "pressure"); + testing_utilities::checkResults(fiducial_data.density, test_data.density, "density"); + testing_utilities::checkResults(fiducial_data.velocity_x, test_data.velocity_x, "velocity_x"); + testing_utilities::checkResults(fiducial_data.velocity_y, test_data.velocity_y, "velocity_y"); + testing_utilities::checkResults(fiducial_data.velocity_z, test_data.velocity_z, "velocity_z"); + testing_utilities::checkResults(fiducial_data.pressure, test_data.pressure, "pressure"); #endif // MHD } @@ -267,20 +267,20 @@ TEST(tALLReconstructionComputeSlope, CorrectInputExpectCorrectOutput) // Check results #ifdef MHD Real const fiducial_data = -2.5; - testingUtilities::checkResults(fiducial_data, test_data.density, "density"); - testingUtilities::checkResults(fiducial_data, test_data.velocity_x, "velocity_x"); - testingUtilities::checkResults(fiducial_data, test_data.velocity_y, "velocity_y"); - testingUtilities::checkResults(fiducial_data, test_data.velocity_z, "velocity_z"); - testingUtilities::checkResults(fiducial_data, test_data.pressure, "pressure"); - testingUtilities::checkResults(fiducial_data, test_data.magnetic_y, "magnetic_y"); - testingUtilities::checkResults(fiducial_data, test_data.magnetic_z, "magnetic_z"); + testing_utilities::checkResults(fiducial_data, test_data.density, "density"); + testing_utilities::checkResults(fiducial_data, test_data.velocity_x, "velocity_x"); + testing_utilities::checkResults(fiducial_data, test_data.velocity_y, "velocity_y"); + testing_utilities::checkResults(fiducial_data, test_data.velocity_z, "velocity_z"); + testing_utilities::checkResults(fiducial_data, test_data.pressure, "pressure"); + testing_utilities::checkResults(fiducial_data, test_data.magnetic_y, "magnetic_y"); + testing_utilities::checkResults(fiducial_data, test_data.magnetic_z, "magnetic_z"); #else // MHD Real const fiducial_data = -2.5; - testingUtilities::checkResults(fiducial_data, test_data.density, "density"); - testingUtilities::checkResults(fiducial_data, test_data.velocity_x, "velocity_x"); - testingUtilities::checkResults(fiducial_data, test_data.velocity_y, "velocity_y"); - testingUtilities::checkResults(fiducial_data, test_data.velocity_z, "velocity_z"); - testingUtilities::checkResults(fiducial_data, test_data.pressure, "pressure"); + testing_utilities::checkResults(fiducial_data, test_data.density, "density"); + testing_utilities::checkResults(fiducial_data, test_data.velocity_x, "velocity_x"); + testing_utilities::checkResults(fiducial_data, test_data.velocity_y, "velocity_y"); + testing_utilities::checkResults(fiducial_data, test_data.velocity_z, "velocity_z"); + testing_utilities::checkResults(fiducial_data, test_data.pressure, "pressure"); #endif // MHD } @@ -303,21 +303,21 @@ TEST(tALLReconstructionVanLeerSlope, CorrectInputExpectCorrectOutput) reconstruction::Primitive const fiducial_data{1.7142857142857142, 3.1111111111111112, 4.3636363636363633, 5.5384615384615383, 6.666666666666667, 0, 8.8421052631578956, 9.9047619047619051}; - testingUtilities::checkResults(fiducial_data.density, test_data.density, "density"); - testingUtilities::checkResults(fiducial_data.velocity_x, test_data.velocity_x, "velocity_x"); - testingUtilities::checkResults(fiducial_data.velocity_y, test_data.velocity_y, "velocity_y"); - testingUtilities::checkResults(fiducial_data.velocity_z, test_data.velocity_z, "velocity_z"); - testingUtilities::checkResults(fiducial_data.pressure, test_data.pressure, "pressure"); - testingUtilities::checkResults(fiducial_data.magnetic_y, test_data.magnetic_y, "magnetic_y"); - testingUtilities::checkResults(fiducial_data.magnetic_z, test_data.magnetic_z, "magnetic_z"); + testing_utilities::checkResults(fiducial_data.density, test_data.density, "density"); + testing_utilities::checkResults(fiducial_data.velocity_x, test_data.velocity_x, "velocity_x"); + testing_utilities::checkResults(fiducial_data.velocity_y, test_data.velocity_y, "velocity_y"); + testing_utilities::checkResults(fiducial_data.velocity_z, test_data.velocity_z, "velocity_z"); + testing_utilities::checkResults(fiducial_data.pressure, test_data.pressure, "pressure"); + testing_utilities::checkResults(fiducial_data.magnetic_y, test_data.magnetic_y, "magnetic_y"); + testing_utilities::checkResults(fiducial_data.magnetic_z, test_data.magnetic_z, "magnetic_z"); #else // MHD reconstruction::Primitive const fiducial_data{1.7142857142857142, 3.1111111111111112, 4.3636363636363633, 5.5384615384615383, 6.666666666666667}; - testingUtilities::checkResults(fiducial_data.density, test_data.density, "density"); - testingUtilities::checkResults(fiducial_data.velocity_x, test_data.velocity_x, "velocity_x"); - testingUtilities::checkResults(fiducial_data.velocity_y, test_data.velocity_y, "velocity_y"); - testingUtilities::checkResults(fiducial_data.velocity_z, test_data.velocity_z, "velocity_z"); - testingUtilities::checkResults(fiducial_data.pressure, test_data.pressure, "pressure"); + testing_utilities::checkResults(fiducial_data.density, test_data.density, "density"); + testing_utilities::checkResults(fiducial_data.velocity_x, test_data.velocity_x, "velocity_x"); + testing_utilities::checkResults(fiducial_data.velocity_y, test_data.velocity_y, "velocity_y"); + testing_utilities::checkResults(fiducial_data.velocity_z, test_data.velocity_z, "velocity_z"); + testing_utilities::checkResults(fiducial_data.pressure, test_data.pressure, "pressure"); #endif // MHD } @@ -375,20 +375,20 @@ TEST(tALLReconstructionMonotonizeCharacteristicReturnPrimitive, CorrectInputExpe // Check results #ifdef MHD reconstruction::Primitive const fiducial_data{5046, 2934, -2526, -2828, 1441532, 0.0, -69716, 72152}; - testingUtilities::checkResults(fiducial_data.density, host_results.density, "density"); - testingUtilities::checkResults(fiducial_data.velocity_x, host_results.velocity_x, "velocity_x"); - testingUtilities::checkResults(fiducial_data.velocity_y, host_results.velocity_y, "velocity_y"); - testingUtilities::checkResults(fiducial_data.velocity_z, host_results.velocity_z, "velocity_z"); - testingUtilities::checkResults(fiducial_data.pressure, host_results.pressure, "pressure"); - testingUtilities::checkResults(fiducial_data.magnetic_y, host_results.magnetic_y, "magnetic_y"); - testingUtilities::checkResults(fiducial_data.magnetic_z, host_results.magnetic_z, "magnetic_z"); + testing_utilities::checkResults(fiducial_data.density, host_results.density, "density"); + testing_utilities::checkResults(fiducial_data.velocity_x, host_results.velocity_x, "velocity_x"); + testing_utilities::checkResults(fiducial_data.velocity_y, host_results.velocity_y, "velocity_y"); + testing_utilities::checkResults(fiducial_data.velocity_z, host_results.velocity_z, "velocity_z"); + testing_utilities::checkResults(fiducial_data.pressure, host_results.pressure, "pressure"); + testing_utilities::checkResults(fiducial_data.magnetic_y, host_results.magnetic_y, "magnetic_y"); + testing_utilities::checkResults(fiducial_data.magnetic_z, host_results.magnetic_z, "magnetic_z"); #else // MHD reconstruction::Primitive const fiducial_data{170, 68, 57, 58, 32946}; - testingUtilities::checkResults(fiducial_data.density, host_results.density, "density"); - testingUtilities::checkResults(fiducial_data.velocity_x, host_results.velocity_x, "velocity_x"); - testingUtilities::checkResults(fiducial_data.velocity_y, host_results.velocity_y, "velocity_y"); - testingUtilities::checkResults(fiducial_data.velocity_z, host_results.velocity_z, "velocity_z"); - testingUtilities::checkResults(fiducial_data.pressure, host_results.pressure, "pressure"); + testing_utilities::checkResults(fiducial_data.density, host_results.density, "density"); + testing_utilities::checkResults(fiducial_data.velocity_x, host_results.velocity_x, "velocity_x"); + testing_utilities::checkResults(fiducial_data.velocity_y, host_results.velocity_y, "velocity_y"); + testing_utilities::checkResults(fiducial_data.velocity_z, host_results.velocity_z, "velocity_z"); + testing_utilities::checkResults(fiducial_data.pressure, host_results.pressure, "pressure"); #endif // MHD } @@ -441,20 +441,20 @@ TEST(tALLReconstructionCalcInterfaceLinear, CorrectInputExpectCorrectOutput) // Check results #ifdef MHD reconstruction::Primitive const fiducial_data{2.5, 3.75, 5, 6.25, 7.5, 0, 10, 11.25}; - testingUtilities::checkResults(fiducial_data.density, test_data.density, "density"); - testingUtilities::checkResults(fiducial_data.velocity_x, test_data.velocity_x, "velocity_x"); - testingUtilities::checkResults(fiducial_data.velocity_y, test_data.velocity_y, "velocity_y"); - testingUtilities::checkResults(fiducial_data.velocity_z, test_data.velocity_z, "velocity_z"); - testingUtilities::checkResults(fiducial_data.pressure, test_data.pressure, "pressure"); - testingUtilities::checkResults(fiducial_data.magnetic_y, test_data.magnetic_y, "magnetic_y"); - testingUtilities::checkResults(fiducial_data.magnetic_z, test_data.magnetic_z, "magnetic_z"); + testing_utilities::checkResults(fiducial_data.density, test_data.density, "density"); + testing_utilities::checkResults(fiducial_data.velocity_x, test_data.velocity_x, "velocity_x"); + testing_utilities::checkResults(fiducial_data.velocity_y, test_data.velocity_y, "velocity_y"); + testing_utilities::checkResults(fiducial_data.velocity_z, test_data.velocity_z, "velocity_z"); + testing_utilities::checkResults(fiducial_data.pressure, test_data.pressure, "pressure"); + testing_utilities::checkResults(fiducial_data.magnetic_y, test_data.magnetic_y, "magnetic_y"); + testing_utilities::checkResults(fiducial_data.magnetic_z, test_data.magnetic_z, "magnetic_z"); #else // MHD reconstruction::Primitive const fiducial_data{2.5, 3.75, 5, 6.25, 7.5}; - testingUtilities::checkResults(fiducial_data.density, test_data.density, "density"); - testingUtilities::checkResults(fiducial_data.velocity_x, test_data.velocity_x, "velocity_x"); - testingUtilities::checkResults(fiducial_data.velocity_y, test_data.velocity_y, "velocity_y"); - testingUtilities::checkResults(fiducial_data.velocity_z, test_data.velocity_z, "velocity_z"); - testingUtilities::checkResults(fiducial_data.pressure, test_data.pressure, "pressure"); + testing_utilities::checkResults(fiducial_data.density, test_data.density, "density"); + testing_utilities::checkResults(fiducial_data.velocity_x, test_data.velocity_x, "velocity_x"); + testing_utilities::checkResults(fiducial_data.velocity_y, test_data.velocity_y, "velocity_y"); + testing_utilities::checkResults(fiducial_data.velocity_z, test_data.velocity_z, "velocity_z"); + testing_utilities::checkResults(fiducial_data.pressure, test_data.pressure, "pressure"); #endif // MHD } @@ -481,13 +481,13 @@ TEST(tALLReconstructionCalcInterfaceParabolic, CorrectInputExpectCorrectOutput) reconstruction::Primitive const fiducial_data{4.833333333333333, 5.833333333333333, 6.833333333333333, 7.833333333333333, 8.8333333333333339, 0.0, 10.833333333333334, 11.833333333333334}; - testingUtilities::checkResults(fiducial_data.density, test_data.density, "density"); - testingUtilities::checkResults(fiducial_data.velocity_x, test_data.velocity_x, "velocity_x"); - testingUtilities::checkResults(fiducial_data.velocity_y, test_data.velocity_y, "velocity_y"); - testingUtilities::checkResults(fiducial_data.velocity_z, test_data.velocity_z, "velocity_z"); - testingUtilities::checkResults(fiducial_data.pressure, test_data.pressure, "pressure"); - testingUtilities::checkResults(fiducial_data.magnetic_y, test_data.magnetic_y, "magnetic_y"); - testingUtilities::checkResults(fiducial_data.magnetic_z, test_data.magnetic_z, "magnetic_z"); + testing_utilities::checkResults(fiducial_data.density, test_data.density, "density"); + testing_utilities::checkResults(fiducial_data.velocity_x, test_data.velocity_x, "velocity_x"); + testing_utilities::checkResults(fiducial_data.velocity_y, test_data.velocity_y, "velocity_y"); + testing_utilities::checkResults(fiducial_data.velocity_z, test_data.velocity_z, "velocity_z"); + testing_utilities::checkResults(fiducial_data.pressure, test_data.pressure, "pressure"); + testing_utilities::checkResults(fiducial_data.magnetic_y, test_data.magnetic_y, "magnetic_y"); + testing_utilities::checkResults(fiducial_data.magnetic_z, test_data.magnetic_z, "magnetic_z"); #else // MHD reconstruction::Primitive const fiducial_data{4.833333333333333, 5.833333333333333, 6.833333333333333, 7.833333333333333, 8.8333333333333339}; @@ -567,8 +567,8 @@ TEST(tALLReconstructionPPMSingleVariable, CorrectInputExpectCorrectOutput) input_data[idx + 4], test_left_interface, test_right_interface); // Compare results - testingUtilities::checkResults(fiducial_left_interface.at(i), test_left_interface, "left i+1/2 interface"); - testingUtilities::checkResults(fiducial_right_interface.at(i), test_right_interface, "right i-1/2 interface"); + testing_utilities::checkResults(fiducial_left_interface.at(i), test_left_interface, "left i+1/2 interface"); + testing_utilities::checkResults(fiducial_right_interface.at(i), test_right_interface, "right i-1/2 interface"); } } @@ -611,6 +611,6 @@ TEST(tALLReconstructionWriteData, CorrectInputExpectCorrectOutput) double test_val = interface_arr.at(i); double fiducial_val = (fiducial_interface.find(i) == fiducial_interface.end()) ? 0.0 : fiducial_interface[i]; - testingUtilities::checkResults(fiducial_val, test_val, "Interface at i=" + std::to_string(i)); + testing_utilities::checkResults(fiducial_val, test_val, "Interface at i=" + std::to_string(i)); } } diff --git a/src/riemann_solvers/hllc_cuda_tests.cu b/src/riemann_solvers/hllc_cuda_tests.cu index c3efe9d96..555aed966 100644 --- a/src/riemann_solvers/hllc_cuda_tests.cu +++ b/src/riemann_solvers/hllc_cuda_tests.cu @@ -125,7 +125,7 @@ class tHYDROCalculateHLLCFluxesCUDA : public ::testing::Test double absoluteDiff; int64_t ulpsDiff; - bool areEqual = testingUtilities::nearlyEqualDbl(fiducialFlux[i], testFlux[i], absoluteDiff, ulpsDiff); + bool areEqual = testing_utilities::nearlyEqualDbl(fiducialFlux[i], testFlux[i], absoluteDiff, ulpsDiff); EXPECT_TRUE(areEqual) << std::endl << customOutput << std::endl << "There's a difference in " << fieldNames[i] << " Flux" << std::endl diff --git a/src/riemann_solvers/hlld_cuda_tests.cu b/src/riemann_solvers/hlld_cuda_tests.cu index c39e091d1..c425f15fc 100644 --- a/src/riemann_solvers/hlld_cuda_tests.cu +++ b/src/riemann_solvers/hlld_cuda_tests.cu @@ -199,8 +199,8 @@ class tMHDCalculateHLLDFluxesCUDA : public ::testing::Test double const fixedEpsilon = 2.7E-15; int64_t const ulpsEpsilon = 7; - bool areEqual = testingUtilities::nearlyEqualDbl(fiducialFlux[i], testFlux[i], absoluteDiff, ulpsDiff, - fixedEpsilon, ulpsEpsilon); + bool areEqual = testing_utilities::nearlyEqualDbl(fiducialFlux[i], testFlux[i], absoluteDiff, ulpsDiff, + fixedEpsilon, ulpsEpsilon); EXPECT_TRUE(areEqual) << std::endl << customOutput << std::endl << "There's a difference in " << fieldNames[i] << " Flux" << std::endl @@ -1878,8 +1878,8 @@ TEST(tMHDHlldInternalApproximateLRWaveSpeeds, CorrectInputExpectCorrectOutput) parameters.stateLVec.at(i), parameters.stateRVec.at(i), parameters.magneticX.at(i), parameters.gamma); // Now check results - testingUtilities::checkResults(fiducialSpeedL[i], testSpeed.L, parameters.names.at(i) + ", SpeedL"); - testingUtilities::checkResults(fiducialSpeedR.at(i), testSpeed.R, parameters.names.at(i) + ", SpeedR"); + testing_utilities::checkResults(fiducialSpeedL[i], testSpeed.L, parameters.names.at(i) + ", SpeedL"); + testing_utilities::checkResults(fiducialSpeedR.at(i), testSpeed.R, parameters.names.at(i) + ", SpeedR"); } } // ========================================================================= @@ -1902,7 +1902,7 @@ TEST(tMHDHlldInternalApproximateMiddleWaveSpeed, CorrectInputExpectCorrectOutput parameters.speed.at(i)); // Now check results - testingUtilities::checkResults(fiducialSpeedM.at(i), testSpeed.M, parameters.names.at(i) + ", SpeedM"); + testing_utilities::checkResults(fiducialSpeedM.at(i), testSpeed.M, parameters.names.at(i) + ", SpeedM"); } } // ========================================================================= @@ -1927,8 +1927,8 @@ TEST(tMHDHlldInternalApproximateStarWaveSpeed, CorrectInputExpectCorrectOutput) parameters.magneticX.at(i), 1); // Now check results - testingUtilities::checkResults(fiducialSpeedStarL.at(i), testSpeed.LStar, parameters.names.at(i) + ", SpeedStarL"); - testingUtilities::checkResults(fiducialSpeedStarR.at(i), testSpeed.RStar, parameters.names.at(i) + ", SpeedStarR"); + testing_utilities::checkResults(fiducialSpeedStarL.at(i), testSpeed.LStar, parameters.names.at(i) + ", SpeedStarL"); + testing_utilities::checkResults(fiducialSpeedStarR.at(i), testSpeed.RStar, parameters.names.at(i) + ", SpeedStarR"); } } // ========================================================================= @@ -1953,18 +1953,19 @@ TEST(tMHDHlldInternalNonStarFluxes, CorrectInputExpectCorrectOutput) mhd::_internal::nonStarFluxes(parameters.stateLVec.at(i), parameters.magneticX.at(i)); // Now check results - testingUtilities::checkResults(fiducialFlux[i].density, testFlux.density, parameters.names.at(i) + ", DensityFlux"); - testingUtilities::checkResults(fiducialFlux[i].momentumX, testFlux.momentumX, - parameters.names.at(i) + ", MomentumFluxX"); - testingUtilities::checkResults(fiducialFlux[i].momentumY, testFlux.momentumY, - parameters.names.at(i) + ", MomentumFluxY"); - testingUtilities::checkResults(fiducialFlux[i].momentumZ, testFlux.momentumZ, - parameters.names.at(i) + ", MomentumFluxZ"); - testingUtilities::checkResults(fiducialFlux[i].magneticY, testFlux.magneticY, - parameters.names.at(i) + ", MagneticFluxY"); - testingUtilities::checkResults(fiducialFlux[i].magneticZ, testFlux.magneticZ, - parameters.names.at(i) + ", MagneticFluxZ"); - testingUtilities::checkResults(fiducialFlux[i].energy, testFlux.energy, parameters.names.at(i) + ", EnergyFlux"); + testing_utilities::checkResults(fiducialFlux[i].density, testFlux.density, + parameters.names.at(i) + ", DensityFlux"); + testing_utilities::checkResults(fiducialFlux[i].momentumX, testFlux.momentumX, + parameters.names.at(i) + ", MomentumFluxX"); + testing_utilities::checkResults(fiducialFlux[i].momentumY, testFlux.momentumY, + parameters.names.at(i) + ", MomentumFluxY"); + testing_utilities::checkResults(fiducialFlux[i].momentumZ, testFlux.momentumZ, + parameters.names.at(i) + ", MomentumFluxZ"); + testing_utilities::checkResults(fiducialFlux[i].magneticY, testFlux.magneticY, + parameters.names.at(i) + ", MagneticFluxY"); + testing_utilities::checkResults(fiducialFlux[i].magneticZ, testFlux.magneticZ, + parameters.names.at(i) + ", MagneticFluxZ"); + testing_utilities::checkResults(fiducialFlux[i].energy, testFlux.energy, parameters.names.at(i) + ", EnergyFlux"); } } // ========================================================================= @@ -1991,16 +1992,16 @@ TEST(tMHDHlldInternalComputeStarState, CorrectInputNonDegenerateExpectCorrectOut parameters.magneticX.at(i), parameters.totalPressureStar.at(i)); // Now check results - testingUtilities::checkResults(fiducialStarState.at(i).velocityY, testStarState.velocityY, - parameters.names.at(i) + ", VelocityStarY"); - testingUtilities::checkResults(fiducialStarState.at(i).velocityZ, testStarState.velocityZ, - parameters.names.at(i) + ", VelocityStarZ"); - testingUtilities::checkResults(fiducialStarState.at(i).energy, testStarState.energy, - parameters.names.at(i) + ", EnergyStar"); - testingUtilities::checkResults(fiducialStarState.at(i).magneticY, testStarState.magneticY, - parameters.names.at(i) + ", MagneticStarY"); - testingUtilities::checkResults(fiducialStarState.at(i).magneticZ, testStarState.magneticZ, - parameters.names.at(i) + ", MagneticStarZ"); + testing_utilities::checkResults(fiducialStarState.at(i).velocityY, testStarState.velocityY, + parameters.names.at(i) + ", VelocityStarY"); + testing_utilities::checkResults(fiducialStarState.at(i).velocityZ, testStarState.velocityZ, + parameters.names.at(i) + ", VelocityStarZ"); + testing_utilities::checkResults(fiducialStarState.at(i).energy, testStarState.energy, + parameters.names.at(i) + ", EnergyStar"); + testing_utilities::checkResults(fiducialStarState.at(i).magneticY, testStarState.magneticY, + parameters.names.at(i) + ", MagneticStarY"); + testing_utilities::checkResults(fiducialStarState.at(i).magneticZ, testStarState.magneticZ, + parameters.names.at(i) + ", MagneticStarZ"); } } @@ -2029,20 +2030,20 @@ TEST(tMHDHlldInternalStarFluxes, CorrectInputNonDegenerateExpectCorrectOutput) parameters.speed.at(i), parameters.speed.at(i).L); // Now check results - testingUtilities::checkResults(fiducialFlux[i].density, testFlux.density, - parameters.names.at(i) + ", DensityStarFlux"); - testingUtilities::checkResults(fiducialFlux[i].momentumX, testFlux.momentumX, - parameters.names.at(i) + ", MomentumStarFluxX"); - testingUtilities::checkResults(fiducialFlux[i].momentumY, testFlux.momentumY, - parameters.names.at(i) + ", MomentumStarFluxY"); - testingUtilities::checkResults(fiducialFlux[i].momentumZ, testFlux.momentumZ, - parameters.names.at(i) + ", MomentumStarFluxZ"); - testingUtilities::checkResults(fiducialFlux[i].energy, testFlux.energy, - parameters.names.at(i) + ", EnergyStarFlux"); - testingUtilities::checkResults(fiducialFlux[i].magneticY, testFlux.magneticY, - parameters.names.at(i) + ", MagneticStarFluxY", 1.0E-13); - testingUtilities::checkResults(fiducialFlux[i].magneticZ, testFlux.magneticZ, - parameters.names.at(i) + ", MagneticStarFluxZ", 7.0E-13); + testing_utilities::checkResults(fiducialFlux[i].density, testFlux.density, + parameters.names.at(i) + ", DensityStarFlux"); + testing_utilities::checkResults(fiducialFlux[i].momentumX, testFlux.momentumX, + parameters.names.at(i) + ", MomentumStarFluxX"); + testing_utilities::checkResults(fiducialFlux[i].momentumY, testFlux.momentumY, + parameters.names.at(i) + ", MomentumStarFluxY"); + testing_utilities::checkResults(fiducialFlux[i].momentumZ, testFlux.momentumZ, + parameters.names.at(i) + ", MomentumStarFluxZ"); + testing_utilities::checkResults(fiducialFlux[i].energy, testFlux.energy, + parameters.names.at(i) + ", EnergyStarFlux"); + testing_utilities::checkResults(fiducialFlux[i].magneticY, testFlux.magneticY, + parameters.names.at(i) + ", MagneticStarFluxY", 1.0E-13); + testing_utilities::checkResults(fiducialFlux[i].magneticZ, testFlux.magneticZ, + parameters.names.at(i) + ", MagneticStarFluxZ", 7.0E-13); } } @@ -2071,16 +2072,16 @@ TEST(tMHDHlldInternalComputeStarState, CorrectInputDegenerateExpectCorrectOutput parameters.magneticX.at(i), parameters.totalPressureStar.at(i)); // Now check results - testingUtilities::checkResults(fiducialStarState.at(i).velocityY, testStarState.velocityY, - parameters.names.at(i) + ", VelocityStarY"); - testingUtilities::checkResults(fiducialStarState.at(i).velocityZ, testStarState.velocityZ, - parameters.names.at(i) + ", VelocityStarZ"); - testingUtilities::checkResults(fiducialStarState.at(i).energy, testStarState.energy, - parameters.names.at(i) + ", EnergyStar"); - testingUtilities::checkResults(fiducialStarState.at(i).magneticY, testStarState.magneticY, - parameters.names.at(i) + ", MagneticStarY"); - testingUtilities::checkResults(fiducialStarState.at(i).magneticZ, testStarState.magneticZ, - parameters.names.at(i) + ", MagneticStarZ"); + testing_utilities::checkResults(fiducialStarState.at(i).velocityY, testStarState.velocityY, + parameters.names.at(i) + ", VelocityStarY"); + testing_utilities::checkResults(fiducialStarState.at(i).velocityZ, testStarState.velocityZ, + parameters.names.at(i) + ", VelocityStarZ"); + testing_utilities::checkResults(fiducialStarState.at(i).energy, testStarState.energy, + parameters.names.at(i) + ", EnergyStar"); + testing_utilities::checkResults(fiducialStarState.at(i).magneticY, testStarState.magneticY, + parameters.names.at(i) + ", MagneticStarY"); + testing_utilities::checkResults(fiducialStarState.at(i).magneticZ, testStarState.magneticZ, + parameters.names.at(i) + ", MagneticStarZ"); } } @@ -2106,20 +2107,20 @@ TEST(tMHDHlldInternalStarFluxes, CorrectInputDegenerateExpectCorrectOutput) parameters.speed.at(i), parameters.speed.at(i).L); // Now check results - testingUtilities::checkResults(fiducialFlux[i].density, testFlux.density, - parameters.names.at(i) + ", DensityStarFlux"); - testingUtilities::checkResults(fiducialFlux[i].momentumX, testFlux.momentumX, - parameters.names.at(i) + ", MomentumStarFluxX"); - testingUtilities::checkResults(fiducialFlux[i].momentumY, testFlux.momentumY, - parameters.names.at(i) + ", MomentumStarFluxY"); - testingUtilities::checkResults(fiducialFlux[i].momentumZ, testFlux.momentumZ, - parameters.names.at(i) + ", MomentumStarFluxZ"); - testingUtilities::checkResults(fiducialFlux[i].energy, testFlux.energy, - parameters.names.at(i) + ", EnergyStarFlux"); - testingUtilities::checkResults(fiducialFlux[i].magneticY, testFlux.magneticY, - parameters.names.at(i) + ", MagneticStarFluxY"); - testingUtilities::checkResults(fiducialFlux[i].magneticZ, testFlux.magneticZ, - parameters.names.at(i) + ", MagneticStarFluxZ"); + testing_utilities::checkResults(fiducialFlux[i].density, testFlux.density, + parameters.names.at(i) + ", DensityStarFlux"); + testing_utilities::checkResults(fiducialFlux[i].momentumX, testFlux.momentumX, + parameters.names.at(i) + ", MomentumStarFluxX"); + testing_utilities::checkResults(fiducialFlux[i].momentumY, testFlux.momentumY, + parameters.names.at(i) + ", MomentumStarFluxY"); + testing_utilities::checkResults(fiducialFlux[i].momentumZ, testFlux.momentumZ, + parameters.names.at(i) + ", MomentumStarFluxZ"); + testing_utilities::checkResults(fiducialFlux[i].energy, testFlux.energy, + parameters.names.at(i) + ", EnergyStarFlux"); + testing_utilities::checkResults(fiducialFlux[i].magneticY, testFlux.magneticY, + parameters.names.at(i) + ", MagneticStarFluxY"); + testing_utilities::checkResults(fiducialFlux[i].magneticZ, testFlux.magneticZ, + parameters.names.at(i) + ", MagneticStarFluxZ"); } } // ========================================================================= @@ -2146,18 +2147,18 @@ TEST(tMHDHlldInternalDoubleStarState, CorrectInputNonDegenerateExpectCorrectOutp parameters.totalPressureStar.at(i), parameters.speed.at(i)); // Now check results - testingUtilities::checkResults(fiducialState.at(i).velocityY, testState.velocityY, - parameters.names.at(i) + ", VelocityDoubleStarY"); - testingUtilities::checkResults(fiducialState.at(i).velocityZ, testState.velocityZ, - parameters.names.at(i) + ", VelocityDoubleStarZ"); - testingUtilities::checkResults(fiducialState.at(i).magneticY, testState.magneticY, - parameters.names.at(i) + ", MagneticDoubleStarY"); - testingUtilities::checkResults(fiducialState.at(i).magneticZ, testState.magneticZ, - parameters.names.at(i) + ", MagneticDoubleStarZ"); - testingUtilities::checkResults(fiducialState.at(i).energyL, testState.energyL, - parameters.names.at(i) + ", EnergyDoubleStarL"); - testingUtilities::checkResults(fiducialState.at(i).energyR, testState.energyR, - parameters.names.at(i) + ", EnergyDoubleStarR"); + testing_utilities::checkResults(fiducialState.at(i).velocityY, testState.velocityY, + parameters.names.at(i) + ", VelocityDoubleStarY"); + testing_utilities::checkResults(fiducialState.at(i).velocityZ, testState.velocityZ, + parameters.names.at(i) + ", VelocityDoubleStarZ"); + testing_utilities::checkResults(fiducialState.at(i).magneticY, testState.magneticY, + parameters.names.at(i) + ", MagneticDoubleStarY"); + testing_utilities::checkResults(fiducialState.at(i).magneticZ, testState.magneticZ, + parameters.names.at(i) + ", MagneticDoubleStarZ"); + testing_utilities::checkResults(fiducialState.at(i).energyL, testState.energyL, + parameters.names.at(i) + ", EnergyDoubleStarL"); + testing_utilities::checkResults(fiducialState.at(i).energyR, testState.energyR, + parameters.names.at(i) + ", EnergyDoubleStarR"); } } @@ -2180,18 +2181,18 @@ TEST(tMHDHlldInternalDoubleStarState, CorrectInputDegenerateExpectCorrectOutput) parameters.totalPressureStar.at(i), parameters.speed.at(i)); // Now check results - testingUtilities::checkResults(fiducialState.at(i).velocityY, testState.velocityY, - parameters.names.at(i) + ", VelocityDoubleStarY"); - testingUtilities::checkResults(fiducialState.at(i).velocityZ, testState.velocityZ, - parameters.names.at(i) + ", VelocityDoubleStarZ"); - testingUtilities::checkResults(fiducialState.at(i).magneticY, testState.magneticY, - parameters.names.at(i) + ", MagneticDoubleStarY"); - testingUtilities::checkResults(fiducialState.at(i).magneticZ, testState.magneticZ, - parameters.names.at(i) + ", MagneticDoubleStarZ"); - testingUtilities::checkResults(fiducialState.at(i).energyL, testState.energyL, - parameters.names.at(i) + ", EnergyDoubleStarL"); - testingUtilities::checkResults(fiducialState.at(i).energyR, testState.energyR, - parameters.names.at(i) + ", EnergyDoubleStarR"); + testing_utilities::checkResults(fiducialState.at(i).velocityY, testState.velocityY, + parameters.names.at(i) + ", VelocityDoubleStarY"); + testing_utilities::checkResults(fiducialState.at(i).velocityZ, testState.velocityZ, + parameters.names.at(i) + ", VelocityDoubleStarZ"); + testing_utilities::checkResults(fiducialState.at(i).magneticY, testState.magneticY, + parameters.names.at(i) + ", MagneticDoubleStarY"); + testing_utilities::checkResults(fiducialState.at(i).magneticZ, testState.magneticZ, + parameters.names.at(i) + ", MagneticDoubleStarZ"); + testing_utilities::checkResults(fiducialState.at(i).energyL, testState.energyL, + parameters.names.at(i) + ", EnergyDoubleStarL"); + testing_utilities::checkResults(fiducialState.at(i).energyR, testState.energyR, + parameters.names.at(i) + ", EnergyDoubleStarR"); } } // ========================================================================= @@ -2218,20 +2219,20 @@ TEST(tMHDHlldInternalDoubleStarFluxes, CorrectInputExpectCorrectOutput) parameters.speed.at(i).L, parameters.speed.at(i).LStar); // Now check results - testingUtilities::checkResults(fiducialFlux[i].density, testFlux.density, - parameters.names.at(i) + ", DensityStarFlux", 5.0E-14); - testingUtilities::checkResults(fiducialFlux[i].momentumX, testFlux.momentumX, - parameters.names.at(i) + ", MomentumStarFluxX"); - testingUtilities::checkResults(fiducialFlux[i].momentumY, testFlux.momentumY, - parameters.names.at(i) + ", MomentumStarFluxY"); - testingUtilities::checkResults(fiducialFlux[i].momentumZ, testFlux.momentumZ, - parameters.names.at(i) + ", MomentumStarFluxZ"); - testingUtilities::checkResults(fiducialFlux[i].energy, testFlux.energy, - parameters.names.at(i) + ", EnergyStarFlux"); - testingUtilities::checkResults(fiducialFlux[i].magneticY, testFlux.magneticY, - parameters.names.at(i) + ", MagneticStarFluxY"); - testingUtilities::checkResults(fiducialFlux[i].magneticZ, testFlux.magneticZ, - parameters.names.at(i) + ", MagneticStarFluxZ"); + testing_utilities::checkResults(fiducialFlux[i].density, testFlux.density, + parameters.names.at(i) + ", DensityStarFlux", 5.0E-14); + testing_utilities::checkResults(fiducialFlux[i].momentumX, testFlux.momentumX, + parameters.names.at(i) + ", MomentumStarFluxX"); + testing_utilities::checkResults(fiducialFlux[i].momentumY, testFlux.momentumY, + parameters.names.at(i) + ", MomentumStarFluxY"); + testing_utilities::checkResults(fiducialFlux[i].momentumZ, testFlux.momentumZ, + parameters.names.at(i) + ", MomentumStarFluxZ"); + testing_utilities::checkResults(fiducialFlux[i].energy, testFlux.energy, + parameters.names.at(i) + ", EnergyStarFlux"); + testing_utilities::checkResults(fiducialFlux[i].magneticY, testFlux.magneticY, + parameters.names.at(i) + ", MagneticStarFluxY"); + testing_utilities::checkResults(fiducialFlux[i].magneticZ, testFlux.magneticZ, + parameters.names.at(i) + ", MagneticStarFluxZ"); } } // ========================================================================= @@ -2338,8 +2339,8 @@ TEST(tMHDHlldInternalStarTotalPressure, CorrectInputExpectCorrectOutput) parameters.speed.at(i)); // Now check results - testingUtilities::checkResults(fiducialPressure.at(i), testPressure, - parameters.names.at(i) + ", total pressure in the star states"); + testing_utilities::checkResults(fiducialPressure.at(i), testPressure, + parameters.names.at(i) + ", total pressure in the star states"); } } // ========================================================================= @@ -2387,16 +2388,16 @@ TEST(tMHDHlldInternalLoadState, CorrectInputExpectCorrectOutput) parameters.gamma, threadId, n_cells, o1, o2, o3); // Now check results - testingUtilities::checkResults(fiducialState.at(direction).density, testState.density, ", Density"); - testingUtilities::checkResults(fiducialState.at(direction).velocityX, testState.velocityX, ", velocityX"); - testingUtilities::checkResults(fiducialState.at(direction).velocityY, testState.velocityY, ", velocityY"); - testingUtilities::checkResults(fiducialState.at(direction).velocityZ, testState.velocityZ, ", velocityZ"); - testingUtilities::checkResults(fiducialState.at(direction).energy, testState.energy, ", energy"); - testingUtilities::checkResults(fiducialState.at(direction).magneticY, testState.magneticY, ", magneticY"); - testingUtilities::checkResults(fiducialState.at(direction).magneticZ, testState.magneticZ, ", magneticZ"); - testingUtilities::checkResults(fiducialState.at(direction).gasPressure, testState.gasPressure, ", gasPressure"); - testingUtilities::checkResults(fiducialState.at(direction).totalPressure, testState.totalPressure, - ", totalPressure"); + testing_utilities::checkResults(fiducialState.at(direction).density, testState.density, ", Density"); + testing_utilities::checkResults(fiducialState.at(direction).velocityX, testState.velocityX, ", velocityX"); + testing_utilities::checkResults(fiducialState.at(direction).velocityY, testState.velocityY, ", velocityY"); + testing_utilities::checkResults(fiducialState.at(direction).velocityZ, testState.velocityZ, ", velocityZ"); + testing_utilities::checkResults(fiducialState.at(direction).energy, testState.energy, ", energy"); + testing_utilities::checkResults(fiducialState.at(direction).magneticY, testState.magneticY, ", magneticY"); + testing_utilities::checkResults(fiducialState.at(direction).magneticZ, testState.magneticZ, ", magneticZ"); + testing_utilities::checkResults(fiducialState.at(direction).gasPressure, testState.gasPressure, ", gasPressure"); + testing_utilities::checkResults(fiducialState.at(direction).totalPressure, testState.totalPressure, + ", totalPressure"); } } // ========================================================================= diff --git a/src/system_tests/cooling_system_tests.cpp b/src/system_tests/cooling_system_tests.cpp index f3fa90db4..71095151c 100644 --- a/src/system_tests/cooling_system_tests.cpp +++ b/src/system_tests/cooling_system_tests.cpp @@ -25,15 +25,15 @@ TEST(tCOOLINGSYSTEMConstant5, CorrectInputExpectCorrectOutput) double energy = 0.0014850544057189395;// Python */ double energy = 0.00148501098087863; // Cholla - systemTest::SystemTestRunner testObject(false, false, false); + system_test::SystemTestRunner testObject(false, false, false); testObject.launchCholla(); testObject.openHydroTestData(); - testingUtilities::analyticConstant(testObject, "density", COOL_RHO * 1e5); - testingUtilities::analyticConstant(testObject, "momentum_x", 0.0); - testingUtilities::analyticConstant(testObject, "momentum_y", 0.0); - testingUtilities::analyticConstant(testObject, "momentum_z", 0.0); - testingUtilities::analyticConstant(testObject, "Energy", energy); + testing_utilities::analyticConstant(testObject, "density", COOL_RHO * 1e5); + testing_utilities::analyticConstant(testObject, "momentum_x", 0.0); + testing_utilities::analyticConstant(testObject, "momentum_y", 0.0); + testing_utilities::analyticConstant(testObject, "momentum_z", 0.0); + testing_utilities::analyticConstant(testObject, "Energy", energy); } TEST(tCOOLINGSYSTEMConstant7, CorrectInputExpectCorrectOutput) @@ -44,15 +44,15 @@ TEST(tCOOLINGSYSTEMConstant7, CorrectInputExpectCorrectOutput) // T = 1e7 // double energy = 0.14982743570299709; // Python double energy = 0.14982745510047499; // Cholla - systemTest::SystemTestRunner testObject(false, false, false); + system_test::SystemTestRunner testObject(false, false, false); testObject.launchCholla(); testObject.openHydroTestData(); - testingUtilities::analyticConstant(testObject, "density", COOL_RHO * 1e5); - testingUtilities::analyticConstant(testObject, "momentum_x", 0.0); - testingUtilities::analyticConstant(testObject, "momentum_y", 0.0); - testingUtilities::analyticConstant(testObject, "momentum_z", 0.0); - testingUtilities::analyticConstant(testObject, "Energy", energy); + testing_utilities::analyticConstant(testObject, "density", COOL_RHO * 1e5); + testing_utilities::analyticConstant(testObject, "momentum_x", 0.0); + testing_utilities::analyticConstant(testObject, "momentum_y", 0.0); + testing_utilities::analyticConstant(testObject, "momentum_z", 0.0); + testing_utilities::analyticConstant(testObject, "Energy", energy); } TEST(tCOOLINGSYSTEMConstant8, CorrectInputExpectCorrectOutput) @@ -64,13 +64,13 @@ TEST(tCOOLINGSYSTEMConstant8, CorrectInputExpectCorrectOutput) // double energy = 1.499669522009355; // Python double energy = 1.4996695198095711; // Cholla - systemTest::SystemTestRunner testObject(false, false, false); + system_test::SystemTestRunner testObject(false, false, false); testObject.launchCholla(); testObject.openHydroTestData(); - testingUtilities::analyticConstant(testObject, "density", COOL_RHO * 1e5); - testingUtilities::analyticConstant(testObject, "momentum_x", 0.0); - testingUtilities::analyticConstant(testObject, "momentum_y", 0.0); - testingUtilities::analyticConstant(testObject, "momentum_z", 0.0); - testingUtilities::analyticConstant(testObject, "Energy", energy); + testing_utilities::analyticConstant(testObject, "density", COOL_RHO * 1e5); + testing_utilities::analyticConstant(testObject, "momentum_x", 0.0); + testing_utilities::analyticConstant(testObject, "momentum_y", 0.0); + testing_utilities::analyticConstant(testObject, "momentum_z", 0.0); + testing_utilities::analyticConstant(testObject, "Energy", energy); } diff --git a/src/system_tests/gravity_system_tests.cpp b/src/system_tests/gravity_system_tests.cpp index eba293cbb..c2a59c40e 100644 --- a/src/system_tests/gravity_system_tests.cpp +++ b/src/system_tests/gravity_system_tests.cpp @@ -22,7 +22,7 @@ /// @{ TEST(tGRAVITYSYSTEMSphericalCollapse, CorrectInputExpectCorrectOutput) { - systemTest::SystemTestRunner collapseTest; + system_test::SystemTestRunner collapseTest; collapseTest.runTest(); } /// @} diff --git a/src/system_tests/hydro_system_tests.cpp b/src/system_tests/hydro_system_tests.cpp index 288690290..ad1f31aa1 100644 --- a/src/system_tests/hydro_system_tests.cpp +++ b/src/system_tests/hydro_system_tests.cpp @@ -29,7 +29,7 @@ class tHYDROtMHDSYSTEMSodShockTubeParameterizedMpi : public ::testing::TestWithParam { protected: - systemTest::SystemTestRunner sodTest; + system_test::SystemTestRunner sodTest; }; TEST_P(tHYDROtMHDSYSTEMSodShockTubeParameterizedMpi, CorrectInputExpectCorrectOutput) @@ -82,17 +82,17 @@ TEST(tHYDROSYSTEMSodShockTube, TwoDimensionalCorrectInputExpectCorrectOutput) TEST(tHYDROtMHDSYSTEMConstant, CorrectInputExpectCorrectOutput) { - systemTest::SystemTestRunner testObject(false, false, false); + system_test::SystemTestRunner testObject(false, false, false); testObject.launchCholla(); testObject.openHydroTestData(); - testingUtilities::analyticConstant(testObject, "density", 1.0); - testingUtilities::analyticConstant(testObject, "momentum_x", 0.0); - testingUtilities::analyticConstant(testObject, "momentum_y", 0.0); - testingUtilities::analyticConstant(testObject, "momentum_z", 0.0); - testingUtilities::analyticConstant(testObject, "Energy", 1.5e-5); + testing_utilities::analyticConstant(testObject, "density", 1.0); + testing_utilities::analyticConstant(testObject, "momentum_x", 0.0); + testing_utilities::analyticConstant(testObject, "momentum_y", 0.0); + testing_utilities::analyticConstant(testObject, "momentum_z", 0.0); + testing_utilities::analyticConstant(testObject, "Energy", 1.5e-5); } TEST(tHYDROtMHDSYSTEMSoundWave3D, CorrectInputExpectCorrectOutput) @@ -108,7 +108,7 @@ TEST(tHYDROtMHDSYSTEMSoundWave3D, CorrectInputExpectCorrectOutput) double phase = kx * 0.5 - speed * time * real_kx; // kx*0.5 for half-cell offset double tolerance = 1e-7; - systemTest::SystemTestRunner testObject(false, false, false); + system_test::SystemTestRunner testObject(false, false, false); #ifdef MHD // Loosen correctness check to account for MHD only having PCM. This is @@ -128,11 +128,11 @@ TEST(tHYDROtMHDSYSTEMSoundWave3D, CorrectInputExpectCorrectOutput) testObject.openHydroTestData(); ASSERT_NO_FATAL_FAILURE( - testingUtilities::analyticSine(testObject, "density", 1.0, amplitude, kx, 0.0, 0.0, phase, tolerance)); + testing_utilities::analyticSine(testObject, "density", 1.0, amplitude, kx, 0.0, 0.0, phase, tolerance)); ASSERT_NO_FATAL_FAILURE( - testingUtilities::analyticSine(testObject, "momentum_x", 0.0, amplitude, kx, 0.0, 0.0, phase, tolerance)); - // testingUtilities::analyticSine(testObject,"momentum_y",0.0,amplitude,kx,0.0,0.0,0.0,tolerance); - // testingUtilities::analyticSine(testObject,"momentum_z",0.0,amplitude,kx,0.0,0.0,0.0,tolerance); + testing_utilities::analyticSine(testObject, "momentum_x", 0.0, amplitude, kx, 0.0, 0.0, phase, tolerance)); + // testing_utilities::analyticSine(testObject,"momentum_y",0.0,amplitude,kx,0.0,0.0,0.0,tolerance); + // testing_utilities::analyticSine(testObject,"momentum_z",0.0,amplitude,kx,0.0,0.0,0.0,tolerance); } // ============================================================================= @@ -151,7 +151,7 @@ class tHYDROtMHDSYSTEMLinearWavesParameterizedMpi : public ::testing::TestWithPa tHYDROtMHDSYSTEMLinearWavesParameterizedMpi() : waveTest(false, true, false, false){}; protected: - systemTest::SystemTestRunner waveTest; + system_test::SystemTestRunner waveTest; #ifdef PCM double const allowedL1Error = 4E-7; // Based on results in Gardiner & Stone 2008 diff --git a/src/system_tests/mhd_system_tests.cpp b/src/system_tests/mhd_system_tests.cpp index 30e1d81cf..5af66e352 100644 --- a/src/system_tests/mhd_system_tests.cpp +++ b/src/system_tests/mhd_system_tests.cpp @@ -32,8 +32,7 @@ class tMHDSYSTEMLinearWavesParameterizedAngle : public ::testing::TestWithParam< tMHDSYSTEMLinearWavesParameterizedAngle() : waveTest(false, true, false, false){}; protected: - systemTest::SystemTestRunner waveTest; - inline static std::unordered_map high_res_l2norms; + system_test::SystemTestRunner waveTest; void setLaunchParams(double const &waveSpeed, double const &rEigenVec_rho, double const &rEigenVec_MomentumX, double const &rEigenVec_MomentumY, double const &rEigenVec_MomentumZ, double const &rEigenVec_E, @@ -551,7 +550,7 @@ class tMHDSYSTEMLinearWavesParameterizedMpi : public ::testing::TestWithParam { protected: - systemTest::SystemTestRunner test_runner; + system_test::SystemTestRunner test_runner; }; INSTANTIATE_TEST_SUITE_P(, tMHDSYSTEMParameterizedMpi, ::testing::Values(1, 2, 4)); @@ -793,7 +792,7 @@ class tMHDSYSTEMCircularlyPolarizedAlfvenWaveParameterizedPolarization : public tMHDSYSTEMCircularlyPolarizedAlfvenWaveParameterizedPolarization() : cpawTest(false, true, false, false){}; protected: - systemTest::SystemTestRunner cpawTest; + system_test::SystemTestRunner cpawTest; void setLaunchParams(double const &polarization, double const &vx) { diff --git a/src/system_tests/particles_system_tests.cpp b/src/system_tests/particles_system_tests.cpp index 7cbd587cb..4b6b36575 100644 --- a/src/system_tests/particles_system_tests.cpp +++ b/src/system_tests/particles_system_tests.cpp @@ -22,7 +22,7 @@ /// @{ TEST(tPARTICLESSYSTEMSphericalCollapse, DISABLED_CorrectInputExpectCorrectOutput) { - systemTest::SystemTestRunner collapseTest(true); + system_test::SystemTestRunner collapseTest(true); collapseTest.runTest(); } /// @} diff --git a/src/system_tests/system_tester.cpp b/src/system_tests/system_tester.cpp index 7a07c73af..188d39f39 100644 --- a/src/system_tests/system_tester.cpp +++ b/src/system_tests/system_tester.cpp @@ -31,8 +31,8 @@ // ============================================================================= // ============================================================================= -void systemTest::SystemTestRunner::runTest(bool const &compute_L2_norm_only, double const &maxAllowedL1Error, - double const &maxAllowedError) +void system_test::SystemTestRunner::runTest(bool const &compute_L2_norm_only, double const &maxAllowedL1Error, + double const &maxAllowedError) { /// Only run if this variable is set to `true`. Generally this and /// globalCompareSystemTestResults should only be used for large MPI / tests @@ -171,8 +171,8 @@ void systemTest::SystemTestRunner::runTest(bool const &compute_L2_norm_only, dou // Check for equality and iff not equal return difference double absoluteDiff; int64_t ulpsDiff; - bool areEqual = testingUtilities::nearlyEqualDbl(fiducialData.at(index), testData.at(index), absoluteDiff, - ulpsDiff, _fixedEpsilon); + bool areEqual = testing_utilities::nearlyEqualDbl(fiducialData.at(index), testData.at(index), absoluteDiff, + ulpsDiff, _fixedEpsilon); ASSERT_TRUE(areEqual) << std::endl << "Difference in " << dataSetName << " dataset at [" << i << "," << j << "," << k << "]" << std::endl @@ -203,7 +203,7 @@ void systemTest::SystemTestRunner::runTest(bool const &compute_L2_norm_only, dou // ============================================================================= // ============================================================================= -void systemTest::SystemTestRunner::runL1ErrorTest(double const &maxAllowedL1Error, double const &maxAllowedError) +void system_test::SystemTestRunner::runL1ErrorTest(double const &maxAllowedL1Error, double const &maxAllowedError) { /// Only run if this variable is set to `true`. Generally this and /// globalCompareSystemTestResults should only be used for large MPI / tests @@ -329,7 +329,7 @@ void systemTest::SystemTestRunner::runL1ErrorTest(double const &maxAllowedL1Erro // ============================================================================= // ============================================================================= -void systemTest::SystemTestRunner::launchCholla() +void system_test::SystemTestRunner::launchCholla() { // Launch Cholla. Note that this dumps all console output to the console // log file as requested by the user. @@ -355,7 +355,7 @@ void systemTest::SystemTestRunner::launchCholla() // ============================================================================= // ============================================================================= -void systemTest::SystemTestRunner::openHydroTestData() +void system_test::SystemTestRunner::openHydroTestData() { _testHydroFieldsFileVec.resize(numMpiRanks); for (size_t fileIndex = 0; fileIndex < numMpiRanks; fileIndex++) { @@ -368,7 +368,7 @@ void systemTest::SystemTestRunner::openHydroTestData() // ============================================================================= // ============================================================================= -void systemTest::SystemTestRunner::setFiducialData(std::string const &fieldName, std::vector const &dataVec) +void system_test::SystemTestRunner::setFiducialData(std::string const &fieldName, std::vector const &dataVec) { // First check if there's a fiducial data file if (_fiducialDataSets.count(fieldName) > 0) { @@ -383,8 +383,8 @@ void systemTest::SystemTestRunner::setFiducialData(std::string const &fieldName, // ============================================================================= // ============================================================================= -std::vector systemTest::SystemTestRunner::generateConstantData(double const &value, size_t const &nx, - size_t const &ny, size_t const &nz) +std::vector system_test::SystemTestRunner::generateConstantData(double const &value, size_t const &nx, + size_t const &ny, size_t const &nz) { size_t const length = nx * ny * nz; std::vector outVec(length); @@ -396,10 +396,11 @@ std::vector systemTest::SystemTestRunner::generateConstantData(double co // ============================================================================= // ============================================================================= -std::vector systemTest::SystemTestRunner::generateSineData(double const &offset, double const &litude, - double const &kx, double const &ky, double const &kz, - double const &phase, size_t const &nx, - size_t const &ny, size_t const &nz) +std::vector system_test::SystemTestRunner::generateSineData(double const &offset, double const &litude, + double const &kx, double const &ky, + double const &kz, double const &phase, + size_t const &nx, size_t const &ny, + size_t const &nz) { size_t const length = nx * ny * nz; std::vector outVec(length); @@ -419,8 +420,8 @@ std::vector systemTest::SystemTestRunner::generateSineData(double const // ============================================================================= // Constructor -systemTest::SystemTestRunner::SystemTestRunner(bool const &particleData, bool const &hydroData, - bool const &useFiducialFile, bool const &useSettingsFile) +system_test::SystemTestRunner::SystemTestRunner(bool const &particleData, bool const &hydroData, + bool const &useFiducialFile, bool const &useSettingsFile) : _particleDataExists(particleData), _hydroDataExists(hydroData) { // Get the test name, with and underscore instead of a "." since @@ -484,7 +485,7 @@ systemTest::SystemTestRunner::SystemTestRunner(bool const &particleData, bool co // ============================================================================= // Destructor -systemTest::SystemTestRunner::~SystemTestRunner() +system_test::SystemTestRunner::~SystemTestRunner() { _fiducialFile.close(); for (size_t i = 0; i < _testHydroFieldsFileVec.size(); i++) { @@ -503,7 +504,7 @@ systemTest::SystemTestRunner::~SystemTestRunner() // ============================================================================= // ============================================================================= -void systemTest::SystemTestRunner::_checkNumTimeSteps() +void system_test::SystemTestRunner::_checkNumTimeSteps() { int fiducialNSteps, testNSteps; @@ -531,9 +532,9 @@ void systemTest::SystemTestRunner::_checkNumTimeSteps() // ============================================================================= // ============================================================================= -std::vector systemTest::SystemTestRunner::loadTestFieldData(std::string dataSetName, - std::vector &testDims, - std::vector file) +std::vector system_test::SystemTestRunner::loadTestFieldData(std::string dataSetName, + std::vector &testDims, + std::vector file) { // Switch which fileset we're using if it's a particle dataset if (dataSetName == "particle_density") { @@ -619,7 +620,7 @@ std::vector systemTest::SystemTestRunner::loadTestFieldData(std::string // ============================================================================= // ============================================================================= -std::vector systemTest::SystemTestRunner::_loadTestParticleData(std::string const &dataSetName) +std::vector system_test::SystemTestRunner::_loadTestParticleData(std::string const &dataSetName) { // Determine the total number of particles if (_testTotalNumParticles == 0) { @@ -677,7 +678,7 @@ std::vector systemTest::SystemTestRunner::_loadTestParticleData(std::str // ============================================================================= // ============================================================================= -std::vector systemTest::SystemTestRunner::_loadFiducialFieldData(std::string const &dataSetName) +std::vector system_test::SystemTestRunner::_loadFiducialFieldData(std::string const &dataSetName) { if (_fiducialFileExists and (_fiducialDataSets.find(dataSetName) == _fiducialDataSets.end())) { // Open the dataset @@ -704,7 +705,7 @@ std::vector systemTest::SystemTestRunner::_loadFiducialFieldData(std::st // ============================================================================= // ============================================================================= -std::vector systemTest::SystemTestRunner::_loadFiducialParticleData(std::string const &dataSetName) +std::vector system_test::SystemTestRunner::_loadFiducialParticleData(std::string const &dataSetName) { if (_fiducialFileExists) { // Determine the total number of particles @@ -760,7 +761,7 @@ std::vector systemTest::SystemTestRunner::_loadFiducialParticleData(std: // ============================================================================= // ============================================================================= -std::vector systemTest::SystemTestRunner::_findDataSetNames(H5::H5File const &inputFile) +std::vector system_test::SystemTestRunner::_findDataSetNames(H5::H5File const &inputFile) { std::vector outputVector; diff --git a/src/system_tests/system_tester.h b/src/system_tests/system_tester.h index 20e430a3a..7ba1b5a53 100644 --- a/src/system_tests/system_tester.h +++ b/src/system_tests/system_tester.h @@ -21,7 +21,7 @@ * \brief This namespace contains one class, SystemTestRunner, whose * purpose is to (as you might expect) run system tests. */ -namespace systemTest +namespace system_test { /*! * \brief Runs a system test using the full test name to determine all @@ -49,9 +49,9 @@ namespace systemTest * */ class SystemTestRunner; -} // namespace systemTest +} // namespace system_test -class systemTest::SystemTestRunner +class system_test::SystemTestRunner { public: /// The number of MPI ranks, defaults to 1 @@ -62,7 +62,7 @@ class systemTest::SystemTestRunner * replacing the need for a settings file. A string of the launch parameters * that will override the values in the settings file (if given). Any of * Cholla's standard launch paramters work except `outdir` as that is - * reserved for usage in the systemTest::SystemTestRunner.runTest() method + * reserved for usage in the system_test::SystemTestRunner.runTest() method */ std::string chollaLaunchParams; @@ -375,4 +375,4 @@ class systemTest::SystemTestRunner * \return std::vector */ std::vector _findDataSetNames(H5::H5File const &inputFile); -}; // End of class systemTest::SystemTestRunner +}; // End of class system_test::SystemTestRunner diff --git a/src/utils/cuda_utilities_tests.cpp b/src/utils/cuda_utilities_tests.cpp index 08c1004b2..ca834f384 100644 --- a/src/utils/cuda_utilities_tests.cpp +++ b/src/utils/cuda_utilities_tests.cpp @@ -62,8 +62,8 @@ TEST(tHYDROCudaUtilsGetRealIndices, CorrectInputExpectCorrectOutput) std::vector test_indices{is, ie, js, je, ks, ke}; for (size_t j = 0; j < test_indices.size(); j++) { - testingUtilities::checkResults(fiducial_indices[i][j], test_indices[j], - index_names[j] + " " + parameters.names[i]); + testing_utilities::checkResults(fiducial_indices[i][j], test_indices[j], + index_names[j] + " " + parameters.names[i]); } } } diff --git a/src/utils/hydro_utilities_tests.cpp b/src/utils/hydro_utilities_tests.cpp index 7bab43b69..ff92f57a4 100644 --- a/src/utils/hydro_utilities_tests.cpp +++ b/src/utils/hydro_utilities_tests.cpp @@ -72,7 +72,7 @@ TEST(tHYDROtMHDHydroUtilsCalcPressurePrimitive, CorrectInputExpectCorrectOutput) parameters.E.at(i), parameters.d.at(i), parameters.vx.at(i), parameters.vy.at(i), parameters.vz.at(i), parameters.gamma, parameters.magnetic_x.at(i), parameters.magnetic_y.at(i), parameters.magnetic_z.at(i)); - testingUtilities::checkResults(fiducial_pressure.at(i), test_Ps, parameters.names.at(i)); + testing_utilities::checkResults(fiducial_pressure.at(i), test_Ps, parameters.names.at(i)); } } @@ -90,7 +90,7 @@ TEST(tHYDROtMHDHydroUtilsCalcPressureConserved, CorrectInputExpectCorrectOutput) parameters.E.at(i), parameters.d.at(i), parameters.mx.at(i), parameters.my.at(i), parameters.mz.at(i), parameters.gamma, parameters.magnetic_x.at(i), parameters.magnetic_y.at(i), parameters.magnetic_z.at(i)); - testingUtilities::checkResults(fiducial_pressure.at(i), test_pressure, parameters.names.at(i)); + testing_utilities::checkResults(fiducial_pressure.at(i), test_pressure, parameters.names.at(i)); } } @@ -133,7 +133,7 @@ TEST(tHYDROHydroUtilsCalcTemp, CorrectInputExpectCorrectOutput) for (size_t i = 0; i < parameters.names.size(); i++) { Real test_Ts = hydro_utilities::Calc_Temp(parameters.P.at(i), parameters.n.at(i)); - testingUtilities::checkResults(fiducial_Ts.at(i), test_Ts, parameters.names.at(i)); + testing_utilities::checkResults(fiducial_Ts.at(i), test_Ts, parameters.names.at(i)); } } @@ -147,7 +147,7 @@ TEST(tHYDROHydroUtilsCalcTempDE, CorrectInputExpectCorrectOutput) Real test_Ts = hydro_utilities::Calc_Temp_DE(parameters.d.at(i), parameters.ge.at(i), parameters.gamma, parameters.n.at(i)); - testingUtilities::checkResults(fiducial_Ts.at(i), test_Ts, parameters.names.at(i)); + testing_utilities::checkResults(fiducial_Ts.at(i), test_Ts, parameters.names.at(i)); } } #endif // DE @@ -166,7 +166,7 @@ TEST(tHYDROtMHDHydroUtilsCalcEnergyPrimitive, CorrectInputExpectCorrectOutput) parameters.P.at(i), parameters.d.at(i), parameters.vx.at(i), parameters.vy.at(i), parameters.vz.at(i), parameters.gamma, parameters.magnetic_x.at(i), parameters.magnetic_y.at(i), parameters.magnetic_z.at(i)); - testingUtilities::checkResults(fiducial_energy.at(i), test_Es, parameters.names.at(i)); + testing_utilities::checkResults(fiducial_energy.at(i), test_Es, parameters.names.at(i)); } } @@ -184,7 +184,7 @@ TEST(tHYDROtMHDHydroUtilsCalcEnergyConserved, CorrectInputExpectCorrectOutput) parameters.P.at(i), parameters.d.at(i), parameters.mx.at(i), parameters.my.at(i), parameters.mz.at(i), parameters.gamma, parameters.magnetic_x.at(i), parameters.magnetic_y.at(i), parameters.magnetic_z.at(i)); - testingUtilities::checkResults(fiducial_energy.at(i), test_Es, parameters.names.at(i)); + testing_utilities::checkResults(fiducial_energy.at(i), test_Es, parameters.names.at(i)); } } @@ -201,7 +201,7 @@ TEST(tHYDROtMHDHydroUtilsCalcEnergyPrimitive, NegativePressureExpectAutomaticFix -parameters.P.at(i), parameters.d.at(i), parameters.vx.at(i), parameters.vy.at(i), parameters.vz.at(i), parameters.gamma, parameters.magnetic_x.at(i), parameters.magnetic_y.at(i), parameters.magnetic_z.at(i)); - testingUtilities::checkResults(fiducial_energy.at(i), test_Es, parameters.names.at(i)); + testing_utilities::checkResults(fiducial_energy.at(i), test_Es, parameters.names.at(i)); } } @@ -218,7 +218,7 @@ TEST(tHYDROtMHDHydroUtilsCalcEnergyConserved, NegativePressureExpectAutomaticFix -parameters.P.at(i), parameters.d.at(i), parameters.mx.at(i), parameters.my.at(i), parameters.mz.at(i), parameters.gamma, parameters.magnetic_x.at(i), parameters.magnetic_y.at(i), parameters.magnetic_z.at(i)); - testingUtilities::checkResults(fiducial_energy.at(i), test_Es, parameters.names.at(i)); + testing_utilities::checkResults(fiducial_energy.at(i), test_Es, parameters.names.at(i)); } } @@ -231,7 +231,7 @@ TEST(tHYDROHydroUtilsGetPressureFromDE, CorrectInputExpectCorrectOutput) Real test_Ps = hydro_utilities::Get_Pressure_From_DE(parameters.E.at(i), parameters.U_total.at(i), parameters.U_advected.at(i), parameters.gamma); - testingUtilities::checkResults(fiducial_Ps.at(i), test_Ps, parameters.names.at(i)); + testing_utilities::checkResults(fiducial_Ps.at(i), test_Ps, parameters.names.at(i)); } } @@ -245,7 +245,7 @@ TEST(tHYDROtMHDCalcKineticEnergyFromVelocity, CorrectInputExpectCorrectOutput) Real testEnergy = hydro_utilities::Calc_Kinetic_Energy_From_Velocity( coef * parameters.d.at(i), coef * parameters.vx.at(i), coef * parameters.vy.at(i), coef * parameters.vz.at(i)); - testingUtilities::checkResults(fiducialEnergies.at(i), testEnergy, parameters.names.at(i)); + testing_utilities::checkResults(fiducialEnergies.at(i), testEnergy, parameters.names.at(i)); } } @@ -259,6 +259,6 @@ TEST(tHYDROtMHDCalcKineticEnergyFromMomentum, CorrectInputExpectCorrectOutput) Real testEnergy = hydro_utilities::Calc_Kinetic_Energy_From_Momentum( coef * parameters.d.at(i), coef * parameters.mx.at(i), coef * parameters.my.at(i), coef * parameters.mz.at(i)); - testingUtilities::checkResults(fiducialEnergies.at(i), testEnergy, parameters.names.at(i)); + testing_utilities::checkResults(fiducialEnergies.at(i), testEnergy, parameters.names.at(i)); } } \ No newline at end of file diff --git a/src/utils/math_utilities_tests.cpp b/src/utils/math_utilities_tests.cpp index 665a5981c..5af0ec5ca 100644 --- a/src/utils/math_utilities_tests.cpp +++ b/src/utils/math_utilities_tests.cpp @@ -31,9 +31,9 @@ TEST(tALLRotateCoords, CorrectInputExpectCorrectOutput) auto [x_1_rot, x_2_rot, x_3_rot] = math_utils::rotateCoords(x_1, x_2, x_3, pitch, yaw); - testingUtilities::checkResults<0>(x_1_rot_fid, x_1_rot, "x_1 rotated values"); - testingUtilities::checkResults<0>(x_2_rot_fid, x_2_rot, "x_2 rotated values"); - testingUtilities::checkResults<0>(x_3_rot_fid, x_3_rot, "x_3 rotated values"); + testing_utilities::checkResults<0>(x_1_rot_fid, x_1_rot, "x_1 rotated values"); + testing_utilities::checkResults<0>(x_2_rot_fid, x_2_rot, "x_2 rotated values"); + testing_utilities::checkResults<0>(x_3_rot_fid, x_3_rot, "x_3 rotated values"); } // ============================================================================= @@ -54,6 +54,6 @@ TEST(tALLDotProduct, CorrectInputExpectCorrectOutput) testDotProduct = math_utils::dotProduct(a.at(0), a.at(1), a.at(2), b.at(0), b.at(1), b.at(2)); // Now check results - testingUtilities::checkResults(fiducialDotProduct, testDotProduct, "dot product"); + testing_utilities::checkResults(fiducialDotProduct, testDotProduct, "dot product"); } // ========================================================================= \ No newline at end of file diff --git a/src/utils/mhd_utilities_tests.cu b/src/utils/mhd_utilities_tests.cu index 980259d28..ab33836a7 100644 --- a/src/utils/mhd_utilities_tests.cu +++ b/src/utils/mhd_utilities_tests.cu @@ -66,7 +66,7 @@ TEST(tMHDComputeThermalEnergy, CorrectInputExpectCorrectOutput) parameters.momentumY.at(i), parameters.momentumZ.at(i), parameters.magneticX.at(i), parameters.magneticY.at(i), parameters.magneticZ.at(i), parameters.gamma); - testingUtilities::checkResults(fiducialGasPressures.at(i), testGasPressure, parameters.names.at(i)); + testing_utilities::checkResults(fiducialGasPressures.at(i), testGasPressure, parameters.names.at(i)); } } // ============================================================================= @@ -91,7 +91,7 @@ TEST(tMHDcomputeMagneticEnergy, CorrectInputExpectCorrectOutput) Real testMagneticEnergy = mhd::utils::computeMagneticEnergy(parameters.magneticX.at(i), parameters.magneticY.at(i), parameters.magneticZ.at(i)); - testingUtilities::checkResults(fiducialEnergy.at(i), testMagneticEnergy, parameters.names.at(i)); + testing_utilities::checkResults(fiducialEnergy.at(i), testMagneticEnergy, parameters.names.at(i)); } } // ============================================================================= @@ -115,7 +115,7 @@ TEST(tMHDComputeTotalPressure, CorrectInputExpectCorrectOutput) Real testTotalPressure = mhd::utils::computeTotalPressure(parameters.pressureGas.at(i), parameters.magneticX.at(i), parameters.magneticY.at(i), parameters.magneticZ.at(i)); - testingUtilities::checkResults(fiducialTotalPressures.at(i), testTotalPressure, parameters.names.at(i)); + testing_utilities::checkResults(fiducialTotalPressures.at(i), testTotalPressure, parameters.names.at(i)); } } @@ -165,8 +165,8 @@ TEST(tMHDFastMagnetosonicSpeed, CorrectInputExpectCorrectOutput) coef.at(i) * parameters.magneticX.at(i), coef.at(i) * parameters.magneticY.at(i), coef.at(i) * parameters.magneticZ.at(i), parameters.gamma); - testingUtilities::checkResults(fiducialFastMagnetosonicSpeed.at(i), testFastMagnetosonicSpeed, - parameters.names.at(i)); + testing_utilities::checkResults(fiducialFastMagnetosonicSpeed.at(i), testFastMagnetosonicSpeed, + parameters.names.at(i)); } } @@ -188,8 +188,8 @@ TEST(tMHDFastMagnetosonicSpeed, NegativeDensityExpectAutomaticFix) coef.at(i) * parameters.magneticX.at(i), coef.at(i) * parameters.magneticY.at(i), coef.at(i) * parameters.magneticZ.at(i), parameters.gamma); - testingUtilities::checkResults(fiducialFastMagnetosonicSpeed.at(i), testFastMagnetosonicSpeed, - parameters.names.at(i)); + testing_utilities::checkResults(fiducialFastMagnetosonicSpeed.at(i), testFastMagnetosonicSpeed, + parameters.names.at(i)); } } // ============================================================================= @@ -217,8 +217,8 @@ TEST(tMHDSlowMagnetosonicSpeed, CorrectInputExpectCorrectOutput) parameters.density.at(i) * coef, parameters.pressureGas.at(i) * coef, parameters.magneticX.at(i) * coef, parameters.magneticY.at(i) * coef, parameters.magneticZ.at(i) * coef, parameters.gamma); - testingUtilities::checkResults(fiducialSlowMagnetosonicSpeed.at(i), testSlowMagnetosonicSpeed, - parameters.names.at(i)); + testing_utilities::checkResults(fiducialSlowMagnetosonicSpeed.at(i), testSlowMagnetosonicSpeed, + parameters.names.at(i)); } } @@ -240,8 +240,8 @@ TEST(tMHDSlowMagnetosonicSpeed, NegativeDensityExpectAutomaticFix) -parameters.density.at(i) * coef, parameters.pressureGas.at(i) * coef, parameters.magneticX.at(i) * coef, parameters.magneticY.at(i) * coef, parameters.magneticZ.at(i) * coef, parameters.gamma); - testingUtilities::checkResults(fiducialSlowMagnetosonicSpeed.at(i), testSlowMagnetosonicSpeed, - parameters.names.at(i)); + testing_utilities::checkResults(fiducialSlowMagnetosonicSpeed.at(i), testSlowMagnetosonicSpeed, + parameters.names.at(i)); } } // ============================================================================= @@ -264,7 +264,7 @@ TEST(tMHDAlfvenSpeed, CorrectInputExpectCorrectOutput) for (size_t i = 0; i < parameters.names.size(); i++) { Real testAlfvenSpeed = mhd::utils::alfvenSpeed(parameters.magneticX.at(i), parameters.density.at(i)); - testingUtilities::checkResults(fiducialAlfvenSpeed.at(i), testAlfvenSpeed, parameters.names.at(i)); + testing_utilities::checkResults(fiducialAlfvenSpeed.at(i), testAlfvenSpeed, parameters.names.at(i)); } } @@ -281,7 +281,7 @@ TEST(tMHDAlfvenSpeed, NegativeDensityExpectAutomaticFix) for (size_t i = 0; i < parameters.names.size(); i++) { Real testAlfvenSpeed = mhd::utils::alfvenSpeed(parameters.magneticX.at(i), -parameters.density.at(i)); - testingUtilities::checkResults(fiducialAlfvenSpeed.at(i), testAlfvenSpeed, parameters.names.at(i)); + testing_utilities::checkResults(fiducialAlfvenSpeed.at(i), testAlfvenSpeed, parameters.names.at(i)); } } // ============================================================================= @@ -316,9 +316,9 @@ TEST(tMHDCellCenteredMagneticFields, CorrectInputExpectCorrectOutput) mhd::utils::cellCenteredMagneticFields(testGrid.data(), id, xid, yid, zid, n_cells, nx, ny); // Check the results - testingUtilities::checkResults(fiducialAvgBx, testAvgBx, "cell centered Bx value"); - testingUtilities::checkResults(fiducialAvgBy, testAvgBy, "cell centered By value"); - testingUtilities::checkResults(fiducialAvgBz, testAvgBz, "cell centered Bz value"); + testing_utilities::checkResults(fiducialAvgBx, testAvgBx, "cell centered Bx value"); + testing_utilities::checkResults(fiducialAvgBy, testAvgBy, "cell centered By value"); + testing_utilities::checkResults(fiducialAvgBz, testAvgBz, "cell centered Bz value"); } #endif // MHD // ============================================================================= @@ -369,13 +369,13 @@ TEST(tMHDInitMagneticFieldWithVectorPotential, CorrectInputExpectCorrectOutput) for (size_t i = 0; i < conserved_vector.size(); i++) { if (i == 47) { - testingUtilities::checkResults(bx_fiducial, conserved_vector.at(i), "value at i = " + std::to_string(i)); + testing_utilities::checkResults(bx_fiducial, conserved_vector.at(i), "value at i = " + std::to_string(i)); } else if (i == 55) { - testingUtilities::checkResults(by_fiducial, conserved_vector.at(i), "value at i = " + std::to_string(i)); + testing_utilities::checkResults(by_fiducial, conserved_vector.at(i), "value at i = " + std::to_string(i)); } else if (i == 63) { - testingUtilities::checkResults(bz_fiducial, conserved_vector.at(i), "value at i = " + std::to_string(i)); + testing_utilities::checkResults(bz_fiducial, conserved_vector.at(i), "value at i = " + std::to_string(i)); } else { - testingUtilities::checkResults(default_fiducial, conserved_vector.at(i), "value at i = " + std::to_string(i)); + testing_utilities::checkResults(default_fiducial, conserved_vector.at(i), "value at i = " + std::to_string(i)); } } } diff --git a/src/utils/reduction_utilities_tests.cu b/src/utils/reduction_utilities_tests.cu index e689e2a5f..40241ec66 100644 --- a/src/utils/reduction_utilities_tests.cu +++ b/src/utils/reduction_utilities_tests.cu @@ -63,7 +63,7 @@ TEST(tALLKernelReduceMax, CorrectInputExpectCorrectOutput) CudaCheckError(); // Perform comparison - testingUtilities::checkResults(maxValue, dev_max.at(0), "maximum value found"); + testing_utilities::checkResults(maxValue, dev_max.at(0), "maximum value found"); } // ============================================================================= // Tests for divergence max reduction diff --git a/src/utils/testing_utilities.cpp b/src/utils/testing_utilities.cpp index 02aaadd68..19f38eefd 100644 --- a/src/utils/testing_utilities.cpp +++ b/src/utils/testing_utilities.cpp @@ -18,7 +18,7 @@ #include "../system_tests/system_tester.h" // provide systemTest class #include "../utils/testing_utilities.h" // Include the header file -namespace testingUtilities +namespace testing_utilities { // ========================================================================= int64_t ulpsDistanceDbl(double const &a, double const &b) @@ -91,7 +91,7 @@ void wrapperEqual(int i, int j, int k, std::string const &dataSetName, double te ASSERT_NO_FATAL_FAILURE(checkResults<1>(fid_value, test_value, outString, fixedEpsilon)); } -void analyticConstant(systemTest::SystemTestRunner testObject, std::string const &dataSetName, double value) +void analyticConstant(system_test::SystemTestRunner testObject, std::string const &dataSetName, double value) { std::vector testDims(3, 1); std::vector testData = testObject.loadTestFieldData(dataSetName, testDims); @@ -106,7 +106,7 @@ void analyticConstant(systemTest::SystemTestRunner testObject, std::string const } } -void analyticSine(systemTest::SystemTestRunner testObject, std::string const &dataSetName, double constant, +void analyticSine(system_test::SystemTestRunner testObject, std::string const &dataSetName, double constant, double amplitude, double kx, double ky, double kz, double phase, double tolerance) { std::vector testDims(3, 1); @@ -122,4 +122,4 @@ void analyticSine(systemTest::SystemTestRunner testObject, std::string const &da } } -} // namespace testingUtilities +} // namespace testing_utilities diff --git a/src/utils/testing_utilities.h b/src/utils/testing_utilities.h index 7057e01e9..e4414a6e4 100644 --- a/src/utils/testing_utilities.h +++ b/src/utils/testing_utilities.h @@ -26,7 +26,7 @@ * considered compatible with CUDA/HIP. * */ -namespace testingUtilities +namespace testing_utilities { // ========================================================================= /*! @@ -98,9 +98,9 @@ bool nearlyEqualDbl(double const &a, double const &b, double &absoluteDiff, int6 void wrapperEqual(int i, int j, int k, std::string const &dataSetName, double test_value, double fid_value, double fixedEpsilon); -void analyticConstant(systemTest::SystemTestRunner testObject, std::string const &dataSetName, double value); +void analyticConstant(system_test::SystemTestRunner testObject, std::string const &dataSetName, double value); -void analyticSine(systemTest::SystemTestRunner testObject, std::string const &dataSetName, double constant, +void analyticSine(system_test::SystemTestRunner testObject, std::string const &dataSetName, double constant, double amplitude, double kx, double ky, double kz, double phase, double tolerance); // ========================================================================= @@ -129,12 +129,12 @@ void checkResults(double fiducialNumber, double testNumber, std::string const &o bool areEqual; if ((fixedEpsilon < 0) and (ulpsEpsilon < 0)) { - areEqual = testingUtilities::nearlyEqualDbl(fiducialNumber, testNumber, absoluteDiff, ulpsDiff); + areEqual = testing_utilities::nearlyEqualDbl(fiducialNumber, testNumber, absoluteDiff, ulpsDiff); } else if ((fixedEpsilon > 0) and (ulpsEpsilon < 0)) { - areEqual = testingUtilities::nearlyEqualDbl(fiducialNumber, testNumber, absoluteDiff, ulpsDiff, fixedEpsilon); + areEqual = testing_utilities::nearlyEqualDbl(fiducialNumber, testNumber, absoluteDiff, ulpsDiff, fixedEpsilon); } else { - areEqual = - testingUtilities::nearlyEqualDbl(fiducialNumber, testNumber, absoluteDiff, ulpsDiff, fixedEpsilon, ulpsEpsilon); + areEqual = testing_utilities::nearlyEqualDbl(fiducialNumber, testNumber, absoluteDiff, ulpsDiff, fixedEpsilon, + ulpsEpsilon); } std::stringstream outputMessage; @@ -189,13 +189,13 @@ class GlobalString ~GlobalString() = default; }; // ========================================================================= -} // namespace testingUtilities +} // namespace testing_utilities // Declare the global string variables so everything that imports this file // has access to them -extern testingUtilities::GlobalString globalChollaRoot; -extern testingUtilities::GlobalString globalChollaBuild; -extern testingUtilities::GlobalString globalChollaMachine; -extern testingUtilities::GlobalString globalMpiLauncher; +extern testing_utilities::GlobalString globalChollaRoot; +extern testing_utilities::GlobalString globalChollaBuild; +extern testing_utilities::GlobalString globalChollaMachine; +extern testing_utilities::GlobalString globalMpiLauncher; extern bool globalRunCholla; extern bool globalCompareSystemTestResults; From bb0097ab01e1793052d70130cb7293ea95e26309 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Mon, 10 Jul 2023 15:26:28 -0400 Subject: [PATCH 532/694] Enable several naming checks Enabled checks for - macro definitions - typdefs - aliases - enums --- .clang-tidy | 8 ++++---- src/particles/feedback_CIC_gpu.cu | 26 +++++++++++++------------- 2 files changed, 17 insertions(+), 17 deletions(-) diff --git a/.clang-tidy b/.clang-tidy index 67221c3a7..6319e80ff 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -149,10 +149,10 @@ CheckOptions: # readability-identifier-naming.VariableCase: 'lower_case' # readability-identifier-naming.FunctionCase: 'Camel_Snake_Case' readability-identifier-naming.NamespaceCase: 'lower_case' - # readability-identifier-naming.MacroDefinitionCase: 'UPPER_CASE' - # readability-identifier-naming.TypedefCase: 'CamelCase' - # readability-identifier-naming.TypeAliasCase: 'CamelCase' - # readability-identifier-naming.EnumCase: 'CamelCase' + readability-identifier-naming.MacroDefinitionCase: 'UPPER_CASE' + readability-identifier-naming.TypedefCase: 'CamelCase' + readability-identifier-naming.TypeAliasCase: 'CamelCase' + readability-identifier-naming.EnumCase: 'CamelCase' # readability-identifier-naming.ConstantCase: 'lower_case' # readability-identifier-naming.ConstantPrefix: 'k_' diff --git a/src/particles/feedback_CIC_gpu.cu b/src/particles/feedback_CIC_gpu.cu index 75bf1f5e8..50c93c858 100644 --- a/src/particles/feedback_CIC_gpu.cu +++ b/src/particles/feedback_CIC_gpu.cu @@ -18,18 +18,18 @@ #define TPB_FEEDBACK 128 #define FEED_INFO_N 6 - #define i_RES 1 - #define i_UNRES 2 - #define i_ENERGY 3 - #define i_MOMENTUM 4 - #define i_UNRES_ENERGY 5 + #define I_RES 1 // unused + #define I_UNRES 2 // unused + #define I_ENERGY 3 // unused + #define I_MOMENTUM 4 // unused + #define I_UNRES_ENERGY 5 // used -typedef curandStateMRG32k3a_t feedback_prng_t; -// typedef curandStatePhilox4_32_10_t feedback_prng_t; +typedef curandStateMRG32k3a_t FeedbackPrng; +// typedef curandStatePhilox4_32_10_t FeedbackPrng; namespace supernova { -feedback_prng_t* randStates; +FeedbackPrng* randStates; part_int_t n_states; Real *dev_snr, snr_dt, time_sn_start, time_sn_end; int snr_n; @@ -48,7 +48,7 @@ __device__ double atomicMax(double* address, double val) } #endif // O_HIP -__global__ void initState_kernel(unsigned int seed, feedback_prng_t* states) +__global__ void initState_kernel(unsigned int seed, FeedbackPrng* states) { int id = blockIdx.x * blockDim.x + threadIdx.x; curand_init(seed, id, 0, &states[id]); @@ -131,7 +131,7 @@ void supernova::initState(struct parameters* P, part_int_t n_local, Real allocat // Now initialize the poisson random number generator state. n_states = n_local * allocation_factor; - cudaMalloc((void**)&randStates, n_states * sizeof(feedback_prng_t)); + cudaMalloc((void**)&randStates, n_states * sizeof(FeedbackPrng)); int ngrid = (n_states - 1) / TPB_FEEDBACK + 1; dim3 grid(ngrid); @@ -233,7 +233,7 @@ __global__ void Cluster_Feedback_Kernel(part_int_t n_local, part_int_t* id, Real Real xMax, Real yMax, Real zMax, Real dx, Real dy, Real dz, int nx_g, int ny_g, int nz_g, int n_ghost, Real t, Real dt, Real* dti, Real* info, Real* density, Real* gasEnergy, Real* energy, Real* momentum_x, Real* momentum_y, - Real* momentum_z, Real gamma, feedback_prng_t* states, Real* prev_dens, + Real* momentum_z, Real gamma, FeedbackPrng* states, Real* prev_dens, int* prev_N, short direction, Real* dev_snr, Real snr_dt, Real time_sn_start, Real time_sn_end, int n_step) { @@ -308,7 +308,7 @@ __global__ void Cluster_Feedback_Kernel(part_int_t n_local, part_int_t* id, Real // N = (int) (average_num_sn + 0.5); - feedback_prng_t state; // = states[0]; // load initial state + FeedbackPrng state; // = states[0]; // load initial state curand_init(42, 0, 0, &state); unsigned long long skip = n_step * 10000 + id[gtid]; @@ -543,7 +543,7 @@ __global__ void Cluster_Feedback_Kernel(part_int_t n_local, part_int_t* id, Real // atomicAdd( &energy[indx], e ); // atomicAdd( &density[indx], d ); - s_info[FEED_INFO_N * tid + i_UNRES_ENERGY] += + s_info[FEED_INFO_N * tid + I_UNRES_ENERGY] += direction * (px * px + py * py + pz * pz) / 2 / density[indx] * dV; if (abs(momentum_x[indx] / density[indx]) >= C_L) { From 568fd46423486b3586c7e50743194637ee501893 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Mon, 10 Jul 2023 15:42:45 -0400 Subject: [PATCH 533/694] Enable class naming check The ClassCase does cover structs so I removed the struct case line --- .clang-tidy | 4 +--- src/mhd/ct_electric_fields_tests.cu | 1 + src/mhd/magnetic_update_tests.cu | 1 + src/riemann_solvers/hllc_cuda_tests.cu | 1 + src/riemann_solvers/hlld_cuda_tests.cu | 31 +++++++++++++------------ src/system_tests/hydro_system_tests.cpp | 4 +++- src/system_tests/mhd_system_tests.cpp | 4 ++++ src/utils/mhd_utilities_tests.cu | 22 +++++++++--------- 8 files changed, 38 insertions(+), 30 deletions(-) diff --git a/.clang-tidy b/.clang-tidy index 6319e80ff..48813b746 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -158,13 +158,11 @@ CheckOptions: # readability-identifier-naming.ConstantPrefix: 'k_' # readability-identifier-naming.GlobalVariablePrefix: 'g_' - # readability-identifier-naming.ClassCase: 'CamelCase' + readability-identifier-naming.ClassCase: 'CamelCase' # readability-identifier-naming.MemberCase: 'lower_case' # This entry might not be needed # readability-identifier-naming.MethodCase: 'CamelCase' # This entry might not be needed # readability-identifier-naming.PrivateMemberSuffix: '_' # readability-identifier-naming.PrivateMethodSuffix: '_' - # readability-identifier-naming.StructCase: 'CamelCase' # This entry might not be needed - hicpp-signed-bitwise.IgnorePositiveIntegerLiterals: 'true' ... diff --git a/src/mhd/ct_electric_fields_tests.cu b/src/mhd/ct_electric_fields_tests.cu index e9b3ce0d9..2d20dab9f 100644 --- a/src/mhd/ct_electric_fields_tests.cu +++ b/src/mhd/ct_electric_fields_tests.cu @@ -31,6 +31,7 @@ * \brief Test fixture for tMHDCalculateCTElectricFields test suite * */ +// NOLINTNEXTLINE(readability-identifier-naming) class tMHDCalculateCTElectricFields : public ::testing::Test { public: diff --git a/src/mhd/magnetic_update_tests.cu b/src/mhd/magnetic_update_tests.cu index 3719191e5..dd3e3fc26 100644 --- a/src/mhd/magnetic_update_tests.cu +++ b/src/mhd/magnetic_update_tests.cu @@ -25,6 +25,7 @@ * \brief Test fixture for tMHDUpdateMagneticField3D test suite * */ +// NOLINTNEXTLINE(readability-identifier-naming) class tMHDUpdateMagneticField3D : public ::testing::Test { public: diff --git a/src/riemann_solvers/hllc_cuda_tests.cu b/src/riemann_solvers/hllc_cuda_tests.cu index 555aed966..8ef6f7881 100644 --- a/src/riemann_solvers/hllc_cuda_tests.cu +++ b/src/riemann_solvers/hllc_cuda_tests.cu @@ -28,6 +28,7 @@ custom user output then performs all the required running and testing * */ +// NOLINTNEXTLINE(readability-identifier-naming) class tHYDROCalculateHLLCFluxesCUDA : public ::testing::Test { protected: diff --git a/src/riemann_solvers/hlld_cuda_tests.cu b/src/riemann_solvers/hlld_cuda_tests.cu index c425f15fc..51ac6eca1 100644 --- a/src/riemann_solvers/hlld_cuda_tests.cu +++ b/src/riemann_solvers/hlld_cuda_tests.cu @@ -36,6 +36,7 @@ Effectively takes the left state, right state, fiducial fluxes, and custom user output then performs all the required running and testing * */ +// NOLINTNEXTLINE(readability-identifier-naming) class tMHDCalculateHLLDFluxesCUDA : public ::testing::Test { protected: @@ -1807,7 +1808,7 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, UnphysicalValuesExpectAutomaticFix) */ namespace { -struct testParams { +struct TestParams { // List of cases std::vector names{"Case 1", "Case 2"}; @@ -1857,7 +1858,7 @@ struct testParams { {-22.40376497145191, -19.710500632936679, -0.81760587897407833, 9.6740190040662242, 24.295526347371595}, {-11.190385012513822, -4.4880642018724357, -0.026643804611559244, 3.4191202933087519, 12.519790189404299}}; - testParams() = default; + TestParams() = default; }; } // namespace // ========================================================================= @@ -1869,7 +1870,7 @@ struct testParams { */ TEST(tMHDHlldInternalApproximateLRWaveSpeeds, CorrectInputExpectCorrectOutput) { - testParams const parameters; + TestParams const parameters; std::vector const fiducialSpeedL{-22.40376497145191, -11.190385012513822}; std::vector const fiducialSpeedR{24.295526347371595, 12.519790189404299}; @@ -1891,7 +1892,7 @@ TEST(tMHDHlldInternalApproximateLRWaveSpeeds, CorrectInputExpectCorrectOutput) */ TEST(tMHDHlldInternalApproximateMiddleWaveSpeed, CorrectInputExpectCorrectOutput) { - testParams const parameters; + TestParams const parameters; std::vector const fiducialSpeedM{-0.81760587897407833, -0.026643804611559244}; @@ -1914,7 +1915,7 @@ TEST(tMHDHlldInternalApproximateMiddleWaveSpeed, CorrectInputExpectCorrectOutput */ TEST(tMHDHlldInternalApproximateStarWaveSpeed, CorrectInputExpectCorrectOutput) { - testParams const parameters; + TestParams const parameters; std::vector const fiducialSpeedStarL{-18.18506608966894, -4.2968910457518161}; std::vector const fiducialSpeedStarR{12.420292938368167, 3.6786718447209252}; @@ -1940,7 +1941,7 @@ TEST(tMHDHlldInternalApproximateStarWaveSpeed, CorrectInputExpectCorrectOutput) */ TEST(tMHDHlldInternalNonStarFluxes, CorrectInputExpectCorrectOutput) { - testParams const parameters; + TestParams const parameters; std::vector fiducialFlux{ {38.504606872151484, -3088.4810263278778, -1127.8835013070616, -4229.5657456907293, -12344.460641662206, @@ -1978,7 +1979,7 @@ TEST(tMHDHlldInternalNonStarFluxes, CorrectInputExpectCorrectOutput) */ TEST(tMHDHlldInternalComputeStarState, CorrectInputNonDegenerateExpectCorrectOutput) { - testParams const parameters; + TestParams const parameters; std::vector fiducialStarState{ {24.101290139122913, 1.4626377138501221, 5.7559806612277464, 1023.8840191068900, 18.648382121236992, @@ -2012,7 +2013,7 @@ TEST(tMHDHlldInternalComputeStarState, CorrectInputNonDegenerateExpectCorrectOut */ TEST(tMHDHlldInternalStarFluxes, CorrectInputNonDegenerateExpectCorrectOutput) { - testParams const parameters; + TestParams const parameters; std::vector fiducialFlux{ {-45.270724071132321, 1369.1771532285088, -556.91765728768155, -2368.4452742393819, -21413.063415617500, @@ -2054,7 +2055,7 @@ TEST(tMHDHlldInternalStarFluxes, CorrectInputNonDegenerateExpectCorrectOutput) */ TEST(tMHDHlldInternalComputeStarState, CorrectInputDegenerateExpectCorrectOutput) { - testParams parameters; + TestParams parameters; std::vector fiducialStarState{ {24.101290139122913, 1.4626377138501221, 5.7559806612277464, 4.5171065808847731e+17, 18.648382121236992, @@ -2087,7 +2088,7 @@ TEST(tMHDHlldInternalComputeStarState, CorrectInputDegenerateExpectCorrectOutput TEST(tMHDHlldInternalStarFluxes, CorrectInputDegenerateExpectCorrectOutput) { - testParams parameters; + TestParams parameters; // Used to get us into the degenerate case double const totalPressureStarMultiplier = 1E15; @@ -2133,7 +2134,7 @@ TEST(tMHDHlldInternalStarFluxes, CorrectInputDegenerateExpectCorrectOutput) */ TEST(tMHDHlldInternalDoubleStarState, CorrectInputNonDegenerateExpectCorrectOutput) { - testParams const parameters; + TestParams const parameters; std::vector fiducialState{ {-1.5775383335759607, -3.4914062207842482, 45.259313435283325, 36.670978215630669, -2048.1953674500523, @@ -2169,7 +2170,7 @@ TEST(tMHDHlldInternalDoubleStarState, CorrectInputNonDegenerateExpectCorrectOutp */ TEST(tMHDHlldInternalDoubleStarState, CorrectInputDegenerateExpectCorrectOutput) { - testParams const parameters; + TestParams const parameters; std::vector fiducialState{ {1.0519818825796206, 0.68198273634686157, 26.835645069149873, 7.4302316959173442, 0.0, 90.44484278669114}, @@ -2204,7 +2205,7 @@ TEST(tMHDHlldInternalDoubleStarState, CorrectInputDegenerateExpectCorrectOutput) */ TEST(tMHDHlldInternalDoubleStarFluxes, CorrectInputExpectCorrectOutput) { - testParams const parameters; + TestParams const parameters; std::vector const fiducialFlux{ {-144.2887586578122, 1450.1348804310369, -332.80193639987715, 83.687152337186944, 604.70003506833029, @@ -2330,7 +2331,7 @@ TEST(tMHDHlldInternalReturnFluxes, CorrectInputExpectCorrectOutput) */ TEST(tMHDHlldInternalStarTotalPressure, CorrectInputExpectCorrectOutput) { - testParams const parameters; + TestParams const parameters; std::vector const fiducialPressure{6802.2800807224075, 3476.1984612875144}; @@ -2352,7 +2353,7 @@ TEST(tMHDHlldInternalStarTotalPressure, CorrectInputExpectCorrectOutput) */ TEST(tMHDHlldInternalLoadState, CorrectInputExpectCorrectOutput) { - testParams const parameters; + TestParams const parameters; int const threadId = 0; int const n_cells = 10; std::vector interfaceArray(n_cells * grid_enum::num_fields); diff --git a/src/system_tests/hydro_system_tests.cpp b/src/system_tests/hydro_system_tests.cpp index ad1f31aa1..a1d232018 100644 --- a/src/system_tests/hydro_system_tests.cpp +++ b/src/system_tests/hydro_system_tests.cpp @@ -26,7 +26,8 @@ * */ /// @{ -class tHYDROtMHDSYSTEMSodShockTubeParameterizedMpi : public ::testing::TestWithParam +// NOLINTNEXTLINE(readability-identifier-naming) +class tHYDROSYSTEMSodShockTubeParameterizedMpi : public ::testing::TestWithParam { protected: system_test::SystemTestRunner sodTest; @@ -145,6 +146,7 @@ TEST(tHYDROtMHDSYSTEMSoundWave3D, CorrectInputExpectCorrectOutput) * */ /// @{ +// NOLINTNEXTLINE(readability-identifier-naming) class tHYDROtMHDSYSTEMLinearWavesParameterizedMpi : public ::testing::TestWithParam { public: diff --git a/src/system_tests/mhd_system_tests.cpp b/src/system_tests/mhd_system_tests.cpp index 5af66e352..150edb8c0 100644 --- a/src/system_tests/mhd_system_tests.cpp +++ b/src/system_tests/mhd_system_tests.cpp @@ -26,6 +26,7 @@ * */ /// @{ +// NOLINTNEXTLINE(readability-identifier-naming) class tMHDSYSTEMLinearWavesParameterizedAngle : public ::testing::TestWithParam> { public: @@ -544,6 +545,7 @@ INSTANTIATE_TEST_SUITE_P(, tMHDSYSTEMLinearWavesParameterizedAngle, * */ /// @{ +// NOLINTNEXTLINE(readability-identifier-naming) class tMHDSYSTEMLinearWavesParameterizedMpi : public ::testing::TestWithParam { public: @@ -690,6 +692,7 @@ TEST_P(tMHDSYSTEMLinearWavesParameterizedMpi, SlowMagnetosonicWaveLeftMovingCorr * */ /// @{ +// NOLINTNEXTLINE(readability-identifier-naming) class tMHDSYSTEMParameterizedMpi : public ::testing::TestWithParam { protected: @@ -786,6 +789,7 @@ TEST_P(tMHDSYSTEMParameterizedMpi, OrszagTangVortexCorrectInputExpectCorrectOutp * */ /// @{ +// NOLINTNEXTLINE(readability-identifier-naming) class tMHDSYSTEMCircularlyPolarizedAlfvenWaveParameterizedPolarization : public ::testing::TestWithParam { public: diff --git a/src/utils/mhd_utilities_tests.cu b/src/utils/mhd_utilities_tests.cu index ab33836a7..874419765 100644 --- a/src/utils/mhd_utilities_tests.cu +++ b/src/utils/mhd_utilities_tests.cu @@ -26,7 +26,7 @@ // Local helper functions namespace { -struct testParams { +struct TestParams { double gamma = 5. / 3.; std::vector density{8.4087201154e-100, 1.6756968986e2, 5.4882403847e100}; std::vector velocityX{7.0378624601e-100, 7.0829278656e2, 1.8800514112e100}; @@ -56,7 +56,7 @@ struct testParams { */ TEST(tMHDComputeThermalEnergy, CorrectInputExpectCorrectOutput) { - testParams parameters; + TestParams parameters; std::vector energyMultiplier{1.0E85, 1.0E4, 1.0E105}; std::vector fiducialGasPressures{3.0342082433e-15, 6887152.1495634327, 1.9480412919836246e+205}; @@ -83,7 +83,7 @@ TEST(tMHDComputeThermalEnergy, CorrectInputExpectCorrectOutput) */ TEST(tMHDcomputeMagneticEnergy, CorrectInputExpectCorrectOutput) { - testParams parameters; + TestParams parameters; std::vector energyMultiplier{1.0E85, 1.0E4, 1.0E105}; std::vector fiducialEnergy{0.0, 805356.08013056568, 6.7079331637514162e+201}; @@ -108,7 +108,7 @@ TEST(tMHDcomputeMagneticEnergy, CorrectInputExpectCorrectOutput) */ TEST(tMHDComputeTotalPressure, CorrectInputExpectCorrectOutput) { - testParams parameters; + TestParams parameters; std::vector fiducialTotalPressures{9.9999999999999995e-21, 806223.80964077567, 6.7079331637514151e+201}; for (size_t i = 0; i < parameters.names.size(); i++) { @@ -127,7 +127,7 @@ TEST(tMHDComputeTotalPressure, CorrectInputExpectCorrectOutput) */ TEST(tMHDComputeTotalPressure, NegativePressureExpectAutomaticFix) { - testParams parameters; + TestParams parameters; std::vector pressureMultiplier{1.0, -1.0e4, -1.0e105}; for (size_t i = 0; i < parameters.names.size(); i++) { @@ -155,7 +155,7 @@ TEST(tMHDComputeTotalPressure, NegativePressureExpectAutomaticFix) */ TEST(tMHDFastMagnetosonicSpeed, CorrectInputExpectCorrectOutput) { - testParams parameters; + TestParams parameters; std::vector fiducialFastMagnetosonicSpeed{1.9254472601190615e-40, 98.062482309387562, 1.5634816865472293e+38}; std::vector coef{1.0, 1.0, 1.0e-25}; @@ -178,7 +178,7 @@ TEST(tMHDFastMagnetosonicSpeed, CorrectInputExpectCorrectOutput) */ TEST(tMHDFastMagnetosonicSpeed, NegativeDensityExpectAutomaticFix) { - testParams parameters; + TestParams parameters; std::vector fiducialFastMagnetosonicSpeed{1.9254472601190615e-40, 12694062010603.15, 1.1582688085027081e+86}; std::vector coef{1.0, 1.0, 1.0e-25}; @@ -207,7 +207,7 @@ TEST(tMHDFastMagnetosonicSpeed, NegativeDensityExpectAutomaticFix) */ TEST(tMHDSlowMagnetosonicSpeed, CorrectInputExpectCorrectOutput) { - testParams parameters; + TestParams parameters; std::vector fiducialSlowMagnetosonicSpeed{0.0, 2.138424778167535, 0.26678309355540852}; // Coefficient to make sure the output is well defined and not nan or inf double const coef = 1E-95; @@ -230,7 +230,7 @@ TEST(tMHDSlowMagnetosonicSpeed, CorrectInputExpectCorrectOutput) */ TEST(tMHDSlowMagnetosonicSpeed, NegativeDensityExpectAutomaticFix) { - testParams parameters; + TestParams parameters; std::vector fiducialSlowMagnetosonicSpeed{0.0, 276816332809.37604, 1976400098318.3574}; // Coefficient to make sure the output is well defined and not nan or inf double const coef = 1E-95; @@ -258,7 +258,7 @@ TEST(tMHDSlowMagnetosonicSpeed, NegativeDensityExpectAutomaticFix) */ TEST(tMHDAlfvenSpeed, CorrectInputExpectCorrectOutput) { - testParams parameters; + TestParams parameters; std::vector fiducialAlfvenSpeed{2.8568843800999998e-90, 71.380245120271113, 9.2291462785524423e+49}; for (size_t i = 0; i < parameters.names.size(); i++) { @@ -275,7 +275,7 @@ TEST(tMHDAlfvenSpeed, CorrectInputExpectCorrectOutput) */ TEST(tMHDAlfvenSpeed, NegativeDensityExpectAutomaticFix) { - testParams parameters; + TestParams parameters; std::vector fiducialAlfvenSpeed{2.8568843800999998e-90, 9240080778600, 2.1621115263999998e+110}; for (size_t i = 0; i < parameters.names.size(); i++) { From e8ad0346531d23ee2a47c8c3663f9985b42c1ddf Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Mon, 10 Jul 2023 16:03:22 -0400 Subject: [PATCH 534/694] Permanantly disable cppcoreguidelines-pro-bounds-constant-array-index While this would be a good check to have on it's just not feasible in GPU code. --- .clang-tidy | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.clang-tidy b/.clang-tidy index 48813b746..929c6ccdd 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -34,6 +34,7 @@ Checks: "*, -misc-unused-parameters, -hicpp-multiway-paths-covered, -cert-err58-cpp, + -cppcoreguidelines-pro-bounds-constant-array-index, google-readability-avoid-underscore-in-googletest-name, google-upgrade-googletest-case, @@ -71,7 +72,6 @@ Checks: "*, -cppcoreguidelines-owning-memory, -cppcoreguidelines-prefer-member-initializer, -cppcoreguidelines-pro-bounds-array-to-pointer-decay, - -cppcoreguidelines-pro-bounds-constant-array-index, -cppcoreguidelines-pro-bounds-pointer-arithmetic, -cppcoreguidelines-pro-type-cstyle-cast, -cppcoreguidelines-pro-type-member-init, From 4b00f5758658342c3c07ae845753aee0c6e641b7 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Wed, 19 Jul 2023 11:00:32 -0400 Subject: [PATCH 535/694] Fix bad rebase --- src/reconstruction/ppmc_cuda_tests.cu | 8 +-- src/reconstruction/reconstruction_tests.cu | 69 +++++++++++----------- src/system_tests/hydro_system_tests.cpp | 2 +- 3 files changed, 39 insertions(+), 40 deletions(-) diff --git a/src/reconstruction/ppmc_cuda_tests.cu b/src/reconstruction/ppmc_cuda_tests.cu index 1c7515ec0..156f74272 100644 --- a/src/reconstruction/ppmc_cuda_tests.cu +++ b/src/reconstruction/ppmc_cuda_tests.cu @@ -101,7 +101,7 @@ TEST(tHYDROPpmcCTUReconstructor, CorrectInputExpectCorrectOutput) ? 0.0 : fiducial_interface_left.at(direction)[i]; - testingUtilities::checkResults( + testing_utilities::checkResults( fiducial_val, test_val, "left interface at i=" + std::to_string(i) + ", in direction " + std::to_string(direction)); @@ -111,7 +111,7 @@ TEST(tHYDROPpmcCTUReconstructor, CorrectInputExpectCorrectOutput) ? 0.0 : fiducial_interface_right.at(direction)[i]; - testingUtilities::checkResults( + testing_utilities::checkResults( fiducial_val, test_val, "right interface at i=" + std::to_string(i) + ", in direction " + std::to_string(direction)); } @@ -241,7 +241,7 @@ TEST(tALLPpmcVLReconstructor, CorrectInputExpectCorrectOutput) ? 0.0 : fiducial_interface_left.at(direction)[i]; - testingUtilities::checkResults( + testing_utilities::checkResults( fiducial_val, test_val, "left interface at i=" + std::to_string(i) + ", in direction " + std::to_string(direction)); @@ -251,7 +251,7 @@ TEST(tALLPpmcVLReconstructor, CorrectInputExpectCorrectOutput) ? 0.0 : fiducial_interface_right.at(direction)[i]; - testingUtilities::checkResults( + testing_utilities::checkResults( fiducial_val, test_val, "right interface at i=" + std::to_string(i) + ", in direction " + std::to_string(direction)); } diff --git a/src/reconstruction/reconstruction_tests.cu b/src/reconstruction/reconstruction_tests.cu index bc8984265..af84d5a3c 100644 --- a/src/reconstruction/reconstruction_tests.cu +++ b/src/reconstruction/reconstruction_tests.cu @@ -115,7 +115,6 @@ TEST(tMHDReconstructionComputeEigenvectors, CorrectInputExpectCorrectOutput) // Test parameters Real const &gamma = 5. / 3.; reconstruction::Primitive const primitive{1, 2, 3, 4, 5, 6, 7, 8}; - reconstruction::Characteristic const characteristic_slope{17, 18, 19, 20, 21, 22, 23}; Real const sound_speed = hydro_utilities::Calc_Sound_Speed(primitive.pressure, primitive.density, gamma); Real const sound_speed_squared = sound_speed * sound_speed; @@ -150,28 +149,28 @@ TEST(tMHDReconstructionComputeEigenvectors, CorrectInputExpectCorrectOutput) 0.97891777490585408, 0.65850460786851805, 0.75257669470687782, 0.059999999999999984, 1, 2.546253336541183, 1.3601203180183106, 0.58963258314939582, 2.825892204282022, 0.15277520019247093, 0.081607219081098623, 0.03537795498896374, 0.1695535322569213}; - testingUtilities::checkResults(fiducial_results.magnetosonic_speed_fast, host_results.magnetosonic_speed_fast, + testing_utilities::checkResults(fiducial_results.magnetosonic_speed_fast, host_results.magnetosonic_speed_fast, "magnetosonic_speed_fast"); - testingUtilities::checkResults(fiducial_results.magnetosonic_speed_slow, host_results.magnetosonic_speed_slow, + testing_utilities::checkResults(fiducial_results.magnetosonic_speed_slow, host_results.magnetosonic_speed_slow, "magnetosonic_speed_slow"); - testingUtilities::checkResults(fiducial_results.magnetosonic_speed_fast_squared, + testing_utilities::checkResults(fiducial_results.magnetosonic_speed_fast_squared, host_results.magnetosonic_speed_fast_squared, "magnetosonic_speed_fast_squared"); - testingUtilities::checkResults(fiducial_results.magnetosonic_speed_slow_squared, + testing_utilities::checkResults(fiducial_results.magnetosonic_speed_slow_squared, host_results.magnetosonic_speed_slow_squared, "magnetosonic_speed_slow_squared"); - testingUtilities::checkResults(fiducial_results.alpha_fast, host_results.alpha_fast, "alpha_fast"); - testingUtilities::checkResults(fiducial_results.alpha_slow, host_results.alpha_slow, "alpha_slow"); - testingUtilities::checkResults(fiducial_results.beta_y, host_results.beta_y, "beta_y"); - testingUtilities::checkResults(fiducial_results.beta_z, host_results.beta_z, "beta_z"); - testingUtilities::checkResults(fiducial_results.n_fs, host_results.n_fs, "n_fs"); - testingUtilities::checkResults(fiducial_results.sign, host_results.sign, "sign"); - testingUtilities::checkResults(fiducial_results.q_fast, host_results.q_fast, "q_fast"); - testingUtilities::checkResults(fiducial_results.q_slow, host_results.q_slow, "q_slow"); - testingUtilities::checkResults(fiducial_results.a_fast, host_results.a_fast, "a_fast"); - testingUtilities::checkResults(fiducial_results.a_slow, host_results.a_slow, "a_slow"); - testingUtilities::checkResults(fiducial_results.q_prime_fast, host_results.q_prime_fast, "q_prime_fast"); - testingUtilities::checkResults(fiducial_results.q_prime_slow, host_results.q_prime_slow, "q_prime_slow"); - testingUtilities::checkResults(fiducial_results.a_prime_fast, host_results.a_prime_fast, "a_prime_fast"); - testingUtilities::checkResults(fiducial_results.a_prime_slow, host_results.a_prime_slow, "a_prime_slow"); + testing_utilities::checkResults(fiducial_results.alpha_fast, host_results.alpha_fast, "alpha_fast"); + testing_utilities::checkResults(fiducial_results.alpha_slow, host_results.alpha_slow, "alpha_slow"); + testing_utilities::checkResults(fiducial_results.beta_y, host_results.beta_y, "beta_y"); + testing_utilities::checkResults(fiducial_results.beta_z, host_results.beta_z, "beta_z"); + testing_utilities::checkResults(fiducial_results.n_fs, host_results.n_fs, "n_fs"); + testing_utilities::checkResults(fiducial_results.sign, host_results.sign, "sign"); + testing_utilities::checkResults(fiducial_results.q_fast, host_results.q_fast, "q_fast"); + testing_utilities::checkResults(fiducial_results.q_slow, host_results.q_slow, "q_slow"); + testing_utilities::checkResults(fiducial_results.a_fast, host_results.a_fast, "a_fast"); + testing_utilities::checkResults(fiducial_results.a_slow, host_results.a_slow, "a_slow"); + testing_utilities::checkResults(fiducial_results.q_prime_fast, host_results.q_prime_fast, "q_prime_fast"); + testing_utilities::checkResults(fiducial_results.q_prime_slow, host_results.q_prime_slow, "q_prime_slow"); + testing_utilities::checkResults(fiducial_results.a_prime_fast, host_results.a_prime_fast, "a_prime_fast"); + testing_utilities::checkResults(fiducial_results.a_prime_slow, host_results.a_prime_slow, "a_prime_slow"); } #endif // MHD @@ -410,17 +409,17 @@ TEST(tHYDROReconstructionMonotizeParabolicInterface, CorrectInputExpectCorrectOu 4.6476103465999996, 3.7096802847000001}; reconstruction::Primitive const fiducial_interface_R{1.4708046700999999, 9.428341982700001, 3.7123503441999999, 4.6476103465999996, 3.7096802847000001}; - testingUtilities::checkResults(fiducial_interface_L.density, interface_L_iph.density, "density"); - testingUtilities::checkResults(fiducial_interface_L.velocity_x, interface_L_iph.velocity_x, "velocity_x"); - testingUtilities::checkResults(fiducial_interface_L.velocity_y, interface_L_iph.velocity_y, "velocity_y"); - testingUtilities::checkResults(fiducial_interface_L.velocity_z, interface_L_iph.velocity_z, "velocity_z"); - testingUtilities::checkResults(fiducial_interface_L.pressure, interface_L_iph.pressure, "pressure"); - - testingUtilities::checkResults(fiducial_interface_R.density, interface_R_imh.density, "density"); - testingUtilities::checkResults(fiducial_interface_R.velocity_x, interface_R_imh.velocity_x, "velocity_x"); - testingUtilities::checkResults(fiducial_interface_R.velocity_y, interface_R_imh.velocity_y, "velocity_y"); - testingUtilities::checkResults(fiducial_interface_R.velocity_z, interface_R_imh.velocity_z, "velocity_z"); - testingUtilities::checkResults(fiducial_interface_R.pressure, interface_R_imh.pressure, "pressure"); + testing_utilities::checkResults(fiducial_interface_L.density, interface_L_iph.density, "density"); + testing_utilities::checkResults(fiducial_interface_L.velocity_x, interface_L_iph.velocity_x, "velocity_x"); + testing_utilities::checkResults(fiducial_interface_L.velocity_y, interface_L_iph.velocity_y, "velocity_y"); + testing_utilities::checkResults(fiducial_interface_L.velocity_z, interface_L_iph.velocity_z, "velocity_z"); + testing_utilities::checkResults(fiducial_interface_L.pressure, interface_L_iph.pressure, "pressure"); + + testing_utilities::checkResults(fiducial_interface_R.density, interface_R_imh.density, "density"); + testing_utilities::checkResults(fiducial_interface_R.velocity_x, interface_R_imh.velocity_x, "velocity_x"); + testing_utilities::checkResults(fiducial_interface_R.velocity_y, interface_R_imh.velocity_y, "velocity_y"); + testing_utilities::checkResults(fiducial_interface_R.velocity_z, interface_R_imh.velocity_z, "velocity_z"); + testing_utilities::checkResults(fiducial_interface_R.pressure, interface_R_imh.pressure, "pressure"); } TEST(tALLReconstructionCalcInterfaceLinear, CorrectInputExpectCorrectOutput) @@ -491,11 +490,11 @@ TEST(tALLReconstructionCalcInterfaceParabolic, CorrectInputExpectCorrectOutput) #else // MHD reconstruction::Primitive const fiducial_data{4.833333333333333, 5.833333333333333, 6.833333333333333, 7.833333333333333, 8.8333333333333339}; - testingUtilities::checkResults(fiducial_data.density, test_data.density, "density"); - testingUtilities::checkResults(fiducial_data.velocity_x, test_data.velocity_x, "velocity_x"); - testingUtilities::checkResults(fiducial_data.velocity_y, test_data.velocity_y, "velocity_y"); - testingUtilities::checkResults(fiducial_data.velocity_z, test_data.velocity_z, "velocity_z"); - testingUtilities::checkResults(fiducial_data.pressure, test_data.pressure, "pressure"); + testing_utilities::checkResults(fiducial_data.density, test_data.density, "density"); + testing_utilities::checkResults(fiducial_data.velocity_x, test_data.velocity_x, "velocity_x"); + testing_utilities::checkResults(fiducial_data.velocity_y, test_data.velocity_y, "velocity_y"); + testing_utilities::checkResults(fiducial_data.velocity_z, test_data.velocity_z, "velocity_z"); + testing_utilities::checkResults(fiducial_data.pressure, test_data.pressure, "pressure"); #endif // MHD } diff --git a/src/system_tests/hydro_system_tests.cpp b/src/system_tests/hydro_system_tests.cpp index a1d232018..0ea780d5e 100644 --- a/src/system_tests/hydro_system_tests.cpp +++ b/src/system_tests/hydro_system_tests.cpp @@ -27,7 +27,7 @@ */ /// @{ // NOLINTNEXTLINE(readability-identifier-naming) -class tHYDROSYSTEMSodShockTubeParameterizedMpi : public ::testing::TestWithParam +class tHYDROtMHDSYSTEMSodShockTubeParameterizedMpi : public ::testing::TestWithParam { protected: system_test::SystemTestRunner sodTest; From b8cc4b479fda821856a4056f92d7aeb3a4f8e696 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Tue, 22 Aug 2023 15:50:13 -0400 Subject: [PATCH 536/694] Formatting --- src/reconstruction/reconstruction_tests.cu | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/reconstruction/reconstruction_tests.cu b/src/reconstruction/reconstruction_tests.cu index af84d5a3c..f21409208 100644 --- a/src/reconstruction/reconstruction_tests.cu +++ b/src/reconstruction/reconstruction_tests.cu @@ -150,13 +150,13 @@ TEST(tMHDReconstructionComputeEigenvectors, CorrectInputExpectCorrectOutput) 2.546253336541183, 1.3601203180183106, 0.58963258314939582, 2.825892204282022, 0.15277520019247093, 0.081607219081098623, 0.03537795498896374, 0.1695535322569213}; testing_utilities::checkResults(fiducial_results.magnetosonic_speed_fast, host_results.magnetosonic_speed_fast, - "magnetosonic_speed_fast"); + "magnetosonic_speed_fast"); testing_utilities::checkResults(fiducial_results.magnetosonic_speed_slow, host_results.magnetosonic_speed_slow, - "magnetosonic_speed_slow"); + "magnetosonic_speed_slow"); testing_utilities::checkResults(fiducial_results.magnetosonic_speed_fast_squared, - host_results.magnetosonic_speed_fast_squared, "magnetosonic_speed_fast_squared"); + host_results.magnetosonic_speed_fast_squared, "magnetosonic_speed_fast_squared"); testing_utilities::checkResults(fiducial_results.magnetosonic_speed_slow_squared, - host_results.magnetosonic_speed_slow_squared, "magnetosonic_speed_slow_squared"); + host_results.magnetosonic_speed_slow_squared, "magnetosonic_speed_slow_squared"); testing_utilities::checkResults(fiducial_results.alpha_fast, host_results.alpha_fast, "alpha_fast"); testing_utilities::checkResults(fiducial_results.alpha_slow, host_results.alpha_slow, "alpha_slow"); testing_utilities::checkResults(fiducial_results.beta_y, host_results.beta_y, "beta_y"); From 258f8f4df66925e66ec834ce7d6a2f460ec7308e Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Tue, 22 Aug 2023 16:31:14 -0400 Subject: [PATCH 537/694] fix bad rebase --- src/system_tests/hydro_system_tests.cpp | 8 ++++---- src/system_tests/mhd_system_tests.cpp | 17 +++++++++-------- 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/src/system_tests/hydro_system_tests.cpp b/src/system_tests/hydro_system_tests.cpp index 0ea780d5e..21f9f0d90 100644 --- a/src/system_tests/hydro_system_tests.cpp +++ b/src/system_tests/hydro_system_tests.cpp @@ -71,14 +71,14 @@ INSTANTIATE_TEST_SUITE_P(CorrectInputExpectCorrectOutput, tHYDROtMHDSYSTEMSodSho TEST(tHYDROSYSTEMSodShockTube, OneDimensionalCorrectInputExpectCorrectOutput) { - systemTest::SystemTestRunner sodTest; - sodTest.runTest(); + system_test::SystemTestRunner sod_test; + sod_test.runTest(); } TEST(tHYDROSYSTEMSodShockTube, TwoDimensionalCorrectInputExpectCorrectOutput) { - systemTest::SystemTestRunner sodTest; - sodTest.runTest(); + system_test::SystemTestRunner sod_test; + sod_test.runTest(); } TEST(tHYDROtMHDSYSTEMConstant, CorrectInputExpectCorrectOutput) diff --git a/src/system_tests/mhd_system_tests.cpp b/src/system_tests/mhd_system_tests.cpp index 150edb8c0..3d84aa56f 100644 --- a/src/system_tests/mhd_system_tests.cpp +++ b/src/system_tests/mhd_system_tests.cpp @@ -34,6 +34,7 @@ class tMHDSYSTEMLinearWavesParameterizedAngle : public ::testing::TestWithParam< protected: system_test::SystemTestRunner waveTest; + inline static std::unordered_map high_res_l2norms; void setLaunchParams(double const &waveSpeed, double const &rEigenVec_rho, double const &rEigenVec_MomentumX, double const &rEigenVec_MomentumY, double const &rEigenVec_MomentumZ, double const &rEigenVec_E, @@ -417,8 +418,8 @@ TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, FastMagnetosonicWaveExpectSecond // Check the scaling double const low_res_l2norm = waveTest.getL2Norm(); - testingUtilities::checkResults(4.0, low_res_l2norm / high_res_l2norms["fast_" + std::to_string(domain_direction)], "", - 0.17); + testing_utilities::checkResults(4.0, low_res_l2norm / high_res_l2norms["fast_" + std::to_string(domain_direction)], + "", 0.17); } TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, SlowMagnetosonicWaveExpectSecondOrderConvergence) @@ -452,8 +453,8 @@ TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, SlowMagnetosonicWaveExpectSecond // Check the scaling double const low_res_l2norm = waveTest.getL2Norm(); - testingUtilities::checkResults(4.0, low_res_l2norm / high_res_l2norms["slow_" + std::to_string(domain_direction)], "", - 0.17); + testing_utilities::checkResults(4.0, low_res_l2norm / high_res_l2norms["slow_" + std::to_string(domain_direction)], + "", 0.17); } TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, AlfvenWaveExpectSecondOrderConvergence) @@ -486,8 +487,8 @@ TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, AlfvenWaveExpectSecondOrderConve // Check the scaling double const low_res_l2norm = waveTest.getL2Norm(); - testingUtilities::checkResults(4.0, low_res_l2norm / high_res_l2norms["alfven_" + std::to_string(domain_direction)], - "", 0.17); + testing_utilities::checkResults(4.0, low_res_l2norm / high_res_l2norms["alfven_" + std::to_string(domain_direction)], + "", 0.17); } TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, MHDContactWaveExpectSecondOrderConvergence) @@ -521,8 +522,8 @@ TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, MHDContactWaveExpectSecondOrderC // Check the scaling double const low_res_l2norm = waveTest.getL2Norm(); - testingUtilities::checkResults(4.0, low_res_l2norm / high_res_l2norms["contact_" + std::to_string(domain_direction)], - "", 0.17); + testing_utilities::checkResults(4.0, low_res_l2norm / high_res_l2norms["contact_" + std::to_string(domain_direction)], + "", 0.17); } INSTANTIATE_TEST_SUITE_P(, tMHDSYSTEMLinearWavesParameterizedAngle, From ef132735025e31c9e6c02297e770ef02af09917f Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Tue, 22 Aug 2023 17:14:53 -0400 Subject: [PATCH 538/694] Fix a few checks that came up during rebasing --- src/grid/grid3D.cpp | 2 +- src/mhd/ct_electric_fields_tests.cu | 2 +- src/model/disk_ICs.cpp | 2 +- src/mpi/mpi_routines.cpp | 8 +++++--- 4 files changed, 8 insertions(+), 6 deletions(-) diff --git a/src/grid/grid3D.cpp b/src/grid/grid3D.cpp index d8655d051..a9f36c718 100644 --- a/src/grid/grid3D.cpp +++ b/src/grid/grid3D.cpp @@ -265,7 +265,7 @@ void Grid3D::Initialize(struct parameters *P) #endif #ifdef COSMOLOGY - H.OUTPUT_SCALE_FACOR = not P->scale_outputs_file[0] == '\0'; + H.OUTPUT_SCALE_FACOR = not(P->scale_outputs_file[0] == '\0'); #endif H.Output_Initial = true; diff --git a/src/mhd/ct_electric_fields_tests.cu b/src/mhd/ct_electric_fields_tests.cu index 2d20dab9f..a866694d5 100644 --- a/src/mhd/ct_electric_fields_tests.cu +++ b/src/mhd/ct_electric_fields_tests.cu @@ -268,7 +268,7 @@ TEST(tMHDCTSlope, CorrectInputExpectCorrectOutput) ASSERT_EQ(test_data.size(), fiducial_data.size()); for (size_t i = 0; i < test_data.size(); i++) { - testingUtilities::checkResults(fiducial_data.at(i), test_data.at(i), ""); + testing_utilities::checkResults(fiducial_data.at(i), test_data.at(i), ""); } } // ============================================================================= diff --git a/src/model/disk_ICs.cpp b/src/model/disk_ICs.cpp index 8e4bede3f..6f9909fb7 100644 --- a/src/model/disk_ICs.cpp +++ b/src/model/disk_ICs.cpp @@ -748,7 +748,7 @@ void Grid3D::Disk_3D(parameters p) Real r_cool; // MW model - DiskGalaxy galaxy = Galaxies::MW; + DiskGalaxy galaxy = Galaxies::MW; // NOLINT(cppcoreguidelines-slicing) // M82 model Galaxies::M82; M_vir = galaxy.getM_vir(); // viral mass in M_sun diff --git a/src/mpi/mpi_routines.cpp b/src/mpi/mpi_routines.cpp index 4efdfae7e..7f03fd7b4 100644 --- a/src/mpi/mpi_routines.cpp +++ b/src/mpi/mpi_routines.cpp @@ -542,23 +542,25 @@ void DomainDecompositionBLOCK(struct parameters *P, struct Header *H, int nx_gin void Allocate_MPI_DeviceBuffers(struct Header *H) { - int xbsize = 0, ybsize = 0, zbsize = 0; + int xbsize, ybsize, zbsize; if (H->ny == 1 && H->nz == 1) { xbsize = H->n_fields * H->n_ghost; ybsize = 1; zbsize = 1; } // 2D - if (H->ny > 1 && H->nz == 1) { + else if (H->ny > 1 && H->nz == 1) { xbsize = H->n_fields * H->n_ghost * (H->ny - 2 * H->n_ghost); ybsize = H->n_fields * H->n_ghost * (H->nx); zbsize = 1; } // 3D - if (H->ny > 1 && H->nz > 1) { + else if (H->ny > 1 && H->nz > 1) { xbsize = H->n_fields * H->n_ghost * (H->ny - 2 * H->n_ghost) * (H->nz - 2 * H->n_ghost); ybsize = H->n_fields * H->n_ghost * (H->nx) * (H->nz - 2 * H->n_ghost); zbsize = H->n_fields * H->n_ghost * (H->nx) * (H->ny); + } else { + throw std::runtime_error("MPI buffer size failed to set."); } x_buffer_length = xbsize; From 8af3cdc0222758599629845795ff237d7dcfdccb Mon Sep 17 00:00:00 2001 From: Evan Schneider Date: Fri, 22 Sep 2023 14:54:20 -0400 Subject: [PATCH 539/694] clang-format 17 --- src/analysis/feedback_analysis.cpp | 2 +- src/chemistry_gpu/chemistry_functions.cpp | 2 +- src/cooling_grackle/cool_grackle.cpp | 2 +- src/global/global.cpp | 1 - src/global/global.h | 1 - src/gravity/gravity_functions.cpp | 8 ++++---- src/gravity/gravity_functions_gpu.cu | 4 ++-- src/grid/grid3D.cpp | 6 +++--- src/grid/initial_conditions.cpp | 20 ++++++++++---------- src/integrators/VL_3D_cuda.cu | 2 +- src/io/io.cpp | 4 ++-- src/particles/io_particles.cpp | 14 +++++++------- src/particles/particles_3D.cpp | 14 +++++++------- src/particles/particles_boundaries_cpu.cpp | 4 ++-- src/reconstruction/plmp_cuda.cu | 2 +- src/reconstruction/ppmc_cuda_tests.cu | 2 +- src/reconstruction/ppmp_cuda.cu | 2 +- src/reconstruction/reconstruction_tests.cu | 8 ++------ src/system_tests/hydro_system_tests.cpp | 4 ++-- 19 files changed, 48 insertions(+), 54 deletions(-) diff --git a/src/analysis/feedback_analysis.cpp b/src/analysis/feedback_analysis.cpp index 4f870a33c..5241ec9ed 100644 --- a/src/analysis/feedback_analysis.cpp +++ b/src/analysis/feedback_analysis.cpp @@ -87,7 +87,7 @@ void FeedbackAnalysis::Compute_Gas_Velocity_Dispersion(Grid3D& G) #ifdef MPI_CHOLLA MPI_Allreduce(&partial_mass, &total_mass, 1, MPI_CHREAL, MPI_SUM, world); #else - total_mass = partial_mass; + total_mass = partial_mass; #endif for (k = G.H.n_ghost; k < G.H.nz - G.H.n_ghost; k++) { diff --git a/src/chemistry_gpu/chemistry_functions.cpp b/src/chemistry_gpu/chemistry_functions.cpp index 181c2a98e..ba67bf445 100644 --- a/src/chemistry_gpu/chemistry_functions.cpp +++ b/src/chemistry_gpu/chemistry_functions.cpp @@ -228,7 +228,7 @@ void Grid3D::Update_Chemistry() #ifdef COSMOLOGY Chem.H.current_z = Cosmo.current_z; #else - Chem.H.current_z = 0; + Chem.H.current_z = 0; #endif Do_Chemistry_Update(C.device, H.nx, H.ny, H.nz, H.n_ghost, H.n_fields, H.dt, Chem.H); diff --git a/src/cooling_grackle/cool_grackle.cpp b/src/cooling_grackle/cool_grackle.cpp index f57edcdb4..cecb9b4d4 100644 --- a/src/cooling_grackle/cool_grackle.cpp +++ b/src/cooling_grackle/cool_grackle.cpp @@ -89,7 +89,7 @@ void Cool_GK::Initialize(struct parameters *P, Cosmology &Cosmo) data->metal_cooling = 1; // metal cooling off #else chprintf("WARNING: Metal Cooling is Off. \n"); - data->metal_cooling = 0; // metal cooling off + data->metal_cooling = 0; // metal cooling off #endif #ifdef PARALLEL_OMP diff --git a/src/global/global.cpp b/src/global/global.cpp index 5f6aa26da..f14f234fb 100644 --- a/src/global/global.cpp +++ b/src/global/global.cpp @@ -57,7 +57,6 @@ int sgn(Real x) } } - /*! \fn char trim(char *s) * \brief Gets rid of trailing and leading whitespace. */ char *trim(char *s) diff --git a/src/global/global.h b/src/global/global.h index ed13190d7..b3defb561 100644 --- a/src/global/global.h +++ b/src/global/global.h @@ -155,7 +155,6 @@ extern double get_time(void); * \brief Mathematical sign function. Returns sign of x. */ extern int sgn(Real x); - struct parameters { int nx; int ny; diff --git a/src/gravity/gravity_functions.cpp b/src/gravity/gravity_functions.cpp index 70eb749c9..5321780dd 100644 --- a/src/gravity/gravity_functions.cpp +++ b/src/gravity/gravity_functions.cpp @@ -137,7 +137,7 @@ void Grid3D::set_dt_Gravity() dt_particles = Calc_Particles_dt(); dt_particles = fmin(dt_particles, Particles.max_dt); #ifdef ONLY_PARTICLES - dt_min = dt_particles; + dt_min = dt_particles; chprintf(" dt_particles: %f \n", dt_particles); #else chprintf(" dt_hydro: %f dt_particles: %f \n", dt_hydro, dt_particles); @@ -211,7 +211,7 @@ Real Grav3D::Get_Average_Density() #ifdef MPI_CHOLLA dens_avrg_all = ReduceRealAvg(dens_mean); #else - dens_avrg_all = dens_mean; + dens_avrg_all = dens_mean; #endif dens_avrg = dens_avrg_all; @@ -530,8 +530,8 @@ void Grid3D::Compute_Gravitational_Potential(struct parameters *P) input_density = Grav.F.density_d; output_potential = Grav.F.potential_d; #else - input_density = Grav.F.density_h; - output_potential = Grav.F.potential_h; + input_density = Grav.F.density_h; + output_potential = Grav.F.potential_h; #endif #ifdef SOR diff --git a/src/gravity/gravity_functions_gpu.cu b/src/gravity/gravity_functions_gpu.cu index 236670b49..6cd177163 100644 --- a/src/gravity/gravity_functions_gpu.cu +++ b/src/gravity/gravity_functions_gpu.cu @@ -127,7 +127,7 @@ void Grid3D::Copy_Hydro_Density_to_Gravity_GPU() #ifdef COSMOLOGY cosmo_rho_0_gas = Cosmo.rho_0_gas; #else - cosmo_rho_0_gas = 1.0; + cosmo_rho_0_gas = 1.0; #endif // Copy the density from the device array to the Poisson input density array @@ -261,7 +261,7 @@ void Grid3D::Extrapolate_Grav_Potential_GPU() #ifdef COSMOLOGY cosmo_factor = Cosmo.current_a * Cosmo.current_a / Cosmo.phi_0_gas; #else - cosmo_factor = 1.0; + cosmo_factor = 1.0; #endif // set values for GPU kernels diff --git a/src/grid/grid3D.cpp b/src/grid/grid3D.cpp index d8655d051..9d86416be 100644 --- a/src/grid/grid3D.cpp +++ b/src/grid/grid3D.cpp @@ -255,7 +255,7 @@ void Grid3D::Initialize(struct parameters *P) #ifdef DENSITY_FLOOR H.density_floor = DENS_FLOOR; #else - H.density_floor = 0.0; + H.density_floor = 0.0; #endif #ifdef TEMPERATURE_FLOOR @@ -333,8 +333,8 @@ void Grid3D::AllocateMemory(void) CudaSafeCall(cudaHostAlloc(&C.Grav_potential, H.n_cells * sizeof(Real), cudaHostAllocDefault)); CudaSafeCall(cudaMalloc((void **)&C.d_Grav_potential, H.n_cells * sizeof(Real))); #else - C.Grav_potential = NULL; - C.d_Grav_potential = NULL; + C.Grav_potential = NULL; + C.d_Grav_potential = NULL; #endif #ifdef CHEMISTRY_GPU diff --git a/src/grid/initial_conditions.cpp b/src/grid/initial_conditions.cpp index fded9236b..f1e3a3307 100644 --- a/src/grid/initial_conditions.cpp +++ b/src/grid/initial_conditions.cpp @@ -1481,18 +1481,18 @@ void Grid3D::Zeldovich_Pancake(struct parameters P) Real H0, h, Omega_M, rho_0, G, z_zeldovich, z_init, x_center, T_init, k_x; chprintf("Setting Zeldovich Pancake initial conditions...\n"); - H0 = P.H0; - h = H0 / 100; + H0 = P.H0; + h = H0 / 100; Omega_M = P.Omega_M; chprintf(" h = %f \n", h); chprintf(" Omega_M = %f \n", Omega_M); H0 /= 1000; //[km/s / kpc] - G = G_COSMO; - rho_0 = 3 * H0 * H0 / (8 * M_PI * G) * Omega_M / h / h; + G = G_COSMO; + rho_0 = 3 * H0 * H0 / (8 * M_PI * G) * Omega_M / h / h; z_zeldovich = 1; - z_init = P.Init_redshift; + z_init = P.Init_redshift; chprintf(" rho_0 = %f \n", rho_0); chprintf(" z_init = %f \n", z_init); chprintf(" z_zeldovich = %f \n", z_zeldovich); @@ -1552,17 +1552,17 @@ void Grid3D::Zeldovich_Pancake(struct parameters P) index = (int(x_pos / H.dx) + 0) % 256; // index = ( index + 16 ) % 256; dens = ics_values[0 * nPoints + index]; - vel = ics_values[1 * nPoints + index]; - E = ics_values[2 * nPoints + index]; - U = ics_values[3 * nPoints + index]; + vel = ics_values[1 * nPoints + index]; + E = ics_values[2 * nPoints + index]; + U = ics_values[3 * nPoints + index]; // // // chprintf( "%f \n", vel ); - C.density[id] = dens; + C.density[id] = dens; C.momentum_x[id] = dens * vel; C.momentum_y[id] = 0; C.momentum_z[id] = 0; - C.Energy[id] = E; + C.Energy[id] = E; #ifdef DE C.GasEnergy[id] = U; diff --git a/src/integrators/VL_3D_cuda.cu b/src/integrators/VL_3D_cuda.cu index cd99f472f..b7b8afb34 100644 --- a/src/integrators/VL_3D_cuda.cu +++ b/src/integrators/VL_3D_cuda.cu @@ -122,7 +122,7 @@ void VL_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, int #if defined(GRAVITY) dev_grav_potential = d_grav_potential; #else // not GRAVITY - dev_grav_potential = NULL; + dev_grav_potential = NULL; #endif // GRAVITY // If memory is single allocated: memory_allocated becomes true and diff --git a/src/io/io.cpp b/src/io/io.cpp index 09ffd0d17..227ac84f0 100644 --- a/src/io/io.cpp +++ b/src/io/io.cpp @@ -1394,12 +1394,12 @@ void Grid3D::Write_Grid_HDF5(hid_t file_id) #ifdef OUTPUT_METALS output_metals = true; #else // not OUTPUT_METALS - output_metals = false; + output_metals = false; #endif // OUTPUT_METALS #ifdef OUTPUT_ELECTRONS output_electrons = true; #else // not OUTPUT_ELECTRONS - output_electrons = false; + output_electrons = false; #endif // OUTPUT_ELECTRONS #ifdef OUTPUT_FULL_IONIZATION output_full_ionization = true; diff --git a/src/particles/io_particles.cpp b/src/particles/io_particles.cpp index 02a7d6c3a..26c90d94f 100644 --- a/src/particles/io_particles.cpp +++ b/src/particles/io_particles.cpp @@ -451,12 +451,12 @@ void Particles_3D::Load_Particles_Data_HDF5(hid_t file_id, int nfile, struct par Real vy_max_g = vy_max; Real vz_max_g = vz_max; - Real px_min_g = px_min; - Real py_min_g = py_min; - Real pz_min_g = pz_min; - Real vx_min_g = vx_min; - Real vy_min_g = vy_min; - Real vz_min_g = vz_min; + Real px_min_g = px_min; + Real py_min_g = py_min; + Real pz_min_g = pz_min; + Real vx_min_g = vx_min; + Real vy_min_g = vy_min; + Real vz_min_g = vz_min; #endif // MPI_CHOLLA // Print initial Statistics @@ -569,7 +569,7 @@ void Grid3D::Write_Particles_Data_HDF5(hid_t file_id) #ifdef MPI_CHOLLA N_particles_total = ReducePartIntSum(Particles.n_local); #else - N_particles_total = Particles.n_local; + N_particles_total = Particles.n_local; #endif // Print the total particles when saving the particles data diff --git a/src/particles/particles_3D.cpp b/src/particles/particles_3D.cpp index e8ac74dbe..ec1b8b735 100644 --- a/src/particles/particles_3D.cpp +++ b/src/particles/particles_3D.cpp @@ -157,12 +157,12 @@ void Particles_3D::Initialize(struct parameters *P, Grav3D &Grav, Real xbound, R G.boundary_type_z0 = P->zlg_bcnd; G.boundary_type_z1 = P->zug_bcnd; #else - G.boundary_type_x0 = P->xl_bcnd; - G.boundary_type_x1 = P->xu_bcnd; - G.boundary_type_y0 = P->yl_bcnd; - G.boundary_type_y1 = P->yu_bcnd; - G.boundary_type_z0 = P->zl_bcnd; - G.boundary_type_z1 = P->zu_bcnd; + G.boundary_type_x0 = P->xl_bcnd; + G.boundary_type_x1 = P->xu_bcnd; + G.boundary_type_y0 = P->yl_bcnd; + G.boundary_type_y1 = P->yu_bcnd; + G.boundary_type_z0 = P->zl_bcnd; + G.boundary_type_z1 = P->zu_bcnd; #endif #ifdef PARTICLES_GPU @@ -211,7 +211,7 @@ void Particles_3D::Initialize(struct parameters *P, Grav3D &Grav, Real xbound, R #ifdef MPI_CHOLLA n_total_initial = ReducePartIntSum(n_local); #else - n_total_initial = n_local; + n_total_initial = n_local; #endif chprintf("Particles Initialized: \n n_local: %lu \n", n_local); diff --git a/src/particles/particles_boundaries_cpu.cpp b/src/particles/particles_boundaries_cpu.cpp index 19fc238ef..ccdf008a8 100644 --- a/src/particles/particles_boundaries_cpu.cpp +++ b/src/particles/particles_boundaries_cpu.cpp @@ -433,13 +433,13 @@ void Particles_3D::Unload_Particles_from_Buffer_CPU(int direction, int side, Rea offset_extra += 1; pId = recv_buffer[offset_extra]; #else - pId = 0; + pId = 0; #endif #ifdef PARTICLE_AGE offset_extra += 1; pAge = recv_buffer[offset_extra]; #else - pAge = 0.0; + pAge = 0.0; #endif offset_buff += N_DATA_PER_PARTICLE_TRANSFER; diff --git a/src/reconstruction/plmp_cuda.cu b/src/reconstruction/plmp_cuda.cu index f69bbdc4b..a000da4da 100644 --- a/src/reconstruction/plmp_cuda.cu +++ b/src/reconstruction/plmp_cuda.cu @@ -120,7 +120,7 @@ __global__ void PLMP_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou dge = dev_conserved[(n_fields - 1) * n_cells + id]; p_i = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, dge, gamma); #else - p_i = (dev_conserved[4 * n_cells + id] - 0.5 * d_i * (vx_i * vx_i + vy_i * vy_i + vz_i * vz_i)) * (gamma - 1.0); + p_i = (dev_conserved[4 * n_cells + id] - 0.5 * d_i * (vx_i * vx_i + vy_i * vy_i + vz_i * vz_i)) * (gamma - 1.0); #endif // PRESSURE_DE p_i = fmax(p_i, (Real)TINY_NUMBER); #ifdef SCALAR diff --git a/src/reconstruction/ppmc_cuda_tests.cu b/src/reconstruction/ppmc_cuda_tests.cu index 1c7515ec0..d523964ca 100644 --- a/src/reconstruction/ppmc_cuda_tests.cu +++ b/src/reconstruction/ppmc_cuda_tests.cu @@ -139,7 +139,7 @@ TEST(tALLPpmcVLReconstructor, CorrectInputExpectCorrectOutput) #ifdef MHD size_t const n_fields = 8; #else // not MHD - size_t const n_fields = 5; + size_t const n_fields = 5; #endif // MHD // Setup host grid. Fill host grid with random values and randomly assign maximum value diff --git a/src/reconstruction/ppmp_cuda.cu b/src/reconstruction/ppmp_cuda.cu index 36b74aebf..89eaccc21 100644 --- a/src/reconstruction/ppmp_cuda.cu +++ b/src/reconstruction/ppmp_cuda.cu @@ -166,7 +166,7 @@ __global__ void PPMP_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou dge = dev_conserved[(n_fields - 1) * n_cells + id]; p_i = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, dge, gamma); #else - p_i = (dev_conserved[4 * n_cells + id] - 0.5 * d_i * (vx_i * vx_i + vy_i * vy_i + vz_i * vz_i)) * (gamma - 1.0); + p_i = (dev_conserved[4 * n_cells + id] - 0.5 * d_i * (vx_i * vx_i + vy_i * vy_i + vz_i * vz_i)) * (gamma - 1.0); #endif // PRESSURE_DE p_i = fmax(p_i, (Real)TINY_NUMBER); #ifdef DE diff --git a/src/reconstruction/reconstruction_tests.cu b/src/reconstruction/reconstruction_tests.cu index 5f8000bf8..34906337b 100644 --- a/src/reconstruction/reconstruction_tests.cu +++ b/src/reconstruction/reconstruction_tests.cu @@ -576,13 +576,9 @@ TEST(tALLReconstructionWriteData, CorrectInputExpectCorrectOutput) { // Set up test and mock up grid #ifdef MHD - reconstruction::Primitive interface { - 1, 2, 3, 4, 5, 6, 7, 8 - }; + reconstruction::Primitive interface{1, 2, 3, 4, 5, 6, 7, 8}; #else // MHD - reconstruction::Primitive interface { - 6, 7, 8, 9, 10 - }; + reconstruction::Primitive interface{6, 7, 8, 9, 10}; #endif // MHD size_t const nx = 3, ny = 3, nz = 3; size_t const n_cells = nx * ny * nz; diff --git a/src/system_tests/hydro_system_tests.cpp b/src/system_tests/hydro_system_tests.cpp index b36a9837f..a69484759 100644 --- a/src/system_tests/hydro_system_tests.cpp +++ b/src/system_tests/hydro_system_tests.cpp @@ -157,8 +157,8 @@ class tHYDROtMHDSYSTEMLinearWavesParameterizedMpi : public ::testing::TestWithPa double const allowedL1Error = 4E-7; // Based on results in Gardiner & Stone 2008 double const allowedError = 4E-7; #elif defined(PLMC) - double const allowedL1Error = 1E-7; // Based on results in Gardiner & Stone 2008 - double const allowedError = 1E-7; + double const allowedL1Error = 1E-7; // Based on results in Gardiner & Stone 2008 + double const allowedError = 1E-7; #elif defined(PLMP) double const allowedL1Error = 1E-7; // Based on results in Gardiner & Stone 2008 double const allowedError = 1E-7; From 03c3e3d1065353654de27fbbc0ee5a00195a8800 Mon Sep 17 00:00:00 2001 From: ezlimen Date: Fri, 22 Sep 2023 12:47:52 -0700 Subject: [PATCH 540/694] add custom grav flag to VL integrator and remove print statements --- builds/make.type.static_grav | 1 - src/global/global.cpp | 5 --- src/global/global.h | 2 +- src/gravity/static_grav.h | 62 ++++++++++++++++++++++++----------- src/grid/grid3D.cpp | 7 ++-- src/integrators/VL_1D_cuda.cu | 4 +-- src/integrators/VL_1D_cuda.h | 2 +- src/integrators/VL_2D_cuda.cu | 4 +-- src/integrators/VL_2D_cuda.h | 2 +- src/integrators/VL_3D_cuda.cu | 4 +-- src/integrators/VL_3D_cuda.h | 2 +- 11 files changed, 56 insertions(+), 39 deletions(-) diff --git a/builds/make.type.static_grav b/builds/make.type.static_grav index cf621afa8..4f13e7288 100644 --- a/builds/make.type.static_grav +++ b/builds/make.type.static_grav @@ -29,4 +29,3 @@ DFLAGS += -DSTATIC_GRAV # Can also add -DSLICES and -DPROJECTIONS OUTPUT ?= -DOUTPUT -DHDF5 DFLAGS += $(OUTPUT) -DN_OUTPUT_COMPLETE=1 diff --git a/src/global/global.cpp b/src/global/global.cpp index 149adeaf2..fbb2fe8df 100644 --- a/src/global/global.cpp +++ b/src/global/global.cpp @@ -134,11 +134,6 @@ void parse_params(char *param_file, struct parameters *parms, int argc, char **a return; } -#ifdef STATIC_GRAV - // initialize custom gravity flag to zero - parms->custom_grav = 0; -#endif - #ifdef COSMOLOGY // Initialize file name as an empty string parms->scale_outputs_file[0] = '\0'; diff --git a/src/global/global.h b/src/global/global.h index 142df0fde..7e818b79a 100644 --- a/src/global/global.h +++ b/src/global/global.h @@ -199,7 +199,7 @@ struct parameters { int out_float32_GasEnergy = 0; #endif #ifdef STATIC_GRAV - int custom_grav; // flag to set specific static gravity field + int custom_grav = 0; // flag to set specific static gravity field #endif #ifdef MHD int out_float32_magnetic_x = 0; diff --git a/src/gravity/static_grav.h b/src/gravity/static_grav.h index f0bfda9e1..6106d8800 100644 --- a/src/gravity/static_grav.h +++ b/src/gravity/static_grav.h @@ -18,8 +18,10 @@ inline __device__ void calc_g_1D(int xid, int x_off, int n_ghost, int custom_gra { Real x_pos, r_disk, r_halo; x_pos = (x_off + xid - n_ghost + 0.5) * dx + xbound; + //set gravity field according to parameter file input switch (custom_grav) { - case 1: + case 1: + //1D NFW halo & Miyamoto-Nagai disk // for disk components, calculate polar r // r_disk = 0.220970869121; // r_disk = 6.85009694274; @@ -64,12 +66,12 @@ inline __device__ void calc_g_2D(int xid, int yid, int x_off, int y_off, int n_g // positions on the grid x_pos = (x_off + xid - n_ghost + 0.5) * dx + xbound; y_pos = (y_off + yid - n_ghost + 0.5) * dy + ybound; - // for Gresho, also need r & phi + // for Gresho and disks, also need r & phi r = sqrt(x_pos * x_pos + y_pos * y_pos); phi = atan2(y_pos, x_pos); switch (custom_grav) { case 1: - // printf("gresho\n"); + // Gresho vortex // set acceleration to balance v_phi in Gresho problem if (r < 0.2) { *gx = -cos(phi) * 25.0 * r; @@ -83,19 +85,18 @@ inline __device__ void calc_g_2D(int xid, int yid, int x_off, int y_off, int n_g } break; case 2: - // printf("rayleigh talor\n"); + //Rayleigh-Taylor instability *gx = 0; *gy = -1; break; case 3: - // printf("keplerian\n"); + // 2D disk in keplerian rotation Real M; M = 1 * MSUN_CGS; *gx = -cos(phi) * GN * M / (r * r); *gy = -sin(phi) * GN * M / (r * r); break; - case 4: - // printf("disk\n"); + case 4: // set gravitational acceleration for Kuzmin disk + NFW halo Real a_d, a_h, a, M_vir, M_d, R_vir, R_d, R_s, M_h, c_vir, x; M_vir = 1.0e12; // viral mass of MW in M_sun @@ -116,7 +117,6 @@ inline __device__ void calc_g_2D(int xid, int yid, int x_off, int y_off, int n_g *gy = -sin(phi) * a; break; default: - // printf("default\n"); *gx = 0; *gy = 0; } @@ -139,31 +139,55 @@ inline __device__ void calc_g_3D(int xid, int yid, int zid, int x_off, int y_off r_disk = sqrt(x_pos * x_pos + y_pos * y_pos); // for halo, calculate spherical r r_halo = sqrt(x_pos * x_pos + y_pos * y_pos + z_pos * z_pos); + Real a_disk_r, a_disk_z, a_halo, a_halo_r, a_halo_z; + Real M_vir, M_d, R_vir, R_d, z_d, R_h, M_h, c_vir, phi_0_h, x; switch (custom_grav) { case 1: + // Milky way disk model // set properties of halo and disk (these must match initial conditions) - Real a_disk_r, a_disk_z, a_halo, a_halo_r, a_halo_z; - Real M_vir, M_d, R_vir, R_d, z_d, R_h, M_h, c_vir, phi_0_h, x; - // MW model + M_vir = 1.0e12; // viral mass of in M_sun M_d = 6.5e10; // viral mass of in M_sun R_d = 3.5; // disk scale length in kpc z_d = 3.5 / 5.0; // disk scale height in kpc R_vir = 261.; // virial radius in kpc c_vir = 20.0; // halo concentration - // M82 model - // M_vir = 5.0e10; // viral mass of in M_sun - // M_d = 1.0e10; // mass of disk in M_sun - // R_d = 0.8; // disk scale length in kpc - // z_d = 0.15; // disk scale height in kpc - // R_vir = R_d/0.015; // viral radius in kpc - // c_vir = 10.0; // halo concentration - + M_h = M_vir - M_d; // halo mass in M_sun R_h = R_vir / c_vir; // halo scale length in kpc phi_0_h = GN * M_h / (log(1.0 + c_vir) - c_vir / (1.0 + c_vir)); x = r_halo / R_h; + // calculate acceleration due to NFW halo & Miyamoto-Nagai disk + a_halo = -phi_0_h * (log(1 + x) - x / (1 + x)) / (r_halo * r_halo); + a_halo_r = a_halo * (r_disk / r_halo); + a_halo_z = a_halo * (z_pos / r_halo); + a_disk_r = -GN * M_d * r_disk * pow(r_disk * r_disk + pow2(R_d + sqrt(z_pos * z_pos + z_d * z_d)), -1.5); + a_disk_z = + -GN * M_d * z_pos * (R_d + sqrt(z_pos * z_pos + z_d * z_d)) / + (pow(r_disk * r_disk + pow2(R_d + sqrt(z_pos * z_pos + z_d * z_d)), 1.5) * sqrt(z_pos * z_pos + z_d * z_d)); + + // total acceleration is the sum of the halo + disk components + *gx = (x_pos / r_disk) * (a_disk_r + a_halo_r); + *gy = (y_pos / r_disk) * (a_disk_r + a_halo_r); + *gz = a_disk_z + a_halo_z; + break; + case 2: + // M82 model + // set properties of halo and disk (these must match initial conditions) + + M_vir = 5.0e10; // viral mass of in M_sun + M_d = 1.0e10; // mass of disk in M_sun + R_d = 0.8; // disk scale length in kpc + z_d = 0.15; // disk scale height in kpc + R_vir = R_d/0.015; // viral radius in kpc + c_vir = 10.0; // halo concentration + + M_h = M_vir - M_d; // halo mass in M_sun + R_h = R_vir / c_vir; // halo scale length in kpc + phi_0_h = GN * M_h / (log(1.0 + c_vir) - c_vir / (1.0 + c_vir)); + x = r_halo / R_h; + // calculate acceleration due to NFW halo & Miyamoto-Nagai disk a_halo = -phi_0_h * (log(1 + x) - x / (1 + x)) / (r_halo * r_halo); a_halo_r = a_halo * (r_disk / r_halo); diff --git a/src/grid/grid3D.cpp b/src/grid/grid3D.cpp index aa02eb8c2..e9d76184a 100644 --- a/src/grid/grid3D.cpp +++ b/src/grid/grid3D.cpp @@ -149,7 +149,6 @@ void Grid3D::Initialize(struct parameters *P) #ifdef STATIC_GRAV H.custom_grav = P->custom_grav; // Initialize the custom static gravity flag - printf("H.custom_grav is %d\n", H.custom_grav); if (H.custom_grav == 0) { printf("WARNING: No custom gravity field given. Gravity field will be set to zero.\n"); } @@ -439,7 +438,7 @@ Real Grid3D::Update_Grid(void) { #ifdef CUDA #ifdef VL - VL_Algorithm_1D_CUDA(C.device, H.nx, x_off, H.n_ghost, H.dx, H.xbound, H.dt, H.n_fields); + VL_Algorithm_1D_CUDA(C.device, H.nx, x_off, H.n_ghost, H.dx, H.xbound, H.dt, H.n_fields, H.custom_grav); #endif // VL #ifdef SIMPLE Simple_Algorithm_1D_CUDA(C.device, H.nx, x_off, H.n_ghost, H.dx, H.xbound, H.dt, H.n_fields, H.custom_grav); @@ -450,7 +449,7 @@ Real Grid3D::Update_Grid(void) #ifdef CUDA #ifdef VL VL_Algorithm_2D_CUDA(C.device, H.nx, H.ny, x_off, y_off, H.n_ghost, H.dx, H.dy, H.xbound, H.ybound, H.dt, - H.n_fields); + H.n_fields, H.custom_grav); #endif // VL #ifdef SIMPLE Simple_Algorithm_2D_CUDA(C.device, H.nx, H.ny, x_off, y_off, H.n_ghost, H.dx, H.dy, H.xbound, H.ybound, H.dt, @@ -462,7 +461,7 @@ Real Grid3D::Update_Grid(void) #ifdef CUDA #ifdef VL VL_Algorithm_3D_CUDA(C.device, C.d_Grav_potential, H.nx, H.ny, H.nz, x_off, y_off, z_off, H.n_ghost, H.dx, H.dy, - H.dz, H.xbound, H.ybound, H.zbound, H.dt, H.n_fields, density_floor, U_floor, + H.dz, H.xbound, H.ybound, H.zbound, H.dt, H.n_fields, H.custom_grav, density_floor, U_floor, C.Grav_potential); #endif // VL #ifdef SIMPLE diff --git a/src/integrators/VL_1D_cuda.cu b/src/integrators/VL_1D_cuda.cu index 57c1a9c65..d21711cd3 100644 --- a/src/integrators/VL_1D_cuda.cu +++ b/src/integrators/VL_1D_cuda.cu @@ -29,7 +29,7 @@ __global__ void Update_Conserved_Variables_1D_half(Real *dev_conserved, Real *de int n_fields); void VL_Algorithm_1D_CUDA(Real *d_conserved, int nx, int x_off, int n_ghost, Real dx, Real xbound, Real dt, - int n_fields) + int n_fields, int custom_grav) { // Here, *dev_conserved contains the entire // set of conserved variables on the grid @@ -134,7 +134,7 @@ void VL_Algorithm_1D_CUDA(Real *d_conserved, int nx, int x_off, int n_ghost, Rea // Step 6: Update the conserved variable array hipLaunchKernelGGL(Update_Conserved_Variables_1D, dimGrid, dimBlock, 0, 0, dev_conserved, F_x, n_cells, x_off, - n_ghost, dx, xbound, dt, gama, n_fields); + n_ghost, dx, xbound, dt, gama, n_fields, custom_grav); CudaCheckError(); #ifdef DE diff --git a/src/integrators/VL_1D_cuda.h b/src/integrators/VL_1D_cuda.h index da8837956..2d901234c 100644 --- a/src/integrators/VL_1D_cuda.h +++ b/src/integrators/VL_1D_cuda.h @@ -9,7 +9,7 @@ #include "../global/global.h" void VL_Algorithm_1D_CUDA(Real *d_conserved, int nx, int x_off, int n_ghost, Real dx, Real xbound, Real dt, - int n_fields); + int n_fields, int custom_grav); void Free_Memory_VL_1D(); diff --git a/src/integrators/VL_2D_cuda.cu b/src/integrators/VL_2D_cuda.cu index 27677f61d..8fdaf2ec9 100644 --- a/src/integrators/VL_2D_cuda.cu +++ b/src/integrators/VL_2D_cuda.cu @@ -26,7 +26,7 @@ __global__ void Update_Conserved_Variables_2D_half(Real *dev_conserved, Real *de Real dt, Real gamma, int n_fields); void VL_Algorithm_2D_CUDA(Real *d_conserved, int nx, int ny, int x_off, int y_off, int n_ghost, Real dx, Real dy, - Real xbound, Real ybound, Real dt, int n_fields) + Real xbound, Real ybound, Real dt, int n_fields, int custom_grav) { // Here, *dev_conserved contains the entire // set of conserved variables on the grid @@ -151,7 +151,7 @@ void VL_Algorithm_2D_CUDA(Real *d_conserved, int nx, int ny, int x_off, int y_of // Step 6: Update the conserved variable array hipLaunchKernelGGL(Update_Conserved_Variables_2D, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, F_x, F_y, nx, ny, x_off, - y_off, n_ghost, dx, dy, xbound, ybound, dt, gama, n_fields); + y_off, n_ghost, dx, dy, xbound, ybound, dt, gama, n_fields, custom_grav); CudaCheckError(); #ifdef DE diff --git a/src/integrators/VL_2D_cuda.h b/src/integrators/VL_2D_cuda.h index 0231f4582..a13495688 100644 --- a/src/integrators/VL_2D_cuda.h +++ b/src/integrators/VL_2D_cuda.h @@ -9,7 +9,7 @@ #include "../global/global.h" void VL_Algorithm_2D_CUDA(Real *d_conserved, int nx, int ny, int x_off, int y_off, int n_ghost, Real dx, Real dy, - Real xbound, Real ybound, Real dt, int n_fields); + Real xbound, Real ybound, Real dt, int n_fields, int custom_grav); void Free_Memory_VL_2D(); diff --git a/src/integrators/VL_3D_cuda.cu b/src/integrators/VL_3D_cuda.cu index cd99f472f..a9db3e4af 100644 --- a/src/integrators/VL_3D_cuda.cu +++ b/src/integrators/VL_3D_cuda.cu @@ -37,7 +37,7 @@ __global__ void Update_Conserved_Variables_3D_half(Real *dev_conserved, Real *de void VL_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, int ny, int nz, int x_off, int y_off, int z_off, int n_ghost, Real dx, Real dy, Real dz, Real xbound, Real ybound, Real zbound, - Real dt, int n_fields, Real density_floor, Real U_floor, Real *host_grav_potential) + Real dt, int n_fields, int custom_grav, Real density_floor, Real U_floor, Real *host_grav_potential) { // Here, *dev_conserved contains the entire // set of conserved variables on the grid @@ -302,7 +302,7 @@ void VL_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, int // Step 6: Update the conserved variable array hipLaunchKernelGGL(Update_Conserved_Variables_3D, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, Q_Ly, Q_Ry, Q_Lz, Q_Rz, F_x, F_y, F_z, nx, ny, nz, x_off, y_off, z_off, n_ghost, dx, dy, dz, xbound, ybound, - zbound, dt, gama, n_fields, density_floor, dev_grav_potential); + zbound, dt, gama, n_fields, custom_grav, density_floor, dev_grav_potential); CudaCheckError(); #ifdef MHD diff --git a/src/integrators/VL_3D_cuda.h b/src/integrators/VL_3D_cuda.h index ab52cba85..44b036dda 100644 --- a/src/integrators/VL_3D_cuda.h +++ b/src/integrators/VL_3D_cuda.h @@ -10,7 +10,7 @@ void VL_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, int ny, int nz, int x_off, int y_off, int z_off, int n_ghost, Real dx, Real dy, Real dz, Real xbound, Real ybound, Real zbound, - Real dt, int n_fields, Real density_floor, Real U_floor, Real *host_grav_potential); + Real dt, int n_fields, int custom_grav, Real density_floor, Real U_floor, Real *host_grav_potential); void Free_Memory_VL_3D(); From 9a1b6a9f713abd05579c77a5fc25258b862eecf9 Mon Sep 17 00:00:00 2001 From: helenarichie Date: Fri, 22 Sep 2023 15:53:03 -0400 Subject: [PATCH 541/694] run clang format --- src/mhd/ct_electric_fields_tests.cu | 4 ++-- src/mhd/magnetic_update_tests.cu | 4 ++-- src/riemann_solvers/hlld_cuda_tests.cu | 6 ++++-- src/system_tests/mhd_system_tests.cpp | 4 ++-- 4 files changed, 10 insertions(+), 8 deletions(-) diff --git a/src/mhd/ct_electric_fields_tests.cu b/src/mhd/ct_electric_fields_tests.cu index d461878f6..3c8dc479a 100644 --- a/src/mhd/ct_electric_fields_tests.cu +++ b/src/mhd/ct_electric_fields_tests.cu @@ -117,8 +117,8 @@ class tMHDCalculateCTElectricFields : public ::testing::Test for (size_t i = 0; i < fiducialData.size(); i++) { int xid, yid, zid; testing_utilities::Check_Results(fiducialData.at(i), testCTElectricFields.at(i), - "value at i = " + std::to_string(i) + ", xid = " + std::to_string(xid) + - ", yid = " + std::to_string(yid) + ", zid = " + std::to_string(zid)); + "value at i = " + std::to_string(i) + ", xid = " + std::to_string(xid) + + ", yid = " + std::to_string(yid) + ", zid = " + std::to_string(zid)); } } }; diff --git a/src/mhd/magnetic_update_tests.cu b/src/mhd/magnetic_update_tests.cu index 7579919bb..db47d658e 100644 --- a/src/mhd/magnetic_update_tests.cu +++ b/src/mhd/magnetic_update_tests.cu @@ -105,8 +105,8 @@ class tMHDUpdateMagneticField3D : public ::testing::Test int xid, yid, zid; cuda_utilities::compute3DIndices(i, nx, ny, xid, yid, zid); testing_utilities::Check_Results(fiducialData.at(i), destinationGrid.at(i), - "value at i = " + std::to_string(i) + ", xid = " + std::to_string(xid) + - ", yid = " + std::to_string(yid) + ", zid = " + std::to_string(zid)); + "value at i = " + std::to_string(i) + ", xid = " + std::to_string(xid) + + ", yid = " + std::to_string(yid) + ", zid = " + std::to_string(zid)); } } }; diff --git a/src/riemann_solvers/hlld_cuda_tests.cu b/src/riemann_solvers/hlld_cuda_tests.cu index 397116100..219f74739 100644 --- a/src/riemann_solvers/hlld_cuda_tests.cu +++ b/src/riemann_solvers/hlld_cuda_tests.cu @@ -1935,8 +1935,10 @@ TEST(tMHDHlldInternalApproximateStarWaveSpeed, CorrectInputExpectCorrectOutput) parameters.magneticX.at(i), 1); // Now check results - testing_utilities::Check_Results(fiducialSpeedStarL.at(i), testSpeed.LStar, parameters.names.at(i) + ", SpeedStarL"); - testing_utilities::Check_Results(fiducialSpeedStarR.at(i), testSpeed.RStar, parameters.names.at(i) + ", SpeedStarR"); + testing_utilities::Check_Results(fiducialSpeedStarL.at(i), testSpeed.LStar, + parameters.names.at(i) + ", SpeedStarL"); + testing_utilities::Check_Results(fiducialSpeedStarR.at(i), testSpeed.RStar, + parameters.names.at(i) + ", SpeedStarR"); } } // ========================================================================= diff --git a/src/system_tests/mhd_system_tests.cpp b/src/system_tests/mhd_system_tests.cpp index e2779a499..08225db4b 100644 --- a/src/system_tests/mhd_system_tests.cpp +++ b/src/system_tests/mhd_system_tests.cpp @@ -527,8 +527,8 @@ TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, MHDContactWaveExpectSecondOrderC // Check the scaling double const low_res_l2norm = waveTest.getL2Norm(); - testing_utilities::Check_Results(4.0, low_res_l2norm / high_res_l2norms["contact_" + std::to_string(domain_direction)], - "", 0.17); + testing_utilities::Check_Results( + 4.0, low_res_l2norm / high_res_l2norms["contact_" + std::to_string(domain_direction)], "", 0.17); } INSTANTIATE_TEST_SUITE_P(, tMHDSYSTEMLinearWavesParameterizedAngle, From dbbb64d014011f78efd79aa92ddea583534b0511 Mon Sep 17 00:00:00 2001 From: evazlimen <109487593+evazlimen@users.noreply.github.com> Date: Fri, 22 Sep 2023 12:53:21 -0700 Subject: [PATCH 542/694] format --- src/analysis/feedback_analysis.cpp | 2 +- src/chemistry_gpu/chemistry_functions.cpp | 2 +- src/cooling_grackle/cool_grackle.cpp | 2 +- src/gravity/gravity_functions.cpp | 8 ++-- src/gravity/gravity_functions_gpu.cu | 4 +- src/gravity/static_grav.h | 44 +++++++++++----------- src/grid/grid3D.cpp | 6 +-- src/grid/initial_conditions.cpp | 20 +++++----- src/integrators/VL_3D_cuda.cu | 5 ++- src/integrators/VL_3D_cuda.h | 3 +- src/io/io.cpp | 4 +- src/particles/io_particles.cpp | 14 +++---- src/particles/particles_3D.cpp | 14 +++---- src/particles/particles_boundaries_cpu.cpp | 4 +- src/reconstruction/plmp_cuda.cu | 2 +- src/reconstruction/ppmc_cuda_tests.cu | 2 +- src/reconstruction/ppmp_cuda.cu | 2 +- src/reconstruction/reconstruction_tests.cu | 8 +--- src/system_tests/hydro_system_tests.cpp | 4 +- 19 files changed, 74 insertions(+), 76 deletions(-) diff --git a/src/analysis/feedback_analysis.cpp b/src/analysis/feedback_analysis.cpp index 4f870a33c..5241ec9ed 100644 --- a/src/analysis/feedback_analysis.cpp +++ b/src/analysis/feedback_analysis.cpp @@ -87,7 +87,7 @@ void FeedbackAnalysis::Compute_Gas_Velocity_Dispersion(Grid3D& G) #ifdef MPI_CHOLLA MPI_Allreduce(&partial_mass, &total_mass, 1, MPI_CHREAL, MPI_SUM, world); #else - total_mass = partial_mass; + total_mass = partial_mass; #endif for (k = G.H.n_ghost; k < G.H.nz - G.H.n_ghost; k++) { diff --git a/src/chemistry_gpu/chemistry_functions.cpp b/src/chemistry_gpu/chemistry_functions.cpp index 181c2a98e..ba67bf445 100644 --- a/src/chemistry_gpu/chemistry_functions.cpp +++ b/src/chemistry_gpu/chemistry_functions.cpp @@ -228,7 +228,7 @@ void Grid3D::Update_Chemistry() #ifdef COSMOLOGY Chem.H.current_z = Cosmo.current_z; #else - Chem.H.current_z = 0; + Chem.H.current_z = 0; #endif Do_Chemistry_Update(C.device, H.nx, H.ny, H.nz, H.n_ghost, H.n_fields, H.dt, Chem.H); diff --git a/src/cooling_grackle/cool_grackle.cpp b/src/cooling_grackle/cool_grackle.cpp index f57edcdb4..cecb9b4d4 100644 --- a/src/cooling_grackle/cool_grackle.cpp +++ b/src/cooling_grackle/cool_grackle.cpp @@ -89,7 +89,7 @@ void Cool_GK::Initialize(struct parameters *P, Cosmology &Cosmo) data->metal_cooling = 1; // metal cooling off #else chprintf("WARNING: Metal Cooling is Off. \n"); - data->metal_cooling = 0; // metal cooling off + data->metal_cooling = 0; // metal cooling off #endif #ifdef PARALLEL_OMP diff --git a/src/gravity/gravity_functions.cpp b/src/gravity/gravity_functions.cpp index 70eb749c9..5321780dd 100644 --- a/src/gravity/gravity_functions.cpp +++ b/src/gravity/gravity_functions.cpp @@ -137,7 +137,7 @@ void Grid3D::set_dt_Gravity() dt_particles = Calc_Particles_dt(); dt_particles = fmin(dt_particles, Particles.max_dt); #ifdef ONLY_PARTICLES - dt_min = dt_particles; + dt_min = dt_particles; chprintf(" dt_particles: %f \n", dt_particles); #else chprintf(" dt_hydro: %f dt_particles: %f \n", dt_hydro, dt_particles); @@ -211,7 +211,7 @@ Real Grav3D::Get_Average_Density() #ifdef MPI_CHOLLA dens_avrg_all = ReduceRealAvg(dens_mean); #else - dens_avrg_all = dens_mean; + dens_avrg_all = dens_mean; #endif dens_avrg = dens_avrg_all; @@ -530,8 +530,8 @@ void Grid3D::Compute_Gravitational_Potential(struct parameters *P) input_density = Grav.F.density_d; output_potential = Grav.F.potential_d; #else - input_density = Grav.F.density_h; - output_potential = Grav.F.potential_h; + input_density = Grav.F.density_h; + output_potential = Grav.F.potential_h; #endif #ifdef SOR diff --git a/src/gravity/gravity_functions_gpu.cu b/src/gravity/gravity_functions_gpu.cu index 236670b49..6cd177163 100644 --- a/src/gravity/gravity_functions_gpu.cu +++ b/src/gravity/gravity_functions_gpu.cu @@ -127,7 +127,7 @@ void Grid3D::Copy_Hydro_Density_to_Gravity_GPU() #ifdef COSMOLOGY cosmo_rho_0_gas = Cosmo.rho_0_gas; #else - cosmo_rho_0_gas = 1.0; + cosmo_rho_0_gas = 1.0; #endif // Copy the density from the device array to the Poisson input density array @@ -261,7 +261,7 @@ void Grid3D::Extrapolate_Grav_Potential_GPU() #ifdef COSMOLOGY cosmo_factor = Cosmo.current_a * Cosmo.current_a / Cosmo.phi_0_gas; #else - cosmo_factor = 1.0; + cosmo_factor = 1.0; #endif // set values for GPU kernels diff --git a/src/gravity/static_grav.h b/src/gravity/static_grav.h index 6106d8800..9b05181c1 100644 --- a/src/gravity/static_grav.h +++ b/src/gravity/static_grav.h @@ -18,13 +18,13 @@ inline __device__ void calc_g_1D(int xid, int x_off, int n_ghost, int custom_gra { Real x_pos, r_disk, r_halo; x_pos = (x_off + xid - n_ghost + 0.5) * dx + xbound; - //set gravity field according to parameter file input + // set gravity field according to parameter file input switch (custom_grav) { - case 1: - //1D NFW halo & Miyamoto-Nagai disk - // for disk components, calculate polar r - // r_disk = 0.220970869121; - // r_disk = 6.85009694274; + case 1: + // 1D NFW halo & Miyamoto-Nagai disk + // for disk components, calculate polar r + // r_disk = 0.220970869121; + // r_disk = 6.85009694274; r_disk = 13.9211647546; // r_disk = 20.9922325665; // for halo, calculate spherical r @@ -85,7 +85,7 @@ inline __device__ void calc_g_2D(int xid, int yid, int x_off, int y_off, int n_g } break; case 2: - //Rayleigh-Taylor instability + // Rayleigh-Taylor instability *gx = 0; *gy = -1; break; @@ -96,7 +96,7 @@ inline __device__ void calc_g_2D(int xid, int yid, int x_off, int y_off, int n_g *gx = -cos(phi) * GN * M / (r * r); *gy = -sin(phi) * GN * M / (r * r); break; - case 4: + case 4: // set gravitational acceleration for Kuzmin disk + NFW halo Real a_d, a_h, a, M_vir, M_d, R_vir, R_d, R_s, M_h, c_vir, x; M_vir = 1.0e12; // viral mass of MW in M_sun @@ -145,14 +145,14 @@ inline __device__ void calc_g_3D(int xid, int yid, int zid, int x_off, int y_off case 1: // Milky way disk model // set properties of halo and disk (these must match initial conditions) - + M_vir = 1.0e12; // viral mass of in M_sun M_d = 6.5e10; // viral mass of in M_sun R_d = 3.5; // disk scale length in kpc z_d = 3.5 / 5.0; // disk scale height in kpc R_vir = 261.; // virial radius in kpc c_vir = 20.0; // halo concentration - + M_h = M_vir - M_d; // halo mass in M_sun R_h = R_vir / c_vir; // halo scale length in kpc phi_0_h = GN * M_h / (log(1.0 + c_vir) - c_vir / (1.0 + c_vir)); @@ -172,18 +172,18 @@ inline __device__ void calc_g_3D(int xid, int yid, int zid, int x_off, int y_off *gy = (y_pos / r_disk) * (a_disk_r + a_halo_r); *gz = a_disk_z + a_halo_z; break; - case 2: - // M82 model - // set properties of halo and disk (these must match initial conditions) - - M_vir = 5.0e10; // viral mass of in M_sun - M_d = 1.0e10; // mass of disk in M_sun - R_d = 0.8; // disk scale length in kpc - z_d = 0.15; // disk scale height in kpc - R_vir = R_d/0.015; // viral radius in kpc - c_vir = 10.0; // halo concentration - - M_h = M_vir - M_d; // halo mass in M_sun + case 2: + // M82 model + // set properties of halo and disk (these must match initial conditions) + + M_vir = 5.0e10; // viral mass of in M_sun + M_d = 1.0e10; // mass of disk in M_sun + R_d = 0.8; // disk scale length in kpc + z_d = 0.15; // disk scale height in kpc + R_vir = R_d / 0.015; // viral radius in kpc + c_vir = 10.0; // halo concentration + + M_h = M_vir - M_d; // halo mass in M_sun R_h = R_vir / c_vir; // halo scale length in kpc phi_0_h = GN * M_h / (log(1.0 + c_vir) - c_vir / (1.0 + c_vir)); x = r_halo / R_h; diff --git a/src/grid/grid3D.cpp b/src/grid/grid3D.cpp index e9d76184a..d25811ab2 100644 --- a/src/grid/grid3D.cpp +++ b/src/grid/grid3D.cpp @@ -262,7 +262,7 @@ void Grid3D::Initialize(struct parameters *P) #ifdef DENSITY_FLOOR H.density_floor = DENS_FLOOR; #else - H.density_floor = 0.0; + H.density_floor = 0.0; #endif #ifdef TEMPERATURE_FLOOR @@ -340,8 +340,8 @@ void Grid3D::AllocateMemory(void) CudaSafeCall(cudaHostAlloc(&C.Grav_potential, H.n_cells * sizeof(Real), cudaHostAllocDefault)); CudaSafeCall(cudaMalloc((void **)&C.d_Grav_potential, H.n_cells * sizeof(Real))); #else - C.Grav_potential = NULL; - C.d_Grav_potential = NULL; + C.Grav_potential = NULL; + C.d_Grav_potential = NULL; #endif #ifdef CHEMISTRY_GPU diff --git a/src/grid/initial_conditions.cpp b/src/grid/initial_conditions.cpp index fded9236b..f1e3a3307 100644 --- a/src/grid/initial_conditions.cpp +++ b/src/grid/initial_conditions.cpp @@ -1481,18 +1481,18 @@ void Grid3D::Zeldovich_Pancake(struct parameters P) Real H0, h, Omega_M, rho_0, G, z_zeldovich, z_init, x_center, T_init, k_x; chprintf("Setting Zeldovich Pancake initial conditions...\n"); - H0 = P.H0; - h = H0 / 100; + H0 = P.H0; + h = H0 / 100; Omega_M = P.Omega_M; chprintf(" h = %f \n", h); chprintf(" Omega_M = %f \n", Omega_M); H0 /= 1000; //[km/s / kpc] - G = G_COSMO; - rho_0 = 3 * H0 * H0 / (8 * M_PI * G) * Omega_M / h / h; + G = G_COSMO; + rho_0 = 3 * H0 * H0 / (8 * M_PI * G) * Omega_M / h / h; z_zeldovich = 1; - z_init = P.Init_redshift; + z_init = P.Init_redshift; chprintf(" rho_0 = %f \n", rho_0); chprintf(" z_init = %f \n", z_init); chprintf(" z_zeldovich = %f \n", z_zeldovich); @@ -1552,17 +1552,17 @@ void Grid3D::Zeldovich_Pancake(struct parameters P) index = (int(x_pos / H.dx) + 0) % 256; // index = ( index + 16 ) % 256; dens = ics_values[0 * nPoints + index]; - vel = ics_values[1 * nPoints + index]; - E = ics_values[2 * nPoints + index]; - U = ics_values[3 * nPoints + index]; + vel = ics_values[1 * nPoints + index]; + E = ics_values[2 * nPoints + index]; + U = ics_values[3 * nPoints + index]; // // // chprintf( "%f \n", vel ); - C.density[id] = dens; + C.density[id] = dens; C.momentum_x[id] = dens * vel; C.momentum_y[id] = 0; C.momentum_z[id] = 0; - C.Energy[id] = E; + C.Energy[id] = E; #ifdef DE C.GasEnergy[id] = U; diff --git a/src/integrators/VL_3D_cuda.cu b/src/integrators/VL_3D_cuda.cu index a9db3e4af..097b40625 100644 --- a/src/integrators/VL_3D_cuda.cu +++ b/src/integrators/VL_3D_cuda.cu @@ -37,7 +37,8 @@ __global__ void Update_Conserved_Variables_3D_half(Real *dev_conserved, Real *de void VL_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, int ny, int nz, int x_off, int y_off, int z_off, int n_ghost, Real dx, Real dy, Real dz, Real xbound, Real ybound, Real zbound, - Real dt, int n_fields, int custom_grav, Real density_floor, Real U_floor, Real *host_grav_potential) + Real dt, int n_fields, int custom_grav, Real density_floor, Real U_floor, + Real *host_grav_potential) { // Here, *dev_conserved contains the entire // set of conserved variables on the grid @@ -122,7 +123,7 @@ void VL_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, int #if defined(GRAVITY) dev_grav_potential = d_grav_potential; #else // not GRAVITY - dev_grav_potential = NULL; + dev_grav_potential = NULL; #endif // GRAVITY // If memory is single allocated: memory_allocated becomes true and diff --git a/src/integrators/VL_3D_cuda.h b/src/integrators/VL_3D_cuda.h index 44b036dda..3f2cf8d75 100644 --- a/src/integrators/VL_3D_cuda.h +++ b/src/integrators/VL_3D_cuda.h @@ -10,7 +10,8 @@ void VL_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, int ny, int nz, int x_off, int y_off, int z_off, int n_ghost, Real dx, Real dy, Real dz, Real xbound, Real ybound, Real zbound, - Real dt, int n_fields, int custom_grav, Real density_floor, Real U_floor, Real *host_grav_potential); + Real dt, int n_fields, int custom_grav, Real density_floor, Real U_floor, + Real *host_grav_potential); void Free_Memory_VL_3D(); diff --git a/src/io/io.cpp b/src/io/io.cpp index 09ffd0d17..227ac84f0 100644 --- a/src/io/io.cpp +++ b/src/io/io.cpp @@ -1394,12 +1394,12 @@ void Grid3D::Write_Grid_HDF5(hid_t file_id) #ifdef OUTPUT_METALS output_metals = true; #else // not OUTPUT_METALS - output_metals = false; + output_metals = false; #endif // OUTPUT_METALS #ifdef OUTPUT_ELECTRONS output_electrons = true; #else // not OUTPUT_ELECTRONS - output_electrons = false; + output_electrons = false; #endif // OUTPUT_ELECTRONS #ifdef OUTPUT_FULL_IONIZATION output_full_ionization = true; diff --git a/src/particles/io_particles.cpp b/src/particles/io_particles.cpp index 02a7d6c3a..26c90d94f 100644 --- a/src/particles/io_particles.cpp +++ b/src/particles/io_particles.cpp @@ -451,12 +451,12 @@ void Particles_3D::Load_Particles_Data_HDF5(hid_t file_id, int nfile, struct par Real vy_max_g = vy_max; Real vz_max_g = vz_max; - Real px_min_g = px_min; - Real py_min_g = py_min; - Real pz_min_g = pz_min; - Real vx_min_g = vx_min; - Real vy_min_g = vy_min; - Real vz_min_g = vz_min; + Real px_min_g = px_min; + Real py_min_g = py_min; + Real pz_min_g = pz_min; + Real vx_min_g = vx_min; + Real vy_min_g = vy_min; + Real vz_min_g = vz_min; #endif // MPI_CHOLLA // Print initial Statistics @@ -569,7 +569,7 @@ void Grid3D::Write_Particles_Data_HDF5(hid_t file_id) #ifdef MPI_CHOLLA N_particles_total = ReducePartIntSum(Particles.n_local); #else - N_particles_total = Particles.n_local; + N_particles_total = Particles.n_local; #endif // Print the total particles when saving the particles data diff --git a/src/particles/particles_3D.cpp b/src/particles/particles_3D.cpp index e8ac74dbe..ec1b8b735 100644 --- a/src/particles/particles_3D.cpp +++ b/src/particles/particles_3D.cpp @@ -157,12 +157,12 @@ void Particles_3D::Initialize(struct parameters *P, Grav3D &Grav, Real xbound, R G.boundary_type_z0 = P->zlg_bcnd; G.boundary_type_z1 = P->zug_bcnd; #else - G.boundary_type_x0 = P->xl_bcnd; - G.boundary_type_x1 = P->xu_bcnd; - G.boundary_type_y0 = P->yl_bcnd; - G.boundary_type_y1 = P->yu_bcnd; - G.boundary_type_z0 = P->zl_bcnd; - G.boundary_type_z1 = P->zu_bcnd; + G.boundary_type_x0 = P->xl_bcnd; + G.boundary_type_x1 = P->xu_bcnd; + G.boundary_type_y0 = P->yl_bcnd; + G.boundary_type_y1 = P->yu_bcnd; + G.boundary_type_z0 = P->zl_bcnd; + G.boundary_type_z1 = P->zu_bcnd; #endif #ifdef PARTICLES_GPU @@ -211,7 +211,7 @@ void Particles_3D::Initialize(struct parameters *P, Grav3D &Grav, Real xbound, R #ifdef MPI_CHOLLA n_total_initial = ReducePartIntSum(n_local); #else - n_total_initial = n_local; + n_total_initial = n_local; #endif chprintf("Particles Initialized: \n n_local: %lu \n", n_local); diff --git a/src/particles/particles_boundaries_cpu.cpp b/src/particles/particles_boundaries_cpu.cpp index 19fc238ef..ccdf008a8 100644 --- a/src/particles/particles_boundaries_cpu.cpp +++ b/src/particles/particles_boundaries_cpu.cpp @@ -433,13 +433,13 @@ void Particles_3D::Unload_Particles_from_Buffer_CPU(int direction, int side, Rea offset_extra += 1; pId = recv_buffer[offset_extra]; #else - pId = 0; + pId = 0; #endif #ifdef PARTICLE_AGE offset_extra += 1; pAge = recv_buffer[offset_extra]; #else - pAge = 0.0; + pAge = 0.0; #endif offset_buff += N_DATA_PER_PARTICLE_TRANSFER; diff --git a/src/reconstruction/plmp_cuda.cu b/src/reconstruction/plmp_cuda.cu index f69bbdc4b..a000da4da 100644 --- a/src/reconstruction/plmp_cuda.cu +++ b/src/reconstruction/plmp_cuda.cu @@ -120,7 +120,7 @@ __global__ void PLMP_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou dge = dev_conserved[(n_fields - 1) * n_cells + id]; p_i = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, dge, gamma); #else - p_i = (dev_conserved[4 * n_cells + id] - 0.5 * d_i * (vx_i * vx_i + vy_i * vy_i + vz_i * vz_i)) * (gamma - 1.0); + p_i = (dev_conserved[4 * n_cells + id] - 0.5 * d_i * (vx_i * vx_i + vy_i * vy_i + vz_i * vz_i)) * (gamma - 1.0); #endif // PRESSURE_DE p_i = fmax(p_i, (Real)TINY_NUMBER); #ifdef SCALAR diff --git a/src/reconstruction/ppmc_cuda_tests.cu b/src/reconstruction/ppmc_cuda_tests.cu index 1c7515ec0..d523964ca 100644 --- a/src/reconstruction/ppmc_cuda_tests.cu +++ b/src/reconstruction/ppmc_cuda_tests.cu @@ -139,7 +139,7 @@ TEST(tALLPpmcVLReconstructor, CorrectInputExpectCorrectOutput) #ifdef MHD size_t const n_fields = 8; #else // not MHD - size_t const n_fields = 5; + size_t const n_fields = 5; #endif // MHD // Setup host grid. Fill host grid with random values and randomly assign maximum value diff --git a/src/reconstruction/ppmp_cuda.cu b/src/reconstruction/ppmp_cuda.cu index 36b74aebf..89eaccc21 100644 --- a/src/reconstruction/ppmp_cuda.cu +++ b/src/reconstruction/ppmp_cuda.cu @@ -166,7 +166,7 @@ __global__ void PPMP_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou dge = dev_conserved[(n_fields - 1) * n_cells + id]; p_i = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, dge, gamma); #else - p_i = (dev_conserved[4 * n_cells + id] - 0.5 * d_i * (vx_i * vx_i + vy_i * vy_i + vz_i * vz_i)) * (gamma - 1.0); + p_i = (dev_conserved[4 * n_cells + id] - 0.5 * d_i * (vx_i * vx_i + vy_i * vy_i + vz_i * vz_i)) * (gamma - 1.0); #endif // PRESSURE_DE p_i = fmax(p_i, (Real)TINY_NUMBER); #ifdef DE diff --git a/src/reconstruction/reconstruction_tests.cu b/src/reconstruction/reconstruction_tests.cu index 5f8000bf8..34906337b 100644 --- a/src/reconstruction/reconstruction_tests.cu +++ b/src/reconstruction/reconstruction_tests.cu @@ -576,13 +576,9 @@ TEST(tALLReconstructionWriteData, CorrectInputExpectCorrectOutput) { // Set up test and mock up grid #ifdef MHD - reconstruction::Primitive interface { - 1, 2, 3, 4, 5, 6, 7, 8 - }; + reconstruction::Primitive interface{1, 2, 3, 4, 5, 6, 7, 8}; #else // MHD - reconstruction::Primitive interface { - 6, 7, 8, 9, 10 - }; + reconstruction::Primitive interface{6, 7, 8, 9, 10}; #endif // MHD size_t const nx = 3, ny = 3, nz = 3; size_t const n_cells = nx * ny * nz; diff --git a/src/system_tests/hydro_system_tests.cpp b/src/system_tests/hydro_system_tests.cpp index 288690290..1a13d6102 100644 --- a/src/system_tests/hydro_system_tests.cpp +++ b/src/system_tests/hydro_system_tests.cpp @@ -157,8 +157,8 @@ class tHYDROtMHDSYSTEMLinearWavesParameterizedMpi : public ::testing::TestWithPa double const allowedL1Error = 4E-7; // Based on results in Gardiner & Stone 2008 double const allowedError = 4E-7; #elif defined(PLMC) - double const allowedL1Error = 1E-7; // Based on results in Gardiner & Stone 2008 - double const allowedError = 1E-7; + double const allowedL1Error = 1E-7; // Based on results in Gardiner & Stone 2008 + double const allowedError = 1E-7; #elif defined(PPMC) double const allowedL1Error = 2.7E-8; // Based on results in Gardiner & Stone 2008 double const allowedError = 2.7E-8; From 1c2fb8513b5f6011814014383fefb834018d3160 Mon Sep 17 00:00:00 2001 From: helenarichie Date: Fri, 22 Sep 2023 16:02:20 -0400 Subject: [PATCH 543/694] remove accidental commit --- src/reconstruction/reconstruction_tests.cu | 1 - 1 file changed, 1 deletion(-) diff --git a/src/reconstruction/reconstruction_tests.cu b/src/reconstruction/reconstruction_tests.cu index 135b40e86..8db7c5fc4 100644 --- a/src/reconstruction/reconstruction_tests.cu +++ b/src/reconstruction/reconstruction_tests.cu @@ -374,7 +374,6 @@ TEST(tALLReconstructionMonotonizeCharacteristicReturnPrimitive, CorrectInputExpe // Check results #ifdef MHD reconstruction::Primitive const fiducial_data{5046, 2934, -2526, -2828, 1441532, 0.0, -69716, 72152}; -======= testing_utilities::Check_Results(fiducial_data.density, host_results.density, "density"); testing_utilities::Check_Results(fiducial_data.velocity_x, host_results.velocity_x, "velocity_x"); testing_utilities::Check_Results(fiducial_data.velocity_y, host_results.velocity_y, "velocity_y"); From 0f90a64b3c975885e5e4aeafd487fcf60fcdb9b3 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Thu, 10 Aug 2023 16:10:10 -0400 Subject: [PATCH 544/694] Fix clang-tidy checking HDF5 headers --- .clang-tidy | 3 +-- Makefile | 3 ++- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.clang-tidy b/.clang-tidy index 970949d14..6486d2883 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -133,8 +133,7 @@ Checks: "*, -readability-redundant-preprocessor, -readability-suspicious-call-argument" WarningsAsErrors: '' -# More paths can be ignored by modifying this so that it looks like '^((?!/PATH/ONE/|/PATH/TWO/).)*$' -HeaderFilterRegex: '^((?!/ihome/crc/install/power9/googletest/1.11.0/include/|/ihome/crc/install/power9/googletest/1.11.0/include/|/usr/lib/x86_64-linux-gnu/hdf5/serial/include/).)*$' +HeaderFilterRegex: '.*' AnalyzeTemporaryDtors: false FormatStyle: 'file' UseColor: false diff --git a/Makefile b/Makefile index 10feb281c..e53a660c5 100644 --- a/Makefile +++ b/Makefile @@ -176,7 +176,8 @@ DFLAGS += $(MACRO_FLAGS) # Setup variables for clang-tidy LIBS_CLANG_TIDY := $(subst -I/, -isystem /,$(LIBS)) -LIBS_CLANG_TIDY += -isystem $(MPI_ROOT)/include +# This tells clang-tidy that the path after each -isystem command is a system library so that it can be easily ignored by the header filter regex +LIBS_CLANG_TIDY += -isystem $(MPI_ROOT)/include -isystem $(HDF5_ROOT)/include CXXFLAGS_CLANG_TIDY := $(subst -I/, -isystem /,$(LDFLAGS)) GPUFLAGS_CLANG_TIDY := $(subst -I/, -isystem /,$(GPUFLAGS)) GPUFLAGS_CLANG_TIDY := $(filter-out -ccbin=mpicxx -fmad=false --expt-extended-lambda,$(GPUFLAGS)) From 163db5a6aa3f43f68ab6b412ed7bb8590ef56bbb Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Thu, 10 Aug 2023 16:13:05 -0400 Subject: [PATCH 545/694] Fix the use of reserved identifiers --- .clang-tidy | 4 ++++ src/system_tests/system_tester.cpp | 16 ++++++++-------- src/system_tests/system_tester.h | 4 ++-- 3 files changed, 14 insertions(+), 10 deletions(-) diff --git a/.clang-tidy b/.clang-tidy index 6486d2883..5c767f7ac 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -34,6 +34,8 @@ Checks: "*, -misc-unused-parameters, -hicpp-multiway-paths-covered, -cert-err58-cpp, + -cert-dcl37-c, + -cert-dcl51-cpp, -cppcoreguidelines-pro-bounds-constant-array-index, google-readability-avoid-underscore-in-googletest-name, @@ -167,4 +169,6 @@ CheckOptions: # readability-identifier-naming.PrivateMethodSuffix: '_' hicpp-signed-bitwise.IgnorePositiveIntegerLiterals: 'true' + + bugprone-reserved-identifier.AllowedIdentifiers: '__cudaSafeCall;__cudaCheckError;__shfl_down' ... diff --git a/src/system_tests/system_tester.cpp b/src/system_tests/system_tester.cpp index 188d39f39..2fa2a4129 100644 --- a/src/system_tests/system_tester.cpp +++ b/src/system_tests/system_tester.cpp @@ -108,7 +108,7 @@ void system_test::SystemTestRunner::runTest(bool const &compute_L2_norm_only, do << std::endl; // Compute the L1 Error. - _L2Norm = 0; + L2Norm_ = 0; double maxError = 0; // Loop over the datasets to be tested for (auto const &dataSetName : _fiducialDataSetNames) { @@ -187,14 +187,14 @@ void system_test::SystemTestRunner::runTest(bool const &compute_L2_norm_only, do if (compute_L2_norm_only) { L1_error /= static_cast(testDims[0] * testDims[1] * testDims[2]); - _L2Norm += L1_error * L1_error; + L2Norm_ += L1_error * L1_error; } } if (compute_L2_norm_only) { // Check the L2 Norm - _L2Norm = std::sqrt(_L2Norm); - EXPECT_LT(_L2Norm, maxAllowedL1Error) << "the norm of the L1 error vector has exceeded the allowed value"; + L2Norm_ = std::sqrt(L2Norm_); + EXPECT_LT(L2Norm_, maxAllowedL1Error) << "the norm of the L1 error vector has exceeded the allowed value"; // Check the Max Error EXPECT_LT(maxError, maxAllowedError) << "The maximum error has exceeded the allowed value"; @@ -270,7 +270,7 @@ void system_test::SystemTestRunner::runL1ErrorTest(double const &maxAllowedL1Err << std::endl; // Loop over the datasets to be tested - _L2Norm = 0; + L2Norm_ = 0; double maxError = 0; for (auto const &dataSetName : _fiducialDataSetNames) { if (dataSetName == "GasEnergy") { @@ -312,7 +312,7 @@ void system_test::SystemTestRunner::runL1ErrorTest(double const &maxAllowedL1Err } L1_error /= static_cast(initialDims[0] * initialDims[1] * initialDims[2]); - _L2Norm += L1_error * L1_error; + L2Norm_ += L1_error * L1_error; // Perform the correctness check EXPECT_LT(L1_error, maxAllowedL1Error) @@ -320,8 +320,8 @@ void system_test::SystemTestRunner::runL1ErrorTest(double const &maxAllowedL1Err } // Check the L2 Norm - _L2Norm = std::sqrt(_L2Norm); - EXPECT_LT(_L2Norm, maxAllowedL1Error) << "the norm of the L1 error vector has exceeded the allowed value"; + L2Norm_ = std::sqrt(L2Norm_); + EXPECT_LT(L2Norm_, maxAllowedL1Error) << "the norm of the L1 error vector has exceeded the allowed value"; // Check the Max Error EXPECT_LT(maxError, maxAllowedError) << "The maximum error has exceeded the allowed value"; diff --git a/src/system_tests/system_tester.h b/src/system_tests/system_tester.h index 7ba1b5a53..c0612806e 100644 --- a/src/system_tests/system_tester.h +++ b/src/system_tests/system_tester.h @@ -111,7 +111,7 @@ class system_test::SystemTestRunner * * \return double The L2Norm of the last run test */ - double getL2Norm() { return _L2Norm; }; + double getL2Norm() { return L2Norm_; }; /*! * \brief Get the Output Directory object @@ -312,7 +312,7 @@ class system_test::SystemTestRunner double _fixedEpsilon = 5.0E-12; /// The L2 norm of the error vector - double _L2Norm; + double L2Norm_; /// Flag to indicate if a fiducial HDF5 data file is being used or a /// programmatically generated H5File object. `true` = use a file, `false` = From 74b90691fab3ff8e71f0e02ea56f6d748388e3cb Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Thu, 10 Aug 2023 16:13:37 -0400 Subject: [PATCH 546/694] Fix a case where an array could be of size zero --- src/mpi/mpi_routines.cpp | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/mpi/mpi_routines.cpp b/src/mpi/mpi_routines.cpp index 7f03fd7b4..bb0cbf24d 100644 --- a/src/mpi/mpi_routines.cpp +++ b/src/mpi/mpi_routines.cpp @@ -542,17 +542,14 @@ void DomainDecompositionBLOCK(struct parameters *P, struct Header *H, int nx_gin void Allocate_MPI_DeviceBuffers(struct Header *H) { - int xbsize, ybsize, zbsize; + int xbsize = 1, ybsize = 1, zbsize = 1; if (H->ny == 1 && H->nz == 1) { xbsize = H->n_fields * H->n_ghost; - ybsize = 1; - zbsize = 1; } // 2D else if (H->ny > 1 && H->nz == 1) { xbsize = H->n_fields * H->n_ghost * (H->ny - 2 * H->n_ghost); ybsize = H->n_fields * H->n_ghost * (H->nx); - zbsize = 1; } // 3D else if (H->ny > 1 && H->nz > 1) { From 68fc71ea0f1e3c9f9a7cd46c9a9054d587ff81ba Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Thu, 10 Aug 2023 16:14:05 -0400 Subject: [PATCH 547/694] Fully allocate a vector before writing to it --- src/model/disk_galaxy.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/model/disk_galaxy.h b/src/model/disk_galaxy.h index 04249853f..fd3dcc53f 100644 --- a/src/model/disk_galaxy.h +++ b/src/model/disk_galaxy.h @@ -166,9 +166,9 @@ class ClusteredDiskGalaxy : public DiskGalaxy std::vector generateClusterPopulationMasses(int N, std::mt19937_64 generator) { - std::vector population; + std::vector population(N); for (int i = 0; i < N; i++) { - population.push_back(singleClusterMass(generator)); + population[singleClusterMass(generator)]; } return population; } From 293f581a4706c037c157b46b1db547676ba7cb4a Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Thu, 10 Aug 2023 16:16:55 -0400 Subject: [PATCH 548/694] Remove unneeded const in DeviceVector --- src/utils/DeviceVector.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/utils/DeviceVector.h b/src/utils/DeviceVector.h index 1af7aa3da..4024edf34 100644 --- a/src/utils/DeviceVector.h +++ b/src/utils/DeviceVector.h @@ -93,7 +93,7 @@ class DeviceVector * \param[in] index The index of the desired value * \return T The value at dev_ptr[index] */ - T const at(size_t const index); + T at(size_t const index); /*! * \brief Assign a single value in the array. Should generally only be @@ -255,7 +255,7 @@ T DeviceVector::operator[](size_t const &index) // ========================================================================= template -T const DeviceVector::at(size_t const index) +T DeviceVector::at(size_t const index) { if (index < _size) { // Use the overloaded [] operator to grab the value from GPU memory From 492cefd0d7e930c2493d464fe26157daff56cf90 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Thu, 10 Aug 2023 16:21:35 -0400 Subject: [PATCH 549/694] Remove ran.h since it was unused --- src/grid/grid3D.cpp | 1 - src/utils/ran.h | 33 --------------------------------- 2 files changed, 34 deletions(-) delete mode 100644 src/utils/ran.h diff --git a/src/grid/grid3D.cpp b/src/grid/grid3D.cpp index a9f36c718..ebd677eee 100644 --- a/src/grid/grid3D.cpp +++ b/src/grid/grid3D.cpp @@ -18,7 +18,6 @@ #include "../integrators/simple_3D_cuda.h" #include "../io/io.h" #include "../utils/error_handling.h" -#include "../utils/ran.h" #ifdef MPI_CHOLLA #include #ifdef HDF5 diff --git a/src/utils/ran.h b/src/utils/ran.h deleted file mode 100644 index 95906713a..000000000 --- a/src/utils/ran.h +++ /dev/null @@ -1,33 +0,0 @@ -#include -#include - -typedef unsigned long long int Ullong; -typedef double Doub; -typedef unsigned int Uint; - -struct Ran { - Ullong u, v, w; - Ran(Ullong j) : v(4101842887655102017LL), w(1) - { - u = j ^ v; - int64(); - v = u; - int64(); - w = v; - int64(); - } - inline Ullong int64() - { - u = u * 2862933555777941757LL + 7046029254386353087LL; - v ^= v >> 17; - v ^= v << 31; - v ^= v >> 8; - w = 4294957665U * (w & 0xffffffff) + (w >> 32); - Ullong x = u ^ (u << 21); - x ^= x >> 35; - x ^= x << 4; - return (x + v) ^ w; - } - inline Doub doub() { return 5.42101086242752217E-20 * int64(); } - inline Uint int32() { return (Uint)int64(); } -}; From a46c6150b3d733491344223e10e92686acc91b57 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Thu, 10 Aug 2023 16:39:54 -0400 Subject: [PATCH 550/694] clang-tidy avoid duplicate usage of type name --- src/utils/error_handling.cpp | 1 + src/utils/reduction_utilities.h | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/utils/error_handling.cpp b/src/utils/error_handling.cpp index 5a7bad073..9623fd267 100644 --- a/src/utils/error_handling.cpp +++ b/src/utils/error_handling.cpp @@ -75,6 +75,7 @@ void Check_Configuration(parameters const& P) // warn if error checking is disabled #ifndef CUDA_ERROR_CHECK + // NOLINTNEXTLINE(clang-diagnostic-#warnings) #warning "CUDA error checking is disabled. Enable it with the CUDA_ERROR_CHECK macro" #endif //! CUDA_ERROR_CHECK diff --git a/src/utils/reduction_utilities.h b/src/utils/reduction_utilities.h index 811a9a13b..429e5f0cb 100644 --- a/src/utils/reduction_utilities.h +++ b/src/utils/reduction_utilities.h @@ -127,7 +127,7 @@ inline __device__ int encode(float val) */ inline __device__ long long encode(double val) { - std::int64_t i = bit_cast(val); + auto i = bit_cast(val); return i >= 0 ? i : (1ULL << 63) | ~i; } From 4956592a0c670a130e598ff3027b71d744879cd1 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Thu, 10 Aug 2023 18:15:17 -0400 Subject: [PATCH 551/694] Switch member vars from const to static constexpr --- src/system_tests/hydro_system_tests.cpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/system_tests/hydro_system_tests.cpp b/src/system_tests/hydro_system_tests.cpp index d5b3c8b60..f449e95a7 100644 --- a/src/system_tests/hydro_system_tests.cpp +++ b/src/system_tests/hydro_system_tests.cpp @@ -56,8 +56,8 @@ TEST_P(tHYDROtMHDSYSTEMSodShockTubeParameterizedMpi, CorrectInputExpectCorrectOu double const maxAllowedL1Error = 7.0E-3; double const maxAllowedError = 4.6E-2; #else - double const maxAllowedL1Error = 9.4E-5; - double const maxAllowedError = 6.4E-4; + double const maxAllowedL1Error = 9.4E-5; + double const maxAllowedError = 6.4E-4; #endif // MHD sodTest.numMpiRanks = GetParam(); @@ -156,14 +156,14 @@ class tHYDROtMHDSYSTEMLinearWavesParameterizedMpi : public ::testing::TestWithPa system_test::SystemTestRunner waveTest; #ifdef PCM - double const allowedL1Error = 4E-7; // Based on results in Gardiner & Stone 2008 - double const allowedError = 4E-7; + double static constexpr allowedL1Error = 4E-7; // Based on results in Gardiner & Stone 2008 + double static constexpr allowedError = 4E-7; #elif defined(PLMC) - double const allowedL1Error = 1E-7; // Based on results in Gardiner & Stone 2008 - double const allowedError = 1E-7; + double static constexpr allowedL1Error = 1E-7; // Based on results in Gardiner & Stone 2008 + double static constexpr allowedError = 1E-7; #elif defined(PPMC) - double const allowedL1Error = 2.7E-8; // Based on results in Gardiner & Stone 2008 - double const allowedError = 2.7E-8; + double static constexpr allowedL1Error = 2.7E-8; // Based on results in Gardiner & Stone 2008 + double static constexpr allowedError = 2.7E-8; #endif // PCM void Set_Launch_Params(double const &waveSpeed, double const &rEigenVec_rho, double const &rEigenVec_MomentumX, From c7887c4fc1a99e6c651d2c7f0e117471f403c2e8 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Thu, 10 Aug 2023 18:17:39 -0400 Subject: [PATCH 552/694] clang-format-16 formatting --- src/grid/initial_conditions.cpp | 22 +++++++++++----------- src/io/io.cpp | 4 ++-- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/src/grid/initial_conditions.cpp b/src/grid/initial_conditions.cpp index fded9236b..600f773fc 100644 --- a/src/grid/initial_conditions.cpp +++ b/src/grid/initial_conditions.cpp @@ -1481,18 +1481,18 @@ void Grid3D::Zeldovich_Pancake(struct parameters P) Real H0, h, Omega_M, rho_0, G, z_zeldovich, z_init, x_center, T_init, k_x; chprintf("Setting Zeldovich Pancake initial conditions...\n"); - H0 = P.H0; - h = H0 / 100; + H0 = P.H0; + h = H0 / 100; Omega_M = P.Omega_M; chprintf(" h = %f \n", h); chprintf(" Omega_M = %f \n", Omega_M); H0 /= 1000; //[km/s / kpc] - G = G_COSMO; - rho_0 = 3 * H0 * H0 / (8 * M_PI * G) * Omega_M / h / h; + G = G_COSMO; + rho_0 = 3 * H0 * H0 / (8 * M_PI * G) * Omega_M / h / h; z_zeldovich = 1; - z_init = P.Init_redshift; + z_init = P.Init_redshift; chprintf(" rho_0 = %f \n", rho_0); chprintf(" z_init = %f \n", z_init); chprintf(" z_zeldovich = %f \n", z_zeldovich); @@ -1552,20 +1552,20 @@ void Grid3D::Zeldovich_Pancake(struct parameters P) index = (int(x_pos / H.dx) + 0) % 256; // index = ( index + 16 ) % 256; dens = ics_values[0 * nPoints + index]; - vel = ics_values[1 * nPoints + index]; - E = ics_values[2 * nPoints + index]; - U = ics_values[3 * nPoints + index]; + vel = ics_values[1 * nPoints + index]; + E = ics_values[2 * nPoints + index]; + U = ics_values[3 * nPoints + index]; // // // chprintf( "%f \n", vel ); - C.density[id] = dens; + C.density[id] = dens; C.momentum_x[id] = dens * vel; C.momentum_y[id] = 0; C.momentum_z[id] = 0; - C.Energy[id] = E; + C.Energy[id] = E; #ifdef DE - C.GasEnergy[id] = U; + C.GasEnergy[id] = U; #endif } } diff --git a/src/io/io.cpp b/src/io/io.cpp index 81a104146..988a1d4ec 100644 --- a/src/io/io.cpp +++ b/src/io/io.cpp @@ -1386,13 +1386,13 @@ void Grid3D::Write_Grid_HDF5(hid_t file_id) #ifdef OUTPUT_ENERGY output_energy = true; #else // not OUTPUT_ENERGY - output_energy = false; + output_energy = false; #endif // OUTPUT_ENERGY #ifdef OUTPUT_MOMENTUM output_momentum = true; #else // not OUTPUT_MOMENTUM - output_momentum = false; + output_momentum = false; #endif // OUTPUT_MOMENTUM #if defined(COOLING_GRACKLE) || defined(CHEMISTRY_GPU) From 36dc31490bb9ff41594618bcfcb8b255c078d3ca Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Mon, 11 Sep 2023 15:30:00 -0400 Subject: [PATCH 553/694] Update to clang 16 in Jenkins runs --- Jenkinsfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Jenkinsfile b/Jenkinsfile index d699732cb..0c14a493b 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -90,7 +90,7 @@ pipeline source builds/run_tests.sh setupTests -c gcc -t ${CHOLLA_MAKE_TYPE} - module load clang/15.0.2 + module load clang/16.0.6 make tidy CLANG_TIDY_ARGS="--warnings-as-errors=*" TYPE=${CHOLLA_MAKE_TYPE} ''' } From d88f9051643448b064bad798e907d4e87e22e9b9 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Fri, 22 Sep 2023 15:06:48 -0400 Subject: [PATCH 554/694] Update .clang-tidy for clang-tidy 17 --- .clang-tidy | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/.clang-tidy b/.clang-tidy index 5c767f7ac..6f97eb8f6 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -3,7 +3,7 @@ # runs all checks with some exclusions by default. # # The full list of clang-tidy 15 checks and documentation can be found -# [here](https://releases.llvm.org/15.0.0/tools/clang/tools/extra/docs/clang-tidy/index.html) +# [here](https://releases.llvm.org/17.0.1/tools/clang/tools/extra/docs/clang-tidy/index.html) # # The "Checks" command should have 5 sections seperated by a newline: # 1. Turn on all checks by default. Done with "*" @@ -41,11 +41,11 @@ Checks: "*, google-readability-avoid-underscore-in-googletest-name, google-upgrade-googletest-case, - -*, - readability-identifier-naming, - + -bugprone-empty-catch, -bugprone-implicit-widening-of-multiplication-result, -bugprone-narrowing-conversions, + -bugprone-switch-missing-default-case, + -bugprone-switch-missing-default-case, -cert-env33-c, -cert-err33-c, -cert-err34-c, @@ -60,12 +60,15 @@ Checks: "*, -clang-analyzer-optin.performance.Padding, -clang-analyzer-security.insecureAPI.strcpy, -clang-diagnostic-format, + -clang-diagnostic-logical-not-parentheses, -clang-diagnostic-macro-redefined, -clang-diagnostic-unknown-cuda-version, -clang-diagnostic-unused-command-line-argument, -clang-diagnostic-unused-result, -concurrency-mt-unsafe, -cppcoreguidelines-avoid-c-arrays, + -cppcoreguidelines-avoid-const-or-ref-data-members, + -cppcoreguidelines-avoid-do-while, -cppcoreguidelines-avoid-magic-numbers, -cppcoreguidelines-avoid-non-const-global-variables, -cppcoreguidelines-explicit-virtual-functions, @@ -83,6 +86,7 @@ Checks: "*, -cppcoreguidelines-pro-type-reinterpret-cast, -cppcoreguidelines-pro-type-vararg, -cppcoreguidelines-special-member-functions, + -cppcoreguidelines-use-default-member-init, -cppcoreguidelines-virtual-class-destructor, -google-explicit-constructor, -google-global-names-in-headers, @@ -97,6 +101,7 @@ Checks: "*, -hicpp-no-array-decay, -hicpp-no-malloc, -hicpp-special-member-functions, + -hicpp-use-auto, -hicpp-use-equals-default, -hicpp-use-noexcept, -hicpp-use-nullptr, @@ -107,11 +112,18 @@ Checks: "*, -llvm-include-order, -llvm-namespace-comment, -misc-const-correctness, + -misc-header-include-cycle, + -misc-include-cleaner, -misc-non-private-member-variables-in-classes, + -misc-use-anonymous-namespace, -modernize-avoid-c-arrays, -modernize-deprecated-headers, -modernize-macro-to-enum, -modernize-redundant-void-arg, + -modernize-type-traits, + -modernize-type-traits, + -modernize-use-auto, + -modernize-use-default-member-init, -modernize-use-equals-default, -modernize-use-nodiscard, -modernize-use-noexcept, @@ -119,6 +131,9 @@ Checks: "*, -modernize-use-override, -modernize-use-using, -openmp-use-default-none, + -performance-avoid-endl, + -performance-unnecessary-value-param, + -readability-container-size-empty, -readability-convert-member-functions-to-static, -readability-delete-null-pointer, -readability-duplicate-include, From db0acb86473514418c248308362c6895b1c4a440 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Fri, 22 Sep 2023 15:07:30 -0400 Subject: [PATCH 555/694] Remove deprecated argument from .clang-tidy --- .clang-tidy | 1 - 1 file changed, 1 deletion(-) diff --git a/.clang-tidy b/.clang-tidy index 6f97eb8f6..6508724b6 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -151,7 +151,6 @@ Checks: "*, -readability-suspicious-call-argument" WarningsAsErrors: '' HeaderFilterRegex: '.*' -AnalyzeTemporaryDtors: false FormatStyle: 'file' UseColor: false CheckOptions: From daea42054790d36df5a4c3f095831b2456258f7d Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Fri, 22 Sep 2023 15:07:50 -0400 Subject: [PATCH 556/694] Update to LLVM v17.0.1 in Jenkins build --- Jenkinsfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Jenkinsfile b/Jenkinsfile index 0c14a493b..96000eb6d 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -90,7 +90,7 @@ pipeline source builds/run_tests.sh setupTests -c gcc -t ${CHOLLA_MAKE_TYPE} - module load clang/16.0.6 + module load clang/17.0.1 make tidy CLANG_TIDY_ARGS="--warnings-as-errors=*" TYPE=${CHOLLA_MAKE_TYPE} ''' } From 18de55df84da4127678fb7a6f7eb31996d11bd96 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Fri, 22 Sep 2023 15:08:51 -0400 Subject: [PATCH 557/694] Formatting for clang-format 17 --- src/analysis/feedback_analysis.cpp | 2 +- src/chemistry_gpu/chemistry_functions.cpp | 2 +- src/cooling_grackle/cool_grackle.cpp | 2 +- src/gravity/gravity_functions.cpp | 8 ++++---- src/gravity/gravity_functions_gpu.cu | 4 ++-- src/grid/grid3D.cpp | 6 +++--- src/grid/initial_conditions.cpp | 2 +- src/integrators/VL_3D_cuda.cu | 2 +- src/io/io.cpp | 8 ++++---- src/particles/io_particles.cpp | 14 +++++++------- src/particles/particles_3D.cpp | 14 +++++++------- src/particles/particles_boundaries_cpu.cpp | 4 ++-- src/reconstruction/plmp_cuda.cu | 2 +- src/reconstruction/ppmc_cuda_tests.cu | 2 +- src/reconstruction/ppmp_cuda.cu | 2 +- src/reconstruction/reconstruction_tests.cu | 8 ++------ src/system_tests/hydro_system_tests.cpp | 4 ++-- 17 files changed, 41 insertions(+), 45 deletions(-) diff --git a/src/analysis/feedback_analysis.cpp b/src/analysis/feedback_analysis.cpp index 4f870a33c..5241ec9ed 100644 --- a/src/analysis/feedback_analysis.cpp +++ b/src/analysis/feedback_analysis.cpp @@ -87,7 +87,7 @@ void FeedbackAnalysis::Compute_Gas_Velocity_Dispersion(Grid3D& G) #ifdef MPI_CHOLLA MPI_Allreduce(&partial_mass, &total_mass, 1, MPI_CHREAL, MPI_SUM, world); #else - total_mass = partial_mass; + total_mass = partial_mass; #endif for (k = G.H.n_ghost; k < G.H.nz - G.H.n_ghost; k++) { diff --git a/src/chemistry_gpu/chemistry_functions.cpp b/src/chemistry_gpu/chemistry_functions.cpp index 181c2a98e..ba67bf445 100644 --- a/src/chemistry_gpu/chemistry_functions.cpp +++ b/src/chemistry_gpu/chemistry_functions.cpp @@ -228,7 +228,7 @@ void Grid3D::Update_Chemistry() #ifdef COSMOLOGY Chem.H.current_z = Cosmo.current_z; #else - Chem.H.current_z = 0; + Chem.H.current_z = 0; #endif Do_Chemistry_Update(C.device, H.nx, H.ny, H.nz, H.n_ghost, H.n_fields, H.dt, Chem.H); diff --git a/src/cooling_grackle/cool_grackle.cpp b/src/cooling_grackle/cool_grackle.cpp index f57edcdb4..cecb9b4d4 100644 --- a/src/cooling_grackle/cool_grackle.cpp +++ b/src/cooling_grackle/cool_grackle.cpp @@ -89,7 +89,7 @@ void Cool_GK::Initialize(struct parameters *P, Cosmology &Cosmo) data->metal_cooling = 1; // metal cooling off #else chprintf("WARNING: Metal Cooling is Off. \n"); - data->metal_cooling = 0; // metal cooling off + data->metal_cooling = 0; // metal cooling off #endif #ifdef PARALLEL_OMP diff --git a/src/gravity/gravity_functions.cpp b/src/gravity/gravity_functions.cpp index 70eb749c9..5321780dd 100644 --- a/src/gravity/gravity_functions.cpp +++ b/src/gravity/gravity_functions.cpp @@ -137,7 +137,7 @@ void Grid3D::set_dt_Gravity() dt_particles = Calc_Particles_dt(); dt_particles = fmin(dt_particles, Particles.max_dt); #ifdef ONLY_PARTICLES - dt_min = dt_particles; + dt_min = dt_particles; chprintf(" dt_particles: %f \n", dt_particles); #else chprintf(" dt_hydro: %f dt_particles: %f \n", dt_hydro, dt_particles); @@ -211,7 +211,7 @@ Real Grav3D::Get_Average_Density() #ifdef MPI_CHOLLA dens_avrg_all = ReduceRealAvg(dens_mean); #else - dens_avrg_all = dens_mean; + dens_avrg_all = dens_mean; #endif dens_avrg = dens_avrg_all; @@ -530,8 +530,8 @@ void Grid3D::Compute_Gravitational_Potential(struct parameters *P) input_density = Grav.F.density_d; output_potential = Grav.F.potential_d; #else - input_density = Grav.F.density_h; - output_potential = Grav.F.potential_h; + input_density = Grav.F.density_h; + output_potential = Grav.F.potential_h; #endif #ifdef SOR diff --git a/src/gravity/gravity_functions_gpu.cu b/src/gravity/gravity_functions_gpu.cu index 236670b49..6cd177163 100644 --- a/src/gravity/gravity_functions_gpu.cu +++ b/src/gravity/gravity_functions_gpu.cu @@ -127,7 +127,7 @@ void Grid3D::Copy_Hydro_Density_to_Gravity_GPU() #ifdef COSMOLOGY cosmo_rho_0_gas = Cosmo.rho_0_gas; #else - cosmo_rho_0_gas = 1.0; + cosmo_rho_0_gas = 1.0; #endif // Copy the density from the device array to the Poisson input density array @@ -261,7 +261,7 @@ void Grid3D::Extrapolate_Grav_Potential_GPU() #ifdef COSMOLOGY cosmo_factor = Cosmo.current_a * Cosmo.current_a / Cosmo.phi_0_gas; #else - cosmo_factor = 1.0; + cosmo_factor = 1.0; #endif // set values for GPU kernels diff --git a/src/grid/grid3D.cpp b/src/grid/grid3D.cpp index ebd677eee..e6e3bbed1 100644 --- a/src/grid/grid3D.cpp +++ b/src/grid/grid3D.cpp @@ -254,7 +254,7 @@ void Grid3D::Initialize(struct parameters *P) #ifdef DENSITY_FLOOR H.density_floor = DENS_FLOOR; #else - H.density_floor = 0.0; + H.density_floor = 0.0; #endif #ifdef TEMPERATURE_FLOOR @@ -332,8 +332,8 @@ void Grid3D::AllocateMemory(void) CudaSafeCall(cudaHostAlloc(&C.Grav_potential, H.n_cells * sizeof(Real), cudaHostAllocDefault)); CudaSafeCall(cudaMalloc((void **)&C.d_Grav_potential, H.n_cells * sizeof(Real))); #else - C.Grav_potential = NULL; - C.d_Grav_potential = NULL; + C.Grav_potential = NULL; + C.d_Grav_potential = NULL; #endif #ifdef CHEMISTRY_GPU diff --git a/src/grid/initial_conditions.cpp b/src/grid/initial_conditions.cpp index 600f773fc..f1e3a3307 100644 --- a/src/grid/initial_conditions.cpp +++ b/src/grid/initial_conditions.cpp @@ -1565,7 +1565,7 @@ void Grid3D::Zeldovich_Pancake(struct parameters P) C.Energy[id] = E; #ifdef DE - C.GasEnergy[id] = U; + C.GasEnergy[id] = U; #endif } } diff --git a/src/integrators/VL_3D_cuda.cu b/src/integrators/VL_3D_cuda.cu index cd99f472f..b7b8afb34 100644 --- a/src/integrators/VL_3D_cuda.cu +++ b/src/integrators/VL_3D_cuda.cu @@ -122,7 +122,7 @@ void VL_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, int #if defined(GRAVITY) dev_grav_potential = d_grav_potential; #else // not GRAVITY - dev_grav_potential = NULL; + dev_grav_potential = NULL; #endif // GRAVITY // If memory is single allocated: memory_allocated becomes true and diff --git a/src/io/io.cpp b/src/io/io.cpp index 988a1d4ec..09c267d76 100644 --- a/src/io/io.cpp +++ b/src/io/io.cpp @@ -1386,13 +1386,13 @@ void Grid3D::Write_Grid_HDF5(hid_t file_id) #ifdef OUTPUT_ENERGY output_energy = true; #else // not OUTPUT_ENERGY - output_energy = false; + output_energy = false; #endif // OUTPUT_ENERGY #ifdef OUTPUT_MOMENTUM output_momentum = true; #else // not OUTPUT_MOMENTUM - output_momentum = false; + output_momentum = false; #endif // OUTPUT_MOMENTUM #if defined(COOLING_GRACKLE) || defined(CHEMISTRY_GPU) @@ -1400,12 +1400,12 @@ void Grid3D::Write_Grid_HDF5(hid_t file_id) #ifdef OUTPUT_METALS output_metals = true; #else // not OUTPUT_METALS - output_metals = false; + output_metals = false; #endif // OUTPUT_METALS #ifdef OUTPUT_ELECTRONS output_electrons = true; #else // not OUTPUT_ELECTRONS - output_electrons = false; + output_electrons = false; #endif // OUTPUT_ELECTRONS #ifdef OUTPUT_FULL_IONIZATION output_full_ionization = true; diff --git a/src/particles/io_particles.cpp b/src/particles/io_particles.cpp index 02a7d6c3a..26c90d94f 100644 --- a/src/particles/io_particles.cpp +++ b/src/particles/io_particles.cpp @@ -451,12 +451,12 @@ void Particles_3D::Load_Particles_Data_HDF5(hid_t file_id, int nfile, struct par Real vy_max_g = vy_max; Real vz_max_g = vz_max; - Real px_min_g = px_min; - Real py_min_g = py_min; - Real pz_min_g = pz_min; - Real vx_min_g = vx_min; - Real vy_min_g = vy_min; - Real vz_min_g = vz_min; + Real px_min_g = px_min; + Real py_min_g = py_min; + Real pz_min_g = pz_min; + Real vx_min_g = vx_min; + Real vy_min_g = vy_min; + Real vz_min_g = vz_min; #endif // MPI_CHOLLA // Print initial Statistics @@ -569,7 +569,7 @@ void Grid3D::Write_Particles_Data_HDF5(hid_t file_id) #ifdef MPI_CHOLLA N_particles_total = ReducePartIntSum(Particles.n_local); #else - N_particles_total = Particles.n_local; + N_particles_total = Particles.n_local; #endif // Print the total particles when saving the particles data diff --git a/src/particles/particles_3D.cpp b/src/particles/particles_3D.cpp index e8ac74dbe..ec1b8b735 100644 --- a/src/particles/particles_3D.cpp +++ b/src/particles/particles_3D.cpp @@ -157,12 +157,12 @@ void Particles_3D::Initialize(struct parameters *P, Grav3D &Grav, Real xbound, R G.boundary_type_z0 = P->zlg_bcnd; G.boundary_type_z1 = P->zug_bcnd; #else - G.boundary_type_x0 = P->xl_bcnd; - G.boundary_type_x1 = P->xu_bcnd; - G.boundary_type_y0 = P->yl_bcnd; - G.boundary_type_y1 = P->yu_bcnd; - G.boundary_type_z0 = P->zl_bcnd; - G.boundary_type_z1 = P->zu_bcnd; + G.boundary_type_x0 = P->xl_bcnd; + G.boundary_type_x1 = P->xu_bcnd; + G.boundary_type_y0 = P->yl_bcnd; + G.boundary_type_y1 = P->yu_bcnd; + G.boundary_type_z0 = P->zl_bcnd; + G.boundary_type_z1 = P->zu_bcnd; #endif #ifdef PARTICLES_GPU @@ -211,7 +211,7 @@ void Particles_3D::Initialize(struct parameters *P, Grav3D &Grav, Real xbound, R #ifdef MPI_CHOLLA n_total_initial = ReducePartIntSum(n_local); #else - n_total_initial = n_local; + n_total_initial = n_local; #endif chprintf("Particles Initialized: \n n_local: %lu \n", n_local); diff --git a/src/particles/particles_boundaries_cpu.cpp b/src/particles/particles_boundaries_cpu.cpp index 19fc238ef..ccdf008a8 100644 --- a/src/particles/particles_boundaries_cpu.cpp +++ b/src/particles/particles_boundaries_cpu.cpp @@ -433,13 +433,13 @@ void Particles_3D::Unload_Particles_from_Buffer_CPU(int direction, int side, Rea offset_extra += 1; pId = recv_buffer[offset_extra]; #else - pId = 0; + pId = 0; #endif #ifdef PARTICLE_AGE offset_extra += 1; pAge = recv_buffer[offset_extra]; #else - pAge = 0.0; + pAge = 0.0; #endif offset_buff += N_DATA_PER_PARTICLE_TRANSFER; diff --git a/src/reconstruction/plmp_cuda.cu b/src/reconstruction/plmp_cuda.cu index f69bbdc4b..a000da4da 100644 --- a/src/reconstruction/plmp_cuda.cu +++ b/src/reconstruction/plmp_cuda.cu @@ -120,7 +120,7 @@ __global__ void PLMP_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou dge = dev_conserved[(n_fields - 1) * n_cells + id]; p_i = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, dge, gamma); #else - p_i = (dev_conserved[4 * n_cells + id] - 0.5 * d_i * (vx_i * vx_i + vy_i * vy_i + vz_i * vz_i)) * (gamma - 1.0); + p_i = (dev_conserved[4 * n_cells + id] - 0.5 * d_i * (vx_i * vx_i + vy_i * vy_i + vz_i * vz_i)) * (gamma - 1.0); #endif // PRESSURE_DE p_i = fmax(p_i, (Real)TINY_NUMBER); #ifdef SCALAR diff --git a/src/reconstruction/ppmc_cuda_tests.cu b/src/reconstruction/ppmc_cuda_tests.cu index b8b70aa4e..1bd67bd6f 100644 --- a/src/reconstruction/ppmc_cuda_tests.cu +++ b/src/reconstruction/ppmc_cuda_tests.cu @@ -139,7 +139,7 @@ TEST(tALLPpmcVLReconstructor, CorrectInputExpectCorrectOutput) #ifdef MHD size_t const n_fields = 8; #else // not MHD - size_t const n_fields = 5; + size_t const n_fields = 5; #endif // MHD // Setup host grid. Fill host grid with random values and randomly assign maximum value diff --git a/src/reconstruction/ppmp_cuda.cu b/src/reconstruction/ppmp_cuda.cu index 36b74aebf..89eaccc21 100644 --- a/src/reconstruction/ppmp_cuda.cu +++ b/src/reconstruction/ppmp_cuda.cu @@ -166,7 +166,7 @@ __global__ void PPMP_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou dge = dev_conserved[(n_fields - 1) * n_cells + id]; p_i = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, dge, gamma); #else - p_i = (dev_conserved[4 * n_cells + id] - 0.5 * d_i * (vx_i * vx_i + vy_i * vy_i + vz_i * vz_i)) * (gamma - 1.0); + p_i = (dev_conserved[4 * n_cells + id] - 0.5 * d_i * (vx_i * vx_i + vy_i * vy_i + vz_i * vz_i)) * (gamma - 1.0); #endif // PRESSURE_DE p_i = fmax(p_i, (Real)TINY_NUMBER); #ifdef DE diff --git a/src/reconstruction/reconstruction_tests.cu b/src/reconstruction/reconstruction_tests.cu index 8db7c5fc4..f0c11e3e5 100644 --- a/src/reconstruction/reconstruction_tests.cu +++ b/src/reconstruction/reconstruction_tests.cu @@ -575,13 +575,9 @@ TEST(tALLReconstructionWriteData, CorrectInputExpectCorrectOutput) { // Set up test and mock up grid #ifdef MHD - reconstruction::Primitive interface { - 1, 2, 3, 4, 5, 6, 7, 8 - }; + reconstruction::Primitive interface{1, 2, 3, 4, 5, 6, 7, 8}; #else // MHD - reconstruction::Primitive interface { - 6, 7, 8, 9, 10 - }; + reconstruction::Primitive interface{6, 7, 8, 9, 10}; #endif // MHD size_t const nx = 3, ny = 3, nz = 3; size_t const n_cells = nx * ny * nz; diff --git a/src/system_tests/hydro_system_tests.cpp b/src/system_tests/hydro_system_tests.cpp index f449e95a7..97483b879 100644 --- a/src/system_tests/hydro_system_tests.cpp +++ b/src/system_tests/hydro_system_tests.cpp @@ -56,8 +56,8 @@ TEST_P(tHYDROtMHDSYSTEMSodShockTubeParameterizedMpi, CorrectInputExpectCorrectOu double const maxAllowedL1Error = 7.0E-3; double const maxAllowedError = 4.6E-2; #else - double const maxAllowedL1Error = 9.4E-5; - double const maxAllowedError = 6.4E-4; + double const maxAllowedL1Error = 9.4E-5; + double const maxAllowedError = 6.4E-4; #endif // MHD sodTest.numMpiRanks = GetParam(); From 9c796d303d8c1e47b6b6a770c3640304f52c9e05 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Fri, 22 Sep 2023 15:09:31 -0400 Subject: [PATCH 558/694] Add latest formatting commit to .git-blame-ignore-revs --- .git-blame-ignore-revs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs index 86ae19b2e..51e66225d 100644 --- a/.git-blame-ignore-revs +++ b/.git-blame-ignore-revs @@ -12,7 +12,8 @@ b78d8c96680c9c2d5a5d41656895cb3795e1e204 # Reformat Code with clang-format 729ef8ed307eaa2cf42baa1f5af6c389ad614ac4 +fcaa4714241ad764d9ae38159cac5618e59178c8 # Reformat Code with clang-format increasing column width to 120 b779b212b24ed19592ac309eab1c3ccb7ba66212 -8e5b4619734e0922d815f4d259323c68002af6db \ No newline at end of file +8e5b4619734e0922d815f4d259323c68002af6db From 95143ac87099b0d0088171b7ea793d1b3680a0ae Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Fri, 22 Sep 2023 15:10:27 -0400 Subject: [PATCH 559/694] Update to clang-format 17 in CI format checker --- .github/workflows/code_formatting.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/code_formatting.yml b/.github/workflows/code_formatting.yml index f145efdd1..6176efac3 100644 --- a/.github/workflows/code_formatting.yml +++ b/.github/workflows/code_formatting.yml @@ -8,7 +8,7 @@ jobs: # Setup environment variables env: - CLANG_FORMAT_VERSION: 15 + CLANG_FORMAT_VERSION: 17 steps: - uses: actions/checkout@v3 From ad4e1107ed52d212b97602aad31ecd4d2f1b5c2a Mon Sep 17 00:00:00 2001 From: Matthew Abruzzo Date: Fri, 22 Sep 2023 15:39:25 -0400 Subject: [PATCH 560/694] Adjust logic for clamping timestep (to fix #301) Previously, ``main`` would only clamp the next timestep based on when the time that the next output was scheduled for (this was based on incrementing ``outtime`` with ``outstep``). This produced some weird behavior when the ``OUTPUT_ALWAYS`` macro is defined. As documented in issue #301, when that macro is enabled the simulation would write the final output slightly after ``tout``. Now, timesteps are adjusted so that the simulation after the current timestep does not exceed the next output-time or ``tout`` --- src/main.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/main.cpp b/src/main.cpp index 9148cbb6e..16c0b7d3b 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -252,8 +252,10 @@ int main(int argc, char *argv[]) // calculate the timestep by calling MPI_Allreduce G.set_dt(dti); - if (G.H.t + G.H.dt > outtime) { - G.H.dt = outtime - G.H.t; + // adjust timestep based on the next available scheduled time + const Real next_scheduled_time = fmin(outtime, P.tout); + if (G.H.t + G.H.dt > next_scheduled_time) { + G.H.dt = next_scheduled_time - G.H.t; } #if defined(SUPERNOVA) && defined(PARTICLE_AGE) From dfb72aaffd18e33e203d5f0449650e31e4eab6ed Mon Sep 17 00:00:00 2001 From: Matthew Abruzzo Date: Fri, 22 Sep 2023 16:16:42 -0400 Subject: [PATCH 561/694] tweak the logic for incrementing outstep in ``main``. This ensures that the simulation will take the same timesteps whether or not ``OUTPUT_ALWAYS`` is enabled. Previously, when the code was compiled with ``OUTPUT_ALWAYS`` it might skip the timesteps that were based on the value ``outstep``. --- src/main.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/main.cpp b/src/main.cpp index 16c0b7d3b..9d355a46b 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -345,8 +345,9 @@ int main(int argc, char *argv[]) // add one to the output file count nfile++; #endif // OUTPUT - // update to the next output time - outtime += P.outstep; + if (G.H.t == outtime) { + outtime += P.outstep; // update to the next output time + } } #ifdef CPU_TIME From 77d6e1ab38573857951fb15ad1c608cccf591ea7 Mon Sep 17 00:00:00 2001 From: helenarichie Date: Mon, 25 Sep 2023 15:39:56 -0400 Subject: [PATCH 562/694] add test for scalar floor kernel --- src/dust/dust_cuda_tests.cpp | 4 ++-- src/hydro/hydro_cuda_tests.cu | 45 +++++++++++++++++++++++++++++++++++ 2 files changed, 47 insertions(+), 2 deletions(-) diff --git a/src/dust/dust_cuda_tests.cpp b/src/dust/dust_cuda_tests.cpp index a1357037a..eeaac2368 100644 --- a/src/dust/dust_cuda_tests.cpp +++ b/src/dust/dust_cuda_tests.cpp @@ -22,7 +22,7 @@ #ifdef DUST TEST(tDUSTTestSputteringTimescale, - CorrectInputExpectCorrectOutput) // test suite name, test name + CorrectInputExpectCorrectOutput) { // Parameters Real YR_IN_S = 3.154e7; @@ -46,7 +46,7 @@ TEST(tDUSTTestSputteringTimescale, } TEST(tDUSTTestSputteringGrowthRate, - CorrectInputExpectCorrectOutput) // test suite name, test name + CorrectInputExpectCorrectOutput) { // Parameters Real YR_IN_S = 3.154e7; diff --git a/src/hydro/hydro_cuda_tests.cu b/src/hydro/hydro_cuda_tests.cu index fe4b351f7..8fd728528 100644 --- a/src/hydro/hydro_cuda_tests.cu +++ b/src/hydro/hydro_cuda_tests.cu @@ -146,4 +146,49 @@ TEST(tMHDMhdInverseCrossingTime, CorrectInputExpectCorrectOutput) // End of tests for the mhdInverseCrossingTime function // ============================================================================= +TEST(tHYDROScalarFloor, CorrectInputExpectCorrectOutput) +{ + // Call the function we are testing + int num_blocks = 1; + dim3 dim1dGrid(num_blocks, 1, 1); + dim3 dim1dBlock(TPB, 1, 1); + int const nx = 1; + int const ny = 1; + int const nz = 1; + int const n_fields = 6; // 5 conserved + 1 scalar + int const n_ghost = 0; + int const field_num = 5; // scalar field index + + // initialize host and device conserved arrays + std::vector host_conserved(n_fields); + cuda_utilities::DeviceVector dev_conserved(n_fields); + + // Set values of conserved variables for input (host) + host_conserved.at(0) = 0.0; // density + host_conserved.at(1) = 0.0; // x momentum + host_conserved.at(2) = 0.0; // y momentum + host_conserved.at(3) = 0.0; // z momentum + host_conserved.at(4) = 0.0; // energy + + Real scalar_floor = 1.0; // minimum allowed value for scalar field + + // Case where scalar is below the floor + host_conserved.at(field_num) = 0.0; // scalar + dev_conserved.cpyHostToDevice(host_conserved); + hipLaunchKernelGGL(Apply_Scalar_Floor, dim1dGrid, dim1dBlock, 0, 0, dev_conserved.data(), nx, ny, nz, n_ghost, field_num, scalar_floor); + testing_utilities::Check_Results(scalar_floor, dev_conserved.at(field_num), "below floor"); + + // Case where scalar is above the floor + host_conserved.at(field_num) = 2.0; // scalar + dev_conserved.cpyHostToDevice(host_conserved); + hipLaunchKernelGGL(Apply_Scalar_Floor, dim1dGrid, dim1dBlock, 0, 0, dev_conserved.data(), nx, ny, nz, n_ghost, field_num, scalar_floor); + testing_utilities::Check_Results(host_conserved.at(field_num), dev_conserved.at(field_num), "above floor"); + + // Case where scalar is at the floor + host_conserved.at(field_num) = 1.0; // scalar + dev_conserved.cpyHostToDevice(host_conserved); + hipLaunchKernelGGL(Apply_Scalar_Floor, dim1dGrid, dim1dBlock, 0, 0, dev_conserved.data(), nx, ny, nz, n_ghost, field_num, scalar_floor); + testing_utilities::Check_Results(host_conserved.at(field_num), dev_conserved.at(field_num), "at floor"); +} + #endif // CUDA From 639be11a8369d604ea2351ac8f668b4d41e8ccc8 Mon Sep 17 00:00:00 2001 From: helenarichie Date: Mon, 25 Sep 2023 15:44:10 -0400 Subject: [PATCH 563/694] run clang format --- src/dust/dust_cuda_tests.cpp | 6 ++---- src/hydro/hydro_cuda.cu | 17 ++++++++--------- src/hydro/hydro_cuda.h | 2 +- src/hydro/hydro_cuda_tests.cu | 25 ++++++++++++++----------- src/integrators/VL_3D_cuda.cu | 3 ++- 5 files changed, 27 insertions(+), 26 deletions(-) diff --git a/src/dust/dust_cuda_tests.cpp b/src/dust/dust_cuda_tests.cpp index eeaac2368..aa26f7a01 100644 --- a/src/dust/dust_cuda_tests.cpp +++ b/src/dust/dust_cuda_tests.cpp @@ -21,8 +21,7 @@ #ifdef DUST -TEST(tDUSTTestSputteringTimescale, - CorrectInputExpectCorrectOutput) +TEST(tDUSTTestSputteringTimescale, CorrectInputExpectCorrectOutput) { // Parameters Real YR_IN_S = 3.154e7; @@ -45,8 +44,7 @@ TEST(tDUSTTestSputteringTimescale, << "The ULP difference is: " << ulps_diff << std::endl; } -TEST(tDUSTTestSputteringGrowthRate, - CorrectInputExpectCorrectOutput) +TEST(tDUSTTestSputteringGrowthRate, CorrectInputExpectCorrectOutput) { // Parameters Real YR_IN_S = 3.154e7; diff --git a/src/hydro/hydro_cuda.cu b/src/hydro/hydro_cuda.cu index 0a3ed342f..037d41517 100644 --- a/src/hydro/hydro_cuda.cu +++ b/src/hydro/hydro_cuda.cu @@ -276,7 +276,7 @@ __global__ void Update_Conserved_Variables_3D(Real *dev_conserved, Real *Q_Lx, R // issues #endif - #ifdef DENSITY_FLOOR + #ifdef DENSITY_FLOOR if (dev_conserved[id] < density_floor) { if (dev_conserved[id] > 0) { dens_0 = dev_conserved[id]; @@ -1115,19 +1115,19 @@ __global__ void Apply_Temperature_Floor(Real *dev_conserved, int nx, int ny, int dev_conserved[4 * n_cells + id] = Ekin + d * U_floor; } - #ifdef DE + #ifdef DE U = dev_conserved[(n_fields - 1) * n_cells + id] / d; if (U < U_floor) { dev_conserved[(n_fields - 1) * n_cells + id] = d * U_floor; } - #endif + #endif } } __global__ void Apply_Density_Floor(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, Real density_floor) { int id, xid, yid, zid, n_cells; - Real density_init; // variable to store the value of the scalar before a floor is applied + Real density_init; // variable to store the value of the scalar before a floor is applied n_cells = nx * ny * nz; // get a global thread ID @@ -1136,10 +1136,9 @@ __global__ void Apply_Density_Floor(Real *dev_conserved, int nx, int ny, int nz, yid = (id - zid * nx * ny) / nx; xid = id - zid * nx * ny - yid * nx; - // threads corresponding to real cells do the calculation + // threads corresponding to real cells do the calculation if (xid > n_ghost - 1 && xid < nx - n_ghost && yid > n_ghost - 1 && yid < ny - n_ghost && zid > n_ghost - 1 && zid < nz - n_ghost) { - density_init = dev_conserved[id + n_cells * grid_enum::density]; if (density_init < density_floor) { @@ -1150,9 +1149,9 @@ __global__ void Apply_Density_Floor(Real *dev_conserved, int nx, int ny, int nz, dev_conserved[id + n_cells * grid_enum::momentum_y] *= (density_floor / density_init); dev_conserved[id + n_cells * grid_enum::momentum_z] *= (density_floor / density_init); dev_conserved[id + n_cells * grid_enum::Energy] *= (density_floor / density_init); - #ifdef DE + #ifdef DE dev_conserved[id + n_cells * grid_enum::GasEnergy] *= (density_floor / density_init); - #endif // DE + #endif // DE } } } @@ -1204,7 +1203,7 @@ __global__ void Apply_Scalar_Floor(Real *dev_conserved, int nx, int ny, int nz, Real scalar_floor) { int id, xid, yid, zid, n_cells; - Real scalar; // variable to store the value of the scalar before a floor is applied + Real scalar; // variable to store the value of the scalar before a floor is applied n_cells = nx * ny * nz; // get a global thread ID diff --git a/src/hydro/hydro_cuda.h b/src/hydro/hydro_cuda.h index 1f7d9a473..2e93248f8 100644 --- a/src/hydro/hydro_cuda.h +++ b/src/hydro/hydro_cuda.h @@ -90,7 +90,7 @@ __global__ void Apply_Temperature_Floor(Real *dev_conserved, int nx, int ny, int __global__ void Apply_Density_Floor(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, Real density_floor); __global__ void Apply_Scalar_Floor(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int field_num, - Real scalar_floor); + Real scalar_floor); __global__ void Partial_Update_Advected_Internal_Energy_1D(Real *dev_conserved, Real *Q_Lx, Real *Q_Rx, int nx, int n_ghost, Real dx, Real dt, Real gamma, int n_fields); diff --git a/src/hydro/hydro_cuda_tests.cu b/src/hydro/hydro_cuda_tests.cu index 8fd728528..da0f185e5 100644 --- a/src/hydro/hydro_cuda_tests.cu +++ b/src/hydro/hydro_cuda_tests.cu @@ -152,11 +152,11 @@ TEST(tHYDROScalarFloor, CorrectInputExpectCorrectOutput) int num_blocks = 1; dim3 dim1dGrid(num_blocks, 1, 1); dim3 dim1dBlock(TPB, 1, 1); - int const nx = 1; - int const ny = 1; - int const nz = 1; - int const n_fields = 6; // 5 conserved + 1 scalar - int const n_ghost = 0; + int const nx = 1; + int const ny = 1; + int const nz = 1; + int const n_fields = 6; // 5 conserved + 1 scalar + int const n_ghost = 0; int const field_num = 5; // scalar field index // initialize host and device conserved arrays @@ -174,20 +174,23 @@ TEST(tHYDROScalarFloor, CorrectInputExpectCorrectOutput) // Case where scalar is below the floor host_conserved.at(field_num) = 0.0; // scalar - dev_conserved.cpyHostToDevice(host_conserved); - hipLaunchKernelGGL(Apply_Scalar_Floor, dim1dGrid, dim1dBlock, 0, 0, dev_conserved.data(), nx, ny, nz, n_ghost, field_num, scalar_floor); + dev_conserved.cpyHostToDevice(host_conserved); + hipLaunchKernelGGL(Apply_Scalar_Floor, dim1dGrid, dim1dBlock, 0, 0, dev_conserved.data(), nx, ny, nz, n_ghost, + field_num, scalar_floor); testing_utilities::Check_Results(scalar_floor, dev_conserved.at(field_num), "below floor"); // Case where scalar is above the floor host_conserved.at(field_num) = 2.0; // scalar - dev_conserved.cpyHostToDevice(host_conserved); - hipLaunchKernelGGL(Apply_Scalar_Floor, dim1dGrid, dim1dBlock, 0, 0, dev_conserved.data(), nx, ny, nz, n_ghost, field_num, scalar_floor); + dev_conserved.cpyHostToDevice(host_conserved); + hipLaunchKernelGGL(Apply_Scalar_Floor, dim1dGrid, dim1dBlock, 0, 0, dev_conserved.data(), nx, ny, nz, n_ghost, + field_num, scalar_floor); testing_utilities::Check_Results(host_conserved.at(field_num), dev_conserved.at(field_num), "above floor"); // Case where scalar is at the floor host_conserved.at(field_num) = 1.0; // scalar - dev_conserved.cpyHostToDevice(host_conserved); - hipLaunchKernelGGL(Apply_Scalar_Floor, dim1dGrid, dim1dBlock, 0, 0, dev_conserved.data(), nx, ny, nz, n_ghost, field_num, scalar_floor); + dev_conserved.cpyHostToDevice(host_conserved); + hipLaunchKernelGGL(Apply_Scalar_Floor, dim1dGrid, dim1dBlock, 0, 0, dev_conserved.data(), nx, ny, nz, n_ghost, + field_num, scalar_floor); testing_utilities::Check_Results(host_conserved.at(field_num), dev_conserved.at(field_num), "at floor"); } diff --git a/src/integrators/VL_3D_cuda.cu b/src/integrators/VL_3D_cuda.cu index dd088d8ef..9b1fe5b86 100644 --- a/src/integrators/VL_3D_cuda.cu +++ b/src/integrators/VL_3D_cuda.cu @@ -197,7 +197,8 @@ void VL_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, int CudaCheckError(); #ifdef DENSITY_FLOOR - hipLaunchKernelGGL(Apply_Density_Floor, dim1dGrid, dim1dBlock, 0, 0, dev_conserved_half, nx, ny, nz, n_ghost, density_floor); + hipLaunchKernelGGL(Apply_Density_Floor, dim1dGrid, dim1dBlock, 0, 0, dev_conserved_half, nx, ny, nz, n_ghost, + density_floor); #endif // DENSITY_FLOOR #ifdef MHD From 2947d171037bb7e56e2e3795d598b16103584d4c Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Wed, 27 Sep 2023 21:42:44 -0400 Subject: [PATCH 564/694] update naming format for namespqaces and classes --- .clang-tidy | 4 +- src/analysis/analysis.cpp | 14 +- src/analysis/analysis.h | 12 +- src/analysis/feedback_analysis.cpp | 2 +- src/analysis/io_analysis.cpp | 8 +- src/analysis/lya_statistics.cpp | 20 +-- src/analysis/phase_diagram.cpp | 2 +- src/chemistry_gpu/chemistry_functions.cpp | 6 +- src/chemistry_gpu/chemistry_gpu.h | 8 +- src/chemistry_gpu/chemistry_io.cpp | 2 +- src/cooling_grackle/cool_grackle.cpp | 4 +- src/cooling_grackle/cool_grackle.h | 2 +- src/cosmology/cosmology.cpp | 2 +- src/cosmology/cosmology.h | 6 +- src/cosmology/cosmology_functions.cpp | 2 +- src/cosmology/io_cosmology.cpp | 4 +- src/global/global.cpp | 10 +- src/global/global.h | 6 +- src/gravity/grav3D.cpp | 2 +- src/gravity/grav3D.h | 14 +- src/gravity/gravity_boundaries.cpp | 4 +- src/gravity/gravity_functions.cpp | 24 ++-- src/gravity/gravity_restart.cpp | 8 +- src/gravity/paris/HenryPeriodic.hpp | 2 +- src/gravity/paris/ParisPeriodic.hpp | 2 +- src/gravity/paris/README.md | 8 +- src/gravity/potential_SOR_3D.cpp | 6 +- src/gravity/potential_SOR_3D.h | 2 +- src/gravity/potential_paris_3D.cu | 16 +-- src/gravity/potential_paris_3D.h | 6 +- src/gravity/potential_paris_galactic.cu | 17 ++- src/gravity/potential_paris_galactic.h | 6 +- src/grid/boundary_conditions.cpp | 12 +- src/grid/grid3D.cpp | 2 +- src/grid/grid3D.h | 106 +++++++-------- src/grid/initial_conditions.cpp | 28 ++-- src/grid/mpi_boundaries.cpp | 4 +- src/io/io.cpp | 20 +-- src/io/io.h | 14 +- src/io/io_parallel.cpp | 4 +- src/main.cpp | 4 +- src/mhd/ct_electric_fields.cu | 99 +++++++------- src/mhd/ct_electric_fields.h | 4 +- src/mhd/ct_electric_fields_tests.cu | 48 +++---- src/model/disk_ICs.cpp | 8 +- src/model/disk_galaxy.h | 4 +- src/mpi/mpi_routines.cpp | 4 +- src/mpi/mpi_routines.h | 4 +- src/particles/density_CIC.cpp | 14 +- src/particles/density_CIC_gpu.cu | 12 +- src/particles/density_boundaries.cpp | 2 +- src/particles/feedback_CIC_gpu.cu | 2 +- src/particles/gravity_CIC.cpp | 4 +- src/particles/gravity_CIC_gpu.cu | 24 ++-- src/particles/io_particles.cpp | 8 +- src/particles/particles_3D.cpp | 42 +++--- src/particles/particles_3D.h | 16 +-- src/particles/particles_3D_gpu.cu | 32 ++--- src/particles/particles_boundaries.cpp | 22 ++-- src/particles/particles_boundaries_cpu.cpp | 26 ++-- src/particles/particles_dynamics_gpu.cu | 42 +++--- src/particles/supernova.h | 2 +- src/riemann_solvers/hlld_cuda.cu | 144 ++++++++++----------- src/riemann_solvers/hlld_cuda.h | 80 ++++++------ src/riemann_solvers/hlld_cuda_tests.cu | 138 ++++++++++---------- src/utils/error_handling.cpp | 8 +- src/utils/error_handling.h | 2 +- src/utils/mhd_utilities.h | 12 +- src/utils/prng_utilities.h | 2 +- src/utils/timing_functions.cpp | 2 +- src/utils/timing_functions.h | 2 +- 71 files changed, 613 insertions(+), 621 deletions(-) diff --git a/.clang-tidy b/.clang-tidy index 6508724b6..ea7e7dc48 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -2,7 +2,7 @@ # for those checks, etc. It uses as many of the default values as possible and # runs all checks with some exclusions by default. # -# The full list of clang-tidy 15 checks and documentation can be found +# The full list of clang-tidy 17 checks and documentation can be found # [here](https://releases.llvm.org/17.0.1/tools/clang/tools/extra/docs/clang-tidy/index.html) # # The "Checks" command should have 5 sections seperated by a newline: @@ -184,5 +184,5 @@ CheckOptions: hicpp-signed-bitwise.IgnorePositiveIntegerLiterals: 'true' - bugprone-reserved-identifier.AllowedIdentifiers: '__cudaSafeCall;__cudaCheckError;__shfl_down' + bugprone-reserved-identifier.AllowedIdentifiers: '__cudaSafeCall;__cudaCheckError;__shfl_down;__CHOLLA_PRETTY_FUNC__' ... diff --git a/src/analysis/analysis.cpp b/src/analysis/analysis.cpp index 3c5991e71..9b927daaa 100644 --- a/src/analysis/analysis.cpp +++ b/src/analysis/analysis.cpp @@ -6,7 +6,7 @@ #include "../io/io.h" -Analysis_Module::Analysis_Module(void) {} +AnalysisModule::AnalysisModule(void) {} #ifdef LYA_STATISTICS void Grid3D::Compute_Lya_Statistics() @@ -76,7 +76,7 @@ void Grid3D::Compute_Lya_Statistics() } #endif // LYA_STATISTICS -void Grid3D::Compute_and_Output_Analysis(struct parameters *P) +void Grid3D::Compute_and_Output_Analysis(struct Parameters *P) { #ifdef COSMOLOGY chprintf("\nComputing Analysis current_z: %f\n", Analysis.current_z); @@ -119,7 +119,7 @@ void Grid3D::Compute_and_Output_Analysis(struct parameters *P) // exit(0); } -void Grid3D::Initialize_Analysis_Module(struct parameters *P) +void Grid3D::Initialize_AnalysisModule(struct Parameters *P) { chprintf("\nInitializng Analysis Module...\n"); @@ -141,9 +141,9 @@ void Grid3D::Initialize_Analysis_Module(struct parameters *P) H.nx_real, H.ny_real, H.nz_real, H.dx, H.dy, H.dz, H.n_ghost, z_now, P); } -void Analysis_Module::Initialize(Real Lx, Real Ly, Real Lz, Real x_min, Real y_min, Real z_min, int nx, int ny, int nz, - int nx_real, int ny_real, int nz_real, Real dx_real, Real dy_real, Real dz_real, - int n_ghost_hydro, Real z_now, struct parameters *P) +void AnalysisModule::Initialize(Real Lx, Real Ly, Real Lz, Real x_min, Real y_min, Real z_min, int nx, int ny, int nz, + int nx_real, int ny_real, int nz_real, Real dx_real, Real dy_real, Real dz_real, + int n_ghost_hydro, Real z_now, struct Parameters *P) { // Domain Length Lbox_x = Lx; @@ -196,7 +196,7 @@ void Analysis_Module::Initialize(Real Lx, Real Ly, Real Lz, Real x_min, Real y_m chprintf("Analysis Module Successfully Initialized.\n\n"); } -void Analysis_Module::Reset() +void AnalysisModule::Reset() { #ifdef PHASE_DIAGRAM free(phase_diagram); diff --git a/src/analysis/analysis.h b/src/analysis/analysis.h index dcc2b9b24..59ccb050a 100644 --- a/src/analysis/analysis.h +++ b/src/analysis/analysis.h @@ -11,7 +11,7 @@ #include #endif -class Analysis_Module +class AnalysisModule { public: Real Lbox_x; @@ -290,21 +290,21 @@ class Analysis_Module #endif - Analysis_Module(void); + AnalysisModule(void); void Initialize(Real Lx, Real Ly, Real Lz, Real x_min, Real y_min, Real z_min, int nx, int ny, int nz, int nx_real, int ny_real, int nz_real, Real dx_real, Real dy_real, Real dz_real, int n_ghost_hydro, Real z_now, - struct parameters *P); + struct Parameters *P); void Reset(void); - void Load_Scale_Outputs(struct parameters *P); + void Load_Scale_Outputs(struct Parameters *P); void Set_Next_Scale_Output(); #ifdef PHASE_DIAGRAM - void Initialize_Phase_Diagram(struct parameters *P); + void Initialize_Phase_Diagram(struct Parameters *P); #endif #ifdef LYA_STATISTICS - void Initialize_Lya_Statistics(struct parameters *P); + void Initialize_Lya_Statistics(struct Parameters *P); void Initialize_Lya_Statistics_Measurements(int axis); void Transfer_Skewers_Data(int axis); void Compute_Lya_Mean_Flux_Skewer(int skewer_id, int axis); diff --git a/src/analysis/feedback_analysis.cpp b/src/analysis/feedback_analysis.cpp index 5241ec9ed..e63ea8203 100644 --- a/src/analysis/feedback_analysis.cpp +++ b/src/analysis/feedback_analysis.cpp @@ -32,7 +32,7 @@ FeedbackAnalysis::FeedbackAnalysis(Grid3D& G) G.Get_Position(i, j, k, &x, &y, &z); r = sqrt(x * x + y * y); - vca = sqrt(r * fabs(Galaxies::MW.gr_total_D3D(r, z))); + vca = sqrt(r * fabs(galaxies::MW.gr_total_D3D(r, z))); h_circ_vel_x[id] = -y / r * vca; h_circ_vel_y[id] = x / r * vca; } diff --git a/src/analysis/io_analysis.cpp b/src/analysis/io_analysis.cpp index 70dec2a7b..962503dea 100644 --- a/src/analysis/io_analysis.cpp +++ b/src/analysis/io_analysis.cpp @@ -10,7 +10,7 @@ // #define OUTPUT_SKEWERS_TRANSMITTED_FLUX #ifdef OUTPUT_SKEWERS -void Grid3D::Output_Skewers_File(struct parameters *P) +void Grid3D::Output_Skewers_File(struct Parameters *P) { FILE *out; char filename[180]; @@ -458,7 +458,7 @@ void Grid3D::Write_Skewers_Data_HDF5(hid_t file_id) #endif // OUTPUT_SKEWERS -void Grid3D::Output_Analysis(struct parameters *P) +void Grid3D::Output_Analysis(struct Parameters *P) { #ifdef OUTPUT_SKEWERS Output_Skewers_File(P); @@ -652,7 +652,7 @@ void Grid3D::Write_Analysis_Data_HDF5(hid_t file_id) } #ifdef COSMOLOGY -void Analysis_Module::Load_Scale_Outputs(struct parameters *P) +void AnalysisModule::Load_Scale_Outputs(struct Parameters *P) { char filename_1[100]; strcpy(filename_1, P->analysis_scale_outputs_file); @@ -701,7 +701,7 @@ void Analysis_Module::Load_Scale_Outputs(struct parameters *P) n_file = next_output_indx; } -void Analysis_Module::Set_Next_Scale_Output() +void AnalysisModule::Set_Next_Scale_Output() { int scale_indx = next_output_indx; Real a_value, current_a; diff --git a/src/analysis/lya_statistics.cpp b/src/analysis/lya_statistics.cpp index 3a22149a1..968011bae 100644 --- a/src/analysis/lya_statistics.cpp +++ b/src/analysis/lya_statistics.cpp @@ -13,7 +13,7 @@ // #define PRINT_ANALYSIS_LOG -void Analysis_Module::Transfer_Skewers_Global_Axis(int axis) +void AnalysisModule::Transfer_Skewers_Global_Axis(int axis) { bool am_I_root; int n_skewers_root, n_los; @@ -312,7 +312,7 @@ int Locate_Index(Real val, Real *values, int N) return index - 1; } -void Analysis_Module::Clear_Power_Spectrum_Measurements(void) +void AnalysisModule::Clear_Power_Spectrum_Measurements(void) { MPI_Barrier(world); @@ -627,7 +627,7 @@ void Grid3D::Compute_Flux_Power_Spectrum_Skewer(int skewer_id, int axis) if (axis == 2) Analysis.n_PS_processed_z += 1; } -void Analysis_Module::Reduce_Power_Spectrum_Axis(int axis) +void AnalysisModule::Reduce_Power_Spectrum_Axis(int axis) { int n_root, n_bins; Real *ps_root; @@ -663,7 +663,7 @@ void Analysis_Module::Reduce_Power_Spectrum_Axis(int axis) // chprintf( " N_Skewers_Processed: %d \n", *n_axis ); } -void Analysis_Module::Reduce_Power_Spectrum_Global() +void AnalysisModule::Reduce_Power_Spectrum_Global() { int n_PS_total = n_PS_axis_x + n_PS_axis_y + n_PS_axis_z; if (n_hist_edges_x != n_hist_edges_y || n_hist_edges_x != n_hist_edges_z) { @@ -685,7 +685,7 @@ void Analysis_Module::Reduce_Power_Spectrum_Global() // } } -void Analysis_Module::Reduce_Lya_Mean_Flux_Global() +void AnalysisModule::Reduce_Lya_Mean_Flux_Global() { n_skewers_processed = n_skewers_processed_x + n_skewers_processed_y + n_skewers_processed_z; Flux_mean_HI = (Flux_mean_HI_x * n_skewers_processed_x + Flux_mean_HI_y * n_skewers_processed_y + @@ -700,7 +700,7 @@ void Analysis_Module::Reduce_Lya_Mean_Flux_Global() Flux_mean_HeII); } -void Analysis_Module::Reduce_Lya_Mean_Flux_Axis(int axis) +void AnalysisModule::Reduce_Lya_Mean_Flux_Axis(int axis) { int *n_skewers_processed; int *n_skewers_processed_root; @@ -766,7 +766,7 @@ void Analysis_Module::Reduce_Lya_Mean_Flux_Axis(int axis) *Flux_mean_HeII); } -void Analysis_Module::Compute_Lya_Mean_Flux_Skewer(int skewer_id, int axis) +void AnalysisModule::Compute_Lya_Mean_Flux_Skewer(int skewer_id, int axis) { bool am_I_root; int n_los; @@ -821,7 +821,7 @@ void Analysis_Module::Compute_Lya_Mean_Flux_Skewer(int skewer_id, int axis) *n_skewers_processed_root += 1; } -void Analysis_Module::Initialize_Lya_Statistics_Measurements(int axis) +void AnalysisModule::Initialize_Lya_Statistics_Measurements(int axis) { if (axis == 0) { n_skewers_processed_root_x = 0; @@ -1033,7 +1033,7 @@ void Grid3D::Compute_Transmitted_Flux_Skewer(int skewer_id, int axis) } } -void Analysis_Module::Transfer_Skewers_Data(int axis) +void AnalysisModule::Transfer_Skewers_Data(int axis) { bool am_I_root; int n_skewers, n_los_local, n_los_total, root_id; @@ -1339,7 +1339,7 @@ void Grid3D::Populate_Lya_Skewers_Local(int axis) } } -void Analysis_Module::Initialize_Lya_Statistics(struct parameters *P) +void AnalysisModule::Initialize_Lya_Statistics(struct Parameters *P) { chprintf(" Initializing Lya Statistics...\n"); diff --git a/src/analysis/phase_diagram.cpp b/src/analysis/phase_diagram.cpp index 760179ccd..a3aa1dc3c 100644 --- a/src/analysis/phase_diagram.cpp +++ b/src/analysis/phase_diagram.cpp @@ -110,7 +110,7 @@ void Grid3D::Compute_Phase_Diagram() for (indx_phase = 0; indx_phase < n_temp * n_dens; indx_phase++) Analysis.phase_diagram[indx_phase] /= phase_sum; } -void Analysis_Module::Initialize_Phase_Diagram(struct parameters *P) +void AnalysisModule::Initialize_Phase_Diagram(struct Parameters *P) { // Size of the diagram n_dens = 1000; diff --git a/src/chemistry_gpu/chemistry_functions.cpp b/src/chemistry_gpu/chemistry_functions.cpp index ba67bf445..d2ffc2556 100644 --- a/src/chemistry_gpu/chemistry_functions.cpp +++ b/src/chemistry_gpu/chemistry_functions.cpp @@ -13,7 +13,7 @@ #define TINY 1e-20 -void Grid3D::Initialize_Chemistry(struct parameters *P) +void Grid3D::Initialize_Chemistry(struct Parameters *P) { chprintf("Initializing the GPU Chemistry Solver... \n"); @@ -132,7 +132,7 @@ void Chem_GPU::Generate_Reaction_Rate_Table(Real **rate_table_array_d, Rate_Func free(rate_table_array_h); } -void Chem_GPU::Initialize(struct parameters *P) +void Chem_GPU::Initialize(struct Parameters *P) { Initialize_Cooling_Rates(); @@ -191,7 +191,7 @@ void Chem_GPU::Initialize_Reaction_Rates() } } -void Chem_GPU::Initialize_UVB_Ionization_and_Heating_Rates(struct parameters *P) +void Chem_GPU::Initialize_UVB_Ionization_and_Heating_Rates(struct Parameters *P) { chprintf(" Initializing UVB Rates... \n"); Load_UVB_Ionization_and_Heating_Rates(P); diff --git a/src/chemistry_gpu/chemistry_gpu.h b/src/chemistry_gpu/chemistry_gpu.h index 9d0790a4e..473f6b609 100644 --- a/src/chemistry_gpu/chemistry_gpu.h +++ b/src/chemistry_gpu/chemistry_gpu.h @@ -10,7 +10,7 @@ typedef Real (*Rate_Function_T)(Real, Real); // #define TEXTURES_UVB_INTERPOLATION -struct Chemistry_Header { +struct ChemistryHeader { Real gamma; Real density_conversion; Real energy_conversion; @@ -124,7 +124,7 @@ class Chem_GPU void Copy_Float_Array_to_Device(int size, float *array_h, float *array_d); void Free_Array_GPU_float(float *array_dev); - void Initialize(struct parameters *P); + void Initialize(struct Parameters *P); void Generate_Reaction_Rate_Table(Real **rate_table_array_d, Rate_Function_T rate_function, Real units); @@ -132,9 +132,9 @@ class Chem_GPU void Initialize_Reaction_Rates(); - void Initialize_UVB_Ionization_and_Heating_Rates(struct parameters *P); + void Initialize_UVB_Ionization_and_Heating_Rates(struct Parameters *P); - void Load_UVB_Ionization_and_Heating_Rates(struct parameters *P); + void Load_UVB_Ionization_and_Heating_Rates(struct Parameters *P); void Copy_UVB_Rates_to_GPU(); diff --git a/src/chemistry_gpu/chemistry_io.cpp b/src/chemistry_gpu/chemistry_io.cpp index 458042968..20d23318e 100644 --- a/src/chemistry_gpu/chemistry_io.cpp +++ b/src/chemistry_gpu/chemistry_io.cpp @@ -10,7 +10,7 @@ #include "../io/io.h" #include "chemistry_gpu.h" -void Chem_GPU::Load_UVB_Ionization_and_Heating_Rates(struct parameters *P) +void Chem_GPU::Load_UVB_Ionization_and_Heating_Rates(struct Parameters *P) { char uvb_filename[100]; // create the filename to read from diff --git a/src/cooling_grackle/cool_grackle.cpp b/src/cooling_grackle/cool_grackle.cpp index cecb9b4d4..a7f5c36cb 100644 --- a/src/cooling_grackle/cool_grackle.cpp +++ b/src/cooling_grackle/cool_grackle.cpp @@ -11,7 +11,7 @@ Cool_GK::Cool_GK(void) {} -void Grid3D::Initialize_Grackle(struct parameters *P) +void Grid3D::Initialize_Grackle(struct Parameters *P) { chprintf("Initializing Grackle... \n"); @@ -24,7 +24,7 @@ void Grid3D::Initialize_Grackle(struct parameters *P) chprintf("Grackle Initialized Successfully. \n\n"); } -void Cool_GK::Initialize(struct parameters *P, Cosmology &Cosmo) +void Cool_GK::Initialize(struct Parameters *P, Cosmology &Cosmo) { chprintf(" Using Grackle for chemistry and cooling \n"); chprintf(" N scalar fields: %d \n", NSCALARS); diff --git a/src/cooling_grackle/cool_grackle.h b/src/cooling_grackle/cool_grackle.h index a8e19c338..0014f7e75 100644 --- a/src/cooling_grackle/cool_grackle.h +++ b/src/cooling_grackle/cool_grackle.h @@ -40,7 +40,7 @@ class Cool_GK Cool_GK(void); - void Initialize(struct parameters *P, Cosmology &Cosmo); + void Initialize(struct Parameters *P, Cosmology &Cosmo); void Free_Memory(); // void Do_Cooling_Step( Real dt ); diff --git a/src/cosmology/cosmology.cpp b/src/cosmology/cosmology.cpp index ac0045b6b..6575798e2 100644 --- a/src/cosmology/cosmology.cpp +++ b/src/cosmology/cosmology.cpp @@ -6,7 +6,7 @@ Cosmology::Cosmology(void) {} -void Cosmology::Initialize(struct parameters *P, Grav3D &Grav, Particles_3D &Particles) +void Cosmology::Initialize(struct Parameters *P, Grav3D &Grav, Particles3D &Particles) { chprintf("Cosmological Simulation\n"); diff --git a/src/cosmology/cosmology.h b/src/cosmology/cosmology.h index d9cf14bcf..1e7c9bd1c 100644 --- a/src/cosmology/cosmology.h +++ b/src/cosmology/cosmology.h @@ -55,10 +55,10 @@ class Cosmology bool exit_now; Cosmology(void); - void Initialize(struct parameters *P, Grav3D &Grav, Particles_3D &Particles); + void Initialize(struct Parameters *P, Grav3D &Grav, Particles3D &Particles); - void Load_Scale_Outputs(struct parameters *P); - void Set_Scale_Outputs(struct parameters *P); + void Load_Scale_Outputs(struct Parameters *P); + void Set_Scale_Outputs(struct Parameters *P); void Set_Next_Scale_Output(); diff --git a/src/cosmology/cosmology_functions.cpp b/src/cosmology/cosmology_functions.cpp index 3debb1dfb..f00c7e174 100644 --- a/src/cosmology/cosmology_functions.cpp +++ b/src/cosmology/cosmology_functions.cpp @@ -5,7 +5,7 @@ #include "../grid/grid_enum.h" #include "../io/io.h" -void Grid3D::Initialize_Cosmology(struct parameters *P) +void Grid3D::Initialize_Cosmology(struct Parameters *P) { chprintf("Initializing Cosmology... \n"); Cosmo.Initialize(P, Grav, Particles); diff --git a/src/cosmology/io_cosmology.cpp b/src/cosmology/io_cosmology.cpp index 862b8be0d..7492a814c 100644 --- a/src/cosmology/io_cosmology.cpp +++ b/src/cosmology/io_cosmology.cpp @@ -6,7 +6,7 @@ #include "../cosmology/cosmology.h" #include "../io/io.h" -void Cosmology::Load_Scale_Outputs(struct parameters *P) +void Cosmology::Load_Scale_Outputs(struct Parameters *P) { char filename_1[100]; // create the filename to read from @@ -50,7 +50,7 @@ void Cosmology::Load_Scale_Outputs(struct parameters *P) exit_now = false; } -void Cosmology::Set_Scale_Outputs(struct parameters *P) +void Cosmology::Set_Scale_Outputs(struct Parameters *P) { if (P->scale_outputs_file[0] == '\0') { chprintf(" Output every %d timesteps.\n", P->n_steps_output); diff --git a/src/global/global.cpp b/src/global/global.cpp index cee8fafab..99358e1f9 100644 --- a/src/global/global.cpp +++ b/src/global/global.cpp @@ -119,11 +119,11 @@ int Is_Param_Valid(const char *param_name) return 0; } -void Parse_Param(char *name, char *value, struct parameters *parms); +void Parse_Param(char *name, char *value, struct Parameters *parms); -/*! \fn void Parse_Params(char *param_file, struct parameters * parms); +/*! \fn void Parse_Params(char *param_file, struct Parameters * parms); * \brief Reads the parameters in the given file into a structure. */ -void Parse_Params(char *param_file, struct parameters *parms, int argc, char **argv) +void Parse_Params(char *param_file, struct Parameters *parms, int argc, char **argv) { int buf; char *s, buff[256]; @@ -186,9 +186,9 @@ void Parse_Params(char *param_file, struct parameters *parms, int argc, char **a } } -/*! \fn void Parse_Param(char *name,char *value, struct parameters *parms); +/*! \fn void Parse_Param(char *name,char *value, struct Parameters *parms); * \brief Parses and sets a single param based on name and value. */ -void Parse_Param(char *name, char *value, struct parameters *parms) +void Parse_Param(char *name, char *value, struct Parameters *parms) { /* Copy into correct entry in parameters struct */ if (strcmp(name, "nx") == 0) { diff --git a/src/global/global.h b/src/global/global.h index cd12e3d3b..fe0321bae 100644 --- a/src/global/global.h +++ b/src/global/global.h @@ -174,7 +174,7 @@ extern int Sgn(Real x); extern Real Calc_Eta(Real cW[], Real gamma); #endif -struct parameters { +struct Parameters { int nx; int ny; int nz; @@ -325,9 +325,9 @@ struct parameters { #endif }; -/*! \fn void parse_params(char *param_file, struct parameters * parms); +/*! \fn void parse_params(char *param_file, struct Parameters * parms); * \brief Reads the parameters in the given file into a structure. */ -extern void Parse_Params(char *param_file, struct parameters *parms, int argc, char **argv); +extern void Parse_Params(char *param_file, struct Parameters *parms, int argc, char **argv); /*! \fn int is_param_valid(char *name); * \brief Verifies that a param is valid (even if not needed). Avoids diff --git a/src/gravity/grav3D.cpp b/src/gravity/grav3D.cpp index 20ca67655..866663589 100644 --- a/src/gravity/grav3D.cpp +++ b/src/gravity/grav3D.cpp @@ -18,7 +18,7 @@ Grav3D::Grav3D(void) {} void Grav3D::Initialize(Real x_min, Real y_min, Real z_min, Real x_max, Real y_max, Real z_max, Real Lx, Real Ly, Real Lz, int nx, int ny, int nz, int nx_real, int ny_real, int nz_real, Real dx_real, - Real dy_real, Real dz_real, int n_ghost_pot_offset, struct parameters *P) + Real dy_real, Real dz_real, int n_ghost_pot_offset, struct Parameters *P) { // Set Box Size Lbox_x = Lx; diff --git a/src/gravity/grav3D.h b/src/gravity/grav3D.h index a751f1652..69cf8308a 100644 --- a/src/gravity/grav3D.h +++ b/src/gravity/grav3D.h @@ -101,15 +101,15 @@ class Grav3D #endif #ifdef PARIS - Potential_Paris_3D Poisson_solver; + PotentialParis3D Poisson_solver; #endif #ifdef PARIS_GALACTIC #ifdef SOR #define PARIS_GALACTIC_TEST - Potential_Paris_Galactic Poisson_solver_test; + PotentialParisGalactic Poisson_solver_test; #else - Potential_Paris_Galactic Poisson_solver; + PotentialParisGalactic Poisson_solver; #endif #endif @@ -193,14 +193,14 @@ class Grav3D * \brief Initialize the grid. */ void Initialize(Real x_min, Real y_min, Real z_min, Real x_max, Real y_max, Real z_max, Real Lx, Real Ly, Real Lz, int nx_total, int ny_total, int nz_total, int nx_real, int ny_real, int nz_real, Real dx_real, - Real dy_real, Real dz_real, int n_ghost_pot_offset, struct parameters *P); + Real dy_real, Real dz_real, int n_ghost_pot_offset, struct Parameters *P); void AllocateMemory_CPU(void); void Initialize_values_CPU(); void FreeMemory_CPU(void); - void Read_Restart_HDF5(struct parameters *P, int nfile); - void Write_Restart_HDF5(struct parameters *P, int nfile); + void Read_Restart_HDF5(struct Parameters *P, int nfile); + void Write_Restart_HDF5(struct Parameters *P, int nfile); Real Get_Average_Density(); Real Get_Average_Density_function(int g_start, int g_end); @@ -209,7 +209,7 @@ class Grav3D #ifdef SOR void Copy_Isolated_Boundary_To_GPU_buffer(Real *isolated_boundary_h, Real *isolated_boundary_d, int boundary_size); - void Copy_Isolated_Boundaries_To_GPU(struct parameters *P); + void Copy_Isolated_Boundaries_To_GPU(struct Parameters *P); #endif #ifdef GRAVITY_GPU diff --git a/src/gravity/gravity_boundaries.cpp b/src/gravity/gravity_boundaries.cpp index 85ee8a142..5e4b101eb 100644 --- a/src/gravity/gravity_boundaries.cpp +++ b/src/gravity/gravity_boundaries.cpp @@ -9,7 +9,7 @@ #if defined(GRAV_ISOLATED_BOUNDARY_X) || defined(GRAV_ISOLATED_BOUNDARY_Y) || defined(GRAV_ISOLATED_BOUNDARY_Z) -void Grid3D::Compute_Potential_Boundaries_Isolated(int dir, struct parameters *P) +void Grid3D::Compute_Potential_Boundaries_Isolated(int dir, struct Parameters *P) { // Set Isolated Boundaries for the ghost cells. int bc_potential_type = P->bc_potential_type; @@ -234,7 +234,7 @@ void Grid3D::Compute_Potential_Isolated_Boundary(int direction, int side, int bc } else if (bc_potential_type == 1) { // M-W disk potential r = sqrt(pos_x * pos_x + pos_y * pos_y); - pot_val = mod_frac * Galaxies::MW.phi_disk_D3D(r, pos_z); + pot_val = mod_frac * galaxies::MW.phi_disk_D3D(r, pos_z); } else { chprintf( "ERROR: Boundary Potential not set, need to set appropriate " diff --git a/src/gravity/gravity_functions.cpp b/src/gravity/gravity_functions.cpp index 5321780dd..2e94621a6 100644 --- a/src/gravity/gravity_functions.cpp +++ b/src/gravity/gravity_functions.cpp @@ -354,7 +354,7 @@ static void printDiff(const Real *p, const Real *q, const int nx, const int ny, #endif // Initialize the Grav Object at the beginning of the simulation -void Grid3D::Initialize_Gravity(struct parameters *P) +void Grid3D::Initialize_Gravity(struct Parameters *P) { chprintf("\nInitializing Gravity... \n"); Grav.Initialize(H.xblocal, H.yblocal, H.zblocal, H.xblocal_max, H.yblocal_max, H.zblocal_max, H.xdglobal, H.ydglobal, @@ -380,7 +380,7 @@ void Grid3D::Initialize_Gravity(struct parameters *P) const Real ddz = 1.0 / (scale * Grav.dz * Grav.dz); const Real *const phi = Grav.F.potential_h; const int nij = ni * nj; - const Real a0 = Galaxies::MW.phi_disk_D3D(0, 0); + const Real a0 = galaxies::MW.phi_disk_D3D(0, 0); const Real da0 = 2.0 / (25.0 * scale); #pragma omp parallel for for (int k = 0; k < nk; k++) { @@ -394,7 +394,7 @@ void Grid3D::Initialize_Gravity(struct parameters *P) const Real x = Grav.xMin + Grav.dx * (i + dr); const Real r = sqrt(x * x + yy); const int ijk = i + nijk; - exact[ijk] = potential[ijk] = Grav.F.potential_h[ijk] = Galaxies::MW.phi_disk_D3D(r, z); + exact[ijk] = potential[ijk] = Grav.F.potential_h[ijk] = galaxies::MW.phi_disk_D3D(r, z); } } } @@ -414,13 +414,13 @@ void Grid3D::Initialize_Gravity(struct parameters *P) const Real rr = x * x + yy + zz; const Real f = a0 * exp(-0.2 * rr); const Real df = da0 * (15.0 - 2.0 * rr) * f; - Grav.F.density_h[ijk] = Galaxies::MW.rho_disk_D3D(r, z) + df; + Grav.F.density_h[ijk] = galaxies::MW.rho_disk_D3D(r, z) + df; const int ib = i + ng + ni * (j + ng + nj * (k + ng)); exact[ib] -= f; } } } - Grav.Poisson_solver_test.Get_Potential(Grav.F.density_h, Grav.F.potential_h, Grav.Gconst, Galaxies::MW); + Grav.Poisson_solver_test.Get_Potential(Grav.F.density_h, Grav.F.potential_h, Grav.Gconst, galaxies::MW); chprintf(" Paris Galactic"); printDiff(Grav.F.potential_h, exact.data(), Grav.nx_local, Grav.ny_local, Grav.nz_local); Get_Potential_SOR(Grav.Gconst, 0, 0, P); @@ -442,7 +442,7 @@ void Grid3D::Initialize_Gravity(struct parameters *P) const Real x = Grav.xMin + Grav.dx * (i + dr); const Real r = sqrt(x * x + yy); const int ijk = i + nijk; - Grav.F.potential_h[ijk] = Galaxies::MW.phi_disk_D3D(r, z); + Grav.F.potential_h[ijk] = galaxies::MW.phi_disk_D3D(r, z); } } } @@ -451,7 +451,7 @@ void Grid3D::Initialize_Gravity(struct parameters *P) } // Compute the Gravitational Potential by solving Poisson Equation -void Grid3D::Compute_Gravitational_Potential(struct parameters *P) +void Grid3D::Compute_Gravitational_Potential(struct Parameters *P) { #ifdef CPU_TIME Timer.Grav_Potential.Start(); @@ -540,7 +540,7 @@ void Grid3D::Compute_Gravitational_Potential(struct parameters *P) #ifdef GRAVITY_GPU #error "GRAVITY_GPU not yet supported with PARIS_GALACTIC_TEST" #endif - Grav.Poisson_solver_test.Get_Potential(input_density, output_potential, Grav_Constant, Galaxies::MW); + Grav.Poisson_solver_test.Get_Potential(input_density, output_potential, Grav_Constant, galaxies::MW); std::vector p(output_potential, output_potential + Grav.n_cells_potential); Get_Potential_SOR(Grav_Constant, dens_avrg, current_a, P); chprintf(" Paris vs SOR"); @@ -550,7 +550,7 @@ void Grid3D::Compute_Gravitational_Potential(struct parameters *P) #endif #elif defined PARIS_GALACTIC - Grav.Poisson_solver.Get_Potential(input_density, output_potential, Grav_Constant, Galaxies::MW); + Grav.Poisson_solver.Get_Potential(input_density, output_potential, Grav_Constant, galaxies::MW); #else Grav.Poisson_solver.Get_Potential(input_density, output_potential, Grav_Constant, dens_avrg, current_a); #endif // SOR @@ -561,10 +561,10 @@ void Grid3D::Compute_Gravitational_Potential(struct parameters *P) } #ifdef GRAVITY_ANALYTIC_COMP -void Grid3D::Setup_Analytic_Potential(struct parameters *P) +void Grid3D::Setup_Analytic_Potential(struct Parameters *P) { #ifndef PARALLEL_OMP - Setup_Analytic_Galaxy_Potential(0, Grav.nz_local + 2 * N_GHOST_POTENTIAL, Galaxies::MW); + Setup_Analytic_Galaxy_Potential(0, Grav.nz_local + 2 * N_GHOST_POTENTIAL, galaxies::MW); #else #pragma omp parallel num_threads(N_OMP_THREADS) { @@ -575,7 +575,7 @@ void Grid3D::Setup_Analytic_Potential(struct parameters *P) n_omp_procs = omp_get_num_threads(); Get_OMP_Grid_Indxs(Grav.nz_local + 2 * N_GHOST_POTENTIAL, n_omp_procs, omp_id, &g_start, &g_end); - Setup_Analytic_Galaxy_Potential(g_start, g_end, Galaxies::MW); + Setup_Analytic_Galaxy_Potential(g_start, g_end, galaxies::MW); } #endif diff --git a/src/gravity/gravity_restart.cpp b/src/gravity/gravity_restart.cpp index 1944c7b10..1cfff9cc8 100644 --- a/src/gravity/gravity_restart.cpp +++ b/src/gravity/gravity_restart.cpp @@ -27,7 +27,7 @@ void Gravity_Restart_Filename(char* filename, char* dirname, int nfile) } #if defined(GRAVITY) && defined(HDF5) -void Grav3D::Read_Restart_HDF5(struct parameters* P, int nfile) +void Grav3D::Read_Restart_HDF5(struct Parameters* P, int nfile) { H5open(); char filename[MAXLEN]; @@ -52,7 +52,7 @@ void Grav3D::Read_Restart_HDF5(struct parameters* P, int nfile) INITIAL = false; } -void Grav3D::Write_Restart_HDF5(struct parameters* P, int nfile) +void Grav3D::Write_Restart_HDF5(struct Parameters* P, int nfile) { H5open(); char filename[MAXLEN]; @@ -89,12 +89,12 @@ void Grav3D::Write_Restart_HDF5(struct parameters* P, int nfile) #elif defined(GRAVITY) // Do nothing -void Grav3D::Read_Restart_HDF5(struct parameters* P, int nfile) +void Grav3D::Read_Restart_HDF5(struct Parameters* P, int nfile) { chprintf("WARNING from file %s line %d: Read_Restart_HDF5 did nothing", __FILE__, __LINE__); } -void Grav3D::Write_Restart_HDF5(struct parameters* P, int nfile) +void Grav3D::Write_Restart_HDF5(struct Parameters* P, int nfile) { chprintf("WARNING from file %s line %d: Write_Restart_HDF5 did nothing", __FILE__, __LINE__); } diff --git a/src/gravity/paris/HenryPeriodic.hpp b/src/gravity/paris/HenryPeriodic.hpp index 4fa2467b2..8d1502263 100644 --- a/src/gravity/paris/HenryPeriodic.hpp +++ b/src/gravity/paris/HenryPeriodic.hpp @@ -19,7 +19,7 @@ class HenryPeriodic * dimension. } * @param[in] hi[3] { Physical location of the global upper bound of each * dimension, minus one grid cell. The one-cell difference is because of the - * periodic domain. See @ref Potential_Paris_3D::Initialize for an example + * periodic domain. See @ref PotentialParis3D::Initialize for an example * computation of these arguments. } * @param[in] m[3] { Number of MPI tasks in each dimension. } * @param[in] id[3] { Coordinates of this MPI task, starting at `{0,0,0}`. } diff --git a/src/gravity/paris/ParisPeriodic.hpp b/src/gravity/paris/ParisPeriodic.hpp index 2650e156d..8069cde65 100644 --- a/src/gravity/paris/ParisPeriodic.hpp +++ b/src/gravity/paris/ParisPeriodic.hpp @@ -15,7 +15,7 @@ class ParisPeriodic * dimension. } * @param[in] hi[3] { Physical location of the global upper bound of each * dimension, minus one grid cell. The one-cell difference is because of the - * periodic domain. See @ref Potential_Paris_3D::Initialize for an example + * periodic domain. See @ref PotentialParis3D::Initialize for an example * computation of these arguments. } * @param[in] m[3] { Number of MPI tasks in each dimension. } * @param[in] id[3] { Coordinates of this MPI task, starting at `{0,0,0}`. } diff --git a/src/gravity/paris/README.md b/src/gravity/paris/README.md index a73664fa3..d019d5e1f 100644 --- a/src/gravity/paris/README.md +++ b/src/gravity/paris/README.md @@ -8,7 +8,7 @@ A 3D Poisson solver that expects periodic boundary conditions. *ParisPeriodic* calls the FFT filter provided by the *HenryPeriodic* class, where it provides a C++ lambda function that solves the Poisson equation in frequency space. It assumes fields in a 3D block distribution with no ghost cells. -It is used by the Cholla class *Potential_Paris_3D* to solve Poisson problems with periodic boundary conditions. +It is used by the Cholla class *PotentialParis3D* to solve Poisson problems with periodic boundary conditions. To use: - Construct a *ParisPeriodic* object using information about the global domain and local MPI task. @@ -44,12 +44,12 @@ A 3D Poisson solver that expects zero-valued boundary conditions. *PoissonZero3DBlockedGPU* uses discrete sine transforms (DSTs) instead of Fourier transforms to enforce zero-valued, non-periodic boundary conditions. It is currently a monolithic class, not depenedent on a *Henry* class. -It is used by the Cholla class *Potential_Paris_Galactic* to solve Poisson problems with non-zero, non-periodic, analytic boundary conditions. +It is used by the Cholla class *PotentialParisGalactic* to solve Poisson problems with non-zero, non-periodic, analytic boundary conditions. -*Potential_Paris_Galactic::Get_Potential()* uses *PoissonZero3DBlockedGPU::solve()* as follows. +*PotentialParisGalactic::Get_Potential()* uses *PoissonZero3DBlockedGPU::solve()* as follows. - Subtract an analytic density from the input density, where the analytic density matches the input density at the domain boundaries. This results in a density with zero-valued boundaries. -- Call *PoissonZero3DBlockedGPU::solve()* with this density with zero-valued boundaries. +- Call *PoissonZero3DBlockedGPU::solve()* with this density with zero-valued boundaries. - Add an analytic potential to the resulting potential, where the analytic potential is the solution to the Poisson equation for the analytic density that was subtracted from the input density. The resulting sum of potentials is the solution to the Poisson problem for the full input density. diff --git a/src/gravity/potential_SOR_3D.cpp b/src/gravity/potential_SOR_3D.cpp index 016a33c54..403b31cf6 100644 --- a/src/gravity/potential_SOR_3D.cpp +++ b/src/gravity/potential_SOR_3D.cpp @@ -153,7 +153,7 @@ void Potential_SOR_3D::Poisson_Partial_Iteration(int n_step, Real omega, Real ep F.density_d, F.potential_d, F.converged_h, F.converged_d); } -void Grid3D::Get_Potential_SOR(Real Grav_Constant, Real dens_avrg, Real current_a, struct parameters *P) +void Grid3D::Get_Potential_SOR(Real Grav_Constant, Real dens_avrg, Real current_a, struct Parameters *P) { #ifdef TIME_SOR Real time_start, time_end, time; @@ -239,7 +239,7 @@ void Grid3D::Get_Potential_SOR(Real Grav_Constant, Real dens_avrg, Real current_ #endif } -void Grav3D::Copy_Isolated_Boundaries_To_GPU(struct parameters *P) +void Grav3D::Copy_Isolated_Boundaries_To_GPU(struct Parameters *P) { if (P->xl_bcnd != 3 && P->xu_bcnd != 3 && P->yl_bcnd != 3 && P->yu_bcnd != 3 && P->zl_bcnd != 3 && P->zu_bcnd != 3) return; @@ -265,7 +265,7 @@ void Grav3D::Copy_Isolated_Boundaries_To_GPU(struct parameters *P) Poisson_solver.n_ghost * nx_local * ny_local); } -void Potential_SOR_3D::Set_Isolated_Boundary_Conditions(int *boundary_flags, struct parameters *P) +void Potential_SOR_3D::Set_Isolated_Boundary_Conditions(int *boundary_flags, struct Parameters *P) { if (P->xl_bcnd != 3 && P->xu_bcnd != 3 && P->yl_bcnd != 3 && P->yu_bcnd != 3 && P->zl_bcnd != 3 && P->zu_bcnd != 3) return; diff --git a/src/gravity/potential_SOR_3D.h b/src/gravity/potential_SOR_3D.h index 048b08f33..2b7c71abc 100644 --- a/src/gravity/potential_SOR_3D.h +++ b/src/gravity/potential_SOR_3D.h @@ -158,7 +158,7 @@ class Potential_SOR_3D void Copy_Transfer_Buffer_To_Host(int size_buffer, Real *transfer_bufer_h, Real *transfer_buffer_d); void Copy_Transfer_Buffer_To_Device(int size_buffer, Real *transfer_bufer_h, Real *transfer_buffer_d); - void Set_Isolated_Boundary_Conditions(int *boundary_flags, struct parameters *P); + void Set_Isolated_Boundary_Conditions(int *boundary_flags, struct Parameters *P); void Set_Isolated_Boundary_GPU(int direction, int side, Real *boundary_d); #ifdef MPI_CHOLLA diff --git a/src/gravity/potential_paris_3D.cu b/src/gravity/potential_paris_3D.cu index 011906d14..aa6af2652 100644 --- a/src/gravity/potential_paris_3D.cu +++ b/src/gravity/potential_paris_3D.cu @@ -56,7 +56,7 @@ static void __attribute__((unused)) Print_Diff(const Real *p, const Real *q, con exit(0); } -Potential_Paris_3D::Potential_Paris_3D() +PotentialParis3D::PotentialParis3D() : dn_{0, 0, 0}, dr_{0, 0, 0}, lo_{0, 0, 0}, @@ -71,10 +71,10 @@ Potential_Paris_3D::Potential_Paris_3D() { } -Potential_Paris_3D::~Potential_Paris_3D() { Reset(); } +PotentialParis3D::~PotentialParis3D() { Reset(); } -void Potential_Paris_3D::Get_Potential(const Real *const density, Real *const potential, const Real g, - const Real offset, const Real a) +void PotentialParis3D::Get_Potential(const Real *const density, Real *const potential, const Real g, const Real offset, + const Real a) { #ifdef COSMOLOGY const Real scale = Real(4) * M_PI * g / a; @@ -117,9 +117,9 @@ void Potential_Paris_3D::Get_Potential(const Real *const density, Real *const po #endif } -void Potential_Paris_3D::Initialize(const Real lx, const Real ly, const Real lz, const Real xMin, const Real yMin, - const Real zMin, const int nx, const int ny, const int nz, const int nxReal, - const int nyReal, const int nzReal, const Real dx, const Real dy, const Real dz) +void PotentialParis3D::Initialize(const Real lx, const Real ly, const Real lz, const Real xMin, const Real yMin, + const Real zMin, const int nx, const int ny, const int nz, const int nxReal, + const int nyReal, const int nzReal, const Real dx, const Real dy, const Real dz) { chprintf(" Using Poisson Solver: Paris Periodic"); #ifdef PARIS_5PT @@ -178,7 +178,7 @@ void Potential_Paris_3D::Initialize(const Real lx, const Real ly, const Real lz, assert(db_); } -void Potential_Paris_3D::Reset() +void PotentialParis3D::Reset() { if (db_) { CHECK(cudaFree(db_)); diff --git a/src/gravity/potential_paris_3D.h b/src/gravity/potential_paris_3D.h index 9cc2828c2..be80c4116 100644 --- a/src/gravity/potential_paris_3D.h +++ b/src/gravity/potential_paris_3D.h @@ -5,11 +5,11 @@ #include "../global/global.h" #include "paris/ParisPeriodic.hpp" -class Potential_Paris_3D +class PotentialParis3D { public: - Potential_Paris_3D(); - ~Potential_Paris_3D(); + PotentialParis3D(); + ~PotentialParis3D(); void Get_Potential(const Real *density, Real *potential, Real g, Real massInfo, Real a); void Initialize(Real lx, Real ly, Real lz, Real xMin, Real yMin, Real zMin, int nx, int ny, int nz, int nxReal, int nyReal, int nzReal, Real dx, Real dy, Real dz); diff --git a/src/gravity/potential_paris_galactic.cu b/src/gravity/potential_paris_galactic.cu index 291f2a059..5d6c758b9 100644 --- a/src/gravity/potential_paris_galactic.cu +++ b/src/gravity/potential_paris_galactic.cu @@ -6,7 +6,7 @@ #include "../io/io.h" #include "../utils/gpu.hpp" -Potential_Paris_Galactic::Potential_Paris_Galactic() +PotentialParisGalactic::PotentialParisGalactic() : dn_{0, 0, 0}, dr_{0, 0, 0}, lo_{0, 0, 0}, @@ -25,10 +25,10 @@ Potential_Paris_Galactic::Potential_Paris_Galactic() { } -Potential_Paris_Galactic::~Potential_Paris_Galactic() { Reset(); } +PotentialParisGalactic::~PotentialParisGalactic() { Reset(); } -void Potential_Paris_Galactic::Get_Potential(const Real *const density, Real *const potential, const Real g, - const DiskGalaxy &galaxy) +void PotentialParisGalactic::Get_Potential(const Real *const density, Real *const potential, const Real g, + const DiskGalaxy &galaxy) { const Real scale = Real(4) * M_PI * g; @@ -110,10 +110,9 @@ void Potential_Paris_Galactic::Get_Potential(const Real *const density, Real *co #endif } -void Potential_Paris_Galactic::Initialize(const Real lx, const Real ly, const Real lz, const Real xMin, const Real yMin, - const Real zMin, const int nx, const int ny, const int nz, const int nxReal, - const int nyReal, const int nzReal, const Real dx, const Real dy, - const Real dz) +void PotentialParisGalactic::Initialize(const Real lx, const Real ly, const Real lz, const Real xMin, const Real yMin, + const Real zMin, const int nx, const int ny, const int nz, const int nxReal, + const int nyReal, const int nzReal, const Real dx, const Real dy, const Real dz) { const long nl012 = long(nxReal) * long(nyReal) * long(nzReal); assert(nl012 <= INT_MAX); @@ -165,7 +164,7 @@ void Potential_Paris_Galactic::Initialize(const Real lx, const Real ly, const Re #endif } -void Potential_Paris_Galactic::Reset() +void PotentialParisGalactic::Reset() { #ifndef GRAVITY_GPU if (dc_) { diff --git a/src/gravity/potential_paris_galactic.h b/src/gravity/potential_paris_galactic.h index 999e4d56c..95fb1fc1f 100644 --- a/src/gravity/potential_paris_galactic.h +++ b/src/gravity/potential_paris_galactic.h @@ -6,11 +6,11 @@ #include "../model/disk_galaxy.h" #include "paris/PoissonZero3DBlockedGPU.hpp" -class Potential_Paris_Galactic +class PotentialParisGalactic { public: - Potential_Paris_Galactic(); - ~Potential_Paris_Galactic(); + PotentialParisGalactic(); + ~PotentialParisGalactic(); void Get_Potential(const Real *density, Real *potential, Real g, const DiskGalaxy &galaxy); void Initialize(Real lx, Real ly, Real lz, Real xMin, Real yMin, Real zMin, int nx, int ny, int nz, int nxReal, int nyReal, int nzReal, Real dx, Real dy, Real dz); diff --git a/src/grid/boundary_conditions.cpp b/src/grid/boundary_conditions.cpp index cb5876f32..d9201fe8b 100644 --- a/src/grid/boundary_conditions.cpp +++ b/src/grid/boundary_conditions.cpp @@ -13,10 +13,10 @@ #include "../mpi/mpi_routines.h" #include "../utils/error_handling.h" -/*! \fn void Set_Boundary_Conditions_Grid(parameters P) +/*! \fn void Set_Boundary_Conditions_Grid(Parameters P ) * \brief Set the boundary conditions for all components based on info in the * parameters structure. */ -void Grid3D::Set_Boundary_Conditions_Grid(parameters P) +void Grid3D::Set_Boundary_Conditions_Grid(Parameters P) { #ifndef ONLY_PARTICLES // Dont transfer Hydro boundaries when only doing particles @@ -48,10 +48,10 @@ void Grid3D::Set_Boundary_Conditions_Grid(parameters P) #endif // GRAVITY } -/*! \fn void Set_Boundary_Conditions(parameters P) +/*! \fn void Set_Boundary_Conditions(Parameters P ) * \brief Set the boundary conditions based on info in the parameters * structure. */ -void Grid3D::Set_Boundary_Conditions(parameters P) +void Grid3D::Set_Boundary_Conditions(Parameters P) { // Check Only one boundary type id being transferred int n_bounds = 0; @@ -128,9 +128,9 @@ void Grid3D::Set_Boundary_Conditions(parameters P) #endif /*MPI_CHOLLA*/ } -/*! \fn int Check_Custom_Boundary(int *flags, struct parameters P) +/*! \fn int Check_Custom_Boundary(int *flags, struct Parameters P) * \brief Check for custom boundary conditions and set boundary flags. */ -int Grid3D::Check_Custom_Boundary(int *flags, struct parameters P) +int Grid3D::Check_Custom_Boundary(int *flags, struct Parameters P) { /*check if any boundary is a custom boundary*/ /*if yes, then return 1*/ diff --git a/src/grid/grid3D.cpp b/src/grid/grid3D.cpp index e6e3bbed1..ac823321f 100644 --- a/src/grid/grid3D.cpp +++ b/src/grid/grid3D.cpp @@ -121,7 +121,7 @@ Real Grid3D::Calc_Inverse_Timestep() /*! \fn void Initialize(int nx_in, int ny_in, int nz_in) * \brief Initialize the grid. */ -void Grid3D::Initialize(struct parameters *P) +void Grid3D::Initialize(struct Parameters *P) { // number of fields to track (default 5 is # of conserved variables) H.n_fields = 5; diff --git a/src/grid/grid3D.h b/src/grid/grid3D.h index e679415d9..8122017be 100644 --- a/src/grid/grid3D.h +++ b/src/grid/grid3D.h @@ -289,7 +289,7 @@ class Grid3D #ifdef PARTICLES // Object that contains data for particles - Particles_3D Particles; + Particles3D Particles; #endif #ifdef COSMOLOGY @@ -312,7 +312,7 @@ class Grid3D #endif #ifdef ANALYSIS - Analysis_Module Analysis; + AnalysisModule Analysis; #endif #ifdef SUPERNOVA // TODO refactor this into Analysis module @@ -424,16 +424,16 @@ class Grid3D /*! \fn void Initialize(int nx_in, int ny_in, int nz_in) * \brief Initialize the grid. */ - void Initialize(struct parameters *P); + void Initialize(struct Parameters *P); /*! \fn void AllocateMemory(void) * \brief Allocate memory for the d, m, E arrays. */ void AllocateMemory(void); - /*! \fn void Set_Initial_Conditions(parameters P) + /*! \fn void Set_Initial_Conditions(Parameters P ) * \brief Set the initial conditions based on info in the parameters * structure. */ - void Set_Initial_Conditions(parameters P); + void Set_Initial_Conditions(Parameters P); /*! \fn void Get_Position(long i, long j, long k, Real *xpos, Real *ypos, Real * *zpos) \brief Get the cell-centered position based on cell index */ @@ -441,9 +441,9 @@ class Grid3D Real Calc_Inverse_Timestep(); - /*! \fn void Set_Domain_Properties(struct parameters P) + /*! \fn void Set_Domain_Properties(struct Parameters P) * \brief Set local domain properties */ - void Set_Domain_Properties(struct parameters P); + void Set_Domain_Properties(struct Parameters P); /*! \fn void set_dt(Real dti) * \brief Calculate the timestep. */ @@ -509,13 +509,13 @@ class Grid3D #endif - /*! \fn void Read_Grid(struct parameters P) + /*! \fn void Read_Grid(struct Parameters P) * \brief Read in grid data from 1-per-process output files. */ - void Read_Grid(struct parameters P); + void Read_Grid(struct Parameters P); - /*! \fn void Read_Grid_Cat(struct parameters P) + /*! \fn void Read_Grid_Cat(struct Parameters P) * \brief Read in grid data from a single concatenated output file. */ - void Read_Grid_Cat(struct parameters P); + void Read_Grid_Cat(struct Parameters P); /*! \fn Read_Grid_Binary(FILE *fp) * \brief Read in grid data from a binary file. */ @@ -524,7 +524,7 @@ class Grid3D #ifdef HDF5 /*! \fn void Read_Grid_HDF5(hid_t file_id) * \brief Read in grid data from an hdf5 file. */ - void Read_Grid_HDF5(hid_t file_id, struct parameters P); + void Read_Grid_HDF5(hid_t file_id, struct Parameters P); #endif /*! \fn void Reset(void) @@ -540,21 +540,21 @@ class Grid3D * * \param[in] P the parameters struct. */ - void Constant(parameters const &P); + void Constant(Parameters const &P); /*! * \brief Sine wave perturbation. * * \param[in] P the parameters struct. */ - void Sound_Wave(parameters const &P); + void Sound_Wave(Parameters const &P); /*! * \brief Initialize the grid with a simple linear wave. * * \param[in] P the parameters struct. */ - void Linear_Wave(parameters const &P); + void Linear_Wave(Parameters const &P); /*! * \brief Square wave density perturbation with amplitude A*rho in pressure @@ -562,14 +562,14 @@ class Grid3D * * \param[in] P the parameters struct. */ - void Square_Wave(parameters const &P); + void Square_Wave(Parameters const &P); /*! * \brief Initialize the grid with a Riemann problem. * * \param[in] P the parameters struct. */ - void Riemann(parameters const &P); + void Riemann(Parameters const &P); /*! \fn void Shu_Osher() * \brief Initialize the grid with the Shu-Osher shock tube problem. See @@ -620,24 +620,24 @@ class Grid3D * \brief Initialize the grid with a 2D disk following a Kuzmin profile. */ void Disk_2D(); - /*! \fn void Disk_3D(parameters P) + /*! \fn void Disk_3D(Parameters P ) * \brief Initialize the grid with a 3D disk following a Miyamoto-Nagai * profile. */ - void Disk_3D(parameters P); + void Disk_3D(Parameters P); - /*! \fn void Set_Boundary_Conditions(parameters P) + /*! \fn void Set_Boundary_Conditions(Parameters P ) * \brief Set the boundary conditions based on info in the parameters * structure. */ - void Set_Boundary_Conditions(parameters P); + void Set_Boundary_Conditions(Parameters P); - /*! \fn void Set_Boundary_Conditions_Grid(parameters P) + /*! \fn void Set_Boundary_Conditions_Grid(Parameters P ) * \brief Set the boundary conditions for all components based on info in the * parameters structure. */ - void Set_Boundary_Conditions_Grid(parameters P); + void Set_Boundary_Conditions_Grid(Parameters P); - /*! \fn int Check_Custom_Boundary(int *flags, struct parameters P) + /*! \fn int Check_Custom_Boundary(int *flags, struct Parameters P) * \brief Check for custom boundary conditions */ - int Check_Custom_Boundary(int *flags, struct parameters P); + int Check_Custom_Boundary(int *flags, struct Parameters P); /*! \fn void Set_Boundaries(int dir, int flags[]) * \brief Apply boundary conditions to the grid. */ @@ -674,9 +674,9 @@ class Grid3D void Uniform_Grid(); - void Zeldovich_Pancake(struct parameters P); + void Zeldovich_Pancake(struct Parameters P); - void Chemistry_Test(struct parameters P); + void Chemistry_Test(struct Parameters P); #ifdef MHD /*! @@ -685,7 +685,7 @@ class Grid3D * * \param P The parameters. Only uses Vx, pitch, and yaw */ - void Circularly_Polarized_Alfven_Wave(struct parameters const P); + void Circularly_Polarized_Alfven_Wave(struct Parameters const P); /*! * \brief Initialize the grid with a advecting field loop. See [Gardiner & @@ -693,7 +693,7 @@ class Grid3D * * \param P The parameters object */ - void Advecting_Field_Loop(struct parameters const P); + void Advecting_Field_Loop(struct Parameters const P); /*! * \brief Initialize the grid with a spherical MHD blast wave. See [Gardiner & @@ -701,7 +701,7 @@ class Grid3D * * \param P The parameters struct */ - void MHD_Spherical_Blast(struct parameters const P); + void MHD_Spherical_Blast(struct Parameters const P); /*! * \brief Initialize the grid with the Orszag-Tang Vortex. See [Gardiner & Stone @@ -713,8 +713,8 @@ class Grid3D #endif // MHD #ifdef MPI_CHOLLA - void Set_Boundaries_MPI(struct parameters P); - void Set_Boundaries_MPI_BLOCK(int *flags, struct parameters P); + void Set_Boundaries_MPI(struct Parameters P); + void Set_Boundaries_MPI_BLOCK(int *flags, struct Parameters P); void Load_and_Send_MPI_Comm_Buffers(int dir, int *flags); void Wait_and_Unload_MPI_Comm_Buffers(int dir, int *flags); void Unload_MPI_Comm_Buffers(int index); @@ -735,8 +735,8 @@ class Grid3D #endif /*MPI_CHOLLA*/ #ifdef GRAVITY - void Initialize_Gravity(struct parameters *P); - void Compute_Gravitational_Potential(struct parameters *P); + void Initialize_Gravity(struct Parameters *P); + void Compute_Gravitational_Potential(struct Parameters *P); void Copy_Hydro_Density_to_Gravity_Function(int g_start, int g_end); void Copy_Hydro_Density_to_Gravity(); void Extrapolate_Grav_Potential_Function(int g_start, int g_end); @@ -745,10 +745,10 @@ class Grid3D int Load_Gravity_Potential_To_Buffer(int direction, int side, Real *buffer, int buffer_start); void Unload_Gravity_Potential_from_Buffer(int direction, int side, Real *buffer, int buffer_start); void Set_Potential_Boundaries_Isolated(int direction, int side, int *flags); - void Compute_Potential_Boundaries_Isolated(int dir, struct parameters *P); + void Compute_Potential_Boundaries_Isolated(int dir, struct Parameters *P); void Compute_Potential_Isolated_Boundary(int direction, int side, int bc_potential_type); #ifdef SOR - void Get_Potential_SOR(Real Grav_Constant, Real dens_avrg, Real current_a, struct parameters *P); + void Get_Potential_SOR(Real Grav_Constant, Real dens_avrg, Real current_a, struct Parameters *P); int Load_Poisson_Boundary_To_Buffer(int direction, int side, Real *buffer); void Unload_Poisson_Boundary_From_Buffer(int direction, int side, Real *buffer_host); #endif @@ -766,7 +766,7 @@ class Grid3D #ifdef GRAVITY_ANALYTIC_COMP void Add_Analytic_Potential(); void Add_Analytic_Potential(int g_start, int g_end); - void Setup_Analytic_Potential(struct parameters *P); + void Setup_Analytic_Potential(struct Parameters *P); void Setup_Analytic_Galaxy_Potential(int g_start, int g_end, DiskGalaxy &gal); #ifdef GRAVITY_GPU void Add_Analytic_Potential_GPU(); @@ -774,14 +774,14 @@ class Grid3D #endif // GRAVITY_ANALYTIC_COMP #ifdef PARTICLES - void Initialize_Particles(struct parameters *P); + void Initialize_Particles(struct Parameters *P); void Initialize_Uniform_Particles(); void Copy_Particles_Density_function(int g_start, int g_end); void Copy_Particles_Density(); - void Copy_Particles_Density_to_Gravity(struct parameters P); + void Copy_Particles_Density_to_Gravity(struct Parameters P); void Set_Particles_Density_Boundaries_Periodic(int direction, int side); - void Transfer_Particles_Boundaries(struct parameters P); - Real Update_Grid_and_Particles_KDK(struct parameters P); + void Transfer_Particles_Boundaries(struct Parameters P); + Real Update_Grid_and_Particles_KDK(struct Parameters P); void Set_Particles_Boundary(int dir, int side); #ifdef PARTICLES_CPU void Set_Particles_Open_Boundary_CPU(int dir, int side); @@ -807,12 +807,12 @@ class Grid3D void Unload_Particles_From_Buffers_BLOCK(int index, int *flags); void Finish_Particles_Transfer(); #endif // MPI_CHOLLA - void Transfer_Particles_Density_Boundaries(struct parameters P); + void Transfer_Particles_Density_Boundaries(struct Parameters P); void Copy_Particles_Density_Buffer_Device_to_Host(int direction, int side, Real *buffer_d, Real *buffer_h); - // void Transfer_Particles_Boundaries( struct parameters P ); - void WriteData_Particles(struct parameters P, int nfile); - void OutputData_Particles(struct parameters P, int nfile); - void Load_Particles_Data(struct parameters P); + // void Transfer_Particles_Boundaries( struct Parameters P ); + void WriteData_Particles(struct Parameters P, int nfile); + void OutputData_Particles(struct Parameters P, int nfile); + void Load_Particles_Data(struct Parameters P); #ifdef HDF5 void Write_Particles_Header_HDF5(hid_t file_id); void Write_Particles_Data_HDF5(hid_t file_id); @@ -847,7 +847,7 @@ class Grid3D #endif // PARTICLES #ifdef COSMOLOGY - void Initialize_Cosmology(struct parameters *P); + void Initialize_Cosmology(struct Parameters *P); void Change_DM_Frame_System(bool forward); void Change_GAS_Frame_System(bool forward); void Change_GAS_Frame_System_GPU(bool forward); @@ -863,7 +863,7 @@ class Grid3D #endif // COSMOLOGY #ifdef COOLING_GRACKLE - void Initialize_Grackle(struct parameters *P); + void Initialize_Grackle(struct Parameters *P); void Allocate_Memory_Grackle(); void Initialize_Fields_Grackle(); void Copy_Fields_To_Grackle_function(int g_start, int g_end); @@ -874,15 +874,15 @@ class Grid3D #endif #ifdef CHEMISTRY_GPU - void Initialize_Chemistry(struct parameters *P); + void Initialize_Chemistry(struct Parameters *P); void Compute_Gas_Temperature(Real *temperature, bool convert_cosmo_units); void Update_Chemistry(); #endif #ifdef ANALYSIS - void Initialize_Analysis_Module(struct parameters *P); - void Compute_and_Output_Analysis(struct parameters *P); - void Output_Analysis(struct parameters *P); + void Initialize_AnalysisModule(struct Parameters *P); + void Compute_and_Output_Analysis(struct Parameters *P); + void Output_Analysis(struct Parameters *P); void Write_Analysis_Header_HDF5(hid_t file_id); void Write_Analysis_Data_HDF5(hid_t file_id); @@ -897,7 +897,7 @@ class Grid3D void Compute_Flux_Power_Spectrum_Skewer(int skewer_id, int axis); void Initialize_Power_Spectrum_Measurements(int axis); #ifdef OUTPUT_SKEWERS - void Output_Skewers_File(struct parameters *P); + void Output_Skewers_File(struct Parameters *P); void Write_Skewers_Header_HDF5(hid_t file_id); void Write_Skewers_Data_HDF5(hid_t file_id); #endif diff --git a/src/grid/initial_conditions.cpp b/src/grid/initial_conditions.cpp index f1e3a3307..768bf0960 100644 --- a/src/grid/initial_conditions.cpp +++ b/src/grid/initial_conditions.cpp @@ -23,10 +23,10 @@ #include "../utils/math_utilities.h" #include "../utils/mhd_utilities.h" -/*! \fn void Set_Initial_Conditions(parameters P) +/*! \fn void Set_Initial_Conditions(Parameters P ) * \brief Set the initial conditions based on info in the parameters structure. */ -void Grid3D::Set_Initial_Conditions(parameters P) +void Grid3D::Set_Initial_Conditions(Parameters P) { Set_Domain_Properties(P); Set_Gammas(P.gamma); @@ -104,9 +104,9 @@ void Grid3D::Set_Initial_Conditions(parameters P) } } -/*! \fn void Set_Domain_Properties(struct parameters P) +/*! \fn void Set_Domain_Properties(struct Parameters P) * \brief Set local domain properties */ -void Grid3D::Set_Domain_Properties(struct parameters P) +void Grid3D::Set_Domain_Properties(struct Parameters P) { // Global Boundary Coordinates H.xbound = P.xmin; @@ -177,7 +177,7 @@ void Grid3D::Set_Domain_Properties(struct parameters P) /*! \fn void Constant(Real rho, Real vx, Real vy, Real vz, Real P, Real Bx, Real * By, Real Bz) \brief Constant gas properties. */ -void Grid3D::Constant(parameters const &P) +void Grid3D::Constant(Parameters const &P) { int i, j, k, id; int istart, jstart, kstart, iend, jend, kend; @@ -241,7 +241,7 @@ void Grid3D::Constant(parameters const &P) /*! \fn void Sound_Wave(Real rho, Real vx, Real vy, Real vz, Real P, Real A) * \brief Sine wave perturbation. */ -void Grid3D::Sound_Wave(parameters const &P) +void Grid3D::Sound_Wave(Parameters const &P) { int i, j, k, id; int istart, jstart, kstart, iend, jend, kend; @@ -299,7 +299,7 @@ void Grid3D::Sound_Wave(parameters const &P) /*! \fn void Linear_Wave(Real rho, Real vx, Real vy, Real vz, Real P, Real A) * \brief Sine wave perturbation. */ -void Grid3D::Linear_Wave(parameters const &P) +void Grid3D::Linear_Wave(Parameters const &P) { // Compute any test parameters needed // ================================== @@ -434,7 +434,7 @@ void Grid3D::Linear_Wave(parameters const &P) /*! \fn void Square_Wave(Real rho, Real vx, Real vy, Real vz, Real P, Real A) * \brief Square wave density perturbation with amplitude A*rho in pressure * equilibrium. */ -void Grid3D::Square_Wave(parameters const &P) +void Grid3D::Square_Wave(Parameters const &P) { int i, j, k, id; int istart, jstart, kstart, iend, jend, kend; @@ -506,7 +506,7 @@ void Grid3D::Square_Wave(parameters const &P) Bx_l, Real By_l, Real Bz_l, Real rho_r, Real vx_r, Real vy_r, Real vz_r, Real P_r, Real Bx_r, Real By_r, Real Bz_r, Real diaph) * \brief Initialize the grid with a Riemann problem. */ -void Grid3D::Riemann(parameters const &P) +void Grid3D::Riemann(Parameters const &P) { size_t const istart = H.n_ghost - 1; size_t const iend = H.nx - H.n_ghost; @@ -1469,7 +1469,7 @@ void Grid3D::Uniform_Grid() } } -void Grid3D::Zeldovich_Pancake(struct parameters P) +void Grid3D::Zeldovich_Pancake(struct Parameters P) { #ifndef COSMOLOGY chprintf("To run a Zeldovich Pancake COSMOLOGY has to be turned ON \n"); @@ -1574,7 +1574,7 @@ void Grid3D::Zeldovich_Pancake(struct parameters P) #endif // COSMOLOGY } -void Grid3D::Chemistry_Test(struct parameters P) +void Grid3D::Chemistry_Test(struct Parameters P) { chprintf("Initializing Chemistry Test...\n"); @@ -1676,7 +1676,7 @@ void Grid3D::Chemistry_Test(struct parameters P) } #ifdef MHD -void Grid3D::Circularly_Polarized_Alfven_Wave(struct parameters const P) +void Grid3D::Circularly_Polarized_Alfven_Wave(struct Parameters const P) { // This test is only meaningful for a limited number of parameter values so I will check them here assert(P.polarization == 1.0 or @@ -1795,7 +1795,7 @@ void Grid3D::Circularly_Polarized_Alfven_Wave(struct parameters const P) } } -void Grid3D::Advecting_Field_Loop(struct parameters const P) +void Grid3D::Advecting_Field_Loop(struct Parameters const P) { // This test is only meaningful for a limited number of parameter values so I will check them here // Check that the domain is centered on zero @@ -1861,7 +1861,7 @@ void Grid3D::Advecting_Field_Loop(struct parameters const P) } } -void Grid3D::MHD_Spherical_Blast(struct parameters const P) +void Grid3D::MHD_Spherical_Blast(struct Parameters const P) { // This test is only meaningful for a limited number of parameter values so I will check them here // Check that the domain is centered on zero diff --git a/src/grid/mpi_boundaries.cpp b/src/grid/mpi_boundaries.cpp index 9b858c8cf..747bcd6ec 100644 --- a/src/grid/mpi_boundaries.cpp +++ b/src/grid/mpi_boundaries.cpp @@ -10,7 +10,7 @@ #ifdef MPI_CHOLLA -void Grid3D::Set_Boundaries_MPI(struct parameters P) +void Grid3D::Set_Boundaries_MPI(struct Parameters P) { int flags[6] = {0, 0, 0, 0, 0, 0}; @@ -26,7 +26,7 @@ void Grid3D::Set_Boundaries_MPI(struct parameters P) #endif } -void Grid3D::Set_Boundaries_MPI_BLOCK(int *flags, struct parameters P) +void Grid3D::Set_Boundaries_MPI_BLOCK(int *flags, struct Parameters P) { #ifdef PARTICLES // Clear the vectors that contain the particles IDs to be transfred diff --git a/src/io/io.cpp b/src/io/io.cpp index 09c267d76..9959267de 100644 --- a/src/io/io.cpp +++ b/src/io/io.cpp @@ -45,7 +45,7 @@ static inline bool Is_Root_Proc() #endif } -void Create_Log_File(struct parameters P) +void Create_Log_File(struct Parameters P) { if (not Is_Root_Proc()) { return; @@ -87,7 +87,7 @@ void Write_Message_To_Log_File(const char *message) } /* Write Cholla Output Data */ -void Write_Data(Grid3D &G, struct parameters P, int nfile) +void Write_Data(Grid3D &G, struct Parameters P, int nfile) { cudaMemcpy(G.C.density, G.C.device, G.H.n_fields * G.H.n_cells * sizeof(Real), cudaMemcpyDeviceToHost); @@ -188,7 +188,7 @@ void Write_Data(Grid3D &G, struct parameters P, int nfile) } /* Output the grid data to file. */ -void Output_Data(Grid3D &G, struct parameters P, int nfile) +void Output_Data(Grid3D &G, struct Parameters P, int nfile) { // create the filename std::string filename(P.outdir); @@ -266,7 +266,7 @@ void Output_Data(Grid3D &G, struct parameters P, int nfile) #endif } -void Output_Float32(Grid3D &G, struct parameters P, int nfile) +void Output_Float32(Grid3D &G, struct Parameters P, int nfile) { #ifdef HDF5 Header H = G.H; @@ -384,7 +384,7 @@ void Output_Float32(Grid3D &G, struct parameters P, int nfile) } /* Output a projection of the grid data to file. */ -void Output_Projected_Data(Grid3D &G, struct parameters P, int nfile) +void Output_Projected_Data(Grid3D &G, struct Parameters P, int nfile) { #ifdef HDF5 hid_t file_id; @@ -429,7 +429,7 @@ void Output_Projected_Data(Grid3D &G, struct parameters P, int nfile) } /* Output a rotated projection of the grid data to file. */ -void Output_Rotated_Projected_Data(Grid3D &G, struct parameters P, int nfile) +void Output_Rotated_Projected_Data(Grid3D &G, struct Parameters P, int nfile) { #ifdef HDF5 hid_t file_id; @@ -534,7 +534,7 @@ void Output_Rotated_Projected_Data(Grid3D &G, struct parameters P, int nfile) } /* Output xy, xz, and yz slices of the grid data. */ -void Output_Slices(Grid3D &G, struct parameters P, int nfile) +void Output_Slices(Grid3D &G, struct Parameters P, int nfile) { #ifdef HDF5 hid_t file_id; @@ -2246,9 +2246,9 @@ void Grid3D::Write_Slices_HDF5(hid_t file_id) } #endif // HDF5 -/*! \fn void Read_Grid(struct parameters P) +/*! \fn void Read_Grid(struct Parameters P) * \brief Read in grid data from an output file. */ -void Grid3D::Read_Grid(struct parameters P) +void Grid3D::Read_Grid(struct Parameters P) { ScopedTimer timer("Read_Grid"); int nfile = P.nfile; // output step you want to read from @@ -2501,7 +2501,7 @@ void Read_Grid_HDF5_Field_Magnetic(hid_t file_id, Real *dataset_buffer, Header H /*! \fn void Read_Grid_HDF5(hid_t file_id) * \brief Read in grid data from an hdf5 file. */ -void Grid3D::Read_Grid_HDF5(hid_t file_id, struct parameters P) +void Grid3D::Read_Grid_HDF5(hid_t file_id, struct Parameters P) { int i, j, k, id, buf_id; hid_t attribute_id, dataset_id; diff --git a/src/io/io.h b/src/io/io.h index 0dfc6370a..598a57a66 100644 --- a/src/io/io.h +++ b/src/io/io.h @@ -8,22 +8,22 @@ #include "../grid/grid3D.h" /* Write the data */ -void Write_Data(Grid3D& G, struct parameters P, int nfile); +void Write_Data(Grid3D& G, struct Parameters P, int nfile); /* Output the grid data to file. */ -void Output_Data(Grid3D& G, struct parameters P, int nfile); +void Output_Data(Grid3D& G, struct Parameters P, int nfile); /* Output the grid data to file as 32-bit floats. */ -void Output_Float32(Grid3D& G, struct parameters P, int nfile); +void Output_Float32(Grid3D& G, struct Parameters P, int nfile); /* Output a projection of the grid data to file. */ -void Output_Projected_Data(Grid3D& G, struct parameters P, int nfile); +void Output_Projected_Data(Grid3D& G, struct Parameters P, int nfile); /* Output a rotated projection of the grid data to file. */ -void Output_Rotated_Projected_Data(Grid3D& G, struct parameters P, int nfile); +void Output_Rotated_Projected_Data(Grid3D& G, struct Parameters P, int nfile); /* Output xy, xz, and yz slices of the grid data to file. */ -void Output_Slices(Grid3D& G, struct parameters P, int nfile); +void Output_Slices(Grid3D& G, struct Parameters P, int nfile); /* MPI-safe printf routine */ int chprintf(const char* __restrict sdata, ...); @@ -45,7 +45,7 @@ std::string to_string_exact(T const& input) return output.str(); } -void Create_Log_File(struct parameters P); +void Create_Log_File(struct Parameters P); void Write_Message_To_Log_File(const char* message); diff --git a/src/io/io_parallel.cpp b/src/io/io_parallel.cpp index 70a97e974..22257b1fc 100644 --- a/src/io/io_parallel.cpp +++ b/src/io/io_parallel.cpp @@ -56,7 +56,7 @@ void Read_Grid_Cat_HDF5_Field_Magnetic(hid_t file_id, Real* dataset_buffer, Head } /*! \brief Read in grid data from a single concatenated output file. */ -void Grid3D::Read_Grid_Cat(struct parameters P) +void Grid3D::Read_Grid_Cat(struct Parameters P) { ScopedTimer timer("Read_Grid_Cat"); herr_t status; @@ -130,7 +130,7 @@ void Grid3D::Read_Grid_Cat(struct parameters P) #else -void Grid3D::Read_Grid_Cat(struct parameters P) +void Grid3D::Read_Grid_Cat(struct Parameters P) { chprintf("Warning: Read_Grid_Cat does nothing without MPI_CHOLLA and HDF5\n"); chexit(-1); diff --git a/src/main.cpp b/src/main.cpp index 9148cbb6e..de46957ce 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -56,7 +56,7 @@ int main(int argc, char *argv[]) // input parameter variables char *param_file; - struct parameters P; + struct Parameters P; int nfile = 0; // number of output files Real outtime = 0; // current output time @@ -156,7 +156,7 @@ int main(int argc, char *argv[]) #endif #ifdef ANALYSIS - G.Initialize_Analysis_Module(&P); + G.Initialize_AnalysisModule(&P); if (G.Analysis.Output_Now) { G.Compute_and_Output_Analysis(&P); } diff --git a/src/mhd/ct_electric_fields.cu b/src/mhd/ct_electric_fields.cu index 8e95b8dbd..f061edeb7 100644 --- a/src/mhd/ct_electric_fields.cu +++ b/src/mhd/ct_electric_fields.cu @@ -72,50 +72,49 @@ __global__ void Calculate_CT_Electric_Fields(Real const *fluxX, Real const *flux // Y-direction slope on the positive Y side. S&G 2009 equation 23 signUpwind = fluxZ[cuda_utilities::compute1DIndex(xid, yid, zid - 1, nx, ny) + grid_enum::density * n_cells]; if (signUpwind > 0.0) { - slope_y_pos = mhd::_internal::_ctSlope(fluxY, dev_conserved, -1, 0, 2, -1, 1, 2, xid, yid, zid, nx, ny, n_cells); + slope_y_pos = mhd::internal::_ctSlope(fluxY, dev_conserved, -1, 0, 2, -1, 1, 2, xid, yid, zid, nx, ny, n_cells); } else if (signUpwind < 0.0) { - slope_y_pos = - mhd::_internal::_ctSlope(fluxY, dev_conserved, -1, 0, -1, -1, 1, -1, xid, yid, zid, nx, ny, n_cells); + slope_y_pos = mhd::internal::_ctSlope(fluxY, dev_conserved, -1, 0, -1, -1, 1, -1, xid, yid, zid, nx, ny, n_cells); } else { slope_y_pos = - 0.5 * (mhd::_internal::_ctSlope(fluxY, dev_conserved, -1, 0, 2, -1, 1, 2, xid, yid, zid, nx, ny, n_cells) + - mhd::_internal::_ctSlope(fluxY, dev_conserved, -1, 0, -1, -1, 1, -1, xid, yid, zid, nx, ny, n_cells)); + 0.5 * (mhd::internal::_ctSlope(fluxY, dev_conserved, -1, 0, 2, -1, 1, 2, xid, yid, zid, nx, ny, n_cells) + + mhd::internal::_ctSlope(fluxY, dev_conserved, -1, 0, -1, -1, 1, -1, xid, yid, zid, nx, ny, n_cells)); } // Y-direction slope on the negative Y side. S&G 2009 equation 23 signUpwind = fluxZ[cuda_utilities::compute1DIndex(xid, yid - 1, zid - 1, nx, ny) + grid_enum::density * n_cells]; if (signUpwind > 0.0) { - slope_y_neg = mhd::_internal::_ctSlope(fluxY, dev_conserved, -1, 0, 1, 2, 1, 2, xid, yid, zid, nx, ny, n_cells); + slope_y_neg = mhd::internal::_ctSlope(fluxY, dev_conserved, -1, 0, 1, 2, 1, 2, xid, yid, zid, nx, ny, n_cells); } else if (signUpwind < 0.0) { - slope_y_neg = mhd::_internal::_ctSlope(fluxY, dev_conserved, -1, 0, 1, -1, 1, -1, xid, yid, zid, nx, ny, n_cells); + slope_y_neg = mhd::internal::_ctSlope(fluxY, dev_conserved, -1, 0, 1, -1, 1, -1, xid, yid, zid, nx, ny, n_cells); } else { slope_y_neg = - 0.5 * (mhd::_internal::_ctSlope(fluxY, dev_conserved, -1, 0, 1, 2, 1, 2, xid, yid, zid, nx, ny, n_cells) + - mhd::_internal::_ctSlope(fluxY, dev_conserved, -1, 0, 1, -1, 1, -1, xid, yid, zid, nx, ny, n_cells)); + 0.5 * (mhd::internal::_ctSlope(fluxY, dev_conserved, -1, 0, 1, 2, 1, 2, xid, yid, zid, nx, ny, n_cells) + + mhd::internal::_ctSlope(fluxY, dev_conserved, -1, 0, 1, -1, 1, -1, xid, yid, zid, nx, ny, n_cells)); } // Z-direction slope on the positive Z side. S&G 2009 equation 23 signUpwind = fluxY[cuda_utilities::compute1DIndex(xid, yid - 1, zid, nx, ny) + grid_enum::density * n_cells]; if (signUpwind > 0.0) { - slope_z_pos = mhd::_internal::_ctSlope(fluxZ, dev_conserved, 1, 0, 1, -1, 1, 2, xid, yid, zid, nx, ny, n_cells); + slope_z_pos = mhd::internal::_ctSlope(fluxZ, dev_conserved, 1, 0, 1, -1, 1, 2, xid, yid, zid, nx, ny, n_cells); } else if (signUpwind < 0.0) { - slope_z_pos = mhd::_internal::_ctSlope(fluxZ, dev_conserved, 1, 0, -1, -1, 2, -1, xid, yid, zid, nx, ny, n_cells); + slope_z_pos = mhd::internal::_ctSlope(fluxZ, dev_conserved, 1, 0, -1, -1, 2, -1, xid, yid, zid, nx, ny, n_cells); } else { slope_z_pos = - 0.5 * (mhd::_internal::_ctSlope(fluxZ, dev_conserved, 1, 0, 1, -1, 1, 2, xid, yid, zid, nx, ny, n_cells) + - mhd::_internal::_ctSlope(fluxZ, dev_conserved, 1, 0, -1, -1, 2, -1, xid, yid, zid, nx, ny, n_cells)); + 0.5 * (mhd::internal::_ctSlope(fluxZ, dev_conserved, 1, 0, 1, -1, 1, 2, xid, yid, zid, nx, ny, n_cells) + + mhd::internal::_ctSlope(fluxZ, dev_conserved, 1, 0, -1, -1, 2, -1, xid, yid, zid, nx, ny, n_cells)); } // Z-direction slope on the negative Z side. S&G 2009 equation 23 signUpwind = fluxY[cuda_utilities::compute1DIndex(xid, yid - 1, zid - 1, nx, ny) + grid_enum::density * n_cells]; if (signUpwind > 0.0) { - slope_z_neg = mhd::_internal::_ctSlope(fluxZ, dev_conserved, 1, 0, 1, 2, 1, 2, xid, yid, zid, nx, ny, n_cells); + slope_z_neg = mhd::internal::_ctSlope(fluxZ, dev_conserved, 1, 0, 1, 2, 1, 2, xid, yid, zid, nx, ny, n_cells); } else if (signUpwind < 0.0) { - slope_z_neg = mhd::_internal::_ctSlope(fluxZ, dev_conserved, 1, 0, 2, -1, -1, 2, xid, yid, zid, nx, ny, n_cells); + slope_z_neg = mhd::internal::_ctSlope(fluxZ, dev_conserved, 1, 0, 2, -1, -1, 2, xid, yid, zid, nx, ny, n_cells); } else { slope_z_neg = - 0.5 * (mhd::_internal::_ctSlope(fluxZ, dev_conserved, 1, 0, 1, 2, 1, 2, xid, yid, zid, nx, ny, n_cells) + - mhd::_internal::_ctSlope(fluxZ, dev_conserved, 1, 0, 2, -1, -1, 2, xid, yid, zid, nx, ny, n_cells)); + 0.5 * (mhd::internal::_ctSlope(fluxZ, dev_conserved, 1, 0, 1, 2, 1, 2, xid, yid, zid, nx, ny, n_cells) + + mhd::internal::_ctSlope(fluxZ, dev_conserved, 1, 0, 2, -1, -1, 2, xid, yid, zid, nx, ny, n_cells)); } // Load the face centered electric fields Note the negative signs to @@ -144,50 +143,49 @@ __global__ void Calculate_CT_Electric_Fields(Real const *fluxX, Real const *flux // X-direction slope on the positive X side. S&G 2009 equation 23 signUpwind = fluxZ[cuda_utilities::compute1DIndex(xid, yid, zid - 1, nx, ny) + grid_enum::density * n_cells]; if (signUpwind > 0.0) { - slope_x_pos = mhd::_internal::_ctSlope(fluxX, dev_conserved, 1, 1, 2, -1, 0, 2, xid, yid, zid, nx, ny, n_cells); + slope_x_pos = mhd::internal::_ctSlope(fluxX, dev_conserved, 1, 1, 2, -1, 0, 2, xid, yid, zid, nx, ny, n_cells); } else if (signUpwind < 0.0) { - slope_x_pos = mhd::_internal::_ctSlope(fluxX, dev_conserved, 1, 1, -1, -1, 0, -1, xid, yid, zid, nx, ny, n_cells); + slope_x_pos = mhd::internal::_ctSlope(fluxX, dev_conserved, 1, 1, -1, -1, 0, -1, xid, yid, zid, nx, ny, n_cells); } else { slope_x_pos = - 0.5 * (mhd::_internal::_ctSlope(fluxX, dev_conserved, 1, 1, 2, -1, 0, 2, xid, yid, zid, nx, ny, n_cells) + - mhd::_internal::_ctSlope(fluxX, dev_conserved, 1, 1, -1, -1, 0, -1, xid, yid, zid, nx, ny, n_cells)); + 0.5 * (mhd::internal::_ctSlope(fluxX, dev_conserved, 1, 1, 2, -1, 0, 2, xid, yid, zid, nx, ny, n_cells) + + mhd::internal::_ctSlope(fluxX, dev_conserved, 1, 1, -1, -1, 0, -1, xid, yid, zid, nx, ny, n_cells)); } // X-direction slope on the negative X side. S&G 2009 equation 23 signUpwind = fluxZ[cuda_utilities::compute1DIndex(xid - 1, yid, zid - 1, nx, ny) + grid_enum::density * n_cells]; if (signUpwind > 0.0) { - slope_x_neg = mhd::_internal::_ctSlope(fluxX, dev_conserved, 1, 1, 0, 2, 0, 2, xid, yid, zid, nx, ny, n_cells); + slope_x_neg = mhd::internal::_ctSlope(fluxX, dev_conserved, 1, 1, 0, 2, 0, 2, xid, yid, zid, nx, ny, n_cells); } else if (signUpwind < 0.0) { - slope_x_neg = mhd::_internal::_ctSlope(fluxX, dev_conserved, 1, 1, 0, -1, 0, -1, xid, yid, zid, nx, ny, n_cells); + slope_x_neg = mhd::internal::_ctSlope(fluxX, dev_conserved, 1, 1, 0, -1, 0, -1, xid, yid, zid, nx, ny, n_cells); } else { slope_x_neg = - 0.5 * (mhd::_internal::_ctSlope(fluxX, dev_conserved, 1, 1, 0, 2, 0, 2, xid, yid, zid, nx, ny, n_cells) + - mhd::_internal::_ctSlope(fluxX, dev_conserved, 1, 1, 0, -1, 0, -1, xid, yid, zid, nx, ny, n_cells)); + 0.5 * (mhd::internal::_ctSlope(fluxX, dev_conserved, 1, 1, 0, 2, 0, 2, xid, yid, zid, nx, ny, n_cells) + + mhd::internal::_ctSlope(fluxX, dev_conserved, 1, 1, 0, -1, 0, -1, xid, yid, zid, nx, ny, n_cells)); } // Z-direction slope on the positive Z side. S&G 2009 equation 23 signUpwind = fluxX[cuda_utilities::compute1DIndex(xid - 1, yid, zid, nx, ny) + grid_enum::density * n_cells]; if (signUpwind > 0.0) { - slope_z_pos = mhd::_internal::_ctSlope(fluxZ, dev_conserved, -1, 1, 0, -1, 0, 2, xid, yid, zid, nx, ny, n_cells); + slope_z_pos = mhd::internal::_ctSlope(fluxZ, dev_conserved, -1, 1, 0, -1, 0, 2, xid, yid, zid, nx, ny, n_cells); } else if (signUpwind < 0.0) { - slope_z_pos = - mhd::_internal::_ctSlope(fluxZ, dev_conserved, -1, 1, -1, -1, 2, -1, xid, yid, zid, nx, ny, n_cells); + slope_z_pos = mhd::internal::_ctSlope(fluxZ, dev_conserved, -1, 1, -1, -1, 2, -1, xid, yid, zid, nx, ny, n_cells); } else { slope_z_pos = - 0.5 * (mhd::_internal::_ctSlope(fluxZ, dev_conserved, -1, 1, 0, -1, 0, 2, xid, yid, zid, nx, ny, n_cells) + - mhd::_internal::_ctSlope(fluxZ, dev_conserved, -1, 1, -1, -1, 2, -1, xid, yid, zid, nx, ny, n_cells)); + 0.5 * (mhd::internal::_ctSlope(fluxZ, dev_conserved, -1, 1, 0, -1, 0, 2, xid, yid, zid, nx, ny, n_cells) + + mhd::internal::_ctSlope(fluxZ, dev_conserved, -1, 1, -1, -1, 2, -1, xid, yid, zid, nx, ny, n_cells)); } // Z-direction slope on the negative Z side. S&G 2009 equation 23 signUpwind = fluxX[cuda_utilities::compute1DIndex(xid - 1, yid, zid - 1, nx, ny) + grid_enum::density * n_cells]; if (signUpwind > 0.0) { - slope_z_neg = mhd::_internal::_ctSlope(fluxZ, dev_conserved, -1, 1, 0, 2, 0, 2, xid, yid, zid, nx, ny, n_cells); + slope_z_neg = mhd::internal::_ctSlope(fluxZ, dev_conserved, -1, 1, 0, 2, 0, 2, xid, yid, zid, nx, ny, n_cells); } else if (signUpwind < 0.0) { - slope_z_neg = mhd::_internal::_ctSlope(fluxZ, dev_conserved, -1, 1, 2, -1, 2, -1, xid, yid, zid, nx, ny, n_cells); + slope_z_neg = mhd::internal::_ctSlope(fluxZ, dev_conserved, -1, 1, 2, -1, 2, -1, xid, yid, zid, nx, ny, n_cells); } else { slope_z_neg = - 0.5 * (mhd::_internal::_ctSlope(fluxZ, dev_conserved, -1, 1, 0, 2, 0, 2, xid, yid, zid, nx, ny, n_cells) + - mhd::_internal::_ctSlope(fluxZ, dev_conserved, -1, 1, 2, -1, 2, -1, xid, yid, zid, nx, ny, n_cells)); + 0.5 * (mhd::internal::_ctSlope(fluxZ, dev_conserved, -1, 1, 0, 2, 0, 2, xid, yid, zid, nx, ny, n_cells) + + mhd::internal::_ctSlope(fluxZ, dev_conserved, -1, 1, 2, -1, 2, -1, xid, yid, zid, nx, ny, n_cells)); } // Load the face centered electric fields Note the negative signs to @@ -215,50 +213,49 @@ __global__ void Calculate_CT_Electric_Fields(Real const *fluxX, Real const *flux // Y-direction slope on the positive Y side. S&G 2009 equation 23 signUpwind = fluxX[cuda_utilities::compute1DIndex(xid - 1, yid, zid, nx, ny) + grid_enum::density * n_cells]; if (signUpwind > 0.0) { - slope_y_pos = mhd::_internal::_ctSlope(fluxY, dev_conserved, 1, 2, 0, -1, 0, 1, xid, yid, zid, nx, ny, n_cells); + slope_y_pos = mhd::internal::_ctSlope(fluxY, dev_conserved, 1, 2, 0, -1, 0, 1, xid, yid, zid, nx, ny, n_cells); } else if (signUpwind < 0.0) { - slope_y_pos = mhd::_internal::_ctSlope(fluxY, dev_conserved, 1, 2, -1, -1, 1, -1, xid, yid, zid, nx, ny, n_cells); + slope_y_pos = mhd::internal::_ctSlope(fluxY, dev_conserved, 1, 2, -1, -1, 1, -1, xid, yid, zid, nx, ny, n_cells); } else { slope_y_pos = - 0.5 * (mhd::_internal::_ctSlope(fluxY, dev_conserved, 1, 2, 0, -1, 0, 1, xid, yid, zid, nx, ny, n_cells) + - mhd::_internal::_ctSlope(fluxY, dev_conserved, 1, 2, -1, -1, 1, -1, xid, yid, zid, nx, ny, n_cells)); + 0.5 * (mhd::internal::_ctSlope(fluxY, dev_conserved, 1, 2, 0, -1, 0, 1, xid, yid, zid, nx, ny, n_cells) + + mhd::internal::_ctSlope(fluxY, dev_conserved, 1, 2, -1, -1, 1, -1, xid, yid, zid, nx, ny, n_cells)); } // Y-direction slope on the negative Y side. S&G 2009 equation 23 signUpwind = fluxX[cuda_utilities::compute1DIndex(xid - 1, yid - 1, zid, nx, ny) + grid_enum::density * n_cells]; if (signUpwind > 0.0) { - slope_y_neg = mhd::_internal::_ctSlope(fluxY, dev_conserved, 1, 2, 0, 1, 0, 1, xid, yid, zid, nx, ny, n_cells); + slope_y_neg = mhd::internal::_ctSlope(fluxY, dev_conserved, 1, 2, 0, 1, 0, 1, xid, yid, zid, nx, ny, n_cells); } else if (signUpwind < 0.0) { - slope_y_neg = mhd::_internal::_ctSlope(fluxY, dev_conserved, 1, 2, 1, -1, 1, -1, xid, yid, zid, nx, ny, n_cells); + slope_y_neg = mhd::internal::_ctSlope(fluxY, dev_conserved, 1, 2, 1, -1, 1, -1, xid, yid, zid, nx, ny, n_cells); } else { slope_y_neg = - 0.5 * (mhd::_internal::_ctSlope(fluxY, dev_conserved, 1, 2, 0, 1, 0, 1, xid, yid, zid, nx, ny, n_cells) + - mhd::_internal::_ctSlope(fluxY, dev_conserved, 1, 2, 1, -1, 1, -1, xid, yid, zid, nx, ny, n_cells)); + 0.5 * (mhd::internal::_ctSlope(fluxY, dev_conserved, 1, 2, 0, 1, 0, 1, xid, yid, zid, nx, ny, n_cells) + + mhd::internal::_ctSlope(fluxY, dev_conserved, 1, 2, 1, -1, 1, -1, xid, yid, zid, nx, ny, n_cells)); } // X-direction slope on the positive X side. S&G 2009 equation 23 signUpwind = fluxY[cuda_utilities::compute1DIndex(xid, yid - 1, zid, nx, ny) + grid_enum::density * n_cells]; if (signUpwind > 0.0) { - slope_x_pos = mhd::_internal::_ctSlope(fluxX, dev_conserved, -1, 2, 1, -1, 0, 1, xid, yid, zid, nx, ny, n_cells); + slope_x_pos = mhd::internal::_ctSlope(fluxX, dev_conserved, -1, 2, 1, -1, 0, 1, xid, yid, zid, nx, ny, n_cells); } else if (signUpwind < 0.0) { - slope_x_pos = - mhd::_internal::_ctSlope(fluxX, dev_conserved, -1, 2, -1, -1, 0, -1, xid, yid, zid, nx, ny, n_cells); + slope_x_pos = mhd::internal::_ctSlope(fluxX, dev_conserved, -1, 2, -1, -1, 0, -1, xid, yid, zid, nx, ny, n_cells); } else { slope_x_pos = - 0.5 * (mhd::_internal::_ctSlope(fluxX, dev_conserved, -1, 2, 1, -1, 0, 1, xid, yid, zid, nx, ny, n_cells) + - mhd::_internal::_ctSlope(fluxX, dev_conserved, -1, 2, -1, -1, 0, -1, xid, yid, zid, nx, ny, n_cells)); + 0.5 * (mhd::internal::_ctSlope(fluxX, dev_conserved, -1, 2, 1, -1, 0, 1, xid, yid, zid, nx, ny, n_cells) + + mhd::internal::_ctSlope(fluxX, dev_conserved, -1, 2, -1, -1, 0, -1, xid, yid, zid, nx, ny, n_cells)); } // X-direction slope on the negative X side. S&G 2009 equation 23 signUpwind = fluxY[cuda_utilities::compute1DIndex(xid - 1, yid - 1, zid, nx, ny) + grid_enum::density * n_cells]; if (signUpwind > 0.0) { - slope_x_neg = mhd::_internal::_ctSlope(fluxX, dev_conserved, -1, 2, 0, 1, 0, 1, xid, yid, zid, nx, ny, n_cells); + slope_x_neg = mhd::internal::_ctSlope(fluxX, dev_conserved, -1, 2, 0, 1, 0, 1, xid, yid, zid, nx, ny, n_cells); } else if (signUpwind < 0.0) { - slope_x_neg = mhd::_internal::_ctSlope(fluxX, dev_conserved, -1, 2, 0, -1, 0, -1, xid, yid, zid, nx, ny, n_cells); + slope_x_neg = mhd::internal::_ctSlope(fluxX, dev_conserved, -1, 2, 0, -1, 0, -1, xid, yid, zid, nx, ny, n_cells); } else { slope_x_neg = - 0.5 * (mhd::_internal::_ctSlope(fluxX, dev_conserved, -1, 2, 0, 1, 0, 1, xid, yid, zid, nx, ny, n_cells) + - mhd::_internal::_ctSlope(fluxX, dev_conserved, -1, 2, 0, -1, 0, -1, xid, yid, zid, nx, ny, n_cells)); + 0.5 * (mhd::internal::_ctSlope(fluxX, dev_conserved, -1, 2, 0, 1, 0, 1, xid, yid, zid, nx, ny, n_cells) + + mhd::internal::_ctSlope(fluxX, dev_conserved, -1, 2, 0, -1, 0, -1, xid, yid, zid, nx, ny, n_cells)); } // Load the face centered electric fields Note the negative signs to diff --git a/src/mhd/ct_electric_fields.h b/src/mhd/ct_electric_fields.h index 7fb6c5063..c151f5bd0 100644 --- a/src/mhd/ct_electric_fields.h +++ b/src/mhd/ct_electric_fields.h @@ -28,7 +28,7 @@ namespace mhd * but is made accesible for testing * */ -namespace _internal +namespace internal { // ===================================================================== /*! @@ -118,7 +118,7 @@ inline __host__ __device__ Real _ctSlope(Real const *flux, Real const *dev_conse return electric_face - electric_centered; } // ===================================================================== -} // namespace _internal +} // namespace internal // ========================================================================= /*! diff --git a/src/mhd/ct_electric_fields_tests.cu b/src/mhd/ct_electric_fields_tests.cu index 3c8dc479a..d5fcfb656 100644 --- a/src/mhd/ct_electric_fields_tests.cu +++ b/src/mhd/ct_electric_fields_tests.cu @@ -215,53 +215,53 @@ TEST(tMHDCTSlope, CorrectInputExpectCorrectOutput) // Get test data. Only test the options that will be used std::vector test_data; test_data.emplace_back( - mhd::_internal::_ctSlope(flux.data(), conserved.data(), -1, 0, 2, -1, 1, 2, xid, yid, zid, nx, ny, n_cells)); + mhd::internal::_ctSlope(flux.data(), conserved.data(), -1, 0, 2, -1, 1, 2, xid, yid, zid, nx, ny, n_cells)); test_data.emplace_back( - mhd::_internal::_ctSlope(flux.data(), conserved.data(), -1, 0, -1, -1, 1, -1, xid, yid, zid, nx, ny, n_cells)); + mhd::internal::_ctSlope(flux.data(), conserved.data(), -1, 0, -1, -1, 1, -1, xid, yid, zid, nx, ny, n_cells)); test_data.emplace_back( - mhd::_internal::_ctSlope(flux.data(), conserved.data(), -1, 0, 1, 2, 1, 2, xid, yid, zid, nx, ny, n_cells)); + mhd::internal::_ctSlope(flux.data(), conserved.data(), -1, 0, 1, 2, 1, 2, xid, yid, zid, nx, ny, n_cells)); test_data.emplace_back( - mhd::_internal::_ctSlope(flux.data(), conserved.data(), -1, 0, 1, -1, 1, -1, xid, yid, zid, nx, ny, n_cells)); + mhd::internal::_ctSlope(flux.data(), conserved.data(), -1, 0, 1, -1, 1, -1, xid, yid, zid, nx, ny, n_cells)); test_data.emplace_back( - mhd::_internal::_ctSlope(flux.data(), conserved.data(), 1, 0, 1, -1, 1, 2, xid, yid, zid, nx, ny, n_cells)); + mhd::internal::_ctSlope(flux.data(), conserved.data(), 1, 0, 1, -1, 1, 2, xid, yid, zid, nx, ny, n_cells)); test_data.emplace_back( - mhd::_internal::_ctSlope(flux.data(), conserved.data(), 1, 0, -1, -1, 2, -1, xid, yid, zid, nx, ny, n_cells)); + mhd::internal::_ctSlope(flux.data(), conserved.data(), 1, 0, -1, -1, 2, -1, xid, yid, zid, nx, ny, n_cells)); test_data.emplace_back( - mhd::_internal::_ctSlope(flux.data(), conserved.data(), 1, 0, 1, 2, 1, 2, xid, yid, zid, nx, ny, n_cells)); + mhd::internal::_ctSlope(flux.data(), conserved.data(), 1, 0, 1, 2, 1, 2, xid, yid, zid, nx, ny, n_cells)); test_data.emplace_back( - mhd::_internal::_ctSlope(flux.data(), conserved.data(), 1, 0, 2, -1, -1, 2, xid, yid, zid, nx, ny, n_cells)); + mhd::internal::_ctSlope(flux.data(), conserved.data(), 1, 0, 2, -1, -1, 2, xid, yid, zid, nx, ny, n_cells)); test_data.emplace_back( - mhd::_internal::_ctSlope(flux.data(), conserved.data(), 1, 1, 2, -1, 0, 2, xid, yid, zid, nx, ny, n_cells)); + mhd::internal::_ctSlope(flux.data(), conserved.data(), 1, 1, 2, -1, 0, 2, xid, yid, zid, nx, ny, n_cells)); test_data.emplace_back( - mhd::_internal::_ctSlope(flux.data(), conserved.data(), 1, 1, -1, -1, 0, -1, xid, yid, zid, nx, ny, n_cells)); + mhd::internal::_ctSlope(flux.data(), conserved.data(), 1, 1, -1, -1, 0, -1, xid, yid, zid, nx, ny, n_cells)); test_data.emplace_back( - mhd::_internal::_ctSlope(flux.data(), conserved.data(), 1, 1, 0, 2, 0, 2, xid, yid, zid, nx, ny, n_cells)); + mhd::internal::_ctSlope(flux.data(), conserved.data(), 1, 1, 0, 2, 0, 2, xid, yid, zid, nx, ny, n_cells)); test_data.emplace_back( - mhd::_internal::_ctSlope(flux.data(), conserved.data(), 1, 1, 0, -1, 0, -1, xid, yid, zid, nx, ny, n_cells)); + mhd::internal::_ctSlope(flux.data(), conserved.data(), 1, 1, 0, -1, 0, -1, xid, yid, zid, nx, ny, n_cells)); test_data.emplace_back( - mhd::_internal::_ctSlope(flux.data(), conserved.data(), -1, 1, 0, -1, 0, 2, xid, yid, zid, nx, ny, n_cells)); + mhd::internal::_ctSlope(flux.data(), conserved.data(), -1, 1, 0, -1, 0, 2, xid, yid, zid, nx, ny, n_cells)); test_data.emplace_back( - mhd::_internal::_ctSlope(flux.data(), conserved.data(), -1, 1, -1, -1, 2, -1, xid, yid, zid, nx, ny, n_cells)); + mhd::internal::_ctSlope(flux.data(), conserved.data(), -1, 1, -1, -1, 2, -1, xid, yid, zid, nx, ny, n_cells)); test_data.emplace_back( - mhd::_internal::_ctSlope(flux.data(), conserved.data(), -1, 1, 0, 2, 0, 2, xid, yid, zid, nx, ny, n_cells)); + mhd::internal::_ctSlope(flux.data(), conserved.data(), -1, 1, 0, 2, 0, 2, xid, yid, zid, nx, ny, n_cells)); test_data.emplace_back( - mhd::_internal::_ctSlope(flux.data(), conserved.data(), -1, 1, 2, -1, 2, -1, xid, yid, zid, nx, ny, n_cells)); + mhd::internal::_ctSlope(flux.data(), conserved.data(), -1, 1, 2, -1, 2, -1, xid, yid, zid, nx, ny, n_cells)); test_data.emplace_back( - mhd::_internal::_ctSlope(flux.data(), conserved.data(), 1, 2, 0, -1, 0, 1, xid, yid, zid, nx, ny, n_cells)); + mhd::internal::_ctSlope(flux.data(), conserved.data(), 1, 2, 0, -1, 0, 1, xid, yid, zid, nx, ny, n_cells)); test_data.emplace_back( - mhd::_internal::_ctSlope(flux.data(), conserved.data(), 1, 2, -1, -1, 1, -1, xid, yid, zid, nx, ny, n_cells)); + mhd::internal::_ctSlope(flux.data(), conserved.data(), 1, 2, -1, -1, 1, -1, xid, yid, zid, nx, ny, n_cells)); test_data.emplace_back( - mhd::_internal::_ctSlope(flux.data(), conserved.data(), 1, 2, 0, 1, 0, 1, xid, yid, zid, nx, ny, n_cells)); + mhd::internal::_ctSlope(flux.data(), conserved.data(), 1, 2, 0, 1, 0, 1, xid, yid, zid, nx, ny, n_cells)); test_data.emplace_back( - mhd::_internal::_ctSlope(flux.data(), conserved.data(), 1, 2, 1, -1, 1, -1, xid, yid, zid, nx, ny, n_cells)); + mhd::internal::_ctSlope(flux.data(), conserved.data(), 1, 2, 1, -1, 1, -1, xid, yid, zid, nx, ny, n_cells)); test_data.emplace_back( - mhd::_internal::_ctSlope(flux.data(), conserved.data(), -1, 2, 1, -1, 0, 1, xid, yid, zid, nx, ny, n_cells)); + mhd::internal::_ctSlope(flux.data(), conserved.data(), -1, 2, 1, -1, 0, 1, xid, yid, zid, nx, ny, n_cells)); test_data.emplace_back( - mhd::_internal::_ctSlope(flux.data(), conserved.data(), -1, 2, -1, -1, 0, -1, xid, yid, zid, nx, ny, n_cells)); + mhd::internal::_ctSlope(flux.data(), conserved.data(), -1, 2, -1, -1, 0, -1, xid, yid, zid, nx, ny, n_cells)); test_data.emplace_back( - mhd::_internal::_ctSlope(flux.data(), conserved.data(), -1, 2, 0, 1, 0, 1, xid, yid, zid, nx, ny, n_cells)); + mhd::internal::_ctSlope(flux.data(), conserved.data(), -1, 2, 0, 1, 0, 1, xid, yid, zid, nx, ny, n_cells)); test_data.emplace_back( - mhd::_internal::_ctSlope(flux.data(), conserved.data(), -1, 2, 0, -1, 0, -1, xid, yid, zid, nx, ny, n_cells)); + mhd::internal::_ctSlope(flux.data(), conserved.data(), -1, 2, 0, -1, 0, -1, xid, yid, zid, nx, ny, n_cells)); // Check the results ASSERT_EQ(test_data.size(), fiducial_data.size()); diff --git a/src/model/disk_ICs.cpp b/src/model/disk_ICs.cpp index 09edb949b..055827001 100644 --- a/src/model/disk_ICs.cpp +++ b/src/model/disk_ICs.cpp @@ -732,9 +732,9 @@ Real Halo_Density_D3D(Real r, Real *r_halo, Real *rho_halo, Real dr, int nr) return (rho_halo[i + 1] - rho_halo[i]) * (r - r_halo[i]) / (r_halo[i + 1] - r_halo[i]) + rho_halo[i]; } -/*! \fn void Disk_3D(parameters P) +/*! \fn void Disk_3D(Parameters P ) * \brief Initialize the grid with a 3D disk. */ -void Grid3D::Disk_3D(parameters p) +void Grid3D::Disk_3D(Parameters p) { #ifdef DISK_ICS @@ -748,8 +748,8 @@ void Grid3D::Disk_3D(parameters p) Real r_cool; // MW model - DiskGalaxy galaxy = Galaxies::MW; // NOLINT(cppcoreguidelines-slicing) - // M82 model Galaxies::M82; + DiskGalaxy galaxy = galaxies::MW; // NOLINT(cppcoreguidelines-slicing) + // M82 model galaxies::M82; M_vir = galaxy.getM_vir(); // viral mass in M_sun M_d = galaxy.getM_d(); // mass of disk in M_sun (assume all stars) diff --git a/src/model/disk_galaxy.h b/src/model/disk_galaxy.h index fd3dcc53f..9f58bf2a6 100644 --- a/src/model/disk_galaxy.h +++ b/src/model/disk_galaxy.h @@ -180,12 +180,12 @@ class ClusteredDiskGalaxy : public DiskGalaxy } }; -namespace Galaxies +namespace galaxies { // all masses in M_sun and all distances in kpc // static DiskGalaxy MW(6.5e10, 3.5, (3.5/5.0), 1.0e12, 261, 20, 157.0); static ClusteredDiskGalaxy MW(1e4, 5e5, 6.5e10, 2.7, 0.7, 1.077e12, 261, 18, 157.0); static DiskGalaxy M82(1.0e10, 0.8, 0.15, 5.0e10, 0.8 / 0.015, 10, 100.0); -}; // namespace Galaxies +}; // namespace galaxies #endif // DISK_GALAXY diff --git a/src/mpi/mpi_routines.cpp b/src/mpi/mpi_routines.cpp index bb0cbf24d..0aa9f31c5 100644 --- a/src/mpi/mpi_routines.cpp +++ b/src/mpi/mpi_routines.cpp @@ -239,7 +239,7 @@ void InitializeChollaMPI(int *pargc, char **pargv[]) } /* Perform domain decomposition */ -void DomainDecomposition(struct parameters *P, struct Header *H, int nx_gin, int ny_gin, int nz_gin) +void DomainDecomposition(struct Parameters *P, struct Header *H, int nx_gin, int ny_gin, int nz_gin) { DomainDecompositionBLOCK(P, H, nx_gin, ny_gin, nz_gin); @@ -270,7 +270,7 @@ void DomainDecomposition(struct parameters *P, struct Header *H, int nx_gin, int } /* Perform domain decomposition */ -void DomainDecompositionBLOCK(struct parameters *P, struct Header *H, int nx_gin, int ny_gin, int nz_gin) +void DomainDecompositionBLOCK(struct Parameters *P, struct Header *H, int nx_gin, int ny_gin, int nz_gin) { int n; int i, j, k; diff --git a/src/mpi/mpi_routines.h b/src/mpi/mpi_routines.h index 3c4b85403..5faf7d9cc 100644 --- a/src/mpi/mpi_routines.h +++ b/src/mpi/mpi_routines.h @@ -150,9 +150,9 @@ extern int nproc_z; void InitializeChollaMPI(int *pargc, char **pargv[]); /* Perform domain decomposition */ -void DomainDecomposition(struct parameters *P, struct Header *H, int nx_global, int ny_global, int nz_global); +void DomainDecomposition(struct Parameters *P, struct Header *H, int nx_global, int ny_global, int nz_global); -void DomainDecompositionBLOCK(struct parameters *P, struct Header *H, int nx_global, int ny_global, int nz_global); +void DomainDecompositionBLOCK(struct Parameters *P, struct Header *H, int nx_global, int ny_global, int nz_global); /*tile MPI processes in a block decomposition*/ void TileBlockDecomposition(void); diff --git a/src/particles/density_CIC.cpp b/src/particles/density_CIC.cpp index 3e28f55de..428a0e864 100644 --- a/src/particles/density_CIC.cpp +++ b/src/particles/density_CIC.cpp @@ -16,7 +16,7 @@ #endif // Get the particles Cloud-In-Cell interpolated density -void Particles_3D::Get_Density_CIC() +void Particles3D::Get_Density_CIC() { #ifdef PARTICLES_CPU #ifdef PARALLEL_OMP @@ -33,7 +33,7 @@ void Particles_3D::Get_Density_CIC() // Compute the particles density and copy it to the array in Grav to compute the // potential -void Grid3D::Copy_Particles_Density_to_Gravity(struct parameters P) +void Grid3D::Copy_Particles_Density_to_Gravity(struct Parameters P) { #ifdef CPU_TIME Timer.Part_Density.Start(); @@ -119,7 +119,7 @@ void Grid3D::Copy_Particles_Density_function(int g_start, int g_end) } // Clear the density array: density=0 -void ::Particles_3D::Clear_Density() +void ::Particles3D::Clear_Density() { #ifdef PARTICLES_CPU for (int i = 0; i < G.n_cells; i++) G.density[i] = 0; @@ -132,9 +132,9 @@ void ::Particles_3D::Clear_Density() #ifdef PARTICLES_GPU -void Particles_3D::Clear_Density_GPU() { Clear_Density_GPU_function(G.density_dev, G.n_cells); } +void Particles3D::Clear_Density_GPU() { Clear_Density_GPU_function(G.density_dev, G.n_cells); } -void Particles_3D::Get_Density_CIC_GPU() +void Particles3D::Get_Density_CIC_GPU() { Get_Density_CIC_GPU_function(n_local, particle_mass, G.xMin, G.xMax, G.yMin, G.yMax, G.zMin, G.zMax, G.dx, G.dy, G.dz, G.nx_local, G.ny_local, G.nz_local, G.n_ghost_particles_grid, G.n_cells, G.density, @@ -154,7 +154,7 @@ void Get_Indexes_CIC(Real xMin, Real yMin, Real zMin, Real dx, Real dy, Real dz, } // Comute the CIC density (NO OpenMP) -void Particles_3D::Get_Density_CIC_Serial() +void Particles3D::Get_Density_CIC_Serial() { int nGHST = G.n_ghost_particles_grid; int nx_g = G.nx_local + 2 * nGHST; @@ -269,7 +269,7 @@ void Particles_3D::Get_Density_CIC_Serial() #ifdef PARALLEL_OMP // Compute the CIC density when PARALLEL_OMP -void Particles_3D::Get_Density_CIC_OMP() +void Particles3D::Get_Density_CIC_OMP() { // Span OpenMP threads #pragma omp parallel num_threads(N_OMP_THREADS) diff --git a/src/particles/density_CIC_gpu.cu b/src/particles/density_CIC_gpu.cu index c33544046..68346912e 100644 --- a/src/particles/density_CIC_gpu.cu +++ b/src/particles/density_CIC_gpu.cu @@ -142,17 +142,17 @@ __global__ void Get_Density_CIC_Kernel(part_int_t n_local, Real particle_mass, R } // Clear the density array: density=0 -void Particles_3D::Clear_Density_GPU_function(Real *density_dev, int n_cells) +void Particles3D::Clear_Density_GPU_function(Real *density_dev, int n_cells) { Set_Particles_Array_Real(0.0, density_dev, n_cells); } // Call the CIC density kernel to get the particles density -void Particles_3D::Get_Density_CIC_GPU_function(part_int_t n_local, Real particle_mass, Real xMin, Real xMax, Real yMin, - Real yMax, Real zMin, Real zMax, Real dx, Real dy, Real dz, - int nx_local, int ny_local, int nz_local, int n_ghost_particles_grid, - int n_cells, Real *density_h, Real *density_dev, Real *pos_x_dev, - Real *pos_y_dev, Real *pos_z_dev, Real *mass_dev) +void Particles3D::Get_Density_CIC_GPU_function(part_int_t n_local, Real particle_mass, Real xMin, Real xMax, Real yMin, + Real yMax, Real zMin, Real zMax, Real dx, Real dy, Real dz, int nx_local, + int ny_local, int nz_local, int n_ghost_particles_grid, int n_cells, + Real *density_h, Real *density_dev, Real *pos_x_dev, Real *pos_y_dev, + Real *pos_z_dev, Real *mass_dev) { // set values for GPU kernels int ngrid = (n_local - 1) / TPB_PARTICLES + 1; diff --git a/src/particles/density_boundaries.cpp b/src/particles/density_boundaries.cpp index 15680d6f8..6884e99cd 100644 --- a/src/particles/density_boundaries.cpp +++ b/src/particles/density_boundaries.cpp @@ -74,7 +74,7 @@ void Grid3D::Set_Particles_Density_Boundaries_Periodic(int direction, int side) } } -void Grid3D::Transfer_Particles_Density_Boundaries(struct parameters P) +void Grid3D::Transfer_Particles_Density_Boundaries(struct Parameters P) { // Transfer the Particles Density Boundares diff --git a/src/particles/feedback_CIC_gpu.cu b/src/particles/feedback_CIC_gpu.cu index 1c8b5e9ba..b0d0cf288 100644 --- a/src/particles/feedback_CIC_gpu.cu +++ b/src/particles/feedback_CIC_gpu.cu @@ -69,7 +69,7 @@ __global__ void Init_State_Kernel(unsigned int seed, FeedbackPrng* states) * @param n_local number of local particles on the GPU * @param allocation_factor */ -void supernova::initState(struct parameters* P, part_int_t n_local, Real allocation_factor) +void supernova::initState(struct Parameters* P, part_int_t n_local, Real allocation_factor) { chprintf("supernova::initState start\n"); std::string snr_filename(P->snr_filename); diff --git a/src/particles/gravity_CIC.cpp b/src/particles/gravity_CIC.cpp index 13938942e..495e7cf33 100644 --- a/src/particles/gravity_CIC.cpp +++ b/src/particles/gravity_CIC.cpp @@ -80,14 +80,14 @@ void Grid3D::Get_Gravity_CIC() } #ifdef PARTICLES_GPU -void Particles_3D::Get_Gravity_Field_Particles_GPU(Real *potential_host) +void Particles3D::Get_Gravity_Field_Particles_GPU(Real *potential_host) { Get_Gravity_Field_Particles_GPU_function(G.nx_local, G.ny_local, G.nz_local, G.n_ghost_particles_grid, G.n_cells_potential, G.dx, G.dy, G.dz, potential_host, G.potential_dev, G.gravity_x_dev, G.gravity_y_dev, G.gravity_z_dev); } -void Particles_3D::Get_Gravity_CIC_GPU() +void Particles3D::Get_Gravity_CIC_GPU() { Get_Gravity_CIC_GPU_function(n_local, G.nx_local, G.ny_local, G.nz_local, G.n_ghost_particles_grid, G.xMin, G.xMax, G.yMin, G.yMax, G.zMin, G.zMax, G.dx, G.dy, G.dz, pos_x_dev, pos_y_dev, pos_z_dev, diff --git a/src/particles/gravity_CIC_gpu.cu b/src/particles/gravity_CIC_gpu.cu index e66eb928e..556166a65 100644 --- a/src/particles/gravity_CIC_gpu.cu +++ b/src/particles/gravity_CIC_gpu.cu @@ -17,7 +17,7 @@ #ifdef PARTICLES_GPU // Copy the potential from host to device -void Particles_3D::Copy_Potential_To_GPU(Real *potential_host, Real *potential_dev, int n_cells_potential) +void Particles3D::Copy_Potential_To_GPU(Real *potential_host, Real *potential_dev, int n_cells_potential) { CudaSafeCall(cudaMemcpy(potential_dev, potential_host, n_cells_potential * sizeof(Real), cudaMemcpyHostToDevice)); } @@ -102,11 +102,11 @@ __global__ void Get_Gravity_Field_Particles_Kernel(Real *potential_dev, Real *gr } // Call the kernel to compute the gradient of the potential -void Particles_3D::Get_Gravity_Field_Particles_GPU_function(int nx_local, int ny_local, int nz_local, - int n_ghost_particles_grid, int n_cells_potential, Real dx, - Real dy, Real dz, Real *potential_host, Real *potential_dev, - Real *gravity_x_dev, Real *gravity_y_dev, - Real *gravity_z_dev) +void Particles3D::Get_Gravity_Field_Particles_GPU_function(int nx_local, int ny_local, int nz_local, + int n_ghost_particles_grid, int n_cells_potential, Real dx, + Real dy, Real dz, Real *potential_host, Real *potential_dev, + Real *gravity_x_dev, Real *gravity_y_dev, + Real *gravity_z_dev) { #ifndef GRAVITY_GPU Copy_Potential_To_GPU(potential_host, potential_dev, n_cells_potential); @@ -265,12 +265,12 @@ __global__ void Get_Gravity_CIC_Kernel(part_int_t n_local, Real *gravity_x_dev, // Call the kernel to compote the gravitational field at the particles positions // ( CIC ) -void Particles_3D::Get_Gravity_CIC_GPU_function(part_int_t n_local, int nx_local, int ny_local, int nz_local, - int n_ghost_particles_grid, Real xMin, Real xMax, Real yMin, Real yMax, - Real zMin, Real zMax, Real dx, Real dy, Real dz, Real *pos_x_dev, - Real *pos_y_dev, Real *pos_z_dev, Real *grav_x_dev, Real *grav_y_dev, - Real *grav_z_dev, Real *gravity_x_dev, Real *gravity_y_dev, - Real *gravity_z_dev) +void Particles3D::Get_Gravity_CIC_GPU_function(part_int_t n_local, int nx_local, int ny_local, int nz_local, + int n_ghost_particles_grid, Real xMin, Real xMax, Real yMin, Real yMax, + Real zMin, Real zMax, Real dx, Real dy, Real dz, Real *pos_x_dev, + Real *pos_y_dev, Real *pos_z_dev, Real *grav_x_dev, Real *grav_y_dev, + Real *grav_z_dev, Real *gravity_x_dev, Real *gravity_y_dev, + Real *gravity_z_dev) { // set values for GPU kernels int ngrid = (n_local - 1) / TPB_PARTICLES + 1; diff --git a/src/particles/io_particles.cpp b/src/particles/io_particles.cpp index 26c90d94f..a5f04ce13 100644 --- a/src/particles/io_particles.cpp +++ b/src/particles/io_particles.cpp @@ -21,7 +21,7 @@ // #define OUTPUT_PARTICLES_DATA -void Particles_3D::Load_Particles_Data(struct parameters *P) +void Particles3D::Load_Particles_Data(struct Parameters *P) { char filename[100]; char timestep[20]; @@ -67,7 +67,7 @@ void Particles_3D::Load_Particles_Data(struct parameters *P) #endif } -void Grid3D::WriteData_Particles(struct parameters P, int nfile) +void Grid3D::WriteData_Particles(struct Parameters P, int nfile) { // Write the particles data to file OutputData_Particles(P, nfile); @@ -75,7 +75,7 @@ void Grid3D::WriteData_Particles(struct parameters P, int nfile) #ifdef HDF5 -void Particles_3D::Load_Particles_Data_HDF5(hid_t file_id, int nfile, struct parameters *P) +void Particles3D::Load_Particles_Data_HDF5(hid_t file_id, int nfile, struct Parameters *P) { int i, j, k, id, buf_id; hid_t attribute_id, dataset_id; @@ -760,7 +760,7 @@ void Grid3D::Write_Particles_Data_HDF5(hid_t file_id) } #endif // HDF5 -void Grid3D::OutputData_Particles(struct parameters P, int nfile) +void Grid3D::OutputData_Particles(struct Parameters P, int nfile) { FILE *out; char filename[MAXLEN]; diff --git a/src/particles/particles_3D.cpp b/src/particles/particles_3D.cpp index ec1b8b735..6417e4136 100644 --- a/src/particles/particles_3D.cpp +++ b/src/particles/particles_3D.cpp @@ -23,9 +23,9 @@ #include "../utils/parallel_omp.h" #endif -Particles_3D::Particles_3D(void) : TRANSFER_DENSITY_BOUNDARIES(false), TRANSFER_PARTICLES_BOUNDARIES(false) {} +Particles3D::Particles3D(void) : TRANSFER_DENSITY_BOUNDARIES(false), TRANSFER_PARTICLES_BOUNDARIES(false) {} -void Grid3D::Initialize_Particles(struct parameters *P) +void Grid3D::Initialize_Particles(struct Parameters *P) { chprintf("\nInitializing Particles...\n"); @@ -47,8 +47,8 @@ void Grid3D::Initialize_Particles(struct parameters *P) chprintf("Particles Initialized Successfully. \n\n"); } -void Particles_3D::Initialize(struct parameters *P, Grav3D &Grav, Real xbound, Real ybound, Real zbound, Real xdglobal, - Real ydglobal, Real zdglobal) +void Particles3D::Initialize(struct Parameters *P, Grav3D &Grav, Real xbound, Real ybound, Real zbound, Real xdglobal, + Real ydglobal, Real zdglobal) { // Initialize local and total number of particles to 0 n_local = 0; @@ -269,7 +269,7 @@ void Particles_3D::Initialize(struct parameters *P, Grav3D &Grav, Real xbound, R #endif // MPI_CHOLLA } -void Particles_3D::Allocate_Memory(void) +void Particles3D::Allocate_Memory(void) { // Allocate arrays for density and gravitational field @@ -292,7 +292,7 @@ void Particles_3D::Allocate_Memory(void) } #ifdef PARTICLES_GPU -void Particles_3D::Allocate_Memory_GPU() +void Particles3D::Allocate_Memory_GPU() { // Allocate arrays for density and gravitational field on the GPU @@ -307,7 +307,7 @@ void Particles_3D::Allocate_Memory_GPU() chprintf(" Allocated GPU memory.\n"); } -part_int_t Particles_3D::Compute_Particles_GPU_Array_Size(part_int_t n) +part_int_t Particles3D::Compute_Particles_GPU_Array_Size(part_int_t n) { part_int_t buffer_size = n * G.gpu_allocation_factor; return buffer_size; @@ -315,7 +315,7 @@ part_int_t Particles_3D::Compute_Particles_GPU_Array_Size(part_int_t n) #ifdef MPI_CHOLLA -void Particles_3D::ReAllocate_Memory_GPU_MPI() +void Particles3D::ReAllocate_Memory_GPU_MPI() { // Free the previous arrays Free_GPU_Array_bool(G.transfer_particles_flags_d); @@ -336,7 +336,7 @@ void Particles_3D::ReAllocate_Memory_GPU_MPI() printf(" New allocation of arrays for particles transfers new_size: %d \n", (int)buffer_size); } -void Particles_3D::Allocate_Memory_GPU_MPI() +void Particles3D::Allocate_Memory_GPU_MPI() { // Allocate memory for the the particles MPI transfers part_int_t buffer_size, half_blocks_size; @@ -385,7 +385,7 @@ void Particles_3D::Allocate_Memory_GPU_MPI() } #endif // MPI_CHOLLA -void Particles_3D::Free_Memory_GPU() +void Particles3D::Free_Memory_GPU() { Free_GPU_Array_Real(G.density_dev); Free_GPU_Array_Real(G.gravity_x_dev); @@ -444,7 +444,7 @@ void Particles_3D::Free_Memory_GPU() #endif // PARTICLES_GPU -void Particles_3D::Initialize_Grid_Values(void) +void Particles3D::Initialize_Grid_Values(void) { // Initialize density and gravitational field to 0. @@ -459,7 +459,7 @@ void Particles_3D::Initialize_Grid_Values(void) } } -void Particles_3D::Initialize_Sphere(struct parameters *P) +void Particles3D::Initialize_Sphere(struct Parameters *P) { // Initialize Random positions for sphere of quasi-uniform density chprintf(" Initializing Particles Uniform Sphere\n"); @@ -638,7 +638,7 @@ void Particles_3D::Initialize_Sphere(struct parameters *P) /** * Initializes a disk population of uniform mass stellar clusters */ -void Particles_3D::Initialize_Disk_Stellar_Clusters(struct parameters *P) +void Particles3D::Initialize_Disk_Stellar_Clusters(struct Parameters *P) { chprintf(" Initializing Particles Stellar Disk\n"); @@ -653,9 +653,9 @@ void Particles_3D::Initialize_Disk_Stellar_Clusters(struct parameters *P) std::normal_distribution speedDist(0, 1); // for generating random speeds. - Real M_d = Galaxies::MW.getM_d(); // MW disk mass in M_sun (assumed to be all in stars) - Real R_d = Galaxies::MW.getR_d(); // MW stellar disk scale length in kpc - Real Z_d = Galaxies::MW.getZ_d(); // MW stellar height scale length in kpc + Real M_d = galaxies::MW.getM_d(); // MW disk mass in M_sun (assumed to be all in stars) + Real R_d = galaxies::MW.getR_d(); // MW stellar disk scale length in kpc + Real Z_d = galaxies::MW.getZ_d(); // MW stellar height scale length in kpc Real R_max = sqrt(P->xlen * P->xlen + P->ylen * P->ylen) / 2; R_max = P->xlen / 2.0; @@ -683,7 +683,7 @@ void Particles_3D::Initialize_Disk_Stellar_Clusters(struct parameters *P) long lost_particles = 0; part_int_t id = -1; while (total_mass < upper_limit_cluster_mass) { - Real cluster_mass = Galaxies::MW.singleClusterMass(generator); + Real cluster_mass = galaxies::MW.singleClusterMass(generator); total_mass += cluster_mass; id += 1; // do this here before we check whether the particle is in the MPI // domain, otherwise could end up with duplicated IDs @@ -706,7 +706,7 @@ void Particles_3D::Initialize_Disk_Stellar_Clusters(struct parameters *P) continue; } - ac = fabs(Galaxies::MW.gr_disk_D3D(R, 0) + Galaxies::MW.gr_halo_D3D(R, 0)); + ac = fabs(galaxies::MW.gr_disk_D3D(R, 0) + galaxies::MW.gr_halo_D3D(R, 0)); vPhi = sqrt(R * ac); vx = -vPhi * sin(phi); @@ -795,7 +795,7 @@ void Particles_3D::Initialize_Disk_Stellar_Clusters(struct parameters *P) } #endif -void Particles_3D::Initialize_Zeldovich_Pancake(struct parameters *P) +void Particles3D::Initialize_Zeldovich_Pancake(struct Parameters *P) { // No particles for the Zeldovich Pancake problem. n_local=0 @@ -868,7 +868,7 @@ void Grid3D::Initialize_Uniform_Particles() Particles.n_total_initial); } -void Particles_3D::Free_Memory(void) +void Particles3D::Free_Memory(void) { // Free the particles arrays free(G.density); @@ -900,7 +900,7 @@ void Particles_3D::Free_Memory(void) #endif // PARTICLES_CPU } -void Particles_3D::Reset(void) +void Particles3D::Reset(void) { Free_Memory(); diff --git a/src/particles/particles_3D.h b/src/particles/particles_3D.h index b52f85bfc..58f2137eb 100644 --- a/src/particles/particles_3D.h +++ b/src/particles/particles_3D.h @@ -21,7 +21,7 @@ /*! \class Part3D * \brief Class to create a set of particles in 3D space. */ -class Particles_3D +class Particles3D { public: part_int_t n_local; @@ -214,9 +214,9 @@ class Particles_3D } G; - Particles_3D(void); + Particles3D(void); - void Initialize(struct parameters *P, Grav3D &Grav, Real xbound, Real ybound, Real zbound, Real xdglobal, + void Initialize(struct Parameters *P, Grav3D &Grav, Real xbound, Real ybound, Real zbound, Real xdglobal, Real ydglobal, Real zdglobal); void Allocate_Particles_Grid_Field_Real(Real **array_dev, int size); @@ -297,15 +297,15 @@ class Particles_3D void Initialize_Grid_Values(); - void Initialize_Sphere(struct parameters *P); + void Initialize_Sphere(struct Parameters *P); #if defined(PARTICLE_AGE) && !defined(SINGLE_PARTICLE_MASS) && defined(PARTICLE_IDS) - void Initialize_Disk_Stellar_Clusters(struct parameters *P); + void Initialize_Disk_Stellar_Clusters(struct Parameters *P); #endif - void Initialize_Zeldovich_Pancake(struct parameters *P); + void Initialize_Zeldovich_Pancake(struct Parameters *P); - void Load_Particles_Data(struct parameters *P); + void Load_Particles_Data(struct Parameters *P); void Free_Memory(); @@ -316,7 +316,7 @@ class Particles_3D void Get_Density_CIC_Serial(); #ifdef HDF5 - void Load_Particles_Data_HDF5(hid_t file_id, int nfile, struct parameters *P); + void Load_Particles_Data_HDF5(hid_t file_id, int nfile, struct Parameters *P); #endif #ifdef PARALLEL_OMP diff --git a/src/particles/particles_3D_gpu.cu b/src/particles/particles_3D_gpu.cu index 0a4915d5c..d72199179 100644 --- a/src/particles/particles_3D_gpu.cu +++ b/src/particles/particles_3D_gpu.cu @@ -11,9 +11,9 @@ #include "../utils/gpu.hpp" #include "particles_3D.h" -void Particles_3D::Free_GPU_Array_Real(Real *array) { cudaFree(array); } +void Particles3D::Free_GPU_Array_Real(Real *array) { cudaFree(array); } -void Particles_3D::Allocate_Particles_Grid_Field_Real(Real **array_dev, int size) +void Particles3D::Allocate_Particles_Grid_Field_Real(Real **array_dev, int size) { size_t global_free, global_total; CudaSafeCall(cudaMemGetInfo(&global_free, &global_total)); @@ -35,7 +35,7 @@ void Particles_3D::Allocate_Particles_Grid_Field_Real(Real **array_dev, int size #ifdef PRINT_MAX_MEMORY_USAGE #include "../mpi/mpi_routines.h" -void Particles_3D::Print_Max_Memory_Usage() +void Particles3D::Print_Max_Memory_Usage() { size_t global_free, global_total; CudaSafeCall(cudaMemGetInfo(&global_free, &global_total)); @@ -59,8 +59,8 @@ void Particles_3D::Print_Max_Memory_Usage() #endif -void Particles_3D::Free_GPU_Array_int(int *array) { cudaFree(array); } -void Particles_3D::Free_GPU_Array_bool(bool *array) { cudaFree(array); } +void Particles3D::Free_GPU_Array_int(int *array) { cudaFree(array); } +void Particles3D::Free_GPU_Array_bool(bool *array) { cudaFree(array); } template void __global__ Copy_Device_to_Device_Kernel(T *src_array_dev, T *dst_array_dev, part_int_t size) @@ -81,7 +81,7 @@ void Copy_Device_to_Device(T *src_array_dev, T *dst_array_dev, part_int_t size) CudaCheckError(); } -void Particles_3D::Allocate_Particles_GPU_Array_Real(Real **array_dev, part_int_t size) +void Particles3D::Allocate_Particles_GPU_Array_Real(Real **array_dev, part_int_t size) { size_t global_free, global_total; CudaSafeCall(cudaMemGetInfo(&global_free, &global_total)); @@ -98,7 +98,7 @@ void Particles_3D::Allocate_Particles_GPU_Array_Real(Real **array_dev, part_int_ cudaDeviceSynchronize(); } -void Particles_3D::Allocate_Particles_GPU_Array_int(int **array_dev, part_int_t size) +void Particles3D::Allocate_Particles_GPU_Array_int(int **array_dev, part_int_t size) { size_t global_free, global_total; CudaSafeCall(cudaMemGetInfo(&global_free, &global_total)); @@ -115,7 +115,7 @@ void Particles_3D::Allocate_Particles_GPU_Array_int(int **array_dev, part_int_t cudaDeviceSynchronize(); } -void Particles_3D::Allocate_Particles_GPU_Array_Part_Int(part_int_t **array_dev, part_int_t size) +void Particles3D::Allocate_Particles_GPU_Array_Part_Int(part_int_t **array_dev, part_int_t size) { size_t global_free, global_total; CudaSafeCall(cudaMemGetInfo(&global_free, &global_total)); @@ -132,7 +132,7 @@ void Particles_3D::Allocate_Particles_GPU_Array_Part_Int(part_int_t **array_dev, cudaDeviceSynchronize(); } -void Particles_3D::Allocate_Particles_GPU_Array_bool(bool **array_dev, part_int_t size) +void Particles3D::Allocate_Particles_GPU_Array_bool(bool **array_dev, part_int_t size) { size_t global_free, global_total; CudaSafeCall(cudaMemGetInfo(&global_free, &global_total)); @@ -149,27 +149,27 @@ void Particles_3D::Allocate_Particles_GPU_Array_bool(bool **array_dev, part_int_ cudaDeviceSynchronize(); } -void Particles_3D::Copy_Particles_Array_Real_Host_to_Device(Real *array_host, Real *array_dev, part_int_t size) +void Particles3D::Copy_Particles_Array_Real_Host_to_Device(Real *array_host, Real *array_dev, part_int_t size) { CudaSafeCall(cudaMemcpy(array_dev, array_host, size * sizeof(Real), cudaMemcpyHostToDevice)); cudaDeviceSynchronize(); } -void Particles_3D::Copy_Particles_Array_Real_Device_to_Host(Real *array_dev, Real *array_host, part_int_t size) +void Particles3D::Copy_Particles_Array_Real_Device_to_Host(Real *array_dev, Real *array_host, part_int_t size) { CudaSafeCall(cudaMemcpy(array_host, array_dev, size * sizeof(Real), cudaMemcpyDeviceToHost)); cudaDeviceSynchronize(); } -void Particles_3D::Copy_Particles_Array_Int_Host_to_Device(part_int_t *array_host, part_int_t *array_dev, - part_int_t size) +void Particles3D::Copy_Particles_Array_Int_Host_to_Device(part_int_t *array_host, part_int_t *array_dev, + part_int_t size) { CudaSafeCall(cudaMemcpy(array_dev, array_host, size * sizeof(part_int_t), cudaMemcpyHostToDevice)); cudaDeviceSynchronize(); } -void Particles_3D::Copy_Particles_Array_Int_Device_to_Host(part_int_t *array_dev, part_int_t *array_host, - part_int_t size) +void Particles3D::Copy_Particles_Array_Int_Device_to_Host(part_int_t *array_dev, part_int_t *array_host, + part_int_t size) { CudaSafeCall(cudaMemcpy(array_host, array_dev, size * sizeof(part_int_t), cudaMemcpyDeviceToHost)); cudaDeviceSynchronize(); @@ -183,7 +183,7 @@ __global__ void Set_Particles_Array_Real_Kernel(Real value, Real *array_dev, par } } -void Particles_3D::Set_Particles_Array_Real(Real value, Real *array_dev, part_int_t size) +void Particles3D::Set_Particles_Array_Real(Real value, Real *array_dev, part_int_t size) { // set values for GPU kernels int ngrid = (size - 1) / TPB_PARTICLES + 1; diff --git a/src/particles/particles_boundaries.cpp b/src/particles/particles_boundaries.cpp index 689beaccc..a8a4909d5 100644 --- a/src/particles/particles_boundaries.cpp +++ b/src/particles/particles_boundaries.cpp @@ -18,7 +18,7 @@ #endif // MPI_CHOLLA // Transfer the particles that moved outside the local domain -void Grid3D::Transfer_Particles_Boundaries(struct parameters P) +void Grid3D::Transfer_Particles_Boundaries(struct Parameters P) { CudaCheckError(); // Transfer Particles Boundaries @@ -648,7 +648,7 @@ void Grid3D::Unload_Particles_from_Buffer_Z1(int *flags) // Find the particles that moved outside the local domain in order to transfer // them. -void Particles_3D::Select_Particles_to_Transfer_All(int *flags) +void Particles3D::Select_Particles_to_Transfer_All(int *flags) { #ifdef PARTICLES_CPU Select_Particles_to_Transfer_All_CPU(flags); @@ -658,7 +658,7 @@ void Particles_3D::Select_Particles_to_Transfer_All(int *flags) // are selected on the Load_Buffer_GPU functions } -void Particles_3D::Clear_Particles_For_Transfer(void) +void Particles3D::Clear_Particles_For_Transfer(void) { // Set the number of transferred particles to 0. n_transfer_x0 = 0; @@ -701,7 +701,7 @@ void Particles_3D::Clear_Particles_For_Transfer(void) #ifdef PARTICLES_GPU -int Particles_3D::Select_Particles_to_Transfer_GPU(int direction, int side) +int Particles3D::Select_Particles_to_Transfer_GPU(int direction, int side) { int n_transfer; Real *pos; @@ -735,8 +735,8 @@ int Particles_3D::Select_Particles_to_Transfer_GPU(int direction, int side) return n_transfer; } -void Particles_3D::Copy_Transfer_Particles_to_Buffer_GPU(int n_transfer, int direction, int side, Real *send_buffer_h, - int buffer_length) +void Particles3D::Copy_Transfer_Particles_to_Buffer_GPU(int n_transfer, int direction, int side, Real *send_buffer_h, + int buffer_length) { part_int_t *n_send; int *buffer_size; @@ -853,7 +853,7 @@ void Particles_3D::Copy_Transfer_Particles_to_Buffer_GPU(int n_transfer, int dir // if ( *n_send > 0 ) printf( "###Transfered %ld particles\n", *n_send); } -void Particles_3D::Replace_Tranfered_Particles_GPU(int n_transfer) +void Particles3D::Replace_Tranfered_Particles_GPU(int n_transfer) { // Replace the particles that were transferred Replace_Transfered_Particles_GPU_function(n_transfer, pos_x_dev, G.transfer_particles_indices_d, @@ -886,7 +886,7 @@ void Particles_3D::Replace_Tranfered_Particles_GPU(int n_transfer) n_local -= n_transfer; } -void Particles_3D::Load_Particles_to_Buffer_GPU(int direction, int side, Real *send_buffer_h, int buffer_length) +void Particles3D::Load_Particles_to_Buffer_GPU(int direction, int side, Real *send_buffer_h, int buffer_length) { int n_transfer; n_transfer = Select_Particles_to_Transfer_GPU(direction, side); @@ -903,7 +903,7 @@ void Particles_3D::Load_Particles_to_Buffer_GPU(int direction, int side, Real *s * step is skipped). Also the domainMix/domainMax are the global min/max * values. */ -void Particles_3D::Set_Particles_Open_Boundary_GPU(int dir, int side) +void Particles3D::Set_Particles_Open_Boundary_GPU(int dir, int side) { int n_transfer; /*Real *pos; @@ -932,7 +932,7 @@ void Particles_3D::Set_Particles_Open_Boundary_GPU(int dir, int side) Replace_Tranfered_Particles_GPU(n_transfer); } -void Particles_3D::Copy_Transfer_Particles_from_Buffer_GPU(int n_recv, Real *recv_buffer_d) +void Particles3D::Copy_Transfer_Particles_from_Buffer_GPU(int n_recv, Real *recv_buffer_d) { int n_fields_to_transfer; @@ -992,7 +992,7 @@ void Particles_3D::Copy_Transfer_Particles_from_Buffer_GPU(int n_recv, Real *rec // if ( n_recv > 0 ) printf( "###Unloaded %d particles\n", n_recv ); } -void Particles_3D::Unload_Particles_from_Buffer_GPU(int direction, int side, Real *recv_buffer_h, int n_recv) +void Particles3D::Unload_Particles_from_Buffer_GPU(int direction, int side, Real *recv_buffer_h, int n_recv) { int buffer_size; Real domainMin, domainMax; diff --git a/src/particles/particles_boundaries_cpu.cpp b/src/particles/particles_boundaries_cpu.cpp index ccdf008a8..27470befe 100644 --- a/src/particles/particles_boundaries_cpu.cpp +++ b/src/particles/particles_boundaries_cpu.cpp @@ -174,7 +174,7 @@ void Grid3D::Set_Particles_Open_Boundary_CPU(int dir, int side) // Find the particles that moved outside the local domain in order to transfer // them. The indices of selected particles are added to the out_indx_vectors -void Particles_3D::Select_Particles_to_Transfer_All_CPU(int *flags) +void Particles3D::Select_Particles_to_Transfer_All_CPU(int *flags) { part_int_t pIndx; for (pIndx = 0; pIndx < n_local; pIndx++) { @@ -224,7 +224,7 @@ void Particles_3D::Select_Particles_to_Transfer_All_CPU(int *flags) } // Load the particles that need to be transferred to the MPI buffer -void Particles_3D::Load_Particles_to_Buffer_CPU(int direction, int side, Real *send_buffer, int buffer_length) +void Particles3D::Load_Particles_to_Buffer_CPU(int direction, int side, Real *send_buffer, int buffer_length) { part_int_t n_out; part_int_t n_send; @@ -317,9 +317,9 @@ void Particles_3D::Load_Particles_to_Buffer_CPU(int direction, int side, Real *s } // Add the data of a single particle to a transfer buffer -void Particles_3D::Add_Particle_To_Buffer(Real *buffer, part_int_t n_in_buffer, int buffer_length, Real pId, Real pMass, - Real pAge, Real pPos_x, Real pPos_y, Real pPos_z, Real pVel_x, Real pVel_y, - Real pVel_z) +void Particles3D::Add_Particle_To_Buffer(Real *buffer, part_int_t n_in_buffer, int buffer_length, Real pId, Real pMass, + Real pAge, Real pPos_x, Real pPos_y, Real pPos_z, Real pVel_x, Real pVel_y, + Real pVel_z) { int offset, offset_extra; offset = n_in_buffer * N_DATA_PER_PARTICLE_TRANSFER; @@ -349,8 +349,8 @@ void Particles_3D::Add_Particle_To_Buffer(Real *buffer, part_int_t n_in_buffer, // After a particle was transferred, add the transferred particle data to the // vectors that contain the data of the local particles -void Particles_3D::Add_Particle_To_Vectors(Real pId, Real pMass, Real pAge, Real pPos_x, Real pPos_y, Real pPos_z, - Real pVel_x, Real pVel_y, Real pVel_z, int *flags) +void Particles3D::Add_Particle_To_Vectors(Real pId, Real pMass, Real pAge, Real pPos_x, Real pPos_y, Real pPos_z, + Real pVel_x, Real pVel_y, Real pVel_z, int *flags) { // Make sure that the particle position is inside the local domain bool in_local = true; @@ -399,10 +399,10 @@ void Particles_3D::Add_Particle_To_Vectors(Real pId, Real pMass, Real pAge, Real } // After the MPI transfer, unload the particles data from the buffers -void Particles_3D::Unload_Particles_from_Buffer_CPU(int direction, int side, Real *recv_buffer, part_int_t n_recv, - Real *send_buffer_y0, Real *send_buffer_y1, Real *send_buffer_z0, - Real *send_buffer_z1, int buffer_length_y0, int buffer_length_y1, - int buffer_length_z0, int buffer_length_z1, int *flags) +void Particles3D::Unload_Particles_from_Buffer_CPU(int direction, int side, Real *recv_buffer, part_int_t n_recv, + Real *send_buffer_y0, Real *send_buffer_y1, Real *send_buffer_z0, + Real *send_buffer_z1, int buffer_length_y0, int buffer_length_y1, + int buffer_length_z0, int buffer_length_z1, int *flags) { // Loop over the data in the recv_buffer, get the data for each particle and // append the particle data to the local vecors @@ -555,7 +555,7 @@ void Particles_3D::Unload_Particles_from_Buffer_CPU(int direction, int side, Rea } // Remove the particles that were transferred outside the local domain -void Particles_3D::Remove_Transfered_Particles(void) +void Particles3D::Remove_Transfered_Particles(void) { // Get the number of particles to delete part_int_t n_delete = 0; @@ -644,7 +644,7 @@ void Particles_3D::Remove_Transfered_Particles(void) } } -void Particles_3D::Clear_Vectors_For_Transfers(void) +void Particles3D::Clear_Vectors_For_Transfers(void) { out_indxs_vec_x0.clear(); out_indxs_vec_x1.clear(); diff --git a/src/particles/particles_dynamics_gpu.cu b/src/particles/particles_dynamics_gpu.cu index 665be8ff3..710659849 100644 --- a/src/particles/particles_dynamics_gpu.cu +++ b/src/particles/particles_dynamics_gpu.cu @@ -75,9 +75,9 @@ __global__ void Calc_Particles_dti_Kernel(part_int_t n_local, Real dx, Real dy, } } -Real Particles_3D::Calc_Particles_dt_GPU_function(int ngrid, part_int_t n_particles_local, Real dx, Real dy, Real dz, - Real *vel_x, Real *vel_y, Real *vel_z, Real *dti_array_host, - Real *dti_array_dev) +Real Particles3D::Calc_Particles_dt_GPU_function(int ngrid, part_int_t n_particles_local, Real dx, Real dy, Real dz, + Real *vel_x, Real *vel_y, Real *vel_z, Real *dti_array_host, + Real *dti_array_dev) { // // set values for GPU kernels // int ngrid = (Particles.n_local - 1) / TPB_PARTICLES + 1; @@ -144,10 +144,10 @@ __global__ void Advance_Particles_KDK_Step2_Kernel(part_int_t n_local, Real dt, vel_z_dev[tid] += 0.5 * dt * grav_z_dev[tid]; } -void Particles_3D::Advance_Particles_KDK_Step1_GPU_function(part_int_t n_local, Real dt, Real *pos_x_dev, - Real *pos_y_dev, Real *pos_z_dev, Real *vel_x_dev, - Real *vel_y_dev, Real *vel_z_dev, Real *grav_x_dev, - Real *grav_y_dev, Real *grav_z_dev) +void Particles3D::Advance_Particles_KDK_Step1_GPU_function(part_int_t n_local, Real dt, Real *pos_x_dev, + Real *pos_y_dev, Real *pos_z_dev, Real *vel_x_dev, + Real *vel_y_dev, Real *vel_z_dev, Real *grav_x_dev, + Real *grav_y_dev, Real *grav_z_dev) { // set values for GPU kernels int ngrid = (n_local - 1) / TPB_PARTICLES + 1; @@ -164,9 +164,9 @@ void Particles_3D::Advance_Particles_KDK_Step1_GPU_function(part_int_t n_local, } } -void Particles_3D::Advance_Particles_KDK_Step2_GPU_function(part_int_t n_local, Real dt, Real *vel_x_dev, - Real *vel_y_dev, Real *vel_z_dev, Real *grav_x_dev, - Real *grav_y_dev, Real *grav_z_dev) +void Particles3D::Advance_Particles_KDK_Step2_GPU_function(part_int_t n_local, Real dt, Real *vel_x_dev, + Real *vel_y_dev, Real *vel_z_dev, Real *grav_x_dev, + Real *grav_y_dev, Real *grav_z_dev) { // set values for GPU kernels int ngrid = (n_local - 1) / TPB_PARTICLES + 1; @@ -257,12 +257,12 @@ __global__ void Advance_Particles_KDK_Step2_Cosmo_Kernel(part_int_t n_local, Rea vel_z_dev[tid] = (a_half * vel_z + 0.5 * dt * grav_z_dev[tid]) / current_a; } -void Particles_3D::Advance_Particles_KDK_Step1_Cosmo_GPU_function(part_int_t n_local, Real delta_a, Real *pos_x_dev, - Real *pos_y_dev, Real *pos_z_dev, Real *vel_x_dev, - Real *vel_y_dev, Real *vel_z_dev, Real *grav_x_dev, - Real *grav_y_dev, Real *grav_z_dev, Real current_a, - Real H0, Real cosmo_h, Real Omega_M, Real Omega_L, - Real Omega_K) +void Particles3D::Advance_Particles_KDK_Step1_Cosmo_GPU_function(part_int_t n_local, Real delta_a, Real *pos_x_dev, + Real *pos_y_dev, Real *pos_z_dev, Real *vel_x_dev, + Real *vel_y_dev, Real *vel_z_dev, Real *grav_x_dev, + Real *grav_y_dev, Real *grav_z_dev, Real current_a, + Real H0, Real cosmo_h, Real Omega_M, Real Omega_L, + Real Omega_K) { // set values for GPU kernels int ngrid = (n_local - 1) / TPB_PARTICLES + 1; @@ -281,11 +281,11 @@ void Particles_3D::Advance_Particles_KDK_Step1_Cosmo_GPU_function(part_int_t n_l } } -void Particles_3D::Advance_Particles_KDK_Step2_Cosmo_GPU_function(part_int_t n_local, Real delta_a, Real *vel_x_dev, - Real *vel_y_dev, Real *vel_z_dev, Real *grav_x_dev, - Real *grav_y_dev, Real *grav_z_dev, Real current_a, - Real H0, Real cosmo_h, Real Omega_M, Real Omega_L, - Real Omega_K) +void Particles3D::Advance_Particles_KDK_Step2_Cosmo_GPU_function(part_int_t n_local, Real delta_a, Real *vel_x_dev, + Real *vel_y_dev, Real *vel_z_dev, Real *grav_x_dev, + Real *grav_y_dev, Real *grav_z_dev, Real current_a, + Real H0, Real cosmo_h, Real Omega_M, Real Omega_L, + Real Omega_K) { // set values for GPU kernels int ngrid = (n_local - 1) / TPB_PARTICLES + 1; diff --git a/src/particles/supernova.h b/src/particles/supernova.h index e788ea0b2..409b2e56b 100644 --- a/src/particles/supernova.h +++ b/src/particles/supernova.h @@ -26,7 +26,7 @@ static const Real R_SH = 0.0302; // 30.2 pc * n_0^{-0.46} -> eq.(31 static const Real DEFAULT_SN_END = 40000; // default value for when SNe stop (40 Myr) static const Real DEFAULT_SN_START = 4000; // default value for when SNe start (4 Myr) -void initState(struct parameters* P, part_int_t n_local, Real allocation_factor = 1); +void initState(struct Parameters* P, part_int_t n_local, Real allocation_factor = 1); Real Cluster_Feedback(Grid3D& G, FeedbackAnalysis& sn_analysis); } // namespace supernova #endif // PARTICLES_GPU && SUPERNOVA diff --git a/src/riemann_solvers/hlld_cuda.cu b/src/riemann_solvers/hlld_cuda.cu index b7c1fdff0..361393940 100644 --- a/src/riemann_solvers/hlld_cuda.cu +++ b/src/riemann_solvers/hlld_cuda.cu @@ -68,35 +68,35 @@ __global__ void Calculate_HLLD_Fluxes_CUDA(Real const *dev_bounds_L, Real const // The magnetic field in the X-direction Real const magneticX = dev_magnetic_face[threadId]; - mhd::_internal::State const stateL = - mhd::_internal::loadState(dev_bounds_L, magneticX, gamma, threadId, n_cells, o1, o2, o3); - mhd::_internal::State const stateR = - mhd::_internal::loadState(dev_bounds_R, magneticX, gamma, threadId, n_cells, o1, o2, o3); + mhd::internal::State const stateL = + mhd::internal::loadState(dev_bounds_L, magneticX, gamma, threadId, n_cells, o1, o2, o3); + mhd::internal::State const stateR = + mhd::internal::loadState(dev_bounds_R, magneticX, gamma, threadId, n_cells, o1, o2, o3); // Compute the approximate Left and Right wave speeds - mhd::_internal::Speeds speed = mhd::_internal::approximateLRWaveSpeeds(stateL, stateR, magneticX, gamma); + mhd::internal::Speeds speed = mhd::internal::approximateLRWaveSpeeds(stateL, stateR, magneticX, gamma); // ================================================================= // Compute the fluxes in the non-star states // ================================================================= // Left state - mhd::_internal::Flux fluxL = mhd::_internal::nonStarFluxes(stateL, magneticX); + mhd::internal::Flux fluxL = mhd::internal::nonStarFluxes(stateL, magneticX); // If we're in the L state then assign fluxes and return. // In this state the flow is supersonic // M&K 2005 equation 66 if (speed.L > 0.0) { - mhd::_internal::returnFluxes(threadId, o1, o2, o3, n_cells, dev_flux, fluxL, stateL); + mhd::internal::returnFluxes(threadId, o1, o2, o3, n_cells, dev_flux, fluxL, stateL); return; } // Right state - mhd::_internal::Flux fluxR = mhd::_internal::nonStarFluxes(stateR, magneticX); + mhd::internal::Flux fluxR = mhd::internal::nonStarFluxes(stateR, magneticX); // If we're in the R state then assign fluxes and return. // In this state the flow is supersonic // M&K 2005 equation 66 if (speed.R < 0.0) { - mhd::_internal::returnFluxes(threadId, o1, o2, o3, n_cells, dev_flux, fluxR, stateR); + mhd::internal::returnFluxes(threadId, o1, o2, o3, n_cells, dev_flux, fluxR, stateR); return; } @@ -107,73 +107,73 @@ __global__ void Calculate_HLLD_Fluxes_CUDA(Real const *dev_bounds_L, Real const // - velocityStarX = speedM // - totalPrssureStar is the same on both sides speed.M = approximateMiddleWaveSpeed(stateL, stateR, speed); - Real const totalPressureStar = mhd::_internal::starTotalPressure(stateL, stateR, speed); + Real const totalPressureStar = mhd::internal::starTotalPressure(stateL, stateR, speed); // Left star state - mhd::_internal::StarState const starStateL = - mhd::_internal::computeStarState(stateL, speed, speed.L, magneticX, totalPressureStar); + mhd::internal::StarState const starStateL = + mhd::internal::computeStarState(stateL, speed, speed.L, magneticX, totalPressureStar); // Left star speed - speed.LStar = mhd::_internal::approximateStarWaveSpeed(starStateL, speed, magneticX, -1); + speed.LStar = mhd::internal::approximateStarWaveSpeed(starStateL, speed, magneticX, -1); // If we're in the L* state then assign fluxes and return. // In this state the flow is subsonic // M&K 2005 equation 66 if (speed.LStar > 0.0 and speed.L <= 0.0) { - fluxL = mhd::_internal::starFluxes(starStateL, stateL, fluxL, speed, speed.L); - mhd::_internal::returnFluxes(threadId, o1, o2, o3, n_cells, dev_flux, fluxL, stateL); + fluxL = mhd::internal::starFluxes(starStateL, stateL, fluxL, speed, speed.L); + mhd::internal::returnFluxes(threadId, o1, o2, o3, n_cells, dev_flux, fluxL, stateL); return; } // Right star state - mhd::_internal::StarState const starStateR = - mhd::_internal::computeStarState(stateR, speed, speed.R, magneticX, totalPressureStar); + mhd::internal::StarState const starStateR = + mhd::internal::computeStarState(stateR, speed, speed.R, magneticX, totalPressureStar); // Right star speed - speed.RStar = mhd::_internal::approximateStarWaveSpeed(starStateR, speed, magneticX, 1); + speed.RStar = mhd::internal::approximateStarWaveSpeed(starStateR, speed, magneticX, 1); // If we're in the R* state then assign fluxes and return. // In this state the flow is subsonic // M&K 2005 equation 66 if (speed.RStar <= 0.0 and speed.R >= 0.0) { - fluxR = mhd::_internal::starFluxes(starStateR, stateR, fluxR, speed, speed.R); - mhd::_internal::returnFluxes(threadId, o1, o2, o3, n_cells, dev_flux, fluxR, stateR); + fluxR = mhd::internal::starFluxes(starStateR, stateR, fluxR, speed, speed.R); + mhd::internal::returnFluxes(threadId, o1, o2, o3, n_cells, dev_flux, fluxR, stateR); return; } // ================================================================= // Compute the fluxes in the double star states // ================================================================= - mhd::_internal::DoubleStarState const doubleStarState = - mhd::_internal::computeDoubleStarState(starStateL, starStateR, magneticX, totalPressureStar, speed); + mhd::internal::DoubleStarState const doubleStarState = + mhd::internal::computeDoubleStarState(starStateL, starStateR, magneticX, totalPressureStar, speed); // Compute and return L** fluxes // M&K 2005 equation 66 if (speed.M > 0.0 and speed.LStar <= 0.0) { - fluxL = mhd::_internal::computeDoubleStarFluxes(doubleStarState, doubleStarState.energyL, starStateL, stateL, fluxL, - speed, speed.L, speed.LStar); - mhd::_internal::returnFluxes(threadId, o1, o2, o3, n_cells, dev_flux, fluxL, stateL); + fluxL = mhd::internal::computeDoubleStarFluxes(doubleStarState, doubleStarState.energyL, starStateL, stateL, fluxL, + speed, speed.L, speed.LStar); + mhd::internal::returnFluxes(threadId, o1, o2, o3, n_cells, dev_flux, fluxL, stateL); return; } // Compute and return R** fluxes // M&K 2005 equation 66 if (speed.RStar > 0.0 and speed.M <= 0.0) { - fluxR = mhd::_internal::computeDoubleStarFluxes(doubleStarState, doubleStarState.energyR, starStateR, stateR, fluxR, - speed, speed.R, speed.RStar); - mhd::_internal::returnFluxes(threadId, o1, o2, o3, n_cells, dev_flux, fluxR, stateR); + fluxR = mhd::internal::computeDoubleStarFluxes(doubleStarState, doubleStarState.energyR, starStateR, stateR, fluxR, + speed, speed.R, speed.RStar); + mhd::internal::returnFluxes(threadId, o1, o2, o3, n_cells, dev_flux, fluxR, stateR); return; } } // ========================================================================= -namespace _internal +namespace internal { // ===================================================================== -__device__ __host__ mhd::_internal::State loadState(Real const *interfaceArr, Real const &magneticX, Real const &gamma, - int const &threadId, int const &n_cells, int const &o1, - int const &o2, int const &o3) +__device__ __host__ mhd::internal::State loadState(Real const *interfaceArr, Real const &magneticX, Real const &gamma, + int const &threadId, int const &n_cells, int const &o1, + int const &o2, int const &o3) { - mhd::_internal::State state; + mhd::internal::State state; state.density = interfaceArr[threadId + n_cells * grid_enum::density]; state.density = fmax(state.density, (Real)TINY_NUMBER); state.velocityX = interfaceArr[threadId + n_cells * o1] / state.density; @@ -202,7 +202,7 @@ __device__ __host__ mhd::_internal::State loadState(Real const *interfaceArr, Re #else // Note that this function does the positive pressure check // internally - state.gasPressure = mhd::_internal::Calc_Pressure_Primitive(state, magneticX, gamma); + state.gasPressure = mhd::internal::Calc_Pressure_Primitive(state, magneticX, gamma); #endif // DE state.totalPressure = @@ -213,9 +213,9 @@ __device__ __host__ mhd::_internal::State loadState(Real const *interfaceArr, Re // ===================================================================== // ===================================================================== -__device__ __host__ mhd::_internal::Speeds approximateLRWaveSpeeds(mhd::_internal::State const &stateL, - mhd::_internal::State const &stateR, - Real const &magneticX, Real const &gamma) +__device__ __host__ mhd::internal::Speeds approximateLRWaveSpeeds(mhd::internal::State const &stateL, + mhd::internal::State const &stateR, + Real const &magneticX, Real const &gamma) { // Get the fast magnetosonic wave speeds Real magSonicL = mhd::utils::fastMagnetosonicSpeed(stateL.density, stateL.gasPressure, magneticX, stateL.magneticY, @@ -227,7 +227,7 @@ __device__ __host__ mhd::_internal::Speeds approximateLRWaveSpeeds(mhd::_interna // Version suggested by Miyoshi & Kusano 2005 and used in Athena // M&K 2005 equation 67 Real magSonicMax = fmax(magSonicL, magSonicR); - mhd::_internal::Speeds speed; + mhd::internal::Speeds speed; speed.L = fmin(stateL.velocityX, stateR.velocityX) - magSonicMax; speed.R = fmax(stateL.velocityX, stateR.velocityX) + magSonicMax; @@ -236,9 +236,9 @@ __device__ __host__ mhd::_internal::Speeds approximateLRWaveSpeeds(mhd::_interna // ===================================================================== // ===================================================================== -__device__ __host__ Real approximateMiddleWaveSpeed(mhd::_internal::State const &stateL, - mhd::_internal::State const &stateR, - mhd::_internal::Speeds const &speed) +__device__ __host__ Real approximateMiddleWaveSpeed(mhd::internal::State const &stateL, + mhd::internal::State const &stateR, + mhd::internal::Speeds const &speed) { // Compute the S_M wave speed // M&K 2005 equation 38 @@ -254,8 +254,8 @@ __device__ __host__ Real approximateMiddleWaveSpeed(mhd::_internal::State const // ===================================================================== // ===================================================================== -__device__ __host__ Real approximateStarWaveSpeed(mhd::_internal::StarState const &starState, - mhd::_internal::Speeds const &speed, Real const &magneticX, +__device__ __host__ Real approximateStarWaveSpeed(mhd::internal::StarState const &starState, + mhd::internal::Speeds const &speed, Real const &magneticX, Real const &side) { // Compute the S_L^* and S_R^* wave speeds @@ -265,9 +265,9 @@ __device__ __host__ Real approximateStarWaveSpeed(mhd::_internal::StarState cons // ===================================================================== // ===================================================================== -__device__ __host__ mhd::_internal::Flux nonStarFluxes(mhd::_internal::State const &state, Real const &magneticX) +__device__ __host__ mhd::internal::Flux nonStarFluxes(mhd::internal::State const &state, Real const &magneticX) { - mhd::_internal::Flux flux; + mhd::internal::Flux flux; // M&K 2005 equation 2 flux.density = state.density * state.velocityX; @@ -289,8 +289,8 @@ __device__ __host__ mhd::_internal::Flux nonStarFluxes(mhd::_internal::State con // ===================================================================== __device__ __host__ void returnFluxes(int const &threadId, int const &o1, int const &o2, int const &o3, - int const &n_cells, Real *dev_flux, mhd::_internal::Flux const &flux, - mhd::_internal::State const &state) + int const &n_cells, Real *dev_flux, mhd::internal::Flux const &flux, + mhd::internal::State const &state) { // Note that the direction of the grid_enum::fluxX_magnetic_DIR is the // direction of the electric field that the magnetic flux is, not the magnetic @@ -315,8 +315,8 @@ __device__ __host__ void returnFluxes(int const &threadId, int const &o1, int co // ===================================================================== // ===================================================================== -__device__ __host__ Real starTotalPressure(mhd::_internal::State const &stateL, mhd::_internal::State const &stateR, - mhd::_internal::Speeds const &speed) +__device__ __host__ Real starTotalPressure(mhd::internal::State const &stateL, mhd::internal::State const &stateR, + mhd::internal::Speeds const &speed) { // M&K 2005 equation 41 return // Numerator @@ -330,12 +330,11 @@ __device__ __host__ Real starTotalPressure(mhd::_internal::State const &stateL, // ===================================================================== // ===================================================================== -__device__ __host__ mhd::_internal::StarState computeStarState(mhd::_internal::State const &state, - mhd::_internal::Speeds const &speed, - Real const &speedSide, Real const &magneticX, - Real const &totalPressureStar) +__device__ __host__ mhd::internal::StarState computeStarState(mhd::internal::State const &state, + mhd::internal::Speeds const &speed, Real const &speedSide, + Real const &magneticX, Real const &totalPressureStar) { - mhd::_internal::StarState starState; + mhd::internal::StarState starState; // Compute the densities in the star state // M&K 2005 equation 43 @@ -344,7 +343,7 @@ __device__ __host__ mhd::_internal::StarState computeStarState(mhd::_internal::S // Check for and handle the degenerate case // Explained at the top of page 326 in M&K 2005 if (fabs(state.density * (speedSide - state.velocityX) * (speedSide - speed.M) - (magneticX * magneticX)) < - totalPressureStar * mhd::_internal::_hlldSmallNumber) { + totalPressureStar * mhd::internal::_hlldSmallNumber) { starState.velocityY = state.velocityY; starState.velocityZ = state.velocityZ; starState.magneticY = state.magneticY; @@ -381,12 +380,11 @@ __device__ __host__ mhd::_internal::StarState computeStarState(mhd::_internal::S // ===================================================================== // ===================================================================== -__device__ __host__ mhd::_internal::Flux starFluxes(mhd::_internal::StarState const &starState, - mhd::_internal::State const &state, - mhd::_internal::Flux const &flux, - mhd::_internal::Speeds const &speed, Real const &speedSide) +__device__ __host__ mhd::internal::Flux starFluxes(mhd::internal::StarState const &starState, + mhd::internal::State const &state, mhd::internal::Flux const &flux, + mhd::internal::Speeds const &speed, Real const &speedSide) { - mhd::_internal::Flux starFlux; + mhd::internal::Flux starFlux; // Now compute the star state fluxes // M&K 2005 equations 64 @@ -405,18 +403,18 @@ __device__ __host__ mhd::_internal::Flux starFluxes(mhd::_internal::StarState co // ===================================================================== // ===================================================================== -__device__ __host__ mhd::_internal::DoubleStarState computeDoubleStarState(mhd::_internal::StarState const &starStateL, - mhd::_internal::StarState const &starStateR, - Real const &magneticX, - Real const &totalPressureStar, - mhd::_internal::Speeds const &speed) +__device__ __host__ mhd::internal::DoubleStarState computeDoubleStarState(mhd::internal::StarState const &starStateL, + mhd::internal::StarState const &starStateR, + Real const &magneticX, + Real const &totalPressureStar, + mhd::internal::Speeds const &speed) { - mhd::_internal::DoubleStarState doubleStarState; + mhd::internal::DoubleStarState doubleStarState; // if Bx is zero then just return the star state // Explained at the top of page 328 in M&K 2005. Essentially when // magneticX is 0 this reduces to the HLLC solver - if (0.5 * (magneticX * magneticX) < mhd::_internal::_hlldSmallNumber * totalPressureStar) { + if (0.5 * (magneticX * magneticX) < mhd::internal::_hlldSmallNumber * totalPressureStar) { if (speed.M >= 0.0) { // We're in the L** state but Bx=0 so return L* state doubleStarState.velocityY = starStateL.velocityY; @@ -480,12 +478,12 @@ __device__ __host__ mhd::_internal::DoubleStarState computeDoubleStarState(mhd:: // ===================================================================== // ===================================================================== -__device__ __host__ mhd::_internal::Flux computeDoubleStarFluxes( - mhd::_internal::DoubleStarState const &doubleStarState, Real const &doubleStarStateEnergy, - mhd::_internal::StarState const &starState, mhd::_internal::State const &state, mhd::_internal::Flux const &flux, - mhd::_internal::Speeds const &speed, Real const &speedSide, Real const &speedSideStar) +__device__ __host__ mhd::internal::Flux computeDoubleStarFluxes( + mhd::internal::DoubleStarState const &doubleStarState, Real const &doubleStarStateEnergy, + mhd::internal::StarState const &starState, mhd::internal::State const &state, mhd::internal::Flux const &flux, + mhd::internal::Speeds const &speed, Real const &speedSide, Real const &speedSideStar) { - mhd::_internal::Flux doubleStarFlux; + mhd::internal::Flux doubleStarFlux; Real const speed_diff = speedSideStar - speedSide; @@ -512,7 +510,7 @@ __device__ __host__ mhd::_internal::Flux computeDoubleStarFluxes( } // ===================================================================== -} // namespace _internal +} // namespace internal } // end namespace mhd #endif // MHD #endif // CUDA diff --git a/src/riemann_solvers/hlld_cuda.h b/src/riemann_solvers/hlld_cuda.h index c7363ed65..49f48f5b9 100644 --- a/src/riemann_solvers/hlld_cuda.h +++ b/src/riemann_solvers/hlld_cuda.h @@ -48,7 +48,7 @@ __global__ void Calculate_HLLD_Fluxes_CUDA(Real const *dev_bounds_L, Real const * solver * */ -namespace _internal +namespace internal { /*! * \brief Used for some comparisons. Value was chosen to match what is @@ -125,33 +125,33 @@ struct Speeds { * \param o1 Direction parameter * \param o2 Direction parameter * \param o3 Direction parameter - * \return mhd::_internal::State The loaded state + * \return mhd::internal::State The loaded state */ -__device__ __host__ mhd::_internal::State loadState(Real const *interfaceArr, Real const &magneticX, Real const &gamma, - int const &threadId, int const &n_cells, int const &o1, - int const &o2, int const &o3); +__device__ __host__ mhd::internal::State loadState(Real const *interfaceArr, Real const &magneticX, Real const &gamma, + int const &threadId, int const &n_cells, int const &o1, + int const &o2, int const &o3); /*! * \brief Compute the approximate left and right wave speeds. M&K 2005 equation * 67 */ -__device__ __host__ mhd::_internal::Speeds approximateLRWaveSpeeds(mhd::_internal::State const &stateL, - mhd::_internal::State const &stateR, - Real const &magneticX, Real const &gamma); +__device__ __host__ mhd::internal::Speeds approximateLRWaveSpeeds(mhd::internal::State const &stateL, + mhd::internal::State const &stateR, + Real const &magneticX, Real const &gamma); /*! * \brief Compute the approximate middle wave speed. M&K 2005 equation 38 */ -__device__ __host__ Real approximateMiddleWaveSpeed(mhd::_internal::State const &stateL, - mhd::_internal::State const &stateR, - mhd::_internal::Speeds const &speed); +__device__ __host__ Real approximateMiddleWaveSpeed(mhd::internal::State const &stateL, + mhd::internal::State const &stateR, + mhd::internal::Speeds const &speed); /*! * \brief Compute the approximate left and right wave speeds. M&K 2005 equation * 51 */ -__device__ __host__ Real approximateStarWaveSpeed(mhd::_internal::StarState const &starState, - mhd::_internal::Speeds const &speed, Real const &magneticX, +__device__ __host__ Real approximateStarWaveSpeed(mhd::internal::StarState const &starState, + mhd::internal::Speeds const &speed, Real const &magneticX, Real const &side); /*! @@ -160,9 +160,9 @@ __device__ __host__ Real approximateStarWaveSpeed(mhd::_internal::StarState cons * * \param state The state to compute the flux of * \param magneticX The X magnetic field - * \return mhd::_internal::Flux The flux in the state + * \return mhd::internal::Flux The flux in the state */ -__device__ __host__ mhd::_internal::Flux nonStarFluxes(mhd::_internal::State const &state, Real const &magneticX); +__device__ __host__ mhd::internal::Flux nonStarFluxes(mhd::internal::State const &state, Real const &magneticX); /*! * \brief Write the given flux values to the dev_flux array @@ -178,8 +178,8 @@ __device__ __host__ mhd::_internal::Flux nonStarFluxes(mhd::_internal::State con * one of the left states or one of the right states */ __device__ __host__ void returnFluxes(int const &threadId, int const &o1, int const &o2, int const &o3, - int const &n_cells, Real *dev_flux, mhd::_internal::Flux const &flux, - mhd::_internal::State const &state); + int const &n_cells, Real *dev_flux, mhd::internal::Flux const &flux, + mhd::internal::State const &state); /*! * \brief Compute the total pressure in the star states. M&K 2005 equation 41 @@ -189,8 +189,8 @@ __device__ __host__ void returnFluxes(int const &threadId, int const &o1, int co * \param speed The wave speeds * \return Real The total pressure in the star state */ -__device__ __host__ Real starTotalPressure(mhd::_internal::State const &stateL, mhd::_internal::State const &stateR, - mhd::_internal::Speeds const &speed); +__device__ __host__ Real starTotalPressure(mhd::internal::State const &stateL, mhd::internal::State const &stateR, + mhd::internal::Speeds const &speed); /*! * \brief Compute the L* or R* state. M&K 2005 equations 43-48 @@ -199,12 +199,11 @@ __device__ __host__ Real starTotalPressure(mhd::_internal::State const &stateL, * state \param speed The wavespeeds \param speedSide The wave speed on the * same side as the desired star state \param magneticX The magnetic field * in the x direction \param totalPressureStar The total pressure in the - * star state \return mhd::_internal::StarState The computed star state + * star state \return mhd::internal::StarState The computed star state */ -__device__ __host__ mhd::_internal::StarState computeStarState(mhd::_internal::State const &state, - mhd::_internal::Speeds const &speed, - Real const &speedSide, Real const &magneticX, - Real const &totalPressureStar); +__device__ __host__ mhd::internal::StarState computeStarState(mhd::internal::State const &state, + mhd::internal::Speeds const &speed, Real const &speedSide, + Real const &magneticX, Real const &totalPressureStar); /*! * \brief Compute the flux in the star state. M&K 2005 equation 64 @@ -214,12 +213,11 @@ __device__ __host__ mhd::_internal::StarState computeStarState(mhd::_internal::S * \param flux The non-star flux on the same side as the star state * \param speed The wave speeds * \param speedSide The non-star wave speed on the same side as the star state - * \return mhd::_internal::Flux The flux in the star state + * \return mhd::internal::Flux The flux in the star state */ -__device__ __host__ mhd::_internal::Flux starFluxes(mhd::_internal::StarState const &starState, - mhd::_internal::State const &state, - mhd::_internal::Flux const &flux, - mhd::_internal::Speeds const &speed, Real const &speedSide); +__device__ __host__ mhd::internal::Flux starFluxes(mhd::internal::StarState const &starState, + mhd::internal::State const &state, mhd::internal::Flux const &flux, + mhd::internal::Speeds const &speed, Real const &speedSide); /*! * \brief Compute the double star state. M&K 2005 equations 59-63 @@ -229,13 +227,13 @@ __device__ __host__ mhd::_internal::Flux starFluxes(mhd::_internal::StarState co * \param magneticX The x magnetic field * \param totalPressureStar The total pressure in the star state * \param speed The approximate wave speeds - * \return mhd::_internal::DoubleStarState The double star state + * \return mhd::internal::DoubleStarState The double star state */ -__device__ __host__ mhd::_internal::DoubleStarState computeDoubleStarState(mhd::_internal::StarState const &starStateL, - mhd::_internal::StarState const &starStateR, - Real const &magneticX, - Real const &totalPressureStar, - mhd::_internal::Speeds const &speed); +__device__ __host__ mhd::internal::DoubleStarState computeDoubleStarState(mhd::internal::StarState const &starStateL, + mhd::internal::StarState const &starStateR, + Real const &magneticX, + Real const &totalPressureStar, + mhd::internal::Speeds const &speed); /*! * \brief Compute the double star state fluxes. M&K 2005 equation 65 @@ -249,10 +247,10 @@ __device__ __host__ mhd::_internal::DoubleStarState computeDoubleStarState(mhd:: * \param speedSideStar The star wave speed on the same side * \return __device__ */ -__device__ __host__ mhd::_internal::Flux computeDoubleStarFluxes( - mhd::_internal::DoubleStarState const &doubleStarState, Real const &doubleStarStateEnergy, - mhd::_internal::StarState const &starState, mhd::_internal::State const &state, mhd::_internal::Flux const &flux, - mhd::_internal::Speeds const &speed, Real const &speedSide, Real const &speedSideStar); +__device__ __host__ mhd::internal::Flux computeDoubleStarFluxes( + mhd::internal::DoubleStarState const &doubleStarState, Real const &doubleStarStateEnergy, + mhd::internal::StarState const &starState, mhd::internal::State const &state, mhd::internal::Flux const &flux, + mhd::internal::Speeds const &speed, Real const &speedSide, Real const &speedSideStar); /*! * \brief Specialization of mhd::utils::computeGasPressure for use in the HLLD solver @@ -262,12 +260,12 @@ __device__ __host__ mhd::_internal::Flux computeDoubleStarFluxes( * \param gamma The adiabatic index * \return Real The gas pressure */ -inline __host__ __device__ Real Calc_Pressure_Primitive(mhd::_internal::State const &state, Real const &magneticX, +inline __host__ __device__ Real Calc_Pressure_Primitive(mhd::internal::State const &state, Real const &magneticX, Real const &gamma) { return hydro_utilities::Calc_Pressure_Primitive(state.energy, state.density, state.velocityX, state.velocityY, state.velocityZ, gamma, magneticX, state.magneticY, state.magneticZ); } -} // namespace _internal +} // namespace internal } // end namespace mhd #endif // CUDA diff --git a/src/riemann_solvers/hlld_cuda_tests.cu b/src/riemann_solvers/hlld_cuda_tests.cu index 219f74739..6c70ddb8a 100644 --- a/src/riemann_solvers/hlld_cuda_tests.cu +++ b/src/riemann_solvers/hlld_cuda_tests.cu @@ -1807,7 +1807,7 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, UnphysicalValuesExpectAutomaticFix) // ========================================================================= // ========================================================================= -// Unit tests for the contents of the mhd::_internal namespace +// Unit tests for the contents of the mhd::internal namespace // ========================================================================= /*! * \brief A struct to hold some basic test values @@ -1823,7 +1823,7 @@ struct TestParams { std::vector const magneticX{92.75101068883114, 31.588767769990532}; - std::vector stateLVec{ + std::vector stateLVec{ {21.50306776645775, 1.7906564444824999, 0.33040135813215948, 1.500111692877206, 65.751208381099417, 12.297499156516622, 46.224045698787776, 9.9999999999999995e-21, 5445.3204350339083}, {48.316634031589935, 0.39291118391272883, 0.69876195899931859, 1.8528943583250035, 38.461354599479826, @@ -1833,7 +1833,7 @@ struct TestParams { {91.029557388536347, 0.93649399297774782, 0.36277769000180521, 0.095181318599791204, 83.656397841788944, 35.910258841630984, 24.052685003977757, 9.9999999999999995e-21, 4491.7524579462979}}; - std::vector const starStateLVec{ + std::vector const starStateLVec{ {28.520995251761526, 1.5746306813243216, 1.3948193325212686, 6.579867455284738, 62.093488291430653, 62.765890944643196}, {54.721668215064945, 1.4363926014039052, 1.1515754515491903, 30.450436649083692, 54.279167444036723, @@ -1845,13 +1845,13 @@ struct TestParams { std::vector totalPressureStar{66.80958736783934, 72.29644038317676}; - std::vector const DoubleStarStateVec{ + std::vector const DoubleStarStateVec{ {0.79104271107837087, 0.97609103551927523, 20.943239839455895, 83.380243826880701, 45.832024557076693, std::nan("0")}, {1.390870320696683, 0.52222643241336986, 83.851481048702098, 80.366712517307832, 55.455301414557297, std::nan("0")}}; - std::vector const flux{ + std::vector const flux{ {12.939239309626116, 65.054814649176265, 73.676928455867824, 16.873647595664387, 52.718887319724693, 58.989284454159673, 29.976925743532302}, {81.715245865170729, 56.098850697078028, 2.7172469834037871, 39.701329831928732, 81.63926176158796, @@ -1861,7 +1861,7 @@ struct TestParams { {0, 26.812722601652684, 48.349566649914976, 61.228439610525378, 45.432249733131123, 33.053375365947957, 15.621020824107379}}; - std::vector const speed{ + std::vector const speed{ {-22.40376497145191, -19.710500632936679, -0.81760587897407833, 9.6740190040662242, 24.295526347371595}, {-11.190385012513822, -4.4880642018724357, -0.026643804611559244, 3.4191202933087519, 12.519790189404299}}; @@ -1872,7 +1872,7 @@ struct TestParams { // ========================================================================= /*! - * \brief Test the mhd::_internal::approximateLRWaveSpeeds function + * \brief Test the mhd::internal::approximateLRWaveSpeeds function * */ TEST(tMHDHlldInternalApproximateLRWaveSpeeds, CorrectInputExpectCorrectOutput) @@ -1882,7 +1882,7 @@ TEST(tMHDHlldInternalApproximateLRWaveSpeeds, CorrectInputExpectCorrectOutput) std::vector const fiducialSpeedR{24.295526347371595, 12.519790189404299}; for (size_t i = 0; i < parameters.names.size(); i++) { - mhd::_internal::Speeds testSpeed = mhd::_internal::approximateLRWaveSpeeds( + mhd::internal::Speeds testSpeed = mhd::internal::approximateLRWaveSpeeds( parameters.stateLVec.at(i), parameters.stateRVec.at(i), parameters.magneticX.at(i), parameters.gamma); // Now check results @@ -1894,7 +1894,7 @@ TEST(tMHDHlldInternalApproximateLRWaveSpeeds, CorrectInputExpectCorrectOutput) // ========================================================================= /*! - * \brief Test the mhd::_internal::approximateMiddleWaveSpeed function + * \brief Test the mhd::internal::approximateMiddleWaveSpeed function * */ TEST(tMHDHlldInternalApproximateMiddleWaveSpeed, CorrectInputExpectCorrectOutput) @@ -1903,11 +1903,11 @@ TEST(tMHDHlldInternalApproximateMiddleWaveSpeed, CorrectInputExpectCorrectOutput std::vector const fiducialSpeedM{-0.81760587897407833, -0.026643804611559244}; - mhd::_internal::Speeds testSpeed; + mhd::internal::Speeds testSpeed; for (size_t i = 0; i < parameters.names.size(); i++) { - testSpeed.M = mhd::_internal::approximateMiddleWaveSpeed(parameters.stateLVec.at(i), parameters.stateRVec.at(i), - parameters.speed.at(i)); + testSpeed.M = mhd::internal::approximateMiddleWaveSpeed(parameters.stateLVec.at(i), parameters.stateRVec.at(i), + parameters.speed.at(i)); // Now check results testing_utilities::Check_Results(fiducialSpeedM.at(i), testSpeed.M, parameters.names.at(i) + ", SpeedM"); @@ -1917,7 +1917,7 @@ TEST(tMHDHlldInternalApproximateMiddleWaveSpeed, CorrectInputExpectCorrectOutput // ========================================================================= /*! - * \brief Test the mhd::_internal::approximateStarWaveSpeed function + * \brief Test the mhd::internal::approximateStarWaveSpeed function * */ TEST(tMHDHlldInternalApproximateStarWaveSpeed, CorrectInputExpectCorrectOutput) @@ -1926,13 +1926,13 @@ TEST(tMHDHlldInternalApproximateStarWaveSpeed, CorrectInputExpectCorrectOutput) std::vector const fiducialSpeedStarL{-18.18506608966894, -4.2968910457518161}; std::vector const fiducialSpeedStarR{12.420292938368167, 3.6786718447209252}; - mhd::_internal::Speeds testSpeed; + mhd::internal::Speeds testSpeed; for (size_t i = 0; i < parameters.names.size(); i++) { - testSpeed.LStar = mhd::_internal::approximateStarWaveSpeed(parameters.starStateLVec.at(i), parameters.speed.at(i), - parameters.magneticX.at(i), -1); - testSpeed.RStar = mhd::_internal::approximateStarWaveSpeed(parameters.starStateRVec.at(i), parameters.speed.at(i), - parameters.magneticX.at(i), 1); + testSpeed.LStar = mhd::internal::approximateStarWaveSpeed(parameters.starStateLVec.at(i), parameters.speed.at(i), + parameters.magneticX.at(i), -1); + testSpeed.RStar = mhd::internal::approximateStarWaveSpeed(parameters.starStateRVec.at(i), parameters.speed.at(i), + parameters.magneticX.at(i), 1); // Now check results testing_utilities::Check_Results(fiducialSpeedStarL.at(i), testSpeed.LStar, @@ -1945,22 +1945,21 @@ TEST(tMHDHlldInternalApproximateStarWaveSpeed, CorrectInputExpectCorrectOutput) // ========================================================================= /*! - * \brief Test the mhd::_internal::_nonStarFluxes function + * \brief Test the mhd::internal::_nonStarFluxes function * */ TEST(tMHDHlldInternalNonStarFluxes, CorrectInputExpectCorrectOutput) { TestParams const parameters; - std::vector fiducialFlux{ + std::vector fiducialFlux{ {38.504606872151484, -3088.4810263278778, -1127.8835013070616, -4229.5657456907293, -12344.460641662206, -8.6244637840856555, -56.365490339906408}, {18.984145880030045, 2250.9966820900618, -2000.3517480656785, -1155.8240512956793, -2717.2127176227905, 2.9729840344910059, -43.716615275067923}}; for (size_t i = 0; i < parameters.names.size(); i++) { - mhd::_internal::Flux testFlux = - mhd::_internal::nonStarFluxes(parameters.stateLVec.at(i), parameters.magneticX.at(i)); + mhd::internal::Flux testFlux = mhd::internal::nonStarFluxes(parameters.stateLVec.at(i), parameters.magneticX.at(i)); // Now check results testing_utilities::Check_Results(fiducialFlux[i].density, testFlux.density, @@ -1982,7 +1981,7 @@ TEST(tMHDHlldInternalNonStarFluxes, CorrectInputExpectCorrectOutput) // ========================================================================= /*! - * \brief Test the mhd::_internal::computeStarState function in the + * \brief Test the mhd::internal::computeStarState function in the * non-degenerate case * */ @@ -1990,16 +1989,15 @@ TEST(tMHDHlldInternalComputeStarState, CorrectInputNonDegenerateExpectCorrectOut { TestParams const parameters; - std::vector fiducialStarState{ - {24.101290139122913, 1.4626377138501221, 5.7559806612277464, 1023.8840191068900, 18.648382121236992, - 70.095850905078336}, - {50.132466596958501, 0.85967712862308099, 1.9480712959548112, 172.06840532772659, 66.595692901872582, - 39.389537509454122}}; + std::vector fiducialStarState{{24.101290139122913, 1.4626377138501221, 5.7559806612277464, + 1023.8840191068900, 18.648382121236992, 70.095850905078336}, + {50.132466596958501, 0.85967712862308099, 1.9480712959548112, + 172.06840532772659, 66.595692901872582, 39.389537509454122}}; for (size_t i = 0; i < parameters.names.size(); i++) { - mhd::_internal::StarState testStarState = - mhd::_internal::computeStarState(parameters.stateLVec.at(i), parameters.speed.at(i), parameters.speed.at(i).L, - parameters.magneticX.at(i), parameters.totalPressureStar.at(i)); + mhd::internal::StarState testStarState = + mhd::internal::computeStarState(parameters.stateLVec.at(i), parameters.speed.at(i), parameters.speed.at(i).L, + parameters.magneticX.at(i), parameters.totalPressureStar.at(i)); // Now check results testing_utilities::Check_Results(fiducialStarState.at(i).velocityY, testStarState.velocityY, @@ -2016,7 +2014,7 @@ TEST(tMHDHlldInternalComputeStarState, CorrectInputNonDegenerateExpectCorrectOut } /*! - * \brief Test the mhd::_internal::starFluxes function in the non-degenerate + * \brief Test the mhd::internal::starFluxes function in the non-degenerate * case * */ @@ -2024,20 +2022,20 @@ TEST(tMHDHlldInternalStarFluxes, CorrectInputNonDegenerateExpectCorrectOutput) { TestParams const parameters; - std::vector fiducialFlux{ + std::vector fiducialFlux{ {-45.270724071132321, 1369.1771532285088, -556.91765728768155, -2368.4452742393819, -21413.063415617500, -83.294404848633300, -504.84138754248409}, {61.395380340435793, 283.48596932136809, -101.75517013858293, -51.34364892516212, -1413.4750762739586, 25.139956754826922, 78.863254638038882}}; for (size_t i = 0; i < parameters.names.size(); i++) { - mhd::_internal::StarState testStarState = - mhd::_internal::computeStarState(parameters.stateLVec.at(i), parameters.speed.at(i), parameters.speed.at(i).L, - parameters.magneticX.at(i), parameters.totalPressureStar.at(i)); + mhd::internal::StarState testStarState = + mhd::internal::computeStarState(parameters.stateLVec.at(i), parameters.speed.at(i), parameters.speed.at(i).L, + parameters.magneticX.at(i), parameters.totalPressureStar.at(i)); - mhd::_internal::Flux testFlux = - mhd::_internal::starFluxes(testStarState, parameters.stateLVec.at(i), parameters.flux.at(i), - parameters.speed.at(i), parameters.speed.at(i).L); + mhd::internal::Flux testFlux = + mhd::internal::starFluxes(testStarState, parameters.stateLVec.at(i), parameters.flux.at(i), + parameters.speed.at(i), parameters.speed.at(i).L); // Now check results testing_utilities::Check_Results(fiducialFlux[i].density, testFlux.density, @@ -2058,7 +2056,7 @@ TEST(tMHDHlldInternalStarFluxes, CorrectInputNonDegenerateExpectCorrectOutput) } /*! - * \brief Test the mhd::_internal::starFluxes function in the degenerate + * \brief Test the mhd::internal::starFluxes function in the degenerate * case * */ @@ -2066,7 +2064,7 @@ TEST(tMHDHlldInternalComputeStarState, CorrectInputDegenerateExpectCorrectOutput { TestParams parameters; - std::vector fiducialStarState{ + std::vector fiducialStarState{ {24.101290139122913, 1.4626377138501221, 5.7559806612277464, 4.5171065808847731e+17, 18.648382121236992, 70.095850905078336}, {50.132466596958501, 0.85967712862308099, 1.9480712959548112, 172.06840532772659, 66.595692901872582, @@ -2077,9 +2075,9 @@ TEST(tMHDHlldInternalComputeStarState, CorrectInputDegenerateExpectCorrectOutput parameters.stateLVec.at(0).totalPressure *= totalPressureStarMultiplier; for (size_t i = 0; i < parameters.names.size(); i++) { - mhd::_internal::StarState testStarState = - mhd::_internal::computeStarState(parameters.stateLVec.at(i), parameters.speed.at(i), parameters.speed.at(i).L, - parameters.magneticX.at(i), parameters.totalPressureStar.at(i)); + mhd::internal::StarState testStarState = + mhd::internal::computeStarState(parameters.stateLVec.at(i), parameters.speed.at(i), parameters.speed.at(i).L, + parameters.magneticX.at(i), parameters.totalPressureStar.at(i)); // Now check results testing_utilities::Check_Results(fiducialStarState.at(i).velocityY, testStarState.velocityY, @@ -2102,7 +2100,7 @@ TEST(tMHDHlldInternalStarFluxes, CorrectInputDegenerateExpectCorrectOutput) // Used to get us into the degenerate case double const totalPressureStarMultiplier = 1E15; - std::vector fiducialFlux{ + std::vector fiducialFlux{ {-144.2887586578122, 1450.1348804310369, -773.30617492819886, -151.70644305354989, 1378.3797024673304, -1056.6283526454272, -340.62268733874163}, {10.040447333773272, 284.85426012223729, -499.05932057162761, 336.35271628090368, 171.28451793017882, @@ -2112,9 +2110,9 @@ TEST(tMHDHlldInternalStarFluxes, CorrectInputDegenerateExpectCorrectOutput) parameters.totalPressureStar.at(1) *= totalPressureStarMultiplier; for (size_t i = 0; i < parameters.names.size(); i++) { - mhd::_internal::Flux testFlux = - mhd::_internal::starFluxes(parameters.starStateLVec.at(i), parameters.stateLVec.at(i), parameters.flux.at(i), - parameters.speed.at(i), parameters.speed.at(i).L); + mhd::internal::Flux testFlux = + mhd::internal::starFluxes(parameters.starStateLVec.at(i), parameters.stateLVec.at(i), parameters.flux.at(i), + parameters.speed.at(i), parameters.speed.at(i).L); // Now check results testing_utilities::Check_Results(fiducialFlux[i].density, testFlux.density, @@ -2137,7 +2135,7 @@ TEST(tMHDHlldInternalStarFluxes, CorrectInputDegenerateExpectCorrectOutput) // ========================================================================= /*! - * \brief Test the mhd::_internal::computeDoubleStarState function. + * \brief Test the mhd::internal::computeDoubleStarState function. * Non-degenerate state * */ @@ -2145,14 +2143,14 @@ TEST(tMHDHlldInternalDoubleStarState, CorrectInputNonDegenerateExpectCorrectOutp { TestParams const parameters; - std::vector fiducialState{ + std::vector fiducialState{ {-1.5775383335759607, -3.4914062207842482, 45.259313435283325, 36.670978215630669, -2048.1953674500523, 1721.0582276783819}, {3.803188977150934, -4.2662645349592765, 71.787329583230417, 53.189673238238178, -999.79694164635089, 252.047167522579}}; for (size_t i = 0; i < parameters.names.size(); i++) { - mhd::_internal::DoubleStarState const testState = mhd::_internal::computeDoubleStarState( + mhd::internal::DoubleStarState const testState = mhd::internal::computeDoubleStarState( parameters.starStateLVec.at(i), parameters.starStateRVec.at(i), parameters.magneticX.at(i), parameters.totalPressureStar.at(i), parameters.speed.at(i)); @@ -2173,7 +2171,7 @@ TEST(tMHDHlldInternalDoubleStarState, CorrectInputNonDegenerateExpectCorrectOutp } /*! - * \brief Test the mhd::_internal::computeDoubleStarState function in the + * \brief Test the mhd::internal::computeDoubleStarState function in the * degenerate state. * */ @@ -2181,14 +2179,14 @@ TEST(tMHDHlldInternalDoubleStarState, CorrectInputDegenerateExpectCorrectOutput) { TestParams const parameters; - std::vector fiducialState{ + std::vector fiducialState{ {1.0519818825796206, 0.68198273634686157, 26.835645069149873, 7.4302316959173442, 0.0, 90.44484278669114}, {0.61418047569879897, 0.71813570322922715, 98.974446283273181, 10.696380763901459, 0.0, 61.33664731346812}}; for (size_t i = 0; i < parameters.names.size(); i++) { - mhd::_internal::DoubleStarState const testState = - mhd::_internal::computeDoubleStarState(parameters.starStateLVec.at(i), parameters.starStateRVec.at(i), 0.0, - parameters.totalPressureStar.at(i), parameters.speed.at(i)); + mhd::internal::DoubleStarState const testState = + mhd::internal::computeDoubleStarState(parameters.starStateLVec.at(i), parameters.starStateRVec.at(i), 0.0, + parameters.totalPressureStar.at(i), parameters.speed.at(i)); // Now check results testing_utilities::Check_Results(fiducialState.at(i).velocityY, testState.velocityY, @@ -2209,21 +2207,21 @@ TEST(tMHDHlldInternalDoubleStarState, CorrectInputDegenerateExpectCorrectOutput) // ========================================================================= /*! - * \brief Test the mhd::_internal::_doubleStarFluxes function + * \brief Test the mhd::internal::_doubleStarFluxes function * */ TEST(tMHDHlldInternalDoubleStarFluxes, CorrectInputExpectCorrectOutput) { TestParams const parameters; - std::vector const fiducialFlux{ + std::vector const fiducialFlux{ {-144.2887586578122, 1450.1348804310369, -332.80193639987715, 83.687152337186944, 604.70003506833029, -245.53635448727721, -746.94190287166407}, {10.040447333773258, 284.85426012223729, -487.87930516727664, 490.91728596722157, 59.061079503595295, 30.244176588794346, -466.15336272175193}}; for (size_t i = 0; i < parameters.names.size(); i++) { - mhd::_internal::Flux const testFlux = mhd::_internal::computeDoubleStarFluxes( + mhd::internal::Flux const testFlux = mhd::internal::computeDoubleStarFluxes( parameters.DoubleStarStateVec.at(i), parameters.DoubleStarStateVec.at(i).energyL, parameters.starStateLVec.at(i), parameters.stateLVec.at(i), parameters.flux.at(i), parameters.speed.at(i), parameters.speed.at(i).L, parameters.speed.at(i).LStar); @@ -2249,14 +2247,14 @@ TEST(tMHDHlldInternalDoubleStarFluxes, CorrectInputExpectCorrectOutput) // ========================================================================= /*! - * \brief Test the mhd::_internal::_returnFluxes function + * \brief Test the mhd::internal::_returnFluxes function * */ TEST(tMHDHlldInternalReturnFluxes, CorrectInputExpectCorrectOutput) { double const dummyValue = 999; - mhd::_internal::Flux inputFlux{1, 2, 3, 4, 5, 6, 7}; - mhd::_internal::State inputState{8, 9, 10, 11, 12, 13, 14, 15, 16}; + mhd::internal::Flux inputFlux{1, 2, 3, 4, 5, 6, 7}; + mhd::internal::State inputState{8, 9, 10, 11, 12, 13, 14, 15, 16}; int threadId = 0; int n_cells = 10; @@ -2307,7 +2305,7 @@ TEST(tMHDHlldInternalReturnFluxes, CorrectInputExpectCorrectOutput) int const fiducialMagneticYIndex = threadId + n_cells * (grid_enum::magnetic_x); int const fiducialMagneticZIndex = threadId + n_cells * (grid_enum::magnetic_y); - mhd::_internal::returnFluxes(threadId, o1, o2, o3, n_cells, testFluxArray.data(), inputFlux, inputState); + mhd::internal::returnFluxes(threadId, o1, o2, o3, n_cells, testFluxArray.data(), inputFlux, inputState); // Find the indices for the various fields int densityLoc = findIndex(testFluxArray, inputFlux.density, fiducialDensityIndex, "density"); @@ -2335,7 +2333,7 @@ TEST(tMHDHlldInternalReturnFluxes, CorrectInputExpectCorrectOutput) // ========================================================================= /*! - * \brief Test the mhd::_internal::starTotalPressure function + * \brief Test the mhd::internal::starTotalPressure function * */ TEST(tMHDHlldInternalStarTotalPressure, CorrectInputExpectCorrectOutput) @@ -2345,8 +2343,8 @@ TEST(tMHDHlldInternalStarTotalPressure, CorrectInputExpectCorrectOutput) std::vector const fiducialPressure{6802.2800807224075, 3476.1984612875144}; for (size_t i = 0; i < parameters.names.size(); i++) { - Real const testPressure = mhd::_internal::starTotalPressure(parameters.stateLVec.at(i), parameters.stateRVec.at(i), - parameters.speed.at(i)); + Real const testPressure = mhd::internal::starTotalPressure(parameters.stateLVec.at(i), parameters.stateRVec.at(i), + parameters.speed.at(i)); // Now check results testing_utilities::Check_Results(fiducialPressure.at(i), testPressure, @@ -2357,7 +2355,7 @@ TEST(tMHDHlldInternalStarTotalPressure, CorrectInputExpectCorrectOutput) // ========================================================================= /*! - * \brief Test the mhd::_internal::loadState function + * \brief Test the mhd::internal::loadState function * */ TEST(tMHDHlldInternalLoadState, CorrectInputExpectCorrectOutput) @@ -2368,7 +2366,7 @@ TEST(tMHDHlldInternalLoadState, CorrectInputExpectCorrectOutput) std::vector interfaceArray(n_cells * grid_enum::num_fields); std::iota(std::begin(interfaceArray), std::end(interfaceArray), 1.); - std::vector const fiducialState{ + std::vector const fiducialState{ {1, 11, 21, 31, 41, 51, 61, 9.9999999999999995e-21, 7462.3749918998346}, {1, 21, 31, 11, 41, 51, 61, 9.9999999999999995e-21, 7462.3749918998346}, {1, 31, 11, 21, 41, 51, 61, 9.9999999999999995e-21, 7462.3749918998346}, @@ -2394,8 +2392,8 @@ TEST(tMHDHlldInternalLoadState, CorrectInputExpectCorrectOutput) break; } - mhd::_internal::State const testState = mhd::_internal::loadState(interfaceArray.data(), parameters.magneticX.at(0), - parameters.gamma, threadId, n_cells, o1, o2, o3); + mhd::internal::State const testState = mhd::internal::loadState(interfaceArray.data(), parameters.magneticX.at(0), + parameters.gamma, threadId, n_cells, o1, o2, o3); // Now check results testing_utilities::Check_Results(fiducialState.at(direction).density, testState.density, ", Density"); diff --git a/src/utils/error_handling.cpp b/src/utils/error_handling.cpp index 9623fd267..3c1c3097a 100644 --- a/src/utils/error_handling.cpp +++ b/src/utils/error_handling.cpp @@ -29,7 +29,7 @@ } #endif /*MPI_CHOLLA*/ -void Check_Configuration(parameters const& P) +void Check_Configuration(Parameters const& P) { // General Checks // ============== @@ -139,7 +139,7 @@ void Check_Configuration(parameters const& P) // since we are aborting, it's OK that this isn't the most optimized // prepare some info for the error message header - const char* santized_func_name = (func_name == nullptr) ? "{unspecified}" : func_name; + const char* sanitized_func_name = (func_name == nullptr) ? "{unspecified}" : func_name; #ifdef MPI_CHOLLA std::string proc_info = std::to_string(procID) + " / " + std::to_string(nproc) + " (using MPI)"; @@ -156,6 +156,8 @@ void Check_Configuration(parameters const& P) va_start(args, msg); va_copy(args_copy, args); + // The clang-analyzer-valist.Uninitialized is bugged and triggers improperly on this line + // NOLINTNEXTLINE(clang-analyzer-valist.Uninitialized) std::size_t bufsize_without_terminator = std::vsnprintf(nullptr, 0, msg, args); va_end(args); @@ -181,7 +183,7 @@ void Check_Configuration(parameters const& P) "Rank: %s\n" "Message: %s\n" "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\n", - file_name, line_num, santized_func_name, proc_info.data(), msg_buf.data()); + file_name, line_num, sanitized_func_name, proc_info.data(), msg_buf.data()); std::fflush(stderr); // may be unnecessary for stderr chexit(1); } \ No newline at end of file diff --git a/src/utils/error_handling.h b/src/utils/error_handling.h index 4db749881..8ce08e2ca 100644 --- a/src/utils/error_handling.h +++ b/src/utils/error_handling.h @@ -10,7 +10,7 @@ * checks. * */ -void Check_Configuration(parameters const& P); +void Check_Configuration(Parameters const& P); /*! * \brief helper function that prints an error message & aborts the program (in diff --git a/src/utils/mhd_utilities.h b/src/utils/mhd_utilities.h index 1a0c91674..55ecc6f75 100644 --- a/src/utils/mhd_utilities.h +++ b/src/utils/mhd_utilities.h @@ -27,7 +27,7 @@ namespace mhd::utils * but is made accesible for testing * */ -namespace _internal +namespace internal { // ===================================================================== /*! @@ -60,7 +60,7 @@ inline __host__ __device__ Real _magnetosonicSpeed(Real const &density, Real con return sqrt((term1 + waveChoice * term2) / (2.0 * fmax(density, TINY_NUMBER))); } // ===================================================================== -} // namespace _internal +} // namespace internal // ========================================================================= /*! @@ -141,7 +141,7 @@ inline __host__ __device__ Real fastMagnetosonicSpeed(Real const &density, Real Real const &magneticY, Real const &magneticZ, Real const &gamma) { // Compute the sound speed - return mhd::utils::_internal::_magnetosonicSpeed(density, pressure, magneticX, magneticY, magneticZ, gamma, 1.0); + return mhd::utils::internal::_magnetosonicSpeed(density, pressure, magneticX, magneticY, magneticZ, gamma, 1.0); } // ========================================================================= @@ -161,7 +161,7 @@ inline __host__ __device__ Real slowMagnetosonicSpeed(Real const &density, Real Real const &magneticY, Real const &magneticZ, Real const &gamma) { // Compute the sound speed - return mhd::utils::_internal::_magnetosonicSpeed(density, pressure, magneticX, magneticY, magneticZ, gamma, -1.0); + return mhd::utils::internal::_magnetosonicSpeed(density, pressure, magneticX, magneticY, magneticZ, gamma, -1.0); } // ========================================================================= @@ -230,10 +230,10 @@ inline __host__ __device__ auto cellCenteredMagneticFields(Real const *dev_conse : /*if false*/ dev_conserved[(grid_enum::magnetic_z)*n_cells + id]; - struct returnStruct { + struct ReturnStruct { Real x, y, z; }; - return returnStruct{avgBx, avgBy, avgBz}; + return ReturnStruct{avgBx, avgBy, avgBz}; } // ========================================================================= diff --git a/src/utils/prng_utilities.h b/src/utils/prng_utilities.h index 6f8eebc77..4eacbb0f1 100644 --- a/src/utils/prng_utilities.h +++ b/src/utils/prng_utilities.h @@ -13,7 +13,7 @@ class ChollaPrngGenerator public: std::mt19937_64 inline static generator; - ChollaPrngGenerator(struct parameters *P) + ChollaPrngGenerator(struct Parameters *P) { // If the seed isn't defined in the settings file or argv then generate // a random seed diff --git a/src/utils/timing_functions.cpp b/src/utils/timing_functions.cpp index 45214a268..2a5e17329 100644 --- a/src/utils/timing_functions.cpp +++ b/src/utils/timing_functions.cpp @@ -169,7 +169,7 @@ void Time::Print_Times() } // once at end of run in main.cpp -void Time::Print_Average_Times(struct parameters P) +void Time::Print_Average_Times(struct Parameters P) { chprintf("\nAverage Times n_steps:%d\n", n_steps); diff --git a/src/utils/timing_functions.h b/src/utils/timing_functions.h index d2a0f066f..20daf0333 100644 --- a/src/utils/timing_functions.h +++ b/src/utils/timing_functions.h @@ -66,7 +66,7 @@ class Time Time(); void Initialize(); void Print_Times(); - void Print_Average_Times(struct parameters P); + void Print_Average_Times(struct Parameters P); }; // #endif // CPU_TIME From 480e9be5bfa726cd73423255caa036c6bc71f98f Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Thu, 28 Sep 2023 07:45:19 -0400 Subject: [PATCH 565/694] Fix misc-confusable-identifiers check --- src/chemistry_gpu/chemistry_functions.cpp | 6 +++--- src/chemistry_gpu/chemistry_functions_gpu.cu | 4 ++-- src/chemistry_gpu/chemistry_gpu.h | 4 ++-- src/utils/gpu.hpp | 10 +++++----- 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/src/chemistry_gpu/chemistry_functions.cpp b/src/chemistry_gpu/chemistry_functions.cpp index d2ffc2556..7999a6d55 100644 --- a/src/chemistry_gpu/chemistry_functions.cpp +++ b/src/chemistry_gpu/chemistry_functions.cpp @@ -156,14 +156,14 @@ void Chem_GPU::Initialize_Cooling_Rates() if (!use_case_B_recombination) { Generate_Reaction_Rate_Table(&H.cool_reHII_d, cool_reHII_rate_case_A, units); - Generate_Reaction_Rate_Table(&H.cool_reHeII1_d, cool_reHeII1_rate_case_A, units); + Generate_Reaction_Rate_Table(&H.cool_reHeII_1_d, cool_reHeII1_rate_case_A, units); Generate_Reaction_Rate_Table(&H.cool_reHeIII_d, cool_reHeIII_rate_case_A, units); } else { Generate_Reaction_Rate_Table(&H.cool_reHII_d, cool_reHII_rate_case_B, units); - Generate_Reaction_Rate_Table(&H.cool_reHeII1_d, cool_reHeII1_rate_case_B, units); + Generate_Reaction_Rate_Table(&H.cool_reHeII_1_d, cool_reHeII1_rate_case_B, units); Generate_Reaction_Rate_Table(&H.cool_reHeIII_d, cool_reHeIII_rate_case_B, units); } - Generate_Reaction_Rate_Table(&H.cool_reHeII2_d, cool_reHeII2_rate, units); + Generate_Reaction_Rate_Table(&H.cool_reHeII_2_d, cool_reHeII2_rate, units); Generate_Reaction_Rate_Table(&H.cool_brem_d, cool_brem_rate, units); diff --git a/src/chemistry_gpu/chemistry_functions_gpu.cu b/src/chemistry_gpu/chemistry_functions_gpu.cu index df886f7b7..72160d98d 100644 --- a/src/chemistry_gpu/chemistry_functions_gpu.cu +++ b/src/chemistry_gpu/chemistry_functions_gpu.cu @@ -153,8 +153,8 @@ __device__ Real Get_Cooling_Rates(Thermal_State &TS, Chemistry_Header &Chem_H, R // Recombination cooling Real cool_reHII, cool_reHeII1, cool_reHeII2, cool_reHeIII; cool_reHII = interpolate_rate(Chem_H.cool_reHII_d, temp_indx, delta_T) * TS.d_HII * TS.d_e; - cool_reHeII1 = interpolate_rate(Chem_H.cool_reHeII1_d, temp_indx, delta_T) * TS.d_HeII * TS.d_e / 4.0; - cool_reHeII2 = interpolate_rate(Chem_H.cool_reHeII2_d, temp_indx, delta_T) * TS.d_HeII * TS.d_e / 4.0; + cool_reHeII1 = interpolate_rate(Chem_H.cool_reHeII_1_d, temp_indx, delta_T) * TS.d_HeII * TS.d_e / 4.0; + cool_reHeII2 = interpolate_rate(Chem_H.cool_reHeII_2_d, temp_indx, delta_T) * TS.d_HeII * TS.d_e / 4.0; cool_reHeIII = interpolate_rate(Chem_H.cool_reHeIII_d, temp_indx, delta_T) * TS.d_HeIII * TS.d_e / 4.0; U_dot -= cool_reHII + cool_reHeII1 + cool_reHeII2 + cool_reHeIII; diff --git a/src/chemistry_gpu/chemistry_gpu.h b/src/chemistry_gpu/chemistry_gpu.h index 473f6b609..79674c3a0 100644 --- a/src/chemistry_gpu/chemistry_gpu.h +++ b/src/chemistry_gpu/chemistry_gpu.h @@ -48,8 +48,8 @@ struct ChemistryHeader { Real *cool_ciHeIS_d; Real *cool_reHII_d; - Real *cool_reHeII1_d; - Real *cool_reHeII2_d; + Real *cool_reHeII_1_d; + Real *cool_reHeII_2_d; Real *cool_reHeIII_d; Real *cool_brem_d; diff --git a/src/utils/gpu.hpp b/src/utils/gpu.hpp index da45f2549..ef4f0e19c 100644 --- a/src/utils/gpu.hpp +++ b/src/utils/gpu.hpp @@ -347,12 +347,12 @@ void gpuFor(const int n0, const int n1, const int n2, const int n3, const F f) if ((n0 <= 0) || (n1 <= 0) || (n2 <= 0) || (n3 <= 0)) { return; } - const long nl23 = long(n2) * long(n3); - const long nl123 = long(n1) * nl23; - assert(long(n0) * nl123 < long(INT_MAX)); + const long n23_long = long(n2) * long(n3); + const long n123_long = long(n1) * n23_long; + assert(long(n0) * n123_long < long(INT_MAX)); - const int n23 = int(nl23); - const int n123 = int(nl123); + const int n23 = int(n23_long); + const int n123 = int(n123_long); if (n3 > GPU_MAX_THREADS) { const int b23 = (n23 + GPU_MAX_THREADS - 1) / GPU_MAX_THREADS; const int t23 = (n23 + b23 - 1) / b23; From 5b351e94dfbda71f101770e1e126eab8d9970101 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Thu, 28 Sep 2023 08:30:04 -0400 Subject: [PATCH 566/694] Fix hicpp-signed-bitwise check it was triggering on code that was correct as is --- src/utils/reduction_utilities.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/utils/reduction_utilities.h b/src/utils/reduction_utilities.h index 429e5f0cb..e47f72d26 100644 --- a/src/utils/reduction_utilities.h +++ b/src/utils/reduction_utilities.h @@ -116,7 +116,7 @@ __device__ constexpr To bit_cast(const From& from) noexcept inline __device__ int encode(float val) { int i = bit_cast(val); - return i >= 0 ? i : (1 << 31) | ~i; + return i >= 0 ? i : (1 << 31) | ~i; // NOLINT(hicpp-signed-bitwise) } /*! @@ -128,7 +128,7 @@ inline __device__ int encode(float val) inline __device__ long long encode(double val) { auto i = bit_cast(val); - return i >= 0 ? i : (1ULL << 63) | ~i; + return i >= 0 ? i : (1ULL << 63) | ~i; // NOLINT(hicpp-signed-bitwise) } /*! @@ -140,7 +140,7 @@ inline __device__ long long encode(double val) inline __device__ float decode(int val) { if (val < 0) { - val = (1 << 31) | ~val; + val = (1 << 31) | ~val; // NOLINT(hicpp-signed-bitwise) } return bit_cast(val); } @@ -154,7 +154,7 @@ inline __device__ float decode(int val) inline __device__ double decode(long long val) { if (val < 0) { - val = (1ULL << 63) | ~val; + val = (1ULL << 63) | ~val; // NOLINT(hicpp-signed-bitwise) } return bit_cast(val); } From 15353f42c1b984972ef31a999e6bf2482ebec8b3 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Thu, 28 Sep 2023 08:31:21 -0400 Subject: [PATCH 567/694] clang-tidy: disable function name case check temporary --- .clang-tidy | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.clang-tidy b/.clang-tidy index ea7e7dc48..0e3fa49c3 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -165,7 +165,7 @@ CheckOptions: # - aNy_CasE # readability-identifier-naming.VariableCase: 'lower_case' - readability-identifier-naming.FunctionCase: 'Camel_Snake_Case' + # readability-identifier-naming.FunctionCase: 'Camel_Snake_Case' readability-identifier-naming.NamespaceCase: 'lower_case' # readability-identifier-naming.MacroDefinitionCase: 'UPPER_CASE' # readability-identifier-naming.TypedefCase: 'CamelCase' From c36aef3906690d28369173cbb1b611c77007a0d3 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Thu, 28 Sep 2023 09:31:57 -0400 Subject: [PATCH 568/694] Fix typo in readme.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 7f9bb18f0..a61ce0ad6 100644 --- a/README.md +++ b/README.md @@ -11,7 +11,7 @@ https://user-images.githubusercontent.com/3432028/188235319-e5eb4e5e-00c6-435f-a Getting started ---------------- -This is the stable branch of the *Cholla* hydrodyamics code. +This is the stable branch of the *Cholla* hydrodynamics code. *Cholla* is designed to be run using (AMD or NVIDIA) GPUs, and can be run in serial mode using one GPU or with MPI for multiple GPUs. From ff34c803c6e2e3ff36411ed56974c412a8b5e2b1 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Thu, 28 Sep 2023 09:32:53 -0400 Subject: [PATCH 569/694] Fix bug in tidy make recipe --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index e53a660c5..c444ae4a8 100644 --- a/Makefile +++ b/Makefile @@ -180,7 +180,7 @@ LIBS_CLANG_TIDY := $(subst -I/, -isystem /,$(LIBS)) LIBS_CLANG_TIDY += -isystem $(MPI_ROOT)/include -isystem $(HDF5_ROOT)/include CXXFLAGS_CLANG_TIDY := $(subst -I/, -isystem /,$(LDFLAGS)) GPUFLAGS_CLANG_TIDY := $(subst -I/, -isystem /,$(GPUFLAGS)) -GPUFLAGS_CLANG_TIDY := $(filter-out -ccbin=mpicxx -fmad=false --expt-extended-lambda,$(GPUFLAGS)) +GPUFLAGS_CLANG_TIDY := $(filter-out -ccbin=mpicxx -fmad=false --expt-extended-lambda,$(GPUFLAGS_CLANG_TIDY)) GPUFLAGS_CLANG_TIDY += --cuda-host-only --cuda-path=$(CUDA_ROOT) -isystem /clang/includes CPPFILES_TIDY := $(CPPFILES) GPUFILES_TIDY := $(GPUFILES) From 9440a4d1b05b2abfcaf050f31944440cca168ff9 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Fri, 22 Sep 2023 15:32:39 -0400 Subject: [PATCH 570/694] Add "cooling" build type to CI build matrices --- .github/workflows/build_and_lint.yml | 2 +- Jenkinsfile | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build_and_lint.yml b/.github/workflows/build_and_lint.yml index 59e5409af..a4308ebd3 100644 --- a/.github/workflows/build_and_lint.yml +++ b/.github/workflows/build_and_lint.yml @@ -28,7 +28,7 @@ jobs: strategy: fail-fast: false matrix: - make-type: [hydro, gravity, disk, particles, cosmology, mhd, dust] + make-type: [hydro, gravity, disk, particles, cosmology, mhd, dust, cooling] # The CUDA container can be added with {name: "CUDA", link: "docker://chollahydro/cholla:cuda_github"} container: [{name: "HIP",link: "docker://chollahydro/cholla:rocm_github"}] diff --git a/Jenkinsfile b/Jenkinsfile index 96000eb6d..8928df96c 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -29,7 +29,7 @@ pipeline axis { name 'CHOLLA_MAKE_TYPE' - values 'hydro', 'gravity', 'disk', 'particles', 'cosmology', 'mhd', 'dust' + values 'hydro', 'gravity', 'disk', 'particles', 'cosmology', 'mhd', 'dust', 'cooling' } } From 5f5d0259a7bc72d7f518a3e62c9959d9463aa46a Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Thu, 28 Sep 2023 06:30:24 -0400 Subject: [PATCH 571/694] Add cooling to tools/clang-tidy_runner.sh --- tools/clang-tidy_runner.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/clang-tidy_runner.sh b/tools/clang-tidy_runner.sh index 0e8930af8..6f2915b8f 100755 --- a/tools/clang-tidy_runner.sh +++ b/tools/clang-tidy_runner.sh @@ -13,7 +13,7 @@ cholla_root="$(dirname "$(dirname "$(readlink -fm "$0")")")" cd $cholla_root # Run all clang-tidy build types in parallel -builds=( hydro gravity disk particles cosmology mhd dust) +builds=( hydro gravity disk particles cosmology mhd dust cooling) for build in "${builds[@]}" do make tidy TYPE=$build & From 2d4b8fe17906dc42d853b702c2177e2440039617 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Thu, 12 Oct 2023 11:02:12 -0400 Subject: [PATCH 572/694] Fixes for readability-braces-around-statements checks - Added braces to 2 if statments for clarity - Disable google-readability-braces-around-statements and hicpp-braces-around-statements since they didn't appear to respect the readability-braces-around-statements.ShortStatementLines setting --- .clang-tidy | 3 ++- src/reconstruction/ppmp_cuda.cu | 6 ++++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/.clang-tidy b/.clang-tidy index 0e3fa49c3..be331deed 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -37,6 +37,8 @@ Checks: "*, -cert-dcl37-c, -cert-dcl51-cpp, -cppcoreguidelines-pro-bounds-constant-array-index, + -google-readability-braces-around-statements, + -hicpp-braces-around-statements, google-readability-avoid-underscore-in-googletest-name, google-upgrade-googletest-case, @@ -45,7 +47,6 @@ Checks: "*, -bugprone-implicit-widening-of-multiplication-result, -bugprone-narrowing-conversions, -bugprone-switch-missing-default-case, - -bugprone-switch-missing-default-case, -cert-env33-c, -cert-err33-c, -cert-err34-c, diff --git a/src/reconstruction/ppmp_cuda.cu b/src/reconstruction/ppmp_cuda.cu index 89eaccc21..ae8da90cb 100644 --- a/src/reconstruction/ppmp_cuda.cu +++ b/src/reconstruction/ppmp_cuda.cu @@ -738,10 +738,12 @@ __device__ void Interface_Values_PPM(Real q_imo, Real q_i, Real q_ipo, Real del_ if ((*q_R - q_i) * (q_i - *q_L) <= 0) *q_L = *q_R = q_i; // steep gradient criterion (Fryxell Eqn 53, Fig 12) - if (6.0 * (*q_R - *q_L) * (q_i - 0.5 * (*q_L + *q_R)) > (*q_R - *q_L) * (*q_R - *q_L)) + if (6.0 * (*q_R - *q_L) * (q_i - 0.5 * (*q_L + *q_R)) > (*q_R - *q_L) * (*q_R - *q_L)) { *q_L = 3.0 * q_i - 2.0 * (*q_R); - if (6.0 * (*q_R - *q_L) * (q_i - 0.5 * (*q_L + *q_R)) < -(*q_R - *q_L) * (*q_R - *q_L)) + } + if (6.0 * (*q_R - *q_L) * (q_i - 0.5 * (*q_L + *q_R)) < -(*q_R - *q_L) * (*q_R - *q_L)) { *q_R = 3.0 * q_i - 2.0 * (*q_L); + } *q_L = fmax(fmin(q_i, q_imo), *q_L); *q_L = fmin(fmax(q_i, q_imo), *q_L); From 48375aad5268b3fd4558044ea460af0f359f85ff Mon Sep 17 00:00:00 2001 From: Matthew Abruzzo Date: Fri, 27 Oct 2023 11:36:26 -0400 Subject: [PATCH 573/694] Fix bug related to restarting sims with particles Along the way I refactored some logic to make use of std::string rather that c-style strings --- src/particles/io_particles.cpp | 31 +++++++++++++------------------ 1 file changed, 13 insertions(+), 18 deletions(-) diff --git a/src/particles/io_particles.cpp b/src/particles/io_particles.cpp index a5f04ce13..7a0e8a28a 100644 --- a/src/particles/io_particles.cpp +++ b/src/particles/io_particles.cpp @@ -4,6 +4,7 @@ #include #include #include + #include #include @@ -23,40 +24,34 @@ void Particles3D::Load_Particles_Data(struct Parameters *P) { - char filename[100]; - char timestep[20]; - int nfile = P->nfile; // output step you want to read from - char filename_counter[100]; - // create the filename to read from - - strcpy(filename, P->indir); - sprintf(timestep, "%d_particles", nfile); - strcat(filename, timestep); - - #if defined BINARY + #ifndef HDF5 chprintf("\nERROR: Particles only support HDF5 outputs\n"); exit(-1); - #elif defined HDF5 - strcat(filename, ".h5"); #endif + // construct the filename to read from #ifdef MPI_CHOLLA #ifdef TILED_INITIAL_CONDITIONS - sprintf(filename, "%sics_%dMpc_%d_particles.h5", P->indir, (int)P->tile_length / 1000, - G.nx_local); // Everyone reads the same file + // Every process reads the same file + const std::string base_fname = ("ics_" + std::to_string((int)P->tile_length / 1000) + "Mpc_" + + std::to_string(G.nx_local) + "_particles.h5"); #else - sprintf(filename, "%s.%d", filename, procID); + const int nfile = P->nfile; // output step you want to read from + const std::string base_fname = (std::to_string(nfile) + "_particles.h5." + + std::to_string(procID)); #endif // TILED_INITIAL_CONDITIONS #endif - chprintf(" Loading particles file: %s \n", filename); + const std::string filename = std::string(P->indir) + base_fname; + + chprintf(" Loading particles file: %s \n", filename.c_str()); #ifdef HDF5 hid_t file_id; herr_t status; // open the file - file_id = H5Fopen(filename, H5F_ACC_RDONLY, H5P_DEFAULT); + file_id = H5Fopen(filename.c_str(), H5F_ACC_RDONLY, H5P_DEFAULT); if (file_id < 0) { printf("Unable to open input file.\n"); exit(0); From 7b4ede22f270bf41fca614079f5d5be96f7159de Mon Sep 17 00:00:00 2001 From: Matthew Abruzzo Date: Fri, 27 Oct 2023 11:50:17 -0400 Subject: [PATCH 574/694] particle-format bugfix after clang-format and clang-tidy --- src/particles/io_particles.cpp | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/src/particles/io_particles.cpp b/src/particles/io_particles.cpp index 7a0e8a28a..41226b7be 100644 --- a/src/particles/io_particles.cpp +++ b/src/particles/io_particles.cpp @@ -4,9 +4,9 @@ #include #include #include - #include #include + #include #include "../global/global.h" #include "../grid/grid3D.h" @@ -32,13 +32,12 @@ void Particles3D::Load_Particles_Data(struct Parameters *P) // construct the filename to read from #ifdef MPI_CHOLLA #ifdef TILED_INITIAL_CONDITIONS - // Every process reads the same file - const std::string base_fname = ("ics_" + std::to_string((int)P->tile_length / 1000) + "Mpc_" + - std::to_string(G.nx_local) + "_particles.h5"); + // Every process reads the same file + const std::string base_fname = + ("ics_" + std::to_string((int)P->tile_length / 1000) + "Mpc_" + std::to_string(G.nx_local) + "_particles.h5"); #else - const int nfile = P->nfile; // output step you want to read from - const std::string base_fname = (std::to_string(nfile) + "_particles.h5." + - std::to_string(procID)); + const int nfile = P->nfile; // output step you want to read from + const std::string base_fname = (std::to_string(nfile) + "_particles.h5." + std::to_string(procID)); #endif // TILED_INITIAL_CONDITIONS #endif From 135c5493a1972b006bc48e22b1c9e48c33b27e97 Mon Sep 17 00:00:00 2001 From: ojwg Date: Fri, 27 Oct 2023 15:02:22 -0400 Subject: [PATCH 575/694] Deprecate BLOCK macro --- builds/make.inc.template | 2 +- builds/make.type.cloudy | 1 - builds/make.type.cooling | 1 - builds/make.type.dust | 1 - 4 files changed, 1 insertion(+), 4 deletions(-) diff --git a/builds/make.inc.template b/builds/make.inc.template index 22fbd663d..4c09d085f 100644 --- a/builds/make.inc.template +++ b/builds/make.inc.template @@ -7,7 +7,7 @@ DFLAGS += -DCUDA #-DCUDA_ERROR_CHECK #To use MPI, DFLAGS must include -DMPI_CHOLLA -DFLAGS += -DMPI_CHOLLA -DBLOCK +DFLAGS += -DMPI_CHOLLA #Set the MPI Processes grid [nproc_x, nproc_y, nproc_z] #DFLAGS += -DSET_MPI_GRID diff --git a/builds/make.type.cloudy b/builds/make.type.cloudy index e604ff818..86e43315b 100644 --- a/builds/make.type.cloudy +++ b/builds/make.type.cloudy @@ -8,7 +8,6 @@ MPI_GPU ?= DFLAGS += -DCUDA DFLAGS += -DMPI_CHOLLA -DFLAGS += -DBLOCK DFLAGS += -DPRECISION=2 DFLAGS += -DPPMP DFLAGS += -DHLLC diff --git a/builds/make.type.cooling b/builds/make.type.cooling index baf4ed0e9..85dedc25d 100644 --- a/builds/make.type.cooling +++ b/builds/make.type.cooling @@ -8,7 +8,6 @@ MPI_GPU ?= DFLAGS += -DCUDA DFLAGS += -DMPI_CHOLLA -DFLAGS += -DBLOCK DFLAGS += -DPRECISION=2 DFLAGS += -DPPMP DFLAGS += -DHLLC diff --git a/builds/make.type.dust b/builds/make.type.dust index 0be259763..24e27e30f 100644 --- a/builds/make.type.dust +++ b/builds/make.type.dust @@ -8,7 +8,6 @@ MPI_GPU ?= DFLAGS += -DCUDA DFLAGS += -DMPI_CHOLLA -DFLAGS += -DBLOCK DFLAGS += -DPRECISION=2 DFLAGS += -DPPMC DFLAGS += -DHLLC From c0c6f3a6cddb28c84ee852b8d1f7b3f045ea56f9 Mon Sep 17 00:00:00 2001 From: ojwg Date: Fri, 27 Oct 2023 15:34:19 -0400 Subject: [PATCH 576/694] fix broken 'cloudy' build --- src/analysis/analysis.cpp | 4 ++-- src/cooling/load_cloudy_texture.cu | 4 ++-- src/global/global.h | 2 +- src/gravity/potential_SOR_3D.cpp | 4 ++-- src/utils/timing_functions.h | 2 +- 5 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/analysis/analysis.cpp b/src/analysis/analysis.cpp index 9b927daaa..d9eede2f1 100644 --- a/src/analysis/analysis.cpp +++ b/src/analysis/analysis.cpp @@ -13,7 +13,7 @@ void Grid3D::Compute_Lya_Statistics() { int axis, n_skewers; Real time_start, time_end, time_elapsed; - time_start = get_time(); + time_start = Get_Time(); // Copmpute Lya Statitics chprintf("Computing Lya Absorbiton along skewers \n"); @@ -70,7 +70,7 @@ void Grid3D::Compute_Lya_Statistics() // Analysis.Computed_Flux_Power_Spectrum = 0; // } - time_end = get_time(); + time_end = Get_Time(); time_elapsed = (time_end - time_start) * 1000; chprintf("Analysis Time: %f9.1 ms \n", time_elapsed); } diff --git a/src/cooling/load_cloudy_texture.cu b/src/cooling/load_cloudy_texture.cu index e0022fbb5..e1a02dc28 100644 --- a/src/cooling/load_cloudy_texture.cu +++ b/src/cooling/load_cloudy_texture.cu @@ -273,12 +273,12 @@ void Test_Cloudy_Speed() dim3 dim1dGrid((num_n * num_T + TPB - 1) / TPB, 1, 1); dim3 dim1dBlock(TPB, 1, 1); CHECK(cudaDeviceSynchronize()); - Real time_start = get_time(); + Real time_start = Get_Time(); for (int i = 0; i < 100; i++) { hipLaunchKernelGGL(Test_Cloudy_Speed_Kernel, dim1dGrid, dim1dBlock, 0, 0, num_n, num_T, coolTexObj, heatTexObj); } CHECK(cudaDeviceSynchronize()); - Real time_end = get_time(); + Real time_end = Get_Time(); printf(" Cloudy Test Time %9.4f micro-s \n", (time_end - time_start)); printf("Exiting due to Test_Cloudy_Speed() being called \n"); exit(0); diff --git a/src/global/global.h b/src/global/global.h index 8e7c1b76a..8abe358fc 100644 --- a/src/global/global.h +++ b/src/global/global.h @@ -147,7 +147,7 @@ extern float *heating_table; * \brief Set gamma values for Riemann solver. */ extern void Set_Gammas(Real gamma_in); -/*! \fn double get_time(void) +/*! \fn double Get_Time(void) * \brief Returns the current clock time. */ extern double Get_Time(void); diff --git a/src/gravity/potential_SOR_3D.cpp b/src/gravity/potential_SOR_3D.cpp index 403b31cf6..2fdd9a91f 100644 --- a/src/gravity/potential_SOR_3D.cpp +++ b/src/gravity/potential_SOR_3D.cpp @@ -157,7 +157,7 @@ void Grid3D::Get_Potential_SOR(Real Grav_Constant, Real dens_avrg, Real current_ { #ifdef TIME_SOR Real time_start, time_end, time; - time_start = get_time(); + time_start = Get_Time(); #endif Grav.Poisson_solver.Copy_Input_And_Initialize(Grav.F.density_h, Grav.F.potential_h, Grav_Constant, dens_avrg, @@ -233,7 +233,7 @@ void Grid3D::Get_Potential_SOR(Real Grav_Constant, Real dens_avrg, Real current_ #ifdef MPI_CHOLLA MPI_Barrier(world); #endif - time_end = get_time(); + time_end = Get_Time(); time = (time_end - time_start); chprintf(" SOR: Time = %f seg\n", time); #endif diff --git a/src/utils/timing_functions.h b/src/utils/timing_functions.h index 20daf0333..96cceea15 100644 --- a/src/utils/timing_functions.h +++ b/src/utils/timing_functions.h @@ -3,7 +3,7 @@ #include -#include "../global/global.h" // Provides Real, get_time +#include "../global/global.h" // Provides Real, Get_Time // #ifdef CPU_TIME // Each instance of this class represents a single timer, timing a single From 571a1fad0fdf508ea34debfc28c32e3c5bf19ba8 Mon Sep 17 00:00:00 2001 From: Matthew Abruzzo Date: Wed, 1 Nov 2023 00:15:41 -0400 Subject: [PATCH 577/694] Shift some contents from Grid3D's Update_Grid to Update_Hydro_Grid I think this is a very sensible organizational change. Plus, this is very helpful for some stuff I'm doing on a local branch (it would to upstream this sooner rather than later to limit merge conflicts) --- src/grid/grid3D.cpp | 52 ++++++++++++++++++++++----------------------- src/grid/grid3D.h | 2 +- 2 files changed, 26 insertions(+), 28 deletions(-) diff --git a/src/grid/grid3D.cpp b/src/grid/grid3D.cpp index 8a7d3fa6f..1adb1e935 100644 --- a/src/grid/grid3D.cpp +++ b/src/grid/grid3D.cpp @@ -404,7 +404,7 @@ void Grid3D::set_dt(Real dti) /*! \fn void Update_Grid(void) * \brief Update the conserved quantities in each cell. */ -Real Grid3D::Update_Grid(void) +void Grid3D::Update_Grid(void) { Real max_dti = 0; int x_off, y_off, z_off; @@ -478,6 +478,29 @@ Real Grid3D::Update_Grid(void) Timer.Hydro_Integrator.End(true); #endif // CPU_TIME +} + +/*! \fn void Update_Hydro_Grid(void) + * \brief Do all steps to update the hydro. */ +Real Grid3D::Update_Hydro_Grid() +{ +#ifdef ONLY_PARTICLES + // Don't integrate the Hydro when only solving for particles + return 1e-10; +#endif // ONLY_PARTICLES + +#ifdef CPU_TIME + Timer.Hydro.Start(); +#endif // CPU_TIME + +#ifdef GRAVITY + // Extrapolate gravitational potential for hydro step + Extrapolate_Grav_Potential(); +#endif // GRAVITY + + // execute the hydro integrators + Update_Grid(); + #ifdef CUDA #ifdef COOLING_GPU @@ -513,7 +536,7 @@ Real Grid3D::Update_Grid(void) #endif // AVERAGE_SLOW_CELLS // ==Calculate the next time step using Calc_dt_GPU from hydro/hydro_cuda.h== - max_dti = Calc_Inverse_Timestep(); + Real dti = Calc_Inverse_Timestep(); #endif // CUDA @@ -540,31 +563,6 @@ Real Grid3D::Update_Grid(void) C.e_density = &C.host[H.n_cells * grid_enum::e_density]; #endif - return max_dti; -} - -/*! \fn void Update_Hydro_Grid(void) - * \brief Do all steps to update the hydro. */ -Real Grid3D::Update_Hydro_Grid() -{ -#ifdef ONLY_PARTICLES - // Don't integrate the Hydro when only solving for particles - return 1e-10; -#endif // ONLY_PARTICLES - - Real dti; - -#ifdef CPU_TIME - Timer.Hydro.Start(); -#endif // CPU_TIME - -#ifdef GRAVITY - // Extrapolate gravitational potential for hydro step - Extrapolate_Grav_Potential(); -#endif // GRAVITY - - dti = Update_Grid(); - #ifdef CPU_TIME #ifdef CHEMISTRY_GPU Timer.Hydro.Subtract(Chem.H.runtime_chemistry_step); diff --git a/src/grid/grid3D.h b/src/grid/grid3D.h index 3f4d4772c..b338ca79c 100644 --- a/src/grid/grid3D.h +++ b/src/grid/grid3D.h @@ -461,7 +461,7 @@ class Grid3D /*! \fn void Update_Grid(void) * \brief Update the conserved quantities in each cell. */ - Real Update_Grid(void); + void Update_Grid(void); /*! \fn void Update_Hydro_Grid(void) * \brief Do all steps to update the hydro. */ From c388751c2f040ed9e5aa3074aaa06b2d236da510 Mon Sep 17 00:00:00 2001 From: Matthew Abruzzo Date: Wed, 1 Nov 2023 00:24:47 -0400 Subject: [PATCH 578/694] Rename Update_Grid to Execute_Hydro_Integrator --- src/grid/grid3D.cpp | 9 ++++----- src/grid/grid3D.h | 6 +++--- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/src/grid/grid3D.cpp b/src/grid/grid3D.cpp index 1adb1e935..395cfec81 100644 --- a/src/grid/grid3D.cpp +++ b/src/grid/grid3D.cpp @@ -402,9 +402,9 @@ void Grid3D::set_dt(Real dti) #endif } -/*! \fn void Update_Grid(void) - * \brief Update the conserved quantities in each cell. */ -void Grid3D::Update_Grid(void) +/*! \fn void Execute_Hydro_Integratore_Grid(void) + * \brief Updates cells by executing the hydro integrator. */ +void Grid3D::Execute_Hydro_Integrator(void) { Real max_dti = 0; int x_off, y_off, z_off; @@ -498,8 +498,7 @@ Real Grid3D::Update_Hydro_Grid() Extrapolate_Grav_Potential(); #endif // GRAVITY - // execute the hydro integrators - Update_Grid(); + Execute_Hydro_Integrator(); #ifdef CUDA diff --git a/src/grid/grid3D.h b/src/grid/grid3D.h index b338ca79c..aff94c898 100644 --- a/src/grid/grid3D.h +++ b/src/grid/grid3D.h @@ -459,9 +459,9 @@ class Grid3D void set_dt_Gravity(); #endif - /*! \fn void Update_Grid(void) - * \brief Update the conserved quantities in each cell. */ - void Update_Grid(void); + /*! \fn void Execute_Hydro_Integratore_Grid(void) + * \brief Updates cells by executing the hydro integrator. */ + void Execute_Hydro_Integrator(void); /*! \fn void Update_Hydro_Grid(void) * \brief Do all steps to update the hydro. */ From 09be630d8aafb47c11a7deb1518988bf1820a50a Mon Sep 17 00:00:00 2001 From: Matthew Abruzzo Date: Wed, 1 Nov 2023 10:12:32 -0400 Subject: [PATCH 579/694] Move Average_Slow_Cells and Calc_Inverse_Timestep to after all forms of cooling. --- src/grid/grid3D.cpp | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/src/grid/grid3D.cpp b/src/grid/grid3D.cpp index 395cfec81..b31c40749 100644 --- a/src/grid/grid3D.cpp +++ b/src/grid/grid3D.cpp @@ -500,6 +500,8 @@ Real Grid3D::Update_Hydro_Grid() Execute_Hydro_Integrator(); + // == Perform chemistry/cooling (there are a few different cases) == + #ifdef CUDA #ifdef COOLING_GPU @@ -527,16 +529,6 @@ Real Grid3D::Update_Hydro_Grid() #endif #endif - #ifdef AVERAGE_SLOW_CELLS - // Set the min_delta_t for averaging a slow cell - Real max_dti_slow; - max_dti_slow = 1 / H.min_dt_slow; - Average_Slow_Cells(C.device, H.nx, H.ny, H.nz, H.n_ghost, H.n_fields, H.dx, H.dy, H.dz, gama, max_dti_slow); - #endif // AVERAGE_SLOW_CELLS - - // ==Calculate the next time step using Calc_dt_GPU from hydro/hydro_cuda.h== - Real dti = Calc_Inverse_Timestep(); - #endif // CUDA #ifdef COOLING_GRACKLE @@ -580,6 +572,17 @@ Real Grid3D::Update_Hydro_Grid() #endif // CPU_TIME #endif // COOLING_GRACKLE +// == compute the new timestep == +#ifdef AVERAGE_SLOW_CELLS + // Set the min_delta_t for averaging a slow cell + Real max_dti_slow; + max_dti_slow = 1 / H.min_dt_slow; + Average_Slow_Cells(C.device, H.nx, H.ny, H.nz, H.n_ghost, H.n_fields, H.dx, H.dy, H.dz, gama, max_dti_slow); +#endif // AVERAGE_SLOW_CELLS + + // ==Calculate the next time step using Calc_dt_GPU from hydro/hydro_cuda.h== + Real dti = Calc_Inverse_Timestep(); + return dti; } From 76bbbc0a35d5b9d94ed78f6514ee3ea96ce4c8d5 Mon Sep 17 00:00:00 2001 From: Matthew Abruzzo Date: Wed, 1 Nov 2023 10:49:03 -0400 Subject: [PATCH 580/694] Minor cleanup - translates to 2 changes: 1. consolidate the ``ifdef CHEMISTRY_GPU`` and ``ifdef COOLING_GRACKLE`` regions 2. Adjust Timer calls to match profiling from before last commit. This involves a somewhat ugly hack related to timing Grackle (it seems unavoidable without modifying the timing_functions - I actually have some longer term ideas to refactor some of that code). --- src/grid/grid3D.cpp | 51 ++++++++++++++++++++------------------------- 1 file changed, 23 insertions(+), 28 deletions(-) diff --git a/src/grid/grid3D.cpp b/src/grid/grid3D.cpp index b31c40749..b87eb0d7c 100644 --- a/src/grid/grid3D.cpp +++ b/src/grid/grid3D.cpp @@ -477,7 +477,6 @@ void Grid3D::Execute_Hydro_Integrator(void) #ifdef CPU_TIME Timer.Hydro_Integrator.End(true); #endif // CPU_TIME - } /*! \fn void Update_Hydro_Grid(void) @@ -491,6 +490,7 @@ Real Grid3D::Update_Hydro_Grid() #ifdef CPU_TIME Timer.Hydro.Start(); + double non_hydro_elapsed_time = 0.0; #endif // CPU_TIME #ifdef GRAVITY @@ -521,15 +521,22 @@ Real Grid3D::Update_Hydro_Grid() Dust_Update(C.device, H.nx, H.ny, H.nz, H.n_ghost, H.n_fields, H.dt, gama); #endif // DUST +#endif // CUDA + +#ifdef CHEMISTRY_GPU // Update the H and He ionization fractions and apply cooling and photoheating - #ifdef CHEMISTRY_GPU Update_Chemistry(); - #ifdef CPU_TIME + #ifdef CPU_TIME Timer.Chemistry.RecordTime(Chem.H.runtime_chemistry_step); - #endif + non_hydro_elapsed_time += Chem.H.runtime_chemistry_step; #endif - -#endif // CUDA + C.HI_density = &C.host[H.n_cells * grid_enum::HI_density]; + C.HII_density = &C.host[H.n_cells * grid_enum::HII_density]; + C.HeI_density = &C.host[H.n_cells * grid_enum::HeI_density]; + C.HeII_density = &C.host[H.n_cells * grid_enum::HeII_density]; + C.HeIII_density = &C.host[H.n_cells * grid_enum::HeIII_density]; + C.e_density = &C.host[H.n_cells * grid_enum::e_density]; +#endif #ifdef COOLING_GRACKLE Cool.fields.density = C.density; @@ -543,36 +550,19 @@ Real Grid3D::Update_Hydro_Grid() #ifdef GRACKLE_METALS Cool.fields.metal_density = &C.host[H.n_cells * grid_enum::metal_density]; #endif -#endif -#ifdef CHEMISTRY_GPU - C.HI_density = &C.host[H.n_cells * grid_enum::HI_density]; - C.HII_density = &C.host[H.n_cells * grid_enum::HII_density]; - C.HeI_density = &C.host[H.n_cells * grid_enum::HeI_density]; - C.HeII_density = &C.host[H.n_cells * grid_enum::HeII_density]; - C.HeIII_density = &C.host[H.n_cells * grid_enum::HeIII_density]; - C.e_density = &C.host[H.n_cells * grid_enum::e_density]; -#endif - -#ifdef CPU_TIME - #ifdef CHEMISTRY_GPU - Timer.Hydro.Subtract(Chem.H.runtime_chemistry_step); - // Subtract the time spent on the Chemical Update - #endif // CHEMISTRY_GPU - Timer.Hydro.End(); -#endif // CPU_TIME - -#ifdef COOLING_GRACKLE #ifdef CPU_TIME - Timer.Cooling_Grackle.Start(); + double cur_grackle_timing = Get_Time(); #endif // CPU_TIME Do_Cooling_Step_Grackle(); #ifdef CPU_TIME - Timer.Cooling_Grackle.End(); + double cur_grackle_timing = Get_Time() - cur_grackle_timing; + Timer.Cooling_Grackle.RecordTime(cur_grackle_timing); + non_hydro_elapsed_time += cur_grackle_timing; #endif // CPU_TIME #endif // COOLING_GRACKLE -// == compute the new timestep == + // == average slow cells and compute the new timestep == #ifdef AVERAGE_SLOW_CELLS // Set the min_delta_t for averaging a slow cell Real max_dti_slow; @@ -583,6 +573,11 @@ Real Grid3D::Update_Hydro_Grid() // ==Calculate the next time step using Calc_dt_GPU from hydro/hydro_cuda.h== Real dti = Calc_Inverse_Timestep(); +#ifdef CPU_TIME + Timer.Hydro.Subtract(non_hydro_elapsed_time); + Timer.Hydro.End(); +#endif // CPU_TIME + return dti; } From 33b031b5699dd3b42e1c7893e4349ba820626d6f Mon Sep 17 00:00:00 2001 From: Matthew Abruzzo Date: Wed, 1 Nov 2023 11:54:10 -0400 Subject: [PATCH 581/694] Introduce 2 tweaks to DeviceVector to improve safety of the class 1. Delete implicitly declared constructors/assignment operations. We could definitely define these, but currently they can lead to dereferencing null pointers or double-freeing pointers 2. While I was here, I also added an explicit check that the elements of the vector are trivially copyable. This is the formal requirement for being able to copy an object with a variant of memcpy. --- src/utils/DeviceVector.h | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/src/utils/DeviceVector.h b/src/utils/DeviceVector.h index 4024edf34..7221bedf2 100644 --- a/src/utils/DeviceVector.h +++ b/src/utils/DeviceVector.h @@ -13,6 +13,7 @@ #include #include #include +#include #include // External Includes @@ -35,12 +36,16 @@ namespace cuda_utilities * `data()` method. This class works for any device side pointer, scalar or * array valued. * - * \tparam T Any serialized type where `sizeof(T)` returns correct results - * should work but non-primitive types have not been tested. + * \tparam T Any trivially copyable type where `sizeof(T)` returns correct + * results should work, but non-primitive types have not been tested. */ template class DeviceVector { + static_assert(std::is_trivially_copyable_v, + "DeviceVector can only be used with trivially_copyable types " + "due to the internal usage of memcpy"); + public: /*! * \brief Construct a new Device Vector object by calling the @@ -60,6 +65,15 @@ class DeviceVector */ ~DeviceVector() { _deAllocate(); } + /* The following are deleted because they currently lead to invalid state. + * (But they can all easily be implemented in the future). + */ + DeviceVector() = delete; + DeviceVector(const DeviceVector &) = delete; + DeviceVector(DeviceVector &&) = delete; + DeviceVector &operator=(const DeviceVector &other) = delete; + DeviceVector &operator=(DeviceVector &&other) = delete; + /*! * \brief Get the raw device pointer * From 543c08cd721bf19fbe1ee95c35ce70115cdb2c37 Mon Sep 17 00:00:00 2001 From: Matthew Abruzzo Date: Fri, 3 Nov 2023 12:58:50 -0400 Subject: [PATCH 582/694] revising the static_assert error message in DeviceVector --- src/utils/DeviceVector.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/utils/DeviceVector.h b/src/utils/DeviceVector.h index 7221bedf2..ebe3e4db8 100644 --- a/src/utils/DeviceVector.h +++ b/src/utils/DeviceVector.h @@ -43,8 +43,8 @@ template class DeviceVector { static_assert(std::is_trivially_copyable_v, - "DeviceVector can only be used with trivially_copyable types " - "due to the internal usage of memcpy"); + "DeviceVector can only be used with trivially_copyable types due to the internal " + "usage of functions like cudaMemcpy, cudaMemcpyPeer, cudaMemset"); public: /*! From 98398a6c74ba9359ad6bdbe0f668a431f3d9aa49 Mon Sep 17 00:00:00 2001 From: Matthew Abruzzo Date: Wed, 1 Nov 2023 12:58:47 -0400 Subject: [PATCH 583/694] Define ``procID``, ``nproc``, & ``root`` even without MPI In more detail, these variables were previously declared in mpi_routines.h and were only ever defined when Cholla was compiled with MPI. This commit now makes it so that these variables are defined even when not compiling with MPI. In that scenario, these variables have default values of: - ``procID = 0`` - ``nproc = 1`` - ``root = 0`` To actually accomplish this, I moved these variable declarations to new files called ``global_parallel.h`` and ``global_parallel.cpp``. Based on the reviewer's preference, I would be equally happy to move these to ``global.h`` and ``global.cpp``. The motivation for this is simple. Throughout the codebase there are a number of places where we have code like the following ```c++ if (procID == 0) // or equivalently (procID == root) { // some logic... (possibly with additional #ifdef MPI_CHOLLA statements) } ``` We will now be able to rewrite these sections as ```c++ if (procID == 0) { // or equivalently (procID == root) // some logic... (possibly with #ifdef MPI_CHOLLA statements) } ``` This should let us remove a bunch of ``ifdef MPI_CHOLLA`` statments. --- src/global/global_parallel.cpp | 19 +++++++++++++++++++ src/global/global_parallel.h | 21 +++++++++++++++++++++ src/main.cpp | 5 ++++- src/mpi/mpi_routines.cpp | 5 ++--- src/mpi/mpi_routines.h | 8 +++----- 5 files changed, 49 insertions(+), 9 deletions(-) create mode 100644 src/global/global_parallel.cpp create mode 100644 src/global/global_parallel.h diff --git a/src/global/global_parallel.cpp b/src/global/global_parallel.cpp new file mode 100644 index 000000000..257b50f58 --- /dev/null +++ b/src/global/global_parallel.cpp @@ -0,0 +1,19 @@ +#include "../global/global_parallel.h" + +#include "../utils/error_handling.h" + +// global parallelism related variables + +int procID; /*process rank*/ +int nproc; /*number of processes in global comm*/ +int root; /*rank of root process*/ + +void Init_Global_Parallel_Vars_No_MPI() +{ +#ifdef MPI_CHOLLA + CHOLLA_ERROR("This function should not be executed when compiled with MPI"); +#endif + procID = 0; + nproc = 1; + root = 0; +} \ No newline at end of file diff --git a/src/global/global_parallel.h b/src/global/global_parallel.h new file mode 100644 index 000000000..2ac832c05 --- /dev/null +++ b/src/global/global_parallel.h @@ -0,0 +1,21 @@ +/*! /file global_parallel.h + * /brief Declarations of global variables related to parallelism + * + * While most of these variables were originally defined in mpi_routines.h, + * there are cases where it can be useful to have them defined (with sensible + * defaults) even when compiled without MPI. + */ + +#ifndef GLOBAL_PARALLEL_H +#define GLOBAL_PARALLEL_H + +// NOTE: it would be nice to put the following in a namespace, but that would +// involve a bunch of refactoring + +extern int procID; /*process rank*/ +extern int nproc; /*number of processes executing simulation*/ +extern int root; /*rank of root process*/ +extern int procID_node; /*process rank on node*/ +extern int nproc_node; /*number of processes on node*/ + +#endif /* GLOBAL_PARALLEL_H */ \ No newline at end of file diff --git a/src/main.cpp b/src/main.cpp index 53ce38984..cb6d1a72b 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -47,9 +47,12 @@ int main(int argc, char *argv[]) // start the total time start_total = Get_Time(); -/* Initialize MPI communication */ #ifdef MPI_CHOLLA + /* Initialize MPI communication */ InitializeChollaMPI(&argc, &argv); +#else + // Initialize subset of global parallelism variables usually managed by MPI + Init_Global_Parallel_Vars_No_MPI(); #endif /*MPI_CHOLLA*/ Real dti = 0; // inverse time step, 1.0 / dt diff --git a/src/mpi/mpi_routines.cpp b/src/mpi/mpi_routines.cpp index 0aa9f31c5..de8990c02 100644 --- a/src/mpi/mpi_routines.cpp +++ b/src/mpi/mpi_routines.cpp @@ -8,14 +8,13 @@ #include #include "../global/global.h" + #include "../global/global_parallel.h" #include "../io/io.h" #include "../mpi/cuda_mpi_routines.h" #include "../utils/error_handling.h" /*Global MPI Variables*/ -int procID; /*process rank*/ -int nproc; /*number of processes in global comm*/ -int root; /*rank of root process*/ +// note: some relevant global variables are declared in global_parallel.h int procID_node; /*process rank on node*/ int nproc_node; /*number of MPI processes on node*/ diff --git a/src/mpi/mpi_routines.h b/src/mpi/mpi_routines.h index 5faf7d9cc..eb9c915f8 100644 --- a/src/mpi/mpi_routines.h +++ b/src/mpi/mpi_routines.h @@ -7,6 +7,7 @@ #include #include "../global/global.h" + #include "../global/global_parallel.h" #include "../grid/grid3D.h" #ifdef FFTW @@ -15,11 +16,8 @@ #endif /*FFTW*/ /*Global MPI Variables*/ -extern int procID; /*process rank*/ -extern int nproc; /*number of processes in global comm*/ -extern int root; /*rank of root process*/ -extern int procID_node; /*process rank on node*/ -extern int nproc_node; /*number of MPI processes on node*/ +// NOTE: some variable heavily used by mpi are declared in global_parallel.h +// so that they are defined even when compiled without mpi extern MPI_Comm world; /*global communicator*/ extern MPI_Comm node; /*communicator for each node*/ From 7e4d5bdcb1a26bf166c8dc1865ee0ae4d0b88ca7 Mon Sep 17 00:00:00 2001 From: Matthew Abruzzo Date: Fri, 3 Nov 2023 15:05:29 -0400 Subject: [PATCH 584/694] consolidate global_parallel with global. --- src/global/global.cpp | 20 ++++++++++++++++++++ src/global/global.h | 14 ++++++++++++++ src/global/global_parallel.cpp | 19 ------------------- src/global/global_parallel.h | 21 --------------------- src/mpi/mpi_routines.cpp | 3 +-- src/mpi/mpi_routines.h | 8 +++++--- 6 files changed, 40 insertions(+), 45 deletions(-) delete mode 100644 src/global/global_parallel.cpp delete mode 100644 src/global/global_parallel.h diff --git a/src/global/global.cpp b/src/global/global.cpp index ecb7f2ccb..89347cbda 100644 --- a/src/global/global.cpp +++ b/src/global/global.cpp @@ -57,6 +57,26 @@ int Sgn(Real x) } } +// global mpi-related variables (they are declared here because they are initialized even when +// the MPI_CHOLLA variable is not defined) + +int procID; /*process rank*/ +int nproc; /*number of processes in global comm*/ +int root; /*rank of root process*/ + +/* Used when MPI_CHOLLA is not defined to initialize a subset of the global mpi-related variables + * that still meaningful in non-mpi simulations. + */ +void Init_Global_Parallel_Vars_No_MPI() +{ +#ifdef MPI_CHOLLA + CHOLLA_ERROR("This function should not be executed when compiled with MPI"); +#endif + procID = 0; + nproc = 1; + root = 0; +} + /*! \fn char Trim(char *s) * \brief Gets rid of trailing and leading whitespace. */ char *Trim(char *s) diff --git a/src/global/global.h b/src/global/global.h index 8abe358fc..7c7b0dc13 100644 --- a/src/global/global.h +++ b/src/global/global.h @@ -155,6 +155,20 @@ extern double Get_Time(void); * \brief Mathematical sign function. Returns sign of x. */ extern int Sgn(Real x); +/* Global variables for mpi (but they are also initialized to sensible defaults when not using mpi) + * + * It may make sense to move these back into mpi_routines (but reorganizing the ifdef statements + * would take some work). It may make sense to also put these into their own namespace. + */ +extern int procID; /*process rank*/ +extern int nproc; /*number of processes executing simulation*/ +extern int root; /*rank of root process*/ + +/* Used when MPI_CHOLLA is not defined to initialize a subset of the global mpi-related variables + * that still meaningful in non-mpi simulations. + */ +void Init_Global_Parallel_Vars_No_MPI(); + struct Parameters { int nx; int ny; diff --git a/src/global/global_parallel.cpp b/src/global/global_parallel.cpp deleted file mode 100644 index 257b50f58..000000000 --- a/src/global/global_parallel.cpp +++ /dev/null @@ -1,19 +0,0 @@ -#include "../global/global_parallel.h" - -#include "../utils/error_handling.h" - -// global parallelism related variables - -int procID; /*process rank*/ -int nproc; /*number of processes in global comm*/ -int root; /*rank of root process*/ - -void Init_Global_Parallel_Vars_No_MPI() -{ -#ifdef MPI_CHOLLA - CHOLLA_ERROR("This function should not be executed when compiled with MPI"); -#endif - procID = 0; - nproc = 1; - root = 0; -} \ No newline at end of file diff --git a/src/global/global_parallel.h b/src/global/global_parallel.h deleted file mode 100644 index 2ac832c05..000000000 --- a/src/global/global_parallel.h +++ /dev/null @@ -1,21 +0,0 @@ -/*! /file global_parallel.h - * /brief Declarations of global variables related to parallelism - * - * While most of these variables were originally defined in mpi_routines.h, - * there are cases where it can be useful to have them defined (with sensible - * defaults) even when compiled without MPI. - */ - -#ifndef GLOBAL_PARALLEL_H -#define GLOBAL_PARALLEL_H - -// NOTE: it would be nice to put the following in a namespace, but that would -// involve a bunch of refactoring - -extern int procID; /*process rank*/ -extern int nproc; /*number of processes executing simulation*/ -extern int root; /*rank of root process*/ -extern int procID_node; /*process rank on node*/ -extern int nproc_node; /*number of processes on node*/ - -#endif /* GLOBAL_PARALLEL_H */ \ No newline at end of file diff --git a/src/mpi/mpi_routines.cpp b/src/mpi/mpi_routines.cpp index de8990c02..84644a1be 100644 --- a/src/mpi/mpi_routines.cpp +++ b/src/mpi/mpi_routines.cpp @@ -8,13 +8,12 @@ #include #include "../global/global.h" - #include "../global/global_parallel.h" #include "../io/io.h" #include "../mpi/cuda_mpi_routines.h" #include "../utils/error_handling.h" /*Global MPI Variables*/ -// note: some relevant global variables are declared in global_parallel.h +// note: some relevant global variables are declared in global.h int procID_node; /*process rank on node*/ int nproc_node; /*number of MPI processes on node*/ diff --git a/src/mpi/mpi_routines.h b/src/mpi/mpi_routines.h index eb9c915f8..913b5e36a 100644 --- a/src/mpi/mpi_routines.h +++ b/src/mpi/mpi_routines.h @@ -7,7 +7,6 @@ #include #include "../global/global.h" - #include "../global/global_parallel.h" #include "../grid/grid3D.h" #ifdef FFTW @@ -16,8 +15,11 @@ #endif /*FFTW*/ /*Global MPI Variables*/ -// NOTE: some variable heavily used by mpi are declared in global_parallel.h -// so that they are defined even when compiled without mpi +// NOTE: some variable heavily used by mpi are declared in global.h so that they are defined even +// when compiled without mpi + +extern int procID_node; /*process rank on node*/ +extern int nproc_node; /*number of MPI processes on node*/ extern MPI_Comm world; /*global communicator*/ extern MPI_Comm node; /*communicator for each node*/ From c37311629005d66f13fa5308a3fc0da35c7d867c Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Sun, 1 Oct 2023 08:54:21 -0400 Subject: [PATCH 585/694] Refactor cat_dset_3d.py for easier parallelization --- python_scripts/cat_dset_3D.py | 191 +++++++++++++++++++++------------- 1 file changed, 118 insertions(+), 73 deletions(-) diff --git a/python_scripts/cat_dset_3D.py b/python_scripts/cat_dset_3D.py index 4cff6dc9a..7c403933e 100755 --- a/python_scripts/cat_dset_3D.py +++ b/python_scripts/cat_dset_3D.py @@ -17,9 +17,10 @@ import argparse import pathlib +# ====================================================================================================================== def main(): """This function handles the CLI argument parsing and is only intended to be used when this script is invoked from the - command line. If you're importing this file then use the `concat_3d` function directly. + command line. If you're importing this file then use the `concat_3d` or `concat_3d_single` functions directly. """ # Argument handling cli = argparse.ArgumentParser() @@ -30,6 +31,10 @@ def main(): # Optional Arguments cli.add_argument('-i', '--input_dir', type=pathlib.Path, default=pathlib.Path.cwd(), help='The input directory.') cli.add_argument('-o', '--output_dir', type=pathlib.Path, default=pathlib.Path.cwd(), help='The output directory.') + cli.add_argument('--skip-fields', type=list, default=[], help='List of fields to skip concatenating. Defaults to empty.') + cli.add_argument('--dtype', type=str, default=None, help='The data type of the output datasets. Accepts most numpy types. Defaults to the same as the input datasets.') + cli.add_argument('--compression-type', type=str, default=None, help='What kind of compression to use on the output data. Defaults to None.') + cli.add_argument('--compression-opts', type=str, default=None, help='What compression settings to use if compressing. Defaults to None.') args = cli.parse_args() # Perform the concatenation @@ -37,15 +42,23 @@ def main(): end_num=args.end_num, num_processes=args.num_processes, input_dir=args.input_dir, - output_dir=args.output_dir) - + output_dir=args.output_dir, + skip_fields=args.skip_fields, + destination_dtype=args.dtype, + compression_type=args.compression_type, + compression_options=args.compression_opts) +# ====================================================================================================================== # ====================================================================================================================== def concat_3d(start_num: int, end_num: int, num_processes: int, input_dir: pathlib.Path = pathlib.Path.cwd(), - output_dir: pathlib.Path = pathlib.Path.cwd()): + output_dir: pathlib.Path = pathlib.Path.cwd(), + skip_fields: list = [], + destination_dtype: np.dtype = None, + compression_type: str = None, + compression_options: str = None): """Concatenate 3D HDF5 Cholla datasets. i.e. take the single files generated per process and concatenate them into a single, large file. All outputs from start_num to end_num will be concatenated. @@ -55,6 +68,10 @@ def concat_3d(start_num: int, num_processes (int): The number of processes that were used input_dir (pathlib.Path, optional): The input directory. Defaults to pathlib.Path.cwd(). output_dir (pathlib.Path, optional): The output directory. Defaults to pathlib.Path.cwd(). + skip_fields (list, optional): List of fields to skip concatenating. Defaults to []. + destination_dtype (np.dtype, optional): The data type of the output datasets. Accepts most numpy types. Defaults to the same as the input datasets. + compression_type (str, optional): What kind of compression to use on the output data. Defaults to None. + compression_options (str, optional): What compression settings to use if compressing. Defaults to None. """ # Error checking @@ -65,77 +82,105 @@ def concat_3d(start_num: int, # loop over outputs for n in range(start_num, end_num+1): + concat_3d_single(output_number=n, + num_processes=num_processes, + input_dir=input_dir, + output_dir=output_dir, + skip_fields=skip_fields, + destination_dtype=destination_dtype, + compression_type=compression_type, + compression_options=compression_options) +# ====================================================================================================================== - # loop over files for a given output - for i in range(0, num_processes): - - # open the output file for writing (don't overwrite if exists) - fileout = h5py.File(output_dir / f'{n}.h5', 'a') - # open the input file for reading - filein = h5py.File(input_dir / f'{n}.h5.{i}', 'r') - # read in the header data from the input file - head = filein.attrs - - # if it's the first input file, write the header attributes - # and create the datasets in the output file - if (i == 0): - nx = head['dims'][0] - ny = head['dims'][1] - nz = head['dims'][2] - fileout.attrs['dims'] = [nx, ny, nz] - fileout.attrs['gamma'] = [head['gamma'][0]] - fileout.attrs['t'] = [head['t'][0]] - fileout.attrs['dt'] = [head['dt'][0]] - fileout.attrs['n_step'] = [head['n_step'][0]] - - units = ['time_unit', 'mass_unit', 'length_unit', 'energy_unit', 'velocity_unit', 'density_unit'] - for unit in units: - fileout.attrs[unit] = [head[unit][0]] - - d = fileout.create_dataset("density", (nx, ny, nz), chunks=True, dtype=filein['density'].dtype) - mx = fileout.create_dataset("momentum_x", (nx, ny, nz), chunks=True, dtype=filein['momentum_x'].dtype) - my = fileout.create_dataset("momentum_y", (nx, ny, nz), chunks=True, dtype=filein['momentum_y'].dtype) - mz = fileout.create_dataset("momentum_z", (nx, ny, nz), chunks=True, dtype=filein['momentum_z'].dtype) - E = fileout.create_dataset("Energy", (nx, ny, nz), chunks=True, dtype=filein['Energy'].dtype) - try: - GE = fileout.create_dataset("GasEnergy", (nx, ny, nz), chunks=True, dtype=filein['GasEnergy'].dtype) - except KeyError: - print('No Dual energy data present'); - try: - bx = fileout.create_dataset("magnetic_x", (nx+1, ny, nz), chunks=True, dtype=filein['magnetic_x'].dtype) - by = fileout.create_dataset("magnetic_y", (nx, ny+1, nz), chunks=True, dtype=filein['magnetic_y'].dtype) - bz = fileout.create_dataset("magnetic_z", (nx, ny, nz+1), chunks=True, dtype=filein['magnetic_z'].dtype) - except KeyError: - print('No magnetic field data present'); - - # write data from individual processor file to - # correct location in concatenated file - nxl = head['dims_local'][0] - nyl = head['dims_local'][1] - nzl = head['dims_local'][2] - xs = head['offset'][0] - ys = head['offset'][1] - zs = head['offset'][2] - fileout['density'][xs:xs+nxl,ys:ys+nyl,zs:zs+nzl] = filein['density'] - fileout['momentum_x'][xs:xs+nxl,ys:ys+nyl,zs:zs+nzl] = filein['momentum_x'] - fileout['momentum_y'][xs:xs+nxl,ys:ys+nyl,zs:zs+nzl] = filein['momentum_y'] - fileout['momentum_z'][xs:xs+nxl,ys:ys+nyl,zs:zs+nzl] = filein['momentum_z'] - fileout['Energy'][xs:xs+nxl,ys:ys+nyl,zs:zs+nzl] = filein['Energy'] - try: - fileout['GasEnergy'][xs:xs+nxl,ys:ys+nyl,zs:zs+nzl] = filein['GasEnergy'] - except KeyError: - print('No Dual energy data present'); - try: - fileout['magnetic_x'][xs:xs+nxl+1, ys:ys+nyl, zs:zs+nzl] = filein['magnetic_x'] - fileout['magnetic_y'][xs:xs+nxl, ys:ys+nyl+1, zs:zs+nzl] = filein['magnetic_y'] - fileout['magnetic_z'][xs:xs+nxl, ys:ys+nyl, zs:zs+nzl+1] = filein['magnetic_z'] - except KeyError: - print('No magnetic field data present'); - - filein.close() - - fileout.close() # ====================================================================================================================== +def concat_3d_single(output_number: int, + num_processes: int, + input_dir: pathlib.Path = pathlib.Path.cwd(), + output_dir: pathlib.Path = pathlib.Path.cwd(), + skip_fields: list = [], + destination_dtype: np.dtype = None, + compression_type: str = None, + compression_options: str = None): + """Concatenate a single 3D HDF5 Cholla dataset. i.e. take the single files generated per process and concatenate them into a + single, large file. + + Args: + output_number (int): The output + end_num (int): The last output step to concatenate + num_processes (int): The number of processes that were used + input_dir (pathlib.Path, optional): The input directory. Defaults to pathlib.Path.cwd(). + output_dir (pathlib.Path, optional): The output directory. Defaults to pathlib.Path.cwd(). + skip_fields (list, optional): List of fields to skip concatenating. Defaults to []. + destination_dtype (np.dtype, optional): The data type of the output datasets. Accepts most numpy types. Defaults to the same as the input datasets. + compression_type (str, optional): What kind of compression to use on the output data. Defaults to None. + compression_options (str, optional): What compression settings to use if compressing. Defaults to None. + """ + + # Error checking + assert num_processes > 1, 'num_processes must be greater than 1' + assert output_number >= 0, 'output_number must be greater than or equal to 0' + + # open the output file for writing (don't overwrite if exists) + fileout = h5py.File(output_dir / f'{output_number}.h5', 'a') + + # Setup the output file + with h5py.File(input_dir / f'{output_number}.h5.0', 'r') as source_file: + # Copy header data + fileout = copy_header(source_file, fileout) + + # Create the datasets in the output file + datasets_to_copy = list(source_file.keys()) + datasets_to_copy = [dataset for dataset in datasets_to_copy if not dataset in skip_fields] + + for dataset in datasets_to_copy: + dtype = source_file[dataset].dtype if (destination_dtype == None) else destination_dtype + + data_shape = source_file.attrs['dims'] + + fileout.create_dataset(name=dataset, + shape=data_shape, + dtype=dtype, + compression=compression_type, + compression_opts=compression_options) + + # loop over files for a given output + for i in range(0, num_processes): + # open the input file for reading + filein = h5py.File(input_dir / f'{output_number}.h5.{i}', 'r') + # read in the header data from the input file + head = filein.attrs + + # write data from individual processor file to correct location in concatenated file + nx_local, ny_local, nz_local = filein.attrs['dims_local'] + x_start, y_start, z_start = filein.attrs['offset'] + + for dataset in datasets_to_copy: + fileout[dataset][x_start:x_start+nx_local, y_start:y_start+ny_local,z_start:z_start+nz_local] = filein[dataset] + + filein.close() + + fileout.close() +# ====================================================================================================================== + +# ============================================================================== +def copy_header(source_file: h5py.File, destination_file: h5py.File): + """Copy the attributes of one HDF5 file to another, skipping all fields that are specific to an individual rank + + Args: + source_file (h5py.File): The source file + destination_file (h5py.File): The destination file + + Returns: + h5py.File: The destination file with the new header attributes + """ + fields_to_skip = ['dims_local', 'offset'] + + for attr_key in source_file.attrs.keys(): + if attr_key not in fields_to_skip: + destination_file.attrs[attr_key] = source_file.attrs[attr_key] + + return destination_file +# ============================================================================== if __name__ == '__main__': main() From 47f943002e00ece296139ab350b8be60a5036595 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Sun, 1 Oct 2023 13:05:04 -0400 Subject: [PATCH 586/694] Refactor cat_slice.py to be more flexible + a CLI Adds a CLI to cat_slice.py, removes all the hardcoded variables. Also, adds a new function internally, `cat_slice` that can be imported into other scripts and used from there, including in parallel with Dask. --- python_scripts/cat_slice.py | 332 ++++++++++++++++++++++-------------- 1 file changed, 206 insertions(+), 126 deletions(-) diff --git a/python_scripts/cat_slice.py b/python_scripts/cat_slice.py index 7b6d15e12..51aae2d6d 100644 --- a/python_scripts/cat_slice.py +++ b/python_scripts/cat_slice.py @@ -1,130 +1,210 @@ -# Example file for concatenating on-axis slice data -# created when the -DSLICES flag is turned on +#!/usr/bin/env python3 +""" +Python script for concatenating slice hdf5 datasets for when -DSLICES is turned +on in Cholla. Includes a CLI for concatenating Cholla HDF5 datasets and can be +imported into other scripts where the `concat_slice` function can be used to +concatenate the HDF5 files. + +Generally the easiest way to import this script is to add the `python_scripts` +directory to your python path in your script like this: +``` +import sys +sys.path.append('/PATH/TO/CHOLLA/python_scripts') +import cat_slice +``` +""" import h5py +import argparse +import pathlib import numpy as np -ns = 0 -ne = 2 -n_procs = 4 # number of processors that did the cholla calculation -dnamein = '/gpfs/alpine/proj-shared/csc380/orlandow/o_cholla/out.21Sep20-Mon-14.17-357075-SOR_HYDRO_DISK/raw/' -dnameout = '/gpfs/alpine/proj-shared/csc380/orlandow/o_cholla/out.21Sep20-Mon-14.17-357075-SOR_HYDRO_DISK/catted_files' - -DE = True # set to True if Dual Energy flag was used -SCALAR = False # set to True if Scalar was used - -# loop over the output times -for n in range(ns, ne+1): - - # open the output file for writing - fileout = h5py.File(dnameout+str(n)+'_slice.h5', 'w') - - # loop over files for a given output time - for i in range(0, n_procs): - - # open the input file for reading - filein = h5py.File(dnamein+str(n)+'_slice.h5.'+str(i), 'r') - # read in the header data from the input file - head = filein.attrs - - # if it's the first input file, write the header attributes - # and create the datasets in the output file - if (i == 0): - gamma = head['gamma'] - t = head['t'] - dt = head['dt'] - n_step = head['n_step'] - nx = head['dims'][0] - ny = head['dims'][1] - nz = head['dims'][2] - fileout.attrs['gamma'] = gamma - fileout.attrs['t'] = t - fileout.attrs['dt'] = dt - fileout.attrs['n_step'] = n_step - fileout.attrs['dims'] = [nx, ny, nz] - - d_xy = np.zeros((nx,ny)) - d_xz = np.zeros((nx,nz)) - d_yz = np.zeros((ny,nz)) - mx_xy = np.zeros((nx,ny)) - mx_xz = np.zeros((nx,nz)) - mx_yz = np.zeros((ny,nz)) - my_xy = np.zeros((nx,ny)) - my_xz = np.zeros((nx,nz)) - my_yz = np.zeros((ny,nz)) - mz_xy = np.zeros((nx,ny)) - mz_xz = np.zeros((nx,nz)) - mz_yz = np.zeros((ny,nz)) - E_xy = np.zeros((nx,ny)) - E_xz = np.zeros((nx,nz)) - E_yz = np.zeros((ny,nz)) - if DE: - GE_xy = np.zeros((nx,ny)) - GE_xz = np.zeros((nx,nz)) - GE_yz = np.zeros((ny,nz)) - if SCALAR: - scalar_xy = np.zeros((nx,ny)) - scalar_xz = np.zeros((nx,nz)) - scalar_yz = np.zeros((ny,nz)) - - # write data from individual processor file to - # correct location in concatenated file - nxl = head['dims_local'][0] - nyl = head['dims_local'][1] - nzl = head['dims_local'][2] - xs = head['offset'][0] - ys = head['offset'][1] - zs = head['offset'][2] - - d_xy[xs:xs+nxl,ys:ys+nyl] += filein['d_xy'] - d_xz[xs:xs+nxl,zs:zs+nzl] += filein['d_xz'] - d_yz[ys:ys+nyl,zs:zs+nzl] += filein['d_yz'] - mx_xy[xs:xs+nxl,ys:ys+nyl] += filein['mx_xy'] - mx_xz[xs:xs+nxl,zs:zs+nzl] += filein['mx_xz'] - mx_yz[ys:ys+nyl,zs:zs+nzl] += filein['mx_yz'] - my_xy[xs:xs+nxl,ys:ys+nyl] += filein['my_xy'] - my_xz[xs:xs+nxl,zs:zs+nzl] += filein['my_xz'] - my_yz[ys:ys+nyl,zs:zs+nzl] += filein['my_yz'] - mz_xy[xs:xs+nxl,ys:ys+nyl] += filein['mz_xy'] - mz_xz[xs:xs+nxl,zs:zs+nzl] += filein['mz_xz'] - mz_yz[ys:ys+nyl,zs:zs+nzl] += filein['mz_yz'] - E_xy[xs:xs+nxl,ys:ys+nyl] += filein['E_xy'] - E_xz[xs:xs+nxl,zs:zs+nzl] += filein['E_xz'] - E_yz[ys:ys+nyl,zs:zs+nzl] += filein['E_yz'] - if DE: - GE_xy[xs:xs+nxl,ys:ys+nyl] += filein['GE_xy'] - GE_xz[xs:xs+nxl,zs:zs+nzl] += filein['GE_xz'] - GE_yz[ys:ys+nyl,zs:zs+nzl] += filein['GE_yz'] - if SCALAR: - scalar_xy[xs:xs+nxl,ys:ys+nyl] += filein['scalar_xy'] - scalar_xz[xs:xs+nxl,zs:zs+nzl] += filein['scalar_xz'] - scalar_yz[ys:ys+nyl,zs:zs+nzl] += filein['scalar_yz'] - - filein.close() - - # wrte out the new datasets - fileout.create_dataset('d_xy', data=d_xy) - fileout.create_dataset('d_xz', data=d_xz) - fileout.create_dataset('d_yz', data=d_yz) - fileout.create_dataset('mx_xy', data=mx_xy) - fileout.create_dataset('mx_xz', data=mx_xz) - fileout.create_dataset('mx_yz', data=mx_yz) - fileout.create_dataset('my_xy', data=my_xy) - fileout.create_dataset('my_xz', data=my_xz) - fileout.create_dataset('my_yz', data=my_yz) - fileout.create_dataset('mz_xy', data=mz_xy) - fileout.create_dataset('mz_xz', data=mz_xz) - fileout.create_dataset('mz_yz', data=mz_yz) - fileout.create_dataset('E_xy', data=E_xy) - fileout.create_dataset('E_xz', data=E_xz) - fileout.create_dataset('E_yz', data=E_yz) - if DE: - fileout.create_dataset('GE_xy', data=GE_xy) - fileout.create_dataset('GE_xz', data=GE_xz) - fileout.create_dataset('GE_yz', data=GE_yz) - if SCALAR: - fileout.create_dataset('scalar_xy', data=scalar_xy) - fileout.create_dataset('scalar_xz', data=scalar_xz) - fileout.create_dataset('scalar_yz', data=scalar_yz) - - fileout.close() +from cat_dset_3D import copy_header + +# ============================================================================== +def main(): + """This function handles the CLI argument parsing and is only intended to be used when this script is invoked from the + command line. If you're importing this file then use the `concat_slice` function directly. + """ + # Argument handling + cli = argparse.ArgumentParser() + # Required Arguments + cli.add_argument('-s', '--source-directory', type=pathlib.Path, required=True, help='The path to the source HDF5 files.') + cli.add_argument('-o', '--output-file', type=pathlib.Path, required=True, help='The path and filename of the concatenated file.') + cli.add_argument('-n', '--num-processes', type=int, required=True, help='The number of processes that were used to generate the slices.') + cli.add_argument('-t', '--output-num', type=int, required=True, help='The output number to be concatenated') + # Optional Arguments + cli.add_argument('--xy', type=bool, default=True, help='If True then concatenate the XY slice. Defaults to True.') + cli.add_argument('--yz', type=bool, default=True, help='If True then concatenate the YZ slice. Defaults to True.') + cli.add_argument('--xz', type=bool, default=True, help='If True then concatenate the XZ slice. Defaults to True.') + cli.add_argument('--skip-fields', type=list, default=[], help='List of fields to skip concatenating. Defaults to empty.') + cli.add_argument('--dtype', type=str, default=None, help='The data type of the output datasets. Accepts most numpy types. Defaults to the same as the input datasets.') + cli.add_argument('--compression-type', type=str, default=None, help='What kind of compression to use on the output data. Defaults to None.') + cli.add_argument('--compression-opts', type=str, default=None, help='What compression settings to use if compressing. Defaults to None.') + args = cli.parse_args() + + # Perform the concatenation + concat_slice(source_directory=args.source_directory, + destination_file_path=args.output_file, + num_ranks=args.num_processses, + output_number=args.output_num, + concat_xy=args.xy, + concat_yz=args.yz, + concat_xz=args.xz, + skip_fields=args.skip_fields, + destination_dtype=args.dtype, + compression_type=args.compression_type, + compression_options=args.compression_opts) +# ============================================================================== + +# ============================================================================== +def concat_slice(source_directory: pathlib.Path, + destination_file_path: pathlib.Path, + num_ranks: int, + output_number: int, + concat_xy: bool = True, + concat_yz: bool = True, + concat_xz: bool = True, + skip_fields: list = [], + destination_dtype: np.dtype = None, + compression_type: str = None, + compression_options: str = None): + """Concatenate slice HDF5 Cholla datasets. i.e. take the single files + generated per process and concatenate them into a single, large file. This + function concatenates a single output time and can be called multiple times, + potentially in parallel, to concatenate multiple output times. + + Args: + source_directory (pathlib.Path): The directory containing the unconcatenated files + destination_file_path (pathlib.Path): The path and name of the new concatenated file + num_ranks (int): The number of ranks that Cholla was run with + output_number (int): The output number to concatenate + concat_xy (bool, optional): If True then concatenate the XY slice. Defaults to True. + concat_yz (bool, optional): If True then concatenate the YZ slice. Defaults to True. + concat_xz (bool, optional): If True then concatenate the XZ slice. Defaults to True. + skip_fields (list, optional): List of fields to skip concatenating. Defaults to []. + destination_dtype (np.dtype, optional): The data type of the output datasets. Accepts most numpy types. Defaults to the same as the input datasets. + compression_type (str, optional): What kind of compression to use on the output data. Defaults to None. + compression_options (str, optional): What compression settings to use if compressing. Defaults to None. + """ + # Open destination file and first file for getting metadata + source_file = h5py.File(source_directory / f'{output_number}_slice.h5.0', 'r') + destination_file = h5py.File(destination_file_path, 'w') + + # Copy over header + destination_file = copy_header(source_file, destination_file) + + # Get a list of all datasets in the source file + datasets_to_copy = list(source_file.keys()) + + # Filter the datasets to only include those I wish to copy + if not concat_xy: + datasets_to_copy = [dataset for dataset in datasets_to_copy if not 'xy' in dataset] + if not concat_yz: + datasets_to_copy = [dataset for dataset in datasets_to_copy if not 'yz' in dataset] + if not concat_xz: + datasets_to_copy = [dataset for dataset in datasets_to_copy if not 'xz' in dataset] + datasets_to_copy = [dataset for dataset in datasets_to_copy if not dataset in skip_fields] + + # Create the datasets in the destination file + for dataset in datasets_to_copy: + dtype = source_file[dataset].dtype if (destination_dtype == None) else destination_dtype + + slice_shape = get_slice_shape(source_file, dataset) + + destination_file.create_dataset(name=dataset, + shape=slice_shape, + dtype=dtype, + compression=compression_type, + compression_opts=compression_options) + + # Close source file in prep for looping through source files + source_file.close() + + # Copy data + for rank in range(num_ranks): + # Open source file + source_file = h5py.File(source_directory / f'{output_number}_slice.h5.{rank}', 'r') + + # Loop through and copy datasets + for dataset in datasets_to_copy: + # Determine locations and shifts for writing + (i0_start, i0_end, i1_start, i1_end), file_in_slice = write_bounds(source_file, dataset) + + if file_in_slice: + # Copy the data + destination_file[dataset][i0_start:i0_end, i1_start:i1_end] = source_file[dataset] + + # Now that the copy is done we close the source file + source_file.close() + + # Close destination file now that it is fully constructed + destination_file.close() +# ============================================================================== + +# ============================================================================== +def get_slice_shape(source_file: h5py.File, dataset: str): + """Determine the shape of the full slice in a dataset + + Args: + source_file (h5py.File): The source file the get the shape information from + dataset (str): The dataset to get the shape of + + Raises: + ValueError: If the dataset name isn't a slice name + + Returns: + tuple: The 2D dimensions of the slice + """ + nx, ny, nz = source_file.attrs['dims'] + + if 'xy' in dataset: + slice_dimensions = (nx, ny) + elif 'yz' in dataset: + slice_dimensions = (ny, nz) + elif 'xz' in dataset: + slice_dimensions = (nx, nz) + else: + raise ValueError(f'Dataset "{dataset}" is not a slice.') + + return slice_dimensions +# ============================================================================== + +# ============================================================================== +def write_bounds(source_file: h5py.File, dataset: str): + """Determine the bounds of the concatenated file to write to + + Args: + source_file (h5py.File): The source file to read from + dataset (str): The name of the dataset to read from the source file + + Raises: + ValueError: If the dataset name isn't a slice name + + Returns: + tuple: The write bounds for the concatenated file to be used like `output_file[dataset][return[0]:return[1], return[2]:return[3]] + """ + nx, ny, nz = source_file.attrs['dims'] + nx_local, ny_local, nz_local = source_file.attrs['dims_local'] + x_start, y_start, z_start = source_file.attrs['offset'] + + if 'xy' in dataset: + file_in_slice = z_start <= nz//2 <= z_start+nz_local + bounds = (x_start, x_start+nx_local, y_start, y_start+ny_local) + elif 'yz' in dataset: + file_in_slice = x_start <= nx//2 <= x_start+nx_local + bounds = (y_start, y_start+ny_local, z_start, z_start+nz_local) + elif 'xz' in dataset: + file_in_slice = y_start <= ny//2 <= y_start+ny_local + bounds = (x_start, x_start+nx_local, z_start, z_start+nz_local) + else: + raise ValueError(f'Dataset "{dataset}" is not a slice.') + + return bounds, file_in_slice +# ============================================================================== + +if __name__ == '__main__': + from timeit import default_timer + start = default_timer() + main() + print(f'\nTime to execute: {round(default_timer()-start,2)} seconds') From ecc13aebb069986429c0c2f7a447b55ff427e220 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Sun, 1 Oct 2023 13:32:26 -0400 Subject: [PATCH 587/694] Add python templates for using Dask One template for using Dask on a single machine and one for use on a distributed system, specifically OLCF systems Andes, Crusher, and Frontier. --- python_scripts/dask_distributed_template.py | 132 ++++++++++++++++++ .../dask_single_machine_template.py | 47 +++++++ 2 files changed, 179 insertions(+) create mode 100755 python_scripts/dask_distributed_template.py create mode 100755 python_scripts/dask_single_machine_template.py diff --git a/python_scripts/dask_distributed_template.py b/python_scripts/dask_distributed_template.py new file mode 100755 index 000000000..ac40294b2 --- /dev/null +++ b/python_scripts/dask_distributed_template.py @@ -0,0 +1,132 @@ +#!/usr/bin/env python3 +""" +This is the skeleton for how to run a Dask script on Andes at the OLCF. The CLI +commands required are in the docstring at the top, major Dask steps are in +functions, and `main` is mostly empty with a clear area on where to do your +computations. + +Requirements: - Verified working with Dask v2023.6.0 - Install graphviz for +python + - 'conda install -c conda-forge python-graphviz graphviz' + - Make sure your version of msgpack-python is at least v1.0.5; v1.0.3 had a bug + - `conda install -c conda-forge msgpack-python=1.0.5` + +Notes: +- This is entirely focused on getting Dask to run on Andes, Crusher, and + Frontier. Other systems will likely need similar steps but not identical +- Between each python script the Dask scheduler and workers need to be + restarted. +- "--interface ib0" does not seem to be required but likely does improve + transfer speeds. On Crusher it throws an error, just omit it +- It likes to spit out lots of ugly messages on shutdown that look like + something failed. Odds are that it worked fine and just didn't shutdown + gracefully +- On OLCF systems Dask seems to hang on setup if you use more than 256 + processes. I haven't dug too deeply into it but for now it might be better to + limit jobs to that size and run them longer or run multiple jobs, potentially + an array job +- On OLCF systems it doesn't always end the job properly and the job will just + keep running and do nothing. Either set short walltimes so it times out or + just keep an eye on it. Maybe end with the script sending an exit command + +################################################################################ +#!/usr/bin/env bash + +#SBATCH -A +#SBATCH -J +#SBATCH -o /%x-%j.out +#SBATCH -t 04:00:00 +#SBATCH -p batch +#SBATCH -N 32 +#SBATCH --mail-user= #SBATCH --mail-type=ALL + +# Setup some parameters DASK_SCHEDULE_FILE=$(pwd)/dask_schedule_file.json +DASK_NUM_WORKERS=$((SLURM_JOB_NUM_NODES*8)) + +# Add any scripts that you're importing to the PYTHONPATH, even ones in the same +# directory. The worker tasks have their own directories and won't find any of +# your scripts unless they're in the PYTHONPATH +export PYTHONPATH="${PYTHONPATH}:/your/path/here" + +INTERFACE='--interface ib0' # For Andes +# INTERFACE='' # For Crusher + +srun --exclusive --ntasks=1 dask scheduler $INTERFACE --scheduler-file $DASK_SCHEDULE_FILE --no-dashboard --no-show & + +# Wait for the dask-scheduler to start +sleep 30 + +srun --exclusive --ntasks=$DASK_NUM_WORKERS dask worker --scheduler-file $DASK_SCHEDULE_FILE --memory-limit='auto' --worker-class distributed.Worker $INTERFACE --no-dashboard --local-directory & + +# Wait for workers to start +sleep 10 + +python -u ./dask-distributed-template.py --scheduler-file $DASK_SCHEDULE_FILE --num-workers $DASK_NUM_WORKERS + +wait +################################################################################ +""" + +import dask +import dask.array as da +import dask.dataframe as dd +from dask.distributed import Client +from dask import graph_manipulation + +import pathlib +import argparse + +# ============================================================================== +def main(): + # Get command line arguments + cli = argparse.ArgumentParser() + # Required Arguments + cli.add_argument('-N', '--num-workers', type=int, required=True, help='The number of workers to use') + cli.add_argument('-s', '--scheduler-file', type=pathlib.Path, required=True, help='The path to the scheduler file') + # Optional Arguments + # none yet, feel free to add your own + args = cli.parse_args() + + # Setup the Dask cluster + client = startup_dask(args.scheduler_file, args.num_workers) + + # Perform your computation + # ... + # ... + # ... + # Some suggestions: + # - If you're using Delayed then append all tasks to a list and execute them with `dask.compute(*command_list)` + # - Visualize task tree with `dask.visualize(*command_list, filename=str('filename.pdf')) + # - Add dependencies manually with `dask.graph_manipulation.bind(dependent_task, list_of_dependencies)` + # End of Computation + + # Shutdown the Dask cluster + shutdown_dask(client) +# ============================================================================== + +# ============================================================================== +def startup_dask(scheduler_file, num_workers): + # Connect to the dask-cluster + client = Client(scheduler_file=scheduler_file) + print('client information ', client) + + # Block until num_workers are ready + print(f'Waiting for {num_workers} workers...') + client.wait_for_workers(n_workers=num_workers) + + num_connected_workers = len(client.scheduler_info()['workers']) + print(f'{num_connected_workers} workers connected') + + return client +# ============================================================================== + +# ============================================================================== +def shutdown_dask(client): + print('Shutting down the cluster') + workers_list = list(client.scheduler_info()['workers']) + client.retire_workers(workers_list, close_workers=True) + client.shutdown() +# ============================================================================== + +if __name__ == '__main__': + main() diff --git a/python_scripts/dask_single_machine_template.py b/python_scripts/dask_single_machine_template.py new file mode 100755 index 000000000..7816ec791 --- /dev/null +++ b/python_scripts/dask_single_machine_template.py @@ -0,0 +1,47 @@ +#!/usr/bin/env python3 +""" +================================================================================ + Written by Robert Caddy. + + A simple template for Dask scripts running on a single machine +================================================================================ +""" + +import dask +import dask.array as da +import dask.dataframe as dd +from dask import graph_manipulation + +import argparse +import pathlib + +# ============================================================================== +def main(): + cli = argparse.ArgumentParser() + # Required Arguments + # Optional Arguments + cli.add_argument('-n', '--num-workers', type=int, default=8, help='The number of workers to use.') + args = cli.parse_args() + + # Set scheduler type. Options are 'threads', 'processes', 'single-threaded', and 'distributed'. + # - 'threads' uses threads that share memory, often fastest on single machines, can run into issuse with the GIL + # - 'processes' uses multiple processes that do not share memory, can be used to get around issues with the GIL + # - `single-threaded` is great for debugging + dask.config.set(scheduler='processes', num_workers=args.num_workers) + + # Perform your computation + # ... + # ... + # ... + # Some suggestions: + # - If you're using Delayed then append all tasks to a list and execute them with `dask.compute(*command_list)` + # - Visualize task tree with `dask.visualize(*command_list, filename=str('filename.pdf')) + # - Add dependencies manually with `dask.graph_manipulation.bind(dependent_task, list_of_dependencies)` + # End of Computation +# ============================================================================== + +if __name__ == '__main__': + from timeit import default_timer + start = default_timer() + main() + print(f'\nTime to execute: {round(default_timer()-start,2)} seconds') From 6120ad3a6df5cc2fdcc23137fddddba7b306e374 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Mon, 30 Oct 2023 15:19:23 -0400 Subject: [PATCH 588/694] Refactor slice & dset_3d scripts with common structure The two scripts now have nearly identical CLI and structure --- python_scripts/cat_dset_3D.py | 244 ++++++++++++++++++++-------------- python_scripts/cat_slice.py | 140 +++++++++---------- 2 files changed, 205 insertions(+), 179 deletions(-) mode change 100644 => 100755 python_scripts/cat_slice.py diff --git a/python_scripts/cat_dset_3D.py b/python_scripts/cat_dset_3D.py index 7c403933e..959d692ae 100755 --- a/python_scripts/cat_dset_3D.py +++ b/python_scripts/cat_dset_3D.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 """ Python script for concatenating 3D hdf5 datasets. Includes a CLI for concatenating Cholla HDF5 datasets and can be -imported into other scripts where the `concat_3d` function can be used to concatenate the datasets. +imported into other scripts where the `concat_3d_field` function can be used to concatenate the datasets. Generally the easiest way to import this script is to add the `python_scripts` directory to your python path in your script like this: @@ -18,85 +18,10 @@ import pathlib # ====================================================================================================================== -def main(): - """This function handles the CLI argument parsing and is only intended to be used when this script is invoked from the - command line. If you're importing this file then use the `concat_3d` or `concat_3d_single` functions directly. - """ - # Argument handling - cli = argparse.ArgumentParser() - # Required Arguments - cli.add_argument('-s', '--start_num', type=int, required=True, help='The first output step to concatenate') - cli.add_argument('-e', '--end_num', type=int, required=True, help='The last output step to concatenate') - cli.add_argument('-n', '--num_processes', type=int, required=True, help='The number of processes that were used') - # Optional Arguments - cli.add_argument('-i', '--input_dir', type=pathlib.Path, default=pathlib.Path.cwd(), help='The input directory.') - cli.add_argument('-o', '--output_dir', type=pathlib.Path, default=pathlib.Path.cwd(), help='The output directory.') - cli.add_argument('--skip-fields', type=list, default=[], help='List of fields to skip concatenating. Defaults to empty.') - cli.add_argument('--dtype', type=str, default=None, help='The data type of the output datasets. Accepts most numpy types. Defaults to the same as the input datasets.') - cli.add_argument('--compression-type', type=str, default=None, help='What kind of compression to use on the output data. Defaults to None.') - cli.add_argument('--compression-opts', type=str, default=None, help='What compression settings to use if compressing. Defaults to None.') - args = cli.parse_args() - - # Perform the concatenation - concat_3d(start_num=args.start_num, - end_num=args.end_num, - num_processes=args.num_processes, - input_dir=args.input_dir, - output_dir=args.output_dir, - skip_fields=args.skip_fields, - destination_dtype=args.dtype, - compression_type=args.compression_type, - compression_options=args.compression_opts) -# ====================================================================================================================== - -# ====================================================================================================================== -def concat_3d(start_num: int, - end_num: int, - num_processes: int, - input_dir: pathlib.Path = pathlib.Path.cwd(), - output_dir: pathlib.Path = pathlib.Path.cwd(), - skip_fields: list = [], - destination_dtype: np.dtype = None, - compression_type: str = None, - compression_options: str = None): - """Concatenate 3D HDF5 Cholla datasets. i.e. take the single files generated per process and concatenate them into a - single, large file. All outputs from start_num to end_num will be concatenated. - - Args: - start_num (int): The first output step to concatenate - end_num (int): The last output step to concatenate - num_processes (int): The number of processes that were used - input_dir (pathlib.Path, optional): The input directory. Defaults to pathlib.Path.cwd(). - output_dir (pathlib.Path, optional): The output directory. Defaults to pathlib.Path.cwd(). - skip_fields (list, optional): List of fields to skip concatenating. Defaults to []. - destination_dtype (np.dtype, optional): The data type of the output datasets. Accepts most numpy types. Defaults to the same as the input datasets. - compression_type (str, optional): What kind of compression to use on the output data. Defaults to None. - compression_options (str, optional): What compression settings to use if compressing. Defaults to None. - """ - - # Error checking - assert start_num >= 0, 'start_num must be greater than or equal to 0' - assert end_num >= 0, 'end_num must be greater than or equal to 0' - assert start_num <= end_num, 'end_num should be greater than or equal to start_num' - assert num_processes > 1, 'num_processes must be greater than 1' - - # loop over outputs - for n in range(start_num, end_num+1): - concat_3d_single(output_number=n, - num_processes=num_processes, - input_dir=input_dir, - output_dir=output_dir, - skip_fields=skip_fields, - destination_dtype=destination_dtype, - compression_type=compression_type, - compression_options=compression_options) -# ====================================================================================================================== - -# ====================================================================================================================== -def concat_3d_single(output_number: int, +def concat_3d_output(source_directory: pathlib.Path, + output_directory: pathlib.Path, num_processes: int, - input_dir: pathlib.Path = pathlib.Path.cwd(), - output_dir: pathlib.Path = pathlib.Path.cwd(), + output_number: int, skip_fields: list = [], destination_dtype: np.dtype = None, compression_type: str = None, @@ -105,11 +30,10 @@ def concat_3d_single(output_number: int, single, large file. Args: - output_number (int): The output - end_num (int): The last output step to concatenate - num_processes (int): The number of processes that were used - input_dir (pathlib.Path, optional): The input directory. Defaults to pathlib.Path.cwd(). - output_dir (pathlib.Path, optional): The output directory. Defaults to pathlib.Path.cwd(). + source_directory (pathlib.Path): The directory containing the unconcatenated files + output_directory (pathlib.Path): The directory containing the new concatenated files + num_processes (int): The number of ranks that Cholla was run with + output_number (int): The output number to concatenate skip_fields (list, optional): List of fields to skip concatenating. Defaults to []. destination_dtype (np.dtype, optional): The data type of the output datasets. Accepts most numpy types. Defaults to the same as the input datasets. compression_type (str, optional): What kind of compression to use on the output data. Defaults to None. @@ -120,13 +44,13 @@ def concat_3d_single(output_number: int, assert num_processes > 1, 'num_processes must be greater than 1' assert output_number >= 0, 'output_number must be greater than or equal to 0' - # open the output file for writing (don't overwrite if exists) - fileout = h5py.File(output_dir / f'{output_number}.h5', 'a') + # open the output file for writing (fail if it exists) + destination_file = h5py.File(output_directory / f'{output_number}.h5', 'w-') # Setup the output file - with h5py.File(input_dir / f'{output_number}.h5.0', 'r') as source_file: + with h5py.File(source_directory / f'{output_number}.h5.0', 'r') as source_file: # Copy header data - fileout = copy_header(source_file, fileout) + destination_file = copy_header(source_file, destination_file) # Create the datasets in the output file datasets_to_copy = list(source_file.keys()) @@ -137,29 +61,42 @@ def concat_3d_single(output_number: int, data_shape = source_file.attrs['dims'] - fileout.create_dataset(name=dataset, - shape=data_shape, - dtype=dtype, - compression=compression_type, - compression_opts=compression_options) + if dataset == 'magnetic_x': data_shape[0] += 1 + if dataset == 'magnetic_y': data_shape[1] += 1 + if dataset == 'magnetic_z': data_shape[2] += 1 + + destination_file.create_dataset(name=dataset, + shape=data_shape, + dtype=dtype, + compression=compression_type, + compression_opts=compression_options) # loop over files for a given output for i in range(0, num_processes): # open the input file for reading - filein = h5py.File(input_dir / f'{output_number}.h5.{i}', 'r') - # read in the header data from the input file - head = filein.attrs + source_file = h5py.File(source_directory / f'{output_number}.h5.{i}', 'r') - # write data from individual processor file to correct location in concatenated file - nx_local, ny_local, nz_local = filein.attrs['dims_local'] - x_start, y_start, z_start = filein.attrs['offset'] + # Compute the offset slicing + nx_local, ny_local, nz_local = source_file.attrs['dims_local'] + x_start, y_start, z_start = source_file.attrs['offset'] + x_end, y_end, z_end = x_start+nx_local, y_start+ny_local, z_start+nz_local + # write data from individual processor file to correct location in concatenated file for dataset in datasets_to_copy: - fileout[dataset][x_start:x_start+nx_local, y_start:y_start+ny_local,z_start:z_start+nz_local] = filein[dataset] + magnetic_offset = [0,0,0] + if dataset == 'magnetic_x': magnetic_offset[0] = 1 + if dataset == 'magnetic_y': magnetic_offset[1] = 1 + if dataset == 'magnetic_z': magnetic_offset[2] = 1 + + destination_file[dataset][x_start:x_end+magnetic_offset[0], + y_start:y_end+magnetic_offset[1], + z_start:z_end+magnetic_offset[2]] = source_file[dataset] - filein.close() + # Now that the copy is done we close the source file + source_file.close() - fileout.close() + # Close destination file now that it is fully constructed + destination_file.close() # ====================================================================================================================== # ============================================================================== @@ -182,5 +119,106 @@ def copy_header(source_file: h5py.File, destination_file: h5py.File): return destination_file # ============================================================================== +# ============================================================================== +def common_cli() -> argparse.ArgumentParser: + """This function provides the basis for the common CLI amongst the various concatenation scripts. It returns an + `argparse.ArgumentParser` object to which additional arguments can be passed before the final `.parse_args()` method + is used. + """ + + # ============================================================================ + # Function used to parse the `--concat-output` argument + def concat_output(raw_argument: str) -> list: + # Check if the string is empty + if len(raw_argument) < 1: + raise ValueError('The --concat-output argument must not be of length zero.') + + # Strip unneeded characters + cleaned_argument = raw_argument.replace(' ', '') + cleaned_argument = cleaned_argument.replace('[', '') + cleaned_argument = cleaned_argument.replace(']', '') + + # Check that it only has the allowed characters + allowed_charaters = set('0123456789,-') + if not set(cleaned_argument).issubset(allowed_charaters): + raise ValueError("Argument contains incorrect characters. Should only contain '0-9', ',', and '-'.") + + # Split on commas + cleaned_argument = cleaned_argument.split(',') + + # Generate the final list + iterable_argument = set() + for arg in cleaned_argument: + if '-' not in arg: + if int(arg) < 0: + raise ValueError() + iterable_argument.add(int(arg)) + else: + start, end = arg.split('-') + start, end = int(start), int(end) + if end < start: + raise ValueError('The end of a range must be larger than the start of the range.') + if start < 0: + raise ValueError() + iterable_argument = iterable_argument.union(set(range(start, end+1))) + + return iterable_argument + # ============================================================================ + + # ============================================================================ + def positive_int(raw_argument: str) -> int: + arg = int(raw_argument) + if arg < 0: + raise ValueError('Argument must be 0 or greater.') + + return arg + # ============================================================================ + + # ============================================================================ + def skip_fields(raw_argument: str) -> list: + # Strip unneeded characters + cleaned_argument = raw_argument.replace(' ', '') + cleaned_argument = cleaned_argument.replace('[', '') + cleaned_argument = cleaned_argument.replace(']', '') + cleaned_argument = cleaned_argument.split(',') + + return cleaned_argument + # ============================================================================ + + # Initialize the CLI + cli = argparse.ArgumentParser() + + # Required Arguments + cli.add_argument('-s', '--source-directory', type=pathlib.Path, required=True, help='The path to the directory for the source HDF5 files.') + cli.add_argument('-o', '--output-directory', type=pathlib.Path, required=True, help='The path to the directory to write out the concatenated HDF5 files.') + cli.add_argument('-n', '--num-processes', type=positive_int, required=True, help='The number of processes that were used') + cli.add_argument('-c', '--concat-outputs', type=concat_output, required=True, help='Which outputs to concatenate. Can be a single number (e.g. 8), a range (e.g. 2-9), or a list (e.g. [1,2,3]). Ranges are inclusive') + + # Optional Arguments + cli.add_argument('--skip-fields', type=skip_fields, default=[], help='List of fields to skip concatenating. Defaults to empty.') + cli.add_argument('--dtype', type=str, default=None, help='The data type of the output datasets. Accepts most numpy types. Defaults to the same as the input datasets.') + cli.add_argument('--compression-type', type=str, default=None, help='What kind of compression to use on the output data. Defaults to None.') + cli.add_argument('--compression-opts', type=str, default=None, help='What compression settings to use if compressing. Defaults to None.') + + return cli +# ============================================================================== + if __name__ == '__main__': - main() + from timeit import default_timer + start = default_timer() + + cli = common_cli() + args = cli.parse_args() + + # Perform the concatenation + for output in args.concat_outputs: + concat_3d_output(source_directory=args.source_directory, + output_directory=args.output_directory, + num_processes=args.num_processes, + output_number=output, + skip_fields=args.skip_fields, + destination_dtype=args.dtype, + compression_type=args.compression_type, + compression_options=args.compression_opts) + + print(f'\nTime to execute: {round(default_timer()-start,2)} seconds') diff --git a/python_scripts/cat_slice.py b/python_scripts/cat_slice.py old mode 100644 new mode 100755 index 51aae2d6d..88f66ea2f --- a/python_scripts/cat_slice.py +++ b/python_scripts/cat_slice.py @@ -19,48 +19,12 @@ import pathlib import numpy as np -from cat_dset_3D import copy_header - -# ============================================================================== -def main(): - """This function handles the CLI argument parsing and is only intended to be used when this script is invoked from the - command line. If you're importing this file then use the `concat_slice` function directly. - """ - # Argument handling - cli = argparse.ArgumentParser() - # Required Arguments - cli.add_argument('-s', '--source-directory', type=pathlib.Path, required=True, help='The path to the source HDF5 files.') - cli.add_argument('-o', '--output-file', type=pathlib.Path, required=True, help='The path and filename of the concatenated file.') - cli.add_argument('-n', '--num-processes', type=int, required=True, help='The number of processes that were used to generate the slices.') - cli.add_argument('-t', '--output-num', type=int, required=True, help='The output number to be concatenated') - # Optional Arguments - cli.add_argument('--xy', type=bool, default=True, help='If True then concatenate the XY slice. Defaults to True.') - cli.add_argument('--yz', type=bool, default=True, help='If True then concatenate the YZ slice. Defaults to True.') - cli.add_argument('--xz', type=bool, default=True, help='If True then concatenate the XZ slice. Defaults to True.') - cli.add_argument('--skip-fields', type=list, default=[], help='List of fields to skip concatenating. Defaults to empty.') - cli.add_argument('--dtype', type=str, default=None, help='The data type of the output datasets. Accepts most numpy types. Defaults to the same as the input datasets.') - cli.add_argument('--compression-type', type=str, default=None, help='What kind of compression to use on the output data. Defaults to None.') - cli.add_argument('--compression-opts', type=str, default=None, help='What compression settings to use if compressing. Defaults to None.') - args = cli.parse_args() - - # Perform the concatenation - concat_slice(source_directory=args.source_directory, - destination_file_path=args.output_file, - num_ranks=args.num_processses, - output_number=args.output_num, - concat_xy=args.xy, - concat_yz=args.yz, - concat_xz=args.xz, - skip_fields=args.skip_fields, - destination_dtype=args.dtype, - compression_type=args.compression_type, - compression_options=args.compression_opts) -# ============================================================================== +from cat_dset_3D import copy_header, common_cli # ============================================================================== def concat_slice(source_directory: pathlib.Path, - destination_file_path: pathlib.Path, - num_ranks: int, + output_directory: pathlib.Path, + num_processes: int, output_number: int, concat_xy: bool = True, concat_yz: bool = True, @@ -76,8 +40,8 @@ def concat_slice(source_directory: pathlib.Path, Args: source_directory (pathlib.Path): The directory containing the unconcatenated files - destination_file_path (pathlib.Path): The path and name of the new concatenated file - num_ranks (int): The number of ranks that Cholla was run with + output_directory (pathlib.Path): The directory containing the new concatenated files + num_processes (int): The number of ranks that Cholla was run with output_number (int): The output number to concatenate concat_xy (bool, optional): If True then concatenate the XY slice. Defaults to True. concat_yz (bool, optional): If True then concatenate the YZ slice. Defaults to True. @@ -87,53 +51,57 @@ def concat_slice(source_directory: pathlib.Path, compression_type (str, optional): What kind of compression to use on the output data. Defaults to None. compression_options (str, optional): What compression settings to use if compressing. Defaults to None. """ - # Open destination file and first file for getting metadata - source_file = h5py.File(source_directory / f'{output_number}_slice.h5.0', 'r') - destination_file = h5py.File(destination_file_path, 'w') - - # Copy over header - destination_file = copy_header(source_file, destination_file) - # Get a list of all datasets in the source file - datasets_to_copy = list(source_file.keys()) + # Error checking + assert num_processes > 1, 'num_processes must be greater than 1' + assert output_number >= 0, 'output_number must be greater than or equal to 0' - # Filter the datasets to only include those I wish to copy - if not concat_xy: - datasets_to_copy = [dataset for dataset in datasets_to_copy if not 'xy' in dataset] - if not concat_yz: - datasets_to_copy = [dataset for dataset in datasets_to_copy if not 'yz' in dataset] - if not concat_xz: - datasets_to_copy = [dataset for dataset in datasets_to_copy if not 'xz' in dataset] - datasets_to_copy = [dataset for dataset in datasets_to_copy if not dataset in skip_fields] - - # Create the datasets in the destination file - for dataset in datasets_to_copy: - dtype = source_file[dataset].dtype if (destination_dtype == None) else destination_dtype - - slice_shape = get_slice_shape(source_file, dataset) + # Open destination file and first file for getting metadata + destination_file = h5py.File(output_directory / f'{output_number}_slice.h5', 'w-') + + # Setup the output file + with h5py.File(source_directory / f'{output_number}_slice.h5.0', 'r') as source_file: + # Copy over header + destination_file = copy_header(source_file, destination_file) + + # Get a list of all datasets in the source file + datasets_to_copy = list(source_file.keys()) + + # Filter the datasets to only include those that need to be copied + if not concat_xy: + datasets_to_copy = [dataset for dataset in datasets_to_copy if not 'xy' in dataset] + if not concat_yz: + datasets_to_copy = [dataset for dataset in datasets_to_copy if not 'yz' in dataset] + if not concat_xz: + datasets_to_copy = [dataset for dataset in datasets_to_copy if not 'xz' in dataset] + datasets_to_copy = [dataset for dataset in datasets_to_copy if not dataset in skip_fields] + + # Create the datasets in the destination file + for dataset in datasets_to_copy: + dtype = source_file[dataset].dtype if (destination_dtype == None) else destination_dtype - destination_file.create_dataset(name=dataset, - shape=slice_shape, - dtype=dtype, - compression=compression_type, - compression_opts=compression_options) + slice_shape = __get_slice_shape(source_file, dataset) - # Close source file in prep for looping through source files - source_file.close() + destination_file.create_dataset(name=dataset, + shape=slice_shape, + dtype=dtype, + compression=compression_type, + compression_opts=compression_options) # Copy data - for rank in range(num_ranks): + for rank in range(num_processes): # Open source file source_file = h5py.File(source_directory / f'{output_number}_slice.h5.{rank}', 'r') # Loop through and copy datasets for dataset in datasets_to_copy: # Determine locations and shifts for writing - (i0_start, i0_end, i1_start, i1_end), file_in_slice = write_bounds(source_file, dataset) + (i0_start, i0_end, i1_start, i1_end), file_in_slice = __write_bounds_slice(source_file, dataset) if file_in_slice: # Copy the data - destination_file[dataset][i0_start:i0_end, i1_start:i1_end] = source_file[dataset] + destination_file[dataset][i0_start:i0_end, + i1_start:i1_end] = source_file[dataset] # Now that the copy is done we close the source file source_file.close() @@ -143,7 +111,7 @@ def concat_slice(source_directory: pathlib.Path, # ============================================================================== # ============================================================================== -def get_slice_shape(source_file: h5py.File, dataset: str): +def __get_slice_shape(source_file: h5py.File, dataset: str): """Determine the shape of the full slice in a dataset Args: @@ -171,7 +139,7 @@ def get_slice_shape(source_file: h5py.File, dataset: str): # ============================================================================== # ============================================================================== -def write_bounds(source_file: h5py.File, dataset: str): +def __write_bounds_slice(source_file: h5py.File, dataset: str): """Determine the bounds of the concatenated file to write to Args: @@ -206,5 +174,25 @@ def write_bounds(source_file: h5py.File, dataset: str): if __name__ == '__main__': from timeit import default_timer start = default_timer() - main() + + cli = common_cli() + cli.add_argument('--disable-xy', default=True, action='store_false', help='Disables concating the XY slice.') + cli.add_argument('--disable-yz', default=True, action='store_false', help='Disables concating the YZ slice.') + cli.add_argument('--disable-xz', default=True, action='store_false', help='Disables concating the XZ slice.') + args = cli.parse_args() + + # Perform the concatenation + for output in args.concat_outputs: + concat_slice(source_directory=args.source_directory, + output_directory=args.output_directory, + num_processes=args.num_processes, + output_number=output, + concat_xy=args.disable_xy, + concat_yz=args.disable_yz, + concat_xz=args.disable_xz, + skip_fields=args.skip_fields, + destination_dtype=args.dtype, + compression_type=args.compression_type, + compression_options=args.compression_opts) + print(f'\nTime to execute: {round(default_timer()-start,2)} seconds') From ebd9ef0988cdca6d9a8e4d66f8f528108a07f82c Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Tue, 31 Oct 2023 11:17:14 -0400 Subject: [PATCH 589/694] Add chunking option to concatenation scripts --- python_scripts/cat_dset_3D.py | 25 +++++++++++++++++++++++-- python_scripts/cat_slice.py | 8 ++++++-- 2 files changed, 29 insertions(+), 4 deletions(-) diff --git a/python_scripts/cat_dset_3D.py b/python_scripts/cat_dset_3D.py index 959d692ae..9e6cff693 100755 --- a/python_scripts/cat_dset_3D.py +++ b/python_scripts/cat_dset_3D.py @@ -25,7 +25,8 @@ def concat_3d_output(source_directory: pathlib.Path, skip_fields: list = [], destination_dtype: np.dtype = None, compression_type: str = None, - compression_options: str = None): + compression_options: str = None, + chunking = None): """Concatenate a single 3D HDF5 Cholla dataset. i.e. take the single files generated per process and concatenate them into a single, large file. @@ -38,6 +39,7 @@ def concat_3d_output(source_directory: pathlib.Path, destination_dtype (np.dtype, optional): The data type of the output datasets. Accepts most numpy types. Defaults to the same as the input datasets. compression_type (str, optional): What kind of compression to use on the output data. Defaults to None. compression_options (str, optional): What compression settings to use if compressing. Defaults to None. + chunking (bool or tuple, optional): Whether or not to use chunking and the chunk size. Defaults to None. """ # Error checking @@ -68,6 +70,7 @@ def concat_3d_output(source_directory: pathlib.Path, destination_file.create_dataset(name=dataset, shape=data_shape, dtype=dtype, + chunks=chunking, compression=compression_type, compression_opts=compression_options) @@ -185,6 +188,22 @@ def skip_fields(raw_argument: str) -> list: return cleaned_argument # ============================================================================ + # ============================================================================ + def chunk_arg(raw_argument: str): + # Strip unneeded characters + cleaned_argument = raw_argument.replace(' ', '') + cleaned_argument = cleaned_argument.replace('(', '') + cleaned_argument = cleaned_argument.replace(')', '') + + # Check that it only has the allowed characters + allowed_charaters = set('0123456789,') + if not set(cleaned_argument).issubset(allowed_charaters): + raise ValueError("Argument contains incorrect characters. Should only contain '0-9', ',', and '-'.") + + # Convert to a tuple and return + return tuple([int(i) for i in cleaned_argument.split(',')]) + # ============================================================================ + # Initialize the CLI cli = argparse.ArgumentParser() @@ -199,6 +218,7 @@ def skip_fields(raw_argument: str) -> list: cli.add_argument('--dtype', type=str, default=None, help='The data type of the output datasets. Accepts most numpy types. Defaults to the same as the input datasets.') cli.add_argument('--compression-type', type=str, default=None, help='What kind of compression to use on the output data. Defaults to None.') cli.add_argument('--compression-opts', type=str, default=None, help='What compression settings to use if compressing. Defaults to None.') + cli.add_argument('--chunking', type=chunk_arg, default=None, nargs='?', const=True, help='Enable chunking of the output file. Default is `False`. If set without an argument then the chunk size will be automatically chosen or a tuple can be passed to indicate the chunk size desired.') return cli # ============================================================================== @@ -219,6 +239,7 @@ def skip_fields(raw_argument: str) -> list: skip_fields=args.skip_fields, destination_dtype=args.dtype, compression_type=args.compression_type, - compression_options=args.compression_opts) + compression_options=args.compression_opts, + chunking=args.chunking) print(f'\nTime to execute: {round(default_timer()-start,2)} seconds') diff --git a/python_scripts/cat_slice.py b/python_scripts/cat_slice.py index 88f66ea2f..9f608a96e 100755 --- a/python_scripts/cat_slice.py +++ b/python_scripts/cat_slice.py @@ -32,7 +32,8 @@ def concat_slice(source_directory: pathlib.Path, skip_fields: list = [], destination_dtype: np.dtype = None, compression_type: str = None, - compression_options: str = None): + compression_options: str = None, + chunking = None): """Concatenate slice HDF5 Cholla datasets. i.e. take the single files generated per process and concatenate them into a single, large file. This function concatenates a single output time and can be called multiple times, @@ -50,6 +51,7 @@ def concat_slice(source_directory: pathlib.Path, destination_dtype (np.dtype, optional): The data type of the output datasets. Accepts most numpy types. Defaults to the same as the input datasets. compression_type (str, optional): What kind of compression to use on the output data. Defaults to None. compression_options (str, optional): What compression settings to use if compressing. Defaults to None. + chunking (bool or tuple, optional): Whether or not to use chunking and the chunk size. Defaults to None. """ # Error checking @@ -85,6 +87,7 @@ def concat_slice(source_directory: pathlib.Path, destination_file.create_dataset(name=dataset, shape=slice_shape, dtype=dtype, + chunks=chunking, compression=compression_type, compression_opts=compression_options) @@ -193,6 +196,7 @@ def __write_bounds_slice(source_file: h5py.File, dataset: str): skip_fields=args.skip_fields, destination_dtype=args.dtype, compression_type=args.compression_type, - compression_options=args.compression_opts) + compression_options=args.compression_opts, + chunking=args.chunking) print(f'\nTime to execute: {round(default_timer()-start,2)} seconds') From bb8c39a4c5925a42da944375e10cf5d1272032bc Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Sat, 11 Nov 2023 10:47:26 -0500 Subject: [PATCH 590/694] Update cat_slice to work with projection data --- python_scripts/cat_slice.py | 133 ++++++++++++++++++++---------------- 1 file changed, 74 insertions(+), 59 deletions(-) diff --git a/python_scripts/cat_slice.py b/python_scripts/cat_slice.py index 9f608a96e..152b4db7c 100755 --- a/python_scripts/cat_slice.py +++ b/python_scripts/cat_slice.py @@ -1,9 +1,9 @@ #!/usr/bin/env python3 """ -Python script for concatenating slice hdf5 datasets for when -DSLICES is turned -on in Cholla. Includes a CLI for concatenating Cholla HDF5 datasets and can be -imported into other scripts where the `concat_slice` function can be used to -concatenate the HDF5 files. +Python script for concatenating 2D hdf5 datasets for when -DSLICES, +-DPROJECTION, or -DROTATED_PROJECTION is turned on in Cholla. Includes a CLI for +concatenating Cholla HDF5 datasets and can be imported into other scripts where +the `concat_2d_dataset` function can be used to concatenate the HDF5 files. Generally the easiest way to import this script is to add the `python_scripts` directory to your python path in your script like this: @@ -22,19 +22,20 @@ from cat_dset_3D import copy_header, common_cli # ============================================================================== -def concat_slice(source_directory: pathlib.Path, - output_directory: pathlib.Path, - num_processes: int, - output_number: int, - concat_xy: bool = True, - concat_yz: bool = True, - concat_xz: bool = True, - skip_fields: list = [], - destination_dtype: np.dtype = None, - compression_type: str = None, - compression_options: str = None, - chunking = None): - """Concatenate slice HDF5 Cholla datasets. i.e. take the single files +def concat_2d_dataset(source_directory: pathlib.Path, + output_directory: pathlib.Path, + num_processes: int, + output_number: int, + dataset_kind: str, + concat_xy: bool = True, + concat_yz: bool = True, + concat_xz: bool = True, + skip_fields: list = [], + destination_dtype: np.dtype = None, + compression_type: str = None, + compression_options: str = None, + chunking = None): + """Concatenate 2D HDF5 Cholla datasets. i.e. take the single files generated per process and concatenate them into a single, large file. This function concatenates a single output time and can be called multiple times, potentially in parallel, to concatenate multiple output times. @@ -44,9 +45,10 @@ def concat_slice(source_directory: pathlib.Path, output_directory (pathlib.Path): The directory containing the new concatenated files num_processes (int): The number of ranks that Cholla was run with output_number (int): The output number to concatenate - concat_xy (bool, optional): If True then concatenate the XY slice. Defaults to True. - concat_yz (bool, optional): If True then concatenate the YZ slice. Defaults to True. - concat_xz (bool, optional): If True then concatenate the XZ slice. Defaults to True. + dataset_kind (str): The type of 2D dataset to concatenate. Can be 'slice', 'proj', or 'rot_proj'. + concat_xy (bool, optional): If True then concatenate the XY slices/projections. Defaults to True. + concat_yz (bool, optional): If True then concatenate the YZ slices/projections. Defaults to True. + concat_xz (bool, optional): If True then concatenate the XZ slices/projections. Defaults to True. skip_fields (list, optional): List of fields to skip concatenating. Defaults to []. destination_dtype (np.dtype, optional): The data type of the output datasets. Accepts most numpy types. Defaults to the same as the input datasets. compression_type (str, optional): What kind of compression to use on the output data. Defaults to None. @@ -57,12 +59,13 @@ def concat_slice(source_directory: pathlib.Path, # Error checking assert num_processes > 1, 'num_processes must be greater than 1' assert output_number >= 0, 'output_number must be greater than or equal to 0' + assert dataset_kind in ['slice', 'proj', 'rot_proj'], '`dataset_kind` can only be one of "slice", "proj", "rot_proj".' - # Open destination file and first file for getting metadata - destination_file = h5py.File(output_directory / f'{output_number}_slice.h5', 'w-') + # Open destination file + destination_file = h5py.File(output_directory / f'{output_number}_{dataset_kind}.h5', 'w-') - # Setup the output file - with h5py.File(source_directory / f'{output_number}_slice.h5.0', 'r') as source_file: + # Setup the destination file + with h5py.File(source_directory / f'{output_number}_{dataset_kind}.h5.0', 'r') as source_file: # Copy over header destination_file = copy_header(source_file, destination_file) @@ -79,13 +82,19 @@ def concat_slice(source_directory: pathlib.Path, datasets_to_copy = [dataset for dataset in datasets_to_copy if not dataset in skip_fields] # Create the datasets in the destination file + zero_array = np.zeros(1) for dataset in datasets_to_copy: dtype = source_file[dataset].dtype if (destination_dtype == None) else destination_dtype - slice_shape = __get_slice_shape(source_file, dataset) + dataset_shape = __get_2d_dataset_shape(source_file, dataset) + + # Create array to initialize data to zero, this is required for projections + if zero_array.shape != dataset_shape: + zero_array = np.zeros(dataset_shape) destination_file.create_dataset(name=dataset, - shape=slice_shape, + shape=dataset_shape, + data=zero_array, dtype=dtype, chunks=chunking, compression=compression_type, @@ -94,17 +103,21 @@ def concat_slice(source_directory: pathlib.Path, # Copy data for rank in range(num_processes): # Open source file - source_file = h5py.File(source_directory / f'{output_number}_slice.h5.{rank}', 'r') + source_file = h5py.File(source_directory / f'{output_number}_{dataset_kind}.h5.{rank}', 'r') # Loop through and copy datasets for dataset in datasets_to_copy: # Determine locations and shifts for writing - (i0_start, i0_end, i1_start, i1_end), file_in_slice = __write_bounds_slice(source_file, dataset) + (i0_start, i0_end, i1_start, i1_end), file_in_slice = __write_bounds_2d_dataset(source_file, dataset) + + # If this is a slice dataset we can skip loading the source file if that + # file isn't in the slice + if dataset_kind == 'slice' and not file_in_slice: + continue - if file_in_slice: - # Copy the data - destination_file[dataset][i0_start:i0_end, - i1_start:i1_end] = source_file[dataset] + # Copy the data, the summation is required for projections but not slices + destination_file[dataset][i0_start:i0_end, + i1_start:i1_end] += source_file[dataset] # Now that the copy is done we close the source file source_file.close() @@ -114,35 +127,35 @@ def concat_slice(source_directory: pathlib.Path, # ============================================================================== # ============================================================================== -def __get_slice_shape(source_file: h5py.File, dataset: str): - """Determine the shape of the full slice in a dataset +def __get_2d_dataset_shape(source_file: h5py.File, dataset: str): + """Determine the shape of the full 2D dataset Args: source_file (h5py.File): The source file the get the shape information from dataset (str): The dataset to get the shape of Raises: - ValueError: If the dataset name isn't a slice name + ValueError: If the dataset name isn't a 2D dataset name Returns: - tuple: The 2D dimensions of the slice + tuple: The dimensions of the dataset """ nx, ny, nz = source_file.attrs['dims'] - +#TODO update this rot proj if 'xy' in dataset: - slice_dimensions = (nx, ny) + dimensions = (nx, ny) elif 'yz' in dataset: - slice_dimensions = (ny, nz) + dimensions = (ny, nz) elif 'xz' in dataset: - slice_dimensions = (nx, nz) + dimensions = (nx, nz) else: raise ValueError(f'Dataset "{dataset}" is not a slice.') - return slice_dimensions + return dimensions # ============================================================================== # ============================================================================== -def __write_bounds_slice(source_file: h5py.File, dataset: str): +def __write_bounds_2d_dataset(source_file: h5py.File, dataset: str): """Determine the bounds of the concatenated file to write to Args: @@ -150,7 +163,7 @@ def __write_bounds_slice(source_file: h5py.File, dataset: str): dataset (str): The name of the dataset to read from the source file Raises: - ValueError: If the dataset name isn't a slice name + ValueError: If the dataset name isn't a 2D dataset name Returns: tuple: The write bounds for the concatenated file to be used like `output_file[dataset][return[0]:return[1], return[2]:return[3]] @@ -169,7 +182,7 @@ def __write_bounds_slice(source_file: h5py.File, dataset: str): file_in_slice = y_start <= ny//2 <= y_start+ny_local bounds = (x_start, x_start+nx_local, z_start, z_start+nz_local) else: - raise ValueError(f'Dataset "{dataset}" is not a slice.') + raise ValueError(f'Dataset "{dataset}" is not a slice or projection.') return bounds, file_in_slice # ============================================================================== @@ -179,24 +192,26 @@ def __write_bounds_slice(source_file: h5py.File, dataset: str): start = default_timer() cli = common_cli() - cli.add_argument('--disable-xy', default=True, action='store_false', help='Disables concating the XY slice.') - cli.add_argument('--disable-yz', default=True, action='store_false', help='Disables concating the YZ slice.') - cli.add_argument('--disable-xz', default=True, action='store_false', help='Disables concating the XZ slice.') + cli.add_argument('-d', '--dataset-kind', type=str, required=True, help='What kind of 2D dataset to concatnate. Options are "slice", "proj", and "rot_proj"') + cli.add_argument('--disable-xy', default=True, action='store_false', help='Disables concating the XY datasets.') + cli.add_argument('--disable-yz', default=True, action='store_false', help='Disables concating the YZ datasets.') + cli.add_argument('--disable-xz', default=True, action='store_false', help='Disables concating the XZ datasets.') args = cli.parse_args() # Perform the concatenation for output in args.concat_outputs: - concat_slice(source_directory=args.source_directory, - output_directory=args.output_directory, - num_processes=args.num_processes, - output_number=output, - concat_xy=args.disable_xy, - concat_yz=args.disable_yz, - concat_xz=args.disable_xz, - skip_fields=args.skip_fields, - destination_dtype=args.dtype, - compression_type=args.compression_type, - compression_options=args.compression_opts, - chunking=args.chunking) + concat_2d_dataset(source_directory=args.source_directory, + output_directory=args.output_directory, + num_processes=args.num_processes, + output_number=output, + dataset_kind=args.dataset_kind, + concat_xy=args.disable_xy, + concat_yz=args.disable_yz, + concat_xz=args.disable_xz, + skip_fields=args.skip_fields, + destination_dtype=args.dtype, + compression_type=args.compression_type, + compression_options=args.compression_opts, + chunking=args.chunking) print(f'\nTime to execute: {round(default_timer()-start,2)} seconds') From 47df926f743f5bf0ba8a221334d0947c44a3b551 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Sat, 11 Nov 2023 10:54:22 -0500 Subject: [PATCH 591/694] Rename concat scripts for clarity delete cat_projection.py as it has been superseded by concat_2d_data.py --- python_scripts/cat_projection.py | 67 ------------------- .../{cat_slice.py => concat_2d_data.py} | 6 +- .../{cat_dset_3D.py => concat_3d_data.py} | 2 +- 3 files changed, 4 insertions(+), 71 deletions(-) delete mode 100755 python_scripts/cat_projection.py rename python_scripts/{cat_slice.py => concat_2d_data.py} (98%) rename python_scripts/{cat_dset_3D.py => concat_3d_data.py} (99%) diff --git a/python_scripts/cat_projection.py b/python_scripts/cat_projection.py deleted file mode 100755 index 29b56a416..000000000 --- a/python_scripts/cat_projection.py +++ /dev/null @@ -1,67 +0,0 @@ -#!/usr/bin/env python3 -# Example file for concatenating on-axis projection data -# created when the -DPROJECTION flag is turned on - -import h5py -import numpy as np - -ns = 0 -ne = 0 -n_procs = 16 # number of processors that did the cholla calculation -dnamein = './hdf5/raw/' -dnameout = './hdf5/' - -# loop over the output times -for n in range(ns, ne+1): - - # open the output file for writing - fileout = h5py.File(dnameout+str(n)+'_proj.h5', 'w') - - # loop over files for a given output time - for i in range(0, n_procs): - - # open the input file for reading - filein = h5py.File(dnamein+str(n)+'_proj.h5.'+str(i), 'r') - # read in the header data from the input file - head = filein.attrs - - # if it's the first input file, write the header attributes - # and create the datasets in the output file - if (i == 0): - nx = head['dims'][0] - ny = head['dims'][1] - nz = head['dims'][2] - fileout.attrs['dims'] = [nx, ny, nz] - fileout.attrs['gamma'] = [head['gamma'][0]] - fileout.attrs['t'] = [head['t'][0]] - fileout.attrs['dt'] = [head['dt'][0]] - fileout.attrs['n_step'] = [head['n_step'][0]] - - dxy = np.zeros((nx,ny)) - dxz = np.zeros((nx,nz)) - Txy = np.zeros((nx,ny)) - Txz = np.zeros((nx,nz)) - - # write data from individual processor file to - # correct location in concatenated file - nxl = head['dims_local'][0] - nyl = head['dims_local'][1] - nzl = head['dims_local'][2] - xs = head['offset'][0] - ys = head['offset'][1] - zs = head['offset'][2] - - dxy[xs:xs+nxl,ys:ys+nyl] += filein['d_xy'] - dxz[xs:xs+nxl,zs:zs+nzl] += filein['d_xz'] - Txy[xs:xs+nxl,ys:ys+nyl] += filein['T_xy'] - Txz[xs:xs+nxl,zs:zs+nzl] += filein['T_xz'] - - filein.close() - - # write out the new datasets - fileout.create_dataset('d_xy', data=dxy) - fileout.create_dataset('d_xz', data=dxz) - fileout.create_dataset('T_xy', data=Txy) - fileout.create_dataset('T_xz', data=Txz) - - fileout.close() diff --git a/python_scripts/cat_slice.py b/python_scripts/concat_2d_data.py similarity index 98% rename from python_scripts/cat_slice.py rename to python_scripts/concat_2d_data.py index 152b4db7c..04a67da86 100755 --- a/python_scripts/cat_slice.py +++ b/python_scripts/concat_2d_data.py @@ -10,7 +10,7 @@ ``` import sys sys.path.append('/PATH/TO/CHOLLA/python_scripts') -import cat_slice +import concat_2d_data ``` """ @@ -19,7 +19,7 @@ import pathlib import numpy as np -from cat_dset_3D import copy_header, common_cli +from concat_3d_data import copy_header, common_cli # ============================================================================== def concat_2d_dataset(source_directory: pathlib.Path, @@ -62,7 +62,7 @@ def concat_2d_dataset(source_directory: pathlib.Path, assert dataset_kind in ['slice', 'proj', 'rot_proj'], '`dataset_kind` can only be one of "slice", "proj", "rot_proj".' # Open destination file - destination_file = h5py.File(output_directory / f'{output_number}_{dataset_kind}.h5', 'w-') + destination_file = h5py.File(output_directory / f'{output_number}_{dataset_kind}.h5', 'w') # Setup the destination file with h5py.File(source_directory / f'{output_number}_{dataset_kind}.h5.0', 'r') as source_file: diff --git a/python_scripts/cat_dset_3D.py b/python_scripts/concat_3d_data.py similarity index 99% rename from python_scripts/cat_dset_3D.py rename to python_scripts/concat_3d_data.py index 9e6cff693..8eaded7df 100755 --- a/python_scripts/cat_dset_3D.py +++ b/python_scripts/concat_3d_data.py @@ -8,7 +8,7 @@ ``` import sys sys.path.append('/PATH/TO/CHOLLA/python_scripts') -import cat_dset_3D +import concat_3d_data ``` """ From ee587e9f08ad71e8773104eb68b31d517eaf78ff Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Sat, 11 Nov 2023 21:09:49 -0700 Subject: [PATCH 592/694] Add rotated projection support to concat_2d_data.py Also, removed cat_rotated_projection.py as it is now superseded by the exhanced funcionality of concat_2d_data.py --- python_scripts/cat_rotated_projection.py | 85 ------------------------ python_scripts/concat_2d_data.py | 12 +++- 2 files changed, 10 insertions(+), 87 deletions(-) delete mode 100755 python_scripts/cat_rotated_projection.py diff --git a/python_scripts/cat_rotated_projection.py b/python_scripts/cat_rotated_projection.py deleted file mode 100755 index 6e769ce55..000000000 --- a/python_scripts/cat_rotated_projection.py +++ /dev/null @@ -1,85 +0,0 @@ -#!/usr/bin/env python3 -# Example file for concatenating rotated projection data -# created when the -DROTATED_PROJECTION flag is turned on - -import h5py -import numpy as np - -ns = 0 -ne = 0 -n_procs = 16 # number of processors that did the cholla calculation -dnamein = './hdf5/raw/' -dnameout = './hdf5/' - -# loop over the output times -for n in range(ns, ne+1): - - # open the output file for writing - fileout = h5py.File(dnameout+str(n)+'_rot_proj.h5', 'w') - - # loop over files for a given output time - for i in range(0, n_procs): - - # open the input file for reading - filein = h5py.File(dnamein+str(n)+'_rot_proj.h5.'+str(i), 'r') - # read in the header data from the input file - head = filein.attrs - - # if it's the first input file, write the header attributes - # and create the arrays to hold the output data - if (i == 0): - nxr = int(head['nxr']) - nzr = int(head['nzr']) - Lx = head['Lx'] - Lz = head['Lz'] - delta = head['delta'] - theta = head['theta'] - phi = head['phi'] - gamma = head['gamma'] - t = head['t'] - dt = head['dt'] - n_step = head['n_step'] - fileout.attrs['nxr'] = nxr - fileout.attrs['nzr'] = nzr - fileout.attrs['Lx'] = Lx - fileout.attrs['Lz'] = Lz - fileout.attrs['delta'] = delta - fileout.attrs['theta'] = theta - fileout.attrs['phi'] = phi - fileout.attrs['gamma'] = gamma - fileout.attrs['t'] = t - fileout.attrs['dt'] = dt - fileout.attrs['n_step'] = n_step - - d_xzr = np.zeros((nxr, nzr)) - vx_xzr = np.zeros((nxr, nzr)) - vy_xzr = np.zeros((nxr, nzr)) - vz_xzr = np.zeros((nxr, nzr)) - T_xzr = np.zeros((nxr, nzr)) - - # write data from individual processor file to - # correct location in concatenated file - nx_min = int(head['nx_min']) - nx_max = int(head['nx_max']) - nz_min = int(head['nz_min']) - nz_max = int(head['nz_max']) - - d_xzr[nx_min:nx_max,nz_min:nz_max] += filein['d_xzr'][:] - vx_xzr[nx_min:nx_max,nz_min:nz_max] += filein['vx_xzr'][:] - vy_xzr[nx_min:nx_max,nz_min:nz_max] += filein['vy_xzr'][:] - vz_xzr[nx_min:nx_max,nz_min:nz_max] += filein['vz_xzr'][:] - T_xzr[nx_min:nx_max,nz_min:nz_max] += filein['T_xzr'][:] - - filein.close() - - # write out the new datasets - fileout.create_dataset("d_xzr", data=d_xzr) - fileout.create_dataset("vx_xzr", data=vx_xzr) - fileout.create_dataset("vy_xzr", data=vy_xzr) - fileout.create_dataset("vz_xzr", data=vz_xzr) - fileout.create_dataset("T_xzr", data=T_xzr) - - fileout.close() - - - diff --git a/python_scripts/concat_2d_data.py b/python_scripts/concat_2d_data.py index 04a67da86..11e14e554 100755 --- a/python_scripts/concat_2d_data.py +++ b/python_scripts/concat_2d_data.py @@ -62,7 +62,7 @@ def concat_2d_dataset(source_directory: pathlib.Path, assert dataset_kind in ['slice', 'proj', 'rot_proj'], '`dataset_kind` can only be one of "slice", "proj", "rot_proj".' # Open destination file - destination_file = h5py.File(output_directory / f'{output_number}_{dataset_kind}.h5', 'w') + destination_file = h5py.File(output_directory / f'{output_number}_{dataset_kind}.h5', 'w-') # Setup the destination file with h5py.File(source_directory / f'{output_number}_{dataset_kind}.h5.0', 'r') as source_file: @@ -140,8 +140,11 @@ def __get_2d_dataset_shape(source_file: h5py.File, dataset: str): Returns: tuple: The dimensions of the dataset """ + + if 'xzr' in dataset: + return (source_file.attrs['nxr'][0], source_file.attrs['nzr'][0]) + nx, ny, nz = source_file.attrs['dims'] -#TODO update this rot proj if 'xy' in dataset: dimensions = (nx, ny) elif 'yz' in dataset: @@ -168,6 +171,11 @@ def __write_bounds_2d_dataset(source_file: h5py.File, dataset: str): Returns: tuple: The write bounds for the concatenated file to be used like `output_file[dataset][return[0]:return[1], return[2]:return[3]] """ + + if 'xzr' in dataset: + return (source_file.attrs['nx_min'][0], source_file.attrs['nx_max'][0], + source_file.attrs['nz_min'][0], source_file.attrs['nz_max'][0]), True + nx, ny, nz = source_file.attrs['dims'] nx_local, ny_local, nz_local = source_file.attrs['dims_local'] x_start, y_start, z_start = source_file.attrs['offset'] From 3a8983013d63630113b46ab901de3993b702be55 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Sat, 11 Nov 2023 21:25:14 -0700 Subject: [PATCH 593/694] Add safer method of opening destination HDF5 file --- python_scripts/concat_2d_data.py | 4 ++-- python_scripts/concat_3d_data.py | 37 +++++++++++++++++++++++++++++--- 2 files changed, 36 insertions(+), 5 deletions(-) diff --git a/python_scripts/concat_2d_data.py b/python_scripts/concat_2d_data.py index 11e14e554..e64c77052 100755 --- a/python_scripts/concat_2d_data.py +++ b/python_scripts/concat_2d_data.py @@ -19,7 +19,7 @@ import pathlib import numpy as np -from concat_3d_data import copy_header, common_cli +from concat_3d_data import copy_header, common_cli, destination_safe_open # ============================================================================== def concat_2d_dataset(source_directory: pathlib.Path, @@ -62,7 +62,7 @@ def concat_2d_dataset(source_directory: pathlib.Path, assert dataset_kind in ['slice', 'proj', 'rot_proj'], '`dataset_kind` can only be one of "slice", "proj", "rot_proj".' # Open destination file - destination_file = h5py.File(output_directory / f'{output_number}_{dataset_kind}.h5', 'w-') + destination_file = destination_safe_open(output_directory / f'{output_number}_{dataset_kind}.h5') # Setup the destination file with h5py.File(source_directory / f'{output_number}_{dataset_kind}.h5.0', 'r') as source_file: diff --git a/python_scripts/concat_3d_data.py b/python_scripts/concat_3d_data.py index 8eaded7df..6b9edc0bd 100755 --- a/python_scripts/concat_3d_data.py +++ b/python_scripts/concat_3d_data.py @@ -46,8 +46,8 @@ def concat_3d_output(source_directory: pathlib.Path, assert num_processes > 1, 'num_processes must be greater than 1' assert output_number >= 0, 'output_number must be greater than or equal to 0' - # open the output file for writing (fail if it exists) - destination_file = h5py.File(output_directory / f'{output_number}.h5', 'w-') + # Open the output file for writing + destination_file = destination_safe_open(output_directory / f'{output_number}.h5') # Setup the output file with h5py.File(source_directory / f'{output_number}.h5.0', 'r') as source_file: @@ -100,7 +100,38 @@ def concat_3d_output(source_directory: pathlib.Path, # Close destination file now that it is fully constructed destination_file.close() -# ====================================================================================================================== +# ============================================================================== + +# ============================================================================== +def destination_safe_open(filename: pathlib.Path) -> h5py.File: + """Opens a HDF5 file safely and provides useful error messages for some common failure modes + + Parameters + ---------- + filename : pathlib.Path + The full path and name of the file to open + + Returns + ------- + h5py.File + The opened HDF5 file object + + Raises + ------ + FileExistsError + Raises if the destination file already exists + """ + + try: + destination_file = h5py.File(filename, 'w-') + except FileExistsError: + # It might be better for this to simply print the error message and return + # rather than exiting. That way if a single call fails in a parallel + # environment it doesn't take down the entire job + raise FileExistsError(f'File "{filename}" already exists and will not be overwritten, skipping.') + + return destination_file +# ============================================================================== # ============================================================================== def copy_header(source_file: h5py.File, destination_file: h5py.File): From da1c6727be42a9f39000eb7d965e1d15eee585cb Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Sun, 12 Nov 2023 17:10:10 -0700 Subject: [PATCH 594/694] Convert to Numpy docstrings --- python_scripts/concat_2d_data.py | 76 ++++++++++++++++++----- python_scripts/concat_3d_data.py | 103 +++++++++++++++++++++++-------- 2 files changed, 136 insertions(+), 43 deletions(-) mode change 100755 => 100644 python_scripts/concat_2d_data.py mode change 100755 => 100644 python_scripts/concat_3d_data.py diff --git a/python_scripts/concat_2d_data.py b/python_scripts/concat_2d_data.py old mode 100755 new mode 100644 index e64c77052..16f1668a0 --- a/python_scripts/concat_2d_data.py +++ b/python_scripts/concat_2d_data.py @@ -36,24 +36,66 @@ def concat_2d_dataset(source_directory: pathlib.Path, compression_options: str = None, chunking = None): """Concatenate 2D HDF5 Cholla datasets. i.e. take the single files - generated per process and concatenate them into a single, large file. This - function concatenates a single output time and can be called multiple times, - potentially in parallel, to concatenate multiple output times. + generated per process and concatenate them into a single, large file. This + function concatenates a single output time and can be called multiple times, + potentially in parallel, to concatenate multiple output times. + + Parameters + ---------- + source_directory : pathlib.Path + The directory containing the unconcatenated files + output_directory : pathlib.Path + The directory containing the new concatenated files + num_processes : int + The number of ranks that Cholla was run with + output_number : int + The output number to concatenate + dataset_kind : str + The type of 2D dataset to concatenate. Can be 'slice', 'proj', or 'rot_proj'. + concat_xy : bool + If True then concatenate the XY slices/projections. Defaults to True. + concat_yz : bool + If True then concatenate the YZ slices/projections. Defaults to True. + concat_xz : bool + If True then concatenate the XZ slices/projections. Defaults to True. + skip_fields : list + List of fields to skip concatenating. Defaults to []. + destination_dtype : np.dtype + The data type of the output datasets. Accepts most numpy types. Defaults to the same as the input datasets. + compression_type : str + What kind of compression to use on the output data. Defaults to None. + compression_options : str + What compression settings to use if compressing. Defaults to None. + chunking : bool or tuple + Whether or not to use chunking and the chunk size. Defaults to None. + source_directory: pathlib.Path : + + output_directory: pathlib.Path : + + num_processes: int : + + output_number: int : + + dataset_kind: str : + + concat_xy: bool : + (Default value = True) + concat_yz: bool : + (Default value = True) + concat_xz: bool : + (Default value = True) + skip_fields: list : + (Default value = []) + destination_dtype: np.dtype : + (Default value = None) + compression_type: str : + (Default value = None) + compression_options: str : + (Default value = None) + + Returns + ------- - Args: - source_directory (pathlib.Path): The directory containing the unconcatenated files - output_directory (pathlib.Path): The directory containing the new concatenated files - num_processes (int): The number of ranks that Cholla was run with - output_number (int): The output number to concatenate - dataset_kind (str): The type of 2D dataset to concatenate. Can be 'slice', 'proj', or 'rot_proj'. - concat_xy (bool, optional): If True then concatenate the XY slices/projections. Defaults to True. - concat_yz (bool, optional): If True then concatenate the YZ slices/projections. Defaults to True. - concat_xz (bool, optional): If True then concatenate the XZ slices/projections. Defaults to True. - skip_fields (list, optional): List of fields to skip concatenating. Defaults to []. - destination_dtype (np.dtype, optional): The data type of the output datasets. Accepts most numpy types. Defaults to the same as the input datasets. - compression_type (str, optional): What kind of compression to use on the output data. Defaults to None. - compression_options (str, optional): What compression settings to use if compressing. Defaults to None. - chunking (bool or tuple, optional): Whether or not to use chunking and the chunk size. Defaults to None. """ # Error checking diff --git a/python_scripts/concat_3d_data.py b/python_scripts/concat_3d_data.py old mode 100755 new mode 100644 index 6b9edc0bd..08cc1a50b --- a/python_scripts/concat_3d_data.py +++ b/python_scripts/concat_3d_data.py @@ -28,18 +28,48 @@ def concat_3d_output(source_directory: pathlib.Path, compression_options: str = None, chunking = None): """Concatenate a single 3D HDF5 Cholla dataset. i.e. take the single files generated per process and concatenate them into a - single, large file. - - Args: - source_directory (pathlib.Path): The directory containing the unconcatenated files - output_directory (pathlib.Path): The directory containing the new concatenated files - num_processes (int): The number of ranks that Cholla was run with - output_number (int): The output number to concatenate - skip_fields (list, optional): List of fields to skip concatenating. Defaults to []. - destination_dtype (np.dtype, optional): The data type of the output datasets. Accepts most numpy types. Defaults to the same as the input datasets. - compression_type (str, optional): What kind of compression to use on the output data. Defaults to None. - compression_options (str, optional): What compression settings to use if compressing. Defaults to None. - chunking (bool or tuple, optional): Whether or not to use chunking and the chunk size. Defaults to None. + single, large file. + + Parameters + ---------- + source_directory : pathlib.Path + The directory containing the unconcatenated files + output_directory : pathlib.Path + The directory containing the new concatenated files + num_processes : int + The number of ranks that Cholla was run with + output_number : int + The output number to concatenate + skip_fields : list + List of fields to skip concatenating. Defaults to []. + destination_dtype : np.dtype + The data type of the output datasets. Accepts most numpy types. Defaults to the same as the input datasets. + compression_type : str + What kind of compression to use on the output data. Defaults to None. + compression_options : str + What compression settings to use if compressing. Defaults to None. + chunking : bool or tuple + Whether or not to use chunking and the chunk size. Defaults to None. + source_directory: pathlib.Path : + + output_directory: pathlib.Path : + + num_processes: int : + + output_number: int : + + skip_fields: list : + (Default value = []) + destination_dtype: np.dtype : + (Default value = None) + compression_type: str : + (Default value = None) + compression_options: str : + (Default value = None) + + Returns + ------- + """ # Error checking @@ -109,17 +139,20 @@ def destination_safe_open(filename: pathlib.Path) -> h5py.File: Parameters ---------- filename : pathlib.Path - The full path and name of the file to open + + The full path and name of the file to open : + + filename: pathlib.Path : + Returns ------- h5py.File - The opened HDF5 file object - Raises - ------ - FileExistsError - Raises if the destination file already exists + The opened HDF5 file object + + + """ try: @@ -137,12 +170,22 @@ def destination_safe_open(filename: pathlib.Path) -> h5py.File: def copy_header(source_file: h5py.File, destination_file: h5py.File): """Copy the attributes of one HDF5 file to another, skipping all fields that are specific to an individual rank - Args: - source_file (h5py.File): The source file - destination_file (h5py.File): The destination file + Parameters + ---------- + source_file : h5py.File + The source file + destination_file : h5py.File + The destination file + source_file: h5py.File : + + destination_file: h5py.File : + + + Returns + ------- + h5py.File + The destination file with the new header attributes - Returns: - h5py.File: The destination file with the new header attributes """ fields_to_skip = ['dims_local', 'offset'] @@ -156,13 +199,21 @@ def copy_header(source_file: h5py.File, destination_file: h5py.File): # ============================================================================== def common_cli() -> argparse.ArgumentParser: """This function provides the basis for the common CLI amongst the various concatenation scripts. It returns an - `argparse.ArgumentParser` object to which additional arguments can be passed before the final `.parse_args()` method - is used. + `argparse.ArgumentParser` object to which additional arguments can be passed before the final `.parse_args()` method + is used. + + Parameters + ---------- + + Returns + ------- + """ # ============================================================================ - # Function used to parse the `--concat-output` argument def concat_output(raw_argument: str) -> list: + """Function used to parse the `--concat-output` argument + """ # Check if the string is empty if len(raw_argument) < 1: raise ValueError('The --concat-output argument must not be of length zero.') From 3fbd10855b14be60713b323ae95ab9db141b5b49 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Mon, 13 Nov 2023 11:50:35 -0700 Subject: [PATCH 595/694] Move all concat common tools into their own file --- python_scripts/concat_2d_data.py | 19 +-- python_scripts/concat_3d_data.py | 184 +---------------------------- python_scripts/concat_internals.py | 178 ++++++++++++++++++++++++++++ 3 files changed, 194 insertions(+), 187 deletions(-) create mode 100644 python_scripts/concat_internals.py diff --git a/python_scripts/concat_2d_data.py b/python_scripts/concat_2d_data.py index 16f1668a0..5cf6fde55 100644 --- a/python_scripts/concat_2d_data.py +++ b/python_scripts/concat_2d_data.py @@ -15,11 +15,10 @@ """ import h5py -import argparse import pathlib import numpy as np -from concat_3d_data import copy_header, common_cli, destination_safe_open +import concat_internals # ============================================================================== def concat_2d_dataset(source_directory: pathlib.Path, @@ -34,7 +33,7 @@ def concat_2d_dataset(source_directory: pathlib.Path, destination_dtype: np.dtype = None, compression_type: str = None, compression_options: str = None, - chunking = None): + chunking = None) -> None: """Concatenate 2D HDF5 Cholla datasets. i.e. take the single files generated per process and concatenate them into a single, large file. This function concatenates a single output time and can be called multiple times, @@ -104,12 +103,12 @@ def concat_2d_dataset(source_directory: pathlib.Path, assert dataset_kind in ['slice', 'proj', 'rot_proj'], '`dataset_kind` can only be one of "slice", "proj", "rot_proj".' # Open destination file - destination_file = destination_safe_open(output_directory / f'{output_number}_{dataset_kind}.h5') + destination_file = concat_internals.destination_safe_open(output_directory / f'{output_number}_{dataset_kind}.h5') # Setup the destination file with h5py.File(source_directory / f'{output_number}_{dataset_kind}.h5.0', 'r') as source_file: # Copy over header - destination_file = copy_header(source_file, destination_file) + destination_file = concat_internals.copy_header(source_file, destination_file) # Get a list of all datasets in the source file datasets_to_copy = list(source_file.keys()) @@ -169,7 +168,7 @@ def concat_2d_dataset(source_directory: pathlib.Path, # ============================================================================== # ============================================================================== -def __get_2d_dataset_shape(source_file: h5py.File, dataset: str): +def __get_2d_dataset_shape(source_file: h5py.File, dataset: str) -> tuple: """Determine the shape of the full 2D dataset Args: @@ -200,7 +199,7 @@ def __get_2d_dataset_shape(source_file: h5py.File, dataset: str): # ============================================================================== # ============================================================================== -def __write_bounds_2d_dataset(source_file: h5py.File, dataset: str): +def __write_bounds_2d_dataset(source_file: h5py.File, dataset: str) -> tuple: """Determine the bounds of the concatenated file to write to Args: @@ -211,7 +210,9 @@ def __write_bounds_2d_dataset(source_file: h5py.File, dataset: str): ValueError: If the dataset name isn't a 2D dataset name Returns: - tuple: The write bounds for the concatenated file to be used like `output_file[dataset][return[0]:return[1], return[2]:return[3]] + tuple: The write bounds for the concatenated file to be used like + `output_file[dataset][return[0]:return[1], return[2]:return[3]]` followed by a bool to indicate if the file is + in the slice if concatenating a slice """ if 'xzr' in dataset: @@ -241,7 +242,7 @@ def __write_bounds_2d_dataset(source_file: h5py.File, dataset: str): from timeit import default_timer start = default_timer() - cli = common_cli() + cli = concat_internals.common_cli() cli.add_argument('-d', '--dataset-kind', type=str, required=True, help='What kind of 2D dataset to concatnate. Options are "slice", "proj", and "rot_proj"') cli.add_argument('--disable-xy', default=True, action='store_false', help='Disables concating the XY datasets.') cli.add_argument('--disable-yz', default=True, action='store_false', help='Disables concating the YZ datasets.') diff --git a/python_scripts/concat_3d_data.py b/python_scripts/concat_3d_data.py index 08cc1a50b..930c108e2 100644 --- a/python_scripts/concat_3d_data.py +++ b/python_scripts/concat_3d_data.py @@ -14,9 +14,10 @@ import h5py import numpy as np -import argparse import pathlib +import concat_internals + # ====================================================================================================================== def concat_3d_output(source_directory: pathlib.Path, output_directory: pathlib.Path, @@ -26,7 +27,7 @@ def concat_3d_output(source_directory: pathlib.Path, destination_dtype: np.dtype = None, compression_type: str = None, compression_options: str = None, - chunking = None): + chunking = None) -> None: """Concatenate a single 3D HDF5 Cholla dataset. i.e. take the single files generated per process and concatenate them into a single, large file. @@ -77,12 +78,12 @@ def concat_3d_output(source_directory: pathlib.Path, assert output_number >= 0, 'output_number must be greater than or equal to 0' # Open the output file for writing - destination_file = destination_safe_open(output_directory / f'{output_number}.h5') + destination_file = concat_internals.destination_safe_open(output_directory / f'{output_number}.h5') # Setup the output file with h5py.File(source_directory / f'{output_number}.h5.0', 'r') as source_file: # Copy header data - destination_file = copy_header(source_file, destination_file) + destination_file = concat_internals.copy_header(source_file, destination_file) # Create the datasets in the output file datasets_to_copy = list(source_file.keys()) @@ -132,184 +133,11 @@ def concat_3d_output(source_directory: pathlib.Path, destination_file.close() # ============================================================================== -# ============================================================================== -def destination_safe_open(filename: pathlib.Path) -> h5py.File: - """Opens a HDF5 file safely and provides useful error messages for some common failure modes - - Parameters - ---------- - filename : pathlib.Path - - The full path and name of the file to open : - - filename: pathlib.Path : - - - Returns - ------- - h5py.File - - The opened HDF5 file object - - - - """ - - try: - destination_file = h5py.File(filename, 'w-') - except FileExistsError: - # It might be better for this to simply print the error message and return - # rather than exiting. That way if a single call fails in a parallel - # environment it doesn't take down the entire job - raise FileExistsError(f'File "{filename}" already exists and will not be overwritten, skipping.') - - return destination_file -# ============================================================================== - -# ============================================================================== -def copy_header(source_file: h5py.File, destination_file: h5py.File): - """Copy the attributes of one HDF5 file to another, skipping all fields that are specific to an individual rank - - Parameters - ---------- - source_file : h5py.File - The source file - destination_file : h5py.File - The destination file - source_file: h5py.File : - - destination_file: h5py.File : - - - Returns - ------- - h5py.File - The destination file with the new header attributes - - """ - fields_to_skip = ['dims_local', 'offset'] - - for attr_key in source_file.attrs.keys(): - if attr_key not in fields_to_skip: - destination_file.attrs[attr_key] = source_file.attrs[attr_key] - - return destination_file -# ============================================================================== - -# ============================================================================== -def common_cli() -> argparse.ArgumentParser: - """This function provides the basis for the common CLI amongst the various concatenation scripts. It returns an - `argparse.ArgumentParser` object to which additional arguments can be passed before the final `.parse_args()` method - is used. - - Parameters - ---------- - - Returns - ------- - - """ - - # ============================================================================ - def concat_output(raw_argument: str) -> list: - """Function used to parse the `--concat-output` argument - """ - # Check if the string is empty - if len(raw_argument) < 1: - raise ValueError('The --concat-output argument must not be of length zero.') - - # Strip unneeded characters - cleaned_argument = raw_argument.replace(' ', '') - cleaned_argument = cleaned_argument.replace('[', '') - cleaned_argument = cleaned_argument.replace(']', '') - - # Check that it only has the allowed characters - allowed_charaters = set('0123456789,-') - if not set(cleaned_argument).issubset(allowed_charaters): - raise ValueError("Argument contains incorrect characters. Should only contain '0-9', ',', and '-'.") - - # Split on commas - cleaned_argument = cleaned_argument.split(',') - - # Generate the final list - iterable_argument = set() - for arg in cleaned_argument: - if '-' not in arg: - if int(arg) < 0: - raise ValueError() - iterable_argument.add(int(arg)) - else: - start, end = arg.split('-') - start, end = int(start), int(end) - if end < start: - raise ValueError('The end of a range must be larger than the start of the range.') - if start < 0: - raise ValueError() - iterable_argument = iterable_argument.union(set(range(start, end+1))) - - return iterable_argument - # ============================================================================ - - # ============================================================================ - def positive_int(raw_argument: str) -> int: - arg = int(raw_argument) - if arg < 0: - raise ValueError('Argument must be 0 or greater.') - - return arg - # ============================================================================ - - # ============================================================================ - def skip_fields(raw_argument: str) -> list: - # Strip unneeded characters - cleaned_argument = raw_argument.replace(' ', '') - cleaned_argument = cleaned_argument.replace('[', '') - cleaned_argument = cleaned_argument.replace(']', '') - cleaned_argument = cleaned_argument.split(',') - - return cleaned_argument - # ============================================================================ - - # ============================================================================ - def chunk_arg(raw_argument: str): - # Strip unneeded characters - cleaned_argument = raw_argument.replace(' ', '') - cleaned_argument = cleaned_argument.replace('(', '') - cleaned_argument = cleaned_argument.replace(')', '') - - # Check that it only has the allowed characters - allowed_charaters = set('0123456789,') - if not set(cleaned_argument).issubset(allowed_charaters): - raise ValueError("Argument contains incorrect characters. Should only contain '0-9', ',', and '-'.") - - # Convert to a tuple and return - return tuple([int(i) for i in cleaned_argument.split(',')]) - # ============================================================================ - - # Initialize the CLI - cli = argparse.ArgumentParser() - - # Required Arguments - cli.add_argument('-s', '--source-directory', type=pathlib.Path, required=True, help='The path to the directory for the source HDF5 files.') - cli.add_argument('-o', '--output-directory', type=pathlib.Path, required=True, help='The path to the directory to write out the concatenated HDF5 files.') - cli.add_argument('-n', '--num-processes', type=positive_int, required=True, help='The number of processes that were used') - cli.add_argument('-c', '--concat-outputs', type=concat_output, required=True, help='Which outputs to concatenate. Can be a single number (e.g. 8), a range (e.g. 2-9), or a list (e.g. [1,2,3]). Ranges are inclusive') - - # Optional Arguments - cli.add_argument('--skip-fields', type=skip_fields, default=[], help='List of fields to skip concatenating. Defaults to empty.') - cli.add_argument('--dtype', type=str, default=None, help='The data type of the output datasets. Accepts most numpy types. Defaults to the same as the input datasets.') - cli.add_argument('--compression-type', type=str, default=None, help='What kind of compression to use on the output data. Defaults to None.') - cli.add_argument('--compression-opts', type=str, default=None, help='What compression settings to use if compressing. Defaults to None.') - cli.add_argument('--chunking', type=chunk_arg, default=None, nargs='?', const=True, help='Enable chunking of the output file. Default is `False`. If set without an argument then the chunk size will be automatically chosen or a tuple can be passed to indicate the chunk size desired.') - - return cli -# ============================================================================== - if __name__ == '__main__': from timeit import default_timer start = default_timer() - cli = common_cli() + cli = concat_internals.common_cli() args = cli.parse_args() # Perform the concatenation diff --git a/python_scripts/concat_internals.py b/python_scripts/concat_internals.py new file mode 100644 index 000000000..29bf49829 --- /dev/null +++ b/python_scripts/concat_internals.py @@ -0,0 +1,178 @@ +#!/usr/bin/env python3 +""" +Contains all the common tools for the various concatnation functions/scipts +""" + +import h5py +import argparse +import pathlib + +# ============================================================================== +def destination_safe_open(filename: pathlib.Path) -> h5py.File: + """Opens a HDF5 file safely and provides useful error messages for some common failure modes + + Parameters + ---------- + filename : pathlib.Path + + The full path and name of the file to open : + + filename: pathlib.Path : + + + Returns + ------- + h5py.File + + The opened HDF5 file object + """ + + try: + destination_file = h5py.File(filename, 'w-') + except FileExistsError: + # It might be better for this to simply print the error message and return + # rather than exiting. That way if a single call fails in a parallel + # environment it doesn't take down the entire job + raise FileExistsError(f'File "{filename}" already exists and will not be overwritten, skipping.') + + return destination_file +# ============================================================================== + +# ============================================================================== +def copy_header(source_file: h5py.File, destination_file: h5py.File) -> h5py.File: + """Copy the attributes of one HDF5 file to another, skipping all fields that are specific to an individual rank + + Parameters + ---------- + source_file : h5py.File + The source file + destination_file : h5py.File + The destination file + source_file: h5py.File : + + destination_file: h5py.File : + + + Returns + ------- + h5py.File + The destination file with the new header attributes + """ + fields_to_skip = ['dims_local', 'offset'] + + for attr_key in source_file.attrs.keys(): + if attr_key not in fields_to_skip: + destination_file.attrs[attr_key] = source_file.attrs[attr_key] + + return destination_file +# ============================================================================== + +# ============================================================================== +def common_cli() -> argparse.ArgumentParser: + """This function provides the basis for the common CLI amongst the various concatenation scripts. It returns an + `argparse.ArgumentParser` object to which additional arguments can be passed before the final `.parse_args()` method + is used. + + Parameters + ---------- + + Returns + ------- + argparse.ArgumentParser + The common components of the CLI for the concatenation scripts + """ + + # ============================================================================ + def concat_output(raw_argument: str) -> list: + """Function used to parse the `--concat-output` argument + """ + # Check if the string is empty + if len(raw_argument) < 1: + raise ValueError('The --concat-output argument must not be of length zero.') + + # Strip unneeded characters + cleaned_argument = raw_argument.replace(' ', '') + cleaned_argument = cleaned_argument.replace('[', '') + cleaned_argument = cleaned_argument.replace(']', '') + + # Check that it only has the allowed characters + allowed_charaters = set('0123456789,-') + if not set(cleaned_argument).issubset(allowed_charaters): + raise ValueError("Argument contains incorrect characters. Should only contain '0-9', ',', and '-'.") + + # Split on commas + cleaned_argument = cleaned_argument.split(',') + + # Generate the final list + iterable_argument = set() + for arg in cleaned_argument: + if '-' not in arg: + if int(arg) < 0: + raise ValueError() + iterable_argument.add(int(arg)) + else: + start, end = arg.split('-') + start, end = int(start), int(end) + if end < start: + raise ValueError('The end of a range must be larger than the start of the range.') + if start < 0: + raise ValueError() + iterable_argument = iterable_argument.union(set(range(start, end+1))) + + return iterable_argument + # ============================================================================ + + # ============================================================================ + def positive_int(raw_argument: str) -> int: + arg = int(raw_argument) + if arg < 0: + raise ValueError('Argument must be 0 or greater.') + + return arg + # ============================================================================ + + # ============================================================================ + def skip_fields(raw_argument: str) -> list: + # Strip unneeded characters + cleaned_argument = raw_argument.replace(' ', '') + cleaned_argument = cleaned_argument.replace('[', '') + cleaned_argument = cleaned_argument.replace(']', '') + cleaned_argument = cleaned_argument.split(',') + + return cleaned_argument + # ============================================================================ + + # ============================================================================ + def chunk_arg(raw_argument: str) -> tuple: + # Strip unneeded characters + cleaned_argument = raw_argument.replace(' ', '') + cleaned_argument = cleaned_argument.replace('(', '') + cleaned_argument = cleaned_argument.replace(')', '') + + # Check that it only has the allowed characters + allowed_charaters = set('0123456789,') + if not set(cleaned_argument).issubset(allowed_charaters): + raise ValueError("Argument contains incorrect characters. Should only contain '0-9', ',', and '-'.") + + # Convert to a tuple and return + return tuple([int(i) for i in cleaned_argument.split(',')]) + # ============================================================================ + + # Initialize the CLI + cli = argparse.ArgumentParser() + + # Required Arguments + cli.add_argument('-s', '--source-directory', type=pathlib.Path, required=True, help='The path to the directory for the source HDF5 files.') + cli.add_argument('-o', '--output-directory', type=pathlib.Path, required=True, help='The path to the directory to write out the concatenated HDF5 files.') + cli.add_argument('-n', '--num-processes', type=positive_int, required=True, help='The number of processes that were used') + cli.add_argument('-c', '--concat-outputs', type=concat_output, required=True, help='Which outputs to concatenate. Can be a single number (e.g. 8), a range (e.g. 2-9), or a list (e.g. [1,2,3]). Ranges are inclusive') + + # Optional Arguments + cli.add_argument('--skip-fields', type=skip_fields, default=[], help='List of fields to skip concatenating. Defaults to empty.') + cli.add_argument('--dtype', type=str, default=None, help='The data type of the output datasets. Accepts most numpy types. Defaults to the same as the input datasets.') + cli.add_argument('--compression-type', type=str, default=None, help='What kind of compression to use on the output data. Defaults to None.') + cli.add_argument('--compression-opts', type=str, default=None, help='What compression settings to use if compressing. Defaults to None.') + cli.add_argument('--chunking', type=chunk_arg, default=None, nargs='?', const=True, help='Enable chunking of the output file. Default is `False`. If set without an argument then the chunk size will be automatically chosen or a tuple can be passed to indicate the chunk size desired.') + + return cli +# ============================================================================== From 572ac721c12a2c5a0406eb49cfc0d83acbb96b34 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Mon, 13 Nov 2023 11:53:11 -0700 Subject: [PATCH 596/694] Remove deprecated cat.py All the functionality of cat.py is now available in concat_2d_data.py and concat_3d_data.py. Marked concatenation files as executable --- python_scripts/cat.py | 406 ----------------------------- python_scripts/cat_particles.py | 0 python_scripts/concat_2d_data.py | 0 python_scripts/concat_3d_data.py | 0 python_scripts/concat_internals.py | 0 5 files changed, 406 deletions(-) delete mode 100755 python_scripts/cat.py mode change 100644 => 100755 python_scripts/cat_particles.py mode change 100644 => 100755 python_scripts/concat_2d_data.py mode change 100644 => 100755 python_scripts/concat_3d_data.py mode change 100644 => 100755 python_scripts/concat_internals.py diff --git a/python_scripts/cat.py b/python_scripts/cat.py deleted file mode 100755 index dc840c570..000000000 --- a/python_scripts/cat.py +++ /dev/null @@ -1,406 +0,0 @@ -# Utils for concat cholla output - -import h5py -import numpy as np -import os - -verbose = True - -def parse(argv): - # Determine prefix - if 'h5' in argv: - preprefix = argv.split('.h5')[0] - prefix = preprefix +'.h5' - - else: - prefix = './{}.h5'.format(argv) - - # Check existing - firstfile = prefix+'.0' - if not os.path.isfile(firstfile): - print(firstfile,' is missing') - exit() - - # Set dirnames - dnamein = os.path.dirname(firstfile)+'/' - dnameout = os.path.dirname(firstfile) + '/' - return dnamein,dnameout - -def hydro(n,dnamein,dnameout,double=True): - """ - n: integer, output number of file - dnamein: string, directory name of input files, should include '/' at end or leave blank for current directory - dnameout: string, directory name of output files, should include '/' at end or leave blank for current directory - double: optional bool, double precision (float64) if True, single precision (float32) if False - - Reads files of form dnamein{n}.h5.{rank}, looping over rank, outputting to file dnameout{n}.h5. - """ - - fileout = h5py.File(dnameout+str(n)+'.h5', 'a') - - i = -1 - # loops over all files - while True: - i += 1 - - fileinname = dnamein+str(n)+'.h5.'+str(i) - - if not os.path.isfile(fileinname): - break - print('Load:',fileinname,flush=True) - - # open the input file for reading - filein = h5py.File(fileinname,'r') - - # read in the header data from the input file - head = filein.attrs - - # if it's the first input file, write the header attributes - # and create the datasets in the output file - if (i == 0): - nx = head['dims'][0] - ny = head['dims'][1] - nz = head['dims'][2] - nxl = head['dims_local'][0] - nyl = head['dims_local'][1] - nzl = head['dims_local'][2] - fileout.attrs['dims'] = [nx, ny, nz] - fileout.attrs['gamma'] = [head['gamma'][0]] - fileout.attrs['t'] = [head['t'][0]] - fileout.attrs['dt'] = [head['dt'][0]] - fileout.attrs['n_step'] = [head['n_step'][0]] - - units = ['time_unit', 'mass_unit', 'length_unit', 'energy_unit', 'velocity_unit', 'densit\ -y_unit'] - for unit in units: - fileout.attrs[unit] = [head[unit][0]] - keys = list(filein.keys()) - #['density','momentum_x','momentum_y','momentum_z','Energy','GasEnergy','scalar0'] - - for key in keys: - if key not in fileout: - # WARNING: If you don't set dataset dtype it will default to 32-bit, but CHOLLA likes to be 64-bit - if double: - dtype = filein[key].dtype - else: - dtype = None - if nz > 1: - fileout.create_dataset(key, (nx, ny, nz), chunks=(nxl,nyl,nzl), dtype=dtype) - elif ny > 1: - fileout.create_dataset(key, (nx, ny), chunks=(nxl,nyl), dtype=dtype) - elif nx > 1: - fileout.create_dataset(key, (nx,), chunks=(nxl,), dtype=dtype) - #fileout.create_dataset(key, (nx, ny, nz)) - - # write data from individual processor file to - # correct location in concatenated file - nxl = head['dims_local'][0] - nyl = head['dims_local'][1] - nzl = head['dims_local'][2] - xs = head['offset'][0] - ys = head['offset'][1] - zs = head['offset'][2] - for key in keys: - if key in filein: - if nz > 1: - fileout[key][xs:xs+nxl,ys:ys+nyl,zs:zs+nzl] = filein[key] - elif ny > 1: - fileout[key][xs:xs+nxl,ys:ys+nyl] = filein[key] - elif nx > 1: - fileout[key][xs:xs+nxl] = filein[key] - filein.close() - - # end loop over all files - fileout.close() - - -def projection(n,dnamein,dnameout): - """ - n: integer, output number of file - dnamein: string, directory name of input files, should include '/' at end or leave blank for current directory - dnameout: string, directory name of output files, should include '/' at end or leave blank for current directory - double: optional bool, double precision (float64) if True, single precision (float32) if False - - Reads files of form dnamein{n}.h5.{rank}, looping over rank, outputting to file dnameout{n}.h5. - """ - - # open the output file for writing - fileout = h5py.File(dnameout+str(n)+'_proj.h5', 'w') - i = -1 - while True: - i += 1 - - fileinname = dnamein+str(n)+'_proj.h5.'+str(i) - - if not os.path.isfile(fileinname): - break - - if verbose: - print(fileinname) - # open the input file for reading - filein = h5py.File(fileinname,'r') - # read in the header data from the input file - head = filein.attrs - - # if it's the first input file, write the header attributes - # and create the datasets in the output file - if (i == 0): - nx = head['dims'][0] - ny = head['dims'][1] - nz = head['dims'][2] - fileout.attrs['dims'] = [nx, ny, nz] - fileout.attrs['gamma'] = [head['gamma'][0]] - fileout.attrs['t'] = [head['t'][0]] - fileout.attrs['dt'] = [head['dt'][0]] - fileout.attrs['n_step'] = [head['n_step'][0]] - - dxy = np.zeros((nx,ny)) - dxz = np.zeros((nx,nz)) - Txy = np.zeros((nx,ny)) - Txz = np.zeros((nx,nz)) - - # write data from individual processor file to - # correct location in concatenated file - nxl = head['dims_local'][0] - nyl = head['dims_local'][1] - nzl = head['dims_local'][2] - xs = head['offset'][0] - ys = head['offset'][1] - zs = head['offset'][2] - - dxy[xs:xs+nxl,ys:ys+nyl] += filein['d_xy'] - dxz[xs:xs+nxl,zs:zs+nzl] += filein['d_xz'] - Txy[xs:xs+nxl,ys:ys+nyl] += filein['T_xy'] - Txz[xs:xs+nxl,zs:zs+nzl] += filein['T_xz'] - - filein.close() - - # write out the new datasets - fileout.create_dataset('d_xy', data=dxy) - fileout.create_dataset('d_xz', data=dxz) - fileout.create_dataset('T_xy', data=Txy) - fileout.create_dataset('T_xz', data=Txz) - - fileout.close() - return - -def slice(n,dnamein,dnameout): - """ - n: integer, output number of file - dnamein: string, directory name of input files, should include '/' at end or leave blank for current directory - dnameout: string, directory name of output files, should include '/' at end or leave blank for current directory - double: optional bool, double precision (float64) if True, single precision (float32) if False - - Reads files of form dnamein{n}_slice.h5.{rank}, looping over rank, outputting to file dnameout{n}_slice.h5. - """ - - # open the output file for writing - fileout = h5py.File(dnameout+str(n)+'_slice.h5', 'w') - - i = -1 - while True: - # loop over files for a given output time - i += 1 - - fileinname = dnamein+str(n)+'_slice.h5.'+str(i) - if not os.path.isfile(fileinname): - break - - if verbose: - print(fileinname) - # open the input file for reading - filein = h5py.File(fileinname,'r') - # read in the header data from the input file - head = filein.attrs - - # Detect DE - DE = 'GE_xy' in filein - SCALAR = 'scalar_xy' in filein - - # if it's the first input file, write the header attributes - # and create the datasets in the output file - if (i == 0): - gamma = head['gamma'] - t = head['t'] - dt = head['dt'] - n_step = head['n_step'] - nx = head['dims'][0] - ny = head['dims'][1] - nz = head['dims'][2] - fileout.attrs['gamma'] = gamma - fileout.attrs['t'] = t - fileout.attrs['dt'] = dt - fileout.attrs['n_step'] = n_step - fileout.attrs['dims'] = [nx, ny, nz] - - d_xy = np.zeros((nx,ny)) - d_xz = np.zeros((nx,nz)) - d_yz = np.zeros((ny,nz)) - mx_xy = np.zeros((nx,ny)) - mx_xz = np.zeros((nx,nz)) - mx_yz = np.zeros((ny,nz)) - my_xy = np.zeros((nx,ny)) - my_xz = np.zeros((nx,nz)) - my_yz = np.zeros((ny,nz)) - mz_xy = np.zeros((nx,ny)) - mz_xz = np.zeros((nx,nz)) - mz_yz = np.zeros((ny,nz)) - E_xy = np.zeros((nx,ny)) - E_xz = np.zeros((nx,nz)) - E_yz = np.zeros((ny,nz)) - if DE: - GE_xy = np.zeros((nx,ny)) - GE_xz = np.zeros((nx,nz)) - GE_yz = np.zeros((ny,nz)) - if SCALAR: - scalar_xy = np.zeros((nx,ny)) - scalar_xz = np.zeros((nx,nz)) - scalar_yz = np.zeros((ny,nz)) - - # write data from individual processor file to - # correct location in concatenated file - nxl = head['dims_local'][0] - nyl = head['dims_local'][1] - nzl = head['dims_local'][2] - xs = head['offset'][0] - ys = head['offset'][1] - zs = head['offset'][2] - - d_xy[xs:xs+nxl,ys:ys+nyl] += filein['d_xy'] - d_xz[xs:xs+nxl,zs:zs+nzl] += filein['d_xz'] - d_yz[ys:ys+nyl,zs:zs+nzl] += filein['d_yz'] - mx_xy[xs:xs+nxl,ys:ys+nyl] += filein['mx_xy'] - mx_xz[xs:xs+nxl,zs:zs+nzl] += filein['mx_xz'] - mx_yz[ys:ys+nyl,zs:zs+nzl] += filein['mx_yz'] - my_xy[xs:xs+nxl,ys:ys+nyl] += filein['my_xy'] - my_xz[xs:xs+nxl,zs:zs+nzl] += filein['my_xz'] - my_yz[ys:ys+nyl,zs:zs+nzl] += filein['my_yz'] - mz_xy[xs:xs+nxl,ys:ys+nyl] += filein['mz_xy'] - mz_xz[xs:xs+nxl,zs:zs+nzl] += filein['mz_xz'] - mz_yz[ys:ys+nyl,zs:zs+nzl] += filein['mz_yz'] - E_xy[xs:xs+nxl,ys:ys+nyl] += filein['E_xy'] - E_xz[xs:xs+nxl,zs:zs+nzl] += filein['E_xz'] - E_yz[ys:ys+nyl,zs:zs+nzl] += filein['E_yz'] - if DE: - GE_xy[xs:xs+nxl,ys:ys+nyl] += filein['GE_xy'] - GE_xz[xs:xs+nxl,zs:zs+nzl] += filein['GE_xz'] - GE_yz[ys:ys+nyl,zs:zs+nzl] += filein['GE_yz'] - if SCALAR: - scalar_xy[xs:xs+nxl,ys:ys+nyl] += filein['scalar_xy'] - scalar_xz[xs:xs+nxl,zs:zs+nzl] += filein['scalar_xz'] - scalar_yz[ys:ys+nyl,zs:zs+nzl] += filein['scalar_yz'] - - filein.close() - - # wrte out the new datasets - fileout.create_dataset('d_xy', data=d_xy) - fileout.create_dataset('d_xz', data=d_xz) - fileout.create_dataset('d_yz', data=d_yz) - fileout.create_dataset('mx_xy', data=mx_xy) - fileout.create_dataset('mx_xz', data=mx_xz) - fileout.create_dataset('mx_yz', data=mx_yz) - fileout.create_dataset('my_xy', data=my_xy) - fileout.create_dataset('my_xz', data=my_xz) - fileout.create_dataset('my_yz', data=my_yz) - fileout.create_dataset('mz_xy', data=mz_xy) - fileout.create_dataset('mz_xz', data=mz_xz) - fileout.create_dataset('mz_yz', data=mz_yz) - fileout.create_dataset('E_xy', data=E_xy) - fileout.create_dataset('E_xz', data=E_xz) - fileout.create_dataset('E_yz', data=E_yz) - if DE: - fileout.create_dataset('GE_xy', data=GE_xy) - fileout.create_dataset('GE_xz', data=GE_xz) - fileout.create_dataset('GE_yz', data=GE_yz) - if SCALAR: - fileout.create_dataset('scalar_xy', data=scalar_xy) - fileout.create_dataset('scalar_xz', data=scalar_xz) - fileout.create_dataset('scalar_yz', data=scalar_yz) - - fileout.close() - return - -def rot_proj(n,dnamein,dnameout): - """ - n: integer, output number of file - dnamein: string, directory name of input files, should include '/' at end or leave blank for current directory - dnameout: string, directory name of output files, should include '/' at end or leave blank for current directory - double: optional bool, double precision (float64) if True, single precision (float32) if False - - Reads files of form dnamein{n}_rot_proj.h5.{rank}, looping over rank, outputting to file dnameout{n}_rot_proj.h5. - """ - - fileout = h5py.File(dnameout+str(n)+'_rot_proj.h5', 'w') - i = -1 - - while True: - # loop over files for a given output time - i += 1 - fileinname = dnamein+str(n)+'_rot_proj.h5.'+str(i) - if not os.path.isfile(fileinname): - break - - if verbose: - print(fileinname) - - filein = h5py.File(dnamein+fileinname,'r') - head = filein.attrs - # if it's the first input file, write the header attributes - # and create the arrays to hold the output data - if (i == 0): - - nxr = int(head['nxr']) - nzr = int(head['nzr']) - Lx = head['Lx'] - Lz = head['Lz'] - delta = head['delta'] - theta = head['theta'] - phi = head['phi'] - gamma = head['gamma'] - t = head['t'] - dt = head['dt'] - n_step = head['n_step'] - fileout.attrs['nxr'] = nxr - fileout.attrs['nzr'] = nzr - fileout.attrs['Lx'] = Lx - fileout.attrs['Lz'] = Lz - fileout.attrs['delta'] = delta - fileout.attrs['theta'] = theta - fileout.attrs['phi'] = phi - fileout.attrs['gamma'] = gamma - fileout.attrs['t'] = t - fileout.attrs['dt'] = dt - fileout.attrs['n_step'] = n_step - - d_xzr = np.zeros((nxr, nzr)) - vx_xzr = np.zeros((nxr, nzr)) - vy_xzr = np.zeros((nxr, nzr)) - vz_xzr = np.zeros((nxr, nzr)) - T_xzr = np.zeros((nxr, nzr)) - - # end first input file - - # write data from individual processor file to - # correct location in concatenated file - nx_min = int(head['nx_min']) - nx_max = int(head['nx_max']) - nz_min = int(head['nz_min']) - nz_max = int(head['nz_max']) - - d_xzr[nx_min:nx_max,nz_min:nz_max] += filein['d_xzr'][:] - vx_xzr[nx_min:nx_max,nz_min:nz_max] += filein['vx_xzr'][:] - vy_xzr[nx_min:nx_max,nz_min:nz_max] += filein['vy_xzr'][:] - vz_xzr[nx_min:nx_max,nz_min:nz_max] += filein['vz_xzr'][:] - T_xzr[nx_min:nx_max,nz_min:nz_max] += filein['T_xzr'][:] - - filein.close() - # end while loop - - # write out the new datasets - fileout.create_dataset("d_xzr", data=d_xzr) - fileout.create_dataset("vx_xzr", data=vx_xzr) - fileout.create_dataset("vy_xzr", data=vy_xzr) - fileout.create_dataset("vz_xzr", data=vz_xzr) - fileout.create_dataset("T_xzr", data=T_xzr) - - fileout.close() diff --git a/python_scripts/cat_particles.py b/python_scripts/cat_particles.py old mode 100644 new mode 100755 diff --git a/python_scripts/concat_2d_data.py b/python_scripts/concat_2d_data.py old mode 100644 new mode 100755 diff --git a/python_scripts/concat_3d_data.py b/python_scripts/concat_3d_data.py old mode 100644 new mode 100755 diff --git a/python_scripts/concat_internals.py b/python_scripts/concat_internals.py old mode 100644 new mode 100755 From 8152145e82619e65755cce8abcf0cedf97afb0c1 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Mon, 13 Nov 2023 11:59:50 -0700 Subject: [PATCH 597/694] Rename concat_3d_output to concat_3d_dataset for consistency with 2d version --- python_scripts/concat_3d_data.py | 36 ++++++++++++++++---------------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/python_scripts/concat_3d_data.py b/python_scripts/concat_3d_data.py index 930c108e2..73194ebf0 100755 --- a/python_scripts/concat_3d_data.py +++ b/python_scripts/concat_3d_data.py @@ -19,15 +19,15 @@ import concat_internals # ====================================================================================================================== -def concat_3d_output(source_directory: pathlib.Path, - output_directory: pathlib.Path, - num_processes: int, - output_number: int, - skip_fields: list = [], - destination_dtype: np.dtype = None, - compression_type: str = None, - compression_options: str = None, - chunking = None) -> None: +def concat_3d_dataset(source_directory: pathlib.Path, + output_directory: pathlib.Path, + num_processes: int, + output_number: int, + skip_fields: list = [], + destination_dtype: np.dtype = None, + compression_type: str = None, + compression_options: str = None, + chunking = None) -> None: """Concatenate a single 3D HDF5 Cholla dataset. i.e. take the single files generated per process and concatenate them into a single, large file. @@ -142,14 +142,14 @@ def concat_3d_output(source_directory: pathlib.Path, # Perform the concatenation for output in args.concat_outputs: - concat_3d_output(source_directory=args.source_directory, - output_directory=args.output_directory, - num_processes=args.num_processes, - output_number=output, - skip_fields=args.skip_fields, - destination_dtype=args.dtype, - compression_type=args.compression_type, - compression_options=args.compression_opts, - chunking=args.chunking) + concat_3d_dataset(source_directory=args.source_directory, + output_directory=args.output_directory, + num_processes=args.num_processes, + output_number=output, + skip_fields=args.skip_fields, + destination_dtype=args.dtype, + compression_type=args.compression_type, + compression_options=args.compression_opts, + chunking=args.chunking) print(f'\nTime to execute: {round(default_timer()-start,2)} seconds') From 611eb163bd6c020752c0fc5344460cc0a21e3d69 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Mon, 13 Nov 2023 13:39:55 -0700 Subject: [PATCH 598/694] Update particles concatenation file to new method --- python_scripts/cat_particles.py | 91 ----------- python_scripts/concat_3d_data.py | 8 +- python_scripts/concat_internals.py | 2 +- python_scripts/concat_particles.py | 250 +++++++++++++++++++++++++++++ 4 files changed, 255 insertions(+), 96 deletions(-) delete mode 100755 python_scripts/cat_particles.py create mode 100755 python_scripts/concat_particles.py diff --git a/python_scripts/cat_particles.py b/python_scripts/cat_particles.py deleted file mode 100755 index 03cbcd71c..000000000 --- a/python_scripts/cat_particles.py +++ /dev/null @@ -1,91 +0,0 @@ -# Example file for concatenating particle data - -import h5py -import numpy as np - -ns = 0 -ne = 300 -n_procs = 4 # number of processors that did the cholla calculation -dnamein = '/gpfs/alpine/proj-shared/csc380/orlandow/o_cholla/out.21Sep20-Mon-12.49-356588-SOR_ONLY_PARTICLES_DISK/raw/' -dnameout = '/gpfs/alpine/proj-shared/csc380/orlandow/o_cholla/out.21Sep20-Mon-12.49-356588-SOR_ONLY_PARTICLES_DISK/particles_cat/' - -# loop over the output times -for n in range(ns, ne+1): - - # open the output file for writing - fileout = h5py.File(dnameout+str(n)+'_particles.h5', 'w') - - if (n % 10 == 0): print(str(n)) - - # loop over files for a given output time - for i in range(0, n_procs): - - # open the input file for reading - filein = h5py.File(dnamein+str(n)+'_particles.h5.'+str(i), 'r') - # read in the header data from the input file - head = filein.attrs - - # if it's the first input file, write the header attributes - # and create the datasets in the output file - if (i == 0): - gamma = head['gamma'] - t = head['t'] - dt = head['dt'] - n_step = head['n_step'] - nx = head['dims'][0] - ny = head['dims'][1] - nz = head['dims'][2] - fileout.attrs['gamma'] = gamma - fileout.attrs['t'] = t - fileout.attrs['dt'] = dt - fileout.attrs['n_step'] = n_step - fileout.attrs['dims'] = [nx, ny, nz] - fileout.attrs['velocity_unit'] = head['velocity_unit'] - fileout.attrs['length_unit'] = head['length_unit'] - fileout.attrs['particle_mass'] = head['particle_mass'] - fileout.attrs['density_unit'] = head['density_unit'] - - x = np.array([]) - y = np.array([]) - z = np.array([]) - vx = np.array([]) - vy = np.array([]) - vz = np.array([]) - particle_ids = np.array([]) - density = np.zeros((nx, ny, nz)) - n_total_particles = 0 - - - # write data from individual processor file to - # correct location in concatenated file - nxl = head['dims_local'][0] - nyl = head['dims_local'][1] - nzl = head['dims_local'][2] - xs = head['offset'][0] - ys = head['offset'][1] - zs = head['offset'][2] - - n_total_particles += head['n_particles_local'] - density[xs:xs+nxl, ys:ys+nyl, zs:zs+nzl] += filein['density'] - x = np.append(x, filein['pos_x']) - y = np.append(y, filein['pos_y']) - z = np.append(z, filein['pos_z']) - vx = np.append(vx, filein['vel_x']) - vy = np.append(vy, filein['vel_y']) - vz = np.append(vz, filein['vel_z']) - particle_ids = np.append(particle_ids, filein['particle_IDs']) - - filein.close() - - # write out the new datasets - fileout.create_dataset('x', data=x) - fileout.create_dataset('y', data=y) - fileout.create_dataset('z', data=z) - fileout.create_dataset('vx', data=vx) - fileout.create_dataset('vy', data=vy) - fileout.create_dataset('vz', data=vz) - fileout.create_dataset('particle_ids', data=particle_ids) - fileout.create_dataset('density', data=density) - fileout.attrs['n_total_particles'] = n_total_particles - - fileout.close() diff --git a/python_scripts/concat_3d_data.py b/python_scripts/concat_3d_data.py index 73194ebf0..599a4a8d1 100755 --- a/python_scripts/concat_3d_data.py +++ b/python_scripts/concat_3d_data.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 """ Python script for concatenating 3D hdf5 datasets. Includes a CLI for concatenating Cholla HDF5 datasets and can be -imported into other scripts where the `concat_3d_field` function can be used to concatenate the datasets. +imported into other scripts where the `concat_3d_dataset` function can be used to concatenate the datasets. Generally the easiest way to import this script is to add the `python_scripts` directory to your python path in your script like this: @@ -18,7 +18,7 @@ import concat_internals -# ====================================================================================================================== +# ============================================================================== def concat_3d_dataset(source_directory: pathlib.Path, output_directory: pathlib.Path, num_processes: int, @@ -28,8 +28,8 @@ def concat_3d_dataset(source_directory: pathlib.Path, compression_type: str = None, compression_options: str = None, chunking = None) -> None: - """Concatenate a single 3D HDF5 Cholla dataset. i.e. take the single files generated per process and concatenate them into a - single, large file. + """Concatenate a single 3D HDF5 Cholla dataset. i.e. take the single files + generated per process and concatenate them into a single, large file. Parameters ---------- diff --git a/python_scripts/concat_internals.py b/python_scripts/concat_internals.py index 29bf49829..6f90f0211 100755 --- a/python_scripts/concat_internals.py +++ b/python_scripts/concat_internals.py @@ -58,7 +58,7 @@ def copy_header(source_file: h5py.File, destination_file: h5py.File) -> h5py.Fil h5py.File The destination file with the new header attributes """ - fields_to_skip = ['dims_local', 'offset'] + fields_to_skip = ['dims_local', 'offset', 'n_particles_local'] for attr_key in source_file.attrs.keys(): if attr_key not in fields_to_skip: diff --git a/python_scripts/concat_particles.py b/python_scripts/concat_particles.py new file mode 100755 index 000000000..d286a4fec --- /dev/null +++ b/python_scripts/concat_particles.py @@ -0,0 +1,250 @@ +#!/usr/bin/env python3 +""" +Python script for concatenating particle hdf5 datasets. Includes a CLI for concatenating Cholla HDF5 datasets and can be +imported into other scripts where the `concat_particles_dataset` function can be used to concatenate the datasets. + +Generally the easiest way to import this script is to add the `python_scripts` directory to your python path in your +script like this: +``` +import sys +sys.path.append('/PATH/TO/CHOLLA/python_scripts') +import concat_particles +``` +""" + +import h5py +import numpy as np +import pathlib + +import concat_internals + +# ====================================================================================================================== +def concat_particles_dataset(source_directory: pathlib.Path, + output_directory: pathlib.Path, + num_processes: int, + output_number: int, + skip_fields: list = [], + destination_dtype: np.dtype = None, + compression_type: str = None, + compression_options: str = None, + chunking = None) -> None: + """Concatenate a single particle HDF5 Cholla dataset. i.e. take the single + files generated per process and concatenate them into a single, large file. + + Parameters + ---------- + source_directory : pathlib.Path + The directory containing the unconcatenated files + output_directory : pathlib.Path + The directory containing the new concatenated files + num_processes : int + The number of ranks that Cholla was run with + output_number : int + The output number to concatenate + skip_fields : list + List of fields to skip concatenating. Defaults to []. + destination_dtype : np.dtype + The data type of the output datasets. Accepts most numpy types. Defaults to the same as the input datasets. + compression_type : str + What kind of compression to use on the output data. Defaults to None. + compression_options : str + What compression settings to use if compressing. Defaults to None. + chunking : bool or tuple + Whether or not to use chunking and the chunk size. Defaults to None. + source_directory: pathlib.Path : + + output_directory: pathlib.Path : + + num_processes: int : + + output_number: int : + + skip_fields: list : + (Default value = []) + destination_dtype: np.dtype : + (Default value = None) + compression_type: str : + (Default value = None) + compression_options: str : + (Default value = None) + + Returns + ------- + + """ + + # Error checking + assert num_processes > 1, 'num_processes must be greater than 1' + assert output_number >= 0, 'output_number must be greater than or equal to 0' + + # Open the output file for writing + destination_file = concat_internals.destination_safe_open(output_directory / f'{output_number}_particles.h5') + + # Setup the output file + # Note that the call to `__get_num_particles` is potentially expensive as it + # opens every single file to read the number of particles in that file + num_particles = __get_num_particles(source_directory, num_processes, output_number) + destination_file = __setup_destination_file(source_directory, + destination_file, + output_number, + num_particles, + skip_fields, + destination_dtype, + compression_type, + compression_options, + chunking) + + # loop over files for a given output + particles_offset = 0 + for i in range(0, num_processes): + # open the input file for reading + source_file = h5py.File(source_directory / f'{output_number}_particles.h5.{i}', 'r') + + # Compute the offset slicing for the 3D data + nx_local, ny_local, nz_local = source_file.attrs['dims_local'] + x_start, y_start, z_start = source_file.attrs['offset'] + x_end, y_end, z_end = x_start+nx_local, y_start+ny_local, z_start+nz_local + + # Get the local number of particles + num_particles_local = source_file.attrs['n_particles_local'][0] + + # write data from individual processor file to correct location in concatenated file + for dataset in list(destination_file.keys()): + + if dataset == 'density': + destination_file[dataset][x_start:x_end, + y_start:y_end, + z_start:z_end] = source_file[dataset] + else: + start = particles_offset + end = particles_offset + num_particles_local + destination_file[dataset][start:end] = source_file[dataset] + + # Update the particles offset + particles_offset += num_particles_local + + # Now that the copy is done we close the source file + source_file.close() + + # Close destination file now that it is fully constructed + destination_file.close() +# ============================================================================== + +# ============================================================================== +def __get_num_particles(source_directory: pathlib.Path, + num_processes: int, + output_number: int) -> int: + """Get the total number of particles in the output. This function is heavily + I/O bound and might benefit from utilizing threads. + + Parameters + ---------- + source_directory : pathlib.Path + The directory of the unconcatenated files + num_processes : int + The number of processes + output_number : int + The output number to get data from + + Returns + ------- + int + The total number of particles in the output + """ + # loop over files for a given output + num_particles = 0 + for i in range(0, num_processes): + # open the input file for reading + with h5py.File(source_directory / f'{output_number}_particles.h5.{i}', 'r') as source_file: + num_particles += source_file.attrs['n_particles_local'] + + return num_particles +# ============================================================================== + +# ============================================================================== +def __setup_destination_file(source_directory: pathlib.Path, + destination_file: h5py.File, + output_number: int, + num_particles: int, + skip_fields: list, + destination_dtype: np.dtype, + compression_type: str, + compression_options: str, + chunking) -> h5py.File: + """_summary_ + + Parameters + ---------- + source_directory : pathlib.Path + The directory containing the unconcatenated files + destination_file : h5py.File + The destination file + output_number : int + The output number to concatenate + num_particles : int + The total number of particles in the output + skip_fields : list + List of fields to skip concatenating. + destination_dtype : np.dtype + The data type of the output datasets. Accepts most numpy types. + compression_type : str + What kind of compression to use on the output data. + compression_options : str + What compression settings to use if compressing. + chunking : _type_ + Whether or not to use chunking and the chunk size. + + Returns + ------- + h5py.File + The fully set up destination file + """ + with h5py.File(source_directory / f'{output_number}_particles.h5.0', 'r') as source_file: + # Copy header data + destination_file = concat_internals.copy_header(source_file, destination_file) + + # Make list of datasets to copy + datasets_to_copy = list(source_file.keys()) + datasets_to_copy = [dataset for dataset in datasets_to_copy if not dataset in skip_fields] + + # Create the datasets in the output file + for dataset in datasets_to_copy: + dtype = source_file[dataset].dtype if (destination_dtype == None) else destination_dtype + + # Determine the shape of the dataset + if dataset == 'density': + data_shape = source_file.attrs['dims'] + else: + data_shape = num_particles + + # Create the dataset + destination_file.create_dataset(name=dataset, + shape=data_shape, + dtype=dtype, + chunks=chunking, + compression=compression_type, + compression_opts=compression_options) + + return destination_file +# ============================================================================== + +if __name__ == '__main__': + from timeit import default_timer + start = default_timer() + + cli = concat_internals.common_cli() + args = cli.parse_args() + + # Perform the concatenation + for output in args.concat_outputs: + concat_particles_dataset(source_directory=args.source_directory, + output_directory=args.output_directory, + num_processes=args.num_processes, + output_number=output, + skip_fields=args.skip_fields, + destination_dtype=args.dtype, + compression_type=args.compression_type, + compression_options=args.compression_opts, + chunking=args.chunking) + + print(f'\nTime to execute: {round(default_timer()-start,2)} seconds') From 9b751a5b36e02ee2bb45ec1fc85fdfbb93e4d14a Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Mon, 13 Nov 2023 13:45:00 -0700 Subject: [PATCH 599/694] Update python_scripts/README.md for new concat scripts --- python_scripts/README.md | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/python_scripts/README.md b/python_scripts/README.md index 5a462e8c1..acda923b7 100644 --- a/python_scripts/README.md +++ b/python_scripts/README.md @@ -5,15 +5,8 @@ You will likely develop more customized, robust, and flexible scripts for your o These simple scripts here are intended to help you understand the basics of the generated data from Cholla. ## Merging HDF5 files -Multi-processor runs generate HDF5 files per-timestep per-processor. -To treat each timestep together we want to merge those per-processor HDF5 files. -| Script | Concatenate | -| ------ | ----------- | -`cat_dset_3d.py` | 3D HDF5 datasets -`cat_projection.py` | The on-axis projection data created when the -DPROJECTION flag is turned on -`cat_rotated_projection.py` | The rotated projection data created when the -DROTATED_PROJECTION flag is turned on -`cat_slice.py` | The on-axis slice data created when the -DSLICES flag is turned on +Multi-processor runs generate HDF5 files per-timestep per-processor. Merging these per process output into a single file can be done with the concatenation scripts detailed in the "Outputs" section of the wiki. ## Plotting data We here present simple Python matplotlib-based scripts to plot density, velocity, energy, and pressure. From c1f6d9e7d8a46a128f1ef93468f4aa044ba3bbb2 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Thu, 16 Nov 2023 16:29:08 -0700 Subject: [PATCH 600/694] Fix placeholder comment --- python_scripts/concat_particles.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python_scripts/concat_particles.py b/python_scripts/concat_particles.py index d286a4fec..8a916f08e 100755 --- a/python_scripts/concat_particles.py +++ b/python_scripts/concat_particles.py @@ -171,7 +171,7 @@ def __setup_destination_file(source_directory: pathlib.Path, compression_type: str, compression_options: str, chunking) -> h5py.File: - """_summary_ + """Setup the destination file by copying the header and setting up the datasets Parameters ---------- From 7634b5a9581aa3fca0c6cbb36dcea11091b41f40 Mon Sep 17 00:00:00 2001 From: Matthew Abruzzo Date: Tue, 21 Nov 2023 14:56:01 -0500 Subject: [PATCH 601/694] Convert OUTPUT_ALWAYS from compile-time definition to a run-time parameter called output_always --- builds/make.type.disk | 1 - builds/make.type.mhd | 3 --- src/global/global.cpp | 2 ++ src/global/global.h | 1 + src/main.cpp | 4 +--- 5 files changed, 4 insertions(+), 7 deletions(-) diff --git a/builds/make.type.disk b/builds/make.type.disk index a95560cf1..d43986f7b 100644 --- a/builds/make.type.disk +++ b/builds/make.type.disk @@ -21,7 +21,6 @@ DFLAGS += -DGRAVITY_5_POINTS_GRADIENT #DFLAGS += -DSTATIC_GRAV -#DFLAGS += -DOUTPUT_ALWAYS DFLAGS += -DCUDA DFLAGS += -DMPI_CHOLLA DFLAGS += -DPRECISION=2 diff --git a/builds/make.type.mhd b/builds/make.type.mhd index 2c6cbf68d..51d82b2e9 100644 --- a/builds/make.type.mhd +++ b/builds/make.type.mhd @@ -51,6 +51,3 @@ DFLAGS += $(MPI_GPU) # Limit the number of steps to evolve. # DFLAGS += -DN_STEPS_LIMIT=1000 - -# Output on every time step -# DFLAGS += -DOUTPUT_ALWAYS diff --git a/src/global/global.cpp b/src/global/global.cpp index ecb7f2ccb..b4514a1c0 100644 --- a/src/global/global.cpp +++ b/src/global/global.cpp @@ -220,6 +220,8 @@ void Parse_Param(char *name, char *value, struct Parameters *parms) } else if (strcmp(name, "out_float32_GasEnergy") == 0) { parms->out_float32_GasEnergy = atoi(value); #endif // DE + } else if (strcmp(name, "output_always") == 0) { + parms->output_always = atoi(value); #ifdef MHD } else if (strcmp(name, "out_float32_magnetic_x") == 0) { parms->out_float32_magnetic_x = atoi(value); diff --git a/src/global/global.h b/src/global/global.h index 8abe358fc..1215da26d 100644 --- a/src/global/global.h +++ b/src/global/global.h @@ -179,6 +179,7 @@ struct Parameters { #ifdef DE int out_float32_GasEnergy = 0; #endif + int output_always = 0; #ifdef STATIC_GRAV int custom_grav = 0; // flag to set specific static gravity field #endif diff --git a/src/main.cpp b/src/main.cpp index 53ce38984..8ee0be128 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -322,9 +322,7 @@ int main(int argc, char *argv[]) "%9.3f ms total time = %9.4f s\n\n", G.H.n_step, G.H.t, G.H.dt, (stop_step - start_step) * 1000, G.H.t_wall); -#ifdef OUTPUT_ALWAYS - G.H.Output_Now = true; -#endif + if (P.output_always) G.H.Output_Now = true; #ifdef ANALYSIS if (G.Analysis.Output_Now) { From 447774755ce6e95e1c3d902b67ab0514057bdc93 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Fri, 27 Oct 2023 11:05:37 -0400 Subject: [PATCH 602/694] Remove accidentally committed file --- clang-tidy-runner.sh | 23 ----------------------- 1 file changed, 23 deletions(-) delete mode 100644 clang-tidy-runner.sh diff --git a/clang-tidy-runner.sh b/clang-tidy-runner.sh deleted file mode 100644 index b8f0e4888..000000000 --- a/clang-tidy-runner.sh +++ /dev/null @@ -1,23 +0,0 @@ -#!/usr/bin/env bash - -# Description: -# Run clang-tidy on all build types in parallel. Note that this spawns 2x the -# number of build types threads since each type has a thread for the CPU code -# and a thread for the GPU code - -# If ctrl-c is sent trap it and kill all clang-tidy processes -trap "kill -- -$$" EXIT - -# cd into the Cholla directory. Default to ${HOME}/Code/cholla -cholla_path=${1:-${HOME}/Code/cholla} -cd ${cholla_path} - -# Run all clang-tidy build types in parallel -builds=( hydro gravity disk particles cosmology mhd dust) -for build in "${builds[@]}" -do - make tidy TYPE=$build & -done - -# Wait for clang-tidy to finish -wait From 9869555b60d003136854ccf43d3e2981746a8acd Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Fri, 27 Oct 2023 14:03:36 -0400 Subject: [PATCH 603/694] Consolodate GPU error checking functions Consolodate all GPU error checking functions and macros into one overloaded function with one overload for CUDA/HIP errors and one for CUFFT/HIPFFT errors. That one doesn't use any macros and supports all the usual usage modes. It does utilize the `experimental::source_location` class. That class is supported on all compilers that we use or expect others to use but if it doesn't work for you then commenting out the relevant lines should be sufficient. Replaced all calls to `CHECK`, `CudaSafeCall`, `CudaCheckError`, and `gpErrchk` with `GPU_Error_Check`. --- src/analysis/feedback_analysis.cpp | 12 +- src/analysis/feedback_analysis_gpu.cu | 12 +- src/chemistry_gpu/chemistry_functions_gpu.cu | 14 +-- src/cooling/cooling_cuda.cu | 2 +- src/cooling/load_cloudy_texture.cu | 14 +-- src/dust/dust_cuda.cu | 2 +- src/global/global_cuda.h | 74 ++--------- src/gravity/gravity_boundaries_gpu.cu | 2 +- src/gravity/gravity_functions.cpp | 4 +- src/gravity/gravity_functions_gpu.cu | 24 ++-- src/gravity/gravity_restart.cpp | 6 +- src/gravity/paris/HenryPeriodic.cu | 20 +-- src/gravity/paris/HenryPeriodic.hpp | 48 +++---- src/gravity/paris/PoissonZero3DBlockedGPU.cu | 64 +++++----- src/gravity/potential_SOR_3D.cpp | 3 +- src/gravity/potential_SOR_3D_gpu.cu | 12 +- src/gravity/potential_paris_3D.cu | 16 +-- src/gravity/potential_paris_galactic.cu | 18 +-- src/grid/cuda_boundaries.cu | 2 +- src/grid/grid3D.cpp | 14 +-- src/grid/initial_conditions.cpp | 2 +- src/hydro/hydro_cuda.cu | 2 +- src/hydro/hydro_cuda_tests.cu | 2 +- src/integrators/VL_1D_cuda.cu | 24 ++-- src/integrators/VL_2D_cuda.cu | 30 ++--- src/integrators/VL_3D_cuda.cu | 50 ++++---- src/integrators/simple_1D_cuda.cu | 24 ++-- src/integrators/simple_2D_cuda.cu | 22 ++-- src/integrators/simple_3D_cuda.cu | 36 +++--- src/io/io_gpu.cu | 16 ++- src/mhd/ct_electric_fields_tests.cu | 28 ++--- src/mhd/magnetic_divergence.cu | 2 +- src/mhd/magnetic_update_tests.cu | 22 ++-- src/mpi/mpi_routines.cpp | 48 +++---- src/particles/density_CIC_gpu.cu | 8 +- src/particles/feedback_CIC_gpu.cu | 34 ++--- src/particles/gravity_CIC_gpu.cu | 6 +- src/particles/io_particles.cpp | 8 +- src/particles/particles_3D_gpu.cu | 34 ++--- src/particles/particles_boundaries.cpp | 14 +-- src/particles/particles_boundaries_gpu.cu | 40 +++--- src/particles/particles_dynamics.cpp | 4 +- src/particles/particles_dynamics_gpu.cu | 16 +-- src/reconstruction/plmc_cuda_tests.cu | 8 +- src/reconstruction/ppmc_cuda_tests.cu | 8 +- src/reconstruction/reconstruction_tests.cu | 8 +- src/riemann_solvers/hllc_cuda_tests.cu | 16 +-- src/riemann_solvers/hlld_cuda_tests.cu | 20 +-- src/utils/DeviceVector.h | 22 ++-- src/utils/DeviceVector_tests.cu | 2 +- src/utils/cuda_utilities.cpp | 2 +- src/utils/cuda_utilities.h | 2 +- src/utils/error_check_cuda.cu | 4 +- src/utils/gpu.hpp | 124 ++++++++++--------- src/utils/gpu_arrays_functions.cu | 16 +-- src/utils/gpu_arrays_functions.h | 12 +- src/utils/reduction_utilities_tests.cu | 2 +- src/utils/timing_functions.cpp | 4 +- 58 files changed, 527 insertions(+), 558 deletions(-) diff --git a/src/analysis/feedback_analysis.cpp b/src/analysis/feedback_analysis.cpp index e63ea8203..3dab7b6da 100644 --- a/src/analysis/feedback_analysis.cpp +++ b/src/analysis/feedback_analysis.cpp @@ -16,8 +16,8 @@ FeedbackAnalysis::FeedbackAnalysis(Grid3D& G) h_circ_vel_y = (Real*)malloc(G.H.n_cells * sizeof(Real)); #ifdef PARTICLES_GPU - CHECK(cudaMalloc((void**)&d_circ_vel_x, G.H.n_cells * sizeof(Real))); - CHECK(cudaMalloc((void**)&d_circ_vel_y, G.H.n_cells * sizeof(Real))); + GPU_Error_Check(cudaMalloc((void**)&d_circ_vel_x, G.H.n_cells * sizeof(Real))); + GPU_Error_Check(cudaMalloc((void**)&d_circ_vel_y, G.H.n_cells * sizeof(Real))); #endif // setup the (constant) circular speed arrays @@ -40,8 +40,8 @@ FeedbackAnalysis::FeedbackAnalysis(Grid3D& G) } #ifdef PARTICLES_GPU - CHECK(cudaMemcpy(d_circ_vel_x, h_circ_vel_x, G.H.n_cells * sizeof(Real), cudaMemcpyHostToDevice)); - CHECK(cudaMemcpy(d_circ_vel_y, h_circ_vel_y, G.H.n_cells * sizeof(Real), cudaMemcpyHostToDevice)); + GPU_Error_Check(cudaMemcpy(d_circ_vel_x, h_circ_vel_x, G.H.n_cells * sizeof(Real), cudaMemcpyHostToDevice)); + GPU_Error_Check(cudaMemcpy(d_circ_vel_y, h_circ_vel_y, G.H.n_cells * sizeof(Real), cudaMemcpyHostToDevice)); #endif } @@ -50,8 +50,8 @@ FeedbackAnalysis::~FeedbackAnalysis() free(h_circ_vel_x); free(h_circ_vel_y); #ifdef PARTICLES_GPU - CHECK(cudaFree(d_circ_vel_x)); - CHECK(cudaFree(d_circ_vel_y)); + GPU_Error_Check(cudaFree(d_circ_vel_x)); + GPU_Error_Check(cudaFree(d_circ_vel_y)); #endif } diff --git a/src/analysis/feedback_analysis_gpu.cu b/src/analysis/feedback_analysis_gpu.cu index 778ee921c..11132bece 100644 --- a/src/analysis/feedback_analysis_gpu.cu +++ b/src/analysis/feedback_analysis_gpu.cu @@ -147,8 +147,8 @@ void FeedbackAnalysis::Compute_Gas_Velocity_Dispersion_GPU(Grid3D &G) Real *d_partial_vel; Real *h_partial_mass = (Real *)malloc(ngrid * sizeof(Real)); Real *h_partial_vel = (Real *)malloc(ngrid * sizeof(Real)); - CHECK(cudaMalloc((void **)&d_partial_mass, ngrid * sizeof(Real))); - CHECK(cudaMalloc((void **)&d_partial_vel, ngrid * sizeof(Real))); + GPU_Error_Check(cudaMalloc((void **)&d_partial_mass, ngrid * sizeof(Real))); + GPU_Error_Check(cudaMalloc((void **)&d_partial_vel, ngrid * sizeof(Real))); Real total_mass = 0; Real total_vel = 0; @@ -177,8 +177,8 @@ void FeedbackAnalysis::Compute_Gas_Velocity_Dispersion_GPU(Grid3D &G) // cudaDeviceSynchronize(); - CHECK(cudaMemcpy(h_partial_mass, d_partial_mass, ngrid * sizeof(Real), cudaMemcpyDeviceToHost)); - CHECK(cudaMemcpy(h_partial_vel, d_partial_vel, ngrid * sizeof(Real), cudaMemcpyDeviceToHost)); + GPU_Error_Check(cudaMemcpy(h_partial_mass, d_partial_mass, ngrid * sizeof(Real), cudaMemcpyDeviceToHost)); + GPU_Error_Check(cudaMemcpy(h_partial_vel, d_partial_vel, ngrid * sizeof(Real), cudaMemcpyDeviceToHost)); #ifdef MPI_CHOLLA MPI_Allreduce(h_partial_mass, &total_mass, 1, MPI_CHREAL, MPI_SUM, world); @@ -195,8 +195,8 @@ void FeedbackAnalysis::Compute_Gas_Velocity_Dispersion_GPU(Grid3D &G) chprintf("feedback: time %f, dt=%f, vrms = %f km/s\n", G.H.t, G.H.dt, sqrt(total_vel / total_mass) * VELOCITY_UNIT / 1e5); - CHECK(cudaFree(d_partial_vel)); - CHECK(cudaFree(d_partial_mass)); + GPU_Error_Check(cudaFree(d_partial_vel)); + GPU_Error_Check(cudaFree(d_partial_mass)); free(h_partial_mass); free(h_partial_vel); diff --git a/src/chemistry_gpu/chemistry_functions_gpu.cu b/src/chemistry_gpu/chemistry_functions_gpu.cu index 72160d98d..9290e0918 100644 --- a/src/chemistry_gpu/chemistry_functions_gpu.cu +++ b/src/chemistry_gpu/chemistry_functions_gpu.cu @@ -18,37 +18,37 @@ void Chem_GPU::Allocate_Array_GPU_float(float **array_dev, int size) { cudaMalloc((void **)array_dev, size * sizeof(float)); - CudaCheckError(); + GPU_Error_Check(); } void Chem_GPU::Copy_Float_Array_to_Device(int size, float *array_h, float *array_d) { - CudaSafeCall(cudaMemcpy(array_d, array_h, size * sizeof(float), cudaMemcpyHostToDevice)); + GPU_Error_Check(cudaMemcpy(array_d, array_h, size * sizeof(float), cudaMemcpyHostToDevice)); cudaDeviceSynchronize(); } void Chem_GPU::Free_Array_GPU_float(float *array_dev) { cudaFree(array_dev); - CudaCheckError(); + GPU_Error_Check(); } void Chem_GPU::Allocate_Array_GPU_Real(Real **array_dev, int size) { cudaMalloc((void **)array_dev, size * sizeof(Real)); - CudaCheckError(); + GPU_Error_Check(); } void Chem_GPU::Copy_Real_Array_to_Device(int size, Real *array_h, Real *array_d) { - CudaSafeCall(cudaMemcpy(array_d, array_h, size * sizeof(Real), cudaMemcpyHostToDevice)); + GPU_Error_Check(cudaMemcpy(array_d, array_h, size * sizeof(Real), cudaMemcpyHostToDevice)); cudaDeviceSynchronize(); } void Chem_GPU::Free_Array_GPU_Real(Real *array_dev) { cudaFree(array_dev); - CudaCheckError(); + GPU_Error_Check(); } class Thermal_State @@ -622,7 +622,7 @@ void Do_Chemistry_Update(Real *dev_conserved, int nx, int ny, int nz, int n_ghos hipLaunchKernelGGL(Update_Chemistry_kernel, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, nz, n_ghost, n_fields, dt, Chem_H); - CudaCheckError(); + GPU_Error_Check(); cudaEventRecord(stop, 0); cudaEventSynchronize(stop); cudaEventElapsedTime(&time, start, stop); diff --git a/src/cooling/cooling_cuda.cu b/src/cooling/cooling_cuda.cu index 4b09527d0..192a1848d 100644 --- a/src/cooling/cooling_cuda.cu +++ b/src/cooling/cooling_cuda.cu @@ -26,7 +26,7 @@ void Cooling_Update(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, in dim3 dim1dBlock(TPB, 1, 1); hipLaunchKernelGGL(cooling_kernel, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, nz, n_ghost, n_fields, dt, gama, coolTexObj, heatTexObj); - CudaCheckError(); + GPU_Error_Check(); } /*! \fn void cooling_kernel(Real *dev_conserved, int nx, int ny, int nz, int diff --git a/src/cooling/load_cloudy_texture.cu b/src/cooling/load_cloudy_texture.cu index e1a02dc28..241fc740c 100644 --- a/src/cooling/load_cloudy_texture.cu +++ b/src/cooling/load_cloudy_texture.cu @@ -110,8 +110,8 @@ void Load_Cuda_Textures() // allocate host arrays to be copied to textures // these arrays are declared as external pointers in global.h - CudaSafeCall(cudaHostAlloc(&cooling_table, nx * ny * sizeof(float), cudaHostAllocDefault)); - CudaSafeCall(cudaHostAlloc(&heating_table, nx * ny * sizeof(float), cudaHostAllocDefault)); + GPU_Error_Check(cudaHostAlloc(&cooling_table, nx * ny * sizeof(float), cudaHostAllocDefault)); + GPU_Error_Check(cudaHostAlloc(&heating_table, nx * ny * sizeof(float), cudaHostAllocDefault)); // Read cooling tables into the host arrays Host_Read_Cooling_Tables(cooling_table, heating_table); @@ -164,8 +164,8 @@ void Load_Cuda_Textures() cudaCreateTextureObject(&heatTexObj, &heatResDesc, &texDesc, NULL); // Free the memory associated with the cooling tables on the host - CudaSafeCall(cudaFreeHost(cooling_table)); - CudaSafeCall(cudaFreeHost(heating_table)); + GPU_Error_Check(cudaFreeHost(cooling_table)); + GPU_Error_Check(cudaFreeHost(heating_table)); // Run Test // Test_Cloudy_Textures(); @@ -261,7 +261,7 @@ void Test_Cloudy_Textures() dim3 dim1dGrid((num_n * num_T + TPB - 1) / TPB, 1, 1); dim3 dim1dBlock(TPB, 1, 1); hipLaunchKernelGGL(Test_Cloudy_Textures_Kernel, dim1dGrid, dim1dBlock, 0, 0, num_n, num_T, coolTexObj, heatTexObj); - CHECK(cudaDeviceSynchronize()); + GPU_Error_Check(cudaDeviceSynchronize()); printf("Exiting due to Test_Cloudy_Textures() being called \n"); exit(0); } @@ -272,12 +272,12 @@ void Test_Cloudy_Speed() int num_T = 1 + 80 * 81; dim3 dim1dGrid((num_n * num_T + TPB - 1) / TPB, 1, 1); dim3 dim1dBlock(TPB, 1, 1); - CHECK(cudaDeviceSynchronize()); + GPU_Error_Check(cudaDeviceSynchronize()); Real time_start = Get_Time(); for (int i = 0; i < 100; i++) { hipLaunchKernelGGL(Test_Cloudy_Speed_Kernel, dim1dGrid, dim1dBlock, 0, 0, num_n, num_T, coolTexObj, heatTexObj); } - CHECK(cudaDeviceSynchronize()); + GPU_Error_Check(cudaDeviceSynchronize()); Real time_end = Get_Time(); printf(" Cloudy Test Time %9.4f micro-s \n", (time_end - time_start)); printf("Exiting due to Test_Cloudy_Speed() being called \n"); diff --git a/src/dust/dust_cuda.cu b/src/dust/dust_cuda.cu index bbecf1935..7ca48a9fd 100644 --- a/src/dust/dust_cuda.cu +++ b/src/dust/dust_cuda.cu @@ -32,7 +32,7 @@ void Dust_Update(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n dim3 dim1dGrid(ngrid, 1, 1); dim3 dim1dBlock(TPB, 1, 1); hipLaunchKernelGGL(Dust_Kernel, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, nz, n_ghost, n_fields, dt, gamma); - CudaCheckError(); + GPU_Error_Check(); } __global__ void Dust_Kernel(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dt, Real gamma) diff --git a/src/global/global_cuda.h b/src/global/global_cuda.h index 296fa31f1..23e3b3dec 100644 --- a/src/global/global_cuda.h +++ b/src/global/global_cuda.h @@ -2,6 +2,9 @@ * /brief Declarations of global variables and functions for the cuda kernels. */ +#ifndef GLOBAL_CUDA_H +#define GLOBAL_CUDA_H + #ifdef CUDA #include @@ -11,10 +14,7 @@ #include "../global/global.h" #include "../utils/gpu.hpp" - #ifndef GLOBAL_CUDA_H - #define GLOBAL_CUDA_H - - #define TPB 256 // threads per block + #define TPB 256 // threads per block // #define TPB 64 extern bool memory_allocated; // Flag becomes true after allocating the memory @@ -38,56 +38,6 @@ extern Real *dev_grav_potential; extern Real *temp_potential; extern Real *buffer_potential; - #define CudaSafeCall(err) __cudaSafeCall(err, __FILE__, __LINE__) - #define CudaCheckError() __cudaCheckError(__FILE__, __LINE__) - -inline void __cudaSafeCall(cudaError err, const char *file, const int line) -{ - #ifdef CUDA_ERROR_CHECK - if (cudaSuccess != err) { - fprintf(stderr, "cudaSafeCall() failed at %s:%i : %s\n", file, line, cudaGetErrorString(err)); - exit(-1); - } - #endif - - return; -} - -inline void __cudaCheckError(const char *file, const int line) -{ - #ifdef CUDA_ERROR_CHECK - cudaError err = cudaGetLastError(); - if (cudaSuccess != err) { - fprintf(stderr, "cudaCheckError() failed at %s:%i : %s\n", file, line, cudaGetErrorString(err)); - exit(-1); - } - - // More careful checking. However, this will affect performance. - // Comment away if needed. - err = cudaDeviceSynchronize(); - if (cudaSuccess != err) { - fprintf(stderr, "cudaCheckError() with sync failed at %s:%i : %s\n", file, line, cudaGetErrorString(err)); - exit(-1); - } - #endif - - return; -} - - #define gpuErrchk(ans) \ - { \ - gpuAssert((ans), __FILE__, __LINE__); \ - } -inline void gpuAssert(cudaError_t code, char *file, int line, bool abort = true) -{ - if (code != cudaSuccess) { - fprintf(stderr, "GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line); - if (abort) { - exit(code); - } - } -} - /*! \fn int sgn_CUDA * \brief Mathematical sign function. Returns sign of x. */ __device__ inline int sgn_CUDA(Real x) @@ -99,9 +49,9 @@ __device__ inline int sgn_CUDA(Real x) } } - // Define atomic_add if it's not supported - #if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 600 - #else + // Define atomic_add if it's not supported + #if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 600 + #else __device__ double atomicAdd(double *address, double val) { unsigned long long int *address_as_ull = (unsigned long long int *)address; @@ -112,12 +62,12 @@ __device__ double atomicAdd(double *address, double val) } while (assumed != old); return __longlong_as_double(old); } - #endif + #endif - // This helper function exists to make it easier to find printfs inside - // kernels - #define kernel_printf printf + // This helper function exists to make it easier to find printfs inside + // kernels + #define kernel_printf printf - #endif // GLOBAL_CUDA_H +#endif // GLOBAL_CUDA_H #endif // CUDA diff --git a/src/gravity/gravity_boundaries_gpu.cu b/src/gravity/gravity_boundaries_gpu.cu index 86727edd7..63f8d6e86 100644 --- a/src/gravity/gravity_boundaries_gpu.cu +++ b/src/gravity/gravity_boundaries_gpu.cu @@ -319,7 +319,7 @@ int Grid3D::Load_Gravity_Potential_To_Buffer_GPU(int direction, int side, Real * hipLaunchKernelGGL(Load_Transfer_Buffer_GPU_kernel, dim1dGrid, dim1dBlock, 0, 0, direction, side, size_buffer, n_i, n_j, nx_pot, ny_pot, nz_pot, n_ghost_transfer, n_ghost_potential, potential_d, send_buffer_d); - CHECK(cudaDeviceSynchronize()); + GPU_Error_Check(cudaDeviceSynchronize()); return size_buffer; } diff --git a/src/gravity/gravity_functions.cpp b/src/gravity/gravity_functions.cpp index 2e94621a6..b92d06564 100644 --- a/src/gravity/gravity_functions.cpp +++ b/src/gravity/gravity_functions.cpp @@ -580,8 +580,8 @@ void Grid3D::Setup_Analytic_Potential(struct Parameters *P) #endif #ifdef GRAVITY_GPU - CudaSafeCall(cudaMemcpy(Grav.F.analytic_potential_d, Grav.F.analytic_potential_h, - Grav.n_cells_potential * sizeof(Real), cudaMemcpyHostToDevice)); + GPU_Error_Check(cudaMemcpy(Grav.F.analytic_potential_d, Grav.F.analytic_potential_h, + Grav.n_cells_potential * sizeof(Real), cudaMemcpyHostToDevice)); #endif } diff --git a/src/gravity/gravity_functions_gpu.cu b/src/gravity/gravity_functions_gpu.cu index 6cd177163..b92d19084 100644 --- a/src/gravity/gravity_functions_gpu.cu +++ b/src/gravity/gravity_functions_gpu.cu @@ -9,27 +9,27 @@ void Grav3D::AllocateMemory_GPU() { - CudaSafeCall(cudaMalloc((void **)&F.density_d, n_cells * sizeof(Real))); - CudaSafeCall(cudaMalloc((void **)&F.potential_d, n_cells_potential * sizeof(Real))); - CudaSafeCall(cudaMalloc((void **)&F.potential_1_d, n_cells_potential * sizeof(Real))); + GPU_Error_Check(cudaMalloc((void **)&F.density_d, n_cells * sizeof(Real))); + GPU_Error_Check(cudaMalloc((void **)&F.potential_d, n_cells_potential * sizeof(Real))); + GPU_Error_Check(cudaMalloc((void **)&F.potential_1_d, n_cells_potential * sizeof(Real))); #ifdef GRAVITY_GPU #ifdef GRAVITY_ANALYTIC_COMP - CudaSafeCall(cudaMalloc((void **)&F.analytic_potential_d, n_cells_potential * sizeof(Real))); + GPU_Error_Check(cudaMalloc((void **)&F.analytic_potential_d, n_cells_potential * sizeof(Real))); #endif #ifdef GRAV_ISOLATED_BOUNDARY_X - CudaSafeCall(cudaMalloc((void **)&F.pot_boundary_x0_d, N_GHOST_POTENTIAL * ny_local * nz_local * sizeof(Real))); - CudaSafeCall(cudaMalloc((void **)&F.pot_boundary_x1_d, N_GHOST_POTENTIAL * ny_local * nz_local * sizeof(Real))); + GPU_Error_Check(cudaMalloc((void **)&F.pot_boundary_x0_d, N_GHOST_POTENTIAL * ny_local * nz_local * sizeof(Real))); + GPU_Error_Check(cudaMalloc((void **)&F.pot_boundary_x1_d, N_GHOST_POTENTIAL * ny_local * nz_local * sizeof(Real))); #endif #ifdef GRAV_ISOLATED_BOUNDARY_Y - CudaSafeCall(cudaMalloc((void **)&F.pot_boundary_y0_d, N_GHOST_POTENTIAL * nx_local * nz_local * sizeof(Real))); - CudaSafeCall(cudaMalloc((void **)&F.pot_boundary_y1_d, N_GHOST_POTENTIAL * nx_local * nz_local * sizeof(Real))); + GPU_Error_Check(cudaMalloc((void **)&F.pot_boundary_y0_d, N_GHOST_POTENTIAL * nx_local * nz_local * sizeof(Real))); + GPU_Error_Check(cudaMalloc((void **)&F.pot_boundary_y1_d, N_GHOST_POTENTIAL * nx_local * nz_local * sizeof(Real))); #endif #ifdef GRAV_ISOLATED_BOUNDARY_Z - CudaSafeCall(cudaMalloc((void **)&F.pot_boundary_z0_d, N_GHOST_POTENTIAL * nx_local * ny_local * sizeof(Real))); - CudaSafeCall(cudaMalloc((void **)&F.pot_boundary_z1_d, N_GHOST_POTENTIAL * nx_local * ny_local * sizeof(Real))); + GPU_Error_Check(cudaMalloc((void **)&F.pot_boundary_z0_d, N_GHOST_POTENTIAL * nx_local * ny_local * sizeof(Real))); + GPU_Error_Check(cudaMalloc((void **)&F.pot_boundary_z1_d, N_GHOST_POTENTIAL * nx_local * ny_local * sizeof(Real))); #endif #endif // GRAVITY_GPU @@ -284,8 +284,8 @@ void Grid3D::Extrapolate_Grav_Potential_GPU() #ifdef PARTICLES_CPU void Grid3D::Copy_Potential_From_GPU() { - CudaSafeCall(cudaMemcpy(Grav.F.potential_h, Grav.F.potential_d, Grav.n_cells_potential * sizeof(Real), - cudaMemcpyDeviceToHost)); + GPU_Error_Check(cudaMemcpy(Grav.F.potential_h, Grav.F.potential_d, Grav.n_cells_potential * sizeof(Real), + cudaMemcpyDeviceToHost)); cudaDeviceSynchronize(); } #endif // PARTICLES_CPU diff --git a/src/gravity/gravity_restart.cpp b/src/gravity/gravity_restart.cpp index 1cfff9cc8..d44af57a9 100644 --- a/src/gravity/gravity_restart.cpp +++ b/src/gravity/gravity_restart.cpp @@ -42,7 +42,8 @@ void Grav3D::Read_Restart_HDF5(struct Parameters* P, int nfile) // Read potential and copy to device to be used as potential n-1 Read_HDF5_Dataset(file_id, F.potential_1_h, "/potential"); #ifdef GRAVITY_GPU - CudaSafeCall(cudaMemcpy(F.potential_1_d, F.potential_1_h, n_cells_potential * sizeof(Real), cudaMemcpyHostToDevice)); + GPU_Error_Check( + cudaMemcpy(F.potential_1_d, F.potential_1_h, n_cells_potential * sizeof(Real), cudaMemcpyHostToDevice)); #endif H5Fclose(file_id); @@ -71,7 +72,8 @@ void Grav3D::Write_Restart_HDF5(struct Parameters* P, int nfile) // Copy device to host if needed #ifdef GRAVITY_GPU - CudaSafeCall(cudaMemcpy(F.potential_1_h, F.potential_1_d, n_cells_potential * sizeof(Real), cudaMemcpyDeviceToHost)); + GPU_Error_Check( + cudaMemcpy(F.potential_1_h, F.potential_1_d, n_cells_potential * sizeof(Real), cudaMemcpyDeviceToHost)); #endif // Write potential diff --git a/src/gravity/paris/HenryPeriodic.cu b/src/gravity/paris/HenryPeriodic.cu index 28ece4feb..1602ca737 100644 --- a/src/gravity/paris/HenryPeriodic.cu +++ b/src/gravity/paris/HenryPeriodic.cu @@ -75,14 +75,14 @@ HenryPeriodic::HenryPeriodic(const int n[3], const double lo[3], const double hi bytes_ = nMax * sizeof(double); // FFT objects - CHECK(cufftPlanMany(&c2ci_, 1, &ni_, &ni_, 1, ni_, &ni_, 1, ni_, CUFFT_Z2Z, djp_ * dhq_)); - CHECK(cufftPlanMany(&c2cj_, 1, &nj_, &nj_, 1, nj_, &nj_, 1, nj_, CUFFT_Z2Z, dip_ * dhq_)); - CHECK(cufftPlanMany(&c2rk_, 1, &nk_, &nh_, 1, nh_, &nk_, 1, nk_, CUFFT_Z2D, dip_ * djq_)); - CHECK(cufftPlanMany(&r2ck_, 1, &nk_, &nk_, 1, nk_, &nh_, 1, nh_, CUFFT_D2Z, dip_ * djq_)); + GPU_Error_Check(cufftPlanMany(&c2ci_, 1, &ni_, &ni_, 1, ni_, &ni_, 1, ni_, CUFFT_Z2Z, djp_ * dhq_)); + GPU_Error_Check(cufftPlanMany(&c2cj_, 1, &nj_, &nj_, 1, nj_, &nj_, 1, nj_, CUFFT_Z2Z, dip_ * dhq_)); + GPU_Error_Check(cufftPlanMany(&c2rk_, 1, &nk_, &nh_, 1, nh_, &nk_, 1, nk_, CUFFT_Z2D, dip_ * djq_)); + GPU_Error_Check(cufftPlanMany(&r2ck_, 1, &nk_, &nk_, 1, nk_, &nh_, 1, nh_, CUFFT_D2Z, dip_ * djq_)); #ifndef MPI_GPU // Host arrays for MPI communication - CHECK(cudaHostAlloc(&ha_, bytes_ + bytes_, cudaHostAllocDefault)); + GPU_Error_Check(cudaHostAlloc(&ha_, bytes_ + bytes_, cudaHostAllocDefault)); assert(ha_); hb_ = ha_ + nMax; #endif @@ -91,13 +91,13 @@ HenryPeriodic::HenryPeriodic(const int n[3], const double lo[3], const double hi HenryPeriodic::~HenryPeriodic() { #ifndef MPI_GPU - CHECK(cudaFreeHost(ha_)); + GPU_Error_Check(cudaFreeHost(ha_)); ha_ = hb_ = nullptr; #endif - CHECK(cufftDestroy(r2ck_)); - CHECK(cufftDestroy(c2rk_)); - CHECK(cufftDestroy(c2cj_)); - CHECK(cufftDestroy(c2ci_)); + GPU_Error_Check(cufftDestroy(r2ck_)); + GPU_Error_Check(cufftDestroy(c2rk_)); + GPU_Error_Check(cufftDestroy(c2cj_)); + GPU_Error_Check(cufftDestroy(c2ci_)); MPI_Comm_free(&commI_); MPI_Comm_free(&commJ_); MPI_Comm_free(&commK_); diff --git a/src/gravity/paris/HenryPeriodic.hpp b/src/gravity/paris/HenryPeriodic.hpp index 8d1502263..0441d5487 100644 --- a/src/gravity/paris/HenryPeriodic.hpp +++ b/src/gravity/paris/HenryPeriodic.hpp @@ -126,11 +126,11 @@ void HenryPeriodic::filter(const size_t bytes, double *const before, double *con const int countK = dip * djq * dk; #ifndef MPI_GPU - CHECK(cudaMemcpy(ha_, a, bytes, cudaMemcpyDeviceToHost)); + GPU_Error_Check(cudaMemcpy(ha_, a, bytes, cudaMemcpyDeviceToHost)); MPI_Alltoall(ha_, countK, MPI_DOUBLE, hb_, countK, MPI_DOUBLE, commK_); - CHECK(cudaMemcpy(b, hb_, bytes, cudaMemcpyHostToDevice)); + GPU_Error_Check(cudaMemcpy(b, hb_, bytes, cudaMemcpyHostToDevice)); #else - CHECK(cudaDeviceSynchronize()); + GPU_Error_Check(cudaDeviceSynchronize()); MPI_Alltoall(a, countK, MPI_DOUBLE, b, countK, MPI_DOUBLE, commK_); #endif @@ -152,7 +152,7 @@ void HenryPeriodic::filter(const size_t bytes, double *const before, double *con } // Real-to-complex FFT in Z - CHECK(cufftExecD2Z(r2ck_, a, bc)); + GPU_Error_Check(cufftExecD2Z(r2ck_, a, bc)); // Rearrange for Y redistribution { @@ -174,11 +174,11 @@ void HenryPeriodic::filter(const size_t bytes, double *const before, double *con // Redistribute for Y pencils const int countJ = 2 * dip * djq * dhq; #ifndef MPI_GPU - CHECK(cudaMemcpy(ha_, a, bytes, cudaMemcpyDeviceToHost)); + GPU_Error_Check(cudaMemcpy(ha_, a, bytes, cudaMemcpyDeviceToHost)); MPI_Alltoall(ha_, countJ, MPI_DOUBLE, hb_, countJ, MPI_DOUBLE, commJ_); - CHECK(cudaMemcpy(b, hb_, bytes, cudaMemcpyHostToDevice)); + GPU_Error_Check(cudaMemcpy(b, hb_, bytes, cudaMemcpyHostToDevice)); #else - CHECK(cudaDeviceSynchronize()); + GPU_Error_Check(cudaDeviceSynchronize()); MPI_Alltoall(a, countJ, MPI_DOUBLE, b, countJ, MPI_DOUBLE, commJ_); #endif @@ -201,7 +201,7 @@ void HenryPeriodic::filter(const size_t bytes, double *const before, double *con } // Forward FFT in Y - CHECK(cufftExecZ2Z(c2cj_, ac, bc, CUFFT_FORWARD)); + GPU_Error_Check(cufftExecZ2Z(c2cj_, ac, bc, CUFFT_FORWARD)); // Rearrange for X redistribution { @@ -223,11 +223,11 @@ void HenryPeriodic::filter(const size_t bytes, double *const before, double *con // Redistribute for X pencils const int countI = 2 * dip * djp * dhq; #ifndef MPI_GPU - CHECK(cudaMemcpy(ha_, a, bytes, cudaMemcpyDeviceToHost)); + GPU_Error_Check(cudaMemcpy(ha_, a, bytes, cudaMemcpyDeviceToHost)); MPI_Alltoall(ha_, countI, MPI_DOUBLE, hb_, countI, MPI_DOUBLE, commI_); - CHECK(cudaMemcpy(b, hb_, bytes, cudaMemcpyHostToDevice)); + GPU_Error_Check(cudaMemcpy(b, hb_, bytes, cudaMemcpyHostToDevice)); #else - CHECK(cudaDeviceSynchronize()); + GPU_Error_Check(cudaDeviceSynchronize()); MPI_Alltoall(a, countI, MPI_DOUBLE, b, countI, MPI_DOUBLE, commI_); #endif @@ -250,7 +250,7 @@ void HenryPeriodic::filter(const size_t bytes, double *const before, double *con } // Forward FFT in X - CHECK(cufftExecZ2Z(c2ci_, ac, bc, CUFFT_FORWARD)); + GPU_Error_Check(cufftExecZ2Z(c2ci_, ac, bc, CUFFT_FORWARD)); // Apply filter in frequency space distributed in X pencils @@ -268,7 +268,7 @@ void HenryPeriodic::filter(const size_t bytes, double *const before, double *con }); // Backward FFT in X - CHECK(cufftExecZ2Z(c2ci_, ac, bc, CUFFT_INVERSE)); + GPU_Error_Check(cufftExecZ2Z(c2ci_, ac, bc, CUFFT_INVERSE)); // Rearrange for Y redistribution { @@ -290,11 +290,11 @@ void HenryPeriodic::filter(const size_t bytes, double *const before, double *con // Redistribute for Y pencils #ifndef MPI_GPU - CHECK(cudaMemcpy(ha_, a, bytes, cudaMemcpyDeviceToHost)); + GPU_Error_Check(cudaMemcpy(ha_, a, bytes, cudaMemcpyDeviceToHost)); MPI_Alltoall(ha_, countI, MPI_DOUBLE, hb_, countI, MPI_DOUBLE, commI_); - CHECK(cudaMemcpy(b, hb_, bytes, cudaMemcpyHostToDevice)); + GPU_Error_Check(cudaMemcpy(b, hb_, bytes, cudaMemcpyHostToDevice)); #else - CHECK(cudaDeviceSynchronize()); + GPU_Error_Check(cudaDeviceSynchronize()); MPI_Alltoall(a, countI, MPI_DOUBLE, b, countI, MPI_DOUBLE, commI_); #endif @@ -316,7 +316,7 @@ void HenryPeriodic::filter(const size_t bytes, double *const before, double *con } // Backward FFT in Y - CHECK(cufftExecZ2Z(c2cj_, ac, bc, CUFFT_INVERSE)); + GPU_Error_Check(cufftExecZ2Z(c2cj_, ac, bc, CUFFT_INVERSE)); // Rearrange for Z redistribution { @@ -338,11 +338,11 @@ void HenryPeriodic::filter(const size_t bytes, double *const before, double *con // Redistribute in Z pencils #ifndef MPI_GPU - CHECK(cudaMemcpy(ha_, a, bytes, cudaMemcpyDeviceToHost)); + GPU_Error_Check(cudaMemcpy(ha_, a, bytes, cudaMemcpyDeviceToHost)); MPI_Alltoall(ha_, countJ, MPI_DOUBLE, hb_, countJ, MPI_DOUBLE, commJ_); - CHECK(cudaMemcpy(b, hb_, bytes, cudaMemcpyHostToDevice)); + GPU_Error_Check(cudaMemcpy(b, hb_, bytes, cudaMemcpyHostToDevice)); #else - CHECK(cudaDeviceSynchronize()); + GPU_Error_Check(cudaDeviceSynchronize()); MPI_Alltoall(a, countJ, MPI_DOUBLE, b, countJ, MPI_DOUBLE, commJ_); #endif @@ -364,7 +364,7 @@ void HenryPeriodic::filter(const size_t bytes, double *const before, double *con } // Complex-to-real FFT in Z - CHECK(cufftExecZ2D(c2rk_, ac, b)); + GPU_Error_Check(cufftExecZ2D(c2rk_, ac, b)); // Rearrange for 3D-block redistribution { @@ -385,11 +385,11 @@ void HenryPeriodic::filter(const size_t bytes, double *const before, double *con // Redistribute for 3D blocks #ifndef MPI_GPU - CHECK(cudaMemcpy(ha_, a, bytes, cudaMemcpyDeviceToHost)); + GPU_Error_Check(cudaMemcpy(ha_, a, bytes, cudaMemcpyDeviceToHost)); MPI_Alltoall(ha_, countK, MPI_DOUBLE, hb_, countK, MPI_DOUBLE, commK_); - CHECK(cudaMemcpy(b, hb_, bytes, cudaMemcpyHostToDevice)); + GPU_Error_Check(cudaMemcpy(b, hb_, bytes, cudaMemcpyHostToDevice)); #else - CHECK(cudaDeviceSynchronize()); + GPU_Error_Check(cudaDeviceSynchronize()); MPI_Alltoall(a, countK, MPI_DOUBLE, b, countK, MPI_DOUBLE, commK_); #endif diff --git a/src/gravity/paris/PoissonZero3DBlockedGPU.cu b/src/gravity/paris/PoissonZero3DBlockedGPU.cu index 7d94c8ca3..84e070160 100644 --- a/src/gravity/paris/PoissonZero3DBlockedGPU.cu +++ b/src/gravity/paris/PoissonZero3DBlockedGPU.cu @@ -84,13 +84,13 @@ PoissonZero3DBlockedGPU::PoissonZero3DBlockedGPU(const int n[3], const double lo bytes_ = nMax * sizeof(double); int nkh = nk_ / 2 + 1; - CHECK(cufftPlanMany(&d2zk_, 1, &nk_, &nk_, 1, nk_, &nkh, 1, nkh, CUFFT_D2Z, dip_ * djq_)); + GPU_Error_Check(cufftPlanMany(&d2zk_, 1, &nk_, &nk_, 1, nk_, &nkh, 1, nkh, CUFFT_D2Z, dip_ * djq_)); int njh = nj_ / 2 + 1; - CHECK(cufftPlanMany(&d2zj_, 1, &nj_, &nj_, 1, nj_, &njh, 1, njh, CUFFT_D2Z, dip_ * dkq_)); + GPU_Error_Check(cufftPlanMany(&d2zj_, 1, &nj_, &nj_, 1, nj_, &njh, 1, njh, CUFFT_D2Z, dip_ * dkq_)); int nih = ni_ / 2 + 1; - CHECK(cufftPlanMany(&d2zi_, 1, &ni_, &ni_, 1, ni_, &nih, 1, nih, CUFFT_D2Z, dkq_ * djp_)); + GPU_Error_Check(cufftPlanMany(&d2zi_, 1, &ni_, &ni_, 1, ni_, &nih, 1, nih, CUFFT_D2Z, dkq_ * djp_)); #ifndef MPI_GPU - CHECK(cudaHostAlloc(&ha_, bytes_ + bytes_, cudaHostAllocDefault)); + GPU_Error_Check(cudaHostAlloc(&ha_, bytes_ + bytes_, cudaHostAllocDefault)); assert(ha_); hb_ = ha_ + nMax; #endif @@ -99,12 +99,12 @@ PoissonZero3DBlockedGPU::PoissonZero3DBlockedGPU(const int n[3], const double lo PoissonZero3DBlockedGPU::~PoissonZero3DBlockedGPU() { #ifndef MPI_GPU - CHECK(cudaFreeHost(ha_)); + GPU_Error_Check(cudaFreeHost(ha_)); ha_ = hb_ = nullptr; #endif - CHECK(cufftDestroy(d2zi_)); - CHECK(cufftDestroy(d2zj_)); - CHECK(cufftDestroy(d2zk_)); + GPU_Error_Check(cufftDestroy(d2zi_)); + GPU_Error_Check(cufftDestroy(d2zj_)); + GPU_Error_Check(cufftDestroy(d2zk_)); MPI_Comm_free(&commI_); MPI_Comm_free(&commJ_); MPI_Comm_free(&commK_); @@ -167,11 +167,11 @@ void PoissonZero3DBlockedGPU::solve(const long bytes, double *const density, dou } }); #ifndef MPI_GPU - CHECK(cudaMemcpy(ha_, ua, bytes_, cudaMemcpyDeviceToHost)); + GPU_Error_Check(cudaMemcpy(ha_, ua, bytes_, cudaMemcpyDeviceToHost)); MPI_Alltoall(ha_, dip * djq * dk, MPI_DOUBLE, hb_, dip * djq * dk, MPI_DOUBLE, commK_); - CHECK(cudaMemcpyAsync(ub, hb_, bytes_, cudaMemcpyHostToDevice, 0)); + GPU_Error_Check(cudaMemcpyAsync(ub, hb_, bytes_, cudaMemcpyHostToDevice, 0)); #else - CHECK(cudaDeviceSynchronize()); + GPU_Error_Check(cudaDeviceSynchronize()); MPI_Alltoall(ua, dip * djq * dk, MPI_DOUBLE, ub, dip * djq * dk, MPI_DOUBLE, commK_); #endif gpuFor( @@ -193,7 +193,7 @@ void PoissonZero3DBlockedGPU::solve(const long bytes, double *const density, dou ua[ij + k] = ub[((pqb * dip + i) * djq + j) * dk + kkb]; } }); - CHECK(cufftExecD2Z(d2zk_, ua, uc)); + GPU_Error_Check(cufftExecD2Z(d2zk_, ua, uc)); gpuFor( dip, nk / 2 + 1, djq, GPU_LAMBDA(const int i, const int k, const int j) { if (k == 0) { @@ -218,11 +218,11 @@ void PoissonZero3DBlockedGPU::solve(const long bytes, double *const density, dou } }); #ifndef MPI_GPU - CHECK(cudaMemcpy(ha_, ua, bytes_, cudaMemcpyDeviceToHost)); + GPU_Error_Check(cudaMemcpy(ha_, ua, bytes_, cudaMemcpyDeviceToHost)); MPI_Alltoall(ha_, dip * dkq * djq, MPI_DOUBLE, hb_, dip * dkq * djq, MPI_DOUBLE, commJ_); - CHECK(cudaMemcpyAsync(ub, hb_, bytes_, cudaMemcpyHostToDevice, 0)); + GPU_Error_Check(cudaMemcpyAsync(ub, hb_, bytes_, cudaMemcpyHostToDevice, 0)); #else - CHECK(cudaDeviceSynchronize()); + GPU_Error_Check(cudaDeviceSynchronize()); MPI_Alltoall(ua, dip * dkq * djq, MPI_DOUBLE, ub, dip * dkq * djq, MPI_DOUBLE, commJ_); #endif gpuFor( @@ -243,7 +243,7 @@ void PoissonZero3DBlockedGPU::solve(const long bytes, double *const density, dou ua[ik + j] = ub[((qb * dip + i) * dkq + k) * djq + jb]; } }); - CHECK(cufftExecD2Z(d2zj_, ua, uc)); + GPU_Error_Check(cufftExecD2Z(d2zj_, ua, uc)); gpuFor( dkq, nj / 2 + 1, dip, GPU_LAMBDA(const int k, const int j, const int i) { if (j == 0) { @@ -268,11 +268,11 @@ void PoissonZero3DBlockedGPU::solve(const long bytes, double *const density, dou } }); #ifndef MPI_GPU - CHECK(cudaMemcpy(ha_, ua, bytes_, cudaMemcpyDeviceToHost)); + GPU_Error_Check(cudaMemcpy(ha_, ua, bytes_, cudaMemcpyDeviceToHost)); MPI_Alltoall(ha_, dkq * djp * dip, MPI_DOUBLE, hb_, dkq * djp * dip, MPI_DOUBLE, commI_); - CHECK(cudaMemcpyAsync(ub, hb_, bytes_, cudaMemcpyHostToDevice, 0)); + GPU_Error_Check(cudaMemcpyAsync(ub, hb_, bytes_, cudaMemcpyHostToDevice, 0)); #else - CHECK(cudaDeviceSynchronize()); + GPU_Error_Check(cudaDeviceSynchronize()); MPI_Alltoall(ua, dkq * djp * dip, MPI_DOUBLE, ub, dkq * djp * dip, MPI_DOUBLE, commI_); #endif gpuFor( @@ -296,7 +296,7 @@ void PoissonZero3DBlockedGPU::solve(const long bytes, double *const density, dou ua[kj + i] = ub[(((idb * mp + pb) * dkq + k) * djp + j) * dip + ib]; } }); - CHECK(cufftExecD2Z(d2zi_, ua, uc)); + GPU_Error_Check(cufftExecD2Z(d2zi_, ua, uc)); { #ifdef PARIS_GALACTIC_3PT const double si = M_PI / double(ni + ni); @@ -366,7 +366,7 @@ void PoissonZero3DBlockedGPU::solve(const long bytes, double *const density, dou } }); } - CHECK(cufftExecD2Z(d2zi_, ua, uc)); + GPU_Error_Check(cufftExecD2Z(d2zi_, ua, uc)); gpuFor( dkq, ni / 2 + 1, djp, GPU_LAMBDA(const int k, const int i, const int j) { if (i == 0) { @@ -390,11 +390,11 @@ void PoissonZero3DBlockedGPU::solve(const long bytes, double *const density, dou } }); #ifndef MPI_GPU - CHECK(cudaMemcpy(ha_, ua, bytes_, cudaMemcpyDeviceToHost)); + GPU_Error_Check(cudaMemcpy(ha_, ua, bytes_, cudaMemcpyDeviceToHost)); MPI_Alltoall(ha_, dkq * djp * dip, MPI_DOUBLE, hb_, dkq * djp * dip, MPI_DOUBLE, commI_); - CHECK(cudaMemcpyAsync(ub, hb_, bytes_, cudaMemcpyHostToDevice, 0)); + GPU_Error_Check(cudaMemcpyAsync(ub, hb_, bytes_, cudaMemcpyHostToDevice, 0)); #else - CHECK(cudaDeviceSynchronize()); + GPU_Error_Check(cudaDeviceSynchronize()); MPI_Alltoall(ua, dkq * djp * dip, MPI_DOUBLE, ub, dkq * djp * dip, MPI_DOUBLE, commI_); #endif gpuFor( @@ -423,7 +423,7 @@ void PoissonZero3DBlockedGPU::solve(const long bytes, double *const density, dou ua[ki + nj - j] = wa * apb - wb * amb; } }); - CHECK(cufftExecD2Z(d2zj_, ua, uc)); + GPU_Error_Check(cufftExecD2Z(d2zj_, ua, uc)); gpuFor( dip, nj / 2 + 1, dkq, GPU_LAMBDA(const int i, const int j, const int k) { if (j == 0) { @@ -448,11 +448,11 @@ void PoissonZero3DBlockedGPU::solve(const long bytes, double *const density, dou } }); #ifndef MPI_GPU - CHECK(cudaMemcpy(ha_, ua, bytes_, cudaMemcpyDeviceToHost)); + GPU_Error_Check(cudaMemcpy(ha_, ua, bytes_, cudaMemcpyDeviceToHost)); MPI_Alltoall(ha_, dip * djq * dkq, MPI_DOUBLE, hb_, dip * djq * dkq, MPI_DOUBLE, commJ_); - CHECK(cudaMemcpyAsync(ub, hb_, bytes_, cudaMemcpyHostToDevice, 0)); + GPU_Error_Check(cudaMemcpyAsync(ub, hb_, bytes_, cudaMemcpyHostToDevice, 0)); #else - CHECK(cudaDeviceSynchronize()); + GPU_Error_Check(cudaDeviceSynchronize()); MPI_Alltoall(ua, dip * djq * dkq, MPI_DOUBLE, ub, dip * djq * dkq, MPI_DOUBLE, commJ_); #endif gpuFor( @@ -481,7 +481,7 @@ void PoissonZero3DBlockedGPU::solve(const long bytes, double *const density, dou ua[ij + nk - k] = wa * apb - wb * amb; } }); - CHECK(cufftExecD2Z(d2zk_, ua, uc)); + GPU_Error_Check(cufftExecD2Z(d2zk_, ua, uc)); const double divN = 1.0 / (8.0 * double(ni) * double(nj) * double(nk)); gpuFor( dip, djq, nk / 2 + 1, GPU_LAMBDA(const int i, const int j, const int k) { @@ -504,11 +504,11 @@ void PoissonZero3DBlockedGPU::solve(const long bytes, double *const density, dou } }); #ifndef MPI_GPU - CHECK(cudaMemcpy(ha_, ua, bytes_, cudaMemcpyDeviceToHost)); + GPU_Error_Check(cudaMemcpy(ha_, ua, bytes_, cudaMemcpyDeviceToHost)); MPI_Alltoall(ha_, dip * djq * dk, MPI_DOUBLE, hb_, dip * djq * dk, MPI_DOUBLE, commK_); - CHECK(cudaMemcpyAsync(ub, hb_, bytes_, cudaMemcpyHostToDevice, 0)); + GPU_Error_Check(cudaMemcpyAsync(ub, hb_, bytes_, cudaMemcpyHostToDevice, 0)); #else - CHECK(cudaDeviceSynchronize()); + GPU_Error_Check(cudaDeviceSynchronize()); MPI_Alltoall(ua, dip * djq * dk, MPI_DOUBLE, ub, dip * djq * dk, MPI_DOUBLE, commK_); #endif gpuFor( diff --git a/src/gravity/potential_SOR_3D.cpp b/src/gravity/potential_SOR_3D.cpp index 2fdd9a91f..0cffeb981 100644 --- a/src/gravity/potential_SOR_3D.cpp +++ b/src/gravity/potential_SOR_3D.cpp @@ -136,7 +136,8 @@ void Potential_SOR_3D::Copy_Input_And_Initialize(Real *input_density, const Real if (!potential_initialized) { chprintf("SOR: Initializing Potential \n"); - CHECK(cudaMemcpy(F.potential_d, input_potential, n_cells_potential * sizeof(Real), cudaMemcpyHostToDevice)); + GPU_Error_Check( + cudaMemcpy(F.potential_d, input_potential, n_cells_potential * sizeof(Real), cudaMemcpyHostToDevice)); // Initialize_Potential( nx_local, ny_local, nz_local, n_ghost, // F.potential_d, F.density_d ); potential_initialized = true; diff --git a/src/gravity/potential_SOR_3D_gpu.cu b/src/gravity/potential_SOR_3D_gpu.cu index 9910b62cd..5646ded68 100644 --- a/src/gravity/potential_SOR_3D_gpu.cu +++ b/src/gravity/potential_SOR_3D_gpu.cu @@ -9,25 +9,25 @@ void Potential_SOR_3D::Allocate_Array_GPU_Real(Real **array_dev, grav_int_t size) { cudaMalloc((void **)array_dev, size * sizeof(Real)); - CudaCheckError(); + GPU_Error_Check(); } void Potential_SOR_3D::Allocate_Array_GPU_bool(bool **array_dev, grav_int_t size) { cudaMalloc((void **)array_dev, size * sizeof(bool)); - CudaCheckError(); + GPU_Error_Check(); } void Potential_SOR_3D::Free_Array_GPU_Real(Real *array_dev) { cudaFree(array_dev); - CudaCheckError(); + GPU_Error_Check(); } void Potential_SOR_3D::Free_Array_GPU_bool(bool *array_dev) { cudaFree(array_dev); - CudaCheckError(); + GPU_Error_Check(); } __global__ void Copy_Input_Kernel(int n_cells, Real *input_d, Real *density_d, Real Grav_Constant, Real dens_avrg, @@ -686,12 +686,12 @@ void Potential_SOR_3D::Unload_Transfer_Buffer_Half_GPU(int direction, int side, } void Potential_SOR_3D::Copy_Transfer_Buffer_To_Host(int size_buffer, Real *transfer_buffer_h, Real *transfer_buffer_d) { - CudaSafeCall(cudaMemcpy(transfer_buffer_h, transfer_buffer_d, size_buffer * sizeof(Real), cudaMemcpyDeviceToHost)); + GPU_Error_Check(cudaMemcpy(transfer_buffer_h, transfer_buffer_d, size_buffer * sizeof(Real), cudaMemcpyDeviceToHost)); } void Potential_SOR_3D::Copy_Transfer_Buffer_To_Device(int size_buffer, Real *transfer_buffer_h, Real *transfer_buffer_d) { - CudaSafeCall(cudaMemcpy(transfer_buffer_d, transfer_buffer_h, size_buffer * sizeof(Real), cudaMemcpyHostToDevice)); + GPU_Error_Check(cudaMemcpy(transfer_buffer_d, transfer_buffer_h, size_buffer * sizeof(Real), cudaMemcpyHostToDevice)); } #endif // GRAVITY diff --git a/src/gravity/potential_paris_3D.cu b/src/gravity/potential_paris_3D.cu index aa6af2652..c3a66ae9e 100644 --- a/src/gravity/potential_paris_3D.cu +++ b/src/gravity/potential_paris_3D.cu @@ -92,9 +92,9 @@ void PotentialParis3D::Get_Potential(const Real *const density, Real *const pote const int n = ni * nj * nk; #ifdef GRAVITY_GPU - CHECK(cudaMemcpy(db, density, densityBytes_, cudaMemcpyDeviceToDevice)); + GPU_Error_Check(cudaMemcpy(db, density, densityBytes_, cudaMemcpyDeviceToDevice)); #else - CHECK(cudaMemcpy(db, density, densityBytes_, cudaMemcpyHostToDevice)); + GPU_Error_Check(cudaMemcpy(db, density, densityBytes_, cudaMemcpyHostToDevice)); #endif const int ngi = ni + N_GHOST_POTENTIAL + N_GHOST_POTENTIAL; const int ngj = nj + N_GHOST_POTENTIAL + N_GHOST_POTENTIAL; @@ -111,9 +111,9 @@ void PotentialParis3D::Get_Potential(const Real *const density, Real *const pote assert(potential); #ifdef GRAVITY_GPU - CHECK(cudaMemcpy(potential, db, potentialBytes_, cudaMemcpyDeviceToDevice)); + GPU_Error_Check(cudaMemcpy(potential, db, potentialBytes_, cudaMemcpyDeviceToDevice)); #else - CHECK(cudaMemcpy(potential, db, potentialBytes_, cudaMemcpyDeviceToHost)); + GPU_Error_Check(cudaMemcpy(potential, db, potentialBytes_, cudaMemcpyDeviceToHost)); #endif } @@ -171,22 +171,22 @@ void PotentialParis3D::Initialize(const Real lx, const Real ly, const Real lz, c const long gg = N_GHOST_POTENTIAL + N_GHOST_POTENTIAL; potentialBytes_ = long(sizeof(Real)) * (dn_[0] + gg) * (dn_[1] + gg) * (dn_[2] + gg); - CHECK(cudaMalloc(reinterpret_cast(&da_), std::max(minBytes_, densityBytes_))); + GPU_Error_Check(cudaMalloc(reinterpret_cast(&da_), std::max(minBytes_, densityBytes_))); assert(da_); - CHECK(cudaMalloc(reinterpret_cast(&db_), std::max(minBytes_, potentialBytes_))); + GPU_Error_Check(cudaMalloc(reinterpret_cast(&db_), std::max(minBytes_, potentialBytes_))); assert(db_); } void PotentialParis3D::Reset() { if (db_) { - CHECK(cudaFree(db_)); + GPU_Error_Check(cudaFree(db_)); } db_ = nullptr; if (da_) { - CHECK(cudaFree(da_)); + GPU_Error_Check(cudaFree(da_)); } da_ = nullptr; diff --git a/src/gravity/potential_paris_galactic.cu b/src/gravity/potential_paris_galactic.cu index 5d6c758b9..fbb38df28 100644 --- a/src/gravity/potential_paris_galactic.cu +++ b/src/gravity/potential_paris_galactic.cu @@ -48,8 +48,8 @@ void PotentialParisGalactic::Get_Potential(const Real *const density, Real *cons const Real *const rho = density; Real *const phi = potential; #else - CHECK(cudaMemcpyAsync(da, density, densityBytes_, cudaMemcpyHostToDevice, 0)); - CHECK(cudaMemcpyAsync(dc_, potential, potentialBytes_, cudaMemcpyHostToDevice, 0)); + GPU_Error_Check(cudaMemcpyAsync(da, density, densityBytes_, cudaMemcpyHostToDevice, 0)); + GPU_Error_Check(cudaMemcpyAsync(dc_, potential, potentialBytes_, cudaMemcpyHostToDevice, 0)); const Real *const rho = da; Real *const phi = dc_; #endif @@ -106,7 +106,7 @@ void PotentialParisGalactic::Get_Potential(const Real *const density, Real *cons }); #ifndef GRAVITY_GPU - CHECK(cudaMemcpy(potential, dc_, potentialBytes_, cudaMemcpyDeviceToHost)); + GPU_Error_Check(cudaMemcpy(potential, dc_, potentialBytes_, cudaMemcpyDeviceToHost)); #endif } @@ -154,13 +154,13 @@ void PotentialParisGalactic::Initialize(const Real lx, const Real ly, const Real minBytes_ = pp_->bytes(); densityBytes_ = long(sizeof(Real)) * dn_[0] * dn_[1] * dn_[2]; - CHECK(cudaMalloc(reinterpret_cast(&da_), std::max(minBytes_, densityBytes_))); - CHECK(cudaMalloc(reinterpret_cast(&db_), std::max(minBytes_, densityBytes_))); + GPU_Error_Check(cudaMalloc(reinterpret_cast(&da_), std::max(minBytes_, densityBytes_))); + GPU_Error_Check(cudaMalloc(reinterpret_cast(&db_), std::max(minBytes_, densityBytes_))); #ifndef GRAVITY_GPU const long gg = N_GHOST_POTENTIAL + N_GHOST_POTENTIAL; potentialBytes_ = long(sizeof(Real)) * (dn_[0] + gg) * (dn_[1] + gg) * (dn_[2] + gg); - CHECK(cudaMalloc(reinterpret_cast(&dc_), potentialBytes_)); + GPU_Error_Check(cudaMalloc(reinterpret_cast(&dc_), potentialBytes_)); #endif } @@ -168,19 +168,19 @@ void PotentialParisGalactic::Reset() { #ifndef GRAVITY_GPU if (dc_) { - CHECK(cudaFree(dc_)); + GPU_Error_Check(cudaFree(dc_)); } dc_ = nullptr; potentialBytes_ = 0; #endif if (db_) { - CHECK(cudaFree(db_)); + GPU_Error_Check(cudaFree(db_)); } db_ = nullptr; if (da_) { - CHECK(cudaFree(da_)); + GPU_Error_Check(cudaFree(da_)); } da_ = nullptr; diff --git a/src/grid/cuda_boundaries.cu b/src/grid/cuda_boundaries.cu index f5dbe361d..baf846d3c 100644 --- a/src/grid/cuda_boundaries.cu +++ b/src/grid/cuda_boundaries.cu @@ -35,7 +35,7 @@ void PackBuffers3D(Real *buffer, Real *c_head, int nx, int ny, int n_fields, int dim3 dim1dBlock(TPB, 1, 1); hipLaunchKernelGGL(PackBuffers3DKernel, dim1dGrid, dim1dBlock, 0, 0, buffer, c_head, isize, jsize, ksize, nx, ny, idxoffset, buffer_ncells, n_fields, n_cells); - CHECK(cudaDeviceSynchronize()); + GPU_Error_Check(cudaDeviceSynchronize()); } __global__ void UnpackBuffers3DKernel(Real *buffer, Real *c_head, int isize, int jsize, int ksize, int nx, int ny, diff --git a/src/grid/grid3D.cpp b/src/grid/grid3D.cpp index b87eb0d7c..d22e1242b 100644 --- a/src/grid/grid3D.cpp +++ b/src/grid/grid3D.cpp @@ -283,7 +283,7 @@ void Grid3D::AllocateMemory(void) { // allocate memory for the conserved variable arrays // allocate all the memory to density, to insure contiguous memory - CudaSafeCall(cudaHostAlloc((void **)&C.host, H.n_fields * H.n_cells * sizeof(Real), cudaHostAllocDefault)); + GPU_Error_Check(cudaHostAlloc((void **)&C.host, H.n_fields * H.n_cells * sizeof(Real), cudaHostAllocDefault)); // point conserved variables to the appropriate locations C.density = &(C.host[grid_enum::density * H.n_cells]); @@ -310,7 +310,7 @@ void Grid3D::AllocateMemory(void) #endif // DE // allocate memory for the conserved variable arrays on the device - CudaSafeCall(cudaMalloc((void **)&C.device, H.n_fields * H.n_cells * sizeof(Real))); + GPU_Error_Check(cudaMalloc((void **)&C.device, H.n_fields * H.n_cells * sizeof(Real))); cuda_utilities::initGpuMemory(C.device, H.n_fields * H.n_cells * sizeof(Real)); C.d_density = C.device; C.d_momentum_x = &(C.device[H.n_cells]); @@ -336,8 +336,8 @@ void Grid3D::AllocateMemory(void) #endif // DE #if defined(GRAVITY) - CudaSafeCall(cudaHostAlloc(&C.Grav_potential, H.n_cells * sizeof(Real), cudaHostAllocDefault)); - CudaSafeCall(cudaMalloc((void **)&C.d_Grav_potential, H.n_cells * sizeof(Real))); + GPU_Error_Check(cudaHostAlloc(&C.Grav_potential, H.n_cells * sizeof(Real), cudaHostAllocDefault)); + GPU_Error_Check(cudaMalloc((void **)&C.d_Grav_potential, H.n_cells * sizeof(Real))); #else C.Grav_potential = NULL; C.d_Grav_potential = NULL; @@ -619,11 +619,11 @@ void Grid3D::Reset(void) void Grid3D::FreeMemory(void) { // free the conserved variable arrays - CudaSafeCall(cudaFreeHost(C.host)); + GPU_Error_Check(cudaFreeHost(C.host)); #ifdef GRAVITY - CudaSafeCall(cudaFreeHost(C.Grav_potential)); - CudaSafeCall(cudaFree(C.d_Grav_potential)); + GPU_Error_Check(cudaFreeHost(C.Grav_potential)); + GPU_Error_Check(cudaFree(C.d_Grav_potential)); #endif // If memory is single allocated, free the memory at the end of the simulation. diff --git a/src/grid/initial_conditions.cpp b/src/grid/initial_conditions.cpp index 768bf0960..89c42aa24 100644 --- a/src/grid/initial_conditions.cpp +++ b/src/grid/initial_conditions.cpp @@ -100,7 +100,7 @@ void Grid3D::Set_Initial_Conditions(Parameters P) } if (C.device != NULL) { - CudaSafeCall(cudaMemcpy(C.device, C.density, H.n_fields * H.n_cells * sizeof(Real), cudaMemcpyHostToDevice)); + GPU_Error_Check(cudaMemcpy(C.device, C.density, H.n_fields * H.n_cells * sizeof(Real), cudaMemcpyHostToDevice)); } } diff --git a/src/hydro/hydro_cuda.cu b/src/hydro/hydro_cuda.cu index c74654c0e..5651efbfb 100644 --- a/src/hydro/hydro_cuda.cu +++ b/src/hydro/hydro_cuda.cu @@ -589,7 +589,7 @@ Real Calc_dt_GPU(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n hipLaunchKernelGGL(Calc_dt_3D, launchParams.numBlocks, launchParams.threadsPerBlock, 0, 0, dev_conserved, dev_dti.data(), gamma, n_ghost, n_fields, nx, ny, nz, dx, dy, dz); } - CudaCheckError(); + GPU_Error_Check(); // Note: dev_dti[0] is DeviceVector syntactic sugar for returning a value via // cudaMemcpy diff --git a/src/hydro/hydro_cuda_tests.cu b/src/hydro/hydro_cuda_tests.cu index fe4b351f7..d633773cc 100644 --- a/src/hydro/hydro_cuda_tests.cu +++ b/src/hydro/hydro_cuda_tests.cu @@ -63,7 +63,7 @@ TEST(tHYDROCalcDt3D, CorrectInputExpectCorrectOutput) // Run the kernel hipLaunchKernelGGL(Calc_dt_3D, dim1dGrid, dim1dBlock, 0, 0, dev_conserved.data(), dev_dti.data(), gamma, n_ghost, n_fields, nx, ny, nz, dx, dy, dz); - CudaCheckError(); + GPU_Error_Check(); // Compare results // Check for equality and if not equal return difference diff --git a/src/integrators/VL_1D_cuda.cu b/src/integrators/VL_1D_cuda.cu index d21711cd3..88cf9bf7f 100644 --- a/src/integrators/VL_1D_cuda.cu +++ b/src/integrators/VL_1D_cuda.cu @@ -46,12 +46,12 @@ void VL_Algorithm_1D_CUDA(Real *d_conserved, int nx, int x_off, int n_ghost, Rea if (!memory_allocated) { // allocate memory on the GPU dev_conserved = d_conserved; - // CudaSafeCall( cudaMalloc((void**)&dev_conserved, + // GPU_Error_Check( cudaMalloc((void**)&dev_conserved, // n_fields*n_cells*sizeof(Real)) ); - CudaSafeCall(cudaMalloc((void **)&dev_conserved_half, n_fields * n_cells * sizeof(Real))); - CudaSafeCall(cudaMalloc((void **)&Q_Lx, n_fields * n_cells * sizeof(Real))); - CudaSafeCall(cudaMalloc((void **)&Q_Rx, n_fields * n_cells * sizeof(Real))); - CudaSafeCall(cudaMalloc((void **)&F_x, n_fields * n_cells * sizeof(Real))); + GPU_Error_Check(cudaMalloc((void **)&dev_conserved_half, n_fields * n_cells * sizeof(Real))); + GPU_Error_Check(cudaMalloc((void **)&Q_Lx, n_fields * n_cells * sizeof(Real))); + GPU_Error_Check(cudaMalloc((void **)&Q_Rx, n_fields * n_cells * sizeof(Real))); + GPU_Error_Check(cudaMalloc((void **)&F_x, n_fields * n_cells * sizeof(Real))); // If memory is single allocated: memory_allocated becomes true and // successive timesteps won't allocate memory. If the memory is not single @@ -64,7 +64,7 @@ void VL_Algorithm_1D_CUDA(Real *d_conserved, int nx, int x_off, int n_ghost, Rea // arrays hipLaunchKernelGGL(PCM_Reconstruction_1D, dimGrid, dimBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, n_ghost, gama, n_fields); - CudaCheckError(); + GPU_Error_Check(); // Step 2: Calculate first-order upwind fluxes #ifdef EXACT @@ -79,12 +79,12 @@ void VL_Algorithm_1D_CUDA(Real *d_conserved, int nx, int x_off, int n_ghost, Rea hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dimGrid, dimBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); #endif - CudaCheckError(); + GPU_Error_Check(); // Step 3: Update the conserved variables half a timestep hipLaunchKernelGGL(Update_Conserved_Variables_1D_half, dimGrid, dimBlock, 0, 0, dev_conserved, dev_conserved_half, F_x, n_cells, n_ghost, dx, 0.5 * dt, gama, n_fields); - CudaCheckError(); + GPU_Error_Check(); // Step 4: Construct left and right interface values using updated conserved // variables @@ -107,7 +107,7 @@ void VL_Algorithm_1D_CUDA(Real *d_conserved, int nx, int x_off, int n_ghost, Rea #ifdef PPMC hipLaunchKernelGGL(PPMC_VL, dimGrid, dimBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, gama, 0); #endif - CudaCheckError(); + GPU_Error_Check(); // Step 5: Calculate the fluxes again #ifdef EXACT @@ -122,7 +122,7 @@ void VL_Algorithm_1D_CUDA(Real *d_conserved, int nx, int x_off, int n_ghost, Rea hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dimGrid, dimBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); #endif - CudaCheckError(); + GPU_Error_Check(); #ifdef DE // Compute the divergence of velocity before updating the conserved array, @@ -135,12 +135,12 @@ void VL_Algorithm_1D_CUDA(Real *d_conserved, int nx, int x_off, int n_ghost, Rea // Step 6: Update the conserved variable array hipLaunchKernelGGL(Update_Conserved_Variables_1D, dimGrid, dimBlock, 0, 0, dev_conserved, F_x, n_cells, x_off, n_ghost, dx, xbound, dt, gama, n_fields, custom_grav); - CudaCheckError(); + GPU_Error_Check(); #ifdef DE hipLaunchKernelGGL(Select_Internal_Energy_1D, dimGrid, dimBlock, 0, 0, dev_conserved, nx, n_ghost, n_fields); hipLaunchKernelGGL(Sync_Energies_1D, dimGrid, dimBlock, 0, 0, dev_conserved, nx, n_ghost, gama, n_fields); - CudaCheckError(); + GPU_Error_Check(); #endif return; diff --git a/src/integrators/VL_2D_cuda.cu b/src/integrators/VL_2D_cuda.cu index 8fdaf2ec9..4c46638ed 100644 --- a/src/integrators/VL_2D_cuda.cu +++ b/src/integrators/VL_2D_cuda.cu @@ -44,16 +44,16 @@ void VL_Algorithm_2D_CUDA(Real *d_conserved, int nx, int ny, int x_off, int y_of if (!memory_allocated) { // allocate GPU arrays - // CudaSafeCall( cudaMalloc((void**)&dev_conserved, + // GPU_Error_Check( cudaMalloc((void**)&dev_conserved, // n_fields*n_cells*sizeof(Real)) ); dev_conserved = d_conserved; - CudaSafeCall(cudaMalloc((void **)&dev_conserved_half, n_fields * n_cells * sizeof(Real))); - CudaSafeCall(cudaMalloc((void **)&Q_Lx, n_fields * n_cells * sizeof(Real))); - CudaSafeCall(cudaMalloc((void **)&Q_Rx, n_fields * n_cells * sizeof(Real))); - CudaSafeCall(cudaMalloc((void **)&Q_Ly, n_fields * n_cells * sizeof(Real))); - CudaSafeCall(cudaMalloc((void **)&Q_Ry, n_fields * n_cells * sizeof(Real))); - CudaSafeCall(cudaMalloc((void **)&F_x, n_fields * n_cells * sizeof(Real))); - CudaSafeCall(cudaMalloc((void **)&F_y, n_fields * n_cells * sizeof(Real))); + GPU_Error_Check(cudaMalloc((void **)&dev_conserved_half, n_fields * n_cells * sizeof(Real))); + GPU_Error_Check(cudaMalloc((void **)&Q_Lx, n_fields * n_cells * sizeof(Real))); + GPU_Error_Check(cudaMalloc((void **)&Q_Rx, n_fields * n_cells * sizeof(Real))); + GPU_Error_Check(cudaMalloc((void **)&Q_Ly, n_fields * n_cells * sizeof(Real))); + GPU_Error_Check(cudaMalloc((void **)&Q_Ry, n_fields * n_cells * sizeof(Real))); + GPU_Error_Check(cudaMalloc((void **)&F_x, n_fields * n_cells * sizeof(Real))); + GPU_Error_Check(cudaMalloc((void **)&F_y, n_fields * n_cells * sizeof(Real))); // If memory is single allocated: memory_allocated becomes true and // successive timesteps won't allocate memory. If the memory is not single @@ -66,7 +66,7 @@ void VL_Algorithm_2D_CUDA(Real *d_conserved, int nx, int ny, int x_off, int y_of // arrays hipLaunchKernelGGL(PCM_Reconstruction_2D, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, Q_Ly, Q_Ry, nx, ny, n_ghost, gama, n_fields); - CudaCheckError(); + GPU_Error_Check(); // Step 2: Calculate first-order upwind fluxes #ifdef EXACT @@ -87,12 +87,12 @@ void VL_Algorithm_2D_CUDA(Real *d_conserved, int nx, int ny, int x_off, int y_of hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields); #endif - CudaCheckError(); + GPU_Error_Check(); // Step 3: Update the conserved variables half a timestep hipLaunchKernelGGL(Update_Conserved_Variables_2D_half, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, dev_conserved_half, F_x, F_y, nx, ny, n_ghost, dx, dy, 0.5 * dt, gama, n_fields); - CudaCheckError(); + GPU_Error_Check(); // Step 4: Construct left and right interface values using updated conserved // variables @@ -118,7 +118,7 @@ void VL_Algorithm_2D_CUDA(Real *d_conserved, int nx, int ny, int x_off, int y_of hipLaunchKernelGGL(PPMC_VL, dim2dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, gama, 0); hipLaunchKernelGGL(PPMC_VL, dim2dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Ly, Q_Ry, nx, ny, nz, gama, 1); #endif // PPMC - CudaCheckError(); + GPU_Error_Check(); // Step 5: Calculate the fluxes again #ifdef EXACT @@ -139,7 +139,7 @@ void VL_Algorithm_2D_CUDA(Real *d_conserved, int nx, int ny, int x_off, int y_of hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields); #endif - CudaCheckError(); + GPU_Error_Check(); #ifdef DE // Compute the divergence of velocity before updating the conserved array, @@ -152,12 +152,12 @@ void VL_Algorithm_2D_CUDA(Real *d_conserved, int nx, int ny, int x_off, int y_of // Step 6: Update the conserved variable array hipLaunchKernelGGL(Update_Conserved_Variables_2D, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, F_x, F_y, nx, ny, x_off, y_off, n_ghost, dx, dy, xbound, ybound, dt, gama, n_fields, custom_grav); - CudaCheckError(); + GPU_Error_Check(); #ifdef DE hipLaunchKernelGGL(Select_Internal_Energy_2D, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, n_ghost, n_fields); hipLaunchKernelGGL(Sync_Energies_2D, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, n_ghost, gama, n_fields); - CudaCheckError(); + GPU_Error_Check(); #endif return; diff --git a/src/integrators/VL_3D_cuda.cu b/src/integrators/VL_3D_cuda.cu index 097b40625..dbb7966ff 100644 --- a/src/integrators/VL_3D_cuda.cu +++ b/src/integrators/VL_3D_cuda.cu @@ -94,16 +94,16 @@ void VL_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, int #else // not MHD size_t const arraySize = n_fields * n_cells * sizeof(Real); #endif // MHD - CudaSafeCall(cudaMalloc((void **)&dev_conserved_half, n_fields * n_cells * sizeof(Real))); - CudaSafeCall(cudaMalloc((void **)&Q_Lx, arraySize)); - CudaSafeCall(cudaMalloc((void **)&Q_Rx, arraySize)); - CudaSafeCall(cudaMalloc((void **)&Q_Ly, arraySize)); - CudaSafeCall(cudaMalloc((void **)&Q_Ry, arraySize)); - CudaSafeCall(cudaMalloc((void **)&Q_Lz, arraySize)); - CudaSafeCall(cudaMalloc((void **)&Q_Rz, arraySize)); - CudaSafeCall(cudaMalloc((void **)&F_x, arraySize)); - CudaSafeCall(cudaMalloc((void **)&F_y, arraySize)); - CudaSafeCall(cudaMalloc((void **)&F_z, arraySize)); + GPU_Error_Check(cudaMalloc((void **)&dev_conserved_half, n_fields * n_cells * sizeof(Real))); + GPU_Error_Check(cudaMalloc((void **)&Q_Lx, arraySize)); + GPU_Error_Check(cudaMalloc((void **)&Q_Rx, arraySize)); + GPU_Error_Check(cudaMalloc((void **)&Q_Ly, arraySize)); + GPU_Error_Check(cudaMalloc((void **)&Q_Ry, arraySize)); + GPU_Error_Check(cudaMalloc((void **)&Q_Lz, arraySize)); + GPU_Error_Check(cudaMalloc((void **)&Q_Rz, arraySize)); + GPU_Error_Check(cudaMalloc((void **)&F_x, arraySize)); + GPU_Error_Check(cudaMalloc((void **)&F_y, arraySize)); + GPU_Error_Check(cudaMalloc((void **)&F_z, arraySize)); cuda_utilities::initGpuMemory(dev_conserved_half, n_fields * n_cells * sizeof(Real)); cuda_utilities::initGpuMemory(Q_Lx, arraySize); @@ -117,7 +117,7 @@ void VL_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, int cuda_utilities::initGpuMemory(F_z, arraySize); #ifdef MHD - CudaSafeCall(cudaMalloc((void **)&ctElectricFields, ctArraySize)); + GPU_Error_Check(cudaMalloc((void **)&ctElectricFields, ctArraySize)); #endif // MHD #if defined(GRAVITY) @@ -134,14 +134,14 @@ void VL_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, int } #if defined(GRAVITY) && !defined(GRAVITY_GPU) - CudaSafeCall(cudaMemcpy(dev_grav_potential, temp_potential, n_cells * sizeof(Real), cudaMemcpyHostToDevice)); + GPU_Error_Check(cudaMemcpy(dev_grav_potential, temp_potential, n_cells * sizeof(Real), cudaMemcpyHostToDevice)); #endif // GRAVITY and GRAVITY_GPU // Step 1: Use PCM reconstruction to put primitive variables into interface // arrays hipLaunchKernelGGL(PCM_Reconstruction_3D, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, Q_Ly, Q_Ry, Q_Lz, Q_Rz, nx, ny, nz, n_ghost, gama, n_fields); - CudaCheckError(); + GPU_Error_Check(); // Step 2: Calculate first-order upwind fluxes #ifdef EXACT @@ -184,24 +184,24 @@ void VL_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, int hipLaunchKernelGGL(mhd::Calculate_HLLD_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lz, Q_Rz, &(dev_conserved[(grid_enum::magnetic_z)*n_cells]), F_z, n_cells, gama, 2, n_fields); #endif // HLLD - CudaCheckError(); + GPU_Error_Check(); #ifdef MHD // Step 2.5: Compute the Constrained transport electric fields hipLaunchKernelGGL(mhd::Calculate_CT_Electric_Fields, dim1dGrid, dim1dBlock, 0, 0, F_x, F_y, F_z, dev_conserved, ctElectricFields, nx, ny, nz, n_cells); - CudaCheckError(); + GPU_Error_Check(); #endif // MHD // Step 3: Update the conserved variables half a timestep hipLaunchKernelGGL(Update_Conserved_Variables_3D_half, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, dev_conserved_half, F_x, F_y, F_z, nx, ny, nz, n_ghost, dx, dy, dz, 0.5 * dt, gama, n_fields, density_floor); - CudaCheckError(); + GPU_Error_Check(); #ifdef MHD // Update the magnetic fields hipLaunchKernelGGL(mhd::Update_Magnetic_Field_3D, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, dev_conserved_half, ctElectricFields, nx, ny, nz, n_cells, 0.5 * dt, dx, dy, dz); - CudaCheckError(); + GPU_Error_Check(); #endif // MHD // Step 4: Construct left and right interface values using updated conserved @@ -239,7 +239,7 @@ void VL_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, int hipLaunchKernelGGL(PPMC_VL, dim1dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Ly, Q_Ry, nx, ny, nz, gama, 1); hipLaunchKernelGGL(PPMC_VL, dim1dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Lz, Q_Rz, nx, ny, nz, gama, 2); #endif // PPMC - CudaCheckError(); + GPU_Error_Check(); // Step 5: Calculate the fluxes again #ifdef EXACT @@ -282,7 +282,7 @@ void VL_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, int hipLaunchKernelGGL(mhd::Calculate_HLLD_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lz, Q_Rz, &(dev_conserved_half[(grid_enum::magnetic_z)*n_cells]), F_z, n_cells, gama, 2, n_fields); #endif // HLLD - CudaCheckError(); + GPU_Error_Check(); #ifdef DE // Compute the divergence of Vel before updating the conserved array, this @@ -290,40 +290,40 @@ void VL_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, int // Update_Conserved_Variables_3D hipLaunchKernelGGL(Partial_Update_Advected_Internal_Energy_3D, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, Q_Ly, Q_Ry, Q_Lz, Q_Rz, nx, ny, nz, n_ghost, dx, dy, dz, dt, gama, n_fields); - CudaCheckError(); + GPU_Error_Check(); #endif // DE #ifdef MHD // Step 5.5: Compute the Constrained transport electric fields hipLaunchKernelGGL(mhd::Calculate_CT_Electric_Fields, dim1dGrid, dim1dBlock, 0, 0, F_x, F_y, F_z, dev_conserved_half, ctElectricFields, nx, ny, nz, n_cells); - CudaCheckError(); + GPU_Error_Check(); #endif // MHD // Step 6: Update the conserved variable array hipLaunchKernelGGL(Update_Conserved_Variables_3D, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, Q_Ly, Q_Ry, Q_Lz, Q_Rz, F_x, F_y, F_z, nx, ny, nz, x_off, y_off, z_off, n_ghost, dx, dy, dz, xbound, ybound, zbound, dt, gama, n_fields, custom_grav, density_floor, dev_grav_potential); - CudaCheckError(); + GPU_Error_Check(); #ifdef MHD // Update the magnetic fields hipLaunchKernelGGL(mhd::Update_Magnetic_Field_3D, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, dev_conserved, ctElectricFields, nx, ny, nz, n_cells, dt, dx, dy, dz); - CudaCheckError(); + GPU_Error_Check(); #endif // MHD #ifdef DE hipLaunchKernelGGL(Select_Internal_Energy_3D, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, nz, n_ghost, n_fields); hipLaunchKernelGGL(Sync_Energies_3D, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, nz, n_ghost, gama, n_fields); - CudaCheckError(); + GPU_Error_Check(); #endif // DE #ifdef TEMPERATURE_FLOOR hipLaunchKernelGGL(Apply_Temperature_Floor, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, nz, n_ghost, n_fields, U_floor); - CudaCheckError(); + GPU_Error_Check(); #endif // TEMPERATURE_FLOOR return; diff --git a/src/integrators/simple_1D_cuda.cu b/src/integrators/simple_1D_cuda.cu index 067735fcd..36401a8fe 100644 --- a/src/integrators/simple_1D_cuda.cu +++ b/src/integrators/simple_1D_cuda.cu @@ -41,11 +41,11 @@ void Simple_Algorithm_1D_CUDA(Real *d_conserved, int nx, int x_off, int n_ghost, if (!memory_allocated) { // allocate memory on the GPU dev_conserved = d_conserved; - // CudaSafeCall( cudaMalloc((void**)&dev_conserved, + // GPU_Error_Check( cudaMalloc((void**)&dev_conserved, // n_fields*n_cells*sizeof(Real)) ); - CudaSafeCall(cudaMalloc((void **)&Q_Lx, n_fields * n_cells * sizeof(Real))); - CudaSafeCall(cudaMalloc((void **)&Q_Rx, n_fields * n_cells * sizeof(Real))); - CudaSafeCall(cudaMalloc((void **)&F_x, (n_fields)*n_cells * sizeof(Real))); + GPU_Error_Check(cudaMalloc((void **)&Q_Lx, n_fields * n_cells * sizeof(Real))); + GPU_Error_Check(cudaMalloc((void **)&Q_Rx, n_fields * n_cells * sizeof(Real))); + GPU_Error_Check(cudaMalloc((void **)&F_x, (n_fields)*n_cells * sizeof(Real))); // If memory is single allocated: memory_allocated becomes true and // successive timesteps won't allocate memory. If the memory is not single @@ -58,26 +58,26 @@ void Simple_Algorithm_1D_CUDA(Real *d_conserved, int nx, int x_off, int n_ghost, #ifdef PCM hipLaunchKernelGGL(PCM_Reconstruction_1D, dimGrid, dimBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, n_ghost, gama, n_fields); - CudaCheckError(); + GPU_Error_Check(); #endif #ifdef PLMP hipLaunchKernelGGL(PLMP_cuda, dimGrid, dimBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, n_fields); - CudaCheckError(); + GPU_Error_Check(); #endif #ifdef PLMC hipLaunchKernelGGL(PLMC_cuda, dimGrid, dimBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, dx, dt, gama, 0, n_fields); - CudaCheckError(); + GPU_Error_Check(); #endif #ifdef PPMP hipLaunchKernelGGL(PPMP_cuda, dimGrid, dimBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, n_fields); - CudaCheckError(); + GPU_Error_Check(); #endif #ifdef PPMC hipLaunchKernelGGL(PPMC_CTU, dimGrid, dimBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, dx, dt, gama, 0); - CudaCheckError(); + GPU_Error_Check(); #endif // Step 2: Calculate the fluxes @@ -93,7 +93,7 @@ void Simple_Algorithm_1D_CUDA(Real *d_conserved, int nx, int x_off, int n_ghost, hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dimGrid, dimBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); #endif - CudaCheckError(); + GPU_Error_Check(); #ifdef DE // Compute the divergence of Vel before updating the conserved array, this @@ -106,13 +106,13 @@ void Simple_Algorithm_1D_CUDA(Real *d_conserved, int nx, int x_off, int n_ghost, // Step 3: Update the conserved variable array hipLaunchKernelGGL(Update_Conserved_Variables_1D, dimGrid, dimBlock, 0, 0, dev_conserved, F_x, n_cells, x_off, n_ghost, dx, xbound, dt, gama, n_fields, custom_grav); - CudaCheckError(); + GPU_Error_Check(); // Synchronize the total and internal energy, if using dual-energy formalism #ifdef DE hipLaunchKernelGGL(Select_Internal_Energy_1D, dimGrid, dimBlock, 0, 0, dev_conserved, nx, n_ghost, n_fields); hipLaunchKernelGGL(Sync_Energies_1D, dimGrid, dimBlock, 0, 0, dev_conserved, n_cells, n_ghost, gama, n_fields); - CudaCheckError(); + GPU_Error_Check(); #endif return; diff --git a/src/integrators/simple_2D_cuda.cu b/src/integrators/simple_2D_cuda.cu index c1b2cc149..9361bf9e7 100644 --- a/src/integrators/simple_2D_cuda.cu +++ b/src/integrators/simple_2D_cuda.cu @@ -39,14 +39,14 @@ void Simple_Algorithm_2D_CUDA(Real *d_conserved, int nx, int ny, int x_off, int if (!memory_allocated) { // allocate memory on the GPU dev_conserved = d_conserved; - // CudaSafeCall( cudaMalloc((void**)&dev_conserved, + // GPU_Error_Check( cudaMalloc((void**)&dev_conserved, // n_fields*n_cells*sizeof(Real)) ); - CudaSafeCall(cudaMalloc((void **)&Q_Lx, n_fields * n_cells * sizeof(Real))); - CudaSafeCall(cudaMalloc((void **)&Q_Rx, n_fields * n_cells * sizeof(Real))); - CudaSafeCall(cudaMalloc((void **)&Q_Ly, n_fields * n_cells * sizeof(Real))); - CudaSafeCall(cudaMalloc((void **)&Q_Ry, n_fields * n_cells * sizeof(Real))); - CudaSafeCall(cudaMalloc((void **)&F_x, n_fields * n_cells * sizeof(Real))); - CudaSafeCall(cudaMalloc((void **)&F_y, n_fields * n_cells * sizeof(Real))); + GPU_Error_Check(cudaMalloc((void **)&Q_Lx, n_fields * n_cells * sizeof(Real))); + GPU_Error_Check(cudaMalloc((void **)&Q_Rx, n_fields * n_cells * sizeof(Real))); + GPU_Error_Check(cudaMalloc((void **)&Q_Ly, n_fields * n_cells * sizeof(Real))); + GPU_Error_Check(cudaMalloc((void **)&Q_Ry, n_fields * n_cells * sizeof(Real))); + GPU_Error_Check(cudaMalloc((void **)&F_x, n_fields * n_cells * sizeof(Real))); + GPU_Error_Check(cudaMalloc((void **)&F_y, n_fields * n_cells * sizeof(Real))); // If memory is single allocated: memory_allocated becomes true and // successive timesteps won't allocate memory. If the memory is not single @@ -82,7 +82,7 @@ void Simple_Algorithm_2D_CUDA(Real *d_conserved, int nx, int ny, int x_off, int hipLaunchKernelGGL(PPMC_CTU, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, dx, dt, gama, 0); hipLaunchKernelGGL(PPMC_CTU, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Ly, Q_Ry, nx, ny, nz, dy, dt, gama, 1); #endif - CudaCheckError(); + GPU_Error_Check(); // Step 2: Calculate the fluxes #ifdef EXACT @@ -103,7 +103,7 @@ void Simple_Algorithm_2D_CUDA(Real *d_conserved, int nx, int ny, int x_off, int hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields); #endif - CudaCheckError(); + GPU_Error_Check(); #ifdef DE // Compute the divergence of Vel before updating the conserved array, this @@ -116,13 +116,13 @@ void Simple_Algorithm_2D_CUDA(Real *d_conserved, int nx, int ny, int x_off, int // Step 3: Update the conserved variable array hipLaunchKernelGGL(Update_Conserved_Variables_2D, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, F_x, F_y, nx, ny, x_off, y_off, n_ghost, dx, dy, xbound, ybound, dt, gama, n_fields, custom_grav); - CudaCheckError(); + GPU_Error_Check(); // Synchronize the total and internal energy #ifdef DE hipLaunchKernelGGL(Select_Internal_Energy_2D, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, n_ghost, n_fields); hipLaunchKernelGGL(Sync_Energies_2D, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, n_ghost, gama, n_fields); - CudaCheckError(); + GPU_Error_Check(); #endif return; diff --git a/src/integrators/simple_3D_cuda.cu b/src/integrators/simple_3D_cuda.cu index 17a37c6c1..865a6f9c8 100644 --- a/src/integrators/simple_3D_cuda.cu +++ b/src/integrators/simple_3D_cuda.cu @@ -46,7 +46,7 @@ void Simple_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, if (!memory_allocated) { size_t global_free, global_total; - CudaSafeCall(cudaMemGetInfo(&global_free, &global_total)); + GPU_Error_Check(cudaMemGetInfo(&global_free, &global_total)); // allocate memory on the GPU chprintf( @@ -56,18 +56,18 @@ void Simple_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, chprintf(" Memory needed: %f GB Free: %f GB Total: %f GB \n", n_fields * n_cells * sizeof(Real) / 1e9, global_free / 1e9, global_total / 1e9); dev_conserved = d_conserved; - CudaSafeCall(cudaMalloc((void **)&Q_Lx, n_fields * n_cells * sizeof(Real))); - CudaSafeCall(cudaMalloc((void **)&Q_Rx, n_fields * n_cells * sizeof(Real))); - CudaSafeCall(cudaMalloc((void **)&Q_Ly, n_fields * n_cells * sizeof(Real))); - CudaSafeCall(cudaMalloc((void **)&Q_Ry, n_fields * n_cells * sizeof(Real))); - CudaSafeCall(cudaMalloc((void **)&Q_Lz, n_fields * n_cells * sizeof(Real))); - CudaSafeCall(cudaMalloc((void **)&Q_Rz, n_fields * n_cells * sizeof(Real))); - CudaSafeCall(cudaMalloc((void **)&F_x, n_fields * n_cells * sizeof(Real))); - CudaSafeCall(cudaMalloc((void **)&F_y, n_fields * n_cells * sizeof(Real))); - CudaSafeCall(cudaMalloc((void **)&F_z, n_fields * n_cells * sizeof(Real))); + GPU_Error_Check(cudaMalloc((void **)&Q_Lx, n_fields * n_cells * sizeof(Real))); + GPU_Error_Check(cudaMalloc((void **)&Q_Rx, n_fields * n_cells * sizeof(Real))); + GPU_Error_Check(cudaMalloc((void **)&Q_Ly, n_fields * n_cells * sizeof(Real))); + GPU_Error_Check(cudaMalloc((void **)&Q_Ry, n_fields * n_cells * sizeof(Real))); + GPU_Error_Check(cudaMalloc((void **)&Q_Lz, n_fields * n_cells * sizeof(Real))); + GPU_Error_Check(cudaMalloc((void **)&Q_Rz, n_fields * n_cells * sizeof(Real))); + GPU_Error_Check(cudaMalloc((void **)&F_x, n_fields * n_cells * sizeof(Real))); + GPU_Error_Check(cudaMalloc((void **)&F_y, n_fields * n_cells * sizeof(Real))); + GPU_Error_Check(cudaMalloc((void **)&F_z, n_fields * n_cells * sizeof(Real))); #if defined(GRAVITY) - // CudaSafeCall( cudaMalloc((void**)&dev_grav_potential, + // GPU_Error_Check( cudaMalloc((void**)&dev_grav_potential, // n_cells*sizeof(Real)) ); dev_grav_potential = d_grav_potential; #else @@ -83,7 +83,7 @@ void Simple_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, } #if defined(GRAVITY) && !defined(GRAVITY_GPU) - CudaSafeCall(cudaMemcpy(dev_grav_potential, temp_potential, n_cells * sizeof(Real), cudaMemcpyHostToDevice)); + GPU_Error_Check(cudaMemcpy(dev_grav_potential, temp_potential, n_cells * sizeof(Real), cudaMemcpyHostToDevice)); #endif // Step 1: Construct left and right interface values using updated conserved @@ -120,7 +120,7 @@ void Simple_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, hipLaunchKernelGGL(PPMC_CTU, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, dx, dt, gama, 0); hipLaunchKernelGGL(PPMC_CTU, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Ly, Q_Ry, nx, ny, nz, dy, dt, gama, 1); hipLaunchKernelGGL(PPMC_CTU, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lz, Q_Rz, nx, ny, nz, dz, dt, gama, 2); - CudaCheckError(); + GPU_Error_Check(); #endif // PPMC // Step 2: Calculate the fluxes @@ -156,7 +156,7 @@ void Simple_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, hipLaunchKernelGGL(Calculate_HLL_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lz, Q_Rz, F_z, nx, ny, nz, n_ghost, gama, 2, n_fields); #endif // HLL - CudaCheckError(); + GPU_Error_Check(); #ifdef DE // Compute the divergence of Vel before updating the conserved array, this @@ -164,26 +164,26 @@ void Simple_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, // Update_Conserved_Variables_3D hipLaunchKernelGGL(Partial_Update_Advected_Internal_Energy_3D, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, Q_Ly, Q_Ry, Q_Lz, Q_Rz, nx, ny, nz, n_ghost, dx, dy, dz, dt, gama, n_fields); - CudaCheckError(); + GPU_Error_Check(); #endif // Step 3: Update the conserved variable array hipLaunchKernelGGL(Update_Conserved_Variables_3D, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, Q_Ly, Q_Ry, Q_Lz, Q_Rz, F_x, F_y, F_z, nx, ny, nz, x_off, y_off, z_off, n_ghost, dx, dy, dz, xbound, ybound, zbound, dt, gama, n_fields, custom_grav, density_floor, dev_grav_potential); - CudaCheckError(); + GPU_Error_Check(); #ifdef DE hipLaunchKernelGGL(Select_Internal_Energy_3D, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, nz, n_ghost, n_fields); hipLaunchKernelGGL(Sync_Energies_3D, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, nz, n_ghost, gama, n_fields); - CudaCheckError(); + GPU_Error_Check(); #endif #ifdef TEMPERATURE_FLOOR hipLaunchKernelGGL(Apply_Temperature_Floor, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, nz, n_ghost, n_fields, U_floor); - CudaCheckError(); + GPU_Error_Check(); #endif // TEMPERATURE_FLOOR return; diff --git a/src/io/io_gpu.cu b/src/io/io_gpu.cu index 495b0bd19..a793ab792 100644 --- a/src/io/io_gpu.cu +++ b/src/io/io_gpu.cu @@ -104,7 +104,8 @@ void Write_HDF5_Field_3D(int nx, int ny, int nx_real, int ny_real, int nz_real, dim3 dim1dBlock(TPB, 1, 1); hipLaunchKernelGGL(CopyReal3D_GPU_Kernel, dim1dGrid, dim1dBlock, 0, 0, nx, ny, nx_real, ny_real, nz_real, n_ghost, device_buffer, device_source, mhd_direction); - CudaSafeCall(cudaMemcpy(buffer, device_buffer, nx_real * ny_real * nz_real * sizeof(double), cudaMemcpyDeviceToHost)); + GPU_Error_Check( + cudaMemcpy(buffer, device_buffer, nx_real * ny_real * nz_real * sizeof(double), cudaMemcpyDeviceToHost)); // Write Buffer to HDF5 status = Write_HDF5_Dataset(file_id, dataspace_id, buffer, name); @@ -132,7 +133,8 @@ void Write_HDF5_Field_3D(int nx, int ny, int nx_real, int ny_real, int nz_real, dim3 dim1dBlock(TPB, 1, 1); hipLaunchKernelGGL(CopyReal3D_GPU_Kernel, dim1dGrid, dim1dBlock, 0, 0, nx, ny, nx_real, ny_real, nz_real, n_ghost, device_buffer, device_source, mhd_direction); - CudaSafeCall(cudaMemcpy(buffer, device_buffer, nx_real * ny_real * nz_real * sizeof(float), cudaMemcpyDeviceToHost)); + GPU_Error_Check( + cudaMemcpy(buffer, device_buffer, nx_real * ny_real * nz_real * sizeof(float), cudaMemcpyDeviceToHost)); // Write Buffer to HDF5 status = Write_HDF5_Dataset(file_id, dataspace_id, buffer, name); @@ -153,8 +155,8 @@ void Fill_HDF5_Buffer_From_Grid_GPU(int nx, int ny, int nz, int nx_real, int ny_ dim3 dim1dBlock(TPB, 1, 1); hipLaunchKernelGGL(CopyReal3D_GPU_Kernel, dim1dGrid, dim1dBlock, 0, 0, nx, ny, nx_real, ny_real, nz_real, n_ghost, device_hdf5_buffer, device_grid_buffer, mhd_direction); - CudaSafeCall(cudaMemcpy(hdf5_buffer, device_hdf5_buffer, nx_real * ny_real * nz_real * sizeof(Real), - cudaMemcpyDeviceToHost)); + GPU_Error_Check(cudaMemcpy(hdf5_buffer, device_hdf5_buffer, nx_real * ny_real * nz_real * sizeof(Real), + cudaMemcpyDeviceToHost)); return; } @@ -164,13 +166,15 @@ void Fill_HDF5_Buffer_From_Grid_GPU(int nx, int ny, int nz, int nx_real, int ny_ dim3 dim1dBlock(TPB, 1, 1); hipLaunchKernelGGL(CopyReal2D_GPU_Kernel, dim1dGrid, dim1dBlock, 0, 0, nx, ny, nx_real, ny_real, nz_real, n_ghost, device_hdf5_buffer, device_grid_buffer); - CudaSafeCall(cudaMemcpy(hdf5_buffer, device_hdf5_buffer, nx_real * ny_real * sizeof(Real), cudaMemcpyDeviceToHost)); + GPU_Error_Check( + cudaMemcpy(hdf5_buffer, device_hdf5_buffer, nx_real * ny_real * sizeof(Real), cudaMemcpyDeviceToHost)); return; } // 1D case if (nx > 1 && ny == 1 && nz == 1) { - CudaSafeCall(cudaMemcpy(hdf5_buffer, device_grid_buffer + n_ghost, nx_real * sizeof(Real), cudaMemcpyDeviceToHost)); + GPU_Error_Check( + cudaMemcpy(hdf5_buffer, device_grid_buffer + n_ghost, nx_real * sizeof(Real), cudaMemcpyDeviceToHost)); return; } } diff --git a/src/mhd/ct_electric_fields_tests.cu b/src/mhd/ct_electric_fields_tests.cu index d5fcfb656..d3a8ea4dc 100644 --- a/src/mhd/ct_electric_fields_tests.cu +++ b/src/mhd/ct_electric_fields_tests.cu @@ -52,11 +52,11 @@ class tMHDCalculateCTElectricFields : public ::testing::Test dimBlock(TPB, 1, 1) { // Allocate device arrays - CudaSafeCall(cudaMalloc(&dev_fluxX, fluxX.size() * sizeof(double))); - CudaSafeCall(cudaMalloc(&dev_fluxY, fluxY.size() * sizeof(double))); - CudaSafeCall(cudaMalloc(&dev_fluxZ, fluxZ.size() * sizeof(double))); - CudaSafeCall(cudaMalloc(&dev_grid, grid.size() * sizeof(double))); - CudaSafeCall(cudaMalloc(&dev_testCTElectricFields, testCTElectricFields.size() * sizeof(double))); + GPU_Error_Check(cudaMalloc(&dev_fluxX, fluxX.size() * sizeof(double))); + GPU_Error_Check(cudaMalloc(&dev_fluxY, fluxY.size() * sizeof(double))); + GPU_Error_Check(cudaMalloc(&dev_fluxZ, fluxZ.size() * sizeof(double))); + GPU_Error_Check(cudaMalloc(&dev_grid, grid.size() * sizeof(double))); + GPU_Error_Check(cudaMalloc(&dev_testCTElectricFields, testCTElectricFields.size() * sizeof(double))); // Populate the grids with values where vector.at(i) = double(i). The // values chosen aren't that important, just that every cell has a unique @@ -96,21 +96,21 @@ class tMHDCalculateCTElectricFields : public ::testing::Test void Run_Test() { // Copy values to GPU - CudaSafeCall(cudaMemcpy(dev_fluxX, fluxX.data(), fluxX.size() * sizeof(Real), cudaMemcpyHostToDevice)); - CudaSafeCall(cudaMemcpy(dev_fluxY, fluxY.data(), fluxY.size() * sizeof(Real), cudaMemcpyHostToDevice)); - CudaSafeCall(cudaMemcpy(dev_fluxZ, fluxZ.data(), fluxZ.size() * sizeof(Real), cudaMemcpyHostToDevice)); - CudaSafeCall(cudaMemcpy(dev_grid, grid.data(), grid.size() * sizeof(Real), cudaMemcpyHostToDevice)); - CudaSafeCall(cudaMemcpy(dev_testCTElectricFields, testCTElectricFields.data(), - testCTElectricFields.size() * sizeof(Real), cudaMemcpyHostToDevice)); + GPU_Error_Check(cudaMemcpy(dev_fluxX, fluxX.data(), fluxX.size() * sizeof(Real), cudaMemcpyHostToDevice)); + GPU_Error_Check(cudaMemcpy(dev_fluxY, fluxY.data(), fluxY.size() * sizeof(Real), cudaMemcpyHostToDevice)); + GPU_Error_Check(cudaMemcpy(dev_fluxZ, fluxZ.data(), fluxZ.size() * sizeof(Real), cudaMemcpyHostToDevice)); + GPU_Error_Check(cudaMemcpy(dev_grid, grid.data(), grid.size() * sizeof(Real), cudaMemcpyHostToDevice)); + GPU_Error_Check(cudaMemcpy(dev_testCTElectricFields, testCTElectricFields.data(), + testCTElectricFields.size() * sizeof(Real), cudaMemcpyHostToDevice)); // Call the kernel to test hipLaunchKernelGGL(mhd::Calculate_CT_Electric_Fields, dimGrid, dimBlock, 0, 0, dev_fluxX, dev_fluxY, dev_fluxZ, dev_grid, dev_testCTElectricFields, nx, ny, nz, n_cells); - CudaCheckError(); + GPU_Error_Check(); // Copy test data back - CudaSafeCall(cudaMemcpy(testCTElectricFields.data(), dev_testCTElectricFields, - testCTElectricFields.size() * sizeof(Real), cudaMemcpyDeviceToHost)); + GPU_Error_Check(cudaMemcpy(testCTElectricFields.data(), dev_testCTElectricFields, + testCTElectricFields.size() * sizeof(Real), cudaMemcpyDeviceToHost)); cudaDeviceSynchronize(); // Check the results diff --git a/src/mhd/magnetic_divergence.cu b/src/mhd/magnetic_divergence.cu index cc639a8a7..f49e04218 100644 --- a/src/mhd/magnetic_divergence.cu +++ b/src/mhd/magnetic_divergence.cu @@ -90,7 +90,7 @@ Real checkMagneticDivergence(Grid3D const &G) // Now lets get the local maximum divergence hipLaunchKernelGGL(mhd::calculateMagneticDivergence, launchParams.numBlocks, launchParams.threadsPerBlock, 0, 0, G.C.device, dev_maxDivergence.data(), G.H.dx, G.H.dy, G.H.dz, G.H.nx, G.H.ny, G.H.nz, G.H.n_cells); - CudaCheckError(); + GPU_Error_Check(); Real max_magnetic_divergence = dev_maxDivergence[0]; #ifdef MPI_CHOLLA diff --git a/src/mhd/magnetic_update_tests.cu b/src/mhd/magnetic_update_tests.cu index db47d658e..7cfb8757c 100644 --- a/src/mhd/magnetic_update_tests.cu +++ b/src/mhd/magnetic_update_tests.cu @@ -44,9 +44,9 @@ class tMHDUpdateMagneticField3D : public ::testing::Test dimBlock(TPB, 1, 1) { // Allocate device arrays - CudaSafeCall(cudaMalloc(&dev_sourceGrid, sourceGrid.size() * sizeof(double))); - CudaSafeCall(cudaMalloc(&dev_destinationGrid, destinationGrid.size() * sizeof(double))); - CudaSafeCall(cudaMalloc(&dev_ctElectricFields, ctElectricFields.size() * sizeof(double))); + GPU_Error_Check(cudaMalloc(&dev_sourceGrid, sourceGrid.size() * sizeof(double))); + GPU_Error_Check(cudaMalloc(&dev_destinationGrid, destinationGrid.size() * sizeof(double))); + GPU_Error_Check(cudaMalloc(&dev_ctElectricFields, ctElectricFields.size() * sizeof(double))); // Populate the grids with values where vector.at(i) = double(i). The // values chosen aren't that important, just that every cell has a unique @@ -83,21 +83,21 @@ class tMHDUpdateMagneticField3D : public ::testing::Test void Run_Test() { // Copy values to GPU - CudaSafeCall( + GPU_Error_Check( cudaMemcpy(dev_sourceGrid, sourceGrid.data(), sourceGrid.size() * sizeof(Real), cudaMemcpyHostToDevice)); - CudaSafeCall(cudaMemcpy(dev_destinationGrid, destinationGrid.data(), destinationGrid.size() * sizeof(Real), - cudaMemcpyHostToDevice)); - CudaSafeCall(cudaMemcpy(dev_ctElectricFields, ctElectricFields.data(), ctElectricFields.size() * sizeof(Real), - cudaMemcpyHostToDevice)); + GPU_Error_Check(cudaMemcpy(dev_destinationGrid, destinationGrid.data(), destinationGrid.size() * sizeof(Real), + cudaMemcpyHostToDevice)); + GPU_Error_Check(cudaMemcpy(dev_ctElectricFields, ctElectricFields.data(), ctElectricFields.size() * sizeof(Real), + cudaMemcpyHostToDevice)); // Call the kernel to test hipLaunchKernelGGL(mhd::Update_Magnetic_Field_3D, dimGrid, dimBlock, 0, 0, dev_sourceGrid, dev_destinationGrid, dev_ctElectricFields, nx, ny, nz, n_cells, dt, dx, dy, dz); - CudaCheckError(); + GPU_Error_Check(); // Copy test data back - CudaSafeCall(cudaMemcpy(destinationGrid.data(), dev_destinationGrid, destinationGrid.size() * sizeof(Real), - cudaMemcpyDeviceToHost)); + GPU_Error_Check(cudaMemcpy(destinationGrid.data(), dev_destinationGrid, destinationGrid.size() * sizeof(Real), + cudaMemcpyDeviceToHost)); cudaDeviceSynchronize(); // Check the results diff --git a/src/mpi/mpi_routines.cpp b/src/mpi/mpi_routines.cpp index 0aa9f31c5..c80d7bd60 100644 --- a/src/mpi/mpi_routines.cpp +++ b/src/mpi/mpi_routines.cpp @@ -600,18 +600,18 @@ void Allocate_MPI_DeviceBuffers(struct Header *H) chprintf("Allocating MPI communication buffers on GPU "); chprintf("(nx = %ld, ny = %ld, nz = %ld).\n", xbsize, ybsize, zbsize); - CudaSafeCall(cudaMalloc(&d_send_buffer_x0, xbsize * sizeof(Real))); - CudaSafeCall(cudaMalloc(&d_send_buffer_x1, xbsize * sizeof(Real))); - CudaSafeCall(cudaMalloc(&d_recv_buffer_x0, xbsize * sizeof(Real))); - CudaSafeCall(cudaMalloc(&d_recv_buffer_x1, xbsize * sizeof(Real))); - CudaSafeCall(cudaMalloc(&d_send_buffer_y0, ybsize * sizeof(Real))); - CudaSafeCall(cudaMalloc(&d_send_buffer_y1, ybsize * sizeof(Real))); - CudaSafeCall(cudaMalloc(&d_recv_buffer_y0, ybsize * sizeof(Real))); - CudaSafeCall(cudaMalloc(&d_recv_buffer_y1, ybsize * sizeof(Real))); - CudaSafeCall(cudaMalloc(&d_send_buffer_z0, zbsize * sizeof(Real))); - CudaSafeCall(cudaMalloc(&d_send_buffer_z1, zbsize * sizeof(Real))); - CudaSafeCall(cudaMalloc(&d_recv_buffer_z0, zbsize * sizeof(Real))); - CudaSafeCall(cudaMalloc(&d_recv_buffer_z1, zbsize * sizeof(Real))); + GPU_Error_Check(cudaMalloc(&d_send_buffer_x0, xbsize * sizeof(Real))); + GPU_Error_Check(cudaMalloc(&d_send_buffer_x1, xbsize * sizeof(Real))); + GPU_Error_Check(cudaMalloc(&d_recv_buffer_x0, xbsize * sizeof(Real))); + GPU_Error_Check(cudaMalloc(&d_recv_buffer_x1, xbsize * sizeof(Real))); + GPU_Error_Check(cudaMalloc(&d_send_buffer_y0, ybsize * sizeof(Real))); + GPU_Error_Check(cudaMalloc(&d_send_buffer_y1, ybsize * sizeof(Real))); + GPU_Error_Check(cudaMalloc(&d_recv_buffer_y0, ybsize * sizeof(Real))); + GPU_Error_Check(cudaMalloc(&d_recv_buffer_y1, ybsize * sizeof(Real))); + GPU_Error_Check(cudaMalloc(&d_send_buffer_z0, zbsize * sizeof(Real))); + GPU_Error_Check(cudaMalloc(&d_send_buffer_z1, zbsize * sizeof(Real))); + GPU_Error_Check(cudaMalloc(&d_recv_buffer_z0, zbsize * sizeof(Real))); + GPU_Error_Check(cudaMalloc(&d_recv_buffer_z1, zbsize * sizeof(Real))); #if !defined(MPI_GPU) h_send_buffer_x0 = (Real *)malloc(xbsize * sizeof(Real)); @@ -640,18 +640,18 @@ void Allocate_MPI_DeviceBuffers(struct Header *H) "Allocating MPI communication buffers on GPU for particle transfers ( " "N_Particles: %d ).\n", N_PARTICLES_TRANSFER); - CudaSafeCall(cudaMalloc(&d_send_buffer_x0_particles, buffer_length_particles_x0_send * sizeof(Real))); - CudaSafeCall(cudaMalloc(&d_send_buffer_x1_particles, buffer_length_particles_x1_send * sizeof(Real))); - CudaSafeCall(cudaMalloc(&d_send_buffer_y0_particles, buffer_length_particles_y0_send * sizeof(Real))); - CudaSafeCall(cudaMalloc(&d_send_buffer_y1_particles, buffer_length_particles_y1_send * sizeof(Real))); - CudaSafeCall(cudaMalloc(&d_send_buffer_z0_particles, buffer_length_particles_z0_send * sizeof(Real))); - CudaSafeCall(cudaMalloc(&d_send_buffer_z1_particles, buffer_length_particles_z1_send * sizeof(Real))); - CudaSafeCall(cudaMalloc(&d_recv_buffer_x0_particles, buffer_length_particles_x0_recv * sizeof(Real))); - CudaSafeCall(cudaMalloc(&d_recv_buffer_x1_particles, buffer_length_particles_x1_recv * sizeof(Real))); - CudaSafeCall(cudaMalloc(&d_recv_buffer_y0_particles, buffer_length_particles_y0_recv * sizeof(Real))); - CudaSafeCall(cudaMalloc(&d_recv_buffer_y1_particles, buffer_length_particles_y1_recv * sizeof(Real))); - CudaSafeCall(cudaMalloc(&d_recv_buffer_z0_particles, buffer_length_particles_z0_recv * sizeof(Real))); - CudaSafeCall(cudaMalloc(&d_recv_buffer_z1_particles, buffer_length_particles_z1_recv * sizeof(Real))); + GPU_Error_Check(cudaMalloc(&d_send_buffer_x0_particles, buffer_length_particles_x0_send * sizeof(Real))); + GPU_Error_Check(cudaMalloc(&d_send_buffer_x1_particles, buffer_length_particles_x1_send * sizeof(Real))); + GPU_Error_Check(cudaMalloc(&d_send_buffer_y0_particles, buffer_length_particles_y0_send * sizeof(Real))); + GPU_Error_Check(cudaMalloc(&d_send_buffer_y1_particles, buffer_length_particles_y1_send * sizeof(Real))); + GPU_Error_Check(cudaMalloc(&d_send_buffer_z0_particles, buffer_length_particles_z0_send * sizeof(Real))); + GPU_Error_Check(cudaMalloc(&d_send_buffer_z1_particles, buffer_length_particles_z1_send * sizeof(Real))); + GPU_Error_Check(cudaMalloc(&d_recv_buffer_x0_particles, buffer_length_particles_x0_recv * sizeof(Real))); + GPU_Error_Check(cudaMalloc(&d_recv_buffer_x1_particles, buffer_length_particles_x1_recv * sizeof(Real))); + GPU_Error_Check(cudaMalloc(&d_recv_buffer_y0_particles, buffer_length_particles_y0_recv * sizeof(Real))); + GPU_Error_Check(cudaMalloc(&d_recv_buffer_y1_particles, buffer_length_particles_y1_recv * sizeof(Real))); + GPU_Error_Check(cudaMalloc(&d_recv_buffer_z0_particles, buffer_length_particles_z0_recv * sizeof(Real))); + GPU_Error_Check(cudaMalloc(&d_recv_buffer_z1_particles, buffer_length_particles_z1_recv * sizeof(Real))); #endif // PARTICLES && PARTICLES_GPU // CPU relies on host buffers, GPU without MPI_GPU relies on host buffers diff --git a/src/particles/density_CIC_gpu.cu b/src/particles/density_CIC_gpu.cu index 68346912e..756c48643 100644 --- a/src/particles/density_CIC_gpu.cu +++ b/src/particles/density_CIC_gpu.cu @@ -14,8 +14,8 @@ #ifdef GRAVITY_GPU void Grid3D::Copy_Particles_Density_to_GPU() { - CudaSafeCall(cudaMemcpy(Particles.G.density_dev, Particles.G.density, Particles.G.n_cells * sizeof(Real), - cudaMemcpyHostToDevice)); + GPU_Error_Check(cudaMemcpy(Particles.G.density_dev, Particles.G.density, Particles.G.n_cells * sizeof(Real), + cudaMemcpyHostToDevice)); } #endif @@ -166,13 +166,13 @@ void Particles3D::Get_Density_CIC_GPU_function(part_int_t n_local, Real particle hipLaunchKernelGGL(Get_Density_CIC_Kernel, dim1dGrid, dim1dBlock, 0, 0, n_local, particle_mass, density_dev, pos_x_dev, pos_y_dev, pos_z_dev, mass_dev, xMin, yMin, zMin, xMax, yMax, zMax, dx, dy, dz, nx_local, ny_local, nz_local, n_ghost_particles_grid); - CudaCheckError(); + GPU_Error_Check(); cudaDeviceSynchronize(); } #if !defined(GRAVITY_GPU) // Copy the density from device to host - CudaSafeCall(cudaMemcpy(density_h, density_dev, n_cells * sizeof(Real), cudaMemcpyDeviceToHost)); + GPU_Error_Check(cudaMemcpy(density_h, density_dev, n_cells * sizeof(Real), cudaMemcpyDeviceToHost)); #endif } diff --git a/src/particles/feedback_CIC_gpu.cu b/src/particles/feedback_CIC_gpu.cu index b0d0cf288..7fa7c967f 100644 --- a/src/particles/feedback_CIC_gpu.cu +++ b/src/particles/feedback_CIC_gpu.cu @@ -121,8 +121,8 @@ void supernova::initState(struct Parameters* P, part_int_t n_local, Real allocat // (i.e. assumes regular temporal spacing) snr_dt = (time_sn_end - time_sn_start) / (snr.size() - 1); - CHECK(cudaMalloc((void**)&dev_snr, snr.size() * sizeof(Real))); - CHECK(cudaMemcpy(dev_snr, snr.data(), snr.size() * sizeof(Real), cudaMemcpyHostToDevice)); + GPU_Error_Check(cudaMalloc((void**)&dev_snr, snr.size() * sizeof(Real))); + GPU_Error_Check(cudaMemcpy(dev_snr, snr.data(), snr.size() * sizeof(Real), cudaMemcpyHostToDevice)); } else { chprintf("No SN rate file specified. Using constant rate\n"); @@ -139,7 +139,7 @@ void supernova::initState(struct Parameters* P, part_int_t n_local, Real allocat dim3 block(TPB_FEEDBACK); hipLaunchKernelGGL(Init_State_Kernel, grid, block, 0, 0, P->prng_seed, randStates); - CHECK(cudaDeviceSynchronize()); + GPU_Error_Check(cudaDeviceSynchronize()); chprintf("supernova::initState end: n_states=%ld, ngrid=%d, threads=%d\n", n_states, ngrid, TPB_FEEDBACK); } @@ -673,15 +673,15 @@ Real supernova::Cluster_Feedback(Grid3D& G, FeedbackAnalysis& analysis) int* d_prev_N; if (G.Particles.n_local > 0) { - CHECK(cudaMalloc(&d_dti, sizeof(Real))); - CHECK(cudaMemcpy(d_dti, &h_dti, sizeof(Real), cudaMemcpyHostToDevice)); - CHECK(cudaMalloc(&d_prev_dens, G.Particles.n_local * sizeof(Real))); - CHECK(cudaMalloc(&d_prev_N, G.Particles.n_local * sizeof(int))); - CHECK(cudaMemset(d_prev_dens, 0, G.Particles.n_local * sizeof(Real))); - CHECK(cudaMemset(d_prev_N, 0, G.Particles.n_local * sizeof(int))); + GPU_Error_Check(cudaMalloc(&d_dti, sizeof(Real))); + GPU_Error_Check(cudaMemcpy(d_dti, &h_dti, sizeof(Real), cudaMemcpyHostToDevice)); + GPU_Error_Check(cudaMalloc(&d_prev_dens, G.Particles.n_local * sizeof(Real))); + GPU_Error_Check(cudaMalloc(&d_prev_N, G.Particles.n_local * sizeof(int))); + GPU_Error_Check(cudaMemset(d_prev_dens, 0, G.Particles.n_local * sizeof(Real))); + GPU_Error_Check(cudaMemset(d_prev_N, 0, G.Particles.n_local * sizeof(int))); ngrid = std::ceil((1. * G.Particles.n_local) / TPB_FEEDBACK); - CHECK(cudaMalloc((void**)&d_info, FEED_INFO_N * ngrid * sizeof(Real))); + GPU_Error_Check(cudaMalloc((void**)&d_info, FEED_INFO_N * ngrid * sizeof(Real))); } // TODO: info collection and max dti calculation // assumes ngrid is 1. The reason being that reduction of @@ -700,7 +700,7 @@ Real supernova::Cluster_Feedback(Grid3D& G, FeedbackAnalysis& analysis) supernova::randStates, d_prev_dens, d_prev_N, direction, dev_snr, snr_dt, time_sn_start, time_sn_end, G.H.n_step); - CHECK(cudaMemcpy(&h_dti, d_dti, sizeof(Real), cudaMemcpyDeviceToHost)); + GPU_Error_Check(cudaMemcpy(&h_dti, d_dti, sizeof(Real), cudaMemcpyDeviceToHost)); } #ifdef MPI_CHOLLA @@ -721,7 +721,7 @@ Real supernova::Cluster_Feedback(Grid3D& G, FeedbackAnalysis& analysis) supernova::randStates, d_prev_dens, d_prev_N, direction, dev_snr, snr_dt, time_sn_start, time_sn_end, G.H.n_step); - CHECK(cudaDeviceSynchronize()); + GPU_Error_Check(cudaDeviceSynchronize()); } G.H.dt = C_cfl / h_dti; } @@ -729,11 +729,11 @@ Real supernova::Cluster_Feedback(Grid3D& G, FeedbackAnalysis& analysis) } while (direction == -1); if (G.Particles.n_local > 0) { - CHECK(cudaMemcpy(&h_info, d_info, FEED_INFO_N * sizeof(Real), cudaMemcpyDeviceToHost)); - CHECK(cudaFree(d_dti)); - CHECK(cudaFree(d_info)); - CHECK(cudaFree(d_prev_dens)); - CHECK(cudaFree(d_prev_N)); + GPU_Error_Check(cudaMemcpy(&h_info, d_info, FEED_INFO_N * sizeof(Real), cudaMemcpyDeviceToHost)); + GPU_Error_Check(cudaFree(d_dti)); + GPU_Error_Check(cudaFree(d_info)); + GPU_Error_Check(cudaFree(d_prev_dens)); + GPU_Error_Check(cudaFree(d_prev_N)); } #ifdef MPI_CHOLLA diff --git a/src/particles/gravity_CIC_gpu.cu b/src/particles/gravity_CIC_gpu.cu index 556166a65..4711b1a32 100644 --- a/src/particles/gravity_CIC_gpu.cu +++ b/src/particles/gravity_CIC_gpu.cu @@ -19,7 +19,7 @@ // Copy the potential from host to device void Particles3D::Copy_Potential_To_GPU(Real *potential_host, Real *potential_dev, int n_cells_potential) { - CudaSafeCall(cudaMemcpy(potential_dev, potential_host, n_cells_potential * sizeof(Real), cudaMemcpyHostToDevice)); + GPU_Error_Check(cudaMemcpy(potential_dev, potential_host, n_cells_potential * sizeof(Real), cudaMemcpyHostToDevice)); } // Kernel to compute the gradient of the potential @@ -132,7 +132,7 @@ void Particles3D::Get_Gravity_Field_Particles_GPU_function(int nx_local, int ny_ hipLaunchKernelGGL(Get_Gravity_Field_Particles_Kernel, dim3dGrid, dim3dBlock, 0, 0, potential_dev, gravity_x_dev, gravity_y_dev, gravity_z_dev, nx_local, ny_local, nz_local, n_ghost_particles_grid, N_GHOST_POTENTIAL, dx, dy, dz); - CudaCheckError(); + GPU_Error_Check(); } // Get CIC indexes from the particles positions @@ -284,7 +284,7 @@ void Particles3D::Get_Gravity_CIC_GPU_function(part_int_t n_local, int nx_local, hipLaunchKernelGGL(Get_Gravity_CIC_Kernel, dim1dGrid, dim1dBlock, 0, 0, n_local, gravity_x_dev, gravity_y_dev, gravity_z_dev, pos_x_dev, pos_y_dev, pos_z_dev, grav_x_dev, grav_y_dev, grav_z_dev, xMin, yMin, zMin, xMax, yMax, zMax, dx, dy, dz, nx_local, ny_local, nz_local, n_ghost_particles_grid); - CudaCheckError(); + GPU_Error_Check(); } } diff --git a/src/particles/io_particles.cpp b/src/particles/io_particles.cpp index 41226b7be..e986c5287 100644 --- a/src/particles/io_particles.cpp +++ b/src/particles/io_particles.cpp @@ -550,12 +550,12 @@ void Grid3D::Write_Particles_Data_HDF5(hid_t file_id) #ifdef PARTICLES_GPU // Copy the device arrays from the device to the host - CudaSafeCall(cudaMemcpy(Particles.G.density, Particles.G.density_dev, Particles.G.n_cells * sizeof(Real), - cudaMemcpyDeviceToHost)); + GPU_Error_Check(cudaMemcpy(Particles.G.density, Particles.G.density_dev, Particles.G.n_cells * sizeof(Real), + cudaMemcpyDeviceToHost)); #endif // PARTICLES_GPU #if defined(OUTPUT_POTENTIAL) && defined(ONLY_PARTICLES) && defined(GRAVITY_GPU) - CudaSafeCall(cudaMemcpy(Grav.F.potential_h, Grav.F.potential_d, Grav.n_cells_potential * sizeof(Real), - cudaMemcpyDeviceToHost)); + GPU_Error_Check(cudaMemcpy(Grav.F.potential_h, Grav.F.potential_d, Grav.n_cells_potential * sizeof(Real), + cudaMemcpyDeviceToHost)); #endif // OUTPUT_POTENTIAL // Count Current Total Particles diff --git a/src/particles/particles_3D_gpu.cu b/src/particles/particles_3D_gpu.cu index d72199179..d72c9bc81 100644 --- a/src/particles/particles_3D_gpu.cu +++ b/src/particles/particles_3D_gpu.cu @@ -16,7 +16,7 @@ void Particles3D::Free_GPU_Array_Real(Real *array) { cudaFree(array); } void Particles3D::Allocate_Particles_Grid_Field_Real(Real **array_dev, int size) { size_t global_free, global_total; - CudaSafeCall(cudaMemGetInfo(&global_free, &global_total)); + GPU_Error_Check(cudaMemGetInfo(&global_free, &global_total)); #ifdef PRINT_GPU_MEMORY chprintf("Allocating GPU Memory: %ld MB free \n", global_free / 1000000); #endif @@ -26,7 +26,7 @@ void Particles3D::Allocate_Particles_Grid_Field_Real(Real **array_dev, int size) printf(" Requested Memory: %ld MB \n", size * sizeof(Real) / 1000000); exit(-1); } - CudaSafeCall(cudaMalloc((void **)array_dev, size * sizeof(Real))); + GPU_Error_Check(cudaMalloc((void **)array_dev, size * sizeof(Real))); cudaDeviceSynchronize(); } @@ -38,7 +38,7 @@ void Particles3D::Allocate_Particles_Grid_Field_Real(Real **array_dev, int size) void Particles3D::Print_Max_Memory_Usage() { size_t global_free, global_total; - CudaSafeCall(cudaMemGetInfo(&global_free, &global_total)); + GPU_Error_Check(cudaMemGetInfo(&global_free, &global_total)); cudaDeviceSynchronize(); part_int_t n_local_max, n_total, mem_usage; @@ -78,13 +78,13 @@ void Copy_Device_to_Device(T *src_array_dev, T *dst_array_dev, part_int_t size) dim3 dim1dGrid(ngrid, 1, 1); dim3 dim1dBlock(TPB_PARTICLES, 1, 1); hipLaunchKernelGGL(Copy_Device_to_Device_Kernel, dim1dGrid, dim1dBlock, 0, 0, src_array_dev, dst_array_dev, size); - CudaCheckError(); + GPU_Error_Check(); } void Particles3D::Allocate_Particles_GPU_Array_Real(Real **array_dev, part_int_t size) { size_t global_free, global_total; - CudaSafeCall(cudaMemGetInfo(&global_free, &global_total)); + GPU_Error_Check(cudaMemGetInfo(&global_free, &global_total)); #ifdef PRINT_GPU_MEMORY chprintf("Allocating GPU Memory: %ld MB free \n", global_free / 1000000); #endif @@ -94,14 +94,14 @@ void Particles3D::Allocate_Particles_GPU_Array_Real(Real **array_dev, part_int_t printf(" Requested Memory: %ld MB \n", size * sizeof(Real) / 1000000); exit(-1); } - CudaSafeCall(cudaMalloc((void **)array_dev, size * sizeof(Real))); + GPU_Error_Check(cudaMalloc((void **)array_dev, size * sizeof(Real))); cudaDeviceSynchronize(); } void Particles3D::Allocate_Particles_GPU_Array_int(int **array_dev, part_int_t size) { size_t global_free, global_total; - CudaSafeCall(cudaMemGetInfo(&global_free, &global_total)); + GPU_Error_Check(cudaMemGetInfo(&global_free, &global_total)); #ifdef PRINT_GPU_MEMORY chprintf("Allocating GPU Memory: %ld MB free \n", global_free / 1000000); #endif @@ -111,14 +111,14 @@ void Particles3D::Allocate_Particles_GPU_Array_int(int **array_dev, part_int_t s printf(" Requested Memory: %ld MB \n", size * sizeof(int) / 1000000); exit(-1); } - CudaSafeCall(cudaMalloc((void **)array_dev, size * sizeof(int))); + GPU_Error_Check(cudaMalloc((void **)array_dev, size * sizeof(int))); cudaDeviceSynchronize(); } void Particles3D::Allocate_Particles_GPU_Array_Part_Int(part_int_t **array_dev, part_int_t size) { size_t global_free, global_total; - CudaSafeCall(cudaMemGetInfo(&global_free, &global_total)); + GPU_Error_Check(cudaMemGetInfo(&global_free, &global_total)); #ifdef PRINT_GPU_MEMORY chprintf("Allocating GPU Memory: %ld MB free \n", global_free / 1000000); #endif @@ -128,14 +128,14 @@ void Particles3D::Allocate_Particles_GPU_Array_Part_Int(part_int_t **array_dev, printf(" Requested Memory: %ld MB \n", size * sizeof(part_int_t) / 1000000); exit(-1); } - CudaSafeCall(cudaMalloc((void **)array_dev, size * sizeof(part_int_t))); + GPU_Error_Check(cudaMalloc((void **)array_dev, size * sizeof(part_int_t))); cudaDeviceSynchronize(); } void Particles3D::Allocate_Particles_GPU_Array_bool(bool **array_dev, part_int_t size) { size_t global_free, global_total; - CudaSafeCall(cudaMemGetInfo(&global_free, &global_total)); + GPU_Error_Check(cudaMemGetInfo(&global_free, &global_total)); #ifdef PRINT_GPU_MEMORY chprintf("Allocating GPU Memory: %ld MB free \n", global_free / 1000000); #endif @@ -145,33 +145,33 @@ void Particles3D::Allocate_Particles_GPU_Array_bool(bool **array_dev, part_int_t printf(" Requested Memory: %ld MB \n", size * sizeof(bool) / 1000000); exit(-1); } - CudaSafeCall(cudaMalloc((void **)array_dev, size * sizeof(bool))); + GPU_Error_Check(cudaMalloc((void **)array_dev, size * sizeof(bool))); cudaDeviceSynchronize(); } void Particles3D::Copy_Particles_Array_Real_Host_to_Device(Real *array_host, Real *array_dev, part_int_t size) { - CudaSafeCall(cudaMemcpy(array_dev, array_host, size * sizeof(Real), cudaMemcpyHostToDevice)); + GPU_Error_Check(cudaMemcpy(array_dev, array_host, size * sizeof(Real), cudaMemcpyHostToDevice)); cudaDeviceSynchronize(); } void Particles3D::Copy_Particles_Array_Real_Device_to_Host(Real *array_dev, Real *array_host, part_int_t size) { - CudaSafeCall(cudaMemcpy(array_host, array_dev, size * sizeof(Real), cudaMemcpyDeviceToHost)); + GPU_Error_Check(cudaMemcpy(array_host, array_dev, size * sizeof(Real), cudaMemcpyDeviceToHost)); cudaDeviceSynchronize(); } void Particles3D::Copy_Particles_Array_Int_Host_to_Device(part_int_t *array_host, part_int_t *array_dev, part_int_t size) { - CudaSafeCall(cudaMemcpy(array_dev, array_host, size * sizeof(part_int_t), cudaMemcpyHostToDevice)); + GPU_Error_Check(cudaMemcpy(array_dev, array_host, size * sizeof(part_int_t), cudaMemcpyHostToDevice)); cudaDeviceSynchronize(); } void Particles3D::Copy_Particles_Array_Int_Device_to_Host(part_int_t *array_dev, part_int_t *array_host, part_int_t size) { - CudaSafeCall(cudaMemcpy(array_host, array_dev, size * sizeof(part_int_t), cudaMemcpyDeviceToHost)); + GPU_Error_Check(cudaMemcpy(array_host, array_dev, size * sizeof(part_int_t), cudaMemcpyDeviceToHost)); cudaDeviceSynchronize(); } @@ -192,7 +192,7 @@ void Particles3D::Set_Particles_Array_Real(Real value, Real *array_dev, part_int // number of threads per 1D block dim3 dim1dBlock(TPB_PARTICLES, 1, 1); hipLaunchKernelGGL(Set_Particles_Array_Real_Kernel, dim1dGrid, dim1dBlock, 0, 0, value, array_dev, size); - CudaCheckError(); + GPU_Error_Check(); } #endif // PARTICLES_GPU diff --git a/src/particles/particles_boundaries.cpp b/src/particles/particles_boundaries.cpp index a8a4909d5..96e4f110e 100644 --- a/src/particles/particles_boundaries.cpp +++ b/src/particles/particles_boundaries.cpp @@ -20,7 +20,7 @@ // Transfer the particles that moved outside the local domain void Grid3D::Transfer_Particles_Boundaries(struct Parameters P) { - CudaCheckError(); + GPU_Error_Check(); // Transfer Particles Boundaries Particles.TRANSFER_PARTICLES_BOUNDARIES = true; #ifdef CPU_TIME @@ -31,7 +31,7 @@ void Grid3D::Transfer_Particles_Boundaries(struct Parameters P) Timer.Part_Boundaries.End(); #endif Particles.TRANSFER_PARTICLES_BOUNDARIES = false; - CudaCheckError(); + GPU_Error_Check(); } #ifdef MPI_CHOLLA @@ -730,7 +730,7 @@ int Particles3D::Select_Particles_to_Transfer_GPU(int direction, int side) n_local, side, domainMin, domainMax, pos, G.n_transfer_d, G.n_transfer_h, G.transfer_particles_flags_d, G.transfer_particles_indices_d, G.replace_particles_indices_d, G.transfer_particles_prefix_sum_d, G.transfer_particles_prefix_sum_blocks_d); - CHECK(cudaDeviceSynchronize()); + GPU_Error_Check(cudaDeviceSynchronize()); return n_transfer; } @@ -847,7 +847,7 @@ void Particles3D::Copy_Transfer_Particles_to_Buffer_GPU(int n_transfer, int dire G.transfer_particles_indices_d, send_buffer_d, domainMin, domainMax, bt_non_pos); #endif - CHECK(cudaDeviceSynchronize()); + GPU_Error_Check(cudaDeviceSynchronize()); *n_send += n_transfer; // if ( *n_send > 0 ) printf( "###Transfered %ld particles\n", *n_send); @@ -881,7 +881,7 @@ void Particles3D::Replace_Tranfered_Particles_GPU(int n_transfer) G.replace_particles_indices_d, false); #endif - CHECK(cudaDeviceSynchronize()); + GPU_Error_Check(cudaDeviceSynchronize()); // Update the local number of particles n_local -= n_transfer; } @@ -927,7 +927,7 @@ void Particles3D::Set_Particles_Open_Boundary_GPU(int dir, int side) // G.transfer_particles_flags_d, G.transfer_particles_indices_d, // G.replace_particles_indices_d, G.transfer_particles_prefix_sum_d, // G.transfer_particles_prefix_sum_blocks_d ); - // CHECK(cudaDeviceSynchronize()); + // GPU_Error_Check(cudaDeviceSynchronize()); // chprintf("OPEN condition: removing %d\n", n_transfer); Replace_Tranfered_Particles_GPU(n_transfer); } @@ -1035,7 +1035,7 @@ void Particles3D::Unload_Particles_from_Buffer_GPU(int direction, int side, Real } } - CudaCheckError(); + GPU_Error_Check(); Copy_Transfer_Particles_from_Buffer_GPU(n_recv, recv_buffer_d); } diff --git a/src/particles/particles_boundaries_gpu.cu b/src/particles/particles_boundaries_gpu.cu index 94433f267..5f8165be3 100644 --- a/src/particles/particles_boundaries_gpu.cu +++ b/src/particles/particles_boundaries_gpu.cu @@ -74,7 +74,7 @@ void Grid3D::Set_Particles_Boundary_GPU(int dir, int side) hipLaunchKernelGGL(Set_Particles_Boundary_Kernel, dim1dGrid, dim1dBlock, 0, 0, side, Particles.n_local, pos_dev, d_min, d_max, L); - CudaCheckError(); + GPU_Error_Check(); } // #ifdef MPI_CHOLLA @@ -310,7 +310,7 @@ void Replace_Transfered_Particles_GPU_function(int n_transfer, Real *field_d, in hipLaunchKernelGGL(Replace_Transfered_Particles_Kernel, dim1dGrid, dim1dBlock, 0, 0, n_transfer, field_d, transfer_indices_d, replace_indices_d, print_replace); - CudaCheckError(); + GPU_Error_Check(); } void Replace_Transfered_Particles_Int_GPU_function(int n_transfer, part_int_t *field_d, int *transfer_indices_d, @@ -325,7 +325,7 @@ void Replace_Transfered_Particles_Int_GPU_function(int n_transfer, part_int_t *f hipLaunchKernelGGL(Replace_Transfered_Particles_Kernel, dim1dGrid, dim1dBlock, 0, 0, n_transfer, field_d, transfer_indices_d, replace_indices_d, print_replace); - CudaCheckError(); + GPU_Error_Check(); } part_int_t Select_Particles_to_Transfer_GPU_function(part_int_t n_local, int side, Real domainMin, Real domainMax, @@ -354,33 +354,33 @@ part_int_t Select_Particles_to_Transfer_GPU_function(part_int_t n_local, int sid hipLaunchKernelGGL(Get_Transfer_Flags_Kernel, dim1dGrid, dim1dBlock, 0, 0, n_local, side, domainMin, domainMax, pos_d, transfer_flags_d); - CudaCheckError(); + GPU_Error_Check(); hipLaunchKernelGGL(Scan_Kernel, dim1dGrid_half, dim1dBlock, 0, 0, n_local, transfer_flags_d, transfer_prefix_sum_d, transfer_prefix_sum_blocks_d); - CudaCheckError(); + GPU_Error_Check(); hipLaunchKernelGGL(Prefix_Sum_Blocks_Kernel, 1, dim1dBlock, 0, 0, grid_size_half, transfer_prefix_sum_blocks_d); - CudaCheckError(); + GPU_Error_Check(); hipLaunchKernelGGL(Sum_Blocks_Kernel, dim1dGrid, dim1dBlock, 0, 0, n_local, transfer_prefix_sum_d, transfer_prefix_sum_blocks_d); - CudaCheckError(); + GPU_Error_Check(); hipLaunchKernelGGL(Get_N_Transfer_Particles_Kernel, 1, 1, 0, 0, n_local, n_transfer_d, transfer_flags_d, transfer_prefix_sum_d); - CudaCheckError(); + GPU_Error_Check(); - CudaSafeCall(cudaMemcpy(n_transfer_h, n_transfer_d, sizeof(int), cudaMemcpyDeviceToHost)); - CudaCheckError(); + GPU_Error_Check(cudaMemcpy(n_transfer_h, n_transfer_d, sizeof(int), cudaMemcpyDeviceToHost)); + GPU_Error_Check(); hipLaunchKernelGGL(Get_Transfer_Indices_Kernel, dim1dGrid, dim1dBlock, 0, 0, n_local, transfer_flags_d, transfer_prefix_sum_d, transfer_indices_d); - CudaCheckError(); + GPU_Error_Check(); hipLaunchKernelGGL(Select_Indices_to_Replace_Transfered_Kernel, dim1dGrid, dim1dBlock, 0, 0, n_local, n_transfer_h[0], transfer_flags_d, transfer_prefix_sum_d, replace_indices_d); - CudaCheckError(); + GPU_Error_Check(); // if ( n_transfer_h[0] > 0 )printf( "N transfer: %d\n", n_transfer_h[0]); return n_transfer_h[0]; @@ -427,7 +427,7 @@ void Load_Particles_to_Transfer_GPU_function(int n_transfer, int field_id, int n hipLaunchKernelGGL(Load_Transfered_Particles_to_Buffer_Kernel, dim1dGrid, dim1dBlock, 0, 0, n_transfer, field_id, n_fields_to_transfer, field_d, transfer_indices_d, send_buffer_d, domainMin, domainMax, boundary_type); - CudaCheckError(); + GPU_Error_Check(); } __global__ void Load_Transfered_Particles_Ints_to_Buffer_Kernel(int n_transfer, int field_id, int n_fields_to_transfer, @@ -472,7 +472,7 @@ void Load_Particles_to_Transfer_Int_GPU_function(int n_transfer, int field_id, i hipLaunchKernelGGL(Load_Transfered_Particles_Ints_to_Buffer_Kernel, dim1dGrid, dim1dBlock, 0, 0, n_transfer, field_id, n_fields_to_transfer, field_d, transfer_indices_d, send_buffer_d, domainMin, domainMax, boundary_type); - CudaCheckError(); + GPU_Error_Check(); } #ifdef MPI_CHOLLA @@ -480,16 +480,16 @@ void Copy_Particles_GPU_Buffer_to_Host_Buffer(int n_transfer, Real *buffer_h, Re { int transfer_size; transfer_size = n_transfer * N_DATA_PER_PARTICLE_TRANSFER; - CudaSafeCall(cudaMemcpy(buffer_h, buffer_d, transfer_size * sizeof(Real), cudaMemcpyDeviceToHost)); - CudaCheckError(); + GPU_Error_Check(cudaMemcpy(buffer_h, buffer_d, transfer_size * sizeof(Real), cudaMemcpyDeviceToHost)); + GPU_Error_Check(); } void Copy_Particles_Host_Buffer_to_GPU_Buffer(int n_transfer, Real *buffer_h, Real *buffer_d) { int transfer_size; transfer_size = n_transfer * N_DATA_PER_PARTICLE_TRANSFER; - CudaSafeCall(cudaMemcpy(buffer_d, buffer_h, transfer_size * sizeof(Real), cudaMemcpyHostToDevice)); - CudaCheckError(); + GPU_Error_Check(cudaMemcpy(buffer_d, buffer_h, transfer_size * sizeof(Real), cudaMemcpyHostToDevice)); + GPU_Error_Check(); } #endif // MPI_CHOLLA @@ -522,7 +522,7 @@ void Unload_Particles_to_Transfer_GPU_function(int n_local, int n_transfer, int hipLaunchKernelGGL(Unload_Transfered_Particles_from_Buffer_Kernel, dim1dGrid, dim1dBlock, 0, 0, n_local, n_transfer, field_id, n_fields_to_transfer, field_d, recv_buffer_d); - CudaCheckError(); + GPU_Error_Check(); } __global__ void Unload_Transfered_Particles_Int_from_Buffer_Kernel(int n_local, int n_transfer, int field_id, @@ -554,7 +554,7 @@ void Unload_Particles_Int_to_Transfer_GPU_function(int n_local, int n_transfer, hipLaunchKernelGGL(Unload_Transfered_Particles_Int_from_Buffer_Kernel, dim1dGrid, dim1dBlock, 0, 0, n_local, n_transfer, field_id, n_fields_to_transfer, field_d, recv_buffer_d); - CudaCheckError(); + GPU_Error_Check(); } // #endif//MPI_CHOLLA diff --git a/src/particles/particles_dynamics.cpp b/src/particles/particles_dynamics.cpp index 977fd936c..39aeba6c7 100644 --- a/src/particles/particles_dynamics.cpp +++ b/src/particles/particles_dynamics.cpp @@ -155,7 +155,7 @@ Real Grid3D::Calc_Particles_dt_function(part_int_t p_start, part_int_t p_end) // Update the particles positions and velocities void Grid3D::Advance_Particles(int N_step) { - CudaCheckError(); + GPU_Error_Check(); #ifdef CPU_TIME if (N_step == 1) { Timer.Advance_Part_1.Start(); @@ -190,7 +190,7 @@ void Grid3D::Advance_Particles(int N_step) Timer.Advance_Part_2.End(); } #endif - CudaCheckError(); + GPU_Error_Check(); } // Get the accteleration for all the particles diff --git a/src/particles/particles_dynamics_gpu.cu b/src/particles/particles_dynamics_gpu.cu index 710659849..817040dca 100644 --- a/src/particles/particles_dynamics_gpu.cu +++ b/src/particles/particles_dynamics_gpu.cu @@ -95,12 +95,12 @@ Real Particles3D::Calc_Particles_dt_GPU_function(int ngrid, part_int_t n_particl hipLaunchKernelGGL(Calc_Particles_dti_Kernel, dim1dGrid, dim1dBlock, 0, 0, n_particles_local, dx, dy, dz, vel_x, vel_y, vel_z, dti_array_dev); - CudaCheckError(); + GPU_Error_Check(); // Initialize dt values Real max_dti = 0; // copy the dti array onto the CPU - CudaSafeCall(cudaMemcpy(dti_array_host, dti_array_dev, ngrid * sizeof(Real), cudaMemcpyDeviceToHost)); + GPU_Error_Check(cudaMemcpy(dti_array_host, dti_array_dev, ngrid * sizeof(Real), cudaMemcpyDeviceToHost)); // find maximum inverse timestep from CFL condition for (int i = 0; i < ngrid; i++) { max_dti = fmax(max_dti, dti_array_host[i]); @@ -160,7 +160,7 @@ void Particles3D::Advance_Particles_KDK_Step1_GPU_function(part_int_t n_local, R if (n_local > 0) { hipLaunchKernelGGL(Advance_Particles_KDK_Step1_Kernel, dim1dGrid, dim1dBlock, 0, 0, n_local, dt, pos_x_dev, pos_y_dev, pos_z_dev, vel_x_dev, vel_y_dev, vel_z_dev, grav_x_dev, grav_y_dev, grav_z_dev); - CudaCheckError(); + GPU_Error_Check(); } } @@ -179,7 +179,7 @@ void Particles3D::Advance_Particles_KDK_Step2_GPU_function(part_int_t n_local, R if (n_local > 0) { hipLaunchKernelGGL(Advance_Particles_KDK_Step2_Kernel, dim1dGrid, dim1dBlock, 0, 0, n_local, dt, vel_x_dev, vel_y_dev, vel_z_dev, grav_x_dev, grav_y_dev, grav_z_dev); - CudaCheckError(); + GPU_Error_Check(); } } @@ -276,8 +276,8 @@ void Particles3D::Advance_Particles_KDK_Step1_Cosmo_GPU_function(part_int_t n_lo hipLaunchKernelGGL(Advance_Particles_KDK_Step1_Cosmo_Kernel, dim1dGrid, dim1dBlock, 0, 0, n_local, delta_a, pos_x_dev, pos_y_dev, pos_z_dev, vel_x_dev, vel_y_dev, vel_z_dev, grav_x_dev, grav_y_dev, grav_z_dev, current_a, H0, cosmo_h, Omega_M, Omega_L, Omega_K); - CHECK(cudaDeviceSynchronize()); - // CudaCheckError(); + GPU_Error_Check(cudaDeviceSynchronize()); + // GPU_Error_Check(); } } @@ -299,8 +299,8 @@ void Particles3D::Advance_Particles_KDK_Step2_Cosmo_GPU_function(part_int_t n_lo hipLaunchKernelGGL(Advance_Particles_KDK_Step2_Cosmo_Kernel, dim1dGrid, dim1dBlock, 0, 0, n_local, delta_a, vel_x_dev, vel_y_dev, vel_z_dev, grav_x_dev, grav_y_dev, grav_z_dev, current_a, H0, cosmo_h, Omega_M, Omega_L, Omega_K); - CHECK(cudaDeviceSynchronize()); - // CudaCheckError(); + GPU_Error_Check(cudaDeviceSynchronize()); + // GPU_Error_Check(); } } diff --git a/src/reconstruction/plmc_cuda_tests.cu b/src/reconstruction/plmc_cuda_tests.cu index 704891acd..0c567b6d0 100644 --- a/src/reconstruction/plmc_cuda_tests.cu +++ b/src/reconstruction/plmc_cuda_tests.cu @@ -139,8 +139,8 @@ TEST(tHYDROPlmcReconstructor, CorrectInputExpectCorrectOutput) // Launch kernel hipLaunchKernelGGL(PLMC_cuda, dev_grid.size(), 1, 0, 0, dev_grid.data(), dev_interface_left.data(), dev_interface_right.data(), nx_rot, ny_rot, nz_rot, dx, dt, gamma, direction, n_fields); - CudaCheckError(); - CHECK(cudaDeviceSynchronize()); + GPU_Error_Check(); + GPU_Error_Check(cudaDeviceSynchronize()); // Perform Comparison for (size_t i = 0; i < host_grid.size(); i++) { @@ -250,8 +250,8 @@ TEST(tMHDPlmcReconstructor, CorrectInputExpectCorrectOutput) // Launch kernel hipLaunchKernelGGL(PLMC_cuda, dev_grid.size(), 1, 0, 0, dev_grid.data(), dev_interface_left.data(), dev_interface_right.data(), nx, ny, nz, dx, dt, gamma, direction, n_fields); - CudaCheckError(); - CHECK(cudaDeviceSynchronize()); + GPU_Error_Check(); + GPU_Error_Check(cudaDeviceSynchronize()); // Perform Comparison for (size_t i = 0; i < dev_interface_right.size(); i++) { diff --git a/src/reconstruction/ppmc_cuda_tests.cu b/src/reconstruction/ppmc_cuda_tests.cu index 1bd67bd6f..9e9b11140 100644 --- a/src/reconstruction/ppmc_cuda_tests.cu +++ b/src/reconstruction/ppmc_cuda_tests.cu @@ -89,8 +89,8 @@ TEST(tHYDROPpmcCTUReconstructor, CorrectInputExpectCorrectOutput) // Launch kernel hipLaunchKernelGGL(PPMC_CTU, dev_grid.size(), 1, 0, 0, dev_grid.data(), dev_interface_left.data(), dev_interface_right.data(), nx, ny, nz, dx, dt, gamma, direction); - CudaCheckError(); - CHECK(cudaDeviceSynchronize()); + GPU_Error_Check(); + GPU_Error_Check(cudaDeviceSynchronize()); // Perform Comparison for (size_t i = 0; i < host_grid.size(); i++) { @@ -229,8 +229,8 @@ TEST(tALLPpmcVLReconstructor, CorrectInputExpectCorrectOutput) // Launch kernel hipLaunchKernelGGL(PPMC_VL, dev_grid.size(), 1, 0, 0, dev_grid.data(), dev_interface_left.data(), dev_interface_right.data(), nx, ny, nz, gamma, direction); - CudaCheckError(); - CHECK(cudaDeviceSynchronize()); + GPU_Error_Check(); + GPU_Error_Check(cudaDeviceSynchronize()); // Perform Comparison for (size_t i = 0; i < dev_interface_left.size(); i++) { diff --git a/src/reconstruction/reconstruction_tests.cu b/src/reconstruction/reconstruction_tests.cu index f0c11e3e5..6c2e19af7 100644 --- a/src/reconstruction/reconstruction_tests.cu +++ b/src/reconstruction/reconstruction_tests.cu @@ -64,7 +64,7 @@ TEST(tMHDReconstructionPrimitive2Characteristic, CorrectInputExpectCorrectOutput cuda_utilities::DeviceVector dev_results(1); hipLaunchKernelGGL(Test_Prim_2_Char, 1, 1, 0, 0, primitive, primitive_slope, eigenvectors, gamma, sound_speed, sound_speed_squared, dev_results.data()); - CudaCheckError(); + GPU_Error_Check(); cudaDeviceSynchronize(); reconstruction::Characteristic const host_results = dev_results.at(0); @@ -95,7 +95,7 @@ TEST(tMHDReconstructionCharacteristic2Primitive, CorrectInputExpectCorrectOutput cuda_utilities::DeviceVector dev_results(1); hipLaunchKernelGGL(Test_Char_2_Prim, 1, 1, 0, 0, primitive, characteristic_slope, eigenvectors, gamma, sound_speed, sound_speed_squared, dev_results.data()); - CudaCheckError(); + GPU_Error_Check(); cudaDeviceSynchronize(); reconstruction::Primitive const host_results = dev_results.at(0); @@ -122,7 +122,7 @@ TEST(tMHDReconstructionComputeEigenvectors, CorrectInputExpectCorrectOutput) cuda_utilities::DeviceVector dev_results(1); hipLaunchKernelGGL(Test_Compute_Eigenvectors, 1, 1, 0, 0, primitive, sound_speed, sound_speed_squared, gamma, dev_results.data()); - CudaCheckError(); + GPU_Error_Check(); cudaDeviceSynchronize(); reconstruction::EigenVecs const host_results = dev_results.at(0); // std::cout << to_string_exact(host_results.magnetosonic_speed_fast) << ","; @@ -367,7 +367,7 @@ TEST(tALLReconstructionMonotonizeCharacteristicReturnPrimitive, CorrectInputExpe hipLaunchKernelGGL(Test_Monotize_Characteristic_Return_Primitive, 1, 1, 0, 0, primitive, del_L, del_R, del_C, del_G, del_a_L, del_a_R, del_a_C, del_a_G, eigenvectors, sound_speed, sound_speed_squared, gamma, dev_results.data()); - CudaCheckError(); + GPU_Error_Check(); cudaDeviceSynchronize(); reconstruction::Primitive const host_results = dev_results.at(0); diff --git a/src/riemann_solvers/hllc_cuda_tests.cu b/src/riemann_solvers/hllc_cuda_tests.cu index 882b87548..829c536b7 100644 --- a/src/riemann_solvers/hllc_cuda_tests.cu +++ b/src/riemann_solvers/hllc_cuda_tests.cu @@ -68,12 +68,12 @@ class tHYDROCalculateHLLCFluxesCUDA : public ::testing::Test Real *devTestFlux; // Allocate device arrays and copy data - CudaSafeCall(cudaMalloc(&devConservedLeft, nFields * sizeof(Real))); - CudaSafeCall(cudaMalloc(&devConservedRight, nFields * sizeof(Real))); - CudaSafeCall(cudaMalloc(&devTestFlux, nFields * sizeof(Real))); + GPU_Error_Check(cudaMalloc(&devConservedLeft, nFields * sizeof(Real))); + GPU_Error_Check(cudaMalloc(&devConservedRight, nFields * sizeof(Real))); + GPU_Error_Check(cudaMalloc(&devTestFlux, nFields * sizeof(Real))); - CudaSafeCall(cudaMemcpy(devConservedLeft, stateLeft.data(), nFields * sizeof(Real), cudaMemcpyHostToDevice)); - CudaSafeCall(cudaMemcpy(devConservedRight, stateRight.data(), nFields * sizeof(Real), cudaMemcpyHostToDevice)); + GPU_Error_Check(cudaMemcpy(devConservedLeft, stateLeft.data(), nFields * sizeof(Real), cudaMemcpyHostToDevice)); + GPU_Error_Check(cudaMemcpy(devConservedRight, stateRight.data(), nFields * sizeof(Real), cudaMemcpyHostToDevice)); // Run kernel hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dimGrid, dimBlock, 0, 0, @@ -81,12 +81,12 @@ class tHYDROCalculateHLLCFluxesCUDA : public ::testing::Test devConservedRight, // the "right" interface devTestFlux, nx, ny, nz, nGhost, gamma, direction, nFields); - CudaCheckError(); - CudaSafeCall(cudaMemcpy(testFlux.data(), devTestFlux, nFields * sizeof(Real), cudaMemcpyDeviceToHost)); + GPU_Error_Check(); + GPU_Error_Check(cudaMemcpy(testFlux.data(), devTestFlux, nFields * sizeof(Real), cudaMemcpyDeviceToHost)); // Make sure to sync with the device so we have the results cudaDeviceSynchronize(); - CudaCheckError(); + GPU_Error_Check(); return testFlux; } diff --git a/src/riemann_solvers/hlld_cuda_tests.cu b/src/riemann_solvers/hlld_cuda_tests.cu index 6c70ddb8a..4993fa47e 100644 --- a/src/riemann_solvers/hlld_cuda_tests.cu +++ b/src/riemann_solvers/hlld_cuda_tests.cu @@ -98,16 +98,16 @@ class tMHDCalculateHLLDFluxesCUDA : public ::testing::Test Real *devTestFlux; // Allocate device arrays and copy data - CudaSafeCall(cudaMalloc(&devConservedLeft, stateLeft.size() * sizeof(Real))); - CudaSafeCall(cudaMalloc(&devConservedRight, stateRight.size() * sizeof(Real))); - CudaSafeCall(cudaMalloc(&devConservedMagXFace, magneticX.size() * sizeof(Real))); - CudaSafeCall(cudaMalloc(&devTestFlux, testFlux.size() * sizeof(Real))); + GPU_Error_Check(cudaMalloc(&devConservedLeft, stateLeft.size() * sizeof(Real))); + GPU_Error_Check(cudaMalloc(&devConservedRight, stateRight.size() * sizeof(Real))); + GPU_Error_Check(cudaMalloc(&devConservedMagXFace, magneticX.size() * sizeof(Real))); + GPU_Error_Check(cudaMalloc(&devTestFlux, testFlux.size() * sizeof(Real))); - CudaSafeCall( + GPU_Error_Check( cudaMemcpy(devConservedLeft, stateLeft.data(), stateLeft.size() * sizeof(Real), cudaMemcpyHostToDevice)); - CudaSafeCall( + GPU_Error_Check( cudaMemcpy(devConservedRight, stateRight.data(), stateRight.size() * sizeof(Real), cudaMemcpyHostToDevice)); - CudaSafeCall( + GPU_Error_Check( cudaMemcpy(devConservedMagXFace, magneticX.data(), magneticX.size() * sizeof(Real), cudaMemcpyHostToDevice)); // Run kernel @@ -117,12 +117,12 @@ class tMHDCalculateHLLDFluxesCUDA : public ::testing::Test devConservedMagXFace, // the magnetic field at the interface devTestFlux, n_cells, gamma, direction, nFields); - CudaCheckError(); - CudaSafeCall(cudaMemcpy(testFlux.data(), devTestFlux, testFlux.size() * sizeof(Real), cudaMemcpyDeviceToHost)); + GPU_Error_Check(); + GPU_Error_Check(cudaMemcpy(testFlux.data(), devTestFlux, testFlux.size() * sizeof(Real), cudaMemcpyDeviceToHost)); // Make sure to sync with the device so we have the results cudaDeviceSynchronize(); - CudaCheckError(); + GPU_Error_Check(); // Free device arrays cudaFree(devConservedLeft); diff --git a/src/utils/DeviceVector.h b/src/utils/DeviceVector.h index ebe3e4db8..db10a09b4 100644 --- a/src/utils/DeviceVector.h +++ b/src/utils/DeviceVector.h @@ -192,14 +192,14 @@ class DeviceVector void _allocate(size_t const size) { _size = size; - CudaSafeCall(cudaMalloc(&_ptr, _size * sizeof(T))); + GPU_Error_Check(cudaMalloc(&_ptr, _size * sizeof(T))); } /*! * \brief Free the device side array * */ - void _deAllocate() { CudaSafeCall(cudaFree(_ptr)); } + void _deAllocate() { GPU_Error_Check(cudaFree(_ptr)); } }; } // namespace cuda_utilities // ============================================================================= @@ -222,7 +222,7 @@ DeviceVector::DeviceVector(size_t const size, bool const initialize) _allocate(size); if (initialize) { - CudaSafeCall(cudaMemset(_ptr, 0, _size * sizeof(T))); + GPU_Error_Check(cudaMemset(_ptr, 0, _size * sizeof(T))); } } // ========================================================================= @@ -241,10 +241,10 @@ void DeviceVector::resize(size_t const newSize) _allocate(newSize); // Copy the values from the old array to the new array - CudaSafeCall(cudaMemcpyPeer(_ptr, 0, oldDevPtr, 0, count)); + GPU_Error_Check(cudaMemcpyPeer(_ptr, 0, oldDevPtr, 0, count)); // Free the old array - CudaSafeCall(cudaFree(oldDevPtr)); + GPU_Error_Check(cudaFree(oldDevPtr)); } // ========================================================================= @@ -262,7 +262,7 @@ template T DeviceVector::operator[](size_t const &index) { T hostValue; - CudaSafeCall(cudaMemcpy(&hostValue, &(_ptr[index]), sizeof(T), cudaMemcpyDeviceToHost)); + GPU_Error_Check(cudaMemcpy(&hostValue, &(_ptr[index]), sizeof(T), cudaMemcpyDeviceToHost)); return hostValue; } // ========================================================================= @@ -289,9 +289,9 @@ T DeviceVector::at(size_t const index) template void DeviceVector::assign(T const &hostValue, size_t const &index) { - CudaSafeCall(cudaMemcpy(&(_ptr[index]), // destination - &hostValue, // source - sizeof(T), cudaMemcpyHostToDevice)); + GPU_Error_Check(cudaMemcpy(&(_ptr[index]), // destination + &hostValue, // source + sizeof(T), cudaMemcpyHostToDevice)); } // ========================================================================= @@ -300,7 +300,7 @@ template void DeviceVector::cpyHostToDevice(const T *arrIn, size_t const &arrSize) { if (arrSize <= _size) { - CudaSafeCall(cudaMemcpy(_ptr, arrIn, arrSize * sizeof(T), cudaMemcpyHostToDevice)); + GPU_Error_Check(cudaMemcpy(_ptr, arrIn, arrSize * sizeof(T), cudaMemcpyHostToDevice)); } else { throw std::out_of_range( "Warning: Couldn't copy array to device," @@ -316,7 +316,7 @@ template void DeviceVector::cpyDeviceToHost(T *arrOut, size_t const &arrSize) { if (_size <= arrSize) { - CudaSafeCall(cudaMemcpy(arrOut, _ptr, _size * sizeof(T), cudaMemcpyDeviceToHost)); + GPU_Error_Check(cudaMemcpy(arrOut, _ptr, _size * sizeof(T), cudaMemcpyDeviceToHost)); } else { throw std::out_of_range( "Warning: Couldn't copy array to host, " diff --git a/src/utils/DeviceVector_tests.cu b/src/utils/DeviceVector_tests.cu index 4f396b19b..6acd84308 100644 --- a/src/utils/DeviceVector_tests.cu +++ b/src/utils/DeviceVector_tests.cu @@ -26,7 +26,7 @@ void Check_Pointer_Attributes(cuda_utilities::DeviceVector &devVector) { // Get the pointer information cudaPointerAttributes ptrAttributes; - CudaSafeCall(cudaPointerGetAttributes(&ptrAttributes, devVector.data())); + GPU_Error_Check(cudaPointerGetAttributes(&ptrAttributes, devVector.data())); // Warning strings std::string typeMessage = diff --git a/src/utils/cuda_utilities.cpp b/src/utils/cuda_utilities.cpp index 0e915d93d..142266159 100644 --- a/src/utils/cuda_utilities.cpp +++ b/src/utils/cuda_utilities.cpp @@ -17,7 +17,7 @@ void Print_GPU_Memory_Usage(std::string const &additional_text) { // Get the memory usage size_t gpu_free_memory, gpu_total_memory; - CudaSafeCall(cudaMemGetInfo(&gpu_free_memory, &gpu_total_memory)); + GPU_Error_Check(cudaMemGetInfo(&gpu_free_memory, &gpu_total_memory)); // Assuming that all GPUs in the system have the same amount of memory size_t const gpu_used_memory = Reduce_size_t_Max(gpu_total_memory - gpu_free_memory); diff --git a/src/utils/cuda_utilities.h b/src/utils/cuda_utilities.h index 5a2e01673..acc50cbac 100644 --- a/src/utils/cuda_utilities.h +++ b/src/utils/cuda_utilities.h @@ -77,7 +77,7 @@ inline __host__ __device__ void Get_Real_Indices(int const &n_ghost, int const & * \param[in] ptr The pointer to GPU memory * \param[in] N The size of the array in bytes */ -inline void initGpuMemory(Real *ptr, size_t N) { CudaSafeCall(cudaMemset(ptr, 0, N)); } +inline void initGpuMemory(Real *ptr, size_t N) { GPU_Error_Check(cudaMemset(ptr, 0, N)); } // ===================================================================== /*! diff --git a/src/utils/error_check_cuda.cu b/src/utils/error_check_cuda.cu index 4a0f78cb4..470d47edd 100644 --- a/src/utils/error_check_cuda.cu +++ b/src/utils/error_check_cuda.cu @@ -63,12 +63,12 @@ int Check_Field_Along_Axis(Real *dev_array, int n_field, int nx, int ny, int nz, dim3 Block_Error) { int *error_value_dev; - CudaSafeCall(cudaMalloc((void **)&error_value_dev, sizeof(int))); + GPU_Error_Check(cudaMalloc((void **)&error_value_dev, sizeof(int))); hipLaunchKernelGGL(Check_Value_Along_Axis, Grid_Error, Block_Error, 0, 0, dev_conserved, 0, nx, ny, nz, n_ghost, error_value_dev); int error_value_host; - CudaSafeCall(cudaMemcpy(&error_value_host, error_value_dev, sizeof(int), cudaMemcpyDeviceToHost)); + GPU_Error_Check(cudaMemcpy(&error_value_host, error_value_dev, sizeof(int), cudaMemcpyDeviceToHost)); return error_value_host; } diff --git a/src/utils/gpu.hpp b/src/utils/gpu.hpp index ef4f0e19c..62c3308ca 100644 --- a/src/utils/gpu.hpp +++ b/src/utils/gpu.hpp @@ -3,6 +3,10 @@ #include #include #include +#include +#include + +#include "../utils/error_handling.h" #ifdef O_HIP @@ -12,14 +16,6 @@ #include -static void __attribute__((unused)) check(const hipfftResult err, const char *const file, const int line) -{ - if (err == HIPFFT_SUCCESS) return; - fprintf(stderr, "HIPFFT ERROR AT LINE %d OF FILE '%s': %d\n", line, file, err); - fflush(stderr); - exit(err); -} - #endif // CUFFT PARIS PARIS_GALACTIC #define WARPSIZE 64 @@ -30,6 +26,7 @@ static constexpr int maxWarpsPerBlock = 1024 / WARPSIZE; #define CUFFT_INVERSE HIPFFT_BACKWARD #define CUFFT_Z2D HIPFFT_Z2D #define CUFFT_Z2Z HIPFFT_Z2Z + #define CUFFT_SUCCESS HIPFFT_SUCCESS #define cudaDeviceSynchronize hipDeviceSynchronize #define cudaError hipError_t @@ -110,15 +107,6 @@ static constexpr int maxWarpsPerBlock = 1024 / WARPSIZE; #define curand hiprand #define curand_poisson hiprand_poisson -static void __attribute__((unused)) check(const hipError_t err, const char *const file, const int line) -{ - if (err == hipSuccess) return; - fprintf(stderr, "HIP ERROR AT LINE %d OF FILE '%s': %s %s\n", line, file, hipGetErrorName(err), - hipGetErrorString(err)); - fflush(stderr); - exit(err); -} - #else // not O_HIP #include @@ -127,29 +115,8 @@ static void __attribute__((unused)) check(const hipError_t err, const char *cons #include -static void check(const cufftResult err, const char *const file, const int line) -{ - if (err == CUFFT_SUCCESS) { - return; - } - fprintf(stderr, "CUFFT ERROR AT LINE %d OF FILE '%s': %d\n", line, file, err); - fflush(stderr); - exit(err); -} - #endif // defined(PARIS) || defined(PARIS_GALACTIC) -static void check(const cudaError_t err, const char *const file, const int line) -{ - if (err == cudaSuccess) { - return; - } - fprintf(stderr, "CUDA ERROR AT LINE %d OF FILE '%s': %s %s\n", line, file, cudaGetErrorName(err), - cudaGetErrorString(err)); - fflush(stderr); - exit(err); -} - #define WARPSIZE 32 static constexpr int maxWarpsPerBlock = 1024 / WARPSIZE; #define hipLaunchKernelGGL(F, G, B, M, S, ...) F<<>>(__VA_ARGS__) @@ -157,10 +124,55 @@ static constexpr int maxWarpsPerBlock = 1024 / WARPSIZE; #endif // O_HIP -#define CHECK(X) check(X, __FILE__, __LINE__) - #define GPU_MAX_THREADS 256 +/*! + * \brief Check for CUDA/HIP error codes. Can be called wrapping a GPU function that returns a value or with no + * arguments and it will get the latest error code. + * + * \param[in] code The code to check. Defaults to the last error code + * \param[in] abort Whether or not to abort if an error is encountered. Defaults to True + * \param[in] location The location of the call. This should be left as the default value. + */ +inline void GPU_Error_Check(cudaError_t code = cudaPeekAtLastError(), bool abort = true, + std::experimental::source_location location = std::experimental::source_location::current()) +{ + code = cudaDeviceSynchronize(); + + // Check the code + if (code != cudaSuccess) { + std::cout << "GPU_Error_Check: Failed at " + << "Line: " << location.line() << ", File: " << location.file_name() + << ", Function: " << location.function_name() << ", with code: " << cudaGetErrorString(code) << std::endl; + if (abort) { + chexit(code); + } + } +} + +#if defined(PARIS) || defined(PARIS_GALACTIC) +/*! + * \brief Check for CUFFT/HIPFFT error codes. Can be called wrapping a FFT function that returns a value + * + * \param[in] code The code to check + * \param[in] abort Whether or not to abort if an error is encountered. Defaults to True + * \param[in] location The location of the call. This should be left as the default value. + */ +inline void GPU_Error_Check(cufftResult_t code, bool abort = true, + std::experimental::source_location location = std::experimental::source_location::current()) +{ + // Check the code + if (code != CUFFT_SUCCESS) { + std::cout << "GPU_Error_Check: Failed at " + << "Line: " << location.line() << ", File: " << location.file_name() + << ", Function: " << location.function_name() << ", with FFT code: " << code << std::endl; + if (abort) { + chexit(code); + } + } +} +#endif // defined(PARIS) || defined(PARIS_GALACTIC) + #if defined(__CUDACC__) || defined(__HIPCC__) template @@ -181,7 +193,7 @@ void gpuFor(const int n0, const F f) const int b0 = (n0 + GPU_MAX_THREADS - 1) / GPU_MAX_THREADS; const int t0 = (n0 + b0 - 1) / b0; gpuRun0<<>>(n0, f); - CHECK(cudaGetLastError()); + GPU_Error_Check(); } template @@ -223,13 +235,13 @@ void gpuFor(const int n0, const int n1, const F f) const int b1 = (n1 + GPU_MAX_THREADS - 1) / GPU_MAX_THREADS; const int t1 = (n1 + b1 - 1) / b1; gpuRun2x0<<>>(n1, f); - CHECK(cudaGetLastError()); + GPU_Error_Check(); } else if (nl01 > GPU_MAX_THREADS) { gpuRun1x1<<>>(f); - CHECK(cudaGetLastError()); + GPU_Error_Check(); } else { gpuRun0x2<<<1, dim3(n1, n0)>>>(f); - CHECK(cudaGetLastError()); + GPU_Error_Check(); } } @@ -285,16 +297,16 @@ void gpuFor(const int n0, const int n1, const int n2, const F f) const int b2 = (n2 + GPU_MAX_THREADS - 1) / GPU_MAX_THREADS; const int t2 = (n2 + b2 - 1) / b2; gpuRun3x0<<>>(n2, f); - CHECK(cudaGetLastError()); + GPU_Error_Check(); } else if (nl12 > GPU_MAX_THREADS) { gpuRun2x1<<>>(f); - CHECK(cudaGetLastError()); + GPU_Error_Check(); } else if (nl012 > GPU_MAX_THREADS) { gpuRun1x2<<>>(f); - CHECK(cudaGetLastError()); + GPU_Error_Check(); } else { gpuRun0x3<<<1, dim3(n2, n1, n0)>>>(f); - CHECK(cudaGetLastError()); + GPU_Error_Check(); } } @@ -357,16 +369,16 @@ void gpuFor(const int n0, const int n1, const int n2, const int n3, const F f) const int b23 = (n23 + GPU_MAX_THREADS - 1) / GPU_MAX_THREADS; const int t23 = (n23 + b23 - 1) / b23; gpuRun4x0<<>>(n23, n3, f); - CHECK(cudaGetLastError()); + GPU_Error_Check(); } else if (n23 > GPU_MAX_THREADS) { gpuRun3x1<<>>(f); - CHECK(cudaGetLastError()); + GPU_Error_Check(); } else if (n123 > GPU_MAX_THREADS) { gpuRun2x2<<>>(f); - CHECK(cudaGetLastError()); + GPU_Error_Check(); } else { gpuRun1x3<<>>(f); - CHECK(cudaGetLastError()); + GPU_Error_Check(); } } @@ -435,17 +447,17 @@ void gpuFor(const int n0, const int n1, const int n2, const int n3, const int n4 const int b34 = (n34 + GPU_MAX_THREADS - 1) / GPU_MAX_THREADS; const int t34 = (n34 + b34 - 1) / b34; gpuRun5x0<<>>(n1, n34, n4, f); - CHECK(cudaGetLastError()); + GPU_Error_Check(); } else if (n34 > GPU_MAX_THREADS) { const int n01 = n0 * n1; gpuRun4x1<<>>(n1, f); - CHECK(cudaGetLastError()); + GPU_Error_Check(); } else if (n2 * n34 > GPU_MAX_THREADS) { gpuRun3x2<<>>(f); - CHECK(cudaGetLastError()); + GPU_Error_Check(); } else { gpuRun2x3<<>>(f); - CHECK(cudaGetLastError()); + GPU_Error_Check(); } } diff --git a/src/utils/gpu_arrays_functions.cu b/src/utils/gpu_arrays_functions.cu index 696b19a6f..0a84ef64e 100644 --- a/src/utils/gpu_arrays_functions.cu +++ b/src/utils/gpu_arrays_functions.cu @@ -15,7 +15,7 @@ void Extend_GPU_Array_Real(Real **current_array_d, int current_size, int new_siz } size_t global_free, global_total; - CudaSafeCall(cudaMemGetInfo(&global_free, &global_total)); + GPU_Error_Check(cudaMemGetInfo(&global_free, &global_total)); cudaDeviceSynchronize(); #ifdef PRINT_GPU_MEMORY printf("ReAllocating GPU Memory: %d MB free \n", (int)global_free / 1000000); @@ -29,29 +29,29 @@ void Extend_GPU_Array_Real(Real **current_array_d, int current_size, int new_siz } Real *new_array_d; - CudaSafeCall(cudaMalloc((void **)&new_array_d, new_size * sizeof(Real))); + GPU_Error_Check(cudaMalloc((void **)&new_array_d, new_size * sizeof(Real))); cudaDeviceSynchronize(); - CudaCheckError(); + GPU_Error_Check(); if (new_array_d == NULL) { std::cout << " Error When Allocating New GPU Array" << std::endl; chexit(-1); } // Copy the content of the original array to the new array - CudaSafeCall(cudaMemcpy(new_array_d, *current_array_d, current_size * sizeof(Real), cudaMemcpyDeviceToDevice)); + GPU_Error_Check(cudaMemcpy(new_array_d, *current_array_d, current_size * sizeof(Real), cudaMemcpyDeviceToDevice)); cudaDeviceSynchronize(); - CudaCheckError(); + GPU_Error_Check(); // size_t global_free_before, global_free_after; - // CudaSafeCall( cudaMemGetInfo( &global_free_before, &global_total ) ); + // GPU_Error_Check( cudaMemGetInfo( &global_free_before, &global_total ) ); // cudaDeviceSynchronize(); // Free the original array cudaFree(*current_array_d); cudaDeviceSynchronize(); - CudaCheckError(); + GPU_Error_Check(); - // CudaSafeCall( cudaMemGetInfo( &global_free_after, &global_total ) ); + // GPU_Error_Check( cudaMemGetInfo( &global_free_after, &global_total ) ); // cudaDeviceSynchronize(); // // printf("Freed Memory: %d MB\n", (int) (global_free_after - diff --git a/src/utils/gpu_arrays_functions.h b/src/utils/gpu_arrays_functions.h index e28e86714..f15b379ab 100644 --- a/src/utils/gpu_arrays_functions.h +++ b/src/utils/gpu_arrays_functions.h @@ -19,7 +19,7 @@ void Extend_GPU_Array(T **current_array_d, int current_size, int new_size, bool } size_t global_free, global_total; - CudaSafeCall(cudaMemGetInfo(&global_free, &global_total)); + GPU_Error_Check(cudaMemGetInfo(&global_free, &global_total)); cudaDeviceSynchronize(); #ifdef PRINT_GPU_MEMORY printf("ReAllocating GPU Memory: %ld MB free \n", global_free / 1000000); @@ -33,23 +33,23 @@ void Extend_GPU_Array(T **current_array_d, int current_size, int new_size, bool } T *new_array_d; - CudaSafeCall(cudaMalloc((void **)&new_array_d, new_size * sizeof(T))); + GPU_Error_Check(cudaMalloc((void **)&new_array_d, new_size * sizeof(T))); cudaDeviceSynchronize(); - CudaCheckError(); + GPU_Error_Check(); if (new_array_d == NULL) { std::cout << " Error When Allocating New GPU Array" << std::endl; chexit(-1); } // Copy the content of the original array to the new array - CudaSafeCall(cudaMemcpy(new_array_d, *current_array_d, current_size * sizeof(T), cudaMemcpyDeviceToDevice)); + GPU_Error_Check(cudaMemcpy(new_array_d, *current_array_d, current_size * sizeof(T), cudaMemcpyDeviceToDevice)); cudaDeviceSynchronize(); - CudaCheckError(); + GPU_Error_Check(); // Free the original array cudaFree(*current_array_d); cudaDeviceSynchronize(); - CudaCheckError(); + GPU_Error_Check(); // Replace the pointer of the original array with the new one *current_array_d = new_array_d; diff --git a/src/utils/reduction_utilities_tests.cu b/src/utils/reduction_utilities_tests.cu index effb42549..5dd18c197 100644 --- a/src/utils/reduction_utilities_tests.cu +++ b/src/utils/reduction_utilities_tests.cu @@ -60,7 +60,7 @@ TEST(tALLKernelReduceMax, CorrectInputExpectCorrectOutput) // ================ hipLaunchKernelGGL(reduction_utilities::kernelReduceMax, launchParams.numBlocks, launchParams.threadsPerBlock, 0, 0, dev_grid.data(), dev_max.data(), host_grid.size()); - CudaCheckError(); + GPU_Error_Check(); // Perform comparison testing_utilities::Check_Results(maxValue, dev_max.at(0), "maximum value found"); diff --git a/src/utils/timing_functions.cpp b/src/utils/timing_functions.cpp index 2a5e17329..a0382e43f 100644 --- a/src/utils/timing_functions.cpp +++ b/src/utils/timing_functions.cpp @@ -65,8 +65,8 @@ void OneTime::End(bool const print_high_values) // Get GPU ID std::string gpu_id(MPI_MAX_PROCESSOR_NAME, ' '); int device; - CudaSafeCall(cudaGetDevice(&device)); - CudaSafeCall(cudaDeviceGetPCIBusId(gpu_id.data(), gpu_id.size(), device)); + GPU_Error_Check(cudaGetDevice(&device)); + GPU_Error_Check(cudaDeviceGetPCIBusId(gpu_id.data(), gpu_id.size(), device)); gpu_id.erase( std::find_if(gpu_id.rbegin(), gpu_id.rend(), [](unsigned char ch) { return !std::isspace(ch); }).base(), gpu_id.end()); From 971bacfee801b9d4cd0b4fe6704339242215fc30 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Fri, 27 Oct 2023 15:47:33 -0400 Subject: [PATCH 604/694] Add missing error checks to some cudaMallocs Some already had error checks on the next line but now they're all wrapped as is standard in the rest of the code --- src/chemistry_gpu/chemistry_functions_gpu.cu | 13 ++++--------- src/cooling/load_cloudy_texture.cu | 4 ++-- src/gravity/potential_SOR_3D_gpu.cu | 18 ++++-------------- src/particles/feedback_CIC_gpu.cu | 2 +- src/utils/debug_utilities.cu | 2 +- 5 files changed, 12 insertions(+), 27 deletions(-) diff --git a/src/chemistry_gpu/chemistry_functions_gpu.cu b/src/chemistry_gpu/chemistry_functions_gpu.cu index 9290e0918..7c9bfe2cf 100644 --- a/src/chemistry_gpu/chemistry_functions_gpu.cu +++ b/src/chemistry_gpu/chemistry_functions_gpu.cu @@ -17,8 +17,7 @@ void Chem_GPU::Allocate_Array_GPU_float(float **array_dev, int size) { - cudaMalloc((void **)array_dev, size * sizeof(float)); - GPU_Error_Check(); + GPU_Error_Check(cudaMalloc((void **)array_dev, size * sizeof(float))); } void Chem_GPU::Copy_Float_Array_to_Device(int size, float *array_h, float *array_d) @@ -27,15 +26,11 @@ void Chem_GPU::Copy_Float_Array_to_Device(int size, float *array_h, float *array cudaDeviceSynchronize(); } -void Chem_GPU::Free_Array_GPU_float(float *array_dev) -{ - cudaFree(array_dev); - GPU_Error_Check(); -} +void Chem_GPU::Free_Array_GPU_float(float *array_dev) { GPU_Error_Check(cudaFree(array_dev)); } void Chem_GPU::Allocate_Array_GPU_Real(Real **array_dev, int size) { - cudaMalloc((void **)array_dev, size * sizeof(Real)); + GPU_Error_Check(cudaMalloc((void **)array_dev, size * sizeof(Real))); GPU_Error_Check(); } @@ -47,7 +42,7 @@ void Chem_GPU::Copy_Real_Array_to_Device(int size, Real *array_h, Real *array_d) void Chem_GPU::Free_Array_GPU_Real(Real *array_dev) { - cudaFree(array_dev); + GPU_Error_Check(cudaFree(array_dev)); GPU_Error_Check(); } diff --git a/src/cooling/load_cloudy_texture.cu b/src/cooling/load_cloudy_texture.cu index 241fc740c..8ac7e602c 100644 --- a/src/cooling/load_cloudy_texture.cu +++ b/src/cooling/load_cloudy_texture.cu @@ -118,8 +118,8 @@ void Load_Cuda_Textures() // Allocate CUDA arrays in device memory cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(32, 0, 0, 0, cudaChannelFormatKindFloat); - cudaMallocArray(&cuCoolArray, &channelDesc, nx, ny); - cudaMallocArray(&cuHeatArray, &channelDesc, nx, ny); + GPU_Error_Check(cudaMallocArray(&cuCoolArray, &channelDesc, nx, ny)); + GPU_Error_Check(cudaMallocArray(&cuHeatArray, &channelDesc, nx, ny)); // Copy the cooling and heating arrays from host to device diff --git a/src/gravity/potential_SOR_3D_gpu.cu b/src/gravity/potential_SOR_3D_gpu.cu index 5646ded68..d2066edb8 100644 --- a/src/gravity/potential_SOR_3D_gpu.cu +++ b/src/gravity/potential_SOR_3D_gpu.cu @@ -8,27 +8,17 @@ void Potential_SOR_3D::Allocate_Array_GPU_Real(Real **array_dev, grav_int_t size) { - cudaMalloc((void **)array_dev, size * sizeof(Real)); - GPU_Error_Check(); + GPU_Error_Check(cudaMalloc((void **)array_dev, size * sizeof(Real))); } void Potential_SOR_3D::Allocate_Array_GPU_bool(bool **array_dev, grav_int_t size) { - cudaMalloc((void **)array_dev, size * sizeof(bool)); - GPU_Error_Check(); + GPU_Error_Check(cudaMalloc((void **)array_dev, size * sizeof(bool))); } -void Potential_SOR_3D::Free_Array_GPU_Real(Real *array_dev) -{ - cudaFree(array_dev); - GPU_Error_Check(); -} +void Potential_SOR_3D::Free_Array_GPU_Real(Real *array_dev) { GPU_Error_Check(cudaFree(array_dev)); } -void Potential_SOR_3D::Free_Array_GPU_bool(bool *array_dev) -{ - cudaFree(array_dev); - GPU_Error_Check(); -} +void Potential_SOR_3D::Free_Array_GPU_bool(bool *array_dev) { GPU_Error_Check(cudaFree(array_dev)); } __global__ void Copy_Input_Kernel(int n_cells, Real *input_d, Real *density_d, Real Grav_Constant, Real dens_avrg, Real current_a) diff --git a/src/particles/feedback_CIC_gpu.cu b/src/particles/feedback_CIC_gpu.cu index 7fa7c967f..0a4e8b292 100644 --- a/src/particles/feedback_CIC_gpu.cu +++ b/src/particles/feedback_CIC_gpu.cu @@ -132,7 +132,7 @@ void supernova::initState(struct Parameters* P, part_int_t n_local, Real allocat // Now initialize the poisson random number generator state. n_states = n_local * allocation_factor; - cudaMalloc((void**)&randStates, n_states * sizeof(FeedbackPrng)); + GPU_Error_Check(cudaMalloc((void**)&randStates, n_states * sizeof(FeedbackPrng))); int ngrid = (n_states - 1) / TPB_FEEDBACK + 1; dim3 grid(ngrid); diff --git a/src/utils/debug_utilities.cu b/src/utils/debug_utilities.cu index 9a1157aca..20720583f 100644 --- a/src/utils/debug_utilities.cu +++ b/src/utils/debug_utilities.cu @@ -45,7 +45,7 @@ void Check_For_Nan(Real* device_array, int array_size, int check_num) { bool host_out_bool[1] = {false}; bool* out_bool; - cudaMalloc((void**)&out_bool, sizeof(bool)); + GPU_Error_Check(cudaMalloc((void**)&out_bool, sizeof(bool))); cudaMemcpy(out_bool, host_out_bool, sizeof(bool), cudaMemcpyHostToDevice); int ngrid = (array_size + TPB - 1) / TPB; dim3 dim1dGrid(ngrid, 1, 1); From c5e35a616d256920831323ec1bbc2d3684e36be7 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Fri, 27 Oct 2023 15:51:50 -0400 Subject: [PATCH 605/694] Make GPU error checking on by default The CUDA_ERROR_CHECK macro that turns on error checking has been deprecated in favor of the new DISABLE_GPU_ERROR_CHECKING macro which disable error checking. Error checking is now on by default unless compiled with the DISABLE_GPU_ERROR_CHECKING macro. --- builds/make.inc.template | 4 ++-- builds/make.type.disk | 6 ++---- builds/make.type.mhd | 6 +++--- src/utils/error_handling.cpp | 6 +++--- src/utils/gpu.hpp | 4 ++++ 5 files changed, 14 insertions(+), 12 deletions(-) diff --git a/builds/make.inc.template b/builds/make.inc.template index 4c09d085f..3ae156225 100644 --- a/builds/make.inc.template +++ b/builds/make.inc.template @@ -4,7 +4,7 @@ #To use GPUs, CUDA must be turned on here #Optional error checking can also be enabled -DFLAGS += -DCUDA #-DCUDA_ERROR_CHECK +DFLAGS += -DCUDA #To use MPI, DFLAGS must include -DMPI_CHOLLA DFLAGS += -DMPI_CHOLLA @@ -65,7 +65,7 @@ DFLAGS += -DTEMPERATURE_FLOOR #DFLAGS += -DDYNAMIC_GPU_ALLOC # Set the cooling function -#DFLAGS += -DCOOLING_GPU +#DFLAGS += -DCOOLING_GPU #DFLAGS += -DCLOUDY_COOL # Use Tiled Iitial Conditions for Scaling Tets diff --git a/builds/make.type.disk b/builds/make.type.disk index a95560cf1..c2efaf0f6 100644 --- a/builds/make.type.disk +++ b/builds/make.type.disk @@ -23,7 +23,7 @@ DFLAGS += -DGRAVITY_5_POINTS_GRADIENT #DFLAGS += -DOUTPUT_ALWAYS DFLAGS += -DCUDA -DFLAGS += -DMPI_CHOLLA +DFLAGS += -DMPI_CHOLLA DFLAGS += -DPRECISION=2 DFLAGS += -DPPMC DFLAGS += -DHLLC @@ -43,9 +43,7 @@ DFLAGS += -DHYDRO_GPU OUTPUT ?= -DOUTPUT -DHDF5 -DSLICES -DPROJECTION DFLAGS += $(OUTPUT) -DFLAGS += $(MPI_GPU) +DFLAGS += $(MPI_GPU) DFLAGS += -DPARALLEL_OMP DFLAGS += -DN_OMP_THREADS=$(OMP_NUM_THREADS) - -#DFLAGS += -DCUDA_ERROR_CHECK diff --git a/builds/make.type.mhd b/builds/make.type.mhd index 2c6cbf68d..1849722d7 100644 --- a/builds/make.type.mhd +++ b/builds/make.type.mhd @@ -43,12 +43,12 @@ DFLAGS += $(OUTPUT) #This is set in the system make.host file DFLAGS += $(MPI_GPU) +# Disable CUDA error checking +# DFLAGS += -DDISABLE_GPU_ERROR_CHECKING + # NOTE: The following macros are to help facilitate debugging and should not be # used on scientific runs -# Do CUDA error checking -# DFLAGS += -DCUDA_ERROR_CHECK - # Limit the number of steps to evolve. # DFLAGS += -DN_STEPS_LIMIT=1000 diff --git a/src/utils/error_handling.cpp b/src/utils/error_handling.cpp index 3c1c3097a..2fe9e9735 100644 --- a/src/utils/error_handling.cpp +++ b/src/utils/error_handling.cpp @@ -74,10 +74,10 @@ void Check_Configuration(Parameters const& P) Check_Boundary(P.zu_bcnd, "zu_bcnd"); // warn if error checking is disabled -#ifndef CUDA_ERROR_CHECK +#ifndef DISABLE_GPU_ERROR_CHECKING // NOLINTNEXTLINE(clang-diagnostic-#warnings) - #warning "CUDA error checking is disabled. Enable it with the CUDA_ERROR_CHECK macro" -#endif //! CUDA_ERROR_CHECK + #warning "CUDA error checking is disabled. Enable it by compiling without the DISABLE_GPU_ERROR_CHECKING macro." +#endif //! DISABLE_GPU_ERROR_CHECKING // Check that PRECISION is 2 #ifndef PRECISION diff --git a/src/utils/gpu.hpp b/src/utils/gpu.hpp index 62c3308ca..962ed09fc 100644 --- a/src/utils/gpu.hpp +++ b/src/utils/gpu.hpp @@ -137,6 +137,7 @@ static constexpr int maxWarpsPerBlock = 1024 / WARPSIZE; inline void GPU_Error_Check(cudaError_t code = cudaPeekAtLastError(), bool abort = true, std::experimental::source_location location = std::experimental::source_location::current()) { +#ifndef DISABLE_GPU_ERROR_CHECKING code = cudaDeviceSynchronize(); // Check the code @@ -148,6 +149,7 @@ inline void GPU_Error_Check(cudaError_t code = cudaPeekAtLastError(), bool abort chexit(code); } } +#endif // DISABLE_GPU_ERROR_CHECKING } #if defined(PARIS) || defined(PARIS_GALACTIC) @@ -161,6 +163,7 @@ inline void GPU_Error_Check(cudaError_t code = cudaPeekAtLastError(), bool abort inline void GPU_Error_Check(cufftResult_t code, bool abort = true, std::experimental::source_location location = std::experimental::source_location::current()) { + #ifndef DISABLE_GPU_ERROR_CHECKING // Check the code if (code != CUFFT_SUCCESS) { std::cout << "GPU_Error_Check: Failed at " @@ -170,6 +173,7 @@ inline void GPU_Error_Check(cufftResult_t code, bool abort = true, chexit(code); } } + #endif // DISABLE_GPU_ERROR_CHECKING } #endif // defined(PARIS) || defined(PARIS_GALACTIC) From cdbed5d17c4eee5ecd6285c14e9e31d0b9b16d80 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Fri, 27 Oct 2023 16:33:16 -0400 Subject: [PATCH 606/694] Add HIPifly macros needed for GPU error checking --- src/utils/gpu.hpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/utils/gpu.hpp b/src/utils/gpu.hpp index 962ed09fc..0817940cc 100644 --- a/src/utils/gpu.hpp +++ b/src/utils/gpu.hpp @@ -27,6 +27,7 @@ static constexpr int maxWarpsPerBlock = 1024 / WARPSIZE; #define CUFFT_Z2D HIPFFT_Z2D #define CUFFT_Z2Z HIPFFT_Z2Z #define CUFFT_SUCCESS HIPFFT_SUCCESS + #define cufftResult_t hipfftResult_t #define cudaDeviceSynchronize hipDeviceSynchronize #define cudaError hipError_t @@ -65,6 +66,7 @@ static constexpr int maxWarpsPerBlock = 1024 / WARPSIZE; #define cudaOccupancyMaxPotentialBlockSize hipOccupancyMaxPotentialBlockSize #define cudaMemGetInfo hipMemGetInfo #define cudaDeviceGetPCIBusId hipDeviceGetPCIBusId + #define cudaPeekAtLastError hipPeekAtLastError // Texture definitions #define cudaArray hipArray From 0fd7f89d31b9325bfc4b626fe73c616117e71a64 Mon Sep 17 00:00:00 2001 From: helenarichie Date: Wed, 6 Dec 2023 13:09:54 -0500 Subject: [PATCH 607/694] update branch --- builds/make.type.dust | 5 ++++- cholla-tests-data | 2 +- src/global/global.h | 2 +- src/grid/cuda_boundaries.cu | 4 ++-- src/grid/grid3D.cpp | 2 +- src/grid/initial_conditions.cpp | 15 ++++++++------- src/integrators/VL_3D_cuda.cu | 4 +++- src/integrators/simple_3D_cuda.cu | 2 ++ 8 files changed, 22 insertions(+), 14 deletions(-) diff --git a/builds/make.type.dust b/builds/make.type.dust index a0ea7b267..b9813674c 100644 --- a/builds/make.type.dust +++ b/builds/make.type.dust @@ -9,7 +9,7 @@ MPI_GPU ?= DFLAGS += -DCUDA DFLAGS += -DMPI_CHOLLA DFLAGS += -DPRECISION=2 -DFLAGS += -DPPMC +DFLAGS += -DPPMP DFLAGS += -DHLLC # DFLAGS += -DDE @@ -30,8 +30,11 @@ DFLAGS += -DSCALAR # Define dust macro DFLAGS += -DDUST +DFLAGS += -DSCALAR_FLOOR + # Apply the cooling in the GPU from precomputed tables DFLAGS += -DCOOLING_GPU +DFLAGS += -DCLOUDY_COOLING #Measure the Timing of the different stages #DFLAGS += -DCPU_TIME diff --git a/cholla-tests-data b/cholla-tests-data index 321416680..dcd73ff52 160000 --- a/cholla-tests-data +++ b/cholla-tests-data @@ -1 +1 @@ -Subproject commit 321416680f95d97b5d4ccc6f0b83a8b9ecafdaf0 +Subproject commit dcd73ff52b9027627b247c6d888bcdb56840c85e diff --git a/src/global/global.h b/src/global/global.h index 8abe358fc..1e63a6af7 100644 --- a/src/global/global.h +++ b/src/global/global.h @@ -50,7 +50,7 @@ typedef double Real; #define LOG_FILE_NAME "run_output.log" // Conserved Floor Values -#define TEMP_FLOOR 1e-3 +#define TEMP_FLOOR 10 #define DENS_FLOOR 1e-5 // in code units // Parameter for Enzo dual Energy Condition diff --git a/src/grid/cuda_boundaries.cu b/src/grid/cuda_boundaries.cu index f5dbe361d..733fb1a85 100644 --- a/src/grid/cuda_boundaries.cu +++ b/src/grid/cuda_boundaries.cu @@ -312,13 +312,13 @@ __global__ void Wind_Boundary_kernel(Real *c_device, int nx, int ny, int nz, int Real vx, vy, vz, d_0, P_0; n_0 = 1e-2; // same value as n_bg in cloud initial condition function (cm^-3) - T_0 = 3e6; // same value as T_bg in cloud initial condition function (K) + T_0 = 3e7; // same value as T_bg in cloud initial condition function (K) // same values as rho_bg and p_bg in cloud initial condition function d_0 = n_0 * mu * MP / DENSITY_UNIT; P_0 = n_0 * KB * T_0 / PRESSURE_UNIT; - vx = 100 * TIME_UNIT / KPC; // km/s * (cholla unit conversion) + vx = 500 * TIME_UNIT / KPC; // km/s * (cholla unit conversion) vy = 0.0; vz = 0.0; diff --git a/src/grid/grid3D.cpp b/src/grid/grid3D.cpp index 8a7d3fa6f..0a8374323 100644 --- a/src/grid/grid3D.cpp +++ b/src/grid/grid3D.cpp @@ -157,7 +157,7 @@ void Grid3D::Initialize(struct Parameters *P) C_cfl = 0.3; #ifdef AVERAGE_SLOW_CELLS - H.min_dt_slow = 1e-100; // Initialize the minumum dt to a tiny number + H.min_dt_slow = 1e-5; // Initialize the minumum dt to a tiny number #endif // AVERAGE_SLOW_CELLS #ifndef MPI_CHOLLA diff --git a/src/grid/initial_conditions.cpp b/src/grid/initial_conditions.cpp index 768bf0960..a1983fb48 100644 --- a/src/grid/initial_conditions.cpp +++ b/src/grid/initial_conditions.cpp @@ -1324,7 +1324,7 @@ void Grid3D::Clouds() Real p_bg, p_cl; // background and cloud pressure Real mu = 0.6; // mean atomic weight int N_cl = 1; // number of clouds - Real R_cl = 2.5; // cloud radius in code units (kpc) + Real R_cl = .1; // cloud radius in code units (kpc) Real cl_pos[N_cl][3]; // array of cloud positions Real r; @@ -1339,22 +1339,22 @@ void Grid3D::Clouds() // single centered cloud setup for (int nn = 0; nn < N_cl; nn++) { - cl_pos[nn][0] = 0.5 * H.xdglobal; + cl_pos[nn][0] = 0.075 * H.xdglobal; cl_pos[nn][1] = 0.5 * H.ydglobal; cl_pos[nn][2] = 0.5 * H.zdglobal; printf("Cloud positions: %f %f %f\n", cl_pos[nn][0], cl_pos[nn][1], cl_pos[nn][2]); } - n_bg = 1.68e-4; - n_cl = 5.4e-2; + n_bg = 1e-2; + n_cl = 10; rho_bg = n_bg * mu * MP / DENSITY_UNIT; rho_cl = n_cl * mu * MP / DENSITY_UNIT; - vx_bg = 0.0; + vx_bg = 1000*TIME_UNIT/KPC; // vx_c = -200*TIME_UNIT/KPC; // convert from km/s to kpc/kyr vx_cl = 0.0; vy_bg = vy_cl = 0.0; vz_bg = vz_cl = 0.0; - T_bg = 3e6; + T_bg = 3e7; T_cl = 1e4; p_bg = n_bg * KB * T_bg / PRESSURE_UNIT; p_cl = p_bg; @@ -1414,10 +1414,11 @@ void Grid3D::Clouds() #ifdef DE C.GasEnergy[id] = p_cl / (gama - 1.0); #endif // DE - +#ifdef SCALAR #ifdef DUST C.host[id + H.n_cells * grid_enum::dust_density] = rho_cl * 1e-2; #endif // DUST +#endif } } } diff --git a/src/integrators/VL_3D_cuda.cu b/src/integrators/VL_3D_cuda.cu index bb14be755..a600ecd73 100644 --- a/src/integrators/VL_3D_cuda.cu +++ b/src/integrators/VL_3D_cuda.cu @@ -332,9 +332,11 @@ void VL_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, int #endif // TEMPERATURE_FLOOR #ifdef SCALAR_FLOOR + #ifdef DUST hipLaunchKernelGGL(Apply_Scalar_Floor, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, nz, n_ghost, - grid_enum::dust_density, 1e-5); + grid_enum::dust_density, 1e-10); CudaCheckError(); + #endif #endif // SCALAR_FLOOR return; diff --git a/src/integrators/simple_3D_cuda.cu b/src/integrators/simple_3D_cuda.cu index dd06a2ae6..7a64de081 100644 --- a/src/integrators/simple_3D_cuda.cu +++ b/src/integrators/simple_3D_cuda.cu @@ -187,9 +187,11 @@ void Simple_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, #endif // TEMPERATURE_FLOOR #ifdef SCALAR_FLOOR + #ifdef DUST hipLaunchKernelGGL(Apply_Scalar_Floor, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, nz, n_ghost, grid_enum::dust_density, 1e-5); CudaCheckError(); + #endif DUST #endif // SCALAR_FLOOR return; From 9bb2328d9deb41ee5d5dcae7662835199905bcfa Mon Sep 17 00:00:00 2001 From: Matthew Abruzzo Date: Thu, 7 Dec 2023 12:16:48 -0500 Subject: [PATCH 608/694] fixed dual-energy formalism synchronization. --- src/hydro/hydro_cuda.cu | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/src/hydro/hydro_cuda.cu b/src/hydro/hydro_cuda.cu index c74654c0e..fa060e811 100644 --- a/src/hydro/hydro_cuda.cu +++ b/src/hydro/hydro_cuda.cu @@ -839,6 +839,7 @@ __global__ void Select_Internal_Energy_1D(Real *dev_conserved, int nx, int n_gho int imo, ipo; n_cells = nx; + Real eta_1 = DE_ETA_1; Real eta_2 = DE_ETA_2; // get a global thread ID @@ -864,7 +865,10 @@ __global__ void Select_Internal_Energy_1D(Real *dev_conserved, int nx, int n_gho Emax = fmax(dev_conserved[4 * n_cells + imo], E); Emax = fmax(Emax, dev_conserved[4 * n_cells + ipo]); - if (U_total / Emax > eta_2) { + // We only use the "advected" internal energy if both: + // - the thermal energy divided by total energy is a small fraction (smaller than eta_1) + // - AND we aren't masking shock heating (details controlled by Emax & eta_2) + if ((U_total / E > eta_1) or (U_total / Emax > eta_2)) { U = U_total; } else { U = U_advected; @@ -887,6 +891,7 @@ __global__ void Select_Internal_Energy_2D(Real *dev_conserved, int nx, int ny, i int imo, ipo, jmo, jpo; n_cells = nx * ny; + Real eta_1 = DE_ETA_1; Real eta_2 = DE_ETA_2; // get a global thread ID @@ -922,7 +927,10 @@ __global__ void Select_Internal_Energy_2D(Real *dev_conserved, int nx, int ny, i Emax = fmax(Emax, dev_conserved[4 * n_cells + jmo]); Emax = fmax(Emax, dev_conserved[4 * n_cells + jpo]); - if (U_total / Emax > eta_2) { + // We only use the "advected" internal energy if both: + // - the thermal energy divided by total energy is a small fraction (smaller than eta_1) + // - AND we aren't masking shock heating (details controlled by Emax & eta_2) + if ((U_total / E > eta_1) or (U_total / Emax > eta_2)) { U = U_total; } else { U = U_advected; @@ -945,6 +953,7 @@ __global__ void Select_Internal_Energy_3D(Real *dev_conserved, int nx, int ny, i int imo, ipo, jmo, jpo, kmo, kpo; n_cells = nx * ny * nz; + Real eta_1 = DE_ETA_1; Real eta_2 = DE_ETA_2; // get a global thread ID @@ -987,7 +996,10 @@ __global__ void Select_Internal_Energy_3D(Real *dev_conserved, int nx, int ny, i Emax = fmax(Emax, dev_conserved[4 * n_cells + kmo]); Emax = fmax(Emax, dev_conserved[4 * n_cells + kpo]); - if (U_total / Emax > eta_2) { + // We only use the "advected" internal energy if both: + // - the thermal energy divided by total energy is a small fraction (smaller than eta_1) + // - AND we aren't masking shock heating (details controlled by Emax & eta_2) + if ((U_total / E > eta_1) or (U_total / Emax > eta_2)) { U = U_total; } else { U = U_advected; From 39a6f977feedffe98eacf8dc11cb9493cf8150a9 Mon Sep 17 00:00:00 2001 From: Evan Schneider Date: Fri, 8 Dec 2023 14:53:05 -0500 Subject: [PATCH 609/694] Fixed the way cell averaging works so that conserved values are internally consistent when averaging all fields. --- src/hydro/hydro_cuda.cu | 100 ++++++++++++++++++++++++++++++++++------ src/hydro/hydro_cuda.h | 2 +- 2 files changed, 87 insertions(+), 15 deletions(-) diff --git a/src/hydro/hydro_cuda.cu b/src/hydro/hydro_cuda.cu index c74654c0e..983a6e506 100644 --- a/src/hydro/hydro_cuda.cu +++ b/src/hydro/hydro_cuda.cu @@ -1153,23 +1153,95 @@ __device__ Real Average_Cell_Single_Field(int field_indx, int i, int j, int k, i } __device__ void Average_Cell_All_Fields(int i, int j, int k, int nx, int ny, int nz, int ncells, int n_fields, - Real *conserved) + Real gamma, Real *conserved) { - // Average Density - Average_Cell_Single_Field(0, i, j, k, nx, ny, nz, ncells, conserved); - // Average Momentum_x - Average_Cell_Single_Field(1, i, j, k, nx, ny, nz, ncells, conserved); - // Average Momentum_y - Average_Cell_Single_Field(2, i, j, k, nx, ny, nz, ncells, conserved); - // Average Momentum_z - Average_Cell_Single_Field(3, i, j, k, nx, ny, nz, ncells, conserved); - // Average Energy - Average_Cell_Single_Field(4, i, j, k, nx, ny, nz, ncells, conserved); + int id = i + (j)*nx + (k)*nx*ny; + + Real d, mx, my, mz, E, P; + d = conserved[grid_enum::density*ncells + id]; + mx = conserved[grid_enum::momentumx*ncells + id]; + my = conserved[grid_enum::momentumy*ncells + id]; + mz = conserved[grid_enum::momentumz*ncells + id]; + E = conserved[grid_enum::Energy*ncells + id]; + P = (E - (0.5/d)*(mx*mx + my*my + mz*mz))*(gamma-1.0); + + printf("%3d %3d %3d BC: d: %e E:%e P:%e vx:%e vy:%e vz:%e\n", i, j, k, d, E, P, mx/d, my/d, mz/d); + + int idn; + int N = 0; + Real d_av, vx_av, vy_av, vz_av, P_av; + d_av = vx_av = vy_av = vz_av = P_av = 0.0; + #ifdef SCALAR + Real scalar[NSCALARS], scalar_av[NSCALARS]; + for (int n=0; n 0.0 && P > 0.0) { + d_av += d; + vx_av += mx; + vy_av += my; + vz_av += mz; + P_av += P/(gamma-1.0); + #ifdef SCALAR + for (int n=0; n Date: Fri, 8 Dec 2023 15:06:08 -0500 Subject: [PATCH 610/694] update initial conditions --- src/grid/cuda_boundaries.cu | 4 ++-- src/grid/initial_conditions.cpp | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/grid/cuda_boundaries.cu b/src/grid/cuda_boundaries.cu index 733fb1a85..8e04527ab 100644 --- a/src/grid/cuda_boundaries.cu +++ b/src/grid/cuda_boundaries.cu @@ -312,13 +312,13 @@ __global__ void Wind_Boundary_kernel(Real *c_device, int nx, int ny, int nz, int Real vx, vy, vz, d_0, P_0; n_0 = 1e-2; // same value as n_bg in cloud initial condition function (cm^-3) - T_0 = 3e7; // same value as T_bg in cloud initial condition function (K) + T_0 = 3e6; // same value as T_bg in cloud initial condition function (K) // same values as rho_bg and p_bg in cloud initial condition function d_0 = n_0 * mu * MP / DENSITY_UNIT; P_0 = n_0 * KB * T_0 / PRESSURE_UNIT; - vx = 500 * TIME_UNIT / KPC; // km/s * (cholla unit conversion) + vx = 1000 * TIME_UNIT / KPC; // km/s * (cholla unit conversion) vy = 0.0; vz = 0.0; diff --git a/src/grid/initial_conditions.cpp b/src/grid/initial_conditions.cpp index a1983fb48..0e2ee15ca 100644 --- a/src/grid/initial_conditions.cpp +++ b/src/grid/initial_conditions.cpp @@ -1354,7 +1354,7 @@ void Grid3D::Clouds() vx_cl = 0.0; vy_bg = vy_cl = 0.0; vz_bg = vz_cl = 0.0; - T_bg = 3e7; + T_bg = 3e6; T_cl = 1e4; p_bg = n_bg * KB * T_bg / PRESSURE_UNIT; p_cl = p_bg; From d1c3e2040c9ae989f1885205a997761ed17e7031 Mon Sep 17 00:00:00 2001 From: Evan Schneider Date: Fri, 8 Dec 2023 15:45:24 -0500 Subject: [PATCH 611/694] clang formatted and compiling --- src/hydro/hydro_cuda.cu | 93 ++++++++++++++++++++--------------------- 1 file changed, 46 insertions(+), 47 deletions(-) diff --git a/src/hydro/hydro_cuda.cu b/src/hydro/hydro_cuda.cu index 983a6e506..40f89a347 100644 --- a/src/hydro/hydro_cuda.cu +++ b/src/hydro/hydro_cuda.cu @@ -1155,17 +1155,17 @@ __device__ Real Average_Cell_Single_Field(int field_indx, int i, int j, int k, i __device__ void Average_Cell_All_Fields(int i, int j, int k, int nx, int ny, int nz, int ncells, int n_fields, Real gamma, Real *conserved) { - int id = i + (j)*nx + (k)*nx*ny; + int id = i + (j)*nx + (k)*nx * ny; Real d, mx, my, mz, E, P; - d = conserved[grid_enum::density*ncells + id]; - mx = conserved[grid_enum::momentumx*ncells + id]; - my = conserved[grid_enum::momentumy*ncells + id]; - mz = conserved[grid_enum::momentumz*ncells + id]; - E = conserved[grid_enum::Energy*ncells + id]; - P = (E - (0.5/d)*(mx*mx + my*my + mz*mz))*(gamma-1.0); + d = conserved[grid_enum::density * ncells + id]; + mx = conserved[grid_enum::momentum_x * ncells + id]; + my = conserved[grid_enum::momentum_y * ncells + id]; + mz = conserved[grid_enum::momentum_z * ncells + id]; + E = conserved[grid_enum::Energy * ncells + id]; + P = (E - (0.5 / d) * (mx * mx + my * my + mz * mz)) * (gamma - 1.0); - printf("%3d %3d %3d BC: d: %e E:%e P:%e vx:%e vy:%e vz:%e\n", i, j, k, d, E, P, mx/d, my/d, mz/d); + printf("%3d %3d %3d BC: d: %e E:%e P:%e vx:%e vy:%e vz:%e\n", i, j, k, d, E, P, mx / d, my / d, mz / d); int idn; int N = 0; @@ -1173,75 +1173,74 @@ __device__ void Average_Cell_All_Fields(int i, int j, int k, int nx, int ny, int d_av = vx_av = vy_av = vz_av = P_av = 0.0; #ifdef SCALAR Real scalar[NSCALARS], scalar_av[NSCALARS]; - for (int n=0; n 0.0 && P > 0.0) { d_av += d; vx_av += mx; vy_av += my; vz_av += mz; - P_av += P/(gamma-1.0); - #ifdef SCALAR - for (int n=0; n Date: Fri, 8 Dec 2023 15:48:06 -0500 Subject: [PATCH 612/694] refactor to set dust grain size in input file --- src/dust/dust_cuda.cu | 21 +++++++++++---------- src/dust/dust_cuda.h | 6 +++--- src/global/global.cpp | 6 ++++++ src/global/global.h | 5 +++++ src/grid/grid3D.cpp | 8 +++++++- src/grid/grid3D.h | 6 ++++++ 6 files changed, 38 insertions(+), 14 deletions(-) diff --git a/src/dust/dust_cuda.cu b/src/dust/dust_cuda.cu index bbecf1935..fe3676e65 100644 --- a/src/dust/dust_cuda.cu +++ b/src/dust/dust_cuda.cu @@ -25,17 +25,17 @@ #include "../utils/gpu.hpp" #include "../utils/hydro_utilities.h" -void Dust_Update(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dt, Real gamma) +void Dust_Update(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dt, Real gamma, Real grain_radius) { int n_cells = nx * ny * nz; int ngrid = (n_cells + TPB - 1) / TPB; dim3 dim1dGrid(ngrid, 1, 1); dim3 dim1dBlock(TPB, 1, 1); - hipLaunchKernelGGL(Dust_Kernel, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, nz, n_ghost, n_fields, dt, gamma); + hipLaunchKernelGGL(Dust_Kernel, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, nz, n_ghost, n_fields, dt, gamma, grain_radius); CudaCheckError(); } -__global__ void Dust_Kernel(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dt, Real gamma) +__global__ void Dust_Kernel(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dt, Real gamma, Real grain_radius) { // get grid indices int n_cells = nx * ny * nz; @@ -100,7 +100,7 @@ __global__ void Dust_Kernel(Real *dev_conserved, int nx, int ny, int nz, int n_g temperature = temperature_init; Real tau_sp = - Calc_Sputtering_Timescale(number_density, temperature) / TIME_UNIT; // sputtering timescale, kyr (sim units) + Calc_Sputtering_Timescale(number_density, temperature, grain_radius) / TIME_UNIT; // sputtering timescale, kyr (sim units) dd_dt = Calc_dd_dt(density_dust, tau_sp); // rate of change in dust density at current timestep dd = dd_dt * dt; // change in dust density at current timestep @@ -126,17 +126,18 @@ __global__ void Dust_Kernel(Real *dev_conserved, int nx, int ny, int nz, int n_g } // McKinnon et al. (2017) sputtering timescale -__device__ __host__ Real Calc_Sputtering_Timescale(Real number_density, Real temperature) +__device__ __host__ Real Calc_Sputtering_Timescale(Real number_density, Real temperature, Real grain_radius) { - Real grain_radius = 1; // dust grain size in units of 0.1 micrometers - Real temperature_0 = 2e6; // temp above which the sputtering rate is ~constant in K - Real omega = 2.5; // controls the low-temperature scaling of the sputtering rate - Real A = 5.3618e15; // 0.17 Gyr in s + Real a = grain_radius; // dust grain size in units of 0.1 micrometers + Real temperature_0 = 2e6; // temp above which the sputtering rate is ~constant in K + Real omega = 2.5; // controls the low-temperature scaling of the sputtering rate + Real A = 5.3618e15; // 0.17 Gyr in s number_density /= (6e-4); // gas number density in units of 10^-27 g/cm^3 // sputtering timescale, s - Real tau_sp = A * (grain_radius / number_density) * (pow(temperature_0 / temperature, omega) + 1); + printf("%e\n", grain_radius); + Real tau_sp = A * (a / number_density) * (pow(temperature_0 / temperature, omega) + 1); return tau_sp; } diff --git a/src/dust/dust_cuda.h b/src/dust/dust_cuda.h index fb72007ac..ff27e4098 100644 --- a/src/dust/dust_cuda.h +++ b/src/dust/dust_cuda.h @@ -27,7 +27,7 @@ * \param[in] dt Simulation timestep * \param[in] gamma Specific heat ratio */ -void Dust_Update(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dt, Real gamma); +void Dust_Update(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dt, Real gamma, Real grain_radius); /*! * \brief Compute the change in dust density for a cell and update its value in dev_conserved. @@ -43,7 +43,7 @@ void Dust_Update(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n * \param[in] gamma Specific heat ratio */ __global__ void Dust_Kernel(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dt, - Real gamma); + Real gamma, Real grain_radius); /*! * \brief Compute the sputtering timescale based on a cell's density and temperature. @@ -53,7 +53,7 @@ __global__ void Dust_Kernel(Real *dev_conserved, int nx, int ny, int nz, int n_g * * \return Real Sputtering timescale in seconds (McKinnon et al. 2017) */ -__device__ __host__ Real Calc_Sputtering_Timescale(Real number_density, Real temperature); +__device__ __host__ Real Calc_Sputtering_Timescale(Real number_density, Real temperature, Real grain_radius); /*! * \brief Compute the rate of change in dust density based on the current dust density and sputtering timescale. diff --git a/src/global/global.cpp b/src/global/global.cpp index ecb7f2ccb..f57f2d475 100644 --- a/src/global/global.cpp +++ b/src/global/global.cpp @@ -422,6 +422,12 @@ void Parse_Param(char *name, char *value, struct Parameters *parms) strncpy(parms->skewersdir, value, MAXLEN); #endif #endif + #ifdef SCALAR + #ifdef DUST + } else if (strcmp(name, "grain_radius") == 0) { + parms->grain_radius = atoi(value); + #endif + #endif } else if (!Is_Param_Valid(name)) { chprintf("WARNING: %s/%s: Unknown parameter/value pair!\n", name, value); } diff --git a/src/global/global.h b/src/global/global.h index 8abe358fc..4e9148e79 100644 --- a/src/global/global.h +++ b/src/global/global.h @@ -307,6 +307,11 @@ struct Parameters { char skewersdir[MAXLEN]; #endif #endif +#ifdef SCALAR +#ifdef DUST +Real grain_radius; +#endif +#endif }; /*! \fn void parse_params(char *param_file, struct Parameters * parms); diff --git a/src/grid/grid3D.cpp b/src/grid/grid3D.cpp index 8a7d3fa6f..b7e44e2fe 100644 --- a/src/grid/grid3D.cpp +++ b/src/grid/grid3D.cpp @@ -274,6 +274,12 @@ void Grid3D::Initialize(struct Parameters *P) H.OUTPUT_SCALE_FACOR = not(P->scale_outputs_file[0] == '\0'); #endif +#ifdef SCALAR +#ifdef DUST + H.grain_radius = P->grain_radius; +#endif +#endif + H.Output_Initial = true; } @@ -494,7 +500,7 @@ Real Grid3D::Update_Grid(void) #ifdef DUST // ==Apply dust from dust/dust_cuda.h== - Dust_Update(C.device, H.nx, H.ny, H.nz, H.n_ghost, H.n_fields, H.dt, gama); + Dust_Update(C.device, H.nx, H.ny, H.nz, H.n_ghost, H.n_fields, H.dt, gama, H.grain_radius); #endif // DUST // Update the H and He ionization fractions and apply cooling and photoheating diff --git a/src/grid/grid3D.h b/src/grid/grid3D.h index 3f4d4772c..34bd57f7f 100644 --- a/src/grid/grid3D.h +++ b/src/grid/grid3D.h @@ -267,6 +267,12 @@ struct Header { * \brief Flag set to true when all the data will be written to file * (Restart File ) */ bool Output_Complete_Data; + + #ifdef SCALAR + #ifdef DUST + Real grain_radius; + #endif + #endif }; /*! \class Grid3D From 6353fe0803a23ab7f3a497a6c0ea73461ccba4a4 Mon Sep 17 00:00:00 2001 From: Evan Schneider Date: Fri, 8 Dec 2023 15:50:23 -0500 Subject: [PATCH 613/694] added average cells to cell update crash --- src/hydro/hydro_cuda.cu | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/hydro/hydro_cuda.cu b/src/hydro/hydro_cuda.cu index 40f89a347..854c11147 100644 --- a/src/hydro/hydro_cuda.cu +++ b/src/hydro/hydro_cuda.cu @@ -385,6 +385,7 @@ __global__ void Update_Conserved_Variables_3D(Real *dev_conserved, Real *Q_Lx, R printf("%3d %3d %3d Thread crashed in final update. %e %e %e %e %e\n", xid + x_off, yid + y_off, zid + z_off, dev_conserved[id], dtodx * (dev_F_x[imo] - dev_F_x[id]), dtody * (dev_F_y[jmo] - dev_F_y[id]), dtodz * (dev_F_z[kmo] - dev_F_z[id]), dev_conserved[4 * n_cells + id]); + Average_Cell_All_Fields(xid, yid, zid, nx, ny, nz, n_cells, n_fields, gamma, dev_conserved); } #endif // DENSITY_FLOOR /* @@ -653,7 +654,7 @@ __global__ void Average_Slow_Cells_3D(Real *dev_conserved, int nx, int ny, int n xid, yid, zid, 1. / max_dti, 1. / max_dti_slow, dev_conserved[id] * DENSITY_UNIT / 0.6 / MP, temp, speed * VELOCITY_UNIT * 1e-5, vx * VELOCITY_UNIT * 1e-5, vy * VELOCITY_UNIT * 1e-5, vz * VELOCITY_UNIT * 1e-5, cs); - Average_Cell_All_Fields(xid, yid, zid, nx, ny, nz, n_cells, n_fields, dev_conserved); + Average_Cell_All_Fields(xid, yid, zid, nx, ny, nz, n_cells, n_fields, gamma, dev_conserved); } } } From 69d586c04c4266e1f23c26a693d2a1d784a3bdee Mon Sep 17 00:00:00 2001 From: helenarichie Date: Fri, 8 Dec 2023 15:58:20 -0500 Subject: [PATCH 614/694] wrap dust macro in scalar macro --- src/grid/initial_conditions.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/grid/initial_conditions.cpp b/src/grid/initial_conditions.cpp index 768bf0960..b363f51e9 100644 --- a/src/grid/initial_conditions.cpp +++ b/src/grid/initial_conditions.cpp @@ -1415,9 +1415,11 @@ void Grid3D::Clouds() C.GasEnergy[id] = p_cl / (gama - 1.0); #endif // DE -#ifdef DUST +#ifdef SCALAR + #ifdef DUST C.host[id + H.n_cells * grid_enum::dust_density] = rho_cl * 1e-2; -#endif // DUST + #endif // DUST +#endif } } } From 61970fca9936a3bf32cf56bd88a8fb734eed41a3 Mon Sep 17 00:00:00 2001 From: helenarichie Date: Fri, 8 Dec 2023 16:02:06 -0500 Subject: [PATCH 615/694] run clang format --- src/dust/dust_cuda.cu | 13 ++++++++----- src/dust/dust_cuda.h | 7 ++++--- src/global/global.cpp | 6 +++--- src/global/global.h | 6 +++--- src/grid/grid3D.cpp | 4 ++-- src/grid/grid3D.h | 6 +++--- 6 files changed, 23 insertions(+), 19 deletions(-) diff --git a/src/dust/dust_cuda.cu b/src/dust/dust_cuda.cu index fe3676e65..40ae32236 100644 --- a/src/dust/dust_cuda.cu +++ b/src/dust/dust_cuda.cu @@ -25,17 +25,20 @@ #include "../utils/gpu.hpp" #include "../utils/hydro_utilities.h" -void Dust_Update(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dt, Real gamma, Real grain_radius) +void Dust_Update(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dt, Real gamma, + Real grain_radius) { int n_cells = nx * ny * nz; int ngrid = (n_cells + TPB - 1) / TPB; dim3 dim1dGrid(ngrid, 1, 1); dim3 dim1dBlock(TPB, 1, 1); - hipLaunchKernelGGL(Dust_Kernel, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, nz, n_ghost, n_fields, dt, gamma, grain_radius); + hipLaunchKernelGGL(Dust_Kernel, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, nz, n_ghost, n_fields, dt, gamma, + grain_radius); CudaCheckError(); } -__global__ void Dust_Kernel(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dt, Real gamma, Real grain_radius) +__global__ void Dust_Kernel(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dt, Real gamma, + Real grain_radius) { // get grid indices int n_cells = nx * ny * nz; @@ -99,8 +102,8 @@ __global__ void Dust_Kernel(Real *dev_conserved, int nx, int ny, int nz, int n_g // if dual energy is turned on use temp from total internal energy temperature = temperature_init; - Real tau_sp = - Calc_Sputtering_Timescale(number_density, temperature, grain_radius) / TIME_UNIT; // sputtering timescale, kyr (sim units) + Real tau_sp = Calc_Sputtering_Timescale(number_density, temperature, grain_radius) / + TIME_UNIT; // sputtering timescale, kyr (sim units) dd_dt = Calc_dd_dt(density_dust, tau_sp); // rate of change in dust density at current timestep dd = dd_dt * dt; // change in dust density at current timestep diff --git a/src/dust/dust_cuda.h b/src/dust/dust_cuda.h index ff27e4098..212901e8a 100644 --- a/src/dust/dust_cuda.h +++ b/src/dust/dust_cuda.h @@ -27,7 +27,8 @@ * \param[in] dt Simulation timestep * \param[in] gamma Specific heat ratio */ -void Dust_Update(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dt, Real gamma, Real grain_radius); +void Dust_Update(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dt, Real gamma, + Real grain_radius); /*! * \brief Compute the change in dust density for a cell and update its value in dev_conserved. @@ -42,8 +43,8 @@ void Dust_Update(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n * \param[in] dt Simulation timestep * \param[in] gamma Specific heat ratio */ -__global__ void Dust_Kernel(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dt, - Real gamma, Real grain_radius); +__global__ void Dust_Kernel(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dt, Real gamma, + Real grain_radius); /*! * \brief Compute the sputtering timescale based on a cell's density and temperature. diff --git a/src/global/global.cpp b/src/global/global.cpp index f57f2d475..9f828fd56 100644 --- a/src/global/global.cpp +++ b/src/global/global.cpp @@ -422,12 +422,12 @@ void Parse_Param(char *name, char *value, struct Parameters *parms) strncpy(parms->skewersdir, value, MAXLEN); #endif #endif - #ifdef SCALAR - #ifdef DUST +#ifdef SCALAR + #ifdef DUST } else if (strcmp(name, "grain_radius") == 0) { parms->grain_radius = atoi(value); - #endif #endif +#endif } else if (!Is_Param_Valid(name)) { chprintf("WARNING: %s/%s: Unknown parameter/value pair!\n", name, value); } diff --git a/src/global/global.h b/src/global/global.h index 4e9148e79..83aabd5d6 100644 --- a/src/global/global.h +++ b/src/global/global.h @@ -308,9 +308,9 @@ struct Parameters { #endif #endif #ifdef SCALAR -#ifdef DUST -Real grain_radius; -#endif + #ifdef DUST + Real grain_radius; + #endif #endif }; diff --git a/src/grid/grid3D.cpp b/src/grid/grid3D.cpp index b7e44e2fe..38384af84 100644 --- a/src/grid/grid3D.cpp +++ b/src/grid/grid3D.cpp @@ -275,9 +275,9 @@ void Grid3D::Initialize(struct Parameters *P) #endif #ifdef SCALAR -#ifdef DUST + #ifdef DUST H.grain_radius = P->grain_radius; -#endif + #endif #endif H.Output_Initial = true; diff --git a/src/grid/grid3D.h b/src/grid/grid3D.h index 34bd57f7f..4398dfe44 100644 --- a/src/grid/grid3D.h +++ b/src/grid/grid3D.h @@ -268,11 +268,11 @@ struct Header { * (Restart File ) */ bool Output_Complete_Data; - #ifdef SCALAR +#ifdef SCALAR #ifdef DUST - Real grain_radius; - #endif + Real grain_radius; #endif +#endif }; /*! \class Grid3D From 156c642a4aa0f1ae5ca26c0be30d1f8af8e783d2 Mon Sep 17 00:00:00 2001 From: Matthew Abruzzo Date: Fri, 8 Dec 2023 16:07:55 -0500 Subject: [PATCH 616/694] update the output_always so that it is now a bool. --- src/global/global.cpp | 4 +++- src/global/global.h | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/global/global.cpp b/src/global/global.cpp index b4514a1c0..8468ea526 100644 --- a/src/global/global.cpp +++ b/src/global/global.cpp @@ -221,7 +221,9 @@ void Parse_Param(char *name, char *value, struct Parameters *parms) parms->out_float32_GasEnergy = atoi(value); #endif // DE } else if (strcmp(name, "output_always") == 0) { - parms->output_always = atoi(value); + int tmp = atoi(value); + CHOLLA_ASSERT((tmp == 0) or (tmp == 1), "output_always must be 1 or 0."); + parms->output_always = tmp; #ifdef MHD } else if (strcmp(name, "out_float32_magnetic_x") == 0) { parms->out_float32_magnetic_x = atoi(value); diff --git a/src/global/global.h b/src/global/global.h index 1215da26d..edf7d9207 100644 --- a/src/global/global.h +++ b/src/global/global.h @@ -179,7 +179,7 @@ struct Parameters { #ifdef DE int out_float32_GasEnergy = 0; #endif - int output_always = 0; + bool output_always = 0; #ifdef STATIC_GRAV int custom_grav = 0; // flag to set specific static gravity field #endif From 38b16493a06d7fd98ddc706569804bcd936a83ed Mon Sep 17 00:00:00 2001 From: helenarichie Date: Fri, 8 Dec 2023 16:24:56 -0500 Subject: [PATCH 617/694] undo accidental commit --- src/dust/dust_cuda.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/dust/dust_cuda.cu b/src/dust/dust_cuda.cu index 40ae32236..46273ef03 100644 --- a/src/dust/dust_cuda.cu +++ b/src/dust/dust_cuda.cu @@ -34,7 +34,7 @@ void Dust_Update(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n dim3 dim1dBlock(TPB, 1, 1); hipLaunchKernelGGL(Dust_Kernel, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, nz, n_ghost, n_fields, dt, gamma, grain_radius); - CudaCheckError(); + GPU_Error_Check(); } __global__ void Dust_Kernel(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dt, Real gamma, From 7a02d014964dfe042afe13bbacbbb0e6360b412c Mon Sep 17 00:00:00 2001 From: helenarichie Date: Fri, 8 Dec 2023 16:30:24 -0500 Subject: [PATCH 618/694] update test --- src/dust/dust_cuda_tests.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/dust/dust_cuda_tests.cpp b/src/dust/dust_cuda_tests.cpp index a1357037a..4beab7df5 100644 --- a/src/dust/dust_cuda_tests.cpp +++ b/src/dust/dust_cuda_tests.cpp @@ -28,9 +28,10 @@ TEST(tDUSTTestSputteringTimescale, Real YR_IN_S = 3.154e7; Real const k_test_number_density = 1; Real const k_test_temperature = pow(10, 5.0); + Real const k_test_grain_size = 1; Real const k_fiducial_num = 182565146.96398282; - Real test_num = Calc_Sputtering_Timescale(k_test_number_density, k_test_temperature) / YR_IN_S; // yr + Real test_num = Calc_Sputtering_Timescale(k_test_number_density, k_test_temperature, k_test_grain_size) / YR_IN_S; // yr double abs_diff; int64_t ulps_diff; From 479051a0b452810bfb5fd6983c80c696110d3448 Mon Sep 17 00:00:00 2001 From: helenarichie Date: Fri, 8 Dec 2023 16:32:00 -0500 Subject: [PATCH 619/694] run clang format --- src/dust/dust_cuda_tests.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/dust/dust_cuda_tests.cpp b/src/dust/dust_cuda_tests.cpp index 4beab7df5..fb0677edf 100644 --- a/src/dust/dust_cuda_tests.cpp +++ b/src/dust/dust_cuda_tests.cpp @@ -28,10 +28,11 @@ TEST(tDUSTTestSputteringTimescale, Real YR_IN_S = 3.154e7; Real const k_test_number_density = 1; Real const k_test_temperature = pow(10, 5.0); - Real const k_test_grain_size = 1; + Real const k_test_grain_radius = 1; Real const k_fiducial_num = 182565146.96398282; - Real test_num = Calc_Sputtering_Timescale(k_test_number_density, k_test_temperature, k_test_grain_size) / YR_IN_S; // yr + Real test_num = + Calc_Sputtering_Timescale(k_test_number_density, k_test_temperature, k_test_grain_radius) / YR_IN_S; // yr double abs_diff; int64_t ulps_diff; From f5736441a743eb2d0d140e17a0fadc80584973cb Mon Sep 17 00:00:00 2001 From: Evan Schneider Date: Mon, 11 Dec 2023 13:20:56 -0500 Subject: [PATCH 620/694] turned off linting for scalar for loop --- src/hydro/hydro_cuda.cu | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/hydro/hydro_cuda.cu b/src/hydro/hydro_cuda.cu index d2b7a666d..ac914cfdf 100644 --- a/src/hydro/hydro_cuda.cu +++ b/src/hydro/hydro_cuda.cu @@ -1174,7 +1174,7 @@ __device__ void Average_Cell_All_Fields(int i, int j, int k, int nx, int ny, int d_av = vx_av = vy_av = vz_av = P_av = 0.0; #ifdef SCALAR Real scalar[NSCALARS], scalar_av[NSCALARS]; - for (int n = 0; n < NSCALARS; n++) { + for (int n = 0; n < NSCALARS; n++) { //NOLINT scalar_av[n] = 0.0; } #endif @@ -1189,7 +1189,7 @@ __device__ void Average_Cell_All_Fields(int i, int j, int k, int nx, int ny, int mz = conserved[grid_enum::momentum_z * ncells + idn]; P = (conserved[grid_enum::Energy * ncells + idn] - (0.5 / d) * (mx * mx + my * my + mz * mz)) * (gamma - 1.0); #ifdef SCALAR - for (int n = 0; n < NSCALARS; n++) { + for (int n = 0; n < NSCALARS; n++) { //NOLINT scalar[n] = conserved[grid_enum::scalar * ncells + idn]; } #endif @@ -1200,7 +1200,7 @@ __device__ void Average_Cell_All_Fields(int i, int j, int k, int nx, int ny, int vz_av += mz; P_av += P / (gamma - 1.0); #ifdef SCALAR - for (int n = 0; n < NSCALARS; n++) { + for (int n = 0; n < NSCALARS; n++) { //NOLINT scalar_av[n] += scalar[n]; } #endif @@ -1215,7 +1215,7 @@ __device__ void Average_Cell_All_Fields(int i, int j, int k, int nx, int ny, int vy_av = vy_av / d_av; vz_av = vz_av / d_av; #ifdef SCALAR - for (int n = 0; n < NSCALARS; n++) { + for (int n = 0; n < NSCALARS; n++) { //NOLINT scalar_av[n] = scalar_av[n] / d_av; } #endif From bcd3a2770f12be1f0333a5dc6449042f640c2fe3 Mon Sep 17 00:00:00 2001 From: Evan Schneider Date: Mon, 11 Dec 2023 13:24:46 -0500 Subject: [PATCH 621/694] clang format --- src/hydro/hydro_cuda.cu | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/hydro/hydro_cuda.cu b/src/hydro/hydro_cuda.cu index ac914cfdf..0127344af 100644 --- a/src/hydro/hydro_cuda.cu +++ b/src/hydro/hydro_cuda.cu @@ -1174,7 +1174,7 @@ __device__ void Average_Cell_All_Fields(int i, int j, int k, int nx, int ny, int d_av = vx_av = vy_av = vz_av = P_av = 0.0; #ifdef SCALAR Real scalar[NSCALARS], scalar_av[NSCALARS]; - for (int n = 0; n < NSCALARS; n++) { //NOLINT + for (int n = 0; n < NSCALARS; n++) { // NOLINT scalar_av[n] = 0.0; } #endif @@ -1189,7 +1189,7 @@ __device__ void Average_Cell_All_Fields(int i, int j, int k, int nx, int ny, int mz = conserved[grid_enum::momentum_z * ncells + idn]; P = (conserved[grid_enum::Energy * ncells + idn] - (0.5 / d) * (mx * mx + my * my + mz * mz)) * (gamma - 1.0); #ifdef SCALAR - for (int n = 0; n < NSCALARS; n++) { //NOLINT + for (int n = 0; n < NSCALARS; n++) { // NOLINT scalar[n] = conserved[grid_enum::scalar * ncells + idn]; } #endif @@ -1200,7 +1200,7 @@ __device__ void Average_Cell_All_Fields(int i, int j, int k, int nx, int ny, int vz_av += mz; P_av += P / (gamma - 1.0); #ifdef SCALAR - for (int n = 0; n < NSCALARS; n++) { //NOLINT + for (int n = 0; n < NSCALARS; n++) { // NOLINT scalar_av[n] += scalar[n]; } #endif @@ -1215,7 +1215,7 @@ __device__ void Average_Cell_All_Fields(int i, int j, int k, int nx, int ny, int vy_av = vy_av / d_av; vz_av = vz_av / d_av; #ifdef SCALAR - for (int n = 0; n < NSCALARS; n++) { //NOLINT + for (int n = 0; n < NSCALARS; n++) { // NOLINT scalar_av[n] = scalar_av[n] / d_av; } #endif From cbf98efa96a37658b530642e83e3ad5a72644b20 Mon Sep 17 00:00:00 2001 From: Evan Schneider Date: Mon, 11 Dec 2023 14:01:30 -0500 Subject: [PATCH 622/694] missed one --- src/hydro/hydro_cuda.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/hydro/hydro_cuda.cu b/src/hydro/hydro_cuda.cu index 0127344af..1f1d07521 100644 --- a/src/hydro/hydro_cuda.cu +++ b/src/hydro/hydro_cuda.cu @@ -1232,7 +1232,7 @@ __device__ void Average_Cell_All_Fields(int i, int j, int k, int nx, int ny, int conserved[id + ncells * grid_enum::GasEnergy] = P_av / (gamma - 1.0); #endif #ifdef SCALAR - for (int n = 0; n < NSCALARS; n++) { + for (int n = 0; n < NSCALARS; n++) { // NOLINT conserved[id + ncells * grid_enum::scalar] = d_av * scalar_av[n]; } #endif From 9b7e6147cd46926c2dfed9a23ebc5a642e33c76d Mon Sep 17 00:00:00 2001 From: helenarichie Date: Thu, 14 Dec 2023 08:24:31 -0500 Subject: [PATCH 623/694] save simulation setup --- src/grid/cuda_boundaries.cu | 2 +- src/grid/initial_conditions.cpp | 7 ++++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/grid/cuda_boundaries.cu b/src/grid/cuda_boundaries.cu index 8e04527ab..9ad4a80e6 100644 --- a/src/grid/cuda_boundaries.cu +++ b/src/grid/cuda_boundaries.cu @@ -312,7 +312,7 @@ __global__ void Wind_Boundary_kernel(Real *c_device, int nx, int ny, int nz, int Real vx, vy, vz, d_0, P_0; n_0 = 1e-2; // same value as n_bg in cloud initial condition function (cm^-3) - T_0 = 3e6; // same value as T_bg in cloud initial condition function (K) + T_0 = 3e7; // same value as T_bg in cloud initial condition function (K) // same values as rho_bg and p_bg in cloud initial condition function d_0 = n_0 * mu * MP / DENSITY_UNIT; diff --git a/src/grid/initial_conditions.cpp b/src/grid/initial_conditions.cpp index 0e2ee15ca..c25f24c31 100644 --- a/src/grid/initial_conditions.cpp +++ b/src/grid/initial_conditions.cpp @@ -1346,7 +1346,7 @@ void Grid3D::Clouds() } n_bg = 1e-2; - n_cl = 10; + n_cl = 50; rho_bg = n_bg * mu * MP / DENSITY_UNIT; rho_cl = n_cl * mu * MP / DENSITY_UNIT; vx_bg = 1000*TIME_UNIT/KPC; @@ -1354,10 +1354,11 @@ void Grid3D::Clouds() vx_cl = 0.0; vy_bg = vy_cl = 0.0; vz_bg = vz_cl = 0.0; - T_bg = 3e6; + T_bg = 3e7; T_cl = 1e4; p_bg = n_bg * KB * T_bg / PRESSURE_UNIT; - p_cl = p_bg; + // p_cl = p_bg; + p_cl = n_cl * KB * T_cl / PRESSURE_UNIT; istart = H.n_ghost; iend = H.nx - H.n_ghost; From 4077eddf208d335ca7f4f8710fb0d97a867cd763 Mon Sep 17 00:00:00 2001 From: helenarichie Date: Thu, 14 Dec 2023 08:31:45 -0500 Subject: [PATCH 624/694] resolve merge conflict --- src/integrators/VL_3D_cuda.cu | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/src/integrators/VL_3D_cuda.cu b/src/integrators/VL_3D_cuda.cu index 7d10dff5a..d562707ea 100644 --- a/src/integrators/VL_3D_cuda.cu +++ b/src/integrators/VL_3D_cuda.cu @@ -194,19 +194,14 @@ void VL_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, int // Step 3: Update the conserved variables half a timestep hipLaunchKernelGGL(Update_Conserved_Variables_3D_half, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, dev_conserved_half, -<<<<<<< HEAD - F_x, F_y, F_z, nx, ny, nz, n_ghost, dx, dy, dz, 0.5 * dt, gama, n_fields); - CudaCheckError(); + F_x, F_y, F_z, nx, ny, nz, n_ghost, dx, dy, dz, 0.5 * dt, gama, n_fields, density_floor); + GPU_Error_Check(); #ifdef DENSITY_FLOOR hipLaunchKernelGGL(Apply_Density_Floor, dim1dGrid, dim1dBlock, 0, 0, dev_conserved_half, nx, ny, nz, n_ghost, density_floor); #endif // DENSITY_FLOOR -======= - F_x, F_y, F_z, nx, ny, nz, n_ghost, dx, dy, dz, 0.5 * dt, gama, n_fields, density_floor); - GPU_Error_Check(); ->>>>>>> dev #ifdef MHD // Update the magnetic fields hipLaunchKernelGGL(mhd::Update_Magnetic_Field_3D, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, dev_conserved_half, From 3cd6b2ef9e04369686229035727e3d93aff1ee14 Mon Sep 17 00:00:00 2001 From: helenarichie Date: Fri, 15 Dec 2023 08:54:31 -0500 Subject: [PATCH 625/694] update from cell averaging PR --- builds/make.type.dust | 4 ++-- src/integrators/VL_3D_cuda.cu | 7 ++++--- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/builds/make.type.dust b/builds/make.type.dust index b9813674c..45a11f61b 100644 --- a/builds/make.type.dust +++ b/builds/make.type.dust @@ -14,8 +14,8 @@ DFLAGS += -DHLLC # DFLAGS += -DDE DFLAGS += -DAVERAGE_SLOW_CELLS -DFLAGS += -DTEMPERATURE_FLOOR -DFLAGS += -DSCALAR_FLOOR +# DFLAGS += -DTEMPERATURE_FLOOR +# DFLAGS += -DSCALAR_FLOOR ifeq ($(findstring cosmology,$(TYPE)),cosmology) DFLAGS += -DSIMPLE diff --git a/src/integrators/VL_3D_cuda.cu b/src/integrators/VL_3D_cuda.cu index d562707ea..f299eda3c 100644 --- a/src/integrators/VL_3D_cuda.cu +++ b/src/integrators/VL_3D_cuda.cu @@ -32,7 +32,7 @@ __global__ void Update_Conserved_Variables_3D_half(Real *dev_conserved, Real *dev_conserved_half, Real *dev_F_x, Real *dev_F_y, Real *dev_F_z, int nx, int ny, int nz, int n_ghost, - Real dx, Real dy, Real dz, Real dt, Real gamma, int n_fields); + Real dx, Real dy, Real dz, Real dt, Real gamma, int n_fields, Real density_floor); void VL_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, int ny, int nz, int x_off, int y_off, int z_off, int n_ghost, Real dx, Real dy, Real dz, Real xbound, Real ybound, Real zbound, @@ -335,7 +335,7 @@ void VL_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, int #ifdef DUST hipLaunchKernelGGL(Apply_Scalar_Floor, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, nz, n_ghost, grid_enum::dust_density, 1e-10); - CudaCheckError(); + GPU_Error_Check(); #endif #endif // SCALAR_FLOOR @@ -361,7 +361,8 @@ void Free_Memory_VL_3D() __global__ void Update_Conserved_Variables_3D_half(Real *dev_conserved, Real *dev_conserved_half, Real *dev_F_x, Real *dev_F_y, Real *dev_F_z, int nx, int ny, int nz, int n_ghost, - Real dx, Real dy, Real dz, Real dt, Real gamma, int n_fields) + Real dx, Real dy, Real dz, Real dt, Real gamma, int n_fields, + Real density_floor) { Real dtodx = dt / dx; Real dtody = dt / dy; From fc8f8d7c5c031fc452884f9fc2d1ba8a48a776c3 Mon Sep 17 00:00:00 2001 From: Matthew Abruzzo Date: Wed, 20 Dec 2023 14:45:16 -0500 Subject: [PATCH 626/694] revert de formalism behavior in cosmological simulations until future tests are performed. --- src/global/global.h | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/src/global/global.h b/src/global/global.h index 8abe358fc..8634ae3f2 100644 --- a/src/global/global.h +++ b/src/global/global.h @@ -53,10 +53,25 @@ typedef double Real; #define TEMP_FLOOR 1e-3 #define DENS_FLOOR 1e-5 // in code units -// Parameter for Enzo dual Energy Condition -#define DE_ETA_1 \ - 0.001 // Ratio of U to E for which Internal Energy is used to compute the - // Pressure +// Parameters for Enzo dual Energy Condition +// - Prior to GH PR #356, DE_ETA_1 nominally had a value of 0.001 in all +// simulations (in practice, the value of DE_ETA_1 had minimal significance +// in those simulations). In PR #356, we revised the internal-energy +// synchronization to account for the value of DE_ETA_1. This was necessary +// for non-cosmology simulations. +// - In Cosmological simulation, we set DE_ETA_1 to a large number (it doesn't +// really matter what, as long as its >=1) to maintain the older behavior +// - In the future, we run tests and revisit the choice of DE_ETA_1 in +// cosmological simulations +#ifdef COSMOLOGY + #define DE_ETA_1 10.0 +#else + #define DE_ETA_1 \ + 0.001 // Ratio of U to E for which Internal Energy is used to compute the + // Pressure. This also affects when the Internal Energy is used for + // the update. +#endif + #define DE_ETA_2 \ 0.035 // Ratio of U to max(E_local) used to select which Internal Energy is // used for the update. From c1ccee3ca8783a5b4aab6a8dd090a7e07c040293 Mon Sep 17 00:00:00 2001 From: Helena Richie Date: Wed, 3 Jan 2024 12:30:59 -0500 Subject: [PATCH 627/694] update branch --- src/grid/initial_conditions.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/grid/initial_conditions.cpp b/src/grid/initial_conditions.cpp index 432321113..c9d22727a 100644 --- a/src/grid/initial_conditions.cpp +++ b/src/grid/initial_conditions.cpp @@ -1346,7 +1346,7 @@ void Grid3D::Clouds() } n_bg = 1e-2; - n_cl = 50; + n_cl = 10; rho_bg = n_bg * mu * MP / DENSITY_UNIT; rho_cl = n_cl * mu * MP / DENSITY_UNIT; vx_bg = 1000*TIME_UNIT/KPC; @@ -1357,8 +1357,8 @@ void Grid3D::Clouds() T_bg = 3e7; T_cl = 1e4; p_bg = n_bg * KB * T_bg / PRESSURE_UNIT; - // p_cl = p_bg; - p_cl = n_cl * KB * T_cl / PRESSURE_UNIT; + p_cl = p_bg; + // p_cl = n_cl * KB * T_cl / PRESSURE_UNIT; istart = H.n_ghost; iend = H.nx - H.n_ghost; From 5b8525187f8a5dc965d4a999e4285f57ca8a7f5d Mon Sep 17 00:00:00 2001 From: Helena Richie Date: Thu, 4 Jan 2024 08:28:49 -0500 Subject: [PATCH 628/694] update build --- builds/make.type.dust | 11 +++-------- src/dust/dust_cuda.cu | 2 +- 2 files changed, 4 insertions(+), 9 deletions(-) diff --git a/builds/make.type.dust b/builds/make.type.dust index 45a11f61b..a95912ea0 100644 --- a/builds/make.type.dust +++ b/builds/make.type.dust @@ -9,20 +9,15 @@ MPI_GPU ?= DFLAGS += -DCUDA DFLAGS += -DMPI_CHOLLA DFLAGS += -DPRECISION=2 -DFLAGS += -DPPMP +DFLAGS += -DPLMP DFLAGS += -DHLLC -# DFLAGS += -DDE +DFLAGS += -DDE DFLAGS += -DAVERAGE_SLOW_CELLS # DFLAGS += -DTEMPERATURE_FLOOR -# DFLAGS += -DSCALAR_FLOOR +DFLAGS += -DSCALAR_FLOOR -ifeq ($(findstring cosmology,$(TYPE)),cosmology) -DFLAGS += -DSIMPLE -else DFLAGS += -DVL -# DFLAGS += -DSIMPLE -endif # Evolve additional scalars DFLAGS += -DSCALAR diff --git a/src/dust/dust_cuda.cu b/src/dust/dust_cuda.cu index 7ca48a9fd..d4c6191ec 100644 --- a/src/dust/dust_cuda.cu +++ b/src/dust/dust_cuda.cu @@ -83,7 +83,7 @@ __global__ void Dust_Kernel(Real *dev_conserved, int nx, int ny, int nz, int n_g velocity_z = dev_conserved[id + n_cells * grid_enum::momentum_z] / density_gas; #ifdef DE energy_gas = dev_conserved[id + n_cells * grid_enum::GasEnergy] / density_gas; - energy_gas = fmax(ge, (Real)TINY_NUMBER); + energy_gas = fmax(energy_gas, (Real)TINY_NUMBER); #endif // DE // calculate physical quantities From f5dc360bdf2c2a5e649b665af7add87ea2d6aa1f Mon Sep 17 00:00:00 2001 From: helenarichie Date: Thu, 4 Jan 2024 09:00:07 -0500 Subject: [PATCH 629/694] hard-code cloud temp --- src/grid/cuda_boundaries.cu | 2 +- src/grid/initial_conditions.cpp | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/grid/cuda_boundaries.cu b/src/grid/cuda_boundaries.cu index 675f14ae5..715840322 100644 --- a/src/grid/cuda_boundaries.cu +++ b/src/grid/cuda_boundaries.cu @@ -312,7 +312,7 @@ __global__ void Wind_Boundary_kernel(Real *c_device, int nx, int ny, int nz, int Real vx, vy, vz, d_0, P_0; n_0 = 1e-2; // same value as n_bg in cloud initial condition function (cm^-3) - T_0 = 3e7; // same value as T_bg in cloud initial condition function (K) + T_0 = 3e6; // same value as T_bg in cloud initial condition function (K) // same values as rho_bg and p_bg in cloud initial condition function d_0 = n_0 * mu * MP / DENSITY_UNIT; diff --git a/src/grid/initial_conditions.cpp b/src/grid/initial_conditions.cpp index 432321113..658362954 100644 --- a/src/grid/initial_conditions.cpp +++ b/src/grid/initial_conditions.cpp @@ -1346,7 +1346,7 @@ void Grid3D::Clouds() } n_bg = 1e-2; - n_cl = 50; + n_cl = 10; rho_bg = n_bg * mu * MP / DENSITY_UNIT; rho_cl = n_cl * mu * MP / DENSITY_UNIT; vx_bg = 1000*TIME_UNIT/KPC; @@ -1354,7 +1354,7 @@ void Grid3D::Clouds() vx_cl = 0.0; vy_bg = vy_cl = 0.0; vz_bg = vz_cl = 0.0; - T_bg = 3e7; + T_bg = 3e6; T_cl = 1e4; p_bg = n_bg * KB * T_bg / PRESSURE_UNIT; // p_cl = p_bg; From e12b846a388f3429920196f1088773babb540c82 Mon Sep 17 00:00:00 2001 From: helenarichie Date: Thu, 4 Jan 2024 14:01:33 -0500 Subject: [PATCH 630/694] update build --- src/grid/initial_conditions.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/grid/initial_conditions.cpp b/src/grid/initial_conditions.cpp index 6389b8bf7..658362954 100644 --- a/src/grid/initial_conditions.cpp +++ b/src/grid/initial_conditions.cpp @@ -1357,8 +1357,8 @@ void Grid3D::Clouds() T_bg = 3e6; T_cl = 1e4; p_bg = n_bg * KB * T_bg / PRESSURE_UNIT; - p_cl = p_bg; - // p_cl = n_cl * KB * T_cl / PRESSURE_UNIT; + // p_cl = p_bg; + p_cl = n_cl * KB * T_cl / PRESSURE_UNIT; istart = H.n_ghost; iend = H.nx - H.n_ghost; From 3fec9785d8bfd6654ad9045cfc8135bd9822a218 Mon Sep 17 00:00:00 2001 From: helenarichie Date: Sat, 6 Jan 2024 16:37:34 -0500 Subject: [PATCH 631/694] update build --- builds/make.type.dust | 4 +--- src/global/global.h | 2 +- src/grid/grid3D.cpp | 2 +- src/hydro/hydro_cuda.cu | 3 ++- 4 files changed, 5 insertions(+), 6 deletions(-) diff --git a/builds/make.type.dust b/builds/make.type.dust index a95912ea0..35d595a37 100644 --- a/builds/make.type.dust +++ b/builds/make.type.dust @@ -14,7 +14,7 @@ DFLAGS += -DHLLC DFLAGS += -DDE DFLAGS += -DAVERAGE_SLOW_CELLS -# DFLAGS += -DTEMPERATURE_FLOOR +DFLAGS += -DTEMPERATURE_FLOOR DFLAGS += -DSCALAR_FLOOR DFLAGS += -DVL @@ -25,8 +25,6 @@ DFLAGS += -DSCALAR # Define dust macro DFLAGS += -DDUST -DFLAGS += -DSCALAR_FLOOR - # Apply the cooling in the GPU from precomputed tables DFLAGS += -DCOOLING_GPU DFLAGS += -DCLOUDY_COOLING diff --git a/src/global/global.h b/src/global/global.h index dbb976d8c..18b08b71f 100644 --- a/src/global/global.h +++ b/src/global/global.h @@ -50,7 +50,7 @@ typedef double Real; #define LOG_FILE_NAME "run_output.log" // Conserved Floor Values -#define TEMP_FLOOR 10 +#define TEMP_FLOOR 100 #define DENS_FLOOR 1e-5 // in code units // Parameters for Enzo dual Energy Condition diff --git a/src/grid/grid3D.cpp b/src/grid/grid3D.cpp index a76417321..4638942eb 100644 --- a/src/grid/grid3D.cpp +++ b/src/grid/grid3D.cpp @@ -157,7 +157,7 @@ void Grid3D::Initialize(struct Parameters *P) C_cfl = 0.3; #ifdef AVERAGE_SLOW_CELLS - H.min_dt_slow = 1e-5; // Initialize the minumum dt to a tiny number + H.min_dt_slow = 0.024; // Initialize the minumum dt to a tiny number #endif // AVERAGE_SLOW_CELLS #ifndef MPI_CHOLLA diff --git a/src/hydro/hydro_cuda.cu b/src/hydro/hydro_cuda.cu index bff6df5ba..e50cdd3f0 100644 --- a/src/hydro/hydro_cuda.cu +++ b/src/hydro/hydro_cuda.cu @@ -379,7 +379,8 @@ __global__ void Update_Conserved_Variables_3D(Real *dev_conserved, Real *Q_Lx, R #endif // GRAVITY - #if !(defined(DENSITY_FLOOR) && defined(TEMPERATURE_FLOOR)) + // #if !(defined(DENSITY_FLOOR) && defined(TEMPERATURE_FLOOR)) + #if !(defined(DENSITY_FLOOR)) if (dev_conserved[id] < 0.0 || dev_conserved[id] != dev_conserved[id] || dev_conserved[4 * n_cells + id] < 0.0 || dev_conserved[4 * n_cells + id] != dev_conserved[4 * n_cells + id]) { printf("%3d %3d %3d Thread crashed in final update. %e %e %e %e %e\n", xid + x_off, yid + y_off, zid + z_off, From 17f16bd740d0e600cc4a867bcad9176240c210fe Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Tue, 9 Jan 2024 10:25:52 -0500 Subject: [PATCH 632/694] Add missing limiting in PLMC The limiting after the calculation of the interfaces was missing. Added it --- src/reconstruction/plmc_cuda.cu | 3 ++ src/reconstruction/reconstruction.h | 53 +++++++++++++++++++++++++++++ 2 files changed, 56 insertions(+) diff --git a/src/reconstruction/plmc_cuda.cu b/src/reconstruction/plmc_cuda.cu index 41a5ae505..bb31e9904 100644 --- a/src/reconstruction/plmc_cuda.cu +++ b/src/reconstruction/plmc_cuda.cu @@ -124,6 +124,9 @@ __global__ __launch_bounds__(TPB) void PLMC_cuda(Real *dev_conserved, Real *dev_ reconstruction::Primitive interface_L_iph = reconstruction::Calc_Interface_Linear(cell_i, del_m_i, 1.0); reconstruction::Primitive interface_R_imh = reconstruction::Calc_Interface_Linear(cell_i, del_m_i, -1.0); + // Limit the interfaces + reconstruction::Plm_Limit_Interfaces(interface_L_iph, interface_R_imh, cell_imo, cell_i, cell_ipo); + #ifndef VL Real const dtodx = dt / dx; diff --git a/src/reconstruction/reconstruction.h b/src/reconstruction/reconstruction.h index 07aae21a6..4b5fe44ed 100644 --- a/src/reconstruction/reconstruction.h +++ b/src/reconstruction/reconstruction.h @@ -673,6 +673,59 @@ Primitive __device__ __host__ __inline__ Calc_Interface_Linear(Primitive const & } // ===================================================================================================================== +// ===================================================================================================================== +/*! + * \brief Apply limiting the the primitive interfaces in PLM reconstructions + * + * \param[in,out] interface_L_iph The unlimited left plus 1/2 interface + * \param[in,out] interface_R_imh The unlimited right minus 1/2 interface + * \param[in] cell_imo The cell centered values at i-1 + * \param[in] cell_i The cell centered values at i + * \param[in] cell_ipo The cell centered values at i+1 + */ +void __device__ __host__ __inline__ Plm_Limit_Interfaces(Primitive &interface_L_iph, Primitive &interface_R_imh, + Primitive const &cell_imo, Primitive const &cell_i, + Primitive const &cell_ipo) +{ + auto limiter = [](Real &l_iph, Real &r_imh, Real const &val_imo, Real const &val_i, Real const &val_ipo) { + Real sum = l_iph + r_imh; + r_imh = fmax(fmin(val_i, val_imo), r_imh); + r_imh = fmin(fmax(val_i, val_imo), r_imh); + l_iph = sum - r_imh; + l_iph = fmax(fmin(val_i, val_ipo), l_iph); + l_iph = fmin(fmax(val_i, val_ipo), l_iph); + r_imh = sum - l_iph; + }; + + limiter(interface_L_iph.density, interface_R_imh.density, cell_imo.density, cell_i.density, cell_ipo.density); + limiter(interface_L_iph.velocity_x, interface_R_imh.velocity_x, cell_imo.velocity_x, cell_i.velocity_x, + cell_ipo.velocity_x); + limiter(interface_L_iph.velocity_y, interface_R_imh.velocity_y, cell_imo.velocity_y, cell_i.velocity_y, + cell_ipo.velocity_y); + limiter(interface_L_iph.velocity_z, interface_R_imh.velocity_z, cell_imo.velocity_z, cell_i.velocity_z, + cell_ipo.velocity_z); + limiter(interface_L_iph.pressure, interface_R_imh.pressure, cell_imo.pressure, cell_i.pressure, cell_ipo.pressure); + +#ifdef MHD + limiter(interface_L_iph.magnetic_y, interface_R_imh.magnetic_y, cell_imo.magnetic_y, cell_i.magnetic_y, + cell_ipo.magnetic_y); + limiter(interface_L_iph.magnetic_z, interface_R_imh.magnetic_z, cell_imo.magnetic_z, cell_i.magnetic_z, + cell_ipo.magnetic_z); +#endif // MHD + +#ifdef DE + limiter(interface_L_iph.gas_energy, interface_R_imh.gas_energy, cell_imo.gas_energy, cell_i.gas_energy, + cell_ipo.gas_energy); +#endif // DE +#ifdef SCALAR + for (int i = 0; i < NSCALARS; i++) { + limiter(interface_L_iph.scalar[i], interface_R_imh.scalar[i], cell_imo.scalar[i], cell_i.scalar[i], + cell_ipo.scalar[i]); + } +#endif // SCALAR +} +// ===================================================================================================================== + // ===================================================================================================================== /*! * \brief Compute the interface state for the CTU version fo the reconstructor from the slope and cell centered state From 2de4389894fad7d99707ac49440496377b19176e Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Tue, 9 Jan 2024 11:19:42 -0500 Subject: [PATCH 633/694] Update MHD PLMC test for new limiter --- src/reconstruction/plmc_cuda_tests.cu | 28 +++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/src/reconstruction/plmc_cuda_tests.cu b/src/reconstruction/plmc_cuda_tests.cu index 0c567b6d0..429bb7a89 100644 --- a/src/reconstruction/plmc_cuda_tests.cu +++ b/src/reconstruction/plmc_cuda_tests.cu @@ -204,17 +204,17 @@ TEST(tMHDPlmcReconstructor, CorrectInputExpectCorrectOutput) {{21, 0.73640639402573249}, {85, 3.3462413154443715}, {149, 2.1945584994458125}, - {213, 0.67418839414138987}, - {277, 16.909618487528142}, - {341, 2.1533768050263267}, - {405, 1.6994195863331925}}, + {213, 1.1837630990406585}, + {277, 17.570011907061254}, + {341, 2.1583975283044725}, + {405, 1.7033818819502551}}, {{21, 0.25340904981266843}, {85, 2.0441984720128734}, {149, 1.9959059157695584}, {213, 0.45377591914009824}, - {277, 23.677832869261188}, - {341, 1.5437923271692418}, - {405, 1.8141353672443383}}}; + {277, 24.018953780483471}, + {341, 1.7033818819502551}, + {405, 1.8587936590169301}}}; std::vector> fiducial_interface_right = {{{20, 0.59023012197434721}, {84, 3.0043379408547275}, {148, 2.6320759184913625}, @@ -226,19 +226,19 @@ TEST(tMHDPlmcReconstructor, CorrectInputExpectCorrectOutput) {17, 0.44405384992296193}, {81, 2.5027813113931279}, {145, 2.6371119205792346}, - {209, 1.0210845222961809}, - {273, 21.360010722689488}, - {337, 2.1634182515826184}, - {401, 1.7073441775673177}, + {209, 0.71381042558869023}, + {273, 20.742152413015724}, + {337, 2.1583975283044725}, + {401, 1.7033818819502551}, }, { {5, 0.92705119413602599}, {69, 1.9592598982258778}, {133, 0.96653490574340428}, {197, 1.3203867992383289}, - {261, 8.0057564947791793}, - {325, 1.8629714367312684}, - {389, 1.9034519507895218}, + {261, 7.6371723945376493}, + {325, 1.7033818819502551}, + {389, 1.8587936590169301}, }}; // Loop over different directions From e38b9778777e488008d248ad693e7d04d8f7fdd1 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Tue, 9 Jan 2024 11:58:20 -0500 Subject: [PATCH 634/694] Add test for new PLM limiting function --- src/reconstruction/reconstruction_tests.cu | 71 ++++++++++++++++++++++ 1 file changed, 71 insertions(+) diff --git a/src/reconstruction/reconstruction_tests.cu b/src/reconstruction/reconstruction_tests.cu index 6c2e19af7..5ef075768 100644 --- a/src/reconstruction/reconstruction_tests.cu +++ b/src/reconstruction/reconstruction_tests.cu @@ -609,3 +609,74 @@ TEST(tALLReconstructionWriteData, CorrectInputExpectCorrectOutput) testing_utilities::Check_Results(fiducial_val, test_val, "Interface at i=" + std::to_string(i)); } } + +TEST(tHYDROReconstructionPlmLimitInterfaces, CorrectInputExpectCorrectOutput) +{ + // Set up values to test + reconstruction::Primitive interface_l_iph, interface_r_imh; + reconstruction::Primitive cell_im1, cell_i, cell_ip1; + interface_r_imh.density = -1.94432878387898625e+14; + interface_r_imh.velocity_x = 1.42049955114756404e-04; + interface_r_imh.velocity_y = -2.61311412306644180e-06; + interface_r_imh.velocity_z = -1.99429361865204601e-07; + interface_r_imh.pressure = -2.01130121665840250e-14; + interface_l_iph.density = 1.94433200621991188e+14; + interface_l_iph.velocity_x = 1.42025407335853601e-04; + interface_l_iph.velocity_y = -2.61311412306644180e-06; + interface_l_iph.velocity_z = -6.01154878659959398e-06; + interface_l_iph.pressure = 2.01130321665840277e-14; + + cell_im1.density = 1.61101072114153951e+08; + cell_i.density = 1.61117046279133737e+08; + cell_ip1.density = 1.61011252191243321e+08; + cell_im1.velocity_x = 1.42067642369120116e-04; + cell_i.velocity_x = 1.42037681225305003e-04; + cell_ip1.velocity_x = 1.41901817571928041e-04; + cell_im1.velocity_y = -2.61228250783092252e-06; + cell_i.velocity_y = -2.61311412306644180e-06; + cell_ip1.velocity_y = -2.61155204131260820e-06; + cell_im1.velocity_z = 2.71420653365757378e-06; + cell_i.velocity_z = -3.10548907423239929e-06; + cell_ip1.velocity_z = -8.91005201578514336e-06; + cell_im1.pressure = 9.99999999999999945e-21; + cell_i.pressure = 9.99999999999999945e-21; + cell_ip1.pressure = 4.70262856027679407e-03; + + // Set fiducial values + reconstruction::Primitive interface_r_imh_fiducial, interface_l_iph_fiducial; + interface_r_imh_fiducial.density = 1.61117046283366263e+08; + interface_r_imh_fiducial.velocity_x = 1.42049955114756404e-04; + interface_r_imh_fiducial.velocity_y = -2.61311412306644180e-06; + interface_r_imh_fiducial.velocity_z = -1.99429361865204601e-07; + interface_r_imh_fiducial.pressure = 9.99999999999999945e-21; + interface_l_iph_fiducial.density = 1.61117046279133737e+08; + interface_l_iph_fiducial.velocity_x = 1.42025407335853601e-04; + interface_l_iph_fiducial.velocity_y = -2.61311412306644180e-06; + interface_l_iph_fiducial.velocity_z = -6.01154878659959398e-06; + interface_l_iph_fiducial.pressure = 1.00000000027100627e-20; + + // Run function + reconstruction::Plm_Limit_Interfaces(interface_l_iph, interface_r_imh, cell_im1, cell_i, cell_ip1); + + // Check values + testing_utilities::Check_Results(interface_l_iph_fiducial.density, interface_l_iph.density, + "Mismatch in l_iph density"); + testing_utilities::Check_Results(interface_l_iph_fiducial.velocity_x, interface_l_iph.velocity_x, + "Mismatch in l_iph velocity_x"); + testing_utilities::Check_Results(interface_l_iph_fiducial.velocity_y, interface_l_iph.velocity_y, + "Mismatch in l_iph velocity_y"); + testing_utilities::Check_Results(interface_l_iph_fiducial.velocity_z, interface_l_iph.velocity_z, + "Mismatch in l_iph velocity_z"); + testing_utilities::Check_Results(interface_l_iph_fiducial.pressure, interface_l_iph.pressure, + "Mismatch in l_iph pressure"); + testing_utilities::Check_Results(interface_r_imh_fiducial.density, interface_r_imh.density, + "Mismatch in r_imh density"); + testing_utilities::Check_Results(interface_r_imh_fiducial.velocity_x, interface_r_imh.velocity_x, + "Mismatch in r_imh velocity_x"); + testing_utilities::Check_Results(interface_r_imh_fiducial.velocity_y, interface_r_imh.velocity_y, + "Mismatch in r_imh velocity_y"); + testing_utilities::Check_Results(interface_r_imh_fiducial.velocity_z, interface_r_imh.velocity_z, + "Mismatch in r_imh velocity_z"); + testing_utilities::Check_Results(interface_r_imh_fiducial.pressure, interface_r_imh.pressure, + "Mismatch in r_imh pressure"); +} From 4c548a73418b870043b586c1193253811898079d Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Fri, 12 Jan 2024 14:13:58 -0500 Subject: [PATCH 635/694] Remove unnecessary sums in limiter --- src/reconstruction/reconstruction.h | 11 ++++------- src/reconstruction/reconstruction_tests.cu | 4 ++-- 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/src/reconstruction/reconstruction.h b/src/reconstruction/reconstruction.h index 4b5fe44ed..23442a776 100644 --- a/src/reconstruction/reconstruction.h +++ b/src/reconstruction/reconstruction.h @@ -688,13 +688,10 @@ void __device__ __host__ __inline__ Plm_Limit_Interfaces(Primitive &interface_L_ Primitive const &cell_ipo) { auto limiter = [](Real &l_iph, Real &r_imh, Real const &val_imo, Real const &val_i, Real const &val_ipo) { - Real sum = l_iph + r_imh; - r_imh = fmax(fmin(val_i, val_imo), r_imh); - r_imh = fmin(fmax(val_i, val_imo), r_imh); - l_iph = sum - r_imh; - l_iph = fmax(fmin(val_i, val_ipo), l_iph); - l_iph = fmin(fmax(val_i, val_ipo), l_iph); - r_imh = sum - l_iph; + r_imh = fmax(fmin(val_i, val_imo), r_imh); + r_imh = fmin(fmax(val_i, val_imo), r_imh); + l_iph = fmax(fmin(val_i, val_ipo), l_iph); + l_iph = fmin(fmax(val_i, val_ipo), l_iph); }; limiter(interface_L_iph.density, interface_R_imh.density, cell_imo.density, cell_i.density, cell_ipo.density); diff --git a/src/reconstruction/reconstruction_tests.cu b/src/reconstruction/reconstruction_tests.cu index 5ef075768..008ce6752 100644 --- a/src/reconstruction/reconstruction_tests.cu +++ b/src/reconstruction/reconstruction_tests.cu @@ -644,7 +644,7 @@ TEST(tHYDROReconstructionPlmLimitInterfaces, CorrectInputExpectCorrectOutput) // Set fiducial values reconstruction::Primitive interface_r_imh_fiducial, interface_l_iph_fiducial; - interface_r_imh_fiducial.density = 1.61117046283366263e+08; + interface_r_imh_fiducial.density = 161101072.11415395; interface_r_imh_fiducial.velocity_x = 1.42049955114756404e-04; interface_r_imh_fiducial.velocity_y = -2.61311412306644180e-06; interface_r_imh_fiducial.velocity_z = -1.99429361865204601e-07; @@ -653,7 +653,7 @@ TEST(tHYDROReconstructionPlmLimitInterfaces, CorrectInputExpectCorrectOutput) interface_l_iph_fiducial.velocity_x = 1.42025407335853601e-04; interface_l_iph_fiducial.velocity_y = -2.61311412306644180e-06; interface_l_iph_fiducial.velocity_z = -6.01154878659959398e-06; - interface_l_iph_fiducial.pressure = 1.00000000027100627e-20; + interface_l_iph_fiducial.pressure = 2.0113032166584028e-14; // Run function reconstruction::Plm_Limit_Interfaces(interface_l_iph, interface_r_imh, cell_im1, cell_i, cell_ip1); From c38c1151af32c33ea6d0ebb9eebc4e08af6c927d Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Fri, 12 Jan 2024 14:15:24 -0500 Subject: [PATCH 636/694] Fix bug in calculation of kinetic energy Two of the terms were multiplied instead of added. Added a new function for computing the square of the magnitude --- cholla-tests-data | 2 +- src/reconstruction/plmc_cuda_tests.cu | 10 +++++----- src/reconstruction/reconstruction_tests.cu | 2 +- src/system_tests/mhd_system_tests.cpp | 2 +- src/utils/hydro_utilities.h | 12 ++++++------ src/utils/hydro_utilities_tests.cpp | 4 ++-- src/utils/math_utilities.h | 16 ++++++++++++++++ src/utils/mhd_utilities.h | 7 +++---- 8 files changed, 35 insertions(+), 20 deletions(-) diff --git a/cholla-tests-data b/cholla-tests-data index dcd73ff52..71eb66d63 160000 --- a/cholla-tests-data +++ b/cholla-tests-data @@ -1 +1 @@ -Subproject commit dcd73ff52b9027627b247c6d888bcdb56840c85e +Subproject commit 71eb66d63622ac15c0844ae96ec9034cd5e4f4d3 diff --git a/src/reconstruction/plmc_cuda_tests.cu b/src/reconstruction/plmc_cuda_tests.cu index 429bb7a89..68f11b396 100644 --- a/src/reconstruction/plmc_cuda_tests.cu +++ b/src/reconstruction/plmc_cuda_tests.cu @@ -226,9 +226,9 @@ TEST(tMHDPlmcReconstructor, CorrectInputExpectCorrectOutput) {17, 0.44405384992296193}, {81, 2.5027813113931279}, {145, 2.6371119205792346}, - {209, 0.71381042558869023}, - {273, 20.742152413015724}, - {337, 2.1583975283044725}, + {209, 1.0210845222961809}, + {273, 21.353253570231175}, + {337, 2.1634182515826184}, {401, 1.7033818819502551}, }, { @@ -236,8 +236,8 @@ TEST(tMHDPlmcReconstructor, CorrectInputExpectCorrectOutput) {69, 1.9592598982258778}, {133, 0.96653490574340428}, {197, 1.3203867992383289}, - {261, 7.6371723945376493}, - {325, 1.7033818819502551}, + {261, 7.9217487636977353}, + {325, 1.8629714367312684}, {389, 1.8587936590169301}, }}; diff --git a/src/reconstruction/reconstruction_tests.cu b/src/reconstruction/reconstruction_tests.cu index 008ce6752..dc1f10720 100644 --- a/src/reconstruction/reconstruction_tests.cu +++ b/src/reconstruction/reconstruction_tests.cu @@ -238,7 +238,7 @@ TEST(tALLReconstructionLoadData, CorrectInputExpectCorrectOutput) reconstruction::Primitive fiducial_data{13, 3.0769230769230771, 5.1538461538461542, 7.2307692307692308, 39950.641025641031}; #ifdef DE - fiducial_data.pressure = 34274.282506448195; + fiducial_data.pressure = 39950.641025641031; #endif // DE testing_utilities::Check_Results(fiducial_data.density, test_data.density, "density"); testing_utilities::Check_Results(fiducial_data.velocity_x, test_data.velocity_x, "velocity_x"); diff --git a/src/system_tests/mhd_system_tests.cpp b/src/system_tests/mhd_system_tests.cpp index c7a21aaae..4261797b2 100644 --- a/src/system_tests/mhd_system_tests.cpp +++ b/src/system_tests/mhd_system_tests.cpp @@ -765,7 +765,7 @@ TEST_P(tMHDSYSTEMParameterizedMpi, AdvectingFieldLoopCorrectInputExpectCorrectOu test_runner.numMpiRanks = GetParam(); // Only do the L2 Norm test. The regular cell-to-cell comparison is brittle for this test across systems - test_runner.runTest(true, 3.9E-8, 1.6E-6); + test_runner.runTest(true, 3.9E-8, 2.25E-6); } /// Test the MHD Blast Wave diff --git a/src/utils/hydro_utilities.h b/src/utils/hydro_utilities.h index c0f783e1c..24caff9f7 100644 --- a/src/utils/hydro_utilities.h +++ b/src/utils/hydro_utilities.h @@ -35,7 +35,7 @@ inline __host__ __device__ Real Calc_Pressure_Primitive(Real const &E, Real cons Real const &vz, Real const &gamma, Real const &magnetic_x = 0.0, Real const &magnetic_y = 0.0, Real const &magnetic_z = 0.0) { - Real pressure = (E - 0.5 * d * (vx * vx + ((vy * vy) + (vz * vz)))); + Real pressure = E - 0.5 * d * math_utils::SquareMagnitude(vx, vy, vz); #ifdef MHD pressure -= mhd::utils::computeMagneticEnergy(magnetic_x, magnetic_y, magnetic_z); @@ -48,7 +48,7 @@ inline __host__ __device__ Real Calc_Pressure_Conserved(Real const &E, Real cons Real const &mz, Real const &gamma, Real const &magnetic_x = 0.0, Real const &magnetic_y = 0.0, Real const &magnetic_z = 0.0) { - Real pressure = (E - 0.5 * (mx * mx + my * my + mz * mz) / d); + Real pressure = E - 0.5 * math_utils::SquareMagnitude(mx, my, mz) / d; #ifdef MHD pressure -= mhd::utils::computeMagneticEnergy(magnetic_x, magnetic_y, magnetic_z); @@ -76,7 +76,7 @@ inline __host__ __device__ Real Calc_Energy_Primitive(Real const &P, Real const Real const &magnetic_y = 0.0, Real const &magnetic_z = 0.0) { // Compute and return energy - Real energy = (fmax(P, TINY_NUMBER) / (gamma - 1.)) + 0.5 * d * (vx * vx + vy * vy + vz * vz); + Real energy = (fmax(P, TINY_NUMBER) / (gamma - 1.)) + 0.5 * d * math_utils::SquareMagnitude(vx, vy, vz); #ifdef MHD energy += mhd::utils::computeMagneticEnergy(magnetic_x, magnetic_y, magnetic_z); @@ -92,7 +92,7 @@ inline __host__ __device__ Real Calc_Energy_Conserved(Real const &P, Real const { // Compute and return energy Real energy = (fmax(P, TINY_NUMBER) / (gamma - 1.)) + - (0.5 / d) * (momentum_x * momentum_x + momentum_y * momentum_y + momentum_z * momentum_z); + (0.5 / d) * math_utils::SquareMagnitude(momentum_x, momentum_y, momentum_z); #ifdef MHD energy += mhd::utils::computeMagneticEnergy(magnetic_x, magnetic_y, magnetic_z); @@ -130,7 +130,7 @@ inline __host__ __device__ Real Get_Pressure_From_DE(Real const &E, Real const & inline __host__ __device__ Real Calc_Kinetic_Energy_From_Velocity(Real const &d, Real const &vx, Real const &vy, Real const &vz) { - return 0.5 * d * (vx * vx + vy * vy * vz * vz); + return 0.5 * d * math_utils::SquareMagnitude(vx, vy, vz); } /*! @@ -145,7 +145,7 @@ inline __host__ __device__ Real Calc_Kinetic_Energy_From_Velocity(Real const &d, inline __host__ __device__ Real Calc_Kinetic_Energy_From_Momentum(Real const &d, Real const &mx, Real const &my, Real const &mz) { - return (0.5 / d) * (mx * mx + my * my * mz * mz); + return (0.5 / d) * math_utils::SquareMagnitude(mx, my, mz); } /*! diff --git a/src/utils/hydro_utilities_tests.cpp b/src/utils/hydro_utilities_tests.cpp index eda204c76..b200ddd8c 100644 --- a/src/utils/hydro_utilities_tests.cpp +++ b/src/utils/hydro_utilities_tests.cpp @@ -238,7 +238,7 @@ TEST(tHYDROHydroUtilsGetPressureFromDE, CorrectInputExpectCorrectOutput) TEST(tHYDROtMHDCalcKineticEnergyFromVelocity, CorrectInputExpectCorrectOutput) { TestParams parameters; - std::vector fiducialEnergies{0.0, 6.307524975350106e-145, 7.3762470327090601e+249}; + std::vector fiducialEnergies{0.0, 6.307524975350106e-145, 1.9018677140549924e+150}; double const coef = 1E-50; for (size_t i = 0; i < parameters.names.size(); i++) { @@ -252,7 +252,7 @@ TEST(tHYDROtMHDCalcKineticEnergyFromVelocity, CorrectInputExpectCorrectOutput) TEST(tHYDROtMHDCalcKineticEnergyFromMomentum, CorrectInputExpectCorrectOutput) { TestParams parameters; - std::vector fiducialEnergies{0.0, 0.0, 7.2568536478335773e+147}; + std::vector fiducialEnergies{0.0, 0.0, 3.0042157852278499e+49}; double const coef = 1E-50; for (size_t i = 0; i < parameters.names.size(); i++) { diff --git a/src/utils/math_utilities.h b/src/utils/math_utilities.h index 1480f852c..68d13f19d 100644 --- a/src/utils/math_utilities.h +++ b/src/utils/math_utilities.h @@ -82,4 +82,20 @@ inline __device__ __host__ Real dotProduct(Real const &a1, Real const &a2, Real }; // ========================================================================= +// ========================================================================= +/*! + * \brief Compute the magnitude of a vector + * + * \param[in] v1 The first element of the vector + * \param[in] v2 The second element of the vector + * \param[in] v3 The third element of the vector + * + * \return Real The dot product of a and b + */ +inline __device__ __host__ Real SquareMagnitude(Real const &v1, Real const &v2, Real const &v3) +{ + return dotProduct(v1, v2, v3, v1, v2, v3); +}; +// ========================================================================= + } // namespace math_utils diff --git a/src/utils/mhd_utilities.h b/src/utils/mhd_utilities.h index 55ecc6f75..f409fd4b0 100644 --- a/src/utils/mhd_utilities.h +++ b/src/utils/mhd_utilities.h @@ -18,6 +18,7 @@ #include "../grid/grid3D.h" #include "../utils/cuda_utilities.h" #include "../utils/gpu.hpp" +#include "../utils/math_utilities.h" namespace mhd::utils { @@ -74,7 +75,7 @@ inline __host__ __device__ Real _magnetosonicSpeed(Real const &density, Real con inline __host__ __device__ Real computeMagneticEnergy(Real const &magneticX, Real const &magneticY, Real const &magneticZ) { - return 0.5 * (magneticX * magneticX + ((magneticY * magneticY) + (magneticZ * magneticZ))); + return 0.5 * math_utils::SquareMagnitude(magneticX, magneticY, magneticZ); } // ========================================================================= @@ -98,9 +99,7 @@ inline __host__ __device__ Real computeThermalEnergy(Real const &energyTot, Real Real const &magneticX, Real const &magneticY, Real const &magneticZ, Real const &gamma) { - return energyTot - - 0.5 * (momentumX * momentumX + ((momentumY * momentumY) + (momentumZ * momentumZ))) / - fmax(density, TINY_NUMBER) - + return energyTot - 0.5 * math_utils::SquareMagnitude(momentumX, momentumY, momentumZ) / fmax(density, TINY_NUMBER) - computeMagneticEnergy(magneticX, magneticY, magneticZ); } // ========================================================================= From 83629aafa93b3525f9540212b01a259836ac7f5b Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Fri, 12 Jan 2024 14:44:03 -0500 Subject: [PATCH 637/694] Test for math_utils::SquareMagnitude --- src/utils/math_utilities_tests.cpp | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/src/utils/math_utilities_tests.cpp b/src/utils/math_utilities_tests.cpp index 83fd1d232..a49cd8a41 100644 --- a/src/utils/math_utilities_tests.cpp +++ b/src/utils/math_utilities_tests.cpp @@ -56,4 +56,22 @@ TEST(tALLDotProduct, CorrectInputExpectCorrectOutput) // Now check results testing_utilities::Check_Results(fiducialDotProduct, testDotProduct, "dot product"); } +// ========================================================================= + +// ========================================================================= +/*! + * \brief Test the math_utils::dotProduct function + * + */ +TEST(tALLSquareMagnitude, CorrectInputExpectCorrectOutput) +{ + std::vector a = {11.503067766457753, 98.316634031589935, 41.12177317622657}; + + double const fiducial_square_magnitude = 11489.481324498336; + + double test_square_magnitude = math_utils::SquareMagnitude(a.at(0), a.at(1), a.at(2)); + + // Now check results + testing_utilities::Check_Results(fiducial_square_magnitude, test_square_magnitude, "dot product"); +} // ========================================================================= \ No newline at end of file From 328770bda11c5d3d1b3253bf3c14f9c94fa39e16 Mon Sep 17 00:00:00 2001 From: helenarichie Date: Sat, 13 Jan 2024 15:15:30 -0500 Subject: [PATCH 638/694] clean up initial conditions --- src/grid/initial_conditions.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/grid/initial_conditions.cpp b/src/grid/initial_conditions.cpp index 658362954..87c5d392f 100644 --- a/src/grid/initial_conditions.cpp +++ b/src/grid/initial_conditions.cpp @@ -1419,7 +1419,7 @@ void Grid3D::Clouds() #ifdef DUST C.host[id + H.n_cells * grid_enum::dust_density] = rho_cl * 1e-2; #endif // DUST -#endif +#endif // SCAlAR } } } From 2f3ef5fed87eeba6e9c25b9424fca7e15cacf829 Mon Sep 17 00:00:00 2001 From: helenarichie Date: Sat, 13 Jan 2024 15:23:16 -0500 Subject: [PATCH 639/694] reset to default settings --- builds/make.type.dust | 18 +++++++++--------- src/global/global.h | 2 +- src/grid/cuda_boundaries.cu | 2 +- src/grid/grid3D.cpp | 2 +- 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/builds/make.type.dust b/builds/make.type.dust index 35d595a37..a6cf1bdd0 100644 --- a/builds/make.type.dust +++ b/builds/make.type.dust @@ -15,25 +15,25 @@ DFLAGS += -DHLLC DFLAGS += -DDE DFLAGS += -DAVERAGE_SLOW_CELLS DFLAGS += -DTEMPERATURE_FLOOR -DFLAGS += -DSCALAR_FLOOR DFLAGS += -DVL # Evolve additional scalars -DFLAGS += -DSCALAR +DFLAGS += -DSCALAR +DFLAGS += -DSCALAR_FLOOR # Define dust macro -DFLAGS += -DDUST +DFLAGS += -DDUST # Apply the cooling in the GPU from precomputed tables -DFLAGS += -DCOOLING_GPU -DFLAGS += -DCLOUDY_COOLING +DFLAGS += -DCOOLING_GPU +DFLAGS += -DCLOUDY_COOLING #Measure the Timing of the different stages -#DFLAGS += -DCPU_TIME +#DFLAGS += -DCPU_TIME -DFLAGS += -DSLICES -DFLAGS += -DPROJECTION +DFLAGS += -DSLICES +DFLAGS += -DPROJECTION DFLAGS += $(OUTPUT) @@ -41,4 +41,4 @@ DFLAGS += $(OUTPUT) #and the MPI transfers are done from the GPU #If not specified, MPI_GPU is off by default #This is set in the system make.host file -DFLAGS += $(MPI_GPU) +DFLAGS += $(MPI_GPU) \ No newline at end of file diff --git a/src/global/global.h b/src/global/global.h index 18b08b71f..f7030b563 100644 --- a/src/global/global.h +++ b/src/global/global.h @@ -50,7 +50,7 @@ typedef double Real; #define LOG_FILE_NAME "run_output.log" // Conserved Floor Values -#define TEMP_FLOOR 100 +#define TEMP_FLOOR 1e-3 #define DENS_FLOOR 1e-5 // in code units // Parameters for Enzo dual Energy Condition diff --git a/src/grid/cuda_boundaries.cu b/src/grid/cuda_boundaries.cu index 715840322..baf846d3c 100644 --- a/src/grid/cuda_boundaries.cu +++ b/src/grid/cuda_boundaries.cu @@ -318,7 +318,7 @@ __global__ void Wind_Boundary_kernel(Real *c_device, int nx, int ny, int nz, int d_0 = n_0 * mu * MP / DENSITY_UNIT; P_0 = n_0 * KB * T_0 / PRESSURE_UNIT; - vx = 1000 * TIME_UNIT / KPC; // km/s * (cholla unit conversion) + vx = 100 * TIME_UNIT / KPC; // km/s * (cholla unit conversion) vy = 0.0; vz = 0.0; diff --git a/src/grid/grid3D.cpp b/src/grid/grid3D.cpp index 4638942eb..a76417321 100644 --- a/src/grid/grid3D.cpp +++ b/src/grid/grid3D.cpp @@ -157,7 +157,7 @@ void Grid3D::Initialize(struct Parameters *P) C_cfl = 0.3; #ifdef AVERAGE_SLOW_CELLS - H.min_dt_slow = 0.024; // Initialize the minumum dt to a tiny number + H.min_dt_slow = 1e-5; // Initialize the minumum dt to a tiny number #endif // AVERAGE_SLOW_CELLS #ifndef MPI_CHOLLA From 094f1e9ad96c0f04b60aa9a782c4de11356c2e06 Mon Sep 17 00:00:00 2001 From: helenarichie Date: Sat, 13 Jan 2024 17:24:46 -0500 Subject: [PATCH 640/694] get floor values from paramter file and remove density floor kernel --- builds/make.type.dust | 3 +++ src/global/global.cpp | 12 +++++++++++ src/global/global.h | 13 ++++++++---- src/grid/grid3D.cpp | 18 ++++++++++------ src/grid/grid3D.h | 3 ++- src/grid/initial_conditions.cpp | 15 +++++++------ src/hydro/hydro_cuda.cu | 35 ++----------------------------- src/hydro/hydro_cuda.h | 2 -- src/hydro/hydro_cuda_tests.cu | 1 - src/integrators/VL_3D_cuda.cu | 9 ++------ src/integrators/VL_3D_cuda.h | 2 +- src/integrators/simple_3D_cuda.cu | 4 ++-- src/integrators/simple_3D_cuda.h | 2 +- 13 files changed, 53 insertions(+), 66 deletions(-) diff --git a/builds/make.type.dust b/builds/make.type.dust index a6cf1bdd0..24a765302 100644 --- a/builds/make.type.dust +++ b/builds/make.type.dust @@ -15,6 +15,7 @@ DFLAGS += -DHLLC DFLAGS += -DDE DFLAGS += -DAVERAGE_SLOW_CELLS DFLAGS += -DTEMPERATURE_FLOOR +DFLAGS += -DDENSITY_FLOOR DFLAGS += -DVL @@ -37,6 +38,8 @@ DFLAGS += -DPROJECTION DFLAGS += $(OUTPUT) +DFLAGS += -DOUTPUT_ALWAYS + #Select if the Hydro Conserved data will reside in the GPU #and the MPI transfers are done from the GPU #If not specified, MPI_GPU is off by default diff --git a/src/global/global.cpp b/src/global/global.cpp index 89347cbda..d0f898739 100644 --- a/src/global/global.cpp +++ b/src/global/global.cpp @@ -428,6 +428,18 @@ void Parse_Param(char *name, char *value, struct Parameters *parms) } else if (strcmp(name, "UVB_rates_file") == 0) { strncpy(parms->UVB_rates_file, value, MAXLEN); #endif +# ifdef TEMPERATURE_FLOOR + } else if (strcmp(name, "temperature_floor") == 0) { + parms->temperature_floor = atof(value); +# endif +# ifdef DENSITY_FLOOR + } else if (strcmp(name, "density_floor") == 0) { + parms->density_floor = atof(value); +# endif +# ifdef SCALAR_FLOOR + } else if (strcmp(name, "scalar_floor") == 0) { + parms->scalar_floor = atof(value); +# endif #ifdef ANALYSIS } else if (strcmp(name, "analysis_scale_outputs_file") == 0) { strncpy(parms->analysis_scale_outputs_file, value, MAXLEN); diff --git a/src/global/global.h b/src/global/global.h index f7030b563..6c9524001 100644 --- a/src/global/global.h +++ b/src/global/global.h @@ -49,10 +49,6 @@ typedef double Real; #define LOG_FILE_NAME "run_output.log" -// Conserved Floor Values -#define TEMP_FLOOR 1e-3 -#define DENS_FLOOR 1e-5 // in code units - // Parameters for Enzo dual Energy Condition // - Prior to GH PR #356, DE_ETA_1 nominally had a value of 0.001 in all // simulations (in practice, the value of DE_ETA_1 had minimal significance @@ -325,6 +321,15 @@ struct Parameters { char UVB_rates_file[MAXLEN]; // File for the UVB photoheating and // photoionization rates of HI, HeI and HeII #endif +#ifdef TEMPERATURE_FLOOR + Real temperature_floor; +#endif +#ifdef DENSITY_FLOOR + Real density_floor; +#endif +#ifdef SCALAR_FLOOR + Real scalar_floor; +#endif #ifdef ANALYSIS char analysis_scale_outputs_file[MAXLEN]; // File for the scale_factor output // values for cosmological diff --git a/src/grid/grid3D.cpp b/src/grid/grid3D.cpp index a76417321..d844784d5 100644 --- a/src/grid/grid3D.cpp +++ b/src/grid/grid3D.cpp @@ -258,16 +258,22 @@ void Grid3D::Initialize(struct Parameters *P) #endif /*ROTATED_PROJECTION*/ // Values for lower limit for density and temperature +#ifdef TEMPERATURE_FLOOR + H.temperature_floor = P->temperature_floor; +#else + H.temperature_floor = 0.0; +#endif + #ifdef DENSITY_FLOOR - H.density_floor = DENS_FLOOR; + H.density_floor = P->density_floor; #else H.density_floor = 0.0; #endif -#ifdef TEMPERATURE_FLOOR - H.temperature_floor = TEMP_FLOOR; +#ifdef SCALAR_FLOOR + H.scalar_floor = P->scalar_floor; #else - H.temperature_floor = 0.0; + H.scalar_floor = 0.0; #endif #ifdef COSMOLOGY @@ -461,12 +467,12 @@ void Grid3D::Execute_Hydro_Integrator(void) #ifdef VL VL_Algorithm_3D_CUDA(C.device, C.d_Grav_potential, H.nx, H.ny, H.nz, x_off, y_off, z_off, H.n_ghost, H.dx, H.dy, H.dz, H.xbound, H.ybound, H.zbound, H.dt, H.n_fields, H.custom_grav, density_floor, U_floor, - C.Grav_potential); + C.Grav_potential, H.scalar_floor); #endif // VL #ifdef SIMPLE Simple_Algorithm_3D_CUDA(C.device, C.d_Grav_potential, H.nx, H.ny, H.nz, x_off, y_off, z_off, H.n_ghost, H.dx, H.dy, H.dz, H.xbound, H.ybound, H.zbound, H.dt, H.n_fields, H.custom_grav, density_floor, - U_floor, C.Grav_potential); + U_floor, C.Grav_potential, H.scalar_floor); #endif // SIMPLE #endif } else { diff --git a/src/grid/grid3D.h b/src/grid/grid3D.h index aff94c898..5354acf0a 100644 --- a/src/grid/grid3D.h +++ b/src/grid/grid3D.h @@ -231,8 +231,9 @@ struct Header { int custom_grav; // Values for lower limit for density and temperature - Real density_floor; Real temperature_floor; + Real density_floor; + Real scalar_floor; Real Ekin_avrg; diff --git a/src/grid/initial_conditions.cpp b/src/grid/initial_conditions.cpp index 87c5d392f..0e5aabc90 100644 --- a/src/grid/initial_conditions.cpp +++ b/src/grid/initial_conditions.cpp @@ -1324,7 +1324,7 @@ void Grid3D::Clouds() Real p_bg, p_cl; // background and cloud pressure Real mu = 0.6; // mean atomic weight int N_cl = 1; // number of clouds - Real R_cl = .1; // cloud radius in code units (kpc) + Real R_cl = 2.5; // cloud radius in code units (kpc) Real cl_pos[N_cl][3]; // array of cloud positions Real r; @@ -1339,17 +1339,17 @@ void Grid3D::Clouds() // single centered cloud setup for (int nn = 0; nn < N_cl; nn++) { - cl_pos[nn][0] = 0.075 * H.xdglobal; + cl_pos[nn][0] = 0.5 * H.xdglobal; cl_pos[nn][1] = 0.5 * H.ydglobal; cl_pos[nn][2] = 0.5 * H.zdglobal; printf("Cloud positions: %f %f %f\n", cl_pos[nn][0], cl_pos[nn][1], cl_pos[nn][2]); } - n_bg = 1e-2; - n_cl = 10; + n_bg = 1.68e-4; + n_cl = 5.4e-2; rho_bg = n_bg * mu * MP / DENSITY_UNIT; rho_cl = n_cl * mu * MP / DENSITY_UNIT; - vx_bg = 1000*TIME_UNIT/KPC; + vx_bg = 0.0; // vx_c = -200*TIME_UNIT/KPC; // convert from km/s to kpc/kyr vx_cl = 0.0; vy_bg = vy_cl = 0.0; @@ -1357,8 +1357,7 @@ void Grid3D::Clouds() T_bg = 3e6; T_cl = 1e4; p_bg = n_bg * KB * T_bg / PRESSURE_UNIT; - // p_cl = p_bg; - p_cl = n_cl * KB * T_cl / PRESSURE_UNIT; + p_cl = p_bg; istart = H.n_ghost; iend = H.nx - H.n_ghost; @@ -1419,7 +1418,7 @@ void Grid3D::Clouds() #ifdef DUST C.host[id + H.n_cells * grid_enum::dust_density] = rho_cl * 1e-2; #endif // DUST -#endif // SCAlAR +#endif // SCALAR } } } diff --git a/src/hydro/hydro_cuda.cu b/src/hydro/hydro_cuda.cu index e50cdd3f0..125e851b4 100644 --- a/src/hydro/hydro_cuda.cu +++ b/src/hydro/hydro_cuda.cu @@ -379,8 +379,7 @@ __global__ void Update_Conserved_Variables_3D(Real *dev_conserved, Real *Q_Lx, R #endif // GRAVITY - // #if !(defined(DENSITY_FLOOR) && defined(TEMPERATURE_FLOOR)) - #if !(defined(DENSITY_FLOOR)) + #if !(defined(DENSITY_FLOOR) && defined(TEMPERATURE_FLOOR)) if (dev_conserved[id] < 0.0 || dev_conserved[id] != dev_conserved[id] || dev_conserved[4 * n_cells + id] < 0.0 || dev_conserved[4 * n_cells + id] != dev_conserved[4 * n_cells + id]) { printf("%3d %3d %3d Thread crashed in final update. %e %e %e %e %e\n", xid + x_off, yid + y_off, zid + z_off, @@ -1140,37 +1139,6 @@ __global__ void Apply_Temperature_Floor(Real *dev_conserved, int nx, int ny, int } } -__global__ void Apply_Density_Floor(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, Real density_floor) -{ - int id, xid, yid, zid, n_cells; - Real density_init; // variable to store the value of the scalar before a floor is applied - n_cells = nx * ny * nz; - - // get a global thread ID - id = threadIdx.x + blockIdx.x * blockDim.x; - zid = id / (nx * ny); - yid = (id - zid * nx * ny) / nx; - xid = id - zid * nx * ny - yid * nx; - - // threads corresponding to real cells do the calculation - if (xid > n_ghost - 1 && xid < nx - n_ghost && yid > n_ghost - 1 && yid < ny - n_ghost && zid > n_ghost - 1 && - zid < nz - n_ghost) { - density_init = dev_conserved[id + n_cells * grid_enum::density]; - - if (density_init < density_floor) { - printf("###Thread density change %f -> %f \n", density_init, density_floor); - dev_conserved[id] = density_floor; - // Scale the conserved values to the new density - dev_conserved[id + n_cells * grid_enum::momentum_x] *= (density_floor / density_init); - dev_conserved[id + n_cells * grid_enum::momentum_y] *= (density_floor / density_init); - dev_conserved[id + n_cells * grid_enum::momentum_z] *= (density_floor / density_init); - dev_conserved[id + n_cells * grid_enum::Energy] *= (density_floor / density_init); - #ifdef DE - dev_conserved[id + n_cells * grid_enum::GasEnergy] *= (density_floor / density_init); - #endif // DE - } - } -} __device__ Real Average_Cell_Single_Field(int field_indx, int i, int j, int k, int nx, int ny, int nz, int ncells, Real *conserved) { @@ -1305,6 +1273,7 @@ __global__ void Apply_Scalar_Floor(Real *dev_conserved, int nx, int ny, int nz, scalar = dev_conserved[id + n_cells * field_num]; if (scalar < scalar_floor) { + printf("###Thread scalar change %f -> %f \n", scalar, scalar_floor); dev_conserved[id + n_cells * field_num] = scalar_floor; } } diff --git a/src/hydro/hydro_cuda.h b/src/hydro/hydro_cuda.h index 92a997790..8fcfbba05 100644 --- a/src/hydro/hydro_cuda.h +++ b/src/hydro/hydro_cuda.h @@ -88,8 +88,6 @@ __global__ void Average_Slow_Cells_3D(Real *dev_conserved, int nx, int ny, int n __global__ void Apply_Temperature_Floor(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real U_floor); -__global__ void Apply_Density_Floor(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, Real density_floor); - __global__ void Apply_Scalar_Floor(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int field_num, Real scalar_floor); diff --git a/src/hydro/hydro_cuda_tests.cu b/src/hydro/hydro_cuda_tests.cu index 0403478e5..0796a3064 100644 --- a/src/hydro/hydro_cuda_tests.cu +++ b/src/hydro/hydro_cuda_tests.cu @@ -148,7 +148,6 @@ TEST(tMHDMhdInverseCrossingTime, CorrectInputExpectCorrectOutput) TEST(tHYDROScalarFloor, CorrectInputExpectCorrectOutput) { - // Call the function we are testing int num_blocks = 1; dim3 dim1dGrid(num_blocks, 1, 1); dim3 dim1dBlock(TPB, 1, 1); diff --git a/src/integrators/VL_3D_cuda.cu b/src/integrators/VL_3D_cuda.cu index f299eda3c..25462fa91 100644 --- a/src/integrators/VL_3D_cuda.cu +++ b/src/integrators/VL_3D_cuda.cu @@ -37,7 +37,7 @@ __global__ void Update_Conserved_Variables_3D_half(Real *dev_conserved, Real *de void VL_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, int ny, int nz, int x_off, int y_off, int z_off, int n_ghost, Real dx, Real dy, Real dz, Real xbound, Real ybound, Real zbound, Real dt, int n_fields, int custom_grav, Real density_floor, Real U_floor, - Real *host_grav_potential) + Real *host_grav_potential, Real scalar_floor) { // Here, *dev_conserved contains the entire // set of conserved variables on the grid @@ -197,11 +197,6 @@ void VL_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, int F_x, F_y, F_z, nx, ny, nz, n_ghost, dx, dy, dz, 0.5 * dt, gama, n_fields, density_floor); GPU_Error_Check(); - #ifdef DENSITY_FLOOR - hipLaunchKernelGGL(Apply_Density_Floor, dim1dGrid, dim1dBlock, 0, 0, dev_conserved_half, nx, ny, nz, n_ghost, - density_floor); - #endif // DENSITY_FLOOR - #ifdef MHD // Update the magnetic fields hipLaunchKernelGGL(mhd::Update_Magnetic_Field_3D, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, dev_conserved_half, @@ -334,7 +329,7 @@ void VL_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, int #ifdef SCALAR_FLOOR #ifdef DUST hipLaunchKernelGGL(Apply_Scalar_Floor, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, nz, n_ghost, - grid_enum::dust_density, 1e-10); + grid_enum::dust_density, scalar_floor); GPU_Error_Check(); #endif #endif // SCALAR_FLOOR diff --git a/src/integrators/VL_3D_cuda.h b/src/integrators/VL_3D_cuda.h index 3f2cf8d75..4104493bc 100644 --- a/src/integrators/VL_3D_cuda.h +++ b/src/integrators/VL_3D_cuda.h @@ -11,7 +11,7 @@ void VL_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, int ny, int nz, int x_off, int y_off, int z_off, int n_ghost, Real dx, Real dy, Real dz, Real xbound, Real ybound, Real zbound, Real dt, int n_fields, int custom_grav, Real density_floor, Real U_floor, - Real *host_grav_potential); + Real *host_grav_potential, Real scalar_floor); void Free_Memory_VL_3D(); diff --git a/src/integrators/simple_3D_cuda.cu b/src/integrators/simple_3D_cuda.cu index c572dcd97..d3beb14b2 100644 --- a/src/integrators/simple_3D_cuda.cu +++ b/src/integrators/simple_3D_cuda.cu @@ -27,7 +27,7 @@ void Simple_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, int ny, int nz, int x_off, int y_off, int z_off, int n_ghost, Real dx, Real dy, Real dz, Real xbound, Real ybound, Real zbound, Real dt, int n_fields, int custom_grav, Real density_floor, Real U_floor, - Real *host_grav_potential) + Real *host_grav_potential, Real scalar_floor) { // Here, *dev_conserved contains the entire // set of conserved variables on the grid @@ -189,7 +189,7 @@ void Simple_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, #ifdef SCALAR_FLOOR #ifdef DUST hipLaunchKernelGGL(Apply_Scalar_Floor, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, nz, n_ghost, - grid_enum::dust_density, 1e-5); + grid_enum::dust_density, scalar_floor); CudaCheckError(); #endif DUST #endif // SCALAR_FLOOR diff --git a/src/integrators/simple_3D_cuda.h b/src/integrators/simple_3D_cuda.h index 585c553ba..e2cea247e 100644 --- a/src/integrators/simple_3D_cuda.h +++ b/src/integrators/simple_3D_cuda.h @@ -12,7 +12,7 @@ void Simple_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, int ny, int nz, int x_off, int y_off, int z_off, int n_ghost, Real dx, Real dy, Real dz, Real xbound, Real ybound, Real zbound, Real dt, int n_fields, int custom_grav, Real density_floor, Real U_floor, - Real *host_grav_potential); + Real *host_grav_potential, Real scalar_floor); void Free_Memory_Simple_3D(); From d783bb74f1e8542709cebd8f9b7181b9f2c9afb0 Mon Sep 17 00:00:00 2001 From: helenarichie Date: Sat, 13 Jan 2024 17:26:42 -0500 Subject: [PATCH 641/694] run clang format --- src/analysis/feedback_analysis.cpp | 2 +- src/chemistry_gpu/chemistry_functions.cpp | 2 +- src/cooling_grackle/cool_grackle.cpp | 2 +- src/global/global.cpp | 12 +++++----- src/gravity/gravity_functions.cpp | 8 +++---- src/gravity/gravity_functions_gpu.cu | 4 ++-- src/grid/grid3D.cpp | 10 ++++----- src/grid/initial_conditions.cpp | 26 +++++++++++----------- src/integrators/VL_3D_cuda.cu | 9 ++++---- src/integrators/simple_3D_cuda.cu | 4 ++-- src/io/io.cpp | 4 ++-- src/particles/io_particles.cpp | 14 ++++++------ src/particles/particles_3D.cpp | 14 ++++++------ src/particles/particles_boundaries_cpu.cpp | 4 ++-- src/reconstruction/plmp_cuda.cu | 2 +- src/reconstruction/ppmc_cuda_tests.cu | 2 +- src/reconstruction/ppmp_cuda.cu | 2 +- src/reconstruction/reconstruction_tests.cu | 8 +++++-- src/system_tests/hydro_system_tests.cpp | 4 ++-- 19 files changed, 69 insertions(+), 64 deletions(-) diff --git a/src/analysis/feedback_analysis.cpp b/src/analysis/feedback_analysis.cpp index 3dab7b6da..5fe0e7543 100644 --- a/src/analysis/feedback_analysis.cpp +++ b/src/analysis/feedback_analysis.cpp @@ -87,7 +87,7 @@ void FeedbackAnalysis::Compute_Gas_Velocity_Dispersion(Grid3D& G) #ifdef MPI_CHOLLA MPI_Allreduce(&partial_mass, &total_mass, 1, MPI_CHREAL, MPI_SUM, world); #else - total_mass = partial_mass; + total_mass = partial_mass; #endif for (k = G.H.n_ghost; k < G.H.nz - G.H.n_ghost; k++) { diff --git a/src/chemistry_gpu/chemistry_functions.cpp b/src/chemistry_gpu/chemistry_functions.cpp index 7999a6d55..65e3af691 100644 --- a/src/chemistry_gpu/chemistry_functions.cpp +++ b/src/chemistry_gpu/chemistry_functions.cpp @@ -228,7 +228,7 @@ void Grid3D::Update_Chemistry() #ifdef COSMOLOGY Chem.H.current_z = Cosmo.current_z; #else - Chem.H.current_z = 0; + Chem.H.current_z = 0; #endif Do_Chemistry_Update(C.device, H.nx, H.ny, H.nz, H.n_ghost, H.n_fields, H.dt, Chem.H); diff --git a/src/cooling_grackle/cool_grackle.cpp b/src/cooling_grackle/cool_grackle.cpp index a7f5c36cb..c5f2a8078 100644 --- a/src/cooling_grackle/cool_grackle.cpp +++ b/src/cooling_grackle/cool_grackle.cpp @@ -89,7 +89,7 @@ void Cool_GK::Initialize(struct Parameters *P, Cosmology &Cosmo) data->metal_cooling = 1; // metal cooling off #else chprintf("WARNING: Metal Cooling is Off. \n"); - data->metal_cooling = 0; // metal cooling off + data->metal_cooling = 0; // metal cooling off #endif #ifdef PARALLEL_OMP diff --git a/src/global/global.cpp b/src/global/global.cpp index d0f898739..6579d9955 100644 --- a/src/global/global.cpp +++ b/src/global/global.cpp @@ -428,18 +428,18 @@ void Parse_Param(char *name, char *value, struct Parameters *parms) } else if (strcmp(name, "UVB_rates_file") == 0) { strncpy(parms->UVB_rates_file, value, MAXLEN); #endif -# ifdef TEMPERATURE_FLOOR +#ifdef TEMPERATURE_FLOOR } else if (strcmp(name, "temperature_floor") == 0) { parms->temperature_floor = atof(value); -# endif -# ifdef DENSITY_FLOOR +#endif +#ifdef DENSITY_FLOOR } else if (strcmp(name, "density_floor") == 0) { parms->density_floor = atof(value); -# endif -# ifdef SCALAR_FLOOR +#endif +#ifdef SCALAR_FLOOR } else if (strcmp(name, "scalar_floor") == 0) { parms->scalar_floor = atof(value); -# endif +#endif #ifdef ANALYSIS } else if (strcmp(name, "analysis_scale_outputs_file") == 0) { strncpy(parms->analysis_scale_outputs_file, value, MAXLEN); diff --git a/src/gravity/gravity_functions.cpp b/src/gravity/gravity_functions.cpp index b92d06564..6c7a6dde7 100644 --- a/src/gravity/gravity_functions.cpp +++ b/src/gravity/gravity_functions.cpp @@ -137,7 +137,7 @@ void Grid3D::set_dt_Gravity() dt_particles = Calc_Particles_dt(); dt_particles = fmin(dt_particles, Particles.max_dt); #ifdef ONLY_PARTICLES - dt_min = dt_particles; + dt_min = dt_particles; chprintf(" dt_particles: %f \n", dt_particles); #else chprintf(" dt_hydro: %f dt_particles: %f \n", dt_hydro, dt_particles); @@ -211,7 +211,7 @@ Real Grav3D::Get_Average_Density() #ifdef MPI_CHOLLA dens_avrg_all = ReduceRealAvg(dens_mean); #else - dens_avrg_all = dens_mean; + dens_avrg_all = dens_mean; #endif dens_avrg = dens_avrg_all; @@ -530,8 +530,8 @@ void Grid3D::Compute_Gravitational_Potential(struct Parameters *P) input_density = Grav.F.density_d; output_potential = Grav.F.potential_d; #else - input_density = Grav.F.density_h; - output_potential = Grav.F.potential_h; + input_density = Grav.F.density_h; + output_potential = Grav.F.potential_h; #endif #ifdef SOR diff --git a/src/gravity/gravity_functions_gpu.cu b/src/gravity/gravity_functions_gpu.cu index b92d19084..15de64a95 100644 --- a/src/gravity/gravity_functions_gpu.cu +++ b/src/gravity/gravity_functions_gpu.cu @@ -127,7 +127,7 @@ void Grid3D::Copy_Hydro_Density_to_Gravity_GPU() #ifdef COSMOLOGY cosmo_rho_0_gas = Cosmo.rho_0_gas; #else - cosmo_rho_0_gas = 1.0; + cosmo_rho_0_gas = 1.0; #endif // Copy the density from the device array to the Poisson input density array @@ -261,7 +261,7 @@ void Grid3D::Extrapolate_Grav_Potential_GPU() #ifdef COSMOLOGY cosmo_factor = Cosmo.current_a * Cosmo.current_a / Cosmo.phi_0_gas; #else - cosmo_factor = 1.0; + cosmo_factor = 1.0; #endif // set values for GPU kernels diff --git a/src/grid/grid3D.cpp b/src/grid/grid3D.cpp index d844784d5..93499fea0 100644 --- a/src/grid/grid3D.cpp +++ b/src/grid/grid3D.cpp @@ -158,7 +158,7 @@ void Grid3D::Initialize(struct Parameters *P) #ifdef AVERAGE_SLOW_CELLS H.min_dt_slow = 1e-5; // Initialize the minumum dt to a tiny number -#endif // AVERAGE_SLOW_CELLS +#endif // AVERAGE_SLOW_CELLS #ifndef MPI_CHOLLA @@ -267,13 +267,13 @@ void Grid3D::Initialize(struct Parameters *P) #ifdef DENSITY_FLOOR H.density_floor = P->density_floor; #else - H.density_floor = 0.0; + H.density_floor = 0.0; #endif #ifdef SCALAR_FLOOR H.scalar_floor = P->scalar_floor; #else - H.scalar_floor = 0.0; + H.scalar_floor = 0.0; #endif #ifdef COSMOLOGY @@ -345,8 +345,8 @@ void Grid3D::AllocateMemory(void) GPU_Error_Check(cudaHostAlloc(&C.Grav_potential, H.n_cells * sizeof(Real), cudaHostAllocDefault)); GPU_Error_Check(cudaMalloc((void **)&C.d_Grav_potential, H.n_cells * sizeof(Real))); #else - C.Grav_potential = NULL; - C.d_Grav_potential = NULL; + C.Grav_potential = NULL; + C.d_Grav_potential = NULL; #endif #ifdef CHEMISTRY_GPU diff --git a/src/grid/initial_conditions.cpp b/src/grid/initial_conditions.cpp index 0e5aabc90..30e3eb459 100644 --- a/src/grid/initial_conditions.cpp +++ b/src/grid/initial_conditions.cpp @@ -1415,10 +1415,10 @@ void Grid3D::Clouds() C.GasEnergy[id] = p_cl / (gama - 1.0); #endif // DE #ifdef SCALAR -#ifdef DUST + #ifdef DUST C.host[id + H.n_cells * grid_enum::dust_density] = rho_cl * 1e-2; -#endif // DUST -#endif // SCALAR + #endif // DUST +#endif // SCALAR } } } @@ -1482,18 +1482,18 @@ void Grid3D::Zeldovich_Pancake(struct Parameters P) Real H0, h, Omega_M, rho_0, G, z_zeldovich, z_init, x_center, T_init, k_x; chprintf("Setting Zeldovich Pancake initial conditions...\n"); - H0 = P.H0; - h = H0 / 100; + H0 = P.H0; + h = H0 / 100; Omega_M = P.Omega_M; chprintf(" h = %f \n", h); chprintf(" Omega_M = %f \n", Omega_M); H0 /= 1000; //[km/s / kpc] - G = G_COSMO; - rho_0 = 3 * H0 * H0 / (8 * M_PI * G) * Omega_M / h / h; + G = G_COSMO; + rho_0 = 3 * H0 * H0 / (8 * M_PI * G) * Omega_M / h / h; z_zeldovich = 1; - z_init = P.Init_redshift; + z_init = P.Init_redshift; chprintf(" rho_0 = %f \n", rho_0); chprintf(" z_init = %f \n", z_init); chprintf(" z_zeldovich = %f \n", z_zeldovich); @@ -1553,17 +1553,17 @@ void Grid3D::Zeldovich_Pancake(struct Parameters P) index = (int(x_pos / H.dx) + 0) % 256; // index = ( index + 16 ) % 256; dens = ics_values[0 * nPoints + index]; - vel = ics_values[1 * nPoints + index]; - E = ics_values[2 * nPoints + index]; - U = ics_values[3 * nPoints + index]; + vel = ics_values[1 * nPoints + index]; + E = ics_values[2 * nPoints + index]; + U = ics_values[3 * nPoints + index]; // // // chprintf( "%f \n", vel ); - C.density[id] = dens; + C.density[id] = dens; C.momentum_x[id] = dens * vel; C.momentum_y[id] = 0; C.momentum_z[id] = 0; - C.Energy[id] = E; + C.Energy[id] = E; #ifdef DE C.GasEnergy[id] = U; diff --git a/src/integrators/VL_3D_cuda.cu b/src/integrators/VL_3D_cuda.cu index 25462fa91..2b4162ffc 100644 --- a/src/integrators/VL_3D_cuda.cu +++ b/src/integrators/VL_3D_cuda.cu @@ -32,7 +32,8 @@ __global__ void Update_Conserved_Variables_3D_half(Real *dev_conserved, Real *dev_conserved_half, Real *dev_F_x, Real *dev_F_y, Real *dev_F_z, int nx, int ny, int nz, int n_ghost, - Real dx, Real dy, Real dz, Real dt, Real gamma, int n_fields, Real density_floor); + Real dx, Real dy, Real dz, Real dt, Real gamma, int n_fields, + Real density_floor); void VL_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, int ny, int nz, int x_off, int y_off, int z_off, int n_ghost, Real dx, Real dy, Real dz, Real xbound, Real ybound, Real zbound, @@ -122,7 +123,7 @@ void VL_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, int #if defined(GRAVITY) dev_grav_potential = d_grav_potential; #else // not GRAVITY - dev_grav_potential = NULL; + dev_grav_potential = NULL; #endif // GRAVITY // If memory is single allocated: memory_allocated becomes true and @@ -327,11 +328,11 @@ void VL_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, int #endif // TEMPERATURE_FLOOR #ifdef SCALAR_FLOOR - #ifdef DUST + #ifdef DUST hipLaunchKernelGGL(Apply_Scalar_Floor, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, nz, n_ghost, grid_enum::dust_density, scalar_floor); GPU_Error_Check(); - #endif + #endif #endif // SCALAR_FLOOR return; diff --git a/src/integrators/simple_3D_cuda.cu b/src/integrators/simple_3D_cuda.cu index d3beb14b2..b68f0a351 100644 --- a/src/integrators/simple_3D_cuda.cu +++ b/src/integrators/simple_3D_cuda.cu @@ -187,11 +187,11 @@ void Simple_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, #endif // TEMPERATURE_FLOOR #ifdef SCALAR_FLOOR - #ifdef DUST + #ifdef DUST hipLaunchKernelGGL(Apply_Scalar_Floor, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, nz, n_ghost, grid_enum::dust_density, scalar_floor); CudaCheckError(); - #endif DUST + #endif DUST #endif // SCALAR_FLOOR return; diff --git a/src/io/io.cpp b/src/io/io.cpp index 9959267de..b243440e4 100644 --- a/src/io/io.cpp +++ b/src/io/io.cpp @@ -1400,12 +1400,12 @@ void Grid3D::Write_Grid_HDF5(hid_t file_id) #ifdef OUTPUT_METALS output_metals = true; #else // not OUTPUT_METALS - output_metals = false; + output_metals = false; #endif // OUTPUT_METALS #ifdef OUTPUT_ELECTRONS output_electrons = true; #else // not OUTPUT_ELECTRONS - output_electrons = false; + output_electrons = false; #endif // OUTPUT_ELECTRONS #ifdef OUTPUT_FULL_IONIZATION output_full_ionization = true; diff --git a/src/particles/io_particles.cpp b/src/particles/io_particles.cpp index e986c5287..e6da774ed 100644 --- a/src/particles/io_particles.cpp +++ b/src/particles/io_particles.cpp @@ -445,12 +445,12 @@ void Particles3D::Load_Particles_Data_HDF5(hid_t file_id, int nfile, struct Para Real vy_max_g = vy_max; Real vz_max_g = vz_max; - Real px_min_g = px_min; - Real py_min_g = py_min; - Real pz_min_g = pz_min; - Real vx_min_g = vx_min; - Real vy_min_g = vy_min; - Real vz_min_g = vz_min; + Real px_min_g = px_min; + Real py_min_g = py_min; + Real pz_min_g = pz_min; + Real vx_min_g = vx_min; + Real vy_min_g = vy_min; + Real vz_min_g = vz_min; #endif // MPI_CHOLLA // Print initial Statistics @@ -563,7 +563,7 @@ void Grid3D::Write_Particles_Data_HDF5(hid_t file_id) #ifdef MPI_CHOLLA N_particles_total = ReducePartIntSum(Particles.n_local); #else - N_particles_total = Particles.n_local; + N_particles_total = Particles.n_local; #endif // Print the total particles when saving the particles data diff --git a/src/particles/particles_3D.cpp b/src/particles/particles_3D.cpp index 6417e4136..87a2be8e5 100644 --- a/src/particles/particles_3D.cpp +++ b/src/particles/particles_3D.cpp @@ -157,12 +157,12 @@ void Particles3D::Initialize(struct Parameters *P, Grav3D &Grav, Real xbound, Re G.boundary_type_z0 = P->zlg_bcnd; G.boundary_type_z1 = P->zug_bcnd; #else - G.boundary_type_x0 = P->xl_bcnd; - G.boundary_type_x1 = P->xu_bcnd; - G.boundary_type_y0 = P->yl_bcnd; - G.boundary_type_y1 = P->yu_bcnd; - G.boundary_type_z0 = P->zl_bcnd; - G.boundary_type_z1 = P->zu_bcnd; + G.boundary_type_x0 = P->xl_bcnd; + G.boundary_type_x1 = P->xu_bcnd; + G.boundary_type_y0 = P->yl_bcnd; + G.boundary_type_y1 = P->yu_bcnd; + G.boundary_type_z0 = P->zl_bcnd; + G.boundary_type_z1 = P->zu_bcnd; #endif #ifdef PARTICLES_GPU @@ -211,7 +211,7 @@ void Particles3D::Initialize(struct Parameters *P, Grav3D &Grav, Real xbound, Re #ifdef MPI_CHOLLA n_total_initial = ReducePartIntSum(n_local); #else - n_total_initial = n_local; + n_total_initial = n_local; #endif chprintf("Particles Initialized: \n n_local: %lu \n", n_local); diff --git a/src/particles/particles_boundaries_cpu.cpp b/src/particles/particles_boundaries_cpu.cpp index 27470befe..772153534 100644 --- a/src/particles/particles_boundaries_cpu.cpp +++ b/src/particles/particles_boundaries_cpu.cpp @@ -433,13 +433,13 @@ void Particles3D::Unload_Particles_from_Buffer_CPU(int direction, int side, Real offset_extra += 1; pId = recv_buffer[offset_extra]; #else - pId = 0; + pId = 0; #endif #ifdef PARTICLE_AGE offset_extra += 1; pAge = recv_buffer[offset_extra]; #else - pAge = 0.0; + pAge = 0.0; #endif offset_buff += N_DATA_PER_PARTICLE_TRANSFER; diff --git a/src/reconstruction/plmp_cuda.cu b/src/reconstruction/plmp_cuda.cu index a000da4da..f69bbdc4b 100644 --- a/src/reconstruction/plmp_cuda.cu +++ b/src/reconstruction/plmp_cuda.cu @@ -120,7 +120,7 @@ __global__ void PLMP_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou dge = dev_conserved[(n_fields - 1) * n_cells + id]; p_i = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, dge, gamma); #else - p_i = (dev_conserved[4 * n_cells + id] - 0.5 * d_i * (vx_i * vx_i + vy_i * vy_i + vz_i * vz_i)) * (gamma - 1.0); + p_i = (dev_conserved[4 * n_cells + id] - 0.5 * d_i * (vx_i * vx_i + vy_i * vy_i + vz_i * vz_i)) * (gamma - 1.0); #endif // PRESSURE_DE p_i = fmax(p_i, (Real)TINY_NUMBER); #ifdef SCALAR diff --git a/src/reconstruction/ppmc_cuda_tests.cu b/src/reconstruction/ppmc_cuda_tests.cu index 9e9b11140..c1319ea58 100644 --- a/src/reconstruction/ppmc_cuda_tests.cu +++ b/src/reconstruction/ppmc_cuda_tests.cu @@ -139,7 +139,7 @@ TEST(tALLPpmcVLReconstructor, CorrectInputExpectCorrectOutput) #ifdef MHD size_t const n_fields = 8; #else // not MHD - size_t const n_fields = 5; + size_t const n_fields = 5; #endif // MHD // Setup host grid. Fill host grid with random values and randomly assign maximum value diff --git a/src/reconstruction/ppmp_cuda.cu b/src/reconstruction/ppmp_cuda.cu index ae8da90cb..f84946437 100644 --- a/src/reconstruction/ppmp_cuda.cu +++ b/src/reconstruction/ppmp_cuda.cu @@ -166,7 +166,7 @@ __global__ void PPMP_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou dge = dev_conserved[(n_fields - 1) * n_cells + id]; p_i = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, dge, gamma); #else - p_i = (dev_conserved[4 * n_cells + id] - 0.5 * d_i * (vx_i * vx_i + vy_i * vy_i + vz_i * vz_i)) * (gamma - 1.0); + p_i = (dev_conserved[4 * n_cells + id] - 0.5 * d_i * (vx_i * vx_i + vy_i * vy_i + vz_i * vz_i)) * (gamma - 1.0); #endif // PRESSURE_DE p_i = fmax(p_i, (Real)TINY_NUMBER); #ifdef DE diff --git a/src/reconstruction/reconstruction_tests.cu b/src/reconstruction/reconstruction_tests.cu index 6c2e19af7..74c0e6896 100644 --- a/src/reconstruction/reconstruction_tests.cu +++ b/src/reconstruction/reconstruction_tests.cu @@ -575,9 +575,13 @@ TEST(tALLReconstructionWriteData, CorrectInputExpectCorrectOutput) { // Set up test and mock up grid #ifdef MHD - reconstruction::Primitive interface{1, 2, 3, 4, 5, 6, 7, 8}; + reconstruction::Primitive interface { + 1, 2, 3, 4, 5, 6, 7, 8 + }; #else // MHD - reconstruction::Primitive interface{6, 7, 8, 9, 10}; + reconstruction::Primitive interface { + 6, 7, 8, 9, 10 + }; #endif // MHD size_t const nx = 3, ny = 3, nz = 3; size_t const n_cells = nx * ny * nz; diff --git a/src/system_tests/hydro_system_tests.cpp b/src/system_tests/hydro_system_tests.cpp index 6cffe9c21..18b2994bb 100644 --- a/src/system_tests/hydro_system_tests.cpp +++ b/src/system_tests/hydro_system_tests.cpp @@ -56,8 +56,8 @@ TEST_P(tHYDROtMHDSYSTEMSodShockTubeParameterizedMpi, CorrectInputExpectCorrectOu double const maxAllowedL1Error = 7.0E-3; double const maxAllowedError = 4.6E-2; #else - double const maxAllowedL1Error = 9.4E-5; - double const maxAllowedError = 6.4E-4; + double const maxAllowedL1Error = 9.4E-5; + double const maxAllowedError = 6.4E-4; #endif // MHD sodTest.numMpiRanks = GetParam(); From 1a8255a3b9ca2093db37f25238d8fc623b35541a Mon Sep 17 00:00:00 2001 From: Matthew Abruzzo Date: Thu, 18 Jan 2024 13:18:15 -0500 Subject: [PATCH 642/694] try to satisfy clang-tidy --- src/global/global.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/global/global.h b/src/global/global.h index f1ff3cac3..b7b3d5643 100644 --- a/src/global/global.h +++ b/src/global/global.h @@ -208,7 +208,7 @@ struct Parameters { #ifdef DE int out_float32_GasEnergy = 0; #endif - bool output_always = 0; + bool output_always = false; #ifdef STATIC_GRAV int custom_grav = 0; // flag to set specific static gravity field #endif From 399921b7e3fb4dd9835f008eb795f0027f319eb8 Mon Sep 17 00:00:00 2001 From: helenarichie Date: Tue, 23 Jan 2024 11:50:19 -0500 Subject: [PATCH 643/694] remove reference to DENS_FLOOR macro --- src/particles/feedback_CIC_gpu.cu | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/particles/feedback_CIC_gpu.cu b/src/particles/feedback_CIC_gpu.cu index 0a4e8b292..bd162e585 100644 --- a/src/particles/feedback_CIC_gpu.cu +++ b/src/particles/feedback_CIC_gpu.cu @@ -157,9 +157,9 @@ __device__ Real GetSNRate(Real t, Real* dev_snr, Real snr_dt, Real t_start, Real } __device__ Real Calc_Timestep(Real gamma, Real* density, Real* momentum_x, Real* momentum_y, Real* momentum_z, - Real* energy, int index, Real dx, Real dy, Real dz) + Real* energy, int index, Real dx, Real dy, Real dz, Real density_floor) { - Real dens = fmax(density[index], DENS_FLOOR); + Real dens = fmax(density[index], density_floor); Real d_inv = 1.0 / dens; Real vx = momentum_x[index] * d_inv; Real vy = momentum_y[index] * d_inv; @@ -236,7 +236,7 @@ __global__ void Cluster_Feedback_Kernel(part_int_t n_local, part_int_t* id, Real Real* gasEnergy, Real* energy, Real* momentum_x, Real* momentum_y, Real* momentum_z, Real gamma, FeedbackPrng* states, Real* prev_dens, int* prev_N, short direction, Real* dev_snr, Real snr_dt, Real time_sn_start, - Real time_sn_end, int n_step) + Real time_sn_end, int n_step, Real density_floor) { __shared__ Real s_info[FEED_INFO_N * TPB_FEEDBACK]; // for collecting SN feedback information, like # // of SNe or # resolved. @@ -443,7 +443,7 @@ __global__ void Cluster_Feedback_Kernel(part_int_t n_local, part_int_t* id, Real if (direction > 0) { local_dti = fmax(local_dti, Calc_Timestep(gamma, density, momentum_x, momentum_y, momentum_z, - energy, indx, dx, dy, dz)); + energy, indx, dx, dy, dz, density_floor)); } } } @@ -605,7 +605,7 @@ __global__ void Cluster_Feedback_Kernel(part_int_t n_local, part_int_t* id, Real // kernel_printf("urs time:%.3e id:%d N:%d d:%.5e\n", t, // id[gtid], N, n_0); local_dti = fmax(local_dti, Calc_Timestep(gamma, density, momentum_x, momentum_y, momentum_z, - energy, indx, dx, dy, dz)); + energy, indx, dx, dy, dz, density_floor)); } } } @@ -698,7 +698,7 @@ Real supernova::Cluster_Feedback(Grid3D& G, FeedbackAnalysis& analysis) G.H.nz, G.H.n_ghost, G.H.t, G.H.dt, d_dti, d_info, G.C.d_density, G.C.d_GasEnergy, G.C.d_Energy, G.C.d_momentum_x, G.C.d_momentum_y, G.C.d_momentum_z, gama, supernova::randStates, d_prev_dens, d_prev_N, direction, dev_snr, snr_dt, time_sn_start, - time_sn_end, G.H.n_step); + time_sn_end, G.H.n_step, G.H.density_floor); GPU_Error_Check(cudaMemcpy(&h_dti, d_dti, sizeof(Real), cudaMemcpyDeviceToHost)); } @@ -719,7 +719,7 @@ Real supernova::Cluster_Feedback(Grid3D& G, FeedbackAnalysis& analysis) G.H.nz, G.H.n_ghost, G.H.t, G.H.dt, d_dti, d_info, G.C.d_density, G.C.d_GasEnergy, G.C.d_Energy, G.C.d_momentum_x, G.C.d_momentum_y, G.C.d_momentum_z, gama, supernova::randStates, d_prev_dens, d_prev_N, direction, dev_snr, snr_dt, time_sn_start, - time_sn_end, G.H.n_step); + time_sn_end, G.H.n_step, G.H.density_floor); GPU_Error_Check(cudaDeviceSynchronize()); } From 5fd419e841a51f04a435a4a0c87235c06bb14f4a Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Mon, 22 Jan 2024 13:27:02 -0500 Subject: [PATCH 644/694] Add a function for computing the temperature from conserved variables --- src/utils/hydro_utilities.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/src/utils/hydro_utilities.h b/src/utils/hydro_utilities.h index 24caff9f7..984a1f8ec 100644 --- a/src/utils/hydro_utilities.h +++ b/src/utils/hydro_utilities.h @@ -63,6 +63,30 @@ inline __host__ __device__ Real Calc_Temp(Real const &P, Real const &n) return T; } +/*! + * \brief Compute the temperature from the conserved variables + * + * \param[in] E The energy + * \param[in] d The density + * \param[in] mx The momentum in the X-direction + * \param[in] my The momentum in the Y-direction + * \param[in] mz The momentum in the Z-direction + * \param[in] gamma The adiabatic index + * \param[in] n The number density + * \param[in] magnetic_x The cell centered magnetic field in the X-direction + * \param[in] magnetic_y The cell centered magnetic field in the Y-direction + * \param[in] magnetic_z The cell centered magnetic field in the Z-direction + * \return Real The temperature of the gas in a cell + */ +inline __host__ __device__ Real Calc_Temp_Conserved(Real const E, Real const d, Real const mx, Real const my, + Real const mz, Real const gamma, Real const n, + Real const magnetic_x = 0.0, Real const magnetic_y = 0.0, + Real const magnetic_z = 0.0) +{ + Real const P = Calc_Pressure_Conserved(E, d, mx, my, mz, gamma, magnetic_x, magnetic_y, magnetic_z); + return Calc_Temp(P, n); +} + #ifdef DE inline __host__ __device__ Real Calc_Temp_DE(Real const &d, Real const &ge, Real const &gamma, Real const &n) { From ddd046e2398d9b38080f74f652b48864f4792de6 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Tue, 23 Jan 2024 13:56:16 -0500 Subject: [PATCH 645/694] Add MHD support to projection outputs --- src/io/io.cpp | 96 ++++++++++++++++++++++++++++++++------------------- 1 file changed, 60 insertions(+), 36 deletions(-) diff --git a/src/io/io.cpp b/src/io/io.cpp index 9959267de..a246cb6b2 100644 --- a/src/io/io.cpp +++ b/src/io/io.cpp @@ -16,6 +16,8 @@ #include "../grid/grid3D.h" #include "../io/io.h" #include "../utils/cuda_utilities.h" +#include "../utils/hydro_utilities.h" +#include "../utils/mhd_utilities.h" #include "../utils/timing_functions.h" // provides ScopedTimer #ifdef MPI_CHOLLA #include "../mpi/mpi_routines.h" @@ -1526,18 +1528,16 @@ void Grid3D::Write_Grid_HDF5(hid_t file_id) * current simulation time. */ void Grid3D::Write_Projection_HDF5(hid_t file_id) { - int i, j, k, id, buf_id; hid_t dataset_id, dataspace_xy_id, dataspace_xz_id; Real *dataset_buffer_dxy, *dataset_buffer_dxz; Real *dataset_buffer_Txy, *dataset_buffer_Txz; herr_t status; - Real dxy, dxz, Txy, Txz, n, T; + Real dxy, dxz, Txy, Txz; #ifdef DUST Real dust_xy, dust_xz; Real *dataset_buffer_dust_xy, *dataset_buffer_dust_xz; #endif - n = T = 0; Real mu = 0.6; // 3D @@ -1563,37 +1563,51 @@ void Grid3D::Write_Projection_HDF5(hid_t file_id) dataspace_xz_id = H5Screate_simple(2, dims, NULL); // Copy the xy density and temperature projections to the memory buffer - for (j = 0; j < H.ny_real; j++) { - for (i = 0; i < H.nx_real; i++) { + for (int j = 0; j < H.ny_real; j++) { + for (int i = 0; i < H.nx_real; i++) { dxy = 0; Txy = 0; #ifdef DUST dust_xy = 0; #endif // for each xy element, sum over the z column - for (k = 0; k < H.nz_real; k++) { - id = (i + H.n_ghost) + (j + H.n_ghost) * H.nx + (k + H.n_ghost) * H.nx * H.ny; + for (int k = 0; k < H.nz_real; k++) { + int const xid = i + H.n_ghost; + int const yid = j + H.n_ghost; + int const zid = k + H.n_ghost; + int const id = cuda_utilities::compute1DIndex(xid, yid, zid, H.nx, H.ny); + // sum density dxy += C.density[id] * H.dz; #ifdef DUST dust_xy += C.dust_density[id] * H.dz; #endif // calculate number density - n = C.density[id] * DENSITY_UNIT / (mu * MP); + Real const n = C.density[id] * DENSITY_UNIT / (mu * MP); + // calculate temperature - #ifndef DE - Real mx = C.momentum_x[id]; - Real my = C.momentum_y[id]; - Real mz = C.momentum_z[id]; - Real E = C.Energy[id]; - T = (E - 0.5 * (mx * mx + my * my + mz * mz) / C.density[id]) * (gama - 1.0) * PRESSURE_UNIT / (n * KB); - #endif #ifdef DE - T = C.GasEnergy[id] * PRESSURE_UNIT * (gama - 1.0) / (n * KB); - #endif - Txy += T * C.density[id] * H.dz; + Real const T = hydro_utilities::Calc_Temp_DE(C.density[id], C.GasEnergy[id], gama, n); + #else // DE is not defined + Real const mx = C.momentum_x[id]; + Real const my = C.momentum_y[id]; + Real const mz = C.momentum_z[id]; + Real const E = C.Energy[id]; + + #ifdef MHD + auto const [magnetic_x, magnetic_y, magnetic_z] = + mhd::utils::cellCenteredMagneticFields(C.host, id, xid, yid, zid, H.n_cells, H.nx, H.ny); + #else // MHD is not defined + Real const magnetic_x = 0.0, magnetic_y = 0.0, magnetic_z = 0.0; + #endif // MHD + + Real const T = hydro_utilities::Calc_Temp_Conserved(E, C.density[id], mx, my, mz, gama, n, magnetic_x, + magnetic_y, magnetic_z); + #endif // DE + + Txy += T * H.dz; } - buf_id = j + i * H.ny_real; + int const buf_id = j + i * H.ny_real; dataset_buffer_dxy[buf_id] = dxy; dataset_buffer_Txy[buf_id] = Txy; #ifdef DUST @@ -1603,37 +1617,47 @@ void Grid3D::Write_Projection_HDF5(hid_t file_id) } // Copy the xz density and temperature projections to the memory buffer - for (k = 0; k < H.nz_real; k++) { - for (i = 0; i < H.nx_real; i++) { + for (int k = 0; k < H.nz_real; k++) { + for (int i = 0; i < H.nx_real; i++) { dxz = 0; Txz = 0; #ifdef DUST dust_xz = 0; #endif // for each xz element, sum over the y column - for (j = 0; j < H.ny_real; j++) { - id = (i + H.n_ghost) + (j + H.n_ghost) * H.nx + (k + H.n_ghost) * H.nx * H.ny; + for (int j = 0; j < H.ny_real; j++) { + int const xid = i + H.n_ghost; + int const yid = j + H.n_ghost; + int const zid = k + H.n_ghost; + int const id = cuda_utilities::compute1DIndex(xid, yid, zid, H.nx, H.ny); // sum density dxz += C.density[id] * H.dy; #ifdef DUST dust_xz += C.dust_density[id] * H.dy; #endif // calculate number density - n = C.density[id] * DENSITY_UNIT / (mu * MP); - // calculate temperature - #ifndef DE - Real mx = C.momentum_x[id]; - Real my = C.momentum_y[id]; - Real mz = C.momentum_z[id]; - Real E = C.Energy[id]; - T = (E - 0.5 * (mx * mx + my * my + mz * mz) / C.density[id]) * (gama - 1.0) * PRESSURE_UNIT / (n * KB); - #endif + Real const n = C.density[id] * DENSITY_UNIT / (mu * MP); #ifdef DE - T = C.GasEnergy[id] * PRESSURE_UNIT * (gama - 1.0) / (n * KB); - #endif - Txz += T * C.density[id] * H.dy; + Real const T = hydro_utilities::Calc_Temp_DE(C.density[id], C.GasEnergy[id], gama, n); + #else // DE is not defined + Real const mx = C.momentum_x[id]; + Real const my = C.momentum_y[id]; + Real const mz = C.momentum_z[id]; + Real const E = C.Energy[id]; + + #ifdef MHD + auto const [magnetic_x, magnetic_y, magnetic_z] = + mhd::utils::cellCenteredMagneticFields(C.host, id, xid, yid, zid, H.n_cells, H.nx, H.ny); + #else // MHD is not defined + Real const magnetic_x = 0.0, magnetic_y = 0.0, magnetic_z = 0.0; + #endif // MHD + + Real const T = hydro_utilities::Calc_Temp_Conserved(E, C.density[id], mx, my, mz, gama, n, magnetic_x, + magnetic_y, magnetic_z); + #endif // DE + Txz += T * H.dy; } - buf_id = k + i * H.nz_real; + int const buf_id = k + i * H.nz_real; dataset_buffer_dxz[buf_id] = dxz; dataset_buffer_Txz[buf_id] = Txz; #ifdef DUST From f276258598f8997163907907b032672ad87e346e Mon Sep 17 00:00:00 2001 From: helenarichie Date: Tue, 23 Jan 2024 16:21:26 -0500 Subject: [PATCH 646/694] run clang format --- src/analysis/feedback_analysis.cpp | 2 +- src/chemistry_gpu/chemistry_functions.cpp | 2 +- src/cooling_grackle/cool_grackle.cpp | 2 +- src/gravity/gravity_functions.cpp | 8 ++++---- src/gravity/gravity_functions_gpu.cu | 4 ++-- src/grid/grid3D.cpp | 8 ++++---- src/grid/initial_conditions.cpp | 20 ++++++++++---------- src/integrators/VL_3D_cuda.cu | 2 +- src/io/io.cpp | 4 ++-- src/particles/io_particles.cpp | 14 +++++++------- src/particles/particles_3D.cpp | 14 +++++++------- src/particles/particles_boundaries_cpu.cpp | 4 ++-- src/reconstruction/plmp_cuda.cu | 2 +- src/reconstruction/ppmc_cuda_tests.cu | 2 +- src/reconstruction/ppmp_cuda.cu | 2 +- src/reconstruction/reconstruction_tests.cu | 8 ++------ src/system_tests/hydro_system_tests.cpp | 4 ++-- 17 files changed, 49 insertions(+), 53 deletions(-) diff --git a/src/analysis/feedback_analysis.cpp b/src/analysis/feedback_analysis.cpp index 5fe0e7543..3dab7b6da 100644 --- a/src/analysis/feedback_analysis.cpp +++ b/src/analysis/feedback_analysis.cpp @@ -87,7 +87,7 @@ void FeedbackAnalysis::Compute_Gas_Velocity_Dispersion(Grid3D& G) #ifdef MPI_CHOLLA MPI_Allreduce(&partial_mass, &total_mass, 1, MPI_CHREAL, MPI_SUM, world); #else - total_mass = partial_mass; + total_mass = partial_mass; #endif for (k = G.H.n_ghost; k < G.H.nz - G.H.n_ghost; k++) { diff --git a/src/chemistry_gpu/chemistry_functions.cpp b/src/chemistry_gpu/chemistry_functions.cpp index 65e3af691..7999a6d55 100644 --- a/src/chemistry_gpu/chemistry_functions.cpp +++ b/src/chemistry_gpu/chemistry_functions.cpp @@ -228,7 +228,7 @@ void Grid3D::Update_Chemistry() #ifdef COSMOLOGY Chem.H.current_z = Cosmo.current_z; #else - Chem.H.current_z = 0; + Chem.H.current_z = 0; #endif Do_Chemistry_Update(C.device, H.nx, H.ny, H.nz, H.n_ghost, H.n_fields, H.dt, Chem.H); diff --git a/src/cooling_grackle/cool_grackle.cpp b/src/cooling_grackle/cool_grackle.cpp index c5f2a8078..a7f5c36cb 100644 --- a/src/cooling_grackle/cool_grackle.cpp +++ b/src/cooling_grackle/cool_grackle.cpp @@ -89,7 +89,7 @@ void Cool_GK::Initialize(struct Parameters *P, Cosmology &Cosmo) data->metal_cooling = 1; // metal cooling off #else chprintf("WARNING: Metal Cooling is Off. \n"); - data->metal_cooling = 0; // metal cooling off + data->metal_cooling = 0; // metal cooling off #endif #ifdef PARALLEL_OMP diff --git a/src/gravity/gravity_functions.cpp b/src/gravity/gravity_functions.cpp index 6c7a6dde7..b92d06564 100644 --- a/src/gravity/gravity_functions.cpp +++ b/src/gravity/gravity_functions.cpp @@ -137,7 +137,7 @@ void Grid3D::set_dt_Gravity() dt_particles = Calc_Particles_dt(); dt_particles = fmin(dt_particles, Particles.max_dt); #ifdef ONLY_PARTICLES - dt_min = dt_particles; + dt_min = dt_particles; chprintf(" dt_particles: %f \n", dt_particles); #else chprintf(" dt_hydro: %f dt_particles: %f \n", dt_hydro, dt_particles); @@ -211,7 +211,7 @@ Real Grav3D::Get_Average_Density() #ifdef MPI_CHOLLA dens_avrg_all = ReduceRealAvg(dens_mean); #else - dens_avrg_all = dens_mean; + dens_avrg_all = dens_mean; #endif dens_avrg = dens_avrg_all; @@ -530,8 +530,8 @@ void Grid3D::Compute_Gravitational_Potential(struct Parameters *P) input_density = Grav.F.density_d; output_potential = Grav.F.potential_d; #else - input_density = Grav.F.density_h; - output_potential = Grav.F.potential_h; + input_density = Grav.F.density_h; + output_potential = Grav.F.potential_h; #endif #ifdef SOR diff --git a/src/gravity/gravity_functions_gpu.cu b/src/gravity/gravity_functions_gpu.cu index 15de64a95..b92d19084 100644 --- a/src/gravity/gravity_functions_gpu.cu +++ b/src/gravity/gravity_functions_gpu.cu @@ -127,7 +127,7 @@ void Grid3D::Copy_Hydro_Density_to_Gravity_GPU() #ifdef COSMOLOGY cosmo_rho_0_gas = Cosmo.rho_0_gas; #else - cosmo_rho_0_gas = 1.0; + cosmo_rho_0_gas = 1.0; #endif // Copy the density from the device array to the Poisson input density array @@ -261,7 +261,7 @@ void Grid3D::Extrapolate_Grav_Potential_GPU() #ifdef COSMOLOGY cosmo_factor = Cosmo.current_a * Cosmo.current_a / Cosmo.phi_0_gas; #else - cosmo_factor = 1.0; + cosmo_factor = 1.0; #endif // set values for GPU kernels diff --git a/src/grid/grid3D.cpp b/src/grid/grid3D.cpp index 93499fea0..dd65da756 100644 --- a/src/grid/grid3D.cpp +++ b/src/grid/grid3D.cpp @@ -267,13 +267,13 @@ void Grid3D::Initialize(struct Parameters *P) #ifdef DENSITY_FLOOR H.density_floor = P->density_floor; #else - H.density_floor = 0.0; + H.density_floor = 0.0; #endif #ifdef SCALAR_FLOOR H.scalar_floor = P->scalar_floor; #else - H.scalar_floor = 0.0; + H.scalar_floor = 0.0; #endif #ifdef COSMOLOGY @@ -345,8 +345,8 @@ void Grid3D::AllocateMemory(void) GPU_Error_Check(cudaHostAlloc(&C.Grav_potential, H.n_cells * sizeof(Real), cudaHostAllocDefault)); GPU_Error_Check(cudaMalloc((void **)&C.d_Grav_potential, H.n_cells * sizeof(Real))); #else - C.Grav_potential = NULL; - C.d_Grav_potential = NULL; + C.Grav_potential = NULL; + C.d_Grav_potential = NULL; #endif #ifdef CHEMISTRY_GPU diff --git a/src/grid/initial_conditions.cpp b/src/grid/initial_conditions.cpp index 30e3eb459..af558be8f 100644 --- a/src/grid/initial_conditions.cpp +++ b/src/grid/initial_conditions.cpp @@ -1482,18 +1482,18 @@ void Grid3D::Zeldovich_Pancake(struct Parameters P) Real H0, h, Omega_M, rho_0, G, z_zeldovich, z_init, x_center, T_init, k_x; chprintf("Setting Zeldovich Pancake initial conditions...\n"); - H0 = P.H0; - h = H0 / 100; + H0 = P.H0; + h = H0 / 100; Omega_M = P.Omega_M; chprintf(" h = %f \n", h); chprintf(" Omega_M = %f \n", Omega_M); H0 /= 1000; //[km/s / kpc] - G = G_COSMO; - rho_0 = 3 * H0 * H0 / (8 * M_PI * G) * Omega_M / h / h; + G = G_COSMO; + rho_0 = 3 * H0 * H0 / (8 * M_PI * G) * Omega_M / h / h; z_zeldovich = 1; - z_init = P.Init_redshift; + z_init = P.Init_redshift; chprintf(" rho_0 = %f \n", rho_0); chprintf(" z_init = %f \n", z_init); chprintf(" z_zeldovich = %f \n", z_zeldovich); @@ -1553,17 +1553,17 @@ void Grid3D::Zeldovich_Pancake(struct Parameters P) index = (int(x_pos / H.dx) + 0) % 256; // index = ( index + 16 ) % 256; dens = ics_values[0 * nPoints + index]; - vel = ics_values[1 * nPoints + index]; - E = ics_values[2 * nPoints + index]; - U = ics_values[3 * nPoints + index]; + vel = ics_values[1 * nPoints + index]; + E = ics_values[2 * nPoints + index]; + U = ics_values[3 * nPoints + index]; // // // chprintf( "%f \n", vel ); - C.density[id] = dens; + C.density[id] = dens; C.momentum_x[id] = dens * vel; C.momentum_y[id] = 0; C.momentum_z[id] = 0; - C.Energy[id] = E; + C.Energy[id] = E; #ifdef DE C.GasEnergy[id] = U; diff --git a/src/integrators/VL_3D_cuda.cu b/src/integrators/VL_3D_cuda.cu index 2b4162ffc..1118c1419 100644 --- a/src/integrators/VL_3D_cuda.cu +++ b/src/integrators/VL_3D_cuda.cu @@ -123,7 +123,7 @@ void VL_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, int #if defined(GRAVITY) dev_grav_potential = d_grav_potential; #else // not GRAVITY - dev_grav_potential = NULL; + dev_grav_potential = NULL; #endif // GRAVITY // If memory is single allocated: memory_allocated becomes true and diff --git a/src/io/io.cpp b/src/io/io.cpp index b243440e4..9959267de 100644 --- a/src/io/io.cpp +++ b/src/io/io.cpp @@ -1400,12 +1400,12 @@ void Grid3D::Write_Grid_HDF5(hid_t file_id) #ifdef OUTPUT_METALS output_metals = true; #else // not OUTPUT_METALS - output_metals = false; + output_metals = false; #endif // OUTPUT_METALS #ifdef OUTPUT_ELECTRONS output_electrons = true; #else // not OUTPUT_ELECTRONS - output_electrons = false; + output_electrons = false; #endif // OUTPUT_ELECTRONS #ifdef OUTPUT_FULL_IONIZATION output_full_ionization = true; diff --git a/src/particles/io_particles.cpp b/src/particles/io_particles.cpp index e6da774ed..e986c5287 100644 --- a/src/particles/io_particles.cpp +++ b/src/particles/io_particles.cpp @@ -445,12 +445,12 @@ void Particles3D::Load_Particles_Data_HDF5(hid_t file_id, int nfile, struct Para Real vy_max_g = vy_max; Real vz_max_g = vz_max; - Real px_min_g = px_min; - Real py_min_g = py_min; - Real pz_min_g = pz_min; - Real vx_min_g = vx_min; - Real vy_min_g = vy_min; - Real vz_min_g = vz_min; + Real px_min_g = px_min; + Real py_min_g = py_min; + Real pz_min_g = pz_min; + Real vx_min_g = vx_min; + Real vy_min_g = vy_min; + Real vz_min_g = vz_min; #endif // MPI_CHOLLA // Print initial Statistics @@ -563,7 +563,7 @@ void Grid3D::Write_Particles_Data_HDF5(hid_t file_id) #ifdef MPI_CHOLLA N_particles_total = ReducePartIntSum(Particles.n_local); #else - N_particles_total = Particles.n_local; + N_particles_total = Particles.n_local; #endif // Print the total particles when saving the particles data diff --git a/src/particles/particles_3D.cpp b/src/particles/particles_3D.cpp index 87a2be8e5..6417e4136 100644 --- a/src/particles/particles_3D.cpp +++ b/src/particles/particles_3D.cpp @@ -157,12 +157,12 @@ void Particles3D::Initialize(struct Parameters *P, Grav3D &Grav, Real xbound, Re G.boundary_type_z0 = P->zlg_bcnd; G.boundary_type_z1 = P->zug_bcnd; #else - G.boundary_type_x0 = P->xl_bcnd; - G.boundary_type_x1 = P->xu_bcnd; - G.boundary_type_y0 = P->yl_bcnd; - G.boundary_type_y1 = P->yu_bcnd; - G.boundary_type_z0 = P->zl_bcnd; - G.boundary_type_z1 = P->zu_bcnd; + G.boundary_type_x0 = P->xl_bcnd; + G.boundary_type_x1 = P->xu_bcnd; + G.boundary_type_y0 = P->yl_bcnd; + G.boundary_type_y1 = P->yu_bcnd; + G.boundary_type_z0 = P->zl_bcnd; + G.boundary_type_z1 = P->zu_bcnd; #endif #ifdef PARTICLES_GPU @@ -211,7 +211,7 @@ void Particles3D::Initialize(struct Parameters *P, Grav3D &Grav, Real xbound, Re #ifdef MPI_CHOLLA n_total_initial = ReducePartIntSum(n_local); #else - n_total_initial = n_local; + n_total_initial = n_local; #endif chprintf("Particles Initialized: \n n_local: %lu \n", n_local); diff --git a/src/particles/particles_boundaries_cpu.cpp b/src/particles/particles_boundaries_cpu.cpp index 772153534..27470befe 100644 --- a/src/particles/particles_boundaries_cpu.cpp +++ b/src/particles/particles_boundaries_cpu.cpp @@ -433,13 +433,13 @@ void Particles3D::Unload_Particles_from_Buffer_CPU(int direction, int side, Real offset_extra += 1; pId = recv_buffer[offset_extra]; #else - pId = 0; + pId = 0; #endif #ifdef PARTICLE_AGE offset_extra += 1; pAge = recv_buffer[offset_extra]; #else - pAge = 0.0; + pAge = 0.0; #endif offset_buff += N_DATA_PER_PARTICLE_TRANSFER; diff --git a/src/reconstruction/plmp_cuda.cu b/src/reconstruction/plmp_cuda.cu index f69bbdc4b..a000da4da 100644 --- a/src/reconstruction/plmp_cuda.cu +++ b/src/reconstruction/plmp_cuda.cu @@ -120,7 +120,7 @@ __global__ void PLMP_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou dge = dev_conserved[(n_fields - 1) * n_cells + id]; p_i = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, dge, gamma); #else - p_i = (dev_conserved[4 * n_cells + id] - 0.5 * d_i * (vx_i * vx_i + vy_i * vy_i + vz_i * vz_i)) * (gamma - 1.0); + p_i = (dev_conserved[4 * n_cells + id] - 0.5 * d_i * (vx_i * vx_i + vy_i * vy_i + vz_i * vz_i)) * (gamma - 1.0); #endif // PRESSURE_DE p_i = fmax(p_i, (Real)TINY_NUMBER); #ifdef SCALAR diff --git a/src/reconstruction/ppmc_cuda_tests.cu b/src/reconstruction/ppmc_cuda_tests.cu index c1319ea58..9e9b11140 100644 --- a/src/reconstruction/ppmc_cuda_tests.cu +++ b/src/reconstruction/ppmc_cuda_tests.cu @@ -139,7 +139,7 @@ TEST(tALLPpmcVLReconstructor, CorrectInputExpectCorrectOutput) #ifdef MHD size_t const n_fields = 8; #else // not MHD - size_t const n_fields = 5; + size_t const n_fields = 5; #endif // MHD // Setup host grid. Fill host grid with random values and randomly assign maximum value diff --git a/src/reconstruction/ppmp_cuda.cu b/src/reconstruction/ppmp_cuda.cu index f84946437..ae8da90cb 100644 --- a/src/reconstruction/ppmp_cuda.cu +++ b/src/reconstruction/ppmp_cuda.cu @@ -166,7 +166,7 @@ __global__ void PPMP_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou dge = dev_conserved[(n_fields - 1) * n_cells + id]; p_i = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, dge, gamma); #else - p_i = (dev_conserved[4 * n_cells + id] - 0.5 * d_i * (vx_i * vx_i + vy_i * vy_i + vz_i * vz_i)) * (gamma - 1.0); + p_i = (dev_conserved[4 * n_cells + id] - 0.5 * d_i * (vx_i * vx_i + vy_i * vy_i + vz_i * vz_i)) * (gamma - 1.0); #endif // PRESSURE_DE p_i = fmax(p_i, (Real)TINY_NUMBER); #ifdef DE diff --git a/src/reconstruction/reconstruction_tests.cu b/src/reconstruction/reconstruction_tests.cu index 74c0e6896..6c2e19af7 100644 --- a/src/reconstruction/reconstruction_tests.cu +++ b/src/reconstruction/reconstruction_tests.cu @@ -575,13 +575,9 @@ TEST(tALLReconstructionWriteData, CorrectInputExpectCorrectOutput) { // Set up test and mock up grid #ifdef MHD - reconstruction::Primitive interface { - 1, 2, 3, 4, 5, 6, 7, 8 - }; + reconstruction::Primitive interface{1, 2, 3, 4, 5, 6, 7, 8}; #else // MHD - reconstruction::Primitive interface { - 6, 7, 8, 9, 10 - }; + reconstruction::Primitive interface{6, 7, 8, 9, 10}; #endif // MHD size_t const nx = 3, ny = 3, nz = 3; size_t const n_cells = nx * ny * nz; diff --git a/src/system_tests/hydro_system_tests.cpp b/src/system_tests/hydro_system_tests.cpp index 18b2994bb..6cffe9c21 100644 --- a/src/system_tests/hydro_system_tests.cpp +++ b/src/system_tests/hydro_system_tests.cpp @@ -56,8 +56,8 @@ TEST_P(tHYDROtMHDSYSTEMSodShockTubeParameterizedMpi, CorrectInputExpectCorrectOu double const maxAllowedL1Error = 7.0E-3; double const maxAllowedError = 4.6E-2; #else - double const maxAllowedL1Error = 9.4E-5; - double const maxAllowedError = 6.4E-4; + double const maxAllowedL1Error = 9.4E-5; + double const maxAllowedError = 6.4E-4; #endif // MHD sodTest.numMpiRanks = GetParam(); From 37ccafdced47c8eccc07eb92822e2a0e97d119e8 Mon Sep 17 00:00:00 2001 From: helenarichie Date: Tue, 23 Jan 2024 16:22:33 -0500 Subject: [PATCH 647/694] run clang format --- src/analysis/feedback_analysis.cpp | 2 +- src/chemistry_gpu/chemistry_functions.cpp | 2 +- src/cooling_grackle/cool_grackle.cpp | 2 +- src/gravity/gravity_functions.cpp | 8 ++++---- src/gravity/gravity_functions_gpu.cu | 4 ++-- src/grid/grid3D.cpp | 8 ++++---- src/grid/initial_conditions.cpp | 20 ++++++++++---------- src/integrators/VL_3D_cuda.cu | 2 +- src/io/io.cpp | 4 ++-- src/particles/io_particles.cpp | 14 +++++++------- src/particles/particles_3D.cpp | 14 +++++++------- src/particles/particles_boundaries_cpu.cpp | 4 ++-- src/reconstruction/plmp_cuda.cu | 2 +- src/reconstruction/ppmc_cuda_tests.cu | 2 +- src/reconstruction/ppmp_cuda.cu | 2 +- src/reconstruction/reconstruction_tests.cu | 8 ++++++-- src/system_tests/hydro_system_tests.cpp | 4 ++-- 17 files changed, 53 insertions(+), 49 deletions(-) diff --git a/src/analysis/feedback_analysis.cpp b/src/analysis/feedback_analysis.cpp index 3dab7b6da..5fe0e7543 100644 --- a/src/analysis/feedback_analysis.cpp +++ b/src/analysis/feedback_analysis.cpp @@ -87,7 +87,7 @@ void FeedbackAnalysis::Compute_Gas_Velocity_Dispersion(Grid3D& G) #ifdef MPI_CHOLLA MPI_Allreduce(&partial_mass, &total_mass, 1, MPI_CHREAL, MPI_SUM, world); #else - total_mass = partial_mass; + total_mass = partial_mass; #endif for (k = G.H.n_ghost; k < G.H.nz - G.H.n_ghost; k++) { diff --git a/src/chemistry_gpu/chemistry_functions.cpp b/src/chemistry_gpu/chemistry_functions.cpp index 7999a6d55..65e3af691 100644 --- a/src/chemistry_gpu/chemistry_functions.cpp +++ b/src/chemistry_gpu/chemistry_functions.cpp @@ -228,7 +228,7 @@ void Grid3D::Update_Chemistry() #ifdef COSMOLOGY Chem.H.current_z = Cosmo.current_z; #else - Chem.H.current_z = 0; + Chem.H.current_z = 0; #endif Do_Chemistry_Update(C.device, H.nx, H.ny, H.nz, H.n_ghost, H.n_fields, H.dt, Chem.H); diff --git a/src/cooling_grackle/cool_grackle.cpp b/src/cooling_grackle/cool_grackle.cpp index a7f5c36cb..c5f2a8078 100644 --- a/src/cooling_grackle/cool_grackle.cpp +++ b/src/cooling_grackle/cool_grackle.cpp @@ -89,7 +89,7 @@ void Cool_GK::Initialize(struct Parameters *P, Cosmology &Cosmo) data->metal_cooling = 1; // metal cooling off #else chprintf("WARNING: Metal Cooling is Off. \n"); - data->metal_cooling = 0; // metal cooling off + data->metal_cooling = 0; // metal cooling off #endif #ifdef PARALLEL_OMP diff --git a/src/gravity/gravity_functions.cpp b/src/gravity/gravity_functions.cpp index b92d06564..6c7a6dde7 100644 --- a/src/gravity/gravity_functions.cpp +++ b/src/gravity/gravity_functions.cpp @@ -137,7 +137,7 @@ void Grid3D::set_dt_Gravity() dt_particles = Calc_Particles_dt(); dt_particles = fmin(dt_particles, Particles.max_dt); #ifdef ONLY_PARTICLES - dt_min = dt_particles; + dt_min = dt_particles; chprintf(" dt_particles: %f \n", dt_particles); #else chprintf(" dt_hydro: %f dt_particles: %f \n", dt_hydro, dt_particles); @@ -211,7 +211,7 @@ Real Grav3D::Get_Average_Density() #ifdef MPI_CHOLLA dens_avrg_all = ReduceRealAvg(dens_mean); #else - dens_avrg_all = dens_mean; + dens_avrg_all = dens_mean; #endif dens_avrg = dens_avrg_all; @@ -530,8 +530,8 @@ void Grid3D::Compute_Gravitational_Potential(struct Parameters *P) input_density = Grav.F.density_d; output_potential = Grav.F.potential_d; #else - input_density = Grav.F.density_h; - output_potential = Grav.F.potential_h; + input_density = Grav.F.density_h; + output_potential = Grav.F.potential_h; #endif #ifdef SOR diff --git a/src/gravity/gravity_functions_gpu.cu b/src/gravity/gravity_functions_gpu.cu index b92d19084..15de64a95 100644 --- a/src/gravity/gravity_functions_gpu.cu +++ b/src/gravity/gravity_functions_gpu.cu @@ -127,7 +127,7 @@ void Grid3D::Copy_Hydro_Density_to_Gravity_GPU() #ifdef COSMOLOGY cosmo_rho_0_gas = Cosmo.rho_0_gas; #else - cosmo_rho_0_gas = 1.0; + cosmo_rho_0_gas = 1.0; #endif // Copy the density from the device array to the Poisson input density array @@ -261,7 +261,7 @@ void Grid3D::Extrapolate_Grav_Potential_GPU() #ifdef COSMOLOGY cosmo_factor = Cosmo.current_a * Cosmo.current_a / Cosmo.phi_0_gas; #else - cosmo_factor = 1.0; + cosmo_factor = 1.0; #endif // set values for GPU kernels diff --git a/src/grid/grid3D.cpp b/src/grid/grid3D.cpp index dd65da756..93499fea0 100644 --- a/src/grid/grid3D.cpp +++ b/src/grid/grid3D.cpp @@ -267,13 +267,13 @@ void Grid3D::Initialize(struct Parameters *P) #ifdef DENSITY_FLOOR H.density_floor = P->density_floor; #else - H.density_floor = 0.0; + H.density_floor = 0.0; #endif #ifdef SCALAR_FLOOR H.scalar_floor = P->scalar_floor; #else - H.scalar_floor = 0.0; + H.scalar_floor = 0.0; #endif #ifdef COSMOLOGY @@ -345,8 +345,8 @@ void Grid3D::AllocateMemory(void) GPU_Error_Check(cudaHostAlloc(&C.Grav_potential, H.n_cells * sizeof(Real), cudaHostAllocDefault)); GPU_Error_Check(cudaMalloc((void **)&C.d_Grav_potential, H.n_cells * sizeof(Real))); #else - C.Grav_potential = NULL; - C.d_Grav_potential = NULL; + C.Grav_potential = NULL; + C.d_Grav_potential = NULL; #endif #ifdef CHEMISTRY_GPU diff --git a/src/grid/initial_conditions.cpp b/src/grid/initial_conditions.cpp index af558be8f..30e3eb459 100644 --- a/src/grid/initial_conditions.cpp +++ b/src/grid/initial_conditions.cpp @@ -1482,18 +1482,18 @@ void Grid3D::Zeldovich_Pancake(struct Parameters P) Real H0, h, Omega_M, rho_0, G, z_zeldovich, z_init, x_center, T_init, k_x; chprintf("Setting Zeldovich Pancake initial conditions...\n"); - H0 = P.H0; - h = H0 / 100; + H0 = P.H0; + h = H0 / 100; Omega_M = P.Omega_M; chprintf(" h = %f \n", h); chprintf(" Omega_M = %f \n", Omega_M); H0 /= 1000; //[km/s / kpc] - G = G_COSMO; - rho_0 = 3 * H0 * H0 / (8 * M_PI * G) * Omega_M / h / h; + G = G_COSMO; + rho_0 = 3 * H0 * H0 / (8 * M_PI * G) * Omega_M / h / h; z_zeldovich = 1; - z_init = P.Init_redshift; + z_init = P.Init_redshift; chprintf(" rho_0 = %f \n", rho_0); chprintf(" z_init = %f \n", z_init); chprintf(" z_zeldovich = %f \n", z_zeldovich); @@ -1553,17 +1553,17 @@ void Grid3D::Zeldovich_Pancake(struct Parameters P) index = (int(x_pos / H.dx) + 0) % 256; // index = ( index + 16 ) % 256; dens = ics_values[0 * nPoints + index]; - vel = ics_values[1 * nPoints + index]; - E = ics_values[2 * nPoints + index]; - U = ics_values[3 * nPoints + index]; + vel = ics_values[1 * nPoints + index]; + E = ics_values[2 * nPoints + index]; + U = ics_values[3 * nPoints + index]; // // // chprintf( "%f \n", vel ); - C.density[id] = dens; + C.density[id] = dens; C.momentum_x[id] = dens * vel; C.momentum_y[id] = 0; C.momentum_z[id] = 0; - C.Energy[id] = E; + C.Energy[id] = E; #ifdef DE C.GasEnergy[id] = U; diff --git a/src/integrators/VL_3D_cuda.cu b/src/integrators/VL_3D_cuda.cu index 1118c1419..2b4162ffc 100644 --- a/src/integrators/VL_3D_cuda.cu +++ b/src/integrators/VL_3D_cuda.cu @@ -123,7 +123,7 @@ void VL_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, int #if defined(GRAVITY) dev_grav_potential = d_grav_potential; #else // not GRAVITY - dev_grav_potential = NULL; + dev_grav_potential = NULL; #endif // GRAVITY // If memory is single allocated: memory_allocated becomes true and diff --git a/src/io/io.cpp b/src/io/io.cpp index 9959267de..b243440e4 100644 --- a/src/io/io.cpp +++ b/src/io/io.cpp @@ -1400,12 +1400,12 @@ void Grid3D::Write_Grid_HDF5(hid_t file_id) #ifdef OUTPUT_METALS output_metals = true; #else // not OUTPUT_METALS - output_metals = false; + output_metals = false; #endif // OUTPUT_METALS #ifdef OUTPUT_ELECTRONS output_electrons = true; #else // not OUTPUT_ELECTRONS - output_electrons = false; + output_electrons = false; #endif // OUTPUT_ELECTRONS #ifdef OUTPUT_FULL_IONIZATION output_full_ionization = true; diff --git a/src/particles/io_particles.cpp b/src/particles/io_particles.cpp index e986c5287..e6da774ed 100644 --- a/src/particles/io_particles.cpp +++ b/src/particles/io_particles.cpp @@ -445,12 +445,12 @@ void Particles3D::Load_Particles_Data_HDF5(hid_t file_id, int nfile, struct Para Real vy_max_g = vy_max; Real vz_max_g = vz_max; - Real px_min_g = px_min; - Real py_min_g = py_min; - Real pz_min_g = pz_min; - Real vx_min_g = vx_min; - Real vy_min_g = vy_min; - Real vz_min_g = vz_min; + Real px_min_g = px_min; + Real py_min_g = py_min; + Real pz_min_g = pz_min; + Real vx_min_g = vx_min; + Real vy_min_g = vy_min; + Real vz_min_g = vz_min; #endif // MPI_CHOLLA // Print initial Statistics @@ -563,7 +563,7 @@ void Grid3D::Write_Particles_Data_HDF5(hid_t file_id) #ifdef MPI_CHOLLA N_particles_total = ReducePartIntSum(Particles.n_local); #else - N_particles_total = Particles.n_local; + N_particles_total = Particles.n_local; #endif // Print the total particles when saving the particles data diff --git a/src/particles/particles_3D.cpp b/src/particles/particles_3D.cpp index 6417e4136..87a2be8e5 100644 --- a/src/particles/particles_3D.cpp +++ b/src/particles/particles_3D.cpp @@ -157,12 +157,12 @@ void Particles3D::Initialize(struct Parameters *P, Grav3D &Grav, Real xbound, Re G.boundary_type_z0 = P->zlg_bcnd; G.boundary_type_z1 = P->zug_bcnd; #else - G.boundary_type_x0 = P->xl_bcnd; - G.boundary_type_x1 = P->xu_bcnd; - G.boundary_type_y0 = P->yl_bcnd; - G.boundary_type_y1 = P->yu_bcnd; - G.boundary_type_z0 = P->zl_bcnd; - G.boundary_type_z1 = P->zu_bcnd; + G.boundary_type_x0 = P->xl_bcnd; + G.boundary_type_x1 = P->xu_bcnd; + G.boundary_type_y0 = P->yl_bcnd; + G.boundary_type_y1 = P->yu_bcnd; + G.boundary_type_z0 = P->zl_bcnd; + G.boundary_type_z1 = P->zu_bcnd; #endif #ifdef PARTICLES_GPU @@ -211,7 +211,7 @@ void Particles3D::Initialize(struct Parameters *P, Grav3D &Grav, Real xbound, Re #ifdef MPI_CHOLLA n_total_initial = ReducePartIntSum(n_local); #else - n_total_initial = n_local; + n_total_initial = n_local; #endif chprintf("Particles Initialized: \n n_local: %lu \n", n_local); diff --git a/src/particles/particles_boundaries_cpu.cpp b/src/particles/particles_boundaries_cpu.cpp index 27470befe..772153534 100644 --- a/src/particles/particles_boundaries_cpu.cpp +++ b/src/particles/particles_boundaries_cpu.cpp @@ -433,13 +433,13 @@ void Particles3D::Unload_Particles_from_Buffer_CPU(int direction, int side, Real offset_extra += 1; pId = recv_buffer[offset_extra]; #else - pId = 0; + pId = 0; #endif #ifdef PARTICLE_AGE offset_extra += 1; pAge = recv_buffer[offset_extra]; #else - pAge = 0.0; + pAge = 0.0; #endif offset_buff += N_DATA_PER_PARTICLE_TRANSFER; diff --git a/src/reconstruction/plmp_cuda.cu b/src/reconstruction/plmp_cuda.cu index a000da4da..f69bbdc4b 100644 --- a/src/reconstruction/plmp_cuda.cu +++ b/src/reconstruction/plmp_cuda.cu @@ -120,7 +120,7 @@ __global__ void PLMP_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou dge = dev_conserved[(n_fields - 1) * n_cells + id]; p_i = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, dge, gamma); #else - p_i = (dev_conserved[4 * n_cells + id] - 0.5 * d_i * (vx_i * vx_i + vy_i * vy_i + vz_i * vz_i)) * (gamma - 1.0); + p_i = (dev_conserved[4 * n_cells + id] - 0.5 * d_i * (vx_i * vx_i + vy_i * vy_i + vz_i * vz_i)) * (gamma - 1.0); #endif // PRESSURE_DE p_i = fmax(p_i, (Real)TINY_NUMBER); #ifdef SCALAR diff --git a/src/reconstruction/ppmc_cuda_tests.cu b/src/reconstruction/ppmc_cuda_tests.cu index 9e9b11140..c1319ea58 100644 --- a/src/reconstruction/ppmc_cuda_tests.cu +++ b/src/reconstruction/ppmc_cuda_tests.cu @@ -139,7 +139,7 @@ TEST(tALLPpmcVLReconstructor, CorrectInputExpectCorrectOutput) #ifdef MHD size_t const n_fields = 8; #else // not MHD - size_t const n_fields = 5; + size_t const n_fields = 5; #endif // MHD // Setup host grid. Fill host grid with random values and randomly assign maximum value diff --git a/src/reconstruction/ppmp_cuda.cu b/src/reconstruction/ppmp_cuda.cu index ae8da90cb..f84946437 100644 --- a/src/reconstruction/ppmp_cuda.cu +++ b/src/reconstruction/ppmp_cuda.cu @@ -166,7 +166,7 @@ __global__ void PPMP_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou dge = dev_conserved[(n_fields - 1) * n_cells + id]; p_i = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, dge, gamma); #else - p_i = (dev_conserved[4 * n_cells + id] - 0.5 * d_i * (vx_i * vx_i + vy_i * vy_i + vz_i * vz_i)) * (gamma - 1.0); + p_i = (dev_conserved[4 * n_cells + id] - 0.5 * d_i * (vx_i * vx_i + vy_i * vy_i + vz_i * vz_i)) * (gamma - 1.0); #endif // PRESSURE_DE p_i = fmax(p_i, (Real)TINY_NUMBER); #ifdef DE diff --git a/src/reconstruction/reconstruction_tests.cu b/src/reconstruction/reconstruction_tests.cu index 6c2e19af7..74c0e6896 100644 --- a/src/reconstruction/reconstruction_tests.cu +++ b/src/reconstruction/reconstruction_tests.cu @@ -575,9 +575,13 @@ TEST(tALLReconstructionWriteData, CorrectInputExpectCorrectOutput) { // Set up test and mock up grid #ifdef MHD - reconstruction::Primitive interface{1, 2, 3, 4, 5, 6, 7, 8}; + reconstruction::Primitive interface { + 1, 2, 3, 4, 5, 6, 7, 8 + }; #else // MHD - reconstruction::Primitive interface{6, 7, 8, 9, 10}; + reconstruction::Primitive interface { + 6, 7, 8, 9, 10 + }; #endif // MHD size_t const nx = 3, ny = 3, nz = 3; size_t const n_cells = nx * ny * nz; diff --git a/src/system_tests/hydro_system_tests.cpp b/src/system_tests/hydro_system_tests.cpp index 6cffe9c21..18b2994bb 100644 --- a/src/system_tests/hydro_system_tests.cpp +++ b/src/system_tests/hydro_system_tests.cpp @@ -56,8 +56,8 @@ TEST_P(tHYDROtMHDSYSTEMSodShockTubeParameterizedMpi, CorrectInputExpectCorrectOu double const maxAllowedL1Error = 7.0E-3; double const maxAllowedError = 4.6E-2; #else - double const maxAllowedL1Error = 9.4E-5; - double const maxAllowedError = 6.4E-4; + double const maxAllowedL1Error = 9.4E-5; + double const maxAllowedError = 6.4E-4; #endif // MHD sodTest.numMpiRanks = GetParam(); From 80cd55e5b3d641b8d209be8b87db7c278705c508 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Tue, 23 Jan 2024 16:17:51 -0500 Subject: [PATCH 648/694] Fix a DE bug in Dust_Kernel The gas energy was being loaded then checked for correctness against a variable that didn't exist. The gas energy was also being updated when it shouldn't be. Fixed those bugs and added MHD support to the temperature calculations. Also, updated the hydro_utilities::Calc_Temp_DE function to use the total, not specific, gas energy and added documentation for clarity --- src/dust/dust_cuda.cu | 59 ++++++++++++++----------------------- src/io/io.cpp | 26 ++++++++-------- src/utils/hydro_utilities.h | 13 ++++++-- 3 files changed, 46 insertions(+), 52 deletions(-) diff --git a/src/dust/dust_cuda.cu b/src/dust/dust_cuda.cu index 46273ef03..ced77858c 100644 --- a/src/dust/dust_cuda.cu +++ b/src/dust/dust_cuda.cu @@ -52,14 +52,9 @@ __global__ void Dust_Kernel(Real *dev_conserved, int nx, int ny, int nz, int n_g int id_x = id - id_z * nx * ny - id_y * nx; // define physics variables - Real density_gas, density_dust; // fluid mass densities - Real number_density; // gas number density - Real mu = 0.6; // mean molecular weight - Real temperature, energy, pressure; // temperature, energy, pressure - Real velocity_x, velocity_y, velocity_z; // velocities - #ifdef DE - Real energy_gas; - #endif // DE + Real density_gas, density_dust; // fluid mass densities + Real number_density; // gas number density + Real mu = 0.6; // mean molecular weight // define integration variables Real dd_dt; // instantaneous rate of change in dust density @@ -71,36 +66,30 @@ __global__ void Dust_Kernel(Real *dev_conserved, int nx, int ny, int nz, int n_g // get conserved quanitites density_gas = dev_conserved[id + n_cells * grid_enum::density]; density_dust = dev_conserved[id + n_cells * grid_enum::dust_density]; - energy = dev_conserved[id + n_cells * grid_enum::Energy]; // convert mass density to number density number_density = density_gas * DENSITY_UNIT / (mu * MP); - if (energy < 0.0 || energy != energy) { - return; - } - - // get conserved quanitites - velocity_x = dev_conserved[id + n_cells * grid_enum::momentum_x] / density_gas; - velocity_y = dev_conserved[id + n_cells * grid_enum::momentum_y] / density_gas; - velocity_z = dev_conserved[id + n_cells * grid_enum::momentum_z] / density_gas; + // Compute the temperature #ifdef DE - energy_gas = dev_conserved[id + n_cells * grid_enum::GasEnergy] / density_gas; - energy_gas = fmax(ge, (Real)TINY_NUMBER); - #endif // DE - - // calculate physical quantities - pressure = hydro_utilities::Calc_Pressure_Primitive(energy, density_gas, velocity_x, velocity_y, velocity_z, gamma); - - Real temperature_init; - temperature_init = hydro_utilities::Calc_Temp(pressure, number_density); - - #ifdef DE - temperature_init = hydro_utilities::Calc_Temp_DE(density_gas, energy_gas, gamma, number_density); - #endif // DE - - // if dual energy is turned on use temp from total internal energy - temperature = temperature_init; + Real const total_gas_energy = dev_conserved[id + n_cells * grid_enum::GasEnergy]; + Real const temperature = hydro_utilities::Calc_Temp_DE(total_gas_energy, gamma, number_density); + #else // DE is not enabled + Real const energy = dev_conserved[id + n_cells * grid_enum::Energy]; + Real const momentum_x = dev_conserved[id + n_cells * grid_enum::momentum_x]; + Real const momentum_y = dev_conserved[id + n_cells * grid_enum::momentum_y]; + Real const momentum_z = dev_conserved[id + n_cells * grid_enum::momentum_z]; + + #ifdef MHD + auto const [magnetic_x, magnetic_y, magnetic_z] = + mhd::utils::cellCenteredMagneticFields(C.host, id, xid, yid, zid, H.n_cells, H.nx, H.ny); + #else // MHD is not defined + Real const magnetic_x = 0.0, magnetic_y = 0.0, magnetic_z = 0.0; + #endif // MHD + + Real const temperature = hydro_utilities::Calc_Temp_Conserved( + energy, density, momentum_x, momentum_y, momentum_z, gamma, number_density, magnetic_x, magnetic_y, magnetic_z); + #endif // DE Real tau_sp = Calc_Sputtering_Timescale(number_density, temperature, grain_radius) / TIME_UNIT; // sputtering timescale, kyr (sim units) @@ -121,10 +110,6 @@ __global__ void Dust_Kernel(Real *dev_conserved, int nx, int ny, int nz, int n_g density_dust += dd; dev_conserved[id + n_cells * grid_enum::dust_density] = density_dust; - - #ifdef DE - dev_conserved[id + n_cells * grid_enum::GasEnergy] = density_dust * energy_gas; - #endif } } diff --git a/src/io/io.cpp b/src/io/io.cpp index a246cb6b2..a16267867 100644 --- a/src/io/io.cpp +++ b/src/io/io.cpp @@ -1578,16 +1578,17 @@ void Grid3D::Write_Projection_HDF5(hid_t file_id) int const id = cuda_utilities::compute1DIndex(xid, yid, zid, H.nx, H.ny); // sum density - dxy += C.density[id] * H.dz; + Real const d = C.density[id]; + dxy += d * H.dz; #ifdef DUST dust_xy += C.dust_density[id] * H.dz; #endif // calculate number density - Real const n = C.density[id] * DENSITY_UNIT / (mu * MP); + Real const n = d * DENSITY_UNIT / (mu * MP); // calculate temperature #ifdef DE - Real const T = hydro_utilities::Calc_Temp_DE(C.density[id], C.GasEnergy[id], gama, n); + Real const T = hydro_utilities::Calc_Temp_DE(C.GasEnergy[id], gama, n); #else // DE is not defined Real const mx = C.momentum_x[id]; Real const my = C.momentum_y[id]; @@ -1601,11 +1602,11 @@ void Grid3D::Write_Projection_HDF5(hid_t file_id) Real const magnetic_x = 0.0, magnetic_y = 0.0, magnetic_z = 0.0; #endif // MHD - Real const T = hydro_utilities::Calc_Temp_Conserved(E, C.density[id], mx, my, mz, gama, n, magnetic_x, - magnetic_y, magnetic_z); + Real const T = + hydro_utilities::Calc_Temp_Conserved(E, d, mx, my, mz, gama, n, magnetic_x, magnetic_y, magnetic_z); #endif // DE - Txy += T * H.dz; + Txy += T * d * H.dz; } int const buf_id = j + i * H.ny_real; dataset_buffer_dxy[buf_id] = dxy; @@ -1631,14 +1632,15 @@ void Grid3D::Write_Projection_HDF5(hid_t file_id) int const zid = k + H.n_ghost; int const id = cuda_utilities::compute1DIndex(xid, yid, zid, H.nx, H.ny); // sum density - dxz += C.density[id] * H.dy; + Real const d = C.density[id]; + dxz += d * H.dy; #ifdef DUST dust_xz += C.dust_density[id] * H.dy; #endif // calculate number density - Real const n = C.density[id] * DENSITY_UNIT / (mu * MP); + Real const n = d * DENSITY_UNIT / (mu * MP); #ifdef DE - Real const T = hydro_utilities::Calc_Temp_DE(C.density[id], C.GasEnergy[id], gama, n); + Real const T = hydro_utilities::Calc_Temp_DE(C.GasEnergy[id], gama, n); #else // DE is not defined Real const mx = C.momentum_x[id]; Real const my = C.momentum_y[id]; @@ -1652,10 +1654,10 @@ void Grid3D::Write_Projection_HDF5(hid_t file_id) Real const magnetic_x = 0.0, magnetic_y = 0.0, magnetic_z = 0.0; #endif // MHD - Real const T = hydro_utilities::Calc_Temp_Conserved(E, C.density[id], mx, my, mz, gama, n, magnetic_x, - magnetic_y, magnetic_z); + Real const T = + hydro_utilities::Calc_Temp_Conserved(E, d, mx, my, mz, gama, n, magnetic_x, magnetic_y, magnetic_z); #endif // DE - Txz += T * H.dy; + Txz += T * d * H.dy; } int const buf_id = k + i * H.nz_real; dataset_buffer_dxz[buf_id] = dxz; diff --git a/src/utils/hydro_utilities.h b/src/utils/hydro_utilities.h index 984a1f8ec..e49554366 100644 --- a/src/utils/hydro_utilities.h +++ b/src/utils/hydro_utilities.h @@ -88,10 +88,17 @@ inline __host__ __device__ Real Calc_Temp_Conserved(Real const E, Real const d, } #ifdef DE -inline __host__ __device__ Real Calc_Temp_DE(Real const &d, Real const &ge, Real const &gamma, Real const &n) +/*! + * \brief Compute the temperature when DE is turned on + * + * \param total_gas_energy The total gas energy in the cell. This is the value stored in the grid + * \param gamma The adiabatic index + * \param n The number density + * \return Real The temperature + */ +inline __host__ __device__ Real Calc_Temp_DE(Real const &total_gas_energy, Real const &gamma, Real const &n) { - Real T = d * ge * (gamma - 1.0) * PRESSURE_UNIT / (n * KB); - return T; + return total_gas_energy * (gamma - 1.0) * PRESSURE_UNIT / (n * KB); } #endif // DE From d737c4b407d723ed0da78537dfb84d80fb89c469 Mon Sep 17 00:00:00 2001 From: helenarichie Date: Tue, 23 Jan 2024 16:24:40 -0500 Subject: [PATCH 649/694] run clang format --- src/analysis/feedback_analysis.cpp | 2 +- src/chemistry_gpu/chemistry_functions.cpp | 2 +- src/cooling_grackle/cool_grackle.cpp | 2 +- src/gravity/gravity_functions.cpp | 8 ++++---- src/gravity/gravity_functions_gpu.cu | 4 ++-- src/grid/grid3D.cpp | 8 ++++---- src/grid/initial_conditions.cpp | 20 ++++++++++---------- src/integrators/VL_3D_cuda.cu | 2 +- src/io/io.cpp | 4 ++-- src/particles/io_particles.cpp | 14 +++++++------- src/particles/particles_3D.cpp | 14 +++++++------- src/particles/particles_boundaries_cpu.cpp | 4 ++-- src/reconstruction/plmp_cuda.cu | 2 +- src/reconstruction/ppmc_cuda_tests.cu | 2 +- src/reconstruction/ppmp_cuda.cu | 2 +- src/reconstruction/reconstruction_tests.cu | 8 ++------ src/system_tests/hydro_system_tests.cpp | 4 ++-- 17 files changed, 49 insertions(+), 53 deletions(-) diff --git a/src/analysis/feedback_analysis.cpp b/src/analysis/feedback_analysis.cpp index 5fe0e7543..3dab7b6da 100644 --- a/src/analysis/feedback_analysis.cpp +++ b/src/analysis/feedback_analysis.cpp @@ -87,7 +87,7 @@ void FeedbackAnalysis::Compute_Gas_Velocity_Dispersion(Grid3D& G) #ifdef MPI_CHOLLA MPI_Allreduce(&partial_mass, &total_mass, 1, MPI_CHREAL, MPI_SUM, world); #else - total_mass = partial_mass; + total_mass = partial_mass; #endif for (k = G.H.n_ghost; k < G.H.nz - G.H.n_ghost; k++) { diff --git a/src/chemistry_gpu/chemistry_functions.cpp b/src/chemistry_gpu/chemistry_functions.cpp index 65e3af691..7999a6d55 100644 --- a/src/chemistry_gpu/chemistry_functions.cpp +++ b/src/chemistry_gpu/chemistry_functions.cpp @@ -228,7 +228,7 @@ void Grid3D::Update_Chemistry() #ifdef COSMOLOGY Chem.H.current_z = Cosmo.current_z; #else - Chem.H.current_z = 0; + Chem.H.current_z = 0; #endif Do_Chemistry_Update(C.device, H.nx, H.ny, H.nz, H.n_ghost, H.n_fields, H.dt, Chem.H); diff --git a/src/cooling_grackle/cool_grackle.cpp b/src/cooling_grackle/cool_grackle.cpp index c5f2a8078..a7f5c36cb 100644 --- a/src/cooling_grackle/cool_grackle.cpp +++ b/src/cooling_grackle/cool_grackle.cpp @@ -89,7 +89,7 @@ void Cool_GK::Initialize(struct Parameters *P, Cosmology &Cosmo) data->metal_cooling = 1; // metal cooling off #else chprintf("WARNING: Metal Cooling is Off. \n"); - data->metal_cooling = 0; // metal cooling off + data->metal_cooling = 0; // metal cooling off #endif #ifdef PARALLEL_OMP diff --git a/src/gravity/gravity_functions.cpp b/src/gravity/gravity_functions.cpp index 6c7a6dde7..b92d06564 100644 --- a/src/gravity/gravity_functions.cpp +++ b/src/gravity/gravity_functions.cpp @@ -137,7 +137,7 @@ void Grid3D::set_dt_Gravity() dt_particles = Calc_Particles_dt(); dt_particles = fmin(dt_particles, Particles.max_dt); #ifdef ONLY_PARTICLES - dt_min = dt_particles; + dt_min = dt_particles; chprintf(" dt_particles: %f \n", dt_particles); #else chprintf(" dt_hydro: %f dt_particles: %f \n", dt_hydro, dt_particles); @@ -211,7 +211,7 @@ Real Grav3D::Get_Average_Density() #ifdef MPI_CHOLLA dens_avrg_all = ReduceRealAvg(dens_mean); #else - dens_avrg_all = dens_mean; + dens_avrg_all = dens_mean; #endif dens_avrg = dens_avrg_all; @@ -530,8 +530,8 @@ void Grid3D::Compute_Gravitational_Potential(struct Parameters *P) input_density = Grav.F.density_d; output_potential = Grav.F.potential_d; #else - input_density = Grav.F.density_h; - output_potential = Grav.F.potential_h; + input_density = Grav.F.density_h; + output_potential = Grav.F.potential_h; #endif #ifdef SOR diff --git a/src/gravity/gravity_functions_gpu.cu b/src/gravity/gravity_functions_gpu.cu index 15de64a95..b92d19084 100644 --- a/src/gravity/gravity_functions_gpu.cu +++ b/src/gravity/gravity_functions_gpu.cu @@ -127,7 +127,7 @@ void Grid3D::Copy_Hydro_Density_to_Gravity_GPU() #ifdef COSMOLOGY cosmo_rho_0_gas = Cosmo.rho_0_gas; #else - cosmo_rho_0_gas = 1.0; + cosmo_rho_0_gas = 1.0; #endif // Copy the density from the device array to the Poisson input density array @@ -261,7 +261,7 @@ void Grid3D::Extrapolate_Grav_Potential_GPU() #ifdef COSMOLOGY cosmo_factor = Cosmo.current_a * Cosmo.current_a / Cosmo.phi_0_gas; #else - cosmo_factor = 1.0; + cosmo_factor = 1.0; #endif // set values for GPU kernels diff --git a/src/grid/grid3D.cpp b/src/grid/grid3D.cpp index 93499fea0..dd65da756 100644 --- a/src/grid/grid3D.cpp +++ b/src/grid/grid3D.cpp @@ -267,13 +267,13 @@ void Grid3D::Initialize(struct Parameters *P) #ifdef DENSITY_FLOOR H.density_floor = P->density_floor; #else - H.density_floor = 0.0; + H.density_floor = 0.0; #endif #ifdef SCALAR_FLOOR H.scalar_floor = P->scalar_floor; #else - H.scalar_floor = 0.0; + H.scalar_floor = 0.0; #endif #ifdef COSMOLOGY @@ -345,8 +345,8 @@ void Grid3D::AllocateMemory(void) GPU_Error_Check(cudaHostAlloc(&C.Grav_potential, H.n_cells * sizeof(Real), cudaHostAllocDefault)); GPU_Error_Check(cudaMalloc((void **)&C.d_Grav_potential, H.n_cells * sizeof(Real))); #else - C.Grav_potential = NULL; - C.d_Grav_potential = NULL; + C.Grav_potential = NULL; + C.d_Grav_potential = NULL; #endif #ifdef CHEMISTRY_GPU diff --git a/src/grid/initial_conditions.cpp b/src/grid/initial_conditions.cpp index 30e3eb459..af558be8f 100644 --- a/src/grid/initial_conditions.cpp +++ b/src/grid/initial_conditions.cpp @@ -1482,18 +1482,18 @@ void Grid3D::Zeldovich_Pancake(struct Parameters P) Real H0, h, Omega_M, rho_0, G, z_zeldovich, z_init, x_center, T_init, k_x; chprintf("Setting Zeldovich Pancake initial conditions...\n"); - H0 = P.H0; - h = H0 / 100; + H0 = P.H0; + h = H0 / 100; Omega_M = P.Omega_M; chprintf(" h = %f \n", h); chprintf(" Omega_M = %f \n", Omega_M); H0 /= 1000; //[km/s / kpc] - G = G_COSMO; - rho_0 = 3 * H0 * H0 / (8 * M_PI * G) * Omega_M / h / h; + G = G_COSMO; + rho_0 = 3 * H0 * H0 / (8 * M_PI * G) * Omega_M / h / h; z_zeldovich = 1; - z_init = P.Init_redshift; + z_init = P.Init_redshift; chprintf(" rho_0 = %f \n", rho_0); chprintf(" z_init = %f \n", z_init); chprintf(" z_zeldovich = %f \n", z_zeldovich); @@ -1553,17 +1553,17 @@ void Grid3D::Zeldovich_Pancake(struct Parameters P) index = (int(x_pos / H.dx) + 0) % 256; // index = ( index + 16 ) % 256; dens = ics_values[0 * nPoints + index]; - vel = ics_values[1 * nPoints + index]; - E = ics_values[2 * nPoints + index]; - U = ics_values[3 * nPoints + index]; + vel = ics_values[1 * nPoints + index]; + E = ics_values[2 * nPoints + index]; + U = ics_values[3 * nPoints + index]; // // // chprintf( "%f \n", vel ); - C.density[id] = dens; + C.density[id] = dens; C.momentum_x[id] = dens * vel; C.momentum_y[id] = 0; C.momentum_z[id] = 0; - C.Energy[id] = E; + C.Energy[id] = E; #ifdef DE C.GasEnergy[id] = U; diff --git a/src/integrators/VL_3D_cuda.cu b/src/integrators/VL_3D_cuda.cu index 2b4162ffc..1118c1419 100644 --- a/src/integrators/VL_3D_cuda.cu +++ b/src/integrators/VL_3D_cuda.cu @@ -123,7 +123,7 @@ void VL_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, int #if defined(GRAVITY) dev_grav_potential = d_grav_potential; #else // not GRAVITY - dev_grav_potential = NULL; + dev_grav_potential = NULL; #endif // GRAVITY // If memory is single allocated: memory_allocated becomes true and diff --git a/src/io/io.cpp b/src/io/io.cpp index b243440e4..9959267de 100644 --- a/src/io/io.cpp +++ b/src/io/io.cpp @@ -1400,12 +1400,12 @@ void Grid3D::Write_Grid_HDF5(hid_t file_id) #ifdef OUTPUT_METALS output_metals = true; #else // not OUTPUT_METALS - output_metals = false; + output_metals = false; #endif // OUTPUT_METALS #ifdef OUTPUT_ELECTRONS output_electrons = true; #else // not OUTPUT_ELECTRONS - output_electrons = false; + output_electrons = false; #endif // OUTPUT_ELECTRONS #ifdef OUTPUT_FULL_IONIZATION output_full_ionization = true; diff --git a/src/particles/io_particles.cpp b/src/particles/io_particles.cpp index e6da774ed..e986c5287 100644 --- a/src/particles/io_particles.cpp +++ b/src/particles/io_particles.cpp @@ -445,12 +445,12 @@ void Particles3D::Load_Particles_Data_HDF5(hid_t file_id, int nfile, struct Para Real vy_max_g = vy_max; Real vz_max_g = vz_max; - Real px_min_g = px_min; - Real py_min_g = py_min; - Real pz_min_g = pz_min; - Real vx_min_g = vx_min; - Real vy_min_g = vy_min; - Real vz_min_g = vz_min; + Real px_min_g = px_min; + Real py_min_g = py_min; + Real pz_min_g = pz_min; + Real vx_min_g = vx_min; + Real vy_min_g = vy_min; + Real vz_min_g = vz_min; #endif // MPI_CHOLLA // Print initial Statistics @@ -563,7 +563,7 @@ void Grid3D::Write_Particles_Data_HDF5(hid_t file_id) #ifdef MPI_CHOLLA N_particles_total = ReducePartIntSum(Particles.n_local); #else - N_particles_total = Particles.n_local; + N_particles_total = Particles.n_local; #endif // Print the total particles when saving the particles data diff --git a/src/particles/particles_3D.cpp b/src/particles/particles_3D.cpp index 87a2be8e5..6417e4136 100644 --- a/src/particles/particles_3D.cpp +++ b/src/particles/particles_3D.cpp @@ -157,12 +157,12 @@ void Particles3D::Initialize(struct Parameters *P, Grav3D &Grav, Real xbound, Re G.boundary_type_z0 = P->zlg_bcnd; G.boundary_type_z1 = P->zug_bcnd; #else - G.boundary_type_x0 = P->xl_bcnd; - G.boundary_type_x1 = P->xu_bcnd; - G.boundary_type_y0 = P->yl_bcnd; - G.boundary_type_y1 = P->yu_bcnd; - G.boundary_type_z0 = P->zl_bcnd; - G.boundary_type_z1 = P->zu_bcnd; + G.boundary_type_x0 = P->xl_bcnd; + G.boundary_type_x1 = P->xu_bcnd; + G.boundary_type_y0 = P->yl_bcnd; + G.boundary_type_y1 = P->yu_bcnd; + G.boundary_type_z0 = P->zl_bcnd; + G.boundary_type_z1 = P->zu_bcnd; #endif #ifdef PARTICLES_GPU @@ -211,7 +211,7 @@ void Particles3D::Initialize(struct Parameters *P, Grav3D &Grav, Real xbound, Re #ifdef MPI_CHOLLA n_total_initial = ReducePartIntSum(n_local); #else - n_total_initial = n_local; + n_total_initial = n_local; #endif chprintf("Particles Initialized: \n n_local: %lu \n", n_local); diff --git a/src/particles/particles_boundaries_cpu.cpp b/src/particles/particles_boundaries_cpu.cpp index 772153534..27470befe 100644 --- a/src/particles/particles_boundaries_cpu.cpp +++ b/src/particles/particles_boundaries_cpu.cpp @@ -433,13 +433,13 @@ void Particles3D::Unload_Particles_from_Buffer_CPU(int direction, int side, Real offset_extra += 1; pId = recv_buffer[offset_extra]; #else - pId = 0; + pId = 0; #endif #ifdef PARTICLE_AGE offset_extra += 1; pAge = recv_buffer[offset_extra]; #else - pAge = 0.0; + pAge = 0.0; #endif offset_buff += N_DATA_PER_PARTICLE_TRANSFER; diff --git a/src/reconstruction/plmp_cuda.cu b/src/reconstruction/plmp_cuda.cu index f69bbdc4b..a000da4da 100644 --- a/src/reconstruction/plmp_cuda.cu +++ b/src/reconstruction/plmp_cuda.cu @@ -120,7 +120,7 @@ __global__ void PLMP_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou dge = dev_conserved[(n_fields - 1) * n_cells + id]; p_i = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, dge, gamma); #else - p_i = (dev_conserved[4 * n_cells + id] - 0.5 * d_i * (vx_i * vx_i + vy_i * vy_i + vz_i * vz_i)) * (gamma - 1.0); + p_i = (dev_conserved[4 * n_cells + id] - 0.5 * d_i * (vx_i * vx_i + vy_i * vy_i + vz_i * vz_i)) * (gamma - 1.0); #endif // PRESSURE_DE p_i = fmax(p_i, (Real)TINY_NUMBER); #ifdef SCALAR diff --git a/src/reconstruction/ppmc_cuda_tests.cu b/src/reconstruction/ppmc_cuda_tests.cu index c1319ea58..9e9b11140 100644 --- a/src/reconstruction/ppmc_cuda_tests.cu +++ b/src/reconstruction/ppmc_cuda_tests.cu @@ -139,7 +139,7 @@ TEST(tALLPpmcVLReconstructor, CorrectInputExpectCorrectOutput) #ifdef MHD size_t const n_fields = 8; #else // not MHD - size_t const n_fields = 5; + size_t const n_fields = 5; #endif // MHD // Setup host grid. Fill host grid with random values and randomly assign maximum value diff --git a/src/reconstruction/ppmp_cuda.cu b/src/reconstruction/ppmp_cuda.cu index f84946437..ae8da90cb 100644 --- a/src/reconstruction/ppmp_cuda.cu +++ b/src/reconstruction/ppmp_cuda.cu @@ -166,7 +166,7 @@ __global__ void PPMP_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou dge = dev_conserved[(n_fields - 1) * n_cells + id]; p_i = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, dge, gamma); #else - p_i = (dev_conserved[4 * n_cells + id] - 0.5 * d_i * (vx_i * vx_i + vy_i * vy_i + vz_i * vz_i)) * (gamma - 1.0); + p_i = (dev_conserved[4 * n_cells + id] - 0.5 * d_i * (vx_i * vx_i + vy_i * vy_i + vz_i * vz_i)) * (gamma - 1.0); #endif // PRESSURE_DE p_i = fmax(p_i, (Real)TINY_NUMBER); #ifdef DE diff --git a/src/reconstruction/reconstruction_tests.cu b/src/reconstruction/reconstruction_tests.cu index 74c0e6896..6c2e19af7 100644 --- a/src/reconstruction/reconstruction_tests.cu +++ b/src/reconstruction/reconstruction_tests.cu @@ -575,13 +575,9 @@ TEST(tALLReconstructionWriteData, CorrectInputExpectCorrectOutput) { // Set up test and mock up grid #ifdef MHD - reconstruction::Primitive interface { - 1, 2, 3, 4, 5, 6, 7, 8 - }; + reconstruction::Primitive interface{1, 2, 3, 4, 5, 6, 7, 8}; #else // MHD - reconstruction::Primitive interface { - 6, 7, 8, 9, 10 - }; + reconstruction::Primitive interface{6, 7, 8, 9, 10}; #endif // MHD size_t const nx = 3, ny = 3, nz = 3; size_t const n_cells = nx * ny * nz; diff --git a/src/system_tests/hydro_system_tests.cpp b/src/system_tests/hydro_system_tests.cpp index 18b2994bb..6cffe9c21 100644 --- a/src/system_tests/hydro_system_tests.cpp +++ b/src/system_tests/hydro_system_tests.cpp @@ -56,8 +56,8 @@ TEST_P(tHYDROtMHDSYSTEMSodShockTubeParameterizedMpi, CorrectInputExpectCorrectOu double const maxAllowedL1Error = 7.0E-3; double const maxAllowedError = 4.6E-2; #else - double const maxAllowedL1Error = 9.4E-5; - double const maxAllowedError = 6.4E-4; + double const maxAllowedL1Error = 9.4E-5; + double const maxAllowedError = 6.4E-4; #endif // MHD sodTest.numMpiRanks = GetParam(); From 065cd79b691ad967614298bbdcd59f238c5fa711 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Tue, 23 Jan 2024 16:35:10 -0500 Subject: [PATCH 650/694] Add MHD support to rotated projection outputs --- src/io/io.cpp | 57 +++++++++++++++++++++++++++++---------------------- 1 file changed, 33 insertions(+), 24 deletions(-) diff --git a/src/io/io.cpp b/src/io/io.cpp index a16267867..f93a30360 100644 --- a/src/io/io.cpp +++ b/src/io/io.cpp @@ -1702,7 +1702,6 @@ void Grid3D::Write_Projection_HDF5(hid_t file_id) * time. */ void Grid3D::Write_Rotated_Projection_HDF5(hid_t file_id) { - int i, j, k, id, buf_id; hid_t dataset_id, dataspace_xzr_id; Real *dataset_buffer_dxzr; Real *dataset_buffer_Txzr; @@ -1712,14 +1711,14 @@ void Grid3D::Write_Rotated_Projection_HDF5(hid_t file_id) herr_t status; Real dxy, dxz, Txy, Txz; - Real d, n, T, vx, vy, vz; + Real d, n, vx, vy, vz; Real x, y, z; // cell positions Real xp, yp, zp; // rotated positions Real alpha, beta; // projected positions int ix, iz; // projected index positions - n = T = 0; + n = 0; Real mu = 0.6; srand(137); // initialize a random number @@ -1766,11 +1765,14 @@ void Grid3D::Write_Rotated_Projection_HDF5(hid_t file_id) dataspace_xzr_id = H5Screate_simple(2, dims, NULL); // Copy the xz rotated projection to the memory buffer - for (k = 0; k < H.nz_real; k++) { - for (i = 0; i < H.nx_real; i++) { - for (j = 0; j < H.ny_real; j++) { + for (int k = 0; k < H.nz_real; k++) { + for (int i = 0; i < H.nx_real; i++) { + for (int j = 0; j < H.ny_real; j++) { // get cell index - id = (i + H.n_ghost) + (j + H.n_ghost) * H.nx + (k + H.n_ghost) * H.nx * H.ny; + int const xid = i + H.n_ghost; + int const yid = j + H.n_ghost; + int const zid = k + H.n_ghost; + int const id = cuda_utilities::compute1DIndex(xid, yid, zid, H.nx, H.ny); // get cell positions Get_Position(i + H.n_ghost, j + H.n_ghost, k + H.n_ghost, &x, &y, &z); @@ -1795,33 +1797,40 @@ void Grid3D::Write_Rotated_Projection_HDF5(hid_t file_id) #endif if ((ix >= 0) && (ix < nx_dset) && (iz >= 0) && (iz < nz_dset)) { - buf_id = iz + ix * nz_dset; - d = C.density[id]; + int const buf_id = iz + ix * nz_dset; + d = C.density[id]; // project density dataset_buffer_dxzr[buf_id] += d * H.dy; // calculate number density n = d * DENSITY_UNIT / (mu * MP); + // calculate temperature - #ifndef DE - Real mx = C.momentum_x[id]; - Real my = C.momentum_y[id]; - Real mz = C.momentum_z[id]; - Real E = C.Energy[id]; - T = (E - 0.5 * (mx * mx + my * my + mz * mz) / C.density[id]) * (gama - 1.0) * PRESSURE_UNIT / (n * KB); - #endif #ifdef DE - T = C.GasEnergy[id] * PRESSURE_UNIT * (gama - 1.0) / (n * KB); - #endif + Real const T = hydro_utilities::Calc_Temp_DE(C.GasEnergy[id], gama, n); + #else // DE is not defined + Real const mx = C.momentum_x[id]; + Real const my = C.momentum_y[id]; + Real const mz = C.momentum_z[id]; + Real const E = C.Energy[id]; + + #ifdef MHD + auto const [magnetic_x, magnetic_y, magnetic_z] = + mhd::utils::cellCenteredMagneticFields(C.host, id, xid, yid, zid, H.n_cells, H.nx, H.ny); + #else // MHD is not defined + Real const magnetic_x = 0.0, magnetic_y = 0.0, magnetic_z = 0.0; + #endif // MHD + + Real const T = + hydro_utilities::Calc_Temp_Conserved(E, d, mx, my, mz, gama, n, magnetic_x, magnetic_y, magnetic_z); + #endif // DE + Txz = T * d * H.dy; dataset_buffer_Txzr[buf_id] += Txz; // compute velocities - vx = C.momentum_x[id]; - dataset_buffer_vxxzr[buf_id] += vx * H.dy; - vy = C.momentum_y[id]; - dataset_buffer_vyxzr[buf_id] += vy * H.dy; - vz = C.momentum_z[id]; - dataset_buffer_vzxzr[buf_id] += vz * H.dy; + dataset_buffer_vxxzr[buf_id] += C.momentum_x[id] * H.dy; + dataset_buffer_vyxzr[buf_id] += C.momentum_y[id] * H.dy; + dataset_buffer_vzxzr[buf_id] += C.momentum_z[id] * H.dy; } } } From 4226ddba591673b6da7dab165b52007b45e5241a Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Tue, 23 Jan 2024 16:37:54 -0500 Subject: [PATCH 651/694] Fix test for new version of Calc_Temp_DE --- src/utils/hydro_utilities_tests.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/utils/hydro_utilities_tests.cpp b/src/utils/hydro_utilities_tests.cpp index b200ddd8c..fe0cbe9e6 100644 --- a/src/utils/hydro_utilities_tests.cpp +++ b/src/utils/hydro_utilities_tests.cpp @@ -145,7 +145,7 @@ TEST(tHYDROHydroUtilsCalcTempDE, CorrectInputExpectCorrectOutput) for (size_t i = 0; i < parameters.names.size(); i++) { Real test_Ts = - hydro_utilities::Calc_Temp_DE(parameters.d.at(i), parameters.ge.at(i), parameters.gamma, parameters.n.at(i)); + hydro_utilities::Calc_Temp_DE(parameters.d.at(i) * parameters.ge.at(i), parameters.gamma, parameters.n.at(i)); testing_utilities::Check_Results(fiducial_Ts.at(i), test_Ts, parameters.names.at(i)); } From c84e4a292b4ab49ae0161773dbe1da47558916c5 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Tue, 23 Jan 2024 16:46:32 -0500 Subject: [PATCH 652/694] Fix typo in Dust_Kernel, clarify comment --- src/dust/dust_cuda.cu | 5 +++-- src/utils/hydro_utilities.h | 7 ++++--- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/src/dust/dust_cuda.cu b/src/dust/dust_cuda.cu index ced77858c..c38180ab5 100644 --- a/src/dust/dust_cuda.cu +++ b/src/dust/dust_cuda.cu @@ -87,8 +87,9 @@ __global__ void Dust_Kernel(Real *dev_conserved, int nx, int ny, int nz, int n_g Real const magnetic_x = 0.0, magnetic_y = 0.0, magnetic_z = 0.0; #endif // MHD - Real const temperature = hydro_utilities::Calc_Temp_Conserved( - energy, density, momentum_x, momentum_y, momentum_z, gamma, number_density, magnetic_x, magnetic_y, magnetic_z); + Real const temperature = + hydro_utilities::Calc_Temp_Conserved(energy, density_gas, momentum_x, momentum_y, momentum_z, gamma, + number_density, magnetic_x, magnetic_y, magnetic_z); #endif // DE Real tau_sp = Calc_Sputtering_Timescale(number_density, temperature, grain_radius) / diff --git a/src/utils/hydro_utilities.h b/src/utils/hydro_utilities.h index e49554366..fab5aece1 100644 --- a/src/utils/hydro_utilities.h +++ b/src/utils/hydro_utilities.h @@ -91,9 +91,10 @@ inline __host__ __device__ Real Calc_Temp_Conserved(Real const E, Real const d, /*! * \brief Compute the temperature when DE is turned on * - * \param total_gas_energy The total gas energy in the cell. This is the value stored in the grid - * \param gamma The adiabatic index - * \param n The number density + * \param[in] total_gas_energy The total gas energy in the cell. This is the value stored in the grid at + * grid_enum::GasEnergy + * \param[in] gamma The adiabatic index + * \param[in] n The number density * \return Real The temperature */ inline __host__ __device__ Real Calc_Temp_DE(Real const &total_gas_energy, Real const &gamma, Real const &n) From d292c46cbfd2fe00a3eb12603bc2c0d3c25bd1ef Mon Sep 17 00:00:00 2001 From: Helena Richie Date: Tue, 23 Jan 2024 18:08:43 -0500 Subject: [PATCH 653/694] undo accidental change --- src/grid/grid3D.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/grid/grid3D.cpp b/src/grid/grid3D.cpp index 1cb8d9f56..4c3ddd1ab 100644 --- a/src/grid/grid3D.cpp +++ b/src/grid/grid3D.cpp @@ -157,7 +157,7 @@ void Grid3D::Initialize(struct Parameters *P) C_cfl = 0.3; #ifdef AVERAGE_SLOW_CELLS - H.min_dt_slow = 1e-5; // Initialize the minumum dt to a tiny number + H.min_dt_slow = 1e-100; // Initialize the minumum dt to a tiny number #endif // AVERAGE_SLOW_CELLS #ifndef MPI_CHOLLA From da2d264db7b148366e7cb31e3cb83885ccc94161 Mon Sep 17 00:00:00 2001 From: helenarichie Date: Tue, 23 Jan 2024 18:38:35 -0500 Subject: [PATCH 654/694] undo accidental removal of density floor in half-step --- cholla-tests-data | 2 +- src/integrators/VL_3D_cuda.cu | 15 +++++++++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/cholla-tests-data b/cholla-tests-data index 71eb66d63..dcd73ff52 160000 --- a/cholla-tests-data +++ b/cholla-tests-data @@ -1 +1 @@ -Subproject commit 71eb66d63622ac15c0844ae96ec9034cd5e4f4d3 +Subproject commit dcd73ff52b9027627b247c6d888bcdb56840c85e diff --git a/src/integrators/VL_3D_cuda.cu b/src/integrators/VL_3D_cuda.cu index 1118c1419..b1bc118b7 100644 --- a/src/integrators/VL_3D_cuda.cu +++ b/src/integrators/VL_3D_cuda.cu @@ -455,6 +455,21 @@ __global__ void Update_Conserved_Variables_3D_half(Real *dev_conserved, Real *de dtodz * (dev_F_z[(n_fields - 1) * n_cells + kmo] - dev_F_z[(n_fields - 1) * n_cells + id]) + 0.5 * P * (dtodx * (vx_imo - vx_ipo) + dtody * (vy_jmo - vy_jpo) + dtodz * (vz_kmo - vz_kpo)); #endif // DE + #ifdef DENSITY_FLOOR + if (dev_conserved_half[id] < density_floor) { + dens_0 = dev_conserved_half[id]; + printf("###Thread density change %f -> %f \n", dens_0, density_floor); + dev_conserved_half[id] = density_floor; + // Scale the conserved values to the new density + dev_conserved_half[1 * n_cells + id] *= (density_floor / dens_0); + dev_conserved_half[2 * n_cells + id] *= (density_floor / dens_0); + dev_conserved_half[3 * n_cells + id] *= (density_floor / dens_0); + dev_conserved_half[4 * n_cells + id] *= (density_floor / dens_0); + #ifdef DE + dev_conserved_half[(n_fields - 1) * n_cells + id] *= (density_floor / dens_0); + #endif // DE + } + #endif // DENSITY_FLOOR } } From ef470301b2091ab7c5159e9d3908acb21c7bf687 Mon Sep 17 00:00:00 2001 From: helenarichie Date: Tue, 23 Jan 2024 18:49:16 -0500 Subject: [PATCH 655/694] fix undeclared variable --- src/integrators/VL_3D_cuda.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/integrators/VL_3D_cuda.cu b/src/integrators/VL_3D_cuda.cu index b1bc118b7..0657a946f 100644 --- a/src/integrators/VL_3D_cuda.cu +++ b/src/integrators/VL_3D_cuda.cu @@ -457,7 +457,7 @@ __global__ void Update_Conserved_Variables_3D_half(Real *dev_conserved, Real *de #endif // DE #ifdef DENSITY_FLOOR if (dev_conserved_half[id] < density_floor) { - dens_0 = dev_conserved_half[id]; + Real dens_0 = dev_conserved_half[id]; printf("###Thread density change %f -> %f \n", dens_0, density_floor); dev_conserved_half[id] = density_floor; // Scale the conserved values to the new density From 2a5d7ed34b1c9d268827b1014af407d7662e5bd6 Mon Sep 17 00:00:00 2001 From: helenarichie Date: Tue, 23 Jan 2024 19:41:30 -0500 Subject: [PATCH 656/694] remove debugging print statement --- src/dust/dust_cuda.cu | 1 - 1 file changed, 1 deletion(-) diff --git a/src/dust/dust_cuda.cu b/src/dust/dust_cuda.cu index 28f764900..8376a2a1b 100644 --- a/src/dust/dust_cuda.cu +++ b/src/dust/dust_cuda.cu @@ -139,7 +139,6 @@ __device__ __host__ Real Calc_Sputtering_Timescale(Real number_density, Real tem number_density /= (6e-4); // gas number density in units of 10^-27 g/cm^3 // sputtering timescale, s - printf("%e\n", grain_radius); Real tau_sp = A * (a / number_density) * (pow(temperature_0 / temperature, omega) + 1); return tau_sp; From db0f78865d64ca77a0a9d05cc3043840080d02be Mon Sep 17 00:00:00 2001 From: helenarichie Date: Tue, 23 Jan 2024 19:50:35 -0500 Subject: [PATCH 657/694] add host wrapper functions for temp and scalar floor kernels and move calls to grid3d::update_hydro_grid --- src/grid/grid3D.cpp | 38 ++++++++++++++++++------------- src/hydro/hydro_cuda.cu | 37 ++++++++++++++++++++++++++---- src/hydro/hydro_cuda.h | 8 +++++-- src/hydro/hydro_cuda_tests.cu | 6 ++--- src/integrators/VL_3D_cuda.cu | 18 ++------------- src/integrators/VL_3D_cuda.h | 4 ++-- src/integrators/simple_3D_cuda.cu | 18 ++------------- src/integrators/simple_3D_cuda.h | 4 ++-- 8 files changed, 71 insertions(+), 62 deletions(-) diff --git a/src/grid/grid3D.cpp b/src/grid/grid3D.cpp index 4c3ddd1ab..2e548e1f6 100644 --- a/src/grid/grid3D.cpp +++ b/src/grid/grid3D.cpp @@ -430,16 +430,6 @@ void Grid3D::Execute_Hydro_Integrator(void) z_off = nz_local_start; #endif - // Set the lower limit for density and temperature (Internal Energy) - Real U_floor, density_floor; - density_floor = H.density_floor; - // Minimum of internal energy from minumum of temperature - U_floor = H.temperature_floor * KB / (gama - 1) / MP / SP_ENERGY_UNIT; -#ifdef COSMOLOGY - U_floor = H.temperature_floor / (gama - 1) / MP * KB * 1e-10; // ( km/s )^2 - U_floor /= Cosmo.v_0_gas * Cosmo.v_0_gas / Cosmo.current_a / Cosmo.current_a; -#endif - #ifdef CPU_TIME Timer.Hydro_Integrator.Start(); #endif // CPU_TIME @@ -472,13 +462,12 @@ void Grid3D::Execute_Hydro_Integrator(void) #ifdef CUDA #ifdef VL VL_Algorithm_3D_CUDA(C.device, C.d_Grav_potential, H.nx, H.ny, H.nz, x_off, y_off, z_off, H.n_ghost, H.dx, H.dy, - H.dz, H.xbound, H.ybound, H.zbound, H.dt, H.n_fields, H.custom_grav, density_floor, U_floor, - C.Grav_potential, H.scalar_floor); + H.dz, H.xbound, H.ybound, H.zbound, H.dt, H.n_fields, H.custom_grav, H.density_floor, + C.Grav_potential); #endif // VL #ifdef SIMPLE Simple_Algorithm_3D_CUDA(C.device, C.d_Grav_potential, H.nx, H.ny, H.nz, x_off, y_off, z_off, H.n_ghost, H.dx, H.dy, - H.dz, H.xbound, H.ybound, H.zbound, H.dt, H.n_fields, H.custom_grav, density_floor, - U_floor, C.Grav_potential, H.scalar_floor); + H.dz, H.xbound, H.ybound, H.zbound, H.dt, H.n_fields, H.custom_grav, H.density_floor, C.Grav_potential); #endif // SIMPLE #endif } else { @@ -512,10 +501,27 @@ Real Grid3D::Update_Hydro_Grid() Execute_Hydro_Integrator(); - // == Perform chemistry/cooling (there are a few different cases) == - #ifdef CUDA + #ifdef TEMPERATURE_FLOOR + // Set the lower limit temperature (Internal Energy) + Real U_floor; + // Minimum of internal energy from minumum of temperature + U_floor = H.temperature_floor * KB / (gama - 1) / MP / SP_ENERGY_UNIT; + #ifdef COSMOLOGY + U_floor = H.temperature_floor / (gama - 1) / MP * KB * 1e-10; // ( km/s )^2 + U_floor /= Cosmo.v_0_gas * Cosmo.v_0_gas / Cosmo.current_a / Cosmo.current_a; + #endif + Apply_Temperature_Floor(C.device, H.nx, H.ny, H.nz, H.n_ghost, H.n_fields, U_floor); + #endif // TEMPERATURE_FLOOR + + #ifdef SCALAR_FLOOR + #ifdef DUST + Apply_Scalar_Floor(C.device, H.nx, H.ny, H.nz, H.n_ghost, grid_enum::dust_density, H.scalar_floor); + #endif + #endif // SCALAR_FLOOR + + // == Perform chemistry/cooling (there are a few different cases) == #ifdef COOLING_GPU #ifdef CPU_TIME Timer.Cooling_GPU.Start(); diff --git a/src/hydro/hydro_cuda.cu b/src/hydro/hydro_cuda.cu index 125e851b4..484023353 100644 --- a/src/hydro/hydro_cuda.cu +++ b/src/hydro/hydro_cuda.cu @@ -1101,8 +1101,22 @@ __global__ void Sync_Energies_3D(Real *dev_conserved, int nx, int ny, int nz, in #endif // DE -__global__ void Apply_Temperature_Floor(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, - Real U_floor) + +void Apply_Temperature_Floor(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real U_floor) +{ + // set values for GPU kernels + int n_cells = nx * ny * nz; + int ngrid = (n_cells + TPB - 1) / TPB; + // number of blocks per 1D grid + dim3 dim1dGrid(ngrid, 1, 1); + // number of threads per 1D block + dim3 dim1dBlock(TPB, 1, 1); + + hipLaunchKernelGGL(Temperature_Floor_Kernel, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, nz, n_ghost, n_fields, U_floor); +} + +__global__ void Temperature_Floor_Kernel(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, + Real U_floor) { int id, xid, yid, zid, n_cells; Real d, d_inv, vx, vy, vz, E, Ekin, U; @@ -1254,8 +1268,21 @@ __device__ void Average_Cell_All_Fields(int i, int j, int k, int nx, int ny, int printf("%3d %3d %3d FC: d: %e E:%e P:%e vx:%e vy:%e vz:%e\n", i, j, k, d, E, P, vx_av, vy_av, vz_av); } -__global__ void Apply_Scalar_Floor(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int field_num, - Real scalar_floor) +void Apply_Scalar_Floor(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int field_num, Real scalar_floor) +{ + // set values for GPU kernels + int n_cells = nx * ny * nz; + int ngrid = (n_cells + TPB - 1) / TPB; + // number of blocks per 1D grid + dim3 dim1dGrid(ngrid, 1, 1); + // number of threads per 1D block + dim3 dim1dBlock(TPB, 1, 1); + + hipLaunchKernelGGL(Scalar_Floor_Kernel, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, nz, n_ghost, field_num, scalar_floor); +} + +__global__ void Scalar_Floor_Kernel(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int field_num, + Real scalar_floor) { int id, xid, yid, zid, n_cells; Real scalar; // variable to store the value of the scalar before a floor is applied @@ -1273,7 +1300,7 @@ __global__ void Apply_Scalar_Floor(Real *dev_conserved, int nx, int ny, int nz, scalar = dev_conserved[id + n_cells * field_num]; if (scalar < scalar_floor) { - printf("###Thread scalar change %f -> %f \n", scalar, scalar_floor); + // printf("###Thread scalar change %f -> %f \n", scalar, scalar_floor); dev_conserved[id + n_cells * field_num] = scalar_floor; } } diff --git a/src/hydro/hydro_cuda.h b/src/hydro/hydro_cuda.h index 8fcfbba05..ea64b2164 100644 --- a/src/hydro/hydro_cuda.h +++ b/src/hydro/hydro_cuda.h @@ -85,10 +85,14 @@ __global__ void Average_Slow_Cells_3D(Real *dev_conserved, int nx, int ny, int n Real dy, Real dz, Real gamma, Real max_dti_slow); #endif -__global__ void Apply_Temperature_Floor(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, +void Apply_Temperature_Floor(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real U_floor); + +__global__ void Temperature_Floor_Kernel(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real U_floor); -__global__ void Apply_Scalar_Floor(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int field_num, +void Apply_Scalar_Floor(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int field_num, Real scalar_floor); + +__global__ void Scalar_Floor_Kernel(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int field_num, Real scalar_floor); __global__ void Partial_Update_Advected_Internal_Energy_1D(Real *dev_conserved, Real *Q_Lx, Real *Q_Rx, int nx, diff --git a/src/hydro/hydro_cuda_tests.cu b/src/hydro/hydro_cuda_tests.cu index 0796a3064..ade162926 100644 --- a/src/hydro/hydro_cuda_tests.cu +++ b/src/hydro/hydro_cuda_tests.cu @@ -174,21 +174,21 @@ TEST(tHYDROScalarFloor, CorrectInputExpectCorrectOutput) // Case where scalar is below the floor host_conserved.at(field_num) = 0.0; // scalar dev_conserved.cpyHostToDevice(host_conserved); - hipLaunchKernelGGL(Apply_Scalar_Floor, dim1dGrid, dim1dBlock, 0, 0, dev_conserved.data(), nx, ny, nz, n_ghost, + hipLaunchKernelGGL(Scalar_Floor_Kernel, dim1dGrid, dim1dBlock, 0, 0, dev_conserved.data(), nx, ny, nz, n_ghost, field_num, scalar_floor); testing_utilities::Check_Results(scalar_floor, dev_conserved.at(field_num), "below floor"); // Case where scalar is above the floor host_conserved.at(field_num) = 2.0; // scalar dev_conserved.cpyHostToDevice(host_conserved); - hipLaunchKernelGGL(Apply_Scalar_Floor, dim1dGrid, dim1dBlock, 0, 0, dev_conserved.data(), nx, ny, nz, n_ghost, + hipLaunchKernelGGL(Scalar_Floor_Kernel, dim1dGrid, dim1dBlock, 0, 0, dev_conserved.data(), nx, ny, nz, n_ghost, field_num, scalar_floor); testing_utilities::Check_Results(host_conserved.at(field_num), dev_conserved.at(field_num), "above floor"); // Case where scalar is at the floor host_conserved.at(field_num) = 1.0; // scalar dev_conserved.cpyHostToDevice(host_conserved); - hipLaunchKernelGGL(Apply_Scalar_Floor, dim1dGrid, dim1dBlock, 0, 0, dev_conserved.data(), nx, ny, nz, n_ghost, + hipLaunchKernelGGL(Scalar_Floor_Kernel, dim1dGrid, dim1dBlock, 0, 0, dev_conserved.data(), nx, ny, nz, n_ghost, field_num, scalar_floor); testing_utilities::Check_Results(host_conserved.at(field_num), dev_conserved.at(field_num), "at floor"); } diff --git a/src/integrators/VL_3D_cuda.cu b/src/integrators/VL_3D_cuda.cu index 0657a946f..c0250983f 100644 --- a/src/integrators/VL_3D_cuda.cu +++ b/src/integrators/VL_3D_cuda.cu @@ -37,8 +37,8 @@ __global__ void Update_Conserved_Variables_3D_half(Real *dev_conserved, Real *de void VL_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, int ny, int nz, int x_off, int y_off, int z_off, int n_ghost, Real dx, Real dy, Real dz, Real xbound, Real ybound, Real zbound, - Real dt, int n_fields, int custom_grav, Real density_floor, Real U_floor, - Real *host_grav_potential, Real scalar_floor) + Real dt, int n_fields, int custom_grav, Real density_floor, + Real *host_grav_potential) { // Here, *dev_conserved contains the entire // set of conserved variables on the grid @@ -321,20 +321,6 @@ void VL_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, int GPU_Error_Check(); #endif // DE - #ifdef TEMPERATURE_FLOOR - hipLaunchKernelGGL(Apply_Temperature_Floor, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, nz, n_ghost, n_fields, - U_floor); - GPU_Error_Check(); - #endif // TEMPERATURE_FLOOR - - #ifdef SCALAR_FLOOR - #ifdef DUST - hipLaunchKernelGGL(Apply_Scalar_Floor, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, nz, n_ghost, - grid_enum::dust_density, scalar_floor); - GPU_Error_Check(); - #endif - #endif // SCALAR_FLOOR - return; } diff --git a/src/integrators/VL_3D_cuda.h b/src/integrators/VL_3D_cuda.h index 4104493bc..3310f5fe5 100644 --- a/src/integrators/VL_3D_cuda.h +++ b/src/integrators/VL_3D_cuda.h @@ -10,8 +10,8 @@ void VL_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, int ny, int nz, int x_off, int y_off, int z_off, int n_ghost, Real dx, Real dy, Real dz, Real xbound, Real ybound, Real zbound, - Real dt, int n_fields, int custom_grav, Real density_floor, Real U_floor, - Real *host_grav_potential, Real scalar_floor); + Real dt, int n_fields, int custom_grav, Real density_floor, + Real *host_grav_potential); void Free_Memory_VL_3D(); diff --git a/src/integrators/simple_3D_cuda.cu b/src/integrators/simple_3D_cuda.cu index b68f0a351..3a1ce77b1 100644 --- a/src/integrators/simple_3D_cuda.cu +++ b/src/integrators/simple_3D_cuda.cu @@ -26,8 +26,8 @@ void Simple_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, int ny, int nz, int x_off, int y_off, int z_off, int n_ghost, Real dx, Real dy, Real dz, Real xbound, Real ybound, Real zbound, - Real dt, int n_fields, int custom_grav, Real density_floor, Real U_floor, - Real *host_grav_potential, Real scalar_floor) + Real dt, int n_fields, int custom_grav, Real density_floor, + Real *host_grav_potential) { // Here, *dev_conserved contains the entire // set of conserved variables on the grid @@ -180,20 +180,6 @@ void Simple_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, GPU_Error_Check(); #endif - #ifdef TEMPERATURE_FLOOR - hipLaunchKernelGGL(Apply_Temperature_Floor, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, nz, n_ghost, n_fields, - U_floor); - GPU_Error_Check(); - #endif // TEMPERATURE_FLOOR - - #ifdef SCALAR_FLOOR - #ifdef DUST - hipLaunchKernelGGL(Apply_Scalar_Floor, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, nz, n_ghost, - grid_enum::dust_density, scalar_floor); - CudaCheckError(); - #endif DUST - #endif // SCALAR_FLOOR - return; } diff --git a/src/integrators/simple_3D_cuda.h b/src/integrators/simple_3D_cuda.h index e2cea247e..57d211ed1 100644 --- a/src/integrators/simple_3D_cuda.h +++ b/src/integrators/simple_3D_cuda.h @@ -11,8 +11,8 @@ void Simple_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, int ny, int nz, int x_off, int y_off, int z_off, int n_ghost, Real dx, Real dy, Real dz, Real xbound, Real ybound, Real zbound, - Real dt, int n_fields, int custom_grav, Real density_floor, Real U_floor, - Real *host_grav_potential, Real scalar_floor); + Real dt, int n_fields, int custom_grav, Real density_floor, + Real *host_grav_potential); void Free_Memory_Simple_3D(); From 71df1156e81ac3c51705448e74fb6e2ec77a3ede Mon Sep 17 00:00:00 2001 From: helenarichie Date: Tue, 23 Jan 2024 19:51:16 -0500 Subject: [PATCH 658/694] run clang format' --- src/grid/grid3D.cpp | 11 ++++++----- src/hydro/hydro_cuda.cu | 11 ++++++----- src/hydro/hydro_cuda.h | 4 ++-- src/integrators/VL_3D_cuda.cu | 3 +-- src/integrators/VL_3D_cuda.h | 3 +-- src/integrators/simple_3D_cuda.cu | 3 +-- src/integrators/simple_3D_cuda.h | 3 +-- 7 files changed, 18 insertions(+), 20 deletions(-) diff --git a/src/grid/grid3D.cpp b/src/grid/grid3D.cpp index 2e548e1f6..d25189d89 100644 --- a/src/grid/grid3D.cpp +++ b/src/grid/grid3D.cpp @@ -158,7 +158,7 @@ void Grid3D::Initialize(struct Parameters *P) #ifdef AVERAGE_SLOW_CELLS H.min_dt_slow = 1e-100; // Initialize the minumum dt to a tiny number -#endif // AVERAGE_SLOW_CELLS +#endif // AVERAGE_SLOW_CELLS #ifndef MPI_CHOLLA @@ -467,7 +467,8 @@ void Grid3D::Execute_Hydro_Integrator(void) #endif // VL #ifdef SIMPLE Simple_Algorithm_3D_CUDA(C.device, C.d_Grav_potential, H.nx, H.ny, H.nz, x_off, y_off, z_off, H.n_ghost, H.dx, H.dy, - H.dz, H.xbound, H.ybound, H.zbound, H.dt, H.n_fields, H.custom_grav, H.density_floor, C.Grav_potential); + H.dz, H.xbound, H.ybound, H.zbound, H.dt, H.n_fields, H.custom_grav, H.density_floor, + C.Grav_potential); #endif // SIMPLE #endif } else { @@ -508,16 +509,16 @@ Real Grid3D::Update_Hydro_Grid() Real U_floor; // Minimum of internal energy from minumum of temperature U_floor = H.temperature_floor * KB / (gama - 1) / MP / SP_ENERGY_UNIT; - #ifdef COSMOLOGY + #ifdef COSMOLOGY U_floor = H.temperature_floor / (gama - 1) / MP * KB * 1e-10; // ( km/s )^2 U_floor /= Cosmo.v_0_gas * Cosmo.v_0_gas / Cosmo.current_a / Cosmo.current_a; - #endif + #endif Apply_Temperature_Floor(C.device, H.nx, H.ny, H.nz, H.n_ghost, H.n_fields, U_floor); #endif // TEMPERATURE_FLOOR #ifdef SCALAR_FLOOR #ifdef DUST - Apply_Scalar_Floor(C.device, H.nx, H.ny, H.nz, H.n_ghost, grid_enum::dust_density, H.scalar_floor); + Apply_Scalar_Floor(C.device, H.nx, H.ny, H.nz, H.n_ghost, grid_enum::dust_density, H.scalar_floor); #endif #endif // SCALAR_FLOOR diff --git a/src/hydro/hydro_cuda.cu b/src/hydro/hydro_cuda.cu index 484023353..69c4c083d 100644 --- a/src/hydro/hydro_cuda.cu +++ b/src/hydro/hydro_cuda.cu @@ -1101,7 +1101,6 @@ __global__ void Sync_Energies_3D(Real *dev_conserved, int nx, int ny, int nz, in #endif // DE - void Apply_Temperature_Floor(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real U_floor) { // set values for GPU kernels @@ -1111,8 +1110,9 @@ void Apply_Temperature_Floor(Real *dev_conserved, int nx, int ny, int nz, int n_ dim3 dim1dGrid(ngrid, 1, 1); // number of threads per 1D block dim3 dim1dBlock(TPB, 1, 1); - - hipLaunchKernelGGL(Temperature_Floor_Kernel, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, nz, n_ghost, n_fields, U_floor); + + hipLaunchKernelGGL(Temperature_Floor_Kernel, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, nz, n_ghost, + n_fields, U_floor); } __global__ void Temperature_Floor_Kernel(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, @@ -1277,8 +1277,9 @@ void Apply_Scalar_Floor(Real *dev_conserved, int nx, int ny, int nz, int n_ghost dim3 dim1dGrid(ngrid, 1, 1); // number of threads per 1D block dim3 dim1dBlock(TPB, 1, 1); - - hipLaunchKernelGGL(Scalar_Floor_Kernel, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, nz, n_ghost, field_num, scalar_floor); + + hipLaunchKernelGGL(Scalar_Floor_Kernel, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, nz, n_ghost, field_num, + scalar_floor); } __global__ void Scalar_Floor_Kernel(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int field_num, diff --git a/src/hydro/hydro_cuda.h b/src/hydro/hydro_cuda.h index ea64b2164..c6d9ac96d 100644 --- a/src/hydro/hydro_cuda.h +++ b/src/hydro/hydro_cuda.h @@ -88,12 +88,12 @@ __global__ void Average_Slow_Cells_3D(Real *dev_conserved, int nx, int ny, int n void Apply_Temperature_Floor(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real U_floor); __global__ void Temperature_Floor_Kernel(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, - Real U_floor); + Real U_floor); void Apply_Scalar_Floor(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int field_num, Real scalar_floor); __global__ void Scalar_Floor_Kernel(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int field_num, - Real scalar_floor); + Real scalar_floor); __global__ void Partial_Update_Advected_Internal_Energy_1D(Real *dev_conserved, Real *Q_Lx, Real *Q_Rx, int nx, int n_ghost, Real dx, Real dt, Real gamma, int n_fields); diff --git a/src/integrators/VL_3D_cuda.cu b/src/integrators/VL_3D_cuda.cu index c0250983f..3d1c8eb11 100644 --- a/src/integrators/VL_3D_cuda.cu +++ b/src/integrators/VL_3D_cuda.cu @@ -37,8 +37,7 @@ __global__ void Update_Conserved_Variables_3D_half(Real *dev_conserved, Real *de void VL_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, int ny, int nz, int x_off, int y_off, int z_off, int n_ghost, Real dx, Real dy, Real dz, Real xbound, Real ybound, Real zbound, - Real dt, int n_fields, int custom_grav, Real density_floor, - Real *host_grav_potential) + Real dt, int n_fields, int custom_grav, Real density_floor, Real *host_grav_potential) { // Here, *dev_conserved contains the entire // set of conserved variables on the grid diff --git a/src/integrators/VL_3D_cuda.h b/src/integrators/VL_3D_cuda.h index 3310f5fe5..d3c58cc0b 100644 --- a/src/integrators/VL_3D_cuda.h +++ b/src/integrators/VL_3D_cuda.h @@ -10,8 +10,7 @@ void VL_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, int ny, int nz, int x_off, int y_off, int z_off, int n_ghost, Real dx, Real dy, Real dz, Real xbound, Real ybound, Real zbound, - Real dt, int n_fields, int custom_grav, Real density_floor, - Real *host_grav_potential); + Real dt, int n_fields, int custom_grav, Real density_floor, Real *host_grav_potential); void Free_Memory_VL_3D(); diff --git a/src/integrators/simple_3D_cuda.cu b/src/integrators/simple_3D_cuda.cu index 3a1ce77b1..8cbba2d07 100644 --- a/src/integrators/simple_3D_cuda.cu +++ b/src/integrators/simple_3D_cuda.cu @@ -26,8 +26,7 @@ void Simple_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, int ny, int nz, int x_off, int y_off, int z_off, int n_ghost, Real dx, Real dy, Real dz, Real xbound, Real ybound, Real zbound, - Real dt, int n_fields, int custom_grav, Real density_floor, - Real *host_grav_potential) + Real dt, int n_fields, int custom_grav, Real density_floor, Real *host_grav_potential) { // Here, *dev_conserved contains the entire // set of conserved variables on the grid diff --git a/src/integrators/simple_3D_cuda.h b/src/integrators/simple_3D_cuda.h index 57d211ed1..67da69ca9 100644 --- a/src/integrators/simple_3D_cuda.h +++ b/src/integrators/simple_3D_cuda.h @@ -11,8 +11,7 @@ void Simple_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, int ny, int nz, int x_off, int y_off, int z_off, int n_ghost, Real dx, Real dy, Real dz, Real xbound, Real ybound, Real zbound, - Real dt, int n_fields, int custom_grav, Real density_floor, - Real *host_grav_potential); + Real dt, int n_fields, int custom_grav, Real density_floor, Real *host_grav_potential); void Free_Memory_Simple_3D(); From f9bddfb684b9c694fdffbf7a0523a50e7ad0729d Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Wed, 24 Jan 2024 11:04:42 -0500 Subject: [PATCH 659/694] Replace `total_gas_energy` name with `gas_energy` gas_energy is the total gas energy and specific_gas_energy is the specific version --- src/dust/dust_cuda.cu | 4 ++-- src/utils/hydro_utilities.h | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/dust/dust_cuda.cu b/src/dust/dust_cuda.cu index c38180ab5..4dc05648c 100644 --- a/src/dust/dust_cuda.cu +++ b/src/dust/dust_cuda.cu @@ -72,8 +72,8 @@ __global__ void Dust_Kernel(Real *dev_conserved, int nx, int ny, int nz, int n_g // Compute the temperature #ifdef DE - Real const total_gas_energy = dev_conserved[id + n_cells * grid_enum::GasEnergy]; - Real const temperature = hydro_utilities::Calc_Temp_DE(total_gas_energy, gamma, number_density); + Real const gas_energy = dev_conserved[id + n_cells * grid_enum::GasEnergy]; + Real const temperature = hydro_utilities::Calc_Temp_DE(gas_energy, gamma, number_density); #else // DE is not enabled Real const energy = dev_conserved[id + n_cells * grid_enum::Energy]; Real const momentum_x = dev_conserved[id + n_cells * grid_enum::momentum_x]; diff --git a/src/utils/hydro_utilities.h b/src/utils/hydro_utilities.h index fab5aece1..1a464e899 100644 --- a/src/utils/hydro_utilities.h +++ b/src/utils/hydro_utilities.h @@ -91,15 +91,15 @@ inline __host__ __device__ Real Calc_Temp_Conserved(Real const E, Real const d, /*! * \brief Compute the temperature when DE is turned on * - * \param[in] total_gas_energy The total gas energy in the cell. This is the value stored in the grid at + * \param[in] gas_energy The total gas energy in the cell. This is the value stored in the grid at * grid_enum::GasEnergy * \param[in] gamma The adiabatic index * \param[in] n The number density * \return Real The temperature */ -inline __host__ __device__ Real Calc_Temp_DE(Real const &total_gas_energy, Real const &gamma, Real const &n) +inline __host__ __device__ Real Calc_Temp_DE(Real const gas_energy, Real const gamma, Real const n) { - return total_gas_energy * (gamma - 1.0) * PRESSURE_UNIT / (n * KB); + return gas_energy * (gamma - 1.0) * PRESSURE_UNIT / (n * KB); } #endif // DE From ec6e34d378e1683e13b000ccd3121be0462cb1a1 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Wed, 24 Jan 2024 11:06:27 -0500 Subject: [PATCH 660/694] Refactor MHD support in `Dust_Kernel` Now you only use magnetic variables when MHD is enabled --- src/dust/dust_cuda.cu | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/dust/dust_cuda.cu b/src/dust/dust_cuda.cu index 4dc05648c..69cc83eec 100644 --- a/src/dust/dust_cuda.cu +++ b/src/dust/dust_cuda.cu @@ -83,14 +83,15 @@ __global__ void Dust_Kernel(Real *dev_conserved, int nx, int ny, int nz, int n_g #ifdef MHD auto const [magnetic_x, magnetic_y, magnetic_z] = mhd::utils::cellCenteredMagneticFields(C.host, id, xid, yid, zid, H.n_cells, H.nx, H.ny); - #else // MHD is not defined - Real const magnetic_x = 0.0, magnetic_y = 0.0, magnetic_z = 0.0; - #endif // MHD - Real const temperature = hydro_utilities::Calc_Temp_Conserved(energy, density_gas, momentum_x, momentum_y, momentum_z, gamma, number_density, magnetic_x, magnetic_y, magnetic_z); - #endif // DE + #else // MHD is not defined + Real const temperature = hydro_utilities::Calc_Temp_Conserved(energy, density_gas, momentum_x, momentum_y, + momentum_z, gamma, number_density); + #endif // MHD + + #endif // DE Real tau_sp = Calc_Sputtering_Timescale(number_density, temperature, grain_radius) / TIME_UNIT; // sputtering timescale, kyr (sim units) From 495d453e0e2e3fbad687988b2b1d7d35d10d867a Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Wed, 24 Jan 2024 12:53:30 -0500 Subject: [PATCH 661/694] Refactor temperature calculations in io.cpp --- src/io/io.cpp | 26 +++++++++++--------------- 1 file changed, 11 insertions(+), 15 deletions(-) diff --git a/src/io/io.cpp b/src/io/io.cpp index f93a30360..51303c72d 100644 --- a/src/io/io.cpp +++ b/src/io/io.cpp @@ -1598,12 +1598,11 @@ void Grid3D::Write_Projection_HDF5(hid_t file_id) #ifdef MHD auto const [magnetic_x, magnetic_y, magnetic_z] = mhd::utils::cellCenteredMagneticFields(C.host, id, xid, yid, zid, H.n_cells, H.nx, H.ny); - #else // MHD is not defined - Real const magnetic_x = 0.0, magnetic_y = 0.0, magnetic_z = 0.0; - #endif // MHD - Real const T = hydro_utilities::Calc_Temp_Conserved(E, d, mx, my, mz, gama, n, magnetic_x, magnetic_y, magnetic_z); + #else // MHD is not defined + Real const T = hydro_utilities::Calc_Temp_Conserved(E, d, mx, my, mz, gama, n); + #endif // MHD #endif // DE Txy += T * d * H.dz; @@ -1650,12 +1649,11 @@ void Grid3D::Write_Projection_HDF5(hid_t file_id) #ifdef MHD auto const [magnetic_x, magnetic_y, magnetic_z] = mhd::utils::cellCenteredMagneticFields(C.host, id, xid, yid, zid, H.n_cells, H.nx, H.ny); - #else // MHD is not defined - Real const magnetic_x = 0.0, magnetic_y = 0.0, magnetic_z = 0.0; - #endif // MHD - Real const T = hydro_utilities::Calc_Temp_Conserved(E, d, mx, my, mz, gama, n, magnetic_x, magnetic_y, magnetic_z); + #else // MHD is not defined + Real const T = hydro_utilities::Calc_Temp_Conserved(E, d, mx, my, mz, gama, n); + #endif // MHD #endif // DE Txz += T * d * H.dy; } @@ -1711,14 +1709,13 @@ void Grid3D::Write_Rotated_Projection_HDF5(hid_t file_id) herr_t status; Real dxy, dxz, Txy, Txz; - Real d, n, vx, vy, vz; + Real d, vx, vy, vz; Real x, y, z; // cell positions Real xp, yp, zp; // rotated positions Real alpha, beta; // projected positions int ix, iz; // projected index positions - n = 0; Real mu = 0.6; srand(137); // initialize a random number @@ -1802,7 +1799,7 @@ void Grid3D::Write_Rotated_Projection_HDF5(hid_t file_id) // project density dataset_buffer_dxzr[buf_id] += d * H.dy; // calculate number density - n = d * DENSITY_UNIT / (mu * MP); + Real const n = d * DENSITY_UNIT / (mu * MP); // calculate temperature #ifdef DE @@ -1816,12 +1813,11 @@ void Grid3D::Write_Rotated_Projection_HDF5(hid_t file_id) #ifdef MHD auto const [magnetic_x, magnetic_y, magnetic_z] = mhd::utils::cellCenteredMagneticFields(C.host, id, xid, yid, zid, H.n_cells, H.nx, H.ny); - #else // MHD is not defined - Real const magnetic_x = 0.0, magnetic_y = 0.0, magnetic_z = 0.0; - #endif // MHD - Real const T = hydro_utilities::Calc_Temp_Conserved(E, d, mx, my, mz, gama, n, magnetic_x, magnetic_y, magnetic_z); + #else // MHD is not defined + Real const T = hydro_utilities::Calc_Temp_Conserved(E, d, mx, my, mz, gama, n); + #endif // MHD #endif // DE Txz = T * d * H.dy; From bfb85a3530bdb6bbdee8af4ad709ef1e1abaa1ab Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Thu, 18 Jan 2024 12:08:14 -0500 Subject: [PATCH 662/694] Update hydro build to use VL and PLMC Updated the default hydro build to use the Van Leer (VL) and PLMC build options. Also, updated the test data for those new builds --- builds/make.type.hydro | 6 +- cholla-tests-data | 2 +- src/reconstruction/plmc_cuda_tests.cu | 130 ++++++++++++++------------ 3 files changed, 74 insertions(+), 64 deletions(-) diff --git a/builds/make.type.hydro b/builds/make.type.hydro index f34d78172..b35dbd9ae 100644 --- a/builds/make.type.hydro +++ b/builds/make.type.hydro @@ -3,12 +3,12 @@ DFLAGS += -DCUDA DFLAGS += -DMPI_CHOLLA DFLAGS += -DPRECISION=2 -DFLAGS += -DPPMC +DFLAGS += -DPLMC DFLAGS += -DHLLC # Integrator -DFLAGS += -DSIMPLE -#DFLAGS += -DVL +# DFLAGS += -DSIMPLE +DFLAGS += -DVL # Apply a density and temperature floor DFLAGS += -DDENSITY_FLOOR diff --git a/cholla-tests-data b/cholla-tests-data index 71eb66d63..86c2e3145 160000 --- a/cholla-tests-data +++ b/cholla-tests-data @@ -1 +1 @@ -Subproject commit 71eb66d63622ac15c0844ae96ec9034cd5e4f4d3 +Subproject commit 86c2e3145915f37cc83dfc072ab3273d23aebf5e diff --git a/src/reconstruction/plmc_cuda_tests.cu b/src/reconstruction/plmc_cuda_tests.cu index 68f11b396..0207a09ac 100644 --- a/src/reconstruction/plmc_cuda_tests.cu +++ b/src/reconstruction/plmc_cuda_tests.cu @@ -25,6 +25,9 @@ TEST(tHYDROPlmcReconstructor, CorrectInputExpectCorrectOutput) { +#ifndef VL + #warning "The tHYDROPlmcReconstructor.CorrectInputExpectCorrectOutput only supports the Van Leer (VL) integrator" +#endif // VL // Set up PRNG to use std::mt19937_64 prng(42); std::uniform_real_distribution doubleRand(0.1, 5); @@ -49,66 +52,71 @@ TEST(tHYDROPlmcReconstructor, CorrectInputExpectCorrectOutput) dev_grid.cpyHostToDevice(host_grid); // Fiducial Data - std::vector> fiducial_interface_left = {{{26, 2.1584359129984056}, - {27, 0.70033864721549188}, - {106, 2.2476363309467553}, - {107, 3.0633780053857027}, - {186, 2.2245934101106259}, - {187, 2.1015872413794123}, - {266, 2.1263341057778309}, - {267, 3.9675148506537838}, - {346, 3.3640057502842691}, - {347, 21.091316282933843}}, - {{21, 0.72430827309279655}, - {37, 0.19457128219588618}, - {101, 5.4739527659741896}, - {117, 4.4286255636679313}, - {181, 0.12703829036056602}, - {197, 2.2851440769830953}, - {261, 1.5337035731959561}, - {277, 2.697375839048191}, - {341, 22.319601655044117}, - {357, 82.515887983144168}}, - {{25, 2.2863650183226212}, - {29, 1.686415421301841}, - {105, 0.72340346106443465}, - {109, 5.4713687086831388}, - {185, 3.929100145230096}, - {189, 4.9166140516911483}, - {265, 0.95177493689267167}, - {269, 0.46056494878491938}, - {345, 3.6886096301452787}, - {349, 16.105488797582133}}}; - std::vector> fiducial_interface_right = {{{25, 3.8877922383184833}, - {26, 0.70033864721549188}, - {105, 1.5947787943675635}, - {106, 3.0633780053857027}, - {185, 4.0069556576401011}, - {186, 2.1015872413794123}, - {265, 1.7883678016935785}, - {266, 3.9675148506537838}, - {345, 2.8032969746372527}, - {346, 21.091316282933843}}, - {{17, 0.43265217076853835}, - {33, 0.19457128219588618}, - {97, 3.2697645945288754}, - {113, 4.4286255636679313}, - {177, 0.07588397666718491}, - {193, 2.2851440769830953}, - {257, 0.91612950577699748}, - {273, 2.697375839048191}, - {337, 13.332201861384396}, - {353, 82.515887983144168}}, - {{5, 2.2863650183226212}, - {9, 1.686415421301841}, - {85, 0.72340346106443465}, - {89, 1.7792505446336098}, - {165, 5.3997753452111859}, - {169, 1.4379190463124139}, - {245, 0.95177493689267167}, - {249, 0.46056494878491938}, - {325, 6.6889498465051407}, - {329, 1.6145084086614281}}}; + std::vector> fiducial_interface_left = {{{26, 3.8877922383184833}, + {27, 0.70033864721549188}, + {106, 5.6625525038177784}, + {107, 3.0633780053857027}, + {186, 4.0069556576401011}, + {187, 2.1015872413794123}, + {266, 5.1729859852329314}, + {267, 3.9675148506537838}, + {346, 9.6301414677176531}, + {347, 21.091316282933843}}, + {{21, 0.74780807318015607}, + {37, 0.19457128219588618}, + {101, 5.6515522777659895}, + {117, 4.4286255636679313}, + {181, 0.13115998072061905}, + {197, 2.2851440769830953}, + {261, 1.5834637771067519}, + {277, 2.697375839048191}, + {341, 23.043749364531674}, + {357, 82.515887983144168}}, + {{25, 2.2863650183226212}, + {29, 1.686415421301841}, + {105, 0.72340346106443465}, + {109, 5.9563546443402542}, + {185, 3.6128571662018358}, + {189, 5.3735653401079038}, + {265, 0.95177493689267167}, + {269, 0.46056494878491938}, + {345, 3.1670194578067843}, + {349, 19.142817472509272}}}; + + std::vector> fiducial_interface_right = + + {{{25, 3.8877922383184833}, + {26, 0.70033864721549188}, + {105, 1.594778794367564}, + {106, 3.0633780053857027}, + {185, 4.0069556576401011}, + {186, 2.1015872413794123}, + {265, 1.7883678016935782}, + {266, 3.9675148506537838}, + {345, 2.8032969746372531}, + {346, 21.091316282933843}}, + {{17, 0.43265217076853835}, + {33, 0.19457128219588618}, + {97, 3.2697645945288754}, + {113, 4.4286255636679313}, + {177, 0.07588397666718491}, + {193, 2.2851440769830953}, + {257, 0.91612950577699748}, + {273, 2.697375839048191}, + {337, 13.332201861384396}, + {353, 82.515887983144168}}, + {{5, 2.2863650183226212}, + {9, 1.686415421301841}, + {85, 0.72340346106443465}, + {89, 1.77925054463361}, + {165, 5.3997753452111859}, + {169, 1.4379190463124141}, + {245, 0.95177493689267167}, + {249, 0.46056494878491938}, + {325, 6.6889498465051398}, + {329, 1.6145084086614285}}} + + ; // Loop over different directions for (size_t direction = 0; direction < 3; direction++) { @@ -161,6 +169,8 @@ TEST(tHYDROPlmcReconstructor, CorrectInputExpectCorrectOutput) ? 0.0 : fiducial_interface_right.at(direction)[i]; + // if (test_val != 0.0) std::cout << "{" << i << ", " << to_string_exact(test_val) << "}," << std::endl; + testing_utilities::Check_Results( fiducial_val, test_val, "right interface at i=" + std::to_string(i) + ", in direction " + std::to_string(direction)); From 90fd6f1679b5f4758615c6713218463532b27e9a Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Thu, 18 Jan 2024 13:36:52 -0500 Subject: [PATCH 663/694] Switch MHD to using PLMC by default Also updated test data --- builds/make.type.mhd | 2 +- cholla-tests-data | 2 +- src/system_tests/mhd_system_tests.cpp | 24 ++++++++++++------------ 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/builds/make.type.mhd b/builds/make.type.mhd index 4459819e8..d08e6373e 100644 --- a/builds/make.type.mhd +++ b/builds/make.type.mhd @@ -9,7 +9,7 @@ MPI_GPU ?= DFLAGS += -DCUDA DFLAGS += -DMPI_CHOLLA DFLAGS += -DPRECISION=2 -DFLAGS += -DPPMC +DFLAGS += -DPLMC DFLAGS += -DHLLD DFLAGS += -DMHD diff --git a/cholla-tests-data b/cholla-tests-data index 86c2e3145..2c6aa912c 160000 --- a/cholla-tests-data +++ b/cholla-tests-data @@ -1 +1 @@ -Subproject commit 86c2e3145915f37cc83dfc072ab3273d23aebf5e +Subproject commit 2c6aa912c7d1223bb4c486eb50beb65be4d6da9d diff --git a/src/system_tests/mhd_system_tests.cpp b/src/system_tests/mhd_system_tests.cpp index 4261797b2..a14caa9a1 100644 --- a/src/system_tests/mhd_system_tests.cpp +++ b/src/system_tests/mhd_system_tests.cpp @@ -228,7 +228,7 @@ TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, SlowMagnetosonicWaveRightMovingC #ifdef PCM waveTest.runL1ErrorTest(4.E-7, 4.E-7); #elif defined(PLMC) - waveTest.runL1ErrorTest(2.0E-8, 2.7E-8); + waveTest.runL1ErrorTest(2.0E-8, 2.75E-8); #elif defined(PPMC) waveTest.runL1ErrorTest(1.45E-9, 1.3E-9); #endif // PCM @@ -266,7 +266,7 @@ TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, SlowMagnetosonicWaveLeftMovingCo #ifdef PCM waveTest.runL1ErrorTest(4.E-7, 4.E-7); #elif defined(PLMC) - waveTest.runL1ErrorTest(2.0E-8, 2.7E-8); + waveTest.runL1ErrorTest(2.0E-8, 2.75E-8); #elif defined(PPMC) waveTest.runL1ErrorTest(1.45E-9, 1.3E-9); #endif // PCM @@ -416,12 +416,12 @@ TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, FastMagnetosonicWaveExpectSecond waveTest.setFiducialNumTimeSteps(numTimeSteps[domain_direction - 1]); // Run the wave - waveTest.runL1ErrorTest(3.0E-8, 4.0E-8); + waveTest.runL1ErrorTest(7.0E-8, 1.5E-7); // Check the scaling double const low_res_l2norm = waveTest.getL2Norm(); testing_utilities::Check_Results(4.0, low_res_l2norm / high_res_l2norms["fast_" + std::to_string(domain_direction)], - "", 0.17); + "", 0.2); } TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, SlowMagnetosonicWaveExpectSecondOrderConvergence) @@ -452,12 +452,12 @@ TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, SlowMagnetosonicWaveExpectSecond waveTest.setFiducialNumTimeSteps(numTimeSteps[domain_direction - 1]); // Run the wave - waveTest.runL1ErrorTest(3.0E-8, 4.0E-8); + waveTest.runL1ErrorTest(5.4E-8, 8.0E-8); // Check the scaling double const low_res_l2norm = waveTest.getL2Norm(); testing_utilities::Check_Results(4.0, low_res_l2norm / high_res_l2norms["slow_" + std::to_string(domain_direction)], - "", 0.17); + "", 0.2); } TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, AlfvenWaveExpectSecondOrderConvergence) @@ -487,12 +487,12 @@ TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, AlfvenWaveExpectSecondOrderConve waveTest.setFiducialNumTimeSteps(numTimeSteps[domain_direction - 1]); // Run the wave - waveTest.runL1ErrorTest(3.0E-8, 4.0E-8); + waveTest.runL1ErrorTest(4.5E-8, 8.0E-8); // Check the scaling double const low_res_l2norm = waveTest.getL2Norm(); testing_utilities::Check_Results(4.0, low_res_l2norm / high_res_l2norms["alfven_" + std::to_string(domain_direction)], - "", 0.17); + "", 0.2); } TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, MHDContactWaveExpectSecondOrderConvergence) @@ -523,12 +523,12 @@ TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, MHDContactWaveExpectSecondOrderC waveTest.setFiducialNumTimeSteps(numTimeSteps[domain_direction - 1]); // Run the wave - waveTest.runL1ErrorTest(3.0E-8, 4.0E-8); + waveTest.runL1ErrorTest(5.0E-8, 8.0E-8); // Check the scaling double const low_res_l2norm = waveTest.getL2Norm(); testing_utilities::Check_Results( - 4.0, low_res_l2norm / high_res_l2norms["contact_" + std::to_string(domain_direction)], "", 0.17); + 4.0, low_res_l2norm / high_res_l2norms["contact_" + std::to_string(domain_direction)], "", 0.2); } INSTANTIATE_TEST_SUITE_P(, tMHDSYSTEMLinearWavesParameterizedAngle, @@ -645,7 +645,7 @@ TEST_P(tMHDSYSTEMLinearWavesParameterizedMpi, SlowMagnetosonicWaveRightMovingCor #ifdef PCM waveTest.runL1ErrorTest(4.E-7, 4.E-7); #elif defined(PLMC) - waveTest.runL1ErrorTest(2.0E-8, 2.7E-8); + waveTest.runL1ErrorTest(2.0E-8, 2.75E-8); #elif defined(PPMC) waveTest.runL1ErrorTest(1.4E-9, 1.3E-9); #endif // PCM @@ -681,7 +681,7 @@ TEST_P(tMHDSYSTEMLinearWavesParameterizedMpi, SlowMagnetosonicWaveLeftMovingCorr #ifdef PCM waveTest.runL1ErrorTest(4.E-7, 4.E-7); #elif defined(PLMC) - waveTest.runL1ErrorTest(2.0E-8, 2.7E-8); + waveTest.runL1ErrorTest(2.0E-8, 2.8E-8); #elif defined(PPMC) waveTest.runL1ErrorTest(1.4E-9, 1.3E-9); #endif // PCM From 752af6a780fb5c59b8fe2ebd9552e041795d6bcb Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Thu, 18 Jan 2024 13:40:23 -0500 Subject: [PATCH 664/694] Update other builds to PLMC and VL default Updated template, basic_scalar, disk, dust, rot_proj, static_grav to use PLMC and Van Leer integrator --- builds/make.inc.template | 4 ++-- builds/make.type.basic_scalar | 6 +++--- builds/make.type.disk | 2 +- builds/make.type.dust | 2 +- builds/make.type.rot_proj | 6 +++--- builds/make.type.static_grav | 6 +++--- 6 files changed, 13 insertions(+), 13 deletions(-) diff --git a/builds/make.inc.template b/builds/make.inc.template index 3ae156225..98d2c146c 100644 --- a/builds/make.inc.template +++ b/builds/make.inc.template @@ -33,8 +33,8 @@ DFLAGS += -DHDF5 # Reconstruction #DFLAGS += -DPCM #DFLAGS += -DPLMP -#DFLAGS += -DPLMC -DFLAGS += -DPPMP +DFLAGS += -DPLMC +#DFLAGS += -DPPMP #DFLAGS += -DPPMC # Riemann Solver diff --git a/builds/make.type.basic_scalar b/builds/make.type.basic_scalar index d2dd75892..5aa4a5d0e 100644 --- a/builds/make.type.basic_scalar +++ b/builds/make.type.basic_scalar @@ -3,12 +3,12 @@ DFLAGS += -DCUDA DFLAGS += -DMPI_CHOLLA DFLAGS += -DPRECISION=2 -DFLAGS += -DPPMC +DFLAGS += -DPLMC DFLAGS += -DHLLC # Integrator -DFLAGS += -DSIMPLE -#DFLAGS += -DVL +# DFLAGS += -DSIMPLE +DFLAGS += -DVL # Apply a density and temperature floor DFLAGS += -DDENSITY_FLOOR diff --git a/builds/make.type.disk b/builds/make.type.disk index f2e3f0ec1..284b2c73d 100644 --- a/builds/make.type.disk +++ b/builds/make.type.disk @@ -24,7 +24,7 @@ DFLAGS += -DGRAVITY_5_POINTS_GRADIENT DFLAGS += -DCUDA DFLAGS += -DMPI_CHOLLA DFLAGS += -DPRECISION=2 -DFLAGS += -DPPMC +DFLAGS += -DPLMC DFLAGS += -DHLLC DFLAGS += -DVL diff --git a/builds/make.type.dust b/builds/make.type.dust index 24e27e30f..b6b808202 100644 --- a/builds/make.type.dust +++ b/builds/make.type.dust @@ -9,7 +9,7 @@ MPI_GPU ?= DFLAGS += -DCUDA DFLAGS += -DMPI_CHOLLA DFLAGS += -DPRECISION=2 -DFLAGS += -DPPMC +DFLAGS += -DPLMC DFLAGS += -DHLLC # DFLAGS += -DDE diff --git a/builds/make.type.rot_proj b/builds/make.type.rot_proj index e6faa7514..e29ab43e7 100644 --- a/builds/make.type.rot_proj +++ b/builds/make.type.rot_proj @@ -3,12 +3,12 @@ DFLAGS += -DCUDA DFLAGS += -DMPI_CHOLLA DFLAGS += -DPRECISION=2 -DFLAGS += -DPPMC +DFLAGS += -DPLMC DFLAGS += -DHLLC # Integrator -DFLAGS += -DSIMPLE -#DFLAGS += -DVL +# DFLAGS += -DSIMPLE +DFLAGS += -DVL # Apply a density and temperature floor DFLAGS += -DDENSITY_FLOOR diff --git a/builds/make.type.static_grav b/builds/make.type.static_grav index 4f13e7288..cd77643f2 100644 --- a/builds/make.type.static_grav +++ b/builds/make.type.static_grav @@ -3,12 +3,12 @@ DFLAGS += -DCUDA DFLAGS += -DMPI_CHOLLA DFLAGS += -DPRECISION=2 -DFLAGS += -DPPMC +DFLAGS += -DPLMC DFLAGS += -DHLLC # Integrator -DFLAGS += -DSIMPLE -#DFLAGS += -DVL +# DFLAGS += -DSIMPLE +DFLAGS += -DVL # Apply a density and temperature floor DFLAGS += -DDENSITY_FLOOR From 8efd83df33e7eb668ebe1964e811b414b70ea9f0 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Thu, 18 Jan 2024 13:54:53 -0500 Subject: [PATCH 665/694] Update gravity test data for switch to PLMC/VL --- cholla-tests-data | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cholla-tests-data b/cholla-tests-data index 2c6aa912c..da5c3a309 160000 --- a/cholla-tests-data +++ b/cholla-tests-data @@ -1 +1 @@ -Subproject commit 2c6aa912c7d1223bb4c486eb50beb65be4d6da9d +Subproject commit da5c3a309d5451fabdec27fd7942e6121bb9c277 From 1f678e024e10fbbe0be031e17853a091e59295f0 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Mon, 22 Jan 2024 13:54:24 -0500 Subject: [PATCH 666/694] Make sure cosmology builds use SIMPLE integrator --- builds/make.type.hydro | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/builds/make.type.hydro b/builds/make.type.hydro index b35dbd9ae..1a96baaa9 100644 --- a/builds/make.type.hydro +++ b/builds/make.type.hydro @@ -7,8 +7,11 @@ DFLAGS += -DPLMC DFLAGS += -DHLLC # Integrator -# DFLAGS += -DSIMPLE +ifeq ($(findstring cosmology,$(TYPE)),cosmology) +DFLAGS += -DSIMPLE +else DFLAGS += -DVL +endif # Apply a density and temperature floor DFLAGS += -DDENSITY_FLOOR From 654c0ba595ab2ebdec9a5e48cf9f1dd6fa7e08cf Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Tue, 23 Jan 2024 17:02:12 -0500 Subject: [PATCH 667/694] Fix clang-tidy warning --- src/global/global.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/global/global.cpp b/src/global/global.cpp index 1b8dada22..842011dec 100644 --- a/src/global/global.cpp +++ b/src/global/global.cpp @@ -242,6 +242,10 @@ void Parse_Param(char *name, char *value, struct Parameters *parms) #endif // DE } else if (strcmp(name, "output_always") == 0) { int tmp = atoi(value); + // In this case the CHOLLA_ASSERT macro runs into issuse with the readability-simplify-boolean-expr clang-tidy check + // due to some weird macro expansion stuff. That check has been disabled here for now but in clang-tidy 18 the + // IgnoreMacro option should be used instead. + // NOLINTNEXTLINE(readability-simplify-boolean-expr) CHOLLA_ASSERT((tmp == 0) or (tmp == 1), "output_always must be 1 or 0."); parms->output_always = tmp; #ifdef MHD From f240b3b037737b409d72c002263981267f57ff84 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Tue, 23 Jan 2024 17:29:13 -0500 Subject: [PATCH 668/694] Replace #warning with print to cerr Avoids issues with running clang-tidy in non-VL builds --- src/reconstruction/plmc_cuda_tests.cu | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/reconstruction/plmc_cuda_tests.cu b/src/reconstruction/plmc_cuda_tests.cu index 0207a09ac..678f6329d 100644 --- a/src/reconstruction/plmc_cuda_tests.cu +++ b/src/reconstruction/plmc_cuda_tests.cu @@ -26,7 +26,10 @@ TEST(tHYDROPlmcReconstructor, CorrectInputExpectCorrectOutput) { #ifndef VL - #warning "The tHYDROPlmcReconstructor.CorrectInputExpectCorrectOutput only supports the Van Leer (VL) integrator" + std::cerr << "Warning: The tHYDROPlmcReconstructor.CorrectInputExpectCorrectOutput only supports the Van Leer (VL) " + "integrator" + << std::endl; + return; #endif // VL // Set up PRNG to use std::mt19937_64 prng(42); From cb5e8f3270926aecfec8f0d521afd4a27f552504 Mon Sep 17 00:00:00 2001 From: helenarichie Date: Fri, 26 Jan 2024 10:21:02 -0500 Subject: [PATCH 669/694] run clang format --- src/dust/dust_cuda.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/dust/dust_cuda.cu b/src/dust/dust_cuda.cu index b32224c3e..8b72facdf 100644 --- a/src/dust/dust_cuda.cu +++ b/src/dust/dust_cuda.cu @@ -90,7 +90,7 @@ __global__ void Dust_Kernel(Real *dev_conserved, int nx, int ny, int nz, int n_g Real const temperature = hydro_utilities::Calc_Temp_Conserved(energy, density_gas, momentum_x, momentum_y, momentum_z, gamma, number_density); #endif // MHD - #endif // DE + #endif // DE Real tau_sp = Calc_Sputtering_Timescale(number_density, temperature, grain_radius) / TIME_UNIT; // sputtering timescale, kyr (sim units) From e37035eeecbb1c404f660bf799718b81b51642d4 Mon Sep 17 00:00:00 2001 From: Matthew Abruzzo Date: Thu, 25 Jan 2024 16:01:14 -0500 Subject: [PATCH 670/694] we now write all files from different simulation cycles to separate directories. --- src/gravity/gravity_restart.cpp | 5 +- src/io/io.cpp | 110 ++++++++++++++++++-------------- src/io/io.h | 50 ++++++++++++++- src/main.cpp | 1 - src/particles/io_particles.cpp | 16 +---- 5 files changed, 115 insertions(+), 67 deletions(-) diff --git a/src/gravity/gravity_restart.cpp b/src/gravity/gravity_restart.cpp index d44af57a9..d2a09e24d 100644 --- a/src/gravity/gravity_restart.cpp +++ b/src/gravity/gravity_restart.cpp @@ -56,9 +56,8 @@ void Grav3D::Read_Restart_HDF5(struct Parameters* P, int nfile) void Grav3D::Write_Restart_HDF5(struct Parameters* P, int nfile) { H5open(); - char filename[MAXLEN]; - Gravity_Restart_Filename(filename, P->outdir, nfile); - hid_t file_id = H5Fcreate(filename, H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT); + std::string filename = FnameTemplate(*P).format_fname(nfile, "_gravity"); + hid_t file_id = H5Fcreate(filename.c_str(), H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT); // Write dt_now hsize_t attr_dims = 1; diff --git a/src/io/io.cpp b/src/io/io.cpp index 51303c72d..31430a61f 100644 --- a/src/io/io.cpp +++ b/src/io/io.cpp @@ -95,6 +95,11 @@ void Write_Data(Grid3D &G, struct Parameters P, int nfile) chprintf("\nSaving Snapshot: %d \n", nfile); + // ensure the output-directory exists (try to create it if it doesn't exist) + // -> Aside: it would be nice to pass an FnameTemplate instance into each function that uses it, + // rather than reconstructing it everywhere + Ensure_Dir_Exists(FnameTemplate(P).effective_output_dir_path(nfile)); + #ifdef HDF5 // Initialize HDF5 interface H5open(); @@ -193,20 +198,11 @@ void Write_Data(Grid3D &G, struct Parameters P, int nfile) void Output_Data(Grid3D &G, struct Parameters P, int nfile) { // create the filename - std::string filename(P.outdir); - filename += std::to_string(nfile); + std::string filename = FnameTemplate(P).format_fname(nfile, ""); -#if defined BINARY - filename += ".bin"; -#elif defined HDF5 - filename += ".h5"; -#else - filename += ".txt"; +#if !defined(BINARY) && !defined(HDF5) if (G.H.nx * G.H.ny * G.H.nz > 1000) printf("Ascii outputs only recommended for small problems!\n"); #endif -#ifdef MPI_CHOLLA - filename += "." + std::to_string(procID); -#endif // open the file for binary writes #if defined BINARY @@ -285,12 +281,7 @@ void Output_Float32(Grid3D &G, struct Parameters P, int nfile) } // create the filename - std::string filename(P.outdir); - filename += std::to_string(nfile); - filename += ".float32.h5"; - #ifdef MPI_CHOLLA - filename += "." + std::to_string(procID); - #endif // MPI_CHOLLA + std::string filename = FnameTemplate(P).format_fname(nfile, ".float32"); // create hdf5 file hid_t file_id; /* file identifier */ @@ -393,13 +384,7 @@ void Output_Projected_Data(Grid3D &G, struct Parameters P, int nfile) herr_t status; // create the filename - std::string filename(P.outdir); - filename += std::to_string(nfile); - filename += "_proj.h5"; - - #ifdef MPI_CHOLLA - filename += "." + std::to_string(procID); - #endif /*MPI_CHOLLA*/ + std::string filename = FnameTemplate(P).format_fname(nfile, "_proj"); // Create a new file file_id = H5Fcreate(filename.data(), H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT); @@ -438,13 +423,7 @@ void Output_Rotated_Projected_Data(Grid3D &G, struct Parameters P, int nfile) herr_t status; // create the filename - std::string filename(P.outdir); - filename += std::to_string(nfile); - filename += "_rot_proj.h5"; - - #ifdef MPI_CHOLLA - filename += "." + std::to_string(procID); - #endif /*MPI_CHOLLA*/ + std::string filename = FnameTemplate(P).format_fname(nfile, "_rot_proj"); if (G.R.flag_delta == 1) { // if flag_delta==1, then we are just outputting a @@ -543,13 +522,7 @@ void Output_Slices(Grid3D &G, struct Parameters P, int nfile) herr_t status; // create the filename - std::string filename(P.outdir); - filename += std::to_string(nfile); - filename += "_slice.h5"; - - #ifdef MPI_CHOLLA - filename += "." + std::to_string(procID); - #endif /*MPI_CHOLLA*/ + std::string filename = FnameTemplate(P).format_fname(nfile, "_slice"); // Create a new file file_id = H5Fcreate(filename.data(), H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT); @@ -2803,32 +2776,73 @@ void Write_Debug(Real *Value, const char *fname, int nValues, int iProc) fclose(fp); } -void Ensure_Outdir_Exists(std::string outdir) +std::string FnameTemplate::effective_output_dir_path(int nfile) const noexcept { - if (outdir == "") { - return; - } else if (Is_Root_Proc()) { + // for consistency, ensure that the returned string always has a trailing "/" + if (outdir_.empty()) { + return "./"; + } else if (separate_cycle_dirs_) { + return this->outdir_ + "/" + std::to_string(nfile) + "/"; + } else { + // if the last character of outdir is not a '/', then the substring of + // characters after the final '/' (or entire string if there isn't any '/') + // is treated as a file-prefix + // + // this is accomplished here: + std::filesystem::path without_file_prefix = std::filesystem::path(this->outdir_).parent_path(); + return without_file_prefix.string() + "/"; + } +} + +std::string FnameTemplate::format_fname(int nfile, const std::string &pre_extension_suffix) const noexcept +{ + // get the leading section of the string + const std::string path_prefix = + (separate_cycle_dirs_) + ? (effective_output_dir_path(nfile) + "/") // while redundant, the slash signals our intent + : outdir_; + + // get the file extension +#if defined BINARY + const char *extension = ".bin"; +#elif defined HDF5 + const char *extension = ".h5"; +#else + const char *extension = ".txt"; +#endif + + std::string procID_part; // initialized to empty string +#ifdef MPI_CHOLLA + procID_part = ("." + std::to_string(procID)); +#endif + + return path_prefix + std::to_string(nfile) + pre_extension_suffix + extension + procID_part; +} + +void Ensure_Dir_Exists(std::string dir_path) +{ + if (Is_Root_Proc()) { // if the last character of outdir is not a '/', then the substring of // characters after the final '/' (or entire string if there isn't any '/') // is treated as a file-prefix // // this is accomplished here: - std::filesystem::path without_file_prefix = std::filesystem::path(outdir).parent_path(); + std::filesystem::path path = std::filesystem::path(dir_path); - if (!without_file_prefix.empty()) { + if (!dir_path.empty()) { // try to create all directories specified within outdir (does nothing if // the directories already exist) std::error_code err_code; - std::filesystem::create_directories(without_file_prefix, err_code); + std::filesystem::create_directories(path, err_code); // confirm that an error-code wasn't set & that the path actually refers // to a directory (it's unclear from docs whether err-code is set in that // case) - if (err_code or not std::filesystem::is_directory(without_file_prefix)) { + if (err_code or not std::filesystem::is_directory(path)) { CHOLLA_ERROR( "something went wrong while trying to create the path to the " - "output-dir: %s", - outdir.c_str()); + "directory: %s", + dir_path.c_str()); } } } diff --git a/src/io/io.h b/src/io/io.h index 598a57a66..690f7f5a6 100644 --- a/src/io/io.h +++ b/src/io/io.h @@ -51,11 +51,59 @@ void Write_Message_To_Log_File(const char* message); void Write_Debug(Real* Value, const char* fname, int nValues, int iProc); +/* Lightweight object designed to centralize the file-naming logic (& any associated configuration). + * + * Cholla pathnames traditionally followed the following template: + * "{outdir}{nfile}{pre_extension_suffix}{extension}[.{proc_id}]" + * where each curly-braced token represents a different variable. In detail: + * - `{outdir}` is the parameter from the parameter file. The historical behavior (that we currently + * maintain), if this is non-empty, then all charaters following the last '/' are treated as a + * prefix to the output file name (if there aren't any '/' characters, then the whole string is + * effectively a prefix. + * - `{nfile}` is the current file-output count. + * - `{pre_extension_suffix}` is the pre-hdf5-extension suffix. It's the suffix that precedes the + * file extension (or `{extension}`) + * - `{extension}` is the filename extension. Examples include ".h5" or ".bin" or ".txt". + * - `{proc_id}` represents the process-id that held the data that will be written to this file. + * In non-MPI runs, this will be omitted. + * + * Instances can be configured to support the following newer file-naming template + * "{outdir}/{nfile}/{nfile}{pre_extension_suffix}{extension}[.{proc_id}]" + * where the the significance of each curly-braced token is largely unchanged. There are 2 things + * worth noting: + * - all files written at a single simulation-cycle are now grouped in a single directory + * - `{outdir}` never specifies a file prefix. When `{outdir}` is empty, it is treated as "./". + * Otherwise, we effectively append '/' to the end of `{outdir}` + * + * \note + * This could probably pull double-duty and get reused with infile. + */ +class FnameTemplate +{ + public: + FnameTemplate() = delete; + + FnameTemplate(const Parameters& P) : separate_cycle_dirs_(false), outdir_(P.outdir) {} + + /* Specifies whether separate cycles are written to separate directories */ + bool separate_cycle_dirs() const noexcept { return separate_cycle_dirs_; } + + /* Returns the effective output-directory used for outputs at a given simulation-cycle */ + std::string effective_output_dir_path(int nfile) const noexcept; + + /* format the file path */ + std::string format_fname(int nfile, const std::string& pre_extension_suffix) const noexcept; + + private: + bool separate_cycle_dirs_; + std::string outdir_; +}; + /* Checks whether the directories referred to within outdir exist. Creates them * if they don't. It gracefully handles cases where outdir contains a prefix * for the output files. */ -void Ensure_Outdir_Exists(std::string outdir); +void Ensure_Dir_Exists(std::string dir_path); #ifdef HDF5 // From io/io.cpp diff --git a/src/main.cpp b/src/main.cpp index 016df3f61..758b9f54f 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -98,7 +98,6 @@ int main(int argc, char *argv[]) chprintf("Input directory: %s\n", P.indir); } chprintf("Output directory: %s\n", P.outdir); - Ensure_Outdir_Exists(P.outdir); // Check the configuration Check_Configuration(P); diff --git a/src/particles/io_particles.cpp b/src/particles/io_particles.cpp index e986c5287..7aaa627d6 100644 --- a/src/particles/io_particles.cpp +++ b/src/particles/io_particles.cpp @@ -757,23 +757,11 @@ void Grid3D::Write_Particles_Data_HDF5(hid_t file_id) void Grid3D::OutputData_Particles(struct Parameters P, int nfile) { FILE *out; - char filename[MAXLEN]; - char timestep[20]; + std::string filename = FnameTemplate(P).format_fname(nfile, "_particles"); - // create the filename - strcpy(filename, P.outdir); - sprintf(timestep, "%d", nfile); - strcat(filename, timestep); // a binary file is created for each process #if defined BINARY chprintf("\nERROR: Particles only support HDF5 outputs\n") return; - // only one HDF5 file is created - #elif defined HDF5 - strcat(filename, "_particles"); - strcat(filename, ".h5"); - #ifdef MPI_CHOLLA - sprintf(filename, "%s.%d", filename, procID); - #endif #endif #if defined HDF5 @@ -781,7 +769,7 @@ void Grid3D::OutputData_Particles(struct Parameters P, int nfile) herr_t status; // Create a new file collectively - file_id = H5Fcreate(filename, H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT); + file_id = H5Fcreate(filename.c_str(), H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT); // Write header (file attributes) Write_Header_HDF5(file_id); From 8066f3a9eaec4f75d677b2728fc91f9462f8672f Mon Sep 17 00:00:00 2001 From: Matthew Abruzzo Date: Fri, 26 Jan 2024 12:56:21 -0500 Subject: [PATCH 671/694] Add a runtime parameter, legacy_flat_outdir, that lets users opt into the older behavior where all outputs are written to a single file. --- src/global/global.cpp | 4 ++++ src/global/global.h | 3 ++- src/io/io.h | 2 +- 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/src/global/global.cpp b/src/global/global.cpp index 1b8dada22..359edf37e 100644 --- a/src/global/global.cpp +++ b/src/global/global.cpp @@ -244,6 +244,10 @@ void Parse_Param(char *name, char *value, struct Parameters *parms) int tmp = atoi(value); CHOLLA_ASSERT((tmp == 0) or (tmp == 1), "output_always must be 1 or 0."); parms->output_always = tmp; + } else if (strcmp(name, "legacy_flat_outdir") == 0) { + int tmp = atoi(value); + CHOLLA_ASSERT((tmp == 0) or (tmp == 1), "legacy_flat_outdir must be 1 or 0."); + parms->legacy_flat_outdir = tmp; #ifdef MHD } else if (strcmp(name, "out_float32_magnetic_x") == 0) { parms->out_float32_magnetic_x = atoi(value); diff --git a/src/global/global.h b/src/global/global.h index 87cc124a4..8acb0f140 100644 --- a/src/global/global.h +++ b/src/global/global.h @@ -208,7 +208,8 @@ struct Parameters { #ifdef DE int out_float32_GasEnergy = 0; #endif - bool output_always = false; + bool output_always = false; + bool legacy_flat_outdir = false; #ifdef STATIC_GRAV int custom_grav = 0; // flag to set specific static gravity field #endif diff --git a/src/io/io.h b/src/io/io.h index 690f7f5a6..a25f5298a 100644 --- a/src/io/io.h +++ b/src/io/io.h @@ -83,7 +83,7 @@ class FnameTemplate public: FnameTemplate() = delete; - FnameTemplate(const Parameters& P) : separate_cycle_dirs_(false), outdir_(P.outdir) {} + FnameTemplate(const Parameters& P) : separate_cycle_dirs_(not P.legacy_flat_outdir), outdir_(P.outdir) {} /* Specifies whether separate cycles are written to separate directories */ bool separate_cycle_dirs() const noexcept { return separate_cycle_dirs_; } From fe2f349abe54e54645ff7c9e29a150f350454630 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Fri, 8 Dec 2023 14:00:02 -0500 Subject: [PATCH 672/694] Correct some comments in AutomaticLaunchParams --- src/utils/cuda_utilities.h | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/utils/cuda_utilities.h b/src/utils/cuda_utilities.h index acc50cbac..76baae6de 100644 --- a/src/utils/cuda_utilities.h +++ b/src/utils/cuda_utilities.h @@ -84,18 +84,14 @@ inline void initGpuMemory(Real *ptr, size_t N) { GPU_Error_Check(cudaMemset(ptr, * \brief Struct to determine the optimal number of blocks and threads * per block to use when launching a kernel. The member * variables are `threadsPerBlock` and `numBlocks` which are chosen with - the occupancy API. Can target any device on the system through the - * optional constructor argument. - * NOTE: On AMD there's currently an issue that stops kernels from being - * passed. As a workaround for now this struct just returns the maximum - * number of blocks and threads per block that a MI250X can run at once. + * the occupancy API. * */ template struct AutomaticLaunchParams { public: /*! - * \brief Construct a new Reduction Launch Params object. By default it + * \brief Construct a new AutomaticLaunchParams object. By default it * generates values of numBlocks and threadsPerBlock suitable for a * kernel with a grid-stride loop. For a kernel with one thread per * element set the optional `numElements` argument to the number of From b22d163aa05640d4c8ea668a31c5a58ac3ec3bfc Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Fri, 8 Dec 2023 16:33:58 -0500 Subject: [PATCH 673/694] Add automatic launch params to kernels in VL_3D integrator --- src/integrators/VL_3D_cuda.cu | 238 ++++++++++++++++++++-------------- 1 file changed, 138 insertions(+), 100 deletions(-) diff --git a/src/integrators/VL_3D_cuda.cu b/src/integrators/VL_3D_cuda.cu index dbb7966ff..1c0b0f56b 100644 --- a/src/integrators/VL_3D_cuda.cu +++ b/src/integrators/VL_3D_cuda.cu @@ -139,67 +139,84 @@ void VL_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, int // Step 1: Use PCM reconstruction to put primitive variables into interface // arrays - hipLaunchKernelGGL(PCM_Reconstruction_3D, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, Q_Ly, Q_Ry, Q_Lz, - Q_Rz, nx, ny, nz, n_ghost, gama, n_fields); + cuda_utilities::AutomaticLaunchParams static const pcm_launch_params(PCM_Reconstruction_3D, n_cells); + hipLaunchKernelGGL(PCM_Reconstruction_3D, pcm_launch_params.numBlocks, pcm_launch_params.threadsPerBlock, 0, 0, + dev_conserved, Q_Lx, Q_Rx, Q_Ly, Q_Ry, Q_Lz, Q_Rz, nx, ny, nz, n_ghost, gama, n_fields); GPU_Error_Check(); // Step 2: Calculate first-order upwind fluxes #ifdef EXACT - hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, - gama, 0, n_fields); - hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, - gama, 1, n_fields); - hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lz, Q_Rz, F_z, nx, ny, nz, n_ghost, - gama, 2, n_fields); + cuda_utilities::AutomaticLaunchParams static const exact_launch_params(Calculate_Exact_Fluxes_CUDA, + n_cellsCalculate_Exact_Fluxes_CUDA); + hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, exact_launch_params.numBlocks, exact_launch_params.threadsPerBlock, 0, + 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); + hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, exact_launch_params.numBlocks, exact_launch_params.threadsPerBlock, 0, + 0, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields); + hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, exact_launch_params.numBlocks, exact_launch_params.threadsPerBlock, 0, + 0, Q_Lz, Q_Rz, F_z, nx, ny, nz, n_ghost, gama, 2, n_fields); #endif // EXACT #ifdef ROE - hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, - 0, n_fields); - hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, - 1, n_fields); - hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lz, Q_Rz, F_z, nx, ny, nz, n_ghost, gama, - 2, n_fields); + cuda_utilities::AutomaticLaunchParams static const roe_launch_params(Calculate_Roe_Fluxes_CUDA, n_cells); + hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, roe_launch_params.numBlocks, roe_launch_params.threadsPerBlock, 0, 0, + Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); + hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, roe_launch_params.numBlocks, roe_launch_params.threadsPerBlock, 0, 0, + Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields); + hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, roe_launch_params.numBlocks, roe_launch_params.threadsPerBlock, 0, 0, + Q_Lz, Q_Rz, F_z, nx, ny, nz, n_ghost, gama, 2, n_fields); #endif // ROE #ifdef HLLC - hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, - gama, 0, n_fields); - hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, - gama, 1, n_fields); - hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lz, Q_Rz, F_z, nx, ny, nz, n_ghost, - gama, 2, n_fields); + cuda_utilities::AutomaticLaunchParams static const hllc_launch_params(Calculate_HLLC_Fluxes_CUDA, n_cells); + hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, hllc_launch_params.numBlocks, hllc_launch_params.threadsPerBlock, 0, 0, + Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); + hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, hllc_launch_params.numBlocks, hllc_launch_params.threadsPerBlock, 0, 0, + Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields); + hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, hllc_launch_params.numBlocks, hllc_launch_params.threadsPerBlock, 0, 0, + Q_Lz, Q_Rz, F_z, nx, ny, nz, n_ghost, gama, 2, n_fields); #endif // HLLC #ifdef HLL - hipLaunchKernelGGL(Calculate_HLL_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, - 0, n_fields); - hipLaunchKernelGGL(Calculate_HLL_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, - 1, n_fields); - hipLaunchKernelGGL(Calculate_HLL_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lz, Q_Rz, F_z, nx, ny, nz, n_ghost, gama, - 2, n_fields); + cuda_utilities::AutomaticLaunchParams static const hll_launch_params(Calculate_HLL_Fluxes_CUDA, n_cells); + hipLaunchKernelGGL(Calculate_HLL_Fluxes_CUDA, hll_launch_params.numBlocks, hll_launch_params.threadsPerBlock, 0, 0, + Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); + hipLaunchKernelGGL(Calculate_HLL_Fluxes_CUDA, hll_launch_params.numBlocks, hll_launch_params.threadsPerBlock, 0, 0, + Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields); + hipLaunchKernelGGL(Calculate_HLL_Fluxes_CUDA, hll_launch_params.numBlocks, hll_launch_params.threadsPerBlock, 0, 0, + Q_Lz, Q_Rz, F_z, nx, ny, nz, n_ghost, gama, 2, n_fields); #endif // HLL #ifdef HLLD - hipLaunchKernelGGL(mhd::Calculate_HLLD_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, - &(dev_conserved[(grid_enum::magnetic_x)*n_cells]), F_x, n_cells, gama, 0, n_fields); - hipLaunchKernelGGL(mhd::Calculate_HLLD_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, - &(dev_conserved[(grid_enum::magnetic_y)*n_cells]), F_y, n_cells, gama, 1, n_fields); - hipLaunchKernelGGL(mhd::Calculate_HLLD_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lz, Q_Rz, - &(dev_conserved[(grid_enum::magnetic_z)*n_cells]), F_z, n_cells, gama, 2, n_fields); + cuda_utilities::AutomaticLaunchParams static const hlld_launch_params(mhd::Calculate_HLLD_Fluxes_CUDA, n_cells); + hipLaunchKernelGGL(mhd::Calculate_HLLD_Fluxes_CUDA, hlld_launch_params.numBlocks, hlld_launch_params.threadsPerBlock, + 0, 0, Q_Lx, Q_Rx, &(dev_conserved[(grid_enum::magnetic_x)*n_cells]), F_x, n_cells, gama, 0, + n_fields); + hipLaunchKernelGGL(mhd::Calculate_HLLD_Fluxes_CUDA, hlld_launch_params.numBlocks, hlld_launch_params.threadsPerBlock, + 0, 0, Q_Ly, Q_Ry, &(dev_conserved[(grid_enum::magnetic_y)*n_cells]), F_y, n_cells, gama, 1, + n_fields); + hipLaunchKernelGGL(mhd::Calculate_HLLD_Fluxes_CUDA, hlld_launch_params.numBlocks, hlld_launch_params.threadsPerBlock, + 0, 0, Q_Lz, Q_Rz, &(dev_conserved[(grid_enum::magnetic_z)*n_cells]), F_z, n_cells, gama, 2, + n_fields); #endif // HLLD GPU_Error_Check(); #ifdef MHD // Step 2.5: Compute the Constrained transport electric fields - hipLaunchKernelGGL(mhd::Calculate_CT_Electric_Fields, dim1dGrid, dim1dBlock, 0, 0, F_x, F_y, F_z, dev_conserved, - ctElectricFields, nx, ny, nz, n_cells); + cuda_utilities::AutomaticLaunchParams static const ct_launch_params(mhd::Calculate_CT_Electric_Fields, n_cells); + hipLaunchKernelGGL(mhd::Calculate_CT_Electric_Fields, ct_launch_params.numBlocks, ct_launch_params.threadsPerBlock, 0, + 0, F_x, F_y, F_z, dev_conserved, ctElectricFields, nx, ny, nz, n_cells); GPU_Error_Check(); #endif // MHD // Step 3: Update the conserved variables half a timestep - hipLaunchKernelGGL(Update_Conserved_Variables_3D_half, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, dev_conserved_half, - F_x, F_y, F_z, nx, ny, nz, n_ghost, dx, dy, dz, 0.5 * dt, gama, n_fields, density_floor); + cuda_utilities::AutomaticLaunchParams static const update_half_launch_params(Update_Conserved_Variables_3D_half, + n_cells); + hipLaunchKernelGGL(Update_Conserved_Variables_3D_half, update_half_launch_params.numBlocks, + update_half_launch_params.threadsPerBlock, 0, 0, dev_conserved, dev_conserved_half, F_x, F_y, F_z, + nx, ny, nz, n_ghost, dx, dy, dz, 0.5 * dt, gama, n_fields, density_floor); GPU_Error_Check(); #ifdef MHD // Update the magnetic fields - hipLaunchKernelGGL(mhd::Update_Magnetic_Field_3D, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, dev_conserved_half, + cuda_utilities::AutomaticLaunchParams static const update_magnetic_launch_params(mhd::Update_Magnetic_Field_3D, + n_cells); + hipLaunchKernelGGL(mhd::Update_Magnetic_Field_3D, update_magnetic_launch_params.numBlocks, + update_magnetic_launch_params.threadsPerBlock, 0, 0, dev_conserved, dev_conserved_half, ctElectricFields, nx, ny, nz, n_cells, 0.5 * dt, dx, dy, dz); GPU_Error_Check(); #endif // MHD @@ -211,76 +228,86 @@ void VL_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, int Q_Lz, Q_Rz, nx, ny, nz, n_ghost, gama, n_fields); #endif // PCM #ifdef PLMP - hipLaunchKernelGGL(PLMP_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, - dt, gama, 0, n_fields); - hipLaunchKernelGGL(PLMP_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, - dt, gama, 1, n_fields); - hipLaunchKernelGGL(PLMP_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Lz, Q_Rz, nx, ny, nz, n_ghost, dz, - dt, gama, 2, n_fields); + cuda_utilities::AutomaticLaunchParams static const plmp_launch_params(PLMP_cuda, n_cells); + hipLaunchKernelGGL(PLMP_cuda, plmp_launch_params.numBlocks, plmp_launch_params.threadsPerBlock, 0, 0, + dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, n_fields); + hipLaunchKernelGGL(PLMP_cuda, plmp_launch_params.numBlocks, plmp_launch_params.threadsPerBlock, 0, 0, + dev_conserved_half, Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, dt, gama, 1, n_fields); + hipLaunchKernelGGL(PLMP_cuda, plmp_launch_params.numBlocks, plmp_launch_params.threadsPerBlock, 0, 0, + dev_conserved_half, Q_Lz, Q_Rz, nx, ny, nz, n_ghost, dz, dt, gama, 2, n_fields); #endif // PLMP #ifdef PLMC - hipLaunchKernelGGL(PLMC_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, dx, dt, gama, - 0, n_fields); - hipLaunchKernelGGL(PLMC_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Ly, Q_Ry, nx, ny, nz, dy, dt, gama, - 1, n_fields); - hipLaunchKernelGGL(PLMC_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Lz, Q_Rz, nx, ny, nz, dz, dt, gama, - 2, n_fields); + cuda_utilities::AutomaticLaunchParams static const plmc_vl_launch_params(PLMC_cuda, n_cells); + hipLaunchKernelGGL(PLMC_cuda, plmc_vl_launch_params.numBlocks, plmc_vl_launch_params.threadsPerBlock, 0, 0, + dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, dx, dt, gama, 0, n_fields); + hipLaunchKernelGGL(PLMC_cuda, plmc_vl_launch_params.numBlocks, plmc_vl_launch_params.threadsPerBlock, 0, 0, + dev_conserved_half, Q_Ly, Q_Ry, nx, ny, nz, dy, dt, gama, 1, n_fields); + hipLaunchKernelGGL(PLMC_cuda, plmc_vl_launch_params.numBlocks, plmc_vl_launch_params.threadsPerBlock, 0, 0, + dev_conserved_half, Q_Lz, Q_Rz, nx, ny, nz, dz, dt, gama, 2, n_fields); #endif // PLMC #ifdef PPMP - hipLaunchKernelGGL(PPMP_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, - dt, gama, 0, n_fields); - hipLaunchKernelGGL(PPMP_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, - dt, gama, 1, n_fields); - hipLaunchKernelGGL(PPMP_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Lz, Q_Rz, nx, ny, nz, n_ghost, dz, - dt, gama, 2, n_fields); + cuda_utilities::AutomaticLaunchParams static const ppmp_launch_params(PPMP_cuda, n_cells); + hipLaunchKernelGGL(PPMP_cuda, ppmp_launch_params.numBlocks, ppmp_launch_params.threadsPerBlock, 0, 0, + dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, n_fields); + hipLaunchKernelGGL(PPMP_cuda, ppmp_launch_params.numBlocks, ppmp_launch_params.threadsPerBlock, 0, 0, + dev_conserved_half, Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, dt, gama, 1, n_fields); + hipLaunchKernelGGL(PPMP_cuda, ppmp_launch_params.numBlocks, ppmp_launch_params.threadsPerBlock, 0, 0, + dev_conserved_half, Q_Lz, Q_Rz, nx, ny, nz, n_ghost, dz, dt, gama, 2, n_fields); #endif // PPMP #ifdef PPMC - hipLaunchKernelGGL(PPMC_VL, dim1dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, gama, 0); - hipLaunchKernelGGL(PPMC_VL, dim1dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Ly, Q_Ry, nx, ny, nz, gama, 1); - hipLaunchKernelGGL(PPMC_VL, dim1dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Lz, Q_Rz, nx, ny, nz, gama, 2); + cuda_utilities::AutomaticLaunchParams static const ppmc_vl_launch_params(PPMC_VL, n_cells); + hipLaunchKernelGGL(PPMC_VL, ppmc_vl_launch_params.numBlocks, ppmc_vl_launch_params.threadsPerBlock, 0, 0, + dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, gama, 0); + hipLaunchKernelGGL(PPMC_VL, ppmc_vl_launch_params.numBlocks, ppmc_vl_launch_params.threadsPerBlock, 0, 0, + dev_conserved_half, Q_Ly, Q_Ry, nx, ny, nz, gama, 1); + hipLaunchKernelGGL(PPMC_VL, ppmc_vl_launch_params.numBlocks, ppmc_vl_launch_params.threadsPerBlock, 0, 0, + dev_conserved_half, Q_Lz, Q_Rz, nx, ny, nz, gama, 2); #endif // PPMC GPU_Error_Check(); // Step 5: Calculate the fluxes again #ifdef EXACT - hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, - gama, 0, n_fields); - hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, - gama, 1, n_fields); - hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lz, Q_Rz, F_z, nx, ny, nz, n_ghost, - gama, 2, n_fields); + hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, exact_launch_params.numBlocks, exact_launch_params.threadsPerBlock, 0, + 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); + hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, exact_launch_params.numBlocks, exact_launch_params.threadsPerBlock, 0, + 0, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields); + hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, exact_launch_params.numBlocks, exact_launch_params.threadsPerBlock, 0, + 0, Q_Lz, Q_Rz, F_z, nx, ny, nz, n_ghost, gama, 2, n_fields); #endif // EXACT #ifdef ROE - hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, - 0, n_fields); - hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, - 1, n_fields); - hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lz, Q_Rz, F_z, nx, ny, nz, n_ghost, gama, - 2, n_fields); + hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, roe_launch_params.numBlocks, roe_launch_params.threadsPerBlock, 0, 0, + Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); + hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, roe_launch_params.numBlocks, roe_launch_params.threadsPerBlock, 0, 0, + Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields); + hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, roe_launch_params.numBlocks, roe_launch_params.threadsPerBlock, 0, 0, + Q_Lz, Q_Rz, F_z, nx, ny, nz, n_ghost, gama, 2, n_fields); #endif // ROE #ifdef HLLC - hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, - gama, 0, n_fields); - hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, - gama, 1, n_fields); - hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lz, Q_Rz, F_z, nx, ny, nz, n_ghost, - gama, 2, n_fields); + hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, hllc_launch_params.numBlocks, hllc_launch_params.threadsPerBlock, 0, 0, + Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); + hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, hllc_launch_params.numBlocks, hllc_launch_params.threadsPerBlock, 0, 0, + Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields); + hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, hllc_launch_params.numBlocks, hllc_launch_params.threadsPerBlock, 0, 0, + Q_Lz, Q_Rz, F_z, nx, ny, nz, n_ghost, gama, 2, n_fields); #endif // HLLC #ifdef HLL - hipLaunchKernelGGL(Calculate_HLL_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, - 0, n_fields); - hipLaunchKernelGGL(Calculate_HLL_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, - 1, n_fields); - hipLaunchKernelGGL(Calculate_HLL_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lz, Q_Rz, F_z, nx, ny, nz, n_ghost, gama, - 2, n_fields); + hipLaunchKernelGGL(Calculate_HLL_Fluxes_CUDA, hll_launch_params.numBlocks, hll_launch_params.threadsPerBlock, 0, 0, + Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); + hipLaunchKernelGGL(Calculate_HLL_Fluxes_CUDA, hll_launch_params.numBlocks, hll_launch_params.threadsPerBlock, 0, 0, + Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields); + hipLaunchKernelGGL(Calculate_HLL_Fluxes_CUDA, hll_launch_params.numBlocks, hll_launch_params.threadsPerBlock, 0, 0, + Q_Lz, Q_Rz, F_z, nx, ny, nz, n_ghost, gama, 2, n_fields); #endif // HLLC #ifdef HLLD - hipLaunchKernelGGL(mhd::Calculate_HLLD_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, - &(dev_conserved_half[(grid_enum::magnetic_x)*n_cells]), F_x, n_cells, gama, 0, n_fields); - hipLaunchKernelGGL(mhd::Calculate_HLLD_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, - &(dev_conserved_half[(grid_enum::magnetic_y)*n_cells]), F_y, n_cells, gama, 1, n_fields); - hipLaunchKernelGGL(mhd::Calculate_HLLD_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lz, Q_Rz, - &(dev_conserved_half[(grid_enum::magnetic_z)*n_cells]), F_z, n_cells, gama, 2, n_fields); + hipLaunchKernelGGL(mhd::Calculate_HLLD_Fluxes_CUDA, hlld_launch_params.numBlocks, hlld_launch_params.threadsPerBlock, + 0, 0, Q_Lx, Q_Rx, &(dev_conserved_half[(grid_enum::magnetic_x)*n_cells]), F_x, n_cells, gama, 0, + n_fields); + hipLaunchKernelGGL(mhd::Calculate_HLLD_Fluxes_CUDA, hlld_launch_params.numBlocks, hlld_launch_params.threadsPerBlock, + 0, 0, Q_Ly, Q_Ry, &(dev_conserved_half[(grid_enum::magnetic_y)*n_cells]), F_y, n_cells, gama, 1, + n_fields); + hipLaunchKernelGGL(mhd::Calculate_HLLD_Fluxes_CUDA, hlld_launch_params.numBlocks, hlld_launch_params.threadsPerBlock, + 0, 0, Q_Lz, Q_Rz, &(dev_conserved_half[(grid_enum::magnetic_z)*n_cells]), F_z, n_cells, gama, 2, + n_fields); #endif // HLLD GPU_Error_Check(); @@ -288,40 +315,51 @@ void VL_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, int // Compute the divergence of Vel before updating the conserved array, this // solves synchronization issues when adding this term on // Update_Conserved_Variables_3D - hipLaunchKernelGGL(Partial_Update_Advected_Internal_Energy_3D, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, - Q_Ly, Q_Ry, Q_Lz, Q_Rz, nx, ny, nz, n_ghost, dx, dy, dz, dt, gama, n_fields); + cuda_utilities::AutomaticLaunchParams static const de_advect_launch_params(Partial_Update_Advected_Internal_Energy_3D, + n_cells); + hipLaunchKernelGGL(Partial_Update_Advected_Internal_Energy_3D, de_advect_launch_params.numBlocks, + de_advect_launch_params.threadsPerBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, Q_Ly, Q_Ry, Q_Lz, Q_Rz, + nx, ny, nz, n_ghost, dx, dy, dz, dt, gama, n_fields); GPU_Error_Check(); #endif // DE #ifdef MHD // Step 5.5: Compute the Constrained transport electric fields - hipLaunchKernelGGL(mhd::Calculate_CT_Electric_Fields, dim1dGrid, dim1dBlock, 0, 0, F_x, F_y, F_z, dev_conserved_half, - ctElectricFields, nx, ny, nz, n_cells); + hipLaunchKernelGGL(mhd::Calculate_CT_Electric_Fields, ct_launch_params.numBlocks, ct_launch_params.threadsPerBlock, 0, + 0, F_x, F_y, F_z, dev_conserved_half, ctElectricFields, nx, ny, nz, n_cells); GPU_Error_Check(); #endif // MHD // Step 6: Update the conserved variable array - hipLaunchKernelGGL(Update_Conserved_Variables_3D, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, Q_Ly, Q_Ry, - Q_Lz, Q_Rz, F_x, F_y, F_z, nx, ny, nz, x_off, y_off, z_off, n_ghost, dx, dy, dz, xbound, ybound, - zbound, dt, gama, n_fields, custom_grav, density_floor, dev_grav_potential); + cuda_utilities::AutomaticLaunchParams static const update_full_launch_params(Update_Conserved_Variables_3D, n_cells); + hipLaunchKernelGGL(Update_Conserved_Variables_3D, update_full_launch_params.numBlocks, + update_full_launch_params.threadsPerBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, Q_Ly, Q_Ry, Q_Lz, Q_Rz, + F_x, F_y, F_z, nx, ny, nz, x_off, y_off, z_off, n_ghost, dx, dy, dz, xbound, ybound, zbound, dt, + gama, n_fields, custom_grav, density_floor, dev_grav_potential); GPU_Error_Check(); #ifdef MHD // Update the magnetic fields - hipLaunchKernelGGL(mhd::Update_Magnetic_Field_3D, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, dev_conserved, + hipLaunchKernelGGL(mhd::Update_Magnetic_Field_3D, update_magnetic_launch_params.numBlocks, + update_magnetic_launch_params.threadsPerBlock, 0, 0, dev_conserved, dev_conserved, ctElectricFields, nx, ny, nz, n_cells, dt, dx, dy, dz); GPU_Error_Check(); #endif // MHD #ifdef DE - hipLaunchKernelGGL(Select_Internal_Energy_3D, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, nz, n_ghost, - n_fields); - hipLaunchKernelGGL(Sync_Energies_3D, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, nz, n_ghost, gama, n_fields); + cuda_utilities::AutomaticLaunchParams static const de_select_launch_params(Select_Internal_Energy_3D, n_cells); + hipLaunchKernelGGL(Select_Internal_Energy_3D, de_select_launch_params.numBlocks, + de_select_launch_params.threadsPerBlock, 0, 0, dev_conserved, nx, ny, nz, n_ghost, n_fields); + cuda_utilities::AutomaticLaunchParams static const de_sync_launch_params(Sync_Energies_3D, n_cells); + hipLaunchKernelGGL(Sync_Energies_3D, de_sync_launch_params.numBlocks, de_sync_launch_params.threadsPerBlock, 0, 0, + dev_conserved, nx, ny, nz, n_ghost, gama, n_fields); GPU_Error_Check(); #endif // DE #ifdef TEMPERATURE_FLOOR - hipLaunchKernelGGL(Apply_Temperature_Floor, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, nz, n_ghost, n_fields, + cuda_utilities::AutomaticLaunchParams static const temp_floor_launch_params(Apply_Temperature_Floor, n_cells); + hipLaunchKernelGGL(Apply_Temperature_Floor, temp_floor_launch_params.numBlocks, + temp_floor_launch_params.threadsPerBlock, 0, 0, dev_conserved, nx, ny, nz, n_ghost, n_fields, U_floor); GPU_Error_Check(); #endif // TEMPERATURE_FLOOR From a7ea118aa04677208c25ff85ca49e80ab7537b91 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Wed, 13 Dec 2023 15:49:59 -0500 Subject: [PATCH 674/694] Fix clang-tidy error that appeared on some builds --- src/utils/cuda_utilities.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/utils/cuda_utilities.h b/src/utils/cuda_utilities.h index 76baae6de..85927d532 100644 --- a/src/utils/cuda_utilities.h +++ b/src/utils/cuda_utilities.h @@ -106,6 +106,12 @@ struct AutomaticLaunchParams { cudaOccupancyMaxPotentialBlockSize(&numBlocks, &threadsPerBlock, kernel, 0, 0); if (numElements > 0) { + // This line is needed to check that threadsPerBlock isn't zero. Somewhere inside + // cudaOccupancyMaxPotentialBlockSize threadsPerBlock can be zero according to clang-tidy so this line sets it to + // a more reasonable value + threadsPerBlock = (threadsPerBlock == 0) ? TPB : threadsPerBlock; + + // Compute the number of blocks numBlocks = (numElements + threadsPerBlock - 1) / threadsPerBlock; } } From ddbd3125bc6227ee1a8f174f8f699fa94915b1a5 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Thu, 18 Jan 2024 12:08:14 -0500 Subject: [PATCH 675/694] Update hydro build to use VL and PLMC Updated the default hydro build to use the Van Leer (VL) and PLMC build options. Also, updated the test data for those new builds --- builds/make.type.hydro | 6 +- cholla-tests-data | 2 +- src/reconstruction/plmc_cuda_tests.cu | 130 ++++++++++++++------------ 3 files changed, 74 insertions(+), 64 deletions(-) diff --git a/builds/make.type.hydro b/builds/make.type.hydro index f34d78172..b35dbd9ae 100644 --- a/builds/make.type.hydro +++ b/builds/make.type.hydro @@ -3,12 +3,12 @@ DFLAGS += -DCUDA DFLAGS += -DMPI_CHOLLA DFLAGS += -DPRECISION=2 -DFLAGS += -DPPMC +DFLAGS += -DPLMC DFLAGS += -DHLLC # Integrator -DFLAGS += -DSIMPLE -#DFLAGS += -DVL +# DFLAGS += -DSIMPLE +DFLAGS += -DVL # Apply a density and temperature floor DFLAGS += -DDENSITY_FLOOR diff --git a/cholla-tests-data b/cholla-tests-data index dcd73ff52..da5c3a309 160000 --- a/cholla-tests-data +++ b/cholla-tests-data @@ -1 +1 @@ -Subproject commit dcd73ff52b9027627b247c6d888bcdb56840c85e +Subproject commit da5c3a309d5451fabdec27fd7942e6121bb9c277 diff --git a/src/reconstruction/plmc_cuda_tests.cu b/src/reconstruction/plmc_cuda_tests.cu index 68f11b396..0207a09ac 100644 --- a/src/reconstruction/plmc_cuda_tests.cu +++ b/src/reconstruction/plmc_cuda_tests.cu @@ -25,6 +25,9 @@ TEST(tHYDROPlmcReconstructor, CorrectInputExpectCorrectOutput) { +#ifndef VL + #warning "The tHYDROPlmcReconstructor.CorrectInputExpectCorrectOutput only supports the Van Leer (VL) integrator" +#endif // VL // Set up PRNG to use std::mt19937_64 prng(42); std::uniform_real_distribution doubleRand(0.1, 5); @@ -49,66 +52,71 @@ TEST(tHYDROPlmcReconstructor, CorrectInputExpectCorrectOutput) dev_grid.cpyHostToDevice(host_grid); // Fiducial Data - std::vector> fiducial_interface_left = {{{26, 2.1584359129984056}, - {27, 0.70033864721549188}, - {106, 2.2476363309467553}, - {107, 3.0633780053857027}, - {186, 2.2245934101106259}, - {187, 2.1015872413794123}, - {266, 2.1263341057778309}, - {267, 3.9675148506537838}, - {346, 3.3640057502842691}, - {347, 21.091316282933843}}, - {{21, 0.72430827309279655}, - {37, 0.19457128219588618}, - {101, 5.4739527659741896}, - {117, 4.4286255636679313}, - {181, 0.12703829036056602}, - {197, 2.2851440769830953}, - {261, 1.5337035731959561}, - {277, 2.697375839048191}, - {341, 22.319601655044117}, - {357, 82.515887983144168}}, - {{25, 2.2863650183226212}, - {29, 1.686415421301841}, - {105, 0.72340346106443465}, - {109, 5.4713687086831388}, - {185, 3.929100145230096}, - {189, 4.9166140516911483}, - {265, 0.95177493689267167}, - {269, 0.46056494878491938}, - {345, 3.6886096301452787}, - {349, 16.105488797582133}}}; - std::vector> fiducial_interface_right = {{{25, 3.8877922383184833}, - {26, 0.70033864721549188}, - {105, 1.5947787943675635}, - {106, 3.0633780053857027}, - {185, 4.0069556576401011}, - {186, 2.1015872413794123}, - {265, 1.7883678016935785}, - {266, 3.9675148506537838}, - {345, 2.8032969746372527}, - {346, 21.091316282933843}}, - {{17, 0.43265217076853835}, - {33, 0.19457128219588618}, - {97, 3.2697645945288754}, - {113, 4.4286255636679313}, - {177, 0.07588397666718491}, - {193, 2.2851440769830953}, - {257, 0.91612950577699748}, - {273, 2.697375839048191}, - {337, 13.332201861384396}, - {353, 82.515887983144168}}, - {{5, 2.2863650183226212}, - {9, 1.686415421301841}, - {85, 0.72340346106443465}, - {89, 1.7792505446336098}, - {165, 5.3997753452111859}, - {169, 1.4379190463124139}, - {245, 0.95177493689267167}, - {249, 0.46056494878491938}, - {325, 6.6889498465051407}, - {329, 1.6145084086614281}}}; + std::vector> fiducial_interface_left = {{{26, 3.8877922383184833}, + {27, 0.70033864721549188}, + {106, 5.6625525038177784}, + {107, 3.0633780053857027}, + {186, 4.0069556576401011}, + {187, 2.1015872413794123}, + {266, 5.1729859852329314}, + {267, 3.9675148506537838}, + {346, 9.6301414677176531}, + {347, 21.091316282933843}}, + {{21, 0.74780807318015607}, + {37, 0.19457128219588618}, + {101, 5.6515522777659895}, + {117, 4.4286255636679313}, + {181, 0.13115998072061905}, + {197, 2.2851440769830953}, + {261, 1.5834637771067519}, + {277, 2.697375839048191}, + {341, 23.043749364531674}, + {357, 82.515887983144168}}, + {{25, 2.2863650183226212}, + {29, 1.686415421301841}, + {105, 0.72340346106443465}, + {109, 5.9563546443402542}, + {185, 3.6128571662018358}, + {189, 5.3735653401079038}, + {265, 0.95177493689267167}, + {269, 0.46056494878491938}, + {345, 3.1670194578067843}, + {349, 19.142817472509272}}}; + + std::vector> fiducial_interface_right = + + {{{25, 3.8877922383184833}, + {26, 0.70033864721549188}, + {105, 1.594778794367564}, + {106, 3.0633780053857027}, + {185, 4.0069556576401011}, + {186, 2.1015872413794123}, + {265, 1.7883678016935782}, + {266, 3.9675148506537838}, + {345, 2.8032969746372531}, + {346, 21.091316282933843}}, + {{17, 0.43265217076853835}, + {33, 0.19457128219588618}, + {97, 3.2697645945288754}, + {113, 4.4286255636679313}, + {177, 0.07588397666718491}, + {193, 2.2851440769830953}, + {257, 0.91612950577699748}, + {273, 2.697375839048191}, + {337, 13.332201861384396}, + {353, 82.515887983144168}}, + {{5, 2.2863650183226212}, + {9, 1.686415421301841}, + {85, 0.72340346106443465}, + {89, 1.77925054463361}, + {165, 5.3997753452111859}, + {169, 1.4379190463124141}, + {245, 0.95177493689267167}, + {249, 0.46056494878491938}, + {325, 6.6889498465051398}, + {329, 1.6145084086614285}}} + + ; // Loop over different directions for (size_t direction = 0; direction < 3; direction++) { @@ -161,6 +169,8 @@ TEST(tHYDROPlmcReconstructor, CorrectInputExpectCorrectOutput) ? 0.0 : fiducial_interface_right.at(direction)[i]; + // if (test_val != 0.0) std::cout << "{" << i << ", " << to_string_exact(test_val) << "}," << std::endl; + testing_utilities::Check_Results( fiducial_val, test_val, "right interface at i=" + std::to_string(i) + ", in direction " + std::to_string(direction)); From 29aa0519e798f9da107a477e694478ee7d1e842f Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Thu, 18 Jan 2024 13:36:52 -0500 Subject: [PATCH 676/694] Switch MHD to using PLMC by default Also updated test data --- builds/make.type.mhd | 2 +- src/system_tests/mhd_system_tests.cpp | 24 ++++++++++++------------ 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/builds/make.type.mhd b/builds/make.type.mhd index 4459819e8..d08e6373e 100644 --- a/builds/make.type.mhd +++ b/builds/make.type.mhd @@ -9,7 +9,7 @@ MPI_GPU ?= DFLAGS += -DCUDA DFLAGS += -DMPI_CHOLLA DFLAGS += -DPRECISION=2 -DFLAGS += -DPPMC +DFLAGS += -DPLMC DFLAGS += -DHLLD DFLAGS += -DMHD diff --git a/src/system_tests/mhd_system_tests.cpp b/src/system_tests/mhd_system_tests.cpp index 4261797b2..a14caa9a1 100644 --- a/src/system_tests/mhd_system_tests.cpp +++ b/src/system_tests/mhd_system_tests.cpp @@ -228,7 +228,7 @@ TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, SlowMagnetosonicWaveRightMovingC #ifdef PCM waveTest.runL1ErrorTest(4.E-7, 4.E-7); #elif defined(PLMC) - waveTest.runL1ErrorTest(2.0E-8, 2.7E-8); + waveTest.runL1ErrorTest(2.0E-8, 2.75E-8); #elif defined(PPMC) waveTest.runL1ErrorTest(1.45E-9, 1.3E-9); #endif // PCM @@ -266,7 +266,7 @@ TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, SlowMagnetosonicWaveLeftMovingCo #ifdef PCM waveTest.runL1ErrorTest(4.E-7, 4.E-7); #elif defined(PLMC) - waveTest.runL1ErrorTest(2.0E-8, 2.7E-8); + waveTest.runL1ErrorTest(2.0E-8, 2.75E-8); #elif defined(PPMC) waveTest.runL1ErrorTest(1.45E-9, 1.3E-9); #endif // PCM @@ -416,12 +416,12 @@ TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, FastMagnetosonicWaveExpectSecond waveTest.setFiducialNumTimeSteps(numTimeSteps[domain_direction - 1]); // Run the wave - waveTest.runL1ErrorTest(3.0E-8, 4.0E-8); + waveTest.runL1ErrorTest(7.0E-8, 1.5E-7); // Check the scaling double const low_res_l2norm = waveTest.getL2Norm(); testing_utilities::Check_Results(4.0, low_res_l2norm / high_res_l2norms["fast_" + std::to_string(domain_direction)], - "", 0.17); + "", 0.2); } TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, SlowMagnetosonicWaveExpectSecondOrderConvergence) @@ -452,12 +452,12 @@ TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, SlowMagnetosonicWaveExpectSecond waveTest.setFiducialNumTimeSteps(numTimeSteps[domain_direction - 1]); // Run the wave - waveTest.runL1ErrorTest(3.0E-8, 4.0E-8); + waveTest.runL1ErrorTest(5.4E-8, 8.0E-8); // Check the scaling double const low_res_l2norm = waveTest.getL2Norm(); testing_utilities::Check_Results(4.0, low_res_l2norm / high_res_l2norms["slow_" + std::to_string(domain_direction)], - "", 0.17); + "", 0.2); } TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, AlfvenWaveExpectSecondOrderConvergence) @@ -487,12 +487,12 @@ TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, AlfvenWaveExpectSecondOrderConve waveTest.setFiducialNumTimeSteps(numTimeSteps[domain_direction - 1]); // Run the wave - waveTest.runL1ErrorTest(3.0E-8, 4.0E-8); + waveTest.runL1ErrorTest(4.5E-8, 8.0E-8); // Check the scaling double const low_res_l2norm = waveTest.getL2Norm(); testing_utilities::Check_Results(4.0, low_res_l2norm / high_res_l2norms["alfven_" + std::to_string(domain_direction)], - "", 0.17); + "", 0.2); } TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, MHDContactWaveExpectSecondOrderConvergence) @@ -523,12 +523,12 @@ TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, MHDContactWaveExpectSecondOrderC waveTest.setFiducialNumTimeSteps(numTimeSteps[domain_direction - 1]); // Run the wave - waveTest.runL1ErrorTest(3.0E-8, 4.0E-8); + waveTest.runL1ErrorTest(5.0E-8, 8.0E-8); // Check the scaling double const low_res_l2norm = waveTest.getL2Norm(); testing_utilities::Check_Results( - 4.0, low_res_l2norm / high_res_l2norms["contact_" + std::to_string(domain_direction)], "", 0.17); + 4.0, low_res_l2norm / high_res_l2norms["contact_" + std::to_string(domain_direction)], "", 0.2); } INSTANTIATE_TEST_SUITE_P(, tMHDSYSTEMLinearWavesParameterizedAngle, @@ -645,7 +645,7 @@ TEST_P(tMHDSYSTEMLinearWavesParameterizedMpi, SlowMagnetosonicWaveRightMovingCor #ifdef PCM waveTest.runL1ErrorTest(4.E-7, 4.E-7); #elif defined(PLMC) - waveTest.runL1ErrorTest(2.0E-8, 2.7E-8); + waveTest.runL1ErrorTest(2.0E-8, 2.75E-8); #elif defined(PPMC) waveTest.runL1ErrorTest(1.4E-9, 1.3E-9); #endif // PCM @@ -681,7 +681,7 @@ TEST_P(tMHDSYSTEMLinearWavesParameterizedMpi, SlowMagnetosonicWaveLeftMovingCorr #ifdef PCM waveTest.runL1ErrorTest(4.E-7, 4.E-7); #elif defined(PLMC) - waveTest.runL1ErrorTest(2.0E-8, 2.7E-8); + waveTest.runL1ErrorTest(2.0E-8, 2.8E-8); #elif defined(PPMC) waveTest.runL1ErrorTest(1.4E-9, 1.3E-9); #endif // PCM From fafcf4f3946fd8856ce8bad6ac44b1d31faabc3c Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Thu, 18 Jan 2024 13:40:23 -0500 Subject: [PATCH 677/694] Update other builds to PLMC and VL default Updated template, basic_scalar, disk, dust, rot_proj, static_grav to use PLMC and Van Leer integrator --- builds/make.inc.template | 4 ++-- builds/make.type.basic_scalar | 6 +++--- builds/make.type.disk | 2 +- builds/make.type.dust | 2 +- builds/make.type.rot_proj | 6 +++--- builds/make.type.static_grav | 6 +++--- 6 files changed, 13 insertions(+), 13 deletions(-) diff --git a/builds/make.inc.template b/builds/make.inc.template index 3ae156225..98d2c146c 100644 --- a/builds/make.inc.template +++ b/builds/make.inc.template @@ -33,8 +33,8 @@ DFLAGS += -DHDF5 # Reconstruction #DFLAGS += -DPCM #DFLAGS += -DPLMP -#DFLAGS += -DPLMC -DFLAGS += -DPPMP +DFLAGS += -DPLMC +#DFLAGS += -DPPMP #DFLAGS += -DPPMC # Riemann Solver diff --git a/builds/make.type.basic_scalar b/builds/make.type.basic_scalar index d2dd75892..5aa4a5d0e 100644 --- a/builds/make.type.basic_scalar +++ b/builds/make.type.basic_scalar @@ -3,12 +3,12 @@ DFLAGS += -DCUDA DFLAGS += -DMPI_CHOLLA DFLAGS += -DPRECISION=2 -DFLAGS += -DPPMC +DFLAGS += -DPLMC DFLAGS += -DHLLC # Integrator -DFLAGS += -DSIMPLE -#DFLAGS += -DVL +# DFLAGS += -DSIMPLE +DFLAGS += -DVL # Apply a density and temperature floor DFLAGS += -DDENSITY_FLOOR diff --git a/builds/make.type.disk b/builds/make.type.disk index f2e3f0ec1..284b2c73d 100644 --- a/builds/make.type.disk +++ b/builds/make.type.disk @@ -24,7 +24,7 @@ DFLAGS += -DGRAVITY_5_POINTS_GRADIENT DFLAGS += -DCUDA DFLAGS += -DMPI_CHOLLA DFLAGS += -DPRECISION=2 -DFLAGS += -DPPMC +DFLAGS += -DPLMC DFLAGS += -DHLLC DFLAGS += -DVL diff --git a/builds/make.type.dust b/builds/make.type.dust index 24a765302..5addcacdc 100644 --- a/builds/make.type.dust +++ b/builds/make.type.dust @@ -9,7 +9,7 @@ MPI_GPU ?= DFLAGS += -DCUDA DFLAGS += -DMPI_CHOLLA DFLAGS += -DPRECISION=2 -DFLAGS += -DPLMP +DFLAGS += -DPLMC DFLAGS += -DHLLC DFLAGS += -DDE diff --git a/builds/make.type.rot_proj b/builds/make.type.rot_proj index e6faa7514..e29ab43e7 100644 --- a/builds/make.type.rot_proj +++ b/builds/make.type.rot_proj @@ -3,12 +3,12 @@ DFLAGS += -DCUDA DFLAGS += -DMPI_CHOLLA DFLAGS += -DPRECISION=2 -DFLAGS += -DPPMC +DFLAGS += -DPLMC DFLAGS += -DHLLC # Integrator -DFLAGS += -DSIMPLE -#DFLAGS += -DVL +# DFLAGS += -DSIMPLE +DFLAGS += -DVL # Apply a density and temperature floor DFLAGS += -DDENSITY_FLOOR diff --git a/builds/make.type.static_grav b/builds/make.type.static_grav index 4f13e7288..cd77643f2 100644 --- a/builds/make.type.static_grav +++ b/builds/make.type.static_grav @@ -3,12 +3,12 @@ DFLAGS += -DCUDA DFLAGS += -DMPI_CHOLLA DFLAGS += -DPRECISION=2 -DFLAGS += -DPPMC +DFLAGS += -DPLMC DFLAGS += -DHLLC # Integrator -DFLAGS += -DSIMPLE -#DFLAGS += -DVL +# DFLAGS += -DSIMPLE +DFLAGS += -DVL # Apply a density and temperature floor DFLAGS += -DDENSITY_FLOOR From 2215b2f643286f367458ce2c195ffab09a0b1a1b Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Mon, 22 Jan 2024 13:54:24 -0500 Subject: [PATCH 678/694] Make sure cosmology builds use SIMPLE integrator --- builds/make.type.hydro | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/builds/make.type.hydro b/builds/make.type.hydro index b35dbd9ae..1a96baaa9 100644 --- a/builds/make.type.hydro +++ b/builds/make.type.hydro @@ -7,8 +7,11 @@ DFLAGS += -DPLMC DFLAGS += -DHLLC # Integrator -# DFLAGS += -DSIMPLE +ifeq ($(findstring cosmology,$(TYPE)),cosmology) +DFLAGS += -DSIMPLE +else DFLAGS += -DVL +endif # Apply a density and temperature floor DFLAGS += -DDENSITY_FLOOR From ff09d5dc0939b98710652066a97610e6a5472c3d Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Tue, 23 Jan 2024 17:02:12 -0500 Subject: [PATCH 679/694] Fix clang-tidy warning --- src/global/global.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/global/global.cpp b/src/global/global.cpp index 1a867965c..2d3a23467 100644 --- a/src/global/global.cpp +++ b/src/global/global.cpp @@ -242,6 +242,10 @@ void Parse_Param(char *name, char *value, struct Parameters *parms) #endif // DE } else if (strcmp(name, "output_always") == 0) { int tmp = atoi(value); + // In this case the CHOLLA_ASSERT macro runs into issuse with the readability-simplify-boolean-expr clang-tidy check + // due to some weird macro expansion stuff. That check has been disabled here for now but in clang-tidy 18 the + // IgnoreMacro option should be used instead. + // NOLINTNEXTLINE(readability-simplify-boolean-expr) CHOLLA_ASSERT((tmp == 0) or (tmp == 1), "output_always must be 1 or 0."); parms->output_always = tmp; #ifdef MHD From b4589620da6750efceff188d213c6e46cdc4a8c5 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Tue, 23 Jan 2024 17:29:13 -0500 Subject: [PATCH 680/694] Replace #warning with print to cerr Avoids issues with running clang-tidy in non-VL builds --- src/reconstruction/plmc_cuda_tests.cu | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/reconstruction/plmc_cuda_tests.cu b/src/reconstruction/plmc_cuda_tests.cu index 0207a09ac..678f6329d 100644 --- a/src/reconstruction/plmc_cuda_tests.cu +++ b/src/reconstruction/plmc_cuda_tests.cu @@ -26,7 +26,10 @@ TEST(tHYDROPlmcReconstructor, CorrectInputExpectCorrectOutput) { #ifndef VL - #warning "The tHYDROPlmcReconstructor.CorrectInputExpectCorrectOutput only supports the Van Leer (VL) integrator" + std::cerr << "Warning: The tHYDROPlmcReconstructor.CorrectInputExpectCorrectOutput only supports the Van Leer (VL) " + "integrator" + << std::endl; + return; #endif // VL // Set up PRNG to use std::mt19937_64 prng(42); From 73816e40fa5e57d55c143c22dae9ea5b196149ed Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Fri, 15 Dec 2023 15:23:07 -0500 Subject: [PATCH 681/694] Remove deprecated CUDA macro --- builds/make.inc.template | 5 - builds/make.type.basic_scalar | 1 - builds/make.type.cloudy | 1 - builds/make.type.cooling | 1 - builds/make.type.disk | 1 - builds/make.type.dust | 1 - builds/make.type.hydro | 1 - builds/make.type.mhd | 1 - builds/make.type.rot_proj | 1 - builds/make.type.static_grav | 1 - src/cooling/cooling_cuda.cu | 76 ++++---- src/cooling/cooling_cuda.h | 14 +- src/cooling/load_cloudy_texture.cu | 22 +-- src/cooling/load_cloudy_texture.h | 10 +- src/cooling/texture_utilities.h | 11 +- src/global/global_cuda.cu | 6 +- src/global/global_cuda.h | 30 ++- src/gravity/gravity_functions.cpp | 5 +- src/gravity/static_grav.h | 11 +- src/grid/cuda_boundaries.h | 9 +- src/grid/grid3D.cpp | 48 ++--- src/h_correction/h_correction_2D_cuda.cu | 17 +- src/h_correction/h_correction_2D_cuda.h | 21 +-- src/h_correction/h_correction_3D_cuda.cu | 13 +- src/h_correction/h_correction_3D_cuda.h | 13 +- src/hydro/hydro_cuda.cu | 201 ++++++++++----------- src/hydro/hydro_cuda.h | 20 +- src/hydro/hydro_cuda_tests.cu | 4 - src/integrators/VL_1D_cuda.cu | 122 ++++++------- src/integrators/VL_1D_cuda.h | 11 +- src/integrators/VL_2D_cuda.cu | 108 ++++++----- src/integrators/VL_2D_cuda.h | 11 +- src/integrators/VL_3D_cuda.cu | 4 +- src/integrators/VL_3D_cuda.h | 11 +- src/integrators/simple_1D_cuda.cu | 88 +++++---- src/integrators/simple_1D_cuda.h | 11 +- src/integrators/simple_2D_cuda.cu | 80 ++++---- src/integrators/simple_2D_cuda.h | 11 +- src/integrators/simple_3D_cuda.cu | 110 ++++++----- src/integrators/simple_3D_cuda.h | 13 +- src/mpi/mpi_routines.cpp | 3 +- src/reconstruction/pcm_cuda.cu | 221 +++++++++++------------ src/reconstruction/pcm_cuda.h | 9 +- src/reconstruction/plmp_cuda.cu | 135 +++++++------- src/reconstruction/plmp_cuda.h | 11 +- src/reconstruction/ppmp_cuda.cu | 221 +++++++++++------------ src/reconstruction/ppmp_cuda.h | 11 +- src/riemann_solvers/exact_cuda.cu | 74 ++++---- src/riemann_solvers/exact_cuda.h | 11 +- src/riemann_solvers/hll_cuda.cu | 104 +++++------ src/riemann_solvers/hll_cuda.h | 11 +- src/riemann_solvers/hllc_cuda.cu | 136 +++++++------- src/riemann_solvers/hllc_cuda.h | 11 +- src/riemann_solvers/hlld_cuda.cu | 25 ++- src/riemann_solvers/hlld_cuda.h | 10 +- src/riemann_solvers/hlld_cuda_tests.cu | 56 +++--- src/riemann_solvers/roe_cuda.cu | 128 +++++++------ src/riemann_solvers/roe_cuda.h | 11 +- src/utils/error_check_cuda.cu | 20 +- src/utils/error_check_cuda.h | 15 +- src/utils/error_handling.cpp | 5 - src/utils/reduction_utilities.cu | 2 - src/utils/reduction_utilities.h | 30 ++- 63 files changed, 1109 insertions(+), 1276 deletions(-) diff --git a/builds/make.inc.template b/builds/make.inc.template index 98d2c146c..abfa97d1e 100644 --- a/builds/make.inc.template +++ b/builds/make.inc.template @@ -1,11 +1,6 @@ #POISSON_SOLVER ?= -DPFFT #DFLAGS += $(POISSON_SOLVER) - -#To use GPUs, CUDA must be turned on here -#Optional error checking can also be enabled -DFLAGS += -DCUDA - #To use MPI, DFLAGS must include -DMPI_CHOLLA DFLAGS += -DMPI_CHOLLA diff --git a/builds/make.type.basic_scalar b/builds/make.type.basic_scalar index 5aa4a5d0e..02706b223 100644 --- a/builds/make.type.basic_scalar +++ b/builds/make.type.basic_scalar @@ -1,6 +1,5 @@ #-- Default hydro build with BASIC_SCALAR -DFLAGS += -DCUDA DFLAGS += -DMPI_CHOLLA DFLAGS += -DPRECISION=2 DFLAGS += -DPLMC diff --git a/builds/make.type.cloudy b/builds/make.type.cloudy index 86e43315b..10fa51d60 100644 --- a/builds/make.type.cloudy +++ b/builds/make.type.cloudy @@ -6,7 +6,6 @@ OUTPUT ?= -DOUTPUT -DHDF5 MPI_GPU ?= -DFLAGS += -DCUDA DFLAGS += -DMPI_CHOLLA DFLAGS += -DPRECISION=2 DFLAGS += -DPPMP diff --git a/builds/make.type.cooling b/builds/make.type.cooling index 85dedc25d..0b96722a0 100644 --- a/builds/make.type.cooling +++ b/builds/make.type.cooling @@ -6,7 +6,6 @@ OUTPUT ?= -DOUTPUT -DHDF5 MPI_GPU ?= -DFLAGS += -DCUDA DFLAGS += -DMPI_CHOLLA DFLAGS += -DPRECISION=2 DFLAGS += -DPPMP diff --git a/builds/make.type.disk b/builds/make.type.disk index 284b2c73d..47bb22829 100644 --- a/builds/make.type.disk +++ b/builds/make.type.disk @@ -21,7 +21,6 @@ DFLAGS += -DGRAVITY_5_POINTS_GRADIENT #DFLAGS += -DSTATIC_GRAV -DFLAGS += -DCUDA DFLAGS += -DMPI_CHOLLA DFLAGS += -DPRECISION=2 DFLAGS += -DPLMC diff --git a/builds/make.type.dust b/builds/make.type.dust index b6b808202..fbb75c66f 100644 --- a/builds/make.type.dust +++ b/builds/make.type.dust @@ -6,7 +6,6 @@ OUTPUT ?= -DOUTPUT -DHDF5 MPI_GPU ?= -DFLAGS += -DCUDA DFLAGS += -DMPI_CHOLLA DFLAGS += -DPRECISION=2 DFLAGS += -DPLMC diff --git a/builds/make.type.hydro b/builds/make.type.hydro index 1a96baaa9..9e9b1d77c 100644 --- a/builds/make.type.hydro +++ b/builds/make.type.hydro @@ -1,6 +1,5 @@ #-- Default hydro only build -DFLAGS += -DCUDA DFLAGS += -DMPI_CHOLLA DFLAGS += -DPRECISION=2 DFLAGS += -DPLMC diff --git a/builds/make.type.mhd b/builds/make.type.mhd index d08e6373e..6348c173e 100644 --- a/builds/make.type.mhd +++ b/builds/make.type.mhd @@ -6,7 +6,6 @@ OUTPUT ?= -DOUTPUT -DHDF5 MPI_GPU ?= -DFLAGS += -DCUDA DFLAGS += -DMPI_CHOLLA DFLAGS += -DPRECISION=2 DFLAGS += -DPLMC diff --git a/builds/make.type.rot_proj b/builds/make.type.rot_proj index e29ab43e7..22a733e0b 100644 --- a/builds/make.type.rot_proj +++ b/builds/make.type.rot_proj @@ -1,6 +1,5 @@ #-- Default hydro only build with rotated projection -DFLAGS += -DCUDA DFLAGS += -DMPI_CHOLLA DFLAGS += -DPRECISION=2 DFLAGS += -DPLMC diff --git a/builds/make.type.static_grav b/builds/make.type.static_grav index cd77643f2..2c17f7e8b 100644 --- a/builds/make.type.static_grav +++ b/builds/make.type.static_grav @@ -1,6 +1,5 @@ #-- Default hydro only build with static_grav -DFLAGS += -DCUDA DFLAGS += -DMPI_CHOLLA DFLAGS += -DPRECISION=2 DFLAGS += -DPLMC diff --git a/src/cooling/cooling_cuda.cu b/src/cooling/cooling_cuda.cu index 192a1848d..5cbebbb72 100644 --- a/src/cooling/cooling_cuda.cu +++ b/src/cooling/cooling_cuda.cu @@ -1,19 +1,18 @@ /*! \file cooling_cuda.cu * \brief Functions to calculate cooling rate for a given rho, P, dt. */ -#ifdef CUDA - #ifdef COOLING_GPU +#ifdef COOLING_GPU - #include + #include - #include "../cooling/cooling_cuda.h" - #include "../global/global.h" - #include "../global/global_cuda.h" - #include "../utils/gpu.hpp" + #include "../cooling/cooling_cuda.h" + #include "../global/global.h" + #include "../global/global_cuda.h" + #include "../utils/gpu.hpp" - #ifdef CLOUDY_COOL - #include "../cooling/texture_utilities.h" - #endif + #ifdef CLOUDY_COOL + #include "../cooling/texture_utilities.h" + #endif cudaTextureObject_t coolTexObj = 0; cudaTextureObject_t heatTexObj = 0; @@ -64,10 +63,10 @@ __global__ void cooling_kernel(Real *dev_conserved, int nx, int ny, int nz, int Real cool; // cooling rate per volume, erg/s/cm^3 // #ifndef DE Real vx, vy, vz, p; - // #endif - #ifdef DE + // #endif + #ifdef DE Real ge; - #endif + #endif mu = 0.6; // mu = 1.27; @@ -94,29 +93,29 @@ __global__ void cooling_kernel(Real *dev_conserved, int nx, int ny, int nz, int vz = dev_conserved[3 * n_cells + id] / d; p = (E - 0.5 * d * (vx * vx + vy * vy + vz * vz)) * (gamma - 1.0); p = fmax(p, (Real)TINY_NUMBER); - // #endif - #ifdef DE + // #endif + #ifdef DE ge = dev_conserved[(n_fields - 1) * n_cells + id] / d; ge = fmax(ge, (Real)TINY_NUMBER); - #endif + #endif // calculate the number density of the gas (in cgs) n = d * DENSITY_UNIT / (mu * MP); // calculate the temperature of the gas T_init = p * PRESSURE_UNIT / (n * KB); - #ifdef DE + #ifdef DE T_init = d * ge * (gamma - 1.0) * PRESSURE_UNIT / (n * KB); - #endif + #endif // calculate cooling rate per volume T = T_init; - // call the cooling function - #ifdef CLOUDY_COOL + // call the cooling function + #ifdef CLOUDY_COOL cool = Cloudy_cool(n, T, coolTexObj, heatTexObj); - #else + #else cool = CIE_cool(n, T); - #endif + #endif // calculate change in temperature given dt del_T = cool * dt * TIME_UNIT * (gamma - 1.0) / (n * KB); @@ -129,12 +128,12 @@ __global__ void cooling_kernel(Real *dev_conserved, int nx, int ny, int nz, int T -= cool * dt_sub * TIME_UNIT * (gamma - 1.0) / (n * KB); // how much time is left from the original timestep? dt -= dt_sub; - // calculate cooling again - #ifdef CLOUDY_COOL + // calculate cooling again + #ifdef CLOUDY_COOL cool = Cloudy_cool(n, T, coolTexObj, heatTexObj); - #else + #else cool = CIE_cool(n, T); - #endif + #endif // calculate new change in temperature del_T = cool * dt * TIME_UNIT * (gamma - 1.0) / (n * KB); } @@ -145,23 +144,23 @@ __global__ void cooling_kernel(Real *dev_conserved, int nx, int ny, int nz, int // adjust value of energy based on total change in temperature del_T = T_init - T; // total change in T E -= n * KB * del_T / ((gamma - 1.0) * ENERGY_UNIT); - #ifdef DE + #ifdef DE ge -= KB * del_T / (mu * MP * (gamma - 1.0) * SP_ENERGY_UNIT); - #endif + #endif - // calculate cooling rate for new T - #ifdef CLOUDY_COOL + // calculate cooling rate for new T + #ifdef CLOUDY_COOL cool = Cloudy_cool(n, T, coolTexObj, heatTexObj); - #else + #else cool = CIE_cool(n, T); - // printf("%d %d %d %e %e %e\n", xid, yid, zid, n, T, cool); - #endif + // printf("%d %d %d %e %e %e\n", xid, yid, zid, n, T, cool); + #endif // and send back from kernel dev_conserved[4 * n_cells + id] = E; - #ifdef DE + #ifdef DE dev_conserved[(n_fields - 1) * n_cells + id] = d * ge; - #endif + #endif } } @@ -317,7 +316,7 @@ __device__ Real CIE_cool(Real n, Real T) return cool; } - #ifdef CLOUDY_COOL + #ifdef CLOUDY_COOL /* \fn __device__ Real Cloudy_cool(Real n, Real T, cudaTextureObject_t coolTexObj, cudaTextureObject_t heatTexObj) * \brief Uses texture mapping to interpolate Cloudy cooling/heating @@ -353,7 +352,6 @@ __device__ Real Cloudy_cool(Real n, Real T, cudaTextureObject_t coolTexObj, cuda // printf("DEBUG Cloudy L350: %.17e\n",cool); return cool; } - #endif // CLOUDY_COOL + #endif // CLOUDY_COOL - #endif // COOLING_GPU -#endif // CUDA +#endif // COOLING_GPU diff --git a/src/cooling/cooling_cuda.h b/src/cooling/cooling_cuda.h index 32fa3207a..d9105fde3 100644 --- a/src/cooling/cooling_cuda.h +++ b/src/cooling/cooling_cuda.h @@ -1,14 +1,13 @@ /*! \file cooling_cuda.h * \brief Declarations of cooling functions. */ -#ifdef CUDA - #ifdef COOLING_GPU - #pragma once +#ifdef COOLING_GPU + #pragma once - #include + #include - #include "../global/global.h" - #include "../utils/gpu.hpp" + #include "../global/global.h" + #include "../utils/gpu.hpp" extern cudaTextureObject_t coolTexObj; extern cudaTextureObject_t heatTexObj; @@ -48,5 +47,4 @@ __device__ Real CIE_cool(Real n, Real T); tables at z = 0 with solar metallicity and an HM05 UV background. */ __device__ Real Cloudy_cool(Real n, Real T, cudaTextureObject_t coolTexObj, cudaTextureObject_t heatTexObj); - #endif // COOLING_GPU -#endif // CUDA +#endif // COOLING_GPU diff --git a/src/cooling/load_cloudy_texture.cu b/src/cooling/load_cloudy_texture.cu index 8ac7e602c..5e0f2d460 100644 --- a/src/cooling/load_cloudy_texture.cu +++ b/src/cooling/load_cloudy_texture.cu @@ -1,18 +1,17 @@ /*! \file load_cloudy_texture.cu * \brief Wrapper file to load cloudy cooling table as CUDA texture. */ -#ifdef CUDA - #ifdef CLOUDY_COOL +#ifdef CLOUDY_COOL - #include - #include + #include + #include - #include "../cooling/cooling_cuda.h" - #include "../cooling/load_cloudy_texture.h" - #include "../cooling/texture_utilities.h" - #include "../global/global.h" - #include "../global/global_cuda.h" - #include "../io/io.h" // provides chprintf + #include "../cooling/cooling_cuda.h" + #include "../cooling/load_cloudy_texture.h" + #include "../cooling/texture_utilities.h" + #include "../global/global.h" + #include "../global/global_cuda.h" + #include "../io/io.h" // provides chprintf cudaArray *cuCoolArray; cudaArray *cuHeatArray; @@ -284,5 +283,4 @@ void Test_Cloudy_Speed() exit(0); } - #endif -#endif +#endif // CLOUDY_COOL diff --git a/src/cooling/load_cloudy_texture.h b/src/cooling/load_cloudy_texture.h index 3da31e4dd..7d6307f71 100644 --- a/src/cooling/load_cloudy_texture.h +++ b/src/cooling/load_cloudy_texture.h @@ -1,12 +1,11 @@ /*! \file load_cloudy_texture.h * \brief Wrapper file to load cloudy cooling table as CUDA texture. */ -#ifdef CUDA - #ifdef CLOUDY_COOL +#ifdef CLOUDY_COOL - #pragma once + #pragma once - #include "../global/global.h" + #include "../global/global.h" /* \fn void Load_Cuda_Textures() * \brief Load the Cloudy cooling tables into texture memory on the GPU. */ @@ -17,5 +16,4 @@ void Load_Cuda_Textures(); * arrays. */ void Free_Cuda_Textures(); - #endif -#endif +#endif // CLOUDY_COOL diff --git a/src/cooling/texture_utilities.h b/src/cooling/texture_utilities.h index 56d1ac82b..fc335bcf7 100644 --- a/src/cooling/texture_utilities.h +++ b/src/cooling/texture_utilities.h @@ -5,13 +5,12 @@ // would be included into a .cpp file because tex2D is undefined when compiling // with gcc. -#ifdef CUDA - #pragma once +#pragma once - #include +#include - #include "../global/global.h" - #include "../utils/gpu.hpp" +#include "../global/global.h" +#include "../utils/gpu.hpp" inline __device__ float lerp(float v0, float v1, float f) { return fma(f, v1, fma(-f, v0, v0)); } @@ -40,5 +39,3 @@ inline __device__ float Bilinear_Texture(cudaTextureObject_t tex, float x, float // The outer lerp interpolates along y return lerp(lerp(t00, t10, fx), lerp(t01, t11, fx), fy); } - -#endif // CUDA diff --git a/src/global/global_cuda.cu b/src/global/global_cuda.cu index 4a34b1630..17c515416 100644 --- a/src/global/global_cuda.cu +++ b/src/global/global_cuda.cu @@ -1,9 +1,7 @@ /*! \file global_cuda.cu * \brief Declarations of the cuda global variables. */ -#ifdef CUDA - - #include "../global/global.h" +#include "../global/global.h" // Declare global variables bool memory_allocated; @@ -16,5 +14,3 @@ Real *eta_x, *eta_y, *eta_z, *etah_x, *etah_y, *etah_z; Real *dev_grav_potential; Real *temp_potential; Real *buffer_potential; - -#endif // CUDA diff --git a/src/global/global_cuda.h b/src/global/global_cuda.h index 23e3b3dec..3f4d3148e 100644 --- a/src/global/global_cuda.h +++ b/src/global/global_cuda.h @@ -5,16 +5,14 @@ #ifndef GLOBAL_CUDA_H #define GLOBAL_CUDA_H -#ifdef CUDA +#include +#include +#include - #include - #include - #include +#include "../global/global.h" +#include "../utils/gpu.hpp" - #include "../global/global.h" - #include "../utils/gpu.hpp" - - #define TPB 256 // threads per block +#define TPB 256 // threads per block // #define TPB 64 extern bool memory_allocated; // Flag becomes true after allocating the memory @@ -49,9 +47,9 @@ __device__ inline int sgn_CUDA(Real x) } } - // Define atomic_add if it's not supported - #if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 600 - #else +// Define atomic_add if it's not supported +#if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 600 +#else __device__ double atomicAdd(double *address, double val) { unsigned long long int *address_as_ull = (unsigned long long int *)address; @@ -62,12 +60,10 @@ __device__ double atomicAdd(double *address, double val) } while (assumed != old); return __longlong_as_double(old); } - #endif +#endif - // This helper function exists to make it easier to find printfs inside - // kernels - #define kernel_printf printf +// This helper function exists to make it easier to find printfs inside +// kernels +#define kernel_printf printf #endif // GLOBAL_CUDA_H - -#endif // CUDA diff --git a/src/gravity/gravity_functions.cpp b/src/gravity/gravity_functions.cpp index b92d06564..744f55825 100644 --- a/src/gravity/gravity_functions.cpp +++ b/src/gravity/gravity_functions.cpp @@ -5,12 +5,9 @@ #include "../global/global.h" #include "../grid/grid3D.h" #include "../io/io.h" + #include "../mpi/cuda_mpi_routines.h" #include "../utils/error_handling.h" - #ifdef CUDA - #include "../mpi/cuda_mpi_routines.h" - #endif - #ifdef PARALLEL_OMP #include "../utils/parallel_omp.h" #endif diff --git a/src/gravity/static_grav.h b/src/gravity/static_grav.h index 9b05181c1..e671555bf 100644 --- a/src/gravity/static_grav.h +++ b/src/gravity/static_grav.h @@ -2,14 +2,13 @@ * \brief Definitions of functions to calculate gravitational acceleration in 1, 2, and 3D. Called in Update_Conserved_Variables functions in hydro_cuda.cu. */ -#ifdef CUDA - #pragma once +#pragma once - #include // provides sqrt log cos sin atan etc. - #include +#include // provides sqrt log cos sin atan etc. +#include - #include "../global/global.h" // provides GN etc. +#include "../global/global.h" // provides GN etc. // Work around lack of pow(Real,int) in Hip Clang for Rocm 3.5 static inline __device__ Real pow2(const Real x) { return x * x; } @@ -209,5 +208,3 @@ inline __device__ void calc_g_3D(int xid, int yid, int zid, int x_off, int y_off } return; } - -#endif // CUDA diff --git a/src/grid/cuda_boundaries.h b/src/grid/cuda_boundaries.h index 0c2617720..bbf0a5ab8 100644 --- a/src/grid/cuda_boundaries.h +++ b/src/grid/cuda_boundaries.h @@ -1,7 +1,6 @@ -#ifdef CUDA - #include "../global/global.h" - #include "../global/global_cuda.h" - #include "../utils/gpu.hpp" +#include "../global/global.h" +#include "../global/global_cuda.h" +#include "../utils/gpu.hpp" // void PackBuffers3D(Real * buffer, Real * c_head, int isize, int jsize, int // ksize, int nx, int ny, int idxoffset, int offset, int n_fields, int n_cells); @@ -22,5 +21,3 @@ void Wind_Boundary_CUDA(Real* c_device, int nx, int ny, int nz, int n_cells, int void Noh_Boundary_CUDA(Real* c_device, int nx, int ny, int nz, int n_cells, int n_ghost, int x_off, int y_off, int z_off, Real dx, Real dy, Real dz, Real xbound, Real ybound, Real zbound, Real gamma, Real t); - -#endif diff --git a/src/grid/grid3D.cpp b/src/grid/grid3D.cpp index b315d7764..8e8725240 100644 --- a/src/grid/grid3D.cpp +++ b/src/grid/grid3D.cpp @@ -441,40 +441,34 @@ void Grid3D::Execute_Hydro_Integrator(void) // Run the hydro integrator on the grid if (H.nx > 1 && H.ny == 1 && H.nz == 1) // 1D { -#ifdef CUDA - #ifdef VL +#ifdef VL VL_Algorithm_1D_CUDA(C.device, H.nx, x_off, H.n_ghost, H.dx, H.xbound, H.dt, H.n_fields, H.custom_grav); - #endif // VL - #ifdef SIMPLE +#endif // VL +#ifdef SIMPLE Simple_Algorithm_1D_CUDA(C.device, H.nx, x_off, H.n_ghost, H.dx, H.xbound, H.dt, H.n_fields, H.custom_grav); - #endif // SIMPLE -#endif // CUDA +#endif // SIMPLE } else if (H.nx > 1 && H.ny > 1 && H.nz == 1) // 2D { -#ifdef CUDA - #ifdef VL +#ifdef VL VL_Algorithm_2D_CUDA(C.device, H.nx, H.ny, x_off, y_off, H.n_ghost, H.dx, H.dy, H.xbound, H.ybound, H.dt, H.n_fields, H.custom_grav); - #endif // VL - #ifdef SIMPLE +#endif // VL +#ifdef SIMPLE Simple_Algorithm_2D_CUDA(C.device, H.nx, H.ny, x_off, y_off, H.n_ghost, H.dx, H.dy, H.xbound, H.ybound, H.dt, H.n_fields, H.custom_grav); - #endif // SIMPLE -#endif // CUDA +#endif // SIMPLE } else if (H.nx > 1 && H.ny > 1 && H.nz > 1) // 3D { -#ifdef CUDA - #ifdef VL +#ifdef VL VL_Algorithm_3D_CUDA(C.device, C.d_Grav_potential, H.nx, H.ny, H.nz, x_off, y_off, z_off, H.n_ghost, H.dx, H.dy, H.dz, H.xbound, H.ybound, H.zbound, H.dt, H.n_fields, H.custom_grav, density_floor, U_floor, C.Grav_potential); - #endif // VL - #ifdef SIMPLE +#endif // VL +#ifdef SIMPLE Simple_Algorithm_3D_CUDA(C.device, C.d_Grav_potential, H.nx, H.ny, H.nz, x_off, y_off, z_off, H.n_ghost, H.dx, H.dy, H.dz, H.xbound, H.ybound, H.zbound, H.dt, H.n_fields, H.custom_grav, density_floor, U_floor, C.Grav_potential); - #endif // SIMPLE -#endif +#endif // SIMPLE } else { chprintf("Error: Grid dimensions nx: %d ny: %d nz: %d not supported.\n", H.nx, H.ny, H.nz); chexit(-1); @@ -508,27 +502,23 @@ Real Grid3D::Update_Hydro_Grid() // == Perform chemistry/cooling (there are a few different cases) == -#ifdef CUDA - - #ifdef COOLING_GPU - #ifdef CPU_TIME +#ifdef COOLING_GPU + #ifdef CPU_TIME Timer.Cooling_GPU.Start(); - #endif + #endif // ==Apply Cooling from cooling/cooling_cuda.h== Cooling_Update(C.device, H.nx, H.ny, H.nz, H.n_ghost, H.n_fields, H.dt, gama); - #ifdef CPU_TIME + #ifdef CPU_TIME Timer.Cooling_GPU.End(); - #endif + #endif - #endif // COOLING_GPU +#endif // COOLING_GPU - #ifdef DUST +#ifdef DUST // ==Apply dust from dust/dust_cuda.h== Dust_Update(C.device, H.nx, H.ny, H.nz, H.n_ghost, H.n_fields, H.dt, gama, H.grain_radius); #endif // DUST -#endif // CUDA - #ifdef CHEMISTRY_GPU // Update the H and He ionization fractions and apply cooling and photoheating Update_Chemistry(); diff --git a/src/h_correction/h_correction_2D_cuda.cu b/src/h_correction/h_correction_2D_cuda.cu index f13827644..d4e65d7cc 100644 --- a/src/h_correction/h_correction_2D_cuda.cu +++ b/src/h_correction/h_correction_2D_cuda.cu @@ -1,15 +1,15 @@ /*! \file h_correction_2D_cuda.cu * \brief Functions definitions for the H correction kernels. Written following Sanders et al. 1998. */ -#ifdef CUDA - #ifdef H_CORRECTION - #include +#ifdef H_CORRECTION - #include "../global/global.h" - #include "../global/global_cuda.h" - #include "../h_correction/h_correction_2D_cuda.h" - #include "../utils/gpu.hpp" + #include + + #include "../global/global.h" + #include "../global/global_cuda.h" + #include "../h_correction/h_correction_2D_cuda.h" + #include "../utils/gpu.hpp" /*! \fn void calc_eta_x_2D(Real *dev_bounds_L, Real *dev_bounds_R, Real *eta_x, int nx, int ny, int n_ghost, Real gamma) @@ -165,5 +165,4 @@ __global__ void calc_etah_y_2D(Real *eta_x, Real *eta_y, Real *etah_y, int nx, i } } - #endif // H_CORRECTION -#endif // CUDA +#endif // H_CORRECTION diff --git a/src/h_correction/h_correction_2D_cuda.h b/src/h_correction/h_correction_2D_cuda.h index 4fc213133..9d824cf42 100644 --- a/src/h_correction/h_correction_2D_cuda.h +++ b/src/h_correction/h_correction_2D_cuda.h @@ -1,16 +1,16 @@ /*! \file h_correction_2D_cuda.h * \brief Functions declarations for the H correction kernels. Written following Sanders et al. 1998. */ -#ifdef CUDA - #ifdef H_CORRECTION - #ifndef H_CORRECTION_2D_H - #define H_CORRECTION_2D_H - #include +#ifdef H_CORRECTION + #ifndef H_CORRECTION_2D_H + #define H_CORRECTION_2D_H - #include "../global/global.h" - #include "../global/global_cuda.h" - #include "../utils/gpu.hpp" + #include + + #include "../global/global.h" + #include "../global/global_cuda.h" + #include "../utils/gpu.hpp" /*! \fn void calc_eta_x(Real *dev_bounds_L, Real *dev_bounds_R, Real *eta_x, int nx, int ny, int nz, int n_ghost, Real gamma) @@ -42,6 +42,5 @@ __global__ void calc_etah_x_2D(Real *eta_x, Real *eta_y, Real *etah_x, int nx, i Sanders et al, 1998. */ __global__ void calc_etah_y_2D(Real *eta_x, Real *eta_y, Real *etah_y, int nx, int ny, int n_ghost); - #endif // H_CORRECTION_2D_H - #endif // H_CORRECTION -#endif // CUDA + #endif // H_CORRECTION_2D_H +#endif // H_CORRECTION diff --git a/src/h_correction/h_correction_3D_cuda.cu b/src/h_correction/h_correction_3D_cuda.cu index 716332607..b3609b529 100644 --- a/src/h_correction/h_correction_3D_cuda.cu +++ b/src/h_correction/h_correction_3D_cuda.cu @@ -1,14 +1,13 @@ /*! \file h_correction_3D_cuda.cu * \brief Functions definitions for the H correction kernels. Written following Sanders et al. 1998. */ -#ifdef CUDA - #include +#include - #include "../global/global.h" - #include "../global/global_cuda.h" - #include "../h_correction/h_correction_3D_cuda.h" - #include "../utils/gpu.hpp" +#include "../global/global.h" +#include "../global/global_cuda.h" +#include "../h_correction/h_correction_3D_cuda.h" +#include "../utils/gpu.hpp" /*! \fn void calc_eta_x_3D(Real *dev_bounds_L, Real *dev_bounds_R, Real *eta_x, int nx, int ny, int nz, int n_ghost, Real gamma) @@ -264,5 +263,3 @@ __global__ void calc_etah_z_3D(Real *eta_x, Real *eta_y, Real *eta_z, Real *etah etah_z[id] = etah; } } - -#endif // CUDA diff --git a/src/h_correction/h_correction_3D_cuda.h b/src/h_correction/h_correction_3D_cuda.h index bb2aedd89..c1d2f8a49 100644 --- a/src/h_correction/h_correction_3D_cuda.h +++ b/src/h_correction/h_correction_3D_cuda.h @@ -1,12 +1,12 @@ /*! \file h_correction_3D_cuda.h * \brief Functions declarations for the H correction kernels. Written following Sanders et al. 1998. */ -#ifdef CUDA - #ifndef H_CORRECTION_3D_H - #define H_CORRECTION_3D_H - #include "../global/global.h" - #include "../utils/gpu.hpp" +#ifndef H_CORRECTION_3D_H +#define H_CORRECTION_3D_H + +#include "../global/global.h" +#include "../utils/gpu.hpp" /*! \fn void calc_eta_x(Real *dev_bounds_L, Real *dev_bounds_R, Real *eta_x, int nx, int ny, int nz, int n_ghost, Real gamma) @@ -56,5 +56,4 @@ __global__ void calc_etah_y_3D(Real *eta_x, Real *eta_y, Real *eta_z, Real *etah __global__ void calc_etah_z_3D(Real *eta_x, Real *eta_y, Real *eta_z, Real *etah_z, int nx, int ny, int nz, int n_ghost); - #endif // H_CORRECTION_3D_H -#endif // CUDA +#endif // H_CORRECTION_3D_H diff --git a/src/hydro/hydro_cuda.cu b/src/hydro/hydro_cuda.cu index db16dd59e..33f739449 100644 --- a/src/hydro/hydro_cuda.cu +++ b/src/hydro/hydro_cuda.cu @@ -1,32 +1,31 @@ /*! \file hydro_cuda.cu * \brief Definitions of functions used in all cuda integration algorithms. */ -#ifdef CUDA - #include - #include - #include +#include +#include +#include - #include +#include - #include "../global/global.h" - #include "../global/global_cuda.h" - #include "../gravity/static_grav.h" - #include "../hydro/hydro_cuda.h" - #include "../utils/DeviceVector.h" - #include "../utils/cuda_utilities.h" - #include "../utils/gpu.hpp" - #include "../utils/hydro_utilities.h" - #include "../utils/reduction_utilities.h" +#include "../global/global.h" +#include "../global/global_cuda.h" +#include "../gravity/static_grav.h" +#include "../hydro/hydro_cuda.h" +#include "../utils/DeviceVector.h" +#include "../utils/cuda_utilities.h" +#include "../utils/gpu.hpp" +#include "../utils/hydro_utilities.h" +#include "../utils/reduction_utilities.h" __global__ void Update_Conserved_Variables_1D(Real *dev_conserved, Real *dev_F, int n_cells, int x_off, int n_ghost, Real dx, Real xbound, Real dt, Real gamma, int n_fields, int custom_grav) { int id; - #ifdef STATIC_GRAV +#ifdef STATIC_GRAV Real d, d_inv, vx; Real gx, d_n, d_inv_n, vx_n; gx = 0.0; - #endif +#endif Real dtodx = dt / dx; @@ -35,11 +34,11 @@ __global__ void Update_Conserved_Variables_1D(Real *dev_conserved, Real *dev_F, // threads corresponding to real cells do the calculation if (id > n_ghost - 1 && id < n_cells - n_ghost) { - #ifdef STATIC_GRAV +#ifdef STATIC_GRAV d = dev_conserved[id]; d_inv = 1.0 / d; vx = dev_conserved[1 * n_cells + id] * d_inv; - #endif +#endif // update the conserved variable array dev_conserved[id] += dtodx * (dev_F[id - 1] - dev_F[id]); @@ -47,25 +46,25 @@ __global__ void Update_Conserved_Variables_1D(Real *dev_conserved, Real *dev_F, dev_conserved[2 * n_cells + id] += dtodx * (dev_F[2 * n_cells + id - 1] - dev_F[2 * n_cells + id]); dev_conserved[3 * n_cells + id] += dtodx * (dev_F[3 * n_cells + id - 1] - dev_F[3 * n_cells + id]); dev_conserved[4 * n_cells + id] += dtodx * (dev_F[4 * n_cells + id - 1] - dev_F[4 * n_cells + id]); - #ifdef SCALAR +#ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { dev_conserved[(5 + i) * n_cells + id] += dtodx * (dev_F[(5 + i) * n_cells + id - 1] - dev_F[(5 + i) * n_cells + id]); } - #endif - #ifdef DE +#endif +#ifdef DE dev_conserved[(n_fields - 1) * n_cells + id] += dtodx * (dev_F[(n_fields - 1) * n_cells + id - 1] - dev_F[(n_fields - 1) * n_cells + id]); - #endif - #ifdef STATIC_GRAV // add gravitational source terms, time averaged from n to - // n+1 +#endif +#ifdef STATIC_GRAV // add gravitational source terms, time averaged from n to + // n+1 calc_g_1D(id, x_off, n_ghost, custom_grav, dx, xbound, &gx); d_n = dev_conserved[id]; d_inv_n = 1.0 / d_n; vx_n = dev_conserved[1 * n_cells + id] * d_inv_n; dev_conserved[n_cells + id] += 0.5 * dt * gx * (d + d_n); dev_conserved[4 * n_cells + id] += 0.25 * dt * gx * (d + d_n) * (vx + vx_n); - #endif +#endif if (dev_conserved[id] != dev_conserved[id]) { printf("%3d Thread crashed in final update. %f\n", id, dev_conserved[id]); } @@ -89,12 +88,12 @@ __global__ void Update_Conserved_Variables_2D(Real *dev_conserved, Real *dev_F_x int id, xid, yid, n_cells; int imo, jmo; - #ifdef STATIC_GRAV +#ifdef STATIC_GRAV Real d, d_inv, vx, vy; Real gx, gy, d_n, d_inv_n, vx_n, vy_n; gx = 0.0; gy = 0.0; - #endif +#endif Real dtodx = dt / dx; Real dtody = dt / dy; @@ -111,12 +110,12 @@ __global__ void Update_Conserved_Variables_2D(Real *dev_conserved, Real *dev_F_x // threads corresponding to real cells do the calculation if (xid > n_ghost - 1 && xid < nx - n_ghost && yid > n_ghost - 1 && yid < ny - n_ghost) { - #ifdef STATIC_GRAV +#ifdef STATIC_GRAV d = dev_conserved[id]; d_inv = 1.0 / d; vx = dev_conserved[1 * n_cells + id] * d_inv; vy = dev_conserved[2 * n_cells + id] * d_inv; - #endif +#endif // update the conserved variable array dev_conserved[id] += dtodx * (dev_F_x[imo] - dev_F_x[id]) + dtody * (dev_F_y[jmo] - dev_F_y[id]); dev_conserved[n_cells + id] += dtodx * (dev_F_x[n_cells + imo] - dev_F_x[n_cells + id]) + @@ -127,19 +126,19 @@ __global__ void Update_Conserved_Variables_2D(Real *dev_conserved, Real *dev_F_x dtody * (dev_F_y[3 * n_cells + jmo] - dev_F_y[3 * n_cells + id]); dev_conserved[4 * n_cells + id] += dtodx * (dev_F_x[4 * n_cells + imo] - dev_F_x[4 * n_cells + id]) + dtody * (dev_F_y[4 * n_cells + jmo] - dev_F_y[4 * n_cells + id]); - #ifdef SCALAR +#ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { dev_conserved[(5 + i) * n_cells + id] += dtodx * (dev_F_x[(5 + i) * n_cells + imo] - dev_F_x[(5 + i) * n_cells + id]) + dtody * (dev_F_y[(5 + i) * n_cells + jmo] - dev_F_y[(5 + i) * n_cells + id]); } - #endif - #ifdef DE +#endif +#ifdef DE dev_conserved[(n_fields - 1) * n_cells + id] += dtodx * (dev_F_x[(n_fields - 1) * n_cells + imo] - dev_F_x[(n_fields - 1) * n_cells + id]) + dtody * (dev_F_y[(n_fields - 1) * n_cells + jmo] - dev_F_y[(n_fields - 1) * n_cells + id]); - #endif - #ifdef STATIC_GRAV +#endif +#ifdef STATIC_GRAV // calculate the gravitational acceleration as a function of x & y position calc_g_2D(xid, yid, x_off, y_off, n_ghost, custom_grav, dx, dy, xbound, ybound, &gx, &gy); // add gravitational source terms, time averaged from n to n+1 @@ -151,7 +150,7 @@ __global__ void Update_Conserved_Variables_2D(Real *dev_conserved, Real *dev_F_x dev_conserved[2 * n_cells + id] += 0.5 * dt * gy * (d + d_n); dev_conserved[4 * n_cells + id] += 0.25 * dt * gx * (d + d_n) * (vx + vx_n) + 0.25 * dt * gy * (d + d_n) * (vy + vy_n); - #endif +#endif if (dev_conserved[id] < 0.0 || dev_conserved[id] != dev_conserved[id]) { printf("%3d %3d Thread crashed in final update. %f %f %f\n", xid, yid, dtodx * (dev_F_x[imo] - dev_F_x[id]), dtody * (dev_F_y[jmo] - dev_F_y[id]), dev_conserved[id]); @@ -180,19 +179,19 @@ __global__ void Update_Conserved_Variables_3D(Real *dev_conserved, Real *Q_Lx, R int id, xid, yid, zid, n_cells; int imo, jmo, kmo; - #ifdef STATIC_GRAV +#ifdef STATIC_GRAV Real d, d_inv, vx, vy, vz; Real gx, gy, gz, d_n, d_inv_n, vx_n, vy_n, vz_n; gx = 0.0; gy = 0.0; gz = 0.0; - #endif +#endif - #ifdef DENSITY_FLOOR +#ifdef DENSITY_FLOOR Real dens_0; - #endif +#endif - #ifdef GRAVITY +#ifdef GRAVITY Real d, d_inv, vx, vy, vz; Real gx, gy, gz, d_n, d_inv_n, vx_n, vy_n, vz_n; Real pot_l, pot_r; @@ -201,12 +200,12 @@ __global__ void Update_Conserved_Variables_3D(Real *dev_conserved, Real *Q_Lx, R gy = 0.0; gz = 0.0; - #ifdef GRAVITY_5_POINTS_GRADIENT + #ifdef GRAVITY_5_POINTS_GRADIENT int id_ll, id_rr; Real pot_ll, pot_rr; - #endif + #endif - #endif // GRAVITY +#endif // GRAVITY Real dtodx = dt / dx; Real dtody = dt / dy; @@ -225,13 +224,13 @@ __global__ void Update_Conserved_Variables_3D(Real *dev_conserved, Real *Q_Lx, R // threads corresponding to real cells do the calculation if (xid > n_ghost - 1 && xid < nx - n_ghost && yid > n_ghost - 1 && yid < ny - n_ghost && zid > n_ghost - 1 && zid < nz - n_ghost) { - #if defined(STATIC_GRAV) || defined(GRAVITY) +#if defined(STATIC_GRAV) || defined(GRAVITY) d = dev_conserved[id]; d_inv = 1.0 / d; vx = dev_conserved[1 * n_cells + id] * d_inv; vy = dev_conserved[2 * n_cells + id] * d_inv; vz = dev_conserved[3 * n_cells + id] * d_inv; - #endif +#endif // update the conserved variable array dev_conserved[id] += dtodx * (dev_F_x[imo] - dev_F_x[id]) + dtody * (dev_F_y[jmo] - dev_F_y[id]) + @@ -248,13 +247,13 @@ __global__ void Update_Conserved_Variables_3D(Real *dev_conserved, Real *Q_Lx, R dev_conserved[4 * n_cells + id] += dtodx * (dev_F_x[4 * n_cells + imo] - dev_F_x[4 * n_cells + id]) + dtody * (dev_F_y[4 * n_cells + jmo] - dev_F_y[4 * n_cells + id]) + dtodz * (dev_F_z[4 * n_cells + kmo] - dev_F_z[4 * n_cells + id]); - #ifdef SCALAR +#ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { dev_conserved[(5 + i) * n_cells + id] += dtodx * (dev_F_x[(5 + i) * n_cells + imo] - dev_F_x[(5 + i) * n_cells + id]) + dtody * (dev_F_y[(5 + i) * n_cells + jmo] - dev_F_y[(5 + i) * n_cells + id]) + dtodz * (dev_F_z[(5 + i) * n_cells + kmo] - dev_F_z[(5 + i) * n_cells + id]); - #ifdef COOLING_GRACKLE + #ifdef COOLING_GRACKLE // If the updated value is negative, then revert to the value before the // update if (dev_conserved[(5 + i) * n_cells + id] < 0) { @@ -263,21 +262,21 @@ __global__ void Update_Conserved_Variables_3D(Real *dev_conserved, Real *Q_Lx, R dtody * (dev_F_y[(5 + i) * n_cells + jmo] - dev_F_y[(5 + i) * n_cells + id]) + dtodz * (dev_F_z[(5 + i) * n_cells + kmo] - dev_F_z[(5 + i) * n_cells + id]); } - #endif - } #endif - #ifdef DE + } +#endif +#ifdef DE dev_conserved[(n_fields - 1) * n_cells + id] += dtodx * (dev_F_x[(n_fields - 1) * n_cells + imo] - dev_F_x[(n_fields - 1) * n_cells + id]) + dtody * (dev_F_y[(n_fields - 1) * n_cells + jmo] - dev_F_y[(n_fields - 1) * n_cells + id]) + dtodz * (dev_F_z[(n_fields - 1) * n_cells + kmo] - dev_F_z[(n_fields - 1) * n_cells + id]); - // + 0.5*P*(dtodx*(vx_imo-vx_ipo) + dtody*(vy_jmo-vy_jpo) + - // dtodz*(vz_kmo-vz_kpo)); - // Note: this term is added in a separate kernel to avoid synchronization - // issues - #endif + // + 0.5*P*(dtodx*(vx_imo-vx_ipo) + dtody*(vy_jmo-vy_jpo) + + // dtodz*(vz_kmo-vz_kpo)); + // Note: this term is added in a separate kernel to avoid synchronization + // issues +#endif - #ifdef DENSITY_FLOOR +#ifdef DENSITY_FLOOR if (dev_conserved[id] < density_floor) { if (dev_conserved[id] > 0) { dens_0 = dev_conserved[id]; @@ -288,18 +287,18 @@ __global__ void Update_Conserved_Variables_3D(Real *dev_conserved, Real *Q_Lx, R dev_conserved[2 * n_cells + id] *= (density_floor / dens_0); dev_conserved[3 * n_cells + id] *= (density_floor / dens_0); dev_conserved[4 * n_cells + id] *= (density_floor / dens_0); - #ifdef DE + #ifdef DE dev_conserved[(n_fields - 1) * n_cells + id] *= (density_floor / dens_0); - #endif + #endif } else { // If the density is negative: average the density on that cell dens_0 = dev_conserved[id]; Average_Cell_Single_Field(0, xid, yid, zid, nx, ny, nz, n_cells, dev_conserved); } } - #endif // DENSITY_FLOOR +#endif // DENSITY_FLOOR - #ifdef STATIC_GRAV +#ifdef STATIC_GRAV calc_g_3D(xid, yid, zid, x_off, y_off, z_off, n_ghost, custom_grav, dx, dy, dz, xbound, ybound, zbound, &gx, &gy, &gz); d_n = dev_conserved[id]; @@ -313,9 +312,9 @@ __global__ void Update_Conserved_Variables_3D(Real *dev_conserved, Real *Q_Lx, R dev_conserved[4 * n_cells + id] += 0.25 * dt * gx * (d + d_n) * (vx + vx_n) + 0.25 * dt * gy * (d + d_n) * (vy + vy_n) + 0.25 * dt * gz * (d + d_n) * (vz + vz_n); - #endif +#endif - #ifdef GRAVITY +#ifdef GRAVITY d_n = dev_conserved[id]; d_inv_n = 1.0 / d_n; vx_n = dev_conserved[1 * n_cells + id] * d_inv_n; @@ -328,44 +327,44 @@ __global__ void Update_Conserved_Variables_3D(Real *dev_conserved, Real *Q_Lx, R id_r = (xid + 1) + (yid)*nx + (zid)*nx * ny; pot_l = dev_potential[id_l]; pot_r = dev_potential[id_r]; - #ifdef GRAVITY_5_POINTS_GRADIENT + #ifdef GRAVITY_5_POINTS_GRADIENT id_ll = (xid - 2) + (yid)*nx + (zid)*nx * ny; id_rr = (xid + 2) + (yid)*nx + (zid)*nx * ny; pot_ll = dev_potential[id_ll]; pot_rr = dev_potential[id_rr]; gx = -1 * (-pot_rr + 8 * pot_r - 8 * pot_l + pot_ll) / (12 * dx); - #else + #else gx = -0.5 * (pot_r - pot_l) / dx; - #endif + #endif // Get Y componet of gravity field id_l = (xid) + (yid - 1) * nx + (zid)*nx * ny; id_r = (xid) + (yid + 1) * nx + (zid)*nx * ny; pot_l = dev_potential[id_l]; pot_r = dev_potential[id_r]; - #ifdef GRAVITY_5_POINTS_GRADIENT + #ifdef GRAVITY_5_POINTS_GRADIENT id_ll = (xid) + (yid - 2) * nx + (zid)*nx * ny; id_rr = (xid) + (yid + 2) * nx + (zid)*nx * ny; pot_ll = dev_potential[id_ll]; pot_rr = dev_potential[id_rr]; gy = -1 * (-pot_rr + 8 * pot_r - 8 * pot_l + pot_ll) / (12 * dx); - #else + #else gy = -0.5 * (pot_r - pot_l) / dy; - #endif + #endif // Get Z componet of gravity field id_l = (xid) + (yid)*nx + (zid - 1) * nx * ny; id_r = (xid) + (yid)*nx + (zid + 1) * nx * ny; pot_l = dev_potential[id_l]; pot_r = dev_potential[id_r]; - #ifdef GRAVITY_5_POINTS_GRADIENT + #ifdef GRAVITY_5_POINTS_GRADIENT id_ll = (xid) + (yid)*nx + (zid - 2) * nx * ny; id_rr = (xid) + (yid)*nx + (zid + 2) * nx * ny; pot_ll = dev_potential[id_ll]; pot_rr = dev_potential[id_rr]; gz = -1 * (-pot_rr + 8 * pot_r - 8 * pot_l + pot_ll) / (12 * dx); - #else + #else gz = -0.5 * (pot_r - pot_l) / dz; - #endif + #endif // Add gravity term to Momentum dev_conserved[n_cells + id] += 0.5 * dt * gx * (d + d_n); @@ -377,9 +376,9 @@ __global__ void Update_Conserved_Variables_3D(Real *dev_conserved, Real *Q_Lx, R dev_conserved[4 * n_cells + id] += 0.5 * dt * (gx * (d * vx + d_n * vx_n) + gy * (d * vy + d_n * vy_n) + gz * (d * vz + d_n * vz_n)); - #endif // GRAVITY +#endif // GRAVITY - #if !(defined(DENSITY_FLOOR) && defined(TEMPERATURE_FLOOR)) +#if !(defined(DENSITY_FLOOR) && defined(TEMPERATURE_FLOOR)) if (dev_conserved[id] < 0.0 || dev_conserved[id] != dev_conserved[id] || dev_conserved[4 * n_cells + id] < 0.0 || dev_conserved[4 * n_cells + id] != dev_conserved[4 * n_cells + id]) { printf("%3d %3d %3d Thread crashed in final update. %e %e %e %e %e\n", xid + x_off, yid + y_off, zid + z_off, @@ -387,7 +386,7 @@ __global__ void Update_Conserved_Variables_3D(Real *dev_conserved, Real *Q_Lx, R dtodz * (dev_F_z[kmo] - dev_F_z[id]), dev_conserved[4 * n_cells + id]); Average_Cell_All_Fields(xid, yid, zid, nx, ny, nz, n_cells, n_fields, gamma, dev_conserved); } - #endif // DENSITY_FLOOR +#endif // DENSITY_FLOOR /* d = dev_conserved[ id]; d_inv = 1.0 / d; @@ -543,16 +542,16 @@ __global__ void Calc_dt_3D(Real *dev_conserved, Real *dev_dti, Real gamma, int n vz = dev_conserved[3 * n_cells + id] * d_inv; E = dev_conserved[4 * n_cells + id]; - // Compute the maximum inverse crossing time in the cell - #ifdef MHD +// Compute the maximum inverse crossing time in the cell +#ifdef MHD // Compute the cell centered magnetic field using a straight average of // the faces auto const [avgBx, avgBy, avgBz] = mhd::utils::cellCenteredMagneticFields(dev_conserved, id, xid, yid, zid, n_cells, nx, ny); max_dti = fmax(max_dti, mhdInverseCrossingTime(E, d, d_inv, vx, vy, vz, avgBx, avgBy, avgBz, dx, dy, dz, gamma)); - #else // not MHD +#else // not MHD max_dti = fmax(max_dti, hydroInverseCrossingTime(E, d, d_inv, vx, vy, vz, dx, dy, dz, gamma)); - #endif // MHD +#endif // MHD } } @@ -597,7 +596,7 @@ Real Calc_dt_GPU(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n return dev_dti[0]; } - #ifdef AVERAGE_SLOW_CELLS +#ifdef AVERAGE_SLOW_CELLS void Average_Slow_Cells(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dx, Real dy, Real dz, Real gamma, Real max_dti_slow) @@ -658,9 +657,9 @@ __global__ void Average_Slow_Cells_3D(Real *dev_conserved, int nx, int ny, int n } } } - #endif // AVERAGE_SLOW_CELLS +#endif // AVERAGE_SLOW_CELLS - #ifdef DE +#ifdef DE __global__ void Partial_Update_Advected_Internal_Energy_1D(Real *dev_conserved, Real *Q_Lx, Real *Q_Rx, int nx, int n_ghost, Real dx, Real dt, Real gamma, int n_fields) { @@ -788,11 +787,11 @@ __global__ void Partial_Update_Advected_Internal_Energy_3D(Real *dev_conserved, E = dev_conserved[4 * n_cells + id]; GE = dev_conserved[(n_fields - 1) * n_cells + id]; E_kin = hydro_utilities::Calc_Kinetic_Energy_From_Velocity(d, vx, vy, vz); - #ifdef MHD + #ifdef MHD // Add the magnetic energy auto magnetic_centered = mhd::utils::cellCenteredMagneticFields(dev_conserved, id, xid, yid, zid, n_cells, nx, ny); E_kin += mhd::utils::computeMagneticEnergy(magnetic_centered.x, magnetic_centered.y, magnetic_centered.z); - #endif // MHD + #endif // MHD P = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, GE, gamma); P = fmax(P, (Real)TINY_NUMBER); @@ -1099,9 +1098,9 @@ __global__ void Sync_Energies_3D(Real *dev_conserved, int nx, int ny, int nz, in } } - #endif // DE +#endif // DE - #ifdef TEMPERATURE_FLOOR +#ifdef TEMPERATURE_FLOOR __global__ void Apply_Temperature_Floor(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real U_floor) { @@ -1131,15 +1130,15 @@ __global__ void Apply_Temperature_Floor(Real *dev_conserved, int nx, int ny, int dev_conserved[4 * n_cells + id] = Ekin + d * U_floor; } - #ifdef DE + #ifdef DE U = dev_conserved[(n_fields - 1) * n_cells + id] / d; if (U < U_floor) { dev_conserved[(n_fields - 1) * n_cells + id] = d * U_floor; } - #endif + #endif } } - #endif // TEMPERATURE_FLOOR +#endif // TEMPERATURE_FLOOR __device__ Real Average_Cell_Single_Field(int field_indx, int i, int j, int k, int nx, int ny, int nz, int ncells, Real *conserved) @@ -1184,12 +1183,12 @@ __device__ void Average_Cell_All_Fields(int i, int j, int k, int nx, int ny, int int N = 0; Real d_av, vx_av, vy_av, vz_av, P_av; d_av = vx_av = vy_av = vz_av = P_av = 0.0; - #ifdef SCALAR +#ifdef SCALAR Real scalar[NSCALARS], scalar_av[NSCALARS]; for (int n = 0; n < NSCALARS; n++) { // NOLINT scalar_av[n] = 0.0; } - #endif +#endif for (int kk = k - 1; kk <= k + 1; kk++) { for (int jj = j - 1; jj <= j + 1; jj++) { @@ -1200,22 +1199,22 @@ __device__ void Average_Cell_All_Fields(int i, int j, int k, int nx, int ny, int my = conserved[grid_enum::momentum_y * ncells + idn]; mz = conserved[grid_enum::momentum_z * ncells + idn]; P = (conserved[grid_enum::Energy * ncells + idn] - (0.5 / d) * (mx * mx + my * my + mz * mz)) * (gamma - 1.0); - #ifdef SCALAR +#ifdef SCALAR for (int n = 0; n < NSCALARS; n++) { // NOLINT scalar[n] = conserved[grid_enum::scalar * ncells + idn]; } - #endif +#endif if (d > 0.0 && P > 0.0) { d_av += d; vx_av += mx; vy_av += my; vz_av += mz; P_av += P / (gamma - 1.0); - #ifdef SCALAR +#ifdef SCALAR for (int n = 0; n < NSCALARS; n++) { // NOLINT scalar_av[n] += scalar[n]; } - #endif +#endif N++; } } @@ -1226,11 +1225,11 @@ __device__ void Average_Cell_All_Fields(int i, int j, int k, int nx, int ny, int vx_av = vx_av / d_av; vy_av = vy_av / d_av; vz_av = vz_av / d_av; - #ifdef SCALAR +#ifdef SCALAR for (int n = 0; n < NSCALARS; n++) { // NOLINT scalar_av[n] = scalar_av[n] / d_av; } - #endif +#endif d_av = d_av / N; // replace cell values with new averaged values @@ -1240,14 +1239,14 @@ __device__ void Average_Cell_All_Fields(int i, int j, int k, int nx, int ny, int conserved[id + ncells * grid_enum::momentum_z] = d_av * vz_av; conserved[id + ncells * grid_enum::Energy] = P_av / (gamma - 1.0) + 0.5 * d_av * (vx_av * vx_av + vy_av * vy_av + vz_av * vz_av); - #ifdef DE +#ifdef DE conserved[id + ncells * grid_enum::GasEnergy] = P_av / (gamma - 1.0); - #endif - #ifdef SCALAR +#endif +#ifdef SCALAR for (int n = 0; n < NSCALARS; n++) { // NOLINT conserved[id + ncells * grid_enum::scalar] = d_av * scalar_av[n]; } - #endif +#endif d = d_av; E = P_av / (gamma - 1.0) + 0.5 * d_av * (vx_av * vx_av + vy_av * vy_av + vz_av * vz_av); @@ -1255,5 +1254,3 @@ __device__ void Average_Cell_All_Fields(int i, int j, int k, int nx, int ny, int printf("%3d %3d %3d FC: d: %e E:%e P:%e vx:%e vy:%e vz:%e\n", i, j, k, d, E, P, vx_av, vy_av, vz_av); } - -#endif // CUDA diff --git a/src/hydro/hydro_cuda.h b/src/hydro/hydro_cuda.h index 2de29eea3..12c4bc95e 100644 --- a/src/hydro/hydro_cuda.h +++ b/src/hydro/hydro_cuda.h @@ -1,12 +1,11 @@ /*! \file hydro_cuda.h * \brief Declarations of functions used in all cuda integration algorithms. */ -#ifdef CUDA - #ifndef HYDRO_CUDA_H - #define HYDRO_CUDA_H +#ifndef HYDRO_CUDA_H +#define HYDRO_CUDA_H - #include "../global/global.h" - #include "../utils/mhd_utilities.h" +#include "../global/global.h" +#include "../utils/mhd_utilities.h" __global__ void Update_Conserved_Variables_1D(Real *dev_conserved, Real *dev_F, int n_cells, int x_off, int n_ghost, Real dx, Real xbound, Real dt, Real gamma, int n_fields, int custom_grav); @@ -76,19 +75,19 @@ __global__ void Sync_Energies_2D(Real *dev_conserved, int nx, int ny, int n_ghos __global__ void Sync_Energies_3D(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, Real gamma, int n_fields); - #ifdef AVERAGE_SLOW_CELLS +#ifdef AVERAGE_SLOW_CELLS void Average_Slow_Cells(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dx, Real dy, Real dz, Real gamma, Real max_dti_slow); __global__ void Average_Slow_Cells_3D(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dx, Real dy, Real dz, Real gamma, Real max_dti_slow); - #endif +#endif - #ifdef TEMPERATURE_FLOOR +#ifdef TEMPERATURE_FLOOR __global__ void Apply_Temperature_Floor(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real U_floor); - #endif +#endif __global__ void Partial_Update_Advected_Internal_Energy_1D(Real *dev_conserved, Real *Q_Lx, Real *Q_Rx, int nx, int n_ghost, Real dx, Real dt, Real gamma, int n_fields); @@ -114,5 +113,4 @@ __device__ void Average_Cell_All_Fields(int i, int j, int k, int nx, int ny, int __device__ Real Average_Cell_Single_Field(int field_indx, int i, int j, int k, int nx, int ny, int nz, int ncells, Real *conserved); - #endif // HYDRO_CUDA_H -#endif // CUDA +#endif // HYDRO_CUDA_H diff --git a/src/hydro/hydro_cuda_tests.cu b/src/hydro/hydro_cuda_tests.cu index d633773cc..f1345c77f 100644 --- a/src/hydro/hydro_cuda_tests.cu +++ b/src/hydro/hydro_cuda_tests.cu @@ -23,8 +23,6 @@ #include "../utils/gpu.hpp" #include "../utils/testing_utilities.h" -#if defined(CUDA) - // ============================================================================= // Tests for the Calc_dt_GPU function // ============================================================================= @@ -145,5 +143,3 @@ TEST(tMHDMhdInverseCrossingTime, CorrectInputExpectCorrectOutput) // ============================================================================= // End of tests for the mhdInverseCrossingTime function // ============================================================================= - -#endif // CUDA diff --git a/src/integrators/VL_1D_cuda.cu b/src/integrators/VL_1D_cuda.cu index 88cf9bf7f..f2ad520b8 100644 --- a/src/integrators/VL_1D_cuda.cu +++ b/src/integrators/VL_1D_cuda.cu @@ -1,28 +1,27 @@ /*! \file VL_1D_cuda.cu * \brief Definitions of the cuda VL algorithm functions. */ -#ifdef CUDA - #ifdef VL - - #include - #include - #include - - #include "../global/global.h" - #include "../global/global_cuda.h" - #include "../hydro/hydro_cuda.h" - #include "../integrators/VL_1D_cuda.h" - #include "../io/io.h" - #include "../reconstruction/pcm_cuda.h" - #include "../reconstruction/plmc_cuda.h" - #include "../reconstruction/plmp_cuda.h" - #include "../reconstruction/ppmc_cuda.h" - #include "../reconstruction/ppmp_cuda.h" - #include "../riemann_solvers/exact_cuda.h" - #include "../riemann_solvers/hllc_cuda.h" - #include "../riemann_solvers/roe_cuda.h" - #include "../utils/error_handling.h" - #include "../utils/gpu.hpp" +#ifdef VL + + #include + #include + #include + + #include "../global/global.h" + #include "../global/global_cuda.h" + #include "../hydro/hydro_cuda.h" + #include "../integrators/VL_1D_cuda.h" + #include "../io/io.h" + #include "../reconstruction/pcm_cuda.h" + #include "../reconstruction/plmc_cuda.h" + #include "../reconstruction/plmp_cuda.h" + #include "../reconstruction/ppmc_cuda.h" + #include "../reconstruction/ppmp_cuda.h" + #include "../riemann_solvers/exact_cuda.h" + #include "../riemann_solvers/hllc_cuda.h" + #include "../riemann_solvers/roe_cuda.h" + #include "../utils/error_handling.h" + #include "../utils/gpu.hpp" __global__ void Update_Conserved_Variables_1D_half(Real *dev_conserved, Real *dev_conserved_half, Real *dev_F, int n_cells, int n_ghost, Real dx, Real dt, Real gamma, @@ -66,19 +65,19 @@ void VL_Algorithm_1D_CUDA(Real *d_conserved, int nx, int x_off, int n_ghost, Rea n_fields); GPU_Error_Check(); - // Step 2: Calculate first-order upwind fluxes - #ifdef EXACT + // Step 2: Calculate first-order upwind fluxes + #ifdef EXACT hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dimGrid, dimBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); - #endif - #ifdef ROE + #endif + #ifdef ROE hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dimGrid, dimBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); - #endif - #ifdef HLLC + #endif + #ifdef HLLC hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dimGrid, dimBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); - #endif + #endif GPU_Error_Check(); // Step 3: Update the conserved variables half a timestep @@ -86,62 +85,62 @@ void VL_Algorithm_1D_CUDA(Real *d_conserved, int nx, int x_off, int n_ghost, Rea F_x, n_cells, n_ghost, dx, 0.5 * dt, gama, n_fields); GPU_Error_Check(); - // Step 4: Construct left and right interface values using updated conserved - // variables - #ifdef PCM + // Step 4: Construct left and right interface values using updated conserved + // variables + #ifdef PCM hipLaunchKernelGGL(PCM_Reconstruction_1D, dimGrid, dimBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, n_ghost, gama, n_fields); - #endif - #ifdef PLMC + #endif + #ifdef PLMC hipLaunchKernelGGL(PLMC_cuda, dimGrid, dimBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, dx, dt, gama, 0, n_fields); - #endif - #ifdef PLMP + #endif + #ifdef PLMP hipLaunchKernelGGL(PLMP_cuda, dimGrid, dimBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, n_fields); - #endif - #ifdef PPMP + #endif + #ifdef PPMP hipLaunchKernelGGL(PPMP_cuda, dimGrid, dimBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, n_fields); - #endif - #ifdef PPMC + #endif + #ifdef PPMC hipLaunchKernelGGL(PPMC_VL, dimGrid, dimBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, gama, 0); - #endif + #endif GPU_Error_Check(); - // Step 5: Calculate the fluxes again - #ifdef EXACT + // Step 5: Calculate the fluxes again + #ifdef EXACT hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dimGrid, dimBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); - #endif - #ifdef ROE + #endif + #ifdef ROE hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dimGrid, dimBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); - #endif - #ifdef HLLC + #endif + #ifdef HLLC hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dimGrid, dimBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); - #endif + #endif GPU_Error_Check(); - #ifdef DE + #ifdef DE // Compute the divergence of velocity before updating the conserved array, // this solves synchronization issues when adding this term on // Update_Conserved_Variables hipLaunchKernelGGL(Partial_Update_Advected_Internal_Energy_1D, dimGrid, dimBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, n_ghost, dx, dt, gama, n_fields); - #endif + #endif // Step 6: Update the conserved variable array hipLaunchKernelGGL(Update_Conserved_Variables_1D, dimGrid, dimBlock, 0, 0, dev_conserved, F_x, n_cells, x_off, n_ghost, dx, xbound, dt, gama, n_fields, custom_grav); GPU_Error_Check(); - #ifdef DE + #ifdef DE hipLaunchKernelGGL(Select_Internal_Energy_1D, dimGrid, dimBlock, 0, 0, dev_conserved, nx, n_ghost, n_fields); hipLaunchKernelGGL(Sync_Energies_1D, dimGrid, dimBlock, 0, 0, dev_conserved, nx, n_ghost, gama, n_fields); GPU_Error_Check(); - #endif + #endif return; } @@ -165,17 +164,17 @@ __global__ void Update_Conserved_Variables_1D_half(Real *dev_conserved, Real *de // get a global thread ID id = threadIdx.x + blockIdx.x * blockDim.x; - #ifdef DE + #ifdef DE Real d, d_inv, vx, vy, vz; Real vx_imo, vx_ipo, P; int ipo; - #endif + #endif // threads corresponding all cells except outer ring of ghost cells do the // calculation if (id > 0 && id < n_cells - 1) { imo = id - 1; - #ifdef DE + #ifdef DE d = dev_conserved[id]; d_inv = 1.0 / d; vx = dev_conserved[1 * n_cells + id] * d_inv; @@ -188,7 +187,7 @@ __global__ void Update_Conserved_Variables_1D_half(Real *dev_conserved, Real *de ipo = id + 1; vx_imo = dev_conserved[1 * n_cells + imo] / dev_conserved[imo]; vx_ipo = dev_conserved[1 * n_cells + ipo] / dev_conserved[ipo]; - #endif + #endif // update the conserved variable array dev_conserved_half[id] = dev_conserved[id] + dtodx * (dev_F[imo] - dev_F[id]); dev_conserved_half[n_cells + id] = @@ -199,21 +198,20 @@ __global__ void Update_Conserved_Variables_1D_half(Real *dev_conserved, Real *de dev_conserved[3 * n_cells + id] + dtodx * (dev_F[3 * n_cells + imo] - dev_F[3 * n_cells + id]); dev_conserved_half[4 * n_cells + id] = dev_conserved[4 * n_cells + id] + dtodx * (dev_F[4 * n_cells + imo] - dev_F[4 * n_cells + id]); - #ifdef SCALAR + #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { dev_conserved_half[(5 + i) * n_cells + id] = dev_conserved[(5 + i) * n_cells + id] + dtodx * (dev_F[(5 + i) * n_cells + imo] - dev_F[(5 + i) * n_cells + id]); } - #endif - #ifdef DE + #endif + #ifdef DE dev_conserved_half[(n_fields - 1) * n_cells + id] = dev_conserved[(n_fields - 1) * n_cells + id] + dtodx * (dev_F[(n_fields - 1) * n_cells + imo] - dev_F[(n_fields - 1) * n_cells + id]) + 0.5 * P * (dtodx * (vx_imo - vx_ipo)); - #endif + #endif } } - #endif // VL -#endif // CUDA +#endif // VL diff --git a/src/integrators/VL_1D_cuda.h b/src/integrators/VL_1D_cuda.h index 2d901234c..3b7ff5425 100644 --- a/src/integrators/VL_1D_cuda.h +++ b/src/integrators/VL_1D_cuda.h @@ -1,17 +1,14 @@ /*! \file VL_1D_cuda.h * \brief Declarations for the cuda version of the 1D VL algorithm. */ -#ifdef CUDA +#ifndef VL_1D_CUDA_H +#define VL_1D_CUDA_H - #ifndef VL_1D_CUDA_H - #define VL_1D_CUDA_H - - #include "../global/global.h" +#include "../global/global.h" void VL_Algorithm_1D_CUDA(Real *d_conserved, int nx, int x_off, int n_ghost, Real dx, Real xbound, Real dt, int n_fields, int custom_grav); void Free_Memory_VL_1D(); - #endif // VL_1D_CUDA_H -#endif // CUDA +#endif // VL_1D_CUDA_H diff --git a/src/integrators/VL_2D_cuda.cu b/src/integrators/VL_2D_cuda.cu index 4c46638ed..3c8992d71 100644 --- a/src/integrators/VL_2D_cuda.cu +++ b/src/integrators/VL_2D_cuda.cu @@ -1,25 +1,24 @@ /*! \file VL_2D_cuda.cu * \brief Definitions of the cuda 2D VL algorithm functions. */ -#ifdef CUDA - #ifdef VL +#ifdef VL - #include - #include + #include + #include - #include "../global/global.h" - #include "../global/global_cuda.h" - #include "../hydro/hydro_cuda.h" - #include "../integrators/VL_2D_cuda.h" - #include "../reconstruction/pcm_cuda.h" - #include "../reconstruction/plmc_cuda.h" - #include "../reconstruction/plmp_cuda.h" - #include "../reconstruction/ppmc_cuda.h" - #include "../reconstruction/ppmp_cuda.h" - #include "../riemann_solvers/exact_cuda.h" - #include "../riemann_solvers/hllc_cuda.h" - #include "../riemann_solvers/roe_cuda.h" - #include "../utils/gpu.hpp" + #include "../global/global.h" + #include "../global/global_cuda.h" + #include "../hydro/hydro_cuda.h" + #include "../integrators/VL_2D_cuda.h" + #include "../reconstruction/pcm_cuda.h" + #include "../reconstruction/plmc_cuda.h" + #include "../reconstruction/plmp_cuda.h" + #include "../reconstruction/ppmc_cuda.h" + #include "../reconstruction/ppmp_cuda.h" + #include "../riemann_solvers/exact_cuda.h" + #include "../riemann_solvers/hllc_cuda.h" + #include "../riemann_solvers/roe_cuda.h" + #include "../utils/gpu.hpp" __global__ void Update_Conserved_Variables_2D_half(Real *dev_conserved, Real *dev_conserved_half, Real *dev_F_x, Real *dev_F_y, int nx, int ny, int n_ghost, Real dx, Real dy, @@ -68,25 +67,25 @@ void VL_Algorithm_2D_CUDA(Real *d_conserved, int nx, int ny, int x_off, int y_of n_ghost, gama, n_fields); GPU_Error_Check(); - // Step 2: Calculate first-order upwind fluxes - #ifdef EXACT + // Step 2: Calculate first-order upwind fluxes + #ifdef EXACT hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields); - #endif - #ifdef ROE + #endif + #ifdef ROE hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields); - #endif - #ifdef HLLC + #endif + #ifdef HLLC hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields); - #endif + #endif GPU_Error_Check(); // Step 3: Update the conserved variables half a timestep @@ -94,71 +93,71 @@ void VL_Algorithm_2D_CUDA(Real *d_conserved, int nx, int ny, int x_off, int y_of F_x, F_y, nx, ny, n_ghost, dx, dy, 0.5 * dt, gama, n_fields); GPU_Error_Check(); - // Step 4: Construct left and right interface values using updated conserved - // variables - #ifdef PLMP + // Step 4: Construct left and right interface values using updated conserved + // variables + #ifdef PLMP hipLaunchKernelGGL(PLMP_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, n_fields); hipLaunchKernelGGL(PLMP_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, dt, gama, 1, n_fields); - #endif - #ifdef PLMC + #endif + #ifdef PLMC hipLaunchKernelGGL(PLMC_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, dx, dt, gama, 0, n_fields); hipLaunchKernelGGL(PLMC_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Ly, Q_Ry, nx, ny, nz, dy, dt, gama, 1, n_fields); - #endif - #ifdef PPMP + #endif + #ifdef PPMP hipLaunchKernelGGL(PPMP_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, n_fields); hipLaunchKernelGGL(PPMP_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, dt, gama, 1, n_fields); - #endif // PPMP - #ifdef PPMC + #endif // PPMP + #ifdef PPMC hipLaunchKernelGGL(PPMC_VL, dim2dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, gama, 0); hipLaunchKernelGGL(PPMC_VL, dim2dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Ly, Q_Ry, nx, ny, nz, gama, 1); - #endif // PPMC + #endif // PPMC GPU_Error_Check(); - // Step 5: Calculate the fluxes again - #ifdef EXACT + // Step 5: Calculate the fluxes again + #ifdef EXACT hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields); - #endif - #ifdef ROE + #endif + #ifdef ROE hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields); - #endif - #ifdef HLLC + #endif + #ifdef HLLC hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields); - #endif + #endif GPU_Error_Check(); - #ifdef DE + #ifdef DE // Compute the divergence of velocity before updating the conserved array, // this solves synchronization issues when adding this term on // Update_Conserved_Variables hipLaunchKernelGGL(Partial_Update_Advected_Internal_Energy_2D, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, Q_Ly, Q_Ry, nx, ny, n_ghost, dx, dy, dt, gama, n_fields); - #endif + #endif // Step 6: Update the conserved variable array hipLaunchKernelGGL(Update_Conserved_Variables_2D, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, F_x, F_y, nx, ny, x_off, y_off, n_ghost, dx, dy, xbound, ybound, dt, gama, n_fields, custom_grav); GPU_Error_Check(); - #ifdef DE + #ifdef DE hipLaunchKernelGGL(Select_Internal_Energy_2D, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, n_ghost, n_fields); hipLaunchKernelGGL(Sync_Energies_2D, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, n_ghost, gama, n_fields); GPU_Error_Check(); - #endif + #endif return; } @@ -194,17 +193,17 @@ __global__ void Update_Conserved_Variables_2D_half(Real *dev_conserved, Real *de yid = id / nx; xid = id - yid * nx; - #ifdef DE + #ifdef DE Real d, d_inv, vx, vy, vz; Real vx_imo, vx_ipo, vy_jmo, vy_jpo, P; int ipo, jpo; - #endif + #endif // all threads but one outer ring of ghost cells if (xid > 0 && xid < nx - 1 && yid > 0 && yid < ny - 1) { imo = xid - 1 + yid * nx; jmo = xid + (yid - 1) * nx; - #ifdef DE + #ifdef DE d = dev_conserved[id]; d_inv = 1.0 / d; vx = dev_conserved[1 * n_cells + id] * d_inv; @@ -220,7 +219,7 @@ __global__ void Update_Conserved_Variables_2D_half(Real *dev_conserved, Real *de vx_ipo = dev_conserved[1 * n_cells + ipo] / dev_conserved[ipo]; vy_jmo = dev_conserved[2 * n_cells + jmo] / dev_conserved[jmo]; vy_jpo = dev_conserved[2 * n_cells + jpo] / dev_conserved[jpo]; - #endif + #endif // update the conserved variable array dev_conserved_half[id] = dev_conserved[id] + dtodx * (dev_F_x[imo] - dev_F_x[id]) + dtody * (dev_F_y[jmo] - dev_F_y[id]); @@ -236,23 +235,22 @@ __global__ void Update_Conserved_Variables_2D_half(Real *dev_conserved, Real *de dev_conserved_half[4 * n_cells + id] = dev_conserved[4 * n_cells + id] + dtodx * (dev_F_x[4 * n_cells + imo] - dev_F_x[4 * n_cells + id]) + dtody * (dev_F_y[4 * n_cells + jmo] - dev_F_y[4 * n_cells + id]); - #ifdef SCALAR + #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { dev_conserved_half[(5 + i) * n_cells + id] = dev_conserved[(5 + i) * n_cells + id] + dtodx * (dev_F_x[(5 + i) * n_cells + imo] - dev_F_x[(5 + i) * n_cells + id]) + dtody * (dev_F_y[(5 + i) * n_cells + jmo] - dev_F_y[(5 + i) * n_cells + id]); } - #endif - #ifdef DE + #endif + #ifdef DE dev_conserved_half[(n_fields - 1) * n_cells + id] = dev_conserved[(n_fields - 1) * n_cells + id] + dtodx * (dev_F_x[(n_fields - 1) * n_cells + imo] - dev_F_x[(n_fields - 1) * n_cells + id]) + dtody * (dev_F_y[(n_fields - 1) * n_cells + jmo] - dev_F_y[(n_fields - 1) * n_cells + id]) + 0.5 * P * (dtodx * (vx_imo - vx_ipo) + dtody * (vy_jmo - vy_jpo)); - #endif + #endif } } - #endif // VL -#endif // CUDA +#endif // VL diff --git a/src/integrators/VL_2D_cuda.h b/src/integrators/VL_2D_cuda.h index a13495688..0ac7da807 100644 --- a/src/integrators/VL_2D_cuda.h +++ b/src/integrators/VL_2D_cuda.h @@ -1,17 +1,14 @@ /*! \file VL_2D_cuda.h * \brief Declarations for the cuda version of the 2D VL algorithm. */ -#ifdef CUDA +#ifndef VL_2D_CUDA_H +#define VL_2D_CUDA_H - #ifndef VL_2D_CUDA_H - #define VL_2D_CUDA_H - - #include "../global/global.h" +#include "../global/global.h" void VL_Algorithm_2D_CUDA(Real *d_conserved, int nx, int ny, int x_off, int y_off, int n_ghost, Real dx, Real dy, Real xbound, Real ybound, Real dt, int n_fields, int custom_grav); void Free_Memory_VL_2D(); - #endif // VL_2D_CUDA_H -#endif // CUDA +#endif // VL_2D_CUDA_H diff --git a/src/integrators/VL_3D_cuda.cu b/src/integrators/VL_3D_cuda.cu index 1c0b0f56b..64912f21d 100644 --- a/src/integrators/VL_3D_cuda.cu +++ b/src/integrators/VL_3D_cuda.cu @@ -4,7 +4,7 @@ * multidimensional MHD" */ -#if defined(CUDA) && defined(VL) +#ifdef VL #include #include @@ -507,4 +507,4 @@ __global__ void Update_Conserved_Variables_3D_half(Real *dev_conserved, Real *de } } -#endif // CUDA and VL +#endif // VL diff --git a/src/integrators/VL_3D_cuda.h b/src/integrators/VL_3D_cuda.h index 3f2cf8d75..4c2f48857 100644 --- a/src/integrators/VL_3D_cuda.h +++ b/src/integrators/VL_3D_cuda.h @@ -1,12 +1,10 @@ /*! \file VL_3D_cuda.h * \brief Declarations for the cuda version of the 3D VL algorithm. */ -#ifdef CUDA +#ifndef VL_3D_CUDA_H +#define VL_3D_CUDA_H - #ifndef VL_3D_CUDA_H - #define VL_3D_CUDA_H - - #include "../global/global.h" +#include "../global/global.h" void VL_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, int ny, int nz, int x_off, int y_off, int z_off, int n_ghost, Real dx, Real dy, Real dz, Real xbound, Real ybound, Real zbound, @@ -15,5 +13,4 @@ void VL_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, int void Free_Memory_VL_3D(); - #endif // VL_3D_CUDA_H -#endif // CUDA +#endif // VL_3D_CUDA_H diff --git a/src/integrators/simple_1D_cuda.cu b/src/integrators/simple_1D_cuda.cu index 36401a8fe..80f26021a 100644 --- a/src/integrators/simple_1D_cuda.cu +++ b/src/integrators/simple_1D_cuda.cu @@ -1,27 +1,25 @@ /*! \file simple_1D_cuda.cu * \brief Definitions of the 1D simple algorithm functions. */ -#ifdef CUDA - - #include - #include - #include - - #include "../global/global.h" - #include "../global/global_cuda.h" - #include "../hydro/hydro_cuda.h" - #include "../integrators/simple_1D_cuda.h" - #include "../io/io.h" - #include "../reconstruction/pcm_cuda.h" - #include "../reconstruction/plmc_cuda.h" - #include "../reconstruction/plmp_cuda.h" - #include "../reconstruction/ppmc_cuda.h" - #include "../reconstruction/ppmp_cuda.h" - #include "../riemann_solvers/exact_cuda.h" - #include "../riemann_solvers/hllc_cuda.h" - #include "../riemann_solvers/roe_cuda.h" - #include "../utils/error_handling.h" - #include "../utils/gpu.hpp" +#include +#include +#include + +#include "../global/global.h" +#include "../global/global_cuda.h" +#include "../hydro/hydro_cuda.h" +#include "../integrators/simple_1D_cuda.h" +#include "../io/io.h" +#include "../reconstruction/pcm_cuda.h" +#include "../reconstruction/plmc_cuda.h" +#include "../reconstruction/plmp_cuda.h" +#include "../reconstruction/ppmc_cuda.h" +#include "../reconstruction/ppmp_cuda.h" +#include "../riemann_solvers/exact_cuda.h" +#include "../riemann_solvers/hllc_cuda.h" +#include "../riemann_solvers/roe_cuda.h" +#include "../utils/error_handling.h" +#include "../utils/gpu.hpp" void Simple_Algorithm_1D_CUDA(Real *d_conserved, int nx, int x_off, int n_ghost, Real dx, Real xbound, Real dt, int n_fields, int custom_grav) @@ -54,66 +52,66 @@ void Simple_Algorithm_1D_CUDA(Real *d_conserved, int nx, int x_off, int n_ghost, memory_allocated = true; } - // Step 1: Do the reconstruction - #ifdef PCM +// Step 1: Do the reconstruction +#ifdef PCM hipLaunchKernelGGL(PCM_Reconstruction_1D, dimGrid, dimBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, n_ghost, gama, n_fields); GPU_Error_Check(); - #endif - #ifdef PLMP +#endif +#ifdef PLMP hipLaunchKernelGGL(PLMP_cuda, dimGrid, dimBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, n_fields); GPU_Error_Check(); - #endif - #ifdef PLMC +#endif +#ifdef PLMC hipLaunchKernelGGL(PLMC_cuda, dimGrid, dimBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, dx, dt, gama, 0, n_fields); GPU_Error_Check(); - #endif - #ifdef PPMP +#endif +#ifdef PPMP hipLaunchKernelGGL(PPMP_cuda, dimGrid, dimBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, n_fields); GPU_Error_Check(); - #endif - #ifdef PPMC +#endif +#ifdef PPMC hipLaunchKernelGGL(PPMC_CTU, dimGrid, dimBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, dx, dt, gama, 0); GPU_Error_Check(); - #endif +#endif - // Step 2: Calculate the fluxes - #ifdef EXACT +// Step 2: Calculate the fluxes +#ifdef EXACT hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dimGrid, dimBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); - #endif - #ifdef ROE +#endif +#ifdef ROE hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dimGrid, dimBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); - #endif - #ifdef HLLC +#endif +#ifdef HLLC hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dimGrid, dimBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); - #endif +#endif GPU_Error_Check(); - #ifdef DE +#ifdef DE // Compute the divergence of Vel before updating the conserved array, this // solves synchronization issues when adding this term on // Update_Conserved_Variables hipLaunchKernelGGL(Partial_Update_Advected_Internal_Energy_1D, dimGrid, dimBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, n_ghost, dx, dt, gama, n_fields); - #endif +#endif // Step 3: Update the conserved variable array hipLaunchKernelGGL(Update_Conserved_Variables_1D, dimGrid, dimBlock, 0, 0, dev_conserved, F_x, n_cells, x_off, n_ghost, dx, xbound, dt, gama, n_fields, custom_grav); GPU_Error_Check(); - // Synchronize the total and internal energy, if using dual-energy formalism - #ifdef DE +// Synchronize the total and internal energy, if using dual-energy formalism +#ifdef DE hipLaunchKernelGGL(Select_Internal_Energy_1D, dimGrid, dimBlock, 0, 0, dev_conserved, nx, n_ghost, n_fields); hipLaunchKernelGGL(Sync_Energies_1D, dimGrid, dimBlock, 0, 0, dev_conserved, n_cells, n_ghost, gama, n_fields); GPU_Error_Check(); - #endif +#endif return; } @@ -126,5 +124,3 @@ void Free_Memory_Simple_1D() cudaFree(Q_Rx); cudaFree(F_x); } - -#endif // CUDA diff --git a/src/integrators/simple_1D_cuda.h b/src/integrators/simple_1D_cuda.h index 43dcc4fc8..82ccf0c29 100644 --- a/src/integrators/simple_1D_cuda.h +++ b/src/integrators/simple_1D_cuda.h @@ -1,17 +1,14 @@ /*! \file simple_1D_cuda.h * \brief Declarations for the 1D simple algorithm. */ -#ifdef CUDA +#ifndef SIMPLE_1D_CUDA_H +#define SIMPLE_1D_CUDA_H - #ifndef SIMPLE_1D_CUDA_H - #define SIMPLE_1D_CUDA_H - - #include "../global/global.h" +#include "../global/global.h" void Simple_Algorithm_1D_CUDA(Real *d_conserved, int nx, int x_off, int n_ghost, Real dx, Real xbound, Real dt, int n_fields, int custom_grav); void Free_Memory_Simple_1D(); - #endif // Simple_1D_CUDA_H -#endif // CUDA +#endif // Simple_1D_CUDA_H diff --git a/src/integrators/simple_2D_cuda.cu b/src/integrators/simple_2D_cuda.cu index 9361bf9e7..97d435c51 100644 --- a/src/integrators/simple_2D_cuda.cu +++ b/src/integrators/simple_2D_cuda.cu @@ -1,24 +1,22 @@ /*! \file simple_2D_cuda.cu * \brief Definitions of the cuda 2D simple algorithm functions. */ -#ifdef CUDA +#include +#include - #include - #include - - #include "../global/global.h" - #include "../global/global_cuda.h" - #include "../hydro/hydro_cuda.h" - #include "../integrators/simple_2D_cuda.h" - #include "../reconstruction/pcm_cuda.h" - #include "../reconstruction/plmc_cuda.h" - #include "../reconstruction/plmp_cuda.h" - #include "../reconstruction/ppmc_cuda.h" - #include "../reconstruction/ppmp_cuda.h" - #include "../riemann_solvers/exact_cuda.h" - #include "../riemann_solvers/hllc_cuda.h" - #include "../riemann_solvers/roe_cuda.h" - #include "../utils/gpu.hpp" +#include "../global/global.h" +#include "../global/global_cuda.h" +#include "../hydro/hydro_cuda.h" +#include "../integrators/simple_2D_cuda.h" +#include "../reconstruction/pcm_cuda.h" +#include "../reconstruction/plmc_cuda.h" +#include "../reconstruction/plmp_cuda.h" +#include "../reconstruction/ppmc_cuda.h" +#include "../reconstruction/ppmp_cuda.h" +#include "../riemann_solvers/exact_cuda.h" +#include "../riemann_solvers/hllc_cuda.h" +#include "../riemann_solvers/roe_cuda.h" +#include "../utils/gpu.hpp" void Simple_Algorithm_2D_CUDA(Real *d_conserved, int nx, int ny, int x_off, int y_off, int n_ghost, Real dx, Real dy, Real xbound, Real ybound, Real dt, int n_fields, int custom_grav) @@ -55,75 +53,75 @@ void Simple_Algorithm_2D_CUDA(Real *d_conserved, int nx, int ny, int x_off, int memory_allocated = true; } - // Step 1: Do the reconstruction - #ifdef PCM +// Step 1: Do the reconstruction +#ifdef PCM hipLaunchKernelGGL(PCM_Reconstruction_2D, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, Q_Ly, Q_Ry, nx, ny, n_ghost, gama, n_fields); - #endif - #ifdef PLMP +#endif +#ifdef PLMP hipLaunchKernelGGL(PLMP_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, n_fields); hipLaunchKernelGGL(PLMP_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, dt, gama, 1, n_fields); - #endif - #ifdef PLMC +#endif +#ifdef PLMC hipLaunchKernelGGL(PLMC_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, dx, dt, gama, 0, n_fields); hipLaunchKernelGGL(PLMC_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Ly, Q_Ry, nx, ny, nz, dy, dt, gama, 1, n_fields); - #endif - #ifdef PPMP +#endif +#ifdef PPMP hipLaunchKernelGGL(PPMP_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, n_fields); hipLaunchKernelGGL(PPMP_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, dt, gama, 1, n_fields); - #endif - #ifdef PPMC +#endif +#ifdef PPMC hipLaunchKernelGGL(PPMC_CTU, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, dx, dt, gama, 0); hipLaunchKernelGGL(PPMC_CTU, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Ly, Q_Ry, nx, ny, nz, dy, dt, gama, 1); - #endif +#endif GPU_Error_Check(); - // Step 2: Calculate the fluxes - #ifdef EXACT +// Step 2: Calculate the fluxes +#ifdef EXACT hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields); - #endif - #ifdef ROE +#endif +#ifdef ROE hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields); - #endif - #ifdef HLLC +#endif +#ifdef HLLC hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields); - #endif +#endif GPU_Error_Check(); - #ifdef DE +#ifdef DE // Compute the divergence of Vel before updating the conserved array, this // solves synchronization issues when adding this term on // Update_Conserved_Variables hipLaunchKernelGGL(Partial_Update_Advected_Internal_Energy_2D, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, Q_Ly, Q_Ry, nx, ny, n_ghost, dx, dy, dt, gama, n_fields); - #endif +#endif // Step 3: Update the conserved variable array hipLaunchKernelGGL(Update_Conserved_Variables_2D, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, F_x, F_y, nx, ny, x_off, y_off, n_ghost, dx, dy, xbound, ybound, dt, gama, n_fields, custom_grav); GPU_Error_Check(); - // Synchronize the total and internal energy - #ifdef DE +// Synchronize the total and internal energy +#ifdef DE hipLaunchKernelGGL(Select_Internal_Energy_2D, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, n_ghost, n_fields); hipLaunchKernelGGL(Sync_Energies_2D, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, n_ghost, gama, n_fields); GPU_Error_Check(); - #endif +#endif return; } @@ -139,5 +137,3 @@ void Free_Memory_Simple_2D() cudaFree(F_x); cudaFree(F_y); } - -#endif // CUDA diff --git a/src/integrators/simple_2D_cuda.h b/src/integrators/simple_2D_cuda.h index 5439828a5..a381c553a 100644 --- a/src/integrators/simple_2D_cuda.h +++ b/src/integrators/simple_2D_cuda.h @@ -1,17 +1,14 @@ /*! \file simple_2D_cuda.h * \brief Declarations for the cuda version of the 2D simple algorithm. */ -#ifdef CUDA +#ifndef SIMPLE_2D_CUDA_H +#define SIMPLE_2D_CUDA_H - #ifndef SIMPLE_2D_CUDA_H - #define SIMPLE_2D_CUDA_H - - #include "../global/global.h" +#include "../global/global.h" void Simple_Algorithm_2D_CUDA(Real *d_conserved, int nx, int ny, int x_off, int y_off, int n_ghost, Real dx, Real dy, Real xbound, Real ybound, Real dt, int n_fields, int custom_grav); void Free_Memory_Simple_2D(); - #endif // SIMPLE_2D_CUDA_H -#endif // CUDA +#endif // SIMPLE_2D_CUDA_H diff --git a/src/integrators/simple_3D_cuda.cu b/src/integrators/simple_3D_cuda.cu index 865a6f9c8..b767d4fc5 100644 --- a/src/integrators/simple_3D_cuda.cu +++ b/src/integrators/simple_3D_cuda.cu @@ -1,28 +1,27 @@ /*! \file simple_3D_cuda.cu * \brief Definitions of the cuda 3D simple algorithm functions. */ -#ifdef CUDA - #ifdef SIMPLE - - #include - #include - #include - - #include "../global/global.h" - #include "../global/global_cuda.h" - #include "../hydro/hydro_cuda.h" - #include "../integrators/simple_3D_cuda.h" - #include "../io/io.h" - #include "../reconstruction/pcm_cuda.h" - #include "../reconstruction/plmc_cuda.h" - #include "../reconstruction/plmp_cuda.h" - #include "../reconstruction/ppmc_cuda.h" - #include "../reconstruction/ppmp_cuda.h" - #include "../riemann_solvers/exact_cuda.h" - #include "../riemann_solvers/hll_cuda.h" - #include "../riemann_solvers/hllc_cuda.h" - #include "../riemann_solvers/roe_cuda.h" - #include "../utils/gpu.hpp" +#ifdef SIMPLE + + #include + #include + #include + + #include "../global/global.h" + #include "../global/global_cuda.h" + #include "../hydro/hydro_cuda.h" + #include "../integrators/simple_3D_cuda.h" + #include "../io/io.h" + #include "../reconstruction/pcm_cuda.h" + #include "../reconstruction/plmc_cuda.h" + #include "../reconstruction/plmp_cuda.h" + #include "../reconstruction/ppmc_cuda.h" + #include "../reconstruction/ppmp_cuda.h" + #include "../riemann_solvers/exact_cuda.h" + #include "../riemann_solvers/hll_cuda.h" + #include "../riemann_solvers/hllc_cuda.h" + #include "../riemann_solvers/roe_cuda.h" + #include "../utils/gpu.hpp" void Simple_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, int ny, int nz, int x_off, int y_off, int z_off, int n_ghost, Real dx, Real dy, Real dz, Real xbound, Real ybound, Real zbound, @@ -66,13 +65,13 @@ void Simple_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, GPU_Error_Check(cudaMalloc((void **)&F_y, n_fields * n_cells * sizeof(Real))); GPU_Error_Check(cudaMalloc((void **)&F_z, n_fields * n_cells * sizeof(Real))); - #if defined(GRAVITY) + #if defined(GRAVITY) // GPU_Error_Check( cudaMalloc((void**)&dev_grav_potential, // n_cells*sizeof(Real)) ); dev_grav_potential = d_grav_potential; - #else + #else dev_grav_potential = NULL; - #endif + #endif // If memory is single allocated: memory_allocated becomes true and // successive timesteps won't allocate memory. If the memory is not single @@ -82,90 +81,90 @@ void Simple_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, chprintf(" Memory allocated \n"); } - #if defined(GRAVITY) && !defined(GRAVITY_GPU) + #if defined(GRAVITY) && !defined(GRAVITY_GPU) GPU_Error_Check(cudaMemcpy(dev_grav_potential, temp_potential, n_cells * sizeof(Real), cudaMemcpyHostToDevice)); - #endif + #endif - // Step 1: Construct left and right interface values using updated conserved - // variables - #ifdef PCM + // Step 1: Construct left and right interface values using updated conserved + // variables + #ifdef PCM hipLaunchKernelGGL(PCM_Reconstruction_3D, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, Q_Ly, Q_Ry, Q_Lz, Q_Rz, nx, ny, nz, n_ghost, gama, n_fields); - #endif - #ifdef PLMP + #endif + #ifdef PLMP hipLaunchKernelGGL(PLMP_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, n_fields); hipLaunchKernelGGL(PLMP_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, dt, gama, 1, n_fields); hipLaunchKernelGGL(PLMP_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lz, Q_Rz, nx, ny, nz, n_ghost, dz, dt, gama, 2, n_fields); - #endif // PLMP - #ifdef PLMC + #endif // PLMP + #ifdef PLMC hipLaunchKernelGGL(PLMC_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, dx, dt, gama, 0, n_fields); hipLaunchKernelGGL(PLMC_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Ly, Q_Ry, nx, ny, nz, dy, dt, gama, 1, n_fields); hipLaunchKernelGGL(PLMC_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lz, Q_Rz, nx, ny, nz, dz, dt, gama, 2, n_fields); - #endif - #ifdef PPMP + #endif + #ifdef PPMP hipLaunchKernelGGL(PPMP_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, n_fields); hipLaunchKernelGGL(PPMP_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, dt, gama, 1, n_fields); hipLaunchKernelGGL(PPMP_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lz, Q_Rz, nx, ny, nz, n_ghost, dz, dt, gama, 2, n_fields); - #endif // PPMP - #ifdef PPMC + #endif // PPMP + #ifdef PPMC hipLaunchKernelGGL(PPMC_CTU, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, dx, dt, gama, 0); hipLaunchKernelGGL(PPMC_CTU, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Ly, Q_Ry, nx, ny, nz, dy, dt, gama, 1); hipLaunchKernelGGL(PPMC_CTU, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lz, Q_Rz, nx, ny, nz, dz, dt, gama, 2); GPU_Error_Check(); - #endif // PPMC + #endif // PPMC - // Step 2: Calculate the fluxes - #ifdef EXACT + // Step 2: Calculate the fluxes + #ifdef EXACT hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields); hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lz, Q_Rz, F_z, nx, ny, nz, n_ghost, gama, 2, n_fields); - #endif // EXACT - #ifdef ROE + #endif // EXACT + #ifdef ROE hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields); hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lz, Q_Rz, F_z, nx, ny, nz, n_ghost, gama, 2, n_fields); - #endif // ROE - #ifdef HLLC + #endif // ROE + #ifdef HLLC hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields); hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lz, Q_Rz, F_z, nx, ny, nz, n_ghost, gama, 2, n_fields); - #endif // HLLC - #ifdef HLL + #endif // HLLC + #ifdef HLL hipLaunchKernelGGL(Calculate_HLL_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); hipLaunchKernelGGL(Calculate_HLL_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields); hipLaunchKernelGGL(Calculate_HLL_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lz, Q_Rz, F_z, nx, ny, nz, n_ghost, gama, 2, n_fields); - #endif // HLL + #endif // HLL GPU_Error_Check(); - #ifdef DE + #ifdef DE // Compute the divergence of Vel before updating the conserved array, this // solves synchronization issues when adding this term on // Update_Conserved_Variables_3D hipLaunchKernelGGL(Partial_Update_Advected_Internal_Energy_3D, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, Q_Ly, Q_Ry, Q_Lz, Q_Rz, nx, ny, nz, n_ghost, dx, dy, dz, dt, gama, n_fields); GPU_Error_Check(); - #endif + #endif // Step 3: Update the conserved variable array hipLaunchKernelGGL(Update_Conserved_Variables_3D, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, Q_Ly, Q_Ry, @@ -173,18 +172,18 @@ void Simple_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, zbound, dt, gama, n_fields, custom_grav, density_floor, dev_grav_potential); GPU_Error_Check(); - #ifdef DE + #ifdef DE hipLaunchKernelGGL(Select_Internal_Energy_3D, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, nz, n_ghost, n_fields); hipLaunchKernelGGL(Sync_Energies_3D, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, nz, n_ghost, gama, n_fields); GPU_Error_Check(); - #endif + #endif - #ifdef TEMPERATURE_FLOOR + #ifdef TEMPERATURE_FLOOR hipLaunchKernelGGL(Apply_Temperature_Floor, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, nz, n_ghost, n_fields, U_floor); GPU_Error_Check(); - #endif // TEMPERATURE_FLOOR + #endif // TEMPERATURE_FLOOR return; } @@ -204,5 +203,4 @@ void Free_Memory_Simple_3D() cudaFree(F_z); } - #endif // SIMPLE -#endif // CUDA +#endif // SIMPLE diff --git a/src/integrators/simple_3D_cuda.h b/src/integrators/simple_3D_cuda.h index 585c553ba..cd52f892b 100644 --- a/src/integrators/simple_3D_cuda.h +++ b/src/integrators/simple_3D_cuda.h @@ -1,13 +1,11 @@ /*! \file simple_3D_cuda.h * \brief Declarations for the cuda version of the 3D simple algorithm. */ -#ifdef CUDA +#ifndef SIMPLE_3D_CUDA_H +#define SIMPLE_3D_CUDA_H - #ifndef SIMPLE_3D_CUDA_H - #define SIMPLE_3D_CUDA_H - - #include "../chemistry_gpu/chemistry_gpu.h" - #include "../global/global.h" +#include "../chemistry_gpu/chemistry_gpu.h" +#include "../global/global.h" void Simple_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, int ny, int nz, int x_off, int y_off, int z_off, int n_ghost, Real dx, Real dy, Real dz, Real xbound, Real ybound, Real zbound, @@ -16,5 +14,4 @@ void Simple_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, void Free_Memory_Simple_3D(); - #endif // SIMPLE_3D_CUDA_H -#endif // CUDA +#endif // SIMPLE_3D_CUDA_H diff --git a/src/mpi/mpi_routines.cpp b/src/mpi/mpi_routines.cpp index 9fa4f1729..8f6c533e0 100644 --- a/src/mpi/mpi_routines.cpp +++ b/src/mpi/mpi_routines.cpp @@ -227,12 +227,11 @@ void InitializeChollaMPI(int *pargc, char **pargv[]) // #ifndef GRAVITY // // Needed to initialize cuda after gravity in order to work on Summit // //initialize cuda for use with mpi - #ifdef CUDA + if (initialize_cuda_mpi(procID_node, nproc_node)) { chprintf("Error initializing cuda with mpi.\n"); chexit(-10); } - #endif /*CUDA*/ // #endif//ONLY_PARTICLES } diff --git a/src/reconstruction/pcm_cuda.cu b/src/reconstruction/pcm_cuda.cu index 8d53832f1..e7264ca54 100644 --- a/src/reconstruction/pcm_cuda.cu +++ b/src/reconstruction/pcm_cuda.cu @@ -1,15 +1,14 @@ /*! \file pcm_cuda.cu * \brief Definitions of the piecewise constant reconstruction functions */ -#ifdef CUDA - #include +#include - #include "../global/global.h" - #include "../global/global_cuda.h" - #include "../reconstruction/pcm_cuda.h" - #include "../utils/cuda_utilities.h" - #include "../utils/gpu.hpp" - #include "../utils/mhd_utilities.h" +#include "../global/global.h" +#include "../global/global_cuda.h" +#include "../reconstruction/pcm_cuda.h" +#include "../utils/cuda_utilities.h" +#include "../utils/gpu.hpp" +#include "../utils/mhd_utilities.h" __global__ void PCM_Reconstruction_1D(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int n_cells, int n_ghost, Real gamma, int n_fields) @@ -18,13 +17,13 @@ __global__ void PCM_Reconstruction_1D(Real *dev_conserved, Real *dev_bounds_L, R // these will be placed into registers for each thread Real d, mx, my, mz, E; - #ifdef DE +#ifdef DE Real ge; - #endif // DE +#endif // DE - #ifdef SCALAR +#ifdef SCALAR Real scalar[NSCALARS]; - #endif // SCALAR +#endif // SCALAR // get a global thread ID int xid = threadIdx.x + blockIdx.x * blockDim.x; @@ -39,14 +38,14 @@ __global__ void PCM_Reconstruction_1D(Real *dev_conserved, Real *dev_bounds_L, R my = dev_conserved[2 * n_cells + id]; mz = dev_conserved[3 * n_cells + id]; E = dev_conserved[4 * n_cells + id]; - #ifdef SCALAR +#ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { scalar[i] = dev_conserved[(5 + i) * n_cells + id]; } - #endif // SCALAR - #ifdef DE +#endif // SCALAR +#ifdef DE ge = dev_conserved[(n_fields - 1) * n_cells + id]; - #endif // DE +#endif // DE // send values back from the kernel dev_bounds_L[id] = d; @@ -54,14 +53,14 @@ __global__ void PCM_Reconstruction_1D(Real *dev_conserved, Real *dev_bounds_L, R dev_bounds_L[2 * n_cells + id] = my; dev_bounds_L[3 * n_cells + id] = mz; dev_bounds_L[4 * n_cells + id] = E; - #ifdef SCALAR +#ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { dev_bounds_L[(5 + i) * n_cells + id] = scalar[i]; } - #endif // SCALAR - #ifdef DE +#endif // SCALAR +#ifdef DE dev_bounds_L[(n_fields - 1) * n_cells + id] = ge; - #endif // DE +#endif // DE // retrieve appropriate conserved variables id = xid + 1; @@ -70,14 +69,14 @@ __global__ void PCM_Reconstruction_1D(Real *dev_conserved, Real *dev_bounds_L, R my = dev_conserved[2 * n_cells + id]; mz = dev_conserved[3 * n_cells + id]; E = dev_conserved[4 * n_cells + id]; - #ifdef SCALAR +#ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { scalar[i] = dev_conserved[(5 + i) * n_cells + id]; } - #endif // SCALAR - #ifdef DE +#endif // SCALAR +#ifdef DE ge = dev_conserved[(n_fields - 1) * n_cells + id]; - #endif // DE +#endif // DE // send values back from the kernel id = xid; @@ -86,14 +85,14 @@ __global__ void PCM_Reconstruction_1D(Real *dev_conserved, Real *dev_bounds_L, R dev_bounds_R[2 * n_cells + id] = my; dev_bounds_R[3 * n_cells + id] = mz; dev_bounds_R[4 * n_cells + id] = E; - #ifdef SCALAR +#ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { dev_bounds_R[(5 + i) * n_cells + id] = scalar[i]; } - #endif // SCALAR - #ifdef DE +#endif // SCALAR +#ifdef DE dev_bounds_R[(n_fields - 1) * n_cells + id] = ge; - #endif // DE +#endif // DE } } @@ -104,12 +103,12 @@ __global__ void PCM_Reconstruction_2D(Real *dev_conserved, Real *dev_bounds_Lx, // declare conserved variables for each stencil // these will be placed into registers for each thread Real d, mx, my, mz, E; - #ifdef DE +#ifdef DE Real ge; - #endif // DE - #ifdef SCALAR +#endif // DE +#ifdef SCALAR Real scalar[NSCALARS]; - #endif // SCALAR +#endif // SCALAR int n_cells = nx * ny; @@ -130,14 +129,14 @@ __global__ void PCM_Reconstruction_2D(Real *dev_conserved, Real *dev_bounds_Lx, my = dev_conserved[2 * n_cells + id]; mz = dev_conserved[3 * n_cells + id]; E = dev_conserved[4 * n_cells + id]; - #ifdef SCALAR +#ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { scalar[i] = dev_conserved[(5 + i) * n_cells + id]; } - #endif // SCALAR - #ifdef DE +#endif // SCALAR +#ifdef DE ge = dev_conserved[(n_fields - 1) * n_cells + id]; - #endif // DE +#endif // DE // send values back from the kernel dev_bounds_Lx[id] = d; @@ -145,14 +144,14 @@ __global__ void PCM_Reconstruction_2D(Real *dev_conserved, Real *dev_bounds_Lx, dev_bounds_Lx[2 * n_cells + id] = my; dev_bounds_Lx[3 * n_cells + id] = mz; dev_bounds_Lx[4 * n_cells + id] = E; - #ifdef SCALAR +#ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { dev_bounds_Lx[(5 + i) * n_cells + id] = scalar[i]; } - #endif // SCALAR - #ifdef DE +#endif // SCALAR +#ifdef DE dev_bounds_Lx[(n_fields - 1) * n_cells + id] = ge; - #endif // DE +#endif // DE // retrieve appropriate conserved variables id = xid + 1 + yid * nx; @@ -161,14 +160,14 @@ __global__ void PCM_Reconstruction_2D(Real *dev_conserved, Real *dev_bounds_Lx, my = dev_conserved[2 * n_cells + id]; mz = dev_conserved[3 * n_cells + id]; E = dev_conserved[4 * n_cells + id]; - #ifdef SCALAR +#ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { scalar[i] = dev_conserved[(5 + i) * n_cells + id]; } - #endif // SCALAR - #ifdef DE +#endif // SCALAR +#ifdef DE ge = dev_conserved[(n_fields - 1) * n_cells + id]; - #endif // DE +#endif // DE // send values back from the kernel id = xid + yid * nx; @@ -177,14 +176,14 @@ __global__ void PCM_Reconstruction_2D(Real *dev_conserved, Real *dev_bounds_Lx, dev_bounds_Rx[2 * n_cells + id] = my; dev_bounds_Rx[3 * n_cells + id] = mz; dev_bounds_Rx[4 * n_cells + id] = E; - #ifdef SCALAR +#ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { dev_bounds_Rx[(5 + i) * n_cells + id] = scalar[i]; } - #endif // SCALAR - #ifdef DE +#endif // SCALAR +#ifdef DE dev_bounds_Rx[(n_fields - 1) * n_cells + id] = ge; - #endif // DE +#endif // DE } // y direction @@ -196,14 +195,14 @@ __global__ void PCM_Reconstruction_2D(Real *dev_conserved, Real *dev_bounds_Lx, my = dev_conserved[2 * n_cells + id]; mz = dev_conserved[3 * n_cells + id]; E = dev_conserved[4 * n_cells + id]; - #ifdef SCALAR +#ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { scalar[i] = dev_conserved[(5 + i) * n_cells + id]; } - #endif // SCALAR - #ifdef DE +#endif // SCALAR +#ifdef DE ge = dev_conserved[(n_fields - 1) * n_cells + id]; - #endif // DE +#endif // DE // send values back from the kernel dev_bounds_Ly[id] = d; @@ -211,14 +210,14 @@ __global__ void PCM_Reconstruction_2D(Real *dev_conserved, Real *dev_bounds_Lx, dev_bounds_Ly[2 * n_cells + id] = my; dev_bounds_Ly[3 * n_cells + id] = mz; dev_bounds_Ly[4 * n_cells + id] = E; - #ifdef SCALAR +#ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { dev_bounds_Ly[(5 + i) * n_cells + id] = scalar[i]; } - #endif // SCALAR - #ifdef DE +#endif // SCALAR +#ifdef DE dev_bounds_Ly[(n_fields - 1) * n_cells + id] = ge; - #endif // DE +#endif // DE // retrieve appropriate conserved variables id = xid + (yid + 1) * nx; @@ -227,14 +226,14 @@ __global__ void PCM_Reconstruction_2D(Real *dev_conserved, Real *dev_bounds_Lx, my = dev_conserved[2 * n_cells + id]; mz = dev_conserved[3 * n_cells + id]; E = dev_conserved[4 * n_cells + id]; - #ifdef SCALAR +#ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { scalar[i] = dev_conserved[(5 + i) * n_cells + id]; } - #endif // SCALAR - #ifdef DE +#endif // SCALAR +#ifdef DE ge = dev_conserved[(n_fields - 1) * n_cells + id]; - #endif // DE +#endif // DE // send values back from the kernel id = xid + yid * nx; @@ -243,14 +242,14 @@ __global__ void PCM_Reconstruction_2D(Real *dev_conserved, Real *dev_bounds_Lx, dev_bounds_Ry[2 * n_cells + id] = my; dev_bounds_Ry[3 * n_cells + id] = mz; dev_bounds_Ry[4 * n_cells + id] = E; - #ifdef SCALAR +#ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { dev_bounds_Ry[(5 + i) * n_cells + id] = scalar[i]; } - #endif // SCALAR - #ifdef DE +#endif // SCALAR +#ifdef DE dev_bounds_Ry[(n_fields - 1) * n_cells + id] = ge; - #endif // DE +#endif // DE } } @@ -259,11 +258,11 @@ __global__ void PCM_Reconstruction_3D(Real *dev_conserved, Real *dev_bounds_Lx, Real *dev_bounds_Rz, int nx, int ny, int nz, int n_ghost, Real gamma, int n_fields) { - // declare conserved variables for each stencil - // these will be placed into registers for each thread - #ifdef SCALAR +// declare conserved variables for each stencil +// these will be placed into registers for each thread +#ifdef SCALAR Real scalar[NSCALARS]; - #endif // SCALAR +#endif // SCALAR int const n_cells = nx * ny * nz; @@ -282,18 +281,18 @@ __global__ void PCM_Reconstruction_3D(Real *dev_conserved, Real *dev_bounds_Lx, Real const my = dev_conserved[2 * n_cells + id]; Real const mz = dev_conserved[3 * n_cells + id]; Real const E = dev_conserved[4 * n_cells + id]; - #ifdef SCALAR +#ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { scalar[i] = dev_conserved[(5 + i) * n_cells + id]; } - #endif // SCALAR - #ifdef MHD +#endif // SCALAR +#ifdef MHD auto const [cellCenteredBx, cellCenteredBy, cellCenteredBz] = mhd::utils::cellCenteredMagneticFields(dev_conserved, id, xid, yid, zid, n_cells, nx, ny); - #endif // MHD - #ifdef DE +#endif // MHD +#ifdef DE Real const ge = dev_conserved[(n_fields - 1) * n_cells + id]; - #endif // DE +#endif // DE // ================================ // Send values back from the kernel @@ -305,18 +304,18 @@ __global__ void PCM_Reconstruction_3D(Real *dev_conserved, Real *dev_bounds_Lx, dev_bounds_Lx[2 * n_cells + id] = my; dev_bounds_Lx[3 * n_cells + id] = mz; dev_bounds_Lx[4 * n_cells + id] = E; - #ifdef SCALAR +#ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { dev_bounds_Lx[(5 + i) * n_cells + id] = scalar[i]; } - #endif // SCALAR - #ifdef MHD +#endif // SCALAR +#ifdef MHD dev_bounds_Lx[(grid_enum::Q_x_magnetic_y)*n_cells + id] = cellCenteredBy; dev_bounds_Lx[(grid_enum::Q_x_magnetic_z)*n_cells + id] = cellCenteredBz; - #endif // MHD - #ifdef DE +#endif // MHD +#ifdef DE dev_bounds_Lx[(n_fields - 1) * n_cells + id] = ge; - #endif // DE +#endif // DE // Send the y+1/2 Left interface dev_bounds_Ly[id] = d; @@ -324,18 +323,18 @@ __global__ void PCM_Reconstruction_3D(Real *dev_conserved, Real *dev_bounds_Lx, dev_bounds_Ly[2 * n_cells + id] = my; dev_bounds_Ly[3 * n_cells + id] = mz; dev_bounds_Ly[4 * n_cells + id] = E; - #ifdef SCALAR +#ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { dev_bounds_Ly[(5 + i) * n_cells + id] = scalar[i]; } - #endif // SCALAR - #ifdef MHD +#endif // SCALAR +#ifdef MHD dev_bounds_Ly[(grid_enum::Q_y_magnetic_z)*n_cells + id] = cellCenteredBz; dev_bounds_Ly[(grid_enum::Q_y_magnetic_x)*n_cells + id] = cellCenteredBx; - #endif // MHD - #ifdef DE +#endif // MHD +#ifdef DE dev_bounds_Ly[(n_fields - 1) * n_cells + id] = ge; - #endif // DE +#endif // DE // Send the z+1/2 Left interface dev_bounds_Lz[id] = d; @@ -343,18 +342,18 @@ __global__ void PCM_Reconstruction_3D(Real *dev_conserved, Real *dev_bounds_Lx, dev_bounds_Lz[2 * n_cells + id] = my; dev_bounds_Lz[3 * n_cells + id] = mz; dev_bounds_Lz[4 * n_cells + id] = E; - #ifdef SCALAR +#ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { dev_bounds_Lz[(5 + i) * n_cells + id] = scalar[i]; } - #endif // SCALAR - #ifdef MHD +#endif // SCALAR +#ifdef MHD dev_bounds_Lz[(grid_enum::Q_z_magnetic_x)*n_cells + id] = cellCenteredBx; dev_bounds_Lz[(grid_enum::Q_z_magnetic_y)*n_cells + id] = cellCenteredBy; - #endif // MHD - #ifdef DE +#endif // MHD +#ifdef DE dev_bounds_Lz[(n_fields - 1) * n_cells + id] = ge; - #endif // DE +#endif // DE // Send the x-1/2 Right interface if (xid > 0) { @@ -364,18 +363,18 @@ __global__ void PCM_Reconstruction_3D(Real *dev_conserved, Real *dev_bounds_Lx, dev_bounds_Rx[2 * n_cells + id] = my; dev_bounds_Rx[3 * n_cells + id] = mz; dev_bounds_Rx[4 * n_cells + id] = E; - #ifdef SCALAR +#ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { dev_bounds_Rx[(5 + i) * n_cells + id] = scalar[i]; } - #endif // SCALAR - #ifdef MHD +#endif // SCALAR +#ifdef MHD dev_bounds_Rx[(grid_enum::Q_x_magnetic_y)*n_cells + id] = cellCenteredBy; dev_bounds_Rx[(grid_enum::Q_x_magnetic_z)*n_cells + id] = cellCenteredBz; - #endif // MHD - #ifdef DE +#endif // MHD +#ifdef DE dev_bounds_Rx[(n_fields - 1) * n_cells + id] = ge; - #endif // DE +#endif // DE } if (yid > 0) { @@ -386,18 +385,18 @@ __global__ void PCM_Reconstruction_3D(Real *dev_conserved, Real *dev_bounds_Lx, dev_bounds_Ry[2 * n_cells + id] = my; dev_bounds_Ry[3 * n_cells + id] = mz; dev_bounds_Ry[4 * n_cells + id] = E; - #ifdef SCALAR +#ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { dev_bounds_Ry[(5 + i) * n_cells + id] = scalar[i]; } - #endif // SCALAR - #ifdef MHD +#endif // SCALAR +#ifdef MHD dev_bounds_Ry[(grid_enum::Q_y_magnetic_z)*n_cells + id] = cellCenteredBz; dev_bounds_Ry[(grid_enum::Q_y_magnetic_x)*n_cells + id] = cellCenteredBx; - #endif // MHD - #ifdef DE +#endif // MHD +#ifdef DE dev_bounds_Ry[(n_fields - 1) * n_cells + id] = ge; - #endif // DE +#endif // DE } if (zid > 0) { @@ -408,20 +407,18 @@ __global__ void PCM_Reconstruction_3D(Real *dev_conserved, Real *dev_bounds_Lx, dev_bounds_Rz[2 * n_cells + id] = my; dev_bounds_Rz[3 * n_cells + id] = mz; dev_bounds_Rz[4 * n_cells + id] = E; - #ifdef SCALAR +#ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { dev_bounds_Rz[(5 + i) * n_cells + id] = scalar[i]; } - #endif // SCALAR - #ifdef MHD +#endif // SCALAR +#ifdef MHD dev_bounds_Rz[(grid_enum::Q_z_magnetic_x)*n_cells + id] = cellCenteredBx; dev_bounds_Rz[(grid_enum::Q_z_magnetic_y)*n_cells + id] = cellCenteredBy; - #endif // MHD - #ifdef DE +#endif // MHD +#ifdef DE dev_bounds_Rz[(n_fields - 1) * n_cells + id] = ge; - #endif // DE +#endif // DE } } } - -#endif // CUDA diff --git a/src/reconstruction/pcm_cuda.h b/src/reconstruction/pcm_cuda.h index b6990c11b..dbf83fb65 100644 --- a/src/reconstruction/pcm_cuda.h +++ b/src/reconstruction/pcm_cuda.h @@ -1,10 +1,8 @@ /*! \file pcm_cuda.h * \brief Declarations of the cuda pcm kernels */ -#ifdef CUDA - - #ifndef PCM_CUDA_H - #define PCM_CUDA_H +#ifndef PCM_CUDA_H +#define PCM_CUDA_H __global__ void PCM_Reconstruction_1D(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int n_cells, int n_ghost, Real gamma, int n_fields); @@ -18,5 +16,4 @@ __global__ void PCM_Reconstruction_3D(Real *dev_conserved, Real *dev_bounds_Lx, Real *dev_bounds_Rz, int nx, int ny, int nz, int n_ghost, Real gamma, int n_fields); - #endif // PCM_CUDA_H -#endif // CUDA +#endif // PCM_CUDA_H diff --git a/src/reconstruction/plmp_cuda.cu b/src/reconstruction/plmp_cuda.cu index a000da4da..e8cfa0d09 100644 --- a/src/reconstruction/plmp_cuda.cu +++ b/src/reconstruction/plmp_cuda.cu @@ -1,18 +1,17 @@ /*! \file plmp_cuda.cu * \brief Definitions of the piecewise linear reconstruction functions for with limiting in the primitive variables. */ -#ifdef CUDA - #include +#include - #include "../global/global.h" - #include "../global/global_cuda.h" - #include "../reconstruction/plmp_cuda.h" - #include "../utils/gpu.hpp" +#include "../global/global.h" +#include "../global/global_cuda.h" +#include "../reconstruction/plmp_cuda.h" +#include "../utils/gpu.hpp" - #ifdef DE // PRESSURE_DE - #include "../utils/hydro_utilities.h" - #endif +#ifdef DE // PRESSURE_DE + #include "../utils/hydro_utilities.h" +#endif /*! \fn __global__ void PLMP_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, int ny, int nz, int n_ghost, Real dx, Real dt, Real @@ -53,24 +52,24 @@ __global__ void PLMP_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou Real mx_L, my_L, mz_L, E_L; Real mx_R, my_R, mz_R, E_R; - #ifdef DE +#ifdef DE Real ge_i, ge_imo, ge_ipo, ge_L, ge_R, dge_L, dge_R, E_kin, E, dge; - #endif // DE - #ifdef SCALAR +#endif // DE +#ifdef SCALAR Real scalar_i[NSCALARS], scalar_imo[NSCALARS], scalar_ipo[NSCALARS]; Real scalar_L[NSCALARS], scalar_R[NSCALARS], dscalar_L[NSCALARS], dscalar_R[NSCALARS]; - #endif // SCALAR +#endif // SCALAR - #ifndef VL // Don't use velocities to reconstruct when using VL +#ifndef VL // Don't use velocities to reconstruct when using VL Real dtodx = dt / dx; Real dfl, dfr, mxfl, mxfr, myfl, myfr, mzfl, mzfr, Efl, Efr; - #ifdef DE + #ifdef DE Real gefl, gefr; - #endif // DE - #ifdef SCALAR + #endif // DE + #ifdef SCALAR Real scalarfl[NSCALARS], scalarfr[NSCALARS]; - #endif // SCALAR - #endif // VL + #endif // SCALAR +#endif // VL // get a thread ID int blockId = blockIdx.x + blockIdx.y * gridDim.x; @@ -114,23 +113,23 @@ __global__ void PLMP_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou vx_i = dev_conserved[o1 * n_cells + id] / d_i; vy_i = dev_conserved[o2 * n_cells + id] / d_i; vz_i = dev_conserved[o3 * n_cells + id] / d_i; - #ifdef DE // PRESSURE_DE +#ifdef DE // PRESSURE_DE E = dev_conserved[4 * n_cells + id]; E_kin = 0.5 * d_i * (vx_i * vx_i + vy_i * vy_i + vz_i * vz_i); dge = dev_conserved[(n_fields - 1) * n_cells + id]; p_i = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, dge, gamma); - #else +#else p_i = (dev_conserved[4 * n_cells + id] - 0.5 * d_i * (vx_i * vx_i + vy_i * vy_i + vz_i * vz_i)) * (gamma - 1.0); - #endif // PRESSURE_DE +#endif // PRESSURE_DE p_i = fmax(p_i, (Real)TINY_NUMBER); - #ifdef SCALAR +#ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { scalar_i[i] = dev_conserved[(5 + i) * n_cells + id] / d_i; } - #endif // SCALAR - #ifdef DE +#endif // SCALAR +#ifdef DE ge_i = dge / d_i; - #endif // DE +#endif // DE // cell i-1 if (dir == 0) { id = xid - 1 + yid * nx + zid * nx * ny; @@ -145,24 +144,24 @@ __global__ void PLMP_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou vx_imo = dev_conserved[o1 * n_cells + id] / d_imo; vy_imo = dev_conserved[o2 * n_cells + id] / d_imo; vz_imo = dev_conserved[o3 * n_cells + id] / d_imo; - #ifdef DE // PRESSURE_DE +#ifdef DE // PRESSURE_DE E = dev_conserved[4 * n_cells + id]; E_kin = 0.5 * d_imo * (vx_imo * vx_imo + vy_imo * vy_imo + vz_imo * vz_imo); dge = dev_conserved[(n_fields - 1) * n_cells + id]; p_imo = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, dge, gamma); - #else +#else p_imo = (dev_conserved[4 * n_cells + id] - 0.5 * d_imo * (vx_imo * vx_imo + vy_imo * vy_imo + vz_imo * vz_imo)) * (gamma - 1.0); - #endif // PRESSURE_DE +#endif // PRESSURE_DE p_imo = fmax(p_imo, (Real)TINY_NUMBER); - #ifdef SCALAR +#ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { scalar_imo[i] = dev_conserved[(5 + i) * n_cells + id] / d_imo; } - #endif // SCALAR - #ifdef DE +#endif // SCALAR +#ifdef DE ge_imo = dge / d_imo; - #endif // DE +#endif // DE // cell i+1 if (dir == 0) { id = xid + 1 + yid * nx + zid * nx * ny; @@ -177,24 +176,24 @@ __global__ void PLMP_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou vx_ipo = dev_conserved[o1 * n_cells + id] / d_ipo; vy_ipo = dev_conserved[o2 * n_cells + id] / d_ipo; vz_ipo = dev_conserved[o3 * n_cells + id] / d_ipo; - #ifdef DE // PRESSURE_DE +#ifdef DE // PRESSURE_DE E = dev_conserved[4 * n_cells + id]; E_kin = 0.5 * d_ipo * (vx_ipo * vx_ipo + vy_ipo * vy_ipo + vz_ipo * vz_ipo); dge = dev_conserved[(n_fields - 1) * n_cells + id]; p_ipo = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, dge, gamma); - #else +#else p_ipo = (dev_conserved[4 * n_cells + id] - 0.5 * d_ipo * (vx_ipo * vx_ipo + vy_ipo * vy_ipo + vz_ipo * vz_ipo)) * (gamma - 1.0); - #endif // PRESSURE_DE +#endif // PRESSURE_DE p_ipo = fmax(p_ipo, (Real)TINY_NUMBER); - #ifdef SCALAR +#ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { scalar_ipo[i] = dev_conserved[(5 + i) * n_cells + id] / d_ipo; } - #endif // SCALAR - #ifdef DE +#endif // SCALAR +#ifdef DE ge_ipo = dge / d_ipo; - #endif // DE +#endif // DE // Calculate the interface values for each primitive variable Interface_Values_PLM(d_imo, d_i, d_ipo, &d_L, &d_R); @@ -202,14 +201,14 @@ __global__ void PLMP_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou Interface_Values_PLM(vy_imo, vy_i, vy_ipo, &vy_L, &vy_R); Interface_Values_PLM(vz_imo, vz_i, vz_ipo, &vz_L, &vz_R); Interface_Values_PLM(p_imo, p_i, p_ipo, &p_L, &p_R); - #ifdef DE +#ifdef DE Interface_Values_PLM(ge_imo, ge_i, ge_ipo, &ge_L, &ge_R); - #endif // DE - #ifdef SCALAR +#endif // DE +#ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { Interface_Values_PLM(scalar_imo[i], scalar_i[i], scalar_ipo[i], &scalar_L[i], &scalar_R[i]); } - #endif // SCALAR +#endif // SCALAR // Apply mimimum constraints d_L = fmax(d_L, (Real)TINY_NUMBER); @@ -226,19 +225,19 @@ __global__ void PLMP_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou mz_R = d_R * vz_R; E_L = p_L / (gamma - 1.0) + 0.5 * d_L * (vx_L * vx_L + vy_L * vy_L + vz_L * vz_L); E_R = p_R / (gamma - 1.0) + 0.5 * d_R * (vx_R * vx_R + vy_R * vy_R + vz_R * vz_R); - #ifdef DE +#ifdef DE dge_L = d_L * ge_L; dge_R = d_R * ge_R; - #endif // DE - #ifdef SCALAR +#endif // DE +#ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { dscalar_L[i] = d_L * scalar_L[i]; dscalar_R[i] = d_R * scalar_R[i]; } - #endif // SCALAR +#endif // SCALAR - // #ifdef CTU - #ifndef VL // Don't use velocities to reconstruct when using VL +// #ifdef CTU +#ifndef VL // Don't use velocities to reconstruct when using VL // calculate fluxes for each variable dfl = mx_L; dfr = mx_R; @@ -250,16 +249,16 @@ __global__ void PLMP_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou mzfr = mx_R * vz_R; Efl = (E_L + p_L) * vx_L; Efr = (E_R + p_R) * vx_R; - #ifdef DE + #ifdef DE gefl = dge_L * vx_L; gefr = dge_R * vx_R; - #endif // DE - #ifdef SCALAR + #endif // DE + #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { scalarfl[i] = dscalar_L[i] * vx_L; scalarfr[i] = dscalar_R[i] * vx_R; } - #endif // SCALAR + #endif // SCALAR // Evolve the boundary extrapolated values half a timestep. d_L += 0.5 * (dtodx) * (dfl - dfr); @@ -272,18 +271,18 @@ __global__ void PLMP_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou mz_R += 0.5 * (dtodx) * (mzfl - mzfr); E_L += 0.5 * (dtodx) * (Efl - Efr); E_R += 0.5 * (dtodx) * (Efl - Efr); - #ifdef DE + #ifdef DE dge_L += 0.5 * (dtodx) * (gefl - gefr); dge_R += 0.5 * (dtodx) * (gefl - gefr); - #endif // DE - #ifdef SCALAR + #endif // DE + #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { dscalar_L[i] += 0.5 * (dtodx) * (scalarfl[i] - scalarfr[i]); dscalar_R[i] += 0.5 * (dtodx) * (scalarfl[i] - scalarfr[i]); } - #endif // SCALAR + #endif // SCALAR - #endif // NO VL +#endif // NO VL // Convert the left and right states in the primitive to the conserved // variables send final values back from kernel bounds_R refers to the right @@ -302,14 +301,14 @@ __global__ void PLMP_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou dev_bounds_R[o2 * n_cells + id] = my_L; dev_bounds_R[o3 * n_cells + id] = mz_L; dev_bounds_R[4 * n_cells + id] = E_L; - #ifdef SCALAR +#ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { dev_bounds_R[(5 + i) * n_cells + id] = dscalar_L[i]; } - #endif // SCALAR - #ifdef DE +#endif // SCALAR +#ifdef DE dev_bounds_R[(n_fields - 1) * n_cells + id] = dge_L; - #endif // DE +#endif // DE // bounds_L refers to the left side of the i+1/2 interface id = xid + yid * nx + zid * nx * ny; dev_bounds_L[id] = d_R; @@ -317,14 +316,14 @@ __global__ void PLMP_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou dev_bounds_L[o2 * n_cells + id] = my_R; dev_bounds_L[o3 * n_cells + id] = mz_R; dev_bounds_L[4 * n_cells + id] = E_R; - #ifdef SCALAR +#ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { dev_bounds_L[(5 + i) * n_cells + id] = dscalar_R[i]; } - #endif // SCALAR - #ifdef DE +#endif // SCALAR +#ifdef DE dev_bounds_L[(n_fields - 1) * n_cells + id] = dge_R; - #endif // DE +#endif // DE } } @@ -364,5 +363,3 @@ __device__ void Interface_Values_PLM(Real q_imo, Real q_i, Real q_ipo, Real *q_L *q_L = q_i - 0.5 * del_q_m; *q_R = q_i + 0.5 * del_q_m; } - -#endif // CUDA diff --git a/src/reconstruction/plmp_cuda.h b/src/reconstruction/plmp_cuda.h index 7768722d5..34faa14df 100644 --- a/src/reconstruction/plmp_cuda.h +++ b/src/reconstruction/plmp_cuda.h @@ -1,12 +1,10 @@ /*! \file plmp_cuda.h * \brief Declarations of the cuda plmp kernels. */ -#ifdef CUDA +#ifndef PLMP_CUDA_H +#define PLMP_CUDA_H - #ifndef PLMP_CUDA_H - #define PLMP_CUDA_H - - #include "../global/global.h" +#include "../global/global.h" /*! \fn __global__ void PLMP_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, int ny, int nz, int n_ghost, Real dx, Real dt, Real @@ -23,5 +21,4 @@ __global__ void PLMP_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou limiting. */ __device__ void Interface_Values_PLM(Real q_imo, Real q_i, Real q_ipo, Real *q_L, Real *q_R); - #endif // PLMP_CUDA_H -#endif // CUDA +#endif // PLMP_CUDA_H diff --git a/src/reconstruction/ppmp_cuda.cu b/src/reconstruction/ppmp_cuda.cu index ae8da90cb..2038f215a 100644 --- a/src/reconstruction/ppmp_cuda.cu +++ b/src/reconstruction/ppmp_cuda.cu @@ -1,19 +1,19 @@ /*! \file ppmp_cuda.cu * \brief Definitions of the piecewise parabolic reconstruction (Fryxell 2000) functions with limiting in the primitive variables. */ -#ifdef CUDA - #ifdef PPMP - #include +#ifdef PPMP - #include "../global/global.h" - #include "../global/global_cuda.h" - #include "../reconstruction/ppmp_cuda.h" - #include "../utils/gpu.hpp" + #include - #ifdef DE // PRESSURE_DE - #include "../utils/hydro_utilities.h" - #endif + #include "../global/global.h" + #include "../global/global_cuda.h" + #include "../reconstruction/ppmp_cuda.h" + #include "../utils/gpu.hpp" + + #ifdef DE // PRESSURE_DE + #include "../utils/hydro_utilities.h" + #endif // #define STEEPENING // #define FLATTENING @@ -52,9 +52,9 @@ __global__ void PPMP_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou Real d_ipo, vx_ipo, vy_ipo, vz_ipo, p_ipo; Real d_imt, vx_imt, vy_imt, vz_imt, p_imt; Real d_ipt, vx_ipt, vy_ipt, vz_ipt, p_ipt; - #ifdef FLATTENING + #ifdef FLATTENING Real p_imth, p_ipth; - #endif // FLATTENING + #endif // FLATTENING // declare left and right interface values Real d_L, vx_L, vy_L, vz_L, p_L; @@ -63,7 +63,7 @@ __global__ void PPMP_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou // declare other variables Real del_q_imo, del_q_i, del_q_ipo; - #ifndef VL + #ifndef VL // #ifdef CTU Real cs, cl, cr; // sound speed in cell i, and at left and right boundaries Real del_d, del_vx, del_vy, del_vz, del_p; // "slope" accross cell i @@ -79,24 +79,24 @@ __global__ void PPMP_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou Real dR_p, vxR_p, pR_p; Real chi_L_m, chi_L_0, chi_L_p; Real chi_R_m, chi_R_0, chi_R_p; - #endif // CTU + #endif // CTU - #ifdef DE + #ifdef DE Real ge_i, ge_imo, ge_ipo, ge_imt, ge_ipt, ge_L, ge_R, E_kin, E, dge; - #ifndef VL + #ifndef VL // #ifdef CTU Real del_ge, ge_6, geL_0, geR_0; - #endif // CTU - #endif // DE + #endif // CTU + #endif // DE - #ifdef SCALAR + #ifdef SCALAR Real scalar_i[NSCALARS], scalar_imo[NSCALARS], scalar_ipo[NSCALARS], scalar_imt[NSCALARS], scalar_ipt[NSCALARS]; Real scalar_L[NSCALARS], scalar_R[NSCALARS]; - #ifndef VL + #ifndef VL // #ifdef CTU Real del_scalar[NSCALARS], scalar_6[NSCALARS], scalarL_0[NSCALARS], scalarR_0[NSCALARS]; - #endif // CTU - #endif // SCALAR + #endif // CTU + #endif // SCALAR // get a thread ID int blockId = blockIdx.x + blockIdx.y * gridDim.x; @@ -160,23 +160,23 @@ __global__ void PPMP_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou vx_i = dev_conserved[o1 * n_cells + id] / d_i; vy_i = dev_conserved[o2 * n_cells + id] / d_i; vz_i = dev_conserved[o3 * n_cells + id] / d_i; - #ifdef DE // PRESSURE_DE + #ifdef DE // PRESSURE_DE E = dev_conserved[4 * n_cells + id]; E_kin = 0.5 * d_i * (vx_i * vx_i + vy_i * vy_i + vz_i * vz_i); dge = dev_conserved[(n_fields - 1) * n_cells + id]; p_i = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, dge, gamma); - #else + #else p_i = (dev_conserved[4 * n_cells + id] - 0.5 * d_i * (vx_i * vx_i + vy_i * vy_i + vz_i * vz_i)) * (gamma - 1.0); - #endif // PRESSURE_DE + #endif // PRESSURE_DE p_i = fmax(p_i, (Real)TINY_NUMBER); - #ifdef DE + #ifdef DE ge_i = dge / d_i; - #endif // DE - #ifdef SCALAR + #endif // DE + #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { scalar_i[i] = dev_conserved[(5 + i) * n_cells + id] / d_i; } - #endif // SCALAR + #endif // SCALAR // cell i-1 if (dir == 0) id = xid - 1 + yid * nx + zid * nx * ny; if (dir == 1) id = xid + (yid - 1) * nx + zid * nx * ny; @@ -185,24 +185,24 @@ __global__ void PPMP_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou vx_imo = dev_conserved[o1 * n_cells + id] / d_imo; vy_imo = dev_conserved[o2 * n_cells + id] / d_imo; vz_imo = dev_conserved[o3 * n_cells + id] / d_imo; - #ifdef DE // PRESSURE_DE + #ifdef DE // PRESSURE_DE E = dev_conserved[4 * n_cells + id]; E_kin = 0.5 * d_imo * (vx_imo * vx_imo + vy_imo * vy_imo + vz_imo * vz_imo); dge = dev_conserved[(n_fields - 1) * n_cells + id]; p_imo = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, dge, gamma); - #else + #else p_imo = (dev_conserved[4 * n_cells + id] - 0.5 * d_imo * (vx_imo * vx_imo + vy_imo * vy_imo + vz_imo * vz_imo)) * (gamma - 1.0); - #endif // PRESSURE_DE + #endif // PRESSURE_DE p_imo = fmax(p_imo, (Real)TINY_NUMBER); - #ifdef DE + #ifdef DE ge_imo = dge / d_imo; - #endif // DE - #ifdef SCALAR + #endif // DE + #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { scalar_imo[i] = dev_conserved[(5 + i) * n_cells + id] / d_imo; } - #endif // SCALAR + #endif // SCALAR // cell i+1 if (dir == 0) id = xid + 1 + yid * nx + zid * nx * ny; if (dir == 1) id = xid + (yid + 1) * nx + zid * nx * ny; @@ -211,24 +211,24 @@ __global__ void PPMP_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou vx_ipo = dev_conserved[o1 * n_cells + id] / d_ipo; vy_ipo = dev_conserved[o2 * n_cells + id] / d_ipo; vz_ipo = dev_conserved[o3 * n_cells + id] / d_ipo; - #ifdef DE // PRESSURE_DE + #ifdef DE // PRESSURE_DE E = dev_conserved[4 * n_cells + id]; E_kin = 0.5 * d_ipo * (vx_ipo * vx_ipo + vy_ipo * vy_ipo + vz_ipo * vz_ipo); dge = dev_conserved[(n_fields - 1) * n_cells + id]; p_ipo = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, dge, gamma); - #else + #else p_ipo = (dev_conserved[4 * n_cells + id] - 0.5 * d_ipo * (vx_ipo * vx_ipo + vy_ipo * vy_ipo + vz_ipo * vz_ipo)) * (gamma - 1.0); - #endif // PRESSURE_DE + #endif // PRESSURE_DE p_ipo = fmax(p_ipo, (Real)TINY_NUMBER); - #ifdef DE + #ifdef DE ge_ipo = dge / d_ipo; - #endif // DE - #ifdef SCALAR + #endif // DE + #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { scalar_ipo[i] = dev_conserved[(5 + i) * n_cells + id] / d_ipo; } - #endif // SCALAR + #endif // SCALAR // cell i-2 if (dir == 0) id = xid - 2 + yid * nx + zid * nx * ny; if (dir == 1) id = xid + (yid - 2) * nx + zid * nx * ny; @@ -237,24 +237,24 @@ __global__ void PPMP_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou vx_imt = dev_conserved[o1 * n_cells + id] / d_imt; vy_imt = dev_conserved[o2 * n_cells + id] / d_imt; vz_imt = dev_conserved[o3 * n_cells + id] / d_imt; - #ifdef DE // PRESSURE_DE + #ifdef DE // PRESSURE_DE E = dev_conserved[4 * n_cells + id]; E_kin = 0.5 * d_imt * (vx_imt * vx_imt + vy_imt * vy_imt + vz_imt * vz_imt); dge = dev_conserved[(n_fields - 1) * n_cells + id]; p_imt = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, dge, gamma); - #else + #else p_imt = (dev_conserved[4 * n_cells + id] - 0.5 * d_imt * (vx_imt * vx_imt + vy_imt * vy_imt + vz_imt * vz_imt)) * (gamma - 1.0); - #endif // PRESSURE_DE + #endif // PRESSURE_DE p_imt = fmax(p_imt, (Real)TINY_NUMBER); - #ifdef DE + #ifdef DE ge_imt = dge / d_imt; - #endif // DE - #ifdef SCALAR + #endif // DE + #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { scalar_imt[i] = dev_conserved[(5 + i) * n_cells + id] / d_imt; } - #endif // SCALAR + #endif // SCALAR // cell i+2 if (dir == 0) id = xid + 2 + yid * nx + zid * nx * ny; if (dir == 1) id = xid + (yid + 2) * nx + zid * nx * ny; @@ -263,25 +263,25 @@ __global__ void PPMP_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou vx_ipt = dev_conserved[o1 * n_cells + id] / d_ipt; vy_ipt = dev_conserved[o2 * n_cells + id] / d_ipt; vz_ipt = dev_conserved[o3 * n_cells + id] / d_ipt; - #ifdef DE // PRESSURE_DE + #ifdef DE // PRESSURE_DE E = dev_conserved[4 * n_cells + id]; E_kin = 0.5 * d_ipt * (vx_ipt * vx_ipt + vy_ipt * vy_ipt + vz_ipt * vz_ipt); dge = dev_conserved[(n_fields - 1) * n_cells + id]; p_ipt = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, dge, gamma); - #else + #else p_ipt = (dev_conserved[4 * n_cells + id] - 0.5 * d_ipt * (vx_ipt * vx_ipt + vy_ipt * vy_ipt + vz_ipt * vz_ipt)) * (gamma - 1.0); - #endif // PRESSURE_DE + #endif // PRESSURE_DE p_ipt = fmax(p_ipt, (Real)TINY_NUMBER); - #ifdef DE + #ifdef DE ge_ipt = dge / d_ipt; - #endif // DE - #ifdef SCALAR + #endif // DE + #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { scalar_ipt[i] = dev_conserved[(5 + i) * n_cells + id] / d_ipt; } - #endif // SCALAR - #ifdef FLATTENING + #endif // SCALAR + #ifdef FLATTENING // cell i-3 if (dir == 0) id = xid - 3 + yid * nx + zid * nx * ny; if (dir == 1) id = xid + (yid - 3) * nx + zid * nx * ny; @@ -306,7 +306,7 @@ __global__ void PPMP_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou dev_conserved[id]) * (gamma - 1.0); p_ipth = fmax(p_imth, (Real)TINY_NUMBER); - #endif // FLATTENING + #endif // FLATTENING // use ppm routines to set cell boundary values (see Fryxell Sec. 3.1.1) @@ -350,7 +350,7 @@ __global__ void PPMP_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou // Calculate the interface values for pressure Interface_Values_PPM(p_imo, p_i, p_ipo, del_q_imo, del_q_i, del_q_ipo, &p_L, &p_R); - #ifdef DE + #ifdef DE // Calculate the monotonized slopes for cells imo, i, ipo (internal energy) del_q_imo = Calculate_Slope(ge_imt, ge_imo, ge_i); del_q_i = Calculate_Slope(ge_imo, ge_i, ge_ipo); @@ -358,9 +358,9 @@ __global__ void PPMP_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou // Calculate the interface values for internal energy Interface_Values_PPM(ge_imo, ge_i, ge_ipo, del_q_imo, del_q_i, del_q_ipo, &ge_L, &ge_R); - #endif // DE + #endif // DE - #ifdef SCALAR + #ifdef SCALAR // Calculate the monotonized slopes for cells imo, i, ipo (passive scalars) for (int i = 0; i < NSCALARS; i++) { del_q_imo = Calculate_Slope(scalar_imt[i], scalar_imo[i], scalar_i[i]); @@ -371,9 +371,9 @@ __global__ void PPMP_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou Interface_Values_PPM(scalar_imo[i], scalar_i[i], scalar_ipo[i], del_q_imo, del_q_i, del_q_ipo, &scalar_L[i], &scalar_R[i]); } - #endif // SCALAR + #endif // SCALAR - #ifdef STEEPENING + #ifdef STEEPENING Real d2_rho_imo, d2_rho_ipo, eta_i; // check for contact discontinuities & steepen if necessary (see Fryxell // Sec 3.1.2) if condition 4 (Fryxell Eqn 37) (Colella Eqn 1.16.5) is true, @@ -407,9 +407,9 @@ __global__ void PPMP_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou } } } - #endif // STEEPENING + #endif // STEEPENING - #ifdef FLATTENING + #ifdef FLATTENING Real F_imo, F_i, F_ipo; // flatten shock fronts that are too narrow (see Fryxell Sec 3.1.3) // calculate the shock steepness parameters (Fryxell Eqn 43) @@ -448,30 +448,30 @@ __global__ void PPMP_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou vy_L = F_i * vy_i + (1 - F_i) * vy_L; vz_L = F_i * vz_i + (1 - F_i) * vz_L; p_L = F_i * p_i + (1 - F_i) * p_L; - #ifdef DE + #ifdef DE ge_L = F_i * ge_i + (1 - F_i) * ge_L; - #endif // DE - #ifdef SCALAR + #endif // DE + #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { scalar_L[i] = F_i * scalar_i[i] + (1 - F_i) * scalar_L[i]; } - #endif // SCALAR + #endif // SCALAR d_R = F_i * d_i + (1 - F_i) * d_R; vx_R = F_i * vx_i + (1 - F_i) * vx_R; vy_R = F_i * vy_i + (1 - F_i) * vy_R; vz_R = F_i * vz_i + (1 - F_i) * vz_R; p_R = F_i * p_i + (1 - F_i) * p_R; - #ifdef DE + #ifdef DE ge_R = F_i * ge_i + (1 - F_i) * ge_R; - #endif // DE - #ifdef SCALAR + #endif // DE + #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { scalar_R[i] = F_i * scalar_i[i] + (1 - F_i) * scalar_R[i]; } - #endif // SCALAR - #endif // FLATTENING + #endif // SCALAR + #endif // FLATTENING - #ifndef VL + #ifndef VL // #ifdef CTU // compute sound speed in cell i cs = sqrt(gamma * p_i / d_i); @@ -485,28 +485,28 @@ __global__ void PPMP_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou del_vy = vy_R - vy_L; del_vz = vz_R - vz_L; del_p = p_R - p_L; - #ifdef DE + #ifdef DE del_ge = ge_R - ge_L; - #endif // DE - #ifdef SCALAR + #endif // DE + #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { del_scalar[i] = scalar_R[i] - scalar_L[i]; } - #endif // SCALAR + #endif // SCALAR d_6 = 6.0 * (d_i - 0.5 * (d_L + d_R)); // Fryxell Eqn 30 vx_6 = 6.0 * (vx_i - 0.5 * (vx_L + vx_R)); // Fryxell Eqn 30 vy_6 = 6.0 * (vy_i - 0.5 * (vy_L + vy_R)); // Fryxell Eqn 30 vz_6 = 6.0 * (vz_i - 0.5 * (vz_L + vz_R)); // Fryxell Eqn 30 p_6 = 6.0 * (p_i - 0.5 * (p_L + p_R)); // Fryxell Eqn 30 - #ifdef DE + #ifdef DE ge_6 = 6.0 * (ge_i - 0.5 * (ge_L + ge_R)); // Fryxell Eqn 30 - #endif // DE - #ifdef SCALAR + #endif // DE + #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { scalar_6[i] = 6.0 * (scalar_i[i] - 0.5 * (scalar_L[i] + scalar_R[i])); // Fryxell Eqn 30 } - #endif // SCALAR + #endif // SCALAR // set speed of characteristics (v-c, v, v+c) using average values of v and // c @@ -531,14 +531,14 @@ __global__ void PPMP_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou dL_0 = d_L + 0.5 * alpha_0 * (del_d + d_6 * (1 - (2. / 3.) * alpha_0)); vyL_0 = vy_L + 0.5 * alpha_0 * (del_vy + vy_6 * (1 - (2. / 3.) * alpha_0)); vzL_0 = vz_L + 0.5 * alpha_0 * (del_vz + vz_6 * (1 - (2. / 3.) * alpha_0)); - #ifdef DE + #ifdef DE geL_0 = ge_L + 0.5 * alpha_0 * (del_ge + ge_6 * (1 - (2. / 3.) * alpha_0)); - #endif // DE - #ifdef SCALAR + #endif // DE + #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { scalarL_0[i] = scalar_L[i] + 0.5 * alpha_0 * (del_scalar[i] + scalar_6[i] * (1 - (2. / 3.) * alpha_0)); } - #endif // SCALAR + #endif // SCALAR pL_0 = p_L + 0.5 * alpha_0 * (del_p + p_6 * (1 - (2. / 3.) * alpha_0)); vxL_p = vx_L + 0.5 * alpha_p * (del_vx + vx_6 * (1 - (2. / 3.) * alpha_p)); pL_p = p_L + 0.5 * alpha_p * (del_p + p_6 * (1 - (2. / 3.) * alpha_p)); @@ -549,14 +549,14 @@ __global__ void PPMP_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou dR_0 = d_R - 0.5 * beta_0 * (del_d - d_6 * (1 - (2. / 3.) * beta_0)); vyR_0 = vy_R - 0.5 * beta_0 * (del_vy - vy_6 * (1 - (2. / 3.) * beta_0)); vzR_0 = vz_R - 0.5 * beta_0 * (del_vz - vz_6 * (1 - (2. / 3.) * beta_0)); - #ifdef DE + #ifdef DE geR_0 = ge_R - 0.5 * beta_0 * (del_ge - ge_6 * (1 - (2. / 3.) * beta_0)); - #endif // DE - #ifdef SCALAR + #endif // DE + #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { scalarR_0[i] = scalar_R[i] - 0.5 * beta_0 * (del_scalar[i] - scalar_6[i] * (1 - (2. / 3.) * beta_0)); } - #endif // SCALAR + #endif // SCALAR pR_0 = p_R - 0.5 * beta_0 * (del_p - p_6 * (1 - (2. / 3.) * beta_0)); dR_p = d_R - 0.5 * beta_p * (del_d - d_6 * (1 - (2. / 3.) * beta_p)); vxR_p = vx_R - 0.5 * beta_p * (del_vx - vx_6 * (1 - (2. / 3.) * beta_p)); @@ -570,28 +570,28 @@ __global__ void PPMP_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou vy_L = vyL_0; vz_L = vzL_0; p_L = pL_m; - #ifdef DE + #ifdef DE ge_L = geL_0; - #endif // DE - #ifdef SCALAR + #endif // DE + #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { scalar_L[i] = scalarL_0[i]; } - #endif // SCALAR + #endif // SCALAR // right d_R = dR_p; vx_R = vxR_p; vy_R = vyR_0; vz_R = vzR_0; p_R = pR_p; - #ifdef DE + #ifdef DE ge_R = geR_0; - #endif // DE - #ifdef SCALAR + #endif // DE + #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { scalar_R[i] = scalarR_0[i]; } - #endif // SCALAR + #endif // SCALAR // correct these initial guesses by taking into account the number of // characteristics on each side of the interface @@ -637,7 +637,7 @@ __global__ void PPMP_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou p_R = p_L + (d_R * d_R * cr * cr) * (chi_R_p + chi_R_m); vx_R = vx_R + d_R * cr * (chi_R_p - chi_R_m); d_R = pow(((1.0 / d_R) - (chi_R_m + chi_R_0 + chi_R_p)), -1); - #endif // CTU + #endif // CTU // Apply mimimum constraints d_L = fmax(d_L, (Real)TINY_NUMBER); @@ -656,14 +656,14 @@ __global__ void PPMP_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou dev_bounds_R[o2 * n_cells + id] = d_L * vy_L; dev_bounds_R[o3 * n_cells + id] = d_L * vz_L; dev_bounds_R[4 * n_cells + id] = p_L / (gamma - 1.0) + 0.5 * d_L * (vx_L * vx_L + vy_L * vy_L + vz_L * vz_L); - #ifdef SCALAR + #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { dev_bounds_R[(5 + i) * n_cells + id] = d_L * scalar_L[i]; } - #endif // SCALAR - #ifdef DE + #endif // SCALAR + #ifdef DE dev_bounds_R[(n_fields - 1) * n_cells + id] = d_L * ge_L; - #endif // DE + #endif // DE // bounds_L refers to the left side of the i+1/2 interface id = xid + yid * nx + zid * nx * ny; dev_bounds_L[id] = d_R; @@ -671,14 +671,14 @@ __global__ void PPMP_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou dev_bounds_L[o2 * n_cells + id] = d_R * vy_R; dev_bounds_L[o3 * n_cells + id] = d_R * vz_R; dev_bounds_L[4 * n_cells + id] = p_R / (gamma - 1.0) + 0.5 * d_R * (vx_R * vx_R + vy_R * vy_R + vz_R * vz_R); - #ifdef SCALAR + #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { dev_bounds_L[(5 + i) * n_cells + id] = d_R * scalar_R[i]; } - #endif // SCALAR - #ifdef DE + #endif // SCALAR + #ifdef DE dev_bounds_L[(n_fields - 1) * n_cells + id] = d_R * ge_R; - #endif // DE + #endif // DE } } @@ -772,5 +772,4 @@ __device__ Real calc_eta(Real d2rho_imo, Real d2rho_ipo, Real dx, Real rho_imo, return -A * B; } - #endif // PPMP -#endif // CUDA +#endif // PPMP diff --git a/src/reconstruction/ppmp_cuda.h b/src/reconstruction/ppmp_cuda.h index ca0bf1553..064d328fa 100644 --- a/src/reconstruction/ppmp_cuda.h +++ b/src/reconstruction/ppmp_cuda.h @@ -1,12 +1,10 @@ /*! \file ppmp_cuda.h * \brief Declarations of the cuda ppmp kernels. */ -#ifdef CUDA +#ifndef PPMP_CUDA_H +#define PPMP_CUDA_H - #ifndef PPMP_CUDA_H - #define PPMP_CUDA_H - - #include "../global/global.h" +#include "../global/global.h" /*! \fn __global__ void PPMP_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, int ny, int nz, int n_ghost, Real dx, Real dt, Real @@ -39,5 +37,4 @@ __device__ Real calc_d2_rho(Real rho_imo, Real rho_i, Real rho_ipo, Real dx); See Fryxell Eqn 36. */ __device__ Real calc_eta(Real d2rho_imo, Real d2rho_ipo, Real dx, Real rho_imo, Real rho_ipo); - #endif // PPMP_CUDA_H -#endif // CUDA +#endif // PPMP_CUDA_H diff --git a/src/riemann_solvers/exact_cuda.cu b/src/riemann_solvers/exact_cuda.cu index 918188441..9e0a4cff2 100644 --- a/src/riemann_solvers/exact_cuda.cu +++ b/src/riemann_solvers/exact_cuda.cu @@ -1,19 +1,17 @@ /*! \file exact_cuda.cu * \brief Function definitions for the cuda exact Riemann solver.*/ -#ifdef CUDA +#include +#include - #include - #include +#include "../global/global.h" +#include "../global/global_cuda.h" +#include "../riemann_solvers/exact_cuda.h" +#include "../utils/gpu.hpp" - #include "../global/global.h" - #include "../global/global_cuda.h" - #include "../riemann_solvers/exact_cuda.h" - #include "../utils/gpu.hpp" - - #ifdef DE // PRESSURE_DE - #include "../utils/hydro_utilities.h" - #endif +#ifdef DE // PRESSURE_DE + #include "../utils/hydro_utilities.h" +#endif /*! \fn Calculate_Exact_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_R, Real * *dev_flux, int nx, int ny, int nz, int n_ghost, Real gamma, int dir, int @@ -55,13 +53,13 @@ __global__ void Calculate_Exact_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds // energy Real vm, pm; // velocity and pressure in the star region - #ifdef DE +#ifdef DE Real gel, ger, E_kin, E, dge; - #endif +#endif - #ifdef SCALAR +#ifdef SCALAR Real scalarl[NSCALARS], scalarr[NSCALARS]; - #endif +#endif // Each thread executes the solver independently // if (xid > n_ghost-3 && xid < nx-n_ghost+1 && yid < ny && zid < nz) @@ -71,44 +69,44 @@ __global__ void Calculate_Exact_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds vxl = dev_bounds_L[o1 * n_cells + tid] / dl; vyl = dev_bounds_L[o2 * n_cells + tid] / dl; vzl = dev_bounds_L[o3 * n_cells + tid] / dl; - #ifdef DE // PRESSURE_DE +#ifdef DE // PRESSURE_DE E = dev_bounds_L[4 * n_cells + tid]; E_kin = 0.5 * dl * (vxl * vxl + vyl * vyl + vzl * vzl); dge = dev_bounds_L[(n_fields - 1) * n_cells + tid]; pl = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, dge, gamma); - #else +#else pl = (dev_bounds_L[4 * n_cells + tid] - 0.5 * dl * (vxl * vxl + vyl * vyl + vzl * vzl)) * (gamma - 1.0); - #endif // PRESSURE_DE +#endif // PRESSURE_DE pl = fmax(pl, (Real)TINY_NUMBER); - #ifdef SCALAR +#ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { scalarl[i] = dev_bounds_L[(5 + i) * n_cells + tid] / dl; } - #endif - #ifdef DE +#endif +#ifdef DE gel = dge / dl; - #endif +#endif dr = dev_bounds_R[tid]; vxr = dev_bounds_R[o1 * n_cells + tid] / dr; vyr = dev_bounds_R[o2 * n_cells + tid] / dr; vzr = dev_bounds_R[o3 * n_cells + tid] / dr; - #ifdef DE // PRESSURE_DE +#ifdef DE // PRESSURE_DE E = dev_bounds_R[4 * n_cells + tid]; E_kin = 0.5 * dr * (vxr * vxr + vyr * vyr + vzr * vzr); dge = dev_bounds_R[(n_fields - 1) * n_cells + tid]; pr = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, dge, gamma); - #else +#else pr = (dev_bounds_R[4 * n_cells + tid] - 0.5 * dr * (vxr * vxr + vyr * vyr + vzr * vzr)) * (gamma - 1.0); - #endif // PRESSURE_DE +#endif // PRESSURE_DE pr = fmax(pr, (Real)TINY_NUMBER); - #ifdef SCALAR +#ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { scalarr[i] = dev_bounds_R[(5 + i) * n_cells + tid] / dr; } - #endif - #ifdef DE +#endif +#ifdef DE ger = dge / dr; - #endif +#endif // compute sounds speeds in left and right regions cl = sqrt(gamma * pl / dl); @@ -133,26 +131,26 @@ __global__ void Calculate_Exact_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds if (vs >= 0) { dev_flux[o2 * n_cells + tid] = ds * vs * vyl; dev_flux[o3 * n_cells + tid] = ds * vs * vzl; - #ifdef SCALAR +#ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { dev_flux[(5 + i) * n_cells + tid] = ds * vs * scalarl[i]; } - #endif - #ifdef DE +#endif +#ifdef DE dev_flux[(n_fields - 1) * n_cells + tid] = ds * vs * gel; - #endif +#endif Es = (ps / (gamma - 1.0)) + 0.5 * ds * (vs * vs + vyl * vyl + vzl * vzl); } else { dev_flux[o2 * n_cells + tid] = ds * vs * vyr; dev_flux[o3 * n_cells + tid] = ds * vs * vzr; - #ifdef SCALAR +#ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { dev_flux[(5 + i) * n_cells + tid] = ds * vs * scalarr[i]; } - #endif - #ifdef DE +#endif +#ifdef DE dev_flux[(n_fields - 1) * n_cells + tid] = ds * vs * ger; - #endif +#endif Es = (ps / (gamma - 1.0)) + 0.5 * ds * (vs * vs + vyr * vyr + vzr * vzr); } dev_flux[4 * n_cells + tid] = (Es + ps) * vs; @@ -334,5 +332,3 @@ __device__ void sample_CUDA(const Real pm, const Real vm, Real *d, Real *v, Real } } } - -#endif // CUDA diff --git a/src/riemann_solvers/exact_cuda.h b/src/riemann_solvers/exact_cuda.h index f1a3d3261..4cb004fb5 100644 --- a/src/riemann_solvers/exact_cuda.h +++ b/src/riemann_solvers/exact_cuda.h @@ -2,12 +2,10 @@ * \brief Declarations of functions for the cuda exact riemann solver kernel. */ -#ifdef CUDA +#ifndef EXACT_CUDA_H +#define EXACT_CUDA_H - #ifndef EXACT_CUDA_H - #define EXACT_CUDA_H - - #include "../global/global.h" +#include "../global/global.h" /*! \fn Calculate_Exact_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_R, Real * *dev_flux, int nx, int ny, int nz, int n_ghost, Real gamma, int dir, int @@ -26,5 +24,4 @@ __device__ void starpv_CUDA(Real *p, Real *v, Real dl, Real vxl, Real pl, Real c __device__ void sample_CUDA(const Real pm, const Real vm, Real *d, Real *v, Real *p, Real dl, Real vxl, Real pl, Real cl, Real dr, Real vxr, Real pr, Real cr, Real gamma); - #endif // EXACT_CUDA_H -#endif // CUDA +#endif // EXACT_CUDA_H diff --git a/src/riemann_solvers/hll_cuda.cu b/src/riemann_solvers/hll_cuda.cu index ad8dca3e1..2987771b2 100644 --- a/src/riemann_solvers/hll_cuda.cu +++ b/src/riemann_solvers/hll_cuda.cu @@ -1,18 +1,16 @@ /*! \file hllc_cuda.cu * \brief Function definitions for the cuda HLLC Riemann solver.*/ -#ifdef CUDA +#include - #include +#include "../global/global.h" +#include "../global/global_cuda.h" +#include "../riemann_solvers/hll_cuda.h" +#include "../utils/gpu.hpp" - #include "../global/global.h" - #include "../global/global_cuda.h" - #include "../riemann_solvers/hll_cuda.h" - #include "../utils/gpu.hpp" - - #ifdef DE // PRESSURE_DE - #include "../utils/hydro_utilities.h" - #endif +#ifdef DE // PRESSURE_DE + #include "../utils/hydro_utilities.h" +#endif /*! \fn Calculate_HLLC_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_R, Real * *dev_flux, int nx, int ny, int nz, int n_ghost, Real gamma, int dir, int @@ -43,12 +41,12 @@ __global__ void Calculate_HLL_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_R // Real dls, drs, mxls, mxrs, myls, myrs, mzls, mzrs, Els, Ers; Real f_d, f_mx, f_my, f_mz, f_E; Real Sl, Sr, cfl, cfr; - #ifdef DE +#ifdef DE Real dgel, dger, f_ge_l, f_ge_r, f_ge, E_kin; - #endif - #ifdef SCALAR +#endif +#ifdef SCALAR Real dscl[NSCALARS], dscr[NSCALARS], f_sc_l[NSCALARS], f_sc_r[NSCALARS], f_sc[NSCALARS]; - #endif +#endif // Real etah = 0; @@ -78,39 +76,39 @@ __global__ void Calculate_HLL_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_R myl = dev_bounds_L[o2 * n_cells + tid]; mzl = dev_bounds_L[o3 * n_cells + tid]; El = dev_bounds_L[4 * n_cells + tid]; - #ifdef SCALAR +#ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { dscl[i] = dev_bounds_L[(5 + i) * n_cells + tid]; } - #endif - #ifdef DE +#endif +#ifdef DE dgel = dev_bounds_L[(n_fields - 1) * n_cells + tid]; - #endif +#endif dr = dev_bounds_R[tid]; mxr = dev_bounds_R[o1 * n_cells + tid]; myr = dev_bounds_R[o2 * n_cells + tid]; mzr = dev_bounds_R[o3 * n_cells + tid]; Er = dev_bounds_R[4 * n_cells + tid]; - #ifdef SCALAR +#ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { dscr[i] = dev_bounds_R[(5 + i) * n_cells + tid]; } - #endif - #ifdef DE +#endif +#ifdef DE dger = dev_bounds_R[(n_fields - 1) * n_cells + tid]; - #endif +#endif // calculate primitive variables vxl = mxl / dl; vyl = myl / dl; vzl = mzl / dl; - #ifdef DE // PRESSURE_DE +#ifdef DE // PRESSURE_DE E_kin = 0.5 * dl * (vxl * vxl + vyl * vyl + vzl * vzl); pl = hydro_utilities::Get_Pressure_From_DE(El, El - E_kin, dgel, gamma); - #else +#else pl = (El - 0.5 * dl * (vxl * vxl + vyl * vyl + vzl * vzl)) * (gamma - 1.0); - #endif // DE +#endif // DE pl = fmax(pl, (Real)TINY_NUMBER); // #ifdef SCALAR // for (int i=0; i 0.0) { @@ -210,14 +208,14 @@ __global__ void Calculate_HLL_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_R dev_flux[o2 * n_cells + tid] = f_my_l; dev_flux[o3 * n_cells + tid] = f_mz_l; dev_flux[4 * n_cells + tid] = f_E_l; - #ifdef SCALAR +#ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { dev_flux[(5 + i) * n_cells + tid] = f_sc_l[i]; } - #endif - #ifdef DE +#endif +#ifdef DE dev_flux[(n_fields - 1) * n_cells + tid] = f_ge_l; - #endif +#endif return; } else if (Sr < 0.0) { dev_flux[tid] = f_d_r; @@ -225,14 +223,14 @@ __global__ void Calculate_HLL_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_R dev_flux[o2 * n_cells + tid] = f_my_r; dev_flux[o3 * n_cells + tid] = f_mz_r; dev_flux[4 * n_cells + tid] = f_E_r; - #ifdef SCALAR +#ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { dev_flux[(5 + i) * n_cells + tid] = f_sc_r[i]; } - #endif - #ifdef DE +#endif +#ifdef DE dev_flux[(n_fields - 1) * n_cells + tid] = f_ge_r; - #endif +#endif return; } // otherwise compute subsonic flux @@ -242,14 +240,14 @@ __global__ void Calculate_HLL_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_R f_my = ((Sr * f_my_l) - (Sl * f_my_r) + Sl * Sr * (myr - myl)) / (Sr - Sl); f_mz = ((Sr * f_mz_l) - (Sl * f_mz_r) + Sl * Sr * (mzr - mzl)) / (Sr - Sl); f_E = ((Sr * f_E_l) - (Sl * f_E_r) + Sl * Sr * (Er - El)) / (Sr - Sl); - #ifdef DE +#ifdef DE f_ge = ((Sr * f_ge_l) - (Sl * f_ge_r) + Sl * Sr * (dger - dgel)) / (Sr - Sl); - #endif - #ifdef SCALAR +#endif +#ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { f_sc[i] = ((Sr * f_sc_l[i]) - (Sl * f_sc_r[i]) + Sl * Sr * (dscr[i] - dscl[i])) / (Sr - Sl); } - #endif +#endif // return the hllc fluxes dev_flux[tid] = f_d; @@ -257,16 +255,14 @@ __global__ void Calculate_HLL_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_R dev_flux[o2 * n_cells + tid] = f_my; dev_flux[o3 * n_cells + tid] = f_mz; dev_flux[4 * n_cells + tid] = f_E; - #ifdef SCALAR +#ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { dev_flux[(5 + i) * n_cells + tid] = f_sc[i]; } - #endif - #ifdef DE +#endif +#ifdef DE dev_flux[(n_fields - 1) * n_cells + tid] = f_ge; - #endif +#endif } } } - -#endif // CUDA diff --git a/src/riemann_solvers/hll_cuda.h b/src/riemann_solvers/hll_cuda.h index 8b4cceb10..43dc18cbe 100644 --- a/src/riemann_solvers/hll_cuda.h +++ b/src/riemann_solvers/hll_cuda.h @@ -1,12 +1,10 @@ /*! \file hllc_cuda.h * \brief Declarations of functions for the cuda hllc riemann solver kernel. */ -#ifdef CUDA +#ifndef HLL_CUDA_H +#define HLL_CUDA_H - #ifndef HLL_CUDA_H - #define HLL_CUDA_H - - #include "../global/global.h" +#include "../global/global.h" /*! \fn Calculate_HLLC_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_R, Real * *dev_flux, int nx, int ny, int nz, int n_ghost, Real gamma, int dir, int @@ -15,5 +13,4 @@ __global__ void Calculate_HLL_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_R, Real *dev_flux, int nx, int ny, int nz, int n_ghost, Real gamma, int dir, int n_fields); - #endif // HLLC_CUDA_H -#endif // CUDA +#endif // HLLC_CUDA_H diff --git a/src/riemann_solvers/hllc_cuda.cu b/src/riemann_solvers/hllc_cuda.cu index 912765d23..c923edf47 100644 --- a/src/riemann_solvers/hllc_cuda.cu +++ b/src/riemann_solvers/hllc_cuda.cu @@ -1,18 +1,16 @@ /*! \file hllc_cuda.cu * \brief Function definitions for the cuda HLLC Riemann solver.*/ -#ifdef CUDA +#include - #include +#include "../global/global.h" +#include "../global/global_cuda.h" +#include "../riemann_solvers/hllc_cuda.h" +#include "../utils/gpu.hpp" - #include "../global/global.h" - #include "../global/global_cuda.h" - #include "../riemann_solvers/hllc_cuda.h" - #include "../utils/gpu.hpp" - - #ifdef DE // PRESSURE_DE - #include "../utils/hydro_utilities.h" - #endif +#ifdef DE // PRESSURE_DE + #include "../utils/hydro_utilities.h" +#endif /*! \fn Calculate_HLLC_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_R, Real * *dev_flux, int nx, int ny, int nz, int n_ghost, Real gamma, int dir, int @@ -43,13 +41,13 @@ __global__ void Calculate_HLLC_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_ Real dls, drs, mxls, mxrs, myls, myrs, mzls, mzrs, Els, Ers; Real f_d, f_mx, f_my, f_mz, f_E; Real Sl, Sr, Sm, cfl, cfr, ps; - #ifdef DE +#ifdef DE Real dgel, dger, gel, ger, gels, gers, f_ge_l, f_ge_r, f_ge, E_kin; - #endif - #ifdef SCALAR +#endif +#ifdef SCALAR Real dscl[NSCALARS], dscr[NSCALARS], scl[NSCALARS], scr[NSCALARS], scls[NSCALARS], scrs[NSCALARS], f_sc_l[NSCALARS], f_sc_r[NSCALARS], f_sc[NSCALARS]; - #endif +#endif Real etah = 0; @@ -79,66 +77,66 @@ __global__ void Calculate_HLLC_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_ myl = dev_bounds_L[o2 * n_cells + tid]; mzl = dev_bounds_L[o3 * n_cells + tid]; El = dev_bounds_L[4 * n_cells + tid]; - #ifdef SCALAR +#ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { dscl[i] = dev_bounds_L[(5 + i) * n_cells + tid]; } - #endif - #ifdef DE +#endif +#ifdef DE dgel = dev_bounds_L[(n_fields - 1) * n_cells + tid]; - #endif +#endif dr = dev_bounds_R[tid]; mxr = dev_bounds_R[o1 * n_cells + tid]; myr = dev_bounds_R[o2 * n_cells + tid]; mzr = dev_bounds_R[o3 * n_cells + tid]; Er = dev_bounds_R[4 * n_cells + tid]; - #ifdef SCALAR +#ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { dscr[i] = dev_bounds_R[(5 + i) * n_cells + tid]; } - #endif - #ifdef DE +#endif +#ifdef DE dger = dev_bounds_R[(n_fields - 1) * n_cells + tid]; - #endif +#endif // calculate primitive variables vxl = mxl / dl; vyl = myl / dl; vzl = mzl / dl; - #ifdef DE // PRESSURE_DE +#ifdef DE // PRESSURE_DE E_kin = 0.5 * dl * (vxl * vxl + vyl * vyl + vzl * vzl); pl = hydro_utilities::Get_Pressure_From_DE(El, El - E_kin, dgel, gamma); - #else +#else pl = (El - 0.5 * dl * (vxl * vxl + vyl * vyl + vzl * vzl)) * (gamma - 1.0); - #endif // PRESSURE_DE +#endif // PRESSURE_DE pl = fmax(pl, (Real)TINY_NUMBER); - #ifdef SCALAR +#ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { scl[i] = dscl[i] / dl; } - #endif - #ifdef DE +#endif +#ifdef DE gel = dgel / dl; - #endif +#endif vxr = mxr / dr; vyr = myr / dr; vzr = mzr / dr; - #ifdef DE // PRESSURE_DE +#ifdef DE // PRESSURE_DE E_kin = 0.5 * dr * (vxr * vxr + vyr * vyr + vzr * vzr); pr = hydro_utilities::Get_Pressure_From_DE(Er, Er - E_kin, dger, gamma); - #else +#else pr = (Er - 0.5 * dr * (vxr * vxr + vyr * vyr + vzr * vzr)) * (gamma - 1.0); - #endif // PRESSURE_DE +#endif // PRESSURE_DE pr = fmax(pr, (Real)TINY_NUMBER); - #ifdef SCALAR +#ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { scr[i] = dscr[i] / dr; } - #endif - #ifdef DE +#endif +#ifdef DE ger = dger / dr; - #endif +#endif // calculate the enthalpy in each cell Hl = (El + pl) / dl; @@ -182,28 +180,28 @@ __global__ void Calculate_HLLC_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_ f_my_l = myl * vxl; f_mz_l = mzl * vxl; f_E_l = (El + pl) * vxl; - #ifdef DE +#ifdef DE f_ge_l = dgel * vxl; - #endif - #ifdef SCALAR +#endif +#ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { f_sc_l[i] = dscl[i] * vxl; } - #endif +#endif f_d_r = mxr; f_mx_r = mxr * vxr + pr; f_my_r = myr * vxr; f_mz_r = mzr * vxr; f_E_r = (Er + pr) * vxr; - #ifdef DE +#ifdef DE f_ge_r = dger * vxr; - #endif - #ifdef SCALAR +#endif +#ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { f_sc_r[i] = dscr[i] * vxr; } - #endif +#endif // return upwind flux if flow is supersonic if (Sl > 0.0) { @@ -212,14 +210,14 @@ __global__ void Calculate_HLLC_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_ dev_flux[o2 * n_cells + tid] = f_my_l; dev_flux[o3 * n_cells + tid] = f_mz_l; dev_flux[4 * n_cells + tid] = f_E_l; - #ifdef SCALAR +#ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { dev_flux[(5 + i) * n_cells + tid] = f_sc_l[i]; } - #endif - #ifdef DE +#endif +#ifdef DE dev_flux[(n_fields - 1) * n_cells + tid] = f_ge_l; - #endif +#endif return; } else if (Sr < 0.0) { dev_flux[tid] = f_d_r; @@ -227,14 +225,14 @@ __global__ void Calculate_HLLC_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_ dev_flux[o2 * n_cells + tid] = f_my_r; dev_flux[o3 * n_cells + tid] = f_mz_r; dev_flux[4 * n_cells + tid] = f_E_r; - #ifdef SCALAR +#ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { dev_flux[(5 + i) * n_cells + tid] = f_sc_r[i]; } - #endif - #ifdef DE +#endif +#ifdef DE dev_flux[(n_fields - 1) * n_cells + tid] = f_ge_r; - #endif +#endif return; } // otherwise compute subsonic flux @@ -250,14 +248,14 @@ __global__ void Calculate_HLLC_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_ myls = dls * vyl; mzls = dls * vzl; Els = (El * (Sl - vxl) - pl * vxl + ps * Sm) / (Sl - Sm); - #ifdef DE +#ifdef DE gels = dls * gel; - #endif - #ifdef SCALAR +#endif +#ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { scls[i] = dls * scl[i]; } - #endif +#endif // conserved variables in the right star state drs = dr * (Sr - vxr) / (Sr - Sm); @@ -265,14 +263,14 @@ __global__ void Calculate_HLLC_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_ myrs = drs * vyr; mzrs = drs * vzr; Ers = (Er * (Sr - vxr) - pr * vxr + ps * Sm) / (Sr - Sm); - #ifdef DE +#ifdef DE gers = drs * ger; - #endif - #ifdef SCALAR +#endif +#ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { scrs[i] = drs * scr[i]; } - #endif +#endif // compute the hllc flux (Batten eqn 27) f_d = 0.5 * (f_d_l + f_d_r + (Sr - fabs(Sm)) * drs + (Sl + fabs(Sm)) * dls - Sl * dl - Sr * dr); @@ -280,15 +278,15 @@ __global__ void Calculate_HLLC_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_ f_my = 0.5 * (f_my_l + f_my_r + (Sr - fabs(Sm)) * myrs + (Sl + fabs(Sm)) * myls - Sl * myl - Sr * myr); f_mz = 0.5 * (f_mz_l + f_mz_r + (Sr - fabs(Sm)) * mzrs + (Sl + fabs(Sm)) * mzls - Sl * mzl - Sr * mzr); f_E = 0.5 * (f_E_l + f_E_r + (Sr - fabs(Sm)) * Ers + (Sl + fabs(Sm)) * Els - Sl * El - Sr * Er); - #ifdef DE +#ifdef DE f_ge = 0.5 * (f_ge_l + f_ge_r + (Sr - fabs(Sm)) * gers + (Sl + fabs(Sm)) * gels - Sl * dgel - Sr * dger); - #endif - #ifdef SCALAR +#endif +#ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { f_sc[i] = 0.5 * (f_sc_l[i] + f_sc_r[i] + (Sr - fabs(Sm)) * scrs[i] + (Sl + fabs(Sm)) * scls[i] - Sl * dscl[i] - Sr * dscr[i]); } - #endif +#endif // return the hllc fluxes dev_flux[tid] = f_d; @@ -296,16 +294,14 @@ __global__ void Calculate_HLLC_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_ dev_flux[o2 * n_cells + tid] = f_my; dev_flux[o3 * n_cells + tid] = f_mz; dev_flux[4 * n_cells + tid] = f_E; - #ifdef SCALAR +#ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { dev_flux[(5 + i) * n_cells + tid] = f_sc[i]; } - #endif - #ifdef DE +#endif +#ifdef DE dev_flux[(n_fields - 1) * n_cells + tid] = f_ge; - #endif +#endif } } } - -#endif // CUDA diff --git a/src/riemann_solvers/hllc_cuda.h b/src/riemann_solvers/hllc_cuda.h index 2268c3320..f10c7c43f 100644 --- a/src/riemann_solvers/hllc_cuda.h +++ b/src/riemann_solvers/hllc_cuda.h @@ -1,12 +1,10 @@ /*! \file hllc_cuda.h * \brief Declarations of functions for the cuda hllc riemann solver kernel. */ -#ifdef CUDA +#ifndef HLLC_CUDA_H +#define HLLC_CUDA_H - #ifndef HLLC_CUDA_H - #define HLLC_CUDA_H - - #include "../global/global.h" +#include "../global/global.h" /*! \fn Calculate_HLLC_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_R, Real * *dev_flux, int nx, int ny, int nz, int n_ghost, Real gamma, int dir, int @@ -15,5 +13,4 @@ __global__ void Calculate_HLLC_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_R, Real *dev_flux, int nx, int ny, int nz, int n_ghost, Real gamma, int dir, int n_fields); - #endif // HLLC_CUDA_H -#endif // CUDA +#endif // HLLC_CUDA_H diff --git a/src/riemann_solvers/hlld_cuda.cu b/src/riemann_solvers/hlld_cuda.cu index 361393940..80d6902c7 100644 --- a/src/riemann_solvers/hlld_cuda.cu +++ b/src/riemann_solvers/hlld_cuda.cu @@ -24,9 +24,7 @@ #include "../utils/hydro_utilities.h" #endif // DE -#ifdef CUDA - - #ifdef MHD +#ifdef MHD namespace mhd { // ========================================================================= @@ -184,12 +182,12 @@ __device__ __host__ mhd::internal::State loadState(Real const *interfaceArr, Rea state.magneticY = interfaceArr[threadId + n_cells * grid_enum::Q_x_magnetic_y]; state.magneticZ = interfaceArr[threadId + n_cells * grid_enum::Q_x_magnetic_z]; - #ifdef SCALAR + #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { state.scalarSpecific[i] = interfaceArr[threadId + n_cells * (grid_enum::scalar + i)] / state.density; } - #endif // SCALAR - #ifdef DE + #endif // SCALAR + #ifdef DE state.thermalEnergySpecific = interfaceArr[threadId + n_cells * grid_enum::GasEnergy] / state.density; Real energyNonThermal = hydro_utilities::Calc_Kinetic_Energy_From_Velocity(state.density, state.velocityX, @@ -199,11 +197,11 @@ __device__ __host__ mhd::internal::State loadState(Real const *interfaceArr, Rea state.gasPressure = fmax(hydro_utilities::Get_Pressure_From_DE(state.energy, state.energy - energyNonThermal, state.thermalEnergySpecific * state.density, gamma), (Real)TINY_NUMBER); - #else + #else // Note that this function does the positive pressure check // internally state.gasPressure = mhd::internal::Calc_Pressure_Primitive(state, magneticX, gamma); - #endif // DE + #endif // DE state.totalPressure = mhd::utils::computeTotalPressure(state.gasPressure, magneticX, state.magneticY, state.magneticZ); @@ -303,14 +301,14 @@ __device__ __host__ void returnFluxes(int const &threadId, int const &o1, int co dev_flux[threadId + n_cells * grid_enum::fluxX_magnetic_z] = flux.magneticY; dev_flux[threadId + n_cells * grid_enum::fluxX_magnetic_y] = flux.magneticZ; - #ifdef SCALAR + #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { dev_flux[threadId + n_cells * (grid_enum::scalar + i)] = state.scalarSpecific[i] * flux.density; } - #endif // SCALAR - #ifdef DE + #endif // SCALAR + #ifdef DE dev_flux[threadId + n_cells * grid_enum::GasEnergy] = state.thermalEnergySpecific * flux.density; - #endif // DE + #endif // DE } // ===================================================================== @@ -512,5 +510,4 @@ __device__ __host__ mhd::internal::Flux computeDoubleStarFluxes( } // namespace internal } // end namespace mhd - #endif // MHD -#endif // CUDA +#endif // MHD diff --git a/src/riemann_solvers/hlld_cuda.h b/src/riemann_solvers/hlld_cuda.h index 49f48f5b9..8c547e889 100644 --- a/src/riemann_solvers/hlld_cuda.h +++ b/src/riemann_solvers/hlld_cuda.h @@ -15,7 +15,6 @@ #include "../global/global.h" #include "../utils/hydro_utilities.h" -#ifdef CUDA /*! * \brief Namespace for MHD code * @@ -62,12 +61,12 @@ Real static const _hlldSmallNumber = 1.0e-8; */ struct State { Real density, velocityX, velocityY, velocityZ, energy, magneticY, magneticZ, gasPressure, totalPressure; - #ifdef SCALAR +#ifdef SCALAR Real scalarSpecific[grid_enum::nscalars]; - #endif // SCALAR - #ifdef DE +#endif // SCALAR +#ifdef DE Real thermalEnergySpecific; - #endif // DE +#endif // DE }; /*! @@ -268,4 +267,3 @@ inline __host__ __device__ Real Calc_Pressure_Primitive(mhd::internal::State con } } // namespace internal } // end namespace mhd -#endif // CUDA diff --git a/src/riemann_solvers/hlld_cuda_tests.cu b/src/riemann_solvers/hlld_cuda_tests.cu index 4993fa47e..7fc96bf0c 100644 --- a/src/riemann_solvers/hlld_cuda_tests.cu +++ b/src/riemann_solvers/hlld_cuda_tests.cu @@ -23,8 +23,7 @@ #include "../utils/mhd_utilities.h" #include "../utils/testing_utilities.h" -#ifdef CUDA - #ifdef MHD +#ifdef MHD // ========================================================================= // Integration tests for the entire HLLD solver. Unit tests are below // ========================================================================= @@ -78,12 +77,12 @@ class tMHDCalculateHLLDFluxesCUDA : public ::testing::Test int const nz = 1; // Number of cells in the z-direction int const n_cells = nx * ny * nz; int nFields = 8; // Total number of conserved fields - #ifdef SCALAR + #ifdef SCALAR nFields += NSCALARS; - #endif // SCALAR - #ifdef DE + #endif // SCALAR + #ifdef DE nFields++; - #endif // DE + #endif // DE // Launch Parameters dim3 const dimGrid(1, 1, 1); // How many blocks in the grid @@ -168,18 +167,18 @@ class tMHDCalculateHLLDFluxesCUDA : public ::testing::Test // Field names std::vector fieldNames{"Densities", "X Momentum", "Y Momentum", "Z Momentum", "Energies", "X Magnetic Field", "Y Magnetic Field", "Z Magnetic Field"}; - #ifdef DE + #ifdef DE fieldNames.push_back("Thermal energy (dual energy)"); fiducialFlux.push_back(thermalEnergyFlux); - #endif // DE - #ifdef SCALAR + #endif // DE + #ifdef SCALAR std::vector scalarNames{"Scalar 1", "Scalar 2", "Scalar 3"}; fieldNames.insert(fieldNames.begin() + grid_enum::magnetic_start, scalarNames.begin(), scalarNames.begin() + grid_enum::nscalars); fiducialFlux.insert(fiducialFlux.begin() + grid_enum::magnetic_start, scalarFlux.begin(), scalarFlux.begin() + grid_enum::nscalars); - #endif // SCALAR + #endif // SCALAR ASSERT_TRUE((fiducialFlux.size() == testFlux.size()) and (fiducialFlux.size() == fieldNames.size())) << "The fiducial flux, test flux, and field name vectors are not all " @@ -243,18 +242,18 @@ class tMHDCalculateHLLDFluxesCUDA : public ::testing::Test output.at(6) = input.at(6); // Y Magnetic Field output.at(7) = input.at(7); // Z Magnetic Field - #ifdef SCALAR + #ifdef SCALAR std::vector conservedScalar(primitiveScalars.size()); std::transform(primitiveScalars.begin(), primitiveScalars.end(), conservedScalar.begin(), [&](Real const &c) { return c * output.at(0); }); output.insert(output.begin() + grid_enum::magnetic_start, conservedScalar.begin(), conservedScalar.begin() + grid_enum::nscalars); - #endif // SCALAR - #ifdef DE + #endif // SCALAR + #ifdef DE output.push_back(mhd::utils::computeThermalEnergy( output.at(4), output.at(0), output.at(1), output.at(2), output.at(3), output.at(grid_enum::magnetic_x), output.at(grid_enum::magnetic_y), output.at(grid_enum::magnetic_z), gamma)); - #endif // DE + #endif // DE return output; } // ===================================================================== @@ -266,14 +265,14 @@ class tMHDCalculateHLLDFluxesCUDA : public ::testing::Test */ void SetUp() { - #ifdef SCALAR + #ifdef SCALAR ASSERT_LE(NSCALARS, 3) << "Only up to 3 passive scalars are currently " "supported in HLLD tests. NSCALARS = " << NSCALARS; ASSERT_GE(NSCALARS, 1) << "There must be at least 1 passive scalar to test " "with passive scalars. NSCALARS = " << NSCALARS; - #endif // SCALAR + #endif // SCALAR } // ===================================================================== private: @@ -1649,9 +1648,9 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, AllZeroesExpectAllZeroes) // State size_t numElements = 8; - #ifdef SCALAR + #ifdef SCALAR numElements += 3; - #endif // SCALAR + #endif // SCALAR std::vector const state(numElements, 0.0); std::vector const fiducialFlux(8, 0.0); @@ -1693,7 +1692,7 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, UnphysicalValuesExpectAutomaticFix) negativeDensityEnergyPressure = {-1.0, -1.0, -1.0, -1.0, -gamma, 1.0, 1.0, 1.0}, negativeDensityPressure = {-1.0, 1.0, 1.0, 1.0, -1.0, 1.0, 1.0, 1.0}; - #ifdef SCALAR + #ifdef SCALAR std::vector const conservedScalar{1.1069975296, 2.2286185018, 3.3155141875}; negativePressure.insert(negativePressure.begin() + 5, conservedScalar.begin(), conservedScalar.begin() + grid_enum::nscalars); @@ -1705,8 +1704,8 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, UnphysicalValuesExpectAutomaticFix) conservedScalar.begin() + grid_enum::nscalars); negativeDensityPressure.insert(negativeDensityPressure.begin() + 5, conservedScalar.begin(), conservedScalar.begin() + grid_enum::nscalars); - #endif // SCALAR - #ifdef DE + #endif // SCALAR + #ifdef DE negativePressure.push_back(mhd::utils::computeThermalEnergy( negativePressure.at(4), negativePressure.at(0), negativePressure.at(1), negativePressure.at(2), negativePressure.at(3), negativePressure.at(grid_enum::magnetic_x), negativePressure.at(grid_enum::magnetic_y), @@ -1728,7 +1727,7 @@ TEST_F(tMHDCalculateHLLDFluxesCUDA, UnphysicalValuesExpectAutomaticFix) negativeDensityPressure.at(4), negativeDensityPressure.at(0), negativeDensityPressure.at(1), negativeDensityPressure.at(2), negativeDensityPressure.at(3), negativeDensityPressure.at(grid_enum::magnetic_x), negativeDensityPressure.at(grid_enum::magnetic_y), negativeDensityPressure.at(grid_enum::magnetic_z), gamma)); - #endif // DE + #endif // DE for (size_t direction = 0; direction < 3; direction++) { { @@ -2259,12 +2258,12 @@ TEST(tMHDHlldInternalReturnFluxes, CorrectInputExpectCorrectOutput) int threadId = 0; int n_cells = 10; int nFields = 8; // Total number of conserved fields - #ifdef SCALAR + #ifdef SCALAR nFields += NSCALARS; - #endif // SCALAR - #ifdef DE + #endif // SCALAR + #ifdef DE nFields++; - #endif // DE + #endif // DE // Lambda for finding indices and check if they're correct auto findIndex = [](std::vector const &vec, double const &num, int const &fidIndex, std::string const &name) { @@ -2408,6 +2407,5 @@ TEST(tMHDHlldInternalLoadState, CorrectInputExpectCorrectOutput) ", totalPressure"); } } - // ========================================================================= - #endif // MHD -#endif // CUDA +// ========================================================================= +#endif // MHD diff --git a/src/riemann_solvers/roe_cuda.cu b/src/riemann_solvers/roe_cuda.cu index 332dcf3be..1735fe24d 100644 --- a/src/riemann_solvers/roe_cuda.cu +++ b/src/riemann_solvers/roe_cuda.cu @@ -1,18 +1,16 @@ /*! \file roe_cuda.cu * \brief Function definitions for the cuda Roe Riemann solver.*/ -#ifdef CUDA +#include - #include +#include "../global/global.h" +#include "../global/global_cuda.h" +#include "../riemann_solvers/roe_cuda.h" +#include "../utils/gpu.hpp" - #include "../global/global.h" - #include "../global/global_cuda.h" - #include "../riemann_solvers/roe_cuda.h" - #include "../utils/gpu.hpp" - - #ifdef DE // PRESSURE_DE - #include "../utils/hydro_utilities.h" - #endif +#ifdef DE // PRESSURE_DE + #include "../utils/hydro_utilities.h" +#endif /*! \fn Calculate_Roe_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_R, Real * *dev_flux, int nx, int ny, int nz, int n_ghost, Real gamma, Real *dev_etah, @@ -48,13 +46,13 @@ __global__ void Calculate_Roe_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_R sum_0 = sum_1 = sum_2 = sum_3 = sum_4 = 0.0; Real test0, test1, test2, test3, test4; int hlle_flag = 0; - #ifdef DE +#ifdef DE Real dgel, gel, dger, ger, f_ge_l, f_ge_r, E_kin; - #endif - #ifdef SCALAR +#endif +#ifdef SCALAR Real dscalarl[NSCALARS], scalarl[NSCALARS], dscalarr[NSCALARS], scalarr[NSCALARS], f_scalar_l[NSCALARS], f_scalar_r[NSCALARS]; - #endif +#endif int o1, o2, o3; if (dir == 0) { @@ -81,66 +79,66 @@ __global__ void Calculate_Roe_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_R myl = dev_bounds_L[o2 * n_cells + tid]; mzl = dev_bounds_L[o3 * n_cells + tid]; El = dev_bounds_L[4 * n_cells + tid]; - #ifdef SCALAR +#ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { dscalarl[i] = dev_bounds_L[(5 + i) * n_cells + tid]; } - #endif - #ifdef DE +#endif +#ifdef DE dgel = dev_bounds_L[(n_fields - 1) * n_cells + tid]; - #endif +#endif dr = dev_bounds_R[tid]; mxr = dev_bounds_R[o1 * n_cells + tid]; myr = dev_bounds_R[o2 * n_cells + tid]; mzr = dev_bounds_R[o3 * n_cells + tid]; Er = dev_bounds_R[4 * n_cells + tid]; - #ifdef SCALAR +#ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { dscalarr[i] = dev_bounds_R[(5 + i) * n_cells + tid]; } - #endif - #ifdef DE +#endif +#ifdef DE dger = dev_bounds_R[(n_fields - 1) * n_cells + tid]; - #endif +#endif // calculate primitive variables vxl = mxl / dl; vyl = myl / dl; vzl = mzl / dl; - #ifdef DE // PRESSURE_DE +#ifdef DE // PRESSURE_DE E_kin = 0.5 * dl * (vxl * vxl + vyl * vyl + vzl * vzl); pl = hydro_utilities::Get_Pressure_From_DE(El, El - E_kin, dgel, gamma); - #else +#else pl = (El - 0.5 * dl * (vxl * vxl + vyl * vyl + vzl * vzl)) * (gamma - 1.0); - #endif // PRESSURE_DE +#endif // PRESSURE_DE pl = fmax(pl, (Real)TINY_NUMBER); - #ifdef SCALAR +#ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { scalarl[i] = dscalarl[i] / dl; } - #endif - #ifdef DE +#endif +#ifdef DE gel = dgel / dl; - #endif +#endif vxr = mxr / dr; vyr = myr / dr; vzr = mzr / dr; - #ifdef DE // PRESSURE_DE +#ifdef DE // PRESSURE_DE E_kin = 0.5 * dr * (vxr * vxr + vyr * vyr + vzr * vzr); pr = hydro_utilities::Get_Pressure_From_DE(Er, Er - E_kin, dger, gamma); - #else +#else pr = (Er - 0.5 * dr * (vxr * vxr + vyr * vyr + vzr * vzr)) * (gamma - 1.0); - #endif // PRESSURE_DE +#endif // PRESSURE_DE pr = fmax(pr, (Real)TINY_NUMBER); - #ifdef SCALAR +#ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { scalarr[i] = dscalarr[i] / dr; } - #endif - #ifdef DE +#endif +#ifdef DE ger = dger / dr; - #endif +#endif // calculate the enthalpy in each cell Hl = (El + pl) / dl; @@ -173,28 +171,28 @@ __global__ void Calculate_Roe_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_R f_my_l = mxl * vyl; f_mz_l = mxl * vzl; f_E_l = (El + pl) * vxl; - #ifdef DE +#ifdef DE f_ge_l = mxl * gel; - #endif - #ifdef SCALAR +#endif +#ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { f_scalar_l[i] = mxl * scalarl[i]; } - #endif +#endif f_d_r = mxr; f_mx_r = mxr * vxr + pr; f_my_r = mxr * vyr; f_mz_r = mxr * vzr; f_E_r = (Er + pr) * vxr; - #ifdef DE +#ifdef DE f_ge_r = mxr * ger; - #endif - #ifdef SCALAR +#endif +#ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { f_scalar_r[i] = mxr * scalarr[i]; } - #endif +#endif // return upwind flux if flow is supersonic if (lambda_m >= 0.0) { @@ -203,14 +201,14 @@ __global__ void Calculate_Roe_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_R dev_flux[o2 * n_cells + tid] = f_my_l; dev_flux[o3 * n_cells + tid] = f_mz_l; dev_flux[4 * n_cells + tid] = f_E_l; - #ifdef SCALAR +#ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { dev_flux[(5 + i) * n_cells + tid] = f_scalar_l[i]; } - #endif - #ifdef DE +#endif +#ifdef DE dev_flux[(n_fields - 1) * n_cells + tid] = f_ge_l; - #endif +#endif return; } else if (lambda_p <= 0.0) { dev_flux[tid] = f_d_r; @@ -218,14 +216,14 @@ __global__ void Calculate_Roe_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_R dev_flux[o2 * n_cells + tid] = f_my_r; dev_flux[o3 * n_cells + tid] = f_mz_r; dev_flux[4 * n_cells + tid] = f_E_r; - #ifdef SCALAR +#ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { dev_flux[(5 + i) * n_cells + tid] = f_scalar_r[i]; } - #endif - #ifdef DE +#endif +#ifdef DE dev_flux[(n_fields - 1) * n_cells + tid] = f_ge_r; - #endif +#endif return; } // otherwise calculate the Roe fluxes @@ -342,17 +340,17 @@ __global__ void Calculate_Roe_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_R f_E_l = El * (vxl - bm) + pl * vxl; f_E_r = Er * (vxr - bp) + pr * vxr; - #ifdef DE +#ifdef DE f_ge_l = dgel * (vxl - bm); f_ge_r = dger * (vxr - bp); - #endif +#endif - #ifdef SCALAR +#ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { f_scalar_l[i] = dscalarl[i] * (vxl - bm); f_scalar_r[i] = dscalarr[i] * (vxr - bp); } - #endif +#endif // compute the HLLE flux at the interface tmp = 0.5 * (bp + bm) / (bp - bm); @@ -362,15 +360,15 @@ __global__ void Calculate_Roe_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_R dev_flux[o2 * n_cells + tid] = 0.5 * (f_my_l + f_my_r) + (f_my_l - f_my_r) * tmp; dev_flux[o3 * n_cells + tid] = 0.5 * (f_mz_l + f_mz_r) + (f_mz_l - f_mz_r) * tmp; dev_flux[4 * n_cells + tid] = 0.5 * (f_E_l + f_E_r) + (f_E_l - f_E_r) * tmp; - #ifdef SCALAR +#ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { dev_flux[(5 + i) * n_cells + tid] = 0.5 * (f_scalar_l[i] + f_scalar_r[i]) + (f_scalar_l[i] - f_scalar_r[i]) * tmp; } - #endif - #ifdef DE +#endif +#ifdef DE dev_flux[(n_fields - 1) * n_cells + tid] = 0.5 * (f_ge_l + f_ge_r) + (f_ge_l - f_ge_r) * tmp; - #endif +#endif return; } // otherwise return the roe fluxes @@ -380,7 +378,7 @@ __global__ void Calculate_Roe_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_R dev_flux[o2 * n_cells + tid] = 0.5 * (f_my_l + f_my_r - sum_2); dev_flux[o3 * n_cells + tid] = 0.5 * (f_mz_l + f_mz_r - sum_3); dev_flux[4 * n_cells + tid] = 0.5 * (f_E_l + f_E_r - sum_4); - #ifdef SCALAR +#ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { if (dev_flux[tid] >= 0.0) { dev_flux[(5 + i) * n_cells + tid] = dev_flux[tid] * scalarl[i]; @@ -388,17 +386,15 @@ __global__ void Calculate_Roe_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_R dev_flux[(5 + i) * n_cells + tid] = dev_flux[tid] * scalarr[i]; } } - #endif - #ifdef DE +#endif +#ifdef DE if (dev_flux[tid] >= 0.0) { dev_flux[(n_fields - 1) * n_cells + tid] = dev_flux[tid] * gel; } else { dev_flux[(n_fields - 1) * n_cells + tid] = dev_flux[tid] * ger; } - #endif +#endif } } } } - -#endif // CUDA diff --git a/src/riemann_solvers/roe_cuda.h b/src/riemann_solvers/roe_cuda.h index 4ee81022d..3b992eb8e 100644 --- a/src/riemann_solvers/roe_cuda.h +++ b/src/riemann_solvers/roe_cuda.h @@ -1,12 +1,10 @@ /*! \file roe_cuda.h * \brief Declarations of functions for the cuda roe riemann solver kernel. */ -#ifdef CUDA +#ifndef ROE_CUDA_H + #define Roe_CUDA_H - #ifndef ROE_CUDA_H - #define Roe_CUDA_H - - #include "../global/global.h" + #include "../global/global.h" /*! \fn Calculate_Roe_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_R, Real * *dev_flux, int nx, int ny, int nz, int n_ghost, Real gamma, Real *dev_etah, @@ -15,5 +13,4 @@ __global__ void Calculate_Roe_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_R, Real *dev_flux, int nx, int ny, int nz, int n_ghost, Real gamma, int dir, int n_fields); - #endif // ROE_CUDA_H -#endif // CUDA +#endif // ROE_CUDA_H diff --git a/src/utils/error_check_cuda.cu b/src/utils/error_check_cuda.cu index 470d47edd..153106b10 100644 --- a/src/utils/error_check_cuda.cu +++ b/src/utils/error_check_cuda.cu @@ -1,17 +1,15 @@ /*! \file error_check_cuda.cu * \brief Error Check Cuda */ -#ifdef CUDA +#include +#include +#include - #include - #include - #include - - #include "../global/global.h" - #include "../global/global_cuda.h" - #include "../io/io.h" - #include "../utils/error_check_cuda.h" - #include "../utils/gpu.hpp" +#include "../global/global.h" +#include "../global/global_cuda.h" +#include "../io/io.h" +#include "../utils/error_check_cuda.h" +#include "../utils/gpu.hpp" __global__ void Check_Value_Along_Axis(Real *dev_array, int n_field, int nx, int ny, int nz, int n_ghost, int *return_value) @@ -72,5 +70,3 @@ int Check_Field_Along_Axis(Real *dev_array, int n_field, int nx, int ny, int nz, return error_value_host; } - -#endif diff --git a/src/utils/error_check_cuda.h b/src/utils/error_check_cuda.h index 110a1b035..98bf9b391 100644 --- a/src/utils/error_check_cuda.h +++ b/src/utils/error_check_cuda.h @@ -1,15 +1,13 @@ /*! \file error_check_cuda.h * \brief error_check_cuda.h */ -#ifdef CUDA +#ifndef ERROR_CHECK_CUDA_H +#define ERROR_CHECK_CUDA_H - #ifndef ERROR_CHECK_CUDA_H - #define ERROR_CHECK_CUDA_H +#include "../global/global.h" - #include "../global/global.h" - - #define N_Z 24 - #define N_Y 24 +#define N_Z 24 +#define N_Y 24 int Check_Field_Along_Axis(Real *dev_array, int n_field, int nx, int ny, int nz, int n_ghost, dim3 Grid_Error, dim3 Block_Error); @@ -17,5 +15,4 @@ int Check_Field_Along_Axis(Real *dev_array, int n_field, int nx, int ny, int nz, __global__ void Check_Value_Along_Axis(Real *dev_array, int n_field, int nx, int ny, int nz, int n_ghost, int *return_value); - #endif // ERROR_CHECK_CUDA_H -#endif // CUDA +#endif // ERROR_CHECK_CUDA_H diff --git a/src/utils/error_handling.cpp b/src/utils/error_handling.cpp index 2fe9e9735..60246cfbe 100644 --- a/src/utils/error_handling.cpp +++ b/src/utils/error_handling.cpp @@ -47,11 +47,6 @@ void Check_Configuration(Parameters const& P) // Check that MACRO_FLAGS has contents static_assert(sizeof(MACRO_FLAGS) > 1); - // Must have CUDA -#ifndef CUDA - #error "The CUDA macro is required" -#endif //! CUDA - // Can only have one integrator enabled #if ((defined(VL) + defined(CTU) + defined(SIMPLE)) != 1) #error "Only one integrator can be enabled at a time." diff --git a/src/utils/reduction_utilities.cu b/src/utils/reduction_utilities.cu index 518572cd2..6434f560b 100644 --- a/src/utils/reduction_utilities.cu +++ b/src/utils/reduction_utilities.cu @@ -13,7 +13,6 @@ // Local Includes #include "../utils/reduction_utilities.h" -#ifdef CUDA namespace reduction_utilities { // ===================================================================== @@ -40,4 +39,3 @@ __global__ void kernelReduceMax(Real* in, Real* out, size_t N) } // ===================================================================== } // namespace reduction_utilities -#endif // CUDA \ No newline at end of file diff --git a/src/utils/reduction_utilities.h b/src/utils/reduction_utilities.h index e47f72d26..99191d8c5 100644 --- a/src/utils/reduction_utilities.h +++ b/src/utils/reduction_utilities.h @@ -17,7 +17,6 @@ #include "../global/global_cuda.h" #include "../utils/gpu.hpp" -#ifdef CUDA /*! * \brief Namespace to contain device resident reduction functions. Includes * functions and kernels for array reduction, warp level, block level, and @@ -81,7 +80,7 @@ __inline__ __device__ Real blockReduceMax(Real val) } // ===================================================================== - #ifndef O_HIP +#ifndef O_HIP // ===================================================================== // This section handles the atomics. It is complicated because CUDA // doesn't currently support atomics with non-integral types. @@ -158,7 +157,7 @@ inline __device__ double decode(long long val) } return bit_cast(val); } - #endif // O_HIP +#endif // O_HIP /*! * \brief Perform an atomic reduction to find the maximum value of `val` * @@ -170,12 +169,12 @@ inline __device__ double decode(long long val) */ inline __device__ float atomicMaxBits(float* address, float val) { - #ifdef O_HIP +#ifdef O_HIP return atomicMax(address, val); - #else // O_HIP +#else // O_HIP int old = atomicMax((int*)address, encode(val)); return decode(old); - #endif // O_HIP +#endif // O_HIP } /*! @@ -189,12 +188,12 @@ inline __device__ float atomicMaxBits(float* address, float val) */ inline __device__ double atomicMaxBits(double* address, double val) { - #ifdef O_HIP +#ifdef O_HIP return atomicMax(address, val); - #else // O_HIP +#else // O_HIP long long old = atomicMax((long long*)address, encode(val)); return decode(old); - #endif // O_HIP +#endif // O_HIP } /*! @@ -208,12 +207,12 @@ inline __device__ double atomicMaxBits(double* address, double val) */ inline __device__ float atomicMinBits(float* address, float val) { - #ifdef O_HIP +#ifdef O_HIP return atomicMin(address, val); - #else // O_HIP +#else // O_HIP int old = atomicMin((int*)address, encode(val)); return decode(old); - #endif // O_HIP +#endif // O_HIP } /*! @@ -227,12 +226,12 @@ inline __device__ float atomicMinBits(float* address, float val) */ inline __device__ double atomicMinBits(double* address, double val) { - #ifdef O_HIP +#ifdef O_HIP return atomicMin(address, val); - #else // O_HIP +#else // O_HIP long long old = atomicMin((long long*)address, encode(val)); return decode(old); - #endif // O_HIP +#endif // O_HIP } // ===================================================================== @@ -305,4 +304,3 @@ __inline__ __device__ void gridReduceMax(Real val, Real* out) __global__ void kernelReduceMax(Real* in, Real* out, size_t N); // ===================================================================== } // namespace reduction_utilities -#endif // CUDA From cebf4e70c3d899549d745fbd6702a3b3017d4b7a Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Fri, 15 Dec 2023 16:17:15 -0500 Subject: [PATCH 682/694] Fix typo in header guard --- src/riemann_solvers/roe_cuda.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/riemann_solvers/roe_cuda.h b/src/riemann_solvers/roe_cuda.h index 3b992eb8e..bff592876 100644 --- a/src/riemann_solvers/roe_cuda.h +++ b/src/riemann_solvers/roe_cuda.h @@ -2,9 +2,9 @@ * \brief Declarations of functions for the cuda roe riemann solver kernel. */ #ifndef ROE_CUDA_H - #define Roe_CUDA_H +#define ROE_CUDA_H - #include "../global/global.h" +#include "../global/global.h" /*! \fn Calculate_Roe_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_R, Real * *dev_flux, int nx, int ny, int nz, int n_ghost, Real gamma, Real *dev_etah, From 5909554d6349ba6695e2e1b482433d6cc2496d98 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Thu, 1 Feb 2024 14:02:14 -0500 Subject: [PATCH 683/694] Formatting --- src/grid/grid3D.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/grid/grid3D.cpp b/src/grid/grid3D.cpp index 8e8725240..7a6f320d9 100644 --- a/src/grid/grid3D.cpp +++ b/src/grid/grid3D.cpp @@ -517,7 +517,7 @@ Real Grid3D::Update_Hydro_Grid() #ifdef DUST // ==Apply dust from dust/dust_cuda.h== Dust_Update(C.device, H.nx, H.ny, H.nz, H.n_ghost, H.n_fields, H.dt, gama, H.grain_radius); - #endif // DUST +#endif // DUST #ifdef CHEMISTRY_GPU // Update the H and He ionization fractions and apply cooling and photoheating From 3e1c1959f277272cb17c362ca1c4169d36ef8567 Mon Sep 17 00:00:00 2001 From: helenarichie Date: Thu, 1 Feb 2024 14:05:14 -0500 Subject: [PATCH 684/694] set default floor values in global.h and add warning --- src/global/global.cpp | 9 +++++++++ src/global/global.h | 6 +++--- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/src/global/global.cpp b/src/global/global.cpp index 2d3a23467..49bc5f681 100644 --- a/src/global/global.cpp +++ b/src/global/global.cpp @@ -439,14 +439,23 @@ void Parse_Param(char *name, char *value, struct Parameters *parms) #ifdef TEMPERATURE_FLOOR } else if (strcmp(name, "temperature_floor") == 0) { parms->temperature_floor = atof(value); + if (parms->temperature_floor == 0) { + chprintf("WARNING: temperature floor is set to its default value!\n"); + } #endif #ifdef DENSITY_FLOOR } else if (strcmp(name, "density_floor") == 0) { parms->density_floor = atof(value); + if (parms->density_floor == 0) { + chprintf("WARNING: density floor is set to its default value!\n"); + } #endif #ifdef SCALAR_FLOOR } else if (strcmp(name, "scalar_floor") == 0) { parms->scalar_floor = atof(value); + if (parms->scalar_floor == 0) { + chprintf("WARNING: scalar floor is set to its default value!\n"); + } #endif #ifdef ANALYSIS } else if (strcmp(name, "analysis_scale_outputs_file") == 0) { diff --git a/src/global/global.h b/src/global/global.h index 3cf58312a..8fd8b505b 100644 --- a/src/global/global.h +++ b/src/global/global.h @@ -323,13 +323,13 @@ struct Parameters { // photoionization rates of HI, HeI and HeII #endif #ifdef TEMPERATURE_FLOOR - Real temperature_floor; + Real temperature_floor = 0; #endif #ifdef DENSITY_FLOOR - Real density_floor; + Real density_floor = 0; #endif #ifdef SCALAR_FLOOR - Real scalar_floor; + Real scalar_floor = 0; #endif #ifdef ANALYSIS char analysis_scale_outputs_file[MAXLEN]; // File for the scale_factor output From 4f695aad5a8401c66d437fb420976d89e74bea59 Mon Sep 17 00:00:00 2001 From: Matthew Abruzzo Date: Thu, 1 Feb 2024 14:25:26 -0500 Subject: [PATCH 685/694] updated the python concatenation scripts to automatically detect whether cholla files were written in a flat directory-structure or if files from different simulation cycles were written to separate directories --- python_scripts/concat_2d_data.py | 23 +++++++++------- python_scripts/concat_3d_data.py | 23 +++++++++------- python_scripts/concat_internals.py | 44 +++++++++++++++++++++++++++++- python_scripts/concat_particles.py | 40 ++++++++++++++------------- 4 files changed, 90 insertions(+), 40 deletions(-) diff --git a/python_scripts/concat_2d_data.py b/python_scripts/concat_2d_data.py index 5cf6fde55..9c4e0dd86 100755 --- a/python_scripts/concat_2d_data.py +++ b/python_scripts/concat_2d_data.py @@ -21,11 +21,11 @@ import concat_internals # ============================================================================== -def concat_2d_dataset(source_directory: pathlib.Path, - output_directory: pathlib.Path, +def concat_2d_dataset(output_directory: pathlib.Path, num_processes: int, output_number: int, dataset_kind: str, + build_source_path, concat_xy: bool = True, concat_yz: bool = True, concat_xz: bool = True, @@ -41,8 +41,6 @@ def concat_2d_dataset(source_directory: pathlib.Path, Parameters ---------- - source_directory : pathlib.Path - The directory containing the unconcatenated files output_directory : pathlib.Path The directory containing the new concatenated files num_processes : int @@ -51,6 +49,8 @@ def concat_2d_dataset(source_directory: pathlib.Path, The output number to concatenate dataset_kind : str The type of 2D dataset to concatenate. Can be 'slice', 'proj', or 'rot_proj'. + build_source_path : callable + A function used to construct the paths to the files that are to be concatenated. concat_xy : bool If True then concatenate the XY slices/projections. Defaults to True. concat_yz : bool @@ -67,8 +67,6 @@ def concat_2d_dataset(source_directory: pathlib.Path, What compression settings to use if compressing. Defaults to None. chunking : bool or tuple Whether or not to use chunking and the chunk size. Defaults to None. - source_directory: pathlib.Path : - output_directory: pathlib.Path : num_processes: int : @@ -106,7 +104,7 @@ def concat_2d_dataset(source_directory: pathlib.Path, destination_file = concat_internals.destination_safe_open(output_directory / f'{output_number}_{dataset_kind}.h5') # Setup the destination file - with h5py.File(source_directory / f'{output_number}_{dataset_kind}.h5.0', 'r') as source_file: + with h5py.File(build_source_path(proc_id = 0, nfile = output_number), 'r') as source_file: # Copy over header destination_file = concat_internals.copy_header(source_file, destination_file) @@ -144,7 +142,7 @@ def concat_2d_dataset(source_directory: pathlib.Path, # Copy data for rank in range(num_processes): # Open source file - source_file = h5py.File(source_directory / f'{output_number}_{dataset_kind}.h5.{rank}', 'r') + source_file = h5py.File(build_source_path(proc_id = rank, nfile = output_number), 'r') # Loop through and copy datasets for dataset in datasets_to_copy: @@ -249,13 +247,18 @@ def __write_bounds_2d_dataset(source_file: h5py.File, dataset: str) -> tuple: cli.add_argument('--disable-xz', default=True, action='store_false', help='Disables concating the XZ datasets.') args = cli.parse_args() + build_source_path = concat_internals.get_source_path_builder( + source_directory = args.source_directory, + pre_extension_suffix = f'_{args.dataset_kind}', + known_output_snap = args.concat_outputs[0]) + # Perform the concatenation for output in args.concat_outputs: - concat_2d_dataset(source_directory=args.source_directory, - output_directory=args.output_directory, + concat_2d_dataset(output_directory=args.output_directory, num_processes=args.num_processes, output_number=output, dataset_kind=args.dataset_kind, + build_source_path = build_source_path, concat_xy=args.disable_xy, concat_yz=args.disable_yz, concat_xz=args.disable_xz, diff --git a/python_scripts/concat_3d_data.py b/python_scripts/concat_3d_data.py index 599a4a8d1..1d5ba8228 100755 --- a/python_scripts/concat_3d_data.py +++ b/python_scripts/concat_3d_data.py @@ -19,10 +19,10 @@ import concat_internals # ============================================================================== -def concat_3d_dataset(source_directory: pathlib.Path, - output_directory: pathlib.Path, +def concat_3d_dataset(output_directory: pathlib.Path, num_processes: int, output_number: int, + build_source_path, skip_fields: list = [], destination_dtype: np.dtype = None, compression_type: str = None, @@ -33,8 +33,6 @@ def concat_3d_dataset(source_directory: pathlib.Path, Parameters ---------- - source_directory : pathlib.Path - The directory containing the unconcatenated files output_directory : pathlib.Path The directory containing the new concatenated files num_processes : int @@ -43,6 +41,8 @@ def concat_3d_dataset(source_directory: pathlib.Path, The output number to concatenate skip_fields : list List of fields to skip concatenating. Defaults to []. + build_source_path : callable + A function used to construct the paths to the files that are to be concatenated. destination_dtype : np.dtype The data type of the output datasets. Accepts most numpy types. Defaults to the same as the input datasets. compression_type : str @@ -51,8 +51,6 @@ def concat_3d_dataset(source_directory: pathlib.Path, What compression settings to use if compressing. Defaults to None. chunking : bool or tuple Whether or not to use chunking and the chunk size. Defaults to None. - source_directory: pathlib.Path : - output_directory: pathlib.Path : num_processes: int : @@ -81,7 +79,7 @@ def concat_3d_dataset(source_directory: pathlib.Path, destination_file = concat_internals.destination_safe_open(output_directory / f'{output_number}.h5') # Setup the output file - with h5py.File(source_directory / f'{output_number}.h5.0', 'r') as source_file: + with h5py.File(build_source_path(proc_id = 0, nfile = output_number), 'r') as source_file: # Copy header data destination_file = concat_internals.copy_header(source_file, destination_file) @@ -108,7 +106,7 @@ def concat_3d_dataset(source_directory: pathlib.Path, # loop over files for a given output for i in range(0, num_processes): # open the input file for reading - source_file = h5py.File(source_directory / f'{output_number}.h5.{i}', 'r') + source_file = h5py.File(build_source_path(proc_id = i, nfile = output_number), 'r') # Compute the offset slicing nx_local, ny_local, nz_local = source_file.attrs['dims_local'] @@ -140,12 +138,17 @@ def concat_3d_dataset(source_directory: pathlib.Path, cli = concat_internals.common_cli() args = cli.parse_args() + build_source_path = concat_internals.get_source_path_builder( + source_directory = args.source_directory, + pre_extension_suffix = '', + known_output_snap = args.concat_outputs[0]) + # Perform the concatenation for output in args.concat_outputs: - concat_3d_dataset(source_directory=args.source_directory, - output_directory=args.output_directory, + concat_3d_dataset(output_directory=args.output_directory, num_processes=args.num_processes, output_number=output, + build_source_path = build_source_path, skip_fields=args.skip_fields, destination_dtype=args.dtype, compression_type=args.compression_type, diff --git a/python_scripts/concat_internals.py b/python_scripts/concat_internals.py index 6f90f0211..bc615012e 100755 --- a/python_scripts/concat_internals.py +++ b/python_scripts/concat_internals.py @@ -5,6 +5,7 @@ import h5py import argparse +import functools import pathlib # ============================================================================== @@ -119,7 +120,7 @@ def concat_output(raw_argument: str) -> list: raise ValueError() iterable_argument = iterable_argument.union(set(range(start, end+1))) - return iterable_argument + return list(iterable_argument) # ============================================================================ # ============================================================================ @@ -176,3 +177,44 @@ def chunk_arg(raw_argument: str) -> tuple: return cli # ============================================================================== + +def _get_source_path(proc_id : int, source_directory : pathlib.Path, + pre_extension_suffix : str, nfile : int, new_style : bool, + extension : str = '.h5'): + dirname = str(source_directory) + if new_style: + out = f"{dirname}/{nfile}/{nfile}{pre_extension_suffix}{extension}.{proc_id}" + else: + # in principle, when source_directory isn't an empty string and it doesn't end + # end in a '/', part of it should act like a filename prefix + # -> with that said, the concatenation scripts have not supported this behavior + # since we've made use of pathlib.Path + out = f"{dirname}/{nfile}{pre_extension_suffix}{extension}.{proc_id}" + return pathlib.Path(out) + +def get_source_path_builder(source_directory : pathlib.Path, + pre_extension_suffix : str, + known_output_snap : int): + """ + Source files (that are to be concatenated) have one of 2 formats. This identifies + the format in use and returns a function appropriate for building the pathnames + + This function auto-detect the format and returns a function to construct paths to these + files + """ + + # try newer format first: + common_kw = {'source_directory' : source_directory, 'extension' : '.h5', + 'pre_extension_suffix' : pre_extension_suffix} + new_style_path = _get_source_path(proc_id = 0, nfile = known_output_snap, + new_style = True, **common_kw) + old_style_path = _get_source_path(proc_id = 0, nfile = known_output_snap, + new_style = False, **common_kw) + if new_style_path.is_file(): + return functools.partial(_get_source_path, new_style = True, **common_kw) + elif old_style_path.is_file(): + return functools.partial(_get_source_path, new_style = False, **common_kw) + raise RuntimeError( + "Could not find any files to concatenate. We searched " + f"{new_style_path!s} and {old_style_path!s}" + ) \ No newline at end of file diff --git a/python_scripts/concat_particles.py b/python_scripts/concat_particles.py index 8a916f08e..e049ce94c 100755 --- a/python_scripts/concat_particles.py +++ b/python_scripts/concat_particles.py @@ -19,10 +19,10 @@ import concat_internals # ====================================================================================================================== -def concat_particles_dataset(source_directory: pathlib.Path, - output_directory: pathlib.Path, +def concat_particles_dataset(output_directory: pathlib.Path, num_processes: int, output_number: int, + build_source_path, skip_fields: list = [], destination_dtype: np.dtype = None, compression_type: str = None, @@ -33,14 +33,14 @@ def concat_particles_dataset(source_directory: pathlib.Path, Parameters ---------- - source_directory : pathlib.Path - The directory containing the unconcatenated files output_directory : pathlib.Path The directory containing the new concatenated files num_processes : int The number of ranks that Cholla was run with output_number : int The output number to concatenate + build_source_path : callable + A function used to construct the paths to the files that are to be concatenated. skip_fields : list List of fields to skip concatenating. Defaults to []. destination_dtype : np.dtype @@ -51,8 +51,6 @@ def concat_particles_dataset(source_directory: pathlib.Path, What compression settings to use if compressing. Defaults to None. chunking : bool or tuple Whether or not to use chunking and the chunk size. Defaults to None. - source_directory: pathlib.Path : - output_directory: pathlib.Path : num_processes: int : @@ -83,8 +81,8 @@ def concat_particles_dataset(source_directory: pathlib.Path, # Setup the output file # Note that the call to `__get_num_particles` is potentially expensive as it # opens every single file to read the number of particles in that file - num_particles = __get_num_particles(source_directory, num_processes, output_number) - destination_file = __setup_destination_file(source_directory, + num_particles = __get_num_particles(build_source_path, num_processes, output_number) + destination_file = __setup_destination_file(build_source_path, destination_file, output_number, num_particles, @@ -98,7 +96,7 @@ def concat_particles_dataset(source_directory: pathlib.Path, particles_offset = 0 for i in range(0, num_processes): # open the input file for reading - source_file = h5py.File(source_directory / f'{output_number}_particles.h5.{i}', 'r') + source_file = h5py.File(build_source_path(proc_id = i, nfile = output_number), 'r') # Compute the offset slicing for the 3D data nx_local, ny_local, nz_local = source_file.attrs['dims_local'] @@ -131,7 +129,7 @@ def concat_particles_dataset(source_directory: pathlib.Path, # ============================================================================== # ============================================================================== -def __get_num_particles(source_directory: pathlib.Path, +def __get_num_particles(build_source_path, num_processes: int, output_number: int) -> int: """Get the total number of particles in the output. This function is heavily @@ -139,8 +137,8 @@ def __get_num_particles(source_directory: pathlib.Path, Parameters ---------- - source_directory : pathlib.Path - The directory of the unconcatenated files + build_source_path : callable + A function used to construct the paths to the files that are to be concatenated. num_processes : int The number of processes output_number : int @@ -155,14 +153,14 @@ def __get_num_particles(source_directory: pathlib.Path, num_particles = 0 for i in range(0, num_processes): # open the input file for reading - with h5py.File(source_directory / f'{output_number}_particles.h5.{i}', 'r') as source_file: + with h5py.File(build_source_path(proc_id = i, nfile = output_number), 'r') as source_file: num_particles += source_file.attrs['n_particles_local'] return num_particles # ============================================================================== # ============================================================================== -def __setup_destination_file(source_directory: pathlib.Path, +def __setup_destination_file(build_source_path, destination_file: h5py.File, output_number: int, num_particles: int, @@ -175,8 +173,8 @@ def __setup_destination_file(source_directory: pathlib.Path, Parameters ---------- - source_directory : pathlib.Path - The directory containing the unconcatenated files + build_source_path : callable + A function used to construct the paths to the files that are to be concatenated. destination_file : h5py.File The destination file output_number : int @@ -199,7 +197,7 @@ def __setup_destination_file(source_directory: pathlib.Path, h5py.File The fully set up destination file """ - with h5py.File(source_directory / f'{output_number}_particles.h5.0', 'r') as source_file: + with h5py.File(build_source_path(proc_id = 0, nfile = output_number), 'r') as source_file: # Copy header data destination_file = concat_internals.copy_header(source_file, destination_file) @@ -235,10 +233,14 @@ def __setup_destination_file(source_directory: pathlib.Path, cli = concat_internals.common_cli() args = cli.parse_args() + build_source_path = concat_internals.get_source_path_builder( + source_directory = args.source_directory, + pre_extension_suffix = f'_particles', + known_output_snap = args.concat_outputs[0]) + # Perform the concatenation for output in args.concat_outputs: - concat_particles_dataset(source_directory=args.source_directory, - output_directory=args.output_directory, + concat_particles_dataset(output_directory=args.output_directory, num_processes=args.num_processes, output_number=output, skip_fields=args.skip_fields, From d47d8dd83434134525a6283e6b85e872313fcbdf Mon Sep 17 00:00:00 2001 From: Matthew Abruzzo Date: Thu, 1 Feb 2024 14:48:26 -0500 Subject: [PATCH 686/694] minor bugfix --- python_scripts/concat_particles.py | 1 + 1 file changed, 1 insertion(+) diff --git a/python_scripts/concat_particles.py b/python_scripts/concat_particles.py index e049ce94c..89bb3bc1a 100755 --- a/python_scripts/concat_particles.py +++ b/python_scripts/concat_particles.py @@ -243,6 +243,7 @@ def __setup_destination_file(build_source_path, concat_particles_dataset(output_directory=args.output_directory, num_processes=args.num_processes, output_number=output, + build_source_path = build_source_path, skip_fields=args.skip_fields, destination_dtype=args.dtype, compression_type=args.compression_type, From 114f96b555d3ac01822ba6925bdb124d6c17487b Mon Sep 17 00:00:00 2001 From: helenarichie Date: Thu, 1 Feb 2024 14:56:15 -0500 Subject: [PATCH 687/694] change warning message and tweak how the default floors are set --- src/global/global.cpp | 6 +++--- src/global/global.h | 6 ------ src/grid/grid3D.cpp | 6 ------ 3 files changed, 3 insertions(+), 15 deletions(-) diff --git a/src/global/global.cpp b/src/global/global.cpp index 49bc5f681..ed140059c 100644 --- a/src/global/global.cpp +++ b/src/global/global.cpp @@ -440,21 +440,21 @@ void Parse_Param(char *name, char *value, struct Parameters *parms) } else if (strcmp(name, "temperature_floor") == 0) { parms->temperature_floor = atof(value); if (parms->temperature_floor == 0) { - chprintf("WARNING: temperature floor is set to its default value!\n"); + chprintf("WARNING: temperature floor is set to its default value (zero)! It can be set to a different value in the input parameter file.\n"); } #endif #ifdef DENSITY_FLOOR } else if (strcmp(name, "density_floor") == 0) { parms->density_floor = atof(value); if (parms->density_floor == 0) { - chprintf("WARNING: density floor is set to its default value!\n"); + chprintf("WARNING: density floor is set to its default value (zero)! It can be set to a different value in the input parameter file.\n"); } #endif #ifdef SCALAR_FLOOR } else if (strcmp(name, "scalar_floor") == 0) { parms->scalar_floor = atof(value); if (parms->scalar_floor == 0) { - chprintf("WARNING: scalar floor is set to its default value!\n"); + chprintf("WARNING: scalar floor is set to its default value (zero)! It can be set to a different value in the input parameter file.\n"); } #endif #ifdef ANALYSIS diff --git a/src/global/global.h b/src/global/global.h index 8fd8b505b..91c54911e 100644 --- a/src/global/global.h +++ b/src/global/global.h @@ -322,15 +322,9 @@ struct Parameters { char UVB_rates_file[MAXLEN]; // File for the UVB photoheating and // photoionization rates of HI, HeI and HeII #endif -#ifdef TEMPERATURE_FLOOR Real temperature_floor = 0; -#endif -#ifdef DENSITY_FLOOR Real density_floor = 0; -#endif -#ifdef SCALAR_FLOOR Real scalar_floor = 0; -#endif #ifdef ANALYSIS char analysis_scale_outputs_file[MAXLEN]; // File for the scale_factor output // values for cosmological diff --git a/src/grid/grid3D.cpp b/src/grid/grid3D.cpp index 1cfde07a6..b773e18ea 100644 --- a/src/grid/grid3D.cpp +++ b/src/grid/grid3D.cpp @@ -261,20 +261,14 @@ void Grid3D::Initialize(struct Parameters *P) #ifdef TEMPERATURE_FLOOR H.temperature_floor = P->temperature_floor; #else - H.temperature_floor = 0.0; -#endif #ifdef DENSITY_FLOOR H.density_floor = P->density_floor; #else - H.density_floor = 0.0; -#endif #ifdef SCALAR_FLOOR H.scalar_floor = P->scalar_floor; #else - H.scalar_floor = 0.0; -#endif #ifdef COSMOLOGY H.OUTPUT_SCALE_FACOR = not(P->scale_outputs_file[0] == '\0'); From 3eb2fb8eb19825ed653792cf552dfa66215cf9f4 Mon Sep 17 00:00:00 2001 From: helenarichie Date: Thu, 1 Feb 2024 15:16:59 -0500 Subject: [PATCH 688/694] fix syntax errors --- src/grid/grid3D.cpp | 8 +++----- src/hydro/hydro_cuda.cu | 2 -- 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/src/grid/grid3D.cpp b/src/grid/grid3D.cpp index b773e18ea..9ca5ba69a 100644 --- a/src/grid/grid3D.cpp +++ b/src/grid/grid3D.cpp @@ -260,15 +260,15 @@ void Grid3D::Initialize(struct Parameters *P) // Values for lower limit for density and temperature #ifdef TEMPERATURE_FLOOR H.temperature_floor = P->temperature_floor; -#else +#endif #ifdef DENSITY_FLOOR H.density_floor = P->density_floor; -#else +#endif #ifdef SCALAR_FLOOR H.scalar_floor = P->scalar_floor; -#else +#endif #ifdef COSMOLOGY H.OUTPUT_SCALE_FACOR = not(P->scale_outputs_file[0] == '\0'); @@ -490,8 +490,6 @@ Real Grid3D::Update_Hydro_Grid() Execute_Hydro_Integrator(); -#ifdef CUDA - #ifdef TEMPERATURE_FLOOR // Set the lower limit temperature (Internal Energy) Real U_floor; diff --git a/src/hydro/hydro_cuda.cu b/src/hydro/hydro_cuda.cu index 8a69266ff..4c45c87f2 100644 --- a/src/hydro/hydro_cuda.cu +++ b/src/hydro/hydro_cuda.cu @@ -1305,5 +1305,3 @@ __global__ void Scalar_Floor_Kernel(Real *dev_conserved, int nx, int ny, int nz, } } } - -#endif // CUDA From cede7f3a1b65b149a74f35a7da96fe24478f76ac Mon Sep 17 00:00:00 2001 From: helenarichie Date: Thu, 1 Feb 2024 15:19:06 -0500 Subject: [PATCH 689/694] run clang format --- src/global/global.cpp | 12 +++++++++--- src/global/global.h | 4 ++-- src/grid/grid3D.cpp | 24 ++++++++++++------------ src/hydro/hydro_cuda.cu | 4 ++-- 4 files changed, 25 insertions(+), 19 deletions(-) diff --git a/src/global/global.cpp b/src/global/global.cpp index ed140059c..a5e4068a4 100644 --- a/src/global/global.cpp +++ b/src/global/global.cpp @@ -440,21 +440,27 @@ void Parse_Param(char *name, char *value, struct Parameters *parms) } else if (strcmp(name, "temperature_floor") == 0) { parms->temperature_floor = atof(value); if (parms->temperature_floor == 0) { - chprintf("WARNING: temperature floor is set to its default value (zero)! It can be set to a different value in the input parameter file.\n"); + chprintf( + "WARNING: temperature floor is set to its default value (zero)! It can be set to a different value in the " + "input parameter file.\n"); } #endif #ifdef DENSITY_FLOOR } else if (strcmp(name, "density_floor") == 0) { parms->density_floor = atof(value); if (parms->density_floor == 0) { - chprintf("WARNING: density floor is set to its default value (zero)! It can be set to a different value in the input parameter file.\n"); + chprintf( + "WARNING: density floor is set to its default value (zero)! It can be set to a different value in the input " + "parameter file.\n"); } #endif #ifdef SCALAR_FLOOR } else if (strcmp(name, "scalar_floor") == 0) { parms->scalar_floor = atof(value); if (parms->scalar_floor == 0) { - chprintf("WARNING: scalar floor is set to its default value (zero)! It can be set to a different value in the input parameter file.\n"); + chprintf( + "WARNING: scalar floor is set to its default value (zero)! It can be set to a different value in the input " + "parameter file.\n"); } #endif #ifdef ANALYSIS diff --git a/src/global/global.h b/src/global/global.h index 91c54911e..b1a906532 100644 --- a/src/global/global.h +++ b/src/global/global.h @@ -323,8 +323,8 @@ struct Parameters { // photoionization rates of HI, HeI and HeII #endif Real temperature_floor = 0; - Real density_floor = 0; - Real scalar_floor = 0; + Real density_floor = 0; + Real scalar_floor = 0; #ifdef ANALYSIS char analysis_scale_outputs_file[MAXLEN]; // File for the scale_factor output // values for cosmological diff --git a/src/grid/grid3D.cpp b/src/grid/grid3D.cpp index 9ca5ba69a..ef4d57928 100644 --- a/src/grid/grid3D.cpp +++ b/src/grid/grid3D.cpp @@ -458,7 +458,7 @@ void Grid3D::Execute_Hydro_Integrator(void) Simple_Algorithm_3D_CUDA(C.device, C.d_Grav_potential, H.nx, H.ny, H.nz, x_off, y_off, z_off, H.n_ghost, H.dx, H.dy, H.dz, H.xbound, H.ybound, H.zbound, H.dt, H.n_fields, H.custom_grav, H.density_floor, C.Grav_potential); - #endif // SIMPLE +#endif // SIMPLE } else { chprintf("Error: Grid dimensions nx: %d ny: %d nz: %d not supported.\n", H.nx, H.ny, H.nz); chexit(-1); @@ -490,27 +490,27 @@ Real Grid3D::Update_Hydro_Grid() Execute_Hydro_Integrator(); - #ifdef TEMPERATURE_FLOOR +#ifdef TEMPERATURE_FLOOR // Set the lower limit temperature (Internal Energy) Real U_floor; // Minimum of internal energy from minumum of temperature U_floor = H.temperature_floor * KB / (gama - 1) / MP / SP_ENERGY_UNIT; - #ifdef COSMOLOGY + #ifdef COSMOLOGY U_floor = H.temperature_floor / (gama - 1) / MP * KB * 1e-10; // ( km/s )^2 U_floor /= Cosmo.v_0_gas * Cosmo.v_0_gas / Cosmo.current_a / Cosmo.current_a; - #endif + #endif Apply_Temperature_Floor(C.device, H.nx, H.ny, H.nz, H.n_ghost, H.n_fields, U_floor); - #endif // TEMPERATURE_FLOOR +#endif // TEMPERATURE_FLOOR - #ifdef SCALAR_FLOOR - #ifdef DUST +#ifdef SCALAR_FLOOR + #ifdef DUST Apply_Scalar_Floor(C.device, H.nx, H.ny, H.nz, H.n_ghost, grid_enum::dust_density, H.scalar_floor); - #endif - #endif // SCALAR_FLOOR + #endif +#endif // SCALAR_FLOOR - // == Perform chemistry/cooling (there are a few different cases) == - #ifdef COOLING_GPU - #ifdef CPU_TIME +// == Perform chemistry/cooling (there are a few different cases) == +#ifdef COOLING_GPU + #ifdef CPU_TIME Timer.Cooling_GPU.Start(); #endif // ==Apply Cooling from cooling/cooling_cuda.h== diff --git a/src/hydro/hydro_cuda.cu b/src/hydro/hydro_cuda.cu index 4c45c87f2..4d0661fbd 100644 --- a/src/hydro/hydro_cuda.cu +++ b/src/hydro/hydro_cuda.cu @@ -1143,12 +1143,12 @@ __global__ void Temperature_Floor_Kernel(Real *dev_conserved, int nx, int ny, in dev_conserved[4 * n_cells + id] = Ekin + d * U_floor; } - #ifdef DE +#ifdef DE U = dev_conserved[(n_fields - 1) * n_cells + id] / d; if (U < U_floor) { dev_conserved[(n_fields - 1) * n_cells + id] = d * U_floor; } - #endif +#endif } } From 472c001fddc64f97e50bbd96fc5e5e3df9566d69 Mon Sep 17 00:00:00 2001 From: Matthew Abruzzo Date: Thu, 1 Feb 2024 15:45:22 -0500 Subject: [PATCH 690/694] Modified the filenames that the system-tests search for. --- src/io/io.cpp | 16 ++++++++++++---- src/io/io.h | 15 +++++++++++---- src/system_tests/system_tester.cpp | 12 +++++++----- 3 files changed, 30 insertions(+), 13 deletions(-) diff --git a/src/io/io.cpp b/src/io/io.cpp index 31430a61f..a8e075624 100644 --- a/src/io/io.cpp +++ b/src/io/io.cpp @@ -2795,6 +2795,17 @@ std::string FnameTemplate::effective_output_dir_path(int nfile) const noexcept } std::string FnameTemplate::format_fname(int nfile, const std::string &pre_extension_suffix) const noexcept +{ +#ifdef MPI_CHOLLA + int file_proc_id = procID; +#else + int file_proc_id = 0; +#endif + return format_fname(nfile, file_proc_id, pre_extension_suffix); +} + +std::string FnameTemplate::format_fname(int nfile, int file_proc_id, + const std::string &pre_extension_suffix) const noexcept { // get the leading section of the string const std::string path_prefix = @@ -2811,10 +2822,7 @@ std::string FnameTemplate::format_fname(int nfile, const std::string &pre_extens const char *extension = ".txt"; #endif - std::string procID_part; // initialized to empty string -#ifdef MPI_CHOLLA - procID_part = ("." + std::to_string(procID)); -#endif + std::string procID_part = "." + std::to_string(procID); // initialized to empty string return path_prefix + std::to_string(nfile) + pre_extension_suffix + extension + procID_part; } diff --git a/src/io/io.h b/src/io/io.h index a25f5298a..d8f6ca8ca 100644 --- a/src/io/io.h +++ b/src/io/io.h @@ -54,7 +54,7 @@ void Write_Debug(Real* Value, const char* fname, int nValues, int iProc); /* Lightweight object designed to centralize the file-naming logic (& any associated configuration). * * Cholla pathnames traditionally followed the following template: - * "{outdir}{nfile}{pre_extension_suffix}{extension}[.{proc_id}]" + * "{outdir}{nfile}{pre_extension_suffix}{extension}.{proc_id}" * where each curly-braced token represents a different variable. In detail: * - `{outdir}` is the parameter from the parameter file. The historical behavior (that we currently * maintain), if this is non-empty, then all charaters following the last '/' are treated as a @@ -65,10 +65,10 @@ void Write_Debug(Real* Value, const char* fname, int nValues, int iProc); * file extension (or `{extension}`) * - `{extension}` is the filename extension. Examples include ".h5" or ".bin" or ".txt". * - `{proc_id}` represents the process-id that held the data that will be written to this file. - * In non-MPI runs, this will be omitted. + * Previously, in non-MPI runs, this was omitted. * * Instances can be configured to support the following newer file-naming template - * "{outdir}/{nfile}/{nfile}{pre_extension_suffix}{extension}[.{proc_id}]" + * "{outdir}/{nfile}/{nfile}{pre_extension_suffix}{extension}.{proc_id}" * where the the significance of each curly-braced token is largely unchanged. There are 2 things * worth noting: * - all files written at a single simulation-cycle are now grouped in a single directory @@ -83,7 +83,12 @@ class FnameTemplate public: FnameTemplate() = delete; - FnameTemplate(const Parameters& P) : separate_cycle_dirs_(not P.legacy_flat_outdir), outdir_(P.outdir) {} + FnameTemplate(bool separate_cycle_dirs, std::string outdir) + : separate_cycle_dirs_(separate_cycle_dirs), outdir_(std::move(outdir)) + { + } + + FnameTemplate(const Parameters& P) : FnameTemplate(not P.legacy_flat_outdir, P.outdir) {} /* Specifies whether separate cycles are written to separate directories */ bool separate_cycle_dirs() const noexcept { return separate_cycle_dirs_; } @@ -94,6 +99,8 @@ class FnameTemplate /* format the file path */ std::string format_fname(int nfile, const std::string& pre_extension_suffix) const noexcept; + std::string format_fname(int nfile, int file_proc_id, const std::string& pre_extension_suffix) const noexcept; + private: bool separate_cycle_dirs_; std::string outdir_; diff --git a/src/system_tests/system_tester.cpp b/src/system_tests/system_tester.cpp index 2fa2a4129..1888fd752 100644 --- a/src/system_tests/system_tester.cpp +++ b/src/system_tests/system_tester.cpp @@ -55,15 +55,16 @@ void system_test::SystemTestRunner::runTest(bool const &compute_L2_norm_only, do // Make sure we have all the required data files and open the test data file _testHydroFieldsFileVec.resize(numMpiRanks); _testParticlesFileVec.resize(numMpiRanks); + FnameTemplate fname_template(true, _outputDirectory); for (size_t fileIndex = 0; fileIndex < numMpiRanks; fileIndex++) { // Load the hydro data - std::string filePath = _outputDirectory + "/1.h5." + std::to_string(fileIndex); + std::string filePath = fname_template.format_fname(1, fileIndex, ""); if (_hydroDataExists and std::filesystem::exists(filePath)) { _testHydroFieldsFileVec[fileIndex].openFile(filePath, H5F_ACC_RDONLY); } // Load the particles data - filePath = _outputDirectory + "/1_particles.h5." + std::to_string(fileIndex); + filePath = fname_template.format_fname(1, fileIndex, "_particles"); if (_particleDataExists and std::filesystem::exists(filePath)) { _testParticlesFileVec[fileIndex].openFile(filePath, H5F_ACC_RDONLY); } @@ -236,15 +237,16 @@ void system_test::SystemTestRunner::runL1ErrorTest(double const &maxAllowedL1Err // Make sure we have all the required data files and open the data files _testHydroFieldsFileVec.resize(numMpiRanks); std::vector initialHydroFieldsFileVec(numMpiRanks); + FnameTemplate fname_template(true, _outputDirectory); for (size_t fileIndex = 0; fileIndex < numMpiRanks; fileIndex++) { // Initial time data - std::string filePath = _outputDirectory + "/0.h5." + std::to_string(fileIndex); + std::string filePath = fname_template.format_fname(0, fileIndex, ""); if (std::filesystem::exists(filePath)) { initialHydroFieldsFileVec[fileIndex].openFile(filePath, H5F_ACC_RDONLY); } // Final time data - filePath = _outputDirectory + "/1.h5." + std::to_string(fileIndex); + filePath = fname_template.format_fname(1, fileIndex, ""); if (std::filesystem::exists(filePath)) { _testHydroFieldsFileVec[fileIndex].openFile(filePath, H5F_ACC_RDONLY); } @@ -359,7 +361,7 @@ void system_test::SystemTestRunner::openHydroTestData() { _testHydroFieldsFileVec.resize(numMpiRanks); for (size_t fileIndex = 0; fileIndex < numMpiRanks; fileIndex++) { - std::string filePath = _outputDirectory + "/1.h5." + std::to_string(fileIndex); + std::string filePath = FnameTemplate(true, _outputDirectory).format_fname(1, fileIndex, ""); if (std::filesystem::exists(filePath)) { _testHydroFieldsFileVec[fileIndex].openFile(filePath, H5F_ACC_RDONLY); } From ca6019eb48813025f30353e8d34b340f8707d6d9 Mon Sep 17 00:00:00 2001 From: Matthew Abruzzo Date: Thu, 1 Feb 2024 17:48:51 -0500 Subject: [PATCH 691/694] fixing another test. --- src/io/io_tests.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/io/io_tests.cpp b/src/io/io_tests.cpp index 87fdf8a09..30b43f644 100644 --- a/src/io/io_tests.cpp +++ b/src/io/io_tests.cpp @@ -20,19 +20,20 @@ TEST(tHYDROtMHDReadGridHdf5, RestartSlowWaveExpectCorrectOutput) { // Set parameters - int const num_ranks = 4; + int const num_ranks = 4; + std::string restart_nfile_str = "0"; // Generate the data to read from system_test::SystemTestRunner initializer(false, true, false); initializer.numMpiRanks = num_ranks; initializer.chollaLaunchParams.append(" tout=0.0 outstep=0.0"); initializer.launchCholla(); - std::string const read_directory = initializer.getOutputDirectory() + "/"; + std::string const read_directory = initializer.getOutputDirectory() + "/" + restart_nfile_str + "/"; // Reload data and run the test system_test::SystemTestRunner loadRun(false, true, false); loadRun.numMpiRanks = num_ranks; - loadRun.chollaLaunchParams.append(" init=Read_Grid nfile=0 indir=" + read_directory); + loadRun.chollaLaunchParams.append(" init=Read_Grid nfile=" + restart_nfile_str + " indir=" + read_directory); #ifdef MHD loadRun.setFiducialNumTimeSteps(854); From 5fe2c5c82ff31e94bdcfa14832d6907a01bc5c35 Mon Sep 17 00:00:00 2001 From: Matthew Abruzzo Date: Fri, 2 Feb 2024 10:44:32 -0500 Subject: [PATCH 692/694] Fixing a small oversight with determining file names in system_tester --- src/io/io.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/io/io.cpp b/src/io/io.cpp index a8e075624..536ede3c3 100644 --- a/src/io/io.cpp +++ b/src/io/io.cpp @@ -2822,7 +2822,7 @@ std::string FnameTemplate::format_fname(int nfile, int file_proc_id, const char *extension = ".txt"; #endif - std::string procID_part = "." + std::to_string(procID); // initialized to empty string + std::string procID_part = "." + std::to_string(file_proc_id); // initialized to empty string return path_prefix + std::to_string(nfile) + pre_extension_suffix + extension + procID_part; } From 9280c1933588e29788193701b29febd6bec7cd67 Mon Sep 17 00:00:00 2001 From: Evan Schneider Date: Fri, 2 Feb 2024 15:49:19 -0500 Subject: [PATCH 693/694] deleted stray <<<< --- src/global/global.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/global/global.cpp b/src/global/global.cpp index e361cec13..3a56305b9 100644 --- a/src/global/global.cpp +++ b/src/global/global.cpp @@ -224,7 +224,6 @@ void Parse_Param(char *name, char *value, struct Parameters *parms) parms->n_rotated_projection = atoi(value); } else if (strcmp(name, "n_slice") == 0) { parms->n_slice = atoi(value); -<<<<<<< HEAD } else if (strcmp(name, "n_out_float32") == 0) { parms->n_out_float32 = atoi(value); } else if (strcmp(name, "out_float32_density") == 0) { From ae75fe35c66cb8fdf0ab6cc6106598468fbe1297 Mon Sep 17 00:00:00 2001 From: Evan Schneider Date: Fri, 2 Feb 2024 15:56:41 -0500 Subject: [PATCH 694/694] fix formatting error --- src/global/global.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/global/global.cpp b/src/global/global.cpp index 3a56305b9..64eac0d5b 100644 --- a/src/global/global.cpp +++ b/src/global/global.cpp @@ -241,13 +241,13 @@ void Parse_Param(char *name, char *value, struct Parameters *parms) parms->out_float32_GasEnergy = atoi(value); #endif // DE #ifdef MHD - } else if (strcmp(name, "out_float32_magnetic_x")==0) { + } else if (strcmp(name, "out_float32_magnetic_x") == 0) { parms->out_float32_magnetic_x = atoi(value); - } else if (strcmp(name, "out_float32_magnetic_y")==0) { + } else if (strcmp(name, "out_float32_magnetic_y") == 0) { parms->out_float32_magnetic_y = atoi(value); - } else if (strcmp(name, "out_float32_magnetic_z")==0) { + } else if (strcmp(name, "out_float32_magnetic_z") == 0) { parms->out_float32_magnetic_z = atoi(value); -#endif // MHD +#endif // MHD } else if (strcmp(name, "output_always") == 0) { int tmp = atoi(value); // In this case the CHOLLA_ASSERT macro runs into issuse with the readability-simplify-boolean-expr clang-tidy check